diff --git a/.gitattributes b/.gitattributes index c017438702131025c989f3591ec11719c7eb6686..2e1cbfdf57232fecb188f4cd7441d6133573e93c 100644 --- a/.gitattributes +++ b/.gitattributes @@ -58,3 +58,7 @@ wandb/run-20220302_135216-v36bieky/run-v36bieky.wandb filter=lfs diff=lfs merge= wandb/run-20220302_154455-17zs7rwf/run-17zs7rwf.wandb filter=lfs diff=lfs merge=lfs -text wandb/run-20220302_180214-gd4yxtv7/run-gd4yxtv7.wandb filter=lfs diff=lfs merge=lfs -text wandb/run-20220302_200036-31e4k99c/run-31e4k99c.wandb filter=lfs diff=lfs merge=lfs -text +wandb/run-20220302_214437-2u4nhnsf/run-2u4nhnsf.wandb filter=lfs diff=lfs merge=lfs -text +wandb/run-20220302_222605-10glutwr/run-10glutwr.wandb filter=lfs diff=lfs merge=lfs -text +wandb/run-20220302_233655-33dtvgaa/run-33dtvgaa.wandb filter=lfs diff=lfs merge=lfs -text +wandb/run-20220303_004520-25bnjrx1/run-25bnjrx1.wandb filter=lfs diff=lfs merge=lfs -text diff --git a/pytorch_model.bin b/pytorch_model.bin index 8964b467c95d1fdf30b72d907b06b5492f066d69..dd9811eba6c1ba23eb9f9181e79a1f44362f1532 100644 --- a/pytorch_model.bin +++ b/pytorch_model.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:156ed52f7b75b3b9e2a76618995ada1f013231ce2df078df8390d1a85e4b86d7 +oid sha256:b81709a6ce9426bdaff484adc1608f8d99dbf7740bc8e82cddfc67464982a7e9 size 3210531882 diff --git a/training_args.bin b/training_args.bin index ba8a23a43357e607f17817f1db148a3d8b225f55..fe00642f1b5f6bdcf25936bbc65bb45daa0bf996 100644 --- a/training_args.bin +++ b/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1761634195e3a1808d695078d68cd84c32a851cb17c65f31d192af379bc3784b +oid sha256:1d43aac09004b2df048bb8c8c596b53ee14f8c791e701e3e501cb95b28c53f18 size 3119 diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log index 8859a102181ad85a363390d69f9bc78e23549759..60fcb2ef5567cef881d8e0950c059eefd8507ade 120000 --- a/wandb/debug-internal.log +++ b/wandb/debug-internal.log @@ -1 +1 @@ -run-20220302_200036-31e4k99c/logs/debug-internal.log \ No newline at end of file +run-20220303_004520-25bnjrx1/logs/debug-internal.log \ No newline at end of file diff --git a/wandb/debug.log b/wandb/debug.log index d9cdeaa879d792e12dc0899975fb7cb721ce0a6a..37575d266e8df9cbfc6d7eb25b3d602c86c6507e 120000 --- a/wandb/debug.log +++ b/wandb/debug.log @@ -1 +1 @@ -run-20220302_200036-31e4k99c/logs/debug.log \ No newline at end of file +run-20220303_004520-25bnjrx1/logs/debug.log \ No newline at end of file diff --git a/wandb/latest-run b/wandb/latest-run index ec4fbad0677ef6aeeed97fdae1eeeca035922ec9..07e94138196db5bbafaad6e2e1ef11f35f92c24e 120000 --- a/wandb/latest-run +++ b/wandb/latest-run @@ -1 +1 @@ -run-20220302_200036-31e4k99c \ No newline at end of file +run-20220303_004520-25bnjrx1 \ No newline at end of file diff --git a/wandb/run-20220302_200036-31e4k99c/files/config.yaml b/wandb/run-20220302_200036-31e4k99c/files/config.yaml index 01e601b1ef73af4abb4f180927aae186ba1242c3..ab250d88dc8e0b30907346d0d4aeb74d2dd7cfce 100644 --- a/wandb/run-20220302_200036-31e4k99c/files/config.yaml +++ b/wandb/run-20220302_200036-31e4k99c/files/config.yaml @@ -10673,7 +10673,14 @@ _wandb: - 1 - 5 - 11 + 2: + - 1 + - 5 + - 11 + - 12 3: + - 1 + - 7 - 13 4: 3.9.5 5: 0.12.10 diff --git a/wandb/run-20220302_200036-31e4k99c/files/output.log b/wandb/run-20220302_200036-31e4k99c/files/output.log index d0179dbd91ab289ad8eb58c9e8408babe0cf3f02..cf23c04447a8aa14799ce7d7598cd8995a3699b3 100644 --- a/wandb/run-20220302_200036-31e4k99c/files/output.log +++ b/wandb/run-20220302_200036-31e4k99c/files/output.log @@ -2792,3 +2792,10 @@ Upload file wandb/run-20220302_200036-31e4k99c/run-31e4k99c.wandb: 43%|██ eval_samples_per_second = 2.793 eval_steps_per_second = 0.234 [INFO|modeling_utils.py:1081] 2022-03-02 21:42:29,550 >> Model weights saved in ./pytorch_model.bin:11<11:47, 3.26s/it] argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. +Upload file wandb/run-20220302_200036-31e4k99c/run-31e4k99c.wandb: 0%| | 32.0k/34.7M [00:00ent in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + return ModelInfo(**d)f.finetuned_from)formers/src/transformers/modelcard.py", line 611, in from_trainercard31, in mainule>ent in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + return ModelInfo(**d)f.finetuned_from)formers/src/transformers/modelcard.py", line 611, in from_trainercard31, in mainule>ent in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. \ No newline at end of file diff --git a/wandb/run-20220302_200036-31e4k99c/files/wandb-summary.json b/wandb/run-20220302_200036-31e4k99c/files/wandb-summary.json index 54eae5c4b8beeaa8121b7cddb276535484d656dd..0b278c0268d2549c5162eb286135b3779e1e22a3 100644 --- a/wandb/run-20220302_200036-31e4k99c/files/wandb-summary.json +++ b/wandb/run-20220302_200036-31e4k99c/files/wandb-summary.json @@ -1 +1 @@ -{"train/loss": 4.5291, "train/learning_rate": 0.00017759999999999998, "train/epoch": 1.0, "train/global_step": 297, "_runtime": 6097, "_timestamp": 1646257333, "_step": 298, "gradients/decoder.transformer.ln_f.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 32.0, 208.0, 492.0, 240.0, 41.0, 4.0, 1.0, 0.0, 1.0], "bins": [-272.8752136230469, -268.13763427734375, -263.4000549316406, -258.6624755859375, -253.92489624023438, -249.18731689453125, -244.44973754882812, -239.712158203125, -234.97457885742188, -230.23699951171875, -225.49942016601562, -220.7618408203125, -216.02426147460938, -211.28668212890625, -206.54910278320312, -201.8115234375, -197.07394409179688, -192.33636474609375, -187.59878540039062, -182.8612060546875, -178.12362670898438, -173.38604736328125, -168.64846801757812, -163.910888671875, -159.17330932617188, -154.43572998046875, -149.69815063476562, -144.9605712890625, -140.22299194335938, -135.48541259765625, -130.74783325195312, -126.01025390625, -121.2726821899414, -116.53510284423828, -111.79752349853516, -107.05994415283203, -102.3223648071289, -97.58478546142578, -92.84720611572266, -88.10962677001953, -83.3720474243164, -78.63446807861328, -73.89688873291016, -69.15930938720703, -64.4217300415039, -59.68415069580078, -54.946571350097656, -50.20899200439453, -45.471412658691406, -40.73383331298828, -35.996253967285156, -31.25867462158203, -26.521095275878906, -21.78351593017578, -17.045936584472656, -12.308357238769531, -7.570777893066406, -2.8331985473632812, 1.9043807983398438, 6.641960144042969, 11.379539489746094, 16.11711883544922, 20.854698181152344, 25.59227752685547, 30.329856872558594]}, "gradients/decoder.transformer.ln_f.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 5.0, 1.0, 2.0, 5.0, 4.0, 5.0, 10.0, 13.0, 18.0, 21.0, 25.0, 28.0, 32.0, 47.0, 44.0, 50.0, 56.0, 52.0, 60.0, 49.0, 59.0, 59.0, 57.0, 49.0, 52.0, 43.0, 39.0, 26.0, 24.0, 20.0, 13.0, 9.0, 10.0, 13.0, 7.0, 3.0, 3.0, 1.0, 2.0, 1.0, 2.0, 1.0, 1.0], "bins": [-83.19218444824219, -81.14936828613281, -79.10655212402344, -77.06373596191406, -75.02091217041016, -72.97809600830078, -70.9352798461914, -68.89246368408203, -66.84964752197266, -64.80683135986328, -62.76401138305664, -60.721195220947266, -58.67837905883789, -56.63555908203125, -54.592742919921875, -52.5499267578125, -50.50710678100586, -48.464290618896484, -46.421470642089844, -44.37865447998047, -42.335838317871094, -40.29302215576172, -38.25020217895508, -36.2073860168457, -34.16456604003906, -32.12174987792969, -30.07893180847168, -28.036113739013672, -25.993297576904297, -23.95047950744629, -21.90766143798828, -19.864845275878906, -17.822025299072266, -15.779208183288574, -13.736391067504883, -11.693572998046875, -9.650755882263184, -7.607938766479492, -5.565120697021484, -3.522303581237793, -1.4794864654541016, 0.563330888748169, 2.6061482429504395, 4.648965835571289, 6.6917829513549805, 8.734600067138672, 10.77741813659668, 12.820235252380371, 14.863052368164062, 16.90587043762207, 18.948686599731445, 20.991504669189453, 23.034320831298828, 25.077138900756836, 27.119956970214844, 29.16277313232422, 31.205591201782227, 33.248409271240234, 35.29122543334961, 37.33404541015625, 39.376861572265625, 41.419677734375, 43.462493896484375, 45.505313873291016, 47.54813003540039]}, "gradients/decoder.transformer.h.23.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 3.0, 3.0, 1.0, 6.0, 7.0, 9.0, 15.0, 16.0, 23.0, 27.0, 39.0, 40.0, 35.0, 58.0, 64.0, 56.0, 63.0, 42.0, 76.0, 66.0, 58.0, 52.0, 56.0, 49.0, 28.0, 27.0, 23.0, 23.0, 14.0, 9.0, 6.0, 6.0, 4.0, 4.0, 3.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.677734375, -2.58111572265625, -2.4844970703125, -2.38787841796875, -2.291259765625, -2.19464111328125, -2.0980224609375, -2.00140380859375, -1.90478515625, -1.80816650390625, -1.7115478515625, -1.61492919921875, -1.518310546875, -1.42169189453125, -1.3250732421875, -1.22845458984375, -1.1318359375, -1.03521728515625, -0.9385986328125, -0.84197998046875, -0.745361328125, -0.64874267578125, -0.5521240234375, -0.45550537109375, -0.35888671875, -0.26226806640625, -0.1656494140625, -0.06903076171875, 0.027587890625, 0.12420654296875, 0.2208251953125, 0.31744384765625, 0.4140625, 0.51068115234375, 0.6072998046875, 0.70391845703125, 0.800537109375, 0.89715576171875, 0.9937744140625, 1.09039306640625, 1.18701171875, 1.28363037109375, 1.3802490234375, 1.47686767578125, 1.573486328125, 1.67010498046875, 1.7667236328125, 1.86334228515625, 1.9599609375, 2.05657958984375, 2.1531982421875, 2.24981689453125, 2.346435546875, 2.44305419921875, 2.5396728515625, 2.63629150390625, 2.73291015625, 2.82952880859375, 2.9261474609375, 3.02276611328125, 3.119384765625, 3.21600341796875, 3.3126220703125, 3.40924072265625, 3.505859375]}, "gradients/decoder.transformer.h.23.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 4.0, 4.0, 5.0, 2.0, 6.0, 19.0, 12.0, 26.0, 46.0, 38.0, 51.0, 101.0, 185.0, 304.0, 489.0, 970.0, 2110.0, 5697.0, 21435.0, 140499.0, 2710044.0, 1216418.0, 74188.0, 14342.0, 4146.0, 1494.0, 696.0, 343.0, 188.0, 141.0, 85.0, 64.0, 43.0, 36.0, 13.0, 10.0, 14.0, 10.0, 5.0, 7.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-11.8671875, -11.4825439453125, -11.097900390625, -10.7132568359375, -10.32861328125, -9.9439697265625, -9.559326171875, -9.1746826171875, -8.7900390625, -8.4053955078125, -8.020751953125, -7.6361083984375, -7.25146484375, -6.8668212890625, -6.482177734375, -6.0975341796875, -5.712890625, -5.3282470703125, -4.943603515625, -4.5589599609375, -4.17431640625, -3.7896728515625, -3.405029296875, -3.0203857421875, -2.6357421875, -2.2510986328125, -1.866455078125, -1.4818115234375, -1.09716796875, -0.7125244140625, -0.327880859375, 0.0567626953125, 0.44140625, 0.8260498046875, 1.210693359375, 1.5953369140625, 1.97998046875, 2.3646240234375, 2.749267578125, 3.1339111328125, 3.5185546875, 3.9031982421875, 4.287841796875, 4.6724853515625, 5.05712890625, 5.4417724609375, 5.826416015625, 6.2110595703125, 6.595703125, 6.9803466796875, 7.364990234375, 7.7496337890625, 8.13427734375, 8.5189208984375, 8.903564453125, 9.2882080078125, 9.6728515625, 10.0574951171875, 10.442138671875, 10.8267822265625, 11.21142578125, 11.5960693359375, 11.980712890625, 12.3653564453125, 12.75]}, "gradients/decoder.transformer.h.23.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 2.0, 1.0, 1.0, 2.0, 2.0, 8.0, 10.0, 6.0, 16.0, 17.0, 39.0, 54.0, 63.0, 97.0, 138.0, 185.0, 271.0, 386.0, 515.0, 551.0, 461.0, 377.0, 260.0, 172.0, 150.0, 80.0, 60.0, 46.0, 29.0, 23.0, 18.0, 11.0, 4.0, 5.0, 4.0, 2.0, 5.0, 2.0, 1.0, 3.0, 2.0, 1.0, 2.0, 0.0, 1.0], "bins": [-11.390625, -11.09283447265625, -10.7950439453125, -10.49725341796875, -10.199462890625, -9.90167236328125, -9.6038818359375, -9.30609130859375, -9.00830078125, -8.71051025390625, -8.4127197265625, -8.11492919921875, -7.817138671875, -7.51934814453125, -7.2215576171875, -6.92376708984375, -6.6259765625, -6.32818603515625, -6.0303955078125, -5.73260498046875, -5.434814453125, -5.13702392578125, -4.8392333984375, -4.54144287109375, -4.24365234375, -3.94586181640625, -3.6480712890625, -3.35028076171875, -3.052490234375, -2.75469970703125, -2.4569091796875, -2.15911865234375, -1.861328125, -1.56353759765625, -1.2657470703125, -0.96795654296875, -0.670166015625, -0.37237548828125, -0.0745849609375, 0.22320556640625, 0.52099609375, 0.81878662109375, 1.1165771484375, 1.41436767578125, 1.712158203125, 2.00994873046875, 2.3077392578125, 2.60552978515625, 2.9033203125, 3.20111083984375, 3.4989013671875, 3.79669189453125, 4.094482421875, 4.39227294921875, 4.6900634765625, 4.98785400390625, 5.28564453125, 5.58343505859375, 5.8812255859375, 6.17901611328125, 6.476806640625, 6.77459716796875, 7.0723876953125, 7.37017822265625, 7.66796875]}, "gradients/decoder.transformer.h.23.mlp.c_fc.weight": {"_type": "histogram", "values": [2.0, 2.0, 1.0, 1.0, 0.0, 2.0, 4.0, 1.0, 1.0, 1.0, 1.0, 4.0, 3.0, 4.0, 4.0, 3.0, 5.0, 7.0, 6.0, 13.0, 25.0, 25.0, 43.0, 71.0, 116.0, 199.0, 330.0, 509.0, 1161.0, 3186.0, 21597.0, 1087839.0, 3027962.0, 43350.0, 4717.0, 1457.0, 703.0, 341.0, 230.0, 119.0, 69.0, 66.0, 30.0, 19.0, 14.0, 11.0, 10.0, 8.0, 6.0, 7.0, 4.0, 3.0, 3.0, 0.0, 2.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-32.78125, -31.7607421875, -30.740234375, -29.7197265625, -28.69921875, -27.6787109375, -26.658203125, -25.6376953125, -24.6171875, -23.5966796875, -22.576171875, -21.5556640625, -20.53515625, -19.5146484375, -18.494140625, -17.4736328125, -16.453125, -15.4326171875, -14.412109375, -13.3916015625, -12.37109375, -11.3505859375, -10.330078125, -9.3095703125, -8.2890625, -7.2685546875, -6.248046875, -5.2275390625, -4.20703125, -3.1865234375, -2.166015625, -1.1455078125, -0.125, 0.8955078125, 1.916015625, 2.9365234375, 3.95703125, 4.9775390625, 5.998046875, 7.0185546875, 8.0390625, 9.0595703125, 10.080078125, 11.1005859375, 12.12109375, 13.1416015625, 14.162109375, 15.1826171875, 16.203125, 17.2236328125, 18.244140625, 19.2646484375, 20.28515625, 21.3056640625, 22.326171875, 23.3466796875, 24.3671875, 25.3876953125, 26.408203125, 27.4287109375, 28.44921875, 29.4697265625, 30.490234375, 31.5107421875, 32.53125]}, "gradients/decoder.transformer.h.23.ln_2.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 7.0, 28.0, 127.0, 301.0, 333.0, 141.0, 62.0, 10.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-120.14483642578125, -117.35649108886719, -114.56815338134766, -111.77981567382812, -108.99147033691406, -106.203125, -103.41478729248047, -100.62644958496094, -97.83810424804688, -95.04975891113281, -92.26142120361328, -89.47308349609375, -86.68473815917969, -83.89639282226562, -81.1080551147461, -78.31971740722656, -75.5313720703125, -72.74302673339844, -69.9546890258789, -67.16635131835938, -64.37800598144531, -61.589664459228516, -58.80132293701172, -56.01298141479492, -53.224639892578125, -50.43629837036133, -47.64795684814453, -44.859615325927734, -42.07127380371094, -39.28293228149414, -36.494590759277344, -33.70624923706055, -30.91790008544922, -28.129558563232422, -25.341217041015625, -22.552875518798828, -19.76453399658203, -16.976192474365234, -14.187850952148438, -11.39950942993164, -8.611167907714844, -5.822826385498047, -3.03448486328125, -0.24614334106445312, 2.5421981811523438, 5.330539703369141, 8.118881225585938, 10.907222747802734, 13.695564270019531, 16.483905792236328, 19.272247314453125, 22.060588836669922, 24.84893035888672, 27.637271881103516, 30.425613403320312, 33.21395492553711, 36.002296447753906, 38.7906379699707, 41.5789794921875, 44.3673210144043, 47.155662536621094, 49.94400405883789, 52.73234558105469, 55.520687103271484, 58.30902862548828]}, "gradients/decoder.transformer.h.23.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 3.0, 0.0, 1.0, 2.0, 2.0, 8.0, 4.0, 9.0, 7.0, 17.0, 19.0, 19.0, 25.0, 25.0, 25.0, 37.0, 42.0, 25.0, 52.0, 49.0, 56.0, 47.0, 68.0, 43.0, 57.0, 42.0, 40.0, 35.0, 49.0, 41.0, 22.0, 29.0, 26.0, 20.0, 11.0, 19.0, 10.0, 8.0, 6.0, 3.0, 6.0, 3.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-39.958919525146484, -38.758872985839844, -37.5588264465332, -36.35877990722656, -35.15873718261719, -33.95869064331055, -32.758644104003906, -31.558597564697266, -30.358551025390625, -29.158504486083984, -27.958457946777344, -26.758413314819336, -25.558366775512695, -24.358320236206055, -23.158275604248047, -21.958229064941406, -20.758182525634766, -19.558135986328125, -18.358089447021484, -17.158044815063477, -15.957998275756836, -14.757951736450195, -13.557906150817871, -12.357860565185547, -11.157814025878906, -9.957767486572266, -8.757721900939941, -7.557675838470459, -6.357629776000977, -5.157583713531494, -3.9575376510620117, -2.7574920654296875, -1.5574455261230469, -0.35739946365356445, 0.842646598815918, 2.0426926612854004, 3.242738723754883, 4.442784786224365, 5.642830848693848, 6.842876434326172, 8.042922973632812, 9.242969512939453, 10.443015098571777, 11.643060684204102, 12.843107223510742, 14.043153762817383, 15.243199348449707, 16.44324493408203, 17.643291473388672, 18.843338012695312, 20.043384552001953, 21.24342918395996, 22.4434757232666, 23.643522262573242, 24.84356689453125, 26.04361343383789, 27.24365997314453, 28.443706512451172, 29.643753051757812, 30.84379768371582, 32.043846130371094, 33.24388885498047, 34.44393539428711, 35.64398193359375, 36.84402847290039]}, "gradients/decoder.transformer.h.23.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 2.0, 3.0, 7.0, 8.0, 11.0, 12.0, 18.0, 18.0, 33.0, 23.0, 30.0, 35.0, 39.0, 42.0, 51.0, 49.0, 46.0, 58.0, 48.0, 53.0, 55.0, 50.0, 44.0, 48.0, 36.0, 40.0, 24.0, 23.0, 25.0, 20.0, 16.0, 17.0, 8.0, 2.0, 2.0, 5.0, 2.0, 6.0, 2.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.53125, -2.4453125, -2.359375, -2.2734375, -2.1875, -2.1015625, -2.015625, -1.9296875, -1.84375, -1.7578125, -1.671875, -1.5859375, -1.5, -1.4140625, -1.328125, -1.2421875, -1.15625, -1.0703125, -0.984375, -0.8984375, -0.8125, -0.7265625, -0.640625, -0.5546875, -0.46875, -0.3828125, -0.296875, -0.2109375, -0.125, -0.0390625, 0.046875, 0.1328125, 0.21875, 0.3046875, 0.390625, 0.4765625, 0.5625, 0.6484375, 0.734375, 0.8203125, 0.90625, 0.9921875, 1.078125, 1.1640625, 1.25, 1.3359375, 1.421875, 1.5078125, 1.59375, 1.6796875, 1.765625, 1.8515625, 1.9375, 2.0234375, 2.109375, 2.1953125, 2.28125, 2.3671875, 2.453125, 2.5390625, 2.625, 2.7109375, 2.796875, 2.8828125, 2.96875]}, "gradients/decoder.transformer.h.23.crossattention.c_proj.weight": {"_type": "histogram", "values": [3.0, 2.0, 3.0, 2.0, 7.0, 6.0, 8.0, 6.0, 14.0, 16.0, 22.0, 42.0, 42.0, 51.0, 71.0, 103.0, 142.0, 225.0, 322.0, 529.0, 884.0, 1431.0, 2400.0, 4305.0, 7995.0, 15463.0, 29674.0, 57031.0, 107705.0, 181675.0, 235497.0, 178967.0, 105655.0, 55694.0, 29226.0, 14976.0, 7870.0, 4261.0, 2333.0, 1410.0, 882.0, 502.0, 359.0, 235.0, 129.0, 115.0, 71.0, 50.0, 47.0, 27.0, 15.0, 20.0, 19.0, 7.0, 8.0, 8.0, 3.0, 3.0, 1.0, 3.0, 2.0, 1.0, 0.0, 1.0], "bins": [-0.39013671875, -0.3773384094238281, -0.36454010009765625, -0.3517417907714844, -0.3389434814453125, -0.3261451721191406, -0.31334686279296875, -0.3005485534667969, -0.287750244140625, -0.2749519348144531, -0.26215362548828125, -0.24935531616210938, -0.2365570068359375, -0.22375869750976562, -0.21096038818359375, -0.19816207885742188, -0.18536376953125, -0.17256546020507812, -0.15976715087890625, -0.14696884155273438, -0.1341705322265625, -0.12137222290039062, -0.10857391357421875, -0.09577560424804688, -0.082977294921875, -0.07017898559570312, -0.05738067626953125, -0.044582366943359375, -0.0317840576171875, -0.018985748291015625, -0.00618743896484375, 0.006610870361328125, 0.0194091796875, 0.032207489013671875, 0.04500579833984375, 0.057804107666015625, 0.0706024169921875, 0.08340072631835938, 0.09619903564453125, 0.10899734497070312, 0.121795654296875, 0.13459396362304688, 0.14739227294921875, 0.16019058227539062, 0.1729888916015625, 0.18578720092773438, 0.19858551025390625, 0.21138381958007812, 0.22418212890625, 0.23698043823242188, 0.24977874755859375, 0.2625770568847656, 0.2753753662109375, 0.2881736755371094, 0.30097198486328125, 0.3137702941894531, 0.326568603515625, 0.3393669128417969, 0.35216522216796875, 0.3649635314941406, 0.3777618408203125, 0.3905601501464844, 0.40335845947265625, 0.4161567687988281, 0.428955078125]}, "gradients/decoder.transformer.h.23.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 2.0, 3.0, 5.0, 6.0, 6.0, 3.0, 8.0, 7.0, 11.0, 18.0, 16.0, 15.0, 34.0, 24.0, 31.0, 39.0, 29.0, 37.0, 40.0, 35.0, 36.0, 57.0, 48.0, 1066.0, 42.0, 40.0, 37.0, 35.0, 30.0, 33.0, 35.0, 36.0, 22.0, 25.0, 22.0, 17.0, 22.0, 13.0, 14.0, 6.0, 10.0, 11.0, 3.0, 2.0, 1.0, 4.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-1.568359375, -1.52252197265625, -1.4766845703125, -1.43084716796875, -1.385009765625, -1.33917236328125, -1.2933349609375, -1.24749755859375, -1.20166015625, -1.15582275390625, -1.1099853515625, -1.06414794921875, -1.018310546875, -0.97247314453125, -0.9266357421875, -0.88079833984375, -0.8349609375, -0.78912353515625, -0.7432861328125, -0.69744873046875, -0.651611328125, -0.60577392578125, -0.5599365234375, -0.51409912109375, -0.46826171875, -0.42242431640625, -0.3765869140625, -0.33074951171875, -0.284912109375, -0.23907470703125, -0.1932373046875, -0.14739990234375, -0.1015625, -0.05572509765625, -0.0098876953125, 0.03594970703125, 0.081787109375, 0.12762451171875, 0.1734619140625, 0.21929931640625, 0.26513671875, 0.31097412109375, 0.3568115234375, 0.40264892578125, 0.448486328125, 0.49432373046875, 0.5401611328125, 0.58599853515625, 0.6318359375, 0.67767333984375, 0.7235107421875, 0.76934814453125, 0.815185546875, 0.86102294921875, 0.9068603515625, 0.95269775390625, 0.99853515625, 1.04437255859375, 1.0902099609375, 1.13604736328125, 1.181884765625, 1.22772216796875, 1.2735595703125, 1.31939697265625, 1.365234375]}, "gradients/decoder.transformer.h.23.crossattention.c_attn.weight": {"_type": "histogram", "values": [3.0, 2.0, 3.0, 6.0, 13.0, 14.0, 30.0, 34.0, 45.0, 67.0, 97.0, 164.0, 244.0, 350.0, 571.0, 919.0, 1389.0, 2003.0, 3135.0, 5070.0, 7678.0, 11961.0, 18538.0, 29104.0, 45075.0, 69260.0, 100979.0, 137419.0, 1205524.0, 141713.0, 106626.0, 72900.0, 48477.0, 31430.0, 19933.0, 12925.0, 8342.0, 5217.0, 3542.0, 2277.0, 1372.0, 936.0, 609.0, 432.0, 250.0, 146.0, 105.0, 81.0, 46.0, 35.0, 17.0, 17.0, 5.0, 6.0, 2.0, 3.0, 4.0, 3.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.160400390625, -0.1547870635986328, -0.14917373657226562, -0.14356040954589844, -0.13794708251953125, -0.13233375549316406, -0.12672042846679688, -0.12110710144042969, -0.1154937744140625, -0.10988044738769531, -0.10426712036132812, -0.09865379333496094, -0.09304046630859375, -0.08742713928222656, -0.08181381225585938, -0.07620048522949219, -0.070587158203125, -0.06497383117675781, -0.059360504150390625, -0.05374717712402344, -0.04813385009765625, -0.04252052307128906, -0.036907196044921875, -0.03129386901855469, -0.0256805419921875, -0.020067214965820312, -0.014453887939453125, -0.008840560913085938, -0.00322723388671875, 0.0023860931396484375, 0.007999420166015625, 0.013612747192382812, 0.01922607421875, 0.024839401245117188, 0.030452728271484375, 0.03606605529785156, 0.04167938232421875, 0.04729270935058594, 0.052906036376953125, 0.05851936340332031, 0.0641326904296875, 0.06974601745605469, 0.07535934448242188, 0.08097267150878906, 0.08658599853515625, 0.09219932556152344, 0.09781265258789062, 0.10342597961425781, 0.109039306640625, 0.11465263366699219, 0.12026596069335938, 0.12587928771972656, 0.13149261474609375, 0.13710594177246094, 0.14271926879882812, 0.1483325958251953, 0.1539459228515625, 0.1595592498779297, 0.16517257690429688, 0.17078590393066406, 0.17639923095703125, 0.18201255798339844, 0.18762588500976562, 0.1932392120361328, 0.1988525390625]}, "gradients/decoder.transformer.h.23.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 3.0, 1.0, 2.0, 2.0, 0.0, 4.0, 8.0, 3.0, 3.0, 2.0, 7.0, 5.0, 11.0, 12.0, 12.0, 32.0, 54.0, 61.0, 81.0, 136.0, 148.0, 127.0, 95.0, 59.0, 31.0, 18.0, 18.0, 14.0, 16.0, 12.0, 6.0, 5.0, 7.0, 5.0, 3.0, 2.0, 3.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.00228118896484375, -0.002206355333328247, -0.002131521701812744, -0.002056688070297241, -0.0019818544387817383, -0.0019070208072662354, -0.0018321871757507324, -0.0017573535442352295, -0.0016825199127197266, -0.0016076862812042236, -0.0015328526496887207, -0.0014580190181732178, -0.0013831853866577148, -0.001308351755142212, -0.001233518123626709, -0.001158684492111206, -0.0010838508605957031, -0.0010090172290802002, -0.0009341835975646973, -0.0008593499660491943, -0.0007845163345336914, -0.0007096827030181885, -0.0006348490715026855, -0.0005600154399871826, -0.0004851818084716797, -0.00041034817695617676, -0.00033551454544067383, -0.0002606809139251709, -0.00018584728240966797, -0.00011101365089416504, -3.618001937866211e-05, 3.865361213684082e-05, 0.00011348724365234375, 0.00018832087516784668, 0.0002631545066833496, 0.00033798813819885254, 0.00041282176971435547, 0.0004876554012298584, 0.0005624890327453613, 0.0006373226642608643, 0.0007121562957763672, 0.0007869899272918701, 0.000861823558807373, 0.000936657190322876, 0.001011490821838379, 0.0010863244533538818, 0.0011611580848693848, 0.0012359917163848877, 0.0013108253479003906, 0.0013856589794158936, 0.0014604926109313965, 0.0015353262424468994, 0.0016101598739624023, 0.0016849935054779053, 0.0017598271369934082, 0.0018346607685089111, 0.001909494400024414, 0.001984328031539917, 0.00205916166305542, 0.002133995294570923, 0.0022088289260864258, 0.0022836625576019287, 0.0023584961891174316, 0.0024333298206329346, 0.0025081634521484375]}, "gradients/decoder.transformer.h.23.crossattention.q_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 3.0, 1.0, 2.0, 1.0, 2.0, 1.0, 3.0, 2.0, 9.0, 2.0, 7.0, 7.0, 6.0, 14.0, 13.0, 10.0, 16.0, 21.0, 25.0, 41.0, 38.0, 74.0, 146.0, 166.0, 328.0, 930.0, 82191.0, 961044.0, 2316.0, 451.0, 228.0, 129.0, 72.0, 57.0, 34.0, 44.0, 24.0, 23.0, 16.0, 19.0, 11.0, 5.0, 8.0, 6.0, 3.0, 4.0, 3.0, 4.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 2.0], "bins": [-0.039276123046875, -0.03797483444213867, -0.036673545837402344, -0.035372257232666016, -0.03407096862792969, -0.03276968002319336, -0.03146839141845703, -0.030167102813720703, -0.028865814208984375, -0.027564525604248047, -0.02626323699951172, -0.02496194839477539, -0.023660659790039062, -0.022359371185302734, -0.021058082580566406, -0.019756793975830078, -0.01845550537109375, -0.017154216766357422, -0.015852928161621094, -0.014551639556884766, -0.013250350952148438, -0.01194906234741211, -0.010647773742675781, -0.009346485137939453, -0.008045196533203125, -0.006743907928466797, -0.005442619323730469, -0.004141330718994141, -0.0028400421142578125, -0.0015387535095214844, -0.00023746490478515625, 0.0010638236999511719, 0.0023651123046875, 0.003666400909423828, 0.004967689514160156, 0.006268978118896484, 0.0075702667236328125, 0.00887155532836914, 0.010172843933105469, 0.011474132537841797, 0.012775421142578125, 0.014076709747314453, 0.015377998352050781, 0.01667928695678711, 0.017980575561523438, 0.019281864166259766, 0.020583152770996094, 0.021884441375732422, 0.02318572998046875, 0.024487018585205078, 0.025788307189941406, 0.027089595794677734, 0.028390884399414062, 0.02969217300415039, 0.03099346160888672, 0.03229475021362305, 0.033596038818359375, 0.0348973274230957, 0.03619861602783203, 0.03749990463256836, 0.03880119323730469, 0.040102481842041016, 0.041403770446777344, 0.04270505905151367, 0.04400634765625]}, "gradients/decoder.transformer.h.23.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 11.0, 538.0, 460.0, 5.0, 1.0, 1.0], "bins": [-0.030485354363918304, -0.029977165162563324, -0.029468975961208344, -0.028960786759853363, -0.028452597558498383, -0.02794441021978855, -0.02743622101843357, -0.02692803181707859, -0.02641984261572361, -0.02591165341436863, -0.02540346421301365, -0.02489527501165867, -0.024387087672948837, -0.023878898471593857, -0.023370709270238876, -0.022862520068883896, -0.022354330867528915, -0.021846141666173935, -0.021337952464818954, -0.020829763263463974, -0.020321574062108994, -0.019813386723399162, -0.019305197522044182, -0.0187970083206892, -0.01828881911933422, -0.01778062991797924, -0.01727244071662426, -0.01676425151526928, -0.016256064176559448, -0.015747874975204468, -0.015239685773849487, -0.014731496572494507, -0.014223309233784676, -0.013715120032429695, -0.013206930831074715, -0.012698742561042309, -0.012190553359687328, -0.011682364158332348, -0.011174175888299942, -0.010665986686944962, -0.010157797485589981, -0.009649608284235, -0.00914141908288002, -0.008633230812847614, -0.008125041611492634, -0.007616852410137653, -0.00710866367444396, -0.006600474938750267, -0.006092285271733999, -0.005584096536040306, -0.005075907334685326, -0.004567718133330345, -0.004059529397636652, -0.003551340429112315, -0.0030431514605879784, -0.0025349624920636415, -0.0020267735235393047, -0.001518584555014968, -0.001010395586490631, -0.0005022066179662943, 5.982350558042526e-06, 0.0005141713190823793, 0.0010223602876067162, 0.001530549256131053, 0.00203873822465539]}, "gradients/decoder.transformer.h.23.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 2.0, 0.0, 5.0, 2.0, 6.0, 6.0, 9.0, 14.0, 18.0, 27.0, 27.0, 41.0, 23.0, 36.0, 42.0, 47.0, 46.0, 49.0, 53.0, 63.0, 42.0, 53.0, 41.0, 58.0, 42.0, 44.0, 32.0, 32.0, 33.0, 28.0, 24.0, 9.0, 16.0, 15.0, 11.0, 8.0, 6.0, 0.0, 4.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.0014300942420959473, -0.0013913484290242195, -0.0013526026159524918, -0.001313856802880764, -0.0012751109898090363, -0.0012363651767373085, -0.0011976193636655807, -0.001158873550593853, -0.0011201277375221252, -0.0010813819244503975, -0.0010426361113786697, -0.001003890298306942, -0.0009651444852352142, -0.0009263986721634865, -0.0008876528590917587, -0.000848907046020031, -0.0008101612329483032, -0.0007714154198765755, -0.0007326696068048477, -0.00069392379373312, -0.0006551779806613922, -0.0006164321675896645, -0.0005776863545179367, -0.000538940541446209, -0.0005001947283744812, -0.00046144891530275345, -0.0004227031022310257, -0.00038395728915929794, -0.0003452114760875702, -0.00030646566301584244, -0.0002677198499441147, -0.00022897403687238693, -0.00019022822380065918, -0.00015148241072893143, -0.00011273659765720367, -7.399078458547592e-05, -3.524497151374817e-05, 3.5008415579795837e-06, 4.2246654629707336e-05, 8.099246770143509e-05, 0.00011973828077316284, 0.0001584840938448906, 0.00019722990691661835, 0.0002359757199883461, 0.00027472153306007385, 0.0003134673461318016, 0.00035221315920352936, 0.0003909589722752571, 0.00042970478534698486, 0.0004684505984187126, 0.0005071964114904404, 0.0005459422245621681, 0.0005846880376338959, 0.0006234338507056236, 0.0006621796637773514, 0.0007009254768490791, 0.0007396712899208069, 0.0007784171029925346, 0.0008171629160642624, 0.0008559087291359901, 0.0008946545422077179, 0.0009334003552794456, 0.0009721461683511734, 0.0010108919814229012, 0.001049637794494629]}, "gradients/decoder.transformer.h.23.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 2.0, 3.0, 7.0, 8.0, 11.0, 12.0, 18.0, 18.0, 33.0, 23.0, 30.0, 35.0, 40.0, 41.0, 51.0, 49.0, 46.0, 58.0, 48.0, 53.0, 55.0, 50.0, 44.0, 48.0, 36.0, 40.0, 24.0, 23.0, 25.0, 20.0, 16.0, 17.0, 8.0, 2.0, 2.0, 5.0, 2.0, 6.0, 2.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.53125, -2.4453125, -2.359375, -2.2734375, -2.1875, -2.1015625, -2.015625, -1.9296875, -1.84375, -1.7578125, -1.671875, -1.5859375, -1.5, -1.4140625, -1.328125, -1.2421875, -1.15625, -1.0703125, -0.984375, -0.8984375, -0.8125, -0.7265625, -0.640625, -0.5546875, -0.46875, -0.3828125, -0.296875, -0.2109375, -0.125, -0.0390625, 0.046875, 0.1328125, 0.21875, 0.3046875, 0.390625, 0.4765625, 0.5625, 0.6484375, 0.734375, 0.8203125, 0.90625, 0.9921875, 1.078125, 1.1640625, 1.25, 1.3359375, 1.421875, 1.5078125, 1.59375, 1.6796875, 1.765625, 1.8515625, 1.9375, 2.0234375, 2.109375, 2.1953125, 2.28125, 2.3671875, 2.453125, 2.5390625, 2.625, 2.7109375, 2.796875, 2.8828125, 2.96875]}, "gradients/decoder.transformer.h.23.attn.c_proj.weight": {"_type": "histogram", "values": [3.0, 4.0, 2.0, 2.0, 5.0, 7.0, 10.0, 8.0, 10.0, 14.0, 24.0, 31.0, 38.0, 68.0, 76.0, 134.0, 186.0, 273.0, 384.0, 563.0, 888.0, 1241.0, 1969.0, 3037.0, 5092.0, 8990.0, 17954.0, 44195.0, 135317.0, 378143.0, 290944.0, 93054.0, 32445.0, 14050.0, 7151.0, 4352.0, 2738.0, 1726.0, 1112.0, 748.0, 501.0, 323.0, 204.0, 158.0, 97.0, 77.0, 63.0, 45.0, 32.0, 31.0, 13.0, 13.0, 10.0, 4.0, 4.0, 4.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0], "bins": [-6.3125, -6.1009521484375, -5.889404296875, -5.6778564453125, -5.46630859375, -5.2547607421875, -5.043212890625, -4.8316650390625, -4.6201171875, -4.4085693359375, -4.197021484375, -3.9854736328125, -3.77392578125, -3.5623779296875, -3.350830078125, -3.1392822265625, -2.927734375, -2.7161865234375, -2.504638671875, -2.2930908203125, -2.08154296875, -1.8699951171875, -1.658447265625, -1.4468994140625, -1.2353515625, -1.0238037109375, -0.812255859375, -0.6007080078125, -0.38916015625, -0.1776123046875, 0.033935546875, 0.2454833984375, 0.45703125, 0.6685791015625, 0.880126953125, 1.0916748046875, 1.30322265625, 1.5147705078125, 1.726318359375, 1.9378662109375, 2.1494140625, 2.3609619140625, 2.572509765625, 2.7840576171875, 2.99560546875, 3.2071533203125, 3.418701171875, 3.6302490234375, 3.841796875, 4.0533447265625, 4.264892578125, 4.4764404296875, 4.68798828125, 4.8995361328125, 5.111083984375, 5.3226318359375, 5.5341796875, 5.7457275390625, 5.957275390625, 6.1688232421875, 6.38037109375, 6.5919189453125, 6.803466796875, 7.0150146484375, 7.2265625]}, "gradients/decoder.transformer.h.23.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 2.0, 1.0, 5.0, 3.0, 3.0, 1.0, 4.0, 10.0, 8.0, 9.0, 12.0, 11.0, 11.0, 14.0, 13.0, 23.0, 15.0, 21.0, 30.0, 21.0, 30.0, 43.0, 44.0, 42.0, 56.0, 102.0, 162.0, 287.0, 1312.0, 192.0, 116.0, 87.0, 47.0, 34.0, 45.0, 30.0, 31.0, 26.0, 22.0, 26.0, 13.0, 19.0, 11.0, 14.0, 13.0, 13.0, 8.0, 5.0, 3.0, 6.0, 4.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 2.0], "bins": [-5.84375, -5.64349365234375, -5.4432373046875, -5.24298095703125, -5.042724609375, -4.84246826171875, -4.6422119140625, -4.44195556640625, -4.24169921875, -4.04144287109375, -3.8411865234375, -3.64093017578125, -3.440673828125, -3.24041748046875, -3.0401611328125, -2.83990478515625, -2.6396484375, -2.43939208984375, -2.2391357421875, -2.03887939453125, -1.838623046875, -1.63836669921875, -1.4381103515625, -1.23785400390625, -1.03759765625, -0.83734130859375, -0.6370849609375, -0.43682861328125, -0.236572265625, -0.03631591796875, 0.1639404296875, 0.36419677734375, 0.564453125, 0.76470947265625, 0.9649658203125, 1.16522216796875, 1.365478515625, 1.56573486328125, 1.7659912109375, 1.96624755859375, 2.16650390625, 2.36676025390625, 2.5670166015625, 2.76727294921875, 2.967529296875, 3.16778564453125, 3.3680419921875, 3.56829833984375, 3.7685546875, 3.96881103515625, 4.1690673828125, 4.36932373046875, 4.569580078125, 4.76983642578125, 4.9700927734375, 5.17034912109375, 5.37060546875, 5.57086181640625, 5.7711181640625, 5.97137451171875, 6.171630859375, 6.37188720703125, 6.5721435546875, 6.77239990234375, 6.97265625]}, "gradients/decoder.transformer.h.23.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0, 3.0, 5.0, 4.0, 5.0, 9.0, 9.0, 8.0, 14.0, 16.0, 17.0, 23.0, 13.0, 17.0, 22.0, 25.0, 44.0, 34.0, 69.0, 112.0, 245.0, 702.0, 7309.0, 2143900.0, 986852.0, 4968.0, 591.0, 212.0, 138.0, 60.0, 44.0, 33.0, 32.0, 22.0, 16.0, 19.0, 16.0, 16.0, 22.0, 6.0, 9.0, 8.0, 11.0, 6.0, 6.0, 6.0, 5.0, 3.0, 4.0, 1.0, 1.0, 6.0], "bins": [-28.765625, -27.96533203125, -27.1650390625, -26.36474609375, -25.564453125, -24.76416015625, -23.9638671875, -23.16357421875, -22.36328125, -21.56298828125, -20.7626953125, -19.96240234375, -19.162109375, -18.36181640625, -17.5615234375, -16.76123046875, -15.9609375, -15.16064453125, -14.3603515625, -13.56005859375, -12.759765625, -11.95947265625, -11.1591796875, -10.35888671875, -9.55859375, -8.75830078125, -7.9580078125, -7.15771484375, -6.357421875, -5.55712890625, -4.7568359375, -3.95654296875, -3.15625, -2.35595703125, -1.5556640625, -0.75537109375, 0.044921875, 0.84521484375, 1.6455078125, 2.44580078125, 3.24609375, 4.04638671875, 4.8466796875, 5.64697265625, 6.447265625, 7.24755859375, 8.0478515625, 8.84814453125, 9.6484375, 10.44873046875, 11.2490234375, 12.04931640625, 12.849609375, 13.64990234375, 14.4501953125, 15.25048828125, 16.05078125, 16.85107421875, 17.6513671875, 18.45166015625, 19.251953125, 20.05224609375, 20.8525390625, 21.65283203125, 22.453125]}, "gradients/decoder.transformer.h.23.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 6.0, 134.0, 807.0, 69.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-40.606422424316406, -37.03171920776367, -33.4570198059082, -29.88231658935547, -26.307615280151367, -22.732913970947266, -19.15821075439453, -15.58350944519043, -12.008808135986328, -8.434106826782227, -4.859404563903809, -1.2847023010253906, 2.289999008178711, 5.8647003173828125, 9.439403533935547, 13.014104843139648, 16.58880615234375, 20.16350746154785, 23.738208770751953, 27.312911987304688, 30.88761329650879, 34.46231460571289, 38.037017822265625, 41.611717224121094, 45.18642044067383, 48.76112365722656, 52.33582305908203, 55.910526275634766, 59.4852294921875, 63.05992889404297, 66.63462829589844, 70.20933532714844, 73.78404235839844, 77.3587417602539, 80.9334487915039, 84.50814819335938, 88.08284759521484, 91.65754699707031, 95.23225402832031, 98.80695343017578, 102.38165283203125, 105.95635223388672, 109.53105926513672, 113.10575866699219, 116.68045806884766, 120.25515747070312, 123.82986450195312, 127.4045639038086, 130.97927856445312, 134.55398559570312, 138.12867736816406, 141.70338439941406, 145.27809143066406, 148.852783203125, 152.427490234375, 156.002197265625, 159.57688903808594, 163.15159606933594, 166.72628784179688, 170.30099487304688, 173.87570190429688, 177.4503936767578, 181.0251007080078, 184.5998077392578, 188.17449951171875]}, "gradients/decoder.transformer.h.23.ln_1.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 2.0, 3.0, 2.0, 5.0, 6.0, 6.0, 7.0, 7.0, 10.0, 10.0, 12.0, 21.0, 18.0, 13.0, 39.0, 25.0, 27.0, 33.0, 27.0, 37.0, 42.0, 44.0, 48.0, 36.0, 32.0, 43.0, 46.0, 51.0, 38.0, 35.0, 32.0, 16.0, 32.0, 26.0, 22.0, 19.0, 28.0, 12.0, 21.0, 18.0, 15.0, 7.0, 7.0, 6.0, 4.0, 5.0, 6.0, 4.0, 4.0, 2.0, 2.0, 0.0, 2.0], "bins": [-23.8145694732666, -23.078014373779297, -22.341461181640625, -21.60490608215332, -20.868350982666016, -20.131797790527344, -19.39524269104004, -18.658687591552734, -17.922134399414062, -17.185579299926758, -16.449026107788086, -15.712471008300781, -14.975915908813477, -14.239361763000488, -13.5028076171875, -12.766252517700195, -12.02969741821289, -11.293143272399902, -10.556588172912598, -9.82003402709961, -9.083478927612305, -8.346924781799316, -7.610370635986328, -6.873816013336182, -6.137261390686035, -5.400706768035889, -4.664152145385742, -3.927597999572754, -3.1910433769226074, -2.454488754272461, -1.7179346084594727, -0.9813799858093262, -0.24482345581054688, 0.49173104763031006, 1.228285551071167, 1.9648399353027344, 2.701394557952881, 3.4379491806030273, 4.174503326416016, 4.911057949066162, 5.647612571716309, 6.384167194366455, 7.120721817016602, 7.85727596282959, 8.593830108642578, 9.330385208129883, 10.066939353942871, 10.80349349975586, 11.540048599243164, 12.276602745056152, 13.013157844543457, 13.749711990356445, 14.48626708984375, 15.222821235656738, 15.959375381469727, 16.69593048095703, 17.432483673095703, 18.169038772583008, 18.90559196472168, 19.642147064208984, 20.37870216369629, 21.115257263183594, 21.851810455322266, 22.58836555480957, 23.324920654296875]}, "gradients/decoder.transformer.h.22.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 1.0, 2.0, 6.0, 8.0, 6.0, 13.0, 16.0, 19.0, 22.0, 22.0, 29.0, 30.0, 34.0, 41.0, 54.0, 54.0, 46.0, 54.0, 59.0, 41.0, 58.0, 53.0, 38.0, 46.0, 43.0, 39.0, 34.0, 29.0, 30.0, 21.0, 12.0, 15.0, 10.0, 10.0, 5.0, 3.0, 2.0, 2.0, 3.0, 2.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.0078125, -2.91387939453125, -2.8199462890625, -2.72601318359375, -2.632080078125, -2.53814697265625, -2.4442138671875, -2.35028076171875, -2.25634765625, -2.16241455078125, -2.0684814453125, -1.97454833984375, -1.880615234375, -1.78668212890625, -1.6927490234375, -1.59881591796875, -1.5048828125, -1.41094970703125, -1.3170166015625, -1.22308349609375, -1.129150390625, -1.03521728515625, -0.9412841796875, -0.84735107421875, -0.75341796875, -0.65948486328125, -0.5655517578125, -0.47161865234375, -0.377685546875, -0.28375244140625, -0.1898193359375, -0.09588623046875, -0.001953125, 0.09197998046875, 0.1859130859375, 0.27984619140625, 0.373779296875, 0.46771240234375, 0.5616455078125, 0.65557861328125, 0.74951171875, 0.84344482421875, 0.9373779296875, 1.03131103515625, 1.125244140625, 1.21917724609375, 1.3131103515625, 1.40704345703125, 1.5009765625, 1.59490966796875, 1.6888427734375, 1.78277587890625, 1.876708984375, 1.97064208984375, 2.0645751953125, 2.15850830078125, 2.25244140625, 2.34637451171875, 2.4403076171875, 2.53424072265625, 2.628173828125, 2.72210693359375, 2.8160400390625, 2.90997314453125, 3.00390625]}, "gradients/decoder.transformer.h.22.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 3.0, 2.0, 3.0, 3.0, 6.0, 8.0, 12.0, 15.0, 16.0, 22.0, 37.0, 59.0, 70.0, 119.0, 153.0, 230.0, 336.0, 516.0, 1094.0, 2519.0, 8697.0, 44155.0, 467351.0, 3195936.0, 418566.0, 41395.0, 8218.0, 2359.0, 944.0, 457.0, 299.0, 197.0, 135.0, 98.0, 61.0, 50.0, 32.0, 28.0, 26.0, 21.0, 11.0, 11.0, 7.0, 3.0, 4.0, 1.0, 6.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-11.046875, -10.6844482421875, -10.322021484375, -9.9595947265625, -9.59716796875, -9.2347412109375, -8.872314453125, -8.5098876953125, -8.1474609375, -7.7850341796875, -7.422607421875, -7.0601806640625, -6.69775390625, -6.3353271484375, -5.972900390625, -5.6104736328125, -5.248046875, -4.8856201171875, -4.523193359375, -4.1607666015625, -3.79833984375, -3.4359130859375, -3.073486328125, -2.7110595703125, -2.3486328125, -1.9862060546875, -1.623779296875, -1.2613525390625, -0.89892578125, -0.5364990234375, -0.174072265625, 0.1883544921875, 0.55078125, 0.9132080078125, 1.275634765625, 1.6380615234375, 2.00048828125, 2.3629150390625, 2.725341796875, 3.0877685546875, 3.4501953125, 3.8126220703125, 4.175048828125, 4.5374755859375, 4.89990234375, 5.2623291015625, 5.624755859375, 5.9871826171875, 6.349609375, 6.7120361328125, 7.074462890625, 7.4368896484375, 7.79931640625, 8.1617431640625, 8.524169921875, 8.8865966796875, 9.2490234375, 9.6114501953125, 9.973876953125, 10.3363037109375, 10.69873046875, 11.0611572265625, 11.423583984375, 11.7860107421875, 12.1484375]}, "gradients/decoder.transformer.h.22.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 5.0, 4.0, 6.0, 9.0, 11.0, 10.0, 31.0, 37.0, 57.0, 76.0, 89.0, 128.0, 208.0, 282.0, 389.0, 486.0, 531.0, 456.0, 391.0, 262.0, 179.0, 147.0, 80.0, 66.0, 39.0, 30.0, 25.0, 16.0, 8.0, 8.0, 6.0, 3.0, 2.0, 4.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.7265625, -9.426513671875, -9.12646484375, -8.826416015625, -8.5263671875, -8.226318359375, -7.92626953125, -7.626220703125, -7.326171875, -7.026123046875, -6.72607421875, -6.426025390625, -6.1259765625, -5.825927734375, -5.52587890625, -5.225830078125, -4.92578125, -4.625732421875, -4.32568359375, -4.025634765625, -3.7255859375, -3.425537109375, -3.12548828125, -2.825439453125, -2.525390625, -2.225341796875, -1.92529296875, -1.625244140625, -1.3251953125, -1.025146484375, -0.72509765625, -0.425048828125, -0.125, 0.175048828125, 0.47509765625, 0.775146484375, 1.0751953125, 1.375244140625, 1.67529296875, 1.975341796875, 2.275390625, 2.575439453125, 2.87548828125, 3.175537109375, 3.4755859375, 3.775634765625, 4.07568359375, 4.375732421875, 4.67578125, 4.975830078125, 5.27587890625, 5.575927734375, 5.8759765625, 6.176025390625, 6.47607421875, 6.776123046875, 7.076171875, 7.376220703125, 7.67626953125, 7.976318359375, 8.2763671875, 8.576416015625, 8.87646484375, 9.176513671875, 9.4765625]}, "gradients/decoder.transformer.h.22.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 2.0, 1.0, 6.0, 7.0, 11.0, 20.0, 21.0, 27.0, 45.0, 87.0, 150.0, 250.0, 479.0, 1013.0, 2619.0, 8860.0, 45050.0, 437731.0, 3009809.0, 614076.0, 58242.0, 10537.0, 2963.0, 1102.0, 561.0, 279.0, 152.0, 68.0, 51.0, 25.0, 15.0, 14.0, 7.0, 8.0, 2.0, 2.0, 1.0, 1.0, 0.0, 3.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-17.59375, -17.053466796875, -16.51318359375, -15.972900390625, -15.4326171875, -14.892333984375, -14.35205078125, -13.811767578125, -13.271484375, -12.731201171875, -12.19091796875, -11.650634765625, -11.1103515625, -10.570068359375, -10.02978515625, -9.489501953125, -8.94921875, -8.408935546875, -7.86865234375, -7.328369140625, -6.7880859375, -6.247802734375, -5.70751953125, -5.167236328125, -4.626953125, -4.086669921875, -3.54638671875, -3.006103515625, -2.4658203125, -1.925537109375, -1.38525390625, -0.844970703125, -0.3046875, 0.235595703125, 0.77587890625, 1.316162109375, 1.8564453125, 2.396728515625, 2.93701171875, 3.477294921875, 4.017578125, 4.557861328125, 5.09814453125, 5.638427734375, 6.1787109375, 6.718994140625, 7.25927734375, 7.799560546875, 8.33984375, 8.880126953125, 9.42041015625, 9.960693359375, 10.5009765625, 11.041259765625, 11.58154296875, 12.121826171875, 12.662109375, 13.202392578125, 13.74267578125, 14.282958984375, 14.8232421875, 15.363525390625, 15.90380859375, 16.444091796875, 16.984375]}, "gradients/decoder.transformer.h.22.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 9.0, 43.0, 181.0, 365.0, 299.0, 98.0, 18.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-44.43085861206055, -40.977272033691406, -37.523685455322266, -34.070098876953125, -30.616514205932617, -27.162927627563477, -23.70934295654297, -20.255756378173828, -16.802169799804688, -13.348583221435547, -9.894997596740723, -6.441411972045898, -2.987825393676758, 0.4657611846923828, 3.9193458557128906, 7.372932434082031, 10.826519012451172, 14.280105590820312, 17.733692169189453, 21.18727684020996, 24.6408634185791, 28.094449996948242, 31.54803466796875, 35.00162124633789, 38.45520782470703, 41.90879440307617, 45.36238098144531, 48.81596374511719, 52.269554138183594, 55.72313690185547, 59.17672348022461, 62.63031005859375, 66.08389282226562, 69.5374755859375, 72.9910659790039, 76.44464874267578, 79.89823913574219, 83.35182189941406, 86.80540466308594, 90.25899505615234, 93.71258544921875, 97.16616821289062, 100.61975860595703, 104.0733413696289, 107.52693176269531, 110.98051452636719, 114.43409729003906, 117.88768768310547, 121.34127044677734, 124.79485321044922, 128.24844360351562, 131.7020263671875, 135.15560913085938, 138.6092071533203, 142.0627899169922, 145.51637268066406, 148.96995544433594, 152.4235382080078, 155.8771209716797, 159.33071899414062, 162.7843017578125, 166.23788452148438, 169.69146728515625, 173.14505004882812, 176.59864807128906]}, "gradients/decoder.transformer.h.22.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 4.0, 0.0, 0.0, 1.0, 1.0, 6.0, 4.0, 13.0, 6.0, 5.0, 6.0, 11.0, 17.0, 14.0, 24.0, 21.0, 26.0, 26.0, 32.0, 29.0, 42.0, 45.0, 33.0, 43.0, 36.0, 39.0, 28.0, 51.0, 44.0, 43.0, 45.0, 44.0, 27.0, 29.0, 28.0, 26.0, 27.0, 26.0, 17.0, 21.0, 15.0, 19.0, 11.0, 6.0, 6.0, 4.0, 8.0, 5.0, 1.0, 2.0, 0.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-26.333749771118164, -25.468366622924805, -24.602985382080078, -23.73760223388672, -22.87221908569336, -22.0068359375, -21.14145278930664, -20.276071548461914, -19.410688400268555, -18.545305252075195, -17.67992401123047, -16.81454086303711, -15.94915771484375, -15.08377456665039, -14.218392372131348, -13.353010177612305, -12.487627029418945, -11.622243881225586, -10.756861686706543, -9.8914794921875, -9.02609634399414, -8.160713195800781, -7.295331001281738, -6.429948329925537, -5.564565658569336, -4.699182987213135, -3.8338003158569336, -2.9684176445007324, -2.1030349731445312, -1.23765230178833, -0.3722696304321289, 0.49311304092407227, 1.3584976196289062, 2.2238802909851074, 3.0892629623413086, 3.9546456336975098, 4.820028305053711, 5.685410976409912, 6.550793647766113, 7.4161763191223145, 8.281558990478516, 9.146942138671875, 10.012324333190918, 10.877706527709961, 11.74308967590332, 12.60847282409668, 13.473855018615723, 14.339237213134766, 15.204620361328125, 16.070003509521484, 16.935386657714844, 17.80076789855957, 18.66615104675293, 19.53153419494629, 20.396915435791016, 21.262298583984375, 22.127681732177734, 22.993064880371094, 23.858448028564453, 24.72382926940918, 25.58921241760254, 26.4545955657959, 27.319976806640625, 28.185359954833984, 29.050743103027344]}, "gradients/decoder.transformer.h.22.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 3.0, 4.0, 4.0, 6.0, 7.0, 8.0, 10.0, 10.0, 15.0, 21.0, 18.0, 17.0, 34.0, 29.0, 27.0, 17.0, 41.0, 40.0, 41.0, 45.0, 49.0, 38.0, 42.0, 42.0, 44.0, 44.0, 40.0, 41.0, 28.0, 25.0, 25.0, 22.0, 30.0, 25.0, 26.0, 20.0, 17.0, 17.0, 8.0, 11.0, 5.0, 2.0, 4.0, 4.0, 2.0, 3.0, 2.0, 0.0, 1.0, 1.0, 4.0], "bins": [-2.814453125, -2.735565185546875, -2.65667724609375, -2.577789306640625, -2.4989013671875, -2.420013427734375, -2.34112548828125, -2.262237548828125, -2.183349609375, -2.104461669921875, -2.02557373046875, -1.946685791015625, -1.8677978515625, -1.788909912109375, -1.71002197265625, -1.631134033203125, -1.55224609375, -1.473358154296875, -1.39447021484375, -1.315582275390625, -1.2366943359375, -1.157806396484375, -1.07891845703125, -1.000030517578125, -0.921142578125, -0.842254638671875, -0.76336669921875, -0.684478759765625, -0.6055908203125, -0.526702880859375, -0.44781494140625, -0.368927001953125, -0.2900390625, -0.211151123046875, -0.13226318359375, -0.053375244140625, 0.0255126953125, 0.104400634765625, 0.18328857421875, 0.262176513671875, 0.341064453125, 0.419952392578125, 0.49884033203125, 0.577728271484375, 0.6566162109375, 0.735504150390625, 0.81439208984375, 0.893280029296875, 0.97216796875, 1.051055908203125, 1.12994384765625, 1.208831787109375, 1.2877197265625, 1.366607666015625, 1.44549560546875, 1.524383544921875, 1.603271484375, 1.682159423828125, 1.76104736328125, 1.839935302734375, 1.9188232421875, 1.997711181640625, 2.07659912109375, 2.155487060546875, 2.234375]}, "gradients/decoder.transformer.h.22.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 3.0, 1.0, 6.0, 5.0, 2.0, 9.0, 17.0, 20.0, 27.0, 34.0, 49.0, 87.0, 108.0, 219.0, 328.0, 497.0, 849.0, 1446.0, 2564.0, 4425.0, 8127.0, 15402.0, 29431.0, 56819.0, 109027.0, 188255.0, 240770.0, 178025.0, 100909.0, 52380.0, 27038.0, 14047.0, 7531.0, 4173.0, 2346.0, 1391.0, 818.0, 500.0, 338.0, 178.0, 140.0, 83.0, 47.0, 42.0, 17.0, 12.0, 11.0, 4.0, 4.0, 4.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.476806640625, -0.46254730224609375, -0.4482879638671875, -0.43402862548828125, -0.419769287109375, -0.40550994873046875, -0.3912506103515625, -0.37699127197265625, -0.36273193359375, -0.34847259521484375, -0.3342132568359375, -0.31995391845703125, -0.305694580078125, -0.29143524169921875, -0.2771759033203125, -0.26291656494140625, -0.2486572265625, -0.23439788818359375, -0.2201385498046875, -0.20587921142578125, -0.191619873046875, -0.17736053466796875, -0.1631011962890625, -0.14884185791015625, -0.13458251953125, -0.12032318115234375, -0.1060638427734375, -0.09180450439453125, -0.077545166015625, -0.06328582763671875, -0.0490264892578125, -0.03476715087890625, -0.0205078125, -0.00624847412109375, 0.0080108642578125, 0.02227020263671875, 0.036529541015625, 0.05078887939453125, 0.0650482177734375, 0.07930755615234375, 0.09356689453125, 0.10782623291015625, 0.1220855712890625, 0.13634490966796875, 0.150604248046875, 0.16486358642578125, 0.1791229248046875, 0.19338226318359375, 0.2076416015625, 0.22190093994140625, 0.2361602783203125, 0.25041961669921875, 0.264678955078125, 0.27893829345703125, 0.2931976318359375, 0.30745697021484375, 0.32171630859375, 0.33597564697265625, 0.3502349853515625, 0.36449432373046875, 0.378753662109375, 0.39301300048828125, 0.4072723388671875, 0.42153167724609375, 0.435791015625]}, "gradients/decoder.transformer.h.22.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 3.0, 1.0, 3.0, 2.0, 6.0, 2.0, 4.0, 9.0, 5.0, 8.0, 6.0, 14.0, 8.0, 8.0, 6.0, 13.0, 14.0, 26.0, 18.0, 33.0, 26.0, 40.0, 34.0, 27.0, 39.0, 41.0, 33.0, 31.0, 36.0, 1065.0, 53.0, 38.0, 26.0, 37.0, 43.0, 32.0, 38.0, 25.0, 24.0, 18.0, 20.0, 22.0, 16.0, 18.0, 9.0, 5.0, 11.0, 8.0, 7.0, 5.0, 6.0, 5.0, 5.0, 0.0, 4.0, 4.0, 2.0, 1.0, 1.0, 1.0], "bins": [-1.6396484375, -1.591278076171875, -1.54290771484375, -1.494537353515625, -1.4461669921875, -1.397796630859375, -1.34942626953125, -1.301055908203125, -1.252685546875, -1.204315185546875, -1.15594482421875, -1.107574462890625, -1.0592041015625, -1.010833740234375, -0.96246337890625, -0.914093017578125, -0.86572265625, -0.817352294921875, -0.76898193359375, -0.720611572265625, -0.6722412109375, -0.623870849609375, -0.57550048828125, -0.527130126953125, -0.478759765625, -0.430389404296875, -0.38201904296875, -0.333648681640625, -0.2852783203125, -0.236907958984375, -0.18853759765625, -0.140167236328125, -0.091796875, -0.043426513671875, 0.00494384765625, 0.053314208984375, 0.1016845703125, 0.150054931640625, 0.19842529296875, 0.246795654296875, 0.295166015625, 0.343536376953125, 0.39190673828125, 0.440277099609375, 0.4886474609375, 0.537017822265625, 0.58538818359375, 0.633758544921875, 0.68212890625, 0.730499267578125, 0.77886962890625, 0.827239990234375, 0.8756103515625, 0.923980712890625, 0.97235107421875, 1.020721435546875, 1.069091796875, 1.117462158203125, 1.16583251953125, 1.214202880859375, 1.2625732421875, 1.310943603515625, 1.35931396484375, 1.407684326171875, 1.4560546875]}, "gradients/decoder.transformer.h.22.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 3.0, 3.0, 3.0, 10.0, 12.0, 14.0, 27.0, 36.0, 56.0, 95.0, 162.0, 228.0, 353.0, 587.0, 865.0, 1320.0, 2133.0, 3634.0, 5748.0, 9476.0, 15573.0, 26160.0, 43855.0, 70881.0, 111229.0, 161393.0, 1223059.0, 150303.0, 103540.0, 65209.0, 39774.0, 23931.0, 14494.0, 8661.0, 5436.0, 3209.0, 2089.0, 1297.0, 804.0, 514.0, 355.0, 207.0, 129.0, 101.0, 66.0, 30.0, 16.0, 21.0, 18.0, 10.0, 6.0, 5.0, 2.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.214111328125, -0.2068328857421875, -0.199554443359375, -0.1922760009765625, -0.18499755859375, -0.1777191162109375, -0.170440673828125, -0.1631622314453125, -0.1558837890625, -0.1486053466796875, -0.141326904296875, -0.1340484619140625, -0.12677001953125, -0.1194915771484375, -0.112213134765625, -0.1049346923828125, -0.09765625, -0.0903778076171875, -0.083099365234375, -0.0758209228515625, -0.06854248046875, -0.0612640380859375, -0.053985595703125, -0.0467071533203125, -0.0394287109375, -0.0321502685546875, -0.024871826171875, -0.0175933837890625, -0.01031494140625, -0.0030364990234375, 0.004241943359375, 0.0115203857421875, 0.018798828125, 0.0260772705078125, 0.033355712890625, 0.0406341552734375, 0.04791259765625, 0.0551910400390625, 0.062469482421875, 0.0697479248046875, 0.0770263671875, 0.0843048095703125, 0.091583251953125, 0.0988616943359375, 0.10614013671875, 0.1134185791015625, 0.120697021484375, 0.1279754638671875, 0.13525390625, 0.1425323486328125, 0.149810791015625, 0.1570892333984375, 0.16436767578125, 0.1716461181640625, 0.178924560546875, 0.1862030029296875, 0.1934814453125, 0.2007598876953125, 0.208038330078125, 0.2153167724609375, 0.22259521484375, 0.2298736572265625, 0.237152099609375, 0.2444305419921875, 0.251708984375]}, "gradients/decoder.transformer.h.22.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 3.0, 1.0, 5.0, 3.0, 9.0, 6.0, 7.0, 9.0, 10.0, 28.0, 29.0, 41.0, 45.0, 46.0, 66.0, 82.0, 126.0, 107.0, 87.0, 59.0, 58.0, 44.0, 30.0, 40.0, 14.0, 11.0, 13.0, 7.0, 6.0, 4.0, 4.0, 2.0, 1.0, 3.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.008209228515625, -0.007926106452941895, -0.007642984390258789, -0.007359862327575684, -0.007076740264892578, -0.006793618202209473, -0.006510496139526367, -0.006227374076843262, -0.005944252014160156, -0.005661129951477051, -0.005378007888793945, -0.00509488582611084, -0.004811763763427734, -0.004528641700744629, -0.0042455196380615234, -0.003962397575378418, -0.0036792755126953125, -0.003396153450012207, -0.0031130313873291016, -0.002829909324645996, -0.0025467872619628906, -0.002263665199279785, -0.0019805431365966797, -0.0016974210739135742, -0.0014142990112304688, -0.0011311769485473633, -0.0008480548858642578, -0.0005649328231811523, -0.0002818107604980469, 1.3113021850585938e-06, 0.00028443336486816406, 0.0005675554275512695, 0.000850677490234375, 0.0011337995529174805, 0.001416921615600586, 0.0017000436782836914, 0.001983165740966797, 0.0022662878036499023, 0.002549409866333008, 0.0028325319290161133, 0.0031156539916992188, 0.0033987760543823242, 0.0036818981170654297, 0.003965020179748535, 0.004248142242431641, 0.004531264305114746, 0.0048143863677978516, 0.005097508430480957, 0.0053806304931640625, 0.005663752555847168, 0.0059468746185302734, 0.006229996681213379, 0.006513118743896484, 0.00679624080657959, 0.007079362869262695, 0.007362484931945801, 0.007645606994628906, 0.007928729057312012, 0.008211851119995117, 0.008494973182678223, 0.008778095245361328, 0.009061217308044434, 0.009344339370727539, 0.009627461433410645, 0.00991058349609375]}, "gradients/decoder.transformer.h.22.crossattention.q_attn.weight": {"_type": "histogram", "values": [3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 4.0, 6.0, 6.0, 6.0, 6.0, 7.0, 12.0, 26.0, 15.0, 30.0, 43.0, 55.0, 76.0, 112.0, 142.0, 196.0, 318.0, 928.0, 165053.0, 879136.0, 1261.0, 358.0, 191.0, 168.0, 105.0, 70.0, 59.0, 39.0, 35.0, 20.0, 15.0, 17.0, 12.0, 7.0, 7.0, 3.0, 2.0, 3.0, 2.0, 1.0, 2.0, 3.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.1343994140625, -0.1297779083251953, -0.12515640258789062, -0.12053489685058594, -0.11591339111328125, -0.11129188537597656, -0.10667037963867188, -0.10204887390136719, -0.0974273681640625, -0.09280586242675781, -0.08818435668945312, -0.08356285095214844, -0.07894134521484375, -0.07431983947753906, -0.06969833374023438, -0.06507682800292969, -0.060455322265625, -0.05583381652832031, -0.051212310791015625, -0.04659080505371094, -0.04196929931640625, -0.03734779357910156, -0.032726287841796875, -0.028104782104492188, -0.0234832763671875, -0.018861770629882812, -0.014240264892578125, -0.009618759155273438, -0.00499725341796875, -0.0003757476806640625, 0.004245758056640625, 0.008867263793945312, 0.01348876953125, 0.018110275268554688, 0.022731781005859375, 0.027353286743164062, 0.03197479248046875, 0.03659629821777344, 0.041217803955078125, 0.04583930969238281, 0.0504608154296875, 0.05508232116699219, 0.059703826904296875, 0.06432533264160156, 0.06894683837890625, 0.07356834411621094, 0.07818984985351562, 0.08281135559082031, 0.087432861328125, 0.09205436706542969, 0.09667587280273438, 0.10129737854003906, 0.10591888427734375, 0.11054039001464844, 0.11516189575195312, 0.11978340148925781, 0.1244049072265625, 0.1290264129638672, 0.13364791870117188, 0.13826942443847656, 0.14289093017578125, 0.14751243591308594, 0.15213394165039062, 0.1567554473876953, 0.161376953125]}, "gradients/decoder.transformer.h.22.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 44.0, 974.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.21586118638515472, -0.21063318848609924, -0.20540520548820496, -0.20017720758914948, -0.1949492245912552, -0.1897212266921997, -0.18449324369430542, -0.17926524579524994, -0.17403726279735565, -0.16880926489830017, -0.16358128190040588, -0.1583532840013504, -0.15312530100345612, -0.14789730310440063, -0.14266932010650635, -0.13744132220745087, -0.13221332430839539, -0.1269853264093399, -0.12175734341144562, -0.11652935296297073, -0.11130136251449585, -0.10607337206602097, -0.10084538161754608, -0.0956173837184906, -0.09038940072059631, -0.08516141027212143, -0.07993341982364655, -0.07470542937517166, -0.06947743892669678, -0.0642494484782219, -0.05902145430445671, -0.05379346385598183, -0.04856547713279724, -0.04333748668432236, -0.03810949623584747, -0.03288150578737259, -0.027653513476252556, -0.022425523027777672, -0.01719753071665764, -0.011969540268182755, -0.0067415498197078705, -0.0015135589055716991, 0.003714432008564472, 0.00894242338836193, 0.014170413836836815, 0.0193984042853117, 0.024626396596431732, 0.029854387044906616, 0.0350823774933815, 0.040310367941856384, 0.04553835839033127, 0.05076634883880615, 0.055994339287281036, 0.06122232973575592, 0.0664503276348114, 0.07167831063270569, 0.07690630853176117, 0.08213429898023605, 0.08736228942871094, 0.09259027987718582, 0.0978182703256607, 0.10304626077413559, 0.10827425122261047, 0.11350224912166595, 0.11873023211956024]}, "gradients/decoder.transformer.h.22.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 5.0, 1.0, 6.0, 3.0, 8.0, 16.0, 19.0, 17.0, 41.0, 47.0, 46.0, 52.0, 71.0, 71.0, 62.0, 95.0, 67.0, 68.0, 58.0, 72.0, 36.0, 36.0, 35.0, 25.0, 27.0, 12.0, 11.0, 4.0, 3.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.013050436973571777, -0.012714066542685032, -0.012377696111798286, -0.012041325680911541, -0.011704955250024796, -0.01136858481913805, -0.011032214388251305, -0.01069584395736456, -0.010359473526477814, -0.010023103095591068, -0.009686732664704323, -0.009350362233817577, -0.009013991802930832, -0.008677621372044086, -0.008341250941157341, -0.008004880510270596, -0.00766851007938385, -0.007332139648497105, -0.006995769217610359, -0.006659398786723614, -0.006323028355836868, -0.005986657924950123, -0.005650287494063377, -0.005313917063176632, -0.0049775466322898865, -0.004641176201403141, -0.004304805770516396, -0.00396843533962965, -0.0036320649087429047, -0.003295694477856159, -0.0029593240469694138, -0.0026229536160826683, -0.002286583185195923, -0.0019502127543091774, -0.001613842323422432, -0.0012774718925356865, -0.000941101461648941, -0.0006047310307621956, -0.00026836059987545013, 6.800983101129532e-05, 0.00040438026189804077, 0.0007407506927847862, 0.0010771211236715317, 0.0014134915545582771, 0.0017498619854450226, 0.002086232416331768, 0.0024226028472185135, 0.002758973278105259, 0.0030953437089920044, 0.00343171413987875, 0.0037680845707654953, 0.004104455001652241, 0.004440825432538986, 0.004777195863425732, 0.005113566294312477, 0.0054499367251992226, 0.005786307156085968, 0.0061226775869727135, 0.006459048017859459, 0.006795418448746204, 0.00713178887963295, 0.007468159310519695, 0.007804529741406441, 0.008140900172293186, 0.008477270603179932]}, "gradients/decoder.transformer.h.22.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 3.0, 4.0, 4.0, 6.0, 7.0, 8.0, 10.0, 10.0, 15.0, 21.0, 18.0, 16.0, 35.0, 27.0, 28.0, 18.0, 40.0, 40.0, 42.0, 45.0, 49.0, 37.0, 43.0, 42.0, 44.0, 44.0, 40.0, 41.0, 28.0, 24.0, 26.0, 22.0, 30.0, 25.0, 26.0, 20.0, 17.0, 17.0, 8.0, 11.0, 5.0, 2.0, 4.0, 4.0, 2.0, 3.0, 2.0, 0.0, 1.0, 1.0, 4.0], "bins": [-2.81640625, -2.73748779296875, -2.6585693359375, -2.57965087890625, -2.500732421875, -2.42181396484375, -2.3428955078125, -2.26397705078125, -2.18505859375, -2.10614013671875, -2.0272216796875, -1.94830322265625, -1.869384765625, -1.79046630859375, -1.7115478515625, -1.63262939453125, -1.5537109375, -1.47479248046875, -1.3958740234375, -1.31695556640625, -1.238037109375, -1.15911865234375, -1.0802001953125, -1.00128173828125, -0.92236328125, -0.84344482421875, -0.7645263671875, -0.68560791015625, -0.606689453125, -0.52777099609375, -0.4488525390625, -0.36993408203125, -0.291015625, -0.21209716796875, -0.1331787109375, -0.05426025390625, 0.024658203125, 0.10357666015625, 0.1824951171875, 0.26141357421875, 0.34033203125, 0.41925048828125, 0.4981689453125, 0.57708740234375, 0.656005859375, 0.73492431640625, 0.8138427734375, 0.89276123046875, 0.9716796875, 1.05059814453125, 1.1295166015625, 1.20843505859375, 1.287353515625, 1.36627197265625, 1.4451904296875, 1.52410888671875, 1.60302734375, 1.68194580078125, 1.7608642578125, 1.83978271484375, 1.918701171875, 1.99761962890625, 2.0765380859375, 2.15545654296875, 2.234375]}, "gradients/decoder.transformer.h.22.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 5.0, 4.0, 3.0, 3.0, 6.0, 13.0, 13.0, 24.0, 31.0, 40.0, 59.0, 61.0, 118.0, 158.0, 231.0, 352.0, 509.0, 808.0, 1141.0, 1972.0, 3757.0, 7809.0, 17870.0, 44269.0, 107547.0, 219815.0, 290149.0, 194609.0, 90123.0, 36940.0, 15156.0, 6554.0, 3289.0, 1828.0, 1096.0, 721.0, 478.0, 311.0, 172.0, 140.0, 101.0, 84.0, 36.0, 54.0, 37.0, 24.0, 11.0, 10.0, 7.0, 3.0, 2.0, 3.0, 4.0, 1.0, 3.0, 1.0, 2.0], "bins": [-2.822265625, -2.737762451171875, -2.65325927734375, -2.568756103515625, -2.4842529296875, -2.399749755859375, -2.31524658203125, -2.230743408203125, -2.146240234375, -2.061737060546875, -1.97723388671875, -1.892730712890625, -1.8082275390625, -1.723724365234375, -1.63922119140625, -1.554718017578125, -1.47021484375, -1.385711669921875, -1.30120849609375, -1.216705322265625, -1.1322021484375, -1.047698974609375, -0.96319580078125, -0.878692626953125, -0.794189453125, -0.709686279296875, -0.62518310546875, -0.540679931640625, -0.4561767578125, -0.371673583984375, -0.28717041015625, -0.202667236328125, -0.1181640625, -0.033660888671875, 0.05084228515625, 0.135345458984375, 0.2198486328125, 0.304351806640625, 0.38885498046875, 0.473358154296875, 0.557861328125, 0.642364501953125, 0.72686767578125, 0.811370849609375, 0.8958740234375, 0.980377197265625, 1.06488037109375, 1.149383544921875, 1.23388671875, 1.318389892578125, 1.40289306640625, 1.487396240234375, 1.5718994140625, 1.656402587890625, 1.74090576171875, 1.825408935546875, 1.909912109375, 1.994415283203125, 2.07891845703125, 2.163421630859375, 2.2479248046875, 2.332427978515625, 2.41693115234375, 2.501434326171875, 2.5859375]}, "gradients/decoder.transformer.h.22.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 4.0, 6.0, 6.0, 6.0, 6.0, 10.0, 19.0, 7.0, 11.0, 15.0, 22.0, 28.0, 29.0, 31.0, 22.0, 32.0, 41.0, 47.0, 49.0, 70.0, 105.0, 302.0, 1407.0, 235.0, 118.0, 65.0, 50.0, 38.0, 38.0, 33.0, 27.0, 30.0, 22.0, 19.0, 22.0, 15.0, 16.0, 12.0, 8.0, 11.0, 8.0, 5.0, 6.0, 3.0, 0.0, 2.0, 1.0, 0.0, 2.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-8.984375, -8.6983642578125, -8.412353515625, -8.1263427734375, -7.84033203125, -7.5543212890625, -7.268310546875, -6.9822998046875, -6.6962890625, -6.4102783203125, -6.124267578125, -5.8382568359375, -5.55224609375, -5.2662353515625, -4.980224609375, -4.6942138671875, -4.408203125, -4.1221923828125, -3.836181640625, -3.5501708984375, -3.26416015625, -2.9781494140625, -2.692138671875, -2.4061279296875, -2.1201171875, -1.8341064453125, -1.548095703125, -1.2620849609375, -0.97607421875, -0.6900634765625, -0.404052734375, -0.1180419921875, 0.16796875, 0.4539794921875, 0.739990234375, 1.0260009765625, 1.31201171875, 1.5980224609375, 1.884033203125, 2.1700439453125, 2.4560546875, 2.7420654296875, 3.028076171875, 3.3140869140625, 3.60009765625, 3.8861083984375, 4.172119140625, 4.4581298828125, 4.744140625, 5.0301513671875, 5.316162109375, 5.6021728515625, 5.88818359375, 6.1741943359375, 6.460205078125, 6.7462158203125, 7.0322265625, 7.3182373046875, 7.604248046875, 7.8902587890625, 8.17626953125, 8.4622802734375, 8.748291015625, 9.0343017578125, 9.3203125]}, "gradients/decoder.transformer.h.22.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 2.0, 0.0, 2.0, 0.0, 0.0, 4.0, 7.0, 3.0, 5.0, 8.0, 12.0, 6.0, 17.0, 10.0, 25.0, 19.0, 22.0, 27.0, 34.0, 39.0, 58.0, 83.0, 157.0, 290.0, 831.0, 6067.0, 459507.0, 2659838.0, 16071.0, 1573.0, 389.0, 161.0, 105.0, 69.0, 53.0, 37.0, 32.0, 22.0, 25.0, 20.0, 17.0, 13.0, 12.0, 10.0, 8.0, 7.0, 9.0, 3.0, 6.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-20.375, -19.7607421875, -19.146484375, -18.5322265625, -17.91796875, -17.3037109375, -16.689453125, -16.0751953125, -15.4609375, -14.8466796875, -14.232421875, -13.6181640625, -13.00390625, -12.3896484375, -11.775390625, -11.1611328125, -10.546875, -9.9326171875, -9.318359375, -8.7041015625, -8.08984375, -7.4755859375, -6.861328125, -6.2470703125, -5.6328125, -5.0185546875, -4.404296875, -3.7900390625, -3.17578125, -2.5615234375, -1.947265625, -1.3330078125, -0.71875, -0.1044921875, 0.509765625, 1.1240234375, 1.73828125, 2.3525390625, 2.966796875, 3.5810546875, 4.1953125, 4.8095703125, 5.423828125, 6.0380859375, 6.65234375, 7.2666015625, 7.880859375, 8.4951171875, 9.109375, 9.7236328125, 10.337890625, 10.9521484375, 11.56640625, 12.1806640625, 12.794921875, 13.4091796875, 14.0234375, 14.6376953125, 15.251953125, 15.8662109375, 16.48046875, 17.0947265625, 17.708984375, 18.3232421875, 18.9375]}, "gradients/decoder.transformer.h.22.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 4.0, 90.0, 671.0, 243.0, 7.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-64.43699645996094, -61.82619094848633, -59.21538543701172, -56.60457992553711, -53.9937744140625, -51.382965087890625, -48.772159576416016, -46.161354064941406, -43.5505485534668, -40.93974304199219, -38.32893753051758, -35.71813201904297, -33.107322692871094, -30.496519088745117, -27.885711669921875, -25.274906158447266, -22.664100646972656, -20.053295135498047, -17.442489624023438, -14.831682205200195, -12.220876693725586, -9.610071182250977, -6.999264717102051, -4.388458251953125, -1.7776527404785156, 0.833153247833252, 3.4439592361450195, 6.054765224456787, 8.665571212768555, 11.276376724243164, 13.88718318939209, 16.497989654541016, 19.108787536621094, 21.719593048095703, 24.330398559570312, 26.941205978393555, 29.552011489868164, 32.162818908691406, 34.773624420166016, 37.384429931640625, 39.995235443115234, 42.606040954589844, 45.21684646606445, 47.82765197753906, 50.43846130371094, 53.04926300048828, 55.660072326660156, 58.270877838134766, 60.881683349609375, 63.492488861083984, 66.1032943725586, 68.71410369873047, 71.32490539550781, 73.93571472167969, 76.54651641845703, 79.1573257446289, 81.76812744140625, 84.37893676757812, 86.98973846435547, 89.60054779052734, 92.21134948730469, 94.82215881347656, 97.4329605102539, 100.04376983642578, 102.65457916259766]}, "gradients/decoder.transformer.h.22.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 2.0, 2.0, 3.0, 3.0, 9.0, 8.0, 13.0, 11.0, 17.0, 15.0, 17.0, 22.0, 19.0, 26.0, 24.0, 32.0, 33.0, 52.0, 48.0, 34.0, 39.0, 48.0, 41.0, 39.0, 40.0, 47.0, 38.0, 53.0, 41.0, 32.0, 26.0, 40.0, 19.0, 19.0, 26.0, 15.0, 17.0, 14.0, 5.0, 13.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-31.764469146728516, -30.7552490234375, -29.746028900146484, -28.73680877685547, -27.727588653564453, -26.718368530273438, -25.70914649963379, -24.699926376342773, -23.690706253051758, -22.681486129760742, -21.672266006469727, -20.66304588317871, -19.653823852539062, -18.644603729248047, -17.63538360595703, -16.626163482666016, -15.616943359375, -14.607723236083984, -13.598503112792969, -12.589282035827637, -11.580061912536621, -10.570841789245605, -9.561620712280273, -8.552400588989258, -7.543180465698242, -6.533960342407227, -5.524739742279053, -4.515519142150879, -3.5062990188598633, -2.4970788955688477, -1.4878582954406738, -0.4786376953125, 0.5305862426757812, 1.539806604385376, 2.5490269660949707, 3.5582473278045654, 4.56746768951416, 5.576687812805176, 6.58590841293335, 7.595129013061523, 8.604349136352539, 9.613569259643555, 10.62278938293457, 11.632010459899902, 12.641230583190918, 13.650450706481934, 14.659671783447266, 15.668891906738281, 16.678112030029297, 17.687332153320312, 18.696552276611328, 19.705772399902344, 20.71499252319336, 21.724212646484375, 22.733434677124023, 23.74265480041504, 24.751874923706055, 25.76109504699707, 26.770315170288086, 27.7795352935791, 28.78875732421875, 29.797977447509766, 30.80719757080078, 31.816417694091797, 32.82563781738281]}, "gradients/decoder.transformer.h.21.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 3.0, 2.0, 1.0, 5.0, 7.0, 8.0, 9.0, 7.0, 11.0, 14.0, 16.0, 21.0, 26.0, 28.0, 30.0, 27.0, 18.0, 39.0, 42.0, 42.0, 38.0, 44.0, 40.0, 44.0, 43.0, 50.0, 47.0, 46.0, 33.0, 28.0, 25.0, 29.0, 17.0, 41.0, 19.0, 31.0, 14.0, 15.0, 13.0, 8.0, 8.0, 8.0, 3.0, 3.0, 4.0, 4.0, 2.0, 1.0, 1.0, 4.0, 1.0], "bins": [-3.0625, -2.9788818359375, -2.895263671875, -2.8116455078125, -2.72802734375, -2.6444091796875, -2.560791015625, -2.4771728515625, -2.3935546875, -2.3099365234375, -2.226318359375, -2.1427001953125, -2.05908203125, -1.9754638671875, -1.891845703125, -1.8082275390625, -1.724609375, -1.6409912109375, -1.557373046875, -1.4737548828125, -1.39013671875, -1.3065185546875, -1.222900390625, -1.1392822265625, -1.0556640625, -0.9720458984375, -0.888427734375, -0.8048095703125, -0.72119140625, -0.6375732421875, -0.553955078125, -0.4703369140625, -0.38671875, -0.3031005859375, -0.219482421875, -0.1358642578125, -0.05224609375, 0.0313720703125, 0.114990234375, 0.1986083984375, 0.2822265625, 0.3658447265625, 0.449462890625, 0.5330810546875, 0.61669921875, 0.7003173828125, 0.783935546875, 0.8675537109375, 0.951171875, 1.0347900390625, 1.118408203125, 1.2020263671875, 1.28564453125, 1.3692626953125, 1.452880859375, 1.5364990234375, 1.6201171875, 1.7037353515625, 1.787353515625, 1.8709716796875, 1.95458984375, 2.0382080078125, 2.121826171875, 2.2054443359375, 2.2890625]}, "gradients/decoder.transformer.h.21.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 3.0, 1.0, 7.0, 7.0, 7.0, 12.0, 17.0, 22.0, 28.0, 43.0, 53.0, 59.0, 86.0, 133.0, 167.0, 415.0, 958.0, 2855.0, 11308.0, 77685.0, 1180719.0, 2697220.0, 193353.0, 22032.0, 4551.0, 1345.0, 512.0, 239.0, 100.0, 82.0, 73.0, 54.0, 34.0, 38.0, 13.0, 10.0, 11.0, 15.0, 8.0, 5.0, 7.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 3.0], "bins": [-12.6953125, -12.344482421875, -11.99365234375, -11.642822265625, -11.2919921875, -10.941162109375, -10.59033203125, -10.239501953125, -9.888671875, -9.537841796875, -9.18701171875, -8.836181640625, -8.4853515625, -8.134521484375, -7.78369140625, -7.432861328125, -7.08203125, -6.731201171875, -6.38037109375, -6.029541015625, -5.6787109375, -5.327880859375, -4.97705078125, -4.626220703125, -4.275390625, -3.924560546875, -3.57373046875, -3.222900390625, -2.8720703125, -2.521240234375, -2.17041015625, -1.819580078125, -1.46875, -1.117919921875, -0.76708984375, -0.416259765625, -0.0654296875, 0.285400390625, 0.63623046875, 0.987060546875, 1.337890625, 1.688720703125, 2.03955078125, 2.390380859375, 2.7412109375, 3.092041015625, 3.44287109375, 3.793701171875, 4.14453125, 4.495361328125, 4.84619140625, 5.197021484375, 5.5478515625, 5.898681640625, 6.24951171875, 6.600341796875, 6.951171875, 7.302001953125, 7.65283203125, 8.003662109375, 8.3544921875, 8.705322265625, 9.05615234375, 9.406982421875, 9.7578125]}, "gradients/decoder.transformer.h.21.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 4.0, 2.0, 5.0, 8.0, 1.0, 5.0, 8.0, 10.0, 19.0, 22.0, 24.0, 76.0, 98.0, 141.0, 232.0, 348.0, 502.0, 647.0, 611.0, 457.0, 305.0, 193.0, 130.0, 83.0, 54.0, 36.0, 24.0, 17.0, 14.0, 6.0, 4.0, 0.0, 4.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.4375, -9.0718994140625, -8.706298828125, -8.3406982421875, -7.97509765625, -7.6094970703125, -7.243896484375, -6.8782958984375, -6.5126953125, -6.1470947265625, -5.781494140625, -5.4158935546875, -5.05029296875, -4.6846923828125, -4.319091796875, -3.9534912109375, -3.587890625, -3.2222900390625, -2.856689453125, -2.4910888671875, -2.12548828125, -1.7598876953125, -1.394287109375, -1.0286865234375, -0.6630859375, -0.2974853515625, 0.068115234375, 0.4337158203125, 0.79931640625, 1.1649169921875, 1.530517578125, 1.8961181640625, 2.26171875, 2.6273193359375, 2.992919921875, 3.3585205078125, 3.72412109375, 4.0897216796875, 4.455322265625, 4.8209228515625, 5.1865234375, 5.5521240234375, 5.917724609375, 6.2833251953125, 6.64892578125, 7.0145263671875, 7.380126953125, 7.7457275390625, 8.111328125, 8.4769287109375, 8.842529296875, 9.2081298828125, 9.57373046875, 9.9393310546875, 10.304931640625, 10.6705322265625, 11.0361328125, 11.4017333984375, 11.767333984375, 12.1329345703125, 12.49853515625, 12.8641357421875, 13.229736328125, 13.5953369140625, 13.9609375]}, "gradients/decoder.transformer.h.21.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 4.0, 1.0, 6.0, 16.0, 18.0, 21.0, 30.0, 43.0, 91.0, 146.0, 219.0, 396.0, 809.0, 1691.0, 4600.0, 15386.0, 74786.0, 538139.0, 2677140.0, 752878.0, 99325.0, 19182.0, 5318.0, 2046.0, 941.0, 452.0, 244.0, 148.0, 73.0, 49.0, 27.0, 33.0, 10.0, 8.0, 5.0, 4.0, 4.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-14.5546875, -14.10791015625, -13.6611328125, -13.21435546875, -12.767578125, -12.32080078125, -11.8740234375, -11.42724609375, -10.98046875, -10.53369140625, -10.0869140625, -9.64013671875, -9.193359375, -8.74658203125, -8.2998046875, -7.85302734375, -7.40625, -6.95947265625, -6.5126953125, -6.06591796875, -5.619140625, -5.17236328125, -4.7255859375, -4.27880859375, -3.83203125, -3.38525390625, -2.9384765625, -2.49169921875, -2.044921875, -1.59814453125, -1.1513671875, -0.70458984375, -0.2578125, 0.18896484375, 0.6357421875, 1.08251953125, 1.529296875, 1.97607421875, 2.4228515625, 2.86962890625, 3.31640625, 3.76318359375, 4.2099609375, 4.65673828125, 5.103515625, 5.55029296875, 5.9970703125, 6.44384765625, 6.890625, 7.33740234375, 7.7841796875, 8.23095703125, 8.677734375, 9.12451171875, 9.5712890625, 10.01806640625, 10.46484375, 10.91162109375, 11.3583984375, 11.80517578125, 12.251953125, 12.69873046875, 13.1455078125, 13.59228515625, 14.0390625]}, "gradients/decoder.transformer.h.21.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 6.0, 19.0, 80.0, 220.0, 347.0, 214.0, 96.0, 27.0, 6.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-173.36392211914062, -169.86744689941406, -166.3709716796875, -162.87451171875, -159.37803649902344, -155.88156127929688, -152.3850860595703, -148.88861083984375, -145.3921356201172, -141.89566040039062, -138.39918518066406, -134.9027099609375, -131.40625, -127.90977478027344, -124.41329956054688, -120.91682434082031, -117.42036437988281, -113.92388916015625, -110.42742156982422, -106.93094635009766, -103.4344711303711, -99.93800354003906, -96.4415283203125, -92.94505310058594, -89.44857788085938, -85.95210266113281, -82.45563507080078, -78.95915985107422, -75.46268463134766, -71.96621704101562, -68.46974182128906, -64.9732666015625, -61.4767951965332, -57.980323791503906, -54.483848571777344, -50.98737716674805, -47.49090576171875, -43.99443054199219, -40.49795913696289, -37.001487731933594, -33.50501251220703, -30.0085391998291, -26.512067794799805, -23.015594482421875, -19.519123077392578, -16.02264976501465, -12.526176452636719, -9.029705047607422, -5.533233642578125, -2.0367610454559326, 1.4597115516662598, 4.956184387207031, 8.452656745910645, 11.949129104614258, 15.445602416992188, 18.942073822021484, 22.438547134399414, 25.935020446777344, 29.43149185180664, 32.92796325683594, 36.4244384765625, 39.9209098815918, 43.417381286621094, 46.913856506347656, 50.41032791137695]}, "gradients/decoder.transformer.h.21.ln_2.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 3.0, 3.0, 3.0, 2.0, 6.0, 8.0, 6.0, 6.0, 18.0, 5.0, 13.0, 19.0, 17.0, 19.0, 22.0, 34.0, 39.0, 44.0, 30.0, 33.0, 30.0, 34.0, 43.0, 40.0, 43.0, 36.0, 35.0, 34.0, 33.0, 45.0, 35.0, 25.0, 24.0, 35.0, 32.0, 28.0, 20.0, 22.0, 12.0, 8.0, 9.0, 12.0, 4.0, 7.0, 11.0, 7.0, 4.0, 2.0, 3.0, 3.0, 5.0, 1.0, 0.0, 2.0, 1.0], "bins": [-28.353797912597656, -27.50921058654785, -26.66462516784668, -25.820037841796875, -24.975452423095703, -24.1308650970459, -23.286277770996094, -22.441692352294922, -21.59710693359375, -20.752519607543945, -19.907934188842773, -19.06334686279297, -18.218761444091797, -17.374174118041992, -16.529586791992188, -15.685001373291016, -14.840414047241211, -13.995827674865723, -13.151241302490234, -12.30665397644043, -11.462068557739258, -10.617481231689453, -9.772894859313965, -8.928308486938477, -8.083722114562988, -7.2391357421875, -6.394549369812012, -5.549962520599365, -4.705376148223877, -3.8607897758483887, -3.016202926635742, -2.171616554260254, -1.3270282745361328, -0.482441782951355, 0.36214470863342285, 1.2067313194274902, 2.0513176918029785, 2.895904064178467, 3.7404909133911133, 4.585077285766602, 5.42966365814209, 6.274250030517578, 7.118836402893066, 7.963423252105713, 8.80801010131836, 9.652595520019531, 10.497182846069336, 11.341769218444824, 12.186355590820312, 13.0309419631958, 13.875528335571289, 14.720115661621094, 15.564701080322266, 16.40928840637207, 17.253875732421875, 18.098461151123047, 18.94304656982422, 19.787633895874023, 20.632219314575195, 21.476806640625, 22.321392059326172, 23.165979385375977, 24.01056671142578, 24.855152130126953, 25.699739456176758]}, "gradients/decoder.transformer.h.21.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 3.0, 4.0, 3.0, 5.0, 7.0, 11.0, 7.0, 14.0, 21.0, 14.0, 19.0, 22.0, 22.0, 27.0, 26.0, 31.0, 31.0, 39.0, 29.0, 50.0, 44.0, 50.0, 48.0, 48.0, 47.0, 35.0, 39.0, 46.0, 31.0, 33.0, 21.0, 27.0, 31.0, 21.0, 14.0, 22.0, 15.0, 16.0, 12.0, 9.0, 6.0, 3.0, 4.0, 1.0, 3.0, 2.0, 2.0, 0.0, 2.0, 1.0, 1.0], "bins": [-3.150390625, -3.061859130859375, -2.97332763671875, -2.884796142578125, -2.7962646484375, -2.707733154296875, -2.61920166015625, -2.530670166015625, -2.442138671875, -2.353607177734375, -2.26507568359375, -2.176544189453125, -2.0880126953125, -1.999481201171875, -1.91094970703125, -1.822418212890625, -1.73388671875, -1.645355224609375, -1.55682373046875, -1.468292236328125, -1.3797607421875, -1.291229248046875, -1.20269775390625, -1.114166259765625, -1.025634765625, -0.937103271484375, -0.84857177734375, -0.760040283203125, -0.6715087890625, -0.582977294921875, -0.49444580078125, -0.405914306640625, -0.3173828125, -0.228851318359375, -0.14031982421875, -0.051788330078125, 0.0367431640625, 0.125274658203125, 0.21380615234375, 0.302337646484375, 0.390869140625, 0.479400634765625, 0.56793212890625, 0.656463623046875, 0.7449951171875, 0.833526611328125, 0.92205810546875, 1.010589599609375, 1.09912109375, 1.187652587890625, 1.27618408203125, 1.364715576171875, 1.4532470703125, 1.541778564453125, 1.63031005859375, 1.718841552734375, 1.807373046875, 1.895904541015625, 1.98443603515625, 2.072967529296875, 2.1614990234375, 2.250030517578125, 2.33856201171875, 2.427093505859375, 2.515625]}, "gradients/decoder.transformer.h.21.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 1.0, 1.0, 5.0, 8.0, 14.0, 16.0, 31.0, 48.0, 85.0, 109.0, 187.0, 324.0, 566.0, 1021.0, 1942.0, 3781.0, 7615.0, 15220.0, 31329.0, 64549.0, 128676.0, 225041.0, 250993.0, 158028.0, 80630.0, 39455.0, 19245.0, 9687.0, 4720.0, 2344.0, 1275.0, 672.0, 367.0, 220.0, 140.0, 78.0, 53.0, 29.0, 22.0, 13.0, 9.0, 7.0, 5.0, 4.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.51220703125, -0.4952239990234375, -0.478240966796875, -0.4612579345703125, -0.44427490234375, -0.4272918701171875, -0.410308837890625, -0.3933258056640625, -0.3763427734375, -0.3593597412109375, -0.342376708984375, -0.3253936767578125, -0.30841064453125, -0.2914276123046875, -0.274444580078125, -0.2574615478515625, -0.240478515625, -0.2234954833984375, -0.206512451171875, -0.1895294189453125, -0.17254638671875, -0.1555633544921875, -0.138580322265625, -0.1215972900390625, -0.1046142578125, -0.0876312255859375, -0.070648193359375, -0.0536651611328125, -0.03668212890625, -0.0196990966796875, -0.002716064453125, 0.0142669677734375, 0.03125, 0.0482330322265625, 0.065216064453125, 0.0821990966796875, 0.09918212890625, 0.1161651611328125, 0.133148193359375, 0.1501312255859375, 0.1671142578125, 0.1840972900390625, 0.201080322265625, 0.2180633544921875, 0.23504638671875, 0.2520294189453125, 0.269012451171875, 0.2859954833984375, 0.302978515625, 0.3199615478515625, 0.336944580078125, 0.3539276123046875, 0.37091064453125, 0.3878936767578125, 0.404876708984375, 0.4218597412109375, 0.4388427734375, 0.4558258056640625, 0.472808837890625, 0.4897918701171875, 0.50677490234375, 0.5237579345703125, 0.540740966796875, 0.5577239990234375, 0.57470703125]}, "gradients/decoder.transformer.h.21.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 2.0, 6.0, 6.0, 14.0, 15.0, 10.0, 14.0, 16.0, 18.0, 22.0, 26.0, 27.0, 20.0, 22.0, 36.0, 42.0, 32.0, 44.0, 40.0, 36.0, 33.0, 1070.0, 38.0, 40.0, 36.0, 38.0, 35.0, 29.0, 34.0, 26.0, 31.0, 23.0, 25.0, 19.0, 29.0, 6.0, 11.0, 13.0, 14.0, 10.0, 8.0, 8.0, 3.0, 5.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.6494140625, -1.59417724609375, -1.5389404296875, -1.48370361328125, -1.428466796875, -1.37322998046875, -1.3179931640625, -1.26275634765625, -1.20751953125, -1.15228271484375, -1.0970458984375, -1.04180908203125, -0.986572265625, -0.93133544921875, -0.8760986328125, -0.82086181640625, -0.765625, -0.71038818359375, -0.6551513671875, -0.59991455078125, -0.544677734375, -0.48944091796875, -0.4342041015625, -0.37896728515625, -0.32373046875, -0.26849365234375, -0.2132568359375, -0.15802001953125, -0.102783203125, -0.04754638671875, 0.0076904296875, 0.06292724609375, 0.1181640625, 0.17340087890625, 0.2286376953125, 0.28387451171875, 0.339111328125, 0.39434814453125, 0.4495849609375, 0.50482177734375, 0.56005859375, 0.61529541015625, 0.6705322265625, 0.72576904296875, 0.781005859375, 0.83624267578125, 0.8914794921875, 0.94671630859375, 1.001953125, 1.05718994140625, 1.1124267578125, 1.16766357421875, 1.222900390625, 1.27813720703125, 1.3333740234375, 1.38861083984375, 1.44384765625, 1.49908447265625, 1.5543212890625, 1.60955810546875, 1.664794921875, 1.72003173828125, 1.7752685546875, 1.83050537109375, 1.8857421875]}, "gradients/decoder.transformer.h.21.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 2.0, 1.0, 1.0, 3.0, 11.0, 12.0, 19.0, 25.0, 42.0, 45.0, 84.0, 150.0, 193.0, 325.0, 457.0, 718.0, 1263.0, 2017.0, 3322.0, 5396.0, 8957.0, 14755.0, 24603.0, 41080.0, 67326.0, 108254.0, 159109.0, 1226516.0, 157304.0, 106385.0, 66937.0, 40459.0, 24228.0, 14365.0, 8940.0, 5298.0, 3185.0, 2012.0, 1194.0, 821.0, 483.0, 272.0, 210.0, 126.0, 81.0, 64.0, 33.0, 19.0, 19.0, 8.0, 6.0, 4.0, 2.0, 2.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.2420654296875, -0.23412513732910156, -0.22618484497070312, -0.2182445526123047, -0.21030426025390625, -0.2023639678955078, -0.19442367553710938, -0.18648338317871094, -0.1785430908203125, -0.17060279846191406, -0.16266250610351562, -0.1547222137451172, -0.14678192138671875, -0.1388416290283203, -0.13090133666992188, -0.12296104431152344, -0.115020751953125, -0.10708045959472656, -0.09914016723632812, -0.09119987487792969, -0.08325958251953125, -0.07531929016113281, -0.06737899780273438, -0.05943870544433594, -0.0514984130859375, -0.04355812072753906, -0.035617828369140625, -0.027677536010742188, -0.01973724365234375, -0.011796951293945312, -0.003856658935546875, 0.0040836334228515625, 0.01202392578125, 0.019964218139648438, 0.027904510498046875, 0.03584480285644531, 0.04378509521484375, 0.05172538757324219, 0.059665679931640625, 0.06760597229003906, 0.0755462646484375, 0.08348655700683594, 0.09142684936523438, 0.09936714172363281, 0.10730743408203125, 0.11524772644042969, 0.12318801879882812, 0.13112831115722656, 0.139068603515625, 0.14700889587402344, 0.15494918823242188, 0.1628894805908203, 0.17082977294921875, 0.1787700653076172, 0.18671035766601562, 0.19465065002441406, 0.2025909423828125, 0.21053123474121094, 0.21847152709960938, 0.2264118194580078, 0.23435211181640625, 0.2422924041748047, 0.2502326965332031, 0.25817298889160156, 0.26611328125]}, "gradients/decoder.transformer.h.21.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 3.0, 2.0, 6.0, 2.0, 2.0, 8.0, 8.0, 14.0, 19.0, 15.0, 18.0, 18.0, 31.0, 40.0, 44.0, 57.0, 71.0, 90.0, 116.0, 95.0, 72.0, 47.0, 51.0, 36.0, 36.0, 26.0, 21.0, 15.0, 11.0, 11.0, 7.0, 5.0, 6.0, 3.0, 0.0, 2.0, 3.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.01045989990234375, -0.010173201560974121, -0.009886503219604492, -0.009599804878234863, -0.009313106536865234, -0.009026408195495605, -0.008739709854125977, -0.008453011512756348, -0.008166313171386719, -0.00787961483001709, -0.007592916488647461, -0.007306218147277832, -0.007019519805908203, -0.006732821464538574, -0.006446123123168945, -0.006159424781799316, -0.0058727264404296875, -0.005586028099060059, -0.00529932975769043, -0.005012631416320801, -0.004725933074951172, -0.004439234733581543, -0.004152536392211914, -0.003865838050842285, -0.0035791397094726562, -0.0032924413681030273, -0.0030057430267333984, -0.0027190446853637695, -0.0024323463439941406, -0.0021456480026245117, -0.0018589496612548828, -0.001572251319885254, -0.001285552978515625, -0.000998854637145996, -0.0007121562957763672, -0.0004254579544067383, -0.00013875961303710938, 0.00014793872833251953, 0.00043463706970214844, 0.0007213354110717773, 0.0010080337524414062, 0.0012947320938110352, 0.001581430435180664, 0.001868128776550293, 0.002154827117919922, 0.0024415254592895508, 0.0027282238006591797, 0.0030149221420288086, 0.0033016204833984375, 0.0035883188247680664, 0.0038750171661376953, 0.004161715507507324, 0.004448413848876953, 0.004735112190246582, 0.005021810531616211, 0.00530850887298584, 0.005595207214355469, 0.005881905555725098, 0.0061686038970947266, 0.0064553022384643555, 0.006742000579833984, 0.007028698921203613, 0.007315397262573242, 0.007602095603942871, 0.0078887939453125]}, "gradients/decoder.transformer.h.21.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 0.0, 1.0, 2.0, 1.0, 2.0, 0.0, 1.0, 3.0, 5.0, 6.0, 9.0, 19.0, 13.0, 20.0, 30.0, 45.0, 65.0, 74.0, 83.0, 116.0, 177.0, 291.0, 713.0, 20439.0, 1020223.0, 4863.0, 499.0, 244.0, 159.0, 130.0, 82.0, 55.0, 42.0, 37.0, 40.0, 17.0, 14.0, 10.0, 5.0, 9.0, 7.0, 4.0, 5.0, 2.0, 1.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.1531982421875, -0.14846420288085938, -0.14373016357421875, -0.13899612426757812, -0.1342620849609375, -0.12952804565429688, -0.12479400634765625, -0.12005996704101562, -0.115325927734375, -0.11059188842773438, -0.10585784912109375, -0.10112380981445312, -0.0963897705078125, -0.09165573120117188, -0.08692169189453125, -0.08218765258789062, -0.07745361328125, -0.07271957397460938, -0.06798553466796875, -0.06325149536132812, -0.0585174560546875, -0.053783416748046875, -0.04904937744140625, -0.044315338134765625, -0.039581298828125, -0.034847259521484375, -0.03011322021484375, -0.025379180908203125, -0.0206451416015625, -0.015911102294921875, -0.01117706298828125, -0.006443023681640625, -0.001708984375, 0.003025054931640625, 0.00775909423828125, 0.012493133544921875, 0.0172271728515625, 0.021961212158203125, 0.02669525146484375, 0.031429290771484375, 0.036163330078125, 0.040897369384765625, 0.04563140869140625, 0.050365447998046875, 0.0550994873046875, 0.059833526611328125, 0.06456756591796875, 0.06930160522460938, 0.07403564453125, 0.07876968383789062, 0.08350372314453125, 0.08823776245117188, 0.0929718017578125, 0.09770584106445312, 0.10243988037109375, 0.10717391967773438, 0.111907958984375, 0.11664199829101562, 0.12137603759765625, 0.12611007690429688, 0.1308441162109375, 0.13557815551757812, 0.14031219482421875, 0.14504623413085938, 0.1497802734375]}, "gradients/decoder.transformer.h.21.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 4.0, 915.0, 98.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.14734181761741638, -0.14311569929122925, -0.13888958096504211, -0.13466346263885498, -0.13043734431266785, -0.1262112259864807, -0.12198510020971298, -0.11775898188352585, -0.11353286355733871, -0.10930674523115158, -0.10508062690496445, -0.10085450857877731, -0.09662838280200958, -0.09240226447582245, -0.08817614614963531, -0.08395002782344818, -0.07972390949726105, -0.07549779117107391, -0.07127167284488678, -0.06704555451869965, -0.06281943619251251, -0.05859331414103508, -0.05436719208955765, -0.050141073763370514, -0.04591495543718338, -0.041688837110996246, -0.03746271878480911, -0.03323659673333168, -0.029010478407144547, -0.024784360080957413, -0.02055823989212513, -0.016332119703292847, -0.012106016278266907, -0.007879897020757198, -0.00365377776324749, 0.0005723414942622185, 0.004798460751771927, 0.00902457907795906, 0.013250699266791344, 0.017476819455623627, 0.02170293778181076, 0.025929056107997894, 0.030155176296830177, 0.03438129648566246, 0.038607414811849594, 0.04283353313803673, 0.04705965518951416, 0.051285773515701294, 0.05551189184188843, 0.05973801016807556, 0.0639641284942627, 0.06819024682044983, 0.07241636514663696, 0.0766424834728241, 0.08086860924959183, 0.08509472757577896, 0.0893208459019661, 0.09354696422815323, 0.09777308255434036, 0.1019992008805275, 0.10622532665729523, 0.11045144498348236, 0.1146775633096695, 0.11890368163585663, 0.12312979996204376]}, "gradients/decoder.transformer.h.21.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 3.0, 0.0, 2.0, 0.0, 5.0, 6.0, 5.0, 8.0, 15.0, 25.0, 25.0, 28.0, 34.0, 40.0, 39.0, 37.0, 51.0, 48.0, 61.0, 72.0, 56.0, 61.0, 57.0, 61.0, 46.0, 40.0, 34.0, 34.0, 29.0, 26.0, 24.0, 14.0, 10.0, 6.0, 5.0, 5.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.011496543884277344, -0.011163455434143543, -0.010830366984009743, -0.010497278533875942, -0.010164190083742142, -0.009831101633608341, -0.00949801318347454, -0.00916492473334074, -0.00883183628320694, -0.00849874783307314, -0.008165659382939339, -0.007832570932805538, -0.007499482482671738, -0.007166394032537937, -0.006833305582404137, -0.006500217132270336, -0.006167128682136536, -0.005834040232002735, -0.005500951781868935, -0.005167863331735134, -0.004834774881601334, -0.004501686431467533, -0.004168597981333733, -0.003835509531199932, -0.0035024210810661316, -0.003169332630932331, -0.0028362441807985306, -0.00250315573066473, -0.0021700672805309296, -0.001836978830397129, -0.0015038903802633286, -0.001170801930129528, -0.0008377134799957275, -0.000504625029861927, -0.00017153657972812653, 0.00016155187040567398, 0.0004946403205394745, 0.000827728770673275, 0.0011608172208070755, 0.001493905670940876, 0.0018269941210746765, 0.002160082571208477, 0.0024931710213422775, 0.002826259471476078, 0.0031593479216098785, 0.003492436371743679, 0.0038255248218774796, 0.00415861327201128, 0.004491701722145081, 0.004824790172278881, 0.005157878622412682, 0.005490967072546482, 0.005824055522680283, 0.006157143972814083, 0.006490232422947884, 0.006823320873081684, 0.007156409323215485, 0.007489497773349285, 0.007822586223483086, 0.008155674673616886, 0.008488763123750687, 0.008821851573884487, 0.009154940024018288, 0.009488028474152088, 0.009821116924285889]}, "gradients/decoder.transformer.h.21.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 3.0, 4.0, 3.0, 5.0, 7.0, 11.0, 7.0, 14.0, 21.0, 14.0, 19.0, 22.0, 22.0, 27.0, 26.0, 31.0, 31.0, 39.0, 29.0, 50.0, 44.0, 50.0, 48.0, 48.0, 47.0, 35.0, 39.0, 46.0, 31.0, 33.0, 21.0, 27.0, 31.0, 21.0, 14.0, 22.0, 15.0, 15.0, 13.0, 9.0, 6.0, 3.0, 4.0, 1.0, 3.0, 2.0, 2.0, 0.0, 2.0, 1.0, 1.0], "bins": [-3.150390625, -3.061859130859375, -2.97332763671875, -2.884796142578125, -2.7962646484375, -2.707733154296875, -2.61920166015625, -2.530670166015625, -2.442138671875, -2.353607177734375, -2.26507568359375, -2.176544189453125, -2.0880126953125, -1.999481201171875, -1.91094970703125, -1.822418212890625, -1.73388671875, -1.645355224609375, -1.55682373046875, -1.468292236328125, -1.3797607421875, -1.291229248046875, -1.20269775390625, -1.114166259765625, -1.025634765625, -0.937103271484375, -0.84857177734375, -0.760040283203125, -0.6715087890625, -0.582977294921875, -0.49444580078125, -0.405914306640625, -0.3173828125, -0.228851318359375, -0.14031982421875, -0.051788330078125, 0.0367431640625, 0.125274658203125, 0.21380615234375, 0.302337646484375, 0.390869140625, 0.479400634765625, 0.56793212890625, 0.656463623046875, 0.7449951171875, 0.833526611328125, 0.92205810546875, 1.010589599609375, 1.09912109375, 1.187652587890625, 1.27618408203125, 1.364715576171875, 1.4532470703125, 1.541778564453125, 1.63031005859375, 1.718841552734375, 1.807373046875, 1.895904541015625, 1.98443603515625, 2.072967529296875, 2.1614990234375, 2.250030517578125, 2.33856201171875, 2.427093505859375, 2.515625]}, "gradients/decoder.transformer.h.21.attn.c_proj.weight": {"_type": "histogram", "values": [3.0, 0.0, 1.0, 3.0, 1.0, 3.0, 5.0, 12.0, 10.0, 13.0, 25.0, 39.0, 65.0, 58.0, 109.0, 195.0, 286.0, 427.0, 643.0, 1095.0, 1809.0, 2969.0, 5010.0, 9604.0, 22391.0, 70836.0, 258077.0, 441859.0, 156298.0, 43609.0, 15385.0, 7284.0, 4028.0, 2424.0, 1484.0, 903.0, 586.0, 380.0, 222.0, 138.0, 80.0, 53.0, 33.0, 38.0, 16.0, 19.0, 13.0, 9.0, 8.0, 4.0, 1.0, 4.0, 4.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-3.365234375, -3.241973876953125, -3.11871337890625, -2.995452880859375, -2.8721923828125, -2.748931884765625, -2.62567138671875, -2.502410888671875, -2.379150390625, -2.255889892578125, -2.13262939453125, -2.009368896484375, -1.8861083984375, -1.762847900390625, -1.63958740234375, -1.516326904296875, -1.39306640625, -1.269805908203125, -1.14654541015625, -1.023284912109375, -0.9000244140625, -0.776763916015625, -0.65350341796875, -0.530242919921875, -0.406982421875, -0.283721923828125, -0.16046142578125, -0.037200927734375, 0.0860595703125, 0.209320068359375, 0.33258056640625, 0.455841064453125, 0.5791015625, 0.702362060546875, 0.82562255859375, 0.948883056640625, 1.0721435546875, 1.195404052734375, 1.31866455078125, 1.441925048828125, 1.565185546875, 1.688446044921875, 1.81170654296875, 1.934967041015625, 2.0582275390625, 2.181488037109375, 2.30474853515625, 2.428009033203125, 2.55126953125, 2.674530029296875, 2.79779052734375, 2.921051025390625, 3.0443115234375, 3.167572021484375, 3.29083251953125, 3.414093017578125, 3.537353515625, 3.660614013671875, 3.78387451171875, 3.907135009765625, 4.0303955078125, 4.153656005859375, 4.27691650390625, 4.400177001953125, 4.5234375]}, "gradients/decoder.transformer.h.21.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 4.0, 5.0, 0.0, 6.0, 7.0, 9.0, 6.0, 14.0, 10.0, 16.0, 18.0, 25.0, 32.0, 29.0, 21.0, 42.0, 26.0, 34.0, 39.0, 50.0, 55.0, 90.0, 260.0, 1543.0, 199.0, 83.0, 60.0, 46.0, 41.0, 37.0, 34.0, 27.0, 41.0, 25.0, 26.0, 19.0, 16.0, 10.0, 22.0, 9.0, 8.0, 7.0, 5.0, 4.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-8.09375, -7.7872314453125, -7.480712890625, -7.1741943359375, -6.86767578125, -6.5611572265625, -6.254638671875, -5.9481201171875, -5.6416015625, -5.3350830078125, -5.028564453125, -4.7220458984375, -4.41552734375, -4.1090087890625, -3.802490234375, -3.4959716796875, -3.189453125, -2.8829345703125, -2.576416015625, -2.2698974609375, -1.96337890625, -1.6568603515625, -1.350341796875, -1.0438232421875, -0.7373046875, -0.4307861328125, -0.124267578125, 0.1822509765625, 0.48876953125, 0.7952880859375, 1.101806640625, 1.4083251953125, 1.71484375, 2.0213623046875, 2.327880859375, 2.6343994140625, 2.94091796875, 3.2474365234375, 3.553955078125, 3.8604736328125, 4.1669921875, 4.4735107421875, 4.780029296875, 5.0865478515625, 5.39306640625, 5.6995849609375, 6.006103515625, 6.3126220703125, 6.619140625, 6.9256591796875, 7.232177734375, 7.5386962890625, 7.84521484375, 8.1517333984375, 8.458251953125, 8.7647705078125, 9.0712890625, 9.3778076171875, 9.684326171875, 9.9908447265625, 10.29736328125, 10.6038818359375, 10.910400390625, 11.2169189453125, 11.5234375]}, "gradients/decoder.transformer.h.21.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 5.0, 7.0, 4.0, 7.0, 11.0, 13.0, 14.0, 21.0, 14.0, 27.0, 30.0, 49.0, 52.0, 76.0, 105.0, 223.0, 420.0, 1068.0, 4428.0, 35660.0, 2576694.0, 506261.0, 16334.0, 2613.0, 719.0, 292.0, 149.0, 98.0, 69.0, 55.0, 35.0, 36.0, 32.0, 20.0, 15.0, 16.0, 11.0, 12.0, 4.0, 3.0, 7.0, 5.0, 2.0, 3.0, 1.0, 0.0, 1.0], "bins": [-19.34375, -18.83251953125, -18.3212890625, -17.81005859375, -17.298828125, -16.78759765625, -16.2763671875, -15.76513671875, -15.25390625, -14.74267578125, -14.2314453125, -13.72021484375, -13.208984375, -12.69775390625, -12.1865234375, -11.67529296875, -11.1640625, -10.65283203125, -10.1416015625, -9.63037109375, -9.119140625, -8.60791015625, -8.0966796875, -7.58544921875, -7.07421875, -6.56298828125, -6.0517578125, -5.54052734375, -5.029296875, -4.51806640625, -4.0068359375, -3.49560546875, -2.984375, -2.47314453125, -1.9619140625, -1.45068359375, -0.939453125, -0.42822265625, 0.0830078125, 0.59423828125, 1.10546875, 1.61669921875, 2.1279296875, 2.63916015625, 3.150390625, 3.66162109375, 4.1728515625, 4.68408203125, 5.1953125, 5.70654296875, 6.2177734375, 6.72900390625, 7.240234375, 7.75146484375, 8.2626953125, 8.77392578125, 9.28515625, 9.79638671875, 10.3076171875, 10.81884765625, 11.330078125, 11.84130859375, 12.3525390625, 12.86376953125, 13.375]}, "gradients/decoder.transformer.h.21.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 3.0, 6.0, 25.0, 59.0, 221.0, 243.0, 221.0, 143.0, 70.0, 13.0, 9.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-40.67424774169922, -39.62382125854492, -38.573394775390625, -37.52296829223633, -36.47254180908203, -35.42211151123047, -34.37168502807617, -33.321258544921875, -32.27083206176758, -31.22040557861328, -30.169979095458984, -29.119550704956055, -28.069124221801758, -27.01869773864746, -25.96826934814453, -24.917842864990234, -23.867416381835938, -22.81698989868164, -21.766563415527344, -20.716135025024414, -19.665708541870117, -18.61528205871582, -17.56485366821289, -16.514427185058594, -15.464000701904297, -14.41357421875, -13.363146781921387, -12.312719345092773, -11.262292861938477, -10.21186637878418, -9.161438941955566, -8.111011505126953, -7.060581207275391, -6.0101542472839355, -4.9597272872924805, -3.9093003273010254, -2.8588733673095703, -1.8084464073181152, -0.7580194473266602, 0.2924075126647949, 1.34283447265625, 2.393261432647705, 3.44368839263916, 4.494115352630615, 5.54454231262207, 6.594969272613525, 7.6453962326049805, 8.695823669433594, 9.74625015258789, 10.796676635742188, 11.8471040725708, 12.897531509399414, 13.947957992553711, 14.998384475708008, 16.048812866210938, 17.099239349365234, 18.14966583251953, 19.200092315673828, 20.250518798828125, 21.300947189331055, 22.35137367248535, 23.40180015563965, 24.452228546142578, 25.502655029296875, 26.553081512451172]}, "gradients/decoder.transformer.h.21.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 5.0, 1.0, 4.0, 7.0, 5.0, 3.0, 11.0, 8.0, 11.0, 11.0, 15.0, 24.0, 14.0, 24.0, 18.0, 27.0, 26.0, 33.0, 36.0, 37.0, 36.0, 37.0, 41.0, 37.0, 34.0, 47.0, 38.0, 32.0, 42.0, 34.0, 44.0, 29.0, 33.0, 24.0, 31.0, 23.0, 23.0, 20.0, 25.0, 14.0, 13.0, 6.0, 10.0, 4.0, 1.0, 7.0, 8.0, 2.0, 1.0, 4.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-28.69546890258789, -27.72551918029785, -26.75556755065918, -25.78561782836914, -24.81566619873047, -23.84571647644043, -22.875764846801758, -21.90581512451172, -20.935863494873047, -19.965913772583008, -18.995962142944336, -18.026012420654297, -17.056060791015625, -16.086111068725586, -15.116159439086914, -14.146209716796875, -13.17625904083252, -12.206308364868164, -11.236357688903809, -10.266407012939453, -9.296456336975098, -8.326505661010742, -7.356555461883545, -6.3866047859191895, -5.416654109954834, -4.4467034339904785, -3.476752758026123, -2.5068023204803467, -1.5368516445159912, -0.5669012069702148, 0.4030494689941406, 1.373000144958496, 2.3429508209228516, 3.312901496887207, 4.2828521728515625, 5.252802848815918, 6.222753524780273, 7.192703723907471, 8.162654876708984, 9.132604598999023, 10.102556228637695, 11.07250690460205, 12.042457580566406, 13.012408256530762, 13.982358932495117, 14.952308654785156, 15.922260284423828, 16.892210006713867, 17.862159729003906, 18.832109451293945, 19.802061080932617, 20.772010803222656, 21.741962432861328, 22.711912155151367, 23.68186378479004, 24.651813507080078, 25.62176513671875, 26.59171485900879, 27.56166648864746, 28.5316162109375, 29.501567840576172, 30.47151756286621, 31.441469192504883, 32.41141891479492, 33.381370544433594]}, "gradients/decoder.transformer.h.20.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0, 2.0, 2.0, 6.0, 2.0, 5.0, 13.0, 10.0, 14.0, 12.0, 19.0, 23.0, 15.0, 21.0, 21.0, 26.0, 25.0, 36.0, 41.0, 37.0, 43.0, 39.0, 42.0, 39.0, 68.0, 45.0, 52.0, 41.0, 39.0, 25.0, 35.0, 25.0, 23.0, 20.0, 29.0, 23.0, 14.0, 12.0, 19.0, 12.0, 11.0, 12.0, 2.0, 6.0, 3.0, 0.0, 2.0, 3.0, 2.0, 0.0, 0.0, 3.0], "bins": [-3.34375, -3.250732421875, -3.15771484375, -3.064697265625, -2.9716796875, -2.878662109375, -2.78564453125, -2.692626953125, -2.599609375, -2.506591796875, -2.41357421875, -2.320556640625, -2.2275390625, -2.134521484375, -2.04150390625, -1.948486328125, -1.85546875, -1.762451171875, -1.66943359375, -1.576416015625, -1.4833984375, -1.390380859375, -1.29736328125, -1.204345703125, -1.111328125, -1.018310546875, -0.92529296875, -0.832275390625, -0.7392578125, -0.646240234375, -0.55322265625, -0.460205078125, -0.3671875, -0.274169921875, -0.18115234375, -0.088134765625, 0.0048828125, 0.097900390625, 0.19091796875, 0.283935546875, 0.376953125, 0.469970703125, 0.56298828125, 0.656005859375, 0.7490234375, 0.842041015625, 0.93505859375, 1.028076171875, 1.12109375, 1.214111328125, 1.30712890625, 1.400146484375, 1.4931640625, 1.586181640625, 1.67919921875, 1.772216796875, 1.865234375, 1.958251953125, 2.05126953125, 2.144287109375, 2.2373046875, 2.330322265625, 2.42333984375, 2.516357421875, 2.609375]}, "gradients/decoder.transformer.h.20.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 4.0, 1.0, 6.0, 5.0, 5.0, 8.0, 7.0, 14.0, 12.0, 21.0, 23.0, 29.0, 38.0, 54.0, 80.0, 124.0, 311.0, 862.0, 3286.0, 18306.0, 199800.0, 3075169.0, 838942.0, 48445.0, 6521.0, 1346.0, 401.0, 155.0, 81.0, 58.0, 30.0, 24.0, 27.0, 19.0, 14.0, 15.0, 11.0, 12.0, 5.0, 10.0, 4.0, 2.0, 0.0, 3.0, 1.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-13.8515625, -13.45263671875, -13.0537109375, -12.65478515625, -12.255859375, -11.85693359375, -11.4580078125, -11.05908203125, -10.66015625, -10.26123046875, -9.8623046875, -9.46337890625, -9.064453125, -8.66552734375, -8.2666015625, -7.86767578125, -7.46875, -7.06982421875, -6.6708984375, -6.27197265625, -5.873046875, -5.47412109375, -5.0751953125, -4.67626953125, -4.27734375, -3.87841796875, -3.4794921875, -3.08056640625, -2.681640625, -2.28271484375, -1.8837890625, -1.48486328125, -1.0859375, -0.68701171875, -0.2880859375, 0.11083984375, 0.509765625, 0.90869140625, 1.3076171875, 1.70654296875, 2.10546875, 2.50439453125, 2.9033203125, 3.30224609375, 3.701171875, 4.10009765625, 4.4990234375, 4.89794921875, 5.296875, 5.69580078125, 6.0947265625, 6.49365234375, 6.892578125, 7.29150390625, 7.6904296875, 8.08935546875, 8.48828125, 8.88720703125, 9.2861328125, 9.68505859375, 10.083984375, 10.48291015625, 10.8818359375, 11.28076171875, 11.6796875]}, "gradients/decoder.transformer.h.20.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 5.0, 5.0, 6.0, 8.0, 6.0, 13.0, 16.0, 20.0, 31.0, 49.0, 49.0, 63.0, 126.0, 126.0, 192.0, 214.0, 329.0, 422.0, 454.0, 442.0, 405.0, 307.0, 212.0, 161.0, 104.0, 77.0, 59.0, 43.0, 45.0, 23.0, 13.0, 7.0, 11.0, 9.0, 8.0, 5.0, 6.0, 0.0, 4.0, 1.0, 2.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0], "bins": [-8.9609375, -8.70257568359375, -8.4442138671875, -8.18585205078125, -7.927490234375, -7.66912841796875, -7.4107666015625, -7.15240478515625, -6.89404296875, -6.63568115234375, -6.3773193359375, -6.11895751953125, -5.860595703125, -5.60223388671875, -5.3438720703125, -5.08551025390625, -4.8271484375, -4.56878662109375, -4.3104248046875, -4.05206298828125, -3.793701171875, -3.53533935546875, -3.2769775390625, -3.01861572265625, -2.76025390625, -2.50189208984375, -2.2435302734375, -1.98516845703125, -1.726806640625, -1.46844482421875, -1.2100830078125, -0.95172119140625, -0.693359375, -0.43499755859375, -0.1766357421875, 0.08172607421875, 0.340087890625, 0.59844970703125, 0.8568115234375, 1.11517333984375, 1.37353515625, 1.63189697265625, 1.8902587890625, 2.14862060546875, 2.406982421875, 2.66534423828125, 2.9237060546875, 3.18206787109375, 3.4404296875, 3.69879150390625, 3.9571533203125, 4.21551513671875, 4.473876953125, 4.73223876953125, 4.9906005859375, 5.24896240234375, 5.50732421875, 5.76568603515625, 6.0240478515625, 6.28240966796875, 6.540771484375, 6.79913330078125, 7.0574951171875, 7.31585693359375, 7.57421875]}, "gradients/decoder.transformer.h.20.mlp.c_fc.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 3.0, 3.0, 3.0, 5.0, 4.0, 7.0, 9.0, 7.0, 19.0, 13.0, 23.0, 52.0, 77.0, 117.0, 184.0, 321.0, 730.0, 1806.0, 5567.0, 23808.0, 153242.0, 1539743.0, 2195451.0, 229954.0, 32154.0, 7113.0, 2142.0, 826.0, 373.0, 196.0, 109.0, 78.0, 49.0, 23.0, 25.0, 13.0, 13.0, 7.0, 11.0, 5.0, 3.0, 3.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-13.9609375, -13.4639892578125, -12.967041015625, -12.4700927734375, -11.97314453125, -11.4761962890625, -10.979248046875, -10.4822998046875, -9.9853515625, -9.4884033203125, -8.991455078125, -8.4945068359375, -7.99755859375, -7.5006103515625, -7.003662109375, -6.5067138671875, -6.009765625, -5.5128173828125, -5.015869140625, -4.5189208984375, -4.02197265625, -3.5250244140625, -3.028076171875, -2.5311279296875, -2.0341796875, -1.5372314453125, -1.040283203125, -0.5433349609375, -0.04638671875, 0.4505615234375, 0.947509765625, 1.4444580078125, 1.94140625, 2.4383544921875, 2.935302734375, 3.4322509765625, 3.92919921875, 4.4261474609375, 4.923095703125, 5.4200439453125, 5.9169921875, 6.4139404296875, 6.910888671875, 7.4078369140625, 7.90478515625, 8.4017333984375, 8.898681640625, 9.3956298828125, 9.892578125, 10.3895263671875, 10.886474609375, 11.3834228515625, 11.88037109375, 12.3773193359375, 12.874267578125, 13.3712158203125, 13.8681640625, 14.3651123046875, 14.862060546875, 15.3590087890625, 15.85595703125, 16.3529052734375, 16.849853515625, 17.3468017578125, 17.84375]}, "gradients/decoder.transformer.h.20.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 6.0, 38.0, 102.0, 293.0, 304.0, 186.0, 72.0, 15.0, 0.0, 0.0, 0.0, 1.0], "bins": [-226.0171356201172, -221.99790954589844, -217.97866821289062, -213.95944213867188, -209.94020080566406, -205.9209747314453, -201.9017333984375, -197.88250732421875, -193.86326599121094, -189.8440399169922, -185.82479858398438, -181.80557250976562, -177.7863311767578, -173.76710510253906, -169.74786376953125, -165.7286376953125, -161.70941162109375, -157.690185546875, -153.6709442138672, -149.65171813964844, -145.63247680664062, -141.61325073242188, -137.59400939941406, -133.5747833251953, -129.5555419921875, -125.53630828857422, -121.51707458496094, -117.49784088134766, -113.47860717773438, -109.4593734741211, -105.44013977050781, -101.42091369628906, -97.40167999267578, -93.3824462890625, -89.36321258544922, -85.34397888183594, -81.32474517822266, -77.30551147460938, -73.28628540039062, -69.26704406738281, -65.24781799316406, -61.22858428955078, -57.2093505859375, -53.19011688232422, -49.17088317871094, -45.151649475097656, -41.13241958618164, -37.11318588256836, -33.09394836425781, -29.07471466064453, -25.05548095703125, -21.0362491607666, -17.01701545715332, -12.997781753540039, -8.97854995727539, -4.959316253662109, -0.9400844573974609, 3.079148769378662, 7.098381996154785, 11.11761474609375, 15.136848449707031, 19.156082153320312, 23.17531394958496, 27.194547653198242, 31.213781356811523]}, "gradients/decoder.transformer.h.20.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 4.0, 6.0, 4.0, 4.0, 7.0, 7.0, 9.0, 9.0, 10.0, 16.0, 12.0, 14.0, 15.0, 20.0, 23.0, 29.0, 22.0, 29.0, 37.0, 40.0, 39.0, 38.0, 46.0, 37.0, 38.0, 43.0, 25.0, 42.0, 37.0, 41.0, 32.0, 23.0, 34.0, 30.0, 29.0, 19.0, 18.0, 16.0, 13.0, 17.0, 8.0, 18.0, 13.0, 12.0, 8.0, 6.0, 6.0, 3.0, 2.0, 3.0, 2.0, 1.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-24.757465362548828, -23.924449920654297, -23.091434478759766, -22.258420944213867, -21.425405502319336, -20.592390060424805, -19.759376525878906, -18.926361083984375, -18.093345642089844, -17.260330200195312, -16.42731475830078, -15.594301223754883, -14.761285781860352, -13.92827033996582, -13.095255851745605, -12.26224136352539, -11.42922592163086, -10.596210479736328, -9.763195991516113, -8.930181503295898, -8.097166061401367, -7.264151096343994, -6.431136131286621, -5.598121166229248, -4.765106201171875, -3.932091236114502, -3.099076271057129, -2.266061305999756, -1.4330463409423828, -0.6000313758850098, 0.23298358917236328, 1.0659985542297363, 1.8990116119384766, 2.7320265769958496, 3.5650415420532227, 4.398056507110596, 5.231071472167969, 6.064086437225342, 6.897101402282715, 7.730116367340088, 8.563131332397461, 9.396146774291992, 10.229161262512207, 11.062175750732422, 11.895191192626953, 12.728206634521484, 13.5612211227417, 14.394235610961914, 15.227251052856445, 16.060266494750977, 16.893280029296875, 17.726295471191406, 18.559310913085938, 19.39232635498047, 20.225341796875, 21.0583553314209, 21.89137077331543, 22.72438621520996, 23.55739974975586, 24.39041519165039, 25.223430633544922, 26.056446075439453, 26.889461517333984, 27.722475051879883, 28.555490493774414]}, "gradients/decoder.transformer.h.20.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 5.0, 1.0, 4.0, 7.0, 9.0, 18.0, 14.0, 16.0, 25.0, 17.0, 26.0, 31.0, 29.0, 34.0, 36.0, 42.0, 43.0, 51.0, 38.0, 55.0, 43.0, 67.0, 51.0, 53.0, 30.0, 34.0, 36.0, 35.0, 30.0, 21.0, 26.0, 19.0, 16.0, 13.0, 8.0, 8.0, 10.0, 3.0, 3.0, 2.0, 5.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-3.880859375, -3.773223876953125, -3.66558837890625, -3.557952880859375, -3.4503173828125, -3.342681884765625, -3.23504638671875, -3.127410888671875, -3.019775390625, -2.912139892578125, -2.80450439453125, -2.696868896484375, -2.5892333984375, -2.481597900390625, -2.37396240234375, -2.266326904296875, -2.15869140625, -2.051055908203125, -1.94342041015625, -1.835784912109375, -1.7281494140625, -1.620513916015625, -1.51287841796875, -1.405242919921875, -1.297607421875, -1.189971923828125, -1.08233642578125, -0.974700927734375, -0.8670654296875, -0.759429931640625, -0.65179443359375, -0.544158935546875, -0.4365234375, -0.328887939453125, -0.22125244140625, -0.113616943359375, -0.0059814453125, 0.101654052734375, 0.20928955078125, 0.316925048828125, 0.424560546875, 0.532196044921875, 0.63983154296875, 0.747467041015625, 0.8551025390625, 0.962738037109375, 1.07037353515625, 1.178009033203125, 1.28564453125, 1.393280029296875, 1.50091552734375, 1.608551025390625, 1.7161865234375, 1.823822021484375, 1.93145751953125, 2.039093017578125, 2.146728515625, 2.254364013671875, 2.36199951171875, 2.469635009765625, 2.5772705078125, 2.684906005859375, 2.79254150390625, 2.900177001953125, 3.0078125]}, "gradients/decoder.transformer.h.20.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 4.0, 9.0, 7.0, 6.0, 13.0, 20.0, 29.0, 53.0, 62.0, 121.0, 189.0, 328.0, 620.0, 1083.0, 1974.0, 3619.0, 7232.0, 14102.0, 27304.0, 53943.0, 101212.0, 173428.0, 232777.0, 190461.0, 113878.0, 61363.0, 31200.0, 16148.0, 8177.0, 4172.0, 2163.0, 1208.0, 676.0, 387.0, 221.0, 134.0, 79.0, 52.0, 35.0, 23.0, 13.0, 14.0, 9.0, 8.0, 7.0, 4.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-0.4580078125, -0.44141387939453125, -0.4248199462890625, -0.40822601318359375, -0.391632080078125, -0.37503814697265625, -0.3584442138671875, -0.34185028076171875, -0.32525634765625, -0.30866241455078125, -0.2920684814453125, -0.27547454833984375, -0.258880615234375, -0.24228668212890625, -0.2256927490234375, -0.20909881591796875, -0.1925048828125, -0.17591094970703125, -0.1593170166015625, -0.14272308349609375, -0.126129150390625, -0.10953521728515625, -0.0929412841796875, -0.07634735107421875, -0.05975341796875, -0.04315948486328125, -0.0265655517578125, -0.00997161865234375, 0.006622314453125, 0.02321624755859375, 0.0398101806640625, 0.05640411376953125, 0.072998046875, 0.08959197998046875, 0.1061859130859375, 0.12277984619140625, 0.139373779296875, 0.15596771240234375, 0.1725616455078125, 0.18915557861328125, 0.20574951171875, 0.22234344482421875, 0.2389373779296875, 0.25553131103515625, 0.272125244140625, 0.28871917724609375, 0.3053131103515625, 0.32190704345703125, 0.3385009765625, 0.35509490966796875, 0.3716888427734375, 0.38828277587890625, 0.404876708984375, 0.42147064208984375, 0.4380645751953125, 0.45465850830078125, 0.47125244140625, 0.48784637451171875, 0.5044403076171875, 0.5210342407226562, 0.537628173828125, 0.5542221069335938, 0.5708160400390625, 0.5874099731445312, 0.60400390625]}, "gradients/decoder.transformer.h.20.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 1.0, 0.0, 2.0, 4.0, 7.0, 6.0, 6.0, 7.0, 5.0, 4.0, 14.0, 8.0, 11.0, 19.0, 20.0, 19.0, 21.0, 16.0, 19.0, 25.0, 32.0, 30.0, 35.0, 41.0, 50.0, 34.0, 45.0, 1059.0, 46.0, 37.0, 43.0, 50.0, 30.0, 32.0, 41.0, 30.0, 24.0, 31.0, 22.0, 17.0, 11.0, 16.0, 15.0, 6.0, 8.0, 8.0, 6.0, 10.0, 5.0, 0.0, 1.0, 4.0, 3.0, 2.0, 1.0, 4.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.77734375, -1.7187042236328125, -1.660064697265625, -1.6014251708984375, -1.54278564453125, -1.4841461181640625, -1.425506591796875, -1.3668670654296875, -1.3082275390625, -1.2495880126953125, -1.190948486328125, -1.1323089599609375, -1.07366943359375, -1.0150299072265625, -0.956390380859375, -0.8977508544921875, -0.839111328125, -0.7804718017578125, -0.721832275390625, -0.6631927490234375, -0.60455322265625, -0.5459136962890625, -0.487274169921875, -0.4286346435546875, -0.3699951171875, -0.3113555908203125, -0.252716064453125, -0.1940765380859375, -0.13543701171875, -0.0767974853515625, -0.018157958984375, 0.0404815673828125, 0.09912109375, 0.1577606201171875, 0.216400146484375, 0.2750396728515625, 0.33367919921875, 0.3923187255859375, 0.450958251953125, 0.5095977783203125, 0.5682373046875, 0.6268768310546875, 0.685516357421875, 0.7441558837890625, 0.80279541015625, 0.8614349365234375, 0.920074462890625, 0.9787139892578125, 1.037353515625, 1.0959930419921875, 1.154632568359375, 1.2132720947265625, 1.27191162109375, 1.3305511474609375, 1.389190673828125, 1.4478302001953125, 1.5064697265625, 1.5651092529296875, 1.623748779296875, 1.6823883056640625, 1.74102783203125, 1.7996673583984375, 1.858306884765625, 1.9169464111328125, 1.9755859375]}, "gradients/decoder.transformer.h.20.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 5.0, 5.0, 9.0, 5.0, 15.0, 19.0, 32.0, 28.0, 72.0, 75.0, 144.0, 240.0, 379.0, 589.0, 841.0, 1354.0, 2170.0, 3510.0, 5453.0, 9087.0, 15261.0, 25753.0, 44040.0, 73730.0, 116746.0, 174532.0, 1223734.0, 148816.0, 99586.0, 60863.0, 36350.0, 21071.0, 12501.0, 7509.0, 4585.0, 2980.0, 1848.0, 1155.0, 759.0, 454.0, 310.0, 184.0, 124.0, 88.0, 46.0, 29.0, 26.0, 13.0, 7.0, 7.0, 4.0, 4.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.26708984375, -0.2585601806640625, -0.250030517578125, -0.2415008544921875, -0.23297119140625, -0.2244415283203125, -0.215911865234375, -0.2073822021484375, -0.1988525390625, -0.1903228759765625, -0.181793212890625, -0.1732635498046875, -0.16473388671875, -0.1562042236328125, -0.147674560546875, -0.1391448974609375, -0.130615234375, -0.1220855712890625, -0.113555908203125, -0.1050262451171875, -0.09649658203125, -0.0879669189453125, -0.079437255859375, -0.0709075927734375, -0.0623779296875, -0.0538482666015625, -0.045318603515625, -0.0367889404296875, -0.02825927734375, -0.0197296142578125, -0.011199951171875, -0.0026702880859375, 0.005859375, 0.0143890380859375, 0.022918701171875, 0.0314483642578125, 0.03997802734375, 0.0485076904296875, 0.057037353515625, 0.0655670166015625, 0.0740966796875, 0.0826263427734375, 0.091156005859375, 0.0996856689453125, 0.10821533203125, 0.1167449951171875, 0.125274658203125, 0.1338043212890625, 0.142333984375, 0.1508636474609375, 0.159393310546875, 0.1679229736328125, 0.17645263671875, 0.1849822998046875, 0.193511962890625, 0.2020416259765625, 0.2105712890625, 0.2191009521484375, 0.227630615234375, 0.2361602783203125, 0.24468994140625, 0.2532196044921875, 0.261749267578125, 0.2702789306640625, 0.27880859375]}, "gradients/decoder.transformer.h.20.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 4.0, 6.0, 2.0, 5.0, 5.0, 1.0, 2.0, 8.0, 7.0, 6.0, 9.0, 11.0, 17.0, 16.0, 19.0, 13.0, 20.0, 31.0, 32.0, 26.0, 52.0, 60.0, 79.0, 89.0, 106.0, 69.0, 40.0, 41.0, 34.0, 26.0, 23.0, 29.0, 20.0, 16.0, 18.0, 14.0, 11.0, 11.0, 8.0, 7.0, 6.0, 4.0, 5.0, 1.0, 0.0, 3.0, 1.0, 0.0, 3.0, 1.0, 0.0, 2.0], "bins": [-0.0091400146484375, -0.008888006210327148, -0.008635997772216797, -0.008383989334106445, -0.008131980895996094, -0.007879972457885742, -0.007627964019775391, -0.007375955581665039, -0.0071239471435546875, -0.006871938705444336, -0.006619930267333984, -0.006367921829223633, -0.006115913391113281, -0.00586390495300293, -0.005611896514892578, -0.0053598880767822266, -0.005107879638671875, -0.0048558712005615234, -0.004603862762451172, -0.00435185432434082, -0.004099845886230469, -0.003847837448120117, -0.0035958290100097656, -0.003343820571899414, -0.0030918121337890625, -0.002839803695678711, -0.0025877952575683594, -0.002335786819458008, -0.0020837783813476562, -0.0018317699432373047, -0.0015797615051269531, -0.0013277530670166016, -0.00107574462890625, -0.0008237361907958984, -0.0005717277526855469, -0.0003197193145751953, -6.771087646484375e-05, 0.0001842975616455078, 0.0004363059997558594, 0.0006883144378662109, 0.0009403228759765625, 0.001192331314086914, 0.0014443397521972656, 0.0016963481903076172, 0.0019483566284179688, 0.0022003650665283203, 0.002452373504638672, 0.0027043819427490234, 0.002956390380859375, 0.0032083988189697266, 0.003460407257080078, 0.0037124156951904297, 0.003964424133300781, 0.004216432571411133, 0.004468441009521484, 0.004720449447631836, 0.0049724578857421875, 0.005224466323852539, 0.005476474761962891, 0.005728483200073242, 0.005980491638183594, 0.006232500076293945, 0.006484508514404297, 0.0067365169525146484, 0.006988525390625]}, "gradients/decoder.transformer.h.20.crossattention.q_attn.weight": {"_type": "histogram", "values": [3.0, 0.0, 1.0, 2.0, 2.0, 2.0, 1.0, 5.0, 2.0, 8.0, 14.0, 14.0, 22.0, 16.0, 15.0, 31.0, 29.0, 41.0, 67.0, 68.0, 85.0, 98.0, 162.0, 184.0, 285.0, 628.0, 8702.0, 970005.0, 65461.0, 1295.0, 375.0, 229.0, 155.0, 109.0, 98.0, 80.0, 53.0, 48.0, 39.0, 39.0, 19.0, 19.0, 7.0, 9.0, 9.0, 7.0, 10.0, 5.0, 6.0, 2.0, 3.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.11175537109375, -0.10772228240966797, -0.10368919372558594, -0.0996561050415039, -0.09562301635742188, -0.09158992767333984, -0.08755683898925781, -0.08352375030517578, -0.07949066162109375, -0.07545757293701172, -0.07142448425292969, -0.06739139556884766, -0.06335830688476562, -0.059325218200683594, -0.05529212951660156, -0.05125904083251953, -0.0472259521484375, -0.04319286346435547, -0.03915977478027344, -0.035126686096191406, -0.031093597412109375, -0.027060508728027344, -0.023027420043945312, -0.01899433135986328, -0.01496124267578125, -0.010928153991699219, -0.0068950653076171875, -0.0028619766235351562, 0.001171112060546875, 0.005204200744628906, 0.009237289428710938, 0.013270378112792969, 0.017303466796875, 0.02133655548095703, 0.025369644165039062, 0.029402732849121094, 0.033435821533203125, 0.037468910217285156, 0.04150199890136719, 0.04553508758544922, 0.04956817626953125, 0.05360126495361328, 0.05763435363769531, 0.061667442321777344, 0.06570053100585938, 0.0697336196899414, 0.07376670837402344, 0.07779979705810547, 0.0818328857421875, 0.08586597442626953, 0.08989906311035156, 0.0939321517944336, 0.09796524047851562, 0.10199832916259766, 0.10603141784667969, 0.11006450653076172, 0.11409759521484375, 0.11813068389892578, 0.12216377258300781, 0.12619686126708984, 0.13022994995117188, 0.1342630386352539, 0.13829612731933594, 0.14232921600341797, 0.1463623046875]}, "gradients/decoder.transformer.h.20.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 34.0, 809.0, 169.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.1530391275882721, -0.14990441501140594, -0.1467697024345398, -0.14363498985767365, -0.1405002772808075, -0.13736556470394135, -0.1342308521270752, -0.13109613955020905, -0.1279614269733429, -0.12482671439647675, -0.1216920018196106, -0.11855728924274445, -0.1154225766658783, -0.11228786408901215, -0.109153151512146, -0.10601843893527985, -0.1028837189078331, -0.09974900633096695, -0.0966142937541008, -0.09347958117723465, -0.0903448686003685, -0.08721015602350235, -0.0840754359960556, -0.08094072341918945, -0.0778060108423233, -0.07467129826545715, -0.071536585688591, -0.06840187311172485, -0.0652671605348587, -0.062132447957992554, -0.058997731655836105, -0.055863019078969955, -0.052728310227394104, -0.049593597650527954, -0.046458885073661804, -0.043324172496795654, -0.040189459919929504, -0.037054747343063354, -0.033920031040906906, -0.030785318464040756, -0.027650605887174606, -0.024515893310308456, -0.021381180733442307, -0.018246466293931007, -0.015111753717064857, -0.011977041140198708, -0.008842326700687408, -0.0057076141238212585, -0.0025729015469551086, 0.0005618114955723286, 0.0036965245380997658, 0.00683123804628849, 0.00996595062315464, 0.01310066320002079, 0.01623537763953209, 0.01937009021639824, 0.02250480279326439, 0.02563951537013054, 0.02877422794699669, 0.03190894424915314, 0.03504365682601929, 0.03817836940288544, 0.04131308197975159, 0.04444779455661774, 0.04758250713348389]}, "gradients/decoder.transformer.h.20.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 6.0, 1.0, 9.0, 7.0, 15.0, 8.0, 20.0, 29.0, 22.0, 29.0, 32.0, 48.0, 50.0, 56.0, 45.0, 52.0, 43.0, 45.0, 56.0, 48.0, 58.0, 41.0, 51.0, 41.0, 41.0, 24.0, 29.0, 24.0, 17.0, 12.0, 21.0, 11.0, 4.0, 6.0, 4.0, 3.0, 4.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.013088226318359375, -0.012706208974123001, -0.012324191629886627, -0.011942174285650253, -0.01156015694141388, -0.011178139597177505, -0.010796122252941132, -0.010414104908704758, -0.010032087564468384, -0.00965007022023201, -0.009268052875995636, -0.008886035531759262, -0.008504018187522888, -0.008122000843286514, -0.00773998349905014, -0.0073579661548137665, -0.006975948810577393, -0.006593931466341019, -0.006211914122104645, -0.005829896777868271, -0.005447879433631897, -0.005065862089395523, -0.004683844745159149, -0.004301827400922775, -0.003919810056686401, -0.0035377927124500275, -0.0031557753682136536, -0.0027737580239772797, -0.0023917406797409058, -0.002009723335504532, -0.001627705991268158, -0.001245688647031784, -0.0008636713027954102, -0.00048165395855903625, -9.963661432266235e-05, 0.00028238072991371155, 0.0006643980741500854, 0.0010464154183864594, 0.0014284327626228333, 0.0018104501068592072, 0.002192467451095581, 0.002574484795331955, 0.002956502139568329, 0.0033385194838047028, 0.0037205368280410767, 0.0041025541722774506, 0.0044845715165138245, 0.004866588860750198, 0.005248606204986572, 0.005630623549222946, 0.00601264089345932, 0.006394658237695694, 0.006776675581932068, 0.007158692926168442, 0.007540710270404816, 0.00792272761464119, 0.008304744958877563, 0.008686762303113937, 0.009068779647350311, 0.009450796991586685, 0.009832814335823059, 0.010214831680059433, 0.010596849024295807, 0.01097886636853218, 0.011360883712768555]}, "gradients/decoder.transformer.h.20.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 5.0, 1.0, 4.0, 7.0, 9.0, 18.0, 14.0, 16.0, 25.0, 17.0, 26.0, 31.0, 29.0, 34.0, 36.0, 42.0, 43.0, 51.0, 38.0, 55.0, 43.0, 67.0, 51.0, 53.0, 30.0, 34.0, 36.0, 35.0, 31.0, 20.0, 26.0, 19.0, 16.0, 13.0, 8.0, 8.0, 10.0, 3.0, 3.0, 2.0, 5.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-3.880859375, -3.773223876953125, -3.66558837890625, -3.557952880859375, -3.4503173828125, -3.342681884765625, -3.23504638671875, -3.127410888671875, -3.019775390625, -2.912139892578125, -2.80450439453125, -2.696868896484375, -2.5892333984375, -2.481597900390625, -2.37396240234375, -2.266326904296875, -2.15869140625, -2.051055908203125, -1.94342041015625, -1.835784912109375, -1.7281494140625, -1.620513916015625, -1.51287841796875, -1.405242919921875, -1.297607421875, -1.189971923828125, -1.08233642578125, -0.974700927734375, -0.8670654296875, -0.759429931640625, -0.65179443359375, -0.544158935546875, -0.4365234375, -0.328887939453125, -0.22125244140625, -0.113616943359375, -0.0059814453125, 0.101654052734375, 0.20928955078125, 0.316925048828125, 0.424560546875, 0.532196044921875, 0.63983154296875, 0.747467041015625, 0.8551025390625, 0.962738037109375, 1.07037353515625, 1.178009033203125, 1.28564453125, 1.393280029296875, 1.50091552734375, 1.608551025390625, 1.7161865234375, 1.823822021484375, 1.93145751953125, 2.039093017578125, 2.146728515625, 2.254364013671875, 2.36199951171875, 2.469635009765625, 2.5772705078125, 2.684906005859375, 2.79254150390625, 2.900177001953125, 3.0078125]}, "gradients/decoder.transformer.h.20.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 3.0, 2.0, 1.0, 4.0, 5.0, 8.0, 18.0, 38.0, 37.0, 42.0, 67.0, 96.0, 173.0, 204.0, 352.0, 635.0, 1100.0, 2250.0, 5073.0, 12518.0, 33915.0, 118664.0, 399620.0, 334746.0, 92111.0, 27570.0, 10341.0, 4366.0, 2011.0, 1058.0, 576.0, 304.0, 213.0, 151.0, 104.0, 62.0, 34.0, 30.0, 25.0, 15.0, 6.0, 9.0, 4.0, 4.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-4.1171875, -3.995758056640625, -3.87432861328125, -3.752899169921875, -3.6314697265625, -3.510040283203125, -3.38861083984375, -3.267181396484375, -3.145751953125, -3.024322509765625, -2.90289306640625, -2.781463623046875, -2.6600341796875, -2.538604736328125, -2.41717529296875, -2.295745849609375, -2.17431640625, -2.052886962890625, -1.93145751953125, -1.810028076171875, -1.6885986328125, -1.567169189453125, -1.44573974609375, -1.324310302734375, -1.202880859375, -1.081451416015625, -0.96002197265625, -0.838592529296875, -0.7171630859375, -0.595733642578125, -0.47430419921875, -0.352874755859375, -0.2314453125, -0.110015869140625, 0.01141357421875, 0.132843017578125, 0.2542724609375, 0.375701904296875, 0.49713134765625, 0.618560791015625, 0.739990234375, 0.861419677734375, 0.98284912109375, 1.104278564453125, 1.2257080078125, 1.347137451171875, 1.46856689453125, 1.589996337890625, 1.71142578125, 1.832855224609375, 1.95428466796875, 2.075714111328125, 2.1971435546875, 2.318572998046875, 2.44000244140625, 2.561431884765625, 2.682861328125, 2.804290771484375, 2.92572021484375, 3.047149658203125, 3.1685791015625, 3.290008544921875, 3.41143798828125, 3.532867431640625, 3.654296875]}, "gradients/decoder.transformer.h.20.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 4.0, 2.0, 3.0, 7.0, 5.0, 6.0, 3.0, 15.0, 7.0, 17.0, 16.0, 19.0, 21.0, 25.0, 33.0, 34.0, 41.0, 45.0, 46.0, 37.0, 64.0, 140.0, 427.0, 1454.0, 132.0, 51.0, 50.0, 48.0, 39.0, 40.0, 34.0, 34.0, 26.0, 17.0, 23.0, 23.0, 11.0, 8.0, 10.0, 5.0, 13.0, 3.0, 5.0, 6.0, 5.0, 4.0, 5.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.828125, -9.5006103515625, -9.173095703125, -8.8455810546875, -8.51806640625, -8.1905517578125, -7.863037109375, -7.5355224609375, -7.2080078125, -6.8804931640625, -6.552978515625, -6.2254638671875, -5.89794921875, -5.5704345703125, -5.242919921875, -4.9154052734375, -4.587890625, -4.2603759765625, -3.932861328125, -3.6053466796875, -3.27783203125, -2.9503173828125, -2.622802734375, -2.2952880859375, -1.9677734375, -1.6402587890625, -1.312744140625, -0.9852294921875, -0.65771484375, -0.3302001953125, -0.002685546875, 0.3248291015625, 0.65234375, 0.9798583984375, 1.307373046875, 1.6348876953125, 1.96240234375, 2.2899169921875, 2.617431640625, 2.9449462890625, 3.2724609375, 3.5999755859375, 3.927490234375, 4.2550048828125, 4.58251953125, 4.9100341796875, 5.237548828125, 5.5650634765625, 5.892578125, 6.2200927734375, 6.547607421875, 6.8751220703125, 7.20263671875, 7.5301513671875, 7.857666015625, 8.1851806640625, 8.5126953125, 8.8402099609375, 9.167724609375, 9.4952392578125, 9.82275390625, 10.1502685546875, 10.477783203125, 10.8052978515625, 11.1328125]}, "gradients/decoder.transformer.h.20.attn.c_attn.weight": {"_type": "histogram", "values": [3.0, 1.0, 2.0, 2.0, 2.0, 6.0, 7.0, 2.0, 5.0, 4.0, 6.0, 11.0, 7.0, 6.0, 11.0, 13.0, 21.0, 26.0, 32.0, 23.0, 39.0, 62.0, 76.0, 137.0, 207.0, 337.0, 811.0, 2456.0, 14411.0, 259293.0, 2797555.0, 60917.0, 6415.0, 1446.0, 560.0, 258.0, 151.0, 106.0, 53.0, 49.0, 37.0, 23.0, 31.0, 20.0, 16.0, 12.0, 14.0, 7.0, 6.0, 4.0, 4.0, 9.0, 3.0, 3.0, 3.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0], "bins": [-13.6484375, -13.197998046875, -12.74755859375, -12.297119140625, -11.8466796875, -11.396240234375, -10.94580078125, -10.495361328125, -10.044921875, -9.594482421875, -9.14404296875, -8.693603515625, -8.2431640625, -7.792724609375, -7.34228515625, -6.891845703125, -6.44140625, -5.990966796875, -5.54052734375, -5.090087890625, -4.6396484375, -4.189208984375, -3.73876953125, -3.288330078125, -2.837890625, -2.387451171875, -1.93701171875, -1.486572265625, -1.0361328125, -0.585693359375, -0.13525390625, 0.315185546875, 0.765625, 1.216064453125, 1.66650390625, 2.116943359375, 2.5673828125, 3.017822265625, 3.46826171875, 3.918701171875, 4.369140625, 4.819580078125, 5.27001953125, 5.720458984375, 6.1708984375, 6.621337890625, 7.07177734375, 7.522216796875, 7.97265625, 8.423095703125, 8.87353515625, 9.323974609375, 9.7744140625, 10.224853515625, 10.67529296875, 11.125732421875, 11.576171875, 12.026611328125, 12.47705078125, 12.927490234375, 13.3779296875, 13.828369140625, 14.27880859375, 14.729248046875, 15.1796875]}, "gradients/decoder.transformer.h.20.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 10.0, 92.0, 436.0, 393.0, 79.0, 6.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-81.15377807617188, -78.89427185058594, -76.63475799560547, -74.37525177001953, -72.11573791503906, -69.85623168945312, -67.59671783447266, -65.33721160888672, -63.07769775390625, -60.81818771362305, -58.558677673339844, -56.29916763305664, -54.03965759277344, -51.780147552490234, -49.52063751220703, -47.261131286621094, -45.00162124633789, -42.74211120605469, -40.482601165771484, -38.22309112548828, -35.96358108520508, -33.704071044921875, -31.444562911987305, -29.1850528717041, -26.9255428314209, -24.666032791137695, -22.406522750854492, -20.147014617919922, -17.88750457763672, -15.6279935836792, -13.368484497070312, -11.10897445678711, -8.849468231201172, -6.589958190917969, -4.330448627471924, -2.070939064025879, 0.18857097625732422, 2.4480810165405273, 4.707590103149414, 6.967100143432617, 9.22661018371582, 11.486120223999023, 13.745630264282227, 16.005138397216797, 18.2646484375, 20.524158477783203, 22.783668518066406, 25.04317855834961, 27.302688598632812, 29.562198638916016, 31.82170867919922, 34.08121871948242, 36.340728759765625, 38.60023880004883, 40.85974884033203, 43.11925506591797, 45.37876892089844, 47.63827896118164, 49.897789001464844, 52.15729904174805, 54.41680908203125, 56.67631912231445, 58.935829162597656, 61.195335388183594, 63.4548454284668]}, "gradients/decoder.transformer.h.20.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 6.0, 1.0, 1.0, 5.0, 5.0, 2.0, 4.0, 10.0, 8.0, 11.0, 12.0, 20.0, 20.0, 20.0, 20.0, 22.0, 26.0, 25.0, 27.0, 41.0, 32.0, 26.0, 41.0, 42.0, 33.0, 44.0, 47.0, 47.0, 53.0, 40.0, 27.0, 38.0, 41.0, 25.0, 26.0, 28.0, 19.0, 21.0, 17.0, 10.0, 16.0, 14.0, 8.0, 5.0, 7.0, 8.0, 5.0, 4.0, 4.0, 2.0, 1.0, 1.0, 1.0, 1.0], "bins": [-39.18626403808594, -38.09932327270508, -37.01238250732422, -35.92544174194336, -34.8385009765625, -33.75156021118164, -32.66461944580078, -31.577678680419922, -30.490737915039062, -29.403797149658203, -28.316856384277344, -27.229915618896484, -26.142974853515625, -25.056034088134766, -23.969093322753906, -22.882152557373047, -21.795211791992188, -20.708271026611328, -19.62133026123047, -18.53438949584961, -17.44744873046875, -16.36050796508789, -15.273567199707031, -14.186626434326172, -13.099685668945312, -12.012744903564453, -10.925804138183594, -9.838863372802734, -8.751922607421875, -7.664981842041016, -6.578041076660156, -5.491100311279297, -4.4041595458984375, -3.317218780517578, -2.2302780151367188, -1.1433372497558594, -0.056396484375, 1.0305442810058594, 2.1174850463867188, 3.204425811767578, 4.2913665771484375, 5.378307342529297, 6.465248107910156, 7.552188873291016, 8.639129638671875, 9.726070404052734, 10.813011169433594, 11.899951934814453, 12.986892700195312, 14.073833465576172, 15.160774230957031, 16.24771499633789, 17.33465576171875, 18.42159652709961, 19.50853729248047, 20.595478057861328, 21.682418823242188, 22.769359588623047, 23.856300354003906, 24.943241119384766, 26.030181884765625, 27.117122650146484, 28.204063415527344, 29.291004180908203, 30.377944946289062]}, "gradients/decoder.transformer.h.19.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 3.0, 1.0, 5.0, 3.0, 7.0, 8.0, 18.0, 20.0, 15.0, 23.0, 27.0, 18.0, 34.0, 25.0, 36.0, 34.0, 37.0, 43.0, 40.0, 51.0, 41.0, 54.0, 55.0, 45.0, 50.0, 47.0, 33.0, 30.0, 34.0, 25.0, 24.0, 24.0, 26.0, 13.0, 17.0, 11.0, 13.0, 5.0, 5.0, 5.0, 2.0, 5.0, 4.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-3.80859375, -3.700408935546875, -3.59222412109375, -3.484039306640625, -3.3758544921875, -3.267669677734375, -3.15948486328125, -3.051300048828125, -2.943115234375, -2.834930419921875, -2.72674560546875, -2.618560791015625, -2.5103759765625, -2.402191162109375, -2.29400634765625, -2.185821533203125, -2.07763671875, -1.969451904296875, -1.86126708984375, -1.753082275390625, -1.6448974609375, -1.536712646484375, -1.42852783203125, -1.320343017578125, -1.212158203125, -1.103973388671875, -0.99578857421875, -0.887603759765625, -0.7794189453125, -0.671234130859375, -0.56304931640625, -0.454864501953125, -0.3466796875, -0.238494873046875, -0.13031005859375, -0.022125244140625, 0.0860595703125, 0.194244384765625, 0.30242919921875, 0.410614013671875, 0.518798828125, 0.626983642578125, 0.73516845703125, 0.843353271484375, 0.9515380859375, 1.059722900390625, 1.16790771484375, 1.276092529296875, 1.38427734375, 1.492462158203125, 1.60064697265625, 1.708831787109375, 1.8170166015625, 1.925201416015625, 2.03338623046875, 2.141571044921875, 2.249755859375, 2.357940673828125, 2.46612548828125, 2.574310302734375, 2.6824951171875, 2.790679931640625, 2.89886474609375, 3.007049560546875, 3.115234375]}, "gradients/decoder.transformer.h.19.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 4.0, 3.0, 7.0, 4.0, 12.0, 11.0, 13.0, 24.0, 29.0, 52.0, 67.0, 113.0, 178.0, 342.0, 766.0, 1885.0, 5375.0, 18127.0, 81319.0, 568141.0, 2542198.0, 829272.0, 112520.0, 23405.0, 6571.0, 2168.0, 809.0, 390.0, 181.0, 102.0, 57.0, 38.0, 27.0, 18.0, 24.0, 12.0, 5.0, 7.0, 5.0, 3.0, 1.0, 5.0, 0.0, 3.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.546875, -8.29241943359375, -8.0379638671875, -7.78350830078125, -7.529052734375, -7.27459716796875, -7.0201416015625, -6.76568603515625, -6.51123046875, -6.25677490234375, -6.0023193359375, -5.74786376953125, -5.493408203125, -5.23895263671875, -4.9844970703125, -4.73004150390625, -4.4755859375, -4.22113037109375, -3.9666748046875, -3.71221923828125, -3.457763671875, -3.20330810546875, -2.9488525390625, -2.69439697265625, -2.43994140625, -2.18548583984375, -1.9310302734375, -1.67657470703125, -1.422119140625, -1.16766357421875, -0.9132080078125, -0.65875244140625, -0.404296875, -0.14984130859375, 0.1046142578125, 0.35906982421875, 0.613525390625, 0.86798095703125, 1.1224365234375, 1.37689208984375, 1.63134765625, 1.88580322265625, 2.1402587890625, 2.39471435546875, 2.649169921875, 2.90362548828125, 3.1580810546875, 3.41253662109375, 3.6669921875, 3.92144775390625, 4.1759033203125, 4.43035888671875, 4.684814453125, 4.93927001953125, 5.1937255859375, 5.44818115234375, 5.70263671875, 5.95709228515625, 6.2115478515625, 6.46600341796875, 6.720458984375, 6.97491455078125, 7.2293701171875, 7.48382568359375, 7.73828125]}, "gradients/decoder.transformer.h.19.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 3.0, 6.0, 2.0, 1.0, 6.0, 5.0, 6.0, 13.0, 19.0, 23.0, 19.0, 33.0, 42.0, 52.0, 61.0, 97.0, 137.0, 163.0, 248.0, 342.0, 453.0, 488.0, 448.0, 393.0, 289.0, 224.0, 127.0, 102.0, 76.0, 46.0, 42.0, 32.0, 22.0, 19.0, 11.0, 8.0, 3.0, 8.0, 4.0, 7.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0], "bins": [-9.421875, -9.1494140625, -8.876953125, -8.6044921875, -8.33203125, -8.0595703125, -7.787109375, -7.5146484375, -7.2421875, -6.9697265625, -6.697265625, -6.4248046875, -6.15234375, -5.8798828125, -5.607421875, -5.3349609375, -5.0625, -4.7900390625, -4.517578125, -4.2451171875, -3.97265625, -3.7001953125, -3.427734375, -3.1552734375, -2.8828125, -2.6103515625, -2.337890625, -2.0654296875, -1.79296875, -1.5205078125, -1.248046875, -0.9755859375, -0.703125, -0.4306640625, -0.158203125, 0.1142578125, 0.38671875, 0.6591796875, 0.931640625, 1.2041015625, 1.4765625, 1.7490234375, 2.021484375, 2.2939453125, 2.56640625, 2.8388671875, 3.111328125, 3.3837890625, 3.65625, 3.9287109375, 4.201171875, 4.4736328125, 4.74609375, 5.0185546875, 5.291015625, 5.5634765625, 5.8359375, 6.1083984375, 6.380859375, 6.6533203125, 6.92578125, 7.1982421875, 7.470703125, 7.7431640625, 8.015625]}, "gradients/decoder.transformer.h.19.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 2.0, 1.0, 0.0, 4.0, 0.0, 0.0, 4.0, 9.0, 11.0, 16.0, 14.0, 29.0, 60.0, 91.0, 111.0, 191.0, 344.0, 584.0, 1128.0, 2362.0, 5261.0, 14202.0, 46823.0, 211341.0, 1209604.0, 2137251.0, 440988.0, 86425.0, 22942.0, 7789.0, 3263.0, 1562.0, 813.0, 428.0, 236.0, 136.0, 107.0, 59.0, 25.0, 26.0, 17.0, 12.0, 7.0, 8.0, 2.0, 5.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-12.171875, -11.805419921875, -11.43896484375, -11.072509765625, -10.7060546875, -10.339599609375, -9.97314453125, -9.606689453125, -9.240234375, -8.873779296875, -8.50732421875, -8.140869140625, -7.7744140625, -7.407958984375, -7.04150390625, -6.675048828125, -6.30859375, -5.942138671875, -5.57568359375, -5.209228515625, -4.8427734375, -4.476318359375, -4.10986328125, -3.743408203125, -3.376953125, -3.010498046875, -2.64404296875, -2.277587890625, -1.9111328125, -1.544677734375, -1.17822265625, -0.811767578125, -0.4453125, -0.078857421875, 0.28759765625, 0.654052734375, 1.0205078125, 1.386962890625, 1.75341796875, 2.119873046875, 2.486328125, 2.852783203125, 3.21923828125, 3.585693359375, 3.9521484375, 4.318603515625, 4.68505859375, 5.051513671875, 5.41796875, 5.784423828125, 6.15087890625, 6.517333984375, 6.8837890625, 7.250244140625, 7.61669921875, 7.983154296875, 8.349609375, 8.716064453125, 9.08251953125, 9.448974609375, 9.8154296875, 10.181884765625, 10.54833984375, 10.914794921875, 11.28125]}, "gradients/decoder.transformer.h.19.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 3.0, 19.0, 64.0, 138.0, 237.0, 266.0, 175.0, 73.0, 22.0, 13.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-172.69281005859375, -169.1873321533203, -165.68185424804688, -162.1763916015625, -158.67091369628906, -155.16543579101562, -151.6599578857422, -148.15447998046875, -144.6490020751953, -141.14352416992188, -137.63804626464844, -134.132568359375, -130.62710571289062, -127.12162780761719, -123.61614990234375, -120.11067199707031, -116.6052017211914, -113.09972381591797, -109.59425354003906, -106.08877563476562, -102.58329772949219, -99.07781982421875, -95.57234954833984, -92.0668716430664, -88.5614013671875, -85.05592346191406, -81.55045318603516, -78.04497528076172, -74.53949737548828, -71.03402709960938, -67.52854919433594, -64.0230712890625, -60.51759719848633, -57.012123107910156, -53.50664520263672, -50.00117111206055, -46.495697021484375, -42.99021911621094, -39.484745025634766, -35.979270935058594, -32.473793029785156, -28.96831703186035, -25.462841033935547, -21.957366943359375, -18.45189094543457, -14.946414947509766, -11.440940856933594, -7.935464859008789, -4.429988861083984, -0.9245133399963379, 2.5809621810913086, 6.086437225341797, 9.591913223266602, 13.097389221191406, 16.602863311767578, 20.108339309692383, 23.613815307617188, 27.119291305541992, 30.624767303466797, 34.13024139404297, 37.635719299316406, 41.14119338989258, 44.64666748046875, 48.15214538574219, 51.65761947631836]}, "gradients/decoder.transformer.h.19.ln_2.bias": {"_type": "histogram", "values": [1.0, 3.0, 0.0, 0.0, 2.0, 1.0, 5.0, 3.0, 4.0, 1.0, 8.0, 3.0, 4.0, 12.0, 11.0, 10.0, 19.0, 19.0, 16.0, 19.0, 27.0, 14.0, 22.0, 23.0, 28.0, 21.0, 25.0, 30.0, 25.0, 30.0, 38.0, 40.0, 40.0, 40.0, 33.0, 46.0, 29.0, 42.0, 26.0, 34.0, 20.0, 29.0, 31.0, 19.0, 23.0, 23.0, 18.0, 16.0, 12.0, 6.0, 6.0, 7.0, 10.0, 12.0, 8.0, 8.0, 5.0, 3.0, 7.0, 2.0, 2.0, 0.0, 1.0, 2.0], "bins": [-26.002193450927734, -25.206300735473633, -24.41040802001953, -23.61451530456543, -22.818622589111328, -22.022727966308594, -21.226837158203125, -20.43094253540039, -19.63504981994629, -18.839157104492188, -18.043264389038086, -17.247371673583984, -16.451478958129883, -15.655585289001465, -14.859692573547363, -14.063798904418945, -13.26790714263916, -12.472014427185059, -11.676121711730957, -10.880228042602539, -10.084335327148438, -9.288442611694336, -8.492549896240234, -7.696656703948975, -6.900763988494873, -6.1048712730407715, -5.308978080749512, -4.51308536529541, -3.7171924114227295, -2.921299457550049, -2.1254067420959473, -1.3295135498046875, -0.5336208343505859, 0.26227205991744995, 1.0581649541854858, 1.854057788848877, 2.6499507427215576, 3.4458436965942383, 4.24173641204834, 5.0376296043396, 5.833522319793701, 6.629415035247803, 7.4253082275390625, 8.221200942993164, 9.017093658447266, 9.812986373901367, 10.608879089355469, 11.404772758483887, 12.200665473937988, 12.99655818939209, 13.792450904846191, 14.58834457397461, 15.384237289428711, 16.180130004882812, 16.976022720336914, 17.771915435791016, 18.567808151245117, 19.36370086669922, 20.15959358215332, 20.955486297607422, 21.751379013061523, 22.547271728515625, 23.34316635131836, 24.13905906677246, 24.934951782226562]}, "gradients/decoder.transformer.h.19.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 4.0, 0.0, 8.0, 10.0, 8.0, 9.0, 12.0, 18.0, 22.0, 26.0, 20.0, 26.0, 34.0, 29.0, 38.0, 40.0, 39.0, 43.0, 48.0, 48.0, 49.0, 49.0, 52.0, 41.0, 58.0, 27.0, 27.0, 40.0, 30.0, 24.0, 25.0, 23.0, 19.0, 15.0, 10.0, 14.0, 10.0, 5.0, 5.0, 6.0, 3.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.87109375, -3.7589111328125, -3.646728515625, -3.5345458984375, -3.42236328125, -3.3101806640625, -3.197998046875, -3.0858154296875, -2.9736328125, -2.8614501953125, -2.749267578125, -2.6370849609375, -2.52490234375, -2.4127197265625, -2.300537109375, -2.1883544921875, -2.076171875, -1.9639892578125, -1.851806640625, -1.7396240234375, -1.62744140625, -1.5152587890625, -1.403076171875, -1.2908935546875, -1.1787109375, -1.0665283203125, -0.954345703125, -0.8421630859375, -0.72998046875, -0.6177978515625, -0.505615234375, -0.3934326171875, -0.28125, -0.1690673828125, -0.056884765625, 0.0552978515625, 0.16748046875, 0.2796630859375, 0.391845703125, 0.5040283203125, 0.6162109375, 0.7283935546875, 0.840576171875, 0.9527587890625, 1.06494140625, 1.1771240234375, 1.289306640625, 1.4014892578125, 1.513671875, 1.6258544921875, 1.738037109375, 1.8502197265625, 1.96240234375, 2.0745849609375, 2.186767578125, 2.2989501953125, 2.4111328125, 2.5233154296875, 2.635498046875, 2.7476806640625, 2.85986328125, 2.9720458984375, 3.084228515625, 3.1964111328125, 3.30859375]}, "gradients/decoder.transformer.h.19.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 3.0, 0.0, 3.0, 2.0, 1.0, 6.0, 15.0, 11.0, 19.0, 32.0, 42.0, 64.0, 115.0, 177.0, 282.0, 470.0, 737.0, 1375.0, 2259.0, 3965.0, 7050.0, 12266.0, 21702.0, 37622.0, 65142.0, 107675.0, 158843.0, 192413.0, 163119.0, 111514.0, 68401.0, 40253.0, 22793.0, 12801.0, 7332.0, 4176.0, 2432.0, 1366.0, 812.0, 488.0, 304.0, 184.0, 110.0, 63.0, 40.0, 33.0, 15.0, 10.0, 11.0, 5.0, 5.0, 5.0, 0.0, 2.0, 3.0, 3.0, 2.0], "bins": [-0.5185546875, -0.5035438537597656, -0.48853302001953125, -0.4735221862792969, -0.4585113525390625, -0.4435005187988281, -0.42848968505859375, -0.4134788513183594, -0.398468017578125, -0.3834571838378906, -0.36844635009765625, -0.3534355163574219, -0.3384246826171875, -0.3234138488769531, -0.30840301513671875, -0.2933921813964844, -0.27838134765625, -0.2633705139160156, -0.24835968017578125, -0.23334884643554688, -0.2183380126953125, -0.20332717895507812, -0.18831634521484375, -0.17330551147460938, -0.158294677734375, -0.14328384399414062, -0.12827301025390625, -0.11326217651367188, -0.0982513427734375, -0.08324050903320312, -0.06822967529296875, -0.053218841552734375, -0.0382080078125, -0.023197174072265625, -0.00818634033203125, 0.006824493408203125, 0.0218353271484375, 0.036846160888671875, 0.05185699462890625, 0.06686782836914062, 0.081878662109375, 0.09688949584960938, 0.11190032958984375, 0.12691116333007812, 0.1419219970703125, 0.15693283081054688, 0.17194366455078125, 0.18695449829101562, 0.20196533203125, 0.21697616577148438, 0.23198699951171875, 0.24699783325195312, 0.2620086669921875, 0.2770195007324219, 0.29203033447265625, 0.3070411682128906, 0.322052001953125, 0.3370628356933594, 0.35207366943359375, 0.3670845031738281, 0.3820953369140625, 0.3971061706542969, 0.41211700439453125, 0.4271278381347656, 0.442138671875]}, "gradients/decoder.transformer.h.19.crossattention.c_attn.bias": {"_type": "histogram", "values": [3.0, 0.0, 1.0, 1.0, 3.0, 1.0, 0.0, 1.0, 3.0, 7.0, 1.0, 5.0, 10.0, 5.0, 14.0, 22.0, 8.0, 26.0, 21.0, 25.0, 30.0, 17.0, 26.0, 26.0, 24.0, 41.0, 34.0, 39.0, 46.0, 42.0, 1047.0, 41.0, 32.0, 35.0, 35.0, 53.0, 34.0, 31.0, 33.0, 35.0, 28.0, 18.0, 20.0, 24.0, 19.0, 15.0, 14.0, 8.0, 8.0, 8.0, 9.0, 3.0, 4.0, 5.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 0.0, 0.0, 1.0], "bins": [-2.015625, -1.950164794921875, -1.88470458984375, -1.819244384765625, -1.7537841796875, -1.688323974609375, -1.62286376953125, -1.557403564453125, -1.491943359375, -1.426483154296875, -1.36102294921875, -1.295562744140625, -1.2301025390625, -1.164642333984375, -1.09918212890625, -1.033721923828125, -0.96826171875, -0.902801513671875, -0.83734130859375, -0.771881103515625, -0.7064208984375, -0.640960693359375, -0.57550048828125, -0.510040283203125, -0.444580078125, -0.379119873046875, -0.31365966796875, -0.248199462890625, -0.1827392578125, -0.117279052734375, -0.05181884765625, 0.013641357421875, 0.0791015625, 0.144561767578125, 0.21002197265625, 0.275482177734375, 0.3409423828125, 0.406402587890625, 0.47186279296875, 0.537322998046875, 0.602783203125, 0.668243408203125, 0.73370361328125, 0.799163818359375, 0.8646240234375, 0.930084228515625, 0.99554443359375, 1.061004638671875, 1.12646484375, 1.191925048828125, 1.25738525390625, 1.322845458984375, 1.3883056640625, 1.453765869140625, 1.51922607421875, 1.584686279296875, 1.650146484375, 1.715606689453125, 1.78106689453125, 1.846527099609375, 1.9119873046875, 1.977447509765625, 2.04290771484375, 2.108367919921875, 2.173828125]}, "gradients/decoder.transformer.h.19.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 3.0, 3.0, 2.0, 8.0, 12.0, 13.0, 21.0, 39.0, 42.0, 48.0, 106.0, 138.0, 221.0, 300.0, 445.0, 705.0, 1062.0, 1548.0, 2519.0, 4005.0, 6253.0, 9801.0, 15559.0, 25212.0, 41056.0, 64802.0, 98179.0, 139364.0, 1200802.0, 164308.0, 113327.0, 76461.0, 48953.0, 30737.0, 18894.0, 11689.0, 7322.0, 4758.0, 2861.0, 1896.0, 1161.0, 859.0, 561.0, 389.0, 234.0, 157.0, 100.0, 73.0, 45.0, 29.0, 19.0, 17.0, 14.0, 5.0, 5.0, 2.0, 3.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.26318359375, -0.254608154296875, -0.24603271484375, -0.237457275390625, -0.2288818359375, -0.220306396484375, -0.21173095703125, -0.203155517578125, -0.194580078125, -0.186004638671875, -0.17742919921875, -0.168853759765625, -0.1602783203125, -0.151702880859375, -0.14312744140625, -0.134552001953125, -0.1259765625, -0.117401123046875, -0.10882568359375, -0.100250244140625, -0.0916748046875, -0.083099365234375, -0.07452392578125, -0.065948486328125, -0.057373046875, -0.048797607421875, -0.04022216796875, -0.031646728515625, -0.0230712890625, -0.014495849609375, -0.00592041015625, 0.002655029296875, 0.01123046875, 0.019805908203125, 0.02838134765625, 0.036956787109375, 0.0455322265625, 0.054107666015625, 0.06268310546875, 0.071258544921875, 0.079833984375, 0.088409423828125, 0.09698486328125, 0.105560302734375, 0.1141357421875, 0.122711181640625, 0.13128662109375, 0.139862060546875, 0.1484375, 0.157012939453125, 0.16558837890625, 0.174163818359375, 0.1827392578125, 0.191314697265625, 0.19989013671875, 0.208465576171875, 0.217041015625, 0.225616455078125, 0.23419189453125, 0.242767333984375, 0.2513427734375, 0.259918212890625, 0.26849365234375, 0.277069091796875, 0.28564453125]}, "gradients/decoder.transformer.h.19.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 1.0, 1.0, 3.0, 7.0, 4.0, 3.0, 5.0, 7.0, 5.0, 14.0, 12.0, 20.0, 21.0, 24.0, 41.0, 37.0, 55.0, 65.0, 78.0, 125.0, 120.0, 92.0, 62.0, 32.0, 41.0, 29.0, 12.0, 15.0, 20.0, 20.0, 9.0, 10.0, 4.0, 4.0, 6.0, 3.0, 1.0, 1.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0138702392578125, -0.013421177864074707, -0.012972116470336914, -0.012523055076599121, -0.012073993682861328, -0.011624932289123535, -0.011175870895385742, -0.01072680950164795, -0.010277748107910156, -0.009828686714172363, -0.00937962532043457, -0.008930563926696777, -0.008481502532958984, -0.008032441139221191, -0.0075833797454833984, -0.0071343183517456055, -0.0066852569580078125, -0.0062361955642700195, -0.0057871341705322266, -0.005338072776794434, -0.004889011383056641, -0.004439949989318848, -0.003990888595581055, -0.0035418272018432617, -0.0030927658081054688, -0.0026437044143676758, -0.002194643020629883, -0.0017455816268920898, -0.0012965202331542969, -0.0008474588394165039, -0.00039839744567871094, 5.066394805908203e-05, 0.000499725341796875, 0.000948786735534668, 0.001397848129272461, 0.001846909523010254, 0.002295970916748047, 0.00274503231048584, 0.003194093704223633, 0.0036431550979614258, 0.004092216491699219, 0.004541277885437012, 0.004990339279174805, 0.005439400672912598, 0.005888462066650391, 0.006337523460388184, 0.0067865848541259766, 0.0072356462478637695, 0.0076847076416015625, 0.008133769035339355, 0.008582830429077148, 0.009031891822814941, 0.009480953216552734, 0.009930014610290527, 0.01037907600402832, 0.010828137397766113, 0.011277198791503906, 0.0117262601852417, 0.012175321578979492, 0.012624382972717285, 0.013073444366455078, 0.013522505760192871, 0.013971567153930664, 0.014420628547668457, 0.01486968994140625]}, "gradients/decoder.transformer.h.19.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 1.0, 3.0, 3.0, 3.0, 4.0, 6.0, 8.0, 9.0, 15.0, 25.0, 36.0, 40.0, 41.0, 54.0, 107.0, 136.0, 241.0, 342.0, 953.0, 52798.0, 989815.0, 2631.0, 465.0, 255.0, 147.0, 123.0, 89.0, 59.0, 46.0, 24.0, 25.0, 13.0, 10.0, 9.0, 3.0, 9.0, 5.0, 6.0, 0.0, 1.0, 4.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.2305908203125, -0.2236480712890625, -0.216705322265625, -0.2097625732421875, -0.20281982421875, -0.1958770751953125, -0.188934326171875, -0.1819915771484375, -0.175048828125, -0.1681060791015625, -0.161163330078125, -0.1542205810546875, -0.14727783203125, -0.1403350830078125, -0.133392333984375, -0.1264495849609375, -0.1195068359375, -0.1125640869140625, -0.105621337890625, -0.0986785888671875, -0.09173583984375, -0.0847930908203125, -0.077850341796875, -0.0709075927734375, -0.06396484375, -0.0570220947265625, -0.050079345703125, -0.0431365966796875, -0.03619384765625, -0.0292510986328125, -0.022308349609375, -0.0153656005859375, -0.0084228515625, -0.0014801025390625, 0.005462646484375, 0.0124053955078125, 0.01934814453125, 0.0262908935546875, 0.033233642578125, 0.0401763916015625, 0.047119140625, 0.0540618896484375, 0.061004638671875, 0.0679473876953125, 0.07489013671875, 0.0818328857421875, 0.088775634765625, 0.0957183837890625, 0.1026611328125, 0.1096038818359375, 0.116546630859375, 0.1234893798828125, 0.13043212890625, 0.1373748779296875, 0.144317626953125, 0.1512603759765625, 0.158203125, 0.1651458740234375, 0.172088623046875, 0.1790313720703125, 0.18597412109375, 0.1929168701171875, 0.199859619140625, 0.2068023681640625, 0.2137451171875]}, "gradients/decoder.transformer.h.19.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 61.0, 938.0, 16.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.13172505795955658, -0.1265614628791809, -0.12139785289764404, -0.11623425036668777, -0.1110706478357315, -0.10590704530477524, -0.10074344277381897, -0.0955798402428627, -0.09041623771190643, -0.08525263518095016, -0.0800890326499939, -0.07492543011903763, -0.06976182758808136, -0.06459822505712509, -0.05943462252616882, -0.054271019995212555, -0.04910741746425629, -0.04394381493330002, -0.03878021240234375, -0.03361660987138748, -0.028453007340431213, -0.023289404809474945, -0.018125802278518677, -0.012962199747562408, -0.00779859721660614, -0.002634994685649872, 0.0025286078453063965, 0.007692210376262665, 0.012855812907218933, 0.0180194154381752, 0.02318301796913147, 0.028346620500087738, 0.033510223031044006, 0.038673825562000275, 0.04383742809295654, 0.04900103062391281, 0.05416463315486908, 0.05932823568582535, 0.06449183821678162, 0.06965544074773788, 0.07481904327869415, 0.07998264580965042, 0.08514624834060669, 0.09030985087156296, 0.09547345340251923, 0.1006370559334755, 0.10580065846443176, 0.11096426099538803, 0.1161278635263443, 0.12129146605730057, 0.12645506858825684, 0.1316186785697937, 0.13678227365016937, 0.14194586873054504, 0.1471094787120819, 0.15227308869361877, 0.15743668377399445, 0.16260027885437012, 0.16776388883590698, 0.17292749881744385, 0.17809109389781952, 0.1832546889781952, 0.18841829895973206, 0.19358190894126892, 0.1987455040216446]}, "gradients/decoder.transformer.h.19.ln_cross_attn.bias": {"_type": "histogram", "values": [3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 2.0, 7.0, 6.0, 9.0, 7.0, 7.0, 11.0, 19.0, 18.0, 17.0, 30.0, 20.0, 19.0, 30.0, 41.0, 39.0, 24.0, 33.0, 36.0, 49.0, 43.0, 36.0, 37.0, 34.0, 39.0, 39.0, 36.0, 40.0, 40.0, 35.0, 28.0, 26.0, 26.0, 23.0, 14.0, 19.0, 11.0, 8.0, 14.0, 4.0, 8.0, 8.0, 4.0, 5.0, 2.0, 0.0, 1.0, 0.0, 4.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.013681352138519287, -0.01326003111898899, -0.012838710099458694, -0.012417389079928398, -0.011996068060398102, -0.011574747040867805, -0.01115342602133751, -0.010732105001807213, -0.010310783982276917, -0.00988946296274662, -0.009468141943216324, -0.009046820923686028, -0.008625499904155731, -0.008204178884625435, -0.0077828578650951385, -0.007361536845564842, -0.006940215826034546, -0.00651889480650425, -0.006097573786973953, -0.005676252767443657, -0.005254931747913361, -0.004833610728383064, -0.004412289708852768, -0.003990968689322472, -0.0035696476697921753, -0.003148326650261879, -0.0027270056307315826, -0.0023056846112012863, -0.00188436359167099, -0.0014630425721406937, -0.0010417215526103973, -0.000620400533080101, -0.0001990795135498047, 0.00022224150598049164, 0.000643562525510788, 0.0010648835450410843, 0.0014862045645713806, 0.001907525584101677, 0.0023288466036319733, 0.0027501676231622696, 0.003171488642692566, 0.0035928096622228622, 0.004014130681753159, 0.004435451701283455, 0.004856772720813751, 0.0052780937403440475, 0.005699414759874344, 0.00612073577940464, 0.0065420567989349365, 0.006963377818465233, 0.007384698837995529, 0.0078060198575258255, 0.008227340877056122, 0.008648661896586418, 0.009069982916116714, 0.00949130393564701, 0.009912624955177307, 0.010333945974707603, 0.0107552669942379, 0.011176588013768196, 0.011597909033298492, 0.012019230052828789, 0.012440551072359085, 0.012861872091889381, 0.013283193111419678]}, "gradients/decoder.transformer.h.19.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 4.0, 0.0, 8.0, 10.0, 8.0, 9.0, 12.0, 18.0, 22.0, 26.0, 20.0, 26.0, 34.0, 29.0, 38.0, 40.0, 39.0, 43.0, 48.0, 47.0, 50.0, 49.0, 52.0, 41.0, 58.0, 27.0, 28.0, 39.0, 31.0, 23.0, 25.0, 23.0, 19.0, 15.0, 10.0, 14.0, 10.0, 5.0, 5.0, 6.0, 3.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.87109375, -3.7589111328125, -3.646728515625, -3.5345458984375, -3.42236328125, -3.3101806640625, -3.197998046875, -3.0858154296875, -2.9736328125, -2.8614501953125, -2.749267578125, -2.6370849609375, -2.52490234375, -2.4127197265625, -2.300537109375, -2.1883544921875, -2.076171875, -1.9639892578125, -1.851806640625, -1.7396240234375, -1.62744140625, -1.5152587890625, -1.403076171875, -1.2908935546875, -1.1787109375, -1.0665283203125, -0.954345703125, -0.8421630859375, -0.72998046875, -0.6177978515625, -0.505615234375, -0.3934326171875, -0.28125, -0.1690673828125, -0.056884765625, 0.0552978515625, 0.16748046875, 0.2796630859375, 0.391845703125, 0.5040283203125, 0.6162109375, 0.7283935546875, 0.840576171875, 0.9527587890625, 1.06494140625, 1.1771240234375, 1.289306640625, 1.4014892578125, 1.513671875, 1.6258544921875, 1.738037109375, 1.8502197265625, 1.96240234375, 2.0745849609375, 2.186767578125, 2.2989501953125, 2.4111328125, 2.5233154296875, 2.635498046875, 2.7476806640625, 2.85986328125, 2.9720458984375, 3.084228515625, 3.1964111328125, 3.30859375]}, "gradients/decoder.transformer.h.19.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 3.0, 4.0, 0.0, 4.0, 7.0, 8.0, 18.0, 18.0, 27.0, 60.0, 64.0, 119.0, 205.0, 306.0, 530.0, 872.0, 1493.0, 2766.0, 5366.0, 10296.0, 20662.0, 44237.0, 99140.0, 224895.0, 312577.0, 177966.0, 77201.0, 34678.0, 16769.0, 8425.0, 4375.0, 2373.0, 1273.0, 713.0, 436.0, 242.0, 153.0, 87.0, 71.0, 39.0, 29.0, 18.0, 14.0, 7.0, 8.0, 5.0, 2.0, 6.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-2.86328125, -2.765716552734375, -2.66815185546875, -2.570587158203125, -2.4730224609375, -2.375457763671875, -2.27789306640625, -2.180328369140625, -2.082763671875, -1.985198974609375, -1.88763427734375, -1.790069580078125, -1.6925048828125, -1.594940185546875, -1.49737548828125, -1.399810791015625, -1.30224609375, -1.204681396484375, -1.10711669921875, -1.009552001953125, -0.9119873046875, -0.814422607421875, -0.71685791015625, -0.619293212890625, -0.521728515625, -0.424163818359375, -0.32659912109375, -0.229034423828125, -0.1314697265625, -0.033905029296875, 0.06365966796875, 0.161224365234375, 0.2587890625, 0.356353759765625, 0.45391845703125, 0.551483154296875, 0.6490478515625, 0.746612548828125, 0.84417724609375, 0.941741943359375, 1.039306640625, 1.136871337890625, 1.23443603515625, 1.332000732421875, 1.4295654296875, 1.527130126953125, 1.62469482421875, 1.722259521484375, 1.81982421875, 1.917388916015625, 2.01495361328125, 2.112518310546875, 2.2100830078125, 2.307647705078125, 2.40521240234375, 2.502777099609375, 2.600341796875, 2.697906494140625, 2.79547119140625, 2.893035888671875, 2.9906005859375, 3.088165283203125, 3.18572998046875, 3.283294677734375, 3.380859375]}, "gradients/decoder.transformer.h.19.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 0.0, 0.0, 0.0, 5.0, 5.0, 3.0, 8.0, 10.0, 14.0, 14.0, 14.0, 17.0, 20.0, 19.0, 23.0, 36.0, 37.0, 43.0, 60.0, 77.0, 91.0, 140.0, 320.0, 1450.0, 169.0, 94.0, 63.0, 50.0, 40.0, 34.0, 42.0, 20.0, 27.0, 15.0, 20.0, 14.0, 20.0, 16.0, 4.0, 7.0, 5.0, 7.0, 1.0, 2.0, 2.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.703125, -12.3079833984375, -11.912841796875, -11.5177001953125, -11.12255859375, -10.7274169921875, -10.332275390625, -9.9371337890625, -9.5419921875, -9.1468505859375, -8.751708984375, -8.3565673828125, -7.96142578125, -7.5662841796875, -7.171142578125, -6.7760009765625, -6.380859375, -5.9857177734375, -5.590576171875, -5.1954345703125, -4.80029296875, -4.4051513671875, -4.010009765625, -3.6148681640625, -3.2197265625, -2.8245849609375, -2.429443359375, -2.0343017578125, -1.63916015625, -1.2440185546875, -0.848876953125, -0.4537353515625, -0.05859375, 0.3365478515625, 0.731689453125, 1.1268310546875, 1.52197265625, 1.9171142578125, 2.312255859375, 2.7073974609375, 3.1025390625, 3.4976806640625, 3.892822265625, 4.2879638671875, 4.68310546875, 5.0782470703125, 5.473388671875, 5.8685302734375, 6.263671875, 6.6588134765625, 7.053955078125, 7.4490966796875, 7.84423828125, 8.2393798828125, 8.634521484375, 9.0296630859375, 9.4248046875, 9.8199462890625, 10.215087890625, 10.6102294921875, 11.00537109375, 11.4005126953125, 11.795654296875, 12.1907958984375, 12.5859375]}, "gradients/decoder.transformer.h.19.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 0.0, 7.0, 4.0, 6.0, 4.0, 15.0, 15.0, 11.0, 20.0, 21.0, 27.0, 34.0, 51.0, 83.0, 95.0, 172.0, 375.0, 915.0, 3536.0, 41487.0, 2582836.0, 500186.0, 12493.0, 1925.0, 587.0, 271.0, 164.0, 108.0, 70.0, 39.0, 35.0, 24.0, 16.0, 23.0, 14.0, 15.0, 7.0, 7.0, 6.0, 3.0, 0.0, 5.0, 2.0, 1.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-20.84375, -20.22705078125, -19.6103515625, -18.99365234375, -18.376953125, -17.76025390625, -17.1435546875, -16.52685546875, -15.91015625, -15.29345703125, -14.6767578125, -14.06005859375, -13.443359375, -12.82666015625, -12.2099609375, -11.59326171875, -10.9765625, -10.35986328125, -9.7431640625, -9.12646484375, -8.509765625, -7.89306640625, -7.2763671875, -6.65966796875, -6.04296875, -5.42626953125, -4.8095703125, -4.19287109375, -3.576171875, -2.95947265625, -2.3427734375, -1.72607421875, -1.109375, -0.49267578125, 0.1240234375, 0.74072265625, 1.357421875, 1.97412109375, 2.5908203125, 3.20751953125, 3.82421875, 4.44091796875, 5.0576171875, 5.67431640625, 6.291015625, 6.90771484375, 7.5244140625, 8.14111328125, 8.7578125, 9.37451171875, 9.9912109375, 10.60791015625, 11.224609375, 11.84130859375, 12.4580078125, 13.07470703125, 13.69140625, 14.30810546875, 14.9248046875, 15.54150390625, 16.158203125, 16.77490234375, 17.3916015625, 18.00830078125, 18.625]}, "gradients/decoder.transformer.h.19.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 3.0, 4.0, 17.0, 53.0, 184.0, 286.0, 275.0, 148.0, 34.0, 11.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-17.094955444335938, -15.357843399047852, -13.620732307434082, -11.883620262145996, -10.146509170532227, -8.40939712524414, -6.672285079956055, -4.935173988342285, -3.198061943054199, -1.460950255393982, 0.27616143226623535, 2.013273239135742, 3.75038480758667, 5.487496376037598, 7.224608421325684, 8.961719512939453, 10.698831558227539, 12.435943603515625, 14.173054695129395, 15.91016674041748, 17.64727783203125, 19.384389877319336, 21.121501922607422, 22.858612060546875, 24.595726013183594, 26.33283805847168, 28.069950103759766, 29.80706024169922, 31.544172286987305, 33.28128433227539, 35.018394470214844, 36.75550842285156, 38.492618560791016, 40.22972869873047, 41.96684265136719, 43.70395278930664, 45.44106674194336, 47.17817687988281, 48.91529083251953, 50.652400970458984, 52.38951110839844, 54.12662124633789, 55.86373519897461, 57.60084533691406, 59.33795928955078, 61.075069427490234, 62.81217956542969, 64.5492935180664, 66.28640747070312, 68.02352142333984, 69.76062774658203, 71.49774169921875, 73.23485565185547, 74.97196960449219, 76.70907592773438, 78.4461898803711, 80.18329620361328, 81.92041015625, 83.65751647949219, 85.3946304321289, 87.13174438476562, 88.86885070800781, 90.60596466064453, 92.34307861328125, 94.08019256591797]}, "gradients/decoder.transformer.h.19.ln_1.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 1.0, 4.0, 1.0, 1.0, 2.0, 7.0, 5.0, 7.0, 7.0, 8.0, 5.0, 11.0, 20.0, 20.0, 12.0, 29.0, 35.0, 32.0, 32.0, 34.0, 44.0, 52.0, 34.0, 45.0, 51.0, 45.0, 52.0, 51.0, 28.0, 39.0, 33.0, 28.0, 30.0, 39.0, 21.0, 32.0, 16.0, 24.0, 21.0, 13.0, 11.0, 11.0, 4.0, 2.0, 6.0, 1.0, 2.0, 3.0, 1.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-35.736412048339844, -34.60384750366211, -33.471282958984375, -32.338722229003906, -31.206157684326172, -30.073593139648438, -28.941030502319336, -27.808467864990234, -26.6759033203125, -25.543338775634766, -24.410776138305664, -23.278213500976562, -22.145648956298828, -21.013084411621094, -19.880521774291992, -18.74795913696289, -17.615394592285156, -16.482830047607422, -15.35026741027832, -14.217703819274902, -13.085140228271484, -11.952576637268066, -10.820013046264648, -9.68744945526123, -8.554885864257812, -7.4223222732543945, -6.289758682250977, -5.157195091247559, -4.024631500244141, -2.8920679092407227, -1.7595043182373047, -0.6269407272338867, 0.5056190490722656, 1.6381826400756836, 2.7707462310791016, 3.9033098220825195, 5.0358734130859375, 6.1684370040893555, 7.301000595092773, 8.433564186096191, 9.56612777709961, 10.698691368103027, 11.831254959106445, 12.963818550109863, 14.096382141113281, 15.2289457321167, 16.361509323120117, 17.49407196044922, 18.626636505126953, 19.759201049804688, 20.89176368713379, 22.02432632446289, 23.156890869140625, 24.28945541381836, 25.42201805114746, 26.554580688476562, 27.687145233154297, 28.81970977783203, 29.952272415161133, 31.084835052490234, 32.21739959716797, 33.3499641418457, 34.48252868652344, 35.615089416503906, 36.74765396118164]}, "gradients/decoder.transformer.h.18.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 2.0, 5.0, 6.0, 8.0, 9.0, 8.0, 18.0, 21.0, 20.0, 17.0, 30.0, 23.0, 30.0, 28.0, 37.0, 46.0, 38.0, 41.0, 40.0, 41.0, 45.0, 56.0, 42.0, 57.0, 38.0, 36.0, 29.0, 34.0, 28.0, 34.0, 20.0, 26.0, 15.0, 18.0, 9.0, 19.0, 17.0, 7.0, 4.0, 5.0, 2.0, 3.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-3.826171875, -3.712371826171875, -3.59857177734375, -3.484771728515625, -3.3709716796875, -3.257171630859375, -3.14337158203125, -3.029571533203125, -2.915771484375, -2.801971435546875, -2.68817138671875, -2.574371337890625, -2.4605712890625, -2.346771240234375, -2.23297119140625, -2.119171142578125, -2.00537109375, -1.891571044921875, -1.77777099609375, -1.663970947265625, -1.5501708984375, -1.436370849609375, -1.32257080078125, -1.208770751953125, -1.094970703125, -0.981170654296875, -0.86737060546875, -0.753570556640625, -0.6397705078125, -0.525970458984375, -0.41217041015625, -0.298370361328125, -0.1845703125, -0.070770263671875, 0.04302978515625, 0.156829833984375, 0.2706298828125, 0.384429931640625, 0.49822998046875, 0.612030029296875, 0.725830078125, 0.839630126953125, 0.95343017578125, 1.067230224609375, 1.1810302734375, 1.294830322265625, 1.40863037109375, 1.522430419921875, 1.63623046875, 1.750030517578125, 1.86383056640625, 1.977630615234375, 2.0914306640625, 2.205230712890625, 2.31903076171875, 2.432830810546875, 2.546630859375, 2.660430908203125, 2.77423095703125, 2.888031005859375, 3.0018310546875, 3.115631103515625, 3.22943115234375, 3.343231201171875, 3.45703125]}, "gradients/decoder.transformer.h.18.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 5.0, 6.0, 1.0, 10.0, 13.0, 13.0, 11.0, 27.0, 33.0, 34.0, 68.0, 82.0, 157.0, 348.0, 644.0, 1443.0, 4005.0, 11853.0, 44687.0, 226136.0, 1339292.0, 2025390.0, 433630.0, 77884.0, 19111.0, 5727.0, 2081.0, 807.0, 333.0, 182.0, 82.0, 40.0, 39.0, 29.0, 20.0, 20.0, 9.0, 7.0, 10.0, 3.0, 12.0, 4.0, 2.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.3671875, -7.13824462890625, -6.9093017578125, -6.68035888671875, -6.451416015625, -6.22247314453125, -5.9935302734375, -5.76458740234375, -5.53564453125, -5.30670166015625, -5.0777587890625, -4.84881591796875, -4.619873046875, -4.39093017578125, -4.1619873046875, -3.93304443359375, -3.7041015625, -3.47515869140625, -3.2462158203125, -3.01727294921875, -2.788330078125, -2.55938720703125, -2.3304443359375, -2.10150146484375, -1.87255859375, -1.64361572265625, -1.4146728515625, -1.18572998046875, -0.956787109375, -0.72784423828125, -0.4989013671875, -0.26995849609375, -0.041015625, 0.18792724609375, 0.4168701171875, 0.64581298828125, 0.874755859375, 1.10369873046875, 1.3326416015625, 1.56158447265625, 1.79052734375, 2.01947021484375, 2.2484130859375, 2.47735595703125, 2.706298828125, 2.93524169921875, 3.1641845703125, 3.39312744140625, 3.6220703125, 3.85101318359375, 4.0799560546875, 4.30889892578125, 4.537841796875, 4.76678466796875, 4.9957275390625, 5.22467041015625, 5.45361328125, 5.68255615234375, 5.9114990234375, 6.14044189453125, 6.369384765625, 6.59832763671875, 6.8272705078125, 7.05621337890625, 7.28515625]}, "gradients/decoder.transformer.h.18.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 2.0, 0.0, 4.0, 4.0, 2.0, 8.0, 13.0, 17.0, 14.0, 32.0, 42.0, 78.0, 130.0, 201.0, 367.0, 543.0, 718.0, 672.0, 474.0, 286.0, 191.0, 115.0, 61.0, 36.0, 23.0, 14.0, 17.0, 9.0, 9.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-14.1484375, -13.71728515625, -13.2861328125, -12.85498046875, -12.423828125, -11.99267578125, -11.5615234375, -11.13037109375, -10.69921875, -10.26806640625, -9.8369140625, -9.40576171875, -8.974609375, -8.54345703125, -8.1123046875, -7.68115234375, -7.25, -6.81884765625, -6.3876953125, -5.95654296875, -5.525390625, -5.09423828125, -4.6630859375, -4.23193359375, -3.80078125, -3.36962890625, -2.9384765625, -2.50732421875, -2.076171875, -1.64501953125, -1.2138671875, -0.78271484375, -0.3515625, 0.07958984375, 0.5107421875, 0.94189453125, 1.373046875, 1.80419921875, 2.2353515625, 2.66650390625, 3.09765625, 3.52880859375, 3.9599609375, 4.39111328125, 4.822265625, 5.25341796875, 5.6845703125, 6.11572265625, 6.546875, 6.97802734375, 7.4091796875, 7.84033203125, 8.271484375, 8.70263671875, 9.1337890625, 9.56494140625, 9.99609375, 10.42724609375, 10.8583984375, 11.28955078125, 11.720703125, 12.15185546875, 12.5830078125, 13.01416015625, 13.4453125]}, "gradients/decoder.transformer.h.18.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 5.0, 2.0, 4.0, 5.0, 16.0, 26.0, 36.0, 60.0, 76.0, 142.0, 272.0, 603.0, 1368.0, 3828.0, 15200.0, 104619.0, 1220618.0, 2548210.0, 259737.0, 29647.0, 6199.0, 2070.0, 750.0, 355.0, 167.0, 111.0, 57.0, 38.0, 31.0, 14.0, 11.0, 4.0, 6.0, 2.0, 2.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-16.90625, -16.3642578125, -15.822265625, -15.2802734375, -14.73828125, -14.1962890625, -13.654296875, -13.1123046875, -12.5703125, -12.0283203125, -11.486328125, -10.9443359375, -10.40234375, -9.8603515625, -9.318359375, -8.7763671875, -8.234375, -7.6923828125, -7.150390625, -6.6083984375, -6.06640625, -5.5244140625, -4.982421875, -4.4404296875, -3.8984375, -3.3564453125, -2.814453125, -2.2724609375, -1.73046875, -1.1884765625, -0.646484375, -0.1044921875, 0.4375, 0.9794921875, 1.521484375, 2.0634765625, 2.60546875, 3.1474609375, 3.689453125, 4.2314453125, 4.7734375, 5.3154296875, 5.857421875, 6.3994140625, 6.94140625, 7.4833984375, 8.025390625, 8.5673828125, 9.109375, 9.6513671875, 10.193359375, 10.7353515625, 11.27734375, 11.8193359375, 12.361328125, 12.9033203125, 13.4453125, 13.9873046875, 14.529296875, 15.0712890625, 15.61328125, 16.1552734375, 16.697265625, 17.2392578125, 17.78125]}, "gradients/decoder.transformer.h.18.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 5.0, 34.0, 102.0, 212.0, 311.0, 216.0, 97.0, 30.0, 5.0, 3.0, 1.0, 1.0, 1.0], "bins": [-222.94378662109375, -218.93153381347656, -214.9192657470703, -210.90701293945312, -206.89474487304688, -202.8824920654297, -198.8702392578125, -194.85797119140625, -190.84571838378906, -186.83346557617188, -182.82119750976562, -178.80894470214844, -174.7966766357422, -170.784423828125, -166.77215576171875, -162.75990295410156, -158.74765014648438, -154.7353973388672, -150.72312927246094, -146.71087646484375, -142.6986083984375, -138.6863555908203, -134.67410278320312, -130.66183471679688, -126.64956665039062, -122.6373062133789, -118.62504577636719, -114.61279296875, -110.60053253173828, -106.58827209472656, -102.57601165771484, -98.56375122070312, -94.55149841308594, -90.53923797607422, -86.5269775390625, -82.51472473144531, -78.5024642944336, -74.49020385742188, -70.47794342041016, -66.46568298339844, -62.453426361083984, -58.441165924072266, -54.42890930175781, -50.416648864746094, -46.404388427734375, -42.39213180541992, -38.3798713684082, -34.36761474609375, -30.35535430908203, -26.343095779418945, -22.33083724975586, -18.31857681274414, -14.306318283081055, -10.294059753417969, -6.28179931640625, -2.269540786743164, 1.7427177429199219, 5.754976749420166, 9.76723575592041, 13.779495239257812, 17.7917537689209, 21.804012298583984, 25.816272735595703, 29.82853126525879, 33.840789794921875]}, "gradients/decoder.transformer.h.18.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 5.0, 6.0, 2.0, 5.0, 13.0, 7.0, 5.0, 9.0, 14.0, 18.0, 10.0, 19.0, 29.0, 25.0, 23.0, 22.0, 28.0, 40.0, 43.0, 35.0, 49.0, 35.0, 39.0, 48.0, 33.0, 47.0, 36.0, 52.0, 36.0, 43.0, 28.0, 32.0, 33.0, 14.0, 32.0, 18.0, 11.0, 10.0, 15.0, 8.0, 10.0, 5.0, 4.0, 2.0, 0.0, 4.0, 3.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-31.356494903564453, -30.371456146240234, -29.386417388916016, -28.401378631591797, -27.416337966918945, -26.431299209594727, -25.446260452270508, -24.46122169494629, -23.47618293762207, -22.49114418029785, -21.506105422973633, -20.52106475830078, -19.536026000976562, -18.550987243652344, -17.565948486328125, -16.580909729003906, -15.595870018005371, -14.610831260681152, -13.625791549682617, -12.640752792358398, -11.65571403503418, -10.670675277709961, -9.685635566711426, -8.700596809387207, -7.71555757522583, -6.730518341064453, -5.745479583740234, -4.760440349578857, -3.7754013538360596, -2.7903623580932617, -1.8053231239318848, -0.820284366607666, 0.16475486755371094, 1.1497938632965088, 2.1348328590393066, 3.1198720932006836, 4.104910850524902, 5.089950084686279, 6.074989318847656, 7.060028076171875, 8.045066833496094, 9.030105590820312, 10.015145301818848, 11.000184059143066, 11.985222816467285, 12.97026252746582, 13.955301284790039, 14.940340042114258, 15.925379753112793, 16.910419464111328, 17.895458221435547, 18.880496978759766, 19.865535736083984, 20.850574493408203, 21.835613250732422, 22.82065200805664, 23.805692672729492, 24.79073143005371, 25.77577018737793, 26.76081085205078, 27.745849609375, 28.73088836669922, 29.715927124023438, 30.700965881347656, 31.686004638671875]}, "gradients/decoder.transformer.h.18.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 4.0, 6.0, 6.0, 3.0, 2.0, 8.0, 12.0, 12.0, 16.0, 17.0, 20.0, 24.0, 29.0, 38.0, 30.0, 51.0, 29.0, 59.0, 28.0, 45.0, 34.0, 50.0, 49.0, 45.0, 37.0, 47.0, 38.0, 40.0, 34.0, 31.0, 26.0, 33.0, 23.0, 17.0, 16.0, 14.0, 12.0, 11.0, 6.0, 4.0, 4.0, 1.0, 4.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.109375, -3.990692138671875, -3.87200927734375, -3.753326416015625, -3.6346435546875, -3.515960693359375, -3.39727783203125, -3.278594970703125, -3.159912109375, -3.041229248046875, -2.92254638671875, -2.803863525390625, -2.6851806640625, -2.566497802734375, -2.44781494140625, -2.329132080078125, -2.21044921875, -2.091766357421875, -1.97308349609375, -1.854400634765625, -1.7357177734375, -1.617034912109375, -1.49835205078125, -1.379669189453125, -1.260986328125, -1.142303466796875, -1.02362060546875, -0.904937744140625, -0.7862548828125, -0.667572021484375, -0.54888916015625, -0.430206298828125, -0.3115234375, -0.192840576171875, -0.07415771484375, 0.044525146484375, 0.1632080078125, 0.281890869140625, 0.40057373046875, 0.519256591796875, 0.637939453125, 0.756622314453125, 0.87530517578125, 0.993988037109375, 1.1126708984375, 1.231353759765625, 1.35003662109375, 1.468719482421875, 1.58740234375, 1.706085205078125, 1.82476806640625, 1.943450927734375, 2.0621337890625, 2.180816650390625, 2.29949951171875, 2.418182373046875, 2.536865234375, 2.655548095703125, 2.77423095703125, 2.892913818359375, 3.0115966796875, 3.130279541015625, 3.24896240234375, 3.367645263671875, 3.486328125]}, "gradients/decoder.transformer.h.18.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 2.0, 0.0, 3.0, 6.0, 3.0, 16.0, 13.0, 30.0, 46.0, 77.0, 120.0, 173.0, 278.0, 459.0, 732.0, 1127.0, 1864.0, 2915.0, 5017.0, 8100.0, 13529.0, 22521.0, 37068.0, 59376.0, 90843.0, 129761.0, 163400.0, 159197.0, 123988.0, 86050.0, 55030.0, 34231.0, 20743.0, 12353.0, 7611.0, 4546.0, 2775.0, 1641.0, 1057.0, 661.0, 419.0, 284.0, 179.0, 104.0, 67.0, 37.0, 30.0, 32.0, 19.0, 7.0, 11.0, 5.0, 5.0, 3.0, 5.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.45361328125, -0.43845367431640625, -0.4232940673828125, -0.40813446044921875, -0.392974853515625, -0.37781524658203125, -0.3626556396484375, -0.34749603271484375, -0.33233642578125, -0.31717681884765625, -0.3020172119140625, -0.28685760498046875, -0.271697998046875, -0.25653839111328125, -0.2413787841796875, -0.22621917724609375, -0.2110595703125, -0.19589996337890625, -0.1807403564453125, -0.16558074951171875, -0.150421142578125, -0.13526153564453125, -0.1201019287109375, -0.10494232177734375, -0.08978271484375, -0.07462310791015625, -0.0594635009765625, -0.04430389404296875, -0.029144287109375, -0.01398468017578125, 0.0011749267578125, 0.01633453369140625, 0.031494140625, 0.04665374755859375, 0.0618133544921875, 0.07697296142578125, 0.092132568359375, 0.10729217529296875, 0.1224517822265625, 0.13761138916015625, 0.15277099609375, 0.16793060302734375, 0.1830902099609375, 0.19824981689453125, 0.213409423828125, 0.22856903076171875, 0.2437286376953125, 0.25888824462890625, 0.2740478515625, 0.28920745849609375, 0.3043670654296875, 0.31952667236328125, 0.334686279296875, 0.34984588623046875, 0.3650054931640625, 0.38016510009765625, 0.39532470703125, 0.41048431396484375, 0.4256439208984375, 0.44080352783203125, 0.455963134765625, 0.47112274169921875, 0.4862823486328125, 0.5014419555664062, 0.5166015625]}, "gradients/decoder.transformer.h.18.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 3.0, 6.0, 1.0, 7.0, 3.0, 5.0, 8.0, 6.0, 11.0, 13.0, 13.0, 19.0, 18.0, 20.0, 29.0, 27.0, 30.0, 36.0, 27.0, 36.0, 34.0, 48.0, 40.0, 38.0, 1055.0, 36.0, 32.0, 42.0, 46.0, 35.0, 34.0, 26.0, 38.0, 25.0, 35.0, 12.0, 17.0, 32.0, 21.0, 15.0, 10.0, 9.0, 10.0, 15.0, 2.0, 6.0, 2.0, 2.0, 3.0, 3.0, 0.0, 1.0, 0.0, 1.0], "bins": [-2.6640625, -2.587921142578125, -2.51177978515625, -2.435638427734375, -2.3594970703125, -2.283355712890625, -2.20721435546875, -2.131072998046875, -2.054931640625, -1.978790283203125, -1.90264892578125, -1.826507568359375, -1.7503662109375, -1.674224853515625, -1.59808349609375, -1.521942138671875, -1.44580078125, -1.369659423828125, -1.29351806640625, -1.217376708984375, -1.1412353515625, -1.065093994140625, -0.98895263671875, -0.912811279296875, -0.836669921875, -0.760528564453125, -0.68438720703125, -0.608245849609375, -0.5321044921875, -0.455963134765625, -0.37982177734375, -0.303680419921875, -0.2275390625, -0.151397705078125, -0.07525634765625, 0.000885009765625, 0.0770263671875, 0.153167724609375, 0.22930908203125, 0.305450439453125, 0.381591796875, 0.457733154296875, 0.53387451171875, 0.610015869140625, 0.6861572265625, 0.762298583984375, 0.83843994140625, 0.914581298828125, 0.99072265625, 1.066864013671875, 1.14300537109375, 1.219146728515625, 1.2952880859375, 1.371429443359375, 1.44757080078125, 1.523712158203125, 1.599853515625, 1.675994873046875, 1.75213623046875, 1.828277587890625, 1.9044189453125, 1.980560302734375, 2.05670166015625, 2.132843017578125, 2.208984375]}, "gradients/decoder.transformer.h.18.crossattention.c_attn.weight": {"_type": "histogram", "values": [3.0, 1.0, 8.0, 5.0, 5.0, 6.0, 19.0, 22.0, 36.0, 60.0, 55.0, 100.0, 158.0, 235.0, 369.0, 553.0, 797.0, 1220.0, 1887.0, 2893.0, 4400.0, 6723.0, 10302.0, 15952.0, 24937.0, 38540.0, 58878.0, 87600.0, 125348.0, 977944.0, 373526.0, 120696.0, 84352.0, 56131.0, 36108.0, 23294.0, 15375.0, 9850.0, 6540.0, 4278.0, 2796.0, 1697.0, 1193.0, 746.0, 503.0, 348.0, 224.0, 136.0, 91.0, 69.0, 47.0, 25.0, 17.0, 16.0, 11.0, 6.0, 8.0, 5.0, 1.0, 2.0, 0.0, 1.0, 2.0, 2.0], "bins": [-0.271484375, -0.2624168395996094, -0.25334930419921875, -0.24428176879882812, -0.2352142333984375, -0.22614669799804688, -0.21707916259765625, -0.20801162719726562, -0.198944091796875, -0.18987655639648438, -0.18080902099609375, -0.17174148559570312, -0.1626739501953125, -0.15360641479492188, -0.14453887939453125, -0.13547134399414062, -0.12640380859375, -0.11733627319335938, -0.10826873779296875, -0.09920120239257812, -0.0901336669921875, -0.08106613159179688, -0.07199859619140625, -0.06293106079101562, -0.053863525390625, -0.044795989990234375, -0.03572845458984375, -0.026660919189453125, -0.0175933837890625, -0.008525848388671875, 0.00054168701171875, 0.009609222412109375, 0.0186767578125, 0.027744293212890625, 0.03681182861328125, 0.045879364013671875, 0.0549468994140625, 0.06401443481445312, 0.07308197021484375, 0.08214950561523438, 0.091217041015625, 0.10028457641601562, 0.10935211181640625, 0.11841964721679688, 0.1274871826171875, 0.13655471801757812, 0.14562225341796875, 0.15468978881835938, 0.16375732421875, 0.17282485961914062, 0.18189239501953125, 0.19095993041992188, 0.2000274658203125, 0.20909500122070312, 0.21816253662109375, 0.22723007202148438, 0.236297607421875, 0.24536514282226562, 0.25443267822265625, 0.2635002136230469, 0.2725677490234375, 0.2816352844238281, 0.29070281982421875, 0.2997703552246094, 0.308837890625]}, "gradients/decoder.transformer.h.18.crossattention.q_attn.bias": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 3.0, 2.0, 4.0, 3.0, 8.0, 10.0, 6.0, 12.0, 12.0, 14.0, 14.0, 24.0, 18.0, 18.0, 29.0, 24.0, 36.0, 32.0, 39.0, 33.0, 60.0, 55.0, 83.0, 83.0, 55.0, 52.0, 30.0, 30.0, 30.0, 21.0, 32.0, 25.0, 17.0, 19.0, 8.0, 12.0, 14.0, 9.0, 3.0, 10.0, 6.0, 5.0, 3.0, 4.0, 0.0, 1.0, 3.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.01303863525390625, -0.012598514556884766, -0.012158393859863281, -0.011718273162841797, -0.011278152465820312, -0.010838031768798828, -0.010397911071777344, -0.00995779037475586, -0.009517669677734375, -0.00907754898071289, -0.008637428283691406, -0.008197307586669922, -0.0077571868896484375, -0.007317066192626953, -0.006876945495605469, -0.006436824798583984, -0.0059967041015625, -0.005556583404541016, -0.005116462707519531, -0.004676342010498047, -0.0042362213134765625, -0.003796100616455078, -0.0033559799194335938, -0.0029158592224121094, -0.002475738525390625, -0.0020356178283691406, -0.0015954971313476562, -0.0011553764343261719, -0.0007152557373046875, -0.0002751350402832031, 0.00016498565673828125, 0.0006051063537597656, 0.00104522705078125, 0.0014853477478027344, 0.0019254684448242188, 0.002365589141845703, 0.0028057098388671875, 0.003245830535888672, 0.0036859512329101562, 0.004126071929931641, 0.004566192626953125, 0.005006313323974609, 0.005446434020996094, 0.005886554718017578, 0.0063266754150390625, 0.006766796112060547, 0.007206916809082031, 0.007647037506103516, 0.008087158203125, 0.008527278900146484, 0.008967399597167969, 0.009407520294189453, 0.009847640991210938, 0.010287761688232422, 0.010727882385253906, 0.01116800308227539, 0.011608123779296875, 0.01204824447631836, 0.012488365173339844, 0.012928485870361328, 0.013368606567382812, 0.013808727264404297, 0.014248847961425781, 0.014688968658447266, 0.01512908935546875]}, "gradients/decoder.transformer.h.18.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 1.0, 4.0, 3.0, 4.0, 10.0, 10.0, 3.0, 17.0, 20.0, 26.0, 29.0, 42.0, 71.0, 84.0, 125.0, 145.0, 188.0, 301.0, 436.0, 1111.0, 52078.0, 983884.0, 7977.0, 693.0, 348.0, 249.0, 192.0, 106.0, 100.0, 78.0, 45.0, 46.0, 34.0, 16.0, 19.0, 11.0, 14.0, 12.0, 11.0, 4.0, 5.0, 2.0, 5.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0], "bins": [-0.2467041015625, -0.23929786682128906, -0.23189163208007812, -0.2244853973388672, -0.21707916259765625, -0.2096729278564453, -0.20226669311523438, -0.19486045837402344, -0.1874542236328125, -0.18004798889160156, -0.17264175415039062, -0.1652355194091797, -0.15782928466796875, -0.1504230499267578, -0.14301681518554688, -0.13561058044433594, -0.128204345703125, -0.12079811096191406, -0.11339187622070312, -0.10598564147949219, -0.09857940673828125, -0.09117317199707031, -0.08376693725585938, -0.07636070251464844, -0.0689544677734375, -0.06154823303222656, -0.054141998291015625, -0.04673576354980469, -0.03932952880859375, -0.03192329406738281, -0.024517059326171875, -0.017110824584960938, -0.00970458984375, -0.0022983551025390625, 0.005107879638671875, 0.012514114379882812, 0.01992034912109375, 0.027326583862304688, 0.034732818603515625, 0.04213905334472656, 0.0495452880859375, 0.05695152282714844, 0.06435775756835938, 0.07176399230957031, 0.07917022705078125, 0.08657646179199219, 0.09398269653320312, 0.10138893127441406, 0.108795166015625, 0.11620140075683594, 0.12360763549804688, 0.1310138702392578, 0.13842010498046875, 0.1458263397216797, 0.15323257446289062, 0.16063880920410156, 0.1680450439453125, 0.17545127868652344, 0.18285751342773438, 0.1902637481689453, 0.19766998291015625, 0.2050762176513672, 0.21248245239257812, 0.21988868713378906, 0.227294921875]}, "gradients/decoder.transformer.h.18.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 10.0, 54.0, 379.0, 546.0, 23.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.17351217567920685, -0.16958020627498627, -0.16564823687076569, -0.1617162525653839, -0.15778428316116333, -0.15385231375694275, -0.14992034435272217, -0.1459883749485016, -0.142056405544281, -0.13812443614006042, -0.13419246673583984, -0.13026048243045807, -0.1263285130262375, -0.1223965436220169, -0.11846457421779633, -0.11453260481357574, -0.11060062050819397, -0.10666865110397339, -0.10273667424917221, -0.09880470484495163, -0.09487272799015045, -0.09094075858592987, -0.08700878918170929, -0.08307681977748871, -0.07914484292268753, -0.07521287351846695, -0.07128089666366577, -0.06734892725944519, -0.06341695785522461, -0.05948498100042343, -0.05555301159620285, -0.05162103846669197, -0.047689057886600494, -0.043757084757089615, -0.039825111627578735, -0.035893142223358154, -0.031961169093847275, -0.028029195964336395, -0.024097224697470665, -0.020165253430604935, -0.016233280301094055, -0.01230130810290575, -0.008369335904717445, -0.0044373637065291405, -0.0005053915083408356, 0.003426581621170044, 0.007358552888035774, 0.011290524154901505, 0.015222497284412384, 0.019154470413923264, 0.023086441680788994, 0.027018412947654724, 0.030950386077165604, 0.03488235920667648, 0.038814328610897064, 0.042746301740407944, 0.04667827486991882, 0.0506102479994297, 0.05454222112894058, 0.05847419053316116, 0.06240616366267204, 0.06633813679218292, 0.0702701061964035, 0.07420207560062408, 0.07813405245542526]}, "gradients/decoder.transformer.h.18.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 2.0, 3.0, 0.0, 2.0, 4.0, 1.0, 2.0, 2.0, 3.0, 4.0, 6.0, 6.0, 16.0, 12.0, 11.0, 19.0, 16.0, 22.0, 17.0, 32.0, 27.0, 38.0, 24.0, 34.0, 28.0, 24.0, 40.0, 33.0, 29.0, 57.0, 21.0, 44.0, 30.0, 37.0, 31.0, 41.0, 25.0, 23.0, 29.0, 35.0, 22.0, 23.0, 18.0, 19.0, 19.0, 12.0, 10.0, 9.0, 9.0, 14.0, 6.0, 8.0, 4.0, 4.0, 5.0, 0.0, 2.0, 2.0, 1.0, 2.0, 2.0], "bins": [-0.020041584968566895, -0.019436698406934738, -0.018831809982657433, -0.018226921558380127, -0.01762203499674797, -0.017017148435115814, -0.01641226001083851, -0.015807371586561203, -0.015202485024929047, -0.014597597531974316, -0.013992710039019585, -0.013387822546064854, -0.012782935053110123, -0.012178047560155392, -0.01157316006720066, -0.01096827257424593, -0.010363385081291199, -0.009758497588336468, -0.009153610095381737, -0.008548722602427006, -0.007943835109472275, -0.007338947616517544, -0.006734060123562813, -0.006129172630608082, -0.005524285137653351, -0.00491939764469862, -0.004314510151743889, -0.003709622658789158, -0.003104735165834427, -0.002499847672879696, -0.001894960179924965, -0.001290072686970234, -0.0006851851940155029, -8.029770106077194e-05, 0.000524589791893959, 0.00112947728484869, 0.001734364777803421, 0.002339252270758152, 0.002944139763712883, 0.003549027256667614, 0.004153914749622345, 0.004758802242577076, 0.005363689735531807, 0.005968577228486538, 0.006573464721441269, 0.007178352214396, 0.007783239707350731, 0.008388127200305462, 0.008993014693260193, 0.009597902186214924, 0.010202789679169655, 0.010807677172124386, 0.011412564665079117, 0.012017452158033848, 0.012622339650988579, 0.01322722714394331, 0.01383211463689804, 0.014437002129852772, 0.015041889622807503, 0.01564677804708481, 0.016251664608716965, 0.01685655117034912, 0.017461439594626427, 0.018066328018903732, 0.01867121458053589]}, "gradients/decoder.transformer.h.18.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 4.0, 6.0, 6.0, 3.0, 2.0, 8.0, 11.0, 13.0, 16.0, 17.0, 20.0, 24.0, 29.0, 38.0, 30.0, 51.0, 29.0, 59.0, 28.0, 45.0, 35.0, 49.0, 49.0, 45.0, 38.0, 46.0, 39.0, 39.0, 34.0, 31.0, 26.0, 32.0, 24.0, 17.0, 16.0, 14.0, 12.0, 11.0, 5.0, 5.0, 4.0, 1.0, 4.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.109375, -3.990692138671875, -3.87200927734375, -3.753326416015625, -3.6346435546875, -3.515960693359375, -3.39727783203125, -3.278594970703125, -3.159912109375, -3.041229248046875, -2.92254638671875, -2.803863525390625, -2.6851806640625, -2.566497802734375, -2.44781494140625, -2.329132080078125, -2.21044921875, -2.091766357421875, -1.97308349609375, -1.854400634765625, -1.7357177734375, -1.617034912109375, -1.49835205078125, -1.379669189453125, -1.260986328125, -1.142303466796875, -1.02362060546875, -0.904937744140625, -0.7862548828125, -0.667572021484375, -0.54888916015625, -0.430206298828125, -0.3115234375, -0.192840576171875, -0.07415771484375, 0.044525146484375, 0.1632080078125, 0.281890869140625, 0.40057373046875, 0.519256591796875, 0.637939453125, 0.756622314453125, 0.87530517578125, 0.993988037109375, 1.1126708984375, 1.231353759765625, 1.35003662109375, 1.468719482421875, 1.58740234375, 1.706085205078125, 1.82476806640625, 1.943450927734375, 2.0621337890625, 2.180816650390625, 2.29949951171875, 2.418182373046875, 2.536865234375, 2.655548095703125, 2.77423095703125, 2.892913818359375, 3.0115966796875, 3.130279541015625, 3.24896240234375, 3.367645263671875, 3.486328125]}, "gradients/decoder.transformer.h.18.attn.c_proj.weight": {"_type": "histogram", "values": [5.0, 4.0, 4.0, 2.0, 2.0, 7.0, 9.0, 12.0, 20.0, 18.0, 35.0, 33.0, 70.0, 116.0, 216.0, 309.0, 559.0, 1004.0, 1774.0, 3213.0, 5946.0, 11908.0, 29521.0, 96961.0, 422286.0, 345591.0, 80525.0, 25385.0, 10731.0, 5502.0, 2897.0, 1635.0, 928.0, 537.0, 316.0, 167.0, 98.0, 62.0, 47.0, 36.0, 15.0, 17.0, 10.0, 4.0, 4.0, 10.0, 3.0, 7.0, 5.0, 1.0, 3.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.4140625, -4.23699951171875, -4.0599365234375, -3.88287353515625, -3.705810546875, -3.52874755859375, -3.3516845703125, -3.17462158203125, -2.99755859375, -2.82049560546875, -2.6434326171875, -2.46636962890625, -2.289306640625, -2.11224365234375, -1.9351806640625, -1.75811767578125, -1.5810546875, -1.40399169921875, -1.2269287109375, -1.04986572265625, -0.872802734375, -0.69573974609375, -0.5186767578125, -0.34161376953125, -0.16455078125, 0.01251220703125, 0.1895751953125, 0.36663818359375, 0.543701171875, 0.72076416015625, 0.8978271484375, 1.07489013671875, 1.251953125, 1.42901611328125, 1.6060791015625, 1.78314208984375, 1.960205078125, 2.13726806640625, 2.3143310546875, 2.49139404296875, 2.66845703125, 2.84552001953125, 3.0225830078125, 3.19964599609375, 3.376708984375, 3.55377197265625, 3.7308349609375, 3.90789794921875, 4.0849609375, 4.26202392578125, 4.4390869140625, 4.61614990234375, 4.793212890625, 4.97027587890625, 5.1473388671875, 5.32440185546875, 5.50146484375, 5.67852783203125, 5.8555908203125, 6.03265380859375, 6.209716796875, 6.38677978515625, 6.5638427734375, 6.74090576171875, 6.91796875]}, "gradients/decoder.transformer.h.18.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 3.0, 1.0, 5.0, 5.0, 5.0, 4.0, 5.0, 10.0, 3.0, 10.0, 13.0, 15.0, 16.0, 18.0, 23.0, 23.0, 39.0, 36.0, 36.0, 52.0, 55.0, 77.0, 119.0, 345.0, 1506.0, 173.0, 82.0, 56.0, 47.0, 39.0, 42.0, 26.0, 30.0, 17.0, 22.0, 25.0, 13.0, 17.0, 13.0, 8.0, 9.0, 6.0, 6.0, 1.0, 1.0, 2.0, 3.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-13.6796875, -13.267333984375, -12.85498046875, -12.442626953125, -12.0302734375, -11.617919921875, -11.20556640625, -10.793212890625, -10.380859375, -9.968505859375, -9.55615234375, -9.143798828125, -8.7314453125, -8.319091796875, -7.90673828125, -7.494384765625, -7.08203125, -6.669677734375, -6.25732421875, -5.844970703125, -5.4326171875, -5.020263671875, -4.60791015625, -4.195556640625, -3.783203125, -3.370849609375, -2.95849609375, -2.546142578125, -2.1337890625, -1.721435546875, -1.30908203125, -0.896728515625, -0.484375, -0.072021484375, 0.34033203125, 0.752685546875, 1.1650390625, 1.577392578125, 1.98974609375, 2.402099609375, 2.814453125, 3.226806640625, 3.63916015625, 4.051513671875, 4.4638671875, 4.876220703125, 5.28857421875, 5.700927734375, 6.11328125, 6.525634765625, 6.93798828125, 7.350341796875, 7.7626953125, 8.175048828125, 8.58740234375, 8.999755859375, 9.412109375, 9.824462890625, 10.23681640625, 10.649169921875, 11.0615234375, 11.473876953125, 11.88623046875, 12.298583984375, 12.7109375]}, "gradients/decoder.transformer.h.18.attn.c_attn.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 1.0, 2.0, 3.0, 1.0, 2.0, 5.0, 8.0, 1.0, 3.0, 9.0, 11.0, 15.0, 15.0, 13.0, 28.0, 25.0, 34.0, 41.0, 62.0, 99.0, 172.0, 242.0, 453.0, 1574.0, 11774.0, 518526.0, 2583069.0, 25394.0, 2592.0, 630.0, 308.0, 177.0, 121.0, 70.0, 48.0, 34.0, 39.0, 24.0, 12.0, 25.0, 7.0, 9.0, 8.0, 10.0, 7.0, 2.0, 3.0, 1.0, 3.0, 2.0, 2.0, 2.0, 2.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-21.796875, -21.049072265625, -20.30126953125, -19.553466796875, -18.8056640625, -18.057861328125, -17.31005859375, -16.562255859375, -15.814453125, -15.066650390625, -14.31884765625, -13.571044921875, -12.8232421875, -12.075439453125, -11.32763671875, -10.579833984375, -9.83203125, -9.084228515625, -8.33642578125, -7.588623046875, -6.8408203125, -6.093017578125, -5.34521484375, -4.597412109375, -3.849609375, -3.101806640625, -2.35400390625, -1.606201171875, -0.8583984375, -0.110595703125, 0.63720703125, 1.385009765625, 2.1328125, 2.880615234375, 3.62841796875, 4.376220703125, 5.1240234375, 5.871826171875, 6.61962890625, 7.367431640625, 8.115234375, 8.863037109375, 9.61083984375, 10.358642578125, 11.1064453125, 11.854248046875, 12.60205078125, 13.349853515625, 14.09765625, 14.845458984375, 15.59326171875, 16.341064453125, 17.0888671875, 17.836669921875, 18.58447265625, 19.332275390625, 20.080078125, 20.827880859375, 21.57568359375, 22.323486328125, 23.0712890625, 23.819091796875, 24.56689453125, 25.314697265625, 26.0625]}, "gradients/decoder.transformer.h.18.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 4.0, 150.0, 546.0, 291.0, 24.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-158.98736572265625, -155.42532348632812, -151.86329650878906, -148.30125427246094, -144.73922729492188, -141.17718505859375, -137.61514282226562, -134.05311584472656, -130.49107360839844, -126.92903900146484, -123.36700439453125, -119.80496215820312, -116.24292755126953, -112.68089294433594, -109.11885833740234, -105.55682373046875, -101.99478912353516, -98.43275451660156, -94.87071990966797, -91.30868530273438, -87.74664306640625, -84.18460845947266, -80.62257385253906, -77.06053924560547, -73.49850463867188, -69.93647003173828, -66.37443542480469, -62.81239700317383, -59.25035858154297, -55.688323974609375, -52.12628936767578, -48.56425476074219, -45.00220489501953, -41.44017028808594, -37.87813186645508, -34.316097259521484, -30.754060745239258, -27.19202423095703, -23.629989624023438, -20.06795310974121, -16.505916595458984, -12.943880081176758, -9.381844520568848, -5.8198089599609375, -2.257772445678711, 1.3042640686035156, 4.866298675537109, 8.428335189819336, 11.990371704101562, 15.552408218383789, 19.114444732666016, 22.67647933959961, 26.238515853881836, 29.800552368164062, 33.362586975097656, 36.92462158203125, 40.48666000366211, 44.0486946105957, 47.61073303222656, 51.172767639160156, 54.73480224609375, 58.29684066772461, 61.8588752746582, 65.42091369628906, 68.98294830322266]}, "gradients/decoder.transformer.h.18.ln_1.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 4.0, 4.0, 3.0, 5.0, 9.0, 13.0, 12.0, 14.0, 16.0, 14.0, 30.0, 27.0, 24.0, 34.0, 32.0, 31.0, 48.0, 33.0, 51.0, 37.0, 50.0, 44.0, 52.0, 48.0, 49.0, 37.0, 30.0, 41.0, 26.0, 29.0, 35.0, 26.0, 30.0, 12.0, 11.0, 15.0, 6.0, 3.0, 4.0, 8.0, 3.0, 3.0, 7.0, 3.0, 0.0, 3.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-38.27981185913086, -36.99015808105469, -35.700504302978516, -34.41084671020508, -33.121192932128906, -31.831539154052734, -30.541885375976562, -29.25223159790039, -27.962575912475586, -26.672922134399414, -25.38326644897461, -24.093612670898438, -22.803958892822266, -21.51430320739746, -20.22464942932129, -18.934993743896484, -17.645339965820312, -16.35568618774414, -15.066030502319336, -13.776376724243164, -12.486721992492676, -11.197067260742188, -9.907413482666016, -8.617758750915527, -7.328104019165039, -6.038449287414551, -4.748795032501221, -3.4591405391693115, -2.1694860458374023, -0.8798313140869141, 0.409822940826416, 1.699477195739746, 2.9891357421875, 4.278790473937988, 5.568444728851318, 6.858098983764648, 8.147753715515137, 9.437408447265625, 10.727062225341797, 12.016716957092285, 13.306371688842773, 14.596026420593262, 15.88568115234375, 17.175334930419922, 18.464988708496094, 19.7546443939209, 21.04429817199707, 22.333953857421875, 23.623607635498047, 24.91326141357422, 26.202917098999023, 27.492570877075195, 28.7822265625, 30.071880340576172, 31.361534118652344, 32.651187896728516, 33.94084167480469, 35.23049545288086, 36.52014923095703, 37.80980682373047, 39.09946060180664, 40.38911437988281, 41.678768157958984, 42.968421936035156, 44.258079528808594]}, "gradients/decoder.transformer.h.17.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 4.0, 3.0, 10.0, 4.0, 3.0, 5.0, 12.0, 9.0, 21.0, 21.0, 20.0, 34.0, 29.0, 32.0, 40.0, 46.0, 51.0, 44.0, 34.0, 45.0, 48.0, 50.0, 54.0, 45.0, 50.0, 42.0, 38.0, 29.0, 31.0, 34.0, 25.0, 19.0, 18.0, 19.0, 16.0, 11.0, 4.0, 5.0, 1.0, 4.0, 4.0, 3.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.60546875, -4.472930908203125, -4.34039306640625, -4.207855224609375, -4.0753173828125, -3.942779541015625, -3.81024169921875, -3.677703857421875, -3.545166015625, -3.412628173828125, -3.28009033203125, -3.147552490234375, -3.0150146484375, -2.882476806640625, -2.74993896484375, -2.617401123046875, -2.48486328125, -2.352325439453125, -2.21978759765625, -2.087249755859375, -1.9547119140625, -1.822174072265625, -1.68963623046875, -1.557098388671875, -1.424560546875, -1.292022705078125, -1.15948486328125, -1.026947021484375, -0.8944091796875, -0.761871337890625, -0.62933349609375, -0.496795654296875, -0.3642578125, -0.231719970703125, -0.09918212890625, 0.033355712890625, 0.1658935546875, 0.298431396484375, 0.43096923828125, 0.563507080078125, 0.696044921875, 0.828582763671875, 0.96112060546875, 1.093658447265625, 1.2261962890625, 1.358734130859375, 1.49127197265625, 1.623809814453125, 1.75634765625, 1.888885498046875, 2.02142333984375, 2.153961181640625, 2.2864990234375, 2.419036865234375, 2.55157470703125, 2.684112548828125, 2.816650390625, 2.949188232421875, 3.08172607421875, 3.214263916015625, 3.3468017578125, 3.479339599609375, 3.61187744140625, 3.744415283203125, 3.876953125]}, "gradients/decoder.transformer.h.17.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 3.0, 3.0, 6.0, 5.0, 11.0, 14.0, 19.0, 21.0, 47.0, 55.0, 70.0, 116.0, 152.0, 235.0, 400.0, 617.0, 1108.0, 1800.0, 3199.0, 6392.0, 13116.0, 28933.0, 71982.0, 199370.0, 583041.0, 1313220.0, 1200145.0, 491128.0, 167409.0, 61794.0, 25496.0, 11599.0, 5566.0, 2979.0, 1645.0, 978.0, 604.0, 360.0, 210.0, 149.0, 103.0, 75.0, 50.0, 20.0, 21.0, 11.0, 11.0, 6.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0], "bins": [-5.09375, -4.951934814453125, -4.81011962890625, -4.668304443359375, -4.5264892578125, -4.384674072265625, -4.24285888671875, -4.101043701171875, -3.959228515625, -3.817413330078125, -3.67559814453125, -3.533782958984375, -3.3919677734375, -3.250152587890625, -3.10833740234375, -2.966522216796875, -2.82470703125, -2.682891845703125, -2.54107666015625, -2.399261474609375, -2.2574462890625, -2.115631103515625, -1.97381591796875, -1.832000732421875, -1.690185546875, -1.548370361328125, -1.40655517578125, -1.264739990234375, -1.1229248046875, -0.981109619140625, -0.83929443359375, -0.697479248046875, -0.5556640625, -0.413848876953125, -0.27203369140625, -0.130218505859375, 0.0115966796875, 0.153411865234375, 0.29522705078125, 0.437042236328125, 0.578857421875, 0.720672607421875, 0.86248779296875, 1.004302978515625, 1.1461181640625, 1.287933349609375, 1.42974853515625, 1.571563720703125, 1.71337890625, 1.855194091796875, 1.99700927734375, 2.138824462890625, 2.2806396484375, 2.422454833984375, 2.56427001953125, 2.706085205078125, 2.847900390625, 2.989715576171875, 3.13153076171875, 3.273345947265625, 3.4151611328125, 3.556976318359375, 3.69879150390625, 3.840606689453125, 3.982421875]}, "gradients/decoder.transformer.h.17.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 2.0, 3.0, 3.0, 1.0, 6.0, 1.0, 7.0, 8.0, 13.0, 11.0, 19.0, 24.0, 30.0, 37.0, 60.0, 64.0, 99.0, 145.0, 186.0, 306.0, 334.0, 482.0, 460.0, 429.0, 327.0, 282.0, 175.0, 154.0, 106.0, 96.0, 60.0, 52.0, 39.0, 21.0, 13.0, 10.0, 5.0, 3.0, 2.0, 4.0, 4.0, 0.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-11.015625, -10.721435546875, -10.42724609375, -10.133056640625, -9.8388671875, -9.544677734375, -9.25048828125, -8.956298828125, -8.662109375, -8.367919921875, -8.07373046875, -7.779541015625, -7.4853515625, -7.191162109375, -6.89697265625, -6.602783203125, -6.30859375, -6.014404296875, -5.72021484375, -5.426025390625, -5.1318359375, -4.837646484375, -4.54345703125, -4.249267578125, -3.955078125, -3.660888671875, -3.36669921875, -3.072509765625, -2.7783203125, -2.484130859375, -2.18994140625, -1.895751953125, -1.6015625, -1.307373046875, -1.01318359375, -0.718994140625, -0.4248046875, -0.130615234375, 0.16357421875, 0.457763671875, 0.751953125, 1.046142578125, 1.34033203125, 1.634521484375, 1.9287109375, 2.222900390625, 2.51708984375, 2.811279296875, 3.10546875, 3.399658203125, 3.69384765625, 3.988037109375, 4.2822265625, 4.576416015625, 4.87060546875, 5.164794921875, 5.458984375, 5.753173828125, 6.04736328125, 6.341552734375, 6.6357421875, 6.929931640625, 7.22412109375, 7.518310546875, 7.8125]}, "gradients/decoder.transformer.h.17.mlp.c_fc.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 2.0, 4.0, 8.0, 5.0, 14.0, 15.0, 16.0, 15.0, 46.0, 64.0, 125.0, 192.0, 308.0, 660.0, 1270.0, 2531.0, 6031.0, 18098.0, 67356.0, 315387.0, 1541259.0, 1752836.0, 376725.0, 78225.0, 20682.0, 6984.0, 2715.0, 1247.0, 612.0, 299.0, 228.0, 106.0, 76.0, 40.0, 36.0, 23.0, 18.0, 9.0, 8.0, 5.0, 4.0, 4.0, 1.0, 2.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.890625, -8.5357666015625, -8.180908203125, -7.8260498046875, -7.47119140625, -7.1163330078125, -6.761474609375, -6.4066162109375, -6.0517578125, -5.6968994140625, -5.342041015625, -4.9871826171875, -4.63232421875, -4.2774658203125, -3.922607421875, -3.5677490234375, -3.212890625, -2.8580322265625, -2.503173828125, -2.1483154296875, -1.79345703125, -1.4385986328125, -1.083740234375, -0.7288818359375, -0.3740234375, -0.0191650390625, 0.335693359375, 0.6905517578125, 1.04541015625, 1.4002685546875, 1.755126953125, 2.1099853515625, 2.46484375, 2.8197021484375, 3.174560546875, 3.5294189453125, 3.88427734375, 4.2391357421875, 4.593994140625, 4.9488525390625, 5.3037109375, 5.6585693359375, 6.013427734375, 6.3682861328125, 6.72314453125, 7.0780029296875, 7.432861328125, 7.7877197265625, 8.142578125, 8.4974365234375, 8.852294921875, 9.2071533203125, 9.56201171875, 9.9168701171875, 10.271728515625, 10.6265869140625, 10.9814453125, 11.3363037109375, 11.691162109375, 12.0460205078125, 12.40087890625, 12.7557373046875, 13.110595703125, 13.4654541015625, 13.8203125]}, "gradients/decoder.transformer.h.17.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 7.0, 16.0, 68.0, 180.0, 332.0, 265.0, 119.0, 24.0, 3.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-234.86798095703125, -230.33230590820312, -225.79661560058594, -221.2609405517578, -216.7252655029297, -212.1895751953125, -207.65390014648438, -203.11822509765625, -198.58253479003906, -194.04685974121094, -189.51116943359375, -184.97549438476562, -180.4398193359375, -175.9041290283203, -171.3684539794922, -166.83277893066406, -162.29708862304688, -157.76141357421875, -153.22572326660156, -148.69004821777344, -144.1543731689453, -139.61868286132812, -135.0830078125, -130.54733276367188, -126.01165771484375, -121.4759750366211, -116.94029998779297, -112.40461730957031, -107.86893463134766, -103.333251953125, -98.79757690429688, -94.26189422607422, -89.72621154785156, -85.1905288696289, -80.65485382080078, -76.11917114257812, -71.58348846435547, -67.04780578613281, -62.51213073730469, -57.97644805908203, -53.440765380859375, -48.905086517333984, -44.36940383911133, -39.83372497558594, -35.29804229736328, -30.76236343383789, -26.2266845703125, -21.691003799438477, -17.155323028564453, -12.61964225769043, -8.083962440490723, -3.5482826232910156, 0.9873981475830078, 5.523078918457031, 10.058757781982422, 14.594438552856445, 19.13011932373047, 23.665800094604492, 28.201480865478516, 32.737159729003906, 37.27284240722656, 41.80852127075195, 46.344200134277344, 50.8798828125, 55.41556167602539]}, "gradients/decoder.transformer.h.17.ln_2.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 2.0, 1.0, 1.0, 4.0, 4.0, 4.0, 4.0, 2.0, 6.0, 5.0, 4.0, 11.0, 11.0, 10.0, 12.0, 14.0, 19.0, 20.0, 17.0, 19.0, 24.0, 29.0, 32.0, 25.0, 28.0, 42.0, 33.0, 26.0, 32.0, 30.0, 48.0, 41.0, 48.0, 32.0, 30.0, 38.0, 27.0, 21.0, 31.0, 30.0, 20.0, 25.0, 19.0, 11.0, 12.0, 13.0, 15.0, 17.0, 18.0, 15.0, 3.0, 6.0, 6.0, 13.0, 2.0, 3.0, 1.0, 3.0, 1.0, 0.0, 1.0], "bins": [-27.68250846862793, -26.85538101196289, -26.02825164794922, -25.201122283935547, -24.373994827270508, -23.54686737060547, -22.719738006591797, -21.892608642578125, -21.065481185913086, -20.238353729248047, -19.411224365234375, -18.584095001220703, -17.756967544555664, -16.929840087890625, -16.102710723876953, -15.275582313537598, -14.448453903198242, -13.621325492858887, -12.794197082519531, -11.967068672180176, -11.13994026184082, -10.312811851501465, -9.48568344116211, -8.658555030822754, -7.831426620483398, -7.004298210144043, -6.1771697998046875, -5.350041389465332, -4.522912979125977, -3.695784568786621, -2.8686561584472656, -2.04152774810791, -1.2143993377685547, -0.3872709274291992, 0.43985748291015625, 1.2669858932495117, 2.094114303588867, 2.9212427139282227, 3.748371124267578, 4.575499534606934, 5.402627944946289, 6.2297563552856445, 7.056884765625, 7.8840131759643555, 8.711141586303711, 9.538269996643066, 10.365398406982422, 11.192526817321777, 12.019655227661133, 12.846783638000488, 13.673912048339844, 14.5010404586792, 15.328168869018555, 16.155296325683594, 16.982425689697266, 17.809555053710938, 18.636682510375977, 19.463809967041016, 20.290939331054688, 21.11806869506836, 21.9451961517334, 22.772323608398438, 23.59945297241211, 24.42658233642578, 25.25370979309082]}, "gradients/decoder.transformer.h.17.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 4.0, 4.0, 8.0, 4.0, 5.0, 12.0, 13.0, 11.0, 10.0, 22.0, 24.0, 18.0, 25.0, 26.0, 40.0, 37.0, 35.0, 42.0, 40.0, 50.0, 41.0, 44.0, 45.0, 49.0, 50.0, 46.0, 38.0, 43.0, 35.0, 26.0, 28.0, 27.0, 24.0, 13.0, 17.0, 16.0, 12.0, 7.0, 9.0, 5.0, 0.0, 2.0, 2.0, 1.0, 5.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.05859375, -3.93017578125, -3.8017578125, -3.67333984375, -3.544921875, -3.41650390625, -3.2880859375, -3.15966796875, -3.03125, -2.90283203125, -2.7744140625, -2.64599609375, -2.517578125, -2.38916015625, -2.2607421875, -2.13232421875, -2.00390625, -1.87548828125, -1.7470703125, -1.61865234375, -1.490234375, -1.36181640625, -1.2333984375, -1.10498046875, -0.9765625, -0.84814453125, -0.7197265625, -0.59130859375, -0.462890625, -0.33447265625, -0.2060546875, -0.07763671875, 0.05078125, 0.17919921875, 0.3076171875, 0.43603515625, 0.564453125, 0.69287109375, 0.8212890625, 0.94970703125, 1.078125, 1.20654296875, 1.3349609375, 1.46337890625, 1.591796875, 1.72021484375, 1.8486328125, 1.97705078125, 2.10546875, 2.23388671875, 2.3623046875, 2.49072265625, 2.619140625, 2.74755859375, 2.8759765625, 3.00439453125, 3.1328125, 3.26123046875, 3.3896484375, 3.51806640625, 3.646484375, 3.77490234375, 3.9033203125, 4.03173828125, 4.16015625]}, "gradients/decoder.transformer.h.17.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 0.0, 2.0, 2.0, 3.0, 3.0, 4.0, 5.0, 12.0, 12.0, 24.0, 38.0, 55.0, 88.0, 158.0, 235.0, 371.0, 679.0, 1164.0, 1899.0, 3308.0, 5848.0, 10249.0, 18242.0, 32284.0, 58922.0, 101150.0, 160055.0, 204432.0, 175718.0, 116654.0, 68767.0, 38539.0, 21671.0, 11909.0, 6712.0, 3766.0, 2261.0, 1307.0, 796.0, 482.0, 282.0, 172.0, 109.0, 65.0, 47.0, 19.0, 26.0, 7.0, 6.0, 3.0, 4.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.64404296875, -0.623687744140625, -0.60333251953125, -0.582977294921875, -0.5626220703125, -0.542266845703125, -0.52191162109375, -0.501556396484375, -0.481201171875, -0.460845947265625, -0.44049072265625, -0.420135498046875, -0.3997802734375, -0.379425048828125, -0.35906982421875, -0.338714599609375, -0.318359375, -0.298004150390625, -0.27764892578125, -0.257293701171875, -0.2369384765625, -0.216583251953125, -0.19622802734375, -0.175872802734375, -0.155517578125, -0.135162353515625, -0.11480712890625, -0.094451904296875, -0.0740966796875, -0.053741455078125, -0.03338623046875, -0.013031005859375, 0.00732421875, 0.027679443359375, 0.04803466796875, 0.068389892578125, 0.0887451171875, 0.109100341796875, 0.12945556640625, 0.149810791015625, 0.170166015625, 0.190521240234375, 0.21087646484375, 0.231231689453125, 0.2515869140625, 0.271942138671875, 0.29229736328125, 0.312652587890625, 0.3330078125, 0.353363037109375, 0.37371826171875, 0.394073486328125, 0.4144287109375, 0.434783935546875, 0.45513916015625, 0.475494384765625, 0.495849609375, 0.516204833984375, 0.53656005859375, 0.556915283203125, 0.5772705078125, 0.597625732421875, 0.61798095703125, 0.638336181640625, 0.65869140625]}, "gradients/decoder.transformer.h.17.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0, 3.0, 6.0, 2.0, 9.0, 6.0, 9.0, 15.0, 15.0, 14.0, 14.0, 13.0, 23.0, 32.0, 31.0, 40.0, 25.0, 46.0, 43.0, 42.0, 40.0, 43.0, 38.0, 1065.0, 31.0, 47.0, 30.0, 30.0, 33.0, 51.0, 30.0, 26.0, 28.0, 18.0, 16.0, 20.0, 13.0, 15.0, 16.0, 6.0, 8.0, 6.0, 10.0, 11.0, 8.0, 4.0, 4.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-2.4375, -2.35711669921875, -2.2767333984375, -2.19635009765625, -2.115966796875, -2.03558349609375, -1.9552001953125, -1.87481689453125, -1.79443359375, -1.71405029296875, -1.6336669921875, -1.55328369140625, -1.472900390625, -1.39251708984375, -1.3121337890625, -1.23175048828125, -1.1513671875, -1.07098388671875, -0.9906005859375, -0.91021728515625, -0.829833984375, -0.74945068359375, -0.6690673828125, -0.58868408203125, -0.50830078125, -0.42791748046875, -0.3475341796875, -0.26715087890625, -0.186767578125, -0.10638427734375, -0.0260009765625, 0.05438232421875, 0.134765625, 0.21514892578125, 0.2955322265625, 0.37591552734375, 0.456298828125, 0.53668212890625, 0.6170654296875, 0.69744873046875, 0.77783203125, 0.85821533203125, 0.9385986328125, 1.01898193359375, 1.099365234375, 1.17974853515625, 1.2601318359375, 1.34051513671875, 1.4208984375, 1.50128173828125, 1.5816650390625, 1.66204833984375, 1.742431640625, 1.82281494140625, 1.9031982421875, 1.98358154296875, 2.06396484375, 2.14434814453125, 2.2247314453125, 2.30511474609375, 2.385498046875, 2.46588134765625, 2.5462646484375, 2.62664794921875, 2.70703125]}, "gradients/decoder.transformer.h.17.crossattention.c_attn.weight": {"_type": "histogram", "values": [3.0, 1.0, 3.0, 2.0, 4.0, 4.0, 10.0, 16.0, 23.0, 31.0, 40.0, 88.0, 107.0, 133.0, 237.0, 342.0, 541.0, 818.0, 1220.0, 2018.0, 2935.0, 4646.0, 7069.0, 10953.0, 16762.0, 26959.0, 43398.0, 68646.0, 106013.0, 152190.0, 1210668.0, 150465.0, 105201.0, 68082.0, 42899.0, 26947.0, 16825.0, 10747.0, 7087.0, 4537.0, 3019.0, 1937.0, 1206.0, 788.0, 513.0, 354.0, 229.0, 143.0, 115.0, 59.0, 32.0, 25.0, 19.0, 15.0, 10.0, 4.0, 4.0, 6.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.318603515625, -0.3081550598144531, -0.29770660400390625, -0.2872581481933594, -0.2768096923828125, -0.2663612365722656, -0.25591278076171875, -0.24546432495117188, -0.235015869140625, -0.22456741333007812, -0.21411895751953125, -0.20367050170898438, -0.1932220458984375, -0.18277359008789062, -0.17232513427734375, -0.16187667846679688, -0.15142822265625, -0.14097976684570312, -0.13053131103515625, -0.12008285522460938, -0.1096343994140625, -0.09918594360351562, -0.08873748779296875, -0.07828903198242188, -0.067840576171875, -0.057392120361328125, -0.04694366455078125, -0.036495208740234375, -0.0260467529296875, -0.015598297119140625, -0.00514984130859375, 0.005298614501953125, 0.0157470703125, 0.026195526123046875, 0.03664398193359375, 0.047092437744140625, 0.0575408935546875, 0.06798934936523438, 0.07843780517578125, 0.08888626098632812, 0.099334716796875, 0.10978317260742188, 0.12023162841796875, 0.13068008422851562, 0.1411285400390625, 0.15157699584960938, 0.16202545166015625, 0.17247390747070312, 0.18292236328125, 0.19337081909179688, 0.20381927490234375, 0.21426773071289062, 0.2247161865234375, 0.23516464233398438, 0.24561309814453125, 0.2560615539550781, 0.266510009765625, 0.2769584655761719, 0.28740692138671875, 0.2978553771972656, 0.3083038330078125, 0.3187522888183594, 0.32920074462890625, 0.3396492004394531, 0.35009765625]}, "gradients/decoder.transformer.h.17.crossattention.q_attn.bias": {"_type": "histogram", "values": [2.0, 2.0, 2.0, 2.0, 1.0, 1.0, 4.0, 1.0, 6.0, 5.0, 4.0, 3.0, 7.0, 14.0, 7.0, 12.0, 20.0, 11.0, 15.0, 16.0, 17.0, 21.0, 28.0, 33.0, 40.0, 43.0, 48.0, 54.0, 64.0, 64.0, 62.0, 50.0, 36.0, 42.0, 38.0, 39.0, 27.0, 15.0, 23.0, 23.0, 19.0, 16.0, 15.0, 13.0, 10.0, 7.0, 6.0, 5.0, 4.0, 3.0, 4.0, 7.0, 1.0, 2.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0], "bins": [-0.01137542724609375, -0.010986566543579102, -0.010597705841064453, -0.010208845138549805, -0.009819984436035156, -0.009431123733520508, -0.00904226303100586, -0.008653402328491211, -0.008264541625976562, -0.007875680923461914, -0.007486820220947266, -0.007097959518432617, -0.006709098815917969, -0.00632023811340332, -0.005931377410888672, -0.0055425167083740234, -0.005153656005859375, -0.0047647953033447266, -0.004375934600830078, -0.00398707389831543, -0.0035982131958007812, -0.003209352493286133, -0.0028204917907714844, -0.002431631088256836, -0.0020427703857421875, -0.001653909683227539, -0.0012650489807128906, -0.0008761882781982422, -0.00048732757568359375, -9.846687316894531e-05, 0.0002903938293457031, 0.0006792545318603516, 0.001068115234375, 0.0014569759368896484, 0.0018458366394042969, 0.0022346973419189453, 0.0026235580444335938, 0.003012418746948242, 0.0034012794494628906, 0.003790140151977539, 0.0041790008544921875, 0.004567861557006836, 0.004956722259521484, 0.005345582962036133, 0.005734443664550781, 0.00612330436706543, 0.006512165069580078, 0.0069010257720947266, 0.007289886474609375, 0.0076787471771240234, 0.008067607879638672, 0.00845646858215332, 0.008845329284667969, 0.009234189987182617, 0.009623050689697266, 0.010011911392211914, 0.010400772094726562, 0.010789632797241211, 0.01117849349975586, 0.011567354202270508, 0.011956214904785156, 0.012345075607299805, 0.012733936309814453, 0.013122797012329102, 0.01351165771484375]}, "gradients/decoder.transformer.h.17.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 4.0, 6.0, 3.0, 6.0, 4.0, 5.0, 5.0, 8.0, 16.0, 16.0, 28.0, 22.0, 38.0, 48.0, 49.0, 71.0, 124.0, 166.0, 245.0, 293.0, 521.0, 2312.0, 146329.0, 889387.0, 6799.0, 703.0, 407.0, 249.0, 200.0, 135.0, 86.0, 57.0, 48.0, 25.0, 35.0, 26.0, 12.0, 16.0, 18.0, 4.0, 8.0, 5.0, 4.0, 6.0, 4.0, 1.0, 2.0, 0.0, 3.0, 0.0, 3.0, 1.0, 2.0], "bins": [-0.2261962890625, -0.2195758819580078, -0.21295547485351562, -0.20633506774902344, -0.19971466064453125, -0.19309425354003906, -0.18647384643554688, -0.1798534393310547, -0.1732330322265625, -0.1666126251220703, -0.15999221801757812, -0.15337181091308594, -0.14675140380859375, -0.14013099670410156, -0.13351058959960938, -0.1268901824951172, -0.120269775390625, -0.11364936828613281, -0.10702896118164062, -0.10040855407714844, -0.09378814697265625, -0.08716773986816406, -0.08054733276367188, -0.07392692565917969, -0.0673065185546875, -0.06068611145019531, -0.054065704345703125, -0.04744529724121094, -0.04082489013671875, -0.03420448303222656, -0.027584075927734375, -0.020963668823242188, -0.01434326171875, -0.0077228546142578125, -0.001102447509765625, 0.0055179595947265625, 0.01213836669921875, 0.018758773803710938, 0.025379180908203125, 0.03199958801269531, 0.0386199951171875, 0.04524040222167969, 0.051860809326171875, 0.05848121643066406, 0.06510162353515625, 0.07172203063964844, 0.07834243774414062, 0.08496284484863281, 0.091583251953125, 0.09820365905761719, 0.10482406616210938, 0.11144447326660156, 0.11806488037109375, 0.12468528747558594, 0.13130569458007812, 0.1379261016845703, 0.1445465087890625, 0.1511669158935547, 0.15778732299804688, 0.16440773010253906, 0.17102813720703125, 0.17764854431152344, 0.18426895141601562, 0.1908893585205078, 0.197509765625]}, "gradients/decoder.transformer.h.17.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 6.0, 53.0, 731.0, 217.0, 8.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.12403152137994766, -0.11883340030908585, -0.11363527923822403, -0.10843715816736221, -0.1032390370965004, -0.09804091602563858, -0.09284279495477676, -0.08764467388391495, -0.08244655281305313, -0.07724843174219131, -0.0720503106713295, -0.06685218960046768, -0.061654068529605865, -0.05645594745874405, -0.05125782638788223, -0.046059705317020416, -0.0408615842461586, -0.03566346317529678, -0.030465342104434967, -0.02526722103357315, -0.020069099962711334, -0.014870978891849518, -0.009672857820987701, -0.004474736750125885, 0.0007233843207359314, 0.005921505391597748, 0.011119626462459564, 0.01631774753332138, 0.021515868604183197, 0.026713989675045013, 0.03191211074590683, 0.037110231816768646, 0.042308345437049866, 0.04750646650791168, 0.0527045875787735, 0.057902708649635315, 0.06310082972049713, 0.06829895079135895, 0.07349707186222076, 0.07869519293308258, 0.0838933140039444, 0.08909143507480621, 0.09428955614566803, 0.09948767721652985, 0.10468579828739166, 0.10988391935825348, 0.1150820404291153, 0.12028016149997711, 0.12547828257083893, 0.13067640364170074, 0.13587452471256256, 0.14107264578342438, 0.1462707668542862, 0.151468887925148, 0.15666700899600983, 0.16186513006687164, 0.16706325113773346, 0.17226137220859528, 0.1774594932794571, 0.1826576143503189, 0.18785573542118073, 0.19305385649204254, 0.19825197756290436, 0.20345009863376617, 0.208648219704628]}, "gradients/decoder.transformer.h.17.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 2.0, 3.0, 5.0, 2.0, 9.0, 6.0, 10.0, 10.0, 10.0, 13.0, 15.0, 17.0, 28.0, 23.0, 29.0, 29.0, 21.0, 31.0, 29.0, 37.0, 45.0, 37.0, 38.0, 46.0, 39.0, 39.0, 39.0, 34.0, 34.0, 47.0, 33.0, 29.0, 30.0, 30.0, 28.0, 18.0, 17.0, 17.0, 11.0, 12.0, 16.0, 8.0, 7.0, 6.0, 6.0, 5.0, 3.0, 5.0, 3.0, 1.0, 3.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.018207669258117676, -0.017568517476320267, -0.01692936383187771, -0.01629021018743515, -0.01565105840563774, -0.015011905692517757, -0.014372752979397774, -0.01373360026627779, -0.013094447553157806, -0.012455294840037823, -0.011816142126917839, -0.011176989413797855, -0.010537836700677872, -0.009898683987557888, -0.009259531274437904, -0.00862037856131792, -0.007981225848197937, -0.007342073135077953, -0.00670292042195797, -0.006063767708837986, -0.005424614995718002, -0.004785462282598019, -0.004146309569478035, -0.0035071568563580513, -0.0028680041432380676, -0.002228851430118084, -0.0015896987169981003, -0.0009505460038781166, -0.00031139329075813293, 0.00032775942236185074, 0.0009669121354818344, 0.001606064848601818, 0.0022452175617218018, 0.0028843702748417854, 0.003523522987961769, 0.004162675701081753, 0.0048018284142017365, 0.00544098112732172, 0.006080133840441704, 0.0067192865535616875, 0.007358439266681671, 0.007997591979801655, 0.008636744692921638, 0.009275897406041622, 0.009915050119161606, 0.01055420283228159, 0.011193355545401573, 0.011832508258521557, 0.01247166097164154, 0.013110813684761524, 0.013749966397881508, 0.014389119111001492, 0.015028271824121475, 0.015667423605918884, 0.016306577250361443, 0.016945730894804, 0.01758488267660141, 0.01822403445839882, 0.018863188102841377, 0.019502341747283936, 0.020141493529081345, 0.020780645310878754, 0.021419798955321312, 0.02205895259976387, 0.02269810438156128]}, "gradients/decoder.transformer.h.17.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 3.0, 4.0, 8.0, 4.0, 6.0, 11.0, 13.0, 11.0, 10.0, 22.0, 24.0, 18.0, 25.0, 26.0, 40.0, 38.0, 34.0, 42.0, 41.0, 49.0, 41.0, 45.0, 44.0, 49.0, 50.0, 46.0, 38.0, 43.0, 35.0, 26.0, 28.0, 27.0, 24.0, 13.0, 17.0, 16.0, 12.0, 7.0, 9.0, 5.0, 0.0, 2.0, 2.0, 1.0, 5.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.05859375, -3.93017578125, -3.8017578125, -3.67333984375, -3.544921875, -3.41650390625, -3.2880859375, -3.15966796875, -3.03125, -2.90283203125, -2.7744140625, -2.64599609375, -2.517578125, -2.38916015625, -2.2607421875, -2.13232421875, -2.00390625, -1.87548828125, -1.7470703125, -1.61865234375, -1.490234375, -1.36181640625, -1.2333984375, -1.10498046875, -0.9765625, -0.84814453125, -0.7197265625, -0.59130859375, -0.462890625, -0.33447265625, -0.2060546875, -0.07763671875, 0.05078125, 0.17919921875, 0.3076171875, 0.43603515625, 0.564453125, 0.69287109375, 0.8212890625, 0.94970703125, 1.078125, 1.20654296875, 1.3349609375, 1.46337890625, 1.591796875, 1.72021484375, 1.8486328125, 1.97705078125, 2.10546875, 2.23388671875, 2.3623046875, 2.49072265625, 2.619140625, 2.74755859375, 2.8759765625, 3.00439453125, 3.1328125, 3.26123046875, 3.3896484375, 3.51806640625, 3.646484375, 3.77490234375, 3.9033203125, 4.03173828125, 4.16015625]}, "gradients/decoder.transformer.h.17.attn.c_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 1.0, 1.0, 3.0, 5.0, 5.0, 7.0, 7.0, 12.0, 21.0, 20.0, 37.0, 46.0, 80.0, 91.0, 90.0, 166.0, 208.0, 317.0, 476.0, 861.0, 1349.0, 2166.0, 3700.0, 6059.0, 10333.0, 18444.0, 39120.0, 102728.0, 298585.0, 343346.0, 125310.0, 45523.0, 20865.0, 11462.0, 6543.0, 4001.0, 2402.0, 1461.0, 931.0, 599.0, 373.0, 248.0, 178.0, 114.0, 72.0, 55.0, 43.0, 35.0, 12.0, 14.0, 15.0, 6.0, 9.0, 4.0, 4.0, 2.0, 2.0, 2.0, 2.0, 0.0, 1.0], "bins": [-4.171875, -4.0418701171875, -3.911865234375, -3.7818603515625, -3.65185546875, -3.5218505859375, -3.391845703125, -3.2618408203125, -3.1318359375, -3.0018310546875, -2.871826171875, -2.7418212890625, -2.61181640625, -2.4818115234375, -2.351806640625, -2.2218017578125, -2.091796875, -1.9617919921875, -1.831787109375, -1.7017822265625, -1.57177734375, -1.4417724609375, -1.311767578125, -1.1817626953125, -1.0517578125, -0.9217529296875, -0.791748046875, -0.6617431640625, -0.53173828125, -0.4017333984375, -0.271728515625, -0.1417236328125, -0.01171875, 0.1182861328125, 0.248291015625, 0.3782958984375, 0.50830078125, 0.6383056640625, 0.768310546875, 0.8983154296875, 1.0283203125, 1.1583251953125, 1.288330078125, 1.4183349609375, 1.54833984375, 1.6783447265625, 1.808349609375, 1.9383544921875, 2.068359375, 2.1983642578125, 2.328369140625, 2.4583740234375, 2.58837890625, 2.7183837890625, 2.848388671875, 2.9783935546875, 3.1083984375, 3.2384033203125, 3.368408203125, 3.4984130859375, 3.62841796875, 3.7584228515625, 3.888427734375, 4.0184326171875, 4.1484375]}, "gradients/decoder.transformer.h.17.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 2.0, 1.0, 2.0, 4.0, 1.0, 4.0, 11.0, 8.0, 12.0, 14.0, 19.0, 19.0, 31.0, 27.0, 32.0, 41.0, 47.0, 61.0, 63.0, 110.0, 239.0, 1526.0, 257.0, 101.0, 69.0, 50.0, 47.0, 47.0, 31.0, 40.0, 30.0, 24.0, 14.0, 13.0, 13.0, 14.0, 13.0, 5.0, 7.0, 2.0, 4.0, 4.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-18.234375, -17.74853515625, -17.2626953125, -16.77685546875, -16.291015625, -15.80517578125, -15.3193359375, -14.83349609375, -14.34765625, -13.86181640625, -13.3759765625, -12.89013671875, -12.404296875, -11.91845703125, -11.4326171875, -10.94677734375, -10.4609375, -9.97509765625, -9.4892578125, -9.00341796875, -8.517578125, -8.03173828125, -7.5458984375, -7.06005859375, -6.57421875, -6.08837890625, -5.6025390625, -5.11669921875, -4.630859375, -4.14501953125, -3.6591796875, -3.17333984375, -2.6875, -2.20166015625, -1.7158203125, -1.22998046875, -0.744140625, -0.25830078125, 0.2275390625, 0.71337890625, 1.19921875, 1.68505859375, 2.1708984375, 2.65673828125, 3.142578125, 3.62841796875, 4.1142578125, 4.60009765625, 5.0859375, 5.57177734375, 6.0576171875, 6.54345703125, 7.029296875, 7.51513671875, 8.0009765625, 8.48681640625, 8.97265625, 9.45849609375, 9.9443359375, 10.43017578125, 10.916015625, 11.40185546875, 11.8876953125, 12.37353515625, 12.859375]}, "gradients/decoder.transformer.h.17.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 2.0, 7.0, 4.0, 6.0, 7.0, 6.0, 9.0, 17.0, 21.0, 25.0, 30.0, 52.0, 62.0, 106.0, 175.0, 259.0, 526.0, 2055.0, 56300.0, 3051405.0, 31826.0, 1630.0, 478.0, 247.0, 139.0, 81.0, 65.0, 63.0, 25.0, 21.0, 12.0, 11.0, 9.0, 6.0, 10.0, 4.0, 5.0, 5.0, 2.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-38.5625, -37.4091796875, -36.255859375, -35.1025390625, -33.94921875, -32.7958984375, -31.642578125, -30.4892578125, -29.3359375, -28.1826171875, -27.029296875, -25.8759765625, -24.72265625, -23.5693359375, -22.416015625, -21.2626953125, -20.109375, -18.9560546875, -17.802734375, -16.6494140625, -15.49609375, -14.3427734375, -13.189453125, -12.0361328125, -10.8828125, -9.7294921875, -8.576171875, -7.4228515625, -6.26953125, -5.1162109375, -3.962890625, -2.8095703125, -1.65625, -0.5029296875, 0.650390625, 1.8037109375, 2.95703125, 4.1103515625, 5.263671875, 6.4169921875, 7.5703125, 8.7236328125, 9.876953125, 11.0302734375, 12.18359375, 13.3369140625, 14.490234375, 15.6435546875, 16.796875, 17.9501953125, 19.103515625, 20.2568359375, 21.41015625, 22.5634765625, 23.716796875, 24.8701171875, 26.0234375, 27.1767578125, 28.330078125, 29.4833984375, 30.63671875, 31.7900390625, 32.943359375, 34.0966796875, 35.25]}, "gradients/decoder.transformer.h.17.ln_1.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 4.0, 14.0, 33.0, 85.0, 191.0, 203.0, 225.0, 143.0, 62.0, 40.0, 11.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-19.109067916870117, -17.74233627319336, -16.37560272216797, -15.008871078491211, -13.642139434814453, -12.275406837463379, -10.908674240112305, -9.541942596435547, -8.175209999084473, -6.808477878570557, -5.441745758056641, -4.075013160705566, -2.7082810401916504, -1.3415489196777344, 0.025183677673339844, 1.3919153213500977, 2.758647918701172, 4.125380039215088, 5.492112159729004, 6.858844757080078, 8.225576400756836, 9.59230899810791, 10.959041595458984, 12.325773239135742, 13.692505836486816, 15.05923843383789, 16.42597007751465, 17.792701721191406, 19.159435272216797, 20.526166915893555, 21.892898559570312, 23.259632110595703, 24.626361846923828, 25.993093490600586, 27.359827041625977, 28.726558685302734, 30.093290328979492, 31.46002197265625, 32.82675552368164, 34.19348907470703, 35.560218811035156, 36.92695236206055, 38.29368209838867, 39.66041564941406, 41.02714920043945, 42.39387893676758, 43.76061248779297, 45.127342224121094, 46.49407958984375, 47.86081314086914, 49.227542877197266, 50.594276428222656, 51.96100997924805, 53.32773971557617, 54.69447326660156, 56.06120300292969, 57.42793655395508, 58.79467010498047, 60.161399841308594, 61.528133392333984, 62.894866943359375, 64.2615966796875, 65.62832641601562, 66.99506378173828, 68.3617935180664]}, "gradients/decoder.transformer.h.17.ln_1.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 3.0, 2.0, 2.0, 2.0, 0.0, 0.0, 1.0, 11.0, 3.0, 13.0, 18.0, 13.0, 21.0, 16.0, 24.0, 19.0, 15.0, 29.0, 24.0, 27.0, 35.0, 31.0, 37.0, 31.0, 41.0, 49.0, 37.0, 48.0, 36.0, 31.0, 32.0, 41.0, 36.0, 34.0, 30.0, 30.0, 28.0, 22.0, 16.0, 21.0, 16.0, 14.0, 19.0, 9.0, 10.0, 5.0, 7.0, 5.0, 7.0, 4.0, 3.0, 4.0, 2.0, 1.0, 1.0, 2.0, 0.0, 1.0], "bins": [-33.74267578125, -32.720741271972656, -31.698808670043945, -30.676876068115234, -29.65494155883789, -28.63300895690918, -27.61107635498047, -26.589141845703125, -25.56720733642578, -24.54527473449707, -23.523340225219727, -22.501407623291016, -21.479473114013672, -20.45754051208496, -19.43560791015625, -18.413673400878906, -17.391740798950195, -16.369808197021484, -15.34787368774414, -14.32594108581543, -13.304006576538086, -12.282073974609375, -11.260140419006348, -10.23820686340332, -9.216273307800293, -8.194339752197266, -7.172406196594238, -6.150473117828369, -5.128539562225342, -4.1066060066223145, -3.0846729278564453, -2.062739372253418, -1.0408058166503906, -0.018872380256652832, 1.003061056137085, 2.024994373321533, 3.0469279289245605, 4.068861484527588, 5.090794563293457, 6.112728118896484, 7.134661674499512, 8.156595230102539, 9.178528785705566, 10.200462341308594, 11.222394943237305, 12.244329452514648, 13.26626205444336, 14.288195610046387, 15.310129165649414, 16.332061767578125, 17.35399627685547, 18.37592887878418, 19.397863388061523, 20.419795989990234, 21.441730499267578, 22.46366310119629, 23.485595703125, 24.50752830505371, 25.529462814331055, 26.551395416259766, 27.57332992553711, 28.59526252746582, 29.61719512939453, 30.639129638671875, 31.66106414794922]}, "gradients/decoder.transformer.h.16.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 5.0, 3.0, 3.0, 7.0, 8.0, 13.0, 9.0, 11.0, 8.0, 22.0, 22.0, 25.0, 26.0, 29.0, 34.0, 41.0, 29.0, 46.0, 41.0, 40.0, 47.0, 45.0, 53.0, 41.0, 48.0, 37.0, 41.0, 35.0, 36.0, 39.0, 35.0, 24.0, 19.0, 18.0, 12.0, 23.0, 8.0, 10.0, 6.0, 1.0, 4.0, 6.0, 3.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-4.24609375, -4.112060546875, -3.97802734375, -3.843994140625, -3.7099609375, -3.575927734375, -3.44189453125, -3.307861328125, -3.173828125, -3.039794921875, -2.90576171875, -2.771728515625, -2.6376953125, -2.503662109375, -2.36962890625, -2.235595703125, -2.1015625, -1.967529296875, -1.83349609375, -1.699462890625, -1.5654296875, -1.431396484375, -1.29736328125, -1.163330078125, -1.029296875, -0.895263671875, -0.76123046875, -0.627197265625, -0.4931640625, -0.359130859375, -0.22509765625, -0.091064453125, 0.04296875, 0.177001953125, 0.31103515625, 0.445068359375, 0.5791015625, 0.713134765625, 0.84716796875, 0.981201171875, 1.115234375, 1.249267578125, 1.38330078125, 1.517333984375, 1.6513671875, 1.785400390625, 1.91943359375, 2.053466796875, 2.1875, 2.321533203125, 2.45556640625, 2.589599609375, 2.7236328125, 2.857666015625, 2.99169921875, 3.125732421875, 3.259765625, 3.393798828125, 3.52783203125, 3.661865234375, 3.7958984375, 3.929931640625, 4.06396484375, 4.197998046875, 4.33203125]}, "gradients/decoder.transformer.h.16.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 6.0, 5.0, 5.0, 13.0, 9.0, 14.0, 14.0, 16.0, 27.0, 25.0, 42.0, 35.0, 59.0, 86.0, 108.0, 224.0, 636.0, 2439.0, 12239.0, 163594.0, 2868391.0, 1092932.0, 45819.0, 5412.0, 1236.0, 370.0, 152.0, 98.0, 48.0, 35.0, 49.0, 34.0, 20.0, 21.0, 23.0, 17.0, 13.0, 9.0, 3.0, 8.0, 1.0, 2.0, 2.0, 1.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-15.5703125, -15.0953369140625, -14.620361328125, -14.1453857421875, -13.67041015625, -13.1954345703125, -12.720458984375, -12.2454833984375, -11.7705078125, -11.2955322265625, -10.820556640625, -10.3455810546875, -9.87060546875, -9.3956298828125, -8.920654296875, -8.4456787109375, -7.970703125, -7.4957275390625, -7.020751953125, -6.5457763671875, -6.07080078125, -5.5958251953125, -5.120849609375, -4.6458740234375, -4.1708984375, -3.6959228515625, -3.220947265625, -2.7459716796875, -2.27099609375, -1.7960205078125, -1.321044921875, -0.8460693359375, -0.37109375, 0.1038818359375, 0.578857421875, 1.0538330078125, 1.52880859375, 2.0037841796875, 2.478759765625, 2.9537353515625, 3.4287109375, 3.9036865234375, 4.378662109375, 4.8536376953125, 5.32861328125, 5.8035888671875, 6.278564453125, 6.7535400390625, 7.228515625, 7.7034912109375, 8.178466796875, 8.6534423828125, 9.12841796875, 9.6033935546875, 10.078369140625, 10.5533447265625, 11.0283203125, 11.5032958984375, 11.978271484375, 12.4532470703125, 12.92822265625, 13.4031982421875, 13.878173828125, 14.3531494140625, 14.828125]}, "gradients/decoder.transformer.h.16.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 3.0, 0.0, 4.0, 3.0, 7.0, 11.0, 20.0, 31.0, 28.0, 34.0, 67.0, 97.0, 145.0, 224.0, 352.0, 412.0, 526.0, 606.0, 447.0, 349.0, 225.0, 154.0, 113.0, 79.0, 50.0, 35.0, 23.0, 19.0, 5.0, 9.0, 5.0, 3.0, 0.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-10.765625, -10.382080078125, -9.99853515625, -9.614990234375, -9.2314453125, -8.847900390625, -8.46435546875, -8.080810546875, -7.697265625, -7.313720703125, -6.93017578125, -6.546630859375, -6.1630859375, -5.779541015625, -5.39599609375, -5.012451171875, -4.62890625, -4.245361328125, -3.86181640625, -3.478271484375, -3.0947265625, -2.711181640625, -2.32763671875, -1.944091796875, -1.560546875, -1.177001953125, -0.79345703125, -0.409912109375, -0.0263671875, 0.357177734375, 0.74072265625, 1.124267578125, 1.5078125, 1.891357421875, 2.27490234375, 2.658447265625, 3.0419921875, 3.425537109375, 3.80908203125, 4.192626953125, 4.576171875, 4.959716796875, 5.34326171875, 5.726806640625, 6.1103515625, 6.493896484375, 6.87744140625, 7.260986328125, 7.64453125, 8.028076171875, 8.41162109375, 8.795166015625, 9.1787109375, 9.562255859375, 9.94580078125, 10.329345703125, 10.712890625, 11.096435546875, 11.47998046875, 11.863525390625, 12.2470703125, 12.630615234375, 13.01416015625, 13.397705078125, 13.78125]}, "gradients/decoder.transformer.h.16.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 1.0, 2.0, 9.0, 11.0, 8.0, 26.0, 40.0, 48.0, 73.0, 127.0, 225.0, 442.0, 1767.0, 16100.0, 837509.0, 3268172.0, 64608.0, 3676.0, 762.0, 306.0, 128.0, 84.0, 52.0, 37.0, 27.0, 21.0, 12.0, 7.0, 7.0, 1.0, 2.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-38.46875, -37.434814453125, -36.40087890625, -35.366943359375, -34.3330078125, -33.299072265625, -32.26513671875, -31.231201171875, -30.197265625, -29.163330078125, -28.12939453125, -27.095458984375, -26.0615234375, -25.027587890625, -23.99365234375, -22.959716796875, -21.92578125, -20.891845703125, -19.85791015625, -18.823974609375, -17.7900390625, -16.756103515625, -15.72216796875, -14.688232421875, -13.654296875, -12.620361328125, -11.58642578125, -10.552490234375, -9.5185546875, -8.484619140625, -7.45068359375, -6.416748046875, -5.3828125, -4.348876953125, -3.31494140625, -2.281005859375, -1.2470703125, -0.213134765625, 0.82080078125, 1.854736328125, 2.888671875, 3.922607421875, 4.95654296875, 5.990478515625, 7.0244140625, 8.058349609375, 9.09228515625, 10.126220703125, 11.16015625, 12.194091796875, 13.22802734375, 14.261962890625, 15.2958984375, 16.329833984375, 17.36376953125, 18.397705078125, 19.431640625, 20.465576171875, 21.49951171875, 22.533447265625, 23.5673828125, 24.601318359375, 25.63525390625, 26.669189453125, 27.703125]}, "gradients/decoder.transformer.h.16.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 3.0, 2.0, 1.0, 4.0, 2.0, 8.0, 6.0, 18.0, 28.0, 38.0, 64.0, 85.0, 93.0, 102.0, 97.0, 93.0, 99.0, 86.0, 63.0, 49.0, 29.0, 17.0, 15.0, 7.0, 4.0, 2.0, 3.0, 2.0], "bins": [-78.19986724853516, -76.66096496582031, -75.1220703125, -73.58316802978516, -72.04427337646484, -70.50537109375, -68.96647644042969, -67.42757415771484, -65.88867950439453, -64.34977722167969, -62.810882568359375, -61.2719841003418, -59.73308563232422, -58.19418716430664, -56.65528869628906, -55.11638641357422, -53.57748794555664, -52.03858947753906, -50.499691009521484, -48.960792541503906, -47.42189407348633, -45.88299560546875, -44.344093322753906, -42.805198669433594, -41.26629638671875, -39.72739791870117, -38.188499450683594, -36.649600982666016, -35.11070251464844, -33.57180404663086, -32.03290557861328, -30.49400520324707, -28.955106735229492, -27.416208267211914, -25.877309799194336, -24.338409423828125, -22.799510955810547, -21.26061248779297, -19.72171401977539, -18.182815551757812, -16.643917083740234, -15.105018615722656, -13.566120147705078, -12.027220726013184, -10.488322257995605, -8.949423789978027, -7.410524368286133, -5.871625900268555, -4.332727432250977, -2.7938287258148193, -1.254930019378662, 0.2839689254760742, 1.8228673934936523, 3.3617658615112305, 4.900665283203125, 6.439563751220703, 7.978462219238281, 9.51736068725586, 11.056259155273438, 12.595158576965332, 14.13405704498291, 15.672955513000488, 17.211854934692383, 18.75075340270996, 20.28965187072754]}, "gradients/decoder.transformer.h.16.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 3.0, 2.0, 3.0, 2.0, 4.0, 6.0, 4.0, 5.0, 11.0, 10.0, 14.0, 18.0, 15.0, 19.0, 27.0, 34.0, 20.0, 35.0, 47.0, 26.0, 52.0, 62.0, 56.0, 50.0, 46.0, 36.0, 42.0, 36.0, 39.0, 49.0, 40.0, 29.0, 29.0, 28.0, 13.0, 19.0, 16.0, 20.0, 11.0, 9.0, 8.0, 10.0, 1.0, 2.0, 4.0, 4.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-32.34918212890625, -31.29689598083496, -30.244611740112305, -29.192325592041016, -28.14004135131836, -27.08775520324707, -26.03546905517578, -24.983184814453125, -23.93090057373047, -22.87861442565918, -21.826330184936523, -20.774044036865234, -19.721759796142578, -18.66947364807129, -17.6171875, -16.564903259277344, -15.512617111206055, -14.460331916809082, -13.40804672241211, -12.35576057434082, -11.303476333618164, -10.251190185546875, -9.198904991149902, -8.14661979675293, -7.094334602355957, -6.042049407958984, -4.989764213562012, -3.937478542327881, -2.885193347930908, -1.8329081535339355, -0.7806224822998047, 0.27166271209716797, 1.3239479064941406, 2.3762331008911133, 3.428518533706665, 4.480803966522217, 5.5330891609191895, 6.585374355316162, 7.637660026550293, 8.689945220947266, 9.742230415344238, 10.794515609741211, 11.846800804138184, 12.899085998535156, 13.951372146606445, 15.003656387329102, 16.05594253540039, 17.108226776123047, 18.160512924194336, 19.212799072265625, 20.26508331298828, 21.31736946105957, 22.369653701782227, 23.421939849853516, 24.474224090576172, 25.52651023864746, 26.57879638671875, 27.63108253479004, 28.683366775512695, 29.735652923583984, 30.78793716430664, 31.84022331237793, 32.89250946044922, 33.944793701171875, 34.99707794189453]}, "gradients/decoder.transformer.h.16.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 4.0, 1.0, 2.0, 5.0, 4.0, 12.0, 10.0, 12.0, 11.0, 22.0, 22.0, 17.0, 19.0, 30.0, 41.0, 26.0, 49.0, 40.0, 35.0, 42.0, 57.0, 48.0, 38.0, 49.0, 35.0, 48.0, 45.0, 37.0, 41.0, 37.0, 28.0, 25.0, 25.0, 17.0, 21.0, 17.0, 5.0, 6.0, 9.0, 3.0, 7.0, 3.0, 4.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-4.25, -4.11309814453125, -3.9761962890625, -3.83929443359375, -3.702392578125, -3.56549072265625, -3.4285888671875, -3.29168701171875, -3.15478515625, -3.01788330078125, -2.8809814453125, -2.74407958984375, -2.607177734375, -2.47027587890625, -2.3333740234375, -2.19647216796875, -2.0595703125, -1.92266845703125, -1.7857666015625, -1.64886474609375, -1.511962890625, -1.37506103515625, -1.2381591796875, -1.10125732421875, -0.96435546875, -0.82745361328125, -0.6905517578125, -0.55364990234375, -0.416748046875, -0.27984619140625, -0.1429443359375, -0.00604248046875, 0.130859375, 0.26776123046875, 0.4046630859375, 0.54156494140625, 0.678466796875, 0.81536865234375, 0.9522705078125, 1.08917236328125, 1.22607421875, 1.36297607421875, 1.4998779296875, 1.63677978515625, 1.773681640625, 1.91058349609375, 2.0474853515625, 2.18438720703125, 2.3212890625, 2.45819091796875, 2.5950927734375, 2.73199462890625, 2.868896484375, 3.00579833984375, 3.1427001953125, 3.27960205078125, 3.41650390625, 3.55340576171875, 3.6903076171875, 3.82720947265625, 3.964111328125, 4.10101318359375, 4.2379150390625, 4.37481689453125, 4.51171875]}, "gradients/decoder.transformer.h.16.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 4.0, 4.0, 8.0, 9.0, 10.0, 16.0, 29.0, 50.0, 61.0, 115.0, 161.0, 211.0, 345.0, 509.0, 843.0, 1288.0, 1755.0, 2768.0, 4110.0, 6179.0, 9962.0, 15331.0, 23556.0, 36199.0, 54550.0, 80436.0, 111413.0, 140670.0, 148442.0, 125480.0, 93276.0, 65763.0, 43322.0, 28741.0, 18458.0, 11789.0, 7862.0, 5000.0, 3323.0, 2158.0, 1485.0, 951.0, 656.0, 406.0, 298.0, 173.0, 150.0, 68.0, 65.0, 39.0, 20.0, 20.0, 10.0, 8.0, 6.0, 4.0, 2.0, 1.0, 3.0, 0.0, 2.0, 0.0, 2.0], "bins": [-0.489501953125, -0.4727516174316406, -0.45600128173828125, -0.4392509460449219, -0.4225006103515625, -0.4057502746582031, -0.38899993896484375, -0.3722496032714844, -0.355499267578125, -0.3387489318847656, -0.32199859619140625, -0.3052482604980469, -0.2884979248046875, -0.2717475891113281, -0.25499725341796875, -0.23824691772460938, -0.22149658203125, -0.20474624633789062, -0.18799591064453125, -0.17124557495117188, -0.1544952392578125, -0.13774490356445312, -0.12099456787109375, -0.10424423217773438, -0.087493896484375, -0.07074356079101562, -0.05399322509765625, -0.037242889404296875, -0.0204925537109375, -0.003742218017578125, 0.01300811767578125, 0.029758453369140625, 0.0465087890625, 0.06325912475585938, 0.08000946044921875, 0.09675979614257812, 0.1135101318359375, 0.13026046752929688, 0.14701080322265625, 0.16376113891601562, 0.180511474609375, 0.19726181030273438, 0.21401214599609375, 0.23076248168945312, 0.2475128173828125, 0.2642631530761719, 0.28101348876953125, 0.2977638244628906, 0.31451416015625, 0.3312644958496094, 0.34801483154296875, 0.3647651672363281, 0.3815155029296875, 0.3982658386230469, 0.41501617431640625, 0.4317665100097656, 0.448516845703125, 0.4652671813964844, 0.48201751708984375, 0.4987678527832031, 0.5155181884765625, 0.5322685241699219, 0.5490188598632812, 0.5657691955566406, 0.58251953125]}, "gradients/decoder.transformer.h.16.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 4.0, 2.0, 1.0, 10.0, 3.0, 11.0, 3.0, 11.0, 7.0, 21.0, 18.0, 19.0, 17.0, 32.0, 27.0, 52.0, 37.0, 35.0, 40.0, 36.0, 52.0, 48.0, 39.0, 1068.0, 56.0, 34.0, 41.0, 30.0, 38.0, 34.0, 34.0, 29.0, 23.0, 20.0, 21.0, 21.0, 17.0, 12.0, 9.0, 6.0, 7.0, 7.0, 3.0, 2.0, 2.0, 0.0, 0.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.005859375, -2.91229248046875, -2.8187255859375, -2.72515869140625, -2.631591796875, -2.53802490234375, -2.4444580078125, -2.35089111328125, -2.25732421875, -2.16375732421875, -2.0701904296875, -1.97662353515625, -1.883056640625, -1.78948974609375, -1.6959228515625, -1.60235595703125, -1.5087890625, -1.41522216796875, -1.3216552734375, -1.22808837890625, -1.134521484375, -1.04095458984375, -0.9473876953125, -0.85382080078125, -0.76025390625, -0.66668701171875, -0.5731201171875, -0.47955322265625, -0.385986328125, -0.29241943359375, -0.1988525390625, -0.10528564453125, -0.01171875, 0.08184814453125, 0.1754150390625, 0.26898193359375, 0.362548828125, 0.45611572265625, 0.5496826171875, 0.64324951171875, 0.73681640625, 0.83038330078125, 0.9239501953125, 1.01751708984375, 1.111083984375, 1.20465087890625, 1.2982177734375, 1.39178466796875, 1.4853515625, 1.57891845703125, 1.6724853515625, 1.76605224609375, 1.859619140625, 1.95318603515625, 2.0467529296875, 2.14031982421875, 2.23388671875, 2.32745361328125, 2.4210205078125, 2.51458740234375, 2.608154296875, 2.70172119140625, 2.7952880859375, 2.88885498046875, 2.982421875]}, "gradients/decoder.transformer.h.16.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 4.0, 2.0, 2.0, 8.0, 5.0, 5.0, 6.0, 12.0, 47.0, 51.0, 63.0, 114.0, 170.0, 256.0, 440.0, 692.0, 1051.0, 1660.0, 2648.0, 4122.0, 6504.0, 10795.0, 17387.0, 28413.0, 46870.0, 76339.0, 117885.0, 180077.0, 1204963.0, 143420.0, 96249.0, 60739.0, 37095.0, 22417.0, 13735.0, 8495.0, 5300.0, 3305.0, 2039.0, 1329.0, 856.0, 557.0, 344.0, 243.0, 130.0, 85.0, 76.0, 48.0, 33.0, 18.0, 18.0, 10.0, 6.0, 7.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.373046875, -0.3614845275878906, -0.34992218017578125, -0.3383598327636719, -0.3267974853515625, -0.3152351379394531, -0.30367279052734375, -0.2921104431152344, -0.280548095703125, -0.2689857482910156, -0.25742340087890625, -0.24586105346679688, -0.2342987060546875, -0.22273635864257812, -0.21117401123046875, -0.19961166381835938, -0.18804931640625, -0.17648696899414062, -0.16492462158203125, -0.15336227416992188, -0.1417999267578125, -0.13023757934570312, -0.11867523193359375, -0.10711288452148438, -0.095550537109375, -0.08398818969726562, -0.07242584228515625, -0.060863494873046875, -0.0493011474609375, -0.037738800048828125, -0.02617645263671875, -0.014614105224609375, -0.0030517578125, 0.008510589599609375, 0.02007293701171875, 0.031635284423828125, 0.0431976318359375, 0.054759979248046875, 0.06632232666015625, 0.07788467407226562, 0.089447021484375, 0.10100936889648438, 0.11257171630859375, 0.12413406372070312, 0.1356964111328125, 0.14725875854492188, 0.15882110595703125, 0.17038345336914062, 0.18194580078125, 0.19350814819335938, 0.20507049560546875, 0.21663284301757812, 0.2281951904296875, 0.23975753784179688, 0.25131988525390625, 0.2628822326660156, 0.274444580078125, 0.2860069274902344, 0.29756927490234375, 0.3091316223144531, 0.3206939697265625, 0.3322563171386719, 0.34381866455078125, 0.3553810119628906, 0.366943359375]}, "gradients/decoder.transformer.h.16.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 3.0, 5.0, 6.0, 3.0, 4.0, 7.0, 9.0, 8.0, 10.0, 16.0, 27.0, 35.0, 54.0, 65.0, 93.0, 106.0, 154.0, 106.0, 77.0, 59.0, 51.0, 35.0, 20.0, 17.0, 6.0, 8.0, 11.0, 2.0, 2.0, 3.0, 2.0, 2.0, 2.0, 3.0, 0.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0222930908203125, -0.021509170532226562, -0.020725250244140625, -0.019941329956054688, -0.01915740966796875, -0.018373489379882812, -0.017589569091796875, -0.016805648803710938, -0.016021728515625, -0.015237808227539062, -0.014453887939453125, -0.013669967651367188, -0.01288604736328125, -0.012102127075195312, -0.011318206787109375, -0.010534286499023438, -0.0097503662109375, -0.008966445922851562, -0.008182525634765625, -0.0073986053466796875, -0.00661468505859375, -0.0058307647705078125, -0.005046844482421875, -0.0042629241943359375, -0.00347900390625, -0.0026950836181640625, -0.001911163330078125, -0.0011272430419921875, -0.00034332275390625, 0.0004405975341796875, 0.001224517822265625, 0.0020084381103515625, 0.0027923583984375, 0.0035762786865234375, 0.004360198974609375, 0.0051441192626953125, 0.00592803955078125, 0.0067119598388671875, 0.007495880126953125, 0.008279800415039062, 0.009063720703125, 0.009847640991210938, 0.010631561279296875, 0.011415481567382812, 0.01219940185546875, 0.012983322143554688, 0.013767242431640625, 0.014551162719726562, 0.0153350830078125, 0.016119003295898438, 0.016902923583984375, 0.017686843872070312, 0.01847076416015625, 0.019254684448242188, 0.020038604736328125, 0.020822525024414062, 0.0216064453125, 0.022390365600585938, 0.023174285888671875, 0.023958206176757812, 0.02474212646484375, 0.025526046752929688, 0.026309967041015625, 0.027093887329101562, 0.0278778076171875]}, "gradients/decoder.transformer.h.16.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 3.0, 2.0, 3.0, 2.0, 2.0, 4.0, 2.0, 2.0, 9.0, 16.0, 16.0, 16.0, 42.0, 51.0, 112.0, 162.0, 312.0, 1072.0, 105326.0, 938804.0, 1756.0, 374.0, 173.0, 96.0, 66.0, 41.0, 32.0, 17.0, 14.0, 7.0, 10.0, 6.0, 2.0, 7.0, 6.0, 2.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.46875, -0.4553985595703125, -0.442047119140625, -0.4286956787109375, -0.41534423828125, -0.4019927978515625, -0.388641357421875, -0.3752899169921875, -0.3619384765625, -0.3485870361328125, -0.335235595703125, -0.3218841552734375, -0.30853271484375, -0.2951812744140625, -0.281829833984375, -0.2684783935546875, -0.255126953125, -0.2417755126953125, -0.228424072265625, -0.2150726318359375, -0.20172119140625, -0.1883697509765625, -0.175018310546875, -0.1616668701171875, -0.1483154296875, -0.1349639892578125, -0.121612548828125, -0.1082611083984375, -0.09490966796875, -0.0815582275390625, -0.068206787109375, -0.0548553466796875, -0.04150390625, -0.0281524658203125, -0.014801025390625, -0.0014495849609375, 0.01190185546875, 0.0252532958984375, 0.038604736328125, 0.0519561767578125, 0.0653076171875, 0.0786590576171875, 0.092010498046875, 0.1053619384765625, 0.11871337890625, 0.1320648193359375, 0.145416259765625, 0.1587677001953125, 0.172119140625, 0.1854705810546875, 0.198822021484375, 0.2121734619140625, 0.22552490234375, 0.2388763427734375, 0.252227783203125, 0.2655792236328125, 0.2789306640625, 0.2922821044921875, 0.305633544921875, 0.3189849853515625, 0.33233642578125, 0.3456878662109375, 0.359039306640625, 0.3723907470703125, 0.3857421875]}, "gradients/decoder.transformer.h.16.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 2.0, 6.0, 55.0, 772.0, 176.0, 6.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.16217412054538727, -0.15654630959033966, -0.15091849863529205, -0.14529068768024445, -0.13966286182403564, -0.13403505086898804, -0.12840723991394043, -0.12277942895889282, -0.11715161800384521, -0.11152380704879761, -0.10589599609375, -0.1002681776881218, -0.09464036673307419, -0.08901255577802658, -0.08338473737239838, -0.07775692641735077, -0.07212911546230316, -0.06650130450725555, -0.06087348982691765, -0.05524567514657974, -0.049617864191532135, -0.04399005323648453, -0.03836223855614662, -0.032734423875808716, -0.02710661292076111, -0.021478800103068352, -0.015850987285375595, -0.010223174467682838, -0.004595361649990082, 0.0010324511677026749, 0.0066602639853954315, 0.012288078665733337, 0.01791590452194214, 0.023543717339634895, 0.029171530157327652, 0.03479934483766556, 0.040427155792713165, 0.04605496674776077, 0.05168278142809868, 0.057310596108436584, 0.06293840706348419, 0.0685662180185318, 0.0741940289735794, 0.07982184737920761, 0.08544965833425522, 0.09107746928930283, 0.09670528769493103, 0.10233309864997864, 0.10796090960502625, 0.11358872056007385, 0.11921653151512146, 0.12484434992074966, 0.13047215342521667, 0.13609997928142548, 0.14172779023647308, 0.1473556011915207, 0.1529834121465683, 0.1586112231016159, 0.1642390340566635, 0.16986684501171112, 0.17549467086791992, 0.18112248182296753, 0.18675029277801514, 0.19237810373306274, 0.19800591468811035]}, "gradients/decoder.transformer.h.16.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0, 4.0, 3.0, 3.0, 8.0, 9.0, 4.0, 12.0, 4.0, 13.0, 10.0, 19.0, 19.0, 18.0, 30.0, 30.0, 29.0, 34.0, 38.0, 24.0, 24.0, 37.0, 18.0, 39.0, 41.0, 29.0, 29.0, 31.0, 30.0, 35.0, 42.0, 44.0, 38.0, 31.0, 18.0, 21.0, 27.0, 28.0, 20.0, 22.0, 10.0, 11.0, 10.0, 6.0, 16.0, 13.0, 7.0, 7.0, 3.0, 7.0, 4.0, 4.0, 0.0, 1.0, 1.0, 1.0, 1.0], "bins": [-0.020583808422088623, -0.019961532205343246, -0.01933925412595272, -0.018716976046562195, -0.018094699829816818, -0.01747242361307144, -0.016850145533680916, -0.01622786745429039, -0.015605591237545013, -0.014983314089477062, -0.014361036941409111, -0.01373875979334116, -0.013116482645273209, -0.012494205497205257, -0.011871928349137306, -0.011249651201069355, -0.010627374053001404, -0.010005096904933453, -0.009382819756865501, -0.00876054260879755, -0.008138265460729599, -0.007515988312661648, -0.006893711164593697, -0.006271434016525745, -0.005649156868457794, -0.005026879720389843, -0.004404602572321892, -0.0037823254242539406, -0.0031600482761859894, -0.002537771128118038, -0.001915493980050087, -0.0012932168319821358, -0.0006709396839141846, -4.866253584623337e-05, 0.0005736146122217178, 0.001195891760289669, 0.0018181689083576202, 0.0024404460564255714, 0.0030627232044935226, 0.003685000352561474, 0.004307277500629425, 0.004929554648697376, 0.0055518317967653275, 0.006174108944833279, 0.00679638609290123, 0.007418663240969181, 0.008040940389037132, 0.008663217537105083, 0.009285494685173035, 0.009907771833240986, 0.010530048981308937, 0.011152326129376888, 0.01177460327744484, 0.01239688042551279, 0.013019157573580742, 0.013641434721648693, 0.014263711869716644, 0.014885989017784595, 0.015508266165852547, 0.016130544245243073, 0.01675282046198845, 0.017375096678733826, 0.01799737475812435, 0.018619652837514877, 0.019241929054260254]}, "gradients/decoder.transformer.h.16.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 4.0, 1.0, 2.0, 5.0, 4.0, 12.0, 10.0, 12.0, 11.0, 22.0, 22.0, 17.0, 19.0, 29.0, 41.0, 27.0, 47.0, 41.0, 36.0, 41.0, 57.0, 48.0, 39.0, 48.0, 35.0, 48.0, 45.0, 37.0, 42.0, 37.0, 27.0, 26.0, 24.0, 18.0, 20.0, 17.0, 6.0, 6.0, 9.0, 3.0, 6.0, 4.0, 4.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-4.25, -4.1131591796875, -3.976318359375, -3.8394775390625, -3.70263671875, -3.5657958984375, -3.428955078125, -3.2921142578125, -3.1552734375, -3.0184326171875, -2.881591796875, -2.7447509765625, -2.60791015625, -2.4710693359375, -2.334228515625, -2.1973876953125, -2.060546875, -1.9237060546875, -1.786865234375, -1.6500244140625, -1.51318359375, -1.3763427734375, -1.239501953125, -1.1026611328125, -0.9658203125, -0.8289794921875, -0.692138671875, -0.5552978515625, -0.41845703125, -0.2816162109375, -0.144775390625, -0.0079345703125, 0.12890625, 0.2657470703125, 0.402587890625, 0.5394287109375, 0.67626953125, 0.8131103515625, 0.949951171875, 1.0867919921875, 1.2236328125, 1.3604736328125, 1.497314453125, 1.6341552734375, 1.77099609375, 1.9078369140625, 2.044677734375, 2.1815185546875, 2.318359375, 2.4552001953125, 2.592041015625, 2.7288818359375, 2.86572265625, 3.0025634765625, 3.139404296875, 3.2762451171875, 3.4130859375, 3.5499267578125, 3.686767578125, 3.8236083984375, 3.96044921875, 4.0972900390625, 4.234130859375, 4.3709716796875, 4.5078125]}, "gradients/decoder.transformer.h.16.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 3.0, 2.0, 3.0, 7.0, 8.0, 8.0, 9.0, 25.0, 27.0, 33.0, 81.0, 140.0, 206.0, 341.0, 688.0, 1252.0, 2202.0, 4221.0, 7552.0, 13529.0, 24202.0, 43590.0, 94214.0, 233912.0, 332663.0, 152248.0, 63574.0, 32219.0, 18458.0, 10451.0, 5817.0, 3148.0, 1668.0, 885.0, 481.0, 294.0, 146.0, 88.0, 69.0, 27.0, 31.0, 12.0, 10.0, 12.0, 6.0, 3.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-3.228515625, -3.125244140625, -3.02197265625, -2.918701171875, -2.8154296875, -2.712158203125, -2.60888671875, -2.505615234375, -2.40234375, -2.299072265625, -2.19580078125, -2.092529296875, -1.9892578125, -1.885986328125, -1.78271484375, -1.679443359375, -1.576171875, -1.472900390625, -1.36962890625, -1.266357421875, -1.1630859375, -1.059814453125, -0.95654296875, -0.853271484375, -0.75, -0.646728515625, -0.54345703125, -0.440185546875, -0.3369140625, -0.233642578125, -0.13037109375, -0.027099609375, 0.076171875, 0.179443359375, 0.28271484375, 0.385986328125, 0.4892578125, 0.592529296875, 0.69580078125, 0.799072265625, 0.90234375, 1.005615234375, 1.10888671875, 1.212158203125, 1.3154296875, 1.418701171875, 1.52197265625, 1.625244140625, 1.728515625, 1.831787109375, 1.93505859375, 2.038330078125, 2.1416015625, 2.244873046875, 2.34814453125, 2.451416015625, 2.5546875, 2.657958984375, 2.76123046875, 2.864501953125, 2.9677734375, 3.071044921875, 3.17431640625, 3.277587890625, 3.380859375]}, "gradients/decoder.transformer.h.16.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 3.0, 2.0, 2.0, 3.0, 4.0, 7.0, 6.0, 7.0, 6.0, 9.0, 15.0, 18.0, 22.0, 21.0, 20.0, 16.0, 31.0, 27.0, 39.0, 34.0, 52.0, 59.0, 59.0, 118.0, 265.0, 1501.0, 200.0, 82.0, 83.0, 52.0, 33.0, 32.0, 34.0, 30.0, 31.0, 34.0, 21.0, 13.0, 15.0, 16.0, 9.0, 7.0, 10.0, 6.0, 4.0, 2.0, 1.0, 1.0, 1.0, 0.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-13.203125, -12.7373046875, -12.271484375, -11.8056640625, -11.33984375, -10.8740234375, -10.408203125, -9.9423828125, -9.4765625, -9.0107421875, -8.544921875, -8.0791015625, -7.61328125, -7.1474609375, -6.681640625, -6.2158203125, -5.75, -5.2841796875, -4.818359375, -4.3525390625, -3.88671875, -3.4208984375, -2.955078125, -2.4892578125, -2.0234375, -1.5576171875, -1.091796875, -0.6259765625, -0.16015625, 0.3056640625, 0.771484375, 1.2373046875, 1.703125, 2.1689453125, 2.634765625, 3.1005859375, 3.56640625, 4.0322265625, 4.498046875, 4.9638671875, 5.4296875, 5.8955078125, 6.361328125, 6.8271484375, 7.29296875, 7.7587890625, 8.224609375, 8.6904296875, 9.15625, 9.6220703125, 10.087890625, 10.5537109375, 11.01953125, 11.4853515625, 11.951171875, 12.4169921875, 12.8828125, 13.3486328125, 13.814453125, 14.2802734375, 14.74609375, 15.2119140625, 15.677734375, 16.1435546875, 16.609375]}, "gradients/decoder.transformer.h.16.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 0.0, 1.0, 3.0, 7.0, 6.0, 4.0, 14.0, 30.0, 28.0, 57.0, 69.0, 113.0, 164.0, 283.0, 592.0, 2881.0, 2121228.0, 1016304.0, 2531.0, 585.0, 321.0, 155.0, 94.0, 78.0, 45.0, 40.0, 23.0, 21.0, 10.0, 6.0, 12.0, 5.0, 2.0, 4.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-43.71875, -42.15576171875, -40.5927734375, -39.02978515625, -37.466796875, -35.90380859375, -34.3408203125, -32.77783203125, -31.21484375, -29.65185546875, -28.0888671875, -26.52587890625, -24.962890625, -23.39990234375, -21.8369140625, -20.27392578125, -18.7109375, -17.14794921875, -15.5849609375, -14.02197265625, -12.458984375, -10.89599609375, -9.3330078125, -7.77001953125, -6.20703125, -4.64404296875, -3.0810546875, -1.51806640625, 0.044921875, 1.60791015625, 3.1708984375, 4.73388671875, 6.296875, 7.85986328125, 9.4228515625, 10.98583984375, 12.548828125, 14.11181640625, 15.6748046875, 17.23779296875, 18.80078125, 20.36376953125, 21.9267578125, 23.48974609375, 25.052734375, 26.61572265625, 28.1787109375, 29.74169921875, 31.3046875, 32.86767578125, 34.4306640625, 35.99365234375, 37.556640625, 39.11962890625, 40.6826171875, 42.24560546875, 43.80859375, 45.37158203125, 46.9345703125, 48.49755859375, 50.060546875, 51.62353515625, 53.1865234375, 54.74951171875, 56.3125]}, "gradients/decoder.transformer.h.16.ln_1.weight": {"_type": "histogram", "values": [427.0, 586.0, 7.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.176066398620605, -0.4833803176879883, 7.209305763244629, 14.901991844177246, 22.594676971435547, 30.287364959716797, 37.98004913330078, 45.672733306884766, 53.365421295166016, 61.05810546875, 68.75079345703125, 76.4434814453125, 84.13616943359375, 91.828857421875, 99.52153778076172, 107.21421813964844, 114.90691375732422, 122.59960174560547, 130.2922821044922, 137.98497009277344, 145.6776580810547, 153.37034606933594, 161.0630340576172, 168.75570678710938, 176.44839477539062, 184.14108276367188, 191.83377075195312, 199.52645874023438, 207.21914672851562, 214.91183471679688, 222.60450744628906, 230.2971954345703, 237.98989868164062, 245.68258666992188, 253.37527465820312, 261.0679626464844, 268.7606506347656, 276.4533386230469, 284.1460266113281, 291.83868408203125, 299.5313720703125, 307.22406005859375, 314.916748046875, 322.60943603515625, 330.3021240234375, 337.99481201171875, 345.6875, 353.38018798828125, 361.0728759765625, 368.76556396484375, 376.458251953125, 384.15093994140625, 391.8436279296875, 399.53631591796875, 407.22900390625, 414.92169189453125, 422.6143798828125, 430.30706787109375, 437.999755859375, 445.69244384765625, 453.3851318359375, 461.07781982421875, 468.7705078125, 476.46319580078125, 484.1558532714844]}, "gradients/decoder.transformer.h.16.ln_1.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 0.0, 6.0, 2.0, 4.0, 7.0, 10.0, 12.0, 11.0, 12.0, 13.0, 16.0, 18.0, 25.0, 27.0, 33.0, 33.0, 26.0, 27.0, 37.0, 39.0, 32.0, 39.0, 42.0, 42.0, 39.0, 43.0, 21.0, 36.0, 28.0, 43.0, 37.0, 41.0, 23.0, 29.0, 26.0, 15.0, 24.0, 17.0, 13.0, 14.0, 11.0, 7.0, 8.0, 7.0, 5.0, 7.0, 3.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-40.046756744384766, -38.795963287353516, -37.545169830322266, -36.294376373291016, -35.0435791015625, -33.79278564453125, -32.5419921875, -31.29119873046875, -30.0404052734375, -28.78961181640625, -27.538818359375, -26.288022994995117, -25.037229537963867, -23.786436080932617, -22.535640716552734, -21.284847259521484, -20.034053802490234, -18.783260345458984, -17.532466888427734, -16.28167152404785, -15.030878067016602, -13.780084609985352, -12.529290199279785, -11.278495788574219, -10.027702331542969, -8.776908874511719, -7.526114463806152, -6.275320529937744, -5.024526596069336, -3.7737326622009277, -2.5229387283325195, -1.2721443176269531, -0.02135467529296875, 1.2294392585754395, 2.4802331924438477, 3.731027126312256, 4.981821060180664, 6.232614994049072, 7.4834089279174805, 8.734203338623047, 9.984996795654297, 11.235790252685547, 12.486584663391113, 13.73737907409668, 14.98817253112793, 16.23896598815918, 17.489761352539062, 18.740554809570312, 19.991348266601562, 21.242141723632812, 22.492935180664062, 23.743730545043945, 24.994524002075195, 26.245317459106445, 27.496112823486328, 28.746906280517578, 29.997699737548828, 31.248493194580078, 32.49928665161133, 33.75008010864258, 35.000877380371094, 36.251670837402344, 37.502464294433594, 38.753257751464844, 40.004051208496094]}, "gradients/decoder.transformer.h.15.mlp.c_proj.bias": {"_type": "histogram", "values": [3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 4.0, 4.0, 3.0, 5.0, 5.0, 5.0, 7.0, 10.0, 19.0, 21.0, 19.0, 20.0, 22.0, 27.0, 30.0, 36.0, 39.0, 39.0, 37.0, 42.0, 47.0, 52.0, 39.0, 46.0, 37.0, 45.0, 47.0, 40.0, 29.0, 37.0, 31.0, 28.0, 21.0, 26.0, 23.0, 17.0, 9.0, 8.0, 8.0, 6.0, 3.0, 5.0, 3.0, 5.0, 2.0, 4.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-4.09375, -3.95501708984375, -3.8162841796875, -3.67755126953125, -3.538818359375, -3.40008544921875, -3.2613525390625, -3.12261962890625, -2.98388671875, -2.84515380859375, -2.7064208984375, -2.56768798828125, -2.428955078125, -2.29022216796875, -2.1514892578125, -2.01275634765625, -1.8740234375, -1.73529052734375, -1.5965576171875, -1.45782470703125, -1.319091796875, -1.18035888671875, -1.0416259765625, -0.90289306640625, -0.76416015625, -0.62542724609375, -0.4866943359375, -0.34796142578125, -0.209228515625, -0.07049560546875, 0.0682373046875, 0.20697021484375, 0.345703125, 0.48443603515625, 0.6231689453125, 0.76190185546875, 0.900634765625, 1.03936767578125, 1.1781005859375, 1.31683349609375, 1.45556640625, 1.59429931640625, 1.7330322265625, 1.87176513671875, 2.010498046875, 2.14923095703125, 2.2879638671875, 2.42669677734375, 2.5654296875, 2.70416259765625, 2.8428955078125, 2.98162841796875, 3.120361328125, 3.25909423828125, 3.3978271484375, 3.53656005859375, 3.67529296875, 3.81402587890625, 3.9527587890625, 4.09149169921875, 4.230224609375, 4.36895751953125, 4.5076904296875, 4.64642333984375, 4.78515625]}, "gradients/decoder.transformer.h.15.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 4.0, 4.0, 6.0, 9.0, 11.0, 18.0, 26.0, 34.0, 52.0, 78.0, 96.0, 161.0, 320.0, 499.0, 827.0, 1541.0, 2939.0, 6104.0, 15212.0, 43363.0, 142072.0, 483690.0, 1284316.0, 1394234.0, 567888.0, 168594.0, 50981.0, 17481.0, 6871.0, 3120.0, 1547.0, 855.0, 504.0, 307.0, 171.0, 118.0, 77.0, 51.0, 31.0, 20.0, 17.0, 16.0, 13.0, 6.0, 3.0, 2.0, 6.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.078125, -5.89971923828125, -5.7213134765625, -5.54290771484375, -5.364501953125, -5.18609619140625, -5.0076904296875, -4.82928466796875, -4.65087890625, -4.47247314453125, -4.2940673828125, -4.11566162109375, -3.937255859375, -3.75885009765625, -3.5804443359375, -3.40203857421875, -3.2236328125, -3.04522705078125, -2.8668212890625, -2.68841552734375, -2.510009765625, -2.33160400390625, -2.1531982421875, -1.97479248046875, -1.79638671875, -1.61798095703125, -1.4395751953125, -1.26116943359375, -1.082763671875, -0.90435791015625, -0.7259521484375, -0.54754638671875, -0.369140625, -0.19073486328125, -0.0123291015625, 0.16607666015625, 0.344482421875, 0.52288818359375, 0.7012939453125, 0.87969970703125, 1.05810546875, 1.23651123046875, 1.4149169921875, 1.59332275390625, 1.771728515625, 1.95013427734375, 2.1285400390625, 2.30694580078125, 2.4853515625, 2.66375732421875, 2.8421630859375, 3.02056884765625, 3.198974609375, 3.37738037109375, 3.5557861328125, 3.73419189453125, 3.91259765625, 4.09100341796875, 4.2694091796875, 4.44781494140625, 4.626220703125, 4.80462646484375, 4.9830322265625, 5.16143798828125, 5.33984375]}, "gradients/decoder.transformer.h.15.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 4.0, 4.0, 5.0, 1.0, 3.0, 8.0, 8.0, 15.0, 18.0, 19.0, 52.0, 75.0, 95.0, 170.0, 262.0, 359.0, 406.0, 563.0, 557.0, 458.0, 337.0, 222.0, 142.0, 105.0, 47.0, 44.0, 33.0, 21.0, 21.0, 12.0, 7.0, 7.0, 5.0, 3.0, 1.0, 2.0, 1.0, 0.0, 1.0], "bins": [-16.90625, -16.511962890625, -16.11767578125, -15.723388671875, -15.3291015625, -14.934814453125, -14.54052734375, -14.146240234375, -13.751953125, -13.357666015625, -12.96337890625, -12.569091796875, -12.1748046875, -11.780517578125, -11.38623046875, -10.991943359375, -10.59765625, -10.203369140625, -9.80908203125, -9.414794921875, -9.0205078125, -8.626220703125, -8.23193359375, -7.837646484375, -7.443359375, -7.049072265625, -6.65478515625, -6.260498046875, -5.8662109375, -5.471923828125, -5.07763671875, -4.683349609375, -4.2890625, -3.894775390625, -3.50048828125, -3.106201171875, -2.7119140625, -2.317626953125, -1.92333984375, -1.529052734375, -1.134765625, -0.740478515625, -0.34619140625, 0.048095703125, 0.4423828125, 0.836669921875, 1.23095703125, 1.625244140625, 2.01953125, 2.413818359375, 2.80810546875, 3.202392578125, 3.5966796875, 3.990966796875, 4.38525390625, 4.779541015625, 5.173828125, 5.568115234375, 5.96240234375, 6.356689453125, 6.7509765625, 7.145263671875, 7.53955078125, 7.933837890625, 8.328125]}, "gradients/decoder.transformer.h.15.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 3.0, 4.0, 2.0, 4.0, 9.0, 12.0, 22.0, 23.0, 39.0, 56.0, 99.0, 191.0, 392.0, 841.0, 2540.0, 13156.0, 170703.0, 2795884.0, 1146506.0, 54767.0, 6215.0, 1588.0, 600.0, 284.0, 144.0, 82.0, 48.0, 24.0, 21.0, 13.0, 8.0, 5.0, 4.0, 4.0, 5.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-14.6875, -13.9814453125, -13.275390625, -12.5693359375, -11.86328125, -11.1572265625, -10.451171875, -9.7451171875, -9.0390625, -8.3330078125, -7.626953125, -6.9208984375, -6.21484375, -5.5087890625, -4.802734375, -4.0966796875, -3.390625, -2.6845703125, -1.978515625, -1.2724609375, -0.56640625, 0.1396484375, 0.845703125, 1.5517578125, 2.2578125, 2.9638671875, 3.669921875, 4.3759765625, 5.08203125, 5.7880859375, 6.494140625, 7.2001953125, 7.90625, 8.6123046875, 9.318359375, 10.0244140625, 10.73046875, 11.4365234375, 12.142578125, 12.8486328125, 13.5546875, 14.2607421875, 14.966796875, 15.6728515625, 16.37890625, 17.0849609375, 17.791015625, 18.4970703125, 19.203125, 19.9091796875, 20.615234375, 21.3212890625, 22.02734375, 22.7333984375, 23.439453125, 24.1455078125, 24.8515625, 25.5576171875, 26.263671875, 26.9697265625, 27.67578125, 28.3818359375, 29.087890625, 29.7939453125, 30.5]}, "gradients/decoder.transformer.h.15.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 9.0, 20.0, 54.0, 90.0, 202.0, 204.0, 210.0, 129.0, 58.0, 28.0, 6.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-154.40191650390625, -151.1575164794922, -147.91311645507812, -144.66871643066406, -141.42431640625, -138.17991638183594, -134.93551635742188, -131.6911163330078, -128.44671630859375, -125.20231628417969, -121.95791625976562, -118.71351623535156, -115.4691162109375, -112.22471618652344, -108.98031616210938, -105.73591613769531, -102.49152374267578, -99.24712371826172, -96.00272369384766, -92.7583236694336, -89.51392364501953, -86.26952362060547, -83.02513122558594, -79.78073120117188, -76.53633117675781, -73.29193115234375, -70.04753112792969, -66.80313110351562, -63.55873107910156, -60.3143310546875, -57.0699348449707, -53.82553482055664, -50.58113479614258, -47.336734771728516, -44.09233474731445, -40.847938537597656, -37.603538513183594, -34.35913848876953, -31.11473846435547, -27.870338439941406, -24.625938415527344, -21.38153839111328, -18.13713836669922, -14.892740249633789, -11.648340225219727, -8.403940200805664, -5.159542083740234, -1.9151420593261719, 1.3292579650878906, 4.573657512664795, 7.818057060241699, 11.062456130981445, 14.306856155395508, 17.55125617980957, 20.795654296875, 24.040054321289062, 27.284454345703125, 30.528854370117188, 33.77325439453125, 37.01765441894531, 40.262054443359375, 43.50645446777344, 46.750850677490234, 49.9952507019043, 53.23965072631836]}, "gradients/decoder.transformer.h.15.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 3.0, 2.0, 6.0, 5.0, 5.0, 8.0, 9.0, 3.0, 10.0, 14.0, 19.0, 8.0, 16.0, 22.0, 21.0, 36.0, 35.0, 38.0, 30.0, 36.0, 34.0, 31.0, 48.0, 41.0, 35.0, 54.0, 43.0, 36.0, 33.0, 41.0, 43.0, 34.0, 35.0, 28.0, 17.0, 36.0, 14.0, 17.0, 22.0, 10.0, 10.0, 6.0, 4.0, 8.0, 2.0, 1.0, 2.0, 3.0, 3.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-31.089622497558594, -30.034391403198242, -28.97916030883789, -27.923927307128906, -26.868696212768555, -25.813465118408203, -24.75823402404785, -23.7030029296875, -22.647769927978516, -21.592538833618164, -20.537307739257812, -19.482074737548828, -18.426843643188477, -17.371612548828125, -16.316381454467773, -15.261150360107422, -14.20591926574707, -13.150688171386719, -12.09545612335205, -11.0402250289917, -9.984992980957031, -8.92976188659668, -7.874530792236328, -6.819299221038818, -5.764067649841309, -4.708836078643799, -3.653604745864868, -2.5983734130859375, -1.5431418418884277, -0.48791027069091797, 0.5673208236694336, 1.6225523948669434, 2.677783966064453, 3.733015537261963, 4.788247108459473, 5.843478202819824, 6.898709774017334, 7.953941345214844, 9.009172439575195, 10.064403533935547, 11.119635581970215, 12.174866676330566, 13.230098724365234, 14.285329818725586, 15.340560913085938, 16.395793914794922, 17.45102310180664, 18.506256103515625, 19.561487197875977, 20.616718292236328, 21.67194938659668, 22.72718048095703, 23.782413482666016, 24.837644577026367, 25.89287567138672, 26.94810676574707, 28.003337860107422, 29.058568954467773, 30.113800048828125, 31.16903305053711, 32.22426223754883, 33.27949523925781, 34.33472442626953, 35.389957427978516, 36.4451904296875]}, "gradients/decoder.transformer.h.15.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 3.0, 2.0, 1.0, 2.0, 2.0, 7.0, 6.0, 4.0, 4.0, 4.0, 12.0, 5.0, 14.0, 17.0, 18.0, 27.0, 20.0, 29.0, 32.0, 29.0, 31.0, 40.0, 39.0, 45.0, 48.0, 34.0, 38.0, 55.0, 43.0, 45.0, 39.0, 34.0, 39.0, 32.0, 34.0, 34.0, 26.0, 17.0, 19.0, 11.0, 12.0, 19.0, 9.0, 6.0, 2.0, 8.0, 5.0, 3.0, 2.0, 4.0, 3.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0], "bins": [-3.908203125, -3.772247314453125, -3.63629150390625, -3.500335693359375, -3.3643798828125, -3.228424072265625, -3.09246826171875, -2.956512451171875, -2.820556640625, -2.684600830078125, -2.54864501953125, -2.412689208984375, -2.2767333984375, -2.140777587890625, -2.00482177734375, -1.868865966796875, -1.73291015625, -1.596954345703125, -1.46099853515625, -1.325042724609375, -1.1890869140625, -1.053131103515625, -0.91717529296875, -0.781219482421875, -0.645263671875, -0.509307861328125, -0.37335205078125, -0.237396240234375, -0.1014404296875, 0.034515380859375, 0.17047119140625, 0.306427001953125, 0.4423828125, 0.578338623046875, 0.71429443359375, 0.850250244140625, 0.9862060546875, 1.122161865234375, 1.25811767578125, 1.394073486328125, 1.530029296875, 1.665985107421875, 1.80194091796875, 1.937896728515625, 2.0738525390625, 2.209808349609375, 2.34576416015625, 2.481719970703125, 2.61767578125, 2.753631591796875, 2.88958740234375, 3.025543212890625, 3.1614990234375, 3.297454833984375, 3.43341064453125, 3.569366455078125, 3.705322265625, 3.841278076171875, 3.97723388671875, 4.113189697265625, 4.2491455078125, 4.385101318359375, 4.52105712890625, 4.657012939453125, 4.79296875]}, "gradients/decoder.transformer.h.15.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 5.0, 9.0, 11.0, 6.0, 22.0, 30.0, 46.0, 91.0, 112.0, 210.0, 341.0, 548.0, 897.0, 1456.0, 2481.0, 4157.0, 6907.0, 12259.0, 21374.0, 37066.0, 62979.0, 102846.0, 153025.0, 186872.0, 165440.0, 116008.0, 71975.0, 42661.0, 24301.0, 14141.0, 7983.0, 4976.0, 2875.0, 1697.0, 1050.0, 674.0, 390.0, 245.0, 147.0, 109.0, 53.0, 41.0, 18.0, 10.0, 6.0, 9.0, 2.0, 2.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0], "bins": [-0.68896484375, -0.666473388671875, -0.64398193359375, -0.621490478515625, -0.5989990234375, -0.576507568359375, -0.55401611328125, -0.531524658203125, -0.509033203125, -0.486541748046875, -0.46405029296875, -0.441558837890625, -0.4190673828125, -0.396575927734375, -0.37408447265625, -0.351593017578125, -0.3291015625, -0.306610107421875, -0.28411865234375, -0.261627197265625, -0.2391357421875, -0.216644287109375, -0.19415283203125, -0.171661376953125, -0.149169921875, -0.126678466796875, -0.10418701171875, -0.081695556640625, -0.0592041015625, -0.036712646484375, -0.01422119140625, 0.008270263671875, 0.03076171875, 0.053253173828125, 0.07574462890625, 0.098236083984375, 0.1207275390625, 0.143218994140625, 0.16571044921875, 0.188201904296875, 0.210693359375, 0.233184814453125, 0.25567626953125, 0.278167724609375, 0.3006591796875, 0.323150634765625, 0.34564208984375, 0.368133544921875, 0.390625, 0.413116455078125, 0.43560791015625, 0.458099365234375, 0.4805908203125, 0.503082275390625, 0.52557373046875, 0.548065185546875, 0.570556640625, 0.593048095703125, 0.61553955078125, 0.638031005859375, 0.6605224609375, 0.683013916015625, 0.70550537109375, 0.727996826171875, 0.75048828125]}, "gradients/decoder.transformer.h.15.crossattention.c_attn.bias": {"_type": "histogram", "values": [3.0, 3.0, 1.0, 4.0, 2.0, 3.0, 3.0, 9.0, 10.0, 8.0, 14.0, 15.0, 21.0, 15.0, 16.0, 24.0, 34.0, 34.0, 29.0, 28.0, 46.0, 38.0, 36.0, 39.0, 46.0, 1070.0, 39.0, 44.0, 36.0, 43.0, 34.0, 46.0, 36.0, 33.0, 21.0, 22.0, 21.0, 20.0, 19.0, 16.0, 16.0, 13.0, 11.0, 4.0, 3.0, 5.0, 3.0, 2.0, 5.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.611328125, -2.510650634765625, -2.40997314453125, -2.309295654296875, -2.2086181640625, -2.107940673828125, -2.00726318359375, -1.906585693359375, -1.805908203125, -1.705230712890625, -1.60455322265625, -1.503875732421875, -1.4031982421875, -1.302520751953125, -1.20184326171875, -1.101165771484375, -1.00048828125, -0.899810791015625, -0.79913330078125, -0.698455810546875, -0.5977783203125, -0.497100830078125, -0.39642333984375, -0.295745849609375, -0.195068359375, -0.094390869140625, 0.00628662109375, 0.106964111328125, 0.2076416015625, 0.308319091796875, 0.40899658203125, 0.509674072265625, 0.6103515625, 0.711029052734375, 0.81170654296875, 0.912384033203125, 1.0130615234375, 1.113739013671875, 1.21441650390625, 1.315093994140625, 1.415771484375, 1.516448974609375, 1.61712646484375, 1.717803955078125, 1.8184814453125, 1.919158935546875, 2.01983642578125, 2.120513916015625, 2.22119140625, 2.321868896484375, 2.42254638671875, 2.523223876953125, 2.6239013671875, 2.724578857421875, 2.82525634765625, 2.925933837890625, 3.026611328125, 3.127288818359375, 3.22796630859375, 3.328643798828125, 3.4293212890625, 3.529998779296875, 3.63067626953125, 3.731353759765625, 3.83203125]}, "gradients/decoder.transformer.h.15.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 7.0, 17.0, 6.0, 24.0, 22.0, 33.0, 75.0, 94.0, 156.0, 245.0, 384.0, 661.0, 1124.0, 1728.0, 2879.0, 4553.0, 7800.0, 12968.0, 21814.0, 37609.0, 63302.0, 101329.0, 149641.0, 1211148.0, 176875.0, 117278.0, 75097.0, 45093.0, 26336.0, 15462.0, 9182.0, 5529.0, 3216.0, 2107.0, 1238.0, 810.0, 456.0, 298.0, 204.0, 122.0, 83.0, 62.0, 31.0, 14.0, 14.0, 6.0, 3.0, 5.0, 1.0, 2.0, 0.0, 0.0, 1.0], "bins": [-0.440673828125, -0.4279632568359375, -0.415252685546875, -0.4025421142578125, -0.38983154296875, -0.3771209716796875, -0.364410400390625, -0.3516998291015625, -0.3389892578125, -0.3262786865234375, -0.313568115234375, -0.3008575439453125, -0.28814697265625, -0.2754364013671875, -0.262725830078125, -0.2500152587890625, -0.2373046875, -0.2245941162109375, -0.211883544921875, -0.1991729736328125, -0.18646240234375, -0.1737518310546875, -0.161041259765625, -0.1483306884765625, -0.1356201171875, -0.1229095458984375, -0.110198974609375, -0.0974884033203125, -0.08477783203125, -0.0720672607421875, -0.059356689453125, -0.0466461181640625, -0.033935546875, -0.0212249755859375, -0.008514404296875, 0.0041961669921875, 0.01690673828125, 0.0296173095703125, 0.042327880859375, 0.0550384521484375, 0.0677490234375, 0.0804595947265625, 0.093170166015625, 0.1058807373046875, 0.11859130859375, 0.1313018798828125, 0.144012451171875, 0.1567230224609375, 0.16943359375, 0.1821441650390625, 0.194854736328125, 0.2075653076171875, 0.22027587890625, 0.2329864501953125, 0.245697021484375, 0.2584075927734375, 0.2711181640625, 0.2838287353515625, 0.296539306640625, 0.3092498779296875, 0.32196044921875, 0.3346710205078125, 0.347381591796875, 0.3600921630859375, 0.372802734375]}, "gradients/decoder.transformer.h.15.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 4.0, 3.0, 5.0, 9.0, 7.0, 10.0, 17.0, 23.0, 24.0, 39.0, 34.0, 45.0, 46.0, 61.0, 58.0, 50.0, 74.0, 78.0, 55.0, 53.0, 44.0, 48.0, 44.0, 32.0, 33.0, 17.0, 24.0, 25.0, 15.0, 5.0, 5.0, 4.0, 4.0, 6.0, 4.0, 3.0, 5.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0194091796875, -0.018801212310791016, -0.01819324493408203, -0.017585277557373047, -0.016977310180664062, -0.016369342803955078, -0.015761375427246094, -0.01515340805053711, -0.014545440673828125, -0.01393747329711914, -0.013329505920410156, -0.012721538543701172, -0.012113571166992188, -0.011505603790283203, -0.010897636413574219, -0.010289669036865234, -0.00968170166015625, -0.009073734283447266, -0.008465766906738281, -0.007857799530029297, -0.0072498321533203125, -0.006641864776611328, -0.006033897399902344, -0.005425930023193359, -0.004817962646484375, -0.004209995269775391, -0.0036020278930664062, -0.002994060516357422, -0.0023860931396484375, -0.0017781257629394531, -0.0011701583862304688, -0.0005621910095214844, 4.57763671875e-05, 0.0006537437438964844, 0.0012617111206054688, 0.0018696784973144531, 0.0024776458740234375, 0.003085613250732422, 0.0036935806274414062, 0.004301548004150391, 0.004909515380859375, 0.005517482757568359, 0.006125450134277344, 0.006733417510986328, 0.0073413848876953125, 0.007949352264404297, 0.008557319641113281, 0.009165287017822266, 0.00977325439453125, 0.010381221771240234, 0.010989189147949219, 0.011597156524658203, 0.012205123901367188, 0.012813091278076172, 0.013421058654785156, 0.01402902603149414, 0.014636993408203125, 0.01524496078491211, 0.015852928161621094, 0.016460895538330078, 0.017068862915039062, 0.017676830291748047, 0.01828479766845703, 0.018892765045166016, 0.019500732421875]}, "gradients/decoder.transformer.h.15.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 2.0, 3.0, 1.0, 2.0, 6.0, 4.0, 5.0, 7.0, 5.0, 2.0, 19.0, 20.0, 25.0, 29.0, 36.0, 36.0, 55.0, 119.0, 158.0, 269.0, 528.0, 1267.0, 122106.0, 920240.0, 2089.0, 593.0, 320.0, 205.0, 121.0, 90.0, 57.0, 45.0, 28.0, 24.0, 15.0, 9.0, 5.0, 7.0, 6.0, 5.0, 4.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.333251953125, -0.3228797912597656, -0.31250762939453125, -0.3021354675292969, -0.2917633056640625, -0.2813911437988281, -0.27101898193359375, -0.2606468200683594, -0.250274658203125, -0.23990249633789062, -0.22953033447265625, -0.21915817260742188, -0.2087860107421875, -0.19841384887695312, -0.18804168701171875, -0.17766952514648438, -0.16729736328125, -0.15692520141601562, -0.14655303955078125, -0.13618087768554688, -0.1258087158203125, -0.11543655395507812, -0.10506439208984375, -0.09469223022460938, -0.084320068359375, -0.07394790649414062, -0.06357574462890625, -0.053203582763671875, -0.0428314208984375, -0.032459259033203125, -0.02208709716796875, -0.011714935302734375, -0.0013427734375, 0.009029388427734375, 0.01940155029296875, 0.029773712158203125, 0.0401458740234375, 0.050518035888671875, 0.06089019775390625, 0.07126235961914062, 0.081634521484375, 0.09200668334960938, 0.10237884521484375, 0.11275100708007812, 0.1231231689453125, 0.13349533081054688, 0.14386749267578125, 0.15423965454101562, 0.16461181640625, 0.17498397827148438, 0.18535614013671875, 0.19572830200195312, 0.2061004638671875, 0.21647262573242188, 0.22684478759765625, 0.23721694946289062, 0.247589111328125, 0.2579612731933594, 0.26833343505859375, 0.2787055969238281, 0.2890777587890625, 0.2994499206542969, 0.30982208251953125, 0.3201942443847656, 0.33056640625]}, "gradients/decoder.transformer.h.15.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 5.0, 16.0, 72.0, 627.0, 282.0, 15.0], "bins": [-0.255596786737442, -0.251468300819397, -0.24733978509902954, -0.2432112991809845, -0.23908279836177826, -0.23495429754257202, -0.23082581162452698, -0.22669731080532074, -0.2225688099861145, -0.21844030916690826, -0.21431180834770203, -0.21018332242965698, -0.20605482161045074, -0.2019263207912445, -0.19779783487319946, -0.19366933405399323, -0.189540833234787, -0.18541233241558075, -0.1812838315963745, -0.17715534567832947, -0.17302684485912323, -0.168898344039917, -0.16476985812187195, -0.1606413573026657, -0.15651285648345947, -0.15238435566425323, -0.148255854845047, -0.14412736892700195, -0.13999886810779572, -0.13587036728858948, -0.13174188137054443, -0.1276133805513382, -0.12348486483097076, -0.11935636401176453, -0.11522787064313889, -0.11109937727451324, -0.106970876455307, -0.10284237563610077, -0.09871388226747513, -0.09458538889884949, -0.09045688807964325, -0.08632838726043701, -0.08219989389181137, -0.07807140052318573, -0.07394289970397949, -0.06981439888477325, -0.06568590551614761, -0.061557408422231674, -0.057428911328315735, -0.053300414234399796, -0.049171917140483856, -0.04504342004656792, -0.04091492295265198, -0.03678642585873604, -0.0326579287648201, -0.02852943167090416, -0.02440093457698822, -0.02027243748307228, -0.01614394038915634, -0.012015443295240402, -0.007886946201324463, -0.0037584491074085236, 0.00037004798650741577, 0.004498545080423355, 0.00862704124301672]}, "gradients/decoder.transformer.h.15.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 3.0, 1.0, 5.0, 6.0, 9.0, 16.0, 10.0, 20.0, 20.0, 26.0, 33.0, 30.0, 24.0, 42.0, 34.0, 37.0, 32.0, 42.0, 37.0, 58.0, 45.0, 33.0, 28.0, 46.0, 45.0, 36.0, 39.0, 37.0, 32.0, 27.0, 25.0, 25.0, 17.0, 14.0, 21.0, 16.0, 14.0, 10.0, 3.0, 4.0, 7.0, 2.0, 1.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.026240408420562744, -0.025387877598404884, -0.024535346776247025, -0.023682815954089165, -0.022830285131931305, -0.021977754309773445, -0.021125223487615585, -0.020272692665457726, -0.019420161843299866, -0.018567631021142006, -0.017715100198984146, -0.016862569376826286, -0.016010038554668427, -0.015157507732510567, -0.014304976910352707, -0.013452446088194847, -0.012599915266036987, -0.011747384443879128, -0.010894853621721268, -0.010042322799563408, -0.009189791977405548, -0.008337261155247688, -0.0074847303330898285, -0.006632199510931969, -0.005779668688774109, -0.004927137866616249, -0.004074607044458389, -0.0032220762223005295, -0.0023695454001426697, -0.0015170145779848099, -0.0006644837558269501, 0.00018804706633090973, 0.0010405778884887695, 0.0018931087106466293, 0.002745639532804489, 0.003598170354962349, 0.004450701177120209, 0.0053032319992780685, 0.006155762821435928, 0.007008293643593788, 0.007860824465751648, 0.008713355287909508, 0.009565886110067368, 0.010418416932225227, 0.011270947754383087, 0.012123478576540947, 0.012976009398698807, 0.013828540220856667, 0.014681071043014526, 0.015533601865172386, 0.016386132687330246, 0.017238663509488106, 0.018091194331645966, 0.018943725153803825, 0.019796255975961685, 0.020648786798119545, 0.021501317620277405, 0.022353848442435265, 0.023206379264593124, 0.024058910086750984, 0.024911440908908844, 0.025763971731066704, 0.026616502553224564, 0.027469033375382423, 0.028321564197540283]}, "gradients/decoder.transformer.h.15.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 4.0, 2.0, 1.0, 2.0, 2.0, 7.0, 6.0, 4.0, 4.0, 3.0, 13.0, 5.0, 14.0, 17.0, 18.0, 27.0, 19.0, 29.0, 33.0, 29.0, 31.0, 40.0, 39.0, 43.0, 50.0, 34.0, 38.0, 55.0, 42.0, 46.0, 39.0, 34.0, 39.0, 32.0, 34.0, 34.0, 26.0, 17.0, 19.0, 11.0, 12.0, 19.0, 9.0, 6.0, 2.0, 8.0, 5.0, 3.0, 2.0, 4.0, 3.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0], "bins": [-3.91015625, -3.774169921875, -3.63818359375, -3.502197265625, -3.3662109375, -3.230224609375, -3.09423828125, -2.958251953125, -2.822265625, -2.686279296875, -2.55029296875, -2.414306640625, -2.2783203125, -2.142333984375, -2.00634765625, -1.870361328125, -1.734375, -1.598388671875, -1.46240234375, -1.326416015625, -1.1904296875, -1.054443359375, -0.91845703125, -0.782470703125, -0.646484375, -0.510498046875, -0.37451171875, -0.238525390625, -0.1025390625, 0.033447265625, 0.16943359375, 0.305419921875, 0.44140625, 0.577392578125, 0.71337890625, 0.849365234375, 0.9853515625, 1.121337890625, 1.25732421875, 1.393310546875, 1.529296875, 1.665283203125, 1.80126953125, 1.937255859375, 2.0732421875, 2.209228515625, 2.34521484375, 2.481201171875, 2.6171875, 2.753173828125, 2.88916015625, 3.025146484375, 3.1611328125, 3.297119140625, 3.43310546875, 3.569091796875, 3.705078125, 3.841064453125, 3.97705078125, 4.113037109375, 4.2490234375, 4.385009765625, 4.52099609375, 4.656982421875, 4.79296875]}, "gradients/decoder.transformer.h.15.attn.c_proj.weight": {"_type": "histogram", "values": [2.0, 2.0, 0.0, 1.0, 2.0, 3.0, 3.0, 5.0, 5.0, 13.0, 9.0, 13.0, 20.0, 28.0, 48.0, 53.0, 94.0, 111.0, 223.0, 308.0, 449.0, 781.0, 1292.0, 2235.0, 3930.0, 7748.0, 14883.0, 28741.0, 55935.0, 109372.0, 207210.0, 269167.0, 166648.0, 86297.0, 44566.0, 22726.0, 11610.0, 6121.0, 3314.0, 1821.0, 1055.0, 609.0, 366.0, 229.0, 151.0, 108.0, 70.0, 50.0, 41.0, 23.0, 28.0, 14.0, 13.0, 4.0, 7.0, 6.0, 3.0, 3.0, 2.0, 1.0, 1.0, 1.0, 0.0, 2.0], "bins": [-3.7890625, -3.6680908203125, -3.547119140625, -3.4261474609375, -3.30517578125, -3.1842041015625, -3.063232421875, -2.9422607421875, -2.8212890625, -2.7003173828125, -2.579345703125, -2.4583740234375, -2.33740234375, -2.2164306640625, -2.095458984375, -1.9744873046875, -1.853515625, -1.7325439453125, -1.611572265625, -1.4906005859375, -1.36962890625, -1.2486572265625, -1.127685546875, -1.0067138671875, -0.8857421875, -0.7647705078125, -0.643798828125, -0.5228271484375, -0.40185546875, -0.2808837890625, -0.159912109375, -0.0389404296875, 0.08203125, 0.2030029296875, 0.323974609375, 0.4449462890625, 0.56591796875, 0.6868896484375, 0.807861328125, 0.9288330078125, 1.0498046875, 1.1707763671875, 1.291748046875, 1.4127197265625, 1.53369140625, 1.6546630859375, 1.775634765625, 1.8966064453125, 2.017578125, 2.1385498046875, 2.259521484375, 2.3804931640625, 2.50146484375, 2.6224365234375, 2.743408203125, 2.8643798828125, 2.9853515625, 3.1063232421875, 3.227294921875, 3.3482666015625, 3.46923828125, 3.5902099609375, 3.711181640625, 3.8321533203125, 3.953125]}, "gradients/decoder.transformer.h.15.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 4.0, 1.0, 6.0, 3.0, 4.0, 11.0, 8.0, 13.0, 18.0, 29.0, 20.0, 30.0, 33.0, 34.0, 51.0, 56.0, 80.0, 81.0, 144.0, 374.0, 1407.0, 198.0, 89.0, 59.0, 46.0, 38.0, 44.0, 33.0, 31.0, 22.0, 16.0, 16.0, 14.0, 9.0, 12.0, 5.0, 9.0, 4.0, 2.0, 2.0, 0.0, 2.0, 2.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-18.0, -17.4033203125, -16.806640625, -16.2099609375, -15.61328125, -15.0166015625, -14.419921875, -13.8232421875, -13.2265625, -12.6298828125, -12.033203125, -11.4365234375, -10.83984375, -10.2431640625, -9.646484375, -9.0498046875, -8.453125, -7.8564453125, -7.259765625, -6.6630859375, -6.06640625, -5.4697265625, -4.873046875, -4.2763671875, -3.6796875, -3.0830078125, -2.486328125, -1.8896484375, -1.29296875, -0.6962890625, -0.099609375, 0.4970703125, 1.09375, 1.6904296875, 2.287109375, 2.8837890625, 3.48046875, 4.0771484375, 4.673828125, 5.2705078125, 5.8671875, 6.4638671875, 7.060546875, 7.6572265625, 8.25390625, 8.8505859375, 9.447265625, 10.0439453125, 10.640625, 11.2373046875, 11.833984375, 12.4306640625, 13.02734375, 13.6240234375, 14.220703125, 14.8173828125, 15.4140625, 16.0107421875, 16.607421875, 17.2041015625, 17.80078125, 18.3974609375, 18.994140625, 19.5908203125, 20.1875]}, "gradients/decoder.transformer.h.15.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0, 5.0, 3.0, 4.0, 12.0, 20.0, 38.0, 39.0, 58.0, 120.0, 199.0, 305.0, 817.0, 16791.0, 3116965.0, 8764.0, 747.0, 350.0, 183.0, 116.0, 68.0, 45.0, 28.0, 12.0, 14.0, 3.0, 4.0, 1.0, 2.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-61.03125, -58.88525390625, -56.7392578125, -54.59326171875, -52.447265625, -50.30126953125, -48.1552734375, -46.00927734375, -43.86328125, -41.71728515625, -39.5712890625, -37.42529296875, -35.279296875, -33.13330078125, -30.9873046875, -28.84130859375, -26.6953125, -24.54931640625, -22.4033203125, -20.25732421875, -18.111328125, -15.96533203125, -13.8193359375, -11.67333984375, -9.52734375, -7.38134765625, -5.2353515625, -3.08935546875, -0.943359375, 1.20263671875, 3.3486328125, 5.49462890625, 7.640625, 9.78662109375, 11.9326171875, 14.07861328125, 16.224609375, 18.37060546875, 20.5166015625, 22.66259765625, 24.80859375, 26.95458984375, 29.1005859375, 31.24658203125, 33.392578125, 35.53857421875, 37.6845703125, 39.83056640625, 41.9765625, 44.12255859375, 46.2685546875, 48.41455078125, 50.560546875, 52.70654296875, 54.8525390625, 56.99853515625, 59.14453125, 61.29052734375, 63.4365234375, 65.58251953125, 67.728515625, 69.87451171875, 72.0205078125, 74.16650390625, 76.3125]}, "gradients/decoder.transformer.h.15.ln_1.weight": {"_type": "histogram", "values": [2.0, 517.0, 496.0, 3.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-18.420413970947266, -9.360157012939453, -0.2999000549316406, 8.760356903076172, 17.820613861083984, 26.880870819091797, 35.94112777709961, 45.00138473510742, 54.061641693115234, 63.12189865112305, 72.18215942382812, 81.24241638183594, 90.30267333984375, 99.36293029785156, 108.42318725585938, 117.48344421386719, 126.543701171875, 135.6039581298828, 144.66421508789062, 153.72447204589844, 162.78472900390625, 171.84498596191406, 180.90524291992188, 189.9654998779297, 199.0257568359375, 208.0860137939453, 217.14627075195312, 226.20652770996094, 235.26678466796875, 244.32704162597656, 253.38729858398438, 262.44757080078125, 271.5078125, 280.56805419921875, 289.6283264160156, 298.6885986328125, 307.74884033203125, 316.80908203125, 325.8693542480469, 334.92962646484375, 343.9898681640625, 353.05010986328125, 362.1103820800781, 371.170654296875, 380.23089599609375, 389.2911376953125, 398.3514099121094, 407.41168212890625, 416.471923828125, 425.53216552734375, 434.5924377441406, 443.6527099609375, 452.71295166015625, 461.773193359375, 470.8334655761719, 479.89373779296875, 488.9539794921875, 498.01422119140625, 507.0744934082031, 516.134765625, 525.1950073242188, 534.2552490234375, 543.3155517578125, 552.3757934570312, 561.43603515625]}, "gradients/decoder.transformer.h.15.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 0.0, 4.0, 5.0, 8.0, 4.0, 7.0, 7.0, 10.0, 16.0, 16.0, 18.0, 18.0, 19.0, 28.0, 28.0, 22.0, 35.0, 46.0, 47.0, 46.0, 42.0, 49.0, 38.0, 46.0, 35.0, 45.0, 42.0, 33.0, 30.0, 29.0, 39.0, 26.0, 25.0, 25.0, 29.0, 19.0, 10.0, 12.0, 13.0, 9.0, 9.0, 4.0, 5.0, 9.0, 6.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-42.22984313964844, -40.94559097290039, -39.66133499145508, -38.37708282470703, -37.09282684326172, -35.80857467651367, -34.524322509765625, -33.24006652832031, -31.955814361572266, -30.671560287475586, -29.387306213378906, -28.10305404663086, -26.81879997253418, -25.5345458984375, -24.25029182434082, -22.96603775024414, -21.68178367614746, -20.39752960205078, -19.1132755279541, -17.829021453857422, -16.544769287109375, -15.260515213012695, -13.976261138916016, -12.692008018493652, -11.407753944396973, -10.123499870300293, -8.83924674987793, -7.55499267578125, -6.2707390785217285, -4.986485481262207, -3.7022314071655273, -2.417978286743164, -1.1337242126464844, 0.15052950382232666, 1.4347832202911377, 2.7190370559692383, 4.00329065322876, 5.287544250488281, 6.571798324584961, 7.856051445007324, 9.140305519104004, 10.424559593200684, 11.708812713623047, 12.993066787719727, 14.277320861816406, 15.56157398223877, 16.845829010009766, 18.130081176757812, 19.414335250854492, 20.698589324951172, 21.98284339904785, 23.26709747314453, 24.551349639892578, 25.835603713989258, 27.119857788085938, 28.404109954833984, 29.688365936279297, 30.972620010375977, 32.256874084472656, 33.5411262512207, 34.825382232666016, 36.10963439941406, 37.393890380859375, 38.67814254760742, 39.96239471435547]}, "gradients/decoder.transformer.h.14.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 3.0, 3.0, 4.0, 5.0, 5.0, 5.0, 4.0, 6.0, 10.0, 12.0, 16.0, 14.0, 14.0, 28.0, 24.0, 24.0, 28.0, 39.0, 35.0, 29.0, 30.0, 36.0, 53.0, 35.0, 40.0, 42.0, 44.0, 37.0, 34.0, 51.0, 35.0, 34.0, 35.0, 30.0, 24.0, 29.0, 24.0, 15.0, 13.0, 11.0, 12.0, 4.0, 7.0, 10.0, 3.0, 3.0, 7.0, 2.0, 2.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 3.0], "bins": [-4.109375, -3.96795654296875, -3.8265380859375, -3.68511962890625, -3.543701171875, -3.40228271484375, -3.2608642578125, -3.11944580078125, -2.97802734375, -2.83660888671875, -2.6951904296875, -2.55377197265625, -2.412353515625, -2.27093505859375, -2.1295166015625, -1.98809814453125, -1.8466796875, -1.70526123046875, -1.5638427734375, -1.42242431640625, -1.281005859375, -1.13958740234375, -0.9981689453125, -0.85675048828125, -0.71533203125, -0.57391357421875, -0.4324951171875, -0.29107666015625, -0.149658203125, -0.00823974609375, 0.1331787109375, 0.27459716796875, 0.416015625, 0.55743408203125, 0.6988525390625, 0.84027099609375, 0.981689453125, 1.12310791015625, 1.2645263671875, 1.40594482421875, 1.54736328125, 1.68878173828125, 1.8302001953125, 1.97161865234375, 2.113037109375, 2.25445556640625, 2.3958740234375, 2.53729248046875, 2.6787109375, 2.82012939453125, 2.9615478515625, 3.10296630859375, 3.244384765625, 3.38580322265625, 3.5272216796875, 3.66864013671875, 3.81005859375, 3.95147705078125, 4.0928955078125, 4.23431396484375, 4.375732421875, 4.51715087890625, 4.6585693359375, 4.79998779296875, 4.94140625]}, "gradients/decoder.transformer.h.14.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 2.0, 2.0, 3.0, 4.0, 5.0, 7.0, 3.0, 3.0, 7.0, 12.0, 16.0, 13.0, 14.0, 26.0, 20.0, 31.0, 44.0, 77.0, 112.0, 208.0, 382.0, 1068.0, 4483.0, 36113.0, 602194.0, 2946071.0, 564121.0, 33467.0, 3980.0, 921.0, 351.0, 171.0, 112.0, 53.0, 40.0, 33.0, 12.0, 28.0, 11.0, 11.0, 12.0, 18.0, 4.0, 6.0, 7.0, 2.0, 1.0, 5.0, 2.0, 2.0, 3.0, 2.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0], "bins": [-13.203125, -12.75537109375, -12.3076171875, -11.85986328125, -11.412109375, -10.96435546875, -10.5166015625, -10.06884765625, -9.62109375, -9.17333984375, -8.7255859375, -8.27783203125, -7.830078125, -7.38232421875, -6.9345703125, -6.48681640625, -6.0390625, -5.59130859375, -5.1435546875, -4.69580078125, -4.248046875, -3.80029296875, -3.3525390625, -2.90478515625, -2.45703125, -2.00927734375, -1.5615234375, -1.11376953125, -0.666015625, -0.21826171875, 0.2294921875, 0.67724609375, 1.125, 1.57275390625, 2.0205078125, 2.46826171875, 2.916015625, 3.36376953125, 3.8115234375, 4.25927734375, 4.70703125, 5.15478515625, 5.6025390625, 6.05029296875, 6.498046875, 6.94580078125, 7.3935546875, 7.84130859375, 8.2890625, 8.73681640625, 9.1845703125, 9.63232421875, 10.080078125, 10.52783203125, 10.9755859375, 11.42333984375, 11.87109375, 12.31884765625, 12.7666015625, 13.21435546875, 13.662109375, 14.10986328125, 14.5576171875, 15.00537109375, 15.453125]}, "gradients/decoder.transformer.h.14.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 4.0, 8.0, 6.0, 6.0, 9.0, 17.0, 17.0, 28.0, 29.0, 42.0, 47.0, 86.0, 109.0, 160.0, 181.0, 276.0, 340.0, 388.0, 418.0, 417.0, 348.0, 279.0, 222.0, 161.0, 121.0, 92.0, 84.0, 54.0, 40.0, 29.0, 25.0, 11.0, 14.0, 5.0, 7.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0], "bins": [-11.390625, -11.08843994140625, -10.7862548828125, -10.48406982421875, -10.181884765625, -9.87969970703125, -9.5775146484375, -9.27532958984375, -8.97314453125, -8.67095947265625, -8.3687744140625, -8.06658935546875, -7.764404296875, -7.46221923828125, -7.1600341796875, -6.85784912109375, -6.5556640625, -6.25347900390625, -5.9512939453125, -5.64910888671875, -5.346923828125, -5.04473876953125, -4.7425537109375, -4.44036865234375, -4.13818359375, -3.83599853515625, -3.5338134765625, -3.23162841796875, -2.929443359375, -2.62725830078125, -2.3250732421875, -2.02288818359375, -1.720703125, -1.41851806640625, -1.1163330078125, -0.81414794921875, -0.511962890625, -0.20977783203125, 0.0924072265625, 0.39459228515625, 0.69677734375, 0.99896240234375, 1.3011474609375, 1.60333251953125, 1.905517578125, 2.20770263671875, 2.5098876953125, 2.81207275390625, 3.1142578125, 3.41644287109375, 3.7186279296875, 4.02081298828125, 4.322998046875, 4.62518310546875, 4.9273681640625, 5.22955322265625, 5.53173828125, 5.83392333984375, 6.1361083984375, 6.43829345703125, 6.740478515625, 7.04266357421875, 7.3448486328125, 7.64703369140625, 7.94921875]}, "gradients/decoder.transformer.h.14.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 9.0, 20.0, 31.0, 24.0, 55.0, 63.0, 79.0, 141.0, 202.0, 369.0, 831.0, 2928.0, 21662.0, 352088.0, 3119967.0, 653585.0, 35902.0, 4166.0, 1017.0, 454.0, 242.0, 151.0, 77.0, 63.0, 44.0, 29.0, 25.0, 20.0, 8.0, 10.0, 8.0, 3.0, 5.0, 3.0, 1.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-17.9375, -17.26318359375, -16.5888671875, -15.91455078125, -15.240234375, -14.56591796875, -13.8916015625, -13.21728515625, -12.54296875, -11.86865234375, -11.1943359375, -10.52001953125, -9.845703125, -9.17138671875, -8.4970703125, -7.82275390625, -7.1484375, -6.47412109375, -5.7998046875, -5.12548828125, -4.451171875, -3.77685546875, -3.1025390625, -2.42822265625, -1.75390625, -1.07958984375, -0.4052734375, 0.26904296875, 0.943359375, 1.61767578125, 2.2919921875, 2.96630859375, 3.640625, 4.31494140625, 4.9892578125, 5.66357421875, 6.337890625, 7.01220703125, 7.6865234375, 8.36083984375, 9.03515625, 9.70947265625, 10.3837890625, 11.05810546875, 11.732421875, 12.40673828125, 13.0810546875, 13.75537109375, 14.4296875, 15.10400390625, 15.7783203125, 16.45263671875, 17.126953125, 17.80126953125, 18.4755859375, 19.14990234375, 19.82421875, 20.49853515625, 21.1728515625, 21.84716796875, 22.521484375, 23.19580078125, 23.8701171875, 24.54443359375, 25.21875]}, "gradients/decoder.transformer.h.14.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 5.0, 9.0, 23.0, 48.0, 91.0, 124.0, 146.0, 183.0, 149.0, 94.0, 71.0, 32.0, 21.0, 12.0, 5.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-94.71366119384766, -92.24703979492188, -89.78041076660156, -87.31378936767578, -84.84716033935547, -82.38053894042969, -79.91390991210938, -77.4472885131836, -74.98065948486328, -72.5140380859375, -70.04740905761719, -67.5807876586914, -65.1141586303711, -62.64753341674805, -60.180908203125, -57.71428680419922, -55.24766159057617, -52.781036376953125, -50.31441116333008, -47.84778594970703, -45.381160736083984, -42.91453552246094, -40.447914123535156, -37.981285095214844, -35.51466369628906, -33.048038482666016, -30.58141326904297, -28.114788055419922, -25.648162841796875, -23.181537628173828, -20.714914321899414, -18.248289108276367, -15.781665802001953, -13.315040588378906, -10.84841537475586, -8.381791114807129, -5.915165901184082, -3.448540687561035, -0.9819164276123047, 1.4847087860107422, 3.951333999633789, 6.417959213256836, 8.884584426879883, 11.351208686828613, 13.81783390045166, 16.28445816040039, 18.751083374023438, 21.217708587646484, 23.68433380126953, 26.150959014892578, 28.617584228515625, 31.084209442138672, 33.55083465576172, 36.017459869384766, 38.48408508300781, 40.950706481933594, 43.417335510253906, 45.88396072387695, 48.3505859375, 50.81721115112305, 53.283836364746094, 55.75046157836914, 58.21708679199219, 60.68370819091797, 63.150333404541016]}, "gradients/decoder.transformer.h.14.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 4.0, 1.0, 2.0, 2.0, 2.0, 2.0, 1.0, 8.0, 5.0, 10.0, 13.0, 7.0, 15.0, 21.0, 16.0, 19.0, 15.0, 16.0, 22.0, 29.0, 44.0, 31.0, 33.0, 34.0, 31.0, 39.0, 39.0, 34.0, 42.0, 38.0, 47.0, 29.0, 28.0, 38.0, 28.0, 30.0, 26.0, 27.0, 24.0, 21.0, 23.0, 24.0, 13.0, 16.0, 11.0, 13.0, 11.0, 10.0, 5.0, 3.0, 4.0, 6.0, 0.0, 2.0, 3.0, 1.0, 1.0, 1.0, 1.0, 2.0], "bins": [-29.55217933654785, -28.636825561523438, -27.721473693847656, -26.806119918823242, -25.89076805114746, -24.975414276123047, -24.060062408447266, -23.14470863342285, -22.229354858398438, -21.314001083374023, -20.398649215698242, -19.483295440673828, -18.567943572998047, -17.652589797973633, -16.73723602294922, -15.821884155273438, -14.906532287597656, -13.991179466247559, -13.075826644897461, -12.160472869873047, -11.245121002197266, -10.329767227172852, -9.414414405822754, -8.499061584472656, -7.583708763122559, -6.668355941772461, -5.753003120422363, -4.837649822235107, -3.9222970008850098, -3.006944179534912, -2.0915908813476562, -1.1762380599975586, -0.26088714599609375, 0.6544657945632935, 1.5698187351226807, 2.4851717948913574, 3.400524616241455, 4.315877437591553, 5.231230735778809, 6.146583557128906, 7.061936378479004, 7.977289199829102, 8.8926420211792, 9.807994842529297, 10.723348617553711, 11.638700485229492, 12.554054260253906, 13.469407081604004, 14.384759902954102, 15.3001127243042, 16.215465545654297, 17.13081932067871, 18.046171188354492, 18.961524963378906, 19.876876831054688, 20.7922306060791, 21.707584381103516, 22.62293815612793, 23.53829002380371, 24.453643798828125, 25.368995666503906, 26.28434944152832, 27.199703216552734, 28.115055084228516, 29.030406951904297]}, "gradients/decoder.transformer.h.14.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 3.0, 4.0, 5.0, 2.0, 7.0, 11.0, 6.0, 10.0, 9.0, 16.0, 19.0, 21.0, 27.0, 17.0, 21.0, 28.0, 42.0, 34.0, 40.0, 47.0, 39.0, 37.0, 50.0, 39.0, 42.0, 38.0, 38.0, 37.0, 43.0, 28.0, 28.0, 21.0, 36.0, 29.0, 22.0, 25.0, 19.0, 14.0, 10.0, 7.0, 6.0, 5.0, 6.0, 8.0, 5.0, 2.0, 3.0, 2.0, 1.0, 1.0, 3.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-4.125, -3.98553466796875, -3.8460693359375, -3.70660400390625, -3.567138671875, -3.42767333984375, -3.2882080078125, -3.14874267578125, -3.00927734375, -2.86981201171875, -2.7303466796875, -2.59088134765625, -2.451416015625, -2.31195068359375, -2.1724853515625, -2.03302001953125, -1.8935546875, -1.75408935546875, -1.6146240234375, -1.47515869140625, -1.335693359375, -1.19622802734375, -1.0567626953125, -0.91729736328125, -0.77783203125, -0.63836669921875, -0.4989013671875, -0.35943603515625, -0.219970703125, -0.08050537109375, 0.0589599609375, 0.19842529296875, 0.337890625, 0.47735595703125, 0.6168212890625, 0.75628662109375, 0.895751953125, 1.03521728515625, 1.1746826171875, 1.31414794921875, 1.45361328125, 1.59307861328125, 1.7325439453125, 1.87200927734375, 2.011474609375, 2.15093994140625, 2.2904052734375, 2.42987060546875, 2.5693359375, 2.70880126953125, 2.8482666015625, 2.98773193359375, 3.127197265625, 3.26666259765625, 3.4061279296875, 3.54559326171875, 3.68505859375, 3.82452392578125, 3.9639892578125, 4.10345458984375, 4.242919921875, 4.38238525390625, 4.5218505859375, 4.66131591796875, 4.80078125]}, "gradients/decoder.transformer.h.14.crossattention.c_proj.weight": {"_type": "histogram", "values": [3.0, 2.0, 4.0, 2.0, 7.0, 11.0, 15.0, 18.0, 24.0, 41.0, 51.0, 72.0, 100.0, 158.0, 204.0, 357.0, 487.0, 670.0, 1055.0, 1540.0, 2284.0, 3547.0, 5399.0, 8173.0, 13047.0, 20389.0, 31798.0, 49742.0, 76778.0, 110215.0, 143828.0, 155242.0, 134861.0, 99298.0, 66878.0, 43853.0, 27950.0, 17758.0, 11383.0, 7131.0, 4626.0, 3145.0, 2093.0, 1343.0, 960.0, 643.0, 425.0, 298.0, 190.0, 146.0, 81.0, 75.0, 69.0, 29.0, 32.0, 23.0, 4.0, 8.0, 3.0, 5.0, 0.0, 2.0, 0.0, 1.0], "bins": [-0.6337890625, -0.6135711669921875, -0.593353271484375, -0.5731353759765625, -0.55291748046875, -0.5326995849609375, -0.512481689453125, -0.4922637939453125, -0.4720458984375, -0.4518280029296875, -0.431610107421875, -0.4113922119140625, -0.39117431640625, -0.3709564208984375, -0.350738525390625, -0.3305206298828125, -0.310302734375, -0.2900848388671875, -0.269866943359375, -0.2496490478515625, -0.22943115234375, -0.2092132568359375, -0.188995361328125, -0.1687774658203125, -0.1485595703125, -0.1283416748046875, -0.108123779296875, -0.0879058837890625, -0.06768798828125, -0.0474700927734375, -0.027252197265625, -0.0070343017578125, 0.01318359375, 0.0334014892578125, 0.053619384765625, 0.0738372802734375, 0.09405517578125, 0.1142730712890625, 0.134490966796875, 0.1547088623046875, 0.1749267578125, 0.1951446533203125, 0.215362548828125, 0.2355804443359375, 0.25579833984375, 0.2760162353515625, 0.296234130859375, 0.3164520263671875, 0.336669921875, 0.3568878173828125, 0.377105712890625, 0.3973236083984375, 0.41754150390625, 0.4377593994140625, 0.457977294921875, 0.4781951904296875, 0.4984130859375, 0.5186309814453125, 0.538848876953125, 0.5590667724609375, 0.57928466796875, 0.5995025634765625, 0.619720458984375, 0.6399383544921875, 0.66015625]}, "gradients/decoder.transformer.h.14.crossattention.c_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 2.0, 4.0, 2.0, 0.0, 5.0, 7.0, 6.0, 5.0, 5.0, 8.0, 13.0, 15.0, 13.0, 13.0, 15.0, 26.0, 25.0, 15.0, 26.0, 32.0, 32.0, 29.0, 41.0, 46.0, 38.0, 42.0, 30.0, 1060.0, 45.0, 31.0, 40.0, 47.0, 33.0, 37.0, 27.0, 27.0, 29.0, 23.0, 20.0, 19.0, 21.0, 21.0, 15.0, 7.0, 7.0, 6.0, 10.0, 5.0, 5.0, 2.0, 2.0, 3.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.861328125, -2.766357421875, -2.67138671875, -2.576416015625, -2.4814453125, -2.386474609375, -2.29150390625, -2.196533203125, -2.1015625, -2.006591796875, -1.91162109375, -1.816650390625, -1.7216796875, -1.626708984375, -1.53173828125, -1.436767578125, -1.341796875, -1.246826171875, -1.15185546875, -1.056884765625, -0.9619140625, -0.866943359375, -0.77197265625, -0.677001953125, -0.58203125, -0.487060546875, -0.39208984375, -0.297119140625, -0.2021484375, -0.107177734375, -0.01220703125, 0.082763671875, 0.177734375, 0.272705078125, 0.36767578125, 0.462646484375, 0.5576171875, 0.652587890625, 0.74755859375, 0.842529296875, 0.9375, 1.032470703125, 1.12744140625, 1.222412109375, 1.3173828125, 1.412353515625, 1.50732421875, 1.602294921875, 1.697265625, 1.792236328125, 1.88720703125, 1.982177734375, 2.0771484375, 2.172119140625, 2.26708984375, 2.362060546875, 2.45703125, 2.552001953125, 2.64697265625, 2.741943359375, 2.8369140625, 2.931884765625, 3.02685546875, 3.121826171875, 3.216796875]}, "gradients/decoder.transformer.h.14.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 7.0, 9.0, 13.0, 19.0, 27.0, 42.0, 52.0, 94.0, 145.0, 195.0, 301.0, 534.0, 698.0, 1072.0, 1738.0, 2502.0, 3954.0, 6240.0, 9513.0, 14913.0, 24231.0, 38826.0, 61024.0, 93488.0, 134607.0, 1149179.0, 213386.0, 119118.0, 80493.0, 52001.0, 32791.0, 20176.0, 12605.0, 8192.0, 5220.0, 3335.0, 2239.0, 1395.0, 954.0, 619.0, 415.0, 257.0, 183.0, 101.0, 64.0, 57.0, 43.0, 25.0, 19.0, 11.0, 5.0, 6.0, 3.0, 2.0, 5.0, 0.0, 3.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.345703125, -0.3337059020996094, -0.32170867919921875, -0.3097114562988281, -0.2977142333984375, -0.2857170104980469, -0.27371978759765625, -0.2617225646972656, -0.249725341796875, -0.23772811889648438, -0.22573089599609375, -0.21373367309570312, -0.2017364501953125, -0.18973922729492188, -0.17774200439453125, -0.16574478149414062, -0.15374755859375, -0.14175033569335938, -0.12975311279296875, -0.11775588989257812, -0.1057586669921875, -0.09376144409179688, -0.08176422119140625, -0.06976699829101562, -0.057769775390625, -0.045772552490234375, -0.03377532958984375, -0.021778106689453125, -0.0097808837890625, 0.002216339111328125, 0.01421356201171875, 0.026210784912109375, 0.0382080078125, 0.050205230712890625, 0.06220245361328125, 0.07419967651367188, 0.0861968994140625, 0.09819412231445312, 0.11019134521484375, 0.12218856811523438, 0.134185791015625, 0.14618301391601562, 0.15818023681640625, 0.17017745971679688, 0.1821746826171875, 0.19417190551757812, 0.20616912841796875, 0.21816635131835938, 0.23016357421875, 0.24216079711914062, 0.25415802001953125, 0.2661552429199219, 0.2781524658203125, 0.2901496887207031, 0.30214691162109375, 0.3141441345214844, 0.326141357421875, 0.3381385803222656, 0.35013580322265625, 0.3621330261230469, 0.3741302490234375, 0.3861274719238281, 0.39812469482421875, 0.4101219177246094, 0.422119140625]}, "gradients/decoder.transformer.h.14.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 4.0, 1.0, 3.0, 4.0, 4.0, 3.0, 3.0, 11.0, 15.0, 16.0, 30.0, 23.0, 32.0, 46.0, 66.0, 68.0, 80.0, 97.0, 99.0, 86.0, 75.0, 51.0, 48.0, 42.0, 29.0, 16.0, 13.0, 13.0, 12.0, 9.0, 4.0, 2.0, 3.0, 1.0, 1.0, 2.0, 3.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.031494140625, -0.030615806579589844, -0.029737472534179688, -0.02885913848876953, -0.027980804443359375, -0.02710247039794922, -0.026224136352539062, -0.025345802307128906, -0.02446746826171875, -0.023589134216308594, -0.022710800170898438, -0.02183246612548828, -0.020954132080078125, -0.02007579803466797, -0.019197463989257812, -0.018319129943847656, -0.0174407958984375, -0.016562461853027344, -0.015684127807617188, -0.014805793762207031, -0.013927459716796875, -0.013049125671386719, -0.012170791625976562, -0.011292457580566406, -0.01041412353515625, -0.009535789489746094, -0.008657455444335938, -0.007779121398925781, -0.006900787353515625, -0.006022453308105469, -0.0051441192626953125, -0.004265785217285156, -0.003387451171875, -0.0025091171264648438, -0.0016307830810546875, -0.0007524490356445312, 0.000125885009765625, 0.0010042190551757812, 0.0018825531005859375, 0.0027608871459960938, 0.00363922119140625, 0.004517555236816406, 0.0053958892822265625, 0.006274223327636719, 0.007152557373046875, 0.008030891418457031, 0.008909225463867188, 0.009787559509277344, 0.0106658935546875, 0.011544227600097656, 0.012422561645507812, 0.013300895690917969, 0.014179229736328125, 0.015057563781738281, 0.015935897827148438, 0.016814231872558594, 0.01769256591796875, 0.018570899963378906, 0.019449234008789062, 0.02032756805419922, 0.021205902099609375, 0.02208423614501953, 0.022962570190429688, 0.023840904235839844, 0.02471923828125]}, "gradients/decoder.transformer.h.14.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 2.0, 2.0, 1.0, 2.0, 2.0, 3.0, 7.0, 12.0, 14.0, 19.0, 16.0, 27.0, 61.0, 101.0, 124.0, 327.0, 794.0, 59426.0, 985601.0, 1197.0, 366.0, 184.0, 88.0, 57.0, 39.0, 29.0, 23.0, 12.0, 9.0, 0.0, 7.0, 2.0, 3.0, 4.0, 4.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.499755859375, -0.4819908142089844, -0.46422576904296875, -0.4464607238769531, -0.4286956787109375, -0.4109306335449219, -0.39316558837890625, -0.3754005432128906, -0.357635498046875, -0.3398704528808594, -0.32210540771484375, -0.3043403625488281, -0.2865753173828125, -0.2688102722167969, -0.25104522705078125, -0.23328018188476562, -0.21551513671875, -0.19775009155273438, -0.17998504638671875, -0.16222000122070312, -0.1444549560546875, -0.12668991088867188, -0.10892486572265625, -0.09115982055664062, -0.073394775390625, -0.055629730224609375, -0.03786468505859375, -0.020099639892578125, -0.0023345947265625, 0.015430450439453125, 0.03319549560546875, 0.050960540771484375, 0.0687255859375, 0.08649063110351562, 0.10425567626953125, 0.12202072143554688, 0.1397857666015625, 0.15755081176757812, 0.17531585693359375, 0.19308090209960938, 0.210845947265625, 0.22861099243164062, 0.24637603759765625, 0.2641410827636719, 0.2819061279296875, 0.2996711730957031, 0.31743621826171875, 0.3352012634277344, 0.35296630859375, 0.3707313537597656, 0.38849639892578125, 0.4062614440917969, 0.4240264892578125, 0.4417915344238281, 0.45955657958984375, 0.4773216247558594, 0.495086669921875, 0.5128517150878906, 0.5306167602539062, 0.5483818054199219, 0.5661468505859375, 0.5839118957519531, 0.6016769409179688, 0.6194419860839844, 0.63720703125]}, "gradients/decoder.transformer.h.14.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 6.0, 17.0, 160.0, 802.0, 29.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.24430254101753235, -0.23839910328388214, -0.23249565064907074, -0.22659221291542053, -0.22068876028060913, -0.21478532254695892, -0.20888188481330872, -0.20297843217849731, -0.1970749795436859, -0.1911715418100357, -0.1852680891752243, -0.1793646514415741, -0.1734611988067627, -0.1675577610731125, -0.16165432333946228, -0.15575087070465088, -0.14984743297100067, -0.14394399523735046, -0.13804054260253906, -0.13213710486888885, -0.12623365223407745, -0.12033021450042725, -0.11442676931619644, -0.10852332413196564, -0.10261987894773483, -0.09671643376350403, -0.09081298857927322, -0.08490954339504242, -0.07900610566139221, -0.07310265302658081, -0.0671992152929306, -0.0612957701086998, -0.05539233982563019, -0.049488894641399384, -0.04358544945716858, -0.03768200799822807, -0.03177856281399727, -0.025875117629766464, -0.019971676170825958, -0.014068230986595154, -0.00816478580236435, -0.0022613415494561195, 0.0036421027034521103, 0.009545546025037766, 0.01544899120926857, 0.021352436393499374, 0.02725587785243988, 0.033159323036670685, 0.03906276822090149, 0.044966213405132294, 0.0508696585893631, 0.056773100048303604, 0.06267654895782471, 0.06857998669147491, 0.07448343187570572, 0.08038687705993652, 0.08629032224416733, 0.09219376742839813, 0.09809721261262894, 0.10400065779685974, 0.10990409553050995, 0.11580754816532135, 0.12171098589897156, 0.12761443853378296, 0.13351787626743317]}, "gradients/decoder.transformer.h.14.ln_cross_attn.bias": {"_type": "histogram", "values": [2.0, 3.0, 3.0, 3.0, 4.0, 7.0, 10.0, 7.0, 4.0, 10.0, 9.0, 13.0, 7.0, 14.0, 22.0, 17.0, 21.0, 21.0, 19.0, 24.0, 23.0, 35.0, 21.0, 29.0, 36.0, 29.0, 39.0, 33.0, 33.0, 36.0, 34.0, 28.0, 31.0, 33.0, 35.0, 36.0, 34.0, 30.0, 14.0, 28.0, 21.0, 21.0, 26.0, 14.0, 15.0, 11.0, 14.0, 12.0, 10.0, 8.0, 2.0, 8.0, 6.0, 3.0, 7.0, 2.0, 2.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.022610485553741455, -0.02184230647981167, -0.02107412740588188, -0.020305948331952095, -0.01953776925802231, -0.01876959018409252, -0.018001411110162735, -0.01723323203623295, -0.01646505296230316, -0.015696873888373375, -0.014928694814443588, -0.014160515740513802, -0.013392336666584015, -0.012624157592654228, -0.011855978518724442, -0.011087799444794655, -0.010319620370864868, -0.009551441296935081, -0.008783262223005295, -0.008015083149075508, -0.0072469040751457214, -0.006478725001215935, -0.005710545927286148, -0.004942366853356361, -0.004174187779426575, -0.003406008705496788, -0.0026378296315670013, -0.0018696505576372147, -0.001101471483707428, -0.0003332924097776413, 0.0004348866641521454, 0.001203065738081932, 0.0019712448120117188, 0.0027394238859415054, 0.003507602959871292, 0.004275782033801079, 0.0050439611077308655, 0.005812140181660652, 0.006580319255590439, 0.0073484983295202255, 0.008116677403450012, 0.008884856477379799, 0.009653035551309586, 0.010421214625239372, 0.011189393699169159, 0.011957572773098946, 0.012725751847028732, 0.013493930920958519, 0.014262109994888306, 0.015030289068818092, 0.01579846814274788, 0.016566647216677666, 0.017334826290607452, 0.01810300536453724, 0.018871184438467026, 0.019639363512396812, 0.0204075425863266, 0.021175721660256386, 0.021943900734186172, 0.02271207980811596, 0.023480258882045746, 0.024248437955975533, 0.02501661702990532, 0.025784796103835106, 0.026552975177764893]}, "gradients/decoder.transformer.h.14.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 3.0, 4.0, 5.0, 2.0, 7.0, 11.0, 6.0, 10.0, 9.0, 16.0, 19.0, 20.0, 28.0, 17.0, 21.0, 28.0, 42.0, 34.0, 40.0, 47.0, 39.0, 37.0, 52.0, 37.0, 42.0, 38.0, 38.0, 37.0, 42.0, 29.0, 28.0, 21.0, 36.0, 29.0, 22.0, 25.0, 19.0, 14.0, 10.0, 7.0, 6.0, 5.0, 6.0, 8.0, 5.0, 2.0, 3.0, 2.0, 1.0, 1.0, 3.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-4.125, -3.98553466796875, -3.8460693359375, -3.70660400390625, -3.567138671875, -3.42767333984375, -3.2882080078125, -3.14874267578125, -3.00927734375, -2.86981201171875, -2.7303466796875, -2.59088134765625, -2.451416015625, -2.31195068359375, -2.1724853515625, -2.03302001953125, -1.8935546875, -1.75408935546875, -1.6146240234375, -1.47515869140625, -1.335693359375, -1.19622802734375, -1.0567626953125, -0.91729736328125, -0.77783203125, -0.63836669921875, -0.4989013671875, -0.35943603515625, -0.219970703125, -0.08050537109375, 0.0589599609375, 0.19842529296875, 0.337890625, 0.47735595703125, 0.6168212890625, 0.75628662109375, 0.895751953125, 1.03521728515625, 1.1746826171875, 1.31414794921875, 1.45361328125, 1.59307861328125, 1.7325439453125, 1.87200927734375, 2.011474609375, 2.15093994140625, 2.2904052734375, 2.42987060546875, 2.5693359375, 2.70880126953125, 2.8482666015625, 2.98773193359375, 3.127197265625, 3.26666259765625, 3.4061279296875, 3.54559326171875, 3.68505859375, 3.82452392578125, 3.9639892578125, 4.10345458984375, 4.242919921875, 4.38238525390625, 4.5218505859375, 4.66131591796875, 4.80078125]}, "gradients/decoder.transformer.h.14.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 6.0, 1.0, 6.0, 7.0, 1.0, 4.0, 10.0, 5.0, 6.0, 13.0, 14.0, 25.0, 23.0, 56.0, 75.0, 146.0, 290.0, 601.0, 1273.0, 2743.0, 5924.0, 12668.0, 26129.0, 55736.0, 116817.0, 236775.0, 292795.0, 154903.0, 74541.0, 35190.0, 16617.0, 8007.0, 3666.0, 1764.0, 840.0, 391.0, 214.0, 91.0, 80.0, 28.0, 26.0, 14.0, 12.0, 9.0, 6.0, 1.0, 7.0, 3.0, 4.0, 2.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-4.16796875, -4.02520751953125, -3.8824462890625, -3.73968505859375, -3.596923828125, -3.45416259765625, -3.3114013671875, -3.16864013671875, -3.02587890625, -2.88311767578125, -2.7403564453125, -2.59759521484375, -2.454833984375, -2.31207275390625, -2.1693115234375, -2.02655029296875, -1.8837890625, -1.74102783203125, -1.5982666015625, -1.45550537109375, -1.312744140625, -1.16998291015625, -1.0272216796875, -0.88446044921875, -0.74169921875, -0.59893798828125, -0.4561767578125, -0.31341552734375, -0.170654296875, -0.02789306640625, 0.1148681640625, 0.25762939453125, 0.400390625, 0.54315185546875, 0.6859130859375, 0.82867431640625, 0.971435546875, 1.11419677734375, 1.2569580078125, 1.39971923828125, 1.54248046875, 1.68524169921875, 1.8280029296875, 1.97076416015625, 2.113525390625, 2.25628662109375, 2.3990478515625, 2.54180908203125, 2.6845703125, 2.82733154296875, 2.9700927734375, 3.11285400390625, 3.255615234375, 3.39837646484375, 3.5411376953125, 3.68389892578125, 3.82666015625, 3.96942138671875, 4.1121826171875, 4.25494384765625, 4.397705078125, 4.54046630859375, 4.6832275390625, 4.82598876953125, 4.96875]}, "gradients/decoder.transformer.h.14.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 5.0, 1.0, 3.0, 4.0, 8.0, 3.0, 8.0, 8.0, 17.0, 12.0, 12.0, 10.0, 22.0, 17.0, 30.0, 32.0, 31.0, 35.0, 46.0, 48.0, 58.0, 86.0, 182.0, 1393.0, 342.0, 152.0, 76.0, 66.0, 51.0, 36.0, 38.0, 35.0, 33.0, 25.0, 28.0, 18.0, 24.0, 15.0, 11.0, 11.0, 8.0, 6.0, 4.0, 2.0, 3.0, 2.0, 2.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0], "bins": [-14.375, -13.89306640625, -13.4111328125, -12.92919921875, -12.447265625, -11.96533203125, -11.4833984375, -11.00146484375, -10.51953125, -10.03759765625, -9.5556640625, -9.07373046875, -8.591796875, -8.10986328125, -7.6279296875, -7.14599609375, -6.6640625, -6.18212890625, -5.7001953125, -5.21826171875, -4.736328125, -4.25439453125, -3.7724609375, -3.29052734375, -2.80859375, -2.32666015625, -1.8447265625, -1.36279296875, -0.880859375, -0.39892578125, 0.0830078125, 0.56494140625, 1.046875, 1.52880859375, 2.0107421875, 2.49267578125, 2.974609375, 3.45654296875, 3.9384765625, 4.42041015625, 4.90234375, 5.38427734375, 5.8662109375, 6.34814453125, 6.830078125, 7.31201171875, 7.7939453125, 8.27587890625, 8.7578125, 9.23974609375, 9.7216796875, 10.20361328125, 10.685546875, 11.16748046875, 11.6494140625, 12.13134765625, 12.61328125, 13.09521484375, 13.5771484375, 14.05908203125, 14.541015625, 15.02294921875, 15.5048828125, 15.98681640625, 16.46875]}, "gradients/decoder.transformer.h.14.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 4.0, 5.0, 2.0, 4.0, 1.0, 12.0, 14.0, 13.0, 25.0, 20.0, 21.0, 43.0, 35.0, 50.0, 65.0, 75.0, 92.0, 124.0, 181.0, 309.0, 390.0, 931.0, 5418.0, 575484.0, 2544596.0, 14943.0, 1291.0, 464.0, 288.0, 180.0, 124.0, 111.0, 69.0, 68.0, 49.0, 42.0, 40.0, 27.0, 25.0, 17.0, 13.0, 13.0, 12.0, 7.0, 8.0, 1.0, 5.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-35.3125, -34.2783203125, -33.244140625, -32.2099609375, -31.17578125, -30.1416015625, -29.107421875, -28.0732421875, -27.0390625, -26.0048828125, -24.970703125, -23.9365234375, -22.90234375, -21.8681640625, -20.833984375, -19.7998046875, -18.765625, -17.7314453125, -16.697265625, -15.6630859375, -14.62890625, -13.5947265625, -12.560546875, -11.5263671875, -10.4921875, -9.4580078125, -8.423828125, -7.3896484375, -6.35546875, -5.3212890625, -4.287109375, -3.2529296875, -2.21875, -1.1845703125, -0.150390625, 0.8837890625, 1.91796875, 2.9521484375, 3.986328125, 5.0205078125, 6.0546875, 7.0888671875, 8.123046875, 9.1572265625, 10.19140625, 11.2255859375, 12.259765625, 13.2939453125, 14.328125, 15.3623046875, 16.396484375, 17.4306640625, 18.46484375, 19.4990234375, 20.533203125, 21.5673828125, 22.6015625, 23.6357421875, 24.669921875, 25.7041015625, 26.73828125, 27.7724609375, 28.806640625, 29.8408203125, 30.875]}, "gradients/decoder.transformer.h.14.ln_1.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 5.0, 10.0, 41.0, 69.0, 105.0, 152.0, 163.0, 191.0, 134.0, 79.0, 36.0, 19.0, 8.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-47.75235366821289, -46.520484924316406, -45.28861618041992, -44.05674743652344, -42.82487869262695, -41.59300994873047, -40.361141204833984, -39.1292724609375, -37.89739990234375, -36.665531158447266, -35.43366241455078, -34.2017936706543, -32.96992492675781, -31.738056182861328, -30.50618553161621, -29.274316787719727, -28.042449951171875, -26.81058120727539, -25.578712463378906, -24.346843719482422, -23.114974975585938, -21.883106231689453, -20.651235580444336, -19.41936683654785, -18.187498092651367, -16.955629348754883, -15.723760604858398, -14.491890907287598, -13.260022163391113, -12.028153419494629, -10.796283721923828, -9.564414978027344, -8.332550048828125, -7.100681304931641, -5.868812084197998, -4.6369428634643555, -3.405074119567871, -2.1732053756713867, -0.9413361549377441, 0.29053306579589844, 1.5224018096923828, 2.7542707920074463, 3.9861397743225098, 5.218008995056152, 6.449877738952637, 7.681746482849121, 8.913616180419922, 10.145484924316406, 11.37735366821289, 12.609222412109375, 13.84109115600586, 15.07296085357666, 16.304828643798828, 17.536697387695312, 18.76856803894043, 20.000436782836914, 21.2323055267334, 22.464174270629883, 23.696043014526367, 24.92791175842285, 26.15978240966797, 27.391651153564453, 28.623519897460938, 29.855388641357422, 31.087257385253906]}, "gradients/decoder.transformer.h.14.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 1.0, 5.0, 2.0, 2.0, 9.0, 3.0, 7.0, 8.0, 5.0, 18.0, 24.0, 31.0, 11.0, 20.0, 26.0, 37.0, 31.0, 29.0, 43.0, 27.0, 38.0, 52.0, 41.0, 43.0, 52.0, 43.0, 55.0, 44.0, 36.0, 39.0, 31.0, 24.0, 32.0, 25.0, 19.0, 17.0, 12.0, 14.0, 15.0, 9.0, 8.0, 9.0, 8.0, 1.0, 2.0, 3.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-46.419464111328125, -44.99747085571289, -43.575477600097656, -42.15348815917969, -40.73149490356445, -39.30950164794922, -37.88751220703125, -36.465518951416016, -35.04352569580078, -33.62153244018555, -32.19953918457031, -30.777549743652344, -29.35555648803711, -27.933563232421875, -26.511571884155273, -25.089580535888672, -23.667587280273438, -22.245594024658203, -20.8236026763916, -19.401611328125, -17.979618072509766, -16.55762481689453, -15.13563346862793, -13.713641166687012, -12.291648864746094, -10.869656562805176, -9.447664260864258, -8.02567195892334, -6.603679656982422, -5.181687355041504, -3.759695053100586, -2.337702751159668, -0.9157066345214844, 0.5062856674194336, 1.9282779693603516, 3.3502702713012695, 4.7722625732421875, 6.1942548751831055, 7.616247177124023, 9.038239479064941, 10.46023178100586, 11.882224082946777, 13.304216384887695, 14.726208686828613, 16.14820098876953, 17.570194244384766, 18.992185592651367, 20.41417694091797, 21.836170196533203, 23.258163452148438, 24.68015480041504, 26.10214614868164, 27.524139404296875, 28.94613265991211, 30.36812400817871, 31.790115356445312, 33.21210861206055, 34.63410186767578, 36.05609130859375, 37.478084564208984, 38.90007781982422, 40.32207107543945, 41.74406433105469, 43.166053771972656, 44.58804702758789]}, "gradients/decoder.transformer.h.13.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 5.0, 3.0, 0.0, 2.0, 7.0, 7.0, 5.0, 10.0, 12.0, 14.0, 10.0, 18.0, 19.0, 19.0, 18.0, 22.0, 46.0, 25.0, 30.0, 44.0, 37.0, 41.0, 32.0, 41.0, 48.0, 43.0, 39.0, 34.0, 48.0, 33.0, 26.0, 30.0, 35.0, 29.0, 25.0, 27.0, 27.0, 17.0, 20.0, 11.0, 13.0, 7.0, 6.0, 4.0, 6.0, 9.0, 2.0, 3.0, 1.0, 3.0, 2.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-4.19140625, -4.04217529296875, -3.8929443359375, -3.74371337890625, -3.594482421875, -3.44525146484375, -3.2960205078125, -3.14678955078125, -2.99755859375, -2.84832763671875, -2.6990966796875, -2.54986572265625, -2.400634765625, -2.25140380859375, -2.1021728515625, -1.95294189453125, -1.8037109375, -1.65447998046875, -1.5052490234375, -1.35601806640625, -1.206787109375, -1.05755615234375, -0.9083251953125, -0.75909423828125, -0.60986328125, -0.46063232421875, -0.3114013671875, -0.16217041015625, -0.012939453125, 0.13629150390625, 0.2855224609375, 0.43475341796875, 0.583984375, 0.73321533203125, 0.8824462890625, 1.03167724609375, 1.180908203125, 1.33013916015625, 1.4793701171875, 1.62860107421875, 1.77783203125, 1.92706298828125, 2.0762939453125, 2.22552490234375, 2.374755859375, 2.52398681640625, 2.6732177734375, 2.82244873046875, 2.9716796875, 3.12091064453125, 3.2701416015625, 3.41937255859375, 3.568603515625, 3.71783447265625, 3.8670654296875, 4.01629638671875, 4.16552734375, 4.31475830078125, 4.4639892578125, 4.61322021484375, 4.762451171875, 4.91168212890625, 5.0609130859375, 5.21014404296875, 5.359375]}, "gradients/decoder.transformer.h.13.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 3.0, 1.0, 2.0, 3.0, 4.0, 7.0, 3.0, 10.0, 4.0, 10.0, 15.0, 18.0, 23.0, 27.0, 25.0, 45.0, 41.0, 69.0, 121.0, 231.0, 455.0, 1277.0, 4565.0, 28336.0, 313541.0, 2259138.0, 1432515.0, 135264.0, 14280.0, 2702.0, 786.0, 315.0, 149.0, 78.0, 45.0, 36.0, 30.0, 17.0, 21.0, 12.0, 17.0, 9.0, 8.0, 11.0, 6.0, 5.0, 3.0, 2.0, 6.0, 2.0, 2.0, 2.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-10.734375, -10.3343505859375, -9.934326171875, -9.5343017578125, -9.13427734375, -8.7342529296875, -8.334228515625, -7.9342041015625, -7.5341796875, -7.1341552734375, -6.734130859375, -6.3341064453125, -5.93408203125, -5.5340576171875, -5.134033203125, -4.7340087890625, -4.333984375, -3.9339599609375, -3.533935546875, -3.1339111328125, -2.73388671875, -2.3338623046875, -1.933837890625, -1.5338134765625, -1.1337890625, -0.7337646484375, -0.333740234375, 0.0662841796875, 0.46630859375, 0.8663330078125, 1.266357421875, 1.6663818359375, 2.06640625, 2.4664306640625, 2.866455078125, 3.2664794921875, 3.66650390625, 4.0665283203125, 4.466552734375, 4.8665771484375, 5.2666015625, 5.6666259765625, 6.066650390625, 6.4666748046875, 6.86669921875, 7.2667236328125, 7.666748046875, 8.0667724609375, 8.466796875, 8.8668212890625, 9.266845703125, 9.6668701171875, 10.06689453125, 10.4669189453125, 10.866943359375, 11.2669677734375, 11.6669921875, 12.0670166015625, 12.467041015625, 12.8670654296875, 13.26708984375, 13.6671142578125, 14.067138671875, 14.4671630859375, 14.8671875]}, "gradients/decoder.transformer.h.13.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 4.0, 6.0, 7.0, 6.0, 14.0, 22.0, 23.0, 38.0, 52.0, 88.0, 141.0, 185.0, 290.0, 370.0, 481.0, 516.0, 509.0, 391.0, 291.0, 197.0, 155.0, 101.0, 66.0, 47.0, 29.0, 17.0, 14.0, 5.0, 10.0, 6.0, 3.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.8828125, -9.479736328125, -9.07666015625, -8.673583984375, -8.2705078125, -7.867431640625, -7.46435546875, -7.061279296875, -6.658203125, -6.255126953125, -5.85205078125, -5.448974609375, -5.0458984375, -4.642822265625, -4.23974609375, -3.836669921875, -3.43359375, -3.030517578125, -2.62744140625, -2.224365234375, -1.8212890625, -1.418212890625, -1.01513671875, -0.612060546875, -0.208984375, 0.194091796875, 0.59716796875, 1.000244140625, 1.4033203125, 1.806396484375, 2.20947265625, 2.612548828125, 3.015625, 3.418701171875, 3.82177734375, 4.224853515625, 4.6279296875, 5.031005859375, 5.43408203125, 5.837158203125, 6.240234375, 6.643310546875, 7.04638671875, 7.449462890625, 7.8525390625, 8.255615234375, 8.65869140625, 9.061767578125, 9.46484375, 9.867919921875, 10.27099609375, 10.674072265625, 11.0771484375, 11.480224609375, 11.88330078125, 12.286376953125, 12.689453125, 13.092529296875, 13.49560546875, 13.898681640625, 14.3017578125, 14.704833984375, 15.10791015625, 15.510986328125, 15.9140625]}, "gradients/decoder.transformer.h.13.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 4.0, 5.0, 10.0, 11.0, 7.0, 17.0, 32.0, 48.0, 80.0, 109.0, 190.0, 291.0, 628.0, 2822.0, 169566.0, 3918728.0, 98183.0, 2293.0, 552.0, 257.0, 175.0, 105.0, 60.0, 34.0, 26.0, 21.0, 16.0, 7.0, 5.0, 7.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-52.125, -50.8037109375, -49.482421875, -48.1611328125, -46.83984375, -45.5185546875, -44.197265625, -42.8759765625, -41.5546875, -40.2333984375, -38.912109375, -37.5908203125, -36.26953125, -34.9482421875, -33.626953125, -32.3056640625, -30.984375, -29.6630859375, -28.341796875, -27.0205078125, -25.69921875, -24.3779296875, -23.056640625, -21.7353515625, -20.4140625, -19.0927734375, -17.771484375, -16.4501953125, -15.12890625, -13.8076171875, -12.486328125, -11.1650390625, -9.84375, -8.5224609375, -7.201171875, -5.8798828125, -4.55859375, -3.2373046875, -1.916015625, -0.5947265625, 0.7265625, 2.0478515625, 3.369140625, 4.6904296875, 6.01171875, 7.3330078125, 8.654296875, 9.9755859375, 11.296875, 12.6181640625, 13.939453125, 15.2607421875, 16.58203125, 17.9033203125, 19.224609375, 20.5458984375, 21.8671875, 23.1884765625, 24.509765625, 25.8310546875, 27.15234375, 28.4736328125, 29.794921875, 31.1162109375, 32.4375]}, "gradients/decoder.transformer.h.13.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 11.0, 92.0, 391.0, 404.0, 98.0, 16.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-233.0514678955078, -226.18728637695312, -219.32308959960938, -212.4589080810547, -205.59471130371094, -198.73052978515625, -191.8663330078125, -185.0021514892578, -178.13796997070312, -171.27378845214844, -164.4095916748047, -157.54541015625, -150.68121337890625, -143.81703186035156, -136.95285034179688, -130.08865356445312, -123.22445678710938, -116.36026763916016, -109.49607849121094, -102.63189697265625, -95.7677001953125, -88.90351867675781, -82.0393295288086, -75.17514038085938, -68.31095123291016, -61.44676208496094, -54.58257293701172, -47.718387603759766, -40.85419845581055, -33.99000930786133, -27.125823974609375, -20.261634826660156, -13.3974609375, -6.533272743225098, 0.3309154510498047, 7.195102691650391, 14.05929183959961, 20.923480987548828, 27.78766632080078, 34.65185546875, 41.51604461669922, 48.38023376464844, 55.244422912597656, 62.10860824584961, 68.97279357910156, 75.83699035644531, 82.701171875, 89.56536102294922, 96.42955017089844, 103.29373931884766, 110.15792846679688, 117.02210998535156, 123.88630676269531, 130.75048828125, 137.61468505859375, 144.47886657714844, 151.34304809570312, 158.2072296142578, 165.07142639160156, 171.93560791015625, 178.7998046875, 185.6639862060547, 192.52816772460938, 199.39236450195312, 206.25656127929688]}, "gradients/decoder.transformer.h.13.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 2.0, 1.0, 1.0, 3.0, 2.0, 8.0, 5.0, 10.0, 12.0, 14.0, 18.0, 17.0, 31.0, 29.0, 23.0, 32.0, 40.0, 50.0, 40.0, 45.0, 50.0, 40.0, 44.0, 52.0, 53.0, 40.0, 42.0, 46.0, 37.0, 38.0, 41.0, 33.0, 18.0, 19.0, 18.0, 18.0, 16.0, 7.0, 7.0, 6.0, 2.0, 2.0, 1.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-39.510833740234375, -38.17347717285156, -36.836124420166016, -35.4987678527832, -34.161415100097656, -32.824058532714844, -31.486703872680664, -30.149349212646484, -28.811994552612305, -27.474639892578125, -26.137285232543945, -24.799930572509766, -23.462574005126953, -22.125221252441406, -20.787864685058594, -19.450510025024414, -18.113155364990234, -16.775800704956055, -15.438446044921875, -14.101090431213379, -12.7637357711792, -11.42638111114502, -10.089025497436523, -8.751670837402344, -7.414316177368164, -6.076961517333984, -4.7396063804626465, -3.4022512435913086, -2.064896583557129, -0.7275419235229492, 0.6098136901855469, 1.9471683502197266, 3.2845230102539062, 4.621877670288086, 5.959232807159424, 7.296587944030762, 8.633942604064941, 9.971297264099121, 11.308652877807617, 12.646007537841797, 13.983362197875977, 15.320716857910156, 16.658071517944336, 17.995426177978516, 19.332782745361328, 20.670135498046875, 22.007492065429688, 23.344846725463867, 24.682201385498047, 26.019556045532227, 27.356910705566406, 28.69426727294922, 30.031620025634766, 31.368976593017578, 32.706329345703125, 34.04368591308594, 35.38104248046875, 36.71839904785156, 38.05575180053711, 39.39310836791992, 40.73046112060547, 42.06781768798828, 43.405174255371094, 44.74252700805664, 46.07987976074219]}, "gradients/decoder.transformer.h.13.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 2.0, 7.0, 4.0, 2.0, 6.0, 7.0, 11.0, 12.0, 6.0, 8.0, 16.0, 18.0, 35.0, 27.0, 27.0, 26.0, 35.0, 36.0, 36.0, 34.0, 41.0, 45.0, 45.0, 36.0, 33.0, 45.0, 37.0, 33.0, 29.0, 37.0, 40.0, 23.0, 37.0, 30.0, 26.0, 24.0, 11.0, 17.0, 12.0, 10.0, 7.0, 10.0, 6.0, 7.0, 2.0, 4.0, 5.0, 5.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.00390625, -3.85540771484375, -3.7069091796875, -3.55841064453125, -3.409912109375, -3.26141357421875, -3.1129150390625, -2.96441650390625, -2.81591796875, -2.66741943359375, -2.5189208984375, -2.37042236328125, -2.221923828125, -2.07342529296875, -1.9249267578125, -1.77642822265625, -1.6279296875, -1.47943115234375, -1.3309326171875, -1.18243408203125, -1.033935546875, -0.88543701171875, -0.7369384765625, -0.58843994140625, -0.43994140625, -0.29144287109375, -0.1429443359375, 0.00555419921875, 0.154052734375, 0.30255126953125, 0.4510498046875, 0.59954833984375, 0.748046875, 0.89654541015625, 1.0450439453125, 1.19354248046875, 1.342041015625, 1.49053955078125, 1.6390380859375, 1.78753662109375, 1.93603515625, 2.08453369140625, 2.2330322265625, 2.38153076171875, 2.530029296875, 2.67852783203125, 2.8270263671875, 2.97552490234375, 3.1240234375, 3.27252197265625, 3.4210205078125, 3.56951904296875, 3.718017578125, 3.86651611328125, 4.0150146484375, 4.16351318359375, 4.31201171875, 4.46051025390625, 4.6090087890625, 4.75750732421875, 4.906005859375, 5.05450439453125, 5.2030029296875, 5.35150146484375, 5.5]}, "gradients/decoder.transformer.h.13.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 5.0, 2.0, 10.0, 14.0, 20.0, 27.0, 39.0, 66.0, 103.0, 146.0, 216.0, 281.0, 468.0, 554.0, 919.0, 1335.0, 2079.0, 2991.0, 4523.0, 6943.0, 10933.0, 16872.0, 27550.0, 44067.0, 70541.0, 107742.0, 149284.0, 168670.0, 145547.0, 104390.0, 67629.0, 41991.0, 25838.0, 16343.0, 10375.0, 6650.0, 4443.0, 2901.0, 1955.0, 1294.0, 923.0, 560.0, 447.0, 289.0, 183.0, 101.0, 100.0, 77.0, 48.0, 35.0, 20.0, 12.0, 6.0, 5.0, 5.0, 0.0, 4.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.6689453125, -0.6462326049804688, -0.6235198974609375, -0.6008071899414062, -0.578094482421875, -0.5553817749023438, -0.5326690673828125, -0.5099563598632812, -0.48724365234375, -0.46453094482421875, -0.4418182373046875, -0.41910552978515625, -0.396392822265625, -0.37368011474609375, -0.3509674072265625, -0.32825469970703125, -0.3055419921875, -0.28282928466796875, -0.2601165771484375, -0.23740386962890625, -0.214691162109375, -0.19197845458984375, -0.1692657470703125, -0.14655303955078125, -0.12384033203125, -0.10112762451171875, -0.0784149169921875, -0.05570220947265625, -0.032989501953125, -0.01027679443359375, 0.0124359130859375, 0.03514862060546875, 0.057861328125, 0.08057403564453125, 0.1032867431640625, 0.12599945068359375, 0.148712158203125, 0.17142486572265625, 0.1941375732421875, 0.21685028076171875, 0.23956298828125, 0.26227569580078125, 0.2849884033203125, 0.30770111083984375, 0.330413818359375, 0.35312652587890625, 0.3758392333984375, 0.39855194091796875, 0.4212646484375, 0.44397735595703125, 0.4666900634765625, 0.48940277099609375, 0.512115478515625, 0.5348281860351562, 0.5575408935546875, 0.5802536010742188, 0.60296630859375, 0.6256790161132812, 0.6483917236328125, 0.6711044311523438, 0.693817138671875, 0.7165298461914062, 0.7392425537109375, 0.7619552612304688, 0.78466796875]}, "gradients/decoder.transformer.h.13.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 3.0, 6.0, 0.0, 1.0, 3.0, 5.0, 6.0, 7.0, 5.0, 3.0, 10.0, 11.0, 15.0, 8.0, 19.0, 16.0, 19.0, 21.0, 25.0, 34.0, 37.0, 44.0, 37.0, 43.0, 47.0, 39.0, 48.0, 1056.0, 43.0, 32.0, 46.0, 35.0, 39.0, 36.0, 30.0, 32.0, 24.0, 21.0, 24.0, 21.0, 13.0, 10.0, 14.0, 11.0, 11.0, 7.0, 6.0, 6.0, 4.0, 3.0, 3.0, 2.0, 1.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.255859375, -3.147674560546875, -3.03948974609375, -2.931304931640625, -2.8231201171875, -2.714935302734375, -2.60675048828125, -2.498565673828125, -2.390380859375, -2.282196044921875, -2.17401123046875, -2.065826416015625, -1.9576416015625, -1.849456787109375, -1.74127197265625, -1.633087158203125, -1.52490234375, -1.416717529296875, -1.30853271484375, -1.200347900390625, -1.0921630859375, -0.983978271484375, -0.87579345703125, -0.767608642578125, -0.659423828125, -0.551239013671875, -0.44305419921875, -0.334869384765625, -0.2266845703125, -0.118499755859375, -0.01031494140625, 0.097869873046875, 0.2060546875, 0.314239501953125, 0.42242431640625, 0.530609130859375, 0.6387939453125, 0.746978759765625, 0.85516357421875, 0.963348388671875, 1.071533203125, 1.179718017578125, 1.28790283203125, 1.396087646484375, 1.5042724609375, 1.612457275390625, 1.72064208984375, 1.828826904296875, 1.93701171875, 2.045196533203125, 2.15338134765625, 2.261566162109375, 2.3697509765625, 2.477935791015625, 2.58612060546875, 2.694305419921875, 2.802490234375, 2.910675048828125, 3.01885986328125, 3.127044677734375, 3.2352294921875, 3.343414306640625, 3.45159912109375, 3.559783935546875, 3.66796875]}, "gradients/decoder.transformer.h.13.crossattention.c_attn.weight": {"_type": "histogram", "values": [3.0, 2.0, 2.0, 6.0, 3.0, 6.0, 13.0, 19.0, 27.0, 35.0, 31.0, 65.0, 104.0, 159.0, 219.0, 320.0, 555.0, 884.0, 1345.0, 2242.0, 3554.0, 5829.0, 9410.0, 15614.0, 26879.0, 45604.0, 76750.0, 121382.0, 206638.0, 1188650.0, 148250.0, 96384.0, 59097.0, 35065.0, 20316.0, 12214.0, 7236.0, 4521.0, 2916.0, 1675.0, 1152.0, 690.0, 460.0, 284.0, 191.0, 127.0, 72.0, 56.0, 34.0, 18.0, 13.0, 8.0, 6.0, 4.0, 4.0, 3.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.424072265625, -0.4095726013183594, -0.39507293701171875, -0.3805732727050781, -0.3660736083984375, -0.3515739440917969, -0.33707427978515625, -0.3225746154785156, -0.308074951171875, -0.2935752868652344, -0.27907562255859375, -0.2645759582519531, -0.2500762939453125, -0.23557662963867188, -0.22107696533203125, -0.20657730102539062, -0.19207763671875, -0.17757797241210938, -0.16307830810546875, -0.14857864379882812, -0.1340789794921875, -0.11957931518554688, -0.10507965087890625, -0.09057998657226562, -0.076080322265625, -0.061580657958984375, -0.04708099365234375, -0.032581329345703125, -0.0180816650390625, -0.003582000732421875, 0.01091766357421875, 0.025417327880859375, 0.0399169921875, 0.054416656494140625, 0.06891632080078125, 0.08341598510742188, 0.0979156494140625, 0.11241531372070312, 0.12691497802734375, 0.14141464233398438, 0.155914306640625, 0.17041397094726562, 0.18491363525390625, 0.19941329956054688, 0.2139129638671875, 0.22841262817382812, 0.24291229248046875, 0.2574119567871094, 0.27191162109375, 0.2864112854003906, 0.30091094970703125, 0.3154106140136719, 0.3299102783203125, 0.3444099426269531, 0.35890960693359375, 0.3734092712402344, 0.387908935546875, 0.4024085998535156, 0.41690826416015625, 0.4314079284667969, 0.4459075927734375, 0.4604072570800781, 0.47490692138671875, 0.4894065856933594, 0.50390625]}, "gradients/decoder.transformer.h.13.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 2.0, 1.0, 3.0, 5.0, 6.0, 8.0, 14.0, 10.0, 18.0, 22.0, 12.0, 20.0, 32.0, 30.0, 53.0, 40.0, 54.0, 76.0, 75.0, 81.0, 75.0, 69.0, 51.0, 35.0, 52.0, 34.0, 20.0, 18.0, 22.0, 14.0, 12.0, 10.0, 12.0, 10.0, 6.0, 5.0, 1.0, 3.0, 0.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.038665771484375, -0.03754568099975586, -0.03642559051513672, -0.03530550003051758, -0.03418540954589844, -0.0330653190612793, -0.031945228576660156, -0.030825138092041016, -0.029705047607421875, -0.028584957122802734, -0.027464866638183594, -0.026344776153564453, -0.025224685668945312, -0.024104595184326172, -0.02298450469970703, -0.02186441421508789, -0.02074432373046875, -0.01962423324584961, -0.01850414276123047, -0.017384052276611328, -0.016263961791992188, -0.015143871307373047, -0.014023780822753906, -0.012903690338134766, -0.011783599853515625, -0.010663509368896484, -0.009543418884277344, -0.008423328399658203, -0.0073032379150390625, -0.006183147430419922, -0.005063056945800781, -0.003942966461181641, -0.0028228759765625, -0.0017027854919433594, -0.0005826950073242188, 0.0005373954772949219, 0.0016574859619140625, 0.002777576446533203, 0.0038976669311523438, 0.005017757415771484, 0.006137847900390625, 0.007257938385009766, 0.008378028869628906, 0.009498119354248047, 0.010618209838867188, 0.011738300323486328, 0.012858390808105469, 0.01397848129272461, 0.01509857177734375, 0.01621866226196289, 0.01733875274658203, 0.018458843231201172, 0.019578933715820312, 0.020699024200439453, 0.021819114685058594, 0.022939205169677734, 0.024059295654296875, 0.025179386138916016, 0.026299476623535156, 0.027419567108154297, 0.028539657592773438, 0.029659748077392578, 0.03077983856201172, 0.03189992904663086, 0.03302001953125]}, "gradients/decoder.transformer.h.13.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 2.0, 3.0, 3.0, 8.0, 10.0, 12.0, 12.0, 15.0, 17.0, 24.0, 26.0, 47.0, 62.0, 100.0, 133.0, 274.0, 602.0, 3095.0, 1037943.0, 4722.0, 645.0, 282.0, 143.0, 108.0, 67.0, 56.0, 33.0, 17.0, 25.0, 22.0, 12.0, 15.0, 7.0, 5.0, 8.0, 1.0, 4.0, 1.0, 3.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.642578125, -0.6208267211914062, -0.5990753173828125, -0.5773239135742188, -0.555572509765625, -0.5338211059570312, -0.5120697021484375, -0.49031829833984375, -0.46856689453125, -0.44681549072265625, -0.4250640869140625, -0.40331268310546875, -0.381561279296875, -0.35980987548828125, -0.3380584716796875, -0.31630706787109375, -0.2945556640625, -0.27280426025390625, -0.2510528564453125, -0.22930145263671875, -0.207550048828125, -0.18579864501953125, -0.1640472412109375, -0.14229583740234375, -0.12054443359375, -0.09879302978515625, -0.0770416259765625, -0.05529022216796875, -0.033538818359375, -0.01178741455078125, 0.0099639892578125, 0.03171539306640625, 0.053466796875, 0.07521820068359375, 0.0969696044921875, 0.11872100830078125, 0.140472412109375, 0.16222381591796875, 0.1839752197265625, 0.20572662353515625, 0.22747802734375, 0.24922943115234375, 0.2709808349609375, 0.29273223876953125, 0.314483642578125, 0.33623504638671875, 0.3579864501953125, 0.37973785400390625, 0.4014892578125, 0.42324066162109375, 0.4449920654296875, 0.46674346923828125, 0.488494873046875, 0.5102462768554688, 0.5319976806640625, 0.5537490844726562, 0.57550048828125, 0.5972518920898438, 0.6190032958984375, 0.6407546997070312, 0.662506103515625, 0.6842575073242188, 0.7060089111328125, 0.7277603149414062, 0.74951171875]}, "gradients/decoder.transformer.h.13.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 5.0, 231.0, 783.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.13723061978816986, -0.11794070154428482, -0.09865078330039978, -0.07936087250709534, -0.0600709542632103, -0.040781036019325256, -0.021491125226020813, -0.0022011995315551758, 0.017088711261749268, 0.03637862950563431, 0.05566854402422905, 0.07495845854282379, 0.09424837678670883, 0.11353829503059387, 0.13282820582389832, 0.15211813151836395, 0.1714080423116684, 0.19069795310497284, 0.20998787879943848, 0.22927778959274292, 0.24856770038604736, 0.2678576111793518, 0.28714752197265625, 0.3064374625682831, 0.3257273733615875, 0.34501728415489197, 0.3643071949481964, 0.38359713554382324, 0.4028870463371277, 0.42217695713043213, 0.4414668679237366, 0.460756778717041, 0.48004668951034546, 0.4993366003036499, 0.5186265110969543, 0.5379164218902588, 0.5572063326835632, 0.5764962434768677, 0.5957862138748169, 0.6150761246681213, 0.6343660354614258, 0.6536559462547302, 0.6729458570480347, 0.6922357678413391, 0.7115256786346436, 0.7308156490325928, 0.7501055002212524, 0.7693954706192017, 0.7886853218078613, 0.8079752326011658, 0.8272651433944702, 0.8465550541877747, 0.8658449649810791, 0.8851349353790283, 0.904424786567688, 0.9237147569656372, 0.9430046677589417, 0.9622945785522461, 0.9815844893455505, 1.000874400138855, 1.0201643705368042, 1.0394542217254639, 1.058744192123413, 1.0780340433120728, 1.097324013710022]}, "gradients/decoder.transformer.h.13.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 0.0, 4.0, 2.0, 9.0, 10.0, 8.0, 6.0, 11.0, 7.0, 13.0, 12.0, 27.0, 14.0, 32.0, 24.0, 32.0, 27.0, 32.0, 32.0, 31.0, 39.0, 43.0, 48.0, 41.0, 41.0, 36.0, 36.0, 52.0, 42.0, 20.0, 28.0, 34.0, 28.0, 26.0, 30.0, 18.0, 21.0, 22.0, 16.0, 12.0, 4.0, 12.0, 6.0, 5.0, 7.0, 5.0, 3.0, 4.0, 1.0, 1.0, 3.0, 1.0], "bins": [-0.05590951442718506, -0.054350536316633224, -0.05279155820608139, -0.051232583820819855, -0.04967360571026802, -0.048114627599716187, -0.04655565321445465, -0.04499667510390282, -0.04343769699335098, -0.04187871888279915, -0.040319740772247314, -0.03876076638698578, -0.037201788276433945, -0.03564281016588211, -0.034083835780620575, -0.03252485767006874, -0.030965879559516907, -0.029406901448965073, -0.027847925201058388, -0.026288948953151703, -0.02472997084259987, -0.023170992732048035, -0.02161201648414135, -0.020053040236234665, -0.01849406212568283, -0.016935084015130997, -0.015376107767224312, -0.013817130587995052, -0.012258153408765793, -0.010699176229536533, -0.009140199050307274, -0.007581221871078014, -0.006022244691848755, -0.004463267512619495, -0.002904290333390236, -0.0013453131541609764, 0.00021366402506828308, 0.0017726412042975426, 0.003331618383526802, 0.0048905955627560616, 0.006449572741985321, 0.00800854992121458, 0.00956752710044384, 0.0111265042796731, 0.012685481458902359, 0.014244458638131618, 0.015803435817360878, 0.017362412065267563, 0.018921390175819397, 0.02048036828637123, 0.022039344534277916, 0.0235983207821846, 0.025157298892736435, 0.02671627700328827, 0.028275253251194954, 0.02983422949910164, 0.03139320760965347, 0.03295218572020531, 0.03451116383075714, 0.03607013821601868, 0.03762911632657051, 0.039188094437122345, 0.04074706882238388, 0.042306046932935715, 0.04386502504348755]}, "gradients/decoder.transformer.h.13.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 2.0, 7.0, 4.0, 2.0, 6.0, 7.0, 11.0, 12.0, 7.0, 7.0, 16.0, 18.0, 35.0, 27.0, 27.0, 26.0, 35.0, 36.0, 36.0, 33.0, 42.0, 46.0, 43.0, 37.0, 34.0, 44.0, 37.0, 33.0, 29.0, 37.0, 40.0, 23.0, 37.0, 30.0, 26.0, 24.0, 11.0, 16.0, 13.0, 10.0, 7.0, 10.0, 6.0, 7.0, 2.0, 4.0, 5.0, 5.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.00390625, -3.85540771484375, -3.7069091796875, -3.55841064453125, -3.409912109375, -3.26141357421875, -3.1129150390625, -2.96441650390625, -2.81591796875, -2.66741943359375, -2.5189208984375, -2.37042236328125, -2.221923828125, -2.07342529296875, -1.9249267578125, -1.77642822265625, -1.6279296875, -1.47943115234375, -1.3309326171875, -1.18243408203125, -1.033935546875, -0.88543701171875, -0.7369384765625, -0.58843994140625, -0.43994140625, -0.29144287109375, -0.1429443359375, 0.00555419921875, 0.154052734375, 0.30255126953125, 0.4510498046875, 0.59954833984375, 0.748046875, 0.89654541015625, 1.0450439453125, 1.19354248046875, 1.342041015625, 1.49053955078125, 1.6390380859375, 1.78753662109375, 1.93603515625, 2.08453369140625, 2.2330322265625, 2.38153076171875, 2.530029296875, 2.67852783203125, 2.8270263671875, 2.97552490234375, 3.1240234375, 3.27252197265625, 3.4210205078125, 3.56951904296875, 3.718017578125, 3.86651611328125, 4.0150146484375, 4.16351318359375, 4.31201171875, 4.46051025390625, 4.6090087890625, 4.75750732421875, 4.906005859375, 5.05450439453125, 5.2030029296875, 5.35150146484375, 5.5]}, "gradients/decoder.transformer.h.13.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 6.0, 2.0, 4.0, 18.0, 35.0, 52.0, 116.0, 195.0, 362.0, 701.0, 1533.0, 3210.0, 7008.0, 17992.0, 54705.0, 184808.0, 437453.0, 233318.0, 69326.0, 21991.0, 8517.0, 3747.0, 1727.0, 838.0, 419.0, 229.0, 88.0, 79.0, 27.0, 20.0, 14.0, 6.0, 6.0, 3.0, 4.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-7.86328125, -7.64239501953125, -7.4215087890625, -7.20062255859375, -6.979736328125, -6.75885009765625, -6.5379638671875, -6.31707763671875, -6.09619140625, -5.87530517578125, -5.6544189453125, -5.43353271484375, -5.212646484375, -4.99176025390625, -4.7708740234375, -4.54998779296875, -4.3291015625, -4.10821533203125, -3.8873291015625, -3.66644287109375, -3.445556640625, -3.22467041015625, -3.0037841796875, -2.78289794921875, -2.56201171875, -2.34112548828125, -2.1202392578125, -1.89935302734375, -1.678466796875, -1.45758056640625, -1.2366943359375, -1.01580810546875, -0.794921875, -0.57403564453125, -0.3531494140625, -0.13226318359375, 0.088623046875, 0.30950927734375, 0.5303955078125, 0.75128173828125, 0.97216796875, 1.19305419921875, 1.4139404296875, 1.63482666015625, 1.855712890625, 2.07659912109375, 2.2974853515625, 2.51837158203125, 2.7392578125, 2.96014404296875, 3.1810302734375, 3.40191650390625, 3.622802734375, 3.84368896484375, 4.0645751953125, 4.28546142578125, 4.50634765625, 4.72723388671875, 4.9481201171875, 5.16900634765625, 5.389892578125, 5.61077880859375, 5.8316650390625, 6.05255126953125, 6.2734375]}, "gradients/decoder.transformer.h.13.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 2.0, 3.0, 1.0, 6.0, 1.0, 4.0, 5.0, 6.0, 5.0, 4.0, 9.0, 12.0, 11.0, 17.0, 23.0, 22.0, 24.0, 25.0, 35.0, 44.0, 37.0, 42.0, 52.0, 76.0, 169.0, 358.0, 1407.0, 188.0, 79.0, 64.0, 55.0, 36.0, 37.0, 25.0, 27.0, 22.0, 33.0, 17.0, 15.0, 12.0, 9.0, 10.0, 12.0, 4.0, 7.0, 6.0, 2.0, 1.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-14.96875, -14.453857421875, -13.93896484375, -13.424072265625, -12.9091796875, -12.394287109375, -11.87939453125, -11.364501953125, -10.849609375, -10.334716796875, -9.81982421875, -9.304931640625, -8.7900390625, -8.275146484375, -7.76025390625, -7.245361328125, -6.73046875, -6.215576171875, -5.70068359375, -5.185791015625, -4.6708984375, -4.156005859375, -3.64111328125, -3.126220703125, -2.611328125, -2.096435546875, -1.58154296875, -1.066650390625, -0.5517578125, -0.036865234375, 0.47802734375, 0.992919921875, 1.5078125, 2.022705078125, 2.53759765625, 3.052490234375, 3.5673828125, 4.082275390625, 4.59716796875, 5.112060546875, 5.626953125, 6.141845703125, 6.65673828125, 7.171630859375, 7.6865234375, 8.201416015625, 8.71630859375, 9.231201171875, 9.74609375, 10.260986328125, 10.77587890625, 11.290771484375, 11.8056640625, 12.320556640625, 12.83544921875, 13.350341796875, 13.865234375, 14.380126953125, 14.89501953125, 15.409912109375, 15.9248046875, 16.439697265625, 16.95458984375, 17.469482421875, 17.984375]}, "gradients/decoder.transformer.h.13.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 12.0, 10.0, 9.0, 26.0, 31.0, 45.0, 76.0, 115.0, 149.0, 265.0, 432.0, 1228.0, 69266.0, 3066597.0, 5765.0, 698.0, 317.0, 243.0, 132.0, 101.0, 47.0, 34.0, 26.0, 21.0, 20.0, 5.0, 7.0, 6.0, 5.0, 7.0, 3.0, 0.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-76.3125, -74.2646484375, -72.216796875, -70.1689453125, -68.12109375, -66.0732421875, -64.025390625, -61.9775390625, -59.9296875, -57.8818359375, -55.833984375, -53.7861328125, -51.73828125, -49.6904296875, -47.642578125, -45.5947265625, -43.546875, -41.4990234375, -39.451171875, -37.4033203125, -35.35546875, -33.3076171875, -31.259765625, -29.2119140625, -27.1640625, -25.1162109375, -23.068359375, -21.0205078125, -18.97265625, -16.9248046875, -14.876953125, -12.8291015625, -10.78125, -8.7333984375, -6.685546875, -4.6376953125, -2.58984375, -0.5419921875, 1.505859375, 3.5537109375, 5.6015625, 7.6494140625, 9.697265625, 11.7451171875, 13.79296875, 15.8408203125, 17.888671875, 19.9365234375, 21.984375, 24.0322265625, 26.080078125, 28.1279296875, 30.17578125, 32.2236328125, 34.271484375, 36.3193359375, 38.3671875, 40.4150390625, 42.462890625, 44.5107421875, 46.55859375, 48.6064453125, 50.654296875, 52.7021484375, 54.75]}, "gradients/decoder.transformer.h.13.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 78.0, 355.0, 434.0, 138.0, 10.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-195.0911407470703, -191.5563201904297, -188.02151489257812, -184.4866943359375, -180.95188903808594, -177.4170684814453, -173.88226318359375, -170.34744262695312, -166.81263732910156, -163.27781677246094, -159.74301147460938, -156.20819091796875, -152.6733856201172, -149.13856506347656, -145.603759765625, -142.06893920898438, -138.53411865234375, -134.99929809570312, -131.46449279785156, -127.92967987060547, -124.39486694335938, -120.86005401611328, -117.32524108886719, -113.79042053222656, -110.255615234375, -106.7208023071289, -103.18598937988281, -99.65117645263672, -96.11636352539062, -92.58155059814453, -89.04673767089844, -85.51191711425781, -81.97711181640625, -78.44229888916016, -74.90748596191406, -71.37267303466797, -67.83786010742188, -64.30304718017578, -60.76823043823242, -57.23341751098633, -53.6986083984375, -50.163795471191406, -46.62898254394531, -43.09416961669922, -39.559356689453125, -36.02454376220703, -32.48972702026367, -28.954914093017578, -25.42009925842285, -21.885286331176758, -18.35047149658203, -14.815658569335938, -11.280845642089844, -7.74603271484375, -4.211217880249023, -0.6764049530029297, 2.858407974243164, 6.393221378326416, 9.928034782409668, 13.462848663330078, 16.997661590576172, 20.532474517822266, 24.067289352416992, 27.602102279663086, 31.13691520690918]}, "gradients/decoder.transformer.h.13.ln_1.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 3.0, 2.0, 6.0, 5.0, 8.0, 7.0, 8.0, 8.0, 13.0, 12.0, 9.0, 14.0, 12.0, 25.0, 24.0, 29.0, 33.0, 36.0, 34.0, 33.0, 36.0, 33.0, 24.0, 41.0, 42.0, 37.0, 34.0, 53.0, 40.0, 44.0, 30.0, 30.0, 35.0, 38.0, 18.0, 24.0, 27.0, 20.0, 16.0, 10.0, 13.0, 11.0, 6.0, 10.0, 7.0, 5.0, 3.0, 5.0, 2.0, 1.0, 1.0], "bins": [-49.89497756958008, -48.5804328918457, -47.26588821411133, -45.95134353637695, -44.636802673339844, -43.32225799560547, -42.007713317871094, -40.69316864013672, -39.378623962402344, -38.06407928466797, -36.749534606933594, -35.43498992919922, -34.120445251464844, -32.805904388427734, -31.49135971069336, -30.176815032958984, -28.86227035522461, -27.547725677490234, -26.23318099975586, -24.918638229370117, -23.604093551635742, -22.289548873901367, -20.975006103515625, -19.66046142578125, -18.345916748046875, -17.0313720703125, -15.716828346252441, -14.402284622192383, -13.087739944458008, -11.773195266723633, -10.458651542663574, -9.144107818603516, -7.829566955566406, -6.5150227546691895, -5.200478553771973, -3.885934352874756, -2.571390151977539, -1.2568459510803223, 0.05769824981689453, 1.3722419738769531, 2.686786651611328, 4.001330852508545, 5.315875053405762, 6.6304192543029785, 7.944963455200195, 9.25950813293457, 10.574051856994629, 11.888595581054688, 13.203140258789062, 14.517684936523438, 15.832228660583496, 17.146772384643555, 18.46131706237793, 19.775861740112305, 21.090404510498047, 22.404949188232422, 23.719493865966797, 25.034038543701172, 26.348583221435547, 27.66312599182129, 28.977670669555664, 30.29221534729004, 31.60675811767578, 32.921302795410156, 34.23584747314453]}, "gradients/decoder.transformer.h.12.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 2.0, 6.0, 2.0, 0.0, 7.0, 5.0, 11.0, 8.0, 11.0, 13.0, 15.0, 18.0, 18.0, 21.0, 21.0, 27.0, 35.0, 39.0, 29.0, 33.0, 43.0, 46.0, 41.0, 39.0, 28.0, 31.0, 53.0, 34.0, 31.0, 45.0, 44.0, 27.0, 26.0, 39.0, 31.0, 24.0, 18.0, 22.0, 13.0, 8.0, 8.0, 13.0, 3.0, 5.0, 3.0, 4.0, 5.0, 3.0, 5.0, 0.0, 3.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-4.66015625, -4.4976806640625, -4.335205078125, -4.1727294921875, -4.01025390625, -3.8477783203125, -3.685302734375, -3.5228271484375, -3.3603515625, -3.1978759765625, -3.035400390625, -2.8729248046875, -2.71044921875, -2.5479736328125, -2.385498046875, -2.2230224609375, -2.060546875, -1.8980712890625, -1.735595703125, -1.5731201171875, -1.41064453125, -1.2481689453125, -1.085693359375, -0.9232177734375, -0.7607421875, -0.5982666015625, -0.435791015625, -0.2733154296875, -0.11083984375, 0.0516357421875, 0.214111328125, 0.3765869140625, 0.5390625, 0.7015380859375, 0.864013671875, 1.0264892578125, 1.18896484375, 1.3514404296875, 1.513916015625, 1.6763916015625, 1.8388671875, 2.0013427734375, 2.163818359375, 2.3262939453125, 2.48876953125, 2.6512451171875, 2.813720703125, 2.9761962890625, 3.138671875, 3.3011474609375, 3.463623046875, 3.6260986328125, 3.78857421875, 3.9510498046875, 4.113525390625, 4.2760009765625, 4.4384765625, 4.6009521484375, 4.763427734375, 4.9259033203125, 5.08837890625, 5.2508544921875, 5.413330078125, 5.5758056640625, 5.73828125]}, "gradients/decoder.transformer.h.12.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 5.0, 4.0, 4.0, 4.0, 6.0, 10.0, 10.0, 14.0, 18.0, 23.0, 36.0, 41.0, 70.0, 82.0, 117.0, 192.0, 280.0, 463.0, 833.0, 1778.0, 4269.0, 13274.0, 54530.0, 266397.0, 1124656.0, 1806942.0, 723068.0, 151236.0, 31732.0, 8457.0, 2984.0, 1233.0, 623.0, 286.0, 205.0, 119.0, 78.0, 75.0, 36.0, 30.0, 21.0, 8.0, 9.0, 7.0, 9.0, 1.0, 5.0, 6.0, 2.0, 0.0, 2.0, 4.0, 1.0, 2.0, 0.0, 2.0, 1.0, 1.0], "bins": [-8.140625, -7.8807373046875, -7.620849609375, -7.3609619140625, -7.10107421875, -6.8411865234375, -6.581298828125, -6.3214111328125, -6.0615234375, -5.8016357421875, -5.541748046875, -5.2818603515625, -5.02197265625, -4.7620849609375, -4.502197265625, -4.2423095703125, -3.982421875, -3.7225341796875, -3.462646484375, -3.2027587890625, -2.94287109375, -2.6829833984375, -2.423095703125, -2.1632080078125, -1.9033203125, -1.6434326171875, -1.383544921875, -1.1236572265625, -0.86376953125, -0.6038818359375, -0.343994140625, -0.0841064453125, 0.17578125, 0.4356689453125, 0.695556640625, 0.9554443359375, 1.21533203125, 1.4752197265625, 1.735107421875, 1.9949951171875, 2.2548828125, 2.5147705078125, 2.774658203125, 3.0345458984375, 3.29443359375, 3.5543212890625, 3.814208984375, 4.0740966796875, 4.333984375, 4.5938720703125, 4.853759765625, 5.1136474609375, 5.37353515625, 5.6334228515625, 5.893310546875, 6.1531982421875, 6.4130859375, 6.6729736328125, 6.932861328125, 7.1927490234375, 7.45263671875, 7.7125244140625, 7.972412109375, 8.2322998046875, 8.4921875]}, "gradients/decoder.transformer.h.12.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 1.0, 4.0, 6.0, 2.0, 8.0, 7.0, 7.0, 8.0, 22.0, 25.0, 32.0, 54.0, 60.0, 95.0, 125.0, 194.0, 228.0, 307.0, 420.0, 475.0, 475.0, 386.0, 327.0, 217.0, 168.0, 124.0, 98.0, 54.0, 49.0, 39.0, 23.0, 13.0, 8.0, 7.0, 2.0, 3.0, 2.0, 1.0, 1.0, 4.0, 0.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-11.125, -10.7760009765625, -10.427001953125, -10.0780029296875, -9.72900390625, -9.3800048828125, -9.031005859375, -8.6820068359375, -8.3330078125, -7.9840087890625, -7.635009765625, -7.2860107421875, -6.93701171875, -6.5880126953125, -6.239013671875, -5.8900146484375, -5.541015625, -5.1920166015625, -4.843017578125, -4.4940185546875, -4.14501953125, -3.7960205078125, -3.447021484375, -3.0980224609375, -2.7490234375, -2.4000244140625, -2.051025390625, -1.7020263671875, -1.35302734375, -1.0040283203125, -0.655029296875, -0.3060302734375, 0.04296875, 0.3919677734375, 0.740966796875, 1.0899658203125, 1.43896484375, 1.7879638671875, 2.136962890625, 2.4859619140625, 2.8349609375, 3.1839599609375, 3.532958984375, 3.8819580078125, 4.23095703125, 4.5799560546875, 4.928955078125, 5.2779541015625, 5.626953125, 5.9759521484375, 6.324951171875, 6.6739501953125, 7.02294921875, 7.3719482421875, 7.720947265625, 8.0699462890625, 8.4189453125, 8.7679443359375, 9.116943359375, 9.4659423828125, 9.81494140625, 10.1639404296875, 10.512939453125, 10.8619384765625, 11.2109375]}, "gradients/decoder.transformer.h.12.mlp.c_fc.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 2.0, 1.0, 1.0, 3.0, 4.0, 1.0, 5.0, 4.0, 8.0, 13.0, 20.0, 33.0, 54.0, 71.0, 94.0, 158.0, 205.0, 354.0, 705.0, 4814.0, 279867.0, 3783061.0, 120188.0, 2982.0, 659.0, 344.0, 202.0, 153.0, 75.0, 64.0, 50.0, 27.0, 21.0, 10.0, 10.0, 5.0, 12.0, 3.0, 7.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-37.90625, -36.73681640625, -35.5673828125, -34.39794921875, -33.228515625, -32.05908203125, -30.8896484375, -29.72021484375, -28.55078125, -27.38134765625, -26.2119140625, -25.04248046875, -23.873046875, -22.70361328125, -21.5341796875, -20.36474609375, -19.1953125, -18.02587890625, -16.8564453125, -15.68701171875, -14.517578125, -13.34814453125, -12.1787109375, -11.00927734375, -9.83984375, -8.67041015625, -7.5009765625, -6.33154296875, -5.162109375, -3.99267578125, -2.8232421875, -1.65380859375, -0.484375, 0.68505859375, 1.8544921875, 3.02392578125, 4.193359375, 5.36279296875, 6.5322265625, 7.70166015625, 8.87109375, 10.04052734375, 11.2099609375, 12.37939453125, 13.548828125, 14.71826171875, 15.8876953125, 17.05712890625, 18.2265625, 19.39599609375, 20.5654296875, 21.73486328125, 22.904296875, 24.07373046875, 25.2431640625, 26.41259765625, 27.58203125, 28.75146484375, 29.9208984375, 31.09033203125, 32.259765625, 33.42919921875, 34.5986328125, 35.76806640625, 36.9375]}, "gradients/decoder.transformer.h.12.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 4.0, 11.0, 15.0, 25.0, 52.0, 61.0, 88.0, 98.0, 115.0, 131.0, 115.0, 86.0, 69.0, 54.0, 33.0, 27.0, 13.0, 11.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-69.04686737060547, -67.33910369873047, -65.63134002685547, -63.9235725402832, -62.2158088684082, -60.50804138183594, -58.80027770996094, -57.09251403808594, -55.38475036621094, -53.67698669433594, -51.96921920776367, -50.26145553588867, -48.55369186401367, -46.845924377441406, -45.138160705566406, -43.430397033691406, -41.72262954711914, -40.01486587524414, -38.307098388671875, -36.599334716796875, -34.891571044921875, -33.183807373046875, -31.47603988647461, -29.76827621459961, -28.060510635375977, -26.352745056152344, -24.644981384277344, -22.93721580505371, -21.229450225830078, -19.521686553955078, -17.813920974731445, -16.106155395507812, -14.398387908935547, -12.69062328338623, -10.982858657836914, -9.275093078613281, -7.567328453063965, -5.859563827514648, -4.151798248291016, -2.444033622741699, -0.7362689971923828, 0.9714958667755127, 2.679260730743408, 4.387025833129883, 6.094790458679199, 7.802555084228516, 9.510320663452148, 11.218085289001465, 12.925849914550781, 14.633614540100098, 16.341379165649414, 18.049144744873047, 19.756908416748047, 21.46467399597168, 23.172439575195312, 24.880203247070312, 26.587968826293945, 28.295734405517578, 30.003498077392578, 31.71126365661621, 33.419029235839844, 35.126792907714844, 36.834556579589844, 38.54232406616211, 40.25008773803711]}, "gradients/decoder.transformer.h.12.ln_2.bias": {"_type": "histogram", "values": [2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 3.0, 0.0, 2.0, 4.0, 12.0, 9.0, 7.0, 8.0, 11.0, 9.0, 11.0, 18.0, 18.0, 15.0, 18.0, 12.0, 24.0, 29.0, 30.0, 29.0, 33.0, 31.0, 45.0, 41.0, 35.0, 33.0, 46.0, 34.0, 32.0, 49.0, 29.0, 46.0, 30.0, 37.0, 26.0, 24.0, 19.0, 17.0, 25.0, 17.0, 13.0, 15.0, 12.0, 11.0, 9.0, 8.0, 13.0, 5.0, 3.0, 6.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-30.984907150268555, -30.021028518676758, -29.057147979736328, -28.09326934814453, -27.129390716552734, -26.165512084960938, -25.20163345336914, -24.23775291442871, -23.273874282836914, -22.309995651245117, -21.346115112304688, -20.38223648071289, -19.418357849121094, -18.454479217529297, -17.4906005859375, -16.52672004699707, -15.562841415405273, -14.598962783813477, -13.635083198547363, -12.67120361328125, -11.707324981689453, -10.743446350097656, -9.779566764831543, -8.81568717956543, -7.851808547973633, -6.887929439544678, -5.924050331115723, -4.960171222686768, -3.9962921142578125, -3.0324130058288574, -2.0685338973999023, -1.1046547889709473, -0.140777587890625, 0.8231015205383301, 1.7869806289672852, 2.7508597373962402, 3.7147388458251953, 4.67861795425415, 5.6424970626831055, 6.6063761711120605, 7.570255279541016, 8.534133911132812, 9.498013496398926, 10.461893081665039, 11.425771713256836, 12.389650344848633, 13.353529930114746, 14.31740951538086, 15.281288146972656, 16.245166778564453, 17.20904541015625, 18.17292594909668, 19.136804580688477, 20.100683212280273, 21.064563751220703, 22.0284423828125, 22.992321014404297, 23.956199645996094, 24.92007827758789, 25.88395881652832, 26.847837448120117, 27.811716079711914, 28.775596618652344, 29.73947525024414, 30.703353881835938]}, "gradients/decoder.transformer.h.12.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 5.0, 1.0, 5.0, 2.0, 3.0, 5.0, 5.0, 14.0, 16.0, 16.0, 10.0, 17.0, 26.0, 30.0, 26.0, 23.0, 32.0, 39.0, 36.0, 26.0, 30.0, 43.0, 41.0, 36.0, 56.0, 47.0, 44.0, 37.0, 44.0, 43.0, 24.0, 28.0, 31.0, 26.0, 18.0, 24.0, 15.0, 15.0, 16.0, 15.0, 14.0, 4.0, 6.0, 4.0, 3.0, 3.0, 5.0, 0.0, 3.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.86328125, -4.70135498046875, -4.5394287109375, -4.37750244140625, -4.215576171875, -4.05364990234375, -3.8917236328125, -3.72979736328125, -3.56787109375, -3.40594482421875, -3.2440185546875, -3.08209228515625, -2.920166015625, -2.75823974609375, -2.5963134765625, -2.43438720703125, -2.2724609375, -2.11053466796875, -1.9486083984375, -1.78668212890625, -1.624755859375, -1.46282958984375, -1.3009033203125, -1.13897705078125, -0.97705078125, -0.81512451171875, -0.6531982421875, -0.49127197265625, -0.329345703125, -0.16741943359375, -0.0054931640625, 0.15643310546875, 0.318359375, 0.48028564453125, 0.6422119140625, 0.80413818359375, 0.966064453125, 1.12799072265625, 1.2899169921875, 1.45184326171875, 1.61376953125, 1.77569580078125, 1.9376220703125, 2.09954833984375, 2.261474609375, 2.42340087890625, 2.5853271484375, 2.74725341796875, 2.9091796875, 3.07110595703125, 3.2330322265625, 3.39495849609375, 3.556884765625, 3.71881103515625, 3.8807373046875, 4.04266357421875, 4.20458984375, 4.36651611328125, 4.5284423828125, 4.69036865234375, 4.852294921875, 5.01422119140625, 5.1761474609375, 5.33807373046875, 5.5]}, "gradients/decoder.transformer.h.12.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 6.0, 5.0, 7.0, 8.0, 15.0, 14.0, 26.0, 34.0, 51.0, 63.0, 99.0, 173.0, 218.0, 350.0, 434.0, 649.0, 963.0, 1476.0, 2335.0, 3468.0, 5357.0, 8422.0, 13303.0, 21229.0, 34896.0, 55987.0, 89701.0, 134395.0, 172743.0, 166305.0, 123035.0, 79828.0, 49793.0, 30579.0, 18809.0, 12057.0, 7461.0, 4856.0, 3205.0, 2045.0, 1304.0, 892.0, 597.0, 409.0, 306.0, 207.0, 133.0, 108.0, 66.0, 42.0, 21.0, 22.0, 15.0, 21.0, 12.0, 5.0, 3.0, 3.0, 3.0, 1.0, 2.0], "bins": [-0.81640625, -0.7907943725585938, -0.7651824951171875, -0.7395706176757812, -0.713958740234375, -0.6883468627929688, -0.6627349853515625, -0.6371231079101562, -0.61151123046875, -0.5858993530273438, -0.5602874755859375, -0.5346755981445312, -0.509063720703125, -0.48345184326171875, -0.4578399658203125, -0.43222808837890625, -0.4066162109375, -0.38100433349609375, -0.3553924560546875, -0.32978057861328125, -0.304168701171875, -0.27855682373046875, -0.2529449462890625, -0.22733306884765625, -0.20172119140625, -0.17610931396484375, -0.1504974365234375, -0.12488555908203125, -0.099273681640625, -0.07366180419921875, -0.0480499267578125, -0.02243804931640625, 0.003173828125, 0.02878570556640625, 0.0543975830078125, 0.08000946044921875, 0.105621337890625, 0.13123321533203125, 0.1568450927734375, 0.18245697021484375, 0.20806884765625, 0.23368072509765625, 0.2592926025390625, 0.28490447998046875, 0.310516357421875, 0.33612823486328125, 0.3617401123046875, 0.38735198974609375, 0.4129638671875, 0.43857574462890625, 0.4641876220703125, 0.48979949951171875, 0.515411376953125, 0.5410232543945312, 0.5666351318359375, 0.5922470092773438, 0.61785888671875, 0.6434707641601562, 0.6690826416015625, 0.6946945190429688, 0.720306396484375, 0.7459182739257812, 0.7715301513671875, 0.7971420288085938, 0.82275390625]}, "gradients/decoder.transformer.h.12.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 2.0, 2.0, 6.0, 9.0, 9.0, 7.0, 13.0, 11.0, 9.0, 14.0, 15.0, 17.0, 29.0, 22.0, 26.0, 34.0, 37.0, 36.0, 38.0, 48.0, 36.0, 40.0, 39.0, 1071.0, 38.0, 46.0, 45.0, 37.0, 36.0, 36.0, 41.0, 32.0, 24.0, 25.0, 13.0, 16.0, 11.0, 17.0, 13.0, 11.0, 5.0, 7.0, 5.0, 5.0, 3.0, 0.0, 1.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.716796875, -3.593536376953125, -3.47027587890625, -3.347015380859375, -3.2237548828125, -3.100494384765625, -2.97723388671875, -2.853973388671875, -2.730712890625, -2.607452392578125, -2.48419189453125, -2.360931396484375, -2.2376708984375, -2.114410400390625, -1.99114990234375, -1.867889404296875, -1.74462890625, -1.621368408203125, -1.49810791015625, -1.374847412109375, -1.2515869140625, -1.128326416015625, -1.00506591796875, -0.881805419921875, -0.758544921875, -0.635284423828125, -0.51202392578125, -0.388763427734375, -0.2655029296875, -0.142242431640625, -0.01898193359375, 0.104278564453125, 0.2275390625, 0.350799560546875, 0.47406005859375, 0.597320556640625, 0.7205810546875, 0.843841552734375, 0.96710205078125, 1.090362548828125, 1.213623046875, 1.336883544921875, 1.46014404296875, 1.583404541015625, 1.7066650390625, 1.829925537109375, 1.95318603515625, 2.076446533203125, 2.19970703125, 2.322967529296875, 2.44622802734375, 2.569488525390625, 2.6927490234375, 2.816009521484375, 2.93927001953125, 3.062530517578125, 3.185791015625, 3.309051513671875, 3.43231201171875, 3.555572509765625, 3.6788330078125, 3.802093505859375, 3.92535400390625, 4.048614501953125, 4.171875]}, "gradients/decoder.transformer.h.12.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 5.0, 8.0, 11.0, 11.0, 19.0, 32.0, 45.0, 81.0, 128.0, 264.0, 419.0, 766.0, 1374.0, 2527.0, 4509.0, 8535.0, 16028.0, 30720.0, 58982.0, 110681.0, 190157.0, 1272267.0, 179508.0, 103515.0, 54963.0, 28865.0, 15147.0, 7940.0, 4258.0, 2327.0, 1262.0, 746.0, 435.0, 254.0, 119.0, 82.0, 48.0, 37.0, 21.0, 19.0, 10.0, 8.0, 3.0, 4.0, 2.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.71630859375, -0.6966552734375, -0.677001953125, -0.6573486328125, -0.6376953125, -0.6180419921875, -0.598388671875, -0.5787353515625, -0.55908203125, -0.5394287109375, -0.519775390625, -0.5001220703125, -0.48046875, -0.4608154296875, -0.441162109375, -0.4215087890625, -0.40185546875, -0.3822021484375, -0.362548828125, -0.3428955078125, -0.3232421875, -0.3035888671875, -0.283935546875, -0.2642822265625, -0.24462890625, -0.2249755859375, -0.205322265625, -0.1856689453125, -0.166015625, -0.1463623046875, -0.126708984375, -0.1070556640625, -0.08740234375, -0.0677490234375, -0.048095703125, -0.0284423828125, -0.0087890625, 0.0108642578125, 0.030517578125, 0.0501708984375, 0.06982421875, 0.0894775390625, 0.109130859375, 0.1287841796875, 0.1484375, 0.1680908203125, 0.187744140625, 0.2073974609375, 0.22705078125, 0.2467041015625, 0.266357421875, 0.2860107421875, 0.3056640625, 0.3253173828125, 0.344970703125, 0.3646240234375, 0.38427734375, 0.4039306640625, 0.423583984375, 0.4432373046875, 0.462890625, 0.4825439453125, 0.502197265625, 0.5218505859375, 0.54150390625]}, "gradients/decoder.transformer.h.12.crossattention.q_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 3.0, 2.0, 7.0, 2.0, 6.0, 5.0, 2.0, 6.0, 8.0, 19.0, 20.0, 19.0, 28.0, 37.0, 30.0, 43.0, 45.0, 54.0, 51.0, 68.0, 60.0, 70.0, 53.0, 50.0, 42.0, 45.0, 31.0, 25.0, 34.0, 16.0, 29.0, 15.0, 16.0, 14.0, 7.0, 14.0, 4.0, 4.0, 5.0, 3.0, 4.0, 5.0, 3.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0], "bins": [-0.0290679931640625, -0.028173208236694336, -0.027278423309326172, -0.026383638381958008, -0.025488853454589844, -0.02459406852722168, -0.023699283599853516, -0.02280449867248535, -0.021909713745117188, -0.021014928817749023, -0.02012014389038086, -0.019225358963012695, -0.01833057403564453, -0.017435789108276367, -0.016541004180908203, -0.01564621925354004, -0.014751434326171875, -0.013856649398803711, -0.012961864471435547, -0.012067079544067383, -0.011172294616699219, -0.010277509689331055, -0.00938272476196289, -0.008487939834594727, -0.0075931549072265625, -0.0066983699798583984, -0.005803585052490234, -0.00490880012512207, -0.004014015197753906, -0.003119230270385742, -0.002224445343017578, -0.001329660415649414, -0.00043487548828125, 0.00045990943908691406, 0.0013546943664550781, 0.002249479293823242, 0.0031442642211914062, 0.00403904914855957, 0.004933834075927734, 0.0058286190032958984, 0.0067234039306640625, 0.0076181888580322266, 0.00851297378540039, 0.009407758712768555, 0.010302543640136719, 0.011197328567504883, 0.012092113494873047, 0.012986898422241211, 0.013881683349609375, 0.014776468276977539, 0.015671253204345703, 0.016566038131713867, 0.01746082305908203, 0.018355607986450195, 0.01925039291381836, 0.020145177841186523, 0.021039962768554688, 0.02193474769592285, 0.022829532623291016, 0.02372431755065918, 0.024619102478027344, 0.025513887405395508, 0.026408672332763672, 0.027303457260131836, 0.0281982421875]}, "gradients/decoder.transformer.h.12.crossattention.q_attn.weight": {"_type": "histogram", "values": [3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 3.0, 5.0, 4.0, 4.0, 5.0, 6.0, 6.0, 10.0, 13.0, 19.0, 20.0, 27.0, 37.0, 30.0, 49.0, 60.0, 77.0, 113.0, 168.0, 315.0, 636.0, 5277.0, 1032947.0, 7108.0, 635.0, 330.0, 195.0, 145.0, 74.0, 57.0, 46.0, 29.0, 28.0, 28.0, 8.0, 8.0, 6.0, 4.0, 5.0, 5.0, 8.0, 3.0, 3.0, 2.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0], "bins": [-0.5517578125, -0.5342559814453125, -0.516754150390625, -0.4992523193359375, -0.48175048828125, -0.4642486572265625, -0.446746826171875, -0.4292449951171875, -0.4117431640625, -0.3942413330078125, -0.376739501953125, -0.3592376708984375, -0.34173583984375, -0.3242340087890625, -0.306732177734375, -0.2892303466796875, -0.271728515625, -0.2542266845703125, -0.236724853515625, -0.2192230224609375, -0.20172119140625, -0.1842193603515625, -0.166717529296875, -0.1492156982421875, -0.1317138671875, -0.1142120361328125, -0.096710205078125, -0.0792083740234375, -0.06170654296875, -0.0442047119140625, -0.026702880859375, -0.0092010498046875, 0.00830078125, 0.0258026123046875, 0.043304443359375, 0.0608062744140625, 0.07830810546875, 0.0958099365234375, 0.113311767578125, 0.1308135986328125, 0.1483154296875, 0.1658172607421875, 0.183319091796875, 0.2008209228515625, 0.21832275390625, 0.2358245849609375, 0.253326416015625, 0.2708282470703125, 0.288330078125, 0.3058319091796875, 0.323333740234375, 0.3408355712890625, 0.35833740234375, 0.3758392333984375, 0.393341064453125, 0.4108428955078125, 0.4283447265625, 0.4458465576171875, 0.463348388671875, 0.4808502197265625, 0.49835205078125, 0.5158538818359375, 0.533355712890625, 0.5508575439453125, 0.568359375]}, "gradients/decoder.transformer.h.12.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 36.0, 979.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.11276122182607651, -0.0918130949139595, -0.0708649754524231, -0.04991684854030609, -0.028968721628189087, -0.008020594716072083, 0.012927524745464325, 0.03387565165758133, 0.054823778569698334, 0.07577190548181534, 0.09672002494335175, 0.11766815185546875, 0.13861627876758575, 0.15956440567970276, 0.18051251769065857, 0.20146065950393677, 0.22240877151489258, 0.24335689842700958, 0.2643050253391266, 0.2852531373500824, 0.3062012791633606, 0.3271493911743164, 0.3480975031852722, 0.3690456449985504, 0.3899937868118286, 0.4109418988227844, 0.4318900406360626, 0.45283815264701843, 0.47378629446029663, 0.49473440647125244, 0.5156825184822083, 0.5366306304931641, 0.5575787425041199, 0.5785268545150757, 0.5994749665260315, 0.6204231381416321, 0.6413712501525879, 0.6623193621635437, 0.6832674741744995, 0.7042156457901001, 0.7251637578010559, 0.7461118698120117, 0.7670599818229675, 0.7880081534385681, 0.8089562654495239, 0.8299043774604797, 0.8508524894714355, 0.8718006610870361, 0.8927487134933472, 0.913696825504303, 0.9346449375152588, 0.9555931091308594, 0.9765412211418152, 0.997489333152771, 1.0184375047683716, 1.0393855571746826, 1.0603337287902832, 1.0812819004058838, 1.1022299528121948, 1.1231781244277954, 1.1441261768341064, 1.165074348449707, 1.1860225200653076, 1.2069705724716187, 1.2279187440872192]}, "gradients/decoder.transformer.h.12.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 5.0, 2.0, 10.0, 4.0, 17.0, 13.0, 18.0, 22.0, 19.0, 14.0, 29.0, 34.0, 43.0, 26.0, 29.0, 39.0, 55.0, 31.0, 62.0, 46.0, 49.0, 35.0, 46.0, 51.0, 50.0, 33.0, 39.0, 37.0, 31.0, 20.0, 18.0, 16.0, 21.0, 15.0, 8.0, 6.0, 7.0, 3.0, 2.0, 4.0, 1.0, 3.0, 2.0, 1.0, 1.0, 1.0], "bins": [-0.06246870756149292, -0.06079918146133423, -0.05912965536117554, -0.057460129261016846, -0.055790603160858154, -0.05412107706069946, -0.05245155096054077, -0.05078202486038208, -0.04911249876022339, -0.0474429726600647, -0.045773446559906006, -0.044103920459747314, -0.04243439435958862, -0.04076486825942993, -0.03909534215927124, -0.03742581605911255, -0.03575628995895386, -0.034086763858795166, -0.032417237758636475, -0.030747711658477783, -0.029078185558319092, -0.0274086594581604, -0.02573913335800171, -0.024069607257843018, -0.022400081157684326, -0.020730555057525635, -0.019061028957366943, -0.017391502857208252, -0.01572197675704956, -0.01405245065689087, -0.012382924556732178, -0.010713398456573486, -0.009043872356414795, -0.0073743462562561035, -0.005704820156097412, -0.004035294055938721, -0.0023657679557800293, -0.0006962418556213379, 0.0009732842445373535, 0.002642810344696045, 0.004312336444854736, 0.005981862545013428, 0.007651388645172119, 0.00932091474533081, 0.010990440845489502, 0.012659966945648193, 0.014329493045806885, 0.015999019145965576, 0.017668545246124268, 0.01933807134628296, 0.02100759744644165, 0.022677123546600342, 0.024346649646759033, 0.026016175746917725, 0.027685701847076416, 0.029355227947235107, 0.0310247540473938, 0.03269428014755249, 0.03436380624771118, 0.03603333234786987, 0.037702858448028564, 0.039372384548187256, 0.04104191064834595, 0.04271143674850464, 0.04438096284866333]}, "gradients/decoder.transformer.h.12.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 5.0, 1.0, 5.0, 2.0, 3.0, 5.0, 5.0, 14.0, 16.0, 16.0, 10.0, 17.0, 26.0, 30.0, 25.0, 24.0, 32.0, 39.0, 35.0, 26.0, 31.0, 41.0, 43.0, 33.0, 58.0, 47.0, 45.0, 37.0, 43.0, 44.0, 25.0, 26.0, 31.0, 27.0, 17.0, 26.0, 13.0, 16.0, 16.0, 15.0, 14.0, 4.0, 6.0, 4.0, 3.0, 3.0, 5.0, 0.0, 3.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.8671875, -4.7052001953125, -4.543212890625, -4.3812255859375, -4.21923828125, -4.0572509765625, -3.895263671875, -3.7332763671875, -3.5712890625, -3.4093017578125, -3.247314453125, -3.0853271484375, -2.92333984375, -2.7613525390625, -2.599365234375, -2.4373779296875, -2.275390625, -2.1134033203125, -1.951416015625, -1.7894287109375, -1.62744140625, -1.4654541015625, -1.303466796875, -1.1414794921875, -0.9794921875, -0.8175048828125, -0.655517578125, -0.4935302734375, -0.33154296875, -0.1695556640625, -0.007568359375, 0.1544189453125, 0.31640625, 0.4783935546875, 0.640380859375, 0.8023681640625, 0.96435546875, 1.1263427734375, 1.288330078125, 1.4503173828125, 1.6123046875, 1.7742919921875, 1.936279296875, 2.0982666015625, 2.26025390625, 2.4222412109375, 2.584228515625, 2.7462158203125, 2.908203125, 3.0701904296875, 3.232177734375, 3.3941650390625, 3.55615234375, 3.7181396484375, 3.880126953125, 4.0421142578125, 4.2041015625, 4.3660888671875, 4.528076171875, 4.6900634765625, 4.85205078125, 5.0140380859375, 5.176025390625, 5.3380126953125, 5.5]}, "gradients/decoder.transformer.h.12.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 3.0, 2.0, 6.0, 4.0, 10.0, 10.0, 15.0, 22.0, 27.0, 50.0, 67.0, 89.0, 150.0, 245.0, 416.0, 790.0, 1633.0, 3423.0, 8261.0, 21699.0, 60973.0, 168341.0, 386113.0, 251034.0, 91319.0, 32519.0, 11805.0, 4857.0, 2185.0, 1088.0, 561.0, 315.0, 183.0, 108.0, 82.0, 47.0, 36.0, 23.0, 20.0, 9.0, 9.0, 6.0, 3.0, 3.0, 5.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.73046875, -6.54193115234375, -6.3533935546875, -6.16485595703125, -5.976318359375, -5.78778076171875, -5.5992431640625, -5.41070556640625, -5.22216796875, -5.03363037109375, -4.8450927734375, -4.65655517578125, -4.468017578125, -4.27947998046875, -4.0909423828125, -3.90240478515625, -3.7138671875, -3.52532958984375, -3.3367919921875, -3.14825439453125, -2.959716796875, -2.77117919921875, -2.5826416015625, -2.39410400390625, -2.20556640625, -2.01702880859375, -1.8284912109375, -1.63995361328125, -1.451416015625, -1.26287841796875, -1.0743408203125, -0.88580322265625, -0.697265625, -0.50872802734375, -0.3201904296875, -0.13165283203125, 0.056884765625, 0.24542236328125, 0.4339599609375, 0.62249755859375, 0.81103515625, 0.99957275390625, 1.1881103515625, 1.37664794921875, 1.565185546875, 1.75372314453125, 1.9422607421875, 2.13079833984375, 2.3193359375, 2.50787353515625, 2.6964111328125, 2.88494873046875, 3.073486328125, 3.26202392578125, 3.4505615234375, 3.63909912109375, 3.82763671875, 4.01617431640625, 4.2047119140625, 4.39324951171875, 4.581787109375, 4.77032470703125, 4.9588623046875, 5.14739990234375, 5.3359375]}, "gradients/decoder.transformer.h.12.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 3.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 7.0, 3.0, 3.0, 7.0, 7.0, 5.0, 10.0, 15.0, 9.0, 18.0, 14.0, 16.0, 17.0, 26.0, 34.0, 32.0, 30.0, 48.0, 38.0, 56.0, 93.0, 142.0, 287.0, 1383.0, 205.0, 105.0, 77.0, 53.0, 49.0, 40.0, 44.0, 26.0, 17.0, 22.0, 23.0, 14.0, 9.0, 14.0, 10.0, 14.0, 9.0, 2.0, 8.0, 3.0, 4.0, 4.0, 0.0, 2.0, 2.0, 3.0, 0.0, 0.0, 0.0, 0.0, 3.0], "bins": [-16.078125, -15.579345703125, -15.08056640625, -14.581787109375, -14.0830078125, -13.584228515625, -13.08544921875, -12.586669921875, -12.087890625, -11.589111328125, -11.09033203125, -10.591552734375, -10.0927734375, -9.593994140625, -9.09521484375, -8.596435546875, -8.09765625, -7.598876953125, -7.10009765625, -6.601318359375, -6.1025390625, -5.603759765625, -5.10498046875, -4.606201171875, -4.107421875, -3.608642578125, -3.10986328125, -2.611083984375, -2.1123046875, -1.613525390625, -1.11474609375, -0.615966796875, -0.1171875, 0.381591796875, 0.88037109375, 1.379150390625, 1.8779296875, 2.376708984375, 2.87548828125, 3.374267578125, 3.873046875, 4.371826171875, 4.87060546875, 5.369384765625, 5.8681640625, 6.366943359375, 6.86572265625, 7.364501953125, 7.86328125, 8.362060546875, 8.86083984375, 9.359619140625, 9.8583984375, 10.357177734375, 10.85595703125, 11.354736328125, 11.853515625, 12.352294921875, 12.85107421875, 13.349853515625, 13.8486328125, 14.347412109375, 14.84619140625, 15.344970703125, 15.84375]}, "gradients/decoder.transformer.h.12.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 2.0, 7.0, 8.0, 12.0, 22.0, 28.0, 56.0, 60.0, 86.0, 156.0, 303.0, 729.0, 2903.0, 826594.0, 2309712.0, 3438.0, 789.0, 342.0, 158.0, 120.0, 54.0, 49.0, 25.0, 26.0, 14.0, 3.0, 9.0, 4.0, 5.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-81.3125, -78.9921875, -76.671875, -74.3515625, -72.03125, -69.7109375, -67.390625, -65.0703125, -62.75, -60.4296875, -58.109375, -55.7890625, -53.46875, -51.1484375, -48.828125, -46.5078125, -44.1875, -41.8671875, -39.546875, -37.2265625, -34.90625, -32.5859375, -30.265625, -27.9453125, -25.625, -23.3046875, -20.984375, -18.6640625, -16.34375, -14.0234375, -11.703125, -9.3828125, -7.0625, -4.7421875, -2.421875, -0.1015625, 2.21875, 4.5390625, 6.859375, 9.1796875, 11.5, 13.8203125, 16.140625, 18.4609375, 20.78125, 23.1015625, 25.421875, 27.7421875, 30.0625, 32.3828125, 34.703125, 37.0234375, 39.34375, 41.6640625, 43.984375, 46.3046875, 48.625, 50.9453125, 53.265625, 55.5859375, 57.90625, 60.2265625, 62.546875, 64.8671875, 67.1875]}, "gradients/decoder.transformer.h.12.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 4.0, 21.0, 167.0, 407.0, 329.0, 80.0, 5.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-54.73880386352539, -51.657752990722656, -48.57670593261719, -45.49565505981445, -42.41460418701172, -39.333553314208984, -36.25250244140625, -33.17145538330078, -30.090404510498047, -27.009353637695312, -23.92830467224121, -20.84725570678711, -17.766204833984375, -14.685154914855957, -11.604104995727539, -8.523056030273438, -5.442005157470703, -2.360955238342285, 0.7200946807861328, 3.801144599914551, 6.882194519042969, 9.963244438171387, 13.044294357299805, 16.125343322753906, 19.20639419555664, 22.287445068359375, 25.368494033813477, 28.449542999267578, 31.530593872070312, 34.61164474487305, 37.69269561767578, 40.77374267578125, 43.85479736328125, 46.935848236083984, 50.01689910888672, 53.09794616699219, 56.17899703979492, 59.260047912597656, 62.341094970703125, 65.42214965820312, 68.5031967163086, 71.58424377441406, 74.66529846191406, 77.74634552001953, 80.827392578125, 83.908447265625, 86.98949432373047, 90.07054138183594, 93.15159606933594, 96.2326431274414, 99.3136978149414, 102.39474487304688, 105.47579956054688, 108.55684661865234, 111.63789367675781, 114.71894836425781, 117.79999542236328, 120.88104248046875, 123.96209716796875, 127.04314422607422, 130.1241912841797, 133.2052459716797, 136.2863006591797, 139.36734008789062, 142.44839477539062]}, "gradients/decoder.transformer.h.12.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 3.0, 3.0, 2.0, 2.0, 4.0, 2.0, 4.0, 3.0, 5.0, 6.0, 12.0, 10.0, 12.0, 10.0, 18.0, 16.0, 14.0, 22.0, 24.0, 23.0, 30.0, 35.0, 46.0, 45.0, 37.0, 41.0, 53.0, 37.0, 46.0, 47.0, 41.0, 33.0, 45.0, 30.0, 27.0, 27.0, 23.0, 30.0, 26.0, 19.0, 17.0, 20.0, 15.0, 7.0, 10.0, 6.0, 9.0, 4.0, 7.0, 4.0, 3.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-44.62644958496094, -43.326961517333984, -42.027469635009766, -40.72798156738281, -39.428489685058594, -38.12900161743164, -36.82950973510742, -35.53002166748047, -34.23052978515625, -32.9310417175293, -31.631549835205078, -30.332059860229492, -29.032569885253906, -27.733081817626953, -26.433591842651367, -25.13410186767578, -23.834613800048828, -22.535123825073242, -21.235633850097656, -19.93614387512207, -18.636653900146484, -17.33716583251953, -16.037675857543945, -14.73818588256836, -13.438695907592773, -12.139205932617188, -10.839715957641602, -9.540226936340332, -8.240736961364746, -6.94124698638916, -5.641757488250732, -4.342267990112305, -3.0427780151367188, -1.743288278579712, -0.4437985420227051, 0.8556911945343018, 2.1551809310913086, 3.4546709060668945, 4.754160404205322, 6.05364990234375, 7.353139877319336, 8.652629852294922, 9.952119827270508, 11.251608848571777, 12.551098823547363, 13.85058879852295, 15.150077819824219, 16.449567794799805, 17.74905776977539, 19.048547744750977, 20.348037719726562, 21.64752769470215, 22.947017669677734, 24.246505737304688, 25.545995712280273, 26.84548568725586, 28.144975662231445, 29.44446563720703, 30.743955612182617, 32.0434455871582, 33.342933654785156, 34.642425537109375, 35.94191360473633, 37.24140167236328, 38.5408935546875]}, "gradients/decoder.transformer.h.11.mlp.c_proj.bias": {"_type": "histogram", "values": [3.0, 1.0, 2.0, 0.0, 4.0, 5.0, 5.0, 3.0, 6.0, 4.0, 10.0, 11.0, 9.0, 21.0, 14.0, 22.0, 23.0, 26.0, 28.0, 29.0, 28.0, 37.0, 25.0, 33.0, 30.0, 39.0, 36.0, 29.0, 40.0, 53.0, 33.0, 48.0, 40.0, 36.0, 43.0, 28.0, 31.0, 23.0, 27.0, 17.0, 23.0, 20.0, 15.0, 15.0, 10.0, 7.0, 7.0, 6.0, 2.0, 2.0, 4.0, 2.0, 1.0, 0.0, 2.0, 2.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.64453125, -4.47955322265625, -4.3145751953125, -4.14959716796875, -3.984619140625, -3.81964111328125, -3.6546630859375, -3.48968505859375, -3.32470703125, -3.15972900390625, -2.9947509765625, -2.82977294921875, -2.664794921875, -2.49981689453125, -2.3348388671875, -2.16986083984375, -2.0048828125, -1.83990478515625, -1.6749267578125, -1.50994873046875, -1.344970703125, -1.17999267578125, -1.0150146484375, -0.85003662109375, -0.68505859375, -0.52008056640625, -0.3551025390625, -0.19012451171875, -0.025146484375, 0.13983154296875, 0.3048095703125, 0.46978759765625, 0.634765625, 0.79974365234375, 0.9647216796875, 1.12969970703125, 1.294677734375, 1.45965576171875, 1.6246337890625, 1.78961181640625, 1.95458984375, 2.11956787109375, 2.2845458984375, 2.44952392578125, 2.614501953125, 2.77947998046875, 2.9444580078125, 3.10943603515625, 3.2744140625, 3.43939208984375, 3.6043701171875, 3.76934814453125, 3.934326171875, 4.09930419921875, 4.2642822265625, 4.42926025390625, 4.59423828125, 4.75921630859375, 4.9241943359375, 5.08917236328125, 5.254150390625, 5.41912841796875, 5.5841064453125, 5.74908447265625, 5.9140625]}, "gradients/decoder.transformer.h.11.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 3.0, 2.0, 1.0, 3.0, 8.0, 3.0, 4.0, 5.0, 8.0, 15.0, 14.0, 22.0, 19.0, 26.0, 36.0, 55.0, 82.0, 104.0, 150.0, 228.0, 402.0, 921.0, 1933.0, 5270.0, 19464.0, 90284.0, 469297.0, 1644777.0, 1475487.0, 387756.0, 73968.0, 15981.0, 4580.0, 1589.0, 722.0, 395.0, 209.0, 122.0, 86.0, 47.0, 46.0, 25.0, 41.0, 15.0, 25.0, 13.0, 13.0, 13.0, 7.0, 3.0, 4.0, 2.0, 5.0, 2.0, 4.0, 4.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-8.7578125, -8.4747314453125, -8.191650390625, -7.9085693359375, -7.62548828125, -7.3424072265625, -7.059326171875, -6.7762451171875, -6.4931640625, -6.2100830078125, -5.927001953125, -5.6439208984375, -5.36083984375, -5.0777587890625, -4.794677734375, -4.5115966796875, -4.228515625, -3.9454345703125, -3.662353515625, -3.3792724609375, -3.09619140625, -2.8131103515625, -2.530029296875, -2.2469482421875, -1.9638671875, -1.6807861328125, -1.397705078125, -1.1146240234375, -0.83154296875, -0.5484619140625, -0.265380859375, 0.0177001953125, 0.30078125, 0.5838623046875, 0.866943359375, 1.1500244140625, 1.43310546875, 1.7161865234375, 1.999267578125, 2.2823486328125, 2.5654296875, 2.8485107421875, 3.131591796875, 3.4146728515625, 3.69775390625, 3.9808349609375, 4.263916015625, 4.5469970703125, 4.830078125, 5.1131591796875, 5.396240234375, 5.6793212890625, 5.96240234375, 6.2454833984375, 6.528564453125, 6.8116455078125, 7.0947265625, 7.3778076171875, 7.660888671875, 7.9439697265625, 8.22705078125, 8.5101318359375, 8.793212890625, 9.0762939453125, 9.359375]}, "gradients/decoder.transformer.h.11.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 3.0, 0.0, 2.0, 1.0, 2.0, 2.0, 4.0, 6.0, 6.0, 8.0, 9.0, 8.0, 15.0, 32.0, 15.0, 30.0, 26.0, 61.0, 50.0, 81.0, 101.0, 128.0, 158.0, 226.0, 243.0, 262.0, 335.0, 336.0, 328.0, 305.0, 285.0, 180.0, 175.0, 152.0, 105.0, 84.0, 56.0, 62.0, 42.0, 37.0, 31.0, 19.0, 15.0, 12.0, 9.0, 5.0, 6.0, 8.0, 5.0, 7.0, 2.0, 3.0, 2.0, 2.0, 0.0, 2.0, 2.0, 2.0, 0.0, 1.0, 1.0], "bins": [-8.0234375, -7.7628173828125, -7.502197265625, -7.2415771484375, -6.98095703125, -6.7203369140625, -6.459716796875, -6.1990966796875, -5.9384765625, -5.6778564453125, -5.417236328125, -5.1566162109375, -4.89599609375, -4.6353759765625, -4.374755859375, -4.1141357421875, -3.853515625, -3.5928955078125, -3.332275390625, -3.0716552734375, -2.81103515625, -2.5504150390625, -2.289794921875, -2.0291748046875, -1.7685546875, -1.5079345703125, -1.247314453125, -0.9866943359375, -0.72607421875, -0.4654541015625, -0.204833984375, 0.0557861328125, 0.31640625, 0.5770263671875, 0.837646484375, 1.0982666015625, 1.35888671875, 1.6195068359375, 1.880126953125, 2.1407470703125, 2.4013671875, 2.6619873046875, 2.922607421875, 3.1832275390625, 3.44384765625, 3.7044677734375, 3.965087890625, 4.2257080078125, 4.486328125, 4.7469482421875, 5.007568359375, 5.2681884765625, 5.52880859375, 5.7894287109375, 6.050048828125, 6.3106689453125, 6.5712890625, 6.8319091796875, 7.092529296875, 7.3531494140625, 7.61376953125, 7.8743896484375, 8.135009765625, 8.3956298828125, 8.65625]}, "gradients/decoder.transformer.h.11.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 4.0, 0.0, 1.0, 0.0, 7.0, 1.0, 0.0, 7.0, 8.0, 7.0, 8.0, 6.0, 7.0, 14.0, 20.0, 19.0, 36.0, 39.0, 64.0, 45.0, 69.0, 107.0, 148.0, 211.0, 266.0, 474.0, 2424.0, 98820.0, 3687365.0, 396325.0, 5898.0, 630.0, 355.0, 223.0, 171.0, 116.0, 87.0, 77.0, 55.0, 38.0, 27.0, 18.0, 26.0, 21.0, 11.0, 4.0, 12.0, 9.0, 6.0, 3.0, 3.0, 0.0, 2.0, 0.0, 2.0, 2.0, 1.0, 1.0], "bins": [-40.3125, -39.14892578125, -37.9853515625, -36.82177734375, -35.658203125, -34.49462890625, -33.3310546875, -32.16748046875, -31.00390625, -29.84033203125, -28.6767578125, -27.51318359375, -26.349609375, -25.18603515625, -24.0224609375, -22.85888671875, -21.6953125, -20.53173828125, -19.3681640625, -18.20458984375, -17.041015625, -15.87744140625, -14.7138671875, -13.55029296875, -12.38671875, -11.22314453125, -10.0595703125, -8.89599609375, -7.732421875, -6.56884765625, -5.4052734375, -4.24169921875, -3.078125, -1.91455078125, -0.7509765625, 0.41259765625, 1.576171875, 2.73974609375, 3.9033203125, 5.06689453125, 6.23046875, 7.39404296875, 8.5576171875, 9.72119140625, 10.884765625, 12.04833984375, 13.2119140625, 14.37548828125, 15.5390625, 16.70263671875, 17.8662109375, 19.02978515625, 20.193359375, 21.35693359375, 22.5205078125, 23.68408203125, 24.84765625, 26.01123046875, 27.1748046875, 28.33837890625, 29.501953125, 30.66552734375, 31.8291015625, 32.99267578125, 34.15625]}, "gradients/decoder.transformer.h.11.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 3.0, 12.0, 10.0, 38.0, 71.0, 147.0, 148.0, 185.0, 167.0, 106.0, 59.0, 41.0, 20.0, 5.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-44.710391998291016, -42.2735481262207, -39.83670425415039, -37.39986038208008, -34.963016510009766, -32.52617263793945, -30.089326858520508, -27.652482986450195, -25.215639114379883, -22.77879524230957, -20.341951370239258, -17.905105590820312, -15.468262672424316, -13.031418800354004, -10.594573974609375, -8.157730102539062, -5.72088623046875, -3.2840421199798584, -0.8471980094909668, 1.589646339416504, 4.026490211486816, 6.463334083557129, 8.900178909301758, 11.33702278137207, 13.773866653442383, 16.210710525512695, 18.647554397583008, 21.084400177001953, 23.521244049072266, 25.958087921142578, 28.39493179321289, 30.831775665283203, 33.26861572265625, 35.70545959472656, 38.142303466796875, 40.57914733886719, 43.0159912109375, 45.45283508300781, 47.889678955078125, 50.32652282714844, 52.76336669921875, 55.20021057128906, 57.637054443359375, 60.07389831542969, 62.5107421875, 64.94758605957031, 67.38442993164062, 69.82127380371094, 72.25811767578125, 74.69496154785156, 77.13180541992188, 79.56864929199219, 82.0054931640625, 84.44233703613281, 86.87918090820312, 89.31602478027344, 91.75287628173828, 94.1897201538086, 96.6265640258789, 99.06340789794922, 101.50025177001953, 103.93709564208984, 106.37393951416016, 108.81078338623047, 111.24762725830078]}, "gradients/decoder.transformer.h.11.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 9.0, 2.0, 2.0, 7.0, 7.0, 12.0, 11.0, 16.0, 18.0, 18.0, 22.0, 26.0, 20.0, 23.0, 39.0, 41.0, 22.0, 41.0, 37.0, 24.0, 54.0, 46.0, 41.0, 36.0, 46.0, 28.0, 42.0, 35.0, 33.0, 32.0, 30.0, 32.0, 30.0, 19.0, 9.0, 15.0, 12.0, 13.0, 13.0, 7.0, 8.0, 6.0, 1.0, 13.0, 1.0, 2.0, 5.0, 3.0, 0.0, 3.0, 2.0, 0.0, 0.0, 0.0, 2.0], "bins": [-33.08102798461914, -32.02484130859375, -30.96865463256836, -29.9124698638916, -28.85628318786621, -27.80009651184082, -26.743911743164062, -25.687725067138672, -24.63153839111328, -23.57535171508789, -22.5191650390625, -21.462980270385742, -20.40679359436035, -19.35060691833496, -18.294422149658203, -17.238235473632812, -16.182048797607422, -15.125862121582031, -14.069676399230957, -13.013490676879883, -11.957304000854492, -10.901117324829102, -9.844931602478027, -8.788745880126953, -7.7325592041015625, -6.67637300491333, -5.620186805725098, -4.564000606536865, -3.507814407348633, -2.4516282081604004, -1.395442008972168, -0.33925580978393555, 0.7169342041015625, 1.773120403289795, 2.8293066024780273, 3.8854928016662598, 4.941679000854492, 5.997865200042725, 7.054051399230957, 8.110237121582031, 9.166423797607422, 10.222610473632812, 11.278796195983887, 12.334981918334961, 13.391168594360352, 14.447355270385742, 15.503540992736816, 16.55972671508789, 17.61591339111328, 18.672100067138672, 19.728286743164062, 20.78447151184082, 21.84065818786621, 22.8968448638916, 23.95302963256836, 25.00921630859375, 26.06540298461914, 27.12158966064453, 28.177776336669922, 29.23396110534668, 30.29014778137207, 31.34633445739746, 32.40251922607422, 33.45870590209961, 34.514892578125]}, "gradients/decoder.transformer.h.11.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 3.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 5.0, 5.0, 4.0, 8.0, 7.0, 10.0, 8.0, 14.0, 16.0, 15.0, 21.0, 38.0, 27.0, 22.0, 34.0, 29.0, 27.0, 37.0, 29.0, 52.0, 45.0, 49.0, 30.0, 43.0, 47.0, 36.0, 40.0, 38.0, 32.0, 38.0, 21.0, 22.0, 20.0, 21.0, 22.0, 19.0, 16.0, 18.0, 5.0, 10.0, 5.0, 6.0, 5.0, 1.0, 4.0, 4.0, 1.0, 2.0, 0.0, 2.0, 2.0, 1.0, 1.0, 1.0], "bins": [-5.14453125, -4.98516845703125, -4.8258056640625, -4.66644287109375, -4.507080078125, -4.34771728515625, -4.1883544921875, -4.02899169921875, -3.86962890625, -3.71026611328125, -3.5509033203125, -3.39154052734375, -3.232177734375, -3.07281494140625, -2.9134521484375, -2.75408935546875, -2.5947265625, -2.43536376953125, -2.2760009765625, -2.11663818359375, -1.957275390625, -1.79791259765625, -1.6385498046875, -1.47918701171875, -1.31982421875, -1.16046142578125, -1.0010986328125, -0.84173583984375, -0.682373046875, -0.52301025390625, -0.3636474609375, -0.20428466796875, -0.044921875, 0.11444091796875, 0.2738037109375, 0.43316650390625, 0.592529296875, 0.75189208984375, 0.9112548828125, 1.07061767578125, 1.22998046875, 1.38934326171875, 1.5487060546875, 1.70806884765625, 1.867431640625, 2.02679443359375, 2.1861572265625, 2.34552001953125, 2.5048828125, 2.66424560546875, 2.8236083984375, 2.98297119140625, 3.142333984375, 3.30169677734375, 3.4610595703125, 3.62042236328125, 3.77978515625, 3.93914794921875, 4.0985107421875, 4.25787353515625, 4.417236328125, 4.57659912109375, 4.7359619140625, 4.89532470703125, 5.0546875]}, "gradients/decoder.transformer.h.11.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 5.0, 3.0, 0.0, 3.0, 12.0, 8.0, 16.0, 26.0, 37.0, 65.0, 101.0, 140.0, 199.0, 303.0, 452.0, 627.0, 1002.0, 1390.0, 2094.0, 3158.0, 4713.0, 7334.0, 11264.0, 17463.0, 27310.0, 43109.0, 66466.0, 98722.0, 136948.0, 162226.0, 146082.0, 108647.0, 73373.0, 48348.0, 30792.0, 19478.0, 12486.0, 8152.0, 5311.0, 3455.0, 2307.0, 1618.0, 1081.0, 706.0, 505.0, 362.0, 204.0, 141.0, 118.0, 73.0, 49.0, 27.0, 22.0, 16.0, 11.0, 7.0, 2.0, 2.0, 2.0, 1.0], "bins": [-0.7314453125, -0.7096939086914062, -0.6879425048828125, -0.6661911010742188, -0.644439697265625, -0.6226882934570312, -0.6009368896484375, -0.5791854858398438, -0.55743408203125, -0.5356826782226562, -0.5139312744140625, -0.49217987060546875, -0.470428466796875, -0.44867706298828125, -0.4269256591796875, -0.40517425537109375, -0.3834228515625, -0.36167144775390625, -0.3399200439453125, -0.31816864013671875, -0.296417236328125, -0.27466583251953125, -0.2529144287109375, -0.23116302490234375, -0.20941162109375, -0.18766021728515625, -0.1659088134765625, -0.14415740966796875, -0.122406005859375, -0.10065460205078125, -0.0789031982421875, -0.05715179443359375, -0.035400390625, -0.01364898681640625, 0.0081024169921875, 0.02985382080078125, 0.051605224609375, 0.07335662841796875, 0.0951080322265625, 0.11685943603515625, 0.13861083984375, 0.16036224365234375, 0.1821136474609375, 0.20386505126953125, 0.225616455078125, 0.24736785888671875, 0.2691192626953125, 0.29087066650390625, 0.3126220703125, 0.33437347412109375, 0.3561248779296875, 0.37787628173828125, 0.399627685546875, 0.42137908935546875, 0.4431304931640625, 0.46488189697265625, 0.48663330078125, 0.5083847045898438, 0.5301361083984375, 0.5518875122070312, 0.573638916015625, 0.5953903198242188, 0.6171417236328125, 0.6388931274414062, 0.66064453125]}, "gradients/decoder.transformer.h.11.crossattention.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 3.0, 2.0, 4.0, 3.0, 6.0, 4.0, 2.0, 2.0, 5.0, 8.0, 13.0, 16.0, 12.0, 24.0, 13.0, 22.0, 16.0, 33.0, 21.0, 25.0, 34.0, 33.0, 31.0, 33.0, 36.0, 29.0, 33.0, 45.0, 1064.0, 36.0, 27.0, 44.0, 30.0, 35.0, 33.0, 35.0, 33.0, 30.0, 14.0, 17.0, 20.0, 15.0, 16.0, 8.0, 13.0, 12.0, 9.0, 11.0, 7.0, 8.0, 2.0, 5.0, 1.0, 5.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-3.13671875, -3.036346435546875, -2.93597412109375, -2.835601806640625, -2.7352294921875, -2.634857177734375, -2.53448486328125, -2.434112548828125, -2.333740234375, -2.233367919921875, -2.13299560546875, -2.032623291015625, -1.9322509765625, -1.831878662109375, -1.73150634765625, -1.631134033203125, -1.53076171875, -1.430389404296875, -1.33001708984375, -1.229644775390625, -1.1292724609375, -1.028900146484375, -0.92852783203125, -0.828155517578125, -0.727783203125, -0.627410888671875, -0.52703857421875, -0.426666259765625, -0.3262939453125, -0.225921630859375, -0.12554931640625, -0.025177001953125, 0.0751953125, 0.175567626953125, 0.27593994140625, 0.376312255859375, 0.4766845703125, 0.577056884765625, 0.67742919921875, 0.777801513671875, 0.878173828125, 0.978546142578125, 1.07891845703125, 1.179290771484375, 1.2796630859375, 1.380035400390625, 1.48040771484375, 1.580780029296875, 1.68115234375, 1.781524658203125, 1.88189697265625, 1.982269287109375, 2.0826416015625, 2.183013916015625, 2.28338623046875, 2.383758544921875, 2.484130859375, 2.584503173828125, 2.68487548828125, 2.785247802734375, 2.8856201171875, 2.985992431640625, 3.08636474609375, 3.186737060546875, 3.287109375]}, "gradients/decoder.transformer.h.11.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 5.0, 1.0, 4.0, 5.0, 11.0, 10.0, 17.0, 31.0, 28.0, 47.0, 83.0, 124.0, 201.0, 317.0, 436.0, 741.0, 1091.0, 1746.0, 2615.0, 4074.0, 6364.0, 9828.0, 15004.0, 23350.0, 35615.0, 54510.0, 80967.0, 113787.0, 182959.0, 1156918.0, 127048.0, 93837.0, 64233.0, 42295.0, 27663.0, 17959.0, 11602.0, 7440.0, 4949.0, 3284.0, 2072.0, 1393.0, 830.0, 559.0, 385.0, 246.0, 165.0, 108.0, 62.0, 35.0, 34.0, 21.0, 15.0, 9.0, 7.0, 4.0, 4.0, 2.0], "bins": [-0.448486328125, -0.4357452392578125, -0.423004150390625, -0.4102630615234375, -0.39752197265625, -0.3847808837890625, -0.372039794921875, -0.3592987060546875, -0.3465576171875, -0.3338165283203125, -0.321075439453125, -0.3083343505859375, -0.29559326171875, -0.2828521728515625, -0.270111083984375, -0.2573699951171875, -0.24462890625, -0.2318878173828125, -0.219146728515625, -0.2064056396484375, -0.19366455078125, -0.1809234619140625, -0.168182373046875, -0.1554412841796875, -0.1427001953125, -0.1299591064453125, -0.117218017578125, -0.1044769287109375, -0.09173583984375, -0.0789947509765625, -0.066253662109375, -0.0535125732421875, -0.040771484375, -0.0280303955078125, -0.015289306640625, -0.0025482177734375, 0.01019287109375, 0.0229339599609375, 0.035675048828125, 0.0484161376953125, 0.0611572265625, 0.0738983154296875, 0.086639404296875, 0.0993804931640625, 0.11212158203125, 0.1248626708984375, 0.137603759765625, 0.1503448486328125, 0.1630859375, 0.1758270263671875, 0.188568115234375, 0.2013092041015625, 0.21405029296875, 0.2267913818359375, 0.239532470703125, 0.2522735595703125, 0.2650146484375, 0.2777557373046875, 0.290496826171875, 0.3032379150390625, 0.31597900390625, 0.3287200927734375, 0.341461181640625, 0.3542022705078125, 0.366943359375]}, "gradients/decoder.transformer.h.11.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 2.0, 2.0, 3.0, 4.0, 7.0, 5.0, 9.0, 12.0, 12.0, 15.0, 21.0, 30.0, 54.0, 65.0, 83.0, 90.0, 139.0, 91.0, 80.0, 63.0, 65.0, 45.0, 39.0, 19.0, 9.0, 13.0, 12.0, 7.0, 1.0, 3.0, 3.0, 4.0, 0.0, 2.0, 3.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.02984619140625, -0.02893662452697754, -0.028027057647705078, -0.027117490768432617, -0.026207923889160156, -0.025298357009887695, -0.024388790130615234, -0.023479223251342773, -0.022569656372070312, -0.02166008949279785, -0.02075052261352539, -0.01984095573425293, -0.01893138885498047, -0.018021821975708008, -0.017112255096435547, -0.016202688217163086, -0.015293121337890625, -0.014383554458618164, -0.013473987579345703, -0.012564420700073242, -0.011654853820800781, -0.01074528694152832, -0.00983572006225586, -0.008926153182983398, -0.008016586303710938, -0.0071070194244384766, -0.006197452545166016, -0.005287885665893555, -0.004378318786621094, -0.003468751907348633, -0.002559185028076172, -0.001649618148803711, -0.00074005126953125, 0.00016951560974121094, 0.0010790824890136719, 0.001988649368286133, 0.0028982162475585938, 0.0038077831268310547, 0.004717350006103516, 0.0056269168853759766, 0.0065364837646484375, 0.0074460506439208984, 0.00835561752319336, 0.00926518440246582, 0.010174751281738281, 0.011084318161010742, 0.011993885040283203, 0.012903451919555664, 0.013813018798828125, 0.014722585678100586, 0.015632152557373047, 0.016541719436645508, 0.01745128631591797, 0.01836085319519043, 0.01927042007446289, 0.02017998695373535, 0.021089553833007812, 0.021999120712280273, 0.022908687591552734, 0.023818254470825195, 0.024727821350097656, 0.025637388229370117, 0.026546955108642578, 0.02745652198791504, 0.0283660888671875]}, "gradients/decoder.transformer.h.11.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 4.0, 0.0, 2.0, 4.0, 2.0, 5.0, 4.0, 10.0, 14.0, 18.0, 21.0, 28.0, 62.0, 81.0, 147.0, 232.0, 555.0, 18333.0, 1027104.0, 1123.0, 356.0, 154.0, 92.0, 68.0, 36.0, 30.0, 18.0, 10.0, 16.0, 10.0, 10.0, 3.0, 3.0, 3.0, 2.0, 1.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.54638671875, -0.5288848876953125, -0.511383056640625, -0.4938812255859375, -0.47637939453125, -0.4588775634765625, -0.441375732421875, -0.4238739013671875, -0.4063720703125, -0.3888702392578125, -0.371368408203125, -0.3538665771484375, -0.33636474609375, -0.3188629150390625, -0.301361083984375, -0.2838592529296875, -0.266357421875, -0.2488555908203125, -0.231353759765625, -0.2138519287109375, -0.19635009765625, -0.1788482666015625, -0.161346435546875, -0.1438446044921875, -0.1263427734375, -0.1088409423828125, -0.091339111328125, -0.0738372802734375, -0.05633544921875, -0.0388336181640625, -0.021331787109375, -0.0038299560546875, 0.013671875, 0.0311737060546875, 0.048675537109375, 0.0661773681640625, 0.08367919921875, 0.1011810302734375, 0.118682861328125, 0.1361846923828125, 0.1536865234375, 0.1711883544921875, 0.188690185546875, 0.2061920166015625, 0.22369384765625, 0.2411956787109375, 0.258697509765625, 0.2761993408203125, 0.293701171875, 0.3112030029296875, 0.328704833984375, 0.3462066650390625, 0.36370849609375, 0.3812103271484375, 0.398712158203125, 0.4162139892578125, 0.4337158203125, 0.4512176513671875, 0.468719482421875, 0.4862213134765625, 0.50372314453125, 0.5212249755859375, 0.538726806640625, 0.5562286376953125, 0.57373046875]}, "gradients/decoder.transformer.h.11.ln_cross_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 8.0, 143.0, 851.0, 15.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.13320964574813843, -0.12697333097457886, -0.12073702365159988, -0.11450071632862091, -0.10826440900564194, -0.10202810168266296, -0.0957917869091034, -0.08955547958612442, -0.08331917226314545, -0.07708286494016647, -0.0708465501666069, -0.06461024284362793, -0.058373935520648956, -0.052137624472379684, -0.04590131342411041, -0.03966500610113144, -0.03342869132757187, -0.027192382141947746, -0.020956072956323624, -0.014719761908054352, -0.00848345272243023, -0.0022471435368061066, 0.003989167511463165, 0.010225474834442139, 0.01646178588271141, 0.022698095068335533, 0.028934404253959656, 0.03517071530222893, 0.0414070263504982, 0.04764333367347717, 0.053879644721746445, 0.06011595204472542, 0.06635226309299469, 0.07258857041597366, 0.07882488518953323, 0.08506119251251221, 0.09129749983549118, 0.09753380715847015, 0.10377012193202972, 0.1100064292550087, 0.11624273657798767, 0.12247904390096664, 0.12871535122394562, 0.1349516659975052, 0.14118798077106476, 0.14742428064346313, 0.1536605954170227, 0.15989691019058228, 0.16613322496414185, 0.17236953973770142, 0.1786058396100998, 0.18484215438365936, 0.19107846915721893, 0.1973147690296173, 0.20355108380317688, 0.20978739857673645, 0.21602369844913483, 0.2222600132226944, 0.22849631309509277, 0.23473262786865234, 0.24096894264221191, 0.2472052425146103, 0.25344157218933105, 0.25967785716056824, 0.2659141719341278]}, "gradients/decoder.transformer.h.11.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 2.0, 4.0, 1.0, 5.0, 6.0, 5.0, 13.0, 16.0, 10.0, 8.0, 10.0, 19.0, 17.0, 15.0, 24.0, 30.0, 41.0, 33.0, 25.0, 35.0, 38.0, 44.0, 49.0, 33.0, 45.0, 32.0, 42.0, 45.0, 45.0, 33.0, 34.0, 37.0, 32.0, 27.0, 20.0, 15.0, 17.0, 18.0, 16.0, 13.0, 12.0, 13.0, 7.0, 9.0, 4.0, 7.0, 4.0, 1.0, 2.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.026247084140777588, -0.02541860193014145, -0.02459011971950531, -0.02376163750886917, -0.022933155298233032, -0.022104673087596893, -0.021276190876960754, -0.020447708666324615, -0.019619226455688477, -0.018790744245052338, -0.0179622620344162, -0.01713377982378006, -0.01630529761314392, -0.015476815402507782, -0.014648333191871643, -0.013819850981235504, -0.012991368770599365, -0.012162886559963226, -0.011334404349327087, -0.010505922138690948, -0.00967743992805481, -0.00884895771741867, -0.008020475506782532, -0.007191993296146393, -0.006363511085510254, -0.005535028874874115, -0.004706546664237976, -0.003878064453601837, -0.0030495822429656982, -0.0022211000323295593, -0.0013926178216934204, -0.0005641356110572815, 0.0002643465995788574, 0.0010928288102149963, 0.0019213110208511353, 0.002749793231487274, 0.003578275442123413, 0.004406757652759552, 0.005235239863395691, 0.00606372207403183, 0.006892204284667969, 0.007720686495304108, 0.008549168705940247, 0.009377650916576385, 0.010206133127212524, 0.011034615337848663, 0.011863097548484802, 0.012691579759120941, 0.01352006196975708, 0.014348544180393219, 0.015177026391029358, 0.016005508601665497, 0.016833990812301636, 0.017662473022937775, 0.018490955233573914, 0.019319437444210052, 0.02014791965484619, 0.02097640186548233, 0.02180488407611847, 0.022633366286754608, 0.023461848497390747, 0.024290330708026886, 0.025118812918663025, 0.025947295129299164, 0.026775777339935303]}, "gradients/decoder.transformer.h.11.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 3.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 5.0, 5.0, 4.0, 8.0, 7.0, 10.0, 8.0, 14.0, 16.0, 15.0, 21.0, 37.0, 29.0, 21.0, 34.0, 29.0, 27.0, 37.0, 28.0, 53.0, 45.0, 49.0, 30.0, 43.0, 48.0, 34.0, 41.0, 39.0, 31.0, 38.0, 21.0, 22.0, 20.0, 21.0, 22.0, 18.0, 17.0, 18.0, 5.0, 10.0, 5.0, 6.0, 5.0, 1.0, 4.0, 4.0, 1.0, 2.0, 0.0, 2.0, 2.0, 1.0, 1.0, 1.0], "bins": [-5.14453125, -4.98516845703125, -4.8258056640625, -4.66644287109375, -4.507080078125, -4.34771728515625, -4.1883544921875, -4.02899169921875, -3.86962890625, -3.71026611328125, -3.5509033203125, -3.39154052734375, -3.232177734375, -3.07281494140625, -2.9134521484375, -2.75408935546875, -2.5947265625, -2.43536376953125, -2.2760009765625, -2.11663818359375, -1.957275390625, -1.79791259765625, -1.6385498046875, -1.47918701171875, -1.31982421875, -1.16046142578125, -1.0010986328125, -0.84173583984375, -0.682373046875, -0.52301025390625, -0.3636474609375, -0.20428466796875, -0.044921875, 0.11444091796875, 0.2738037109375, 0.43316650390625, 0.592529296875, 0.75189208984375, 0.9112548828125, 1.07061767578125, 1.22998046875, 1.38934326171875, 1.5487060546875, 1.70806884765625, 1.867431640625, 2.02679443359375, 2.1861572265625, 2.34552001953125, 2.5048828125, 2.66424560546875, 2.8236083984375, 2.98297119140625, 3.142333984375, 3.30169677734375, 3.4610595703125, 3.62042236328125, 3.77978515625, 3.93914794921875, 4.0985107421875, 4.25787353515625, 4.417236328125, 4.57659912109375, 4.7359619140625, 4.89532470703125, 5.0546875]}, "gradients/decoder.transformer.h.11.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 1.0, 7.0, 5.0, 4.0, 5.0, 5.0, 8.0, 12.0, 21.0, 30.0, 19.0, 39.0, 58.0, 84.0, 140.0, 248.0, 427.0, 692.0, 1357.0, 2244.0, 3888.0, 6784.0, 11970.0, 20995.0, 41175.0, 92329.0, 217934.0, 327716.0, 172828.0, 72374.0, 33395.0, 17749.0, 10085.0, 5820.0, 3435.0, 1938.0, 1075.0, 627.0, 388.0, 244.0, 148.0, 79.0, 47.0, 36.0, 28.0, 19.0, 9.0, 14.0, 12.0, 8.0, 3.0, 4.0, 0.0, 3.0, 2.0, 2.0, 0.0, 0.0, 3.0, 1.0], "bins": [-4.6875, -4.53802490234375, -4.3885498046875, -4.23907470703125, -4.089599609375, -3.94012451171875, -3.7906494140625, -3.64117431640625, -3.49169921875, -3.34222412109375, -3.1927490234375, -3.04327392578125, -2.893798828125, -2.74432373046875, -2.5948486328125, -2.44537353515625, -2.2958984375, -2.14642333984375, -1.9969482421875, -1.84747314453125, -1.697998046875, -1.54852294921875, -1.3990478515625, -1.24957275390625, -1.10009765625, -0.95062255859375, -0.8011474609375, -0.65167236328125, -0.502197265625, -0.35272216796875, -0.2032470703125, -0.05377197265625, 0.095703125, 0.24517822265625, 0.3946533203125, 0.54412841796875, 0.693603515625, 0.84307861328125, 0.9925537109375, 1.14202880859375, 1.29150390625, 1.44097900390625, 1.5904541015625, 1.73992919921875, 1.889404296875, 2.03887939453125, 2.1883544921875, 2.33782958984375, 2.4873046875, 2.63677978515625, 2.7862548828125, 2.93572998046875, 3.085205078125, 3.23468017578125, 3.3841552734375, 3.53363037109375, 3.68310546875, 3.83258056640625, 3.9820556640625, 4.13153076171875, 4.281005859375, 4.43048095703125, 4.5799560546875, 4.72943115234375, 4.87890625]}, "gradients/decoder.transformer.h.11.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 2.0, 2.0, 4.0, 4.0, 6.0, 5.0, 10.0, 12.0, 12.0, 14.0, 13.0, 19.0, 32.0, 27.0, 36.0, 23.0, 30.0, 38.0, 50.0, 64.0, 79.0, 119.0, 255.0, 1416.0, 218.0, 119.0, 74.0, 48.0, 46.0, 41.0, 28.0, 33.0, 39.0, 28.0, 20.0, 24.0, 12.0, 12.0, 16.0, 7.0, 6.0, 5.0, 4.0, 2.0, 2.0, 3.0, 2.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-18.125, -17.56591796875, -17.0068359375, -16.44775390625, -15.888671875, -15.32958984375, -14.7705078125, -14.21142578125, -13.65234375, -13.09326171875, -12.5341796875, -11.97509765625, -11.416015625, -10.85693359375, -10.2978515625, -9.73876953125, -9.1796875, -8.62060546875, -8.0615234375, -7.50244140625, -6.943359375, -6.38427734375, -5.8251953125, -5.26611328125, -4.70703125, -4.14794921875, -3.5888671875, -3.02978515625, -2.470703125, -1.91162109375, -1.3525390625, -0.79345703125, -0.234375, 0.32470703125, 0.8837890625, 1.44287109375, 2.001953125, 2.56103515625, 3.1201171875, 3.67919921875, 4.23828125, 4.79736328125, 5.3564453125, 5.91552734375, 6.474609375, 7.03369140625, 7.5927734375, 8.15185546875, 8.7109375, 9.27001953125, 9.8291015625, 10.38818359375, 10.947265625, 11.50634765625, 12.0654296875, 12.62451171875, 13.18359375, 13.74267578125, 14.3017578125, 14.86083984375, 15.419921875, 15.97900390625, 16.5380859375, 17.09716796875, 17.65625]}, "gradients/decoder.transformer.h.11.attn.c_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 2.0, 1.0, 2.0, 2.0, 2.0, 2.0, 3.0, 6.0, 7.0, 3.0, 9.0, 5.0, 10.0, 15.0, 24.0, 32.0, 42.0, 38.0, 62.0, 70.0, 86.0, 112.0, 132.0, 177.0, 308.0, 571.0, 1810.0, 28399.0, 2916325.0, 190729.0, 4549.0, 811.0, 363.0, 253.0, 180.0, 136.0, 83.0, 84.0, 50.0, 50.0, 34.0, 28.0, 20.0, 21.0, 19.0, 11.0, 6.0, 8.0, 6.0, 6.0, 2.0, 5.0, 2.0, 2.0, 2.0, 1.0, 4.0, 2.0, 0.0, 0.0, 2.0], "bins": [-40.96875, -39.6767578125, -38.384765625, -37.0927734375, -35.80078125, -34.5087890625, -33.216796875, -31.9248046875, -30.6328125, -29.3408203125, -28.048828125, -26.7568359375, -25.46484375, -24.1728515625, -22.880859375, -21.5888671875, -20.296875, -19.0048828125, -17.712890625, -16.4208984375, -15.12890625, -13.8369140625, -12.544921875, -11.2529296875, -9.9609375, -8.6689453125, -7.376953125, -6.0849609375, -4.79296875, -3.5009765625, -2.208984375, -0.9169921875, 0.375, 1.6669921875, 2.958984375, 4.2509765625, 5.54296875, 6.8349609375, 8.126953125, 9.4189453125, 10.7109375, 12.0029296875, 13.294921875, 14.5869140625, 15.87890625, 17.1708984375, 18.462890625, 19.7548828125, 21.046875, 22.3388671875, 23.630859375, 24.9228515625, 26.21484375, 27.5068359375, 28.798828125, 30.0908203125, 31.3828125, 32.6748046875, 33.966796875, 35.2587890625, 36.55078125, 37.8427734375, 39.134765625, 40.4267578125, 41.71875]}, "gradients/decoder.transformer.h.11.ln_1.weight": {"_type": "histogram", "values": [1.0, 1.0, 108.0, 850.0, 54.0, 2.0, 3.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-27.984949111938477, -19.899105072021484, -11.813262939453125, -3.727418899536133, 4.358423233032227, 12.444265365600586, 20.53011131286621, 28.61595344543457, 36.70179748535156, 44.78763961791992, 52.87348175048828, 60.959327697753906, 69.045166015625, 77.13101196289062, 85.21685791015625, 93.30270385742188, 101.38854217529297, 109.4743881225586, 117.56022644042969, 125.64607238769531, 133.73191833496094, 141.8177490234375, 149.90359497070312, 157.98944091796875, 166.07528686523438, 174.1611328125, 182.24697875976562, 190.33282470703125, 198.4186553955078, 206.50450134277344, 214.59034729003906, 222.6761932373047, 230.76202392578125, 238.84786987304688, 246.9337158203125, 255.01956176757812, 263.10540771484375, 271.19122314453125, 279.277099609375, 287.3629150390625, 295.4487609863281, 303.53460693359375, 311.6204528808594, 319.706298828125, 327.7921447753906, 335.87799072265625, 343.96380615234375, 352.0496520996094, 360.135498046875, 368.2213439941406, 376.30718994140625, 384.3930358886719, 392.4788818359375, 400.564697265625, 408.65057373046875, 416.73638916015625, 424.822265625, 432.9081115722656, 440.99395751953125, 449.0798034667969, 457.1656494140625, 465.25146484375, 473.33734130859375, 481.42315673828125, 489.5090026855469]}, "gradients/decoder.transformer.h.11.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 3.0, 3.0, 5.0, 11.0, 5.0, 5.0, 13.0, 15.0, 8.0, 6.0, 19.0, 27.0, 19.0, 29.0, 27.0, 26.0, 35.0, 38.0, 30.0, 32.0, 48.0, 45.0, 40.0, 41.0, 47.0, 43.0, 44.0, 30.0, 33.0, 38.0, 28.0, 33.0, 24.0, 33.0, 26.0, 19.0, 11.0, 15.0, 15.0, 10.0, 8.0, 9.0, 7.0, 1.0, 0.0, 4.0, 4.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-45.271949768066406, -43.83234786987305, -42.39274597167969, -40.953147888183594, -39.513545989990234, -38.073944091796875, -36.63434600830078, -35.19474411010742, -33.75514221191406, -32.3155403137207, -30.875940322875977, -29.43634033203125, -27.99673843383789, -26.55713653564453, -25.117536544799805, -23.677936553955078, -22.23833465576172, -20.79873275756836, -19.359132766723633, -17.919532775878906, -16.479930877685547, -15.040329933166504, -13.600728988647461, -12.161128044128418, -10.721527099609375, -9.281926155090332, -7.842325210571289, -6.402724266052246, -4.963123321533203, -3.52352237701416, -2.083921432495117, -0.6443204879760742, 0.7952804565429688, 2.2348814010620117, 3.6744823455810547, 5.114083290100098, 6.553684234619141, 7.993285179138184, 9.432886123657227, 10.87248706817627, 12.312088012695312, 13.751688957214355, 15.191289901733398, 16.630889892578125, 18.070491790771484, 19.510093688964844, 20.94969367980957, 22.389293670654297, 23.828895568847656, 25.268497467041016, 26.708097457885742, 28.14769744873047, 29.587299346923828, 31.026901245117188, 32.46649932861328, 33.90610122680664, 35.345703125, 36.78530502319336, 38.22490692138672, 39.66450500488281, 41.10410690307617, 42.54370880126953, 43.983306884765625, 45.422908782958984, 46.862510681152344]}, "gradients/decoder.transformer.h.10.mlp.c_proj.bias": {"_type": "histogram", "values": [3.0, 0.0, 1.0, 1.0, 1.0, 2.0, 4.0, 8.0, 5.0, 5.0, 7.0, 2.0, 7.0, 10.0, 13.0, 11.0, 10.0, 18.0, 20.0, 32.0, 25.0, 21.0, 36.0, 30.0, 32.0, 19.0, 39.0, 41.0, 42.0, 39.0, 42.0, 36.0, 38.0, 43.0, 27.0, 24.0, 42.0, 26.0, 28.0, 31.0, 26.0, 27.0, 25.0, 23.0, 24.0, 14.0, 9.0, 6.0, 8.0, 5.0, 11.0, 2.0, 2.0, 3.0, 6.0, 0.0, 4.0, 1.0, 1.0, 1.0, 2.0, 1.0, 0.0, 2.0], "bins": [-5.01953125, -4.85662841796875, -4.6937255859375, -4.53082275390625, -4.367919921875, -4.20501708984375, -4.0421142578125, -3.87921142578125, -3.71630859375, -3.55340576171875, -3.3905029296875, -3.22760009765625, -3.064697265625, -2.90179443359375, -2.7388916015625, -2.57598876953125, -2.4130859375, -2.25018310546875, -2.0872802734375, -1.92437744140625, -1.761474609375, -1.59857177734375, -1.4356689453125, -1.27276611328125, -1.10986328125, -0.94696044921875, -0.7840576171875, -0.62115478515625, -0.458251953125, -0.29534912109375, -0.1324462890625, 0.03045654296875, 0.193359375, 0.35626220703125, 0.5191650390625, 0.68206787109375, 0.844970703125, 1.00787353515625, 1.1707763671875, 1.33367919921875, 1.49658203125, 1.65948486328125, 1.8223876953125, 1.98529052734375, 2.148193359375, 2.31109619140625, 2.4739990234375, 2.63690185546875, 2.7998046875, 2.96270751953125, 3.1256103515625, 3.28851318359375, 3.451416015625, 3.61431884765625, 3.7772216796875, 3.94012451171875, 4.10302734375, 4.26593017578125, 4.4288330078125, 4.59173583984375, 4.754638671875, 4.91754150390625, 5.0804443359375, 5.24334716796875, 5.40625]}, "gradients/decoder.transformer.h.10.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 1.0, 5.0, 1.0, 3.0, 6.0, 9.0, 10.0, 13.0, 24.0, 25.0, 26.0, 34.0, 38.0, 63.0, 95.0, 131.0, 219.0, 351.0, 773.0, 2164.0, 7150.0, 35317.0, 235092.0, 1412589.0, 1984116.0, 436904.0, 62766.0, 11410.0, 2843.0, 904.0, 444.0, 248.0, 146.0, 99.0, 52.0, 46.0, 41.0, 29.0, 27.0, 14.0, 15.0, 16.0, 11.0, 5.0, 4.0, 2.0, 2.0, 5.0, 3.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0], "bins": [-11.7578125, -11.4031982421875, -11.048583984375, -10.6939697265625, -10.33935546875, -9.9847412109375, -9.630126953125, -9.2755126953125, -8.9208984375, -8.5662841796875, -8.211669921875, -7.8570556640625, -7.50244140625, -7.1478271484375, -6.793212890625, -6.4385986328125, -6.083984375, -5.7293701171875, -5.374755859375, -5.0201416015625, -4.66552734375, -4.3109130859375, -3.956298828125, -3.6016845703125, -3.2470703125, -2.8924560546875, -2.537841796875, -2.1832275390625, -1.82861328125, -1.4739990234375, -1.119384765625, -0.7647705078125, -0.41015625, -0.0555419921875, 0.299072265625, 0.6536865234375, 1.00830078125, 1.3629150390625, 1.717529296875, 2.0721435546875, 2.4267578125, 2.7813720703125, 3.135986328125, 3.4906005859375, 3.84521484375, 4.1998291015625, 4.554443359375, 4.9090576171875, 5.263671875, 5.6182861328125, 5.972900390625, 6.3275146484375, 6.68212890625, 7.0367431640625, 7.391357421875, 7.7459716796875, 8.1005859375, 8.4552001953125, 8.809814453125, 9.1644287109375, 9.51904296875, 9.8736572265625, 10.228271484375, 10.5828857421875, 10.9375]}, "gradients/decoder.transformer.h.10.mlp.c_fc.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 6.0, 2.0, 3.0, 7.0, 10.0, 16.0, 15.0, 20.0, 34.0, 34.0, 65.0, 107.0, 137.0, 176.0, 235.0, 353.0, 453.0, 488.0, 461.0, 381.0, 287.0, 205.0, 152.0, 108.0, 95.0, 62.0, 50.0, 42.0, 18.0, 16.0, 18.0, 7.0, 3.0, 8.0, 4.0, 2.0, 2.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-10.8046875, -10.4287109375, -10.052734375, -9.6767578125, -9.30078125, -8.9248046875, -8.548828125, -8.1728515625, -7.796875, -7.4208984375, -7.044921875, -6.6689453125, -6.29296875, -5.9169921875, -5.541015625, -5.1650390625, -4.7890625, -4.4130859375, -4.037109375, -3.6611328125, -3.28515625, -2.9091796875, -2.533203125, -2.1572265625, -1.78125, -1.4052734375, -1.029296875, -0.6533203125, -0.27734375, 0.0986328125, 0.474609375, 0.8505859375, 1.2265625, 1.6025390625, 1.978515625, 2.3544921875, 2.73046875, 3.1064453125, 3.482421875, 3.8583984375, 4.234375, 4.6103515625, 4.986328125, 5.3623046875, 5.73828125, 6.1142578125, 6.490234375, 6.8662109375, 7.2421875, 7.6181640625, 7.994140625, 8.3701171875, 8.74609375, 9.1220703125, 9.498046875, 9.8740234375, 10.25, 10.6259765625, 11.001953125, 11.3779296875, 11.75390625, 12.1298828125, 12.505859375, 12.8818359375, 13.2578125]}, "gradients/decoder.transformer.h.10.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 3.0, 4.0, 4.0, 4.0, 11.0, 17.0, 16.0, 23.0, 43.0, 51.0, 78.0, 81.0, 139.0, 163.0, 293.0, 518.0, 2842.0, 528621.0, 3642724.0, 16765.0, 828.0, 363.0, 226.0, 144.0, 106.0, 72.0, 36.0, 34.0, 25.0, 11.0, 15.0, 9.0, 10.0, 2.0, 1.0, 3.0, 4.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-56.78125, -55.12158203125, -53.4619140625, -51.80224609375, -50.142578125, -48.48291015625, -46.8232421875, -45.16357421875, -43.50390625, -41.84423828125, -40.1845703125, -38.52490234375, -36.865234375, -35.20556640625, -33.5458984375, -31.88623046875, -30.2265625, -28.56689453125, -26.9072265625, -25.24755859375, -23.587890625, -21.92822265625, -20.2685546875, -18.60888671875, -16.94921875, -15.28955078125, -13.6298828125, -11.97021484375, -10.310546875, -8.65087890625, -6.9912109375, -5.33154296875, -3.671875, -2.01220703125, -0.3525390625, 1.30712890625, 2.966796875, 4.62646484375, 6.2861328125, 7.94580078125, 9.60546875, 11.26513671875, 12.9248046875, 14.58447265625, 16.244140625, 17.90380859375, 19.5634765625, 21.22314453125, 22.8828125, 24.54248046875, 26.2021484375, 27.86181640625, 29.521484375, 31.18115234375, 32.8408203125, 34.50048828125, 36.16015625, 37.81982421875, 39.4794921875, 41.13916015625, 42.798828125, 44.45849609375, 46.1181640625, 47.77783203125, 49.4375]}, "gradients/decoder.transformer.h.10.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 3.0, 15.0, 66.0, 131.0, 249.0, 274.0, 176.0, 71.0, 24.0, 8.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-69.85260009765625, -65.97642517089844, -62.100250244140625, -58.22407531738281, -54.347900390625, -50.47172546386719, -46.595550537109375, -42.71937561035156, -38.84320068359375, -34.96702575683594, -31.090850830078125, -27.214675903320312, -23.3385009765625, -19.46232795715332, -15.586153030395508, -11.709978103637695, -7.833805084228516, -3.9576303958892822, -0.08145570755004883, 3.7947187423706055, 7.670893669128418, 11.547067642211914, 15.423242568969727, 19.29941749572754, 23.17559242248535, 27.051767349243164, 30.927942276000977, 34.804115295410156, 38.68029022216797, 42.55646514892578, 46.432640075683594, 50.308815002441406, 54.18498992919922, 58.06116485595703, 61.937339782714844, 65.81351470947266, 69.68968963623047, 73.56586456298828, 77.4420394897461, 81.3182144165039, 85.19438934326172, 89.07056427001953, 92.94673919677734, 96.82291412353516, 100.69908905029297, 104.57526397705078, 108.4514389038086, 112.3276138305664, 116.20378112792969, 120.0799560546875, 123.95613098144531, 127.83230590820312, 131.70848083496094, 135.58465576171875, 139.46083068847656, 143.33700561523438, 147.2131805419922, 151.08935546875, 154.9655303955078, 158.84170532226562, 162.71788024902344, 166.59405517578125, 170.47023010253906, 174.34640502929688, 178.2225799560547]}, "gradients/decoder.transformer.h.10.ln_2.bias": {"_type": "histogram", "values": [1.0, 4.0, 2.0, 3.0, 5.0, 3.0, 8.0, 7.0, 4.0, 9.0, 4.0, 11.0, 21.0, 17.0, 16.0, 17.0, 28.0, 30.0, 27.0, 35.0, 29.0, 25.0, 45.0, 42.0, 24.0, 37.0, 25.0, 32.0, 35.0, 38.0, 35.0, 42.0, 31.0, 31.0, 32.0, 34.0, 19.0, 25.0, 25.0, 16.0, 19.0, 15.0, 15.0, 7.0, 10.0, 10.0, 16.0, 8.0, 8.0, 10.0, 9.0, 5.0, 5.0, 2.0, 2.0, 1.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0], "bins": [-28.55019187927246, -27.543489456176758, -26.536787033081055, -25.53008460998535, -24.52338218688965, -23.516679763793945, -22.50997543334961, -21.503273010253906, -20.496570587158203, -19.4898681640625, -18.483165740966797, -17.476463317871094, -16.46976089477539, -15.463058471679688, -14.456355094909668, -13.449652671813965, -12.442951202392578, -11.436248779296875, -10.429546356201172, -9.422843933105469, -8.416141510009766, -7.409438610076904, -6.402735710144043, -5.39603328704834, -4.389330863952637, -3.3826284408569336, -2.3759257793426514, -1.3692231178283691, -0.362520694732666, 0.6441817283630371, 1.6508846282958984, 2.6575870513916016, 3.6642913818359375, 4.670993804931641, 5.677696228027344, 6.684399127960205, 7.691101551055908, 8.697803497314453, 9.704506874084473, 10.711209297180176, 11.717911720275879, 12.724614143371582, 13.731316566467285, 14.738019943237305, 15.744722366333008, 16.75142478942871, 17.758127212524414, 18.764829635620117, 19.77153205871582, 20.778234481811523, 21.784936904907227, 22.79163932800293, 23.798341751098633, 24.805044174194336, 25.811748504638672, 26.818450927734375, 27.825153350830078, 28.83185577392578, 29.838558197021484, 30.845260620117188, 31.85196304321289, 32.858665466308594, 33.8653678894043, 34.8720703125, 35.8787727355957]}, "gradients/decoder.transformer.h.10.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 3.0, 0.0, 5.0, 8.0, 6.0, 5.0, 4.0, 7.0, 7.0, 16.0, 10.0, 17.0, 19.0, 23.0, 22.0, 20.0, 32.0, 44.0, 38.0, 45.0, 35.0, 42.0, 54.0, 42.0, 44.0, 38.0, 40.0, 38.0, 33.0, 33.0, 35.0, 42.0, 23.0, 33.0, 28.0, 25.0, 12.0, 20.0, 17.0, 13.0, 7.0, 8.0, 6.0, 6.0, 2.0, 3.0, 1.0, 1.0, 1.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0], "bins": [-5.78515625, -5.611328125, -5.4375, -5.263671875, -5.08984375, -4.916015625, -4.7421875, -4.568359375, -4.39453125, -4.220703125, -4.046875, -3.873046875, -3.69921875, -3.525390625, -3.3515625, -3.177734375, -3.00390625, -2.830078125, -2.65625, -2.482421875, -2.30859375, -2.134765625, -1.9609375, -1.787109375, -1.61328125, -1.439453125, -1.265625, -1.091796875, -0.91796875, -0.744140625, -0.5703125, -0.396484375, -0.22265625, -0.048828125, 0.125, 0.298828125, 0.47265625, 0.646484375, 0.8203125, 0.994140625, 1.16796875, 1.341796875, 1.515625, 1.689453125, 1.86328125, 2.037109375, 2.2109375, 2.384765625, 2.55859375, 2.732421875, 2.90625, 3.080078125, 3.25390625, 3.427734375, 3.6015625, 3.775390625, 3.94921875, 4.123046875, 4.296875, 4.470703125, 4.64453125, 4.818359375, 4.9921875, 5.166015625, 5.33984375]}, "gradients/decoder.transformer.h.10.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 2.0, 3.0, 2.0, 6.0, 3.0, 14.0, 17.0, 26.0, 38.0, 81.0, 100.0, 156.0, 207.0, 300.0, 450.0, 692.0, 1019.0, 1541.0, 2344.0, 3720.0, 5435.0, 8293.0, 12787.0, 19423.0, 29926.0, 45152.0, 67470.0, 96952.0, 129104.0, 149607.0, 138445.0, 107482.0, 76335.0, 52076.0, 34131.0, 22553.0, 14642.0, 9583.0, 6209.0, 4179.0, 2673.0, 1803.0, 1210.0, 827.0, 521.0, 348.0, 241.0, 140.0, 104.0, 55.0, 47.0, 39.0, 19.0, 18.0, 4.0, 10.0, 4.0, 3.0, 0.0, 0.0, 1.0], "bins": [-0.65966796875, -0.6394729614257812, -0.6192779541015625, -0.5990829467773438, -0.578887939453125, -0.5586929321289062, -0.5384979248046875, -0.5183029174804688, -0.49810791015625, -0.47791290283203125, -0.4577178955078125, -0.43752288818359375, -0.417327880859375, -0.39713287353515625, -0.3769378662109375, -0.35674285888671875, -0.3365478515625, -0.31635284423828125, -0.2961578369140625, -0.27596282958984375, -0.255767822265625, -0.23557281494140625, -0.2153778076171875, -0.19518280029296875, -0.17498779296875, -0.15479278564453125, -0.1345977783203125, -0.11440277099609375, -0.094207763671875, -0.07401275634765625, -0.0538177490234375, -0.03362274169921875, -0.013427734375, 0.00676727294921875, 0.0269622802734375, 0.04715728759765625, 0.067352294921875, 0.08754730224609375, 0.1077423095703125, 0.12793731689453125, 0.14813232421875, 0.16832733154296875, 0.1885223388671875, 0.20871734619140625, 0.228912353515625, 0.24910736083984375, 0.2693023681640625, 0.28949737548828125, 0.3096923828125, 0.32988739013671875, 0.3500823974609375, 0.37027740478515625, 0.390472412109375, 0.41066741943359375, 0.4308624267578125, 0.45105743408203125, 0.47125244140625, 0.49144744873046875, 0.5116424560546875, 0.5318374633789062, 0.552032470703125, 0.5722274780273438, 0.5924224853515625, 0.6126174926757812, 0.6328125]}, "gradients/decoder.transformer.h.10.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 3.0, 1.0, 0.0, 1.0, 2.0, 3.0, 3.0, 12.0, 5.0, 4.0, 13.0, 14.0, 13.0, 18.0, 20.0, 22.0, 23.0, 27.0, 27.0, 30.0, 35.0, 39.0, 47.0, 32.0, 34.0, 29.0, 46.0, 1061.0, 54.0, 27.0, 46.0, 33.0, 34.0, 31.0, 24.0, 35.0, 29.0, 22.0, 18.0, 21.0, 14.0, 18.0, 15.0, 9.0, 9.0, 6.0, 6.0, 5.0, 9.0, 3.0, 3.0, 1.0, 3.0, 0.0, 3.0, 0.0, 1.0], "bins": [-3.69921875, -3.590606689453125, -3.48199462890625, -3.373382568359375, -3.2647705078125, -3.156158447265625, -3.04754638671875, -2.938934326171875, -2.830322265625, -2.721710205078125, -2.61309814453125, -2.504486083984375, -2.3958740234375, -2.287261962890625, -2.17864990234375, -2.070037841796875, -1.96142578125, -1.852813720703125, -1.74420166015625, -1.635589599609375, -1.5269775390625, -1.418365478515625, -1.30975341796875, -1.201141357421875, -1.092529296875, -0.983917236328125, -0.87530517578125, -0.766693115234375, -0.6580810546875, -0.549468994140625, -0.44085693359375, -0.332244873046875, -0.2236328125, -0.115020751953125, -0.00640869140625, 0.102203369140625, 0.2108154296875, 0.319427490234375, 0.42803955078125, 0.536651611328125, 0.645263671875, 0.753875732421875, 0.86248779296875, 0.971099853515625, 1.0797119140625, 1.188323974609375, 1.29693603515625, 1.405548095703125, 1.51416015625, 1.622772216796875, 1.73138427734375, 1.839996337890625, 1.9486083984375, 2.057220458984375, 2.16583251953125, 2.274444580078125, 2.383056640625, 2.491668701171875, 2.60028076171875, 2.708892822265625, 2.8175048828125, 2.926116943359375, 3.03472900390625, 3.143341064453125, 3.251953125]}, "gradients/decoder.transformer.h.10.crossattention.c_attn.weight": {"_type": "histogram", "values": [3.0, 3.0, 5.0, 6.0, 8.0, 11.0, 7.0, 18.0, 21.0, 36.0, 64.0, 95.0, 118.0, 180.0, 257.0, 389.0, 554.0, 933.0, 1352.0, 2076.0, 3187.0, 5107.0, 8116.0, 12542.0, 20106.0, 31332.0, 48200.0, 72733.0, 104638.0, 137556.0, 1199167.0, 137082.0, 104820.0, 72300.0, 48024.0, 31155.0, 19636.0, 12524.0, 8002.0, 5199.0, 3270.0, 2145.0, 1420.0, 874.0, 629.0, 416.0, 266.0, 189.0, 110.0, 80.0, 51.0, 37.0, 25.0, 24.0, 17.0, 13.0, 7.0, 4.0, 4.0, 3.0, 2.0, 1.0, 1.0, 2.0], "bins": [-0.40234375, -0.3891487121582031, -0.37595367431640625, -0.3627586364746094, -0.3495635986328125, -0.3363685607910156, -0.32317352294921875, -0.3099784851074219, -0.296783447265625, -0.2835884094238281, -0.27039337158203125, -0.2571983337402344, -0.2440032958984375, -0.23080825805664062, -0.21761322021484375, -0.20441818237304688, -0.19122314453125, -0.17802810668945312, -0.16483306884765625, -0.15163803100585938, -0.1384429931640625, -0.12524795532226562, -0.11205291748046875, -0.09885787963867188, -0.085662841796875, -0.07246780395507812, -0.05927276611328125, -0.046077728271484375, -0.0328826904296875, -0.019687652587890625, -0.00649261474609375, 0.006702423095703125, 0.0198974609375, 0.033092498779296875, 0.04628753662109375, 0.059482574462890625, 0.0726776123046875, 0.08587265014648438, 0.09906768798828125, 0.11226272583007812, 0.125457763671875, 0.13865280151367188, 0.15184783935546875, 0.16504287719726562, 0.1782379150390625, 0.19143295288085938, 0.20462799072265625, 0.21782302856445312, 0.23101806640625, 0.24421310424804688, 0.25740814208984375, 0.2706031799316406, 0.2837982177734375, 0.2969932556152344, 0.31018829345703125, 0.3233833312988281, 0.336578369140625, 0.3497734069824219, 0.36296844482421875, 0.3761634826660156, 0.3893585205078125, 0.4025535583496094, 0.41574859619140625, 0.4289436340332031, 0.442138671875]}, "gradients/decoder.transformer.h.10.crossattention.q_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 2.0, 3.0, 2.0, 3.0, 4.0, 4.0, 3.0, 5.0, 9.0, 8.0, 8.0, 15.0, 20.0, 26.0, 23.0, 42.0, 41.0, 52.0, 75.0, 71.0, 81.0, 81.0, 90.0, 73.0, 42.0, 48.0, 40.0, 30.0, 14.0, 17.0, 11.0, 14.0, 8.0, 7.0, 9.0, 5.0, 6.0, 4.0, 6.0, 4.0, 3.0, 0.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0], "bins": [-0.0121612548828125, -0.011792302131652832, -0.011423349380493164, -0.011054396629333496, -0.010685443878173828, -0.01031649112701416, -0.009947538375854492, -0.009578585624694824, -0.009209632873535156, -0.008840680122375488, -0.00847172737121582, -0.008102774620056152, -0.007733821868896484, -0.007364869117736816, -0.0069959163665771484, -0.0066269636154174805, -0.0062580108642578125, -0.0058890581130981445, -0.0055201053619384766, -0.005151152610778809, -0.004782199859619141, -0.004413247108459473, -0.004044294357299805, -0.0036753416061401367, -0.0033063888549804688, -0.0029374361038208008, -0.002568483352661133, -0.002199530601501465, -0.0018305778503417969, -0.001461625099182129, -0.001092672348022461, -0.000723719596862793, -0.000354766845703125, 1.4185905456542969e-05, 0.00038313865661621094, 0.0007520914077758789, 0.0011210441589355469, 0.0014899969100952148, 0.0018589496612548828, 0.0022279024124145508, 0.0025968551635742188, 0.0029658079147338867, 0.0033347606658935547, 0.0037037134170532227, 0.004072666168212891, 0.004441618919372559, 0.0048105716705322266, 0.0051795244216918945, 0.0055484771728515625, 0.0059174299240112305, 0.0062863826751708984, 0.006655335426330566, 0.007024288177490234, 0.007393240928649902, 0.00776219367980957, 0.008131146430969238, 0.008500099182128906, 0.008869051933288574, 0.009238004684448242, 0.00960695743560791, 0.009975910186767578, 0.010344862937927246, 0.010713815689086914, 0.011082768440246582, 0.01145172119140625]}, "gradients/decoder.transformer.h.10.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 3.0, 6.0, 7.0, 5.0, 6.0, 7.0, 17.0, 18.0, 13.0, 24.0, 31.0, 37.0, 66.0, 74.0, 125.0, 225.0, 539.0, 16670.0, 1025309.0, 4354.0, 436.0, 188.0, 113.0, 72.0, 64.0, 41.0, 29.0, 20.0, 12.0, 11.0, 9.0, 6.0, 5.0, 4.0, 7.0, 2.0, 2.0, 4.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.24658203125, -0.2387237548828125, -0.230865478515625, -0.2230072021484375, -0.21514892578125, -0.2072906494140625, -0.199432373046875, -0.1915740966796875, -0.1837158203125, -0.1758575439453125, -0.167999267578125, -0.1601409912109375, -0.15228271484375, -0.1444244384765625, -0.136566162109375, -0.1287078857421875, -0.120849609375, -0.1129913330078125, -0.105133056640625, -0.0972747802734375, -0.08941650390625, -0.0815582275390625, -0.073699951171875, -0.0658416748046875, -0.0579833984375, -0.0501251220703125, -0.042266845703125, -0.0344085693359375, -0.02655029296875, -0.0186920166015625, -0.010833740234375, -0.0029754638671875, 0.0048828125, 0.0127410888671875, 0.020599365234375, 0.0284576416015625, 0.03631591796875, 0.0441741943359375, 0.052032470703125, 0.0598907470703125, 0.0677490234375, 0.0756072998046875, 0.083465576171875, 0.0913238525390625, 0.09918212890625, 0.1070404052734375, 0.114898681640625, 0.1227569580078125, 0.130615234375, 0.1384735107421875, 0.146331787109375, 0.1541900634765625, 0.16204833984375, 0.1699066162109375, 0.177764892578125, 0.1856231689453125, 0.1934814453125, 0.2013397216796875, 0.209197998046875, 0.2170562744140625, 0.22491455078125, 0.2327728271484375, 0.240631103515625, 0.2484893798828125, 0.25634765625]}, "gradients/decoder.transformer.h.10.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 8.0, 74.0, 904.0, 32.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.11567068845033646, -0.11130581051111221, -0.10694093257188797, -0.10257605463266373, -0.09821117669343948, -0.09384629875421524, -0.0894814133644104, -0.08511653542518616, -0.08075165748596191, -0.07638677954673767, -0.07202190160751343, -0.06765702366828918, -0.06329214572906494, -0.0589272677898407, -0.05456238612532616, -0.05019750818610191, -0.04583263397216797, -0.041467756032943726, -0.03710287809371948, -0.03273800015449524, -0.028373120352625847, -0.024008242413401604, -0.01964336261153221, -0.015278484672307968, -0.010913606733083725, -0.0065487283281981945, -0.002183849923312664, 0.0021810289472341537, 0.006545906886458397, 0.01091078482568264, 0.015275664627552032, 0.019640542566776276, 0.024005427956581116, 0.02837030589580536, 0.0327351838350296, 0.037100061774253845, 0.04146493971347809, 0.04582981765270233, 0.05019469931721687, 0.054559577256441116, 0.05892445519566536, 0.0632893368601799, 0.06765421479940414, 0.07201909273862839, 0.07638397067785263, 0.08074884861707687, 0.08511372655630112, 0.08947860449552536, 0.0938434824347496, 0.09820836037397385, 0.10257323831319809, 0.10693811625242233, 0.11130299419164658, 0.11566787213087082, 0.12003275752067566, 0.1243976354598999, 0.12876251339912415, 0.1331273913383484, 0.13749226927757263, 0.14185714721679688, 0.14622202515602112, 0.15058690309524536, 0.1549517810344696, 0.15931665897369385, 0.1636815369129181]}, "gradients/decoder.transformer.h.10.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 4.0, 1.0, 2.0, 2.0, 1.0, 6.0, 8.0, 5.0, 1.0, 10.0, 11.0, 14.0, 19.0, 20.0, 14.0, 28.0, 22.0, 35.0, 32.0, 40.0, 38.0, 33.0, 32.0, 39.0, 39.0, 42.0, 51.0, 43.0, 32.0, 43.0, 45.0, 30.0, 29.0, 27.0, 35.0, 30.0, 28.0, 24.0, 15.0, 20.0, 14.0, 8.0, 9.0, 8.0, 8.0, 6.0, 4.0, 4.0, 3.0, 2.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-0.01267462968826294, -0.01226948481053114, -0.01186433993279934, -0.01145919505506754, -0.01105405017733574, -0.010648905299603939, -0.010243760421872139, -0.009838615544140339, -0.009433470666408539, -0.009028325788676739, -0.008623180910944939, -0.008218036033213139, -0.007812891155481339, -0.007407746277749538, -0.007002601400017738, -0.006597456522285938, -0.006192311644554138, -0.005787166766822338, -0.005382021889090538, -0.004976877011358738, -0.004571732133626938, -0.004166587255895138, -0.0037614423781633377, -0.0033562975004315376, -0.0029511526226997375, -0.0025460077449679375, -0.0021408628672361374, -0.0017357179895043373, -0.0013305731117725372, -0.0009254282340407372, -0.0005202833563089371, -0.000115138478577137, 0.0002900063991546631, 0.0006951512768864632, 0.0011002961546182632, 0.0015054410323500633, 0.0019105859100818634, 0.0023157307878136635, 0.0027208756655454636, 0.0031260205432772636, 0.0035311654210090637, 0.003936310298740864, 0.004341455176472664, 0.004746600054204464, 0.005151744931936264, 0.005556889809668064, 0.005962034687399864, 0.006367179565131664, 0.006772324442863464, 0.0071774693205952644, 0.0075826141983270645, 0.007987759076058865, 0.008392903953790665, 0.008798048831522465, 0.009203193709254265, 0.009608338586986065, 0.010013483464717865, 0.010418628342449665, 0.010823773220181465, 0.011228918097913265, 0.011634062975645065, 0.012039207853376865, 0.012444352731108665, 0.012849497608840466, 0.013254642486572266]}, "gradients/decoder.transformer.h.10.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 3.0, 0.0, 5.0, 8.0, 6.0, 5.0, 4.0, 7.0, 7.0, 16.0, 10.0, 17.0, 19.0, 23.0, 22.0, 20.0, 33.0, 43.0, 38.0, 45.0, 35.0, 42.0, 55.0, 41.0, 44.0, 38.0, 40.0, 38.0, 33.0, 33.0, 35.0, 42.0, 23.0, 33.0, 28.0, 25.0, 12.0, 20.0, 17.0, 13.0, 7.0, 8.0, 6.0, 6.0, 2.0, 3.0, 1.0, 1.0, 1.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0], "bins": [-5.78515625, -5.611328125, -5.4375, -5.263671875, -5.08984375, -4.916015625, -4.7421875, -4.568359375, -4.39453125, -4.220703125, -4.046875, -3.873046875, -3.69921875, -3.525390625, -3.3515625, -3.177734375, -3.00390625, -2.830078125, -2.65625, -2.482421875, -2.30859375, -2.134765625, -1.9609375, -1.787109375, -1.61328125, -1.439453125, -1.265625, -1.091796875, -0.91796875, -0.744140625, -0.5703125, -0.396484375, -0.22265625, -0.048828125, 0.125, 0.298828125, 0.47265625, 0.646484375, 0.8203125, 0.994140625, 1.16796875, 1.341796875, 1.515625, 1.689453125, 1.86328125, 2.037109375, 2.2109375, 2.384765625, 2.55859375, 2.732421875, 2.90625, 3.080078125, 3.25390625, 3.427734375, 3.6015625, 3.775390625, 3.94921875, 4.123046875, 4.296875, 4.470703125, 4.64453125, 4.818359375, 4.9921875, 5.166015625, 5.33984375]}, "gradients/decoder.transformer.h.10.attn.c_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 3.0, 4.0, 8.0, 5.0, 13.0, 8.0, 9.0, 19.0, 25.0, 21.0, 42.0, 62.0, 79.0, 112.0, 189.0, 314.0, 534.0, 991.0, 1870.0, 3768.0, 7414.0, 14865.0, 29664.0, 62644.0, 132314.0, 278981.0, 268093.0, 128014.0, 60310.0, 28742.0, 14302.0, 7177.0, 3631.0, 1897.0, 949.0, 570.0, 372.0, 174.0, 111.0, 64.0, 41.0, 48.0, 22.0, 21.0, 27.0, 7.0, 7.0, 10.0, 6.0, 2.0, 1.0, 5.0, 3.0, 1.0, 2.0, 1.0], "bins": [-5.9375, -5.76275634765625, -5.5880126953125, -5.41326904296875, -5.238525390625, -5.06378173828125, -4.8890380859375, -4.71429443359375, -4.53955078125, -4.36480712890625, -4.1900634765625, -4.01531982421875, -3.840576171875, -3.66583251953125, -3.4910888671875, -3.31634521484375, -3.1416015625, -2.96685791015625, -2.7921142578125, -2.61737060546875, -2.442626953125, -2.26788330078125, -2.0931396484375, -1.91839599609375, -1.74365234375, -1.56890869140625, -1.3941650390625, -1.21942138671875, -1.044677734375, -0.86993408203125, -0.6951904296875, -0.52044677734375, -0.345703125, -0.17095947265625, 0.0037841796875, 0.17852783203125, 0.353271484375, 0.52801513671875, 0.7027587890625, 0.87750244140625, 1.05224609375, 1.22698974609375, 1.4017333984375, 1.57647705078125, 1.751220703125, 1.92596435546875, 2.1007080078125, 2.27545166015625, 2.4501953125, 2.62493896484375, 2.7996826171875, 2.97442626953125, 3.149169921875, 3.32391357421875, 3.4986572265625, 3.67340087890625, 3.84814453125, 4.02288818359375, 4.1976318359375, 4.37237548828125, 4.547119140625, 4.72186279296875, 4.8966064453125, 5.07135009765625, 5.24609375]}, "gradients/decoder.transformer.h.10.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 4.0, 1.0, 2.0, 2.0, 0.0, 2.0, 2.0, 6.0, 4.0, 3.0, 5.0, 16.0, 7.0, 10.0, 7.0, 16.0, 14.0, 20.0, 13.0, 20.0, 24.0, 37.0, 31.0, 42.0, 33.0, 61.0, 58.0, 84.0, 146.0, 244.0, 1377.0, 207.0, 107.0, 68.0, 38.0, 40.0, 33.0, 37.0, 32.0, 25.0, 28.0, 19.0, 20.0, 19.0, 19.0, 19.0, 14.0, 11.0, 5.0, 10.0, 2.0, 5.0, 2.0, 5.0, 6.0, 2.0, 0.0, 1.0, 2.0, 0.0, 2.0], "bins": [-15.140625, -14.6846923828125, -14.228759765625, -13.7728271484375, -13.31689453125, -12.8609619140625, -12.405029296875, -11.9490966796875, -11.4931640625, -11.0372314453125, -10.581298828125, -10.1253662109375, -9.66943359375, -9.2135009765625, -8.757568359375, -8.3016357421875, -7.845703125, -7.3897705078125, -6.933837890625, -6.4779052734375, -6.02197265625, -5.5660400390625, -5.110107421875, -4.6541748046875, -4.1982421875, -3.7423095703125, -3.286376953125, -2.8304443359375, -2.37451171875, -1.9185791015625, -1.462646484375, -1.0067138671875, -0.55078125, -0.0948486328125, 0.361083984375, 0.8170166015625, 1.27294921875, 1.7288818359375, 2.184814453125, 2.6407470703125, 3.0966796875, 3.5526123046875, 4.008544921875, 4.4644775390625, 4.92041015625, 5.3763427734375, 5.832275390625, 6.2882080078125, 6.744140625, 7.2000732421875, 7.656005859375, 8.1119384765625, 8.56787109375, 9.0238037109375, 9.479736328125, 9.9356689453125, 10.3916015625, 10.8475341796875, 11.303466796875, 11.7593994140625, 12.21533203125, 12.6712646484375, 13.127197265625, 13.5831298828125, 14.0390625]}, "gradients/decoder.transformer.h.10.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 4.0, 1.0, 5.0, 4.0, 6.0, 7.0, 5.0, 10.0, 7.0, 11.0, 26.0, 23.0, 28.0, 47.0, 61.0, 95.0, 149.0, 227.0, 368.0, 778.0, 2877.0, 524046.0, 2610162.0, 4671.0, 929.0, 435.0, 239.0, 130.0, 98.0, 64.0, 50.0, 35.0, 29.0, 12.0, 22.0, 9.0, 12.0, 7.0, 7.0, 6.0, 5.0, 3.0, 2.0, 3.0, 0.0, 4.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-56.21875, -54.28662109375, -52.3544921875, -50.42236328125, -48.490234375, -46.55810546875, -44.6259765625, -42.69384765625, -40.76171875, -38.82958984375, -36.8974609375, -34.96533203125, -33.033203125, -31.10107421875, -29.1689453125, -27.23681640625, -25.3046875, -23.37255859375, -21.4404296875, -19.50830078125, -17.576171875, -15.64404296875, -13.7119140625, -11.77978515625, -9.84765625, -7.91552734375, -5.9833984375, -4.05126953125, -2.119140625, -0.18701171875, 1.7451171875, 3.67724609375, 5.609375, 7.54150390625, 9.4736328125, 11.40576171875, 13.337890625, 15.27001953125, 17.2021484375, 19.13427734375, 21.06640625, 22.99853515625, 24.9306640625, 26.86279296875, 28.794921875, 30.72705078125, 32.6591796875, 34.59130859375, 36.5234375, 38.45556640625, 40.3876953125, 42.31982421875, 44.251953125, 46.18408203125, 48.1162109375, 50.04833984375, 51.98046875, 53.91259765625, 55.8447265625, 57.77685546875, 59.708984375, 61.64111328125, 63.5732421875, 65.50537109375, 67.4375]}, "gradients/decoder.transformer.h.10.ln_1.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 2.0, 4.0, 19.0, 144.0, 401.0, 337.0, 97.0, 13.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-29.084705352783203, -25.799070358276367, -22.51343536376953, -19.227800369262695, -15.94216537475586, -12.656530380249023, -9.370895385742188, -6.085260391235352, -2.7996253967285156, 0.4860095977783203, 3.7716445922851562, 7.057279586791992, 10.342914581298828, 13.628549575805664, 16.9141845703125, 20.199819564819336, 23.485454559326172, 26.771089553833008, 30.056724548339844, 33.34236145019531, 36.627994537353516, 39.91362762451172, 43.19926452636719, 46.484901428222656, 49.77053451538086, 53.05616760253906, 56.34180450439453, 59.62744140625, 62.9130744934082, 66.1987075805664, 69.48434448242188, 72.76998138427734, 76.05561828613281, 79.34125518798828, 82.62689208984375, 85.91252136230469, 89.19815826416016, 92.48379516601562, 95.76942443847656, 99.05506134033203, 102.3406982421875, 105.62633514404297, 108.91197204589844, 112.19760131835938, 115.48323822021484, 118.76887512207031, 122.05450439453125, 125.34014129638672, 128.6257781982422, 131.91140747070312, 135.19705200195312, 138.48268127441406, 141.768310546875, 145.053955078125, 148.33958435058594, 151.62522888183594, 154.91085815429688, 158.1964874267578, 161.4821319580078, 164.76776123046875, 168.05340576171875, 171.3390350341797, 174.62466430664062, 177.91030883789062, 181.19593811035156]}, "gradients/decoder.transformer.h.10.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 3.0, 0.0, 4.0, 3.0, 3.0, 4.0, 9.0, 12.0, 22.0, 16.0, 27.0, 22.0, 29.0, 42.0, 28.0, 48.0, 42.0, 45.0, 34.0, 64.0, 56.0, 57.0, 40.0, 46.0, 40.0, 49.0, 44.0, 35.0, 39.0, 20.0, 27.0, 22.0, 18.0, 16.0, 18.0, 4.0, 8.0, 3.0, 5.0, 6.0, 4.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-61.01214599609375, -59.292633056640625, -57.5731201171875, -55.853607177734375, -54.13409423828125, -52.414581298828125, -50.695064544677734, -48.97555160522461, -47.256038665771484, -45.53652572631836, -43.817012786865234, -42.09749984741211, -40.37798309326172, -38.658470153808594, -36.93895721435547, -35.219444274902344, -33.49993133544922, -31.780418395996094, -30.06090545654297, -28.34139060974121, -26.621877670288086, -24.90236473083496, -23.182849884033203, -21.463336944580078, -19.743824005126953, -18.024311065673828, -16.304798126220703, -14.585283279418945, -12.86577033996582, -11.146257400512695, -9.426743507385254, -7.7072296142578125, -5.9877166748046875, -4.268203258514404, -2.548689842224121, -0.8291764259338379, 0.8903369903564453, 2.6098499298095703, 4.329363822937012, 6.048877716064453, 7.768390655517578, 9.487903594970703, 11.207417488098145, 12.926931381225586, 14.646444320678711, 16.365957260131836, 18.085472106933594, 19.80498504638672, 21.524497985839844, 23.24401092529297, 24.963523864746094, 26.68303871154785, 28.402551651000977, 30.1220645904541, 31.84157943725586, 33.561092376708984, 35.28060531616211, 37.000118255615234, 38.71963119506836, 40.439144134521484, 42.158660888671875, 43.878173828125, 45.597686767578125, 47.31719970703125, 49.036712646484375]}, "gradients/decoder.transformer.h.9.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 4.0, 0.0, 0.0, 2.0, 1.0, 2.0, 5.0, 5.0, 6.0, 9.0, 7.0, 11.0, 7.0, 3.0, 19.0, 17.0, 26.0, 19.0, 23.0, 25.0, 26.0, 29.0, 39.0, 36.0, 39.0, 36.0, 54.0, 43.0, 29.0, 45.0, 32.0, 42.0, 38.0, 38.0, 35.0, 37.0, 28.0, 26.0, 28.0, 20.0, 22.0, 18.0, 19.0, 17.0, 12.0, 6.0, 7.0, 6.0, 4.0, 6.0, 3.0, 5.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0], "bins": [-6.01171875, -5.8330078125, -5.654296875, -5.4755859375, -5.296875, -5.1181640625, -4.939453125, -4.7607421875, -4.58203125, -4.4033203125, -4.224609375, -4.0458984375, -3.8671875, -3.6884765625, -3.509765625, -3.3310546875, -3.15234375, -2.9736328125, -2.794921875, -2.6162109375, -2.4375, -2.2587890625, -2.080078125, -1.9013671875, -1.72265625, -1.5439453125, -1.365234375, -1.1865234375, -1.0078125, -0.8291015625, -0.650390625, -0.4716796875, -0.29296875, -0.1142578125, 0.064453125, 0.2431640625, 0.421875, 0.6005859375, 0.779296875, 0.9580078125, 1.13671875, 1.3154296875, 1.494140625, 1.6728515625, 1.8515625, 2.0302734375, 2.208984375, 2.3876953125, 2.56640625, 2.7451171875, 2.923828125, 3.1025390625, 3.28125, 3.4599609375, 3.638671875, 3.8173828125, 3.99609375, 4.1748046875, 4.353515625, 4.5322265625, 4.7109375, 4.8896484375, 5.068359375, 5.2470703125, 5.42578125]}, "gradients/decoder.transformer.h.9.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 4.0, 3.0, 4.0, 1.0, 8.0, 6.0, 8.0, 5.0, 9.0, 13.0, 27.0, 21.0, 37.0, 30.0, 43.0, 70.0, 110.0, 152.0, 322.0, 834.0, 3219.0, 16967.0, 140964.0, 1325218.0, 2305168.0, 355547.0, 37459.0, 5638.0, 1352.0, 447.0, 196.0, 96.0, 74.0, 56.0, 29.0, 35.0, 26.0, 25.0, 11.0, 9.0, 10.0, 8.0, 7.0, 5.0, 5.0, 5.0, 5.0, 4.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0], "bins": [-15.015625, -14.5765380859375, -14.137451171875, -13.6983642578125, -13.25927734375, -12.8201904296875, -12.381103515625, -11.9420166015625, -11.5029296875, -11.0638427734375, -10.624755859375, -10.1856689453125, -9.74658203125, -9.3074951171875, -8.868408203125, -8.4293212890625, -7.990234375, -7.5511474609375, -7.112060546875, -6.6729736328125, -6.23388671875, -5.7947998046875, -5.355712890625, -4.9166259765625, -4.4775390625, -4.0384521484375, -3.599365234375, -3.1602783203125, -2.72119140625, -2.2821044921875, -1.843017578125, -1.4039306640625, -0.96484375, -0.5257568359375, -0.086669921875, 0.3524169921875, 0.79150390625, 1.2305908203125, 1.669677734375, 2.1087646484375, 2.5478515625, 2.9869384765625, 3.426025390625, 3.8651123046875, 4.30419921875, 4.7432861328125, 5.182373046875, 5.6214599609375, 6.060546875, 6.4996337890625, 6.938720703125, 7.3778076171875, 7.81689453125, 8.2559814453125, 8.695068359375, 9.1341552734375, 9.5732421875, 10.0123291015625, 10.451416015625, 10.8905029296875, 11.32958984375, 11.7686767578125, 12.207763671875, 12.6468505859375, 13.0859375]}, "gradients/decoder.transformer.h.9.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 0.0, 2.0, 2.0, 6.0, 5.0, 5.0, 6.0, 8.0, 17.0, 12.0, 21.0, 29.0, 40.0, 67.0, 72.0, 91.0, 159.0, 190.0, 305.0, 339.0, 435.0, 422.0, 416.0, 318.0, 257.0, 215.0, 173.0, 135.0, 67.0, 58.0, 55.0, 32.0, 35.0, 25.0, 17.0, 9.0, 18.0, 9.0, 2.0, 4.0, 2.0, 1.0, 0.0, 1.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-10.546875, -10.212646484375, -9.87841796875, -9.544189453125, -9.2099609375, -8.875732421875, -8.54150390625, -8.207275390625, -7.873046875, -7.538818359375, -7.20458984375, -6.870361328125, -6.5361328125, -6.201904296875, -5.86767578125, -5.533447265625, -5.19921875, -4.864990234375, -4.53076171875, -4.196533203125, -3.8623046875, -3.528076171875, -3.19384765625, -2.859619140625, -2.525390625, -2.191162109375, -1.85693359375, -1.522705078125, -1.1884765625, -0.854248046875, -0.52001953125, -0.185791015625, 0.1484375, 0.482666015625, 0.81689453125, 1.151123046875, 1.4853515625, 1.819580078125, 2.15380859375, 2.488037109375, 2.822265625, 3.156494140625, 3.49072265625, 3.824951171875, 4.1591796875, 4.493408203125, 4.82763671875, 5.161865234375, 5.49609375, 5.830322265625, 6.16455078125, 6.498779296875, 6.8330078125, 7.167236328125, 7.50146484375, 7.835693359375, 8.169921875, 8.504150390625, 8.83837890625, 9.172607421875, 9.5068359375, 9.841064453125, 10.17529296875, 10.509521484375, 10.84375]}, "gradients/decoder.transformer.h.9.mlp.c_fc.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 4.0, 3.0, 9.0, 18.0, 11.0, 9.0, 24.0, 29.0, 39.0, 53.0, 61.0, 79.0, 96.0, 156.0, 262.0, 352.0, 798.0, 11382.0, 2610473.0, 1561130.0, 7444.0, 760.0, 378.0, 213.0, 138.0, 89.0, 85.0, 49.0, 33.0, 20.0, 25.0, 16.0, 15.0, 8.0, 9.0, 1.0, 3.0, 6.0, 3.0, 2.0, 1.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-54.34375, -52.6943359375, -51.044921875, -49.3955078125, -47.74609375, -46.0966796875, -44.447265625, -42.7978515625, -41.1484375, -39.4990234375, -37.849609375, -36.2001953125, -34.55078125, -32.9013671875, -31.251953125, -29.6025390625, -27.953125, -26.3037109375, -24.654296875, -23.0048828125, -21.35546875, -19.7060546875, -18.056640625, -16.4072265625, -14.7578125, -13.1083984375, -11.458984375, -9.8095703125, -8.16015625, -6.5107421875, -4.861328125, -3.2119140625, -1.5625, 0.0869140625, 1.736328125, 3.3857421875, 5.03515625, 6.6845703125, 8.333984375, 9.9833984375, 11.6328125, 13.2822265625, 14.931640625, 16.5810546875, 18.23046875, 19.8798828125, 21.529296875, 23.1787109375, 24.828125, 26.4775390625, 28.126953125, 29.7763671875, 31.42578125, 33.0751953125, 34.724609375, 36.3740234375, 38.0234375, 39.6728515625, 41.322265625, 42.9716796875, 44.62109375, 46.2705078125, 47.919921875, 49.5693359375, 51.21875]}, "gradients/decoder.transformer.h.9.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 8.0, 14.0, 28.0, 50.0, 89.0, 109.0, 162.0, 152.0, 148.0, 104.0, 85.0, 27.0, 25.0, 5.0, 3.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-83.05976104736328, -80.81766510009766, -78.57556915283203, -76.3334732055664, -74.09137725830078, -71.84928131103516, -69.60718536376953, -67.3650894165039, -65.12299346923828, -62.880897521972656, -60.63880157470703, -58.396705627441406, -56.15460968017578, -53.912513732910156, -51.67041778564453, -49.428321838378906, -47.18622970581055, -44.94413375854492, -42.7020378112793, -40.45994186401367, -38.21784591674805, -35.97574996948242, -33.73365783691406, -31.491559982299805, -29.24946403503418, -27.007368087768555, -24.76527214050293, -22.523178100585938, -20.281082153320312, -18.038986206054688, -15.796890258789062, -13.554794311523438, -11.312698364257812, -9.070602416992188, -6.828506946563721, -4.586411476135254, -2.344315528869629, -0.1022195816040039, 2.1398754119873047, 4.38197135925293, 6.624067306518555, 8.86616325378418, 11.108259201049805, 13.350354194641113, 15.592450141906738, 17.834545135498047, 20.076641082763672, 22.318737030029297, 24.560832977294922, 26.802928924560547, 29.045024871826172, 31.287120819091797, 33.52921676635742, 35.77131271362305, 38.013404846191406, 40.25550079345703, 42.497596740722656, 44.73969268798828, 46.981788635253906, 49.22388458251953, 51.465980529785156, 53.70807647705078, 55.950172424316406, 58.19226837158203, 60.434364318847656]}, "gradients/decoder.transformer.h.9.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 3.0, 4.0, 1.0, 5.0, 7.0, 7.0, 6.0, 15.0, 14.0, 17.0, 14.0, 17.0, 23.0, 23.0, 24.0, 20.0, 30.0, 27.0, 35.0, 37.0, 38.0, 35.0, 39.0, 44.0, 42.0, 52.0, 41.0, 36.0, 26.0, 33.0, 40.0, 36.0, 31.0, 30.0, 28.0, 25.0, 11.0, 19.0, 12.0, 10.0, 14.0, 6.0, 12.0, 7.0, 6.0, 5.0, 5.0, 2.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-35.778419494628906, -34.61531066894531, -33.45220184326172, -32.28909683227539, -31.125988006591797, -29.962879180908203, -28.799772262573242, -27.63666534423828, -26.473556518554688, -25.310447692871094, -24.147340774536133, -22.984233856201172, -21.821125030517578, -20.658016204833984, -19.494909286499023, -18.331802368164062, -17.16869354248047, -16.005584716796875, -14.842477798461914, -13.679369926452637, -12.51626205444336, -11.353154182434082, -10.190046310424805, -9.026938438415527, -7.86383056640625, -6.700722694396973, -5.537614822387695, -4.374506950378418, -3.2113990783691406, -2.0482912063598633, -0.8851833343505859, 0.2779245376586914, 1.4410362243652344, 2.6041440963745117, 3.767251968383789, 4.930359840393066, 6.093467712402344, 7.256575584411621, 8.419683456420898, 9.582791328430176, 10.745899200439453, 11.90900707244873, 13.072114944458008, 14.235222816467285, 15.398330688476562, 16.561439514160156, 17.724546432495117, 18.887653350830078, 20.050762176513672, 21.213871002197266, 22.376977920532227, 23.540084838867188, 24.70319366455078, 25.866302490234375, 27.029409408569336, 28.192516326904297, 29.35562515258789, 30.518733978271484, 31.681840896606445, 32.844947814941406, 34.008056640625, 35.171165466308594, 36.33427429199219, 37.497379302978516, 38.66048812866211]}, "gradients/decoder.transformer.h.9.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 1.0, 5.0, 1.0, 5.0, 6.0, 13.0, 8.0, 8.0, 17.0, 15.0, 17.0, 18.0, 28.0, 24.0, 31.0, 27.0, 35.0, 32.0, 30.0, 37.0, 42.0, 42.0, 43.0, 44.0, 47.0, 44.0, 43.0, 51.0, 30.0, 35.0, 31.0, 34.0, 29.0, 16.0, 20.0, 16.0, 16.0, 12.0, 20.0, 8.0, 8.0, 9.0, 6.0, 0.0, 3.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-6.0, -5.81842041015625, -5.6368408203125, -5.45526123046875, -5.273681640625, -5.09210205078125, -4.9105224609375, -4.72894287109375, -4.54736328125, -4.36578369140625, -4.1842041015625, -4.00262451171875, -3.821044921875, -3.63946533203125, -3.4578857421875, -3.27630615234375, -3.0947265625, -2.91314697265625, -2.7315673828125, -2.54998779296875, -2.368408203125, -2.18682861328125, -2.0052490234375, -1.82366943359375, -1.64208984375, -1.46051025390625, -1.2789306640625, -1.09735107421875, -0.915771484375, -0.73419189453125, -0.5526123046875, -0.37103271484375, -0.189453125, -0.00787353515625, 0.1737060546875, 0.35528564453125, 0.536865234375, 0.71844482421875, 0.9000244140625, 1.08160400390625, 1.26318359375, 1.44476318359375, 1.6263427734375, 1.80792236328125, 1.989501953125, 2.17108154296875, 2.3526611328125, 2.53424072265625, 2.7158203125, 2.89739990234375, 3.0789794921875, 3.26055908203125, 3.442138671875, 3.62371826171875, 3.8052978515625, 3.98687744140625, 4.16845703125, 4.35003662109375, 4.5316162109375, 4.71319580078125, 4.894775390625, 5.07635498046875, 5.2579345703125, 5.43951416015625, 5.62109375]}, "gradients/decoder.transformer.h.9.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 1.0, 3.0, 6.0, 5.0, 13.0, 3.0, 17.0, 24.0, 41.0, 54.0, 78.0, 101.0, 179.0, 239.0, 369.0, 622.0, 920.0, 1453.0, 2311.0, 3548.0, 5636.0, 9011.0, 14617.0, 24047.0, 38928.0, 62749.0, 99223.0, 145618.0, 176494.0, 157548.0, 112791.0, 73523.0, 45427.0, 27396.0, 16970.0, 10615.0, 6506.0, 4150.0, 2586.0, 1652.0, 1100.0, 688.0, 440.0, 285.0, 201.0, 118.0, 88.0, 46.0, 43.0, 27.0, 22.0, 12.0, 9.0, 5.0, 6.0, 2.0, 0.0, 3.0, 1.0, 2.0], "bins": [-0.84033203125, -0.8145980834960938, -0.7888641357421875, -0.7631301879882812, -0.737396240234375, -0.7116622924804688, -0.6859283447265625, -0.6601943969726562, -0.63446044921875, -0.6087265014648438, -0.5829925537109375, -0.5572586059570312, -0.531524658203125, -0.5057907104492188, -0.4800567626953125, -0.45432281494140625, -0.4285888671875, -0.40285491943359375, -0.3771209716796875, -0.35138702392578125, -0.325653076171875, -0.29991912841796875, -0.2741851806640625, -0.24845123291015625, -0.22271728515625, -0.19698333740234375, -0.1712493896484375, -0.14551544189453125, -0.119781494140625, -0.09404754638671875, -0.0683135986328125, -0.04257965087890625, -0.016845703125, 0.00888824462890625, 0.0346221923828125, 0.06035614013671875, 0.086090087890625, 0.11182403564453125, 0.1375579833984375, 0.16329193115234375, 0.18902587890625, 0.21475982666015625, 0.2404937744140625, 0.26622772216796875, 0.291961669921875, 0.31769561767578125, 0.3434295654296875, 0.36916351318359375, 0.3948974609375, 0.42063140869140625, 0.4463653564453125, 0.47209930419921875, 0.497833251953125, 0.5235671997070312, 0.5493011474609375, 0.5750350952148438, 0.60076904296875, 0.6265029907226562, 0.6522369384765625, 0.6779708862304688, 0.703704833984375, 0.7294387817382812, 0.7551727294921875, 0.7809066772460938, 0.806640625]}, "gradients/decoder.transformer.h.9.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 3.0, 2.0, 4.0, 2.0, 9.0, 5.0, 4.0, 5.0, 5.0, 17.0, 14.0, 24.0, 19.0, 27.0, 31.0, 38.0, 27.0, 38.0, 34.0, 36.0, 40.0, 57.0, 41.0, 1073.0, 44.0, 41.0, 51.0, 57.0, 39.0, 38.0, 28.0, 36.0, 27.0, 25.0, 19.0, 18.0, 18.0, 12.0, 10.0, 6.0, 4.0, 3.0, 4.0, 3.0, 1.0, 4.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.8046875, -3.6650390625, -3.525390625, -3.3857421875, -3.24609375, -3.1064453125, -2.966796875, -2.8271484375, -2.6875, -2.5478515625, -2.408203125, -2.2685546875, -2.12890625, -1.9892578125, -1.849609375, -1.7099609375, -1.5703125, -1.4306640625, -1.291015625, -1.1513671875, -1.01171875, -0.8720703125, -0.732421875, -0.5927734375, -0.453125, -0.3134765625, -0.173828125, -0.0341796875, 0.10546875, 0.2451171875, 0.384765625, 0.5244140625, 0.6640625, 0.8037109375, 0.943359375, 1.0830078125, 1.22265625, 1.3623046875, 1.501953125, 1.6416015625, 1.78125, 1.9208984375, 2.060546875, 2.2001953125, 2.33984375, 2.4794921875, 2.619140625, 2.7587890625, 2.8984375, 3.0380859375, 3.177734375, 3.3173828125, 3.45703125, 3.5966796875, 3.736328125, 3.8759765625, 4.015625, 4.1552734375, 4.294921875, 4.4345703125, 4.57421875, 4.7138671875, 4.853515625, 4.9931640625, 5.1328125]}, "gradients/decoder.transformer.h.9.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 2.0, 0.0, 2.0, 6.0, 10.0, 17.0, 21.0, 34.0, 65.0, 78.0, 152.0, 242.0, 426.0, 758.0, 1320.0, 2253.0, 4140.0, 7411.0, 13145.0, 24487.0, 43953.0, 77236.0, 127077.0, 209526.0, 1214765.0, 150628.0, 95029.0, 55185.0, 30661.0, 16920.0, 9472.0, 5168.0, 2945.0, 1712.0, 975.0, 540.0, 311.0, 192.0, 104.0, 70.0, 40.0, 21.0, 18.0, 9.0, 7.0, 5.0, 3.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.66650390625, -0.6475677490234375, -0.628631591796875, -0.6096954345703125, -0.59075927734375, -0.5718231201171875, -0.552886962890625, -0.5339508056640625, -0.5150146484375, -0.4960784912109375, -0.477142333984375, -0.4582061767578125, -0.43927001953125, -0.4203338623046875, -0.401397705078125, -0.3824615478515625, -0.363525390625, -0.3445892333984375, -0.325653076171875, -0.3067169189453125, -0.28778076171875, -0.2688446044921875, -0.249908447265625, -0.2309722900390625, -0.2120361328125, -0.1930999755859375, -0.174163818359375, -0.1552276611328125, -0.13629150390625, -0.1173553466796875, -0.098419189453125, -0.0794830322265625, -0.060546875, -0.0416107177734375, -0.022674560546875, -0.0037384033203125, 0.01519775390625, 0.0341339111328125, 0.053070068359375, 0.0720062255859375, 0.0909423828125, 0.1098785400390625, 0.128814697265625, 0.1477508544921875, 0.16668701171875, 0.1856231689453125, 0.204559326171875, 0.2234954833984375, 0.242431640625, 0.2613677978515625, 0.280303955078125, 0.2992401123046875, 0.31817626953125, 0.3371124267578125, 0.356048583984375, 0.3749847412109375, 0.3939208984375, 0.4128570556640625, 0.431793212890625, 0.4507293701171875, 0.46966552734375, 0.4886016845703125, 0.507537841796875, 0.5264739990234375, 0.54541015625]}, "gradients/decoder.transformer.h.9.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 5.0, 5.0, 8.0, 3.0, 4.0, 9.0, 11.0, 7.0, 7.0, 12.0, 9.0, 20.0, 23.0, 22.0, 39.0, 45.0, 44.0, 53.0, 66.0, 80.0, 127.0, 67.0, 66.0, 42.0, 39.0, 38.0, 24.0, 21.0, 25.0, 13.0, 9.0, 13.0, 11.0, 5.0, 5.0, 5.0, 8.0, 6.0, 7.0, 3.0, 3.0, 1.0, 0.0, 2.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.0256805419921875, -0.024831295013427734, -0.02398204803466797, -0.023132801055908203, -0.022283554077148438, -0.021434307098388672, -0.020585060119628906, -0.01973581314086914, -0.018886566162109375, -0.01803731918334961, -0.017188072204589844, -0.016338825225830078, -0.015489578247070312, -0.014640331268310547, -0.013791084289550781, -0.012941837310791016, -0.01209259033203125, -0.011243343353271484, -0.010394096374511719, -0.009544849395751953, -0.008695602416992188, -0.007846355438232422, -0.006997108459472656, -0.006147861480712891, -0.005298614501953125, -0.004449367523193359, -0.0036001205444335938, -0.002750873565673828, -0.0019016265869140625, -0.0010523796081542969, -0.00020313262939453125, 0.0006461143493652344, 0.001495361328125, 0.0023446083068847656, 0.0031938552856445312, 0.004043102264404297, 0.0048923492431640625, 0.005741596221923828, 0.006590843200683594, 0.007440090179443359, 0.008289337158203125, 0.00913858413696289, 0.009987831115722656, 0.010837078094482422, 0.011686325073242188, 0.012535572052001953, 0.013384819030761719, 0.014234066009521484, 0.01508331298828125, 0.015932559967041016, 0.01678180694580078, 0.017631053924560547, 0.018480300903320312, 0.019329547882080078, 0.020178794860839844, 0.02102804183959961, 0.021877288818359375, 0.02272653579711914, 0.023575782775878906, 0.024425029754638672, 0.025274276733398438, 0.026123523712158203, 0.02697277069091797, 0.027822017669677734, 0.0286712646484375]}, "gradients/decoder.transformer.h.9.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 3.0, 3.0, 8.0, 9.0, 7.0, 7.0, 4.0, 9.0, 15.0, 12.0, 22.0, 31.0, 32.0, 47.0, 60.0, 86.0, 122.0, 206.0, 473.0, 1750.0, 1014879.0, 29119.0, 761.0, 322.0, 148.0, 110.0, 78.0, 52.0, 49.0, 26.0, 23.0, 22.0, 9.0, 8.0, 12.0, 11.0, 4.0, 4.0, 6.0, 6.0, 6.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.5859375, -0.5685806274414062, -0.5512237548828125, -0.5338668823242188, -0.516510009765625, -0.49915313720703125, -0.4817962646484375, -0.46443939208984375, -0.44708251953125, -0.42972564697265625, -0.4123687744140625, -0.39501190185546875, -0.377655029296875, -0.36029815673828125, -0.3429412841796875, -0.32558441162109375, -0.3082275390625, -0.29087066650390625, -0.2735137939453125, -0.25615692138671875, -0.238800048828125, -0.22144317626953125, -0.2040863037109375, -0.18672943115234375, -0.16937255859375, -0.15201568603515625, -0.1346588134765625, -0.11730194091796875, -0.099945068359375, -0.08258819580078125, -0.0652313232421875, -0.04787445068359375, -0.030517578125, -0.01316070556640625, 0.0041961669921875, 0.02155303955078125, 0.038909912109375, 0.05626678466796875, 0.0736236572265625, 0.09098052978515625, 0.10833740234375, 0.12569427490234375, 0.1430511474609375, 0.16040802001953125, 0.177764892578125, 0.19512176513671875, 0.2124786376953125, 0.22983551025390625, 0.2471923828125, 0.26454925537109375, 0.2819061279296875, 0.29926300048828125, 0.316619873046875, 0.33397674560546875, 0.3513336181640625, 0.36869049072265625, 0.38604736328125, 0.40340423583984375, 0.4207611083984375, 0.43811798095703125, 0.455474853515625, 0.47283172607421875, 0.4901885986328125, 0.5075454711914062, 0.52490234375]}, "gradients/decoder.transformer.h.9.ln_cross_attn.weight": {"_type": "histogram", "values": [3.0, 21.0, 991.0, 4.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.05284806340932846, -0.03107510134577751, -0.009302139282226562, 0.012470819056034088, 0.034243784844875336, 0.05601675063371658, 0.07778970152139664, 0.09956266731023788, 0.12133563309907913, 0.14310859143733978, 0.16488155722618103, 0.18665450811386108, 0.20842748880386353, 0.23020043969154358, 0.25197339057922363, 0.2737463712692261, 0.29551932215690613, 0.3172922730445862, 0.3390652537345886, 0.3608382046222687, 0.38261115550994873, 0.40438413619995117, 0.4261570870876312, 0.4479300379753113, 0.4697030186653137, 0.4914759695529938, 0.5132489204406738, 0.5350219011306763, 0.5567948818206787, 0.5785678625106812, 0.6003407835960388, 0.6221137642860413, 0.6438866853713989, 0.6656596660614014, 0.687432587146759, 0.7092055678367615, 0.7309785485267639, 0.7527514696121216, 0.774524450302124, 0.7962974309921265, 0.8180704116821289, 0.8398433923721313, 0.861616313457489, 0.8833892941474915, 0.9051622748374939, 0.9269351959228516, 0.948708176612854, 0.9704811573028564, 0.9922540783882141, 1.0140269994735718, 1.0357999801635742, 1.0575729608535767, 1.079345941543579, 1.1011189222335815, 1.122891902923584, 1.1446647644042969, 1.1664377450942993, 1.1882107257843018, 1.2099837064743042, 1.2317566871643066, 1.2535295486450195, 1.275302529335022, 1.2970755100250244, 1.3188484907150269, 1.3406214714050293]}, "gradients/decoder.transformer.h.9.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 7.0, 1.0, 3.0, 8.0, 11.0, 14.0, 19.0, 21.0, 20.0, 28.0, 33.0, 38.0, 45.0, 39.0, 45.0, 47.0, 49.0, 59.0, 46.0, 54.0, 45.0, 53.0, 55.0, 42.0, 44.0, 32.0, 25.0, 31.0, 23.0, 24.0, 14.0, 10.0, 7.0, 9.0, 7.0, 4.0, 1.0, 3.0, 2.0, 2.0], "bins": [-0.06615883111953735, -0.06458798050880432, -0.06301712989807129, -0.06144627556204796, -0.059875424951314926, -0.058304574340581894, -0.05673372000455856, -0.05516286939382553, -0.0535920187830925, -0.05202116817235947, -0.050450317561626434, -0.048879463225603104, -0.04730861261487007, -0.04573776200413704, -0.04416690766811371, -0.042596057057380676, -0.041025206446647644, -0.03945435583591461, -0.03788350522518158, -0.03631265088915825, -0.03474180027842522, -0.033170949667692184, -0.031600095331668854, -0.03002924472093582, -0.02845839411020279, -0.026887543499469757, -0.025316691026091576, -0.023745838552713394, -0.022174987941980362, -0.02060413733124733, -0.019033284857869148, -0.017462432384490967, -0.015891581773757935, -0.014320730231702328, -0.012749878689646721, -0.011179027147591114, -0.009608175605535507, -0.0080373240634799, -0.0064664725214242935, -0.004895620979368687, -0.00332476943731308, -0.001753917895257473, -0.00018306635320186615, 0.0013877851888537407, 0.0029586367309093475, 0.004529488272964954, 0.006100339815020561, 0.007671191357076168, 0.009242042899131775, 0.010812894441187382, 0.012383745983242989, 0.013954597525298595, 0.015525449067354202, 0.017096299678087234, 0.018667152151465416, 0.020238004624843597, 0.02180885523557663, 0.023379705846309662, 0.024950558319687843, 0.026521410793066025, 0.028092261403799057, 0.02966311201453209, 0.03123396448791027, 0.03280481696128845, 0.034375667572021484]}, "gradients/decoder.transformer.h.9.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 1.0, 5.0, 1.0, 5.0, 6.0, 13.0, 8.0, 8.0, 17.0, 15.0, 16.0, 19.0, 27.0, 25.0, 31.0, 26.0, 36.0, 32.0, 29.0, 38.0, 42.0, 42.0, 43.0, 44.0, 47.0, 44.0, 43.0, 51.0, 32.0, 32.0, 32.0, 34.0, 30.0, 15.0, 20.0, 16.0, 16.0, 13.0, 19.0, 8.0, 9.0, 8.0, 6.0, 0.0, 3.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-6.00390625, -5.82220458984375, -5.6405029296875, -5.45880126953125, -5.277099609375, -5.09539794921875, -4.9136962890625, -4.73199462890625, -4.55029296875, -4.36859130859375, -4.1868896484375, -4.00518798828125, -3.823486328125, -3.64178466796875, -3.4600830078125, -3.27838134765625, -3.0966796875, -2.91497802734375, -2.7332763671875, -2.55157470703125, -2.369873046875, -2.18817138671875, -2.0064697265625, -1.82476806640625, -1.64306640625, -1.46136474609375, -1.2796630859375, -1.09796142578125, -0.916259765625, -0.73455810546875, -0.5528564453125, -0.37115478515625, -0.189453125, -0.00775146484375, 0.1739501953125, 0.35565185546875, 0.537353515625, 0.71905517578125, 0.9007568359375, 1.08245849609375, 1.26416015625, 1.44586181640625, 1.6275634765625, 1.80926513671875, 1.990966796875, 2.17266845703125, 2.3543701171875, 2.53607177734375, 2.7177734375, 2.89947509765625, 3.0811767578125, 3.26287841796875, 3.444580078125, 3.62628173828125, 3.8079833984375, 3.98968505859375, 4.17138671875, 4.35308837890625, 4.5347900390625, 4.71649169921875, 4.898193359375, 5.07989501953125, 5.2615966796875, 5.44329833984375, 5.625]}, "gradients/decoder.transformer.h.9.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 2.0, 1.0, 2.0, 3.0, 3.0, 1.0, 7.0, 11.0, 6.0, 11.0, 8.0, 15.0, 27.0, 27.0, 39.0, 42.0, 59.0, 113.0, 115.0, 189.0, 401.0, 869.0, 2828.0, 11146.0, 47039.0, 224684.0, 578951.0, 139591.0, 31123.0, 7544.0, 2045.0, 686.0, 334.0, 178.0, 125.0, 84.0, 68.0, 43.0, 29.0, 29.0, 21.0, 18.0, 11.0, 13.0, 8.0, 6.0, 3.0, 4.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0], "bins": [-11.796875, -11.4432373046875, -11.089599609375, -10.7359619140625, -10.38232421875, -10.0286865234375, -9.675048828125, -9.3214111328125, -8.9677734375, -8.6141357421875, -8.260498046875, -7.9068603515625, -7.55322265625, -7.1995849609375, -6.845947265625, -6.4923095703125, -6.138671875, -5.7850341796875, -5.431396484375, -5.0777587890625, -4.72412109375, -4.3704833984375, -4.016845703125, -3.6632080078125, -3.3095703125, -2.9559326171875, -2.602294921875, -2.2486572265625, -1.89501953125, -1.5413818359375, -1.187744140625, -0.8341064453125, -0.48046875, -0.1268310546875, 0.226806640625, 0.5804443359375, 0.93408203125, 1.2877197265625, 1.641357421875, 1.9949951171875, 2.3486328125, 2.7022705078125, 3.055908203125, 3.4095458984375, 3.76318359375, 4.1168212890625, 4.470458984375, 4.8240966796875, 5.177734375, 5.5313720703125, 5.885009765625, 6.2386474609375, 6.59228515625, 6.9459228515625, 7.299560546875, 7.6531982421875, 8.0068359375, 8.3604736328125, 8.714111328125, 9.0677490234375, 9.42138671875, 9.7750244140625, 10.128662109375, 10.4822998046875, 10.8359375]}, "gradients/decoder.transformer.h.9.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 3.0, 3.0, 1.0, 2.0, 3.0, 2.0, 2.0, 5.0, 6.0, 7.0, 10.0, 14.0, 10.0, 21.0, 18.0, 16.0, 24.0, 24.0, 34.0, 26.0, 37.0, 35.0, 50.0, 70.0, 86.0, 173.0, 272.0, 1363.0, 165.0, 107.0, 73.0, 66.0, 42.0, 47.0, 35.0, 37.0, 26.0, 24.0, 21.0, 14.0, 14.0, 13.0, 9.0, 11.0, 11.0, 8.0, 2.0, 7.0, 3.0, 1.0, 2.0, 5.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0], "bins": [-17.875, -17.301513671875, -16.72802734375, -16.154541015625, -15.5810546875, -15.007568359375, -14.43408203125, -13.860595703125, -13.287109375, -12.713623046875, -12.14013671875, -11.566650390625, -10.9931640625, -10.419677734375, -9.84619140625, -9.272705078125, -8.69921875, -8.125732421875, -7.55224609375, -6.978759765625, -6.4052734375, -5.831787109375, -5.25830078125, -4.684814453125, -4.111328125, -3.537841796875, -2.96435546875, -2.390869140625, -1.8173828125, -1.243896484375, -0.67041015625, -0.096923828125, 0.4765625, 1.050048828125, 1.62353515625, 2.197021484375, 2.7705078125, 3.343994140625, 3.91748046875, 4.490966796875, 5.064453125, 5.637939453125, 6.21142578125, 6.784912109375, 7.3583984375, 7.931884765625, 8.50537109375, 9.078857421875, 9.65234375, 10.225830078125, 10.79931640625, 11.372802734375, 11.9462890625, 12.519775390625, 13.09326171875, 13.666748046875, 14.240234375, 14.813720703125, 15.38720703125, 15.960693359375, 16.5341796875, 17.107666015625, 17.68115234375, 18.254638671875, 18.828125]}, "gradients/decoder.transformer.h.9.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 4.0, 2.0, 5.0, 6.0, 13.0, 15.0, 29.0, 24.0, 45.0, 66.0, 96.0, 140.0, 309.0, 782.0, 8218.0, 3118575.0, 15686.0, 902.0, 280.0, 175.0, 120.0, 69.0, 54.0, 36.0, 29.0, 13.0, 6.0, 6.0, 5.0, 5.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-94.875, -91.7705078125, -88.666015625, -85.5615234375, -82.45703125, -79.3525390625, -76.248046875, -73.1435546875, -70.0390625, -66.9345703125, -63.830078125, -60.7255859375, -57.62109375, -54.5166015625, -51.412109375, -48.3076171875, -45.203125, -42.0986328125, -38.994140625, -35.8896484375, -32.78515625, -29.6806640625, -26.576171875, -23.4716796875, -20.3671875, -17.2626953125, -14.158203125, -11.0537109375, -7.94921875, -4.8447265625, -1.740234375, 1.3642578125, 4.46875, 7.5732421875, 10.677734375, 13.7822265625, 16.88671875, 19.9912109375, 23.095703125, 26.2001953125, 29.3046875, 32.4091796875, 35.513671875, 38.6181640625, 41.72265625, 44.8271484375, 47.931640625, 51.0361328125, 54.140625, 57.2451171875, 60.349609375, 63.4541015625, 66.55859375, 69.6630859375, 72.767578125, 75.8720703125, 78.9765625, 82.0810546875, 85.185546875, 88.2900390625, 91.39453125, 94.4990234375, 97.603515625, 100.7080078125, 103.8125]}, "gradients/decoder.transformer.h.9.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 51.0, 181.0, 347.0, 281.0, 120.0, 24.0, 7.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-32.45763397216797, -29.720035552978516, -26.982439041137695, -24.244842529296875, -21.507244110107422, -18.76964569091797, -16.03204917907715, -13.294452667236328, -10.556854248046875, -7.819256782531738, -5.081659317016602, -2.344061851501465, 0.3935356140136719, 3.1311330795288086, 5.868730545043945, 8.606327056884766, 11.343925476074219, 14.081522941589355, 16.819120407104492, 19.556716918945312, 22.294315338134766, 25.03191375732422, 27.76951026916504, 30.50710678100586, 33.24470520019531, 35.982303619384766, 38.71990203857422, 41.457496643066406, 44.19509506225586, 46.93269348144531, 49.6702880859375, 52.40788650512695, 55.145477294921875, 57.88307571411133, 60.62067413330078, 63.35826873779297, 66.09587097167969, 68.83346557617188, 71.57106018066406, 74.30865478515625, 77.04625701904297, 79.78385162353516, 82.52145385742188, 85.25904846191406, 87.99664306640625, 90.73424530029297, 93.47183990478516, 96.20944213867188, 98.94703674316406, 101.68463134765625, 104.42223358154297, 107.15982818603516, 109.89743041992188, 112.63502502441406, 115.37261962890625, 118.11021423339844, 120.84781646728516, 123.58541107177734, 126.32301330566406, 129.06060791015625, 131.79820251464844, 134.53579711914062, 137.27340698242188, 140.01100158691406, 142.74859619140625]}, "gradients/decoder.transformer.h.9.ln_1.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 2.0, 1.0, 6.0, 6.0, 1.0, 9.0, 8.0, 8.0, 15.0, 20.0, 12.0, 19.0, 33.0, 20.0, 40.0, 19.0, 50.0, 32.0, 40.0, 41.0, 54.0, 42.0, 49.0, 47.0, 46.0, 39.0, 45.0, 27.0, 33.0, 40.0, 27.0, 23.0, 20.0, 25.0, 23.0, 15.0, 10.0, 15.0, 11.0, 6.0, 12.0, 4.0, 6.0, 3.0, 2.0, 1.0, 5.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-61.99579620361328, -60.05271911621094, -58.109642028808594, -56.16656494140625, -54.223487854003906, -52.28041076660156, -50.33732986450195, -48.39425277709961, -46.451175689697266, -44.50809860229492, -42.56502151489258, -40.621944427490234, -38.678863525390625, -36.73578643798828, -34.79270935058594, -32.849632263183594, -30.90655517578125, -28.963478088378906, -27.020401000976562, -25.077322006225586, -23.134244918823242, -21.1911678314209, -19.248088836669922, -17.305011749267578, -15.361934661865234, -13.41885757446289, -11.47577953338623, -9.53270149230957, -7.589624404907227, -5.646547317504883, -3.7034692764282227, -1.7603912353515625, 0.18268203735351562, 2.1257596015930176, 4.0688371658325195, 6.0119147300720215, 7.954992294311523, 9.898069381713867, 11.841147422790527, 13.784225463867188, 15.727302551269531, 17.670379638671875, 19.61345672607422, 21.556535720825195, 23.49961280822754, 25.442689895629883, 27.38576889038086, 29.328845977783203, 31.271923065185547, 33.21500015258789, 35.158077239990234, 37.10115432739258, 39.04423522949219, 40.98731231689453, 42.930389404296875, 44.87346649169922, 46.81654357910156, 48.759620666503906, 50.70269775390625, 52.645774841308594, 54.58885192871094, 56.53192901611328, 58.47500991821289, 60.418087005615234, 62.36116409301758]}, "gradients/decoder.transformer.h.8.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 2.0, 2.0, 0.0, 1.0, 2.0, 1.0, 3.0, 2.0, 4.0, 1.0, 2.0, 6.0, 13.0, 11.0, 9.0, 13.0, 18.0, 17.0, 18.0, 19.0, 33.0, 19.0, 27.0, 43.0, 37.0, 38.0, 34.0, 41.0, 43.0, 50.0, 39.0, 50.0, 27.0, 41.0, 24.0, 48.0, 43.0, 34.0, 37.0, 24.0, 22.0, 20.0, 14.0, 17.0, 13.0, 10.0, 10.0, 4.0, 10.0, 4.0, 4.0, 8.0, 0.0, 4.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-6.20703125, -6.00738525390625, -5.8077392578125, -5.60809326171875, -5.408447265625, -5.20880126953125, -5.0091552734375, -4.80950927734375, -4.60986328125, -4.41021728515625, -4.2105712890625, -4.01092529296875, -3.811279296875, -3.61163330078125, -3.4119873046875, -3.21234130859375, -3.0126953125, -2.81304931640625, -2.6134033203125, -2.41375732421875, -2.214111328125, -2.01446533203125, -1.8148193359375, -1.61517333984375, -1.41552734375, -1.21588134765625, -1.0162353515625, -0.81658935546875, -0.616943359375, -0.41729736328125, -0.2176513671875, -0.01800537109375, 0.181640625, 0.38128662109375, 0.5809326171875, 0.78057861328125, 0.980224609375, 1.17987060546875, 1.3795166015625, 1.57916259765625, 1.77880859375, 1.97845458984375, 2.1781005859375, 2.37774658203125, 2.577392578125, 2.77703857421875, 2.9766845703125, 3.17633056640625, 3.3759765625, 3.57562255859375, 3.7752685546875, 3.97491455078125, 4.174560546875, 4.37420654296875, 4.5738525390625, 4.77349853515625, 4.97314453125, 5.17279052734375, 5.3724365234375, 5.57208251953125, 5.771728515625, 5.97137451171875, 6.1710205078125, 6.37066650390625, 6.5703125]}, "gradients/decoder.transformer.h.8.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 4.0, 0.0, 0.0, 1.0, 2.0, 2.0, 2.0, 4.0, 6.0, 14.0, 9.0, 18.0, 24.0, 30.0, 34.0, 42.0, 51.0, 97.0, 143.0, 219.0, 361.0, 637.0, 1371.0, 3045.0, 7837.0, 21177.0, 61987.0, 186739.0, 534516.0, 1145860.0, 1240564.0, 637886.0, 232741.0, 76755.0, 25760.0, 9434.0, 3609.0, 1512.0, 703.0, 355.0, 258.0, 139.0, 89.0, 66.0, 48.0, 41.0, 32.0, 20.0, 22.0, 8.0, 9.0, 6.0, 4.0, 6.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.16015625, -6.93707275390625, -6.7139892578125, -6.49090576171875, -6.267822265625, -6.04473876953125, -5.8216552734375, -5.59857177734375, -5.37548828125, -5.15240478515625, -4.9293212890625, -4.70623779296875, -4.483154296875, -4.26007080078125, -4.0369873046875, -3.81390380859375, -3.5908203125, -3.36773681640625, -3.1446533203125, -2.92156982421875, -2.698486328125, -2.47540283203125, -2.2523193359375, -2.02923583984375, -1.80615234375, -1.58306884765625, -1.3599853515625, -1.13690185546875, -0.913818359375, -0.69073486328125, -0.4676513671875, -0.24456787109375, -0.021484375, 0.20159912109375, 0.4246826171875, 0.64776611328125, 0.870849609375, 1.09393310546875, 1.3170166015625, 1.54010009765625, 1.76318359375, 1.98626708984375, 2.2093505859375, 2.43243408203125, 2.655517578125, 2.87860107421875, 3.1016845703125, 3.32476806640625, 3.5478515625, 3.77093505859375, 3.9940185546875, 4.21710205078125, 4.440185546875, 4.66326904296875, 4.8863525390625, 5.10943603515625, 5.33251953125, 5.55560302734375, 5.7786865234375, 6.00177001953125, 6.224853515625, 6.44793701171875, 6.6710205078125, 6.89410400390625, 7.1171875]}, "gradients/decoder.transformer.h.8.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 3.0, 3.0, 4.0, 5.0, 9.0, 5.0, 12.0, 15.0, 31.0, 39.0, 42.0, 53.0, 77.0, 104.0, 122.0, 188.0, 213.0, 311.0, 346.0, 419.0, 411.0, 386.0, 300.0, 253.0, 183.0, 141.0, 95.0, 80.0, 51.0, 50.0, 33.0, 25.0, 14.0, 23.0, 7.0, 9.0, 6.0, 3.0, 6.0, 4.0, 5.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.796875, -12.4422607421875, -12.087646484375, -11.7330322265625, -11.37841796875, -11.0238037109375, -10.669189453125, -10.3145751953125, -9.9599609375, -9.6053466796875, -9.250732421875, -8.8961181640625, -8.54150390625, -8.1868896484375, -7.832275390625, -7.4776611328125, -7.123046875, -6.7684326171875, -6.413818359375, -6.0592041015625, -5.70458984375, -5.3499755859375, -4.995361328125, -4.6407470703125, -4.2861328125, -3.9315185546875, -3.576904296875, -3.2222900390625, -2.86767578125, -2.5130615234375, -2.158447265625, -1.8038330078125, -1.44921875, -1.0946044921875, -0.739990234375, -0.3853759765625, -0.03076171875, 0.3238525390625, 0.678466796875, 1.0330810546875, 1.3876953125, 1.7423095703125, 2.096923828125, 2.4515380859375, 2.80615234375, 3.1607666015625, 3.515380859375, 3.8699951171875, 4.224609375, 4.5792236328125, 4.933837890625, 5.2884521484375, 5.64306640625, 5.9976806640625, 6.352294921875, 6.7069091796875, 7.0615234375, 7.4161376953125, 7.770751953125, 8.1253662109375, 8.47998046875, 8.8345947265625, 9.189208984375, 9.5438232421875, 9.8984375]}, "gradients/decoder.transformer.h.8.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 3.0, 4.0, 3.0, 6.0, 7.0, 9.0, 13.0, 21.0, 12.0, 41.0, 36.0, 56.0, 71.0, 119.0, 194.0, 327.0, 591.0, 1308.0, 6878.0, 364313.0, 3719018.0, 95396.0, 3544.0, 967.0, 506.0, 289.0, 183.0, 109.0, 86.0, 61.0, 40.0, 25.0, 22.0, 11.0, 8.0, 7.0, 3.0, 4.0, 2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-36.375, -35.04541015625, -33.7158203125, -32.38623046875, -31.056640625, -29.72705078125, -28.3974609375, -27.06787109375, -25.73828125, -24.40869140625, -23.0791015625, -21.74951171875, -20.419921875, -19.09033203125, -17.7607421875, -16.43115234375, -15.1015625, -13.77197265625, -12.4423828125, -11.11279296875, -9.783203125, -8.45361328125, -7.1240234375, -5.79443359375, -4.46484375, -3.13525390625, -1.8056640625, -0.47607421875, 0.853515625, 2.18310546875, 3.5126953125, 4.84228515625, 6.171875, 7.50146484375, 8.8310546875, 10.16064453125, 11.490234375, 12.81982421875, 14.1494140625, 15.47900390625, 16.80859375, 18.13818359375, 19.4677734375, 20.79736328125, 22.126953125, 23.45654296875, 24.7861328125, 26.11572265625, 27.4453125, 28.77490234375, 30.1044921875, 31.43408203125, 32.763671875, 34.09326171875, 35.4228515625, 36.75244140625, 38.08203125, 39.41162109375, 40.7412109375, 42.07080078125, 43.400390625, 44.72998046875, 46.0595703125, 47.38916015625, 48.71875]}, "gradients/decoder.transformer.h.8.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 8.0, 28.0, 64.0, 161.0, 229.0, 246.0, 176.0, 76.0, 24.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-143.14224243164062, -139.3675994873047, -135.5929718017578, -131.81832885742188, -128.04368591308594, -124.26905059814453, -120.49441528320312, -116.71977233886719, -112.94513702392578, -109.17050170898438, -105.39585876464844, -101.62122344970703, -97.84658813476562, -94.07194519042969, -90.29730987548828, -86.52267456054688, -82.74803161621094, -78.97339630126953, -75.1987533569336, -71.42411804199219, -67.64947509765625, -63.874839782714844, -60.10020446777344, -56.325565338134766, -52.550926208496094, -48.77628707885742, -45.00164794921875, -41.227012634277344, -37.45237350463867, -33.677734375, -29.90309715270996, -26.128459930419922, -22.35381317138672, -18.579174041748047, -14.804536819458008, -11.029898643493652, -7.255260467529297, -3.480621337890625, 0.29401588439941406, 4.068653106689453, 7.843292236328125, 11.61793041229248, 15.392568588256836, 19.167205810546875, 22.941844940185547, 26.71648406982422, 30.491121292114258, 34.2657585144043, 38.04039764404297, 41.81503677368164, 45.58967590332031, 49.36431121826172, 53.13895034790039, 56.91358947753906, 60.68822479248047, 64.46286010742188, 68.23750305175781, 72.01213836669922, 75.78678131103516, 79.56141662597656, 83.3360595703125, 87.1106948852539, 90.88533020019531, 94.65997314453125, 98.43460845947266]}, "gradients/decoder.transformer.h.8.ln_2.bias": {"_type": "histogram", "values": [2.0, 1.0, 2.0, 4.0, 4.0, 0.0, 2.0, 2.0, 5.0, 5.0, 1.0, 6.0, 12.0, 3.0, 9.0, 9.0, 20.0, 16.0, 25.0, 22.0, 24.0, 25.0, 23.0, 23.0, 31.0, 26.0, 41.0, 38.0, 33.0, 38.0, 55.0, 32.0, 29.0, 37.0, 29.0, 34.0, 42.0, 30.0, 33.0, 29.0, 16.0, 21.0, 17.0, 27.0, 21.0, 16.0, 24.0, 13.0, 7.0, 14.0, 8.0, 7.0, 8.0, 6.0, 2.0, 5.0, 1.0, 0.0, 4.0, 0.0, 0.0, 2.0, 1.0, 2.0], "bins": [-35.62378692626953, -34.5118293762207, -33.399871826171875, -32.28791427612305, -31.17595672607422, -30.06399917602539, -28.952041625976562, -27.840084075927734, -26.728126525878906, -25.616168975830078, -24.50421142578125, -23.392253875732422, -22.280296325683594, -21.168338775634766, -20.056381225585938, -18.94442367553711, -17.83246421813965, -16.72050666809082, -15.608549118041992, -14.496591567993164, -13.384634017944336, -12.272676467895508, -11.160717964172363, -10.048760414123535, -8.936802864074707, -7.824845314025879, -6.712887763977051, -5.6009297370910645, -4.488972187042236, -3.377014636993408, -2.265056610107422, -1.1530990600585938, -0.041141510009765625, 1.070816159248352, 2.1827738285064697, 3.294731616973877, 4.406689167022705, 5.518646717071533, 6.6306047439575195, 7.742562294006348, 8.854519844055176, 9.966477394104004, 11.078434944152832, 12.190393447875977, 13.302350997924805, 14.414308547973633, 15.526266098022461, 16.63822364807129, 17.750181198120117, 18.862138748168945, 19.974096298217773, 21.0860538482666, 22.19801139831543, 23.309968948364258, 24.42192840576172, 25.533885955810547, 26.645843505859375, 27.757801055908203, 28.86975860595703, 29.98171615600586, 31.093673706054688, 32.205631256103516, 33.317588806152344, 34.42954635620117, 35.54150390625]}, "gradients/decoder.transformer.h.8.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 4.0, 3.0, 2.0, 4.0, 6.0, 3.0, 6.0, 8.0, 11.0, 16.0, 20.0, 14.0, 34.0, 26.0, 30.0, 36.0, 36.0, 29.0, 50.0, 40.0, 40.0, 33.0, 35.0, 43.0, 42.0, 47.0, 40.0, 39.0, 34.0, 33.0, 30.0, 31.0, 29.0, 24.0, 26.0, 24.0, 17.0, 13.0, 10.0, 6.0, 7.0, 5.0, 4.0, 8.0, 6.0, 3.0, 2.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 2.0], "bins": [-6.51953125, -6.32391357421875, -6.1282958984375, -5.93267822265625, -5.737060546875, -5.54144287109375, -5.3458251953125, -5.15020751953125, -4.95458984375, -4.75897216796875, -4.5633544921875, -4.36773681640625, -4.172119140625, -3.97650146484375, -3.7808837890625, -3.58526611328125, -3.3896484375, -3.19403076171875, -2.9984130859375, -2.80279541015625, -2.607177734375, -2.41156005859375, -2.2159423828125, -2.02032470703125, -1.82470703125, -1.62908935546875, -1.4334716796875, -1.23785400390625, -1.042236328125, -0.84661865234375, -0.6510009765625, -0.45538330078125, -0.259765625, -0.06414794921875, 0.1314697265625, 0.32708740234375, 0.522705078125, 0.71832275390625, 0.9139404296875, 1.10955810546875, 1.30517578125, 1.50079345703125, 1.6964111328125, 1.89202880859375, 2.087646484375, 2.28326416015625, 2.4788818359375, 2.67449951171875, 2.8701171875, 3.06573486328125, 3.2613525390625, 3.45697021484375, 3.652587890625, 3.84820556640625, 4.0438232421875, 4.23944091796875, 4.43505859375, 4.63067626953125, 4.8262939453125, 5.02191162109375, 5.217529296875, 5.41314697265625, 5.6087646484375, 5.80438232421875, 6.0]}, "gradients/decoder.transformer.h.8.crossattention.c_proj.weight": {"_type": "histogram", "values": [3.0, 5.0, 4.0, 9.0, 12.0, 13.0, 27.0, 40.0, 69.0, 82.0, 108.0, 189.0, 275.0, 391.0, 571.0, 801.0, 1230.0, 1773.0, 2622.0, 3738.0, 5622.0, 8186.0, 12013.0, 18296.0, 27356.0, 41023.0, 60246.0, 84446.0, 111608.0, 134231.0, 136670.0, 115003.0, 87894.0, 63067.0, 42924.0, 28928.0, 19267.0, 12804.0, 8675.0, 5809.0, 3920.0, 2694.0, 1883.0, 1292.0, 882.0, 595.0, 397.0, 274.0, 209.0, 130.0, 90.0, 69.0, 49.0, 20.0, 22.0, 2.0, 9.0, 3.0, 2.0, 1.0, 0.0, 1.0, 0.0, 2.0], "bins": [-0.63671875, -0.61553955078125, -0.5943603515625, -0.57318115234375, -0.552001953125, -0.53082275390625, -0.5096435546875, -0.48846435546875, -0.46728515625, -0.44610595703125, -0.4249267578125, -0.40374755859375, -0.382568359375, -0.36138916015625, -0.3402099609375, -0.31903076171875, -0.2978515625, -0.27667236328125, -0.2554931640625, -0.23431396484375, -0.213134765625, -0.19195556640625, -0.1707763671875, -0.14959716796875, -0.12841796875, -0.10723876953125, -0.0860595703125, -0.06488037109375, -0.043701171875, -0.02252197265625, -0.0013427734375, 0.01983642578125, 0.041015625, 0.06219482421875, 0.0833740234375, 0.10455322265625, 0.125732421875, 0.14691162109375, 0.1680908203125, 0.18927001953125, 0.21044921875, 0.23162841796875, 0.2528076171875, 0.27398681640625, 0.295166015625, 0.31634521484375, 0.3375244140625, 0.35870361328125, 0.3798828125, 0.40106201171875, 0.4222412109375, 0.44342041015625, 0.464599609375, 0.48577880859375, 0.5069580078125, 0.52813720703125, 0.54931640625, 0.57049560546875, 0.5916748046875, 0.61285400390625, 0.634033203125, 0.65521240234375, 0.6763916015625, 0.69757080078125, 0.71875]}, "gradients/decoder.transformer.h.8.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 3.0, 1.0, 6.0, 3.0, 5.0, 8.0, 9.0, 11.0, 8.0, 16.0, 23.0, 30.0, 22.0, 29.0, 37.0, 28.0, 47.0, 44.0, 28.0, 40.0, 48.0, 1070.0, 53.0, 51.0, 43.0, 45.0, 38.0, 36.0, 41.0, 28.0, 32.0, 27.0, 24.0, 22.0, 13.0, 17.0, 10.0, 10.0, 10.0, 7.0, 2.0, 4.0, 1.0, 3.0, 3.0, 2.0, 1.0, 0.0, 2.0, 1.0], "bins": [-5.59765625, -5.44146728515625, -5.2852783203125, -5.12908935546875, -4.972900390625, -4.81671142578125, -4.6605224609375, -4.50433349609375, -4.34814453125, -4.19195556640625, -4.0357666015625, -3.87957763671875, -3.723388671875, -3.56719970703125, -3.4110107421875, -3.25482177734375, -3.0986328125, -2.94244384765625, -2.7862548828125, -2.63006591796875, -2.473876953125, -2.31768798828125, -2.1614990234375, -2.00531005859375, -1.84912109375, -1.69293212890625, -1.5367431640625, -1.38055419921875, -1.224365234375, -1.06817626953125, -0.9119873046875, -0.75579833984375, -0.599609375, -0.44342041015625, -0.2872314453125, -0.13104248046875, 0.025146484375, 0.18133544921875, 0.3375244140625, 0.49371337890625, 0.64990234375, 0.80609130859375, 0.9622802734375, 1.11846923828125, 1.274658203125, 1.43084716796875, 1.5870361328125, 1.74322509765625, 1.8994140625, 2.05560302734375, 2.2117919921875, 2.36798095703125, 2.524169921875, 2.68035888671875, 2.8365478515625, 2.99273681640625, 3.14892578125, 3.30511474609375, 3.4613037109375, 3.61749267578125, 3.773681640625, 3.92987060546875, 4.0860595703125, 4.24224853515625, 4.3984375]}, "gradients/decoder.transformer.h.8.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 4.0, 5.0, 10.0, 15.0, 13.0, 28.0, 30.0, 53.0, 66.0, 114.0, 172.0, 283.0, 395.0, 751.0, 1054.0, 1726.0, 2850.0, 4682.0, 7700.0, 12658.0, 20928.0, 33698.0, 52572.0, 80754.0, 116277.0, 175381.0, 1182060.0, 134186.0, 96376.0, 64439.0, 41162.0, 25922.0, 15617.0, 9747.0, 5828.0, 3657.0, 2228.0, 1403.0, 896.0, 534.0, 330.0, 185.0, 134.0, 66.0, 62.0, 40.0, 22.0, 14.0, 8.0, 5.0, 4.0, 1.0, 1.0], "bins": [-0.611328125, -0.5949058532714844, -0.5784835815429688, -0.5620613098144531, -0.5456390380859375, -0.5292167663574219, -0.5127944946289062, -0.4963722229003906, -0.479949951171875, -0.4635276794433594, -0.44710540771484375, -0.4306831359863281, -0.4142608642578125, -0.3978385925292969, -0.38141632080078125, -0.3649940490722656, -0.34857177734375, -0.3321495056152344, -0.31572723388671875, -0.2993049621582031, -0.2828826904296875, -0.2664604187011719, -0.25003814697265625, -0.23361587524414062, -0.217193603515625, -0.20077133178710938, -0.18434906005859375, -0.16792678833007812, -0.1515045166015625, -0.13508224487304688, -0.11865997314453125, -0.10223770141601562, -0.0858154296875, -0.06939315795898438, -0.05297088623046875, -0.036548614501953125, -0.0201263427734375, -0.003704071044921875, 0.01271820068359375, 0.029140472412109375, 0.045562744140625, 0.061985015869140625, 0.07840728759765625, 0.09482955932617188, 0.1112518310546875, 0.12767410278320312, 0.14409637451171875, 0.16051864624023438, 0.17694091796875, 0.19336318969726562, 0.20978546142578125, 0.22620773315429688, 0.2426300048828125, 0.2590522766113281, 0.27547454833984375, 0.2918968200683594, 0.308319091796875, 0.3247413635253906, 0.34116363525390625, 0.3575859069824219, 0.3740081787109375, 0.3904304504394531, 0.40685272216796875, 0.4232749938964844, 0.439697265625]}, "gradients/decoder.transformer.h.8.crossattention.q_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 2.0, 6.0, 2.0, 1.0, 5.0, 3.0, 5.0, 8.0, 4.0, 6.0, 5.0, 13.0, 13.0, 10.0, 16.0, 13.0, 24.0, 27.0, 20.0, 40.0, 40.0, 52.0, 67.0, 59.0, 73.0, 64.0, 80.0, 48.0, 45.0, 34.0, 22.0, 35.0, 27.0, 16.0, 23.0, 11.0, 21.0, 12.0, 9.0, 8.0, 8.0, 8.0, 8.0, 8.0, 2.0, 2.0, 4.0, 2.0, 1.0, 0.0, 3.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.01561737060546875, -0.015056967735290527, -0.014496564865112305, -0.013936161994934082, -0.01337575912475586, -0.012815356254577637, -0.012254953384399414, -0.011694550514221191, -0.011134147644042969, -0.010573744773864746, -0.010013341903686523, -0.0094529390335083, -0.008892536163330078, -0.008332133293151855, -0.007771730422973633, -0.00721132755279541, -0.0066509246826171875, -0.006090521812438965, -0.005530118942260742, -0.0049697160720825195, -0.004409313201904297, -0.0038489103317260742, -0.0032885074615478516, -0.002728104591369629, -0.0021677017211914062, -0.0016072988510131836, -0.001046895980834961, -0.0004864931106567383, 7.390975952148438e-05, 0.000634312629699707, 0.0011947154998779297, 0.0017551183700561523, 0.002315521240234375, 0.0028759241104125977, 0.0034363269805908203, 0.003996729850769043, 0.004557132720947266, 0.005117535591125488, 0.005677938461303711, 0.006238341331481934, 0.006798744201660156, 0.007359147071838379, 0.007919549942016602, 0.008479952812194824, 0.009040355682373047, 0.00960075855255127, 0.010161161422729492, 0.010721564292907715, 0.011281967163085938, 0.01184237003326416, 0.012402772903442383, 0.012963175773620605, 0.013523578643798828, 0.01408398151397705, 0.014644384384155273, 0.015204787254333496, 0.01576519012451172, 0.01632559299468994, 0.016885995864868164, 0.017446398735046387, 0.01800680160522461, 0.018567204475402832, 0.019127607345581055, 0.019688010215759277, 0.0202484130859375]}, "gradients/decoder.transformer.h.8.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 3.0, 4.0, 6.0, 9.0, 9.0, 8.0, 14.0, 11.0, 13.0, 24.0, 26.0, 34.0, 44.0, 54.0, 71.0, 82.0, 112.0, 174.0, 307.0, 663.0, 11323.0, 1027789.0, 6237.0, 565.0, 315.0, 165.0, 119.0, 84.0, 63.0, 43.0, 37.0, 25.0, 29.0, 19.0, 15.0, 15.0, 6.0, 6.0, 7.0, 8.0, 2.0, 5.0, 3.0, 2.0, 6.0, 2.0, 0.0, 2.0], "bins": [-0.421630859375, -0.410369873046875, -0.39910888671875, -0.387847900390625, -0.3765869140625, -0.365325927734375, -0.35406494140625, -0.342803955078125, -0.33154296875, -0.320281982421875, -0.30902099609375, -0.297760009765625, -0.2864990234375, -0.275238037109375, -0.26397705078125, -0.252716064453125, -0.241455078125, -0.230194091796875, -0.21893310546875, -0.207672119140625, -0.1964111328125, -0.185150146484375, -0.17388916015625, -0.162628173828125, -0.1513671875, -0.140106201171875, -0.12884521484375, -0.117584228515625, -0.1063232421875, -0.095062255859375, -0.08380126953125, -0.072540283203125, -0.061279296875, -0.050018310546875, -0.03875732421875, -0.027496337890625, -0.0162353515625, -0.004974365234375, 0.00628662109375, 0.017547607421875, 0.02880859375, 0.040069580078125, 0.05133056640625, 0.062591552734375, 0.0738525390625, 0.085113525390625, 0.09637451171875, 0.107635498046875, 0.118896484375, 0.130157470703125, 0.14141845703125, 0.152679443359375, 0.1639404296875, 0.175201416015625, 0.18646240234375, 0.197723388671875, 0.208984375, 0.220245361328125, 0.23150634765625, 0.242767333984375, 0.2540283203125, 0.265289306640625, 0.27655029296875, 0.287811279296875, 0.299072265625]}, "gradients/decoder.transformer.h.8.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 11.0, 991.0, 18.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.08656884729862213, -0.07344507426023483, -0.06032129377126694, -0.04719752073287964, -0.03407374396920204, -0.020949967205524445, -0.007826194167137146, 0.0052975863218307495, 0.018421359360218048, 0.031545136123895645, 0.04466891288757324, 0.05779268592596054, 0.07091645896434784, 0.08404023945331573, 0.09716401249170303, 0.11028779298067093, 0.12341156601905823, 0.13653534650802612, 0.14965911209583282, 0.16278289258480072, 0.17590667307376862, 0.18903043866157532, 0.2021542191505432, 0.2152779996395111, 0.228401780128479, 0.2415255606174469, 0.2546493411064148, 0.2677730917930603, 0.2808968722820282, 0.2940206527709961, 0.307144433259964, 0.3202682137489319, 0.3333919942378998, 0.3465157747268677, 0.35963955521583557, 0.37276333570480347, 0.385887086391449, 0.39901086688041687, 0.41213464736938477, 0.42525842785835266, 0.43838220834732056, 0.45150598883628845, 0.46462976932525635, 0.47775352001190186, 0.49087730050086975, 0.5040010809898376, 0.5171248912811279, 0.5302486419677734, 0.543372392654419, 0.5564961433410645, 0.5696199536323547, 0.5827437043190002, 0.5958675146102905, 0.608991265296936, 0.6221150159835815, 0.6352388262748718, 0.6483626365661621, 0.6614863872528076, 0.6746101975440979, 0.6877339482307434, 0.7008577585220337, 0.7139815092086792, 0.7271052598953247, 0.740229070186615, 0.7533528208732605]}, "gradients/decoder.transformer.h.8.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 2.0, 0.0, 2.0, 2.0, 4.0, 7.0, 12.0, 8.0, 13.0, 21.0, 28.0, 22.0, 32.0, 42.0, 43.0, 52.0, 58.0, 48.0, 61.0, 59.0, 50.0, 49.0, 56.0, 43.0, 51.0, 43.0, 28.0, 36.0, 31.0, 31.0, 15.0, 13.0, 11.0, 16.0, 11.0, 5.0, 3.0, 2.0, 2.0, 4.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.03833037614822388, -0.037341926246881485, -0.03635347634553909, -0.0353650227189064, -0.03437657281756401, -0.03338812291622162, -0.03239966928958893, -0.031411219388246536, -0.030422769486904144, -0.029434319585561752, -0.02844586782157421, -0.02745741605758667, -0.026468966156244278, -0.025480516254901886, -0.024492064490914345, -0.023503612726926804, -0.02251516282558441, -0.02152671292424202, -0.02053826116025448, -0.019549809396266937, -0.018561359494924545, -0.017572909593582153, -0.016584457829594612, -0.015596006996929646, -0.014607556164264679, -0.013619105331599712, -0.012630654498934746, -0.01164220366626978, -0.010653752833604813, -0.009665302000939846, -0.00867685116827488, -0.007688400335609913, -0.006699949502944946, -0.00571149867027998, -0.004723047837615013, -0.0037345970049500465, -0.00274614617228508, -0.0017576953396201134, -0.0007692445069551468, 0.0002192063257098198, 0.0012076571583747864, 0.002196107991039753, 0.0031845588237047195, 0.004173009656369686, 0.005161460489034653, 0.006149911321699619, 0.007138362154364586, 0.008126812987029552, 0.009115263819694519, 0.010103714652359486, 0.011092165485024452, 0.012080616317689419, 0.013069067150354385, 0.014057517983019352, 0.015045968815684319, 0.01603442057967186, 0.01702287048101425, 0.018011320382356644, 0.018999772146344185, 0.019988223910331726, 0.020976673811674118, 0.02196512371301651, 0.02295357547700405, 0.023942027240991592, 0.024930477142333984]}, "gradients/decoder.transformer.h.8.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 3.0, 4.0, 2.0, 4.0, 5.0, 4.0, 6.0, 8.0, 11.0, 16.0, 20.0, 14.0, 34.0, 25.0, 31.0, 35.0, 37.0, 28.0, 49.0, 42.0, 40.0, 33.0, 33.0, 44.0, 43.0, 47.0, 39.0, 39.0, 33.0, 35.0, 30.0, 31.0, 29.0, 24.0, 26.0, 23.0, 18.0, 13.0, 10.0, 6.0, 7.0, 5.0, 4.0, 7.0, 7.0, 3.0, 2.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 2.0], "bins": [-6.5234375, -6.3277587890625, -6.132080078125, -5.9364013671875, -5.74072265625, -5.5450439453125, -5.349365234375, -5.1536865234375, -4.9580078125, -4.7623291015625, -4.566650390625, -4.3709716796875, -4.17529296875, -3.9796142578125, -3.783935546875, -3.5882568359375, -3.392578125, -3.1968994140625, -3.001220703125, -2.8055419921875, -2.60986328125, -2.4141845703125, -2.218505859375, -2.0228271484375, -1.8271484375, -1.6314697265625, -1.435791015625, -1.2401123046875, -1.04443359375, -0.8487548828125, -0.653076171875, -0.4573974609375, -0.26171875, -0.0660400390625, 0.129638671875, 0.3253173828125, 0.52099609375, 0.7166748046875, 0.912353515625, 1.1080322265625, 1.3037109375, 1.4993896484375, 1.695068359375, 1.8907470703125, 2.08642578125, 2.2821044921875, 2.477783203125, 2.6734619140625, 2.869140625, 3.0648193359375, 3.260498046875, 3.4561767578125, 3.65185546875, 3.8475341796875, 4.043212890625, 4.2388916015625, 4.4345703125, 4.6302490234375, 4.825927734375, 5.0216064453125, 5.21728515625, 5.4129638671875, 5.608642578125, 5.8043212890625, 6.0]}, "gradients/decoder.transformer.h.8.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 4.0, 2.0, 4.0, 3.0, 7.0, 5.0, 8.0, 15.0, 13.0, 23.0, 42.0, 63.0, 88.0, 145.0, 286.0, 525.0, 961.0, 1651.0, 3260.0, 6111.0, 11118.0, 20697.0, 39857.0, 84387.0, 199238.0, 320034.0, 194535.0, 82340.0, 39026.0, 20386.0, 10873.0, 5768.0, 3213.0, 1668.0, 941.0, 505.0, 299.0, 172.0, 105.0, 55.0, 40.0, 19.0, 21.0, 11.0, 14.0, 9.0, 5.0, 4.0, 6.0, 3.0, 3.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-6.78515625, -6.58251953125, -6.3798828125, -6.17724609375, -5.974609375, -5.77197265625, -5.5693359375, -5.36669921875, -5.1640625, -4.96142578125, -4.7587890625, -4.55615234375, -4.353515625, -4.15087890625, -3.9482421875, -3.74560546875, -3.54296875, -3.34033203125, -3.1376953125, -2.93505859375, -2.732421875, -2.52978515625, -2.3271484375, -2.12451171875, -1.921875, -1.71923828125, -1.5166015625, -1.31396484375, -1.111328125, -0.90869140625, -0.7060546875, -0.50341796875, -0.30078125, -0.09814453125, 0.1044921875, 0.30712890625, 0.509765625, 0.71240234375, 0.9150390625, 1.11767578125, 1.3203125, 1.52294921875, 1.7255859375, 1.92822265625, 2.130859375, 2.33349609375, 2.5361328125, 2.73876953125, 2.94140625, 3.14404296875, 3.3466796875, 3.54931640625, 3.751953125, 3.95458984375, 4.1572265625, 4.35986328125, 4.5625, 4.76513671875, 4.9677734375, 5.17041015625, 5.373046875, 5.57568359375, 5.7783203125, 5.98095703125, 6.18359375]}, "gradients/decoder.transformer.h.8.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 3.0, 0.0, 0.0, 2.0, 5.0, 5.0, 5.0, 9.0, 9.0, 17.0, 12.0, 22.0, 13.0, 27.0, 33.0, 38.0, 41.0, 51.0, 44.0, 57.0, 111.0, 173.0, 1452.0, 359.0, 151.0, 76.0, 69.0, 34.0, 54.0, 27.0, 31.0, 27.0, 22.0, 20.0, 18.0, 12.0, 7.0, 4.0, 4.0, 9.0, 2.0, 2.0, 2.0, 2.0, 3.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-18.1875, -17.428466796875, -16.66943359375, -15.910400390625, -15.1513671875, -14.392333984375, -13.63330078125, -12.874267578125, -12.115234375, -11.356201171875, -10.59716796875, -9.838134765625, -9.0791015625, -8.320068359375, -7.56103515625, -6.802001953125, -6.04296875, -5.283935546875, -4.52490234375, -3.765869140625, -3.0068359375, -2.247802734375, -1.48876953125, -0.729736328125, 0.029296875, 0.788330078125, 1.54736328125, 2.306396484375, 3.0654296875, 3.824462890625, 4.58349609375, 5.342529296875, 6.1015625, 6.860595703125, 7.61962890625, 8.378662109375, 9.1376953125, 9.896728515625, 10.65576171875, 11.414794921875, 12.173828125, 12.932861328125, 13.69189453125, 14.450927734375, 15.2099609375, 15.968994140625, 16.72802734375, 17.487060546875, 18.24609375, 19.005126953125, 19.76416015625, 20.523193359375, 21.2822265625, 22.041259765625, 22.80029296875, 23.559326171875, 24.318359375, 25.077392578125, 25.83642578125, 26.595458984375, 27.3544921875, 28.113525390625, 28.87255859375, 29.631591796875, 30.390625]}, "gradients/decoder.transformer.h.8.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 3.0, 1.0, 2.0, 2.0, 5.0, 6.0, 6.0, 6.0, 13.0, 12.0, 12.0, 26.0, 28.0, 48.0, 73.0, 96.0, 151.0, 229.0, 396.0, 974.0, 8720.0, 1234248.0, 1887589.0, 10809.0, 1028.0, 404.0, 280.0, 147.0, 111.0, 80.0, 51.0, 40.0, 25.0, 18.0, 19.0, 12.0, 9.0, 11.0, 5.0, 3.0, 6.0, 3.0, 3.0, 1.0, 2.0, 0.0, 4.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-47.1875, -45.71435546875, -44.2412109375, -42.76806640625, -41.294921875, -39.82177734375, -38.3486328125, -36.87548828125, -35.40234375, -33.92919921875, -32.4560546875, -30.98291015625, -29.509765625, -28.03662109375, -26.5634765625, -25.09033203125, -23.6171875, -22.14404296875, -20.6708984375, -19.19775390625, -17.724609375, -16.25146484375, -14.7783203125, -13.30517578125, -11.83203125, -10.35888671875, -8.8857421875, -7.41259765625, -5.939453125, -4.46630859375, -2.9931640625, -1.52001953125, -0.046875, 1.42626953125, 2.8994140625, 4.37255859375, 5.845703125, 7.31884765625, 8.7919921875, 10.26513671875, 11.73828125, 13.21142578125, 14.6845703125, 16.15771484375, 17.630859375, 19.10400390625, 20.5771484375, 22.05029296875, 23.5234375, 24.99658203125, 26.4697265625, 27.94287109375, 29.416015625, 30.88916015625, 32.3623046875, 33.83544921875, 35.30859375, 36.78173828125, 38.2548828125, 39.72802734375, 41.201171875, 42.67431640625, 44.1474609375, 45.62060546875, 47.09375]}, "gradients/decoder.transformer.h.8.ln_1.weight": {"_type": "histogram", "values": [117.0, 825.0, 76.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.943163871765137, -3.9436283111572266, 5.055907249450684, 14.05544376373291, 23.054977416992188, 32.05451202392578, 41.05405044555664, 50.053585052490234, 59.05311965942383, 68.05265808105469, 77.05219268798828, 86.05172729492188, 95.05126190185547, 104.05079650878906, 113.05033874511719, 122.04986572265625, 131.04940795898438, 140.0489501953125, 149.04847717285156, 158.0480194091797, 167.04754638671875, 176.04708862304688, 185.046630859375, 194.04615783691406, 203.04568481445312, 212.04522705078125, 221.0447540283203, 230.04429626464844, 239.0438232421875, 248.04336547851562, 257.04290771484375, 266.04241943359375, 275.0419616699219, 284.04150390625, 293.0410461425781, 302.0405578613281, 311.04010009765625, 320.0396423339844, 329.0391845703125, 338.0386962890625, 347.0382385253906, 356.03778076171875, 365.0373229980469, 374.0368347167969, 383.036376953125, 392.0359191894531, 401.03546142578125, 410.03497314453125, 419.0345458984375, 428.0340881347656, 437.03363037109375, 446.03314208984375, 455.0326843261719, 464.0322265625, 473.0317687988281, 482.03131103515625, 491.03082275390625, 500.0303649902344, 509.0299072265625, 518.0294189453125, 527.0289916992188, 536.0285034179688, 545.0280151367188, 554.027587890625, 563.027099609375]}, "gradients/decoder.transformer.h.8.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 2.0, 2.0, 2.0, 2.0, 8.0, 7.0, 9.0, 6.0, 10.0, 16.0, 15.0, 28.0, 22.0, 24.0, 22.0, 29.0, 39.0, 28.0, 33.0, 34.0, 41.0, 39.0, 46.0, 42.0, 44.0, 50.0, 37.0, 39.0, 32.0, 32.0, 33.0, 34.0, 26.0, 22.0, 17.0, 23.0, 16.0, 15.0, 18.0, 14.0, 12.0, 12.0, 6.0, 5.0, 6.0, 9.0, 2.0, 1.0, 0.0, 1.0, 4.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-53.73203659057617, -51.995750427246094, -50.25946044921875, -48.52317428588867, -46.786888122558594, -45.05059814453125, -43.31431198120117, -41.578025817871094, -39.84173583984375, -38.10544967651367, -36.36915969848633, -34.63287353515625, -32.896583557128906, -31.160297393798828, -29.42401123046875, -27.68772315979004, -25.951435089111328, -24.215147018432617, -22.478858947753906, -20.742572784423828, -19.006284713745117, -17.269996643066406, -15.533709526062012, -13.797422409057617, -12.061134338378906, -10.324846267700195, -8.5885591506958, -6.852271556854248, -5.115983963012695, -3.3796958923339844, -1.6434087753295898, 0.09287834167480469, 1.8291702270507812, 3.565457820892334, 5.301745414733887, 7.0380330085754395, 8.774320602416992, 10.510608673095703, 12.246895790100098, 13.983182907104492, 15.719470977783203, 17.455759048461914, 19.192047119140625, 20.928333282470703, 22.664621353149414, 24.400909423828125, 26.137195587158203, 27.873483657836914, 29.609771728515625, 31.346059799194336, 33.08234786987305, 34.818634033203125, 36.55492401123047, 38.29121017456055, 40.027496337890625, 41.76378631591797, 43.50007247924805, 45.236358642578125, 46.97264862060547, 48.70893478393555, 50.445220947265625, 52.18151092529297, 53.91779708862305, 55.654083251953125, 57.39037322998047]}, "gradients/decoder.transformer.h.7.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 2.0, 5.0, 2.0, 4.0, 5.0, 5.0, 6.0, 9.0, 18.0, 11.0, 10.0, 17.0, 18.0, 20.0, 27.0, 28.0, 36.0, 28.0, 43.0, 38.0, 40.0, 30.0, 37.0, 44.0, 39.0, 39.0, 41.0, 32.0, 40.0, 42.0, 31.0, 35.0, 28.0, 29.0, 19.0, 20.0, 19.0, 26.0, 17.0, 10.0, 16.0, 9.0, 7.0, 9.0, 4.0, 6.0, 1.0, 1.0, 5.0, 3.0, 0.0, 0.0, 1.0, 0.0, 4.0, 1.0], "bins": [-6.3515625, -6.15716552734375, -5.9627685546875, -5.76837158203125, -5.573974609375, -5.37957763671875, -5.1851806640625, -4.99078369140625, -4.79638671875, -4.60198974609375, -4.4075927734375, -4.21319580078125, -4.018798828125, -3.82440185546875, -3.6300048828125, -3.43560791015625, -3.2412109375, -3.04681396484375, -2.8524169921875, -2.65802001953125, -2.463623046875, -2.26922607421875, -2.0748291015625, -1.88043212890625, -1.68603515625, -1.49163818359375, -1.2972412109375, -1.10284423828125, -0.908447265625, -0.71405029296875, -0.5196533203125, -0.32525634765625, -0.130859375, 0.06353759765625, 0.2579345703125, 0.45233154296875, 0.646728515625, 0.84112548828125, 1.0355224609375, 1.22991943359375, 1.42431640625, 1.61871337890625, 1.8131103515625, 2.00750732421875, 2.201904296875, 2.39630126953125, 2.5906982421875, 2.78509521484375, 2.9794921875, 3.17388916015625, 3.3682861328125, 3.56268310546875, 3.757080078125, 3.95147705078125, 4.1458740234375, 4.34027099609375, 4.53466796875, 4.72906494140625, 4.9234619140625, 5.11785888671875, 5.312255859375, 5.50665283203125, 5.7010498046875, 5.89544677734375, 6.08984375]}, "gradients/decoder.transformer.h.7.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 4.0, 1.0, 2.0, 2.0, 4.0, 5.0, 4.0, 5.0, 4.0, 8.0, 7.0, 9.0, 20.0, 12.0, 21.0, 21.0, 29.0, 30.0, 59.0, 91.0, 168.0, 366.0, 915.0, 3072.0, 13985.0, 83005.0, 568183.0, 2215719.0, 1106301.0, 168220.0, 26234.0, 5288.0, 1412.0, 492.0, 227.0, 104.0, 59.0, 34.0, 30.0, 19.0, 23.0, 19.0, 15.0, 13.0, 9.0, 11.0, 5.0, 7.0, 4.0, 3.0, 7.0, 6.0, 2.0, 2.0, 4.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.2109375, -11.8255615234375, -11.440185546875, -11.0548095703125, -10.66943359375, -10.2840576171875, -9.898681640625, -9.5133056640625, -9.1279296875, -8.7425537109375, -8.357177734375, -7.9718017578125, -7.58642578125, -7.2010498046875, -6.815673828125, -6.4302978515625, -6.044921875, -5.6595458984375, -5.274169921875, -4.8887939453125, -4.50341796875, -4.1180419921875, -3.732666015625, -3.3472900390625, -2.9619140625, -2.5765380859375, -2.191162109375, -1.8057861328125, -1.42041015625, -1.0350341796875, -0.649658203125, -0.2642822265625, 0.12109375, 0.5064697265625, 0.891845703125, 1.2772216796875, 1.66259765625, 2.0479736328125, 2.433349609375, 2.8187255859375, 3.2041015625, 3.5894775390625, 3.974853515625, 4.3602294921875, 4.74560546875, 5.1309814453125, 5.516357421875, 5.9017333984375, 6.287109375, 6.6724853515625, 7.057861328125, 7.4432373046875, 7.82861328125, 8.2139892578125, 8.599365234375, 8.9847412109375, 9.3701171875, 9.7554931640625, 10.140869140625, 10.5262451171875, 10.91162109375, 11.2969970703125, 11.682373046875, 12.0677490234375, 12.453125]}, "gradients/decoder.transformer.h.7.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 5.0, 0.0, 2.0, 3.0, 5.0, 13.0, 8.0, 6.0, 21.0, 22.0, 34.0, 34.0, 37.0, 52.0, 77.0, 102.0, 108.0, 158.0, 204.0, 265.0, 315.0, 357.0, 404.0, 354.0, 312.0, 278.0, 208.0, 149.0, 137.0, 101.0, 63.0, 59.0, 42.0, 31.0, 30.0, 27.0, 20.0, 10.0, 3.0, 5.0, 8.0, 5.0, 6.0, 4.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-8.6015625, -8.3101806640625, -8.018798828125, -7.7274169921875, -7.43603515625, -7.1446533203125, -6.853271484375, -6.5618896484375, -6.2705078125, -5.9791259765625, -5.687744140625, -5.3963623046875, -5.10498046875, -4.8135986328125, -4.522216796875, -4.2308349609375, -3.939453125, -3.6480712890625, -3.356689453125, -3.0653076171875, -2.77392578125, -2.4825439453125, -2.191162109375, -1.8997802734375, -1.6083984375, -1.3170166015625, -1.025634765625, -0.7342529296875, -0.44287109375, -0.1514892578125, 0.139892578125, 0.4312744140625, 0.72265625, 1.0140380859375, 1.305419921875, 1.5968017578125, 1.88818359375, 2.1795654296875, 2.470947265625, 2.7623291015625, 3.0537109375, 3.3450927734375, 3.636474609375, 3.9278564453125, 4.21923828125, 4.5106201171875, 4.802001953125, 5.0933837890625, 5.384765625, 5.6761474609375, 5.967529296875, 6.2589111328125, 6.55029296875, 6.8416748046875, 7.133056640625, 7.4244384765625, 7.7158203125, 8.0072021484375, 8.298583984375, 8.5899658203125, 8.88134765625, 9.1727294921875, 9.464111328125, 9.7554931640625, 10.046875]}, "gradients/decoder.transformer.h.7.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 3.0, 0.0, 3.0, 2.0, 5.0, 7.0, 6.0, 8.0, 12.0, 19.0, 23.0, 34.0, 45.0, 56.0, 66.0, 97.0, 131.0, 195.0, 256.0, 418.0, 932.0, 4918.0, 77169.0, 2333270.0, 1715316.0, 55251.0, 3887.0, 807.0, 382.0, 267.0, 166.0, 125.0, 104.0, 84.0, 62.0, 44.0, 27.0, 21.0, 23.0, 16.0, 9.0, 5.0, 10.0, 7.0, 4.0, 1.0, 3.0, 2.0, 0.0, 0.0, 1.0], "bins": [-36.9375, -35.937744140625, -34.93798828125, -33.938232421875, -32.9384765625, -31.938720703125, -30.93896484375, -29.939208984375, -28.939453125, -27.939697265625, -26.93994140625, -25.940185546875, -24.9404296875, -23.940673828125, -22.94091796875, -21.941162109375, -20.94140625, -19.941650390625, -18.94189453125, -17.942138671875, -16.9423828125, -15.942626953125, -14.94287109375, -13.943115234375, -12.943359375, -11.943603515625, -10.94384765625, -9.944091796875, -8.9443359375, -7.944580078125, -6.94482421875, -5.945068359375, -4.9453125, -3.945556640625, -2.94580078125, -1.946044921875, -0.9462890625, 0.053466796875, 1.05322265625, 2.052978515625, 3.052734375, 4.052490234375, 5.05224609375, 6.052001953125, 7.0517578125, 8.051513671875, 9.05126953125, 10.051025390625, 11.05078125, 12.050537109375, 13.05029296875, 14.050048828125, 15.0498046875, 16.049560546875, 17.04931640625, 18.049072265625, 19.048828125, 20.048583984375, 21.04833984375, 22.048095703125, 23.0478515625, 24.047607421875, 25.04736328125, 26.047119140625, 27.046875]}, "gradients/decoder.transformer.h.7.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 22.0, 196.0, 448.0, 296.0, 49.0, 5.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-219.59033203125, -212.85984802246094, -206.1293487548828, -199.39886474609375, -192.66836547851562, -185.93788146972656, -179.2073974609375, -172.47689819335938, -165.74639892578125, -159.0159149169922, -152.28541564941406, -145.554931640625, -138.82443237304688, -132.0939483642578, -125.36345672607422, -118.63296508789062, -111.90248107910156, -105.17198944091797, -98.44149780273438, -91.71101379394531, -84.98051452636719, -78.25003051757812, -71.51953887939453, -64.78904724121094, -58.058555603027344, -51.32806396484375, -44.597572326660156, -37.86708450317383, -31.136592864990234, -24.40610122680664, -17.675613403320312, -10.945121765136719, -4.2146148681640625, 2.515875816345215, 9.246366500854492, 15.976856231689453, 22.707347869873047, 29.43783950805664, 36.16832733154297, 42.89881896972656, 49.629310607910156, 56.35980224609375, 63.090293884277344, 69.82078552246094, 76.55126953125, 83.28176879882812, 90.01225280761719, 96.74274444580078, 103.47323608398438, 110.20372772216797, 116.93421936035156, 123.66470336914062, 130.39520263671875, 137.1256866455078, 143.85617065429688, 150.586669921875, 157.31716918945312, 164.0476531982422, 170.7781524658203, 177.50863647460938, 184.2391357421875, 190.96961975097656, 197.70010375976562, 204.43060302734375, 211.1610870361328]}, "gradients/decoder.transformer.h.7.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 3.0, 0.0, 5.0, 3.0, 6.0, 7.0, 3.0, 13.0, 11.0, 10.0, 25.0, 15.0, 22.0, 16.0, 18.0, 26.0, 29.0, 35.0, 30.0, 36.0, 49.0, 40.0, 43.0, 35.0, 43.0, 36.0, 30.0, 29.0, 35.0, 36.0, 41.0, 46.0, 39.0, 21.0, 26.0, 19.0, 28.0, 22.0, 17.0, 18.0, 9.0, 6.0, 10.0, 3.0, 5.0, 3.0, 4.0, 3.0, 0.0, 1.0, 1.0, 0.0, 3.0, 1.0, 2.0, 1.0], "bins": [-35.08148956298828, -33.992820739746094, -32.904151916503906, -31.81548500061035, -30.726818084716797, -29.63814926147461, -28.549480438232422, -27.460813522338867, -26.372146606445312, -25.283477783203125, -24.19481086730957, -23.106142044067383, -22.017475128173828, -20.92880630493164, -19.840137481689453, -18.7514705657959, -17.66280174255371, -16.574132919311523, -15.485466003417969, -14.396797180175781, -13.308130264282227, -12.219461441040039, -11.130793571472168, -10.042125701904297, -8.953457832336426, -7.864789962768555, -6.776122093200684, -5.687453746795654, -4.598785877227783, -3.510118007659912, -2.421449661254883, -1.3327817916870117, -0.24411392211914062, 0.84455406665802, 1.9332220554351807, 3.021890163421631, 4.110558032989502, 5.199225902557373, 6.287894248962402, 7.376562118530273, 8.465229988098145, 9.553897857666016, 10.642565727233887, 11.731233596801758, 12.819902420043945, 13.9085693359375, 14.997238159179688, 16.085906982421875, 17.17457389831543, 18.263242721557617, 19.351909637451172, 20.44057846069336, 21.529245376586914, 22.6179141998291, 23.706581115722656, 24.795249938964844, 25.88391876220703, 26.97258758544922, 28.061254501342773, 29.14992332458496, 30.238590240478516, 31.327259063720703, 32.41592788696289, 33.50459289550781, 34.59326171875]}, "gradients/decoder.transformer.h.7.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 3.0, 3.0, 1.0, 3.0, 4.0, 2.0, 3.0, 8.0, 25.0, 13.0, 11.0, 21.0, 14.0, 26.0, 26.0, 22.0, 19.0, 47.0, 31.0, 30.0, 33.0, 47.0, 39.0, 31.0, 44.0, 53.0, 37.0, 46.0, 40.0, 33.0, 34.0, 30.0, 30.0, 28.0, 21.0, 18.0, 18.0, 21.0, 14.0, 13.0, 20.0, 9.0, 10.0, 3.0, 6.0, 4.0, 9.0, 1.0, 4.0, 1.0, 2.0, 2.0, 3.0, 0.0, 1.0], "bins": [-6.45703125, -6.2664794921875, -6.075927734375, -5.8853759765625, -5.69482421875, -5.5042724609375, -5.313720703125, -5.1231689453125, -4.9326171875, -4.7420654296875, -4.551513671875, -4.3609619140625, -4.17041015625, -3.9798583984375, -3.789306640625, -3.5987548828125, -3.408203125, -3.2176513671875, -3.027099609375, -2.8365478515625, -2.64599609375, -2.4554443359375, -2.264892578125, -2.0743408203125, -1.8837890625, -1.6932373046875, -1.502685546875, -1.3121337890625, -1.12158203125, -0.9310302734375, -0.740478515625, -0.5499267578125, -0.359375, -0.1688232421875, 0.021728515625, 0.2122802734375, 0.40283203125, 0.5933837890625, 0.783935546875, 0.9744873046875, 1.1650390625, 1.3555908203125, 1.546142578125, 1.7366943359375, 1.92724609375, 2.1177978515625, 2.308349609375, 2.4989013671875, 2.689453125, 2.8800048828125, 3.070556640625, 3.2611083984375, 3.45166015625, 3.6422119140625, 3.832763671875, 4.0233154296875, 4.2138671875, 4.4044189453125, 4.594970703125, 4.7855224609375, 4.97607421875, 5.1666259765625, 5.357177734375, 5.5477294921875, 5.73828125]}, "gradients/decoder.transformer.h.7.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 2.0, 0.0, 4.0, 3.0, 4.0, 10.0, 15.0, 17.0, 24.0, 45.0, 60.0, 114.0, 166.0, 218.0, 352.0, 545.0, 841.0, 1181.0, 1817.0, 2808.0, 4172.0, 6139.0, 9230.0, 14070.0, 21483.0, 32464.0, 49304.0, 72841.0, 103788.0, 137073.0, 150790.0, 132778.0, 99880.0, 69216.0, 46506.0, 30704.0, 20140.0, 13417.0, 8944.0, 5871.0, 3968.0, 2489.0, 1732.0, 1126.0, 792.0, 441.0, 330.0, 216.0, 155.0, 80.0, 69.0, 48.0, 29.0, 21.0, 14.0, 9.0, 6.0, 6.0, 3.0, 0.0, 1.0, 1.0, 2.0], "bins": [-0.7177734375, -0.6949386596679688, -0.6721038818359375, -0.6492691040039062, -0.626434326171875, -0.6035995483398438, -0.5807647705078125, -0.5579299926757812, -0.53509521484375, -0.5122604370117188, -0.4894256591796875, -0.46659088134765625, -0.443756103515625, -0.42092132568359375, -0.3980865478515625, -0.37525177001953125, -0.3524169921875, -0.32958221435546875, -0.3067474365234375, -0.28391265869140625, -0.261077880859375, -0.23824310302734375, -0.2154083251953125, -0.19257354736328125, -0.16973876953125, -0.14690399169921875, -0.1240692138671875, -0.10123443603515625, -0.078399658203125, -0.05556488037109375, -0.0327301025390625, -0.00989532470703125, 0.012939453125, 0.03577423095703125, 0.0586090087890625, 0.08144378662109375, 0.104278564453125, 0.12711334228515625, 0.1499481201171875, 0.17278289794921875, 0.19561767578125, 0.21845245361328125, 0.2412872314453125, 0.26412200927734375, 0.286956787109375, 0.30979156494140625, 0.3326263427734375, 0.35546112060546875, 0.3782958984375, 0.40113067626953125, 0.4239654541015625, 0.44680023193359375, 0.469635009765625, 0.49246978759765625, 0.5153045654296875, 0.5381393432617188, 0.56097412109375, 0.5838088989257812, 0.6066436767578125, 0.6294784545898438, 0.652313232421875, 0.6751480102539062, 0.6979827880859375, 0.7208175659179688, 0.74365234375]}, "gradients/decoder.transformer.h.7.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 4.0, 1.0, 8.0, 3.0, 5.0, 9.0, 10.0, 4.0, 16.0, 17.0, 24.0, 19.0, 22.0, 27.0, 26.0, 42.0, 44.0, 40.0, 42.0, 49.0, 46.0, 51.0, 1088.0, 47.0, 56.0, 37.0, 47.0, 23.0, 28.0, 33.0, 33.0, 24.0, 20.0, 25.0, 14.0, 16.0, 3.0, 8.0, 10.0, 7.0, 3.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-4.91796875, -4.76983642578125, -4.6217041015625, -4.47357177734375, -4.325439453125, -4.17730712890625, -4.0291748046875, -3.88104248046875, -3.73291015625, -3.58477783203125, -3.4366455078125, -3.28851318359375, -3.140380859375, -2.99224853515625, -2.8441162109375, -2.69598388671875, -2.5478515625, -2.39971923828125, -2.2515869140625, -2.10345458984375, -1.955322265625, -1.80718994140625, -1.6590576171875, -1.51092529296875, -1.36279296875, -1.21466064453125, -1.0665283203125, -0.91839599609375, -0.770263671875, -0.62213134765625, -0.4739990234375, -0.32586669921875, -0.177734375, -0.02960205078125, 0.1185302734375, 0.26666259765625, 0.414794921875, 0.56292724609375, 0.7110595703125, 0.85919189453125, 1.00732421875, 1.15545654296875, 1.3035888671875, 1.45172119140625, 1.599853515625, 1.74798583984375, 1.8961181640625, 2.04425048828125, 2.1923828125, 2.34051513671875, 2.4886474609375, 2.63677978515625, 2.784912109375, 2.93304443359375, 3.0811767578125, 3.22930908203125, 3.37744140625, 3.52557373046875, 3.6737060546875, 3.82183837890625, 3.969970703125, 4.11810302734375, 4.2662353515625, 4.41436767578125, 4.5625]}, "gradients/decoder.transformer.h.7.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 3.0, 0.0, 2.0, 2.0, 3.0, 3.0, 6.0, 7.0, 15.0, 17.0, 28.0, 40.0, 66.0, 76.0, 117.0, 203.0, 373.0, 517.0, 838.0, 1446.0, 2378.0, 4080.0, 6723.0, 11375.0, 19488.0, 33038.0, 54983.0, 87873.0, 131004.0, 424500.0, 957678.0, 131831.0, 89594.0, 56198.0, 33639.0, 19938.0, 11695.0, 7136.0, 4064.0, 2421.0, 1434.0, 864.0, 524.0, 343.0, 208.0, 128.0, 82.0, 60.0, 47.0, 17.0, 11.0, 10.0, 9.0, 2.0, 4.0, 2.0, 4.0, 2.0, 0.0, 2.0], "bins": [-0.59521484375, -0.5777206420898438, -0.5602264404296875, -0.5427322387695312, -0.525238037109375, -0.5077438354492188, -0.4902496337890625, -0.47275543212890625, -0.45526123046875, -0.43776702880859375, -0.4202728271484375, -0.40277862548828125, -0.385284423828125, -0.36779022216796875, -0.3502960205078125, -0.33280181884765625, -0.3153076171875, -0.29781341552734375, -0.2803192138671875, -0.26282501220703125, -0.245330810546875, -0.22783660888671875, -0.2103424072265625, -0.19284820556640625, -0.17535400390625, -0.15785980224609375, -0.1403656005859375, -0.12287139892578125, -0.105377197265625, -0.08788299560546875, -0.0703887939453125, -0.05289459228515625, -0.035400390625, -0.01790618896484375, -0.0004119873046875, 0.01708221435546875, 0.034576416015625, 0.05207061767578125, 0.0695648193359375, 0.08705902099609375, 0.10455322265625, 0.12204742431640625, 0.1395416259765625, 0.15703582763671875, 0.174530029296875, 0.19202423095703125, 0.2095184326171875, 0.22701263427734375, 0.2445068359375, 0.26200103759765625, 0.2794952392578125, 0.29698944091796875, 0.314483642578125, 0.33197784423828125, 0.3494720458984375, 0.36696624755859375, 0.38446044921875, 0.40195465087890625, 0.4194488525390625, 0.43694305419921875, 0.454437255859375, 0.47193145751953125, 0.4894256591796875, 0.5069198608398438, 0.5244140625]}, "gradients/decoder.transformer.h.7.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 0.0, 1.0, 0.0, 3.0, 3.0, 4.0, 2.0, 3.0, 4.0, 7.0, 6.0, 4.0, 2.0, 4.0, 8.0, 12.0, 15.0, 15.0, 17.0, 23.0, 18.0, 19.0, 30.0, 41.0, 23.0, 38.0, 46.0, 73.0, 77.0, 71.0, 72.0, 48.0, 53.0, 33.0, 29.0, 32.0, 23.0, 23.0, 20.0, 13.0, 15.0, 16.0, 10.0, 11.0, 9.0, 6.0, 9.0, 6.0, 7.0, 4.0, 4.0, 2.0, 1.0, 1.0, 0.0, 2.0], "bins": [-0.0120086669921875, -0.011686563491821289, -0.011364459991455078, -0.011042356491088867, -0.010720252990722656, -0.010398149490356445, -0.010076045989990234, -0.009753942489624023, -0.009431838989257812, -0.009109735488891602, -0.00878763198852539, -0.00846552848815918, -0.008143424987792969, -0.007821321487426758, -0.007499217987060547, -0.007177114486694336, -0.006855010986328125, -0.006532907485961914, -0.006210803985595703, -0.005888700485229492, -0.005566596984863281, -0.00524449348449707, -0.004922389984130859, -0.0046002864837646484, -0.0042781829833984375, -0.0039560794830322266, -0.0036339759826660156, -0.0033118724822998047, -0.0029897689819335938, -0.002667665481567383, -0.002345561981201172, -0.002023458480834961, -0.00170135498046875, -0.001379251480102539, -0.0010571479797363281, -0.0007350444793701172, -0.00041294097900390625, -9.083747863769531e-05, 0.00023126602172851562, 0.0005533695220947266, 0.0008754730224609375, 0.0011975765228271484, 0.0015196800231933594, 0.0018417835235595703, 0.0021638870239257812, 0.002485990524291992, 0.002808094024658203, 0.003130197525024414, 0.003452301025390625, 0.003774404525756836, 0.004096508026123047, 0.004418611526489258, 0.004740715026855469, 0.00506281852722168, 0.005384922027587891, 0.0057070255279541016, 0.0060291290283203125, 0.0063512325286865234, 0.006673336029052734, 0.006995439529418945, 0.007317543029785156, 0.007639646530151367, 0.007961750030517578, 0.008283853530883789, 0.00860595703125]}, "gradients/decoder.transformer.h.7.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 5.0, 6.0, 5.0, 5.0, 9.0, 10.0, 14.0, 17.0, 16.0, 25.0, 31.0, 45.0, 52.0, 85.0, 88.0, 170.0, 269.0, 510.0, 4242.0, 1028796.0, 12550.0, 666.0, 291.0, 181.0, 127.0, 82.0, 50.0, 39.0, 42.0, 22.0, 32.0, 20.0, 8.0, 10.0, 6.0, 6.0, 5.0, 8.0, 4.0, 2.0, 1.0, 1.0, 4.0, 5.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.20263671875, -0.19501876831054688, -0.18740081787109375, -0.17978286743164062, -0.1721649169921875, -0.16454696655273438, -0.15692901611328125, -0.14931106567382812, -0.141693115234375, -0.13407516479492188, -0.12645721435546875, -0.11883926391601562, -0.1112213134765625, -0.10360336303710938, -0.09598541259765625, -0.08836746215820312, -0.08074951171875, -0.07313156127929688, -0.06551361083984375, -0.057895660400390625, -0.0502777099609375, -0.042659759521484375, -0.03504180908203125, -0.027423858642578125, -0.019805908203125, -0.012187957763671875, -0.00457000732421875, 0.003047943115234375, 0.0106658935546875, 0.018283843994140625, 0.02590179443359375, 0.033519744873046875, 0.0411376953125, 0.048755645751953125, 0.05637359619140625, 0.06399154663085938, 0.0716094970703125, 0.07922744750976562, 0.08684539794921875, 0.09446334838867188, 0.102081298828125, 0.10969924926757812, 0.11731719970703125, 0.12493515014648438, 0.1325531005859375, 0.14017105102539062, 0.14778900146484375, 0.15540695190429688, 0.16302490234375, 0.17064285278320312, 0.17826080322265625, 0.18587875366210938, 0.1934967041015625, 0.20111465454101562, 0.20873260498046875, 0.21635055541992188, 0.223968505859375, 0.23158645629882812, 0.23920440673828125, 0.24682235717773438, 0.2544403076171875, 0.2620582580566406, 0.26967620849609375, 0.2772941589355469, 0.284912109375]}, "gradients/decoder.transformer.h.7.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 153.0, 865.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.09518353641033173, -0.08733966946601868, -0.07949580997228622, -0.07165194302797318, -0.06380808353424072, -0.05596421658992767, -0.048120349645614624, -0.04027648642659187, -0.03243262320756912, -0.02458875998854637, -0.01674489490687847, -0.008901029825210571, -0.0010571666061878204, 0.00678669661283493, 0.01463056355714798, 0.02247442677617073, 0.03031828999519348, 0.03816215321421623, 0.04600601643323898, 0.05384988337755203, 0.06169374659657478, 0.06953760981559753, 0.07738147675991058, 0.08522534370422363, 0.09306920319795609, 0.10091307014226913, 0.10875692963600159, 0.11660079658031464, 0.12444466352462769, 0.13228851556777954, 0.14013239741325378, 0.14797624945640564, 0.1558201014995575, 0.16366396844387054, 0.1715078353881836, 0.17935168743133545, 0.1871955543756485, 0.19503942131996155, 0.2028832882642746, 0.21072715520858765, 0.2185710072517395, 0.22641487419605255, 0.2342587411403656, 0.24210259318351746, 0.2499464601278305, 0.25779032707214355, 0.2656341791152954, 0.27347806096076965, 0.2813219428062439, 0.28916579484939575, 0.29700967669487, 0.30485352873802185, 0.3126974105834961, 0.32054126262664795, 0.3283851146697998, 0.33622899651527405, 0.3440728485584259, 0.35191670060157776, 0.359760582447052, 0.36760443449020386, 0.3754483163356781, 0.38329216837882996, 0.3911360502243042, 0.39897990226745605, 0.4068237543106079]}, "gradients/decoder.transformer.h.7.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 1.0, 7.0, 15.0, 9.0, 17.0, 16.0, 26.0, 20.0, 31.0, 51.0, 39.0, 41.0, 44.0, 69.0, 55.0, 60.0, 60.0, 65.0, 35.0, 60.0, 62.0, 41.0, 38.0, 26.0, 28.0, 21.0, 14.0, 22.0, 13.0, 10.0, 5.0, 4.0, 6.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.019495010375976562, -0.01893671602010727, -0.018378421664237976, -0.017820127308368683, -0.01726183295249939, -0.016703538596630096, -0.016145244240760803, -0.01558694988489151, -0.015028655529022217, -0.014470361173152924, -0.01391206681728363, -0.013353772461414337, -0.012795478105545044, -0.01223718374967575, -0.011678889393806458, -0.011120595037937164, -0.010562300682067871, -0.010004006326198578, -0.009445711970329285, -0.008887417614459991, -0.008329123258590698, -0.007770828902721405, -0.007212534546852112, -0.006654240190982819, -0.006095945835113525, -0.005537651479244232, -0.004979357123374939, -0.004421062767505646, -0.0038627684116363525, -0.0033044740557670593, -0.002746179699897766, -0.002187885344028473, -0.0016295909881591797, -0.0010712966322898865, -0.0005130022764205933, 4.529207944869995e-05, 0.0006035864353179932, 0.0011618807911872864, 0.0017201751470565796, 0.002278469502925873, 0.002836763858795166, 0.0033950582146644592, 0.0039533525705337524, 0.004511646926403046, 0.005069941282272339, 0.005628235638141632, 0.006186529994010925, 0.0067448243498802185, 0.007303118705749512, 0.007861413061618805, 0.008419707417488098, 0.008978001773357391, 0.009536296129226685, 0.010094590485095978, 0.010652884840965271, 0.011211179196834564, 0.011769473552703857, 0.01232776790857315, 0.012886062264442444, 0.013444356620311737, 0.01400265097618103, 0.014560945332050323, 0.015119239687919617, 0.01567753404378891, 0.016235828399658203]}, "gradients/decoder.transformer.h.7.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 3.0, 3.0, 1.0, 3.0, 4.0, 2.0, 3.0, 8.0, 25.0, 13.0, 11.0, 21.0, 14.0, 25.0, 27.0, 22.0, 19.0, 48.0, 30.0, 30.0, 33.0, 47.0, 39.0, 31.0, 44.0, 53.0, 37.0, 46.0, 40.0, 33.0, 34.0, 30.0, 30.0, 28.0, 21.0, 18.0, 18.0, 21.0, 14.0, 13.0, 20.0, 9.0, 10.0, 3.0, 6.0, 4.0, 9.0, 1.0, 4.0, 1.0, 2.0, 2.0, 3.0, 0.0, 1.0], "bins": [-6.45703125, -6.2664794921875, -6.075927734375, -5.8853759765625, -5.69482421875, -5.5042724609375, -5.313720703125, -5.1231689453125, -4.9326171875, -4.7420654296875, -4.551513671875, -4.3609619140625, -4.17041015625, -3.9798583984375, -3.789306640625, -3.5987548828125, -3.408203125, -3.2176513671875, -3.027099609375, -2.8365478515625, -2.64599609375, -2.4554443359375, -2.264892578125, -2.0743408203125, -1.8837890625, -1.6932373046875, -1.502685546875, -1.3121337890625, -1.12158203125, -0.9310302734375, -0.740478515625, -0.5499267578125, -0.359375, -0.1688232421875, 0.021728515625, 0.2122802734375, 0.40283203125, 0.5933837890625, 0.783935546875, 0.9744873046875, 1.1650390625, 1.3555908203125, 1.546142578125, 1.7366943359375, 1.92724609375, 2.1177978515625, 2.308349609375, 2.4989013671875, 2.689453125, 2.8800048828125, 3.070556640625, 3.2611083984375, 3.45166015625, 3.6422119140625, 3.832763671875, 4.0233154296875, 4.2138671875, 4.4044189453125, 4.594970703125, 4.7855224609375, 4.97607421875, 5.1666259765625, 5.357177734375, 5.5477294921875, 5.73828125]}, "gradients/decoder.transformer.h.7.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 3.0, 1.0, 1.0, 2.0, 2.0, 1.0, 4.0, 3.0, 3.0, 7.0, 9.0, 18.0, 23.0, 24.0, 28.0, 31.0, 34.0, 69.0, 74.0, 103.0, 156.0, 203.0, 359.0, 673.0, 1693.0, 4465.0, 12782.0, 42175.0, 197723.0, 562156.0, 169808.0, 37061.0, 11640.0, 3910.0, 1562.0, 679.0, 323.0, 203.0, 123.0, 113.0, 70.0, 67.0, 32.0, 26.0, 27.0, 17.0, 17.0, 21.0, 16.0, 7.0, 5.0, 8.0, 5.0, 1.0, 1.0, 4.0, 2.0, 0.0, 1.0, 1.0], "bins": [-11.953125, -11.5958251953125, -11.238525390625, -10.8812255859375, -10.52392578125, -10.1666259765625, -9.809326171875, -9.4520263671875, -9.0947265625, -8.7374267578125, -8.380126953125, -8.0228271484375, -7.66552734375, -7.3082275390625, -6.950927734375, -6.5936279296875, -6.236328125, -5.8790283203125, -5.521728515625, -5.1644287109375, -4.80712890625, -4.4498291015625, -4.092529296875, -3.7352294921875, -3.3779296875, -3.0206298828125, -2.663330078125, -2.3060302734375, -1.94873046875, -1.5914306640625, -1.234130859375, -0.8768310546875, -0.51953125, -0.1622314453125, 0.195068359375, 0.5523681640625, 0.90966796875, 1.2669677734375, 1.624267578125, 1.9815673828125, 2.3388671875, 2.6961669921875, 3.053466796875, 3.4107666015625, 3.76806640625, 4.1253662109375, 4.482666015625, 4.8399658203125, 5.197265625, 5.5545654296875, 5.911865234375, 6.2691650390625, 6.62646484375, 6.9837646484375, 7.341064453125, 7.6983642578125, 8.0556640625, 8.4129638671875, 8.770263671875, 9.1275634765625, 9.48486328125, 9.8421630859375, 10.199462890625, 10.5567626953125, 10.9140625]}, "gradients/decoder.transformer.h.7.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 1.0, 1.0, 3.0, 6.0, 4.0, 8.0, 8.0, 16.0, 14.0, 17.0, 19.0, 22.0, 26.0, 28.0, 29.0, 29.0, 45.0, 48.0, 51.0, 59.0, 118.0, 253.0, 1444.0, 269.0, 124.0, 56.0, 52.0, 46.0, 36.0, 36.0, 32.0, 27.0, 28.0, 24.0, 19.0, 8.0, 13.0, 8.0, 9.0, 6.0, 8.0, 5.0, 4.0, 1.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-21.5, -20.860595703125, -20.22119140625, -19.581787109375, -18.9423828125, -18.302978515625, -17.66357421875, -17.024169921875, -16.384765625, -15.745361328125, -15.10595703125, -14.466552734375, -13.8271484375, -13.187744140625, -12.54833984375, -11.908935546875, -11.26953125, -10.630126953125, -9.99072265625, -9.351318359375, -8.7119140625, -8.072509765625, -7.43310546875, -6.793701171875, -6.154296875, -5.514892578125, -4.87548828125, -4.236083984375, -3.5966796875, -2.957275390625, -2.31787109375, -1.678466796875, -1.0390625, -0.399658203125, 0.23974609375, 0.879150390625, 1.5185546875, 2.157958984375, 2.79736328125, 3.436767578125, 4.076171875, 4.715576171875, 5.35498046875, 5.994384765625, 6.6337890625, 7.273193359375, 7.91259765625, 8.552001953125, 9.19140625, 9.830810546875, 10.47021484375, 11.109619140625, 11.7490234375, 12.388427734375, 13.02783203125, 13.667236328125, 14.306640625, 14.946044921875, 15.58544921875, 16.224853515625, 16.8642578125, 17.503662109375, 18.14306640625, 18.782470703125, 19.421875]}, "gradients/decoder.transformer.h.7.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 0.0, 4.0, 2.0, 3.0, 4.0, 5.0, 9.0, 9.0, 11.0, 15.0, 16.0, 19.0, 22.0, 25.0, 40.0, 58.0, 53.0, 95.0, 139.0, 230.0, 423.0, 1332.0, 12472.0, 1972207.0, 1146177.0, 9972.0, 1169.0, 422.0, 241.0, 134.0, 79.0, 69.0, 44.0, 49.0, 29.0, 31.0, 20.0, 18.0, 18.0, 15.0, 12.0, 8.0, 7.0, 2.0, 2.0, 1.0, 4.0, 0.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-42.4375, -41.02099609375, -39.6044921875, -38.18798828125, -36.771484375, -35.35498046875, -33.9384765625, -32.52197265625, -31.10546875, -29.68896484375, -28.2724609375, -26.85595703125, -25.439453125, -24.02294921875, -22.6064453125, -21.18994140625, -19.7734375, -18.35693359375, -16.9404296875, -15.52392578125, -14.107421875, -12.69091796875, -11.2744140625, -9.85791015625, -8.44140625, -7.02490234375, -5.6083984375, -4.19189453125, -2.775390625, -1.35888671875, 0.0576171875, 1.47412109375, 2.890625, 4.30712890625, 5.7236328125, 7.14013671875, 8.556640625, 9.97314453125, 11.3896484375, 12.80615234375, 14.22265625, 15.63916015625, 17.0556640625, 18.47216796875, 19.888671875, 21.30517578125, 22.7216796875, 24.13818359375, 25.5546875, 26.97119140625, 28.3876953125, 29.80419921875, 31.220703125, 32.63720703125, 34.0537109375, 35.47021484375, 36.88671875, 38.30322265625, 39.7197265625, 41.13623046875, 42.552734375, 43.96923828125, 45.3857421875, 46.80224609375, 48.21875]}, "gradients/decoder.transformer.h.7.ln_1.weight": {"_type": "histogram", "values": [14.0, 203.0, 575.0, 192.0, 33.0, 3.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.258474349975586, -7.544036388397217, -2.8295984268188477, 1.8848400115966797, 6.599277496337891, 11.313714981079102, 16.028154373168945, 20.742589950561523, 25.457029342651367, 30.171466827392578, 34.88590621948242, 39.600341796875, 44.314781188964844, 49.02922058105469, 53.74365997314453, 58.458091735839844, 63.17253112792969, 67.88697052001953, 72.60140991210938, 77.31584167480469, 82.03028106689453, 86.74472045898438, 91.45915985107422, 96.17359924316406, 100.8880386352539, 105.60247802734375, 110.3169174194336, 115.03135681152344, 119.74578857421875, 124.4602279663086, 129.17466735839844, 133.88909912109375, 138.60353088378906, 143.31796264648438, 148.03240966796875, 152.74684143066406, 157.46128845214844, 162.17572021484375, 166.89016723632812, 171.60459899902344, 176.31903076171875, 181.03346252441406, 185.74790954589844, 190.46234130859375, 195.17678833007812, 199.89122009277344, 204.60565185546875, 209.32009887695312, 214.0345458984375, 218.7489776611328, 223.4634246826172, 228.1778564453125, 232.89230346679688, 237.6067352294922, 242.3211669921875, 247.03561401367188, 251.7500457763672, 256.4644775390625, 261.1789245605469, 265.89337158203125, 270.6077880859375, 275.3222351074219, 280.03668212890625, 284.7510986328125, 289.4655456542969]}, "gradients/decoder.transformer.h.7.ln_1.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 4.0, 4.0, 5.0, 3.0, 3.0, 10.0, 6.0, 18.0, 17.0, 20.0, 17.0, 24.0, 33.0, 18.0, 33.0, 35.0, 31.0, 41.0, 44.0, 37.0, 37.0, 42.0, 40.0, 41.0, 33.0, 39.0, 34.0, 26.0, 37.0, 24.0, 31.0, 33.0, 30.0, 29.0, 25.0, 16.0, 17.0, 13.0, 14.0, 10.0, 5.0, 9.0, 7.0, 5.0, 6.0, 3.0, 4.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-52.386573791503906, -50.8758430480957, -49.3651123046875, -47.85438537597656, -46.34365463256836, -44.832923889160156, -43.32219696044922, -41.811466217041016, -40.30073547363281, -38.79000473022461, -37.279273986816406, -35.76854705810547, -34.257816314697266, -32.74708557128906, -31.236356735229492, -29.725627899169922, -28.21489715576172, -26.704166412353516, -25.193437576293945, -23.682708740234375, -22.171977996826172, -20.66124725341797, -19.1505184173584, -17.639789581298828, -16.129058837890625, -14.618329048156738, -13.107599258422852, -11.596869468688965, -10.086139678955078, -8.575409889221191, -7.064680099487305, -5.553950309753418, -4.043224334716797, -2.53249454498291, -1.0217647552490234, 0.4889650344848633, 1.99969482421875, 3.5104246139526367, 5.021154403686523, 6.53188419342041, 8.042613983154297, 9.553343772888184, 11.06407356262207, 12.574803352355957, 14.085533142089844, 15.59626293182373, 17.106992721557617, 18.617721557617188, 20.12845230102539, 21.639183044433594, 23.149911880493164, 24.660640716552734, 26.171371459960938, 27.68210220336914, 29.19283103942871, 30.70355987548828, 32.214290618896484, 33.72502136230469, 35.235748291015625, 36.74647903442383, 38.25720977783203, 39.767940521240234, 41.27867126464844, 42.789398193359375, 44.30012893676758]}, "gradients/decoder.transformer.h.6.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 0.0, 3.0, 2.0, 0.0, 1.0, 4.0, 2.0, 6.0, 6.0, 3.0, 15.0, 10.0, 11.0, 13.0, 16.0, 25.0, 21.0, 23.0, 33.0, 34.0, 24.0, 32.0, 33.0, 38.0, 36.0, 44.0, 41.0, 45.0, 41.0, 37.0, 34.0, 47.0, 42.0, 32.0, 30.0, 25.0, 35.0, 27.0, 18.0, 16.0, 20.0, 19.0, 12.0, 7.0, 6.0, 8.0, 12.0, 6.0, 3.0, 5.0, 3.0, 3.0, 2.0, 3.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.41015625, -6.20208740234375, -5.9940185546875, -5.78594970703125, -5.577880859375, -5.36981201171875, -5.1617431640625, -4.95367431640625, -4.74560546875, -4.53753662109375, -4.3294677734375, -4.12139892578125, -3.913330078125, -3.70526123046875, -3.4971923828125, -3.28912353515625, -3.0810546875, -2.87298583984375, -2.6649169921875, -2.45684814453125, -2.248779296875, -2.04071044921875, -1.8326416015625, -1.62457275390625, -1.41650390625, -1.20843505859375, -1.0003662109375, -0.79229736328125, -0.584228515625, -0.37615966796875, -0.1680908203125, 0.03997802734375, 0.248046875, 0.45611572265625, 0.6641845703125, 0.87225341796875, 1.080322265625, 1.28839111328125, 1.4964599609375, 1.70452880859375, 1.91259765625, 2.12066650390625, 2.3287353515625, 2.53680419921875, 2.744873046875, 2.95294189453125, 3.1610107421875, 3.36907958984375, 3.5771484375, 3.78521728515625, 3.9932861328125, 4.20135498046875, 4.409423828125, 4.61749267578125, 4.8255615234375, 5.03363037109375, 5.24169921875, 5.44976806640625, 5.6578369140625, 5.86590576171875, 6.073974609375, 6.28204345703125, 6.4901123046875, 6.69818115234375, 6.90625]}, "gradients/decoder.transformer.h.6.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 5.0, 4.0, 0.0, 7.0, 4.0, 3.0, 8.0, 7.0, 8.0, 15.0, 18.0, 13.0, 18.0, 28.0, 37.0, 47.0, 67.0, 132.0, 270.0, 707.0, 2263.0, 8754.0, 43308.0, 258156.0, 1363427.0, 1936906.0, 482649.0, 77406.0, 14707.0, 3468.0, 981.0, 365.0, 170.0, 96.0, 55.0, 40.0, 36.0, 19.0, 8.0, 14.0, 17.0, 12.0, 8.0, 5.0, 5.0, 5.0, 7.0, 2.0, 4.0, 3.0, 2.0, 2.0, 1.0, 0.0, 0.0, 2.0], "bins": [-12.484375, -12.1080322265625, -11.731689453125, -11.3553466796875, -10.97900390625, -10.6026611328125, -10.226318359375, -9.8499755859375, -9.4736328125, -9.0972900390625, -8.720947265625, -8.3446044921875, -7.96826171875, -7.5919189453125, -7.215576171875, -6.8392333984375, -6.462890625, -6.0865478515625, -5.710205078125, -5.3338623046875, -4.95751953125, -4.5811767578125, -4.204833984375, -3.8284912109375, -3.4521484375, -3.0758056640625, -2.699462890625, -2.3231201171875, -1.94677734375, -1.5704345703125, -1.194091796875, -0.8177490234375, -0.44140625, -0.0650634765625, 0.311279296875, 0.6876220703125, 1.06396484375, 1.4403076171875, 1.816650390625, 2.1929931640625, 2.5693359375, 2.9456787109375, 3.322021484375, 3.6983642578125, 4.07470703125, 4.4510498046875, 4.827392578125, 5.2037353515625, 5.580078125, 5.9564208984375, 6.332763671875, 6.7091064453125, 7.08544921875, 7.4617919921875, 7.838134765625, 8.2144775390625, 8.5908203125, 8.9671630859375, 9.343505859375, 9.7198486328125, 10.09619140625, 10.4725341796875, 10.848876953125, 11.2252197265625, 11.6015625]}, "gradients/decoder.transformer.h.6.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 3.0, 2.0, 7.0, 7.0, 10.0, 12.0, 14.0, 31.0, 30.0, 33.0, 65.0, 67.0, 80.0, 115.0, 171.0, 222.0, 273.0, 337.0, 391.0, 401.0, 432.0, 359.0, 256.0, 183.0, 135.0, 113.0, 85.0, 62.0, 40.0, 39.0, 26.0, 9.0, 23.0, 13.0, 8.0, 7.0, 2.0, 8.0, 2.0, 4.0, 2.0, 0.0, 3.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-10.21875, -9.8831787109375, -9.547607421875, -9.2120361328125, -8.87646484375, -8.5408935546875, -8.205322265625, -7.8697509765625, -7.5341796875, -7.1986083984375, -6.863037109375, -6.5274658203125, -6.19189453125, -5.8563232421875, -5.520751953125, -5.1851806640625, -4.849609375, -4.5140380859375, -4.178466796875, -3.8428955078125, -3.50732421875, -3.1717529296875, -2.836181640625, -2.5006103515625, -2.1650390625, -1.8294677734375, -1.493896484375, -1.1583251953125, -0.82275390625, -0.4871826171875, -0.151611328125, 0.1839599609375, 0.51953125, 0.8551025390625, 1.190673828125, 1.5262451171875, 1.86181640625, 2.1973876953125, 2.532958984375, 2.8685302734375, 3.2041015625, 3.5396728515625, 3.875244140625, 4.2108154296875, 4.54638671875, 4.8819580078125, 5.217529296875, 5.5531005859375, 5.888671875, 6.2242431640625, 6.559814453125, 6.8953857421875, 7.23095703125, 7.5665283203125, 7.902099609375, 8.2376708984375, 8.5732421875, 8.9088134765625, 9.244384765625, 9.5799560546875, 9.91552734375, 10.2510986328125, 10.586669921875, 10.9222412109375, 11.2578125]}, "gradients/decoder.transformer.h.6.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 1.0, 0.0, 2.0, 1.0, 2.0, 2.0, 3.0, 0.0, 2.0, 4.0, 11.0, 8.0, 14.0, 26.0, 26.0, 30.0, 49.0, 59.0, 79.0, 100.0, 139.0, 243.0, 347.0, 560.0, 1266.0, 4628.0, 31289.0, 395907.0, 3098244.0, 607835.0, 44020.0, 6021.0, 1492.0, 647.0, 360.0, 250.0, 144.0, 126.0, 96.0, 51.0, 55.0, 46.0, 33.0, 17.0, 15.0, 11.0, 8.0, 5.0, 3.0, 5.0, 6.0, 2.0, 0.0, 0.0, 0.0, 0.0, 4.0, 2.0, 1.0, 1.0, 2.0], "bins": [-24.359375, -23.587890625, -22.81640625, -22.044921875, -21.2734375, -20.501953125, -19.73046875, -18.958984375, -18.1875, -17.416015625, -16.64453125, -15.873046875, -15.1015625, -14.330078125, -13.55859375, -12.787109375, -12.015625, -11.244140625, -10.47265625, -9.701171875, -8.9296875, -8.158203125, -7.38671875, -6.615234375, -5.84375, -5.072265625, -4.30078125, -3.529296875, -2.7578125, -1.986328125, -1.21484375, -0.443359375, 0.328125, 1.099609375, 1.87109375, 2.642578125, 3.4140625, 4.185546875, 4.95703125, 5.728515625, 6.5, 7.271484375, 8.04296875, 8.814453125, 9.5859375, 10.357421875, 11.12890625, 11.900390625, 12.671875, 13.443359375, 14.21484375, 14.986328125, 15.7578125, 16.529296875, 17.30078125, 18.072265625, 18.84375, 19.615234375, 20.38671875, 21.158203125, 21.9296875, 22.701171875, 23.47265625, 24.244140625, 25.015625]}, "gradients/decoder.transformer.h.6.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 5.0, 7.0, 12.0, 24.0, 48.0, 81.0, 123.0, 134.0, 134.0, 142.0, 125.0, 86.0, 50.0, 26.0, 7.0, 5.0, 5.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-76.61207580566406, -74.57588195800781, -72.5396957397461, -70.50350189208984, -68.46731567382812, -66.43112182617188, -64.39492797851562, -62.358741760253906, -60.322547912597656, -58.28635787963867, -56.25016784667969, -54.21397399902344, -52.17778396606445, -50.14159393310547, -48.105403900146484, -46.0692138671875, -44.033023834228516, -41.99683380126953, -39.96064376831055, -37.92445373535156, -35.88825988769531, -33.85206985473633, -31.815879821777344, -29.77968978881836, -27.743497848510742, -25.707307815551758, -23.67111587524414, -21.634925842285156, -19.598735809326172, -17.562543869018555, -15.52635383605957, -13.49016284942627, -11.453975677490234, -9.417784690856934, -7.381594181060791, -5.345403671264648, -3.3092126846313477, -1.2730216979980469, 0.7631683349609375, 2.7993593215942383, 4.835550308227539, 6.87174129486084, 8.90793228149414, 10.944122314453125, 12.980313301086426, 15.016504287719727, 17.05269432067871, 19.088886260986328, 21.125076293945312, 23.161266326904297, 25.197458267211914, 27.2336483001709, 29.269840240478516, 31.3060302734375, 33.342220306396484, 35.37841033935547, 37.41460418701172, 39.4507942199707, 41.48698425292969, 43.52317810058594, 45.55936813354492, 47.595558166503906, 49.63174819946289, 51.667938232421875, 53.70412826538086]}, "gradients/decoder.transformer.h.6.ln_2.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 2.0, 10.0, 4.0, 3.0, 3.0, 12.0, 5.0, 9.0, 18.0, 20.0, 24.0, 27.0, 32.0, 23.0, 29.0, 42.0, 32.0, 38.0, 48.0, 40.0, 52.0, 36.0, 61.0, 43.0, 42.0, 52.0, 42.0, 27.0, 32.0, 32.0, 26.0, 22.0, 17.0, 18.0, 14.0, 17.0, 11.0, 13.0, 12.0, 6.0, 2.0, 4.0, 2.0, 3.0, 1.0, 2.0, 3.0, 0.0, 2.0, 1.0], "bins": [-47.48520278930664, -46.13641357421875, -44.787620544433594, -43.43882751464844, -42.09003829956055, -40.741249084472656, -39.3924560546875, -38.043663024902344, -36.69487380981445, -35.34608459472656, -33.997291564941406, -32.64849853515625, -31.29970932006836, -29.950918197631836, -28.602127075195312, -27.25333595275879, -25.904544830322266, -24.555753707885742, -23.20696258544922, -21.858171463012695, -20.509380340576172, -19.16058921813965, -17.811798095703125, -16.4630069732666, -15.114215850830078, -13.765424728393555, -12.416633605957031, -11.067842483520508, -9.719051361083984, -8.370260238647461, -7.0214691162109375, -5.672677993774414, -4.323883056640625, -2.9750919342041016, -1.6263008117675781, -0.2775096893310547, 1.0712814331054688, 2.420072555541992, 3.7688636779785156, 5.117654800415039, 6.4664459228515625, 7.815237045288086, 9.16402816772461, 10.512819290161133, 11.861610412597656, 13.21040153503418, 14.559192657470703, 15.907983779907227, 17.25677490234375, 18.605566024780273, 19.954357147216797, 21.30314826965332, 22.651939392089844, 24.000730514526367, 25.34952163696289, 26.698312759399414, 28.047103881835938, 29.39589500427246, 30.744686126708984, 32.093475341796875, 33.44226837158203, 34.79106140136719, 36.13985061645508, 37.48863983154297, 38.837432861328125]}, "gradients/decoder.transformer.h.6.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 2.0, 2.0, 3.0, 4.0, 3.0, 5.0, 1.0, 6.0, 6.0, 11.0, 8.0, 11.0, 13.0, 12.0, 17.0, 16.0, 33.0, 23.0, 28.0, 30.0, 29.0, 29.0, 32.0, 41.0, 34.0, 40.0, 49.0, 41.0, 38.0, 38.0, 38.0, 35.0, 26.0, 32.0, 40.0, 34.0, 20.0, 25.0, 26.0, 18.0, 20.0, 17.0, 9.0, 13.0, 13.0, 7.0, 8.0, 7.0, 6.0, 4.0, 0.0, 5.0, 2.0, 3.0, 3.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-5.88671875, -5.6824951171875, -5.478271484375, -5.2740478515625, -5.06982421875, -4.8656005859375, -4.661376953125, -4.4571533203125, -4.2529296875, -4.0487060546875, -3.844482421875, -3.6402587890625, -3.43603515625, -3.2318115234375, -3.027587890625, -2.8233642578125, -2.619140625, -2.4149169921875, -2.210693359375, -2.0064697265625, -1.80224609375, -1.5980224609375, -1.393798828125, -1.1895751953125, -0.9853515625, -0.7811279296875, -0.576904296875, -0.3726806640625, -0.16845703125, 0.0357666015625, 0.239990234375, 0.4442138671875, 0.6484375, 0.8526611328125, 1.056884765625, 1.2611083984375, 1.46533203125, 1.6695556640625, 1.873779296875, 2.0780029296875, 2.2822265625, 2.4864501953125, 2.690673828125, 2.8948974609375, 3.09912109375, 3.3033447265625, 3.507568359375, 3.7117919921875, 3.916015625, 4.1202392578125, 4.324462890625, 4.5286865234375, 4.73291015625, 4.9371337890625, 5.141357421875, 5.3455810546875, 5.5498046875, 5.7540283203125, 5.958251953125, 6.1624755859375, 6.36669921875, 6.5709228515625, 6.775146484375, 6.9793701171875, 7.18359375]}, "gradients/decoder.transformer.h.6.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 3.0, 1.0, 2.0, 1.0, 5.0, 6.0, 10.0, 18.0, 20.0, 41.0, 59.0, 97.0, 157.0, 239.0, 336.0, 587.0, 913.0, 1484.0, 2244.0, 3569.0, 5782.0, 9419.0, 14786.0, 23659.0, 37595.0, 58168.0, 87554.0, 124296.0, 155491.0, 155375.0, 122578.0, 86556.0, 57791.0, 36968.0, 23464.0, 14617.0, 9259.0, 5712.0, 3628.0, 2305.0, 1331.0, 945.0, 542.0, 331.0, 241.0, 120.0, 91.0, 66.0, 34.0, 31.0, 16.0, 12.0, 3.0, 5.0, 7.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.73779296875, -0.7131881713867188, -0.6885833740234375, -0.6639785766601562, -0.639373779296875, -0.6147689819335938, -0.5901641845703125, -0.5655593872070312, -0.54095458984375, -0.5163497924804688, -0.4917449951171875, -0.46714019775390625, -0.442535400390625, -0.41793060302734375, -0.3933258056640625, -0.36872100830078125, -0.3441162109375, -0.31951141357421875, -0.2949066162109375, -0.27030181884765625, -0.245697021484375, -0.22109222412109375, -0.1964874267578125, -0.17188262939453125, -0.14727783203125, -0.12267303466796875, -0.0980682373046875, -0.07346343994140625, -0.048858642578125, -0.02425384521484375, 0.0003509521484375, 0.02495574951171875, 0.049560546875, 0.07416534423828125, 0.0987701416015625, 0.12337493896484375, 0.147979736328125, 0.17258453369140625, 0.1971893310546875, 0.22179412841796875, 0.24639892578125, 0.27100372314453125, 0.2956085205078125, 0.32021331787109375, 0.344818115234375, 0.36942291259765625, 0.3940277099609375, 0.41863250732421875, 0.4432373046875, 0.46784210205078125, 0.4924468994140625, 0.5170516967773438, 0.541656494140625, 0.5662612915039062, 0.5908660888671875, 0.6154708862304688, 0.64007568359375, 0.6646804809570312, 0.6892852783203125, 0.7138900756835938, 0.738494873046875, 0.7630996704101562, 0.7877044677734375, 0.8123092651367188, 0.8369140625]}, "gradients/decoder.transformer.h.6.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 0.0, 1.0, 3.0, 3.0, 1.0, 2.0, 2.0, 1.0, 5.0, 4.0, 7.0, 12.0, 12.0, 12.0, 11.0, 15.0, 17.0, 13.0, 16.0, 24.0, 28.0, 33.0, 22.0, 43.0, 36.0, 52.0, 39.0, 44.0, 44.0, 1063.0, 35.0, 38.0, 31.0, 49.0, 27.0, 31.0, 31.0, 35.0, 25.0, 32.0, 18.0, 21.0, 22.0, 12.0, 18.0, 6.0, 10.0, 8.0, 7.0, 1.0, 3.0, 2.0, 6.0, 3.0, 2.0, 2.0, 2.0, 0.0, 0.0, 1.0, 1.0], "bins": [-4.70703125, -4.56219482421875, -4.4173583984375, -4.27252197265625, -4.127685546875, -3.98284912109375, -3.8380126953125, -3.69317626953125, -3.54833984375, -3.40350341796875, -3.2586669921875, -3.11383056640625, -2.968994140625, -2.82415771484375, -2.6793212890625, -2.53448486328125, -2.3896484375, -2.24481201171875, -2.0999755859375, -1.95513916015625, -1.810302734375, -1.66546630859375, -1.5206298828125, -1.37579345703125, -1.23095703125, -1.08612060546875, -0.9412841796875, -0.79644775390625, -0.651611328125, -0.50677490234375, -0.3619384765625, -0.21710205078125, -0.072265625, 0.07257080078125, 0.2174072265625, 0.36224365234375, 0.507080078125, 0.65191650390625, 0.7967529296875, 0.94158935546875, 1.08642578125, 1.23126220703125, 1.3760986328125, 1.52093505859375, 1.665771484375, 1.81060791015625, 1.9554443359375, 2.10028076171875, 2.2451171875, 2.38995361328125, 2.5347900390625, 2.67962646484375, 2.824462890625, 2.96929931640625, 3.1141357421875, 3.25897216796875, 3.40380859375, 3.54864501953125, 3.6934814453125, 3.83831787109375, 3.983154296875, 4.12799072265625, 4.2728271484375, 4.41766357421875, 4.5625]}, "gradients/decoder.transformer.h.6.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 2.0, 2.0, 0.0, 2.0, 4.0, 10.0, 11.0, 13.0, 34.0, 41.0, 52.0, 96.0, 139.0, 212.0, 292.0, 463.0, 711.0, 991.0, 1731.0, 2577.0, 3938.0, 6224.0, 9622.0, 15243.0, 23582.0, 35791.0, 54172.0, 79040.0, 109199.0, 139143.0, 1191127.0, 126779.0, 96189.0, 68230.0, 46423.0, 30116.0, 19737.0, 12619.0, 8038.0, 5200.0, 3232.0, 2108.0, 1349.0, 850.0, 622.0, 386.0, 260.0, 167.0, 124.0, 81.0, 57.0, 37.0, 23.0, 29.0, 7.0, 8.0, 5.0, 0.0, 3.0, 4.0, 2.0], "bins": [-0.533203125, -0.5171890258789062, -0.5011749267578125, -0.48516082763671875, -0.469146728515625, -0.45313262939453125, -0.4371185302734375, -0.42110443115234375, -0.40509033203125, -0.38907623291015625, -0.3730621337890625, -0.35704803466796875, -0.341033935546875, -0.32501983642578125, -0.3090057373046875, -0.29299163818359375, -0.2769775390625, -0.26096343994140625, -0.2449493408203125, -0.22893524169921875, -0.212921142578125, -0.19690704345703125, -0.1808929443359375, -0.16487884521484375, -0.14886474609375, -0.13285064697265625, -0.1168365478515625, -0.10082244873046875, -0.084808349609375, -0.06879425048828125, -0.0527801513671875, -0.03676605224609375, -0.020751953125, -0.00473785400390625, 0.0112762451171875, 0.02729034423828125, 0.043304443359375, 0.05931854248046875, 0.0753326416015625, 0.09134674072265625, 0.10736083984375, 0.12337493896484375, 0.1393890380859375, 0.15540313720703125, 0.171417236328125, 0.18743133544921875, 0.2034454345703125, 0.21945953369140625, 0.2354736328125, 0.25148773193359375, 0.2675018310546875, 0.28351593017578125, 0.299530029296875, 0.31554412841796875, 0.3315582275390625, 0.34757232666015625, 0.36358642578125, 0.37960052490234375, 0.3956146240234375, 0.41162872314453125, 0.427642822265625, 0.44365692138671875, 0.4596710205078125, 0.47568511962890625, 0.49169921875]}, "gradients/decoder.transformer.h.6.crossattention.q_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 2.0, 2.0, 1.0, 4.0, 1.0, 5.0, 3.0, 8.0, 8.0, 10.0, 10.0, 10.0, 14.0, 10.0, 15.0, 28.0, 24.0, 24.0, 34.0, 51.0, 47.0, 68.0, 86.0, 102.0, 85.0, 79.0, 51.0, 26.0, 35.0, 36.0, 20.0, 16.0, 17.0, 9.0, 12.0, 11.0, 9.0, 7.0, 7.0, 6.0, 3.0, 3.0, 2.0, 2.0, 2.0, 3.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.0109100341796875, -0.010610103607177734, -0.010310173034667969, -0.010010242462158203, -0.009710311889648438, -0.009410381317138672, -0.009110450744628906, -0.00881052017211914, -0.008510589599609375, -0.00821065902709961, -0.007910728454589844, -0.007610797882080078, -0.0073108673095703125, -0.007010936737060547, -0.006711006164550781, -0.006411075592041016, -0.00611114501953125, -0.005811214447021484, -0.005511283874511719, -0.005211353302001953, -0.0049114227294921875, -0.004611492156982422, -0.004311561584472656, -0.004011631011962891, -0.003711700439453125, -0.0034117698669433594, -0.0031118392944335938, -0.002811908721923828, -0.0025119781494140625, -0.002212047576904297, -0.0019121170043945312, -0.0016121864318847656, -0.001312255859375, -0.0010123252868652344, -0.0007123947143554688, -0.0004124641418457031, -0.0001125335693359375, 0.00018739700317382812, 0.00048732757568359375, 0.0007872581481933594, 0.001087188720703125, 0.0013871192932128906, 0.0016870498657226562, 0.001986980438232422, 0.0022869110107421875, 0.002586841583251953, 0.0028867721557617188, 0.0031867027282714844, 0.00348663330078125, 0.0037865638732910156, 0.004086494445800781, 0.004386425018310547, 0.0046863555908203125, 0.004986286163330078, 0.005286216735839844, 0.005586147308349609, 0.005886077880859375, 0.006186008453369141, 0.006485939025878906, 0.006785869598388672, 0.0070858001708984375, 0.007385730743408203, 0.007685661315917969, 0.007985591888427734, 0.0082855224609375]}, "gradients/decoder.transformer.h.6.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 3.0, 2.0, 0.0, 0.0, 5.0, 5.0, 4.0, 6.0, 9.0, 12.0, 13.0, 17.0, 20.0, 19.0, 33.0, 48.0, 65.0, 94.0, 139.0, 200.0, 456.0, 1834.0, 725911.0, 317146.0, 1423.0, 384.0, 219.0, 133.0, 81.0, 65.0, 49.0, 38.0, 28.0, 15.0, 11.0, 15.0, 15.0, 7.0, 6.0, 10.0, 3.0, 6.0, 4.0, 1.0, 5.0, 1.0, 2.0, 3.0, 1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-0.1739501953125, -0.16772842407226562, -0.16150665283203125, -0.15528488159179688, -0.1490631103515625, -0.14284133911132812, -0.13661956787109375, -0.13039779663085938, -0.124176025390625, -0.11795425415039062, -0.11173248291015625, -0.10551071166992188, -0.0992889404296875, -0.09306716918945312, -0.08684539794921875, -0.08062362670898438, -0.07440185546875, -0.06818008422851562, -0.06195831298828125, -0.055736541748046875, -0.0495147705078125, -0.043292999267578125, -0.03707122802734375, -0.030849456787109375, -0.024627685546875, -0.018405914306640625, -0.01218414306640625, -0.005962371826171875, 0.0002593994140625, 0.006481170654296875, 0.01270294189453125, 0.018924713134765625, 0.025146484375, 0.031368255615234375, 0.03759002685546875, 0.043811798095703125, 0.0500335693359375, 0.056255340576171875, 0.06247711181640625, 0.06869888305664062, 0.074920654296875, 0.08114242553710938, 0.08736419677734375, 0.09358596801757812, 0.0998077392578125, 0.10602951049804688, 0.11225128173828125, 0.11847305297851562, 0.12469482421875, 0.13091659545898438, 0.13713836669921875, 0.14336013793945312, 0.1495819091796875, 0.15580368041992188, 0.16202545166015625, 0.16824722290039062, 0.174468994140625, 0.18069076538085938, 0.18691253662109375, 0.19313430786132812, 0.1993560791015625, 0.20557785034179688, 0.21179962158203125, 0.21802139282226562, 0.2242431640625]}, "gradients/decoder.transformer.h.6.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 19.0, 926.0, 69.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.07837981730699539, -0.07418414950370789, -0.06998848170042038, -0.06579281389713287, -0.06159714609384537, -0.05740147829055786, -0.05320580676198006, -0.04901013895869255, -0.044814471155405045, -0.04061880335211754, -0.03642313554883003, -0.03222746402025223, -0.02803179807960987, -0.023836130276322365, -0.01964046061038971, -0.015444792807102203, -0.011249125003814697, -0.007053456734865904, -0.0028577884659171104, 0.0013378802686929703, 0.005533548071980476, 0.009729215875267982, 0.013924885541200638, 0.018120553344488144, 0.02231622114777565, 0.026511888951063156, 0.030707556754350662, 0.03490322828292847, 0.03909889608621597, 0.04329456388950348, 0.047490231692790985, 0.05168589949607849, 0.0558815598487854, 0.060077227652072906, 0.06427289545536041, 0.06846856325864792, 0.07266423106193542, 0.07685989886522293, 0.08105556666851044, 0.08525124192237854, 0.08944690227508545, 0.09364257007837296, 0.09783823788166046, 0.10203390568494797, 0.10622957348823547, 0.11042524129152298, 0.11462090909481049, 0.11881658434867859, 0.1230122521519661, 0.1272079199552536, 0.1314035952091217, 0.1355992555618286, 0.13979493081569672, 0.14399059116840363, 0.14818626642227173, 0.15238192677497864, 0.15657760202884674, 0.16077327728271484, 0.16496893763542175, 0.16916461288928986, 0.17336027324199677, 0.17755594849586487, 0.18175160884857178, 0.18594728410243988, 0.1901429444551468]}, "gradients/decoder.transformer.h.6.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 1.0, 4.0, 4.0, 2.0, 2.0, 3.0, 8.0, 14.0, 18.0, 12.0, 25.0, 23.0, 32.0, 31.0, 39.0, 64.0, 50.0, 66.0, 45.0, 63.0, 54.0, 50.0, 52.0, 45.0, 41.0, 41.0, 36.0, 31.0, 23.0, 31.0, 27.0, 11.0, 18.0, 16.0, 10.0, 9.0, 7.0, 2.0, 3.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0095747709274292, -0.009224273264408112, -0.008873775601387024, -0.008523277938365936, -0.008172780275344849, -0.007822282612323761, -0.007471784949302673, -0.007121287286281586, -0.006770789623260498, -0.00642029196023941, -0.006069794297218323, -0.005719296634197235, -0.0053687989711761475, -0.00501830130815506, -0.004667803645133972, -0.0043173059821128845, -0.003966808319091797, -0.0036163106560707092, -0.0032658129930496216, -0.002915315330028534, -0.0025648176670074463, -0.0022143200039863586, -0.001863822340965271, -0.0015133246779441833, -0.0011628270149230957, -0.0008123293519020081, -0.0004618316888809204, -0.00011133402585983276, 0.00023916363716125488, 0.0005896613001823425, 0.0009401589632034302, 0.0012906566262245178, 0.0016411542892456055, 0.001991651952266693, 0.0023421496152877808, 0.0026926472783088684, 0.003043144941329956, 0.0033936426043510437, 0.0037441402673721313, 0.004094637930393219, 0.004445135593414307, 0.004795633256435394, 0.005146130919456482, 0.00549662858247757, 0.005847126245498657, 0.006197623908519745, 0.0065481215715408325, 0.00689861923456192, 0.007249116897583008, 0.0075996145606040955, 0.007950112223625183, 0.00830060988664627, 0.008651107549667358, 0.009001605212688446, 0.009352102875709534, 0.009702600538730621, 0.010053098201751709, 0.010403595864772797, 0.010754093527793884, 0.011104591190814972, 0.01145508885383606, 0.011805586516857147, 0.012156084179878235, 0.012506581842899323, 0.01285707950592041]}, "gradients/decoder.transformer.h.6.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 2.0, 2.0, 3.0, 4.0, 3.0, 5.0, 1.0, 6.0, 6.0, 11.0, 8.0, 11.0, 13.0, 12.0, 17.0, 16.0, 33.0, 23.0, 28.0, 30.0, 29.0, 29.0, 32.0, 41.0, 34.0, 39.0, 50.0, 41.0, 38.0, 38.0, 38.0, 35.0, 26.0, 32.0, 40.0, 34.0, 20.0, 25.0, 26.0, 18.0, 20.0, 17.0, 9.0, 13.0, 13.0, 7.0, 8.0, 7.0, 6.0, 4.0, 0.0, 5.0, 2.0, 3.0, 3.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-5.88671875, -5.6824951171875, -5.478271484375, -5.2740478515625, -5.06982421875, -4.8656005859375, -4.661376953125, -4.4571533203125, -4.2529296875, -4.0487060546875, -3.844482421875, -3.6402587890625, -3.43603515625, -3.2318115234375, -3.027587890625, -2.8233642578125, -2.619140625, -2.4149169921875, -2.210693359375, -2.0064697265625, -1.80224609375, -1.5980224609375, -1.393798828125, -1.1895751953125, -0.9853515625, -0.7811279296875, -0.576904296875, -0.3726806640625, -0.16845703125, 0.0357666015625, 0.239990234375, 0.4442138671875, 0.6484375, 0.8526611328125, 1.056884765625, 1.2611083984375, 1.46533203125, 1.6695556640625, 1.873779296875, 2.0780029296875, 2.2822265625, 2.4864501953125, 2.690673828125, 2.8948974609375, 3.09912109375, 3.3033447265625, 3.507568359375, 3.7117919921875, 3.916015625, 4.1202392578125, 4.324462890625, 4.5286865234375, 4.73291015625, 4.9371337890625, 5.141357421875, 5.3455810546875, 5.5498046875, 5.7540283203125, 5.958251953125, 6.1624755859375, 6.36669921875, 6.5709228515625, 6.775146484375, 6.9793701171875, 7.18359375]}, "gradients/decoder.transformer.h.6.attn.c_proj.weight": {"_type": "histogram", "values": [5.0, 1.0, 2.0, 4.0, 9.0, 7.0, 6.0, 9.0, 10.0, 21.0, 27.0, 22.0, 32.0, 54.0, 51.0, 62.0, 97.0, 134.0, 149.0, 200.0, 257.0, 346.0, 599.0, 1127.0, 2551.0, 7353.0, 22009.0, 85231.0, 686308.0, 188341.0, 34709.0, 11049.0, 3823.0, 1514.0, 787.0, 424.0, 296.0, 216.0, 136.0, 139.0, 94.0, 76.0, 67.0, 40.0, 29.0, 43.0, 23.0, 12.0, 16.0, 13.0, 13.0, 5.0, 7.0, 6.0, 2.0, 6.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-14.21875, -13.72314453125, -13.2275390625, -12.73193359375, -12.236328125, -11.74072265625, -11.2451171875, -10.74951171875, -10.25390625, -9.75830078125, -9.2626953125, -8.76708984375, -8.271484375, -7.77587890625, -7.2802734375, -6.78466796875, -6.2890625, -5.79345703125, -5.2978515625, -4.80224609375, -4.306640625, -3.81103515625, -3.3154296875, -2.81982421875, -2.32421875, -1.82861328125, -1.3330078125, -0.83740234375, -0.341796875, 0.15380859375, 0.6494140625, 1.14501953125, 1.640625, 2.13623046875, 2.6318359375, 3.12744140625, 3.623046875, 4.11865234375, 4.6142578125, 5.10986328125, 5.60546875, 6.10107421875, 6.5966796875, 7.09228515625, 7.587890625, 8.08349609375, 8.5791015625, 9.07470703125, 9.5703125, 10.06591796875, 10.5615234375, 11.05712890625, 11.552734375, 12.04833984375, 12.5439453125, 13.03955078125, 13.53515625, 14.03076171875, 14.5263671875, 15.02197265625, 15.517578125, 16.01318359375, 16.5087890625, 17.00439453125, 17.5]}, "gradients/decoder.transformer.h.6.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 5.0, 2.0, 3.0, 2.0, 7.0, 8.0, 4.0, 5.0, 8.0, 10.0, 12.0, 19.0, 19.0, 25.0, 32.0, 20.0, 25.0, 36.0, 40.0, 31.0, 40.0, 59.0, 79.0, 195.0, 1462.0, 313.0, 118.0, 79.0, 53.0, 51.0, 51.0, 35.0, 45.0, 26.0, 19.0, 26.0, 21.0, 18.0, 9.0, 9.0, 12.0, 3.0, 10.0, 4.0, 2.0, 7.0, 3.0, 2.0, 1.0, 3.0], "bins": [-24.9375, -24.295166015625, -23.65283203125, -23.010498046875, -22.3681640625, -21.725830078125, -21.08349609375, -20.441162109375, -19.798828125, -19.156494140625, -18.51416015625, -17.871826171875, -17.2294921875, -16.587158203125, -15.94482421875, -15.302490234375, -14.66015625, -14.017822265625, -13.37548828125, -12.733154296875, -12.0908203125, -11.448486328125, -10.80615234375, -10.163818359375, -9.521484375, -8.879150390625, -8.23681640625, -7.594482421875, -6.9521484375, -6.309814453125, -5.66748046875, -5.025146484375, -4.3828125, -3.740478515625, -3.09814453125, -2.455810546875, -1.8134765625, -1.171142578125, -0.52880859375, 0.113525390625, 0.755859375, 1.398193359375, 2.04052734375, 2.682861328125, 3.3251953125, 3.967529296875, 4.60986328125, 5.252197265625, 5.89453125, 6.536865234375, 7.17919921875, 7.821533203125, 8.4638671875, 9.106201171875, 9.74853515625, 10.390869140625, 11.033203125, 11.675537109375, 12.31787109375, 12.960205078125, 13.6025390625, 14.244873046875, 14.88720703125, 15.529541015625, 16.171875]}, "gradients/decoder.transformer.h.6.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 0.0, 5.0, 4.0, 7.0, 4.0, 9.0, 11.0, 8.0, 12.0, 17.0, 12.0, 24.0, 31.0, 26.0, 41.0, 65.0, 70.0, 110.0, 161.0, 299.0, 692.0, 2622.0, 41013.0, 3006496.0, 88260.0, 3859.0, 851.0, 350.0, 171.0, 101.0, 75.0, 55.0, 46.0, 49.0, 29.0, 23.0, 24.0, 15.0, 14.0, 17.0, 10.0, 7.0, 8.0, 4.0, 3.0, 2.0, 3.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-45.59375, -43.87939453125, -42.1650390625, -40.45068359375, -38.736328125, -37.02197265625, -35.3076171875, -33.59326171875, -31.87890625, -30.16455078125, -28.4501953125, -26.73583984375, -25.021484375, -23.30712890625, -21.5927734375, -19.87841796875, -18.1640625, -16.44970703125, -14.7353515625, -13.02099609375, -11.306640625, -9.59228515625, -7.8779296875, -6.16357421875, -4.44921875, -2.73486328125, -1.0205078125, 0.69384765625, 2.408203125, 4.12255859375, 5.8369140625, 7.55126953125, 9.265625, 10.97998046875, 12.6943359375, 14.40869140625, 16.123046875, 17.83740234375, 19.5517578125, 21.26611328125, 22.98046875, 24.69482421875, 26.4091796875, 28.12353515625, 29.837890625, 31.55224609375, 33.2666015625, 34.98095703125, 36.6953125, 38.40966796875, 40.1240234375, 41.83837890625, 43.552734375, 45.26708984375, 46.9814453125, 48.69580078125, 50.41015625, 52.12451171875, 53.8388671875, 55.55322265625, 57.267578125, 58.98193359375, 60.6962890625, 62.41064453125, 64.125]}, "gradients/decoder.transformer.h.6.ln_1.weight": {"_type": "histogram", "values": [7.0, 81.0, 356.0, 417.0, 137.0, 18.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-13.232892990112305, -9.023740768432617, -4.814587593078613, -0.6054344177246094, 3.603717803955078, 7.812870025634766, 12.022024154663086, 16.231176376342773, 20.44032859802246, 24.64948081970215, 28.85863494873047, 33.067787170410156, 37.276939392089844, 41.48609161376953, 45.69524383544922, 49.904396057128906, 54.113548278808594, 58.32270050048828, 62.53185272216797, 66.74100494384766, 70.95015716552734, 75.15930938720703, 79.36846923828125, 83.57762145996094, 87.78677368164062, 91.99592590332031, 96.205078125, 100.41423034667969, 104.62338256835938, 108.83253479003906, 113.04168701171875, 117.25083923339844, 121.46000671386719, 125.66915893554688, 129.87831115722656, 134.08746337890625, 138.29661560058594, 142.50576782226562, 146.7149200439453, 150.924072265625, 155.1332244873047, 159.34237670898438, 163.55152893066406, 167.76068115234375, 171.96983337402344, 176.17898559570312, 180.3881378173828, 184.5972900390625, 188.80645751953125, 193.01560974121094, 197.22476196289062, 201.4339141845703, 205.64306640625, 209.8522186279297, 214.06137084960938, 218.27052307128906, 222.47967529296875, 226.68882751464844, 230.89797973632812, 235.1071319580078, 239.3162841796875, 243.5254364013672, 247.73458862304688, 251.94374084472656, 256.15289306640625]}, "gradients/decoder.transformer.h.6.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 5.0, 2.0, 2.0, 4.0, 1.0, 9.0, 4.0, 8.0, 7.0, 7.0, 10.0, 17.0, 16.0, 23.0, 16.0, 24.0, 24.0, 30.0, 27.0, 28.0, 34.0, 35.0, 38.0, 32.0, 40.0, 36.0, 38.0, 41.0, 36.0, 41.0, 30.0, 30.0, 36.0, 26.0, 34.0, 20.0, 29.0, 16.0, 26.0, 18.0, 20.0, 27.0, 15.0, 13.0, 10.0, 10.0, 8.0, 3.0, 3.0, 5.0, 0.0, 2.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-47.96725082397461, -46.43487548828125, -44.902503967285156, -43.3701286315918, -41.83775329589844, -40.30537796020508, -38.77300262451172, -37.240631103515625, -35.708255767822266, -34.175880432128906, -32.64350891113281, -31.111133575439453, -29.578758239746094, -28.046382904052734, -26.514009475708008, -24.98163604736328, -23.449260711669922, -21.916885375976562, -20.384511947631836, -18.85213851928711, -17.31976318359375, -15.787388801574707, -14.255014419555664, -12.722640037536621, -11.190265655517578, -9.657891273498535, -8.125516891479492, -6.593142509460449, -5.060768127441406, -3.5283937454223633, -1.9960193634033203, -0.46364498138427734, 1.0687255859375, 2.601099967956543, 4.133474349975586, 5.665848731994629, 7.198223114013672, 8.730597496032715, 10.262971878051758, 11.7953462600708, 13.327720642089844, 14.860095024108887, 16.39246940612793, 17.924842834472656, 19.457218170166016, 20.989593505859375, 22.5219669342041, 24.054340362548828, 25.586715698242188, 27.119091033935547, 28.651464462280273, 30.183837890625, 31.71621322631836, 33.24858856201172, 34.78096008300781, 36.31333541870117, 37.84571075439453, 39.37808609008789, 40.91046142578125, 42.442832946777344, 43.9752082824707, 45.50758361816406, 47.039955139160156, 48.572330474853516, 50.104705810546875]}, "gradients/decoder.transformer.h.5.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 2.0, 4.0, 3.0, 3.0, 6.0, 9.0, 4.0, 7.0, 9.0, 11.0, 11.0, 15.0, 12.0, 18.0, 18.0, 27.0, 30.0, 18.0, 44.0, 39.0, 37.0, 39.0, 41.0, 34.0, 38.0, 49.0, 42.0, 43.0, 34.0, 42.0, 31.0, 33.0, 35.0, 23.0, 27.0, 28.0, 31.0, 17.0, 10.0, 24.0, 8.0, 9.0, 6.0, 8.0, 9.0, 9.0, 8.0, 4.0, 1.0, 1.0, 1.0, 2.0, 3.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-6.6640625, -6.43975830078125, -6.2154541015625, -5.99114990234375, -5.766845703125, -5.54254150390625, -5.3182373046875, -5.09393310546875, -4.86962890625, -4.64532470703125, -4.4210205078125, -4.19671630859375, -3.972412109375, -3.74810791015625, -3.5238037109375, -3.29949951171875, -3.0751953125, -2.85089111328125, -2.6265869140625, -2.40228271484375, -2.177978515625, -1.95367431640625, -1.7293701171875, -1.50506591796875, -1.28076171875, -1.05645751953125, -0.8321533203125, -0.60784912109375, -0.383544921875, -0.15924072265625, 0.0650634765625, 0.28936767578125, 0.513671875, 0.73797607421875, 0.9622802734375, 1.18658447265625, 1.410888671875, 1.63519287109375, 1.8594970703125, 2.08380126953125, 2.30810546875, 2.53240966796875, 2.7567138671875, 2.98101806640625, 3.205322265625, 3.42962646484375, 3.6539306640625, 3.87823486328125, 4.1025390625, 4.32684326171875, 4.5511474609375, 4.77545166015625, 4.999755859375, 5.22406005859375, 5.4483642578125, 5.67266845703125, 5.89697265625, 6.12127685546875, 6.3455810546875, 6.56988525390625, 6.794189453125, 7.01849365234375, 7.2427978515625, 7.46710205078125, 7.69140625]}, "gradients/decoder.transformer.h.5.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 2.0, 2.0, 0.0, 3.0, 2.0, 2.0, 4.0, 7.0, 11.0, 7.0, 19.0, 14.0, 17.0, 20.0, 22.0, 28.0, 53.0, 73.0, 109.0, 168.0, 304.0, 579.0, 1382.0, 3603.0, 10775.0, 38377.0, 163390.0, 725926.0, 1874473.0, 1052404.0, 244571.0, 55213.0, 14540.0, 4744.0, 1716.0, 710.0, 396.0, 166.0, 149.0, 69.0, 69.0, 32.0, 27.0, 27.0, 23.0, 14.0, 14.0, 6.0, 8.0, 7.0, 3.0, 6.0, 3.0, 3.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0], "bins": [-10.53125, -10.1983642578125, -9.865478515625, -9.5325927734375, -9.19970703125, -8.8668212890625, -8.533935546875, -8.2010498046875, -7.8681640625, -7.5352783203125, -7.202392578125, -6.8695068359375, -6.53662109375, -6.2037353515625, -5.870849609375, -5.5379638671875, -5.205078125, -4.8721923828125, -4.539306640625, -4.2064208984375, -3.87353515625, -3.5406494140625, -3.207763671875, -2.8748779296875, -2.5419921875, -2.2091064453125, -1.876220703125, -1.5433349609375, -1.21044921875, -0.8775634765625, -0.544677734375, -0.2117919921875, 0.12109375, 0.4539794921875, 0.786865234375, 1.1197509765625, 1.45263671875, 1.7855224609375, 2.118408203125, 2.4512939453125, 2.7841796875, 3.1170654296875, 3.449951171875, 3.7828369140625, 4.11572265625, 4.4486083984375, 4.781494140625, 5.1143798828125, 5.447265625, 5.7801513671875, 6.113037109375, 6.4459228515625, 6.77880859375, 7.1116943359375, 7.444580078125, 7.7774658203125, 8.1103515625, 8.4432373046875, 8.776123046875, 9.1090087890625, 9.44189453125, 9.7747802734375, 10.107666015625, 10.4405517578125, 10.7734375]}, "gradients/decoder.transformer.h.5.mlp.c_fc.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 5.0, 2.0, 2.0, 4.0, 8.0, 4.0, 7.0, 14.0, 15.0, 23.0, 29.0, 32.0, 58.0, 72.0, 104.0, 144.0, 189.0, 281.0, 384.0, 490.0, 480.0, 480.0, 334.0, 259.0, 199.0, 130.0, 87.0, 58.0, 50.0, 37.0, 25.0, 22.0, 13.0, 14.0, 12.0, 5.0, 5.0, 1.0, 4.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.859375, -12.4605712890625, -12.061767578125, -11.6629638671875, -11.26416015625, -10.8653564453125, -10.466552734375, -10.0677490234375, -9.6689453125, -9.2701416015625, -8.871337890625, -8.4725341796875, -8.07373046875, -7.6749267578125, -7.276123046875, -6.8773193359375, -6.478515625, -6.0797119140625, -5.680908203125, -5.2821044921875, -4.88330078125, -4.4844970703125, -4.085693359375, -3.6868896484375, -3.2880859375, -2.8892822265625, -2.490478515625, -2.0916748046875, -1.69287109375, -1.2940673828125, -0.895263671875, -0.4964599609375, -0.09765625, 0.3011474609375, 0.699951171875, 1.0987548828125, 1.49755859375, 1.8963623046875, 2.295166015625, 2.6939697265625, 3.0927734375, 3.4915771484375, 3.890380859375, 4.2891845703125, 4.68798828125, 5.0867919921875, 5.485595703125, 5.8843994140625, 6.283203125, 6.6820068359375, 7.080810546875, 7.4796142578125, 7.87841796875, 8.2772216796875, 8.676025390625, 9.0748291015625, 9.4736328125, 9.8724365234375, 10.271240234375, 10.6700439453125, 11.06884765625, 11.4676513671875, 11.866455078125, 12.2652587890625, 12.6640625]}, "gradients/decoder.transformer.h.5.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 3.0, 3.0, 4.0, 3.0, 10.0, 19.0, 13.0, 22.0, 18.0, 39.0, 58.0, 80.0, 113.0, 171.0, 307.0, 594.0, 1398.0, 4944.0, 42323.0, 969154.0, 3019405.0, 140610.0, 10939.0, 2137.0, 816.0, 372.0, 250.0, 155.0, 95.0, 76.0, 40.0, 29.0, 21.0, 19.0, 13.0, 7.0, 8.0, 7.0, 4.0, 4.0, 4.0, 2.0, 1.0, 0.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-30.125, -29.1279296875, -28.130859375, -27.1337890625, -26.13671875, -25.1396484375, -24.142578125, -23.1455078125, -22.1484375, -21.1513671875, -20.154296875, -19.1572265625, -18.16015625, -17.1630859375, -16.166015625, -15.1689453125, -14.171875, -13.1748046875, -12.177734375, -11.1806640625, -10.18359375, -9.1865234375, -8.189453125, -7.1923828125, -6.1953125, -5.1982421875, -4.201171875, -3.2041015625, -2.20703125, -1.2099609375, -0.212890625, 0.7841796875, 1.78125, 2.7783203125, 3.775390625, 4.7724609375, 5.76953125, 6.7666015625, 7.763671875, 8.7607421875, 9.7578125, 10.7548828125, 11.751953125, 12.7490234375, 13.74609375, 14.7431640625, 15.740234375, 16.7373046875, 17.734375, 18.7314453125, 19.728515625, 20.7255859375, 21.72265625, 22.7197265625, 23.716796875, 24.7138671875, 25.7109375, 26.7080078125, 27.705078125, 28.7021484375, 29.69921875, 30.6962890625, 31.693359375, 32.6904296875, 33.6875]}, "gradients/decoder.transformer.h.5.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 4.0, 31.0, 132.0, 335.0, 358.0, 138.0, 14.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-168.94461059570312, -162.89486694335938, -156.84510803222656, -150.7953643798828, -144.74560546875, -138.69586181640625, -132.6461181640625, -126.59635925292969, -120.5466079711914, -114.49685668945312, -108.44710540771484, -102.39735412597656, -96.34761047363281, -90.2978515625, -84.24810791015625, -78.19835662841797, -72.14860534667969, -66.0988540649414, -60.049102783203125, -53.99935531616211, -47.94960403442383, -41.89985275268555, -35.85010528564453, -29.80035400390625, -23.75060272216797, -17.700851440429688, -11.651102066040039, -5.601352691650391, 0.4483985900878906, 6.498149871826172, 12.547897338867188, 18.59764862060547, 24.647384643554688, 30.69713592529297, 36.74688720703125, 42.796634674072266, 48.84638595581055, 54.89613723754883, 60.945884704589844, 66.99563598632812, 73.0453872680664, 79.09513854980469, 85.14488983154297, 91.19464111328125, 97.244384765625, 103.29414367675781, 109.34388732910156, 115.39363861083984, 121.44338989257812, 127.4931411743164, 133.5428924560547, 139.59263610839844, 145.64239501953125, 151.692138671875, 157.74188232421875, 163.79164123535156, 169.84140014648438, 175.89114379882812, 181.94090270996094, 187.9906463623047, 194.0404052734375, 200.09014892578125, 206.139892578125, 212.1896514892578, 218.23939514160156]}, "gradients/decoder.transformer.h.5.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 2.0, 4.0, 4.0, 6.0, 3.0, 4.0, 9.0, 12.0, 12.0, 12.0, 20.0, 20.0, 11.0, 22.0, 23.0, 32.0, 34.0, 37.0, 31.0, 46.0, 37.0, 32.0, 40.0, 47.0, 33.0, 48.0, 38.0, 28.0, 29.0, 33.0, 41.0, 37.0, 35.0, 14.0, 21.0, 21.0, 27.0, 17.0, 10.0, 16.0, 15.0, 13.0, 10.0, 6.0, 3.0, 5.0, 3.0, 2.0, 3.0, 3.0, 2.0, 2.0, 1.0, 0.0, 2.0, 0.0, 1.0], "bins": [-37.40069580078125, -36.1751708984375, -34.94964599609375, -33.724117279052734, -32.498592376708984, -31.273067474365234, -30.04754066467285, -28.82201385498047, -27.59648895263672, -26.37096405029297, -25.145437240600586, -23.919910430908203, -22.694385528564453, -21.468860626220703, -20.24333381652832, -19.017807006835938, -17.792282104492188, -16.566757202148438, -15.341230392456055, -14.115704536437988, -12.890178680419922, -11.664652824401855, -10.439126968383789, -9.213601112365723, -7.988075256347656, -6.76254940032959, -5.537023544311523, -4.311497688293457, -3.0859718322753906, -1.8604459762573242, -0.6349201202392578, 0.5906057357788086, 1.8161354064941406, 3.041661262512207, 4.267187118530273, 5.49271297454834, 6.718238830566406, 7.943764686584473, 9.169290542602539, 10.394816398620605, 11.620342254638672, 12.845868110656738, 14.071393966674805, 15.296919822692871, 16.522445678710938, 17.747970581054688, 18.97349739074707, 20.199024200439453, 21.424549102783203, 22.650074005126953, 23.875600814819336, 25.10112762451172, 26.32665252685547, 27.55217742919922, 28.7777042388916, 30.003231048583984, 31.228755950927734, 32.454280853271484, 33.6798095703125, 34.90533447265625, 36.130859375, 37.35638427734375, 38.5819091796875, 39.807437896728516, 41.032962799072266]}, "gradients/decoder.transformer.h.5.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 4.0, 3.0, 4.0, 9.0, 10.0, 5.0, 11.0, 10.0, 13.0, 26.0, 22.0, 20.0, 28.0, 21.0, 32.0, 44.0, 35.0, 36.0, 37.0, 56.0, 48.0, 47.0, 48.0, 47.0, 37.0, 46.0, 45.0, 37.0, 32.0, 31.0, 22.0, 19.0, 21.0, 21.0, 18.0, 13.0, 16.0, 10.0, 9.0, 3.0, 4.0, 2.0, 4.0, 0.0, 4.0, 2.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-7.25, -7.006591796875, -6.76318359375, -6.519775390625, -6.2763671875, -6.032958984375, -5.78955078125, -5.546142578125, -5.302734375, -5.059326171875, -4.81591796875, -4.572509765625, -4.3291015625, -4.085693359375, -3.84228515625, -3.598876953125, -3.35546875, -3.112060546875, -2.86865234375, -2.625244140625, -2.3818359375, -2.138427734375, -1.89501953125, -1.651611328125, -1.408203125, -1.164794921875, -0.92138671875, -0.677978515625, -0.4345703125, -0.191162109375, 0.05224609375, 0.295654296875, 0.5390625, 0.782470703125, 1.02587890625, 1.269287109375, 1.5126953125, 1.756103515625, 1.99951171875, 2.242919921875, 2.486328125, 2.729736328125, 2.97314453125, 3.216552734375, 3.4599609375, 3.703369140625, 3.94677734375, 4.190185546875, 4.43359375, 4.677001953125, 4.92041015625, 5.163818359375, 5.4072265625, 5.650634765625, 5.89404296875, 6.137451171875, 6.380859375, 6.624267578125, 6.86767578125, 7.111083984375, 7.3544921875, 7.597900390625, 7.84130859375, 8.084716796875, 8.328125]}, "gradients/decoder.transformer.h.5.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 4.0, 4.0, 8.0, 17.0, 16.0, 30.0, 39.0, 54.0, 93.0, 142.0, 206.0, 287.0, 446.0, 575.0, 925.0, 1405.0, 2110.0, 3034.0, 4723.0, 7320.0, 10912.0, 16870.0, 25890.0, 39162.0, 59454.0, 86754.0, 118581.0, 144067.0, 143976.0, 118776.0, 87437.0, 59737.0, 39730.0, 26043.0, 17293.0, 11091.0, 7257.0, 4662.0, 3038.0, 2075.0, 1340.0, 979.0, 602.0, 414.0, 311.0, 220.0, 131.0, 109.0, 68.0, 39.0, 41.0, 28.0, 11.0, 11.0, 11.0, 7.0, 4.0, 3.0, 0.0, 1.0], "bins": [-0.76416015625, -0.7402877807617188, -0.7164154052734375, -0.6925430297851562, -0.668670654296875, -0.6447982788085938, -0.6209259033203125, -0.5970535278320312, -0.57318115234375, -0.5493087768554688, -0.5254364013671875, -0.5015640258789062, -0.477691650390625, -0.45381927490234375, -0.4299468994140625, -0.40607452392578125, -0.3822021484375, -0.35832977294921875, -0.3344573974609375, -0.31058502197265625, -0.286712646484375, -0.26284027099609375, -0.2389678955078125, -0.21509552001953125, -0.19122314453125, -0.16735076904296875, -0.1434783935546875, -0.11960601806640625, -0.095733642578125, -0.07186126708984375, -0.0479888916015625, -0.02411651611328125, -0.000244140625, 0.02362823486328125, 0.0475006103515625, 0.07137298583984375, 0.095245361328125, 0.11911773681640625, 0.1429901123046875, 0.16686248779296875, 0.19073486328125, 0.21460723876953125, 0.2384796142578125, 0.26235198974609375, 0.286224365234375, 0.31009674072265625, 0.3339691162109375, 0.35784149169921875, 0.3817138671875, 0.40558624267578125, 0.4294586181640625, 0.45333099365234375, 0.477203369140625, 0.5010757446289062, 0.5249481201171875, 0.5488204956054688, 0.57269287109375, 0.5965652465820312, 0.6204376220703125, 0.6443099975585938, 0.668182373046875, 0.6920547485351562, 0.7159271240234375, 0.7397994995117188, 0.763671875]}, "gradients/decoder.transformer.h.5.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 5.0, 1.0, 5.0, 5.0, 3.0, 9.0, 12.0, 6.0, 4.0, 9.0, 9.0, 24.0, 20.0, 17.0, 32.0, 35.0, 35.0, 35.0, 39.0, 38.0, 49.0, 36.0, 1076.0, 44.0, 46.0, 44.0, 38.0, 34.0, 50.0, 29.0, 27.0, 23.0, 27.0, 22.0, 23.0, 24.0, 19.0, 23.0, 13.0, 12.0, 9.0, 1.0, 6.0, 6.0, 3.0, 3.0, 3.0, 2.0, 0.0, 4.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.62109375, -4.47174072265625, -4.3223876953125, -4.17303466796875, -4.023681640625, -3.87432861328125, -3.7249755859375, -3.57562255859375, -3.42626953125, -3.27691650390625, -3.1275634765625, -2.97821044921875, -2.828857421875, -2.67950439453125, -2.5301513671875, -2.38079833984375, -2.2314453125, -2.08209228515625, -1.9327392578125, -1.78338623046875, -1.634033203125, -1.48468017578125, -1.3353271484375, -1.18597412109375, -1.03662109375, -0.88726806640625, -0.7379150390625, -0.58856201171875, -0.439208984375, -0.28985595703125, -0.1405029296875, 0.00885009765625, 0.158203125, 0.30755615234375, 0.4569091796875, 0.60626220703125, 0.755615234375, 0.90496826171875, 1.0543212890625, 1.20367431640625, 1.35302734375, 1.50238037109375, 1.6517333984375, 1.80108642578125, 1.950439453125, 2.09979248046875, 2.2491455078125, 2.39849853515625, 2.5478515625, 2.69720458984375, 2.8465576171875, 2.99591064453125, 3.145263671875, 3.29461669921875, 3.4439697265625, 3.59332275390625, 3.74267578125, 3.89202880859375, 4.0413818359375, 4.19073486328125, 4.340087890625, 4.48944091796875, 4.6387939453125, 4.78814697265625, 4.9375]}, "gradients/decoder.transformer.h.5.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 4.0, 6.0, 2.0, 5.0, 9.0, 29.0, 16.0, 45.0, 83.0, 96.0, 167.0, 239.0, 385.0, 593.0, 940.0, 1454.0, 2237.0, 3452.0, 5451.0, 8836.0, 13881.0, 21530.0, 32749.0, 49931.0, 73228.0, 103316.0, 133520.0, 1195844.0, 132567.0, 102911.0, 72987.0, 49552.0, 32658.0, 21217.0, 13477.0, 8647.0, 5498.0, 3545.0, 2237.0, 1358.0, 872.0, 548.0, 387.0, 220.0, 150.0, 77.0, 59.0, 40.0, 24.0, 18.0, 20.0, 7.0, 6.0, 7.0, 2.0, 1.0, 4.0, 4.0, 0.0, 0.0, 1.0], "bins": [-0.52587890625, -0.5086288452148438, -0.4913787841796875, -0.47412872314453125, -0.456878662109375, -0.43962860107421875, -0.4223785400390625, -0.40512847900390625, -0.38787841796875, -0.37062835693359375, -0.3533782958984375, -0.33612823486328125, -0.318878173828125, -0.30162811279296875, -0.2843780517578125, -0.26712799072265625, -0.2498779296875, -0.23262786865234375, -0.2153778076171875, -0.19812774658203125, -0.180877685546875, -0.16362762451171875, -0.1463775634765625, -0.12912750244140625, -0.11187744140625, -0.09462738037109375, -0.0773773193359375, -0.06012725830078125, -0.042877197265625, -0.02562713623046875, -0.0083770751953125, 0.00887298583984375, 0.026123046875, 0.04337310791015625, 0.0606231689453125, 0.07787322998046875, 0.095123291015625, 0.11237335205078125, 0.1296234130859375, 0.14687347412109375, 0.16412353515625, 0.18137359619140625, 0.1986236572265625, 0.21587371826171875, 0.233123779296875, 0.25037384033203125, 0.2676239013671875, 0.28487396240234375, 0.3021240234375, 0.31937408447265625, 0.3366241455078125, 0.35387420654296875, 0.371124267578125, 0.38837432861328125, 0.4056243896484375, 0.42287445068359375, 0.44012451171875, 0.45737457275390625, 0.4746246337890625, 0.49187469482421875, 0.509124755859375, 0.5263748168945312, 0.5436248779296875, 0.5608749389648438, 0.578125]}, "gradients/decoder.transformer.h.5.crossattention.q_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 3.0, 2.0, 1.0, 1.0, 1.0, 4.0, 4.0, 3.0, 9.0, 6.0, 4.0, 6.0, 20.0, 18.0, 21.0, 17.0, 41.0, 49.0, 65.0, 127.0, 165.0, 142.0, 76.0, 63.0, 35.0, 30.0, 18.0, 23.0, 17.0, 9.0, 7.0, 1.0, 4.0, 2.0, 5.0, 7.0, 2.0, 1.0, 3.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-0.01270294189453125, -0.0122758150100708, -0.011848688125610352, -0.011421561241149902, -0.010994434356689453, -0.010567307472229004, -0.010140180587768555, -0.009713053703308105, -0.009285926818847656, -0.008858799934387207, -0.008431673049926758, -0.008004546165466309, -0.007577419281005859, -0.00715029239654541, -0.006723165512084961, -0.006296038627624512, -0.0058689117431640625, -0.005441784858703613, -0.005014657974243164, -0.004587531089782715, -0.004160404205322266, -0.0037332773208618164, -0.003306150436401367, -0.002879023551940918, -0.0024518966674804688, -0.0020247697830200195, -0.0015976428985595703, -0.001170516014099121, -0.0007433891296386719, -0.00031626224517822266, 0.00011086463928222656, 0.0005379915237426758, 0.000965118408203125, 0.0013922452926635742, 0.0018193721771240234, 0.0022464990615844727, 0.002673625946044922, 0.003100752830505371, 0.0035278797149658203, 0.0039550065994262695, 0.004382133483886719, 0.004809260368347168, 0.005236387252807617, 0.005663514137268066, 0.006090641021728516, 0.006517767906188965, 0.006944894790649414, 0.007372021675109863, 0.0077991485595703125, 0.008226275444030762, 0.008653402328491211, 0.00908052921295166, 0.00950765609741211, 0.009934782981872559, 0.010361909866333008, 0.010789036750793457, 0.011216163635253906, 0.011643290519714355, 0.012070417404174805, 0.012497544288635254, 0.012924671173095703, 0.013351798057556152, 0.013778924942016602, 0.01420605182647705, 0.0146331787109375]}, "gradients/decoder.transformer.h.5.crossattention.q_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 2.0, 2.0, 1.0, 6.0, 6.0, 5.0, 1.0, 10.0, 11.0, 16.0, 27.0, 23.0, 39.0, 71.0, 107.0, 203.0, 473.0, 6216.0, 1038265.0, 2149.0, 428.0, 170.0, 91.0, 67.0, 40.0, 37.0, 23.0, 14.0, 10.0, 6.0, 13.0, 8.0, 6.0, 4.0, 1.0, 1.0, 3.0, 3.0, 2.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-0.3037109375, -0.29488372802734375, -0.2860565185546875, -0.27722930908203125, -0.268402099609375, -0.25957489013671875, -0.2507476806640625, -0.24192047119140625, -0.23309326171875, -0.22426605224609375, -0.2154388427734375, -0.20661163330078125, -0.197784423828125, -0.18895721435546875, -0.1801300048828125, -0.17130279541015625, -0.1624755859375, -0.15364837646484375, -0.1448211669921875, -0.13599395751953125, -0.127166748046875, -0.11833953857421875, -0.1095123291015625, -0.10068511962890625, -0.09185791015625, -0.08303070068359375, -0.0742034912109375, -0.06537628173828125, -0.056549072265625, -0.04772186279296875, -0.0388946533203125, -0.03006744384765625, -0.021240234375, -0.01241302490234375, -0.0035858154296875, 0.00524139404296875, 0.014068603515625, 0.02289581298828125, 0.0317230224609375, 0.04055023193359375, 0.04937744140625, 0.05820465087890625, 0.0670318603515625, 0.07585906982421875, 0.084686279296875, 0.09351348876953125, 0.1023406982421875, 0.11116790771484375, 0.1199951171875, 0.12882232666015625, 0.1376495361328125, 0.14647674560546875, 0.155303955078125, 0.16413116455078125, 0.1729583740234375, 0.18178558349609375, 0.19061279296875, 0.19944000244140625, 0.2082672119140625, 0.21709442138671875, 0.225921630859375, 0.23474884033203125, 0.2435760498046875, 0.25240325927734375, 0.26123046875]}, "gradients/decoder.transformer.h.5.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 10.0, 968.0, 37.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.11654668301343918, -0.11141981184482574, -0.10629294812679291, -0.10116607695817947, -0.09603920578956604, -0.0909123346209526, -0.08578546345233917, -0.08065859973430634, -0.0755317285656929, -0.07040485739707947, -0.06527799367904663, -0.0601511225104332, -0.05502425134181976, -0.04989738017320633, -0.044770512729883194, -0.03964364528656006, -0.034516774117946625, -0.02938990481197834, -0.024263035506010056, -0.01913616620004177, -0.014009296894073486, -0.008882427588105202, -0.003755558282136917, 0.0013713091611862183, 0.006498180329799652, 0.011625049635767937, 0.01675191894173622, 0.021878788247704506, 0.02700565755367279, 0.032132528722286224, 0.03725939616560936, 0.042386263608932495, 0.047513142228126526, 0.05264001339673996, 0.057766880840063095, 0.06289374828338623, 0.06802061945199966, 0.0731474906206131, 0.07827435433864594, 0.08340122550725937, 0.0885280966758728, 0.09365496784448624, 0.09878183901309967, 0.10390870273113251, 0.10903557389974594, 0.11416244506835938, 0.11928930878639221, 0.12441617995500565, 0.12954305112361908, 0.13466991484165192, 0.13979679346084595, 0.14492365717887878, 0.15005052089691162, 0.15517739951610565, 0.1603042632341385, 0.16543114185333252, 0.17055800557136536, 0.1756848692893982, 0.18081174790859222, 0.18593861162662506, 0.1910654902458191, 0.19619235396385193, 0.20131921768188477, 0.2064460813999176, 0.21157296001911163]}, "gradients/decoder.transformer.h.5.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 4.0, 7.0, 9.0, 10.0, 3.0, 16.0, 20.0, 21.0, 21.0, 31.0, 41.0, 44.0, 47.0, 62.0, 47.0, 45.0, 44.0, 57.0, 72.0, 46.0, 58.0, 54.0, 39.0, 34.0, 41.0, 24.0, 24.0, 18.0, 16.0, 14.0, 5.0, 5.0, 6.0, 11.0, 5.0, 2.0, 1.0, 4.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.010081470012664795, -0.00971247348934412, -0.009343476966023445, -0.00897448044270277, -0.008605483919382095, -0.00823648739606142, -0.007867490872740746, -0.007498494349420071, -0.007129497826099396, -0.006760501302778721, -0.006391504779458046, -0.006022508256137371, -0.005653511732816696, -0.005284515209496021, -0.004915518686175346, -0.0045465221628546715, -0.004177525639533997, -0.0038085291162133217, -0.003439532592892647, -0.003070536069571972, -0.002701539546251297, -0.002332543022930622, -0.001963546499609947, -0.0015945499762892723, -0.0012255534529685974, -0.0008565569296479225, -0.0004875604063272476, -0.00011856388300657272, 0.00025043264031410217, 0.0006194291636347771, 0.000988425686955452, 0.0013574222102761269, 0.0017264187335968018, 0.0020954152569174767, 0.0024644117802381516, 0.0028334083035588264, 0.0032024048268795013, 0.0035714013502001762, 0.003940397873520851, 0.004309394396841526, 0.004678390920162201, 0.005047387443482876, 0.005416383966803551, 0.005785380490124226, 0.0061543770134449005, 0.006523373536765575, 0.00689237006008625, 0.007261366583406925, 0.0076303631067276, 0.007999359630048275, 0.00836835615336895, 0.008737352676689625, 0.0091063492000103, 0.009475345723330975, 0.00984434224665165, 0.010213338769972324, 0.010582335293293, 0.010951331816613674, 0.011320328339934349, 0.011689324863255024, 0.012058321386575699, 0.012427317909896374, 0.012796314433217049, 0.013165310956537724, 0.013534307479858398]}, "gradients/decoder.transformer.h.5.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 4.0, 3.0, 4.0, 9.0, 10.0, 5.0, 11.0, 10.0, 13.0, 26.0, 22.0, 20.0, 28.0, 21.0, 32.0, 44.0, 35.0, 36.0, 37.0, 56.0, 48.0, 47.0, 48.0, 47.0, 37.0, 46.0, 45.0, 37.0, 33.0, 30.0, 22.0, 19.0, 21.0, 21.0, 18.0, 13.0, 16.0, 10.0, 9.0, 3.0, 4.0, 2.0, 4.0, 0.0, 4.0, 2.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-7.25, -7.006591796875, -6.76318359375, -6.519775390625, -6.2763671875, -6.032958984375, -5.78955078125, -5.546142578125, -5.302734375, -5.059326171875, -4.81591796875, -4.572509765625, -4.3291015625, -4.085693359375, -3.84228515625, -3.598876953125, -3.35546875, -3.112060546875, -2.86865234375, -2.625244140625, -2.3818359375, -2.138427734375, -1.89501953125, -1.651611328125, -1.408203125, -1.164794921875, -0.92138671875, -0.677978515625, -0.4345703125, -0.191162109375, 0.05224609375, 0.295654296875, 0.5390625, 0.782470703125, 1.02587890625, 1.269287109375, 1.5126953125, 1.756103515625, 1.99951171875, 2.242919921875, 2.486328125, 2.729736328125, 2.97314453125, 3.216552734375, 3.4599609375, 3.703369140625, 3.94677734375, 4.190185546875, 4.43359375, 4.677001953125, 4.92041015625, 5.163818359375, 5.4072265625, 5.650634765625, 5.89404296875, 6.137451171875, 6.380859375, 6.624267578125, 6.86767578125, 7.111083984375, 7.3544921875, 7.597900390625, 7.84130859375, 8.084716796875, 8.328125]}, "gradients/decoder.transformer.h.5.attn.c_proj.weight": {"_type": "histogram", "values": [2.0, 2.0, 3.0, 1.0, 6.0, 4.0, 5.0, 6.0, 17.0, 19.0, 25.0, 37.0, 57.0, 78.0, 98.0, 161.0, 253.0, 398.0, 669.0, 1135.0, 2040.0, 4037.0, 8320.0, 18574.0, 42975.0, 99290.0, 234972.0, 348306.0, 161247.0, 69430.0, 30214.0, 13134.0, 6209.0, 2937.0, 1563.0, 853.0, 530.0, 326.0, 195.0, 131.0, 90.0, 61.0, 50.0, 25.0, 26.0, 17.0, 11.0, 12.0, 5.0, 3.0, 4.0, 4.0, 1.0, 3.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.99609375, -7.70306396484375, -7.4100341796875, -7.11700439453125, -6.823974609375, -6.53094482421875, -6.2379150390625, -5.94488525390625, -5.65185546875, -5.35882568359375, -5.0657958984375, -4.77276611328125, -4.479736328125, -4.18670654296875, -3.8936767578125, -3.60064697265625, -3.3076171875, -3.01458740234375, -2.7215576171875, -2.42852783203125, -2.135498046875, -1.84246826171875, -1.5494384765625, -1.25640869140625, -0.96337890625, -0.67034912109375, -0.3773193359375, -0.08428955078125, 0.208740234375, 0.50177001953125, 0.7947998046875, 1.08782958984375, 1.380859375, 1.67388916015625, 1.9669189453125, 2.25994873046875, 2.552978515625, 2.84600830078125, 3.1390380859375, 3.43206787109375, 3.72509765625, 4.01812744140625, 4.3111572265625, 4.60418701171875, 4.897216796875, 5.19024658203125, 5.4832763671875, 5.77630615234375, 6.0693359375, 6.36236572265625, 6.6553955078125, 6.94842529296875, 7.241455078125, 7.53448486328125, 7.8275146484375, 8.12054443359375, 8.41357421875, 8.70660400390625, 8.9996337890625, 9.29266357421875, 9.585693359375, 9.87872314453125, 10.1717529296875, 10.46478271484375, 10.7578125]}, "gradients/decoder.transformer.h.5.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 4.0, 2.0, 4.0, 3.0, 6.0, 12.0, 6.0, 6.0, 13.0, 15.0, 13.0, 8.0, 20.0, 27.0, 28.0, 28.0, 36.0, 29.0, 37.0, 59.0, 69.0, 66.0, 118.0, 182.0, 1294.0, 274.0, 149.0, 99.0, 64.0, 44.0, 41.0, 43.0, 30.0, 30.0, 17.0, 31.0, 21.0, 15.0, 15.0, 16.0, 17.0, 10.0, 7.0, 10.0, 5.0, 6.0, 4.0, 5.0, 6.0, 5.0, 5.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 2.0], "bins": [-17.234375, -16.677978515625, -16.12158203125, -15.565185546875, -15.0087890625, -14.452392578125, -13.89599609375, -13.339599609375, -12.783203125, -12.226806640625, -11.67041015625, -11.114013671875, -10.5576171875, -10.001220703125, -9.44482421875, -8.888427734375, -8.33203125, -7.775634765625, -7.21923828125, -6.662841796875, -6.1064453125, -5.550048828125, -4.99365234375, -4.437255859375, -3.880859375, -3.324462890625, -2.76806640625, -2.211669921875, -1.6552734375, -1.098876953125, -0.54248046875, 0.013916015625, 0.5703125, 1.126708984375, 1.68310546875, 2.239501953125, 2.7958984375, 3.352294921875, 3.90869140625, 4.465087890625, 5.021484375, 5.577880859375, 6.13427734375, 6.690673828125, 7.2470703125, 7.803466796875, 8.35986328125, 8.916259765625, 9.47265625, 10.029052734375, 10.58544921875, 11.141845703125, 11.6982421875, 12.254638671875, 12.81103515625, 13.367431640625, 13.923828125, 14.480224609375, 15.03662109375, 15.593017578125, 16.1494140625, 16.705810546875, 17.26220703125, 17.818603515625, 18.375]}, "gradients/decoder.transformer.h.5.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 5.0, 3.0, 2.0, 5.0, 9.0, 5.0, 5.0, 1.0, 9.0, 17.0, 19.0, 39.0, 45.0, 64.0, 85.0, 162.0, 209.0, 336.0, 714.0, 2705.0, 43063.0, 2574312.0, 507741.0, 13328.0, 1421.0, 548.0, 286.0, 198.0, 108.0, 77.0, 63.0, 41.0, 21.0, 31.0, 8.0, 11.0, 7.0, 6.0, 3.0, 4.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-42.53125, -41.1943359375, -39.857421875, -38.5205078125, -37.18359375, -35.8466796875, -34.509765625, -33.1728515625, -31.8359375, -30.4990234375, -29.162109375, -27.8251953125, -26.48828125, -25.1513671875, -23.814453125, -22.4775390625, -21.140625, -19.8037109375, -18.466796875, -17.1298828125, -15.79296875, -14.4560546875, -13.119140625, -11.7822265625, -10.4453125, -9.1083984375, -7.771484375, -6.4345703125, -5.09765625, -3.7607421875, -2.423828125, -1.0869140625, 0.25, 1.5869140625, 2.923828125, 4.2607421875, 5.59765625, 6.9345703125, 8.271484375, 9.6083984375, 10.9453125, 12.2822265625, 13.619140625, 14.9560546875, 16.29296875, 17.6298828125, 18.966796875, 20.3037109375, 21.640625, 22.9775390625, 24.314453125, 25.6513671875, 26.98828125, 28.3251953125, 29.662109375, 30.9990234375, 32.3359375, 33.6728515625, 35.009765625, 36.3466796875, 37.68359375, 39.0205078125, 40.357421875, 41.6943359375, 43.03125]}, "gradients/decoder.transformer.h.5.ln_1.weight": {"_type": "histogram", "values": [4.0, 47.0, 322.0, 459.0, 167.0, 15.0, 3.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-16.130380630493164, -11.101810455322266, -6.073239326477051, -1.044668197631836, 3.9839019775390625, 9.012472152709961, 14.041044235229492, 19.069612503051758, 24.09818458557129, 29.126754760742188, 34.15532684326172, 39.18389892578125, 44.212467193603516, 49.24103546142578, 54.26960754394531, 59.298179626464844, 64.32675170898438, 69.3553237915039, 74.38389587402344, 79.41246032714844, 84.44103240966797, 89.4696044921875, 94.49817657470703, 99.52674865722656, 104.55531311035156, 109.5838851928711, 114.61245727539062, 119.64102172851562, 124.66959381103516, 129.6981658935547, 134.72674560546875, 139.75531005859375, 144.7838897705078, 149.8124542236328, 154.84103393554688, 159.86959838867188, 164.89817810058594, 169.92674255371094, 174.955322265625, 179.98388671875, 185.012451171875, 190.041015625, 195.06959533691406, 200.09815979003906, 205.12673950195312, 210.15530395507812, 215.18386840820312, 220.2124481201172, 225.24102783203125, 230.26959228515625, 235.2981719970703, 240.3267364501953, 245.35531616210938, 250.38388061523438, 255.41244506835938, 260.4410095214844, 265.4695739746094, 270.4981384277344, 275.5267028808594, 280.5552978515625, 285.5838623046875, 290.6124267578125, 295.6409912109375, 300.6695556640625, 305.6981506347656]}, "gradients/decoder.transformer.h.5.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 3.0, 3.0, 5.0, 3.0, 7.0, 6.0, 7.0, 4.0, 17.0, 18.0, 16.0, 22.0, 21.0, 16.0, 17.0, 26.0, 25.0, 22.0, 29.0, 37.0, 32.0, 41.0, 54.0, 37.0, 39.0, 51.0, 44.0, 33.0, 46.0, 31.0, 34.0, 35.0, 23.0, 27.0, 19.0, 22.0, 16.0, 19.0, 18.0, 20.0, 9.0, 10.0, 12.0, 8.0, 8.0, 6.0, 5.0, 4.0, 2.0, 2.0, 1.0, 1.0, 2.0], "bins": [-47.337684631347656, -45.98408508300781, -44.630489349365234, -43.27688980102539, -41.92329406738281, -40.56969451904297, -39.216094970703125, -37.86249542236328, -36.5088996887207, -35.15530014038086, -33.80170440673828, -32.44810485839844, -31.094507217407227, -29.740909576416016, -28.387310028076172, -27.03371238708496, -25.68011474609375, -24.32651710510254, -22.972919464111328, -21.619319915771484, -20.265722274780273, -18.912124633789062, -17.55852508544922, -16.204927444458008, -14.851329803466797, -13.497732162475586, -12.144133567810059, -10.790534973144531, -9.43693733215332, -8.08333969116211, -6.729741096496582, -5.376142501831055, -4.022544860839844, -2.6689467430114746, -1.3153486251831055, 0.03824949264526367, 1.3918476104736328, 2.745445728302002, 4.099043846130371, 5.452642440795898, 6.806240081787109, 8.15983772277832, 9.513436317443848, 10.867034912109375, 12.220632553100586, 13.574230194091797, 14.927828788757324, 16.28142738342285, 17.635025024414062, 18.988622665405273, 20.342220306396484, 21.695819854736328, 23.04941749572754, 24.40301513671875, 25.756614685058594, 27.110212326049805, 28.463809967041016, 29.817407608032227, 31.171005249023438, 32.52460479736328, 33.878204345703125, 35.2318000793457, 36.58539962768555, 37.938995361328125, 39.29259490966797]}, "gradients/decoder.transformer.h.4.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 4.0, 4.0, 1.0, 1.0, 6.0, 7.0, 1.0, 7.0, 9.0, 10.0, 10.0, 9.0, 10.0, 21.0, 21.0, 17.0, 23.0, 12.0, 33.0, 29.0, 34.0, 33.0, 39.0, 35.0, 33.0, 52.0, 43.0, 41.0, 39.0, 41.0, 35.0, 33.0, 35.0, 33.0, 29.0, 28.0, 26.0, 19.0, 17.0, 21.0, 23.0, 20.0, 13.0, 17.0, 5.0, 8.0, 9.0, 5.0, 2.0, 5.0, 2.0, 1.0, 1.0, 2.0, 2.0, 1.0, 2.0, 0.0, 1.0, 2.0], "bins": [-6.9140625, -6.69317626953125, -6.4722900390625, -6.25140380859375, -6.030517578125, -5.80963134765625, -5.5887451171875, -5.36785888671875, -5.14697265625, -4.92608642578125, -4.7052001953125, -4.48431396484375, -4.263427734375, -4.04254150390625, -3.8216552734375, -3.60076904296875, -3.3798828125, -3.15899658203125, -2.9381103515625, -2.71722412109375, -2.496337890625, -2.27545166015625, -2.0545654296875, -1.83367919921875, -1.61279296875, -1.39190673828125, -1.1710205078125, -0.95013427734375, -0.729248046875, -0.50836181640625, -0.2874755859375, -0.06658935546875, 0.154296875, 0.37518310546875, 0.5960693359375, 0.81695556640625, 1.037841796875, 1.25872802734375, 1.4796142578125, 1.70050048828125, 1.92138671875, 2.14227294921875, 2.3631591796875, 2.58404541015625, 2.804931640625, 3.02581787109375, 3.2467041015625, 3.46759033203125, 3.6884765625, 3.90936279296875, 4.1302490234375, 4.35113525390625, 4.572021484375, 4.79290771484375, 5.0137939453125, 5.23468017578125, 5.45556640625, 5.67645263671875, 5.8973388671875, 6.11822509765625, 6.339111328125, 6.55999755859375, 6.7808837890625, 7.00177001953125, 7.22265625]}, "gradients/decoder.transformer.h.4.mlp.c_proj.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 3.0, 0.0, 5.0, 4.0, 11.0, 12.0, 10.0, 28.0, 29.0, 50.0, 66.0, 93.0, 138.0, 232.0, 379.0, 631.0, 1166.0, 2361.0, 5043.0, 11858.0, 29863.0, 84292.0, 253912.0, 748805.0, 1484890.0, 1014405.0, 366068.0, 119555.0, 41387.0, 15868.0, 6548.0, 3033.0, 1499.0, 810.0, 442.0, 267.0, 187.0, 107.0, 70.0, 38.0, 40.0, 21.0, 16.0, 18.0, 14.0, 6.0, 4.0, 3.0, 3.0, 1.0, 1.0, 2.0, 0.0, 0.0, 2.0, 0.0, 1.0], "bins": [-8.078125, -7.822998046875, -7.56787109375, -7.312744140625, -7.0576171875, -6.802490234375, -6.54736328125, -6.292236328125, -6.037109375, -5.781982421875, -5.52685546875, -5.271728515625, -5.0166015625, -4.761474609375, -4.50634765625, -4.251220703125, -3.99609375, -3.740966796875, -3.48583984375, -3.230712890625, -2.9755859375, -2.720458984375, -2.46533203125, -2.210205078125, -1.955078125, -1.699951171875, -1.44482421875, -1.189697265625, -0.9345703125, -0.679443359375, -0.42431640625, -0.169189453125, 0.0859375, 0.341064453125, 0.59619140625, 0.851318359375, 1.1064453125, 1.361572265625, 1.61669921875, 1.871826171875, 2.126953125, 2.382080078125, 2.63720703125, 2.892333984375, 3.1474609375, 3.402587890625, 3.65771484375, 3.912841796875, 4.16796875, 4.423095703125, 4.67822265625, 4.933349609375, 5.1884765625, 5.443603515625, 5.69873046875, 5.953857421875, 6.208984375, 6.464111328125, 6.71923828125, 6.974365234375, 7.2294921875, 7.484619140625, 7.73974609375, 7.994873046875, 8.25]}, "gradients/decoder.transformer.h.4.mlp.c_fc.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 4.0, 2.0, 6.0, 9.0, 16.0, 17.0, 19.0, 16.0, 19.0, 40.0, 44.0, 72.0, 92.0, 132.0, 169.0, 204.0, 299.0, 440.0, 458.0, 446.0, 454.0, 326.0, 221.0, 145.0, 112.0, 71.0, 75.0, 41.0, 36.0, 21.0, 23.0, 13.0, 7.0, 13.0, 7.0, 3.0, 4.0, 2.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-12.125, -11.767578125, -11.41015625, -11.052734375, -10.6953125, -10.337890625, -9.98046875, -9.623046875, -9.265625, -8.908203125, -8.55078125, -8.193359375, -7.8359375, -7.478515625, -7.12109375, -6.763671875, -6.40625, -6.048828125, -5.69140625, -5.333984375, -4.9765625, -4.619140625, -4.26171875, -3.904296875, -3.546875, -3.189453125, -2.83203125, -2.474609375, -2.1171875, -1.759765625, -1.40234375, -1.044921875, -0.6875, -0.330078125, 0.02734375, 0.384765625, 0.7421875, 1.099609375, 1.45703125, 1.814453125, 2.171875, 2.529296875, 2.88671875, 3.244140625, 3.6015625, 3.958984375, 4.31640625, 4.673828125, 5.03125, 5.388671875, 5.74609375, 6.103515625, 6.4609375, 6.818359375, 7.17578125, 7.533203125, 7.890625, 8.248046875, 8.60546875, 8.962890625, 9.3203125, 9.677734375, 10.03515625, 10.392578125, 10.75]}, "gradients/decoder.transformer.h.4.mlp.c_fc.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 1.0, 2.0, 0.0, 3.0, 1.0, 3.0, 2.0, 3.0, 9.0, 6.0, 10.0, 20.0, 19.0, 47.0, 52.0, 76.0, 108.0, 181.0, 307.0, 514.0, 1178.0, 2468.0, 6599.0, 20034.0, 71956.0, 339234.0, 1917139.0, 1497872.0, 254024.0, 56424.0, 16075.0, 5439.0, 2172.0, 972.0, 503.0, 299.0, 168.0, 99.0, 88.0, 63.0, 41.0, 23.0, 18.0, 11.0, 11.0, 3.0, 3.0, 0.0, 3.0, 6.0, 2.0, 3.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 2.0], "bins": [-14.453125, -13.969970703125, -13.48681640625, -13.003662109375, -12.5205078125, -12.037353515625, -11.55419921875, -11.071044921875, -10.587890625, -10.104736328125, -9.62158203125, -9.138427734375, -8.6552734375, -8.172119140625, -7.68896484375, -7.205810546875, -6.72265625, -6.239501953125, -5.75634765625, -5.273193359375, -4.7900390625, -4.306884765625, -3.82373046875, -3.340576171875, -2.857421875, -2.374267578125, -1.89111328125, -1.407958984375, -0.9248046875, -0.441650390625, 0.04150390625, 0.524658203125, 1.0078125, 1.490966796875, 1.97412109375, 2.457275390625, 2.9404296875, 3.423583984375, 3.90673828125, 4.389892578125, 4.873046875, 5.356201171875, 5.83935546875, 6.322509765625, 6.8056640625, 7.288818359375, 7.77197265625, 8.255126953125, 8.73828125, 9.221435546875, 9.70458984375, 10.187744140625, 10.6708984375, 11.154052734375, 11.63720703125, 12.120361328125, 12.603515625, 13.086669921875, 13.56982421875, 14.052978515625, 14.5361328125, 15.019287109375, 15.50244140625, 15.985595703125, 16.46875]}, "gradients/decoder.transformer.h.4.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 10.0, 36.0, 222.0, 414.0, 268.0, 56.0, 8.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-226.86776733398438, -220.66490173339844, -214.4620361328125, -208.25917053222656, -202.05630493164062, -195.8534393310547, -189.65057373046875, -183.44769287109375, -177.24484252929688, -171.04197692871094, -164.839111328125, -158.63624572753906, -152.43338012695312, -146.2305145263672, -140.02764892578125, -133.82476806640625, -127.62190246582031, -121.41903686523438, -115.21617126464844, -109.0133056640625, -102.81044006347656, -96.60757446289062, -90.40470123291016, -84.20183563232422, -77.99897003173828, -71.79610443115234, -65.5932388305664, -59.3903694152832, -53.187503814697266, -46.98463821411133, -40.781768798828125, -34.57890319824219, -28.376052856445312, -22.173187255859375, -15.970319747924805, -9.767452239990234, -3.564586639404297, 2.6382789611816406, 8.841148376464844, 15.044013977050781, 21.24687957763672, 27.449745178222656, 33.652610778808594, 39.8554801940918, 46.058345794677734, 52.26121139526367, 58.464080810546875, 64.66694641113281, 70.86981201171875, 77.07267761230469, 83.27554321289062, 89.47840881347656, 95.6812744140625, 101.88414001464844, 108.0870132446289, 114.28987884521484, 120.49274444580078, 126.69561004638672, 132.8984832763672, 139.10134887695312, 145.30421447753906, 151.507080078125, 157.70994567871094, 163.91281127929688, 170.1156768798828]}, "gradients/decoder.transformer.h.4.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 1.0, 5.0, 3.0, 7.0, 7.0, 4.0, 7.0, 12.0, 4.0, 11.0, 17.0, 17.0, 15.0, 22.0, 16.0, 19.0, 20.0, 38.0, 26.0, 34.0, 22.0, 37.0, 45.0, 33.0, 35.0, 32.0, 55.0, 48.0, 39.0, 35.0, 39.0, 30.0, 37.0, 27.0, 20.0, 21.0, 18.0, 14.0, 17.0, 21.0, 13.0, 14.0, 13.0, 8.0, 8.0, 11.0, 4.0, 10.0, 9.0, 2.0, 2.0, 3.0, 1.0, 3.0, 0.0, 5.0, 0.0, 1.0, 1.0, 0.0, 2.0], "bins": [-32.07764434814453, -30.92927360534668, -29.780902862548828, -28.632532119750977, -27.484161376953125, -26.33578872680664, -25.18741798400879, -24.039047241210938, -22.890676498413086, -21.742305755615234, -20.593935012817383, -19.44556427001953, -18.297191619873047, -17.148822784423828, -16.000450134277344, -14.852079391479492, -13.70370864868164, -12.555337905883789, -11.406967163085938, -10.25859546661377, -9.110224723815918, -7.961853981018066, -6.813482761383057, -5.665111541748047, -4.516740798950195, -3.3683698177337646, -2.219998836517334, -1.0716278553009033, 0.07674312591552734, 1.225113868713379, 2.3734850883483887, 3.5218563079833984, 4.670230865478516, 5.818601608276367, 6.966972827911377, 8.115344047546387, 9.263714790344238, 10.41208553314209, 11.560457229614258, 12.70882797241211, 13.857198715209961, 15.005569458007812, 16.153940200805664, 17.302310943603516, 18.45068359375, 19.59905242919922, 20.747425079345703, 21.895795822143555, 23.044166564941406, 24.192537307739258, 25.34090805053711, 26.48927879333496, 27.637649536132812, 28.786022186279297, 29.93439292907715, 31.082763671875, 32.23113250732422, 33.3795051574707, 34.52787399291992, 35.676246643066406, 36.824615478515625, 37.97298812866211, 39.12135696411133, 40.26972961425781, 41.4181022644043]}, "gradients/decoder.transformer.h.4.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 3.0, 2.0, 4.0, 1.0, 7.0, 3.0, 8.0, 8.0, 9.0, 12.0, 7.0, 14.0, 19.0, 21.0, 16.0, 11.0, 20.0, 32.0, 17.0, 46.0, 27.0, 39.0, 21.0, 32.0, 37.0, 47.0, 35.0, 51.0, 42.0, 38.0, 43.0, 34.0, 33.0, 41.0, 26.0, 21.0, 26.0, 25.0, 20.0, 17.0, 16.0, 13.0, 8.0, 13.0, 8.0, 11.0, 2.0, 3.0, 5.0, 4.0, 4.0, 2.0, 6.0, 2.0, 3.0, 0.0, 0.0, 0.0, 1.0, 3.0, 2.0], "bins": [-6.58984375, -6.3702392578125, -6.150634765625, -5.9310302734375, -5.71142578125, -5.4918212890625, -5.272216796875, -5.0526123046875, -4.8330078125, -4.6134033203125, -4.393798828125, -4.1741943359375, -3.95458984375, -3.7349853515625, -3.515380859375, -3.2957763671875, -3.076171875, -2.8565673828125, -2.636962890625, -2.4173583984375, -2.19775390625, -1.9781494140625, -1.758544921875, -1.5389404296875, -1.3193359375, -1.0997314453125, -0.880126953125, -0.6605224609375, -0.44091796875, -0.2213134765625, -0.001708984375, 0.2178955078125, 0.4375, 0.6571044921875, 0.876708984375, 1.0963134765625, 1.31591796875, 1.5355224609375, 1.755126953125, 1.9747314453125, 2.1943359375, 2.4139404296875, 2.633544921875, 2.8531494140625, 3.07275390625, 3.2923583984375, 3.511962890625, 3.7315673828125, 3.951171875, 4.1707763671875, 4.390380859375, 4.6099853515625, 4.82958984375, 5.0491943359375, 5.268798828125, 5.4884033203125, 5.7080078125, 5.9276123046875, 6.147216796875, 6.3668212890625, 6.58642578125, 6.8060302734375, 7.025634765625, 7.2452392578125, 7.46484375]}, "gradients/decoder.transformer.h.4.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 5.0, 8.0, 7.0, 11.0, 15.0, 31.0, 35.0, 55.0, 107.0, 166.0, 250.0, 392.0, 562.0, 1043.0, 1686.0, 2584.0, 4224.0, 6785.0, 10826.0, 17050.0, 26764.0, 42380.0, 66633.0, 100843.0, 141448.0, 168233.0, 149232.0, 108687.0, 72361.0, 46605.0, 29703.0, 18639.0, 11692.0, 7403.0, 4580.0, 2839.0, 1757.0, 1078.0, 714.0, 438.0, 277.0, 150.0, 81.0, 50.0, 43.0, 31.0, 15.0, 20.0, 13.0, 5.0, 5.0, 5.0, 2.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.9716796875, -0.9418716430664062, -0.9120635986328125, -0.8822555541992188, -0.852447509765625, -0.8226394653320312, -0.7928314208984375, -0.7630233764648438, -0.73321533203125, -0.7034072875976562, -0.6735992431640625, -0.6437911987304688, -0.613983154296875, -0.5841751098632812, -0.5543670654296875, -0.5245590209960938, -0.4947509765625, -0.46494293212890625, -0.4351348876953125, -0.40532684326171875, -0.375518798828125, -0.34571075439453125, -0.3159027099609375, -0.28609466552734375, -0.25628662109375, -0.22647857666015625, -0.1966705322265625, -0.16686248779296875, -0.137054443359375, -0.10724639892578125, -0.0774383544921875, -0.04763031005859375, -0.017822265625, 0.01198577880859375, 0.0417938232421875, 0.07160186767578125, 0.101409912109375, 0.13121795654296875, 0.1610260009765625, 0.19083404541015625, 0.22064208984375, 0.25045013427734375, 0.2802581787109375, 0.31006622314453125, 0.339874267578125, 0.36968231201171875, 0.3994903564453125, 0.42929840087890625, 0.4591064453125, 0.48891448974609375, 0.5187225341796875, 0.5485305786132812, 0.578338623046875, 0.6081466674804688, 0.6379547119140625, 0.6677627563476562, 0.69757080078125, 0.7273788452148438, 0.7571868896484375, 0.7869949340820312, 0.816802978515625, 0.8466110229492188, 0.8764190673828125, 0.9062271118164062, 0.93603515625]}, "gradients/decoder.transformer.h.4.crossattention.c_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 3.0, 0.0, 2.0, 2.0, 2.0, 6.0, 5.0, 7.0, 9.0, 11.0, 11.0, 10.0, 13.0, 11.0, 18.0, 16.0, 24.0, 28.0, 31.0, 19.0, 34.0, 38.0, 28.0, 36.0, 29.0, 36.0, 48.0, 1065.0, 34.0, 49.0, 40.0, 39.0, 29.0, 38.0, 28.0, 27.0, 31.0, 32.0, 12.0, 21.0, 18.0, 12.0, 16.0, 19.0, 10.0, 12.0, 5.0, 11.0, 10.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-5.0546875, -4.9056396484375, -4.756591796875, -4.6075439453125, -4.45849609375, -4.3094482421875, -4.160400390625, -4.0113525390625, -3.8623046875, -3.7132568359375, -3.564208984375, -3.4151611328125, -3.26611328125, -3.1170654296875, -2.968017578125, -2.8189697265625, -2.669921875, -2.5208740234375, -2.371826171875, -2.2227783203125, -2.07373046875, -1.9246826171875, -1.775634765625, -1.6265869140625, -1.4775390625, -1.3284912109375, -1.179443359375, -1.0303955078125, -0.88134765625, -0.7322998046875, -0.583251953125, -0.4342041015625, -0.28515625, -0.1361083984375, 0.012939453125, 0.1619873046875, 0.31103515625, 0.4600830078125, 0.609130859375, 0.7581787109375, 0.9072265625, 1.0562744140625, 1.205322265625, 1.3543701171875, 1.50341796875, 1.6524658203125, 1.801513671875, 1.9505615234375, 2.099609375, 2.2486572265625, 2.397705078125, 2.5467529296875, 2.69580078125, 2.8448486328125, 2.993896484375, 3.1429443359375, 3.2919921875, 3.4410400390625, 3.590087890625, 3.7391357421875, 3.88818359375, 4.0372314453125, 4.186279296875, 4.3353271484375, 4.484375]}, "gradients/decoder.transformer.h.4.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 3.0, 6.0, 7.0, 14.0, 13.0, 29.0, 25.0, 62.0, 100.0, 134.0, 206.0, 390.0, 593.0, 909.0, 1470.0, 2217.0, 3710.0, 6072.0, 9382.0, 15040.0, 23703.0, 36821.0, 56293.0, 83106.0, 113300.0, 146818.0, 1186704.0, 125551.0, 95243.0, 66662.0, 44779.0, 28759.0, 18280.0, 11381.0, 7239.0, 4466.0, 2836.0, 1762.0, 1137.0, 678.0, 413.0, 309.0, 164.0, 135.0, 82.0, 37.0, 37.0, 23.0, 17.0, 8.0, 7.0, 4.0, 1.0, 4.0, 3.0, 1.0], "bins": [-0.63330078125, -0.6147918701171875, -0.596282958984375, -0.5777740478515625, -0.55926513671875, -0.5407562255859375, -0.522247314453125, -0.5037384033203125, -0.4852294921875, -0.4667205810546875, -0.448211669921875, -0.4297027587890625, -0.41119384765625, -0.3926849365234375, -0.374176025390625, -0.3556671142578125, -0.337158203125, -0.3186492919921875, -0.300140380859375, -0.2816314697265625, -0.26312255859375, -0.2446136474609375, -0.226104736328125, -0.2075958251953125, -0.1890869140625, -0.1705780029296875, -0.152069091796875, -0.1335601806640625, -0.11505126953125, -0.0965423583984375, -0.078033447265625, -0.0595245361328125, -0.041015625, -0.0225067138671875, -0.003997802734375, 0.0145111083984375, 0.03302001953125, 0.0515289306640625, 0.070037841796875, 0.0885467529296875, 0.1070556640625, 0.1255645751953125, 0.144073486328125, 0.1625823974609375, 0.18109130859375, 0.1996002197265625, 0.218109130859375, 0.2366180419921875, 0.255126953125, 0.2736358642578125, 0.292144775390625, 0.3106536865234375, 0.32916259765625, 0.3476715087890625, 0.366180419921875, 0.3846893310546875, 0.4031982421875, 0.4217071533203125, 0.440216064453125, 0.4587249755859375, 0.47723388671875, 0.4957427978515625, 0.514251708984375, 0.5327606201171875, 0.55126953125]}, "gradients/decoder.transformer.h.4.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 3.0, 0.0, 2.0, 2.0, 1.0, 7.0, 3.0, 4.0, 5.0, 9.0, 12.0, 12.0, 17.0, 32.0, 18.0, 22.0, 34.0, 52.0, 65.0, 122.0, 132.0, 119.0, 79.0, 52.0, 39.0, 30.0, 25.0, 35.0, 18.0, 10.0, 7.0, 11.0, 6.0, 6.0, 2.0, 0.0, 6.0, 1.0, 2.0, 5.0, 3.0, 3.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.015716552734375, -0.015249848365783691, -0.014783143997192383, -0.014316439628601074, -0.013849735260009766, -0.013383030891418457, -0.012916326522827148, -0.01244962215423584, -0.011982917785644531, -0.011516213417053223, -0.011049509048461914, -0.010582804679870605, -0.010116100311279297, -0.009649395942687988, -0.00918269157409668, -0.008715987205505371, -0.008249282836914062, -0.007782578468322754, -0.007315874099731445, -0.006849169731140137, -0.006382465362548828, -0.0059157609939575195, -0.005449056625366211, -0.004982352256774902, -0.004515647888183594, -0.004048943519592285, -0.0035822391510009766, -0.003115534782409668, -0.0026488304138183594, -0.0021821260452270508, -0.0017154216766357422, -0.0012487173080444336, -0.000782012939453125, -0.0003153085708618164, 0.0001513957977294922, 0.0006181001663208008, 0.0010848045349121094, 0.001551508903503418, 0.0020182132720947266, 0.002484917640686035, 0.0029516220092773438, 0.0034183263778686523, 0.003885030746459961, 0.0043517351150512695, 0.004818439483642578, 0.005285143852233887, 0.005751848220825195, 0.006218552589416504, 0.0066852569580078125, 0.007151961326599121, 0.00761866569519043, 0.008085370063781738, 0.008552074432373047, 0.009018778800964355, 0.009485483169555664, 0.009952187538146973, 0.010418891906738281, 0.01088559627532959, 0.011352300643920898, 0.011819005012512207, 0.012285709381103516, 0.012752413749694824, 0.013219118118286133, 0.013685822486877441, 0.01415252685546875]}, "gradients/decoder.transformer.h.4.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 1.0, 5.0, 1.0, 7.0, 2.0, 2.0, 6.0, 4.0, 5.0, 6.0, 16.0, 12.0, 16.0, 17.0, 37.0, 52.0, 63.0, 100.0, 156.0, 271.0, 699.0, 15788.0, 1023506.0, 6503.0, 583.0, 239.0, 129.0, 92.0, 53.0, 47.0, 38.0, 27.0, 21.0, 16.0, 5.0, 6.0, 5.0, 5.0, 4.0, 6.0, 1.0, 4.0, 3.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.27685546875, -0.26775360107421875, -0.2586517333984375, -0.24954986572265625, -0.240447998046875, -0.23134613037109375, -0.2222442626953125, -0.21314239501953125, -0.20404052734375, -0.19493865966796875, -0.1858367919921875, -0.17673492431640625, -0.167633056640625, -0.15853118896484375, -0.1494293212890625, -0.14032745361328125, -0.1312255859375, -0.12212371826171875, -0.1130218505859375, -0.10391998291015625, -0.094818115234375, -0.08571624755859375, -0.0766143798828125, -0.06751251220703125, -0.05841064453125, -0.04930877685546875, -0.0402069091796875, -0.03110504150390625, -0.022003173828125, -0.01290130615234375, -0.0037994384765625, 0.00530242919921875, 0.014404296875, 0.02350616455078125, 0.0326080322265625, 0.04170989990234375, 0.050811767578125, 0.05991363525390625, 0.0690155029296875, 0.07811737060546875, 0.08721923828125, 0.09632110595703125, 0.1054229736328125, 0.11452484130859375, 0.123626708984375, 0.13272857666015625, 0.1418304443359375, 0.15093231201171875, 0.1600341796875, 0.16913604736328125, 0.1782379150390625, 0.18733978271484375, 0.196441650390625, 0.20554351806640625, 0.2146453857421875, 0.22374725341796875, 0.23284912109375, 0.24195098876953125, 0.2510528564453125, 0.26015472412109375, 0.269256591796875, 0.27835845947265625, 0.2874603271484375, 0.29656219482421875, 0.3056640625]}, "gradients/decoder.transformer.h.4.ln_cross_attn.weight": {"_type": "histogram", "values": [4.0, 0.0, 0.0, 0.0, 2.0, 42.0, 971.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.05599676072597504, -0.04718736559152603, -0.038377970457077026, -0.029568571597337723, -0.020759176462888718, -0.011949781328439713, -0.003140382468700409, 0.005669012665748596, 0.014478407800197601, 0.023287802934646606, 0.03209719806909561, 0.040906596928834915, 0.04971599206328392, 0.058525387197732925, 0.06733478605747223, 0.07614418119192123, 0.08495357632637024, 0.09376297146081924, 0.10257236659526825, 0.11138176918029785, 0.12019115686416626, 0.12900055944919586, 0.13780996203422546, 0.14661934971809387, 0.15542873740196228, 0.16423813998699188, 0.1730475276708603, 0.1818569302558899, 0.1906663179397583, 0.1994757205247879, 0.2082851231098175, 0.2170945107936859, 0.22590389847755432, 0.23471330106258392, 0.24352268874645233, 0.25233209133148193, 0.26114147901535034, 0.26995086669921875, 0.27876028418540955, 0.28756967186927795, 0.29637905955314636, 0.30518844723701477, 0.31399786472320557, 0.322807252407074, 0.3316166400909424, 0.3404260277748108, 0.3492354452610016, 0.35804483294487, 0.3668542504310608, 0.3756636381149292, 0.38447305560112, 0.3932824432849884, 0.4020918309688568, 0.4109012186527252, 0.419710636138916, 0.4285200238227844, 0.43732941150665283, 0.44613879919052124, 0.45494821667671204, 0.46375760436058044, 0.47256699204444885, 0.48137637972831726, 0.49018579721450806, 0.49899518489837646, 0.5078045725822449]}, "gradients/decoder.transformer.h.4.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 3.0, 7.0, 7.0, 6.0, 22.0, 13.0, 30.0, 27.0, 30.0, 48.0, 49.0, 55.0, 59.0, 75.0, 66.0, 81.0, 56.0, 60.0, 41.0, 42.0, 46.0, 33.0, 40.0, 32.0, 18.0, 18.0, 15.0, 12.0, 6.0, 9.0, 2.0, 4.0, 0.0, 3.0, 0.0, 1.0, 2.0], "bins": [-0.024235308170318604, -0.023661397397518158, -0.023087484762072563, -0.02251357212662697, -0.021939661353826523, -0.021365750581026077, -0.020791837945580482, -0.020217925310134888, -0.019644014537334442, -0.019070103764533997, -0.018496191129088402, -0.017922278493642807, -0.01734836772084236, -0.016774456948041916, -0.01620054431259632, -0.015626631677150726, -0.01505272090435028, -0.01447880920022726, -0.01390489749610424, -0.01333098579198122, -0.0127570740878582, -0.01218316238373518, -0.01160925067961216, -0.01103533897548914, -0.01046142727136612, -0.0098875155672431, -0.009313603863120079, -0.008739692158997059, -0.008165780454874039, -0.0075918687507510185, -0.007017957046627998, -0.006444045342504978, -0.005870133638381958, -0.005296221934258938, -0.004722310230135918, -0.0041483985260128975, -0.0035744868218898773, -0.003000575117766857, -0.002426663413643837, -0.0018527517095208168, -0.0012788400053977966, -0.0007049283012747765, -0.0001310165971517563, 0.0004428951069712639, 0.001016806811094284, 0.0015907185152173042, 0.0021646302193403244, 0.0027385419234633446, 0.0033124536275863647, 0.003886365331709385, 0.004460277035832405, 0.005034188739955425, 0.0056081004440784454, 0.006182012148201466, 0.006755923852324486, 0.007329835556447506, 0.007903747260570526, 0.008477658964693546, 0.009051570668816566, 0.009625482372939587, 0.010199394077062607, 0.010773305781185627, 0.011347217485308647, 0.011921129189431667, 0.012495040893554688]}, "gradients/decoder.transformer.h.4.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 3.0, 2.0, 4.0, 1.0, 7.0, 4.0, 7.0, 8.0, 9.0, 12.0, 7.0, 14.0, 19.0, 21.0, 16.0, 11.0, 20.0, 32.0, 17.0, 46.0, 29.0, 38.0, 20.0, 32.0, 37.0, 47.0, 35.0, 51.0, 43.0, 39.0, 42.0, 33.0, 35.0, 42.0, 23.0, 21.0, 26.0, 25.0, 21.0, 16.0, 16.0, 13.0, 8.0, 13.0, 8.0, 11.0, 2.0, 3.0, 5.0, 4.0, 4.0, 2.0, 7.0, 1.0, 3.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0], "bins": [-6.58984375, -6.37017822265625, -6.1505126953125, -5.93084716796875, -5.711181640625, -5.49151611328125, -5.2718505859375, -5.05218505859375, -4.83251953125, -4.61285400390625, -4.3931884765625, -4.17352294921875, -3.953857421875, -3.73419189453125, -3.5145263671875, -3.29486083984375, -3.0751953125, -2.85552978515625, -2.6358642578125, -2.41619873046875, -2.196533203125, -1.97686767578125, -1.7572021484375, -1.53753662109375, -1.31787109375, -1.09820556640625, -0.8785400390625, -0.65887451171875, -0.439208984375, -0.21954345703125, 0.0001220703125, 0.21978759765625, 0.439453125, 0.65911865234375, 0.8787841796875, 1.09844970703125, 1.318115234375, 1.53778076171875, 1.7574462890625, 1.97711181640625, 2.19677734375, 2.41644287109375, 2.6361083984375, 2.85577392578125, 3.075439453125, 3.29510498046875, 3.5147705078125, 3.73443603515625, 3.9541015625, 4.17376708984375, 4.3934326171875, 4.61309814453125, 4.832763671875, 5.05242919921875, 5.2720947265625, 5.49176025390625, 5.71142578125, 5.93109130859375, 6.1507568359375, 6.37042236328125, 6.590087890625, 6.80975341796875, 7.0294189453125, 7.24908447265625, 7.46875]}, "gradients/decoder.transformer.h.4.attn.c_proj.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 1.0, 2.0, 3.0, 3.0, 6.0, 5.0, 9.0, 8.0, 18.0, 22.0, 17.0, 28.0, 34.0, 48.0, 51.0, 79.0, 90.0, 125.0, 191.0, 246.0, 409.0, 627.0, 1041.0, 2108.0, 4154.0, 9013.0, 19941.0, 46068.0, 111410.0, 289978.0, 331480.0, 132555.0, 54260.0, 23392.0, 10522.0, 4871.0, 2368.0, 1239.0, 699.0, 435.0, 261.0, 184.0, 127.0, 106.0, 74.0, 63.0, 34.0, 38.0, 27.0, 23.0, 19.0, 11.0, 15.0, 9.0, 8.0, 5.0, 7.0, 0.0, 3.0, 2.0, 1.0], "bins": [-10.5078125, -10.1903076171875, -9.872802734375, -9.5552978515625, -9.23779296875, -8.9202880859375, -8.602783203125, -8.2852783203125, -7.9677734375, -7.6502685546875, -7.332763671875, -7.0152587890625, -6.69775390625, -6.3802490234375, -6.062744140625, -5.7452392578125, -5.427734375, -5.1102294921875, -4.792724609375, -4.4752197265625, -4.15771484375, -3.8402099609375, -3.522705078125, -3.2052001953125, -2.8876953125, -2.5701904296875, -2.252685546875, -1.9351806640625, -1.61767578125, -1.3001708984375, -0.982666015625, -0.6651611328125, -0.34765625, -0.0301513671875, 0.287353515625, 0.6048583984375, 0.92236328125, 1.2398681640625, 1.557373046875, 1.8748779296875, 2.1923828125, 2.5098876953125, 2.827392578125, 3.1448974609375, 3.46240234375, 3.7799072265625, 4.097412109375, 4.4149169921875, 4.732421875, 5.0499267578125, 5.367431640625, 5.6849365234375, 6.00244140625, 6.3199462890625, 6.637451171875, 6.9549560546875, 7.2724609375, 7.5899658203125, 7.907470703125, 8.2249755859375, 8.54248046875, 8.8599853515625, 9.177490234375, 9.4949951171875, 9.8125]}, "gradients/decoder.transformer.h.4.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 1.0, 8.0, 6.0, 0.0, 3.0, 2.0, 11.0, 14.0, 7.0, 11.0, 17.0, 13.0, 16.0, 27.0, 26.0, 20.0, 31.0, 27.0, 39.0, 29.0, 51.0, 58.0, 68.0, 114.0, 201.0, 1279.0, 220.0, 146.0, 117.0, 81.0, 59.0, 51.0, 46.0, 32.0, 29.0, 25.0, 21.0, 25.0, 18.0, 15.0, 14.0, 9.0, 14.0, 17.0, 8.0, 11.0, 5.0, 3.0, 6.0, 3.0, 1.0, 4.0, 3.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 2.0], "bins": [-15.546875, -15.02734375, -14.5078125, -13.98828125, -13.46875, -12.94921875, -12.4296875, -11.91015625, -11.390625, -10.87109375, -10.3515625, -9.83203125, -9.3125, -8.79296875, -8.2734375, -7.75390625, -7.234375, -6.71484375, -6.1953125, -5.67578125, -5.15625, -4.63671875, -4.1171875, -3.59765625, -3.078125, -2.55859375, -2.0390625, -1.51953125, -1.0, -0.48046875, 0.0390625, 0.55859375, 1.078125, 1.59765625, 2.1171875, 2.63671875, 3.15625, 3.67578125, 4.1953125, 4.71484375, 5.234375, 5.75390625, 6.2734375, 6.79296875, 7.3125, 7.83203125, 8.3515625, 8.87109375, 9.390625, 9.91015625, 10.4296875, 10.94921875, 11.46875, 11.98828125, 12.5078125, 13.02734375, 13.546875, 14.06640625, 14.5859375, 15.10546875, 15.625, 16.14453125, 16.6640625, 17.18359375, 17.703125]}, "gradients/decoder.transformer.h.4.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 1.0, 4.0, 5.0, 4.0, 7.0, 8.0, 7.0, 15.0, 18.0, 26.0, 34.0, 63.0, 57.0, 96.0, 143.0, 210.0, 363.0, 649.0, 2226.0, 24402.0, 2365728.0, 736158.0, 12333.0, 1595.0, 595.0, 336.0, 217.0, 119.0, 111.0, 47.0, 40.0, 25.0, 19.0, 13.0, 10.0, 5.0, 5.0, 6.0, 8.0, 4.0, 1.0, 0.0, 1.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-40.09375, -38.65625, -37.21875, -35.78125, -34.34375, -32.90625, -31.46875, -30.03125, -28.59375, -27.15625, -25.71875, -24.28125, -22.84375, -21.40625, -19.96875, -18.53125, -17.09375, -15.65625, -14.21875, -12.78125, -11.34375, -9.90625, -8.46875, -7.03125, -5.59375, -4.15625, -2.71875, -1.28125, 0.15625, 1.59375, 3.03125, 4.46875, 5.90625, 7.34375, 8.78125, 10.21875, 11.65625, 13.09375, 14.53125, 15.96875, 17.40625, 18.84375, 20.28125, 21.71875, 23.15625, 24.59375, 26.03125, 27.46875, 28.90625, 30.34375, 31.78125, 33.21875, 34.65625, 36.09375, 37.53125, 38.96875, 40.40625, 41.84375, 43.28125, 44.71875, 46.15625, 47.59375, 49.03125, 50.46875, 51.90625]}, "gradients/decoder.transformer.h.4.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 549.0, 459.0, 10.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-189.22972106933594, -176.56564331054688, -163.90158081054688, -151.2375030517578, -138.57342529296875, -125.90934753417969, -113.24527740478516, -100.58120727539062, -87.91712951660156, -75.2530517578125, -62.58898162841797, -49.92490768432617, -37.260833740234375, -24.596759796142578, -11.932685852050781, 0.73138427734375, 13.395462036132812, 26.05953598022461, 38.723609924316406, 51.3876838684082, 64.0517578125, 76.71583557128906, 89.3799057006836, 102.04397583007812, 114.70805358886719, 127.37213134765625, 140.03619384765625, 152.7002716064453, 165.36434936523438, 178.02842712402344, 190.6925048828125, 203.3565673828125, 216.0206298828125, 228.68470764160156, 241.34878540039062, 254.01284790039062, 266.67694091796875, 279.34100341796875, 292.00506591796875, 304.66912841796875, 317.3332214355469, 329.9972839355469, 342.661376953125, 355.325439453125, 367.989501953125, 380.6535949707031, 393.3176574707031, 405.98175048828125, 418.64581298828125, 431.30987548828125, 443.9739685058594, 456.6380310058594, 469.3021240234375, 481.9661865234375, 494.6302490234375, 507.2943115234375, 519.9583740234375, 532.6224365234375, 545.2864990234375, 557.9506225585938, 570.6146850585938, 583.2787475585938, 595.9428100585938, 608.6068725585938, 621.27099609375]}, "gradients/decoder.transformer.h.4.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 0.0, 4.0, 4.0, 1.0, 8.0, 9.0, 7.0, 7.0, 7.0, 14.0, 19.0, 21.0, 38.0, 24.0, 33.0, 34.0, 39.0, 25.0, 41.0, 34.0, 42.0, 39.0, 40.0, 41.0, 59.0, 54.0, 43.0, 37.0, 38.0, 38.0, 26.0, 18.0, 17.0, 17.0, 34.0, 22.0, 19.0, 6.0, 11.0, 9.0, 9.0, 7.0, 10.0, 5.0, 2.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-51.2830924987793, -49.833824157714844, -48.38455581665039, -46.93528366088867, -45.48601531982422, -44.036746978759766, -42.58747863769531, -41.138206481933594, -39.68893814086914, -38.23966979980469, -36.790401458740234, -35.341129302978516, -33.89186096191406, -32.44259262084961, -30.993324279785156, -29.54405403137207, -28.094785690307617, -26.645517349243164, -25.196247100830078, -23.746978759765625, -22.29770851135254, -20.848440170288086, -19.399169921875, -17.949901580810547, -16.500633239746094, -15.051363945007324, -13.602094650268555, -12.152826309204102, -10.703556060791016, -9.254287719726562, -7.805018424987793, -6.355749130249023, -4.9064788818359375, -3.457209587097168, -2.0079405307769775, -0.5586714744567871, 0.8905978202819824, 2.339867115020752, 3.7891359329223633, 5.238405227661133, 6.687674522399902, 8.136943817138672, 9.586213111877441, 11.035482406616211, 12.484750747680664, 13.93402099609375, 15.383289337158203, 16.832557678222656, 18.281827926635742, 19.731096267700195, 21.18036651611328, 22.629634857177734, 24.07890510559082, 25.528173446655273, 26.97744369506836, 28.426712036132812, 29.875980377197266, 31.32524871826172, 32.77451705932617, 34.22378921508789, 35.673057556152344, 37.1223258972168, 38.57159423828125, 40.02086639404297, 41.47013473510742]}, "gradients/decoder.transformer.h.3.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 3.0, 2.0, 3.0, 7.0, 7.0, 10.0, 13.0, 3.0, 13.0, 18.0, 16.0, 15.0, 29.0, 30.0, 34.0, 28.0, 28.0, 27.0, 31.0, 44.0, 35.0, 26.0, 48.0, 46.0, 43.0, 48.0, 30.0, 45.0, 32.0, 40.0, 30.0, 31.0, 17.0, 24.0, 19.0, 18.0, 22.0, 11.0, 12.0, 14.0, 10.0, 12.0, 4.0, 8.0, 4.0, 5.0, 4.0, 5.0, 4.0, 3.0, 2.0, 0.0, 1.0, 1.0, 1.0], "bins": [-7.65625, -7.42279052734375, -7.1893310546875, -6.95587158203125, -6.722412109375, -6.48895263671875, -6.2554931640625, -6.02203369140625, -5.78857421875, -5.55511474609375, -5.3216552734375, -5.08819580078125, -4.854736328125, -4.62127685546875, -4.3878173828125, -4.15435791015625, -3.9208984375, -3.68743896484375, -3.4539794921875, -3.22052001953125, -2.987060546875, -2.75360107421875, -2.5201416015625, -2.28668212890625, -2.05322265625, -1.81976318359375, -1.5863037109375, -1.35284423828125, -1.119384765625, -0.88592529296875, -0.6524658203125, -0.41900634765625, -0.185546875, 0.04791259765625, 0.2813720703125, 0.51483154296875, 0.748291015625, 0.98175048828125, 1.2152099609375, 1.44866943359375, 1.68212890625, 1.91558837890625, 2.1490478515625, 2.38250732421875, 2.615966796875, 2.84942626953125, 3.0828857421875, 3.31634521484375, 3.5498046875, 3.78326416015625, 4.0167236328125, 4.25018310546875, 4.483642578125, 4.71710205078125, 4.9505615234375, 5.18402099609375, 5.41748046875, 5.65093994140625, 5.8843994140625, 6.11785888671875, 6.351318359375, 6.58477783203125, 6.8182373046875, 7.05169677734375, 7.28515625]}, "gradients/decoder.transformer.h.3.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 3.0, 3.0, 4.0, 8.0, 5.0, 11.0, 7.0, 15.0, 16.0, 19.0, 18.0, 28.0, 29.0, 43.0, 57.0, 77.0, 97.0, 142.0, 200.0, 227.0, 372.0, 887.0, 40020.0, 4097938.0, 51608.0, 1021.0, 400.0, 260.0, 206.0, 142.0, 98.0, 76.0, 53.0, 43.0, 37.0, 20.0, 20.0, 22.0, 12.0, 11.0, 11.0, 5.0, 8.0, 5.0, 3.0, 2.0, 3.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-67.625, -65.4794921875, -63.333984375, -61.1884765625, -59.04296875, -56.8974609375, -54.751953125, -52.6064453125, -50.4609375, -48.3154296875, -46.169921875, -44.0244140625, -41.87890625, -39.7333984375, -37.587890625, -35.4423828125, -33.296875, -31.1513671875, -29.005859375, -26.8603515625, -24.71484375, -22.5693359375, -20.423828125, -18.2783203125, -16.1328125, -13.9873046875, -11.841796875, -9.6962890625, -7.55078125, -5.4052734375, -3.259765625, -1.1142578125, 1.03125, 3.1767578125, 5.322265625, 7.4677734375, 9.61328125, 11.7587890625, 13.904296875, 16.0498046875, 18.1953125, 20.3408203125, 22.486328125, 24.6318359375, 26.77734375, 28.9228515625, 31.068359375, 33.2138671875, 35.359375, 37.5048828125, 39.650390625, 41.7958984375, 43.94140625, 46.0869140625, 48.232421875, 50.3779296875, 52.5234375, 54.6689453125, 56.814453125, 58.9599609375, 61.10546875, 63.2509765625, 65.396484375, 67.5419921875, 69.6875]}, "gradients/decoder.transformer.h.3.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 2.0, 2.0, 5.0, 3.0, 2.0, 2.0, 8.0, 8.0, 5.0, 22.0, 20.0, 40.0, 38.0, 60.0, 100.0, 134.0, 167.0, 245.0, 379.0, 525.0, 627.0, 530.0, 356.0, 275.0, 157.0, 107.0, 62.0, 62.0, 32.0, 29.0, 23.0, 19.0, 12.0, 5.0, 6.0, 3.0, 6.0, 4.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-16.75, -16.3031005859375, -15.856201171875, -15.4093017578125, -14.96240234375, -14.5155029296875, -14.068603515625, -13.6217041015625, -13.1748046875, -12.7279052734375, -12.281005859375, -11.8341064453125, -11.38720703125, -10.9403076171875, -10.493408203125, -10.0465087890625, -9.599609375, -9.1527099609375, -8.705810546875, -8.2589111328125, -7.81201171875, -7.3651123046875, -6.918212890625, -6.4713134765625, -6.0244140625, -5.5775146484375, -5.130615234375, -4.6837158203125, -4.23681640625, -3.7899169921875, -3.343017578125, -2.8961181640625, -2.44921875, -2.0023193359375, -1.555419921875, -1.1085205078125, -0.66162109375, -0.2147216796875, 0.232177734375, 0.6790771484375, 1.1259765625, 1.5728759765625, 2.019775390625, 2.4666748046875, 2.91357421875, 3.3604736328125, 3.807373046875, 4.2542724609375, 4.701171875, 5.1480712890625, 5.594970703125, 6.0418701171875, 6.48876953125, 6.9356689453125, 7.382568359375, 7.8294677734375, 8.2763671875, 8.7232666015625, 9.170166015625, 9.6170654296875, 10.06396484375, 10.5108642578125, 10.957763671875, 11.4046630859375, 11.8515625]}, "gradients/decoder.transformer.h.3.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 3.0, 0.0, 5.0, 7.0, 3.0, 5.0, 9.0, 6.0, 8.0, 14.0, 17.0, 17.0, 34.0, 37.0, 57.0, 86.0, 130.0, 251.0, 656.0, 2465.0, 17050.0, 259236.0, 3572259.0, 317994.0, 19821.0, 2651.0, 720.0, 252.0, 143.0, 101.0, 52.0, 40.0, 40.0, 30.0, 21.0, 15.0, 13.0, 12.0, 6.0, 6.0, 5.0, 8.0, 4.0, 0.0, 2.0, 2.0, 2.0, 3.0, 1.0], "bins": [-40.8125, -39.72509765625, -38.6376953125, -37.55029296875, -36.462890625, -35.37548828125, -34.2880859375, -33.20068359375, -32.11328125, -31.02587890625, -29.9384765625, -28.85107421875, -27.763671875, -26.67626953125, -25.5888671875, -24.50146484375, -23.4140625, -22.32666015625, -21.2392578125, -20.15185546875, -19.064453125, -17.97705078125, -16.8896484375, -15.80224609375, -14.71484375, -13.62744140625, -12.5400390625, -11.45263671875, -10.365234375, -9.27783203125, -8.1904296875, -7.10302734375, -6.015625, -4.92822265625, -3.8408203125, -2.75341796875, -1.666015625, -0.57861328125, 0.5087890625, 1.59619140625, 2.68359375, 3.77099609375, 4.8583984375, 5.94580078125, 7.033203125, 8.12060546875, 9.2080078125, 10.29541015625, 11.3828125, 12.47021484375, 13.5576171875, 14.64501953125, 15.732421875, 16.81982421875, 17.9072265625, 18.99462890625, 20.08203125, 21.16943359375, 22.2568359375, 23.34423828125, 24.431640625, 25.51904296875, 26.6064453125, 27.69384765625, 28.78125]}, "gradients/decoder.transformer.h.3.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 17.0, 246.0, 630.0, 120.0, 6.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-153.51507568359375, -143.5267333984375, -133.53839111328125, -123.55004119873047, -113.56169891357422, -103.57335662841797, -93.58500671386719, -83.59666442871094, -73.60832214355469, -63.61997985839844, -53.63163375854492, -43.643287658691406, -33.654945373535156, -23.666603088378906, -13.67825698852539, -3.689910888671875, 6.298431396484375, 16.286775588989258, 26.27511978149414, 36.263465881347656, 46.251808166503906, 56.240150451660156, 66.22850036621094, 76.21684265136719, 86.20518493652344, 96.19352722167969, 106.18186950683594, 116.17021942138672, 126.15856170654297, 136.14691162109375, 146.13525390625, 156.12359619140625, 166.11190795898438, 176.10025024414062, 186.08859252929688, 196.07693481445312, 206.06527709960938, 216.05361938476562, 226.04197692871094, 236.0303192138672, 246.01866149902344, 256.00701904296875, 265.995361328125, 275.98370361328125, 285.9720458984375, 295.96038818359375, 305.94873046875, 315.93707275390625, 325.9254150390625, 335.91375732421875, 345.902099609375, 355.89044189453125, 365.8787841796875, 375.86712646484375, 385.85546875, 395.84381103515625, 405.8321533203125, 415.82049560546875, 425.808837890625, 435.79718017578125, 445.7855224609375, 455.77386474609375, 465.76220703125, 475.75054931640625, 485.7389221191406]}, "gradients/decoder.transformer.h.3.ln_2.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 5.0, 2.0, 3.0, 2.0, 4.0, 4.0, 5.0, 6.0, 9.0, 5.0, 15.0, 11.0, 19.0, 14.0, 23.0, 31.0, 25.0, 37.0, 33.0, 26.0, 33.0, 22.0, 43.0, 32.0, 34.0, 56.0, 48.0, 35.0, 34.0, 43.0, 32.0, 33.0, 40.0, 34.0, 30.0, 27.0, 24.0, 18.0, 22.0, 19.0, 13.0, 11.0, 8.0, 7.0, 9.0, 4.0, 12.0, 7.0, 1.0, 2.0, 3.0, 0.0, 3.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-36.44572067260742, -35.20658493041992, -33.967445373535156, -32.728309631347656, -31.48917007446289, -30.25003433227539, -29.010896682739258, -27.771759033203125, -26.532621383666992, -25.29348373413086, -24.054346084594727, -22.815208435058594, -21.576072692871094, -20.336933135986328, -19.097797393798828, -17.858659744262695, -16.619522094726562, -15.38038444519043, -14.141246795654297, -12.90211009979248, -11.662972450256348, -10.423834800720215, -9.184698104858398, -7.945560455322266, -6.706422805786133, -5.46728515625, -4.228147983551025, -2.9890105724334717, -1.749873161315918, -0.5107355117797852, 0.7284016609191895, 1.967538833618164, 3.2066802978515625, 4.445817947387695, 5.68495512008667, 6.9240922927856445, 8.163229942321777, 9.40236759185791, 10.641504287719727, 11.88064193725586, 13.119779586791992, 14.358917236328125, 15.598054885864258, 16.83719253540039, 18.07632827758789, 19.315467834472656, 20.554603576660156, 21.79374122619629, 23.032878875732422, 24.272016525268555, 25.511154174804688, 26.75029182434082, 27.989429473876953, 29.228565216064453, 30.467702865600586, 31.70684051513672, 32.94597625732422, 34.18511199951172, 35.424251556396484, 36.663387298583984, 37.90252685546875, 39.14166259765625, 40.380802154541016, 41.619937896728516, 42.85907745361328]}, "gradients/decoder.transformer.h.3.crossattention.c_proj.bias": {"_type": "histogram", "values": [5.0, 2.0, 1.0, 2.0, 1.0, 0.0, 2.0, 4.0, 4.0, 4.0, 11.0, 8.0, 10.0, 10.0, 16.0, 11.0, 11.0, 22.0, 20.0, 17.0, 15.0, 16.0, 24.0, 30.0, 29.0, 33.0, 31.0, 37.0, 40.0, 38.0, 39.0, 28.0, 46.0, 46.0, 30.0, 35.0, 34.0, 31.0, 25.0, 34.0, 11.0, 22.0, 25.0, 19.0, 17.0, 15.0, 21.0, 10.0, 6.0, 14.0, 8.0, 10.0, 7.0, 6.0, 10.0, 4.0, 3.0, 5.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0], "bins": [-6.6484375, -6.4384765625, -6.228515625, -6.0185546875, -5.80859375, -5.5986328125, -5.388671875, -5.1787109375, -4.96875, -4.7587890625, -4.548828125, -4.3388671875, -4.12890625, -3.9189453125, -3.708984375, -3.4990234375, -3.2890625, -3.0791015625, -2.869140625, -2.6591796875, -2.44921875, -2.2392578125, -2.029296875, -1.8193359375, -1.609375, -1.3994140625, -1.189453125, -0.9794921875, -0.76953125, -0.5595703125, -0.349609375, -0.1396484375, 0.0703125, 0.2802734375, 0.490234375, 0.7001953125, 0.91015625, 1.1201171875, 1.330078125, 1.5400390625, 1.75, 1.9599609375, 2.169921875, 2.3798828125, 2.58984375, 2.7998046875, 3.009765625, 3.2197265625, 3.4296875, 3.6396484375, 3.849609375, 4.0595703125, 4.26953125, 4.4794921875, 4.689453125, 4.8994140625, 5.109375, 5.3193359375, 5.529296875, 5.7392578125, 5.94921875, 6.1591796875, 6.369140625, 6.5791015625, 6.7890625]}, "gradients/decoder.transformer.h.3.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 4.0, 3.0, 6.0, 5.0, 11.0, 21.0, 41.0, 51.0, 79.0, 109.0, 200.0, 303.0, 390.0, 612.0, 834.0, 1342.0, 1867.0, 2852.0, 4086.0, 6084.0, 9082.0, 13143.0, 19290.0, 28728.0, 41712.0, 59500.0, 81957.0, 107321.0, 127261.0, 130379.0, 113479.0, 88718.0, 64492.0, 45983.0, 31610.0, 21315.0, 14619.0, 9954.0, 6811.0, 4628.0, 3208.0, 2111.0, 1357.0, 983.0, 628.0, 473.0, 326.0, 199.0, 147.0, 89.0, 64.0, 39.0, 22.0, 11.0, 13.0, 6.0, 4.0, 3.0, 6.0, 1.0], "bins": [-0.830078125, -0.805023193359375, -0.77996826171875, -0.754913330078125, -0.7298583984375, -0.704803466796875, -0.67974853515625, -0.654693603515625, -0.629638671875, -0.604583740234375, -0.57952880859375, -0.554473876953125, -0.5294189453125, -0.504364013671875, -0.47930908203125, -0.454254150390625, -0.42919921875, -0.404144287109375, -0.37908935546875, -0.354034423828125, -0.3289794921875, -0.303924560546875, -0.27886962890625, -0.253814697265625, -0.228759765625, -0.203704833984375, -0.17864990234375, -0.153594970703125, -0.1285400390625, -0.103485107421875, -0.07843017578125, -0.053375244140625, -0.0283203125, -0.003265380859375, 0.02178955078125, 0.046844482421875, 0.0718994140625, 0.096954345703125, 0.12200927734375, 0.147064208984375, 0.172119140625, 0.197174072265625, 0.22222900390625, 0.247283935546875, 0.2723388671875, 0.297393798828125, 0.32244873046875, 0.347503662109375, 0.37255859375, 0.397613525390625, 0.42266845703125, 0.447723388671875, 0.4727783203125, 0.497833251953125, 0.52288818359375, 0.547943115234375, 0.572998046875, 0.598052978515625, 0.62310791015625, 0.648162841796875, 0.6732177734375, 0.698272705078125, 0.72332763671875, 0.748382568359375, 0.7734375]}, "gradients/decoder.transformer.h.3.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 5.0, 4.0, 7.0, 5.0, 6.0, 15.0, 16.0, 17.0, 33.0, 21.0, 19.0, 36.0, 34.0, 25.0, 29.0, 41.0, 33.0, 42.0, 40.0, 37.0, 33.0, 1055.0, 39.0, 42.0, 45.0, 48.0, 42.0, 37.0, 33.0, 23.0, 27.0, 20.0, 21.0, 17.0, 18.0, 11.0, 17.0, 8.0, 8.0, 6.0, 8.0, 3.0, 6.0, 1.0, 5.0, 3.0, 0.0, 0.0, 1.0, 1.0], "bins": [-5.953125, -5.78448486328125, -5.6158447265625, -5.44720458984375, -5.278564453125, -5.10992431640625, -4.9412841796875, -4.77264404296875, -4.60400390625, -4.43536376953125, -4.2667236328125, -4.09808349609375, -3.929443359375, -3.76080322265625, -3.5921630859375, -3.42352294921875, -3.2548828125, -3.08624267578125, -2.9176025390625, -2.74896240234375, -2.580322265625, -2.41168212890625, -2.2430419921875, -2.07440185546875, -1.90576171875, -1.73712158203125, -1.5684814453125, -1.39984130859375, -1.231201171875, -1.06256103515625, -0.8939208984375, -0.72528076171875, -0.556640625, -0.38800048828125, -0.2193603515625, -0.05072021484375, 0.117919921875, 0.28656005859375, 0.4552001953125, 0.62384033203125, 0.79248046875, 0.96112060546875, 1.1297607421875, 1.29840087890625, 1.467041015625, 1.63568115234375, 1.8043212890625, 1.97296142578125, 2.1416015625, 2.31024169921875, 2.4788818359375, 2.64752197265625, 2.816162109375, 2.98480224609375, 3.1534423828125, 3.32208251953125, 3.49072265625, 3.65936279296875, 3.8280029296875, 3.99664306640625, 4.165283203125, 4.33392333984375, 4.5025634765625, 4.67120361328125, 4.83984375]}, "gradients/decoder.transformer.h.3.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 3.0, 16.0, 17.0, 15.0, 36.0, 39.0, 66.0, 84.0, 126.0, 207.0, 314.0, 527.0, 758.0, 1273.0, 2064.0, 3282.0, 5358.0, 8603.0, 13452.0, 21105.0, 32456.0, 48559.0, 71512.0, 99763.0, 127819.0, 1184187.0, 136895.0, 106249.0, 78649.0, 53664.0, 35781.0, 23394.0, 15084.0, 9636.0, 6097.0, 3841.0, 2393.0, 1460.0, 874.0, 525.0, 350.0, 231.0, 123.0, 104.0, 49.0, 25.0, 31.0, 16.0, 12.0, 7.0, 4.0, 4.0, 2.0, 1.0, 3.0], "bins": [-0.62841796875, -0.6102676391601562, -0.5921173095703125, -0.5739669799804688, -0.555816650390625, -0.5376663208007812, -0.5195159912109375, -0.5013656616210938, -0.48321533203125, -0.46506500244140625, -0.4469146728515625, -0.42876434326171875, -0.410614013671875, -0.39246368408203125, -0.3743133544921875, -0.35616302490234375, -0.3380126953125, -0.31986236572265625, -0.3017120361328125, -0.28356170654296875, -0.265411376953125, -0.24726104736328125, -0.2291107177734375, -0.21096038818359375, -0.19281005859375, -0.17465972900390625, -0.1565093994140625, -0.13835906982421875, -0.120208740234375, -0.10205841064453125, -0.0839080810546875, -0.06575775146484375, -0.047607421875, -0.02945709228515625, -0.0113067626953125, 0.00684356689453125, 0.024993896484375, 0.04314422607421875, 0.0612945556640625, 0.07944488525390625, 0.09759521484375, 0.11574554443359375, 0.1338958740234375, 0.15204620361328125, 0.170196533203125, 0.18834686279296875, 0.2064971923828125, 0.22464752197265625, 0.2427978515625, 0.26094818115234375, 0.2790985107421875, 0.29724884033203125, 0.315399169921875, 0.33354949951171875, 0.3516998291015625, 0.36985015869140625, 0.38800048828125, 0.40615081787109375, 0.4243011474609375, 0.44245147705078125, 0.460601806640625, 0.47875213623046875, 0.4969024658203125, 0.5150527954101562, 0.533203125]}, "gradients/decoder.transformer.h.3.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 2.0, 1.0, 5.0, 4.0, 2.0, 3.0, 4.0, 6.0, 5.0, 7.0, 11.0, 6.0, 12.0, 13.0, 26.0, 24.0, 27.0, 46.0, 106.0, 143.0, 145.0, 131.0, 72.0, 53.0, 28.0, 27.0, 29.0, 15.0, 13.0, 6.0, 6.0, 3.0, 2.0, 3.0, 5.0, 3.0, 2.0, 6.0, 1.0, 3.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 3.0], "bins": [-0.026031494140625, -0.025250673294067383, -0.024469852447509766, -0.02368903160095215, -0.02290821075439453, -0.022127389907836914, -0.021346569061279297, -0.02056574821472168, -0.019784927368164062, -0.019004106521606445, -0.018223285675048828, -0.01744246482849121, -0.016661643981933594, -0.015880823135375977, -0.01510000228881836, -0.014319181442260742, -0.013538360595703125, -0.012757539749145508, -0.01197671890258789, -0.011195898056030273, -0.010415077209472656, -0.009634256362915039, -0.008853435516357422, -0.008072614669799805, -0.0072917938232421875, -0.00651097297668457, -0.005730152130126953, -0.004949331283569336, -0.004168510437011719, -0.0033876895904541016, -0.0026068687438964844, -0.0018260478973388672, -0.00104522705078125, -0.0002644062042236328, 0.0005164146423339844, 0.0012972354888916016, 0.0020780563354492188, 0.002858877182006836, 0.003639698028564453, 0.00442051887512207, 0.0052013397216796875, 0.005982160568237305, 0.006762981414794922, 0.007543802261352539, 0.008324623107910156, 0.009105443954467773, 0.00988626480102539, 0.010667085647583008, 0.011447906494140625, 0.012228727340698242, 0.01300954818725586, 0.013790369033813477, 0.014571189880371094, 0.015352010726928711, 0.016132831573486328, 0.016913652420043945, 0.017694473266601562, 0.01847529411315918, 0.019256114959716797, 0.020036935806274414, 0.02081775665283203, 0.02159857749938965, 0.022379398345947266, 0.023160219192504883, 0.0239410400390625]}, "gradients/decoder.transformer.h.3.crossattention.q_attn.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 1.0, 2.0, 3.0, 2.0, 2.0, 2.0, 6.0, 8.0, 7.0, 8.0, 7.0, 10.0, 7.0, 17.0, 20.0, 25.0, 32.0, 83.0, 104.0, 116.0, 203.0, 459.0, 2844.0, 1032658.0, 10576.0, 625.0, 255.0, 132.0, 90.0, 57.0, 45.0, 25.0, 26.0, 24.0, 14.0, 11.0, 13.0, 10.0, 8.0, 3.0, 7.0, 6.0, 3.0, 2.0, 1.0, 2.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.453125, -0.43833160400390625, -0.4235382080078125, -0.40874481201171875, -0.393951416015625, -0.37915802001953125, -0.3643646240234375, -0.34957122802734375, -0.33477783203125, -0.31998443603515625, -0.3051910400390625, -0.29039764404296875, -0.275604248046875, -0.26081085205078125, -0.2460174560546875, -0.23122406005859375, -0.2164306640625, -0.20163726806640625, -0.1868438720703125, -0.17205047607421875, -0.157257080078125, -0.14246368408203125, -0.1276702880859375, -0.11287689208984375, -0.09808349609375, -0.08329010009765625, -0.0684967041015625, -0.05370330810546875, -0.038909912109375, -0.02411651611328125, -0.0093231201171875, 0.00547027587890625, 0.020263671875, 0.03505706787109375, 0.0498504638671875, 0.06464385986328125, 0.079437255859375, 0.09423065185546875, 0.1090240478515625, 0.12381744384765625, 0.13861083984375, 0.15340423583984375, 0.1681976318359375, 0.18299102783203125, 0.197784423828125, 0.21257781982421875, 0.2273712158203125, 0.24216461181640625, 0.2569580078125, 0.27175140380859375, 0.2865447998046875, 0.30133819580078125, 0.316131591796875, 0.33092498779296875, 0.3457183837890625, 0.36051177978515625, 0.37530517578125, 0.39009857177734375, 0.4048919677734375, 0.41968536376953125, 0.434478759765625, 0.44927215576171875, 0.4640655517578125, 0.47885894775390625, 0.49365234375]}, "gradients/decoder.transformer.h.3.ln_cross_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 957.0, 56.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.08458857983350754, -0.07367686927318573, -0.06276515871286392, -0.05185345187783241, -0.040941741317510605, -0.030030030757188797, -0.019118323922157288, -0.00820661336183548, 0.002705097198486328, 0.013616806827485561, 0.024528516456484795, 0.03544022515416145, 0.04635193571448326, 0.05726364627480507, 0.06817535310983658, 0.07908706367015839, 0.0899987742304802, 0.100910484790802, 0.11182219535112381, 0.12273390591144562, 0.13364560902118683, 0.14455732703208923, 0.15546903014183044, 0.16638073325157166, 0.17729245126247406, 0.18820415437221527, 0.19911587238311768, 0.2100275754928589, 0.2209392935037613, 0.2318509966135025, 0.2427627146244049, 0.2536744177341461, 0.26458612084388733, 0.27549782395362854, 0.28640952706336975, 0.29732125997543335, 0.30823296308517456, 0.31914466619491577, 0.330056369304657, 0.3409680724143982, 0.3518798053264618, 0.362791508436203, 0.3737032115459442, 0.3846149444580078, 0.395526647567749, 0.40643835067749023, 0.41735005378723145, 0.42826175689697266, 0.43917346000671387, 0.4500851631164551, 0.4609968662261963, 0.4719085991382599, 0.4828203022480011, 0.4937320053577423, 0.5046437382698059, 0.5155554413795471, 0.5264671444892883, 0.5373788475990295, 0.5482905507087708, 0.559202253818512, 0.5701139569282532, 0.5810257196426392, 0.5919374227523804, 0.6028491258621216, 0.6137608289718628]}, "gradients/decoder.transformer.h.3.ln_cross_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 2.0, 4.0, 8.0, 8.0, 11.0, 9.0, 12.0, 21.0, 30.0, 32.0, 36.0, 36.0, 54.0, 36.0, 66.0, 65.0, 67.0, 58.0, 62.0, 46.0, 53.0, 54.0, 34.0, 57.0, 28.0, 25.0, 24.0, 25.0, 13.0, 18.0, 7.0, 3.0, 5.0, 0.0, 3.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.03312307596206665, -0.032248929142951965, -0.03137478232383728, -0.030500631779432297, -0.02962648496031761, -0.028752338141202927, -0.027878189459443092, -0.027004040777683258, -0.026129893958568573, -0.025255747139453888, -0.024381598457694054, -0.02350744977593422, -0.022633302956819534, -0.02175915613770485, -0.020885007455945015, -0.02001085877418518, -0.019136711955070496, -0.01826256513595581, -0.017388416454195976, -0.016514267772436142, -0.015640120953321457, -0.014765973202884197, -0.013891825452446938, -0.013017677702009678, -0.012143529951572418, -0.011269382201135159, -0.010395234450697899, -0.00952108670026064, -0.00864693894982338, -0.00777279119938612, -0.00689864344894886, -0.0060244956985116005, -0.005150347948074341, -0.004276200197637081, -0.0034020524471998215, -0.002527904696762562, -0.0016537569463253021, -0.0007796091958880424, 9.453855454921722e-05, 0.0009686863049864769, 0.0018428340554237366, 0.0027169818058609962, 0.003591129556298256, 0.004465277306735516, 0.005339425057172775, 0.006213572807610035, 0.007087720558047295, 0.007961868308484554, 0.008836016058921814, 0.009710163809359074, 0.010584311559796333, 0.011458459310233593, 0.012332607060670853, 0.013206754811108112, 0.014080902561545372, 0.014955050311982632, 0.01582919806241989, 0.016703344881534576, 0.01757749356329441, 0.018451642245054245, 0.01932578906416893, 0.020199935883283615, 0.02107408456504345, 0.021948233246803284, 0.02282238006591797]}, "gradients/decoder.transformer.h.3.attn.c_proj.bias": {"_type": "histogram", "values": [5.0, 2.0, 1.0, 2.0, 1.0, 0.0, 2.0, 4.0, 4.0, 5.0, 10.0, 8.0, 10.0, 10.0, 16.0, 11.0, 11.0, 22.0, 20.0, 17.0, 15.0, 17.0, 23.0, 30.0, 29.0, 33.0, 31.0, 36.0, 40.0, 39.0, 39.0, 28.0, 46.0, 46.0, 29.0, 36.0, 35.0, 30.0, 25.0, 34.0, 11.0, 22.0, 25.0, 19.0, 17.0, 15.0, 20.0, 11.0, 6.0, 13.0, 9.0, 10.0, 8.0, 5.0, 10.0, 4.0, 2.0, 6.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0], "bins": [-6.64453125, -6.4346923828125, -6.224853515625, -6.0150146484375, -5.80517578125, -5.5953369140625, -5.385498046875, -5.1756591796875, -4.9658203125, -4.7559814453125, -4.546142578125, -4.3363037109375, -4.12646484375, -3.9166259765625, -3.706787109375, -3.4969482421875, -3.287109375, -3.0772705078125, -2.867431640625, -2.6575927734375, -2.44775390625, -2.2379150390625, -2.028076171875, -1.8182373046875, -1.6083984375, -1.3985595703125, -1.188720703125, -0.9788818359375, -0.76904296875, -0.5592041015625, -0.349365234375, -0.1395263671875, 0.0703125, 0.2801513671875, 0.489990234375, 0.6998291015625, 0.90966796875, 1.1195068359375, 1.329345703125, 1.5391845703125, 1.7490234375, 1.9588623046875, 2.168701171875, 2.3785400390625, 2.58837890625, 2.7982177734375, 3.008056640625, 3.2178955078125, 3.427734375, 3.6375732421875, 3.847412109375, 4.0572509765625, 4.26708984375, 4.4769287109375, 4.686767578125, 4.8966064453125, 5.1064453125, 5.3162841796875, 5.526123046875, 5.7359619140625, 5.94580078125, 6.1556396484375, 6.365478515625, 6.5753173828125, 6.78515625]}, "gradients/decoder.transformer.h.3.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 3.0, 5.0, 5.0, 4.0, 2.0, 6.0, 12.0, 13.0, 25.0, 25.0, 32.0, 43.0, 54.0, 83.0, 92.0, 122.0, 151.0, 169.0, 239.0, 317.0, 352.0, 457.0, 619.0, 829.0, 1213.0, 2290.0, 5470.0, 16536.0, 54800.0, 171531.0, 405706.0, 257495.0, 86825.0, 26066.0, 8190.0, 3155.0, 1641.0, 916.0, 655.0, 493.0, 403.0, 333.0, 274.0, 221.0, 148.0, 121.0, 87.0, 73.0, 63.0, 48.0, 37.0, 33.0, 24.0, 11.0, 14.0, 9.0, 8.0, 12.0, 5.0, 2.0, 4.0, 2.0], "bins": [-13.25, -12.8447265625, -12.439453125, -12.0341796875, -11.62890625, -11.2236328125, -10.818359375, -10.4130859375, -10.0078125, -9.6025390625, -9.197265625, -8.7919921875, -8.38671875, -7.9814453125, -7.576171875, -7.1708984375, -6.765625, -6.3603515625, -5.955078125, -5.5498046875, -5.14453125, -4.7392578125, -4.333984375, -3.9287109375, -3.5234375, -3.1181640625, -2.712890625, -2.3076171875, -1.90234375, -1.4970703125, -1.091796875, -0.6865234375, -0.28125, 0.1240234375, 0.529296875, 0.9345703125, 1.33984375, 1.7451171875, 2.150390625, 2.5556640625, 2.9609375, 3.3662109375, 3.771484375, 4.1767578125, 4.58203125, 4.9873046875, 5.392578125, 5.7978515625, 6.203125, 6.6083984375, 7.013671875, 7.4189453125, 7.82421875, 8.2294921875, 8.634765625, 9.0400390625, 9.4453125, 9.8505859375, 10.255859375, 10.6611328125, 11.06640625, 11.4716796875, 11.876953125, 12.2822265625, 12.6875]}, "gradients/decoder.transformer.h.3.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0, 1.0, 2.0, 5.0, 5.0, 4.0, 7.0, 7.0, 15.0, 15.0, 15.0, 23.0, 17.0, 29.0, 37.0, 37.0, 39.0, 43.0, 79.0, 86.0, 127.0, 262.0, 1413.0, 226.0, 128.0, 85.0, 56.0, 49.0, 46.0, 32.0, 28.0, 28.0, 19.0, 24.0, 15.0, 12.0, 9.0, 9.0, 3.0, 4.0, 1.0, 6.0, 2.0, 1.0, 2.0, 1.0, 1.0, 4.0, 0.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-20.046875, -19.388427734375, -18.72998046875, -18.071533203125, -17.4130859375, -16.754638671875, -16.09619140625, -15.437744140625, -14.779296875, -14.120849609375, -13.46240234375, -12.803955078125, -12.1455078125, -11.487060546875, -10.82861328125, -10.170166015625, -9.51171875, -8.853271484375, -8.19482421875, -7.536376953125, -6.8779296875, -6.219482421875, -5.56103515625, -4.902587890625, -4.244140625, -3.585693359375, -2.92724609375, -2.268798828125, -1.6103515625, -0.951904296875, -0.29345703125, 0.364990234375, 1.0234375, 1.681884765625, 2.34033203125, 2.998779296875, 3.6572265625, 4.315673828125, 4.97412109375, 5.632568359375, 6.291015625, 6.949462890625, 7.60791015625, 8.266357421875, 8.9248046875, 9.583251953125, 10.24169921875, 10.900146484375, 11.55859375, 12.217041015625, 12.87548828125, 13.533935546875, 14.1923828125, 14.850830078125, 15.50927734375, 16.167724609375, 16.826171875, 17.484619140625, 18.14306640625, 18.801513671875, 19.4599609375, 20.118408203125, 20.77685546875, 21.435302734375, 22.09375]}, "gradients/decoder.transformer.h.3.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 5.0, 6.0, 6.0, 10.0, 11.0, 14.0, 17.0, 31.0, 44.0, 50.0, 86.0, 137.0, 285.0, 517.0, 1069.0, 7230.0, 3129386.0, 4650.0, 1000.0, 447.0, 253.0, 166.0, 82.0, 59.0, 42.0, 29.0, 16.0, 17.0, 14.0, 9.0, 9.0, 1.0, 4.0, 3.0, 1.0, 2.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-125.1875, -121.5537109375, -117.919921875, -114.2861328125, -110.65234375, -107.0185546875, -103.384765625, -99.7509765625, -96.1171875, -92.4833984375, -88.849609375, -85.2158203125, -81.58203125, -77.9482421875, -74.314453125, -70.6806640625, -67.046875, -63.4130859375, -59.779296875, -56.1455078125, -52.51171875, -48.8779296875, -45.244140625, -41.6103515625, -37.9765625, -34.3427734375, -30.708984375, -27.0751953125, -23.44140625, -19.8076171875, -16.173828125, -12.5400390625, -8.90625, -5.2724609375, -1.638671875, 1.9951171875, 5.62890625, 9.2626953125, 12.896484375, 16.5302734375, 20.1640625, 23.7978515625, 27.431640625, 31.0654296875, 34.69921875, 38.3330078125, 41.966796875, 45.6005859375, 49.234375, 52.8681640625, 56.501953125, 60.1357421875, 63.76953125, 67.4033203125, 71.037109375, 74.6708984375, 78.3046875, 81.9384765625, 85.572265625, 89.2060546875, 92.83984375, 96.4736328125, 100.107421875, 103.7412109375, 107.375]}, "gradients/decoder.transformer.h.3.ln_1.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 3.0, 5.0, 18.0, 41.0, 98.0, 179.0, 247.0, 199.0, 119.0, 69.0, 21.0, 8.0, 5.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-71.30699157714844, -68.3917236328125, -65.47645568847656, -62.561187744140625, -59.64592361450195, -56.730655670166016, -53.81538772583008, -50.900123596191406, -47.98485565185547, -45.06958770751953, -42.154319763183594, -39.239051818847656, -36.323787689208984, -33.40851974487305, -30.49325180053711, -27.577985763549805, -24.662715911865234, -21.747447967529297, -18.832181930541992, -15.916913986206055, -13.001646995544434, -10.086380004882812, -7.171112060546875, -4.25584602355957, -1.3405780792236328, 1.5746891498565674, 4.489956378936768, 7.405223846435547, 10.320490837097168, 13.235757827758789, 16.151025772094727, 19.06629180908203, 21.98155975341797, 24.896827697753906, 27.81209373474121, 30.72736167907715, 33.64262771606445, 36.55789566040039, 39.47316360473633, 42.388427734375, 45.30369567871094, 48.218963623046875, 51.13423156738281, 54.04949951171875, 56.96476364135742, 59.88003158569336, 62.7952995300293, 65.71056365966797, 68.62583923339844, 71.54110717773438, 74.45637512207031, 77.37164306640625, 80.28691101074219, 83.20217895507812, 86.11744689941406, 89.03270721435547, 91.9479751586914, 94.86324310302734, 97.77851104736328, 100.69377899169922, 103.60904693603516, 106.52430725097656, 109.4395751953125, 112.35484313964844, 115.27011108398438]}, "gradients/decoder.transformer.h.3.ln_1.bias": {"_type": "histogram", "values": [1.0, 3.0, 1.0, 1.0, 1.0, 1.0, 2.0, 4.0, 5.0, 9.0, 8.0, 6.0, 13.0, 15.0, 14.0, 12.0, 21.0, 25.0, 22.0, 25.0, 25.0, 33.0, 33.0, 42.0, 44.0, 35.0, 36.0, 41.0, 47.0, 47.0, 44.0, 35.0, 31.0, 32.0, 36.0, 38.0, 41.0, 30.0, 29.0, 24.0, 17.0, 15.0, 7.0, 18.0, 11.0, 8.0, 7.0, 6.0, 4.0, 2.0, 5.0, 4.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-49.09221649169922, -47.382896423339844, -45.67357635498047, -43.964256286621094, -42.254940032958984, -40.54561996459961, -38.836299896240234, -37.12697982788086, -35.417659759521484, -33.70833969116211, -31.999021530151367, -30.289701461791992, -28.580381393432617, -26.871063232421875, -25.1617431640625, -23.452423095703125, -21.743104934692383, -20.033784866333008, -18.324466705322266, -16.61514663696289, -14.905826568603516, -13.196507453918457, -11.487188339233398, -9.777868270874023, -8.068549156188965, -6.359229564666748, -4.649909973144531, -2.9405908584594727, -1.2312712669372559, 0.47804832458496094, 2.1873674392700195, 3.8966875076293945, 5.606006622314453, 7.31532621383667, 9.024645805358887, 10.733964920043945, 12.44328498840332, 14.152604103088379, 15.861923217773438, 17.571243286132812, 19.280563354492188, 20.989883422851562, 22.699201583862305, 24.40852165222168, 26.117841720581055, 27.827159881591797, 29.536479949951172, 31.245800018310547, 32.955116271972656, 34.66443634033203, 36.373756408691406, 38.08307647705078, 39.79239273071289, 41.501712799072266, 43.21103286743164, 44.920352935791016, 46.62967300415039, 48.338993072509766, 50.04831314086914, 51.75762939453125, 53.466949462890625, 55.17626953125, 56.885589599609375, 58.59490966796875, 60.304229736328125]}, "gradients/decoder.transformer.h.2.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 4.0, 6.0, 6.0, 10.0, 15.0, 8.0, 7.0, 9.0, 15.0, 21.0, 18.0, 22.0, 22.0, 30.0, 14.0, 37.0, 37.0, 31.0, 41.0, 53.0, 44.0, 29.0, 41.0, 41.0, 34.0, 25.0, 36.0, 38.0, 46.0, 32.0, 22.0, 28.0, 23.0, 16.0, 21.0, 20.0, 25.0, 15.0, 9.0, 8.0, 9.0, 7.0, 6.0, 9.0, 5.0, 4.0, 2.0, 5.0, 7.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0], "bins": [-7.3203125, -7.0911865234375, -6.862060546875, -6.6329345703125, -6.40380859375, -6.1746826171875, -5.945556640625, -5.7164306640625, -5.4873046875, -5.2581787109375, -5.029052734375, -4.7999267578125, -4.57080078125, -4.3416748046875, -4.112548828125, -3.8834228515625, -3.654296875, -3.4251708984375, -3.196044921875, -2.9669189453125, -2.73779296875, -2.5086669921875, -2.279541015625, -2.0504150390625, -1.8212890625, -1.5921630859375, -1.363037109375, -1.1339111328125, -0.90478515625, -0.6756591796875, -0.446533203125, -0.2174072265625, 0.01171875, 0.2408447265625, 0.469970703125, 0.6990966796875, 0.92822265625, 1.1573486328125, 1.386474609375, 1.6156005859375, 1.8447265625, 2.0738525390625, 2.302978515625, 2.5321044921875, 2.76123046875, 2.9903564453125, 3.219482421875, 3.4486083984375, 3.677734375, 3.9068603515625, 4.135986328125, 4.3651123046875, 4.59423828125, 4.8233642578125, 5.052490234375, 5.2816162109375, 5.5107421875, 5.7398681640625, 5.968994140625, 6.1981201171875, 6.42724609375, 6.6563720703125, 6.885498046875, 7.1146240234375, 7.34375]}, "gradients/decoder.transformer.h.2.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 3.0, 0.0, 0.0, 1.0, 2.0, 0.0, 3.0, 4.0, 7.0, 9.0, 11.0, 11.0, 17.0, 35.0, 67.0, 101.0, 185.0, 422.0, 962.0, 2049.0, 5368.0, 19435.0, 147774.0, 1532722.0, 2200989.0, 245471.0, 27587.0, 6747.0, 2347.0, 996.0, 488.0, 231.0, 113.0, 52.0, 29.0, 16.0, 10.0, 10.0, 4.0, 3.0, 7.0, 2.0, 2.0, 2.0, 1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-18.0625, -17.44189453125, -16.8212890625, -16.20068359375, -15.580078125, -14.95947265625, -14.3388671875, -13.71826171875, -13.09765625, -12.47705078125, -11.8564453125, -11.23583984375, -10.615234375, -9.99462890625, -9.3740234375, -8.75341796875, -8.1328125, -7.51220703125, -6.8916015625, -6.27099609375, -5.650390625, -5.02978515625, -4.4091796875, -3.78857421875, -3.16796875, -2.54736328125, -1.9267578125, -1.30615234375, -0.685546875, -0.06494140625, 0.5556640625, 1.17626953125, 1.796875, 2.41748046875, 3.0380859375, 3.65869140625, 4.279296875, 4.89990234375, 5.5205078125, 6.14111328125, 6.76171875, 7.38232421875, 8.0029296875, 8.62353515625, 9.244140625, 9.86474609375, 10.4853515625, 11.10595703125, 11.7265625, 12.34716796875, 12.9677734375, 13.58837890625, 14.208984375, 14.82958984375, 15.4501953125, 16.07080078125, 16.69140625, 17.31201171875, 17.9326171875, 18.55322265625, 19.173828125, 19.79443359375, 20.4150390625, 21.03564453125, 21.65625]}, "gradients/decoder.transformer.h.2.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 4.0, 3.0, 4.0, 10.0, 12.0, 32.0, 35.0, 48.0, 95.0, 127.0, 213.0, 313.0, 499.0, 780.0, 671.0, 462.0, 288.0, 156.0, 116.0, 86.0, 45.0, 21.0, 15.0, 26.0, 17.0, 3.0, 2.0, 1.0, 2.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-13.875, -13.2119140625, -12.548828125, -11.8857421875, -11.22265625, -10.5595703125, -9.896484375, -9.2333984375, -8.5703125, -7.9072265625, -7.244140625, -6.5810546875, -5.91796875, -5.2548828125, -4.591796875, -3.9287109375, -3.265625, -2.6025390625, -1.939453125, -1.2763671875, -0.61328125, 0.0498046875, 0.712890625, 1.3759765625, 2.0390625, 2.7021484375, 3.365234375, 4.0283203125, 4.69140625, 5.3544921875, 6.017578125, 6.6806640625, 7.34375, 8.0068359375, 8.669921875, 9.3330078125, 9.99609375, 10.6591796875, 11.322265625, 11.9853515625, 12.6484375, 13.3115234375, 13.974609375, 14.6376953125, 15.30078125, 15.9638671875, 16.626953125, 17.2900390625, 17.953125, 18.6162109375, 19.279296875, 19.9423828125, 20.60546875, 21.2685546875, 21.931640625, 22.5947265625, 23.2578125, 23.9208984375, 24.583984375, 25.2470703125, 25.91015625, 26.5732421875, 27.236328125, 27.8994140625, 28.5625]}, "gradients/decoder.transformer.h.2.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 8.0, 9.0, 9.0, 17.0, 37.0, 75.0, 124.0, 298.0, 731.0, 2257.0, 19488.0, 1195722.0, 2929368.0, 41258.0, 3253.0, 930.0, 392.0, 157.0, 74.0, 39.0, 25.0, 10.0, 6.0, 4.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-42.25, -40.93408203125, -39.6181640625, -38.30224609375, -36.986328125, -35.67041015625, -34.3544921875, -33.03857421875, -31.72265625, -30.40673828125, -29.0908203125, -27.77490234375, -26.458984375, -25.14306640625, -23.8271484375, -22.51123046875, -21.1953125, -19.87939453125, -18.5634765625, -17.24755859375, -15.931640625, -14.61572265625, -13.2998046875, -11.98388671875, -10.66796875, -9.35205078125, -8.0361328125, -6.72021484375, -5.404296875, -4.08837890625, -2.7724609375, -1.45654296875, -0.140625, 1.17529296875, 2.4912109375, 3.80712890625, 5.123046875, 6.43896484375, 7.7548828125, 9.07080078125, 10.38671875, 11.70263671875, 13.0185546875, 14.33447265625, 15.650390625, 16.96630859375, 18.2822265625, 19.59814453125, 20.9140625, 22.22998046875, 23.5458984375, 24.86181640625, 26.177734375, 27.49365234375, 28.8095703125, 30.12548828125, 31.44140625, 32.75732421875, 34.0732421875, 35.38916015625, 36.705078125, 38.02099609375, 39.3369140625, 40.65283203125, 41.96875]}, "gradients/decoder.transformer.h.2.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 11.0, 54.0, 245.0, 444.0, 216.0, 38.0, 11.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-94.20370483398438, -87.2127685546875, -80.22183990478516, -73.23091125488281, -66.23997497558594, -59.24904251098633, -52.25811004638672, -45.26717758178711, -38.2762451171875, -31.28531265258789, -24.29438018798828, -17.303447723388672, -10.312515258789062, -3.321582794189453, 3.6693496704101562, 10.660282135009766, 17.651214599609375, 24.642147064208984, 31.633079528808594, 38.6240119934082, 45.61494445800781, 52.60587692260742, 59.59680938720703, 66.58773803710938, 73.57867431640625, 80.56961059570312, 87.56053924560547, 94.55146789550781, 101.54240417480469, 108.53334045410156, 115.5242691040039, 122.51519775390625, 129.50613403320312, 136.4970703125, 143.48800659179688, 150.4789276123047, 157.46986389160156, 164.46080017089844, 171.45172119140625, 178.44265747070312, 185.43359375, 192.42453002929688, 199.41546630859375, 206.40638732910156, 213.39732360839844, 220.3882598876953, 227.37918090820312, 234.3701171875, 241.36105346679688, 248.35198974609375, 255.34292602539062, 262.3338623046875, 269.32476806640625, 276.3157043457031, 283.306640625, 290.2975769042969, 297.28851318359375, 304.2794494628906, 311.2703857421875, 318.2613220214844, 325.25225830078125, 332.2431640625, 339.2341003417969, 346.22503662109375, 353.2159729003906]}, "gradients/decoder.transformer.h.2.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 1.0, 4.0, 2.0, 3.0, 6.0, 7.0, 3.0, 7.0, 10.0, 7.0, 11.0, 12.0, 9.0, 17.0, 17.0, 16.0, 23.0, 14.0, 24.0, 21.0, 25.0, 36.0, 29.0, 40.0, 50.0, 39.0, 31.0, 32.0, 38.0, 38.0, 33.0, 32.0, 33.0, 38.0, 29.0, 30.0, 24.0, 28.0, 41.0, 28.0, 26.0, 11.0, 14.0, 14.0, 12.0, 6.0, 14.0, 6.0, 8.0, 6.0, 2.0, 3.0, 3.0, 1.0, 4.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-45.37238311767578, -43.9488639831543, -42.52534484863281, -41.10182571411133, -39.678306579589844, -38.254783630371094, -36.83126449584961, -35.407745361328125, -33.98422622680664, -32.560707092285156, -31.137187957763672, -29.713666915893555, -28.29014778137207, -26.866628646850586, -25.44310760498047, -24.019588470458984, -22.5960693359375, -21.172550201416016, -19.74903106689453, -18.325510025024414, -16.90199089050293, -15.478471755981445, -14.054951667785645, -12.631431579589844, -11.20791244506836, -9.784393310546875, -8.360873222351074, -6.937353610992432, -5.513833999633789, -4.0903143882751465, -2.666794776916504, -1.2432746887207031, 0.18024826049804688, 1.6037678718566895, 3.027287483215332, 4.450807094573975, 5.874326705932617, 7.29784631729126, 8.721365928649902, 10.144886016845703, 11.568405151367188, 12.991924285888672, 14.415444374084473, 15.838964462280273, 17.262483596801758, 18.686002731323242, 20.10952377319336, 21.533042907714844, 22.956562042236328, 24.380081176757812, 25.803600311279297, 27.227121353149414, 28.6506404876709, 30.074159622192383, 31.4976806640625, 32.921199798583984, 34.34471893310547, 35.76823806762695, 37.19175720214844, 38.61527633666992, 40.038795471191406, 41.462318420410156, 42.88583755493164, 44.309356689453125, 45.73287582397461]}, "gradients/decoder.transformer.h.2.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 1.0, 2.0, 4.0, 4.0, 3.0, 6.0, 10.0, 9.0, 9.0, 8.0, 8.0, 23.0, 22.0, 22.0, 35.0, 26.0, 29.0, 35.0, 30.0, 45.0, 42.0, 65.0, 40.0, 40.0, 39.0, 32.0, 35.0, 50.0, 40.0, 47.0, 32.0, 28.0, 28.0, 18.0, 23.0, 24.0, 17.0, 15.0, 8.0, 14.0, 10.0, 12.0, 8.0, 5.0, 1.0, 4.0, 3.0, 4.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.52734375, -6.317138671875, -6.10693359375, -5.896728515625, -5.6865234375, -5.476318359375, -5.26611328125, -5.055908203125, -4.845703125, -4.635498046875, -4.42529296875, -4.215087890625, -4.0048828125, -3.794677734375, -3.58447265625, -3.374267578125, -3.1640625, -2.953857421875, -2.74365234375, -2.533447265625, -2.3232421875, -2.113037109375, -1.90283203125, -1.692626953125, -1.482421875, -1.272216796875, -1.06201171875, -0.851806640625, -0.6416015625, -0.431396484375, -0.22119140625, -0.010986328125, 0.19921875, 0.409423828125, 0.61962890625, 0.829833984375, 1.0400390625, 1.250244140625, 1.46044921875, 1.670654296875, 1.880859375, 2.091064453125, 2.30126953125, 2.511474609375, 2.7216796875, 2.931884765625, 3.14208984375, 3.352294921875, 3.5625, 3.772705078125, 3.98291015625, 4.193115234375, 4.4033203125, 4.613525390625, 4.82373046875, 5.033935546875, 5.244140625, 5.454345703125, 5.66455078125, 5.874755859375, 6.0849609375, 6.295166015625, 6.50537109375, 6.715576171875, 6.92578125]}, "gradients/decoder.transformer.h.2.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 6.0, 3.0, 5.0, 17.0, 24.0, 28.0, 37.0, 59.0, 80.0, 112.0, 191.0, 253.0, 391.0, 554.0, 875.0, 1241.0, 2019.0, 3220.0, 5085.0, 8242.0, 13376.0, 21468.0, 35251.0, 56205.0, 88722.0, 131033.0, 169336.0, 165444.0, 124499.0, 82927.0, 51876.0, 32847.0, 20006.0, 12334.0, 7497.0, 4791.0, 3011.0, 1925.0, 1205.0, 770.0, 544.0, 357.0, 243.0, 140.0, 128.0, 62.0, 46.0, 20.0, 17.0, 22.0, 12.0, 3.0, 3.0, 4.0, 2.0, 2.0, 1.0], "bins": [-0.85791015625, -0.8326187133789062, -0.8073272705078125, -0.7820358276367188, -0.756744384765625, -0.7314529418945312, -0.7061614990234375, -0.6808700561523438, -0.65557861328125, -0.6302871704101562, -0.6049957275390625, -0.5797042846679688, -0.554412841796875, -0.5291213989257812, -0.5038299560546875, -0.47853851318359375, -0.4532470703125, -0.42795562744140625, -0.4026641845703125, -0.37737274169921875, -0.352081298828125, -0.32678985595703125, -0.3014984130859375, -0.27620697021484375, -0.25091552734375, -0.22562408447265625, -0.2003326416015625, -0.17504119873046875, -0.149749755859375, -0.12445831298828125, -0.0991668701171875, -0.07387542724609375, -0.048583984375, -0.02329254150390625, 0.0019989013671875, 0.02729034423828125, 0.052581787109375, 0.07787322998046875, 0.1031646728515625, 0.12845611572265625, 0.15374755859375, 0.17903900146484375, 0.2043304443359375, 0.22962188720703125, 0.254913330078125, 0.28020477294921875, 0.3054962158203125, 0.33078765869140625, 0.3560791015625, 0.38137054443359375, 0.4066619873046875, 0.43195343017578125, 0.457244873046875, 0.48253631591796875, 0.5078277587890625, 0.5331192016601562, 0.55841064453125, 0.5837020874023438, 0.6089935302734375, 0.6342849731445312, 0.659576416015625, 0.6848678588867188, 0.7101593017578125, 0.7354507446289062, 0.7607421875]}, "gradients/decoder.transformer.h.2.crossattention.c_attn.bias": {"_type": "histogram", "values": [4.0, 1.0, 0.0, 0.0, 3.0, 0.0, 2.0, 3.0, 4.0, 4.0, 5.0, 5.0, 5.0, 10.0, 17.0, 15.0, 14.0, 21.0, 19.0, 11.0, 28.0, 23.0, 23.0, 26.0, 32.0, 27.0, 31.0, 27.0, 35.0, 42.0, 42.0, 1072.0, 44.0, 38.0, 24.0, 39.0, 45.0, 29.0, 33.0, 30.0, 34.0, 21.0, 16.0, 21.0, 15.0, 11.0, 16.0, 12.0, 17.0, 10.0, 4.0, 3.0, 3.0, 9.0, 8.0, 2.0, 1.0, 1.0, 4.0, 1.0, 1.0, 2.0, 1.0, 2.0], "bins": [-3.935546875, -3.810943603515625, -3.68634033203125, -3.561737060546875, -3.4371337890625, -3.312530517578125, -3.18792724609375, -3.063323974609375, -2.938720703125, -2.814117431640625, -2.68951416015625, -2.564910888671875, -2.4403076171875, -2.315704345703125, -2.19110107421875, -2.066497802734375, -1.94189453125, -1.817291259765625, -1.69268798828125, -1.568084716796875, -1.4434814453125, -1.318878173828125, -1.19427490234375, -1.069671630859375, -0.945068359375, -0.820465087890625, -0.69586181640625, -0.571258544921875, -0.4466552734375, -0.322052001953125, -0.19744873046875, -0.072845458984375, 0.0517578125, 0.176361083984375, 0.30096435546875, 0.425567626953125, 0.5501708984375, 0.674774169921875, 0.79937744140625, 0.923980712890625, 1.048583984375, 1.173187255859375, 1.29779052734375, 1.422393798828125, 1.5469970703125, 1.671600341796875, 1.79620361328125, 1.920806884765625, 2.04541015625, 2.170013427734375, 2.29461669921875, 2.419219970703125, 2.5438232421875, 2.668426513671875, 2.79302978515625, 2.917633056640625, 3.042236328125, 3.166839599609375, 3.29144287109375, 3.416046142578125, 3.5406494140625, 3.665252685546875, 3.78985595703125, 3.914459228515625, 4.0390625]}, "gradients/decoder.transformer.h.2.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 0.0, 2.0, 4.0, 6.0, 7.0, 19.0, 33.0, 44.0, 50.0, 80.0, 143.0, 252.0, 349.0, 573.0, 783.0, 1367.0, 2137.0, 3435.0, 5495.0, 8810.0, 14154.0, 22242.0, 34876.0, 53576.0, 80568.0, 114251.0, 153252.0, 1194448.0, 130381.0, 94631.0, 64744.0, 42726.0, 27309.0, 17223.0, 10852.0, 6764.0, 4297.0, 2626.0, 1718.0, 1032.0, 657.0, 431.0, 268.0, 172.0, 132.0, 79.0, 45.0, 29.0, 25.0, 14.0, 7.0, 12.0, 10.0, 6.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.50537109375, -0.48918914794921875, -0.4730072021484375, -0.45682525634765625, -0.440643310546875, -0.42446136474609375, -0.4082794189453125, -0.39209747314453125, -0.37591552734375, -0.35973358154296875, -0.3435516357421875, -0.32736968994140625, -0.311187744140625, -0.29500579833984375, -0.2788238525390625, -0.26264190673828125, -0.2464599609375, -0.23027801513671875, -0.2140960693359375, -0.19791412353515625, -0.181732177734375, -0.16555023193359375, -0.1493682861328125, -0.13318634033203125, -0.11700439453125, -0.10082244873046875, -0.0846405029296875, -0.06845855712890625, -0.052276611328125, -0.03609466552734375, -0.0199127197265625, -0.00373077392578125, 0.012451171875, 0.02863311767578125, 0.0448150634765625, 0.06099700927734375, 0.077178955078125, 0.09336090087890625, 0.1095428466796875, 0.12572479248046875, 0.14190673828125, 0.15808868408203125, 0.1742706298828125, 0.19045257568359375, 0.206634521484375, 0.22281646728515625, 0.2389984130859375, 0.25518035888671875, 0.2713623046875, 0.28754425048828125, 0.3037261962890625, 0.31990814208984375, 0.336090087890625, 0.35227203369140625, 0.3684539794921875, 0.38463592529296875, 0.40081787109375, 0.41699981689453125, 0.4331817626953125, 0.44936370849609375, 0.465545654296875, 0.48172760009765625, 0.4979095458984375, 0.5140914916992188, 0.5302734375]}, "gradients/decoder.transformer.h.2.crossattention.q_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 0.0, 0.0, 2.0, 0.0, 3.0, 7.0, 3.0, 3.0, 5.0, 6.0, 10.0, 10.0, 10.0, 18.0, 20.0, 25.0, 37.0, 40.0, 50.0, 76.0, 89.0, 87.0, 104.0, 80.0, 62.0, 55.0, 37.0, 30.0, 23.0, 20.0, 10.0, 15.0, 9.0, 12.0, 11.0, 6.0, 11.0, 6.0, 3.0, 3.0, 3.0, 1.0, 4.0, 1.0, 1.0, 0.0, 1.0, 0.0, 4.0, 1.0, 2.0], "bins": [-0.01259613037109375, -0.012238025665283203, -0.011879920959472656, -0.01152181625366211, -0.011163711547851562, -0.010805606842041016, -0.010447502136230469, -0.010089397430419922, -0.009731292724609375, -0.009373188018798828, -0.009015083312988281, -0.008656978607177734, -0.008298873901367188, -0.00794076919555664, -0.007582664489746094, -0.007224559783935547, -0.006866455078125, -0.006508350372314453, -0.006150245666503906, -0.005792140960693359, -0.0054340362548828125, -0.005075931549072266, -0.004717826843261719, -0.004359722137451172, -0.004001617431640625, -0.003643512725830078, -0.0032854080200195312, -0.0029273033142089844, -0.0025691986083984375, -0.0022110939025878906, -0.0018529891967773438, -0.0014948844909667969, -0.00113677978515625, -0.0007786750793457031, -0.00042057037353515625, -6.246566772460938e-05, 0.0002956390380859375, 0.0006537437438964844, 0.0010118484497070312, 0.0013699531555175781, 0.001728057861328125, 0.002086162567138672, 0.0024442672729492188, 0.0028023719787597656, 0.0031604766845703125, 0.0035185813903808594, 0.0038766860961914062, 0.004234790802001953, 0.0045928955078125, 0.004951000213623047, 0.005309104919433594, 0.005667209625244141, 0.0060253143310546875, 0.006383419036865234, 0.006741523742675781, 0.007099628448486328, 0.007457733154296875, 0.007815837860107422, 0.008173942565917969, 0.008532047271728516, 0.008890151977539062, 0.00924825668334961, 0.009606361389160156, 0.009964466094970703, 0.01032257080078125]}, "gradients/decoder.transformer.h.2.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 4.0, 1.0, 5.0, 1.0, 3.0, 3.0, 1.0, 9.0, 6.0, 6.0, 13.0, 11.0, 27.0, 18.0, 38.0, 17.0, 35.0, 41.0, 48.0, 86.0, 119.0, 198.0, 304.0, 594.0, 5222.0, 1020513.0, 19324.0, 810.0, 379.0, 202.0, 144.0, 99.0, 67.0, 42.0, 41.0, 27.0, 19.0, 22.0, 16.0, 11.0, 9.0, 10.0, 6.0, 2.0, 3.0, 2.0, 0.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 2.0, 0.0, 0.0, 2.0], "bins": [-0.184814453125, -0.1785736083984375, -0.172332763671875, -0.1660919189453125, -0.15985107421875, -0.1536102294921875, -0.147369384765625, -0.1411285400390625, -0.1348876953125, -0.1286468505859375, -0.122406005859375, -0.1161651611328125, -0.10992431640625, -0.1036834716796875, -0.097442626953125, -0.0912017822265625, -0.0849609375, -0.0787200927734375, -0.072479248046875, -0.0662384033203125, -0.05999755859375, -0.0537567138671875, -0.047515869140625, -0.0412750244140625, -0.0350341796875, -0.0287933349609375, -0.022552490234375, -0.0163116455078125, -0.01007080078125, -0.0038299560546875, 0.002410888671875, 0.0086517333984375, 0.014892578125, 0.0211334228515625, 0.027374267578125, 0.0336151123046875, 0.03985595703125, 0.0460968017578125, 0.052337646484375, 0.0585784912109375, 0.0648193359375, 0.0710601806640625, 0.077301025390625, 0.0835418701171875, 0.08978271484375, 0.0960235595703125, 0.102264404296875, 0.1085052490234375, 0.11474609375, 0.1209869384765625, 0.127227783203125, 0.1334686279296875, 0.13970947265625, 0.1459503173828125, 0.152191162109375, 0.1584320068359375, 0.1646728515625, 0.1709136962890625, 0.177154541015625, 0.1833953857421875, 0.18963623046875, 0.1958770751953125, 0.202117919921875, 0.2083587646484375, 0.214599609375]}, "gradients/decoder.transformer.h.2.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 978.0, 34.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.04295317083597183, -0.037378452718257904, -0.031803734600543976, -0.026229020208120346, -0.020654302090406418, -0.01507958397269249, -0.00950486958026886, -0.003930151462554932, 0.0016445666551589966, 0.00721928384155035, 0.012794001027941704, 0.018368717283010483, 0.02394343540072441, 0.02951815351843834, 0.03509286791086197, 0.0406675860285759, 0.046242304146289825, 0.051817022264003754, 0.05739174038171768, 0.06296645104885101, 0.06854116916656494, 0.07411588728427887, 0.0796906054019928, 0.08526532351970673, 0.09084004163742065, 0.09641475975513458, 0.10198947787284851, 0.10756419599056244, 0.11313891410827637, 0.1187136322259903, 0.12428834289312363, 0.12986305356025696, 0.13543778657913208, 0.141012504696846, 0.14658722281455994, 0.15216194093227386, 0.1577366590499878, 0.16331137716770172, 0.16888609528541565, 0.17446079850196838, 0.1800355315208435, 0.18561024963855743, 0.19118496775627136, 0.1967596858739853, 0.20233440399169922, 0.20790912210941315, 0.21348384022712708, 0.2190585434436798, 0.22463326156139374, 0.23020797967910767, 0.2357826977968216, 0.24135741591453552, 0.24693213403224945, 0.2525068521499634, 0.2580815553665161, 0.26365628838539124, 0.26923099160194397, 0.2748056948184967, 0.2803804278373718, 0.28595513105392456, 0.2915298640727997, 0.2971045672893524, 0.30267930030822754, 0.3082540035247803, 0.3138287365436554]}, "gradients/decoder.transformer.h.2.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 3.0, 3.0, 3.0, 4.0, 7.0, 11.0, 20.0, 22.0, 25.0, 30.0, 46.0, 60.0, 47.0, 64.0, 68.0, 71.0, 66.0, 55.0, 57.0, 50.0, 48.0, 50.0, 38.0, 40.0, 41.0, 23.0, 19.0, 11.0, 12.0, 5.0, 8.0, 3.0, 2.0, 0.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.018280208110809326, -0.01781718246638775, -0.01735415682196617, -0.016891131177544594, -0.016428105533123016, -0.01596507988870144, -0.015502054244279861, -0.015039028599858284, -0.014576002955436707, -0.014112977311015129, -0.013649951666593552, -0.013186926022171974, -0.012723900377750397, -0.01226087473332882, -0.011797849088907242, -0.011334823444485664, -0.010871797800064087, -0.01040877215564251, -0.009945746511220932, -0.009482720866799355, -0.009019695222377777, -0.0085566695779562, -0.008093643933534622, -0.007630618289113045, -0.007167592644691467, -0.00670456700026989, -0.006241541355848312, -0.005778515711426735, -0.0053154900670051575, -0.00485246442258358, -0.0043894387781620026, -0.003926413133740425, -0.0034633874893188477, -0.00300036184489727, -0.0025373362004756927, -0.0020743105560541153, -0.0016112849116325378, -0.0011482592672109604, -0.0006852336227893829, -0.00022220797836780548, 0.00024081766605377197, 0.0007038433104753494, 0.0011668689548969269, 0.0016298945993185043, 0.002092920243740082, 0.0025559458881616592, 0.0030189715325832367, 0.003481997177004814, 0.003945022821426392, 0.004408048465847969, 0.0048710741102695465, 0.005334099754691124, 0.005797125399112701, 0.006260151043534279, 0.006723176687955856, 0.007186202332377434, 0.007649227976799011, 0.008112253621220589, 0.008575279265642166, 0.009038304910063744, 0.009501330554485321, 0.009964356198906898, 0.010427381843328476, 0.010890407487750053, 0.01135343313217163]}, "gradients/decoder.transformer.h.2.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 1.0, 2.0, 3.0, 4.0, 4.0, 5.0, 11.0, 9.0, 9.0, 8.0, 8.0, 23.0, 20.0, 24.0, 35.0, 26.0, 29.0, 34.0, 31.0, 45.0, 42.0, 65.0, 37.0, 43.0, 39.0, 32.0, 35.0, 49.0, 41.0, 45.0, 32.0, 30.0, 28.0, 17.0, 24.0, 24.0, 16.0, 15.0, 9.0, 14.0, 9.0, 13.0, 8.0, 4.0, 2.0, 4.0, 3.0, 4.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.53125, -6.321044921875, -6.11083984375, -5.900634765625, -5.6904296875, -5.480224609375, -5.27001953125, -5.059814453125, -4.849609375, -4.639404296875, -4.42919921875, -4.218994140625, -4.0087890625, -3.798583984375, -3.58837890625, -3.378173828125, -3.16796875, -2.957763671875, -2.74755859375, -2.537353515625, -2.3271484375, -2.116943359375, -1.90673828125, -1.696533203125, -1.486328125, -1.276123046875, -1.06591796875, -0.855712890625, -0.6455078125, -0.435302734375, -0.22509765625, -0.014892578125, 0.1953125, 0.405517578125, 0.61572265625, 0.825927734375, 1.0361328125, 1.246337890625, 1.45654296875, 1.666748046875, 1.876953125, 2.087158203125, 2.29736328125, 2.507568359375, 2.7177734375, 2.927978515625, 3.13818359375, 3.348388671875, 3.55859375, 3.768798828125, 3.97900390625, 4.189208984375, 4.3994140625, 4.609619140625, 4.81982421875, 5.030029296875, 5.240234375, 5.450439453125, 5.66064453125, 5.870849609375, 6.0810546875, 6.291259765625, 6.50146484375, 6.711669921875, 6.921875]}, "gradients/decoder.transformer.h.2.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 1.0, 4.0, 2.0, 4.0, 0.0, 8.0, 4.0, 15.0, 18.0, 24.0, 37.0, 48.0, 79.0, 97.0, 125.0, 148.0, 214.0, 311.0, 409.0, 566.0, 726.0, 940.0, 1425.0, 2096.0, 3437.0, 7887.0, 34646.0, 232621.0, 616458.0, 112838.0, 18769.0, 5393.0, 2665.0, 1794.0, 1312.0, 927.0, 647.0, 517.0, 356.0, 270.0, 209.0, 148.0, 102.0, 74.0, 54.0, 36.0, 35.0, 15.0, 20.0, 8.0, 9.0, 8.0, 4.0, 4.0, 2.0, 1.0, 2.0, 1.0, 1.0, 1.0], "bins": [-16.78125, -16.261962890625, -15.74267578125, -15.223388671875, -14.7041015625, -14.184814453125, -13.66552734375, -13.146240234375, -12.626953125, -12.107666015625, -11.58837890625, -11.069091796875, -10.5498046875, -10.030517578125, -9.51123046875, -8.991943359375, -8.47265625, -7.953369140625, -7.43408203125, -6.914794921875, -6.3955078125, -5.876220703125, -5.35693359375, -4.837646484375, -4.318359375, -3.799072265625, -3.27978515625, -2.760498046875, -2.2412109375, -1.721923828125, -1.20263671875, -0.683349609375, -0.1640625, 0.355224609375, 0.87451171875, 1.393798828125, 1.9130859375, 2.432373046875, 2.95166015625, 3.470947265625, 3.990234375, 4.509521484375, 5.02880859375, 5.548095703125, 6.0673828125, 6.586669921875, 7.10595703125, 7.625244140625, 8.14453125, 8.663818359375, 9.18310546875, 9.702392578125, 10.2216796875, 10.740966796875, 11.26025390625, 11.779541015625, 12.298828125, 12.818115234375, 13.33740234375, 13.856689453125, 14.3759765625, 14.895263671875, 15.41455078125, 15.933837890625, 16.453125]}, "gradients/decoder.transformer.h.2.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 0.0, 5.0, 6.0, 2.0, 2.0, 4.0, 9.0, 14.0, 9.0, 17.0, 12.0, 20.0, 22.0, 31.0, 35.0, 28.0, 34.0, 51.0, 50.0, 69.0, 121.0, 209.0, 1396.0, 277.0, 158.0, 93.0, 54.0, 46.0, 45.0, 40.0, 32.0, 27.0, 17.0, 25.0, 16.0, 11.0, 11.0, 9.0, 11.0, 10.0, 6.0, 0.0, 4.0, 6.0, 2.0, 6.0, 2.0, 3.0, 2.0, 1.0, 0.0, 1.0, 1.0], "bins": [-16.59375, -16.1019287109375, -15.610107421875, -15.1182861328125, -14.62646484375, -14.1346435546875, -13.642822265625, -13.1510009765625, -12.6591796875, -12.1673583984375, -11.675537109375, -11.1837158203125, -10.69189453125, -10.2000732421875, -9.708251953125, -9.2164306640625, -8.724609375, -8.2327880859375, -7.740966796875, -7.2491455078125, -6.75732421875, -6.2655029296875, -5.773681640625, -5.2818603515625, -4.7900390625, -4.2982177734375, -3.806396484375, -3.3145751953125, -2.82275390625, -2.3309326171875, -1.839111328125, -1.3472900390625, -0.85546875, -0.3636474609375, 0.128173828125, 0.6199951171875, 1.11181640625, 1.6036376953125, 2.095458984375, 2.5872802734375, 3.0791015625, 3.5709228515625, 4.062744140625, 4.5545654296875, 5.04638671875, 5.5382080078125, 6.030029296875, 6.5218505859375, 7.013671875, 7.5054931640625, 7.997314453125, 8.4891357421875, 8.98095703125, 9.4727783203125, 9.964599609375, 10.4564208984375, 10.9482421875, 11.4400634765625, 11.931884765625, 12.4237060546875, 12.91552734375, 13.4073486328125, 13.899169921875, 14.3909912109375, 14.8828125]}, "gradients/decoder.transformer.h.2.attn.c_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 2.0, 0.0, 2.0, 2.0, 2.0, 6.0, 3.0, 4.0, 9.0, 5.0, 17.0, 21.0, 13.0, 20.0, 26.0, 33.0, 50.0, 82.0, 83.0, 141.0, 250.0, 434.0, 827.0, 1957.0, 11040.0, 3109439.0, 16920.0, 2208.0, 868.0, 436.0, 246.0, 171.0, 109.0, 72.0, 49.0, 46.0, 33.0, 24.0, 16.0, 12.0, 12.0, 5.0, 6.0, 4.0, 4.0, 3.0, 2.0, 3.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-59.25, -57.1748046875, -55.099609375, -53.0244140625, -50.94921875, -48.8740234375, -46.798828125, -44.7236328125, -42.6484375, -40.5732421875, -38.498046875, -36.4228515625, -34.34765625, -32.2724609375, -30.197265625, -28.1220703125, -26.046875, -23.9716796875, -21.896484375, -19.8212890625, -17.74609375, -15.6708984375, -13.595703125, -11.5205078125, -9.4453125, -7.3701171875, -5.294921875, -3.2197265625, -1.14453125, 0.9306640625, 3.005859375, 5.0810546875, 7.15625, 9.2314453125, 11.306640625, 13.3818359375, 15.45703125, 17.5322265625, 19.607421875, 21.6826171875, 23.7578125, 25.8330078125, 27.908203125, 29.9833984375, 32.05859375, 34.1337890625, 36.208984375, 38.2841796875, 40.359375, 42.4345703125, 44.509765625, 46.5849609375, 48.66015625, 50.7353515625, 52.810546875, 54.8857421875, 56.9609375, 59.0361328125, 61.111328125, 63.1865234375, 65.26171875, 67.3369140625, 69.412109375, 71.4873046875, 73.5625]}, "gradients/decoder.transformer.h.2.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 6.0, 23.0, 137.0, 274.0, 299.0, 194.0, 62.0, 15.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-176.9239959716797, -172.71615600585938, -168.50831604003906, -164.3004913330078, -160.0926513671875, -155.8848114013672, -151.67697143554688, -147.46914672851562, -143.2613067626953, -139.053466796875, -134.8456268310547, -130.63780212402344, -126.42996215820312, -122.22212219238281, -118.0142822265625, -113.80644989013672, -109.5986099243164, -105.3907699584961, -101.18293762207031, -96.97509765625, -92.76726531982422, -88.5594253540039, -84.35159301757812, -80.14375305175781, -75.9359130859375, -71.72807312011719, -67.5202407836914, -63.312400817871094, -59.10456848144531, -54.896728515625, -50.68889236450195, -46.481056213378906, -42.273231506347656, -38.06539535522461, -33.85755920410156, -29.649721145629883, -25.441884994506836, -21.23404884338379, -17.02621078491211, -12.818374633789062, -8.610538482666016, -4.4027018547058105, -0.19486522674560547, 4.012971878051758, 8.220808029174805, 12.428644180297852, 16.63648223876953, 20.844318389892578, 25.052154541015625, 29.259990692138672, 33.46782684326172, 37.67566680908203, 41.88349914550781, 46.091339111328125, 50.29917526245117, 54.50701141357422, 58.714847564697266, 62.92268371582031, 67.13052368164062, 71.3383560180664, 75.54619598388672, 79.7540283203125, 83.96186828613281, 88.16970825195312, 92.3775405883789]}, "gradients/decoder.transformer.h.2.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 5.0, 3.0, 3.0, 2.0, 2.0, 1.0, 2.0, 3.0, 5.0, 12.0, 8.0, 10.0, 17.0, 15.0, 18.0, 19.0, 14.0, 17.0, 17.0, 31.0, 25.0, 24.0, 27.0, 32.0, 33.0, 40.0, 50.0, 43.0, 34.0, 32.0, 34.0, 45.0, 43.0, 33.0, 35.0, 28.0, 35.0, 29.0, 28.0, 21.0, 20.0, 26.0, 20.0, 16.0, 11.0, 12.0, 6.0, 6.0, 5.0, 2.0, 6.0, 4.0, 2.0, 3.0, 1.0, 4.0, 1.0, 1.0, 1.0], "bins": [-46.40268325805664, -45.01646423339844, -43.630245208740234, -42.24402618408203, -40.857810974121094, -39.47159194946289, -38.08537292480469, -36.699153900146484, -35.31293487548828, -33.92671585083008, -32.540496826171875, -31.154279708862305, -29.768062591552734, -28.38184356689453, -26.995624542236328, -25.609405517578125, -24.223190307617188, -22.836971282958984, -21.450754165649414, -20.06453514099121, -18.67831802368164, -17.292098999023438, -15.905879974365234, -14.519661903381348, -13.133443832397461, -11.747225761413574, -10.361007690429688, -8.974788665771484, -7.588570594787598, -6.202352523803711, -4.816133499145508, -3.429915428161621, -2.043701171875, -0.6574828624725342, 0.7287354469299316, 2.1149539947509766, 3.5011720657348633, 4.88739013671875, 6.273609161376953, 7.65982723236084, 9.046045303344727, 10.432263374328613, 11.8184814453125, 13.204700469970703, 14.59091854095459, 15.977136611938477, 17.36335563659668, 18.74957275390625, 20.135791778564453, 21.522010803222656, 22.908227920532227, 24.29444694519043, 25.6806640625, 27.066883087158203, 28.453102111816406, 29.83932113647461, 31.22553825378418, 32.61175537109375, 33.99797439575195, 35.384193420410156, 36.77041244506836, 38.15663146972656, 39.5428466796875, 40.9290657043457, 42.315284729003906]}, "gradients/decoder.transformer.h.1.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 2.0, 4.0, 2.0, 2.0, 3.0, 5.0, 3.0, 9.0, 8.0, 11.0, 15.0, 13.0, 16.0, 17.0, 14.0, 22.0, 24.0, 33.0, 22.0, 45.0, 48.0, 41.0, 32.0, 43.0, 44.0, 43.0, 44.0, 41.0, 46.0, 50.0, 35.0, 30.0, 39.0, 34.0, 26.0, 20.0, 22.0, 24.0, 13.0, 9.0, 14.0, 17.0, 5.0, 5.0, 3.0, 8.0, 2.0, 2.0, 3.0, 3.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.7421875, -6.52191162109375, -6.3016357421875, -6.08135986328125, -5.861083984375, -5.64080810546875, -5.4205322265625, -5.20025634765625, -4.97998046875, -4.75970458984375, -4.5394287109375, -4.31915283203125, -4.098876953125, -3.87860107421875, -3.6583251953125, -3.43804931640625, -3.2177734375, -2.99749755859375, -2.7772216796875, -2.55694580078125, -2.336669921875, -2.11639404296875, -1.8961181640625, -1.67584228515625, -1.45556640625, -1.23529052734375, -1.0150146484375, -0.79473876953125, -0.574462890625, -0.35418701171875, -0.1339111328125, 0.08636474609375, 0.306640625, 0.52691650390625, 0.7471923828125, 0.96746826171875, 1.187744140625, 1.40802001953125, 1.6282958984375, 1.84857177734375, 2.06884765625, 2.28912353515625, 2.5093994140625, 2.72967529296875, 2.949951171875, 3.17022705078125, 3.3905029296875, 3.61077880859375, 3.8310546875, 4.05133056640625, 4.2716064453125, 4.49188232421875, 4.712158203125, 4.93243408203125, 5.1527099609375, 5.37298583984375, 5.59326171875, 5.81353759765625, 6.0338134765625, 6.25408935546875, 6.474365234375, 6.69464111328125, 6.9149169921875, 7.13519287109375, 7.35546875]}, "gradients/decoder.transformer.h.1.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 3.0, 6.0, 18.0, 17.0, 28.0, 54.0, 83.0, 171.0, 327.0, 584.0, 1136.0, 2381.0, 5703.0, 17641.0, 104198.0, 1144163.0, 2486860.0, 375153.0, 39207.0, 9504.0, 3680.0, 1618.0, 794.0, 424.0, 234.0, 134.0, 73.0, 38.0, 22.0, 14.0, 6.0, 4.0, 8.0, 0.0, 3.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-14.671875, -14.17041015625, -13.6689453125, -13.16748046875, -12.666015625, -12.16455078125, -11.6630859375, -11.16162109375, -10.66015625, -10.15869140625, -9.6572265625, -9.15576171875, -8.654296875, -8.15283203125, -7.6513671875, -7.14990234375, -6.6484375, -6.14697265625, -5.6455078125, -5.14404296875, -4.642578125, -4.14111328125, -3.6396484375, -3.13818359375, -2.63671875, -2.13525390625, -1.6337890625, -1.13232421875, -0.630859375, -0.12939453125, 0.3720703125, 0.87353515625, 1.375, 1.87646484375, 2.3779296875, 2.87939453125, 3.380859375, 3.88232421875, 4.3837890625, 4.88525390625, 5.38671875, 5.88818359375, 6.3896484375, 6.89111328125, 7.392578125, 7.89404296875, 8.3955078125, 8.89697265625, 9.3984375, 9.89990234375, 10.4013671875, 10.90283203125, 11.404296875, 11.90576171875, 12.4072265625, 12.90869140625, 13.41015625, 13.91162109375, 14.4130859375, 14.91455078125, 15.416015625, 15.91748046875, 16.4189453125, 16.92041015625, 17.421875]}, "gradients/decoder.transformer.h.1.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 4.0, 3.0, 4.0, 8.0, 17.0, 19.0, 21.0, 26.0, 45.0, 68.0, 112.0, 142.0, 222.0, 258.0, 348.0, 470.0, 529.0, 492.0, 397.0, 273.0, 182.0, 138.0, 102.0, 67.0, 46.0, 26.0, 21.0, 18.0, 8.0, 6.0, 7.0, 5.0, 0.0, 2.0, 1.0], "bins": [-18.40625, -18.0013427734375, -17.596435546875, -17.1915283203125, -16.78662109375, -16.3817138671875, -15.976806640625, -15.5718994140625, -15.1669921875, -14.7620849609375, -14.357177734375, -13.9522705078125, -13.54736328125, -13.1424560546875, -12.737548828125, -12.3326416015625, -11.927734375, -11.5228271484375, -11.117919921875, -10.7130126953125, -10.30810546875, -9.9031982421875, -9.498291015625, -9.0933837890625, -8.6884765625, -8.2835693359375, -7.878662109375, -7.4737548828125, -7.06884765625, -6.6639404296875, -6.259033203125, -5.8541259765625, -5.44921875, -5.0443115234375, -4.639404296875, -4.2344970703125, -3.82958984375, -3.4246826171875, -3.019775390625, -2.6148681640625, -2.2099609375, -1.8050537109375, -1.400146484375, -0.9952392578125, -0.59033203125, -0.1854248046875, 0.219482421875, 0.6243896484375, 1.029296875, 1.4342041015625, 1.839111328125, 2.2440185546875, 2.64892578125, 3.0538330078125, 3.458740234375, 3.8636474609375, 4.2685546875, 4.6734619140625, 5.078369140625, 5.4832763671875, 5.88818359375, 6.2930908203125, 6.697998046875, 7.1029052734375, 7.5078125]}, "gradients/decoder.transformer.h.1.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 13.0, 16.0, 16.0, 30.0, 50.0, 84.0, 143.0, 250.0, 468.0, 1150.0, 3681.0, 33689.0, 2580226.0, 1546994.0, 22397.0, 3034.0, 1006.0, 440.0, 232.0, 139.0, 89.0, 49.0, 26.0, 21.0, 17.0, 6.0, 7.0, 1.0, 4.0, 4.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-19.78125, -18.83642578125, -17.8916015625, -16.94677734375, -16.001953125, -15.05712890625, -14.1123046875, -13.16748046875, -12.22265625, -11.27783203125, -10.3330078125, -9.38818359375, -8.443359375, -7.49853515625, -6.5537109375, -5.60888671875, -4.6640625, -3.71923828125, -2.7744140625, -1.82958984375, -0.884765625, 0.06005859375, 1.0048828125, 1.94970703125, 2.89453125, 3.83935546875, 4.7841796875, 5.72900390625, 6.673828125, 7.61865234375, 8.5634765625, 9.50830078125, 10.453125, 11.39794921875, 12.3427734375, 13.28759765625, 14.232421875, 15.17724609375, 16.1220703125, 17.06689453125, 18.01171875, 18.95654296875, 19.9013671875, 20.84619140625, 21.791015625, 22.73583984375, 23.6806640625, 24.62548828125, 25.5703125, 26.51513671875, 27.4599609375, 28.40478515625, 29.349609375, 30.29443359375, 31.2392578125, 32.18408203125, 33.12890625, 34.07373046875, 35.0185546875, 35.96337890625, 36.908203125, 37.85302734375, 38.7978515625, 39.74267578125, 40.6875]}, "gradients/decoder.transformer.h.1.ln_2.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 8.0, 304.0, 647.0, 54.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-136.85989379882812, -124.69395446777344, -112.52802276611328, -100.36209106445312, -88.19615173339844, -76.03021240234375, -63.864280700683594, -51.69834899902344, -39.53240966796875, -27.366474151611328, -15.200538635253906, -3.0346031188964844, 9.131332397460938, 21.29726791381836, 33.46320343017578, 45.62913513183594, 57.795074462890625, 69.96101379394531, 82.12694549560547, 94.29287719726562, 106.45881652832031, 118.624755859375, 130.79067993164062, 142.9566192626953, 155.12255859375, 167.2884979248047, 179.45443725585938, 191.620361328125, 203.7863006591797, 215.95223999023438, 228.1181640625, 240.2841033935547, 252.45001220703125, 264.6159362792969, 276.7818908691406, 288.94781494140625, 301.11376953125, 313.2796936035156, 325.44561767578125, 337.611572265625, 349.7774963378906, 361.94342041015625, 374.109375, 386.2752990722656, 398.44122314453125, 410.607177734375, 422.7731018066406, 434.93902587890625, 447.10498046875, 459.2709045410156, 471.4368591308594, 483.602783203125, 495.76873779296875, 507.9346618652344, 520.1005859375, 532.2665405273438, 544.4324951171875, 556.5984497070312, 568.7643432617188, 580.9302978515625, 593.0962524414062, 605.2621459960938, 617.4281005859375, 629.5940551757812, 641.7599487304688]}, "gradients/decoder.transformer.h.1.ln_2.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 5.0, 4.0, 4.0, 6.0, 11.0, 5.0, 7.0, 12.0, 10.0, 13.0, 18.0, 21.0, 22.0, 22.0, 29.0, 27.0, 27.0, 36.0, 42.0, 36.0, 38.0, 40.0, 48.0, 42.0, 44.0, 46.0, 33.0, 28.0, 46.0, 44.0, 20.0, 30.0, 27.0, 30.0, 21.0, 15.0, 17.0, 16.0, 20.0, 13.0, 4.0, 9.0, 9.0, 4.0, 3.0, 2.0, 2.0, 2.0, 0.0, 4.0, 0.0, 1.0, 3.0, 2.0], "bins": [-42.56147003173828, -41.305564880371094, -40.04965591430664, -38.79375076293945, -37.537845611572266, -36.28194046020508, -35.026031494140625, -33.77012634277344, -32.51422119140625, -31.25831413269043, -30.002408981323242, -28.746501922607422, -27.490596771240234, -26.234689712524414, -24.978782653808594, -23.722877502441406, -22.466970443725586, -21.211063385009766, -19.955158233642578, -18.699251174926758, -17.44334602355957, -16.18743896484375, -14.931532859802246, -13.675626754760742, -12.419720649719238, -11.163814544677734, -9.90790843963623, -8.652002334594727, -7.3960957527160645, -6.1401896476745605, -4.884283065795898, -3.6283769607543945, -2.3724708557128906, -1.1165646314620972, 0.1393415927886963, 1.3952479362487793, 2.651154041290283, 3.907060146331787, 5.162966728210449, 6.418872833251953, 7.674778938293457, 8.930685043334961, 10.186591148376465, 11.442497253417969, 12.698404312133789, 13.954309463500977, 15.210216522216797, 16.466121673583984, 17.722028732299805, 18.977935791015625, 20.233840942382812, 21.489748001098633, 22.74565315246582, 24.00156021118164, 25.257465362548828, 26.51337242126465, 27.76927947998047, 29.02518653869629, 30.281091690063477, 31.536998748779297, 32.792903900146484, 34.04880905151367, 35.304718017578125, 36.56062316894531, 37.8165283203125]}, "gradients/decoder.transformer.h.1.crossattention.c_proj.bias": {"_type": "histogram", "values": [3.0, 1.0, 1.0, 1.0, 1.0, 2.0, 7.0, 4.0, 3.0, 8.0, 6.0, 12.0, 8.0, 11.0, 6.0, 10.0, 12.0, 16.0, 21.0, 16.0, 20.0, 27.0, 28.0, 31.0, 37.0, 36.0, 43.0, 43.0, 40.0, 43.0, 52.0, 47.0, 36.0, 43.0, 31.0, 41.0, 39.0, 36.0, 33.0, 31.0, 17.0, 12.0, 17.0, 13.0, 23.0, 11.0, 9.0, 7.0, 5.0, 7.0, 4.0, 3.0, 1.0, 4.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.93359375, -4.76898193359375, -4.6043701171875, -4.43975830078125, -4.275146484375, -4.11053466796875, -3.9459228515625, -3.78131103515625, -3.61669921875, -3.45208740234375, -3.2874755859375, -3.12286376953125, -2.958251953125, -2.79364013671875, -2.6290283203125, -2.46441650390625, -2.2998046875, -2.13519287109375, -1.9705810546875, -1.80596923828125, -1.641357421875, -1.47674560546875, -1.3121337890625, -1.14752197265625, -0.98291015625, -0.81829833984375, -0.6536865234375, -0.48907470703125, -0.324462890625, -0.15985107421875, 0.0047607421875, 0.16937255859375, 0.333984375, 0.49859619140625, 0.6632080078125, 0.82781982421875, 0.992431640625, 1.15704345703125, 1.3216552734375, 1.48626708984375, 1.65087890625, 1.81549072265625, 1.9801025390625, 2.14471435546875, 2.309326171875, 2.47393798828125, 2.6385498046875, 2.80316162109375, 2.9677734375, 3.13238525390625, 3.2969970703125, 3.46160888671875, 3.626220703125, 3.79083251953125, 3.9554443359375, 4.12005615234375, 4.28466796875, 4.44927978515625, 4.6138916015625, 4.77850341796875, 4.943115234375, 5.10772705078125, 5.2723388671875, 5.43695068359375, 5.6015625]}, "gradients/decoder.transformer.h.1.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 4.0, 4.0, 9.0, 11.0, 11.0, 25.0, 29.0, 55.0, 68.0, 136.0, 214.0, 245.0, 321.0, 531.0, 705.0, 1046.0, 1590.0, 2387.0, 3268.0, 5032.0, 7520.0, 11018.0, 16601.0, 24360.0, 36936.0, 54771.0, 79138.0, 109274.0, 138176.0, 144773.0, 122046.0, 91667.0, 63848.0, 43987.0, 29301.0, 19442.0, 12943.0, 8567.0, 5855.0, 3938.0, 2769.0, 1894.0, 1304.0, 810.0, 634.0, 389.0, 304.0, 200.0, 138.0, 88.0, 64.0, 42.0, 30.0, 22.0, 10.0, 5.0, 6.0, 2.0, 4.0, 5.0], "bins": [-0.52978515625, -0.5138320922851562, -0.4978790283203125, -0.48192596435546875, -0.465972900390625, -0.45001983642578125, -0.4340667724609375, -0.41811370849609375, -0.40216064453125, -0.38620758056640625, -0.3702545166015625, -0.35430145263671875, -0.338348388671875, -0.32239532470703125, -0.3064422607421875, -0.29048919677734375, -0.2745361328125, -0.25858306884765625, -0.2426300048828125, -0.22667694091796875, -0.210723876953125, -0.19477081298828125, -0.1788177490234375, -0.16286468505859375, -0.14691162109375, -0.13095855712890625, -0.1150054931640625, -0.09905242919921875, -0.083099365234375, -0.06714630126953125, -0.0511932373046875, -0.03524017333984375, -0.019287109375, -0.00333404541015625, 0.0126190185546875, 0.02857208251953125, 0.044525146484375, 0.06047821044921875, 0.0764312744140625, 0.09238433837890625, 0.10833740234375, 0.12429046630859375, 0.1402435302734375, 0.15619659423828125, 0.172149658203125, 0.18810272216796875, 0.2040557861328125, 0.22000885009765625, 0.2359619140625, 0.25191497802734375, 0.2678680419921875, 0.28382110595703125, 0.299774169921875, 0.31572723388671875, 0.3316802978515625, 0.34763336181640625, 0.36358642578125, 0.37953948974609375, 0.3954925537109375, 0.41144561767578125, 0.427398681640625, 0.44335174560546875, 0.4593048095703125, 0.47525787353515625, 0.4912109375]}, "gradients/decoder.transformer.h.1.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 2.0, 4.0, 1.0, 5.0, 9.0, 5.0, 12.0, 6.0, 6.0, 12.0, 13.0, 22.0, 27.0, 19.0, 29.0, 27.0, 26.0, 43.0, 39.0, 48.0, 33.0, 46.0, 38.0, 1065.0, 48.0, 45.0, 39.0, 34.0, 30.0, 39.0, 31.0, 39.0, 35.0, 26.0, 32.0, 14.0, 11.0, 12.0, 10.0, 17.0, 9.0, 12.0, 5.0, 2.0, 2.0, 1.0, 1.0, 4.0, 1.0, 0.0, 2.0, 1.0, 1.0, 2.0], "bins": [-3.798828125, -3.685089111328125, -3.57135009765625, -3.457611083984375, -3.3438720703125, -3.230133056640625, -3.11639404296875, -3.002655029296875, -2.888916015625, -2.775177001953125, -2.66143798828125, -2.547698974609375, -2.4339599609375, -2.320220947265625, -2.20648193359375, -2.092742919921875, -1.97900390625, -1.865264892578125, -1.75152587890625, -1.637786865234375, -1.5240478515625, -1.410308837890625, -1.29656982421875, -1.182830810546875, -1.069091796875, -0.955352783203125, -0.84161376953125, -0.727874755859375, -0.6141357421875, -0.500396728515625, -0.38665771484375, -0.272918701171875, -0.1591796875, -0.045440673828125, 0.06829833984375, 0.182037353515625, 0.2957763671875, 0.409515380859375, 0.52325439453125, 0.636993408203125, 0.750732421875, 0.864471435546875, 0.97821044921875, 1.091949462890625, 1.2056884765625, 1.319427490234375, 1.43316650390625, 1.546905517578125, 1.66064453125, 1.774383544921875, 1.88812255859375, 2.001861572265625, 2.1156005859375, 2.229339599609375, 2.34307861328125, 2.456817626953125, 2.570556640625, 2.684295654296875, 2.79803466796875, 2.911773681640625, 3.0255126953125, 3.139251708984375, 3.25299072265625, 3.366729736328125, 3.48046875]}, "gradients/decoder.transformer.h.1.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 5.0, 1.0, 3.0, 6.0, 24.0, 24.0, 36.0, 45.0, 79.0, 121.0, 168.0, 314.0, 407.0, 679.0, 1061.0, 1631.0, 2546.0, 3959.0, 6613.0, 10285.0, 16459.0, 26198.0, 40201.0, 60862.0, 90007.0, 123793.0, 1093859.0, 251701.0, 118902.0, 85366.0, 57879.0, 37934.0, 24233.0, 15553.0, 9800.0, 6125.0, 3701.0, 2413.0, 1500.0, 908.0, 589.0, 386.0, 260.0, 173.0, 116.0, 66.0, 50.0, 40.0, 17.0, 17.0, 9.0, 10.0, 3.0, 4.0, 3.0, 0.0, 1.0, 3.0], "bins": [-0.42822265625, -0.4152183532714844, -0.40221405029296875, -0.3892097473144531, -0.3762054443359375, -0.3632011413574219, -0.35019683837890625, -0.3371925354003906, -0.324188232421875, -0.3111839294433594, -0.29817962646484375, -0.2851753234863281, -0.2721710205078125, -0.2591667175292969, -0.24616241455078125, -0.23315811157226562, -0.22015380859375, -0.20714950561523438, -0.19414520263671875, -0.18114089965820312, -0.1681365966796875, -0.15513229370117188, -0.14212799072265625, -0.12912368774414062, -0.116119384765625, -0.10311508178710938, -0.09011077880859375, -0.07710647583007812, -0.0641021728515625, -0.051097869873046875, -0.03809356689453125, -0.025089263916015625, -0.0120849609375, 0.000919342041015625, 0.01392364501953125, 0.026927947998046875, 0.0399322509765625, 0.052936553955078125, 0.06594085693359375, 0.07894515991210938, 0.091949462890625, 0.10495376586914062, 0.11795806884765625, 0.13096237182617188, 0.1439666748046875, 0.15697097778320312, 0.16997528076171875, 0.18297958374023438, 0.19598388671875, 0.20898818969726562, 0.22199249267578125, 0.23499679565429688, 0.2480010986328125, 0.2610054016113281, 0.27400970458984375, 0.2870140075683594, 0.300018310546875, 0.3130226135253906, 0.32602691650390625, 0.3390312194824219, 0.3520355224609375, 0.3650398254394531, 0.37804412841796875, 0.3910484313964844, 0.404052734375]}, "gradients/decoder.transformer.h.1.crossattention.q_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 2.0, 0.0, 3.0, 0.0, 3.0, 6.0, 3.0, 5.0, 7.0, 12.0, 6.0, 15.0, 15.0, 15.0, 27.0, 34.0, 40.0, 64.0, 61.0, 85.0, 76.0, 81.0, 83.0, 72.0, 66.0, 42.0, 43.0, 38.0, 25.0, 14.0, 17.0, 10.0, 7.0, 4.0, 5.0, 6.0, 2.0, 4.0, 3.0, 3.0, 3.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.01157379150390625, -0.011189818382263184, -0.010805845260620117, -0.01042187213897705, -0.010037899017333984, -0.009653925895690918, -0.009269952774047852, -0.008885979652404785, -0.008502006530761719, -0.008118033409118652, -0.007734060287475586, -0.0073500871658325195, -0.006966114044189453, -0.006582140922546387, -0.00619816780090332, -0.005814194679260254, -0.0054302215576171875, -0.005046248435974121, -0.004662275314331055, -0.004278302192687988, -0.003894329071044922, -0.0035103559494018555, -0.003126382827758789, -0.0027424097061157227, -0.0023584365844726562, -0.00197446346282959, -0.0015904903411865234, -0.001206517219543457, -0.0008225440979003906, -0.0004385709762573242, -5.459785461425781e-05, 0.0003293752670288086, 0.000713348388671875, 0.0010973215103149414, 0.0014812946319580078, 0.0018652677536010742, 0.0022492408752441406, 0.002633213996887207, 0.0030171871185302734, 0.00340116024017334, 0.0037851333618164062, 0.004169106483459473, 0.004553079605102539, 0.0049370527267456055, 0.005321025848388672, 0.005704998970031738, 0.006088972091674805, 0.006472945213317871, 0.0068569183349609375, 0.007240891456604004, 0.00762486457824707, 0.008008837699890137, 0.008392810821533203, 0.00877678394317627, 0.009160757064819336, 0.009544730186462402, 0.009928703308105469, 0.010312676429748535, 0.010696649551391602, 0.011080622673034668, 0.011464595794677734, 0.0118485689163208, 0.012232542037963867, 0.012616515159606934, 0.01300048828125]}, "gradients/decoder.transformer.h.1.crossattention.q_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 2.0, 2.0, 2.0, 1.0, 6.0, 8.0, 9.0, 7.0, 13.0, 13.0, 11.0, 15.0, 27.0, 37.0, 49.0, 74.0, 88.0, 135.0, 211.0, 369.0, 622.0, 1564.0, 956303.0, 86514.0, 1053.0, 513.0, 282.0, 184.0, 131.0, 71.0, 66.0, 33.0, 35.0, 21.0, 27.0, 16.0, 8.0, 10.0, 8.0, 7.0, 1.0, 3.0, 5.0, 3.0, 1.0, 2.0, 4.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0], "bins": [-0.227783203125, -0.2210559844970703, -0.21432876586914062, -0.20760154724121094, -0.20087432861328125, -0.19414710998535156, -0.18741989135742188, -0.1806926727294922, -0.1739654541015625, -0.1672382354736328, -0.16051101684570312, -0.15378379821777344, -0.14705657958984375, -0.14032936096191406, -0.13360214233398438, -0.1268749237060547, -0.120147705078125, -0.11342048645019531, -0.10669326782226562, -0.09996604919433594, -0.09323883056640625, -0.08651161193847656, -0.07978439331054688, -0.07305717468261719, -0.0663299560546875, -0.05960273742675781, -0.052875518798828125, -0.04614830017089844, -0.03942108154296875, -0.03269386291503906, -0.025966644287109375, -0.019239425659179688, -0.01251220703125, -0.0057849884033203125, 0.000942230224609375, 0.0076694488525390625, 0.01439666748046875, 0.021123886108398438, 0.027851104736328125, 0.03457832336425781, 0.0413055419921875, 0.04803276062011719, 0.054759979248046875, 0.06148719787597656, 0.06821441650390625, 0.07494163513183594, 0.08166885375976562, 0.08839607238769531, 0.095123291015625, 0.10185050964355469, 0.10857772827148438, 0.11530494689941406, 0.12203216552734375, 0.12875938415527344, 0.13548660278320312, 0.1422138214111328, 0.1489410400390625, 0.1556682586669922, 0.16239547729492188, 0.16912269592285156, 0.17584991455078125, 0.18257713317871094, 0.18930435180664062, 0.1960315704345703, 0.2027587890625]}, "gradients/decoder.transformer.h.1.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 726.0, 286.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.18916285037994385, -0.184042289853096, -0.17892172932624817, -0.17380118370056152, -0.16868062317371368, -0.16356006264686584, -0.1584395170211792, -0.15331895649433136, -0.14819839596748352, -0.14307783544063568, -0.13795727491378784, -0.1328367292881012, -0.12771616876125336, -0.12259560823440552, -0.11747505515813828, -0.11235450208187103, -0.1072339415550232, -0.10211338102817535, -0.09699282795190811, -0.09187227487564087, -0.08675171434879303, -0.08163115382194519, -0.07651060074567795, -0.0713900476694107, -0.06626948714256287, -0.061148930341005325, -0.056028373539447784, -0.050907816737890244, -0.0457872599363327, -0.04066670313477516, -0.03554614633321762, -0.03042558953166008, -0.02530503273010254, -0.020184475928544998, -0.015063919126987457, -0.009943362325429916, -0.0048228055238723755, 0.0002977512776851654, 0.005418308079242706, 0.010538864880800247, 0.015659421682357788, 0.02077997848391533, 0.02590053528547287, 0.03102109208703041, 0.03614164888858795, 0.04126220569014549, 0.04638276249170303, 0.051503319293260574, 0.056623876094818115, 0.061744432896375656, 0.0668649896979332, 0.07198554277420044, 0.07710610330104828, 0.08222666382789612, 0.08734721690416336, 0.0924677699804306, 0.09758833050727844, 0.10270889103412628, 0.10782944411039352, 0.11294999718666077, 0.1180705577135086, 0.12319111824035645, 0.1283116638660431, 0.13343222439289093, 0.13855278491973877]}, "gradients/decoder.transformer.h.1.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 4.0, 8.0, 5.0, 5.0, 8.0, 17.0, 27.0, 22.0, 42.0, 54.0, 34.0, 60.0, 52.0, 65.0, 71.0, 63.0, 50.0, 70.0, 73.0, 36.0, 56.0, 45.0, 35.0, 27.0, 19.0, 21.0, 13.0, 6.0, 4.0, 6.0, 5.0, 9.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-0.018456220626831055, -0.017981894314289093, -0.01750756800174713, -0.01703324168920517, -0.016558915376663208, -0.016084589064121246, -0.015610262751579285, -0.015135936439037323, -0.014661610126495361, -0.0141872838139534, -0.013712957501411438, -0.013238631188869476, -0.012764304876327515, -0.012289978563785553, -0.011815652251243591, -0.01134132593870163, -0.010866999626159668, -0.010392673313617706, -0.009918347001075745, -0.009444020688533783, -0.008969694375991821, -0.00849536806344986, -0.008021041750907898, -0.007546715438365936, -0.007072389125823975, -0.006598062813282013, -0.006123736500740051, -0.00564941018819809, -0.005175083875656128, -0.004700757563114166, -0.004226431250572205, -0.003752104938030243, -0.0032777786254882812, -0.0028034523129463196, -0.002329126000404358, -0.0018547996878623962, -0.0013804733753204346, -0.0009061470627784729, -0.00043182075023651123, 4.250556230545044e-05, 0.0005168318748474121, 0.0009911581873893738, 0.0014654844999313354, 0.0019398108124732971, 0.002414137125015259, 0.0028884634375572205, 0.003362789750099182, 0.003837116062641144, 0.0043114423751831055, 0.004785768687725067, 0.005260095000267029, 0.0057344213128089905, 0.006208747625350952, 0.006683073937892914, 0.0071574002504348755, 0.007631726562976837, 0.008106052875518799, 0.00858037918806076, 0.009054705500602722, 0.009529031813144684, 0.010003358125686646, 0.010477684438228607, 0.010952010750770569, 0.01142633706331253, 0.011900663375854492]}, "gradients/decoder.transformer.h.1.attn.c_proj.bias": {"_type": "histogram", "values": [3.0, 1.0, 1.0, 1.0, 1.0, 2.0, 7.0, 4.0, 3.0, 7.0, 7.0, 12.0, 8.0, 11.0, 6.0, 10.0, 12.0, 16.0, 21.0, 16.0, 20.0, 27.0, 28.0, 31.0, 37.0, 36.0, 43.0, 43.0, 40.0, 43.0, 52.0, 47.0, 36.0, 43.0, 32.0, 41.0, 38.0, 37.0, 32.0, 31.0, 17.0, 12.0, 17.0, 13.0, 23.0, 11.0, 9.0, 7.0, 5.0, 7.0, 4.0, 3.0, 1.0, 4.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.93359375, -4.76898193359375, -4.6043701171875, -4.43975830078125, -4.275146484375, -4.11053466796875, -3.9459228515625, -3.78131103515625, -3.61669921875, -3.45208740234375, -3.2874755859375, -3.12286376953125, -2.958251953125, -2.79364013671875, -2.6290283203125, -2.46441650390625, -2.2998046875, -2.13519287109375, -1.9705810546875, -1.80596923828125, -1.641357421875, -1.47674560546875, -1.3121337890625, -1.14752197265625, -0.98291015625, -0.81829833984375, -0.6536865234375, -0.48907470703125, -0.324462890625, -0.15985107421875, 0.0047607421875, 0.16937255859375, 0.333984375, 0.49859619140625, 0.6632080078125, 0.82781982421875, 0.992431640625, 1.15704345703125, 1.3216552734375, 1.48626708984375, 1.65087890625, 1.81549072265625, 1.9801025390625, 2.14471435546875, 2.309326171875, 2.47393798828125, 2.6385498046875, 2.80316162109375, 2.9677734375, 3.13238525390625, 3.2969970703125, 3.46160888671875, 3.626220703125, 3.79083251953125, 3.9554443359375, 4.12005615234375, 4.28466796875, 4.44927978515625, 4.6138916015625, 4.77850341796875, 4.943115234375, 5.10772705078125, 5.2723388671875, 5.43695068359375, 5.6015625]}, "gradients/decoder.transformer.h.1.attn.c_proj.weight": {"_type": "histogram", "values": [2.0, 2.0, 0.0, 2.0, 3.0, 8.0, 6.0, 9.0, 12.0, 14.0, 21.0, 31.0, 31.0, 40.0, 26.0, 51.0, 70.0, 77.0, 114.0, 173.0, 196.0, 253.0, 400.0, 493.0, 764.0, 1291.0, 2383.0, 5482.0, 17218.0, 72136.0, 360406.0, 453109.0, 97799.0, 22187.0, 6727.0, 2706.0, 1401.0, 781.0, 529.0, 412.0, 282.0, 197.0, 160.0, 131.0, 117.0, 82.0, 58.0, 45.0, 46.0, 26.0, 19.0, 11.0, 7.0, 8.0, 8.0, 4.0, 1.0, 1.0, 2.0, 0.0, 4.0, 0.0, 0.0, 2.0], "bins": [-8.453125, -8.1812744140625, -7.909423828125, -7.6375732421875, -7.36572265625, -7.0938720703125, -6.822021484375, -6.5501708984375, -6.2783203125, -6.0064697265625, -5.734619140625, -5.4627685546875, -5.19091796875, -4.9190673828125, -4.647216796875, -4.3753662109375, -4.103515625, -3.8316650390625, -3.559814453125, -3.2879638671875, -3.01611328125, -2.7442626953125, -2.472412109375, -2.2005615234375, -1.9287109375, -1.6568603515625, -1.385009765625, -1.1131591796875, -0.84130859375, -0.5694580078125, -0.297607421875, -0.0257568359375, 0.24609375, 0.5179443359375, 0.789794921875, 1.0616455078125, 1.33349609375, 1.6053466796875, 1.877197265625, 2.1490478515625, 2.4208984375, 2.6927490234375, 2.964599609375, 3.2364501953125, 3.50830078125, 3.7801513671875, 4.052001953125, 4.3238525390625, 4.595703125, 4.8675537109375, 5.139404296875, 5.4112548828125, 5.68310546875, 5.9549560546875, 6.226806640625, 6.4986572265625, 6.7705078125, 7.0423583984375, 7.314208984375, 7.5860595703125, 7.85791015625, 8.1297607421875, 8.401611328125, 8.6734619140625, 8.9453125]}, "gradients/decoder.transformer.h.1.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 3.0, 3.0, 5.0, 2.0, 7.0, 8.0, 5.0, 4.0, 13.0, 13.0, 19.0, 21.0, 25.0, 27.0, 36.0, 38.0, 50.0, 66.0, 101.0, 151.0, 399.0, 1460.0, 164.0, 100.0, 64.0, 49.0, 53.0, 28.0, 29.0, 25.0, 14.0, 15.0, 15.0, 7.0, 5.0, 9.0, 4.0, 7.0, 1.0, 5.0, 2.0, 1.0, 2.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 2.0], "bins": [-16.484375, -15.953125, -15.421875, -14.890625, -14.359375, -13.828125, -13.296875, -12.765625, -12.234375, -11.703125, -11.171875, -10.640625, -10.109375, -9.578125, -9.046875, -8.515625, -7.984375, -7.453125, -6.921875, -6.390625, -5.859375, -5.328125, -4.796875, -4.265625, -3.734375, -3.203125, -2.671875, -2.140625, -1.609375, -1.078125, -0.546875, -0.015625, 0.515625, 1.046875, 1.578125, 2.109375, 2.640625, 3.171875, 3.703125, 4.234375, 4.765625, 5.296875, 5.828125, 6.359375, 6.890625, 7.421875, 7.953125, 8.484375, 9.015625, 9.546875, 10.078125, 10.609375, 11.140625, 11.671875, 12.203125, 12.734375, 13.265625, 13.796875, 14.328125, 14.859375, 15.390625, 15.921875, 16.453125, 16.984375, 17.515625]}, "gradients/decoder.transformer.h.1.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 4.0, 1.0, 1.0, 2.0, 3.0, 1.0, 2.0, 1.0, 6.0, 6.0, 9.0, 18.0, 6.0, 19.0, 29.0, 41.0, 48.0, 82.0, 131.0, 134.0, 238.0, 552.0, 1806.0, 25748.0, 3095216.0, 18678.0, 1575.0, 514.0, 254.0, 183.0, 111.0, 85.0, 58.0, 49.0, 21.0, 27.0, 16.0, 11.0, 9.0, 6.0, 4.0, 6.0, 4.0, 2.0, 0.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-38.9375, -37.77392578125, -36.6103515625, -35.44677734375, -34.283203125, -33.11962890625, -31.9560546875, -30.79248046875, -29.62890625, -28.46533203125, -27.3017578125, -26.13818359375, -24.974609375, -23.81103515625, -22.6474609375, -21.48388671875, -20.3203125, -19.15673828125, -17.9931640625, -16.82958984375, -15.666015625, -14.50244140625, -13.3388671875, -12.17529296875, -11.01171875, -9.84814453125, -8.6845703125, -7.52099609375, -6.357421875, -5.19384765625, -4.0302734375, -2.86669921875, -1.703125, -0.53955078125, 0.6240234375, 1.78759765625, 2.951171875, 4.11474609375, 5.2783203125, 6.44189453125, 7.60546875, 8.76904296875, 9.9326171875, 11.09619140625, 12.259765625, 13.42333984375, 14.5869140625, 15.75048828125, 16.9140625, 18.07763671875, 19.2412109375, 20.40478515625, 21.568359375, 22.73193359375, 23.8955078125, 25.05908203125, 26.22265625, 27.38623046875, 28.5498046875, 29.71337890625, 30.876953125, 32.04052734375, 33.2041015625, 34.36767578125, 35.53125]}, "gradients/decoder.transformer.h.1.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 59.0, 455.0, 459.0, 35.0, 4.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-68.66342163085938, -64.0647964477539, -59.46617126464844, -54.8675422668457, -50.268917083740234, -45.670291900634766, -41.07166290283203, -36.47303771972656, -31.874412536621094, -27.275787353515625, -22.677160263061523, -18.078533172607422, -13.479907989501953, -8.881282806396484, -4.282655715942383, 0.31597137451171875, 4.9145965576171875, 9.513222694396973, 14.111848831176758, 18.71047592163086, 23.309101104736328, 27.907726287841797, 32.50635528564453, 37.10498046875, 41.70360565185547, 46.30223083496094, 50.900856018066406, 55.49948501586914, 60.09811019897461, 64.69673156738281, 69.29536437988281, 73.89398956298828, 78.49263000488281, 83.09125518798828, 87.68988037109375, 92.28850555419922, 96.88713073730469, 101.48576354980469, 106.08438873291016, 110.68301391601562, 115.2816390991211, 119.88026428222656, 124.47888946533203, 129.0775146484375, 133.6761474609375, 138.27476501464844, 142.87339782714844, 147.47201538085938, 152.07064819335938, 156.66928100585938, 161.2678985595703, 165.8665313720703, 170.46514892578125, 175.06378173828125, 179.6623992919922, 184.2610321044922, 188.85964965820312, 193.45828247070312, 198.05690002441406, 202.65553283691406, 207.254150390625, 211.852783203125, 216.45140075683594, 221.05003356933594, 225.64866638183594]}, "gradients/decoder.transformer.h.1.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 3.0, 10.0, 4.0, 5.0, 6.0, 9.0, 11.0, 14.0, 21.0, 25.0, 18.0, 26.0, 31.0, 36.0, 28.0, 38.0, 46.0, 44.0, 49.0, 37.0, 40.0, 40.0, 30.0, 36.0, 39.0, 40.0, 41.0, 42.0, 32.0, 36.0, 28.0, 28.0, 15.0, 15.0, 22.0, 14.0, 10.0, 10.0, 4.0, 8.0, 5.0, 4.0, 6.0, 2.0, 4.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-31.162355422973633, -30.059326171875, -28.956296920776367, -27.853267669677734, -26.75023651123047, -25.64720916748047, -24.544178009033203, -23.44114875793457, -22.338119506835938, -21.235090255737305, -20.132061004638672, -19.02903175354004, -17.926002502441406, -16.82297134399414, -15.719942092895508, -14.616912841796875, -13.513883590698242, -12.41085433959961, -11.307825088500977, -10.204794883728027, -9.101765632629395, -7.998736381530762, -6.895706653594971, -5.79267692565918, -4.689647674560547, -3.586618185043335, -2.483588695526123, -1.3805592060089111, -0.2775297164916992, 0.8254995346069336, 1.9285292625427246, 3.0315589904785156, 4.134590148925781, 5.237619400024414, 6.340649127960205, 7.443678855895996, 8.546708106994629, 9.649737358093262, 10.752767562866211, 11.855796813964844, 12.958826065063477, 14.06185531616211, 15.164884567260742, 16.267913818359375, 17.37094497680664, 18.47397232055664, 19.577003479003906, 20.68003273010254, 21.783061981201172, 22.886091232299805, 23.989120483398438, 25.09214973449707, 26.195178985595703, 27.29821014404297, 28.4012393951416, 29.504268646240234, 30.607297897338867, 31.7103271484375, 32.813358306884766, 33.916385650634766, 35.01941680908203, 36.12244415283203, 37.2254753112793, 38.32850646972656, 39.43153381347656]}, "gradients/decoder.transformer.h.0.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 3.0, 3.0, 2.0, 1.0, 3.0, 4.0, 2.0, 4.0, 5.0, 10.0, 5.0, 9.0, 8.0, 14.0, 8.0, 9.0, 18.0, 16.0, 18.0, 20.0, 21.0, 26.0, 38.0, 30.0, 42.0, 34.0, 34.0, 38.0, 49.0, 46.0, 48.0, 50.0, 42.0, 34.0, 27.0, 37.0, 34.0, 26.0, 23.0, 27.0, 19.0, 21.0, 16.0, 14.0, 14.0, 11.0, 8.0, 6.0, 11.0, 6.0, 6.0, 7.0, 2.0, 4.0, 3.0, 1.0, 2.0, 2.0, 0.0, 1.0], "bins": [-6.2265625, -6.04150390625, -5.8564453125, -5.67138671875, -5.486328125, -5.30126953125, -5.1162109375, -4.93115234375, -4.74609375, -4.56103515625, -4.3759765625, -4.19091796875, -4.005859375, -3.82080078125, -3.6357421875, -3.45068359375, -3.265625, -3.08056640625, -2.8955078125, -2.71044921875, -2.525390625, -2.34033203125, -2.1552734375, -1.97021484375, -1.78515625, -1.60009765625, -1.4150390625, -1.22998046875, -1.044921875, -0.85986328125, -0.6748046875, -0.48974609375, -0.3046875, -0.11962890625, 0.0654296875, 0.25048828125, 0.435546875, 0.62060546875, 0.8056640625, 0.99072265625, 1.17578125, 1.36083984375, 1.5458984375, 1.73095703125, 1.916015625, 2.10107421875, 2.2861328125, 2.47119140625, 2.65625, 2.84130859375, 3.0263671875, 3.21142578125, 3.396484375, 3.58154296875, 3.7666015625, 3.95166015625, 4.13671875, 4.32177734375, 4.5068359375, 4.69189453125, 4.876953125, 5.06201171875, 5.2470703125, 5.43212890625, 5.6171875]}, "gradients/decoder.transformer.h.0.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 4.0, 0.0, 0.0, 3.0, 5.0, 5.0, 5.0, 6.0, 5.0, 10.0, 15.0, 13.0, 16.0, 24.0, 25.0, 41.0, 63.0, 81.0, 108.0, 146.0, 184.0, 288.0, 388.0, 548.0, 990.0, 2071.0, 10068.0, 219963.0, 3385504.0, 550807.0, 16845.0, 2713.0, 1160.0, 671.0, 416.0, 292.0, 220.0, 162.0, 102.0, 82.0, 42.0, 45.0, 34.0, 24.0, 24.0, 24.0, 10.0, 7.0, 6.0, 5.0, 9.0, 6.0, 5.0, 3.0, 3.0, 1.0, 2.0], "bins": [-33.5625, -32.6201171875, -31.677734375, -30.7353515625, -29.79296875, -28.8505859375, -27.908203125, -26.9658203125, -26.0234375, -25.0810546875, -24.138671875, -23.1962890625, -22.25390625, -21.3115234375, -20.369140625, -19.4267578125, -18.484375, -17.5419921875, -16.599609375, -15.6572265625, -14.71484375, -13.7724609375, -12.830078125, -11.8876953125, -10.9453125, -10.0029296875, -9.060546875, -8.1181640625, -7.17578125, -6.2333984375, -5.291015625, -4.3486328125, -3.40625, -2.4638671875, -1.521484375, -0.5791015625, 0.36328125, 1.3056640625, 2.248046875, 3.1904296875, 4.1328125, 5.0751953125, 6.017578125, 6.9599609375, 7.90234375, 8.8447265625, 9.787109375, 10.7294921875, 11.671875, 12.6142578125, 13.556640625, 14.4990234375, 15.44140625, 16.3837890625, 17.326171875, 18.2685546875, 19.2109375, 20.1533203125, 21.095703125, 22.0380859375, 22.98046875, 23.9228515625, 24.865234375, 25.8076171875, 26.75]}, "gradients/decoder.transformer.h.0.mlp.c_fc.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 5.0, 5.0, 4.0, 11.0, 20.0, 44.0, 57.0, 59.0, 98.0, 139.0, 202.0, 288.0, 403.0, 442.0, 525.0, 450.0, 374.0, 271.0, 204.0, 131.0, 115.0, 75.0, 54.0, 37.0, 21.0, 13.0, 8.0, 7.0, 4.0, 5.0, 3.0, 1.0, 2.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 2.0], "bins": [-11.875, -11.486328125, -11.09765625, -10.708984375, -10.3203125, -9.931640625, -9.54296875, -9.154296875, -8.765625, -8.376953125, -7.98828125, -7.599609375, -7.2109375, -6.822265625, -6.43359375, -6.044921875, -5.65625, -5.267578125, -4.87890625, -4.490234375, -4.1015625, -3.712890625, -3.32421875, -2.935546875, -2.546875, -2.158203125, -1.76953125, -1.380859375, -0.9921875, -0.603515625, -0.21484375, 0.173828125, 0.5625, 0.951171875, 1.33984375, 1.728515625, 2.1171875, 2.505859375, 2.89453125, 3.283203125, 3.671875, 4.060546875, 4.44921875, 4.837890625, 5.2265625, 5.615234375, 6.00390625, 6.392578125, 6.78125, 7.169921875, 7.55859375, 7.947265625, 8.3359375, 8.724609375, 9.11328125, 9.501953125, 9.890625, 10.279296875, 10.66796875, 11.056640625, 11.4453125, 11.833984375, 12.22265625, 12.611328125, 13.0]}, "gradients/decoder.transformer.h.0.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 1.0, 2.0, 2.0, 4.0, 5.0, 35.0, 58.0, 101.0, 295.0, 766.0, 2912.0, 26519.0, 1357101.0, 2741643.0, 58976.0, 4294.0, 1057.0, 322.0, 116.0, 35.0, 19.0, 10.0, 4.0, 2.0, 2.0, 2.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0], "bins": [-31.78125, -30.96875, -30.15625, -29.34375, -28.53125, -27.71875, -26.90625, -26.09375, -25.28125, -24.46875, -23.65625, -22.84375, -22.03125, -21.21875, -20.40625, -19.59375, -18.78125, -17.96875, -17.15625, -16.34375, -15.53125, -14.71875, -13.90625, -13.09375, -12.28125, -11.46875, -10.65625, -9.84375, -9.03125, -8.21875, -7.40625, -6.59375, -5.78125, -4.96875, -4.15625, -3.34375, -2.53125, -1.71875, -0.90625, -0.09375, 0.71875, 1.53125, 2.34375, 3.15625, 3.96875, 4.78125, 5.59375, 6.40625, 7.21875, 8.03125, 8.84375, 9.65625, 10.46875, 11.28125, 12.09375, 12.90625, 13.71875, 14.53125, 15.34375, 16.15625, 16.96875, 17.78125, 18.59375, 19.40625, 20.21875]}, "gradients/decoder.transformer.h.0.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 4.0, 4.0, 8.0, 10.0, 8.0, 15.0, 20.0, 33.0, 48.0, 40.0, 65.0, 51.0, 66.0, 77.0, 85.0, 72.0, 77.0, 63.0, 52.0, 53.0, 35.0, 26.0, 34.0, 12.0, 17.0, 10.0, 9.0, 5.0, 3.0, 3.0, 3.0, 2.0, 3.0, 2.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-48.35721206665039, -46.113197326660156, -43.869178771972656, -41.62516403198242, -39.38114547729492, -37.13713073730469, -34.89311218261719, -32.64909744262695, -30.405080795288086, -28.16106414794922, -25.91704750061035, -23.673030853271484, -21.42901611328125, -19.18499755859375, -16.940982818603516, -14.696966171264648, -12.452949523925781, -10.208932876586914, -7.964916706085205, -5.720900535583496, -3.476883888244629, -1.2328672409057617, 1.011148452758789, 3.2551651000976562, 5.499181747436523, 7.743198394775391, 9.987215042114258, 12.231230735778809, 14.475247383117676, 16.71926498413086, 18.963279724121094, 21.20729637145996, 23.451309204101562, 25.69532585144043, 27.939342498779297, 30.18335723876953, 32.42737579345703, 34.671390533447266, 36.9154052734375, 39.159423828125, 41.4034423828125, 43.647457122802734, 45.891475677490234, 48.13549041748047, 50.37950897216797, 52.6235237121582, 54.86753845214844, 57.11155700683594, 59.35557174682617, 61.599586486816406, 63.843605041503906, 66.0876235961914, 68.33163452148438, 70.57565307617188, 72.81967163085938, 75.06369018554688, 77.30770111083984, 79.55171966552734, 81.79573059082031, 84.03974914550781, 86.28376770019531, 88.52778625488281, 90.77179718017578, 93.01581573486328, 95.25983428955078]}, "gradients/decoder.transformer.h.0.ln_2.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 0.0, 2.0, 4.0, 4.0, 2.0, 10.0, 8.0, 5.0, 7.0, 12.0, 16.0, 20.0, 12.0, 23.0, 26.0, 32.0, 27.0, 28.0, 37.0, 44.0, 38.0, 50.0, 35.0, 32.0, 29.0, 38.0, 47.0, 38.0, 38.0, 44.0, 37.0, 30.0, 38.0, 30.0, 28.0, 22.0, 15.0, 25.0, 12.0, 8.0, 9.0, 2.0, 7.0, 10.0, 6.0, 2.0, 7.0, 6.0, 3.0, 2.0, 1.0, 2.0, 1.0, 2.0], "bins": [-49.04084014892578, -47.628089904785156, -46.2153434753418, -44.80259704589844, -43.38984680175781, -41.97709655761719, -40.56435012817383, -39.15160369873047, -37.738853454589844, -36.32610321044922, -34.91335678100586, -33.5006103515625, -32.087860107421875, -30.675111770629883, -29.26236343383789, -27.8496150970459, -26.436866760253906, -25.024118423461914, -23.611370086669922, -22.19862174987793, -20.785873413085938, -19.373125076293945, -17.960376739501953, -16.54762840270996, -15.134880065917969, -13.722131729125977, -12.309383392333984, -10.896635055541992, -9.48388671875, -8.071138381958008, -6.658390045166016, -5.245641708374023, -3.8328933715820312, -2.420145034790039, -1.0073966979980469, 0.4053516387939453, 1.8180999755859375, 3.2308483123779297, 4.643596649169922, 6.056344985961914, 7.469093322753906, 8.881841659545898, 10.29458999633789, 11.707338333129883, 13.120086669921875, 14.532835006713867, 15.94558334350586, 17.35833168029785, 18.771080017089844, 20.183828353881836, 21.596576690673828, 23.00932502746582, 24.422073364257812, 25.834821701049805, 27.247570037841797, 28.66031837463379, 30.07306671142578, 31.485815048217773, 32.898563385009766, 34.311309814453125, 35.72406005859375, 37.136810302734375, 38.549556732177734, 39.962303161621094, 41.37505340576172]}, "gradients/decoder.transformer.h.0.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 6.0, 0.0, 4.0, 1.0, 1.0, 4.0, 1.0, 6.0, 5.0, 8.0, 6.0, 13.0, 12.0, 13.0, 14.0, 14.0, 29.0, 19.0, 26.0, 33.0, 31.0, 46.0, 44.0, 39.0, 36.0, 29.0, 38.0, 48.0, 44.0, 46.0, 47.0, 32.0, 34.0, 39.0, 28.0, 23.0, 35.0, 24.0, 23.0, 24.0, 12.0, 16.0, 10.0, 6.0, 4.0, 7.0, 5.0, 3.0, 5.0, 8.0, 3.0, 3.0, 4.0, 3.0, 1.0, 3.0], "bins": [-90.0625, -87.51953125, -84.9765625, -82.43359375, -79.890625, -77.34765625, -74.8046875, -72.26171875, -69.71875, -67.17578125, -64.6328125, -62.08984375, -59.546875, -57.00390625, -54.4609375, -51.91796875, -49.375, -46.83203125, -44.2890625, -41.74609375, -39.203125, -36.66015625, -34.1171875, -31.57421875, -29.03125, -26.48828125, -23.9453125, -21.40234375, -18.859375, -16.31640625, -13.7734375, -11.23046875, -8.6875, -6.14453125, -3.6015625, -1.05859375, 1.484375, 4.02734375, 6.5703125, 9.11328125, 11.65625, 14.19921875, 16.7421875, 19.28515625, 21.828125, 24.37109375, 26.9140625, 29.45703125, 32.0, 34.54296875, 37.0859375, 39.62890625, 42.171875, 44.71484375, 47.2578125, 49.80078125, 52.34375, 54.88671875, 57.4296875, 59.97265625, 62.515625, 65.05859375, 67.6015625, 70.14453125, 72.6875]}, "gradients/decoder.transformer.h.0.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 7.0, 7.0, 9.0, 12.0, 18.0, 32.0, 46.0, 73.0, 124.0, 189.0, 307.0, 470.0, 731.0, 1070.0, 1594.0, 2652.0, 4084.0, 6362.0, 9750.0, 15296.0, 23912.0, 36830.0, 55042.0, 80949.0, 112112.0, 141199.0, 147076.0, 125421.0, 93491.0, 65448.0, 43972.0, 28611.0, 18610.0, 11883.0, 7702.0, 4803.0, 3153.0, 1958.0, 1275.0, 844.0, 491.0, 323.0, 226.0, 141.0, 103.0, 57.0, 39.0, 29.0, 16.0, 7.0, 5.0, 5.0, 4.0, 0.0, 1.0, 1.0, 1.0], "bins": [-7.01953125, -6.80816650390625, -6.5968017578125, -6.38543701171875, -6.174072265625, -5.96270751953125, -5.7513427734375, -5.53997802734375, -5.32861328125, -5.11724853515625, -4.9058837890625, -4.69451904296875, -4.483154296875, -4.27178955078125, -4.0604248046875, -3.84906005859375, -3.6376953125, -3.42633056640625, -3.2149658203125, -3.00360107421875, -2.792236328125, -2.58087158203125, -2.3695068359375, -2.15814208984375, -1.94677734375, -1.73541259765625, -1.5240478515625, -1.31268310546875, -1.101318359375, -0.88995361328125, -0.6785888671875, -0.46722412109375, -0.255859375, -0.04449462890625, 0.1668701171875, 0.37823486328125, 0.589599609375, 0.80096435546875, 1.0123291015625, 1.22369384765625, 1.43505859375, 1.64642333984375, 1.8577880859375, 2.06915283203125, 2.280517578125, 2.49188232421875, 2.7032470703125, 2.91461181640625, 3.1259765625, 3.33734130859375, 3.5487060546875, 3.76007080078125, 3.971435546875, 4.18280029296875, 4.3941650390625, 4.60552978515625, 4.81689453125, 5.02825927734375, 5.2396240234375, 5.45098876953125, 5.662353515625, 5.87371826171875, 6.0850830078125, 6.29644775390625, 6.5078125]}, "gradients/decoder.transformer.h.0.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 0.0, 2.0, 3.0, 1.0, 2.0, 4.0, 2.0, 4.0, 7.0, 8.0, 9.0, 14.0, 12.0, 13.0, 11.0, 29.0, 14.0, 24.0, 21.0, 18.0, 29.0, 28.0, 28.0, 31.0, 31.0, 31.0, 37.0, 38.0, 38.0, 1067.0, 33.0, 30.0, 34.0, 46.0, 22.0, 34.0, 39.0, 36.0, 25.0, 22.0, 17.0, 16.0, 20.0, 13.0, 20.0, 16.0, 11.0, 8.0, 5.0, 9.0, 3.0, 10.0, 1.0, 5.0, 2.0, 4.0, 1.0, 0.0, 4.0, 1.0, 1.0], "bins": [-49.03125, -47.52880859375, -46.0263671875, -44.52392578125, -43.021484375, -41.51904296875, -40.0166015625, -38.51416015625, -37.01171875, -35.50927734375, -34.0068359375, -32.50439453125, -31.001953125, -29.49951171875, -27.9970703125, -26.49462890625, -24.9921875, -23.48974609375, -21.9873046875, -20.48486328125, -18.982421875, -17.47998046875, -15.9775390625, -14.47509765625, -12.97265625, -11.47021484375, -9.9677734375, -8.46533203125, -6.962890625, -5.46044921875, -3.9580078125, -2.45556640625, -0.953125, 0.54931640625, 2.0517578125, 3.55419921875, 5.056640625, 6.55908203125, 8.0615234375, 9.56396484375, 11.06640625, 12.56884765625, 14.0712890625, 15.57373046875, 17.076171875, 18.57861328125, 20.0810546875, 21.58349609375, 23.0859375, 24.58837890625, 26.0908203125, 27.59326171875, 29.095703125, 30.59814453125, 32.1005859375, 33.60302734375, 35.10546875, 36.60791015625, 38.1103515625, 39.61279296875, 41.115234375, 42.61767578125, 44.1201171875, 45.62255859375, 47.125]}, "gradients/decoder.transformer.h.0.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 3.0, 3.0, 4.0, 6.0, 14.0, 23.0, 28.0, 49.0, 58.0, 100.0, 169.0, 287.0, 458.0, 810.0, 1359.0, 2303.0, 4024.0, 6588.0, 11170.0, 18868.0, 30963.0, 51033.0, 80345.0, 119599.0, 158372.0, 1216013.0, 137800.0, 96616.0, 63047.0, 38572.0, 23617.0, 14137.0, 8251.0, 5082.0, 2994.0, 1817.0, 1044.0, 615.0, 337.0, 229.0, 126.0, 87.0, 51.0, 30.0, 16.0, 13.0, 7.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0], "bins": [-6.7890625, -6.584716796875, -6.38037109375, -6.176025390625, -5.9716796875, -5.767333984375, -5.56298828125, -5.358642578125, -5.154296875, -4.949951171875, -4.74560546875, -4.541259765625, -4.3369140625, -4.132568359375, -3.92822265625, -3.723876953125, -3.51953125, -3.315185546875, -3.11083984375, -2.906494140625, -2.7021484375, -2.497802734375, -2.29345703125, -2.089111328125, -1.884765625, -1.680419921875, -1.47607421875, -1.271728515625, -1.0673828125, -0.863037109375, -0.65869140625, -0.454345703125, -0.25, -0.045654296875, 0.15869140625, 0.363037109375, 0.5673828125, 0.771728515625, 0.97607421875, 1.180419921875, 1.384765625, 1.589111328125, 1.79345703125, 1.997802734375, 2.2021484375, 2.406494140625, 2.61083984375, 2.815185546875, 3.01953125, 3.223876953125, 3.42822265625, 3.632568359375, 3.8369140625, 4.041259765625, 4.24560546875, 4.449951171875, 4.654296875, 4.858642578125, 5.06298828125, 5.267333984375, 5.4716796875, 5.676025390625, 5.88037109375, 6.084716796875, 6.2890625]}, "gradients/decoder.transformer.h.0.crossattention.q_attn.bias": {"_type": "histogram", "values": [4.0, 3.0, 1.0, 3.0, 6.0, 3.0, 0.0, 5.0, 9.0, 3.0, 5.0, 4.0, 8.0, 12.0, 15.0, 11.0, 19.0, 18.0, 10.0, 25.0, 27.0, 29.0, 54.0, 56.0, 53.0, 54.0, 63.0, 62.0, 60.0, 63.0, 50.0, 43.0, 26.0, 32.0, 32.0, 23.0, 26.0, 15.0, 12.0, 17.0, 8.0, 9.0, 11.0, 3.0, 10.0, 8.0, 4.0, 2.0, 2.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.040771484375, -0.03925657272338867, -0.037741661071777344, -0.036226749420166016, -0.03471183776855469, -0.03319692611694336, -0.03168201446533203, -0.030167102813720703, -0.028652191162109375, -0.027137279510498047, -0.02562236785888672, -0.02410745620727539, -0.022592544555664062, -0.021077632904052734, -0.019562721252441406, -0.018047809600830078, -0.01653289794921875, -0.015017986297607422, -0.013503074645996094, -0.011988162994384766, -0.010473251342773438, -0.00895833969116211, -0.007443428039550781, -0.005928516387939453, -0.004413604736328125, -0.002898693084716797, -0.0013837814331054688, 0.00013113021850585938, 0.0016460418701171875, 0.0031609535217285156, 0.004675865173339844, 0.006190776824951172, 0.0077056884765625, 0.009220600128173828, 0.010735511779785156, 0.012250423431396484, 0.013765335083007812, 0.01528024673461914, 0.01679515838623047, 0.018310070037841797, 0.019824981689453125, 0.021339893341064453, 0.02285480499267578, 0.02436971664428711, 0.025884628295898438, 0.027399539947509766, 0.028914451599121094, 0.030429363250732422, 0.03194427490234375, 0.03345918655395508, 0.034974098205566406, 0.036489009857177734, 0.03800392150878906, 0.03951883316040039, 0.04103374481201172, 0.04254865646362305, 0.044063568115234375, 0.0455784797668457, 0.04709339141845703, 0.04860830307006836, 0.05012321472167969, 0.051638126373291016, 0.053153038024902344, 0.05466794967651367, 0.056182861328125]}, "gradients/decoder.transformer.h.0.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 6.0, 6.0, 6.0, 13.0, 14.0, 14.0, 25.0, 44.0, 46.0, 49.0, 90.0, 119.0, 188.0, 289.0, 458.0, 740.0, 1475.0, 2551.0, 4933.0, 9980.0, 21692.0, 51267.0, 132629.0, 347913.0, 291011.0, 105107.0, 41829.0, 18193.0, 8411.0, 4200.0, 2141.0, 1215.0, 691.0, 405.0, 246.0, 139.0, 133.0, 72.0, 48.0, 44.0, 35.0, 22.0, 13.0, 13.0, 13.0, 15.0, 7.0, 5.0, 6.0, 1.0, 2.0, 1.0, 0.0, 2.0], "bins": [-0.265625, -0.2577857971191406, -0.24994659423828125, -0.24210739135742188, -0.2342681884765625, -0.22642898559570312, -0.21858978271484375, -0.21075057983398438, -0.202911376953125, -0.19507217407226562, -0.18723297119140625, -0.17939376831054688, -0.1715545654296875, -0.16371536254882812, -0.15587615966796875, -0.14803695678710938, -0.14019775390625, -0.13235855102539062, -0.12451934814453125, -0.11668014526367188, -0.1088409423828125, -0.10100173950195312, -0.09316253662109375, -0.08532333374023438, -0.077484130859375, -0.06964492797851562, -0.06180572509765625, -0.053966522216796875, -0.0461273193359375, -0.038288116455078125, -0.03044891357421875, -0.022609710693359375, -0.0147705078125, -0.006931304931640625, 0.00090789794921875, 0.008747100830078125, 0.0165863037109375, 0.024425506591796875, 0.03226470947265625, 0.040103912353515625, 0.047943115234375, 0.055782318115234375, 0.06362152099609375, 0.07146072387695312, 0.0792999267578125, 0.08713912963867188, 0.09497833251953125, 0.10281753540039062, 0.11065673828125, 0.11849594116210938, 0.12633514404296875, 0.13417434692382812, 0.1420135498046875, 0.14985275268554688, 0.15769195556640625, 0.16553115844726562, 0.173370361328125, 0.18120956420898438, 0.18904876708984375, 0.19688796997070312, 0.2047271728515625, 0.21256637573242188, 0.22040557861328125, 0.22824478149414062, 0.236083984375]}, "gradients/decoder.transformer.h.0.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 4.0, 7.0, 1.0, 9.0, 9.0, 12.0, 16.0, 21.0, 26.0, 43.0, 56.0, 50.0, 60.0, 104.0, 107.0, 77.0, 88.0, 81.0, 64.0, 60.0, 36.0, 28.0, 16.0, 14.0, 9.0, 7.0, 2.0, 1.0, 2.0, 1.0, 3.0], "bins": [-0.12564866244792938, -0.1230572760105133, -0.12046589702367783, -0.11787451058626175, -0.11528313159942627, -0.11269174516201019, -0.11010036617517471, -0.10750897973775864, -0.10491760075092316, -0.10232621431350708, -0.0997348353266716, -0.09714344888925552, -0.09455206990242004, -0.09196068346500397, -0.08936930447816849, -0.08677791804075241, -0.08418653160333633, -0.08159514516592026, -0.07900376617908478, -0.0764123797416687, -0.07382100075483322, -0.07122961431741714, -0.06863823533058167, -0.06604684889316559, -0.06345546245574951, -0.060864079743623734, -0.058272697031497955, -0.05568131431937218, -0.0530899316072464, -0.05049854516983032, -0.047907162457704544, -0.045315779745578766, -0.042724404484033585, -0.040133021771907806, -0.03754163905978203, -0.03495025634765625, -0.03235886991024017, -0.029767489060759544, -0.027176104485988617, -0.02458472177386284, -0.02199333906173706, -0.019401956349611282, -0.016810573637485504, -0.014219189062714577, -0.011627806350588799, -0.00903642363846302, -0.0064450399950146675, -0.0038536563515663147, -0.0012622736394405365, 0.001329109538346529, 0.0039204927161335945, 0.00651187589392066, 0.009103259071707726, 0.011694641783833504, 0.014286025427281857, 0.01687740907073021, 0.019468791782855988, 0.022060174494981766, 0.024651557207107544, 0.02724294178187847, 0.02983432449400425, 0.03242570906877518, 0.035017091780900955, 0.03760847449302673, 0.04019985720515251]}, "gradients/decoder.transformer.h.0.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 4.0, 3.0, 6.0, 9.0, 5.0, 8.0, 7.0, 11.0, 16.0, 17.0, 20.0, 27.0, 22.0, 37.0, 27.0, 30.0, 30.0, 43.0, 49.0, 39.0, 53.0, 45.0, 40.0, 48.0, 43.0, 38.0, 39.0, 30.0, 42.0, 34.0, 35.0, 25.0, 25.0, 21.0, 13.0, 22.0, 14.0, 11.0, 8.0, 6.0, 3.0, 4.0, 7.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.04023468494415283, -0.03889888897538185, -0.03756309300661087, -0.03622729703783989, -0.03489150106906891, -0.03355570510029793, -0.03221990913152695, -0.030884113162755966, -0.029548317193984985, -0.028212521225214005, -0.026876725256443024, -0.025540929287672043, -0.024205133318901062, -0.02286933735013008, -0.0215335413813591, -0.02019774541258812, -0.01886194944381714, -0.017526153475046158, -0.016190357506275177, -0.014854561537504196, -0.013518765568733215, -0.012182969599962234, -0.010847173631191254, -0.009511377662420273, -0.008175581693649292, -0.006839785724878311, -0.00550398975610733, -0.0041681937873363495, -0.0028323978185653687, -0.0014966018497943878, -0.00016080588102340698, 0.0011749900877475739, 0.0025107860565185547, 0.0038465820252895355, 0.005182377994060516, 0.006518173962831497, 0.007853969931602478, 0.009189765900373459, 0.01052556186914444, 0.01186135783791542, 0.013197153806686401, 0.014532949775457382, 0.015868745744228363, 0.017204541712999344, 0.018540337681770325, 0.019876133650541306, 0.021211929619312286, 0.022547725588083267, 0.023883521556854248, 0.02521931752562523, 0.02655511349439621, 0.02789090946316719, 0.02922670543193817, 0.030562501400709152, 0.03189829736948013, 0.033234093338251114, 0.034569889307022095, 0.035905685275793076, 0.037241481244564056, 0.03857727721333504, 0.03991307318210602, 0.041248869150877, 0.04258466511964798, 0.04392046108841896, 0.04525625705718994]}, "gradients/decoder.transformer.h.0.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 6.0, 0.0, 4.0, 1.0, 2.0, 3.0, 1.0, 7.0, 4.0, 8.0, 6.0, 14.0, 10.0, 14.0, 14.0, 15.0, 27.0, 20.0, 28.0, 30.0, 33.0, 45.0, 44.0, 39.0, 37.0, 29.0, 40.0, 47.0, 42.0, 48.0, 47.0, 29.0, 37.0, 37.0, 29.0, 21.0, 37.0, 24.0, 23.0, 24.0, 11.0, 17.0, 9.0, 6.0, 4.0, 7.0, 5.0, 3.0, 5.0, 9.0, 4.0, 1.0, 4.0, 3.0, 1.0, 3.0], "bins": [-90.0, -87.45703125, -84.9140625, -82.37109375, -79.828125, -77.28515625, -74.7421875, -72.19921875, -69.65625, -67.11328125, -64.5703125, -62.02734375, -59.484375, -56.94140625, -54.3984375, -51.85546875, -49.3125, -46.76953125, -44.2265625, -41.68359375, -39.140625, -36.59765625, -34.0546875, -31.51171875, -28.96875, -26.42578125, -23.8828125, -21.33984375, -18.796875, -16.25390625, -13.7109375, -11.16796875, -8.625, -6.08203125, -3.5390625, -0.99609375, 1.546875, 4.08984375, 6.6328125, 9.17578125, 11.71875, 14.26171875, 16.8046875, 19.34765625, 21.890625, 24.43359375, 26.9765625, 29.51953125, 32.0625, 34.60546875, 37.1484375, 39.69140625, 42.234375, 44.77734375, 47.3203125, 49.86328125, 52.40625, 54.94921875, 57.4921875, 60.03515625, 62.578125, 65.12109375, 67.6640625, 70.20703125, 72.75]}, "gradients/decoder.transformer.h.0.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 3.0, 5.0, 3.0, 2.0, 1.0, 4.0, 7.0, 6.0, 10.0, 12.0, 17.0, 23.0, 24.0, 38.0, 53.0, 85.0, 109.0, 169.0, 301.0, 405.0, 722.0, 1619.0, 4029.0, 13151.0, 55800.0, 267513.0, 500104.0, 157433.0, 32785.0, 8368.0, 2868.0, 1162.0, 615.0, 379.0, 212.0, 166.0, 93.0, 74.0, 58.0, 29.0, 25.0, 13.0, 23.0, 9.0, 6.0, 8.0, 5.0, 7.0, 4.0, 3.0, 3.0, 0.0, 2.0, 2.0, 2.0, 1.0], "bins": [-14.9921875, -14.555419921875, -14.11865234375, -13.681884765625, -13.2451171875, -12.808349609375, -12.37158203125, -11.934814453125, -11.498046875, -11.061279296875, -10.62451171875, -10.187744140625, -9.7509765625, -9.314208984375, -8.87744140625, -8.440673828125, -8.00390625, -7.567138671875, -7.13037109375, -6.693603515625, -6.2568359375, -5.820068359375, -5.38330078125, -4.946533203125, -4.509765625, -4.072998046875, -3.63623046875, -3.199462890625, -2.7626953125, -2.325927734375, -1.88916015625, -1.452392578125, -1.015625, -0.578857421875, -0.14208984375, 0.294677734375, 0.7314453125, 1.168212890625, 1.60498046875, 2.041748046875, 2.478515625, 2.915283203125, 3.35205078125, 3.788818359375, 4.2255859375, 4.662353515625, 5.09912109375, 5.535888671875, 5.97265625, 6.409423828125, 6.84619140625, 7.282958984375, 7.7197265625, 8.156494140625, 8.59326171875, 9.030029296875, 9.466796875, 9.903564453125, 10.34033203125, 10.777099609375, 11.2138671875, 11.650634765625, 12.08740234375, 12.524169921875, 12.9609375]}, "gradients/decoder.transformer.h.0.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 1.0, 3.0, 2.0, 2.0, 3.0, 5.0, 1.0, 2.0, 4.0, 20.0, 26.0, 37.0, 51.0, 47.0, 70.0, 87.0, 90.0, 2131.0, 107.0, 86.0, 88.0, 52.0, 37.0, 25.0, 31.0, 15.0, 12.0, 7.0, 5.0, 2.0, 0.0, 3.0, 4.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-257.5, -249.375, -241.25, -233.125, -225.0, -216.875, -208.75, -200.625, -192.5, -184.375, -176.25, -168.125, -160.0, -151.875, -143.75, -135.625, -127.5, -119.375, -111.25, -103.125, -95.0, -86.875, -78.75, -70.625, -62.5, -54.375, -46.25, -38.125, -30.0, -21.875, -13.75, -5.625, 2.5, 10.625, 18.75, 26.875, 35.0, 43.125, 51.25, 59.375, 67.5, 75.625, 83.75, 91.875, 100.0, 108.125, 116.25, 124.375, 132.5, 140.625, 148.75, 156.875, 165.0, 173.125, 181.25, 189.375, 197.5, 205.625, 213.75, 221.875, 230.0, 238.125, 246.25, 254.375, 262.5]}, "gradients/decoder.transformer.h.0.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 2.0, 1.0, 2.0, 3.0, 3.0, 1.0, 1.0, 2.0, 6.0, 3.0, 9.0, 11.0, 15.0, 15.0, 26.0, 32.0, 58.0, 81.0, 124.0, 242.0, 430.0, 962.0, 2660.0, 15009.0, 2790471.0, 323549.0, 8337.0, 1852.0, 815.0, 395.0, 219.0, 111.0, 80.0, 42.0, 37.0, 25.0, 22.0, 13.0, 8.0, 13.0, 6.0, 5.0, 4.0, 5.0, 3.0, 2.0, 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 0.0, 2.0, 1.0], "bins": [-40.5625, -39.365234375, -38.16796875, -36.970703125, -35.7734375, -34.576171875, -33.37890625, -32.181640625, -30.984375, -29.787109375, -28.58984375, -27.392578125, -26.1953125, -24.998046875, -23.80078125, -22.603515625, -21.40625, -20.208984375, -19.01171875, -17.814453125, -16.6171875, -15.419921875, -14.22265625, -13.025390625, -11.828125, -10.630859375, -9.43359375, -8.236328125, -7.0390625, -5.841796875, -4.64453125, -3.447265625, -2.25, -1.052734375, 0.14453125, 1.341796875, 2.5390625, 3.736328125, 4.93359375, 6.130859375, 7.328125, 8.525390625, 9.72265625, 10.919921875, 12.1171875, 13.314453125, 14.51171875, 15.708984375, 16.90625, 18.103515625, 19.30078125, 20.498046875, 21.6953125, 22.892578125, 24.08984375, 25.287109375, 26.484375, 27.681640625, 28.87890625, 30.076171875, 31.2734375, 32.470703125, 33.66796875, 34.865234375, 36.0625]}, "gradients/decoder.transformer.h.0.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 3.0, 1.0, 2.0, 3.0, 3.0, 9.0, 15.0, 26.0, 49.0, 83.0, 121.0, 129.0, 173.0, 144.0, 100.0, 58.0, 24.0, 26.0, 13.0, 5.0, 5.0, 5.0, 3.0, 3.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-180.86422729492188, -174.43331909179688, -168.00241088867188, -161.57150268554688, -155.14060974121094, -148.70970153808594, -142.27879333496094, -135.84788513183594, -129.4169921875, -122.986083984375, -116.55518341064453, -110.12427520751953, -103.69337463378906, -97.26246643066406, -90.83155822753906, -84.40065002441406, -77.96974182128906, -71.53883361816406, -65.1079330444336, -58.677024841308594, -52.24612045288086, -45.815216064453125, -39.384307861328125, -32.95340347290039, -26.522499084472656, -20.091594696044922, -13.660688400268555, -7.2297821044921875, -0.7988777160644531, 5.632026672363281, 12.062934875488281, 18.493839263916016, 24.924728393554688, 31.355632781982422, 37.786537170410156, 44.217445373535156, 50.64834976196289, 57.079254150390625, 63.510162353515625, 69.94107055664062, 76.3719711303711, 82.8028793334961, 89.23377990722656, 95.66468811035156, 102.09559631347656, 108.52649688720703, 114.95740509033203, 121.3883056640625, 127.8192138671875, 134.2501220703125, 140.6810302734375, 147.1119384765625, 153.54283142089844, 159.97373962402344, 166.40464782714844, 172.83555603027344, 179.26644897460938, 185.69735717773438, 192.12826538085938, 198.55917358398438, 204.9900665283203, 211.4209747314453, 217.8518829345703, 224.2827911376953, 230.7136993408203]}, "gradients/decoder.transformer.h.0.ln_1.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 2.0, 1.0, 2.0, 2.0, 1.0, 4.0, 0.0, 2.0, 2.0, 2.0, 4.0, 5.0, 6.0, 13.0, 10.0, 20.0, 20.0, 21.0, 21.0, 20.0, 27.0, 35.0, 38.0, 39.0, 39.0, 32.0, 50.0, 46.0, 48.0, 43.0, 50.0, 63.0, 43.0, 57.0, 25.0, 35.0, 40.0, 20.0, 31.0, 11.0, 13.0, 13.0, 18.0, 7.0, 8.0, 7.0, 3.0, 10.0, 1.0, 3.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 0.0, 1.0, 0.0, 1.0], "bins": [-192.01649475097656, -185.93333435058594, -179.85018920898438, -173.76702880859375, -167.6838836669922, -161.60072326660156, -155.517578125, -149.43441772460938, -143.3512725830078, -137.2681121826172, -131.18496704101562, -125.10181427001953, -119.01866149902344, -112.93550872802734, -106.85235595703125, -100.76919555664062, -94.68604278564453, -88.60289001464844, -82.51973724365234, -76.43658447265625, -70.35343170166016, -64.27027893066406, -58.1871223449707, -52.10396957397461, -46.020816802978516, -39.93766403198242, -33.85451126098633, -27.7713565826416, -21.688203811645508, -15.605051040649414, -9.521896362304688, -3.4387435913085938, 2.6444091796875, 8.727561950683594, 14.810715675354004, 20.893869400024414, 26.977022171020508, 33.06017303466797, 39.14332962036133, 45.22648239135742, 51.309635162353516, 57.39278793334961, 63.4759407043457, 69.55909729003906, 75.64225006103516, 81.72540283203125, 87.80855560302734, 93.89170837402344, 99.97486114501953, 106.05801391601562, 112.14116668701172, 118.22431945800781, 124.3074722290039, 130.390625, 136.47378540039062, 142.5569305419922, 148.6400909423828, 154.72325134277344, 160.806396484375, 166.88955688476562, 172.9727020263672, 179.0558624267578, 185.13900756835938, 191.22216796875, 197.30531311035156]}, "gradients/decoder.transformer.wpe.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 3.0, 2.0, 3.0, 12.0, 6.0, 8.0, 15.0, 25.0, 25.0, 41.0, 62.0, 62.0, 94.0, 144.0, 196.0, 223.0, 334.0, 487.0, 641.0, 838.0, 1250.0, 1628.0, 2150.0, 2960.0, 4443.0, 1015452.0, 5300.0, 3236.0, 2318.0, 1699.0, 1298.0, 996.0, 733.0, 527.0, 376.0, 294.0, 192.0, 144.0, 75.0, 84.0, 45.0, 37.0, 24.0, 24.0, 14.0, 10.0, 14.0, 4.0, 10.0, 3.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 2.0], "bins": [-28.988479614257812, -28.100051879882812, -27.211626052856445, -26.323198318481445, -25.434772491455078, -24.546344757080078, -23.657917022705078, -22.76949119567871, -21.88106346130371, -20.99263572692871, -20.104209899902344, -19.215782165527344, -18.327356338500977, -17.438928604125977, -16.55050277709961, -15.66207504272461, -14.773648262023926, -13.885221481323242, -12.996794700622559, -12.108367919921875, -11.219940185546875, -10.331513404846191, -9.443086624145508, -8.554658889770508, -7.666232585906982, -6.777805805206299, -5.889378547668457, -5.000951766967773, -4.11252498626709, -3.224097728729248, -2.3356709480285645, -1.4472436904907227, -0.5588169097900391, 0.32961004972457886, 1.2180370092391968, 2.10646390914917, 2.9948909282684326, 3.8833179473876953, 4.771744728088379, 5.660171985626221, 6.548598766326904, 7.437025547027588, 8.32545280456543, 9.213879585266113, 10.102306365966797, 10.990734100341797, 11.879159927368164, 12.767587661743164, 13.656014442443848, 14.544441223144531, 15.432868003845215, 16.3212947845459, 17.2097225189209, 18.098148345947266, 18.986576080322266, 19.875003814697266, 20.763429641723633, 21.651857376098633, 22.540283203125, 23.4287109375, 24.317136764526367, 25.205564498901367, 26.093990325927734, 26.982418060302734, 27.870845794677734]}, "gradients/decoder.transformer.wte.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 0.0, 3.0, 2.0, 4.0, 3.0, 2.0, 1.0, 8.0, 10.0, 18.0, 16.0, 34.0, 50.0, 44.0, 82.0, 119.0, 423.0, 51460200.0, 1846.0, 146.0, 57.0, 34.0, 21.0, 15.0, 9.0, 4.0, 6.0, 4.0, 5.0, 1.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1636.43115234375, -1560.9873046875, -1485.5435791015625, -1410.0997314453125, -1334.656005859375, -1259.212158203125, -1183.768310546875, -1108.3245849609375, -1032.880859375, -957.4370727539062, -881.9932861328125, -806.5494384765625, -731.105712890625, -655.661865234375, -580.2180786132812, -504.7742919921875, -429.3304443359375, -353.88665771484375, -278.44287109375, -202.99905395507812, -127.55526733398438, -52.111480712890625, 23.33233642578125, 98.776123046875, 174.21990966796875, 249.6636962890625, 325.10748291015625, 400.5513000488281, 475.9950866699219, 551.4388427734375, 626.8826904296875, 702.3264770507812, 777.770263671875, 853.2140502929688, 928.6578369140625, 1004.1016845703125, 1079.54541015625, 1154.9892578125, 1230.43310546875, 1305.8768310546875, 1381.320556640625, 1456.764404296875, 1532.2081298828125, 1607.6519775390625, 1683.095703125, 1758.53955078125, 1833.9833984375, 1909.4271240234375, 1984.8709716796875, 2060.314697265625, 2135.758544921875, 2211.202392578125, 2286.646240234375, 2362.08984375, 2437.53369140625, 2512.9775390625, 2588.42138671875, 2663.865234375, 2739.30908203125, 2814.752685546875, 2890.196533203125, 2965.640380859375, 3041.084228515625, 3116.52783203125, 3191.9716796875]}, "gradients/encoder.adapter.layers.2.conv.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 3.0, 4.0, 5.0, 6.0, 17.0, 31.0, 47.0, 72.0, 92.0, 133.0, 185.0, 319.0, 503.0, 757.0, 1073.0, 1675.0, 2559.0, 3994.0, 6028.0, 9356.0, 14356.0, 22049.0, 33355.0, 50326.0, 77273.0, 115130.0, 167785.0, 236371.0, 320406.0, 575526.0, 3316191.0, 397875.0, 289824.0, 209603.0, 145909.0, 99920.0, 66447.0, 43667.0, 28727.0, 18625.0, 12305.0, 7843.0, 5284.0, 3354.0, 2192.0, 1506.0, 958.0, 609.0, 426.0, 286.0, 153.0, 110.0, 89.0, 60.0, 21.0, 10.0, 12.0, 7.0, 2.0, 2.0], "bins": [-1.8564453125, -1.8023529052734375, -1.748260498046875, -1.6941680908203125, -1.64007568359375, -1.5859832763671875, -1.531890869140625, -1.4777984619140625, -1.4237060546875, -1.3696136474609375, -1.315521240234375, -1.2614288330078125, -1.20733642578125, -1.1532440185546875, -1.099151611328125, -1.0450592041015625, -0.990966796875, -0.9368743896484375, -0.882781982421875, -0.8286895751953125, -0.77459716796875, -0.7205047607421875, -0.666412353515625, -0.6123199462890625, -0.5582275390625, -0.5041351318359375, -0.450042724609375, -0.3959503173828125, -0.34185791015625, -0.2877655029296875, -0.233673095703125, -0.1795806884765625, -0.12548828125, -0.0713958740234375, -0.017303466796875, 0.0367889404296875, 0.09088134765625, 0.1449737548828125, 0.199066162109375, 0.2531585693359375, 0.3072509765625, 0.3613433837890625, 0.415435791015625, 0.4695281982421875, 0.52362060546875, 0.5777130126953125, 0.631805419921875, 0.6858978271484375, 0.739990234375, 0.7940826416015625, 0.848175048828125, 0.9022674560546875, 0.95635986328125, 1.0104522705078125, 1.064544677734375, 1.1186370849609375, 1.1727294921875, 1.2268218994140625, 1.280914306640625, 1.3350067138671875, 1.38909912109375, 1.4431915283203125, 1.497283935546875, 1.5513763427734375, 1.60546875]}, "gradients/encoder.adapter.layers.2.conv.bias": {"_type": "histogram", "values": [4.0, 4.0, 0.0, 6.0, 4.0, 3.0, 5.0, 3.0, 7.0, 7.0, 8.0, 10.0, 6.0, 12.0, 19.0, 15.0, 17.0, 22.0, 18.0, 12.0, 26.0, 19.0, 29.0, 33.0, 37.0, 24.0, 39.0, 42.0, 46.0, 156.0, 860.0, 102.0, 36.0, 29.0, 29.0, 29.0, 34.0, 33.0, 24.0, 28.0, 24.0, 22.0, 23.0, 11.0, 22.0, 16.0, 12.0, 10.0, 16.0, 7.0, 9.0, 6.0, 5.0, 6.0, 2.0, 3.0, 3.0, 3.0, 2.0, 3.0, 1.0, 3.0, 0.0, 2.0], "bins": [-20.640625, -19.959716796875, -19.27880859375, -18.597900390625, -17.9169921875, -17.236083984375, -16.55517578125, -15.874267578125, -15.193359375, -14.512451171875, -13.83154296875, -13.150634765625, -12.4697265625, -11.788818359375, -11.10791015625, -10.427001953125, -9.74609375, -9.065185546875, -8.38427734375, -7.703369140625, -7.0224609375, -6.341552734375, -5.66064453125, -4.979736328125, -4.298828125, -3.617919921875, -2.93701171875, -2.256103515625, -1.5751953125, -0.894287109375, -0.21337890625, 0.467529296875, 1.1484375, 1.829345703125, 2.51025390625, 3.191162109375, 3.8720703125, 4.552978515625, 5.23388671875, 5.914794921875, 6.595703125, 7.276611328125, 7.95751953125, 8.638427734375, 9.3193359375, 10.000244140625, 10.68115234375, 11.362060546875, 12.04296875, 12.723876953125, 13.40478515625, 14.085693359375, 14.7666015625, 15.447509765625, 16.12841796875, 16.809326171875, 17.490234375, 18.171142578125, 18.85205078125, 19.532958984375, 20.2138671875, 20.894775390625, 21.57568359375, 22.256591796875, 22.9375]}, "gradients/encoder.adapter.layers.1.conv.weight": {"_type": "histogram", "values": [1.0, 0.0, 4.0, 2.0, 0.0, 3.0, 6.0, 4.0, 10.0, 22.0, 32.0, 56.0, 89.0, 138.0, 196.0, 315.0, 465.0, 789.0, 1151.0, 1786.0, 2719.0, 4550.0, 6798.0, 10410.0, 16415.0, 25952.0, 41036.0, 64065.0, 102128.0, 160847.0, 244240.0, 352221.0, 747143.0, 3271704.0, 415378.0, 287014.0, 193855.0, 123902.0, 78912.0, 49802.0, 31807.0, 20089.0, 12541.0, 8074.0, 5198.0, 3462.0, 2217.0, 1403.0, 920.0, 538.0, 383.0, 249.0, 138.0, 111.0, 49.0, 45.0, 20.0, 19.0, 21.0, 4.0, 3.0, 2.0, 2.0, 1.0], "bins": [-1.841796875, -1.786346435546875, -1.73089599609375, -1.675445556640625, -1.6199951171875, -1.564544677734375, -1.50909423828125, -1.453643798828125, -1.398193359375, -1.342742919921875, -1.28729248046875, -1.231842041015625, -1.1763916015625, -1.120941162109375, -1.06549072265625, -1.010040283203125, -0.95458984375, -0.899139404296875, -0.84368896484375, -0.788238525390625, -0.7327880859375, -0.677337646484375, -0.62188720703125, -0.566436767578125, -0.510986328125, -0.455535888671875, -0.40008544921875, -0.344635009765625, -0.2891845703125, -0.233734130859375, -0.17828369140625, -0.122833251953125, -0.0673828125, -0.011932373046875, 0.04351806640625, 0.098968505859375, 0.1544189453125, 0.209869384765625, 0.26531982421875, 0.320770263671875, 0.376220703125, 0.431671142578125, 0.48712158203125, 0.542572021484375, 0.5980224609375, 0.653472900390625, 0.70892333984375, 0.764373779296875, 0.81982421875, 0.875274658203125, 0.93072509765625, 0.986175537109375, 1.0416259765625, 1.097076416015625, 1.15252685546875, 1.207977294921875, 1.263427734375, 1.318878173828125, 1.37432861328125, 1.429779052734375, 1.4852294921875, 1.540679931640625, 1.59613037109375, 1.651580810546875, 1.70703125]}, "gradients/encoder.adapter.layers.1.conv.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 3.0, 4.0, 5.0, 5.0, 11.0, 7.0, 12.0, 16.0, 10.0, 10.0, 11.0, 23.0, 14.0, 20.0, 19.0, 29.0, 28.0, 25.0, 25.0, 43.0, 39.0, 49.0, 47.0, 192.0, 886.0, 52.0, 54.0, 39.0, 31.0, 43.0, 40.0, 41.0, 22.0, 22.0, 22.0, 23.0, 20.0, 17.0, 19.0, 12.0, 11.0, 6.0, 7.0, 7.0, 6.0, 2.0, 4.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-28.390625, -27.559326171875, -26.72802734375, -25.896728515625, -25.0654296875, -24.234130859375, -23.40283203125, -22.571533203125, -21.740234375, -20.908935546875, -20.07763671875, -19.246337890625, -18.4150390625, -17.583740234375, -16.75244140625, -15.921142578125, -15.08984375, -14.258544921875, -13.42724609375, -12.595947265625, -11.7646484375, -10.933349609375, -10.10205078125, -9.270751953125, -8.439453125, -7.608154296875, -6.77685546875, -5.945556640625, -5.1142578125, -4.282958984375, -3.45166015625, -2.620361328125, -1.7890625, -0.957763671875, -0.12646484375, 0.704833984375, 1.5361328125, 2.367431640625, 3.19873046875, 4.030029296875, 4.861328125, 5.692626953125, 6.52392578125, 7.355224609375, 8.1865234375, 9.017822265625, 9.84912109375, 10.680419921875, 11.51171875, 12.343017578125, 13.17431640625, 14.005615234375, 14.8369140625, 15.668212890625, 16.49951171875, 17.330810546875, 18.162109375, 18.993408203125, 19.82470703125, 20.656005859375, 21.4873046875, 22.318603515625, 23.14990234375, 23.981201171875, 24.8125]}, "gradients/encoder.adapter.layers.0.conv.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 3.0, 1.0, 1.0, 7.0, 9.0, 12.0, 17.0, 16.0, 18.0, 20.0, 22.0, 39.0, 52.0, 72.0, 120.0, 158.0, 275.0, 542.0, 1147.0, 2395.0, 6004.0, 14073.0, 35172.0, 96012.0, 5682834.0, 322843.0, 78570.0, 29692.0, 11917.0, 4942.0, 2142.0, 1066.0, 447.0, 258.0, 137.0, 140.0, 62.0, 43.0, 20.0, 22.0, 25.0, 26.0, 26.0, 14.0, 7.0, 9.0, 12.0, 4.0, 1.0, 0.0, 1.0, 1.0, 2.0], "bins": [-10.7421875, -10.4429931640625, -10.143798828125, -9.8446044921875, -9.54541015625, -9.2462158203125, -8.947021484375, -8.6478271484375, -8.3486328125, -8.0494384765625, -7.750244140625, -7.4510498046875, -7.15185546875, -6.8526611328125, -6.553466796875, -6.2542724609375, -5.955078125, -5.6558837890625, -5.356689453125, -5.0574951171875, -4.75830078125, -4.4591064453125, -4.159912109375, -3.8607177734375, -3.5615234375, -3.2623291015625, -2.963134765625, -2.6639404296875, -2.36474609375, -2.0655517578125, -1.766357421875, -1.4671630859375, -1.16796875, -0.8687744140625, -0.569580078125, -0.2703857421875, 0.02880859375, 0.3280029296875, 0.627197265625, 0.9263916015625, 1.2255859375, 1.5247802734375, 1.823974609375, 2.1231689453125, 2.42236328125, 2.7215576171875, 3.020751953125, 3.3199462890625, 3.619140625, 3.9183349609375, 4.217529296875, 4.5167236328125, 4.81591796875, 5.1151123046875, 5.414306640625, 5.7135009765625, 6.0126953125, 6.3118896484375, 6.611083984375, 6.9102783203125, 7.20947265625, 7.5086669921875, 7.807861328125, 8.1070556640625, 8.40625]}, "gradients/encoder.adapter.layers.0.conv.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 5.0, 4.0, 3.0, 11.0, 13.0, 8.0, 16.0, 21.0, 19.0, 11.0, 27.0, 35.0, 30.0, 28.0, 43.0, 34.0, 48.0, 40.0, 39.0, 105.0, 1002.0, 63.0, 45.0, 50.0, 30.0, 41.0, 37.0, 33.0, 30.0, 25.0, 25.0, 20.0, 12.0, 19.0, 11.0, 14.0, 6.0, 9.0, 9.0, 2.0, 7.0, 1.0, 2.0, 3.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-28.234375, -27.267333984375, -26.30029296875, -25.333251953125, -24.3662109375, -23.399169921875, -22.43212890625, -21.465087890625, -20.498046875, -19.531005859375, -18.56396484375, -17.596923828125, -16.6298828125, -15.662841796875, -14.69580078125, -13.728759765625, -12.76171875, -11.794677734375, -10.82763671875, -9.860595703125, -8.8935546875, -7.926513671875, -6.95947265625, -5.992431640625, -5.025390625, -4.058349609375, -3.09130859375, -2.124267578125, -1.1572265625, -0.190185546875, 0.77685546875, 1.743896484375, 2.7109375, 3.677978515625, 4.64501953125, 5.612060546875, 6.5791015625, 7.546142578125, 8.51318359375, 9.480224609375, 10.447265625, 11.414306640625, 12.38134765625, 13.348388671875, 14.3154296875, 15.282470703125, 16.24951171875, 17.216552734375, 18.18359375, 19.150634765625, 20.11767578125, 21.084716796875, 22.0517578125, 23.018798828125, 23.98583984375, 24.952880859375, 25.919921875, 26.886962890625, 27.85400390625, 28.821044921875, 29.7880859375, 30.755126953125, 31.72216796875, 32.689208984375, 33.65625]}, "gradients/encoder.encoder.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 3.0, 9.0, 30.0, 118.0, 491.0, 268.0, 63.0, 18.0, 8.0, 5.0, 2.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-72.91288757324219, -69.02229309082031, -65.13169860839844, -61.2411003112793, -57.35050582885742, -53.45990753173828, -49.569313049316406, -45.67871856689453, -41.788124084472656, -37.89752960205078, -34.00693130493164, -30.116336822509766, -26.22574234008789, -22.335145950317383, -18.444549560546875, -14.553955078125, -10.66335678100586, -6.772761344909668, -2.8821654319763184, 1.0084304809570312, 4.899025917053223, 8.789621353149414, 12.680217742919922, 16.570812225341797, 20.461408615112305, 24.352005004882812, 28.242599487304688, 32.13319396972656, 36.0237922668457, 39.91438674926758, 43.80498504638672, 47.695579528808594, 51.58617401123047, 55.476768493652344, 59.367366790771484, 63.25796127319336, 67.1485595703125, 71.03915405273438, 74.92974853515625, 78.82034301757812, 82.7109375, 86.60153198242188, 90.49212646484375, 94.38272094726562, 98.27332305908203, 102.1639175415039, 106.05451202392578, 109.94510650634766, 113.83570861816406, 117.72630310058594, 121.61689758300781, 125.50749206542969, 129.39808654785156, 133.28868103027344, 137.17929077148438, 141.06988525390625, 144.96046447753906, 148.85105895996094, 152.7416534423828, 156.6322479248047, 160.52284240722656, 164.41343688964844, 168.30404663085938, 172.19464111328125, 176.08523559570312]}, "gradients/encoder.encoder.layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 3.0, 6.0, 6.0, 6.0, 13.0, 7.0, 12.0, 12.0, 36.0, 41.0, 71.0, 77.0, 84.0, 115.0, 109.0, 104.0, 71.0, 63.0, 56.0, 33.0, 29.0, 18.0, 9.0, 7.0, 5.0, 7.0, 3.0, 1.0, 5.0, 2.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-102.44094848632812, -98.71145629882812, -94.98196411132812, -91.25247192382812, -87.52297973632812, -83.79348754882812, -80.06399536132812, -76.33450317382812, -72.60501098632812, -68.87551879882812, -65.14602661132812, -61.416534423828125, -57.687042236328125, -53.957550048828125, -50.22806167602539, -46.49856948852539, -42.769081115722656, -39.039588928222656, -35.310096740722656, -31.58060646057129, -27.85111427307129, -24.12162208557129, -20.392131805419922, -16.662639617919922, -12.933147430419922, -9.203655242919922, -5.474164009094238, -1.7446727752685547, 1.9848194122314453, 5.714311599731445, 9.443801879882812, 13.173294067382812, 16.902786254882812, 20.632278442382812, 24.361770629882812, 28.09126091003418, 31.82075309753418, 35.55024719238281, 39.27973556518555, 43.00922775268555, 46.73871994018555, 50.46821212768555, 54.19770431518555, 57.92719268798828, 61.65668487548828, 65.38617706298828, 69.11566925048828, 72.84516143798828, 76.57465362548828, 80.30414581298828, 84.03363800048828, 87.76313018798828, 91.49262237548828, 95.22211456298828, 98.95159912109375, 102.68109130859375, 106.41058349609375, 110.14007568359375, 113.86956787109375, 117.59906005859375, 121.32855224609375, 125.05804443359375, 128.78753662109375, 132.51702880859375, 136.24652099609375]}, "gradients/encoder.encoder.layers.23.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 3.0, 1.0, 3.0, 2.0, 8.0, 12.0, 16.0, 16.0, 19.0, 35.0, 29.0, 76.0, 77.0, 141.0, 183.0, 285.0, 423.0, 692.0, 1115.0, 1762.0, 2999.0, 5326.0, 10740.0, 25830.0, 99941.0, 3921796.0, 81608.0, 21130.0, 8570.0, 4365.0, 2537.0, 1489.0, 961.0, 624.0, 424.0, 289.0, 235.0, 173.0, 92.0, 71.0, 57.0, 37.0, 34.0, 24.0, 12.0, 4.0, 7.0, 6.0, 2.0, 5.0, 6.0, 2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.57861328125, -0.5589752197265625, -0.539337158203125, -0.5196990966796875, -0.50006103515625, -0.4804229736328125, -0.460784912109375, -0.4411468505859375, -0.4215087890625, -0.4018707275390625, -0.382232666015625, -0.3625946044921875, -0.34295654296875, -0.3233184814453125, -0.303680419921875, -0.2840423583984375, -0.264404296875, -0.2447662353515625, -0.225128173828125, -0.2054901123046875, -0.18585205078125, -0.1662139892578125, -0.146575927734375, -0.1269378662109375, -0.1072998046875, -0.0876617431640625, -0.068023681640625, -0.0483856201171875, -0.02874755859375, -0.0091094970703125, 0.010528564453125, 0.0301666259765625, 0.0498046875, 0.0694427490234375, 0.089080810546875, 0.1087188720703125, 0.12835693359375, 0.1479949951171875, 0.167633056640625, 0.1872711181640625, 0.2069091796875, 0.2265472412109375, 0.246185302734375, 0.2658233642578125, 0.28546142578125, 0.3050994873046875, 0.324737548828125, 0.3443756103515625, 0.364013671875, 0.3836517333984375, 0.403289794921875, 0.4229278564453125, 0.44256591796875, 0.4622039794921875, 0.481842041015625, 0.5014801025390625, 0.5211181640625, 0.5407562255859375, 0.560394287109375, 0.5800323486328125, 0.59967041015625, 0.6193084716796875, 0.638946533203125, 0.6585845947265625, 0.67822265625]}, "gradients/encoder.encoder.layers.23.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 2.0, 3.0, 0.0, 1.0, 2.0, 6.0, 1.0, 1.0, 2.0, 5.0, 1.0, 5.0, 7.0, 8.0, 6.0, 5.0, 8.0, 10.0, 10.0, 18.0, 14.0, 546.0, 247.0, 16.0, 17.0, 16.0, 6.0, 8.0, 7.0, 4.0, 3.0, 3.0, 0.0, 3.0, 5.0, 1.0, 1.0, 6.0, 2.0, 1.0, 4.0, 1.0, 3.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.339111328125, -0.326934814453125, -0.31475830078125, -0.302581787109375, -0.2904052734375, -0.278228759765625, -0.26605224609375, -0.253875732421875, -0.24169921875, -0.229522705078125, -0.21734619140625, -0.205169677734375, -0.1929931640625, -0.180816650390625, -0.16864013671875, -0.156463623046875, -0.144287109375, -0.132110595703125, -0.11993408203125, -0.107757568359375, -0.0955810546875, -0.083404541015625, -0.07122802734375, -0.059051513671875, -0.046875, -0.034698486328125, -0.02252197265625, -0.010345458984375, 0.0018310546875, 0.014007568359375, 0.02618408203125, 0.038360595703125, 0.050537109375, 0.062713623046875, 0.07489013671875, 0.087066650390625, 0.0992431640625, 0.111419677734375, 0.12359619140625, 0.135772705078125, 0.14794921875, 0.160125732421875, 0.17230224609375, 0.184478759765625, 0.1966552734375, 0.208831787109375, 0.22100830078125, 0.233184814453125, 0.245361328125, 0.257537841796875, 0.26971435546875, 0.281890869140625, 0.2940673828125, 0.306243896484375, 0.31842041015625, 0.330596923828125, 0.3427734375, 0.354949951171875, 0.36712646484375, 0.379302978515625, 0.3914794921875, 0.403656005859375, 0.41583251953125, 0.428009033203125, 0.440185546875]}, "gradients/encoder.encoder.layers.23.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 7.0, 3.0, 9.0, 6.0, 10.0, 6.0, 25.0, 28.0, 34.0, 60.0, 88.0, 130.0, 219.0, 315.0, 604.0, 1210.0, 2869.0, 7986.0, 33705.0, 327517.0, 3666475.0, 125632.0, 18520.0, 5136.0, 1713.0, 843.0, 435.0, 255.0, 144.0, 88.0, 65.0, 37.0, 29.0, 14.0, 18.0, 14.0, 13.0, 12.0, 6.0, 4.0, 4.0, 0.0, 6.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.6865234375, -0.6638946533203125, -0.641265869140625, -0.6186370849609375, -0.59600830078125, -0.5733795166015625, -0.550750732421875, -0.5281219482421875, -0.5054931640625, -0.4828643798828125, -0.460235595703125, -0.4376068115234375, -0.41497802734375, -0.3923492431640625, -0.369720458984375, -0.3470916748046875, -0.324462890625, -0.3018341064453125, -0.279205322265625, -0.2565765380859375, -0.23394775390625, -0.2113189697265625, -0.188690185546875, -0.1660614013671875, -0.1434326171875, -0.1208038330078125, -0.098175048828125, -0.0755462646484375, -0.05291748046875, -0.0302886962890625, -0.007659912109375, 0.0149688720703125, 0.03759765625, 0.0602264404296875, 0.082855224609375, 0.1054840087890625, 0.12811279296875, 0.1507415771484375, 0.173370361328125, 0.1959991455078125, 0.2186279296875, 0.2412567138671875, 0.263885498046875, 0.2865142822265625, 0.30914306640625, 0.3317718505859375, 0.354400634765625, 0.3770294189453125, 0.399658203125, 0.4222869873046875, 0.444915771484375, 0.4675445556640625, 0.49017333984375, 0.5128021240234375, 0.535430908203125, 0.5580596923828125, 0.5806884765625, 0.6033172607421875, 0.625946044921875, 0.6485748291015625, 0.67120361328125, 0.6938323974609375, 0.716461181640625, 0.7390899658203125, 0.76171875]}, "gradients/encoder.encoder.layers.23.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 2.0, 4.0, 1.0, 3.0, 2.0, 1.0, 6.0, 4.0, 3.0, 12.0, 17.0, 18.0, 37.0, 50.0, 76.0, 147.0, 251.0, 678.0, 1817.0, 499.0, 178.0, 100.0, 55.0, 40.0, 31.0, 12.0, 8.0, 11.0, 4.0, 2.0, 6.0, 3.0, 2.0, 4.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0], "bins": [-0.4619140625, -0.4484748840332031, -0.43503570556640625, -0.4215965270996094, -0.4081573486328125, -0.3947181701660156, -0.38127899169921875, -0.3678398132324219, -0.354400634765625, -0.3409614562988281, -0.32752227783203125, -0.3140830993652344, -0.3006439208984375, -0.2872047424316406, -0.27376556396484375, -0.2603263854980469, -0.24688720703125, -0.23344802856445312, -0.22000885009765625, -0.20656967163085938, -0.1931304931640625, -0.17969131469726562, -0.16625213623046875, -0.15281295776367188, -0.139373779296875, -0.12593460083007812, -0.11249542236328125, -0.09905624389648438, -0.0856170654296875, -0.07217788696289062, -0.05873870849609375, -0.045299530029296875, -0.0318603515625, -0.018421173095703125, -0.00498199462890625, 0.008457183837890625, 0.0218963623046875, 0.035335540771484375, 0.04877471923828125, 0.062213897705078125, 0.075653076171875, 0.08909225463867188, 0.10253143310546875, 0.11597061157226562, 0.1294097900390625, 0.14284896850585938, 0.15628814697265625, 0.16972732543945312, 0.18316650390625, 0.19660568237304688, 0.21004486083984375, 0.22348403930664062, 0.2369232177734375, 0.2503623962402344, 0.26380157470703125, 0.2772407531738281, 0.290679931640625, 0.3041191101074219, 0.31755828857421875, 0.3309974670410156, 0.3444366455078125, 0.3578758239746094, 0.37131500244140625, 0.3847541809082031, 0.398193359375]}, "gradients/encoder.encoder.layers.23.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 3.0, 7.0, 16.0, 46.0, 398.0, 485.0, 47.0, 6.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.0191986560821533, -1.8531420230865479, -1.687085509300232, -1.521028995513916, -1.3549723625183105, -1.188915729522705, -1.0228592157363892, -0.8568027019500732, -0.6907460689544678, -0.5246894955635071, -0.3586329221725464, -0.1925763487815857, -0.026519775390625, 0.1395367980003357, 0.3055933713912964, 0.4716498851776123, 0.6377065181732178, 0.8037630915641785, 0.9698196649551392, 1.135876178741455, 1.3019328117370605, 1.467989444732666, 1.634045958518982, 1.8001024723052979, 1.9661591053009033, 2.132215738296509, 2.298272132873535, 2.4643287658691406, 2.630385398864746, 2.7964420318603516, 2.962498664855957, 3.1285550594329834, 3.294611930847168, 3.4606685638427734, 3.626725196838379, 3.7927815914154053, 3.9588382244110107, 4.124894618988037, 4.290951251983643, 4.457007884979248, 4.6230645179748535, 4.789121150970459, 4.9551777839660645, 5.12123441696167, 5.287290573120117, 5.453347206115723, 5.619403839111328, 5.785460472106934, 5.951517105102539, 6.1175737380981445, 6.28363037109375, 6.4496870040893555, 6.615743637084961, 6.781799793243408, 6.947856426239014, 7.113913059234619, 7.279969692230225, 7.44602632522583, 7.6120829582214355, 7.778139591217041, 7.944195747375488, 8.110252380371094, 8.2763090133667, 8.442365646362305, 8.60842227935791]}, "gradients/encoder.encoder.layers.23.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 2.0, 5.0, 4.0, 6.0, 7.0, 6.0, 9.0, 15.0, 30.0, 26.0, 54.0, 56.0, 49.0, 75.0, 97.0, 88.0, 110.0, 78.0, 70.0, 61.0, 55.0, 38.0, 28.0, 15.0, 10.0, 1.0, 6.0, 4.0, 2.0, 3.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0], "bins": [-2.696762800216675, -2.6279690265655518, -2.5591752529144287, -2.4903814792633057, -2.4215877056121826, -2.3527939319610596, -2.2840001583099365, -2.2152063846588135, -2.1464126110076904, -2.0776188373565674, -2.0088250637054443, -1.9400312900543213, -1.8712375164031982, -1.8024437427520752, -1.7336499691009521, -1.664856195449829, -1.596062421798706, -1.527268648147583, -1.45847487449646, -1.389681100845337, -1.3208873271942139, -1.2520935535430908, -1.1832997798919678, -1.1145060062408447, -1.0457122325897217, -0.9769184589385986, -0.9081246852874756, -0.8393309116363525, -0.7705371379852295, -0.7017433643341064, -0.6329495906829834, -0.5641558170318604, -0.4953620433807373, -0.42656826972961426, -0.3577744960784912, -0.28898072242736816, -0.22018694877624512, -0.15139317512512207, -0.08259940147399902, -0.013805627822875977, 0.05498814582824707, 0.12378191947937012, 0.19257569313049316, 0.2613694667816162, 0.33016324043273926, 0.3989570140838623, 0.46775078773498535, 0.5365445613861084, 0.6053383350372314, 0.6741321086883545, 0.7429258823394775, 0.8117196559906006, 0.8805134296417236, 0.9493072032928467, 1.0181009769439697, 1.0868947505950928, 1.1556885242462158, 1.2244822978973389, 1.293276071548462, 1.362069845199585, 1.430863618850708, 1.499657392501831, 1.568451166152954, 1.6372449398040771, 1.7060387134552002]}, "gradients/encoder.encoder.layers.23.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 1.0, 4.0, 1.0, 6.0, 10.0, 15.0, 13.0, 30.0, 44.0, 51.0, 82.0, 141.0, 202.0, 281.0, 467.0, 652.0, 993.0, 1552.0, 2510.0, 4151.0, 7419.0, 14816.0, 36958.0, 881914.0, 55205.0, 19055.0, 8979.0, 4834.0, 2898.0, 1820.0, 1189.0, 749.0, 514.0, 337.0, 224.0, 144.0, 87.0, 82.0, 33.0, 31.0, 24.0, 13.0, 15.0, 4.0, 8.0, 2.0, 3.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.6064453125, -1.560089111328125, -1.51373291015625, -1.467376708984375, -1.4210205078125, -1.374664306640625, -1.32830810546875, -1.281951904296875, -1.235595703125, -1.189239501953125, -1.14288330078125, -1.096527099609375, -1.0501708984375, -1.003814697265625, -0.95745849609375, -0.911102294921875, -0.86474609375, -0.818389892578125, -0.77203369140625, -0.725677490234375, -0.6793212890625, -0.632965087890625, -0.58660888671875, -0.540252685546875, -0.493896484375, -0.447540283203125, -0.40118408203125, -0.354827880859375, -0.3084716796875, -0.262115478515625, -0.21575927734375, -0.169403076171875, -0.123046875, -0.076690673828125, -0.03033447265625, 0.016021728515625, 0.0623779296875, 0.108734130859375, 0.15509033203125, 0.201446533203125, 0.247802734375, 0.294158935546875, 0.34051513671875, 0.386871337890625, 0.4332275390625, 0.479583740234375, 0.52593994140625, 0.572296142578125, 0.61865234375, 0.665008544921875, 0.71136474609375, 0.757720947265625, 0.8040771484375, 0.850433349609375, 0.89678955078125, 0.943145751953125, 0.989501953125, 1.035858154296875, 1.08221435546875, 1.128570556640625, 1.1749267578125, 1.221282958984375, 1.26763916015625, 1.313995361328125, 1.3603515625]}, "gradients/encoder.encoder.layers.23.attention.out_proj.bias": {"_type": "histogram", "values": [2.0, 2.0, 1.0, 0.0, 2.0, 2.0, 0.0, 1.0, 3.0, 2.0, 5.0, 1.0, 3.0, 5.0, 0.0, 5.0, 5.0, 6.0, 6.0, 8.0, 10.0, 10.0, 9.0, 13.0, 15.0, 204.0, 583.0, 24.0, 11.0, 20.0, 8.0, 4.0, 9.0, 8.0, 1.0, 4.0, 1.0, 4.0, 3.0, 3.0, 0.0, 2.0, 4.0, 1.0, 6.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.302490234375, -0.2909126281738281, -0.27933502197265625, -0.2677574157714844, -0.2561798095703125, -0.24460220336914062, -0.23302459716796875, -0.22144699096679688, -0.209869384765625, -0.19829177856445312, -0.18671417236328125, -0.17513656616210938, -0.1635589599609375, -0.15198135375976562, -0.14040374755859375, -0.12882614135742188, -0.11724853515625, -0.10567092895507812, -0.09409332275390625, -0.08251571655273438, -0.0709381103515625, -0.059360504150390625, -0.04778289794921875, -0.036205291748046875, -0.024627685546875, -0.013050079345703125, -0.00147247314453125, 0.010105133056640625, 0.0216827392578125, 0.033260345458984375, 0.04483795166015625, 0.056415557861328125, 0.0679931640625, 0.07957077026367188, 0.09114837646484375, 0.10272598266601562, 0.1143035888671875, 0.12588119506835938, 0.13745880126953125, 0.14903640747070312, 0.160614013671875, 0.17219161987304688, 0.18376922607421875, 0.19534683227539062, 0.2069244384765625, 0.21850204467773438, 0.23007965087890625, 0.24165725708007812, 0.25323486328125, 0.2648124694824219, 0.27639007568359375, 0.2879676818847656, 0.2995452880859375, 0.3111228942871094, 0.32270050048828125, 0.3342781066894531, 0.345855712890625, 0.3574333190917969, 0.36901092529296875, 0.3805885314941406, 0.3921661376953125, 0.4037437438964844, 0.41532135009765625, 0.4268989562988281, 0.4384765625]}, "gradients/encoder.encoder.layers.23.attention.v_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 5.0, 7.0, 7.0, 10.0, 13.0, 10.0, 19.0, 31.0, 53.0, 95.0, 141.0, 239.0, 481.0, 1088.0, 3051.0, 12274.0, 73053.0, 627428.0, 286668.0, 33705.0, 6624.0, 1922.0, 735.0, 349.0, 197.0, 129.0, 83.0, 50.0, 33.0, 23.0, 16.0, 12.0, 2.0, 5.0, 2.0, 2.0, 1.0, 3.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.3896484375, -1.345947265625, -1.30224609375, -1.258544921875, -1.21484375, -1.171142578125, -1.12744140625, -1.083740234375, -1.0400390625, -0.996337890625, -0.95263671875, -0.908935546875, -0.865234375, -0.821533203125, -0.77783203125, -0.734130859375, -0.6904296875, -0.646728515625, -0.60302734375, -0.559326171875, -0.515625, -0.471923828125, -0.42822265625, -0.384521484375, -0.3408203125, -0.297119140625, -0.25341796875, -0.209716796875, -0.166015625, -0.122314453125, -0.07861328125, -0.034912109375, 0.0087890625, 0.052490234375, 0.09619140625, 0.139892578125, 0.18359375, 0.227294921875, 0.27099609375, 0.314697265625, 0.3583984375, 0.402099609375, 0.44580078125, 0.489501953125, 0.533203125, 0.576904296875, 0.62060546875, 0.664306640625, 0.7080078125, 0.751708984375, 0.79541015625, 0.839111328125, 0.8828125, 0.926513671875, 0.97021484375, 1.013916015625, 1.0576171875, 1.101318359375, 1.14501953125, 1.188720703125, 1.232421875, 1.276123046875, 1.31982421875, 1.363525390625, 1.4072265625]}, "gradients/encoder.encoder.layers.23.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 1.0, 3.0, 1.0, 6.0, 4.0, 3.0, 10.0, 11.0, 7.0, 12.0, 13.0, 13.0, 23.0, 22.0, 26.0, 27.0, 31.0, 29.0, 35.0, 41.0, 32.0, 41.0, 45.0, 45.0, 35.0, 44.0, 46.0, 47.0, 48.0, 46.0, 32.0, 46.0, 30.0, 29.0, 27.0, 13.0, 16.0, 11.0, 11.0, 10.0, 7.0, 6.0, 5.0, 9.0, 4.0, 4.0, 2.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 0.0, 1.0], "bins": [-1.435546875, -1.392852783203125, -1.35015869140625, -1.307464599609375, -1.2647705078125, -1.222076416015625, -1.17938232421875, -1.136688232421875, -1.093994140625, -1.051300048828125, -1.00860595703125, -0.965911865234375, -0.9232177734375, -0.880523681640625, -0.83782958984375, -0.795135498046875, -0.75244140625, -0.709747314453125, -0.66705322265625, -0.624359130859375, -0.5816650390625, -0.538970947265625, -0.49627685546875, -0.453582763671875, -0.410888671875, -0.368194580078125, -0.32550048828125, -0.282806396484375, -0.2401123046875, -0.197418212890625, -0.15472412109375, -0.112030029296875, -0.0693359375, -0.026641845703125, 0.01605224609375, 0.058746337890625, 0.1014404296875, 0.144134521484375, 0.18682861328125, 0.229522705078125, 0.272216796875, 0.314910888671875, 0.35760498046875, 0.400299072265625, 0.4429931640625, 0.485687255859375, 0.52838134765625, 0.571075439453125, 0.61376953125, 0.656463623046875, 0.69915771484375, 0.741851806640625, 0.7845458984375, 0.827239990234375, 0.86993408203125, 0.912628173828125, 0.955322265625, 0.998016357421875, 1.04071044921875, 1.083404541015625, 1.1260986328125, 1.168792724609375, 1.21148681640625, 1.254180908203125, 1.296875]}, "gradients/encoder.encoder.layers.23.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 4.0, 4.0, 4.0, 6.0, 10.0, 16.0, 35.0, 51.0, 98.0, 269.0, 801.0, 4939.0, 709463.0, 327380.0, 4236.0, 769.0, 256.0, 91.0, 59.0, 32.0, 19.0, 5.0, 6.0, 4.0, 2.0, 2.0, 1.0, 0.0, 3.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-2.88671875, -2.8126373291015625, -2.738555908203125, -2.6644744873046875, -2.59039306640625, -2.5163116455078125, -2.442230224609375, -2.3681488037109375, -2.2940673828125, -2.2199859619140625, -2.145904541015625, -2.0718231201171875, -1.99774169921875, -1.9236602783203125, -1.849578857421875, -1.7754974365234375, -1.701416015625, -1.6273345947265625, -1.553253173828125, -1.4791717529296875, -1.40509033203125, -1.3310089111328125, -1.256927490234375, -1.1828460693359375, -1.1087646484375, -1.0346832275390625, -0.960601806640625, -0.8865203857421875, -0.81243896484375, -0.7383575439453125, -0.664276123046875, -0.5901947021484375, -0.51611328125, -0.4420318603515625, -0.367950439453125, -0.2938690185546875, -0.21978759765625, -0.1457061767578125, -0.071624755859375, 0.0024566650390625, 0.0765380859375, 0.1506195068359375, 0.224700927734375, 0.2987823486328125, 0.37286376953125, 0.4469451904296875, 0.521026611328125, 0.5951080322265625, 0.669189453125, 0.7432708740234375, 0.817352294921875, 0.8914337158203125, 0.96551513671875, 1.0395965576171875, 1.113677978515625, 1.1877593994140625, 1.2618408203125, 1.3359222412109375, 1.410003662109375, 1.4840850830078125, 1.55816650390625, 1.6322479248046875, 1.706329345703125, 1.7804107666015625, 1.8544921875]}, "gradients/encoder.encoder.layers.23.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0, 1.0, 4.0, 1.0, 2.0, 1.0, 8.0, 6.0, 5.0, 10.0, 10.0, 10.0, 19.0, 15.0, 28.0, 38.0, 62.0, 91.0, 81.0, 125.0, 111.0, 104.0, 73.0, 62.0, 49.0, 23.0, 29.0, 11.0, 6.0, 7.0, 6.0, 2.0, 4.0, 0.0, 3.0, 2.0, 1.0, 0.0, 4.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.233287811279297e-05, -5.0693750381469727e-05, -4.9054622650146484e-05, -4.741549491882324e-05, -4.57763671875e-05, -4.413723945617676e-05, -4.2498111724853516e-05, -4.0858983993530273e-05, -3.921985626220703e-05, -3.758072853088379e-05, -3.594160079956055e-05, -3.4302473068237305e-05, -3.266334533691406e-05, -3.102421760559082e-05, -2.9385089874267578e-05, -2.7745962142944336e-05, -2.6106834411621094e-05, -2.446770668029785e-05, -2.282857894897461e-05, -2.1189451217651367e-05, -1.9550323486328125e-05, -1.7911195755004883e-05, -1.627206802368164e-05, -1.4632940292358398e-05, -1.2993812561035156e-05, -1.1354684829711914e-05, -9.715557098388672e-06, -8.07642936706543e-06, -6.4373016357421875e-06, -4.798173904418945e-06, -3.159046173095703e-06, -1.519918441772461e-06, 1.1920928955078125e-07, 1.7583370208740234e-06, 3.3974647521972656e-06, 5.036592483520508e-06, 6.67572021484375e-06, 8.314847946166992e-06, 9.953975677490234e-06, 1.1593103408813477e-05, 1.3232231140136719e-05, 1.4871358871459961e-05, 1.6510486602783203e-05, 1.8149614334106445e-05, 1.9788742065429688e-05, 2.142786979675293e-05, 2.3066997528076172e-05, 2.4706125259399414e-05, 2.6345252990722656e-05, 2.79843807220459e-05, 2.962350845336914e-05, 3.126263618469238e-05, 3.2901763916015625e-05, 3.454089164733887e-05, 3.618001937866211e-05, 3.781914710998535e-05, 3.9458274841308594e-05, 4.1097402572631836e-05, 4.273653030395508e-05, 4.437565803527832e-05, 4.601478576660156e-05, 4.7653913497924805e-05, 4.929304122924805e-05, 5.093216896057129e-05, 5.257129669189453e-05]}, "gradients/encoder.encoder.layers.23.attention.q_proj.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 2.0, 5.0, 4.0, 2.0, 8.0, 9.0, 7.0, 19.0, 38.0, 45.0, 74.0, 159.0, 381.0, 739.0, 2006.0, 8676.0, 114507.0, 871439.0, 42575.0, 5139.0, 1461.0, 613.0, 295.0, 149.0, 63.0, 57.0, 25.0, 18.0, 15.0, 6.0, 8.0, 5.0, 1.0, 0.0, 2.0, 4.0, 3.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.986328125, -0.953826904296875, -0.92132568359375, -0.888824462890625, -0.8563232421875, -0.823822021484375, -0.79132080078125, -0.758819580078125, -0.726318359375, -0.693817138671875, -0.66131591796875, -0.628814697265625, -0.5963134765625, -0.563812255859375, -0.53131103515625, -0.498809814453125, -0.46630859375, -0.433807373046875, -0.40130615234375, -0.368804931640625, -0.3363037109375, -0.303802490234375, -0.27130126953125, -0.238800048828125, -0.206298828125, -0.173797607421875, -0.14129638671875, -0.108795166015625, -0.0762939453125, -0.043792724609375, -0.01129150390625, 0.021209716796875, 0.0537109375, 0.086212158203125, 0.11871337890625, 0.151214599609375, 0.1837158203125, 0.216217041015625, 0.24871826171875, 0.281219482421875, 0.313720703125, 0.346221923828125, 0.37872314453125, 0.411224365234375, 0.4437255859375, 0.476226806640625, 0.50872802734375, 0.541229248046875, 0.57373046875, 0.606231689453125, 0.63873291015625, 0.671234130859375, 0.7037353515625, 0.736236572265625, 0.76873779296875, 0.801239013671875, 0.833740234375, 0.866241455078125, 0.89874267578125, 0.931243896484375, 0.9637451171875, 0.996246337890625, 1.02874755859375, 1.061248779296875, 1.09375]}, "gradients/encoder.encoder.layers.23.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0, 2.0, 5.0, 4.0, 13.0, 23.0, 21.0, 52.0, 82.0, 137.0, 173.0, 167.0, 126.0, 82.0, 36.0, 29.0, 26.0, 7.0, 11.0, 3.0, 4.0, 1.0, 2.0, 3.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.6083984375, -1.5596160888671875, -1.510833740234375, -1.4620513916015625, -1.41326904296875, -1.3644866943359375, -1.315704345703125, -1.2669219970703125, -1.2181396484375, -1.1693572998046875, -1.120574951171875, -1.0717926025390625, -1.02301025390625, -0.9742279052734375, -0.925445556640625, -0.8766632080078125, -0.827880859375, -0.7790985107421875, -0.730316162109375, -0.6815338134765625, -0.63275146484375, -0.5839691162109375, -0.535186767578125, -0.4864044189453125, -0.4376220703125, -0.3888397216796875, -0.340057373046875, -0.2912750244140625, -0.24249267578125, -0.1937103271484375, -0.144927978515625, -0.0961456298828125, -0.04736328125, 0.0014190673828125, 0.050201416015625, 0.0989837646484375, 0.14776611328125, 0.1965484619140625, 0.245330810546875, 0.2941131591796875, 0.3428955078125, 0.3916778564453125, 0.440460205078125, 0.4892425537109375, 0.53802490234375, 0.5868072509765625, 0.635589599609375, 0.6843719482421875, 0.733154296875, 0.7819366455078125, 0.830718994140625, 0.8795013427734375, 0.92828369140625, 0.9770660400390625, 1.025848388671875, 1.0746307373046875, 1.1234130859375, 1.1721954345703125, 1.220977783203125, 1.2697601318359375, 1.31854248046875, 1.3673248291015625, 1.416107177734375, 1.4648895263671875, 1.513671875]}, "gradients/encoder.encoder.layers.23.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 7.0, 5.0, 15.0, 55.0, 301.0, 513.0, 79.0, 15.0, 12.0, 4.0, 1.0, 2.0, 0.0, 3.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-16.141454696655273, -15.250275611877441, -14.35909652709961, -13.467917442321777, -12.576738357543945, -11.685559272766113, -10.794380187988281, -9.90320110321045, -9.012022018432617, -8.120842933654785, -7.229663848876953, -6.338484764099121, -5.447305679321289, -4.556126594543457, -3.664947509765625, -2.773768424987793, -1.882589340209961, -0.9914102554321289, -0.10023117065429688, 0.7909479141235352, 1.6821269989013672, 2.573306083679199, 3.4644851684570312, 4.355664253234863, 5.246843338012695, 6.138022422790527, 7.029201507568359, 7.920380592346191, 8.811559677124023, 9.702738761901855, 10.593917846679688, 11.48509693145752, 12.376274108886719, 13.26745319366455, 14.158632278442383, 15.049811363220215, 15.940990447998047, 16.832168579101562, 17.72334861755371, 18.61452865600586, 19.505706787109375, 20.39688491821289, 21.28806495666504, 22.179244995117188, 23.070423126220703, 23.96160125732422, 24.852781295776367, 25.743961334228516, 26.63513946533203, 27.526317596435547, 28.417497634887695, 29.308677673339844, 30.19985580444336, 31.091033935546875, 31.982213973999023, 32.87339401245117, 33.76457214355469, 34.6557502746582, 35.54692840576172, 36.4381103515625, 37.329288482666016, 38.22046661376953, 39.11164855957031, 40.00282669067383, 40.894004821777344]}, "gradients/encoder.encoder.layers.23.layer_norm.bias": {"_type": "histogram", "values": [4.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 1.0, 2.0, 2.0, 4.0, 4.0, 8.0, 2.0, 5.0, 5.0, 4.0, 6.0, 16.0, 18.0, 31.0, 44.0, 61.0, 93.0, 112.0, 143.0, 115.0, 86.0, 68.0, 42.0, 25.0, 19.0, 15.0, 11.0, 9.0, 7.0, 4.0, 5.0, 3.0, 6.0, 4.0, 1.0, 4.0, 2.0, 1.0, 2.0, 2.0, 2.0, 0.0, 1.0, 2.0, 3.0, 1.0, 0.0, 2.0, 0.0, 2.0], "bins": [-22.497350692749023, -21.807695388793945, -21.118038177490234, -20.428382873535156, -19.738727569580078, -19.049072265625, -18.35941505432129, -17.66975975036621, -16.980104446411133, -16.290449142456055, -15.60079288482666, -14.911136627197266, -14.221481323242188, -13.531825065612793, -12.842168807983398, -12.15251350402832, -11.462857246398926, -10.773200988769531, -10.083545684814453, -9.393889427185059, -8.70423412322998, -8.014577865600586, -7.32492208480835, -6.635266304016113, -5.945610523223877, -5.255954742431641, -4.566298961639404, -3.876642942428589, -3.1869871616363525, -2.497331380844116, -1.8076753616333008, -1.1180195808410645, -0.4283638000488281, 0.261292040348053, 0.9509478807449341, 1.64060378074646, 2.3302595615386963, 3.0199153423309326, 3.709571361541748, 4.399227142333984, 5.088882923126221, 5.778538703918457, 6.468194484710693, 7.15785026550293, 7.847506523132324, 8.537161827087402, 9.226818084716797, 9.916473388671875, 10.60612964630127, 11.295785903930664, 11.985441207885742, 12.675097465515137, 13.364752769470215, 14.05440902709961, 14.744064331054688, 15.433720588684082, 16.123376846313477, 16.813032150268555, 17.502689361572266, 18.192344665527344, 18.881999969482422, 19.5716552734375, 20.26131248474121, 20.95096778869629, 21.640623092651367]}, "gradients/encoder.encoder.layers.22.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 4.0, 1.0, 1.0, 4.0, 5.0, 9.0, 10.0, 7.0, 18.0, 32.0, 43.0, 91.0, 103.0, 228.0, 433.0, 901.0, 2067.0, 6022.0, 25759.0, 3098938.0, 1028853.0, 21645.0, 5324.0, 1841.0, 887.0, 432.0, 243.0, 120.0, 86.0, 61.0, 47.0, 29.0, 7.0, 8.0, 13.0, 5.0, 3.0, 4.0, 3.0, 4.0, 2.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.455078125, -3.335845947265625, -3.21661376953125, -3.097381591796875, -2.9781494140625, -2.858917236328125, -2.73968505859375, -2.620452880859375, -2.501220703125, -2.381988525390625, -2.26275634765625, -2.143524169921875, -2.0242919921875, -1.905059814453125, -1.78582763671875, -1.666595458984375, -1.54736328125, -1.428131103515625, -1.30889892578125, -1.189666748046875, -1.0704345703125, -0.951202392578125, -0.83197021484375, -0.712738037109375, -0.593505859375, -0.474273681640625, -0.35504150390625, -0.235809326171875, -0.1165771484375, 0.002655029296875, 0.12188720703125, 0.241119384765625, 0.3603515625, 0.479583740234375, 0.59881591796875, 0.718048095703125, 0.8372802734375, 0.956512451171875, 1.07574462890625, 1.194976806640625, 1.314208984375, 1.433441162109375, 1.55267333984375, 1.671905517578125, 1.7911376953125, 1.910369873046875, 2.02960205078125, 2.148834228515625, 2.26806640625, 2.387298583984375, 2.50653076171875, 2.625762939453125, 2.7449951171875, 2.864227294921875, 2.98345947265625, 3.102691650390625, 3.221923828125, 3.341156005859375, 3.46038818359375, 3.579620361328125, 3.6988525390625, 3.818084716796875, 3.93731689453125, 4.056549072265625, 4.17578125]}, "gradients/encoder.encoder.layers.22.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 5.0, 3.0, 3.0, 9.0, 6.0, 17.0, 12.0, 13.0, 17.0, 34.0, 100.0, 393.0, 252.0, 52.0, 23.0, 19.0, 12.0, 12.0, 6.0, 5.0, 5.0, 6.0, 1.0, 3.0, 0.0, 1.0, 3.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.3828125, -0.3699302673339844, -0.35704803466796875, -0.3441658020019531, -0.3312835693359375, -0.3184013366699219, -0.30551910400390625, -0.2926368713378906, -0.279754638671875, -0.2668724060058594, -0.25399017333984375, -0.24110794067382812, -0.2282257080078125, -0.21534347534179688, -0.20246124267578125, -0.18957901000976562, -0.17669677734375, -0.16381454467773438, -0.15093231201171875, -0.13805007934570312, -0.1251678466796875, -0.11228561401367188, -0.09940338134765625, -0.08652114868164062, -0.073638916015625, -0.060756683349609375, -0.04787445068359375, -0.034992218017578125, -0.0221099853515625, -0.009227752685546875, 0.00365447998046875, 0.016536712646484375, 0.0294189453125, 0.042301177978515625, 0.05518341064453125, 0.06806564331054688, 0.0809478759765625, 0.09383010864257812, 0.10671234130859375, 0.11959457397460938, 0.132476806640625, 0.14535903930664062, 0.15824127197265625, 0.17112350463867188, 0.1840057373046875, 0.19688796997070312, 0.20977020263671875, 0.22265243530273438, 0.23553466796875, 0.24841690063476562, 0.26129913330078125, 0.2741813659667969, 0.2870635986328125, 0.2999458312988281, 0.31282806396484375, 0.3257102966308594, 0.338592529296875, 0.3514747619628906, 0.36435699462890625, 0.3772392272949219, 0.3901214599609375, 0.4030036926269531, 0.41588592529296875, 0.4287681579589844, 0.441650390625]}, "gradients/encoder.encoder.layers.22.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 6.0, 3.0, 8.0, 13.0, 11.0, 14.0, 25.0, 45.0, 58.0, 127.0, 261.0, 539.0, 1376.0, 4078.0, 21077.0, 551520.0, 3571400.0, 35365.0, 5498.0, 1610.0, 636.0, 264.0, 157.0, 74.0, 47.0, 22.0, 18.0, 10.0, 8.0, 4.0, 6.0, 1.0, 5.0, 0.0, 2.0, 1.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.544921875, -1.478118896484375, -1.41131591796875, -1.344512939453125, -1.2777099609375, -1.210906982421875, -1.14410400390625, -1.077301025390625, -1.010498046875, -0.943695068359375, -0.87689208984375, -0.810089111328125, -0.7432861328125, -0.676483154296875, -0.60968017578125, -0.542877197265625, -0.47607421875, -0.409271240234375, -0.34246826171875, -0.275665283203125, -0.2088623046875, -0.142059326171875, -0.07525634765625, -0.008453369140625, 0.058349609375, 0.125152587890625, 0.19195556640625, 0.258758544921875, 0.3255615234375, 0.392364501953125, 0.45916748046875, 0.525970458984375, 0.5927734375, 0.659576416015625, 0.72637939453125, 0.793182373046875, 0.8599853515625, 0.926788330078125, 0.99359130859375, 1.060394287109375, 1.127197265625, 1.194000244140625, 1.26080322265625, 1.327606201171875, 1.3944091796875, 1.461212158203125, 1.52801513671875, 1.594818115234375, 1.66162109375, 1.728424072265625, 1.79522705078125, 1.862030029296875, 1.9288330078125, 1.995635986328125, 2.06243896484375, 2.129241943359375, 2.196044921875, 2.262847900390625, 2.32965087890625, 2.396453857421875, 2.4632568359375, 2.530059814453125, 2.59686279296875, 2.663665771484375, 2.73046875]}, "gradients/encoder.encoder.layers.22.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 4.0, 6.0, 2.0, 5.0, 13.0, 15.0, 12.0, 14.0, 38.0, 61.0, 68.0, 113.0, 237.0, 613.0, 1817.0, 579.0, 185.0, 103.0, 55.0, 38.0, 25.0, 17.0, 14.0, 11.0, 12.0, 2.0, 8.0, 2.0, 4.0, 2.0, 1.0, 4.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.4072265625, -0.3960838317871094, -0.38494110107421875, -0.3737983703613281, -0.3626556396484375, -0.3515129089355469, -0.34037017822265625, -0.3292274475097656, -0.318084716796875, -0.3069419860839844, -0.29579925537109375, -0.2846565246582031, -0.2735137939453125, -0.2623710632324219, -0.25122833251953125, -0.24008560180664062, -0.22894287109375, -0.21780014038085938, -0.20665740966796875, -0.19551467895507812, -0.1843719482421875, -0.17322921752929688, -0.16208648681640625, -0.15094375610351562, -0.139801025390625, -0.12865829467773438, -0.11751556396484375, -0.10637283325195312, -0.0952301025390625, -0.08408737182617188, -0.07294464111328125, -0.061801910400390625, -0.0506591796875, -0.039516448974609375, -0.02837371826171875, -0.017230987548828125, -0.0060882568359375, 0.005054473876953125, 0.01619720458984375, 0.027339935302734375, 0.038482666015625, 0.049625396728515625, 0.06076812744140625, 0.07191085815429688, 0.0830535888671875, 0.09419631958007812, 0.10533905029296875, 0.11648178100585938, 0.12762451171875, 0.13876724243164062, 0.14990997314453125, 0.16105270385742188, 0.1721954345703125, 0.18333816528320312, 0.19448089599609375, 0.20562362670898438, 0.216766357421875, 0.22790908813476562, 0.23905181884765625, 0.2501945495605469, 0.2613372802734375, 0.2724800109863281, 0.28362274169921875, 0.2947654724121094, 0.305908203125]}, "gradients/encoder.encoder.layers.22.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 3.0, 2.0, 2.0, 4.0, 5.0, 9.0, 15.0, 152.0, 802.0, 18.0, 1.0, 1.0, 2.0, 1.0, 0.0, 1.0], "bins": [-20.02397346496582, -19.668811798095703, -19.313648223876953, -18.958486557006836, -18.60332489013672, -18.24816131591797, -17.89299964904785, -17.537837982177734, -17.182674407958984, -16.827512741088867, -16.472349166870117, -16.1171875, -15.762024879455566, -15.406862258911133, -15.051700592041016, -14.696537971496582, -14.341376304626465, -13.986213684082031, -13.631052017211914, -13.27588939666748, -12.920726776123047, -12.56556510925293, -12.210402488708496, -11.855239868164062, -11.500078201293945, -11.144915580749512, -10.789753913879395, -10.434591293334961, -10.079428672790527, -9.724266052246094, -9.369104385375977, -9.013941764831543, -8.65877914428711, -8.303616523742676, -7.9484543800354, -7.593292236328125, -7.238129615783691, -6.882967472076416, -6.527805328369141, -6.172642707824707, -5.817480087280273, -5.462317943572998, -5.1071553230285645, -4.751993179321289, -4.3968305587768555, -4.04166841506958, -3.6865062713623047, -3.33134388923645, -2.9761815071105957, -2.621019124984741, -2.2658567428588867, -1.9106945991516113, -1.5555322170257568, -1.2003698348999023, -0.845207691192627, -0.49004530906677246, -0.13488292694091797, 0.22027939558029175, 0.5754417181015015, 0.9306039810180664, 1.285766363143921, 1.6409287452697754, 1.9960908889770508, 2.3512532711029053, 2.7064156532287598]}, "gradients/encoder.encoder.layers.22.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 2.0, 2.0, 3.0, 2.0, 3.0, 3.0, 8.0, 6.0, 12.0, 11.0, 13.0, 23.0, 21.0, 36.0, 42.0, 48.0, 56.0, 61.0, 50.0, 56.0, 72.0, 68.0, 73.0, 54.0, 47.0, 57.0, 34.0, 29.0, 25.0, 20.0, 20.0, 9.0, 7.0, 12.0, 5.0, 5.0, 1.0, 2.0, 4.0, 1.0, 1.0, 2.0, 2.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-1.4993104934692383, -1.4471986293792725, -1.3950867652893066, -1.3429749011993408, -1.2908631563186646, -1.2387512922286987, -1.186639428138733, -1.134527564048767, -1.0824156999588013, -1.0303038358688354, -0.9781920313835144, -0.9260801672935486, -0.8739683032035828, -0.8218564987182617, -0.7697446346282959, -0.7176327705383301, -0.665520966053009, -0.6134091019630432, -0.5612972974777222, -0.5091854333877563, -0.4570735692977905, -0.4049617350101471, -0.35284990072250366, -0.30073803663253784, -0.2486262023448944, -0.19651435315608978, -0.14440250396728516, -0.09229066967964172, -0.0401788204908371, 0.01193302869796753, 0.06404486298561096, 0.11615672707557678, 0.16826856136322021, 0.22038041055202484, 0.27249225974082947, 0.3246040940284729, 0.3767159581184387, 0.42882779240608215, 0.4809396266937256, 0.5330514907836914, 0.5851633548736572, 0.637275218963623, 0.6893870234489441, 0.7414988875389099, 0.7936107516288757, 0.8457225561141968, 0.8978344202041626, 0.9499462842941284, 1.0020580291748047, 1.0541698932647705, 1.1062817573547363, 1.1583936214447021, 1.2105053663253784, 1.2626172304153442, 1.31472909450531, 1.3668409585952759, 1.4189528226852417, 1.4710646867752075, 1.5231765508651733, 1.5752882957458496, 1.6274001598358154, 1.6795120239257812, 1.731623888015747, 1.783735752105713, 1.8358476161956787]}, "gradients/encoder.encoder.layers.22.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 3.0, 4.0, 6.0, 5.0, 15.0, 9.0, 17.0, 29.0, 34.0, 43.0, 82.0, 88.0, 136.0, 272.0, 461.0, 766.0, 1510.0, 3280.0, 8532.0, 29035.0, 580104.0, 384297.0, 25708.0, 7742.0, 3082.0, 1436.0, 740.0, 396.0, 243.0, 157.0, 101.0, 65.0, 48.0, 31.0, 25.0, 18.0, 14.0, 9.0, 8.0, 5.0, 2.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 1.0], "bins": [-2.2890625, -2.2196044921875, -2.150146484375, -2.0806884765625, -2.01123046875, -1.9417724609375, -1.872314453125, -1.8028564453125, -1.7333984375, -1.6639404296875, -1.594482421875, -1.5250244140625, -1.45556640625, -1.3861083984375, -1.316650390625, -1.2471923828125, -1.177734375, -1.1082763671875, -1.038818359375, -0.9693603515625, -0.89990234375, -0.8304443359375, -0.760986328125, -0.6915283203125, -0.6220703125, -0.5526123046875, -0.483154296875, -0.4136962890625, -0.34423828125, -0.2747802734375, -0.205322265625, -0.1358642578125, -0.06640625, 0.0030517578125, 0.072509765625, 0.1419677734375, 0.21142578125, 0.2808837890625, 0.350341796875, 0.4197998046875, 0.4892578125, 0.5587158203125, 0.628173828125, 0.6976318359375, 0.76708984375, 0.8365478515625, 0.906005859375, 0.9754638671875, 1.044921875, 1.1143798828125, 1.183837890625, 1.2532958984375, 1.32275390625, 1.3922119140625, 1.461669921875, 1.5311279296875, 1.6005859375, 1.6700439453125, 1.739501953125, 1.8089599609375, 1.87841796875, 1.9478759765625, 2.017333984375, 2.0867919921875, 2.15625]}, "gradients/encoder.encoder.layers.22.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 1.0, 0.0, 0.0, 3.0, 3.0, 3.0, 4.0, 9.0, 7.0, 12.0, 14.0, 19.0, 30.0, 128.0, 341.0, 269.0, 92.0, 24.0, 12.0, 13.0, 7.0, 8.0, 4.0, 3.0, 0.0, 3.0, 3.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.397705078125, -0.3843536376953125, -0.371002197265625, -0.3576507568359375, -0.34429931640625, -0.3309478759765625, -0.317596435546875, -0.3042449951171875, -0.2908935546875, -0.2775421142578125, -0.264190673828125, -0.2508392333984375, -0.23748779296875, -0.2241363525390625, -0.210784912109375, -0.1974334716796875, -0.18408203125, -0.1707305908203125, -0.157379150390625, -0.1440277099609375, -0.13067626953125, -0.1173248291015625, -0.103973388671875, -0.0906219482421875, -0.0772705078125, -0.0639190673828125, -0.050567626953125, -0.0372161865234375, -0.02386474609375, -0.0105133056640625, 0.002838134765625, 0.0161895751953125, 0.029541015625, 0.0428924560546875, 0.056243896484375, 0.0695953369140625, 0.08294677734375, 0.0962982177734375, 0.109649658203125, 0.1230010986328125, 0.1363525390625, 0.1497039794921875, 0.163055419921875, 0.1764068603515625, 0.18975830078125, 0.2031097412109375, 0.216461181640625, 0.2298126220703125, 0.2431640625, 0.2565155029296875, 0.269866943359375, 0.2832183837890625, 0.29656982421875, 0.3099212646484375, 0.323272705078125, 0.3366241455078125, 0.3499755859375, 0.3633270263671875, 0.376678466796875, 0.3900299072265625, 0.40338134765625, 0.4167327880859375, 0.430084228515625, 0.4434356689453125, 0.456787109375]}, "gradients/encoder.encoder.layers.22.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 2.0, 6.0, 2.0, 4.0, 8.0, 8.0, 7.0, 12.0, 19.0, 21.0, 24.0, 44.0, 44.0, 72.0, 85.0, 139.0, 195.0, 330.0, 559.0, 1093.0, 2480.0, 7001.0, 24795.0, 113666.0, 529330.0, 294299.0, 54141.0, 13145.0, 3884.0, 1481.0, 659.0, 340.0, 207.0, 126.0, 100.0, 59.0, 40.0, 45.0, 22.0, 16.0, 17.0, 8.0, 9.0, 7.0, 7.0, 1.0, 5.0, 1.0, 2.0], "bins": [-1.205078125, -1.1747817993164062, -1.1444854736328125, -1.1141891479492188, -1.083892822265625, -1.0535964965820312, -1.0233001708984375, -0.9930038452148438, -0.96270751953125, -0.9324111938476562, -0.9021148681640625, -0.8718185424804688, -0.841522216796875, -0.8112258911132812, -0.7809295654296875, -0.7506332397460938, -0.7203369140625, -0.6900405883789062, -0.6597442626953125, -0.6294479370117188, -0.599151611328125, -0.5688552856445312, -0.5385589599609375, -0.5082626342773438, -0.47796630859375, -0.44766998291015625, -0.4173736572265625, -0.38707733154296875, -0.356781005859375, -0.32648468017578125, -0.2961883544921875, -0.26589202880859375, -0.235595703125, -0.20529937744140625, -0.1750030517578125, -0.14470672607421875, -0.114410400390625, -0.08411407470703125, -0.0538177490234375, -0.02352142333984375, 0.00677490234375, 0.03707122802734375, 0.0673675537109375, 0.09766387939453125, 0.127960205078125, 0.15825653076171875, 0.1885528564453125, 0.21884918212890625, 0.2491455078125, 0.27944183349609375, 0.3097381591796875, 0.34003448486328125, 0.370330810546875, 0.40062713623046875, 0.4309234619140625, 0.46121978759765625, 0.49151611328125, 0.5218124389648438, 0.5521087646484375, 0.5824050903320312, 0.612701416015625, 0.6429977416992188, 0.6732940673828125, 0.7035903930664062, 0.73388671875]}, "gradients/encoder.encoder.layers.22.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 3.0, 2.0, 3.0, 2.0, 5.0, 4.0, 4.0, 7.0, 9.0, 12.0, 11.0, 17.0, 21.0, 26.0, 27.0, 34.0, 39.0, 28.0, 32.0, 37.0, 52.0, 46.0, 36.0, 51.0, 37.0, 45.0, 52.0, 43.0, 50.0, 40.0, 39.0, 27.0, 34.0, 20.0, 20.0, 18.0, 23.0, 13.0, 14.0, 9.0, 5.0, 7.0, 2.0, 7.0, 3.0, 1.0, 1.0], "bins": [-1.0517578125, -1.0261688232421875, -1.000579833984375, -0.9749908447265625, -0.94940185546875, -0.9238128662109375, -0.898223876953125, -0.8726348876953125, -0.8470458984375, -0.8214569091796875, -0.795867919921875, -0.7702789306640625, -0.74468994140625, -0.7191009521484375, -0.693511962890625, -0.6679229736328125, -0.642333984375, -0.6167449951171875, -0.591156005859375, -0.5655670166015625, -0.53997802734375, -0.5143890380859375, -0.488800048828125, -0.4632110595703125, -0.4376220703125, -0.4120330810546875, -0.386444091796875, -0.3608551025390625, -0.33526611328125, -0.3096771240234375, -0.284088134765625, -0.2584991455078125, -0.23291015625, -0.2073211669921875, -0.181732177734375, -0.1561431884765625, -0.13055419921875, -0.1049652099609375, -0.079376220703125, -0.0537872314453125, -0.0281982421875, -0.0026092529296875, 0.022979736328125, 0.0485687255859375, 0.07415771484375, 0.0997467041015625, 0.125335693359375, 0.1509246826171875, 0.176513671875, 0.2021026611328125, 0.227691650390625, 0.2532806396484375, 0.27886962890625, 0.3044586181640625, 0.330047607421875, 0.3556365966796875, 0.3812255859375, 0.4068145751953125, 0.432403564453125, 0.4579925537109375, 0.48358154296875, 0.5091705322265625, 0.534759521484375, 0.5603485107421875, 0.5859375]}, "gradients/encoder.encoder.layers.22.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0, 1.0, 3.0, 1.0, 2.0, 5.0, 6.0, 10.0, 16.0, 23.0, 33.0, 64.0, 157.0, 379.0, 1118.0, 6428.0, 926036.0, 109851.0, 3212.0, 695.0, 250.0, 124.0, 49.0, 30.0, 27.0, 12.0, 7.0, 5.0, 4.0, 1.0, 6.0, 1.0, 2.0, 2.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.314453125, -3.216522216796875, -3.11859130859375, -3.020660400390625, -2.9227294921875, -2.824798583984375, -2.72686767578125, -2.628936767578125, -2.531005859375, -2.433074951171875, -2.33514404296875, -2.237213134765625, -2.1392822265625, -2.041351318359375, -1.94342041015625, -1.845489501953125, -1.74755859375, -1.649627685546875, -1.55169677734375, -1.453765869140625, -1.3558349609375, -1.257904052734375, -1.15997314453125, -1.062042236328125, -0.964111328125, -0.866180419921875, -0.76824951171875, -0.670318603515625, -0.5723876953125, -0.474456787109375, -0.37652587890625, -0.278594970703125, -0.1806640625, -0.082733154296875, 0.01519775390625, 0.113128662109375, 0.2110595703125, 0.308990478515625, 0.40692138671875, 0.504852294921875, 0.602783203125, 0.700714111328125, 0.79864501953125, 0.896575927734375, 0.9945068359375, 1.092437744140625, 1.19036865234375, 1.288299560546875, 1.38623046875, 1.484161376953125, 1.58209228515625, 1.680023193359375, 1.7779541015625, 1.875885009765625, 1.97381591796875, 2.071746826171875, 2.169677734375, 2.267608642578125, 2.36553955078125, 2.463470458984375, 2.5614013671875, 2.659332275390625, 2.75726318359375, 2.855194091796875, 2.953125]}, "gradients/encoder.encoder.layers.22.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 6.0, 8.0, 6.0, 9.0, 11.0, 14.0, 24.0, 57.0, 63.0, 149.0, 298.0, 159.0, 69.0, 46.0, 25.0, 15.0, 11.0, 5.0, 8.0, 3.0, 6.0, 3.0, 4.0, 3.0, 4.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0003046989440917969, -0.0002963319420814514, -0.00028796494007110596, -0.0002795979380607605, -0.00027123093605041504, -0.0002628639340400696, -0.0002544969320297241, -0.00024612993001937866, -0.0002377629280090332, -0.00022939592599868774, -0.00022102892398834229, -0.00021266192197799683, -0.00020429491996765137, -0.0001959279179573059, -0.00018756091594696045, -0.000179193913936615, -0.00017082691192626953, -0.00016245990991592407, -0.0001540929079055786, -0.00014572590589523315, -0.0001373589038848877, -0.00012899190187454224, -0.00012062489986419678, -0.00011225789785385132, -0.00010389089584350586, -9.55238938331604e-05, -8.715689182281494e-05, -7.878988981246948e-05, -7.042288780212402e-05, -6.205588579177856e-05, -5.3688883781433105e-05, -4.5321881771087646e-05, -3.695487976074219e-05, -2.858787775039673e-05, -2.022087574005127e-05, -1.185387372970581e-05, -3.4868717193603516e-06, 4.880130290985107e-06, 1.3247132301330566e-05, 2.1614134311676025e-05, 2.9981136322021484e-05, 3.834813833236694e-05, 4.67151403427124e-05, 5.508214235305786e-05, 6.344914436340332e-05, 7.181614637374878e-05, 8.018314838409424e-05, 8.85501503944397e-05, 9.691715240478516e-05, 0.00010528415441513062, 0.00011365115642547607, 0.00012201815843582153, 0.000130385160446167, 0.00013875216245651245, 0.0001471191644668579, 0.00015548616647720337, 0.00016385316848754883, 0.0001722201704978943, 0.00018058717250823975, 0.0001889541745185852, 0.00019732117652893066, 0.00020568817853927612, 0.00021405518054962158, 0.00022242218255996704, 0.0002307891845703125]}, "gradients/encoder.encoder.layers.22.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 4.0, 1.0, 4.0, 6.0, 12.0, 15.0, 36.0, 47.0, 91.0, 208.0, 464.0, 1209.0, 6105.0, 189533.0, 835427.0, 12638.0, 1755.0, 525.0, 243.0, 106.0, 65.0, 25.0, 17.0, 6.0, 10.0, 2.0, 2.0, 3.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-2.251953125, -2.19134521484375, -2.1307373046875, -2.07012939453125, -2.009521484375, -1.94891357421875, -1.8883056640625, -1.82769775390625, -1.76708984375, -1.70648193359375, -1.6458740234375, -1.58526611328125, -1.524658203125, -1.46405029296875, -1.4034423828125, -1.34283447265625, -1.2822265625, -1.22161865234375, -1.1610107421875, -1.10040283203125, -1.039794921875, -0.97918701171875, -0.9185791015625, -0.85797119140625, -0.79736328125, -0.73675537109375, -0.6761474609375, -0.61553955078125, -0.554931640625, -0.49432373046875, -0.4337158203125, -0.37310791015625, -0.3125, -0.25189208984375, -0.1912841796875, -0.13067626953125, -0.070068359375, -0.00946044921875, 0.0511474609375, 0.11175537109375, 0.17236328125, 0.23297119140625, 0.2935791015625, 0.35418701171875, 0.414794921875, 0.47540283203125, 0.5360107421875, 0.59661865234375, 0.6572265625, 0.71783447265625, 0.7784423828125, 0.83905029296875, 0.899658203125, 0.96026611328125, 1.0208740234375, 1.08148193359375, 1.14208984375, 1.20269775390625, 1.2633056640625, 1.32391357421875, 1.384521484375, 1.44512939453125, 1.5057373046875, 1.56634521484375, 1.626953125]}, "gradients/encoder.encoder.layers.22.attention.q_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 0.0, 3.0, 7.0, 9.0, 16.0, 34.0, 51.0, 78.0, 150.0, 196.0, 161.0, 131.0, 67.0, 38.0, 28.0, 18.0, 10.0, 8.0, 4.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.32421875, -1.283966064453125, -1.24371337890625, -1.203460693359375, -1.1632080078125, -1.122955322265625, -1.08270263671875, -1.042449951171875, -1.002197265625, -0.961944580078125, -0.92169189453125, -0.881439208984375, -0.8411865234375, -0.800933837890625, -0.76068115234375, -0.720428466796875, -0.68017578125, -0.639923095703125, -0.59967041015625, -0.559417724609375, -0.5191650390625, -0.478912353515625, -0.43865966796875, -0.398406982421875, -0.358154296875, -0.317901611328125, -0.27764892578125, -0.237396240234375, -0.1971435546875, -0.156890869140625, -0.11663818359375, -0.076385498046875, -0.0361328125, 0.004119873046875, 0.04437255859375, 0.084625244140625, 0.1248779296875, 0.165130615234375, 0.20538330078125, 0.245635986328125, 0.285888671875, 0.326141357421875, 0.36639404296875, 0.406646728515625, 0.4468994140625, 0.487152099609375, 0.52740478515625, 0.567657470703125, 0.60791015625, 0.648162841796875, 0.68841552734375, 0.728668212890625, 0.7689208984375, 0.809173583984375, 0.84942626953125, 0.889678955078125, 0.929931640625, 0.970184326171875, 1.01043701171875, 1.050689697265625, 1.0909423828125, 1.131195068359375, 1.17144775390625, 1.211700439453125, 1.251953125]}, "gradients/encoder.encoder.layers.22.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 3.0, 9.0, 57.0, 775.0, 148.0, 14.0, 6.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-24.953643798828125, -23.853364944458008, -22.75308609008789, -21.65280532836914, -20.552526473999023, -19.452247619628906, -18.351966857910156, -17.25168800354004, -16.151409149169922, -15.051130294799805, -13.950850486755371, -12.850570678710938, -11.75029182434082, -10.650012969970703, -9.54973316192627, -8.449453353881836, -7.349174499511719, -6.248895168304443, -5.148615837097168, -4.048336505889893, -2.948057174682617, -1.8477778434753418, -0.7474985122680664, 0.352780818939209, 1.4530601501464844, 2.5533394813537598, 3.653618812561035, 4.7538981437683105, 5.854177474975586, 6.954456806182861, 8.054736137390137, 9.15501594543457, 10.255294799804688, 11.355573654174805, 12.455853462219238, 13.556133270263672, 14.656412124633789, 15.756690979003906, 16.856971740722656, 17.957250595092773, 19.05752944946289, 20.157808303833008, 21.258087158203125, 22.358367919921875, 23.458646774291992, 24.55892562866211, 25.65920639038086, 26.759485244750977, 27.859764099121094, 28.96004295349121, 30.060321807861328, 31.160602569580078, 32.26087951660156, 33.36116027832031, 34.46144104003906, 35.56171798706055, 36.6619987487793, 37.76227951049805, 38.86255645751953, 39.96283721923828, 41.06311798095703, 42.163394927978516, 43.263675689697266, 44.36395263671875, 45.4642333984375]}, "gradients/encoder.encoder.layers.22.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 2.0, 1.0, 2.0, 5.0, 1.0, 2.0, 3.0, 2.0, 4.0, 4.0, 6.0, 1.0, 11.0, 11.0, 26.0, 33.0, 42.0, 57.0, 57.0, 90.0, 96.0, 108.0, 92.0, 72.0, 65.0, 52.0, 41.0, 31.0, 25.0, 14.0, 11.0, 7.0, 3.0, 6.0, 8.0, 2.0, 6.0, 2.0, 3.0, 4.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0], "bins": [-10.809890747070312, -10.494063377380371, -10.17823600769043, -9.862409591674805, -9.546582221984863, -9.230754852294922, -8.914928436279297, -8.599101066589355, -8.283273696899414, -7.967446327209473, -7.6516194343566895, -7.335792541503906, -7.019965171813965, -6.704137802124023, -6.38831090927124, -6.072484016418457, -5.756656646728516, -5.440829277038574, -5.125002384185791, -4.809175491333008, -4.493348121643066, -4.177520751953125, -3.861693859100342, -3.5458667278289795, -3.230039596557617, -2.914212465286255, -2.5983853340148926, -2.2825582027435303, -1.966731071472168, -1.6509039402008057, -1.3350768089294434, -1.019249677658081, -0.7034215927124023, -0.38759446144104004, -0.07176733016967773, 0.24405980110168457, 0.5598869323730469, 0.8757140636444092, 1.1915411949157715, 1.5073683261871338, 1.823195457458496, 2.1390225887298584, 2.4548497200012207, 2.770676851272583, 3.0865039825439453, 3.4023311138153076, 3.71815824508667, 4.033985137939453, 4.3498125076293945, 4.665639877319336, 4.981466770172119, 5.297293663024902, 5.613121032714844, 5.928948402404785, 6.244775295257568, 6.560602188110352, 6.876429557800293, 7.192256927490234, 7.508083820343018, 7.823910713195801, 8.139738082885742, 8.455565452575684, 8.771392822265625, 9.08721923828125, 9.403046607971191]}, "gradients/encoder.encoder.layers.21.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 1.0, 5.0, 4.0, 1.0, 9.0, 8.0, 4.0, 5.0, 10.0, 14.0, 11.0, 18.0, 26.0, 47.0, 52.0, 83.0, 99.0, 214.0, 370.0, 736.0, 1884.0, 5449.0, 56606.0, 4106014.0, 16871.0, 3105.0, 1114.0, 567.0, 321.0, 190.0, 156.0, 91.0, 58.0, 32.0, 23.0, 21.0, 11.0, 12.0, 5.0, 4.0, 9.0, 3.0, 5.0, 6.0, 8.0, 4.0, 1.0, 2.0, 3.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-6.7578125, -6.5347900390625, -6.311767578125, -6.0887451171875, -5.86572265625, -5.6427001953125, -5.419677734375, -5.1966552734375, -4.9736328125, -4.7506103515625, -4.527587890625, -4.3045654296875, -4.08154296875, -3.8585205078125, -3.635498046875, -3.4124755859375, -3.189453125, -2.9664306640625, -2.743408203125, -2.5203857421875, -2.29736328125, -2.0743408203125, -1.851318359375, -1.6282958984375, -1.4052734375, -1.1822509765625, -0.959228515625, -0.7362060546875, -0.51318359375, -0.2901611328125, -0.067138671875, 0.1558837890625, 0.37890625, 0.6019287109375, 0.824951171875, 1.0479736328125, 1.27099609375, 1.4940185546875, 1.717041015625, 1.9400634765625, 2.1630859375, 2.3861083984375, 2.609130859375, 2.8321533203125, 3.05517578125, 3.2781982421875, 3.501220703125, 3.7242431640625, 3.947265625, 4.1702880859375, 4.393310546875, 4.6163330078125, 4.83935546875, 5.0623779296875, 5.285400390625, 5.5084228515625, 5.7314453125, 5.9544677734375, 6.177490234375, 6.4005126953125, 6.62353515625, 6.8465576171875, 7.069580078125, 7.2926025390625, 7.515625]}, "gradients/encoder.encoder.layers.21.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 6.0, 2.0, 6.0, 19.0, 20.0, 31.0, 72.0, 237.0, 313.0, 191.0, 52.0, 26.0, 9.0, 9.0, 3.0, 7.0, 1.0, 1.0, 2.0, 1.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.45068359375, -0.435821533203125, -0.42095947265625, -0.406097412109375, -0.3912353515625, -0.376373291015625, -0.36151123046875, -0.346649169921875, -0.331787109375, -0.316925048828125, -0.30206298828125, -0.287200927734375, -0.2723388671875, -0.257476806640625, -0.24261474609375, -0.227752685546875, -0.212890625, -0.198028564453125, -0.18316650390625, -0.168304443359375, -0.1534423828125, -0.138580322265625, -0.12371826171875, -0.108856201171875, -0.093994140625, -0.079132080078125, -0.06427001953125, -0.049407958984375, -0.0345458984375, -0.019683837890625, -0.00482177734375, 0.010040283203125, 0.02490234375, 0.039764404296875, 0.05462646484375, 0.069488525390625, 0.0843505859375, 0.099212646484375, 0.11407470703125, 0.128936767578125, 0.143798828125, 0.158660888671875, 0.17352294921875, 0.188385009765625, 0.2032470703125, 0.218109130859375, 0.23297119140625, 0.247833251953125, 0.2626953125, 0.277557373046875, 0.29241943359375, 0.307281494140625, 0.3221435546875, 0.337005615234375, 0.35186767578125, 0.366729736328125, 0.381591796875, 0.396453857421875, 0.41131591796875, 0.426177978515625, 0.4410400390625, 0.455902099609375, 0.47076416015625, 0.485626220703125, 0.50048828125]}, "gradients/encoder.encoder.layers.21.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 3.0, 6.0, 3.0, 11.0, 5.0, 10.0, 19.0, 21.0, 44.0, 62.0, 151.0, 487.0, 3568.0, 1929681.0, 2255723.0, 3576.0, 561.0, 160.0, 70.0, 50.0, 21.0, 17.0, 7.0, 6.0, 7.0, 3.0, 4.0, 1.0, 4.0, 1.0, 4.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-6.96875, -6.7637939453125, -6.558837890625, -6.3538818359375, -6.14892578125, -5.9439697265625, -5.739013671875, -5.5340576171875, -5.3291015625, -5.1241455078125, -4.919189453125, -4.7142333984375, -4.50927734375, -4.3043212890625, -4.099365234375, -3.8944091796875, -3.689453125, -3.4844970703125, -3.279541015625, -3.0745849609375, -2.86962890625, -2.6646728515625, -2.459716796875, -2.2547607421875, -2.0498046875, -1.8448486328125, -1.639892578125, -1.4349365234375, -1.22998046875, -1.0250244140625, -0.820068359375, -0.6151123046875, -0.41015625, -0.2052001953125, -0.000244140625, 0.2047119140625, 0.40966796875, 0.6146240234375, 0.819580078125, 1.0245361328125, 1.2294921875, 1.4344482421875, 1.639404296875, 1.8443603515625, 2.04931640625, 2.2542724609375, 2.459228515625, 2.6641845703125, 2.869140625, 3.0740966796875, 3.279052734375, 3.4840087890625, 3.68896484375, 3.8939208984375, 4.098876953125, 4.3038330078125, 4.5087890625, 4.7137451171875, 4.918701171875, 5.1236572265625, 5.32861328125, 5.5335693359375, 5.738525390625, 5.9434814453125, 6.1484375]}, "gradients/encoder.encoder.layers.21.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 5.0, 5.0, 1.0, 5.0, 5.0, 11.0, 12.0, 17.0, 38.0, 40.0, 66.0, 116.0, 333.0, 922.0, 1735.0, 388.0, 141.0, 74.0, 48.0, 32.0, 18.0, 16.0, 9.0, 10.0, 7.0, 3.0, 6.0, 6.0, 3.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.392578125, -0.3795814514160156, -0.36658477783203125, -0.3535881042480469, -0.3405914306640625, -0.3275947570800781, -0.31459808349609375, -0.3016014099121094, -0.288604736328125, -0.2756080627441406, -0.26261138916015625, -0.24961471557617188, -0.2366180419921875, -0.22362136840820312, -0.21062469482421875, -0.19762802124023438, -0.18463134765625, -0.17163467407226562, -0.15863800048828125, -0.14564132690429688, -0.1326446533203125, -0.11964797973632812, -0.10665130615234375, -0.09365463256835938, -0.080657958984375, -0.06766128540039062, -0.05466461181640625, -0.041667938232421875, -0.0286712646484375, -0.015674591064453125, -0.00267791748046875, 0.010318756103515625, 0.0233154296875, 0.036312103271484375, 0.04930877685546875, 0.062305450439453125, 0.0753021240234375, 0.08829879760742188, 0.10129547119140625, 0.11429214477539062, 0.127288818359375, 0.14028549194335938, 0.15328216552734375, 0.16627883911132812, 0.1792755126953125, 0.19227218627929688, 0.20526885986328125, 0.21826553344726562, 0.23126220703125, 0.24425888061523438, 0.25725555419921875, 0.2702522277832031, 0.2832489013671875, 0.2962455749511719, 0.30924224853515625, 0.3222389221191406, 0.335235595703125, 0.3482322692871094, 0.36122894287109375, 0.3742256164550781, 0.3872222900390625, 0.4002189636230469, 0.41321563720703125, 0.4262123107910156, 0.439208984375]}, "gradients/encoder.encoder.layers.21.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 0.0, 1.0, 4.0, 4.0, 15.0, 19.0, 87.0, 446.0, 326.0, 78.0, 14.0, 12.0, 4.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.9233076572418213, -2.795870304107666, -2.6684327125549316, -2.5409953594207764, -2.413558006286621, -2.2861204147338867, -2.1586830615997314, -2.031245708465576, -1.9038081169128418, -1.776370644569397, -1.6489331722259521, -1.5214958190917969, -1.394058346748352, -1.2666208744049072, -1.139183521270752, -1.0117460489273071, -0.8843085765838623, -0.7568711042404175, -0.6294336915016174, -0.5019962787628174, -0.37455880641937256, -0.24712133407592773, -0.11968392133712769, 0.007753491401672363, 0.1351909637451172, 0.2626284062862396, 0.39006584882736206, 0.5175032615661621, 0.6449407339096069, 0.7723782062530518, 0.8998156189918518, 1.0272530317306519, 1.1546907424926758, 1.2821282148361206, 1.4095656871795654, 1.5370030403137207, 1.6644405126571655, 1.7918779850006104, 1.9193153381347656, 2.0467529296875, 2.1741902828216553, 2.3016276359558105, 2.429065227508545, 2.5565025806427, 2.6839399337768555, 2.81137752532959, 2.938814878463745, 3.0662522315979004, 3.1936898231506348, 3.32112717628479, 3.4485647678375244, 3.5760021209716797, 3.703439712524414, 3.8308770656585693, 3.9583144187927246, 4.085752010345459, 4.213189125061035, 4.3406267166137695, 4.468063831329346, 4.59550142288208, 4.7229390144348145, 4.850376129150391, 4.977813720703125, 5.105251312255859, 5.232688903808594]}, "gradients/encoder.encoder.layers.21.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 2.0, 1.0, 2.0, 3.0, 6.0, 7.0, 4.0, 10.0, 8.0, 12.0, 17.0, 34.0, 25.0, 41.0, 45.0, 74.0, 65.0, 64.0, 72.0, 72.0, 71.0, 65.0, 67.0, 54.0, 46.0, 39.0, 22.0, 21.0, 18.0, 8.0, 9.0, 5.0, 5.0, 9.0, 2.0, 1.0, 2.0, 4.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.55598783493042, -1.502063512802124, -1.4481391906738281, -1.3942149877548218, -1.3402906656265259, -1.28636634349823, -1.2324421405792236, -1.1785178184509277, -1.1245934963226318, -1.070669174194336, -1.01674485206604, -0.9628206491470337, -0.9088963270187378, -0.8549720048904419, -0.8010477423667908, -0.7471234798431396, -0.6931991577148438, -0.6392748355865479, -0.5853505730628967, -0.5314263105392456, -0.4775019884109497, -0.4235776960849762, -0.3696534037590027, -0.3157291114330292, -0.26180481910705566, -0.20788052678108215, -0.15395623445510864, -0.10003194212913513, -0.04610764980316162, 0.00781664252281189, 0.0617409348487854, 0.11566522717475891, 0.16958951950073242, 0.22351381182670593, 0.27743810415267944, 0.33136239647865295, 0.38528668880462646, 0.4392109811306, 0.4931352734565735, 0.5470595359802246, 0.6009838581085205, 0.6549081802368164, 0.7088324427604675, 0.7627567052841187, 0.8166810274124146, 0.8706053495407104, 0.9245296120643616, 0.9784538745880127, 1.0323781967163086, 1.0863025188446045, 1.1402268409729004, 1.1941510438919067, 1.2480753660202026, 1.3019996881484985, 1.3559238910675049, 1.4098482131958008, 1.4637725353240967, 1.5176968574523926, 1.5716211795806885, 1.6255453824996948, 1.6794697046279907, 1.7333940267562866, 1.787318229675293, 1.8412425518035889, 1.8951668739318848]}, "gradients/encoder.encoder.layers.21.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 7.0, 7.0, 2.0, 10.0, 9.0, 17.0, 30.0, 35.0, 57.0, 94.0, 142.0, 227.0, 475.0, 1068.0, 2968.0, 16447.0, 844299.0, 171051.0, 8016.0, 1917.0, 742.0, 358.0, 205.0, 141.0, 76.0, 50.0, 27.0, 22.0, 18.0, 10.0, 11.0, 5.0, 4.0, 2.0, 4.0, 5.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-3.474609375, -3.37188720703125, -3.2691650390625, -3.16644287109375, -3.063720703125, -2.96099853515625, -2.8582763671875, -2.75555419921875, -2.65283203125, -2.55010986328125, -2.4473876953125, -2.34466552734375, -2.241943359375, -2.13922119140625, -2.0364990234375, -1.93377685546875, -1.8310546875, -1.72833251953125, -1.6256103515625, -1.52288818359375, -1.420166015625, -1.31744384765625, -1.2147216796875, -1.11199951171875, -1.00927734375, -0.90655517578125, -0.8038330078125, -0.70111083984375, -0.598388671875, -0.49566650390625, -0.3929443359375, -0.29022216796875, -0.1875, -0.08477783203125, 0.0179443359375, 0.12066650390625, 0.223388671875, 0.32611083984375, 0.4288330078125, 0.53155517578125, 0.63427734375, 0.73699951171875, 0.8397216796875, 0.94244384765625, 1.045166015625, 1.14788818359375, 1.2506103515625, 1.35333251953125, 1.4560546875, 1.55877685546875, 1.6614990234375, 1.76422119140625, 1.866943359375, 1.96966552734375, 2.0723876953125, 2.17510986328125, 2.27783203125, 2.38055419921875, 2.4832763671875, 2.58599853515625, 2.688720703125, 2.79144287109375, 2.8941650390625, 2.99688720703125, 3.099609375]}, "gradients/encoder.encoder.layers.21.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 4.0, 11.0, 8.0, 14.0, 38.0, 84.0, 242.0, 286.0, 182.0, 80.0, 30.0, 13.0, 4.0, 5.0, 1.0, 2.0, 1.0, 0.0, 3.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.405517578125, -0.3917083740234375, -0.377899169921875, -0.3640899658203125, -0.35028076171875, -0.3364715576171875, -0.322662353515625, -0.3088531494140625, -0.2950439453125, -0.2812347412109375, -0.267425537109375, -0.2536163330078125, -0.23980712890625, -0.2259979248046875, -0.212188720703125, -0.1983795166015625, -0.1845703125, -0.1707611083984375, -0.156951904296875, -0.1431427001953125, -0.12933349609375, -0.1155242919921875, -0.101715087890625, -0.0879058837890625, -0.0740966796875, -0.0602874755859375, -0.046478271484375, -0.0326690673828125, -0.01885986328125, -0.0050506591796875, 0.008758544921875, 0.0225677490234375, 0.036376953125, 0.0501861572265625, 0.063995361328125, 0.0778045654296875, 0.09161376953125, 0.1054229736328125, 0.119232177734375, 0.1330413818359375, 0.1468505859375, 0.1606597900390625, 0.174468994140625, 0.1882781982421875, 0.20208740234375, 0.2158966064453125, 0.229705810546875, 0.2435150146484375, 0.25732421875, 0.2711334228515625, 0.284942626953125, 0.2987518310546875, 0.31256103515625, 0.3263702392578125, 0.340179443359375, 0.3539886474609375, 0.3677978515625, 0.3816070556640625, 0.395416259765625, 0.4092254638671875, 0.42303466796875, 0.4368438720703125, 0.450653076171875, 0.4644622802734375, 0.478271484375]}, "gradients/encoder.encoder.layers.21.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 5.0, 5.0, 7.0, 8.0, 9.0, 10.0, 15.0, 31.0, 31.0, 54.0, 67.0, 103.0, 132.0, 190.0, 306.0, 497.0, 952.0, 2168.0, 6686.0, 28392.0, 198540.0, 676400.0, 106607.0, 18510.0, 4849.0, 1776.0, 797.0, 461.0, 267.0, 187.0, 142.0, 83.0, 75.0, 50.0, 52.0, 29.0, 16.0, 11.0, 11.0, 13.0, 7.0, 3.0, 5.0, 2.0, 2.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0], "bins": [-1.6455078125, -1.5976104736328125, -1.549713134765625, -1.5018157958984375, -1.45391845703125, -1.4060211181640625, -1.358123779296875, -1.3102264404296875, -1.2623291015625, -1.2144317626953125, -1.166534423828125, -1.1186370849609375, -1.07073974609375, -1.0228424072265625, -0.974945068359375, -0.9270477294921875, -0.879150390625, -0.8312530517578125, -0.783355712890625, -0.7354583740234375, -0.68756103515625, -0.6396636962890625, -0.591766357421875, -0.5438690185546875, -0.4959716796875, -0.4480743408203125, -0.400177001953125, -0.3522796630859375, -0.30438232421875, -0.2564849853515625, -0.208587646484375, -0.1606903076171875, -0.11279296875, -0.0648956298828125, -0.016998291015625, 0.0308990478515625, 0.07879638671875, 0.1266937255859375, 0.174591064453125, 0.2224884033203125, 0.2703857421875, 0.3182830810546875, 0.366180419921875, 0.4140777587890625, 0.46197509765625, 0.5098724365234375, 0.557769775390625, 0.6056671142578125, 0.653564453125, 0.7014617919921875, 0.749359130859375, 0.7972564697265625, 0.84515380859375, 0.8930511474609375, 0.940948486328125, 0.9888458251953125, 1.0367431640625, 1.0846405029296875, 1.132537841796875, 1.1804351806640625, 1.22833251953125, 1.2762298583984375, 1.324127197265625, 1.3720245361328125, 1.419921875]}, "gradients/encoder.encoder.layers.21.attention.v_proj.bias": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 4.0, 1.0, 10.0, 3.0, 5.0, 4.0, 8.0, 16.0, 11.0, 10.0, 20.0, 20.0, 29.0, 14.0, 28.0, 24.0, 28.0, 42.0, 35.0, 50.0, 39.0, 51.0, 44.0, 53.0, 40.0, 35.0, 49.0, 44.0, 43.0, 34.0, 31.0, 28.0, 20.0, 25.0, 14.0, 18.0, 12.0, 13.0, 14.0, 12.0, 5.0, 2.0, 6.0, 4.0, 3.0, 3.0, 1.0, 4.0, 3.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.8974609375, -0.8691329956054688, -0.8408050537109375, -0.8124771118164062, -0.784149169921875, -0.7558212280273438, -0.7274932861328125, -0.6991653442382812, -0.67083740234375, -0.6425094604492188, -0.6141815185546875, -0.5858535766601562, -0.557525634765625, -0.5291976928710938, -0.5008697509765625, -0.47254180908203125, -0.4442138671875, -0.41588592529296875, -0.3875579833984375, -0.35923004150390625, -0.330902099609375, -0.30257415771484375, -0.2742462158203125, -0.24591827392578125, -0.21759033203125, -0.18926239013671875, -0.1609344482421875, -0.13260650634765625, -0.104278564453125, -0.07595062255859375, -0.0476226806640625, -0.01929473876953125, 0.009033203125, 0.03736114501953125, 0.0656890869140625, 0.09401702880859375, 0.122344970703125, 0.15067291259765625, 0.1790008544921875, 0.20732879638671875, 0.23565673828125, 0.26398468017578125, 0.2923126220703125, 0.32064056396484375, 0.348968505859375, 0.37729644775390625, 0.4056243896484375, 0.43395233154296875, 0.4622802734375, 0.49060821533203125, 0.5189361572265625, 0.5472640991210938, 0.575592041015625, 0.6039199829101562, 0.6322479248046875, 0.6605758666992188, 0.68890380859375, 0.7172317504882812, 0.7455596923828125, 0.7738876342773438, 0.802215576171875, 0.8305435180664062, 0.8588714599609375, 0.8871994018554688, 0.91552734375]}, "gradients/encoder.encoder.layers.21.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 3.0, 2.0, 1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 1.0, 13.0, 10.0, 21.0, 27.0, 98.0, 171.0, 470.0, 2036.0, 62313.0, 975164.0, 6799.0, 900.0, 291.0, 117.0, 48.0, 28.0, 22.0, 9.0, 7.0, 8.0, 4.0, 2.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.19140625, -2.12347412109375, -2.0555419921875, -1.98760986328125, -1.919677734375, -1.85174560546875, -1.7838134765625, -1.71588134765625, -1.64794921875, -1.58001708984375, -1.5120849609375, -1.44415283203125, -1.376220703125, -1.30828857421875, -1.2403564453125, -1.17242431640625, -1.1044921875, -1.03656005859375, -0.9686279296875, -0.90069580078125, -0.832763671875, -0.76483154296875, -0.6968994140625, -0.62896728515625, -0.56103515625, -0.49310302734375, -0.4251708984375, -0.35723876953125, -0.289306640625, -0.22137451171875, -0.1534423828125, -0.08551025390625, -0.017578125, 0.05035400390625, 0.1182861328125, 0.18621826171875, 0.254150390625, 0.32208251953125, 0.3900146484375, 0.45794677734375, 0.52587890625, 0.59381103515625, 0.6617431640625, 0.72967529296875, 0.797607421875, 0.86553955078125, 0.9334716796875, 1.00140380859375, 1.0693359375, 1.13726806640625, 1.2052001953125, 1.27313232421875, 1.341064453125, 1.40899658203125, 1.4769287109375, 1.54486083984375, 1.61279296875, 1.68072509765625, 1.7486572265625, 1.81658935546875, 1.884521484375, 1.95245361328125, 2.0203857421875, 2.08831787109375, 2.15625]}, "gradients/encoder.encoder.layers.21.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 3.0, 1.0, 3.0, 2.0, 2.0, 3.0, 1.0, 8.0, 10.0, 11.0, 19.0, 12.0, 19.0, 28.0, 42.0, 63.0, 77.0, 122.0, 190.0, 101.0, 67.0, 62.0, 39.0, 32.0, 16.0, 14.0, 16.0, 7.0, 9.0, 11.0, 1.0, 7.0, 4.0, 1.0, 0.0, 4.0, 2.0, 1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.00020933151245117188, -0.00020265765488147736, -0.00019598379731178284, -0.00018930993974208832, -0.0001826360821723938, -0.00017596222460269928, -0.00016928836703300476, -0.00016261450946331024, -0.00015594065189361572, -0.0001492667943239212, -0.00014259293675422668, -0.00013591907918453217, -0.00012924522161483765, -0.00012257136404514313, -0.00011589750647544861, -0.00010922364890575409, -0.00010254979133605957, -9.587593376636505e-05, -8.920207619667053e-05, -8.252821862697601e-05, -7.58543610572815e-05, -6.918050348758698e-05, -6.250664591789246e-05, -5.583278834819794e-05, -4.915893077850342e-05, -4.24850732088089e-05, -3.581121563911438e-05, -2.913735806941986e-05, -2.2463500499725342e-05, -1.5789642930030823e-05, -9.115785360336304e-06, -2.4419277906417847e-06, 4.231929779052734e-06, 1.0905787348747253e-05, 1.7579644918441772e-05, 2.425350248813629e-05, 3.092736005783081e-05, 3.760121762752533e-05, 4.427507519721985e-05, 5.094893276691437e-05, 5.762279033660889e-05, 6.42966479063034e-05, 7.097050547599792e-05, 7.764436304569244e-05, 8.431822061538696e-05, 9.099207818508148e-05, 9.7665935754776e-05, 0.00010433979332447052, 0.00011101365089416504, 0.00011768750846385956, 0.00012436136603355408, 0.0001310352236032486, 0.00013770908117294312, 0.00014438293874263763, 0.00015105679631233215, 0.00015773065388202667, 0.0001644045114517212, 0.0001710783690214157, 0.00017775222659111023, 0.00018442608416080475, 0.00019109994173049927, 0.0001977737993001938, 0.0002044476568698883, 0.00021112151443958282, 0.00021779537200927734]}, "gradients/encoder.encoder.layers.21.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 3.0, 1.0, 4.0, 2.0, 3.0, 6.0, 6.0, 10.0, 9.0, 12.0, 13.0, 25.0, 27.0, 53.0, 64.0, 91.0, 144.0, 307.0, 627.0, 1626.0, 5751.0, 38465.0, 714386.0, 262820.0, 18314.0, 3552.0, 1163.0, 478.0, 226.0, 118.0, 77.0, 38.0, 28.0, 34.0, 14.0, 19.0, 13.0, 6.0, 6.0, 4.0, 7.0, 5.0, 3.0, 2.0, 0.0, 1.0, 1.0, 0.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.109375, -1.07452392578125, -1.0396728515625, -1.00482177734375, -0.969970703125, -0.93511962890625, -0.9002685546875, -0.86541748046875, -0.83056640625, -0.79571533203125, -0.7608642578125, -0.72601318359375, -0.691162109375, -0.65631103515625, -0.6214599609375, -0.58660888671875, -0.5517578125, -0.51690673828125, -0.4820556640625, -0.44720458984375, -0.412353515625, -0.37750244140625, -0.3426513671875, -0.30780029296875, -0.27294921875, -0.23809814453125, -0.2032470703125, -0.16839599609375, -0.133544921875, -0.09869384765625, -0.0638427734375, -0.02899169921875, 0.005859375, 0.04071044921875, 0.0755615234375, 0.11041259765625, 0.145263671875, 0.18011474609375, 0.2149658203125, 0.24981689453125, 0.28466796875, 0.31951904296875, 0.3543701171875, 0.38922119140625, 0.424072265625, 0.45892333984375, 0.4937744140625, 0.52862548828125, 0.5634765625, 0.59832763671875, 0.6331787109375, 0.66802978515625, 0.702880859375, 0.73773193359375, 0.7725830078125, 0.80743408203125, 0.84228515625, 0.87713623046875, 0.9119873046875, 0.94683837890625, 0.981689453125, 1.01654052734375, 1.0513916015625, 1.08624267578125, 1.12109375]}, "gradients/encoder.encoder.layers.21.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 3.0, 6.0, 5.0, 5.0, 6.0, 9.0, 15.0, 22.0, 24.0, 28.0, 35.0, 45.0, 49.0, 83.0, 92.0, 104.0, 100.0, 88.0, 63.0, 48.0, 37.0, 34.0, 28.0, 18.0, 8.0, 9.0, 9.0, 8.0, 6.0, 3.0, 5.0, 1.0, 1.0, 1.0, 1.0, 3.0, 2.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.6201171875, -0.6022491455078125, -0.584381103515625, -0.5665130615234375, -0.54864501953125, -0.5307769775390625, -0.512908935546875, -0.4950408935546875, -0.4771728515625, -0.4593048095703125, -0.441436767578125, -0.4235687255859375, -0.40570068359375, -0.3878326416015625, -0.369964599609375, -0.3520965576171875, -0.334228515625, -0.3163604736328125, -0.298492431640625, -0.2806243896484375, -0.26275634765625, -0.2448883056640625, -0.227020263671875, -0.2091522216796875, -0.1912841796875, -0.1734161376953125, -0.155548095703125, -0.1376800537109375, -0.11981201171875, -0.1019439697265625, -0.084075927734375, -0.0662078857421875, -0.04833984375, -0.0304718017578125, -0.012603759765625, 0.0052642822265625, 0.02313232421875, 0.0410003662109375, 0.058868408203125, 0.0767364501953125, 0.0946044921875, 0.1124725341796875, 0.130340576171875, 0.1482086181640625, 0.16607666015625, 0.1839447021484375, 0.201812744140625, 0.2196807861328125, 0.237548828125, 0.2554168701171875, 0.273284912109375, 0.2911529541015625, 0.30902099609375, 0.3268890380859375, 0.344757080078125, 0.3626251220703125, 0.3804931640625, 0.3983612060546875, 0.416229248046875, 0.4340972900390625, 0.45196533203125, 0.4698333740234375, 0.487701416015625, 0.5055694580078125, 0.5234375]}, "gradients/encoder.encoder.layers.21.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 4.0, 5.0, 8.0, 57.0, 777.0, 156.0, 7.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-21.993803024291992, -20.389400482177734, -18.78499984741211, -17.180599212646484, -15.576196670532227, -13.971795082092285, -12.367393493652344, -10.762991905212402, -9.158590316772461, -7.5541887283325195, -5.949787139892578, -4.345385551452637, -2.7409839630126953, -1.136582374572754, 0.4678192138671875, 2.072220802307129, 3.6766223907470703, 5.281023979187012, 6.885425567626953, 8.489827156066895, 10.094228744506836, 11.698630332946777, 13.303031921386719, 14.90743350982666, 16.5118350982666, 18.11623764038086, 19.720638275146484, 21.32503890991211, 22.929441452026367, 24.533843994140625, 26.13824462890625, 27.742645263671875, 29.3470458984375, 30.951446533203125, 32.55584716796875, 34.16025161743164, 35.764652252197266, 37.36905288696289, 38.97345733642578, 40.577857971191406, 42.18225860595703, 43.786659240722656, 45.39105987548828, 46.99546432495117, 48.5998649597168, 50.20426559448242, 51.80867004394531, 53.41307067871094, 55.01747131347656, 56.62187194824219, 58.22627258300781, 59.8306770324707, 61.43507766723633, 63.03947830200195, 64.64388275146484, 66.24828338623047, 67.8526840209961, 69.45708465576172, 71.06148529052734, 72.66588592529297, 74.27029418945312, 75.87469482421875, 77.47909545898438, 79.08349609375, 80.68789672851562]}, "gradients/encoder.encoder.layers.21.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 3.0, 5.0, 6.0, 2.0, 11.0, 12.0, 31.0, 40.0, 38.0, 51.0, 56.0, 83.0, 91.0, 88.0, 75.0, 93.0, 78.0, 62.0, 42.0, 41.0, 33.0, 20.0, 18.0, 13.0, 9.0, 4.0, 4.0, 2.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-10.905634880065918, -10.500060081481934, -10.094484329223633, -9.688909530639648, -9.283334732055664, -8.87775993347168, -8.472184181213379, -8.066609382629395, -7.661034107208252, -7.255458831787109, -6.849884033203125, -6.444308757781982, -6.03873348236084, -5.6331586837768555, -5.227583408355713, -4.82200813293457, -4.416433334350586, -4.010858058929443, -3.605283260345459, -3.1997079849243164, -2.794132947921753, -2.3885579109191895, -1.9829826354980469, -1.5774075984954834, -1.17183256149292, -0.7662574648857117, -0.3606823682785034, 0.04489278793334961, 0.4504678249359131, 0.8560428619384766, 1.2616181373596191, 1.6671931743621826, 2.0727691650390625, 2.478344202041626, 2.8839192390441895, 3.289494514465332, 3.6950695514678955, 4.100644588470459, 4.506219863891602, 4.911794662475586, 5.3173699378967285, 5.722945213317871, 6.1285200119018555, 6.534095287322998, 6.939670562744141, 7.345245361328125, 7.750820636749268, 8.15639591217041, 8.561970710754395, 8.967545509338379, 9.37312126159668, 9.778696060180664, 10.184270858764648, 10.589845657348633, 10.995421409606934, 11.400996208190918, 11.806571960449219, 12.212146759033203, 12.617722511291504, 13.023297309875488, 13.428872108459473, 13.834447860717773, 14.240022659301758, 14.645597457885742, 15.051172256469727]}, "gradients/encoder.encoder.layers.20.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 5.0, 7.0, 12.0, 24.0, 54.0, 147.0, 539.0, 32199.0, 4159905.0, 1016.0, 252.0, 71.0, 27.0, 12.0, 8.0, 6.0, 2.0, 3.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-22.421875, -21.677734375, -20.93359375, -20.189453125, -19.4453125, -18.701171875, -17.95703125, -17.212890625, -16.46875, -15.724609375, -14.98046875, -14.236328125, -13.4921875, -12.748046875, -12.00390625, -11.259765625, -10.515625, -9.771484375, -9.02734375, -8.283203125, -7.5390625, -6.794921875, -6.05078125, -5.306640625, -4.5625, -3.818359375, -3.07421875, -2.330078125, -1.5859375, -0.841796875, -0.09765625, 0.646484375, 1.390625, 2.134765625, 2.87890625, 3.623046875, 4.3671875, 5.111328125, 5.85546875, 6.599609375, 7.34375, 8.087890625, 8.83203125, 9.576171875, 10.3203125, 11.064453125, 11.80859375, 12.552734375, 13.296875, 14.041015625, 14.78515625, 15.529296875, 16.2734375, 17.017578125, 17.76171875, 18.505859375, 19.25, 19.994140625, 20.73828125, 21.482421875, 22.2265625, 22.970703125, 23.71484375, 24.458984375, 25.203125]}, "gradients/encoder.encoder.layers.20.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 3.0, 4.0, 5.0, 11.0, 20.0, 44.0, 83.0, 168.0, 244.0, 205.0, 113.0, 72.0, 15.0, 8.0, 5.0, 4.0, 3.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.53955078125, -0.5213775634765625, -0.503204345703125, -0.4850311279296875, -0.46685791015625, -0.4486846923828125, -0.430511474609375, -0.4123382568359375, -0.3941650390625, -0.3759918212890625, -0.357818603515625, -0.3396453857421875, -0.32147216796875, -0.3032989501953125, -0.285125732421875, -0.2669525146484375, -0.248779296875, -0.2306060791015625, -0.212432861328125, -0.1942596435546875, -0.17608642578125, -0.1579132080078125, -0.139739990234375, -0.1215667724609375, -0.1033935546875, -0.0852203369140625, -0.067047119140625, -0.0488739013671875, -0.03070068359375, -0.0125274658203125, 0.005645751953125, 0.0238189697265625, 0.0419921875, 0.0601654052734375, 0.078338623046875, 0.0965118408203125, 0.11468505859375, 0.1328582763671875, 0.151031494140625, 0.1692047119140625, 0.1873779296875, 0.2055511474609375, 0.223724365234375, 0.2418975830078125, 0.26007080078125, 0.2782440185546875, 0.296417236328125, 0.3145904541015625, 0.332763671875, 0.3509368896484375, 0.369110107421875, 0.3872833251953125, 0.40545654296875, 0.4236297607421875, 0.441802978515625, 0.4599761962890625, 0.4781494140625, 0.4963226318359375, 0.514495849609375, 0.5326690673828125, 0.55084228515625, 0.5690155029296875, 0.587188720703125, 0.6053619384765625, 0.62353515625]}, "gradients/encoder.encoder.layers.20.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 5.0, 5.0, 15.0, 14.0, 24.0, 44.0, 67.0, 124.0, 310.0, 1080.0, 7145.0, 200989.0, 3960467.0, 20620.0, 2357.0, 527.0, 190.0, 117.0, 74.0, 50.0, 29.0, 14.0, 13.0, 2.0, 4.0, 1.0, 1.0, 2.0, 3.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-3.568359375, -3.460906982421875, -3.35345458984375, -3.246002197265625, -3.1385498046875, -3.031097412109375, -2.92364501953125, -2.816192626953125, -2.708740234375, -2.601287841796875, -2.49383544921875, -2.386383056640625, -2.2789306640625, -2.171478271484375, -2.06402587890625, -1.956573486328125, -1.84912109375, -1.741668701171875, -1.63421630859375, -1.526763916015625, -1.4193115234375, -1.311859130859375, -1.20440673828125, -1.096954345703125, -0.989501953125, -0.882049560546875, -0.77459716796875, -0.667144775390625, -0.5596923828125, -0.452239990234375, -0.34478759765625, -0.237335205078125, -0.1298828125, -0.022430419921875, 0.08502197265625, 0.192474365234375, 0.2999267578125, 0.407379150390625, 0.51483154296875, 0.622283935546875, 0.729736328125, 0.837188720703125, 0.94464111328125, 1.052093505859375, 1.1595458984375, 1.266998291015625, 1.37445068359375, 1.481903076171875, 1.58935546875, 1.696807861328125, 1.80426025390625, 1.911712646484375, 2.0191650390625, 2.126617431640625, 2.23406982421875, 2.341522216796875, 2.448974609375, 2.556427001953125, 2.66387939453125, 2.771331787109375, 2.8787841796875, 2.986236572265625, 3.09368896484375, 3.201141357421875, 3.30859375]}, "gradients/encoder.encoder.layers.20.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 3.0, 3.0, 4.0, 11.0, 11.0, 10.0, 12.0, 22.0, 34.0, 106.0, 262.0, 1172.0, 1838.0, 318.0, 125.0, 54.0, 30.0, 20.0, 18.0, 7.0, 8.0, 5.0, 4.0, 3.0, 2.0, 4.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.349609375, -0.33129119873046875, -0.3129730224609375, -0.29465484619140625, -0.276336669921875, -0.25801849365234375, -0.2397003173828125, -0.22138214111328125, -0.20306396484375, -0.18474578857421875, -0.1664276123046875, -0.14810943603515625, -0.129791259765625, -0.11147308349609375, -0.0931549072265625, -0.07483673095703125, -0.0565185546875, -0.03820037841796875, -0.0198822021484375, -0.00156402587890625, 0.016754150390625, 0.03507232666015625, 0.0533905029296875, 0.07170867919921875, 0.09002685546875, 0.10834503173828125, 0.1266632080078125, 0.14498138427734375, 0.163299560546875, 0.18161773681640625, 0.1999359130859375, 0.21825408935546875, 0.236572265625, 0.25489044189453125, 0.2732086181640625, 0.29152679443359375, 0.309844970703125, 0.32816314697265625, 0.3464813232421875, 0.36479949951171875, 0.38311767578125, 0.40143585205078125, 0.4197540283203125, 0.43807220458984375, 0.456390380859375, 0.47470855712890625, 0.4930267333984375, 0.5113449096679688, 0.5296630859375, 0.5479812622070312, 0.5662994384765625, 0.5846176147460938, 0.602935791015625, 0.6212539672851562, 0.6395721435546875, 0.6578903198242188, 0.67620849609375, 0.6945266723632812, 0.7128448486328125, 0.7311630249023438, 0.749481201171875, 0.7677993774414062, 0.7861175537109375, 0.8044357299804688, 0.82275390625]}, "gradients/encoder.encoder.layers.20.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 3.0, 7.0, 4.0, 11.0, 26.0, 46.0, 87.0, 187.0, 190.0, 185.0, 113.0, 76.0, 33.0, 13.0, 12.0, 6.0, 2.0, 4.0, 1.0, 2.0, 2.0, 0.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.0713757276535034, -1.0100514888763428, -0.9487271308898926, -0.8874028325080872, -0.8260785341262817, -0.7647542357444763, -0.7034299373626709, -0.6421056389808655, -0.5807813405990601, -0.5194570422172546, -0.4581327438354492, -0.3968084454536438, -0.3354841470718384, -0.27415984869003296, -0.21283555030822754, -0.15151125192642212, -0.0901869535446167, -0.02886265516281128, 0.03246164321899414, 0.09378594160079956, 0.15511023998260498, 0.2164345383644104, 0.2777588367462158, 0.33908313512802124, 0.40040743350982666, 0.4617317318916321, 0.5230560302734375, 0.5843803286552429, 0.6457046270370483, 0.7070289254188538, 0.7683532238006592, 0.8296775221824646, 0.8910017013549805, 0.9523259997367859, 1.0136502981185913, 1.074974536895752, 1.1362988948822021, 1.1976232528686523, 1.258947491645813, 1.3202717304229736, 1.3815960884094238, 1.442920446395874, 1.5042446851730347, 1.5655689239501953, 1.6268932819366455, 1.6882176399230957, 1.7495418787002563, 1.810866117477417, 1.8721904754638672, 1.9335148334503174, 1.994839072227478, 2.0561633110046387, 2.117487668991089, 2.178812026977539, 2.24013614654541, 2.3014605045318604, 2.3627848625183105, 2.4241092205047607, 2.485433578491211, 2.546757698059082, 2.6080820560455322, 2.6694064140319824, 2.7307305335998535, 2.7920548915863037, 2.853379249572754]}, "gradients/encoder.encoder.layers.20.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 5.0, 2.0, 1.0, 2.0, 11.0, 15.0, 12.0, 20.0, 23.0, 19.0, 28.0, 29.0, 33.0, 37.0, 44.0, 43.0, 50.0, 60.0, 51.0, 48.0, 53.0, 51.0, 32.0, 49.0, 42.0, 42.0, 44.0, 33.0, 21.0, 21.0, 24.0, 17.0, 14.0, 8.0, 5.0, 6.0, 3.0, 6.0, 2.0, 4.0, 1.0, 2.0, 0.0, 0.0, 0.0, 2.0, 1.0], "bins": [-1.4748361110687256, -1.4331227540969849, -1.3914093971252441, -1.3496960401535034, -1.3079826831817627, -1.266269326210022, -1.2245559692382812, -1.1828426122665405, -1.1411292552947998, -1.099415898323059, -1.0577025413513184, -1.0159891843795776, -0.9742758274078369, -0.9325624704360962, -0.8908491134643555, -0.8491357564926147, -0.8074224591255188, -0.7657091021537781, -0.7239957451820374, -0.6822823882102966, -0.6405690312385559, -0.5988556742668152, -0.5571423768997192, -0.5154290199279785, -0.4737156331539154, -0.4320022761821747, -0.39028891921043396, -0.3485755920410156, -0.3068622350692749, -0.2651488780975342, -0.22343552112579346, -0.18172216415405273, -0.140008807182312, -0.09829545021057129, -0.05658210068941116, -0.014868751168251038, 0.026844605803489685, 0.06855796277523041, 0.11027130484580994, 0.15198466181755066, 0.19369801878929138, 0.2354113757610321, 0.2771247327327728, 0.31883805990219116, 0.3605514168739319, 0.4022647738456726, 0.44397813081741333, 0.48569148778915405, 0.5274048447608948, 0.5691182017326355, 0.6108315587043762, 0.6525449156761169, 0.6942582726478577, 0.7359716296195984, 0.7776849269866943, 0.8193982839584351, 0.8611116409301758, 0.9028249979019165, 0.9445383548736572, 0.986251711845398, 1.0279650688171387, 1.0696784257888794, 1.1113917827606201, 1.1531051397323608, 1.1948184967041016]}, "gradients/encoder.encoder.layers.20.attention.out_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 5.0, 7.0, 5.0, 9.0, 8.0, 8.0, 15.0, 28.0, 34.0, 54.0, 83.0, 172.0, 332.0, 654.0, 1738.0, 6733.0, 198019.0, 821593.0, 14562.0, 2705.0, 865.0, 424.0, 208.0, 119.0, 56.0, 44.0, 26.0, 12.0, 7.0, 9.0, 8.0, 5.0, 6.0, 6.0, 2.0, 1.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0], "bins": [-3.44140625, -3.334442138671875, -3.22747802734375, -3.120513916015625, -3.0135498046875, -2.906585693359375, -2.79962158203125, -2.692657470703125, -2.585693359375, -2.478729248046875, -2.37176513671875, -2.264801025390625, -2.1578369140625, -2.050872802734375, -1.94390869140625, -1.836944580078125, -1.72998046875, -1.623016357421875, -1.51605224609375, -1.409088134765625, -1.3021240234375, -1.195159912109375, -1.08819580078125, -0.981231689453125, -0.874267578125, -0.767303466796875, -0.66033935546875, -0.553375244140625, -0.4464111328125, -0.339447021484375, -0.23248291015625, -0.125518798828125, -0.0185546875, 0.088409423828125, 0.19537353515625, 0.302337646484375, 0.4093017578125, 0.516265869140625, 0.62322998046875, 0.730194091796875, 0.837158203125, 0.944122314453125, 1.05108642578125, 1.158050537109375, 1.2650146484375, 1.371978759765625, 1.47894287109375, 1.585906982421875, 1.69287109375, 1.799835205078125, 1.90679931640625, 2.013763427734375, 2.1207275390625, 2.227691650390625, 2.33465576171875, 2.441619873046875, 2.548583984375, 2.655548095703125, 2.76251220703125, 2.869476318359375, 2.9764404296875, 3.083404541015625, 3.19036865234375, 3.297332763671875, 3.404296875]}, "gradients/encoder.encoder.layers.20.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 4.0, 2.0, 11.0, 16.0, 49.0, 120.0, 201.0, 236.0, 169.0, 108.0, 56.0, 17.0, 5.0, 7.0, 3.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.52587890625, -0.5086288452148438, -0.4913787841796875, -0.47412872314453125, -0.456878662109375, -0.43962860107421875, -0.4223785400390625, -0.40512847900390625, -0.38787841796875, -0.37062835693359375, -0.3533782958984375, -0.33612823486328125, -0.318878173828125, -0.30162811279296875, -0.2843780517578125, -0.26712799072265625, -0.2498779296875, -0.23262786865234375, -0.2153778076171875, -0.19812774658203125, -0.180877685546875, -0.16362762451171875, -0.1463775634765625, -0.12912750244140625, -0.11187744140625, -0.09462738037109375, -0.0773773193359375, -0.06012725830078125, -0.042877197265625, -0.02562713623046875, -0.0083770751953125, 0.00887298583984375, 0.026123046875, 0.04337310791015625, 0.0606231689453125, 0.07787322998046875, 0.095123291015625, 0.11237335205078125, 0.1296234130859375, 0.14687347412109375, 0.16412353515625, 0.18137359619140625, 0.1986236572265625, 0.21587371826171875, 0.233123779296875, 0.25037384033203125, 0.2676239013671875, 0.28487396240234375, 0.3021240234375, 0.31937408447265625, 0.3366241455078125, 0.35387420654296875, 0.371124267578125, 0.38837432861328125, 0.4056243896484375, 0.42287445068359375, 0.44012451171875, 0.45737457275390625, 0.4746246337890625, 0.49187469482421875, 0.509124755859375, 0.5263748168945312, 0.5436248779296875, 0.5608749389648438, 0.578125]}, "gradients/encoder.encoder.layers.20.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 4.0, 3.0, 1.0, 5.0, 7.0, 6.0, 9.0, 11.0, 16.0, 37.0, 63.0, 95.0, 185.0, 341.0, 713.0, 1943.0, 9001.0, 106353.0, 823657.0, 94625.0, 8264.0, 1873.0, 655.0, 319.0, 147.0, 82.0, 55.0, 32.0, 21.0, 8.0, 11.0, 7.0, 4.0, 6.0, 2.0, 2.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-2.296875, -2.226165771484375, -2.15545654296875, -2.084747314453125, -2.0140380859375, -1.943328857421875, -1.87261962890625, -1.801910400390625, -1.731201171875, -1.660491943359375, -1.58978271484375, -1.519073486328125, -1.4483642578125, -1.377655029296875, -1.30694580078125, -1.236236572265625, -1.16552734375, -1.094818115234375, -1.02410888671875, -0.953399658203125, -0.8826904296875, -0.811981201171875, -0.74127197265625, -0.670562744140625, -0.599853515625, -0.529144287109375, -0.45843505859375, -0.387725830078125, -0.3170166015625, -0.246307373046875, -0.17559814453125, -0.104888916015625, -0.0341796875, 0.036529541015625, 0.10723876953125, 0.177947998046875, 0.2486572265625, 0.319366455078125, 0.39007568359375, 0.460784912109375, 0.531494140625, 0.602203369140625, 0.67291259765625, 0.743621826171875, 0.8143310546875, 0.885040283203125, 0.95574951171875, 1.026458740234375, 1.09716796875, 1.167877197265625, 1.23858642578125, 1.309295654296875, 1.3800048828125, 1.450714111328125, 1.52142333984375, 1.592132568359375, 1.662841796875, 1.733551025390625, 1.80426025390625, 1.874969482421875, 1.9456787109375, 2.016387939453125, 2.08709716796875, 2.157806396484375, 2.228515625]}, "gradients/encoder.encoder.layers.20.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0, 3.0, 7.0, 0.0, 7.0, 2.0, 5.0, 3.0, 13.0, 15.0, 21.0, 23.0, 28.0, 41.0, 44.0, 56.0, 68.0, 59.0, 80.0, 61.0, 64.0, 69.0, 61.0, 54.0, 49.0, 39.0, 42.0, 29.0, 20.0, 17.0, 4.0, 5.0, 2.0, 3.0, 3.0, 7.0, 5.0, 2.0, 4.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.5830078125, -1.533172607421875, -1.48333740234375, -1.433502197265625, -1.3836669921875, -1.333831787109375, -1.28399658203125, -1.234161376953125, -1.184326171875, -1.134490966796875, -1.08465576171875, -1.034820556640625, -0.9849853515625, -0.935150146484375, -0.88531494140625, -0.835479736328125, -0.78564453125, -0.735809326171875, -0.68597412109375, -0.636138916015625, -0.5863037109375, -0.536468505859375, -0.48663330078125, -0.436798095703125, -0.386962890625, -0.337127685546875, -0.28729248046875, -0.237457275390625, -0.1876220703125, -0.137786865234375, -0.08795166015625, -0.038116455078125, 0.01171875, 0.061553955078125, 0.11138916015625, 0.161224365234375, 0.2110595703125, 0.260894775390625, 0.31072998046875, 0.360565185546875, 0.410400390625, 0.460235595703125, 0.51007080078125, 0.559906005859375, 0.6097412109375, 0.659576416015625, 0.70941162109375, 0.759246826171875, 0.80908203125, 0.858917236328125, 0.90875244140625, 0.958587646484375, 1.0084228515625, 1.058258056640625, 1.10809326171875, 1.157928466796875, 1.207763671875, 1.257598876953125, 1.30743408203125, 1.357269287109375, 1.4071044921875, 1.456939697265625, 1.50677490234375, 1.556610107421875, 1.6064453125]}, "gradients/encoder.encoder.layers.20.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 2.0, 9.0, 19.0, 15.0, 39.0, 93.0, 187.0, 678.0, 3735.0, 917426.0, 123641.0, 1955.0, 468.0, 145.0, 69.0, 33.0, 20.0, 9.0, 11.0, 1.0, 3.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.181640625, -2.115234375, -2.048828125, -1.982421875, -1.916015625, -1.849609375, -1.783203125, -1.716796875, -1.650390625, -1.583984375, -1.517578125, -1.451171875, -1.384765625, -1.318359375, -1.251953125, -1.185546875, -1.119140625, -1.052734375, -0.986328125, -0.919921875, -0.853515625, -0.787109375, -0.720703125, -0.654296875, -0.587890625, -0.521484375, -0.455078125, -0.388671875, -0.322265625, -0.255859375, -0.189453125, -0.123046875, -0.056640625, 0.009765625, 0.076171875, 0.142578125, 0.208984375, 0.275390625, 0.341796875, 0.408203125, 0.474609375, 0.541015625, 0.607421875, 0.673828125, 0.740234375, 0.806640625, 0.873046875, 0.939453125, 1.005859375, 1.072265625, 1.138671875, 1.205078125, 1.271484375, 1.337890625, 1.404296875, 1.470703125, 1.537109375, 1.603515625, 1.669921875, 1.736328125, 1.802734375, 1.869140625, 1.935546875, 2.001953125, 2.068359375]}, "gradients/encoder.encoder.layers.20.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 7.0, 4.0, 3.0, 5.0, 3.0, 1.0, 7.0, 7.0, 8.0, 15.0, 15.0, 15.0, 14.0, 24.0, 19.0, 22.0, 25.0, 43.0, 69.0, 132.0, 146.0, 117.0, 64.0, 40.0, 26.0, 23.0, 21.0, 30.0, 16.0, 20.0, 9.0, 12.0, 8.0, 4.0, 6.0, 8.0, 3.0, 2.0, 6.0, 3.0, 1.0, 2.0, 1.0, 3.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0], "bins": [-0.00014448165893554688, -0.0001402720808982849, -0.00013606250286102295, -0.00013185292482376099, -0.00012764334678649902, -0.00012343376874923706, -0.0001192241907119751, -0.00011501461267471313, -0.00011080503463745117, -0.00010659545660018921, -0.00010238587856292725, -9.817630052566528e-05, -9.396672248840332e-05, -8.975714445114136e-05, -8.55475664138794e-05, -8.133798837661743e-05, -7.712841033935547e-05, -7.29188323020935e-05, -6.870925426483154e-05, -6.449967622756958e-05, -6.029009819030762e-05, -5.6080520153045654e-05, -5.187094211578369e-05, -4.766136407852173e-05, -4.3451786041259766e-05, -3.92422080039978e-05, -3.503262996673584e-05, -3.082305192947388e-05, -2.6613473892211914e-05, -2.240389585494995e-05, -1.8194317817687988e-05, -1.3984739780426025e-05, -9.775161743164062e-06, -5.5655837059021e-06, -1.3560056686401367e-06, 2.853572368621826e-06, 7.063150405883789e-06, 1.1272728443145752e-05, 1.5482306480407715e-05, 1.9691884517669678e-05, 2.390146255493164e-05, 2.8111040592193604e-05, 3.2320618629455566e-05, 3.653019666671753e-05, 4.073977470397949e-05, 4.4949352741241455e-05, 4.915893077850342e-05, 5.336850881576538e-05, 5.7578086853027344e-05, 6.17876648902893e-05, 6.599724292755127e-05, 7.020682096481323e-05, 7.44163990020752e-05, 7.862597703933716e-05, 8.283555507659912e-05, 8.704513311386108e-05, 9.125471115112305e-05, 9.546428918838501e-05, 9.967386722564697e-05, 0.00010388344526290894, 0.0001080930233001709, 0.00011230260133743286, 0.00011651217937469482, 0.00012072175741195679, 0.00012493133544921875]}, "gradients/encoder.encoder.layers.20.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 0.0, 4.0, 1.0, 3.0, 9.0, 8.0, 9.0, 13.0, 20.0, 32.0, 47.0, 82.0, 138.0, 291.0, 684.0, 2234.0, 12326.0, 421525.0, 592050.0, 15196.0, 2462.0, 776.0, 299.0, 139.0, 85.0, 32.0, 34.0, 28.0, 8.0, 7.0, 6.0, 3.0, 4.0, 4.0, 4.0, 3.0, 2.0, 1.0], "bins": [-1.6513671875, -1.6138763427734375, -1.576385498046875, -1.5388946533203125, -1.50140380859375, -1.4639129638671875, -1.426422119140625, -1.3889312744140625, -1.3514404296875, -1.3139495849609375, -1.276458740234375, -1.2389678955078125, -1.20147705078125, -1.1639862060546875, -1.126495361328125, -1.0890045166015625, -1.051513671875, -1.0140228271484375, -0.976531982421875, -0.9390411376953125, -0.90155029296875, -0.8640594482421875, -0.826568603515625, -0.7890777587890625, -0.7515869140625, -0.7140960693359375, -0.676605224609375, -0.6391143798828125, -0.60162353515625, -0.5641326904296875, -0.526641845703125, -0.4891510009765625, -0.45166015625, -0.4141693115234375, -0.376678466796875, -0.3391876220703125, -0.30169677734375, -0.2642059326171875, -0.226715087890625, -0.1892242431640625, -0.1517333984375, -0.1142425537109375, -0.076751708984375, -0.0392608642578125, -0.00177001953125, 0.0357208251953125, 0.073211669921875, 0.1107025146484375, 0.148193359375, 0.1856842041015625, 0.223175048828125, 0.2606658935546875, 0.29815673828125, 0.3356475830078125, 0.373138427734375, 0.4106292724609375, 0.4481201171875, 0.4856109619140625, 0.523101806640625, 0.5605926513671875, 0.59808349609375, 0.6355743408203125, 0.673065185546875, 0.7105560302734375, 0.748046875]}, "gradients/encoder.encoder.layers.20.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 4.0, 6.0, 4.0, 2.0, 7.0, 12.0, 12.0, 14.0, 25.0, 32.0, 59.0, 81.0, 100.0, 99.0, 140.0, 113.0, 89.0, 67.0, 40.0, 36.0, 19.0, 14.0, 11.0, 9.0, 5.0, 6.0, 0.0, 3.0, 3.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.59716796875, -0.5768051147460938, -0.5564422607421875, -0.5360794067382812, -0.515716552734375, -0.49535369873046875, -0.4749908447265625, -0.45462799072265625, -0.43426513671875, -0.41390228271484375, -0.3935394287109375, -0.37317657470703125, -0.352813720703125, -0.33245086669921875, -0.3120880126953125, -0.29172515869140625, -0.2713623046875, -0.25099945068359375, -0.2306365966796875, -0.21027374267578125, -0.189910888671875, -0.16954803466796875, -0.1491851806640625, -0.12882232666015625, -0.10845947265625, -0.08809661865234375, -0.0677337646484375, -0.04737091064453125, -0.027008056640625, -0.00664520263671875, 0.0137176513671875, 0.03408050537109375, 0.054443359375, 0.07480621337890625, 0.0951690673828125, 0.11553192138671875, 0.135894775390625, 0.15625762939453125, 0.1766204833984375, 0.19698333740234375, 0.21734619140625, 0.23770904541015625, 0.2580718994140625, 0.27843475341796875, 0.298797607421875, 0.31916046142578125, 0.3395233154296875, 0.35988616943359375, 0.3802490234375, 0.40061187744140625, 0.4209747314453125, 0.44133758544921875, 0.461700439453125, 0.48206329345703125, 0.5024261474609375, 0.5227890014648438, 0.54315185546875, 0.5635147094726562, 0.5838775634765625, 0.6042404174804688, 0.624603271484375, 0.6449661254882812, 0.6653289794921875, 0.6856918334960938, 0.7060546875]}, "gradients/encoder.encoder.layers.20.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 4.0, 3.0, 12.0, 51.0, 415.0, 476.0, 47.0, 5.0, 3.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-14.725003242492676, -13.811201095581055, -12.897398948669434, -11.983596801757812, -11.069794654846191, -10.15599250793457, -9.242189407348633, -8.328388214111328, -7.414585590362549, -6.500783443450928, -5.586981296539307, -4.673178672790527, -3.7593767642974854, -2.845574378967285, -1.931772232055664, -1.017970085144043, -0.10416793823242188, 0.809634268283844, 1.7234364748001099, 2.6372387409210205, 3.5510408878326416, 4.464843273162842, 5.378645420074463, 6.292447566986084, 7.206249713897705, 8.120052337646484, 9.033854484558105, 9.947656631469727, 10.861458778381348, 11.775260925292969, 12.68906307220459, 13.602865219116211, 14.516668319702148, 15.43047046661377, 16.34427261352539, 17.258075714111328, 18.171876907348633, 19.08568000793457, 19.999481201171875, 20.913284301757812, 21.827085494995117, 22.740888595581055, 23.65468978881836, 24.568492889404297, 25.4822940826416, 26.39609718322754, 27.309898376464844, 28.22370147705078, 29.13750457763672, 30.051307678222656, 30.96510887145996, 31.8789119720459, 32.7927131652832, 33.70651626586914, 34.62031936645508, 35.53411865234375, 36.44792175292969, 37.361724853515625, 38.27552795410156, 39.189327239990234, 40.10313034057617, 41.01693344116211, 41.93073654174805, 42.84453582763672, 43.758338928222656]}, "gradients/encoder.encoder.layers.20.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 4.0, 2.0, 1.0, 5.0, 1.0, 3.0, 5.0, 7.0, 12.0, 12.0, 17.0, 24.0, 24.0, 23.0, 24.0, 26.0, 21.0, 38.0, 32.0, 40.0, 43.0, 31.0, 42.0, 46.0, 36.0, 52.0, 38.0, 38.0, 43.0, 37.0, 48.0, 32.0, 35.0, 30.0, 26.0, 14.0, 15.0, 18.0, 12.0, 12.0, 14.0, 7.0, 5.0, 3.0, 5.0, 2.0, 2.0, 2.0, 4.0, 1.0, 0.0, 0.0, 3.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-6.249598503112793, -6.043041229248047, -5.836483955383301, -5.629927158355713, -5.423369884490967, -5.216812610626221, -5.010255336761475, -4.803698539733887, -4.597141265869141, -4.3905839920043945, -4.184026718139648, -3.9774696826934814, -3.7709126472473145, -3.5643553733825684, -3.3577980995178223, -3.1512410640716553, -2.944683790206909, -2.738126516342163, -2.531569480895996, -2.32501220703125, -2.118455171585083, -1.911897897720337, -1.7053407430648804, -1.4987835884094238, -1.2922264337539673, -1.0856692790985107, -0.8791121244430542, -0.6725549101829529, -0.46599775552749634, -0.259440541267395, -0.05288338661193848, 0.15367376804351807, 0.3602309226989746, 0.5667880773544312, 0.7733452320098877, 0.979902446269989, 1.1864595413208008, 1.3930168151855469, 1.5995739698410034, 1.80613112449646, 2.012688159942627, 2.219245433807373, 2.42580246925354, 2.632359743118286, 2.838916778564453, 3.045474052429199, 3.2520313262939453, 3.4585883617401123, 3.6651456356048584, 3.8717029094696045, 4.0782599449157715, 4.284817218780518, 4.491374492645264, 4.697931289672852, 4.904488563537598, 5.111045837402344, 5.31760311126709, 5.524160385131836, 5.730717658996582, 5.93727445602417, 6.143831729888916, 6.350389003753662, 6.556946277618408, 6.763503074645996, 6.970060348510742]}, "gradients/encoder.encoder.layers.19.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 2.0, 2.0, 1.0, 0.0, 2.0, 3.0, 13.0, 12.0, 19.0, 31.0, 90.0, 216.0, 679.0, 3989.0, 3669764.0, 514695.0, 3578.0, 741.0, 236.0, 103.0, 43.0, 23.0, 25.0, 6.0, 6.0, 6.0, 4.0, 2.0, 2.0, 0.0, 3.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.88671875, -5.69024658203125, -5.4937744140625, -5.29730224609375, -5.100830078125, -4.90435791015625, -4.7078857421875, -4.51141357421875, -4.31494140625, -4.11846923828125, -3.9219970703125, -3.72552490234375, -3.529052734375, -3.33258056640625, -3.1361083984375, -2.93963623046875, -2.7431640625, -2.54669189453125, -2.3502197265625, -2.15374755859375, -1.957275390625, -1.76080322265625, -1.5643310546875, -1.36785888671875, -1.17138671875, -0.97491455078125, -0.7784423828125, -0.58197021484375, -0.385498046875, -0.18902587890625, 0.0074462890625, 0.20391845703125, 0.400390625, 0.59686279296875, 0.7933349609375, 0.98980712890625, 1.186279296875, 1.38275146484375, 1.5792236328125, 1.77569580078125, 1.97216796875, 2.16864013671875, 2.3651123046875, 2.56158447265625, 2.758056640625, 2.95452880859375, 3.1510009765625, 3.34747314453125, 3.5439453125, 3.74041748046875, 3.9368896484375, 4.13336181640625, 4.329833984375, 4.52630615234375, 4.7227783203125, 4.91925048828125, 5.11572265625, 5.31219482421875, 5.5086669921875, 5.70513916015625, 5.901611328125, 6.09808349609375, 6.2945556640625, 6.49102783203125, 6.6875]}, "gradients/encoder.encoder.layers.19.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 2.0, 0.0, 4.0, 8.0, 19.0, 39.0, 67.0, 103.0, 171.0, 182.0, 163.0, 114.0, 64.0, 38.0, 16.0, 9.0, 7.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.55419921875, -0.5354461669921875, -0.516693115234375, -0.4979400634765625, -0.47918701171875, -0.4604339599609375, -0.441680908203125, -0.4229278564453125, -0.4041748046875, -0.3854217529296875, -0.366668701171875, -0.3479156494140625, -0.32916259765625, -0.3104095458984375, -0.291656494140625, -0.2729034423828125, -0.254150390625, -0.2353973388671875, -0.216644287109375, -0.1978912353515625, -0.17913818359375, -0.1603851318359375, -0.141632080078125, -0.1228790283203125, -0.1041259765625, -0.0853729248046875, -0.066619873046875, -0.0478668212890625, -0.02911376953125, -0.0103607177734375, 0.008392333984375, 0.0271453857421875, 0.0458984375, 0.0646514892578125, 0.083404541015625, 0.1021575927734375, 0.12091064453125, 0.1396636962890625, 0.158416748046875, 0.1771697998046875, 0.1959228515625, 0.2146759033203125, 0.233428955078125, 0.2521820068359375, 0.27093505859375, 0.2896881103515625, 0.308441162109375, 0.3271942138671875, 0.345947265625, 0.3647003173828125, 0.383453369140625, 0.4022064208984375, 0.42095947265625, 0.4397125244140625, 0.458465576171875, 0.4772186279296875, 0.4959716796875, 0.5147247314453125, 0.533477783203125, 0.5522308349609375, 0.57098388671875, 0.5897369384765625, 0.608489990234375, 0.6272430419921875, 0.64599609375]}, "gradients/encoder.encoder.layers.19.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 0.0, 2.0, 1.0, 0.0, 5.0, 3.0, 1.0, 12.0, 17.0, 23.0, 41.0, 78.0, 119.0, 234.0, 686.0, 3731.0, 51739.0, 4087290.0, 45586.0, 3632.0, 639.0, 229.0, 93.0, 55.0, 23.0, 14.0, 15.0, 7.0, 2.0, 4.0, 0.0, 4.0, 0.0, 1.0, 2.0, 1.0, 1.0, 3.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.177734375, -2.09228515625, -2.0068359375, -1.92138671875, -1.8359375, -1.75048828125, -1.6650390625, -1.57958984375, -1.494140625, -1.40869140625, -1.3232421875, -1.23779296875, -1.15234375, -1.06689453125, -0.9814453125, -0.89599609375, -0.810546875, -0.72509765625, -0.6396484375, -0.55419921875, -0.46875, -0.38330078125, -0.2978515625, -0.21240234375, -0.126953125, -0.04150390625, 0.0439453125, 0.12939453125, 0.21484375, 0.30029296875, 0.3857421875, 0.47119140625, 0.556640625, 0.64208984375, 0.7275390625, 0.81298828125, 0.8984375, 0.98388671875, 1.0693359375, 1.15478515625, 1.240234375, 1.32568359375, 1.4111328125, 1.49658203125, 1.58203125, 1.66748046875, 1.7529296875, 1.83837890625, 1.923828125, 2.00927734375, 2.0947265625, 2.18017578125, 2.265625, 2.35107421875, 2.4365234375, 2.52197265625, 2.607421875, 2.69287109375, 2.7783203125, 2.86376953125, 2.94921875, 3.03466796875, 3.1201171875, 3.20556640625, 3.291015625]}, "gradients/encoder.encoder.layers.19.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 5.0, 0.0, 2.0, 4.0, 5.0, 10.0, 10.0, 18.0, 26.0, 31.0, 56.0, 143.0, 442.0, 2241.0, 726.0, 193.0, 66.0, 37.0, 21.0, 11.0, 13.0, 12.0, 4.0, 4.0, 4.0, 1.0, 4.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.7353515625, -0.7181053161621094, -0.7008590698242188, -0.6836128234863281, -0.6663665771484375, -0.6491203308105469, -0.6318740844726562, -0.6146278381347656, -0.597381591796875, -0.5801353454589844, -0.5628890991210938, -0.5456428527832031, -0.5283966064453125, -0.5111503601074219, -0.49390411376953125, -0.4766578674316406, -0.45941162109375, -0.4421653747558594, -0.42491912841796875, -0.4076728820800781, -0.3904266357421875, -0.3731803894042969, -0.35593414306640625, -0.3386878967285156, -0.321441650390625, -0.3041954040527344, -0.28694915771484375, -0.2697029113769531, -0.2524566650390625, -0.23521041870117188, -0.21796417236328125, -0.20071792602539062, -0.1834716796875, -0.16622543334960938, -0.14897918701171875, -0.13173294067382812, -0.1144866943359375, -0.09724044799804688, -0.07999420166015625, -0.06274795532226562, -0.045501708984375, -0.028255462646484375, -0.01100921630859375, 0.006237030029296875, 0.0234832763671875, 0.040729522705078125, 0.05797576904296875, 0.07522201538085938, 0.09246826171875, 0.10971450805664062, 0.12696075439453125, 0.14420700073242188, 0.1614532470703125, 0.17869949340820312, 0.19594573974609375, 0.21319198608398438, 0.230438232421875, 0.24768447875976562, 0.26493072509765625, 0.2821769714355469, 0.2994232177734375, 0.3166694641113281, 0.33391571044921875, 0.3511619567871094, 0.368408203125]}, "gradients/encoder.encoder.layers.19.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 2.0, 1.0, 0.0, 1.0, 2.0, 4.0, 15.0, 76.0, 315.0, 411.0, 146.0, 22.0, 9.0, 3.0, 5.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.168980360031128, -2.042003631591797, -1.9150269031524658, -1.7880500555038452, -1.6610733270645142, -1.534096598625183, -1.4071197509765625, -1.2801430225372314, -1.1531662940979004, -1.0261895656585693, -0.8992127776145935, -0.7722359895706177, -0.6452592611312866, -0.5182825326919556, -0.39130574464797974, -0.2643289566040039, -0.13735222816467285, -0.01037546992301941, 0.11660128831863403, 0.24357804656028748, 0.3705548048019409, 0.497531533241272, 0.6245083212852478, 0.7514851093292236, 0.8784618377685547, 1.0054385662078857, 1.1324152946472168, 1.2593921422958374, 1.3863688707351685, 1.5133455991744995, 1.6403224468231201, 1.7672991752624512, 1.8942756652832031, 2.021252393722534, 2.1482291221618652, 2.2752058506011963, 2.4021825790405273, 2.5291595458984375, 2.6561362743377686, 2.7831130027770996, 2.9100897312164307, 3.0370664596557617, 3.1640431880950928, 3.291019916534424, 3.417996883392334, 3.544973373413086, 3.671950340270996, 3.798927068710327, 3.925903797149658, 4.052880764007568, 4.17985725402832, 4.3068342208862305, 4.433810710906982, 4.560787677764893, 4.6877641677856445, 4.814741134643555, 4.941718101501465, 5.068695068359375, 5.195671558380127, 5.322648525238037, 5.449625015258789, 5.576601982116699, 5.703578472137451, 5.830555438995361, 5.957531929016113]}, "gradients/encoder.encoder.layers.19.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 3.0, 5.0, 10.0, 7.0, 16.0, 13.0, 28.0, 30.0, 38.0, 38.0, 48.0, 53.0, 60.0, 68.0, 73.0, 65.0, 68.0, 55.0, 64.0, 50.0, 52.0, 41.0, 25.0, 26.0, 15.0, 23.0, 14.0, 4.0, 11.0, 3.0, 2.0, 4.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.8195555210113525, -1.7663153409957886, -1.7130751609802246, -1.6598351001739502, -1.6065949201583862, -1.5533547401428223, -1.5001146793365479, -1.4468744993209839, -1.39363431930542, -1.340394139289856, -1.287153959274292, -1.2339138984680176, -1.1806737184524536, -1.1274335384368896, -1.0741934776306152, -1.0209532976150513, -0.9677131175994873, -0.9144729375839233, -0.8612328171730042, -0.807992696762085, -0.754752516746521, -0.701512336730957, -0.6482722163200378, -0.5950320959091187, -0.5417919158935547, -0.4885517656803131, -0.43531161546707153, -0.38207146525382996, -0.3288313150405884, -0.2755911648273468, -0.22235101461410522, -0.16911086440086365, -0.11587071418762207, -0.0626305639743805, -0.009390413761138916, 0.04384973645210266, 0.09708988666534424, 0.15033003687858582, 0.2035701870918274, 0.25681033730506897, 0.31005048751831055, 0.3632906377315521, 0.4165307879447937, 0.4697709381580353, 0.5230110883712769, 0.5762512683868408, 0.62949138879776, 0.6827315092086792, 0.7359716892242432, 0.7892118692398071, 0.8424519896507263, 0.8956921100616455, 0.9489322900772095, 1.0021724700927734, 1.0554125308990479, 1.1086527109146118, 1.1618928909301758, 1.2151330709457397, 1.2683732509613037, 1.3216133117675781, 1.374853491783142, 1.428093671798706, 1.4813337326049805, 1.5345739126205444, 1.5878140926361084]}, "gradients/encoder.encoder.layers.19.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 1.0, 5.0, 7.0, 5.0, 12.0, 15.0, 35.0, 36.0, 45.0, 78.0, 96.0, 158.0, 274.0, 464.0, 764.0, 1701.0, 5331.0, 29863.0, 390853.0, 562894.0, 44876.0, 6750.0, 2062.0, 946.0, 494.0, 267.0, 176.0, 120.0, 70.0, 53.0, 31.0, 22.0, 15.0, 15.0, 11.0, 7.0, 6.0, 3.0, 1.0, 0.0, 0.0, 3.0, 0.0, 3.0, 0.0, 1.0, 1.0], "bins": [-1.7021484375, -1.6549835205078125, -1.607818603515625, -1.5606536865234375, -1.51348876953125, -1.4663238525390625, -1.419158935546875, -1.3719940185546875, -1.3248291015625, -1.2776641845703125, -1.230499267578125, -1.1833343505859375, -1.13616943359375, -1.0890045166015625, -1.041839599609375, -0.9946746826171875, -0.947509765625, -0.9003448486328125, -0.853179931640625, -0.8060150146484375, -0.75885009765625, -0.7116851806640625, -0.664520263671875, -0.6173553466796875, -0.5701904296875, -0.5230255126953125, -0.475860595703125, -0.4286956787109375, -0.38153076171875, -0.3343658447265625, -0.287200927734375, -0.2400360107421875, -0.19287109375, -0.1457061767578125, -0.098541259765625, -0.0513763427734375, -0.00421142578125, 0.0429534912109375, 0.090118408203125, 0.1372833251953125, 0.1844482421875, 0.2316131591796875, 0.278778076171875, 0.3259429931640625, 0.37310791015625, 0.4202728271484375, 0.467437744140625, 0.5146026611328125, 0.561767578125, 0.6089324951171875, 0.656097412109375, 0.7032623291015625, 0.75042724609375, 0.7975921630859375, 0.844757080078125, 0.8919219970703125, 0.9390869140625, 0.9862518310546875, 1.033416748046875, 1.0805816650390625, 1.12774658203125, 1.1749114990234375, 1.222076416015625, 1.2692413330078125, 1.31640625]}, "gradients/encoder.encoder.layers.19.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 11.0, 19.0, 25.0, 67.0, 117.0, 150.0, 172.0, 159.0, 138.0, 73.0, 40.0, 15.0, 8.0, 4.0, 4.0, 1.0, 1.0, 2.0, 0.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.50732421875, -0.4896392822265625, -0.471954345703125, -0.4542694091796875, -0.43658447265625, -0.4188995361328125, -0.401214599609375, -0.3835296630859375, -0.3658447265625, -0.3481597900390625, -0.330474853515625, -0.3127899169921875, -0.29510498046875, -0.2774200439453125, -0.259735107421875, -0.2420501708984375, -0.224365234375, -0.2066802978515625, -0.188995361328125, -0.1713104248046875, -0.15362548828125, -0.1359405517578125, -0.118255615234375, -0.1005706787109375, -0.0828857421875, -0.0652008056640625, -0.047515869140625, -0.0298309326171875, -0.01214599609375, 0.0055389404296875, 0.023223876953125, 0.0409088134765625, 0.05859375, 0.0762786865234375, 0.093963623046875, 0.1116485595703125, 0.12933349609375, 0.1470184326171875, 0.164703369140625, 0.1823883056640625, 0.2000732421875, 0.2177581787109375, 0.235443115234375, 0.2531280517578125, 0.27081298828125, 0.2884979248046875, 0.306182861328125, 0.3238677978515625, 0.341552734375, 0.3592376708984375, 0.376922607421875, 0.3946075439453125, 0.41229248046875, 0.4299774169921875, 0.447662353515625, 0.4653472900390625, 0.4830322265625, 0.5007171630859375, 0.518402099609375, 0.5360870361328125, 0.55377197265625, 0.5714569091796875, 0.589141845703125, 0.6068267822265625, 0.62451171875]}, "gradients/encoder.encoder.layers.19.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 3.0, 1.0, 4.0, 1.0, 1.0, 2.0, 1.0, 4.0, 2.0, 4.0, 14.0, 15.0, 12.0, 16.0, 26.0, 31.0, 31.0, 66.0, 74.0, 103.0, 127.0, 203.0, 298.0, 458.0, 730.0, 1419.0, 3400.0, 9498.0, 36599.0, 198796.0, 580055.0, 169263.0, 31879.0, 8850.0, 3069.0, 1410.0, 770.0, 390.0, 246.0, 192.0, 151.0, 111.0, 59.0, 52.0, 43.0, 25.0, 19.0, 12.0, 3.0, 5.0, 12.0, 8.0, 4.0, 3.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.0712890625, -1.0372314453125, -1.003173828125, -0.9691162109375, -0.93505859375, -0.9010009765625, -0.866943359375, -0.8328857421875, -0.798828125, -0.7647705078125, -0.730712890625, -0.6966552734375, -0.66259765625, -0.6285400390625, -0.594482421875, -0.5604248046875, -0.5263671875, -0.4923095703125, -0.458251953125, -0.4241943359375, -0.39013671875, -0.3560791015625, -0.322021484375, -0.2879638671875, -0.25390625, -0.2198486328125, -0.185791015625, -0.1517333984375, -0.11767578125, -0.0836181640625, -0.049560546875, -0.0155029296875, 0.0185546875, 0.0526123046875, 0.086669921875, 0.1207275390625, 0.15478515625, 0.1888427734375, 0.222900390625, 0.2569580078125, 0.291015625, 0.3250732421875, 0.359130859375, 0.3931884765625, 0.42724609375, 0.4613037109375, 0.495361328125, 0.5294189453125, 0.5634765625, 0.5975341796875, 0.631591796875, 0.6656494140625, 0.69970703125, 0.7337646484375, 0.767822265625, 0.8018798828125, 0.8359375, 0.8699951171875, 0.904052734375, 0.9381103515625, 0.97216796875, 1.0062255859375, 1.040283203125, 1.0743408203125, 1.1083984375]}, "gradients/encoder.encoder.layers.19.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 4.0, 1.0, 1.0, 1.0, 3.0, 3.0, 4.0, 3.0, 4.0, 3.0, 9.0, 12.0, 12.0, 11.0, 17.0, 18.0, 15.0, 22.0, 29.0, 30.0, 44.0, 42.0, 44.0, 52.0, 56.0, 66.0, 47.0, 55.0, 51.0, 44.0, 43.0, 52.0, 35.0, 44.0, 23.0, 25.0, 17.0, 13.0, 12.0, 8.0, 7.0, 12.0, 6.0, 5.0, 2.0, 2.0, 4.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-1.46875, -1.42742919921875, -1.3861083984375, -1.34478759765625, -1.303466796875, -1.26214599609375, -1.2208251953125, -1.17950439453125, -1.13818359375, -1.09686279296875, -1.0555419921875, -1.01422119140625, -0.972900390625, -0.93157958984375, -0.8902587890625, -0.84893798828125, -0.8076171875, -0.76629638671875, -0.7249755859375, -0.68365478515625, -0.642333984375, -0.60101318359375, -0.5596923828125, -0.51837158203125, -0.47705078125, -0.43572998046875, -0.3944091796875, -0.35308837890625, -0.311767578125, -0.27044677734375, -0.2291259765625, -0.18780517578125, -0.146484375, -0.10516357421875, -0.0638427734375, -0.02252197265625, 0.018798828125, 0.06011962890625, 0.1014404296875, 0.14276123046875, 0.18408203125, 0.22540283203125, 0.2667236328125, 0.30804443359375, 0.349365234375, 0.39068603515625, 0.4320068359375, 0.47332763671875, 0.5146484375, 0.55596923828125, 0.5972900390625, 0.63861083984375, 0.679931640625, 0.72125244140625, 0.7625732421875, 0.80389404296875, 0.84521484375, 0.88653564453125, 0.9278564453125, 0.96917724609375, 1.010498046875, 1.05181884765625, 1.0931396484375, 1.13446044921875, 1.17578125]}, "gradients/encoder.encoder.layers.19.attention.k_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 2.0, 1.0, 2.0, 3.0, 1.0, 7.0, 3.0, 3.0, 5.0, 10.0, 11.0, 14.0, 23.0, 37.0, 69.0, 115.0, 278.0, 668.0, 1951.0, 10290.0, 294282.0, 718138.0, 18384.0, 2695.0, 884.0, 312.0, 153.0, 89.0, 40.0, 26.0, 21.0, 10.0, 14.0, 3.0, 6.0, 3.0, 3.0, 2.0, 3.0, 0.0, 1.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.6494140625, -0.6278533935546875, -0.606292724609375, -0.5847320556640625, -0.56317138671875, -0.5416107177734375, -0.520050048828125, -0.4984893798828125, -0.4769287109375, -0.4553680419921875, -0.433807373046875, -0.4122467041015625, -0.39068603515625, -0.3691253662109375, -0.347564697265625, -0.3260040283203125, -0.304443359375, -0.2828826904296875, -0.261322021484375, -0.2397613525390625, -0.21820068359375, -0.1966400146484375, -0.175079345703125, -0.1535186767578125, -0.1319580078125, -0.1103973388671875, -0.088836669921875, -0.0672760009765625, -0.04571533203125, -0.0241546630859375, -0.002593994140625, 0.0189666748046875, 0.04052734375, 0.0620880126953125, 0.083648681640625, 0.1052093505859375, 0.12677001953125, 0.1483306884765625, 0.169891357421875, 0.1914520263671875, 0.2130126953125, 0.2345733642578125, 0.256134033203125, 0.2776947021484375, 0.29925537109375, 0.3208160400390625, 0.342376708984375, 0.3639373779296875, 0.385498046875, 0.4070587158203125, 0.428619384765625, 0.4501800537109375, 0.47174072265625, 0.4933013916015625, 0.514862060546875, 0.5364227294921875, 0.5579833984375, 0.5795440673828125, 0.601104736328125, 0.6226654052734375, 0.64422607421875, 0.6657867431640625, 0.687347412109375, 0.7089080810546875, 0.73046875]}, "gradients/encoder.encoder.layers.19.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 0.0, 2.0, 3.0, 4.0, 4.0, 3.0, 10.0, 13.0, 19.0, 21.0, 39.0, 40.0, 59.0, 119.0, 184.0, 177.0, 100.0, 74.0, 51.0, 32.0, 15.0, 17.0, 13.0, 7.0, 6.0, 1.0, 1.0, 3.0], "bins": [-0.0003383159637451172, -0.00033166538923978806, -0.0003250148147344589, -0.0003183642402291298, -0.00031171366572380066, -0.0003050630912184715, -0.0002984125167131424, -0.00029176194220781326, -0.00028511136770248413, -0.000278460793197155, -0.00027181021869182587, -0.00026515964418649673, -0.0002585090696811676, -0.00025185849517583847, -0.00024520792067050934, -0.0002385573461651802, -0.00023190677165985107, -0.00022525619715452194, -0.0002186056226491928, -0.00021195504814386368, -0.00020530447363853455, -0.00019865389913320541, -0.00019200332462787628, -0.00018535275012254715, -0.00017870217561721802, -0.00017205160111188889, -0.00016540102660655975, -0.00015875045210123062, -0.0001520998775959015, -0.00014544930309057236, -0.00013879872858524323, -0.0001321481540799141, -0.00012549757957458496, -0.00011884700506925583, -0.0001121964305639267, -0.00010554585605859756, -9.889528155326843e-05, -9.22447070479393e-05, -8.559413254261017e-05, -7.894355803728104e-05, -7.22929835319519e-05, -6.564240902662277e-05, -5.899183452129364e-05, -5.234126001596451e-05, -4.5690685510635376e-05, -3.9040111005306244e-05, -3.238953649997711e-05, -2.573896199464798e-05, -1.9088387489318848e-05, -1.2437812983989716e-05, -5.7872384786605835e-06, 8.633360266685486e-07, 7.513910531997681e-06, 1.4164485037326813e-05, 2.0815059542655945e-05, 2.7465634047985077e-05, 3.411620855331421e-05, 4.076678305864334e-05, 4.741735756397247e-05, 5.4067932069301605e-05, 6.071850657463074e-05, 6.736908107995987e-05, 7.4019655585289e-05, 8.067023009061813e-05, 8.732080459594727e-05]}, "gradients/encoder.encoder.layers.19.attention.q_proj.weight": {"_type": "histogram", "values": [2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 3.0, 0.0, 2.0, 5.0, 6.0, 13.0, 14.0, 13.0, 25.0, 40.0, 72.0, 169.0, 382.0, 971.0, 3404.0, 29954.0, 850326.0, 152663.0, 7750.0, 1672.0, 614.0, 241.0, 88.0, 37.0, 36.0, 17.0, 17.0, 7.0, 4.0, 6.0, 6.0, 3.0, 1.0, 3.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.61865234375, -0.5946044921875, -0.570556640625, -0.5465087890625, -0.5224609375, -0.4984130859375, -0.474365234375, -0.4503173828125, -0.42626953125, -0.4022216796875, -0.378173828125, -0.3541259765625, -0.330078125, -0.3060302734375, -0.281982421875, -0.2579345703125, -0.23388671875, -0.2098388671875, -0.185791015625, -0.1617431640625, -0.1376953125, -0.1136474609375, -0.089599609375, -0.0655517578125, -0.04150390625, -0.0174560546875, 0.006591796875, 0.0306396484375, 0.0546875, 0.0787353515625, 0.102783203125, 0.1268310546875, 0.15087890625, 0.1749267578125, 0.198974609375, 0.2230224609375, 0.2470703125, 0.2711181640625, 0.295166015625, 0.3192138671875, 0.34326171875, 0.3673095703125, 0.391357421875, 0.4154052734375, 0.439453125, 0.4635009765625, 0.487548828125, 0.5115966796875, 0.53564453125, 0.5596923828125, 0.583740234375, 0.6077880859375, 0.6318359375, 0.6558837890625, 0.679931640625, 0.7039794921875, 0.72802734375, 0.7520751953125, 0.776123046875, 0.8001708984375, 0.82421875, 0.8482666015625, 0.872314453125, 0.8963623046875, 0.92041015625]}, "gradients/encoder.encoder.layers.19.attention.q_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 3.0, 3.0, 4.0, 9.0, 9.0, 7.0, 8.0, 18.0, 13.0, 26.0, 38.0, 55.0, 75.0, 84.0, 137.0, 135.0, 118.0, 83.0, 49.0, 46.0, 23.0, 15.0, 17.0, 12.0, 8.0, 4.0, 7.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0], "bins": [-0.55517578125, -0.5388450622558594, -0.5225143432617188, -0.5061836242675781, -0.4898529052734375, -0.4735221862792969, -0.45719146728515625, -0.4408607482910156, -0.424530029296875, -0.4081993103027344, -0.39186859130859375, -0.3755378723144531, -0.3592071533203125, -0.3428764343261719, -0.32654571533203125, -0.3102149963378906, -0.29388427734375, -0.2775535583496094, -0.26122283935546875, -0.24489212036132812, -0.2285614013671875, -0.21223068237304688, -0.19589996337890625, -0.17956924438476562, -0.163238525390625, -0.14690780639648438, -0.13057708740234375, -0.11424636840820312, -0.0979156494140625, -0.08158493041992188, -0.06525421142578125, -0.048923492431640625, -0.0325927734375, -0.016262054443359375, 6.866455078125e-05, 0.016399383544921875, 0.0327301025390625, 0.049060821533203125, 0.06539154052734375, 0.08172225952148438, 0.098052978515625, 0.11438369750976562, 0.13071441650390625, 0.14704513549804688, 0.1633758544921875, 0.17970657348632812, 0.19603729248046875, 0.21236801147460938, 0.22869873046875, 0.24502944946289062, 0.26136016845703125, 0.2776908874511719, 0.2940216064453125, 0.3103523254394531, 0.32668304443359375, 0.3430137634277344, 0.359344482421875, 0.3756752014160156, 0.39200592041015625, 0.4083366394042969, 0.4246673583984375, 0.4409980773925781, 0.45732879638671875, 0.4736595153808594, 0.489990234375]}, "gradients/encoder.encoder.layers.19.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 4.0, 4.0, 8.0, 14.0, 49.0, 131.0, 334.0, 309.0, 103.0, 29.0, 13.0, 10.0, 4.0, 3.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-10.298676490783691, -9.85159683227539, -9.404518127441406, -8.957438468933105, -8.510358810424805, -8.06328010559082, -7.6162004470825195, -7.169121265411377, -6.722042083740234, -6.274962902069092, -5.827883720397949, -5.380804061889648, -4.933724880218506, -4.486645698547363, -4.0395660400390625, -3.59248685836792, -3.1454076766967773, -2.6983284950256348, -2.251249074935913, -1.804169774055481, -1.3570904731750488, -0.9100112915039062, -0.46293187141418457, -0.01585245132446289, 0.4312267303466797, 0.8783060312271118, 1.325385332107544, 1.772464632987976, 2.219543933868408, 2.666623115539551, 3.1137025356292725, 3.560781955718994, 4.00786018371582, 4.454939365386963, 4.9020185470581055, 5.349098205566406, 5.796177387237549, 6.243256568908691, 6.690336227416992, 7.137415409088135, 7.584494590759277, 8.031574249267578, 8.478652954101562, 8.925732612609863, 9.372812271118164, 9.819890975952148, 10.26697063446045, 10.71405029296875, 11.161128997802734, 11.608208656311035, 12.05528736114502, 12.50236701965332, 12.949445724487305, 13.396525382995605, 13.843605041503906, 14.29068374633789, 14.737763404846191, 15.184843063354492, 15.631921768188477, 16.07900047302246, 16.526081085205078, 16.973159790039062, 17.420238494873047, 17.867319107055664, 18.31439781188965]}, "gradients/encoder.encoder.layers.19.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 3.0, 0.0, 4.0, 1.0, 2.0, 6.0, 6.0, 5.0, 15.0, 12.0, 16.0, 18.0, 32.0, 24.0, 38.0, 39.0, 31.0, 37.0, 40.0, 55.0, 53.0, 44.0, 49.0, 47.0, 43.0, 43.0, 45.0, 36.0, 29.0, 35.0, 37.0, 30.0, 15.0, 24.0, 24.0, 16.0, 16.0, 13.0, 7.0, 8.0, 5.0, 5.0, 2.0, 4.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.905524253845215, -7.677194595336914, -7.4488654136657715, -7.220535755157471, -6.992206573486328, -6.763876914978027, -6.535547256469727, -6.307218074798584, -6.078888416290283, -5.850558757781982, -5.62222957611084, -5.393899917602539, -5.1655707359313965, -4.937241077423096, -4.708911895751953, -4.480582237243652, -4.252252578735352, -4.023922920227051, -3.795593738555908, -3.5672640800476074, -3.3389346599578857, -3.110605239868164, -2.8822758197784424, -2.6539463996887207, -2.425617218017578, -2.1972877979278564, -1.9689582586288452, -1.7406288385391235, -1.5122992992401123, -1.2839698791503906, -1.055640459060669, -0.8273109197616577, -0.5989813804626465, -0.37065190076828003, -0.14232245087623596, 0.0860069990158081, 0.31433647871017456, 0.542665958404541, 0.7709953784942627, 0.9993249177932739, 1.2276543378829956, 1.4559837579727173, 1.6843132972717285, 1.9126427173614502, 2.140972137451172, 2.3693017959594727, 2.5976309776306152, 2.825960636138916, 3.0542900562286377, 3.2826194763183594, 3.510948896408081, 3.7392783164978027, 3.9676079750061035, 4.195937156677246, 4.424266815185547, 4.652596473693848, 4.88092565536499, 5.109255313873291, 5.337584495544434, 5.565914154052734, 5.794243335723877, 6.022572994232178, 6.25090217590332, 6.479231834411621, 6.707561492919922]}, "gradients/encoder.encoder.layers.18.feed_forward.output_dense.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 2.0, 1.0, 2.0, 6.0, 2.0, 9.0, 13.0, 18.0, 28.0, 46.0, 93.0, 198.0, 380.0, 933.0, 3191.0, 23649.0, 4079324.0, 77949.0, 5890.0, 1431.0, 524.0, 245.0, 123.0, 76.0, 52.0, 35.0, 17.0, 15.0, 9.0, 10.0, 4.0, 2.0, 4.0, 4.0, 2.0, 1.0, 2.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.111328125, -2.0350341796875, -1.958740234375, -1.8824462890625, -1.80615234375, -1.7298583984375, -1.653564453125, -1.5772705078125, -1.5009765625, -1.4246826171875, -1.348388671875, -1.2720947265625, -1.19580078125, -1.1195068359375, -1.043212890625, -0.9669189453125, -0.890625, -0.8143310546875, -0.738037109375, -0.6617431640625, -0.58544921875, -0.5091552734375, -0.432861328125, -0.3565673828125, -0.2802734375, -0.2039794921875, -0.127685546875, -0.0513916015625, 0.02490234375, 0.1011962890625, 0.177490234375, 0.2537841796875, 0.330078125, 0.4063720703125, 0.482666015625, 0.5589599609375, 0.63525390625, 0.7115478515625, 0.787841796875, 0.8641357421875, 0.9404296875, 1.0167236328125, 1.093017578125, 1.1693115234375, 1.24560546875, 1.3218994140625, 1.398193359375, 1.4744873046875, 1.55078125, 1.6270751953125, 1.703369140625, 1.7796630859375, 1.85595703125, 1.9322509765625, 2.008544921875, 2.0848388671875, 2.1611328125, 2.2374267578125, 2.313720703125, 2.3900146484375, 2.46630859375, 2.5426025390625, 2.618896484375, 2.6951904296875, 2.771484375]}, "gradients/encoder.encoder.layers.18.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 2.0, 0.0, 1.0, 3.0, 4.0, 5.0, 14.0, 23.0, 37.0, 64.0, 100.0, 127.0, 170.0, 147.0, 121.0, 68.0, 55.0, 35.0, 18.0, 7.0, 4.0, 3.0, 1.0, 1.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.49169921875, -0.47399139404296875, -0.4562835693359375, -0.43857574462890625, -0.420867919921875, -0.40316009521484375, -0.3854522705078125, -0.36774444580078125, -0.35003662109375, -0.33232879638671875, -0.3146209716796875, -0.29691314697265625, -0.279205322265625, -0.26149749755859375, -0.2437896728515625, -0.22608184814453125, -0.2083740234375, -0.19066619873046875, -0.1729583740234375, -0.15525054931640625, -0.137542724609375, -0.11983489990234375, -0.1021270751953125, -0.08441925048828125, -0.06671142578125, -0.04900360107421875, -0.0312957763671875, -0.01358795166015625, 0.004119873046875, 0.02182769775390625, 0.0395355224609375, 0.05724334716796875, 0.074951171875, 0.09265899658203125, 0.1103668212890625, 0.12807464599609375, 0.145782470703125, 0.16349029541015625, 0.1811981201171875, 0.19890594482421875, 0.21661376953125, 0.23432159423828125, 0.2520294189453125, 0.26973724365234375, 0.287445068359375, 0.30515289306640625, 0.3228607177734375, 0.34056854248046875, 0.3582763671875, 0.37598419189453125, 0.3936920166015625, 0.41139984130859375, 0.429107666015625, 0.44681549072265625, 0.4645233154296875, 0.48223114013671875, 0.49993896484375, 0.5176467895507812, 0.5353546142578125, 0.5530624389648438, 0.570770263671875, 0.5884780883789062, 0.6061859130859375, 0.6238937377929688, 0.6416015625]}, "gradients/encoder.encoder.layers.18.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 0.0, 2.0, 8.0, 5.0, 10.0, 18.0, 16.0, 43.0, 88.0, 214.0, 682.0, 2499.0, 12292.0, 628850.0, 3528463.0, 16903.0, 2867.0, 877.0, 252.0, 98.0, 46.0, 21.0, 12.0, 3.0, 5.0, 1.0, 4.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.7958984375, -1.7399139404296875, -1.683929443359375, -1.6279449462890625, -1.57196044921875, -1.5159759521484375, -1.459991455078125, -1.4040069580078125, -1.3480224609375, -1.2920379638671875, -1.236053466796875, -1.1800689697265625, -1.12408447265625, -1.0680999755859375, -1.012115478515625, -0.9561309814453125, -0.900146484375, -0.8441619873046875, -0.788177490234375, -0.7321929931640625, -0.67620849609375, -0.6202239990234375, -0.564239501953125, -0.5082550048828125, -0.4522705078125, -0.3962860107421875, -0.340301513671875, -0.2843170166015625, -0.22833251953125, -0.1723480224609375, -0.116363525390625, -0.0603790283203125, -0.00439453125, 0.0515899658203125, 0.107574462890625, 0.1635589599609375, 0.21954345703125, 0.2755279541015625, 0.331512451171875, 0.3874969482421875, 0.4434814453125, 0.4994659423828125, 0.555450439453125, 0.6114349365234375, 0.66741943359375, 0.7234039306640625, 0.779388427734375, 0.8353729248046875, 0.891357421875, 0.9473419189453125, 1.003326416015625, 1.0593109130859375, 1.11529541015625, 1.1712799072265625, 1.227264404296875, 1.2832489013671875, 1.3392333984375, 1.3952178955078125, 1.451202392578125, 1.5071868896484375, 1.56317138671875, 1.6191558837890625, 1.675140380859375, 1.7311248779296875, 1.787109375]}, "gradients/encoder.encoder.layers.18.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 4.0, 2.0, 2.0, 2.0, 5.0, 4.0, 5.0, 10.0, 8.0, 10.0, 18.0, 26.0, 39.0, 73.0, 186.0, 606.0, 2106.0, 543.0, 192.0, 87.0, 43.0, 32.0, 15.0, 15.0, 12.0, 8.0, 7.0, 6.0, 3.0, 2.0, 2.0, 2.0, 2.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.3125, -0.30255889892578125, -0.2926177978515625, -0.28267669677734375, -0.272735595703125, -0.26279449462890625, -0.2528533935546875, -0.24291229248046875, -0.23297119140625, -0.22303009033203125, -0.2130889892578125, -0.20314788818359375, -0.193206787109375, -0.18326568603515625, -0.1733245849609375, -0.16338348388671875, -0.1534423828125, -0.14350128173828125, -0.1335601806640625, -0.12361907958984375, -0.113677978515625, -0.10373687744140625, -0.0937957763671875, -0.08385467529296875, -0.07391357421875, -0.06397247314453125, -0.0540313720703125, -0.04409027099609375, -0.034149169921875, -0.02420806884765625, -0.0142669677734375, -0.00432586669921875, 0.005615234375, 0.01555633544921875, 0.0254974365234375, 0.03543853759765625, 0.045379638671875, 0.05532073974609375, 0.0652618408203125, 0.07520294189453125, 0.08514404296875, 0.09508514404296875, 0.1050262451171875, 0.11496734619140625, 0.124908447265625, 0.13484954833984375, 0.1447906494140625, 0.15473175048828125, 0.1646728515625, 0.17461395263671875, 0.1845550537109375, 0.19449615478515625, 0.204437255859375, 0.21437835693359375, 0.2243194580078125, 0.23426055908203125, 0.24420166015625, 0.25414276123046875, 0.2640838623046875, 0.27402496337890625, 0.283966064453125, 0.29390716552734375, 0.3038482666015625, 0.31378936767578125, 0.32373046875]}, "gradients/encoder.encoder.layers.18.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 2.0, 14.0, 39.0, 192.0, 437.0, 269.0, 44.0, 8.0, 3.0, 2.0, 1.0, 3.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.591681480407715, -4.488351821899414, -4.385021686553955, -4.281692028045654, -4.1783623695373535, -4.075032711029053, -3.9717025756835938, -3.868372917175293, -3.765043258666992, -3.6617133617401123, -3.5583837032318115, -3.4550538063049316, -3.351724147796631, -3.248394250869751, -3.145064353942871, -3.0417346954345703, -2.9384047985076904, -2.8350749015808105, -2.7317452430725098, -2.62841534614563, -2.525085687637329, -2.421755790710449, -2.3184261322021484, -2.2150962352752686, -2.1117663383483887, -2.008436441421509, -1.905106782913208, -1.8017768859863281, -1.6984472274780273, -1.5951173305511475, -1.4917875528335571, -1.3884577751159668, -1.285127878189087, -1.1817981004714966, -1.0784683227539062, -0.9751384854316711, -0.8718087077140808, -0.7684789299964905, -0.6651490926742554, -0.561819314956665, -0.4584895372390747, -0.3551597595214844, -0.25182995200157166, -0.14850014448165894, -0.045170366764068604, 0.05815941095352173, 0.16148924827575684, 0.26481902599334717, 0.3681488037109375, 0.47147858142852783, 0.5748083591461182, 0.6781381964683533, 0.7814679741859436, 0.8847977519035339, 0.988127589225769, 1.0914573669433594, 1.1947871446609497, 1.29811692237854, 1.4014467000961304, 1.5047764778137207, 1.6081063747406006, 1.7114360332489014, 1.8147659301757812, 1.9180957078933716, 2.021425485610962]}, "gradients/encoder.encoder.layers.18.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 0.0, 1.0, 4.0, 3.0, 1.0, 6.0, 8.0, 6.0, 11.0, 14.0, 14.0, 17.0, 14.0, 24.0, 37.0, 35.0, 42.0, 46.0, 41.0, 43.0, 44.0, 60.0, 60.0, 52.0, 59.0, 44.0, 41.0, 44.0, 44.0, 27.0, 31.0, 29.0, 20.0, 22.0, 16.0, 13.0, 9.0, 9.0, 7.0, 6.0, 4.0, 3.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.089052438735962, -1.0584359169006348, -1.0278193950653076, -0.9972027540206909, -0.9665862321853638, -0.9359697103500366, -0.9053531289100647, -0.8747365474700928, -0.8441200256347656, -0.8135035037994385, -0.7828869223594666, -0.7522703409194946, -0.7216538190841675, -0.6910372972488403, -0.6604207158088684, -0.6298041343688965, -0.5991876125335693, -0.5685710906982422, -0.5379545092582703, -0.5073379278182983, -0.4767214059829712, -0.44610485434532166, -0.4154883027076721, -0.3848717510700226, -0.35425519943237305, -0.3236386477947235, -0.293022096157074, -0.26240554451942444, -0.2317889928817749, -0.20117244124412537, -0.17055588960647583, -0.1399393379688263, -0.10932278633117676, -0.07870623469352722, -0.048089683055877686, -0.01747313141822815, 0.013143420219421387, 0.04375997185707092, 0.07437652349472046, 0.10499307513237, 0.13560962677001953, 0.16622617840766907, 0.1968427300453186, 0.22745928168296814, 0.2580758333206177, 0.2886923849582672, 0.31930893659591675, 0.3499254882335663, 0.3805420398712158, 0.41115859150886536, 0.4417751431465149, 0.47239169478416443, 0.503008246421814, 0.5336247682571411, 0.564241349697113, 0.594857931137085, 0.6254744529724121, 0.6560909748077393, 0.6867075562477112, 0.7173241376876831, 0.7479406595230103, 0.7785571813583374, 0.8091737627983093, 0.8397903442382812, 0.8704068660736084]}, "gradients/encoder.encoder.layers.18.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 4.0, 1.0, 2.0, 5.0, 5.0, 8.0, 6.0, 28.0, 35.0, 31.0, 56.0, 104.0, 191.0, 319.0, 646.0, 1487.0, 4737.0, 28402.0, 460831.0, 512121.0, 31361.0, 5121.0, 1580.0, 662.0, 331.0, 177.0, 114.0, 68.0, 40.0, 33.0, 18.0, 11.0, 10.0, 4.0, 5.0, 3.0, 1.0, 3.0, 4.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-1.923828125, -1.868896484375, -1.81396484375, -1.759033203125, -1.7041015625, -1.649169921875, -1.59423828125, -1.539306640625, -1.484375, -1.429443359375, -1.37451171875, -1.319580078125, -1.2646484375, -1.209716796875, -1.15478515625, -1.099853515625, -1.044921875, -0.989990234375, -0.93505859375, -0.880126953125, -0.8251953125, -0.770263671875, -0.71533203125, -0.660400390625, -0.60546875, -0.550537109375, -0.49560546875, -0.440673828125, -0.3857421875, -0.330810546875, -0.27587890625, -0.220947265625, -0.166015625, -0.111083984375, -0.05615234375, -0.001220703125, 0.0537109375, 0.108642578125, 0.16357421875, 0.218505859375, 0.2734375, 0.328369140625, 0.38330078125, 0.438232421875, 0.4931640625, 0.548095703125, 0.60302734375, 0.657958984375, 0.712890625, 0.767822265625, 0.82275390625, 0.877685546875, 0.9326171875, 0.987548828125, 1.04248046875, 1.097412109375, 1.15234375, 1.207275390625, 1.26220703125, 1.317138671875, 1.3720703125, 1.427001953125, 1.48193359375, 1.536865234375, 1.591796875]}, "gradients/encoder.encoder.layers.18.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 1.0, 0.0, 1.0, 2.0, 4.0, 4.0, 8.0, 24.0, 24.0, 60.0, 84.0, 132.0, 139.0, 162.0, 119.0, 96.0, 71.0, 41.0, 17.0, 7.0, 7.0, 5.0, 1.0, 0.0, 1.0, 3.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.487060546875, -0.4697456359863281, -0.45243072509765625, -0.4351158142089844, -0.4178009033203125, -0.4004859924316406, -0.38317108154296875, -0.3658561706542969, -0.348541259765625, -0.3312263488769531, -0.31391143798828125, -0.2965965270996094, -0.2792816162109375, -0.2619667053222656, -0.24465179443359375, -0.22733688354492188, -0.21002197265625, -0.19270706176757812, -0.17539215087890625, -0.15807723999023438, -0.1407623291015625, -0.12344741821289062, -0.10613250732421875, -0.08881759643554688, -0.071502685546875, -0.054187774658203125, -0.03687286376953125, -0.019557952880859375, -0.0022430419921875, 0.015071868896484375, 0.03238677978515625, 0.049701690673828125, 0.0670166015625, 0.08433151245117188, 0.10164642333984375, 0.11896133422851562, 0.1362762451171875, 0.15359115600585938, 0.17090606689453125, 0.18822097778320312, 0.205535888671875, 0.22285079956054688, 0.24016571044921875, 0.2574806213378906, 0.2747955322265625, 0.2921104431152344, 0.30942535400390625, 0.3267402648925781, 0.34405517578125, 0.3613700866699219, 0.37868499755859375, 0.3959999084472656, 0.4133148193359375, 0.4306297302246094, 0.44794464111328125, 0.4652595520019531, 0.482574462890625, 0.4998893737792969, 0.5172042846679688, 0.5345191955566406, 0.5518341064453125, 0.5691490173339844, 0.5864639282226562, 0.6037788391113281, 0.62109375]}, "gradients/encoder.encoder.layers.18.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 1.0, 2.0, 0.0, 0.0, 3.0, 4.0, 5.0, 9.0, 3.0, 15.0, 5.0, 16.0, 16.0, 26.0, 30.0, 55.0, 66.0, 107.0, 140.0, 221.0, 339.0, 549.0, 908.0, 1956.0, 4961.0, 17286.0, 85799.0, 460706.0, 387037.0, 65898.0, 14031.0, 4234.0, 1620.0, 883.0, 521.0, 343.0, 213.0, 171.0, 113.0, 78.0, 67.0, 30.0, 26.0, 21.0, 14.0, 10.0, 11.0, 7.0, 6.0, 0.0, 3.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-0.98046875, -0.9497604370117188, -0.9190521240234375, -0.8883438110351562, -0.857635498046875, -0.8269271850585938, -0.7962188720703125, -0.7655105590820312, -0.73480224609375, -0.7040939331054688, -0.6733856201171875, -0.6426773071289062, -0.611968994140625, -0.5812606811523438, -0.5505523681640625, -0.5198440551757812, -0.4891357421875, -0.45842742919921875, -0.4277191162109375, -0.39701080322265625, -0.366302490234375, -0.33559417724609375, -0.3048858642578125, -0.27417755126953125, -0.24346923828125, -0.21276092529296875, -0.1820526123046875, -0.15134429931640625, -0.120635986328125, -0.08992767333984375, -0.0592193603515625, -0.02851104736328125, 0.002197265625, 0.03290557861328125, 0.0636138916015625, 0.09432220458984375, 0.125030517578125, 0.15573883056640625, 0.1864471435546875, 0.21715545654296875, 0.24786376953125, 0.27857208251953125, 0.3092803955078125, 0.33998870849609375, 0.370697021484375, 0.40140533447265625, 0.4321136474609375, 0.46282196044921875, 0.4935302734375, 0.5242385864257812, 0.5549468994140625, 0.5856552124023438, 0.616363525390625, 0.6470718383789062, 0.6777801513671875, 0.7084884643554688, 0.73919677734375, 0.7699050903320312, 0.8006134033203125, 0.8313217163085938, 0.862030029296875, 0.8927383422851562, 0.9234466552734375, 0.9541549682617188, 0.98486328125]}, "gradients/encoder.encoder.layers.18.attention.v_proj.bias": {"_type": "histogram", "values": [4.0, 0.0, 0.0, 2.0, 5.0, 2.0, 2.0, 6.0, 3.0, 4.0, 5.0, 10.0, 6.0, 17.0, 11.0, 13.0, 8.0, 18.0, 25.0, 30.0, 32.0, 33.0, 37.0, 46.0, 50.0, 38.0, 40.0, 50.0, 55.0, 40.0, 48.0, 40.0, 35.0, 38.0, 36.0, 25.0, 26.0, 22.0, 19.0, 15.0, 25.0, 20.0, 17.0, 7.0, 11.0, 8.0, 4.0, 9.0, 6.0, 8.0, 4.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.88818359375, -0.8559799194335938, -0.8237762451171875, -0.7915725708007812, -0.759368896484375, -0.7271652221679688, -0.6949615478515625, -0.6627578735351562, -0.63055419921875, -0.5983505249023438, -0.5661468505859375, -0.5339431762695312, -0.501739501953125, -0.46953582763671875, -0.4373321533203125, -0.40512847900390625, -0.3729248046875, -0.34072113037109375, -0.3085174560546875, -0.27631378173828125, -0.244110107421875, -0.21190643310546875, -0.1797027587890625, -0.14749908447265625, -0.11529541015625, -0.08309173583984375, -0.0508880615234375, -0.01868438720703125, 0.013519287109375, 0.04572296142578125, 0.0779266357421875, 0.11013031005859375, 0.142333984375, 0.17453765869140625, 0.2067413330078125, 0.23894500732421875, 0.271148681640625, 0.30335235595703125, 0.3355560302734375, 0.36775970458984375, 0.39996337890625, 0.43216705322265625, 0.4643707275390625, 0.49657440185546875, 0.528778076171875, 0.5609817504882812, 0.5931854248046875, 0.6253890991210938, 0.6575927734375, 0.6897964477539062, 0.7220001220703125, 0.7542037963867188, 0.786407470703125, 0.8186111450195312, 0.8508148193359375, 0.8830184936523438, 0.91522216796875, 0.9474258422851562, 0.9796295166015625, 1.0118331909179688, 1.044036865234375, 1.0762405395507812, 1.1084442138671875, 1.1406478881835938, 1.1728515625]}, "gradients/encoder.encoder.layers.18.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0, 2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 2.0, 5.0, 9.0, 15.0, 10.0, 20.0, 34.0, 50.0, 82.0, 154.0, 337.0, 902.0, 3412.0, 33519.0, 874066.0, 127379.0, 6265.0, 1288.0, 489.0, 220.0, 120.0, 62.0, 36.0, 28.0, 16.0, 11.0, 8.0, 5.0, 2.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.60107421875, -0.5846824645996094, -0.5682907104492188, -0.5518989562988281, -0.5355072021484375, -0.5191154479980469, -0.5027236938476562, -0.4863319396972656, -0.469940185546875, -0.4535484313964844, -0.43715667724609375, -0.4207649230957031, -0.4043731689453125, -0.3879814147949219, -0.37158966064453125, -0.3551979064941406, -0.33880615234375, -0.3224143981933594, -0.30602264404296875, -0.2896308898925781, -0.2732391357421875, -0.2568473815917969, -0.24045562744140625, -0.22406387329101562, -0.207672119140625, -0.19128036499023438, -0.17488861083984375, -0.15849685668945312, -0.1421051025390625, -0.12571334838867188, -0.10932159423828125, -0.09292984008789062, -0.0765380859375, -0.060146331787109375, -0.04375457763671875, -0.027362823486328125, -0.0109710693359375, 0.005420684814453125, 0.02181243896484375, 0.038204193115234375, 0.054595947265625, 0.07098770141601562, 0.08737945556640625, 0.10377120971679688, 0.1201629638671875, 0.13655471801757812, 0.15294647216796875, 0.16933822631835938, 0.18572998046875, 0.20212173461914062, 0.21851348876953125, 0.23490524291992188, 0.2512969970703125, 0.2676887512207031, 0.28408050537109375, 0.3004722595214844, 0.316864013671875, 0.3332557678222656, 0.34964752197265625, 0.3660392761230469, 0.3824310302734375, 0.3988227844238281, 0.41521453857421875, 0.4316062927246094, 0.447998046875]}, "gradients/encoder.encoder.layers.18.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 4.0, 1.0, 3.0, 1.0, 2.0, 4.0, 8.0, 6.0, 15.0, 6.0, 16.0, 21.0, 25.0, 37.0, 48.0, 64.0, 142.0, 166.0, 143.0, 85.0, 56.0, 38.0, 25.0, 21.0, 18.0, 13.0, 8.0, 9.0, 3.0, 3.0, 5.0, 2.0, 5.0, 1.0, 4.0, 3.0, 0.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.00014090538024902344, -0.0001371670514345169, -0.00013342872262001038, -0.00012969039380550385, -0.00012595206499099731, -0.00012221373617649078, -0.00011847540736198425, -0.00011473707854747772, -0.00011099874973297119, -0.00010726042091846466, -0.00010352209210395813, -9.97837632894516e-05, -9.604543447494507e-05, -9.230710566043854e-05, -8.856877684593201e-05, -8.483044803142548e-05, -8.109211921691895e-05, -7.735379040241241e-05, -7.361546158790588e-05, -6.987713277339935e-05, -6.613880395889282e-05, -6.240047514438629e-05, -5.866214632987976e-05, -5.492381751537323e-05, -5.11854887008667e-05, -4.744715988636017e-05, -4.370883107185364e-05, -3.997050225734711e-05, -3.6232173442840576e-05, -3.2493844628334045e-05, -2.8755515813827515e-05, -2.5017186999320984e-05, -2.1278858184814453e-05, -1.7540529370307922e-05, -1.3802200555801392e-05, -1.006387174129486e-05, -6.32554292678833e-06, -2.5872141122817993e-06, 1.1511147022247314e-06, 4.889443516731262e-06, 8.627772331237793e-06, 1.2366101145744324e-05, 1.6104429960250854e-05, 1.9842758774757385e-05, 2.3581087589263916e-05, 2.7319416403770447e-05, 3.105774521827698e-05, 3.479607403278351e-05, 3.853440284729004e-05, 4.227273166179657e-05, 4.60110604763031e-05, 4.974938929080963e-05, 5.348771810531616e-05, 5.722604691982269e-05, 6.0964375734329224e-05, 6.470270454883575e-05, 6.844103336334229e-05, 7.217936217784882e-05, 7.591769099235535e-05, 7.965601980686188e-05, 8.339434862136841e-05, 8.713267743587494e-05, 9.087100625038147e-05, 9.4609335064888e-05, 9.834766387939453e-05]}, "gradients/encoder.encoder.layers.18.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 3.0, 4.0, 4.0, 6.0, 18.0, 16.0, 29.0, 34.0, 89.0, 186.0, 479.0, 1630.0, 9739.0, 502863.0, 520804.0, 10224.0, 1534.0, 492.0, 205.0, 75.0, 39.0, 40.0, 18.0, 12.0, 7.0, 8.0, 4.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.7138671875, -0.6922378540039062, -0.6706085205078125, -0.6489791870117188, -0.627349853515625, -0.6057205200195312, -0.5840911865234375, -0.5624618530273438, -0.54083251953125, -0.5192031860351562, -0.4975738525390625, -0.47594451904296875, -0.454315185546875, -0.43268585205078125, -0.4110565185546875, -0.38942718505859375, -0.3677978515625, -0.34616851806640625, -0.3245391845703125, -0.30290985107421875, -0.281280517578125, -0.25965118408203125, -0.2380218505859375, -0.21639251708984375, -0.19476318359375, -0.17313385009765625, -0.1515045166015625, -0.12987518310546875, -0.108245849609375, -0.08661651611328125, -0.0649871826171875, -0.04335784912109375, -0.021728515625, -9.918212890625e-05, 0.0215301513671875, 0.04315948486328125, 0.064788818359375, 0.08641815185546875, 0.1080474853515625, 0.12967681884765625, 0.15130615234375, 0.17293548583984375, 0.1945648193359375, 0.21619415283203125, 0.237823486328125, 0.25945281982421875, 0.2810821533203125, 0.30271148681640625, 0.3243408203125, 0.34597015380859375, 0.3675994873046875, 0.38922882080078125, 0.410858154296875, 0.43248748779296875, 0.4541168212890625, 0.47574615478515625, 0.49737548828125, 0.5190048217773438, 0.5406341552734375, 0.5622634887695312, 0.583892822265625, 0.6055221557617188, 0.6271514892578125, 0.6487808227539062, 0.67041015625]}, "gradients/encoder.encoder.layers.18.attention.q_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 1.0, 0.0, 1.0, 2.0, 1.0, 7.0, 6.0, 8.0, 13.0, 30.0, 69.0, 110.0, 130.0, 193.0, 149.0, 126.0, 74.0, 35.0, 14.0, 12.0, 13.0, 5.0, 2.0, 2.0, 3.0, 3.0, 4.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.63037109375, -0.6136627197265625, -0.596954345703125, -0.5802459716796875, -0.56353759765625, -0.5468292236328125, -0.530120849609375, -0.5134124755859375, -0.4967041015625, -0.4799957275390625, -0.463287353515625, -0.4465789794921875, -0.42987060546875, -0.4131622314453125, -0.396453857421875, -0.3797454833984375, -0.363037109375, -0.3463287353515625, -0.329620361328125, -0.3129119873046875, -0.29620361328125, -0.2794952392578125, -0.262786865234375, -0.2460784912109375, -0.2293701171875, -0.2126617431640625, -0.195953369140625, -0.1792449951171875, -0.16253662109375, -0.1458282470703125, -0.129119873046875, -0.1124114990234375, -0.095703125, -0.0789947509765625, -0.062286376953125, -0.0455780029296875, -0.02886962890625, -0.0121612548828125, 0.004547119140625, 0.0212554931640625, 0.0379638671875, 0.0546722412109375, 0.071380615234375, 0.0880889892578125, 0.10479736328125, 0.1215057373046875, 0.138214111328125, 0.1549224853515625, 0.171630859375, 0.1883392333984375, 0.205047607421875, 0.2217559814453125, 0.23846435546875, 0.2551727294921875, 0.271881103515625, 0.2885894775390625, 0.3052978515625, 0.3220062255859375, 0.338714599609375, 0.3554229736328125, 0.37213134765625, 0.3888397216796875, 0.405548095703125, 0.4222564697265625, 0.43896484375]}, "gradients/encoder.encoder.layers.18.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 1.0, 6.0, 5.0, 7.0, 19.0, 38.0, 47.0, 95.0, 155.0, 205.0, 174.0, 128.0, 49.0, 34.0, 14.0, 14.0, 10.0, 5.0, 2.0, 2.0, 0.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-5.40093469619751, -5.198666572570801, -4.996398448944092, -4.794130325317383, -4.591862201690674, -4.389594078063965, -4.187325954437256, -3.985057830810547, -3.782789707183838, -3.580521583557129, -3.37825345993042, -3.175985336303711, -2.973717212677002, -2.771449089050293, -2.569180965423584, -2.366912841796875, -2.164644718170166, -1.962376594543457, -1.760108470916748, -1.557840347290039, -1.35557222366333, -1.153304100036621, -0.9510359764099121, -0.7487678527832031, -0.5464997291564941, -0.34423160552978516, -0.14196348190307617, 0.06030464172363281, 0.2625727653503418, 0.4648408889770508, 0.6671090126037598, 0.8693771362304688, 1.071645736694336, 1.273913860321045, 1.476181983947754, 1.678450107574463, 1.8807182312011719, 2.082986354827881, 2.28525447845459, 2.487522602081299, 2.689790725708008, 2.892058849334717, 3.094326972961426, 3.2965950965881348, 3.4988632202148438, 3.7011313438415527, 3.9033994674682617, 4.105667591094971, 4.30793571472168, 4.510203838348389, 4.712471961975098, 4.914740085601807, 5.117008209228516, 5.319276332855225, 5.521544456481934, 5.723812580108643, 5.926080703735352, 6.1283488273620605, 6.3306169509887695, 6.5328850746154785, 6.7351531982421875, 6.9374213218688965, 7.1396894454956055, 7.3419575691223145, 7.544225692749023]}, "gradients/encoder.encoder.layers.18.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 2.0, 7.0, 5.0, 6.0, 13.0, 17.0, 16.0, 26.0, 21.0, 45.0, 25.0, 58.0, 56.0, 56.0, 66.0, 79.0, 62.0, 69.0, 47.0, 60.0, 59.0, 48.0, 29.0, 27.0, 25.0, 15.0, 20.0, 12.0, 7.0, 5.0, 6.0, 3.0, 3.0, 3.0, 5.0, 3.0, 0.0, 2.0, 1.0, 3.0, 0.0, 1.0, 1.0, 0.0, 2.0], "bins": [-8.962808609008789, -8.708889961242676, -8.454971313476562, -8.20105266571045, -7.947134494781494, -7.693215847015381, -7.439297676086426, -7.1853790283203125, -6.931460380554199, -6.677541732788086, -6.423623085021973, -6.169704914093018, -5.915786266326904, -5.661867618560791, -5.407949447631836, -5.154030799865723, -4.900112152099609, -4.646193504333496, -4.392274856567383, -4.138356685638428, -3.8844380378723145, -3.630519390106201, -3.376600980758667, -3.122682571411133, -2.8687639236450195, -2.6148452758789062, -2.360926866531372, -2.107008457183838, -1.8530898094177246, -1.5991712808609009, -1.3452527523040771, -1.0913342237472534, -0.8374161720275879, -0.5834976434707642, -0.32957911491394043, -0.0756605863571167, 0.17825794219970703, 0.43217647075653076, 0.6860949993133545, 0.9400135278701782, 1.193932056427002, 1.4478505849838257, 1.7017691135406494, 1.9556876420974731, 2.209606170654297, 2.46352481842041, 2.7174432277679443, 2.9713616371154785, 3.225280284881592, 3.479198932647705, 3.7331173419952393, 3.9870357513427734, 4.240954399108887, 4.494873046875, 4.748791694641113, 5.002709865570068, 5.256628513336182, 5.510547161102295, 5.76446533203125, 6.018383979797363, 6.272302627563477, 6.52622127532959, 6.780139923095703, 7.034058094024658, 7.2879767417907715]}, "gradients/encoder.encoder.layers.17.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 4.0, 4.0, 10.0, 11.0, 18.0, 30.0, 33.0, 61.0, 100.0, 148.0, 289.0, 657.0, 1492.0, 4789.0, 25431.0, 2687333.0, 1441119.0, 24294.0, 4948.0, 1753.0, 767.0, 375.0, 228.0, 136.0, 70.0, 49.0, 42.0, 28.0, 20.0, 9.0, 7.0, 6.0, 14.0, 1.0, 4.0, 4.0, 3.0, 2.0, 0.0, 2.0, 3.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.669921875, -1.6080322265625, -1.546142578125, -1.4842529296875, -1.42236328125, -1.3604736328125, -1.298583984375, -1.2366943359375, -1.1748046875, -1.1129150390625, -1.051025390625, -0.9891357421875, -0.92724609375, -0.8653564453125, -0.803466796875, -0.7415771484375, -0.6796875, -0.6177978515625, -0.555908203125, -0.4940185546875, -0.43212890625, -0.3702392578125, -0.308349609375, -0.2464599609375, -0.1845703125, -0.1226806640625, -0.060791015625, 0.0010986328125, 0.06298828125, 0.1248779296875, 0.186767578125, 0.2486572265625, 0.310546875, 0.3724365234375, 0.434326171875, 0.4962158203125, 0.55810546875, 0.6199951171875, 0.681884765625, 0.7437744140625, 0.8056640625, 0.8675537109375, 0.929443359375, 0.9913330078125, 1.05322265625, 1.1151123046875, 1.177001953125, 1.2388916015625, 1.30078125, 1.3626708984375, 1.424560546875, 1.4864501953125, 1.54833984375, 1.6102294921875, 1.672119140625, 1.7340087890625, 1.7958984375, 1.8577880859375, 1.919677734375, 1.9815673828125, 2.04345703125, 2.1053466796875, 2.167236328125, 2.2291259765625, 2.291015625]}, "gradients/encoder.encoder.layers.17.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 1.0, 1.0, 2.0, 3.0, 4.0, 7.0, 15.0, 32.0, 41.0, 88.0, 90.0, 114.0, 136.0, 138.0, 126.0, 75.0, 57.0, 38.0, 13.0, 10.0, 9.0, 1.0, 2.0, 5.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.50146484375, -0.48340606689453125, -0.4653472900390625, -0.44728851318359375, -0.429229736328125, -0.41117095947265625, -0.3931121826171875, -0.37505340576171875, -0.35699462890625, -0.33893585205078125, -0.3208770751953125, -0.30281829833984375, -0.284759521484375, -0.26670074462890625, -0.2486419677734375, -0.23058319091796875, -0.2125244140625, -0.19446563720703125, -0.1764068603515625, -0.15834808349609375, -0.140289306640625, -0.12223052978515625, -0.1041717529296875, -0.08611297607421875, -0.06805419921875, -0.04999542236328125, -0.0319366455078125, -0.01387786865234375, 0.004180908203125, 0.02223968505859375, 0.0402984619140625, 0.05835723876953125, 0.076416015625, 0.09447479248046875, 0.1125335693359375, 0.13059234619140625, 0.148651123046875, 0.16670989990234375, 0.1847686767578125, 0.20282745361328125, 0.22088623046875, 0.23894500732421875, 0.2570037841796875, 0.27506256103515625, 0.293121337890625, 0.31118011474609375, 0.3292388916015625, 0.34729766845703125, 0.3653564453125, 0.38341522216796875, 0.4014739990234375, 0.41953277587890625, 0.437591552734375, 0.45565032958984375, 0.4737091064453125, 0.49176788330078125, 0.50982666015625, 0.5278854370117188, 0.5459442138671875, 0.5640029907226562, 0.582061767578125, 0.6001205444335938, 0.6181793212890625, 0.6362380981445312, 0.654296875]}, "gradients/encoder.encoder.layers.17.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 5.0, 2.0, 2.0, 4.0, 12.0, 26.0, 47.0, 93.0, 177.0, 410.0, 1040.0, 5012.0, 182848.0, 3988846.0, 12714.0, 1866.0, 646.0, 265.0, 115.0, 69.0, 30.0, 18.0, 11.0, 6.0, 6.0, 7.0, 5.0, 2.0, 1.0, 2.0, 2.0, 0.0, 2.0, 0.0, 0.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.30078125, -2.212921142578125, -2.12506103515625, -2.037200927734375, -1.9493408203125, -1.861480712890625, -1.77362060546875, -1.685760498046875, -1.597900390625, -1.510040283203125, -1.42218017578125, -1.334320068359375, -1.2464599609375, -1.158599853515625, -1.07073974609375, -0.982879638671875, -0.89501953125, -0.807159423828125, -0.71929931640625, -0.631439208984375, -0.5435791015625, -0.455718994140625, -0.36785888671875, -0.279998779296875, -0.192138671875, -0.104278564453125, -0.01641845703125, 0.071441650390625, 0.1593017578125, 0.247161865234375, 0.33502197265625, 0.422882080078125, 0.5107421875, 0.598602294921875, 0.68646240234375, 0.774322509765625, 0.8621826171875, 0.950042724609375, 1.03790283203125, 1.125762939453125, 1.213623046875, 1.301483154296875, 1.38934326171875, 1.477203369140625, 1.5650634765625, 1.652923583984375, 1.74078369140625, 1.828643798828125, 1.91650390625, 2.004364013671875, 2.09222412109375, 2.180084228515625, 2.2679443359375, 2.355804443359375, 2.44366455078125, 2.531524658203125, 2.619384765625, 2.707244873046875, 2.79510498046875, 2.882965087890625, 2.9708251953125, 3.058685302734375, 3.14654541015625, 3.234405517578125, 3.322265625]}, "gradients/encoder.encoder.layers.17.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 4.0, 5.0, 8.0, 10.0, 18.0, 28.0, 57.0, 188.0, 1219.0, 2092.0, 294.0, 70.0, 31.0, 26.0, 10.0, 9.0, 4.0, 3.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.98583984375, -0.9541549682617188, -0.9224700927734375, -0.8907852172851562, -0.859100341796875, -0.8274154663085938, -0.7957305908203125, -0.7640457153320312, -0.73236083984375, -0.7006759643554688, -0.6689910888671875, -0.6373062133789062, -0.605621337890625, -0.5739364624023438, -0.5422515869140625, -0.5105667114257812, -0.4788818359375, -0.44719696044921875, -0.4155120849609375, -0.38382720947265625, -0.352142333984375, -0.32045745849609375, -0.2887725830078125, -0.25708770751953125, -0.22540283203125, -0.19371795654296875, -0.1620330810546875, -0.13034820556640625, -0.098663330078125, -0.06697845458984375, -0.0352935791015625, -0.00360870361328125, 0.028076171875, 0.05976104736328125, 0.0914459228515625, 0.12313079833984375, 0.154815673828125, 0.18650054931640625, 0.2181854248046875, 0.24987030029296875, 0.28155517578125, 0.31324005126953125, 0.3449249267578125, 0.37660980224609375, 0.408294677734375, 0.43997955322265625, 0.4716644287109375, 0.5033493041992188, 0.5350341796875, 0.5667190551757812, 0.5984039306640625, 0.6300888061523438, 0.661773681640625, 0.6934585571289062, 0.7251434326171875, 0.7568283081054688, 0.78851318359375, 0.8201980590820312, 0.8518829345703125, 0.8835678100585938, 0.915252685546875, 0.9469375610351562, 0.9786224365234375, 1.0103073120117188, 1.0419921875]}, "gradients/encoder.encoder.layers.17.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 4.0, 2.0, 2.0, 7.0, 3.0, 11.0, 18.0, 57.0, 129.0, 221.0, 250.0, 171.0, 67.0, 32.0, 18.0, 7.0, 5.0, 0.0, 1.0, 2.0, 2.0, 1.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.6571035385131836, -2.562452554702759, -2.467801809310913, -2.3731508255004883, -2.2785000801086426, -2.1838490962982178, -2.089198112487793, -1.9945472478866577, -1.8998963832855225, -1.8052455186843872, -1.710594654083252, -1.6159436702728271, -1.521292805671692, -1.4266419410705566, -1.3319909572601318, -1.2373400926589966, -1.1426892280578613, -1.048038363456726, -0.953387439250946, -0.858736515045166, -0.7640856504440308, -0.6694347858428955, -0.5747838616371155, -0.48013293743133545, -0.3854820728302002, -0.29083117842674255, -0.1961802840232849, -0.10152938961982727, -0.006878495216369629, 0.08777239918708801, 0.18242329359054565, 0.2770742177963257, 0.37172484397888184, 0.4663757383823395, 0.5610266327857971, 0.6556775569915771, 0.7503284215927124, 0.8449792861938477, 0.9396302103996277, 1.0342811346054077, 1.128931999206543, 1.2235828638076782, 1.3182337284088135, 1.4128847122192383, 1.5075355768203735, 1.6021864414215088, 1.6968374252319336, 1.7914882898330688, 1.886139154434204, 1.9807900190353394, 2.0754408836364746, 2.1700918674468994, 2.264742851257324, 2.35939359664917, 2.4540445804595947, 2.5486955642700195, 2.6433463096618652, 2.73799729347229, 2.8326480388641357, 2.9272990226745605, 3.0219497680664062, 3.116600751876831, 3.211251735687256, 3.3059024810791016, 3.4005534648895264]}, "gradients/encoder.encoder.layers.17.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 2.0, 2.0, 2.0, 5.0, 5.0, 13.0, 19.0, 27.0, 35.0, 63.0, 85.0, 118.0, 118.0, 152.0, 82.0, 94.0, 74.0, 39.0, 36.0, 15.0, 11.0, 6.0, 3.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-3.897249698638916, -3.7748000621795654, -3.652350425720215, -3.5299010276794434, -3.4074513912200928, -3.285001754760742, -3.1625521183013916, -3.040102481842041, -2.9176530838012695, -2.795203447341919, -2.6727538108825684, -2.550304412841797, -2.4278547763824463, -2.3054051399230957, -2.182955503463745, -2.0605058670043945, -1.938056230545044, -1.8156065940856934, -1.6931570768356323, -1.5707074403762817, -1.4482579231262207, -1.3258082866668701, -1.2033586502075195, -1.080909013748169, -0.9584594964981079, -0.8360099196434021, -0.7135603427886963, -0.5911107063293457, -0.4686611294746399, -0.3462115526199341, -0.2237619161605835, -0.10131233930587769, 0.021137237548828125, 0.14358682930469513, 0.26603642106056213, 0.38848602771759033, 0.5109356045722961, 0.633385181427002, 0.7558348178863525, 0.8782843947410583, 1.0007339715957642, 1.1231836080551147, 1.2456331253051758, 1.3680827617645264, 1.490532398223877, 1.612981915473938, 1.7354315519332886, 1.8578810691833496, 1.9803307056427002, 2.102780342102051, 2.2252299785614014, 2.347679615020752, 2.4701290130615234, 2.592578649520874, 2.7150282859802246, 2.837477922439575, 2.959927558898926, 3.0823771953582764, 3.204826831817627, 3.3272762298583984, 3.449725866317749, 3.5721755027770996, 3.69462513923645, 3.817074775695801, 3.9395241737365723]}, "gradients/encoder.encoder.layers.17.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 2.0, 0.0, 5.0, 11.0, 9.0, 21.0, 25.0, 38.0, 71.0, 132.0, 266.0, 582.0, 1348.0, 3975.0, 26566.0, 683457.0, 313892.0, 13308.0, 2809.0, 1096.0, 460.0, 221.0, 117.0, 69.0, 39.0, 17.0, 12.0, 4.0, 3.0, 3.0, 3.0, 1.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0], "bins": [-2.63671875, -2.56707763671875, -2.4974365234375, -2.42779541015625, -2.358154296875, -2.28851318359375, -2.2188720703125, -2.14923095703125, -2.07958984375, -2.00994873046875, -1.9403076171875, -1.87066650390625, -1.801025390625, -1.73138427734375, -1.6617431640625, -1.59210205078125, -1.5224609375, -1.45281982421875, -1.3831787109375, -1.31353759765625, -1.243896484375, -1.17425537109375, -1.1046142578125, -1.03497314453125, -0.96533203125, -0.89569091796875, -0.8260498046875, -0.75640869140625, -0.686767578125, -0.61712646484375, -0.5474853515625, -0.47784423828125, -0.408203125, -0.33856201171875, -0.2689208984375, -0.19927978515625, -0.129638671875, -0.05999755859375, 0.0096435546875, 0.07928466796875, 0.14892578125, 0.21856689453125, 0.2882080078125, 0.35784912109375, 0.427490234375, 0.49713134765625, 0.5667724609375, 0.63641357421875, 0.7060546875, 0.77569580078125, 0.8453369140625, 0.91497802734375, 0.984619140625, 1.05426025390625, 1.1239013671875, 1.19354248046875, 1.26318359375, 1.33282470703125, 1.4024658203125, 1.47210693359375, 1.541748046875, 1.61138916015625, 1.6810302734375, 1.75067138671875, 1.8203125]}, "gradients/encoder.encoder.layers.17.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 3.0, 1.0, 4.0, 9.0, 21.0, 37.0, 53.0, 84.0, 103.0, 119.0, 142.0, 129.0, 113.0, 71.0, 53.0, 27.0, 10.0, 7.0, 6.0, 3.0, 5.0, 1.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.474853515625, -0.4575538635253906, -0.44025421142578125, -0.4229545593261719, -0.4056549072265625, -0.3883552551269531, -0.37105560302734375, -0.3537559509277344, -0.336456298828125, -0.3191566467285156, -0.30185699462890625, -0.2845573425292969, -0.2672576904296875, -0.24995803833007812, -0.23265838623046875, -0.21535873413085938, -0.19805908203125, -0.18075942993164062, -0.16345977783203125, -0.14616012573242188, -0.1288604736328125, -0.11156082153320312, -0.09426116943359375, -0.07696151733398438, -0.059661865234375, -0.042362213134765625, -0.02506256103515625, -0.007762908935546875, 0.0095367431640625, 0.026836395263671875, 0.04413604736328125, 0.061435699462890625, 0.0787353515625, 0.09603500366210938, 0.11333465576171875, 0.13063430786132812, 0.1479339599609375, 0.16523361206054688, 0.18253326416015625, 0.19983291625976562, 0.217132568359375, 0.23443222045898438, 0.25173187255859375, 0.2690315246582031, 0.2863311767578125, 0.3036308288574219, 0.32093048095703125, 0.3382301330566406, 0.35552978515625, 0.3728294372558594, 0.39012908935546875, 0.4074287414550781, 0.4247283935546875, 0.4420280456542969, 0.45932769775390625, 0.4766273498535156, 0.493927001953125, 0.5112266540527344, 0.5285263061523438, 0.5458259582519531, 0.5631256103515625, 0.5804252624511719, 0.5977249145507812, 0.6150245666503906, 0.63232421875]}, "gradients/encoder.encoder.layers.17.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 5.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 7.0, 6.0, 3.0, 6.0, 13.0, 9.0, 14.0, 18.0, 34.0, 55.0, 79.0, 124.0, 224.0, 483.0, 1054.0, 3060.0, 14219.0, 131048.0, 714665.0, 161128.0, 16537.0, 3424.0, 1168.0, 501.0, 246.0, 145.0, 91.0, 49.0, 45.0, 27.0, 14.0, 8.0, 12.0, 8.0, 9.0, 2.0, 7.0, 6.0, 3.0, 1.0, 5.0, 2.0, 1.0, 0.0, 0.0, 3.0], "bins": [-1.658203125, -1.612823486328125, -1.56744384765625, -1.522064208984375, -1.4766845703125, -1.431304931640625, -1.38592529296875, -1.340545654296875, -1.295166015625, -1.249786376953125, -1.20440673828125, -1.159027099609375, -1.1136474609375, -1.068267822265625, -1.02288818359375, -0.977508544921875, -0.93212890625, -0.886749267578125, -0.84136962890625, -0.795989990234375, -0.7506103515625, -0.705230712890625, -0.65985107421875, -0.614471435546875, -0.569091796875, -0.523712158203125, -0.47833251953125, -0.432952880859375, -0.3875732421875, -0.342193603515625, -0.29681396484375, -0.251434326171875, -0.2060546875, -0.160675048828125, -0.11529541015625, -0.069915771484375, -0.0245361328125, 0.020843505859375, 0.06622314453125, 0.111602783203125, 0.156982421875, 0.202362060546875, 0.24774169921875, 0.293121337890625, 0.3385009765625, 0.383880615234375, 0.42926025390625, 0.474639892578125, 0.52001953125, 0.565399169921875, 0.61077880859375, 0.656158447265625, 0.7015380859375, 0.746917724609375, 0.79229736328125, 0.837677001953125, 0.883056640625, 0.928436279296875, 0.97381591796875, 1.019195556640625, 1.0645751953125, 1.109954833984375, 1.15533447265625, 1.200714111328125, 1.24609375]}, "gradients/encoder.encoder.layers.17.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 3.0, 0.0, 2.0, 4.0, 1.0, 7.0, 4.0, 8.0, 10.0, 14.0, 9.0, 28.0, 26.0, 29.0, 33.0, 56.0, 48.0, 51.0, 58.0, 53.0, 53.0, 45.0, 59.0, 57.0, 48.0, 50.0, 47.0, 59.0, 32.0, 23.0, 22.0, 21.0, 10.0, 14.0, 3.0, 6.0, 5.0, 4.0, 3.0, 1.0, 2.0, 3.0, 3.0, 2.0, 1.0], "bins": [-1.8564453125, -1.810150146484375, -1.76385498046875, -1.717559814453125, -1.6712646484375, -1.624969482421875, -1.57867431640625, -1.532379150390625, -1.486083984375, -1.439788818359375, -1.39349365234375, -1.347198486328125, -1.3009033203125, -1.254608154296875, -1.20831298828125, -1.162017822265625, -1.11572265625, -1.069427490234375, -1.02313232421875, -0.976837158203125, -0.9305419921875, -0.884246826171875, -0.83795166015625, -0.791656494140625, -0.745361328125, -0.699066162109375, -0.65277099609375, -0.606475830078125, -0.5601806640625, -0.513885498046875, -0.46759033203125, -0.421295166015625, -0.375, -0.328704833984375, -0.28240966796875, -0.236114501953125, -0.1898193359375, -0.143524169921875, -0.09722900390625, -0.050933837890625, -0.004638671875, 0.041656494140625, 0.08795166015625, 0.134246826171875, 0.1805419921875, 0.226837158203125, 0.27313232421875, 0.319427490234375, 0.36572265625, 0.412017822265625, 0.45831298828125, 0.504608154296875, 0.5509033203125, 0.597198486328125, 0.64349365234375, 0.689788818359375, 0.736083984375, 0.782379150390625, 0.82867431640625, 0.874969482421875, 0.9212646484375, 0.967559814453125, 1.01385498046875, 1.060150146484375, 1.1064453125]}, "gradients/encoder.encoder.layers.17.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 2.0, 0.0, 1.0, 2.0, 2.0, 5.0, 8.0, 11.0, 18.0, 32.0, 74.0, 99.0, 267.0, 785.0, 2666.0, 17493.0, 954147.0, 66408.0, 4600.0, 1206.0, 395.0, 181.0, 67.0, 42.0, 17.0, 9.0, 6.0, 6.0, 5.0, 2.0, 3.0, 4.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-1.337890625, -1.3041763305664062, -1.2704620361328125, -1.2367477416992188, -1.203033447265625, -1.1693191528320312, -1.1356048583984375, -1.1018905639648438, -1.06817626953125, -1.0344619750976562, -1.0007476806640625, -0.9670333862304688, -0.933319091796875, -0.8996047973632812, -0.8658905029296875, -0.8321762084960938, -0.7984619140625, -0.7647476196289062, -0.7310333251953125, -0.6973190307617188, -0.663604736328125, -0.6298904418945312, -0.5961761474609375, -0.5624618530273438, -0.52874755859375, -0.49503326416015625, -0.4613189697265625, -0.42760467529296875, -0.393890380859375, -0.36017608642578125, -0.3264617919921875, -0.29274749755859375, -0.259033203125, -0.22531890869140625, -0.1916046142578125, -0.15789031982421875, -0.124176025390625, -0.09046173095703125, -0.0567474365234375, -0.02303314208984375, 0.01068115234375, 0.04439544677734375, 0.0781097412109375, 0.11182403564453125, 0.145538330078125, 0.17925262451171875, 0.2129669189453125, 0.24668121337890625, 0.2803955078125, 0.31410980224609375, 0.3478240966796875, 0.38153839111328125, 0.415252685546875, 0.44896697998046875, 0.4826812744140625, 0.5163955688476562, 0.55010986328125, 0.5838241577148438, 0.6175384521484375, 0.6512527465820312, 0.684967041015625, 0.7186813354492188, 0.7523956298828125, 0.7861099243164062, 0.81982421875]}, "gradients/encoder.encoder.layers.17.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 4.0, 4.0, 5.0, 7.0, 9.0, 14.0, 9.0, 23.0, 16.0, 36.0, 35.0, 73.0, 118.0, 152.0, 164.0, 104.0, 59.0, 32.0, 36.0, 21.0, 16.0, 17.0, 9.0, 12.0, 10.0, 9.0, 4.0, 2.0, 5.0, 3.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0], "bins": [-0.00015914440155029297, -0.00015533901751041412, -0.00015153363347053528, -0.00014772824943065643, -0.0001439228653907776, -0.00014011748135089874, -0.0001363120973110199, -0.00013250671327114105, -0.0001287013292312622, -0.00012489594519138336, -0.00012109056115150452, -0.00011728517711162567, -0.00011347979307174683, -0.00010967440903186798, -0.00010586902499198914, -0.00010206364095211029, -9.825825691223145e-05, -9.44528728723526e-05, -9.064748883247375e-05, -8.684210479259491e-05, -8.303672075271606e-05, -7.923133671283722e-05, -7.542595267295837e-05, -7.162056863307953e-05, -6.781518459320068e-05, -6.400980055332184e-05, -6.020441651344299e-05, -5.639903247356415e-05, -5.25936484336853e-05, -4.878826439380646e-05, -4.498288035392761e-05, -4.117749631404877e-05, -3.737211227416992e-05, -3.356672823429108e-05, -2.976134419441223e-05, -2.5955960154533386e-05, -2.215057611465454e-05, -1.8345192074775696e-05, -1.453980803489685e-05, -1.0734423995018005e-05, -6.92903995513916e-06, -3.123655915260315e-06, 6.817281246185303e-07, 4.4871121644973755e-06, 8.29249620437622e-06, 1.2097880244255066e-05, 1.590326428413391e-05, 1.9708648324012756e-05, 2.35140323638916e-05, 2.7319416403770447e-05, 3.112480044364929e-05, 3.493018448352814e-05, 3.873556852340698e-05, 4.254095256328583e-05, 4.634633660316467e-05, 5.015172064304352e-05, 5.395710468292236e-05, 5.776248872280121e-05, 6.156787276268005e-05, 6.53732568025589e-05, 6.917864084243774e-05, 7.298402488231659e-05, 7.678940892219543e-05, 8.059479296207428e-05, 8.440017700195312e-05]}, "gradients/encoder.encoder.layers.17.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 4.0, 0.0, 2.0, 2.0, 2.0, 11.0, 18.0, 22.0, 29.0, 56.0, 119.0, 261.0, 643.0, 2321.0, 13394.0, 598927.0, 418458.0, 11243.0, 2075.0, 580.0, 195.0, 95.0, 43.0, 20.0, 13.0, 8.0, 9.0, 8.0, 2.0, 1.0, 3.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.5595703125, -0.532867431640625, -0.50616455078125, -0.479461669921875, -0.4527587890625, -0.426055908203125, -0.39935302734375, -0.372650146484375, -0.345947265625, -0.319244384765625, -0.29254150390625, -0.265838623046875, -0.2391357421875, -0.212432861328125, -0.18572998046875, -0.159027099609375, -0.13232421875, -0.105621337890625, -0.07891845703125, -0.052215576171875, -0.0255126953125, 0.001190185546875, 0.02789306640625, 0.054595947265625, 0.081298828125, 0.108001708984375, 0.13470458984375, 0.161407470703125, 0.1881103515625, 0.214813232421875, 0.24151611328125, 0.268218994140625, 0.294921875, 0.321624755859375, 0.34832763671875, 0.375030517578125, 0.4017333984375, 0.428436279296875, 0.45513916015625, 0.481842041015625, 0.508544921875, 0.535247802734375, 0.56195068359375, 0.588653564453125, 0.6153564453125, 0.642059326171875, 0.66876220703125, 0.695465087890625, 0.72216796875, 0.748870849609375, 0.77557373046875, 0.802276611328125, 0.8289794921875, 0.855682373046875, 0.88238525390625, 0.909088134765625, 0.935791015625, 0.962493896484375, 0.98919677734375, 1.015899658203125, 1.0426025390625, 1.069305419921875, 1.09600830078125, 1.122711181640625, 1.1494140625]}, "gradients/encoder.encoder.layers.17.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 2.0, 4.0, 1.0, 3.0, 3.0, 2.0, 3.0, 2.0, 11.0, 15.0, 30.0, 33.0, 51.0, 84.0, 120.0, 155.0, 165.0, 111.0, 69.0, 60.0, 26.0, 14.0, 9.0, 5.0, 5.0, 5.0, 5.0, 5.0, 1.0, 1.0, 2.0, 2.0, 3.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.48291015625, -0.46431732177734375, -0.4457244873046875, -0.42713165283203125, -0.408538818359375, -0.38994598388671875, -0.3713531494140625, -0.35276031494140625, -0.33416748046875, -0.31557464599609375, -0.2969818115234375, -0.27838897705078125, -0.259796142578125, -0.24120330810546875, -0.2226104736328125, -0.20401763916015625, -0.1854248046875, -0.16683197021484375, -0.1482391357421875, -0.12964630126953125, -0.111053466796875, -0.09246063232421875, -0.0738677978515625, -0.05527496337890625, -0.03668212890625, -0.01808929443359375, 0.0005035400390625, 0.01909637451171875, 0.037689208984375, 0.05628204345703125, 0.0748748779296875, 0.09346771240234375, 0.112060546875, 0.13065338134765625, 0.1492462158203125, 0.16783905029296875, 0.186431884765625, 0.20502471923828125, 0.2236175537109375, 0.24221038818359375, 0.26080322265625, 0.27939605712890625, 0.2979888916015625, 0.31658172607421875, 0.335174560546875, 0.35376739501953125, 0.3723602294921875, 0.39095306396484375, 0.4095458984375, 0.42813873291015625, 0.4467315673828125, 0.46532440185546875, 0.483917236328125, 0.5025100708007812, 0.5211029052734375, 0.5396957397460938, 0.55828857421875, 0.5768814086914062, 0.5954742431640625, 0.6140670776367188, 0.632659912109375, 0.6512527465820312, 0.6698455810546875, 0.6884384155273438, 0.70703125]}, "gradients/encoder.encoder.layers.17.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 3.0, 3.0, 12.0, 19.0, 15.0, 51.0, 88.0, 170.0, 202.0, 168.0, 119.0, 65.0, 40.0, 19.0, 15.0, 6.0, 6.0, 2.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 3.0, 1.0, 0.0, 1.0], "bins": [-10.624582290649414, -10.381489753723145, -10.138396263122559, -9.895303726196289, -9.652210235595703, -9.409117698669434, -9.166025161743164, -8.922931671142578, -8.679839134216309, -8.436746597290039, -8.193653106689453, -7.950560569763184, -7.707467555999756, -7.464374542236328, -7.2212815284729, -6.978188514709473, -6.735095500946045, -6.492002487182617, -6.2489094734191895, -6.005816459655762, -5.762723922729492, -5.5196309089660645, -5.276537895202637, -5.033444881439209, -4.790351867675781, -4.5472588539123535, -4.304165840148926, -4.061073303222656, -3.8179802894592285, -3.574887275695801, -3.331794261932373, -3.0887012481689453, -2.845608711242676, -2.602515697479248, -2.3594229221343994, -2.1163299083709717, -1.8732370138168335, -1.6301441192626953, -1.3870511054992676, -1.1439582109451294, -0.9008653163909912, -0.657772421836853, -0.41467946767807007, -0.1715865135192871, 0.07150638103485107, 0.31459927558898926, 0.557692289352417, 0.8007851839065552, 1.0438780784606934, 1.2869709730148315, 1.5300638675689697, 1.7731568813323975, 2.016249656677246, 2.259342670440674, 2.5024356842041016, 2.7455286979675293, 2.988621473312378, 3.2317144870758057, 3.4748072624206543, 3.717900276184082, 3.9609932899475098, 4.2040863037109375, 4.447178840637207, 4.690271854400635, 4.9333648681640625]}, "gradients/encoder.encoder.layers.17.layer_norm.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 0.0, 2.0, 1.0, 1.0, 1.0, 6.0, 3.0, 6.0, 13.0, 10.0, 9.0, 16.0, 16.0, 19.0, 23.0, 39.0, 30.0, 40.0, 39.0, 44.0, 52.0, 66.0, 47.0, 60.0, 68.0, 47.0, 55.0, 51.0, 38.0, 32.0, 28.0, 16.0, 26.0, 18.0, 23.0, 18.0, 8.0, 12.0, 10.0, 1.0, 4.0, 3.0, 4.0, 3.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0], "bins": [-8.529154777526855, -8.273794174194336, -8.0184326171875, -7.763071537017822, -7.5077104568481445, -7.252349853515625, -6.996988773345947, -6.7416276931762695, -6.486266613006592, -6.230905532836914, -5.975544452667236, -5.720183372497559, -5.464822769165039, -5.209461212158203, -4.954100608825684, -4.698739528656006, -4.443378448486328, -4.18801736831665, -3.9326562881469727, -3.677295446395874, -3.4219343662261963, -3.1665732860565186, -2.91121244430542, -2.655851364135742, -2.4004902839660645, -2.1451292037963867, -1.8897682428359985, -1.6344072818756104, -1.3790462017059326, -1.1236851215362549, -0.8683241605758667, -0.6129631996154785, -0.357602596282959, -0.10224157571792603, 0.15311944484710693, 0.4084804654121399, 0.6638414859771729, 0.9192025661468506, 1.1745635271072388, 1.429924488067627, 1.6852855682373047, 1.9406466484069824, 2.19600772857666, 2.451368570327759, 2.7067296504974365, 2.9620907306671143, 3.217451572418213, 3.4728126525878906, 3.7281737327575684, 3.983534812927246, 4.238895893096924, 4.494256973266602, 4.749617576599121, 5.004979133605957, 5.260339736938477, 5.515700817108154, 5.771061897277832, 6.02642297744751, 6.2817840576171875, 6.537145137786865, 6.792506217956543, 7.0478668212890625, 7.30322790145874, 7.558588981628418, 7.813950061798096]}, "gradients/encoder.encoder.layers.16.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 3.0, 4.0, 3.0, 7.0, 14.0, 12.0, 27.0, 54.0, 76.0, 145.0, 236.0, 475.0, 1033.0, 3987.0, 30120.0, 4038069.0, 108748.0, 7807.0, 1909.0, 703.0, 311.0, 217.0, 118.0, 72.0, 54.0, 30.0, 16.0, 14.0, 5.0, 12.0, 4.0, 5.0, 4.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.57421875, -1.512939453125, -1.45166015625, -1.390380859375, -1.3291015625, -1.267822265625, -1.20654296875, -1.145263671875, -1.083984375, -1.022705078125, -0.96142578125, -0.900146484375, -0.8388671875, -0.777587890625, -0.71630859375, -0.655029296875, -0.59375, -0.532470703125, -0.47119140625, -0.409912109375, -0.3486328125, -0.287353515625, -0.22607421875, -0.164794921875, -0.103515625, -0.042236328125, 0.01904296875, 0.080322265625, 0.1416015625, 0.202880859375, 0.26416015625, 0.325439453125, 0.38671875, 0.447998046875, 0.50927734375, 0.570556640625, 0.6318359375, 0.693115234375, 0.75439453125, 0.815673828125, 0.876953125, 0.938232421875, 0.99951171875, 1.060791015625, 1.1220703125, 1.183349609375, 1.24462890625, 1.305908203125, 1.3671875, 1.428466796875, 1.48974609375, 1.551025390625, 1.6123046875, 1.673583984375, 1.73486328125, 1.796142578125, 1.857421875, 1.918701171875, 1.97998046875, 2.041259765625, 2.1025390625, 2.163818359375, 2.22509765625, 2.286376953125, 2.34765625]}, "gradients/encoder.encoder.layers.16.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 5.0, 1.0, 4.0, 7.0, 8.0, 28.0, 33.0, 53.0, 58.0, 80.0, 112.0, 111.0, 118.0, 109.0, 94.0, 59.0, 50.0, 28.0, 12.0, 11.0, 6.0, 5.0, 3.0, 1.0, 3.0, 4.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.455810546875, -0.4388771057128906, -0.42194366455078125, -0.4050102233886719, -0.3880767822265625, -0.3711433410644531, -0.35420989990234375, -0.3372764587402344, -0.320343017578125, -0.3034095764160156, -0.28647613525390625, -0.2695426940917969, -0.2526092529296875, -0.23567581176757812, -0.21874237060546875, -0.20180892944335938, -0.18487548828125, -0.16794204711914062, -0.15100860595703125, -0.13407516479492188, -0.1171417236328125, -0.10020828247070312, -0.08327484130859375, -0.06634140014648438, -0.049407958984375, -0.032474517822265625, -0.01554107666015625, 0.001392364501953125, 0.0183258056640625, 0.035259246826171875, 0.05219268798828125, 0.06912612915039062, 0.0860595703125, 0.10299301147460938, 0.11992645263671875, 0.13685989379882812, 0.1537933349609375, 0.17072677612304688, 0.18766021728515625, 0.20459365844726562, 0.221527099609375, 0.23846054077148438, 0.25539398193359375, 0.2723274230957031, 0.2892608642578125, 0.3061943054199219, 0.32312774658203125, 0.3400611877441406, 0.35699462890625, 0.3739280700683594, 0.39086151123046875, 0.4077949523925781, 0.4247283935546875, 0.4416618347167969, 0.45859527587890625, 0.4755287170410156, 0.492462158203125, 0.5093955993652344, 0.5263290405273438, 0.5432624816894531, 0.5601959228515625, 0.5771293640136719, 0.5940628051757812, 0.6109962463378906, 0.6279296875]}, "gradients/encoder.encoder.layers.16.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 1.0, 1.0, 5.0, 2.0, 3.0, 9.0, 8.0, 21.0, 55.0, 97.0, 210.0, 463.0, 1074.0, 3357.0, 18676.0, 2133484.0, 2013184.0, 18354.0, 3418.0, 1073.0, 419.0, 195.0, 79.0, 40.0, 24.0, 9.0, 10.0, 4.0, 2.0, 3.0, 3.0, 2.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-1.5009765625, -1.4554901123046875, -1.410003662109375, -1.3645172119140625, -1.31903076171875, -1.2735443115234375, -1.228057861328125, -1.1825714111328125, -1.1370849609375, -1.0915985107421875, -1.046112060546875, -1.0006256103515625, -0.95513916015625, -0.9096527099609375, -0.864166259765625, -0.8186798095703125, -0.773193359375, -0.7277069091796875, -0.682220458984375, -0.6367340087890625, -0.59124755859375, -0.5457611083984375, -0.500274658203125, -0.4547882080078125, -0.4093017578125, -0.3638153076171875, -0.318328857421875, -0.2728424072265625, -0.22735595703125, -0.1818695068359375, -0.136383056640625, -0.0908966064453125, -0.04541015625, 7.62939453125e-05, 0.045562744140625, 0.0910491943359375, 0.13653564453125, 0.1820220947265625, 0.227508544921875, 0.2729949951171875, 0.3184814453125, 0.3639678955078125, 0.409454345703125, 0.4549407958984375, 0.50042724609375, 0.5459136962890625, 0.591400146484375, 0.6368865966796875, 0.682373046875, 0.7278594970703125, 0.773345947265625, 0.8188323974609375, 0.86431884765625, 0.9098052978515625, 0.955291748046875, 1.0007781982421875, 1.0462646484375, 1.0917510986328125, 1.137237548828125, 1.1827239990234375, 1.22821044921875, 1.2736968994140625, 1.319183349609375, 1.3646697998046875, 1.41015625]}, "gradients/encoder.encoder.layers.16.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 5.0, 4.0, 4.0, 6.0, 7.0, 13.0, 28.0, 34.0, 63.0, 135.0, 399.0, 2146.0, 821.0, 198.0, 95.0, 48.0, 21.0, 15.0, 12.0, 5.0, 6.0, 4.0, 6.0, 2.0, 3.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.5615234375, -0.5462379455566406, -0.5309524536132812, -0.5156669616699219, -0.5003814697265625, -0.4850959777832031, -0.46981048583984375, -0.4545249938964844, -0.439239501953125, -0.4239540100097656, -0.40866851806640625, -0.3933830261230469, -0.3780975341796875, -0.3628120422363281, -0.34752655029296875, -0.3322410583496094, -0.31695556640625, -0.3016700744628906, -0.28638458251953125, -0.2710990905761719, -0.2558135986328125, -0.24052810668945312, -0.22524261474609375, -0.20995712280273438, -0.194671630859375, -0.17938613891601562, -0.16410064697265625, -0.14881515502929688, -0.1335296630859375, -0.11824417114257812, -0.10295867919921875, -0.08767318725585938, -0.0723876953125, -0.057102203369140625, -0.04181671142578125, -0.026531219482421875, -0.0112457275390625, 0.004039764404296875, 0.01932525634765625, 0.034610748291015625, 0.049896240234375, 0.06518173217773438, 0.08046722412109375, 0.09575271606445312, 0.1110382080078125, 0.12632369995117188, 0.14160919189453125, 0.15689468383789062, 0.17218017578125, 0.18746566772460938, 0.20275115966796875, 0.21803665161132812, 0.2333221435546875, 0.24860763549804688, 0.26389312744140625, 0.2791786193847656, 0.294464111328125, 0.3097496032714844, 0.32503509521484375, 0.3403205871582031, 0.3556060791015625, 0.3708915710449219, 0.38617706298828125, 0.4014625549316406, 0.416748046875]}, "gradients/encoder.encoder.layers.16.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 7.0, 12.0, 32.0, 93.0, 314.0, 349.0, 138.0, 41.0, 12.0, 10.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.765956878662109, -4.652946949005127, -4.5399370193481445, -4.426926612854004, -4.3139166831970215, -4.200906753540039, -4.087896347045898, -3.974886417388916, -3.8618764877319336, -3.748866558074951, -3.6358563899993896, -3.522846221923828, -3.4098362922668457, -3.2968263626098633, -3.1838161945343018, -3.0708060264587402, -2.957796096801758, -2.8447861671447754, -2.731775999069214, -2.6187658309936523, -2.50575590133667, -2.3927459716796875, -2.279735803604126, -2.1667256355285645, -2.053715705871582, -1.94070565700531, -1.827695608139038, -1.7146855592727661, -1.6016755104064941, -1.4886654615402222, -1.3756554126739502, -1.2626453638076782, -1.1496353149414062, -1.0366252660751343, -0.9236152172088623, -0.8106051683425903, -0.6975951194763184, -0.5845850706100464, -0.4715750217437744, -0.35856497287750244, -0.24555492401123047, -0.1325448751449585, -0.019534826278686523, 0.09347522258758545, 0.20648527145385742, 0.3194953203201294, 0.43250536918640137, 0.5455154180526733, 0.6585254669189453, 0.7715355157852173, 0.8845455646514893, 0.9975556135177612, 1.1105656623840332, 1.2235757112503052, 1.3365857601165771, 1.4495958089828491, 1.562605857849121, 1.675615906715393, 1.788625955581665, 1.901636004447937, 2.014646053314209, 2.1276559829711914, 2.240666151046753, 2.3536763191223145, 2.466686248779297]}, "gradients/encoder.encoder.layers.16.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 2.0, 1.0, 0.0, 2.0, 2.0, 12.0, 14.0, 10.0, 26.0, 25.0, 29.0, 47.0, 66.0, 60.0, 69.0, 75.0, 67.0, 76.0, 71.0, 53.0, 47.0, 58.0, 48.0, 45.0, 21.0, 20.0, 15.0, 14.0, 16.0, 11.0, 5.0, 5.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.8561229705810547, -1.8031017780303955, -1.7500805854797363, -1.6970593929290771, -1.644038200378418, -1.5910170078277588, -1.5379959344863892, -1.48497474193573, -1.4319535493850708, -1.3789323568344116, -1.3259111642837524, -1.2728899717330933, -1.2198688983917236, -1.1668477058410645, -1.1138265132904053, -1.060805320739746, -1.007784128189087, -0.9547629356384277, -0.9017417430877686, -0.8487206101417542, -0.795699417591095, -0.7426782250404358, -0.6896570920944214, -0.6366358995437622, -0.583614706993103, -0.5305935144424438, -0.47757235169410706, -0.42455118894577026, -0.3715299963951111, -0.3185088038444519, -0.2654876410961151, -0.21246647834777832, -0.1594454050064087, -0.1064242273569107, -0.05340304970741272, -0.0003818720579147339, 0.05263930559158325, 0.10566049814224243, 0.15868166089057922, 0.21170282363891602, 0.2647240161895752, 0.3177452087402344, 0.37076637148857117, 0.42378753423690796, 0.47680872678756714, 0.5298299193382263, 0.5828510522842407, 0.6358722448348999, 0.6888934373855591, 0.7419146299362183, 0.7949358224868774, 0.8479569554328918, 0.900978147983551, 0.9539993405342102, 1.0070204734802246, 1.0600416660308838, 1.113062858581543, 1.1660840511322021, 1.2191052436828613, 1.2721264362335205, 1.3251476287841797, 1.3781688213348389, 1.4311898946762085, 1.4842110872268677, 1.5372322797775269]}, "gradients/encoder.encoder.layers.16.attention.out_proj.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 2.0, 3.0, 7.0, 7.0, 7.0, 16.0, 26.0, 28.0, 40.0, 64.0, 81.0, 147.0, 187.0, 293.0, 549.0, 956.0, 1683.0, 3755.0, 12152.0, 65324.0, 509301.0, 389009.0, 47855.0, 9997.0, 3341.0, 1556.0, 829.0, 451.0, 265.0, 223.0, 141.0, 76.0, 70.0, 38.0, 26.0, 22.0, 12.0, 11.0, 6.0, 4.0, 0.0, 2.0, 0.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.259765625, -1.2146453857421875, -1.169525146484375, -1.1244049072265625, -1.07928466796875, -1.0341644287109375, -0.989044189453125, -0.9439239501953125, -0.8988037109375, -0.8536834716796875, -0.808563232421875, -0.7634429931640625, -0.71832275390625, -0.6732025146484375, -0.628082275390625, -0.5829620361328125, -0.537841796875, -0.4927215576171875, -0.447601318359375, -0.4024810791015625, -0.35736083984375, -0.3122406005859375, -0.267120361328125, -0.2220001220703125, -0.1768798828125, -0.1317596435546875, -0.086639404296875, -0.0415191650390625, 0.00360107421875, 0.0487213134765625, 0.093841552734375, 0.1389617919921875, 0.18408203125, 0.2292022705078125, 0.274322509765625, 0.3194427490234375, 0.36456298828125, 0.4096832275390625, 0.454803466796875, 0.4999237060546875, 0.5450439453125, 0.5901641845703125, 0.635284423828125, 0.6804046630859375, 0.72552490234375, 0.7706451416015625, 0.815765380859375, 0.8608856201171875, 0.906005859375, 0.9511260986328125, 0.996246337890625, 1.0413665771484375, 1.08648681640625, 1.1316070556640625, 1.176727294921875, 1.2218475341796875, 1.2669677734375, 1.3120880126953125, 1.357208251953125, 1.4023284912109375, 1.44744873046875, 1.4925689697265625, 1.537689208984375, 1.5828094482421875, 1.6279296875]}, "gradients/encoder.encoder.layers.16.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 3.0, 4.0, 3.0, 6.0, 12.0, 29.0, 43.0, 46.0, 66.0, 88.0, 96.0, 122.0, 119.0, 115.0, 85.0, 59.0, 48.0, 22.0, 7.0, 11.0, 6.0, 4.0, 3.0, 2.0, 3.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.473388671875, -0.4563255310058594, -0.43926239013671875, -0.4221992492675781, -0.4051361083984375, -0.3880729675292969, -0.37100982666015625, -0.3539466857910156, -0.336883544921875, -0.3198204040527344, -0.30275726318359375, -0.2856941223144531, -0.2686309814453125, -0.2515678405761719, -0.23450469970703125, -0.21744155883789062, -0.20037841796875, -0.18331527709960938, -0.16625213623046875, -0.14918899536132812, -0.1321258544921875, -0.11506271362304688, -0.09799957275390625, -0.08093643188476562, -0.063873291015625, -0.046810150146484375, -0.02974700927734375, -0.012683868408203125, 0.0043792724609375, 0.021442413330078125, 0.03850555419921875, 0.055568695068359375, 0.0726318359375, 0.08969497680664062, 0.10675811767578125, 0.12382125854492188, 0.1408843994140625, 0.15794754028320312, 0.17501068115234375, 0.19207382202148438, 0.209136962890625, 0.22620010375976562, 0.24326324462890625, 0.2603263854980469, 0.2773895263671875, 0.2944526672363281, 0.31151580810546875, 0.3285789489746094, 0.34564208984375, 0.3627052307128906, 0.37976837158203125, 0.3968315124511719, 0.4138946533203125, 0.4309577941894531, 0.44802093505859375, 0.4650840759277344, 0.482147216796875, 0.4992103576660156, 0.5162734985351562, 0.5333366394042969, 0.5503997802734375, 0.5674629211425781, 0.5845260620117188, 0.6015892028808594, 0.61865234375]}, "gradients/encoder.encoder.layers.16.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 7.0, 4.0, 7.0, 7.0, 10.0, 16.0, 18.0, 45.0, 48.0, 57.0, 87.0, 151.0, 244.0, 376.0, 730.0, 1439.0, 3758.0, 14317.0, 82607.0, 487600.0, 382066.0, 58165.0, 10800.0, 3188.0, 1231.0, 574.0, 343.0, 218.0, 136.0, 100.0, 71.0, 43.0, 38.0, 17.0, 12.0, 11.0, 3.0, 3.0, 8.0, 3.0, 3.0, 4.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.10546875, -1.0697021484375, -1.033935546875, -0.9981689453125, -0.96240234375, -0.9266357421875, -0.890869140625, -0.8551025390625, -0.8193359375, -0.7835693359375, -0.747802734375, -0.7120361328125, -0.67626953125, -0.6405029296875, -0.604736328125, -0.5689697265625, -0.533203125, -0.4974365234375, -0.461669921875, -0.4259033203125, -0.39013671875, -0.3543701171875, -0.318603515625, -0.2828369140625, -0.2470703125, -0.2113037109375, -0.175537109375, -0.1397705078125, -0.10400390625, -0.0682373046875, -0.032470703125, 0.0032958984375, 0.0390625, 0.0748291015625, 0.110595703125, 0.1463623046875, 0.18212890625, 0.2178955078125, 0.253662109375, 0.2894287109375, 0.3251953125, 0.3609619140625, 0.396728515625, 0.4324951171875, 0.46826171875, 0.5040283203125, 0.539794921875, 0.5755615234375, 0.611328125, 0.6470947265625, 0.682861328125, 0.7186279296875, 0.75439453125, 0.7901611328125, 0.825927734375, 0.8616943359375, 0.8974609375, 0.9332275390625, 0.968994140625, 1.0047607421875, 1.04052734375, 1.0762939453125, 1.112060546875, 1.1478271484375, 1.18359375]}, "gradients/encoder.encoder.layers.16.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 5.0, 9.0, 5.0, 5.0, 9.0, 9.0, 16.0, 8.0, 22.0, 25.0, 36.0, 32.0, 55.0, 43.0, 43.0, 52.0, 61.0, 42.0, 58.0, 64.0, 56.0, 42.0, 47.0, 35.0, 45.0, 54.0, 28.0, 20.0, 22.0, 13.0, 8.0, 14.0, 5.0, 6.0, 5.0, 6.0, 3.0, 2.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.51953125, -1.47332763671875, -1.4271240234375, -1.38092041015625, -1.334716796875, -1.28851318359375, -1.2423095703125, -1.19610595703125, -1.14990234375, -1.10369873046875, -1.0574951171875, -1.01129150390625, -0.965087890625, -0.91888427734375, -0.8726806640625, -0.82647705078125, -0.7802734375, -0.73406982421875, -0.6878662109375, -0.64166259765625, -0.595458984375, -0.54925537109375, -0.5030517578125, -0.45684814453125, -0.41064453125, -0.36444091796875, -0.3182373046875, -0.27203369140625, -0.225830078125, -0.17962646484375, -0.1334228515625, -0.08721923828125, -0.041015625, 0.00518798828125, 0.0513916015625, 0.09759521484375, 0.143798828125, 0.19000244140625, 0.2362060546875, 0.28240966796875, 0.32861328125, 0.37481689453125, 0.4210205078125, 0.46722412109375, 0.513427734375, 0.55963134765625, 0.6058349609375, 0.65203857421875, 0.6982421875, 0.74444580078125, 0.7906494140625, 0.83685302734375, 0.883056640625, 0.92926025390625, 0.9754638671875, 1.02166748046875, 1.06787109375, 1.11407470703125, 1.1602783203125, 1.20648193359375, 1.252685546875, 1.29888916015625, 1.3450927734375, 1.39129638671875, 1.4375]}, "gradients/encoder.encoder.layers.16.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 5.0, 3.0, 3.0, 11.0, 11.0, 12.0, 22.0, 40.0, 60.0, 141.0, 276.0, 766.0, 2270.0, 12692.0, 273368.0, 722191.0, 31028.0, 3865.0, 1038.0, 389.0, 179.0, 89.0, 40.0, 23.0, 14.0, 13.0, 7.0, 4.0, 1.0, 0.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0], "bins": [-0.62890625, -0.608734130859375, -0.58856201171875, -0.568389892578125, -0.5482177734375, -0.528045654296875, -0.50787353515625, -0.487701416015625, -0.467529296875, -0.447357177734375, -0.42718505859375, -0.407012939453125, -0.3868408203125, -0.366668701171875, -0.34649658203125, -0.326324462890625, -0.30615234375, -0.285980224609375, -0.26580810546875, -0.245635986328125, -0.2254638671875, -0.205291748046875, -0.18511962890625, -0.164947509765625, -0.144775390625, -0.124603271484375, -0.10443115234375, -0.084259033203125, -0.0640869140625, -0.043914794921875, -0.02374267578125, -0.003570556640625, 0.0166015625, 0.036773681640625, 0.05694580078125, 0.077117919921875, 0.0972900390625, 0.117462158203125, 0.13763427734375, 0.157806396484375, 0.177978515625, 0.198150634765625, 0.21832275390625, 0.238494873046875, 0.2586669921875, 0.278839111328125, 0.29901123046875, 0.319183349609375, 0.33935546875, 0.359527587890625, 0.37969970703125, 0.399871826171875, 0.4200439453125, 0.440216064453125, 0.46038818359375, 0.480560302734375, 0.500732421875, 0.520904541015625, 0.54107666015625, 0.561248779296875, 0.5814208984375, 0.601593017578125, 0.62176513671875, 0.641937255859375, 0.662109375]}, "gradients/encoder.encoder.layers.16.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 0.0, 2.0, 2.0, 1.0, 3.0, 6.0, 1.0, 3.0, 8.0, 7.0, 7.0, 8.0, 13.0, 18.0, 24.0, 34.0, 34.0, 52.0, 56.0, 65.0, 86.0, 98.0, 107.0, 85.0, 56.0, 55.0, 27.0, 22.0, 20.0, 15.0, 14.0, 16.0, 8.0, 6.0, 7.0, 6.0, 4.0, 5.0, 4.0, 8.0, 3.0, 4.0, 1.0, 4.0, 4.0, 0.0, 1.0, 2.0, 2.0], "bins": [-9.21487808227539e-05, -8.963234722614288e-05, -8.711591362953186e-05, -8.459948003292084e-05, -8.208304643630981e-05, -7.956661283969879e-05, -7.705017924308777e-05, -7.453374564647675e-05, -7.201731204986572e-05, -6.95008784532547e-05, -6.698444485664368e-05, -6.446801126003265e-05, -6.195157766342163e-05, -5.943514406681061e-05, -5.6918710470199585e-05, -5.440227687358856e-05, -5.188584327697754e-05, -4.9369409680366516e-05, -4.685297608375549e-05, -4.433654248714447e-05, -4.182010889053345e-05, -3.9303675293922424e-05, -3.67872416973114e-05, -3.427080810070038e-05, -3.1754374504089355e-05, -2.9237940907478333e-05, -2.672150731086731e-05, -2.4205073714256287e-05, -2.1688640117645264e-05, -1.917220652103424e-05, -1.6655772924423218e-05, -1.4139339327812195e-05, -1.1622905731201172e-05, -9.106472134590149e-06, -6.590038537979126e-06, -4.073604941368103e-06, -1.55717134475708e-06, 9.592622518539429e-07, 3.475695848464966e-06, 5.992129445075989e-06, 8.508563041687012e-06, 1.1024996638298035e-05, 1.3541430234909058e-05, 1.605786383152008e-05, 1.8574297428131104e-05, 2.1090731024742126e-05, 2.360716462135315e-05, 2.6123598217964172e-05, 2.8640031814575195e-05, 3.115646541118622e-05, 3.367289900779724e-05, 3.6189332604408264e-05, 3.870576620101929e-05, 4.122219979763031e-05, 4.373863339424133e-05, 4.6255066990852356e-05, 4.877150058746338e-05, 5.12879341840744e-05, 5.3804367780685425e-05, 5.632080137729645e-05, 5.883723497390747e-05, 6.13536685705185e-05, 6.387010216712952e-05, 6.638653576374054e-05, 6.890296936035156e-05]}, "gradients/encoder.encoder.layers.16.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 4.0, 0.0, 0.0, 0.0, 1.0, 3.0, 4.0, 11.0, 5.0, 8.0, 15.0, 24.0, 31.0, 50.0, 82.0, 123.0, 222.0, 430.0, 927.0, 2479.0, 9125.0, 53985.0, 478992.0, 440188.0, 49239.0, 8406.0, 2318.0, 901.0, 391.0, 217.0, 125.0, 81.0, 55.0, 33.0, 29.0, 18.0, 8.0, 5.0, 9.0, 4.0, 7.0, 3.0, 0.0, 4.0, 0.0, 3.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.374267578125, -0.3617820739746094, -0.34929656982421875, -0.3368110656738281, -0.3243255615234375, -0.3118400573730469, -0.29935455322265625, -0.2868690490722656, -0.274383544921875, -0.2618980407714844, -0.24941253662109375, -0.23692703247070312, -0.2244415283203125, -0.21195602416992188, -0.19947052001953125, -0.18698501586914062, -0.17449951171875, -0.16201400756835938, -0.14952850341796875, -0.13704299926757812, -0.1245574951171875, -0.11207199096679688, -0.09958648681640625, -0.08710098266601562, -0.074615478515625, -0.062129974365234375, -0.04964447021484375, -0.037158966064453125, -0.0246734619140625, -0.012187957763671875, 0.00029754638671875, 0.012783050537109375, 0.0252685546875, 0.037754058837890625, 0.05023956298828125, 0.06272506713867188, 0.0752105712890625, 0.08769607543945312, 0.10018157958984375, 0.11266708374023438, 0.125152587890625, 0.13763809204101562, 0.15012359619140625, 0.16260910034179688, 0.1750946044921875, 0.18758010864257812, 0.20006561279296875, 0.21255111694335938, 0.22503662109375, 0.23752212524414062, 0.25000762939453125, 0.2624931335449219, 0.2749786376953125, 0.2874641418457031, 0.29994964599609375, 0.3124351501464844, 0.324920654296875, 0.3374061584472656, 0.34989166259765625, 0.3623771667480469, 0.3748626708984375, 0.3873481750488281, 0.39983367919921875, 0.4123191833496094, 0.4248046875]}, "gradients/encoder.encoder.layers.16.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 2.0, 7.0, 4.0, 3.0, 5.0, 13.0, 8.0, 17.0, 19.0, 22.0, 39.0, 40.0, 58.0, 67.0, 107.0, 98.0, 108.0, 89.0, 75.0, 56.0, 37.0, 35.0, 23.0, 28.0, 8.0, 12.0, 3.0, 6.0, 3.0, 5.0, 6.0, 2.0, 4.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.279052734375, -0.266937255859375, -0.25482177734375, -0.242706298828125, -0.2305908203125, -0.218475341796875, -0.20635986328125, -0.194244384765625, -0.18212890625, -0.170013427734375, -0.15789794921875, -0.145782470703125, -0.1336669921875, -0.121551513671875, -0.10943603515625, -0.097320556640625, -0.085205078125, -0.073089599609375, -0.06097412109375, -0.048858642578125, -0.0367431640625, -0.024627685546875, -0.01251220703125, -0.000396728515625, 0.01171875, 0.023834228515625, 0.03594970703125, 0.048065185546875, 0.0601806640625, 0.072296142578125, 0.08441162109375, 0.096527099609375, 0.108642578125, 0.120758056640625, 0.13287353515625, 0.144989013671875, 0.1571044921875, 0.169219970703125, 0.18133544921875, 0.193450927734375, 0.20556640625, 0.217681884765625, 0.22979736328125, 0.241912841796875, 0.2540283203125, 0.266143798828125, 0.27825927734375, 0.290374755859375, 0.302490234375, 0.314605712890625, 0.32672119140625, 0.338836669921875, 0.3509521484375, 0.363067626953125, 0.37518310546875, 0.387298583984375, 0.3994140625, 0.411529541015625, 0.42364501953125, 0.435760498046875, 0.4478759765625, 0.459991455078125, 0.47210693359375, 0.484222412109375, 0.496337890625]}, "gradients/encoder.encoder.layers.16.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 4.0, 1.0, 4.0, 8.0, 10.0, 18.0, 25.0, 36.0, 57.0, 97.0, 120.0, 163.0, 133.0, 107.0, 77.0, 62.0, 32.0, 17.0, 10.0, 8.0, 9.0, 2.0, 4.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0], "bins": [-7.429243087768555, -7.225990295410156, -7.022737979888916, -6.819485664367676, -6.616232872009277, -6.412980079650879, -6.209727764129639, -6.006475448608398, -5.80322265625, -5.599969863891602, -5.396717548370361, -5.193465232849121, -4.990212440490723, -4.786959648132324, -4.583707332611084, -4.380455017089844, -4.177202224731445, -3.973949670791626, -3.7706971168518066, -3.5674445629119873, -3.364192008972168, -3.1609394550323486, -2.9576869010925293, -2.75443434715271, -2.5511817932128906, -2.3479292392730713, -2.144676685333252, -1.9414241313934326, -1.7381715774536133, -1.534919023513794, -1.3316664695739746, -1.1284139156341553, -0.9251613616943359, -0.7219088077545166, -0.5186562538146973, -0.31540369987487793, -0.1121511459350586, 0.09110140800476074, 0.2943539619445801, 0.4976065158843994, 0.7008590698242188, 0.9041116237640381, 1.1073641777038574, 1.3106167316436768, 1.513869285583496, 1.7171218395233154, 1.9203743934631348, 2.123626947402954, 2.3268795013427734, 2.5301320552825928, 2.733384609222412, 2.9366371631622314, 3.139889717102051, 3.34314227104187, 3.5463948249816895, 3.749647378921509, 3.952899932861328, 4.156152725219727, 4.359405040740967, 4.562657356262207, 4.7659101486206055, 4.969162940979004, 5.172415256500244, 5.375667572021484, 5.578920364379883]}, "gradients/encoder.encoder.layers.16.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 1.0, 8.0, 4.0, 5.0, 9.0, 9.0, 15.0, 14.0, 6.0, 15.0, 23.0, 21.0, 37.0, 35.0, 29.0, 40.0, 42.0, 47.0, 45.0, 41.0, 47.0, 48.0, 55.0, 47.0, 49.0, 32.0, 45.0, 30.0, 31.0, 30.0, 24.0, 21.0, 23.0, 21.0, 12.0, 10.0, 13.0, 5.0, 3.0, 4.0, 2.0, 4.0, 2.0, 2.0, 3.0, 2.0, 1.0, 2.0, 0.0, 0.0, 1.0], "bins": [-8.620080947875977, -8.367776870727539, -8.115472793579102, -7.863168716430664, -7.610864162445068, -7.358560085296631, -7.106256008148193, -6.853951930999756, -6.60164737701416, -6.349343299865723, -6.097039222717285, -5.844735145568848, -5.592430591583252, -5.3401265144348145, -5.087822437286377, -4.8355183601379395, -4.583214282989502, -4.3309102058410645, -4.078606128692627, -3.8263018131256104, -3.5739974975585938, -3.3216934204101562, -3.0693893432617188, -2.8170852661132812, -2.5647809505462646, -2.312476873397827, -2.0601725578308105, -1.807868480682373, -1.555564284324646, -1.303260087966919, -1.0509560108184814, -0.7986518144607544, -0.5463480949401855, -0.2940439283847809, -0.04173976182937622, 0.21056437492370605, 0.4628685712814331, 0.7151727676391602, 0.9674768447875977, 1.2197810411453247, 1.4720852375030518, 1.7243894338607788, 1.9766936302185059, 2.2289977073669434, 2.481301784515381, 2.7336061000823975, 2.985910177230835, 3.2382144927978516, 3.490518569946289, 3.7428226470947266, 3.995126962661743, 4.247430801391602, 4.499735355377197, 4.752039432525635, 5.004343509674072, 5.25664758682251, 5.5089521408081055, 5.761256217956543, 6.0135602951049805, 6.265864372253418, 6.518168926239014, 6.770473003387451, 7.022777080535889, 7.275081157684326, 7.527385234832764]}, "gradients/encoder.encoder.layers.15.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 7.0, 0.0, 0.0, 2.0, 5.0, 3.0, 7.0, 8.0, 16.0, 17.0, 15.0, 35.0, 31.0, 65.0, 79.0, 108.0, 182.0, 351.0, 687.0, 1464.0, 3535.0, 11537.0, 93108.0, 3990981.0, 74893.0, 10438.0, 3533.0, 1463.0, 683.0, 362.0, 212.0, 133.0, 97.0, 63.0, 39.0, 34.0, 23.0, 16.0, 18.0, 9.0, 7.0, 5.0, 7.0, 5.0, 3.0, 2.0, 0.0, 3.0, 2.0, 0.0, 0.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.5966796875, -1.5424957275390625, -1.488311767578125, -1.4341278076171875, -1.37994384765625, -1.3257598876953125, -1.271575927734375, -1.2173919677734375, -1.1632080078125, -1.1090240478515625, -1.054840087890625, -1.0006561279296875, -0.94647216796875, -0.8922882080078125, -0.838104248046875, -0.7839202880859375, -0.729736328125, -0.6755523681640625, -0.621368408203125, -0.5671844482421875, -0.51300048828125, -0.4588165283203125, -0.404632568359375, -0.3504486083984375, -0.2962646484375, -0.2420806884765625, -0.187896728515625, -0.1337127685546875, -0.07952880859375, -0.0253448486328125, 0.028839111328125, 0.0830230712890625, 0.13720703125, 0.1913909912109375, 0.245574951171875, 0.2997589111328125, 0.35394287109375, 0.4081268310546875, 0.462310791015625, 0.5164947509765625, 0.5706787109375, 0.6248626708984375, 0.679046630859375, 0.7332305908203125, 0.78741455078125, 0.8415985107421875, 0.895782470703125, 0.9499664306640625, 1.004150390625, 1.0583343505859375, 1.112518310546875, 1.1667022705078125, 1.22088623046875, 1.2750701904296875, 1.329254150390625, 1.3834381103515625, 1.4376220703125, 1.4918060302734375, 1.545989990234375, 1.6001739501953125, 1.65435791015625, 1.7085418701171875, 1.762725830078125, 1.8169097900390625, 1.87109375]}, "gradients/encoder.encoder.layers.15.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 10.0, 15.0, 29.0, 44.0, 48.0, 68.0, 88.0, 106.0, 104.0, 108.0, 103.0, 89.0, 54.0, 39.0, 32.0, 18.0, 11.0, 8.0, 5.0, 3.0, 2.0, 2.0, 1.0, 0.0, 4.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.60009765625, -0.5811691284179688, -0.5622406005859375, -0.5433120727539062, -0.524383544921875, -0.5054550170898438, -0.4865264892578125, -0.46759796142578125, -0.44866943359375, -0.42974090576171875, -0.4108123779296875, -0.39188385009765625, -0.372955322265625, -0.35402679443359375, -0.3350982666015625, -0.31616973876953125, -0.2972412109375, -0.27831268310546875, -0.2593841552734375, -0.24045562744140625, -0.221527099609375, -0.20259857177734375, -0.1836700439453125, -0.16474151611328125, -0.14581298828125, -0.12688446044921875, -0.1079559326171875, -0.08902740478515625, -0.070098876953125, -0.05117034912109375, -0.0322418212890625, -0.01331329345703125, 0.005615234375, 0.02454376220703125, 0.0434722900390625, 0.06240081787109375, 0.081329345703125, 0.10025787353515625, 0.1191864013671875, 0.13811492919921875, 0.15704345703125, 0.17597198486328125, 0.1949005126953125, 0.21382904052734375, 0.232757568359375, 0.25168609619140625, 0.2706146240234375, 0.28954315185546875, 0.3084716796875, 0.32740020751953125, 0.3463287353515625, 0.36525726318359375, 0.384185791015625, 0.40311431884765625, 0.4220428466796875, 0.44097137451171875, 0.45989990234375, 0.47882843017578125, 0.4977569580078125, 0.5166854858398438, 0.535614013671875, 0.5545425415039062, 0.5734710693359375, 0.5923995971679688, 0.611328125]}, "gradients/encoder.encoder.layers.15.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 3.0, 3.0, 1.0, 2.0, 8.0, 14.0, 39.0, 79.0, 294.0, 994.0, 4326.0, 53940.0, 4089738.0, 39631.0, 3854.0, 940.0, 280.0, 88.0, 33.0, 17.0, 2.0, 0.0, 1.0, 1.0, 1.0, 2.0, 3.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.572265625, -1.50225830078125, -1.4322509765625, -1.36224365234375, -1.292236328125, -1.22222900390625, -1.1522216796875, -1.08221435546875, -1.01220703125, -0.94219970703125, -0.8721923828125, -0.80218505859375, -0.732177734375, -0.66217041015625, -0.5921630859375, -0.52215576171875, -0.4521484375, -0.38214111328125, -0.3121337890625, -0.24212646484375, -0.172119140625, -0.10211181640625, -0.0321044921875, 0.03790283203125, 0.10791015625, 0.17791748046875, 0.2479248046875, 0.31793212890625, 0.387939453125, 0.45794677734375, 0.5279541015625, 0.59796142578125, 0.66796875, 0.73797607421875, 0.8079833984375, 0.87799072265625, 0.947998046875, 1.01800537109375, 1.0880126953125, 1.15802001953125, 1.22802734375, 1.29803466796875, 1.3680419921875, 1.43804931640625, 1.508056640625, 1.57806396484375, 1.6480712890625, 1.71807861328125, 1.7880859375, 1.85809326171875, 1.9281005859375, 1.99810791015625, 2.068115234375, 2.13812255859375, 2.2081298828125, 2.27813720703125, 2.34814453125, 2.41815185546875, 2.4881591796875, 2.55816650390625, 2.628173828125, 2.69818115234375, 2.7681884765625, 2.83819580078125, 2.908203125]}, "gradients/encoder.encoder.layers.15.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 3.0, 1.0, 2.0, 5.0, 5.0, 3.0, 7.0, 3.0, 11.0, 19.0, 28.0, 52.0, 104.0, 224.0, 1195.0, 1769.0, 327.0, 121.0, 71.0, 47.0, 26.0, 15.0, 11.0, 9.0, 4.0, 4.0, 2.0, 4.0, 2.0, 1.0, 1.0, 0.0, 3.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.564453125, -0.54571533203125, -0.5269775390625, -0.50823974609375, -0.489501953125, -0.47076416015625, -0.4520263671875, -0.43328857421875, -0.41455078125, -0.39581298828125, -0.3770751953125, -0.35833740234375, -0.339599609375, -0.32086181640625, -0.3021240234375, -0.28338623046875, -0.2646484375, -0.24591064453125, -0.2271728515625, -0.20843505859375, -0.189697265625, -0.17095947265625, -0.1522216796875, -0.13348388671875, -0.11474609375, -0.09600830078125, -0.0772705078125, -0.05853271484375, -0.039794921875, -0.02105712890625, -0.0023193359375, 0.01641845703125, 0.03515625, 0.05389404296875, 0.0726318359375, 0.09136962890625, 0.110107421875, 0.12884521484375, 0.1475830078125, 0.16632080078125, 0.18505859375, 0.20379638671875, 0.2225341796875, 0.24127197265625, 0.260009765625, 0.27874755859375, 0.2974853515625, 0.31622314453125, 0.3349609375, 0.35369873046875, 0.3724365234375, 0.39117431640625, 0.409912109375, 0.42864990234375, 0.4473876953125, 0.46612548828125, 0.48486328125, 0.50360107421875, 0.5223388671875, 0.54107666015625, 0.559814453125, 0.57855224609375, 0.5972900390625, 0.61602783203125, 0.634765625]}, "gradients/encoder.encoder.layers.15.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 3.0, 16.0, 134.0, 597.0, 234.0, 21.0, 9.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.443396806716919, -2.1563329696655273, -1.8692693710327148, -1.5822056531906128, -1.2951419353485107, -1.0080782175064087, -0.7210144996643066, -0.43395066261291504, -0.14688706398010254, 0.1401766538619995, 0.42724037170410156, 0.7143040895462036, 1.0013678073883057, 1.2884315252304077, 1.5754952430725098, 1.8625590801239014, 2.149622678756714, 2.4366865158081055, 2.723750114440918, 3.0108137130737305, 3.297877550125122, 3.5849413871765137, 3.872004985809326, 4.159069061279297, 4.446132659912109, 4.733196258544922, 5.020259857177734, 5.307323932647705, 5.594387531280518, 5.88145112991333, 6.168515205383301, 6.455578804016113, 6.742642402648926, 7.029706001281738, 7.316769599914551, 7.6038336753845215, 7.890897274017334, 8.177961349487305, 8.465024948120117, 8.75208854675293, 9.039152145385742, 9.326215744018555, 9.613279342651367, 9.90034294128418, 10.187406539916992, 10.474471092224121, 10.761534690856934, 11.048598289489746, 11.335661888122559, 11.622725486755371, 11.909789085388184, 12.196852684020996, 12.483917236328125, 12.770980834960938, 13.05804443359375, 13.345108032226562, 13.632171630859375, 13.919235229492188, 14.206298828125, 14.493362426757812, 14.780426025390625, 15.067490577697754, 15.354554176330566, 15.641617774963379, 15.928681373596191]}, "gradients/encoder.encoder.layers.15.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 2.0, 4.0, 3.0, 0.0, 4.0, 4.0, 5.0, 9.0, 4.0, 5.0, 9.0, 18.0, 16.0, 26.0, 26.0, 31.0, 24.0, 38.0, 37.0, 35.0, 42.0, 55.0, 58.0, 50.0, 36.0, 44.0, 57.0, 45.0, 49.0, 33.0, 33.0, 39.0, 31.0, 24.0, 18.0, 18.0, 13.0, 16.0, 16.0, 9.0, 7.0, 3.0, 7.0, 5.0, 4.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-1.7116096019744873, -1.6607743501663208, -1.6099392175674438, -1.5591039657592773, -1.5082688331604004, -1.4574335813522339, -1.4065983295440674, -1.3557631969451904, -1.304927945137024, -1.2540926933288574, -1.2032575607299805, -1.152422308921814, -1.1015870571136475, -1.0507519245147705, -0.999916672706604, -0.9490814805030823, -0.8982462882995605, -0.8474110960960388, -0.7965759038925171, -0.7457406520843506, -0.6949054598808289, -0.6440702676773071, -0.5932350158691406, -0.5423998236656189, -0.49156463146209717, -0.44072943925857544, -0.3898942172527313, -0.3390589952468872, -0.2882238030433655, -0.23738861083984375, -0.18655338883399963, -0.13571816682815552, -0.08488297462463379, -0.03404776751995087, 0.016787439584732056, 0.06762264668941498, 0.1184578537940979, 0.16929304599761963, 0.22012826800346375, 0.27096349000930786, 0.3217986822128296, 0.3726338744163513, 0.42346909642219543, 0.47430431842803955, 0.5251395106315613, 0.575974702835083, 0.6268099546432495, 0.6776451468467712, 0.728480339050293, 0.7793155312538147, 0.8301507234573364, 0.8809859752655029, 0.9318211674690247, 0.9826563596725464, 1.033491611480713, 1.0843267440795898, 1.1351619958877563, 1.1859972476959229, 1.2368323802947998, 1.2876676321029663, 1.3385028839111328, 1.3893380165100098, 1.4401732683181763, 1.4910085201263428, 1.5418436527252197]}, "gradients/encoder.encoder.layers.15.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 3.0, 4.0, 2.0, 2.0, 5.0, 7.0, 6.0, 13.0, 16.0, 15.0, 30.0, 42.0, 69.0, 84.0, 171.0, 279.0, 542.0, 1181.0, 2723.0, 9471.0, 56675.0, 668246.0, 274137.0, 25540.0, 5519.0, 1905.0, 787.0, 452.0, 233.0, 134.0, 80.0, 66.0, 43.0, 22.0, 6.0, 14.0, 8.0, 10.0, 5.0, 4.0, 5.0, 1.0, 0.0, 3.0, 1.0, 4.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-1.6953125, -1.6402740478515625, -1.585235595703125, -1.5301971435546875, -1.47515869140625, -1.4201202392578125, -1.365081787109375, -1.3100433349609375, -1.2550048828125, -1.1999664306640625, -1.144927978515625, -1.0898895263671875, -1.03485107421875, -0.9798126220703125, -0.924774169921875, -0.8697357177734375, -0.814697265625, -0.7596588134765625, -0.704620361328125, -0.6495819091796875, -0.59454345703125, -0.5395050048828125, -0.484466552734375, -0.4294281005859375, -0.3743896484375, -0.3193511962890625, -0.264312744140625, -0.2092742919921875, -0.15423583984375, -0.0991973876953125, -0.044158935546875, 0.0108795166015625, 0.06591796875, 0.1209564208984375, 0.175994873046875, 0.2310333251953125, 0.28607177734375, 0.3411102294921875, 0.396148681640625, 0.4511871337890625, 0.5062255859375, 0.5612640380859375, 0.616302490234375, 0.6713409423828125, 0.72637939453125, 0.7814178466796875, 0.836456298828125, 0.8914947509765625, 0.946533203125, 1.0015716552734375, 1.056610107421875, 1.1116485595703125, 1.16668701171875, 1.2217254638671875, 1.276763916015625, 1.3318023681640625, 1.3868408203125, 1.4418792724609375, 1.496917724609375, 1.5519561767578125, 1.60699462890625, 1.6620330810546875, 1.717071533203125, 1.7721099853515625, 1.8271484375]}, "gradients/encoder.encoder.layers.15.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 0.0, 4.0, 3.0, 5.0, 8.0, 8.0, 17.0, 40.0, 33.0, 68.0, 77.0, 92.0, 108.0, 124.0, 106.0, 77.0, 82.0, 55.0, 39.0, 22.0, 9.0, 11.0, 6.0, 2.0, 4.0, 3.0, 1.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.64453125, -0.625152587890625, -0.60577392578125, -0.586395263671875, -0.5670166015625, -0.547637939453125, -0.52825927734375, -0.508880615234375, -0.489501953125, -0.470123291015625, -0.45074462890625, -0.431365966796875, -0.4119873046875, -0.392608642578125, -0.37322998046875, -0.353851318359375, -0.33447265625, -0.315093994140625, -0.29571533203125, -0.276336669921875, -0.2569580078125, -0.237579345703125, -0.21820068359375, -0.198822021484375, -0.179443359375, -0.160064697265625, -0.14068603515625, -0.121307373046875, -0.1019287109375, -0.082550048828125, -0.06317138671875, -0.043792724609375, -0.0244140625, -0.005035400390625, 0.01434326171875, 0.033721923828125, 0.0531005859375, 0.072479248046875, 0.09185791015625, 0.111236572265625, 0.130615234375, 0.149993896484375, 0.16937255859375, 0.188751220703125, 0.2081298828125, 0.227508544921875, 0.24688720703125, 0.266265869140625, 0.28564453125, 0.305023193359375, 0.32440185546875, 0.343780517578125, 0.3631591796875, 0.382537841796875, 0.40191650390625, 0.421295166015625, 0.440673828125, 0.460052490234375, 0.47943115234375, 0.498809814453125, 0.5181884765625, 0.537567138671875, 0.55694580078125, 0.576324462890625, 0.595703125]}, "gradients/encoder.encoder.layers.15.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 4.0, 3.0, 5.0, 4.0, 8.0, 4.0, 10.0, 18.0, 19.0, 42.0, 42.0, 68.0, 67.0, 109.0, 159.0, 253.0, 439.0, 832.0, 2176.0, 7868.0, 42560.0, 386552.0, 533299.0, 59074.0, 9975.0, 2687.0, 974.0, 460.0, 275.0, 183.0, 109.0, 94.0, 51.0, 32.0, 31.0, 22.0, 14.0, 12.0, 10.0, 4.0, 5.0, 1.0, 3.0, 4.0, 2.0, 2.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.40234375, -1.359954833984375, -1.31756591796875, -1.275177001953125, -1.2327880859375, -1.190399169921875, -1.14801025390625, -1.105621337890625, -1.063232421875, -1.020843505859375, -0.97845458984375, -0.936065673828125, -0.8936767578125, -0.851287841796875, -0.80889892578125, -0.766510009765625, -0.72412109375, -0.681732177734375, -0.63934326171875, -0.596954345703125, -0.5545654296875, -0.512176513671875, -0.46978759765625, -0.427398681640625, -0.385009765625, -0.342620849609375, -0.30023193359375, -0.257843017578125, -0.2154541015625, -0.173065185546875, -0.13067626953125, -0.088287353515625, -0.0458984375, -0.003509521484375, 0.03887939453125, 0.081268310546875, 0.1236572265625, 0.166046142578125, 0.20843505859375, 0.250823974609375, 0.293212890625, 0.335601806640625, 0.37799072265625, 0.420379638671875, 0.4627685546875, 0.505157470703125, 0.54754638671875, 0.589935302734375, 0.63232421875, 0.674713134765625, 0.71710205078125, 0.759490966796875, 0.8018798828125, 0.844268798828125, 0.88665771484375, 0.929046630859375, 0.971435546875, 1.013824462890625, 1.05621337890625, 1.098602294921875, 1.1409912109375, 1.183380126953125, 1.22576904296875, 1.268157958984375, 1.310546875]}, "gradients/encoder.encoder.layers.15.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 4.0, 4.0, 0.0, 2.0, 3.0, 7.0, 10.0, 12.0, 11.0, 10.0, 15.0, 24.0, 23.0, 25.0, 37.0, 31.0, 28.0, 41.0, 43.0, 35.0, 65.0, 54.0, 45.0, 56.0, 46.0, 58.0, 48.0, 38.0, 43.0, 37.0, 28.0, 30.0, 16.0, 16.0, 5.0, 12.0, 12.0, 13.0, 10.0, 5.0, 3.0, 4.0, 0.0, 4.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.6572265625, -1.610321044921875, -1.56341552734375, -1.516510009765625, -1.4696044921875, -1.422698974609375, -1.37579345703125, -1.328887939453125, -1.281982421875, -1.235076904296875, -1.18817138671875, -1.141265869140625, -1.0943603515625, -1.047454833984375, -1.00054931640625, -0.953643798828125, -0.90673828125, -0.859832763671875, -0.81292724609375, -0.766021728515625, -0.7191162109375, -0.672210693359375, -0.62530517578125, -0.578399658203125, -0.531494140625, -0.484588623046875, -0.43768310546875, -0.390777587890625, -0.3438720703125, -0.296966552734375, -0.25006103515625, -0.203155517578125, -0.15625, -0.109344482421875, -0.06243896484375, -0.015533447265625, 0.0313720703125, 0.078277587890625, 0.12518310546875, 0.172088623046875, 0.218994140625, 0.265899658203125, 0.31280517578125, 0.359710693359375, 0.4066162109375, 0.453521728515625, 0.50042724609375, 0.547332763671875, 0.59423828125, 0.641143798828125, 0.68804931640625, 0.734954833984375, 0.7818603515625, 0.828765869140625, 0.87567138671875, 0.922576904296875, 0.969482421875, 1.016387939453125, 1.06329345703125, 1.110198974609375, 1.1571044921875, 1.204010009765625, 1.25091552734375, 1.297821044921875, 1.3447265625]}, "gradients/encoder.encoder.layers.15.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 1.0, 4.0, 11.0, 30.0, 44.0, 148.0, 708.0, 8281.0, 1005315.0, 32505.0, 1160.0, 260.0, 53.0, 25.0, 7.0, 5.0, 2.0, 3.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.4423828125, -1.40301513671875, -1.3636474609375, -1.32427978515625, -1.284912109375, -1.24554443359375, -1.2061767578125, -1.16680908203125, -1.12744140625, -1.08807373046875, -1.0487060546875, -1.00933837890625, -0.969970703125, -0.93060302734375, -0.8912353515625, -0.85186767578125, -0.8125, -0.77313232421875, -0.7337646484375, -0.69439697265625, -0.655029296875, -0.61566162109375, -0.5762939453125, -0.53692626953125, -0.49755859375, -0.45819091796875, -0.4188232421875, -0.37945556640625, -0.340087890625, -0.30072021484375, -0.2613525390625, -0.22198486328125, -0.1826171875, -0.14324951171875, -0.1038818359375, -0.06451416015625, -0.025146484375, 0.01422119140625, 0.0535888671875, 0.09295654296875, 0.13232421875, 0.17169189453125, 0.2110595703125, 0.25042724609375, 0.289794921875, 0.32916259765625, 0.3685302734375, 0.40789794921875, 0.447265625, 0.48663330078125, 0.5260009765625, 0.56536865234375, 0.604736328125, 0.64410400390625, 0.6834716796875, 0.72283935546875, 0.76220703125, 0.80157470703125, 0.8409423828125, 0.88031005859375, 0.919677734375, 0.95904541015625, 0.9984130859375, 1.03778076171875, 1.0771484375]}, "gradients/encoder.encoder.layers.15.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 2.0, 3.0, 2.0, 2.0, 4.0, 5.0, 12.0, 10.0, 17.0, 23.0, 32.0, 66.0, 94.0, 158.0, 213.0, 145.0, 77.0, 40.0, 32.0, 23.0, 15.0, 11.0, 6.0, 7.0, 4.0, 1.0, 2.0, 4.0, 1.0, 0.0, 2.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00014197826385498047, -0.00013698451220989227, -0.00013199076056480408, -0.00012699700891971588, -0.00012200325727462769, -0.00011700950562953949, -0.0001120157539844513, -0.0001070220023393631, -0.0001020282506942749, -9.70344990491867e-05, -9.204074740409851e-05, -8.704699575901031e-05, -8.205324411392212e-05, -7.705949246883392e-05, -7.206574082374573e-05, -6.707198917865753e-05, -6.207823753356934e-05, -5.708448588848114e-05, -5.2090734243392944e-05, -4.709698259830475e-05, -4.210323095321655e-05, -3.710947930812836e-05, -3.211572766304016e-05, -2.7121976017951965e-05, -2.212822437286377e-05, -1.7134472727775574e-05, -1.2140721082687378e-05, -7.146969437599182e-06, -2.1532177925109863e-06, 2.8405338525772095e-06, 7.834285497665405e-06, 1.2828037142753601e-05, 1.7821788787841797e-05, 2.2815540432929993e-05, 2.780929207801819e-05, 3.2803043723106384e-05, 3.779679536819458e-05, 4.2790547013282776e-05, 4.778429865837097e-05, 5.277805030345917e-05, 5.777180194854736e-05, 6.276555359363556e-05, 6.775930523872375e-05, 7.275305688381195e-05, 7.774680852890015e-05, 8.274056017398834e-05, 8.773431181907654e-05, 9.272806346416473e-05, 9.772181510925293e-05, 0.00010271556675434113, 0.00010770931839942932, 0.00011270307004451752, 0.00011769682168960571, 0.0001226905733346939, 0.0001276843249797821, 0.0001326780766248703, 0.0001376718282699585, 0.0001426655799150467, 0.0001476593315601349, 0.00015265308320522308, 0.00015764683485031128, 0.00016264058649539948, 0.00016763433814048767, 0.00017262808978557587, 0.00017762184143066406]}, "gradients/encoder.encoder.layers.15.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 3.0, 0.0, 2.0, 4.0, 3.0, 7.0, 13.0, 29.0, 52.0, 118.0, 353.0, 1909.0, 307127.0, 735673.0, 2589.0, 424.0, 131.0, 56.0, 37.0, 17.0, 7.0, 4.0, 2.0, 3.0, 2.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.9609375, -0.9192657470703125, -0.877593994140625, -0.8359222412109375, -0.79425048828125, -0.7525787353515625, -0.710906982421875, -0.6692352294921875, -0.6275634765625, -0.5858917236328125, -0.544219970703125, -0.5025482177734375, -0.46087646484375, -0.4192047119140625, -0.377532958984375, -0.3358612060546875, -0.294189453125, -0.2525177001953125, -0.210845947265625, -0.1691741943359375, -0.12750244140625, -0.0858306884765625, -0.044158935546875, -0.0024871826171875, 0.0391845703125, 0.0808563232421875, 0.122528076171875, 0.1641998291015625, 0.20587158203125, 0.2475433349609375, 0.289215087890625, 0.3308868408203125, 0.37255859375, 0.4142303466796875, 0.455902099609375, 0.4975738525390625, 0.53924560546875, 0.5809173583984375, 0.622589111328125, 0.6642608642578125, 0.7059326171875, 0.7476043701171875, 0.789276123046875, 0.8309478759765625, 0.87261962890625, 0.9142913818359375, 0.955963134765625, 0.9976348876953125, 1.039306640625, 1.0809783935546875, 1.122650146484375, 1.1643218994140625, 1.20599365234375, 1.2476654052734375, 1.289337158203125, 1.3310089111328125, 1.3726806640625, 1.4143524169921875, 1.456024169921875, 1.4976959228515625, 1.53936767578125, 1.5810394287109375, 1.622711181640625, 1.6643829345703125, 1.7060546875]}, "gradients/encoder.encoder.layers.15.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 4.0, 3.0, 15.0, 19.0, 35.0, 74.0, 200.0, 299.0, 204.0, 87.0, 41.0, 24.0, 8.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.72021484375, -0.6896591186523438, -0.6591033935546875, -0.6285476684570312, -0.597991943359375, -0.5674362182617188, -0.5368804931640625, -0.5063247680664062, -0.47576904296875, -0.44521331787109375, -0.4146575927734375, -0.38410186767578125, -0.353546142578125, -0.32299041748046875, -0.2924346923828125, -0.26187896728515625, -0.2313232421875, -0.20076751708984375, -0.1702117919921875, -0.13965606689453125, -0.109100341796875, -0.07854461669921875, -0.0479888916015625, -0.01743316650390625, 0.01312255859375, 0.04367828369140625, 0.0742340087890625, 0.10478973388671875, 0.135345458984375, 0.16590118408203125, 0.1964569091796875, 0.22701263427734375, 0.257568359375, 0.28812408447265625, 0.3186798095703125, 0.34923553466796875, 0.379791259765625, 0.41034698486328125, 0.4409027099609375, 0.47145843505859375, 0.50201416015625, 0.5325698852539062, 0.5631256103515625, 0.5936813354492188, 0.624237060546875, 0.6547927856445312, 0.6853485107421875, 0.7159042358398438, 0.7464599609375, 0.7770156860351562, 0.8075714111328125, 0.8381271362304688, 0.868682861328125, 0.8992385864257812, 0.9297943115234375, 0.9603500366210938, 0.99090576171875, 1.0214614868164062, 1.0520172119140625, 1.0825729370117188, 1.113128662109375, 1.1436843872070312, 1.1742401123046875, 1.2047958374023438, 1.2353515625]}, "gradients/encoder.encoder.layers.15.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 3.0, 16.0, 21.0, 42.0, 171.0, 307.0, 257.0, 112.0, 53.0, 18.0, 8.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.163463592529297, -11.769636154174805, -11.375809669494629, -10.981982231140137, -10.588154792785645, -10.194328308105469, -9.800500869750977, -9.406673431396484, -9.012845993041992, -8.6190185546875, -8.225192070007324, -7.831364631652832, -7.43753719329834, -7.043710231781006, -6.649883270263672, -6.25605583190918, -5.862229347229004, -5.46840238571167, -5.074574947357178, -4.680747985839844, -4.286920547485352, -3.8930935859680176, -3.4992666244506836, -3.1054394245147705, -2.7116122245788574, -2.3177850246429443, -1.9239579439163208, -1.5301308631896973, -1.1363036632537842, -0.7424764633178711, -0.3486495018005371, 0.04517769813537598, 0.43900489807128906, 0.8328320384025574, 1.2266591787338257, 1.6204862594604492, 2.0143134593963623, 2.4081406593322754, 2.8019676208496094, 3.1957948207855225, 3.5896220207214355, 3.9834492206573486, 4.377276420593262, 4.771103382110596, 5.16493034362793, 5.558757781982422, 5.952584743499756, 6.34641170501709, 6.740239143371582, 7.134066104888916, 7.527893543243408, 7.921720504760742, 8.315547943115234, 8.709375381469727, 9.103201866149902, 9.497029304504395, 9.89085578918457, 10.284683227539062, 10.678509712219238, 11.07233715057373, 11.466164588928223, 11.859991073608398, 12.25381851196289, 12.647645950317383, 13.041473388671875]}, "gradients/encoder.encoder.layers.15.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 4.0, 1.0, 2.0, 6.0, 6.0, 11.0, 10.0, 12.0, 14.0, 14.0, 18.0, 22.0, 21.0, 22.0, 36.0, 23.0, 51.0, 51.0, 58.0, 49.0, 52.0, 50.0, 50.0, 44.0, 51.0, 43.0, 34.0, 49.0, 34.0, 35.0, 24.0, 16.0, 20.0, 16.0, 10.0, 10.0, 12.0, 10.0, 4.0, 11.0, 3.0, 2.0, 5.0, 1.0, 0.0, 2.0], "bins": [-9.796653747558594, -9.548315048217773, -9.299976348876953, -9.05163860321045, -8.803299903869629, -8.554961204528809, -8.306622505187988, -8.058284759521484, -7.809946060180664, -7.561607360839844, -7.313269138336182, -7.064930438995361, -6.816592216491699, -6.568253517150879, -6.319914817810059, -6.0715765953063965, -5.823237895965576, -5.574899196624756, -5.326560974121094, -5.078222274780273, -4.829884052276611, -4.581545352935791, -4.333207130432129, -4.084868431091309, -3.8365299701690674, -3.588191509246826, -3.339853048324585, -3.0915145874023438, -2.8431758880615234, -2.5948376655578613, -2.346498966217041, -2.0981605052948, -1.8498215675354004, -1.6014831066131592, -1.353144645690918, -1.1048060655593872, -0.856467604637146, -0.6081291437149048, -0.359790563583374, -0.11145210266113281, 0.1368863582611084, 0.385224848985672, 0.6335633397102356, 0.8819018602371216, 1.1302403211593628, 1.378578782081604, 1.6269173622131348, 1.875255823135376, 2.123594284057617, 2.3719327449798584, 2.6202712059020996, 2.86860990524292, 3.116948127746582, 3.3652868270874023, 3.6136252880096436, 3.8619637489318848, 4.110301971435547, 4.358640670776367, 4.606978893280029, 4.85531759262085, 5.103655815124512, 5.351994514465332, 5.600333213806152, 5.8486714363098145, 6.097010135650635]}, "gradients/encoder.encoder.layers.14.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 0.0, 4.0, 4.0, 0.0, 5.0, 2.0, 5.0, 7.0, 9.0, 12.0, 6.0, 19.0, 17.0, 27.0, 37.0, 41.0, 54.0, 57.0, 100.0, 139.0, 273.0, 392.0, 734.0, 1718.0, 3988.0, 11620.0, 54090.0, 3711002.0, 370534.0, 26397.0, 7184.0, 2753.0, 1236.0, 668.0, 375.0, 197.0, 144.0, 117.0, 69.0, 54.0, 42.0, 34.0, 22.0, 30.0, 17.0, 8.0, 9.0, 7.0, 5.0, 5.0, 7.0, 10.0, 2.0, 1.0, 4.0, 1.0, 1.0, 0.0, 1.0, 2.0, 2.0], "bins": [-1.1982421875, -1.1593475341796875, -1.120452880859375, -1.0815582275390625, -1.04266357421875, -1.0037689208984375, -0.964874267578125, -0.9259796142578125, -0.8870849609375, -0.8481903076171875, -0.809295654296875, -0.7704010009765625, -0.73150634765625, -0.6926116943359375, -0.653717041015625, -0.6148223876953125, -0.575927734375, -0.5370330810546875, -0.498138427734375, -0.4592437744140625, -0.42034912109375, -0.3814544677734375, -0.342559814453125, -0.3036651611328125, -0.2647705078125, -0.2258758544921875, -0.186981201171875, -0.1480865478515625, -0.10919189453125, -0.0702972412109375, -0.031402587890625, 0.0074920654296875, 0.04638671875, 0.0852813720703125, 0.124176025390625, 0.1630706787109375, 0.20196533203125, 0.2408599853515625, 0.279754638671875, 0.3186492919921875, 0.3575439453125, 0.3964385986328125, 0.435333251953125, 0.4742279052734375, 0.51312255859375, 0.5520172119140625, 0.590911865234375, 0.6298065185546875, 0.668701171875, 0.7075958251953125, 0.746490478515625, 0.7853851318359375, 0.82427978515625, 0.8631744384765625, 0.902069091796875, 0.9409637451171875, 0.9798583984375, 1.0187530517578125, 1.057647705078125, 1.0965423583984375, 1.13543701171875, 1.1743316650390625, 1.213226318359375, 1.2521209716796875, 1.291015625]}, "gradients/encoder.encoder.layers.14.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 3.0, 1.0, 3.0, 6.0, 9.0, 11.0, 31.0, 56.0, 60.0, 68.0, 76.0, 117.0, 118.0, 109.0, 119.0, 80.0, 50.0, 32.0, 22.0, 13.0, 10.0, 4.0, 5.0, 3.0, 2.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0], "bins": [-0.7685546875, -0.746856689453125, -0.72515869140625, -0.703460693359375, -0.6817626953125, -0.660064697265625, -0.63836669921875, -0.616668701171875, -0.594970703125, -0.573272705078125, -0.55157470703125, -0.529876708984375, -0.5081787109375, -0.486480712890625, -0.46478271484375, -0.443084716796875, -0.42138671875, -0.399688720703125, -0.37799072265625, -0.356292724609375, -0.3345947265625, -0.312896728515625, -0.29119873046875, -0.269500732421875, -0.247802734375, -0.226104736328125, -0.20440673828125, -0.182708740234375, -0.1610107421875, -0.139312744140625, -0.11761474609375, -0.095916748046875, -0.07421875, -0.052520751953125, -0.03082275390625, -0.009124755859375, 0.0125732421875, 0.034271240234375, 0.05596923828125, 0.077667236328125, 0.099365234375, 0.121063232421875, 0.14276123046875, 0.164459228515625, 0.1861572265625, 0.207855224609375, 0.22955322265625, 0.251251220703125, 0.27294921875, 0.294647216796875, 0.31634521484375, 0.338043212890625, 0.3597412109375, 0.381439208984375, 0.40313720703125, 0.424835205078125, 0.446533203125, 0.468231201171875, 0.48992919921875, 0.511627197265625, 0.5333251953125, 0.555023193359375, 0.57672119140625, 0.598419189453125, 0.6201171875]}, "gradients/encoder.encoder.layers.14.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 0.0, 2.0, 0.0, 3.0, 4.0, 13.0, 35.0, 71.0, 120.0, 304.0, 705.0, 2023.0, 13032.0, 3853043.0, 315566.0, 6800.0, 1480.0, 603.0, 254.0, 118.0, 61.0, 30.0, 11.0, 4.0, 1.0, 1.0, 2.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-2.359375, -2.289703369140625, -2.22003173828125, -2.150360107421875, -2.0806884765625, -2.011016845703125, -1.94134521484375, -1.871673583984375, -1.802001953125, -1.732330322265625, -1.66265869140625, -1.592987060546875, -1.5233154296875, -1.453643798828125, -1.38397216796875, -1.314300537109375, -1.24462890625, -1.174957275390625, -1.10528564453125, -1.035614013671875, -0.9659423828125, -0.896270751953125, -0.82659912109375, -0.756927490234375, -0.687255859375, -0.617584228515625, -0.54791259765625, -0.478240966796875, -0.4085693359375, -0.338897705078125, -0.26922607421875, -0.199554443359375, -0.1298828125, -0.060211181640625, 0.00946044921875, 0.079132080078125, 0.1488037109375, 0.218475341796875, 0.28814697265625, 0.357818603515625, 0.427490234375, 0.497161865234375, 0.56683349609375, 0.636505126953125, 0.7061767578125, 0.775848388671875, 0.84552001953125, 0.915191650390625, 0.98486328125, 1.054534912109375, 1.12420654296875, 1.193878173828125, 1.2635498046875, 1.333221435546875, 1.40289306640625, 1.472564697265625, 1.542236328125, 1.611907958984375, 1.68157958984375, 1.751251220703125, 1.8209228515625, 1.890594482421875, 1.96026611328125, 2.029937744140625, 2.099609375]}, "gradients/encoder.encoder.layers.14.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 4.0, 6.0, 6.0, 5.0, 10.0, 17.0, 33.0, 49.0, 167.0, 1073.0, 2208.0, 298.0, 89.0, 45.0, 22.0, 20.0, 9.0, 5.0, 5.0, 5.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.53173828125, -0.5113983154296875, -0.491058349609375, -0.4707183837890625, -0.45037841796875, -0.4300384521484375, -0.409698486328125, -0.3893585205078125, -0.3690185546875, -0.3486785888671875, -0.328338623046875, -0.3079986572265625, -0.28765869140625, -0.2673187255859375, -0.246978759765625, -0.2266387939453125, -0.206298828125, -0.1859588623046875, -0.165618896484375, -0.1452789306640625, -0.12493896484375, -0.1045989990234375, -0.084259033203125, -0.0639190673828125, -0.0435791015625, -0.0232391357421875, -0.002899169921875, 0.0174407958984375, 0.03778076171875, 0.0581207275390625, 0.078460693359375, 0.0988006591796875, 0.119140625, 0.1394805908203125, 0.159820556640625, 0.1801605224609375, 0.20050048828125, 0.2208404541015625, 0.241180419921875, 0.2615203857421875, 0.2818603515625, 0.3022003173828125, 0.322540283203125, 0.3428802490234375, 0.36322021484375, 0.3835601806640625, 0.403900146484375, 0.4242401123046875, 0.444580078125, 0.4649200439453125, 0.485260009765625, 0.5055999755859375, 0.52593994140625, 0.5462799072265625, 0.566619873046875, 0.5869598388671875, 0.6072998046875, 0.6276397705078125, 0.647979736328125, 0.6683197021484375, 0.68865966796875, 0.7089996337890625, 0.729339599609375, 0.7496795654296875, 0.77001953125]}, "gradients/encoder.encoder.layers.14.final_layer_norm.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 7.0, 22.0, 96.0, 328.0, 355.0, 148.0, 38.0, 9.0, 4.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-3.1776537895202637, -3.0316455364227295, -2.885637044906616, -2.739628791809082, -2.5936203002929688, -2.4476120471954346, -2.3016037940979004, -2.155595302581787, -2.009587049484253, -1.8635786771774292, -1.7175703048706055, -1.5715620517730713, -1.4255536794662476, -1.2795453071594238, -1.1335370540618896, -0.9875286817550659, -0.8415203094482422, -0.6955119371414185, -0.5495036244392395, -0.40349528193473816, -0.2574869394302368, -0.11147856712341309, 0.03452974557876587, 0.18053805828094482, 0.32654643058776855, 0.4725547730922699, 0.6185631155967712, 0.7645714282989502, 0.9105798006057739, 1.0565881729125977, 1.2025964260101318, 1.3486047983169556, 1.4946131706237793, 1.640621542930603, 1.7866299152374268, 1.932638168334961, 2.078646659851074, 2.2246549129486084, 2.3706631660461426, 2.516671657562256, 2.66267991065979, 2.808688163757324, 2.9546966552734375, 3.1007049083709717, 3.246713161468506, 3.392721652984619, 3.5387299060821533, 3.6847381591796875, 3.830746650695801, 3.976754903793335, 4.122763156890869, 4.268771648406982, 4.414780139923096, 4.560788154602051, 4.706796646118164, 4.852805137634277, 4.998813629150391, 5.144822120666504, 5.290830135345459, 5.436838626861572, 5.5828471183776855, 5.728855133056641, 5.874863624572754, 6.020872116088867, 6.166880130767822]}, "gradients/encoder.encoder.layers.14.final_layer_norm.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 7.0, 8.0, 13.0, 23.0, 22.0, 36.0, 56.0, 43.0, 58.0, 80.0, 84.0, 74.0, 74.0, 66.0, 61.0, 70.0, 52.0, 44.0, 47.0, 36.0, 17.0, 17.0, 7.0, 4.0, 5.0, 4.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.9207372665405273, -1.852354645729065, -1.783972144126892, -1.7155895233154297, -1.6472070217132568, -1.5788244009017944, -1.510441780090332, -1.4420592784881592, -1.3736766576766968, -1.3052940368652344, -1.2369115352630615, -1.1685289144515991, -1.1001462936401367, -1.0317637920379639, -0.9633811712265015, -0.8949986100196838, -0.8266160488128662, -0.7582334876060486, -0.689850926399231, -0.6214683055877686, -0.5530857443809509, -0.4847031831741333, -0.4163205921649933, -0.34793800115585327, -0.27955543994903564, -0.21117286384105682, -0.142790287733078, -0.07440771162509918, -0.006025135517120361, 0.062357425689697266, 0.13074001669883728, 0.1991226077079773, 0.267505407333374, 0.33588796854019165, 0.40427055954933167, 0.4726531505584717, 0.5410357117652893, 0.6094182729721069, 0.6778008937835693, 0.746183454990387, 0.8145660161972046, 0.8829485774040222, 0.9513311386108398, 1.0197137594223022, 1.0880963802337646, 1.1564788818359375, 1.2248615026474, 1.2932441234588623, 1.3616266250610352, 1.4300092458724976, 1.4983917474746704, 1.5667743682861328, 1.6351568698883057, 1.703539490699768, 1.7719221115112305, 1.8403046131134033, 1.9086872339248657, 1.9770698547363281, 2.045452356338501, 2.113834857940674, 2.182217597961426, 2.2506000995635986, 2.3189826011657715, 2.3873653411865234, 2.4557478427886963]}, "gradients/encoder.encoder.layers.14.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 4.0, 1.0, 0.0, 0.0, 1.0, 2.0, 3.0, 7.0, 7.0, 21.0, 42.0, 132.0, 273.0, 921.0, 4979.0, 184829.0, 840916.0, 14082.0, 1622.0, 440.0, 167.0, 53.0, 36.0, 9.0, 7.0, 2.0, 4.0, 2.0, 5.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-3.63671875, -3.52716064453125, -3.4176025390625, -3.30804443359375, -3.198486328125, -3.08892822265625, -2.9793701171875, -2.86981201171875, -2.76025390625, -2.65069580078125, -2.5411376953125, -2.43157958984375, -2.322021484375, -2.21246337890625, -2.1029052734375, -1.99334716796875, -1.8837890625, -1.77423095703125, -1.6646728515625, -1.55511474609375, -1.445556640625, -1.33599853515625, -1.2264404296875, -1.11688232421875, -1.00732421875, -0.89776611328125, -0.7882080078125, -0.67864990234375, -0.569091796875, -0.45953369140625, -0.3499755859375, -0.24041748046875, -0.130859375, -0.02130126953125, 0.0882568359375, 0.19781494140625, 0.307373046875, 0.41693115234375, 0.5264892578125, 0.63604736328125, 0.74560546875, 0.85516357421875, 0.9647216796875, 1.07427978515625, 1.183837890625, 1.29339599609375, 1.4029541015625, 1.51251220703125, 1.6220703125, 1.73162841796875, 1.8411865234375, 1.95074462890625, 2.060302734375, 2.16986083984375, 2.2794189453125, 2.38897705078125, 2.49853515625, 2.60809326171875, 2.7176513671875, 2.82720947265625, 2.936767578125, 3.04632568359375, 3.1558837890625, 3.26544189453125, 3.375]}, "gradients/encoder.encoder.layers.14.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 3.0, 1.0, 3.0, 1.0, 11.0, 9.0, 25.0, 39.0, 62.0, 62.0, 81.0, 93.0, 125.0, 126.0, 109.0, 93.0, 59.0, 42.0, 25.0, 15.0, 8.0, 7.0, 3.0, 5.0, 2.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-0.80517578125, -0.782684326171875, -0.76019287109375, -0.737701416015625, -0.7152099609375, -0.692718505859375, -0.67022705078125, -0.647735595703125, -0.625244140625, -0.602752685546875, -0.58026123046875, -0.557769775390625, -0.5352783203125, -0.512786865234375, -0.49029541015625, -0.467803955078125, -0.4453125, -0.422821044921875, -0.40032958984375, -0.377838134765625, -0.3553466796875, -0.332855224609375, -0.31036376953125, -0.287872314453125, -0.265380859375, -0.242889404296875, -0.22039794921875, -0.197906494140625, -0.1754150390625, -0.152923583984375, -0.13043212890625, -0.107940673828125, -0.08544921875, -0.062957763671875, -0.04046630859375, -0.017974853515625, 0.0045166015625, 0.027008056640625, 0.04949951171875, 0.071990966796875, 0.094482421875, 0.116973876953125, 0.13946533203125, 0.161956787109375, 0.1844482421875, 0.206939697265625, 0.22943115234375, 0.251922607421875, 0.2744140625, 0.296905517578125, 0.31939697265625, 0.341888427734375, 0.3643798828125, 0.386871337890625, 0.40936279296875, 0.431854248046875, 0.454345703125, 0.476837158203125, 0.49932861328125, 0.521820068359375, 0.5443115234375, 0.566802978515625, 0.58929443359375, 0.611785888671875, 0.63427734375]}, "gradients/encoder.encoder.layers.14.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 3.0, 5.0, 8.0, 12.0, 11.0, 15.0, 20.0, 30.0, 38.0, 59.0, 65.0, 106.0, 136.0, 211.0, 423.0, 1118.0, 3359.0, 13662.0, 87528.0, 663825.0, 238887.0, 29776.0, 6022.0, 1737.0, 666.0, 307.0, 163.0, 108.0, 67.0, 52.0, 42.0, 32.0, 12.0, 16.0, 7.0, 11.0, 9.0, 4.0, 5.0, 1.0, 1.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.0634765625, -1.023681640625, -0.98388671875, -0.944091796875, -0.904296875, -0.864501953125, -0.82470703125, -0.784912109375, -0.7451171875, -0.705322265625, -0.66552734375, -0.625732421875, -0.5859375, -0.546142578125, -0.50634765625, -0.466552734375, -0.4267578125, -0.386962890625, -0.34716796875, -0.307373046875, -0.267578125, -0.227783203125, -0.18798828125, -0.148193359375, -0.1083984375, -0.068603515625, -0.02880859375, 0.010986328125, 0.05078125, 0.090576171875, 0.13037109375, 0.170166015625, 0.2099609375, 0.249755859375, 0.28955078125, 0.329345703125, 0.369140625, 0.408935546875, 0.44873046875, 0.488525390625, 0.5283203125, 0.568115234375, 0.60791015625, 0.647705078125, 0.6875, 0.727294921875, 0.76708984375, 0.806884765625, 0.8466796875, 0.886474609375, 0.92626953125, 0.966064453125, 1.005859375, 1.045654296875, 1.08544921875, 1.125244140625, 1.1650390625, 1.204833984375, 1.24462890625, 1.284423828125, 1.32421875, 1.364013671875, 1.40380859375, 1.443603515625, 1.4833984375]}, "gradients/encoder.encoder.layers.14.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 4.0, 3.0, 1.0, 2.0, 3.0, 6.0, 5.0, 4.0, 10.0, 10.0, 4.0, 12.0, 17.0, 18.0, 17.0, 26.0, 24.0, 37.0, 30.0, 26.0, 23.0, 33.0, 39.0, 53.0, 39.0, 31.0, 57.0, 31.0, 50.0, 47.0, 39.0, 38.0, 36.0, 30.0, 27.0, 34.0, 18.0, 16.0, 13.0, 17.0, 12.0, 14.0, 11.0, 10.0, 8.0, 4.0, 9.0, 7.0, 1.0, 3.0, 1.0, 3.0, 0.0, 1.0, 3.0, 1.0, 1.0, 0.0, 2.0], "bins": [-1.216796875, -1.178497314453125, -1.14019775390625, -1.101898193359375, -1.0635986328125, -1.025299072265625, -0.98699951171875, -0.948699951171875, -0.910400390625, -0.872100830078125, -0.83380126953125, -0.795501708984375, -0.7572021484375, -0.718902587890625, -0.68060302734375, -0.642303466796875, -0.60400390625, -0.565704345703125, -0.52740478515625, -0.489105224609375, -0.4508056640625, -0.412506103515625, -0.37420654296875, -0.335906982421875, -0.297607421875, -0.259307861328125, -0.22100830078125, -0.182708740234375, -0.1444091796875, -0.106109619140625, -0.06781005859375, -0.029510498046875, 0.0087890625, 0.047088623046875, 0.08538818359375, 0.123687744140625, 0.1619873046875, 0.200286865234375, 0.23858642578125, 0.276885986328125, 0.315185546875, 0.353485107421875, 0.39178466796875, 0.430084228515625, 0.4683837890625, 0.506683349609375, 0.54498291015625, 0.583282470703125, 0.62158203125, 0.659881591796875, 0.69818115234375, 0.736480712890625, 0.7747802734375, 0.813079833984375, 0.85137939453125, 0.889678955078125, 0.927978515625, 0.966278076171875, 1.00457763671875, 1.042877197265625, 1.0811767578125, 1.119476318359375, 1.15777587890625, 1.196075439453125, 1.234375]}, "gradients/encoder.encoder.layers.14.attention.k_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 3.0, 2.0, 8.0, 10.0, 12.0, 15.0, 24.0, 40.0, 86.0, 164.0, 347.0, 947.0, 3354.0, 34046.0, 872948.0, 127775.0, 6474.0, 1352.0, 471.0, 215.0, 91.0, 67.0, 36.0, 25.0, 13.0, 8.0, 7.0, 5.0, 3.0, 3.0, 0.0, 0.0, 5.0, 2.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.6865234375, -0.6648483276367188, -0.6431732177734375, -0.6214981079101562, -0.599822998046875, -0.5781478881835938, -0.5564727783203125, -0.5347976684570312, -0.51312255859375, -0.49144744873046875, -0.4697723388671875, -0.44809722900390625, -0.426422119140625, -0.40474700927734375, -0.3830718994140625, -0.36139678955078125, -0.3397216796875, -0.31804656982421875, -0.2963714599609375, -0.27469635009765625, -0.253021240234375, -0.23134613037109375, -0.2096710205078125, -0.18799591064453125, -0.16632080078125, -0.14464569091796875, -0.1229705810546875, -0.10129547119140625, -0.079620361328125, -0.05794525146484375, -0.0362701416015625, -0.01459503173828125, 0.007080078125, 0.02875518798828125, 0.0504302978515625, 0.07210540771484375, 0.093780517578125, 0.11545562744140625, 0.1371307373046875, 0.15880584716796875, 0.18048095703125, 0.20215606689453125, 0.2238311767578125, 0.24550628662109375, 0.267181396484375, 0.28885650634765625, 0.3105316162109375, 0.33220672607421875, 0.3538818359375, 0.37555694580078125, 0.3972320556640625, 0.41890716552734375, 0.440582275390625, 0.46225738525390625, 0.4839324951171875, 0.5056076049804688, 0.52728271484375, 0.5489578247070312, 0.5706329345703125, 0.5923080444335938, 0.613983154296875, 0.6356582641601562, 0.6573333740234375, 0.6790084838867188, 0.70068359375]}, "gradients/encoder.encoder.layers.14.attention.k_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 1.0, 4.0, 2.0, 4.0, 6.0, 8.0, 8.0, 15.0, 6.0, 4.0, 14.0, 11.0, 17.0, 19.0, 36.0, 56.0, 104.0, 154.0, 195.0, 101.0, 69.0, 35.0, 25.0, 18.0, 20.0, 13.0, 13.0, 9.0, 6.0, 5.0, 7.0, 5.0, 3.0, 6.0, 4.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0001609325408935547, -0.000155741348862648, -0.00015055015683174133, -0.00014535896480083466, -0.00014016777276992798, -0.0001349765807390213, -0.00012978538870811462, -0.00012459419667720795, -0.00011940300464630127, -0.00011421181261539459, -0.00010902062058448792, -0.00010382942855358124, -9.863823652267456e-05, -9.344704449176788e-05, -8.82558524608612e-05, -8.306466042995453e-05, -7.787346839904785e-05, -7.268227636814117e-05, -6.74910843372345e-05, -6.229989230632782e-05, -5.710870027542114e-05, -5.1917508244514465e-05, -4.672631621360779e-05, -4.153512418270111e-05, -3.6343932151794434e-05, -3.1152740120887756e-05, -2.596154808998108e-05, -2.0770356059074402e-05, -1.5579164028167725e-05, -1.0387971997261047e-05, -5.19677996635437e-06, -5.587935447692871e-09, 5.185604095458984e-06, 1.0376796126365662e-05, 1.556798815727234e-05, 2.0759180188179016e-05, 2.5950372219085693e-05, 3.114156424999237e-05, 3.633275628089905e-05, 4.1523948311805725e-05, 4.67151403427124e-05, 5.190633237361908e-05, 5.709752440452576e-05, 6.228871643543243e-05, 6.747990846633911e-05, 7.267110049724579e-05, 7.786229252815247e-05, 8.305348455905914e-05, 8.824467658996582e-05, 9.34358686208725e-05, 9.862706065177917e-05, 0.00010381825268268585, 0.00010900944471359253, 0.0001142006367444992, 0.00011939182877540588, 0.00012458302080631256, 0.00012977421283721924, 0.00013496540486812592, 0.0001401565968990326, 0.00014534778892993927, 0.00015053898096084595, 0.00015573017299175262, 0.0001609213650226593, 0.00016611255705356598, 0.00017130374908447266]}, "gradients/encoder.encoder.layers.14.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 5.0, 5.0, 3.0, 2.0, 2.0, 10.0, 9.0, 17.0, 18.0, 21.0, 35.0, 53.0, 86.0, 236.0, 522.0, 1642.0, 6999.0, 72310.0, 849656.0, 105496.0, 8448.0, 1829.0, 606.0, 250.0, 110.0, 69.0, 25.0, 31.0, 17.0, 14.0, 6.0, 5.0, 6.0, 5.0, 5.0, 2.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.434326171875, -0.4173316955566406, -0.40033721923828125, -0.3833427429199219, -0.3663482666015625, -0.3493537902832031, -0.33235931396484375, -0.3153648376464844, -0.298370361328125, -0.2813758850097656, -0.26438140869140625, -0.24738693237304688, -0.2303924560546875, -0.21339797973632812, -0.19640350341796875, -0.17940902709960938, -0.16241455078125, -0.14542007446289062, -0.12842559814453125, -0.11143112182617188, -0.0944366455078125, -0.07744216918945312, -0.06044769287109375, -0.043453216552734375, -0.026458740234375, -0.009464263916015625, 0.00753021240234375, 0.024524688720703125, 0.0415191650390625, 0.058513641357421875, 0.07550811767578125, 0.09250259399414062, 0.1094970703125, 0.12649154663085938, 0.14348602294921875, 0.16048049926757812, 0.1774749755859375, 0.19446945190429688, 0.21146392822265625, 0.22845840454101562, 0.245452880859375, 0.2624473571777344, 0.27944183349609375, 0.2964363098144531, 0.3134307861328125, 0.3304252624511719, 0.34741973876953125, 0.3644142150878906, 0.38140869140625, 0.3984031677246094, 0.41539764404296875, 0.4323921203613281, 0.4493865966796875, 0.4663810729980469, 0.48337554931640625, 0.5003700256347656, 0.517364501953125, 0.5343589782714844, 0.5513534545898438, 0.5683479309082031, 0.5853424072265625, 0.6023368835449219, 0.6193313598632812, 0.6363258361816406, 0.6533203125]}, "gradients/encoder.encoder.layers.14.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 5.0, 2.0, 4.0, 5.0, 5.0, 7.0, 10.0, 20.0, 26.0, 31.0, 57.0, 64.0, 94.0, 139.0, 124.0, 121.0, 80.0, 58.0, 35.0, 41.0, 19.0, 15.0, 12.0, 8.0, 10.0, 3.0, 8.0, 3.0, 2.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.355224609375, -0.3400077819824219, -0.32479095458984375, -0.3095741271972656, -0.2943572998046875, -0.2791404724121094, -0.26392364501953125, -0.24870681762695312, -0.233489990234375, -0.21827316284179688, -0.20305633544921875, -0.18783950805664062, -0.1726226806640625, -0.15740585327148438, -0.14218902587890625, -0.12697219848632812, -0.11175537109375, -0.09653854370117188, -0.08132171630859375, -0.06610488891601562, -0.0508880615234375, -0.035671234130859375, -0.02045440673828125, -0.005237579345703125, 0.009979248046875, 0.025196075439453125, 0.04041290283203125, 0.055629730224609375, 0.0708465576171875, 0.08606338500976562, 0.10128021240234375, 0.11649703979492188, 0.1317138671875, 0.14693069458007812, 0.16214752197265625, 0.17736434936523438, 0.1925811767578125, 0.20779800415039062, 0.22301483154296875, 0.23823165893554688, 0.253448486328125, 0.2686653137207031, 0.28388214111328125, 0.2990989685058594, 0.3143157958984375, 0.3295326232910156, 0.34474945068359375, 0.3599662780761719, 0.37518310546875, 0.3903999328613281, 0.40561676025390625, 0.4208335876464844, 0.4360504150390625, 0.4512672424316406, 0.46648406982421875, 0.4817008972167969, 0.496917724609375, 0.5121345520019531, 0.5273513793945312, 0.5425682067871094, 0.5577850341796875, 0.5730018615722656, 0.5882186889648438, 0.6034355163574219, 0.61865234375]}, "gradients/encoder.encoder.layers.14.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 45.0, 533.0, 381.0, 44.0, 7.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-42.19060516357422, -41.18220901489258, -40.17381286621094, -39.1654167175293, -38.157020568847656, -37.148624420166016, -36.140228271484375, -35.131832122802734, -34.123435974121094, -33.11503982543945, -32.10664367675781, -31.098247528076172, -30.08985137939453, -29.08145523071289, -28.07305908203125, -27.06466293334961, -26.056264877319336, -25.047868728637695, -24.039472579956055, -23.031076431274414, -22.022680282592773, -21.014284133911133, -20.00588607788086, -18.99748992919922, -17.989093780517578, -16.980697631835938, -15.972301483154297, -14.963905334472656, -13.955509185791016, -12.947113037109375, -11.938715934753418, -10.930319786071777, -9.921924591064453, -8.913528442382812, -7.905132293701172, -6.896735668182373, -5.888339519500732, -4.879943370819092, -3.871546745300293, -2.8631505966186523, -1.8547544479370117, -0.8463581800460815, 0.16203808784484863, 1.1704344749450684, 2.178830623626709, 3.1872267723083496, 4.195623397827148, 5.204019546508789, 6.21241569519043, 7.22081184387207, 8.229207992553711, 9.237604141235352, 10.246000289916992, 11.254396438598633, 12.26279354095459, 13.27118968963623, 14.279585838317871, 15.287981986999512, 16.29637908935547, 17.30477523803711, 18.31317138671875, 19.32156753540039, 20.32996368408203, 21.338359832763672, 22.346755981445312]}, "gradients/encoder.encoder.layers.14.layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 3.0, 5.0, 6.0, 6.0, 8.0, 11.0, 8.0, 17.0, 18.0, 13.0, 26.0, 24.0, 35.0, 25.0, 24.0, 32.0, 39.0, 45.0, 36.0, 36.0, 37.0, 43.0, 43.0, 47.0, 41.0, 37.0, 32.0, 40.0, 35.0, 34.0, 31.0, 33.0, 22.0, 13.0, 15.0, 23.0, 12.0, 13.0, 9.0, 9.0, 9.0, 6.0, 7.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0], "bins": [-6.6352081298828125, -6.434624195098877, -6.234039783477783, -6.033455848693848, -5.832871913909912, -5.632287979125977, -5.431703567504883, -5.231119632720947, -5.030535697937012, -4.829951763153076, -4.629367351531982, -4.428783416748047, -4.228199481964111, -4.027615547180176, -3.827031135559082, -3.6264472007751465, -3.4258627891540527, -3.225278615951538, -3.0246946811676025, -2.824110507965088, -2.6235265731811523, -2.4229423999786377, -2.222358226776123, -2.0217742919921875, -1.8211901187896729, -1.6206060647964478, -1.4200220108032227, -1.219437837600708, -1.018853783607483, -0.8182697296142578, -0.6176855564117432, -0.41710150241851807, -0.21651744842529297, -0.015933364629745483, 0.184650719165802, 0.3852348327636719, 0.585818886756897, 0.7864029407501221, 0.9869871139526367, 1.1875711679458618, 1.388155221939087, 1.588739275932312, 1.789323329925537, 1.9899075031280518, 2.1904916763305664, 2.391075611114502, 2.5916597843170166, 2.7922439575195312, 2.992827892303467, 3.1934120655059814, 3.393996000289917, 3.5945801734924316, 3.795164108276367, 3.995748281478882, 4.1963324546813965, 4.396916389465332, 4.597500801086426, 4.798084735870361, 4.998669147491455, 5.199253082275391, 5.399837017059326, 5.600420951843262, 5.8010053634643555, 6.001589298248291, 6.202173233032227]}, "gradients/encoder.encoder.layers.13.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 4.0, 3.0, 4.0, 1.0, 5.0, 3.0, 10.0, 15.0, 32.0, 46.0, 98.0, 134.0, 237.0, 571.0, 1589.0, 6611.0, 54836.0, 4068334.0, 52615.0, 6222.0, 1643.0, 623.0, 279.0, 147.0, 82.0, 51.0, 39.0, 22.0, 11.0, 14.0, 6.0, 5.0, 2.0, 0.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.501953125, -3.39739990234375, -3.2928466796875, -3.18829345703125, -3.083740234375, -2.97918701171875, -2.8746337890625, -2.77008056640625, -2.66552734375, -2.56097412109375, -2.4564208984375, -2.35186767578125, -2.247314453125, -2.14276123046875, -2.0382080078125, -1.93365478515625, -1.8291015625, -1.72454833984375, -1.6199951171875, -1.51544189453125, -1.410888671875, -1.30633544921875, -1.2017822265625, -1.09722900390625, -0.99267578125, -0.88812255859375, -0.7835693359375, -0.67901611328125, -0.574462890625, -0.46990966796875, -0.3653564453125, -0.26080322265625, -0.15625, -0.05169677734375, 0.0528564453125, 0.15740966796875, 0.261962890625, 0.36651611328125, 0.4710693359375, 0.57562255859375, 0.68017578125, 0.78472900390625, 0.8892822265625, 0.99383544921875, 1.098388671875, 1.20294189453125, 1.3074951171875, 1.41204833984375, 1.5166015625, 1.62115478515625, 1.7257080078125, 1.83026123046875, 1.934814453125, 2.03936767578125, 2.1439208984375, 2.24847412109375, 2.35302734375, 2.45758056640625, 2.5621337890625, 2.66668701171875, 2.771240234375, 2.87579345703125, 2.9803466796875, 3.08489990234375, 3.189453125]}, "gradients/encoder.encoder.layers.13.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 2.0, 6.0, 6.0, 7.0, 20.0, 28.0, 40.0, 64.0, 76.0, 80.0, 118.0, 106.0, 128.0, 104.0, 72.0, 53.0, 34.0, 21.0, 15.0, 10.0, 6.0, 7.0, 3.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0], "bins": [-0.82421875, -0.80096435546875, -0.7777099609375, -0.75445556640625, -0.731201171875, -0.70794677734375, -0.6846923828125, -0.66143798828125, -0.63818359375, -0.61492919921875, -0.5916748046875, -0.56842041015625, -0.545166015625, -0.52191162109375, -0.4986572265625, -0.47540283203125, -0.4521484375, -0.42889404296875, -0.4056396484375, -0.38238525390625, -0.359130859375, -0.33587646484375, -0.3126220703125, -0.28936767578125, -0.26611328125, -0.24285888671875, -0.2196044921875, -0.19635009765625, -0.173095703125, -0.14984130859375, -0.1265869140625, -0.10333251953125, -0.080078125, -0.05682373046875, -0.0335693359375, -0.01031494140625, 0.012939453125, 0.03619384765625, 0.0594482421875, 0.08270263671875, 0.10595703125, 0.12921142578125, 0.1524658203125, 0.17572021484375, 0.198974609375, 0.22222900390625, 0.2454833984375, 0.26873779296875, 0.2919921875, 0.31524658203125, 0.3385009765625, 0.36175537109375, 0.385009765625, 0.40826416015625, 0.4315185546875, 0.45477294921875, 0.47802734375, 0.50128173828125, 0.5245361328125, 0.54779052734375, 0.571044921875, 0.59429931640625, 0.6175537109375, 0.64080810546875, 0.6640625]}, "gradients/encoder.encoder.layers.13.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 4.0, 2.0, 10.0, 13.0, 47.0, 134.0, 259.0, 717.0, 1751.0, 12108.0, 4126033.0, 48600.0, 2957.0, 987.0, 379.0, 177.0, 72.0, 21.0, 11.0, 5.0, 5.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.26953125, -4.12066650390625, -3.9718017578125, -3.82293701171875, -3.674072265625, -3.52520751953125, -3.3763427734375, -3.22747802734375, -3.07861328125, -2.92974853515625, -2.7808837890625, -2.63201904296875, -2.483154296875, -2.33428955078125, -2.1854248046875, -2.03656005859375, -1.8876953125, -1.73883056640625, -1.5899658203125, -1.44110107421875, -1.292236328125, -1.14337158203125, -0.9945068359375, -0.84564208984375, -0.69677734375, -0.54791259765625, -0.3990478515625, -0.25018310546875, -0.101318359375, 0.04754638671875, 0.1964111328125, 0.34527587890625, 0.494140625, 0.64300537109375, 0.7918701171875, 0.94073486328125, 1.089599609375, 1.23846435546875, 1.3873291015625, 1.53619384765625, 1.68505859375, 1.83392333984375, 1.9827880859375, 2.13165283203125, 2.280517578125, 2.42938232421875, 2.5782470703125, 2.72711181640625, 2.8759765625, 3.02484130859375, 3.1737060546875, 3.32257080078125, 3.471435546875, 3.62030029296875, 3.7691650390625, 3.91802978515625, 4.06689453125, 4.21575927734375, 4.3646240234375, 4.51348876953125, 4.662353515625, 4.81121826171875, 4.9600830078125, 5.10894775390625, 5.2578125]}, "gradients/encoder.encoder.layers.13.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 4.0, 2.0, 2.0, 1.0, 7.0, 8.0, 12.0, 25.0, 51.0, 144.0, 2746.0, 904.0, 86.0, 38.0, 21.0, 11.0, 6.0, 6.0, 3.0, 2.0, 2.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.240234375, -1.1987457275390625, -1.157257080078125, -1.1157684326171875, -1.07427978515625, -1.0327911376953125, -0.991302490234375, -0.9498138427734375, -0.9083251953125, -0.8668365478515625, -0.825347900390625, -0.7838592529296875, -0.74237060546875, -0.7008819580078125, -0.659393310546875, -0.6179046630859375, -0.576416015625, -0.5349273681640625, -0.493438720703125, -0.4519500732421875, -0.41046142578125, -0.3689727783203125, -0.327484130859375, -0.2859954833984375, -0.2445068359375, -0.2030181884765625, -0.161529541015625, -0.1200408935546875, -0.07855224609375, -0.0370635986328125, 0.004425048828125, 0.0459136962890625, 0.08740234375, 0.1288909912109375, 0.170379638671875, 0.2118682861328125, 0.25335693359375, 0.2948455810546875, 0.336334228515625, 0.3778228759765625, 0.4193115234375, 0.4608001708984375, 0.502288818359375, 0.5437774658203125, 0.58526611328125, 0.6267547607421875, 0.668243408203125, 0.7097320556640625, 0.751220703125, 0.7927093505859375, 0.834197998046875, 0.8756866455078125, 0.91717529296875, 0.9586639404296875, 1.000152587890625, 1.0416412353515625, 1.0831298828125, 1.1246185302734375, 1.166107177734375, 1.2075958251953125, 1.24908447265625, 1.2905731201171875, 1.332061767578125, 1.3735504150390625, 1.4150390625]}, "gradients/encoder.encoder.layers.13.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 1.0, 0.0, 2.0, 1.0, 3.0, 4.0, 18.0, 54.0, 178.0, 397.0, 236.0, 72.0, 23.0, 12.0, 6.0, 3.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.344240188598633, -8.164571762084961, -7.9849042892456055, -7.805236339569092, -7.625568389892578, -7.4459004402160645, -7.266232490539551, -7.086564540863037, -6.906896591186523, -6.72722864151001, -6.547560691833496, -6.367892742156982, -6.188224792480469, -6.008556842803955, -5.828888893127441, -5.649220943450928, -5.469552993774414, -5.2898850440979, -5.110217094421387, -4.930549144744873, -4.750881195068359, -4.571213245391846, -4.391545295715332, -4.211877346038818, -4.0322089195251465, -3.852540969848633, -3.672873020172119, -3.4932050704956055, -3.313537120819092, -3.133869171142578, -2.9542012214660645, -2.774533271789551, -2.594865322113037, -2.4151973724365234, -2.2355294227600098, -2.055861473083496, -1.8761935234069824, -1.6965255737304688, -1.5168575048446655, -1.3371895551681519, -1.1575216054916382, -0.9778536558151245, -0.7981857061386108, -0.6185176968574524, -0.4388497471809387, -0.25918179750442505, -0.0795137882232666, 0.10015416145324707, 0.27982211112976074, 0.4594900608062744, 0.6391580104827881, 0.8188260197639465, 0.9984939694404602, 1.178161859512329, 1.3578299283981323, 1.537497878074646, 1.7171658277511597, 1.8968337774276733, 2.0765018463134766, 2.2561697959899902, 2.435837745666504, 2.6155056953430176, 2.7951736450195312, 2.974841594696045, 3.1545095443725586]}, "gradients/encoder.encoder.layers.13.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 5.0, 5.0, 3.0, 5.0, 5.0, 11.0, 19.0, 25.0, 37.0, 40.0, 44.0, 53.0, 55.0, 76.0, 85.0, 77.0, 64.0, 70.0, 67.0, 45.0, 60.0, 40.0, 30.0, 25.0, 24.0, 19.0, 3.0, 5.0, 7.0, 5.0, 3.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.5115015506744385, -2.4132091999053955, -2.3149166107177734, -2.2166242599487305, -2.1183319091796875, -2.0200395584106445, -1.9217469692230225, -1.8234546184539795, -1.725162148475647, -1.6268696784973145, -1.5285773277282715, -1.430284857749939, -1.3319923877716064, -1.2337000370025635, -1.135407567024231, -1.0371150970458984, -0.9388227462768555, -0.8405303359031677, -0.74223792552948, -0.6439454555511475, -0.5456530451774597, -0.447360634803772, -0.34906816482543945, -0.2507757544517517, -0.15248334407806396, -0.05419091880321503, 0.04410150647163391, 0.14239394664764404, 0.2406863570213318, 0.33897876739501953, 0.43727123737335205, 0.5355636477470398, 0.6338562965393066, 0.7321487069129944, 0.8304411172866821, 0.9287335872650146, 1.0270259380340576, 1.1253184080123901, 1.2236108779907227, 1.3219032287597656, 1.4201956987380981, 1.5184881687164307, 1.6167805194854736, 1.7150729894638062, 1.8133654594421387, 1.9116578102111816, 2.0099501609802246, 2.1082427501678467, 2.2065351009368896, 2.3048274517059326, 2.4031200408935547, 2.5014123916625977, 2.5997047424316406, 2.6979970932006836, 2.7962896823883057, 2.8945820331573486, 2.9928746223449707, 3.0911669731140137, 3.1894595623016357, 3.2877519130706787, 3.3860442638397217, 3.4843368530273438, 3.5826292037963867, 3.6809215545654297, 3.7792139053344727]}, "gradients/encoder.encoder.layers.13.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 4.0, 2.0, 2.0, 7.0, 8.0, 10.0, 13.0, 35.0, 48.0, 89.0, 229.0, 546.0, 1571.0, 6114.0, 67485.0, 834735.0, 125955.0, 8615.0, 1935.0, 640.0, 257.0, 133.0, 63.0, 24.0, 24.0, 6.0, 7.0, 1.0, 2.0, 1.0, 3.0, 1.0, 4.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.00390625, -2.9117431640625, -2.819580078125, -2.7274169921875, -2.63525390625, -2.5430908203125, -2.450927734375, -2.3587646484375, -2.2666015625, -2.1744384765625, -2.082275390625, -1.9901123046875, -1.89794921875, -1.8057861328125, -1.713623046875, -1.6214599609375, -1.529296875, -1.4371337890625, -1.344970703125, -1.2528076171875, -1.16064453125, -1.0684814453125, -0.976318359375, -0.8841552734375, -0.7919921875, -0.6998291015625, -0.607666015625, -0.5155029296875, -0.42333984375, -0.3311767578125, -0.239013671875, -0.1468505859375, -0.0546875, 0.0374755859375, 0.129638671875, 0.2218017578125, 0.31396484375, 0.4061279296875, 0.498291015625, 0.5904541015625, 0.6826171875, 0.7747802734375, 0.866943359375, 0.9591064453125, 1.05126953125, 1.1434326171875, 1.235595703125, 1.3277587890625, 1.419921875, 1.5120849609375, 1.604248046875, 1.6964111328125, 1.78857421875, 1.8807373046875, 1.972900390625, 2.0650634765625, 2.1572265625, 2.2493896484375, 2.341552734375, 2.4337158203125, 2.52587890625, 2.6180419921875, 2.710205078125, 2.8023681640625, 2.89453125]}, "gradients/encoder.encoder.layers.13.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 5.0, 8.0, 15.0, 26.0, 37.0, 51.0, 78.0, 79.0, 107.0, 121.0, 120.0, 111.0, 63.0, 60.0, 43.0, 32.0, 14.0, 14.0, 9.0, 2.0, 3.0, 4.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.85205078125, -0.8275222778320312, -0.8029937744140625, -0.7784652709960938, -0.753936767578125, -0.7294082641601562, -0.7048797607421875, -0.6803512573242188, -0.65582275390625, -0.6312942504882812, -0.6067657470703125, -0.5822372436523438, -0.557708740234375, -0.5331802368164062, -0.5086517333984375, -0.48412322998046875, -0.4595947265625, -0.43506622314453125, -0.4105377197265625, -0.38600921630859375, -0.361480712890625, -0.33695220947265625, -0.3124237060546875, -0.28789520263671875, -0.26336669921875, -0.23883819580078125, -0.2143096923828125, -0.18978118896484375, -0.165252685546875, -0.14072418212890625, -0.1161956787109375, -0.09166717529296875, -0.067138671875, -0.04261016845703125, -0.0180816650390625, 0.00644683837890625, 0.030975341796875, 0.05550384521484375, 0.0800323486328125, 0.10456085205078125, 0.12908935546875, 0.15361785888671875, 0.1781463623046875, 0.20267486572265625, 0.227203369140625, 0.25173187255859375, 0.2762603759765625, 0.30078887939453125, 0.3253173828125, 0.34984588623046875, 0.3743743896484375, 0.39890289306640625, 0.423431396484375, 0.44795989990234375, 0.4724884033203125, 0.49701690673828125, 0.52154541015625, 0.5460739135742188, 0.5706024169921875, 0.5951309204101562, 0.619659423828125, 0.6441879272460938, 0.6687164306640625, 0.6932449340820312, 0.7177734375]}, "gradients/encoder.encoder.layers.13.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 3.0, 1.0, 2.0, 6.0, 5.0, 5.0, 4.0, 4.0, 12.0, 18.0, 23.0, 29.0, 44.0, 47.0, 74.0, 129.0, 228.0, 473.0, 993.0, 2793.0, 12059.0, 110784.0, 802845.0, 102035.0, 11434.0, 2605.0, 906.0, 379.0, 209.0, 138.0, 83.0, 60.0, 31.0, 29.0, 21.0, 8.0, 14.0, 9.0, 7.0, 6.0, 4.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.7890625, -1.7188720703125, -1.648681640625, -1.5784912109375, -1.50830078125, -1.4381103515625, -1.367919921875, -1.2977294921875, -1.2275390625, -1.1573486328125, -1.087158203125, -1.0169677734375, -0.94677734375, -0.8765869140625, -0.806396484375, -0.7362060546875, -0.666015625, -0.5958251953125, -0.525634765625, -0.4554443359375, -0.38525390625, -0.3150634765625, -0.244873046875, -0.1746826171875, -0.1044921875, -0.0343017578125, 0.035888671875, 0.1060791015625, 0.17626953125, 0.2464599609375, 0.316650390625, 0.3868408203125, 0.45703125, 0.5272216796875, 0.597412109375, 0.6676025390625, 0.73779296875, 0.8079833984375, 0.878173828125, 0.9483642578125, 1.0185546875, 1.0887451171875, 1.158935546875, 1.2291259765625, 1.29931640625, 1.3695068359375, 1.439697265625, 1.5098876953125, 1.580078125, 1.6502685546875, 1.720458984375, 1.7906494140625, 1.86083984375, 1.9310302734375, 2.001220703125, 2.0714111328125, 2.1416015625, 2.2117919921875, 2.281982421875, 2.3521728515625, 2.42236328125, 2.4925537109375, 2.562744140625, 2.6329345703125, 2.703125]}, "gradients/encoder.encoder.layers.13.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 4.0, 2.0, 2.0, 1.0, 3.0, 4.0, 4.0, 5.0, 8.0, 12.0, 10.0, 20.0, 16.0, 33.0, 35.0, 32.0, 33.0, 52.0, 59.0, 50.0, 63.0, 75.0, 78.0, 69.0, 38.0, 63.0, 43.0, 42.0, 42.0, 21.0, 12.0, 14.0, 13.0, 13.0, 7.0, 5.0, 3.0, 6.0, 5.0, 3.0, 5.0, 2.0, 2.0, 2.0, 1.0, 2.0], "bins": [-2.9375, -2.86407470703125, -2.7906494140625, -2.71722412109375, -2.643798828125, -2.57037353515625, -2.4969482421875, -2.42352294921875, -2.35009765625, -2.27667236328125, -2.2032470703125, -2.12982177734375, -2.056396484375, -1.98297119140625, -1.9095458984375, -1.83612060546875, -1.7626953125, -1.68927001953125, -1.6158447265625, -1.54241943359375, -1.468994140625, -1.39556884765625, -1.3221435546875, -1.24871826171875, -1.17529296875, -1.10186767578125, -1.0284423828125, -0.95501708984375, -0.881591796875, -0.80816650390625, -0.7347412109375, -0.66131591796875, -0.587890625, -0.51446533203125, -0.4410400390625, -0.36761474609375, -0.294189453125, -0.22076416015625, -0.1473388671875, -0.07391357421875, -0.00048828125, 0.07293701171875, 0.1463623046875, 0.21978759765625, 0.293212890625, 0.36663818359375, 0.4400634765625, 0.51348876953125, 0.5869140625, 0.66033935546875, 0.7337646484375, 0.80718994140625, 0.880615234375, 0.95404052734375, 1.0274658203125, 1.10089111328125, 1.17431640625, 1.24774169921875, 1.3211669921875, 1.39459228515625, 1.468017578125, 1.54144287109375, 1.6148681640625, 1.68829345703125, 1.76171875]}, "gradients/encoder.encoder.layers.13.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 1.0, 1.0, 2.0, 3.0, 3.0, 4.0, 2.0, 7.0, 13.0, 18.0, 21.0, 39.0, 57.0, 94.0, 209.0, 392.0, 718.0, 1878.0, 6803.0, 56840.0, 920025.0, 51514.0, 6550.0, 1809.0, 721.0, 343.0, 209.0, 131.0, 64.0, 36.0, 27.0, 11.0, 7.0, 5.0, 6.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.2060546875, -1.1637115478515625, -1.121368408203125, -1.0790252685546875, -1.03668212890625, -0.9943389892578125, -0.951995849609375, -0.9096527099609375, -0.8673095703125, -0.8249664306640625, -0.782623291015625, -0.7402801513671875, -0.69793701171875, -0.6555938720703125, -0.613250732421875, -0.5709075927734375, -0.528564453125, -0.4862213134765625, -0.443878173828125, -0.4015350341796875, -0.35919189453125, -0.3168487548828125, -0.274505615234375, -0.2321624755859375, -0.1898193359375, -0.1474761962890625, -0.105133056640625, -0.0627899169921875, -0.02044677734375, 0.0218963623046875, 0.064239501953125, 0.1065826416015625, 0.14892578125, 0.1912689208984375, 0.233612060546875, 0.2759552001953125, 0.31829833984375, 0.3606414794921875, 0.402984619140625, 0.4453277587890625, 0.4876708984375, 0.5300140380859375, 0.572357177734375, 0.6147003173828125, 0.65704345703125, 0.6993865966796875, 0.741729736328125, 0.7840728759765625, 0.826416015625, 0.8687591552734375, 0.911102294921875, 0.9534454345703125, 0.99578857421875, 1.0381317138671875, 1.080474853515625, 1.1228179931640625, 1.1651611328125, 1.2075042724609375, 1.249847412109375, 1.2921905517578125, 1.33453369140625, 1.3768768310546875, 1.419219970703125, 1.4615631103515625, 1.50390625]}, "gradients/encoder.encoder.layers.13.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 3.0, 2.0, 2.0, 6.0, 4.0, 4.0, 5.0, 8.0, 7.0, 13.0, 12.0, 16.0, 22.0, 39.0, 49.0, 49.0, 68.0, 116.0, 166.0, 116.0, 73.0, 59.0, 37.0, 31.0, 28.0, 19.0, 14.0, 6.0, 15.0, 4.0, 8.0, 3.0, 2.0, 2.0, 1.0, 2.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00015270709991455078, -0.00014816038310527802, -0.00014361366629600525, -0.00013906694948673248, -0.00013452023267745972, -0.00012997351586818695, -0.00012542679905891418, -0.00012088008224964142, -0.00011633336544036865, -0.00011178664863109589, -0.00010723993182182312, -0.00010269321501255035, -9.814649820327759e-05, -9.359978139400482e-05, -8.905306458473206e-05, -8.450634777545929e-05, -7.995963096618652e-05, -7.541291415691376e-05, -7.086619734764099e-05, -6.631948053836823e-05, -6.177276372909546e-05, -5.722604691982269e-05, -5.267933011054993e-05, -4.813261330127716e-05, -4.3585896492004395e-05, -3.903917968273163e-05, -3.449246287345886e-05, -2.9945746064186096e-05, -2.539902925491333e-05, -2.0852312445640564e-05, -1.6305595636367798e-05, -1.1758878827095032e-05, -7.212162017822266e-06, -2.6654452085494995e-06, 1.8812716007232666e-06, 6.427988409996033e-06, 1.0974705219268799e-05, 1.5521422028541565e-05, 2.006813883781433e-05, 2.4614855647087097e-05, 2.9161572456359863e-05, 3.370828926563263e-05, 3.8255006074905396e-05, 4.280172288417816e-05, 4.734843969345093e-05, 5.1895156502723694e-05, 5.644187331199646e-05, 6.0988590121269226e-05, 6.553530693054199e-05, 7.008202373981476e-05, 7.462874054908752e-05, 7.917545735836029e-05, 8.372217416763306e-05, 8.826889097690582e-05, 9.281560778617859e-05, 9.736232459545135e-05, 0.00010190904140472412, 0.00010645575821399689, 0.00011100247502326965, 0.00011554919183254242, 0.00012009590864181519, 0.00012464262545108795, 0.00012918934226036072, 0.00013373605906963348, 0.00013828277587890625]}, "gradients/encoder.encoder.layers.13.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 5.0, 7.0, 4.0, 5.0, 4.0, 5.0, 9.0, 8.0, 14.0, 22.0, 36.0, 43.0, 81.0, 160.0, 270.0, 577.0, 1162.0, 4131.0, 23635.0, 477978.0, 509012.0, 24647.0, 4230.0, 1319.0, 546.0, 258.0, 154.0, 94.0, 53.0, 30.0, 16.0, 16.0, 14.0, 4.0, 3.0, 5.0, 6.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-1.125, -1.092864990234375, -1.06072998046875, -1.028594970703125, -0.9964599609375, -0.964324951171875, -0.93218994140625, -0.900054931640625, -0.867919921875, -0.835784912109375, -0.80364990234375, -0.771514892578125, -0.7393798828125, -0.707244873046875, -0.67510986328125, -0.642974853515625, -0.61083984375, -0.578704833984375, -0.54656982421875, -0.514434814453125, -0.4822998046875, -0.450164794921875, -0.41802978515625, -0.385894775390625, -0.353759765625, -0.321624755859375, -0.28948974609375, -0.257354736328125, -0.2252197265625, -0.193084716796875, -0.16094970703125, -0.128814697265625, -0.0966796875, -0.064544677734375, -0.03240966796875, -0.000274658203125, 0.0318603515625, 0.063995361328125, 0.09613037109375, 0.128265380859375, 0.160400390625, 0.192535400390625, 0.22467041015625, 0.256805419921875, 0.2889404296875, 0.321075439453125, 0.35321044921875, 0.385345458984375, 0.41748046875, 0.449615478515625, 0.48175048828125, 0.513885498046875, 0.5460205078125, 0.578155517578125, 0.61029052734375, 0.642425537109375, 0.674560546875, 0.706695556640625, 0.73883056640625, 0.770965576171875, 0.8031005859375, 0.835235595703125, 0.86737060546875, 0.899505615234375, 0.931640625]}, "gradients/encoder.encoder.layers.13.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 4.0, 0.0, 1.0, 3.0, 0.0, 3.0, 5.0, 1.0, 7.0, 9.0, 9.0, 22.0, 17.0, 17.0, 48.0, 53.0, 116.0, 181.0, 184.0, 110.0, 69.0, 39.0, 24.0, 15.0, 23.0, 7.0, 17.0, 4.0, 3.0, 3.0, 4.0, 5.0, 10.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-1.44140625, -1.4086990356445312, -1.3759918212890625, -1.3432846069335938, -1.310577392578125, -1.2778701782226562, -1.2451629638671875, -1.2124557495117188, -1.17974853515625, -1.1470413208007812, -1.1143341064453125, -1.0816268920898438, -1.048919677734375, -1.0162124633789062, -0.9835052490234375, -0.9507980346679688, -0.9180908203125, -0.8853836059570312, -0.8526763916015625, -0.8199691772460938, -0.787261962890625, -0.7545547485351562, -0.7218475341796875, -0.6891403198242188, -0.65643310546875, -0.6237258911132812, -0.5910186767578125, -0.5583114624023438, -0.525604248046875, -0.49289703369140625, -0.4601898193359375, -0.42748260498046875, -0.394775390625, -0.36206817626953125, -0.3293609619140625, -0.29665374755859375, -0.263946533203125, -0.23123931884765625, -0.1985321044921875, -0.16582489013671875, -0.13311767578125, -0.10041046142578125, -0.0677032470703125, -0.03499603271484375, -0.002288818359375, 0.03041839599609375, 0.0631256103515625, 0.09583282470703125, 0.1285400390625, 0.16124725341796875, 0.1939544677734375, 0.22666168212890625, 0.259368896484375, 0.29207611083984375, 0.3247833251953125, 0.35749053955078125, 0.39019775390625, 0.42290496826171875, 0.4556121826171875, 0.48831939697265625, 0.521026611328125, 0.5537338256835938, 0.5864410400390625, 0.6191482543945312, 0.65185546875]}, "gradients/encoder.encoder.layers.13.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 3.0, 1.0, 0.0, 6.0, 4.0, 8.0, 14.0, 47.0, 85.0, 188.0, 247.0, 204.0, 110.0, 49.0, 28.0, 10.0, 7.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-23.90314483642578, -23.355697631835938, -22.808250427246094, -22.260805130004883, -21.71335792541504, -21.165910720825195, -20.618465423583984, -20.07101821899414, -19.523571014404297, -18.976123809814453, -18.42867660522461, -17.8812313079834, -17.333784103393555, -16.78633689880371, -16.2388916015625, -15.691444396972656, -15.143997192382812, -14.596549987792969, -14.049103736877441, -13.501657485961914, -12.95421028137207, -12.406763076782227, -11.8593168258667, -11.311870574951172, -10.764423370361328, -10.216976165771484, -9.669529914855957, -9.12208366394043, -8.574636459350586, -8.027189254760742, -7.479743003845215, -6.932296276092529, -6.384847640991211, -5.837400913238525, -5.28995418548584, -4.742507457733154, -4.195060729980469, -3.647614002227783, -3.1001672744750977, -2.552720546722412, -2.0052738189697266, -1.457827091217041, -0.9103803634643555, -0.3629336357116699, 0.18451309204101562, 0.7319598197937012, 1.2794065475463867, 1.8268532752990723, 2.374300003051758, 2.9217467308044434, 3.469193458557129, 4.0166401863098145, 4.5640869140625, 5.1115336418151855, 5.658980369567871, 6.206427097320557, 6.753873825073242, 7.301320552825928, 7.848767280578613, 8.39621353149414, 8.943660736083984, 9.491107940673828, 10.038554191589355, 10.586000442504883, 11.133447647094727]}, "gradients/encoder.encoder.layers.13.layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 5.0, 1.0, 1.0, 4.0, 1.0, 8.0, 3.0, 8.0, 7.0, 5.0, 15.0, 8.0, 16.0, 30.0, 22.0, 27.0, 27.0, 34.0, 22.0, 49.0, 34.0, 42.0, 49.0, 47.0, 54.0, 50.0, 31.0, 35.0, 48.0, 40.0, 47.0, 42.0, 35.0, 20.0, 23.0, 24.0, 15.0, 9.0, 15.0, 13.0, 8.0, 10.0, 13.0, 8.0, 4.0, 2.0, 2.0, 2.0, 0.0, 1.0, 0.0, 2.0, 1.0], "bins": [-13.676092147827148, -13.295989990234375, -12.915887832641602, -12.535785675048828, -12.155682563781738, -11.775580406188965, -11.395478248596191, -11.015376091003418, -10.635273933410645, -10.255171775817871, -9.875069618225098, -9.494966506958008, -9.114864349365234, -8.734762191772461, -8.354660034179688, -7.974557876586914, -7.594455242156982, -7.214353084564209, -6.834250450134277, -6.454148292541504, -6.0740461349487305, -5.693943977355957, -5.313841342926025, -4.933739185333252, -4.55363655090332, -4.173534393310547, -3.7934319972991943, -3.413329601287842, -3.0332274436950684, -2.653125047683716, -2.2730226516723633, -1.8929204940795898, -1.5128183364868164, -1.1327160596847534, -0.7526137232780457, -0.3725113868713379, 0.007590889930725098, 0.3876931667327881, 0.7677955627441406, 1.147897720336914, 1.5280001163482666, 1.9081023931503296, 2.2882046699523926, 2.668307065963745, 3.0484094619750977, 3.428511619567871, 3.8086140155792236, 4.188715934753418, 4.56881856918335, 4.948920726776123, 5.329023361206055, 5.709125518798828, 6.089227676391602, 6.469329833984375, 6.849432468414307, 7.22953462600708, 7.609637260437012, 7.989739418029785, 8.369841575622559, 8.749944686889648, 9.130046844482422, 9.510149002075195, 9.890251159667969, 10.270353317260742, 10.650455474853516]}, "gradients/encoder.encoder.layers.12.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 2.0, 4.0, 1.0, 5.0, 10.0, 14.0, 13.0, 20.0, 30.0, 51.0, 76.0, 128.0, 181.0, 320.0, 621.0, 1274.0, 2716.0, 7260.0, 27701.0, 346602.0, 3727281.0, 60375.0, 12098.0, 3954.0, 1676.0, 798.0, 423.0, 206.0, 153.0, 91.0, 68.0, 37.0, 18.0, 26.0, 17.0, 12.0, 8.0, 5.0, 4.0, 3.0, 1.0, 0.0, 3.0, 3.0, 0.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.193359375, -2.125274658203125, -2.05718994140625, -1.989105224609375, -1.9210205078125, -1.852935791015625, -1.78485107421875, -1.716766357421875, -1.648681640625, -1.580596923828125, -1.51251220703125, -1.444427490234375, -1.3763427734375, -1.308258056640625, -1.24017333984375, -1.172088623046875, -1.10400390625, -1.035919189453125, -0.96783447265625, -0.899749755859375, -0.8316650390625, -0.763580322265625, -0.69549560546875, -0.627410888671875, -0.559326171875, -0.491241455078125, -0.42315673828125, -0.355072021484375, -0.2869873046875, -0.218902587890625, -0.15081787109375, -0.082733154296875, -0.0146484375, 0.053436279296875, 0.12152099609375, 0.189605712890625, 0.2576904296875, 0.325775146484375, 0.39385986328125, 0.461944580078125, 0.530029296875, 0.598114013671875, 0.66619873046875, 0.734283447265625, 0.8023681640625, 0.870452880859375, 0.93853759765625, 1.006622314453125, 1.07470703125, 1.142791748046875, 1.21087646484375, 1.278961181640625, 1.3470458984375, 1.415130615234375, 1.48321533203125, 1.551300048828125, 1.619384765625, 1.687469482421875, 1.75555419921875, 1.823638916015625, 1.8917236328125, 1.959808349609375, 2.02789306640625, 2.095977783203125, 2.1640625]}, "gradients/encoder.encoder.layers.12.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 3.0, 4.0, 2.0, 0.0, 6.0, 10.0, 17.0, 17.0, 25.0, 36.0, 45.0, 59.0, 76.0, 60.0, 93.0, 91.0, 96.0, 84.0, 72.0, 75.0, 33.0, 32.0, 23.0, 11.0, 21.0, 7.0, 5.0, 2.0, 3.0, 2.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.9013671875, -0.8751983642578125, -0.849029541015625, -0.8228607177734375, -0.79669189453125, -0.7705230712890625, -0.744354248046875, -0.7181854248046875, -0.6920166015625, -0.6658477783203125, -0.639678955078125, -0.6135101318359375, -0.58734130859375, -0.5611724853515625, -0.535003662109375, -0.5088348388671875, -0.482666015625, -0.4564971923828125, -0.430328369140625, -0.4041595458984375, -0.37799072265625, -0.3518218994140625, -0.325653076171875, -0.2994842529296875, -0.2733154296875, -0.2471466064453125, -0.220977783203125, -0.1948089599609375, -0.16864013671875, -0.1424713134765625, -0.116302490234375, -0.0901336669921875, -0.06396484375, -0.0377960205078125, -0.011627197265625, 0.0145416259765625, 0.04071044921875, 0.0668792724609375, 0.093048095703125, 0.1192169189453125, 0.1453857421875, 0.1715545654296875, 0.197723388671875, 0.2238922119140625, 0.25006103515625, 0.2762298583984375, 0.302398681640625, 0.3285675048828125, 0.354736328125, 0.3809051513671875, 0.407073974609375, 0.4332427978515625, 0.45941162109375, 0.4855804443359375, 0.511749267578125, 0.5379180908203125, 0.5640869140625, 0.5902557373046875, 0.616424560546875, 0.6425933837890625, 0.66876220703125, 0.6949310302734375, 0.721099853515625, 0.7472686767578125, 0.7734375]}, "gradients/encoder.encoder.layers.12.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 2.0, 2.0, 4.0, 8.0, 15.0, 19.0, 66.0, 102.0, 217.0, 456.0, 1004.0, 2536.0, 7985.0, 46152.0, 3875644.0, 236231.0, 16817.0, 4229.0, 1513.0, 688.0, 298.0, 152.0, 74.0, 31.0, 22.0, 7.0, 4.0, 5.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.1171875, -2.034942626953125, -1.95269775390625, -1.870452880859375, -1.7882080078125, -1.705963134765625, -1.62371826171875, -1.541473388671875, -1.459228515625, -1.376983642578125, -1.29473876953125, -1.212493896484375, -1.1302490234375, -1.048004150390625, -0.96575927734375, -0.883514404296875, -0.80126953125, -0.719024658203125, -0.63677978515625, -0.554534912109375, -0.4722900390625, -0.390045166015625, -0.30780029296875, -0.225555419921875, -0.143310546875, -0.061065673828125, 0.02117919921875, 0.103424072265625, 0.1856689453125, 0.267913818359375, 0.35015869140625, 0.432403564453125, 0.5146484375, 0.596893310546875, 0.67913818359375, 0.761383056640625, 0.8436279296875, 0.925872802734375, 1.00811767578125, 1.090362548828125, 1.172607421875, 1.254852294921875, 1.33709716796875, 1.419342041015625, 1.5015869140625, 1.583831787109375, 1.66607666015625, 1.748321533203125, 1.83056640625, 1.912811279296875, 1.99505615234375, 2.077301025390625, 2.1595458984375, 2.241790771484375, 2.32403564453125, 2.406280517578125, 2.488525390625, 2.570770263671875, 2.65301513671875, 2.735260009765625, 2.8175048828125, 2.899749755859375, 2.98199462890625, 3.064239501953125, 3.146484375]}, "gradients/encoder.encoder.layers.12.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 1.0, 2.0, 3.0, 4.0, 2.0, 5.0, 6.0, 5.0, 5.0, 9.0, 9.0, 18.0, 31.0, 60.0, 103.0, 353.0, 2480.0, 646.0, 162.0, 60.0, 30.0, 23.0, 18.0, 11.0, 6.0, 8.0, 3.0, 5.0, 5.0, 4.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.197265625, -1.15826416015625, -1.1192626953125, -1.08026123046875, -1.041259765625, -1.00225830078125, -0.9632568359375, -0.92425537109375, -0.88525390625, -0.84625244140625, -0.8072509765625, -0.76824951171875, -0.729248046875, -0.69024658203125, -0.6512451171875, -0.61224365234375, -0.5732421875, -0.53424072265625, -0.4952392578125, -0.45623779296875, -0.417236328125, -0.37823486328125, -0.3392333984375, -0.30023193359375, -0.26123046875, -0.22222900390625, -0.1832275390625, -0.14422607421875, -0.105224609375, -0.06622314453125, -0.0272216796875, 0.01177978515625, 0.05078125, 0.08978271484375, 0.1287841796875, 0.16778564453125, 0.206787109375, 0.24578857421875, 0.2847900390625, 0.32379150390625, 0.36279296875, 0.40179443359375, 0.4407958984375, 0.47979736328125, 0.518798828125, 0.55780029296875, 0.5968017578125, 0.63580322265625, 0.6748046875, 0.71380615234375, 0.7528076171875, 0.79180908203125, 0.830810546875, 0.86981201171875, 0.9088134765625, 0.94781494140625, 0.98681640625, 1.02581787109375, 1.0648193359375, 1.10382080078125, 1.142822265625, 1.18182373046875, 1.2208251953125, 1.25982666015625, 1.298828125]}, "gradients/encoder.encoder.layers.12.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 4.0, 14.0, 30.0, 127.0, 367.0, 323.0, 103.0, 33.0, 7.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.952731132507324, -8.632033348083496, -8.311336517333984, -7.990638732910156, -7.669940948486328, -7.349243640899658, -7.028546333312988, -6.70784854888916, -6.38715124130249, -6.06645393371582, -5.745756149291992, -5.425058841705322, -5.104361534118652, -4.783663749694824, -4.462966442108154, -4.142269134521484, -3.8215713500976562, -3.5008738040924072, -3.180176258087158, -2.8594789505004883, -2.5387814044952393, -2.2180838584899902, -1.8973864316940308, -1.5766890048980713, -1.2559914588928223, -0.935293972492218, -0.6145964860916138, -0.2938989996910095, 0.026798486709594727, 0.34749603271484375, 0.6681934595108032, 0.9888908863067627, 1.3095874786376953, 1.6302850246429443, 1.9509824514389038, 2.2716798782348633, 2.5923774242401123, 2.9130749702453613, 3.2337722778320312, 3.5544698238372803, 3.8751673698425293, 4.195864677429199, 4.516562461853027, 4.837259769439697, 5.157957077026367, 5.478654861450195, 5.799352169036865, 6.120049476623535, 6.440747261047363, 6.761444568634033, 7.082142353057861, 7.402839660644531, 7.723537445068359, 8.044235229492188, 8.3649320602417, 8.685629844665527, 9.006326675415039, 9.327024459838867, 9.647721290588379, 9.968419075012207, 10.289116859436035, 10.609813690185547, 10.930511474609375, 11.251209259033203, 11.571907043457031]}, "gradients/encoder.encoder.layers.12.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 4.0, 3.0, 3.0, 10.0, 11.0, 8.0, 10.0, 12.0, 13.0, 23.0, 45.0, 28.0, 27.0, 37.0, 40.0, 48.0, 50.0, 52.0, 56.0, 55.0, 55.0, 64.0, 45.0, 54.0, 38.0, 38.0, 37.0, 26.0, 22.0, 22.0, 20.0, 15.0, 10.0, 7.0, 11.0, 4.0, 3.0, 6.0, 3.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-4.089986801147461, -3.980909585952759, -3.8718323707580566, -3.7627551555633545, -3.6536779403686523, -3.54460072517395, -3.435523509979248, -3.326446056365967, -3.2173690795898438, -3.1082918643951416, -2.9992146492004395, -2.8901374340057373, -2.781060218811035, -2.671983003616333, -2.562905788421631, -2.4538283348083496, -2.3447511196136475, -2.2356739044189453, -2.126596689224243, -2.017519474029541, -1.9084422588348389, -1.7993650436401367, -1.690287709236145, -1.5812104940414429, -1.4721332788467407, -1.3630560636520386, -1.2539788484573364, -1.1449015140533447, -1.0358242988586426, -0.9267471432685852, -0.8176698684692383, -0.7085926532745361, -0.599515438079834, -0.49043822288513184, -0.3813609778881073, -0.27228373289108276, -0.16320651769638062, -0.05412930250167847, 0.05494797229766846, 0.1640251874923706, 0.27310240268707275, 0.3821796178817749, 0.49125686287879944, 0.600334107875824, 0.7094113230705261, 0.8184885382652283, 0.9275658130645752, 1.0366430282592773, 1.1457202434539795, 1.2547974586486816, 1.3638746738433838, 1.472951889038086, 1.582029104232788, 1.6911063194274902, 1.800183653831482, 1.909260869026184, 2.018338203430176, 2.127415418624878, 2.23649263381958, 2.3455698490142822, 2.4546470642089844, 2.5637242794036865, 2.6728014945983887, 2.78187894821167, 2.890955924987793]}, "gradients/encoder.encoder.layers.12.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 2.0, 0.0, 5.0, 4.0, 3.0, 2.0, 15.0, 14.0, 15.0, 35.0, 49.0, 79.0, 129.0, 213.0, 399.0, 732.0, 1482.0, 3655.0, 10186.0, 35659.0, 167572.0, 528251.0, 231787.0, 47536.0, 12652.0, 4374.0, 1803.0, 866.0, 427.0, 233.0, 143.0, 90.0, 56.0, 29.0, 24.0, 19.0, 4.0, 5.0, 8.0, 5.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.7744140625, -1.7216033935546875, -1.668792724609375, -1.6159820556640625, -1.56317138671875, -1.5103607177734375, -1.457550048828125, -1.4047393798828125, -1.3519287109375, -1.2991180419921875, -1.246307373046875, -1.1934967041015625, -1.14068603515625, -1.0878753662109375, -1.035064697265625, -0.9822540283203125, -0.929443359375, -0.8766326904296875, -0.823822021484375, -0.7710113525390625, -0.71820068359375, -0.6653900146484375, -0.612579345703125, -0.5597686767578125, -0.5069580078125, -0.4541473388671875, -0.401336669921875, -0.3485260009765625, -0.29571533203125, -0.2429046630859375, -0.190093994140625, -0.1372833251953125, -0.08447265625, -0.0316619873046875, 0.021148681640625, 0.0739593505859375, 0.12677001953125, 0.1795806884765625, 0.232391357421875, 0.2852020263671875, 0.3380126953125, 0.3908233642578125, 0.443634033203125, 0.4964447021484375, 0.54925537109375, 0.6020660400390625, 0.654876708984375, 0.7076873779296875, 0.760498046875, 0.8133087158203125, 0.866119384765625, 0.9189300537109375, 0.97174072265625, 1.0245513916015625, 1.077362060546875, 1.1301727294921875, 1.1829833984375, 1.2357940673828125, 1.288604736328125, 1.3414154052734375, 1.39422607421875, 1.4470367431640625, 1.499847412109375, 1.5526580810546875, 1.60546875]}, "gradients/encoder.encoder.layers.12.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 4.0, 2.0, 2.0, 5.0, 6.0, 9.0, 16.0, 20.0, 36.0, 42.0, 43.0, 74.0, 68.0, 86.0, 91.0, 82.0, 98.0, 81.0, 64.0, 54.0, 42.0, 28.0, 16.0, 13.0, 11.0, 8.0, 6.0, 3.0, 0.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.923828125, -0.897430419921875, -0.87103271484375, -0.844635009765625, -0.8182373046875, -0.791839599609375, -0.76544189453125, -0.739044189453125, -0.712646484375, -0.686248779296875, -0.65985107421875, -0.633453369140625, -0.6070556640625, -0.580657958984375, -0.55426025390625, -0.527862548828125, -0.50146484375, -0.475067138671875, -0.44866943359375, -0.422271728515625, -0.3958740234375, -0.369476318359375, -0.34307861328125, -0.316680908203125, -0.290283203125, -0.263885498046875, -0.23748779296875, -0.211090087890625, -0.1846923828125, -0.158294677734375, -0.13189697265625, -0.105499267578125, -0.0791015625, -0.052703857421875, -0.02630615234375, 9.1552734375e-05, 0.0264892578125, 0.052886962890625, 0.07928466796875, 0.105682373046875, 0.132080078125, 0.158477783203125, 0.18487548828125, 0.211273193359375, 0.2376708984375, 0.264068603515625, 0.29046630859375, 0.316864013671875, 0.34326171875, 0.369659423828125, 0.39605712890625, 0.422454833984375, 0.4488525390625, 0.475250244140625, 0.50164794921875, 0.528045654296875, 0.554443359375, 0.580841064453125, 0.60723876953125, 0.633636474609375, 0.6600341796875, 0.686431884765625, 0.71282958984375, 0.739227294921875, 0.765625]}, "gradients/encoder.encoder.layers.12.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 3.0, 5.0, 1.0, 3.0, 8.0, 8.0, 10.0, 14.0, 15.0, 25.0, 34.0, 49.0, 55.0, 81.0, 123.0, 224.0, 331.0, 641.0, 1681.0, 5089.0, 22673.0, 165278.0, 694291.0, 131247.0, 19077.0, 4585.0, 1403.0, 645.0, 328.0, 189.0, 144.0, 84.0, 58.0, 40.0, 37.0, 18.0, 12.0, 16.0, 13.0, 5.0, 9.0, 7.0, 0.0, 3.0, 3.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-2.376953125, -2.29888916015625, -2.2208251953125, -2.14276123046875, -2.064697265625, -1.98663330078125, -1.9085693359375, -1.83050537109375, -1.75244140625, -1.67437744140625, -1.5963134765625, -1.51824951171875, -1.440185546875, -1.36212158203125, -1.2840576171875, -1.20599365234375, -1.1279296875, -1.04986572265625, -0.9718017578125, -0.89373779296875, -0.815673828125, -0.73760986328125, -0.6595458984375, -0.58148193359375, -0.50341796875, -0.42535400390625, -0.3472900390625, -0.26922607421875, -0.191162109375, -0.11309814453125, -0.0350341796875, 0.04302978515625, 0.12109375, 0.19915771484375, 0.2772216796875, 0.35528564453125, 0.433349609375, 0.51141357421875, 0.5894775390625, 0.66754150390625, 0.74560546875, 0.82366943359375, 0.9017333984375, 0.97979736328125, 1.057861328125, 1.13592529296875, 1.2139892578125, 1.29205322265625, 1.3701171875, 1.44818115234375, 1.5262451171875, 1.60430908203125, 1.682373046875, 1.76043701171875, 1.8385009765625, 1.91656494140625, 1.99462890625, 2.07269287109375, 2.1507568359375, 2.22882080078125, 2.306884765625, 2.38494873046875, 2.4630126953125, 2.54107666015625, 2.619140625]}, "gradients/encoder.encoder.layers.12.attention.v_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 4.0, 0.0, 3.0, 0.0, 3.0, 3.0, 5.0, 3.0, 4.0, 7.0, 6.0, 12.0, 8.0, 10.0, 22.0, 18.0, 17.0, 21.0, 20.0, 36.0, 45.0, 43.0, 39.0, 39.0, 48.0, 43.0, 38.0, 42.0, 56.0, 45.0, 40.0, 54.0, 35.0, 31.0, 35.0, 32.0, 24.0, 27.0, 18.0, 9.0, 11.0, 12.0, 11.0, 8.0, 6.0, 4.0, 5.0, 4.0, 5.0, 3.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-2.583984375, -2.495147705078125, -2.40631103515625, -2.317474365234375, -2.2286376953125, -2.139801025390625, -2.05096435546875, -1.962127685546875, -1.873291015625, -1.784454345703125, -1.69561767578125, -1.606781005859375, -1.5179443359375, -1.429107666015625, -1.34027099609375, -1.251434326171875, -1.16259765625, -1.073760986328125, -0.98492431640625, -0.896087646484375, -0.8072509765625, -0.718414306640625, -0.62957763671875, -0.540740966796875, -0.451904296875, -0.363067626953125, -0.27423095703125, -0.185394287109375, -0.0965576171875, -0.007720947265625, 0.08111572265625, 0.169952392578125, 0.2587890625, 0.347625732421875, 0.43646240234375, 0.525299072265625, 0.6141357421875, 0.702972412109375, 0.79180908203125, 0.880645751953125, 0.969482421875, 1.058319091796875, 1.14715576171875, 1.235992431640625, 1.3248291015625, 1.413665771484375, 1.50250244140625, 1.591339111328125, 1.68017578125, 1.769012451171875, 1.85784912109375, 1.946685791015625, 2.0355224609375, 2.124359130859375, 2.21319580078125, 2.302032470703125, 2.390869140625, 2.479705810546875, 2.56854248046875, 2.657379150390625, 2.7462158203125, 2.835052490234375, 2.92388916015625, 3.012725830078125, 3.1015625]}, "gradients/encoder.encoder.layers.12.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 4.0, 2.0, 5.0, 9.0, 9.0, 26.0, 23.0, 37.0, 51.0, 118.0, 254.0, 542.0, 1566.0, 7437.0, 111285.0, 873031.0, 47441.0, 4741.0, 1101.0, 422.0, 220.0, 109.0, 55.0, 34.0, 17.0, 9.0, 8.0, 3.0, 5.0, 1.0, 4.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.720703125, -1.6691436767578125, -1.617584228515625, -1.5660247802734375, -1.51446533203125, -1.4629058837890625, -1.411346435546875, -1.3597869873046875, -1.3082275390625, -1.2566680908203125, -1.205108642578125, -1.1535491943359375, -1.10198974609375, -1.0504302978515625, -0.998870849609375, -0.9473114013671875, -0.895751953125, -0.8441925048828125, -0.792633056640625, -0.7410736083984375, -0.68951416015625, -0.6379547119140625, -0.586395263671875, -0.5348358154296875, -0.4832763671875, -0.4317169189453125, -0.380157470703125, -0.3285980224609375, -0.27703857421875, -0.2254791259765625, -0.173919677734375, -0.1223602294921875, -0.07080078125, -0.0192413330078125, 0.032318115234375, 0.0838775634765625, 0.13543701171875, 0.1869964599609375, 0.238555908203125, 0.2901153564453125, 0.3416748046875, 0.3932342529296875, 0.444793701171875, 0.4963531494140625, 0.54791259765625, 0.5994720458984375, 0.651031494140625, 0.7025909423828125, 0.754150390625, 0.8057098388671875, 0.857269287109375, 0.9088287353515625, 0.96038818359375, 1.0119476318359375, 1.063507080078125, 1.1150665283203125, 1.1666259765625, 1.2181854248046875, 1.269744873046875, 1.3213043212890625, 1.37286376953125, 1.4244232177734375, 1.475982666015625, 1.5275421142578125, 1.5791015625]}, "gradients/encoder.encoder.layers.12.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 2.0, 2.0, 1.0, 1.0, 3.0, 4.0, 7.0, 6.0, 9.0, 24.0, 23.0, 33.0, 62.0, 66.0, 99.0, 99.0, 114.0, 100.0, 99.0, 70.0, 50.0, 30.0, 34.0, 20.0, 19.0, 10.0, 6.0, 4.0, 7.0, 2.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0001323223114013672, -0.00012771226465702057, -0.00012310221791267395, -0.00011849217116832733, -0.00011388212442398071, -0.0001092720776796341, -0.00010466203093528748, -0.00010005198419094086, -9.544193744659424e-05, -9.083189070224762e-05, -8.6221843957901e-05, -8.161179721355438e-05, -7.700175046920776e-05, -7.239170372486115e-05, -6.778165698051453e-05, -6.317161023616791e-05, -5.856156349182129e-05, -5.395151674747467e-05, -4.934147000312805e-05, -4.473142325878143e-05, -4.0121376514434814e-05, -3.5511329770088196e-05, -3.090128302574158e-05, -2.629123628139496e-05, -2.168118953704834e-05, -1.707114279270172e-05, -1.2461096048355103e-05, -7.851049304008484e-06, -3.2410025596618652e-06, 1.3690441846847534e-06, 5.979090929031372e-06, 1.058913767337799e-05, 1.519918441772461e-05, 1.9809231162071228e-05, 2.4419277906417847e-05, 2.9029324650764465e-05, 3.3639371395111084e-05, 3.82494181394577e-05, 4.285946488380432e-05, 4.746951162815094e-05, 5.207955837249756e-05, 5.668960511684418e-05, 6.12996518611908e-05, 6.590969860553741e-05, 7.051974534988403e-05, 7.512979209423065e-05, 7.973983883857727e-05, 8.434988558292389e-05, 8.895993232727051e-05, 9.356997907161713e-05, 9.818002581596375e-05, 0.00010279007256031036, 0.00010740011930465698, 0.0001120101660490036, 0.00011662021279335022, 0.00012123025953769684, 0.00012584030628204346, 0.00013045035302639008, 0.0001350603997707367, 0.0001396704465150833, 0.00014428049325942993, 0.00014889054000377655, 0.00015350058674812317, 0.0001581106334924698, 0.0001627206802368164]}, "gradients/encoder.encoder.layers.12.attention.q_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 1.0, 2.0, 1.0, 6.0, 5.0, 3.0, 10.0, 15.0, 26.0, 43.0, 77.0, 134.0, 260.0, 549.0, 1323.0, 4823.0, 32107.0, 525813.0, 448288.0, 28377.0, 4471.0, 1151.0, 529.0, 238.0, 130.0, 75.0, 35.0, 24.0, 12.0, 13.0, 8.0, 4.0, 4.0, 2.0, 2.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.1611328125, -1.1223907470703125, -1.083648681640625, -1.0449066162109375, -1.00616455078125, -0.9674224853515625, -0.928680419921875, -0.8899383544921875, -0.8511962890625, -0.8124542236328125, -0.773712158203125, -0.7349700927734375, -0.69622802734375, -0.6574859619140625, -0.618743896484375, -0.5800018310546875, -0.541259765625, -0.5025177001953125, -0.463775634765625, -0.4250335693359375, -0.38629150390625, -0.3475494384765625, -0.308807373046875, -0.2700653076171875, -0.2313232421875, -0.1925811767578125, -0.153839111328125, -0.1150970458984375, -0.07635498046875, -0.0376129150390625, 0.001129150390625, 0.0398712158203125, 0.07861328125, 0.1173553466796875, 0.156097412109375, 0.1948394775390625, 0.23358154296875, 0.2723236083984375, 0.311065673828125, 0.3498077392578125, 0.3885498046875, 0.4272918701171875, 0.466033935546875, 0.5047760009765625, 0.54351806640625, 0.5822601318359375, 0.621002197265625, 0.6597442626953125, 0.698486328125, 0.7372283935546875, 0.775970458984375, 0.8147125244140625, 0.85345458984375, 0.8921966552734375, 0.930938720703125, 0.9696807861328125, 1.0084228515625, 1.0471649169921875, 1.085906982421875, 1.1246490478515625, 1.16339111328125, 1.2021331787109375, 1.240875244140625, 1.2796173095703125, 1.318359375]}, "gradients/encoder.encoder.layers.12.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 2.0, 5.0, 2.0, 3.0, 2.0, 7.0, 11.0, 10.0, 8.0, 17.0, 26.0, 31.0, 41.0, 59.0, 88.0, 119.0, 137.0, 134.0, 91.0, 55.0, 39.0, 30.0, 20.0, 14.0, 16.0, 11.0, 11.0, 9.0, 5.0, 3.0, 5.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.125, -1.0867919921875, -1.048583984375, -1.0103759765625, -0.97216796875, -0.9339599609375, -0.895751953125, -0.8575439453125, -0.8193359375, -0.7811279296875, -0.742919921875, -0.7047119140625, -0.66650390625, -0.6282958984375, -0.590087890625, -0.5518798828125, -0.513671875, -0.4754638671875, -0.437255859375, -0.3990478515625, -0.36083984375, -0.3226318359375, -0.284423828125, -0.2462158203125, -0.2080078125, -0.1697998046875, -0.131591796875, -0.0933837890625, -0.05517578125, -0.0169677734375, 0.021240234375, 0.0594482421875, 0.09765625, 0.1358642578125, 0.174072265625, 0.2122802734375, 0.25048828125, 0.2886962890625, 0.326904296875, 0.3651123046875, 0.4033203125, 0.4415283203125, 0.479736328125, 0.5179443359375, 0.55615234375, 0.5943603515625, 0.632568359375, 0.6707763671875, 0.708984375, 0.7471923828125, 0.785400390625, 0.8236083984375, 0.86181640625, 0.9000244140625, 0.938232421875, 0.9764404296875, 1.0146484375, 1.0528564453125, 1.091064453125, 1.1292724609375, 1.16748046875, 1.2056884765625, 1.243896484375, 1.2821044921875, 1.3203125]}, "gradients/encoder.encoder.layers.12.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 4.0, 2.0, 0.0, 0.0, 1.0, 2.0, 4.0, 11.0, 30.0, 75.0, 225.0, 329.0, 197.0, 77.0, 28.0, 18.0, 6.0, 3.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-17.165355682373047, -16.234865188598633, -15.304374694824219, -14.373884201049805, -13.44339370727539, -12.512903213500977, -11.582412719726562, -10.651922225952148, -9.721431732177734, -8.79094123840332, -7.860450744628906, -6.929960250854492, -5.999469757080078, -5.068978786468506, -4.138488292694092, -3.2079977989196777, -2.2775068283081055, -1.3470163345336914, -0.4165257215499878, 0.5139648914337158, 1.4444553852081299, 2.374946117401123, 3.305436611175537, 4.235927104949951, 5.166417598724365, 6.096908092498779, 7.027398586273193, 7.957889556884766, 8.88838005065918, 9.818870544433594, 10.749361038208008, 11.679851531982422, 12.610342025756836, 13.54083251953125, 14.471323013305664, 15.401813507080078, 16.332304000854492, 17.262794494628906, 18.19328498840332, 19.123775482177734, 20.05426597595215, 20.984756469726562, 21.915246963500977, 22.84573745727539, 23.776227951049805, 24.70671844482422, 25.637208938598633, 26.567699432373047, 27.498191833496094, 28.428682327270508, 29.359172821044922, 30.289663314819336, 31.22015380859375, 32.1506462097168, 33.08113479614258, 34.011627197265625, 34.942115783691406, 35.87260818481445, 36.803096771240234, 37.73358917236328, 38.66407775878906, 39.59457015991211, 40.52505874633789, 41.45555114746094, 42.38603973388672]}, "gradients/encoder.encoder.layers.12.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 2.0, 1.0, 5.0, 2.0, 2.0, 4.0, 3.0, 12.0, 7.0, 7.0, 12.0, 13.0, 18.0, 18.0, 25.0, 29.0, 34.0, 36.0, 47.0, 41.0, 51.0, 58.0, 66.0, 61.0, 64.0, 61.0, 52.0, 44.0, 46.0, 37.0, 30.0, 29.0, 21.0, 17.0, 11.0, 10.0, 8.0, 5.0, 8.0, 3.0, 4.0, 5.0, 2.0, 3.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-22.446754455566406, -21.841707229614258, -21.23666000366211, -20.631614685058594, -20.026567459106445, -19.421520233154297, -18.81647491455078, -18.211427688598633, -17.606380462646484, -17.001333236694336, -16.396286010742188, -15.791240692138672, -15.186193466186523, -14.581146240234375, -13.976099967956543, -13.371053695678711, -12.766006469726562, -12.160959243774414, -11.555912971496582, -10.95086669921875, -10.345819473266602, -9.740772247314453, -9.135725975036621, -8.530679702758789, -7.925632476806641, -7.32058572769165, -6.71553897857666, -6.11049222946167, -5.50544548034668, -4.9003987312316895, -4.295351982116699, -3.690305233001709, -3.0852584838867188, -2.4802117347717285, -1.8751649856567383, -1.270118236541748, -0.6650714874267578, -0.06002473831176758, 0.5450220108032227, 1.150068759918213, 1.7551155090332031, 2.3601622581481934, 2.9652090072631836, 3.570255756378174, 4.175302505493164, 4.780349254608154, 5.3853960037231445, 5.990442752838135, 6.595489501953125, 7.200536251068115, 7.8055830001831055, 8.410629272460938, 9.015676498413086, 9.620723724365234, 10.225769996643066, 10.830816268920898, 11.435863494873047, 12.040910720825195, 12.645956993103027, 13.25100326538086, 13.856050491333008, 14.461097717285156, 15.066143989562988, 15.67119026184082, 16.27623748779297]}, "gradients/encoder.encoder.layers.11.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 3.0, 5.0, 3.0, 6.0, 2.0, 10.0, 15.0, 17.0, 31.0, 56.0, 89.0, 117.0, 189.0, 327.0, 546.0, 997.0, 1905.0, 4284.0, 11626.0, 48438.0, 3806806.0, 277488.0, 26864.0, 7877.0, 3093.0, 1566.0, 782.0, 441.0, 257.0, 174.0, 86.0, 59.0, 43.0, 21.0, 16.0, 13.0, 13.0, 7.0, 4.0, 8.0, 3.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-1.6259765625, -1.57489013671875, -1.5238037109375, -1.47271728515625, -1.421630859375, -1.37054443359375, -1.3194580078125, -1.26837158203125, -1.21728515625, -1.16619873046875, -1.1151123046875, -1.06402587890625, -1.012939453125, -0.96185302734375, -0.9107666015625, -0.85968017578125, -0.80859375, -0.75750732421875, -0.7064208984375, -0.65533447265625, -0.604248046875, -0.55316162109375, -0.5020751953125, -0.45098876953125, -0.39990234375, -0.34881591796875, -0.2977294921875, -0.24664306640625, -0.195556640625, -0.14447021484375, -0.0933837890625, -0.04229736328125, 0.0087890625, 0.05987548828125, 0.1109619140625, 0.16204833984375, 0.213134765625, 0.26422119140625, 0.3153076171875, 0.36639404296875, 0.41748046875, 0.46856689453125, 0.5196533203125, 0.57073974609375, 0.621826171875, 0.67291259765625, 0.7239990234375, 0.77508544921875, 0.826171875, 0.87725830078125, 0.9283447265625, 0.97943115234375, 1.030517578125, 1.08160400390625, 1.1326904296875, 1.18377685546875, 1.23486328125, 1.28594970703125, 1.3370361328125, 1.38812255859375, 1.439208984375, 1.49029541015625, 1.5413818359375, 1.59246826171875, 1.6435546875]}, "gradients/encoder.encoder.layers.11.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 4.0, 1.0, 2.0, 4.0, 4.0, 4.0, 4.0, 3.0, 8.0, 6.0, 12.0, 20.0, 27.0, 31.0, 33.0, 41.0, 71.0, 75.0, 69.0, 87.0, 81.0, 89.0, 63.0, 67.0, 50.0, 36.0, 35.0, 18.0, 12.0, 17.0, 9.0, 9.0, 7.0, 6.0, 3.0, 6.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.033203125, -1.003662109375, -0.97412109375, -0.944580078125, -0.9150390625, -0.885498046875, -0.85595703125, -0.826416015625, -0.796875, -0.767333984375, -0.73779296875, -0.708251953125, -0.6787109375, -0.649169921875, -0.61962890625, -0.590087890625, -0.560546875, -0.531005859375, -0.50146484375, -0.471923828125, -0.4423828125, -0.412841796875, -0.38330078125, -0.353759765625, -0.32421875, -0.294677734375, -0.26513671875, -0.235595703125, -0.2060546875, -0.176513671875, -0.14697265625, -0.117431640625, -0.087890625, -0.058349609375, -0.02880859375, 0.000732421875, 0.0302734375, 0.059814453125, 0.08935546875, 0.118896484375, 0.1484375, 0.177978515625, 0.20751953125, 0.237060546875, 0.2666015625, 0.296142578125, 0.32568359375, 0.355224609375, 0.384765625, 0.414306640625, 0.44384765625, 0.473388671875, 0.5029296875, 0.532470703125, 0.56201171875, 0.591552734375, 0.62109375, 0.650634765625, 0.68017578125, 0.709716796875, 0.7392578125, 0.768798828125, 0.79833984375, 0.827880859375, 0.857421875]}, "gradients/encoder.encoder.layers.11.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 6.0, 7.0, 6.0, 7.0, 9.0, 15.0, 17.0, 35.0, 50.0, 85.0, 110.0, 147.0, 183.0, 299.0, 496.0, 766.0, 1299.0, 2426.0, 5072.0, 13918.0, 82174.0, 3978668.0, 83305.0, 13977.0, 5292.0, 2389.0, 1260.0, 793.0, 497.0, 301.0, 217.0, 134.0, 83.0, 67.0, 57.0, 45.0, 30.0, 22.0, 12.0, 6.0, 6.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.7265625, -1.665985107421875, -1.60540771484375, -1.544830322265625, -1.4842529296875, -1.423675537109375, -1.36309814453125, -1.302520751953125, -1.241943359375, -1.181365966796875, -1.12078857421875, -1.060211181640625, -0.9996337890625, -0.939056396484375, -0.87847900390625, -0.817901611328125, -0.75732421875, -0.696746826171875, -0.63616943359375, -0.575592041015625, -0.5150146484375, -0.454437255859375, -0.39385986328125, -0.333282470703125, -0.272705078125, -0.212127685546875, -0.15155029296875, -0.090972900390625, -0.0303955078125, 0.030181884765625, 0.09075927734375, 0.151336669921875, 0.2119140625, 0.272491455078125, 0.33306884765625, 0.393646240234375, 0.4542236328125, 0.514801025390625, 0.57537841796875, 0.635955810546875, 0.696533203125, 0.757110595703125, 0.81768798828125, 0.878265380859375, 0.9388427734375, 0.999420166015625, 1.05999755859375, 1.120574951171875, 1.18115234375, 1.241729736328125, 1.30230712890625, 1.362884521484375, 1.4234619140625, 1.484039306640625, 1.54461669921875, 1.605194091796875, 1.665771484375, 1.726348876953125, 1.78692626953125, 1.847503662109375, 1.9080810546875, 1.968658447265625, 2.02923583984375, 2.089813232421875, 2.150390625]}, "gradients/encoder.encoder.layers.11.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 2.0, 0.0, 2.0, 6.0, 9.0, 8.0, 23.0, 28.0, 94.0, 689.0, 3024.0, 105.0, 26.0, 19.0, 16.0, 6.0, 3.0, 4.0, 6.0, 1.0, 0.0, 3.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.64697265625, -0.62615966796875, -0.6053466796875, -0.58453369140625, -0.563720703125, -0.54290771484375, -0.5220947265625, -0.50128173828125, -0.48046875, -0.45965576171875, -0.4388427734375, -0.41802978515625, -0.397216796875, -0.37640380859375, -0.3555908203125, -0.33477783203125, -0.31396484375, -0.29315185546875, -0.2723388671875, -0.25152587890625, -0.230712890625, -0.20989990234375, -0.1890869140625, -0.16827392578125, -0.1474609375, -0.12664794921875, -0.1058349609375, -0.08502197265625, -0.064208984375, -0.04339599609375, -0.0225830078125, -0.00177001953125, 0.01904296875, 0.03985595703125, 0.0606689453125, 0.08148193359375, 0.102294921875, 0.12310791015625, 0.1439208984375, 0.16473388671875, 0.185546875, 0.20635986328125, 0.2271728515625, 0.24798583984375, 0.268798828125, 0.28961181640625, 0.3104248046875, 0.33123779296875, 0.35205078125, 0.37286376953125, 0.3936767578125, 0.41448974609375, 0.435302734375, 0.45611572265625, 0.4769287109375, 0.49774169921875, 0.5185546875, 0.53936767578125, 0.5601806640625, 0.58099365234375, 0.601806640625, 0.62261962890625, 0.6434326171875, 0.66424560546875, 0.68505859375]}, "gradients/encoder.encoder.layers.11.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 7.0, 15.0, 50.0, 146.0, 265.0, 271.0, 149.0, 66.0, 27.0, 9.0, 3.0, 5.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-2.5270354747772217, -2.422767400741577, -2.3184990882873535, -2.214231014251709, -2.1099629402160645, -2.00569486618042, -1.9014266729354858, -1.7971584796905518, -1.6928904056549072, -1.5886223316192627, -1.4843541383743286, -1.3800859451293945, -1.27581787109375, -1.1715497970581055, -1.0672816038131714, -0.9630134701728821, -0.8587453365325928, -0.7544772028923035, -0.6502090692520142, -0.5459409356117249, -0.44167280197143555, -0.33740466833114624, -0.23313653469085693, -0.12886840105056763, -0.02460026741027832, 0.07966786623001099, 0.1839359998703003, 0.2882041335105896, 0.3924722671508789, 0.4967404007911682, 0.6010085344314575, 0.7052766680717468, 0.809544563293457, 0.9138126969337463, 1.0180808305740356, 1.1223490238189697, 1.2266170978546143, 1.3308851718902588, 1.4351533651351929, 1.539421558380127, 1.6436896324157715, 1.747957706451416, 1.85222589969635, 1.9564940929412842, 2.0607621669769287, 2.1650302410125732, 2.269298553466797, 2.3735666275024414, 2.477834701538086, 2.5821027755737305, 2.686370849609375, 2.7906391620635986, 2.894907236099243, 2.9991753101348877, 3.1034436225891113, 3.207711696624756, 3.3119797706604004, 3.416247844696045, 3.5205159187316895, 3.624784231185913, 3.7290523052215576, 3.833320379257202, 3.937588691711426, 4.04185676574707, 4.146124839782715]}, "gradients/encoder.encoder.layers.11.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 2.0, 3.0, 4.0, 5.0, 4.0, 1.0, 16.0, 16.0, 10.0, 15.0, 19.0, 29.0, 22.0, 26.0, 39.0, 27.0, 49.0, 42.0, 38.0, 55.0, 56.0, 55.0, 57.0, 50.0, 40.0, 43.0, 52.0, 18.0, 30.0, 30.0, 29.0, 25.0, 21.0, 16.0, 15.0, 18.0, 7.0, 4.0, 4.0, 2.0, 3.0, 4.0, 2.0, 2.0, 4.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0], "bins": [-1.1421548128128052, -1.1085011959075928, -1.0748475790023804, -1.041193962097168, -1.0075403451919556, -0.9738867282867432, -0.9402331709861755, -0.9065795540809631, -0.8729259371757507, -0.8392723202705383, -0.8056187033653259, -0.7719650864601135, -0.7383115291595459, -0.7046579122543335, -0.6710042953491211, -0.6373506784439087, -0.6036970615386963, -0.5700434446334839, -0.5363898277282715, -0.5027362108230591, -0.46908262372016907, -0.43542900681495667, -0.40177541971206665, -0.36812180280685425, -0.33446818590164185, -0.30081456899642944, -0.26716095209121704, -0.23350736498832703, -0.19985374808311462, -0.16620013117790222, -0.132546529173851, -0.0988929271697998, -0.06523919105529785, -0.031585581600666046, 0.0020680278539657593, 0.035721637308597565, 0.06937524676322937, 0.10302886366844177, 0.13668246567249298, 0.1703360676765442, 0.2039896845817566, 0.237643301486969, 0.2712969183921814, 0.3049505054950714, 0.3386041224002838, 0.3722577393054962, 0.40591132640838623, 0.43956494331359863, 0.47321856021881104, 0.5068721771240234, 0.5405257940292358, 0.5741794109344482, 0.6078330278396606, 0.641486644744873, 0.6751402020454407, 0.7087938189506531, 0.7424474358558655, 0.7761010527610779, 0.8097546696662903, 0.8434082865715027, 0.8770618438720703, 0.9107154607772827, 0.9443690776824951, 0.9780226945877075, 1.01167631149292]}, "gradients/encoder.encoder.layers.11.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 2.0, 1.0, 4.0, 3.0, 3.0, 8.0, 9.0, 14.0, 22.0, 30.0, 52.0, 94.0, 179.0, 359.0, 758.0, 1902.0, 4993.0, 14944.0, 61381.0, 304661.0, 492006.0, 126946.0, 27020.0, 8089.0, 2873.0, 1139.0, 541.0, 237.0, 119.0, 53.0, 42.0, 27.0, 17.0, 10.0, 13.0, 8.0, 5.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.08984375, -2.024993896484375, -1.96014404296875, -1.895294189453125, -1.8304443359375, -1.765594482421875, -1.70074462890625, -1.635894775390625, -1.571044921875, -1.506195068359375, -1.44134521484375, -1.376495361328125, -1.3116455078125, -1.246795654296875, -1.18194580078125, -1.117095947265625, -1.05224609375, -0.987396240234375, -0.92254638671875, -0.857696533203125, -0.7928466796875, -0.727996826171875, -0.66314697265625, -0.598297119140625, -0.533447265625, -0.468597412109375, -0.40374755859375, -0.338897705078125, -0.2740478515625, -0.209197998046875, -0.14434814453125, -0.079498291015625, -0.0146484375, 0.050201416015625, 0.11505126953125, 0.179901123046875, 0.2447509765625, 0.309600830078125, 0.37445068359375, 0.439300537109375, 0.504150390625, 0.569000244140625, 0.63385009765625, 0.698699951171875, 0.7635498046875, 0.828399658203125, 0.89324951171875, 0.958099365234375, 1.02294921875, 1.087799072265625, 1.15264892578125, 1.217498779296875, 1.2823486328125, 1.347198486328125, 1.41204833984375, 1.476898193359375, 1.541748046875, 1.606597900390625, 1.67144775390625, 1.736297607421875, 1.8011474609375, 1.865997314453125, 1.93084716796875, 1.995697021484375, 2.060546875]}, "gradients/encoder.encoder.layers.11.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 1.0, 2.0, 8.0, 2.0, 1.0, 5.0, 11.0, 9.0, 15.0, 17.0, 20.0, 18.0, 51.0, 34.0, 63.0, 71.0, 77.0, 78.0, 82.0, 71.0, 88.0, 60.0, 55.0, 47.0, 31.0, 25.0, 8.0, 11.0, 8.0, 13.0, 11.0, 7.0, 7.0, 3.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.044921875, -1.015228271484375, -0.98553466796875, -0.955841064453125, -0.9261474609375, -0.896453857421875, -0.86676025390625, -0.837066650390625, -0.807373046875, -0.777679443359375, -0.74798583984375, -0.718292236328125, -0.6885986328125, -0.658905029296875, -0.62921142578125, -0.599517822265625, -0.56982421875, -0.540130615234375, -0.51043701171875, -0.480743408203125, -0.4510498046875, -0.421356201171875, -0.39166259765625, -0.361968994140625, -0.332275390625, -0.302581787109375, -0.27288818359375, -0.243194580078125, -0.2135009765625, -0.183807373046875, -0.15411376953125, -0.124420166015625, -0.0947265625, -0.065032958984375, -0.03533935546875, -0.005645751953125, 0.0240478515625, 0.053741455078125, 0.08343505859375, 0.113128662109375, 0.142822265625, 0.172515869140625, 0.20220947265625, 0.231903076171875, 0.2615966796875, 0.291290283203125, 0.32098388671875, 0.350677490234375, 0.38037109375, 0.410064697265625, 0.43975830078125, 0.469451904296875, 0.4991455078125, 0.528839111328125, 0.55853271484375, 0.588226318359375, 0.617919921875, 0.647613525390625, 0.67730712890625, 0.707000732421875, 0.7366943359375, 0.766387939453125, 0.79608154296875, 0.825775146484375, 0.85546875]}, "gradients/encoder.encoder.layers.11.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 3.0, 6.0, 7.0, 4.0, 8.0, 11.0, 19.0, 20.0, 30.0, 47.0, 94.0, 128.0, 223.0, 348.0, 837.0, 2147.0, 7782.0, 52471.0, 691443.0, 264166.0, 21616.0, 4379.0, 1419.0, 601.0, 290.0, 186.0, 85.0, 54.0, 39.0, 25.0, 22.0, 12.0, 14.0, 9.0, 6.0, 6.0, 5.0, 1.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.38671875, -3.26910400390625, -3.1514892578125, -3.03387451171875, -2.916259765625, -2.79864501953125, -2.6810302734375, -2.56341552734375, -2.44580078125, -2.32818603515625, -2.2105712890625, -2.09295654296875, -1.975341796875, -1.85772705078125, -1.7401123046875, -1.62249755859375, -1.5048828125, -1.38726806640625, -1.2696533203125, -1.15203857421875, -1.034423828125, -0.91680908203125, -0.7991943359375, -0.68157958984375, -0.56396484375, -0.44635009765625, -0.3287353515625, -0.21112060546875, -0.093505859375, 0.02410888671875, 0.1417236328125, 0.25933837890625, 0.376953125, 0.49456787109375, 0.6121826171875, 0.72979736328125, 0.847412109375, 0.96502685546875, 1.0826416015625, 1.20025634765625, 1.31787109375, 1.43548583984375, 1.5531005859375, 1.67071533203125, 1.788330078125, 1.90594482421875, 2.0235595703125, 2.14117431640625, 2.2587890625, 2.37640380859375, 2.4940185546875, 2.61163330078125, 2.729248046875, 2.84686279296875, 2.9644775390625, 3.08209228515625, 3.19970703125, 3.31732177734375, 3.4349365234375, 3.55255126953125, 3.670166015625, 3.78778076171875, 3.9053955078125, 4.02301025390625, 4.140625]}, "gradients/encoder.encoder.layers.11.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 1.0, 1.0, 4.0, 4.0, 2.0, 4.0, 7.0, 8.0, 6.0, 5.0, 14.0, 16.0, 18.0, 26.0, 25.0, 39.0, 46.0, 34.0, 53.0, 54.0, 56.0, 51.0, 63.0, 61.0, 62.0, 52.0, 56.0, 47.0, 41.0, 16.0, 28.0, 18.0, 21.0, 16.0, 10.0, 7.0, 11.0, 6.0, 4.0, 6.0, 4.0, 4.0, 3.0, 0.0, 2.0, 1.0, 0.0, 0.0, 2.0], "bins": [-5.44921875, -5.3037109375, -5.158203125, -5.0126953125, -4.8671875, -4.7216796875, -4.576171875, -4.4306640625, -4.28515625, -4.1396484375, -3.994140625, -3.8486328125, -3.703125, -3.5576171875, -3.412109375, -3.2666015625, -3.12109375, -2.9755859375, -2.830078125, -2.6845703125, -2.5390625, -2.3935546875, -2.248046875, -2.1025390625, -1.95703125, -1.8115234375, -1.666015625, -1.5205078125, -1.375, -1.2294921875, -1.083984375, -0.9384765625, -0.79296875, -0.6474609375, -0.501953125, -0.3564453125, -0.2109375, -0.0654296875, 0.080078125, 0.2255859375, 0.37109375, 0.5166015625, 0.662109375, 0.8076171875, 0.953125, 1.0986328125, 1.244140625, 1.3896484375, 1.53515625, 1.6806640625, 1.826171875, 1.9716796875, 2.1171875, 2.2626953125, 2.408203125, 2.5537109375, 2.69921875, 2.8447265625, 2.990234375, 3.1357421875, 3.28125, 3.4267578125, 3.572265625, 3.7177734375, 3.86328125]}, "gradients/encoder.encoder.layers.11.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 4.0, 5.0, 8.0, 27.0, 34.0, 69.0, 220.0, 999.0, 18740.0, 1016897.0, 10406.0, 841.0, 196.0, 58.0, 30.0, 14.0, 8.0, 4.0, 2.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.4453125, -5.2916259765625, -5.137939453125, -4.9842529296875, -4.83056640625, -4.6768798828125, -4.523193359375, -4.3695068359375, -4.2158203125, -4.0621337890625, -3.908447265625, -3.7547607421875, -3.60107421875, -3.4473876953125, -3.293701171875, -3.1400146484375, -2.986328125, -2.8326416015625, -2.678955078125, -2.5252685546875, -2.37158203125, -2.2178955078125, -2.064208984375, -1.9105224609375, -1.7568359375, -1.6031494140625, -1.449462890625, -1.2957763671875, -1.14208984375, -0.9884033203125, -0.834716796875, -0.6810302734375, -0.52734375, -0.3736572265625, -0.219970703125, -0.0662841796875, 0.08740234375, 0.2410888671875, 0.394775390625, 0.5484619140625, 0.7021484375, 0.8558349609375, 1.009521484375, 1.1632080078125, 1.31689453125, 1.4705810546875, 1.624267578125, 1.7779541015625, 1.931640625, 2.0853271484375, 2.239013671875, 2.3927001953125, 2.54638671875, 2.7000732421875, 2.853759765625, 3.0074462890625, 3.1611328125, 3.3148193359375, 3.468505859375, 3.6221923828125, 3.77587890625, 3.9295654296875, 4.083251953125, 4.2369384765625, 4.390625]}, "gradients/encoder.encoder.layers.11.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 3.0, 2.0, 7.0, 11.0, 16.0, 48.0, 72.0, 136.0, 174.0, 193.0, 138.0, 89.0, 51.0, 36.0, 20.0, 8.0, 4.0, 4.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00032901763916015625, -0.00032103434205055237, -0.0003130510449409485, -0.0003050677478313446, -0.0002970844507217407, -0.00028910115361213684, -0.00028111785650253296, -0.0002731345593929291, -0.0002651512622833252, -0.0002571679651737213, -0.00024918466806411743, -0.00024120137095451355, -0.00023321807384490967, -0.00022523477673530579, -0.0002172514796257019, -0.00020926818251609802, -0.00020128488540649414, -0.00019330158829689026, -0.00018531829118728638, -0.0001773349940776825, -0.0001693516969680786, -0.00016136839985847473, -0.00015338510274887085, -0.00014540180563926697, -0.00013741850852966309, -0.0001294352114200592, -0.00012145191431045532, -0.00011346861720085144, -0.00010548532009124756, -9.750202298164368e-05, -8.95187258720398e-05, -8.153542876243591e-05, -7.355213165283203e-05, -6.556883454322815e-05, -5.758553743362427e-05, -4.9602240324020386e-05, -4.1618943214416504e-05, -3.363564610481262e-05, -2.565234899520874e-05, -1.766905188560486e-05, -9.685754776000977e-06, -1.7024576663970947e-06, 6.280839443206787e-06, 1.4264136552810669e-05, 2.224743366241455e-05, 3.0230730772018433e-05, 3.8214027881622314e-05, 4.6197324991226196e-05, 5.418062210083008e-05, 6.216391921043396e-05, 7.014721632003784e-05, 7.813051342964172e-05, 8.61138105392456e-05, 9.409710764884949e-05, 0.00010208040475845337, 0.00011006370186805725, 0.00011804699897766113, 0.00012603029608726501, 0.0001340135931968689, 0.00014199689030647278, 0.00014998018741607666, 0.00015796348452568054, 0.00016594678163528442, 0.0001739300787448883, 0.0001819133758544922]}, "gradients/encoder.encoder.layers.11.attention.q_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 2.0, 3.0, 2.0, 2.0, 3.0, 8.0, 10.0, 15.0, 13.0, 19.0, 24.0, 29.0, 54.0, 61.0, 81.0, 145.0, 293.0, 607.0, 1470.0, 4677.0, 21265.0, 188244.0, 723907.0, 89279.0, 12911.0, 3159.0, 1094.0, 485.0, 245.0, 143.0, 96.0, 56.0, 42.0, 28.0, 18.0, 12.0, 19.0, 13.0, 11.0, 3.0, 3.0, 2.0, 2.0, 2.0, 1.0, 3.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-1.123046875, -1.0883331298828125, -1.053619384765625, -1.0189056396484375, -0.98419189453125, -0.9494781494140625, -0.914764404296875, -0.8800506591796875, -0.8453369140625, -0.8106231689453125, -0.775909423828125, -0.7411956787109375, -0.70648193359375, -0.6717681884765625, -0.637054443359375, -0.6023406982421875, -0.567626953125, -0.5329132080078125, -0.498199462890625, -0.4634857177734375, -0.42877197265625, -0.3940582275390625, -0.359344482421875, -0.3246307373046875, -0.2899169921875, -0.2552032470703125, -0.220489501953125, -0.1857757568359375, -0.15106201171875, -0.1163482666015625, -0.081634521484375, -0.0469207763671875, -0.01220703125, 0.0225067138671875, 0.057220458984375, 0.0919342041015625, 0.12664794921875, 0.1613616943359375, 0.196075439453125, 0.2307891845703125, 0.2655029296875, 0.3002166748046875, 0.334930419921875, 0.3696441650390625, 0.40435791015625, 0.4390716552734375, 0.473785400390625, 0.5084991455078125, 0.543212890625, 0.5779266357421875, 0.612640380859375, 0.6473541259765625, 0.68206787109375, 0.7167816162109375, 0.751495361328125, 0.7862091064453125, 0.8209228515625, 0.8556365966796875, 0.890350341796875, 0.9250640869140625, 0.95977783203125, 0.9944915771484375, 1.029205322265625, 1.0639190673828125, 1.0986328125]}, "gradients/encoder.encoder.layers.11.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 0.0, 4.0, 2.0, 2.0, 3.0, 8.0, 2.0, 2.0, 12.0, 11.0, 15.0, 17.0, 27.0, 22.0, 25.0, 40.0, 84.0, 94.0, 109.0, 127.0, 106.0, 76.0, 58.0, 37.0, 24.0, 20.0, 26.0, 12.0, 9.0, 8.0, 3.0, 3.0, 5.0, 4.0, 3.0, 3.0, 1.0, 2.0, 3.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-1.6123046875, -1.5688934326171875, -1.525482177734375, -1.4820709228515625, -1.43865966796875, -1.3952484130859375, -1.351837158203125, -1.3084259033203125, -1.2650146484375, -1.2216033935546875, -1.178192138671875, -1.1347808837890625, -1.09136962890625, -1.0479583740234375, -1.004547119140625, -0.9611358642578125, -0.917724609375, -0.8743133544921875, -0.830902099609375, -0.7874908447265625, -0.74407958984375, -0.7006683349609375, -0.657257080078125, -0.6138458251953125, -0.5704345703125, -0.5270233154296875, -0.483612060546875, -0.4402008056640625, -0.39678955078125, -0.3533782958984375, -0.309967041015625, -0.2665557861328125, -0.22314453125, -0.1797332763671875, -0.136322021484375, -0.0929107666015625, -0.04949951171875, -0.0060882568359375, 0.037322998046875, 0.0807342529296875, 0.1241455078125, 0.1675567626953125, 0.210968017578125, 0.2543792724609375, 0.29779052734375, 0.3412017822265625, 0.384613037109375, 0.4280242919921875, 0.471435546875, 0.5148468017578125, 0.558258056640625, 0.6016693115234375, 0.64508056640625, 0.6884918212890625, 0.731903076171875, 0.7753143310546875, 0.8187255859375, 0.8621368408203125, 0.905548095703125, 0.9489593505859375, 0.99237060546875, 1.0357818603515625, 1.079193115234375, 1.1226043701171875, 1.166015625]}, "gradients/encoder.encoder.layers.11.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 3.0, 7.0, 4.0, 9.0, 20.0, 41.0, 55.0, 100.0, 187.0, 192.0, 161.0, 92.0, 55.0, 34.0, 19.0, 10.0, 15.0, 3.0, 2.0, 3.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-22.407970428466797, -21.664745330810547, -20.921520233154297, -20.178295135498047, -19.435070037841797, -18.69184684753418, -17.94862174987793, -17.20539665222168, -16.46217155456543, -15.71894645690918, -14.97572135925293, -14.232497215270996, -13.489272117614746, -12.746047019958496, -12.002822875976562, -11.259597778320312, -10.516372680664062, -9.773147583007812, -9.029922485351562, -8.286698341369629, -7.543473243713379, -6.800248146057129, -6.057023525238037, -5.313798904418945, -4.570573806762695, -3.8273489475250244, -3.0841240882873535, -2.3408992290496826, -1.5976743698120117, -0.8544495105743408, -0.11122465133666992, 0.6319999694824219, 1.3752250671386719, 2.1184499263763428, 2.8616747856140137, 3.6048996448516846, 4.3481245040893555, 5.0913496017456055, 5.834574222564697, 6.577798843383789, 7.321023941040039, 8.064249038696289, 8.807474136352539, 9.550698280334473, 10.293923377990723, 11.037148475646973, 11.780372619628906, 12.523597717285156, 13.266822814941406, 14.010047912597656, 14.753273010253906, 15.49649715423584, 16.239723205566406, 16.982946395874023, 17.726171493530273, 18.469396591186523, 19.212621688842773, 19.955846786499023, 20.699071884155273, 21.442296981811523, 22.18552017211914, 22.92874526977539, 23.67197036743164, 24.41519546508789, 25.15842056274414]}, "gradients/encoder.encoder.layers.11.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 4.0, 2.0, 1.0, 3.0, 2.0, 5.0, 5.0, 5.0, 4.0, 9.0, 8.0, 13.0, 12.0, 15.0, 16.0, 26.0, 26.0, 28.0, 32.0, 50.0, 50.0, 54.0, 47.0, 56.0, 60.0, 61.0, 57.0, 46.0, 50.0, 48.0, 31.0, 28.0, 27.0, 17.0, 20.0, 17.0, 17.0, 13.0, 16.0, 10.0, 3.0, 5.0, 2.0, 2.0, 3.0, 3.0, 2.0, 3.0, 2.0, 2.0, 0.0, 0.0, 0.0, 2.0], "bins": [-22.595840454101562, -21.9388484954834, -21.2818546295166, -20.624862670898438, -19.96786880493164, -19.310876846313477, -18.653884887695312, -17.996891021728516, -17.33989906311035, -16.682907104492188, -16.02591323852539, -15.368921279907227, -14.711928367614746, -14.054935455322266, -13.397942543029785, -12.740949630737305, -12.083956718444824, -11.426963806152344, -10.769970893859863, -10.112977981567383, -9.455986022949219, -8.798993110656738, -8.142000198364258, -7.4850077629089355, -6.828014850616455, -6.171021938323975, -5.514029502868652, -4.857036590576172, -4.200043678283691, -3.543051242828369, -2.8860583305358887, -2.2290658950805664, -1.572072982788086, -0.9150802493095398, -0.25808751583099365, 0.39890527725219727, 1.0558979511260986, 1.712890625, 2.3698835372924805, 3.0268759727478027, 3.683868885040283, 4.340861797332764, 4.997854232788086, 5.654847145080566, 6.311840057373047, 6.968832492828369, 7.62582540512085, 8.282817840576172, 8.939810752868652, 9.596803665161133, 10.253796577453613, 10.910789489746094, 11.567781448364258, 12.224774360656738, 12.881767272949219, 13.538759231567383, 14.19575309753418, 14.85274600982666, 15.50973892211914, 16.166730880737305, 16.8237247467041, 17.480716705322266, 18.137710571289062, 18.794702529907227, 19.45169448852539]}, "gradients/encoder.encoder.layers.10.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 3.0, 0.0, 1.0, 2.0, 2.0, 2.0, 2.0, 9.0, 3.0, 10.0, 9.0, 25.0, 25.0, 31.0, 44.0, 52.0, 81.0, 109.0, 173.0, 269.0, 373.0, 585.0, 1061.0, 1975.0, 3685.0, 9147.0, 26680.0, 124950.0, 3776067.0, 194514.0, 33843.0, 10872.0, 4495.0, 2166.0, 1150.0, 616.0, 383.0, 293.0, 166.0, 122.0, 86.0, 61.0, 47.0, 29.0, 24.0, 10.0, 9.0, 9.0, 8.0, 4.0, 6.0, 7.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.455078125, -1.405914306640625, -1.35675048828125, -1.307586669921875, -1.2584228515625, -1.209259033203125, -1.16009521484375, -1.110931396484375, -1.061767578125, -1.012603759765625, -0.96343994140625, -0.914276123046875, -0.8651123046875, -0.815948486328125, -0.76678466796875, -0.717620849609375, -0.66845703125, -0.619293212890625, -0.57012939453125, -0.520965576171875, -0.4718017578125, -0.422637939453125, -0.37347412109375, -0.324310302734375, -0.275146484375, -0.225982666015625, -0.17681884765625, -0.127655029296875, -0.0784912109375, -0.029327392578125, 0.01983642578125, 0.069000244140625, 0.1181640625, 0.167327880859375, 0.21649169921875, 0.265655517578125, 0.3148193359375, 0.363983154296875, 0.41314697265625, 0.462310791015625, 0.511474609375, 0.560638427734375, 0.60980224609375, 0.658966064453125, 0.7081298828125, 0.757293701171875, 0.80645751953125, 0.855621337890625, 0.90478515625, 0.953948974609375, 1.00311279296875, 1.052276611328125, 1.1014404296875, 1.150604248046875, 1.19976806640625, 1.248931884765625, 1.298095703125, 1.347259521484375, 1.39642333984375, 1.445587158203125, 1.4947509765625, 1.543914794921875, 1.59307861328125, 1.642242431640625, 1.69140625]}, "gradients/encoder.encoder.layers.10.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 3.0, 2.0, 0.0, 2.0, 4.0, 15.0, 6.0, 4.0, 8.0, 13.0, 17.0, 30.0, 33.0, 25.0, 46.0, 62.0, 79.0, 84.0, 78.0, 99.0, 82.0, 65.0, 62.0, 43.0, 31.0, 31.0, 22.0, 16.0, 14.0, 8.0, 5.0, 2.0, 8.0, 4.0, 2.0, 1.0, 0.0, 2.0, 2.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.2294921875, -1.195098876953125, -1.16070556640625, -1.126312255859375, -1.0919189453125, -1.057525634765625, -1.02313232421875, -0.988739013671875, -0.954345703125, -0.919952392578125, -0.88555908203125, -0.851165771484375, -0.8167724609375, -0.782379150390625, -0.74798583984375, -0.713592529296875, -0.67919921875, -0.644805908203125, -0.61041259765625, -0.576019287109375, -0.5416259765625, -0.507232666015625, -0.47283935546875, -0.438446044921875, -0.404052734375, -0.369659423828125, -0.33526611328125, -0.300872802734375, -0.2664794921875, -0.232086181640625, -0.19769287109375, -0.163299560546875, -0.12890625, -0.094512939453125, -0.06011962890625, -0.025726318359375, 0.0086669921875, 0.043060302734375, 0.07745361328125, 0.111846923828125, 0.146240234375, 0.180633544921875, 0.21502685546875, 0.249420166015625, 0.2838134765625, 0.318206787109375, 0.35260009765625, 0.386993408203125, 0.42138671875, 0.455780029296875, 0.49017333984375, 0.524566650390625, 0.5589599609375, 0.593353271484375, 0.62774658203125, 0.662139892578125, 0.696533203125, 0.730926513671875, 0.76531982421875, 0.799713134765625, 0.8341064453125, 0.868499755859375, 0.90289306640625, 0.937286376953125, 0.9716796875]}, "gradients/encoder.encoder.layers.10.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 6.0, 1.0, 4.0, 2.0, 5.0, 2.0, 4.0, 9.0, 20.0, 19.0, 21.0, 28.0, 34.0, 54.0, 76.0, 117.0, 185.0, 351.0, 747.0, 1535.0, 4040.0, 12544.0, 53110.0, 1053265.0, 2990178.0, 57388.0, 12978.0, 4244.0, 1687.0, 709.0, 346.0, 194.0, 111.0, 75.0, 53.0, 37.0, 27.0, 15.0, 18.0, 14.0, 8.0, 5.0, 9.0, 5.0, 4.0, 3.0, 2.0, 3.0, 1.0, 4.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.1640625, -2.09429931640625, -2.0245361328125, -1.95477294921875, -1.885009765625, -1.81524658203125, -1.7454833984375, -1.67572021484375, -1.60595703125, -1.53619384765625, -1.4664306640625, -1.39666748046875, -1.326904296875, -1.25714111328125, -1.1873779296875, -1.11761474609375, -1.0478515625, -0.97808837890625, -0.9083251953125, -0.83856201171875, -0.768798828125, -0.69903564453125, -0.6292724609375, -0.55950927734375, -0.48974609375, -0.41998291015625, -0.3502197265625, -0.28045654296875, -0.210693359375, -0.14093017578125, -0.0711669921875, -0.00140380859375, 0.068359375, 0.13812255859375, 0.2078857421875, 0.27764892578125, 0.347412109375, 0.41717529296875, 0.4869384765625, 0.55670166015625, 0.62646484375, 0.69622802734375, 0.7659912109375, 0.83575439453125, 0.905517578125, 0.97528076171875, 1.0450439453125, 1.11480712890625, 1.1845703125, 1.25433349609375, 1.3240966796875, 1.39385986328125, 1.463623046875, 1.53338623046875, 1.6031494140625, 1.67291259765625, 1.74267578125, 1.81243896484375, 1.8822021484375, 1.95196533203125, 2.021728515625, 2.09149169921875, 2.1612548828125, 2.23101806640625, 2.30078125]}, "gradients/encoder.encoder.layers.10.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 3.0, 4.0, 4.0, 11.0, 18.0, 28.0, 67.0, 181.0, 2728.0, 777.0, 136.0, 59.0, 31.0, 16.0, 3.0, 10.0, 6.0, 4.0, 2.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.2998046875, -1.2672500610351562, -1.2346954345703125, -1.2021408081054688, -1.169586181640625, -1.1370315551757812, -1.1044769287109375, -1.0719223022460938, -1.03936767578125, -1.0068130493164062, -0.9742584228515625, -0.9417037963867188, -0.909149169921875, -0.8765945434570312, -0.8440399169921875, -0.8114852905273438, -0.7789306640625, -0.7463760375976562, -0.7138214111328125, -0.6812667846679688, -0.648712158203125, -0.6161575317382812, -0.5836029052734375, -0.5510482788085938, -0.51849365234375, -0.48593902587890625, -0.4533843994140625, -0.42082977294921875, -0.388275146484375, -0.35572052001953125, -0.3231658935546875, -0.29061126708984375, -0.258056640625, -0.22550201416015625, -0.1929473876953125, -0.16039276123046875, -0.127838134765625, -0.09528350830078125, -0.0627288818359375, -0.03017425537109375, 0.00238037109375, 0.03493499755859375, 0.0674896240234375, 0.10004425048828125, 0.132598876953125, 0.16515350341796875, 0.1977081298828125, 0.23026275634765625, 0.2628173828125, 0.29537200927734375, 0.3279266357421875, 0.36048126220703125, 0.393035888671875, 0.42559051513671875, 0.4581451416015625, 0.49069976806640625, 0.52325439453125, 0.5558090209960938, 0.5883636474609375, 0.6209182739257812, 0.653472900390625, 0.6860275268554688, 0.7185821533203125, 0.7511367797851562, 0.78369140625]}, "gradients/encoder.encoder.layers.10.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 2.0, 3.0, 2.0, 3.0, 5.0, 5.0, 5.0, 17.0, 25.0, 27.0, 44.0, 59.0, 41.0, 58.0, 87.0, 76.0, 81.0, 78.0, 65.0, 67.0, 56.0, 45.0, 47.0, 27.0, 28.0, 16.0, 11.0, 7.0, 7.0, 8.0, 4.0, 3.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.822250485420227, -1.7732852697372437, -1.7243200540542603, -1.6753548383712769, -1.626389503479004, -1.5774242877960205, -1.528459072113037, -1.4794938564300537, -1.4305286407470703, -1.381563425064087, -1.3325982093811035, -1.2836329936981201, -1.2346677780151367, -1.1857024431228638, -1.1367372274398804, -1.087772011756897, -1.0388067960739136, -0.9898415803909302, -0.9408763647079468, -0.8919110894203186, -0.8429458737373352, -0.7939806580543518, -0.7450153827667236, -0.6960501670837402, -0.6470849514007568, -0.5981197357177734, -0.54915452003479, -0.5001892447471619, -0.45122402906417847, -0.40225881338119507, -0.3532935678958893, -0.3043283224105835, -0.25536322593688965, -0.20639799535274506, -0.15743276476860046, -0.10846753418445587, -0.05950230360031128, -0.010537073016166687, 0.038428157567977905, 0.08739340305328369, 0.1363586187362671, 0.18532384932041168, 0.23428907990455627, 0.28325432538986206, 0.33221954107284546, 0.38118475675582886, 0.43015000224113464, 0.47911524772644043, 0.5280804634094238, 0.5770456790924072, 0.6260108947753906, 0.6749761700630188, 0.7239413857460022, 0.7729066014289856, 0.8218718767166138, 0.8708370923995972, 0.9198023080825806, 0.968767523765564, 1.0177327394485474, 1.0666979551315308, 1.1156632900238037, 1.164628505706787, 1.2135937213897705, 1.262558937072754, 1.3115241527557373]}, "gradients/encoder.encoder.layers.10.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 3.0, 6.0, 5.0, 4.0, 5.0, 4.0, 6.0, 13.0, 14.0, 11.0, 17.0, 23.0, 19.0, 34.0, 25.0, 39.0, 42.0, 37.0, 43.0, 38.0, 40.0, 59.0, 51.0, 47.0, 39.0, 45.0, 39.0, 36.0, 39.0, 32.0, 31.0, 34.0, 30.0, 20.0, 11.0, 13.0, 11.0, 8.0, 13.0, 4.0, 3.0, 5.0, 2.0, 5.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0], "bins": [-1.4191548824310303, -1.3746458292007446, -1.3301368951797485, -1.285627841949463, -1.2411187887191772, -1.1966097354888916, -1.1521008014678955, -1.1075917482376099, -1.0630826950073242, -1.0185736417770386, -0.9740646481513977, -0.9295556545257568, -0.8850466012954712, -0.8405376076698303, -0.7960286140441895, -0.7515195608139038, -0.7070106267929077, -0.6625016331672668, -0.6179925799369812, -0.5734835863113403, -0.5289745330810547, -0.4844655394554138, -0.43995654582977295, -0.3954475224018097, -0.35093849897384644, -0.3064294755458832, -0.2619204521179199, -0.21741145849227905, -0.1729024350643158, -0.12839341163635254, -0.08388441801071167, -0.03937539458274841, 0.005133628845214844, 0.049642644822597504, 0.09415166079998016, 0.13866066932678223, 0.18316969275474548, 0.22767871618270874, 0.2721877098083496, 0.31669673323631287, 0.3612057566642761, 0.4057147800922394, 0.45022380352020264, 0.4947327971458435, 0.5392417907714844, 0.58375084400177, 0.6282598376274109, 0.6727688312530518, 0.7172778844833374, 0.7617868781089783, 0.8062959313392639, 0.8508049249649048, 0.8953139781951904, 0.9398229718208313, 0.9843319654464722, 1.0288410186767578, 1.073349952697754, 1.1178590059280396, 1.1623679399490356, 1.2068769931793213, 1.251386046409607, 1.2958950996398926, 1.3404040336608887, 1.3849130868911743, 1.42942214012146]}, "gradients/encoder.encoder.layers.10.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 2.0, 1.0, 4.0, 0.0, 7.0, 7.0, 2.0, 9.0, 11.0, 12.0, 19.0, 40.0, 47.0, 93.0, 142.0, 253.0, 587.0, 1356.0, 3827.0, 13567.0, 69348.0, 458122.0, 420698.0, 62146.0, 12193.0, 3609.0, 1286.0, 556.0, 257.0, 126.0, 62.0, 63.0, 23.0, 30.0, 14.0, 10.0, 12.0, 7.0, 2.0, 5.0, 1.0, 4.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 3.0], "bins": [-2.166015625, -2.09124755859375, -2.0164794921875, -1.94171142578125, -1.866943359375, -1.79217529296875, -1.7174072265625, -1.64263916015625, -1.56787109375, -1.49310302734375, -1.4183349609375, -1.34356689453125, -1.268798828125, -1.19403076171875, -1.1192626953125, -1.04449462890625, -0.9697265625, -0.89495849609375, -0.8201904296875, -0.74542236328125, -0.670654296875, -0.59588623046875, -0.5211181640625, -0.44635009765625, -0.37158203125, -0.29681396484375, -0.2220458984375, -0.14727783203125, -0.072509765625, 0.00225830078125, 0.0770263671875, 0.15179443359375, 0.2265625, 0.30133056640625, 0.3760986328125, 0.45086669921875, 0.525634765625, 0.60040283203125, 0.6751708984375, 0.74993896484375, 0.82470703125, 0.89947509765625, 0.9742431640625, 1.04901123046875, 1.123779296875, 1.19854736328125, 1.2733154296875, 1.34808349609375, 1.4228515625, 1.49761962890625, 1.5723876953125, 1.64715576171875, 1.721923828125, 1.79669189453125, 1.8714599609375, 1.94622802734375, 2.02099609375, 2.09576416015625, 2.1705322265625, 2.24530029296875, 2.320068359375, 2.39483642578125, 2.4696044921875, 2.54437255859375, 2.619140625]}, "gradients/encoder.encoder.layers.10.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 2.0, 2.0, 1.0, 5.0, 2.0, 3.0, 4.0, 8.0, 7.0, 15.0, 14.0, 37.0, 20.0, 35.0, 40.0, 54.0, 72.0, 72.0, 73.0, 86.0, 78.0, 81.0, 62.0, 51.0, 50.0, 26.0, 25.0, 21.0, 15.0, 10.0, 11.0, 6.0, 7.0, 5.0, 2.0, 4.0, 0.0, 4.0, 0.0, 1.0, 3.0, 1.0, 0.0, 1.0, 1.0], "bins": [-1.26953125, -1.2364349365234375, -1.203338623046875, -1.1702423095703125, -1.13714599609375, -1.1040496826171875, -1.070953369140625, -1.0378570556640625, -1.0047607421875, -0.9716644287109375, -0.938568115234375, -0.9054718017578125, -0.87237548828125, -0.8392791748046875, -0.806182861328125, -0.7730865478515625, -0.739990234375, -0.7068939208984375, -0.673797607421875, -0.6407012939453125, -0.60760498046875, -0.5745086669921875, -0.541412353515625, -0.5083160400390625, -0.4752197265625, -0.4421234130859375, -0.409027099609375, -0.3759307861328125, -0.34283447265625, -0.3097381591796875, -0.276641845703125, -0.2435455322265625, -0.21044921875, -0.1773529052734375, -0.144256591796875, -0.1111602783203125, -0.07806396484375, -0.0449676513671875, -0.011871337890625, 0.0212249755859375, 0.0543212890625, 0.0874176025390625, 0.120513916015625, 0.1536102294921875, 0.18670654296875, 0.2198028564453125, 0.252899169921875, 0.2859954833984375, 0.319091796875, 0.3521881103515625, 0.385284423828125, 0.4183807373046875, 0.45147705078125, 0.4845733642578125, 0.517669677734375, 0.5507659912109375, 0.5838623046875, 0.6169586181640625, 0.650054931640625, 0.6831512451171875, 0.71624755859375, 0.7493438720703125, 0.782440185546875, 0.8155364990234375, 0.8486328125]}, "gradients/encoder.encoder.layers.10.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 2.0, 6.0, 2.0, 4.0, 3.0, 6.0, 17.0, 12.0, 22.0, 29.0, 42.0, 75.0, 153.0, 325.0, 1022.0, 4938.0, 95001.0, 913593.0, 29230.0, 2781.0, 718.0, 259.0, 118.0, 61.0, 41.0, 27.0, 22.0, 11.0, 9.0, 11.0, 4.0, 5.0, 1.0, 5.0, 3.0, 2.0, 4.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.1171875, -4.94281005859375, -4.7684326171875, -4.59405517578125, -4.419677734375, -4.24530029296875, -4.0709228515625, -3.89654541015625, -3.72216796875, -3.54779052734375, -3.3734130859375, -3.19903564453125, -3.024658203125, -2.85028076171875, -2.6759033203125, -2.50152587890625, -2.3271484375, -2.15277099609375, -1.9783935546875, -1.80401611328125, -1.629638671875, -1.45526123046875, -1.2808837890625, -1.10650634765625, -0.93212890625, -0.75775146484375, -0.5833740234375, -0.40899658203125, -0.234619140625, -0.06024169921875, 0.1141357421875, 0.28851318359375, 0.462890625, 0.63726806640625, 0.8116455078125, 0.98602294921875, 1.160400390625, 1.33477783203125, 1.5091552734375, 1.68353271484375, 1.85791015625, 2.03228759765625, 2.2066650390625, 2.38104248046875, 2.555419921875, 2.72979736328125, 2.9041748046875, 3.07855224609375, 3.2529296875, 3.42730712890625, 3.6016845703125, 3.77606201171875, 3.950439453125, 4.12481689453125, 4.2991943359375, 4.47357177734375, 4.64794921875, 4.82232666015625, 4.9967041015625, 5.17108154296875, 5.345458984375, 5.51983642578125, 5.6942138671875, 5.86859130859375, 6.04296875]}, "gradients/encoder.encoder.layers.10.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 3.0, 6.0, 4.0, 8.0, 9.0, 6.0, 13.0, 19.0, 19.0, 19.0, 31.0, 36.0, 41.0, 47.0, 49.0, 54.0, 60.0, 58.0, 55.0, 57.0, 77.0, 53.0, 38.0, 39.0, 35.0, 33.0, 29.0, 19.0, 12.0, 14.0, 17.0, 11.0, 10.0, 5.0, 3.0, 7.0, 2.0, 5.0, 1.0, 2.0, 3.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.90234375, -3.75994873046875, -3.6175537109375, -3.47515869140625, -3.332763671875, -3.19036865234375, -3.0479736328125, -2.90557861328125, -2.76318359375, -2.62078857421875, -2.4783935546875, -2.33599853515625, -2.193603515625, -2.05120849609375, -1.9088134765625, -1.76641845703125, -1.6240234375, -1.48162841796875, -1.3392333984375, -1.19683837890625, -1.054443359375, -0.91204833984375, -0.7696533203125, -0.62725830078125, -0.48486328125, -0.34246826171875, -0.2000732421875, -0.05767822265625, 0.084716796875, 0.22711181640625, 0.3695068359375, 0.51190185546875, 0.654296875, 0.79669189453125, 0.9390869140625, 1.08148193359375, 1.223876953125, 1.36627197265625, 1.5086669921875, 1.65106201171875, 1.79345703125, 1.93585205078125, 2.0782470703125, 2.22064208984375, 2.363037109375, 2.50543212890625, 2.6478271484375, 2.79022216796875, 2.9326171875, 3.07501220703125, 3.2174072265625, 3.35980224609375, 3.502197265625, 3.64459228515625, 3.7869873046875, 3.92938232421875, 4.07177734375, 4.21417236328125, 4.3565673828125, 4.49896240234375, 4.641357421875, 4.78375244140625, 4.9261474609375, 5.06854248046875, 5.2109375]}, "gradients/encoder.encoder.layers.10.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 2.0, 4.0, 3.0, 3.0, 7.0, 4.0, 15.0, 22.0, 14.0, 20.0, 25.0, 47.0, 56.0, 67.0, 112.0, 202.0, 363.0, 807.0, 2472.0, 15050.0, 420887.0, 585484.0, 18092.0, 2870.0, 928.0, 380.0, 200.0, 125.0, 74.0, 56.0, 36.0, 40.0, 19.0, 11.0, 19.0, 10.0, 7.0, 7.0, 7.0, 2.0, 2.0, 1.0, 3.0, 2.0, 2.0, 1.0, 1.0], "bins": [-2.80078125, -2.7271575927734375, -2.653533935546875, -2.5799102783203125, -2.50628662109375, -2.4326629638671875, -2.359039306640625, -2.2854156494140625, -2.2117919921875, -2.1381683349609375, -2.064544677734375, -1.9909210205078125, -1.91729736328125, -1.8436737060546875, -1.770050048828125, -1.6964263916015625, -1.622802734375, -1.5491790771484375, -1.475555419921875, -1.4019317626953125, -1.32830810546875, -1.2546844482421875, -1.181060791015625, -1.1074371337890625, -1.0338134765625, -0.9601898193359375, -0.886566162109375, -0.8129425048828125, -0.73931884765625, -0.6656951904296875, -0.592071533203125, -0.5184478759765625, -0.44482421875, -0.3712005615234375, -0.297576904296875, -0.2239532470703125, -0.15032958984375, -0.0767059326171875, -0.003082275390625, 0.0705413818359375, 0.1441650390625, 0.2177886962890625, 0.291412353515625, 0.3650360107421875, 0.43865966796875, 0.5122833251953125, 0.585906982421875, 0.6595306396484375, 0.733154296875, 0.8067779541015625, 0.880401611328125, 0.9540252685546875, 1.02764892578125, 1.1012725830078125, 1.174896240234375, 1.2485198974609375, 1.3221435546875, 1.3957672119140625, 1.469390869140625, 1.5430145263671875, 1.61663818359375, 1.6902618408203125, 1.763885498046875, 1.8375091552734375, 1.9111328125]}, "gradients/encoder.encoder.layers.10.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 6.0, 4.0, 9.0, 19.0, 35.0, 43.0, 94.0, 141.0, 177.0, 156.0, 135.0, 96.0, 34.0, 31.0, 10.0, 8.0, 6.0, 3.0, 3.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.0003345012664794922, -0.00032736361026763916, -0.00032022595405578613, -0.0003130882978439331, -0.0003059506416320801, -0.00029881298542022705, -0.000291675329208374, -0.000284537672996521, -0.00027740001678466797, -0.00027026236057281494, -0.0002631247043609619, -0.0002559870481491089, -0.00024884939193725586, -0.00024171173572540283, -0.0002345740795135498, -0.00022743642330169678, -0.00022029876708984375, -0.00021316111087799072, -0.0002060234546661377, -0.00019888579845428467, -0.00019174814224243164, -0.0001846104860305786, -0.00017747282981872559, -0.00017033517360687256, -0.00016319751739501953, -0.0001560598611831665, -0.00014892220497131348, -0.00014178454875946045, -0.00013464689254760742, -0.0001275092363357544, -0.00012037158012390137, -0.00011323392391204834, -0.00010609626770019531, -9.895861148834229e-05, -9.182095527648926e-05, -8.468329906463623e-05, -7.75456428527832e-05, -7.040798664093018e-05, -6.327033042907715e-05, -5.613267421722412e-05, -4.8995018005371094e-05, -4.1857361793518066e-05, -3.471970558166504e-05, -2.7582049369812012e-05, -2.0444393157958984e-05, -1.3306736946105957e-05, -6.16908073425293e-06, 9.685754776000977e-07, 8.106231689453125e-06, 1.5243887901306152e-05, 2.238154411315918e-05, 2.9519200325012207e-05, 3.6656856536865234e-05, 4.379451274871826e-05, 5.093216896057129e-05, 5.8069825172424316e-05, 6.520748138427734e-05, 7.234513759613037e-05, 7.94827938079834e-05, 8.662045001983643e-05, 9.375810623168945e-05, 0.00010089576244354248, 0.00010803341865539551, 0.00011517107486724854, 0.00012230873107910156]}, "gradients/encoder.encoder.layers.10.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 3.0, 3.0, 3.0, 3.0, 5.0, 6.0, 16.0, 28.0, 28.0, 72.0, 136.0, 240.0, 683.0, 2320.0, 22255.0, 937447.0, 79577.0, 4089.0, 915.0, 337.0, 177.0, 74.0, 54.0, 26.0, 14.0, 19.0, 11.0, 5.0, 5.0, 4.0, 2.0, 0.0, 2.0, 1.0, 4.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.69921875, -2.59405517578125, -2.4888916015625, -2.38372802734375, -2.278564453125, -2.17340087890625, -2.0682373046875, -1.96307373046875, -1.85791015625, -1.75274658203125, -1.6475830078125, -1.54241943359375, -1.437255859375, -1.33209228515625, -1.2269287109375, -1.12176513671875, -1.0166015625, -0.91143798828125, -0.8062744140625, -0.70111083984375, -0.595947265625, -0.49078369140625, -0.3856201171875, -0.28045654296875, -0.17529296875, -0.07012939453125, 0.0350341796875, 0.14019775390625, 0.245361328125, 0.35052490234375, 0.4556884765625, 0.56085205078125, 0.666015625, 0.77117919921875, 0.8763427734375, 0.98150634765625, 1.086669921875, 1.19183349609375, 1.2969970703125, 1.40216064453125, 1.50732421875, 1.61248779296875, 1.7176513671875, 1.82281494140625, 1.927978515625, 2.03314208984375, 2.1383056640625, 2.24346923828125, 2.3486328125, 2.45379638671875, 2.5589599609375, 2.66412353515625, 2.769287109375, 2.87445068359375, 2.9796142578125, 3.08477783203125, 3.18994140625, 3.29510498046875, 3.4002685546875, 3.50543212890625, 3.610595703125, 3.71575927734375, 3.8209228515625, 3.92608642578125, 4.03125]}, "gradients/encoder.encoder.layers.10.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 4.0, 3.0, 3.0, 3.0, 8.0, 12.0, 23.0, 32.0, 49.0, 86.0, 147.0, 176.0, 158.0, 117.0, 66.0, 48.0, 31.0, 18.0, 7.0, 10.0, 7.0, 1.0, 3.0, 1.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.09765625, -3.0199737548828125, -2.942291259765625, -2.8646087646484375, -2.78692626953125, -2.7092437744140625, -2.631561279296875, -2.5538787841796875, -2.4761962890625, -2.3985137939453125, -2.320831298828125, -2.2431488037109375, -2.16546630859375, -2.0877838134765625, -2.010101318359375, -1.9324188232421875, -1.854736328125, -1.7770538330078125, -1.699371337890625, -1.6216888427734375, -1.54400634765625, -1.4663238525390625, -1.388641357421875, -1.3109588623046875, -1.2332763671875, -1.1555938720703125, -1.077911376953125, -1.0002288818359375, -0.92254638671875, -0.8448638916015625, -0.767181396484375, -0.6894989013671875, -0.61181640625, -0.5341339111328125, -0.456451416015625, -0.3787689208984375, -0.30108642578125, -0.2234039306640625, -0.145721435546875, -0.0680389404296875, 0.0096435546875, 0.0873260498046875, 0.165008544921875, 0.2426910400390625, 0.32037353515625, 0.3980560302734375, 0.475738525390625, 0.5534210205078125, 0.631103515625, 0.7087860107421875, 0.786468505859375, 0.8641510009765625, 0.94183349609375, 1.0195159912109375, 1.097198486328125, 1.1748809814453125, 1.2525634765625, 1.3302459716796875, 1.407928466796875, 1.4856109619140625, 1.56329345703125, 1.6409759521484375, 1.718658447265625, 1.7963409423828125, 1.8740234375]}, "gradients/encoder.encoder.layers.10.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 5.0, 0.0, 10.0, 8.0, 24.0, 33.0, 65.0, 89.0, 141.0, 150.0, 183.0, 102.0, 78.0, 59.0, 29.0, 11.0, 7.0, 4.0, 3.0, 6.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-20.10253143310547, -19.405010223388672, -18.707489013671875, -18.009967803955078, -17.31244659423828, -16.614925384521484, -15.917405128479004, -15.219884872436523, -14.522363662719727, -13.82484245300293, -13.127321243286133, -12.429800033569336, -11.732279777526855, -11.034758567810059, -10.337237358093262, -9.639717102050781, -8.942194938659668, -8.244673728942871, -7.547152996063232, -6.8496317863464355, -6.152111053466797, -5.45458984375, -4.757068634033203, -4.0595479011535645, -3.3620266914367676, -2.66450572013855, -1.9669846296310425, -1.2694635391235352, -0.5719425678253174, 0.1255784034729004, 0.8230996131896973, 1.520620346069336, 2.218141555786133, 2.9156625270843506, 3.6131834983825684, 4.310704708099365, 5.008225440979004, 5.705746650695801, 6.403267860412598, 7.100788593292236, 7.798309803009033, 8.495830535888672, 9.193351745605469, 9.890872955322266, 10.588394165039062, 11.28591537475586, 11.983436584472656, 12.680956840515137, 13.378478050231934, 14.07599925994873, 14.773520469665527, 15.471040725708008, 16.168561935424805, 16.8660831451416, 17.5636043548584, 18.261125564575195, 18.958646774291992, 19.65616798400879, 20.353689193725586, 21.051210403442383, 21.74873161315918, 22.446250915527344, 23.14377212524414, 23.841293334960938, 24.538814544677734]}, "gradients/encoder.encoder.layers.10.layer_norm.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0, 5.0, 8.0, 4.0, 3.0, 7.0, 5.0, 8.0, 8.0, 13.0, 20.0, 21.0, 23.0, 28.0, 26.0, 45.0, 39.0, 47.0, 41.0, 52.0, 68.0, 47.0, 58.0, 47.0, 58.0, 55.0, 47.0, 37.0, 38.0, 33.0, 17.0, 20.0, 22.0, 9.0, 8.0, 8.0, 7.0, 7.0, 8.0, 2.0, 6.0, 3.0, 1.0, 3.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-15.755983352661133, -15.202385902404785, -14.648787498474121, -14.095190048217773, -13.54159164428711, -12.987994194030762, -12.434396743774414, -11.88079833984375, -11.327200889587402, -10.773603439331055, -10.22000503540039, -9.666407585144043, -9.112810134887695, -8.559211730957031, -8.005614280700684, -7.452016353607178, -6.898418426513672, -6.344820499420166, -5.79122257232666, -5.2376251220703125, -4.684027194976807, -4.130429267883301, -3.576831579208374, -3.0232338905334473, -2.4696359634399414, -1.916038155555725, -1.3624403476715088, -0.8088425397872925, -0.25524473190307617, 0.2983531951904297, 0.8519508838653564, 1.4055485725402832, 1.959146499633789, 2.512744426727295, 3.0663421154022217, 3.6199398040771484, 4.173537731170654, 4.72713565826416, 5.280733108520508, 5.834331035614014, 6.3879289627075195, 6.941526889801025, 7.495124816894531, 8.048722267150879, 8.602319717407227, 9.15591812133789, 9.709515571594238, 10.263113021850586, 10.81671142578125, 11.370308876037598, 11.923907279968262, 12.47750473022461, 13.031103134155273, 13.584700584411621, 14.138298034667969, 14.691896438598633, 15.24549388885498, 15.799091339111328, 16.352689743041992, 16.906288146972656, 17.459884643554688, 18.01348304748535, 18.567081451416016, 19.120677947998047, 19.67427635192871]}, "gradients/encoder.encoder.layers.9.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 5.0, 0.0, 3.0, 8.0, 11.0, 6.0, 11.0, 12.0, 21.0, 33.0, 62.0, 101.0, 176.0, 399.0, 1196.0, 4567.0, 49335.0, 4107947.0, 25557.0, 3233.0, 907.0, 355.0, 167.0, 73.0, 39.0, 27.0, 13.0, 6.0, 7.0, 4.0, 2.0, 2.0, 4.0, 2.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.33984375, -5.18896484375, -5.0380859375, -4.88720703125, -4.736328125, -4.58544921875, -4.4345703125, -4.28369140625, -4.1328125, -3.98193359375, -3.8310546875, -3.68017578125, -3.529296875, -3.37841796875, -3.2275390625, -3.07666015625, -2.92578125, -2.77490234375, -2.6240234375, -2.47314453125, -2.322265625, -2.17138671875, -2.0205078125, -1.86962890625, -1.71875, -1.56787109375, -1.4169921875, -1.26611328125, -1.115234375, -0.96435546875, -0.8134765625, -0.66259765625, -0.51171875, -0.36083984375, -0.2099609375, -0.05908203125, 0.091796875, 0.24267578125, 0.3935546875, 0.54443359375, 0.6953125, 0.84619140625, 0.9970703125, 1.14794921875, 1.298828125, 1.44970703125, 1.6005859375, 1.75146484375, 1.90234375, 2.05322265625, 2.2041015625, 2.35498046875, 2.505859375, 2.65673828125, 2.8076171875, 2.95849609375, 3.109375, 3.26025390625, 3.4111328125, 3.56201171875, 3.712890625, 3.86376953125, 4.0146484375, 4.16552734375, 4.31640625]}, "gradients/encoder.encoder.layers.9.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 4.0, 3.0, 2.0, 7.0, 4.0, 9.0, 13.0, 15.0, 23.0, 32.0, 40.0, 44.0, 65.0, 78.0, 96.0, 100.0, 95.0, 82.0, 63.0, 64.0, 42.0, 33.0, 25.0, 16.0, 10.0, 16.0, 8.0, 3.0, 4.0, 3.0, 3.0, 4.0, 3.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.4951171875, -1.454345703125, -1.41357421875, -1.372802734375, -1.33203125, -1.291259765625, -1.25048828125, -1.209716796875, -1.1689453125, -1.128173828125, -1.08740234375, -1.046630859375, -1.005859375, -0.965087890625, -0.92431640625, -0.883544921875, -0.8427734375, -0.802001953125, -0.76123046875, -0.720458984375, -0.6796875, -0.638916015625, -0.59814453125, -0.557373046875, -0.5166015625, -0.475830078125, -0.43505859375, -0.394287109375, -0.353515625, -0.312744140625, -0.27197265625, -0.231201171875, -0.1904296875, -0.149658203125, -0.10888671875, -0.068115234375, -0.02734375, 0.013427734375, 0.05419921875, 0.094970703125, 0.1357421875, 0.176513671875, 0.21728515625, 0.258056640625, 0.298828125, 0.339599609375, 0.38037109375, 0.421142578125, 0.4619140625, 0.502685546875, 0.54345703125, 0.584228515625, 0.625, 0.665771484375, 0.70654296875, 0.747314453125, 0.7880859375, 0.828857421875, 0.86962890625, 0.910400390625, 0.951171875, 0.991943359375, 1.03271484375, 1.073486328125, 1.1142578125]}, "gradients/encoder.encoder.layers.9.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [4.0, 0.0, 0.0, 1.0, 3.0, 3.0, 4.0, 1.0, 4.0, 8.0, 21.0, 12.0, 22.0, 27.0, 32.0, 49.0, 60.0, 91.0, 136.0, 227.0, 361.0, 650.0, 1336.0, 2869.0, 6836.0, 22212.0, 127820.0, 3912299.0, 89575.0, 18128.0, 6023.0, 2576.0, 1311.0, 610.0, 315.0, 196.0, 137.0, 79.0, 80.0, 35.0, 42.0, 21.0, 29.0, 14.0, 13.0, 7.0, 7.0, 3.0, 2.0, 5.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0], "bins": [-1.71484375, -1.652313232421875, -1.58978271484375, -1.527252197265625, -1.4647216796875, -1.402191162109375, -1.33966064453125, -1.277130126953125, -1.214599609375, -1.152069091796875, -1.08953857421875, -1.027008056640625, -0.9644775390625, -0.901947021484375, -0.83941650390625, -0.776885986328125, -0.71435546875, -0.651824951171875, -0.58929443359375, -0.526763916015625, -0.4642333984375, -0.401702880859375, -0.33917236328125, -0.276641845703125, -0.214111328125, -0.151580810546875, -0.08905029296875, -0.026519775390625, 0.0360107421875, 0.098541259765625, 0.16107177734375, 0.223602294921875, 0.2861328125, 0.348663330078125, 0.41119384765625, 0.473724365234375, 0.5362548828125, 0.598785400390625, 0.66131591796875, 0.723846435546875, 0.786376953125, 0.848907470703125, 0.91143798828125, 0.973968505859375, 1.0364990234375, 1.099029541015625, 1.16156005859375, 1.224090576171875, 1.28662109375, 1.349151611328125, 1.41168212890625, 1.474212646484375, 1.5367431640625, 1.599273681640625, 1.66180419921875, 1.724334716796875, 1.786865234375, 1.849395751953125, 1.91192626953125, 1.974456787109375, 2.0369873046875, 2.099517822265625, 2.16204833984375, 2.224578857421875, 2.287109375]}, "gradients/encoder.encoder.layers.9.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 3.0, 2.0, 7.0, 20.0, 20.0, 53.0, 100.0, 419.0, 3192.0, 152.0, 43.0, 29.0, 24.0, 7.0, 6.0, 2.0, 1.0, 1.0, 1.0, 0.0, 2.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.4462890625, -0.4217529296875, -0.397216796875, -0.3726806640625, -0.34814453125, -0.3236083984375, -0.299072265625, -0.2745361328125, -0.25, -0.2254638671875, -0.200927734375, -0.1763916015625, -0.15185546875, -0.1273193359375, -0.102783203125, -0.0782470703125, -0.0537109375, -0.0291748046875, -0.004638671875, 0.0198974609375, 0.04443359375, 0.0689697265625, 0.093505859375, 0.1180419921875, 0.142578125, 0.1671142578125, 0.191650390625, 0.2161865234375, 0.24072265625, 0.2652587890625, 0.289794921875, 0.3143310546875, 0.3388671875, 0.3634033203125, 0.387939453125, 0.4124755859375, 0.43701171875, 0.4615478515625, 0.486083984375, 0.5106201171875, 0.53515625, 0.5596923828125, 0.584228515625, 0.6087646484375, 0.63330078125, 0.6578369140625, 0.682373046875, 0.7069091796875, 0.7314453125, 0.7559814453125, 0.780517578125, 0.8050537109375, 0.82958984375, 0.8541259765625, 0.878662109375, 0.9031982421875, 0.927734375, 0.9522705078125, 0.976806640625, 1.0013427734375, 1.02587890625, 1.0504150390625, 1.074951171875, 1.0994873046875, 1.1240234375]}, "gradients/encoder.encoder.layers.9.final_layer_norm.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 1.0, 0.0, 2.0, 3.0, 11.0, 12.0, 31.0, 54.0, 98.0, 95.0, 165.0, 166.0, 138.0, 108.0, 54.0, 31.0, 24.0, 8.0, 6.0, 1.0, 0.0, 0.0, 3.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.3960165977478027, -1.3235034942626953, -1.2509905099868774, -1.1784775257110596, -1.1059644222259521, -1.0334513187408447, -0.9609383344650269, -0.8884252905845642, -0.8159122467041016, -0.7433992028236389, -0.6708861589431763, -0.5983731150627136, -0.525860071182251, -0.45334702730178833, -0.3808339834213257, -0.30832093954086304, -0.2358078956604004, -0.16329485177993774, -0.0907818078994751, -0.01826876401901245, 0.054244279861450195, 0.12675732374191284, 0.1992703676223755, 0.27178341150283813, 0.3442964553833008, 0.4168094992637634, 0.4893225431442261, 0.5618355870246887, 0.6343486309051514, 0.706861674785614, 0.7793747186660767, 0.8518877625465393, 0.924400806427002, 0.9969138503074646, 1.0694268941879272, 1.1419398784637451, 1.2144529819488525, 1.28696608543396, 1.3594790697097778, 1.4319920539855957, 1.5045051574707031, 1.5770182609558105, 1.6495312452316284, 1.7220442295074463, 1.7945573329925537, 1.8670704364776611, 1.939583420753479, 2.012096405029297, 2.0846095085144043, 2.1571226119995117, 2.229635715484619, 2.3021485805511475, 2.374661684036255, 2.4471747875213623, 2.5196876525878906, 2.592200756072998, 2.6647138595581055, 2.737226963043213, 2.8097400665283203, 2.8822529315948486, 2.954766035079956, 3.0272791385650635, 3.099792003631592, 3.172305107116699, 3.2448182106018066]}, "gradients/encoder.encoder.layers.9.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 5.0, 2.0, 3.0, 8.0, 4.0, 3.0, 4.0, 7.0, 9.0, 9.0, 14.0, 15.0, 25.0, 24.0, 30.0, 34.0, 48.0, 40.0, 45.0, 44.0, 50.0, 54.0, 61.0, 60.0, 50.0, 55.0, 47.0, 36.0, 42.0, 39.0, 25.0, 20.0, 22.0, 13.0, 16.0, 12.0, 11.0, 9.0, 7.0, 3.0, 4.0, 0.0, 1.0, 1.0, 3.0, 2.0, 3.0, 1.0, 0.0, 1.0], "bins": [-1.322563886642456, -1.2858307361602783, -1.2490975856781006, -1.2123644351959229, -1.1756314039230347, -1.138898253440857, -1.1021651029586792, -1.0654319524765015, -1.0286988019943237, -0.991965651512146, -0.955232560634613, -0.9184994101524353, -0.8817662596702576, -0.8450331687927246, -0.8083000183105469, -0.7715668678283691, -0.7348337769508362, -0.6981006264686584, -0.6613675355911255, -0.6246343851089478, -0.58790123462677, -0.5511680841445923, -0.5144349932670593, -0.4777018427848816, -0.44096872210502625, -0.4042356014251709, -0.36750245094299316, -0.3307693302631378, -0.29403620958328247, -0.25730305910110474, -0.2205699384212494, -0.18383678793907166, -0.1471036672592163, -0.11037053167819977, -0.07363740354776382, -0.03690427541732788, -0.00017113983631134033, 0.0365619957447052, 0.07329511642456055, 0.11002826690673828, 0.14676138758659363, 0.18349452316761017, 0.2202276587486267, 0.25696077942848206, 0.2936939001083374, 0.33042705059051514, 0.3671601712703705, 0.4038933217525482, 0.44062644243240356, 0.4773595631122589, 0.5140926837921143, 0.550825834274292, 0.5875589847564697, 0.6242921352386475, 0.6610252261161804, 0.6977583765983582, 0.7344914674758911, 0.7712246179580688, 0.8079577088356018, 0.8446908593177795, 0.8814240097999573, 0.9181571006774902, 0.954890251159668, 0.9916234016418457, 1.0283565521240234]}, "gradients/encoder.encoder.layers.9.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 6.0, 1.0, 4.0, 11.0, 5.0, 15.0, 23.0, 46.0, 87.0, 131.0, 301.0, 789.0, 2342.0, 11015.0, 94360.0, 738519.0, 178771.0, 16914.0, 3411.0, 1041.0, 381.0, 179.0, 92.0, 42.0, 27.0, 13.0, 10.0, 4.0, 3.0, 7.0, 3.0, 3.0, 4.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.630859375, -3.531707763671875, -3.43255615234375, -3.333404541015625, -3.2342529296875, -3.135101318359375, -3.03594970703125, -2.936798095703125, -2.837646484375, -2.738494873046875, -2.63934326171875, -2.540191650390625, -2.4410400390625, -2.341888427734375, -2.24273681640625, -2.143585205078125, -2.04443359375, -1.945281982421875, -1.84613037109375, -1.746978759765625, -1.6478271484375, -1.548675537109375, -1.44952392578125, -1.350372314453125, -1.251220703125, -1.152069091796875, -1.05291748046875, -0.953765869140625, -0.8546142578125, -0.755462646484375, -0.65631103515625, -0.557159423828125, -0.4580078125, -0.358856201171875, -0.25970458984375, -0.160552978515625, -0.0614013671875, 0.037750244140625, 0.13690185546875, 0.236053466796875, 0.335205078125, 0.434356689453125, 0.53350830078125, 0.632659912109375, 0.7318115234375, 0.830963134765625, 0.93011474609375, 1.029266357421875, 1.12841796875, 1.227569580078125, 1.32672119140625, 1.425872802734375, 1.5250244140625, 1.624176025390625, 1.72332763671875, 1.822479248046875, 1.921630859375, 2.020782470703125, 2.11993408203125, 2.219085693359375, 2.3182373046875, 2.417388916015625, 2.51654052734375, 2.615692138671875, 2.71484375]}, "gradients/encoder.encoder.layers.9.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 0.0, 8.0, 1.0, 4.0, 6.0, 5.0, 10.0, 21.0, 18.0, 24.0, 27.0, 38.0, 53.0, 61.0, 79.0, 84.0, 89.0, 78.0, 67.0, 73.0, 55.0, 48.0, 43.0, 17.0, 26.0, 20.0, 16.0, 10.0, 7.0, 5.0, 3.0, 5.0, 4.0, 1.0, 2.0, 3.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-1.390625, -1.3539276123046875, -1.317230224609375, -1.2805328369140625, -1.24383544921875, -1.2071380615234375, -1.170440673828125, -1.1337432861328125, -1.0970458984375, -1.0603485107421875, -1.023651123046875, -0.9869537353515625, -0.95025634765625, -0.9135589599609375, -0.876861572265625, -0.8401641845703125, -0.803466796875, -0.7667694091796875, -0.730072021484375, -0.6933746337890625, -0.65667724609375, -0.6199798583984375, -0.583282470703125, -0.5465850830078125, -0.5098876953125, -0.4731903076171875, -0.436492919921875, -0.3997955322265625, -0.36309814453125, -0.3264007568359375, -0.289703369140625, -0.2530059814453125, -0.21630859375, -0.1796112060546875, -0.142913818359375, -0.1062164306640625, -0.06951904296875, -0.0328216552734375, 0.003875732421875, 0.0405731201171875, 0.0772705078125, 0.1139678955078125, 0.150665283203125, 0.1873626708984375, 0.22406005859375, 0.2607574462890625, 0.297454833984375, 0.3341522216796875, 0.370849609375, 0.4075469970703125, 0.444244384765625, 0.4809417724609375, 0.51763916015625, 0.5543365478515625, 0.591033935546875, 0.6277313232421875, 0.6644287109375, 0.7011260986328125, 0.737823486328125, 0.7745208740234375, 0.81121826171875, 0.8479156494140625, 0.884613037109375, 0.9213104248046875, 0.9580078125]}, "gradients/encoder.encoder.layers.9.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 3.0, 3.0, 5.0, 7.0, 5.0, 8.0, 12.0, 13.0, 17.0, 27.0, 31.0, 64.0, 96.0, 153.0, 309.0, 565.0, 1699.0, 9291.0, 275303.0, 734775.0, 21884.0, 2618.0, 855.0, 336.0, 162.0, 108.0, 72.0, 43.0, 28.0, 25.0, 20.0, 8.0, 6.0, 7.0, 4.0, 1.0, 2.0, 0.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.388671875, -3.263885498046875, -3.13909912109375, -3.014312744140625, -2.8895263671875, -2.764739990234375, -2.63995361328125, -2.515167236328125, -2.390380859375, -2.265594482421875, -2.14080810546875, -2.016021728515625, -1.8912353515625, -1.766448974609375, -1.64166259765625, -1.516876220703125, -1.39208984375, -1.267303466796875, -1.14251708984375, -1.017730712890625, -0.8929443359375, -0.768157958984375, -0.64337158203125, -0.518585205078125, -0.393798828125, -0.269012451171875, -0.14422607421875, -0.019439697265625, 0.1053466796875, 0.230133056640625, 0.35491943359375, 0.479705810546875, 0.6044921875, 0.729278564453125, 0.85406494140625, 0.978851318359375, 1.1036376953125, 1.228424072265625, 1.35321044921875, 1.477996826171875, 1.602783203125, 1.727569580078125, 1.85235595703125, 1.977142333984375, 2.1019287109375, 2.226715087890625, 2.35150146484375, 2.476287841796875, 2.60107421875, 2.725860595703125, 2.85064697265625, 2.975433349609375, 3.1002197265625, 3.225006103515625, 3.34979248046875, 3.474578857421875, 3.599365234375, 3.724151611328125, 3.84893798828125, 3.973724365234375, 4.0985107421875, 4.223297119140625, 4.34808349609375, 4.472869873046875, 4.59765625]}, "gradients/encoder.encoder.layers.9.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 5.0, 3.0, 5.0, 6.0, 8.0, 9.0, 13.0, 8.0, 11.0, 16.0, 9.0, 19.0, 34.0, 27.0, 36.0, 31.0, 43.0, 43.0, 37.0, 48.0, 49.0, 38.0, 44.0, 48.0, 41.0, 50.0, 42.0, 49.0, 33.0, 32.0, 21.0, 34.0, 31.0, 13.0, 16.0, 12.0, 14.0, 7.0, 10.0, 8.0, 4.0, 2.0, 3.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-3.298828125, -3.19293212890625, -3.0870361328125, -2.98114013671875, -2.875244140625, -2.76934814453125, -2.6634521484375, -2.55755615234375, -2.45166015625, -2.34576416015625, -2.2398681640625, -2.13397216796875, -2.028076171875, -1.92218017578125, -1.8162841796875, -1.71038818359375, -1.6044921875, -1.49859619140625, -1.3927001953125, -1.28680419921875, -1.180908203125, -1.07501220703125, -0.9691162109375, -0.86322021484375, -0.75732421875, -0.65142822265625, -0.5455322265625, -0.43963623046875, -0.333740234375, -0.22784423828125, -0.1219482421875, -0.01605224609375, 0.08984375, 0.19573974609375, 0.3016357421875, 0.40753173828125, 0.513427734375, 0.61932373046875, 0.7252197265625, 0.83111572265625, 0.93701171875, 1.04290771484375, 1.1488037109375, 1.25469970703125, 1.360595703125, 1.46649169921875, 1.5723876953125, 1.67828369140625, 1.7841796875, 1.89007568359375, 1.9959716796875, 2.10186767578125, 2.207763671875, 2.31365966796875, 2.4195556640625, 2.52545166015625, 2.63134765625, 2.73724365234375, 2.8431396484375, 2.94903564453125, 3.054931640625, 3.16082763671875, 3.2667236328125, 3.37261962890625, 3.478515625]}, "gradients/encoder.encoder.layers.9.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 3.0, 2.0, 1.0, 5.0, 8.0, 13.0, 18.0, 31.0, 54.0, 74.0, 131.0, 278.0, 766.0, 2824.0, 27913.0, 923585.0, 86576.0, 4461.0, 1006.0, 385.0, 190.0, 99.0, 56.0, 24.0, 13.0, 8.0, 7.0, 6.0, 7.0, 2.0, 4.0, 4.0, 3.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.138671875, -2.071075439453125, -2.00347900390625, -1.935882568359375, -1.8682861328125, -1.800689697265625, -1.73309326171875, -1.665496826171875, -1.597900390625, -1.530303955078125, -1.46270751953125, -1.395111083984375, -1.3275146484375, -1.259918212890625, -1.19232177734375, -1.124725341796875, -1.05712890625, -0.989532470703125, -0.92193603515625, -0.854339599609375, -0.7867431640625, -0.719146728515625, -0.65155029296875, -0.583953857421875, -0.516357421875, -0.448760986328125, -0.38116455078125, -0.313568115234375, -0.2459716796875, -0.178375244140625, -0.11077880859375, -0.043182373046875, 0.0244140625, 0.092010498046875, 0.15960693359375, 0.227203369140625, 0.2947998046875, 0.362396240234375, 0.42999267578125, 0.497589111328125, 0.565185546875, 0.632781982421875, 0.70037841796875, 0.767974853515625, 0.8355712890625, 0.903167724609375, 0.97076416015625, 1.038360595703125, 1.10595703125, 1.173553466796875, 1.24114990234375, 1.308746337890625, 1.3763427734375, 1.443939208984375, 1.51153564453125, 1.579132080078125, 1.646728515625, 1.714324951171875, 1.78192138671875, 1.849517822265625, 1.9171142578125, 1.984710693359375, 2.05230712890625, 2.119903564453125, 2.1875]}, "gradients/encoder.encoder.layers.9.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 4.0, 1.0, 8.0, 10.0, 15.0, 20.0, 40.0, 75.0, 110.0, 159.0, 159.0, 149.0, 91.0, 78.0, 43.0, 24.0, 11.0, 8.0, 4.0, 4.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0002090930938720703, -0.00020320340991020203, -0.00019731372594833374, -0.00019142404198646545, -0.00018553435802459717, -0.00017964467406272888, -0.0001737549901008606, -0.0001678653061389923, -0.00016197562217712402, -0.00015608593821525574, -0.00015019625425338745, -0.00014430657029151917, -0.00013841688632965088, -0.0001325272023677826, -0.0001266375184059143, -0.00012074783444404602, -0.00011485815048217773, -0.00010896846652030945, -0.00010307878255844116, -9.718909859657288e-05, -9.129941463470459e-05, -8.54097306728363e-05, -7.952004671096802e-05, -7.363036274909973e-05, -6.774067878723145e-05, -6.185099482536316e-05, -5.596131086349487e-05, -5.007162690162659e-05, -4.41819429397583e-05, -3.8292258977890015e-05, -3.240257501602173e-05, -2.6512891054153442e-05, -2.0623207092285156e-05, -1.473352313041687e-05, -8.843839168548584e-06, -2.954155206680298e-06, 2.9355287551879883e-06, 8.825212717056274e-06, 1.471489667892456e-05, 2.0604580640792847e-05, 2.6494264602661133e-05, 3.238394856452942e-05, 3.8273632526397705e-05, 4.416331648826599e-05, 5.005300045013428e-05, 5.5942684412002563e-05, 6.183236837387085e-05, 6.772205233573914e-05, 7.361173629760742e-05, 7.950142025947571e-05, 8.5391104221344e-05, 9.128078818321228e-05, 9.717047214508057e-05, 0.00010306015610694885, 0.00010894984006881714, 0.00011483952403068542, 0.00012072920799255371, 0.000126618891954422, 0.00013250857591629028, 0.00013839825987815857, 0.00014428794384002686, 0.00015017762780189514, 0.00015606731176376343, 0.00016195699572563171, 0.0001678466796875]}, "gradients/encoder.encoder.layers.9.attention.q_proj.weight": {"_type": "histogram", "values": [2.0, 3.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 3.0, 2.0, 1.0, 5.0, 3.0, 4.0, 4.0, 5.0, 14.0, 24.0, 31.0, 54.0, 96.0, 136.0, 295.0, 629.0, 1752.0, 5947.0, 51530.0, 805263.0, 167308.0, 11323.0, 2428.0, 860.0, 396.0, 183.0, 95.0, 55.0, 39.0, 16.0, 17.0, 10.0, 9.0, 4.0, 4.0, 8.0, 2.0, 1.0, 2.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.3291015625, -1.2827606201171875, -1.236419677734375, -1.1900787353515625, -1.14373779296875, -1.0973968505859375, -1.051055908203125, -1.0047149658203125, -0.9583740234375, -0.9120330810546875, -0.865692138671875, -0.8193511962890625, -0.77301025390625, -0.7266693115234375, -0.680328369140625, -0.6339874267578125, -0.587646484375, -0.5413055419921875, -0.494964599609375, -0.4486236572265625, -0.40228271484375, -0.3559417724609375, -0.309600830078125, -0.2632598876953125, -0.2169189453125, -0.1705780029296875, -0.124237060546875, -0.0778961181640625, -0.03155517578125, 0.0147857666015625, 0.061126708984375, 0.1074676513671875, 0.15380859375, 0.2001495361328125, 0.246490478515625, 0.2928314208984375, 0.33917236328125, 0.3855133056640625, 0.431854248046875, 0.4781951904296875, 0.5245361328125, 0.5708770751953125, 0.617218017578125, 0.6635589599609375, 0.70989990234375, 0.7562408447265625, 0.802581787109375, 0.8489227294921875, 0.895263671875, 0.9416046142578125, 0.987945556640625, 1.0342864990234375, 1.08062744140625, 1.1269683837890625, 1.173309326171875, 1.2196502685546875, 1.2659912109375, 1.3123321533203125, 1.358673095703125, 1.4050140380859375, 1.45135498046875, 1.4976959228515625, 1.544036865234375, 1.5903778076171875, 1.63671875]}, "gradients/encoder.encoder.layers.9.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 3.0, 4.0, 4.0, 4.0, 6.0, 13.0, 18.0, 20.0, 30.0, 85.0, 77.0, 115.0, 123.0, 140.0, 115.0, 91.0, 55.0, 33.0, 29.0, 21.0, 9.0, 6.0, 0.0, 7.0, 0.0, 2.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.6328125, -1.584808349609375, -1.53680419921875, -1.488800048828125, -1.4407958984375, -1.392791748046875, -1.34478759765625, -1.296783447265625, -1.248779296875, -1.200775146484375, -1.15277099609375, -1.104766845703125, -1.0567626953125, -1.008758544921875, -0.96075439453125, -0.912750244140625, -0.86474609375, -0.816741943359375, -0.76873779296875, -0.720733642578125, -0.6727294921875, -0.624725341796875, -0.57672119140625, -0.528717041015625, -0.480712890625, -0.432708740234375, -0.38470458984375, -0.336700439453125, -0.2886962890625, -0.240692138671875, -0.19268798828125, -0.144683837890625, -0.0966796875, -0.048675537109375, -0.00067138671875, 0.047332763671875, 0.0953369140625, 0.143341064453125, 0.19134521484375, 0.239349365234375, 0.287353515625, 0.335357666015625, 0.38336181640625, 0.431365966796875, 0.4793701171875, 0.527374267578125, 0.57537841796875, 0.623382568359375, 0.67138671875, 0.719390869140625, 0.76739501953125, 0.815399169921875, 0.8634033203125, 0.911407470703125, 0.95941162109375, 1.007415771484375, 1.055419921875, 1.103424072265625, 1.15142822265625, 1.199432373046875, 1.2474365234375, 1.295440673828125, 1.34344482421875, 1.391448974609375, 1.439453125]}, "gradients/encoder.encoder.layers.9.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 3.0, 12.0, 77.0, 519.0, 365.0, 32.0, 6.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-22.524555206298828, -20.250925064086914, -17.977294921875, -15.703664779663086, -13.430034637451172, -11.156404495239258, -8.882774353027344, -6.60914421081543, -4.335514068603516, -2.0618839263916016, 0.2117462158203125, 2.4853763580322266, 4.759006500244141, 7.032636642456055, 9.306266784667969, 11.579896926879883, 13.853527069091797, 16.12715721130371, 18.400787353515625, 20.67441749572754, 22.948047637939453, 25.221677780151367, 27.49530792236328, 29.768938064575195, 32.04256820678711, 34.316200256347656, 36.58982849121094, 38.86345672607422, 41.137088775634766, 43.41072082519531, 45.684349060058594, 47.957977294921875, 50.23161315917969, 52.50524139404297, 54.778873443603516, 57.05250549316406, 59.326133728027344, 61.599761962890625, 63.87339401245117, 66.14702606201172, 68.420654296875, 70.69428253173828, 72.96791076660156, 75.24154663085938, 77.51517486572266, 79.78880310058594, 82.06243896484375, 84.33606719970703, 86.60969543457031, 88.8833236694336, 91.15695190429688, 93.43058776855469, 95.70421600341797, 97.97784423828125, 100.25148010253906, 102.52510833740234, 104.79873657226562, 107.0723648071289, 109.34599304199219, 111.61962890625, 113.89325714111328, 116.16688537597656, 118.44052124023438, 120.71414947509766, 122.98777770996094]}, "gradients/encoder.encoder.layers.9.layer_norm.bias": {"_type": "histogram", "values": [3.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 3.0, 3.0, 1.0, 7.0, 7.0, 10.0, 22.0, 26.0, 16.0, 21.0, 21.0, 42.0, 54.0, 51.0, 61.0, 55.0, 69.0, 56.0, 62.0, 65.0, 63.0, 41.0, 39.0, 36.0, 31.0, 30.0, 31.0, 16.0, 11.0, 23.0, 8.0, 11.0, 5.0, 4.0, 5.0, 3.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-13.545770645141602, -12.991360664367676, -12.43695068359375, -11.882540702819824, -11.328130722045898, -10.773721694946289, -10.219311714172363, -9.664901733398438, -9.110491752624512, -8.556081771850586, -8.00167179107666, -7.447262287139893, -6.892852306365967, -6.338442325592041, -5.784032821655273, -5.229622840881348, -4.675212860107422, -4.120802879333496, -3.5663931369781494, -3.0119833946228027, -2.457573413848877, -1.9031634330749512, -1.3487536907196045, -0.7943439483642578, -0.23993396759033203, 0.3144758939743042, 0.8688857555389404, 1.4232956171035767, 1.977705478668213, 2.5321154594421387, 3.0865252017974854, 3.640934944152832, 4.195343017578125, 4.749752998352051, 5.304162979125977, 5.858572483062744, 6.41298246383667, 6.967392444610596, 7.521801948547363, 8.076211929321289, 8.630621910095215, 9.18503189086914, 9.739441871643066, 10.293851852416992, 10.848260879516602, 11.402671813964844, 11.957080841064453, 12.511490821838379, 13.065900802612305, 13.62031078338623, 14.174720764160156, 14.729130744934082, 15.283540725708008, 15.837949752807617, 16.39236068725586, 16.94676971435547, 17.501178741455078, 18.055587768554688, 18.60999870300293, 19.16440773010254, 19.71881866455078, 20.27322769165039, 20.827638626098633, 21.382047653198242, 21.936458587646484]}, "gradients/encoder.encoder.layers.8.feed_forward.output_dense.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 4.0, 2.0, 5.0, 4.0, 8.0, 7.0, 10.0, 17.0, 18.0, 27.0, 39.0, 68.0, 105.0, 166.0, 305.0, 449.0, 896.0, 1721.0, 4053.0, 12532.0, 62663.0, 3610412.0, 448723.0, 36775.0, 8850.0, 3195.0, 1476.0, 737.0, 417.0, 210.0, 131.0, 84.0, 55.0, 40.0, 27.0, 27.0, 7.0, 7.0, 5.0, 4.0, 3.0, 0.0, 3.0, 1.0, 2.0, 1.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.69921875, -1.6441497802734375, -1.589080810546875, -1.5340118408203125, -1.47894287109375, -1.4238739013671875, -1.368804931640625, -1.3137359619140625, -1.2586669921875, -1.2035980224609375, -1.148529052734375, -1.0934600830078125, -1.03839111328125, -0.9833221435546875, -0.928253173828125, -0.8731842041015625, -0.818115234375, -0.7630462646484375, -0.707977294921875, -0.6529083251953125, -0.59783935546875, -0.5427703857421875, -0.487701416015625, -0.4326324462890625, -0.3775634765625, -0.3224945068359375, -0.267425537109375, -0.2123565673828125, -0.15728759765625, -0.1022186279296875, -0.047149658203125, 0.0079193115234375, 0.06298828125, 0.1180572509765625, 0.173126220703125, 0.2281951904296875, 0.28326416015625, 0.3383331298828125, 0.393402099609375, 0.4484710693359375, 0.5035400390625, 0.5586090087890625, 0.613677978515625, 0.6687469482421875, 0.72381591796875, 0.7788848876953125, 0.833953857421875, 0.8890228271484375, 0.944091796875, 0.9991607666015625, 1.054229736328125, 1.1092987060546875, 1.16436767578125, 1.2194366455078125, 1.274505615234375, 1.3295745849609375, 1.3846435546875, 1.4397125244140625, 1.494781494140625, 1.5498504638671875, 1.60491943359375, 1.6599884033203125, 1.715057373046875, 1.7701263427734375, 1.8251953125]}, "gradients/encoder.encoder.layers.8.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 1.0, 1.0, 4.0, 6.0, 6.0, 9.0, 7.0, 14.0, 17.0, 24.0, 29.0, 40.0, 35.0, 73.0, 59.0, 84.0, 86.0, 72.0, 74.0, 73.0, 63.0, 53.0, 28.0, 42.0, 21.0, 26.0, 18.0, 13.0, 12.0, 5.0, 4.0, 3.0, 3.0, 3.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.037109375, -1.003997802734375, -0.97088623046875, -0.937774658203125, -0.9046630859375, -0.871551513671875, -0.83843994140625, -0.805328369140625, -0.772216796875, -0.739105224609375, -0.70599365234375, -0.672882080078125, -0.6397705078125, -0.606658935546875, -0.57354736328125, -0.540435791015625, -0.50732421875, -0.474212646484375, -0.44110107421875, -0.407989501953125, -0.3748779296875, -0.341766357421875, -0.30865478515625, -0.275543212890625, -0.242431640625, -0.209320068359375, -0.17620849609375, -0.143096923828125, -0.1099853515625, -0.076873779296875, -0.04376220703125, -0.010650634765625, 0.0224609375, 0.055572509765625, 0.08868408203125, 0.121795654296875, 0.1549072265625, 0.188018798828125, 0.22113037109375, 0.254241943359375, 0.287353515625, 0.320465087890625, 0.35357666015625, 0.386688232421875, 0.4197998046875, 0.452911376953125, 0.48602294921875, 0.519134521484375, 0.55224609375, 0.585357666015625, 0.61846923828125, 0.651580810546875, 0.6846923828125, 0.717803955078125, 0.75091552734375, 0.784027099609375, 0.817138671875, 0.850250244140625, 0.88336181640625, 0.916473388671875, 0.9495849609375, 0.982696533203125, 1.01580810546875, 1.048919677734375, 1.08203125]}, "gradients/encoder.encoder.layers.8.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 3.0, 3.0, 1.0, 7.0, 8.0, 9.0, 12.0, 26.0, 31.0, 55.0, 87.0, 153.0, 297.0, 458.0, 1049.0, 1903.0, 4323.0, 10937.0, 35063.0, 184502.0, 3572591.0, 310045.0, 48790.0, 13958.0, 5170.0, 2372.0, 1101.0, 574.0, 298.0, 197.0, 104.0, 63.0, 36.0, 25.0, 12.0, 7.0, 12.0, 6.0, 1.0, 2.0, 4.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.130859375, -1.0939178466796875, -1.056976318359375, -1.0200347900390625, -0.98309326171875, -0.9461517333984375, -0.909210205078125, -0.8722686767578125, -0.8353271484375, -0.7983856201171875, -0.761444091796875, -0.7245025634765625, -0.68756103515625, -0.6506195068359375, -0.613677978515625, -0.5767364501953125, -0.539794921875, -0.5028533935546875, -0.465911865234375, -0.4289703369140625, -0.39202880859375, -0.3550872802734375, -0.318145751953125, -0.2812042236328125, -0.2442626953125, -0.2073211669921875, -0.170379638671875, -0.1334381103515625, -0.09649658203125, -0.0595550537109375, -0.022613525390625, 0.0143280029296875, 0.05126953125, 0.0882110595703125, 0.125152587890625, 0.1620941162109375, 0.19903564453125, 0.2359771728515625, 0.272918701171875, 0.3098602294921875, 0.3468017578125, 0.3837432861328125, 0.420684814453125, 0.4576263427734375, 0.49456787109375, 0.5315093994140625, 0.568450927734375, 0.6053924560546875, 0.642333984375, 0.6792755126953125, 0.716217041015625, 0.7531585693359375, 0.79010009765625, 0.8270416259765625, 0.863983154296875, 0.9009246826171875, 0.9378662109375, 0.9748077392578125, 1.011749267578125, 1.0486907958984375, 1.08563232421875, 1.1225738525390625, 1.159515380859375, 1.1964569091796875, 1.2333984375]}, "gradients/encoder.encoder.layers.8.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 5.0, 1.0, 2.0, 3.0, 5.0, 6.0, 6.0, 10.0, 11.0, 16.0, 30.0, 38.0, 54.0, 86.0, 148.0, 288.0, 1735.0, 955.0, 278.0, 132.0, 99.0, 49.0, 35.0, 27.0, 25.0, 7.0, 6.0, 8.0, 5.0, 6.0, 5.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.40380859375, -0.3862152099609375, -0.368621826171875, -0.3510284423828125, -0.33343505859375, -0.3158416748046875, -0.298248291015625, -0.2806549072265625, -0.2630615234375, -0.2454681396484375, -0.227874755859375, -0.2102813720703125, -0.19268798828125, -0.1750946044921875, -0.157501220703125, -0.1399078369140625, -0.122314453125, -0.1047210693359375, -0.087127685546875, -0.0695343017578125, -0.05194091796875, -0.0343475341796875, -0.016754150390625, 0.0008392333984375, 0.0184326171875, 0.0360260009765625, 0.053619384765625, 0.0712127685546875, 0.08880615234375, 0.1063995361328125, 0.123992919921875, 0.1415863037109375, 0.1591796875, 0.1767730712890625, 0.194366455078125, 0.2119598388671875, 0.22955322265625, 0.2471466064453125, 0.264739990234375, 0.2823333740234375, 0.2999267578125, 0.3175201416015625, 0.335113525390625, 0.3527069091796875, 0.37030029296875, 0.3878936767578125, 0.405487060546875, 0.4230804443359375, 0.440673828125, 0.4582672119140625, 0.475860595703125, 0.4934539794921875, 0.51104736328125, 0.5286407470703125, 0.546234130859375, 0.5638275146484375, 0.5814208984375, 0.5990142822265625, 0.616607666015625, 0.6342010498046875, 0.65179443359375, 0.6693878173828125, 0.686981201171875, 0.7045745849609375, 0.72216796875]}, "gradients/encoder.encoder.layers.8.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 2.0, 6.0, 2.0, 7.0, 11.0, 16.0, 33.0, 51.0, 76.0, 95.0, 127.0, 138.0, 118.0, 97.0, 81.0, 55.0, 31.0, 27.0, 13.0, 12.0, 3.0, 2.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.822126865386963, -2.7299299240112305, -2.637733221054077, -2.545536518096924, -2.4533395767211914, -2.361142635345459, -2.2689459323883057, -2.1767492294311523, -2.08455228805542, -1.992355465888977, -1.9001586437225342, -1.8079618215560913, -1.7157649993896484, -1.6235681772232056, -1.5313713550567627, -1.4391745328903198, -1.346977710723877, -1.254780888557434, -1.1625840663909912, -1.0703872442245483, -0.9781904220581055, -0.8859935998916626, -0.7937967777252197, -0.7015999555587769, -0.609403133392334, -0.5172063112258911, -0.42500948905944824, -0.33281266689300537, -0.2406158447265625, -0.14841902256011963, -0.05622220039367676, 0.03597462177276611, 0.12817144393920898, 0.22036826610565186, 0.3125650882720947, 0.4047619104385376, 0.49695873260498047, 0.5891555547714233, 0.6813523769378662, 0.7735491991043091, 0.865746021270752, 0.9579428434371948, 1.0501396656036377, 1.1423364877700806, 1.2345333099365234, 1.3267301321029663, 1.4189269542694092, 1.511123776435852, 1.603320598602295, 1.6955174207687378, 1.7877142429351807, 1.8799110651016235, 1.9721078872680664, 2.064304828643799, 2.156501531600952, 2.2486982345581055, 2.340895175933838, 2.4330921173095703, 2.5252888202667236, 2.617485523223877, 2.7096824645996094, 2.801879405975342, 2.894076108932495, 2.9862728118896484, 3.078469753265381]}, "gradients/encoder.encoder.layers.8.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 4.0, 1.0, 6.0, 4.0, 5.0, 5.0, 6.0, 18.0, 13.0, 15.0, 20.0, 20.0, 32.0, 38.0, 32.0, 39.0, 49.0, 48.0, 45.0, 55.0, 57.0, 49.0, 54.0, 58.0, 36.0, 47.0, 44.0, 41.0, 30.0, 27.0, 18.0, 11.0, 16.0, 15.0, 11.0, 10.0, 6.0, 12.0, 5.0, 1.0, 3.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-2.2364330291748047, -2.172072410583496, -2.1077117919921875, -2.043351411819458, -1.9789907932281494, -1.9146301746368408, -1.8502695560455322, -1.7859089374542236, -1.7215484380722046, -1.657187819480896, -1.592827320098877, -1.5284667015075684, -1.4641060829162598, -1.3997455835342407, -1.3353849649429321, -1.271024465560913, -1.2066638469696045, -1.142303228378296, -1.0779427289962769, -1.0135821104049683, -0.9492215514183044, -0.8848609924316406, -0.820500373840332, -0.7561398148536682, -0.6917792558670044, -0.6274186968803406, -0.5630581378936768, -0.49869751930236816, -0.43433696031570435, -0.3699764013290405, -0.3056158125400543, -0.24125522375106812, -0.1768946647644043, -0.11253409087657928, -0.04817351698875427, 0.01618705689907074, 0.08054763078689575, 0.14490818977355957, 0.20926877856254578, 0.273629367351532, 0.3379899263381958, 0.4023504853248596, 0.4667110741138458, 0.531071662902832, 0.5954322218894958, 0.6597927808761597, 0.7241533994674683, 0.7885139584541321, 0.8528745174407959, 0.9172350764274597, 0.9815956354141235, 1.0459562540054321, 1.1103167533874512, 1.1746773719787598, 1.2390379905700684, 1.303398609161377, 1.367759108543396, 1.4321197271347046, 1.4964802265167236, 1.5608408451080322, 1.6252014636993408, 1.6895619630813599, 1.7539225816726685, 1.8182830810546875, 1.882643699645996]}, "gradients/encoder.encoder.layers.8.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 2.0, 0.0, 1.0, 1.0, 5.0, 4.0, 4.0, 12.0, 16.0, 16.0, 42.0, 84.0, 172.0, 471.0, 1796.0, 11020.0, 159562.0, 792191.0, 74647.0, 6623.0, 1241.0, 348.0, 156.0, 56.0, 38.0, 16.0, 11.0, 8.0, 6.0, 2.0, 4.0, 4.0, 1.0, 3.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-2.943359375, -2.835845947265625, -2.72833251953125, -2.620819091796875, -2.5133056640625, -2.405792236328125, -2.29827880859375, -2.190765380859375, -2.083251953125, -1.975738525390625, -1.86822509765625, -1.760711669921875, -1.6531982421875, -1.545684814453125, -1.43817138671875, -1.330657958984375, -1.22314453125, -1.115631103515625, -1.00811767578125, -0.900604248046875, -0.7930908203125, -0.685577392578125, -0.57806396484375, -0.470550537109375, -0.363037109375, -0.255523681640625, -0.14801025390625, -0.040496826171875, 0.0670166015625, 0.174530029296875, 0.28204345703125, 0.389556884765625, 0.4970703125, 0.604583740234375, 0.71209716796875, 0.819610595703125, 0.9271240234375, 1.034637451171875, 1.14215087890625, 1.249664306640625, 1.357177734375, 1.464691162109375, 1.57220458984375, 1.679718017578125, 1.7872314453125, 1.894744873046875, 2.00225830078125, 2.109771728515625, 2.21728515625, 2.324798583984375, 2.43231201171875, 2.539825439453125, 2.6473388671875, 2.754852294921875, 2.86236572265625, 2.969879150390625, 3.077392578125, 3.184906005859375, 3.29241943359375, 3.399932861328125, 3.5074462890625, 3.614959716796875, 3.72247314453125, 3.829986572265625, 3.9375]}, "gradients/encoder.encoder.layers.8.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 7.0, 10.0, 3.0, 12.0, 6.0, 13.0, 13.0, 22.0, 26.0, 26.0, 34.0, 38.0, 51.0, 58.0, 67.0, 68.0, 72.0, 61.0, 67.0, 59.0, 66.0, 36.0, 30.0, 26.0, 28.0, 33.0, 16.0, 18.0, 13.0, 9.0, 5.0, 7.0, 1.0, 3.0, 1.0, 1.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0], "bins": [-0.966796875, -0.9382095336914062, -0.9096221923828125, -0.8810348510742188, -0.852447509765625, -0.8238601684570312, -0.7952728271484375, -0.7666854858398438, -0.73809814453125, -0.7095108032226562, -0.6809234619140625, -0.6523361206054688, -0.623748779296875, -0.5951614379882812, -0.5665740966796875, -0.5379867553710938, -0.5093994140625, -0.48081207275390625, -0.4522247314453125, -0.42363739013671875, -0.395050048828125, -0.36646270751953125, -0.3378753662109375, -0.30928802490234375, -0.28070068359375, -0.25211334228515625, -0.2235260009765625, -0.19493865966796875, -0.166351318359375, -0.13776397705078125, -0.1091766357421875, -0.08058929443359375, -0.052001953125, -0.02341461181640625, 0.0051727294921875, 0.03376007080078125, 0.062347412109375, 0.09093475341796875, 0.1195220947265625, 0.14810943603515625, 0.17669677734375, 0.20528411865234375, 0.2338714599609375, 0.26245880126953125, 0.291046142578125, 0.31963348388671875, 0.3482208251953125, 0.37680816650390625, 0.4053955078125, 0.43398284912109375, 0.4625701904296875, 0.49115753173828125, 0.519744873046875, 0.5483322143554688, 0.5769195556640625, 0.6055068969726562, 0.63409423828125, 0.6626815795898438, 0.6912689208984375, 0.7198562622070312, 0.748443603515625, 0.7770309448242188, 0.8056182861328125, 0.8342056274414062, 0.86279296875]}, "gradients/encoder.encoder.layers.8.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 3.0, 1.0, 4.0, 5.0, 2.0, 4.0, 6.0, 10.0, 10.0, 21.0, 30.0, 41.0, 52.0, 127.0, 180.0, 417.0, 861.0, 2532.0, 14022.0, 321077.0, 677931.0, 25437.0, 3575.0, 1166.0, 484.0, 214.0, 123.0, 80.0, 66.0, 28.0, 17.0, 10.0, 9.0, 7.0, 2.0, 2.0, 4.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0], "bins": [-3.703125, -3.5946044921875, -3.486083984375, -3.3775634765625, -3.26904296875, -3.1605224609375, -3.052001953125, -2.9434814453125, -2.8349609375, -2.7264404296875, -2.617919921875, -2.5093994140625, -2.40087890625, -2.2923583984375, -2.183837890625, -2.0753173828125, -1.966796875, -1.8582763671875, -1.749755859375, -1.6412353515625, -1.53271484375, -1.4241943359375, -1.315673828125, -1.2071533203125, -1.0986328125, -0.9901123046875, -0.881591796875, -0.7730712890625, -0.66455078125, -0.5560302734375, -0.447509765625, -0.3389892578125, -0.23046875, -0.1219482421875, -0.013427734375, 0.0950927734375, 0.20361328125, 0.3121337890625, 0.420654296875, 0.5291748046875, 0.6376953125, 0.7462158203125, 0.854736328125, 0.9632568359375, 1.07177734375, 1.1802978515625, 1.288818359375, 1.3973388671875, 1.505859375, 1.6143798828125, 1.722900390625, 1.8314208984375, 1.93994140625, 2.0484619140625, 2.156982421875, 2.2655029296875, 2.3740234375, 2.4825439453125, 2.591064453125, 2.6995849609375, 2.80810546875, 2.9166259765625, 3.025146484375, 3.1336669921875, 3.2421875]}, "gradients/encoder.encoder.layers.8.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 3.0, 3.0, 4.0, 3.0, 6.0, 6.0, 8.0, 10.0, 11.0, 14.0, 16.0, 15.0, 23.0, 24.0, 13.0, 27.0, 36.0, 42.0, 34.0, 38.0, 39.0, 41.0, 44.0, 40.0, 47.0, 37.0, 38.0, 46.0, 46.0, 31.0, 38.0, 30.0, 25.0, 21.0, 29.0, 25.0, 17.0, 12.0, 13.0, 8.0, 6.0, 6.0, 9.0, 6.0, 6.0, 5.0, 1.0, 3.0, 2.0, 3.0, 4.0, 0.0, 0.0, 1.0, 1.0], "bins": [-2.7265625, -2.6446533203125, -2.562744140625, -2.4808349609375, -2.39892578125, -2.3170166015625, -2.235107421875, -2.1531982421875, -2.0712890625, -1.9893798828125, -1.907470703125, -1.8255615234375, -1.74365234375, -1.6617431640625, -1.579833984375, -1.4979248046875, -1.416015625, -1.3341064453125, -1.252197265625, -1.1702880859375, -1.08837890625, -1.0064697265625, -0.924560546875, -0.8426513671875, -0.7607421875, -0.6788330078125, -0.596923828125, -0.5150146484375, -0.43310546875, -0.3511962890625, -0.269287109375, -0.1873779296875, -0.10546875, -0.0235595703125, 0.058349609375, 0.1402587890625, 0.22216796875, 0.3040771484375, 0.385986328125, 0.4678955078125, 0.5498046875, 0.6317138671875, 0.713623046875, 0.7955322265625, 0.87744140625, 0.9593505859375, 1.041259765625, 1.1231689453125, 1.205078125, 1.2869873046875, 1.368896484375, 1.4508056640625, 1.53271484375, 1.6146240234375, 1.696533203125, 1.7784423828125, 1.8603515625, 1.9422607421875, 2.024169921875, 2.1060791015625, 2.18798828125, 2.2698974609375, 2.351806640625, 2.4337158203125, 2.515625]}, "gradients/encoder.encoder.layers.8.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 2.0, 5.0, 2.0, 3.0, 7.0, 13.0, 23.0, 31.0, 101.0, 205.0, 572.0, 2166.0, 19961.0, 802985.0, 213734.0, 6881.0, 1207.0, 345.0, 150.0, 88.0, 38.0, 19.0, 10.0, 5.0, 5.0, 7.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.525390625, -1.4870681762695312, -1.4487457275390625, -1.4104232788085938, -1.372100830078125, -1.3337783813476562, -1.2954559326171875, -1.2571334838867188, -1.21881103515625, -1.1804885864257812, -1.1421661376953125, -1.1038436889648438, -1.065521240234375, -1.0271987915039062, -0.9888763427734375, -0.9505538940429688, -0.9122314453125, -0.8739089965820312, -0.8355865478515625, -0.7972640991210938, -0.758941650390625, -0.7206192016601562, -0.6822967529296875, -0.6439743041992188, -0.60565185546875, -0.5673294067382812, -0.5290069580078125, -0.49068450927734375, -0.452362060546875, -0.41403961181640625, -0.3757171630859375, -0.33739471435546875, -0.299072265625, -0.26074981689453125, -0.2224273681640625, -0.18410491943359375, -0.145782470703125, -0.10746002197265625, -0.0691375732421875, -0.03081512451171875, 0.00750732421875, 0.04582977294921875, 0.0841522216796875, 0.12247467041015625, 0.160797119140625, 0.19911956787109375, 0.2374420166015625, 0.27576446533203125, 0.3140869140625, 0.35240936279296875, 0.3907318115234375, 0.42905426025390625, 0.467376708984375, 0.5056991577148438, 0.5440216064453125, 0.5823440551757812, 0.62066650390625, 0.6589889526367188, 0.6973114013671875, 0.7356338500976562, 0.773956298828125, 0.8122787475585938, 0.8506011962890625, 0.8889236450195312, 0.92724609375]}, "gradients/encoder.encoder.layers.8.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 2.0, 1.0, 7.0, 4.0, 6.0, 4.0, 15.0, 6.0, 9.0, 13.0, 24.0, 17.0, 25.0, 31.0, 35.0, 44.0, 46.0, 66.0, 80.0, 93.0, 77.0, 65.0, 65.0, 38.0, 52.0, 31.0, 34.0, 18.0, 28.0, 15.0, 11.0, 9.0, 9.0, 6.0, 5.0, 5.0, 5.0, 3.0, 2.0, 3.0, 4.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-8.273124694824219e-05, -8.041225373744965e-05, -7.80932605266571e-05, -7.577426731586456e-05, -7.345527410507202e-05, -7.113628089427948e-05, -6.881728768348694e-05, -6.64982944726944e-05, -6.417930126190186e-05, -6.186030805110931e-05, -5.954131484031677e-05, -5.722232162952423e-05, -5.490332841873169e-05, -5.258433520793915e-05, -5.0265341997146606e-05, -4.7946348786354065e-05, -4.5627355575561523e-05, -4.330836236476898e-05, -4.098936915397644e-05, -3.86703759431839e-05, -3.635138273239136e-05, -3.4032389521598816e-05, -3.1713396310806274e-05, -2.9394403100013733e-05, -2.707540988922119e-05, -2.475641667842865e-05, -2.243742346763611e-05, -2.0118430256843567e-05, -1.7799437046051025e-05, -1.5480443835258484e-05, -1.3161450624465942e-05, -1.0842457413673401e-05, -8.52346420288086e-06, -6.204470992088318e-06, -3.885477781295776e-06, -1.5664845705032349e-06, 7.525086402893066e-07, 3.071501851081848e-06, 5.39049506187439e-06, 7.709488272666931e-06, 1.0028481483459473e-05, 1.2347474694252014e-05, 1.4666467905044556e-05, 1.6985461115837097e-05, 1.930445432662964e-05, 2.162344753742218e-05, 2.394244074821472e-05, 2.6261433959007263e-05, 2.8580427169799805e-05, 3.0899420380592346e-05, 3.321841359138489e-05, 3.553740680217743e-05, 3.785640001296997e-05, 4.017539322376251e-05, 4.2494386434555054e-05, 4.4813379645347595e-05, 4.713237285614014e-05, 4.945136606693268e-05, 5.177035927772522e-05, 5.408935248851776e-05, 5.64083456993103e-05, 5.8727338910102844e-05, 6.104633212089539e-05, 6.336532533168793e-05, 6.568431854248047e-05]}, "gradients/encoder.encoder.layers.8.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 2.0, 2.0, 3.0, 4.0, 6.0, 2.0, 9.0, 11.0, 15.0, 26.0, 31.0, 69.0, 106.0, 221.0, 488.0, 1252.0, 4332.0, 35015.0, 737976.0, 252342.0, 12548.0, 2540.0, 844.0, 352.0, 138.0, 82.0, 46.0, 39.0, 21.0, 13.0, 4.0, 11.0, 4.0, 3.0, 2.0, 1.0, 0.0, 2.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.0791015625, -1.0489578247070312, -1.0188140869140625, -0.9886703491210938, -0.958526611328125, -0.9283828735351562, -0.8982391357421875, -0.8680953979492188, -0.83795166015625, -0.8078079223632812, -0.7776641845703125, -0.7475204467773438, -0.717376708984375, -0.6872329711914062, -0.6570892333984375, -0.6269454956054688, -0.5968017578125, -0.5666580200195312, -0.5365142822265625, -0.5063705444335938, -0.476226806640625, -0.44608306884765625, -0.4159393310546875, -0.38579559326171875, -0.35565185546875, -0.32550811767578125, -0.2953643798828125, -0.26522064208984375, -0.235076904296875, -0.20493316650390625, -0.1747894287109375, -0.14464569091796875, -0.114501953125, -0.08435821533203125, -0.0542144775390625, -0.02407073974609375, 0.006072998046875, 0.03621673583984375, 0.0663604736328125, 0.09650421142578125, 0.12664794921875, 0.15679168701171875, 0.1869354248046875, 0.21707916259765625, 0.247222900390625, 0.27736663818359375, 0.3075103759765625, 0.33765411376953125, 0.3677978515625, 0.39794158935546875, 0.4280853271484375, 0.45822906494140625, 0.488372802734375, 0.5185165405273438, 0.5486602783203125, 0.5788040161132812, 0.60894775390625, 0.6390914916992188, 0.6692352294921875, 0.6993789672851562, 0.729522705078125, 0.7596664428710938, 0.7898101806640625, 0.8199539184570312, 0.85009765625]}, "gradients/encoder.encoder.layers.8.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 5.0, 3.0, 5.0, 9.0, 11.0, 4.0, 14.0, 21.0, 32.0, 39.0, 48.0, 47.0, 77.0, 108.0, 108.0, 102.0, 98.0, 66.0, 65.0, 46.0, 22.0, 20.0, 20.0, 12.0, 9.0, 6.0, 5.0, 5.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.705078125, -0.6812744140625, -0.657470703125, -0.6336669921875, -0.60986328125, -0.5860595703125, -0.562255859375, -0.5384521484375, -0.5146484375, -0.4908447265625, -0.467041015625, -0.4432373046875, -0.41943359375, -0.3956298828125, -0.371826171875, -0.3480224609375, -0.32421875, -0.3004150390625, -0.276611328125, -0.2528076171875, -0.22900390625, -0.2052001953125, -0.181396484375, -0.1575927734375, -0.1337890625, -0.1099853515625, -0.086181640625, -0.0623779296875, -0.03857421875, -0.0147705078125, 0.009033203125, 0.0328369140625, 0.056640625, 0.0804443359375, 0.104248046875, 0.1280517578125, 0.15185546875, 0.1756591796875, 0.199462890625, 0.2232666015625, 0.2470703125, 0.2708740234375, 0.294677734375, 0.3184814453125, 0.34228515625, 0.3660888671875, 0.389892578125, 0.4136962890625, 0.4375, 0.4613037109375, 0.485107421875, 0.5089111328125, 0.53271484375, 0.5565185546875, 0.580322265625, 0.6041259765625, 0.6279296875, 0.6517333984375, 0.675537109375, 0.6993408203125, 0.72314453125, 0.7469482421875, 0.770751953125, 0.7945556640625, 0.818359375]}, "gradients/encoder.encoder.layers.8.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 1.0, 4.0, 8.0, 18.0, 29.0, 53.0, 103.0, 150.0, 204.0, 188.0, 119.0, 71.0, 26.0, 14.0, 10.0, 2.0, 4.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-16.43756866455078, -15.863180160522461, -15.28879165649414, -14.71440315246582, -14.1400146484375, -13.56562614440918, -12.991238594055176, -12.416850090026855, -11.842461585998535, -11.268073081970215, -10.693684577941895, -10.119296073913574, -9.54490852355957, -8.97052001953125, -8.39613151550293, -7.821743011474609, -7.247354507446289, -6.672966003417969, -6.098577499389648, -5.524189472198486, -4.949800968170166, -4.375412464141846, -3.8010241985321045, -3.2266359329223633, -2.652247428894043, -2.0778589248657227, -1.5034706592559814, -0.9290822744369507, -0.3546938896179199, 0.2196946144104004, 0.7940828800201416, 1.3684711456298828, 1.942861557006836, 2.5172500610351562, 3.0916383266448975, 3.6660265922546387, 4.240415096282959, 4.814803600311279, 5.389191627502441, 5.963580131530762, 6.537968635559082, 7.112357139587402, 7.686745643615723, 8.261134147644043, 8.835521697998047, 9.409910202026367, 9.984298706054688, 10.558687210083008, 11.133075714111328, 11.707464218139648, 12.281852722167969, 12.856241226196289, 13.43062973022461, 14.00501823425293, 14.579405784606934, 15.153794288635254, 15.728182792663574, 16.302570343017578, 16.8769588470459, 17.45134735107422, 18.02573585510254, 18.60012435913086, 19.17451286315918, 19.7489013671875, 20.32328987121582]}, "gradients/encoder.encoder.layers.8.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 3.0, 2.0, 4.0, 2.0, 3.0, 7.0, 10.0, 8.0, 15.0, 9.0, 20.0, 13.0, 23.0, 34.0, 25.0, 46.0, 32.0, 48.0, 45.0, 49.0, 59.0, 52.0, 44.0, 63.0, 54.0, 51.0, 41.0, 39.0, 42.0, 28.0, 28.0, 21.0, 9.0, 19.0, 14.0, 8.0, 10.0, 8.0, 3.0, 5.0, 4.0, 6.0, 3.0, 4.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-15.533100128173828, -15.084814071655273, -14.636527061462402, -14.188241004943848, -13.739953994750977, -13.291667938232422, -12.843381881713867, -12.395095825195312, -11.946808815002441, -11.498522758483887, -11.050235748291016, -10.601949691772461, -10.153663635253906, -9.705376625061035, -9.25709056854248, -8.80880355834961, -8.360517501831055, -7.912230968475342, -7.463944435119629, -7.015658378601074, -6.567371845245361, -6.119085311889648, -5.670799255371094, -5.222512722015381, -4.774226188659668, -4.325939655303955, -3.8776533603668213, -3.4293670654296875, -2.9810805320739746, -2.5327939987182617, -2.084507703781128, -1.6362214088439941, -1.1879348754882812, -0.7396484613418579, -0.29136204719543457, 0.15692436695098877, 0.6052107810974121, 1.053497314453125, 1.5017836093902588, 1.9500699043273926, 2.3983564376831055, 2.8466429710388184, 3.294929265975952, 3.743215560913086, 4.191502094268799, 4.639788627624512, 5.088074684143066, 5.536361217498779, 5.984647750854492, 6.432934284210205, 6.881220817565918, 7.329506874084473, 7.7777934074401855, 8.226079940795898, 8.674365997314453, 9.122652053833008, 9.570939064025879, 10.019225120544434, 10.467512130737305, 10.91579818725586, 11.364084243774414, 11.812371253967285, 12.26065731048584, 12.708944320678711, 13.157230377197266]}, "gradients/encoder.encoder.layers.7.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 2.0, 5.0, 1.0, 0.0, 3.0, 8.0, 12.0, 8.0, 19.0, 28.0, 54.0, 84.0, 163.0, 468.0, 1732.0, 12918.0, 598551.0, 3540691.0, 35028.0, 3305.0, 715.0, 238.0, 100.0, 54.0, 35.0, 15.0, 17.0, 12.0, 4.0, 6.0, 3.0, 1.0, 3.0, 2.0, 3.0, 2.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.3125, -2.233245849609375, -2.15399169921875, -2.074737548828125, -1.9954833984375, -1.916229248046875, -1.83697509765625, -1.757720947265625, -1.678466796875, -1.599212646484375, -1.51995849609375, -1.440704345703125, -1.3614501953125, -1.282196044921875, -1.20294189453125, -1.123687744140625, -1.04443359375, -0.965179443359375, -0.88592529296875, -0.806671142578125, -0.7274169921875, -0.648162841796875, -0.56890869140625, -0.489654541015625, -0.410400390625, -0.331146240234375, -0.25189208984375, -0.172637939453125, -0.0933837890625, -0.014129638671875, 0.06512451171875, 0.144378662109375, 0.2236328125, 0.302886962890625, 0.38214111328125, 0.461395263671875, 0.5406494140625, 0.619903564453125, 0.69915771484375, 0.778411865234375, 0.857666015625, 0.936920166015625, 1.01617431640625, 1.095428466796875, 1.1746826171875, 1.253936767578125, 1.33319091796875, 1.412445068359375, 1.49169921875, 1.570953369140625, 1.65020751953125, 1.729461669921875, 1.8087158203125, 1.887969970703125, 1.96722412109375, 2.046478271484375, 2.125732421875, 2.204986572265625, 2.28424072265625, 2.363494873046875, 2.4427490234375, 2.522003173828125, 2.60125732421875, 2.680511474609375, 2.759765625]}, "gradients/encoder.encoder.layers.7.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 4.0, 2.0, 2.0, 7.0, 9.0, 10.0, 15.0, 14.0, 23.0, 39.0, 49.0, 52.0, 55.0, 72.0, 72.0, 62.0, 86.0, 65.0, 62.0, 66.0, 39.0, 59.0, 34.0, 26.0, 20.0, 29.0, 11.0, 13.0, 4.0, 4.0, 1.0, 2.0, 3.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.96728515625, -0.9398193359375, -0.912353515625, -0.8848876953125, -0.857421875, -0.8299560546875, -0.802490234375, -0.7750244140625, -0.74755859375, -0.7200927734375, -0.692626953125, -0.6651611328125, -0.6376953125, -0.6102294921875, -0.582763671875, -0.5552978515625, -0.52783203125, -0.5003662109375, -0.472900390625, -0.4454345703125, -0.41796875, -0.3905029296875, -0.363037109375, -0.3355712890625, -0.30810546875, -0.2806396484375, -0.253173828125, -0.2257080078125, -0.1982421875, -0.1707763671875, -0.143310546875, -0.1158447265625, -0.08837890625, -0.0609130859375, -0.033447265625, -0.0059814453125, 0.021484375, 0.0489501953125, 0.076416015625, 0.1038818359375, 0.13134765625, 0.1588134765625, 0.186279296875, 0.2137451171875, 0.2412109375, 0.2686767578125, 0.296142578125, 0.3236083984375, 0.35107421875, 0.3785400390625, 0.406005859375, 0.4334716796875, 0.4609375, 0.4884033203125, 0.515869140625, 0.5433349609375, 0.57080078125, 0.5982666015625, 0.625732421875, 0.6531982421875, 0.6806640625, 0.7081298828125, 0.735595703125, 0.7630615234375, 0.79052734375]}, "gradients/encoder.encoder.layers.7.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 2.0, 4.0, 2.0, 2.0, 8.0, 10.0, 12.0, 22.0, 29.0, 31.0, 87.0, 144.0, 415.0, 1274.0, 5908.0, 50866.0, 3402375.0, 705490.0, 23158.0, 3261.0, 736.0, 232.0, 78.0, 51.0, 23.0, 11.0, 13.0, 10.0, 5.0, 8.0, 5.0, 6.0, 6.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0], "bins": [-2.28125, -2.21759033203125, -2.1539306640625, -2.09027099609375, -2.026611328125, -1.96295166015625, -1.8992919921875, -1.83563232421875, -1.77197265625, -1.70831298828125, -1.6446533203125, -1.58099365234375, -1.517333984375, -1.45367431640625, -1.3900146484375, -1.32635498046875, -1.2626953125, -1.19903564453125, -1.1353759765625, -1.07171630859375, -1.008056640625, -0.94439697265625, -0.8807373046875, -0.81707763671875, -0.75341796875, -0.68975830078125, -0.6260986328125, -0.56243896484375, -0.498779296875, -0.43511962890625, -0.3714599609375, -0.30780029296875, -0.244140625, -0.18048095703125, -0.1168212890625, -0.05316162109375, 0.010498046875, 0.07415771484375, 0.1378173828125, 0.20147705078125, 0.26513671875, 0.32879638671875, 0.3924560546875, 0.45611572265625, 0.519775390625, 0.58343505859375, 0.6470947265625, 0.71075439453125, 0.7744140625, 0.83807373046875, 0.9017333984375, 0.96539306640625, 1.029052734375, 1.09271240234375, 1.1563720703125, 1.22003173828125, 1.28369140625, 1.34735107421875, 1.4110107421875, 1.47467041015625, 1.538330078125, 1.60198974609375, 1.6656494140625, 1.72930908203125, 1.79296875]}, "gradients/encoder.encoder.layers.7.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 2.0, 1.0, 1.0, 3.0, 2.0, 7.0, 5.0, 1.0, 9.0, 14.0, 18.0, 21.0, 37.0, 71.0, 124.0, 258.0, 512.0, 924.0, 894.0, 549.0, 251.0, 123.0, 82.0, 48.0, 30.0, 22.0, 19.0, 19.0, 11.0, 8.0, 3.0, 2.0, 2.0, 5.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.6767578125, -1.6275177001953125, -1.578277587890625, -1.5290374755859375, -1.47979736328125, -1.4305572509765625, -1.381317138671875, -1.3320770263671875, -1.2828369140625, -1.2335968017578125, -1.184356689453125, -1.1351165771484375, -1.08587646484375, -1.0366363525390625, -0.987396240234375, -0.9381561279296875, -0.888916015625, -0.8396759033203125, -0.790435791015625, -0.7411956787109375, -0.69195556640625, -0.6427154541015625, -0.593475341796875, -0.5442352294921875, -0.4949951171875, -0.4457550048828125, -0.396514892578125, -0.3472747802734375, -0.29803466796875, -0.2487945556640625, -0.199554443359375, -0.1503143310546875, -0.10107421875, -0.0518341064453125, -0.002593994140625, 0.0466461181640625, 0.09588623046875, 0.1451263427734375, 0.194366455078125, 0.2436065673828125, 0.2928466796875, 0.3420867919921875, 0.391326904296875, 0.4405670166015625, 0.48980712890625, 0.5390472412109375, 0.588287353515625, 0.6375274658203125, 0.686767578125, 0.7360076904296875, 0.785247802734375, 0.8344879150390625, 0.88372802734375, 0.9329681396484375, 0.982208251953125, 1.0314483642578125, 1.0806884765625, 1.1299285888671875, 1.179168701171875, 1.2284088134765625, 1.27764892578125, 1.3268890380859375, 1.376129150390625, 1.4253692626953125, 1.474609375]}, "gradients/encoder.encoder.layers.7.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 8.0, 4.0, 19.0, 65.0, 160.0, 295.0, 259.0, 126.0, 30.0, 21.0, 7.0, 5.0, 5.0, 2.0, 1.0, 2.0, 0.0, 1.0, 1.0, 2.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-20.698305130004883, -20.1650333404541, -19.631763458251953, -19.098491668701172, -18.56521987915039, -18.03194808959961, -17.498676300048828, -16.96540641784668, -16.4321346282959, -15.898862838745117, -15.365592002868652, -14.832321166992188, -14.299049377441406, -13.765777587890625, -13.23250675201416, -12.699235916137695, -12.165964126586914, -11.632692337036133, -11.099421501159668, -10.566150665283203, -10.032878875732422, -9.49960708618164, -8.966336250305176, -8.433065414428711, -7.89979362487793, -7.366522312164307, -6.833250999450684, -6.2999796867370605, -5.7667083740234375, -5.2334370613098145, -4.700165748596191, -4.166894435882568, -3.633625030517578, -3.100353717803955, -2.567082405090332, -2.033811092376709, -1.500539779663086, -0.9672684669494629, -0.43399715423583984, 0.0992741584777832, 0.6325454711914062, 1.1658167839050293, 1.6990880966186523, 2.2323594093322754, 2.7656307220458984, 3.2989020347595215, 3.8321733474731445, 4.365444660186768, 4.898715972900391, 5.431987285614014, 5.965258598327637, 6.49852991104126, 7.031801223754883, 7.565072536468506, 8.098343849182129, 8.631614685058594, 9.164886474609375, 9.698158264160156, 10.231429100036621, 10.764699935913086, 11.297971725463867, 11.831243515014648, 12.364514350891113, 12.897785186767578, 13.43105697631836]}, "gradients/encoder.encoder.layers.7.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 4.0, 2.0, 2.0, 3.0, 3.0, 5.0, 7.0, 7.0, 23.0, 11.0, 23.0, 22.0, 25.0, 32.0, 37.0, 49.0, 47.0, 60.0, 59.0, 58.0, 56.0, 46.0, 47.0, 62.0, 50.0, 39.0, 39.0, 32.0, 40.0, 34.0, 20.0, 10.0, 12.0, 13.0, 5.0, 6.0, 7.0, 7.0, 5.0, 2.0, 5.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-7.867152690887451, -7.651735782623291, -7.436319351196289, -7.220902442932129, -7.005485534667969, -6.790068626403809, -6.574651718139648, -6.3592352867126465, -6.143818378448486, -5.928401470184326, -5.712985038757324, -5.497568130493164, -5.282151222229004, -5.066734313964844, -4.851317405700684, -4.635900974273682, -4.4204840660095215, -4.205067157745361, -3.9896504878997803, -3.774233818054199, -3.558816909790039, -3.343400001525879, -3.127983331680298, -2.912566661834717, -2.6971497535705566, -2.4817328453063965, -2.2663161754608154, -2.0508995056152344, -1.8354825973510742, -1.6200658082962036, -1.404649019241333, -1.1892322301864624, -0.9738154411315918, -0.7583986520767212, -0.5429818630218506, -0.32756507396698, -0.11214828491210938, 0.10326850414276123, 0.31868529319763184, 0.5341020822525024, 0.749518871307373, 0.9649356603622437, 1.1803524494171143, 1.3957692384719849, 1.6111860275268555, 1.826602816581726, 2.0420196056365967, 2.2574362754821777, 2.472853183746338, 2.688270092010498, 2.903686761856079, 3.11910343170166, 3.3345203399658203, 3.5499372482299805, 3.7653539180755615, 3.9807705879211426, 4.196187496185303, 4.411604404449463, 4.627020835876465, 4.842437744140625, 5.057854652404785, 5.273271560668945, 5.4886884689331055, 5.704104900360107, 5.919521808624268]}, "gradients/encoder.encoder.layers.7.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 2.0, 2.0, 3.0, 6.0, 12.0, 15.0, 16.0, 28.0, 44.0, 67.0, 138.0, 210.0, 331.0, 615.0, 1095.0, 2040.0, 3989.0, 8894.0, 20999.0, 54873.0, 153538.0, 361619.0, 275041.0, 100494.0, 36803.0, 14600.0, 6372.0, 3132.0, 1570.0, 833.0, 472.0, 297.0, 165.0, 88.0, 60.0, 34.0, 24.0, 12.0, 9.0, 8.0, 6.0, 3.0, 1.0, 5.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.85595703125, -0.8281631469726562, -0.8003692626953125, -0.7725753784179688, -0.744781494140625, -0.7169876098632812, -0.6891937255859375, -0.6613998413085938, -0.63360595703125, -0.6058120727539062, -0.5780181884765625, -0.5502243041992188, -0.522430419921875, -0.49463653564453125, -0.4668426513671875, -0.43904876708984375, -0.4112548828125, -0.38346099853515625, -0.3556671142578125, -0.32787322998046875, -0.300079345703125, -0.27228546142578125, -0.2444915771484375, -0.21669769287109375, -0.18890380859375, -0.16110992431640625, -0.1333160400390625, -0.10552215576171875, -0.077728271484375, -0.04993438720703125, -0.0221405029296875, 0.00565338134765625, 0.033447265625, 0.06124114990234375, 0.0890350341796875, 0.11682891845703125, 0.144622802734375, 0.17241668701171875, 0.2002105712890625, 0.22800445556640625, 0.25579833984375, 0.28359222412109375, 0.3113861083984375, 0.33917999267578125, 0.366973876953125, 0.39476776123046875, 0.4225616455078125, 0.45035552978515625, 0.4781494140625, 0.5059432983398438, 0.5337371826171875, 0.5615310668945312, 0.589324951171875, 0.6171188354492188, 0.6449127197265625, 0.6727066040039062, 0.70050048828125, 0.7282943725585938, 0.7560882568359375, 0.7838821411132812, 0.811676025390625, 0.8394699096679688, 0.8672637939453125, 0.8950576782226562, 0.9228515625]}, "gradients/encoder.encoder.layers.7.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 5.0, 3.0, 5.0, 5.0, 6.0, 10.0, 16.0, 24.0, 25.0, 42.0, 49.0, 64.0, 59.0, 62.0, 83.0, 80.0, 84.0, 81.0, 70.0, 63.0, 47.0, 33.0, 35.0, 14.0, 19.0, 16.0, 6.0, 5.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.98681640625, -0.958221435546875, -0.92962646484375, -0.901031494140625, -0.8724365234375, -0.843841552734375, -0.81524658203125, -0.786651611328125, -0.758056640625, -0.729461669921875, -0.70086669921875, -0.672271728515625, -0.6436767578125, -0.615081787109375, -0.58648681640625, -0.557891845703125, -0.529296875, -0.500701904296875, -0.47210693359375, -0.443511962890625, -0.4149169921875, -0.386322021484375, -0.35772705078125, -0.329132080078125, -0.300537109375, -0.271942138671875, -0.24334716796875, -0.214752197265625, -0.1861572265625, -0.157562255859375, -0.12896728515625, -0.100372314453125, -0.07177734375, -0.043182373046875, -0.01458740234375, 0.014007568359375, 0.0426025390625, 0.071197509765625, 0.09979248046875, 0.128387451171875, 0.156982421875, 0.185577392578125, 0.21417236328125, 0.242767333984375, 0.2713623046875, 0.299957275390625, 0.32855224609375, 0.357147216796875, 0.3857421875, 0.414337158203125, 0.44293212890625, 0.471527099609375, 0.5001220703125, 0.528717041015625, 0.55731201171875, 0.585906982421875, 0.614501953125, 0.643096923828125, 0.67169189453125, 0.700286865234375, 0.7288818359375, 0.757476806640625, 0.78607177734375, 0.814666748046875, 0.84326171875]}, "gradients/encoder.encoder.layers.7.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 4.0, 3.0, 5.0, 3.0, 13.0, 13.0, 30.0, 24.0, 57.0, 60.0, 99.0, 148.0, 249.0, 518.0, 800.0, 1844.0, 6255.0, 55964.0, 813623.0, 152719.0, 10976.0, 2617.0, 1096.0, 557.0, 318.0, 220.0, 118.0, 74.0, 48.0, 40.0, 15.0, 19.0, 11.0, 8.0, 9.0, 1.0, 1.0, 1.0, 3.0, 0.0, 1.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-2.361328125, -2.289031982421875, -2.21673583984375, -2.144439697265625, -2.0721435546875, -1.999847412109375, -1.92755126953125, -1.855255126953125, -1.782958984375, -1.710662841796875, -1.63836669921875, -1.566070556640625, -1.4937744140625, -1.421478271484375, -1.34918212890625, -1.276885986328125, -1.20458984375, -1.132293701171875, -1.05999755859375, -0.987701416015625, -0.9154052734375, -0.843109130859375, -0.77081298828125, -0.698516845703125, -0.626220703125, -0.553924560546875, -0.48162841796875, -0.409332275390625, -0.3370361328125, -0.264739990234375, -0.19244384765625, -0.120147705078125, -0.0478515625, 0.024444580078125, 0.09674072265625, 0.169036865234375, 0.2413330078125, 0.313629150390625, 0.38592529296875, 0.458221435546875, 0.530517578125, 0.602813720703125, 0.67510986328125, 0.747406005859375, 0.8197021484375, 0.891998291015625, 0.96429443359375, 1.036590576171875, 1.10888671875, 1.181182861328125, 1.25347900390625, 1.325775146484375, 1.3980712890625, 1.470367431640625, 1.54266357421875, 1.614959716796875, 1.687255859375, 1.759552001953125, 1.83184814453125, 1.904144287109375, 1.9764404296875, 2.048736572265625, 2.12103271484375, 2.193328857421875, 2.265625]}, "gradients/encoder.encoder.layers.7.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 3.0, 0.0, 1.0, 2.0, 3.0, 5.0, 4.0, 1.0, 8.0, 5.0, 7.0, 10.0, 6.0, 16.0, 10.0, 16.0, 18.0, 15.0, 16.0, 19.0, 30.0, 32.0, 37.0, 33.0, 40.0, 44.0, 46.0, 39.0, 46.0, 42.0, 44.0, 39.0, 38.0, 38.0, 35.0, 32.0, 32.0, 36.0, 26.0, 23.0, 26.0, 17.0, 10.0, 18.0, 12.0, 10.0, 8.0, 3.0, 2.0, 3.0, 3.0, 2.0, 2.0, 1.0, 0.0, 3.0, 1.0, 0.0, 2.0, 2.0, 1.0], "bins": [-2.0078125, -1.944610595703125, -1.88140869140625, -1.818206787109375, -1.7550048828125, -1.691802978515625, -1.62860107421875, -1.565399169921875, -1.502197265625, -1.438995361328125, -1.37579345703125, -1.312591552734375, -1.2493896484375, -1.186187744140625, -1.12298583984375, -1.059783935546875, -0.99658203125, -0.933380126953125, -0.87017822265625, -0.806976318359375, -0.7437744140625, -0.680572509765625, -0.61737060546875, -0.554168701171875, -0.490966796875, -0.427764892578125, -0.36456298828125, -0.301361083984375, -0.2381591796875, -0.174957275390625, -0.11175537109375, -0.048553466796875, 0.0146484375, 0.077850341796875, 0.14105224609375, 0.204254150390625, 0.2674560546875, 0.330657958984375, 0.39385986328125, 0.457061767578125, 0.520263671875, 0.583465576171875, 0.64666748046875, 0.709869384765625, 0.7730712890625, 0.836273193359375, 0.89947509765625, 0.962677001953125, 1.02587890625, 1.089080810546875, 1.15228271484375, 1.215484619140625, 1.2786865234375, 1.341888427734375, 1.40509033203125, 1.468292236328125, 1.531494140625, 1.594696044921875, 1.65789794921875, 1.721099853515625, 1.7843017578125, 1.847503662109375, 1.91070556640625, 1.973907470703125, 2.037109375]}, "gradients/encoder.encoder.layers.7.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 3.0, 0.0, 0.0, 1.0, 2.0, 4.0, 3.0, 4.0, 1.0, 7.0, 19.0, 14.0, 19.0, 23.0, 40.0, 74.0, 138.0, 233.0, 390.0, 994.0, 3136.0, 20259.0, 563784.0, 436430.0, 18105.0, 2948.0, 981.0, 408.0, 199.0, 129.0, 71.0, 39.0, 21.0, 18.0, 15.0, 14.0, 7.0, 6.0, 7.0, 1.0, 2.0, 5.0, 4.0, 6.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 4.0], "bins": [-0.96337890625, -0.9358367919921875, -0.908294677734375, -0.8807525634765625, -0.85321044921875, -0.8256683349609375, -0.798126220703125, -0.7705841064453125, -0.7430419921875, -0.7154998779296875, -0.687957763671875, -0.6604156494140625, -0.63287353515625, -0.6053314208984375, -0.577789306640625, -0.5502471923828125, -0.522705078125, -0.4951629638671875, -0.467620849609375, -0.4400787353515625, -0.41253662109375, -0.3849945068359375, -0.357452392578125, -0.3299102783203125, -0.3023681640625, -0.2748260498046875, -0.247283935546875, -0.2197418212890625, -0.19219970703125, -0.1646575927734375, -0.137115478515625, -0.1095733642578125, -0.08203125, -0.0544891357421875, -0.026947021484375, 0.0005950927734375, 0.02813720703125, 0.0556793212890625, 0.083221435546875, 0.1107635498046875, 0.1383056640625, 0.1658477783203125, 0.193389892578125, 0.2209320068359375, 0.24847412109375, 0.2760162353515625, 0.303558349609375, 0.3311004638671875, 0.358642578125, 0.3861846923828125, 0.413726806640625, 0.4412689208984375, 0.46881103515625, 0.4963531494140625, 0.523895263671875, 0.5514373779296875, 0.5789794921875, 0.6065216064453125, 0.634063720703125, 0.6616058349609375, 0.68914794921875, 0.7166900634765625, 0.744232177734375, 0.7717742919921875, 0.79931640625]}, "gradients/encoder.encoder.layers.7.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 2.0, 4.0, 2.0, 8.0, 6.0, 6.0, 12.0, 5.0, 9.0, 7.0, 13.0, 18.0, 29.0, 25.0, 32.0, 36.0, 52.0, 65.0, 84.0, 91.0, 86.0, 84.0, 66.0, 52.0, 46.0, 26.0, 29.0, 16.0, 16.0, 10.0, 13.0, 9.0, 12.0, 8.0, 9.0, 1.0, 6.0, 1.0, 1.0, 2.0, 2.0, 1.0, 2.0, 0.0, 4.0, 1.0, 0.0, 2.0, 1.0, 1.0], "bins": [-8.064508438110352e-05, -7.827114313840866e-05, -7.58972018957138e-05, -7.352326065301895e-05, -7.11493194103241e-05, -6.877537816762924e-05, -6.640143692493439e-05, -6.402749568223953e-05, -6.165355443954468e-05, -5.927961319684982e-05, -5.690567195415497e-05, -5.4531730711460114e-05, -5.215778946876526e-05, -4.9783848226070404e-05, -4.740990698337555e-05, -4.5035965740680695e-05, -4.266202449798584e-05, -4.0288083255290985e-05, -3.791414201259613e-05, -3.5540200769901276e-05, -3.316625952720642e-05, -3.0792318284511566e-05, -2.841837704181671e-05, -2.6044435799121857e-05, -2.3670494556427002e-05, -2.1296553313732147e-05, -1.8922612071037292e-05, -1.6548670828342438e-05, -1.4174729585647583e-05, -1.1800788342952728e-05, -9.426847100257874e-06, -7.052905857563019e-06, -4.678964614868164e-06, -2.3050233721733093e-06, 6.891787052154541e-08, 2.4428591132164e-06, 4.816800355911255e-06, 7.19074159860611e-06, 9.564682841300964e-06, 1.1938624083995819e-05, 1.4312565326690674e-05, 1.668650656938553e-05, 1.9060447812080383e-05, 2.1434389054775238e-05, 2.3808330297470093e-05, 2.6182271540164948e-05, 2.8556212782859802e-05, 3.093015402555466e-05, 3.330409526824951e-05, 3.5678036510944366e-05, 3.805197775363922e-05, 4.0425918996334076e-05, 4.279986023902893e-05, 4.5173801481723785e-05, 4.754774272441864e-05, 4.9921683967113495e-05, 5.229562520980835e-05, 5.4669566452503204e-05, 5.704350769519806e-05, 5.9417448937892914e-05, 6.179139018058777e-05, 6.416533142328262e-05, 6.653927266597748e-05, 6.891321390867233e-05, 7.128715515136719e-05]}, "gradients/encoder.encoder.layers.7.attention.q_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0, 2.0, 4.0, 2.0, 8.0, 3.0, 5.0, 7.0, 16.0, 18.0, 21.0, 37.0, 56.0, 88.0, 186.0, 277.0, 473.0, 1064.0, 2710.0, 10019.0, 98444.0, 836598.0, 84622.0, 9158.0, 2518.0, 1034.0, 478.0, 274.0, 151.0, 92.0, 64.0, 38.0, 34.0, 24.0, 9.0, 7.0, 8.0, 3.0, 1.0, 3.0, 1.0, 4.0, 1.0, 0.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.7294921875, -0.7038726806640625, -0.678253173828125, -0.6526336669921875, -0.62701416015625, -0.6013946533203125, -0.575775146484375, -0.5501556396484375, -0.5245361328125, -0.4989166259765625, -0.473297119140625, -0.4476776123046875, -0.42205810546875, -0.3964385986328125, -0.370819091796875, -0.3451995849609375, -0.319580078125, -0.2939605712890625, -0.268341064453125, -0.2427215576171875, -0.21710205078125, -0.1914825439453125, -0.165863037109375, -0.1402435302734375, -0.1146240234375, -0.0890045166015625, -0.063385009765625, -0.0377655029296875, -0.01214599609375, 0.0134735107421875, 0.039093017578125, 0.0647125244140625, 0.09033203125, 0.1159515380859375, 0.141571044921875, 0.1671905517578125, 0.19281005859375, 0.2184295654296875, 0.244049072265625, 0.2696685791015625, 0.2952880859375, 0.3209075927734375, 0.346527099609375, 0.3721466064453125, 0.39776611328125, 0.4233856201171875, 0.449005126953125, 0.4746246337890625, 0.500244140625, 0.5258636474609375, 0.551483154296875, 0.5771026611328125, 0.60272216796875, 0.6283416748046875, 0.653961181640625, 0.6795806884765625, 0.7052001953125, 0.7308197021484375, 0.756439208984375, 0.7820587158203125, 0.80767822265625, 0.8332977294921875, 0.858917236328125, 0.8845367431640625, 0.91015625]}, "gradients/encoder.encoder.layers.7.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 4.0, 11.0, 5.0, 6.0, 9.0, 19.0, 17.0, 28.0, 35.0, 32.0, 48.0, 56.0, 65.0, 90.0, 101.0, 79.0, 74.0, 70.0, 56.0, 39.0, 49.0, 31.0, 21.0, 11.0, 18.0, 12.0, 4.0, 7.0, 2.0, 4.0, 2.0, 4.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.54443359375, -0.5223312377929688, -0.5002288818359375, -0.47812652587890625, -0.456024169921875, -0.43392181396484375, -0.4118194580078125, -0.38971710205078125, -0.36761474609375, -0.34551239013671875, -0.3234100341796875, -0.30130767822265625, -0.279205322265625, -0.25710296630859375, -0.2350006103515625, -0.21289825439453125, -0.1907958984375, -0.16869354248046875, -0.1465911865234375, -0.12448883056640625, -0.102386474609375, -0.08028411865234375, -0.0581817626953125, -0.03607940673828125, -0.01397705078125, 0.00812530517578125, 0.0302276611328125, 0.05233001708984375, 0.074432373046875, 0.09653472900390625, 0.1186370849609375, 0.14073944091796875, 0.162841796875, 0.18494415283203125, 0.2070465087890625, 0.22914886474609375, 0.251251220703125, 0.27335357666015625, 0.2954559326171875, 0.31755828857421875, 0.33966064453125, 0.36176300048828125, 0.3838653564453125, 0.40596771240234375, 0.428070068359375, 0.45017242431640625, 0.4722747802734375, 0.49437713623046875, 0.5164794921875, 0.5385818481445312, 0.5606842041015625, 0.5827865600585938, 0.604888916015625, 0.6269912719726562, 0.6490936279296875, 0.6711959838867188, 0.69329833984375, 0.7154006958007812, 0.7375030517578125, 0.7596054077148438, 0.781707763671875, 0.8038101196289062, 0.8259124755859375, 0.8480148315429688, 0.8701171875]}, "gradients/encoder.encoder.layers.7.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 5.0, 2.0, 7.0, 12.0, 28.0, 70.0, 208.0, 360.0, 200.0, 87.0, 18.0, 6.0, 4.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-16.452632904052734, -15.788700103759766, -15.124768257141113, -14.460835456848145, -13.796902656555176, -13.132970809936523, -12.469038009643555, -11.805105209350586, -11.141172409057617, -10.477239608764648, -9.813307762145996, -9.149374961853027, -8.485442161560059, -7.821509838104248, -7.1575775146484375, -6.493644714355469, -5.829712867736816, -5.165780544281006, -4.501847743988037, -3.8379154205322266, -3.173982858657837, -2.5100502967834473, -1.8461179733276367, -1.182185173034668, -0.5182528495788574, 0.14567965269088745, 0.8096121549606323, 1.4735445976257324, 2.137477159500122, 2.8014097213745117, 3.4653420448303223, 4.129274845123291, 4.793207168579102, 5.457139492034912, 6.121072292327881, 6.785004615783691, 7.44893741607666, 8.112869262695312, 8.776802062988281, 9.44073486328125, 10.104667663574219, 10.768600463867188, 11.43253231048584, 12.096465110778809, 12.760397911071777, 13.42432975769043, 14.088262557983398, 14.752195358276367, 15.41612720489502, 16.080059051513672, 16.74399185180664, 17.40792465209961, 18.071857452392578, 18.735790252685547, 19.399723052978516, 20.063655853271484, 20.72758674621582, 21.39151954650879, 22.055452346801758, 22.719383239746094, 23.383316040039062, 24.04724884033203, 24.711181640625, 25.37511444091797, 26.039047241210938]}, "gradients/encoder.encoder.layers.7.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 4.0, 5.0, 2.0, 6.0, 6.0, 7.0, 9.0, 8.0, 5.0, 11.0, 10.0, 16.0, 21.0, 19.0, 15.0, 18.0, 36.0, 33.0, 31.0, 36.0, 38.0, 40.0, 45.0, 50.0, 51.0, 45.0, 43.0, 48.0, 40.0, 36.0, 27.0, 37.0, 27.0, 26.0, 28.0, 22.0, 26.0, 12.0, 16.0, 10.0, 10.0, 5.0, 6.0, 6.0, 3.0, 3.0, 3.0, 4.0, 3.0, 3.0, 1.0, 3.0], "bins": [-9.18872356414795, -8.937575340270996, -8.68642807006836, -8.435279846191406, -8.184131622314453, -7.932984352111816, -7.681836128234863, -7.430688381195068, -7.179540634155273, -6.9283928871154785, -6.677245140075684, -6.4260969161987305, -6.1749491691589355, -5.923801422119141, -5.6726531982421875, -5.421505451202393, -5.170357704162598, -4.919209957122803, -4.668062210083008, -4.416913986206055, -4.16576623916626, -3.914618492126465, -3.663470506668091, -3.412322521209717, -3.161174774169922, -2.910027027130127, -2.658879041671753, -2.407731056213379, -2.156583309173584, -1.9054354429244995, -1.654287576675415, -1.4031397104263306, -1.1519923210144043, -0.9008444547653198, -0.6496965885162354, -0.3985487222671509, -0.1474008560180664, 0.10374701023101807, 0.35489487648010254, 0.606042742729187, 0.8571906089782715, 1.108338475227356, 1.3594863414764404, 1.610634207725525, 1.8617820739746094, 2.1129298210144043, 2.3640778064727783, 2.6152257919311523, 2.8663735389709473, 3.117521286010742, 3.368669271469116, 3.6198172569274902, 3.870965003967285, 4.12211275100708, 4.373260498046875, 4.624408721923828, 4.875556468963623, 5.126704216003418, 5.377852439880371, 5.629000186920166, 5.880147933959961, 6.131295680999756, 6.382443428039551, 6.633591651916504, 6.884739398956299]}, "gradients/encoder.encoder.layers.6.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 2.0, 4.0, 3.0, 4.0, 4.0, 11.0, 11.0, 18.0, 24.0, 45.0, 61.0, 98.0, 152.0, 274.0, 737.0, 4133806.0, 58006.0, 419.0, 219.0, 138.0, 76.0, 58.0, 40.0, 23.0, 16.0, 14.0, 13.0, 4.0, 5.0, 3.0, 4.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.4453125, -8.15283203125, -7.8603515625, -7.56787109375, -7.275390625, -6.98291015625, -6.6904296875, -6.39794921875, -6.10546875, -5.81298828125, -5.5205078125, -5.22802734375, -4.935546875, -4.64306640625, -4.3505859375, -4.05810546875, -3.765625, -3.47314453125, -3.1806640625, -2.88818359375, -2.595703125, -2.30322265625, -2.0107421875, -1.71826171875, -1.42578125, -1.13330078125, -0.8408203125, -0.54833984375, -0.255859375, 0.03662109375, 0.3291015625, 0.62158203125, 0.9140625, 1.20654296875, 1.4990234375, 1.79150390625, 2.083984375, 2.37646484375, 2.6689453125, 2.96142578125, 3.25390625, 3.54638671875, 3.8388671875, 4.13134765625, 4.423828125, 4.71630859375, 5.0087890625, 5.30126953125, 5.59375, 5.88623046875, 6.1787109375, 6.47119140625, 6.763671875, 7.05615234375, 7.3486328125, 7.64111328125, 7.93359375, 8.22607421875, 8.5185546875, 8.81103515625, 9.103515625, 9.39599609375, 9.6884765625, 9.98095703125, 10.2734375]}, "gradients/encoder.encoder.layers.6.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 4.0, 3.0, 3.0, 3.0, 11.0, 9.0, 18.0, 33.0, 25.0, 61.0, 79.0, 80.0, 98.0, 102.0, 125.0, 97.0, 77.0, 65.0, 38.0, 37.0, 18.0, 12.0, 5.0, 5.0, 5.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.42578125, -1.3906097412109375, -1.355438232421875, -1.3202667236328125, -1.28509521484375, -1.2499237060546875, -1.214752197265625, -1.1795806884765625, -1.1444091796875, -1.1092376708984375, -1.074066162109375, -1.0388946533203125, -1.00372314453125, -0.9685516357421875, -0.933380126953125, -0.8982086181640625, -0.863037109375, -0.8278656005859375, -0.792694091796875, -0.7575225830078125, -0.72235107421875, -0.6871795654296875, -0.652008056640625, -0.6168365478515625, -0.5816650390625, -0.5464935302734375, -0.511322021484375, -0.4761505126953125, -0.44097900390625, -0.4058074951171875, -0.370635986328125, -0.3354644775390625, -0.30029296875, -0.2651214599609375, -0.229949951171875, -0.1947784423828125, -0.15960693359375, -0.1244354248046875, -0.089263916015625, -0.0540924072265625, -0.0189208984375, 0.0162506103515625, 0.051422119140625, 0.0865936279296875, 0.12176513671875, 0.1569366455078125, 0.192108154296875, 0.2272796630859375, 0.262451171875, 0.2976226806640625, 0.332794189453125, 0.3679656982421875, 0.40313720703125, 0.4383087158203125, 0.473480224609375, 0.5086517333984375, 0.5438232421875, 0.5789947509765625, 0.614166259765625, 0.6493377685546875, 0.68450927734375, 0.7196807861328125, 0.754852294921875, 0.7900238037109375, 0.8251953125]}, "gradients/encoder.encoder.layers.6.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 3.0, 1.0, 2.0, 5.0, 2.0, 5.0, 7.0, 13.0, 23.0, 44.0, 80.0, 152.0, 396.0, 872.0, 2076.0, 5978.0, 22948.0, 167268.0, 3409878.0, 526373.0, 41951.0, 10107.0, 3489.0, 1463.0, 582.0, 264.0, 132.0, 72.0, 37.0, 21.0, 12.0, 10.0, 4.0, 7.0, 4.0, 7.0, 2.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-1.1318359375, -1.1001129150390625, -1.068389892578125, -1.0366668701171875, -1.00494384765625, -0.9732208251953125, -0.941497802734375, -0.9097747802734375, -0.8780517578125, -0.8463287353515625, -0.814605712890625, -0.7828826904296875, -0.75115966796875, -0.7194366455078125, -0.687713623046875, -0.6559906005859375, -0.624267578125, -0.5925445556640625, -0.560821533203125, -0.5290985107421875, -0.49737548828125, -0.4656524658203125, -0.433929443359375, -0.4022064208984375, -0.3704833984375, -0.3387603759765625, -0.307037353515625, -0.2753143310546875, -0.24359130859375, -0.2118682861328125, -0.180145263671875, -0.1484222412109375, -0.11669921875, -0.0849761962890625, -0.053253173828125, -0.0215301513671875, 0.01019287109375, 0.0419158935546875, 0.073638916015625, 0.1053619384765625, 0.1370849609375, 0.1688079833984375, 0.200531005859375, 0.2322540283203125, 0.26397705078125, 0.2957000732421875, 0.327423095703125, 0.3591461181640625, 0.390869140625, 0.4225921630859375, 0.454315185546875, 0.4860382080078125, 0.51776123046875, 0.5494842529296875, 0.581207275390625, 0.6129302978515625, 0.6446533203125, 0.6763763427734375, 0.708099365234375, 0.7398223876953125, 0.77154541015625, 0.8032684326171875, 0.834991455078125, 0.8667144775390625, 0.8984375]}, "gradients/encoder.encoder.layers.6.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 0.0, 2.0, 1.0, 3.0, 6.0, 3.0, 9.0, 11.0, 11.0, 25.0, 41.0, 48.0, 92.0, 179.0, 317.0, 660.0, 981.0, 796.0, 399.0, 220.0, 99.0, 48.0, 38.0, 25.0, 18.0, 9.0, 9.0, 5.0, 4.0, 6.0, 1.0, 5.0, 5.0, 1.0, 0.0, 0.0, 1.0, 3.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.048828125, -1.01458740234375, -0.9803466796875, -0.94610595703125, -0.911865234375, -0.87762451171875, -0.8433837890625, -0.80914306640625, -0.77490234375, -0.74066162109375, -0.7064208984375, -0.67218017578125, -0.637939453125, -0.60369873046875, -0.5694580078125, -0.53521728515625, -0.5009765625, -0.46673583984375, -0.4324951171875, -0.39825439453125, -0.364013671875, -0.32977294921875, -0.2955322265625, -0.26129150390625, -0.22705078125, -0.19281005859375, -0.1585693359375, -0.12432861328125, -0.090087890625, -0.05584716796875, -0.0216064453125, 0.01263427734375, 0.046875, 0.08111572265625, 0.1153564453125, 0.14959716796875, 0.183837890625, 0.21807861328125, 0.2523193359375, 0.28656005859375, 0.32080078125, 0.35504150390625, 0.3892822265625, 0.42352294921875, 0.457763671875, 0.49200439453125, 0.5262451171875, 0.56048583984375, 0.5947265625, 0.62896728515625, 0.6632080078125, 0.69744873046875, 0.731689453125, 0.76593017578125, 0.8001708984375, 0.83441162109375, 0.86865234375, 0.90289306640625, 0.9371337890625, 0.97137451171875, 1.005615234375, 1.03985595703125, 1.0740966796875, 1.10833740234375, 1.142578125]}, "gradients/encoder.encoder.layers.6.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 3.0, 2.0, 4.0, 12.0, 36.0, 171.0, 362.0, 292.0, 92.0, 21.0, 9.0, 6.0, 0.0, 3.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-19.129640579223633, -18.661569595336914, -18.193498611450195, -17.725427627563477, -17.257356643676758, -16.78928565979004, -16.32121467590332, -15.853143692016602, -15.385072708129883, -14.917001724243164, -14.448930740356445, -13.980859756469727, -13.512788772583008, -13.044717788696289, -12.57664680480957, -12.108575820922852, -11.640504837036133, -11.172433853149414, -10.704362869262695, -10.236291885375977, -9.768220901489258, -9.300149917602539, -8.83207893371582, -8.364007949829102, -7.895937919616699, -7.4278669357299805, -6.959795951843262, -6.491724967956543, -6.023653984069824, -5.5555830001831055, -5.087512016296387, -4.619441032409668, -4.151370048522949, -3.6832990646362305, -3.2152280807495117, -2.747157096862793, -2.279086112976074, -1.8110153675079346, -1.3429443836212158, -0.8748733997344971, -0.4068024158477783, 0.06126853823661804, 0.5293394923210144, 0.9974104166030884, 1.4654814004898071, 1.9335522651672363, 2.401623249053955, 2.869694232940674, 3.3377652168273926, 3.8058362007141113, 4.27390718460083, 4.741978168487549, 5.210049152374268, 5.678119659423828, 6.146190643310547, 6.614261627197266, 7.082332611083984, 7.550403594970703, 8.018474578857422, 8.48654556274414, 8.95461654663086, 9.422687530517578, 9.890758514404297, 10.358829498291016, 10.826900482177734]}, "gradients/encoder.encoder.layers.6.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 4.0, 2.0, 7.0, 5.0, 5.0, 10.0, 6.0, 12.0, 18.0, 13.0, 25.0, 29.0, 40.0, 38.0, 36.0, 37.0, 45.0, 54.0, 56.0, 57.0, 53.0, 55.0, 52.0, 43.0, 52.0, 48.0, 27.0, 32.0, 33.0, 27.0, 20.0, 13.0, 14.0, 12.0, 9.0, 5.0, 6.0, 5.0, 5.0, 1.0, 2.0, 2.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.8907530307769775, -2.758129596710205, -2.6255059242248535, -2.492882490158081, -2.3602588176727295, -2.227635383605957, -2.0950117111206055, -1.962388277053833, -1.829764723777771, -1.697141170501709, -1.564517617225647, -1.431894063949585, -1.2992706298828125, -1.166646957397461, -1.0340235233306885, -0.9013999700546265, -0.7687764167785645, -0.6361528635025024, -0.5035293102264404, -0.3709058165550232, -0.23828226327896118, -0.10565871000289917, 0.026964783668518066, 0.15958833694458008, 0.2922118902206421, 0.4248354434967041, 0.5574589967727661, 0.6900824904441833, 0.8227060437202454, 0.9553295969963074, 1.0879530906677246, 1.2205766439437866, 1.3531999588012695, 1.4858235120773315, 1.6184470653533936, 1.751070499420166, 1.8836941719055176, 2.01631760597229, 2.1489410400390625, 2.281564712524414, 2.4141883850097656, 2.546811819076538, 2.6794354915618896, 2.812058925628662, 2.9446825981140137, 3.077306032180786, 3.2099294662475586, 3.34255313873291, 3.4751765727996826, 3.607800006866455, 3.7404236793518066, 3.873047113418579, 4.005670547485352, 4.138294219970703, 4.270917892456055, 4.403541564941406, 4.5361647605896, 4.668788433074951, 4.8014116287231445, 4.934035301208496, 5.066658973693848, 5.199282646179199, 5.331905841827393, 5.464529514312744, 5.597153186798096]}, "gradients/encoder.encoder.layers.6.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 2.0, 0.0, 2.0, 3.0, 1.0, 1.0, 2.0, 7.0, 10.0, 6.0, 10.0, 14.0, 22.0, 36.0, 54.0, 55.0, 98.0, 131.0, 167.0, 299.0, 472.0, 706.0, 1290.0, 2258.0, 4404.0, 9078.0, 20458.0, 50685.0, 134769.0, 328357.0, 298775.0, 116575.0, 43923.0, 18238.0, 8373.0, 4070.0, 2100.0, 1198.0, 680.0, 386.0, 259.0, 162.0, 137.0, 89.0, 63.0, 47.0, 31.0, 17.0, 9.0, 10.0, 7.0, 6.0, 8.0, 0.0, 6.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.74462890625, -0.722015380859375, -0.69940185546875, -0.676788330078125, -0.6541748046875, -0.631561279296875, -0.60894775390625, -0.586334228515625, -0.563720703125, -0.541107177734375, -0.51849365234375, -0.495880126953125, -0.4732666015625, -0.450653076171875, -0.42803955078125, -0.405426025390625, -0.3828125, -0.360198974609375, -0.33758544921875, -0.314971923828125, -0.2923583984375, -0.269744873046875, -0.24713134765625, -0.224517822265625, -0.201904296875, -0.179290771484375, -0.15667724609375, -0.134063720703125, -0.1114501953125, -0.088836669921875, -0.06622314453125, -0.043609619140625, -0.02099609375, 0.001617431640625, 0.02423095703125, 0.046844482421875, 0.0694580078125, 0.092071533203125, 0.11468505859375, 0.137298583984375, 0.159912109375, 0.182525634765625, 0.20513916015625, 0.227752685546875, 0.2503662109375, 0.272979736328125, 0.29559326171875, 0.318206787109375, 0.3408203125, 0.363433837890625, 0.38604736328125, 0.408660888671875, 0.4312744140625, 0.453887939453125, 0.47650146484375, 0.499114990234375, 0.521728515625, 0.544342041015625, 0.56695556640625, 0.589569091796875, 0.6121826171875, 0.634796142578125, 0.65740966796875, 0.680023193359375, 0.70263671875]}, "gradients/encoder.encoder.layers.6.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 4.0, 0.0, 3.0, 3.0, 1.0, 7.0, 15.0, 14.0, 25.0, 30.0, 43.0, 58.0, 69.0, 83.0, 84.0, 78.0, 100.0, 90.0, 66.0, 65.0, 47.0, 47.0, 37.0, 13.0, 15.0, 4.0, 4.0, 6.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.98583984375, -0.9574356079101562, -0.9290313720703125, -0.9006271362304688, -0.872222900390625, -0.8438186645507812, -0.8154144287109375, -0.7870101928710938, -0.75860595703125, -0.7302017211914062, -0.7017974853515625, -0.6733932495117188, -0.644989013671875, -0.6165847778320312, -0.5881805419921875, -0.5597763061523438, -0.5313720703125, -0.5029678344726562, -0.4745635986328125, -0.44615936279296875, -0.417755126953125, -0.38935089111328125, -0.3609466552734375, -0.33254241943359375, -0.30413818359375, -0.27573394775390625, -0.2473297119140625, -0.21892547607421875, -0.190521240234375, -0.16211700439453125, -0.1337127685546875, -0.10530853271484375, -0.076904296875, -0.04850006103515625, -0.0200958251953125, 0.00830841064453125, 0.036712646484375, 0.06511688232421875, 0.0935211181640625, 0.12192535400390625, 0.15032958984375, 0.17873382568359375, 0.2071380615234375, 0.23554229736328125, 0.263946533203125, 0.29235076904296875, 0.3207550048828125, 0.34915924072265625, 0.3775634765625, 0.40596771240234375, 0.4343719482421875, 0.46277618408203125, 0.491180419921875, 0.5195846557617188, 0.5479888916015625, 0.5763931274414062, 0.60479736328125, 0.6332015991210938, 0.6616058349609375, 0.6900100708007812, 0.718414306640625, 0.7468185424804688, 0.7752227783203125, 0.8036270141601562, 0.83203125]}, "gradients/encoder.encoder.layers.6.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 3.0, 2.0, 3.0, 6.0, 3.0, 22.0, 22.0, 31.0, 47.0, 106.0, 195.0, 407.0, 1006.0, 3431.0, 32453.0, 763599.0, 232417.0, 11430.0, 2021.0, 702.0, 327.0, 138.0, 84.0, 33.0, 25.0, 15.0, 10.0, 7.0, 5.0, 2.0, 4.0, 1.0, 4.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0], "bins": [-2.732421875, -2.658172607421875, -2.58392333984375, -2.509674072265625, -2.4354248046875, -2.361175537109375, -2.28692626953125, -2.212677001953125, -2.138427734375, -2.064178466796875, -1.98992919921875, -1.915679931640625, -1.8414306640625, -1.767181396484375, -1.69293212890625, -1.618682861328125, -1.54443359375, -1.470184326171875, -1.39593505859375, -1.321685791015625, -1.2474365234375, -1.173187255859375, -1.09893798828125, -1.024688720703125, -0.950439453125, -0.876190185546875, -0.80194091796875, -0.727691650390625, -0.6534423828125, -0.579193115234375, -0.50494384765625, -0.430694580078125, -0.3564453125, -0.282196044921875, -0.20794677734375, -0.133697509765625, -0.0594482421875, 0.014801025390625, 0.08905029296875, 0.163299560546875, 0.237548828125, 0.311798095703125, 0.38604736328125, 0.460296630859375, 0.5345458984375, 0.608795166015625, 0.68304443359375, 0.757293701171875, 0.83154296875, 0.905792236328125, 0.98004150390625, 1.054290771484375, 1.1285400390625, 1.202789306640625, 1.27703857421875, 1.351287841796875, 1.425537109375, 1.499786376953125, 1.57403564453125, 1.648284912109375, 1.7225341796875, 1.796783447265625, 1.87103271484375, 1.945281982421875, 2.01953125]}, "gradients/encoder.encoder.layers.6.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 8.0, 3.0, 1.0, 4.0, 9.0, 10.0, 11.0, 9.0, 9.0, 12.0, 15.0, 18.0, 17.0, 19.0, 25.0, 25.0, 33.0, 28.0, 33.0, 38.0, 41.0, 36.0, 44.0, 49.0, 38.0, 40.0, 35.0, 41.0, 47.0, 31.0, 36.0, 27.0, 29.0, 22.0, 24.0, 24.0, 18.0, 16.0, 18.0, 15.0, 11.0, 6.0, 7.0, 6.0, 4.0, 4.0, 4.0, 4.0, 1.0, 3.0, 3.0, 1.0, 3.0, 2.0], "bins": [-1.650390625, -1.6019439697265625, -1.553497314453125, -1.5050506591796875, -1.45660400390625, -1.4081573486328125, -1.359710693359375, -1.3112640380859375, -1.2628173828125, -1.2143707275390625, -1.165924072265625, -1.1174774169921875, -1.06903076171875, -1.0205841064453125, -0.972137451171875, -0.9236907958984375, -0.875244140625, -0.8267974853515625, -0.778350830078125, -0.7299041748046875, -0.68145751953125, -0.6330108642578125, -0.584564208984375, -0.5361175537109375, -0.4876708984375, -0.4392242431640625, -0.390777587890625, -0.3423309326171875, -0.29388427734375, -0.2454376220703125, -0.196990966796875, -0.1485443115234375, -0.10009765625, -0.0516510009765625, -0.003204345703125, 0.0452423095703125, 0.09368896484375, 0.1421356201171875, 0.190582275390625, 0.2390289306640625, 0.2874755859375, 0.3359222412109375, 0.384368896484375, 0.4328155517578125, 0.48126220703125, 0.5297088623046875, 0.578155517578125, 0.6266021728515625, 0.675048828125, 0.7234954833984375, 0.771942138671875, 0.8203887939453125, 0.86883544921875, 0.9172821044921875, 0.965728759765625, 1.0141754150390625, 1.0626220703125, 1.1110687255859375, 1.159515380859375, 1.2079620361328125, 1.25640869140625, 1.3048553466796875, 1.353302001953125, 1.4017486572265625, 1.4501953125]}, "gradients/encoder.encoder.layers.6.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 0.0, 0.0, 2.0, 1.0, 2.0, 2.0, 4.0, 3.0, 6.0, 7.0, 7.0, 23.0, 28.0, 32.0, 75.0, 153.0, 289.0, 764.0, 3296.0, 32004.0, 952363.0, 53777.0, 4036.0, 1004.0, 338.0, 143.0, 76.0, 42.0, 26.0, 14.0, 8.0, 10.0, 12.0, 5.0, 1.0, 3.0, 1.0, 0.0, 3.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.07421875, -1.0378875732421875, -1.001556396484375, -0.9652252197265625, -0.92889404296875, -0.8925628662109375, -0.856231689453125, -0.8199005126953125, -0.7835693359375, -0.7472381591796875, -0.710906982421875, -0.6745758056640625, -0.63824462890625, -0.6019134521484375, -0.565582275390625, -0.5292510986328125, -0.492919921875, -0.4565887451171875, -0.420257568359375, -0.3839263916015625, -0.34759521484375, -0.3112640380859375, -0.274932861328125, -0.2386016845703125, -0.2022705078125, -0.1659393310546875, -0.129608154296875, -0.0932769775390625, -0.05694580078125, -0.0206146240234375, 0.015716552734375, 0.0520477294921875, 0.08837890625, 0.1247100830078125, 0.161041259765625, 0.1973724365234375, 0.23370361328125, 0.2700347900390625, 0.306365966796875, 0.3426971435546875, 0.3790283203125, 0.4153594970703125, 0.451690673828125, 0.4880218505859375, 0.52435302734375, 0.5606842041015625, 0.597015380859375, 0.6333465576171875, 0.669677734375, 0.7060089111328125, 0.742340087890625, 0.7786712646484375, 0.81500244140625, 0.8513336181640625, 0.887664794921875, 0.9239959716796875, 0.9603271484375, 0.9966583251953125, 1.032989501953125, 1.0693206787109375, 1.10565185546875, 1.1419830322265625, 1.178314208984375, 1.2146453857421875, 1.2509765625]}, "gradients/encoder.encoder.layers.6.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 1.0, 2.0, 1.0, 1.0, 1.0, 2.0, 7.0, 5.0, 4.0, 9.0, 8.0, 12.0, 9.0, 18.0, 22.0, 43.0, 65.0, 82.0, 119.0, 137.0, 132.0, 83.0, 68.0, 39.0, 36.0, 24.0, 22.0, 7.0, 12.0, 16.0, 5.0, 10.0, 5.0, 2.0, 3.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.822845458984375e-05, -9.521190077066422e-05, -9.219534695148468e-05, -8.917879313230515e-05, -8.616223931312561e-05, -8.314568549394608e-05, -8.012913167476654e-05, -7.7112577855587e-05, -7.409602403640747e-05, -7.107947021722794e-05, -6.80629163980484e-05, -6.504636257886887e-05, -6.202980875968933e-05, -5.9013254940509796e-05, -5.599670112133026e-05, -5.2980147302150726e-05, -4.996359348297119e-05, -4.6947039663791656e-05, -4.393048584461212e-05, -4.091393202543259e-05, -3.789737820625305e-05, -3.488082438707352e-05, -3.186427056789398e-05, -2.8847716748714447e-05, -2.5831162929534912e-05, -2.2814609110355377e-05, -1.9798055291175842e-05, -1.6781501471996307e-05, -1.3764947652816772e-05, -1.0748393833637238e-05, -7.731840014457703e-06, -4.715286195278168e-06, -1.6987323760986328e-06, 1.317821443080902e-06, 4.334375262260437e-06, 7.350929081439972e-06, 1.0367482900619507e-05, 1.3384036719799042e-05, 1.6400590538978577e-05, 1.941714435815811e-05, 2.2433698177337646e-05, 2.545025199651718e-05, 2.8466805815696716e-05, 3.148335963487625e-05, 3.4499913454055786e-05, 3.751646727323532e-05, 4.0533021092414856e-05, 4.354957491159439e-05, 4.6566128730773926e-05, 4.958268254995346e-05, 5.2599236369132996e-05, 5.561579018831253e-05, 5.8632344007492065e-05, 6.16488978266716e-05, 6.466545164585114e-05, 6.768200546503067e-05, 7.06985592842102e-05, 7.371511310338974e-05, 7.673166692256927e-05, 7.974822074174881e-05, 8.276477456092834e-05, 8.578132838010788e-05, 8.879788219928741e-05, 9.181443601846695e-05, 9.483098983764648e-05]}, "gradients/encoder.encoder.layers.6.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 6.0, 1.0, 3.0, 7.0, 5.0, 11.0, 13.0, 19.0, 39.0, 46.0, 78.0, 134.0, 247.0, 460.0, 1022.0, 2925.0, 12762.0, 167054.0, 803437.0, 50231.0, 6619.0, 1869.0, 757.0, 320.0, 208.0, 94.0, 56.0, 36.0, 28.0, 28.0, 9.0, 11.0, 5.0, 8.0, 1.0, 6.0, 2.0, 3.0, 0.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.7666015625, -0.7448883056640625, -0.723175048828125, -0.7014617919921875, -0.67974853515625, -0.6580352783203125, -0.636322021484375, -0.6146087646484375, -0.5928955078125, -0.5711822509765625, -0.549468994140625, -0.5277557373046875, -0.50604248046875, -0.4843292236328125, -0.462615966796875, -0.4409027099609375, -0.419189453125, -0.3974761962890625, -0.375762939453125, -0.3540496826171875, -0.33233642578125, -0.3106231689453125, -0.288909912109375, -0.2671966552734375, -0.2454833984375, -0.2237701416015625, -0.202056884765625, -0.1803436279296875, -0.15863037109375, -0.1369171142578125, -0.115203857421875, -0.0934906005859375, -0.07177734375, -0.0500640869140625, -0.028350830078125, -0.0066375732421875, 0.01507568359375, 0.0367889404296875, 0.058502197265625, 0.0802154541015625, 0.1019287109375, 0.1236419677734375, 0.145355224609375, 0.1670684814453125, 0.18878173828125, 0.2104949951171875, 0.232208251953125, 0.2539215087890625, 0.275634765625, 0.2973480224609375, 0.319061279296875, 0.3407745361328125, 0.36248779296875, 0.3842010498046875, 0.405914306640625, 0.4276275634765625, 0.4493408203125, 0.4710540771484375, 0.492767333984375, 0.5144805908203125, 0.53619384765625, 0.5579071044921875, 0.579620361328125, 0.6013336181640625, 0.623046875]}, "gradients/encoder.encoder.layers.6.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 4.0, 3.0, 5.0, 9.0, 8.0, 5.0, 19.0, 21.0, 29.0, 37.0, 36.0, 55.0, 67.0, 86.0, 90.0, 88.0, 97.0, 74.0, 59.0, 58.0, 37.0, 26.0, 26.0, 19.0, 22.0, 10.0, 5.0, 3.0, 2.0, 3.0, 3.0, 2.0, 0.0, 3.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.537109375, -0.5209732055664062, -0.5048370361328125, -0.48870086669921875, -0.472564697265625, -0.45642852783203125, -0.4402923583984375, -0.42415618896484375, -0.40802001953125, -0.39188385009765625, -0.3757476806640625, -0.35961151123046875, -0.343475341796875, -0.32733917236328125, -0.3112030029296875, -0.29506683349609375, -0.2789306640625, -0.26279449462890625, -0.2466583251953125, -0.23052215576171875, -0.214385986328125, -0.19824981689453125, -0.1821136474609375, -0.16597747802734375, -0.14984130859375, -0.13370513916015625, -0.1175689697265625, -0.10143280029296875, -0.085296630859375, -0.06916046142578125, -0.0530242919921875, -0.03688812255859375, -0.020751953125, -0.00461578369140625, 0.0115203857421875, 0.02765655517578125, 0.043792724609375, 0.05992889404296875, 0.0760650634765625, 0.09220123291015625, 0.10833740234375, 0.12447357177734375, 0.1406097412109375, 0.15674591064453125, 0.172882080078125, 0.18901824951171875, 0.2051544189453125, 0.22129058837890625, 0.2374267578125, 0.25356292724609375, 0.2696990966796875, 0.28583526611328125, 0.301971435546875, 0.31810760498046875, 0.3342437744140625, 0.35037994384765625, 0.36651611328125, 0.38265228271484375, 0.3987884521484375, 0.41492462158203125, 0.431060791015625, 0.44719696044921875, 0.4633331298828125, 0.47946929931640625, 0.49560546875]}, "gradients/encoder.encoder.layers.6.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 0.0, 0.0, 2.0, 2.0, 4.0, 7.0, 5.0, 10.0, 31.0, 83.0, 263.0, 334.0, 173.0, 56.0, 23.0, 7.0, 4.0, 3.0, 4.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-22.919252395629883, -22.401060104370117, -21.88286590576172, -21.364673614501953, -20.846481323242188, -20.32828712463379, -19.810094833374023, -19.291902542114258, -18.77370834350586, -18.255516052246094, -17.737321853637695, -17.21912956237793, -16.700937271118164, -16.182743072509766, -15.66455078125, -15.146357536315918, -14.628165245056152, -14.10997200012207, -13.591779708862305, -13.073586463928223, -12.55539321899414, -12.037200927734375, -11.519007682800293, -11.000814437866211, -10.482622146606445, -9.964428901672363, -9.446236610412598, -8.928043365478516, -8.409850120544434, -7.89165735244751, -7.373464584350586, -6.855271339416504, -6.337078094482422, -5.818885326385498, -5.300692081451416, -4.782499313354492, -4.26430606842041, -3.7461133003234863, -3.2279205322265625, -2.7097275257110596, -2.1915345191955566, -1.6733415126800537, -1.1551486253738403, -0.636955738067627, -0.11876273155212402, 0.3994302749633789, 0.9176230430603027, 1.4358160495758057, 1.9540090560913086, 2.4722020626068115, 2.9903950691223145, 3.5085878372192383, 4.02678108215332, 4.544973850250244, 5.063166618347168, 5.58135986328125, 6.099552631378174, 6.617745399475098, 7.13593864440918, 7.6541314125061035, 8.172324180603027, 8.69051742553711, 9.208709716796875, 9.726902961730957, 10.245096206665039]}, "gradients/encoder.encoder.layers.6.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 3.0, 1.0, 6.0, 4.0, 4.0, 5.0, 7.0, 8.0, 13.0, 5.0, 20.0, 18.0, 14.0, 21.0, 18.0, 36.0, 40.0, 37.0, 34.0, 42.0, 56.0, 47.0, 67.0, 57.0, 52.0, 44.0, 43.0, 58.0, 24.0, 35.0, 21.0, 15.0, 26.0, 30.0, 18.0, 7.0, 20.0, 7.0, 10.0, 12.0, 4.0, 9.0, 5.0, 1.0, 4.0, 1.0, 3.0, 4.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-6.808499336242676, -6.574554920196533, -6.340610504150391, -6.106666564941406, -5.872722148895264, -5.638777732849121, -5.404833793640137, -5.170889377593994, -4.936944961547852, -4.703000545501709, -4.469056129455566, -4.235112190246582, -4.0011677742004395, -3.767223358154297, -3.5332791805267334, -3.29933500289917, -3.0653905868530273, -2.8314461708068848, -2.5975019931793213, -2.363557815551758, -2.1296133995056152, -1.8956691026687622, -1.6617248058319092, -1.4277805089950562, -1.1938362121582031, -0.9598919153213501, -0.7259476184844971, -0.49200332164764404, -0.258059024810791, -0.02411472797393799, 0.20982956886291504, 0.44377386569976807, 0.6777181625366211, 0.9116624593734741, 1.1456067562103271, 1.3795510530471802, 1.6134953498840332, 1.8474396467208862, 2.0813839435577393, 2.3153281211853027, 2.5492725372314453, 2.783216953277588, 3.0171611309051514, 3.251105308532715, 3.4850497245788574, 3.718994140625, 3.9529383182525635, 4.186882495880127, 4.4208269119262695, 4.654771327972412, 4.888715744018555, 5.122659683227539, 5.356604099273682, 5.590548515319824, 5.824492454528809, 6.058436870574951, 6.292381286621094, 6.526325702667236, 6.760270118713379, 6.994214057922363, 7.228158473968506, 7.462102890014648, 7.696046829223633, 7.929991245269775, 8.163935661315918]}, "gradients/encoder.encoder.layers.5.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 1.0, 1.0, 2.0, 1.0, 5.0, 5.0, 9.0, 7.0, 11.0, 20.0, 32.0, 40.0, 81.0, 128.0, 191.0, 422.0, 1039.0, 3308.0, 16675.0, 192017.0, 3529800.0, 419879.0, 23823.0, 4441.0, 1229.0, 484.0, 229.0, 146.0, 79.0, 53.0, 38.0, 38.0, 17.0, 13.0, 9.0, 9.0, 6.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0], "bins": [-1.2626953125, -1.2308120727539062, -1.1989288330078125, -1.1670455932617188, -1.135162353515625, -1.1032791137695312, -1.0713958740234375, -1.0395126342773438, -1.00762939453125, -0.9757461547851562, -0.9438629150390625, -0.9119796752929688, -0.880096435546875, -0.8482131958007812, -0.8163299560546875, -0.7844467163085938, -0.7525634765625, -0.7206802368164062, -0.6887969970703125, -0.6569137573242188, -0.625030517578125, -0.5931472778320312, -0.5612640380859375, -0.5293807983398438, -0.49749755859375, -0.46561431884765625, -0.4337310791015625, -0.40184783935546875, -0.369964599609375, -0.33808135986328125, -0.3061981201171875, -0.27431488037109375, -0.242431640625, -0.21054840087890625, -0.1786651611328125, -0.14678192138671875, -0.114898681640625, -0.08301544189453125, -0.0511322021484375, -0.01924896240234375, 0.01263427734375, 0.04451751708984375, 0.0764007568359375, 0.10828399658203125, 0.140167236328125, 0.17205047607421875, 0.2039337158203125, 0.23581695556640625, 0.2677001953125, 0.29958343505859375, 0.3314666748046875, 0.36334991455078125, 0.395233154296875, 0.42711639404296875, 0.4589996337890625, 0.49088287353515625, 0.52276611328125, 0.5546493530273438, 0.5865325927734375, 0.6184158325195312, 0.650299072265625, 0.6821823120117188, 0.7140655517578125, 0.7459487915039062, 0.77783203125]}, "gradients/encoder.encoder.layers.5.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 4.0, 2.0, 0.0, 6.0, 6.0, 18.0, 28.0, 26.0, 30.0, 63.0, 62.0, 83.0, 98.0, 103.0, 91.0, 96.0, 85.0, 71.0, 41.0, 38.0, 22.0, 21.0, 8.0, 4.0, 3.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.95263671875, -0.9244003295898438, -0.8961639404296875, -0.8679275512695312, -0.839691162109375, -0.8114547729492188, -0.7832183837890625, -0.7549819946289062, -0.72674560546875, -0.6985092163085938, -0.6702728271484375, -0.6420364379882812, -0.613800048828125, -0.5855636596679688, -0.5573272705078125, -0.5290908813476562, -0.5008544921875, -0.47261810302734375, -0.4443817138671875, -0.41614532470703125, -0.387908935546875, -0.35967254638671875, -0.3314361572265625, -0.30319976806640625, -0.27496337890625, -0.24672698974609375, -0.2184906005859375, -0.19025421142578125, -0.162017822265625, -0.13378143310546875, -0.1055450439453125, -0.07730865478515625, -0.049072265625, -0.02083587646484375, 0.0074005126953125, 0.03563690185546875, 0.063873291015625, 0.09210968017578125, 0.1203460693359375, 0.14858245849609375, 0.17681884765625, 0.20505523681640625, 0.2332916259765625, 0.26152801513671875, 0.289764404296875, 0.31800079345703125, 0.3462371826171875, 0.37447357177734375, 0.4027099609375, 0.43094635009765625, 0.4591827392578125, 0.48741912841796875, 0.515655517578125, 0.5438919067382812, 0.5721282958984375, 0.6003646850585938, 0.62860107421875, 0.6568374633789062, 0.6850738525390625, 0.7133102416992188, 0.741546630859375, 0.7697830200195312, 0.7980194091796875, 0.8262557983398438, 0.8544921875]}, "gradients/encoder.encoder.layers.5.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 1.0, 4.0, 2.0, 11.0, 22.0, 38.0, 63.0, 138.0, 293.0, 772.0, 3267.0, 29310.0, 2471930.0, 1660037.0, 24659.0, 2753.0, 610.0, 224.0, 81.0, 31.0, 22.0, 8.0, 11.0, 2.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.9853515625, -1.9344024658203125, -1.883453369140625, -1.8325042724609375, -1.78155517578125, -1.7306060791015625, -1.679656982421875, -1.6287078857421875, -1.5777587890625, -1.5268096923828125, -1.475860595703125, -1.4249114990234375, -1.37396240234375, -1.3230133056640625, -1.272064208984375, -1.2211151123046875, -1.170166015625, -1.1192169189453125, -1.068267822265625, -1.0173187255859375, -0.96636962890625, -0.9154205322265625, -0.864471435546875, -0.8135223388671875, -0.7625732421875, -0.7116241455078125, -0.660675048828125, -0.6097259521484375, -0.55877685546875, -0.5078277587890625, -0.456878662109375, -0.4059295654296875, -0.35498046875, -0.3040313720703125, -0.253082275390625, -0.2021331787109375, -0.15118408203125, -0.1002349853515625, -0.049285888671875, 0.0016632080078125, 0.0526123046875, 0.1035614013671875, 0.154510498046875, 0.2054595947265625, 0.25640869140625, 0.3073577880859375, 0.358306884765625, 0.4092559814453125, 0.460205078125, 0.5111541748046875, 0.562103271484375, 0.6130523681640625, 0.66400146484375, 0.7149505615234375, 0.765899658203125, 0.8168487548828125, 0.8677978515625, 0.9187469482421875, 0.969696044921875, 1.0206451416015625, 1.07159423828125, 1.1225433349609375, 1.173492431640625, 1.2244415283203125, 1.275390625]}, "gradients/encoder.encoder.layers.5.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 3.0, 3.0, 6.0, 9.0, 10.0, 6.0, 12.0, 18.0, 35.0, 48.0, 69.0, 114.0, 304.0, 598.0, 951.0, 865.0, 477.0, 239.0, 106.0, 68.0, 44.0, 34.0, 19.0, 11.0, 12.0, 8.0, 6.0, 3.0, 1.0, 2.0, 2.0, 0.0, 3.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.4091796875, -1.3698883056640625, -1.330596923828125, -1.2913055419921875, -1.25201416015625, -1.2127227783203125, -1.173431396484375, -1.1341400146484375, -1.0948486328125, -1.0555572509765625, -1.016265869140625, -0.9769744873046875, -0.93768310546875, -0.8983917236328125, -0.859100341796875, -0.8198089599609375, -0.780517578125, -0.7412261962890625, -0.701934814453125, -0.6626434326171875, -0.62335205078125, -0.5840606689453125, -0.544769287109375, -0.5054779052734375, -0.4661865234375, -0.4268951416015625, -0.387603759765625, -0.3483123779296875, -0.30902099609375, -0.2697296142578125, -0.230438232421875, -0.1911468505859375, -0.15185546875, -0.1125640869140625, -0.073272705078125, -0.0339813232421875, 0.00531005859375, 0.0446014404296875, 0.083892822265625, 0.1231842041015625, 0.1624755859375, 0.2017669677734375, 0.241058349609375, 0.2803497314453125, 0.31964111328125, 0.3589324951171875, 0.398223876953125, 0.4375152587890625, 0.476806640625, 0.5160980224609375, 0.555389404296875, 0.5946807861328125, 0.63397216796875, 0.6732635498046875, 0.712554931640625, 0.7518463134765625, 0.7911376953125, 0.8304290771484375, 0.869720458984375, 0.9090118408203125, 0.94830322265625, 0.9875946044921875, 1.026885986328125, 1.0661773681640625, 1.10546875]}, "gradients/encoder.encoder.layers.5.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 2.0, 1.0, 0.0, 2.0, 4.0, 10.0, 21.0, 62.0, 183.0, 344.0, 236.0, 103.0, 19.0, 9.0, 3.0, 6.0, 1.0, 1.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-12.195302963256836, -11.739367485046387, -11.283432960510254, -10.827497482299805, -10.371562957763672, -9.915627479553223, -9.45969295501709, -9.00375747680664, -8.547822952270508, -8.091887474060059, -7.635952949523926, -7.180017948150635, -6.724082946777344, -6.2681474685668945, -5.812212944030762, -5.3562774658203125, -4.9003424644470215, -4.4444074630737305, -3.9884724617004395, -3.5325374603271484, -3.0766024589538574, -2.6206672191619873, -2.1647322177886963, -1.7087972164154053, -1.2528622150421143, -0.7969272136688232, -0.34099215269088745, 0.11494290828704834, 0.5708779096603394, 1.02681303024292, 1.482748031616211, 1.938683032989502, 2.394618034362793, 2.850553035736084, 3.306488037109375, 3.762423038482666, 4.218358039855957, 4.674293518066406, 5.130228042602539, 5.586163520812988, 6.042098045349121, 6.498033046722412, 6.953968048095703, 7.409903049468994, 7.865838050842285, 8.321773529052734, 8.777708053588867, 9.233643531799316, 9.689579010009766, 10.145514488220215, 10.601449012756348, 11.057384490966797, 11.51331901550293, 11.969254493713379, 12.425189018249512, 12.881124496459961, 13.337059020996094, 13.792994499206543, 14.248929023742676, 14.704864501953125, 15.160799026489258, 15.616734504699707, 16.072669982910156, 16.52860450744629, 16.984539031982422]}, "gradients/encoder.encoder.layers.5.final_layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 3.0, 3.0, 6.0, 6.0, 5.0, 6.0, 10.0, 6.0, 19.0, 11.0, 14.0, 19.0, 27.0, 28.0, 43.0, 41.0, 42.0, 56.0, 58.0, 49.0, 66.0, 58.0, 54.0, 55.0, 50.0, 49.0, 40.0, 36.0, 25.0, 17.0, 21.0, 22.0, 14.0, 9.0, 12.0, 8.0, 9.0, 4.0, 2.0, 2.0, 2.0, 4.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.975935935974121, -4.809560775756836, -4.643185615539551, -4.476810932159424, -4.310435771942139, -4.1440606117248535, -3.9776854515075684, -3.811310291290283, -3.644935369491577, -3.478560209274292, -3.312185287475586, -3.145810127258301, -2.9794349670410156, -2.8130600452423096, -2.6466848850250244, -2.4803099632263184, -2.313934803009033, -2.147559642791748, -1.981184720993042, -1.8148095607757568, -1.6484345197677612, -1.4820594787597656, -1.3156843185424805, -1.1493092775344849, -0.9829342365264893, -0.8165591955184937, -0.6501840949058533, -0.4838090240955353, -0.3174339532852173, -0.15105891227722168, 0.015316188335418701, 0.18169128894805908, 0.3480663299560547, 0.5144413709640503, 0.6808164715766907, 0.847191572189331, 1.0135666131973267, 1.1799416542053223, 1.3463168144226074, 1.512691855430603, 1.6790668964385986, 1.8454419374465942, 2.01181697845459, 2.178192138671875, 2.34456729888916, 2.510942220687866, 2.6773173809051514, 2.8436923027038574, 3.0100674629211426, 3.1764426231384277, 3.342817544937134, 3.509192705154419, 3.675567626953125, 3.84194278717041, 4.008317947387695, 4.1746931076049805, 4.341068267822266, 4.507443428039551, 4.673818588256836, 4.840193271636963, 5.006568431854248, 5.172943592071533, 5.339318752288818, 5.5056939125061035, 5.6720685958862305]}, "gradients/encoder.encoder.layers.5.attention.out_proj.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 3.0, 6.0, 5.0, 15.0, 15.0, 14.0, 28.0, 41.0, 56.0, 69.0, 127.0, 186.0, 243.0, 385.0, 681.0, 1165.0, 2219.0, 4604.0, 9824.0, 23457.0, 59631.0, 154248.0, 312223.0, 275130.0, 122370.0, 46872.0, 18565.0, 8027.0, 3794.0, 1885.0, 1036.0, 545.0, 389.0, 225.0, 148.0, 104.0, 65.0, 48.0, 30.0, 37.0, 17.0, 8.0, 8.0, 9.0, 3.0, 2.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.603515625, -0.58331298828125, -0.5631103515625, -0.54290771484375, -0.522705078125, -0.50250244140625, -0.4822998046875, -0.46209716796875, -0.44189453125, -0.42169189453125, -0.4014892578125, -0.38128662109375, -0.361083984375, -0.34088134765625, -0.3206787109375, -0.30047607421875, -0.2802734375, -0.26007080078125, -0.2398681640625, -0.21966552734375, -0.199462890625, -0.17926025390625, -0.1590576171875, -0.13885498046875, -0.11865234375, -0.09844970703125, -0.0782470703125, -0.05804443359375, -0.037841796875, -0.01763916015625, 0.0025634765625, 0.02276611328125, 0.04296875, 0.06317138671875, 0.0833740234375, 0.10357666015625, 0.123779296875, 0.14398193359375, 0.1641845703125, 0.18438720703125, 0.20458984375, 0.22479248046875, 0.2449951171875, 0.26519775390625, 0.285400390625, 0.30560302734375, 0.3258056640625, 0.34600830078125, 0.3662109375, 0.38641357421875, 0.4066162109375, 0.42681884765625, 0.447021484375, 0.46722412109375, 0.4874267578125, 0.50762939453125, 0.52783203125, 0.54803466796875, 0.5682373046875, 0.58843994140625, 0.608642578125, 0.62884521484375, 0.6490478515625, 0.66925048828125, 0.689453125]}, "gradients/encoder.encoder.layers.5.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 3.0, 5.0, 7.0, 7.0, 16.0, 25.0, 26.0, 42.0, 50.0, 67.0, 72.0, 92.0, 97.0, 104.0, 76.0, 79.0, 63.0, 52.0, 49.0, 30.0, 14.0, 14.0, 4.0, 4.0, 5.0, 0.0, 5.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.87060546875, -0.8449172973632812, -0.8192291259765625, -0.7935409545898438, -0.767852783203125, -0.7421646118164062, -0.7164764404296875, -0.6907882690429688, -0.66510009765625, -0.6394119262695312, -0.6137237548828125, -0.5880355834960938, -0.562347412109375, -0.5366592407226562, -0.5109710693359375, -0.48528289794921875, -0.4595947265625, -0.43390655517578125, -0.4082183837890625, -0.38253021240234375, -0.356842041015625, -0.33115386962890625, -0.3054656982421875, -0.27977752685546875, -0.25408935546875, -0.22840118408203125, -0.2027130126953125, -0.17702484130859375, -0.151336669921875, -0.12564849853515625, -0.0999603271484375, -0.07427215576171875, -0.048583984375, -0.02289581298828125, 0.0027923583984375, 0.02848052978515625, 0.054168701171875, 0.07985687255859375, 0.1055450439453125, 0.13123321533203125, 0.15692138671875, 0.18260955810546875, 0.2082977294921875, 0.23398590087890625, 0.259674072265625, 0.28536224365234375, 0.3110504150390625, 0.33673858642578125, 0.3624267578125, 0.38811492919921875, 0.4138031005859375, 0.43949127197265625, 0.465179443359375, 0.49086761474609375, 0.5165557861328125, 0.5422439575195312, 0.56793212890625, 0.5936203002929688, 0.6193084716796875, 0.6449966430664062, 0.670684814453125, 0.6963729858398438, 0.7220611572265625, 0.7477493286132812, 0.7734375]}, "gradients/encoder.encoder.layers.5.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 2.0, 2.0, 0.0, 5.0, 1.0, 8.0, 6.0, 15.0, 16.0, 13.0, 27.0, 31.0, 47.0, 59.0, 79.0, 130.0, 169.0, 242.0, 292.0, 478.0, 730.0, 1209.0, 2458.0, 7311.0, 42912.0, 551185.0, 392141.0, 36717.0, 6575.0, 2326.0, 1181.0, 627.0, 443.0, 316.0, 222.0, 146.0, 115.0, 83.0, 61.0, 39.0, 39.0, 28.0, 16.0, 20.0, 11.0, 7.0, 10.0, 5.0, 6.0, 1.0, 5.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.5517578125, -1.5046844482421875, -1.457611083984375, -1.4105377197265625, -1.36346435546875, -1.3163909912109375, -1.269317626953125, -1.2222442626953125, -1.1751708984375, -1.1280975341796875, -1.081024169921875, -1.0339508056640625, -0.98687744140625, -0.9398040771484375, -0.892730712890625, -0.8456573486328125, -0.798583984375, -0.7515106201171875, -0.704437255859375, -0.6573638916015625, -0.61029052734375, -0.5632171630859375, -0.516143798828125, -0.4690704345703125, -0.4219970703125, -0.3749237060546875, -0.327850341796875, -0.2807769775390625, -0.23370361328125, -0.1866302490234375, -0.139556884765625, -0.0924835205078125, -0.04541015625, 0.0016632080078125, 0.048736572265625, 0.0958099365234375, 0.14288330078125, 0.1899566650390625, 0.237030029296875, 0.2841033935546875, 0.3311767578125, 0.3782501220703125, 0.425323486328125, 0.4723968505859375, 0.51947021484375, 0.5665435791015625, 0.613616943359375, 0.6606903076171875, 0.707763671875, 0.7548370361328125, 0.801910400390625, 0.8489837646484375, 0.89605712890625, 0.9431304931640625, 0.990203857421875, 1.0372772216796875, 1.0843505859375, 1.1314239501953125, 1.178497314453125, 1.2255706787109375, 1.27264404296875, 1.3197174072265625, 1.366790771484375, 1.4138641357421875, 1.4609375]}, "gradients/encoder.encoder.layers.5.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 5.0, 3.0, 8.0, 7.0, 9.0, 5.0, 13.0, 12.0, 7.0, 11.0, 16.0, 20.0, 18.0, 14.0, 25.0, 32.0, 33.0, 33.0, 49.0, 58.0, 43.0, 43.0, 39.0, 42.0, 49.0, 45.0, 48.0, 37.0, 38.0, 39.0, 27.0, 17.0, 24.0, 17.0, 20.0, 19.0, 14.0, 16.0, 13.0, 7.0, 6.0, 10.0, 2.0, 8.0, 4.0, 2.0, 1.0, 5.0, 1.0, 1.0, 0.0, 3.0, 1.0], "bins": [-1.94140625, -1.8848114013671875, -1.828216552734375, -1.7716217041015625, -1.71502685546875, -1.6584320068359375, -1.601837158203125, -1.5452423095703125, -1.4886474609375, -1.4320526123046875, -1.375457763671875, -1.3188629150390625, -1.26226806640625, -1.2056732177734375, -1.149078369140625, -1.0924835205078125, -1.035888671875, -0.9792938232421875, -0.922698974609375, -0.8661041259765625, -0.80950927734375, -0.7529144287109375, -0.696319580078125, -0.6397247314453125, -0.5831298828125, -0.5265350341796875, -0.469940185546875, -0.4133453369140625, -0.35675048828125, -0.3001556396484375, -0.243560791015625, -0.1869659423828125, -0.13037109375, -0.0737762451171875, -0.017181396484375, 0.0394134521484375, 0.09600830078125, 0.1526031494140625, 0.209197998046875, 0.2657928466796875, 0.3223876953125, 0.3789825439453125, 0.435577392578125, 0.4921722412109375, 0.54876708984375, 0.6053619384765625, 0.661956787109375, 0.7185516357421875, 0.775146484375, 0.8317413330078125, 0.888336181640625, 0.9449310302734375, 1.00152587890625, 1.0581207275390625, 1.114715576171875, 1.1713104248046875, 1.2279052734375, 1.2845001220703125, 1.341094970703125, 1.3976898193359375, 1.45428466796875, 1.5108795166015625, 1.567474365234375, 1.6240692138671875, 1.6806640625]}, "gradients/encoder.encoder.layers.5.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 5.0, 3.0, 2.0, 5.0, 5.0, 7.0, 18.0, 14.0, 28.0, 38.0, 52.0, 99.0, 179.0, 271.0, 678.0, 1843.0, 5841.0, 33848.0, 668913.0, 307529.0, 22064.0, 4465.0, 1363.0, 621.0, 317.0, 133.0, 79.0, 44.0, 37.0, 21.0, 18.0, 3.0, 10.0, 6.0, 4.0, 2.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.59912109375, -0.5809097290039062, -0.5626983642578125, -0.5444869995117188, -0.526275634765625, -0.5080642700195312, -0.4898529052734375, -0.47164154052734375, -0.45343017578125, -0.43521881103515625, -0.4170074462890625, -0.39879608154296875, -0.380584716796875, -0.36237335205078125, -0.3441619873046875, -0.32595062255859375, -0.3077392578125, -0.28952789306640625, -0.2713165283203125, -0.25310516357421875, -0.234893798828125, -0.21668243408203125, -0.1984710693359375, -0.18025970458984375, -0.16204833984375, -0.14383697509765625, -0.1256256103515625, -0.10741424560546875, -0.089202880859375, -0.07099151611328125, -0.0527801513671875, -0.03456878662109375, -0.016357421875, 0.00185394287109375, 0.0200653076171875, 0.03827667236328125, 0.056488037109375, 0.07469940185546875, 0.0929107666015625, 0.11112213134765625, 0.12933349609375, 0.14754486083984375, 0.1657562255859375, 0.18396759033203125, 0.202178955078125, 0.22039031982421875, 0.2386016845703125, 0.25681304931640625, 0.2750244140625, 0.29323577880859375, 0.3114471435546875, 0.32965850830078125, 0.347869873046875, 0.36608123779296875, 0.3842926025390625, 0.40250396728515625, 0.42071533203125, 0.43892669677734375, 0.4571380615234375, 0.47534942626953125, 0.493560791015625, 0.5117721557617188, 0.5299835205078125, 0.5481948852539062, 0.56640625]}, "gradients/encoder.encoder.layers.5.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 2.0, 3.0, 4.0, 7.0, 6.0, 9.0, 5.0, 18.0, 23.0, 19.0, 35.0, 38.0, 49.0, 63.0, 91.0, 108.0, 118.0, 79.0, 84.0, 59.0, 33.0, 41.0, 19.0, 22.0, 19.0, 22.0, 12.0, 8.0, 2.0, 2.0, 4.0, 1.0, 2.0, 1.0, 3.0, 2.0, 1.0, 0.0, 1.0, 2.0], "bins": [-0.00011360645294189453, -0.00011077430099248886, -0.00010794214904308319, -0.00010510999709367752, -0.00010227784514427185, -9.944569319486618e-05, -9.661354124546051e-05, -9.378138929605484e-05, -9.094923734664917e-05, -8.81170853972435e-05, -8.528493344783783e-05, -8.245278149843216e-05, -7.962062954902649e-05, -7.678847759962082e-05, -7.395632565021515e-05, -7.112417370080948e-05, -6.829202175140381e-05, -6.545986980199814e-05, -6.262771785259247e-05, -5.97955659031868e-05, -5.696341395378113e-05, -5.413126200437546e-05, -5.129911005496979e-05, -4.846695810556412e-05, -4.563480615615845e-05, -4.280265420675278e-05, -3.997050225734711e-05, -3.713835030794144e-05, -3.4306198358535767e-05, -3.1474046409130096e-05, -2.8641894459724426e-05, -2.5809742510318756e-05, -2.2977590560913086e-05, -2.0145438611507416e-05, -1.7313286662101746e-05, -1.4481134712696075e-05, -1.1648982763290405e-05, -8.816830813884735e-06, -5.984678864479065e-06, -3.1525269150733948e-06, -3.203749656677246e-07, 2.5117769837379456e-06, 5.343928933143616e-06, 8.176080882549286e-06, 1.1008232831954956e-05, 1.3840384781360626e-05, 1.6672536730766296e-05, 1.9504688680171967e-05, 2.2336840629577637e-05, 2.5168992578983307e-05, 2.8001144528388977e-05, 3.083329647779465e-05, 3.366544842720032e-05, 3.649760037660599e-05, 3.932975232601166e-05, 4.216190427541733e-05, 4.4994056224823e-05, 4.782620817422867e-05, 5.065836012363434e-05, 5.349051207304001e-05, 5.632266402244568e-05, 5.915481597185135e-05, 6.198696792125702e-05, 6.481911987066269e-05, 6.765127182006836e-05]}, "gradients/encoder.encoder.layers.5.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 2.0, 1.0, 1.0, 4.0, 6.0, 2.0, 5.0, 13.0, 18.0, 37.0, 44.0, 70.0, 112.0, 207.0, 378.0, 710.0, 1893.0, 6822.0, 56294.0, 863266.0, 104752.0, 9680.0, 2350.0, 883.0, 421.0, 226.0, 137.0, 75.0, 51.0, 43.0, 21.0, 12.0, 13.0, 6.0, 3.0, 2.0, 1.0, 2.0, 1.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.68994140625, -0.670562744140625, -0.65118408203125, -0.631805419921875, -0.6124267578125, -0.593048095703125, -0.57366943359375, -0.554290771484375, -0.534912109375, -0.515533447265625, -0.49615478515625, -0.476776123046875, -0.4573974609375, -0.438018798828125, -0.41864013671875, -0.399261474609375, -0.3798828125, -0.360504150390625, -0.34112548828125, -0.321746826171875, -0.3023681640625, -0.282989501953125, -0.26361083984375, -0.244232177734375, -0.224853515625, -0.205474853515625, -0.18609619140625, -0.166717529296875, -0.1473388671875, -0.127960205078125, -0.10858154296875, -0.089202880859375, -0.06982421875, -0.050445556640625, -0.03106689453125, -0.011688232421875, 0.0076904296875, 0.027069091796875, 0.04644775390625, 0.065826416015625, 0.085205078125, 0.104583740234375, 0.12396240234375, 0.143341064453125, 0.1627197265625, 0.182098388671875, 0.20147705078125, 0.220855712890625, 0.240234375, 0.259613037109375, 0.27899169921875, 0.298370361328125, 0.3177490234375, 0.337127685546875, 0.35650634765625, 0.375885009765625, 0.395263671875, 0.414642333984375, 0.43402099609375, 0.453399658203125, 0.4727783203125, 0.492156982421875, 0.51153564453125, 0.530914306640625, 0.55029296875]}, "gradients/encoder.encoder.layers.5.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 3.0, 7.0, 9.0, 12.0, 18.0, 19.0, 25.0, 34.0, 47.0, 64.0, 98.0, 107.0, 104.0, 107.0, 108.0, 59.0, 47.0, 31.0, 26.0, 15.0, 18.0, 9.0, 8.0, 9.0, 5.0, 2.0, 3.0, 2.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.6044921875, -0.5864791870117188, -0.5684661865234375, -0.5504531860351562, -0.532440185546875, -0.5144271850585938, -0.4964141845703125, -0.47840118408203125, -0.46038818359375, -0.44237518310546875, -0.4243621826171875, -0.40634918212890625, -0.388336181640625, -0.37032318115234375, -0.3523101806640625, -0.33429718017578125, -0.3162841796875, -0.29827117919921875, -0.2802581787109375, -0.26224517822265625, -0.244232177734375, -0.22621917724609375, -0.2082061767578125, -0.19019317626953125, -0.17218017578125, -0.15416717529296875, -0.1361541748046875, -0.11814117431640625, -0.100128173828125, -0.08211517333984375, -0.0641021728515625, -0.04608917236328125, -0.028076171875, -0.01006317138671875, 0.0079498291015625, 0.02596282958984375, 0.043975830078125, 0.06198883056640625, 0.0800018310546875, 0.09801483154296875, 0.11602783203125, 0.13404083251953125, 0.1520538330078125, 0.17006683349609375, 0.188079833984375, 0.20609283447265625, 0.2241058349609375, 0.24211883544921875, 0.2601318359375, 0.27814483642578125, 0.2961578369140625, 0.31417083740234375, 0.332183837890625, 0.35019683837890625, 0.3682098388671875, 0.38622283935546875, 0.40423583984375, 0.42224884033203125, 0.4402618408203125, 0.45827484130859375, 0.476287841796875, 0.49430084228515625, 0.5123138427734375, 0.5303268432617188, 0.54833984375]}, "gradients/encoder.encoder.layers.5.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 3.0, 1.0, 0.0, 2.0, 3.0, 2.0, 2.0, 4.0, 6.0, 7.0, 39.0, 77.0, 212.0, 346.0, 185.0, 66.0, 29.0, 18.0, 5.0, 4.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-22.19564437866211, -21.73833465576172, -21.281023025512695, -20.823713302612305, -20.36640167236328, -19.90909194946289, -19.451780319213867, -18.994470596313477, -18.537158966064453, -18.079849243164062, -17.62253761291504, -17.16522789001465, -16.707916259765625, -16.250606536865234, -15.793295860290527, -15.33598518371582, -14.87867546081543, -14.421364784240723, -13.964054107666016, -13.506743431091309, -13.049432754516602, -12.592123031616211, -12.134812355041504, -11.677501678466797, -11.22019100189209, -10.762880325317383, -10.305569648742676, -9.848258972167969, -9.390949249267578, -8.933637619018555, -8.476327896118164, -8.019017219543457, -7.561707019805908, -7.104396343231201, -6.647086143493652, -6.189775466918945, -5.732464790344238, -5.275154113769531, -4.817843437194824, -4.360533237457275, -3.9032225608825684, -3.4459118843078613, -2.9886014461517334, -2.5312910079956055, -2.0739803314208984, -1.6166696548461914, -1.1593592166900635, -0.7020487785339355, -0.24473810195922852, 0.21257245540618896, 0.6698830127716064, 1.127193570137024, 1.5845041275024414, 2.0418148040771484, 2.4991252422332764, 2.9564356803894043, 3.4137463569641113, 3.8710570335388184, 4.328367233276367, 4.785677909851074, 5.242988586425781, 5.700299263000488, 6.157609939575195, 6.614920139312744, 7.072230815887451]}, "gradients/encoder.encoder.layers.5.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 4.0, 1.0, 5.0, 1.0, 3.0, 0.0, 4.0, 7.0, 11.0, 10.0, 7.0, 17.0, 18.0, 14.0, 15.0, 15.0, 18.0, 33.0, 32.0, 32.0, 24.0, 41.0, 39.0, 45.0, 53.0, 84.0, 53.0, 62.0, 53.0, 41.0, 34.0, 30.0, 36.0, 28.0, 21.0, 24.0, 15.0, 21.0, 12.0, 9.0, 8.0, 9.0, 7.0, 5.0, 2.0, 6.0, 2.0, 1.0, 3.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.471921920776367, -6.248087406158447, -6.024252891540527, -5.800418376922607, -5.5765838623046875, -5.352749824523926, -5.128914833068848, -4.905080795288086, -4.681246280670166, -4.457411766052246, -4.233577251434326, -4.009742736816406, -3.7859084606170654, -3.5620739459991455, -3.3382394313812256, -3.1144051551818848, -2.8905704021453857, -2.666735887527466, -2.442901372909546, -2.219067096710205, -1.9952325820922852, -1.7713980674743652, -1.5475635528564453, -1.323729157447815, -1.099894642829895, -0.8760601878166199, -0.6522257328033447, -0.4283912181854248, -0.20455676317214966, 0.01927769184112549, 0.2431122064590454, 0.4669466018676758, 0.6907811164855957, 0.9146155714988708, 1.138450026512146, 1.362284541130066, 1.5861189365386963, 1.8099534511566162, 2.033787965774536, 2.257622241973877, 2.481456756591797, 2.705291271209717, 2.9291257858276367, 3.1529603004455566, 3.3767945766448975, 3.6006290912628174, 3.8244636058807373, 4.048297882080078, 4.272132873535156, 4.495967388153076, 4.719801902770996, 4.943636417388916, 5.167470932006836, 5.391304969787598, 5.615139961242676, 5.8389739990234375, 6.062808513641357, 6.286643028259277, 6.510477542877197, 6.734312057495117, 6.958146572113037, 7.181981086730957, 7.405815124511719, 7.629649639129639, 7.853484153747559]}, "gradients/encoder.encoder.layers.4.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 1.0, 6.0, 3.0, 5.0, 7.0, 13.0, 9.0, 24.0, 43.0, 73.0, 122.0, 225.0, 473.0, 1012.0, 2842.0, 9793.0, 48000.0, 508167.0, 3087990.0, 477593.0, 44336.0, 8839.0, 2690.0, 976.0, 463.0, 244.0, 146.0, 66.0, 45.0, 30.0, 14.0, 18.0, 9.0, 4.0, 2.0, 2.0, 2.0, 0.0, 2.0, 3.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.73779296875, -0.7170028686523438, -0.6962127685546875, -0.6754226684570312, -0.654632568359375, -0.6338424682617188, -0.6130523681640625, -0.5922622680664062, -0.57147216796875, -0.5506820678710938, -0.5298919677734375, -0.5091018676757812, -0.488311767578125, -0.46752166748046875, -0.4467315673828125, -0.42594146728515625, -0.4051513671875, -0.38436126708984375, -0.3635711669921875, -0.34278106689453125, -0.321990966796875, -0.30120086669921875, -0.2804107666015625, -0.25962066650390625, -0.23883056640625, -0.21804046630859375, -0.1972503662109375, -0.17646026611328125, -0.155670166015625, -0.13488006591796875, -0.1140899658203125, -0.09329986572265625, -0.072509765625, -0.05171966552734375, -0.0309295654296875, -0.01013946533203125, 0.010650634765625, 0.03144073486328125, 0.0522308349609375, 0.07302093505859375, 0.09381103515625, 0.11460113525390625, 0.1353912353515625, 0.15618133544921875, 0.176971435546875, 0.19776153564453125, 0.2185516357421875, 0.23934173583984375, 0.2601318359375, 0.28092193603515625, 0.3017120361328125, 0.32250213623046875, 0.343292236328125, 0.36408233642578125, 0.3848724365234375, 0.40566253662109375, 0.42645263671875, 0.44724273681640625, 0.4680328369140625, 0.48882293701171875, 0.509613037109375, 0.5304031372070312, 0.5511932373046875, 0.5719833374023438, 0.5927734375]}, "gradients/encoder.encoder.layers.4.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 3.0, 3.0, 4.0, 5.0, 9.0, 4.0, 11.0, 30.0, 22.0, 39.0, 53.0, 66.0, 77.0, 90.0, 85.0, 105.0, 75.0, 84.0, 63.0, 54.0, 40.0, 30.0, 19.0, 14.0, 8.0, 7.0, 1.0, 8.0, 2.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.87548828125, -0.8504791259765625, -0.825469970703125, -0.8004608154296875, -0.77545166015625, -0.7504425048828125, -0.725433349609375, -0.7004241943359375, -0.6754150390625, -0.6504058837890625, -0.625396728515625, -0.6003875732421875, -0.57537841796875, -0.5503692626953125, -0.525360107421875, -0.5003509521484375, -0.475341796875, -0.4503326416015625, -0.425323486328125, -0.4003143310546875, -0.37530517578125, -0.3502960205078125, -0.325286865234375, -0.3002777099609375, -0.2752685546875, -0.2502593994140625, -0.225250244140625, -0.2002410888671875, -0.17523193359375, -0.1502227783203125, -0.125213623046875, -0.1002044677734375, -0.0751953125, -0.0501861572265625, -0.025177001953125, -0.0001678466796875, 0.02484130859375, 0.0498504638671875, 0.074859619140625, 0.0998687744140625, 0.1248779296875, 0.1498870849609375, 0.174896240234375, 0.1999053955078125, 0.22491455078125, 0.2499237060546875, 0.274932861328125, 0.2999420166015625, 0.324951171875, 0.3499603271484375, 0.374969482421875, 0.3999786376953125, 0.42498779296875, 0.4499969482421875, 0.475006103515625, 0.5000152587890625, 0.5250244140625, 0.5500335693359375, 0.575042724609375, 0.6000518798828125, 0.62506103515625, 0.6500701904296875, 0.675079345703125, 0.7000885009765625, 0.72509765625]}, "gradients/encoder.encoder.layers.4.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 3.0, 4.0, 6.0, 10.0, 36.0, 55.0, 91.0, 185.0, 532.0, 2272.0, 25967.0, 3783608.0, 371839.0, 8048.0, 1125.0, 310.0, 107.0, 47.0, 24.0, 9.0, 8.0, 5.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.1953125, -2.1372222900390625, -2.079132080078125, -2.0210418701171875, -1.96295166015625, -1.9048614501953125, -1.846771240234375, -1.7886810302734375, -1.7305908203125, -1.6725006103515625, -1.614410400390625, -1.5563201904296875, -1.49822998046875, -1.4401397705078125, -1.382049560546875, -1.3239593505859375, -1.265869140625, -1.2077789306640625, -1.149688720703125, -1.0915985107421875, -1.03350830078125, -0.9754180908203125, -0.917327880859375, -0.8592376708984375, -0.8011474609375, -0.7430572509765625, -0.684967041015625, -0.6268768310546875, -0.56878662109375, -0.5106964111328125, -0.452606201171875, -0.3945159912109375, -0.33642578125, -0.2783355712890625, -0.220245361328125, -0.1621551513671875, -0.10406494140625, -0.0459747314453125, 0.012115478515625, 0.0702056884765625, 0.1282958984375, 0.1863861083984375, 0.244476318359375, 0.3025665283203125, 0.36065673828125, 0.4187469482421875, 0.476837158203125, 0.5349273681640625, 0.593017578125, 0.6511077880859375, 0.709197998046875, 0.7672882080078125, 0.82537841796875, 0.8834686279296875, 0.941558837890625, 0.9996490478515625, 1.0577392578125, 1.1158294677734375, 1.173919677734375, 1.2320098876953125, 1.29010009765625, 1.3481903076171875, 1.406280517578125, 1.4643707275390625, 1.5224609375]}, "gradients/encoder.encoder.layers.4.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 2.0, 3.0, 4.0, 6.0, 2.0, 8.0, 15.0, 19.0, 26.0, 53.0, 88.0, 205.0, 520.0, 1063.0, 1024.0, 485.0, 255.0, 124.0, 65.0, 42.0, 24.0, 17.0, 13.0, 6.0, 6.0, 2.0, 2.0, 0.0, 2.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.6474609375, -1.6063766479492188, -1.5652923583984375, -1.5242080688476562, -1.483123779296875, -1.4420394897460938, -1.4009552001953125, -1.3598709106445312, -1.31878662109375, -1.2777023315429688, -1.2366180419921875, -1.1955337524414062, -1.154449462890625, -1.1133651733398438, -1.0722808837890625, -1.0311965942382812, -0.9901123046875, -0.9490280151367188, -0.9079437255859375, -0.8668594360351562, -0.825775146484375, -0.7846908569335938, -0.7436065673828125, -0.7025222778320312, -0.66143798828125, -0.6203536987304688, -0.5792694091796875, -0.5381851196289062, -0.497100830078125, -0.45601654052734375, -0.4149322509765625, -0.37384796142578125, -0.332763671875, -0.29167938232421875, -0.2505950927734375, -0.20951080322265625, -0.168426513671875, -0.12734222412109375, -0.0862579345703125, -0.04517364501953125, -0.00408935546875, 0.03699493408203125, 0.0780792236328125, 0.11916351318359375, 0.160247802734375, 0.20133209228515625, 0.2424163818359375, 0.28350067138671875, 0.3245849609375, 0.36566925048828125, 0.4067535400390625, 0.44783782958984375, 0.488922119140625, 0.5300064086914062, 0.5710906982421875, 0.6121749877929688, 0.65325927734375, 0.6943435668945312, 0.7354278564453125, 0.7765121459960938, 0.817596435546875, 0.8586807250976562, 0.8997650146484375, 0.9408493041992188, 0.98193359375]}, "gradients/encoder.encoder.layers.4.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 0.0, 2.0, 5.0, 13.0, 44.0, 79.0, 154.0, 239.0, 242.0, 118.0, 38.0, 30.0, 7.0, 10.0, 5.0, 3.0, 2.0, 2.0, 3.0, 5.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-9.334019660949707, -9.047407150268555, -8.760793685913086, -8.474181175231934, -8.187568664550781, -7.900955677032471, -7.61434268951416, -7.327730178833008, -7.041117191314697, -6.754504203796387, -6.467891693115234, -6.181278705596924, -5.894665718078613, -5.608053207397461, -5.32144021987915, -5.03482723236084, -4.7482147216796875, -4.461601734161377, -4.174989223480225, -3.888376235961914, -3.6017634868621826, -3.315150737762451, -3.0285377502441406, -2.741925001144409, -2.4553122520446777, -2.1686995029449463, -1.8820866346359253, -1.5954737663269043, -1.3088610172271729, -1.0222482681274414, -0.7356353998184204, -0.4490225315093994, -0.16241073608398438, 0.12420207262039185, 0.41081488132476807, 0.6974276900291443, 0.9840404987335205, 1.270653247833252, 1.557266116142273, 1.843878984451294, 2.1304917335510254, 2.417104482650757, 2.7037172317504883, 2.990330219268799, 3.2769429683685303, 3.5635557174682617, 3.8501687049865723, 4.136781692504883, 4.423394203186035, 4.710007190704346, 4.996619701385498, 5.283232688903809, 5.569845199584961, 5.8564581871032715, 6.143071174621582, 6.429683685302734, 6.716296672821045, 7.0029096603393555, 7.289522171020508, 7.576135158538818, 7.862748146057129, 8.149360656738281, 8.435973167419434, 8.722586631774902, 9.009199142456055]}, "gradients/encoder.encoder.layers.4.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 3.0, 1.0, 2.0, 3.0, 3.0, 10.0, 5.0, 7.0, 20.0, 14.0, 16.0, 16.0, 19.0, 28.0, 28.0, 37.0, 35.0, 44.0, 38.0, 46.0, 47.0, 62.0, 51.0, 46.0, 55.0, 44.0, 48.0, 38.0, 40.0, 30.0, 29.0, 19.0, 32.0, 19.0, 12.0, 11.0, 16.0, 4.0, 10.0, 3.0, 6.0, 8.0, 2.0, 5.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-4.3766865730285645, -4.241964340209961, -4.107242107391357, -3.972520112991333, -3.8377978801727295, -3.703075647354126, -3.5683536529541016, -3.433631420135498, -3.2989091873168945, -3.164186954498291, -3.0294647216796875, -2.894742727279663, -2.7600204944610596, -2.625298261642456, -2.4905762672424316, -2.355854034423828, -2.2211318016052246, -2.086409568786621, -1.9516874551773071, -1.8169653415679932, -1.6822431087493896, -1.5475208759307861, -1.4127987623214722, -1.2780766487121582, -1.1433544158935547, -1.0086321830749512, -0.8739100694656372, -0.7391878962516785, -0.6044657230377197, -0.469743549823761, -0.33502137660980225, -0.2002992033958435, -0.06557655334472656, 0.06914561986923218, 0.20386779308319092, 0.33858996629714966, 0.4733121395111084, 0.6080343127250671, 0.7427564859390259, 0.8774786591529846, 1.0122008323669434, 1.1469230651855469, 1.2816451787948608, 1.4163672924041748, 1.5510895252227783, 1.6858117580413818, 1.8205338716506958, 1.9552559852600098, 2.0899782180786133, 2.224700450897217, 2.3594226837158203, 2.4941446781158447, 2.6288669109344482, 2.7635891437530518, 2.898311138153076, 3.0330333709716797, 3.167755603790283, 3.3024778366088867, 3.4372000694274902, 3.5719220638275146, 3.706644296646118, 3.8413665294647217, 3.976088523864746, 4.11081075668335, 4.245532989501953]}, "gradients/encoder.encoder.layers.4.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 5.0, 1.0, 2.0, 9.0, 15.0, 20.0, 43.0, 45.0, 88.0, 160.0, 273.0, 607.0, 1091.0, 2639.0, 6787.0, 20665.0, 80607.0, 340930.0, 437857.0, 113879.0, 28102.0, 8621.0, 3290.0, 1401.0, 657.0, 323.0, 179.0, 101.0, 60.0, 38.0, 27.0, 15.0, 7.0, 9.0, 1.0, 6.0, 0.0, 2.0, 1.0, 0.0, 2.0, 0.0, 3.0, 0.0, 2.0], "bins": [-1.05859375, -1.0300750732421875, -1.001556396484375, -0.9730377197265625, -0.94451904296875, -0.9160003662109375, -0.887481689453125, -0.8589630126953125, -0.8304443359375, -0.8019256591796875, -0.773406982421875, -0.7448883056640625, -0.71636962890625, -0.6878509521484375, -0.659332275390625, -0.6308135986328125, -0.602294921875, -0.5737762451171875, -0.545257568359375, -0.5167388916015625, -0.48822021484375, -0.4597015380859375, -0.431182861328125, -0.4026641845703125, -0.3741455078125, -0.3456268310546875, -0.317108154296875, -0.2885894775390625, -0.26007080078125, -0.2315521240234375, -0.203033447265625, -0.1745147705078125, -0.14599609375, -0.1174774169921875, -0.088958740234375, -0.0604400634765625, -0.03192138671875, -0.0034027099609375, 0.025115966796875, 0.0536346435546875, 0.0821533203125, 0.1106719970703125, 0.139190673828125, 0.1677093505859375, 0.19622802734375, 0.2247467041015625, 0.253265380859375, 0.2817840576171875, 0.310302734375, 0.3388214111328125, 0.367340087890625, 0.3958587646484375, 0.42437744140625, 0.4528961181640625, 0.481414794921875, 0.5099334716796875, 0.5384521484375, 0.5669708251953125, 0.595489501953125, 0.6240081787109375, 0.65252685546875, 0.6810455322265625, 0.709564208984375, 0.7380828857421875, 0.7666015625]}, "gradients/encoder.encoder.layers.4.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 7.0, 4.0, 7.0, 9.0, 8.0, 19.0, 28.0, 28.0, 38.0, 54.0, 60.0, 82.0, 110.0, 78.0, 86.0, 85.0, 56.0, 83.0, 43.0, 48.0, 22.0, 21.0, 12.0, 9.0, 7.0, 3.0, 4.0, 0.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.85400390625, -0.8294296264648438, -0.8048553466796875, -0.7802810668945312, -0.755706787109375, -0.7311325073242188, -0.7065582275390625, -0.6819839477539062, -0.65740966796875, -0.6328353881835938, -0.6082611083984375, -0.5836868286132812, -0.559112548828125, -0.5345382690429688, -0.5099639892578125, -0.48538970947265625, -0.4608154296875, -0.43624114990234375, -0.4116668701171875, -0.38709259033203125, -0.362518310546875, -0.33794403076171875, -0.3133697509765625, -0.28879547119140625, -0.26422119140625, -0.23964691162109375, -0.2150726318359375, -0.19049835205078125, -0.165924072265625, -0.14134979248046875, -0.1167755126953125, -0.09220123291015625, -0.067626953125, -0.04305267333984375, -0.0184783935546875, 0.00609588623046875, 0.030670166015625, 0.05524444580078125, 0.0798187255859375, 0.10439300537109375, 0.12896728515625, 0.15354156494140625, 0.1781158447265625, 0.20269012451171875, 0.227264404296875, 0.25183868408203125, 0.2764129638671875, 0.30098724365234375, 0.3255615234375, 0.35013580322265625, 0.3747100830078125, 0.39928436279296875, 0.423858642578125, 0.44843292236328125, 0.4730072021484375, 0.49758148193359375, 0.52215576171875, 0.5467300415039062, 0.5713043212890625, 0.5958786010742188, 0.620452880859375, 0.6450271606445312, 0.6696014404296875, 0.6941757202148438, 0.71875]}, "gradients/encoder.encoder.layers.4.attention.v_proj.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 3.0, 6.0, 5.0, 12.0, 16.0, 26.0, 29.0, 29.0, 62.0, 72.0, 125.0, 215.0, 350.0, 558.0, 1205.0, 3399.0, 22804.0, 630962.0, 365553.0, 17389.0, 3058.0, 1160.0, 605.0, 341.0, 203.0, 130.0, 77.0, 53.0, 40.0, 32.0, 10.0, 9.0, 9.0, 5.0, 5.0, 1.0, 4.0, 2.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.5732421875, -1.5101776123046875, -1.447113037109375, -1.3840484619140625, -1.32098388671875, -1.2579193115234375, -1.194854736328125, -1.1317901611328125, -1.0687255859375, -1.0056610107421875, -0.942596435546875, -0.8795318603515625, -0.81646728515625, -0.7534027099609375, -0.690338134765625, -0.6272735595703125, -0.564208984375, -0.5011444091796875, -0.438079833984375, -0.3750152587890625, -0.31195068359375, -0.2488861083984375, -0.185821533203125, -0.1227569580078125, -0.0596923828125, 0.0033721923828125, 0.066436767578125, 0.1295013427734375, 0.19256591796875, 0.2556304931640625, 0.318695068359375, 0.3817596435546875, 0.44482421875, 0.5078887939453125, 0.570953369140625, 0.6340179443359375, 0.69708251953125, 0.7601470947265625, 0.823211669921875, 0.8862762451171875, 0.9493408203125, 1.0124053955078125, 1.075469970703125, 1.1385345458984375, 1.20159912109375, 1.2646636962890625, 1.327728271484375, 1.3907928466796875, 1.453857421875, 1.5169219970703125, 1.579986572265625, 1.6430511474609375, 1.70611572265625, 1.7691802978515625, 1.832244873046875, 1.8953094482421875, 1.9583740234375, 2.0214385986328125, 2.084503173828125, 2.1475677490234375, 2.21063232421875, 2.2736968994140625, 2.336761474609375, 2.3998260498046875, 2.462890625]}, "gradients/encoder.encoder.layers.4.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0, 1.0, 4.0, 4.0, 4.0, 1.0, 10.0, 7.0, 14.0, 7.0, 17.0, 21.0, 18.0, 36.0, 25.0, 22.0, 33.0, 36.0, 30.0, 28.0, 53.0, 50.0, 59.0, 57.0, 57.0, 61.0, 50.0, 33.0, 35.0, 33.0, 42.0, 23.0, 33.0, 20.0, 22.0, 14.0, 11.0, 13.0, 9.0, 6.0, 3.0, 3.0, 2.0, 2.0, 3.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.6083984375, -1.5524749755859375, -1.496551513671875, -1.4406280517578125, -1.38470458984375, -1.3287811279296875, -1.272857666015625, -1.2169342041015625, -1.1610107421875, -1.1050872802734375, -1.049163818359375, -0.9932403564453125, -0.93731689453125, -0.8813934326171875, -0.825469970703125, -0.7695465087890625, -0.713623046875, -0.6576995849609375, -0.601776123046875, -0.5458526611328125, -0.48992919921875, -0.4340057373046875, -0.378082275390625, -0.3221588134765625, -0.2662353515625, -0.2103118896484375, -0.154388427734375, -0.0984649658203125, -0.04254150390625, 0.0133819580078125, 0.069305419921875, 0.1252288818359375, 0.18115234375, 0.2370758056640625, 0.292999267578125, 0.3489227294921875, 0.40484619140625, 0.4607696533203125, 0.516693115234375, 0.5726165771484375, 0.6285400390625, 0.6844635009765625, 0.740386962890625, 0.7963104248046875, 0.85223388671875, 0.9081573486328125, 0.964080810546875, 1.0200042724609375, 1.075927734375, 1.1318511962890625, 1.187774658203125, 1.2436981201171875, 1.29962158203125, 1.3555450439453125, 1.411468505859375, 1.4673919677734375, 1.5233154296875, 1.5792388916015625, 1.635162353515625, 1.6910858154296875, 1.74700927734375, 1.8029327392578125, 1.858856201171875, 1.9147796630859375, 1.970703125]}, "gradients/encoder.encoder.layers.4.attention.k_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 0.0, 0.0, 5.0, 2.0, 0.0, 2.0, 0.0, 3.0, 1.0, 1.0, 3.0, 11.0, 17.0, 9.0, 19.0, 13.0, 19.0, 32.0, 59.0, 81.0, 146.0, 271.0, 710.0, 1947.0, 10253.0, 175761.0, 824561.0, 28864.0, 3811.0, 1056.0, 412.0, 210.0, 96.0, 53.0, 38.0, 23.0, 21.0, 11.0, 7.0, 9.0, 6.0, 8.0, 8.0, 4.0, 3.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.64990234375, -0.6276626586914062, -0.6054229736328125, -0.5831832885742188, -0.560943603515625, -0.5387039184570312, -0.5164642333984375, -0.49422454833984375, -0.47198486328125, -0.44974517822265625, -0.4275054931640625, -0.40526580810546875, -0.383026123046875, -0.36078643798828125, -0.3385467529296875, -0.31630706787109375, -0.2940673828125, -0.27182769775390625, -0.2495880126953125, -0.22734832763671875, -0.205108642578125, -0.18286895751953125, -0.1606292724609375, -0.13838958740234375, -0.11614990234375, -0.09391021728515625, -0.0716705322265625, -0.04943084716796875, -0.027191162109375, -0.00495147705078125, 0.0172882080078125, 0.03952789306640625, 0.061767578125, 0.08400726318359375, 0.1062469482421875, 0.12848663330078125, 0.150726318359375, 0.17296600341796875, 0.1952056884765625, 0.21744537353515625, 0.23968505859375, 0.26192474365234375, 0.2841644287109375, 0.30640411376953125, 0.328643798828125, 0.35088348388671875, 0.3731231689453125, 0.39536285400390625, 0.4176025390625, 0.43984222412109375, 0.4620819091796875, 0.48432159423828125, 0.506561279296875, 0.5288009643554688, 0.5510406494140625, 0.5732803344726562, 0.59552001953125, 0.6177597045898438, 0.6399993896484375, 0.6622390747070312, 0.684478759765625, 0.7067184448242188, 0.7289581298828125, 0.7511978149414062, 0.7734375]}, "gradients/encoder.encoder.layers.4.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 3.0, 3.0, 1.0, 5.0, 0.0, 3.0, 7.0, 9.0, 8.0, 7.0, 12.0, 10.0, 24.0, 21.0, 24.0, 31.0, 45.0, 69.0, 82.0, 117.0, 103.0, 83.0, 91.0, 58.0, 35.0, 32.0, 25.0, 22.0, 9.0, 11.0, 10.0, 17.0, 7.0, 6.0, 3.0, 5.0, 1.0, 2.0, 3.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0], "bins": [-7.730722427368164e-05, -7.474608719348907e-05, -7.218495011329651e-05, -6.962381303310394e-05, -6.706267595291138e-05, -6.450153887271881e-05, -6.194040179252625e-05, -5.937926471233368e-05, -5.681812763214111e-05, -5.425699055194855e-05, -5.169585347175598e-05, -4.9134716391563416e-05, -4.657357931137085e-05, -4.4012442231178284e-05, -4.145130515098572e-05, -3.889016807079315e-05, -3.6329030990600586e-05, -3.376789391040802e-05, -3.1206756830215454e-05, -2.8645619750022888e-05, -2.6084482669830322e-05, -2.3523345589637756e-05, -2.096220850944519e-05, -1.8401071429252625e-05, -1.583993434906006e-05, -1.3278797268867493e-05, -1.0717660188674927e-05, -8.15652310848236e-06, -5.595386028289795e-06, -3.034248948097229e-06, -4.731118679046631e-07, 2.088025212287903e-06, 4.649162292480469e-06, 7.210299372673035e-06, 9.7714364528656e-06, 1.2332573533058167e-05, 1.4893710613250732e-05, 1.74548476934433e-05, 2.0015984773635864e-05, 2.257712185382843e-05, 2.5138258934020996e-05, 2.7699396014213562e-05, 3.0260533094406128e-05, 3.2821670174598694e-05, 3.538280725479126e-05, 3.7943944334983826e-05, 4.050508141517639e-05, 4.306621849536896e-05, 4.5627355575561523e-05, 4.818849265575409e-05, 5.0749629735946655e-05, 5.331076681613922e-05, 5.587190389633179e-05, 5.843304097652435e-05, 6.099417805671692e-05, 6.355531513690948e-05, 6.611645221710205e-05, 6.867758929729462e-05, 7.123872637748718e-05, 7.379986345767975e-05, 7.636100053787231e-05, 7.892213761806488e-05, 8.148327469825745e-05, 8.404441177845001e-05, 8.660554885864258e-05]}, "gradients/encoder.encoder.layers.4.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 3.0, 6.0, 1.0, 6.0, 8.0, 21.0, 23.0, 35.0, 53.0, 92.0, 175.0, 388.0, 922.0, 2847.0, 19398.0, 784106.0, 227706.0, 9544.0, 1951.0, 641.0, 284.0, 126.0, 87.0, 56.0, 35.0, 16.0, 9.0, 8.0, 9.0, 4.0, 1.0, 3.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.49609375, -0.4743804931640625, -0.452667236328125, -0.4309539794921875, -0.40924072265625, -0.3875274658203125, -0.365814208984375, -0.3441009521484375, -0.3223876953125, -0.3006744384765625, -0.278961181640625, -0.2572479248046875, -0.23553466796875, -0.2138214111328125, -0.192108154296875, -0.1703948974609375, -0.148681640625, -0.1269683837890625, -0.105255126953125, -0.0835418701171875, -0.06182861328125, -0.0401153564453125, -0.018402099609375, 0.0033111572265625, 0.0250244140625, 0.0467376708984375, 0.068450927734375, 0.0901641845703125, 0.11187744140625, 0.1335906982421875, 0.155303955078125, 0.1770172119140625, 0.19873046875, 0.2204437255859375, 0.242156982421875, 0.2638702392578125, 0.28558349609375, 0.3072967529296875, 0.329010009765625, 0.3507232666015625, 0.3724365234375, 0.3941497802734375, 0.415863037109375, 0.4375762939453125, 0.45928955078125, 0.4810028076171875, 0.502716064453125, 0.5244293212890625, 0.546142578125, 0.5678558349609375, 0.589569091796875, 0.6112823486328125, 0.63299560546875, 0.6547088623046875, 0.676422119140625, 0.6981353759765625, 0.7198486328125, 0.7415618896484375, 0.763275146484375, 0.7849884033203125, 0.80670166015625, 0.8284149169921875, 0.850128173828125, 0.8718414306640625, 0.8935546875]}, "gradients/encoder.encoder.layers.4.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 0.0, 5.0, 5.0, 5.0, 3.0, 7.0, 16.0, 18.0, 28.0, 46.0, 58.0, 74.0, 96.0, 125.0, 112.0, 109.0, 90.0, 59.0, 56.0, 28.0, 18.0, 14.0, 7.0, 6.0, 8.0, 5.0, 3.0, 4.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.544921875, -0.5286026000976562, -0.5122833251953125, -0.49596405029296875, -0.479644775390625, -0.46332550048828125, -0.4470062255859375, -0.43068695068359375, -0.41436767578125, -0.39804840087890625, -0.3817291259765625, -0.36540985107421875, -0.349090576171875, -0.33277130126953125, -0.3164520263671875, -0.30013275146484375, -0.2838134765625, -0.26749420166015625, -0.2511749267578125, -0.23485565185546875, -0.218536376953125, -0.20221710205078125, -0.1858978271484375, -0.16957855224609375, -0.15325927734375, -0.13694000244140625, -0.1206207275390625, -0.10430145263671875, -0.087982177734375, -0.07166290283203125, -0.0553436279296875, -0.03902435302734375, -0.022705078125, -0.00638580322265625, 0.0099334716796875, 0.02625274658203125, 0.042572021484375, 0.05889129638671875, 0.0752105712890625, 0.09152984619140625, 0.10784912109375, 0.12416839599609375, 0.1404876708984375, 0.15680694580078125, 0.173126220703125, 0.18944549560546875, 0.2057647705078125, 0.22208404541015625, 0.2384033203125, 0.25472259521484375, 0.2710418701171875, 0.28736114501953125, 0.303680419921875, 0.31999969482421875, 0.3363189697265625, 0.35263824462890625, 0.36895751953125, 0.38527679443359375, 0.4015960693359375, 0.41791534423828125, 0.434234619140625, 0.45055389404296875, 0.4668731689453125, 0.48319244384765625, 0.49951171875]}, "gradients/encoder.encoder.layers.4.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 1.0, 1.0, 1.0, 4.0, 2.0, 3.0, 2.0, 5.0, 4.0, 8.0, 11.0, 33.0, 45.0, 101.0, 165.0, 269.0, 186.0, 84.0, 45.0, 18.0, 10.0, 6.0, 2.0, 0.0, 4.0, 2.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-13.406059265136719, -13.091744422912598, -12.777429580688477, -12.463113784790039, -12.148798942565918, -11.834484100341797, -11.520169258117676, -11.205854415893555, -10.891538619995117, -10.577223777770996, -10.262908935546875, -9.948593139648438, -9.634278297424316, -9.319963455200195, -9.005648612976074, -8.691333770751953, -8.377018928527832, -8.062704086303711, -7.748388767242432, -7.4340739250183105, -7.119758605957031, -6.80544376373291, -6.491128921508789, -6.176814079284668, -5.862498760223389, -5.548183917999268, -5.233868598937988, -4.919553756713867, -4.605238914489746, -4.290923595428467, -3.9766087532043457, -3.6622936725616455, -3.3479795455932617, -3.0336644649505615, -2.7193493843078613, -2.4050345420837402, -2.09071946144104, -1.7764043807983398, -1.4620894193649292, -1.1477744579315186, -0.8334593772888184, -0.5191443562507629, -0.20482933521270752, 0.1094856858253479, 0.4238007068634033, 0.7381157875061035, 1.0524307489395142, 1.3667457103729248, 1.681060791015625, 1.9953758716583252, 2.3096909523010254, 2.6240057945251465, 2.9383208751678467, 3.252635955810547, 3.566950798034668, 3.881265878677368, 4.195580959320068, 4.5098958015441895, 4.824211120605469, 5.13852596282959, 5.452840805053711, 5.76715612411499, 6.081470966339111, 6.395786285400391, 6.710101127624512]}, "gradients/encoder.encoder.layers.4.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 5.0, 3.0, 6.0, 3.0, 5.0, 5.0, 7.0, 7.0, 12.0, 15.0, 19.0, 20.0, 18.0, 20.0, 27.0, 21.0, 33.0, 33.0, 39.0, 42.0, 76.0, 60.0, 69.0, 88.0, 58.0, 46.0, 28.0, 25.0, 24.0, 26.0, 21.0, 35.0, 19.0, 21.0, 12.0, 17.0, 7.0, 5.0, 5.0, 7.0, 4.0, 4.0, 3.0, 6.0, 2.0, 3.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0], "bins": [-7.3751115798950195, -7.153839588165283, -6.932567596435547, -6.7112956047058105, -6.490023612976074, -6.26875114440918, -6.047479152679443, -5.826207160949707, -5.604935169219971, -5.383663177490234, -5.162391185760498, -4.941119194030762, -4.719846725463867, -4.498575210571289, -4.2773027420043945, -4.056030750274658, -3.834758758544922, -3.6134867668151855, -3.392214775085449, -3.170942544937134, -2.9496705532073975, -2.728398561477661, -2.5071263313293457, -2.2858543395996094, -2.064582347869873, -1.8433103561401367, -1.6220382452011108, -1.400766134262085, -1.1794941425323486, -0.9582221508026123, -0.7369500398635864, -0.5156779289245605, -0.2944064140319824, -0.07313436269760132, 0.14813768863677979, 0.3694097399711609, 0.590681791305542, 0.8119537830352783, 1.0332258939743042, 1.25449800491333, 1.4757699966430664, 1.6970419883728027, 1.9183140993118286, 2.1395862102508545, 2.360858201980591, 2.582130193710327, 2.8034024238586426, 3.024674415588379, 3.2459464073181152, 3.4672183990478516, 3.688490390777588, 3.9097626209259033, 4.131034851074219, 4.352306365966797, 4.573578834533691, 4.794850826263428, 5.016122817993164, 5.2373948097229, 5.458666801452637, 5.679938793182373, 5.901210784912109, 6.122483253479004, 6.34375524520874, 6.565027236938477, 6.786299228668213]}, "gradients/encoder.encoder.layers.3.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 3.0, 1.0, 2.0, 12.0, 11.0, 11.0, 29.0, 36.0, 60.0, 120.0, 305.0, 656.0, 1942.0, 9935.0, 149303.0, 3719025.0, 297144.0, 12021.0, 2269.0, 707.0, 279.0, 169.0, 93.0, 53.0, 29.0, 24.0, 16.0, 9.0, 8.0, 5.0, 4.0, 6.0, 3.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.9931640625, -0.9617156982421875, -0.930267333984375, -0.8988189697265625, -0.86737060546875, -0.8359222412109375, -0.804473876953125, -0.7730255126953125, -0.7415771484375, -0.7101287841796875, -0.678680419921875, -0.6472320556640625, -0.61578369140625, -0.5843353271484375, -0.552886962890625, -0.5214385986328125, -0.489990234375, -0.4585418701171875, -0.427093505859375, -0.3956451416015625, -0.36419677734375, -0.3327484130859375, -0.301300048828125, -0.2698516845703125, -0.2384033203125, -0.2069549560546875, -0.175506591796875, -0.1440582275390625, -0.11260986328125, -0.0811614990234375, -0.049713134765625, -0.0182647705078125, 0.01318359375, 0.0446319580078125, 0.076080322265625, 0.1075286865234375, 0.13897705078125, 0.1704254150390625, 0.201873779296875, 0.2333221435546875, 0.2647705078125, 0.2962188720703125, 0.327667236328125, 0.3591156005859375, 0.39056396484375, 0.4220123291015625, 0.453460693359375, 0.4849090576171875, 0.516357421875, 0.5478057861328125, 0.579254150390625, 0.6107025146484375, 0.64215087890625, 0.6735992431640625, 0.705047607421875, 0.7364959716796875, 0.7679443359375, 0.7993927001953125, 0.830841064453125, 0.8622894287109375, 0.89373779296875, 0.9251861572265625, 0.956634521484375, 0.9880828857421875, 1.01953125]}, "gradients/encoder.encoder.layers.3.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 4.0, 3.0, 5.0, 5.0, 9.0, 18.0, 17.0, 30.0, 34.0, 40.0, 64.0, 61.0, 99.0, 86.0, 95.0, 92.0, 82.0, 64.0, 53.0, 49.0, 33.0, 22.0, 10.0, 10.0, 9.0, 7.0, 4.0, 3.0, 1.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.8671875, -0.8426132202148438, -0.8180389404296875, -0.7934646606445312, -0.768890380859375, -0.7443161010742188, -0.7197418212890625, -0.6951675415039062, -0.67059326171875, -0.6460189819335938, -0.6214447021484375, -0.5968704223632812, -0.572296142578125, -0.5477218627929688, -0.5231475830078125, -0.49857330322265625, -0.4739990234375, -0.44942474365234375, -0.4248504638671875, -0.40027618408203125, -0.375701904296875, -0.35112762451171875, -0.3265533447265625, -0.30197906494140625, -0.27740478515625, -0.25283050537109375, -0.2282562255859375, -0.20368194580078125, -0.179107666015625, -0.15453338623046875, -0.1299591064453125, -0.10538482666015625, -0.080810546875, -0.05623626708984375, -0.0316619873046875, -0.00708770751953125, 0.017486572265625, 0.04206085205078125, 0.0666351318359375, 0.09120941162109375, 0.11578369140625, 0.14035797119140625, 0.1649322509765625, 0.18950653076171875, 0.214080810546875, 0.23865509033203125, 0.2632293701171875, 0.28780364990234375, 0.3123779296875, 0.33695220947265625, 0.3615264892578125, 0.38610076904296875, 0.410675048828125, 0.43524932861328125, 0.4598236083984375, 0.48439788818359375, 0.50897216796875, 0.5335464477539062, 0.5581207275390625, 0.5826950073242188, 0.607269287109375, 0.6318435668945312, 0.6564178466796875, 0.6809921264648438, 0.70556640625]}, "gradients/encoder.encoder.layers.3.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 1.0, 1.0, 2.0, 1.0, 4.0, 6.0, 3.0, 5.0, 8.0, 10.0, 6.0, 16.0, 25.0, 32.0, 85.0, 119.0, 262.0, 505.0, 1088.0, 2641.0, 9104.0, 63157.0, 3368683.0, 711049.0, 27958.0, 5567.0, 2077.0, 905.0, 421.0, 244.0, 117.0, 66.0, 43.0, 18.0, 12.0, 13.0, 9.0, 7.0, 6.0, 3.0, 2.0, 3.0, 3.0, 1.0, 0.0, 1.0, 2.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.93115234375, -0.8999252319335938, -0.8686981201171875, -0.8374710083007812, -0.806243896484375, -0.7750167846679688, -0.7437896728515625, -0.7125625610351562, -0.68133544921875, -0.6501083374023438, -0.6188812255859375, -0.5876541137695312, -0.556427001953125, -0.5251998901367188, -0.4939727783203125, -0.46274566650390625, -0.4315185546875, -0.40029144287109375, -0.3690643310546875, -0.33783721923828125, -0.306610107421875, -0.27538299560546875, -0.2441558837890625, -0.21292877197265625, -0.18170166015625, -0.15047454833984375, -0.1192474365234375, -0.08802032470703125, -0.056793212890625, -0.02556610107421875, 0.0056610107421875, 0.03688812255859375, 0.068115234375, 0.09934234619140625, 0.1305694580078125, 0.16179656982421875, 0.193023681640625, 0.22425079345703125, 0.2554779052734375, 0.28670501708984375, 0.31793212890625, 0.34915924072265625, 0.3803863525390625, 0.41161346435546875, 0.442840576171875, 0.47406768798828125, 0.5052947998046875, 0.5365219116210938, 0.5677490234375, 0.5989761352539062, 0.6302032470703125, 0.6614303588867188, 0.692657470703125, 0.7238845825195312, 0.7551116943359375, 0.7863388061523438, 0.81756591796875, 0.8487930297851562, 0.8800201416015625, 0.9112472534179688, 0.942474365234375, 0.9737014770507812, 1.0049285888671875, 1.0361557006835938, 1.0673828125]}, "gradients/encoder.encoder.layers.3.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 1.0, 1.0, 1.0, 1.0, 3.0, 1.0, 0.0, 3.0, 8.0, 9.0, 12.0, 24.0, 22.0, 39.0, 64.0, 132.0, 321.0, 706.0, 1175.0, 828.0, 360.0, 152.0, 89.0, 40.0, 27.0, 26.0, 13.0, 4.0, 8.0, 5.0, 3.0, 0.0, 4.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.0302734375, -0.99658203125, -0.962890625, -0.92919921875, -0.8955078125, -0.86181640625, -0.828125, -0.79443359375, -0.7607421875, -0.72705078125, -0.693359375, -0.65966796875, -0.6259765625, -0.59228515625, -0.55859375, -0.52490234375, -0.4912109375, -0.45751953125, -0.423828125, -0.39013671875, -0.3564453125, -0.32275390625, -0.2890625, -0.25537109375, -0.2216796875, -0.18798828125, -0.154296875, -0.12060546875, -0.0869140625, -0.05322265625, -0.01953125, 0.01416015625, 0.0478515625, 0.08154296875, 0.115234375, 0.14892578125, 0.1826171875, 0.21630859375, 0.25, 0.28369140625, 0.3173828125, 0.35107421875, 0.384765625, 0.41845703125, 0.4521484375, 0.48583984375, 0.51953125, 0.55322265625, 0.5869140625, 0.62060546875, 0.654296875, 0.68798828125, 0.7216796875, 0.75537109375, 0.7890625, 0.82275390625, 0.8564453125, 0.89013671875, 0.923828125, 0.95751953125, 0.9912109375, 1.02490234375, 1.05859375, 1.09228515625, 1.1259765625]}, "gradients/encoder.encoder.layers.3.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 4.0, 4.0, 3.0, 5.0, 10.0, 21.0, 44.0, 119.0, 246.0, 252.0, 152.0, 63.0, 32.0, 16.0, 9.0, 8.0, 8.0, 3.0, 1.0, 1.0, 3.0, 1.0, 3.0, 1.0, 2.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-8.174121856689453, -7.919416427612305, -7.6647114753723145, -7.410006046295166, -7.155301094055176, -6.900595664978027, -6.645890235900879, -6.3911848068237305, -6.13647985458374, -5.881774425506592, -5.627069473266602, -5.372364044189453, -5.117658615112305, -4.8629536628723145, -4.608248233795166, -4.353543281555176, -4.098837852478027, -3.844132661819458, -3.5894274711608887, -3.3347220420837402, -3.080016851425171, -2.8253116607666016, -2.570606231689453, -2.315901041030884, -2.0611958503723145, -1.8064906597137451, -1.5517853498458862, -1.2970800399780273, -1.042374849319458, -0.7876696586608887, -0.5329643487930298, -0.2782590389251709, -0.023553848266601562, 0.23115140199661255, 0.48585665225982666, 0.7405619025230408, 0.9952671527862549, 1.2499723434448242, 1.504677653312683, 1.759382963180542, 2.0140881538391113, 2.2687933444976807, 2.52349853515625, 2.7782039642333984, 3.0329091548919678, 3.287614345550537, 3.5423197746276855, 3.797024965286255, 4.051730155944824, 4.306435585021973, 4.561140537261963, 4.815845966339111, 5.070550918579102, 5.32525634765625, 5.579961776733398, 5.834667205810547, 6.089372158050537, 6.3440775871276855, 6.598782539367676, 6.853487968444824, 7.108193397521973, 7.362898349761963, 7.617603778839111, 7.872308731079102, 8.12701416015625]}, "gradients/encoder.encoder.layers.3.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 2.0, 3.0, 11.0, 14.0, 10.0, 17.0, 24.0, 30.0, 35.0, 45.0, 44.0, 69.0, 56.0, 73.0, 105.0, 77.0, 66.0, 66.0, 58.0, 53.0, 39.0, 32.0, 29.0, 10.0, 16.0, 12.0, 5.0, 5.0, 1.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.137676239013672, -5.969976425170898, -5.802276611328125, -5.634576797485352, -5.466876983642578, -5.299177169799805, -5.131477355957031, -4.963777542114258, -4.796077728271484, -4.628377914428711, -4.4606781005859375, -4.292978286743164, -4.125278472900391, -3.957578659057617, -3.7898788452148438, -3.6221790313720703, -3.454479455947876, -3.2867796421051025, -3.119079828262329, -2.9513800144195557, -2.7836802005767822, -2.615980386734009, -2.4482808113098145, -2.280580997467041, -2.1128811836242676, -1.9451813697814941, -1.7774815559387207, -1.6097817420959473, -1.4420819282531738, -1.2743821144104004, -1.1066824197769165, -0.9389826059341431, -0.7712826728820801, -0.6035828590393066, -0.4358830749988556, -0.26818329095840454, -0.1004834771156311, 0.06721633672714233, 0.234916090965271, 0.40261590480804443, 0.5703157186508179, 0.7380155324935913, 0.9057153463363647, 1.0734150409698486, 1.241114854812622, 1.4088146686553955, 1.576514482498169, 1.7442142963409424, 1.9119141101837158, 2.0796139240264893, 2.2473137378692627, 2.415013551712036, 2.5827133655548096, 2.750413179397583, 2.9181127548217773, 3.085812568664551, 3.253512382507324, 3.4212121963500977, 3.588912010192871, 3.7566118240356445, 3.924311637878418, 4.092011451721191, 4.259711265563965, 4.427411079406738, 4.595110893249512]}, "gradients/encoder.encoder.layers.3.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 2.0, 3.0, 2.0, 2.0, 6.0, 3.0, 6.0, 8.0, 18.0, 27.0, 37.0, 85.0, 115.0, 265.0, 515.0, 1258.0, 3878.0, 15113.0, 86926.0, 505120.0, 366210.0, 53885.0, 10267.0, 2832.0, 1038.0, 459.0, 206.0, 111.0, 57.0, 33.0, 19.0, 11.0, 13.0, 11.0, 3.0, 9.0, 0.0, 3.0, 0.0, 1.0, 1.0, 1.0, 3.0, 0.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.9921875, -0.960052490234375, -0.92791748046875, -0.895782470703125, -0.8636474609375, -0.831512451171875, -0.79937744140625, -0.767242431640625, -0.735107421875, -0.702972412109375, -0.67083740234375, -0.638702392578125, -0.6065673828125, -0.574432373046875, -0.54229736328125, -0.510162353515625, -0.47802734375, -0.445892333984375, -0.41375732421875, -0.381622314453125, -0.3494873046875, -0.317352294921875, -0.28521728515625, -0.253082275390625, -0.220947265625, -0.188812255859375, -0.15667724609375, -0.124542236328125, -0.0924072265625, -0.060272216796875, -0.02813720703125, 0.003997802734375, 0.0361328125, 0.068267822265625, 0.10040283203125, 0.132537841796875, 0.1646728515625, 0.196807861328125, 0.22894287109375, 0.261077880859375, 0.293212890625, 0.325347900390625, 0.35748291015625, 0.389617919921875, 0.4217529296875, 0.453887939453125, 0.48602294921875, 0.518157958984375, 0.55029296875, 0.582427978515625, 0.61456298828125, 0.646697998046875, 0.6788330078125, 0.710968017578125, 0.74310302734375, 0.775238037109375, 0.807373046875, 0.839508056640625, 0.87164306640625, 0.903778076171875, 0.9359130859375, 0.968048095703125, 1.00018310546875, 1.032318115234375, 1.064453125]}, "gradients/encoder.encoder.layers.3.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 2.0, 4.0, 4.0, 7.0, 4.0, 5.0, 11.0, 25.0, 24.0, 33.0, 49.0, 58.0, 84.0, 106.0, 77.0, 77.0, 88.0, 85.0, 66.0, 56.0, 36.0, 34.0, 24.0, 13.0, 14.0, 10.0, 5.0, 7.0, 1.0, 2.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.85986328125, -0.8354644775390625, -0.811065673828125, -0.7866668701171875, -0.76226806640625, -0.7378692626953125, -0.713470458984375, -0.6890716552734375, -0.6646728515625, -0.6402740478515625, -0.615875244140625, -0.5914764404296875, -0.56707763671875, -0.5426788330078125, -0.518280029296875, -0.4938812255859375, -0.469482421875, -0.4450836181640625, -0.420684814453125, -0.3962860107421875, -0.37188720703125, -0.3474884033203125, -0.323089599609375, -0.2986907958984375, -0.2742919921875, -0.2498931884765625, -0.225494384765625, -0.2010955810546875, -0.17669677734375, -0.1522979736328125, -0.127899169921875, -0.1035003662109375, -0.0791015625, -0.0547027587890625, -0.030303955078125, -0.0059051513671875, 0.01849365234375, 0.0428924560546875, 0.067291259765625, 0.0916900634765625, 0.1160888671875, 0.1404876708984375, 0.164886474609375, 0.1892852783203125, 0.21368408203125, 0.2380828857421875, 0.262481689453125, 0.2868804931640625, 0.311279296875, 0.3356781005859375, 0.360076904296875, 0.3844757080078125, 0.40887451171875, 0.4332733154296875, 0.457672119140625, 0.4820709228515625, 0.5064697265625, 0.5308685302734375, 0.555267333984375, 0.5796661376953125, 0.60406494140625, 0.6284637451171875, 0.652862548828125, 0.6772613525390625, 0.70166015625]}, "gradients/encoder.encoder.layers.3.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 3.0, 4.0, 9.0, 2.0, 5.0, 15.0, 18.0, 25.0, 22.0, 32.0, 43.0, 73.0, 143.0, 239.0, 594.0, 2396.0, 17745.0, 288439.0, 696987.0, 36315.0, 3806.0, 906.0, 308.0, 150.0, 74.0, 54.0, 37.0, 35.0, 22.0, 19.0, 15.0, 8.0, 5.0, 7.0, 5.0, 5.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.2724609375, -1.2218475341796875, -1.171234130859375, -1.1206207275390625, -1.07000732421875, -1.0193939208984375, -0.968780517578125, -0.9181671142578125, -0.8675537109375, -0.8169403076171875, -0.766326904296875, -0.7157135009765625, -0.66510009765625, -0.6144866943359375, -0.563873291015625, -0.5132598876953125, -0.462646484375, -0.4120330810546875, -0.361419677734375, -0.3108062744140625, -0.26019287109375, -0.2095794677734375, -0.158966064453125, -0.1083526611328125, -0.0577392578125, -0.0071258544921875, 0.043487548828125, 0.0941009521484375, 0.14471435546875, 0.1953277587890625, 0.245941162109375, 0.2965545654296875, 0.34716796875, 0.3977813720703125, 0.448394775390625, 0.4990081787109375, 0.54962158203125, 0.6002349853515625, 0.650848388671875, 0.7014617919921875, 0.7520751953125, 0.8026885986328125, 0.853302001953125, 0.9039154052734375, 0.95452880859375, 1.0051422119140625, 1.055755615234375, 1.1063690185546875, 1.156982421875, 1.2075958251953125, 1.258209228515625, 1.3088226318359375, 1.35943603515625, 1.4100494384765625, 1.460662841796875, 1.5112762451171875, 1.5618896484375, 1.6125030517578125, 1.663116455078125, 1.7137298583984375, 1.76434326171875, 1.8149566650390625, 1.865570068359375, 1.9161834716796875, 1.966796875]}, "gradients/encoder.encoder.layers.3.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 2.0, 4.0, 3.0, 4.0, 4.0, 5.0, 10.0, 9.0, 16.0, 15.0, 19.0, 18.0, 38.0, 32.0, 41.0, 37.0, 44.0, 55.0, 48.0, 55.0, 59.0, 49.0, 48.0, 53.0, 58.0, 34.0, 34.0, 41.0, 33.0, 33.0, 24.0, 13.0, 11.0, 16.0, 9.0, 7.0, 9.0, 4.0, 5.0, 3.0, 0.0, 1.0, 2.0, 2.0, 2.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-1.64453125, -1.588623046875, -1.53271484375, -1.476806640625, -1.4208984375, -1.364990234375, -1.30908203125, -1.253173828125, -1.197265625, -1.141357421875, -1.08544921875, -1.029541015625, -0.9736328125, -0.917724609375, -0.86181640625, -0.805908203125, -0.75, -0.694091796875, -0.63818359375, -0.582275390625, -0.5263671875, -0.470458984375, -0.41455078125, -0.358642578125, -0.302734375, -0.246826171875, -0.19091796875, -0.135009765625, -0.0791015625, -0.023193359375, 0.03271484375, 0.088623046875, 0.14453125, 0.200439453125, 0.25634765625, 0.312255859375, 0.3681640625, 0.424072265625, 0.47998046875, 0.535888671875, 0.591796875, 0.647705078125, 0.70361328125, 0.759521484375, 0.8154296875, 0.871337890625, 0.92724609375, 0.983154296875, 1.0390625, 1.094970703125, 1.15087890625, 1.206787109375, 1.2626953125, 1.318603515625, 1.37451171875, 1.430419921875, 1.486328125, 1.542236328125, 1.59814453125, 1.654052734375, 1.7099609375, 1.765869140625, 1.82177734375, 1.877685546875, 1.93359375]}, "gradients/encoder.encoder.layers.3.attention.k_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 3.0, 0.0, 4.0, 1.0, 12.0, 13.0, 23.0, 27.0, 57.0, 95.0, 205.0, 458.0, 1018.0, 2797.0, 10517.0, 58005.0, 695990.0, 241905.0, 28067.0, 6076.0, 1884.0, 710.0, 308.0, 169.0, 102.0, 49.0, 27.0, 16.0, 9.0, 5.0, 3.0, 6.0, 3.0, 1.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.270751953125, -0.2593803405761719, -0.24800872802734375, -0.23663711547851562, -0.2252655029296875, -0.21389389038085938, -0.20252227783203125, -0.19115066528320312, -0.179779052734375, -0.16840744018554688, -0.15703582763671875, -0.14566421508789062, -0.1342926025390625, -0.12292098999023438, -0.11154937744140625, -0.10017776489257812, -0.08880615234375, -0.07743453979492188, -0.06606292724609375, -0.054691314697265625, -0.0433197021484375, -0.031948089599609375, -0.02057647705078125, -0.009204864501953125, 0.002166748046875, 0.013538360595703125, 0.02490997314453125, 0.036281585693359375, 0.0476531982421875, 0.059024810791015625, 0.07039642333984375, 0.08176803588867188, 0.0931396484375, 0.10451126098632812, 0.11588287353515625, 0.12725448608398438, 0.1386260986328125, 0.14999771118164062, 0.16136932373046875, 0.17274093627929688, 0.184112548828125, 0.19548416137695312, 0.20685577392578125, 0.21822738647460938, 0.2295989990234375, 0.24097061157226562, 0.25234222412109375, 0.2637138366699219, 0.27508544921875, 0.2864570617675781, 0.29782867431640625, 0.3092002868652344, 0.3205718994140625, 0.3319435119628906, 0.34331512451171875, 0.3546867370605469, 0.366058349609375, 0.3774299621582031, 0.38880157470703125, 0.4001731872558594, 0.4115447998046875, 0.4229164123535156, 0.43428802490234375, 0.4456596374511719, 0.45703125]}, "gradients/encoder.encoder.layers.3.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 3.0, 5.0, 5.0, 4.0, 7.0, 8.0, 4.0, 6.0, 13.0, 26.0, 30.0, 45.0, 55.0, 76.0, 88.0, 111.0, 115.0, 91.0, 70.0, 53.0, 51.0, 25.0, 23.0, 28.0, 22.0, 9.0, 9.0, 7.0, 5.0, 4.0, 2.0, 3.0, 0.0, 1.0, 1.0, 2.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.798173904418945e-05, -4.59868460893631e-05, -4.399195313453674e-05, -4.199706017971039e-05, -4.000216722488403e-05, -3.800727427005768e-05, -3.601238131523132e-05, -3.401748836040497e-05, -3.202259540557861e-05, -3.0027702450752258e-05, -2.8032809495925903e-05, -2.603791654109955e-05, -2.4043023586273193e-05, -2.204813063144684e-05, -2.0053237676620483e-05, -1.805834472179413e-05, -1.6063451766967773e-05, -1.4068558812141418e-05, -1.2073665857315063e-05, -1.0078772902488708e-05, -8.083879947662354e-06, -6.0889869928359985e-06, -4.0940940380096436e-06, -2.0992010831832886e-06, -1.043081283569336e-07, 1.8905848264694214e-06, 3.885477781295776e-06, 5.880370736122131e-06, 7.875263690948486e-06, 9.870156645774841e-06, 1.1865049600601196e-05, 1.3859942555427551e-05, 1.5854835510253906e-05, 1.784972846508026e-05, 1.9844621419906616e-05, 2.183951437473297e-05, 2.3834407329559326e-05, 2.582930028438568e-05, 2.7824193239212036e-05, 2.981908619403839e-05, 3.1813979148864746e-05, 3.38088721036911e-05, 3.5803765058517456e-05, 3.779865801334381e-05, 3.9793550968170166e-05, 4.178844392299652e-05, 4.3783336877822876e-05, 4.577822983264923e-05, 4.7773122787475586e-05, 4.976801574230194e-05, 5.1762908697128296e-05, 5.375780165195465e-05, 5.5752694606781006e-05, 5.774758756160736e-05, 5.9742480516433716e-05, 6.173737347126007e-05, 6.373226642608643e-05, 6.572715938091278e-05, 6.772205233573914e-05, 6.971694529056549e-05, 7.171183824539185e-05, 7.37067312002182e-05, 7.570162415504456e-05, 7.769651710987091e-05, 7.969141006469727e-05]}, "gradients/encoder.encoder.layers.3.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 4.0, 6.0, 6.0, 13.0, 20.0, 24.0, 39.0, 88.0, 143.0, 306.0, 921.0, 4756.0, 51691.0, 842038.0, 137009.0, 9243.0, 1438.0, 418.0, 166.0, 106.0, 45.0, 39.0, 14.0, 11.0, 11.0, 4.0, 1.0, 4.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.364501953125, -0.3490943908691406, -0.33368682861328125, -0.3182792663574219, -0.3028717041015625, -0.2874641418457031, -0.27205657958984375, -0.2566490173339844, -0.241241455078125, -0.22583389282226562, -0.21042633056640625, -0.19501876831054688, -0.1796112060546875, -0.16420364379882812, -0.14879608154296875, -0.13338851928710938, -0.11798095703125, -0.10257339477539062, -0.08716583251953125, -0.07175827026367188, -0.0563507080078125, -0.040943145751953125, -0.02553558349609375, -0.010128021240234375, 0.005279541015625, 0.020687103271484375, 0.03609466552734375, 0.051502227783203125, 0.0669097900390625, 0.08231735229492188, 0.09772491455078125, 0.11313247680664062, 0.1285400390625, 0.14394760131835938, 0.15935516357421875, 0.17476272583007812, 0.1901702880859375, 0.20557785034179688, 0.22098541259765625, 0.23639297485351562, 0.251800537109375, 0.2672080993652344, 0.28261566162109375, 0.2980232238769531, 0.3134307861328125, 0.3288383483886719, 0.34424591064453125, 0.3596534729003906, 0.37506103515625, 0.3904685974121094, 0.40587615966796875, 0.4212837219238281, 0.4366912841796875, 0.4520988464355469, 0.46750640869140625, 0.4829139709472656, 0.498321533203125, 0.5137290954589844, 0.5291366577148438, 0.5445442199707031, 0.5599517822265625, 0.5753593444824219, 0.5907669067382812, 0.6061744689941406, 0.62158203125]}, "gradients/encoder.encoder.layers.3.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 5.0, 0.0, 8.0, 9.0, 10.0, 20.0, 31.0, 32.0, 54.0, 76.0, 109.0, 127.0, 134.0, 106.0, 109.0, 69.0, 36.0, 27.0, 19.0, 15.0, 8.0, 5.0, 3.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.3505859375, -0.3341522216796875, -0.317718505859375, -0.3012847900390625, -0.28485107421875, -0.2684173583984375, -0.251983642578125, -0.2355499267578125, -0.2191162109375, -0.2026824951171875, -0.186248779296875, -0.1698150634765625, -0.15338134765625, -0.1369476318359375, -0.120513916015625, -0.1040802001953125, -0.087646484375, -0.0712127685546875, -0.054779052734375, -0.0383453369140625, -0.02191162109375, -0.0054779052734375, 0.010955810546875, 0.0273895263671875, 0.0438232421875, 0.0602569580078125, 0.076690673828125, 0.0931243896484375, 0.10955810546875, 0.1259918212890625, 0.142425537109375, 0.1588592529296875, 0.17529296875, 0.1917266845703125, 0.208160400390625, 0.2245941162109375, 0.24102783203125, 0.2574615478515625, 0.273895263671875, 0.2903289794921875, 0.3067626953125, 0.3231964111328125, 0.339630126953125, 0.3560638427734375, 0.37249755859375, 0.3889312744140625, 0.405364990234375, 0.4217987060546875, 0.438232421875, 0.4546661376953125, 0.471099853515625, 0.4875335693359375, 0.50396728515625, 0.5204010009765625, 0.536834716796875, 0.5532684326171875, 0.5697021484375, 0.5861358642578125, 0.602569580078125, 0.6190032958984375, 0.63543701171875, 0.6518707275390625, 0.668304443359375, 0.6847381591796875, 0.701171875]}, "gradients/encoder.encoder.layers.3.layer_norm.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 3.0, 0.0, 0.0, 7.0, 3.0, 16.0, 68.0, 171.0, 422.0, 187.0, 66.0, 30.0, 13.0, 8.0, 4.0, 6.0, 1.0, 0.0, 3.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-6.818939208984375, -6.402278423309326, -5.985617637634277, -5.56895637512207, -5.1522955894470215, -4.735634803771973, -4.318974018096924, -3.902312994003296, -3.485652208328247, -3.0689914226531982, -2.6523303985595703, -2.2356696128845215, -1.819008708000183, -1.4023478031158447, -0.9856870174407959, -0.569025993347168, -0.15236520767211914, 0.26429566740989685, 0.6809565424919128, 1.0976173877716064, 1.5142782926559448, 1.9309391975402832, 2.347599983215332, 2.76426100730896, 3.180921792984009, 3.5975825786590576, 4.0142436027526855, 4.430904388427734, 4.847565174102783, 5.264225959777832, 5.680887222290039, 6.097548007965088, 6.514208793640137, 6.9308695793151855, 7.347530364990234, 7.764191627502441, 8.180851936340332, 8.597513198852539, 9.01417350769043, 9.430834770202637, 9.847496032714844, 10.26415729522705, 10.680817604064941, 11.097478866577148, 11.514139175415039, 11.930800437927246, 12.347461700439453, 12.764122009277344, 13.180782318115234, 13.597443580627441, 14.014103889465332, 14.430765151977539, 14.84742546081543, 15.264086723327637, 15.680747985839844, 16.097408294677734, 16.514070510864258, 16.93073081970215, 17.347393035888672, 17.764053344726562, 18.180713653564453, 18.597373962402344, 19.014036178588867, 19.430696487426758, 19.84735679626465]}, "gradients/encoder.encoder.layers.3.layer_norm.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 2.0, 1.0, 2.0, 2.0, 3.0, 2.0, 7.0, 4.0, 8.0, 8.0, 7.0, 9.0, 12.0, 13.0, 26.0, 12.0, 27.0, 26.0, 35.0, 39.0, 44.0, 34.0, 40.0, 55.0, 87.0, 94.0, 54.0, 43.0, 45.0, 35.0, 28.0, 34.0, 22.0, 22.0, 25.0, 19.0, 10.0, 20.0, 7.0, 6.0, 10.0, 5.0, 8.0, 8.0, 2.0, 3.0, 2.0, 1.0, 5.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.6862077713012695, -5.483908176422119, -5.281608581542969, -5.07930850982666, -4.87700891494751, -4.674709320068359, -4.472409725189209, -4.270110130310059, -4.06781005859375, -3.8655104637145996, -3.66321063041687, -3.4609110355377197, -3.2586112022399902, -3.05631160736084, -2.8540120124816895, -2.651712417602539, -2.4494128227233887, -2.2471132278442383, -2.044813394546509, -1.8425137996673584, -1.6402140855789185, -1.4379143714904785, -1.2356147766113281, -1.0333150625228882, -0.8310153484344482, -0.6287156343460083, -0.42641597986221313, -0.22411632537841797, -0.021816611289978027, 0.18048310279846191, 0.3827826976776123, 0.5850824117660522, 0.7873821258544922, 0.9896818399429321, 1.191981554031372, 1.3942811489105225, 1.5965808629989624, 1.7988805770874023, 2.0011801719665527, 2.203479766845703, 2.4057796001434326, 2.608079195022583, 2.8103790283203125, 3.012678623199463, 3.2149782180786133, 3.4172780513763428, 3.619577646255493, 3.8218774795532227, 4.024177074432373, 4.226476669311523, 4.428776264190674, 4.631075859069824, 4.833375930786133, 5.035675525665283, 5.237975120544434, 5.440274715423584, 5.642574310302734, 5.844873905181885, 6.047173500061035, 6.249473571777344, 6.451773166656494, 6.6540727615356445, 6.856372356414795, 7.058671951293945, 7.260972023010254]}, "gradients/encoder.encoder.layers.2.feed_forward.output_dense.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 3.0, 0.0, 1.0, 4.0, 4.0, 10.0, 13.0, 23.0, 33.0, 53.0, 97.0, 152.0, 282.0, 552.0, 1400.0, 3717.0, 11287.0, 60842.0, 688798.0, 2938586.0, 433199.0, 41506.0, 8479.0, 2783.0, 1168.0, 562.0, 266.0, 162.0, 92.0, 67.0, 42.0, 25.0, 27.0, 15.0, 12.0, 12.0, 8.0, 4.0, 3.0, 1.0, 2.0, 1.0, 2.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.480224609375, -0.4633216857910156, -0.44641876220703125, -0.4295158386230469, -0.4126129150390625, -0.3957099914550781, -0.37880706787109375, -0.3619041442871094, -0.345001220703125, -0.3280982971191406, -0.31119537353515625, -0.2942924499511719, -0.2773895263671875, -0.2604866027832031, -0.24358367919921875, -0.22668075561523438, -0.20977783203125, -0.19287490844726562, -0.17597198486328125, -0.15906906127929688, -0.1421661376953125, -0.12526321411132812, -0.10836029052734375, -0.09145736694335938, -0.074554443359375, -0.057651519775390625, -0.04074859619140625, -0.023845672607421875, -0.0069427490234375, 0.009960174560546875, 0.02686309814453125, 0.043766021728515625, 0.0606689453125, 0.07757186889648438, 0.09447479248046875, 0.11137771606445312, 0.1282806396484375, 0.14518356323242188, 0.16208648681640625, 0.17898941040039062, 0.195892333984375, 0.21279525756835938, 0.22969818115234375, 0.24660110473632812, 0.2635040283203125, 0.2804069519042969, 0.29730987548828125, 0.3142127990722656, 0.33111572265625, 0.3480186462402344, 0.36492156982421875, 0.3818244934082031, 0.3987274169921875, 0.4156303405761719, 0.43253326416015625, 0.4494361877441406, 0.466339111328125, 0.4832420349121094, 0.5001449584960938, 0.5170478820800781, 0.5339508056640625, 0.5508537292480469, 0.5677566528320312, 0.5846595764160156, 0.6015625]}, "gradients/encoder.encoder.layers.2.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 3.0, 3.0, 9.0, 10.0, 15.0, 16.0, 37.0, 32.0, 54.0, 59.0, 77.0, 90.0, 85.0, 99.0, 103.0, 78.0, 61.0, 52.0, 41.0, 31.0, 17.0, 11.0, 9.0, 6.0, 7.0, 4.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.904296875, -0.87921142578125, -0.8541259765625, -0.82904052734375, -0.803955078125, -0.77886962890625, -0.7537841796875, -0.72869873046875, -0.70361328125, -0.67852783203125, -0.6534423828125, -0.62835693359375, -0.603271484375, -0.57818603515625, -0.5531005859375, -0.52801513671875, -0.5029296875, -0.47784423828125, -0.4527587890625, -0.42767333984375, -0.402587890625, -0.37750244140625, -0.3524169921875, -0.32733154296875, -0.30224609375, -0.27716064453125, -0.2520751953125, -0.22698974609375, -0.201904296875, -0.17681884765625, -0.1517333984375, -0.12664794921875, -0.1015625, -0.07647705078125, -0.0513916015625, -0.02630615234375, -0.001220703125, 0.02386474609375, 0.0489501953125, 0.07403564453125, 0.09912109375, 0.12420654296875, 0.1492919921875, 0.17437744140625, 0.199462890625, 0.22454833984375, 0.2496337890625, 0.27471923828125, 0.2998046875, 0.32489013671875, 0.3499755859375, 0.37506103515625, 0.400146484375, 0.42523193359375, 0.4503173828125, 0.47540283203125, 0.50048828125, 0.52557373046875, 0.5506591796875, 0.57574462890625, 0.600830078125, 0.62591552734375, 0.6510009765625, 0.67608642578125, 0.701171875]}, "gradients/encoder.encoder.layers.2.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 4.0, 5.0, 1.0, 11.0, 8.0, 9.0, 17.0, 26.0, 58.0, 85.0, 188.0, 403.0, 1202.0, 5193.0, 72500.0, 3958470.0, 146614.0, 7077.0, 1484.0, 469.0, 224.0, 97.0, 55.0, 26.0, 18.0, 10.0, 13.0, 4.0, 5.0, 5.0, 3.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.04296875, -1.005157470703125, -0.96734619140625, -0.929534912109375, -0.8917236328125, -0.853912353515625, -0.81610107421875, -0.778289794921875, -0.740478515625, -0.702667236328125, -0.66485595703125, -0.627044677734375, -0.5892333984375, -0.551422119140625, -0.51361083984375, -0.475799560546875, -0.43798828125, -0.400177001953125, -0.36236572265625, -0.324554443359375, -0.2867431640625, -0.248931884765625, -0.21112060546875, -0.173309326171875, -0.135498046875, -0.097686767578125, -0.05987548828125, -0.022064208984375, 0.0157470703125, 0.053558349609375, 0.09136962890625, 0.129180908203125, 0.1669921875, 0.204803466796875, 0.24261474609375, 0.280426025390625, 0.3182373046875, 0.356048583984375, 0.39385986328125, 0.431671142578125, 0.469482421875, 0.507293701171875, 0.54510498046875, 0.582916259765625, 0.6207275390625, 0.658538818359375, 0.69635009765625, 0.734161376953125, 0.77197265625, 0.809783935546875, 0.84759521484375, 0.885406494140625, 0.9232177734375, 0.961029052734375, 0.99884033203125, 1.036651611328125, 1.074462890625, 1.112274169921875, 1.15008544921875, 1.187896728515625, 1.2257080078125, 1.263519287109375, 1.30133056640625, 1.339141845703125, 1.376953125]}, "gradients/encoder.encoder.layers.2.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 3.0, 3.0, 6.0, 10.0, 15.0, 14.0, 29.0, 43.0, 62.0, 150.0, 230.0, 440.0, 853.0, 889.0, 667.0, 301.0, 161.0, 72.0, 49.0, 25.0, 17.0, 18.0, 2.0, 5.0, 3.0, 4.0, 3.0, 0.0, 2.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.6875, -0.6602020263671875, -0.632904052734375, -0.6056060791015625, -0.57830810546875, -0.5510101318359375, -0.523712158203125, -0.4964141845703125, -0.4691162109375, -0.4418182373046875, -0.414520263671875, -0.3872222900390625, -0.35992431640625, -0.3326263427734375, -0.305328369140625, -0.2780303955078125, -0.250732421875, -0.2234344482421875, -0.196136474609375, -0.1688385009765625, -0.14154052734375, -0.1142425537109375, -0.086944580078125, -0.0596466064453125, -0.0323486328125, -0.0050506591796875, 0.022247314453125, 0.0495452880859375, 0.07684326171875, 0.1041412353515625, 0.131439208984375, 0.1587371826171875, 0.18603515625, 0.2133331298828125, 0.240631103515625, 0.2679290771484375, 0.29522705078125, 0.3225250244140625, 0.349822998046875, 0.3771209716796875, 0.4044189453125, 0.4317169189453125, 0.459014892578125, 0.4863128662109375, 0.51361083984375, 0.5409088134765625, 0.568206787109375, 0.5955047607421875, 0.622802734375, 0.6501007080078125, 0.677398681640625, 0.7046966552734375, 0.73199462890625, 0.7592926025390625, 0.786590576171875, 0.8138885498046875, 0.8411865234375, 0.8684844970703125, 0.895782470703125, 0.9230804443359375, 0.95037841796875, 0.9776763916015625, 1.004974365234375, 1.0322723388671875, 1.0595703125]}, "gradients/encoder.encoder.layers.2.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 4.0, 9.0, 17.0, 81.0, 244.0, 354.0, 203.0, 72.0, 16.0, 3.0, 5.0, 4.0, 1.0, 2.0, 1.0, 2.0], "bins": [-17.57353973388672, -17.244478225708008, -16.915416717529297, -16.586353302001953, -16.257291793823242, -15.928230285644531, -15.59916877746582, -15.27010726928711, -14.941044807434082, -14.611983299255371, -14.282920837402344, -13.953859329223633, -13.624797821044922, -13.295735359191895, -12.966673851013184, -12.637611389160156, -12.308549880981445, -11.979488372802734, -11.650425910949707, -11.321364402770996, -10.992302894592285, -10.663240432739258, -10.334178924560547, -10.005117416381836, -9.676055908203125, -9.346994400024414, -9.017931938171387, -8.688870429992676, -8.359808921813965, -8.030746459960938, -7.701684951782227, -7.372622966766357, -7.043560981750488, -6.714498996734619, -6.385437488555908, -6.056375503540039, -5.72731351852417, -5.398251533508301, -5.06919002532959, -4.740128040313721, -4.41106653213501, -4.082004547119141, -3.7529428005218506, -3.4238810539245605, -3.0948190689086914, -2.7657573223114014, -2.4366955757141113, -2.107633590698242, -1.7785718441009521, -1.4495099782943726, -1.120448112487793, -0.7913863658905029, -0.46232450008392334, -0.13326263427734375, 0.1957991123199463, 0.5248610973358154, 0.8539228439331055, 1.182984709739685, 1.5120465755462646, 1.8411083221435547, 2.170170307159424, 2.499232053756714, 2.828293800354004, 3.157355785369873, 3.486417531967163]}, "gradients/encoder.encoder.layers.2.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 0.0, 5.0, 2.0, 1.0, 6.0, 4.0, 9.0, 14.0, 9.0, 24.0, 19.0, 22.0, 22.0, 37.0, 30.0, 42.0, 49.0, 50.0, 56.0, 63.0, 58.0, 50.0, 82.0, 62.0, 49.0, 49.0, 37.0, 28.0, 20.0, 30.0, 22.0, 9.0, 12.0, 13.0, 10.0, 1.0, 9.0, 4.0, 1.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.4971179962158203, -3.3783347606658936, -3.2595512866973877, -3.140768051147461, -3.021984577178955, -2.9032013416290283, -2.7844181060791016, -2.6656346321105957, -2.546851396560669, -2.428068161010742, -2.3092846870422363, -2.1905014514923096, -2.071718215942383, -1.952934741973877, -1.8341515064239502, -1.7153681516647339, -1.5965847969055176, -1.4778014421463013, -1.359018087387085, -1.2402348518371582, -1.121451497077942, -1.0026681423187256, -0.883884847164154, -0.7651015520095825, -0.6463181972503662, -0.5275348424911499, -0.40875154733657837, -0.28996822237968445, -0.17118489742279053, -0.05240154266357422, 0.06638175249099731, 0.18516504764556885, 0.30394840240478516, 0.4227317273616791, 0.541515052318573, 0.6602983474731445, 0.7790817022323608, 0.8978650569915771, 1.016648292541504, 1.1354316473007202, 1.2542150020599365, 1.3729983568191528, 1.4917817115783691, 1.610564947128296, 1.7293483018875122, 1.8481316566467285, 1.9669148921966553, 2.085698127746582, 2.204481601715088, 2.3232648372650146, 2.4420483112335205, 2.5608315467834473, 2.679615020751953, 2.79839825630188, 2.9171814918518066, 3.0359649658203125, 3.1547482013702393, 3.273531436920166, 3.392314910888672, 3.5110981464385986, 3.6298813819885254, 3.7486648559570312, 3.867448091506958, 3.9862313270568848, 4.105014801025391]}, "gradients/encoder.encoder.layers.2.attention.out_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 4.0, 6.0, 11.0, 13.0, 14.0, 35.0, 51.0, 94.0, 175.0, 310.0, 734.0, 1776.0, 5340.0, 21493.0, 122894.0, 582966.0, 258841.0, 40410.0, 8870.0, 2591.0, 1019.0, 416.0, 203.0, 111.0, 68.0, 54.0, 16.0, 15.0, 12.0, 8.0, 3.0, 5.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.75390625, -0.7245635986328125, -0.695220947265625, -0.6658782958984375, -0.63653564453125, -0.6071929931640625, -0.577850341796875, -0.5485076904296875, -0.5191650390625, -0.4898223876953125, -0.460479736328125, -0.4311370849609375, -0.40179443359375, -0.3724517822265625, -0.343109130859375, -0.3137664794921875, -0.284423828125, -0.2550811767578125, -0.225738525390625, -0.1963958740234375, -0.16705322265625, -0.1377105712890625, -0.108367919921875, -0.0790252685546875, -0.0496826171875, -0.0203399658203125, 0.009002685546875, 0.0383453369140625, 0.06768798828125, 0.0970306396484375, 0.126373291015625, 0.1557159423828125, 0.18505859375, 0.2144012451171875, 0.243743896484375, 0.2730865478515625, 0.30242919921875, 0.3317718505859375, 0.361114501953125, 0.3904571533203125, 0.4197998046875, 0.4491424560546875, 0.478485107421875, 0.5078277587890625, 0.53717041015625, 0.5665130615234375, 0.595855712890625, 0.6251983642578125, 0.654541015625, 0.6838836669921875, 0.713226318359375, 0.7425689697265625, 0.77191162109375, 0.8012542724609375, 0.830596923828125, 0.8599395751953125, 0.8892822265625, 0.9186248779296875, 0.947967529296875, 0.9773101806640625, 1.00665283203125, 1.0359954833984375, 1.065338134765625, 1.0946807861328125, 1.1240234375]}, "gradients/encoder.encoder.layers.2.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 1.0, 4.0, 8.0, 8.0, 15.0, 18.0, 27.0, 33.0, 33.0, 50.0, 82.0, 82.0, 97.0, 88.0, 83.0, 89.0, 69.0, 61.0, 48.0, 38.0, 27.0, 11.0, 20.0, 3.0, 6.0, 4.0, 6.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.8271484375, -0.8032150268554688, -0.7792816162109375, -0.7553482055664062, -0.731414794921875, -0.7074813842773438, -0.6835479736328125, -0.6596145629882812, -0.63568115234375, -0.6117477416992188, -0.5878143310546875, -0.5638809204101562, -0.539947509765625, -0.5160140991210938, -0.4920806884765625, -0.46814727783203125, -0.4442138671875, -0.42028045654296875, -0.3963470458984375, -0.37241363525390625, -0.348480224609375, -0.32454681396484375, -0.3006134033203125, -0.27667999267578125, -0.25274658203125, -0.22881317138671875, -0.2048797607421875, -0.18094635009765625, -0.157012939453125, -0.13307952880859375, -0.1091461181640625, -0.08521270751953125, -0.061279296875, -0.03734588623046875, -0.0134124755859375, 0.01052093505859375, 0.034454345703125, 0.05838775634765625, 0.0823211669921875, 0.10625457763671875, 0.13018798828125, 0.15412139892578125, 0.1780548095703125, 0.20198822021484375, 0.225921630859375, 0.24985504150390625, 0.2737884521484375, 0.29772186279296875, 0.3216552734375, 0.34558868408203125, 0.3695220947265625, 0.39345550537109375, 0.417388916015625, 0.44132232666015625, 0.4652557373046875, 0.48918914794921875, 0.51312255859375, 0.5370559692382812, 0.5609893798828125, 0.5849227905273438, 0.608856201171875, 0.6327896118164062, 0.6567230224609375, 0.6806564331054688, 0.70458984375]}, "gradients/encoder.encoder.layers.2.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 1.0, 4.0, 2.0, 7.0, 5.0, 13.0, 14.0, 10.0, 19.0, 37.0, 42.0, 45.0, 70.0, 110.0, 152.0, 230.0, 374.0, 658.0, 1188.0, 3173.0, 13778.0, 111038.0, 766200.0, 129191.0, 15841.0, 3431.0, 1221.0, 617.0, 364.0, 215.0, 134.0, 97.0, 61.0, 58.0, 37.0, 24.0, 20.0, 27.0, 17.0, 7.0, 10.0, 4.0, 3.0, 3.0, 3.0, 3.0, 1.0, 3.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-1.12109375, -1.0876617431640625, -1.054229736328125, -1.0207977294921875, -0.98736572265625, -0.9539337158203125, -0.920501708984375, -0.8870697021484375, -0.8536376953125, -0.8202056884765625, -0.786773681640625, -0.7533416748046875, -0.71990966796875, -0.6864776611328125, -0.653045654296875, -0.6196136474609375, -0.586181640625, -0.5527496337890625, -0.519317626953125, -0.4858856201171875, -0.45245361328125, -0.4190216064453125, -0.385589599609375, -0.3521575927734375, -0.3187255859375, -0.2852935791015625, -0.251861572265625, -0.2184295654296875, -0.18499755859375, -0.1515655517578125, -0.118133544921875, -0.0847015380859375, -0.05126953125, -0.0178375244140625, 0.015594482421875, 0.0490264892578125, 0.08245849609375, 0.1158905029296875, 0.149322509765625, 0.1827545166015625, 0.2161865234375, 0.2496185302734375, 0.283050537109375, 0.3164825439453125, 0.34991455078125, 0.3833465576171875, 0.416778564453125, 0.4502105712890625, 0.483642578125, 0.5170745849609375, 0.550506591796875, 0.5839385986328125, 0.61737060546875, 0.6508026123046875, 0.684234619140625, 0.7176666259765625, 0.7510986328125, 0.7845306396484375, 0.817962646484375, 0.8513946533203125, 0.88482666015625, 0.9182586669921875, 0.951690673828125, 0.9851226806640625, 1.0185546875]}, "gradients/encoder.encoder.layers.2.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 5.0, 2.0, 5.0, 10.0, 10.0, 10.0, 22.0, 24.0, 39.0, 41.0, 43.0, 48.0, 66.0, 63.0, 60.0, 80.0, 65.0, 65.0, 73.0, 54.0, 45.0, 46.0, 37.0, 19.0, 18.0, 13.0, 10.0, 8.0, 7.0, 7.0, 3.0, 8.0, 1.0, 1.0, 3.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.09765625, -2.0360107421875, -1.974365234375, -1.9127197265625, -1.85107421875, -1.7894287109375, -1.727783203125, -1.6661376953125, -1.6044921875, -1.5428466796875, -1.481201171875, -1.4195556640625, -1.35791015625, -1.2962646484375, -1.234619140625, -1.1729736328125, -1.111328125, -1.0496826171875, -0.988037109375, -0.9263916015625, -0.86474609375, -0.8031005859375, -0.741455078125, -0.6798095703125, -0.6181640625, -0.5565185546875, -0.494873046875, -0.4332275390625, -0.37158203125, -0.3099365234375, -0.248291015625, -0.1866455078125, -0.125, -0.0633544921875, -0.001708984375, 0.0599365234375, 0.12158203125, 0.1832275390625, 0.244873046875, 0.3065185546875, 0.3681640625, 0.4298095703125, 0.491455078125, 0.5531005859375, 0.61474609375, 0.6763916015625, 0.738037109375, 0.7996826171875, 0.861328125, 0.9229736328125, 0.984619140625, 1.0462646484375, 1.10791015625, 1.1695556640625, 1.231201171875, 1.2928466796875, 1.3544921875, 1.4161376953125, 1.477783203125, 1.5394287109375, 1.60107421875, 1.6627197265625, 1.724365234375, 1.7860107421875, 1.84765625]}, "gradients/encoder.encoder.layers.2.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 4.0, 2.0, 3.0, 5.0, 4.0, 5.0, 8.0, 14.0, 11.0, 13.0, 20.0, 25.0, 41.0, 51.0, 94.0, 123.0, 183.0, 280.0, 486.0, 757.0, 1359.0, 2415.0, 4838.0, 10592.0, 26445.0, 79751.0, 581927.0, 242476.0, 58481.0, 20539.0, 8687.0, 4023.0, 1961.0, 1189.0, 616.0, 378.0, 263.0, 164.0, 92.0, 73.0, 53.0, 25.0, 22.0, 24.0, 13.0, 7.0, 6.0, 10.0, 5.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 0.0, 1.0], "bins": [-0.2156982421875, -0.20932960510253906, -0.20296096801757812, -0.1965923309326172, -0.19022369384765625, -0.1838550567626953, -0.17748641967773438, -0.17111778259277344, -0.1647491455078125, -0.15838050842285156, -0.15201187133789062, -0.1456432342529297, -0.13927459716796875, -0.1329059600830078, -0.12653732299804688, -0.12016868591308594, -0.113800048828125, -0.10743141174316406, -0.10106277465820312, -0.09469413757324219, -0.08832550048828125, -0.08195686340332031, -0.07558822631835938, -0.06921958923339844, -0.0628509521484375, -0.05648231506347656, -0.050113677978515625, -0.04374504089355469, -0.03737640380859375, -0.031007766723632812, -0.024639129638671875, -0.018270492553710938, -0.01190185546875, -0.0055332183837890625, 0.000835418701171875, 0.0072040557861328125, 0.01357269287109375, 0.019941329956054688, 0.026309967041015625, 0.03267860412597656, 0.0390472412109375, 0.04541587829589844, 0.051784515380859375, 0.05815315246582031, 0.06452178955078125, 0.07089042663574219, 0.07725906372070312, 0.08362770080566406, 0.089996337890625, 0.09636497497558594, 0.10273361206054688, 0.10910224914550781, 0.11547088623046875, 0.12183952331542969, 0.12820816040039062, 0.13457679748535156, 0.1409454345703125, 0.14731407165527344, 0.15368270874023438, 0.1600513458251953, 0.16641998291015625, 0.1727886199951172, 0.17915725708007812, 0.18552589416503906, 0.19189453125]}, "gradients/encoder.encoder.layers.2.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 3.0, 1.0, 4.0, 2.0, 5.0, 7.0, 9.0, 9.0, 18.0, 18.0, 15.0, 32.0, 34.0, 60.0, 60.0, 89.0, 105.0, 122.0, 104.0, 74.0, 57.0, 39.0, 36.0, 22.0, 19.0, 14.0, 6.0, 7.0, 9.0, 11.0, 3.0, 5.0, 2.0, 1.0, 4.0, 2.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.508827209472656e-05, -6.323400884866714e-05, -6.137974560260773e-05, -5.952548235654831e-05, -5.767121911048889e-05, -5.5816955864429474e-05, -5.3962692618370056e-05, -5.210842937231064e-05, -5.025416612625122e-05, -4.83999028801918e-05, -4.6545639634132385e-05, -4.469137638807297e-05, -4.283711314201355e-05, -4.098284989595413e-05, -3.9128586649894714e-05, -3.72743234038353e-05, -3.542006015777588e-05, -3.356579691171646e-05, -3.1711533665657043e-05, -2.9857270419597626e-05, -2.8003007173538208e-05, -2.614874392747879e-05, -2.4294480681419373e-05, -2.2440217435359955e-05, -2.0585954189300537e-05, -1.873169094324112e-05, -1.68774276971817e-05, -1.5023164451122284e-05, -1.3168901205062866e-05, -1.1314637959003448e-05, -9.46037471294403e-06, -7.606111466884613e-06, -5.751848220825195e-06, -3.897584974765778e-06, -2.04332172870636e-06, -1.8905848264694214e-07, 1.6652047634124756e-06, 3.5194680094718933e-06, 5.373731255531311e-06, 7.227994501590729e-06, 9.082257747650146e-06, 1.0936520993709564e-05, 1.2790784239768982e-05, 1.46450474858284e-05, 1.6499310731887817e-05, 1.8353573977947235e-05, 2.0207837224006653e-05, 2.206210047006607e-05, 2.3916363716125488e-05, 2.5770626962184906e-05, 2.7624890208244324e-05, 2.947915345430374e-05, 3.133341670036316e-05, 3.318767994642258e-05, 3.5041943192481995e-05, 3.689620643854141e-05, 3.875046968460083e-05, 4.060473293066025e-05, 4.2458996176719666e-05, 4.431325942277908e-05, 4.61675226688385e-05, 4.802178591489792e-05, 4.9876049160957336e-05, 5.1730312407016754e-05, 5.358457565307617e-05]}, "gradients/encoder.encoder.layers.2.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 3.0, 2.0, 7.0, 14.0, 17.0, 45.0, 62.0, 147.0, 345.0, 1007.0, 6572.0, 122929.0, 886889.0, 26827.0, 2668.0, 602.0, 205.0, 90.0, 61.0, 24.0, 20.0, 12.0, 6.0, 8.0, 4.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.681640625, -0.6638298034667969, -0.6460189819335938, -0.6282081604003906, -0.6103973388671875, -0.5925865173339844, -0.5747756958007812, -0.5569648742675781, -0.539154052734375, -0.5213432312011719, -0.5035324096679688, -0.4857215881347656, -0.4679107666015625, -0.4500999450683594, -0.43228912353515625, -0.4144783020019531, -0.39666748046875, -0.3788566589355469, -0.36104583740234375, -0.3432350158691406, -0.3254241943359375, -0.3076133728027344, -0.28980255126953125, -0.2719917297363281, -0.254180908203125, -0.23637008666992188, -0.21855926513671875, -0.20074844360351562, -0.1829376220703125, -0.16512680053710938, -0.14731597900390625, -0.12950515747070312, -0.1116943359375, -0.09388351440429688, -0.07607269287109375, -0.058261871337890625, -0.0404510498046875, -0.022640228271484375, -0.00482940673828125, 0.012981414794921875, 0.030792236328125, 0.048603057861328125, 0.06641387939453125, 0.08422470092773438, 0.1020355224609375, 0.11984634399414062, 0.13765716552734375, 0.15546798706054688, 0.17327880859375, 0.19108963012695312, 0.20890045166015625, 0.22671127319335938, 0.2445220947265625, 0.2623329162597656, 0.28014373779296875, 0.2979545593261719, 0.315765380859375, 0.3335762023925781, 0.35138702392578125, 0.3691978454589844, 0.3870086669921875, 0.4048194885253906, 0.42263031005859375, 0.4404411315917969, 0.458251953125]}, "gradients/encoder.encoder.layers.2.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 2.0, 1.0, 10.0, 5.0, 8.0, 15.0, 17.0, 18.0, 38.0, 48.0, 62.0, 59.0, 120.0, 133.0, 123.0, 93.0, 64.0, 54.0, 29.0, 33.0, 23.0, 19.0, 4.0, 10.0, 4.0, 6.0, 4.0, 3.0, 4.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.463623046875, -0.4510002136230469, -0.43837738037109375, -0.4257545471191406, -0.4131317138671875, -0.4005088806152344, -0.38788604736328125, -0.3752632141113281, -0.362640380859375, -0.3500175476074219, -0.33739471435546875, -0.3247718811035156, -0.3121490478515625, -0.2995262145996094, -0.28690338134765625, -0.2742805480957031, -0.26165771484375, -0.24903488159179688, -0.23641204833984375, -0.22378921508789062, -0.2111663818359375, -0.19854354858398438, -0.18592071533203125, -0.17329788208007812, -0.160675048828125, -0.14805221557617188, -0.13542938232421875, -0.12280654907226562, -0.1101837158203125, -0.09756088256835938, -0.08493804931640625, -0.07231521606445312, -0.0596923828125, -0.047069549560546875, -0.03444671630859375, -0.021823883056640625, -0.0092010498046875, 0.003421783447265625, 0.01604461669921875, 0.028667449951171875, 0.041290283203125, 0.053913116455078125, 0.06653594970703125, 0.07915878295898438, 0.0917816162109375, 0.10440444946289062, 0.11702728271484375, 0.12965011596679688, 0.14227294921875, 0.15489578247070312, 0.16751861572265625, 0.18014144897460938, 0.1927642822265625, 0.20538711547851562, 0.21800994873046875, 0.23063278198242188, 0.243255615234375, 0.2558784484863281, 0.26850128173828125, 0.2811241149902344, 0.2937469482421875, 0.3063697814941406, 0.31899261474609375, 0.3316154479980469, 0.34423828125]}, "gradients/encoder.encoder.layers.2.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 4.0, 8.0, 34.0, 229.0, 538.0, 144.0, 34.0, 14.0, 5.0, 4.0, 2.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-11.751058578491211, -11.136977195739746, -10.522895812988281, -9.908814430236816, -9.294733047485352, -8.68065071105957, -8.066569328308105, -7.452487945556641, -6.838406562805176, -6.224325180053711, -5.610243797302246, -4.996161937713623, -4.382080554962158, -3.7679991722106934, -3.1539175510406494, -2.5398359298706055, -1.9257545471191406, -1.3116730451583862, -0.6975915431976318, -0.08351004123687744, 0.530571460723877, 1.1446528434753418, 1.7587344646453857, 2.3728160858154297, 2.9868974685668945, 3.6009788513183594, 4.215060234069824, 4.829142093658447, 5.443223476409912, 6.057304859161377, 6.67138671875, 7.285468101501465, 7.899547576904297, 8.513628959655762, 9.127710342407227, 9.741791725158691, 10.355873107910156, 10.969955444335938, 11.584036827087402, 12.198118209838867, 12.812199592590332, 13.426280975341797, 14.040362358093262, 14.654443740844727, 15.268526077270508, 15.882606506347656, 16.496688842773438, 17.11077117919922, 17.724851608276367, 18.33893394470215, 18.953014373779297, 19.567096710205078, 20.181177139282227, 20.795259475708008, 21.409339904785156, 22.023422241210938, 22.63750457763672, 23.2515869140625, 23.86566734313965, 24.47974967956543, 25.093830108642578, 25.70791244506836, 26.321992874145508, 26.93607521057129, 27.550155639648438]}, "gradients/encoder.encoder.layers.2.layer_norm.bias": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 0.0, 3.0, 3.0, 1.0, 2.0, 1.0, 4.0, 7.0, 6.0, 4.0, 7.0, 1.0, 8.0, 16.0, 16.0, 12.0, 23.0, 21.0, 23.0, 16.0, 32.0, 33.0, 33.0, 39.0, 42.0, 73.0, 85.0, 94.0, 71.0, 45.0, 36.0, 38.0, 18.0, 19.0, 29.0, 18.0, 16.0, 18.0, 9.0, 11.0, 13.0, 15.0, 12.0, 4.0, 4.0, 9.0, 6.0, 6.0, 2.0, 5.0, 2.0, 3.0, 3.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-4.552083969116211, -4.4005937576293945, -4.249103546142578, -4.097613334655762, -3.9461231231689453, -3.794632911682129, -3.6431427001953125, -3.491652488708496, -3.3401622772216797, -3.1886720657348633, -3.037181854248047, -2.8856916427612305, -2.734201431274414, -2.5827112197875977, -2.4312210083007812, -2.279730796813965, -2.1282408237457275, -1.9767506122589111, -1.8252604007720947, -1.6737701892852783, -1.522279977798462, -1.3707897663116455, -1.2192996740341187, -1.0678094625473022, -0.9163192510604858, -0.7648290395736694, -0.613338828086853, -0.4618486762046814, -0.310358464717865, -0.15886825323104858, -0.007378101348876953, 0.14411211013793945, 0.29560232162475586, 0.44709253311157227, 0.5985827445983887, 0.7500728964805603, 0.9015631079673767, 1.053053379058838, 1.2045434713363647, 1.3560336828231812, 1.5075238943099976, 1.659014105796814, 1.8105043172836304, 1.9619944095611572, 2.1134846210479736, 2.26497483253479, 2.4164650440216064, 2.567955255508423, 2.7194454669952393, 2.8709356784820557, 3.022425889968872, 3.1739161014556885, 3.325406312942505, 3.4768965244293213, 3.6283864974975586, 3.779876708984375, 3.9313669204711914, 4.082857131958008, 4.234347343444824, 4.385837554931641, 4.537327766418457, 4.688817977905273, 4.84030818939209, 4.991798400878906, 5.143288612365723]}, "gradients/encoder.encoder.layers.1.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 3.0, 3.0, 3.0, 9.0, 5.0, 17.0, 27.0, 39.0, 43.0, 104.0, 177.0, 317.0, 663.0, 2308.0, 10744.0, 129127.0, 2721995.0, 1273684.0, 46837.0, 5546.0, 1468.0, 501.0, 249.0, 137.0, 96.0, 68.0, 31.0, 20.0, 20.0, 13.0, 8.0, 8.0, 7.0, 3.0, 3.0, 1.0, 2.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.8662109375, -0.842041015625, -0.81787109375, -0.793701171875, -0.76953125, -0.745361328125, -0.72119140625, -0.697021484375, -0.6728515625, -0.648681640625, -0.62451171875, -0.600341796875, -0.576171875, -0.552001953125, -0.52783203125, -0.503662109375, -0.4794921875, -0.455322265625, -0.43115234375, -0.406982421875, -0.3828125, -0.358642578125, -0.33447265625, -0.310302734375, -0.2861328125, -0.261962890625, -0.23779296875, -0.213623046875, -0.189453125, -0.165283203125, -0.14111328125, -0.116943359375, -0.0927734375, -0.068603515625, -0.04443359375, -0.020263671875, 0.00390625, 0.028076171875, 0.05224609375, 0.076416015625, 0.1005859375, 0.124755859375, 0.14892578125, 0.173095703125, 0.197265625, 0.221435546875, 0.24560546875, 0.269775390625, 0.2939453125, 0.318115234375, 0.34228515625, 0.366455078125, 0.390625, 0.414794921875, 0.43896484375, 0.463134765625, 0.4873046875, 0.511474609375, 0.53564453125, 0.559814453125, 0.583984375, 0.608154296875, 0.63232421875, 0.656494140625, 0.6806640625]}, "gradients/encoder.encoder.layers.1.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 2.0, 11.0, 6.0, 10.0, 16.0, 24.0, 36.0, 40.0, 43.0, 66.0, 70.0, 82.0, 101.0, 93.0, 75.0, 75.0, 65.0, 52.0, 37.0, 34.0, 18.0, 17.0, 10.0, 10.0, 4.0, 5.0, 5.0, 2.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.75537109375, -0.7325439453125, -0.709716796875, -0.6868896484375, -0.6640625, -0.6412353515625, -0.618408203125, -0.5955810546875, -0.57275390625, -0.5499267578125, -0.527099609375, -0.5042724609375, -0.4814453125, -0.4586181640625, -0.435791015625, -0.4129638671875, -0.39013671875, -0.3673095703125, -0.344482421875, -0.3216552734375, -0.298828125, -0.2760009765625, -0.253173828125, -0.2303466796875, -0.20751953125, -0.1846923828125, -0.161865234375, -0.1390380859375, -0.1162109375, -0.0933837890625, -0.070556640625, -0.0477294921875, -0.02490234375, -0.0020751953125, 0.020751953125, 0.0435791015625, 0.06640625, 0.0892333984375, 0.112060546875, 0.1348876953125, 0.15771484375, 0.1805419921875, 0.203369140625, 0.2261962890625, 0.2490234375, 0.2718505859375, 0.294677734375, 0.3175048828125, 0.34033203125, 0.3631591796875, 0.385986328125, 0.4088134765625, 0.431640625, 0.4544677734375, 0.477294921875, 0.5001220703125, 0.52294921875, 0.5457763671875, 0.568603515625, 0.5914306640625, 0.6142578125, 0.6370849609375, 0.659912109375, 0.6827392578125, 0.70556640625]}, "gradients/encoder.encoder.layers.1.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 4.0, 1.0, 10.0, 10.0, 40.0, 83.0, 296.0, 1097.0, 17244.0, 4163471.0, 10814.0, 854.0, 270.0, 54.0, 25.0, 11.0, 3.0, 3.0, 2.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.8828125, -2.79400634765625, -2.7052001953125, -2.61639404296875, -2.527587890625, -2.43878173828125, -2.3499755859375, -2.26116943359375, -2.17236328125, -2.08355712890625, -1.9947509765625, -1.90594482421875, -1.817138671875, -1.72833251953125, -1.6395263671875, -1.55072021484375, -1.4619140625, -1.37310791015625, -1.2843017578125, -1.19549560546875, -1.106689453125, -1.01788330078125, -0.9290771484375, -0.84027099609375, -0.75146484375, -0.66265869140625, -0.5738525390625, -0.48504638671875, -0.396240234375, -0.30743408203125, -0.2186279296875, -0.12982177734375, -0.041015625, 0.04779052734375, 0.1365966796875, 0.22540283203125, 0.314208984375, 0.40301513671875, 0.4918212890625, 0.58062744140625, 0.66943359375, 0.75823974609375, 0.8470458984375, 0.93585205078125, 1.024658203125, 1.11346435546875, 1.2022705078125, 1.29107666015625, 1.3798828125, 1.46868896484375, 1.5574951171875, 1.64630126953125, 1.735107421875, 1.82391357421875, 1.9127197265625, 2.00152587890625, 2.09033203125, 2.17913818359375, 2.2679443359375, 2.35675048828125, 2.445556640625, 2.53436279296875, 2.6231689453125, 2.71197509765625, 2.80078125]}, "gradients/encoder.encoder.layers.1.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 2.0, 2.0, 4.0, 2.0, 10.0, 12.0, 19.0, 27.0, 51.0, 105.0, 201.0, 458.0, 889.0, 1069.0, 641.0, 318.0, 126.0, 52.0, 45.0, 19.0, 15.0, 3.0, 6.0, 2.0, 2.0, 1.0, 5.0, 1.0], "bins": [-1.7548828125, -1.7192764282226562, -1.6836700439453125, -1.6480636596679688, -1.612457275390625, -1.5768508911132812, -1.5412445068359375, -1.5056381225585938, -1.47003173828125, -1.4344253540039062, -1.3988189697265625, -1.3632125854492188, -1.327606201171875, -1.2919998168945312, -1.2563934326171875, -1.2207870483398438, -1.1851806640625, -1.1495742797851562, -1.1139678955078125, -1.0783615112304688, -1.042755126953125, -1.0071487426757812, -0.9715423583984375, -0.9359359741210938, -0.90032958984375, -0.8647232055664062, -0.8291168212890625, -0.7935104370117188, -0.757904052734375, -0.7222976684570312, -0.6866912841796875, -0.6510848999023438, -0.615478515625, -0.5798721313476562, -0.5442657470703125, -0.5086593627929688, -0.473052978515625, -0.43744659423828125, -0.4018402099609375, -0.36623382568359375, -0.33062744140625, -0.29502105712890625, -0.2594146728515625, -0.22380828857421875, -0.188201904296875, -0.15259552001953125, -0.1169891357421875, -0.08138275146484375, -0.0457763671875, -0.01016998291015625, 0.0254364013671875, 0.06104278564453125, 0.096649169921875, 0.13225555419921875, 0.1678619384765625, 0.20346832275390625, 0.23907470703125, 0.27468109130859375, 0.3102874755859375, 0.34589385986328125, 0.381500244140625, 0.41710662841796875, 0.4527130126953125, 0.48831939697265625, 0.52392578125]}, "gradients/encoder.encoder.layers.1.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 1.0, 6.0, 16.0, 23.0, 79.0, 253.0, 397.0, 166.0, 41.0, 14.0, 7.0, 4.0, 4.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-16.131092071533203, -15.67730712890625, -15.22352123260498, -14.769736289978027, -14.315950393676758, -13.862165451049805, -13.408380508422852, -12.954595565795898, -12.500809669494629, -12.047024726867676, -11.593238830566406, -11.139453887939453, -10.6856689453125, -10.23188304901123, -9.778098106384277, -9.324312210083008, -8.870527267456055, -8.416742324829102, -7.962956428527832, -7.509171485900879, -7.055386066436768, -6.601600646972656, -6.147815704345703, -5.694030284881592, -5.2402448654174805, -4.786459445953369, -4.332674026489258, -3.8788890838623047, -3.4251036643981934, -2.971318244934082, -2.51753306388855, -2.0637478828430176, -1.6099634170532227, -1.1561781167984009, -0.7023928165435791, -0.24860751628875732, 0.20517778396606445, 0.6589632034301758, 1.112748384475708, 1.5665335655212402, 2.0203189849853516, 2.474104404449463, 2.927889585494995, 3.3816747665405273, 3.8354601860046387, 4.28924560546875, 4.743030548095703, 5.1968159675598145, 5.650601387023926, 6.104386806488037, 6.558172225952148, 7.011957168579102, 7.465742588043213, 7.919528007507324, 8.373312950134277, 8.827098846435547, 9.2808837890625, 9.734668731689453, 10.188454627990723, 10.642239570617676, 11.096025466918945, 11.549810409545898, 12.003595352172852, 12.457380294799805, 12.911166191101074]}, "gradients/encoder.encoder.layers.1.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0, 2.0, 2.0, 2.0, 2.0, 5.0, 7.0, 5.0, 10.0, 12.0, 13.0, 18.0, 29.0, 14.0, 29.0, 17.0, 28.0, 30.0, 45.0, 35.0, 34.0, 45.0, 38.0, 41.0, 41.0, 43.0, 53.0, 38.0, 45.0, 41.0, 38.0, 33.0, 29.0, 33.0, 19.0, 21.0, 27.0, 18.0, 20.0, 11.0, 11.0, 2.0, 8.0, 5.0, 5.0, 2.0, 1.0, 0.0, 2.0, 1.0, 1.0, 1.0, 1.0, 2.0], "bins": [-4.32381010055542, -4.196715354919434, -4.0696210861206055, -3.9425265789031982, -3.815432071685791, -3.688337564468384, -3.5612430572509766, -3.4341483116149902, -3.307054042816162, -3.179959535598755, -3.0528650283813477, -2.9257705211639404, -2.798676013946533, -2.671581506729126, -2.5444869995117188, -2.4173922538757324, -2.290297746658325, -2.163203239440918, -2.0361087322235107, -1.9090142250061035, -1.7819197177886963, -1.654825210571289, -1.5277305841445923, -1.400636076927185, -1.2735415697097778, -1.1464470624923706, -1.0193525552749634, -0.8922579884529114, -0.7651634812355042, -0.6380689740180969, -0.5109744071960449, -0.3838798999786377, -0.25678539276123047, -0.12969087064266205, -0.002596348524093628, 0.12449818849563599, 0.2515926957130432, 0.37868720293045044, 0.5057817697525024, 0.6328762769699097, 0.7599707841873169, 0.8870652914047241, 1.0141597986221313, 1.1412544250488281, 1.2683489322662354, 1.3954434394836426, 1.5225379467010498, 1.649632453918457, 1.7767269611358643, 1.9038214683532715, 2.0309159755706787, 2.158010482788086, 2.285104990005493, 2.4121994972229004, 2.5392942428588867, 2.666388511657715, 2.793483257293701, 2.9205777645111084, 3.0476722717285156, 3.174766778945923, 3.30186128616333, 3.4289557933807373, 3.5560503005981445, 3.683145046234131, 3.810239315032959]}, "gradients/encoder.encoder.layers.1.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0, 5.0, 3.0, 5.0, 11.0, 13.0, 14.0, 19.0, 48.0, 73.0, 140.0, 234.0, 464.0, 1098.0, 2729.0, 7690.0, 26089.0, 122783.0, 544165.0, 271925.0, 50661.0, 13130.0, 4256.0, 1589.0, 658.0, 339.0, 195.0, 89.0, 60.0, 25.0, 15.0, 19.0, 10.0, 7.0, 2.0, 3.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.134765625, -1.106903076171875, -1.07904052734375, -1.051177978515625, -1.0233154296875, -0.995452880859375, -0.96759033203125, -0.939727783203125, -0.911865234375, -0.884002685546875, -0.85614013671875, -0.828277587890625, -0.8004150390625, -0.772552490234375, -0.74468994140625, -0.716827392578125, -0.68896484375, -0.661102294921875, -0.63323974609375, -0.605377197265625, -0.5775146484375, -0.549652099609375, -0.52178955078125, -0.493927001953125, -0.466064453125, -0.438201904296875, -0.41033935546875, -0.382476806640625, -0.3546142578125, -0.326751708984375, -0.29888916015625, -0.271026611328125, -0.2431640625, -0.215301513671875, -0.18743896484375, -0.159576416015625, -0.1317138671875, -0.103851318359375, -0.07598876953125, -0.048126220703125, -0.020263671875, 0.007598876953125, 0.03546142578125, 0.063323974609375, 0.0911865234375, 0.119049072265625, 0.14691162109375, 0.174774169921875, 0.20263671875, 0.230499267578125, 0.25836181640625, 0.286224365234375, 0.3140869140625, 0.341949462890625, 0.36981201171875, 0.397674560546875, 0.425537109375, 0.453399658203125, 0.48126220703125, 0.509124755859375, 0.5369873046875, 0.564849853515625, 0.59271240234375, 0.620574951171875, 0.6484375]}, "gradients/encoder.encoder.layers.1.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 4.0, 5.0, 4.0, 5.0, 7.0, 16.0, 16.0, 22.0, 36.0, 43.0, 62.0, 53.0, 79.0, 68.0, 90.0, 88.0, 75.0, 76.0, 61.0, 48.0, 51.0, 21.0, 29.0, 18.0, 12.0, 9.0, 4.0, 2.0, 5.0, 3.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.75048828125, -0.7277145385742188, -0.7049407958984375, -0.6821670532226562, -0.659393310546875, -0.6366195678710938, -0.6138458251953125, -0.5910720825195312, -0.56829833984375, -0.5455245971679688, -0.5227508544921875, -0.49997711181640625, -0.477203369140625, -0.45442962646484375, -0.4316558837890625, -0.40888214111328125, -0.3861083984375, -0.36333465576171875, -0.3405609130859375, -0.31778717041015625, -0.295013427734375, -0.27223968505859375, -0.2494659423828125, -0.22669219970703125, -0.20391845703125, -0.18114471435546875, -0.1583709716796875, -0.13559722900390625, -0.112823486328125, -0.09004974365234375, -0.0672760009765625, -0.04450225830078125, -0.021728515625, 0.00104522705078125, 0.0238189697265625, 0.04659271240234375, 0.069366455078125, 0.09214019775390625, 0.1149139404296875, 0.13768768310546875, 0.16046142578125, 0.18323516845703125, 0.2060089111328125, 0.22878265380859375, 0.251556396484375, 0.27433013916015625, 0.2971038818359375, 0.31987762451171875, 0.3426513671875, 0.36542510986328125, 0.3881988525390625, 0.41097259521484375, 0.433746337890625, 0.45652008056640625, 0.4792938232421875, 0.5020675659179688, 0.52484130859375, 0.5476150512695312, 0.5703887939453125, 0.5931625366210938, 0.615936279296875, 0.6387100219726562, 0.6614837646484375, 0.6842575073242188, 0.70703125]}, "gradients/encoder.encoder.layers.1.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 3.0, 1.0, 3.0, 2.0, 3.0, 7.0, 6.0, 6.0, 9.0, 8.0, 11.0, 19.0, 20.0, 26.0, 40.0, 63.0, 95.0, 114.0, 175.0, 272.0, 445.0, 748.0, 1544.0, 4413.0, 27706.0, 759643.0, 232671.0, 14419.0, 3064.0, 1191.0, 618.0, 381.0, 254.0, 154.0, 132.0, 91.0, 61.0, 41.0, 30.0, 14.0, 14.0, 10.0, 8.0, 3.0, 7.0, 2.0, 7.0, 1.0, 4.0, 4.0, 1.0, 1.0, 1.0, 0.0, 2.0, 2.0], "bins": [-1.4951171875, -1.450958251953125, -1.40679931640625, -1.362640380859375, -1.3184814453125, -1.274322509765625, -1.23016357421875, -1.186004638671875, -1.141845703125, -1.097686767578125, -1.05352783203125, -1.009368896484375, -0.9652099609375, -0.921051025390625, -0.87689208984375, -0.832733154296875, -0.78857421875, -0.744415283203125, -0.70025634765625, -0.656097412109375, -0.6119384765625, -0.567779541015625, -0.52362060546875, -0.479461669921875, -0.435302734375, -0.391143798828125, -0.34698486328125, -0.302825927734375, -0.2586669921875, -0.214508056640625, -0.17034912109375, -0.126190185546875, -0.08203125, -0.037872314453125, 0.00628662109375, 0.050445556640625, 0.0946044921875, 0.138763427734375, 0.18292236328125, 0.227081298828125, 0.271240234375, 0.315399169921875, 0.35955810546875, 0.403717041015625, 0.4478759765625, 0.492034912109375, 0.53619384765625, 0.580352783203125, 0.62451171875, 0.668670654296875, 0.71282958984375, 0.756988525390625, 0.8011474609375, 0.845306396484375, 0.88946533203125, 0.933624267578125, 0.977783203125, 1.021942138671875, 1.06610107421875, 1.110260009765625, 1.1544189453125, 1.198577880859375, 1.24273681640625, 1.286895751953125, 1.3310546875]}, "gradients/encoder.encoder.layers.1.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 3.0, 1.0, 1.0, 5.0, 2.0, 4.0, 9.0, 12.0, 15.0, 15.0, 27.0, 37.0, 34.0, 47.0, 53.0, 72.0, 67.0, 63.0, 79.0, 77.0, 63.0, 55.0, 64.0, 38.0, 38.0, 34.0, 24.0, 14.0, 12.0, 16.0, 9.0, 6.0, 2.0, 3.0, 1.0, 1.0, 1.0, 0.0, 3.0, 1.0, 2.0, 0.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.275390625, -2.207366943359375, -2.13934326171875, -2.071319580078125, -2.0032958984375, -1.935272216796875, -1.86724853515625, -1.799224853515625, -1.731201171875, -1.663177490234375, -1.59515380859375, -1.527130126953125, -1.4591064453125, -1.391082763671875, -1.32305908203125, -1.255035400390625, -1.18701171875, -1.118988037109375, -1.05096435546875, -0.982940673828125, -0.9149169921875, -0.846893310546875, -0.77886962890625, -0.710845947265625, -0.642822265625, -0.574798583984375, -0.50677490234375, -0.438751220703125, -0.3707275390625, -0.302703857421875, -0.23468017578125, -0.166656494140625, -0.0986328125, -0.030609130859375, 0.03741455078125, 0.105438232421875, 0.1734619140625, 0.241485595703125, 0.30950927734375, 0.377532958984375, 0.445556640625, 0.513580322265625, 0.58160400390625, 0.649627685546875, 0.7176513671875, 0.785675048828125, 0.85369873046875, 0.921722412109375, 0.98974609375, 1.057769775390625, 1.12579345703125, 1.193817138671875, 1.2618408203125, 1.329864501953125, 1.39788818359375, 1.465911865234375, 1.533935546875, 1.601959228515625, 1.66998291015625, 1.738006591796875, 1.8060302734375, 1.874053955078125, 1.94207763671875, 2.010101318359375, 2.078125]}, "gradients/encoder.encoder.layers.1.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0, 0.0, 1.0, 3.0, 2.0, 7.0, 3.0, 7.0, 4.0, 4.0, 4.0, 4.0, 17.0, 11.0, 32.0, 29.0, 59.0, 113.0, 158.0, 304.0, 624.0, 1540.0, 4452.0, 20656.0, 234450.0, 744901.0, 31751.0, 5940.0, 1902.0, 721.0, 337.0, 170.0, 126.0, 67.0, 35.0, 27.0, 26.0, 19.0, 9.0, 5.0, 10.0, 9.0, 5.0, 5.0, 1.0, 1.0, 2.0, 6.0, 2.0, 1.0, 2.0, 2.0, 3.0, 2.0], "bins": [-0.373291015625, -0.3626594543457031, -0.35202789306640625, -0.3413963317871094, -0.3307647705078125, -0.3201332092285156, -0.30950164794921875, -0.2988700866699219, -0.288238525390625, -0.2776069641113281, -0.26697540283203125, -0.2563438415527344, -0.2457122802734375, -0.23508071899414062, -0.22444915771484375, -0.21381759643554688, -0.20318603515625, -0.19255447387695312, -0.18192291259765625, -0.17129135131835938, -0.1606597900390625, -0.15002822875976562, -0.13939666748046875, -0.12876510620117188, -0.118133544921875, -0.10750198364257812, -0.09687042236328125, -0.08623886108398438, -0.0756072998046875, -0.06497573852539062, -0.05434417724609375, -0.043712615966796875, -0.0330810546875, -0.022449493408203125, -0.01181793212890625, -0.001186370849609375, 0.0094451904296875, 0.020076751708984375, 0.03070831298828125, 0.041339874267578125, 0.051971435546875, 0.06260299682617188, 0.07323455810546875, 0.08386611938476562, 0.0944976806640625, 0.10512924194335938, 0.11576080322265625, 0.12639236450195312, 0.13702392578125, 0.14765548706054688, 0.15828704833984375, 0.16891860961914062, 0.1795501708984375, 0.19018173217773438, 0.20081329345703125, 0.21144485473632812, 0.222076416015625, 0.23270797729492188, 0.24333953857421875, 0.2539710998535156, 0.2646026611328125, 0.2752342224121094, 0.28586578369140625, 0.2964973449707031, 0.30712890625]}, "gradients/encoder.encoder.layers.1.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 4.0, 8.0, 2.0, 2.0, 14.0, 12.0, 18.0, 30.0, 48.0, 61.0, 129.0, 185.0, 183.0, 102.0, 69.0, 50.0, 26.0, 17.0, 13.0, 7.0, 10.0, 11.0, 5.0, 0.0, 2.0, 1.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.249282836914062e-05, -7.973518222570419e-05, -7.697753608226776e-05, -7.421988993883133e-05, -7.14622437953949e-05, -6.870459765195847e-05, -6.594695150852203e-05, -6.31893053650856e-05, -6.043165922164917e-05, -5.767401307821274e-05, -5.4916366934776306e-05, -5.2158720791339874e-05, -4.940107464790344e-05, -4.664342850446701e-05, -4.388578236103058e-05, -4.112813621759415e-05, -3.8370490074157715e-05, -3.561284393072128e-05, -3.285519778728485e-05, -3.009755164384842e-05, -2.7339905500411987e-05, -2.4582259356975555e-05, -2.1824613213539124e-05, -1.906696707010269e-05, -1.630932092666626e-05, -1.3551674783229828e-05, -1.0794028639793396e-05, -8.036382496356964e-06, -5.278736352920532e-06, -2.5210902094841003e-06, 2.3655593395233154e-07, 2.9942020773887634e-06, 5.751848220825195e-06, 8.509494364261627e-06, 1.1267140507698059e-05, 1.4024786651134491e-05, 1.6782432794570923e-05, 1.9540078938007355e-05, 2.2297725081443787e-05, 2.505537122488022e-05, 2.781301736831665e-05, 3.057066351175308e-05, 3.3328309655189514e-05, 3.6085955798625946e-05, 3.884360194206238e-05, 4.160124808549881e-05, 4.435889422893524e-05, 4.7116540372371674e-05, 4.9874186515808105e-05, 5.263183265924454e-05, 5.538947880268097e-05, 5.81471249461174e-05, 6.090477108955383e-05, 6.366241723299026e-05, 6.64200633764267e-05, 6.917770951986313e-05, 7.193535566329956e-05, 7.469300180673599e-05, 7.745064795017242e-05, 8.020829409360886e-05, 8.296594023704529e-05, 8.572358638048172e-05, 8.848123252391815e-05, 9.123887866735458e-05, 9.399652481079102e-05]}, "gradients/encoder.encoder.layers.1.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 4.0, 1.0, 5.0, 5.0, 7.0, 9.0, 12.0, 21.0, 27.0, 41.0, 80.0, 136.0, 294.0, 751.0, 2772.0, 17308.0, 780469.0, 232699.0, 10692.0, 2006.0, 634.0, 264.0, 115.0, 87.0, 36.0, 39.0, 18.0, 10.0, 6.0, 7.0, 4.0, 4.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.32177734375, -0.30831146240234375, -0.2948455810546875, -0.28137969970703125, -0.267913818359375, -0.25444793701171875, -0.2409820556640625, -0.22751617431640625, -0.21405029296875, -0.20058441162109375, -0.1871185302734375, -0.17365264892578125, -0.160186767578125, -0.14672088623046875, -0.1332550048828125, -0.11978912353515625, -0.1063232421875, -0.09285736083984375, -0.0793914794921875, -0.06592559814453125, -0.052459716796875, -0.03899383544921875, -0.0255279541015625, -0.01206207275390625, 0.00140380859375, 0.01486968994140625, 0.0283355712890625, 0.04180145263671875, 0.055267333984375, 0.06873321533203125, 0.0821990966796875, 0.09566497802734375, 0.109130859375, 0.12259674072265625, 0.1360626220703125, 0.14952850341796875, 0.162994384765625, 0.17646026611328125, 0.1899261474609375, 0.20339202880859375, 0.21685791015625, 0.23032379150390625, 0.2437896728515625, 0.25725555419921875, 0.270721435546875, 0.28418731689453125, 0.2976531982421875, 0.31111907958984375, 0.3245849609375, 0.33805084228515625, 0.3515167236328125, 0.36498260498046875, 0.378448486328125, 0.39191436767578125, 0.4053802490234375, 0.41884613037109375, 0.43231201171875, 0.44577789306640625, 0.4592437744140625, 0.47270965576171875, 0.486175537109375, 0.49964141845703125, 0.5131072998046875, 0.5265731811523438, 0.5400390625]}, "gradients/encoder.encoder.layers.1.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 3.0, 1.0, 6.0, 6.0, 12.0, 13.0, 17.0, 16.0, 35.0, 46.0, 33.0, 61.0, 86.0, 107.0, 153.0, 106.0, 92.0, 63.0, 41.0, 29.0, 19.0, 17.0, 11.0, 17.0, 4.0, 6.0, 3.0, 3.0, 3.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.295166015625, -0.2839775085449219, -0.27278900146484375, -0.2616004943847656, -0.2504119873046875, -0.23922348022460938, -0.22803497314453125, -0.21684646606445312, -0.205657958984375, -0.19446945190429688, -0.18328094482421875, -0.17209243774414062, -0.1609039306640625, -0.14971542358398438, -0.13852691650390625, -0.12733840942382812, -0.11614990234375, -0.10496139526367188, -0.09377288818359375, -0.08258438110351562, -0.0713958740234375, -0.060207366943359375, -0.04901885986328125, -0.037830352783203125, -0.026641845703125, -0.015453338623046875, -0.00426483154296875, 0.006923675537109375, 0.0181121826171875, 0.029300689697265625, 0.04048919677734375, 0.051677703857421875, 0.0628662109375, 0.07405471801757812, 0.08524322509765625, 0.09643173217773438, 0.1076202392578125, 0.11880874633789062, 0.12999725341796875, 0.14118576049804688, 0.152374267578125, 0.16356277465820312, 0.17475128173828125, 0.18593978881835938, 0.1971282958984375, 0.20831680297851562, 0.21950531005859375, 0.23069381713867188, 0.24188232421875, 0.2530708312988281, 0.26425933837890625, 0.2754478454589844, 0.2866363525390625, 0.2978248596191406, 0.30901336669921875, 0.3202018737792969, 0.331390380859375, 0.3425788879394531, 0.35376739501953125, 0.3649559020996094, 0.3761444091796875, 0.3873329162597656, 0.39852142333984375, 0.4097099304199219, 0.4208984375]}, "gradients/encoder.encoder.layers.1.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 3.0, 2.0, 6.0, 8.0, 20.0, 68.0, 363.0, 449.0, 67.0, 14.0, 5.0, 6.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-17.502649307250977, -16.93880271911621, -16.374954223632812, -15.811107635498047, -15.247261047363281, -14.6834135055542, -14.119565963745117, -13.555719375610352, -12.99187183380127, -12.428024291992188, -11.864177703857422, -11.30033016204834, -10.736482620239258, -10.172636032104492, -9.60878849029541, -9.044940948486328, -8.481094360351562, -7.917247295379639, -7.353400230407715, -6.789552688598633, -6.225705623626709, -5.661858558654785, -5.098011016845703, -4.534163951873779, -3.9703168869018555, -3.4064698219299316, -2.8426225185394287, -2.278775215148926, -1.714928150177002, -1.1510810852050781, -0.5872337818145752, -0.023386478424072266, 0.5404605865478516, 1.104307770729065, 1.6681549549102783, 2.2320022583007812, 2.795849323272705, 3.359696388244629, 3.923543691635132, 4.487390995025635, 5.051238059997559, 5.615085124969482, 6.178932189941406, 6.742779731750488, 7.306626796722412, 7.870473861694336, 8.434321403503418, 8.9981689453125, 9.562015533447266, 10.125863075256348, 10.689709663391113, 11.253557205200195, 11.817403793334961, 12.381251335144043, 12.945098876953125, 13.50894546508789, 14.072793006896973, 14.636640548706055, 15.20048713684082, 15.764334678649902, 16.328182220458984, 16.89202880859375, 17.455875396728516, 18.019723892211914, 18.58357048034668]}, "gradients/encoder.encoder.layers.1.layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 3.0, 1.0, 4.0, 4.0, 4.0, 5.0, 5.0, 11.0, 8.0, 21.0, 12.0, 16.0, 21.0, 19.0, 29.0, 30.0, 27.0, 33.0, 34.0, 51.0, 111.0, 182.0, 74.0, 34.0, 36.0, 30.0, 35.0, 17.0, 18.0, 27.0, 21.0, 15.0, 8.0, 20.0, 13.0, 7.0, 5.0, 4.0, 4.0, 5.0, 2.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.206874847412109, -6.0015082359313965, -5.796141624450684, -5.5907745361328125, -5.3854079246521, -5.180041313171387, -4.974674701690674, -4.769308090209961, -4.56394100189209, -4.358574390411377, -4.153207778930664, -3.947840929031372, -3.74247407913208, -3.537107467651367, -3.3317408561706543, -3.1263740062713623, -2.9210073947906494, -2.7156407833099365, -2.5102739334106445, -2.3049073219299316, -2.0995404720306396, -1.8941738605499268, -1.6888071298599243, -1.4834403991699219, -1.2780736684799194, -1.072706937789917, -0.8673402070999146, -0.6619735360145569, -0.45660680532455444, -0.2512401342391968, -0.045873403549194336, 0.1594933271408081, 0.36486005783081055, 0.570226788520813, 0.7755935192108154, 0.9809601902961731, 1.1863269805908203, 1.3916935920715332, 1.5970603227615356, 1.802427053451538, 2.00779390335083, 2.213160514831543, 2.418527364730835, 2.623893976211548, 2.82926082611084, 3.0346274375915527, 3.2399940490722656, 3.4453608989715576, 3.6507275104522705, 3.8560941219329834, 4.061460971832275, 4.266827583312988, 4.472194194793701, 4.677560806274414, 4.882927894592285, 5.088294506072998, 5.293661117553711, 5.499027729034424, 5.704394340515137, 5.909761428833008, 6.115128040313721, 6.320494651794434, 6.5258612632751465, 6.731227874755859, 6.9365949630737305]}, "gradients/encoder.encoder.layers.0.feed_forward.output_dense.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 6.0, 3.0, 12.0, 9.0, 14.0, 12.0, 29.0, 48.0, 77.0, 139.0, 274.0, 466.0, 921.0, 2229.0, 6480.0, 20959.0, 153581.0, 1384559.0, 2206382.0, 364843.0, 38547.0, 8830.0, 3125.0, 1254.0, 630.0, 340.0, 172.0, 116.0, 68.0, 53.0, 28.0, 20.0, 11.0, 12.0, 9.0, 5.0, 6.0, 7.0, 2.0, 4.0, 2.0, 1.0, 2.0, 0.0, 0.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 2.0, 1.0], "bins": [-0.646484375, -0.62432861328125, -0.6021728515625, -0.58001708984375, -0.557861328125, -0.53570556640625, -0.5135498046875, -0.49139404296875, -0.46923828125, -0.44708251953125, -0.4249267578125, -0.40277099609375, -0.380615234375, -0.35845947265625, -0.3363037109375, -0.31414794921875, -0.2919921875, -0.26983642578125, -0.2476806640625, -0.22552490234375, -0.203369140625, -0.18121337890625, -0.1590576171875, -0.13690185546875, -0.11474609375, -0.09259033203125, -0.0704345703125, -0.04827880859375, -0.026123046875, -0.00396728515625, 0.0181884765625, 0.04034423828125, 0.0625, 0.08465576171875, 0.1068115234375, 0.12896728515625, 0.151123046875, 0.17327880859375, 0.1954345703125, 0.21759033203125, 0.23974609375, 0.26190185546875, 0.2840576171875, 0.30621337890625, 0.328369140625, 0.35052490234375, 0.3726806640625, 0.39483642578125, 0.4169921875, 0.43914794921875, 0.4613037109375, 0.48345947265625, 0.505615234375, 0.52777099609375, 0.5499267578125, 0.57208251953125, 0.59423828125, 0.61639404296875, 0.6385498046875, 0.66070556640625, 0.682861328125, 0.70501708984375, 0.7271728515625, 0.74932861328125, 0.771484375]}, "gradients/encoder.encoder.layers.0.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 3.0, 2.0, 6.0, 5.0, 11.0, 12.0, 18.0, 14.0, 31.0, 37.0, 58.0, 69.0, 84.0, 63.0, 84.0, 91.0, 94.0, 78.0, 57.0, 47.0, 43.0, 31.0, 28.0, 11.0, 16.0, 6.0, 7.0, 2.0, 1.0, 3.0, 0.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.748046875, -0.7253799438476562, -0.7027130126953125, -0.6800460815429688, -0.657379150390625, -0.6347122192382812, -0.6120452880859375, -0.5893783569335938, -0.56671142578125, -0.5440444946289062, -0.5213775634765625, -0.49871063232421875, -0.476043701171875, -0.45337677001953125, -0.4307098388671875, -0.40804290771484375, -0.3853759765625, -0.36270904541015625, -0.3400421142578125, -0.31737518310546875, -0.294708251953125, -0.27204132080078125, -0.2493743896484375, -0.22670745849609375, -0.20404052734375, -0.18137359619140625, -0.1587066650390625, -0.13603973388671875, -0.113372802734375, -0.09070587158203125, -0.0680389404296875, -0.04537200927734375, -0.022705078125, -3.814697265625e-05, 0.0226287841796875, 0.04529571533203125, 0.067962646484375, 0.09062957763671875, 0.1132965087890625, 0.13596343994140625, 0.15863037109375, 0.18129730224609375, 0.2039642333984375, 0.22663116455078125, 0.249298095703125, 0.27196502685546875, 0.2946319580078125, 0.31729888916015625, 0.3399658203125, 0.36263275146484375, 0.3852996826171875, 0.40796661376953125, 0.430633544921875, 0.45330047607421875, 0.4759674072265625, 0.49863433837890625, 0.52130126953125, 0.5439682006835938, 0.5666351318359375, 0.5893020629882812, 0.611968994140625, 0.6346359252929688, 0.6573028564453125, 0.6799697875976562, 0.70263671875]}, "gradients/encoder.encoder.layers.0.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 0.0, 1.0, 1.0, 4.0, 4.0, 10.0, 10.0, 17.0, 27.0, 60.0, 115.0, 309.0, 923.0, 4326.0, 3665484.0, 518611.0, 3225.0, 715.0, 240.0, 98.0, 47.0, 29.0, 16.0, 10.0, 4.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.39453125, -4.25689697265625, -4.1192626953125, -3.98162841796875, -3.843994140625, -3.70635986328125, -3.5687255859375, -3.43109130859375, -3.29345703125, -3.15582275390625, -3.0181884765625, -2.88055419921875, -2.742919921875, -2.60528564453125, -2.4676513671875, -2.33001708984375, -2.1923828125, -2.05474853515625, -1.9171142578125, -1.77947998046875, -1.641845703125, -1.50421142578125, -1.3665771484375, -1.22894287109375, -1.09130859375, -0.95367431640625, -0.8160400390625, -0.67840576171875, -0.540771484375, -0.40313720703125, -0.2655029296875, -0.12786865234375, 0.009765625, 0.14739990234375, 0.2850341796875, 0.42266845703125, 0.560302734375, 0.69793701171875, 0.8355712890625, 0.97320556640625, 1.11083984375, 1.24847412109375, 1.3861083984375, 1.52374267578125, 1.661376953125, 1.79901123046875, 1.9366455078125, 2.07427978515625, 2.2119140625, 2.34954833984375, 2.4871826171875, 2.62481689453125, 2.762451171875, 2.90008544921875, 3.0377197265625, 3.17535400390625, 3.31298828125, 3.45062255859375, 3.5882568359375, 3.72589111328125, 3.863525390625, 4.00115966796875, 4.1387939453125, 4.27642822265625, 4.4140625]}, "gradients/encoder.encoder.layers.0.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 4.0, 6.0, 5.0, 12.0, 10.0, 31.0, 55.0, 108.0, 234.0, 573.0, 1168.0, 1000.0, 459.0, 212.0, 90.0, 43.0, 20.0, 21.0, 10.0, 9.0, 8.0, 4.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.361328125, -3.282989501953125, -3.20465087890625, -3.126312255859375, -3.0479736328125, -2.969635009765625, -2.89129638671875, -2.812957763671875, -2.734619140625, -2.656280517578125, -2.57794189453125, -2.499603271484375, -2.4212646484375, -2.342926025390625, -2.26458740234375, -2.186248779296875, -2.10791015625, -2.029571533203125, -1.95123291015625, -1.872894287109375, -1.7945556640625, -1.716217041015625, -1.63787841796875, -1.559539794921875, -1.481201171875, -1.402862548828125, -1.32452392578125, -1.246185302734375, -1.1678466796875, -1.089508056640625, -1.01116943359375, -0.932830810546875, -0.8544921875, -0.776153564453125, -0.69781494140625, -0.619476318359375, -0.5411376953125, -0.462799072265625, -0.38446044921875, -0.306121826171875, -0.227783203125, -0.149444580078125, -0.07110595703125, 0.007232666015625, 0.0855712890625, 0.163909912109375, 0.24224853515625, 0.320587158203125, 0.39892578125, 0.477264404296875, 0.55560302734375, 0.633941650390625, 0.7122802734375, 0.790618896484375, 0.86895751953125, 0.947296142578125, 1.025634765625, 1.103973388671875, 1.18231201171875, 1.260650634765625, 1.3389892578125, 1.417327880859375, 1.49566650390625, 1.574005126953125, 1.65234375]}, "gradients/encoder.encoder.layers.0.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 3.0, 1.0, 3.0, 0.0, 4.0, 0.0, 6.0, 10.0, 17.0, 19.0, 38.0, 71.0, 152.0, 225.0, 225.0, 123.0, 49.0, 27.0, 11.0, 4.0, 7.0, 4.0, 5.0, 2.0, 0.0, 0.0, 1.0, 3.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-16.177806854248047, -15.654191970825195, -15.130577087402344, -14.606961250305176, -14.083346366882324, -13.559731483459473, -13.036115646362305, -12.512500762939453, -11.988885879516602, -11.46527099609375, -10.941656112670898, -10.41804027557373, -9.894425392150879, -9.370810508728027, -8.84719467163086, -8.323579788208008, -7.799964904785156, -7.276350021362305, -6.752734661102295, -6.229119300842285, -5.705504417419434, -5.181889533996582, -4.658274173736572, -4.1346588134765625, -3.611043930053711, -3.0874288082122803, -2.5638136863708496, -2.040198564529419, -1.5165834426879883, -0.9929683208465576, -0.46935319900512695, 0.05426192283630371, 0.5778770446777344, 1.101492166519165, 1.6251072883605957, 2.1487224102020264, 2.672337532043457, 3.1959526538848877, 3.7195677757263184, 4.243183135986328, 4.76679801940918, 5.290412902832031, 5.814028263092041, 6.337643623352051, 6.861258506774902, 7.384873390197754, 7.908488750457764, 8.432104110717773, 8.955718994140625, 9.479333877563477, 10.002948760986328, 10.526564598083496, 11.050179481506348, 11.5737943649292, 12.097410202026367, 12.621025085449219, 13.14463996887207, 13.668254852294922, 14.191869735717773, 14.715485572814941, 15.239100456237793, 15.762715339660645, 16.286331176757812, 16.809946060180664, 17.333560943603516]}, "gradients/encoder.encoder.layers.0.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 9.0, 5.0, 8.0, 20.0, 28.0, 32.0, 35.0, 47.0, 53.0, 52.0, 67.0, 90.0, 86.0, 73.0, 80.0, 69.0, 48.0, 48.0, 31.0, 22.0, 24.0, 14.0, 19.0, 15.0, 4.0, 4.0, 6.0, 4.0, 4.0, 1.0, 3.0, 1.0, 1.0, 4.0, 0.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.463289260864258, -9.098551750183105, -8.73381519317627, -8.369077682495117, -8.004341125488281, -7.639603614807129, -7.274866580963135, -6.910129547119141, -6.5453925132751465, -6.180655479431152, -5.815918445587158, -5.451181411743164, -5.086443901062012, -4.721707344055176, -4.356969833374023, -3.9922327995300293, -3.627495765686035, -3.262758731842041, -2.898021697998047, -2.5332844257354736, -2.1685473918914795, -1.8038103580474854, -1.439073085784912, -1.074336051940918, -0.7095990180969238, -0.3448619246482849, 0.019875168800354004, 0.3846123218536377, 0.7493493556976318, 1.114086389541626, 1.4788236618041992, 1.8435606956481934, 2.208296775817871, 2.5730338096618652, 2.9377708435058594, 3.3025081157684326, 3.6672451496124268, 4.031982421875, 4.396719455718994, 4.761456489562988, 5.126193523406982, 5.490930557250977, 5.855667591094971, 6.220404624938965, 6.585142135620117, 6.949878692626953, 7.3146162033081055, 7.6793532371521, 8.044090270996094, 8.408827781677246, 8.773564338684082, 9.138301849365234, 9.50303840637207, 9.867775917053223, 10.232513427734375, 10.597249984741211, 10.961986541748047, 11.3267240524292, 11.691460609436035, 12.056198120117188, 12.420934677124023, 12.785672187805176, 13.150409698486328, 13.515146255493164, 13.879883766174316]}, "gradients/encoder.encoder.layers.0.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 5.0, 9.0, 3.0, 10.0, 9.0, 23.0, 22.0, 18.0, 49.0, 78.0, 106.0, 217.0, 404.0, 816.0, 1871.0, 5164.0, 18007.0, 95168.0, 727433.0, 161653.0, 26094.0, 6909.0, 2380.0, 977.0, 467.0, 243.0, 133.0, 99.0, 59.0, 45.0, 31.0, 24.0, 20.0, 8.0, 2.0, 2.0, 2.0, 4.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.9111328125, -0.88232421875, -0.853515625, -0.82470703125, -0.7958984375, -0.76708984375, -0.73828125, -0.70947265625, -0.6806640625, -0.65185546875, -0.623046875, -0.59423828125, -0.5654296875, -0.53662109375, -0.5078125, -0.47900390625, -0.4501953125, -0.42138671875, -0.392578125, -0.36376953125, -0.3349609375, -0.30615234375, -0.27734375, -0.24853515625, -0.2197265625, -0.19091796875, -0.162109375, -0.13330078125, -0.1044921875, -0.07568359375, -0.046875, -0.01806640625, 0.0107421875, 0.03955078125, 0.068359375, 0.09716796875, 0.1259765625, 0.15478515625, 0.18359375, 0.21240234375, 0.2412109375, 0.27001953125, 0.298828125, 0.32763671875, 0.3564453125, 0.38525390625, 0.4140625, 0.44287109375, 0.4716796875, 0.50048828125, 0.529296875, 0.55810546875, 0.5869140625, 0.61572265625, 0.64453125, 0.67333984375, 0.7021484375, 0.73095703125, 0.759765625, 0.78857421875, 0.8173828125, 0.84619140625, 0.875, 0.90380859375, 0.9326171875]}, "gradients/encoder.encoder.layers.0.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 5.0, 0.0, 3.0, 3.0, 6.0, 7.0, 13.0, 27.0, 38.0, 47.0, 67.0, 84.0, 99.0, 100.0, 98.0, 97.0, 87.0, 78.0, 46.0, 37.0, 17.0, 17.0, 15.0, 5.0, 6.0, 3.0, 2.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.021484375, -0.9906158447265625, -0.959747314453125, -0.9288787841796875, -0.89801025390625, -0.8671417236328125, -0.836273193359375, -0.8054046630859375, -0.7745361328125, -0.7436676025390625, -0.712799072265625, -0.6819305419921875, -0.65106201171875, -0.6201934814453125, -0.589324951171875, -0.5584564208984375, -0.527587890625, -0.4967193603515625, -0.465850830078125, -0.4349822998046875, -0.40411376953125, -0.3732452392578125, -0.342376708984375, -0.3115081787109375, -0.2806396484375, -0.2497711181640625, -0.218902587890625, -0.1880340576171875, -0.15716552734375, -0.1262969970703125, -0.095428466796875, -0.0645599365234375, -0.03369140625, -0.0028228759765625, 0.028045654296875, 0.0589141845703125, 0.08978271484375, 0.1206512451171875, 0.151519775390625, 0.1823883056640625, 0.2132568359375, 0.2441253662109375, 0.274993896484375, 0.3058624267578125, 0.33673095703125, 0.3675994873046875, 0.398468017578125, 0.4293365478515625, 0.460205078125, 0.4910736083984375, 0.521942138671875, 0.5528106689453125, 0.58367919921875, 0.6145477294921875, 0.645416259765625, 0.6762847900390625, 0.7071533203125, 0.7380218505859375, 0.768890380859375, 0.7997589111328125, 0.83062744140625, 0.8614959716796875, 0.892364501953125, 0.9232330322265625, 0.9541015625]}, "gradients/encoder.encoder.layers.0.attention.v_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 3.0, 3.0, 3.0, 5.0, 5.0, 8.0, 10.0, 14.0, 21.0, 33.0, 37.0, 63.0, 113.0, 158.0, 314.0, 549.0, 1394.0, 6492.0, 85829.0, 922179.0, 25746.0, 3343.0, 1031.0, 455.0, 259.0, 157.0, 95.0, 54.0, 58.0, 37.0, 25.0, 13.0, 13.0, 9.0, 7.0, 6.0, 5.0, 9.0, 5.0, 4.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.02734375, -0.98828125, -0.94921875, -0.91015625, -0.87109375, -0.83203125, -0.79296875, -0.75390625, -0.71484375, -0.67578125, -0.63671875, -0.59765625, -0.55859375, -0.51953125, -0.48046875, -0.44140625, -0.40234375, -0.36328125, -0.32421875, -0.28515625, -0.24609375, -0.20703125, -0.16796875, -0.12890625, -0.08984375, -0.05078125, -0.01171875, 0.02734375, 0.06640625, 0.10546875, 0.14453125, 0.18359375, 0.22265625, 0.26171875, 0.30078125, 0.33984375, 0.37890625, 0.41796875, 0.45703125, 0.49609375, 0.53515625, 0.57421875, 0.61328125, 0.65234375, 0.69140625, 0.73046875, 0.76953125, 0.80859375, 0.84765625, 0.88671875, 0.92578125, 0.96484375, 1.00390625, 1.04296875, 1.08203125, 1.12109375, 1.16015625, 1.19921875, 1.23828125, 1.27734375, 1.31640625, 1.35546875, 1.39453125, 1.43359375, 1.47265625]}, "gradients/encoder.encoder.layers.0.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 2.0, 4.0, 2.0, 1.0, 5.0, 8.0, 4.0, 7.0, 7.0, 7.0, 21.0, 18.0, 23.0, 13.0, 32.0, 34.0, 44.0, 51.0, 66.0, 68.0, 64.0, 83.0, 72.0, 55.0, 47.0, 35.0, 41.0, 31.0, 23.0, 31.0, 22.0, 19.0, 13.0, 11.0, 11.0, 6.0, 5.0, 4.0, 5.0, 6.0, 2.0, 3.0, 4.0, 3.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.8544921875, -1.7902984619140625, -1.726104736328125, -1.6619110107421875, -1.59771728515625, -1.5335235595703125, -1.469329833984375, -1.4051361083984375, -1.3409423828125, -1.2767486572265625, -1.212554931640625, -1.1483612060546875, -1.08416748046875, -1.0199737548828125, -0.955780029296875, -0.8915863037109375, -0.827392578125, -0.7631988525390625, -0.699005126953125, -0.6348114013671875, -0.57061767578125, -0.5064239501953125, -0.442230224609375, -0.3780364990234375, -0.3138427734375, -0.2496490478515625, -0.185455322265625, -0.1212615966796875, -0.05706787109375, 0.0071258544921875, 0.071319580078125, 0.1355133056640625, 0.19970703125, 0.2639007568359375, 0.328094482421875, 0.3922882080078125, 0.45648193359375, 0.5206756591796875, 0.584869384765625, 0.6490631103515625, 0.7132568359375, 0.7774505615234375, 0.841644287109375, 0.9058380126953125, 0.97003173828125, 1.0342254638671875, 1.098419189453125, 1.1626129150390625, 1.226806640625, 1.2910003662109375, 1.355194091796875, 1.4193878173828125, 1.48358154296875, 1.5477752685546875, 1.611968994140625, 1.6761627197265625, 1.7403564453125, 1.8045501708984375, 1.868743896484375, 1.9329376220703125, 1.99713134765625, 2.0613250732421875, 2.125518798828125, 2.1897125244140625, 2.25390625]}, "gradients/encoder.encoder.layers.0.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 5.0, 3.0, 3.0, 2.0, 3.0, 7.0, 4.0, 8.0, 11.0, 15.0, 17.0, 26.0, 48.0, 61.0, 87.0, 150.0, 289.0, 636.0, 1798.0, 7549.0, 87477.0, 922487.0, 22280.0, 3536.0, 1056.0, 412.0, 221.0, 132.0, 71.0, 45.0, 30.0, 16.0, 15.0, 11.0, 10.0, 12.0, 11.0, 6.0, 5.0, 3.0, 0.0, 2.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.2451171875, -0.2379627227783203, -0.23080825805664062, -0.22365379333496094, -0.21649932861328125, -0.20934486389160156, -0.20219039916992188, -0.1950359344482422, -0.1878814697265625, -0.1807270050048828, -0.17357254028320312, -0.16641807556152344, -0.15926361083984375, -0.15210914611816406, -0.14495468139648438, -0.1378002166748047, -0.130645751953125, -0.12349128723144531, -0.11633682250976562, -0.10918235778808594, -0.10202789306640625, -0.09487342834472656, -0.08771896362304688, -0.08056449890136719, -0.0734100341796875, -0.06625556945800781, -0.059101104736328125, -0.05194664001464844, -0.04479217529296875, -0.03763771057128906, -0.030483245849609375, -0.023328781127929688, -0.01617431640625, -0.009019851684570312, -0.001865386962890625, 0.0052890777587890625, 0.01244354248046875, 0.019598007202148438, 0.026752471923828125, 0.03390693664550781, 0.0410614013671875, 0.04821586608886719, 0.055370330810546875, 0.06252479553222656, 0.06967926025390625, 0.07683372497558594, 0.08398818969726562, 0.09114265441894531, 0.098297119140625, 0.10545158386230469, 0.11260604858398438, 0.11976051330566406, 0.12691497802734375, 0.13406944274902344, 0.14122390747070312, 0.1483783721923828, 0.1555328369140625, 0.1626873016357422, 0.16984176635742188, 0.17699623107910156, 0.18415069580078125, 0.19130516052246094, 0.19845962524414062, 0.2056140899658203, 0.2127685546875]}, "gradients/encoder.encoder.layers.0.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 2.0, 4.0, 1.0, 14.0, 5.0, 5.0, 13.0, 16.0, 25.0, 31.0, 37.0, 52.0, 84.0, 102.0, 109.0, 124.0, 102.0, 69.0, 57.0, 44.0, 24.0, 17.0, 14.0, 10.0, 18.0, 5.0, 10.0, 4.0, 2.0, 2.0, 2.0, 0.0, 2.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.737211227416992e-05, -3.620237112045288e-05, -3.503262996673584e-05, -3.38628888130188e-05, -3.269314765930176e-05, -3.152340650558472e-05, -3.0353665351867676e-05, -2.9183924198150635e-05, -2.8014183044433594e-05, -2.6844441890716553e-05, -2.5674700736999512e-05, -2.450495958328247e-05, -2.333521842956543e-05, -2.216547727584839e-05, -2.0995736122131348e-05, -1.9825994968414307e-05, -1.8656253814697266e-05, -1.7486512660980225e-05, -1.6316771507263184e-05, -1.5147030353546143e-05, -1.3977289199829102e-05, -1.280754804611206e-05, -1.163780689239502e-05, -1.0468065738677979e-05, -9.298324584960938e-06, -8.128583431243896e-06, -6.9588422775268555e-06, -5.7891011238098145e-06, -4.6193599700927734e-06, -3.4496188163757324e-06, -2.2798776626586914e-06, -1.1101365089416504e-06, 5.960464477539063e-08, 1.2293457984924316e-06, 2.3990869522094727e-06, 3.5688281059265137e-06, 4.738569259643555e-06, 5.908310413360596e-06, 7.078051567077637e-06, 8.247792720794678e-06, 9.417533874511719e-06, 1.058727502822876e-05, 1.17570161819458e-05, 1.2926757335662842e-05, 1.4096498489379883e-05, 1.5266239643096924e-05, 1.6435980796813965e-05, 1.7605721950531006e-05, 1.8775463104248047e-05, 1.9945204257965088e-05, 2.111494541168213e-05, 2.228468656539917e-05, 2.345442771911621e-05, 2.4624168872833252e-05, 2.5793910026550293e-05, 2.6963651180267334e-05, 2.8133392333984375e-05, 2.9303133487701416e-05, 3.0472874641418457e-05, 3.16426157951355e-05, 3.281235694885254e-05, 3.398209810256958e-05, 3.515183925628662e-05, 3.632158041000366e-05, 3.74913215637207e-05]}, "gradients/encoder.encoder.layers.0.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 3.0, 1.0, 3.0, 1.0, 1.0, 5.0, 6.0, 9.0, 6.0, 6.0, 11.0, 27.0, 31.0, 43.0, 104.0, 188.0, 341.0, 747.0, 1772.0, 4962.0, 19621.0, 175400.0, 802643.0, 31693.0, 7015.0, 2152.0, 820.0, 401.0, 229.0, 114.0, 75.0, 40.0, 24.0, 17.0, 13.0, 6.0, 5.0, 8.0, 2.0, 3.0, 7.0, 5.0, 4.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.12384033203125, -0.11943912506103516, -0.11503791809082031, -0.11063671112060547, -0.10623550415039062, -0.10183429718017578, -0.09743309020996094, -0.0930318832397461, -0.08863067626953125, -0.0842294692993164, -0.07982826232910156, -0.07542705535888672, -0.07102584838867188, -0.06662464141845703, -0.06222343444824219, -0.057822227478027344, -0.0534210205078125, -0.049019813537597656, -0.04461860656738281, -0.04021739959716797, -0.035816192626953125, -0.03141498565673828, -0.027013778686523438, -0.022612571716308594, -0.01821136474609375, -0.013810157775878906, -0.009408950805664062, -0.005007743835449219, -0.000606536865234375, 0.0037946701049804688, 0.008195877075195312, 0.012597084045410156, 0.016998291015625, 0.021399497985839844, 0.025800704956054688, 0.03020191192626953, 0.034603118896484375, 0.03900432586669922, 0.04340553283691406, 0.047806739807128906, 0.05220794677734375, 0.056609153747558594, 0.06101036071777344, 0.06541156768798828, 0.06981277465820312, 0.07421398162841797, 0.07861518859863281, 0.08301639556884766, 0.0874176025390625, 0.09181880950927734, 0.09622001647949219, 0.10062122344970703, 0.10502243041992188, 0.10942363739013672, 0.11382484436035156, 0.1182260513305664, 0.12262725830078125, 0.1270284652709961, 0.13142967224121094, 0.13583087921142578, 0.14023208618164062, 0.14463329315185547, 0.1490345001220703, 0.15343570709228516, 0.1578369140625]}, "gradients/encoder.encoder.layers.0.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 3.0, 2.0, 3.0, 3.0, 7.0, 5.0, 8.0, 6.0, 9.0, 8.0, 9.0, 17.0, 29.0, 38.0, 45.0, 81.0, 92.0, 122.0, 135.0, 99.0, 77.0, 62.0, 45.0, 26.0, 16.0, 11.0, 9.0, 8.0, 7.0, 2.0, 4.0, 2.0, 3.0, 2.0, 3.0, 1.0, 4.0, 2.0, 4.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.125, -0.119842529296875, -0.11468505859375, -0.109527587890625, -0.1043701171875, -0.099212646484375, -0.09405517578125, -0.088897705078125, -0.083740234375, -0.078582763671875, -0.07342529296875, -0.068267822265625, -0.0631103515625, -0.057952880859375, -0.05279541015625, -0.047637939453125, -0.04248046875, -0.037322998046875, -0.03216552734375, -0.027008056640625, -0.0218505859375, -0.016693115234375, -0.01153564453125, -0.006378173828125, -0.001220703125, 0.003936767578125, 0.00909423828125, 0.014251708984375, 0.0194091796875, 0.024566650390625, 0.02972412109375, 0.034881591796875, 0.0400390625, 0.045196533203125, 0.05035400390625, 0.055511474609375, 0.0606689453125, 0.065826416015625, 0.07098388671875, 0.076141357421875, 0.081298828125, 0.086456298828125, 0.09161376953125, 0.096771240234375, 0.1019287109375, 0.107086181640625, 0.11224365234375, 0.117401123046875, 0.12255859375, 0.127716064453125, 0.13287353515625, 0.138031005859375, 0.1431884765625, 0.148345947265625, 0.15350341796875, 0.158660888671875, 0.163818359375, 0.168975830078125, 0.17413330078125, 0.179290771484375, 0.1844482421875, 0.189605712890625, 0.19476318359375, 0.199920654296875, 0.205078125]}, "gradients/encoder.encoder.layers.0.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 2.0, 4.0, 18.0, 43.0, 121.0, 626.0, 110.0, 49.0, 23.0, 5.0, 2.0, 3.0, 4.0, 2.0, 1.0, 0.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.510433197021484, -9.08431625366211, -8.658199310302734, -8.23208236694336, -7.805965423583984, -7.379848480224609, -6.953731060028076, -6.527614116668701, -6.101497173309326, -5.675380229949951, -5.249263286590576, -4.823145866394043, -4.397028923034668, -3.970912218093872, -3.544795036315918, -3.118678092956543, -2.692561149597168, -2.266444206237793, -1.8403271436691284, -1.4142100811004639, -0.9880931377410889, -0.5619761943817139, -0.13585901260375977, 0.29025793075561523, 0.7163748741149902, 1.1424918174743652, 1.5686088800430298, 1.9947259426116943, 2.4208428859710693, 2.8469598293304443, 3.2730770111083984, 3.6991939544677734, 4.125310897827148, 4.551427841186523, 4.977544784545898, 5.403661727905273, 5.829778671264648, 6.255895614624023, 6.682013034820557, 7.108129978179932, 7.534246921539307, 7.960363864898682, 8.386481285095215, 8.81259822845459, 9.238715171813965, 9.66483211517334, 10.090949058532715, 10.51706600189209, 10.943182945251465, 11.36929988861084, 11.795416831970215, 12.22153377532959, 12.647650718688965, 13.07376766204834, 13.499885559082031, 13.926002502441406, 14.352119445800781, 14.778236389160156, 15.204353332519531, 15.630470275878906, 16.05658721923828, 16.482704162597656, 16.90882110595703, 17.334938049316406, 17.76105499267578]}, "gradients/encoder.encoder.layers.0.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 5.0, 6.0, 14.0, 10.0, 11.0, 13.0, 19.0, 19.0, 32.0, 25.0, 29.0, 41.0, 61.0, 422.0, 76.0, 35.0, 38.0, 23.0, 28.0, 17.0, 18.0, 16.0, 10.0, 9.0, 5.0, 8.0, 12.0, 2.0, 2.0, 3.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-10.549214363098145, -10.295380592346191, -10.041546821594238, -9.787712097167969, -9.533878326416016, -9.280044555664062, -9.02621078491211, -8.772377014160156, -8.518543243408203, -8.26470947265625, -8.010875701904297, -7.7570414543151855, -7.503207683563232, -7.249373435974121, -6.995539665222168, -6.741705894470215, -6.4878716468811035, -6.23403787612915, -5.980203628540039, -5.726369857788086, -5.472536087036133, -5.21870231628418, -4.964868068695068, -4.711034297943115, -4.457200050354004, -4.203366279602051, -3.9495322704315186, -3.6956982612609863, -3.441864490509033, -3.188030481338501, -2.9341964721679688, -2.6803627014160156, -2.4265289306640625, -2.1726949214935303, -1.9188611507415771, -1.665027141571045, -1.4111932516098022, -1.1573593616485596, -0.9035253524780273, -0.6496914625167847, -0.395857572555542, -0.14202365279197693, 0.11181026697158813, 0.3656442165374756, 0.6194781064987183, 0.8733119964599609, 1.1271460056304932, 1.3809798955917358, 1.6348137855529785, 1.8886476755142212, 2.142481565475464, 2.396315574645996, 2.650149345397949, 2.9039833545684814, 3.1578173637390137, 3.411651134490967, 3.665485143661499, 3.9193191528320312, 4.173152923583984, 4.4269866943359375, 4.680820941925049, 4.934654712677002, 5.188488960266113, 5.442322731018066, 5.6961565017700195]}, "gradients/encoder.encoder.pos_conv_embed.conv.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 5.0, 3.0, 6.0, 9.0, 6.0, 16.0, 11.0, 16.0, 15.0, 23.0, 22.0, 33.0, 47.0, 36.0, 113.0, 280.0, 92.0, 43.0, 39.0, 23.0, 25.0, 32.0, 23.0, 15.0, 14.0, 8.0, 8.0, 9.0, 12.0, 7.0, 5.0, 3.0, 5.0, 2.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.580078125, -0.5631637573242188, -0.5462493896484375, -0.5293350219726562, -0.512420654296875, -0.49550628662109375, -0.4785919189453125, -0.46167755126953125, -0.44476318359375, -0.42784881591796875, -0.4109344482421875, -0.39402008056640625, -0.377105712890625, -0.36019134521484375, -0.3432769775390625, -0.32636260986328125, -0.3094482421875, -0.29253387451171875, -0.2756195068359375, -0.25870513916015625, -0.241790771484375, -0.22487640380859375, -0.2079620361328125, -0.19104766845703125, -0.17413330078125, -0.15721893310546875, -0.1403045654296875, -0.12339019775390625, -0.106475830078125, -0.08956146240234375, -0.0726470947265625, -0.05573272705078125, -0.038818359375, -0.02190399169921875, -0.0049896240234375, 0.01192474365234375, 0.028839111328125, 0.04575347900390625, 0.0626678466796875, 0.07958221435546875, 0.09649658203125, 0.11341094970703125, 0.1303253173828125, 0.14723968505859375, 0.164154052734375, 0.18106842041015625, 0.1979827880859375, 0.21489715576171875, 0.2318115234375, 0.24872589111328125, 0.2656402587890625, 0.28255462646484375, 0.299468994140625, 0.31638336181640625, 0.3332977294921875, 0.35021209716796875, 0.36712646484375, 0.38404083251953125, 0.4009552001953125, 0.41786956787109375, 0.434783935546875, 0.45169830322265625, 0.4686126708984375, 0.48552703857421875, 0.50244140625]}, "gradients/encoder.encoder.pos_conv_embed.conv.weight_v": {"_type": "histogram", "values": [1.0, 3.0, 4.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 1.0, 8.0, 10.0, 12.0, 18.0, 21.0, 17.0, 38.0, 59.0, 108.0, 247.0, 567.0, 1998.0, 18255.0, 8357651.0, 7508.0, 1256.0, 401.0, 158.0, 89.0, 64.0, 27.0, 15.0, 5.0, 9.0, 15.0, 3.0, 0.0, 8.0, 1.0, 3.0, 7.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 3.0], "bins": [-5.078289985656738, -4.905268669128418, -4.732247829437256, -4.5592265129089355, -4.386205196380615, -4.213184356689453, -4.040163040161133, -3.8671417236328125, -3.6941206455230713, -3.52109956741333, -3.3480782508850098, -3.1750571727752686, -3.0020360946655273, -2.829014778137207, -2.655993700027466, -2.4829726219177246, -2.3099513053894043, -2.136930227279663, -1.9639089107513428, -1.7908878326416016, -1.6178666353225708, -1.44484543800354, -1.2718243598937988, -1.098803162574768, -0.9257819652557373, -0.7527607679367065, -0.5797396302223206, -0.40671849250793457, -0.2336972951889038, -0.06067609786987305, 0.11234498023986816, 0.2853661775588989, 0.4583878517150879, 0.6314090490341187, 0.8044301867485046, 0.9774513244628906, 1.1504725217819214, 1.3234937191009521, 1.4965147972106934, 1.6695359945297241, 1.8425571918487549, 2.015578269958496, 2.1885995864868164, 2.3616206645965576, 2.534641742706299, 2.707663059234619, 2.8806841373443604, 3.0537052154541016, 3.226726531982422, 3.399747610092163, 3.5727689266204834, 3.7457900047302246, 3.918811321258545, 4.091832160949707, 4.264853477478027, 4.437874794006348, 4.610896110534668, 4.783917427062988, 4.95693826675415, 5.129959583282471, 5.302980899810791, 5.476001739501953, 5.649023056030273, 5.822044372558594, 5.995065212249756]}, "gradients/encoder.encoder.pos_conv_embed.conv.weight_g": {"_type": "histogram", "values": [1.0, 0.0, 4.0, 0.0, 2.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 4.0, 4.0, 2.0, 2.0, 3.0, 4.0, 2.0, 2.0, 2.0, 4.0, 4.0, 7.0, 6.0, 4.0, 0.0, 3.0, 4.0, 5.0, 4.0, 4.0, 4.0, 4.0, 3.0, 6.0, 3.0, 3.0, 1.0, 2.0, 1.0, 0.0, 3.0, 1.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-7.025946140289307, -6.829422950744629, -6.632899761199951, -6.436376571655273, -6.239853858947754, -6.043330669403076, -5.846807479858398, -5.650284290313721, -5.453761100769043, -5.257237911224365, -5.0607147216796875, -4.864192008972168, -4.66766881942749, -4.4711456298828125, -4.274622440338135, -4.078099250793457, -3.8815762996673584, -3.6850531101226807, -3.488530158996582, -3.2920069694519043, -3.0954837799072266, -2.898960590362549, -2.70243763923645, -2.5059144496917725, -2.309391498565674, -2.112868309020996, -1.916345238685608, -1.7198221683502197, -1.523298978805542, -1.3267759084701538, -1.1302528381347656, -0.9337296485900879, -0.7372064590454102, -0.5406833291053772, -0.3441602289676666, -0.14763712882995605, 0.048886001110076904, 0.24540913105010986, 0.44193220138549805, 0.6384553909301758, 0.834978461265564, 1.0315015316009521, 1.2280247211456299, 1.424547791481018, 1.6210708618164062, 1.817594051361084, 2.0141172409057617, 2.2106404304504395, 2.407163381576538, 2.603686571121216, 2.8002095222473145, 2.996732711791992, 3.19325590133667, 3.3897790908813477, 3.5863020420074463, 3.782825231552124, 3.9793481826782227, 4.1758713722229, 4.372394561767578, 4.568917274475098, 4.765440464019775, 4.961963653564453, 5.158486843109131, 5.355010032653809, 5.551533222198486]}, "gradients/encoder.feature_projection.projection.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 4.0, 1.0, 2.0, 1.0, 4.0, 4.0, 8.0, 8.0, 3.0, 10.0, 19.0, 19.0, 29.0, 39.0, 53.0, 81.0, 119.0, 215.0, 350.0, 618.0, 1250.0, 2799.0, 7178.0, 19828.0, 59519.0, 170968.0, 170874.0, 58794.0, 19134.0, 6916.0, 2712.0, 1214.0, 597.0, 336.0, 196.0, 118.0, 88.0, 41.0, 40.0, 26.0, 24.0, 17.0, 4.0, 8.0, 7.0, 3.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-3.150390625, -3.062896728515625, -2.97540283203125, -2.887908935546875, -2.8004150390625, -2.712921142578125, -2.62542724609375, -2.537933349609375, -2.450439453125, -2.362945556640625, -2.27545166015625, -2.187957763671875, -2.1004638671875, -2.012969970703125, -1.92547607421875, -1.837982177734375, -1.75048828125, -1.662994384765625, -1.57550048828125, -1.488006591796875, -1.4005126953125, -1.313018798828125, -1.22552490234375, -1.138031005859375, -1.050537109375, -0.963043212890625, -0.87554931640625, -0.788055419921875, -0.7005615234375, -0.613067626953125, -0.52557373046875, -0.438079833984375, -0.3505859375, -0.263092041015625, -0.17559814453125, -0.088104248046875, -0.0006103515625, 0.086883544921875, 0.17437744140625, 0.261871337890625, 0.349365234375, 0.436859130859375, 0.52435302734375, 0.611846923828125, 0.6993408203125, 0.786834716796875, 0.87432861328125, 0.961822509765625, 1.04931640625, 1.136810302734375, 1.22430419921875, 1.311798095703125, 1.3992919921875, 1.486785888671875, 1.57427978515625, 1.661773681640625, 1.749267578125, 1.836761474609375, 1.92425537109375, 2.011749267578125, 2.0992431640625, 2.186737060546875, 2.27423095703125, 2.361724853515625, 2.44921875]}, "gradients/encoder.feature_projection.projection.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 5.0, 4.0, 6.0, 9.0, 15.0, 21.0, 37.0, 38.0, 65.0, 94.0, 112.0, 157.0, 141.0, 112.0, 60.0, 52.0, 28.0, 23.0, 15.0, 9.0, 3.0, 1.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.1904296875, -1.155120849609375, -1.11981201171875, -1.084503173828125, -1.0491943359375, -1.013885498046875, -0.97857666015625, -0.943267822265625, -0.907958984375, -0.872650146484375, -0.83734130859375, -0.802032470703125, -0.7667236328125, -0.731414794921875, -0.69610595703125, -0.660797119140625, -0.62548828125, -0.590179443359375, -0.55487060546875, -0.519561767578125, -0.4842529296875, -0.448944091796875, -0.41363525390625, -0.378326416015625, -0.343017578125, -0.307708740234375, -0.27239990234375, -0.237091064453125, -0.2017822265625, -0.166473388671875, -0.13116455078125, -0.095855712890625, -0.060546875, -0.025238037109375, 0.01007080078125, 0.045379638671875, 0.0806884765625, 0.115997314453125, 0.15130615234375, 0.186614990234375, 0.221923828125, 0.257232666015625, 0.29254150390625, 0.327850341796875, 0.3631591796875, 0.398468017578125, 0.43377685546875, 0.469085693359375, 0.50439453125, 0.539703369140625, 0.57501220703125, 0.610321044921875, 0.6456298828125, 0.680938720703125, 0.71624755859375, 0.751556396484375, 0.786865234375, 0.822174072265625, 0.85748291015625, 0.892791748046875, 0.9281005859375, 0.963409423828125, 0.99871826171875, 1.034027099609375, 1.0693359375]}, "gradients/encoder.feature_projection.layer_norm.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 3.0, 1.0, 3.0, 3.0, 4.0, 8.0, 3.0, 11.0, 19.0, 33.0, 54.0, 101.0, 104.0, 69.0, 34.0, 13.0, 9.0, 9.0, 2.0, 3.0, 2.0, 3.0, 1.0, 0.0, 4.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.014385223388672, -2.877683162689209, -2.740981101989746, -2.604279041290283, -2.4675769805908203, -2.3308749198913574, -2.1941728591918945, -2.0574707984924316, -1.9207688570022583, -1.7840667963027954, -1.6473647356033325, -1.5106627941131592, -1.3739607334136963, -1.2372586727142334, -1.1005566120147705, -0.9638545513153076, -0.8271524906158447, -0.6904504299163818, -0.553748369216919, -0.41704636812210083, -0.28034430742263794, -0.14364224672317505, -0.006940245628356934, 0.12976181507110596, 0.26646387577056885, 0.40316593647003174, 0.5398679971694946, 0.6765699982643127, 0.8132720589637756, 0.9499741196632385, 1.0866761207580566, 1.2233781814575195, 1.3600802421569824, 1.4967823028564453, 1.6334843635559082, 1.770186424255371, 1.906888484954834, 2.043590545654297, 2.1802926063537598, 2.3169946670532227, 2.4536967277526855, 2.5903987884521484, 2.7271008491516113, 2.863802909851074, 3.000504970550537, 3.13720703125, 3.273909091949463, 3.410611152648926, 3.5473129749298096, 3.6840150356292725, 3.8207170963287354, 3.9574191570281982, 4.094120979309082, 4.230823040008545, 4.367525100708008, 4.504227161407471, 4.640929222106934, 4.7776312828063965, 4.914333343505859, 5.051035404205322, 5.187737464904785, 5.324439525604248, 5.461141586303711, 5.597843647003174, 5.734545707702637]}, "gradients/encoder.feature_projection.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 3.0, 5.0, 2.0, 5.0, 3.0, 3.0, 6.0, 8.0, 3.0, 14.0, 26.0, 63.0, 133.0, 105.0, 51.0, 26.0, 8.0, 5.0, 4.0, 4.0, 3.0, 2.0, 5.0, 1.0, 4.0, 5.0, 0.0, 0.0, 4.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-5.935749530792236, -5.798346042633057, -5.660942554473877, -5.5235395431518555, -5.386136054992676, -5.248732566833496, -5.111329078674316, -4.973925590515137, -4.836522579193115, -4.6991190910339355, -4.561715602874756, -4.424312591552734, -4.286909103393555, -4.149505615234375, -4.012102127075195, -3.8746988773345947, -3.737295389175415, -3.5998919010162354, -3.4624886512756348, -3.325085163116455, -3.1876819133758545, -3.050278425216675, -2.912875175476074, -2.7754716873168945, -2.638068199157715, -2.500664710998535, -2.3632614612579346, -2.225857973098755, -2.0884547233581543, -1.9510512351989746, -1.8136478662490845, -1.6762444972991943, -1.5388410091400146, -1.4014376401901245, -1.2640342712402344, -1.1266307830810547, -0.9892274737358093, -0.8518241047859192, -0.7144206762313843, -0.5770173072814941, -0.439613938331604, -0.30221056938171387, -0.16480717062950134, -0.02740377187728882, 0.10999959707260132, 0.24740296602249146, 0.38480639457702637, 0.5222097635269165, 0.6596131324768066, 0.7970165014266968, 0.9344198703765869, 1.0718233585357666, 1.2092266082763672, 1.3466300964355469, 1.484033465385437, 1.6214368343353271, 1.7588402032852173, 1.8962435722351074, 2.033647060394287, 2.1710503101348877, 2.3084537982940674, 2.445857048034668, 2.5832605361938477, 2.7206640243530273, 2.858067274093628]}, "train/train_runtime": 4795.9642, "train/train_samples_per_second": 5.95, "train/train_steps_per_second": 0.062, "train/total_flos": 0.0, "train/train_loss": 4.303745459225844, "eval/loss": 4.729526996612549, "eval/wer": 2.3516065053550177, "eval/runtime": 946.0728, "eval/samples_per_second": 2.793, "eval/steps_per_second": 0.234} \ No newline at end of file +{"train/loss": 4.5291, "train/learning_rate": 0.00017759999999999998, "train/epoch": 1.0, "train/global_step": 297, "_runtime": 6097, "_timestamp": 1646257333, "_step": 298, "gradients/decoder.transformer.ln_f.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 32.0, 208.0, 492.0, 240.0, 41.0, 4.0, 1.0, 0.0, 1.0], "bins": [-272.8752136230469, -268.13763427734375, -263.4000549316406, -258.6624755859375, -253.92489624023438, -249.18731689453125, -244.44973754882812, -239.712158203125, -234.97457885742188, -230.23699951171875, -225.49942016601562, -220.7618408203125, -216.02426147460938, -211.28668212890625, -206.54910278320312, -201.8115234375, -197.07394409179688, -192.33636474609375, -187.59878540039062, -182.8612060546875, -178.12362670898438, -173.38604736328125, -168.64846801757812, -163.910888671875, -159.17330932617188, -154.43572998046875, -149.69815063476562, -144.9605712890625, -140.22299194335938, -135.48541259765625, -130.74783325195312, -126.01025390625, -121.2726821899414, -116.53510284423828, -111.79752349853516, -107.05994415283203, -102.3223648071289, -97.58478546142578, -92.84720611572266, -88.10962677001953, -83.3720474243164, -78.63446807861328, -73.89688873291016, -69.15930938720703, -64.4217300415039, -59.68415069580078, -54.946571350097656, -50.20899200439453, -45.471412658691406, -40.73383331298828, -35.996253967285156, -31.25867462158203, -26.521095275878906, -21.78351593017578, -17.045936584472656, -12.308357238769531, -7.570777893066406, -2.8331985473632812, 1.9043807983398438, 6.641960144042969, 11.379539489746094, 16.11711883544922, 20.854698181152344, 25.59227752685547, 30.329856872558594]}, "gradients/decoder.transformer.ln_f.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 5.0, 1.0, 2.0, 5.0, 4.0, 5.0, 10.0, 13.0, 18.0, 21.0, 25.0, 28.0, 32.0, 47.0, 44.0, 50.0, 56.0, 52.0, 60.0, 49.0, 59.0, 59.0, 57.0, 49.0, 52.0, 43.0, 39.0, 26.0, 24.0, 20.0, 13.0, 9.0, 10.0, 13.0, 7.0, 3.0, 3.0, 1.0, 2.0, 1.0, 2.0, 1.0, 1.0], "bins": [-83.19218444824219, -81.14936828613281, -79.10655212402344, -77.06373596191406, -75.02091217041016, -72.97809600830078, -70.9352798461914, -68.89246368408203, -66.84964752197266, -64.80683135986328, -62.76401138305664, -60.721195220947266, -58.67837905883789, -56.63555908203125, -54.592742919921875, -52.5499267578125, -50.50710678100586, -48.464290618896484, -46.421470642089844, -44.37865447998047, -42.335838317871094, -40.29302215576172, -38.25020217895508, -36.2073860168457, -34.16456604003906, -32.12174987792969, -30.07893180847168, -28.036113739013672, -25.993297576904297, -23.95047950744629, -21.90766143798828, -19.864845275878906, -17.822025299072266, -15.779208183288574, -13.736391067504883, -11.693572998046875, -9.650755882263184, -7.607938766479492, -5.565120697021484, -3.522303581237793, -1.4794864654541016, 0.563330888748169, 2.6061482429504395, 4.648965835571289, 6.6917829513549805, 8.734600067138672, 10.77741813659668, 12.820235252380371, 14.863052368164062, 16.90587043762207, 18.948686599731445, 20.991504669189453, 23.034320831298828, 25.077138900756836, 27.119956970214844, 29.16277313232422, 31.205591201782227, 33.248409271240234, 35.29122543334961, 37.33404541015625, 39.376861572265625, 41.419677734375, 43.462493896484375, 45.505313873291016, 47.54813003540039]}, "gradients/decoder.transformer.h.23.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 3.0, 3.0, 1.0, 6.0, 7.0, 9.0, 15.0, 16.0, 23.0, 27.0, 39.0, 40.0, 35.0, 58.0, 64.0, 56.0, 63.0, 42.0, 76.0, 66.0, 58.0, 52.0, 56.0, 49.0, 28.0, 27.0, 23.0, 23.0, 14.0, 9.0, 6.0, 6.0, 4.0, 4.0, 3.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.677734375, -2.58111572265625, -2.4844970703125, -2.38787841796875, -2.291259765625, -2.19464111328125, -2.0980224609375, -2.00140380859375, -1.90478515625, -1.80816650390625, -1.7115478515625, -1.61492919921875, -1.518310546875, -1.42169189453125, -1.3250732421875, -1.22845458984375, -1.1318359375, -1.03521728515625, -0.9385986328125, -0.84197998046875, -0.745361328125, -0.64874267578125, -0.5521240234375, -0.45550537109375, -0.35888671875, -0.26226806640625, -0.1656494140625, -0.06903076171875, 0.027587890625, 0.12420654296875, 0.2208251953125, 0.31744384765625, 0.4140625, 0.51068115234375, 0.6072998046875, 0.70391845703125, 0.800537109375, 0.89715576171875, 0.9937744140625, 1.09039306640625, 1.18701171875, 1.28363037109375, 1.3802490234375, 1.47686767578125, 1.573486328125, 1.67010498046875, 1.7667236328125, 1.86334228515625, 1.9599609375, 2.05657958984375, 2.1531982421875, 2.24981689453125, 2.346435546875, 2.44305419921875, 2.5396728515625, 2.63629150390625, 2.73291015625, 2.82952880859375, 2.9261474609375, 3.02276611328125, 3.119384765625, 3.21600341796875, 3.3126220703125, 3.40924072265625, 3.505859375]}, "gradients/decoder.transformer.h.23.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 4.0, 4.0, 5.0, 2.0, 6.0, 19.0, 12.0, 26.0, 46.0, 38.0, 51.0, 101.0, 185.0, 304.0, 489.0, 970.0, 2110.0, 5697.0, 21435.0, 140499.0, 2710044.0, 1216418.0, 74188.0, 14342.0, 4146.0, 1494.0, 696.0, 343.0, 188.0, 141.0, 85.0, 64.0, 43.0, 36.0, 13.0, 10.0, 14.0, 10.0, 5.0, 7.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-11.8671875, -11.4825439453125, -11.097900390625, -10.7132568359375, -10.32861328125, -9.9439697265625, -9.559326171875, -9.1746826171875, -8.7900390625, -8.4053955078125, -8.020751953125, -7.6361083984375, -7.25146484375, -6.8668212890625, -6.482177734375, -6.0975341796875, -5.712890625, -5.3282470703125, -4.943603515625, -4.5589599609375, -4.17431640625, -3.7896728515625, -3.405029296875, -3.0203857421875, -2.6357421875, -2.2510986328125, -1.866455078125, -1.4818115234375, -1.09716796875, -0.7125244140625, -0.327880859375, 0.0567626953125, 0.44140625, 0.8260498046875, 1.210693359375, 1.5953369140625, 1.97998046875, 2.3646240234375, 2.749267578125, 3.1339111328125, 3.5185546875, 3.9031982421875, 4.287841796875, 4.6724853515625, 5.05712890625, 5.4417724609375, 5.826416015625, 6.2110595703125, 6.595703125, 6.9803466796875, 7.364990234375, 7.7496337890625, 8.13427734375, 8.5189208984375, 8.903564453125, 9.2882080078125, 9.6728515625, 10.0574951171875, 10.442138671875, 10.8267822265625, 11.21142578125, 11.5960693359375, 11.980712890625, 12.3653564453125, 12.75]}, "gradients/decoder.transformer.h.23.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 2.0, 1.0, 1.0, 2.0, 2.0, 8.0, 10.0, 6.0, 16.0, 17.0, 39.0, 54.0, 63.0, 97.0, 138.0, 185.0, 271.0, 386.0, 515.0, 551.0, 461.0, 377.0, 260.0, 172.0, 150.0, 80.0, 60.0, 46.0, 29.0, 23.0, 18.0, 11.0, 4.0, 5.0, 4.0, 2.0, 5.0, 2.0, 1.0, 3.0, 2.0, 1.0, 2.0, 0.0, 1.0], "bins": [-11.390625, -11.09283447265625, -10.7950439453125, -10.49725341796875, -10.199462890625, -9.90167236328125, -9.6038818359375, -9.30609130859375, -9.00830078125, -8.71051025390625, -8.4127197265625, -8.11492919921875, -7.817138671875, -7.51934814453125, -7.2215576171875, -6.92376708984375, -6.6259765625, -6.32818603515625, -6.0303955078125, -5.73260498046875, -5.434814453125, -5.13702392578125, -4.8392333984375, -4.54144287109375, -4.24365234375, -3.94586181640625, -3.6480712890625, -3.35028076171875, -3.052490234375, -2.75469970703125, -2.4569091796875, -2.15911865234375, -1.861328125, -1.56353759765625, -1.2657470703125, -0.96795654296875, -0.670166015625, -0.37237548828125, -0.0745849609375, 0.22320556640625, 0.52099609375, 0.81878662109375, 1.1165771484375, 1.41436767578125, 1.712158203125, 2.00994873046875, 2.3077392578125, 2.60552978515625, 2.9033203125, 3.20111083984375, 3.4989013671875, 3.79669189453125, 4.094482421875, 4.39227294921875, 4.6900634765625, 4.98785400390625, 5.28564453125, 5.58343505859375, 5.8812255859375, 6.17901611328125, 6.476806640625, 6.77459716796875, 7.0723876953125, 7.37017822265625, 7.66796875]}, "gradients/decoder.transformer.h.23.mlp.c_fc.weight": {"_type": "histogram", "values": [2.0, 2.0, 1.0, 1.0, 0.0, 2.0, 4.0, 1.0, 1.0, 1.0, 1.0, 4.0, 3.0, 4.0, 4.0, 3.0, 5.0, 7.0, 6.0, 13.0, 25.0, 25.0, 43.0, 71.0, 116.0, 199.0, 330.0, 509.0, 1161.0, 3186.0, 21597.0, 1087839.0, 3027962.0, 43350.0, 4717.0, 1457.0, 703.0, 341.0, 230.0, 119.0, 69.0, 66.0, 30.0, 19.0, 14.0, 11.0, 10.0, 8.0, 6.0, 7.0, 4.0, 3.0, 3.0, 0.0, 2.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-32.78125, -31.7607421875, -30.740234375, -29.7197265625, -28.69921875, -27.6787109375, -26.658203125, -25.6376953125, -24.6171875, -23.5966796875, -22.576171875, -21.5556640625, -20.53515625, -19.5146484375, -18.494140625, -17.4736328125, -16.453125, -15.4326171875, -14.412109375, -13.3916015625, -12.37109375, -11.3505859375, -10.330078125, -9.3095703125, -8.2890625, -7.2685546875, -6.248046875, -5.2275390625, -4.20703125, -3.1865234375, -2.166015625, -1.1455078125, -0.125, 0.8955078125, 1.916015625, 2.9365234375, 3.95703125, 4.9775390625, 5.998046875, 7.0185546875, 8.0390625, 9.0595703125, 10.080078125, 11.1005859375, 12.12109375, 13.1416015625, 14.162109375, 15.1826171875, 16.203125, 17.2236328125, 18.244140625, 19.2646484375, 20.28515625, 21.3056640625, 22.326171875, 23.3466796875, 24.3671875, 25.3876953125, 26.408203125, 27.4287109375, 28.44921875, 29.4697265625, 30.490234375, 31.5107421875, 32.53125]}, "gradients/decoder.transformer.h.23.ln_2.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 7.0, 28.0, 127.0, 301.0, 333.0, 141.0, 62.0, 10.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-120.14483642578125, -117.35649108886719, -114.56815338134766, -111.77981567382812, -108.99147033691406, -106.203125, -103.41478729248047, -100.62644958496094, -97.83810424804688, -95.04975891113281, -92.26142120361328, -89.47308349609375, -86.68473815917969, -83.89639282226562, -81.1080551147461, -78.31971740722656, -75.5313720703125, -72.74302673339844, -69.9546890258789, -67.16635131835938, -64.37800598144531, -61.589664459228516, -58.80132293701172, -56.01298141479492, -53.224639892578125, -50.43629837036133, -47.64795684814453, -44.859615325927734, -42.07127380371094, -39.28293228149414, -36.494590759277344, -33.70624923706055, -30.91790008544922, -28.129558563232422, -25.341217041015625, -22.552875518798828, -19.76453399658203, -16.976192474365234, -14.187850952148438, -11.39950942993164, -8.611167907714844, -5.822826385498047, -3.03448486328125, -0.24614334106445312, 2.5421981811523438, 5.330539703369141, 8.118881225585938, 10.907222747802734, 13.695564270019531, 16.483905792236328, 19.272247314453125, 22.060588836669922, 24.84893035888672, 27.637271881103516, 30.425613403320312, 33.21395492553711, 36.002296447753906, 38.7906379699707, 41.5789794921875, 44.3673210144043, 47.155662536621094, 49.94400405883789, 52.73234558105469, 55.520687103271484, 58.30902862548828]}, "gradients/decoder.transformer.h.23.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 3.0, 0.0, 1.0, 2.0, 2.0, 8.0, 4.0, 9.0, 7.0, 17.0, 19.0, 19.0, 25.0, 25.0, 25.0, 37.0, 42.0, 25.0, 52.0, 49.0, 56.0, 47.0, 68.0, 43.0, 57.0, 42.0, 40.0, 35.0, 49.0, 41.0, 22.0, 29.0, 26.0, 20.0, 11.0, 19.0, 10.0, 8.0, 6.0, 3.0, 6.0, 3.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-39.958919525146484, -38.758872985839844, -37.5588264465332, -36.35877990722656, -35.15873718261719, -33.95869064331055, -32.758644104003906, -31.558597564697266, -30.358551025390625, -29.158504486083984, -27.958457946777344, -26.758413314819336, -25.558366775512695, -24.358320236206055, -23.158275604248047, -21.958229064941406, -20.758182525634766, -19.558135986328125, -18.358089447021484, -17.158044815063477, -15.957998275756836, -14.757951736450195, -13.557906150817871, -12.357860565185547, -11.157814025878906, -9.957767486572266, -8.757721900939941, -7.557675838470459, -6.357629776000977, -5.157583713531494, -3.9575376510620117, -2.7574920654296875, -1.5574455261230469, -0.35739946365356445, 0.842646598815918, 2.0426926612854004, 3.242738723754883, 4.442784786224365, 5.642830848693848, 6.842876434326172, 8.042922973632812, 9.242969512939453, 10.443015098571777, 11.643060684204102, 12.843107223510742, 14.043153762817383, 15.243199348449707, 16.44324493408203, 17.643291473388672, 18.843338012695312, 20.043384552001953, 21.24342918395996, 22.4434757232666, 23.643522262573242, 24.84356689453125, 26.04361343383789, 27.24365997314453, 28.443706512451172, 29.643753051757812, 30.84379768371582, 32.043846130371094, 33.24388885498047, 34.44393539428711, 35.64398193359375, 36.84402847290039]}, "gradients/decoder.transformer.h.23.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 2.0, 3.0, 7.0, 8.0, 11.0, 12.0, 18.0, 18.0, 33.0, 23.0, 30.0, 35.0, 39.0, 42.0, 51.0, 49.0, 46.0, 58.0, 48.0, 53.0, 55.0, 50.0, 44.0, 48.0, 36.0, 40.0, 24.0, 23.0, 25.0, 20.0, 16.0, 17.0, 8.0, 2.0, 2.0, 5.0, 2.0, 6.0, 2.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.53125, -2.4453125, -2.359375, -2.2734375, -2.1875, -2.1015625, -2.015625, -1.9296875, -1.84375, -1.7578125, -1.671875, -1.5859375, -1.5, -1.4140625, -1.328125, -1.2421875, -1.15625, -1.0703125, -0.984375, -0.8984375, -0.8125, -0.7265625, -0.640625, -0.5546875, -0.46875, -0.3828125, -0.296875, -0.2109375, -0.125, -0.0390625, 0.046875, 0.1328125, 0.21875, 0.3046875, 0.390625, 0.4765625, 0.5625, 0.6484375, 0.734375, 0.8203125, 0.90625, 0.9921875, 1.078125, 1.1640625, 1.25, 1.3359375, 1.421875, 1.5078125, 1.59375, 1.6796875, 1.765625, 1.8515625, 1.9375, 2.0234375, 2.109375, 2.1953125, 2.28125, 2.3671875, 2.453125, 2.5390625, 2.625, 2.7109375, 2.796875, 2.8828125, 2.96875]}, "gradients/decoder.transformer.h.23.crossattention.c_proj.weight": {"_type": "histogram", "values": [3.0, 2.0, 3.0, 2.0, 7.0, 6.0, 8.0, 6.0, 14.0, 16.0, 22.0, 42.0, 42.0, 51.0, 71.0, 103.0, 142.0, 225.0, 322.0, 529.0, 884.0, 1431.0, 2400.0, 4305.0, 7995.0, 15463.0, 29674.0, 57031.0, 107705.0, 181675.0, 235497.0, 178967.0, 105655.0, 55694.0, 29226.0, 14976.0, 7870.0, 4261.0, 2333.0, 1410.0, 882.0, 502.0, 359.0, 235.0, 129.0, 115.0, 71.0, 50.0, 47.0, 27.0, 15.0, 20.0, 19.0, 7.0, 8.0, 8.0, 3.0, 3.0, 1.0, 3.0, 2.0, 1.0, 0.0, 1.0], "bins": [-0.39013671875, -0.3773384094238281, -0.36454010009765625, -0.3517417907714844, -0.3389434814453125, -0.3261451721191406, -0.31334686279296875, -0.3005485534667969, -0.287750244140625, -0.2749519348144531, -0.26215362548828125, -0.24935531616210938, -0.2365570068359375, -0.22375869750976562, -0.21096038818359375, -0.19816207885742188, -0.18536376953125, -0.17256546020507812, -0.15976715087890625, -0.14696884155273438, -0.1341705322265625, -0.12137222290039062, -0.10857391357421875, -0.09577560424804688, -0.082977294921875, -0.07017898559570312, -0.05738067626953125, -0.044582366943359375, -0.0317840576171875, -0.018985748291015625, -0.00618743896484375, 0.006610870361328125, 0.0194091796875, 0.032207489013671875, 0.04500579833984375, 0.057804107666015625, 0.0706024169921875, 0.08340072631835938, 0.09619903564453125, 0.10899734497070312, 0.121795654296875, 0.13459396362304688, 0.14739227294921875, 0.16019058227539062, 0.1729888916015625, 0.18578720092773438, 0.19858551025390625, 0.21138381958007812, 0.22418212890625, 0.23698043823242188, 0.24977874755859375, 0.2625770568847656, 0.2753753662109375, 0.2881736755371094, 0.30097198486328125, 0.3137702941894531, 0.326568603515625, 0.3393669128417969, 0.35216522216796875, 0.3649635314941406, 0.3777618408203125, 0.3905601501464844, 0.40335845947265625, 0.4161567687988281, 0.428955078125]}, "gradients/decoder.transformer.h.23.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 2.0, 3.0, 5.0, 6.0, 6.0, 3.0, 8.0, 7.0, 11.0, 18.0, 16.0, 15.0, 34.0, 24.0, 31.0, 39.0, 29.0, 37.0, 40.0, 35.0, 36.0, 57.0, 48.0, 1066.0, 42.0, 40.0, 37.0, 35.0, 30.0, 33.0, 35.0, 36.0, 22.0, 25.0, 22.0, 17.0, 22.0, 13.0, 14.0, 6.0, 10.0, 11.0, 3.0, 2.0, 1.0, 4.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-1.568359375, -1.52252197265625, -1.4766845703125, -1.43084716796875, -1.385009765625, -1.33917236328125, -1.2933349609375, -1.24749755859375, -1.20166015625, -1.15582275390625, -1.1099853515625, -1.06414794921875, -1.018310546875, -0.97247314453125, -0.9266357421875, -0.88079833984375, -0.8349609375, -0.78912353515625, -0.7432861328125, -0.69744873046875, -0.651611328125, -0.60577392578125, -0.5599365234375, -0.51409912109375, -0.46826171875, -0.42242431640625, -0.3765869140625, -0.33074951171875, -0.284912109375, -0.23907470703125, -0.1932373046875, -0.14739990234375, -0.1015625, -0.05572509765625, -0.0098876953125, 0.03594970703125, 0.081787109375, 0.12762451171875, 0.1734619140625, 0.21929931640625, 0.26513671875, 0.31097412109375, 0.3568115234375, 0.40264892578125, 0.448486328125, 0.49432373046875, 0.5401611328125, 0.58599853515625, 0.6318359375, 0.67767333984375, 0.7235107421875, 0.76934814453125, 0.815185546875, 0.86102294921875, 0.9068603515625, 0.95269775390625, 0.99853515625, 1.04437255859375, 1.0902099609375, 1.13604736328125, 1.181884765625, 1.22772216796875, 1.2735595703125, 1.31939697265625, 1.365234375]}, "gradients/decoder.transformer.h.23.crossattention.c_attn.weight": {"_type": "histogram", "values": [3.0, 2.0, 3.0, 6.0, 13.0, 14.0, 30.0, 34.0, 45.0, 67.0, 97.0, 164.0, 244.0, 350.0, 571.0, 919.0, 1389.0, 2003.0, 3135.0, 5070.0, 7678.0, 11961.0, 18538.0, 29104.0, 45075.0, 69260.0, 100979.0, 137419.0, 1205524.0, 141713.0, 106626.0, 72900.0, 48477.0, 31430.0, 19933.0, 12925.0, 8342.0, 5217.0, 3542.0, 2277.0, 1372.0, 936.0, 609.0, 432.0, 250.0, 146.0, 105.0, 81.0, 46.0, 35.0, 17.0, 17.0, 5.0, 6.0, 2.0, 3.0, 4.0, 3.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.160400390625, -0.1547870635986328, -0.14917373657226562, -0.14356040954589844, -0.13794708251953125, -0.13233375549316406, -0.12672042846679688, -0.12110710144042969, -0.1154937744140625, -0.10988044738769531, -0.10426712036132812, -0.09865379333496094, -0.09304046630859375, -0.08742713928222656, -0.08181381225585938, -0.07620048522949219, -0.070587158203125, -0.06497383117675781, -0.059360504150390625, -0.05374717712402344, -0.04813385009765625, -0.04252052307128906, -0.036907196044921875, -0.03129386901855469, -0.0256805419921875, -0.020067214965820312, -0.014453887939453125, -0.008840560913085938, -0.00322723388671875, 0.0023860931396484375, 0.007999420166015625, 0.013612747192382812, 0.01922607421875, 0.024839401245117188, 0.030452728271484375, 0.03606605529785156, 0.04167938232421875, 0.04729270935058594, 0.052906036376953125, 0.05851936340332031, 0.0641326904296875, 0.06974601745605469, 0.07535934448242188, 0.08097267150878906, 0.08658599853515625, 0.09219932556152344, 0.09781265258789062, 0.10342597961425781, 0.109039306640625, 0.11465263366699219, 0.12026596069335938, 0.12587928771972656, 0.13149261474609375, 0.13710594177246094, 0.14271926879882812, 0.1483325958251953, 0.1539459228515625, 0.1595592498779297, 0.16517257690429688, 0.17078590393066406, 0.17639923095703125, 0.18201255798339844, 0.18762588500976562, 0.1932392120361328, 0.1988525390625]}, "gradients/decoder.transformer.h.23.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 3.0, 1.0, 2.0, 2.0, 0.0, 4.0, 8.0, 3.0, 3.0, 2.0, 7.0, 5.0, 11.0, 12.0, 12.0, 32.0, 54.0, 61.0, 81.0, 136.0, 148.0, 127.0, 95.0, 59.0, 31.0, 18.0, 18.0, 14.0, 16.0, 12.0, 6.0, 5.0, 7.0, 5.0, 3.0, 2.0, 3.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.00228118896484375, -0.002206355333328247, -0.002131521701812744, -0.002056688070297241, -0.0019818544387817383, -0.0019070208072662354, -0.0018321871757507324, -0.0017573535442352295, -0.0016825199127197266, -0.0016076862812042236, -0.0015328526496887207, -0.0014580190181732178, -0.0013831853866577148, -0.001308351755142212, -0.001233518123626709, -0.001158684492111206, -0.0010838508605957031, -0.0010090172290802002, -0.0009341835975646973, -0.0008593499660491943, -0.0007845163345336914, -0.0007096827030181885, -0.0006348490715026855, -0.0005600154399871826, -0.0004851818084716797, -0.00041034817695617676, -0.00033551454544067383, -0.0002606809139251709, -0.00018584728240966797, -0.00011101365089416504, -3.618001937866211e-05, 3.865361213684082e-05, 0.00011348724365234375, 0.00018832087516784668, 0.0002631545066833496, 0.00033798813819885254, 0.00041282176971435547, 0.0004876554012298584, 0.0005624890327453613, 0.0006373226642608643, 0.0007121562957763672, 0.0007869899272918701, 0.000861823558807373, 0.000936657190322876, 0.001011490821838379, 0.0010863244533538818, 0.0011611580848693848, 0.0012359917163848877, 0.0013108253479003906, 0.0013856589794158936, 0.0014604926109313965, 0.0015353262424468994, 0.0016101598739624023, 0.0016849935054779053, 0.0017598271369934082, 0.0018346607685089111, 0.001909494400024414, 0.001984328031539917, 0.00205916166305542, 0.002133995294570923, 0.0022088289260864258, 0.0022836625576019287, 0.0023584961891174316, 0.0024333298206329346, 0.0025081634521484375]}, "gradients/decoder.transformer.h.23.crossattention.q_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 3.0, 1.0, 2.0, 1.0, 2.0, 1.0, 3.0, 2.0, 9.0, 2.0, 7.0, 7.0, 6.0, 14.0, 13.0, 10.0, 16.0, 21.0, 25.0, 41.0, 38.0, 74.0, 146.0, 166.0, 328.0, 930.0, 82191.0, 961044.0, 2316.0, 451.0, 228.0, 129.0, 72.0, 57.0, 34.0, 44.0, 24.0, 23.0, 16.0, 19.0, 11.0, 5.0, 8.0, 6.0, 3.0, 4.0, 3.0, 4.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 2.0], "bins": [-0.039276123046875, -0.03797483444213867, -0.036673545837402344, -0.035372257232666016, -0.03407096862792969, -0.03276968002319336, -0.03146839141845703, -0.030167102813720703, -0.028865814208984375, -0.027564525604248047, -0.02626323699951172, -0.02496194839477539, -0.023660659790039062, -0.022359371185302734, -0.021058082580566406, -0.019756793975830078, -0.01845550537109375, -0.017154216766357422, -0.015852928161621094, -0.014551639556884766, -0.013250350952148438, -0.01194906234741211, -0.010647773742675781, -0.009346485137939453, -0.008045196533203125, -0.006743907928466797, -0.005442619323730469, -0.004141330718994141, -0.0028400421142578125, -0.0015387535095214844, -0.00023746490478515625, 0.0010638236999511719, 0.0023651123046875, 0.003666400909423828, 0.004967689514160156, 0.006268978118896484, 0.0075702667236328125, 0.00887155532836914, 0.010172843933105469, 0.011474132537841797, 0.012775421142578125, 0.014076709747314453, 0.015377998352050781, 0.01667928695678711, 0.017980575561523438, 0.019281864166259766, 0.020583152770996094, 0.021884441375732422, 0.02318572998046875, 0.024487018585205078, 0.025788307189941406, 0.027089595794677734, 0.028390884399414062, 0.02969217300415039, 0.03099346160888672, 0.03229475021362305, 0.033596038818359375, 0.0348973274230957, 0.03619861602783203, 0.03749990463256836, 0.03880119323730469, 0.040102481842041016, 0.041403770446777344, 0.04270505905151367, 0.04400634765625]}, "gradients/decoder.transformer.h.23.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 11.0, 538.0, 460.0, 5.0, 1.0, 1.0], "bins": [-0.030485354363918304, -0.029977165162563324, -0.029468975961208344, -0.028960786759853363, -0.028452597558498383, -0.02794441021978855, -0.02743622101843357, -0.02692803181707859, -0.02641984261572361, -0.02591165341436863, -0.02540346421301365, -0.02489527501165867, -0.024387087672948837, -0.023878898471593857, -0.023370709270238876, -0.022862520068883896, -0.022354330867528915, -0.021846141666173935, -0.021337952464818954, -0.020829763263463974, -0.020321574062108994, -0.019813386723399162, -0.019305197522044182, -0.0187970083206892, -0.01828881911933422, -0.01778062991797924, -0.01727244071662426, -0.01676425151526928, -0.016256064176559448, -0.015747874975204468, -0.015239685773849487, -0.014731496572494507, -0.014223309233784676, -0.013715120032429695, -0.013206930831074715, -0.012698742561042309, -0.012190553359687328, -0.011682364158332348, -0.011174175888299942, -0.010665986686944962, -0.010157797485589981, -0.009649608284235, -0.00914141908288002, -0.008633230812847614, -0.008125041611492634, -0.007616852410137653, -0.00710866367444396, -0.006600474938750267, -0.006092285271733999, -0.005584096536040306, -0.005075907334685326, -0.004567718133330345, -0.004059529397636652, -0.003551340429112315, -0.0030431514605879784, -0.0025349624920636415, -0.0020267735235393047, -0.001518584555014968, -0.001010395586490631, -0.0005022066179662943, 5.982350558042526e-06, 0.0005141713190823793, 0.0010223602876067162, 0.001530549256131053, 0.00203873822465539]}, "gradients/decoder.transformer.h.23.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 2.0, 0.0, 5.0, 2.0, 6.0, 6.0, 9.0, 14.0, 18.0, 27.0, 27.0, 41.0, 23.0, 36.0, 42.0, 47.0, 46.0, 49.0, 53.0, 63.0, 42.0, 53.0, 41.0, 58.0, 42.0, 44.0, 32.0, 32.0, 33.0, 28.0, 24.0, 9.0, 16.0, 15.0, 11.0, 8.0, 6.0, 0.0, 4.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.0014300942420959473, -0.0013913484290242195, -0.0013526026159524918, -0.001313856802880764, -0.0012751109898090363, -0.0012363651767373085, -0.0011976193636655807, -0.001158873550593853, -0.0011201277375221252, -0.0010813819244503975, -0.0010426361113786697, -0.001003890298306942, -0.0009651444852352142, -0.0009263986721634865, -0.0008876528590917587, -0.000848907046020031, -0.0008101612329483032, -0.0007714154198765755, -0.0007326696068048477, -0.00069392379373312, -0.0006551779806613922, -0.0006164321675896645, -0.0005776863545179367, -0.000538940541446209, -0.0005001947283744812, -0.00046144891530275345, -0.0004227031022310257, -0.00038395728915929794, -0.0003452114760875702, -0.00030646566301584244, -0.0002677198499441147, -0.00022897403687238693, -0.00019022822380065918, -0.00015148241072893143, -0.00011273659765720367, -7.399078458547592e-05, -3.524497151374817e-05, 3.5008415579795837e-06, 4.2246654629707336e-05, 8.099246770143509e-05, 0.00011973828077316284, 0.0001584840938448906, 0.00019722990691661835, 0.0002359757199883461, 0.00027472153306007385, 0.0003134673461318016, 0.00035221315920352936, 0.0003909589722752571, 0.00042970478534698486, 0.0004684505984187126, 0.0005071964114904404, 0.0005459422245621681, 0.0005846880376338959, 0.0006234338507056236, 0.0006621796637773514, 0.0007009254768490791, 0.0007396712899208069, 0.0007784171029925346, 0.0008171629160642624, 0.0008559087291359901, 0.0008946545422077179, 0.0009334003552794456, 0.0009721461683511734, 0.0010108919814229012, 0.001049637794494629]}, "gradients/decoder.transformer.h.23.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 2.0, 3.0, 7.0, 8.0, 11.0, 12.0, 18.0, 18.0, 33.0, 23.0, 30.0, 35.0, 40.0, 41.0, 51.0, 49.0, 46.0, 58.0, 48.0, 53.0, 55.0, 50.0, 44.0, 48.0, 36.0, 40.0, 24.0, 23.0, 25.0, 20.0, 16.0, 17.0, 8.0, 2.0, 2.0, 5.0, 2.0, 6.0, 2.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.53125, -2.4453125, -2.359375, -2.2734375, -2.1875, -2.1015625, -2.015625, -1.9296875, -1.84375, -1.7578125, -1.671875, -1.5859375, -1.5, -1.4140625, -1.328125, -1.2421875, -1.15625, -1.0703125, -0.984375, -0.8984375, -0.8125, -0.7265625, -0.640625, -0.5546875, -0.46875, -0.3828125, -0.296875, -0.2109375, -0.125, -0.0390625, 0.046875, 0.1328125, 0.21875, 0.3046875, 0.390625, 0.4765625, 0.5625, 0.6484375, 0.734375, 0.8203125, 0.90625, 0.9921875, 1.078125, 1.1640625, 1.25, 1.3359375, 1.421875, 1.5078125, 1.59375, 1.6796875, 1.765625, 1.8515625, 1.9375, 2.0234375, 2.109375, 2.1953125, 2.28125, 2.3671875, 2.453125, 2.5390625, 2.625, 2.7109375, 2.796875, 2.8828125, 2.96875]}, "gradients/decoder.transformer.h.23.attn.c_proj.weight": {"_type": "histogram", "values": [3.0, 4.0, 2.0, 2.0, 5.0, 7.0, 10.0, 8.0, 10.0, 14.0, 24.0, 31.0, 38.0, 68.0, 76.0, 134.0, 186.0, 273.0, 384.0, 563.0, 888.0, 1241.0, 1969.0, 3037.0, 5092.0, 8990.0, 17954.0, 44195.0, 135317.0, 378143.0, 290944.0, 93054.0, 32445.0, 14050.0, 7151.0, 4352.0, 2738.0, 1726.0, 1112.0, 748.0, 501.0, 323.0, 204.0, 158.0, 97.0, 77.0, 63.0, 45.0, 32.0, 31.0, 13.0, 13.0, 10.0, 4.0, 4.0, 4.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0], "bins": [-6.3125, -6.1009521484375, -5.889404296875, -5.6778564453125, -5.46630859375, -5.2547607421875, -5.043212890625, -4.8316650390625, -4.6201171875, -4.4085693359375, -4.197021484375, -3.9854736328125, -3.77392578125, -3.5623779296875, -3.350830078125, -3.1392822265625, -2.927734375, -2.7161865234375, -2.504638671875, -2.2930908203125, -2.08154296875, -1.8699951171875, -1.658447265625, -1.4468994140625, -1.2353515625, -1.0238037109375, -0.812255859375, -0.6007080078125, -0.38916015625, -0.1776123046875, 0.033935546875, 0.2454833984375, 0.45703125, 0.6685791015625, 0.880126953125, 1.0916748046875, 1.30322265625, 1.5147705078125, 1.726318359375, 1.9378662109375, 2.1494140625, 2.3609619140625, 2.572509765625, 2.7840576171875, 2.99560546875, 3.2071533203125, 3.418701171875, 3.6302490234375, 3.841796875, 4.0533447265625, 4.264892578125, 4.4764404296875, 4.68798828125, 4.8995361328125, 5.111083984375, 5.3226318359375, 5.5341796875, 5.7457275390625, 5.957275390625, 6.1688232421875, 6.38037109375, 6.5919189453125, 6.803466796875, 7.0150146484375, 7.2265625]}, "gradients/decoder.transformer.h.23.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 2.0, 1.0, 5.0, 3.0, 3.0, 1.0, 4.0, 10.0, 8.0, 9.0, 12.0, 11.0, 11.0, 14.0, 13.0, 23.0, 15.0, 21.0, 30.0, 21.0, 30.0, 43.0, 44.0, 42.0, 56.0, 102.0, 162.0, 287.0, 1312.0, 192.0, 116.0, 87.0, 47.0, 34.0, 45.0, 30.0, 31.0, 26.0, 22.0, 26.0, 13.0, 19.0, 11.0, 14.0, 13.0, 13.0, 8.0, 5.0, 3.0, 6.0, 4.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 2.0], "bins": [-5.84375, -5.64349365234375, -5.4432373046875, -5.24298095703125, -5.042724609375, -4.84246826171875, -4.6422119140625, -4.44195556640625, -4.24169921875, -4.04144287109375, -3.8411865234375, -3.64093017578125, -3.440673828125, -3.24041748046875, -3.0401611328125, -2.83990478515625, -2.6396484375, -2.43939208984375, -2.2391357421875, -2.03887939453125, -1.838623046875, -1.63836669921875, -1.4381103515625, -1.23785400390625, -1.03759765625, -0.83734130859375, -0.6370849609375, -0.43682861328125, -0.236572265625, -0.03631591796875, 0.1639404296875, 0.36419677734375, 0.564453125, 0.76470947265625, 0.9649658203125, 1.16522216796875, 1.365478515625, 1.56573486328125, 1.7659912109375, 1.96624755859375, 2.16650390625, 2.36676025390625, 2.5670166015625, 2.76727294921875, 2.967529296875, 3.16778564453125, 3.3680419921875, 3.56829833984375, 3.7685546875, 3.96881103515625, 4.1690673828125, 4.36932373046875, 4.569580078125, 4.76983642578125, 4.9700927734375, 5.17034912109375, 5.37060546875, 5.57086181640625, 5.7711181640625, 5.97137451171875, 6.171630859375, 6.37188720703125, 6.5721435546875, 6.77239990234375, 6.97265625]}, "gradients/decoder.transformer.h.23.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0, 3.0, 5.0, 4.0, 5.0, 9.0, 9.0, 8.0, 14.0, 16.0, 17.0, 23.0, 13.0, 17.0, 22.0, 25.0, 44.0, 34.0, 69.0, 112.0, 245.0, 702.0, 7309.0, 2143900.0, 986852.0, 4968.0, 591.0, 212.0, 138.0, 60.0, 44.0, 33.0, 32.0, 22.0, 16.0, 19.0, 16.0, 16.0, 22.0, 6.0, 9.0, 8.0, 11.0, 6.0, 6.0, 6.0, 5.0, 3.0, 4.0, 1.0, 1.0, 6.0], "bins": [-28.765625, -27.96533203125, -27.1650390625, -26.36474609375, -25.564453125, -24.76416015625, -23.9638671875, -23.16357421875, -22.36328125, -21.56298828125, -20.7626953125, -19.96240234375, -19.162109375, -18.36181640625, -17.5615234375, -16.76123046875, -15.9609375, -15.16064453125, -14.3603515625, -13.56005859375, -12.759765625, -11.95947265625, -11.1591796875, -10.35888671875, -9.55859375, -8.75830078125, -7.9580078125, -7.15771484375, -6.357421875, -5.55712890625, -4.7568359375, -3.95654296875, -3.15625, -2.35595703125, -1.5556640625, -0.75537109375, 0.044921875, 0.84521484375, 1.6455078125, 2.44580078125, 3.24609375, 4.04638671875, 4.8466796875, 5.64697265625, 6.447265625, 7.24755859375, 8.0478515625, 8.84814453125, 9.6484375, 10.44873046875, 11.2490234375, 12.04931640625, 12.849609375, 13.64990234375, 14.4501953125, 15.25048828125, 16.05078125, 16.85107421875, 17.6513671875, 18.45166015625, 19.251953125, 20.05224609375, 20.8525390625, 21.65283203125, 22.453125]}, "gradients/decoder.transformer.h.23.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 6.0, 134.0, 807.0, 69.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-40.606422424316406, -37.03171920776367, -33.4570198059082, -29.88231658935547, -26.307615280151367, -22.732913970947266, -19.15821075439453, -15.58350944519043, -12.008808135986328, -8.434106826782227, -4.859404563903809, -1.2847023010253906, 2.289999008178711, 5.8647003173828125, 9.439403533935547, 13.014104843139648, 16.58880615234375, 20.16350746154785, 23.738208770751953, 27.312911987304688, 30.88761329650879, 34.46231460571289, 38.037017822265625, 41.611717224121094, 45.18642044067383, 48.76112365722656, 52.33582305908203, 55.910526275634766, 59.4852294921875, 63.05992889404297, 66.63462829589844, 70.20933532714844, 73.78404235839844, 77.3587417602539, 80.9334487915039, 84.50814819335938, 88.08284759521484, 91.65754699707031, 95.23225402832031, 98.80695343017578, 102.38165283203125, 105.95635223388672, 109.53105926513672, 113.10575866699219, 116.68045806884766, 120.25515747070312, 123.82986450195312, 127.4045639038086, 130.97927856445312, 134.55398559570312, 138.12867736816406, 141.70338439941406, 145.27809143066406, 148.852783203125, 152.427490234375, 156.002197265625, 159.57688903808594, 163.15159606933594, 166.72628784179688, 170.30099487304688, 173.87570190429688, 177.4503936767578, 181.0251007080078, 184.5998077392578, 188.17449951171875]}, "gradients/decoder.transformer.h.23.ln_1.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 2.0, 3.0, 2.0, 5.0, 6.0, 6.0, 7.0, 7.0, 10.0, 10.0, 12.0, 21.0, 18.0, 13.0, 39.0, 25.0, 27.0, 33.0, 27.0, 37.0, 42.0, 44.0, 48.0, 36.0, 32.0, 43.0, 46.0, 51.0, 38.0, 35.0, 32.0, 16.0, 32.0, 26.0, 22.0, 19.0, 28.0, 12.0, 21.0, 18.0, 15.0, 7.0, 7.0, 6.0, 4.0, 5.0, 6.0, 4.0, 4.0, 2.0, 2.0, 0.0, 2.0], "bins": [-23.8145694732666, -23.078014373779297, -22.341461181640625, -21.60490608215332, -20.868350982666016, -20.131797790527344, -19.39524269104004, -18.658687591552734, -17.922134399414062, -17.185579299926758, -16.449026107788086, -15.712471008300781, -14.975915908813477, -14.239361763000488, -13.5028076171875, -12.766252517700195, -12.02969741821289, -11.293143272399902, -10.556588172912598, -9.82003402709961, -9.083478927612305, -8.346924781799316, -7.610370635986328, -6.873816013336182, -6.137261390686035, -5.400706768035889, -4.664152145385742, -3.927597999572754, -3.1910433769226074, -2.454488754272461, -1.7179346084594727, -0.9813799858093262, -0.24482345581054688, 0.49173104763031006, 1.228285551071167, 1.9648399353027344, 2.701394557952881, 3.4379491806030273, 4.174503326416016, 4.911057949066162, 5.647612571716309, 6.384167194366455, 7.120721817016602, 7.85727596282959, 8.593830108642578, 9.330385208129883, 10.066939353942871, 10.80349349975586, 11.540048599243164, 12.276602745056152, 13.013157844543457, 13.749711990356445, 14.48626708984375, 15.222821235656738, 15.959375381469727, 16.69593048095703, 17.432483673095703, 18.169038772583008, 18.90559196472168, 19.642147064208984, 20.37870216369629, 21.115257263183594, 21.851810455322266, 22.58836555480957, 23.324920654296875]}, "gradients/decoder.transformer.h.22.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 1.0, 2.0, 6.0, 8.0, 6.0, 13.0, 16.0, 19.0, 22.0, 22.0, 29.0, 30.0, 34.0, 41.0, 54.0, 54.0, 46.0, 54.0, 59.0, 41.0, 58.0, 53.0, 38.0, 46.0, 43.0, 39.0, 34.0, 29.0, 30.0, 21.0, 12.0, 15.0, 10.0, 10.0, 5.0, 3.0, 2.0, 2.0, 3.0, 2.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.0078125, -2.91387939453125, -2.8199462890625, -2.72601318359375, -2.632080078125, -2.53814697265625, -2.4442138671875, -2.35028076171875, -2.25634765625, -2.16241455078125, -2.0684814453125, -1.97454833984375, -1.880615234375, -1.78668212890625, -1.6927490234375, -1.59881591796875, -1.5048828125, -1.41094970703125, -1.3170166015625, -1.22308349609375, -1.129150390625, -1.03521728515625, -0.9412841796875, -0.84735107421875, -0.75341796875, -0.65948486328125, -0.5655517578125, -0.47161865234375, -0.377685546875, -0.28375244140625, -0.1898193359375, -0.09588623046875, -0.001953125, 0.09197998046875, 0.1859130859375, 0.27984619140625, 0.373779296875, 0.46771240234375, 0.5616455078125, 0.65557861328125, 0.74951171875, 0.84344482421875, 0.9373779296875, 1.03131103515625, 1.125244140625, 1.21917724609375, 1.3131103515625, 1.40704345703125, 1.5009765625, 1.59490966796875, 1.6888427734375, 1.78277587890625, 1.876708984375, 1.97064208984375, 2.0645751953125, 2.15850830078125, 2.25244140625, 2.34637451171875, 2.4403076171875, 2.53424072265625, 2.628173828125, 2.72210693359375, 2.8160400390625, 2.90997314453125, 3.00390625]}, "gradients/decoder.transformer.h.22.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 3.0, 2.0, 3.0, 3.0, 6.0, 8.0, 12.0, 15.0, 16.0, 22.0, 37.0, 59.0, 70.0, 119.0, 153.0, 230.0, 336.0, 516.0, 1094.0, 2519.0, 8697.0, 44155.0, 467351.0, 3195936.0, 418566.0, 41395.0, 8218.0, 2359.0, 944.0, 457.0, 299.0, 197.0, 135.0, 98.0, 61.0, 50.0, 32.0, 28.0, 26.0, 21.0, 11.0, 11.0, 7.0, 3.0, 4.0, 1.0, 6.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-11.046875, -10.6844482421875, -10.322021484375, -9.9595947265625, -9.59716796875, -9.2347412109375, -8.872314453125, -8.5098876953125, -8.1474609375, -7.7850341796875, -7.422607421875, -7.0601806640625, -6.69775390625, -6.3353271484375, -5.972900390625, -5.6104736328125, -5.248046875, -4.8856201171875, -4.523193359375, -4.1607666015625, -3.79833984375, -3.4359130859375, -3.073486328125, -2.7110595703125, -2.3486328125, -1.9862060546875, -1.623779296875, -1.2613525390625, -0.89892578125, -0.5364990234375, -0.174072265625, 0.1883544921875, 0.55078125, 0.9132080078125, 1.275634765625, 1.6380615234375, 2.00048828125, 2.3629150390625, 2.725341796875, 3.0877685546875, 3.4501953125, 3.8126220703125, 4.175048828125, 4.5374755859375, 4.89990234375, 5.2623291015625, 5.624755859375, 5.9871826171875, 6.349609375, 6.7120361328125, 7.074462890625, 7.4368896484375, 7.79931640625, 8.1617431640625, 8.524169921875, 8.8865966796875, 9.2490234375, 9.6114501953125, 9.973876953125, 10.3363037109375, 10.69873046875, 11.0611572265625, 11.423583984375, 11.7860107421875, 12.1484375]}, "gradients/decoder.transformer.h.22.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 5.0, 4.0, 6.0, 9.0, 11.0, 10.0, 31.0, 37.0, 57.0, 76.0, 89.0, 128.0, 208.0, 282.0, 389.0, 486.0, 531.0, 456.0, 391.0, 262.0, 179.0, 147.0, 80.0, 66.0, 39.0, 30.0, 25.0, 16.0, 8.0, 8.0, 6.0, 3.0, 2.0, 4.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.7265625, -9.426513671875, -9.12646484375, -8.826416015625, -8.5263671875, -8.226318359375, -7.92626953125, -7.626220703125, -7.326171875, -7.026123046875, -6.72607421875, -6.426025390625, -6.1259765625, -5.825927734375, -5.52587890625, -5.225830078125, -4.92578125, -4.625732421875, -4.32568359375, -4.025634765625, -3.7255859375, -3.425537109375, -3.12548828125, -2.825439453125, -2.525390625, -2.225341796875, -1.92529296875, -1.625244140625, -1.3251953125, -1.025146484375, -0.72509765625, -0.425048828125, -0.125, 0.175048828125, 0.47509765625, 0.775146484375, 1.0751953125, 1.375244140625, 1.67529296875, 1.975341796875, 2.275390625, 2.575439453125, 2.87548828125, 3.175537109375, 3.4755859375, 3.775634765625, 4.07568359375, 4.375732421875, 4.67578125, 4.975830078125, 5.27587890625, 5.575927734375, 5.8759765625, 6.176025390625, 6.47607421875, 6.776123046875, 7.076171875, 7.376220703125, 7.67626953125, 7.976318359375, 8.2763671875, 8.576416015625, 8.87646484375, 9.176513671875, 9.4765625]}, "gradients/decoder.transformer.h.22.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 2.0, 1.0, 6.0, 7.0, 11.0, 20.0, 21.0, 27.0, 45.0, 87.0, 150.0, 250.0, 479.0, 1013.0, 2619.0, 8860.0, 45050.0, 437731.0, 3009809.0, 614076.0, 58242.0, 10537.0, 2963.0, 1102.0, 561.0, 279.0, 152.0, 68.0, 51.0, 25.0, 15.0, 14.0, 7.0, 8.0, 2.0, 2.0, 1.0, 1.0, 0.0, 3.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-17.59375, -17.053466796875, -16.51318359375, -15.972900390625, -15.4326171875, -14.892333984375, -14.35205078125, -13.811767578125, -13.271484375, -12.731201171875, -12.19091796875, -11.650634765625, -11.1103515625, -10.570068359375, -10.02978515625, -9.489501953125, -8.94921875, -8.408935546875, -7.86865234375, -7.328369140625, -6.7880859375, -6.247802734375, -5.70751953125, -5.167236328125, -4.626953125, -4.086669921875, -3.54638671875, -3.006103515625, -2.4658203125, -1.925537109375, -1.38525390625, -0.844970703125, -0.3046875, 0.235595703125, 0.77587890625, 1.316162109375, 1.8564453125, 2.396728515625, 2.93701171875, 3.477294921875, 4.017578125, 4.557861328125, 5.09814453125, 5.638427734375, 6.1787109375, 6.718994140625, 7.25927734375, 7.799560546875, 8.33984375, 8.880126953125, 9.42041015625, 9.960693359375, 10.5009765625, 11.041259765625, 11.58154296875, 12.121826171875, 12.662109375, 13.202392578125, 13.74267578125, 14.282958984375, 14.8232421875, 15.363525390625, 15.90380859375, 16.444091796875, 16.984375]}, "gradients/decoder.transformer.h.22.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 9.0, 43.0, 181.0, 365.0, 299.0, 98.0, 18.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-44.43085861206055, -40.977272033691406, -37.523685455322266, -34.070098876953125, -30.616514205932617, -27.162927627563477, -23.70934295654297, -20.255756378173828, -16.802169799804688, -13.348583221435547, -9.894997596740723, -6.441411972045898, -2.987825393676758, 0.4657611846923828, 3.9193458557128906, 7.372932434082031, 10.826519012451172, 14.280105590820312, 17.733692169189453, 21.18727684020996, 24.6408634185791, 28.094449996948242, 31.54803466796875, 35.00162124633789, 38.45520782470703, 41.90879440307617, 45.36238098144531, 48.81596374511719, 52.269554138183594, 55.72313690185547, 59.17672348022461, 62.63031005859375, 66.08389282226562, 69.5374755859375, 72.9910659790039, 76.44464874267578, 79.89823913574219, 83.35182189941406, 86.80540466308594, 90.25899505615234, 93.71258544921875, 97.16616821289062, 100.61975860595703, 104.0733413696289, 107.52693176269531, 110.98051452636719, 114.43409729003906, 117.88768768310547, 121.34127044677734, 124.79485321044922, 128.24844360351562, 131.7020263671875, 135.15560913085938, 138.6092071533203, 142.0627899169922, 145.51637268066406, 148.96995544433594, 152.4235382080078, 155.8771209716797, 159.33071899414062, 162.7843017578125, 166.23788452148438, 169.69146728515625, 173.14505004882812, 176.59864807128906]}, "gradients/decoder.transformer.h.22.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 4.0, 0.0, 0.0, 1.0, 1.0, 6.0, 4.0, 13.0, 6.0, 5.0, 6.0, 11.0, 17.0, 14.0, 24.0, 21.0, 26.0, 26.0, 32.0, 29.0, 42.0, 45.0, 33.0, 43.0, 36.0, 39.0, 28.0, 51.0, 44.0, 43.0, 45.0, 44.0, 27.0, 29.0, 28.0, 26.0, 27.0, 26.0, 17.0, 21.0, 15.0, 19.0, 11.0, 6.0, 6.0, 4.0, 8.0, 5.0, 1.0, 2.0, 0.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-26.333749771118164, -25.468366622924805, -24.602985382080078, -23.73760223388672, -22.87221908569336, -22.0068359375, -21.14145278930664, -20.276071548461914, -19.410688400268555, -18.545305252075195, -17.67992401123047, -16.81454086303711, -15.94915771484375, -15.08377456665039, -14.218392372131348, -13.353010177612305, -12.487627029418945, -11.622243881225586, -10.756861686706543, -9.8914794921875, -9.02609634399414, -8.160713195800781, -7.295331001281738, -6.429948329925537, -5.564565658569336, -4.699182987213135, -3.8338003158569336, -2.9684176445007324, -2.1030349731445312, -1.23765230178833, -0.3722696304321289, 0.49311304092407227, 1.3584976196289062, 2.2238802909851074, 3.0892629623413086, 3.9546456336975098, 4.820028305053711, 5.685410976409912, 6.550793647766113, 7.4161763191223145, 8.281558990478516, 9.146942138671875, 10.012324333190918, 10.877706527709961, 11.74308967590332, 12.60847282409668, 13.473855018615723, 14.339237213134766, 15.204620361328125, 16.070003509521484, 16.935386657714844, 17.80076789855957, 18.66615104675293, 19.53153419494629, 20.396915435791016, 21.262298583984375, 22.127681732177734, 22.993064880371094, 23.858448028564453, 24.72382926940918, 25.58921241760254, 26.4545955657959, 27.319976806640625, 28.185359954833984, 29.050743103027344]}, "gradients/decoder.transformer.h.22.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 3.0, 4.0, 4.0, 6.0, 7.0, 8.0, 10.0, 10.0, 15.0, 21.0, 18.0, 17.0, 34.0, 29.0, 27.0, 17.0, 41.0, 40.0, 41.0, 45.0, 49.0, 38.0, 42.0, 42.0, 44.0, 44.0, 40.0, 41.0, 28.0, 25.0, 25.0, 22.0, 30.0, 25.0, 26.0, 20.0, 17.0, 17.0, 8.0, 11.0, 5.0, 2.0, 4.0, 4.0, 2.0, 3.0, 2.0, 0.0, 1.0, 1.0, 4.0], "bins": [-2.814453125, -2.735565185546875, -2.65667724609375, -2.577789306640625, -2.4989013671875, -2.420013427734375, -2.34112548828125, -2.262237548828125, -2.183349609375, -2.104461669921875, -2.02557373046875, -1.946685791015625, -1.8677978515625, -1.788909912109375, -1.71002197265625, -1.631134033203125, -1.55224609375, -1.473358154296875, -1.39447021484375, -1.315582275390625, -1.2366943359375, -1.157806396484375, -1.07891845703125, -1.000030517578125, -0.921142578125, -0.842254638671875, -0.76336669921875, -0.684478759765625, -0.6055908203125, -0.526702880859375, -0.44781494140625, -0.368927001953125, -0.2900390625, -0.211151123046875, -0.13226318359375, -0.053375244140625, 0.0255126953125, 0.104400634765625, 0.18328857421875, 0.262176513671875, 0.341064453125, 0.419952392578125, 0.49884033203125, 0.577728271484375, 0.6566162109375, 0.735504150390625, 0.81439208984375, 0.893280029296875, 0.97216796875, 1.051055908203125, 1.12994384765625, 1.208831787109375, 1.2877197265625, 1.366607666015625, 1.44549560546875, 1.524383544921875, 1.603271484375, 1.682159423828125, 1.76104736328125, 1.839935302734375, 1.9188232421875, 1.997711181640625, 2.07659912109375, 2.155487060546875, 2.234375]}, "gradients/decoder.transformer.h.22.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 3.0, 1.0, 6.0, 5.0, 2.0, 9.0, 17.0, 20.0, 27.0, 34.0, 49.0, 87.0, 108.0, 219.0, 328.0, 497.0, 849.0, 1446.0, 2564.0, 4425.0, 8127.0, 15402.0, 29431.0, 56819.0, 109027.0, 188255.0, 240770.0, 178025.0, 100909.0, 52380.0, 27038.0, 14047.0, 7531.0, 4173.0, 2346.0, 1391.0, 818.0, 500.0, 338.0, 178.0, 140.0, 83.0, 47.0, 42.0, 17.0, 12.0, 11.0, 4.0, 4.0, 4.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.476806640625, -0.46254730224609375, -0.4482879638671875, -0.43402862548828125, -0.419769287109375, -0.40550994873046875, -0.3912506103515625, -0.37699127197265625, -0.36273193359375, -0.34847259521484375, -0.3342132568359375, -0.31995391845703125, -0.305694580078125, -0.29143524169921875, -0.2771759033203125, -0.26291656494140625, -0.2486572265625, -0.23439788818359375, -0.2201385498046875, -0.20587921142578125, -0.191619873046875, -0.17736053466796875, -0.1631011962890625, -0.14884185791015625, -0.13458251953125, -0.12032318115234375, -0.1060638427734375, -0.09180450439453125, -0.077545166015625, -0.06328582763671875, -0.0490264892578125, -0.03476715087890625, -0.0205078125, -0.00624847412109375, 0.0080108642578125, 0.02227020263671875, 0.036529541015625, 0.05078887939453125, 0.0650482177734375, 0.07930755615234375, 0.09356689453125, 0.10782623291015625, 0.1220855712890625, 0.13634490966796875, 0.150604248046875, 0.16486358642578125, 0.1791229248046875, 0.19338226318359375, 0.2076416015625, 0.22190093994140625, 0.2361602783203125, 0.25041961669921875, 0.264678955078125, 0.27893829345703125, 0.2931976318359375, 0.30745697021484375, 0.32171630859375, 0.33597564697265625, 0.3502349853515625, 0.36449432373046875, 0.378753662109375, 0.39301300048828125, 0.4072723388671875, 0.42153167724609375, 0.435791015625]}, "gradients/decoder.transformer.h.22.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 3.0, 1.0, 3.0, 2.0, 6.0, 2.0, 4.0, 9.0, 5.0, 8.0, 6.0, 14.0, 8.0, 8.0, 6.0, 13.0, 14.0, 26.0, 18.0, 33.0, 26.0, 40.0, 34.0, 27.0, 39.0, 41.0, 33.0, 31.0, 36.0, 1065.0, 53.0, 38.0, 26.0, 37.0, 43.0, 32.0, 38.0, 25.0, 24.0, 18.0, 20.0, 22.0, 16.0, 18.0, 9.0, 5.0, 11.0, 8.0, 7.0, 5.0, 6.0, 5.0, 5.0, 0.0, 4.0, 4.0, 2.0, 1.0, 1.0, 1.0], "bins": [-1.6396484375, -1.591278076171875, -1.54290771484375, -1.494537353515625, -1.4461669921875, -1.397796630859375, -1.34942626953125, -1.301055908203125, -1.252685546875, -1.204315185546875, -1.15594482421875, -1.107574462890625, -1.0592041015625, -1.010833740234375, -0.96246337890625, -0.914093017578125, -0.86572265625, -0.817352294921875, -0.76898193359375, -0.720611572265625, -0.6722412109375, -0.623870849609375, -0.57550048828125, -0.527130126953125, -0.478759765625, -0.430389404296875, -0.38201904296875, -0.333648681640625, -0.2852783203125, -0.236907958984375, -0.18853759765625, -0.140167236328125, -0.091796875, -0.043426513671875, 0.00494384765625, 0.053314208984375, 0.1016845703125, 0.150054931640625, 0.19842529296875, 0.246795654296875, 0.295166015625, 0.343536376953125, 0.39190673828125, 0.440277099609375, 0.4886474609375, 0.537017822265625, 0.58538818359375, 0.633758544921875, 0.68212890625, 0.730499267578125, 0.77886962890625, 0.827239990234375, 0.8756103515625, 0.923980712890625, 0.97235107421875, 1.020721435546875, 1.069091796875, 1.117462158203125, 1.16583251953125, 1.214202880859375, 1.2625732421875, 1.310943603515625, 1.35931396484375, 1.407684326171875, 1.4560546875]}, "gradients/decoder.transformer.h.22.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 3.0, 3.0, 3.0, 10.0, 12.0, 14.0, 27.0, 36.0, 56.0, 95.0, 162.0, 228.0, 353.0, 587.0, 865.0, 1320.0, 2133.0, 3634.0, 5748.0, 9476.0, 15573.0, 26160.0, 43855.0, 70881.0, 111229.0, 161393.0, 1223059.0, 150303.0, 103540.0, 65209.0, 39774.0, 23931.0, 14494.0, 8661.0, 5436.0, 3209.0, 2089.0, 1297.0, 804.0, 514.0, 355.0, 207.0, 129.0, 101.0, 66.0, 30.0, 16.0, 21.0, 18.0, 10.0, 6.0, 5.0, 2.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.214111328125, -0.2068328857421875, -0.199554443359375, -0.1922760009765625, -0.18499755859375, -0.1777191162109375, -0.170440673828125, -0.1631622314453125, -0.1558837890625, -0.1486053466796875, -0.141326904296875, -0.1340484619140625, -0.12677001953125, -0.1194915771484375, -0.112213134765625, -0.1049346923828125, -0.09765625, -0.0903778076171875, -0.083099365234375, -0.0758209228515625, -0.06854248046875, -0.0612640380859375, -0.053985595703125, -0.0467071533203125, -0.0394287109375, -0.0321502685546875, -0.024871826171875, -0.0175933837890625, -0.01031494140625, -0.0030364990234375, 0.004241943359375, 0.0115203857421875, 0.018798828125, 0.0260772705078125, 0.033355712890625, 0.0406341552734375, 0.04791259765625, 0.0551910400390625, 0.062469482421875, 0.0697479248046875, 0.0770263671875, 0.0843048095703125, 0.091583251953125, 0.0988616943359375, 0.10614013671875, 0.1134185791015625, 0.120697021484375, 0.1279754638671875, 0.13525390625, 0.1425323486328125, 0.149810791015625, 0.1570892333984375, 0.16436767578125, 0.1716461181640625, 0.178924560546875, 0.1862030029296875, 0.1934814453125, 0.2007598876953125, 0.208038330078125, 0.2153167724609375, 0.22259521484375, 0.2298736572265625, 0.237152099609375, 0.2444305419921875, 0.251708984375]}, "gradients/decoder.transformer.h.22.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 3.0, 1.0, 5.0, 3.0, 9.0, 6.0, 7.0, 9.0, 10.0, 28.0, 29.0, 41.0, 45.0, 46.0, 66.0, 82.0, 126.0, 107.0, 87.0, 59.0, 58.0, 44.0, 30.0, 40.0, 14.0, 11.0, 13.0, 7.0, 6.0, 4.0, 4.0, 2.0, 1.0, 3.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.008209228515625, -0.007926106452941895, -0.007642984390258789, -0.007359862327575684, -0.007076740264892578, -0.006793618202209473, -0.006510496139526367, -0.006227374076843262, -0.005944252014160156, -0.005661129951477051, -0.005378007888793945, -0.00509488582611084, -0.004811763763427734, -0.004528641700744629, -0.0042455196380615234, -0.003962397575378418, -0.0036792755126953125, -0.003396153450012207, -0.0031130313873291016, -0.002829909324645996, -0.0025467872619628906, -0.002263665199279785, -0.0019805431365966797, -0.0016974210739135742, -0.0014142990112304688, -0.0011311769485473633, -0.0008480548858642578, -0.0005649328231811523, -0.0002818107604980469, 1.3113021850585938e-06, 0.00028443336486816406, 0.0005675554275512695, 0.000850677490234375, 0.0011337995529174805, 0.001416921615600586, 0.0017000436782836914, 0.001983165740966797, 0.0022662878036499023, 0.002549409866333008, 0.0028325319290161133, 0.0031156539916992188, 0.0033987760543823242, 0.0036818981170654297, 0.003965020179748535, 0.004248142242431641, 0.004531264305114746, 0.0048143863677978516, 0.005097508430480957, 0.0053806304931640625, 0.005663752555847168, 0.0059468746185302734, 0.006229996681213379, 0.006513118743896484, 0.00679624080657959, 0.007079362869262695, 0.007362484931945801, 0.007645606994628906, 0.007928729057312012, 0.008211851119995117, 0.008494973182678223, 0.008778095245361328, 0.009061217308044434, 0.009344339370727539, 0.009627461433410645, 0.00991058349609375]}, "gradients/decoder.transformer.h.22.crossattention.q_attn.weight": {"_type": "histogram", "values": [3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 4.0, 6.0, 6.0, 6.0, 6.0, 7.0, 12.0, 26.0, 15.0, 30.0, 43.0, 55.0, 76.0, 112.0, 142.0, 196.0, 318.0, 928.0, 165053.0, 879136.0, 1261.0, 358.0, 191.0, 168.0, 105.0, 70.0, 59.0, 39.0, 35.0, 20.0, 15.0, 17.0, 12.0, 7.0, 7.0, 3.0, 2.0, 3.0, 2.0, 1.0, 2.0, 3.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.1343994140625, -0.1297779083251953, -0.12515640258789062, -0.12053489685058594, -0.11591339111328125, -0.11129188537597656, -0.10667037963867188, -0.10204887390136719, -0.0974273681640625, -0.09280586242675781, -0.08818435668945312, -0.08356285095214844, -0.07894134521484375, -0.07431983947753906, -0.06969833374023438, -0.06507682800292969, -0.060455322265625, -0.05583381652832031, -0.051212310791015625, -0.04659080505371094, -0.04196929931640625, -0.03734779357910156, -0.032726287841796875, -0.028104782104492188, -0.0234832763671875, -0.018861770629882812, -0.014240264892578125, -0.009618759155273438, -0.00499725341796875, -0.0003757476806640625, 0.004245758056640625, 0.008867263793945312, 0.01348876953125, 0.018110275268554688, 0.022731781005859375, 0.027353286743164062, 0.03197479248046875, 0.03659629821777344, 0.041217803955078125, 0.04583930969238281, 0.0504608154296875, 0.05508232116699219, 0.059703826904296875, 0.06432533264160156, 0.06894683837890625, 0.07356834411621094, 0.07818984985351562, 0.08281135559082031, 0.087432861328125, 0.09205436706542969, 0.09667587280273438, 0.10129737854003906, 0.10591888427734375, 0.11054039001464844, 0.11516189575195312, 0.11978340148925781, 0.1244049072265625, 0.1290264129638672, 0.13364791870117188, 0.13826942443847656, 0.14289093017578125, 0.14751243591308594, 0.15213394165039062, 0.1567554473876953, 0.161376953125]}, "gradients/decoder.transformer.h.22.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 44.0, 974.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.21586118638515472, -0.21063318848609924, -0.20540520548820496, -0.20017720758914948, -0.1949492245912552, -0.1897212266921997, -0.18449324369430542, -0.17926524579524994, -0.17403726279735565, -0.16880926489830017, -0.16358128190040588, -0.1583532840013504, -0.15312530100345612, -0.14789730310440063, -0.14266932010650635, -0.13744132220745087, -0.13221332430839539, -0.1269853264093399, -0.12175734341144562, -0.11652935296297073, -0.11130136251449585, -0.10607337206602097, -0.10084538161754608, -0.0956173837184906, -0.09038940072059631, -0.08516141027212143, -0.07993341982364655, -0.07470542937517166, -0.06947743892669678, -0.0642494484782219, -0.05902145430445671, -0.05379346385598183, -0.04856547713279724, -0.04333748668432236, -0.03810949623584747, -0.03288150578737259, -0.027653513476252556, -0.022425523027777672, -0.01719753071665764, -0.011969540268182755, -0.0067415498197078705, -0.0015135589055716991, 0.003714432008564472, 0.00894242338836193, 0.014170413836836815, 0.0193984042853117, 0.024626396596431732, 0.029854387044906616, 0.0350823774933815, 0.040310367941856384, 0.04553835839033127, 0.05076634883880615, 0.055994339287281036, 0.06122232973575592, 0.0664503276348114, 0.07167831063270569, 0.07690630853176117, 0.08213429898023605, 0.08736228942871094, 0.09259027987718582, 0.0978182703256607, 0.10304626077413559, 0.10827425122261047, 0.11350224912166595, 0.11873023211956024]}, "gradients/decoder.transformer.h.22.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 5.0, 1.0, 6.0, 3.0, 8.0, 16.0, 19.0, 17.0, 41.0, 47.0, 46.0, 52.0, 71.0, 71.0, 62.0, 95.0, 67.0, 68.0, 58.0, 72.0, 36.0, 36.0, 35.0, 25.0, 27.0, 12.0, 11.0, 4.0, 3.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.013050436973571777, -0.012714066542685032, -0.012377696111798286, -0.012041325680911541, -0.011704955250024796, -0.01136858481913805, -0.011032214388251305, -0.01069584395736456, -0.010359473526477814, -0.010023103095591068, -0.009686732664704323, -0.009350362233817577, -0.009013991802930832, -0.008677621372044086, -0.008341250941157341, -0.008004880510270596, -0.00766851007938385, -0.007332139648497105, -0.006995769217610359, -0.006659398786723614, -0.006323028355836868, -0.005986657924950123, -0.005650287494063377, -0.005313917063176632, -0.0049775466322898865, -0.004641176201403141, -0.004304805770516396, -0.00396843533962965, -0.0036320649087429047, -0.003295694477856159, -0.0029593240469694138, -0.0026229536160826683, -0.002286583185195923, -0.0019502127543091774, -0.001613842323422432, -0.0012774718925356865, -0.000941101461648941, -0.0006047310307621956, -0.00026836059987545013, 6.800983101129532e-05, 0.00040438026189804077, 0.0007407506927847862, 0.0010771211236715317, 0.0014134915545582771, 0.0017498619854450226, 0.002086232416331768, 0.0024226028472185135, 0.002758973278105259, 0.0030953437089920044, 0.00343171413987875, 0.0037680845707654953, 0.004104455001652241, 0.004440825432538986, 0.004777195863425732, 0.005113566294312477, 0.0054499367251992226, 0.005786307156085968, 0.0061226775869727135, 0.006459048017859459, 0.006795418448746204, 0.00713178887963295, 0.007468159310519695, 0.007804529741406441, 0.008140900172293186, 0.008477270603179932]}, "gradients/decoder.transformer.h.22.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 3.0, 4.0, 4.0, 6.0, 7.0, 8.0, 10.0, 10.0, 15.0, 21.0, 18.0, 16.0, 35.0, 27.0, 28.0, 18.0, 40.0, 40.0, 42.0, 45.0, 49.0, 37.0, 43.0, 42.0, 44.0, 44.0, 40.0, 41.0, 28.0, 24.0, 26.0, 22.0, 30.0, 25.0, 26.0, 20.0, 17.0, 17.0, 8.0, 11.0, 5.0, 2.0, 4.0, 4.0, 2.0, 3.0, 2.0, 0.0, 1.0, 1.0, 4.0], "bins": [-2.81640625, -2.73748779296875, -2.6585693359375, -2.57965087890625, -2.500732421875, -2.42181396484375, -2.3428955078125, -2.26397705078125, -2.18505859375, -2.10614013671875, -2.0272216796875, -1.94830322265625, -1.869384765625, -1.79046630859375, -1.7115478515625, -1.63262939453125, -1.5537109375, -1.47479248046875, -1.3958740234375, -1.31695556640625, -1.238037109375, -1.15911865234375, -1.0802001953125, -1.00128173828125, -0.92236328125, -0.84344482421875, -0.7645263671875, -0.68560791015625, -0.606689453125, -0.52777099609375, -0.4488525390625, -0.36993408203125, -0.291015625, -0.21209716796875, -0.1331787109375, -0.05426025390625, 0.024658203125, 0.10357666015625, 0.1824951171875, 0.26141357421875, 0.34033203125, 0.41925048828125, 0.4981689453125, 0.57708740234375, 0.656005859375, 0.73492431640625, 0.8138427734375, 0.89276123046875, 0.9716796875, 1.05059814453125, 1.1295166015625, 1.20843505859375, 1.287353515625, 1.36627197265625, 1.4451904296875, 1.52410888671875, 1.60302734375, 1.68194580078125, 1.7608642578125, 1.83978271484375, 1.918701171875, 1.99761962890625, 2.0765380859375, 2.15545654296875, 2.234375]}, "gradients/decoder.transformer.h.22.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 5.0, 4.0, 3.0, 3.0, 6.0, 13.0, 13.0, 24.0, 31.0, 40.0, 59.0, 61.0, 118.0, 158.0, 231.0, 352.0, 509.0, 808.0, 1141.0, 1972.0, 3757.0, 7809.0, 17870.0, 44269.0, 107547.0, 219815.0, 290149.0, 194609.0, 90123.0, 36940.0, 15156.0, 6554.0, 3289.0, 1828.0, 1096.0, 721.0, 478.0, 311.0, 172.0, 140.0, 101.0, 84.0, 36.0, 54.0, 37.0, 24.0, 11.0, 10.0, 7.0, 3.0, 2.0, 3.0, 4.0, 1.0, 3.0, 1.0, 2.0], "bins": [-2.822265625, -2.737762451171875, -2.65325927734375, -2.568756103515625, -2.4842529296875, -2.399749755859375, -2.31524658203125, -2.230743408203125, -2.146240234375, -2.061737060546875, -1.97723388671875, -1.892730712890625, -1.8082275390625, -1.723724365234375, -1.63922119140625, -1.554718017578125, -1.47021484375, -1.385711669921875, -1.30120849609375, -1.216705322265625, -1.1322021484375, -1.047698974609375, -0.96319580078125, -0.878692626953125, -0.794189453125, -0.709686279296875, -0.62518310546875, -0.540679931640625, -0.4561767578125, -0.371673583984375, -0.28717041015625, -0.202667236328125, -0.1181640625, -0.033660888671875, 0.05084228515625, 0.135345458984375, 0.2198486328125, 0.304351806640625, 0.38885498046875, 0.473358154296875, 0.557861328125, 0.642364501953125, 0.72686767578125, 0.811370849609375, 0.8958740234375, 0.980377197265625, 1.06488037109375, 1.149383544921875, 1.23388671875, 1.318389892578125, 1.40289306640625, 1.487396240234375, 1.5718994140625, 1.656402587890625, 1.74090576171875, 1.825408935546875, 1.909912109375, 1.994415283203125, 2.07891845703125, 2.163421630859375, 2.2479248046875, 2.332427978515625, 2.41693115234375, 2.501434326171875, 2.5859375]}, "gradients/decoder.transformer.h.22.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 4.0, 6.0, 6.0, 6.0, 6.0, 10.0, 19.0, 7.0, 11.0, 15.0, 22.0, 28.0, 29.0, 31.0, 22.0, 32.0, 41.0, 47.0, 49.0, 70.0, 105.0, 302.0, 1407.0, 235.0, 118.0, 65.0, 50.0, 38.0, 38.0, 33.0, 27.0, 30.0, 22.0, 19.0, 22.0, 15.0, 16.0, 12.0, 8.0, 11.0, 8.0, 5.0, 6.0, 3.0, 0.0, 2.0, 1.0, 0.0, 2.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-8.984375, -8.6983642578125, -8.412353515625, -8.1263427734375, -7.84033203125, -7.5543212890625, -7.268310546875, -6.9822998046875, -6.6962890625, -6.4102783203125, -6.124267578125, -5.8382568359375, -5.55224609375, -5.2662353515625, -4.980224609375, -4.6942138671875, -4.408203125, -4.1221923828125, -3.836181640625, -3.5501708984375, -3.26416015625, -2.9781494140625, -2.692138671875, -2.4061279296875, -2.1201171875, -1.8341064453125, -1.548095703125, -1.2620849609375, -0.97607421875, -0.6900634765625, -0.404052734375, -0.1180419921875, 0.16796875, 0.4539794921875, 0.739990234375, 1.0260009765625, 1.31201171875, 1.5980224609375, 1.884033203125, 2.1700439453125, 2.4560546875, 2.7420654296875, 3.028076171875, 3.3140869140625, 3.60009765625, 3.8861083984375, 4.172119140625, 4.4581298828125, 4.744140625, 5.0301513671875, 5.316162109375, 5.6021728515625, 5.88818359375, 6.1741943359375, 6.460205078125, 6.7462158203125, 7.0322265625, 7.3182373046875, 7.604248046875, 7.8902587890625, 8.17626953125, 8.4622802734375, 8.748291015625, 9.0343017578125, 9.3203125]}, "gradients/decoder.transformer.h.22.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 2.0, 0.0, 2.0, 0.0, 0.0, 4.0, 7.0, 3.0, 5.0, 8.0, 12.0, 6.0, 17.0, 10.0, 25.0, 19.0, 22.0, 27.0, 34.0, 39.0, 58.0, 83.0, 157.0, 290.0, 831.0, 6067.0, 459507.0, 2659838.0, 16071.0, 1573.0, 389.0, 161.0, 105.0, 69.0, 53.0, 37.0, 32.0, 22.0, 25.0, 20.0, 17.0, 13.0, 12.0, 10.0, 8.0, 7.0, 9.0, 3.0, 6.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-20.375, -19.7607421875, -19.146484375, -18.5322265625, -17.91796875, -17.3037109375, -16.689453125, -16.0751953125, -15.4609375, -14.8466796875, -14.232421875, -13.6181640625, -13.00390625, -12.3896484375, -11.775390625, -11.1611328125, -10.546875, -9.9326171875, -9.318359375, -8.7041015625, -8.08984375, -7.4755859375, -6.861328125, -6.2470703125, -5.6328125, -5.0185546875, -4.404296875, -3.7900390625, -3.17578125, -2.5615234375, -1.947265625, -1.3330078125, -0.71875, -0.1044921875, 0.509765625, 1.1240234375, 1.73828125, 2.3525390625, 2.966796875, 3.5810546875, 4.1953125, 4.8095703125, 5.423828125, 6.0380859375, 6.65234375, 7.2666015625, 7.880859375, 8.4951171875, 9.109375, 9.7236328125, 10.337890625, 10.9521484375, 11.56640625, 12.1806640625, 12.794921875, 13.4091796875, 14.0234375, 14.6376953125, 15.251953125, 15.8662109375, 16.48046875, 17.0947265625, 17.708984375, 18.3232421875, 18.9375]}, "gradients/decoder.transformer.h.22.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 4.0, 90.0, 671.0, 243.0, 7.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-64.43699645996094, -61.82619094848633, -59.21538543701172, -56.60457992553711, -53.9937744140625, -51.382965087890625, -48.772159576416016, -46.161354064941406, -43.5505485534668, -40.93974304199219, -38.32893753051758, -35.71813201904297, -33.107322692871094, -30.496519088745117, -27.885711669921875, -25.274906158447266, -22.664100646972656, -20.053295135498047, -17.442489624023438, -14.831682205200195, -12.220876693725586, -9.610071182250977, -6.999264717102051, -4.388458251953125, -1.7776527404785156, 0.833153247833252, 3.4439592361450195, 6.054765224456787, 8.665571212768555, 11.276376724243164, 13.88718318939209, 16.497989654541016, 19.108787536621094, 21.719593048095703, 24.330398559570312, 26.941205978393555, 29.552011489868164, 32.162818908691406, 34.773624420166016, 37.384429931640625, 39.995235443115234, 42.606040954589844, 45.21684646606445, 47.82765197753906, 50.43846130371094, 53.04926300048828, 55.660072326660156, 58.270877838134766, 60.881683349609375, 63.492488861083984, 66.1032943725586, 68.71410369873047, 71.32490539550781, 73.93571472167969, 76.54651641845703, 79.1573257446289, 81.76812744140625, 84.37893676757812, 86.98973846435547, 89.60054779052734, 92.21134948730469, 94.82215881347656, 97.4329605102539, 100.04376983642578, 102.65457916259766]}, "gradients/decoder.transformer.h.22.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 2.0, 2.0, 3.0, 3.0, 9.0, 8.0, 13.0, 11.0, 17.0, 15.0, 17.0, 22.0, 19.0, 26.0, 24.0, 32.0, 33.0, 52.0, 48.0, 34.0, 39.0, 48.0, 41.0, 39.0, 40.0, 47.0, 38.0, 53.0, 41.0, 32.0, 26.0, 40.0, 19.0, 19.0, 26.0, 15.0, 17.0, 14.0, 5.0, 13.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-31.764469146728516, -30.7552490234375, -29.746028900146484, -28.73680877685547, -27.727588653564453, -26.718368530273438, -25.70914649963379, -24.699926376342773, -23.690706253051758, -22.681486129760742, -21.672266006469727, -20.66304588317871, -19.653823852539062, -18.644603729248047, -17.63538360595703, -16.626163482666016, -15.616943359375, -14.607723236083984, -13.598503112792969, -12.589282035827637, -11.580061912536621, -10.570841789245605, -9.561620712280273, -8.552400588989258, -7.543180465698242, -6.533960342407227, -5.524739742279053, -4.515519142150879, -3.5062990188598633, -2.4970788955688477, -1.4878582954406738, -0.4786376953125, 0.5305862426757812, 1.539806604385376, 2.5490269660949707, 3.5582473278045654, 4.56746768951416, 5.576687812805176, 6.58590841293335, 7.595129013061523, 8.604349136352539, 9.613569259643555, 10.62278938293457, 11.632010459899902, 12.641230583190918, 13.650450706481934, 14.659671783447266, 15.668891906738281, 16.678112030029297, 17.687332153320312, 18.696552276611328, 19.705772399902344, 20.71499252319336, 21.724212646484375, 22.733434677124023, 23.74265480041504, 24.751874923706055, 25.76109504699707, 26.770315170288086, 27.7795352935791, 28.78875732421875, 29.797977447509766, 30.80719757080078, 31.816417694091797, 32.82563781738281]}, "gradients/decoder.transformer.h.21.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 3.0, 2.0, 1.0, 5.0, 7.0, 8.0, 9.0, 7.0, 11.0, 14.0, 16.0, 21.0, 26.0, 28.0, 30.0, 27.0, 18.0, 39.0, 42.0, 42.0, 38.0, 44.0, 40.0, 44.0, 43.0, 50.0, 47.0, 46.0, 33.0, 28.0, 25.0, 29.0, 17.0, 41.0, 19.0, 31.0, 14.0, 15.0, 13.0, 8.0, 8.0, 8.0, 3.0, 3.0, 4.0, 4.0, 2.0, 1.0, 1.0, 4.0, 1.0], "bins": [-3.0625, -2.9788818359375, -2.895263671875, -2.8116455078125, -2.72802734375, -2.6444091796875, -2.560791015625, -2.4771728515625, -2.3935546875, -2.3099365234375, -2.226318359375, -2.1427001953125, -2.05908203125, -1.9754638671875, -1.891845703125, -1.8082275390625, -1.724609375, -1.6409912109375, -1.557373046875, -1.4737548828125, -1.39013671875, -1.3065185546875, -1.222900390625, -1.1392822265625, -1.0556640625, -0.9720458984375, -0.888427734375, -0.8048095703125, -0.72119140625, -0.6375732421875, -0.553955078125, -0.4703369140625, -0.38671875, -0.3031005859375, -0.219482421875, -0.1358642578125, -0.05224609375, 0.0313720703125, 0.114990234375, 0.1986083984375, 0.2822265625, 0.3658447265625, 0.449462890625, 0.5330810546875, 0.61669921875, 0.7003173828125, 0.783935546875, 0.8675537109375, 0.951171875, 1.0347900390625, 1.118408203125, 1.2020263671875, 1.28564453125, 1.3692626953125, 1.452880859375, 1.5364990234375, 1.6201171875, 1.7037353515625, 1.787353515625, 1.8709716796875, 1.95458984375, 2.0382080078125, 2.121826171875, 2.2054443359375, 2.2890625]}, "gradients/decoder.transformer.h.21.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 3.0, 1.0, 7.0, 7.0, 7.0, 12.0, 17.0, 22.0, 28.0, 43.0, 53.0, 59.0, 86.0, 133.0, 167.0, 415.0, 958.0, 2855.0, 11308.0, 77685.0, 1180719.0, 2697220.0, 193353.0, 22032.0, 4551.0, 1345.0, 512.0, 239.0, 100.0, 82.0, 73.0, 54.0, 34.0, 38.0, 13.0, 10.0, 11.0, 15.0, 8.0, 5.0, 7.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 3.0], "bins": [-12.6953125, -12.344482421875, -11.99365234375, -11.642822265625, -11.2919921875, -10.941162109375, -10.59033203125, -10.239501953125, -9.888671875, -9.537841796875, -9.18701171875, -8.836181640625, -8.4853515625, -8.134521484375, -7.78369140625, -7.432861328125, -7.08203125, -6.731201171875, -6.38037109375, -6.029541015625, -5.6787109375, -5.327880859375, -4.97705078125, -4.626220703125, -4.275390625, -3.924560546875, -3.57373046875, -3.222900390625, -2.8720703125, -2.521240234375, -2.17041015625, -1.819580078125, -1.46875, -1.117919921875, -0.76708984375, -0.416259765625, -0.0654296875, 0.285400390625, 0.63623046875, 0.987060546875, 1.337890625, 1.688720703125, 2.03955078125, 2.390380859375, 2.7412109375, 3.092041015625, 3.44287109375, 3.793701171875, 4.14453125, 4.495361328125, 4.84619140625, 5.197021484375, 5.5478515625, 5.898681640625, 6.24951171875, 6.600341796875, 6.951171875, 7.302001953125, 7.65283203125, 8.003662109375, 8.3544921875, 8.705322265625, 9.05615234375, 9.406982421875, 9.7578125]}, "gradients/decoder.transformer.h.21.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 4.0, 2.0, 5.0, 8.0, 1.0, 5.0, 8.0, 10.0, 19.0, 22.0, 24.0, 76.0, 98.0, 141.0, 232.0, 348.0, 502.0, 647.0, 611.0, 457.0, 305.0, 193.0, 130.0, 83.0, 54.0, 36.0, 24.0, 17.0, 14.0, 6.0, 4.0, 0.0, 4.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.4375, -9.0718994140625, -8.706298828125, -8.3406982421875, -7.97509765625, -7.6094970703125, -7.243896484375, -6.8782958984375, -6.5126953125, -6.1470947265625, -5.781494140625, -5.4158935546875, -5.05029296875, -4.6846923828125, -4.319091796875, -3.9534912109375, -3.587890625, -3.2222900390625, -2.856689453125, -2.4910888671875, -2.12548828125, -1.7598876953125, -1.394287109375, -1.0286865234375, -0.6630859375, -0.2974853515625, 0.068115234375, 0.4337158203125, 0.79931640625, 1.1649169921875, 1.530517578125, 1.8961181640625, 2.26171875, 2.6273193359375, 2.992919921875, 3.3585205078125, 3.72412109375, 4.0897216796875, 4.455322265625, 4.8209228515625, 5.1865234375, 5.5521240234375, 5.917724609375, 6.2833251953125, 6.64892578125, 7.0145263671875, 7.380126953125, 7.7457275390625, 8.111328125, 8.4769287109375, 8.842529296875, 9.2081298828125, 9.57373046875, 9.9393310546875, 10.304931640625, 10.6705322265625, 11.0361328125, 11.4017333984375, 11.767333984375, 12.1329345703125, 12.49853515625, 12.8641357421875, 13.229736328125, 13.5953369140625, 13.9609375]}, "gradients/decoder.transformer.h.21.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 4.0, 1.0, 6.0, 16.0, 18.0, 21.0, 30.0, 43.0, 91.0, 146.0, 219.0, 396.0, 809.0, 1691.0, 4600.0, 15386.0, 74786.0, 538139.0, 2677140.0, 752878.0, 99325.0, 19182.0, 5318.0, 2046.0, 941.0, 452.0, 244.0, 148.0, 73.0, 49.0, 27.0, 33.0, 10.0, 8.0, 5.0, 4.0, 4.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-14.5546875, -14.10791015625, -13.6611328125, -13.21435546875, -12.767578125, -12.32080078125, -11.8740234375, -11.42724609375, -10.98046875, -10.53369140625, -10.0869140625, -9.64013671875, -9.193359375, -8.74658203125, -8.2998046875, -7.85302734375, -7.40625, -6.95947265625, -6.5126953125, -6.06591796875, -5.619140625, -5.17236328125, -4.7255859375, -4.27880859375, -3.83203125, -3.38525390625, -2.9384765625, -2.49169921875, -2.044921875, -1.59814453125, -1.1513671875, -0.70458984375, -0.2578125, 0.18896484375, 0.6357421875, 1.08251953125, 1.529296875, 1.97607421875, 2.4228515625, 2.86962890625, 3.31640625, 3.76318359375, 4.2099609375, 4.65673828125, 5.103515625, 5.55029296875, 5.9970703125, 6.44384765625, 6.890625, 7.33740234375, 7.7841796875, 8.23095703125, 8.677734375, 9.12451171875, 9.5712890625, 10.01806640625, 10.46484375, 10.91162109375, 11.3583984375, 11.80517578125, 12.251953125, 12.69873046875, 13.1455078125, 13.59228515625, 14.0390625]}, "gradients/decoder.transformer.h.21.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 6.0, 19.0, 80.0, 220.0, 347.0, 214.0, 96.0, 27.0, 6.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-173.36392211914062, -169.86744689941406, -166.3709716796875, -162.87451171875, -159.37803649902344, -155.88156127929688, -152.3850860595703, -148.88861083984375, -145.3921356201172, -141.89566040039062, -138.39918518066406, -134.9027099609375, -131.40625, -127.90977478027344, -124.41329956054688, -120.91682434082031, -117.42036437988281, -113.92388916015625, -110.42742156982422, -106.93094635009766, -103.4344711303711, -99.93800354003906, -96.4415283203125, -92.94505310058594, -89.44857788085938, -85.95210266113281, -82.45563507080078, -78.95915985107422, -75.46268463134766, -71.96621704101562, -68.46974182128906, -64.9732666015625, -61.4767951965332, -57.980323791503906, -54.483848571777344, -50.98737716674805, -47.49090576171875, -43.99443054199219, -40.49795913696289, -37.001487731933594, -33.50501251220703, -30.0085391998291, -26.512067794799805, -23.015594482421875, -19.519123077392578, -16.02264976501465, -12.526176452636719, -9.029705047607422, -5.533233642578125, -2.0367610454559326, 1.4597115516662598, 4.956184387207031, 8.452656745910645, 11.949129104614258, 15.445602416992188, 18.942073822021484, 22.438547134399414, 25.935020446777344, 29.43149185180664, 32.92796325683594, 36.4244384765625, 39.9209098815918, 43.417381286621094, 46.913856506347656, 50.41032791137695]}, "gradients/decoder.transformer.h.21.ln_2.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 3.0, 3.0, 3.0, 2.0, 6.0, 8.0, 6.0, 6.0, 18.0, 5.0, 13.0, 19.0, 17.0, 19.0, 22.0, 34.0, 39.0, 44.0, 30.0, 33.0, 30.0, 34.0, 43.0, 40.0, 43.0, 36.0, 35.0, 34.0, 33.0, 45.0, 35.0, 25.0, 24.0, 35.0, 32.0, 28.0, 20.0, 22.0, 12.0, 8.0, 9.0, 12.0, 4.0, 7.0, 11.0, 7.0, 4.0, 2.0, 3.0, 3.0, 5.0, 1.0, 0.0, 2.0, 1.0], "bins": [-28.353797912597656, -27.50921058654785, -26.66462516784668, -25.820037841796875, -24.975452423095703, -24.1308650970459, -23.286277770996094, -22.441692352294922, -21.59710693359375, -20.752519607543945, -19.907934188842773, -19.06334686279297, -18.218761444091797, -17.374174118041992, -16.529586791992188, -15.685001373291016, -14.840414047241211, -13.995827674865723, -13.151241302490234, -12.30665397644043, -11.462068557739258, -10.617481231689453, -9.772894859313965, -8.928308486938477, -8.083722114562988, -7.2391357421875, -6.394549369812012, -5.549962520599365, -4.705376148223877, -3.8607897758483887, -3.016202926635742, -2.171616554260254, -1.3270282745361328, -0.482441782951355, 0.36214470863342285, 1.2067313194274902, 2.0513176918029785, 2.895904064178467, 3.7404909133911133, 4.585077285766602, 5.42966365814209, 6.274250030517578, 7.118836402893066, 7.963423252105713, 8.80801010131836, 9.652595520019531, 10.497182846069336, 11.341769218444824, 12.186355590820312, 13.0309419631958, 13.875528335571289, 14.720115661621094, 15.564701080322266, 16.40928840637207, 17.253875732421875, 18.098461151123047, 18.94304656982422, 19.787633895874023, 20.632219314575195, 21.476806640625, 22.321392059326172, 23.165979385375977, 24.01056671142578, 24.855152130126953, 25.699739456176758]}, "gradients/decoder.transformer.h.21.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 3.0, 4.0, 3.0, 5.0, 7.0, 11.0, 7.0, 14.0, 21.0, 14.0, 19.0, 22.0, 22.0, 27.0, 26.0, 31.0, 31.0, 39.0, 29.0, 50.0, 44.0, 50.0, 48.0, 48.0, 47.0, 35.0, 39.0, 46.0, 31.0, 33.0, 21.0, 27.0, 31.0, 21.0, 14.0, 22.0, 15.0, 16.0, 12.0, 9.0, 6.0, 3.0, 4.0, 1.0, 3.0, 2.0, 2.0, 0.0, 2.0, 1.0, 1.0], "bins": [-3.150390625, -3.061859130859375, -2.97332763671875, -2.884796142578125, -2.7962646484375, -2.707733154296875, -2.61920166015625, -2.530670166015625, -2.442138671875, -2.353607177734375, -2.26507568359375, -2.176544189453125, -2.0880126953125, -1.999481201171875, -1.91094970703125, -1.822418212890625, -1.73388671875, -1.645355224609375, -1.55682373046875, -1.468292236328125, -1.3797607421875, -1.291229248046875, -1.20269775390625, -1.114166259765625, -1.025634765625, -0.937103271484375, -0.84857177734375, -0.760040283203125, -0.6715087890625, -0.582977294921875, -0.49444580078125, -0.405914306640625, -0.3173828125, -0.228851318359375, -0.14031982421875, -0.051788330078125, 0.0367431640625, 0.125274658203125, 0.21380615234375, 0.302337646484375, 0.390869140625, 0.479400634765625, 0.56793212890625, 0.656463623046875, 0.7449951171875, 0.833526611328125, 0.92205810546875, 1.010589599609375, 1.09912109375, 1.187652587890625, 1.27618408203125, 1.364715576171875, 1.4532470703125, 1.541778564453125, 1.63031005859375, 1.718841552734375, 1.807373046875, 1.895904541015625, 1.98443603515625, 2.072967529296875, 2.1614990234375, 2.250030517578125, 2.33856201171875, 2.427093505859375, 2.515625]}, "gradients/decoder.transformer.h.21.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 1.0, 1.0, 5.0, 8.0, 14.0, 16.0, 31.0, 48.0, 85.0, 109.0, 187.0, 324.0, 566.0, 1021.0, 1942.0, 3781.0, 7615.0, 15220.0, 31329.0, 64549.0, 128676.0, 225041.0, 250993.0, 158028.0, 80630.0, 39455.0, 19245.0, 9687.0, 4720.0, 2344.0, 1275.0, 672.0, 367.0, 220.0, 140.0, 78.0, 53.0, 29.0, 22.0, 13.0, 9.0, 7.0, 5.0, 4.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.51220703125, -0.4952239990234375, -0.478240966796875, -0.4612579345703125, -0.44427490234375, -0.4272918701171875, -0.410308837890625, -0.3933258056640625, -0.3763427734375, -0.3593597412109375, -0.342376708984375, -0.3253936767578125, -0.30841064453125, -0.2914276123046875, -0.274444580078125, -0.2574615478515625, -0.240478515625, -0.2234954833984375, -0.206512451171875, -0.1895294189453125, -0.17254638671875, -0.1555633544921875, -0.138580322265625, -0.1215972900390625, -0.1046142578125, -0.0876312255859375, -0.070648193359375, -0.0536651611328125, -0.03668212890625, -0.0196990966796875, -0.002716064453125, 0.0142669677734375, 0.03125, 0.0482330322265625, 0.065216064453125, 0.0821990966796875, 0.09918212890625, 0.1161651611328125, 0.133148193359375, 0.1501312255859375, 0.1671142578125, 0.1840972900390625, 0.201080322265625, 0.2180633544921875, 0.23504638671875, 0.2520294189453125, 0.269012451171875, 0.2859954833984375, 0.302978515625, 0.3199615478515625, 0.336944580078125, 0.3539276123046875, 0.37091064453125, 0.3878936767578125, 0.404876708984375, 0.4218597412109375, 0.4388427734375, 0.4558258056640625, 0.472808837890625, 0.4897918701171875, 0.50677490234375, 0.5237579345703125, 0.540740966796875, 0.5577239990234375, 0.57470703125]}, "gradients/decoder.transformer.h.21.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 2.0, 6.0, 6.0, 14.0, 15.0, 10.0, 14.0, 16.0, 18.0, 22.0, 26.0, 27.0, 20.0, 22.0, 36.0, 42.0, 32.0, 44.0, 40.0, 36.0, 33.0, 1070.0, 38.0, 40.0, 36.0, 38.0, 35.0, 29.0, 34.0, 26.0, 31.0, 23.0, 25.0, 19.0, 29.0, 6.0, 11.0, 13.0, 14.0, 10.0, 8.0, 8.0, 3.0, 5.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.6494140625, -1.59417724609375, -1.5389404296875, -1.48370361328125, -1.428466796875, -1.37322998046875, -1.3179931640625, -1.26275634765625, -1.20751953125, -1.15228271484375, -1.0970458984375, -1.04180908203125, -0.986572265625, -0.93133544921875, -0.8760986328125, -0.82086181640625, -0.765625, -0.71038818359375, -0.6551513671875, -0.59991455078125, -0.544677734375, -0.48944091796875, -0.4342041015625, -0.37896728515625, -0.32373046875, -0.26849365234375, -0.2132568359375, -0.15802001953125, -0.102783203125, -0.04754638671875, 0.0076904296875, 0.06292724609375, 0.1181640625, 0.17340087890625, 0.2286376953125, 0.28387451171875, 0.339111328125, 0.39434814453125, 0.4495849609375, 0.50482177734375, 0.56005859375, 0.61529541015625, 0.6705322265625, 0.72576904296875, 0.781005859375, 0.83624267578125, 0.8914794921875, 0.94671630859375, 1.001953125, 1.05718994140625, 1.1124267578125, 1.16766357421875, 1.222900390625, 1.27813720703125, 1.3333740234375, 1.38861083984375, 1.44384765625, 1.49908447265625, 1.5543212890625, 1.60955810546875, 1.664794921875, 1.72003173828125, 1.7752685546875, 1.83050537109375, 1.8857421875]}, "gradients/decoder.transformer.h.21.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 2.0, 1.0, 1.0, 3.0, 11.0, 12.0, 19.0, 25.0, 42.0, 45.0, 84.0, 150.0, 193.0, 325.0, 457.0, 718.0, 1263.0, 2017.0, 3322.0, 5396.0, 8957.0, 14755.0, 24603.0, 41080.0, 67326.0, 108254.0, 159109.0, 1226516.0, 157304.0, 106385.0, 66937.0, 40459.0, 24228.0, 14365.0, 8940.0, 5298.0, 3185.0, 2012.0, 1194.0, 821.0, 483.0, 272.0, 210.0, 126.0, 81.0, 64.0, 33.0, 19.0, 19.0, 8.0, 6.0, 4.0, 2.0, 2.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.2420654296875, -0.23412513732910156, -0.22618484497070312, -0.2182445526123047, -0.21030426025390625, -0.2023639678955078, -0.19442367553710938, -0.18648338317871094, -0.1785430908203125, -0.17060279846191406, -0.16266250610351562, -0.1547222137451172, -0.14678192138671875, -0.1388416290283203, -0.13090133666992188, -0.12296104431152344, -0.115020751953125, -0.10708045959472656, -0.09914016723632812, -0.09119987487792969, -0.08325958251953125, -0.07531929016113281, -0.06737899780273438, -0.05943870544433594, -0.0514984130859375, -0.04355812072753906, -0.035617828369140625, -0.027677536010742188, -0.01973724365234375, -0.011796951293945312, -0.003856658935546875, 0.0040836334228515625, 0.01202392578125, 0.019964218139648438, 0.027904510498046875, 0.03584480285644531, 0.04378509521484375, 0.05172538757324219, 0.059665679931640625, 0.06760597229003906, 0.0755462646484375, 0.08348655700683594, 0.09142684936523438, 0.09936714172363281, 0.10730743408203125, 0.11524772644042969, 0.12318801879882812, 0.13112831115722656, 0.139068603515625, 0.14700889587402344, 0.15494918823242188, 0.1628894805908203, 0.17082977294921875, 0.1787700653076172, 0.18671035766601562, 0.19465065002441406, 0.2025909423828125, 0.21053123474121094, 0.21847152709960938, 0.2264118194580078, 0.23435211181640625, 0.2422924041748047, 0.2502326965332031, 0.25817298889160156, 0.26611328125]}, "gradients/decoder.transformer.h.21.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 3.0, 2.0, 6.0, 2.0, 2.0, 8.0, 8.0, 14.0, 19.0, 15.0, 18.0, 18.0, 31.0, 40.0, 44.0, 57.0, 71.0, 90.0, 116.0, 95.0, 72.0, 47.0, 51.0, 36.0, 36.0, 26.0, 21.0, 15.0, 11.0, 11.0, 7.0, 5.0, 6.0, 3.0, 0.0, 2.0, 3.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.01045989990234375, -0.010173201560974121, -0.009886503219604492, -0.009599804878234863, -0.009313106536865234, -0.009026408195495605, -0.008739709854125977, -0.008453011512756348, -0.008166313171386719, -0.00787961483001709, -0.007592916488647461, -0.007306218147277832, -0.007019519805908203, -0.006732821464538574, -0.006446123123168945, -0.006159424781799316, -0.0058727264404296875, -0.005586028099060059, -0.00529932975769043, -0.005012631416320801, -0.004725933074951172, -0.004439234733581543, -0.004152536392211914, -0.003865838050842285, -0.0035791397094726562, -0.0032924413681030273, -0.0030057430267333984, -0.0027190446853637695, -0.0024323463439941406, -0.0021456480026245117, -0.0018589496612548828, -0.001572251319885254, -0.001285552978515625, -0.000998854637145996, -0.0007121562957763672, -0.0004254579544067383, -0.00013875961303710938, 0.00014793872833251953, 0.00043463706970214844, 0.0007213354110717773, 0.0010080337524414062, 0.0012947320938110352, 0.001581430435180664, 0.001868128776550293, 0.002154827117919922, 0.0024415254592895508, 0.0027282238006591797, 0.0030149221420288086, 0.0033016204833984375, 0.0035883188247680664, 0.0038750171661376953, 0.004161715507507324, 0.004448413848876953, 0.004735112190246582, 0.005021810531616211, 0.00530850887298584, 0.005595207214355469, 0.005881905555725098, 0.0061686038970947266, 0.0064553022384643555, 0.006742000579833984, 0.007028698921203613, 0.007315397262573242, 0.007602095603942871, 0.0078887939453125]}, "gradients/decoder.transformer.h.21.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 0.0, 1.0, 2.0, 1.0, 2.0, 0.0, 1.0, 3.0, 5.0, 6.0, 9.0, 19.0, 13.0, 20.0, 30.0, 45.0, 65.0, 74.0, 83.0, 116.0, 177.0, 291.0, 713.0, 20439.0, 1020223.0, 4863.0, 499.0, 244.0, 159.0, 130.0, 82.0, 55.0, 42.0, 37.0, 40.0, 17.0, 14.0, 10.0, 5.0, 9.0, 7.0, 4.0, 5.0, 2.0, 1.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.1531982421875, -0.14846420288085938, -0.14373016357421875, -0.13899612426757812, -0.1342620849609375, -0.12952804565429688, -0.12479400634765625, -0.12005996704101562, -0.115325927734375, -0.11059188842773438, -0.10585784912109375, -0.10112380981445312, -0.0963897705078125, -0.09165573120117188, -0.08692169189453125, -0.08218765258789062, -0.07745361328125, -0.07271957397460938, -0.06798553466796875, -0.06325149536132812, -0.0585174560546875, -0.053783416748046875, -0.04904937744140625, -0.044315338134765625, -0.039581298828125, -0.034847259521484375, -0.03011322021484375, -0.025379180908203125, -0.0206451416015625, -0.015911102294921875, -0.01117706298828125, -0.006443023681640625, -0.001708984375, 0.003025054931640625, 0.00775909423828125, 0.012493133544921875, 0.0172271728515625, 0.021961212158203125, 0.02669525146484375, 0.031429290771484375, 0.036163330078125, 0.040897369384765625, 0.04563140869140625, 0.050365447998046875, 0.0550994873046875, 0.059833526611328125, 0.06456756591796875, 0.06930160522460938, 0.07403564453125, 0.07876968383789062, 0.08350372314453125, 0.08823776245117188, 0.0929718017578125, 0.09770584106445312, 0.10243988037109375, 0.10717391967773438, 0.111907958984375, 0.11664199829101562, 0.12137603759765625, 0.12611007690429688, 0.1308441162109375, 0.13557815551757812, 0.14031219482421875, 0.14504623413085938, 0.1497802734375]}, "gradients/decoder.transformer.h.21.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 4.0, 915.0, 98.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.14734181761741638, -0.14311569929122925, -0.13888958096504211, -0.13466346263885498, -0.13043734431266785, -0.1262112259864807, -0.12198510020971298, -0.11775898188352585, -0.11353286355733871, -0.10930674523115158, -0.10508062690496445, -0.10085450857877731, -0.09662838280200958, -0.09240226447582245, -0.08817614614963531, -0.08395002782344818, -0.07972390949726105, -0.07549779117107391, -0.07127167284488678, -0.06704555451869965, -0.06281943619251251, -0.05859331414103508, -0.05436719208955765, -0.050141073763370514, -0.04591495543718338, -0.041688837110996246, -0.03746271878480911, -0.03323659673333168, -0.029010478407144547, -0.024784360080957413, -0.02055823989212513, -0.016332119703292847, -0.012106016278266907, -0.007879897020757198, -0.00365377776324749, 0.0005723414942622185, 0.004798460751771927, 0.00902457907795906, 0.013250699266791344, 0.017476819455623627, 0.02170293778181076, 0.025929056107997894, 0.030155176296830177, 0.03438129648566246, 0.038607414811849594, 0.04283353313803673, 0.04705965518951416, 0.051285773515701294, 0.05551189184188843, 0.05973801016807556, 0.0639641284942627, 0.06819024682044983, 0.07241636514663696, 0.0766424834728241, 0.08086860924959183, 0.08509472757577896, 0.0893208459019661, 0.09354696422815323, 0.09777308255434036, 0.1019992008805275, 0.10622532665729523, 0.11045144498348236, 0.1146775633096695, 0.11890368163585663, 0.12312979996204376]}, "gradients/decoder.transformer.h.21.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 3.0, 0.0, 2.0, 0.0, 5.0, 6.0, 5.0, 8.0, 15.0, 25.0, 25.0, 28.0, 34.0, 40.0, 39.0, 37.0, 51.0, 48.0, 61.0, 72.0, 56.0, 61.0, 57.0, 61.0, 46.0, 40.0, 34.0, 34.0, 29.0, 26.0, 24.0, 14.0, 10.0, 6.0, 5.0, 5.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.011496543884277344, -0.011163455434143543, -0.010830366984009743, -0.010497278533875942, -0.010164190083742142, -0.009831101633608341, -0.00949801318347454, -0.00916492473334074, -0.00883183628320694, -0.00849874783307314, -0.008165659382939339, -0.007832570932805538, -0.007499482482671738, -0.007166394032537937, -0.006833305582404137, -0.006500217132270336, -0.006167128682136536, -0.005834040232002735, -0.005500951781868935, -0.005167863331735134, -0.004834774881601334, -0.004501686431467533, -0.004168597981333733, -0.003835509531199932, -0.0035024210810661316, -0.003169332630932331, -0.0028362441807985306, -0.00250315573066473, -0.0021700672805309296, -0.001836978830397129, -0.0015038903802633286, -0.001170801930129528, -0.0008377134799957275, -0.000504625029861927, -0.00017153657972812653, 0.00016155187040567398, 0.0004946403205394745, 0.000827728770673275, 0.0011608172208070755, 0.001493905670940876, 0.0018269941210746765, 0.002160082571208477, 0.0024931710213422775, 0.002826259471476078, 0.0031593479216098785, 0.003492436371743679, 0.0038255248218774796, 0.00415861327201128, 0.004491701722145081, 0.004824790172278881, 0.005157878622412682, 0.005490967072546482, 0.005824055522680283, 0.006157143972814083, 0.006490232422947884, 0.006823320873081684, 0.007156409323215485, 0.007489497773349285, 0.007822586223483086, 0.008155674673616886, 0.008488763123750687, 0.008821851573884487, 0.009154940024018288, 0.009488028474152088, 0.009821116924285889]}, "gradients/decoder.transformer.h.21.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 3.0, 4.0, 3.0, 5.0, 7.0, 11.0, 7.0, 14.0, 21.0, 14.0, 19.0, 22.0, 22.0, 27.0, 26.0, 31.0, 31.0, 39.0, 29.0, 50.0, 44.0, 50.0, 48.0, 48.0, 47.0, 35.0, 39.0, 46.0, 31.0, 33.0, 21.0, 27.0, 31.0, 21.0, 14.0, 22.0, 15.0, 15.0, 13.0, 9.0, 6.0, 3.0, 4.0, 1.0, 3.0, 2.0, 2.0, 0.0, 2.0, 1.0, 1.0], "bins": [-3.150390625, -3.061859130859375, -2.97332763671875, -2.884796142578125, -2.7962646484375, -2.707733154296875, -2.61920166015625, -2.530670166015625, -2.442138671875, -2.353607177734375, -2.26507568359375, -2.176544189453125, -2.0880126953125, -1.999481201171875, -1.91094970703125, -1.822418212890625, -1.73388671875, -1.645355224609375, -1.55682373046875, -1.468292236328125, -1.3797607421875, -1.291229248046875, -1.20269775390625, -1.114166259765625, -1.025634765625, -0.937103271484375, -0.84857177734375, -0.760040283203125, -0.6715087890625, -0.582977294921875, -0.49444580078125, -0.405914306640625, -0.3173828125, -0.228851318359375, -0.14031982421875, -0.051788330078125, 0.0367431640625, 0.125274658203125, 0.21380615234375, 0.302337646484375, 0.390869140625, 0.479400634765625, 0.56793212890625, 0.656463623046875, 0.7449951171875, 0.833526611328125, 0.92205810546875, 1.010589599609375, 1.09912109375, 1.187652587890625, 1.27618408203125, 1.364715576171875, 1.4532470703125, 1.541778564453125, 1.63031005859375, 1.718841552734375, 1.807373046875, 1.895904541015625, 1.98443603515625, 2.072967529296875, 2.1614990234375, 2.250030517578125, 2.33856201171875, 2.427093505859375, 2.515625]}, "gradients/decoder.transformer.h.21.attn.c_proj.weight": {"_type": "histogram", "values": [3.0, 0.0, 1.0, 3.0, 1.0, 3.0, 5.0, 12.0, 10.0, 13.0, 25.0, 39.0, 65.0, 58.0, 109.0, 195.0, 286.0, 427.0, 643.0, 1095.0, 1809.0, 2969.0, 5010.0, 9604.0, 22391.0, 70836.0, 258077.0, 441859.0, 156298.0, 43609.0, 15385.0, 7284.0, 4028.0, 2424.0, 1484.0, 903.0, 586.0, 380.0, 222.0, 138.0, 80.0, 53.0, 33.0, 38.0, 16.0, 19.0, 13.0, 9.0, 8.0, 4.0, 1.0, 4.0, 4.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-3.365234375, -3.241973876953125, -3.11871337890625, -2.995452880859375, -2.8721923828125, -2.748931884765625, -2.62567138671875, -2.502410888671875, -2.379150390625, -2.255889892578125, -2.13262939453125, -2.009368896484375, -1.8861083984375, -1.762847900390625, -1.63958740234375, -1.516326904296875, -1.39306640625, -1.269805908203125, -1.14654541015625, -1.023284912109375, -0.9000244140625, -0.776763916015625, -0.65350341796875, -0.530242919921875, -0.406982421875, -0.283721923828125, -0.16046142578125, -0.037200927734375, 0.0860595703125, 0.209320068359375, 0.33258056640625, 0.455841064453125, 0.5791015625, 0.702362060546875, 0.82562255859375, 0.948883056640625, 1.0721435546875, 1.195404052734375, 1.31866455078125, 1.441925048828125, 1.565185546875, 1.688446044921875, 1.81170654296875, 1.934967041015625, 2.0582275390625, 2.181488037109375, 2.30474853515625, 2.428009033203125, 2.55126953125, 2.674530029296875, 2.79779052734375, 2.921051025390625, 3.0443115234375, 3.167572021484375, 3.29083251953125, 3.414093017578125, 3.537353515625, 3.660614013671875, 3.78387451171875, 3.907135009765625, 4.0303955078125, 4.153656005859375, 4.27691650390625, 4.400177001953125, 4.5234375]}, "gradients/decoder.transformer.h.21.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 4.0, 5.0, 0.0, 6.0, 7.0, 9.0, 6.0, 14.0, 10.0, 16.0, 18.0, 25.0, 32.0, 29.0, 21.0, 42.0, 26.0, 34.0, 39.0, 50.0, 55.0, 90.0, 260.0, 1543.0, 199.0, 83.0, 60.0, 46.0, 41.0, 37.0, 34.0, 27.0, 41.0, 25.0, 26.0, 19.0, 16.0, 10.0, 22.0, 9.0, 8.0, 7.0, 5.0, 4.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-8.09375, -7.7872314453125, -7.480712890625, -7.1741943359375, -6.86767578125, -6.5611572265625, -6.254638671875, -5.9481201171875, -5.6416015625, -5.3350830078125, -5.028564453125, -4.7220458984375, -4.41552734375, -4.1090087890625, -3.802490234375, -3.4959716796875, -3.189453125, -2.8829345703125, -2.576416015625, -2.2698974609375, -1.96337890625, -1.6568603515625, -1.350341796875, -1.0438232421875, -0.7373046875, -0.4307861328125, -0.124267578125, 0.1822509765625, 0.48876953125, 0.7952880859375, 1.101806640625, 1.4083251953125, 1.71484375, 2.0213623046875, 2.327880859375, 2.6343994140625, 2.94091796875, 3.2474365234375, 3.553955078125, 3.8604736328125, 4.1669921875, 4.4735107421875, 4.780029296875, 5.0865478515625, 5.39306640625, 5.6995849609375, 6.006103515625, 6.3126220703125, 6.619140625, 6.9256591796875, 7.232177734375, 7.5386962890625, 7.84521484375, 8.1517333984375, 8.458251953125, 8.7647705078125, 9.0712890625, 9.3778076171875, 9.684326171875, 9.9908447265625, 10.29736328125, 10.6038818359375, 10.910400390625, 11.2169189453125, 11.5234375]}, "gradients/decoder.transformer.h.21.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 5.0, 7.0, 4.0, 7.0, 11.0, 13.0, 14.0, 21.0, 14.0, 27.0, 30.0, 49.0, 52.0, 76.0, 105.0, 223.0, 420.0, 1068.0, 4428.0, 35660.0, 2576694.0, 506261.0, 16334.0, 2613.0, 719.0, 292.0, 149.0, 98.0, 69.0, 55.0, 35.0, 36.0, 32.0, 20.0, 15.0, 16.0, 11.0, 12.0, 4.0, 3.0, 7.0, 5.0, 2.0, 3.0, 1.0, 0.0, 1.0], "bins": [-19.34375, -18.83251953125, -18.3212890625, -17.81005859375, -17.298828125, -16.78759765625, -16.2763671875, -15.76513671875, -15.25390625, -14.74267578125, -14.2314453125, -13.72021484375, -13.208984375, -12.69775390625, -12.1865234375, -11.67529296875, -11.1640625, -10.65283203125, -10.1416015625, -9.63037109375, -9.119140625, -8.60791015625, -8.0966796875, -7.58544921875, -7.07421875, -6.56298828125, -6.0517578125, -5.54052734375, -5.029296875, -4.51806640625, -4.0068359375, -3.49560546875, -2.984375, -2.47314453125, -1.9619140625, -1.45068359375, -0.939453125, -0.42822265625, 0.0830078125, 0.59423828125, 1.10546875, 1.61669921875, 2.1279296875, 2.63916015625, 3.150390625, 3.66162109375, 4.1728515625, 4.68408203125, 5.1953125, 5.70654296875, 6.2177734375, 6.72900390625, 7.240234375, 7.75146484375, 8.2626953125, 8.77392578125, 9.28515625, 9.79638671875, 10.3076171875, 10.81884765625, 11.330078125, 11.84130859375, 12.3525390625, 12.86376953125, 13.375]}, "gradients/decoder.transformer.h.21.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 3.0, 6.0, 25.0, 59.0, 221.0, 243.0, 221.0, 143.0, 70.0, 13.0, 9.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-40.67424774169922, -39.62382125854492, -38.573394775390625, -37.52296829223633, -36.47254180908203, -35.42211151123047, -34.37168502807617, -33.321258544921875, -32.27083206176758, -31.22040557861328, -30.169979095458984, -29.119550704956055, -28.069124221801758, -27.01869773864746, -25.96826934814453, -24.917842864990234, -23.867416381835938, -22.81698989868164, -21.766563415527344, -20.716135025024414, -19.665708541870117, -18.61528205871582, -17.56485366821289, -16.514427185058594, -15.464000701904297, -14.41357421875, -13.363146781921387, -12.312719345092773, -11.262292861938477, -10.21186637878418, -9.161438941955566, -8.111011505126953, -7.060581207275391, -6.0101542472839355, -4.9597272872924805, -3.9093003273010254, -2.8588733673095703, -1.8084464073181152, -0.7580194473266602, 0.2924075126647949, 1.34283447265625, 2.393261432647705, 3.44368839263916, 4.494115352630615, 5.54454231262207, 6.594969272613525, 7.6453962326049805, 8.695823669433594, 9.74625015258789, 10.796676635742188, 11.8471040725708, 12.897531509399414, 13.947957992553711, 14.998384475708008, 16.048812866210938, 17.099239349365234, 18.14966583251953, 19.200092315673828, 20.250518798828125, 21.300947189331055, 22.35137367248535, 23.40180015563965, 24.452228546142578, 25.502655029296875, 26.553081512451172]}, "gradients/decoder.transformer.h.21.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 5.0, 1.0, 4.0, 7.0, 5.0, 3.0, 11.0, 8.0, 11.0, 11.0, 15.0, 24.0, 14.0, 24.0, 18.0, 27.0, 26.0, 33.0, 36.0, 37.0, 36.0, 37.0, 41.0, 37.0, 34.0, 47.0, 38.0, 32.0, 42.0, 34.0, 44.0, 29.0, 33.0, 24.0, 31.0, 23.0, 23.0, 20.0, 25.0, 14.0, 13.0, 6.0, 10.0, 4.0, 1.0, 7.0, 8.0, 2.0, 1.0, 4.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-28.69546890258789, -27.72551918029785, -26.75556755065918, -25.78561782836914, -24.81566619873047, -23.84571647644043, -22.875764846801758, -21.90581512451172, -20.935863494873047, -19.965913772583008, -18.995962142944336, -18.026012420654297, -17.056060791015625, -16.086111068725586, -15.116159439086914, -14.146209716796875, -13.17625904083252, -12.206308364868164, -11.236357688903809, -10.266407012939453, -9.296456336975098, -8.326505661010742, -7.356555461883545, -6.3866047859191895, -5.416654109954834, -4.4467034339904785, -3.476752758026123, -2.5068023204803467, -1.5368516445159912, -0.5669012069702148, 0.4030494689941406, 1.373000144958496, 2.3429508209228516, 3.312901496887207, 4.2828521728515625, 5.252802848815918, 6.222753524780273, 7.192703723907471, 8.162654876708984, 9.132604598999023, 10.102556228637695, 11.07250690460205, 12.042457580566406, 13.012408256530762, 13.982358932495117, 14.952308654785156, 15.922260284423828, 16.892210006713867, 17.862159729003906, 18.832109451293945, 19.802061080932617, 20.772010803222656, 21.741962432861328, 22.711912155151367, 23.68186378479004, 24.651813507080078, 25.62176513671875, 26.59171485900879, 27.56166648864746, 28.5316162109375, 29.501567840576172, 30.47151756286621, 31.441469192504883, 32.41141891479492, 33.381370544433594]}, "gradients/decoder.transformer.h.20.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0, 2.0, 2.0, 6.0, 2.0, 5.0, 13.0, 10.0, 14.0, 12.0, 19.0, 23.0, 15.0, 21.0, 21.0, 26.0, 25.0, 36.0, 41.0, 37.0, 43.0, 39.0, 42.0, 39.0, 68.0, 45.0, 52.0, 41.0, 39.0, 25.0, 35.0, 25.0, 23.0, 20.0, 29.0, 23.0, 14.0, 12.0, 19.0, 12.0, 11.0, 12.0, 2.0, 6.0, 3.0, 0.0, 2.0, 3.0, 2.0, 0.0, 0.0, 3.0], "bins": [-3.34375, -3.250732421875, -3.15771484375, -3.064697265625, -2.9716796875, -2.878662109375, -2.78564453125, -2.692626953125, -2.599609375, -2.506591796875, -2.41357421875, -2.320556640625, -2.2275390625, -2.134521484375, -2.04150390625, -1.948486328125, -1.85546875, -1.762451171875, -1.66943359375, -1.576416015625, -1.4833984375, -1.390380859375, -1.29736328125, -1.204345703125, -1.111328125, -1.018310546875, -0.92529296875, -0.832275390625, -0.7392578125, -0.646240234375, -0.55322265625, -0.460205078125, -0.3671875, -0.274169921875, -0.18115234375, -0.088134765625, 0.0048828125, 0.097900390625, 0.19091796875, 0.283935546875, 0.376953125, 0.469970703125, 0.56298828125, 0.656005859375, 0.7490234375, 0.842041015625, 0.93505859375, 1.028076171875, 1.12109375, 1.214111328125, 1.30712890625, 1.400146484375, 1.4931640625, 1.586181640625, 1.67919921875, 1.772216796875, 1.865234375, 1.958251953125, 2.05126953125, 2.144287109375, 2.2373046875, 2.330322265625, 2.42333984375, 2.516357421875, 2.609375]}, "gradients/decoder.transformer.h.20.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 4.0, 1.0, 6.0, 5.0, 5.0, 8.0, 7.0, 14.0, 12.0, 21.0, 23.0, 29.0, 38.0, 54.0, 80.0, 124.0, 311.0, 862.0, 3286.0, 18306.0, 199800.0, 3075169.0, 838942.0, 48445.0, 6521.0, 1346.0, 401.0, 155.0, 81.0, 58.0, 30.0, 24.0, 27.0, 19.0, 14.0, 15.0, 11.0, 12.0, 5.0, 10.0, 4.0, 2.0, 0.0, 3.0, 1.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-13.8515625, -13.45263671875, -13.0537109375, -12.65478515625, -12.255859375, -11.85693359375, -11.4580078125, -11.05908203125, -10.66015625, -10.26123046875, -9.8623046875, -9.46337890625, -9.064453125, -8.66552734375, -8.2666015625, -7.86767578125, -7.46875, -7.06982421875, -6.6708984375, -6.27197265625, -5.873046875, -5.47412109375, -5.0751953125, -4.67626953125, -4.27734375, -3.87841796875, -3.4794921875, -3.08056640625, -2.681640625, -2.28271484375, -1.8837890625, -1.48486328125, -1.0859375, -0.68701171875, -0.2880859375, 0.11083984375, 0.509765625, 0.90869140625, 1.3076171875, 1.70654296875, 2.10546875, 2.50439453125, 2.9033203125, 3.30224609375, 3.701171875, 4.10009765625, 4.4990234375, 4.89794921875, 5.296875, 5.69580078125, 6.0947265625, 6.49365234375, 6.892578125, 7.29150390625, 7.6904296875, 8.08935546875, 8.48828125, 8.88720703125, 9.2861328125, 9.68505859375, 10.083984375, 10.48291015625, 10.8818359375, 11.28076171875, 11.6796875]}, "gradients/decoder.transformer.h.20.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 5.0, 5.0, 6.0, 8.0, 6.0, 13.0, 16.0, 20.0, 31.0, 49.0, 49.0, 63.0, 126.0, 126.0, 192.0, 214.0, 329.0, 422.0, 454.0, 442.0, 405.0, 307.0, 212.0, 161.0, 104.0, 77.0, 59.0, 43.0, 45.0, 23.0, 13.0, 7.0, 11.0, 9.0, 8.0, 5.0, 6.0, 0.0, 4.0, 1.0, 2.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0], "bins": [-8.9609375, -8.70257568359375, -8.4442138671875, -8.18585205078125, -7.927490234375, -7.66912841796875, -7.4107666015625, -7.15240478515625, -6.89404296875, -6.63568115234375, -6.3773193359375, -6.11895751953125, -5.860595703125, -5.60223388671875, -5.3438720703125, -5.08551025390625, -4.8271484375, -4.56878662109375, -4.3104248046875, -4.05206298828125, -3.793701171875, -3.53533935546875, -3.2769775390625, -3.01861572265625, -2.76025390625, -2.50189208984375, -2.2435302734375, -1.98516845703125, -1.726806640625, -1.46844482421875, -1.2100830078125, -0.95172119140625, -0.693359375, -0.43499755859375, -0.1766357421875, 0.08172607421875, 0.340087890625, 0.59844970703125, 0.8568115234375, 1.11517333984375, 1.37353515625, 1.63189697265625, 1.8902587890625, 2.14862060546875, 2.406982421875, 2.66534423828125, 2.9237060546875, 3.18206787109375, 3.4404296875, 3.69879150390625, 3.9571533203125, 4.21551513671875, 4.473876953125, 4.73223876953125, 4.9906005859375, 5.24896240234375, 5.50732421875, 5.76568603515625, 6.0240478515625, 6.28240966796875, 6.540771484375, 6.79913330078125, 7.0574951171875, 7.31585693359375, 7.57421875]}, "gradients/decoder.transformer.h.20.mlp.c_fc.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 3.0, 3.0, 3.0, 5.0, 4.0, 7.0, 9.0, 7.0, 19.0, 13.0, 23.0, 52.0, 77.0, 117.0, 184.0, 321.0, 730.0, 1806.0, 5567.0, 23808.0, 153242.0, 1539743.0, 2195451.0, 229954.0, 32154.0, 7113.0, 2142.0, 826.0, 373.0, 196.0, 109.0, 78.0, 49.0, 23.0, 25.0, 13.0, 13.0, 7.0, 11.0, 5.0, 3.0, 3.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-13.9609375, -13.4639892578125, -12.967041015625, -12.4700927734375, -11.97314453125, -11.4761962890625, -10.979248046875, -10.4822998046875, -9.9853515625, -9.4884033203125, -8.991455078125, -8.4945068359375, -7.99755859375, -7.5006103515625, -7.003662109375, -6.5067138671875, -6.009765625, -5.5128173828125, -5.015869140625, -4.5189208984375, -4.02197265625, -3.5250244140625, -3.028076171875, -2.5311279296875, -2.0341796875, -1.5372314453125, -1.040283203125, -0.5433349609375, -0.04638671875, 0.4505615234375, 0.947509765625, 1.4444580078125, 1.94140625, 2.4383544921875, 2.935302734375, 3.4322509765625, 3.92919921875, 4.4261474609375, 4.923095703125, 5.4200439453125, 5.9169921875, 6.4139404296875, 6.910888671875, 7.4078369140625, 7.90478515625, 8.4017333984375, 8.898681640625, 9.3956298828125, 9.892578125, 10.3895263671875, 10.886474609375, 11.3834228515625, 11.88037109375, 12.3773193359375, 12.874267578125, 13.3712158203125, 13.8681640625, 14.3651123046875, 14.862060546875, 15.3590087890625, 15.85595703125, 16.3529052734375, 16.849853515625, 17.3468017578125, 17.84375]}, "gradients/decoder.transformer.h.20.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 6.0, 38.0, 102.0, 293.0, 304.0, 186.0, 72.0, 15.0, 0.0, 0.0, 0.0, 1.0], "bins": [-226.0171356201172, -221.99790954589844, -217.97866821289062, -213.95944213867188, -209.94020080566406, -205.9209747314453, -201.9017333984375, -197.88250732421875, -193.86326599121094, -189.8440399169922, -185.82479858398438, -181.80557250976562, -177.7863311767578, -173.76710510253906, -169.74786376953125, -165.7286376953125, -161.70941162109375, -157.690185546875, -153.6709442138672, -149.65171813964844, -145.63247680664062, -141.61325073242188, -137.59400939941406, -133.5747833251953, -129.5555419921875, -125.53630828857422, -121.51707458496094, -117.49784088134766, -113.47860717773438, -109.4593734741211, -105.44013977050781, -101.42091369628906, -97.40167999267578, -93.3824462890625, -89.36321258544922, -85.34397888183594, -81.32474517822266, -77.30551147460938, -73.28628540039062, -69.26704406738281, -65.24781799316406, -61.22858428955078, -57.2093505859375, -53.19011688232422, -49.17088317871094, -45.151649475097656, -41.13241958618164, -37.11318588256836, -33.09394836425781, -29.07471466064453, -25.05548095703125, -21.0362491607666, -17.01701545715332, -12.997781753540039, -8.97854995727539, -4.959316253662109, -0.9400844573974609, 3.079148769378662, 7.098381996154785, 11.11761474609375, 15.136848449707031, 19.156082153320312, 23.17531394958496, 27.194547653198242, 31.213781356811523]}, "gradients/decoder.transformer.h.20.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 4.0, 6.0, 4.0, 4.0, 7.0, 7.0, 9.0, 9.0, 10.0, 16.0, 12.0, 14.0, 15.0, 20.0, 23.0, 29.0, 22.0, 29.0, 37.0, 40.0, 39.0, 38.0, 46.0, 37.0, 38.0, 43.0, 25.0, 42.0, 37.0, 41.0, 32.0, 23.0, 34.0, 30.0, 29.0, 19.0, 18.0, 16.0, 13.0, 17.0, 8.0, 18.0, 13.0, 12.0, 8.0, 6.0, 6.0, 3.0, 2.0, 3.0, 2.0, 1.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-24.757465362548828, -23.924449920654297, -23.091434478759766, -22.258420944213867, -21.425405502319336, -20.592390060424805, -19.759376525878906, -18.926361083984375, -18.093345642089844, -17.260330200195312, -16.42731475830078, -15.594301223754883, -14.761285781860352, -13.92827033996582, -13.095255851745605, -12.26224136352539, -11.42922592163086, -10.596210479736328, -9.763195991516113, -8.930181503295898, -8.097166061401367, -7.264151096343994, -6.431136131286621, -5.598121166229248, -4.765106201171875, -3.932091236114502, -3.099076271057129, -2.266061305999756, -1.4330463409423828, -0.6000313758850098, 0.23298358917236328, 1.0659985542297363, 1.8990116119384766, 2.7320265769958496, 3.5650415420532227, 4.398056507110596, 5.231071472167969, 6.064086437225342, 6.897101402282715, 7.730116367340088, 8.563131332397461, 9.396146774291992, 10.229161262512207, 11.062175750732422, 11.895191192626953, 12.728206634521484, 13.5612211227417, 14.394235610961914, 15.227251052856445, 16.060266494750977, 16.893280029296875, 17.726295471191406, 18.559310913085938, 19.39232635498047, 20.225341796875, 21.0583553314209, 21.89137077331543, 22.72438621520996, 23.55739974975586, 24.39041519165039, 25.223430633544922, 26.056446075439453, 26.889461517333984, 27.722475051879883, 28.555490493774414]}, "gradients/decoder.transformer.h.20.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 5.0, 1.0, 4.0, 7.0, 9.0, 18.0, 14.0, 16.0, 25.0, 17.0, 26.0, 31.0, 29.0, 34.0, 36.0, 42.0, 43.0, 51.0, 38.0, 55.0, 43.0, 67.0, 51.0, 53.0, 30.0, 34.0, 36.0, 35.0, 30.0, 21.0, 26.0, 19.0, 16.0, 13.0, 8.0, 8.0, 10.0, 3.0, 3.0, 2.0, 5.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-3.880859375, -3.773223876953125, -3.66558837890625, -3.557952880859375, -3.4503173828125, -3.342681884765625, -3.23504638671875, -3.127410888671875, -3.019775390625, -2.912139892578125, -2.80450439453125, -2.696868896484375, -2.5892333984375, -2.481597900390625, -2.37396240234375, -2.266326904296875, -2.15869140625, -2.051055908203125, -1.94342041015625, -1.835784912109375, -1.7281494140625, -1.620513916015625, -1.51287841796875, -1.405242919921875, -1.297607421875, -1.189971923828125, -1.08233642578125, -0.974700927734375, -0.8670654296875, -0.759429931640625, -0.65179443359375, -0.544158935546875, -0.4365234375, -0.328887939453125, -0.22125244140625, -0.113616943359375, -0.0059814453125, 0.101654052734375, 0.20928955078125, 0.316925048828125, 0.424560546875, 0.532196044921875, 0.63983154296875, 0.747467041015625, 0.8551025390625, 0.962738037109375, 1.07037353515625, 1.178009033203125, 1.28564453125, 1.393280029296875, 1.50091552734375, 1.608551025390625, 1.7161865234375, 1.823822021484375, 1.93145751953125, 2.039093017578125, 2.146728515625, 2.254364013671875, 2.36199951171875, 2.469635009765625, 2.5772705078125, 2.684906005859375, 2.79254150390625, 2.900177001953125, 3.0078125]}, "gradients/decoder.transformer.h.20.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 4.0, 9.0, 7.0, 6.0, 13.0, 20.0, 29.0, 53.0, 62.0, 121.0, 189.0, 328.0, 620.0, 1083.0, 1974.0, 3619.0, 7232.0, 14102.0, 27304.0, 53943.0, 101212.0, 173428.0, 232777.0, 190461.0, 113878.0, 61363.0, 31200.0, 16148.0, 8177.0, 4172.0, 2163.0, 1208.0, 676.0, 387.0, 221.0, 134.0, 79.0, 52.0, 35.0, 23.0, 13.0, 14.0, 9.0, 8.0, 7.0, 4.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-0.4580078125, -0.44141387939453125, -0.4248199462890625, -0.40822601318359375, -0.391632080078125, -0.37503814697265625, -0.3584442138671875, -0.34185028076171875, -0.32525634765625, -0.30866241455078125, -0.2920684814453125, -0.27547454833984375, -0.258880615234375, -0.24228668212890625, -0.2256927490234375, -0.20909881591796875, -0.1925048828125, -0.17591094970703125, -0.1593170166015625, -0.14272308349609375, -0.126129150390625, -0.10953521728515625, -0.0929412841796875, -0.07634735107421875, -0.05975341796875, -0.04315948486328125, -0.0265655517578125, -0.00997161865234375, 0.006622314453125, 0.02321624755859375, 0.0398101806640625, 0.05640411376953125, 0.072998046875, 0.08959197998046875, 0.1061859130859375, 0.12277984619140625, 0.139373779296875, 0.15596771240234375, 0.1725616455078125, 0.18915557861328125, 0.20574951171875, 0.22234344482421875, 0.2389373779296875, 0.25553131103515625, 0.272125244140625, 0.28871917724609375, 0.3053131103515625, 0.32190704345703125, 0.3385009765625, 0.35509490966796875, 0.3716888427734375, 0.38828277587890625, 0.404876708984375, 0.42147064208984375, 0.4380645751953125, 0.45465850830078125, 0.47125244140625, 0.48784637451171875, 0.5044403076171875, 0.5210342407226562, 0.537628173828125, 0.5542221069335938, 0.5708160400390625, 0.5874099731445312, 0.60400390625]}, "gradients/decoder.transformer.h.20.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 1.0, 0.0, 2.0, 4.0, 7.0, 6.0, 6.0, 7.0, 5.0, 4.0, 14.0, 8.0, 11.0, 19.0, 20.0, 19.0, 21.0, 16.0, 19.0, 25.0, 32.0, 30.0, 35.0, 41.0, 50.0, 34.0, 45.0, 1059.0, 46.0, 37.0, 43.0, 50.0, 30.0, 32.0, 41.0, 30.0, 24.0, 31.0, 22.0, 17.0, 11.0, 16.0, 15.0, 6.0, 8.0, 8.0, 6.0, 10.0, 5.0, 0.0, 1.0, 4.0, 3.0, 2.0, 1.0, 4.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.77734375, -1.7187042236328125, -1.660064697265625, -1.6014251708984375, -1.54278564453125, -1.4841461181640625, -1.425506591796875, -1.3668670654296875, -1.3082275390625, -1.2495880126953125, -1.190948486328125, -1.1323089599609375, -1.07366943359375, -1.0150299072265625, -0.956390380859375, -0.8977508544921875, -0.839111328125, -0.7804718017578125, -0.721832275390625, -0.6631927490234375, -0.60455322265625, -0.5459136962890625, -0.487274169921875, -0.4286346435546875, -0.3699951171875, -0.3113555908203125, -0.252716064453125, -0.1940765380859375, -0.13543701171875, -0.0767974853515625, -0.018157958984375, 0.0404815673828125, 0.09912109375, 0.1577606201171875, 0.216400146484375, 0.2750396728515625, 0.33367919921875, 0.3923187255859375, 0.450958251953125, 0.5095977783203125, 0.5682373046875, 0.6268768310546875, 0.685516357421875, 0.7441558837890625, 0.80279541015625, 0.8614349365234375, 0.920074462890625, 0.9787139892578125, 1.037353515625, 1.0959930419921875, 1.154632568359375, 1.2132720947265625, 1.27191162109375, 1.3305511474609375, 1.389190673828125, 1.4478302001953125, 1.5064697265625, 1.5651092529296875, 1.623748779296875, 1.6823883056640625, 1.74102783203125, 1.7996673583984375, 1.858306884765625, 1.9169464111328125, 1.9755859375]}, "gradients/decoder.transformer.h.20.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 5.0, 5.0, 9.0, 5.0, 15.0, 19.0, 32.0, 28.0, 72.0, 75.0, 144.0, 240.0, 379.0, 589.0, 841.0, 1354.0, 2170.0, 3510.0, 5453.0, 9087.0, 15261.0, 25753.0, 44040.0, 73730.0, 116746.0, 174532.0, 1223734.0, 148816.0, 99586.0, 60863.0, 36350.0, 21071.0, 12501.0, 7509.0, 4585.0, 2980.0, 1848.0, 1155.0, 759.0, 454.0, 310.0, 184.0, 124.0, 88.0, 46.0, 29.0, 26.0, 13.0, 7.0, 7.0, 4.0, 4.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.26708984375, -0.2585601806640625, -0.250030517578125, -0.2415008544921875, -0.23297119140625, -0.2244415283203125, -0.215911865234375, -0.2073822021484375, -0.1988525390625, -0.1903228759765625, -0.181793212890625, -0.1732635498046875, -0.16473388671875, -0.1562042236328125, -0.147674560546875, -0.1391448974609375, -0.130615234375, -0.1220855712890625, -0.113555908203125, -0.1050262451171875, -0.09649658203125, -0.0879669189453125, -0.079437255859375, -0.0709075927734375, -0.0623779296875, -0.0538482666015625, -0.045318603515625, -0.0367889404296875, -0.02825927734375, -0.0197296142578125, -0.011199951171875, -0.0026702880859375, 0.005859375, 0.0143890380859375, 0.022918701171875, 0.0314483642578125, 0.03997802734375, 0.0485076904296875, 0.057037353515625, 0.0655670166015625, 0.0740966796875, 0.0826263427734375, 0.091156005859375, 0.0996856689453125, 0.10821533203125, 0.1167449951171875, 0.125274658203125, 0.1338043212890625, 0.142333984375, 0.1508636474609375, 0.159393310546875, 0.1679229736328125, 0.17645263671875, 0.1849822998046875, 0.193511962890625, 0.2020416259765625, 0.2105712890625, 0.2191009521484375, 0.227630615234375, 0.2361602783203125, 0.24468994140625, 0.2532196044921875, 0.261749267578125, 0.2702789306640625, 0.27880859375]}, "gradients/decoder.transformer.h.20.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 4.0, 6.0, 2.0, 5.0, 5.0, 1.0, 2.0, 8.0, 7.0, 6.0, 9.0, 11.0, 17.0, 16.0, 19.0, 13.0, 20.0, 31.0, 32.0, 26.0, 52.0, 60.0, 79.0, 89.0, 106.0, 69.0, 40.0, 41.0, 34.0, 26.0, 23.0, 29.0, 20.0, 16.0, 18.0, 14.0, 11.0, 11.0, 8.0, 7.0, 6.0, 4.0, 5.0, 1.0, 0.0, 3.0, 1.0, 0.0, 3.0, 1.0, 0.0, 2.0], "bins": [-0.0091400146484375, -0.008888006210327148, -0.008635997772216797, -0.008383989334106445, -0.008131980895996094, -0.007879972457885742, -0.007627964019775391, -0.007375955581665039, -0.0071239471435546875, -0.006871938705444336, -0.006619930267333984, -0.006367921829223633, -0.006115913391113281, -0.00586390495300293, -0.005611896514892578, -0.0053598880767822266, -0.005107879638671875, -0.0048558712005615234, -0.004603862762451172, -0.00435185432434082, -0.004099845886230469, -0.003847837448120117, -0.0035958290100097656, -0.003343820571899414, -0.0030918121337890625, -0.002839803695678711, -0.0025877952575683594, -0.002335786819458008, -0.0020837783813476562, -0.0018317699432373047, -0.0015797615051269531, -0.0013277530670166016, -0.00107574462890625, -0.0008237361907958984, -0.0005717277526855469, -0.0003197193145751953, -6.771087646484375e-05, 0.0001842975616455078, 0.0004363059997558594, 0.0006883144378662109, 0.0009403228759765625, 0.001192331314086914, 0.0014443397521972656, 0.0016963481903076172, 0.0019483566284179688, 0.0022003650665283203, 0.002452373504638672, 0.0027043819427490234, 0.002956390380859375, 0.0032083988189697266, 0.003460407257080078, 0.0037124156951904297, 0.003964424133300781, 0.004216432571411133, 0.004468441009521484, 0.004720449447631836, 0.0049724578857421875, 0.005224466323852539, 0.005476474761962891, 0.005728483200073242, 0.005980491638183594, 0.006232500076293945, 0.006484508514404297, 0.0067365169525146484, 0.006988525390625]}, "gradients/decoder.transformer.h.20.crossattention.q_attn.weight": {"_type": "histogram", "values": [3.0, 0.0, 1.0, 2.0, 2.0, 2.0, 1.0, 5.0, 2.0, 8.0, 14.0, 14.0, 22.0, 16.0, 15.0, 31.0, 29.0, 41.0, 67.0, 68.0, 85.0, 98.0, 162.0, 184.0, 285.0, 628.0, 8702.0, 970005.0, 65461.0, 1295.0, 375.0, 229.0, 155.0, 109.0, 98.0, 80.0, 53.0, 48.0, 39.0, 39.0, 19.0, 19.0, 7.0, 9.0, 9.0, 7.0, 10.0, 5.0, 6.0, 2.0, 3.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.11175537109375, -0.10772228240966797, -0.10368919372558594, -0.0996561050415039, -0.09562301635742188, -0.09158992767333984, -0.08755683898925781, -0.08352375030517578, -0.07949066162109375, -0.07545757293701172, -0.07142448425292969, -0.06739139556884766, -0.06335830688476562, -0.059325218200683594, -0.05529212951660156, -0.05125904083251953, -0.0472259521484375, -0.04319286346435547, -0.03915977478027344, -0.035126686096191406, -0.031093597412109375, -0.027060508728027344, -0.023027420043945312, -0.01899433135986328, -0.01496124267578125, -0.010928153991699219, -0.0068950653076171875, -0.0028619766235351562, 0.001171112060546875, 0.005204200744628906, 0.009237289428710938, 0.013270378112792969, 0.017303466796875, 0.02133655548095703, 0.025369644165039062, 0.029402732849121094, 0.033435821533203125, 0.037468910217285156, 0.04150199890136719, 0.04553508758544922, 0.04956817626953125, 0.05360126495361328, 0.05763435363769531, 0.061667442321777344, 0.06570053100585938, 0.0697336196899414, 0.07376670837402344, 0.07779979705810547, 0.0818328857421875, 0.08586597442626953, 0.08989906311035156, 0.0939321517944336, 0.09796524047851562, 0.10199832916259766, 0.10603141784667969, 0.11006450653076172, 0.11409759521484375, 0.11813068389892578, 0.12216377258300781, 0.12619686126708984, 0.13022994995117188, 0.1342630386352539, 0.13829612731933594, 0.14232921600341797, 0.1463623046875]}, "gradients/decoder.transformer.h.20.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 34.0, 809.0, 169.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.1530391275882721, -0.14990441501140594, -0.1467697024345398, -0.14363498985767365, -0.1405002772808075, -0.13736556470394135, -0.1342308521270752, -0.13109613955020905, -0.1279614269733429, -0.12482671439647675, -0.1216920018196106, -0.11855728924274445, -0.1154225766658783, -0.11228786408901215, -0.109153151512146, -0.10601843893527985, -0.1028837189078331, -0.09974900633096695, -0.0966142937541008, -0.09347958117723465, -0.0903448686003685, -0.08721015602350235, -0.0840754359960556, -0.08094072341918945, -0.0778060108423233, -0.07467129826545715, -0.071536585688591, -0.06840187311172485, -0.0652671605348587, -0.062132447957992554, -0.058997731655836105, -0.055863019078969955, -0.052728310227394104, -0.049593597650527954, -0.046458885073661804, -0.043324172496795654, -0.040189459919929504, -0.037054747343063354, -0.033920031040906906, -0.030785318464040756, -0.027650605887174606, -0.024515893310308456, -0.021381180733442307, -0.018246466293931007, -0.015111753717064857, -0.011977041140198708, -0.008842326700687408, -0.0057076141238212585, -0.0025729015469551086, 0.0005618114955723286, 0.0036965245380997658, 0.00683123804628849, 0.00996595062315464, 0.01310066320002079, 0.01623537763953209, 0.01937009021639824, 0.02250480279326439, 0.02563951537013054, 0.02877422794699669, 0.03190894424915314, 0.03504365682601929, 0.03817836940288544, 0.04131308197975159, 0.04444779455661774, 0.04758250713348389]}, "gradients/decoder.transformer.h.20.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 6.0, 1.0, 9.0, 7.0, 15.0, 8.0, 20.0, 29.0, 22.0, 29.0, 32.0, 48.0, 50.0, 56.0, 45.0, 52.0, 43.0, 45.0, 56.0, 48.0, 58.0, 41.0, 51.0, 41.0, 41.0, 24.0, 29.0, 24.0, 17.0, 12.0, 21.0, 11.0, 4.0, 6.0, 4.0, 3.0, 4.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.013088226318359375, -0.012706208974123001, -0.012324191629886627, -0.011942174285650253, -0.01156015694141388, -0.011178139597177505, -0.010796122252941132, -0.010414104908704758, -0.010032087564468384, -0.00965007022023201, -0.009268052875995636, -0.008886035531759262, -0.008504018187522888, -0.008122000843286514, -0.00773998349905014, -0.0073579661548137665, -0.006975948810577393, -0.006593931466341019, -0.006211914122104645, -0.005829896777868271, -0.005447879433631897, -0.005065862089395523, -0.004683844745159149, -0.004301827400922775, -0.003919810056686401, -0.0035377927124500275, -0.0031557753682136536, -0.0027737580239772797, -0.0023917406797409058, -0.002009723335504532, -0.001627705991268158, -0.001245688647031784, -0.0008636713027954102, -0.00048165395855903625, -9.963661432266235e-05, 0.00028238072991371155, 0.0006643980741500854, 0.0010464154183864594, 0.0014284327626228333, 0.0018104501068592072, 0.002192467451095581, 0.002574484795331955, 0.002956502139568329, 0.0033385194838047028, 0.0037205368280410767, 0.0041025541722774506, 0.0044845715165138245, 0.004866588860750198, 0.005248606204986572, 0.005630623549222946, 0.00601264089345932, 0.006394658237695694, 0.006776675581932068, 0.007158692926168442, 0.007540710270404816, 0.00792272761464119, 0.008304744958877563, 0.008686762303113937, 0.009068779647350311, 0.009450796991586685, 0.009832814335823059, 0.010214831680059433, 0.010596849024295807, 0.01097886636853218, 0.011360883712768555]}, "gradients/decoder.transformer.h.20.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 5.0, 1.0, 4.0, 7.0, 9.0, 18.0, 14.0, 16.0, 25.0, 17.0, 26.0, 31.0, 29.0, 34.0, 36.0, 42.0, 43.0, 51.0, 38.0, 55.0, 43.0, 67.0, 51.0, 53.0, 30.0, 34.0, 36.0, 35.0, 31.0, 20.0, 26.0, 19.0, 16.0, 13.0, 8.0, 8.0, 10.0, 3.0, 3.0, 2.0, 5.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-3.880859375, -3.773223876953125, -3.66558837890625, -3.557952880859375, -3.4503173828125, -3.342681884765625, -3.23504638671875, -3.127410888671875, -3.019775390625, -2.912139892578125, -2.80450439453125, -2.696868896484375, -2.5892333984375, -2.481597900390625, -2.37396240234375, -2.266326904296875, -2.15869140625, -2.051055908203125, -1.94342041015625, -1.835784912109375, -1.7281494140625, -1.620513916015625, -1.51287841796875, -1.405242919921875, -1.297607421875, -1.189971923828125, -1.08233642578125, -0.974700927734375, -0.8670654296875, -0.759429931640625, -0.65179443359375, -0.544158935546875, -0.4365234375, -0.328887939453125, -0.22125244140625, -0.113616943359375, -0.0059814453125, 0.101654052734375, 0.20928955078125, 0.316925048828125, 0.424560546875, 0.532196044921875, 0.63983154296875, 0.747467041015625, 0.8551025390625, 0.962738037109375, 1.07037353515625, 1.178009033203125, 1.28564453125, 1.393280029296875, 1.50091552734375, 1.608551025390625, 1.7161865234375, 1.823822021484375, 1.93145751953125, 2.039093017578125, 2.146728515625, 2.254364013671875, 2.36199951171875, 2.469635009765625, 2.5772705078125, 2.684906005859375, 2.79254150390625, 2.900177001953125, 3.0078125]}, "gradients/decoder.transformer.h.20.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 3.0, 2.0, 1.0, 4.0, 5.0, 8.0, 18.0, 38.0, 37.0, 42.0, 67.0, 96.0, 173.0, 204.0, 352.0, 635.0, 1100.0, 2250.0, 5073.0, 12518.0, 33915.0, 118664.0, 399620.0, 334746.0, 92111.0, 27570.0, 10341.0, 4366.0, 2011.0, 1058.0, 576.0, 304.0, 213.0, 151.0, 104.0, 62.0, 34.0, 30.0, 25.0, 15.0, 6.0, 9.0, 4.0, 4.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-4.1171875, -3.995758056640625, -3.87432861328125, -3.752899169921875, -3.6314697265625, -3.510040283203125, -3.38861083984375, -3.267181396484375, -3.145751953125, -3.024322509765625, -2.90289306640625, -2.781463623046875, -2.6600341796875, -2.538604736328125, -2.41717529296875, -2.295745849609375, -2.17431640625, -2.052886962890625, -1.93145751953125, -1.810028076171875, -1.6885986328125, -1.567169189453125, -1.44573974609375, -1.324310302734375, -1.202880859375, -1.081451416015625, -0.96002197265625, -0.838592529296875, -0.7171630859375, -0.595733642578125, -0.47430419921875, -0.352874755859375, -0.2314453125, -0.110015869140625, 0.01141357421875, 0.132843017578125, 0.2542724609375, 0.375701904296875, 0.49713134765625, 0.618560791015625, 0.739990234375, 0.861419677734375, 0.98284912109375, 1.104278564453125, 1.2257080078125, 1.347137451171875, 1.46856689453125, 1.589996337890625, 1.71142578125, 1.832855224609375, 1.95428466796875, 2.075714111328125, 2.1971435546875, 2.318572998046875, 2.44000244140625, 2.561431884765625, 2.682861328125, 2.804290771484375, 2.92572021484375, 3.047149658203125, 3.1685791015625, 3.290008544921875, 3.41143798828125, 3.532867431640625, 3.654296875]}, "gradients/decoder.transformer.h.20.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 4.0, 2.0, 3.0, 7.0, 5.0, 6.0, 3.0, 15.0, 7.0, 17.0, 16.0, 19.0, 21.0, 25.0, 33.0, 34.0, 41.0, 45.0, 46.0, 37.0, 64.0, 140.0, 427.0, 1454.0, 132.0, 51.0, 50.0, 48.0, 39.0, 40.0, 34.0, 34.0, 26.0, 17.0, 23.0, 23.0, 11.0, 8.0, 10.0, 5.0, 13.0, 3.0, 5.0, 6.0, 5.0, 4.0, 5.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.828125, -9.5006103515625, -9.173095703125, -8.8455810546875, -8.51806640625, -8.1905517578125, -7.863037109375, -7.5355224609375, -7.2080078125, -6.8804931640625, -6.552978515625, -6.2254638671875, -5.89794921875, -5.5704345703125, -5.242919921875, -4.9154052734375, -4.587890625, -4.2603759765625, -3.932861328125, -3.6053466796875, -3.27783203125, -2.9503173828125, -2.622802734375, -2.2952880859375, -1.9677734375, -1.6402587890625, -1.312744140625, -0.9852294921875, -0.65771484375, -0.3302001953125, -0.002685546875, 0.3248291015625, 0.65234375, 0.9798583984375, 1.307373046875, 1.6348876953125, 1.96240234375, 2.2899169921875, 2.617431640625, 2.9449462890625, 3.2724609375, 3.5999755859375, 3.927490234375, 4.2550048828125, 4.58251953125, 4.9100341796875, 5.237548828125, 5.5650634765625, 5.892578125, 6.2200927734375, 6.547607421875, 6.8751220703125, 7.20263671875, 7.5301513671875, 7.857666015625, 8.1851806640625, 8.5126953125, 8.8402099609375, 9.167724609375, 9.4952392578125, 9.82275390625, 10.1502685546875, 10.477783203125, 10.8052978515625, 11.1328125]}, "gradients/decoder.transformer.h.20.attn.c_attn.weight": {"_type": "histogram", "values": [3.0, 1.0, 2.0, 2.0, 2.0, 6.0, 7.0, 2.0, 5.0, 4.0, 6.0, 11.0, 7.0, 6.0, 11.0, 13.0, 21.0, 26.0, 32.0, 23.0, 39.0, 62.0, 76.0, 137.0, 207.0, 337.0, 811.0, 2456.0, 14411.0, 259293.0, 2797555.0, 60917.0, 6415.0, 1446.0, 560.0, 258.0, 151.0, 106.0, 53.0, 49.0, 37.0, 23.0, 31.0, 20.0, 16.0, 12.0, 14.0, 7.0, 6.0, 4.0, 4.0, 9.0, 3.0, 3.0, 3.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0], "bins": [-13.6484375, -13.197998046875, -12.74755859375, -12.297119140625, -11.8466796875, -11.396240234375, -10.94580078125, -10.495361328125, -10.044921875, -9.594482421875, -9.14404296875, -8.693603515625, -8.2431640625, -7.792724609375, -7.34228515625, -6.891845703125, -6.44140625, -5.990966796875, -5.54052734375, -5.090087890625, -4.6396484375, -4.189208984375, -3.73876953125, -3.288330078125, -2.837890625, -2.387451171875, -1.93701171875, -1.486572265625, -1.0361328125, -0.585693359375, -0.13525390625, 0.315185546875, 0.765625, 1.216064453125, 1.66650390625, 2.116943359375, 2.5673828125, 3.017822265625, 3.46826171875, 3.918701171875, 4.369140625, 4.819580078125, 5.27001953125, 5.720458984375, 6.1708984375, 6.621337890625, 7.07177734375, 7.522216796875, 7.97265625, 8.423095703125, 8.87353515625, 9.323974609375, 9.7744140625, 10.224853515625, 10.67529296875, 11.125732421875, 11.576171875, 12.026611328125, 12.47705078125, 12.927490234375, 13.3779296875, 13.828369140625, 14.27880859375, 14.729248046875, 15.1796875]}, "gradients/decoder.transformer.h.20.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 10.0, 92.0, 436.0, 393.0, 79.0, 6.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-81.15377807617188, -78.89427185058594, -76.63475799560547, -74.37525177001953, -72.11573791503906, -69.85623168945312, -67.59671783447266, -65.33721160888672, -63.07769775390625, -60.81818771362305, -58.558677673339844, -56.29916763305664, -54.03965759277344, -51.780147552490234, -49.52063751220703, -47.261131286621094, -45.00162124633789, -42.74211120605469, -40.482601165771484, -38.22309112548828, -35.96358108520508, -33.704071044921875, -31.444562911987305, -29.1850528717041, -26.9255428314209, -24.666032791137695, -22.406522750854492, -20.147014617919922, -17.88750457763672, -15.6279935836792, -13.368484497070312, -11.10897445678711, -8.849468231201172, -6.589958190917969, -4.330448627471924, -2.070939064025879, 0.18857097625732422, 2.4480810165405273, 4.707590103149414, 6.967100143432617, 9.22661018371582, 11.486120223999023, 13.745630264282227, 16.005138397216797, 18.2646484375, 20.524158477783203, 22.783668518066406, 25.04317855834961, 27.302688598632812, 29.562198638916016, 31.82170867919922, 34.08121871948242, 36.340728759765625, 38.60023880004883, 40.85974884033203, 43.11925506591797, 45.37876892089844, 47.63827896118164, 49.897789001464844, 52.15729904174805, 54.41680908203125, 56.67631912231445, 58.935829162597656, 61.195335388183594, 63.4548454284668]}, "gradients/decoder.transformer.h.20.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 6.0, 1.0, 1.0, 5.0, 5.0, 2.0, 4.0, 10.0, 8.0, 11.0, 12.0, 20.0, 20.0, 20.0, 20.0, 22.0, 26.0, 25.0, 27.0, 41.0, 32.0, 26.0, 41.0, 42.0, 33.0, 44.0, 47.0, 47.0, 53.0, 40.0, 27.0, 38.0, 41.0, 25.0, 26.0, 28.0, 19.0, 21.0, 17.0, 10.0, 16.0, 14.0, 8.0, 5.0, 7.0, 8.0, 5.0, 4.0, 4.0, 2.0, 1.0, 1.0, 1.0, 1.0], "bins": [-39.18626403808594, -38.09932327270508, -37.01238250732422, -35.92544174194336, -34.8385009765625, -33.75156021118164, -32.66461944580078, -31.577678680419922, -30.490737915039062, -29.403797149658203, -28.316856384277344, -27.229915618896484, -26.142974853515625, -25.056034088134766, -23.969093322753906, -22.882152557373047, -21.795211791992188, -20.708271026611328, -19.62133026123047, -18.53438949584961, -17.44744873046875, -16.36050796508789, -15.273567199707031, -14.186626434326172, -13.099685668945312, -12.012744903564453, -10.925804138183594, -9.838863372802734, -8.751922607421875, -7.664981842041016, -6.578041076660156, -5.491100311279297, -4.4041595458984375, -3.317218780517578, -2.2302780151367188, -1.1433372497558594, -0.056396484375, 1.0305442810058594, 2.1174850463867188, 3.204425811767578, 4.2913665771484375, 5.378307342529297, 6.465248107910156, 7.552188873291016, 8.639129638671875, 9.726070404052734, 10.813011169433594, 11.899951934814453, 12.986892700195312, 14.073833465576172, 15.160774230957031, 16.24771499633789, 17.33465576171875, 18.42159652709961, 19.50853729248047, 20.595478057861328, 21.682418823242188, 22.769359588623047, 23.856300354003906, 24.943241119384766, 26.030181884765625, 27.117122650146484, 28.204063415527344, 29.291004180908203, 30.377944946289062]}, "gradients/decoder.transformer.h.19.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 3.0, 1.0, 5.0, 3.0, 7.0, 8.0, 18.0, 20.0, 15.0, 23.0, 27.0, 18.0, 34.0, 25.0, 36.0, 34.0, 37.0, 43.0, 40.0, 51.0, 41.0, 54.0, 55.0, 45.0, 50.0, 47.0, 33.0, 30.0, 34.0, 25.0, 24.0, 24.0, 26.0, 13.0, 17.0, 11.0, 13.0, 5.0, 5.0, 5.0, 2.0, 5.0, 4.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-3.80859375, -3.700408935546875, -3.59222412109375, -3.484039306640625, -3.3758544921875, -3.267669677734375, -3.15948486328125, -3.051300048828125, -2.943115234375, -2.834930419921875, -2.72674560546875, -2.618560791015625, -2.5103759765625, -2.402191162109375, -2.29400634765625, -2.185821533203125, -2.07763671875, -1.969451904296875, -1.86126708984375, -1.753082275390625, -1.6448974609375, -1.536712646484375, -1.42852783203125, -1.320343017578125, -1.212158203125, -1.103973388671875, -0.99578857421875, -0.887603759765625, -0.7794189453125, -0.671234130859375, -0.56304931640625, -0.454864501953125, -0.3466796875, -0.238494873046875, -0.13031005859375, -0.022125244140625, 0.0860595703125, 0.194244384765625, 0.30242919921875, 0.410614013671875, 0.518798828125, 0.626983642578125, 0.73516845703125, 0.843353271484375, 0.9515380859375, 1.059722900390625, 1.16790771484375, 1.276092529296875, 1.38427734375, 1.492462158203125, 1.60064697265625, 1.708831787109375, 1.8170166015625, 1.925201416015625, 2.03338623046875, 2.141571044921875, 2.249755859375, 2.357940673828125, 2.46612548828125, 2.574310302734375, 2.6824951171875, 2.790679931640625, 2.89886474609375, 3.007049560546875, 3.115234375]}, "gradients/decoder.transformer.h.19.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 4.0, 3.0, 7.0, 4.0, 12.0, 11.0, 13.0, 24.0, 29.0, 52.0, 67.0, 113.0, 178.0, 342.0, 766.0, 1885.0, 5375.0, 18127.0, 81319.0, 568141.0, 2542198.0, 829272.0, 112520.0, 23405.0, 6571.0, 2168.0, 809.0, 390.0, 181.0, 102.0, 57.0, 38.0, 27.0, 18.0, 24.0, 12.0, 5.0, 7.0, 5.0, 3.0, 1.0, 5.0, 0.0, 3.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.546875, -8.29241943359375, -8.0379638671875, -7.78350830078125, -7.529052734375, -7.27459716796875, -7.0201416015625, -6.76568603515625, -6.51123046875, -6.25677490234375, -6.0023193359375, -5.74786376953125, -5.493408203125, -5.23895263671875, -4.9844970703125, -4.73004150390625, -4.4755859375, -4.22113037109375, -3.9666748046875, -3.71221923828125, -3.457763671875, -3.20330810546875, -2.9488525390625, -2.69439697265625, -2.43994140625, -2.18548583984375, -1.9310302734375, -1.67657470703125, -1.422119140625, -1.16766357421875, -0.9132080078125, -0.65875244140625, -0.404296875, -0.14984130859375, 0.1046142578125, 0.35906982421875, 0.613525390625, 0.86798095703125, 1.1224365234375, 1.37689208984375, 1.63134765625, 1.88580322265625, 2.1402587890625, 2.39471435546875, 2.649169921875, 2.90362548828125, 3.1580810546875, 3.41253662109375, 3.6669921875, 3.92144775390625, 4.1759033203125, 4.43035888671875, 4.684814453125, 4.93927001953125, 5.1937255859375, 5.44818115234375, 5.70263671875, 5.95709228515625, 6.2115478515625, 6.46600341796875, 6.720458984375, 6.97491455078125, 7.2293701171875, 7.48382568359375, 7.73828125]}, "gradients/decoder.transformer.h.19.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 3.0, 6.0, 2.0, 1.0, 6.0, 5.0, 6.0, 13.0, 19.0, 23.0, 19.0, 33.0, 42.0, 52.0, 61.0, 97.0, 137.0, 163.0, 248.0, 342.0, 453.0, 488.0, 448.0, 393.0, 289.0, 224.0, 127.0, 102.0, 76.0, 46.0, 42.0, 32.0, 22.0, 19.0, 11.0, 8.0, 3.0, 8.0, 4.0, 7.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0], "bins": [-9.421875, -9.1494140625, -8.876953125, -8.6044921875, -8.33203125, -8.0595703125, -7.787109375, -7.5146484375, -7.2421875, -6.9697265625, -6.697265625, -6.4248046875, -6.15234375, -5.8798828125, -5.607421875, -5.3349609375, -5.0625, -4.7900390625, -4.517578125, -4.2451171875, -3.97265625, -3.7001953125, -3.427734375, -3.1552734375, -2.8828125, -2.6103515625, -2.337890625, -2.0654296875, -1.79296875, -1.5205078125, -1.248046875, -0.9755859375, -0.703125, -0.4306640625, -0.158203125, 0.1142578125, 0.38671875, 0.6591796875, 0.931640625, 1.2041015625, 1.4765625, 1.7490234375, 2.021484375, 2.2939453125, 2.56640625, 2.8388671875, 3.111328125, 3.3837890625, 3.65625, 3.9287109375, 4.201171875, 4.4736328125, 4.74609375, 5.0185546875, 5.291015625, 5.5634765625, 5.8359375, 6.1083984375, 6.380859375, 6.6533203125, 6.92578125, 7.1982421875, 7.470703125, 7.7431640625, 8.015625]}, "gradients/decoder.transformer.h.19.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 2.0, 1.0, 0.0, 4.0, 0.0, 0.0, 4.0, 9.0, 11.0, 16.0, 14.0, 29.0, 60.0, 91.0, 111.0, 191.0, 344.0, 584.0, 1128.0, 2362.0, 5261.0, 14202.0, 46823.0, 211341.0, 1209604.0, 2137251.0, 440988.0, 86425.0, 22942.0, 7789.0, 3263.0, 1562.0, 813.0, 428.0, 236.0, 136.0, 107.0, 59.0, 25.0, 26.0, 17.0, 12.0, 7.0, 8.0, 2.0, 5.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-12.171875, -11.805419921875, -11.43896484375, -11.072509765625, -10.7060546875, -10.339599609375, -9.97314453125, -9.606689453125, -9.240234375, -8.873779296875, -8.50732421875, -8.140869140625, -7.7744140625, -7.407958984375, -7.04150390625, -6.675048828125, -6.30859375, -5.942138671875, -5.57568359375, -5.209228515625, -4.8427734375, -4.476318359375, -4.10986328125, -3.743408203125, -3.376953125, -3.010498046875, -2.64404296875, -2.277587890625, -1.9111328125, -1.544677734375, -1.17822265625, -0.811767578125, -0.4453125, -0.078857421875, 0.28759765625, 0.654052734375, 1.0205078125, 1.386962890625, 1.75341796875, 2.119873046875, 2.486328125, 2.852783203125, 3.21923828125, 3.585693359375, 3.9521484375, 4.318603515625, 4.68505859375, 5.051513671875, 5.41796875, 5.784423828125, 6.15087890625, 6.517333984375, 6.8837890625, 7.250244140625, 7.61669921875, 7.983154296875, 8.349609375, 8.716064453125, 9.08251953125, 9.448974609375, 9.8154296875, 10.181884765625, 10.54833984375, 10.914794921875, 11.28125]}, "gradients/decoder.transformer.h.19.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 3.0, 19.0, 64.0, 138.0, 237.0, 266.0, 175.0, 73.0, 22.0, 13.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-172.69281005859375, -169.1873321533203, -165.68185424804688, -162.1763916015625, -158.67091369628906, -155.16543579101562, -151.6599578857422, -148.15447998046875, -144.6490020751953, -141.14352416992188, -137.63804626464844, -134.132568359375, -130.62710571289062, -127.12162780761719, -123.61614990234375, -120.11067199707031, -116.6052017211914, -113.09972381591797, -109.59425354003906, -106.08877563476562, -102.58329772949219, -99.07781982421875, -95.57234954833984, -92.0668716430664, -88.5614013671875, -85.05592346191406, -81.55045318603516, -78.04497528076172, -74.53949737548828, -71.03402709960938, -67.52854919433594, -64.0230712890625, -60.51759719848633, -57.012123107910156, -53.50664520263672, -50.00117111206055, -46.495697021484375, -42.99021911621094, -39.484745025634766, -35.979270935058594, -32.473793029785156, -28.96831703186035, -25.462841033935547, -21.957366943359375, -18.45189094543457, -14.946414947509766, -11.440940856933594, -7.935464859008789, -4.429988861083984, -0.9245133399963379, 2.5809621810913086, 6.086437225341797, 9.591913223266602, 13.097389221191406, 16.602863311767578, 20.108339309692383, 23.613815307617188, 27.119291305541992, 30.624767303466797, 34.13024139404297, 37.635719299316406, 41.14119338989258, 44.64666748046875, 48.15214538574219, 51.65761947631836]}, "gradients/decoder.transformer.h.19.ln_2.bias": {"_type": "histogram", "values": [1.0, 3.0, 0.0, 0.0, 2.0, 1.0, 5.0, 3.0, 4.0, 1.0, 8.0, 3.0, 4.0, 12.0, 11.0, 10.0, 19.0, 19.0, 16.0, 19.0, 27.0, 14.0, 22.0, 23.0, 28.0, 21.0, 25.0, 30.0, 25.0, 30.0, 38.0, 40.0, 40.0, 40.0, 33.0, 46.0, 29.0, 42.0, 26.0, 34.0, 20.0, 29.0, 31.0, 19.0, 23.0, 23.0, 18.0, 16.0, 12.0, 6.0, 6.0, 7.0, 10.0, 12.0, 8.0, 8.0, 5.0, 3.0, 7.0, 2.0, 2.0, 0.0, 1.0, 2.0], "bins": [-26.002193450927734, -25.206300735473633, -24.41040802001953, -23.61451530456543, -22.818622589111328, -22.022727966308594, -21.226837158203125, -20.43094253540039, -19.63504981994629, -18.839157104492188, -18.043264389038086, -17.247371673583984, -16.451478958129883, -15.655585289001465, -14.859692573547363, -14.063798904418945, -13.26790714263916, -12.472014427185059, -11.676121711730957, -10.880228042602539, -10.084335327148438, -9.288442611694336, -8.492549896240234, -7.696656703948975, -6.900763988494873, -6.1048712730407715, -5.308978080749512, -4.51308536529541, -3.7171924114227295, -2.921299457550049, -2.1254067420959473, -1.3295135498046875, -0.5336208343505859, 0.26227205991744995, 1.0581649541854858, 1.854057788848877, 2.6499507427215576, 3.4458436965942383, 4.24173641204834, 5.0376296043396, 5.833522319793701, 6.629415035247803, 7.4253082275390625, 8.221200942993164, 9.017093658447266, 9.812986373901367, 10.608879089355469, 11.404772758483887, 12.200665473937988, 12.99655818939209, 13.792450904846191, 14.58834457397461, 15.384237289428711, 16.180130004882812, 16.976022720336914, 17.771915435791016, 18.567808151245117, 19.36370086669922, 20.15959358215332, 20.955486297607422, 21.751379013061523, 22.547271728515625, 23.34316635131836, 24.13905906677246, 24.934951782226562]}, "gradients/decoder.transformer.h.19.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 4.0, 0.0, 8.0, 10.0, 8.0, 9.0, 12.0, 18.0, 22.0, 26.0, 20.0, 26.0, 34.0, 29.0, 38.0, 40.0, 39.0, 43.0, 48.0, 48.0, 49.0, 49.0, 52.0, 41.0, 58.0, 27.0, 27.0, 40.0, 30.0, 24.0, 25.0, 23.0, 19.0, 15.0, 10.0, 14.0, 10.0, 5.0, 5.0, 6.0, 3.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.87109375, -3.7589111328125, -3.646728515625, -3.5345458984375, -3.42236328125, -3.3101806640625, -3.197998046875, -3.0858154296875, -2.9736328125, -2.8614501953125, -2.749267578125, -2.6370849609375, -2.52490234375, -2.4127197265625, -2.300537109375, -2.1883544921875, -2.076171875, -1.9639892578125, -1.851806640625, -1.7396240234375, -1.62744140625, -1.5152587890625, -1.403076171875, -1.2908935546875, -1.1787109375, -1.0665283203125, -0.954345703125, -0.8421630859375, -0.72998046875, -0.6177978515625, -0.505615234375, -0.3934326171875, -0.28125, -0.1690673828125, -0.056884765625, 0.0552978515625, 0.16748046875, 0.2796630859375, 0.391845703125, 0.5040283203125, 0.6162109375, 0.7283935546875, 0.840576171875, 0.9527587890625, 1.06494140625, 1.1771240234375, 1.289306640625, 1.4014892578125, 1.513671875, 1.6258544921875, 1.738037109375, 1.8502197265625, 1.96240234375, 2.0745849609375, 2.186767578125, 2.2989501953125, 2.4111328125, 2.5233154296875, 2.635498046875, 2.7476806640625, 2.85986328125, 2.9720458984375, 3.084228515625, 3.1964111328125, 3.30859375]}, "gradients/decoder.transformer.h.19.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 3.0, 0.0, 3.0, 2.0, 1.0, 6.0, 15.0, 11.0, 19.0, 32.0, 42.0, 64.0, 115.0, 177.0, 282.0, 470.0, 737.0, 1375.0, 2259.0, 3965.0, 7050.0, 12266.0, 21702.0, 37622.0, 65142.0, 107675.0, 158843.0, 192413.0, 163119.0, 111514.0, 68401.0, 40253.0, 22793.0, 12801.0, 7332.0, 4176.0, 2432.0, 1366.0, 812.0, 488.0, 304.0, 184.0, 110.0, 63.0, 40.0, 33.0, 15.0, 10.0, 11.0, 5.0, 5.0, 5.0, 0.0, 2.0, 3.0, 3.0, 2.0], "bins": [-0.5185546875, -0.5035438537597656, -0.48853302001953125, -0.4735221862792969, -0.4585113525390625, -0.4435005187988281, -0.42848968505859375, -0.4134788513183594, -0.398468017578125, -0.3834571838378906, -0.36844635009765625, -0.3534355163574219, -0.3384246826171875, -0.3234138488769531, -0.30840301513671875, -0.2933921813964844, -0.27838134765625, -0.2633705139160156, -0.24835968017578125, -0.23334884643554688, -0.2183380126953125, -0.20332717895507812, -0.18831634521484375, -0.17330551147460938, -0.158294677734375, -0.14328384399414062, -0.12827301025390625, -0.11326217651367188, -0.0982513427734375, -0.08324050903320312, -0.06822967529296875, -0.053218841552734375, -0.0382080078125, -0.023197174072265625, -0.00818634033203125, 0.006824493408203125, 0.0218353271484375, 0.036846160888671875, 0.05185699462890625, 0.06686782836914062, 0.081878662109375, 0.09688949584960938, 0.11190032958984375, 0.12691116333007812, 0.1419219970703125, 0.15693283081054688, 0.17194366455078125, 0.18695449829101562, 0.20196533203125, 0.21697616577148438, 0.23198699951171875, 0.24699783325195312, 0.2620086669921875, 0.2770195007324219, 0.29203033447265625, 0.3070411682128906, 0.322052001953125, 0.3370628356933594, 0.35207366943359375, 0.3670845031738281, 0.3820953369140625, 0.3971061706542969, 0.41211700439453125, 0.4271278381347656, 0.442138671875]}, "gradients/decoder.transformer.h.19.crossattention.c_attn.bias": {"_type": "histogram", "values": [3.0, 0.0, 1.0, 1.0, 3.0, 1.0, 0.0, 1.0, 3.0, 7.0, 1.0, 5.0, 10.0, 5.0, 14.0, 22.0, 8.0, 26.0, 21.0, 25.0, 30.0, 17.0, 26.0, 26.0, 24.0, 41.0, 34.0, 39.0, 46.0, 42.0, 1047.0, 41.0, 32.0, 35.0, 35.0, 53.0, 34.0, 31.0, 33.0, 35.0, 28.0, 18.0, 20.0, 24.0, 19.0, 15.0, 14.0, 8.0, 8.0, 8.0, 9.0, 3.0, 4.0, 5.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 0.0, 0.0, 1.0], "bins": [-2.015625, -1.950164794921875, -1.88470458984375, -1.819244384765625, -1.7537841796875, -1.688323974609375, -1.62286376953125, -1.557403564453125, -1.491943359375, -1.426483154296875, -1.36102294921875, -1.295562744140625, -1.2301025390625, -1.164642333984375, -1.09918212890625, -1.033721923828125, -0.96826171875, -0.902801513671875, -0.83734130859375, -0.771881103515625, -0.7064208984375, -0.640960693359375, -0.57550048828125, -0.510040283203125, -0.444580078125, -0.379119873046875, -0.31365966796875, -0.248199462890625, -0.1827392578125, -0.117279052734375, -0.05181884765625, 0.013641357421875, 0.0791015625, 0.144561767578125, 0.21002197265625, 0.275482177734375, 0.3409423828125, 0.406402587890625, 0.47186279296875, 0.537322998046875, 0.602783203125, 0.668243408203125, 0.73370361328125, 0.799163818359375, 0.8646240234375, 0.930084228515625, 0.99554443359375, 1.061004638671875, 1.12646484375, 1.191925048828125, 1.25738525390625, 1.322845458984375, 1.3883056640625, 1.453765869140625, 1.51922607421875, 1.584686279296875, 1.650146484375, 1.715606689453125, 1.78106689453125, 1.846527099609375, 1.9119873046875, 1.977447509765625, 2.04290771484375, 2.108367919921875, 2.173828125]}, "gradients/decoder.transformer.h.19.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 3.0, 3.0, 2.0, 8.0, 12.0, 13.0, 21.0, 39.0, 42.0, 48.0, 106.0, 138.0, 221.0, 300.0, 445.0, 705.0, 1062.0, 1548.0, 2519.0, 4005.0, 6253.0, 9801.0, 15559.0, 25212.0, 41056.0, 64802.0, 98179.0, 139364.0, 1200802.0, 164308.0, 113327.0, 76461.0, 48953.0, 30737.0, 18894.0, 11689.0, 7322.0, 4758.0, 2861.0, 1896.0, 1161.0, 859.0, 561.0, 389.0, 234.0, 157.0, 100.0, 73.0, 45.0, 29.0, 19.0, 17.0, 14.0, 5.0, 5.0, 2.0, 3.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.26318359375, -0.254608154296875, -0.24603271484375, -0.237457275390625, -0.2288818359375, -0.220306396484375, -0.21173095703125, -0.203155517578125, -0.194580078125, -0.186004638671875, -0.17742919921875, -0.168853759765625, -0.1602783203125, -0.151702880859375, -0.14312744140625, -0.134552001953125, -0.1259765625, -0.117401123046875, -0.10882568359375, -0.100250244140625, -0.0916748046875, -0.083099365234375, -0.07452392578125, -0.065948486328125, -0.057373046875, -0.048797607421875, -0.04022216796875, -0.031646728515625, -0.0230712890625, -0.014495849609375, -0.00592041015625, 0.002655029296875, 0.01123046875, 0.019805908203125, 0.02838134765625, 0.036956787109375, 0.0455322265625, 0.054107666015625, 0.06268310546875, 0.071258544921875, 0.079833984375, 0.088409423828125, 0.09698486328125, 0.105560302734375, 0.1141357421875, 0.122711181640625, 0.13128662109375, 0.139862060546875, 0.1484375, 0.157012939453125, 0.16558837890625, 0.174163818359375, 0.1827392578125, 0.191314697265625, 0.19989013671875, 0.208465576171875, 0.217041015625, 0.225616455078125, 0.23419189453125, 0.242767333984375, 0.2513427734375, 0.259918212890625, 0.26849365234375, 0.277069091796875, 0.28564453125]}, "gradients/decoder.transformer.h.19.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 1.0, 1.0, 3.0, 7.0, 4.0, 3.0, 5.0, 7.0, 5.0, 14.0, 12.0, 20.0, 21.0, 24.0, 41.0, 37.0, 55.0, 65.0, 78.0, 125.0, 120.0, 92.0, 62.0, 32.0, 41.0, 29.0, 12.0, 15.0, 20.0, 20.0, 9.0, 10.0, 4.0, 4.0, 6.0, 3.0, 1.0, 1.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0138702392578125, -0.013421177864074707, -0.012972116470336914, -0.012523055076599121, -0.012073993682861328, -0.011624932289123535, -0.011175870895385742, -0.01072680950164795, -0.010277748107910156, -0.009828686714172363, -0.00937962532043457, -0.008930563926696777, -0.008481502532958984, -0.008032441139221191, -0.0075833797454833984, -0.0071343183517456055, -0.0066852569580078125, -0.0062361955642700195, -0.0057871341705322266, -0.005338072776794434, -0.004889011383056641, -0.004439949989318848, -0.003990888595581055, -0.0035418272018432617, -0.0030927658081054688, -0.0026437044143676758, -0.002194643020629883, -0.0017455816268920898, -0.0012965202331542969, -0.0008474588394165039, -0.00039839744567871094, 5.066394805908203e-05, 0.000499725341796875, 0.000948786735534668, 0.001397848129272461, 0.001846909523010254, 0.002295970916748047, 0.00274503231048584, 0.003194093704223633, 0.0036431550979614258, 0.004092216491699219, 0.004541277885437012, 0.004990339279174805, 0.005439400672912598, 0.005888462066650391, 0.006337523460388184, 0.0067865848541259766, 0.0072356462478637695, 0.0076847076416015625, 0.008133769035339355, 0.008582830429077148, 0.009031891822814941, 0.009480953216552734, 0.009930014610290527, 0.01037907600402832, 0.010828137397766113, 0.011277198791503906, 0.0117262601852417, 0.012175321578979492, 0.012624382972717285, 0.013073444366455078, 0.013522505760192871, 0.013971567153930664, 0.014420628547668457, 0.01486968994140625]}, "gradients/decoder.transformer.h.19.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 1.0, 3.0, 3.0, 3.0, 4.0, 6.0, 8.0, 9.0, 15.0, 25.0, 36.0, 40.0, 41.0, 54.0, 107.0, 136.0, 241.0, 342.0, 953.0, 52798.0, 989815.0, 2631.0, 465.0, 255.0, 147.0, 123.0, 89.0, 59.0, 46.0, 24.0, 25.0, 13.0, 10.0, 9.0, 3.0, 9.0, 5.0, 6.0, 0.0, 1.0, 4.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.2305908203125, -0.2236480712890625, -0.216705322265625, -0.2097625732421875, -0.20281982421875, -0.1958770751953125, -0.188934326171875, -0.1819915771484375, -0.175048828125, -0.1681060791015625, -0.161163330078125, -0.1542205810546875, -0.14727783203125, -0.1403350830078125, -0.133392333984375, -0.1264495849609375, -0.1195068359375, -0.1125640869140625, -0.105621337890625, -0.0986785888671875, -0.09173583984375, -0.0847930908203125, -0.077850341796875, -0.0709075927734375, -0.06396484375, -0.0570220947265625, -0.050079345703125, -0.0431365966796875, -0.03619384765625, -0.0292510986328125, -0.022308349609375, -0.0153656005859375, -0.0084228515625, -0.0014801025390625, 0.005462646484375, 0.0124053955078125, 0.01934814453125, 0.0262908935546875, 0.033233642578125, 0.0401763916015625, 0.047119140625, 0.0540618896484375, 0.061004638671875, 0.0679473876953125, 0.07489013671875, 0.0818328857421875, 0.088775634765625, 0.0957183837890625, 0.1026611328125, 0.1096038818359375, 0.116546630859375, 0.1234893798828125, 0.13043212890625, 0.1373748779296875, 0.144317626953125, 0.1512603759765625, 0.158203125, 0.1651458740234375, 0.172088623046875, 0.1790313720703125, 0.18597412109375, 0.1929168701171875, 0.199859619140625, 0.2068023681640625, 0.2137451171875]}, "gradients/decoder.transformer.h.19.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 61.0, 938.0, 16.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.13172505795955658, -0.1265614628791809, -0.12139785289764404, -0.11623425036668777, -0.1110706478357315, -0.10590704530477524, -0.10074344277381897, -0.0955798402428627, -0.09041623771190643, -0.08525263518095016, -0.0800890326499939, -0.07492543011903763, -0.06976182758808136, -0.06459822505712509, -0.05943462252616882, -0.054271019995212555, -0.04910741746425629, -0.04394381493330002, -0.03878021240234375, -0.03361660987138748, -0.028453007340431213, -0.023289404809474945, -0.018125802278518677, -0.012962199747562408, -0.00779859721660614, -0.002634994685649872, 0.0025286078453063965, 0.007692210376262665, 0.012855812907218933, 0.0180194154381752, 0.02318301796913147, 0.028346620500087738, 0.033510223031044006, 0.038673825562000275, 0.04383742809295654, 0.04900103062391281, 0.05416463315486908, 0.05932823568582535, 0.06449183821678162, 0.06965544074773788, 0.07481904327869415, 0.07998264580965042, 0.08514624834060669, 0.09030985087156296, 0.09547345340251923, 0.1006370559334755, 0.10580065846443176, 0.11096426099538803, 0.1161278635263443, 0.12129146605730057, 0.12645506858825684, 0.1316186785697937, 0.13678227365016937, 0.14194586873054504, 0.1471094787120819, 0.15227308869361877, 0.15743668377399445, 0.16260027885437012, 0.16776388883590698, 0.17292749881744385, 0.17809109389781952, 0.1832546889781952, 0.18841829895973206, 0.19358190894126892, 0.1987455040216446]}, "gradients/decoder.transformer.h.19.ln_cross_attn.bias": {"_type": "histogram", "values": [3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 2.0, 7.0, 6.0, 9.0, 7.0, 7.0, 11.0, 19.0, 18.0, 17.0, 30.0, 20.0, 19.0, 30.0, 41.0, 39.0, 24.0, 33.0, 36.0, 49.0, 43.0, 36.0, 37.0, 34.0, 39.0, 39.0, 36.0, 40.0, 40.0, 35.0, 28.0, 26.0, 26.0, 23.0, 14.0, 19.0, 11.0, 8.0, 14.0, 4.0, 8.0, 8.0, 4.0, 5.0, 2.0, 0.0, 1.0, 0.0, 4.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.013681352138519287, -0.01326003111898899, -0.012838710099458694, -0.012417389079928398, -0.011996068060398102, -0.011574747040867805, -0.01115342602133751, -0.010732105001807213, -0.010310783982276917, -0.00988946296274662, -0.009468141943216324, -0.009046820923686028, -0.008625499904155731, -0.008204178884625435, -0.0077828578650951385, -0.007361536845564842, -0.006940215826034546, -0.00651889480650425, -0.006097573786973953, -0.005676252767443657, -0.005254931747913361, -0.004833610728383064, -0.004412289708852768, -0.003990968689322472, -0.0035696476697921753, -0.003148326650261879, -0.0027270056307315826, -0.0023056846112012863, -0.00188436359167099, -0.0014630425721406937, -0.0010417215526103973, -0.000620400533080101, -0.0001990795135498047, 0.00022224150598049164, 0.000643562525510788, 0.0010648835450410843, 0.0014862045645713806, 0.001907525584101677, 0.0023288466036319733, 0.0027501676231622696, 0.003171488642692566, 0.0035928096622228622, 0.004014130681753159, 0.004435451701283455, 0.004856772720813751, 0.0052780937403440475, 0.005699414759874344, 0.00612073577940464, 0.0065420567989349365, 0.006963377818465233, 0.007384698837995529, 0.0078060198575258255, 0.008227340877056122, 0.008648661896586418, 0.009069982916116714, 0.00949130393564701, 0.009912624955177307, 0.010333945974707603, 0.0107552669942379, 0.011176588013768196, 0.011597909033298492, 0.012019230052828789, 0.012440551072359085, 0.012861872091889381, 0.013283193111419678]}, "gradients/decoder.transformer.h.19.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 4.0, 0.0, 8.0, 10.0, 8.0, 9.0, 12.0, 18.0, 22.0, 26.0, 20.0, 26.0, 34.0, 29.0, 38.0, 40.0, 39.0, 43.0, 48.0, 47.0, 50.0, 49.0, 52.0, 41.0, 58.0, 27.0, 28.0, 39.0, 31.0, 23.0, 25.0, 23.0, 19.0, 15.0, 10.0, 14.0, 10.0, 5.0, 5.0, 6.0, 3.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.87109375, -3.7589111328125, -3.646728515625, -3.5345458984375, -3.42236328125, -3.3101806640625, -3.197998046875, -3.0858154296875, -2.9736328125, -2.8614501953125, -2.749267578125, -2.6370849609375, -2.52490234375, -2.4127197265625, -2.300537109375, -2.1883544921875, -2.076171875, -1.9639892578125, -1.851806640625, -1.7396240234375, -1.62744140625, -1.5152587890625, -1.403076171875, -1.2908935546875, -1.1787109375, -1.0665283203125, -0.954345703125, -0.8421630859375, -0.72998046875, -0.6177978515625, -0.505615234375, -0.3934326171875, -0.28125, -0.1690673828125, -0.056884765625, 0.0552978515625, 0.16748046875, 0.2796630859375, 0.391845703125, 0.5040283203125, 0.6162109375, 0.7283935546875, 0.840576171875, 0.9527587890625, 1.06494140625, 1.1771240234375, 1.289306640625, 1.4014892578125, 1.513671875, 1.6258544921875, 1.738037109375, 1.8502197265625, 1.96240234375, 2.0745849609375, 2.186767578125, 2.2989501953125, 2.4111328125, 2.5233154296875, 2.635498046875, 2.7476806640625, 2.85986328125, 2.9720458984375, 3.084228515625, 3.1964111328125, 3.30859375]}, "gradients/decoder.transformer.h.19.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 3.0, 4.0, 0.0, 4.0, 7.0, 8.0, 18.0, 18.0, 27.0, 60.0, 64.0, 119.0, 205.0, 306.0, 530.0, 872.0, 1493.0, 2766.0, 5366.0, 10296.0, 20662.0, 44237.0, 99140.0, 224895.0, 312577.0, 177966.0, 77201.0, 34678.0, 16769.0, 8425.0, 4375.0, 2373.0, 1273.0, 713.0, 436.0, 242.0, 153.0, 87.0, 71.0, 39.0, 29.0, 18.0, 14.0, 7.0, 8.0, 5.0, 2.0, 6.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-2.86328125, -2.765716552734375, -2.66815185546875, -2.570587158203125, -2.4730224609375, -2.375457763671875, -2.27789306640625, -2.180328369140625, -2.082763671875, -1.985198974609375, -1.88763427734375, -1.790069580078125, -1.6925048828125, -1.594940185546875, -1.49737548828125, -1.399810791015625, -1.30224609375, -1.204681396484375, -1.10711669921875, -1.009552001953125, -0.9119873046875, -0.814422607421875, -0.71685791015625, -0.619293212890625, -0.521728515625, -0.424163818359375, -0.32659912109375, -0.229034423828125, -0.1314697265625, -0.033905029296875, 0.06365966796875, 0.161224365234375, 0.2587890625, 0.356353759765625, 0.45391845703125, 0.551483154296875, 0.6490478515625, 0.746612548828125, 0.84417724609375, 0.941741943359375, 1.039306640625, 1.136871337890625, 1.23443603515625, 1.332000732421875, 1.4295654296875, 1.527130126953125, 1.62469482421875, 1.722259521484375, 1.81982421875, 1.917388916015625, 2.01495361328125, 2.112518310546875, 2.2100830078125, 2.307647705078125, 2.40521240234375, 2.502777099609375, 2.600341796875, 2.697906494140625, 2.79547119140625, 2.893035888671875, 2.9906005859375, 3.088165283203125, 3.18572998046875, 3.283294677734375, 3.380859375]}, "gradients/decoder.transformer.h.19.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 0.0, 0.0, 0.0, 5.0, 5.0, 3.0, 8.0, 10.0, 14.0, 14.0, 14.0, 17.0, 20.0, 19.0, 23.0, 36.0, 37.0, 43.0, 60.0, 77.0, 91.0, 140.0, 320.0, 1450.0, 169.0, 94.0, 63.0, 50.0, 40.0, 34.0, 42.0, 20.0, 27.0, 15.0, 20.0, 14.0, 20.0, 16.0, 4.0, 7.0, 5.0, 7.0, 1.0, 2.0, 2.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.703125, -12.3079833984375, -11.912841796875, -11.5177001953125, -11.12255859375, -10.7274169921875, -10.332275390625, -9.9371337890625, -9.5419921875, -9.1468505859375, -8.751708984375, -8.3565673828125, -7.96142578125, -7.5662841796875, -7.171142578125, -6.7760009765625, -6.380859375, -5.9857177734375, -5.590576171875, -5.1954345703125, -4.80029296875, -4.4051513671875, -4.010009765625, -3.6148681640625, -3.2197265625, -2.8245849609375, -2.429443359375, -2.0343017578125, -1.63916015625, -1.2440185546875, -0.848876953125, -0.4537353515625, -0.05859375, 0.3365478515625, 0.731689453125, 1.1268310546875, 1.52197265625, 1.9171142578125, 2.312255859375, 2.7073974609375, 3.1025390625, 3.4976806640625, 3.892822265625, 4.2879638671875, 4.68310546875, 5.0782470703125, 5.473388671875, 5.8685302734375, 6.263671875, 6.6588134765625, 7.053955078125, 7.4490966796875, 7.84423828125, 8.2393798828125, 8.634521484375, 9.0296630859375, 9.4248046875, 9.8199462890625, 10.215087890625, 10.6102294921875, 11.00537109375, 11.4005126953125, 11.795654296875, 12.1907958984375, 12.5859375]}, "gradients/decoder.transformer.h.19.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 0.0, 7.0, 4.0, 6.0, 4.0, 15.0, 15.0, 11.0, 20.0, 21.0, 27.0, 34.0, 51.0, 83.0, 95.0, 172.0, 375.0, 915.0, 3536.0, 41487.0, 2582836.0, 500186.0, 12493.0, 1925.0, 587.0, 271.0, 164.0, 108.0, 70.0, 39.0, 35.0, 24.0, 16.0, 23.0, 14.0, 15.0, 7.0, 7.0, 6.0, 3.0, 0.0, 5.0, 2.0, 1.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-20.84375, -20.22705078125, -19.6103515625, -18.99365234375, -18.376953125, -17.76025390625, -17.1435546875, -16.52685546875, -15.91015625, -15.29345703125, -14.6767578125, -14.06005859375, -13.443359375, -12.82666015625, -12.2099609375, -11.59326171875, -10.9765625, -10.35986328125, -9.7431640625, -9.12646484375, -8.509765625, -7.89306640625, -7.2763671875, -6.65966796875, -6.04296875, -5.42626953125, -4.8095703125, -4.19287109375, -3.576171875, -2.95947265625, -2.3427734375, -1.72607421875, -1.109375, -0.49267578125, 0.1240234375, 0.74072265625, 1.357421875, 1.97412109375, 2.5908203125, 3.20751953125, 3.82421875, 4.44091796875, 5.0576171875, 5.67431640625, 6.291015625, 6.90771484375, 7.5244140625, 8.14111328125, 8.7578125, 9.37451171875, 9.9912109375, 10.60791015625, 11.224609375, 11.84130859375, 12.4580078125, 13.07470703125, 13.69140625, 14.30810546875, 14.9248046875, 15.54150390625, 16.158203125, 16.77490234375, 17.3916015625, 18.00830078125, 18.625]}, "gradients/decoder.transformer.h.19.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 3.0, 4.0, 17.0, 53.0, 184.0, 286.0, 275.0, 148.0, 34.0, 11.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-17.094955444335938, -15.357843399047852, -13.620732307434082, -11.883620262145996, -10.146509170532227, -8.40939712524414, -6.672285079956055, -4.935173988342285, -3.198061943054199, -1.460950255393982, 0.27616143226623535, 2.013273239135742, 3.75038480758667, 5.487496376037598, 7.224608421325684, 8.961719512939453, 10.698831558227539, 12.435943603515625, 14.173054695129395, 15.91016674041748, 17.64727783203125, 19.384389877319336, 21.121501922607422, 22.858612060546875, 24.595726013183594, 26.33283805847168, 28.069950103759766, 29.80706024169922, 31.544172286987305, 33.28128433227539, 35.018394470214844, 36.75550842285156, 38.492618560791016, 40.22972869873047, 41.96684265136719, 43.70395278930664, 45.44106674194336, 47.17817687988281, 48.91529083251953, 50.652400970458984, 52.38951110839844, 54.12662124633789, 55.86373519897461, 57.60084533691406, 59.33795928955078, 61.075069427490234, 62.81217956542969, 64.5492935180664, 66.28640747070312, 68.02352142333984, 69.76062774658203, 71.49774169921875, 73.23485565185547, 74.97196960449219, 76.70907592773438, 78.4461898803711, 80.18329620361328, 81.92041015625, 83.65751647949219, 85.3946304321289, 87.13174438476562, 88.86885070800781, 90.60596466064453, 92.34307861328125, 94.08019256591797]}, "gradients/decoder.transformer.h.19.ln_1.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 1.0, 4.0, 1.0, 1.0, 2.0, 7.0, 5.0, 7.0, 7.0, 8.0, 5.0, 11.0, 20.0, 20.0, 12.0, 29.0, 35.0, 32.0, 32.0, 34.0, 44.0, 52.0, 34.0, 45.0, 51.0, 45.0, 52.0, 51.0, 28.0, 39.0, 33.0, 28.0, 30.0, 39.0, 21.0, 32.0, 16.0, 24.0, 21.0, 13.0, 11.0, 11.0, 4.0, 2.0, 6.0, 1.0, 2.0, 3.0, 1.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-35.736412048339844, -34.60384750366211, -33.471282958984375, -32.338722229003906, -31.206157684326172, -30.073593139648438, -28.941030502319336, -27.808467864990234, -26.6759033203125, -25.543338775634766, -24.410776138305664, -23.278213500976562, -22.145648956298828, -21.013084411621094, -19.880521774291992, -18.74795913696289, -17.615394592285156, -16.482830047607422, -15.35026741027832, -14.217703819274902, -13.085140228271484, -11.952576637268066, -10.820013046264648, -9.68744945526123, -8.554885864257812, -7.4223222732543945, -6.289758682250977, -5.157195091247559, -4.024631500244141, -2.8920679092407227, -1.7595043182373047, -0.6269407272338867, 0.5056190490722656, 1.6381826400756836, 2.7707462310791016, 3.9033098220825195, 5.0358734130859375, 6.1684370040893555, 7.301000595092773, 8.433564186096191, 9.56612777709961, 10.698691368103027, 11.831254959106445, 12.963818550109863, 14.096382141113281, 15.2289457321167, 16.361509323120117, 17.49407196044922, 18.626636505126953, 19.759201049804688, 20.89176368713379, 22.02432632446289, 23.156890869140625, 24.28945541381836, 25.42201805114746, 26.554580688476562, 27.687145233154297, 28.81970977783203, 29.952272415161133, 31.084835052490234, 32.21739959716797, 33.3499641418457, 34.48252868652344, 35.615089416503906, 36.74765396118164]}, "gradients/decoder.transformer.h.18.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 2.0, 5.0, 6.0, 8.0, 9.0, 8.0, 18.0, 21.0, 20.0, 17.0, 30.0, 23.0, 30.0, 28.0, 37.0, 46.0, 38.0, 41.0, 40.0, 41.0, 45.0, 56.0, 42.0, 57.0, 38.0, 36.0, 29.0, 34.0, 28.0, 34.0, 20.0, 26.0, 15.0, 18.0, 9.0, 19.0, 17.0, 7.0, 4.0, 5.0, 2.0, 3.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-3.826171875, -3.712371826171875, -3.59857177734375, -3.484771728515625, -3.3709716796875, -3.257171630859375, -3.14337158203125, -3.029571533203125, -2.915771484375, -2.801971435546875, -2.68817138671875, -2.574371337890625, -2.4605712890625, -2.346771240234375, -2.23297119140625, -2.119171142578125, -2.00537109375, -1.891571044921875, -1.77777099609375, -1.663970947265625, -1.5501708984375, -1.436370849609375, -1.32257080078125, -1.208770751953125, -1.094970703125, -0.981170654296875, -0.86737060546875, -0.753570556640625, -0.6397705078125, -0.525970458984375, -0.41217041015625, -0.298370361328125, -0.1845703125, -0.070770263671875, 0.04302978515625, 0.156829833984375, 0.2706298828125, 0.384429931640625, 0.49822998046875, 0.612030029296875, 0.725830078125, 0.839630126953125, 0.95343017578125, 1.067230224609375, 1.1810302734375, 1.294830322265625, 1.40863037109375, 1.522430419921875, 1.63623046875, 1.750030517578125, 1.86383056640625, 1.977630615234375, 2.0914306640625, 2.205230712890625, 2.31903076171875, 2.432830810546875, 2.546630859375, 2.660430908203125, 2.77423095703125, 2.888031005859375, 3.0018310546875, 3.115631103515625, 3.22943115234375, 3.343231201171875, 3.45703125]}, "gradients/decoder.transformer.h.18.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 5.0, 6.0, 1.0, 10.0, 13.0, 13.0, 11.0, 27.0, 33.0, 34.0, 68.0, 82.0, 157.0, 348.0, 644.0, 1443.0, 4005.0, 11853.0, 44687.0, 226136.0, 1339292.0, 2025390.0, 433630.0, 77884.0, 19111.0, 5727.0, 2081.0, 807.0, 333.0, 182.0, 82.0, 40.0, 39.0, 29.0, 20.0, 20.0, 9.0, 7.0, 10.0, 3.0, 12.0, 4.0, 2.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.3671875, -7.13824462890625, -6.9093017578125, -6.68035888671875, -6.451416015625, -6.22247314453125, -5.9935302734375, -5.76458740234375, -5.53564453125, -5.30670166015625, -5.0777587890625, -4.84881591796875, -4.619873046875, -4.39093017578125, -4.1619873046875, -3.93304443359375, -3.7041015625, -3.47515869140625, -3.2462158203125, -3.01727294921875, -2.788330078125, -2.55938720703125, -2.3304443359375, -2.10150146484375, -1.87255859375, -1.64361572265625, -1.4146728515625, -1.18572998046875, -0.956787109375, -0.72784423828125, -0.4989013671875, -0.26995849609375, -0.041015625, 0.18792724609375, 0.4168701171875, 0.64581298828125, 0.874755859375, 1.10369873046875, 1.3326416015625, 1.56158447265625, 1.79052734375, 2.01947021484375, 2.2484130859375, 2.47735595703125, 2.706298828125, 2.93524169921875, 3.1641845703125, 3.39312744140625, 3.6220703125, 3.85101318359375, 4.0799560546875, 4.30889892578125, 4.537841796875, 4.76678466796875, 4.9957275390625, 5.22467041015625, 5.45361328125, 5.68255615234375, 5.9114990234375, 6.14044189453125, 6.369384765625, 6.59832763671875, 6.8272705078125, 7.05621337890625, 7.28515625]}, "gradients/decoder.transformer.h.18.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 2.0, 0.0, 4.0, 4.0, 2.0, 8.0, 13.0, 17.0, 14.0, 32.0, 42.0, 78.0, 130.0, 201.0, 367.0, 543.0, 718.0, 672.0, 474.0, 286.0, 191.0, 115.0, 61.0, 36.0, 23.0, 14.0, 17.0, 9.0, 9.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-14.1484375, -13.71728515625, -13.2861328125, -12.85498046875, -12.423828125, -11.99267578125, -11.5615234375, -11.13037109375, -10.69921875, -10.26806640625, -9.8369140625, -9.40576171875, -8.974609375, -8.54345703125, -8.1123046875, -7.68115234375, -7.25, -6.81884765625, -6.3876953125, -5.95654296875, -5.525390625, -5.09423828125, -4.6630859375, -4.23193359375, -3.80078125, -3.36962890625, -2.9384765625, -2.50732421875, -2.076171875, -1.64501953125, -1.2138671875, -0.78271484375, -0.3515625, 0.07958984375, 0.5107421875, 0.94189453125, 1.373046875, 1.80419921875, 2.2353515625, 2.66650390625, 3.09765625, 3.52880859375, 3.9599609375, 4.39111328125, 4.822265625, 5.25341796875, 5.6845703125, 6.11572265625, 6.546875, 6.97802734375, 7.4091796875, 7.84033203125, 8.271484375, 8.70263671875, 9.1337890625, 9.56494140625, 9.99609375, 10.42724609375, 10.8583984375, 11.28955078125, 11.720703125, 12.15185546875, 12.5830078125, 13.01416015625, 13.4453125]}, "gradients/decoder.transformer.h.18.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 5.0, 2.0, 4.0, 5.0, 16.0, 26.0, 36.0, 60.0, 76.0, 142.0, 272.0, 603.0, 1368.0, 3828.0, 15200.0, 104619.0, 1220618.0, 2548210.0, 259737.0, 29647.0, 6199.0, 2070.0, 750.0, 355.0, 167.0, 111.0, 57.0, 38.0, 31.0, 14.0, 11.0, 4.0, 6.0, 2.0, 2.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-16.90625, -16.3642578125, -15.822265625, -15.2802734375, -14.73828125, -14.1962890625, -13.654296875, -13.1123046875, -12.5703125, -12.0283203125, -11.486328125, -10.9443359375, -10.40234375, -9.8603515625, -9.318359375, -8.7763671875, -8.234375, -7.6923828125, -7.150390625, -6.6083984375, -6.06640625, -5.5244140625, -4.982421875, -4.4404296875, -3.8984375, -3.3564453125, -2.814453125, -2.2724609375, -1.73046875, -1.1884765625, -0.646484375, -0.1044921875, 0.4375, 0.9794921875, 1.521484375, 2.0634765625, 2.60546875, 3.1474609375, 3.689453125, 4.2314453125, 4.7734375, 5.3154296875, 5.857421875, 6.3994140625, 6.94140625, 7.4833984375, 8.025390625, 8.5673828125, 9.109375, 9.6513671875, 10.193359375, 10.7353515625, 11.27734375, 11.8193359375, 12.361328125, 12.9033203125, 13.4453125, 13.9873046875, 14.529296875, 15.0712890625, 15.61328125, 16.1552734375, 16.697265625, 17.2392578125, 17.78125]}, "gradients/decoder.transformer.h.18.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 5.0, 34.0, 102.0, 212.0, 311.0, 216.0, 97.0, 30.0, 5.0, 3.0, 1.0, 1.0, 1.0], "bins": [-222.94378662109375, -218.93153381347656, -214.9192657470703, -210.90701293945312, -206.89474487304688, -202.8824920654297, -198.8702392578125, -194.85797119140625, -190.84571838378906, -186.83346557617188, -182.82119750976562, -178.80894470214844, -174.7966766357422, -170.784423828125, -166.77215576171875, -162.75990295410156, -158.74765014648438, -154.7353973388672, -150.72312927246094, -146.71087646484375, -142.6986083984375, -138.6863555908203, -134.67410278320312, -130.66183471679688, -126.64956665039062, -122.6373062133789, -118.62504577636719, -114.61279296875, -110.60053253173828, -106.58827209472656, -102.57601165771484, -98.56375122070312, -94.55149841308594, -90.53923797607422, -86.5269775390625, -82.51472473144531, -78.5024642944336, -74.49020385742188, -70.47794342041016, -66.46568298339844, -62.453426361083984, -58.441165924072266, -54.42890930175781, -50.416648864746094, -46.404388427734375, -42.39213180541992, -38.3798713684082, -34.36761474609375, -30.35535430908203, -26.343095779418945, -22.33083724975586, -18.31857681274414, -14.306318283081055, -10.294059753417969, -6.28179931640625, -2.269540786743164, 1.7427177429199219, 5.754976749420166, 9.76723575592041, 13.779495239257812, 17.7917537689209, 21.804012298583984, 25.816272735595703, 29.82853126525879, 33.840789794921875]}, "gradients/decoder.transformer.h.18.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 5.0, 6.0, 2.0, 5.0, 13.0, 7.0, 5.0, 9.0, 14.0, 18.0, 10.0, 19.0, 29.0, 25.0, 23.0, 22.0, 28.0, 40.0, 43.0, 35.0, 49.0, 35.0, 39.0, 48.0, 33.0, 47.0, 36.0, 52.0, 36.0, 43.0, 28.0, 32.0, 33.0, 14.0, 32.0, 18.0, 11.0, 10.0, 15.0, 8.0, 10.0, 5.0, 4.0, 2.0, 0.0, 4.0, 3.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-31.356494903564453, -30.371456146240234, -29.386417388916016, -28.401378631591797, -27.416337966918945, -26.431299209594727, -25.446260452270508, -24.46122169494629, -23.47618293762207, -22.49114418029785, -21.506105422973633, -20.52106475830078, -19.536026000976562, -18.550987243652344, -17.565948486328125, -16.580909729003906, -15.595870018005371, -14.610831260681152, -13.625791549682617, -12.640752792358398, -11.65571403503418, -10.670675277709961, -9.685635566711426, -8.700596809387207, -7.71555757522583, -6.730518341064453, -5.745479583740234, -4.760440349578857, -3.7754013538360596, -2.7903623580932617, -1.8053231239318848, -0.820284366607666, 0.16475486755371094, 1.1497938632965088, 2.1348328590393066, 3.1198720932006836, 4.104910850524902, 5.089950084686279, 6.074989318847656, 7.060028076171875, 8.045066833496094, 9.030105590820312, 10.015145301818848, 11.000184059143066, 11.985222816467285, 12.97026252746582, 13.955301284790039, 14.940340042114258, 15.925379753112793, 16.910419464111328, 17.895458221435547, 18.880496978759766, 19.865535736083984, 20.850574493408203, 21.835613250732422, 22.82065200805664, 23.805692672729492, 24.79073143005371, 25.77577018737793, 26.76081085205078, 27.745849609375, 28.73088836669922, 29.715927124023438, 30.700965881347656, 31.686004638671875]}, "gradients/decoder.transformer.h.18.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 4.0, 6.0, 6.0, 3.0, 2.0, 8.0, 12.0, 12.0, 16.0, 17.0, 20.0, 24.0, 29.0, 38.0, 30.0, 51.0, 29.0, 59.0, 28.0, 45.0, 34.0, 50.0, 49.0, 45.0, 37.0, 47.0, 38.0, 40.0, 34.0, 31.0, 26.0, 33.0, 23.0, 17.0, 16.0, 14.0, 12.0, 11.0, 6.0, 4.0, 4.0, 1.0, 4.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.109375, -3.990692138671875, -3.87200927734375, -3.753326416015625, -3.6346435546875, -3.515960693359375, -3.39727783203125, -3.278594970703125, -3.159912109375, -3.041229248046875, -2.92254638671875, -2.803863525390625, -2.6851806640625, -2.566497802734375, -2.44781494140625, -2.329132080078125, -2.21044921875, -2.091766357421875, -1.97308349609375, -1.854400634765625, -1.7357177734375, -1.617034912109375, -1.49835205078125, -1.379669189453125, -1.260986328125, -1.142303466796875, -1.02362060546875, -0.904937744140625, -0.7862548828125, -0.667572021484375, -0.54888916015625, -0.430206298828125, -0.3115234375, -0.192840576171875, -0.07415771484375, 0.044525146484375, 0.1632080078125, 0.281890869140625, 0.40057373046875, 0.519256591796875, 0.637939453125, 0.756622314453125, 0.87530517578125, 0.993988037109375, 1.1126708984375, 1.231353759765625, 1.35003662109375, 1.468719482421875, 1.58740234375, 1.706085205078125, 1.82476806640625, 1.943450927734375, 2.0621337890625, 2.180816650390625, 2.29949951171875, 2.418182373046875, 2.536865234375, 2.655548095703125, 2.77423095703125, 2.892913818359375, 3.0115966796875, 3.130279541015625, 3.24896240234375, 3.367645263671875, 3.486328125]}, "gradients/decoder.transformer.h.18.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 2.0, 0.0, 3.0, 6.0, 3.0, 16.0, 13.0, 30.0, 46.0, 77.0, 120.0, 173.0, 278.0, 459.0, 732.0, 1127.0, 1864.0, 2915.0, 5017.0, 8100.0, 13529.0, 22521.0, 37068.0, 59376.0, 90843.0, 129761.0, 163400.0, 159197.0, 123988.0, 86050.0, 55030.0, 34231.0, 20743.0, 12353.0, 7611.0, 4546.0, 2775.0, 1641.0, 1057.0, 661.0, 419.0, 284.0, 179.0, 104.0, 67.0, 37.0, 30.0, 32.0, 19.0, 7.0, 11.0, 5.0, 5.0, 3.0, 5.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.45361328125, -0.43845367431640625, -0.4232940673828125, -0.40813446044921875, -0.392974853515625, -0.37781524658203125, -0.3626556396484375, -0.34749603271484375, -0.33233642578125, -0.31717681884765625, -0.3020172119140625, -0.28685760498046875, -0.271697998046875, -0.25653839111328125, -0.2413787841796875, -0.22621917724609375, -0.2110595703125, -0.19589996337890625, -0.1807403564453125, -0.16558074951171875, -0.150421142578125, -0.13526153564453125, -0.1201019287109375, -0.10494232177734375, -0.08978271484375, -0.07462310791015625, -0.0594635009765625, -0.04430389404296875, -0.029144287109375, -0.01398468017578125, 0.0011749267578125, 0.01633453369140625, 0.031494140625, 0.04665374755859375, 0.0618133544921875, 0.07697296142578125, 0.092132568359375, 0.10729217529296875, 0.1224517822265625, 0.13761138916015625, 0.15277099609375, 0.16793060302734375, 0.1830902099609375, 0.19824981689453125, 0.213409423828125, 0.22856903076171875, 0.2437286376953125, 0.25888824462890625, 0.2740478515625, 0.28920745849609375, 0.3043670654296875, 0.31952667236328125, 0.334686279296875, 0.34984588623046875, 0.3650054931640625, 0.38016510009765625, 0.39532470703125, 0.41048431396484375, 0.4256439208984375, 0.44080352783203125, 0.455963134765625, 0.47112274169921875, 0.4862823486328125, 0.5014419555664062, 0.5166015625]}, "gradients/decoder.transformer.h.18.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 3.0, 6.0, 1.0, 7.0, 3.0, 5.0, 8.0, 6.0, 11.0, 13.0, 13.0, 19.0, 18.0, 20.0, 29.0, 27.0, 30.0, 36.0, 27.0, 36.0, 34.0, 48.0, 40.0, 38.0, 1055.0, 36.0, 32.0, 42.0, 46.0, 35.0, 34.0, 26.0, 38.0, 25.0, 35.0, 12.0, 17.0, 32.0, 21.0, 15.0, 10.0, 9.0, 10.0, 15.0, 2.0, 6.0, 2.0, 2.0, 3.0, 3.0, 0.0, 1.0, 0.0, 1.0], "bins": [-2.6640625, -2.587921142578125, -2.51177978515625, -2.435638427734375, -2.3594970703125, -2.283355712890625, -2.20721435546875, -2.131072998046875, -2.054931640625, -1.978790283203125, -1.90264892578125, -1.826507568359375, -1.7503662109375, -1.674224853515625, -1.59808349609375, -1.521942138671875, -1.44580078125, -1.369659423828125, -1.29351806640625, -1.217376708984375, -1.1412353515625, -1.065093994140625, -0.98895263671875, -0.912811279296875, -0.836669921875, -0.760528564453125, -0.68438720703125, -0.608245849609375, -0.5321044921875, -0.455963134765625, -0.37982177734375, -0.303680419921875, -0.2275390625, -0.151397705078125, -0.07525634765625, 0.000885009765625, 0.0770263671875, 0.153167724609375, 0.22930908203125, 0.305450439453125, 0.381591796875, 0.457733154296875, 0.53387451171875, 0.610015869140625, 0.6861572265625, 0.762298583984375, 0.83843994140625, 0.914581298828125, 0.99072265625, 1.066864013671875, 1.14300537109375, 1.219146728515625, 1.2952880859375, 1.371429443359375, 1.44757080078125, 1.523712158203125, 1.599853515625, 1.675994873046875, 1.75213623046875, 1.828277587890625, 1.9044189453125, 1.980560302734375, 2.05670166015625, 2.132843017578125, 2.208984375]}, "gradients/decoder.transformer.h.18.crossattention.c_attn.weight": {"_type": "histogram", "values": [3.0, 1.0, 8.0, 5.0, 5.0, 6.0, 19.0, 22.0, 36.0, 60.0, 55.0, 100.0, 158.0, 235.0, 369.0, 553.0, 797.0, 1220.0, 1887.0, 2893.0, 4400.0, 6723.0, 10302.0, 15952.0, 24937.0, 38540.0, 58878.0, 87600.0, 125348.0, 977944.0, 373526.0, 120696.0, 84352.0, 56131.0, 36108.0, 23294.0, 15375.0, 9850.0, 6540.0, 4278.0, 2796.0, 1697.0, 1193.0, 746.0, 503.0, 348.0, 224.0, 136.0, 91.0, 69.0, 47.0, 25.0, 17.0, 16.0, 11.0, 6.0, 8.0, 5.0, 1.0, 2.0, 0.0, 1.0, 2.0, 2.0], "bins": [-0.271484375, -0.2624168395996094, -0.25334930419921875, -0.24428176879882812, -0.2352142333984375, -0.22614669799804688, -0.21707916259765625, -0.20801162719726562, -0.198944091796875, -0.18987655639648438, -0.18080902099609375, -0.17174148559570312, -0.1626739501953125, -0.15360641479492188, -0.14453887939453125, -0.13547134399414062, -0.12640380859375, -0.11733627319335938, -0.10826873779296875, -0.09920120239257812, -0.0901336669921875, -0.08106613159179688, -0.07199859619140625, -0.06293106079101562, -0.053863525390625, -0.044795989990234375, -0.03572845458984375, -0.026660919189453125, -0.0175933837890625, -0.008525848388671875, 0.00054168701171875, 0.009609222412109375, 0.0186767578125, 0.027744293212890625, 0.03681182861328125, 0.045879364013671875, 0.0549468994140625, 0.06401443481445312, 0.07308197021484375, 0.08214950561523438, 0.091217041015625, 0.10028457641601562, 0.10935211181640625, 0.11841964721679688, 0.1274871826171875, 0.13655471801757812, 0.14562225341796875, 0.15468978881835938, 0.16375732421875, 0.17282485961914062, 0.18189239501953125, 0.19095993041992188, 0.2000274658203125, 0.20909500122070312, 0.21816253662109375, 0.22723007202148438, 0.236297607421875, 0.24536514282226562, 0.25443267822265625, 0.2635002136230469, 0.2725677490234375, 0.2816352844238281, 0.29070281982421875, 0.2997703552246094, 0.308837890625]}, "gradients/decoder.transformer.h.18.crossattention.q_attn.bias": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 3.0, 2.0, 4.0, 3.0, 8.0, 10.0, 6.0, 12.0, 12.0, 14.0, 14.0, 24.0, 18.0, 18.0, 29.0, 24.0, 36.0, 32.0, 39.0, 33.0, 60.0, 55.0, 83.0, 83.0, 55.0, 52.0, 30.0, 30.0, 30.0, 21.0, 32.0, 25.0, 17.0, 19.0, 8.0, 12.0, 14.0, 9.0, 3.0, 10.0, 6.0, 5.0, 3.0, 4.0, 0.0, 1.0, 3.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.01303863525390625, -0.012598514556884766, -0.012158393859863281, -0.011718273162841797, -0.011278152465820312, -0.010838031768798828, -0.010397911071777344, -0.00995779037475586, -0.009517669677734375, -0.00907754898071289, -0.008637428283691406, -0.008197307586669922, -0.0077571868896484375, -0.007317066192626953, -0.006876945495605469, -0.006436824798583984, -0.0059967041015625, -0.005556583404541016, -0.005116462707519531, -0.004676342010498047, -0.0042362213134765625, -0.003796100616455078, -0.0033559799194335938, -0.0029158592224121094, -0.002475738525390625, -0.0020356178283691406, -0.0015954971313476562, -0.0011553764343261719, -0.0007152557373046875, -0.0002751350402832031, 0.00016498565673828125, 0.0006051063537597656, 0.00104522705078125, 0.0014853477478027344, 0.0019254684448242188, 0.002365589141845703, 0.0028057098388671875, 0.003245830535888672, 0.0036859512329101562, 0.004126071929931641, 0.004566192626953125, 0.005006313323974609, 0.005446434020996094, 0.005886554718017578, 0.0063266754150390625, 0.006766796112060547, 0.007206916809082031, 0.007647037506103516, 0.008087158203125, 0.008527278900146484, 0.008967399597167969, 0.009407520294189453, 0.009847640991210938, 0.010287761688232422, 0.010727882385253906, 0.01116800308227539, 0.011608123779296875, 0.01204824447631836, 0.012488365173339844, 0.012928485870361328, 0.013368606567382812, 0.013808727264404297, 0.014248847961425781, 0.014688968658447266, 0.01512908935546875]}, "gradients/decoder.transformer.h.18.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 1.0, 4.0, 3.0, 4.0, 10.0, 10.0, 3.0, 17.0, 20.0, 26.0, 29.0, 42.0, 71.0, 84.0, 125.0, 145.0, 188.0, 301.0, 436.0, 1111.0, 52078.0, 983884.0, 7977.0, 693.0, 348.0, 249.0, 192.0, 106.0, 100.0, 78.0, 45.0, 46.0, 34.0, 16.0, 19.0, 11.0, 14.0, 12.0, 11.0, 4.0, 5.0, 2.0, 5.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0], "bins": [-0.2467041015625, -0.23929786682128906, -0.23189163208007812, -0.2244853973388672, -0.21707916259765625, -0.2096729278564453, -0.20226669311523438, -0.19486045837402344, -0.1874542236328125, -0.18004798889160156, -0.17264175415039062, -0.1652355194091797, -0.15782928466796875, -0.1504230499267578, -0.14301681518554688, -0.13561058044433594, -0.128204345703125, -0.12079811096191406, -0.11339187622070312, -0.10598564147949219, -0.09857940673828125, -0.09117317199707031, -0.08376693725585938, -0.07636070251464844, -0.0689544677734375, -0.06154823303222656, -0.054141998291015625, -0.04673576354980469, -0.03932952880859375, -0.03192329406738281, -0.024517059326171875, -0.017110824584960938, -0.00970458984375, -0.0022983551025390625, 0.005107879638671875, 0.012514114379882812, 0.01992034912109375, 0.027326583862304688, 0.034732818603515625, 0.04213905334472656, 0.0495452880859375, 0.05695152282714844, 0.06435775756835938, 0.07176399230957031, 0.07917022705078125, 0.08657646179199219, 0.09398269653320312, 0.10138893127441406, 0.108795166015625, 0.11620140075683594, 0.12360763549804688, 0.1310138702392578, 0.13842010498046875, 0.1458263397216797, 0.15323257446289062, 0.16063880920410156, 0.1680450439453125, 0.17545127868652344, 0.18285751342773438, 0.1902637481689453, 0.19766998291015625, 0.2050762176513672, 0.21248245239257812, 0.21988868713378906, 0.227294921875]}, "gradients/decoder.transformer.h.18.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 10.0, 54.0, 379.0, 546.0, 23.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.17351217567920685, -0.16958020627498627, -0.16564823687076569, -0.1617162525653839, -0.15778428316116333, -0.15385231375694275, -0.14992034435272217, -0.1459883749485016, -0.142056405544281, -0.13812443614006042, -0.13419246673583984, -0.13026048243045807, -0.1263285130262375, -0.1223965436220169, -0.11846457421779633, -0.11453260481357574, -0.11060062050819397, -0.10666865110397339, -0.10273667424917221, -0.09880470484495163, -0.09487272799015045, -0.09094075858592987, -0.08700878918170929, -0.08307681977748871, -0.07914484292268753, -0.07521287351846695, -0.07128089666366577, -0.06734892725944519, -0.06341695785522461, -0.05948498100042343, -0.05555301159620285, -0.05162103846669197, -0.047689057886600494, -0.043757084757089615, -0.039825111627578735, -0.035893142223358154, -0.031961169093847275, -0.028029195964336395, -0.024097224697470665, -0.020165253430604935, -0.016233280301094055, -0.01230130810290575, -0.008369335904717445, -0.0044373637065291405, -0.0005053915083408356, 0.003426581621170044, 0.007358552888035774, 0.011290524154901505, 0.015222497284412384, 0.019154470413923264, 0.023086441680788994, 0.027018412947654724, 0.030950386077165604, 0.03488235920667648, 0.038814328610897064, 0.042746301740407944, 0.04667827486991882, 0.0506102479994297, 0.05454222112894058, 0.05847419053316116, 0.06240616366267204, 0.06633813679218292, 0.0702701061964035, 0.07420207560062408, 0.07813405245542526]}, "gradients/decoder.transformer.h.18.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 2.0, 3.0, 0.0, 2.0, 4.0, 1.0, 2.0, 2.0, 3.0, 4.0, 6.0, 6.0, 16.0, 12.0, 11.0, 19.0, 16.0, 22.0, 17.0, 32.0, 27.0, 38.0, 24.0, 34.0, 28.0, 24.0, 40.0, 33.0, 29.0, 57.0, 21.0, 44.0, 30.0, 37.0, 31.0, 41.0, 25.0, 23.0, 29.0, 35.0, 22.0, 23.0, 18.0, 19.0, 19.0, 12.0, 10.0, 9.0, 9.0, 14.0, 6.0, 8.0, 4.0, 4.0, 5.0, 0.0, 2.0, 2.0, 1.0, 2.0, 2.0], "bins": [-0.020041584968566895, -0.019436698406934738, -0.018831809982657433, -0.018226921558380127, -0.01762203499674797, -0.017017148435115814, -0.01641226001083851, -0.015807371586561203, -0.015202485024929047, -0.014597597531974316, -0.013992710039019585, -0.013387822546064854, -0.012782935053110123, -0.012178047560155392, -0.01157316006720066, -0.01096827257424593, -0.010363385081291199, -0.009758497588336468, -0.009153610095381737, -0.008548722602427006, -0.007943835109472275, -0.007338947616517544, -0.006734060123562813, -0.006129172630608082, -0.005524285137653351, -0.00491939764469862, -0.004314510151743889, -0.003709622658789158, -0.003104735165834427, -0.002499847672879696, -0.001894960179924965, -0.001290072686970234, -0.0006851851940155029, -8.029770106077194e-05, 0.000524589791893959, 0.00112947728484869, 0.001734364777803421, 0.002339252270758152, 0.002944139763712883, 0.003549027256667614, 0.004153914749622345, 0.004758802242577076, 0.005363689735531807, 0.005968577228486538, 0.006573464721441269, 0.007178352214396, 0.007783239707350731, 0.008388127200305462, 0.008993014693260193, 0.009597902186214924, 0.010202789679169655, 0.010807677172124386, 0.011412564665079117, 0.012017452158033848, 0.012622339650988579, 0.01322722714394331, 0.01383211463689804, 0.014437002129852772, 0.015041889622807503, 0.01564677804708481, 0.016251664608716965, 0.01685655117034912, 0.017461439594626427, 0.018066328018903732, 0.01867121458053589]}, "gradients/decoder.transformer.h.18.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 4.0, 6.0, 6.0, 3.0, 2.0, 8.0, 11.0, 13.0, 16.0, 17.0, 20.0, 24.0, 29.0, 38.0, 30.0, 51.0, 29.0, 59.0, 28.0, 45.0, 35.0, 49.0, 49.0, 45.0, 38.0, 46.0, 39.0, 39.0, 34.0, 31.0, 26.0, 32.0, 24.0, 17.0, 16.0, 14.0, 12.0, 11.0, 5.0, 5.0, 4.0, 1.0, 4.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.109375, -3.990692138671875, -3.87200927734375, -3.753326416015625, -3.6346435546875, -3.515960693359375, -3.39727783203125, -3.278594970703125, -3.159912109375, -3.041229248046875, -2.92254638671875, -2.803863525390625, -2.6851806640625, -2.566497802734375, -2.44781494140625, -2.329132080078125, -2.21044921875, -2.091766357421875, -1.97308349609375, -1.854400634765625, -1.7357177734375, -1.617034912109375, -1.49835205078125, -1.379669189453125, -1.260986328125, -1.142303466796875, -1.02362060546875, -0.904937744140625, -0.7862548828125, -0.667572021484375, -0.54888916015625, -0.430206298828125, -0.3115234375, -0.192840576171875, -0.07415771484375, 0.044525146484375, 0.1632080078125, 0.281890869140625, 0.40057373046875, 0.519256591796875, 0.637939453125, 0.756622314453125, 0.87530517578125, 0.993988037109375, 1.1126708984375, 1.231353759765625, 1.35003662109375, 1.468719482421875, 1.58740234375, 1.706085205078125, 1.82476806640625, 1.943450927734375, 2.0621337890625, 2.180816650390625, 2.29949951171875, 2.418182373046875, 2.536865234375, 2.655548095703125, 2.77423095703125, 2.892913818359375, 3.0115966796875, 3.130279541015625, 3.24896240234375, 3.367645263671875, 3.486328125]}, "gradients/decoder.transformer.h.18.attn.c_proj.weight": {"_type": "histogram", "values": [5.0, 4.0, 4.0, 2.0, 2.0, 7.0, 9.0, 12.0, 20.0, 18.0, 35.0, 33.0, 70.0, 116.0, 216.0, 309.0, 559.0, 1004.0, 1774.0, 3213.0, 5946.0, 11908.0, 29521.0, 96961.0, 422286.0, 345591.0, 80525.0, 25385.0, 10731.0, 5502.0, 2897.0, 1635.0, 928.0, 537.0, 316.0, 167.0, 98.0, 62.0, 47.0, 36.0, 15.0, 17.0, 10.0, 4.0, 4.0, 10.0, 3.0, 7.0, 5.0, 1.0, 3.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.4140625, -4.23699951171875, -4.0599365234375, -3.88287353515625, -3.705810546875, -3.52874755859375, -3.3516845703125, -3.17462158203125, -2.99755859375, -2.82049560546875, -2.6434326171875, -2.46636962890625, -2.289306640625, -2.11224365234375, -1.9351806640625, -1.75811767578125, -1.5810546875, -1.40399169921875, -1.2269287109375, -1.04986572265625, -0.872802734375, -0.69573974609375, -0.5186767578125, -0.34161376953125, -0.16455078125, 0.01251220703125, 0.1895751953125, 0.36663818359375, 0.543701171875, 0.72076416015625, 0.8978271484375, 1.07489013671875, 1.251953125, 1.42901611328125, 1.6060791015625, 1.78314208984375, 1.960205078125, 2.13726806640625, 2.3143310546875, 2.49139404296875, 2.66845703125, 2.84552001953125, 3.0225830078125, 3.19964599609375, 3.376708984375, 3.55377197265625, 3.7308349609375, 3.90789794921875, 4.0849609375, 4.26202392578125, 4.4390869140625, 4.61614990234375, 4.793212890625, 4.97027587890625, 5.1473388671875, 5.32440185546875, 5.50146484375, 5.67852783203125, 5.8555908203125, 6.03265380859375, 6.209716796875, 6.38677978515625, 6.5638427734375, 6.74090576171875, 6.91796875]}, "gradients/decoder.transformer.h.18.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 3.0, 1.0, 5.0, 5.0, 5.0, 4.0, 5.0, 10.0, 3.0, 10.0, 13.0, 15.0, 16.0, 18.0, 23.0, 23.0, 39.0, 36.0, 36.0, 52.0, 55.0, 77.0, 119.0, 345.0, 1506.0, 173.0, 82.0, 56.0, 47.0, 39.0, 42.0, 26.0, 30.0, 17.0, 22.0, 25.0, 13.0, 17.0, 13.0, 8.0, 9.0, 6.0, 6.0, 1.0, 1.0, 2.0, 3.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-13.6796875, -13.267333984375, -12.85498046875, -12.442626953125, -12.0302734375, -11.617919921875, -11.20556640625, -10.793212890625, -10.380859375, -9.968505859375, -9.55615234375, -9.143798828125, -8.7314453125, -8.319091796875, -7.90673828125, -7.494384765625, -7.08203125, -6.669677734375, -6.25732421875, -5.844970703125, -5.4326171875, -5.020263671875, -4.60791015625, -4.195556640625, -3.783203125, -3.370849609375, -2.95849609375, -2.546142578125, -2.1337890625, -1.721435546875, -1.30908203125, -0.896728515625, -0.484375, -0.072021484375, 0.34033203125, 0.752685546875, 1.1650390625, 1.577392578125, 1.98974609375, 2.402099609375, 2.814453125, 3.226806640625, 3.63916015625, 4.051513671875, 4.4638671875, 4.876220703125, 5.28857421875, 5.700927734375, 6.11328125, 6.525634765625, 6.93798828125, 7.350341796875, 7.7626953125, 8.175048828125, 8.58740234375, 8.999755859375, 9.412109375, 9.824462890625, 10.23681640625, 10.649169921875, 11.0615234375, 11.473876953125, 11.88623046875, 12.298583984375, 12.7109375]}, "gradients/decoder.transformer.h.18.attn.c_attn.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 1.0, 2.0, 3.0, 1.0, 2.0, 5.0, 8.0, 1.0, 3.0, 9.0, 11.0, 15.0, 15.0, 13.0, 28.0, 25.0, 34.0, 41.0, 62.0, 99.0, 172.0, 242.0, 453.0, 1574.0, 11774.0, 518526.0, 2583069.0, 25394.0, 2592.0, 630.0, 308.0, 177.0, 121.0, 70.0, 48.0, 34.0, 39.0, 24.0, 12.0, 25.0, 7.0, 9.0, 8.0, 10.0, 7.0, 2.0, 3.0, 1.0, 3.0, 2.0, 2.0, 2.0, 2.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-21.796875, -21.049072265625, -20.30126953125, -19.553466796875, -18.8056640625, -18.057861328125, -17.31005859375, -16.562255859375, -15.814453125, -15.066650390625, -14.31884765625, -13.571044921875, -12.8232421875, -12.075439453125, -11.32763671875, -10.579833984375, -9.83203125, -9.084228515625, -8.33642578125, -7.588623046875, -6.8408203125, -6.093017578125, -5.34521484375, -4.597412109375, -3.849609375, -3.101806640625, -2.35400390625, -1.606201171875, -0.8583984375, -0.110595703125, 0.63720703125, 1.385009765625, 2.1328125, 2.880615234375, 3.62841796875, 4.376220703125, 5.1240234375, 5.871826171875, 6.61962890625, 7.367431640625, 8.115234375, 8.863037109375, 9.61083984375, 10.358642578125, 11.1064453125, 11.854248046875, 12.60205078125, 13.349853515625, 14.09765625, 14.845458984375, 15.59326171875, 16.341064453125, 17.0888671875, 17.836669921875, 18.58447265625, 19.332275390625, 20.080078125, 20.827880859375, 21.57568359375, 22.323486328125, 23.0712890625, 23.819091796875, 24.56689453125, 25.314697265625, 26.0625]}, "gradients/decoder.transformer.h.18.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 4.0, 150.0, 546.0, 291.0, 24.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-158.98736572265625, -155.42532348632812, -151.86329650878906, -148.30125427246094, -144.73922729492188, -141.17718505859375, -137.61514282226562, -134.05311584472656, -130.49107360839844, -126.92903900146484, -123.36700439453125, -119.80496215820312, -116.24292755126953, -112.68089294433594, -109.11885833740234, -105.55682373046875, -101.99478912353516, -98.43275451660156, -94.87071990966797, -91.30868530273438, -87.74664306640625, -84.18460845947266, -80.62257385253906, -77.06053924560547, -73.49850463867188, -69.93647003173828, -66.37443542480469, -62.81239700317383, -59.25035858154297, -55.688323974609375, -52.12628936767578, -48.56425476074219, -45.00220489501953, -41.44017028808594, -37.87813186645508, -34.316097259521484, -30.754060745239258, -27.19202423095703, -23.629989624023438, -20.06795310974121, -16.505916595458984, -12.943880081176758, -9.381844520568848, -5.8198089599609375, -2.257772445678711, 1.3042640686035156, 4.866298675537109, 8.428335189819336, 11.990371704101562, 15.552408218383789, 19.114444732666016, 22.67647933959961, 26.238515853881836, 29.800552368164062, 33.362586975097656, 36.92462158203125, 40.48666000366211, 44.0486946105957, 47.61073303222656, 51.172767639160156, 54.73480224609375, 58.29684066772461, 61.8588752746582, 65.42091369628906, 68.98294830322266]}, "gradients/decoder.transformer.h.18.ln_1.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 4.0, 4.0, 3.0, 5.0, 9.0, 13.0, 12.0, 14.0, 16.0, 14.0, 30.0, 27.0, 24.0, 34.0, 32.0, 31.0, 48.0, 33.0, 51.0, 37.0, 50.0, 44.0, 52.0, 48.0, 49.0, 37.0, 30.0, 41.0, 26.0, 29.0, 35.0, 26.0, 30.0, 12.0, 11.0, 15.0, 6.0, 3.0, 4.0, 8.0, 3.0, 3.0, 7.0, 3.0, 0.0, 3.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-38.27981185913086, -36.99015808105469, -35.700504302978516, -34.41084671020508, -33.121192932128906, -31.831539154052734, -30.541885375976562, -29.25223159790039, -27.962575912475586, -26.672922134399414, -25.38326644897461, -24.093612670898438, -22.803958892822266, -21.51430320739746, -20.22464942932129, -18.934993743896484, -17.645339965820312, -16.35568618774414, -15.066030502319336, -13.776376724243164, -12.486721992492676, -11.197067260742188, -9.907413482666016, -8.617758750915527, -7.328104019165039, -6.038449287414551, -4.748795032501221, -3.4591405391693115, -2.1694860458374023, -0.8798313140869141, 0.409822940826416, 1.699477195739746, 2.9891357421875, 4.278790473937988, 5.568444728851318, 6.858098983764648, 8.147753715515137, 9.437408447265625, 10.727062225341797, 12.016716957092285, 13.306371688842773, 14.596026420593262, 15.88568115234375, 17.175334930419922, 18.464988708496094, 19.7546443939209, 21.04429817199707, 22.333953857421875, 23.623607635498047, 24.91326141357422, 26.202917098999023, 27.492570877075195, 28.7822265625, 30.071880340576172, 31.361534118652344, 32.651187896728516, 33.94084167480469, 35.23049545288086, 36.52014923095703, 37.80980682373047, 39.09946060180664, 40.38911437988281, 41.678768157958984, 42.968421936035156, 44.258079528808594]}, "gradients/decoder.transformer.h.17.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 4.0, 3.0, 10.0, 4.0, 3.0, 5.0, 12.0, 9.0, 21.0, 21.0, 20.0, 34.0, 29.0, 32.0, 40.0, 46.0, 51.0, 44.0, 34.0, 45.0, 48.0, 50.0, 54.0, 45.0, 50.0, 42.0, 38.0, 29.0, 31.0, 34.0, 25.0, 19.0, 18.0, 19.0, 16.0, 11.0, 4.0, 5.0, 1.0, 4.0, 4.0, 3.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.60546875, -4.472930908203125, -4.34039306640625, -4.207855224609375, -4.0753173828125, -3.942779541015625, -3.81024169921875, -3.677703857421875, -3.545166015625, -3.412628173828125, -3.28009033203125, -3.147552490234375, -3.0150146484375, -2.882476806640625, -2.74993896484375, -2.617401123046875, -2.48486328125, -2.352325439453125, -2.21978759765625, -2.087249755859375, -1.9547119140625, -1.822174072265625, -1.68963623046875, -1.557098388671875, -1.424560546875, -1.292022705078125, -1.15948486328125, -1.026947021484375, -0.8944091796875, -0.761871337890625, -0.62933349609375, -0.496795654296875, -0.3642578125, -0.231719970703125, -0.09918212890625, 0.033355712890625, 0.1658935546875, 0.298431396484375, 0.43096923828125, 0.563507080078125, 0.696044921875, 0.828582763671875, 0.96112060546875, 1.093658447265625, 1.2261962890625, 1.358734130859375, 1.49127197265625, 1.623809814453125, 1.75634765625, 1.888885498046875, 2.02142333984375, 2.153961181640625, 2.2864990234375, 2.419036865234375, 2.55157470703125, 2.684112548828125, 2.816650390625, 2.949188232421875, 3.08172607421875, 3.214263916015625, 3.3468017578125, 3.479339599609375, 3.61187744140625, 3.744415283203125, 3.876953125]}, "gradients/decoder.transformer.h.17.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 3.0, 3.0, 6.0, 5.0, 11.0, 14.0, 19.0, 21.0, 47.0, 55.0, 70.0, 116.0, 152.0, 235.0, 400.0, 617.0, 1108.0, 1800.0, 3199.0, 6392.0, 13116.0, 28933.0, 71982.0, 199370.0, 583041.0, 1313220.0, 1200145.0, 491128.0, 167409.0, 61794.0, 25496.0, 11599.0, 5566.0, 2979.0, 1645.0, 978.0, 604.0, 360.0, 210.0, 149.0, 103.0, 75.0, 50.0, 20.0, 21.0, 11.0, 11.0, 6.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0], "bins": [-5.09375, -4.951934814453125, -4.81011962890625, -4.668304443359375, -4.5264892578125, -4.384674072265625, -4.24285888671875, -4.101043701171875, -3.959228515625, -3.817413330078125, -3.67559814453125, -3.533782958984375, -3.3919677734375, -3.250152587890625, -3.10833740234375, -2.966522216796875, -2.82470703125, -2.682891845703125, -2.54107666015625, -2.399261474609375, -2.2574462890625, -2.115631103515625, -1.97381591796875, -1.832000732421875, -1.690185546875, -1.548370361328125, -1.40655517578125, -1.264739990234375, -1.1229248046875, -0.981109619140625, -0.83929443359375, -0.697479248046875, -0.5556640625, -0.413848876953125, -0.27203369140625, -0.130218505859375, 0.0115966796875, 0.153411865234375, 0.29522705078125, 0.437042236328125, 0.578857421875, 0.720672607421875, 0.86248779296875, 1.004302978515625, 1.1461181640625, 1.287933349609375, 1.42974853515625, 1.571563720703125, 1.71337890625, 1.855194091796875, 1.99700927734375, 2.138824462890625, 2.2806396484375, 2.422454833984375, 2.56427001953125, 2.706085205078125, 2.847900390625, 2.989715576171875, 3.13153076171875, 3.273345947265625, 3.4151611328125, 3.556976318359375, 3.69879150390625, 3.840606689453125, 3.982421875]}, "gradients/decoder.transformer.h.17.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 2.0, 3.0, 3.0, 1.0, 6.0, 1.0, 7.0, 8.0, 13.0, 11.0, 19.0, 24.0, 30.0, 37.0, 60.0, 64.0, 99.0, 145.0, 186.0, 306.0, 334.0, 482.0, 460.0, 429.0, 327.0, 282.0, 175.0, 154.0, 106.0, 96.0, 60.0, 52.0, 39.0, 21.0, 13.0, 10.0, 5.0, 3.0, 2.0, 4.0, 4.0, 0.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-11.015625, -10.721435546875, -10.42724609375, -10.133056640625, -9.8388671875, -9.544677734375, -9.25048828125, -8.956298828125, -8.662109375, -8.367919921875, -8.07373046875, -7.779541015625, -7.4853515625, -7.191162109375, -6.89697265625, -6.602783203125, -6.30859375, -6.014404296875, -5.72021484375, -5.426025390625, -5.1318359375, -4.837646484375, -4.54345703125, -4.249267578125, -3.955078125, -3.660888671875, -3.36669921875, -3.072509765625, -2.7783203125, -2.484130859375, -2.18994140625, -1.895751953125, -1.6015625, -1.307373046875, -1.01318359375, -0.718994140625, -0.4248046875, -0.130615234375, 0.16357421875, 0.457763671875, 0.751953125, 1.046142578125, 1.34033203125, 1.634521484375, 1.9287109375, 2.222900390625, 2.51708984375, 2.811279296875, 3.10546875, 3.399658203125, 3.69384765625, 3.988037109375, 4.2822265625, 4.576416015625, 4.87060546875, 5.164794921875, 5.458984375, 5.753173828125, 6.04736328125, 6.341552734375, 6.6357421875, 6.929931640625, 7.22412109375, 7.518310546875, 7.8125]}, "gradients/decoder.transformer.h.17.mlp.c_fc.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 2.0, 4.0, 8.0, 5.0, 14.0, 15.0, 16.0, 15.0, 46.0, 64.0, 125.0, 192.0, 308.0, 660.0, 1270.0, 2531.0, 6031.0, 18098.0, 67356.0, 315387.0, 1541259.0, 1752836.0, 376725.0, 78225.0, 20682.0, 6984.0, 2715.0, 1247.0, 612.0, 299.0, 228.0, 106.0, 76.0, 40.0, 36.0, 23.0, 18.0, 9.0, 8.0, 5.0, 4.0, 4.0, 1.0, 2.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.890625, -8.5357666015625, -8.180908203125, -7.8260498046875, -7.47119140625, -7.1163330078125, -6.761474609375, -6.4066162109375, -6.0517578125, -5.6968994140625, -5.342041015625, -4.9871826171875, -4.63232421875, -4.2774658203125, -3.922607421875, -3.5677490234375, -3.212890625, -2.8580322265625, -2.503173828125, -2.1483154296875, -1.79345703125, -1.4385986328125, -1.083740234375, -0.7288818359375, -0.3740234375, -0.0191650390625, 0.335693359375, 0.6905517578125, 1.04541015625, 1.4002685546875, 1.755126953125, 2.1099853515625, 2.46484375, 2.8197021484375, 3.174560546875, 3.5294189453125, 3.88427734375, 4.2391357421875, 4.593994140625, 4.9488525390625, 5.3037109375, 5.6585693359375, 6.013427734375, 6.3682861328125, 6.72314453125, 7.0780029296875, 7.432861328125, 7.7877197265625, 8.142578125, 8.4974365234375, 8.852294921875, 9.2071533203125, 9.56201171875, 9.9168701171875, 10.271728515625, 10.6265869140625, 10.9814453125, 11.3363037109375, 11.691162109375, 12.0460205078125, 12.40087890625, 12.7557373046875, 13.110595703125, 13.4654541015625, 13.8203125]}, "gradients/decoder.transformer.h.17.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 7.0, 16.0, 68.0, 180.0, 332.0, 265.0, 119.0, 24.0, 3.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-234.86798095703125, -230.33230590820312, -225.79661560058594, -221.2609405517578, -216.7252655029297, -212.1895751953125, -207.65390014648438, -203.11822509765625, -198.58253479003906, -194.04685974121094, -189.51116943359375, -184.97549438476562, -180.4398193359375, -175.9041290283203, -171.3684539794922, -166.83277893066406, -162.29708862304688, -157.76141357421875, -153.22572326660156, -148.69004821777344, -144.1543731689453, -139.61868286132812, -135.0830078125, -130.54733276367188, -126.01165771484375, -121.4759750366211, -116.94029998779297, -112.40461730957031, -107.86893463134766, -103.333251953125, -98.79757690429688, -94.26189422607422, -89.72621154785156, -85.1905288696289, -80.65485382080078, -76.11917114257812, -71.58348846435547, -67.04780578613281, -62.51213073730469, -57.97644805908203, -53.440765380859375, -48.905086517333984, -44.36940383911133, -39.83372497558594, -35.29804229736328, -30.76236343383789, -26.2266845703125, -21.691003799438477, -17.155323028564453, -12.61964225769043, -8.083962440490723, -3.5482826232910156, 0.9873981475830078, 5.523078918457031, 10.058757781982422, 14.594438552856445, 19.13011932373047, 23.665800094604492, 28.201480865478516, 32.737159729003906, 37.27284240722656, 41.80852127075195, 46.344200134277344, 50.8798828125, 55.41556167602539]}, "gradients/decoder.transformer.h.17.ln_2.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 2.0, 1.0, 1.0, 4.0, 4.0, 4.0, 4.0, 2.0, 6.0, 5.0, 4.0, 11.0, 11.0, 10.0, 12.0, 14.0, 19.0, 20.0, 17.0, 19.0, 24.0, 29.0, 32.0, 25.0, 28.0, 42.0, 33.0, 26.0, 32.0, 30.0, 48.0, 41.0, 48.0, 32.0, 30.0, 38.0, 27.0, 21.0, 31.0, 30.0, 20.0, 25.0, 19.0, 11.0, 12.0, 13.0, 15.0, 17.0, 18.0, 15.0, 3.0, 6.0, 6.0, 13.0, 2.0, 3.0, 1.0, 3.0, 1.0, 0.0, 1.0], "bins": [-27.68250846862793, -26.85538101196289, -26.02825164794922, -25.201122283935547, -24.373994827270508, -23.54686737060547, -22.719738006591797, -21.892608642578125, -21.065481185913086, -20.238353729248047, -19.411224365234375, -18.584095001220703, -17.756967544555664, -16.929840087890625, -16.102710723876953, -15.275582313537598, -14.448453903198242, -13.621325492858887, -12.794197082519531, -11.967068672180176, -11.13994026184082, -10.312811851501465, -9.48568344116211, -8.658555030822754, -7.831426620483398, -7.004298210144043, -6.1771697998046875, -5.350041389465332, -4.522912979125977, -3.695784568786621, -2.8686561584472656, -2.04152774810791, -1.2143993377685547, -0.3872709274291992, 0.43985748291015625, 1.2669858932495117, 2.094114303588867, 2.9212427139282227, 3.748371124267578, 4.575499534606934, 5.402627944946289, 6.2297563552856445, 7.056884765625, 7.8840131759643555, 8.711141586303711, 9.538269996643066, 10.365398406982422, 11.192526817321777, 12.019655227661133, 12.846783638000488, 13.673912048339844, 14.5010404586792, 15.328168869018555, 16.155296325683594, 16.982425689697266, 17.809555053710938, 18.636682510375977, 19.463809967041016, 20.290939331054688, 21.11806869506836, 21.9451961517334, 22.772323608398438, 23.59945297241211, 24.42658233642578, 25.25370979309082]}, "gradients/decoder.transformer.h.17.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 4.0, 4.0, 8.0, 4.0, 5.0, 12.0, 13.0, 11.0, 10.0, 22.0, 24.0, 18.0, 25.0, 26.0, 40.0, 37.0, 35.0, 42.0, 40.0, 50.0, 41.0, 44.0, 45.0, 49.0, 50.0, 46.0, 38.0, 43.0, 35.0, 26.0, 28.0, 27.0, 24.0, 13.0, 17.0, 16.0, 12.0, 7.0, 9.0, 5.0, 0.0, 2.0, 2.0, 1.0, 5.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.05859375, -3.93017578125, -3.8017578125, -3.67333984375, -3.544921875, -3.41650390625, -3.2880859375, -3.15966796875, -3.03125, -2.90283203125, -2.7744140625, -2.64599609375, -2.517578125, -2.38916015625, -2.2607421875, -2.13232421875, -2.00390625, -1.87548828125, -1.7470703125, -1.61865234375, -1.490234375, -1.36181640625, -1.2333984375, -1.10498046875, -0.9765625, -0.84814453125, -0.7197265625, -0.59130859375, -0.462890625, -0.33447265625, -0.2060546875, -0.07763671875, 0.05078125, 0.17919921875, 0.3076171875, 0.43603515625, 0.564453125, 0.69287109375, 0.8212890625, 0.94970703125, 1.078125, 1.20654296875, 1.3349609375, 1.46337890625, 1.591796875, 1.72021484375, 1.8486328125, 1.97705078125, 2.10546875, 2.23388671875, 2.3623046875, 2.49072265625, 2.619140625, 2.74755859375, 2.8759765625, 3.00439453125, 3.1328125, 3.26123046875, 3.3896484375, 3.51806640625, 3.646484375, 3.77490234375, 3.9033203125, 4.03173828125, 4.16015625]}, "gradients/decoder.transformer.h.17.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 0.0, 2.0, 2.0, 3.0, 3.0, 4.0, 5.0, 12.0, 12.0, 24.0, 38.0, 55.0, 88.0, 158.0, 235.0, 371.0, 679.0, 1164.0, 1899.0, 3308.0, 5848.0, 10249.0, 18242.0, 32284.0, 58922.0, 101150.0, 160055.0, 204432.0, 175718.0, 116654.0, 68767.0, 38539.0, 21671.0, 11909.0, 6712.0, 3766.0, 2261.0, 1307.0, 796.0, 482.0, 282.0, 172.0, 109.0, 65.0, 47.0, 19.0, 26.0, 7.0, 6.0, 3.0, 4.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.64404296875, -0.623687744140625, -0.60333251953125, -0.582977294921875, -0.5626220703125, -0.542266845703125, -0.52191162109375, -0.501556396484375, -0.481201171875, -0.460845947265625, -0.44049072265625, -0.420135498046875, -0.3997802734375, -0.379425048828125, -0.35906982421875, -0.338714599609375, -0.318359375, -0.298004150390625, -0.27764892578125, -0.257293701171875, -0.2369384765625, -0.216583251953125, -0.19622802734375, -0.175872802734375, -0.155517578125, -0.135162353515625, -0.11480712890625, -0.094451904296875, -0.0740966796875, -0.053741455078125, -0.03338623046875, -0.013031005859375, 0.00732421875, 0.027679443359375, 0.04803466796875, 0.068389892578125, 0.0887451171875, 0.109100341796875, 0.12945556640625, 0.149810791015625, 0.170166015625, 0.190521240234375, 0.21087646484375, 0.231231689453125, 0.2515869140625, 0.271942138671875, 0.29229736328125, 0.312652587890625, 0.3330078125, 0.353363037109375, 0.37371826171875, 0.394073486328125, 0.4144287109375, 0.434783935546875, 0.45513916015625, 0.475494384765625, 0.495849609375, 0.516204833984375, 0.53656005859375, 0.556915283203125, 0.5772705078125, 0.597625732421875, 0.61798095703125, 0.638336181640625, 0.65869140625]}, "gradients/decoder.transformer.h.17.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0, 3.0, 6.0, 2.0, 9.0, 6.0, 9.0, 15.0, 15.0, 14.0, 14.0, 13.0, 23.0, 32.0, 31.0, 40.0, 25.0, 46.0, 43.0, 42.0, 40.0, 43.0, 38.0, 1065.0, 31.0, 47.0, 30.0, 30.0, 33.0, 51.0, 30.0, 26.0, 28.0, 18.0, 16.0, 20.0, 13.0, 15.0, 16.0, 6.0, 8.0, 6.0, 10.0, 11.0, 8.0, 4.0, 4.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-2.4375, -2.35711669921875, -2.2767333984375, -2.19635009765625, -2.115966796875, -2.03558349609375, -1.9552001953125, -1.87481689453125, -1.79443359375, -1.71405029296875, -1.6336669921875, -1.55328369140625, -1.472900390625, -1.39251708984375, -1.3121337890625, -1.23175048828125, -1.1513671875, -1.07098388671875, -0.9906005859375, -0.91021728515625, -0.829833984375, -0.74945068359375, -0.6690673828125, -0.58868408203125, -0.50830078125, -0.42791748046875, -0.3475341796875, -0.26715087890625, -0.186767578125, -0.10638427734375, -0.0260009765625, 0.05438232421875, 0.134765625, 0.21514892578125, 0.2955322265625, 0.37591552734375, 0.456298828125, 0.53668212890625, 0.6170654296875, 0.69744873046875, 0.77783203125, 0.85821533203125, 0.9385986328125, 1.01898193359375, 1.099365234375, 1.17974853515625, 1.2601318359375, 1.34051513671875, 1.4208984375, 1.50128173828125, 1.5816650390625, 1.66204833984375, 1.742431640625, 1.82281494140625, 1.9031982421875, 1.98358154296875, 2.06396484375, 2.14434814453125, 2.2247314453125, 2.30511474609375, 2.385498046875, 2.46588134765625, 2.5462646484375, 2.62664794921875, 2.70703125]}, "gradients/decoder.transformer.h.17.crossattention.c_attn.weight": {"_type": "histogram", "values": [3.0, 1.0, 3.0, 2.0, 4.0, 4.0, 10.0, 16.0, 23.0, 31.0, 40.0, 88.0, 107.0, 133.0, 237.0, 342.0, 541.0, 818.0, 1220.0, 2018.0, 2935.0, 4646.0, 7069.0, 10953.0, 16762.0, 26959.0, 43398.0, 68646.0, 106013.0, 152190.0, 1210668.0, 150465.0, 105201.0, 68082.0, 42899.0, 26947.0, 16825.0, 10747.0, 7087.0, 4537.0, 3019.0, 1937.0, 1206.0, 788.0, 513.0, 354.0, 229.0, 143.0, 115.0, 59.0, 32.0, 25.0, 19.0, 15.0, 10.0, 4.0, 4.0, 6.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.318603515625, -0.3081550598144531, -0.29770660400390625, -0.2872581481933594, -0.2768096923828125, -0.2663612365722656, -0.25591278076171875, -0.24546432495117188, -0.235015869140625, -0.22456741333007812, -0.21411895751953125, -0.20367050170898438, -0.1932220458984375, -0.18277359008789062, -0.17232513427734375, -0.16187667846679688, -0.15142822265625, -0.14097976684570312, -0.13053131103515625, -0.12008285522460938, -0.1096343994140625, -0.09918594360351562, -0.08873748779296875, -0.07828903198242188, -0.067840576171875, -0.057392120361328125, -0.04694366455078125, -0.036495208740234375, -0.0260467529296875, -0.015598297119140625, -0.00514984130859375, 0.005298614501953125, 0.0157470703125, 0.026195526123046875, 0.03664398193359375, 0.047092437744140625, 0.0575408935546875, 0.06798934936523438, 0.07843780517578125, 0.08888626098632812, 0.099334716796875, 0.10978317260742188, 0.12023162841796875, 0.13068008422851562, 0.1411285400390625, 0.15157699584960938, 0.16202545166015625, 0.17247390747070312, 0.18292236328125, 0.19337081909179688, 0.20381927490234375, 0.21426773071289062, 0.2247161865234375, 0.23516464233398438, 0.24561309814453125, 0.2560615539550781, 0.266510009765625, 0.2769584655761719, 0.28740692138671875, 0.2978553771972656, 0.3083038330078125, 0.3187522888183594, 0.32920074462890625, 0.3396492004394531, 0.35009765625]}, "gradients/decoder.transformer.h.17.crossattention.q_attn.bias": {"_type": "histogram", "values": [2.0, 2.0, 2.0, 2.0, 1.0, 1.0, 4.0, 1.0, 6.0, 5.0, 4.0, 3.0, 7.0, 14.0, 7.0, 12.0, 20.0, 11.0, 15.0, 16.0, 17.0, 21.0, 28.0, 33.0, 40.0, 43.0, 48.0, 54.0, 64.0, 64.0, 62.0, 50.0, 36.0, 42.0, 38.0, 39.0, 27.0, 15.0, 23.0, 23.0, 19.0, 16.0, 15.0, 13.0, 10.0, 7.0, 6.0, 5.0, 4.0, 3.0, 4.0, 7.0, 1.0, 2.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0], "bins": [-0.01137542724609375, -0.010986566543579102, -0.010597705841064453, -0.010208845138549805, -0.009819984436035156, -0.009431123733520508, -0.00904226303100586, -0.008653402328491211, -0.008264541625976562, -0.007875680923461914, -0.007486820220947266, -0.007097959518432617, -0.006709098815917969, -0.00632023811340332, -0.005931377410888672, -0.0055425167083740234, -0.005153656005859375, -0.0047647953033447266, -0.004375934600830078, -0.00398707389831543, -0.0035982131958007812, -0.003209352493286133, -0.0028204917907714844, -0.002431631088256836, -0.0020427703857421875, -0.001653909683227539, -0.0012650489807128906, -0.0008761882781982422, -0.00048732757568359375, -9.846687316894531e-05, 0.0002903938293457031, 0.0006792545318603516, 0.001068115234375, 0.0014569759368896484, 0.0018458366394042969, 0.0022346973419189453, 0.0026235580444335938, 0.003012418746948242, 0.0034012794494628906, 0.003790140151977539, 0.0041790008544921875, 0.004567861557006836, 0.004956722259521484, 0.005345582962036133, 0.005734443664550781, 0.00612330436706543, 0.006512165069580078, 0.0069010257720947266, 0.007289886474609375, 0.0076787471771240234, 0.008067607879638672, 0.00845646858215332, 0.008845329284667969, 0.009234189987182617, 0.009623050689697266, 0.010011911392211914, 0.010400772094726562, 0.010789632797241211, 0.01117849349975586, 0.011567354202270508, 0.011956214904785156, 0.012345075607299805, 0.012733936309814453, 0.013122797012329102, 0.01351165771484375]}, "gradients/decoder.transformer.h.17.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 4.0, 6.0, 3.0, 6.0, 4.0, 5.0, 5.0, 8.0, 16.0, 16.0, 28.0, 22.0, 38.0, 48.0, 49.0, 71.0, 124.0, 166.0, 245.0, 293.0, 521.0, 2312.0, 146329.0, 889387.0, 6799.0, 703.0, 407.0, 249.0, 200.0, 135.0, 86.0, 57.0, 48.0, 25.0, 35.0, 26.0, 12.0, 16.0, 18.0, 4.0, 8.0, 5.0, 4.0, 6.0, 4.0, 1.0, 2.0, 0.0, 3.0, 0.0, 3.0, 1.0, 2.0], "bins": [-0.2261962890625, -0.2195758819580078, -0.21295547485351562, -0.20633506774902344, -0.19971466064453125, -0.19309425354003906, -0.18647384643554688, -0.1798534393310547, -0.1732330322265625, -0.1666126251220703, -0.15999221801757812, -0.15337181091308594, -0.14675140380859375, -0.14013099670410156, -0.13351058959960938, -0.1268901824951172, -0.120269775390625, -0.11364936828613281, -0.10702896118164062, -0.10040855407714844, -0.09378814697265625, -0.08716773986816406, -0.08054733276367188, -0.07392692565917969, -0.0673065185546875, -0.06068611145019531, -0.054065704345703125, -0.04744529724121094, -0.04082489013671875, -0.03420448303222656, -0.027584075927734375, -0.020963668823242188, -0.01434326171875, -0.0077228546142578125, -0.001102447509765625, 0.0055179595947265625, 0.01213836669921875, 0.018758773803710938, 0.025379180908203125, 0.03199958801269531, 0.0386199951171875, 0.04524040222167969, 0.051860809326171875, 0.05848121643066406, 0.06510162353515625, 0.07172203063964844, 0.07834243774414062, 0.08496284484863281, 0.091583251953125, 0.09820365905761719, 0.10482406616210938, 0.11144447326660156, 0.11806488037109375, 0.12468528747558594, 0.13130569458007812, 0.1379261016845703, 0.1445465087890625, 0.1511669158935547, 0.15778732299804688, 0.16440773010253906, 0.17102813720703125, 0.17764854431152344, 0.18426895141601562, 0.1908893585205078, 0.197509765625]}, "gradients/decoder.transformer.h.17.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 6.0, 53.0, 731.0, 217.0, 8.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.12403152137994766, -0.11883340030908585, -0.11363527923822403, -0.10843715816736221, -0.1032390370965004, -0.09804091602563858, -0.09284279495477676, -0.08764467388391495, -0.08244655281305313, -0.07724843174219131, -0.0720503106713295, -0.06685218960046768, -0.061654068529605865, -0.05645594745874405, -0.05125782638788223, -0.046059705317020416, -0.0408615842461586, -0.03566346317529678, -0.030465342104434967, -0.02526722103357315, -0.020069099962711334, -0.014870978891849518, -0.009672857820987701, -0.004474736750125885, 0.0007233843207359314, 0.005921505391597748, 0.011119626462459564, 0.01631774753332138, 0.021515868604183197, 0.026713989675045013, 0.03191211074590683, 0.037110231816768646, 0.042308345437049866, 0.04750646650791168, 0.0527045875787735, 0.057902708649635315, 0.06310082972049713, 0.06829895079135895, 0.07349707186222076, 0.07869519293308258, 0.0838933140039444, 0.08909143507480621, 0.09428955614566803, 0.09948767721652985, 0.10468579828739166, 0.10988391935825348, 0.1150820404291153, 0.12028016149997711, 0.12547828257083893, 0.13067640364170074, 0.13587452471256256, 0.14107264578342438, 0.1462707668542862, 0.151468887925148, 0.15666700899600983, 0.16186513006687164, 0.16706325113773346, 0.17226137220859528, 0.1774594932794571, 0.1826576143503189, 0.18785573542118073, 0.19305385649204254, 0.19825197756290436, 0.20345009863376617, 0.208648219704628]}, "gradients/decoder.transformer.h.17.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 2.0, 3.0, 5.0, 2.0, 9.0, 6.0, 10.0, 10.0, 10.0, 13.0, 15.0, 17.0, 28.0, 23.0, 29.0, 29.0, 21.0, 31.0, 29.0, 37.0, 45.0, 37.0, 38.0, 46.0, 39.0, 39.0, 39.0, 34.0, 34.0, 47.0, 33.0, 29.0, 30.0, 30.0, 28.0, 18.0, 17.0, 17.0, 11.0, 12.0, 16.0, 8.0, 7.0, 6.0, 6.0, 5.0, 3.0, 5.0, 3.0, 1.0, 3.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.018207669258117676, -0.017568517476320267, -0.01692936383187771, -0.01629021018743515, -0.01565105840563774, -0.015011905692517757, -0.014372752979397774, -0.01373360026627779, -0.013094447553157806, -0.012455294840037823, -0.011816142126917839, -0.011176989413797855, -0.010537836700677872, -0.009898683987557888, -0.009259531274437904, -0.00862037856131792, -0.007981225848197937, -0.007342073135077953, -0.00670292042195797, -0.006063767708837986, -0.005424614995718002, -0.004785462282598019, -0.004146309569478035, -0.0035071568563580513, -0.0028680041432380676, -0.002228851430118084, -0.0015896987169981003, -0.0009505460038781166, -0.00031139329075813293, 0.00032775942236185074, 0.0009669121354818344, 0.001606064848601818, 0.0022452175617218018, 0.0028843702748417854, 0.003523522987961769, 0.004162675701081753, 0.0048018284142017365, 0.00544098112732172, 0.006080133840441704, 0.0067192865535616875, 0.007358439266681671, 0.007997591979801655, 0.008636744692921638, 0.009275897406041622, 0.009915050119161606, 0.01055420283228159, 0.011193355545401573, 0.011832508258521557, 0.01247166097164154, 0.013110813684761524, 0.013749966397881508, 0.014389119111001492, 0.015028271824121475, 0.015667423605918884, 0.016306577250361443, 0.016945730894804, 0.01758488267660141, 0.01822403445839882, 0.018863188102841377, 0.019502341747283936, 0.020141493529081345, 0.020780645310878754, 0.021419798955321312, 0.02205895259976387, 0.02269810438156128]}, "gradients/decoder.transformer.h.17.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 3.0, 4.0, 8.0, 4.0, 6.0, 11.0, 13.0, 11.0, 10.0, 22.0, 24.0, 18.0, 25.0, 26.0, 40.0, 38.0, 34.0, 42.0, 41.0, 49.0, 41.0, 45.0, 44.0, 49.0, 50.0, 46.0, 38.0, 43.0, 35.0, 26.0, 28.0, 27.0, 24.0, 13.0, 17.0, 16.0, 12.0, 7.0, 9.0, 5.0, 0.0, 2.0, 2.0, 1.0, 5.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.05859375, -3.93017578125, -3.8017578125, -3.67333984375, -3.544921875, -3.41650390625, -3.2880859375, -3.15966796875, -3.03125, -2.90283203125, -2.7744140625, -2.64599609375, -2.517578125, -2.38916015625, -2.2607421875, -2.13232421875, -2.00390625, -1.87548828125, -1.7470703125, -1.61865234375, -1.490234375, -1.36181640625, -1.2333984375, -1.10498046875, -0.9765625, -0.84814453125, -0.7197265625, -0.59130859375, -0.462890625, -0.33447265625, -0.2060546875, -0.07763671875, 0.05078125, 0.17919921875, 0.3076171875, 0.43603515625, 0.564453125, 0.69287109375, 0.8212890625, 0.94970703125, 1.078125, 1.20654296875, 1.3349609375, 1.46337890625, 1.591796875, 1.72021484375, 1.8486328125, 1.97705078125, 2.10546875, 2.23388671875, 2.3623046875, 2.49072265625, 2.619140625, 2.74755859375, 2.8759765625, 3.00439453125, 3.1328125, 3.26123046875, 3.3896484375, 3.51806640625, 3.646484375, 3.77490234375, 3.9033203125, 4.03173828125, 4.16015625]}, "gradients/decoder.transformer.h.17.attn.c_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 1.0, 1.0, 3.0, 5.0, 5.0, 7.0, 7.0, 12.0, 21.0, 20.0, 37.0, 46.0, 80.0, 91.0, 90.0, 166.0, 208.0, 317.0, 476.0, 861.0, 1349.0, 2166.0, 3700.0, 6059.0, 10333.0, 18444.0, 39120.0, 102728.0, 298585.0, 343346.0, 125310.0, 45523.0, 20865.0, 11462.0, 6543.0, 4001.0, 2402.0, 1461.0, 931.0, 599.0, 373.0, 248.0, 178.0, 114.0, 72.0, 55.0, 43.0, 35.0, 12.0, 14.0, 15.0, 6.0, 9.0, 4.0, 4.0, 2.0, 2.0, 2.0, 2.0, 0.0, 1.0], "bins": [-4.171875, -4.0418701171875, -3.911865234375, -3.7818603515625, -3.65185546875, -3.5218505859375, -3.391845703125, -3.2618408203125, -3.1318359375, -3.0018310546875, -2.871826171875, -2.7418212890625, -2.61181640625, -2.4818115234375, -2.351806640625, -2.2218017578125, -2.091796875, -1.9617919921875, -1.831787109375, -1.7017822265625, -1.57177734375, -1.4417724609375, -1.311767578125, -1.1817626953125, -1.0517578125, -0.9217529296875, -0.791748046875, -0.6617431640625, -0.53173828125, -0.4017333984375, -0.271728515625, -0.1417236328125, -0.01171875, 0.1182861328125, 0.248291015625, 0.3782958984375, 0.50830078125, 0.6383056640625, 0.768310546875, 0.8983154296875, 1.0283203125, 1.1583251953125, 1.288330078125, 1.4183349609375, 1.54833984375, 1.6783447265625, 1.808349609375, 1.9383544921875, 2.068359375, 2.1983642578125, 2.328369140625, 2.4583740234375, 2.58837890625, 2.7183837890625, 2.848388671875, 2.9783935546875, 3.1083984375, 3.2384033203125, 3.368408203125, 3.4984130859375, 3.62841796875, 3.7584228515625, 3.888427734375, 4.0184326171875, 4.1484375]}, "gradients/decoder.transformer.h.17.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 2.0, 1.0, 2.0, 4.0, 1.0, 4.0, 11.0, 8.0, 12.0, 14.0, 19.0, 19.0, 31.0, 27.0, 32.0, 41.0, 47.0, 61.0, 63.0, 110.0, 239.0, 1526.0, 257.0, 101.0, 69.0, 50.0, 47.0, 47.0, 31.0, 40.0, 30.0, 24.0, 14.0, 13.0, 13.0, 14.0, 13.0, 5.0, 7.0, 2.0, 4.0, 4.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-18.234375, -17.74853515625, -17.2626953125, -16.77685546875, -16.291015625, -15.80517578125, -15.3193359375, -14.83349609375, -14.34765625, -13.86181640625, -13.3759765625, -12.89013671875, -12.404296875, -11.91845703125, -11.4326171875, -10.94677734375, -10.4609375, -9.97509765625, -9.4892578125, -9.00341796875, -8.517578125, -8.03173828125, -7.5458984375, -7.06005859375, -6.57421875, -6.08837890625, -5.6025390625, -5.11669921875, -4.630859375, -4.14501953125, -3.6591796875, -3.17333984375, -2.6875, -2.20166015625, -1.7158203125, -1.22998046875, -0.744140625, -0.25830078125, 0.2275390625, 0.71337890625, 1.19921875, 1.68505859375, 2.1708984375, 2.65673828125, 3.142578125, 3.62841796875, 4.1142578125, 4.60009765625, 5.0859375, 5.57177734375, 6.0576171875, 6.54345703125, 7.029296875, 7.51513671875, 8.0009765625, 8.48681640625, 8.97265625, 9.45849609375, 9.9443359375, 10.43017578125, 10.916015625, 11.40185546875, 11.8876953125, 12.37353515625, 12.859375]}, "gradients/decoder.transformer.h.17.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 2.0, 7.0, 4.0, 6.0, 7.0, 6.0, 9.0, 17.0, 21.0, 25.0, 30.0, 52.0, 62.0, 106.0, 175.0, 259.0, 526.0, 2055.0, 56300.0, 3051405.0, 31826.0, 1630.0, 478.0, 247.0, 139.0, 81.0, 65.0, 63.0, 25.0, 21.0, 12.0, 11.0, 9.0, 6.0, 10.0, 4.0, 5.0, 5.0, 2.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-38.5625, -37.4091796875, -36.255859375, -35.1025390625, -33.94921875, -32.7958984375, -31.642578125, -30.4892578125, -29.3359375, -28.1826171875, -27.029296875, -25.8759765625, -24.72265625, -23.5693359375, -22.416015625, -21.2626953125, -20.109375, -18.9560546875, -17.802734375, -16.6494140625, -15.49609375, -14.3427734375, -13.189453125, -12.0361328125, -10.8828125, -9.7294921875, -8.576171875, -7.4228515625, -6.26953125, -5.1162109375, -3.962890625, -2.8095703125, -1.65625, -0.5029296875, 0.650390625, 1.8037109375, 2.95703125, 4.1103515625, 5.263671875, 6.4169921875, 7.5703125, 8.7236328125, 9.876953125, 11.0302734375, 12.18359375, 13.3369140625, 14.490234375, 15.6435546875, 16.796875, 17.9501953125, 19.103515625, 20.2568359375, 21.41015625, 22.5634765625, 23.716796875, 24.8701171875, 26.0234375, 27.1767578125, 28.330078125, 29.4833984375, 30.63671875, 31.7900390625, 32.943359375, 34.0966796875, 35.25]}, "gradients/decoder.transformer.h.17.ln_1.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 4.0, 14.0, 33.0, 85.0, 191.0, 203.0, 225.0, 143.0, 62.0, 40.0, 11.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-19.109067916870117, -17.74233627319336, -16.37560272216797, -15.008871078491211, -13.642139434814453, -12.275406837463379, -10.908674240112305, -9.541942596435547, -8.175209999084473, -6.808477878570557, -5.441745758056641, -4.075013160705566, -2.7082810401916504, -1.3415489196777344, 0.025183677673339844, 1.3919153213500977, 2.758647918701172, 4.125380039215088, 5.492112159729004, 6.858844757080078, 8.225576400756836, 9.59230899810791, 10.959041595458984, 12.325773239135742, 13.692505836486816, 15.05923843383789, 16.42597007751465, 17.792701721191406, 19.159435272216797, 20.526166915893555, 21.892898559570312, 23.259632110595703, 24.626361846923828, 25.993093490600586, 27.359827041625977, 28.726558685302734, 30.093290328979492, 31.46002197265625, 32.82675552368164, 34.19348907470703, 35.560218811035156, 36.92695236206055, 38.29368209838867, 39.66041564941406, 41.02714920043945, 42.39387893676758, 43.76061248779297, 45.127342224121094, 46.49407958984375, 47.86081314086914, 49.227542877197266, 50.594276428222656, 51.96100997924805, 53.32773971557617, 54.69447326660156, 56.06120300292969, 57.42793655395508, 58.79467010498047, 60.161399841308594, 61.528133392333984, 62.894866943359375, 64.2615966796875, 65.62832641601562, 66.99506378173828, 68.3617935180664]}, "gradients/decoder.transformer.h.17.ln_1.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 3.0, 2.0, 2.0, 2.0, 0.0, 0.0, 1.0, 11.0, 3.0, 13.0, 18.0, 13.0, 21.0, 16.0, 24.0, 19.0, 15.0, 29.0, 24.0, 27.0, 35.0, 31.0, 37.0, 31.0, 41.0, 49.0, 37.0, 48.0, 36.0, 31.0, 32.0, 41.0, 36.0, 34.0, 30.0, 30.0, 28.0, 22.0, 16.0, 21.0, 16.0, 14.0, 19.0, 9.0, 10.0, 5.0, 7.0, 5.0, 7.0, 4.0, 3.0, 4.0, 2.0, 1.0, 1.0, 2.0, 0.0, 1.0], "bins": [-33.74267578125, -32.720741271972656, -31.698808670043945, -30.676876068115234, -29.65494155883789, -28.63300895690918, -27.61107635498047, -26.589141845703125, -25.56720733642578, -24.54527473449707, -23.523340225219727, -22.501407623291016, -21.479473114013672, -20.45754051208496, -19.43560791015625, -18.413673400878906, -17.391740798950195, -16.369808197021484, -15.34787368774414, -14.32594108581543, -13.304006576538086, -12.282073974609375, -11.260140419006348, -10.23820686340332, -9.216273307800293, -8.194339752197266, -7.172406196594238, -6.150473117828369, -5.128539562225342, -4.1066060066223145, -3.0846729278564453, -2.062739372253418, -1.0408058166503906, -0.018872380256652832, 1.003061056137085, 2.024994373321533, 3.0469279289245605, 4.068861484527588, 5.090794563293457, 6.112728118896484, 7.134661674499512, 8.156595230102539, 9.178528785705566, 10.200462341308594, 11.222394943237305, 12.244329452514648, 13.26626205444336, 14.288195610046387, 15.310129165649414, 16.332061767578125, 17.35399627685547, 18.37592887878418, 19.397863388061523, 20.419795989990234, 21.441730499267578, 22.46366310119629, 23.485595703125, 24.50752830505371, 25.529462814331055, 26.551395416259766, 27.57332992553711, 28.59526252746582, 29.61719512939453, 30.639129638671875, 31.66106414794922]}, "gradients/decoder.transformer.h.16.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 5.0, 3.0, 3.0, 7.0, 8.0, 13.0, 9.0, 11.0, 8.0, 22.0, 22.0, 25.0, 26.0, 29.0, 34.0, 41.0, 29.0, 46.0, 41.0, 40.0, 47.0, 45.0, 53.0, 41.0, 48.0, 37.0, 41.0, 35.0, 36.0, 39.0, 35.0, 24.0, 19.0, 18.0, 12.0, 23.0, 8.0, 10.0, 6.0, 1.0, 4.0, 6.0, 3.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-4.24609375, -4.112060546875, -3.97802734375, -3.843994140625, -3.7099609375, -3.575927734375, -3.44189453125, -3.307861328125, -3.173828125, -3.039794921875, -2.90576171875, -2.771728515625, -2.6376953125, -2.503662109375, -2.36962890625, -2.235595703125, -2.1015625, -1.967529296875, -1.83349609375, -1.699462890625, -1.5654296875, -1.431396484375, -1.29736328125, -1.163330078125, -1.029296875, -0.895263671875, -0.76123046875, -0.627197265625, -0.4931640625, -0.359130859375, -0.22509765625, -0.091064453125, 0.04296875, 0.177001953125, 0.31103515625, 0.445068359375, 0.5791015625, 0.713134765625, 0.84716796875, 0.981201171875, 1.115234375, 1.249267578125, 1.38330078125, 1.517333984375, 1.6513671875, 1.785400390625, 1.91943359375, 2.053466796875, 2.1875, 2.321533203125, 2.45556640625, 2.589599609375, 2.7236328125, 2.857666015625, 2.99169921875, 3.125732421875, 3.259765625, 3.393798828125, 3.52783203125, 3.661865234375, 3.7958984375, 3.929931640625, 4.06396484375, 4.197998046875, 4.33203125]}, "gradients/decoder.transformer.h.16.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 6.0, 5.0, 5.0, 13.0, 9.0, 14.0, 14.0, 16.0, 27.0, 25.0, 42.0, 35.0, 59.0, 86.0, 108.0, 224.0, 636.0, 2439.0, 12239.0, 163594.0, 2868391.0, 1092932.0, 45819.0, 5412.0, 1236.0, 370.0, 152.0, 98.0, 48.0, 35.0, 49.0, 34.0, 20.0, 21.0, 23.0, 17.0, 13.0, 9.0, 3.0, 8.0, 1.0, 2.0, 2.0, 1.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-15.5703125, -15.0953369140625, -14.620361328125, -14.1453857421875, -13.67041015625, -13.1954345703125, -12.720458984375, -12.2454833984375, -11.7705078125, -11.2955322265625, -10.820556640625, -10.3455810546875, -9.87060546875, -9.3956298828125, -8.920654296875, -8.4456787109375, -7.970703125, -7.4957275390625, -7.020751953125, -6.5457763671875, -6.07080078125, -5.5958251953125, -5.120849609375, -4.6458740234375, -4.1708984375, -3.6959228515625, -3.220947265625, -2.7459716796875, -2.27099609375, -1.7960205078125, -1.321044921875, -0.8460693359375, -0.37109375, 0.1038818359375, 0.578857421875, 1.0538330078125, 1.52880859375, 2.0037841796875, 2.478759765625, 2.9537353515625, 3.4287109375, 3.9036865234375, 4.378662109375, 4.8536376953125, 5.32861328125, 5.8035888671875, 6.278564453125, 6.7535400390625, 7.228515625, 7.7034912109375, 8.178466796875, 8.6534423828125, 9.12841796875, 9.6033935546875, 10.078369140625, 10.5533447265625, 11.0283203125, 11.5032958984375, 11.978271484375, 12.4532470703125, 12.92822265625, 13.4031982421875, 13.878173828125, 14.3531494140625, 14.828125]}, "gradients/decoder.transformer.h.16.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 3.0, 0.0, 4.0, 3.0, 7.0, 11.0, 20.0, 31.0, 28.0, 34.0, 67.0, 97.0, 145.0, 224.0, 352.0, 412.0, 526.0, 606.0, 447.0, 349.0, 225.0, 154.0, 113.0, 79.0, 50.0, 35.0, 23.0, 19.0, 5.0, 9.0, 5.0, 3.0, 0.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-10.765625, -10.382080078125, -9.99853515625, -9.614990234375, -9.2314453125, -8.847900390625, -8.46435546875, -8.080810546875, -7.697265625, -7.313720703125, -6.93017578125, -6.546630859375, -6.1630859375, -5.779541015625, -5.39599609375, -5.012451171875, -4.62890625, -4.245361328125, -3.86181640625, -3.478271484375, -3.0947265625, -2.711181640625, -2.32763671875, -1.944091796875, -1.560546875, -1.177001953125, -0.79345703125, -0.409912109375, -0.0263671875, 0.357177734375, 0.74072265625, 1.124267578125, 1.5078125, 1.891357421875, 2.27490234375, 2.658447265625, 3.0419921875, 3.425537109375, 3.80908203125, 4.192626953125, 4.576171875, 4.959716796875, 5.34326171875, 5.726806640625, 6.1103515625, 6.493896484375, 6.87744140625, 7.260986328125, 7.64453125, 8.028076171875, 8.41162109375, 8.795166015625, 9.1787109375, 9.562255859375, 9.94580078125, 10.329345703125, 10.712890625, 11.096435546875, 11.47998046875, 11.863525390625, 12.2470703125, 12.630615234375, 13.01416015625, 13.397705078125, 13.78125]}, "gradients/decoder.transformer.h.16.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 1.0, 2.0, 9.0, 11.0, 8.0, 26.0, 40.0, 48.0, 73.0, 127.0, 225.0, 442.0, 1767.0, 16100.0, 837509.0, 3268172.0, 64608.0, 3676.0, 762.0, 306.0, 128.0, 84.0, 52.0, 37.0, 27.0, 21.0, 12.0, 7.0, 7.0, 1.0, 2.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-38.46875, -37.434814453125, -36.40087890625, -35.366943359375, -34.3330078125, -33.299072265625, -32.26513671875, -31.231201171875, -30.197265625, -29.163330078125, -28.12939453125, -27.095458984375, -26.0615234375, -25.027587890625, -23.99365234375, -22.959716796875, -21.92578125, -20.891845703125, -19.85791015625, -18.823974609375, -17.7900390625, -16.756103515625, -15.72216796875, -14.688232421875, -13.654296875, -12.620361328125, -11.58642578125, -10.552490234375, -9.5185546875, -8.484619140625, -7.45068359375, -6.416748046875, -5.3828125, -4.348876953125, -3.31494140625, -2.281005859375, -1.2470703125, -0.213134765625, 0.82080078125, 1.854736328125, 2.888671875, 3.922607421875, 4.95654296875, 5.990478515625, 7.0244140625, 8.058349609375, 9.09228515625, 10.126220703125, 11.16015625, 12.194091796875, 13.22802734375, 14.261962890625, 15.2958984375, 16.329833984375, 17.36376953125, 18.397705078125, 19.431640625, 20.465576171875, 21.49951171875, 22.533447265625, 23.5673828125, 24.601318359375, 25.63525390625, 26.669189453125, 27.703125]}, "gradients/decoder.transformer.h.16.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 3.0, 2.0, 1.0, 4.0, 2.0, 8.0, 6.0, 18.0, 28.0, 38.0, 64.0, 85.0, 93.0, 102.0, 97.0, 93.0, 99.0, 86.0, 63.0, 49.0, 29.0, 17.0, 15.0, 7.0, 4.0, 2.0, 3.0, 2.0], "bins": [-78.19986724853516, -76.66096496582031, -75.1220703125, -73.58316802978516, -72.04427337646484, -70.50537109375, -68.96647644042969, -67.42757415771484, -65.88867950439453, -64.34977722167969, -62.810882568359375, -61.2719841003418, -59.73308563232422, -58.19418716430664, -56.65528869628906, -55.11638641357422, -53.57748794555664, -52.03858947753906, -50.499691009521484, -48.960792541503906, -47.42189407348633, -45.88299560546875, -44.344093322753906, -42.805198669433594, -41.26629638671875, -39.72739791870117, -38.188499450683594, -36.649600982666016, -35.11070251464844, -33.57180404663086, -32.03290557861328, -30.49400520324707, -28.955106735229492, -27.416208267211914, -25.877309799194336, -24.338409423828125, -22.799510955810547, -21.26061248779297, -19.72171401977539, -18.182815551757812, -16.643917083740234, -15.105018615722656, -13.566120147705078, -12.027220726013184, -10.488322257995605, -8.949423789978027, -7.410524368286133, -5.871625900268555, -4.332727432250977, -2.7938287258148193, -1.254930019378662, 0.2839689254760742, 1.8228673934936523, 3.3617658615112305, 4.900665283203125, 6.439563751220703, 7.978462219238281, 9.51736068725586, 11.056259155273438, 12.595158576965332, 14.13405704498291, 15.672955513000488, 17.211854934692383, 18.75075340270996, 20.28965187072754]}, "gradients/decoder.transformer.h.16.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 3.0, 2.0, 3.0, 2.0, 4.0, 6.0, 4.0, 5.0, 11.0, 10.0, 14.0, 18.0, 15.0, 19.0, 27.0, 34.0, 20.0, 35.0, 47.0, 26.0, 52.0, 62.0, 56.0, 50.0, 46.0, 36.0, 42.0, 36.0, 39.0, 49.0, 40.0, 29.0, 29.0, 28.0, 13.0, 19.0, 16.0, 20.0, 11.0, 9.0, 8.0, 10.0, 1.0, 2.0, 4.0, 4.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-32.34918212890625, -31.29689598083496, -30.244611740112305, -29.192325592041016, -28.14004135131836, -27.08775520324707, -26.03546905517578, -24.983184814453125, -23.93090057373047, -22.87861442565918, -21.826330184936523, -20.774044036865234, -19.721759796142578, -18.66947364807129, -17.6171875, -16.564903259277344, -15.512617111206055, -14.460331916809082, -13.40804672241211, -12.35576057434082, -11.303476333618164, -10.251190185546875, -9.198904991149902, -8.14661979675293, -7.094334602355957, -6.042049407958984, -4.989764213562012, -3.937478542327881, -2.885193347930908, -1.8329081535339355, -0.7806224822998047, 0.27166271209716797, 1.3239479064941406, 2.3762331008911133, 3.428518533706665, 4.480803966522217, 5.5330891609191895, 6.585374355316162, 7.637660026550293, 8.689945220947266, 9.742230415344238, 10.794515609741211, 11.846800804138184, 12.899085998535156, 13.951372146606445, 15.003656387329102, 16.05594253540039, 17.108226776123047, 18.160512924194336, 19.212799072265625, 20.26508331298828, 21.31736946105957, 22.369653701782227, 23.421939849853516, 24.474224090576172, 25.52651023864746, 26.57879638671875, 27.63108253479004, 28.683366775512695, 29.735652923583984, 30.78793716430664, 31.84022331237793, 32.89250946044922, 33.944793701171875, 34.99707794189453]}, "gradients/decoder.transformer.h.16.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 4.0, 1.0, 2.0, 5.0, 4.0, 12.0, 10.0, 12.0, 11.0, 22.0, 22.0, 17.0, 19.0, 30.0, 41.0, 26.0, 49.0, 40.0, 35.0, 42.0, 57.0, 48.0, 38.0, 49.0, 35.0, 48.0, 45.0, 37.0, 41.0, 37.0, 28.0, 25.0, 25.0, 17.0, 21.0, 17.0, 5.0, 6.0, 9.0, 3.0, 7.0, 3.0, 4.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-4.25, -4.11309814453125, -3.9761962890625, -3.83929443359375, -3.702392578125, -3.56549072265625, -3.4285888671875, -3.29168701171875, -3.15478515625, -3.01788330078125, -2.8809814453125, -2.74407958984375, -2.607177734375, -2.47027587890625, -2.3333740234375, -2.19647216796875, -2.0595703125, -1.92266845703125, -1.7857666015625, -1.64886474609375, -1.511962890625, -1.37506103515625, -1.2381591796875, -1.10125732421875, -0.96435546875, -0.82745361328125, -0.6905517578125, -0.55364990234375, -0.416748046875, -0.27984619140625, -0.1429443359375, -0.00604248046875, 0.130859375, 0.26776123046875, 0.4046630859375, 0.54156494140625, 0.678466796875, 0.81536865234375, 0.9522705078125, 1.08917236328125, 1.22607421875, 1.36297607421875, 1.4998779296875, 1.63677978515625, 1.773681640625, 1.91058349609375, 2.0474853515625, 2.18438720703125, 2.3212890625, 2.45819091796875, 2.5950927734375, 2.73199462890625, 2.868896484375, 3.00579833984375, 3.1427001953125, 3.27960205078125, 3.41650390625, 3.55340576171875, 3.6903076171875, 3.82720947265625, 3.964111328125, 4.10101318359375, 4.2379150390625, 4.37481689453125, 4.51171875]}, "gradients/decoder.transformer.h.16.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 4.0, 4.0, 8.0, 9.0, 10.0, 16.0, 29.0, 50.0, 61.0, 115.0, 161.0, 211.0, 345.0, 509.0, 843.0, 1288.0, 1755.0, 2768.0, 4110.0, 6179.0, 9962.0, 15331.0, 23556.0, 36199.0, 54550.0, 80436.0, 111413.0, 140670.0, 148442.0, 125480.0, 93276.0, 65763.0, 43322.0, 28741.0, 18458.0, 11789.0, 7862.0, 5000.0, 3323.0, 2158.0, 1485.0, 951.0, 656.0, 406.0, 298.0, 173.0, 150.0, 68.0, 65.0, 39.0, 20.0, 20.0, 10.0, 8.0, 6.0, 4.0, 2.0, 1.0, 3.0, 0.0, 2.0, 0.0, 2.0], "bins": [-0.489501953125, -0.4727516174316406, -0.45600128173828125, -0.4392509460449219, -0.4225006103515625, -0.4057502746582031, -0.38899993896484375, -0.3722496032714844, -0.355499267578125, -0.3387489318847656, -0.32199859619140625, -0.3052482604980469, -0.2884979248046875, -0.2717475891113281, -0.25499725341796875, -0.23824691772460938, -0.22149658203125, -0.20474624633789062, -0.18799591064453125, -0.17124557495117188, -0.1544952392578125, -0.13774490356445312, -0.12099456787109375, -0.10424423217773438, -0.087493896484375, -0.07074356079101562, -0.05399322509765625, -0.037242889404296875, -0.0204925537109375, -0.003742218017578125, 0.01300811767578125, 0.029758453369140625, 0.0465087890625, 0.06325912475585938, 0.08000946044921875, 0.09675979614257812, 0.1135101318359375, 0.13026046752929688, 0.14701080322265625, 0.16376113891601562, 0.180511474609375, 0.19726181030273438, 0.21401214599609375, 0.23076248168945312, 0.2475128173828125, 0.2642631530761719, 0.28101348876953125, 0.2977638244628906, 0.31451416015625, 0.3312644958496094, 0.34801483154296875, 0.3647651672363281, 0.3815155029296875, 0.3982658386230469, 0.41501617431640625, 0.4317665100097656, 0.448516845703125, 0.4652671813964844, 0.48201751708984375, 0.4987678527832031, 0.5155181884765625, 0.5322685241699219, 0.5490188598632812, 0.5657691955566406, 0.58251953125]}, "gradients/decoder.transformer.h.16.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 4.0, 2.0, 1.0, 10.0, 3.0, 11.0, 3.0, 11.0, 7.0, 21.0, 18.0, 19.0, 17.0, 32.0, 27.0, 52.0, 37.0, 35.0, 40.0, 36.0, 52.0, 48.0, 39.0, 1068.0, 56.0, 34.0, 41.0, 30.0, 38.0, 34.0, 34.0, 29.0, 23.0, 20.0, 21.0, 21.0, 17.0, 12.0, 9.0, 6.0, 7.0, 7.0, 3.0, 2.0, 2.0, 0.0, 0.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.005859375, -2.91229248046875, -2.8187255859375, -2.72515869140625, -2.631591796875, -2.53802490234375, -2.4444580078125, -2.35089111328125, -2.25732421875, -2.16375732421875, -2.0701904296875, -1.97662353515625, -1.883056640625, -1.78948974609375, -1.6959228515625, -1.60235595703125, -1.5087890625, -1.41522216796875, -1.3216552734375, -1.22808837890625, -1.134521484375, -1.04095458984375, -0.9473876953125, -0.85382080078125, -0.76025390625, -0.66668701171875, -0.5731201171875, -0.47955322265625, -0.385986328125, -0.29241943359375, -0.1988525390625, -0.10528564453125, -0.01171875, 0.08184814453125, 0.1754150390625, 0.26898193359375, 0.362548828125, 0.45611572265625, 0.5496826171875, 0.64324951171875, 0.73681640625, 0.83038330078125, 0.9239501953125, 1.01751708984375, 1.111083984375, 1.20465087890625, 1.2982177734375, 1.39178466796875, 1.4853515625, 1.57891845703125, 1.6724853515625, 1.76605224609375, 1.859619140625, 1.95318603515625, 2.0467529296875, 2.14031982421875, 2.23388671875, 2.32745361328125, 2.4210205078125, 2.51458740234375, 2.608154296875, 2.70172119140625, 2.7952880859375, 2.88885498046875, 2.982421875]}, "gradients/decoder.transformer.h.16.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 4.0, 2.0, 2.0, 8.0, 5.0, 5.0, 6.0, 12.0, 47.0, 51.0, 63.0, 114.0, 170.0, 256.0, 440.0, 692.0, 1051.0, 1660.0, 2648.0, 4122.0, 6504.0, 10795.0, 17387.0, 28413.0, 46870.0, 76339.0, 117885.0, 180077.0, 1204963.0, 143420.0, 96249.0, 60739.0, 37095.0, 22417.0, 13735.0, 8495.0, 5300.0, 3305.0, 2039.0, 1329.0, 856.0, 557.0, 344.0, 243.0, 130.0, 85.0, 76.0, 48.0, 33.0, 18.0, 18.0, 10.0, 6.0, 7.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.373046875, -0.3614845275878906, -0.34992218017578125, -0.3383598327636719, -0.3267974853515625, -0.3152351379394531, -0.30367279052734375, -0.2921104431152344, -0.280548095703125, -0.2689857482910156, -0.25742340087890625, -0.24586105346679688, -0.2342987060546875, -0.22273635864257812, -0.21117401123046875, -0.19961166381835938, -0.18804931640625, -0.17648696899414062, -0.16492462158203125, -0.15336227416992188, -0.1417999267578125, -0.13023757934570312, -0.11867523193359375, -0.10711288452148438, -0.095550537109375, -0.08398818969726562, -0.07242584228515625, -0.060863494873046875, -0.0493011474609375, -0.037738800048828125, -0.02617645263671875, -0.014614105224609375, -0.0030517578125, 0.008510589599609375, 0.02007293701171875, 0.031635284423828125, 0.0431976318359375, 0.054759979248046875, 0.06632232666015625, 0.07788467407226562, 0.089447021484375, 0.10100936889648438, 0.11257171630859375, 0.12413406372070312, 0.1356964111328125, 0.14725875854492188, 0.15882110595703125, 0.17038345336914062, 0.18194580078125, 0.19350814819335938, 0.20507049560546875, 0.21663284301757812, 0.2281951904296875, 0.23975753784179688, 0.25131988525390625, 0.2628822326660156, 0.274444580078125, 0.2860069274902344, 0.29756927490234375, 0.3091316223144531, 0.3206939697265625, 0.3322563171386719, 0.34381866455078125, 0.3553810119628906, 0.366943359375]}, "gradients/decoder.transformer.h.16.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 3.0, 5.0, 6.0, 3.0, 4.0, 7.0, 9.0, 8.0, 10.0, 16.0, 27.0, 35.0, 54.0, 65.0, 93.0, 106.0, 154.0, 106.0, 77.0, 59.0, 51.0, 35.0, 20.0, 17.0, 6.0, 8.0, 11.0, 2.0, 2.0, 3.0, 2.0, 2.0, 2.0, 3.0, 0.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0222930908203125, -0.021509170532226562, -0.020725250244140625, -0.019941329956054688, -0.01915740966796875, -0.018373489379882812, -0.017589569091796875, -0.016805648803710938, -0.016021728515625, -0.015237808227539062, -0.014453887939453125, -0.013669967651367188, -0.01288604736328125, -0.012102127075195312, -0.011318206787109375, -0.010534286499023438, -0.0097503662109375, -0.008966445922851562, -0.008182525634765625, -0.0073986053466796875, -0.00661468505859375, -0.0058307647705078125, -0.005046844482421875, -0.0042629241943359375, -0.00347900390625, -0.0026950836181640625, -0.001911163330078125, -0.0011272430419921875, -0.00034332275390625, 0.0004405975341796875, 0.001224517822265625, 0.0020084381103515625, 0.0027923583984375, 0.0035762786865234375, 0.004360198974609375, 0.0051441192626953125, 0.00592803955078125, 0.0067119598388671875, 0.007495880126953125, 0.008279800415039062, 0.009063720703125, 0.009847640991210938, 0.010631561279296875, 0.011415481567382812, 0.01219940185546875, 0.012983322143554688, 0.013767242431640625, 0.014551162719726562, 0.0153350830078125, 0.016119003295898438, 0.016902923583984375, 0.017686843872070312, 0.01847076416015625, 0.019254684448242188, 0.020038604736328125, 0.020822525024414062, 0.0216064453125, 0.022390365600585938, 0.023174285888671875, 0.023958206176757812, 0.02474212646484375, 0.025526046752929688, 0.026309967041015625, 0.027093887329101562, 0.0278778076171875]}, "gradients/decoder.transformer.h.16.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 3.0, 2.0, 3.0, 2.0, 2.0, 4.0, 2.0, 2.0, 9.0, 16.0, 16.0, 16.0, 42.0, 51.0, 112.0, 162.0, 312.0, 1072.0, 105326.0, 938804.0, 1756.0, 374.0, 173.0, 96.0, 66.0, 41.0, 32.0, 17.0, 14.0, 7.0, 10.0, 6.0, 2.0, 7.0, 6.0, 2.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.46875, -0.4553985595703125, -0.442047119140625, -0.4286956787109375, -0.41534423828125, -0.4019927978515625, -0.388641357421875, -0.3752899169921875, -0.3619384765625, -0.3485870361328125, -0.335235595703125, -0.3218841552734375, -0.30853271484375, -0.2951812744140625, -0.281829833984375, -0.2684783935546875, -0.255126953125, -0.2417755126953125, -0.228424072265625, -0.2150726318359375, -0.20172119140625, -0.1883697509765625, -0.175018310546875, -0.1616668701171875, -0.1483154296875, -0.1349639892578125, -0.121612548828125, -0.1082611083984375, -0.09490966796875, -0.0815582275390625, -0.068206787109375, -0.0548553466796875, -0.04150390625, -0.0281524658203125, -0.014801025390625, -0.0014495849609375, 0.01190185546875, 0.0252532958984375, 0.038604736328125, 0.0519561767578125, 0.0653076171875, 0.0786590576171875, 0.092010498046875, 0.1053619384765625, 0.11871337890625, 0.1320648193359375, 0.145416259765625, 0.1587677001953125, 0.172119140625, 0.1854705810546875, 0.198822021484375, 0.2121734619140625, 0.22552490234375, 0.2388763427734375, 0.252227783203125, 0.2655792236328125, 0.2789306640625, 0.2922821044921875, 0.305633544921875, 0.3189849853515625, 0.33233642578125, 0.3456878662109375, 0.359039306640625, 0.3723907470703125, 0.3857421875]}, "gradients/decoder.transformer.h.16.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 2.0, 6.0, 55.0, 772.0, 176.0, 6.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.16217412054538727, -0.15654630959033966, -0.15091849863529205, -0.14529068768024445, -0.13966286182403564, -0.13403505086898804, -0.12840723991394043, -0.12277942895889282, -0.11715161800384521, -0.11152380704879761, -0.10589599609375, -0.1002681776881218, -0.09464036673307419, -0.08901255577802658, -0.08338473737239838, -0.07775692641735077, -0.07212911546230316, -0.06650130450725555, -0.06087348982691765, -0.05524567514657974, -0.049617864191532135, -0.04399005323648453, -0.03836223855614662, -0.032734423875808716, -0.02710661292076111, -0.021478800103068352, -0.015850987285375595, -0.010223174467682838, -0.004595361649990082, 0.0010324511677026749, 0.0066602639853954315, 0.012288078665733337, 0.01791590452194214, 0.023543717339634895, 0.029171530157327652, 0.03479934483766556, 0.040427155792713165, 0.04605496674776077, 0.05168278142809868, 0.057310596108436584, 0.06293840706348419, 0.0685662180185318, 0.0741940289735794, 0.07982184737920761, 0.08544965833425522, 0.09107746928930283, 0.09670528769493103, 0.10233309864997864, 0.10796090960502625, 0.11358872056007385, 0.11921653151512146, 0.12484434992074966, 0.13047215342521667, 0.13609997928142548, 0.14172779023647308, 0.1473556011915207, 0.1529834121465683, 0.1586112231016159, 0.1642390340566635, 0.16986684501171112, 0.17549467086791992, 0.18112248182296753, 0.18675029277801514, 0.19237810373306274, 0.19800591468811035]}, "gradients/decoder.transformer.h.16.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0, 4.0, 3.0, 3.0, 8.0, 9.0, 4.0, 12.0, 4.0, 13.0, 10.0, 19.0, 19.0, 18.0, 30.0, 30.0, 29.0, 34.0, 38.0, 24.0, 24.0, 37.0, 18.0, 39.0, 41.0, 29.0, 29.0, 31.0, 30.0, 35.0, 42.0, 44.0, 38.0, 31.0, 18.0, 21.0, 27.0, 28.0, 20.0, 22.0, 10.0, 11.0, 10.0, 6.0, 16.0, 13.0, 7.0, 7.0, 3.0, 7.0, 4.0, 4.0, 0.0, 1.0, 1.0, 1.0, 1.0], "bins": [-0.020583808422088623, -0.019961532205343246, -0.01933925412595272, -0.018716976046562195, -0.018094699829816818, -0.01747242361307144, -0.016850145533680916, -0.01622786745429039, -0.015605591237545013, -0.014983314089477062, -0.014361036941409111, -0.01373875979334116, -0.013116482645273209, -0.012494205497205257, -0.011871928349137306, -0.011249651201069355, -0.010627374053001404, -0.010005096904933453, -0.009382819756865501, -0.00876054260879755, -0.008138265460729599, -0.007515988312661648, -0.006893711164593697, -0.006271434016525745, -0.005649156868457794, -0.005026879720389843, -0.004404602572321892, -0.0037823254242539406, -0.0031600482761859894, -0.002537771128118038, -0.001915493980050087, -0.0012932168319821358, -0.0006709396839141846, -4.866253584623337e-05, 0.0005736146122217178, 0.001195891760289669, 0.0018181689083576202, 0.0024404460564255714, 0.0030627232044935226, 0.003685000352561474, 0.004307277500629425, 0.004929554648697376, 0.0055518317967653275, 0.006174108944833279, 0.00679638609290123, 0.007418663240969181, 0.008040940389037132, 0.008663217537105083, 0.009285494685173035, 0.009907771833240986, 0.010530048981308937, 0.011152326129376888, 0.01177460327744484, 0.01239688042551279, 0.013019157573580742, 0.013641434721648693, 0.014263711869716644, 0.014885989017784595, 0.015508266165852547, 0.016130544245243073, 0.01675282046198845, 0.017375096678733826, 0.01799737475812435, 0.018619652837514877, 0.019241929054260254]}, "gradients/decoder.transformer.h.16.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 4.0, 1.0, 2.0, 5.0, 4.0, 12.0, 10.0, 12.0, 11.0, 22.0, 22.0, 17.0, 19.0, 29.0, 41.0, 27.0, 47.0, 41.0, 36.0, 41.0, 57.0, 48.0, 39.0, 48.0, 35.0, 48.0, 45.0, 37.0, 42.0, 37.0, 27.0, 26.0, 24.0, 18.0, 20.0, 17.0, 6.0, 6.0, 9.0, 3.0, 6.0, 4.0, 4.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-4.25, -4.1131591796875, -3.976318359375, -3.8394775390625, -3.70263671875, -3.5657958984375, -3.428955078125, -3.2921142578125, -3.1552734375, -3.0184326171875, -2.881591796875, -2.7447509765625, -2.60791015625, -2.4710693359375, -2.334228515625, -2.1973876953125, -2.060546875, -1.9237060546875, -1.786865234375, -1.6500244140625, -1.51318359375, -1.3763427734375, -1.239501953125, -1.1026611328125, -0.9658203125, -0.8289794921875, -0.692138671875, -0.5552978515625, -0.41845703125, -0.2816162109375, -0.144775390625, -0.0079345703125, 0.12890625, 0.2657470703125, 0.402587890625, 0.5394287109375, 0.67626953125, 0.8131103515625, 0.949951171875, 1.0867919921875, 1.2236328125, 1.3604736328125, 1.497314453125, 1.6341552734375, 1.77099609375, 1.9078369140625, 2.044677734375, 2.1815185546875, 2.318359375, 2.4552001953125, 2.592041015625, 2.7288818359375, 2.86572265625, 3.0025634765625, 3.139404296875, 3.2762451171875, 3.4130859375, 3.5499267578125, 3.686767578125, 3.8236083984375, 3.96044921875, 4.0972900390625, 4.234130859375, 4.3709716796875, 4.5078125]}, "gradients/decoder.transformer.h.16.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 3.0, 2.0, 3.0, 7.0, 8.0, 8.0, 9.0, 25.0, 27.0, 33.0, 81.0, 140.0, 206.0, 341.0, 688.0, 1252.0, 2202.0, 4221.0, 7552.0, 13529.0, 24202.0, 43590.0, 94214.0, 233912.0, 332663.0, 152248.0, 63574.0, 32219.0, 18458.0, 10451.0, 5817.0, 3148.0, 1668.0, 885.0, 481.0, 294.0, 146.0, 88.0, 69.0, 27.0, 31.0, 12.0, 10.0, 12.0, 6.0, 3.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-3.228515625, -3.125244140625, -3.02197265625, -2.918701171875, -2.8154296875, -2.712158203125, -2.60888671875, -2.505615234375, -2.40234375, -2.299072265625, -2.19580078125, -2.092529296875, -1.9892578125, -1.885986328125, -1.78271484375, -1.679443359375, -1.576171875, -1.472900390625, -1.36962890625, -1.266357421875, -1.1630859375, -1.059814453125, -0.95654296875, -0.853271484375, -0.75, -0.646728515625, -0.54345703125, -0.440185546875, -0.3369140625, -0.233642578125, -0.13037109375, -0.027099609375, 0.076171875, 0.179443359375, 0.28271484375, 0.385986328125, 0.4892578125, 0.592529296875, 0.69580078125, 0.799072265625, 0.90234375, 1.005615234375, 1.10888671875, 1.212158203125, 1.3154296875, 1.418701171875, 1.52197265625, 1.625244140625, 1.728515625, 1.831787109375, 1.93505859375, 2.038330078125, 2.1416015625, 2.244873046875, 2.34814453125, 2.451416015625, 2.5546875, 2.657958984375, 2.76123046875, 2.864501953125, 2.9677734375, 3.071044921875, 3.17431640625, 3.277587890625, 3.380859375]}, "gradients/decoder.transformer.h.16.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 3.0, 2.0, 2.0, 3.0, 4.0, 7.0, 6.0, 7.0, 6.0, 9.0, 15.0, 18.0, 22.0, 21.0, 20.0, 16.0, 31.0, 27.0, 39.0, 34.0, 52.0, 59.0, 59.0, 118.0, 265.0, 1501.0, 200.0, 82.0, 83.0, 52.0, 33.0, 32.0, 34.0, 30.0, 31.0, 34.0, 21.0, 13.0, 15.0, 16.0, 9.0, 7.0, 10.0, 6.0, 4.0, 2.0, 1.0, 1.0, 1.0, 0.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-13.203125, -12.7373046875, -12.271484375, -11.8056640625, -11.33984375, -10.8740234375, -10.408203125, -9.9423828125, -9.4765625, -9.0107421875, -8.544921875, -8.0791015625, -7.61328125, -7.1474609375, -6.681640625, -6.2158203125, -5.75, -5.2841796875, -4.818359375, -4.3525390625, -3.88671875, -3.4208984375, -2.955078125, -2.4892578125, -2.0234375, -1.5576171875, -1.091796875, -0.6259765625, -0.16015625, 0.3056640625, 0.771484375, 1.2373046875, 1.703125, 2.1689453125, 2.634765625, 3.1005859375, 3.56640625, 4.0322265625, 4.498046875, 4.9638671875, 5.4296875, 5.8955078125, 6.361328125, 6.8271484375, 7.29296875, 7.7587890625, 8.224609375, 8.6904296875, 9.15625, 9.6220703125, 10.087890625, 10.5537109375, 11.01953125, 11.4853515625, 11.951171875, 12.4169921875, 12.8828125, 13.3486328125, 13.814453125, 14.2802734375, 14.74609375, 15.2119140625, 15.677734375, 16.1435546875, 16.609375]}, "gradients/decoder.transformer.h.16.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 0.0, 1.0, 3.0, 7.0, 6.0, 4.0, 14.0, 30.0, 28.0, 57.0, 69.0, 113.0, 164.0, 283.0, 592.0, 2881.0, 2121228.0, 1016304.0, 2531.0, 585.0, 321.0, 155.0, 94.0, 78.0, 45.0, 40.0, 23.0, 21.0, 10.0, 6.0, 12.0, 5.0, 2.0, 4.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-43.71875, -42.15576171875, -40.5927734375, -39.02978515625, -37.466796875, -35.90380859375, -34.3408203125, -32.77783203125, -31.21484375, -29.65185546875, -28.0888671875, -26.52587890625, -24.962890625, -23.39990234375, -21.8369140625, -20.27392578125, -18.7109375, -17.14794921875, -15.5849609375, -14.02197265625, -12.458984375, -10.89599609375, -9.3330078125, -7.77001953125, -6.20703125, -4.64404296875, -3.0810546875, -1.51806640625, 0.044921875, 1.60791015625, 3.1708984375, 4.73388671875, 6.296875, 7.85986328125, 9.4228515625, 10.98583984375, 12.548828125, 14.11181640625, 15.6748046875, 17.23779296875, 18.80078125, 20.36376953125, 21.9267578125, 23.48974609375, 25.052734375, 26.61572265625, 28.1787109375, 29.74169921875, 31.3046875, 32.86767578125, 34.4306640625, 35.99365234375, 37.556640625, 39.11962890625, 40.6826171875, 42.24560546875, 43.80859375, 45.37158203125, 46.9345703125, 48.49755859375, 50.060546875, 51.62353515625, 53.1865234375, 54.74951171875, 56.3125]}, "gradients/decoder.transformer.h.16.ln_1.weight": {"_type": "histogram", "values": [427.0, 586.0, 7.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.176066398620605, -0.4833803176879883, 7.209305763244629, 14.901991844177246, 22.594676971435547, 30.287364959716797, 37.98004913330078, 45.672733306884766, 53.365421295166016, 61.05810546875, 68.75079345703125, 76.4434814453125, 84.13616943359375, 91.828857421875, 99.52153778076172, 107.21421813964844, 114.90691375732422, 122.59960174560547, 130.2922821044922, 137.98497009277344, 145.6776580810547, 153.37034606933594, 161.0630340576172, 168.75570678710938, 176.44839477539062, 184.14108276367188, 191.83377075195312, 199.52645874023438, 207.21914672851562, 214.91183471679688, 222.60450744628906, 230.2971954345703, 237.98989868164062, 245.68258666992188, 253.37527465820312, 261.0679626464844, 268.7606506347656, 276.4533386230469, 284.1460266113281, 291.83868408203125, 299.5313720703125, 307.22406005859375, 314.916748046875, 322.60943603515625, 330.3021240234375, 337.99481201171875, 345.6875, 353.38018798828125, 361.0728759765625, 368.76556396484375, 376.458251953125, 384.15093994140625, 391.8436279296875, 399.53631591796875, 407.22900390625, 414.92169189453125, 422.6143798828125, 430.30706787109375, 437.999755859375, 445.69244384765625, 453.3851318359375, 461.07781982421875, 468.7705078125, 476.46319580078125, 484.1558532714844]}, "gradients/decoder.transformer.h.16.ln_1.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 0.0, 6.0, 2.0, 4.0, 7.0, 10.0, 12.0, 11.0, 12.0, 13.0, 16.0, 18.0, 25.0, 27.0, 33.0, 33.0, 26.0, 27.0, 37.0, 39.0, 32.0, 39.0, 42.0, 42.0, 39.0, 43.0, 21.0, 36.0, 28.0, 43.0, 37.0, 41.0, 23.0, 29.0, 26.0, 15.0, 24.0, 17.0, 13.0, 14.0, 11.0, 7.0, 8.0, 7.0, 5.0, 7.0, 3.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-40.046756744384766, -38.795963287353516, -37.545169830322266, -36.294376373291016, -35.0435791015625, -33.79278564453125, -32.5419921875, -31.29119873046875, -30.0404052734375, -28.78961181640625, -27.538818359375, -26.288022994995117, -25.037229537963867, -23.786436080932617, -22.535640716552734, -21.284847259521484, -20.034053802490234, -18.783260345458984, -17.532466888427734, -16.28167152404785, -15.030878067016602, -13.780084609985352, -12.529290199279785, -11.278495788574219, -10.027702331542969, -8.776908874511719, -7.526114463806152, -6.275320529937744, -5.024526596069336, -3.7737326622009277, -2.5229387283325195, -1.2721443176269531, -0.02135467529296875, 1.2294392585754395, 2.4802331924438477, 3.731027126312256, 4.981821060180664, 6.232614994049072, 7.4834089279174805, 8.734203338623047, 9.984996795654297, 11.235790252685547, 12.486584663391113, 13.73737907409668, 14.98817253112793, 16.23896598815918, 17.489761352539062, 18.740554809570312, 19.991348266601562, 21.242141723632812, 22.492935180664062, 23.743730545043945, 24.994524002075195, 26.245317459106445, 27.496112823486328, 28.746906280517578, 29.997699737548828, 31.248493194580078, 32.49928665161133, 33.75008010864258, 35.000877380371094, 36.251670837402344, 37.502464294433594, 38.753257751464844, 40.004051208496094]}, "gradients/decoder.transformer.h.15.mlp.c_proj.bias": {"_type": "histogram", "values": [3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 4.0, 4.0, 3.0, 5.0, 5.0, 5.0, 7.0, 10.0, 19.0, 21.0, 19.0, 20.0, 22.0, 27.0, 30.0, 36.0, 39.0, 39.0, 37.0, 42.0, 47.0, 52.0, 39.0, 46.0, 37.0, 45.0, 47.0, 40.0, 29.0, 37.0, 31.0, 28.0, 21.0, 26.0, 23.0, 17.0, 9.0, 8.0, 8.0, 6.0, 3.0, 5.0, 3.0, 5.0, 2.0, 4.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-4.09375, -3.95501708984375, -3.8162841796875, -3.67755126953125, -3.538818359375, -3.40008544921875, -3.2613525390625, -3.12261962890625, -2.98388671875, -2.84515380859375, -2.7064208984375, -2.56768798828125, -2.428955078125, -2.29022216796875, -2.1514892578125, -2.01275634765625, -1.8740234375, -1.73529052734375, -1.5965576171875, -1.45782470703125, -1.319091796875, -1.18035888671875, -1.0416259765625, -0.90289306640625, -0.76416015625, -0.62542724609375, -0.4866943359375, -0.34796142578125, -0.209228515625, -0.07049560546875, 0.0682373046875, 0.20697021484375, 0.345703125, 0.48443603515625, 0.6231689453125, 0.76190185546875, 0.900634765625, 1.03936767578125, 1.1781005859375, 1.31683349609375, 1.45556640625, 1.59429931640625, 1.7330322265625, 1.87176513671875, 2.010498046875, 2.14923095703125, 2.2879638671875, 2.42669677734375, 2.5654296875, 2.70416259765625, 2.8428955078125, 2.98162841796875, 3.120361328125, 3.25909423828125, 3.3978271484375, 3.53656005859375, 3.67529296875, 3.81402587890625, 3.9527587890625, 4.09149169921875, 4.230224609375, 4.36895751953125, 4.5076904296875, 4.64642333984375, 4.78515625]}, "gradients/decoder.transformer.h.15.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 4.0, 4.0, 6.0, 9.0, 11.0, 18.0, 26.0, 34.0, 52.0, 78.0, 96.0, 161.0, 320.0, 499.0, 827.0, 1541.0, 2939.0, 6104.0, 15212.0, 43363.0, 142072.0, 483690.0, 1284316.0, 1394234.0, 567888.0, 168594.0, 50981.0, 17481.0, 6871.0, 3120.0, 1547.0, 855.0, 504.0, 307.0, 171.0, 118.0, 77.0, 51.0, 31.0, 20.0, 17.0, 16.0, 13.0, 6.0, 3.0, 2.0, 6.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.078125, -5.89971923828125, -5.7213134765625, -5.54290771484375, -5.364501953125, -5.18609619140625, -5.0076904296875, -4.82928466796875, -4.65087890625, -4.47247314453125, -4.2940673828125, -4.11566162109375, -3.937255859375, -3.75885009765625, -3.5804443359375, -3.40203857421875, -3.2236328125, -3.04522705078125, -2.8668212890625, -2.68841552734375, -2.510009765625, -2.33160400390625, -2.1531982421875, -1.97479248046875, -1.79638671875, -1.61798095703125, -1.4395751953125, -1.26116943359375, -1.082763671875, -0.90435791015625, -0.7259521484375, -0.54754638671875, -0.369140625, -0.19073486328125, -0.0123291015625, 0.16607666015625, 0.344482421875, 0.52288818359375, 0.7012939453125, 0.87969970703125, 1.05810546875, 1.23651123046875, 1.4149169921875, 1.59332275390625, 1.771728515625, 1.95013427734375, 2.1285400390625, 2.30694580078125, 2.4853515625, 2.66375732421875, 2.8421630859375, 3.02056884765625, 3.198974609375, 3.37738037109375, 3.5557861328125, 3.73419189453125, 3.91259765625, 4.09100341796875, 4.2694091796875, 4.44781494140625, 4.626220703125, 4.80462646484375, 4.9830322265625, 5.16143798828125, 5.33984375]}, "gradients/decoder.transformer.h.15.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 4.0, 4.0, 5.0, 1.0, 3.0, 8.0, 8.0, 15.0, 18.0, 19.0, 52.0, 75.0, 95.0, 170.0, 262.0, 359.0, 406.0, 563.0, 557.0, 458.0, 337.0, 222.0, 142.0, 105.0, 47.0, 44.0, 33.0, 21.0, 21.0, 12.0, 7.0, 7.0, 5.0, 3.0, 1.0, 2.0, 1.0, 0.0, 1.0], "bins": [-16.90625, -16.511962890625, -16.11767578125, -15.723388671875, -15.3291015625, -14.934814453125, -14.54052734375, -14.146240234375, -13.751953125, -13.357666015625, -12.96337890625, -12.569091796875, -12.1748046875, -11.780517578125, -11.38623046875, -10.991943359375, -10.59765625, -10.203369140625, -9.80908203125, -9.414794921875, -9.0205078125, -8.626220703125, -8.23193359375, -7.837646484375, -7.443359375, -7.049072265625, -6.65478515625, -6.260498046875, -5.8662109375, -5.471923828125, -5.07763671875, -4.683349609375, -4.2890625, -3.894775390625, -3.50048828125, -3.106201171875, -2.7119140625, -2.317626953125, -1.92333984375, -1.529052734375, -1.134765625, -0.740478515625, -0.34619140625, 0.048095703125, 0.4423828125, 0.836669921875, 1.23095703125, 1.625244140625, 2.01953125, 2.413818359375, 2.80810546875, 3.202392578125, 3.5966796875, 3.990966796875, 4.38525390625, 4.779541015625, 5.173828125, 5.568115234375, 5.96240234375, 6.356689453125, 6.7509765625, 7.145263671875, 7.53955078125, 7.933837890625, 8.328125]}, "gradients/decoder.transformer.h.15.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 3.0, 4.0, 2.0, 4.0, 9.0, 12.0, 22.0, 23.0, 39.0, 56.0, 99.0, 191.0, 392.0, 841.0, 2540.0, 13156.0, 170703.0, 2795884.0, 1146506.0, 54767.0, 6215.0, 1588.0, 600.0, 284.0, 144.0, 82.0, 48.0, 24.0, 21.0, 13.0, 8.0, 5.0, 4.0, 4.0, 5.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-14.6875, -13.9814453125, -13.275390625, -12.5693359375, -11.86328125, -11.1572265625, -10.451171875, -9.7451171875, -9.0390625, -8.3330078125, -7.626953125, -6.9208984375, -6.21484375, -5.5087890625, -4.802734375, -4.0966796875, -3.390625, -2.6845703125, -1.978515625, -1.2724609375, -0.56640625, 0.1396484375, 0.845703125, 1.5517578125, 2.2578125, 2.9638671875, 3.669921875, 4.3759765625, 5.08203125, 5.7880859375, 6.494140625, 7.2001953125, 7.90625, 8.6123046875, 9.318359375, 10.0244140625, 10.73046875, 11.4365234375, 12.142578125, 12.8486328125, 13.5546875, 14.2607421875, 14.966796875, 15.6728515625, 16.37890625, 17.0849609375, 17.791015625, 18.4970703125, 19.203125, 19.9091796875, 20.615234375, 21.3212890625, 22.02734375, 22.7333984375, 23.439453125, 24.1455078125, 24.8515625, 25.5576171875, 26.263671875, 26.9697265625, 27.67578125, 28.3818359375, 29.087890625, 29.7939453125, 30.5]}, "gradients/decoder.transformer.h.15.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 9.0, 20.0, 54.0, 90.0, 202.0, 204.0, 210.0, 129.0, 58.0, 28.0, 6.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-154.40191650390625, -151.1575164794922, -147.91311645507812, -144.66871643066406, -141.42431640625, -138.17991638183594, -134.93551635742188, -131.6911163330078, -128.44671630859375, -125.20231628417969, -121.95791625976562, -118.71351623535156, -115.4691162109375, -112.22471618652344, -108.98031616210938, -105.73591613769531, -102.49152374267578, -99.24712371826172, -96.00272369384766, -92.7583236694336, -89.51392364501953, -86.26952362060547, -83.02513122558594, -79.78073120117188, -76.53633117675781, -73.29193115234375, -70.04753112792969, -66.80313110351562, -63.55873107910156, -60.3143310546875, -57.0699348449707, -53.82553482055664, -50.58113479614258, -47.336734771728516, -44.09233474731445, -40.847938537597656, -37.603538513183594, -34.35913848876953, -31.11473846435547, -27.870338439941406, -24.625938415527344, -21.38153839111328, -18.13713836669922, -14.892740249633789, -11.648340225219727, -8.403940200805664, -5.159542083740234, -1.9151420593261719, 1.3292579650878906, 4.573657512664795, 7.818057060241699, 11.062456130981445, 14.306856155395508, 17.55125617980957, 20.795654296875, 24.040054321289062, 27.284454345703125, 30.528854370117188, 33.77325439453125, 37.01765441894531, 40.262054443359375, 43.50645446777344, 46.750850677490234, 49.9952507019043, 53.23965072631836]}, "gradients/decoder.transformer.h.15.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 3.0, 2.0, 6.0, 5.0, 5.0, 8.0, 9.0, 3.0, 10.0, 14.0, 19.0, 8.0, 16.0, 22.0, 21.0, 36.0, 35.0, 38.0, 30.0, 36.0, 34.0, 31.0, 48.0, 41.0, 35.0, 54.0, 43.0, 36.0, 33.0, 41.0, 43.0, 34.0, 35.0, 28.0, 17.0, 36.0, 14.0, 17.0, 22.0, 10.0, 10.0, 6.0, 4.0, 8.0, 2.0, 1.0, 2.0, 3.0, 3.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-31.089622497558594, -30.034391403198242, -28.97916030883789, -27.923927307128906, -26.868696212768555, -25.813465118408203, -24.75823402404785, -23.7030029296875, -22.647769927978516, -21.592538833618164, -20.537307739257812, -19.482074737548828, -18.426843643188477, -17.371612548828125, -16.316381454467773, -15.261150360107422, -14.20591926574707, -13.150688171386719, -12.09545612335205, -11.0402250289917, -9.984992980957031, -8.92976188659668, -7.874530792236328, -6.819299221038818, -5.764067649841309, -4.708836078643799, -3.653604745864868, -2.5983734130859375, -1.5431418418884277, -0.48791027069091797, 0.5673208236694336, 1.6225523948669434, 2.677783966064453, 3.733015537261963, 4.788247108459473, 5.843478202819824, 6.898709774017334, 7.953941345214844, 9.009172439575195, 10.064403533935547, 11.119635581970215, 12.174866676330566, 13.230098724365234, 14.285329818725586, 15.340560913085938, 16.395793914794922, 17.45102310180664, 18.506256103515625, 19.561487197875977, 20.616718292236328, 21.67194938659668, 22.72718048095703, 23.782413482666016, 24.837644577026367, 25.89287567138672, 26.94810676574707, 28.003337860107422, 29.058568954467773, 30.113800048828125, 31.16903305053711, 32.22426223754883, 33.27949523925781, 34.33472442626953, 35.389957427978516, 36.4451904296875]}, "gradients/decoder.transformer.h.15.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 3.0, 2.0, 1.0, 2.0, 2.0, 7.0, 6.0, 4.0, 4.0, 4.0, 12.0, 5.0, 14.0, 17.0, 18.0, 27.0, 20.0, 29.0, 32.0, 29.0, 31.0, 40.0, 39.0, 45.0, 48.0, 34.0, 38.0, 55.0, 43.0, 45.0, 39.0, 34.0, 39.0, 32.0, 34.0, 34.0, 26.0, 17.0, 19.0, 11.0, 12.0, 19.0, 9.0, 6.0, 2.0, 8.0, 5.0, 3.0, 2.0, 4.0, 3.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0], "bins": [-3.908203125, -3.772247314453125, -3.63629150390625, -3.500335693359375, -3.3643798828125, -3.228424072265625, -3.09246826171875, -2.956512451171875, -2.820556640625, -2.684600830078125, -2.54864501953125, -2.412689208984375, -2.2767333984375, -2.140777587890625, -2.00482177734375, -1.868865966796875, -1.73291015625, -1.596954345703125, -1.46099853515625, -1.325042724609375, -1.1890869140625, -1.053131103515625, -0.91717529296875, -0.781219482421875, -0.645263671875, -0.509307861328125, -0.37335205078125, -0.237396240234375, -0.1014404296875, 0.034515380859375, 0.17047119140625, 0.306427001953125, 0.4423828125, 0.578338623046875, 0.71429443359375, 0.850250244140625, 0.9862060546875, 1.122161865234375, 1.25811767578125, 1.394073486328125, 1.530029296875, 1.665985107421875, 1.80194091796875, 1.937896728515625, 2.0738525390625, 2.209808349609375, 2.34576416015625, 2.481719970703125, 2.61767578125, 2.753631591796875, 2.88958740234375, 3.025543212890625, 3.1614990234375, 3.297454833984375, 3.43341064453125, 3.569366455078125, 3.705322265625, 3.841278076171875, 3.97723388671875, 4.113189697265625, 4.2491455078125, 4.385101318359375, 4.52105712890625, 4.657012939453125, 4.79296875]}, "gradients/decoder.transformer.h.15.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 5.0, 9.0, 11.0, 6.0, 22.0, 30.0, 46.0, 91.0, 112.0, 210.0, 341.0, 548.0, 897.0, 1456.0, 2481.0, 4157.0, 6907.0, 12259.0, 21374.0, 37066.0, 62979.0, 102846.0, 153025.0, 186872.0, 165440.0, 116008.0, 71975.0, 42661.0, 24301.0, 14141.0, 7983.0, 4976.0, 2875.0, 1697.0, 1050.0, 674.0, 390.0, 245.0, 147.0, 109.0, 53.0, 41.0, 18.0, 10.0, 6.0, 9.0, 2.0, 2.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0], "bins": [-0.68896484375, -0.666473388671875, -0.64398193359375, -0.621490478515625, -0.5989990234375, -0.576507568359375, -0.55401611328125, -0.531524658203125, -0.509033203125, -0.486541748046875, -0.46405029296875, -0.441558837890625, -0.4190673828125, -0.396575927734375, -0.37408447265625, -0.351593017578125, -0.3291015625, -0.306610107421875, -0.28411865234375, -0.261627197265625, -0.2391357421875, -0.216644287109375, -0.19415283203125, -0.171661376953125, -0.149169921875, -0.126678466796875, -0.10418701171875, -0.081695556640625, -0.0592041015625, -0.036712646484375, -0.01422119140625, 0.008270263671875, 0.03076171875, 0.053253173828125, 0.07574462890625, 0.098236083984375, 0.1207275390625, 0.143218994140625, 0.16571044921875, 0.188201904296875, 0.210693359375, 0.233184814453125, 0.25567626953125, 0.278167724609375, 0.3006591796875, 0.323150634765625, 0.34564208984375, 0.368133544921875, 0.390625, 0.413116455078125, 0.43560791015625, 0.458099365234375, 0.4805908203125, 0.503082275390625, 0.52557373046875, 0.548065185546875, 0.570556640625, 0.593048095703125, 0.61553955078125, 0.638031005859375, 0.6605224609375, 0.683013916015625, 0.70550537109375, 0.727996826171875, 0.75048828125]}, "gradients/decoder.transformer.h.15.crossattention.c_attn.bias": {"_type": "histogram", "values": [3.0, 3.0, 1.0, 4.0, 2.0, 3.0, 3.0, 9.0, 10.0, 8.0, 14.0, 15.0, 21.0, 15.0, 16.0, 24.0, 34.0, 34.0, 29.0, 28.0, 46.0, 38.0, 36.0, 39.0, 46.0, 1070.0, 39.0, 44.0, 36.0, 43.0, 34.0, 46.0, 36.0, 33.0, 21.0, 22.0, 21.0, 20.0, 19.0, 16.0, 16.0, 13.0, 11.0, 4.0, 3.0, 5.0, 3.0, 2.0, 5.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.611328125, -2.510650634765625, -2.40997314453125, -2.309295654296875, -2.2086181640625, -2.107940673828125, -2.00726318359375, -1.906585693359375, -1.805908203125, -1.705230712890625, -1.60455322265625, -1.503875732421875, -1.4031982421875, -1.302520751953125, -1.20184326171875, -1.101165771484375, -1.00048828125, -0.899810791015625, -0.79913330078125, -0.698455810546875, -0.5977783203125, -0.497100830078125, -0.39642333984375, -0.295745849609375, -0.195068359375, -0.094390869140625, 0.00628662109375, 0.106964111328125, 0.2076416015625, 0.308319091796875, 0.40899658203125, 0.509674072265625, 0.6103515625, 0.711029052734375, 0.81170654296875, 0.912384033203125, 1.0130615234375, 1.113739013671875, 1.21441650390625, 1.315093994140625, 1.415771484375, 1.516448974609375, 1.61712646484375, 1.717803955078125, 1.8184814453125, 1.919158935546875, 2.01983642578125, 2.120513916015625, 2.22119140625, 2.321868896484375, 2.42254638671875, 2.523223876953125, 2.6239013671875, 2.724578857421875, 2.82525634765625, 2.925933837890625, 3.026611328125, 3.127288818359375, 3.22796630859375, 3.328643798828125, 3.4293212890625, 3.529998779296875, 3.63067626953125, 3.731353759765625, 3.83203125]}, "gradients/decoder.transformer.h.15.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 7.0, 17.0, 6.0, 24.0, 22.0, 33.0, 75.0, 94.0, 156.0, 245.0, 384.0, 661.0, 1124.0, 1728.0, 2879.0, 4553.0, 7800.0, 12968.0, 21814.0, 37609.0, 63302.0, 101329.0, 149641.0, 1211148.0, 176875.0, 117278.0, 75097.0, 45093.0, 26336.0, 15462.0, 9182.0, 5529.0, 3216.0, 2107.0, 1238.0, 810.0, 456.0, 298.0, 204.0, 122.0, 83.0, 62.0, 31.0, 14.0, 14.0, 6.0, 3.0, 5.0, 1.0, 2.0, 0.0, 0.0, 1.0], "bins": [-0.440673828125, -0.4279632568359375, -0.415252685546875, -0.4025421142578125, -0.38983154296875, -0.3771209716796875, -0.364410400390625, -0.3516998291015625, -0.3389892578125, -0.3262786865234375, -0.313568115234375, -0.3008575439453125, -0.28814697265625, -0.2754364013671875, -0.262725830078125, -0.2500152587890625, -0.2373046875, -0.2245941162109375, -0.211883544921875, -0.1991729736328125, -0.18646240234375, -0.1737518310546875, -0.161041259765625, -0.1483306884765625, -0.1356201171875, -0.1229095458984375, -0.110198974609375, -0.0974884033203125, -0.08477783203125, -0.0720672607421875, -0.059356689453125, -0.0466461181640625, -0.033935546875, -0.0212249755859375, -0.008514404296875, 0.0041961669921875, 0.01690673828125, 0.0296173095703125, 0.042327880859375, 0.0550384521484375, 0.0677490234375, 0.0804595947265625, 0.093170166015625, 0.1058807373046875, 0.11859130859375, 0.1313018798828125, 0.144012451171875, 0.1567230224609375, 0.16943359375, 0.1821441650390625, 0.194854736328125, 0.2075653076171875, 0.22027587890625, 0.2329864501953125, 0.245697021484375, 0.2584075927734375, 0.2711181640625, 0.2838287353515625, 0.296539306640625, 0.3092498779296875, 0.32196044921875, 0.3346710205078125, 0.347381591796875, 0.3600921630859375, 0.372802734375]}, "gradients/decoder.transformer.h.15.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 4.0, 3.0, 5.0, 9.0, 7.0, 10.0, 17.0, 23.0, 24.0, 39.0, 34.0, 45.0, 46.0, 61.0, 58.0, 50.0, 74.0, 78.0, 55.0, 53.0, 44.0, 48.0, 44.0, 32.0, 33.0, 17.0, 24.0, 25.0, 15.0, 5.0, 5.0, 4.0, 4.0, 6.0, 4.0, 3.0, 5.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0194091796875, -0.018801212310791016, -0.01819324493408203, -0.017585277557373047, -0.016977310180664062, -0.016369342803955078, -0.015761375427246094, -0.01515340805053711, -0.014545440673828125, -0.01393747329711914, -0.013329505920410156, -0.012721538543701172, -0.012113571166992188, -0.011505603790283203, -0.010897636413574219, -0.010289669036865234, -0.00968170166015625, -0.009073734283447266, -0.008465766906738281, -0.007857799530029297, -0.0072498321533203125, -0.006641864776611328, -0.006033897399902344, -0.005425930023193359, -0.004817962646484375, -0.004209995269775391, -0.0036020278930664062, -0.002994060516357422, -0.0023860931396484375, -0.0017781257629394531, -0.0011701583862304688, -0.0005621910095214844, 4.57763671875e-05, 0.0006537437438964844, 0.0012617111206054688, 0.0018696784973144531, 0.0024776458740234375, 0.003085613250732422, 0.0036935806274414062, 0.004301548004150391, 0.004909515380859375, 0.005517482757568359, 0.006125450134277344, 0.006733417510986328, 0.0073413848876953125, 0.007949352264404297, 0.008557319641113281, 0.009165287017822266, 0.00977325439453125, 0.010381221771240234, 0.010989189147949219, 0.011597156524658203, 0.012205123901367188, 0.012813091278076172, 0.013421058654785156, 0.01402902603149414, 0.014636993408203125, 0.01524496078491211, 0.015852928161621094, 0.016460895538330078, 0.017068862915039062, 0.017676830291748047, 0.01828479766845703, 0.018892765045166016, 0.019500732421875]}, "gradients/decoder.transformer.h.15.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 2.0, 3.0, 1.0, 2.0, 6.0, 4.0, 5.0, 7.0, 5.0, 2.0, 19.0, 20.0, 25.0, 29.0, 36.0, 36.0, 55.0, 119.0, 158.0, 269.0, 528.0, 1267.0, 122106.0, 920240.0, 2089.0, 593.0, 320.0, 205.0, 121.0, 90.0, 57.0, 45.0, 28.0, 24.0, 15.0, 9.0, 5.0, 7.0, 6.0, 5.0, 4.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.333251953125, -0.3228797912597656, -0.31250762939453125, -0.3021354675292969, -0.2917633056640625, -0.2813911437988281, -0.27101898193359375, -0.2606468200683594, -0.250274658203125, -0.23990249633789062, -0.22953033447265625, -0.21915817260742188, -0.2087860107421875, -0.19841384887695312, -0.18804168701171875, -0.17766952514648438, -0.16729736328125, -0.15692520141601562, -0.14655303955078125, -0.13618087768554688, -0.1258087158203125, -0.11543655395507812, -0.10506439208984375, -0.09469223022460938, -0.084320068359375, -0.07394790649414062, -0.06357574462890625, -0.053203582763671875, -0.0428314208984375, -0.032459259033203125, -0.02208709716796875, -0.011714935302734375, -0.0013427734375, 0.009029388427734375, 0.01940155029296875, 0.029773712158203125, 0.0401458740234375, 0.050518035888671875, 0.06089019775390625, 0.07126235961914062, 0.081634521484375, 0.09200668334960938, 0.10237884521484375, 0.11275100708007812, 0.1231231689453125, 0.13349533081054688, 0.14386749267578125, 0.15423965454101562, 0.16461181640625, 0.17498397827148438, 0.18535614013671875, 0.19572830200195312, 0.2061004638671875, 0.21647262573242188, 0.22684478759765625, 0.23721694946289062, 0.247589111328125, 0.2579612731933594, 0.26833343505859375, 0.2787055969238281, 0.2890777587890625, 0.2994499206542969, 0.30982208251953125, 0.3201942443847656, 0.33056640625]}, "gradients/decoder.transformer.h.15.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 5.0, 16.0, 72.0, 627.0, 282.0, 15.0], "bins": [-0.255596786737442, -0.251468300819397, -0.24733978509902954, -0.2432112991809845, -0.23908279836177826, -0.23495429754257202, -0.23082581162452698, -0.22669731080532074, -0.2225688099861145, -0.21844030916690826, -0.21431180834770203, -0.21018332242965698, -0.20605482161045074, -0.2019263207912445, -0.19779783487319946, -0.19366933405399323, -0.189540833234787, -0.18541233241558075, -0.1812838315963745, -0.17715534567832947, -0.17302684485912323, -0.168898344039917, -0.16476985812187195, -0.1606413573026657, -0.15651285648345947, -0.15238435566425323, -0.148255854845047, -0.14412736892700195, -0.13999886810779572, -0.13587036728858948, -0.13174188137054443, -0.1276133805513382, -0.12348486483097076, -0.11935636401176453, -0.11522787064313889, -0.11109937727451324, -0.106970876455307, -0.10284237563610077, -0.09871388226747513, -0.09458538889884949, -0.09045688807964325, -0.08632838726043701, -0.08219989389181137, -0.07807140052318573, -0.07394289970397949, -0.06981439888477325, -0.06568590551614761, -0.061557408422231674, -0.057428911328315735, -0.053300414234399796, -0.049171917140483856, -0.04504342004656792, -0.04091492295265198, -0.03678642585873604, -0.0326579287648201, -0.02852943167090416, -0.02440093457698822, -0.02027243748307228, -0.01614394038915634, -0.012015443295240402, -0.007886946201324463, -0.0037584491074085236, 0.00037004798650741577, 0.004498545080423355, 0.00862704124301672]}, "gradients/decoder.transformer.h.15.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 3.0, 1.0, 5.0, 6.0, 9.0, 16.0, 10.0, 20.0, 20.0, 26.0, 33.0, 30.0, 24.0, 42.0, 34.0, 37.0, 32.0, 42.0, 37.0, 58.0, 45.0, 33.0, 28.0, 46.0, 45.0, 36.0, 39.0, 37.0, 32.0, 27.0, 25.0, 25.0, 17.0, 14.0, 21.0, 16.0, 14.0, 10.0, 3.0, 4.0, 7.0, 2.0, 1.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.026240408420562744, -0.025387877598404884, -0.024535346776247025, -0.023682815954089165, -0.022830285131931305, -0.021977754309773445, -0.021125223487615585, -0.020272692665457726, -0.019420161843299866, -0.018567631021142006, -0.017715100198984146, -0.016862569376826286, -0.016010038554668427, -0.015157507732510567, -0.014304976910352707, -0.013452446088194847, -0.012599915266036987, -0.011747384443879128, -0.010894853621721268, -0.010042322799563408, -0.009189791977405548, -0.008337261155247688, -0.0074847303330898285, -0.006632199510931969, -0.005779668688774109, -0.004927137866616249, -0.004074607044458389, -0.0032220762223005295, -0.0023695454001426697, -0.0015170145779848099, -0.0006644837558269501, 0.00018804706633090973, 0.0010405778884887695, 0.0018931087106466293, 0.002745639532804489, 0.003598170354962349, 0.004450701177120209, 0.0053032319992780685, 0.006155762821435928, 0.007008293643593788, 0.007860824465751648, 0.008713355287909508, 0.009565886110067368, 0.010418416932225227, 0.011270947754383087, 0.012123478576540947, 0.012976009398698807, 0.013828540220856667, 0.014681071043014526, 0.015533601865172386, 0.016386132687330246, 0.017238663509488106, 0.018091194331645966, 0.018943725153803825, 0.019796255975961685, 0.020648786798119545, 0.021501317620277405, 0.022353848442435265, 0.023206379264593124, 0.024058910086750984, 0.024911440908908844, 0.025763971731066704, 0.026616502553224564, 0.027469033375382423, 0.028321564197540283]}, "gradients/decoder.transformer.h.15.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 4.0, 2.0, 1.0, 2.0, 2.0, 7.0, 6.0, 4.0, 4.0, 3.0, 13.0, 5.0, 14.0, 17.0, 18.0, 27.0, 19.0, 29.0, 33.0, 29.0, 31.0, 40.0, 39.0, 43.0, 50.0, 34.0, 38.0, 55.0, 42.0, 46.0, 39.0, 34.0, 39.0, 32.0, 34.0, 34.0, 26.0, 17.0, 19.0, 11.0, 12.0, 19.0, 9.0, 6.0, 2.0, 8.0, 5.0, 3.0, 2.0, 4.0, 3.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0], "bins": [-3.91015625, -3.774169921875, -3.63818359375, -3.502197265625, -3.3662109375, -3.230224609375, -3.09423828125, -2.958251953125, -2.822265625, -2.686279296875, -2.55029296875, -2.414306640625, -2.2783203125, -2.142333984375, -2.00634765625, -1.870361328125, -1.734375, -1.598388671875, -1.46240234375, -1.326416015625, -1.1904296875, -1.054443359375, -0.91845703125, -0.782470703125, -0.646484375, -0.510498046875, -0.37451171875, -0.238525390625, -0.1025390625, 0.033447265625, 0.16943359375, 0.305419921875, 0.44140625, 0.577392578125, 0.71337890625, 0.849365234375, 0.9853515625, 1.121337890625, 1.25732421875, 1.393310546875, 1.529296875, 1.665283203125, 1.80126953125, 1.937255859375, 2.0732421875, 2.209228515625, 2.34521484375, 2.481201171875, 2.6171875, 2.753173828125, 2.88916015625, 3.025146484375, 3.1611328125, 3.297119140625, 3.43310546875, 3.569091796875, 3.705078125, 3.841064453125, 3.97705078125, 4.113037109375, 4.2490234375, 4.385009765625, 4.52099609375, 4.656982421875, 4.79296875]}, "gradients/decoder.transformer.h.15.attn.c_proj.weight": {"_type": "histogram", "values": [2.0, 2.0, 0.0, 1.0, 2.0, 3.0, 3.0, 5.0, 5.0, 13.0, 9.0, 13.0, 20.0, 28.0, 48.0, 53.0, 94.0, 111.0, 223.0, 308.0, 449.0, 781.0, 1292.0, 2235.0, 3930.0, 7748.0, 14883.0, 28741.0, 55935.0, 109372.0, 207210.0, 269167.0, 166648.0, 86297.0, 44566.0, 22726.0, 11610.0, 6121.0, 3314.0, 1821.0, 1055.0, 609.0, 366.0, 229.0, 151.0, 108.0, 70.0, 50.0, 41.0, 23.0, 28.0, 14.0, 13.0, 4.0, 7.0, 6.0, 3.0, 3.0, 2.0, 1.0, 1.0, 1.0, 0.0, 2.0], "bins": [-3.7890625, -3.6680908203125, -3.547119140625, -3.4261474609375, -3.30517578125, -3.1842041015625, -3.063232421875, -2.9422607421875, -2.8212890625, -2.7003173828125, -2.579345703125, -2.4583740234375, -2.33740234375, -2.2164306640625, -2.095458984375, -1.9744873046875, -1.853515625, -1.7325439453125, -1.611572265625, -1.4906005859375, -1.36962890625, -1.2486572265625, -1.127685546875, -1.0067138671875, -0.8857421875, -0.7647705078125, -0.643798828125, -0.5228271484375, -0.40185546875, -0.2808837890625, -0.159912109375, -0.0389404296875, 0.08203125, 0.2030029296875, 0.323974609375, 0.4449462890625, 0.56591796875, 0.6868896484375, 0.807861328125, 0.9288330078125, 1.0498046875, 1.1707763671875, 1.291748046875, 1.4127197265625, 1.53369140625, 1.6546630859375, 1.775634765625, 1.8966064453125, 2.017578125, 2.1385498046875, 2.259521484375, 2.3804931640625, 2.50146484375, 2.6224365234375, 2.743408203125, 2.8643798828125, 2.9853515625, 3.1063232421875, 3.227294921875, 3.3482666015625, 3.46923828125, 3.5902099609375, 3.711181640625, 3.8321533203125, 3.953125]}, "gradients/decoder.transformer.h.15.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 4.0, 1.0, 6.0, 3.0, 4.0, 11.0, 8.0, 13.0, 18.0, 29.0, 20.0, 30.0, 33.0, 34.0, 51.0, 56.0, 80.0, 81.0, 144.0, 374.0, 1407.0, 198.0, 89.0, 59.0, 46.0, 38.0, 44.0, 33.0, 31.0, 22.0, 16.0, 16.0, 14.0, 9.0, 12.0, 5.0, 9.0, 4.0, 2.0, 2.0, 0.0, 2.0, 2.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-18.0, -17.4033203125, -16.806640625, -16.2099609375, -15.61328125, -15.0166015625, -14.419921875, -13.8232421875, -13.2265625, -12.6298828125, -12.033203125, -11.4365234375, -10.83984375, -10.2431640625, -9.646484375, -9.0498046875, -8.453125, -7.8564453125, -7.259765625, -6.6630859375, -6.06640625, -5.4697265625, -4.873046875, -4.2763671875, -3.6796875, -3.0830078125, -2.486328125, -1.8896484375, -1.29296875, -0.6962890625, -0.099609375, 0.4970703125, 1.09375, 1.6904296875, 2.287109375, 2.8837890625, 3.48046875, 4.0771484375, 4.673828125, 5.2705078125, 5.8671875, 6.4638671875, 7.060546875, 7.6572265625, 8.25390625, 8.8505859375, 9.447265625, 10.0439453125, 10.640625, 11.2373046875, 11.833984375, 12.4306640625, 13.02734375, 13.6240234375, 14.220703125, 14.8173828125, 15.4140625, 16.0107421875, 16.607421875, 17.2041015625, 17.80078125, 18.3974609375, 18.994140625, 19.5908203125, 20.1875]}, "gradients/decoder.transformer.h.15.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0, 5.0, 3.0, 4.0, 12.0, 20.0, 38.0, 39.0, 58.0, 120.0, 199.0, 305.0, 817.0, 16791.0, 3116965.0, 8764.0, 747.0, 350.0, 183.0, 116.0, 68.0, 45.0, 28.0, 12.0, 14.0, 3.0, 4.0, 1.0, 2.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-61.03125, -58.88525390625, -56.7392578125, -54.59326171875, -52.447265625, -50.30126953125, -48.1552734375, -46.00927734375, -43.86328125, -41.71728515625, -39.5712890625, -37.42529296875, -35.279296875, -33.13330078125, -30.9873046875, -28.84130859375, -26.6953125, -24.54931640625, -22.4033203125, -20.25732421875, -18.111328125, -15.96533203125, -13.8193359375, -11.67333984375, -9.52734375, -7.38134765625, -5.2353515625, -3.08935546875, -0.943359375, 1.20263671875, 3.3486328125, 5.49462890625, 7.640625, 9.78662109375, 11.9326171875, 14.07861328125, 16.224609375, 18.37060546875, 20.5166015625, 22.66259765625, 24.80859375, 26.95458984375, 29.1005859375, 31.24658203125, 33.392578125, 35.53857421875, 37.6845703125, 39.83056640625, 41.9765625, 44.12255859375, 46.2685546875, 48.41455078125, 50.560546875, 52.70654296875, 54.8525390625, 56.99853515625, 59.14453125, 61.29052734375, 63.4365234375, 65.58251953125, 67.728515625, 69.87451171875, 72.0205078125, 74.16650390625, 76.3125]}, "gradients/decoder.transformer.h.15.ln_1.weight": {"_type": "histogram", "values": [2.0, 517.0, 496.0, 3.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-18.420413970947266, -9.360157012939453, -0.2999000549316406, 8.760356903076172, 17.820613861083984, 26.880870819091797, 35.94112777709961, 45.00138473510742, 54.061641693115234, 63.12189865112305, 72.18215942382812, 81.24241638183594, 90.30267333984375, 99.36293029785156, 108.42318725585938, 117.48344421386719, 126.543701171875, 135.6039581298828, 144.66421508789062, 153.72447204589844, 162.78472900390625, 171.84498596191406, 180.90524291992188, 189.9654998779297, 199.0257568359375, 208.0860137939453, 217.14627075195312, 226.20652770996094, 235.26678466796875, 244.32704162597656, 253.38729858398438, 262.44757080078125, 271.5078125, 280.56805419921875, 289.6283264160156, 298.6885986328125, 307.74884033203125, 316.80908203125, 325.8693542480469, 334.92962646484375, 343.9898681640625, 353.05010986328125, 362.1103820800781, 371.170654296875, 380.23089599609375, 389.2911376953125, 398.3514099121094, 407.41168212890625, 416.471923828125, 425.53216552734375, 434.5924377441406, 443.6527099609375, 452.71295166015625, 461.773193359375, 470.8334655761719, 479.89373779296875, 488.9539794921875, 498.01422119140625, 507.0744934082031, 516.134765625, 525.1950073242188, 534.2552490234375, 543.3155517578125, 552.3757934570312, 561.43603515625]}, "gradients/decoder.transformer.h.15.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 0.0, 4.0, 5.0, 8.0, 4.0, 7.0, 7.0, 10.0, 16.0, 16.0, 18.0, 18.0, 19.0, 28.0, 28.0, 22.0, 35.0, 46.0, 47.0, 46.0, 42.0, 49.0, 38.0, 46.0, 35.0, 45.0, 42.0, 33.0, 30.0, 29.0, 39.0, 26.0, 25.0, 25.0, 29.0, 19.0, 10.0, 12.0, 13.0, 9.0, 9.0, 4.0, 5.0, 9.0, 6.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-42.22984313964844, -40.94559097290039, -39.66133499145508, -38.37708282470703, -37.09282684326172, -35.80857467651367, -34.524322509765625, -33.24006652832031, -31.955814361572266, -30.671560287475586, -29.387306213378906, -28.10305404663086, -26.81879997253418, -25.5345458984375, -24.25029182434082, -22.96603775024414, -21.68178367614746, -20.39752960205078, -19.1132755279541, -17.829021453857422, -16.544769287109375, -15.260515213012695, -13.976261138916016, -12.692008018493652, -11.407753944396973, -10.123499870300293, -8.83924674987793, -7.55499267578125, -6.2707390785217285, -4.986485481262207, -3.7022314071655273, -2.417978286743164, -1.1337242126464844, 0.15052950382232666, 1.4347832202911377, 2.7190370559692383, 4.00329065322876, 5.287544250488281, 6.571798324584961, 7.856051445007324, 9.140305519104004, 10.424559593200684, 11.708812713623047, 12.993066787719727, 14.277320861816406, 15.56157398223877, 16.845829010009766, 18.130081176757812, 19.414335250854492, 20.698589324951172, 21.98284339904785, 23.26709747314453, 24.551349639892578, 25.835603713989258, 27.119857788085938, 28.404109954833984, 29.688365936279297, 30.972620010375977, 32.256874084472656, 33.5411262512207, 34.825382232666016, 36.10963439941406, 37.393890380859375, 38.67814254760742, 39.96239471435547]}, "gradients/decoder.transformer.h.14.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 3.0, 3.0, 4.0, 5.0, 5.0, 5.0, 4.0, 6.0, 10.0, 12.0, 16.0, 14.0, 14.0, 28.0, 24.0, 24.0, 28.0, 39.0, 35.0, 29.0, 30.0, 36.0, 53.0, 35.0, 40.0, 42.0, 44.0, 37.0, 34.0, 51.0, 35.0, 34.0, 35.0, 30.0, 24.0, 29.0, 24.0, 15.0, 13.0, 11.0, 12.0, 4.0, 7.0, 10.0, 3.0, 3.0, 7.0, 2.0, 2.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 3.0], "bins": [-4.109375, -3.96795654296875, -3.8265380859375, -3.68511962890625, -3.543701171875, -3.40228271484375, -3.2608642578125, -3.11944580078125, -2.97802734375, -2.83660888671875, -2.6951904296875, -2.55377197265625, -2.412353515625, -2.27093505859375, -2.1295166015625, -1.98809814453125, -1.8466796875, -1.70526123046875, -1.5638427734375, -1.42242431640625, -1.281005859375, -1.13958740234375, -0.9981689453125, -0.85675048828125, -0.71533203125, -0.57391357421875, -0.4324951171875, -0.29107666015625, -0.149658203125, -0.00823974609375, 0.1331787109375, 0.27459716796875, 0.416015625, 0.55743408203125, 0.6988525390625, 0.84027099609375, 0.981689453125, 1.12310791015625, 1.2645263671875, 1.40594482421875, 1.54736328125, 1.68878173828125, 1.8302001953125, 1.97161865234375, 2.113037109375, 2.25445556640625, 2.3958740234375, 2.53729248046875, 2.6787109375, 2.82012939453125, 2.9615478515625, 3.10296630859375, 3.244384765625, 3.38580322265625, 3.5272216796875, 3.66864013671875, 3.81005859375, 3.95147705078125, 4.0928955078125, 4.23431396484375, 4.375732421875, 4.51715087890625, 4.6585693359375, 4.79998779296875, 4.94140625]}, "gradients/decoder.transformer.h.14.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 2.0, 2.0, 3.0, 4.0, 5.0, 7.0, 3.0, 3.0, 7.0, 12.0, 16.0, 13.0, 14.0, 26.0, 20.0, 31.0, 44.0, 77.0, 112.0, 208.0, 382.0, 1068.0, 4483.0, 36113.0, 602194.0, 2946071.0, 564121.0, 33467.0, 3980.0, 921.0, 351.0, 171.0, 112.0, 53.0, 40.0, 33.0, 12.0, 28.0, 11.0, 11.0, 12.0, 18.0, 4.0, 6.0, 7.0, 2.0, 1.0, 5.0, 2.0, 2.0, 3.0, 2.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0], "bins": [-13.203125, -12.75537109375, -12.3076171875, -11.85986328125, -11.412109375, -10.96435546875, -10.5166015625, -10.06884765625, -9.62109375, -9.17333984375, -8.7255859375, -8.27783203125, -7.830078125, -7.38232421875, -6.9345703125, -6.48681640625, -6.0390625, -5.59130859375, -5.1435546875, -4.69580078125, -4.248046875, -3.80029296875, -3.3525390625, -2.90478515625, -2.45703125, -2.00927734375, -1.5615234375, -1.11376953125, -0.666015625, -0.21826171875, 0.2294921875, 0.67724609375, 1.125, 1.57275390625, 2.0205078125, 2.46826171875, 2.916015625, 3.36376953125, 3.8115234375, 4.25927734375, 4.70703125, 5.15478515625, 5.6025390625, 6.05029296875, 6.498046875, 6.94580078125, 7.3935546875, 7.84130859375, 8.2890625, 8.73681640625, 9.1845703125, 9.63232421875, 10.080078125, 10.52783203125, 10.9755859375, 11.42333984375, 11.87109375, 12.31884765625, 12.7666015625, 13.21435546875, 13.662109375, 14.10986328125, 14.5576171875, 15.00537109375, 15.453125]}, "gradients/decoder.transformer.h.14.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 4.0, 8.0, 6.0, 6.0, 9.0, 17.0, 17.0, 28.0, 29.0, 42.0, 47.0, 86.0, 109.0, 160.0, 181.0, 276.0, 340.0, 388.0, 418.0, 417.0, 348.0, 279.0, 222.0, 161.0, 121.0, 92.0, 84.0, 54.0, 40.0, 29.0, 25.0, 11.0, 14.0, 5.0, 7.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0], "bins": [-11.390625, -11.08843994140625, -10.7862548828125, -10.48406982421875, -10.181884765625, -9.87969970703125, -9.5775146484375, -9.27532958984375, -8.97314453125, -8.67095947265625, -8.3687744140625, -8.06658935546875, -7.764404296875, -7.46221923828125, -7.1600341796875, -6.85784912109375, -6.5556640625, -6.25347900390625, -5.9512939453125, -5.64910888671875, -5.346923828125, -5.04473876953125, -4.7425537109375, -4.44036865234375, -4.13818359375, -3.83599853515625, -3.5338134765625, -3.23162841796875, -2.929443359375, -2.62725830078125, -2.3250732421875, -2.02288818359375, -1.720703125, -1.41851806640625, -1.1163330078125, -0.81414794921875, -0.511962890625, -0.20977783203125, 0.0924072265625, 0.39459228515625, 0.69677734375, 0.99896240234375, 1.3011474609375, 1.60333251953125, 1.905517578125, 2.20770263671875, 2.5098876953125, 2.81207275390625, 3.1142578125, 3.41644287109375, 3.7186279296875, 4.02081298828125, 4.322998046875, 4.62518310546875, 4.9273681640625, 5.22955322265625, 5.53173828125, 5.83392333984375, 6.1361083984375, 6.43829345703125, 6.740478515625, 7.04266357421875, 7.3448486328125, 7.64703369140625, 7.94921875]}, "gradients/decoder.transformer.h.14.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 9.0, 20.0, 31.0, 24.0, 55.0, 63.0, 79.0, 141.0, 202.0, 369.0, 831.0, 2928.0, 21662.0, 352088.0, 3119967.0, 653585.0, 35902.0, 4166.0, 1017.0, 454.0, 242.0, 151.0, 77.0, 63.0, 44.0, 29.0, 25.0, 20.0, 8.0, 10.0, 8.0, 3.0, 5.0, 3.0, 1.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-17.9375, -17.26318359375, -16.5888671875, -15.91455078125, -15.240234375, -14.56591796875, -13.8916015625, -13.21728515625, -12.54296875, -11.86865234375, -11.1943359375, -10.52001953125, -9.845703125, -9.17138671875, -8.4970703125, -7.82275390625, -7.1484375, -6.47412109375, -5.7998046875, -5.12548828125, -4.451171875, -3.77685546875, -3.1025390625, -2.42822265625, -1.75390625, -1.07958984375, -0.4052734375, 0.26904296875, 0.943359375, 1.61767578125, 2.2919921875, 2.96630859375, 3.640625, 4.31494140625, 4.9892578125, 5.66357421875, 6.337890625, 7.01220703125, 7.6865234375, 8.36083984375, 9.03515625, 9.70947265625, 10.3837890625, 11.05810546875, 11.732421875, 12.40673828125, 13.0810546875, 13.75537109375, 14.4296875, 15.10400390625, 15.7783203125, 16.45263671875, 17.126953125, 17.80126953125, 18.4755859375, 19.14990234375, 19.82421875, 20.49853515625, 21.1728515625, 21.84716796875, 22.521484375, 23.19580078125, 23.8701171875, 24.54443359375, 25.21875]}, "gradients/decoder.transformer.h.14.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 5.0, 9.0, 23.0, 48.0, 91.0, 124.0, 146.0, 183.0, 149.0, 94.0, 71.0, 32.0, 21.0, 12.0, 5.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-94.71366119384766, -92.24703979492188, -89.78041076660156, -87.31378936767578, -84.84716033935547, -82.38053894042969, -79.91390991210938, -77.4472885131836, -74.98065948486328, -72.5140380859375, -70.04740905761719, -67.5807876586914, -65.1141586303711, -62.64753341674805, -60.180908203125, -57.71428680419922, -55.24766159057617, -52.781036376953125, -50.31441116333008, -47.84778594970703, -45.381160736083984, -42.91453552246094, -40.447914123535156, -37.981285095214844, -35.51466369628906, -33.048038482666016, -30.58141326904297, -28.114788055419922, -25.648162841796875, -23.181537628173828, -20.714914321899414, -18.248289108276367, -15.781665802001953, -13.315040588378906, -10.84841537475586, -8.381791114807129, -5.915165901184082, -3.448540687561035, -0.9819164276123047, 1.4847087860107422, 3.951333999633789, 6.417959213256836, 8.884584426879883, 11.351208686828613, 13.81783390045166, 16.28445816040039, 18.751083374023438, 21.217708587646484, 23.68433380126953, 26.150959014892578, 28.617584228515625, 31.084209442138672, 33.55083465576172, 36.017459869384766, 38.48408508300781, 40.950706481933594, 43.417335510253906, 45.88396072387695, 48.3505859375, 50.81721115112305, 53.283836364746094, 55.75046157836914, 58.21708679199219, 60.68370819091797, 63.150333404541016]}, "gradients/decoder.transformer.h.14.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 4.0, 1.0, 2.0, 2.0, 2.0, 2.0, 1.0, 8.0, 5.0, 10.0, 13.0, 7.0, 15.0, 21.0, 16.0, 19.0, 15.0, 16.0, 22.0, 29.0, 44.0, 31.0, 33.0, 34.0, 31.0, 39.0, 39.0, 34.0, 42.0, 38.0, 47.0, 29.0, 28.0, 38.0, 28.0, 30.0, 26.0, 27.0, 24.0, 21.0, 23.0, 24.0, 13.0, 16.0, 11.0, 13.0, 11.0, 10.0, 5.0, 3.0, 4.0, 6.0, 0.0, 2.0, 3.0, 1.0, 1.0, 1.0, 1.0, 2.0], "bins": [-29.55217933654785, -28.636825561523438, -27.721473693847656, -26.806119918823242, -25.89076805114746, -24.975414276123047, -24.060062408447266, -23.14470863342285, -22.229354858398438, -21.314001083374023, -20.398649215698242, -19.483295440673828, -18.567943572998047, -17.652589797973633, -16.73723602294922, -15.821884155273438, -14.906532287597656, -13.991179466247559, -13.075826644897461, -12.160472869873047, -11.245121002197266, -10.329767227172852, -9.414414405822754, -8.499061584472656, -7.583708763122559, -6.668355941772461, -5.753003120422363, -4.837649822235107, -3.9222970008850098, -3.006944179534912, -2.0915908813476562, -1.1762380599975586, -0.26088714599609375, 0.6544657945632935, 1.5698187351226807, 2.4851717948913574, 3.400524616241455, 4.315877437591553, 5.231230735778809, 6.146583557128906, 7.061936378479004, 7.977289199829102, 8.8926420211792, 9.807994842529297, 10.723348617553711, 11.638700485229492, 12.554054260253906, 13.469407081604004, 14.384759902954102, 15.3001127243042, 16.215465545654297, 17.13081932067871, 18.046171188354492, 18.961524963378906, 19.876876831054688, 20.7922306060791, 21.707584381103516, 22.62293815612793, 23.53829002380371, 24.453643798828125, 25.368995666503906, 26.28434944152832, 27.199703216552734, 28.115055084228516, 29.030406951904297]}, "gradients/decoder.transformer.h.14.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 3.0, 4.0, 5.0, 2.0, 7.0, 11.0, 6.0, 10.0, 9.0, 16.0, 19.0, 21.0, 27.0, 17.0, 21.0, 28.0, 42.0, 34.0, 40.0, 47.0, 39.0, 37.0, 50.0, 39.0, 42.0, 38.0, 38.0, 37.0, 43.0, 28.0, 28.0, 21.0, 36.0, 29.0, 22.0, 25.0, 19.0, 14.0, 10.0, 7.0, 6.0, 5.0, 6.0, 8.0, 5.0, 2.0, 3.0, 2.0, 1.0, 1.0, 3.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-4.125, -3.98553466796875, -3.8460693359375, -3.70660400390625, -3.567138671875, -3.42767333984375, -3.2882080078125, -3.14874267578125, -3.00927734375, -2.86981201171875, -2.7303466796875, -2.59088134765625, -2.451416015625, -2.31195068359375, -2.1724853515625, -2.03302001953125, -1.8935546875, -1.75408935546875, -1.6146240234375, -1.47515869140625, -1.335693359375, -1.19622802734375, -1.0567626953125, -0.91729736328125, -0.77783203125, -0.63836669921875, -0.4989013671875, -0.35943603515625, -0.219970703125, -0.08050537109375, 0.0589599609375, 0.19842529296875, 0.337890625, 0.47735595703125, 0.6168212890625, 0.75628662109375, 0.895751953125, 1.03521728515625, 1.1746826171875, 1.31414794921875, 1.45361328125, 1.59307861328125, 1.7325439453125, 1.87200927734375, 2.011474609375, 2.15093994140625, 2.2904052734375, 2.42987060546875, 2.5693359375, 2.70880126953125, 2.8482666015625, 2.98773193359375, 3.127197265625, 3.26666259765625, 3.4061279296875, 3.54559326171875, 3.68505859375, 3.82452392578125, 3.9639892578125, 4.10345458984375, 4.242919921875, 4.38238525390625, 4.5218505859375, 4.66131591796875, 4.80078125]}, "gradients/decoder.transformer.h.14.crossattention.c_proj.weight": {"_type": "histogram", "values": [3.0, 2.0, 4.0, 2.0, 7.0, 11.0, 15.0, 18.0, 24.0, 41.0, 51.0, 72.0, 100.0, 158.0, 204.0, 357.0, 487.0, 670.0, 1055.0, 1540.0, 2284.0, 3547.0, 5399.0, 8173.0, 13047.0, 20389.0, 31798.0, 49742.0, 76778.0, 110215.0, 143828.0, 155242.0, 134861.0, 99298.0, 66878.0, 43853.0, 27950.0, 17758.0, 11383.0, 7131.0, 4626.0, 3145.0, 2093.0, 1343.0, 960.0, 643.0, 425.0, 298.0, 190.0, 146.0, 81.0, 75.0, 69.0, 29.0, 32.0, 23.0, 4.0, 8.0, 3.0, 5.0, 0.0, 2.0, 0.0, 1.0], "bins": [-0.6337890625, -0.6135711669921875, -0.593353271484375, -0.5731353759765625, -0.55291748046875, -0.5326995849609375, -0.512481689453125, -0.4922637939453125, -0.4720458984375, -0.4518280029296875, -0.431610107421875, -0.4113922119140625, -0.39117431640625, -0.3709564208984375, -0.350738525390625, -0.3305206298828125, -0.310302734375, -0.2900848388671875, -0.269866943359375, -0.2496490478515625, -0.22943115234375, -0.2092132568359375, -0.188995361328125, -0.1687774658203125, -0.1485595703125, -0.1283416748046875, -0.108123779296875, -0.0879058837890625, -0.06768798828125, -0.0474700927734375, -0.027252197265625, -0.0070343017578125, 0.01318359375, 0.0334014892578125, 0.053619384765625, 0.0738372802734375, 0.09405517578125, 0.1142730712890625, 0.134490966796875, 0.1547088623046875, 0.1749267578125, 0.1951446533203125, 0.215362548828125, 0.2355804443359375, 0.25579833984375, 0.2760162353515625, 0.296234130859375, 0.3164520263671875, 0.336669921875, 0.3568878173828125, 0.377105712890625, 0.3973236083984375, 0.41754150390625, 0.4377593994140625, 0.457977294921875, 0.4781951904296875, 0.4984130859375, 0.5186309814453125, 0.538848876953125, 0.5590667724609375, 0.57928466796875, 0.5995025634765625, 0.619720458984375, 0.6399383544921875, 0.66015625]}, "gradients/decoder.transformer.h.14.crossattention.c_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 2.0, 4.0, 2.0, 0.0, 5.0, 7.0, 6.0, 5.0, 5.0, 8.0, 13.0, 15.0, 13.0, 13.0, 15.0, 26.0, 25.0, 15.0, 26.0, 32.0, 32.0, 29.0, 41.0, 46.0, 38.0, 42.0, 30.0, 1060.0, 45.0, 31.0, 40.0, 47.0, 33.0, 37.0, 27.0, 27.0, 29.0, 23.0, 20.0, 19.0, 21.0, 21.0, 15.0, 7.0, 7.0, 6.0, 10.0, 5.0, 5.0, 2.0, 2.0, 3.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.861328125, -2.766357421875, -2.67138671875, -2.576416015625, -2.4814453125, -2.386474609375, -2.29150390625, -2.196533203125, -2.1015625, -2.006591796875, -1.91162109375, -1.816650390625, -1.7216796875, -1.626708984375, -1.53173828125, -1.436767578125, -1.341796875, -1.246826171875, -1.15185546875, -1.056884765625, -0.9619140625, -0.866943359375, -0.77197265625, -0.677001953125, -0.58203125, -0.487060546875, -0.39208984375, -0.297119140625, -0.2021484375, -0.107177734375, -0.01220703125, 0.082763671875, 0.177734375, 0.272705078125, 0.36767578125, 0.462646484375, 0.5576171875, 0.652587890625, 0.74755859375, 0.842529296875, 0.9375, 1.032470703125, 1.12744140625, 1.222412109375, 1.3173828125, 1.412353515625, 1.50732421875, 1.602294921875, 1.697265625, 1.792236328125, 1.88720703125, 1.982177734375, 2.0771484375, 2.172119140625, 2.26708984375, 2.362060546875, 2.45703125, 2.552001953125, 2.64697265625, 2.741943359375, 2.8369140625, 2.931884765625, 3.02685546875, 3.121826171875, 3.216796875]}, "gradients/decoder.transformer.h.14.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 7.0, 9.0, 13.0, 19.0, 27.0, 42.0, 52.0, 94.0, 145.0, 195.0, 301.0, 534.0, 698.0, 1072.0, 1738.0, 2502.0, 3954.0, 6240.0, 9513.0, 14913.0, 24231.0, 38826.0, 61024.0, 93488.0, 134607.0, 1149179.0, 213386.0, 119118.0, 80493.0, 52001.0, 32791.0, 20176.0, 12605.0, 8192.0, 5220.0, 3335.0, 2239.0, 1395.0, 954.0, 619.0, 415.0, 257.0, 183.0, 101.0, 64.0, 57.0, 43.0, 25.0, 19.0, 11.0, 5.0, 6.0, 3.0, 2.0, 5.0, 0.0, 3.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.345703125, -0.3337059020996094, -0.32170867919921875, -0.3097114562988281, -0.2977142333984375, -0.2857170104980469, -0.27371978759765625, -0.2617225646972656, -0.249725341796875, -0.23772811889648438, -0.22573089599609375, -0.21373367309570312, -0.2017364501953125, -0.18973922729492188, -0.17774200439453125, -0.16574478149414062, -0.15374755859375, -0.14175033569335938, -0.12975311279296875, -0.11775588989257812, -0.1057586669921875, -0.09376144409179688, -0.08176422119140625, -0.06976699829101562, -0.057769775390625, -0.045772552490234375, -0.03377532958984375, -0.021778106689453125, -0.0097808837890625, 0.002216339111328125, 0.01421356201171875, 0.026210784912109375, 0.0382080078125, 0.050205230712890625, 0.06220245361328125, 0.07419967651367188, 0.0861968994140625, 0.09819412231445312, 0.11019134521484375, 0.12218856811523438, 0.134185791015625, 0.14618301391601562, 0.15818023681640625, 0.17017745971679688, 0.1821746826171875, 0.19417190551757812, 0.20616912841796875, 0.21816635131835938, 0.23016357421875, 0.24216079711914062, 0.25415802001953125, 0.2661552429199219, 0.2781524658203125, 0.2901496887207031, 0.30214691162109375, 0.3141441345214844, 0.326141357421875, 0.3381385803222656, 0.35013580322265625, 0.3621330261230469, 0.3741302490234375, 0.3861274719238281, 0.39812469482421875, 0.4101219177246094, 0.422119140625]}, "gradients/decoder.transformer.h.14.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 4.0, 1.0, 3.0, 4.0, 4.0, 3.0, 3.0, 11.0, 15.0, 16.0, 30.0, 23.0, 32.0, 46.0, 66.0, 68.0, 80.0, 97.0, 99.0, 86.0, 75.0, 51.0, 48.0, 42.0, 29.0, 16.0, 13.0, 13.0, 12.0, 9.0, 4.0, 2.0, 3.0, 1.0, 1.0, 2.0, 3.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.031494140625, -0.030615806579589844, -0.029737472534179688, -0.02885913848876953, -0.027980804443359375, -0.02710247039794922, -0.026224136352539062, -0.025345802307128906, -0.02446746826171875, -0.023589134216308594, -0.022710800170898438, -0.02183246612548828, -0.020954132080078125, -0.02007579803466797, -0.019197463989257812, -0.018319129943847656, -0.0174407958984375, -0.016562461853027344, -0.015684127807617188, -0.014805793762207031, -0.013927459716796875, -0.013049125671386719, -0.012170791625976562, -0.011292457580566406, -0.01041412353515625, -0.009535789489746094, -0.008657455444335938, -0.007779121398925781, -0.006900787353515625, -0.006022453308105469, -0.0051441192626953125, -0.004265785217285156, -0.003387451171875, -0.0025091171264648438, -0.0016307830810546875, -0.0007524490356445312, 0.000125885009765625, 0.0010042190551757812, 0.0018825531005859375, 0.0027608871459960938, 0.00363922119140625, 0.004517555236816406, 0.0053958892822265625, 0.006274223327636719, 0.007152557373046875, 0.008030891418457031, 0.008909225463867188, 0.009787559509277344, 0.0106658935546875, 0.011544227600097656, 0.012422561645507812, 0.013300895690917969, 0.014179229736328125, 0.015057563781738281, 0.015935897827148438, 0.016814231872558594, 0.01769256591796875, 0.018570899963378906, 0.019449234008789062, 0.02032756805419922, 0.021205902099609375, 0.02208423614501953, 0.022962570190429688, 0.023840904235839844, 0.02471923828125]}, "gradients/decoder.transformer.h.14.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 2.0, 2.0, 1.0, 2.0, 2.0, 3.0, 7.0, 12.0, 14.0, 19.0, 16.0, 27.0, 61.0, 101.0, 124.0, 327.0, 794.0, 59426.0, 985601.0, 1197.0, 366.0, 184.0, 88.0, 57.0, 39.0, 29.0, 23.0, 12.0, 9.0, 0.0, 7.0, 2.0, 3.0, 4.0, 4.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.499755859375, -0.4819908142089844, -0.46422576904296875, -0.4464607238769531, -0.4286956787109375, -0.4109306335449219, -0.39316558837890625, -0.3754005432128906, -0.357635498046875, -0.3398704528808594, -0.32210540771484375, -0.3043403625488281, -0.2865753173828125, -0.2688102722167969, -0.25104522705078125, -0.23328018188476562, -0.21551513671875, -0.19775009155273438, -0.17998504638671875, -0.16222000122070312, -0.1444549560546875, -0.12668991088867188, -0.10892486572265625, -0.09115982055664062, -0.073394775390625, -0.055629730224609375, -0.03786468505859375, -0.020099639892578125, -0.0023345947265625, 0.015430450439453125, 0.03319549560546875, 0.050960540771484375, 0.0687255859375, 0.08649063110351562, 0.10425567626953125, 0.12202072143554688, 0.1397857666015625, 0.15755081176757812, 0.17531585693359375, 0.19308090209960938, 0.210845947265625, 0.22861099243164062, 0.24637603759765625, 0.2641410827636719, 0.2819061279296875, 0.2996711730957031, 0.31743621826171875, 0.3352012634277344, 0.35296630859375, 0.3707313537597656, 0.38849639892578125, 0.4062614440917969, 0.4240264892578125, 0.4417915344238281, 0.45955657958984375, 0.4773216247558594, 0.495086669921875, 0.5128517150878906, 0.5306167602539062, 0.5483818054199219, 0.5661468505859375, 0.5839118957519531, 0.6016769409179688, 0.6194419860839844, 0.63720703125]}, "gradients/decoder.transformer.h.14.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 6.0, 17.0, 160.0, 802.0, 29.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.24430254101753235, -0.23839910328388214, -0.23249565064907074, -0.22659221291542053, -0.22068876028060913, -0.21478532254695892, -0.20888188481330872, -0.20297843217849731, -0.1970749795436859, -0.1911715418100357, -0.1852680891752243, -0.1793646514415741, -0.1734611988067627, -0.1675577610731125, -0.16165432333946228, -0.15575087070465088, -0.14984743297100067, -0.14394399523735046, -0.13804054260253906, -0.13213710486888885, -0.12623365223407745, -0.12033021450042725, -0.11442676931619644, -0.10852332413196564, -0.10261987894773483, -0.09671643376350403, -0.09081298857927322, -0.08490954339504242, -0.07900610566139221, -0.07310265302658081, -0.0671992152929306, -0.0612957701086998, -0.05539233982563019, -0.049488894641399384, -0.04358544945716858, -0.03768200799822807, -0.03177856281399727, -0.025875117629766464, -0.019971676170825958, -0.014068230986595154, -0.00816478580236435, -0.0022613415494561195, 0.0036421027034521103, 0.009545546025037766, 0.01544899120926857, 0.021352436393499374, 0.02725587785243988, 0.033159323036670685, 0.03906276822090149, 0.044966213405132294, 0.0508696585893631, 0.056773100048303604, 0.06267654895782471, 0.06857998669147491, 0.07448343187570572, 0.08038687705993652, 0.08629032224416733, 0.09219376742839813, 0.09809721261262894, 0.10400065779685974, 0.10990409553050995, 0.11580754816532135, 0.12171098589897156, 0.12761443853378296, 0.13351787626743317]}, "gradients/decoder.transformer.h.14.ln_cross_attn.bias": {"_type": "histogram", "values": [2.0, 3.0, 3.0, 3.0, 4.0, 7.0, 10.0, 7.0, 4.0, 10.0, 9.0, 13.0, 7.0, 14.0, 22.0, 17.0, 21.0, 21.0, 19.0, 24.0, 23.0, 35.0, 21.0, 29.0, 36.0, 29.0, 39.0, 33.0, 33.0, 36.0, 34.0, 28.0, 31.0, 33.0, 35.0, 36.0, 34.0, 30.0, 14.0, 28.0, 21.0, 21.0, 26.0, 14.0, 15.0, 11.0, 14.0, 12.0, 10.0, 8.0, 2.0, 8.0, 6.0, 3.0, 7.0, 2.0, 2.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.022610485553741455, -0.02184230647981167, -0.02107412740588188, -0.020305948331952095, -0.01953776925802231, -0.01876959018409252, -0.018001411110162735, -0.01723323203623295, -0.01646505296230316, -0.015696873888373375, -0.014928694814443588, -0.014160515740513802, -0.013392336666584015, -0.012624157592654228, -0.011855978518724442, -0.011087799444794655, -0.010319620370864868, -0.009551441296935081, -0.008783262223005295, -0.008015083149075508, -0.0072469040751457214, -0.006478725001215935, -0.005710545927286148, -0.004942366853356361, -0.004174187779426575, -0.003406008705496788, -0.0026378296315670013, -0.0018696505576372147, -0.001101471483707428, -0.0003332924097776413, 0.0004348866641521454, 0.001203065738081932, 0.0019712448120117188, 0.0027394238859415054, 0.003507602959871292, 0.004275782033801079, 0.0050439611077308655, 0.005812140181660652, 0.006580319255590439, 0.0073484983295202255, 0.008116677403450012, 0.008884856477379799, 0.009653035551309586, 0.010421214625239372, 0.011189393699169159, 0.011957572773098946, 0.012725751847028732, 0.013493930920958519, 0.014262109994888306, 0.015030289068818092, 0.01579846814274788, 0.016566647216677666, 0.017334826290607452, 0.01810300536453724, 0.018871184438467026, 0.019639363512396812, 0.0204075425863266, 0.021175721660256386, 0.021943900734186172, 0.02271207980811596, 0.023480258882045746, 0.024248437955975533, 0.02501661702990532, 0.025784796103835106, 0.026552975177764893]}, "gradients/decoder.transformer.h.14.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 3.0, 4.0, 5.0, 2.0, 7.0, 11.0, 6.0, 10.0, 9.0, 16.0, 19.0, 20.0, 28.0, 17.0, 21.0, 28.0, 42.0, 34.0, 40.0, 47.0, 39.0, 37.0, 52.0, 37.0, 42.0, 38.0, 38.0, 37.0, 42.0, 29.0, 28.0, 21.0, 36.0, 29.0, 22.0, 25.0, 19.0, 14.0, 10.0, 7.0, 6.0, 5.0, 6.0, 8.0, 5.0, 2.0, 3.0, 2.0, 1.0, 1.0, 3.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-4.125, -3.98553466796875, -3.8460693359375, -3.70660400390625, -3.567138671875, -3.42767333984375, -3.2882080078125, -3.14874267578125, -3.00927734375, -2.86981201171875, -2.7303466796875, -2.59088134765625, -2.451416015625, -2.31195068359375, -2.1724853515625, -2.03302001953125, -1.8935546875, -1.75408935546875, -1.6146240234375, -1.47515869140625, -1.335693359375, -1.19622802734375, -1.0567626953125, -0.91729736328125, -0.77783203125, -0.63836669921875, -0.4989013671875, -0.35943603515625, -0.219970703125, -0.08050537109375, 0.0589599609375, 0.19842529296875, 0.337890625, 0.47735595703125, 0.6168212890625, 0.75628662109375, 0.895751953125, 1.03521728515625, 1.1746826171875, 1.31414794921875, 1.45361328125, 1.59307861328125, 1.7325439453125, 1.87200927734375, 2.011474609375, 2.15093994140625, 2.2904052734375, 2.42987060546875, 2.5693359375, 2.70880126953125, 2.8482666015625, 2.98773193359375, 3.127197265625, 3.26666259765625, 3.4061279296875, 3.54559326171875, 3.68505859375, 3.82452392578125, 3.9639892578125, 4.10345458984375, 4.242919921875, 4.38238525390625, 4.5218505859375, 4.66131591796875, 4.80078125]}, "gradients/decoder.transformer.h.14.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 6.0, 1.0, 6.0, 7.0, 1.0, 4.0, 10.0, 5.0, 6.0, 13.0, 14.0, 25.0, 23.0, 56.0, 75.0, 146.0, 290.0, 601.0, 1273.0, 2743.0, 5924.0, 12668.0, 26129.0, 55736.0, 116817.0, 236775.0, 292795.0, 154903.0, 74541.0, 35190.0, 16617.0, 8007.0, 3666.0, 1764.0, 840.0, 391.0, 214.0, 91.0, 80.0, 28.0, 26.0, 14.0, 12.0, 9.0, 6.0, 1.0, 7.0, 3.0, 4.0, 2.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-4.16796875, -4.02520751953125, -3.8824462890625, -3.73968505859375, -3.596923828125, -3.45416259765625, -3.3114013671875, -3.16864013671875, -3.02587890625, -2.88311767578125, -2.7403564453125, -2.59759521484375, -2.454833984375, -2.31207275390625, -2.1693115234375, -2.02655029296875, -1.8837890625, -1.74102783203125, -1.5982666015625, -1.45550537109375, -1.312744140625, -1.16998291015625, -1.0272216796875, -0.88446044921875, -0.74169921875, -0.59893798828125, -0.4561767578125, -0.31341552734375, -0.170654296875, -0.02789306640625, 0.1148681640625, 0.25762939453125, 0.400390625, 0.54315185546875, 0.6859130859375, 0.82867431640625, 0.971435546875, 1.11419677734375, 1.2569580078125, 1.39971923828125, 1.54248046875, 1.68524169921875, 1.8280029296875, 1.97076416015625, 2.113525390625, 2.25628662109375, 2.3990478515625, 2.54180908203125, 2.6845703125, 2.82733154296875, 2.9700927734375, 3.11285400390625, 3.255615234375, 3.39837646484375, 3.5411376953125, 3.68389892578125, 3.82666015625, 3.96942138671875, 4.1121826171875, 4.25494384765625, 4.397705078125, 4.54046630859375, 4.6832275390625, 4.82598876953125, 4.96875]}, "gradients/decoder.transformer.h.14.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 5.0, 1.0, 3.0, 4.0, 8.0, 3.0, 8.0, 8.0, 17.0, 12.0, 12.0, 10.0, 22.0, 17.0, 30.0, 32.0, 31.0, 35.0, 46.0, 48.0, 58.0, 86.0, 182.0, 1393.0, 342.0, 152.0, 76.0, 66.0, 51.0, 36.0, 38.0, 35.0, 33.0, 25.0, 28.0, 18.0, 24.0, 15.0, 11.0, 11.0, 8.0, 6.0, 4.0, 2.0, 3.0, 2.0, 2.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0], "bins": [-14.375, -13.89306640625, -13.4111328125, -12.92919921875, -12.447265625, -11.96533203125, -11.4833984375, -11.00146484375, -10.51953125, -10.03759765625, -9.5556640625, -9.07373046875, -8.591796875, -8.10986328125, -7.6279296875, -7.14599609375, -6.6640625, -6.18212890625, -5.7001953125, -5.21826171875, -4.736328125, -4.25439453125, -3.7724609375, -3.29052734375, -2.80859375, -2.32666015625, -1.8447265625, -1.36279296875, -0.880859375, -0.39892578125, 0.0830078125, 0.56494140625, 1.046875, 1.52880859375, 2.0107421875, 2.49267578125, 2.974609375, 3.45654296875, 3.9384765625, 4.42041015625, 4.90234375, 5.38427734375, 5.8662109375, 6.34814453125, 6.830078125, 7.31201171875, 7.7939453125, 8.27587890625, 8.7578125, 9.23974609375, 9.7216796875, 10.20361328125, 10.685546875, 11.16748046875, 11.6494140625, 12.13134765625, 12.61328125, 13.09521484375, 13.5771484375, 14.05908203125, 14.541015625, 15.02294921875, 15.5048828125, 15.98681640625, 16.46875]}, "gradients/decoder.transformer.h.14.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 4.0, 5.0, 2.0, 4.0, 1.0, 12.0, 14.0, 13.0, 25.0, 20.0, 21.0, 43.0, 35.0, 50.0, 65.0, 75.0, 92.0, 124.0, 181.0, 309.0, 390.0, 931.0, 5418.0, 575484.0, 2544596.0, 14943.0, 1291.0, 464.0, 288.0, 180.0, 124.0, 111.0, 69.0, 68.0, 49.0, 42.0, 40.0, 27.0, 25.0, 17.0, 13.0, 13.0, 12.0, 7.0, 8.0, 1.0, 5.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-35.3125, -34.2783203125, -33.244140625, -32.2099609375, -31.17578125, -30.1416015625, -29.107421875, -28.0732421875, -27.0390625, -26.0048828125, -24.970703125, -23.9365234375, -22.90234375, -21.8681640625, -20.833984375, -19.7998046875, -18.765625, -17.7314453125, -16.697265625, -15.6630859375, -14.62890625, -13.5947265625, -12.560546875, -11.5263671875, -10.4921875, -9.4580078125, -8.423828125, -7.3896484375, -6.35546875, -5.3212890625, -4.287109375, -3.2529296875, -2.21875, -1.1845703125, -0.150390625, 0.8837890625, 1.91796875, 2.9521484375, 3.986328125, 5.0205078125, 6.0546875, 7.0888671875, 8.123046875, 9.1572265625, 10.19140625, 11.2255859375, 12.259765625, 13.2939453125, 14.328125, 15.3623046875, 16.396484375, 17.4306640625, 18.46484375, 19.4990234375, 20.533203125, 21.5673828125, 22.6015625, 23.6357421875, 24.669921875, 25.7041015625, 26.73828125, 27.7724609375, 28.806640625, 29.8408203125, 30.875]}, "gradients/decoder.transformer.h.14.ln_1.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 5.0, 10.0, 41.0, 69.0, 105.0, 152.0, 163.0, 191.0, 134.0, 79.0, 36.0, 19.0, 8.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-47.75235366821289, -46.520484924316406, -45.28861618041992, -44.05674743652344, -42.82487869262695, -41.59300994873047, -40.361141204833984, -39.1292724609375, -37.89739990234375, -36.665531158447266, -35.43366241455078, -34.2017936706543, -32.96992492675781, -31.738056182861328, -30.50618553161621, -29.274316787719727, -28.042449951171875, -26.81058120727539, -25.578712463378906, -24.346843719482422, -23.114974975585938, -21.883106231689453, -20.651235580444336, -19.41936683654785, -18.187498092651367, -16.955629348754883, -15.723760604858398, -14.491890907287598, -13.260022163391113, -12.028153419494629, -10.796283721923828, -9.564414978027344, -8.332550048828125, -7.100681304931641, -5.868812084197998, -4.6369428634643555, -3.405074119567871, -2.1732053756713867, -0.9413361549377441, 0.29053306579589844, 1.5224018096923828, 2.7542707920074463, 3.9861397743225098, 5.218008995056152, 6.449877738952637, 7.681746482849121, 8.913616180419922, 10.145484924316406, 11.37735366821289, 12.609222412109375, 13.84109115600586, 15.07296085357666, 16.304828643798828, 17.536697387695312, 18.76856803894043, 20.000436782836914, 21.2323055267334, 22.464174270629883, 23.696043014526367, 24.92791175842285, 26.15978240966797, 27.391651153564453, 28.623519897460938, 29.855388641357422, 31.087257385253906]}, "gradients/decoder.transformer.h.14.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 1.0, 5.0, 2.0, 2.0, 9.0, 3.0, 7.0, 8.0, 5.0, 18.0, 24.0, 31.0, 11.0, 20.0, 26.0, 37.0, 31.0, 29.0, 43.0, 27.0, 38.0, 52.0, 41.0, 43.0, 52.0, 43.0, 55.0, 44.0, 36.0, 39.0, 31.0, 24.0, 32.0, 25.0, 19.0, 17.0, 12.0, 14.0, 15.0, 9.0, 8.0, 9.0, 8.0, 1.0, 2.0, 3.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-46.419464111328125, -44.99747085571289, -43.575477600097656, -42.15348815917969, -40.73149490356445, -39.30950164794922, -37.88751220703125, -36.465518951416016, -35.04352569580078, -33.62153244018555, -32.19953918457031, -30.777549743652344, -29.35555648803711, -27.933563232421875, -26.511571884155273, -25.089580535888672, -23.667587280273438, -22.245594024658203, -20.8236026763916, -19.401611328125, -17.979618072509766, -16.55762481689453, -15.13563346862793, -13.713641166687012, -12.291648864746094, -10.869656562805176, -9.447664260864258, -8.02567195892334, -6.603679656982422, -5.181687355041504, -3.759695053100586, -2.337702751159668, -0.9157066345214844, 0.5062856674194336, 1.9282779693603516, 3.3502702713012695, 4.7722625732421875, 6.1942548751831055, 7.616247177124023, 9.038239479064941, 10.46023178100586, 11.882224082946777, 13.304216384887695, 14.726208686828613, 16.14820098876953, 17.570194244384766, 18.992185592651367, 20.41417694091797, 21.836170196533203, 23.258163452148438, 24.68015480041504, 26.10214614868164, 27.524139404296875, 28.94613265991211, 30.36812400817871, 31.790115356445312, 33.21210861206055, 34.63410186767578, 36.05609130859375, 37.478084564208984, 38.90007781982422, 40.32207107543945, 41.74406433105469, 43.166053771972656, 44.58804702758789]}, "gradients/decoder.transformer.h.13.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 5.0, 3.0, 0.0, 2.0, 7.0, 7.0, 5.0, 10.0, 12.0, 14.0, 10.0, 18.0, 19.0, 19.0, 18.0, 22.0, 46.0, 25.0, 30.0, 44.0, 37.0, 41.0, 32.0, 41.0, 48.0, 43.0, 39.0, 34.0, 48.0, 33.0, 26.0, 30.0, 35.0, 29.0, 25.0, 27.0, 27.0, 17.0, 20.0, 11.0, 13.0, 7.0, 6.0, 4.0, 6.0, 9.0, 2.0, 3.0, 1.0, 3.0, 2.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-4.19140625, -4.04217529296875, -3.8929443359375, -3.74371337890625, -3.594482421875, -3.44525146484375, -3.2960205078125, -3.14678955078125, -2.99755859375, -2.84832763671875, -2.6990966796875, -2.54986572265625, -2.400634765625, -2.25140380859375, -2.1021728515625, -1.95294189453125, -1.8037109375, -1.65447998046875, -1.5052490234375, -1.35601806640625, -1.206787109375, -1.05755615234375, -0.9083251953125, -0.75909423828125, -0.60986328125, -0.46063232421875, -0.3114013671875, -0.16217041015625, -0.012939453125, 0.13629150390625, 0.2855224609375, 0.43475341796875, 0.583984375, 0.73321533203125, 0.8824462890625, 1.03167724609375, 1.180908203125, 1.33013916015625, 1.4793701171875, 1.62860107421875, 1.77783203125, 1.92706298828125, 2.0762939453125, 2.22552490234375, 2.374755859375, 2.52398681640625, 2.6732177734375, 2.82244873046875, 2.9716796875, 3.12091064453125, 3.2701416015625, 3.41937255859375, 3.568603515625, 3.71783447265625, 3.8670654296875, 4.01629638671875, 4.16552734375, 4.31475830078125, 4.4639892578125, 4.61322021484375, 4.762451171875, 4.91168212890625, 5.0609130859375, 5.21014404296875, 5.359375]}, "gradients/decoder.transformer.h.13.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 3.0, 1.0, 2.0, 3.0, 4.0, 7.0, 3.0, 10.0, 4.0, 10.0, 15.0, 18.0, 23.0, 27.0, 25.0, 45.0, 41.0, 69.0, 121.0, 231.0, 455.0, 1277.0, 4565.0, 28336.0, 313541.0, 2259138.0, 1432515.0, 135264.0, 14280.0, 2702.0, 786.0, 315.0, 149.0, 78.0, 45.0, 36.0, 30.0, 17.0, 21.0, 12.0, 17.0, 9.0, 8.0, 11.0, 6.0, 5.0, 3.0, 2.0, 6.0, 2.0, 2.0, 2.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-10.734375, -10.3343505859375, -9.934326171875, -9.5343017578125, -9.13427734375, -8.7342529296875, -8.334228515625, -7.9342041015625, -7.5341796875, -7.1341552734375, -6.734130859375, -6.3341064453125, -5.93408203125, -5.5340576171875, -5.134033203125, -4.7340087890625, -4.333984375, -3.9339599609375, -3.533935546875, -3.1339111328125, -2.73388671875, -2.3338623046875, -1.933837890625, -1.5338134765625, -1.1337890625, -0.7337646484375, -0.333740234375, 0.0662841796875, 0.46630859375, 0.8663330078125, 1.266357421875, 1.6663818359375, 2.06640625, 2.4664306640625, 2.866455078125, 3.2664794921875, 3.66650390625, 4.0665283203125, 4.466552734375, 4.8665771484375, 5.2666015625, 5.6666259765625, 6.066650390625, 6.4666748046875, 6.86669921875, 7.2667236328125, 7.666748046875, 8.0667724609375, 8.466796875, 8.8668212890625, 9.266845703125, 9.6668701171875, 10.06689453125, 10.4669189453125, 10.866943359375, 11.2669677734375, 11.6669921875, 12.0670166015625, 12.467041015625, 12.8670654296875, 13.26708984375, 13.6671142578125, 14.067138671875, 14.4671630859375, 14.8671875]}, "gradients/decoder.transformer.h.13.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 4.0, 6.0, 7.0, 6.0, 14.0, 22.0, 23.0, 38.0, 52.0, 88.0, 141.0, 185.0, 290.0, 370.0, 481.0, 516.0, 509.0, 391.0, 291.0, 197.0, 155.0, 101.0, 66.0, 47.0, 29.0, 17.0, 14.0, 5.0, 10.0, 6.0, 3.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.8828125, -9.479736328125, -9.07666015625, -8.673583984375, -8.2705078125, -7.867431640625, -7.46435546875, -7.061279296875, -6.658203125, -6.255126953125, -5.85205078125, -5.448974609375, -5.0458984375, -4.642822265625, -4.23974609375, -3.836669921875, -3.43359375, -3.030517578125, -2.62744140625, -2.224365234375, -1.8212890625, -1.418212890625, -1.01513671875, -0.612060546875, -0.208984375, 0.194091796875, 0.59716796875, 1.000244140625, 1.4033203125, 1.806396484375, 2.20947265625, 2.612548828125, 3.015625, 3.418701171875, 3.82177734375, 4.224853515625, 4.6279296875, 5.031005859375, 5.43408203125, 5.837158203125, 6.240234375, 6.643310546875, 7.04638671875, 7.449462890625, 7.8525390625, 8.255615234375, 8.65869140625, 9.061767578125, 9.46484375, 9.867919921875, 10.27099609375, 10.674072265625, 11.0771484375, 11.480224609375, 11.88330078125, 12.286376953125, 12.689453125, 13.092529296875, 13.49560546875, 13.898681640625, 14.3017578125, 14.704833984375, 15.10791015625, 15.510986328125, 15.9140625]}, "gradients/decoder.transformer.h.13.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 4.0, 5.0, 10.0, 11.0, 7.0, 17.0, 32.0, 48.0, 80.0, 109.0, 190.0, 291.0, 628.0, 2822.0, 169566.0, 3918728.0, 98183.0, 2293.0, 552.0, 257.0, 175.0, 105.0, 60.0, 34.0, 26.0, 21.0, 16.0, 7.0, 5.0, 7.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-52.125, -50.8037109375, -49.482421875, -48.1611328125, -46.83984375, -45.5185546875, -44.197265625, -42.8759765625, -41.5546875, -40.2333984375, -38.912109375, -37.5908203125, -36.26953125, -34.9482421875, -33.626953125, -32.3056640625, -30.984375, -29.6630859375, -28.341796875, -27.0205078125, -25.69921875, -24.3779296875, -23.056640625, -21.7353515625, -20.4140625, -19.0927734375, -17.771484375, -16.4501953125, -15.12890625, -13.8076171875, -12.486328125, -11.1650390625, -9.84375, -8.5224609375, -7.201171875, -5.8798828125, -4.55859375, -3.2373046875, -1.916015625, -0.5947265625, 0.7265625, 2.0478515625, 3.369140625, 4.6904296875, 6.01171875, 7.3330078125, 8.654296875, 9.9755859375, 11.296875, 12.6181640625, 13.939453125, 15.2607421875, 16.58203125, 17.9033203125, 19.224609375, 20.5458984375, 21.8671875, 23.1884765625, 24.509765625, 25.8310546875, 27.15234375, 28.4736328125, 29.794921875, 31.1162109375, 32.4375]}, "gradients/decoder.transformer.h.13.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 11.0, 92.0, 391.0, 404.0, 98.0, 16.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-233.0514678955078, -226.18728637695312, -219.32308959960938, -212.4589080810547, -205.59471130371094, -198.73052978515625, -191.8663330078125, -185.0021514892578, -178.13796997070312, -171.27378845214844, -164.4095916748047, -157.54541015625, -150.68121337890625, -143.81703186035156, -136.95285034179688, -130.08865356445312, -123.22445678710938, -116.36026763916016, -109.49607849121094, -102.63189697265625, -95.7677001953125, -88.90351867675781, -82.0393295288086, -75.17514038085938, -68.31095123291016, -61.44676208496094, -54.58257293701172, -47.718387603759766, -40.85419845581055, -33.99000930786133, -27.125823974609375, -20.261634826660156, -13.3974609375, -6.533272743225098, 0.3309154510498047, 7.195102691650391, 14.05929183959961, 20.923480987548828, 27.78766632080078, 34.65185546875, 41.51604461669922, 48.38023376464844, 55.244422912597656, 62.10860824584961, 68.97279357910156, 75.83699035644531, 82.701171875, 89.56536102294922, 96.42955017089844, 103.29373931884766, 110.15792846679688, 117.02210998535156, 123.88630676269531, 130.75048828125, 137.61468505859375, 144.47886657714844, 151.34304809570312, 158.2072296142578, 165.07142639160156, 171.93560791015625, 178.7998046875, 185.6639862060547, 192.52816772460938, 199.39236450195312, 206.25656127929688]}, "gradients/decoder.transformer.h.13.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 2.0, 1.0, 1.0, 3.0, 2.0, 8.0, 5.0, 10.0, 12.0, 14.0, 18.0, 17.0, 31.0, 29.0, 23.0, 32.0, 40.0, 50.0, 40.0, 45.0, 50.0, 40.0, 44.0, 52.0, 53.0, 40.0, 42.0, 46.0, 37.0, 38.0, 41.0, 33.0, 18.0, 19.0, 18.0, 18.0, 16.0, 7.0, 7.0, 6.0, 2.0, 2.0, 1.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-39.510833740234375, -38.17347717285156, -36.836124420166016, -35.4987678527832, -34.161415100097656, -32.824058532714844, -31.486703872680664, -30.149349212646484, -28.811994552612305, -27.474639892578125, -26.137285232543945, -24.799930572509766, -23.462574005126953, -22.125221252441406, -20.787864685058594, -19.450510025024414, -18.113155364990234, -16.775800704956055, -15.438446044921875, -14.101090431213379, -12.7637357711792, -11.42638111114502, -10.089025497436523, -8.751670837402344, -7.414316177368164, -6.076961517333984, -4.7396063804626465, -3.4022512435913086, -2.064896583557129, -0.7275419235229492, 0.6098136901855469, 1.9471683502197266, 3.2845230102539062, 4.621877670288086, 5.959232807159424, 7.296587944030762, 8.633942604064941, 9.971297264099121, 11.308652877807617, 12.646007537841797, 13.983362197875977, 15.320716857910156, 16.658071517944336, 17.995426177978516, 19.332782745361328, 20.670135498046875, 22.007492065429688, 23.344846725463867, 24.682201385498047, 26.019556045532227, 27.356910705566406, 28.69426727294922, 30.031620025634766, 31.368976593017578, 32.706329345703125, 34.04368591308594, 35.38104248046875, 36.71839904785156, 38.05575180053711, 39.39310836791992, 40.73046112060547, 42.06781768798828, 43.405174255371094, 44.74252700805664, 46.07987976074219]}, "gradients/decoder.transformer.h.13.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 2.0, 7.0, 4.0, 2.0, 6.0, 7.0, 11.0, 12.0, 6.0, 8.0, 16.0, 18.0, 35.0, 27.0, 27.0, 26.0, 35.0, 36.0, 36.0, 34.0, 41.0, 45.0, 45.0, 36.0, 33.0, 45.0, 37.0, 33.0, 29.0, 37.0, 40.0, 23.0, 37.0, 30.0, 26.0, 24.0, 11.0, 17.0, 12.0, 10.0, 7.0, 10.0, 6.0, 7.0, 2.0, 4.0, 5.0, 5.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.00390625, -3.85540771484375, -3.7069091796875, -3.55841064453125, -3.409912109375, -3.26141357421875, -3.1129150390625, -2.96441650390625, -2.81591796875, -2.66741943359375, -2.5189208984375, -2.37042236328125, -2.221923828125, -2.07342529296875, -1.9249267578125, -1.77642822265625, -1.6279296875, -1.47943115234375, -1.3309326171875, -1.18243408203125, -1.033935546875, -0.88543701171875, -0.7369384765625, -0.58843994140625, -0.43994140625, -0.29144287109375, -0.1429443359375, 0.00555419921875, 0.154052734375, 0.30255126953125, 0.4510498046875, 0.59954833984375, 0.748046875, 0.89654541015625, 1.0450439453125, 1.19354248046875, 1.342041015625, 1.49053955078125, 1.6390380859375, 1.78753662109375, 1.93603515625, 2.08453369140625, 2.2330322265625, 2.38153076171875, 2.530029296875, 2.67852783203125, 2.8270263671875, 2.97552490234375, 3.1240234375, 3.27252197265625, 3.4210205078125, 3.56951904296875, 3.718017578125, 3.86651611328125, 4.0150146484375, 4.16351318359375, 4.31201171875, 4.46051025390625, 4.6090087890625, 4.75750732421875, 4.906005859375, 5.05450439453125, 5.2030029296875, 5.35150146484375, 5.5]}, "gradients/decoder.transformer.h.13.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 5.0, 2.0, 10.0, 14.0, 20.0, 27.0, 39.0, 66.0, 103.0, 146.0, 216.0, 281.0, 468.0, 554.0, 919.0, 1335.0, 2079.0, 2991.0, 4523.0, 6943.0, 10933.0, 16872.0, 27550.0, 44067.0, 70541.0, 107742.0, 149284.0, 168670.0, 145547.0, 104390.0, 67629.0, 41991.0, 25838.0, 16343.0, 10375.0, 6650.0, 4443.0, 2901.0, 1955.0, 1294.0, 923.0, 560.0, 447.0, 289.0, 183.0, 101.0, 100.0, 77.0, 48.0, 35.0, 20.0, 12.0, 6.0, 5.0, 5.0, 0.0, 4.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.6689453125, -0.6462326049804688, -0.6235198974609375, -0.6008071899414062, -0.578094482421875, -0.5553817749023438, -0.5326690673828125, -0.5099563598632812, -0.48724365234375, -0.46453094482421875, -0.4418182373046875, -0.41910552978515625, -0.396392822265625, -0.37368011474609375, -0.3509674072265625, -0.32825469970703125, -0.3055419921875, -0.28282928466796875, -0.2601165771484375, -0.23740386962890625, -0.214691162109375, -0.19197845458984375, -0.1692657470703125, -0.14655303955078125, -0.12384033203125, -0.10112762451171875, -0.0784149169921875, -0.05570220947265625, -0.032989501953125, -0.01027679443359375, 0.0124359130859375, 0.03514862060546875, 0.057861328125, 0.08057403564453125, 0.1032867431640625, 0.12599945068359375, 0.148712158203125, 0.17142486572265625, 0.1941375732421875, 0.21685028076171875, 0.23956298828125, 0.26227569580078125, 0.2849884033203125, 0.30770111083984375, 0.330413818359375, 0.35312652587890625, 0.3758392333984375, 0.39855194091796875, 0.4212646484375, 0.44397735595703125, 0.4666900634765625, 0.48940277099609375, 0.512115478515625, 0.5348281860351562, 0.5575408935546875, 0.5802536010742188, 0.60296630859375, 0.6256790161132812, 0.6483917236328125, 0.6711044311523438, 0.693817138671875, 0.7165298461914062, 0.7392425537109375, 0.7619552612304688, 0.78466796875]}, "gradients/decoder.transformer.h.13.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 3.0, 6.0, 0.0, 1.0, 3.0, 5.0, 6.0, 7.0, 5.0, 3.0, 10.0, 11.0, 15.0, 8.0, 19.0, 16.0, 19.0, 21.0, 25.0, 34.0, 37.0, 44.0, 37.0, 43.0, 47.0, 39.0, 48.0, 1056.0, 43.0, 32.0, 46.0, 35.0, 39.0, 36.0, 30.0, 32.0, 24.0, 21.0, 24.0, 21.0, 13.0, 10.0, 14.0, 11.0, 11.0, 7.0, 6.0, 6.0, 4.0, 3.0, 3.0, 2.0, 1.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.255859375, -3.147674560546875, -3.03948974609375, -2.931304931640625, -2.8231201171875, -2.714935302734375, -2.60675048828125, -2.498565673828125, -2.390380859375, -2.282196044921875, -2.17401123046875, -2.065826416015625, -1.9576416015625, -1.849456787109375, -1.74127197265625, -1.633087158203125, -1.52490234375, -1.416717529296875, -1.30853271484375, -1.200347900390625, -1.0921630859375, -0.983978271484375, -0.87579345703125, -0.767608642578125, -0.659423828125, -0.551239013671875, -0.44305419921875, -0.334869384765625, -0.2266845703125, -0.118499755859375, -0.01031494140625, 0.097869873046875, 0.2060546875, 0.314239501953125, 0.42242431640625, 0.530609130859375, 0.6387939453125, 0.746978759765625, 0.85516357421875, 0.963348388671875, 1.071533203125, 1.179718017578125, 1.28790283203125, 1.396087646484375, 1.5042724609375, 1.612457275390625, 1.72064208984375, 1.828826904296875, 1.93701171875, 2.045196533203125, 2.15338134765625, 2.261566162109375, 2.3697509765625, 2.477935791015625, 2.58612060546875, 2.694305419921875, 2.802490234375, 2.910675048828125, 3.01885986328125, 3.127044677734375, 3.2352294921875, 3.343414306640625, 3.45159912109375, 3.559783935546875, 3.66796875]}, "gradients/decoder.transformer.h.13.crossattention.c_attn.weight": {"_type": "histogram", "values": [3.0, 2.0, 2.0, 6.0, 3.0, 6.0, 13.0, 19.0, 27.0, 35.0, 31.0, 65.0, 104.0, 159.0, 219.0, 320.0, 555.0, 884.0, 1345.0, 2242.0, 3554.0, 5829.0, 9410.0, 15614.0, 26879.0, 45604.0, 76750.0, 121382.0, 206638.0, 1188650.0, 148250.0, 96384.0, 59097.0, 35065.0, 20316.0, 12214.0, 7236.0, 4521.0, 2916.0, 1675.0, 1152.0, 690.0, 460.0, 284.0, 191.0, 127.0, 72.0, 56.0, 34.0, 18.0, 13.0, 8.0, 6.0, 4.0, 4.0, 3.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.424072265625, -0.4095726013183594, -0.39507293701171875, -0.3805732727050781, -0.3660736083984375, -0.3515739440917969, -0.33707427978515625, -0.3225746154785156, -0.308074951171875, -0.2935752868652344, -0.27907562255859375, -0.2645759582519531, -0.2500762939453125, -0.23557662963867188, -0.22107696533203125, -0.20657730102539062, -0.19207763671875, -0.17757797241210938, -0.16307830810546875, -0.14857864379882812, -0.1340789794921875, -0.11957931518554688, -0.10507965087890625, -0.09057998657226562, -0.076080322265625, -0.061580657958984375, -0.04708099365234375, -0.032581329345703125, -0.0180816650390625, -0.003582000732421875, 0.01091766357421875, 0.025417327880859375, 0.0399169921875, 0.054416656494140625, 0.06891632080078125, 0.08341598510742188, 0.0979156494140625, 0.11241531372070312, 0.12691497802734375, 0.14141464233398438, 0.155914306640625, 0.17041397094726562, 0.18491363525390625, 0.19941329956054688, 0.2139129638671875, 0.22841262817382812, 0.24291229248046875, 0.2574119567871094, 0.27191162109375, 0.2864112854003906, 0.30091094970703125, 0.3154106140136719, 0.3299102783203125, 0.3444099426269531, 0.35890960693359375, 0.3734092712402344, 0.387908935546875, 0.4024085998535156, 0.41690826416015625, 0.4314079284667969, 0.4459075927734375, 0.4604072570800781, 0.47490692138671875, 0.4894065856933594, 0.50390625]}, "gradients/decoder.transformer.h.13.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 2.0, 1.0, 3.0, 5.0, 6.0, 8.0, 14.0, 10.0, 18.0, 22.0, 12.0, 20.0, 32.0, 30.0, 53.0, 40.0, 54.0, 76.0, 75.0, 81.0, 75.0, 69.0, 51.0, 35.0, 52.0, 34.0, 20.0, 18.0, 22.0, 14.0, 12.0, 10.0, 12.0, 10.0, 6.0, 5.0, 1.0, 3.0, 0.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.038665771484375, -0.03754568099975586, -0.03642559051513672, -0.03530550003051758, -0.03418540954589844, -0.0330653190612793, -0.031945228576660156, -0.030825138092041016, -0.029705047607421875, -0.028584957122802734, -0.027464866638183594, -0.026344776153564453, -0.025224685668945312, -0.024104595184326172, -0.02298450469970703, -0.02186441421508789, -0.02074432373046875, -0.01962423324584961, -0.01850414276123047, -0.017384052276611328, -0.016263961791992188, -0.015143871307373047, -0.014023780822753906, -0.012903690338134766, -0.011783599853515625, -0.010663509368896484, -0.009543418884277344, -0.008423328399658203, -0.0073032379150390625, -0.006183147430419922, -0.005063056945800781, -0.003942966461181641, -0.0028228759765625, -0.0017027854919433594, -0.0005826950073242188, 0.0005373954772949219, 0.0016574859619140625, 0.002777576446533203, 0.0038976669311523438, 0.005017757415771484, 0.006137847900390625, 0.007257938385009766, 0.008378028869628906, 0.009498119354248047, 0.010618209838867188, 0.011738300323486328, 0.012858390808105469, 0.01397848129272461, 0.01509857177734375, 0.01621866226196289, 0.01733875274658203, 0.018458843231201172, 0.019578933715820312, 0.020699024200439453, 0.021819114685058594, 0.022939205169677734, 0.024059295654296875, 0.025179386138916016, 0.026299476623535156, 0.027419567108154297, 0.028539657592773438, 0.029659748077392578, 0.03077983856201172, 0.03189992904663086, 0.03302001953125]}, "gradients/decoder.transformer.h.13.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 2.0, 3.0, 3.0, 8.0, 10.0, 12.0, 12.0, 15.0, 17.0, 24.0, 26.0, 47.0, 62.0, 100.0, 133.0, 274.0, 602.0, 3095.0, 1037943.0, 4722.0, 645.0, 282.0, 143.0, 108.0, 67.0, 56.0, 33.0, 17.0, 25.0, 22.0, 12.0, 15.0, 7.0, 5.0, 8.0, 1.0, 4.0, 1.0, 3.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.642578125, -0.6208267211914062, -0.5990753173828125, -0.5773239135742188, -0.555572509765625, -0.5338211059570312, -0.5120697021484375, -0.49031829833984375, -0.46856689453125, -0.44681549072265625, -0.4250640869140625, -0.40331268310546875, -0.381561279296875, -0.35980987548828125, -0.3380584716796875, -0.31630706787109375, -0.2945556640625, -0.27280426025390625, -0.2510528564453125, -0.22930145263671875, -0.207550048828125, -0.18579864501953125, -0.1640472412109375, -0.14229583740234375, -0.12054443359375, -0.09879302978515625, -0.0770416259765625, -0.05529022216796875, -0.033538818359375, -0.01178741455078125, 0.0099639892578125, 0.03171539306640625, 0.053466796875, 0.07521820068359375, 0.0969696044921875, 0.11872100830078125, 0.140472412109375, 0.16222381591796875, 0.1839752197265625, 0.20572662353515625, 0.22747802734375, 0.24922943115234375, 0.2709808349609375, 0.29273223876953125, 0.314483642578125, 0.33623504638671875, 0.3579864501953125, 0.37973785400390625, 0.4014892578125, 0.42324066162109375, 0.4449920654296875, 0.46674346923828125, 0.488494873046875, 0.5102462768554688, 0.5319976806640625, 0.5537490844726562, 0.57550048828125, 0.5972518920898438, 0.6190032958984375, 0.6407546997070312, 0.662506103515625, 0.6842575073242188, 0.7060089111328125, 0.7277603149414062, 0.74951171875]}, "gradients/decoder.transformer.h.13.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 5.0, 231.0, 783.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.13723061978816986, -0.11794070154428482, -0.09865078330039978, -0.07936087250709534, -0.0600709542632103, -0.040781036019325256, -0.021491125226020813, -0.0022011995315551758, 0.017088711261749268, 0.03637862950563431, 0.05566854402422905, 0.07495845854282379, 0.09424837678670883, 0.11353829503059387, 0.13282820582389832, 0.15211813151836395, 0.1714080423116684, 0.19069795310497284, 0.20998787879943848, 0.22927778959274292, 0.24856770038604736, 0.2678576111793518, 0.28714752197265625, 0.3064374625682831, 0.3257273733615875, 0.34501728415489197, 0.3643071949481964, 0.38359713554382324, 0.4028870463371277, 0.42217695713043213, 0.4414668679237366, 0.460756778717041, 0.48004668951034546, 0.4993366003036499, 0.5186265110969543, 0.5379164218902588, 0.5572063326835632, 0.5764962434768677, 0.5957862138748169, 0.6150761246681213, 0.6343660354614258, 0.6536559462547302, 0.6729458570480347, 0.6922357678413391, 0.7115256786346436, 0.7308156490325928, 0.7501055002212524, 0.7693954706192017, 0.7886853218078613, 0.8079752326011658, 0.8272651433944702, 0.8465550541877747, 0.8658449649810791, 0.8851349353790283, 0.904424786567688, 0.9237147569656372, 0.9430046677589417, 0.9622945785522461, 0.9815844893455505, 1.000874400138855, 1.0201643705368042, 1.0394542217254639, 1.058744192123413, 1.0780340433120728, 1.097324013710022]}, "gradients/decoder.transformer.h.13.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 0.0, 4.0, 2.0, 9.0, 10.0, 8.0, 6.0, 11.0, 7.0, 13.0, 12.0, 27.0, 14.0, 32.0, 24.0, 32.0, 27.0, 32.0, 32.0, 31.0, 39.0, 43.0, 48.0, 41.0, 41.0, 36.0, 36.0, 52.0, 42.0, 20.0, 28.0, 34.0, 28.0, 26.0, 30.0, 18.0, 21.0, 22.0, 16.0, 12.0, 4.0, 12.0, 6.0, 5.0, 7.0, 5.0, 3.0, 4.0, 1.0, 1.0, 3.0, 1.0], "bins": [-0.05590951442718506, -0.054350536316633224, -0.05279155820608139, -0.051232583820819855, -0.04967360571026802, -0.048114627599716187, -0.04655565321445465, -0.04499667510390282, -0.04343769699335098, -0.04187871888279915, -0.040319740772247314, -0.03876076638698578, -0.037201788276433945, -0.03564281016588211, -0.034083835780620575, -0.03252485767006874, -0.030965879559516907, -0.029406901448965073, -0.027847925201058388, -0.026288948953151703, -0.02472997084259987, -0.023170992732048035, -0.02161201648414135, -0.020053040236234665, -0.01849406212568283, -0.016935084015130997, -0.015376107767224312, -0.013817130587995052, -0.012258153408765793, -0.010699176229536533, -0.009140199050307274, -0.007581221871078014, -0.006022244691848755, -0.004463267512619495, -0.002904290333390236, -0.0013453131541609764, 0.00021366402506828308, 0.0017726412042975426, 0.003331618383526802, 0.0048905955627560616, 0.006449572741985321, 0.00800854992121458, 0.00956752710044384, 0.0111265042796731, 0.012685481458902359, 0.014244458638131618, 0.015803435817360878, 0.017362412065267563, 0.018921390175819397, 0.02048036828637123, 0.022039344534277916, 0.0235983207821846, 0.025157298892736435, 0.02671627700328827, 0.028275253251194954, 0.02983422949910164, 0.03139320760965347, 0.03295218572020531, 0.03451116383075714, 0.03607013821601868, 0.03762911632657051, 0.039188094437122345, 0.04074706882238388, 0.042306046932935715, 0.04386502504348755]}, "gradients/decoder.transformer.h.13.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 2.0, 7.0, 4.0, 2.0, 6.0, 7.0, 11.0, 12.0, 7.0, 7.0, 16.0, 18.0, 35.0, 27.0, 27.0, 26.0, 35.0, 36.0, 36.0, 33.0, 42.0, 46.0, 43.0, 37.0, 34.0, 44.0, 37.0, 33.0, 29.0, 37.0, 40.0, 23.0, 37.0, 30.0, 26.0, 24.0, 11.0, 16.0, 13.0, 10.0, 7.0, 10.0, 6.0, 7.0, 2.0, 4.0, 5.0, 5.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.00390625, -3.85540771484375, -3.7069091796875, -3.55841064453125, -3.409912109375, -3.26141357421875, -3.1129150390625, -2.96441650390625, -2.81591796875, -2.66741943359375, -2.5189208984375, -2.37042236328125, -2.221923828125, -2.07342529296875, -1.9249267578125, -1.77642822265625, -1.6279296875, -1.47943115234375, -1.3309326171875, -1.18243408203125, -1.033935546875, -0.88543701171875, -0.7369384765625, -0.58843994140625, -0.43994140625, -0.29144287109375, -0.1429443359375, 0.00555419921875, 0.154052734375, 0.30255126953125, 0.4510498046875, 0.59954833984375, 0.748046875, 0.89654541015625, 1.0450439453125, 1.19354248046875, 1.342041015625, 1.49053955078125, 1.6390380859375, 1.78753662109375, 1.93603515625, 2.08453369140625, 2.2330322265625, 2.38153076171875, 2.530029296875, 2.67852783203125, 2.8270263671875, 2.97552490234375, 3.1240234375, 3.27252197265625, 3.4210205078125, 3.56951904296875, 3.718017578125, 3.86651611328125, 4.0150146484375, 4.16351318359375, 4.31201171875, 4.46051025390625, 4.6090087890625, 4.75750732421875, 4.906005859375, 5.05450439453125, 5.2030029296875, 5.35150146484375, 5.5]}, "gradients/decoder.transformer.h.13.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 6.0, 2.0, 4.0, 18.0, 35.0, 52.0, 116.0, 195.0, 362.0, 701.0, 1533.0, 3210.0, 7008.0, 17992.0, 54705.0, 184808.0, 437453.0, 233318.0, 69326.0, 21991.0, 8517.0, 3747.0, 1727.0, 838.0, 419.0, 229.0, 88.0, 79.0, 27.0, 20.0, 14.0, 6.0, 6.0, 3.0, 4.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-7.86328125, -7.64239501953125, -7.4215087890625, -7.20062255859375, -6.979736328125, -6.75885009765625, -6.5379638671875, -6.31707763671875, -6.09619140625, -5.87530517578125, -5.6544189453125, -5.43353271484375, -5.212646484375, -4.99176025390625, -4.7708740234375, -4.54998779296875, -4.3291015625, -4.10821533203125, -3.8873291015625, -3.66644287109375, -3.445556640625, -3.22467041015625, -3.0037841796875, -2.78289794921875, -2.56201171875, -2.34112548828125, -2.1202392578125, -1.89935302734375, -1.678466796875, -1.45758056640625, -1.2366943359375, -1.01580810546875, -0.794921875, -0.57403564453125, -0.3531494140625, -0.13226318359375, 0.088623046875, 0.30950927734375, 0.5303955078125, 0.75128173828125, 0.97216796875, 1.19305419921875, 1.4139404296875, 1.63482666015625, 1.855712890625, 2.07659912109375, 2.2974853515625, 2.51837158203125, 2.7392578125, 2.96014404296875, 3.1810302734375, 3.40191650390625, 3.622802734375, 3.84368896484375, 4.0645751953125, 4.28546142578125, 4.50634765625, 4.72723388671875, 4.9481201171875, 5.16900634765625, 5.389892578125, 5.61077880859375, 5.8316650390625, 6.05255126953125, 6.2734375]}, "gradients/decoder.transformer.h.13.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 2.0, 3.0, 1.0, 6.0, 1.0, 4.0, 5.0, 6.0, 5.0, 4.0, 9.0, 12.0, 11.0, 17.0, 23.0, 22.0, 24.0, 25.0, 35.0, 44.0, 37.0, 42.0, 52.0, 76.0, 169.0, 358.0, 1407.0, 188.0, 79.0, 64.0, 55.0, 36.0, 37.0, 25.0, 27.0, 22.0, 33.0, 17.0, 15.0, 12.0, 9.0, 10.0, 12.0, 4.0, 7.0, 6.0, 2.0, 1.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-14.96875, -14.453857421875, -13.93896484375, -13.424072265625, -12.9091796875, -12.394287109375, -11.87939453125, -11.364501953125, -10.849609375, -10.334716796875, -9.81982421875, -9.304931640625, -8.7900390625, -8.275146484375, -7.76025390625, -7.245361328125, -6.73046875, -6.215576171875, -5.70068359375, -5.185791015625, -4.6708984375, -4.156005859375, -3.64111328125, -3.126220703125, -2.611328125, -2.096435546875, -1.58154296875, -1.066650390625, -0.5517578125, -0.036865234375, 0.47802734375, 0.992919921875, 1.5078125, 2.022705078125, 2.53759765625, 3.052490234375, 3.5673828125, 4.082275390625, 4.59716796875, 5.112060546875, 5.626953125, 6.141845703125, 6.65673828125, 7.171630859375, 7.6865234375, 8.201416015625, 8.71630859375, 9.231201171875, 9.74609375, 10.260986328125, 10.77587890625, 11.290771484375, 11.8056640625, 12.320556640625, 12.83544921875, 13.350341796875, 13.865234375, 14.380126953125, 14.89501953125, 15.409912109375, 15.9248046875, 16.439697265625, 16.95458984375, 17.469482421875, 17.984375]}, "gradients/decoder.transformer.h.13.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 12.0, 10.0, 9.0, 26.0, 31.0, 45.0, 76.0, 115.0, 149.0, 265.0, 432.0, 1228.0, 69266.0, 3066597.0, 5765.0, 698.0, 317.0, 243.0, 132.0, 101.0, 47.0, 34.0, 26.0, 21.0, 20.0, 5.0, 7.0, 6.0, 5.0, 7.0, 3.0, 0.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-76.3125, -74.2646484375, -72.216796875, -70.1689453125, -68.12109375, -66.0732421875, -64.025390625, -61.9775390625, -59.9296875, -57.8818359375, -55.833984375, -53.7861328125, -51.73828125, -49.6904296875, -47.642578125, -45.5947265625, -43.546875, -41.4990234375, -39.451171875, -37.4033203125, -35.35546875, -33.3076171875, -31.259765625, -29.2119140625, -27.1640625, -25.1162109375, -23.068359375, -21.0205078125, -18.97265625, -16.9248046875, -14.876953125, -12.8291015625, -10.78125, -8.7333984375, -6.685546875, -4.6376953125, -2.58984375, -0.5419921875, 1.505859375, 3.5537109375, 5.6015625, 7.6494140625, 9.697265625, 11.7451171875, 13.79296875, 15.8408203125, 17.888671875, 19.9365234375, 21.984375, 24.0322265625, 26.080078125, 28.1279296875, 30.17578125, 32.2236328125, 34.271484375, 36.3193359375, 38.3671875, 40.4150390625, 42.462890625, 44.5107421875, 46.55859375, 48.6064453125, 50.654296875, 52.7021484375, 54.75]}, "gradients/decoder.transformer.h.13.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 78.0, 355.0, 434.0, 138.0, 10.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-195.0911407470703, -191.5563201904297, -188.02151489257812, -184.4866943359375, -180.95188903808594, -177.4170684814453, -173.88226318359375, -170.34744262695312, -166.81263732910156, -163.27781677246094, -159.74301147460938, -156.20819091796875, -152.6733856201172, -149.13856506347656, -145.603759765625, -142.06893920898438, -138.53411865234375, -134.99929809570312, -131.46449279785156, -127.92967987060547, -124.39486694335938, -120.86005401611328, -117.32524108886719, -113.79042053222656, -110.255615234375, -106.7208023071289, -103.18598937988281, -99.65117645263672, -96.11636352539062, -92.58155059814453, -89.04673767089844, -85.51191711425781, -81.97711181640625, -78.44229888916016, -74.90748596191406, -71.37267303466797, -67.83786010742188, -64.30304718017578, -60.76823043823242, -57.23341751098633, -53.6986083984375, -50.163795471191406, -46.62898254394531, -43.09416961669922, -39.559356689453125, -36.02454376220703, -32.48972702026367, -28.954914093017578, -25.42009925842285, -21.885286331176758, -18.35047149658203, -14.815658569335938, -11.280845642089844, -7.74603271484375, -4.211217880249023, -0.6764049530029297, 2.858407974243164, 6.393221378326416, 9.928034782409668, 13.462848663330078, 16.997661590576172, 20.532474517822266, 24.067289352416992, 27.602102279663086, 31.13691520690918]}, "gradients/decoder.transformer.h.13.ln_1.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 3.0, 2.0, 6.0, 5.0, 8.0, 7.0, 8.0, 8.0, 13.0, 12.0, 9.0, 14.0, 12.0, 25.0, 24.0, 29.0, 33.0, 36.0, 34.0, 33.0, 36.0, 33.0, 24.0, 41.0, 42.0, 37.0, 34.0, 53.0, 40.0, 44.0, 30.0, 30.0, 35.0, 38.0, 18.0, 24.0, 27.0, 20.0, 16.0, 10.0, 13.0, 11.0, 6.0, 10.0, 7.0, 5.0, 3.0, 5.0, 2.0, 1.0, 1.0], "bins": [-49.89497756958008, -48.5804328918457, -47.26588821411133, -45.95134353637695, -44.636802673339844, -43.32225799560547, -42.007713317871094, -40.69316864013672, -39.378623962402344, -38.06407928466797, -36.749534606933594, -35.43498992919922, -34.120445251464844, -32.805904388427734, -31.49135971069336, -30.176815032958984, -28.86227035522461, -27.547725677490234, -26.23318099975586, -24.918638229370117, -23.604093551635742, -22.289548873901367, -20.975006103515625, -19.66046142578125, -18.345916748046875, -17.0313720703125, -15.716828346252441, -14.402284622192383, -13.087739944458008, -11.773195266723633, -10.458651542663574, -9.144107818603516, -7.829566955566406, -6.5150227546691895, -5.200478553771973, -3.885934352874756, -2.571390151977539, -1.2568459510803223, 0.05769824981689453, 1.3722419738769531, 2.686786651611328, 4.001330852508545, 5.315875053405762, 6.6304192543029785, 7.944963455200195, 9.25950813293457, 10.574051856994629, 11.888595581054688, 13.203140258789062, 14.517684936523438, 15.832228660583496, 17.146772384643555, 18.46131706237793, 19.775861740112305, 21.090404510498047, 22.404949188232422, 23.719493865966797, 25.034038543701172, 26.348583221435547, 27.66312599182129, 28.977670669555664, 30.29221534729004, 31.60675811767578, 32.921302795410156, 34.23584747314453]}, "gradients/decoder.transformer.h.12.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 2.0, 6.0, 2.0, 0.0, 7.0, 5.0, 11.0, 8.0, 11.0, 13.0, 15.0, 18.0, 18.0, 21.0, 21.0, 27.0, 35.0, 39.0, 29.0, 33.0, 43.0, 46.0, 41.0, 39.0, 28.0, 31.0, 53.0, 34.0, 31.0, 45.0, 44.0, 27.0, 26.0, 39.0, 31.0, 24.0, 18.0, 22.0, 13.0, 8.0, 8.0, 13.0, 3.0, 5.0, 3.0, 4.0, 5.0, 3.0, 5.0, 0.0, 3.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-4.66015625, -4.4976806640625, -4.335205078125, -4.1727294921875, -4.01025390625, -3.8477783203125, -3.685302734375, -3.5228271484375, -3.3603515625, -3.1978759765625, -3.035400390625, -2.8729248046875, -2.71044921875, -2.5479736328125, -2.385498046875, -2.2230224609375, -2.060546875, -1.8980712890625, -1.735595703125, -1.5731201171875, -1.41064453125, -1.2481689453125, -1.085693359375, -0.9232177734375, -0.7607421875, -0.5982666015625, -0.435791015625, -0.2733154296875, -0.11083984375, 0.0516357421875, 0.214111328125, 0.3765869140625, 0.5390625, 0.7015380859375, 0.864013671875, 1.0264892578125, 1.18896484375, 1.3514404296875, 1.513916015625, 1.6763916015625, 1.8388671875, 2.0013427734375, 2.163818359375, 2.3262939453125, 2.48876953125, 2.6512451171875, 2.813720703125, 2.9761962890625, 3.138671875, 3.3011474609375, 3.463623046875, 3.6260986328125, 3.78857421875, 3.9510498046875, 4.113525390625, 4.2760009765625, 4.4384765625, 4.6009521484375, 4.763427734375, 4.9259033203125, 5.08837890625, 5.2508544921875, 5.413330078125, 5.5758056640625, 5.73828125]}, "gradients/decoder.transformer.h.12.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 5.0, 4.0, 4.0, 4.0, 6.0, 10.0, 10.0, 14.0, 18.0, 23.0, 36.0, 41.0, 70.0, 82.0, 117.0, 192.0, 280.0, 463.0, 833.0, 1778.0, 4269.0, 13274.0, 54530.0, 266397.0, 1124656.0, 1806942.0, 723068.0, 151236.0, 31732.0, 8457.0, 2984.0, 1233.0, 623.0, 286.0, 205.0, 119.0, 78.0, 75.0, 36.0, 30.0, 21.0, 8.0, 9.0, 7.0, 9.0, 1.0, 5.0, 6.0, 2.0, 0.0, 2.0, 4.0, 1.0, 2.0, 0.0, 2.0, 1.0, 1.0], "bins": [-8.140625, -7.8807373046875, -7.620849609375, -7.3609619140625, -7.10107421875, -6.8411865234375, -6.581298828125, -6.3214111328125, -6.0615234375, -5.8016357421875, -5.541748046875, -5.2818603515625, -5.02197265625, -4.7620849609375, -4.502197265625, -4.2423095703125, -3.982421875, -3.7225341796875, -3.462646484375, -3.2027587890625, -2.94287109375, -2.6829833984375, -2.423095703125, -2.1632080078125, -1.9033203125, -1.6434326171875, -1.383544921875, -1.1236572265625, -0.86376953125, -0.6038818359375, -0.343994140625, -0.0841064453125, 0.17578125, 0.4356689453125, 0.695556640625, 0.9554443359375, 1.21533203125, 1.4752197265625, 1.735107421875, 1.9949951171875, 2.2548828125, 2.5147705078125, 2.774658203125, 3.0345458984375, 3.29443359375, 3.5543212890625, 3.814208984375, 4.0740966796875, 4.333984375, 4.5938720703125, 4.853759765625, 5.1136474609375, 5.37353515625, 5.6334228515625, 5.893310546875, 6.1531982421875, 6.4130859375, 6.6729736328125, 6.932861328125, 7.1927490234375, 7.45263671875, 7.7125244140625, 7.972412109375, 8.2322998046875, 8.4921875]}, "gradients/decoder.transformer.h.12.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 1.0, 4.0, 6.0, 2.0, 8.0, 7.0, 7.0, 8.0, 22.0, 25.0, 32.0, 54.0, 60.0, 95.0, 125.0, 194.0, 228.0, 307.0, 420.0, 475.0, 475.0, 386.0, 327.0, 217.0, 168.0, 124.0, 98.0, 54.0, 49.0, 39.0, 23.0, 13.0, 8.0, 7.0, 2.0, 3.0, 2.0, 1.0, 1.0, 4.0, 0.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-11.125, -10.7760009765625, -10.427001953125, -10.0780029296875, -9.72900390625, -9.3800048828125, -9.031005859375, -8.6820068359375, -8.3330078125, -7.9840087890625, -7.635009765625, -7.2860107421875, -6.93701171875, -6.5880126953125, -6.239013671875, -5.8900146484375, -5.541015625, -5.1920166015625, -4.843017578125, -4.4940185546875, -4.14501953125, -3.7960205078125, -3.447021484375, -3.0980224609375, -2.7490234375, -2.4000244140625, -2.051025390625, -1.7020263671875, -1.35302734375, -1.0040283203125, -0.655029296875, -0.3060302734375, 0.04296875, 0.3919677734375, 0.740966796875, 1.0899658203125, 1.43896484375, 1.7879638671875, 2.136962890625, 2.4859619140625, 2.8349609375, 3.1839599609375, 3.532958984375, 3.8819580078125, 4.23095703125, 4.5799560546875, 4.928955078125, 5.2779541015625, 5.626953125, 5.9759521484375, 6.324951171875, 6.6739501953125, 7.02294921875, 7.3719482421875, 7.720947265625, 8.0699462890625, 8.4189453125, 8.7679443359375, 9.116943359375, 9.4659423828125, 9.81494140625, 10.1639404296875, 10.512939453125, 10.8619384765625, 11.2109375]}, "gradients/decoder.transformer.h.12.mlp.c_fc.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 2.0, 1.0, 1.0, 3.0, 4.0, 1.0, 5.0, 4.0, 8.0, 13.0, 20.0, 33.0, 54.0, 71.0, 94.0, 158.0, 205.0, 354.0, 705.0, 4814.0, 279867.0, 3783061.0, 120188.0, 2982.0, 659.0, 344.0, 202.0, 153.0, 75.0, 64.0, 50.0, 27.0, 21.0, 10.0, 10.0, 5.0, 12.0, 3.0, 7.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-37.90625, -36.73681640625, -35.5673828125, -34.39794921875, -33.228515625, -32.05908203125, -30.8896484375, -29.72021484375, -28.55078125, -27.38134765625, -26.2119140625, -25.04248046875, -23.873046875, -22.70361328125, -21.5341796875, -20.36474609375, -19.1953125, -18.02587890625, -16.8564453125, -15.68701171875, -14.517578125, -13.34814453125, -12.1787109375, -11.00927734375, -9.83984375, -8.67041015625, -7.5009765625, -6.33154296875, -5.162109375, -3.99267578125, -2.8232421875, -1.65380859375, -0.484375, 0.68505859375, 1.8544921875, 3.02392578125, 4.193359375, 5.36279296875, 6.5322265625, 7.70166015625, 8.87109375, 10.04052734375, 11.2099609375, 12.37939453125, 13.548828125, 14.71826171875, 15.8876953125, 17.05712890625, 18.2265625, 19.39599609375, 20.5654296875, 21.73486328125, 22.904296875, 24.07373046875, 25.2431640625, 26.41259765625, 27.58203125, 28.75146484375, 29.9208984375, 31.09033203125, 32.259765625, 33.42919921875, 34.5986328125, 35.76806640625, 36.9375]}, "gradients/decoder.transformer.h.12.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 4.0, 11.0, 15.0, 25.0, 52.0, 61.0, 88.0, 98.0, 115.0, 131.0, 115.0, 86.0, 69.0, 54.0, 33.0, 27.0, 13.0, 11.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-69.04686737060547, -67.33910369873047, -65.63134002685547, -63.9235725402832, -62.2158088684082, -60.50804138183594, -58.80027770996094, -57.09251403808594, -55.38475036621094, -53.67698669433594, -51.96921920776367, -50.26145553588867, -48.55369186401367, -46.845924377441406, -45.138160705566406, -43.430397033691406, -41.72262954711914, -40.01486587524414, -38.307098388671875, -36.599334716796875, -34.891571044921875, -33.183807373046875, -31.47603988647461, -29.76827621459961, -28.060510635375977, -26.352745056152344, -24.644981384277344, -22.93721580505371, -21.229450225830078, -19.521686553955078, -17.813920974731445, -16.106155395507812, -14.398387908935547, -12.69062328338623, -10.982858657836914, -9.275093078613281, -7.567328453063965, -5.859563827514648, -4.151798248291016, -2.444033622741699, -0.7362689971923828, 0.9714958667755127, 2.679260730743408, 4.387025833129883, 6.094790458679199, 7.802555084228516, 9.510320663452148, 11.218085289001465, 12.925849914550781, 14.633614540100098, 16.341379165649414, 18.049144744873047, 19.756908416748047, 21.46467399597168, 23.172439575195312, 24.880203247070312, 26.587968826293945, 28.295734405517578, 30.003498077392578, 31.71126365661621, 33.419029235839844, 35.126792907714844, 36.834556579589844, 38.54232406616211, 40.25008773803711]}, "gradients/decoder.transformer.h.12.ln_2.bias": {"_type": "histogram", "values": [2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 3.0, 0.0, 2.0, 4.0, 12.0, 9.0, 7.0, 8.0, 11.0, 9.0, 11.0, 18.0, 18.0, 15.0, 18.0, 12.0, 24.0, 29.0, 30.0, 29.0, 33.0, 31.0, 45.0, 41.0, 35.0, 33.0, 46.0, 34.0, 32.0, 49.0, 29.0, 46.0, 30.0, 37.0, 26.0, 24.0, 19.0, 17.0, 25.0, 17.0, 13.0, 15.0, 12.0, 11.0, 9.0, 8.0, 13.0, 5.0, 3.0, 6.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-30.984907150268555, -30.021028518676758, -29.057147979736328, -28.09326934814453, -27.129390716552734, -26.165512084960938, -25.20163345336914, -24.23775291442871, -23.273874282836914, -22.309995651245117, -21.346115112304688, -20.38223648071289, -19.418357849121094, -18.454479217529297, -17.4906005859375, -16.52672004699707, -15.562841415405273, -14.598962783813477, -13.635083198547363, -12.67120361328125, -11.707324981689453, -10.743446350097656, -9.779566764831543, -8.81568717956543, -7.851808547973633, -6.887929439544678, -5.924050331115723, -4.960171222686768, -3.9962921142578125, -3.0324130058288574, -2.0685338973999023, -1.1046547889709473, -0.140777587890625, 0.8231015205383301, 1.7869806289672852, 2.7508597373962402, 3.7147388458251953, 4.67861795425415, 5.6424970626831055, 6.6063761711120605, 7.570255279541016, 8.534133911132812, 9.498013496398926, 10.461893081665039, 11.425771713256836, 12.389650344848633, 13.353529930114746, 14.31740951538086, 15.281288146972656, 16.245166778564453, 17.20904541015625, 18.17292594909668, 19.136804580688477, 20.100683212280273, 21.064563751220703, 22.0284423828125, 22.992321014404297, 23.956199645996094, 24.92007827758789, 25.88395881652832, 26.847837448120117, 27.811716079711914, 28.775596618652344, 29.73947525024414, 30.703353881835938]}, "gradients/decoder.transformer.h.12.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 5.0, 1.0, 5.0, 2.0, 3.0, 5.0, 5.0, 14.0, 16.0, 16.0, 10.0, 17.0, 26.0, 30.0, 26.0, 23.0, 32.0, 39.0, 36.0, 26.0, 30.0, 43.0, 41.0, 36.0, 56.0, 47.0, 44.0, 37.0, 44.0, 43.0, 24.0, 28.0, 31.0, 26.0, 18.0, 24.0, 15.0, 15.0, 16.0, 15.0, 14.0, 4.0, 6.0, 4.0, 3.0, 3.0, 5.0, 0.0, 3.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.86328125, -4.70135498046875, -4.5394287109375, -4.37750244140625, -4.215576171875, -4.05364990234375, -3.8917236328125, -3.72979736328125, -3.56787109375, -3.40594482421875, -3.2440185546875, -3.08209228515625, -2.920166015625, -2.75823974609375, -2.5963134765625, -2.43438720703125, -2.2724609375, -2.11053466796875, -1.9486083984375, -1.78668212890625, -1.624755859375, -1.46282958984375, -1.3009033203125, -1.13897705078125, -0.97705078125, -0.81512451171875, -0.6531982421875, -0.49127197265625, -0.329345703125, -0.16741943359375, -0.0054931640625, 0.15643310546875, 0.318359375, 0.48028564453125, 0.6422119140625, 0.80413818359375, 0.966064453125, 1.12799072265625, 1.2899169921875, 1.45184326171875, 1.61376953125, 1.77569580078125, 1.9376220703125, 2.09954833984375, 2.261474609375, 2.42340087890625, 2.5853271484375, 2.74725341796875, 2.9091796875, 3.07110595703125, 3.2330322265625, 3.39495849609375, 3.556884765625, 3.71881103515625, 3.8807373046875, 4.04266357421875, 4.20458984375, 4.36651611328125, 4.5284423828125, 4.69036865234375, 4.852294921875, 5.01422119140625, 5.1761474609375, 5.33807373046875, 5.5]}, "gradients/decoder.transformer.h.12.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 6.0, 5.0, 7.0, 8.0, 15.0, 14.0, 26.0, 34.0, 51.0, 63.0, 99.0, 173.0, 218.0, 350.0, 434.0, 649.0, 963.0, 1476.0, 2335.0, 3468.0, 5357.0, 8422.0, 13303.0, 21229.0, 34896.0, 55987.0, 89701.0, 134395.0, 172743.0, 166305.0, 123035.0, 79828.0, 49793.0, 30579.0, 18809.0, 12057.0, 7461.0, 4856.0, 3205.0, 2045.0, 1304.0, 892.0, 597.0, 409.0, 306.0, 207.0, 133.0, 108.0, 66.0, 42.0, 21.0, 22.0, 15.0, 21.0, 12.0, 5.0, 3.0, 3.0, 3.0, 1.0, 2.0], "bins": [-0.81640625, -0.7907943725585938, -0.7651824951171875, -0.7395706176757812, -0.713958740234375, -0.6883468627929688, -0.6627349853515625, -0.6371231079101562, -0.61151123046875, -0.5858993530273438, -0.5602874755859375, -0.5346755981445312, -0.509063720703125, -0.48345184326171875, -0.4578399658203125, -0.43222808837890625, -0.4066162109375, -0.38100433349609375, -0.3553924560546875, -0.32978057861328125, -0.304168701171875, -0.27855682373046875, -0.2529449462890625, -0.22733306884765625, -0.20172119140625, -0.17610931396484375, -0.1504974365234375, -0.12488555908203125, -0.099273681640625, -0.07366180419921875, -0.0480499267578125, -0.02243804931640625, 0.003173828125, 0.02878570556640625, 0.0543975830078125, 0.08000946044921875, 0.105621337890625, 0.13123321533203125, 0.1568450927734375, 0.18245697021484375, 0.20806884765625, 0.23368072509765625, 0.2592926025390625, 0.28490447998046875, 0.310516357421875, 0.33612823486328125, 0.3617401123046875, 0.38735198974609375, 0.4129638671875, 0.43857574462890625, 0.4641876220703125, 0.48979949951171875, 0.515411376953125, 0.5410232543945312, 0.5666351318359375, 0.5922470092773438, 0.61785888671875, 0.6434707641601562, 0.6690826416015625, 0.6946945190429688, 0.720306396484375, 0.7459182739257812, 0.7715301513671875, 0.7971420288085938, 0.82275390625]}, "gradients/decoder.transformer.h.12.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 2.0, 2.0, 6.0, 9.0, 9.0, 7.0, 13.0, 11.0, 9.0, 14.0, 15.0, 17.0, 29.0, 22.0, 26.0, 34.0, 37.0, 36.0, 38.0, 48.0, 36.0, 40.0, 39.0, 1071.0, 38.0, 46.0, 45.0, 37.0, 36.0, 36.0, 41.0, 32.0, 24.0, 25.0, 13.0, 16.0, 11.0, 17.0, 13.0, 11.0, 5.0, 7.0, 5.0, 5.0, 3.0, 0.0, 1.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.716796875, -3.593536376953125, -3.47027587890625, -3.347015380859375, -3.2237548828125, -3.100494384765625, -2.97723388671875, -2.853973388671875, -2.730712890625, -2.607452392578125, -2.48419189453125, -2.360931396484375, -2.2376708984375, -2.114410400390625, -1.99114990234375, -1.867889404296875, -1.74462890625, -1.621368408203125, -1.49810791015625, -1.374847412109375, -1.2515869140625, -1.128326416015625, -1.00506591796875, -0.881805419921875, -0.758544921875, -0.635284423828125, -0.51202392578125, -0.388763427734375, -0.2655029296875, -0.142242431640625, -0.01898193359375, 0.104278564453125, 0.2275390625, 0.350799560546875, 0.47406005859375, 0.597320556640625, 0.7205810546875, 0.843841552734375, 0.96710205078125, 1.090362548828125, 1.213623046875, 1.336883544921875, 1.46014404296875, 1.583404541015625, 1.7066650390625, 1.829925537109375, 1.95318603515625, 2.076446533203125, 2.19970703125, 2.322967529296875, 2.44622802734375, 2.569488525390625, 2.6927490234375, 2.816009521484375, 2.93927001953125, 3.062530517578125, 3.185791015625, 3.309051513671875, 3.43231201171875, 3.555572509765625, 3.6788330078125, 3.802093505859375, 3.92535400390625, 4.048614501953125, 4.171875]}, "gradients/decoder.transformer.h.12.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 5.0, 8.0, 11.0, 11.0, 19.0, 32.0, 45.0, 81.0, 128.0, 264.0, 419.0, 766.0, 1374.0, 2527.0, 4509.0, 8535.0, 16028.0, 30720.0, 58982.0, 110681.0, 190157.0, 1272267.0, 179508.0, 103515.0, 54963.0, 28865.0, 15147.0, 7940.0, 4258.0, 2327.0, 1262.0, 746.0, 435.0, 254.0, 119.0, 82.0, 48.0, 37.0, 21.0, 19.0, 10.0, 8.0, 3.0, 4.0, 2.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.71630859375, -0.6966552734375, -0.677001953125, -0.6573486328125, -0.6376953125, -0.6180419921875, -0.598388671875, -0.5787353515625, -0.55908203125, -0.5394287109375, -0.519775390625, -0.5001220703125, -0.48046875, -0.4608154296875, -0.441162109375, -0.4215087890625, -0.40185546875, -0.3822021484375, -0.362548828125, -0.3428955078125, -0.3232421875, -0.3035888671875, -0.283935546875, -0.2642822265625, -0.24462890625, -0.2249755859375, -0.205322265625, -0.1856689453125, -0.166015625, -0.1463623046875, -0.126708984375, -0.1070556640625, -0.08740234375, -0.0677490234375, -0.048095703125, -0.0284423828125, -0.0087890625, 0.0108642578125, 0.030517578125, 0.0501708984375, 0.06982421875, 0.0894775390625, 0.109130859375, 0.1287841796875, 0.1484375, 0.1680908203125, 0.187744140625, 0.2073974609375, 0.22705078125, 0.2467041015625, 0.266357421875, 0.2860107421875, 0.3056640625, 0.3253173828125, 0.344970703125, 0.3646240234375, 0.38427734375, 0.4039306640625, 0.423583984375, 0.4432373046875, 0.462890625, 0.4825439453125, 0.502197265625, 0.5218505859375, 0.54150390625]}, "gradients/decoder.transformer.h.12.crossattention.q_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 3.0, 2.0, 7.0, 2.0, 6.0, 5.0, 2.0, 6.0, 8.0, 19.0, 20.0, 19.0, 28.0, 37.0, 30.0, 43.0, 45.0, 54.0, 51.0, 68.0, 60.0, 70.0, 53.0, 50.0, 42.0, 45.0, 31.0, 25.0, 34.0, 16.0, 29.0, 15.0, 16.0, 14.0, 7.0, 14.0, 4.0, 4.0, 5.0, 3.0, 4.0, 5.0, 3.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0], "bins": [-0.0290679931640625, -0.028173208236694336, -0.027278423309326172, -0.026383638381958008, -0.025488853454589844, -0.02459406852722168, -0.023699283599853516, -0.02280449867248535, -0.021909713745117188, -0.021014928817749023, -0.02012014389038086, -0.019225358963012695, -0.01833057403564453, -0.017435789108276367, -0.016541004180908203, -0.01564621925354004, -0.014751434326171875, -0.013856649398803711, -0.012961864471435547, -0.012067079544067383, -0.011172294616699219, -0.010277509689331055, -0.00938272476196289, -0.008487939834594727, -0.0075931549072265625, -0.0066983699798583984, -0.005803585052490234, -0.00490880012512207, -0.004014015197753906, -0.003119230270385742, -0.002224445343017578, -0.001329660415649414, -0.00043487548828125, 0.00045990943908691406, 0.0013546943664550781, 0.002249479293823242, 0.0031442642211914062, 0.00403904914855957, 0.004933834075927734, 0.0058286190032958984, 0.0067234039306640625, 0.0076181888580322266, 0.00851297378540039, 0.009407758712768555, 0.010302543640136719, 0.011197328567504883, 0.012092113494873047, 0.012986898422241211, 0.013881683349609375, 0.014776468276977539, 0.015671253204345703, 0.016566038131713867, 0.01746082305908203, 0.018355607986450195, 0.01925039291381836, 0.020145177841186523, 0.021039962768554688, 0.02193474769592285, 0.022829532623291016, 0.02372431755065918, 0.024619102478027344, 0.025513887405395508, 0.026408672332763672, 0.027303457260131836, 0.0281982421875]}, "gradients/decoder.transformer.h.12.crossattention.q_attn.weight": {"_type": "histogram", "values": [3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 3.0, 5.0, 4.0, 4.0, 5.0, 6.0, 6.0, 10.0, 13.0, 19.0, 20.0, 27.0, 37.0, 30.0, 49.0, 60.0, 77.0, 113.0, 168.0, 315.0, 636.0, 5277.0, 1032947.0, 7108.0, 635.0, 330.0, 195.0, 145.0, 74.0, 57.0, 46.0, 29.0, 28.0, 28.0, 8.0, 8.0, 6.0, 4.0, 5.0, 5.0, 8.0, 3.0, 3.0, 2.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0], "bins": [-0.5517578125, -0.5342559814453125, -0.516754150390625, -0.4992523193359375, -0.48175048828125, -0.4642486572265625, -0.446746826171875, -0.4292449951171875, -0.4117431640625, -0.3942413330078125, -0.376739501953125, -0.3592376708984375, -0.34173583984375, -0.3242340087890625, -0.306732177734375, -0.2892303466796875, -0.271728515625, -0.2542266845703125, -0.236724853515625, -0.2192230224609375, -0.20172119140625, -0.1842193603515625, -0.166717529296875, -0.1492156982421875, -0.1317138671875, -0.1142120361328125, -0.096710205078125, -0.0792083740234375, -0.06170654296875, -0.0442047119140625, -0.026702880859375, -0.0092010498046875, 0.00830078125, 0.0258026123046875, 0.043304443359375, 0.0608062744140625, 0.07830810546875, 0.0958099365234375, 0.113311767578125, 0.1308135986328125, 0.1483154296875, 0.1658172607421875, 0.183319091796875, 0.2008209228515625, 0.21832275390625, 0.2358245849609375, 0.253326416015625, 0.2708282470703125, 0.288330078125, 0.3058319091796875, 0.323333740234375, 0.3408355712890625, 0.35833740234375, 0.3758392333984375, 0.393341064453125, 0.4108428955078125, 0.4283447265625, 0.4458465576171875, 0.463348388671875, 0.4808502197265625, 0.49835205078125, 0.5158538818359375, 0.533355712890625, 0.5508575439453125, 0.568359375]}, "gradients/decoder.transformer.h.12.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 36.0, 979.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.11276122182607651, -0.0918130949139595, -0.0708649754524231, -0.04991684854030609, -0.028968721628189087, -0.008020594716072083, 0.012927524745464325, 0.03387565165758133, 0.054823778569698334, 0.07577190548181534, 0.09672002494335175, 0.11766815185546875, 0.13861627876758575, 0.15956440567970276, 0.18051251769065857, 0.20146065950393677, 0.22240877151489258, 0.24335689842700958, 0.2643050253391266, 0.2852531373500824, 0.3062012791633606, 0.3271493911743164, 0.3480975031852722, 0.3690456449985504, 0.3899937868118286, 0.4109418988227844, 0.4318900406360626, 0.45283815264701843, 0.47378629446029663, 0.49473440647125244, 0.5156825184822083, 0.5366306304931641, 0.5575787425041199, 0.5785268545150757, 0.5994749665260315, 0.6204231381416321, 0.6413712501525879, 0.6623193621635437, 0.6832674741744995, 0.7042156457901001, 0.7251637578010559, 0.7461118698120117, 0.7670599818229675, 0.7880081534385681, 0.8089562654495239, 0.8299043774604797, 0.8508524894714355, 0.8718006610870361, 0.8927487134933472, 0.913696825504303, 0.9346449375152588, 0.9555931091308594, 0.9765412211418152, 0.997489333152771, 1.0184375047683716, 1.0393855571746826, 1.0603337287902832, 1.0812819004058838, 1.1022299528121948, 1.1231781244277954, 1.1441261768341064, 1.165074348449707, 1.1860225200653076, 1.2069705724716187, 1.2279187440872192]}, "gradients/decoder.transformer.h.12.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 5.0, 2.0, 10.0, 4.0, 17.0, 13.0, 18.0, 22.0, 19.0, 14.0, 29.0, 34.0, 43.0, 26.0, 29.0, 39.0, 55.0, 31.0, 62.0, 46.0, 49.0, 35.0, 46.0, 51.0, 50.0, 33.0, 39.0, 37.0, 31.0, 20.0, 18.0, 16.0, 21.0, 15.0, 8.0, 6.0, 7.0, 3.0, 2.0, 4.0, 1.0, 3.0, 2.0, 1.0, 1.0, 1.0], "bins": [-0.06246870756149292, -0.06079918146133423, -0.05912965536117554, -0.057460129261016846, -0.055790603160858154, -0.05412107706069946, -0.05245155096054077, -0.05078202486038208, -0.04911249876022339, -0.0474429726600647, -0.045773446559906006, -0.044103920459747314, -0.04243439435958862, -0.04076486825942993, -0.03909534215927124, -0.03742581605911255, -0.03575628995895386, -0.034086763858795166, -0.032417237758636475, -0.030747711658477783, -0.029078185558319092, -0.0274086594581604, -0.02573913335800171, -0.024069607257843018, -0.022400081157684326, -0.020730555057525635, -0.019061028957366943, -0.017391502857208252, -0.01572197675704956, -0.01405245065689087, -0.012382924556732178, -0.010713398456573486, -0.009043872356414795, -0.0073743462562561035, -0.005704820156097412, -0.004035294055938721, -0.0023657679557800293, -0.0006962418556213379, 0.0009732842445373535, 0.002642810344696045, 0.004312336444854736, 0.005981862545013428, 0.007651388645172119, 0.00932091474533081, 0.010990440845489502, 0.012659966945648193, 0.014329493045806885, 0.015999019145965576, 0.017668545246124268, 0.01933807134628296, 0.02100759744644165, 0.022677123546600342, 0.024346649646759033, 0.026016175746917725, 0.027685701847076416, 0.029355227947235107, 0.0310247540473938, 0.03269428014755249, 0.03436380624771118, 0.03603333234786987, 0.037702858448028564, 0.039372384548187256, 0.04104191064834595, 0.04271143674850464, 0.04438096284866333]}, "gradients/decoder.transformer.h.12.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 5.0, 1.0, 5.0, 2.0, 3.0, 5.0, 5.0, 14.0, 16.0, 16.0, 10.0, 17.0, 26.0, 30.0, 25.0, 24.0, 32.0, 39.0, 35.0, 26.0, 31.0, 41.0, 43.0, 33.0, 58.0, 47.0, 45.0, 37.0, 43.0, 44.0, 25.0, 26.0, 31.0, 27.0, 17.0, 26.0, 13.0, 16.0, 16.0, 15.0, 14.0, 4.0, 6.0, 4.0, 3.0, 3.0, 5.0, 0.0, 3.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.8671875, -4.7052001953125, -4.543212890625, -4.3812255859375, -4.21923828125, -4.0572509765625, -3.895263671875, -3.7332763671875, -3.5712890625, -3.4093017578125, -3.247314453125, -3.0853271484375, -2.92333984375, -2.7613525390625, -2.599365234375, -2.4373779296875, -2.275390625, -2.1134033203125, -1.951416015625, -1.7894287109375, -1.62744140625, -1.4654541015625, -1.303466796875, -1.1414794921875, -0.9794921875, -0.8175048828125, -0.655517578125, -0.4935302734375, -0.33154296875, -0.1695556640625, -0.007568359375, 0.1544189453125, 0.31640625, 0.4783935546875, 0.640380859375, 0.8023681640625, 0.96435546875, 1.1263427734375, 1.288330078125, 1.4503173828125, 1.6123046875, 1.7742919921875, 1.936279296875, 2.0982666015625, 2.26025390625, 2.4222412109375, 2.584228515625, 2.7462158203125, 2.908203125, 3.0701904296875, 3.232177734375, 3.3941650390625, 3.55615234375, 3.7181396484375, 3.880126953125, 4.0421142578125, 4.2041015625, 4.3660888671875, 4.528076171875, 4.6900634765625, 4.85205078125, 5.0140380859375, 5.176025390625, 5.3380126953125, 5.5]}, "gradients/decoder.transformer.h.12.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 3.0, 2.0, 6.0, 4.0, 10.0, 10.0, 15.0, 22.0, 27.0, 50.0, 67.0, 89.0, 150.0, 245.0, 416.0, 790.0, 1633.0, 3423.0, 8261.0, 21699.0, 60973.0, 168341.0, 386113.0, 251034.0, 91319.0, 32519.0, 11805.0, 4857.0, 2185.0, 1088.0, 561.0, 315.0, 183.0, 108.0, 82.0, 47.0, 36.0, 23.0, 20.0, 9.0, 9.0, 6.0, 3.0, 3.0, 5.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.73046875, -6.54193115234375, -6.3533935546875, -6.16485595703125, -5.976318359375, -5.78778076171875, -5.5992431640625, -5.41070556640625, -5.22216796875, -5.03363037109375, -4.8450927734375, -4.65655517578125, -4.468017578125, -4.27947998046875, -4.0909423828125, -3.90240478515625, -3.7138671875, -3.52532958984375, -3.3367919921875, -3.14825439453125, -2.959716796875, -2.77117919921875, -2.5826416015625, -2.39410400390625, -2.20556640625, -2.01702880859375, -1.8284912109375, -1.63995361328125, -1.451416015625, -1.26287841796875, -1.0743408203125, -0.88580322265625, -0.697265625, -0.50872802734375, -0.3201904296875, -0.13165283203125, 0.056884765625, 0.24542236328125, 0.4339599609375, 0.62249755859375, 0.81103515625, 0.99957275390625, 1.1881103515625, 1.37664794921875, 1.565185546875, 1.75372314453125, 1.9422607421875, 2.13079833984375, 2.3193359375, 2.50787353515625, 2.6964111328125, 2.88494873046875, 3.073486328125, 3.26202392578125, 3.4505615234375, 3.63909912109375, 3.82763671875, 4.01617431640625, 4.2047119140625, 4.39324951171875, 4.581787109375, 4.77032470703125, 4.9588623046875, 5.14739990234375, 5.3359375]}, "gradients/decoder.transformer.h.12.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 3.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 7.0, 3.0, 3.0, 7.0, 7.0, 5.0, 10.0, 15.0, 9.0, 18.0, 14.0, 16.0, 17.0, 26.0, 34.0, 32.0, 30.0, 48.0, 38.0, 56.0, 93.0, 142.0, 287.0, 1383.0, 205.0, 105.0, 77.0, 53.0, 49.0, 40.0, 44.0, 26.0, 17.0, 22.0, 23.0, 14.0, 9.0, 14.0, 10.0, 14.0, 9.0, 2.0, 8.0, 3.0, 4.0, 4.0, 0.0, 2.0, 2.0, 3.0, 0.0, 0.0, 0.0, 0.0, 3.0], "bins": [-16.078125, -15.579345703125, -15.08056640625, -14.581787109375, -14.0830078125, -13.584228515625, -13.08544921875, -12.586669921875, -12.087890625, -11.589111328125, -11.09033203125, -10.591552734375, -10.0927734375, -9.593994140625, -9.09521484375, -8.596435546875, -8.09765625, -7.598876953125, -7.10009765625, -6.601318359375, -6.1025390625, -5.603759765625, -5.10498046875, -4.606201171875, -4.107421875, -3.608642578125, -3.10986328125, -2.611083984375, -2.1123046875, -1.613525390625, -1.11474609375, -0.615966796875, -0.1171875, 0.381591796875, 0.88037109375, 1.379150390625, 1.8779296875, 2.376708984375, 2.87548828125, 3.374267578125, 3.873046875, 4.371826171875, 4.87060546875, 5.369384765625, 5.8681640625, 6.366943359375, 6.86572265625, 7.364501953125, 7.86328125, 8.362060546875, 8.86083984375, 9.359619140625, 9.8583984375, 10.357177734375, 10.85595703125, 11.354736328125, 11.853515625, 12.352294921875, 12.85107421875, 13.349853515625, 13.8486328125, 14.347412109375, 14.84619140625, 15.344970703125, 15.84375]}, "gradients/decoder.transformer.h.12.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 2.0, 7.0, 8.0, 12.0, 22.0, 28.0, 56.0, 60.0, 86.0, 156.0, 303.0, 729.0, 2903.0, 826594.0, 2309712.0, 3438.0, 789.0, 342.0, 158.0, 120.0, 54.0, 49.0, 25.0, 26.0, 14.0, 3.0, 9.0, 4.0, 5.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-81.3125, -78.9921875, -76.671875, -74.3515625, -72.03125, -69.7109375, -67.390625, -65.0703125, -62.75, -60.4296875, -58.109375, -55.7890625, -53.46875, -51.1484375, -48.828125, -46.5078125, -44.1875, -41.8671875, -39.546875, -37.2265625, -34.90625, -32.5859375, -30.265625, -27.9453125, -25.625, -23.3046875, -20.984375, -18.6640625, -16.34375, -14.0234375, -11.703125, -9.3828125, -7.0625, -4.7421875, -2.421875, -0.1015625, 2.21875, 4.5390625, 6.859375, 9.1796875, 11.5, 13.8203125, 16.140625, 18.4609375, 20.78125, 23.1015625, 25.421875, 27.7421875, 30.0625, 32.3828125, 34.703125, 37.0234375, 39.34375, 41.6640625, 43.984375, 46.3046875, 48.625, 50.9453125, 53.265625, 55.5859375, 57.90625, 60.2265625, 62.546875, 64.8671875, 67.1875]}, "gradients/decoder.transformer.h.12.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 4.0, 21.0, 167.0, 407.0, 329.0, 80.0, 5.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-54.73880386352539, -51.657752990722656, -48.57670593261719, -45.49565505981445, -42.41460418701172, -39.333553314208984, -36.25250244140625, -33.17145538330078, -30.090404510498047, -27.009353637695312, -23.92830467224121, -20.84725570678711, -17.766204833984375, -14.685154914855957, -11.604104995727539, -8.523056030273438, -5.442005157470703, -2.360955238342285, 0.7200946807861328, 3.801144599914551, 6.882194519042969, 9.963244438171387, 13.044294357299805, 16.125343322753906, 19.20639419555664, 22.287445068359375, 25.368494033813477, 28.449542999267578, 31.530593872070312, 34.61164474487305, 37.69269561767578, 40.77374267578125, 43.85479736328125, 46.935848236083984, 50.01689910888672, 53.09794616699219, 56.17899703979492, 59.260047912597656, 62.341094970703125, 65.42214965820312, 68.5031967163086, 71.58424377441406, 74.66529846191406, 77.74634552001953, 80.827392578125, 83.908447265625, 86.98949432373047, 90.07054138183594, 93.15159606933594, 96.2326431274414, 99.3136978149414, 102.39474487304688, 105.47579956054688, 108.55684661865234, 111.63789367675781, 114.71894836425781, 117.79999542236328, 120.88104248046875, 123.96209716796875, 127.04314422607422, 130.1241912841797, 133.2052459716797, 136.2863006591797, 139.36734008789062, 142.44839477539062]}, "gradients/decoder.transformer.h.12.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 3.0, 3.0, 2.0, 2.0, 4.0, 2.0, 4.0, 3.0, 5.0, 6.0, 12.0, 10.0, 12.0, 10.0, 18.0, 16.0, 14.0, 22.0, 24.0, 23.0, 30.0, 35.0, 46.0, 45.0, 37.0, 41.0, 53.0, 37.0, 46.0, 47.0, 41.0, 33.0, 45.0, 30.0, 27.0, 27.0, 23.0, 30.0, 26.0, 19.0, 17.0, 20.0, 15.0, 7.0, 10.0, 6.0, 9.0, 4.0, 7.0, 4.0, 3.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-44.62644958496094, -43.326961517333984, -42.027469635009766, -40.72798156738281, -39.428489685058594, -38.12900161743164, -36.82950973510742, -35.53002166748047, -34.23052978515625, -32.9310417175293, -31.631549835205078, -30.332059860229492, -29.032569885253906, -27.733081817626953, -26.433591842651367, -25.13410186767578, -23.834613800048828, -22.535123825073242, -21.235633850097656, -19.93614387512207, -18.636653900146484, -17.33716583251953, -16.037675857543945, -14.73818588256836, -13.438695907592773, -12.139205932617188, -10.839715957641602, -9.540226936340332, -8.240736961364746, -6.94124698638916, -5.641757488250732, -4.342267990112305, -3.0427780151367188, -1.743288278579712, -0.4437985420227051, 0.8556911945343018, 2.1551809310913086, 3.4546709060668945, 4.754160404205322, 6.05364990234375, 7.353139877319336, 8.652629852294922, 9.952119827270508, 11.251608848571777, 12.551098823547363, 13.85058879852295, 15.150077819824219, 16.449567794799805, 17.74905776977539, 19.048547744750977, 20.348037719726562, 21.64752769470215, 22.947017669677734, 24.246505737304688, 25.545995712280273, 26.84548568725586, 28.144975662231445, 29.44446563720703, 30.743955612182617, 32.0434455871582, 33.342933654785156, 34.642425537109375, 35.94191360473633, 37.24140167236328, 38.5408935546875]}, "gradients/decoder.transformer.h.11.mlp.c_proj.bias": {"_type": "histogram", "values": [3.0, 1.0, 2.0, 0.0, 4.0, 5.0, 5.0, 3.0, 6.0, 4.0, 10.0, 11.0, 9.0, 21.0, 14.0, 22.0, 23.0, 26.0, 28.0, 29.0, 28.0, 37.0, 25.0, 33.0, 30.0, 39.0, 36.0, 29.0, 40.0, 53.0, 33.0, 48.0, 40.0, 36.0, 43.0, 28.0, 31.0, 23.0, 27.0, 17.0, 23.0, 20.0, 15.0, 15.0, 10.0, 7.0, 7.0, 6.0, 2.0, 2.0, 4.0, 2.0, 1.0, 0.0, 2.0, 2.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.64453125, -4.47955322265625, -4.3145751953125, -4.14959716796875, -3.984619140625, -3.81964111328125, -3.6546630859375, -3.48968505859375, -3.32470703125, -3.15972900390625, -2.9947509765625, -2.82977294921875, -2.664794921875, -2.49981689453125, -2.3348388671875, -2.16986083984375, -2.0048828125, -1.83990478515625, -1.6749267578125, -1.50994873046875, -1.344970703125, -1.17999267578125, -1.0150146484375, -0.85003662109375, -0.68505859375, -0.52008056640625, -0.3551025390625, -0.19012451171875, -0.025146484375, 0.13983154296875, 0.3048095703125, 0.46978759765625, 0.634765625, 0.79974365234375, 0.9647216796875, 1.12969970703125, 1.294677734375, 1.45965576171875, 1.6246337890625, 1.78961181640625, 1.95458984375, 2.11956787109375, 2.2845458984375, 2.44952392578125, 2.614501953125, 2.77947998046875, 2.9444580078125, 3.10943603515625, 3.2744140625, 3.43939208984375, 3.6043701171875, 3.76934814453125, 3.934326171875, 4.09930419921875, 4.2642822265625, 4.42926025390625, 4.59423828125, 4.75921630859375, 4.9241943359375, 5.08917236328125, 5.254150390625, 5.41912841796875, 5.5841064453125, 5.74908447265625, 5.9140625]}, "gradients/decoder.transformer.h.11.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 3.0, 2.0, 1.0, 3.0, 8.0, 3.0, 4.0, 5.0, 8.0, 15.0, 14.0, 22.0, 19.0, 26.0, 36.0, 55.0, 82.0, 104.0, 150.0, 228.0, 402.0, 921.0, 1933.0, 5270.0, 19464.0, 90284.0, 469297.0, 1644777.0, 1475487.0, 387756.0, 73968.0, 15981.0, 4580.0, 1589.0, 722.0, 395.0, 209.0, 122.0, 86.0, 47.0, 46.0, 25.0, 41.0, 15.0, 25.0, 13.0, 13.0, 13.0, 7.0, 3.0, 4.0, 2.0, 5.0, 2.0, 4.0, 4.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-8.7578125, -8.4747314453125, -8.191650390625, -7.9085693359375, -7.62548828125, -7.3424072265625, -7.059326171875, -6.7762451171875, -6.4931640625, -6.2100830078125, -5.927001953125, -5.6439208984375, -5.36083984375, -5.0777587890625, -4.794677734375, -4.5115966796875, -4.228515625, -3.9454345703125, -3.662353515625, -3.3792724609375, -3.09619140625, -2.8131103515625, -2.530029296875, -2.2469482421875, -1.9638671875, -1.6807861328125, -1.397705078125, -1.1146240234375, -0.83154296875, -0.5484619140625, -0.265380859375, 0.0177001953125, 0.30078125, 0.5838623046875, 0.866943359375, 1.1500244140625, 1.43310546875, 1.7161865234375, 1.999267578125, 2.2823486328125, 2.5654296875, 2.8485107421875, 3.131591796875, 3.4146728515625, 3.69775390625, 3.9808349609375, 4.263916015625, 4.5469970703125, 4.830078125, 5.1131591796875, 5.396240234375, 5.6793212890625, 5.96240234375, 6.2454833984375, 6.528564453125, 6.8116455078125, 7.0947265625, 7.3778076171875, 7.660888671875, 7.9439697265625, 8.22705078125, 8.5101318359375, 8.793212890625, 9.0762939453125, 9.359375]}, "gradients/decoder.transformer.h.11.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 3.0, 0.0, 2.0, 1.0, 2.0, 2.0, 4.0, 6.0, 6.0, 8.0, 9.0, 8.0, 15.0, 32.0, 15.0, 30.0, 26.0, 61.0, 50.0, 81.0, 101.0, 128.0, 158.0, 226.0, 243.0, 262.0, 335.0, 336.0, 328.0, 305.0, 285.0, 180.0, 175.0, 152.0, 105.0, 84.0, 56.0, 62.0, 42.0, 37.0, 31.0, 19.0, 15.0, 12.0, 9.0, 5.0, 6.0, 8.0, 5.0, 7.0, 2.0, 3.0, 2.0, 2.0, 0.0, 2.0, 2.0, 2.0, 0.0, 1.0, 1.0], "bins": [-8.0234375, -7.7628173828125, -7.502197265625, -7.2415771484375, -6.98095703125, -6.7203369140625, -6.459716796875, -6.1990966796875, -5.9384765625, -5.6778564453125, -5.417236328125, -5.1566162109375, -4.89599609375, -4.6353759765625, -4.374755859375, -4.1141357421875, -3.853515625, -3.5928955078125, -3.332275390625, -3.0716552734375, -2.81103515625, -2.5504150390625, -2.289794921875, -2.0291748046875, -1.7685546875, -1.5079345703125, -1.247314453125, -0.9866943359375, -0.72607421875, -0.4654541015625, -0.204833984375, 0.0557861328125, 0.31640625, 0.5770263671875, 0.837646484375, 1.0982666015625, 1.35888671875, 1.6195068359375, 1.880126953125, 2.1407470703125, 2.4013671875, 2.6619873046875, 2.922607421875, 3.1832275390625, 3.44384765625, 3.7044677734375, 3.965087890625, 4.2257080078125, 4.486328125, 4.7469482421875, 5.007568359375, 5.2681884765625, 5.52880859375, 5.7894287109375, 6.050048828125, 6.3106689453125, 6.5712890625, 6.8319091796875, 7.092529296875, 7.3531494140625, 7.61376953125, 7.8743896484375, 8.135009765625, 8.3956298828125, 8.65625]}, "gradients/decoder.transformer.h.11.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 4.0, 0.0, 1.0, 0.0, 7.0, 1.0, 0.0, 7.0, 8.0, 7.0, 8.0, 6.0, 7.0, 14.0, 20.0, 19.0, 36.0, 39.0, 64.0, 45.0, 69.0, 107.0, 148.0, 211.0, 266.0, 474.0, 2424.0, 98820.0, 3687365.0, 396325.0, 5898.0, 630.0, 355.0, 223.0, 171.0, 116.0, 87.0, 77.0, 55.0, 38.0, 27.0, 18.0, 26.0, 21.0, 11.0, 4.0, 12.0, 9.0, 6.0, 3.0, 3.0, 0.0, 2.0, 0.0, 2.0, 2.0, 1.0, 1.0], "bins": [-40.3125, -39.14892578125, -37.9853515625, -36.82177734375, -35.658203125, -34.49462890625, -33.3310546875, -32.16748046875, -31.00390625, -29.84033203125, -28.6767578125, -27.51318359375, -26.349609375, -25.18603515625, -24.0224609375, -22.85888671875, -21.6953125, -20.53173828125, -19.3681640625, -18.20458984375, -17.041015625, -15.87744140625, -14.7138671875, -13.55029296875, -12.38671875, -11.22314453125, -10.0595703125, -8.89599609375, -7.732421875, -6.56884765625, -5.4052734375, -4.24169921875, -3.078125, -1.91455078125, -0.7509765625, 0.41259765625, 1.576171875, 2.73974609375, 3.9033203125, 5.06689453125, 6.23046875, 7.39404296875, 8.5576171875, 9.72119140625, 10.884765625, 12.04833984375, 13.2119140625, 14.37548828125, 15.5390625, 16.70263671875, 17.8662109375, 19.02978515625, 20.193359375, 21.35693359375, 22.5205078125, 23.68408203125, 24.84765625, 26.01123046875, 27.1748046875, 28.33837890625, 29.501953125, 30.66552734375, 31.8291015625, 32.99267578125, 34.15625]}, "gradients/decoder.transformer.h.11.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 3.0, 12.0, 10.0, 38.0, 71.0, 147.0, 148.0, 185.0, 167.0, 106.0, 59.0, 41.0, 20.0, 5.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-44.710391998291016, -42.2735481262207, -39.83670425415039, -37.39986038208008, -34.963016510009766, -32.52617263793945, -30.089326858520508, -27.652482986450195, -25.215639114379883, -22.77879524230957, -20.341951370239258, -17.905105590820312, -15.468262672424316, -13.031418800354004, -10.594573974609375, -8.157730102539062, -5.72088623046875, -3.2840421199798584, -0.8471980094909668, 1.589646339416504, 4.026490211486816, 6.463334083557129, 8.900178909301758, 11.33702278137207, 13.773866653442383, 16.210710525512695, 18.647554397583008, 21.084400177001953, 23.521244049072266, 25.958087921142578, 28.39493179321289, 30.831775665283203, 33.26861572265625, 35.70545959472656, 38.142303466796875, 40.57914733886719, 43.0159912109375, 45.45283508300781, 47.889678955078125, 50.32652282714844, 52.76336669921875, 55.20021057128906, 57.637054443359375, 60.07389831542969, 62.5107421875, 64.94758605957031, 67.38442993164062, 69.82127380371094, 72.25811767578125, 74.69496154785156, 77.13180541992188, 79.56864929199219, 82.0054931640625, 84.44233703613281, 86.87918090820312, 89.31602478027344, 91.75287628173828, 94.1897201538086, 96.6265640258789, 99.06340789794922, 101.50025177001953, 103.93709564208984, 106.37393951416016, 108.81078338623047, 111.24762725830078]}, "gradients/decoder.transformer.h.11.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 9.0, 2.0, 2.0, 7.0, 7.0, 12.0, 11.0, 16.0, 18.0, 18.0, 22.0, 26.0, 20.0, 23.0, 39.0, 41.0, 22.0, 41.0, 37.0, 24.0, 54.0, 46.0, 41.0, 36.0, 46.0, 28.0, 42.0, 35.0, 33.0, 32.0, 30.0, 32.0, 30.0, 19.0, 9.0, 15.0, 12.0, 13.0, 13.0, 7.0, 8.0, 6.0, 1.0, 13.0, 1.0, 2.0, 5.0, 3.0, 0.0, 3.0, 2.0, 0.0, 0.0, 0.0, 2.0], "bins": [-33.08102798461914, -32.02484130859375, -30.96865463256836, -29.9124698638916, -28.85628318786621, -27.80009651184082, -26.743911743164062, -25.687725067138672, -24.63153839111328, -23.57535171508789, -22.5191650390625, -21.462980270385742, -20.40679359436035, -19.35060691833496, -18.294422149658203, -17.238235473632812, -16.182048797607422, -15.125862121582031, -14.069676399230957, -13.013490676879883, -11.957304000854492, -10.901117324829102, -9.844931602478027, -8.788745880126953, -7.7325592041015625, -6.67637300491333, -5.620186805725098, -4.564000606536865, -3.507814407348633, -2.4516282081604004, -1.395442008972168, -0.33925580978393555, 0.7169342041015625, 1.773120403289795, 2.8293066024780273, 3.8854928016662598, 4.941679000854492, 5.997865200042725, 7.054051399230957, 8.110237121582031, 9.166423797607422, 10.222610473632812, 11.278796195983887, 12.334981918334961, 13.391168594360352, 14.447355270385742, 15.503540992736816, 16.55972671508789, 17.61591339111328, 18.672100067138672, 19.728286743164062, 20.78447151184082, 21.84065818786621, 22.8968448638916, 23.95302963256836, 25.00921630859375, 26.06540298461914, 27.12158966064453, 28.177776336669922, 29.23396110534668, 30.29014778137207, 31.34633445739746, 32.40251922607422, 33.45870590209961, 34.514892578125]}, "gradients/decoder.transformer.h.11.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 3.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 5.0, 5.0, 4.0, 8.0, 7.0, 10.0, 8.0, 14.0, 16.0, 15.0, 21.0, 38.0, 27.0, 22.0, 34.0, 29.0, 27.0, 37.0, 29.0, 52.0, 45.0, 49.0, 30.0, 43.0, 47.0, 36.0, 40.0, 38.0, 32.0, 38.0, 21.0, 22.0, 20.0, 21.0, 22.0, 19.0, 16.0, 18.0, 5.0, 10.0, 5.0, 6.0, 5.0, 1.0, 4.0, 4.0, 1.0, 2.0, 0.0, 2.0, 2.0, 1.0, 1.0, 1.0], "bins": [-5.14453125, -4.98516845703125, -4.8258056640625, -4.66644287109375, -4.507080078125, -4.34771728515625, -4.1883544921875, -4.02899169921875, -3.86962890625, -3.71026611328125, -3.5509033203125, -3.39154052734375, -3.232177734375, -3.07281494140625, -2.9134521484375, -2.75408935546875, -2.5947265625, -2.43536376953125, -2.2760009765625, -2.11663818359375, -1.957275390625, -1.79791259765625, -1.6385498046875, -1.47918701171875, -1.31982421875, -1.16046142578125, -1.0010986328125, -0.84173583984375, -0.682373046875, -0.52301025390625, -0.3636474609375, -0.20428466796875, -0.044921875, 0.11444091796875, 0.2738037109375, 0.43316650390625, 0.592529296875, 0.75189208984375, 0.9112548828125, 1.07061767578125, 1.22998046875, 1.38934326171875, 1.5487060546875, 1.70806884765625, 1.867431640625, 2.02679443359375, 2.1861572265625, 2.34552001953125, 2.5048828125, 2.66424560546875, 2.8236083984375, 2.98297119140625, 3.142333984375, 3.30169677734375, 3.4610595703125, 3.62042236328125, 3.77978515625, 3.93914794921875, 4.0985107421875, 4.25787353515625, 4.417236328125, 4.57659912109375, 4.7359619140625, 4.89532470703125, 5.0546875]}, "gradients/decoder.transformer.h.11.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 5.0, 3.0, 0.0, 3.0, 12.0, 8.0, 16.0, 26.0, 37.0, 65.0, 101.0, 140.0, 199.0, 303.0, 452.0, 627.0, 1002.0, 1390.0, 2094.0, 3158.0, 4713.0, 7334.0, 11264.0, 17463.0, 27310.0, 43109.0, 66466.0, 98722.0, 136948.0, 162226.0, 146082.0, 108647.0, 73373.0, 48348.0, 30792.0, 19478.0, 12486.0, 8152.0, 5311.0, 3455.0, 2307.0, 1618.0, 1081.0, 706.0, 505.0, 362.0, 204.0, 141.0, 118.0, 73.0, 49.0, 27.0, 22.0, 16.0, 11.0, 7.0, 2.0, 2.0, 2.0, 1.0], "bins": [-0.7314453125, -0.7096939086914062, -0.6879425048828125, -0.6661911010742188, -0.644439697265625, -0.6226882934570312, -0.6009368896484375, -0.5791854858398438, -0.55743408203125, -0.5356826782226562, -0.5139312744140625, -0.49217987060546875, -0.470428466796875, -0.44867706298828125, -0.4269256591796875, -0.40517425537109375, -0.3834228515625, -0.36167144775390625, -0.3399200439453125, -0.31816864013671875, -0.296417236328125, -0.27466583251953125, -0.2529144287109375, -0.23116302490234375, -0.20941162109375, -0.18766021728515625, -0.1659088134765625, -0.14415740966796875, -0.122406005859375, -0.10065460205078125, -0.0789031982421875, -0.05715179443359375, -0.035400390625, -0.01364898681640625, 0.0081024169921875, 0.02985382080078125, 0.051605224609375, 0.07335662841796875, 0.0951080322265625, 0.11685943603515625, 0.13861083984375, 0.16036224365234375, 0.1821136474609375, 0.20386505126953125, 0.225616455078125, 0.24736785888671875, 0.2691192626953125, 0.29087066650390625, 0.3126220703125, 0.33437347412109375, 0.3561248779296875, 0.37787628173828125, 0.399627685546875, 0.42137908935546875, 0.4431304931640625, 0.46488189697265625, 0.48663330078125, 0.5083847045898438, 0.5301361083984375, 0.5518875122070312, 0.573638916015625, 0.5953903198242188, 0.6171417236328125, 0.6388931274414062, 0.66064453125]}, "gradients/decoder.transformer.h.11.crossattention.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 3.0, 2.0, 4.0, 3.0, 6.0, 4.0, 2.0, 2.0, 5.0, 8.0, 13.0, 16.0, 12.0, 24.0, 13.0, 22.0, 16.0, 33.0, 21.0, 25.0, 34.0, 33.0, 31.0, 33.0, 36.0, 29.0, 33.0, 45.0, 1064.0, 36.0, 27.0, 44.0, 30.0, 35.0, 33.0, 35.0, 33.0, 30.0, 14.0, 17.0, 20.0, 15.0, 16.0, 8.0, 13.0, 12.0, 9.0, 11.0, 7.0, 8.0, 2.0, 5.0, 1.0, 5.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-3.13671875, -3.036346435546875, -2.93597412109375, -2.835601806640625, -2.7352294921875, -2.634857177734375, -2.53448486328125, -2.434112548828125, -2.333740234375, -2.233367919921875, -2.13299560546875, -2.032623291015625, -1.9322509765625, -1.831878662109375, -1.73150634765625, -1.631134033203125, -1.53076171875, -1.430389404296875, -1.33001708984375, -1.229644775390625, -1.1292724609375, -1.028900146484375, -0.92852783203125, -0.828155517578125, -0.727783203125, -0.627410888671875, -0.52703857421875, -0.426666259765625, -0.3262939453125, -0.225921630859375, -0.12554931640625, -0.025177001953125, 0.0751953125, 0.175567626953125, 0.27593994140625, 0.376312255859375, 0.4766845703125, 0.577056884765625, 0.67742919921875, 0.777801513671875, 0.878173828125, 0.978546142578125, 1.07891845703125, 1.179290771484375, 1.2796630859375, 1.380035400390625, 1.48040771484375, 1.580780029296875, 1.68115234375, 1.781524658203125, 1.88189697265625, 1.982269287109375, 2.0826416015625, 2.183013916015625, 2.28338623046875, 2.383758544921875, 2.484130859375, 2.584503173828125, 2.68487548828125, 2.785247802734375, 2.8856201171875, 2.985992431640625, 3.08636474609375, 3.186737060546875, 3.287109375]}, "gradients/decoder.transformer.h.11.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 5.0, 1.0, 4.0, 5.0, 11.0, 10.0, 17.0, 31.0, 28.0, 47.0, 83.0, 124.0, 201.0, 317.0, 436.0, 741.0, 1091.0, 1746.0, 2615.0, 4074.0, 6364.0, 9828.0, 15004.0, 23350.0, 35615.0, 54510.0, 80967.0, 113787.0, 182959.0, 1156918.0, 127048.0, 93837.0, 64233.0, 42295.0, 27663.0, 17959.0, 11602.0, 7440.0, 4949.0, 3284.0, 2072.0, 1393.0, 830.0, 559.0, 385.0, 246.0, 165.0, 108.0, 62.0, 35.0, 34.0, 21.0, 15.0, 9.0, 7.0, 4.0, 4.0, 2.0], "bins": [-0.448486328125, -0.4357452392578125, -0.423004150390625, -0.4102630615234375, -0.39752197265625, -0.3847808837890625, -0.372039794921875, -0.3592987060546875, -0.3465576171875, -0.3338165283203125, -0.321075439453125, -0.3083343505859375, -0.29559326171875, -0.2828521728515625, -0.270111083984375, -0.2573699951171875, -0.24462890625, -0.2318878173828125, -0.219146728515625, -0.2064056396484375, -0.19366455078125, -0.1809234619140625, -0.168182373046875, -0.1554412841796875, -0.1427001953125, -0.1299591064453125, -0.117218017578125, -0.1044769287109375, -0.09173583984375, -0.0789947509765625, -0.066253662109375, -0.0535125732421875, -0.040771484375, -0.0280303955078125, -0.015289306640625, -0.0025482177734375, 0.01019287109375, 0.0229339599609375, 0.035675048828125, 0.0484161376953125, 0.0611572265625, 0.0738983154296875, 0.086639404296875, 0.0993804931640625, 0.11212158203125, 0.1248626708984375, 0.137603759765625, 0.1503448486328125, 0.1630859375, 0.1758270263671875, 0.188568115234375, 0.2013092041015625, 0.21405029296875, 0.2267913818359375, 0.239532470703125, 0.2522735595703125, 0.2650146484375, 0.2777557373046875, 0.290496826171875, 0.3032379150390625, 0.31597900390625, 0.3287200927734375, 0.341461181640625, 0.3542022705078125, 0.366943359375]}, "gradients/decoder.transformer.h.11.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 2.0, 2.0, 3.0, 4.0, 7.0, 5.0, 9.0, 12.0, 12.0, 15.0, 21.0, 30.0, 54.0, 65.0, 83.0, 90.0, 139.0, 91.0, 80.0, 63.0, 65.0, 45.0, 39.0, 19.0, 9.0, 13.0, 12.0, 7.0, 1.0, 3.0, 3.0, 4.0, 0.0, 2.0, 3.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.02984619140625, -0.02893662452697754, -0.028027057647705078, -0.027117490768432617, -0.026207923889160156, -0.025298357009887695, -0.024388790130615234, -0.023479223251342773, -0.022569656372070312, -0.02166008949279785, -0.02075052261352539, -0.01984095573425293, -0.01893138885498047, -0.018021821975708008, -0.017112255096435547, -0.016202688217163086, -0.015293121337890625, -0.014383554458618164, -0.013473987579345703, -0.012564420700073242, -0.011654853820800781, -0.01074528694152832, -0.00983572006225586, -0.008926153182983398, -0.008016586303710938, -0.0071070194244384766, -0.006197452545166016, -0.005287885665893555, -0.004378318786621094, -0.003468751907348633, -0.002559185028076172, -0.001649618148803711, -0.00074005126953125, 0.00016951560974121094, 0.0010790824890136719, 0.001988649368286133, 0.0028982162475585938, 0.0038077831268310547, 0.004717350006103516, 0.0056269168853759766, 0.0065364837646484375, 0.0074460506439208984, 0.00835561752319336, 0.00926518440246582, 0.010174751281738281, 0.011084318161010742, 0.011993885040283203, 0.012903451919555664, 0.013813018798828125, 0.014722585678100586, 0.015632152557373047, 0.016541719436645508, 0.01745128631591797, 0.01836085319519043, 0.01927042007446289, 0.02017998695373535, 0.021089553833007812, 0.021999120712280273, 0.022908687591552734, 0.023818254470825195, 0.024727821350097656, 0.025637388229370117, 0.026546955108642578, 0.02745652198791504, 0.0283660888671875]}, "gradients/decoder.transformer.h.11.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 4.0, 0.0, 2.0, 4.0, 2.0, 5.0, 4.0, 10.0, 14.0, 18.0, 21.0, 28.0, 62.0, 81.0, 147.0, 232.0, 555.0, 18333.0, 1027104.0, 1123.0, 356.0, 154.0, 92.0, 68.0, 36.0, 30.0, 18.0, 10.0, 16.0, 10.0, 10.0, 3.0, 3.0, 3.0, 2.0, 1.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.54638671875, -0.5288848876953125, -0.511383056640625, -0.4938812255859375, -0.47637939453125, -0.4588775634765625, -0.441375732421875, -0.4238739013671875, -0.4063720703125, -0.3888702392578125, -0.371368408203125, -0.3538665771484375, -0.33636474609375, -0.3188629150390625, -0.301361083984375, -0.2838592529296875, -0.266357421875, -0.2488555908203125, -0.231353759765625, -0.2138519287109375, -0.19635009765625, -0.1788482666015625, -0.161346435546875, -0.1438446044921875, -0.1263427734375, -0.1088409423828125, -0.091339111328125, -0.0738372802734375, -0.05633544921875, -0.0388336181640625, -0.021331787109375, -0.0038299560546875, 0.013671875, 0.0311737060546875, 0.048675537109375, 0.0661773681640625, 0.08367919921875, 0.1011810302734375, 0.118682861328125, 0.1361846923828125, 0.1536865234375, 0.1711883544921875, 0.188690185546875, 0.2061920166015625, 0.22369384765625, 0.2411956787109375, 0.258697509765625, 0.2761993408203125, 0.293701171875, 0.3112030029296875, 0.328704833984375, 0.3462066650390625, 0.36370849609375, 0.3812103271484375, 0.398712158203125, 0.4162139892578125, 0.4337158203125, 0.4512176513671875, 0.468719482421875, 0.4862213134765625, 0.50372314453125, 0.5212249755859375, 0.538726806640625, 0.5562286376953125, 0.57373046875]}, "gradients/decoder.transformer.h.11.ln_cross_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 8.0, 143.0, 851.0, 15.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.13320964574813843, -0.12697333097457886, -0.12073702365159988, -0.11450071632862091, -0.10826440900564194, -0.10202810168266296, -0.0957917869091034, -0.08955547958612442, -0.08331917226314545, -0.07708286494016647, -0.0708465501666069, -0.06461024284362793, -0.058373935520648956, -0.052137624472379684, -0.04590131342411041, -0.03966500610113144, -0.03342869132757187, -0.027192382141947746, -0.020956072956323624, -0.014719761908054352, -0.00848345272243023, -0.0022471435368061066, 0.003989167511463165, 0.010225474834442139, 0.01646178588271141, 0.022698095068335533, 0.028934404253959656, 0.03517071530222893, 0.0414070263504982, 0.04764333367347717, 0.053879644721746445, 0.06011595204472542, 0.06635226309299469, 0.07258857041597366, 0.07882488518953323, 0.08506119251251221, 0.09129749983549118, 0.09753380715847015, 0.10377012193202972, 0.1100064292550087, 0.11624273657798767, 0.12247904390096664, 0.12871535122394562, 0.1349516659975052, 0.14118798077106476, 0.14742428064346313, 0.1536605954170227, 0.15989691019058228, 0.16613322496414185, 0.17236953973770142, 0.1786058396100998, 0.18484215438365936, 0.19107846915721893, 0.1973147690296173, 0.20355108380317688, 0.20978739857673645, 0.21602369844913483, 0.2222600132226944, 0.22849631309509277, 0.23473262786865234, 0.24096894264221191, 0.2472052425146103, 0.25344157218933105, 0.25967785716056824, 0.2659141719341278]}, "gradients/decoder.transformer.h.11.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 2.0, 4.0, 1.0, 5.0, 6.0, 5.0, 13.0, 16.0, 10.0, 8.0, 10.0, 19.0, 17.0, 15.0, 24.0, 30.0, 41.0, 33.0, 25.0, 35.0, 38.0, 44.0, 49.0, 33.0, 45.0, 32.0, 42.0, 45.0, 45.0, 33.0, 34.0, 37.0, 32.0, 27.0, 20.0, 15.0, 17.0, 18.0, 16.0, 13.0, 12.0, 13.0, 7.0, 9.0, 4.0, 7.0, 4.0, 1.0, 2.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.026247084140777588, -0.02541860193014145, -0.02459011971950531, -0.02376163750886917, -0.022933155298233032, -0.022104673087596893, -0.021276190876960754, -0.020447708666324615, -0.019619226455688477, -0.018790744245052338, -0.0179622620344162, -0.01713377982378006, -0.01630529761314392, -0.015476815402507782, -0.014648333191871643, -0.013819850981235504, -0.012991368770599365, -0.012162886559963226, -0.011334404349327087, -0.010505922138690948, -0.00967743992805481, -0.00884895771741867, -0.008020475506782532, -0.007191993296146393, -0.006363511085510254, -0.005535028874874115, -0.004706546664237976, -0.003878064453601837, -0.0030495822429656982, -0.0022211000323295593, -0.0013926178216934204, -0.0005641356110572815, 0.0002643465995788574, 0.0010928288102149963, 0.0019213110208511353, 0.002749793231487274, 0.003578275442123413, 0.004406757652759552, 0.005235239863395691, 0.00606372207403183, 0.006892204284667969, 0.007720686495304108, 0.008549168705940247, 0.009377650916576385, 0.010206133127212524, 0.011034615337848663, 0.011863097548484802, 0.012691579759120941, 0.01352006196975708, 0.014348544180393219, 0.015177026391029358, 0.016005508601665497, 0.016833990812301636, 0.017662473022937775, 0.018490955233573914, 0.019319437444210052, 0.02014791965484619, 0.02097640186548233, 0.02180488407611847, 0.022633366286754608, 0.023461848497390747, 0.024290330708026886, 0.025118812918663025, 0.025947295129299164, 0.026775777339935303]}, "gradients/decoder.transformer.h.11.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 3.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 5.0, 5.0, 4.0, 8.0, 7.0, 10.0, 8.0, 14.0, 16.0, 15.0, 21.0, 37.0, 29.0, 21.0, 34.0, 29.0, 27.0, 37.0, 28.0, 53.0, 45.0, 49.0, 30.0, 43.0, 48.0, 34.0, 41.0, 39.0, 31.0, 38.0, 21.0, 22.0, 20.0, 21.0, 22.0, 18.0, 17.0, 18.0, 5.0, 10.0, 5.0, 6.0, 5.0, 1.0, 4.0, 4.0, 1.0, 2.0, 0.0, 2.0, 2.0, 1.0, 1.0, 1.0], "bins": [-5.14453125, -4.98516845703125, -4.8258056640625, -4.66644287109375, -4.507080078125, -4.34771728515625, -4.1883544921875, -4.02899169921875, -3.86962890625, -3.71026611328125, -3.5509033203125, -3.39154052734375, -3.232177734375, -3.07281494140625, -2.9134521484375, -2.75408935546875, -2.5947265625, -2.43536376953125, -2.2760009765625, -2.11663818359375, -1.957275390625, -1.79791259765625, -1.6385498046875, -1.47918701171875, -1.31982421875, -1.16046142578125, -1.0010986328125, -0.84173583984375, -0.682373046875, -0.52301025390625, -0.3636474609375, -0.20428466796875, -0.044921875, 0.11444091796875, 0.2738037109375, 0.43316650390625, 0.592529296875, 0.75189208984375, 0.9112548828125, 1.07061767578125, 1.22998046875, 1.38934326171875, 1.5487060546875, 1.70806884765625, 1.867431640625, 2.02679443359375, 2.1861572265625, 2.34552001953125, 2.5048828125, 2.66424560546875, 2.8236083984375, 2.98297119140625, 3.142333984375, 3.30169677734375, 3.4610595703125, 3.62042236328125, 3.77978515625, 3.93914794921875, 4.0985107421875, 4.25787353515625, 4.417236328125, 4.57659912109375, 4.7359619140625, 4.89532470703125, 5.0546875]}, "gradients/decoder.transformer.h.11.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 1.0, 7.0, 5.0, 4.0, 5.0, 5.0, 8.0, 12.0, 21.0, 30.0, 19.0, 39.0, 58.0, 84.0, 140.0, 248.0, 427.0, 692.0, 1357.0, 2244.0, 3888.0, 6784.0, 11970.0, 20995.0, 41175.0, 92329.0, 217934.0, 327716.0, 172828.0, 72374.0, 33395.0, 17749.0, 10085.0, 5820.0, 3435.0, 1938.0, 1075.0, 627.0, 388.0, 244.0, 148.0, 79.0, 47.0, 36.0, 28.0, 19.0, 9.0, 14.0, 12.0, 8.0, 3.0, 4.0, 0.0, 3.0, 2.0, 2.0, 0.0, 0.0, 3.0, 1.0], "bins": [-4.6875, -4.53802490234375, -4.3885498046875, -4.23907470703125, -4.089599609375, -3.94012451171875, -3.7906494140625, -3.64117431640625, -3.49169921875, -3.34222412109375, -3.1927490234375, -3.04327392578125, -2.893798828125, -2.74432373046875, -2.5948486328125, -2.44537353515625, -2.2958984375, -2.14642333984375, -1.9969482421875, -1.84747314453125, -1.697998046875, -1.54852294921875, -1.3990478515625, -1.24957275390625, -1.10009765625, -0.95062255859375, -0.8011474609375, -0.65167236328125, -0.502197265625, -0.35272216796875, -0.2032470703125, -0.05377197265625, 0.095703125, 0.24517822265625, 0.3946533203125, 0.54412841796875, 0.693603515625, 0.84307861328125, 0.9925537109375, 1.14202880859375, 1.29150390625, 1.44097900390625, 1.5904541015625, 1.73992919921875, 1.889404296875, 2.03887939453125, 2.1883544921875, 2.33782958984375, 2.4873046875, 2.63677978515625, 2.7862548828125, 2.93572998046875, 3.085205078125, 3.23468017578125, 3.3841552734375, 3.53363037109375, 3.68310546875, 3.83258056640625, 3.9820556640625, 4.13153076171875, 4.281005859375, 4.43048095703125, 4.5799560546875, 4.72943115234375, 4.87890625]}, "gradients/decoder.transformer.h.11.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 2.0, 2.0, 4.0, 4.0, 6.0, 5.0, 10.0, 12.0, 12.0, 14.0, 13.0, 19.0, 32.0, 27.0, 36.0, 23.0, 30.0, 38.0, 50.0, 64.0, 79.0, 119.0, 255.0, 1416.0, 218.0, 119.0, 74.0, 48.0, 46.0, 41.0, 28.0, 33.0, 39.0, 28.0, 20.0, 24.0, 12.0, 12.0, 16.0, 7.0, 6.0, 5.0, 4.0, 2.0, 2.0, 3.0, 2.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-18.125, -17.56591796875, -17.0068359375, -16.44775390625, -15.888671875, -15.32958984375, -14.7705078125, -14.21142578125, -13.65234375, -13.09326171875, -12.5341796875, -11.97509765625, -11.416015625, -10.85693359375, -10.2978515625, -9.73876953125, -9.1796875, -8.62060546875, -8.0615234375, -7.50244140625, -6.943359375, -6.38427734375, -5.8251953125, -5.26611328125, -4.70703125, -4.14794921875, -3.5888671875, -3.02978515625, -2.470703125, -1.91162109375, -1.3525390625, -0.79345703125, -0.234375, 0.32470703125, 0.8837890625, 1.44287109375, 2.001953125, 2.56103515625, 3.1201171875, 3.67919921875, 4.23828125, 4.79736328125, 5.3564453125, 5.91552734375, 6.474609375, 7.03369140625, 7.5927734375, 8.15185546875, 8.7109375, 9.27001953125, 9.8291015625, 10.38818359375, 10.947265625, 11.50634765625, 12.0654296875, 12.62451171875, 13.18359375, 13.74267578125, 14.3017578125, 14.86083984375, 15.419921875, 15.97900390625, 16.5380859375, 17.09716796875, 17.65625]}, "gradients/decoder.transformer.h.11.attn.c_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 2.0, 1.0, 2.0, 2.0, 2.0, 2.0, 3.0, 6.0, 7.0, 3.0, 9.0, 5.0, 10.0, 15.0, 24.0, 32.0, 42.0, 38.0, 62.0, 70.0, 86.0, 112.0, 132.0, 177.0, 308.0, 571.0, 1810.0, 28399.0, 2916325.0, 190729.0, 4549.0, 811.0, 363.0, 253.0, 180.0, 136.0, 83.0, 84.0, 50.0, 50.0, 34.0, 28.0, 20.0, 21.0, 19.0, 11.0, 6.0, 8.0, 6.0, 6.0, 2.0, 5.0, 2.0, 2.0, 2.0, 1.0, 4.0, 2.0, 0.0, 0.0, 2.0], "bins": [-40.96875, -39.6767578125, -38.384765625, -37.0927734375, -35.80078125, -34.5087890625, -33.216796875, -31.9248046875, -30.6328125, -29.3408203125, -28.048828125, -26.7568359375, -25.46484375, -24.1728515625, -22.880859375, -21.5888671875, -20.296875, -19.0048828125, -17.712890625, -16.4208984375, -15.12890625, -13.8369140625, -12.544921875, -11.2529296875, -9.9609375, -8.6689453125, -7.376953125, -6.0849609375, -4.79296875, -3.5009765625, -2.208984375, -0.9169921875, 0.375, 1.6669921875, 2.958984375, 4.2509765625, 5.54296875, 6.8349609375, 8.126953125, 9.4189453125, 10.7109375, 12.0029296875, 13.294921875, 14.5869140625, 15.87890625, 17.1708984375, 18.462890625, 19.7548828125, 21.046875, 22.3388671875, 23.630859375, 24.9228515625, 26.21484375, 27.5068359375, 28.798828125, 30.0908203125, 31.3828125, 32.6748046875, 33.966796875, 35.2587890625, 36.55078125, 37.8427734375, 39.134765625, 40.4267578125, 41.71875]}, "gradients/decoder.transformer.h.11.ln_1.weight": {"_type": "histogram", "values": [1.0, 1.0, 108.0, 850.0, 54.0, 2.0, 3.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-27.984949111938477, -19.899105072021484, -11.813262939453125, -3.727418899536133, 4.358423233032227, 12.444265365600586, 20.53011131286621, 28.61595344543457, 36.70179748535156, 44.78763961791992, 52.87348175048828, 60.959327697753906, 69.045166015625, 77.13101196289062, 85.21685791015625, 93.30270385742188, 101.38854217529297, 109.4743881225586, 117.56022644042969, 125.64607238769531, 133.73191833496094, 141.8177490234375, 149.90359497070312, 157.98944091796875, 166.07528686523438, 174.1611328125, 182.24697875976562, 190.33282470703125, 198.4186553955078, 206.50450134277344, 214.59034729003906, 222.6761932373047, 230.76202392578125, 238.84786987304688, 246.9337158203125, 255.01956176757812, 263.10540771484375, 271.19122314453125, 279.277099609375, 287.3629150390625, 295.4487609863281, 303.53460693359375, 311.6204528808594, 319.706298828125, 327.7921447753906, 335.87799072265625, 343.96380615234375, 352.0496520996094, 360.135498046875, 368.2213439941406, 376.30718994140625, 384.3930358886719, 392.4788818359375, 400.564697265625, 408.65057373046875, 416.73638916015625, 424.822265625, 432.9081115722656, 440.99395751953125, 449.0798034667969, 457.1656494140625, 465.25146484375, 473.33734130859375, 481.42315673828125, 489.5090026855469]}, "gradients/decoder.transformer.h.11.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 3.0, 3.0, 5.0, 11.0, 5.0, 5.0, 13.0, 15.0, 8.0, 6.0, 19.0, 27.0, 19.0, 29.0, 27.0, 26.0, 35.0, 38.0, 30.0, 32.0, 48.0, 45.0, 40.0, 41.0, 47.0, 43.0, 44.0, 30.0, 33.0, 38.0, 28.0, 33.0, 24.0, 33.0, 26.0, 19.0, 11.0, 15.0, 15.0, 10.0, 8.0, 9.0, 7.0, 1.0, 0.0, 4.0, 4.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-45.271949768066406, -43.83234786987305, -42.39274597167969, -40.953147888183594, -39.513545989990234, -38.073944091796875, -36.63434600830078, -35.19474411010742, -33.75514221191406, -32.3155403137207, -30.875940322875977, -29.43634033203125, -27.99673843383789, -26.55713653564453, -25.117536544799805, -23.677936553955078, -22.23833465576172, -20.79873275756836, -19.359132766723633, -17.919532775878906, -16.479930877685547, -15.040329933166504, -13.600728988647461, -12.161128044128418, -10.721527099609375, -9.281926155090332, -7.842325210571289, -6.402724266052246, -4.963123321533203, -3.52352237701416, -2.083921432495117, -0.6443204879760742, 0.7952804565429688, 2.2348814010620117, 3.6744823455810547, 5.114083290100098, 6.553684234619141, 7.993285179138184, 9.432886123657227, 10.87248706817627, 12.312088012695312, 13.751688957214355, 15.191289901733398, 16.630889892578125, 18.070491790771484, 19.510093688964844, 20.94969367980957, 22.389293670654297, 23.828895568847656, 25.268497467041016, 26.708097457885742, 28.14769744873047, 29.587299346923828, 31.026901245117188, 32.46649932861328, 33.90610122680664, 35.345703125, 36.78530502319336, 38.22490692138672, 39.66450500488281, 41.10410690307617, 42.54370880126953, 43.983306884765625, 45.422908782958984, 46.862510681152344]}, "gradients/decoder.transformer.h.10.mlp.c_proj.bias": {"_type": "histogram", "values": [3.0, 0.0, 1.0, 1.0, 1.0, 2.0, 4.0, 8.0, 5.0, 5.0, 7.0, 2.0, 7.0, 10.0, 13.0, 11.0, 10.0, 18.0, 20.0, 32.0, 25.0, 21.0, 36.0, 30.0, 32.0, 19.0, 39.0, 41.0, 42.0, 39.0, 42.0, 36.0, 38.0, 43.0, 27.0, 24.0, 42.0, 26.0, 28.0, 31.0, 26.0, 27.0, 25.0, 23.0, 24.0, 14.0, 9.0, 6.0, 8.0, 5.0, 11.0, 2.0, 2.0, 3.0, 6.0, 0.0, 4.0, 1.0, 1.0, 1.0, 2.0, 1.0, 0.0, 2.0], "bins": [-5.01953125, -4.85662841796875, -4.6937255859375, -4.53082275390625, -4.367919921875, -4.20501708984375, -4.0421142578125, -3.87921142578125, -3.71630859375, -3.55340576171875, -3.3905029296875, -3.22760009765625, -3.064697265625, -2.90179443359375, -2.7388916015625, -2.57598876953125, -2.4130859375, -2.25018310546875, -2.0872802734375, -1.92437744140625, -1.761474609375, -1.59857177734375, -1.4356689453125, -1.27276611328125, -1.10986328125, -0.94696044921875, -0.7840576171875, -0.62115478515625, -0.458251953125, -0.29534912109375, -0.1324462890625, 0.03045654296875, 0.193359375, 0.35626220703125, 0.5191650390625, 0.68206787109375, 0.844970703125, 1.00787353515625, 1.1707763671875, 1.33367919921875, 1.49658203125, 1.65948486328125, 1.8223876953125, 1.98529052734375, 2.148193359375, 2.31109619140625, 2.4739990234375, 2.63690185546875, 2.7998046875, 2.96270751953125, 3.1256103515625, 3.28851318359375, 3.451416015625, 3.61431884765625, 3.7772216796875, 3.94012451171875, 4.10302734375, 4.26593017578125, 4.4288330078125, 4.59173583984375, 4.754638671875, 4.91754150390625, 5.0804443359375, 5.24334716796875, 5.40625]}, "gradients/decoder.transformer.h.10.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 1.0, 5.0, 1.0, 3.0, 6.0, 9.0, 10.0, 13.0, 24.0, 25.0, 26.0, 34.0, 38.0, 63.0, 95.0, 131.0, 219.0, 351.0, 773.0, 2164.0, 7150.0, 35317.0, 235092.0, 1412589.0, 1984116.0, 436904.0, 62766.0, 11410.0, 2843.0, 904.0, 444.0, 248.0, 146.0, 99.0, 52.0, 46.0, 41.0, 29.0, 27.0, 14.0, 15.0, 16.0, 11.0, 5.0, 4.0, 2.0, 2.0, 5.0, 3.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0], "bins": [-11.7578125, -11.4031982421875, -11.048583984375, -10.6939697265625, -10.33935546875, -9.9847412109375, -9.630126953125, -9.2755126953125, -8.9208984375, -8.5662841796875, -8.211669921875, -7.8570556640625, -7.50244140625, -7.1478271484375, -6.793212890625, -6.4385986328125, -6.083984375, -5.7293701171875, -5.374755859375, -5.0201416015625, -4.66552734375, -4.3109130859375, -3.956298828125, -3.6016845703125, -3.2470703125, -2.8924560546875, -2.537841796875, -2.1832275390625, -1.82861328125, -1.4739990234375, -1.119384765625, -0.7647705078125, -0.41015625, -0.0555419921875, 0.299072265625, 0.6536865234375, 1.00830078125, 1.3629150390625, 1.717529296875, 2.0721435546875, 2.4267578125, 2.7813720703125, 3.135986328125, 3.4906005859375, 3.84521484375, 4.1998291015625, 4.554443359375, 4.9090576171875, 5.263671875, 5.6182861328125, 5.972900390625, 6.3275146484375, 6.68212890625, 7.0367431640625, 7.391357421875, 7.7459716796875, 8.1005859375, 8.4552001953125, 8.809814453125, 9.1644287109375, 9.51904296875, 9.8736572265625, 10.228271484375, 10.5828857421875, 10.9375]}, "gradients/decoder.transformer.h.10.mlp.c_fc.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 6.0, 2.0, 3.0, 7.0, 10.0, 16.0, 15.0, 20.0, 34.0, 34.0, 65.0, 107.0, 137.0, 176.0, 235.0, 353.0, 453.0, 488.0, 461.0, 381.0, 287.0, 205.0, 152.0, 108.0, 95.0, 62.0, 50.0, 42.0, 18.0, 16.0, 18.0, 7.0, 3.0, 8.0, 4.0, 2.0, 2.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-10.8046875, -10.4287109375, -10.052734375, -9.6767578125, -9.30078125, -8.9248046875, -8.548828125, -8.1728515625, -7.796875, -7.4208984375, -7.044921875, -6.6689453125, -6.29296875, -5.9169921875, -5.541015625, -5.1650390625, -4.7890625, -4.4130859375, -4.037109375, -3.6611328125, -3.28515625, -2.9091796875, -2.533203125, -2.1572265625, -1.78125, -1.4052734375, -1.029296875, -0.6533203125, -0.27734375, 0.0986328125, 0.474609375, 0.8505859375, 1.2265625, 1.6025390625, 1.978515625, 2.3544921875, 2.73046875, 3.1064453125, 3.482421875, 3.8583984375, 4.234375, 4.6103515625, 4.986328125, 5.3623046875, 5.73828125, 6.1142578125, 6.490234375, 6.8662109375, 7.2421875, 7.6181640625, 7.994140625, 8.3701171875, 8.74609375, 9.1220703125, 9.498046875, 9.8740234375, 10.25, 10.6259765625, 11.001953125, 11.3779296875, 11.75390625, 12.1298828125, 12.505859375, 12.8818359375, 13.2578125]}, "gradients/decoder.transformer.h.10.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 3.0, 4.0, 4.0, 4.0, 11.0, 17.0, 16.0, 23.0, 43.0, 51.0, 78.0, 81.0, 139.0, 163.0, 293.0, 518.0, 2842.0, 528621.0, 3642724.0, 16765.0, 828.0, 363.0, 226.0, 144.0, 106.0, 72.0, 36.0, 34.0, 25.0, 11.0, 15.0, 9.0, 10.0, 2.0, 1.0, 3.0, 4.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-56.78125, -55.12158203125, -53.4619140625, -51.80224609375, -50.142578125, -48.48291015625, -46.8232421875, -45.16357421875, -43.50390625, -41.84423828125, -40.1845703125, -38.52490234375, -36.865234375, -35.20556640625, -33.5458984375, -31.88623046875, -30.2265625, -28.56689453125, -26.9072265625, -25.24755859375, -23.587890625, -21.92822265625, -20.2685546875, -18.60888671875, -16.94921875, -15.28955078125, -13.6298828125, -11.97021484375, -10.310546875, -8.65087890625, -6.9912109375, -5.33154296875, -3.671875, -2.01220703125, -0.3525390625, 1.30712890625, 2.966796875, 4.62646484375, 6.2861328125, 7.94580078125, 9.60546875, 11.26513671875, 12.9248046875, 14.58447265625, 16.244140625, 17.90380859375, 19.5634765625, 21.22314453125, 22.8828125, 24.54248046875, 26.2021484375, 27.86181640625, 29.521484375, 31.18115234375, 32.8408203125, 34.50048828125, 36.16015625, 37.81982421875, 39.4794921875, 41.13916015625, 42.798828125, 44.45849609375, 46.1181640625, 47.77783203125, 49.4375]}, "gradients/decoder.transformer.h.10.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 3.0, 15.0, 66.0, 131.0, 249.0, 274.0, 176.0, 71.0, 24.0, 8.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-69.85260009765625, -65.97642517089844, -62.100250244140625, -58.22407531738281, -54.347900390625, -50.47172546386719, -46.595550537109375, -42.71937561035156, -38.84320068359375, -34.96702575683594, -31.090850830078125, -27.214675903320312, -23.3385009765625, -19.46232795715332, -15.586153030395508, -11.709978103637695, -7.833805084228516, -3.9576303958892822, -0.08145570755004883, 3.7947187423706055, 7.670893669128418, 11.547067642211914, 15.423242568969727, 19.29941749572754, 23.17559242248535, 27.051767349243164, 30.927942276000977, 34.804115295410156, 38.68029022216797, 42.55646514892578, 46.432640075683594, 50.308815002441406, 54.18498992919922, 58.06116485595703, 61.937339782714844, 65.81351470947266, 69.68968963623047, 73.56586456298828, 77.4420394897461, 81.3182144165039, 85.19438934326172, 89.07056427001953, 92.94673919677734, 96.82291412353516, 100.69908905029297, 104.57526397705078, 108.4514389038086, 112.3276138305664, 116.20378112792969, 120.0799560546875, 123.95613098144531, 127.83230590820312, 131.70848083496094, 135.58465576171875, 139.46083068847656, 143.33700561523438, 147.2131805419922, 151.08935546875, 154.9655303955078, 158.84170532226562, 162.71788024902344, 166.59405517578125, 170.47023010253906, 174.34640502929688, 178.2225799560547]}, "gradients/decoder.transformer.h.10.ln_2.bias": {"_type": "histogram", "values": [1.0, 4.0, 2.0, 3.0, 5.0, 3.0, 8.0, 7.0, 4.0, 9.0, 4.0, 11.0, 21.0, 17.0, 16.0, 17.0, 28.0, 30.0, 27.0, 35.0, 29.0, 25.0, 45.0, 42.0, 24.0, 37.0, 25.0, 32.0, 35.0, 38.0, 35.0, 42.0, 31.0, 31.0, 32.0, 34.0, 19.0, 25.0, 25.0, 16.0, 19.0, 15.0, 15.0, 7.0, 10.0, 10.0, 16.0, 8.0, 8.0, 10.0, 9.0, 5.0, 5.0, 2.0, 2.0, 1.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0], "bins": [-28.55019187927246, -27.543489456176758, -26.536787033081055, -25.53008460998535, -24.52338218688965, -23.516679763793945, -22.50997543334961, -21.503273010253906, -20.496570587158203, -19.4898681640625, -18.483165740966797, -17.476463317871094, -16.46976089477539, -15.463058471679688, -14.456355094909668, -13.449652671813965, -12.442951202392578, -11.436248779296875, -10.429546356201172, -9.422843933105469, -8.416141510009766, -7.409438610076904, -6.402735710144043, -5.39603328704834, -4.389330863952637, -3.3826284408569336, -2.3759257793426514, -1.3692231178283691, -0.362520694732666, 0.6441817283630371, 1.6508846282958984, 2.6575870513916016, 3.6642913818359375, 4.670993804931641, 5.677696228027344, 6.684399127960205, 7.691101551055908, 8.697803497314453, 9.704506874084473, 10.711209297180176, 11.717911720275879, 12.724614143371582, 13.731316566467285, 14.738019943237305, 15.744722366333008, 16.75142478942871, 17.758127212524414, 18.764829635620117, 19.77153205871582, 20.778234481811523, 21.784936904907227, 22.79163932800293, 23.798341751098633, 24.805044174194336, 25.811748504638672, 26.818450927734375, 27.825153350830078, 28.83185577392578, 29.838558197021484, 30.845260620117188, 31.85196304321289, 32.858665466308594, 33.8653678894043, 34.8720703125, 35.8787727355957]}, "gradients/decoder.transformer.h.10.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 3.0, 0.0, 5.0, 8.0, 6.0, 5.0, 4.0, 7.0, 7.0, 16.0, 10.0, 17.0, 19.0, 23.0, 22.0, 20.0, 32.0, 44.0, 38.0, 45.0, 35.0, 42.0, 54.0, 42.0, 44.0, 38.0, 40.0, 38.0, 33.0, 33.0, 35.0, 42.0, 23.0, 33.0, 28.0, 25.0, 12.0, 20.0, 17.0, 13.0, 7.0, 8.0, 6.0, 6.0, 2.0, 3.0, 1.0, 1.0, 1.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0], "bins": [-5.78515625, -5.611328125, -5.4375, -5.263671875, -5.08984375, -4.916015625, -4.7421875, -4.568359375, -4.39453125, -4.220703125, -4.046875, -3.873046875, -3.69921875, -3.525390625, -3.3515625, -3.177734375, -3.00390625, -2.830078125, -2.65625, -2.482421875, -2.30859375, -2.134765625, -1.9609375, -1.787109375, -1.61328125, -1.439453125, -1.265625, -1.091796875, -0.91796875, -0.744140625, -0.5703125, -0.396484375, -0.22265625, -0.048828125, 0.125, 0.298828125, 0.47265625, 0.646484375, 0.8203125, 0.994140625, 1.16796875, 1.341796875, 1.515625, 1.689453125, 1.86328125, 2.037109375, 2.2109375, 2.384765625, 2.55859375, 2.732421875, 2.90625, 3.080078125, 3.25390625, 3.427734375, 3.6015625, 3.775390625, 3.94921875, 4.123046875, 4.296875, 4.470703125, 4.64453125, 4.818359375, 4.9921875, 5.166015625, 5.33984375]}, "gradients/decoder.transformer.h.10.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 2.0, 3.0, 2.0, 6.0, 3.0, 14.0, 17.0, 26.0, 38.0, 81.0, 100.0, 156.0, 207.0, 300.0, 450.0, 692.0, 1019.0, 1541.0, 2344.0, 3720.0, 5435.0, 8293.0, 12787.0, 19423.0, 29926.0, 45152.0, 67470.0, 96952.0, 129104.0, 149607.0, 138445.0, 107482.0, 76335.0, 52076.0, 34131.0, 22553.0, 14642.0, 9583.0, 6209.0, 4179.0, 2673.0, 1803.0, 1210.0, 827.0, 521.0, 348.0, 241.0, 140.0, 104.0, 55.0, 47.0, 39.0, 19.0, 18.0, 4.0, 10.0, 4.0, 3.0, 0.0, 0.0, 1.0], "bins": [-0.65966796875, -0.6394729614257812, -0.6192779541015625, -0.5990829467773438, -0.578887939453125, -0.5586929321289062, -0.5384979248046875, -0.5183029174804688, -0.49810791015625, -0.47791290283203125, -0.4577178955078125, -0.43752288818359375, -0.417327880859375, -0.39713287353515625, -0.3769378662109375, -0.35674285888671875, -0.3365478515625, -0.31635284423828125, -0.2961578369140625, -0.27596282958984375, -0.255767822265625, -0.23557281494140625, -0.2153778076171875, -0.19518280029296875, -0.17498779296875, -0.15479278564453125, -0.1345977783203125, -0.11440277099609375, -0.094207763671875, -0.07401275634765625, -0.0538177490234375, -0.03362274169921875, -0.013427734375, 0.00676727294921875, 0.0269622802734375, 0.04715728759765625, 0.067352294921875, 0.08754730224609375, 0.1077423095703125, 0.12793731689453125, 0.14813232421875, 0.16832733154296875, 0.1885223388671875, 0.20871734619140625, 0.228912353515625, 0.24910736083984375, 0.2693023681640625, 0.28949737548828125, 0.3096923828125, 0.32988739013671875, 0.3500823974609375, 0.37027740478515625, 0.390472412109375, 0.41066741943359375, 0.4308624267578125, 0.45105743408203125, 0.47125244140625, 0.49144744873046875, 0.5116424560546875, 0.5318374633789062, 0.552032470703125, 0.5722274780273438, 0.5924224853515625, 0.6126174926757812, 0.6328125]}, "gradients/decoder.transformer.h.10.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 3.0, 1.0, 0.0, 1.0, 2.0, 3.0, 3.0, 12.0, 5.0, 4.0, 13.0, 14.0, 13.0, 18.0, 20.0, 22.0, 23.0, 27.0, 27.0, 30.0, 35.0, 39.0, 47.0, 32.0, 34.0, 29.0, 46.0, 1061.0, 54.0, 27.0, 46.0, 33.0, 34.0, 31.0, 24.0, 35.0, 29.0, 22.0, 18.0, 21.0, 14.0, 18.0, 15.0, 9.0, 9.0, 6.0, 6.0, 5.0, 9.0, 3.0, 3.0, 1.0, 3.0, 0.0, 3.0, 0.0, 1.0], "bins": [-3.69921875, -3.590606689453125, -3.48199462890625, -3.373382568359375, -3.2647705078125, -3.156158447265625, -3.04754638671875, -2.938934326171875, -2.830322265625, -2.721710205078125, -2.61309814453125, -2.504486083984375, -2.3958740234375, -2.287261962890625, -2.17864990234375, -2.070037841796875, -1.96142578125, -1.852813720703125, -1.74420166015625, -1.635589599609375, -1.5269775390625, -1.418365478515625, -1.30975341796875, -1.201141357421875, -1.092529296875, -0.983917236328125, -0.87530517578125, -0.766693115234375, -0.6580810546875, -0.549468994140625, -0.44085693359375, -0.332244873046875, -0.2236328125, -0.115020751953125, -0.00640869140625, 0.102203369140625, 0.2108154296875, 0.319427490234375, 0.42803955078125, 0.536651611328125, 0.645263671875, 0.753875732421875, 0.86248779296875, 0.971099853515625, 1.0797119140625, 1.188323974609375, 1.29693603515625, 1.405548095703125, 1.51416015625, 1.622772216796875, 1.73138427734375, 1.839996337890625, 1.9486083984375, 2.057220458984375, 2.16583251953125, 2.274444580078125, 2.383056640625, 2.491668701171875, 2.60028076171875, 2.708892822265625, 2.8175048828125, 2.926116943359375, 3.03472900390625, 3.143341064453125, 3.251953125]}, "gradients/decoder.transformer.h.10.crossattention.c_attn.weight": {"_type": "histogram", "values": [3.0, 3.0, 5.0, 6.0, 8.0, 11.0, 7.0, 18.0, 21.0, 36.0, 64.0, 95.0, 118.0, 180.0, 257.0, 389.0, 554.0, 933.0, 1352.0, 2076.0, 3187.0, 5107.0, 8116.0, 12542.0, 20106.0, 31332.0, 48200.0, 72733.0, 104638.0, 137556.0, 1199167.0, 137082.0, 104820.0, 72300.0, 48024.0, 31155.0, 19636.0, 12524.0, 8002.0, 5199.0, 3270.0, 2145.0, 1420.0, 874.0, 629.0, 416.0, 266.0, 189.0, 110.0, 80.0, 51.0, 37.0, 25.0, 24.0, 17.0, 13.0, 7.0, 4.0, 4.0, 3.0, 2.0, 1.0, 1.0, 2.0], "bins": [-0.40234375, -0.3891487121582031, -0.37595367431640625, -0.3627586364746094, -0.3495635986328125, -0.3363685607910156, -0.32317352294921875, -0.3099784851074219, -0.296783447265625, -0.2835884094238281, -0.27039337158203125, -0.2571983337402344, -0.2440032958984375, -0.23080825805664062, -0.21761322021484375, -0.20441818237304688, -0.19122314453125, -0.17802810668945312, -0.16483306884765625, -0.15163803100585938, -0.1384429931640625, -0.12524795532226562, -0.11205291748046875, -0.09885787963867188, -0.085662841796875, -0.07246780395507812, -0.05927276611328125, -0.046077728271484375, -0.0328826904296875, -0.019687652587890625, -0.00649261474609375, 0.006702423095703125, 0.0198974609375, 0.033092498779296875, 0.04628753662109375, 0.059482574462890625, 0.0726776123046875, 0.08587265014648438, 0.09906768798828125, 0.11226272583007812, 0.125457763671875, 0.13865280151367188, 0.15184783935546875, 0.16504287719726562, 0.1782379150390625, 0.19143295288085938, 0.20462799072265625, 0.21782302856445312, 0.23101806640625, 0.24421310424804688, 0.25740814208984375, 0.2706031799316406, 0.2837982177734375, 0.2969932556152344, 0.31018829345703125, 0.3233833312988281, 0.336578369140625, 0.3497734069824219, 0.36296844482421875, 0.3761634826660156, 0.3893585205078125, 0.4025535583496094, 0.41574859619140625, 0.4289436340332031, 0.442138671875]}, "gradients/decoder.transformer.h.10.crossattention.q_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 2.0, 3.0, 2.0, 3.0, 4.0, 4.0, 3.0, 5.0, 9.0, 8.0, 8.0, 15.0, 20.0, 26.0, 23.0, 42.0, 41.0, 52.0, 75.0, 71.0, 81.0, 81.0, 90.0, 73.0, 42.0, 48.0, 40.0, 30.0, 14.0, 17.0, 11.0, 14.0, 8.0, 7.0, 9.0, 5.0, 6.0, 4.0, 6.0, 4.0, 3.0, 0.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0], "bins": [-0.0121612548828125, -0.011792302131652832, -0.011423349380493164, -0.011054396629333496, -0.010685443878173828, -0.01031649112701416, -0.009947538375854492, -0.009578585624694824, -0.009209632873535156, -0.008840680122375488, -0.00847172737121582, -0.008102774620056152, -0.007733821868896484, -0.007364869117736816, -0.0069959163665771484, -0.0066269636154174805, -0.0062580108642578125, -0.0058890581130981445, -0.0055201053619384766, -0.005151152610778809, -0.004782199859619141, -0.004413247108459473, -0.004044294357299805, -0.0036753416061401367, -0.0033063888549804688, -0.0029374361038208008, -0.002568483352661133, -0.002199530601501465, -0.0018305778503417969, -0.001461625099182129, -0.001092672348022461, -0.000723719596862793, -0.000354766845703125, 1.4185905456542969e-05, 0.00038313865661621094, 0.0007520914077758789, 0.0011210441589355469, 0.0014899969100952148, 0.0018589496612548828, 0.0022279024124145508, 0.0025968551635742188, 0.0029658079147338867, 0.0033347606658935547, 0.0037037134170532227, 0.004072666168212891, 0.004441618919372559, 0.0048105716705322266, 0.0051795244216918945, 0.0055484771728515625, 0.0059174299240112305, 0.0062863826751708984, 0.006655335426330566, 0.007024288177490234, 0.007393240928649902, 0.00776219367980957, 0.008131146430969238, 0.008500099182128906, 0.008869051933288574, 0.009238004684448242, 0.00960695743560791, 0.009975910186767578, 0.010344862937927246, 0.010713815689086914, 0.011082768440246582, 0.01145172119140625]}, "gradients/decoder.transformer.h.10.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 3.0, 6.0, 7.0, 5.0, 6.0, 7.0, 17.0, 18.0, 13.0, 24.0, 31.0, 37.0, 66.0, 74.0, 125.0, 225.0, 539.0, 16670.0, 1025309.0, 4354.0, 436.0, 188.0, 113.0, 72.0, 64.0, 41.0, 29.0, 20.0, 12.0, 11.0, 9.0, 6.0, 5.0, 4.0, 7.0, 2.0, 2.0, 4.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.24658203125, -0.2387237548828125, -0.230865478515625, -0.2230072021484375, -0.21514892578125, -0.2072906494140625, -0.199432373046875, -0.1915740966796875, -0.1837158203125, -0.1758575439453125, -0.167999267578125, -0.1601409912109375, -0.15228271484375, -0.1444244384765625, -0.136566162109375, -0.1287078857421875, -0.120849609375, -0.1129913330078125, -0.105133056640625, -0.0972747802734375, -0.08941650390625, -0.0815582275390625, -0.073699951171875, -0.0658416748046875, -0.0579833984375, -0.0501251220703125, -0.042266845703125, -0.0344085693359375, -0.02655029296875, -0.0186920166015625, -0.010833740234375, -0.0029754638671875, 0.0048828125, 0.0127410888671875, 0.020599365234375, 0.0284576416015625, 0.03631591796875, 0.0441741943359375, 0.052032470703125, 0.0598907470703125, 0.0677490234375, 0.0756072998046875, 0.083465576171875, 0.0913238525390625, 0.09918212890625, 0.1070404052734375, 0.114898681640625, 0.1227569580078125, 0.130615234375, 0.1384735107421875, 0.146331787109375, 0.1541900634765625, 0.16204833984375, 0.1699066162109375, 0.177764892578125, 0.1856231689453125, 0.1934814453125, 0.2013397216796875, 0.209197998046875, 0.2170562744140625, 0.22491455078125, 0.2327728271484375, 0.240631103515625, 0.2484893798828125, 0.25634765625]}, "gradients/decoder.transformer.h.10.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 8.0, 74.0, 904.0, 32.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.11567068845033646, -0.11130581051111221, -0.10694093257188797, -0.10257605463266373, -0.09821117669343948, -0.09384629875421524, -0.0894814133644104, -0.08511653542518616, -0.08075165748596191, -0.07638677954673767, -0.07202190160751343, -0.06765702366828918, -0.06329214572906494, -0.0589272677898407, -0.05456238612532616, -0.05019750818610191, -0.04583263397216797, -0.041467756032943726, -0.03710287809371948, -0.03273800015449524, -0.028373120352625847, -0.024008242413401604, -0.01964336261153221, -0.015278484672307968, -0.010913606733083725, -0.0065487283281981945, -0.002183849923312664, 0.0021810289472341537, 0.006545906886458397, 0.01091078482568264, 0.015275664627552032, 0.019640542566776276, 0.024005427956581116, 0.02837030589580536, 0.0327351838350296, 0.037100061774253845, 0.04146493971347809, 0.04582981765270233, 0.05019469931721687, 0.054559577256441116, 0.05892445519566536, 0.0632893368601799, 0.06765421479940414, 0.07201909273862839, 0.07638397067785263, 0.08074884861707687, 0.08511372655630112, 0.08947860449552536, 0.0938434824347496, 0.09820836037397385, 0.10257323831319809, 0.10693811625242233, 0.11130299419164658, 0.11566787213087082, 0.12003275752067566, 0.1243976354598999, 0.12876251339912415, 0.1331273913383484, 0.13749226927757263, 0.14185714721679688, 0.14622202515602112, 0.15058690309524536, 0.1549517810344696, 0.15931665897369385, 0.1636815369129181]}, "gradients/decoder.transformer.h.10.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 4.0, 1.0, 2.0, 2.0, 1.0, 6.0, 8.0, 5.0, 1.0, 10.0, 11.0, 14.0, 19.0, 20.0, 14.0, 28.0, 22.0, 35.0, 32.0, 40.0, 38.0, 33.0, 32.0, 39.0, 39.0, 42.0, 51.0, 43.0, 32.0, 43.0, 45.0, 30.0, 29.0, 27.0, 35.0, 30.0, 28.0, 24.0, 15.0, 20.0, 14.0, 8.0, 9.0, 8.0, 8.0, 6.0, 4.0, 4.0, 3.0, 2.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-0.01267462968826294, -0.01226948481053114, -0.01186433993279934, -0.01145919505506754, -0.01105405017733574, -0.010648905299603939, -0.010243760421872139, -0.009838615544140339, -0.009433470666408539, -0.009028325788676739, -0.008623180910944939, -0.008218036033213139, -0.007812891155481339, -0.007407746277749538, -0.007002601400017738, -0.006597456522285938, -0.006192311644554138, -0.005787166766822338, -0.005382021889090538, -0.004976877011358738, -0.004571732133626938, -0.004166587255895138, -0.0037614423781633377, -0.0033562975004315376, -0.0029511526226997375, -0.0025460077449679375, -0.0021408628672361374, -0.0017357179895043373, -0.0013305731117725372, -0.0009254282340407372, -0.0005202833563089371, -0.000115138478577137, 0.0002900063991546631, 0.0006951512768864632, 0.0011002961546182632, 0.0015054410323500633, 0.0019105859100818634, 0.0023157307878136635, 0.0027208756655454636, 0.0031260205432772636, 0.0035311654210090637, 0.003936310298740864, 0.004341455176472664, 0.004746600054204464, 0.005151744931936264, 0.005556889809668064, 0.005962034687399864, 0.006367179565131664, 0.006772324442863464, 0.0071774693205952644, 0.0075826141983270645, 0.007987759076058865, 0.008392903953790665, 0.008798048831522465, 0.009203193709254265, 0.009608338586986065, 0.010013483464717865, 0.010418628342449665, 0.010823773220181465, 0.011228918097913265, 0.011634062975645065, 0.012039207853376865, 0.012444352731108665, 0.012849497608840466, 0.013254642486572266]}, "gradients/decoder.transformer.h.10.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 3.0, 0.0, 5.0, 8.0, 6.0, 5.0, 4.0, 7.0, 7.0, 16.0, 10.0, 17.0, 19.0, 23.0, 22.0, 20.0, 33.0, 43.0, 38.0, 45.0, 35.0, 42.0, 55.0, 41.0, 44.0, 38.0, 40.0, 38.0, 33.0, 33.0, 35.0, 42.0, 23.0, 33.0, 28.0, 25.0, 12.0, 20.0, 17.0, 13.0, 7.0, 8.0, 6.0, 6.0, 2.0, 3.0, 1.0, 1.0, 1.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0], "bins": [-5.78515625, -5.611328125, -5.4375, -5.263671875, -5.08984375, -4.916015625, -4.7421875, -4.568359375, -4.39453125, -4.220703125, -4.046875, -3.873046875, -3.69921875, -3.525390625, -3.3515625, -3.177734375, -3.00390625, -2.830078125, -2.65625, -2.482421875, -2.30859375, -2.134765625, -1.9609375, -1.787109375, -1.61328125, -1.439453125, -1.265625, -1.091796875, -0.91796875, -0.744140625, -0.5703125, -0.396484375, -0.22265625, -0.048828125, 0.125, 0.298828125, 0.47265625, 0.646484375, 0.8203125, 0.994140625, 1.16796875, 1.341796875, 1.515625, 1.689453125, 1.86328125, 2.037109375, 2.2109375, 2.384765625, 2.55859375, 2.732421875, 2.90625, 3.080078125, 3.25390625, 3.427734375, 3.6015625, 3.775390625, 3.94921875, 4.123046875, 4.296875, 4.470703125, 4.64453125, 4.818359375, 4.9921875, 5.166015625, 5.33984375]}, "gradients/decoder.transformer.h.10.attn.c_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 3.0, 4.0, 8.0, 5.0, 13.0, 8.0, 9.0, 19.0, 25.0, 21.0, 42.0, 62.0, 79.0, 112.0, 189.0, 314.0, 534.0, 991.0, 1870.0, 3768.0, 7414.0, 14865.0, 29664.0, 62644.0, 132314.0, 278981.0, 268093.0, 128014.0, 60310.0, 28742.0, 14302.0, 7177.0, 3631.0, 1897.0, 949.0, 570.0, 372.0, 174.0, 111.0, 64.0, 41.0, 48.0, 22.0, 21.0, 27.0, 7.0, 7.0, 10.0, 6.0, 2.0, 1.0, 5.0, 3.0, 1.0, 2.0, 1.0], "bins": [-5.9375, -5.76275634765625, -5.5880126953125, -5.41326904296875, -5.238525390625, -5.06378173828125, -4.8890380859375, -4.71429443359375, -4.53955078125, -4.36480712890625, -4.1900634765625, -4.01531982421875, -3.840576171875, -3.66583251953125, -3.4910888671875, -3.31634521484375, -3.1416015625, -2.96685791015625, -2.7921142578125, -2.61737060546875, -2.442626953125, -2.26788330078125, -2.0931396484375, -1.91839599609375, -1.74365234375, -1.56890869140625, -1.3941650390625, -1.21942138671875, -1.044677734375, -0.86993408203125, -0.6951904296875, -0.52044677734375, -0.345703125, -0.17095947265625, 0.0037841796875, 0.17852783203125, 0.353271484375, 0.52801513671875, 0.7027587890625, 0.87750244140625, 1.05224609375, 1.22698974609375, 1.4017333984375, 1.57647705078125, 1.751220703125, 1.92596435546875, 2.1007080078125, 2.27545166015625, 2.4501953125, 2.62493896484375, 2.7996826171875, 2.97442626953125, 3.149169921875, 3.32391357421875, 3.4986572265625, 3.67340087890625, 3.84814453125, 4.02288818359375, 4.1976318359375, 4.37237548828125, 4.547119140625, 4.72186279296875, 4.8966064453125, 5.07135009765625, 5.24609375]}, "gradients/decoder.transformer.h.10.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 4.0, 1.0, 2.0, 2.0, 0.0, 2.0, 2.0, 6.0, 4.0, 3.0, 5.0, 16.0, 7.0, 10.0, 7.0, 16.0, 14.0, 20.0, 13.0, 20.0, 24.0, 37.0, 31.0, 42.0, 33.0, 61.0, 58.0, 84.0, 146.0, 244.0, 1377.0, 207.0, 107.0, 68.0, 38.0, 40.0, 33.0, 37.0, 32.0, 25.0, 28.0, 19.0, 20.0, 19.0, 19.0, 19.0, 14.0, 11.0, 5.0, 10.0, 2.0, 5.0, 2.0, 5.0, 6.0, 2.0, 0.0, 1.0, 2.0, 0.0, 2.0], "bins": [-15.140625, -14.6846923828125, -14.228759765625, -13.7728271484375, -13.31689453125, -12.8609619140625, -12.405029296875, -11.9490966796875, -11.4931640625, -11.0372314453125, -10.581298828125, -10.1253662109375, -9.66943359375, -9.2135009765625, -8.757568359375, -8.3016357421875, -7.845703125, -7.3897705078125, -6.933837890625, -6.4779052734375, -6.02197265625, -5.5660400390625, -5.110107421875, -4.6541748046875, -4.1982421875, -3.7423095703125, -3.286376953125, -2.8304443359375, -2.37451171875, -1.9185791015625, -1.462646484375, -1.0067138671875, -0.55078125, -0.0948486328125, 0.361083984375, 0.8170166015625, 1.27294921875, 1.7288818359375, 2.184814453125, 2.6407470703125, 3.0966796875, 3.5526123046875, 4.008544921875, 4.4644775390625, 4.92041015625, 5.3763427734375, 5.832275390625, 6.2882080078125, 6.744140625, 7.2000732421875, 7.656005859375, 8.1119384765625, 8.56787109375, 9.0238037109375, 9.479736328125, 9.9356689453125, 10.3916015625, 10.8475341796875, 11.303466796875, 11.7593994140625, 12.21533203125, 12.6712646484375, 13.127197265625, 13.5831298828125, 14.0390625]}, "gradients/decoder.transformer.h.10.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 4.0, 1.0, 5.0, 4.0, 6.0, 7.0, 5.0, 10.0, 7.0, 11.0, 26.0, 23.0, 28.0, 47.0, 61.0, 95.0, 149.0, 227.0, 368.0, 778.0, 2877.0, 524046.0, 2610162.0, 4671.0, 929.0, 435.0, 239.0, 130.0, 98.0, 64.0, 50.0, 35.0, 29.0, 12.0, 22.0, 9.0, 12.0, 7.0, 7.0, 6.0, 5.0, 3.0, 2.0, 3.0, 0.0, 4.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-56.21875, -54.28662109375, -52.3544921875, -50.42236328125, -48.490234375, -46.55810546875, -44.6259765625, -42.69384765625, -40.76171875, -38.82958984375, -36.8974609375, -34.96533203125, -33.033203125, -31.10107421875, -29.1689453125, -27.23681640625, -25.3046875, -23.37255859375, -21.4404296875, -19.50830078125, -17.576171875, -15.64404296875, -13.7119140625, -11.77978515625, -9.84765625, -7.91552734375, -5.9833984375, -4.05126953125, -2.119140625, -0.18701171875, 1.7451171875, 3.67724609375, 5.609375, 7.54150390625, 9.4736328125, 11.40576171875, 13.337890625, 15.27001953125, 17.2021484375, 19.13427734375, 21.06640625, 22.99853515625, 24.9306640625, 26.86279296875, 28.794921875, 30.72705078125, 32.6591796875, 34.59130859375, 36.5234375, 38.45556640625, 40.3876953125, 42.31982421875, 44.251953125, 46.18408203125, 48.1162109375, 50.04833984375, 51.98046875, 53.91259765625, 55.8447265625, 57.77685546875, 59.708984375, 61.64111328125, 63.5732421875, 65.50537109375, 67.4375]}, "gradients/decoder.transformer.h.10.ln_1.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 2.0, 4.0, 19.0, 144.0, 401.0, 337.0, 97.0, 13.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-29.084705352783203, -25.799070358276367, -22.51343536376953, -19.227800369262695, -15.94216537475586, -12.656530380249023, -9.370895385742188, -6.085260391235352, -2.7996253967285156, 0.4860095977783203, 3.7716445922851562, 7.057279586791992, 10.342914581298828, 13.628549575805664, 16.9141845703125, 20.199819564819336, 23.485454559326172, 26.771089553833008, 30.056724548339844, 33.34236145019531, 36.627994537353516, 39.91362762451172, 43.19926452636719, 46.484901428222656, 49.77053451538086, 53.05616760253906, 56.34180450439453, 59.62744140625, 62.9130744934082, 66.1987075805664, 69.48434448242188, 72.76998138427734, 76.05561828613281, 79.34125518798828, 82.62689208984375, 85.91252136230469, 89.19815826416016, 92.48379516601562, 95.76942443847656, 99.05506134033203, 102.3406982421875, 105.62633514404297, 108.91197204589844, 112.19760131835938, 115.48323822021484, 118.76887512207031, 122.05450439453125, 125.34014129638672, 128.6257781982422, 131.91140747070312, 135.19705200195312, 138.48268127441406, 141.768310546875, 145.053955078125, 148.33958435058594, 151.62522888183594, 154.91085815429688, 158.1964874267578, 161.4821319580078, 164.76776123046875, 168.05340576171875, 171.3390350341797, 174.62466430664062, 177.91030883789062, 181.19593811035156]}, "gradients/decoder.transformer.h.10.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 3.0, 0.0, 4.0, 3.0, 3.0, 4.0, 9.0, 12.0, 22.0, 16.0, 27.0, 22.0, 29.0, 42.0, 28.0, 48.0, 42.0, 45.0, 34.0, 64.0, 56.0, 57.0, 40.0, 46.0, 40.0, 49.0, 44.0, 35.0, 39.0, 20.0, 27.0, 22.0, 18.0, 16.0, 18.0, 4.0, 8.0, 3.0, 5.0, 6.0, 4.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-61.01214599609375, -59.292633056640625, -57.5731201171875, -55.853607177734375, -54.13409423828125, -52.414581298828125, -50.695064544677734, -48.97555160522461, -47.256038665771484, -45.53652572631836, -43.817012786865234, -42.09749984741211, -40.37798309326172, -38.658470153808594, -36.93895721435547, -35.219444274902344, -33.49993133544922, -31.780418395996094, -30.06090545654297, -28.34139060974121, -26.621877670288086, -24.90236473083496, -23.182849884033203, -21.463336944580078, -19.743824005126953, -18.024311065673828, -16.304798126220703, -14.585283279418945, -12.86577033996582, -11.146257400512695, -9.426743507385254, -7.7072296142578125, -5.9877166748046875, -4.268203258514404, -2.548689842224121, -0.8291764259338379, 0.8903369903564453, 2.6098499298095703, 4.329363822937012, 6.048877716064453, 7.768390655517578, 9.487903594970703, 11.207417488098145, 12.926931381225586, 14.646444320678711, 16.365957260131836, 18.085472106933594, 19.80498504638672, 21.524497985839844, 23.24401092529297, 24.963523864746094, 26.68303871154785, 28.402551651000977, 30.1220645904541, 31.84157943725586, 33.561092376708984, 35.28060531616211, 37.000118255615234, 38.71963119506836, 40.439144134521484, 42.158660888671875, 43.878173828125, 45.597686767578125, 47.31719970703125, 49.036712646484375]}, "gradients/decoder.transformer.h.9.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 4.0, 0.0, 0.0, 2.0, 1.0, 2.0, 5.0, 5.0, 6.0, 9.0, 7.0, 11.0, 7.0, 3.0, 19.0, 17.0, 26.0, 19.0, 23.0, 25.0, 26.0, 29.0, 39.0, 36.0, 39.0, 36.0, 54.0, 43.0, 29.0, 45.0, 32.0, 42.0, 38.0, 38.0, 35.0, 37.0, 28.0, 26.0, 28.0, 20.0, 22.0, 18.0, 19.0, 17.0, 12.0, 6.0, 7.0, 6.0, 4.0, 6.0, 3.0, 5.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0], "bins": [-6.01171875, -5.8330078125, -5.654296875, -5.4755859375, -5.296875, -5.1181640625, -4.939453125, -4.7607421875, -4.58203125, -4.4033203125, -4.224609375, -4.0458984375, -3.8671875, -3.6884765625, -3.509765625, -3.3310546875, -3.15234375, -2.9736328125, -2.794921875, -2.6162109375, -2.4375, -2.2587890625, -2.080078125, -1.9013671875, -1.72265625, -1.5439453125, -1.365234375, -1.1865234375, -1.0078125, -0.8291015625, -0.650390625, -0.4716796875, -0.29296875, -0.1142578125, 0.064453125, 0.2431640625, 0.421875, 0.6005859375, 0.779296875, 0.9580078125, 1.13671875, 1.3154296875, 1.494140625, 1.6728515625, 1.8515625, 2.0302734375, 2.208984375, 2.3876953125, 2.56640625, 2.7451171875, 2.923828125, 3.1025390625, 3.28125, 3.4599609375, 3.638671875, 3.8173828125, 3.99609375, 4.1748046875, 4.353515625, 4.5322265625, 4.7109375, 4.8896484375, 5.068359375, 5.2470703125, 5.42578125]}, "gradients/decoder.transformer.h.9.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 4.0, 3.0, 4.0, 1.0, 8.0, 6.0, 8.0, 5.0, 9.0, 13.0, 27.0, 21.0, 37.0, 30.0, 43.0, 70.0, 110.0, 152.0, 322.0, 834.0, 3219.0, 16967.0, 140964.0, 1325218.0, 2305168.0, 355547.0, 37459.0, 5638.0, 1352.0, 447.0, 196.0, 96.0, 74.0, 56.0, 29.0, 35.0, 26.0, 25.0, 11.0, 9.0, 10.0, 8.0, 7.0, 5.0, 5.0, 5.0, 5.0, 4.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0], "bins": [-15.015625, -14.5765380859375, -14.137451171875, -13.6983642578125, -13.25927734375, -12.8201904296875, -12.381103515625, -11.9420166015625, -11.5029296875, -11.0638427734375, -10.624755859375, -10.1856689453125, -9.74658203125, -9.3074951171875, -8.868408203125, -8.4293212890625, -7.990234375, -7.5511474609375, -7.112060546875, -6.6729736328125, -6.23388671875, -5.7947998046875, -5.355712890625, -4.9166259765625, -4.4775390625, -4.0384521484375, -3.599365234375, -3.1602783203125, -2.72119140625, -2.2821044921875, -1.843017578125, -1.4039306640625, -0.96484375, -0.5257568359375, -0.086669921875, 0.3524169921875, 0.79150390625, 1.2305908203125, 1.669677734375, 2.1087646484375, 2.5478515625, 2.9869384765625, 3.426025390625, 3.8651123046875, 4.30419921875, 4.7432861328125, 5.182373046875, 5.6214599609375, 6.060546875, 6.4996337890625, 6.938720703125, 7.3778076171875, 7.81689453125, 8.2559814453125, 8.695068359375, 9.1341552734375, 9.5732421875, 10.0123291015625, 10.451416015625, 10.8905029296875, 11.32958984375, 11.7686767578125, 12.207763671875, 12.6468505859375, 13.0859375]}, "gradients/decoder.transformer.h.9.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 0.0, 2.0, 2.0, 6.0, 5.0, 5.0, 6.0, 8.0, 17.0, 12.0, 21.0, 29.0, 40.0, 67.0, 72.0, 91.0, 159.0, 190.0, 305.0, 339.0, 435.0, 422.0, 416.0, 318.0, 257.0, 215.0, 173.0, 135.0, 67.0, 58.0, 55.0, 32.0, 35.0, 25.0, 17.0, 9.0, 18.0, 9.0, 2.0, 4.0, 2.0, 1.0, 0.0, 1.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-10.546875, -10.212646484375, -9.87841796875, -9.544189453125, -9.2099609375, -8.875732421875, -8.54150390625, -8.207275390625, -7.873046875, -7.538818359375, -7.20458984375, -6.870361328125, -6.5361328125, -6.201904296875, -5.86767578125, -5.533447265625, -5.19921875, -4.864990234375, -4.53076171875, -4.196533203125, -3.8623046875, -3.528076171875, -3.19384765625, -2.859619140625, -2.525390625, -2.191162109375, -1.85693359375, -1.522705078125, -1.1884765625, -0.854248046875, -0.52001953125, -0.185791015625, 0.1484375, 0.482666015625, 0.81689453125, 1.151123046875, 1.4853515625, 1.819580078125, 2.15380859375, 2.488037109375, 2.822265625, 3.156494140625, 3.49072265625, 3.824951171875, 4.1591796875, 4.493408203125, 4.82763671875, 5.161865234375, 5.49609375, 5.830322265625, 6.16455078125, 6.498779296875, 6.8330078125, 7.167236328125, 7.50146484375, 7.835693359375, 8.169921875, 8.504150390625, 8.83837890625, 9.172607421875, 9.5068359375, 9.841064453125, 10.17529296875, 10.509521484375, 10.84375]}, "gradients/decoder.transformer.h.9.mlp.c_fc.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 4.0, 3.0, 9.0, 18.0, 11.0, 9.0, 24.0, 29.0, 39.0, 53.0, 61.0, 79.0, 96.0, 156.0, 262.0, 352.0, 798.0, 11382.0, 2610473.0, 1561130.0, 7444.0, 760.0, 378.0, 213.0, 138.0, 89.0, 85.0, 49.0, 33.0, 20.0, 25.0, 16.0, 15.0, 8.0, 9.0, 1.0, 3.0, 6.0, 3.0, 2.0, 1.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-54.34375, -52.6943359375, -51.044921875, -49.3955078125, -47.74609375, -46.0966796875, -44.447265625, -42.7978515625, -41.1484375, -39.4990234375, -37.849609375, -36.2001953125, -34.55078125, -32.9013671875, -31.251953125, -29.6025390625, -27.953125, -26.3037109375, -24.654296875, -23.0048828125, -21.35546875, -19.7060546875, -18.056640625, -16.4072265625, -14.7578125, -13.1083984375, -11.458984375, -9.8095703125, -8.16015625, -6.5107421875, -4.861328125, -3.2119140625, -1.5625, 0.0869140625, 1.736328125, 3.3857421875, 5.03515625, 6.6845703125, 8.333984375, 9.9833984375, 11.6328125, 13.2822265625, 14.931640625, 16.5810546875, 18.23046875, 19.8798828125, 21.529296875, 23.1787109375, 24.828125, 26.4775390625, 28.126953125, 29.7763671875, 31.42578125, 33.0751953125, 34.724609375, 36.3740234375, 38.0234375, 39.6728515625, 41.322265625, 42.9716796875, 44.62109375, 46.2705078125, 47.919921875, 49.5693359375, 51.21875]}, "gradients/decoder.transformer.h.9.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 8.0, 14.0, 28.0, 50.0, 89.0, 109.0, 162.0, 152.0, 148.0, 104.0, 85.0, 27.0, 25.0, 5.0, 3.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-83.05976104736328, -80.81766510009766, -78.57556915283203, -76.3334732055664, -74.09137725830078, -71.84928131103516, -69.60718536376953, -67.3650894165039, -65.12299346923828, -62.880897521972656, -60.63880157470703, -58.396705627441406, -56.15460968017578, -53.912513732910156, -51.67041778564453, -49.428321838378906, -47.18622970581055, -44.94413375854492, -42.7020378112793, -40.45994186401367, -38.21784591674805, -35.97574996948242, -33.73365783691406, -31.491559982299805, -29.24946403503418, -27.007368087768555, -24.76527214050293, -22.523178100585938, -20.281082153320312, -18.038986206054688, -15.796890258789062, -13.554794311523438, -11.312698364257812, -9.070602416992188, -6.828506946563721, -4.586411476135254, -2.344315528869629, -0.1022195816040039, 2.1398754119873047, 4.38197135925293, 6.624067306518555, 8.86616325378418, 11.108259201049805, 13.350354194641113, 15.592450141906738, 17.834545135498047, 20.076641082763672, 22.318737030029297, 24.560832977294922, 26.802928924560547, 29.045024871826172, 31.287120819091797, 33.52921676635742, 35.77131271362305, 38.013404846191406, 40.25550079345703, 42.497596740722656, 44.73969268798828, 46.981788635253906, 49.22388458251953, 51.465980529785156, 53.70807647705078, 55.950172424316406, 58.19226837158203, 60.434364318847656]}, "gradients/decoder.transformer.h.9.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 3.0, 4.0, 1.0, 5.0, 7.0, 7.0, 6.0, 15.0, 14.0, 17.0, 14.0, 17.0, 23.0, 23.0, 24.0, 20.0, 30.0, 27.0, 35.0, 37.0, 38.0, 35.0, 39.0, 44.0, 42.0, 52.0, 41.0, 36.0, 26.0, 33.0, 40.0, 36.0, 31.0, 30.0, 28.0, 25.0, 11.0, 19.0, 12.0, 10.0, 14.0, 6.0, 12.0, 7.0, 6.0, 5.0, 5.0, 2.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-35.778419494628906, -34.61531066894531, -33.45220184326172, -32.28909683227539, -31.125988006591797, -29.962879180908203, -28.799772262573242, -27.63666534423828, -26.473556518554688, -25.310447692871094, -24.147340774536133, -22.984233856201172, -21.821125030517578, -20.658016204833984, -19.494909286499023, -18.331802368164062, -17.16869354248047, -16.005584716796875, -14.842477798461914, -13.679369926452637, -12.51626205444336, -11.353154182434082, -10.190046310424805, -9.026938438415527, -7.86383056640625, -6.700722694396973, -5.537614822387695, -4.374506950378418, -3.2113990783691406, -2.0482912063598633, -0.8851833343505859, 0.2779245376586914, 1.4410362243652344, 2.6041440963745117, 3.767251968383789, 4.930359840393066, 6.093467712402344, 7.256575584411621, 8.419683456420898, 9.582791328430176, 10.745899200439453, 11.90900707244873, 13.072114944458008, 14.235222816467285, 15.398330688476562, 16.561439514160156, 17.724546432495117, 18.887653350830078, 20.050762176513672, 21.213871002197266, 22.376977920532227, 23.540084838867188, 24.70319366455078, 25.866302490234375, 27.029409408569336, 28.192516326904297, 29.35562515258789, 30.518733978271484, 31.681840896606445, 32.844947814941406, 34.008056640625, 35.171165466308594, 36.33427429199219, 37.497379302978516, 38.66048812866211]}, "gradients/decoder.transformer.h.9.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 1.0, 5.0, 1.0, 5.0, 6.0, 13.0, 8.0, 8.0, 17.0, 15.0, 17.0, 18.0, 28.0, 24.0, 31.0, 27.0, 35.0, 32.0, 30.0, 37.0, 42.0, 42.0, 43.0, 44.0, 47.0, 44.0, 43.0, 51.0, 30.0, 35.0, 31.0, 34.0, 29.0, 16.0, 20.0, 16.0, 16.0, 12.0, 20.0, 8.0, 8.0, 9.0, 6.0, 0.0, 3.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-6.0, -5.81842041015625, -5.6368408203125, -5.45526123046875, -5.273681640625, -5.09210205078125, -4.9105224609375, -4.72894287109375, -4.54736328125, -4.36578369140625, -4.1842041015625, -4.00262451171875, -3.821044921875, -3.63946533203125, -3.4578857421875, -3.27630615234375, -3.0947265625, -2.91314697265625, -2.7315673828125, -2.54998779296875, -2.368408203125, -2.18682861328125, -2.0052490234375, -1.82366943359375, -1.64208984375, -1.46051025390625, -1.2789306640625, -1.09735107421875, -0.915771484375, -0.73419189453125, -0.5526123046875, -0.37103271484375, -0.189453125, -0.00787353515625, 0.1737060546875, 0.35528564453125, 0.536865234375, 0.71844482421875, 0.9000244140625, 1.08160400390625, 1.26318359375, 1.44476318359375, 1.6263427734375, 1.80792236328125, 1.989501953125, 2.17108154296875, 2.3526611328125, 2.53424072265625, 2.7158203125, 2.89739990234375, 3.0789794921875, 3.26055908203125, 3.442138671875, 3.62371826171875, 3.8052978515625, 3.98687744140625, 4.16845703125, 4.35003662109375, 4.5316162109375, 4.71319580078125, 4.894775390625, 5.07635498046875, 5.2579345703125, 5.43951416015625, 5.62109375]}, "gradients/decoder.transformer.h.9.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 1.0, 3.0, 6.0, 5.0, 13.0, 3.0, 17.0, 24.0, 41.0, 54.0, 78.0, 101.0, 179.0, 239.0, 369.0, 622.0, 920.0, 1453.0, 2311.0, 3548.0, 5636.0, 9011.0, 14617.0, 24047.0, 38928.0, 62749.0, 99223.0, 145618.0, 176494.0, 157548.0, 112791.0, 73523.0, 45427.0, 27396.0, 16970.0, 10615.0, 6506.0, 4150.0, 2586.0, 1652.0, 1100.0, 688.0, 440.0, 285.0, 201.0, 118.0, 88.0, 46.0, 43.0, 27.0, 22.0, 12.0, 9.0, 5.0, 6.0, 2.0, 0.0, 3.0, 1.0, 2.0], "bins": [-0.84033203125, -0.8145980834960938, -0.7888641357421875, -0.7631301879882812, -0.737396240234375, -0.7116622924804688, -0.6859283447265625, -0.6601943969726562, -0.63446044921875, -0.6087265014648438, -0.5829925537109375, -0.5572586059570312, -0.531524658203125, -0.5057907104492188, -0.4800567626953125, -0.45432281494140625, -0.4285888671875, -0.40285491943359375, -0.3771209716796875, -0.35138702392578125, -0.325653076171875, -0.29991912841796875, -0.2741851806640625, -0.24845123291015625, -0.22271728515625, -0.19698333740234375, -0.1712493896484375, -0.14551544189453125, -0.119781494140625, -0.09404754638671875, -0.0683135986328125, -0.04257965087890625, -0.016845703125, 0.00888824462890625, 0.0346221923828125, 0.06035614013671875, 0.086090087890625, 0.11182403564453125, 0.1375579833984375, 0.16329193115234375, 0.18902587890625, 0.21475982666015625, 0.2404937744140625, 0.26622772216796875, 0.291961669921875, 0.31769561767578125, 0.3434295654296875, 0.36916351318359375, 0.3948974609375, 0.42063140869140625, 0.4463653564453125, 0.47209930419921875, 0.497833251953125, 0.5235671997070312, 0.5493011474609375, 0.5750350952148438, 0.60076904296875, 0.6265029907226562, 0.6522369384765625, 0.6779708862304688, 0.703704833984375, 0.7294387817382812, 0.7551727294921875, 0.7809066772460938, 0.806640625]}, "gradients/decoder.transformer.h.9.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 3.0, 2.0, 4.0, 2.0, 9.0, 5.0, 4.0, 5.0, 5.0, 17.0, 14.0, 24.0, 19.0, 27.0, 31.0, 38.0, 27.0, 38.0, 34.0, 36.0, 40.0, 57.0, 41.0, 1073.0, 44.0, 41.0, 51.0, 57.0, 39.0, 38.0, 28.0, 36.0, 27.0, 25.0, 19.0, 18.0, 18.0, 12.0, 10.0, 6.0, 4.0, 3.0, 4.0, 3.0, 1.0, 4.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.8046875, -3.6650390625, -3.525390625, -3.3857421875, -3.24609375, -3.1064453125, -2.966796875, -2.8271484375, -2.6875, -2.5478515625, -2.408203125, -2.2685546875, -2.12890625, -1.9892578125, -1.849609375, -1.7099609375, -1.5703125, -1.4306640625, -1.291015625, -1.1513671875, -1.01171875, -0.8720703125, -0.732421875, -0.5927734375, -0.453125, -0.3134765625, -0.173828125, -0.0341796875, 0.10546875, 0.2451171875, 0.384765625, 0.5244140625, 0.6640625, 0.8037109375, 0.943359375, 1.0830078125, 1.22265625, 1.3623046875, 1.501953125, 1.6416015625, 1.78125, 1.9208984375, 2.060546875, 2.2001953125, 2.33984375, 2.4794921875, 2.619140625, 2.7587890625, 2.8984375, 3.0380859375, 3.177734375, 3.3173828125, 3.45703125, 3.5966796875, 3.736328125, 3.8759765625, 4.015625, 4.1552734375, 4.294921875, 4.4345703125, 4.57421875, 4.7138671875, 4.853515625, 4.9931640625, 5.1328125]}, "gradients/decoder.transformer.h.9.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 2.0, 0.0, 2.0, 6.0, 10.0, 17.0, 21.0, 34.0, 65.0, 78.0, 152.0, 242.0, 426.0, 758.0, 1320.0, 2253.0, 4140.0, 7411.0, 13145.0, 24487.0, 43953.0, 77236.0, 127077.0, 209526.0, 1214765.0, 150628.0, 95029.0, 55185.0, 30661.0, 16920.0, 9472.0, 5168.0, 2945.0, 1712.0, 975.0, 540.0, 311.0, 192.0, 104.0, 70.0, 40.0, 21.0, 18.0, 9.0, 7.0, 5.0, 3.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.66650390625, -0.6475677490234375, -0.628631591796875, -0.6096954345703125, -0.59075927734375, -0.5718231201171875, -0.552886962890625, -0.5339508056640625, -0.5150146484375, -0.4960784912109375, -0.477142333984375, -0.4582061767578125, -0.43927001953125, -0.4203338623046875, -0.401397705078125, -0.3824615478515625, -0.363525390625, -0.3445892333984375, -0.325653076171875, -0.3067169189453125, -0.28778076171875, -0.2688446044921875, -0.249908447265625, -0.2309722900390625, -0.2120361328125, -0.1930999755859375, -0.174163818359375, -0.1552276611328125, -0.13629150390625, -0.1173553466796875, -0.098419189453125, -0.0794830322265625, -0.060546875, -0.0416107177734375, -0.022674560546875, -0.0037384033203125, 0.01519775390625, 0.0341339111328125, 0.053070068359375, 0.0720062255859375, 0.0909423828125, 0.1098785400390625, 0.128814697265625, 0.1477508544921875, 0.16668701171875, 0.1856231689453125, 0.204559326171875, 0.2234954833984375, 0.242431640625, 0.2613677978515625, 0.280303955078125, 0.2992401123046875, 0.31817626953125, 0.3371124267578125, 0.356048583984375, 0.3749847412109375, 0.3939208984375, 0.4128570556640625, 0.431793212890625, 0.4507293701171875, 0.46966552734375, 0.4886016845703125, 0.507537841796875, 0.5264739990234375, 0.54541015625]}, "gradients/decoder.transformer.h.9.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 5.0, 5.0, 8.0, 3.0, 4.0, 9.0, 11.0, 7.0, 7.0, 12.0, 9.0, 20.0, 23.0, 22.0, 39.0, 45.0, 44.0, 53.0, 66.0, 80.0, 127.0, 67.0, 66.0, 42.0, 39.0, 38.0, 24.0, 21.0, 25.0, 13.0, 9.0, 13.0, 11.0, 5.0, 5.0, 5.0, 8.0, 6.0, 7.0, 3.0, 3.0, 1.0, 0.0, 2.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.0256805419921875, -0.024831295013427734, -0.02398204803466797, -0.023132801055908203, -0.022283554077148438, -0.021434307098388672, -0.020585060119628906, -0.01973581314086914, -0.018886566162109375, -0.01803731918334961, -0.017188072204589844, -0.016338825225830078, -0.015489578247070312, -0.014640331268310547, -0.013791084289550781, -0.012941837310791016, -0.01209259033203125, -0.011243343353271484, -0.010394096374511719, -0.009544849395751953, -0.008695602416992188, -0.007846355438232422, -0.006997108459472656, -0.006147861480712891, -0.005298614501953125, -0.004449367523193359, -0.0036001205444335938, -0.002750873565673828, -0.0019016265869140625, -0.0010523796081542969, -0.00020313262939453125, 0.0006461143493652344, 0.001495361328125, 0.0023446083068847656, 0.0031938552856445312, 0.004043102264404297, 0.0048923492431640625, 0.005741596221923828, 0.006590843200683594, 0.007440090179443359, 0.008289337158203125, 0.00913858413696289, 0.009987831115722656, 0.010837078094482422, 0.011686325073242188, 0.012535572052001953, 0.013384819030761719, 0.014234066009521484, 0.01508331298828125, 0.015932559967041016, 0.01678180694580078, 0.017631053924560547, 0.018480300903320312, 0.019329547882080078, 0.020178794860839844, 0.02102804183959961, 0.021877288818359375, 0.02272653579711914, 0.023575782775878906, 0.024425029754638672, 0.025274276733398438, 0.026123523712158203, 0.02697277069091797, 0.027822017669677734, 0.0286712646484375]}, "gradients/decoder.transformer.h.9.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 3.0, 3.0, 8.0, 9.0, 7.0, 7.0, 4.0, 9.0, 15.0, 12.0, 22.0, 31.0, 32.0, 47.0, 60.0, 86.0, 122.0, 206.0, 473.0, 1750.0, 1014879.0, 29119.0, 761.0, 322.0, 148.0, 110.0, 78.0, 52.0, 49.0, 26.0, 23.0, 22.0, 9.0, 8.0, 12.0, 11.0, 4.0, 4.0, 6.0, 6.0, 6.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.5859375, -0.5685806274414062, -0.5512237548828125, -0.5338668823242188, -0.516510009765625, -0.49915313720703125, -0.4817962646484375, -0.46443939208984375, -0.44708251953125, -0.42972564697265625, -0.4123687744140625, -0.39501190185546875, -0.377655029296875, -0.36029815673828125, -0.3429412841796875, -0.32558441162109375, -0.3082275390625, -0.29087066650390625, -0.2735137939453125, -0.25615692138671875, -0.238800048828125, -0.22144317626953125, -0.2040863037109375, -0.18672943115234375, -0.16937255859375, -0.15201568603515625, -0.1346588134765625, -0.11730194091796875, -0.099945068359375, -0.08258819580078125, -0.0652313232421875, -0.04787445068359375, -0.030517578125, -0.01316070556640625, 0.0041961669921875, 0.02155303955078125, 0.038909912109375, 0.05626678466796875, 0.0736236572265625, 0.09098052978515625, 0.10833740234375, 0.12569427490234375, 0.1430511474609375, 0.16040802001953125, 0.177764892578125, 0.19512176513671875, 0.2124786376953125, 0.22983551025390625, 0.2471923828125, 0.26454925537109375, 0.2819061279296875, 0.29926300048828125, 0.316619873046875, 0.33397674560546875, 0.3513336181640625, 0.36869049072265625, 0.38604736328125, 0.40340423583984375, 0.4207611083984375, 0.43811798095703125, 0.455474853515625, 0.47283172607421875, 0.4901885986328125, 0.5075454711914062, 0.52490234375]}, "gradients/decoder.transformer.h.9.ln_cross_attn.weight": {"_type": "histogram", "values": [3.0, 21.0, 991.0, 4.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.05284806340932846, -0.03107510134577751, -0.009302139282226562, 0.012470819056034088, 0.034243784844875336, 0.05601675063371658, 0.07778970152139664, 0.09956266731023788, 0.12133563309907913, 0.14310859143733978, 0.16488155722618103, 0.18665450811386108, 0.20842748880386353, 0.23020043969154358, 0.25197339057922363, 0.2737463712692261, 0.29551932215690613, 0.3172922730445862, 0.3390652537345886, 0.3608382046222687, 0.38261115550994873, 0.40438413619995117, 0.4261570870876312, 0.4479300379753113, 0.4697030186653137, 0.4914759695529938, 0.5132489204406738, 0.5350219011306763, 0.5567948818206787, 0.5785678625106812, 0.6003407835960388, 0.6221137642860413, 0.6438866853713989, 0.6656596660614014, 0.687432587146759, 0.7092055678367615, 0.7309785485267639, 0.7527514696121216, 0.774524450302124, 0.7962974309921265, 0.8180704116821289, 0.8398433923721313, 0.861616313457489, 0.8833892941474915, 0.9051622748374939, 0.9269351959228516, 0.948708176612854, 0.9704811573028564, 0.9922540783882141, 1.0140269994735718, 1.0357999801635742, 1.0575729608535767, 1.079345941543579, 1.1011189222335815, 1.122891902923584, 1.1446647644042969, 1.1664377450942993, 1.1882107257843018, 1.2099837064743042, 1.2317566871643066, 1.2535295486450195, 1.275302529335022, 1.2970755100250244, 1.3188484907150269, 1.3406214714050293]}, "gradients/decoder.transformer.h.9.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 7.0, 1.0, 3.0, 8.0, 11.0, 14.0, 19.0, 21.0, 20.0, 28.0, 33.0, 38.0, 45.0, 39.0, 45.0, 47.0, 49.0, 59.0, 46.0, 54.0, 45.0, 53.0, 55.0, 42.0, 44.0, 32.0, 25.0, 31.0, 23.0, 24.0, 14.0, 10.0, 7.0, 9.0, 7.0, 4.0, 1.0, 3.0, 2.0, 2.0], "bins": [-0.06615883111953735, -0.06458798050880432, -0.06301712989807129, -0.06144627556204796, -0.059875424951314926, -0.058304574340581894, -0.05673372000455856, -0.05516286939382553, -0.0535920187830925, -0.05202116817235947, -0.050450317561626434, -0.048879463225603104, -0.04730861261487007, -0.04573776200413704, -0.04416690766811371, -0.042596057057380676, -0.041025206446647644, -0.03945435583591461, -0.03788350522518158, -0.03631265088915825, -0.03474180027842522, -0.033170949667692184, -0.031600095331668854, -0.03002924472093582, -0.02845839411020279, -0.026887543499469757, -0.025316691026091576, -0.023745838552713394, -0.022174987941980362, -0.02060413733124733, -0.019033284857869148, -0.017462432384490967, -0.015891581773757935, -0.014320730231702328, -0.012749878689646721, -0.011179027147591114, -0.009608175605535507, -0.0080373240634799, -0.0064664725214242935, -0.004895620979368687, -0.00332476943731308, -0.001753917895257473, -0.00018306635320186615, 0.0013877851888537407, 0.0029586367309093475, 0.004529488272964954, 0.006100339815020561, 0.007671191357076168, 0.009242042899131775, 0.010812894441187382, 0.012383745983242989, 0.013954597525298595, 0.015525449067354202, 0.017096299678087234, 0.018667152151465416, 0.020238004624843597, 0.02180885523557663, 0.023379705846309662, 0.024950558319687843, 0.026521410793066025, 0.028092261403799057, 0.02966311201453209, 0.03123396448791027, 0.03280481696128845, 0.034375667572021484]}, "gradients/decoder.transformer.h.9.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 1.0, 5.0, 1.0, 5.0, 6.0, 13.0, 8.0, 8.0, 17.0, 15.0, 16.0, 19.0, 27.0, 25.0, 31.0, 26.0, 36.0, 32.0, 29.0, 38.0, 42.0, 42.0, 43.0, 44.0, 47.0, 44.0, 43.0, 51.0, 32.0, 32.0, 32.0, 34.0, 30.0, 15.0, 20.0, 16.0, 16.0, 13.0, 19.0, 8.0, 9.0, 8.0, 6.0, 0.0, 3.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-6.00390625, -5.82220458984375, -5.6405029296875, -5.45880126953125, -5.277099609375, -5.09539794921875, -4.9136962890625, -4.73199462890625, -4.55029296875, -4.36859130859375, -4.1868896484375, -4.00518798828125, -3.823486328125, -3.64178466796875, -3.4600830078125, -3.27838134765625, -3.0966796875, -2.91497802734375, -2.7332763671875, -2.55157470703125, -2.369873046875, -2.18817138671875, -2.0064697265625, -1.82476806640625, -1.64306640625, -1.46136474609375, -1.2796630859375, -1.09796142578125, -0.916259765625, -0.73455810546875, -0.5528564453125, -0.37115478515625, -0.189453125, -0.00775146484375, 0.1739501953125, 0.35565185546875, 0.537353515625, 0.71905517578125, 0.9007568359375, 1.08245849609375, 1.26416015625, 1.44586181640625, 1.6275634765625, 1.80926513671875, 1.990966796875, 2.17266845703125, 2.3543701171875, 2.53607177734375, 2.7177734375, 2.89947509765625, 3.0811767578125, 3.26287841796875, 3.444580078125, 3.62628173828125, 3.8079833984375, 3.98968505859375, 4.17138671875, 4.35308837890625, 4.5347900390625, 4.71649169921875, 4.898193359375, 5.07989501953125, 5.2615966796875, 5.44329833984375, 5.625]}, "gradients/decoder.transformer.h.9.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 2.0, 1.0, 2.0, 3.0, 3.0, 1.0, 7.0, 11.0, 6.0, 11.0, 8.0, 15.0, 27.0, 27.0, 39.0, 42.0, 59.0, 113.0, 115.0, 189.0, 401.0, 869.0, 2828.0, 11146.0, 47039.0, 224684.0, 578951.0, 139591.0, 31123.0, 7544.0, 2045.0, 686.0, 334.0, 178.0, 125.0, 84.0, 68.0, 43.0, 29.0, 29.0, 21.0, 18.0, 11.0, 13.0, 8.0, 6.0, 3.0, 4.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0], "bins": [-11.796875, -11.4432373046875, -11.089599609375, -10.7359619140625, -10.38232421875, -10.0286865234375, -9.675048828125, -9.3214111328125, -8.9677734375, -8.6141357421875, -8.260498046875, -7.9068603515625, -7.55322265625, -7.1995849609375, -6.845947265625, -6.4923095703125, -6.138671875, -5.7850341796875, -5.431396484375, -5.0777587890625, -4.72412109375, -4.3704833984375, -4.016845703125, -3.6632080078125, -3.3095703125, -2.9559326171875, -2.602294921875, -2.2486572265625, -1.89501953125, -1.5413818359375, -1.187744140625, -0.8341064453125, -0.48046875, -0.1268310546875, 0.226806640625, 0.5804443359375, 0.93408203125, 1.2877197265625, 1.641357421875, 1.9949951171875, 2.3486328125, 2.7022705078125, 3.055908203125, 3.4095458984375, 3.76318359375, 4.1168212890625, 4.470458984375, 4.8240966796875, 5.177734375, 5.5313720703125, 5.885009765625, 6.2386474609375, 6.59228515625, 6.9459228515625, 7.299560546875, 7.6531982421875, 8.0068359375, 8.3604736328125, 8.714111328125, 9.0677490234375, 9.42138671875, 9.7750244140625, 10.128662109375, 10.4822998046875, 10.8359375]}, "gradients/decoder.transformer.h.9.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 3.0, 3.0, 1.0, 2.0, 3.0, 2.0, 2.0, 5.0, 6.0, 7.0, 10.0, 14.0, 10.0, 21.0, 18.0, 16.0, 24.0, 24.0, 34.0, 26.0, 37.0, 35.0, 50.0, 70.0, 86.0, 173.0, 272.0, 1363.0, 165.0, 107.0, 73.0, 66.0, 42.0, 47.0, 35.0, 37.0, 26.0, 24.0, 21.0, 14.0, 14.0, 13.0, 9.0, 11.0, 11.0, 8.0, 2.0, 7.0, 3.0, 1.0, 2.0, 5.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0], "bins": [-17.875, -17.301513671875, -16.72802734375, -16.154541015625, -15.5810546875, -15.007568359375, -14.43408203125, -13.860595703125, -13.287109375, -12.713623046875, -12.14013671875, -11.566650390625, -10.9931640625, -10.419677734375, -9.84619140625, -9.272705078125, -8.69921875, -8.125732421875, -7.55224609375, -6.978759765625, -6.4052734375, -5.831787109375, -5.25830078125, -4.684814453125, -4.111328125, -3.537841796875, -2.96435546875, -2.390869140625, -1.8173828125, -1.243896484375, -0.67041015625, -0.096923828125, 0.4765625, 1.050048828125, 1.62353515625, 2.197021484375, 2.7705078125, 3.343994140625, 3.91748046875, 4.490966796875, 5.064453125, 5.637939453125, 6.21142578125, 6.784912109375, 7.3583984375, 7.931884765625, 8.50537109375, 9.078857421875, 9.65234375, 10.225830078125, 10.79931640625, 11.372802734375, 11.9462890625, 12.519775390625, 13.09326171875, 13.666748046875, 14.240234375, 14.813720703125, 15.38720703125, 15.960693359375, 16.5341796875, 17.107666015625, 17.68115234375, 18.254638671875, 18.828125]}, "gradients/decoder.transformer.h.9.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 4.0, 2.0, 5.0, 6.0, 13.0, 15.0, 29.0, 24.0, 45.0, 66.0, 96.0, 140.0, 309.0, 782.0, 8218.0, 3118575.0, 15686.0, 902.0, 280.0, 175.0, 120.0, 69.0, 54.0, 36.0, 29.0, 13.0, 6.0, 6.0, 5.0, 5.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-94.875, -91.7705078125, -88.666015625, -85.5615234375, -82.45703125, -79.3525390625, -76.248046875, -73.1435546875, -70.0390625, -66.9345703125, -63.830078125, -60.7255859375, -57.62109375, -54.5166015625, -51.412109375, -48.3076171875, -45.203125, -42.0986328125, -38.994140625, -35.8896484375, -32.78515625, -29.6806640625, -26.576171875, -23.4716796875, -20.3671875, -17.2626953125, -14.158203125, -11.0537109375, -7.94921875, -4.8447265625, -1.740234375, 1.3642578125, 4.46875, 7.5732421875, 10.677734375, 13.7822265625, 16.88671875, 19.9912109375, 23.095703125, 26.2001953125, 29.3046875, 32.4091796875, 35.513671875, 38.6181640625, 41.72265625, 44.8271484375, 47.931640625, 51.0361328125, 54.140625, 57.2451171875, 60.349609375, 63.4541015625, 66.55859375, 69.6630859375, 72.767578125, 75.8720703125, 78.9765625, 82.0810546875, 85.185546875, 88.2900390625, 91.39453125, 94.4990234375, 97.603515625, 100.7080078125, 103.8125]}, "gradients/decoder.transformer.h.9.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 51.0, 181.0, 347.0, 281.0, 120.0, 24.0, 7.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-32.45763397216797, -29.720035552978516, -26.982439041137695, -24.244842529296875, -21.507244110107422, -18.76964569091797, -16.03204917907715, -13.294452667236328, -10.556854248046875, -7.819256782531738, -5.081659317016602, -2.344061851501465, 0.3935356140136719, 3.1311330795288086, 5.868730545043945, 8.606327056884766, 11.343925476074219, 14.081522941589355, 16.819120407104492, 19.556716918945312, 22.294315338134766, 25.03191375732422, 27.76951026916504, 30.50710678100586, 33.24470520019531, 35.982303619384766, 38.71990203857422, 41.457496643066406, 44.19509506225586, 46.93269348144531, 49.6702880859375, 52.40788650512695, 55.145477294921875, 57.88307571411133, 60.62067413330078, 63.35826873779297, 66.09587097167969, 68.83346557617188, 71.57106018066406, 74.30865478515625, 77.04625701904297, 79.78385162353516, 82.52145385742188, 85.25904846191406, 87.99664306640625, 90.73424530029297, 93.47183990478516, 96.20944213867188, 98.94703674316406, 101.68463134765625, 104.42223358154297, 107.15982818603516, 109.89743041992188, 112.63502502441406, 115.37261962890625, 118.11021423339844, 120.84781646728516, 123.58541107177734, 126.32301330566406, 129.06060791015625, 131.79820251464844, 134.53579711914062, 137.27340698242188, 140.01100158691406, 142.74859619140625]}, "gradients/decoder.transformer.h.9.ln_1.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 2.0, 1.0, 6.0, 6.0, 1.0, 9.0, 8.0, 8.0, 15.0, 20.0, 12.0, 19.0, 33.0, 20.0, 40.0, 19.0, 50.0, 32.0, 40.0, 41.0, 54.0, 42.0, 49.0, 47.0, 46.0, 39.0, 45.0, 27.0, 33.0, 40.0, 27.0, 23.0, 20.0, 25.0, 23.0, 15.0, 10.0, 15.0, 11.0, 6.0, 12.0, 4.0, 6.0, 3.0, 2.0, 1.0, 5.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-61.99579620361328, -60.05271911621094, -58.109642028808594, -56.16656494140625, -54.223487854003906, -52.28041076660156, -50.33732986450195, -48.39425277709961, -46.451175689697266, -44.50809860229492, -42.56502151489258, -40.621944427490234, -38.678863525390625, -36.73578643798828, -34.79270935058594, -32.849632263183594, -30.90655517578125, -28.963478088378906, -27.020401000976562, -25.077322006225586, -23.134244918823242, -21.1911678314209, -19.248088836669922, -17.305011749267578, -15.361934661865234, -13.41885757446289, -11.47577953338623, -9.53270149230957, -7.589624404907227, -5.646547317504883, -3.7034692764282227, -1.7603912353515625, 0.18268203735351562, 2.1257596015930176, 4.0688371658325195, 6.0119147300720215, 7.954992294311523, 9.898069381713867, 11.841147422790527, 13.784225463867188, 15.727302551269531, 17.670379638671875, 19.61345672607422, 21.556535720825195, 23.49961280822754, 25.442689895629883, 27.38576889038086, 29.328845977783203, 31.271923065185547, 33.21500015258789, 35.158077239990234, 37.10115432739258, 39.04423522949219, 40.98731231689453, 42.930389404296875, 44.87346649169922, 46.81654357910156, 48.759620666503906, 50.70269775390625, 52.645774841308594, 54.58885192871094, 56.53192901611328, 58.47500991821289, 60.418087005615234, 62.36116409301758]}, "gradients/decoder.transformer.h.8.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 2.0, 2.0, 0.0, 1.0, 2.0, 1.0, 3.0, 2.0, 4.0, 1.0, 2.0, 6.0, 13.0, 11.0, 9.0, 13.0, 18.0, 17.0, 18.0, 19.0, 33.0, 19.0, 27.0, 43.0, 37.0, 38.0, 34.0, 41.0, 43.0, 50.0, 39.0, 50.0, 27.0, 41.0, 24.0, 48.0, 43.0, 34.0, 37.0, 24.0, 22.0, 20.0, 14.0, 17.0, 13.0, 10.0, 10.0, 4.0, 10.0, 4.0, 4.0, 8.0, 0.0, 4.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-6.20703125, -6.00738525390625, -5.8077392578125, -5.60809326171875, -5.408447265625, -5.20880126953125, -5.0091552734375, -4.80950927734375, -4.60986328125, -4.41021728515625, -4.2105712890625, -4.01092529296875, -3.811279296875, -3.61163330078125, -3.4119873046875, -3.21234130859375, -3.0126953125, -2.81304931640625, -2.6134033203125, -2.41375732421875, -2.214111328125, -2.01446533203125, -1.8148193359375, -1.61517333984375, -1.41552734375, -1.21588134765625, -1.0162353515625, -0.81658935546875, -0.616943359375, -0.41729736328125, -0.2176513671875, -0.01800537109375, 0.181640625, 0.38128662109375, 0.5809326171875, 0.78057861328125, 0.980224609375, 1.17987060546875, 1.3795166015625, 1.57916259765625, 1.77880859375, 1.97845458984375, 2.1781005859375, 2.37774658203125, 2.577392578125, 2.77703857421875, 2.9766845703125, 3.17633056640625, 3.3759765625, 3.57562255859375, 3.7752685546875, 3.97491455078125, 4.174560546875, 4.37420654296875, 4.5738525390625, 4.77349853515625, 4.97314453125, 5.17279052734375, 5.3724365234375, 5.57208251953125, 5.771728515625, 5.97137451171875, 6.1710205078125, 6.37066650390625, 6.5703125]}, "gradients/decoder.transformer.h.8.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 4.0, 0.0, 0.0, 1.0, 2.0, 2.0, 2.0, 4.0, 6.0, 14.0, 9.0, 18.0, 24.0, 30.0, 34.0, 42.0, 51.0, 97.0, 143.0, 219.0, 361.0, 637.0, 1371.0, 3045.0, 7837.0, 21177.0, 61987.0, 186739.0, 534516.0, 1145860.0, 1240564.0, 637886.0, 232741.0, 76755.0, 25760.0, 9434.0, 3609.0, 1512.0, 703.0, 355.0, 258.0, 139.0, 89.0, 66.0, 48.0, 41.0, 32.0, 20.0, 22.0, 8.0, 9.0, 6.0, 4.0, 6.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.16015625, -6.93707275390625, -6.7139892578125, -6.49090576171875, -6.267822265625, -6.04473876953125, -5.8216552734375, -5.59857177734375, -5.37548828125, -5.15240478515625, -4.9293212890625, -4.70623779296875, -4.483154296875, -4.26007080078125, -4.0369873046875, -3.81390380859375, -3.5908203125, -3.36773681640625, -3.1446533203125, -2.92156982421875, -2.698486328125, -2.47540283203125, -2.2523193359375, -2.02923583984375, -1.80615234375, -1.58306884765625, -1.3599853515625, -1.13690185546875, -0.913818359375, -0.69073486328125, -0.4676513671875, -0.24456787109375, -0.021484375, 0.20159912109375, 0.4246826171875, 0.64776611328125, 0.870849609375, 1.09393310546875, 1.3170166015625, 1.54010009765625, 1.76318359375, 1.98626708984375, 2.2093505859375, 2.43243408203125, 2.655517578125, 2.87860107421875, 3.1016845703125, 3.32476806640625, 3.5478515625, 3.77093505859375, 3.9940185546875, 4.21710205078125, 4.440185546875, 4.66326904296875, 4.8863525390625, 5.10943603515625, 5.33251953125, 5.55560302734375, 5.7786865234375, 6.00177001953125, 6.224853515625, 6.44793701171875, 6.6710205078125, 6.89410400390625, 7.1171875]}, "gradients/decoder.transformer.h.8.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 3.0, 3.0, 4.0, 5.0, 9.0, 5.0, 12.0, 15.0, 31.0, 39.0, 42.0, 53.0, 77.0, 104.0, 122.0, 188.0, 213.0, 311.0, 346.0, 419.0, 411.0, 386.0, 300.0, 253.0, 183.0, 141.0, 95.0, 80.0, 51.0, 50.0, 33.0, 25.0, 14.0, 23.0, 7.0, 9.0, 6.0, 3.0, 6.0, 4.0, 5.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.796875, -12.4422607421875, -12.087646484375, -11.7330322265625, -11.37841796875, -11.0238037109375, -10.669189453125, -10.3145751953125, -9.9599609375, -9.6053466796875, -9.250732421875, -8.8961181640625, -8.54150390625, -8.1868896484375, -7.832275390625, -7.4776611328125, -7.123046875, -6.7684326171875, -6.413818359375, -6.0592041015625, -5.70458984375, -5.3499755859375, -4.995361328125, -4.6407470703125, -4.2861328125, -3.9315185546875, -3.576904296875, -3.2222900390625, -2.86767578125, -2.5130615234375, -2.158447265625, -1.8038330078125, -1.44921875, -1.0946044921875, -0.739990234375, -0.3853759765625, -0.03076171875, 0.3238525390625, 0.678466796875, 1.0330810546875, 1.3876953125, 1.7423095703125, 2.096923828125, 2.4515380859375, 2.80615234375, 3.1607666015625, 3.515380859375, 3.8699951171875, 4.224609375, 4.5792236328125, 4.933837890625, 5.2884521484375, 5.64306640625, 5.9976806640625, 6.352294921875, 6.7069091796875, 7.0615234375, 7.4161376953125, 7.770751953125, 8.1253662109375, 8.47998046875, 8.8345947265625, 9.189208984375, 9.5438232421875, 9.8984375]}, "gradients/decoder.transformer.h.8.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 3.0, 4.0, 3.0, 6.0, 7.0, 9.0, 13.0, 21.0, 12.0, 41.0, 36.0, 56.0, 71.0, 119.0, 194.0, 327.0, 591.0, 1308.0, 6878.0, 364313.0, 3719018.0, 95396.0, 3544.0, 967.0, 506.0, 289.0, 183.0, 109.0, 86.0, 61.0, 40.0, 25.0, 22.0, 11.0, 8.0, 7.0, 3.0, 4.0, 2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-36.375, -35.04541015625, -33.7158203125, -32.38623046875, -31.056640625, -29.72705078125, -28.3974609375, -27.06787109375, -25.73828125, -24.40869140625, -23.0791015625, -21.74951171875, -20.419921875, -19.09033203125, -17.7607421875, -16.43115234375, -15.1015625, -13.77197265625, -12.4423828125, -11.11279296875, -9.783203125, -8.45361328125, -7.1240234375, -5.79443359375, -4.46484375, -3.13525390625, -1.8056640625, -0.47607421875, 0.853515625, 2.18310546875, 3.5126953125, 4.84228515625, 6.171875, 7.50146484375, 8.8310546875, 10.16064453125, 11.490234375, 12.81982421875, 14.1494140625, 15.47900390625, 16.80859375, 18.13818359375, 19.4677734375, 20.79736328125, 22.126953125, 23.45654296875, 24.7861328125, 26.11572265625, 27.4453125, 28.77490234375, 30.1044921875, 31.43408203125, 32.763671875, 34.09326171875, 35.4228515625, 36.75244140625, 38.08203125, 39.41162109375, 40.7412109375, 42.07080078125, 43.400390625, 44.72998046875, 46.0595703125, 47.38916015625, 48.71875]}, "gradients/decoder.transformer.h.8.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 8.0, 28.0, 64.0, 161.0, 229.0, 246.0, 176.0, 76.0, 24.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-143.14224243164062, -139.3675994873047, -135.5929718017578, -131.81832885742188, -128.04368591308594, -124.26905059814453, -120.49441528320312, -116.71977233886719, -112.94513702392578, -109.17050170898438, -105.39585876464844, -101.62122344970703, -97.84658813476562, -94.07194519042969, -90.29730987548828, -86.52267456054688, -82.74803161621094, -78.97339630126953, -75.1987533569336, -71.42411804199219, -67.64947509765625, -63.874839782714844, -60.10020446777344, -56.325565338134766, -52.550926208496094, -48.77628707885742, -45.00164794921875, -41.227012634277344, -37.45237350463867, -33.677734375, -29.90309715270996, -26.128459930419922, -22.35381317138672, -18.579174041748047, -14.804536819458008, -11.029898643493652, -7.255260467529297, -3.480621337890625, 0.29401588439941406, 4.068653106689453, 7.843292236328125, 11.61793041229248, 15.392568588256836, 19.167205810546875, 22.941844940185547, 26.71648406982422, 30.491121292114258, 34.2657585144043, 38.04039764404297, 41.81503677368164, 45.58967590332031, 49.36431121826172, 53.13895034790039, 56.91358947753906, 60.68822479248047, 64.46286010742188, 68.23750305175781, 72.01213836669922, 75.78678131103516, 79.56141662597656, 83.3360595703125, 87.1106948852539, 90.88533020019531, 94.65997314453125, 98.43460845947266]}, "gradients/decoder.transformer.h.8.ln_2.bias": {"_type": "histogram", "values": [2.0, 1.0, 2.0, 4.0, 4.0, 0.0, 2.0, 2.0, 5.0, 5.0, 1.0, 6.0, 12.0, 3.0, 9.0, 9.0, 20.0, 16.0, 25.0, 22.0, 24.0, 25.0, 23.0, 23.0, 31.0, 26.0, 41.0, 38.0, 33.0, 38.0, 55.0, 32.0, 29.0, 37.0, 29.0, 34.0, 42.0, 30.0, 33.0, 29.0, 16.0, 21.0, 17.0, 27.0, 21.0, 16.0, 24.0, 13.0, 7.0, 14.0, 8.0, 7.0, 8.0, 6.0, 2.0, 5.0, 1.0, 0.0, 4.0, 0.0, 0.0, 2.0, 1.0, 2.0], "bins": [-35.62378692626953, -34.5118293762207, -33.399871826171875, -32.28791427612305, -31.17595672607422, -30.06399917602539, -28.952041625976562, -27.840084075927734, -26.728126525878906, -25.616168975830078, -24.50421142578125, -23.392253875732422, -22.280296325683594, -21.168338775634766, -20.056381225585938, -18.94442367553711, -17.83246421813965, -16.72050666809082, -15.608549118041992, -14.496591567993164, -13.384634017944336, -12.272676467895508, -11.160717964172363, -10.048760414123535, -8.936802864074707, -7.824845314025879, -6.712887763977051, -5.6009297370910645, -4.488972187042236, -3.377014636993408, -2.265056610107422, -1.1530990600585938, -0.041141510009765625, 1.070816159248352, 2.1827738285064697, 3.294731616973877, 4.406689167022705, 5.518646717071533, 6.6306047439575195, 7.742562294006348, 8.854519844055176, 9.966477394104004, 11.078434944152832, 12.190393447875977, 13.302350997924805, 14.414308547973633, 15.526266098022461, 16.63822364807129, 17.750181198120117, 18.862138748168945, 19.974096298217773, 21.0860538482666, 22.19801139831543, 23.309968948364258, 24.42192840576172, 25.533885955810547, 26.645843505859375, 27.757801055908203, 28.86975860595703, 29.98171615600586, 31.093673706054688, 32.205631256103516, 33.317588806152344, 34.42954635620117, 35.54150390625]}, "gradients/decoder.transformer.h.8.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 4.0, 3.0, 2.0, 4.0, 6.0, 3.0, 6.0, 8.0, 11.0, 16.0, 20.0, 14.0, 34.0, 26.0, 30.0, 36.0, 36.0, 29.0, 50.0, 40.0, 40.0, 33.0, 35.0, 43.0, 42.0, 47.0, 40.0, 39.0, 34.0, 33.0, 30.0, 31.0, 29.0, 24.0, 26.0, 24.0, 17.0, 13.0, 10.0, 6.0, 7.0, 5.0, 4.0, 8.0, 6.0, 3.0, 2.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 2.0], "bins": [-6.51953125, -6.32391357421875, -6.1282958984375, -5.93267822265625, -5.737060546875, -5.54144287109375, -5.3458251953125, -5.15020751953125, -4.95458984375, -4.75897216796875, -4.5633544921875, -4.36773681640625, -4.172119140625, -3.97650146484375, -3.7808837890625, -3.58526611328125, -3.3896484375, -3.19403076171875, -2.9984130859375, -2.80279541015625, -2.607177734375, -2.41156005859375, -2.2159423828125, -2.02032470703125, -1.82470703125, -1.62908935546875, -1.4334716796875, -1.23785400390625, -1.042236328125, -0.84661865234375, -0.6510009765625, -0.45538330078125, -0.259765625, -0.06414794921875, 0.1314697265625, 0.32708740234375, 0.522705078125, 0.71832275390625, 0.9139404296875, 1.10955810546875, 1.30517578125, 1.50079345703125, 1.6964111328125, 1.89202880859375, 2.087646484375, 2.28326416015625, 2.4788818359375, 2.67449951171875, 2.8701171875, 3.06573486328125, 3.2613525390625, 3.45697021484375, 3.652587890625, 3.84820556640625, 4.0438232421875, 4.23944091796875, 4.43505859375, 4.63067626953125, 4.8262939453125, 5.02191162109375, 5.217529296875, 5.41314697265625, 5.6087646484375, 5.80438232421875, 6.0]}, "gradients/decoder.transformer.h.8.crossattention.c_proj.weight": {"_type": "histogram", "values": [3.0, 5.0, 4.0, 9.0, 12.0, 13.0, 27.0, 40.0, 69.0, 82.0, 108.0, 189.0, 275.0, 391.0, 571.0, 801.0, 1230.0, 1773.0, 2622.0, 3738.0, 5622.0, 8186.0, 12013.0, 18296.0, 27356.0, 41023.0, 60246.0, 84446.0, 111608.0, 134231.0, 136670.0, 115003.0, 87894.0, 63067.0, 42924.0, 28928.0, 19267.0, 12804.0, 8675.0, 5809.0, 3920.0, 2694.0, 1883.0, 1292.0, 882.0, 595.0, 397.0, 274.0, 209.0, 130.0, 90.0, 69.0, 49.0, 20.0, 22.0, 2.0, 9.0, 3.0, 2.0, 1.0, 0.0, 1.0, 0.0, 2.0], "bins": [-0.63671875, -0.61553955078125, -0.5943603515625, -0.57318115234375, -0.552001953125, -0.53082275390625, -0.5096435546875, -0.48846435546875, -0.46728515625, -0.44610595703125, -0.4249267578125, -0.40374755859375, -0.382568359375, -0.36138916015625, -0.3402099609375, -0.31903076171875, -0.2978515625, -0.27667236328125, -0.2554931640625, -0.23431396484375, -0.213134765625, -0.19195556640625, -0.1707763671875, -0.14959716796875, -0.12841796875, -0.10723876953125, -0.0860595703125, -0.06488037109375, -0.043701171875, -0.02252197265625, -0.0013427734375, 0.01983642578125, 0.041015625, 0.06219482421875, 0.0833740234375, 0.10455322265625, 0.125732421875, 0.14691162109375, 0.1680908203125, 0.18927001953125, 0.21044921875, 0.23162841796875, 0.2528076171875, 0.27398681640625, 0.295166015625, 0.31634521484375, 0.3375244140625, 0.35870361328125, 0.3798828125, 0.40106201171875, 0.4222412109375, 0.44342041015625, 0.464599609375, 0.48577880859375, 0.5069580078125, 0.52813720703125, 0.54931640625, 0.57049560546875, 0.5916748046875, 0.61285400390625, 0.634033203125, 0.65521240234375, 0.6763916015625, 0.69757080078125, 0.71875]}, "gradients/decoder.transformer.h.8.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 3.0, 1.0, 6.0, 3.0, 5.0, 8.0, 9.0, 11.0, 8.0, 16.0, 23.0, 30.0, 22.0, 29.0, 37.0, 28.0, 47.0, 44.0, 28.0, 40.0, 48.0, 1070.0, 53.0, 51.0, 43.0, 45.0, 38.0, 36.0, 41.0, 28.0, 32.0, 27.0, 24.0, 22.0, 13.0, 17.0, 10.0, 10.0, 10.0, 7.0, 2.0, 4.0, 1.0, 3.0, 3.0, 2.0, 1.0, 0.0, 2.0, 1.0], "bins": [-5.59765625, -5.44146728515625, -5.2852783203125, -5.12908935546875, -4.972900390625, -4.81671142578125, -4.6605224609375, -4.50433349609375, -4.34814453125, -4.19195556640625, -4.0357666015625, -3.87957763671875, -3.723388671875, -3.56719970703125, -3.4110107421875, -3.25482177734375, -3.0986328125, -2.94244384765625, -2.7862548828125, -2.63006591796875, -2.473876953125, -2.31768798828125, -2.1614990234375, -2.00531005859375, -1.84912109375, -1.69293212890625, -1.5367431640625, -1.38055419921875, -1.224365234375, -1.06817626953125, -0.9119873046875, -0.75579833984375, -0.599609375, -0.44342041015625, -0.2872314453125, -0.13104248046875, 0.025146484375, 0.18133544921875, 0.3375244140625, 0.49371337890625, 0.64990234375, 0.80609130859375, 0.9622802734375, 1.11846923828125, 1.274658203125, 1.43084716796875, 1.5870361328125, 1.74322509765625, 1.8994140625, 2.05560302734375, 2.2117919921875, 2.36798095703125, 2.524169921875, 2.68035888671875, 2.8365478515625, 2.99273681640625, 3.14892578125, 3.30511474609375, 3.4613037109375, 3.61749267578125, 3.773681640625, 3.92987060546875, 4.0860595703125, 4.24224853515625, 4.3984375]}, "gradients/decoder.transformer.h.8.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 4.0, 5.0, 10.0, 15.0, 13.0, 28.0, 30.0, 53.0, 66.0, 114.0, 172.0, 283.0, 395.0, 751.0, 1054.0, 1726.0, 2850.0, 4682.0, 7700.0, 12658.0, 20928.0, 33698.0, 52572.0, 80754.0, 116277.0, 175381.0, 1182060.0, 134186.0, 96376.0, 64439.0, 41162.0, 25922.0, 15617.0, 9747.0, 5828.0, 3657.0, 2228.0, 1403.0, 896.0, 534.0, 330.0, 185.0, 134.0, 66.0, 62.0, 40.0, 22.0, 14.0, 8.0, 5.0, 4.0, 1.0, 1.0], "bins": [-0.611328125, -0.5949058532714844, -0.5784835815429688, -0.5620613098144531, -0.5456390380859375, -0.5292167663574219, -0.5127944946289062, -0.4963722229003906, -0.479949951171875, -0.4635276794433594, -0.44710540771484375, -0.4306831359863281, -0.4142608642578125, -0.3978385925292969, -0.38141632080078125, -0.3649940490722656, -0.34857177734375, -0.3321495056152344, -0.31572723388671875, -0.2993049621582031, -0.2828826904296875, -0.2664604187011719, -0.25003814697265625, -0.23361587524414062, -0.217193603515625, -0.20077133178710938, -0.18434906005859375, -0.16792678833007812, -0.1515045166015625, -0.13508224487304688, -0.11865997314453125, -0.10223770141601562, -0.0858154296875, -0.06939315795898438, -0.05297088623046875, -0.036548614501953125, -0.0201263427734375, -0.003704071044921875, 0.01271820068359375, 0.029140472412109375, 0.045562744140625, 0.061985015869140625, 0.07840728759765625, 0.09482955932617188, 0.1112518310546875, 0.12767410278320312, 0.14409637451171875, 0.16051864624023438, 0.17694091796875, 0.19336318969726562, 0.20978546142578125, 0.22620773315429688, 0.2426300048828125, 0.2590522766113281, 0.27547454833984375, 0.2918968200683594, 0.308319091796875, 0.3247413635253906, 0.34116363525390625, 0.3575859069824219, 0.3740081787109375, 0.3904304504394531, 0.40685272216796875, 0.4232749938964844, 0.439697265625]}, "gradients/decoder.transformer.h.8.crossattention.q_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 2.0, 6.0, 2.0, 1.0, 5.0, 3.0, 5.0, 8.0, 4.0, 6.0, 5.0, 13.0, 13.0, 10.0, 16.0, 13.0, 24.0, 27.0, 20.0, 40.0, 40.0, 52.0, 67.0, 59.0, 73.0, 64.0, 80.0, 48.0, 45.0, 34.0, 22.0, 35.0, 27.0, 16.0, 23.0, 11.0, 21.0, 12.0, 9.0, 8.0, 8.0, 8.0, 8.0, 8.0, 2.0, 2.0, 4.0, 2.0, 1.0, 0.0, 3.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.01561737060546875, -0.015056967735290527, -0.014496564865112305, -0.013936161994934082, -0.01337575912475586, -0.012815356254577637, -0.012254953384399414, -0.011694550514221191, -0.011134147644042969, -0.010573744773864746, -0.010013341903686523, -0.0094529390335083, -0.008892536163330078, -0.008332133293151855, -0.007771730422973633, -0.00721132755279541, -0.0066509246826171875, -0.006090521812438965, -0.005530118942260742, -0.0049697160720825195, -0.004409313201904297, -0.0038489103317260742, -0.0032885074615478516, -0.002728104591369629, -0.0021677017211914062, -0.0016072988510131836, -0.001046895980834961, -0.0004864931106567383, 7.390975952148438e-05, 0.000634312629699707, 0.0011947154998779297, 0.0017551183700561523, 0.002315521240234375, 0.0028759241104125977, 0.0034363269805908203, 0.003996729850769043, 0.004557132720947266, 0.005117535591125488, 0.005677938461303711, 0.006238341331481934, 0.006798744201660156, 0.007359147071838379, 0.007919549942016602, 0.008479952812194824, 0.009040355682373047, 0.00960075855255127, 0.010161161422729492, 0.010721564292907715, 0.011281967163085938, 0.01184237003326416, 0.012402772903442383, 0.012963175773620605, 0.013523578643798828, 0.01408398151397705, 0.014644384384155273, 0.015204787254333496, 0.01576519012451172, 0.01632559299468994, 0.016885995864868164, 0.017446398735046387, 0.01800680160522461, 0.018567204475402832, 0.019127607345581055, 0.019688010215759277, 0.0202484130859375]}, "gradients/decoder.transformer.h.8.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 3.0, 4.0, 6.0, 9.0, 9.0, 8.0, 14.0, 11.0, 13.0, 24.0, 26.0, 34.0, 44.0, 54.0, 71.0, 82.0, 112.0, 174.0, 307.0, 663.0, 11323.0, 1027789.0, 6237.0, 565.0, 315.0, 165.0, 119.0, 84.0, 63.0, 43.0, 37.0, 25.0, 29.0, 19.0, 15.0, 15.0, 6.0, 6.0, 7.0, 8.0, 2.0, 5.0, 3.0, 2.0, 6.0, 2.0, 0.0, 2.0], "bins": [-0.421630859375, -0.410369873046875, -0.39910888671875, -0.387847900390625, -0.3765869140625, -0.365325927734375, -0.35406494140625, -0.342803955078125, -0.33154296875, -0.320281982421875, -0.30902099609375, -0.297760009765625, -0.2864990234375, -0.275238037109375, -0.26397705078125, -0.252716064453125, -0.241455078125, -0.230194091796875, -0.21893310546875, -0.207672119140625, -0.1964111328125, -0.185150146484375, -0.17388916015625, -0.162628173828125, -0.1513671875, -0.140106201171875, -0.12884521484375, -0.117584228515625, -0.1063232421875, -0.095062255859375, -0.08380126953125, -0.072540283203125, -0.061279296875, -0.050018310546875, -0.03875732421875, -0.027496337890625, -0.0162353515625, -0.004974365234375, 0.00628662109375, 0.017547607421875, 0.02880859375, 0.040069580078125, 0.05133056640625, 0.062591552734375, 0.0738525390625, 0.085113525390625, 0.09637451171875, 0.107635498046875, 0.118896484375, 0.130157470703125, 0.14141845703125, 0.152679443359375, 0.1639404296875, 0.175201416015625, 0.18646240234375, 0.197723388671875, 0.208984375, 0.220245361328125, 0.23150634765625, 0.242767333984375, 0.2540283203125, 0.265289306640625, 0.27655029296875, 0.287811279296875, 0.299072265625]}, "gradients/decoder.transformer.h.8.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 11.0, 991.0, 18.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.08656884729862213, -0.07344507426023483, -0.06032129377126694, -0.04719752073287964, -0.03407374396920204, -0.020949967205524445, -0.007826194167137146, 0.0052975863218307495, 0.018421359360218048, 0.031545136123895645, 0.04466891288757324, 0.05779268592596054, 0.07091645896434784, 0.08404023945331573, 0.09716401249170303, 0.11028779298067093, 0.12341156601905823, 0.13653534650802612, 0.14965911209583282, 0.16278289258480072, 0.17590667307376862, 0.18903043866157532, 0.2021542191505432, 0.2152779996395111, 0.228401780128479, 0.2415255606174469, 0.2546493411064148, 0.2677730917930603, 0.2808968722820282, 0.2940206527709961, 0.307144433259964, 0.3202682137489319, 0.3333919942378998, 0.3465157747268677, 0.35963955521583557, 0.37276333570480347, 0.385887086391449, 0.39901086688041687, 0.41213464736938477, 0.42525842785835266, 0.43838220834732056, 0.45150598883628845, 0.46462976932525635, 0.47775352001190186, 0.49087730050086975, 0.5040010809898376, 0.5171248912811279, 0.5302486419677734, 0.543372392654419, 0.5564961433410645, 0.5696199536323547, 0.5827437043190002, 0.5958675146102905, 0.608991265296936, 0.6221150159835815, 0.6352388262748718, 0.6483626365661621, 0.6614863872528076, 0.6746101975440979, 0.6877339482307434, 0.7008577585220337, 0.7139815092086792, 0.7271052598953247, 0.740229070186615, 0.7533528208732605]}, "gradients/decoder.transformer.h.8.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 2.0, 0.0, 2.0, 2.0, 4.0, 7.0, 12.0, 8.0, 13.0, 21.0, 28.0, 22.0, 32.0, 42.0, 43.0, 52.0, 58.0, 48.0, 61.0, 59.0, 50.0, 49.0, 56.0, 43.0, 51.0, 43.0, 28.0, 36.0, 31.0, 31.0, 15.0, 13.0, 11.0, 16.0, 11.0, 5.0, 3.0, 2.0, 2.0, 4.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.03833037614822388, -0.037341926246881485, -0.03635347634553909, -0.0353650227189064, -0.03437657281756401, -0.03338812291622162, -0.03239966928958893, -0.031411219388246536, -0.030422769486904144, -0.029434319585561752, -0.02844586782157421, -0.02745741605758667, -0.026468966156244278, -0.025480516254901886, -0.024492064490914345, -0.023503612726926804, -0.02251516282558441, -0.02152671292424202, -0.02053826116025448, -0.019549809396266937, -0.018561359494924545, -0.017572909593582153, -0.016584457829594612, -0.015596006996929646, -0.014607556164264679, -0.013619105331599712, -0.012630654498934746, -0.01164220366626978, -0.010653752833604813, -0.009665302000939846, -0.00867685116827488, -0.007688400335609913, -0.006699949502944946, -0.00571149867027998, -0.004723047837615013, -0.0037345970049500465, -0.00274614617228508, -0.0017576953396201134, -0.0007692445069551468, 0.0002192063257098198, 0.0012076571583747864, 0.002196107991039753, 0.0031845588237047195, 0.004173009656369686, 0.005161460489034653, 0.006149911321699619, 0.007138362154364586, 0.008126812987029552, 0.009115263819694519, 0.010103714652359486, 0.011092165485024452, 0.012080616317689419, 0.013069067150354385, 0.014057517983019352, 0.015045968815684319, 0.01603442057967186, 0.01702287048101425, 0.018011320382356644, 0.018999772146344185, 0.019988223910331726, 0.020976673811674118, 0.02196512371301651, 0.02295357547700405, 0.023942027240991592, 0.024930477142333984]}, "gradients/decoder.transformer.h.8.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 3.0, 4.0, 2.0, 4.0, 5.0, 4.0, 6.0, 8.0, 11.0, 16.0, 20.0, 14.0, 34.0, 25.0, 31.0, 35.0, 37.0, 28.0, 49.0, 42.0, 40.0, 33.0, 33.0, 44.0, 43.0, 47.0, 39.0, 39.0, 33.0, 35.0, 30.0, 31.0, 29.0, 24.0, 26.0, 23.0, 18.0, 13.0, 10.0, 6.0, 7.0, 5.0, 4.0, 7.0, 7.0, 3.0, 2.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 2.0], "bins": [-6.5234375, -6.3277587890625, -6.132080078125, -5.9364013671875, -5.74072265625, -5.5450439453125, -5.349365234375, -5.1536865234375, -4.9580078125, -4.7623291015625, -4.566650390625, -4.3709716796875, -4.17529296875, -3.9796142578125, -3.783935546875, -3.5882568359375, -3.392578125, -3.1968994140625, -3.001220703125, -2.8055419921875, -2.60986328125, -2.4141845703125, -2.218505859375, -2.0228271484375, -1.8271484375, -1.6314697265625, -1.435791015625, -1.2401123046875, -1.04443359375, -0.8487548828125, -0.653076171875, -0.4573974609375, -0.26171875, -0.0660400390625, 0.129638671875, 0.3253173828125, 0.52099609375, 0.7166748046875, 0.912353515625, 1.1080322265625, 1.3037109375, 1.4993896484375, 1.695068359375, 1.8907470703125, 2.08642578125, 2.2821044921875, 2.477783203125, 2.6734619140625, 2.869140625, 3.0648193359375, 3.260498046875, 3.4561767578125, 3.65185546875, 3.8475341796875, 4.043212890625, 4.2388916015625, 4.4345703125, 4.6302490234375, 4.825927734375, 5.0216064453125, 5.21728515625, 5.4129638671875, 5.608642578125, 5.8043212890625, 6.0]}, "gradients/decoder.transformer.h.8.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 4.0, 2.0, 4.0, 3.0, 7.0, 5.0, 8.0, 15.0, 13.0, 23.0, 42.0, 63.0, 88.0, 145.0, 286.0, 525.0, 961.0, 1651.0, 3260.0, 6111.0, 11118.0, 20697.0, 39857.0, 84387.0, 199238.0, 320034.0, 194535.0, 82340.0, 39026.0, 20386.0, 10873.0, 5768.0, 3213.0, 1668.0, 941.0, 505.0, 299.0, 172.0, 105.0, 55.0, 40.0, 19.0, 21.0, 11.0, 14.0, 9.0, 5.0, 4.0, 6.0, 3.0, 3.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-6.78515625, -6.58251953125, -6.3798828125, -6.17724609375, -5.974609375, -5.77197265625, -5.5693359375, -5.36669921875, -5.1640625, -4.96142578125, -4.7587890625, -4.55615234375, -4.353515625, -4.15087890625, -3.9482421875, -3.74560546875, -3.54296875, -3.34033203125, -3.1376953125, -2.93505859375, -2.732421875, -2.52978515625, -2.3271484375, -2.12451171875, -1.921875, -1.71923828125, -1.5166015625, -1.31396484375, -1.111328125, -0.90869140625, -0.7060546875, -0.50341796875, -0.30078125, -0.09814453125, 0.1044921875, 0.30712890625, 0.509765625, 0.71240234375, 0.9150390625, 1.11767578125, 1.3203125, 1.52294921875, 1.7255859375, 1.92822265625, 2.130859375, 2.33349609375, 2.5361328125, 2.73876953125, 2.94140625, 3.14404296875, 3.3466796875, 3.54931640625, 3.751953125, 3.95458984375, 4.1572265625, 4.35986328125, 4.5625, 4.76513671875, 4.9677734375, 5.17041015625, 5.373046875, 5.57568359375, 5.7783203125, 5.98095703125, 6.18359375]}, "gradients/decoder.transformer.h.8.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 3.0, 0.0, 0.0, 2.0, 5.0, 5.0, 5.0, 9.0, 9.0, 17.0, 12.0, 22.0, 13.0, 27.0, 33.0, 38.0, 41.0, 51.0, 44.0, 57.0, 111.0, 173.0, 1452.0, 359.0, 151.0, 76.0, 69.0, 34.0, 54.0, 27.0, 31.0, 27.0, 22.0, 20.0, 18.0, 12.0, 7.0, 4.0, 4.0, 9.0, 2.0, 2.0, 2.0, 2.0, 3.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-18.1875, -17.428466796875, -16.66943359375, -15.910400390625, -15.1513671875, -14.392333984375, -13.63330078125, -12.874267578125, -12.115234375, -11.356201171875, -10.59716796875, -9.838134765625, -9.0791015625, -8.320068359375, -7.56103515625, -6.802001953125, -6.04296875, -5.283935546875, -4.52490234375, -3.765869140625, -3.0068359375, -2.247802734375, -1.48876953125, -0.729736328125, 0.029296875, 0.788330078125, 1.54736328125, 2.306396484375, 3.0654296875, 3.824462890625, 4.58349609375, 5.342529296875, 6.1015625, 6.860595703125, 7.61962890625, 8.378662109375, 9.1376953125, 9.896728515625, 10.65576171875, 11.414794921875, 12.173828125, 12.932861328125, 13.69189453125, 14.450927734375, 15.2099609375, 15.968994140625, 16.72802734375, 17.487060546875, 18.24609375, 19.005126953125, 19.76416015625, 20.523193359375, 21.2822265625, 22.041259765625, 22.80029296875, 23.559326171875, 24.318359375, 25.077392578125, 25.83642578125, 26.595458984375, 27.3544921875, 28.113525390625, 28.87255859375, 29.631591796875, 30.390625]}, "gradients/decoder.transformer.h.8.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 3.0, 1.0, 2.0, 2.0, 5.0, 6.0, 6.0, 6.0, 13.0, 12.0, 12.0, 26.0, 28.0, 48.0, 73.0, 96.0, 151.0, 229.0, 396.0, 974.0, 8720.0, 1234248.0, 1887589.0, 10809.0, 1028.0, 404.0, 280.0, 147.0, 111.0, 80.0, 51.0, 40.0, 25.0, 18.0, 19.0, 12.0, 9.0, 11.0, 5.0, 3.0, 6.0, 3.0, 3.0, 1.0, 2.0, 0.0, 4.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-47.1875, -45.71435546875, -44.2412109375, -42.76806640625, -41.294921875, -39.82177734375, -38.3486328125, -36.87548828125, -35.40234375, -33.92919921875, -32.4560546875, -30.98291015625, -29.509765625, -28.03662109375, -26.5634765625, -25.09033203125, -23.6171875, -22.14404296875, -20.6708984375, -19.19775390625, -17.724609375, -16.25146484375, -14.7783203125, -13.30517578125, -11.83203125, -10.35888671875, -8.8857421875, -7.41259765625, -5.939453125, -4.46630859375, -2.9931640625, -1.52001953125, -0.046875, 1.42626953125, 2.8994140625, 4.37255859375, 5.845703125, 7.31884765625, 8.7919921875, 10.26513671875, 11.73828125, 13.21142578125, 14.6845703125, 16.15771484375, 17.630859375, 19.10400390625, 20.5771484375, 22.05029296875, 23.5234375, 24.99658203125, 26.4697265625, 27.94287109375, 29.416015625, 30.88916015625, 32.3623046875, 33.83544921875, 35.30859375, 36.78173828125, 38.2548828125, 39.72802734375, 41.201171875, 42.67431640625, 44.1474609375, 45.62060546875, 47.09375]}, "gradients/decoder.transformer.h.8.ln_1.weight": {"_type": "histogram", "values": [117.0, 825.0, 76.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.943163871765137, -3.9436283111572266, 5.055907249450684, 14.05544376373291, 23.054977416992188, 32.05451202392578, 41.05405044555664, 50.053585052490234, 59.05311965942383, 68.05265808105469, 77.05219268798828, 86.05172729492188, 95.05126190185547, 104.05079650878906, 113.05033874511719, 122.04986572265625, 131.04940795898438, 140.0489501953125, 149.04847717285156, 158.0480194091797, 167.04754638671875, 176.04708862304688, 185.046630859375, 194.04615783691406, 203.04568481445312, 212.04522705078125, 221.0447540283203, 230.04429626464844, 239.0438232421875, 248.04336547851562, 257.04290771484375, 266.04241943359375, 275.0419616699219, 284.04150390625, 293.0410461425781, 302.0405578613281, 311.04010009765625, 320.0396423339844, 329.0391845703125, 338.0386962890625, 347.0382385253906, 356.03778076171875, 365.0373229980469, 374.0368347167969, 383.036376953125, 392.0359191894531, 401.03546142578125, 410.03497314453125, 419.0345458984375, 428.0340881347656, 437.03363037109375, 446.03314208984375, 455.0326843261719, 464.0322265625, 473.0317687988281, 482.03131103515625, 491.03082275390625, 500.0303649902344, 509.0299072265625, 518.0294189453125, 527.0289916992188, 536.0285034179688, 545.0280151367188, 554.027587890625, 563.027099609375]}, "gradients/decoder.transformer.h.8.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 2.0, 2.0, 2.0, 2.0, 8.0, 7.0, 9.0, 6.0, 10.0, 16.0, 15.0, 28.0, 22.0, 24.0, 22.0, 29.0, 39.0, 28.0, 33.0, 34.0, 41.0, 39.0, 46.0, 42.0, 44.0, 50.0, 37.0, 39.0, 32.0, 32.0, 33.0, 34.0, 26.0, 22.0, 17.0, 23.0, 16.0, 15.0, 18.0, 14.0, 12.0, 12.0, 6.0, 5.0, 6.0, 9.0, 2.0, 1.0, 0.0, 1.0, 4.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-53.73203659057617, -51.995750427246094, -50.25946044921875, -48.52317428588867, -46.786888122558594, -45.05059814453125, -43.31431198120117, -41.578025817871094, -39.84173583984375, -38.10544967651367, -36.36915969848633, -34.63287353515625, -32.896583557128906, -31.160297393798828, -29.42401123046875, -27.68772315979004, -25.951435089111328, -24.215147018432617, -22.478858947753906, -20.742572784423828, -19.006284713745117, -17.269996643066406, -15.533709526062012, -13.797422409057617, -12.061134338378906, -10.324846267700195, -8.5885591506958, -6.852271556854248, -5.115983963012695, -3.3796958923339844, -1.6434087753295898, 0.09287834167480469, 1.8291702270507812, 3.565457820892334, 5.301745414733887, 7.0380330085754395, 8.774320602416992, 10.510608673095703, 12.246895790100098, 13.983182907104492, 15.719470977783203, 17.455759048461914, 19.192047119140625, 20.928333282470703, 22.664621353149414, 24.400909423828125, 26.137195587158203, 27.873483657836914, 29.609771728515625, 31.346059799194336, 33.08234786987305, 34.818634033203125, 36.55492401123047, 38.29121017456055, 40.027496337890625, 41.76378631591797, 43.50007247924805, 45.236358642578125, 46.97264862060547, 48.70893478393555, 50.445220947265625, 52.18151092529297, 53.91779708862305, 55.654083251953125, 57.39037322998047]}, "gradients/decoder.transformer.h.7.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 2.0, 5.0, 2.0, 4.0, 5.0, 5.0, 6.0, 9.0, 18.0, 11.0, 10.0, 17.0, 18.0, 20.0, 27.0, 28.0, 36.0, 28.0, 43.0, 38.0, 40.0, 30.0, 37.0, 44.0, 39.0, 39.0, 41.0, 32.0, 40.0, 42.0, 31.0, 35.0, 28.0, 29.0, 19.0, 20.0, 19.0, 26.0, 17.0, 10.0, 16.0, 9.0, 7.0, 9.0, 4.0, 6.0, 1.0, 1.0, 5.0, 3.0, 0.0, 0.0, 1.0, 0.0, 4.0, 1.0], "bins": [-6.3515625, -6.15716552734375, -5.9627685546875, -5.76837158203125, -5.573974609375, -5.37957763671875, -5.1851806640625, -4.99078369140625, -4.79638671875, -4.60198974609375, -4.4075927734375, -4.21319580078125, -4.018798828125, -3.82440185546875, -3.6300048828125, -3.43560791015625, -3.2412109375, -3.04681396484375, -2.8524169921875, -2.65802001953125, -2.463623046875, -2.26922607421875, -2.0748291015625, -1.88043212890625, -1.68603515625, -1.49163818359375, -1.2972412109375, -1.10284423828125, -0.908447265625, -0.71405029296875, -0.5196533203125, -0.32525634765625, -0.130859375, 0.06353759765625, 0.2579345703125, 0.45233154296875, 0.646728515625, 0.84112548828125, 1.0355224609375, 1.22991943359375, 1.42431640625, 1.61871337890625, 1.8131103515625, 2.00750732421875, 2.201904296875, 2.39630126953125, 2.5906982421875, 2.78509521484375, 2.9794921875, 3.17388916015625, 3.3682861328125, 3.56268310546875, 3.757080078125, 3.95147705078125, 4.1458740234375, 4.34027099609375, 4.53466796875, 4.72906494140625, 4.9234619140625, 5.11785888671875, 5.312255859375, 5.50665283203125, 5.7010498046875, 5.89544677734375, 6.08984375]}, "gradients/decoder.transformer.h.7.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 4.0, 1.0, 2.0, 2.0, 4.0, 5.0, 4.0, 5.0, 4.0, 8.0, 7.0, 9.0, 20.0, 12.0, 21.0, 21.0, 29.0, 30.0, 59.0, 91.0, 168.0, 366.0, 915.0, 3072.0, 13985.0, 83005.0, 568183.0, 2215719.0, 1106301.0, 168220.0, 26234.0, 5288.0, 1412.0, 492.0, 227.0, 104.0, 59.0, 34.0, 30.0, 19.0, 23.0, 19.0, 15.0, 13.0, 9.0, 11.0, 5.0, 7.0, 4.0, 3.0, 7.0, 6.0, 2.0, 2.0, 4.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.2109375, -11.8255615234375, -11.440185546875, -11.0548095703125, -10.66943359375, -10.2840576171875, -9.898681640625, -9.5133056640625, -9.1279296875, -8.7425537109375, -8.357177734375, -7.9718017578125, -7.58642578125, -7.2010498046875, -6.815673828125, -6.4302978515625, -6.044921875, -5.6595458984375, -5.274169921875, -4.8887939453125, -4.50341796875, -4.1180419921875, -3.732666015625, -3.3472900390625, -2.9619140625, -2.5765380859375, -2.191162109375, -1.8057861328125, -1.42041015625, -1.0350341796875, -0.649658203125, -0.2642822265625, 0.12109375, 0.5064697265625, 0.891845703125, 1.2772216796875, 1.66259765625, 2.0479736328125, 2.433349609375, 2.8187255859375, 3.2041015625, 3.5894775390625, 3.974853515625, 4.3602294921875, 4.74560546875, 5.1309814453125, 5.516357421875, 5.9017333984375, 6.287109375, 6.6724853515625, 7.057861328125, 7.4432373046875, 7.82861328125, 8.2139892578125, 8.599365234375, 8.9847412109375, 9.3701171875, 9.7554931640625, 10.140869140625, 10.5262451171875, 10.91162109375, 11.2969970703125, 11.682373046875, 12.0677490234375, 12.453125]}, "gradients/decoder.transformer.h.7.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 5.0, 0.0, 2.0, 3.0, 5.0, 13.0, 8.0, 6.0, 21.0, 22.0, 34.0, 34.0, 37.0, 52.0, 77.0, 102.0, 108.0, 158.0, 204.0, 265.0, 315.0, 357.0, 404.0, 354.0, 312.0, 278.0, 208.0, 149.0, 137.0, 101.0, 63.0, 59.0, 42.0, 31.0, 30.0, 27.0, 20.0, 10.0, 3.0, 5.0, 8.0, 5.0, 6.0, 4.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-8.6015625, -8.3101806640625, -8.018798828125, -7.7274169921875, -7.43603515625, -7.1446533203125, -6.853271484375, -6.5618896484375, -6.2705078125, -5.9791259765625, -5.687744140625, -5.3963623046875, -5.10498046875, -4.8135986328125, -4.522216796875, -4.2308349609375, -3.939453125, -3.6480712890625, -3.356689453125, -3.0653076171875, -2.77392578125, -2.4825439453125, -2.191162109375, -1.8997802734375, -1.6083984375, -1.3170166015625, -1.025634765625, -0.7342529296875, -0.44287109375, -0.1514892578125, 0.139892578125, 0.4312744140625, 0.72265625, 1.0140380859375, 1.305419921875, 1.5968017578125, 1.88818359375, 2.1795654296875, 2.470947265625, 2.7623291015625, 3.0537109375, 3.3450927734375, 3.636474609375, 3.9278564453125, 4.21923828125, 4.5106201171875, 4.802001953125, 5.0933837890625, 5.384765625, 5.6761474609375, 5.967529296875, 6.2589111328125, 6.55029296875, 6.8416748046875, 7.133056640625, 7.4244384765625, 7.7158203125, 8.0072021484375, 8.298583984375, 8.5899658203125, 8.88134765625, 9.1727294921875, 9.464111328125, 9.7554931640625, 10.046875]}, "gradients/decoder.transformer.h.7.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 3.0, 0.0, 3.0, 2.0, 5.0, 7.0, 6.0, 8.0, 12.0, 19.0, 23.0, 34.0, 45.0, 56.0, 66.0, 97.0, 131.0, 195.0, 256.0, 418.0, 932.0, 4918.0, 77169.0, 2333270.0, 1715316.0, 55251.0, 3887.0, 807.0, 382.0, 267.0, 166.0, 125.0, 104.0, 84.0, 62.0, 44.0, 27.0, 21.0, 23.0, 16.0, 9.0, 5.0, 10.0, 7.0, 4.0, 1.0, 3.0, 2.0, 0.0, 0.0, 1.0], "bins": [-36.9375, -35.937744140625, -34.93798828125, -33.938232421875, -32.9384765625, -31.938720703125, -30.93896484375, -29.939208984375, -28.939453125, -27.939697265625, -26.93994140625, -25.940185546875, -24.9404296875, -23.940673828125, -22.94091796875, -21.941162109375, -20.94140625, -19.941650390625, -18.94189453125, -17.942138671875, -16.9423828125, -15.942626953125, -14.94287109375, -13.943115234375, -12.943359375, -11.943603515625, -10.94384765625, -9.944091796875, -8.9443359375, -7.944580078125, -6.94482421875, -5.945068359375, -4.9453125, -3.945556640625, -2.94580078125, -1.946044921875, -0.9462890625, 0.053466796875, 1.05322265625, 2.052978515625, 3.052734375, 4.052490234375, 5.05224609375, 6.052001953125, 7.0517578125, 8.051513671875, 9.05126953125, 10.051025390625, 11.05078125, 12.050537109375, 13.05029296875, 14.050048828125, 15.0498046875, 16.049560546875, 17.04931640625, 18.049072265625, 19.048828125, 20.048583984375, 21.04833984375, 22.048095703125, 23.0478515625, 24.047607421875, 25.04736328125, 26.047119140625, 27.046875]}, "gradients/decoder.transformer.h.7.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 22.0, 196.0, 448.0, 296.0, 49.0, 5.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-219.59033203125, -212.85984802246094, -206.1293487548828, -199.39886474609375, -192.66836547851562, -185.93788146972656, -179.2073974609375, -172.47689819335938, -165.74639892578125, -159.0159149169922, -152.28541564941406, -145.554931640625, -138.82443237304688, -132.0939483642578, -125.36345672607422, -118.63296508789062, -111.90248107910156, -105.17198944091797, -98.44149780273438, -91.71101379394531, -84.98051452636719, -78.25003051757812, -71.51953887939453, -64.78904724121094, -58.058555603027344, -51.32806396484375, -44.597572326660156, -37.86708450317383, -31.136592864990234, -24.40610122680664, -17.675613403320312, -10.945121765136719, -4.2146148681640625, 2.515875816345215, 9.246366500854492, 15.976856231689453, 22.707347869873047, 29.43783950805664, 36.16832733154297, 42.89881896972656, 49.629310607910156, 56.35980224609375, 63.090293884277344, 69.82078552246094, 76.55126953125, 83.28176879882812, 90.01225280761719, 96.74274444580078, 103.47323608398438, 110.20372772216797, 116.93421936035156, 123.66470336914062, 130.39520263671875, 137.1256866455078, 143.85617065429688, 150.586669921875, 157.31716918945312, 164.0476531982422, 170.7781524658203, 177.50863647460938, 184.2391357421875, 190.96961975097656, 197.70010375976562, 204.43060302734375, 211.1610870361328]}, "gradients/decoder.transformer.h.7.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 3.0, 0.0, 5.0, 3.0, 6.0, 7.0, 3.0, 13.0, 11.0, 10.0, 25.0, 15.0, 22.0, 16.0, 18.0, 26.0, 29.0, 35.0, 30.0, 36.0, 49.0, 40.0, 43.0, 35.0, 43.0, 36.0, 30.0, 29.0, 35.0, 36.0, 41.0, 46.0, 39.0, 21.0, 26.0, 19.0, 28.0, 22.0, 17.0, 18.0, 9.0, 6.0, 10.0, 3.0, 5.0, 3.0, 4.0, 3.0, 0.0, 1.0, 1.0, 0.0, 3.0, 1.0, 2.0, 1.0], "bins": [-35.08148956298828, -33.992820739746094, -32.904151916503906, -31.81548500061035, -30.726818084716797, -29.63814926147461, -28.549480438232422, -27.460813522338867, -26.372146606445312, -25.283477783203125, -24.19481086730957, -23.106142044067383, -22.017475128173828, -20.92880630493164, -19.840137481689453, -18.7514705657959, -17.66280174255371, -16.574132919311523, -15.485466003417969, -14.396797180175781, -13.308130264282227, -12.219461441040039, -11.130793571472168, -10.042125701904297, -8.953457832336426, -7.864789962768555, -6.776122093200684, -5.687453746795654, -4.598785877227783, -3.510118007659912, -2.421449661254883, -1.3327817916870117, -0.24411392211914062, 0.84455406665802, 1.9332220554351807, 3.021890163421631, 4.110558032989502, 5.199225902557373, 6.287894248962402, 7.376562118530273, 8.465229988098145, 9.553897857666016, 10.642565727233887, 11.731233596801758, 12.819902420043945, 13.9085693359375, 14.997238159179688, 16.085906982421875, 17.17457389831543, 18.263242721557617, 19.351909637451172, 20.44057846069336, 21.529245376586914, 22.6179141998291, 23.706581115722656, 24.795249938964844, 25.88391876220703, 26.97258758544922, 28.061254501342773, 29.14992332458496, 30.238590240478516, 31.327259063720703, 32.41592788696289, 33.50459289550781, 34.59326171875]}, "gradients/decoder.transformer.h.7.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 3.0, 3.0, 1.0, 3.0, 4.0, 2.0, 3.0, 8.0, 25.0, 13.0, 11.0, 21.0, 14.0, 26.0, 26.0, 22.0, 19.0, 47.0, 31.0, 30.0, 33.0, 47.0, 39.0, 31.0, 44.0, 53.0, 37.0, 46.0, 40.0, 33.0, 34.0, 30.0, 30.0, 28.0, 21.0, 18.0, 18.0, 21.0, 14.0, 13.0, 20.0, 9.0, 10.0, 3.0, 6.0, 4.0, 9.0, 1.0, 4.0, 1.0, 2.0, 2.0, 3.0, 0.0, 1.0], "bins": [-6.45703125, -6.2664794921875, -6.075927734375, -5.8853759765625, -5.69482421875, -5.5042724609375, -5.313720703125, -5.1231689453125, -4.9326171875, -4.7420654296875, -4.551513671875, -4.3609619140625, -4.17041015625, -3.9798583984375, -3.789306640625, -3.5987548828125, -3.408203125, -3.2176513671875, -3.027099609375, -2.8365478515625, -2.64599609375, -2.4554443359375, -2.264892578125, -2.0743408203125, -1.8837890625, -1.6932373046875, -1.502685546875, -1.3121337890625, -1.12158203125, -0.9310302734375, -0.740478515625, -0.5499267578125, -0.359375, -0.1688232421875, 0.021728515625, 0.2122802734375, 0.40283203125, 0.5933837890625, 0.783935546875, 0.9744873046875, 1.1650390625, 1.3555908203125, 1.546142578125, 1.7366943359375, 1.92724609375, 2.1177978515625, 2.308349609375, 2.4989013671875, 2.689453125, 2.8800048828125, 3.070556640625, 3.2611083984375, 3.45166015625, 3.6422119140625, 3.832763671875, 4.0233154296875, 4.2138671875, 4.4044189453125, 4.594970703125, 4.7855224609375, 4.97607421875, 5.1666259765625, 5.357177734375, 5.5477294921875, 5.73828125]}, "gradients/decoder.transformer.h.7.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 2.0, 0.0, 4.0, 3.0, 4.0, 10.0, 15.0, 17.0, 24.0, 45.0, 60.0, 114.0, 166.0, 218.0, 352.0, 545.0, 841.0, 1181.0, 1817.0, 2808.0, 4172.0, 6139.0, 9230.0, 14070.0, 21483.0, 32464.0, 49304.0, 72841.0, 103788.0, 137073.0, 150790.0, 132778.0, 99880.0, 69216.0, 46506.0, 30704.0, 20140.0, 13417.0, 8944.0, 5871.0, 3968.0, 2489.0, 1732.0, 1126.0, 792.0, 441.0, 330.0, 216.0, 155.0, 80.0, 69.0, 48.0, 29.0, 21.0, 14.0, 9.0, 6.0, 6.0, 3.0, 0.0, 1.0, 1.0, 2.0], "bins": [-0.7177734375, -0.6949386596679688, -0.6721038818359375, -0.6492691040039062, -0.626434326171875, -0.6035995483398438, -0.5807647705078125, -0.5579299926757812, -0.53509521484375, -0.5122604370117188, -0.4894256591796875, -0.46659088134765625, -0.443756103515625, -0.42092132568359375, -0.3980865478515625, -0.37525177001953125, -0.3524169921875, -0.32958221435546875, -0.3067474365234375, -0.28391265869140625, -0.261077880859375, -0.23824310302734375, -0.2154083251953125, -0.19257354736328125, -0.16973876953125, -0.14690399169921875, -0.1240692138671875, -0.10123443603515625, -0.078399658203125, -0.05556488037109375, -0.0327301025390625, -0.00989532470703125, 0.012939453125, 0.03577423095703125, 0.0586090087890625, 0.08144378662109375, 0.104278564453125, 0.12711334228515625, 0.1499481201171875, 0.17278289794921875, 0.19561767578125, 0.21845245361328125, 0.2412872314453125, 0.26412200927734375, 0.286956787109375, 0.30979156494140625, 0.3326263427734375, 0.35546112060546875, 0.3782958984375, 0.40113067626953125, 0.4239654541015625, 0.44680023193359375, 0.469635009765625, 0.49246978759765625, 0.5153045654296875, 0.5381393432617188, 0.56097412109375, 0.5838088989257812, 0.6066436767578125, 0.6294784545898438, 0.652313232421875, 0.6751480102539062, 0.6979827880859375, 0.7208175659179688, 0.74365234375]}, "gradients/decoder.transformer.h.7.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 4.0, 1.0, 8.0, 3.0, 5.0, 9.0, 10.0, 4.0, 16.0, 17.0, 24.0, 19.0, 22.0, 27.0, 26.0, 42.0, 44.0, 40.0, 42.0, 49.0, 46.0, 51.0, 1088.0, 47.0, 56.0, 37.0, 47.0, 23.0, 28.0, 33.0, 33.0, 24.0, 20.0, 25.0, 14.0, 16.0, 3.0, 8.0, 10.0, 7.0, 3.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-4.91796875, -4.76983642578125, -4.6217041015625, -4.47357177734375, -4.325439453125, -4.17730712890625, -4.0291748046875, -3.88104248046875, -3.73291015625, -3.58477783203125, -3.4366455078125, -3.28851318359375, -3.140380859375, -2.99224853515625, -2.8441162109375, -2.69598388671875, -2.5478515625, -2.39971923828125, -2.2515869140625, -2.10345458984375, -1.955322265625, -1.80718994140625, -1.6590576171875, -1.51092529296875, -1.36279296875, -1.21466064453125, -1.0665283203125, -0.91839599609375, -0.770263671875, -0.62213134765625, -0.4739990234375, -0.32586669921875, -0.177734375, -0.02960205078125, 0.1185302734375, 0.26666259765625, 0.414794921875, 0.56292724609375, 0.7110595703125, 0.85919189453125, 1.00732421875, 1.15545654296875, 1.3035888671875, 1.45172119140625, 1.599853515625, 1.74798583984375, 1.8961181640625, 2.04425048828125, 2.1923828125, 2.34051513671875, 2.4886474609375, 2.63677978515625, 2.784912109375, 2.93304443359375, 3.0811767578125, 3.22930908203125, 3.37744140625, 3.52557373046875, 3.6737060546875, 3.82183837890625, 3.969970703125, 4.11810302734375, 4.2662353515625, 4.41436767578125, 4.5625]}, "gradients/decoder.transformer.h.7.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 3.0, 0.0, 2.0, 2.0, 3.0, 3.0, 6.0, 7.0, 15.0, 17.0, 28.0, 40.0, 66.0, 76.0, 117.0, 203.0, 373.0, 517.0, 838.0, 1446.0, 2378.0, 4080.0, 6723.0, 11375.0, 19488.0, 33038.0, 54983.0, 87873.0, 131004.0, 424500.0, 957678.0, 131831.0, 89594.0, 56198.0, 33639.0, 19938.0, 11695.0, 7136.0, 4064.0, 2421.0, 1434.0, 864.0, 524.0, 343.0, 208.0, 128.0, 82.0, 60.0, 47.0, 17.0, 11.0, 10.0, 9.0, 2.0, 4.0, 2.0, 4.0, 2.0, 0.0, 2.0], "bins": [-0.59521484375, -0.5777206420898438, -0.5602264404296875, -0.5427322387695312, -0.525238037109375, -0.5077438354492188, -0.4902496337890625, -0.47275543212890625, -0.45526123046875, -0.43776702880859375, -0.4202728271484375, -0.40277862548828125, -0.385284423828125, -0.36779022216796875, -0.3502960205078125, -0.33280181884765625, -0.3153076171875, -0.29781341552734375, -0.2803192138671875, -0.26282501220703125, -0.245330810546875, -0.22783660888671875, -0.2103424072265625, -0.19284820556640625, -0.17535400390625, -0.15785980224609375, -0.1403656005859375, -0.12287139892578125, -0.105377197265625, -0.08788299560546875, -0.0703887939453125, -0.05289459228515625, -0.035400390625, -0.01790618896484375, -0.0004119873046875, 0.01708221435546875, 0.034576416015625, 0.05207061767578125, 0.0695648193359375, 0.08705902099609375, 0.10455322265625, 0.12204742431640625, 0.1395416259765625, 0.15703582763671875, 0.174530029296875, 0.19202423095703125, 0.2095184326171875, 0.22701263427734375, 0.2445068359375, 0.26200103759765625, 0.2794952392578125, 0.29698944091796875, 0.314483642578125, 0.33197784423828125, 0.3494720458984375, 0.36696624755859375, 0.38446044921875, 0.40195465087890625, 0.4194488525390625, 0.43694305419921875, 0.454437255859375, 0.47193145751953125, 0.4894256591796875, 0.5069198608398438, 0.5244140625]}, "gradients/decoder.transformer.h.7.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 0.0, 1.0, 0.0, 3.0, 3.0, 4.0, 2.0, 3.0, 4.0, 7.0, 6.0, 4.0, 2.0, 4.0, 8.0, 12.0, 15.0, 15.0, 17.0, 23.0, 18.0, 19.0, 30.0, 41.0, 23.0, 38.0, 46.0, 73.0, 77.0, 71.0, 72.0, 48.0, 53.0, 33.0, 29.0, 32.0, 23.0, 23.0, 20.0, 13.0, 15.0, 16.0, 10.0, 11.0, 9.0, 6.0, 9.0, 6.0, 7.0, 4.0, 4.0, 2.0, 1.0, 1.0, 0.0, 2.0], "bins": [-0.0120086669921875, -0.011686563491821289, -0.011364459991455078, -0.011042356491088867, -0.010720252990722656, -0.010398149490356445, -0.010076045989990234, -0.009753942489624023, -0.009431838989257812, -0.009109735488891602, -0.00878763198852539, -0.00846552848815918, -0.008143424987792969, -0.007821321487426758, -0.007499217987060547, -0.007177114486694336, -0.006855010986328125, -0.006532907485961914, -0.006210803985595703, -0.005888700485229492, -0.005566596984863281, -0.00524449348449707, -0.004922389984130859, -0.0046002864837646484, -0.0042781829833984375, -0.0039560794830322266, -0.0036339759826660156, -0.0033118724822998047, -0.0029897689819335938, -0.002667665481567383, -0.002345561981201172, -0.002023458480834961, -0.00170135498046875, -0.001379251480102539, -0.0010571479797363281, -0.0007350444793701172, -0.00041294097900390625, -9.083747863769531e-05, 0.00023126602172851562, 0.0005533695220947266, 0.0008754730224609375, 0.0011975765228271484, 0.0015196800231933594, 0.0018417835235595703, 0.0021638870239257812, 0.002485990524291992, 0.002808094024658203, 0.003130197525024414, 0.003452301025390625, 0.003774404525756836, 0.004096508026123047, 0.004418611526489258, 0.004740715026855469, 0.00506281852722168, 0.005384922027587891, 0.0057070255279541016, 0.0060291290283203125, 0.0063512325286865234, 0.006673336029052734, 0.006995439529418945, 0.007317543029785156, 0.007639646530151367, 0.007961750030517578, 0.008283853530883789, 0.00860595703125]}, "gradients/decoder.transformer.h.7.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 5.0, 6.0, 5.0, 5.0, 9.0, 10.0, 14.0, 17.0, 16.0, 25.0, 31.0, 45.0, 52.0, 85.0, 88.0, 170.0, 269.0, 510.0, 4242.0, 1028796.0, 12550.0, 666.0, 291.0, 181.0, 127.0, 82.0, 50.0, 39.0, 42.0, 22.0, 32.0, 20.0, 8.0, 10.0, 6.0, 6.0, 5.0, 8.0, 4.0, 2.0, 1.0, 1.0, 4.0, 5.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.20263671875, -0.19501876831054688, -0.18740081787109375, -0.17978286743164062, -0.1721649169921875, -0.16454696655273438, -0.15692901611328125, -0.14931106567382812, -0.141693115234375, -0.13407516479492188, -0.12645721435546875, -0.11883926391601562, -0.1112213134765625, -0.10360336303710938, -0.09598541259765625, -0.08836746215820312, -0.08074951171875, -0.07313156127929688, -0.06551361083984375, -0.057895660400390625, -0.0502777099609375, -0.042659759521484375, -0.03504180908203125, -0.027423858642578125, -0.019805908203125, -0.012187957763671875, -0.00457000732421875, 0.003047943115234375, 0.0106658935546875, 0.018283843994140625, 0.02590179443359375, 0.033519744873046875, 0.0411376953125, 0.048755645751953125, 0.05637359619140625, 0.06399154663085938, 0.0716094970703125, 0.07922744750976562, 0.08684539794921875, 0.09446334838867188, 0.102081298828125, 0.10969924926757812, 0.11731719970703125, 0.12493515014648438, 0.1325531005859375, 0.14017105102539062, 0.14778900146484375, 0.15540695190429688, 0.16302490234375, 0.17064285278320312, 0.17826080322265625, 0.18587875366210938, 0.1934967041015625, 0.20111465454101562, 0.20873260498046875, 0.21635055541992188, 0.223968505859375, 0.23158645629882812, 0.23920440673828125, 0.24682235717773438, 0.2544403076171875, 0.2620582580566406, 0.26967620849609375, 0.2772941589355469, 0.284912109375]}, "gradients/decoder.transformer.h.7.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 153.0, 865.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.09518353641033173, -0.08733966946601868, -0.07949580997228622, -0.07165194302797318, -0.06380808353424072, -0.05596421658992767, -0.048120349645614624, -0.04027648642659187, -0.03243262320756912, -0.02458875998854637, -0.01674489490687847, -0.008901029825210571, -0.0010571666061878204, 0.00678669661283493, 0.01463056355714798, 0.02247442677617073, 0.03031828999519348, 0.03816215321421623, 0.04600601643323898, 0.05384988337755203, 0.06169374659657478, 0.06953760981559753, 0.07738147675991058, 0.08522534370422363, 0.09306920319795609, 0.10091307014226913, 0.10875692963600159, 0.11660079658031464, 0.12444466352462769, 0.13228851556777954, 0.14013239741325378, 0.14797624945640564, 0.1558201014995575, 0.16366396844387054, 0.1715078353881836, 0.17935168743133545, 0.1871955543756485, 0.19503942131996155, 0.2028832882642746, 0.21072715520858765, 0.2185710072517395, 0.22641487419605255, 0.2342587411403656, 0.24210259318351746, 0.2499464601278305, 0.25779032707214355, 0.2656341791152954, 0.27347806096076965, 0.2813219428062439, 0.28916579484939575, 0.29700967669487, 0.30485352873802185, 0.3126974105834961, 0.32054126262664795, 0.3283851146697998, 0.33622899651527405, 0.3440728485584259, 0.35191670060157776, 0.359760582447052, 0.36760443449020386, 0.3754483163356781, 0.38329216837882996, 0.3911360502243042, 0.39897990226745605, 0.4068237543106079]}, "gradients/decoder.transformer.h.7.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 1.0, 7.0, 15.0, 9.0, 17.0, 16.0, 26.0, 20.0, 31.0, 51.0, 39.0, 41.0, 44.0, 69.0, 55.0, 60.0, 60.0, 65.0, 35.0, 60.0, 62.0, 41.0, 38.0, 26.0, 28.0, 21.0, 14.0, 22.0, 13.0, 10.0, 5.0, 4.0, 6.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.019495010375976562, -0.01893671602010727, -0.018378421664237976, -0.017820127308368683, -0.01726183295249939, -0.016703538596630096, -0.016145244240760803, -0.01558694988489151, -0.015028655529022217, -0.014470361173152924, -0.01391206681728363, -0.013353772461414337, -0.012795478105545044, -0.01223718374967575, -0.011678889393806458, -0.011120595037937164, -0.010562300682067871, -0.010004006326198578, -0.009445711970329285, -0.008887417614459991, -0.008329123258590698, -0.007770828902721405, -0.007212534546852112, -0.006654240190982819, -0.006095945835113525, -0.005537651479244232, -0.004979357123374939, -0.004421062767505646, -0.0038627684116363525, -0.0033044740557670593, -0.002746179699897766, -0.002187885344028473, -0.0016295909881591797, -0.0010712966322898865, -0.0005130022764205933, 4.529207944869995e-05, 0.0006035864353179932, 0.0011618807911872864, 0.0017201751470565796, 0.002278469502925873, 0.002836763858795166, 0.0033950582146644592, 0.0039533525705337524, 0.004511646926403046, 0.005069941282272339, 0.005628235638141632, 0.006186529994010925, 0.0067448243498802185, 0.007303118705749512, 0.007861413061618805, 0.008419707417488098, 0.008978001773357391, 0.009536296129226685, 0.010094590485095978, 0.010652884840965271, 0.011211179196834564, 0.011769473552703857, 0.01232776790857315, 0.012886062264442444, 0.013444356620311737, 0.01400265097618103, 0.014560945332050323, 0.015119239687919617, 0.01567753404378891, 0.016235828399658203]}, "gradients/decoder.transformer.h.7.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 3.0, 3.0, 1.0, 3.0, 4.0, 2.0, 3.0, 8.0, 25.0, 13.0, 11.0, 21.0, 14.0, 25.0, 27.0, 22.0, 19.0, 48.0, 30.0, 30.0, 33.0, 47.0, 39.0, 31.0, 44.0, 53.0, 37.0, 46.0, 40.0, 33.0, 34.0, 30.0, 30.0, 28.0, 21.0, 18.0, 18.0, 21.0, 14.0, 13.0, 20.0, 9.0, 10.0, 3.0, 6.0, 4.0, 9.0, 1.0, 4.0, 1.0, 2.0, 2.0, 3.0, 0.0, 1.0], "bins": [-6.45703125, -6.2664794921875, -6.075927734375, -5.8853759765625, -5.69482421875, -5.5042724609375, -5.313720703125, -5.1231689453125, -4.9326171875, -4.7420654296875, -4.551513671875, -4.3609619140625, -4.17041015625, -3.9798583984375, -3.789306640625, -3.5987548828125, -3.408203125, -3.2176513671875, -3.027099609375, -2.8365478515625, -2.64599609375, -2.4554443359375, -2.264892578125, -2.0743408203125, -1.8837890625, -1.6932373046875, -1.502685546875, -1.3121337890625, -1.12158203125, -0.9310302734375, -0.740478515625, -0.5499267578125, -0.359375, -0.1688232421875, 0.021728515625, 0.2122802734375, 0.40283203125, 0.5933837890625, 0.783935546875, 0.9744873046875, 1.1650390625, 1.3555908203125, 1.546142578125, 1.7366943359375, 1.92724609375, 2.1177978515625, 2.308349609375, 2.4989013671875, 2.689453125, 2.8800048828125, 3.070556640625, 3.2611083984375, 3.45166015625, 3.6422119140625, 3.832763671875, 4.0233154296875, 4.2138671875, 4.4044189453125, 4.594970703125, 4.7855224609375, 4.97607421875, 5.1666259765625, 5.357177734375, 5.5477294921875, 5.73828125]}, "gradients/decoder.transformer.h.7.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 3.0, 1.0, 1.0, 2.0, 2.0, 1.0, 4.0, 3.0, 3.0, 7.0, 9.0, 18.0, 23.0, 24.0, 28.0, 31.0, 34.0, 69.0, 74.0, 103.0, 156.0, 203.0, 359.0, 673.0, 1693.0, 4465.0, 12782.0, 42175.0, 197723.0, 562156.0, 169808.0, 37061.0, 11640.0, 3910.0, 1562.0, 679.0, 323.0, 203.0, 123.0, 113.0, 70.0, 67.0, 32.0, 26.0, 27.0, 17.0, 17.0, 21.0, 16.0, 7.0, 5.0, 8.0, 5.0, 1.0, 1.0, 4.0, 2.0, 0.0, 1.0, 1.0], "bins": [-11.953125, -11.5958251953125, -11.238525390625, -10.8812255859375, -10.52392578125, -10.1666259765625, -9.809326171875, -9.4520263671875, -9.0947265625, -8.7374267578125, -8.380126953125, -8.0228271484375, -7.66552734375, -7.3082275390625, -6.950927734375, -6.5936279296875, -6.236328125, -5.8790283203125, -5.521728515625, -5.1644287109375, -4.80712890625, -4.4498291015625, -4.092529296875, -3.7352294921875, -3.3779296875, -3.0206298828125, -2.663330078125, -2.3060302734375, -1.94873046875, -1.5914306640625, -1.234130859375, -0.8768310546875, -0.51953125, -0.1622314453125, 0.195068359375, 0.5523681640625, 0.90966796875, 1.2669677734375, 1.624267578125, 1.9815673828125, 2.3388671875, 2.6961669921875, 3.053466796875, 3.4107666015625, 3.76806640625, 4.1253662109375, 4.482666015625, 4.8399658203125, 5.197265625, 5.5545654296875, 5.911865234375, 6.2691650390625, 6.62646484375, 6.9837646484375, 7.341064453125, 7.6983642578125, 8.0556640625, 8.4129638671875, 8.770263671875, 9.1275634765625, 9.48486328125, 9.8421630859375, 10.199462890625, 10.5567626953125, 10.9140625]}, "gradients/decoder.transformer.h.7.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 1.0, 1.0, 3.0, 6.0, 4.0, 8.0, 8.0, 16.0, 14.0, 17.0, 19.0, 22.0, 26.0, 28.0, 29.0, 29.0, 45.0, 48.0, 51.0, 59.0, 118.0, 253.0, 1444.0, 269.0, 124.0, 56.0, 52.0, 46.0, 36.0, 36.0, 32.0, 27.0, 28.0, 24.0, 19.0, 8.0, 13.0, 8.0, 9.0, 6.0, 8.0, 5.0, 4.0, 1.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-21.5, -20.860595703125, -20.22119140625, -19.581787109375, -18.9423828125, -18.302978515625, -17.66357421875, -17.024169921875, -16.384765625, -15.745361328125, -15.10595703125, -14.466552734375, -13.8271484375, -13.187744140625, -12.54833984375, -11.908935546875, -11.26953125, -10.630126953125, -9.99072265625, -9.351318359375, -8.7119140625, -8.072509765625, -7.43310546875, -6.793701171875, -6.154296875, -5.514892578125, -4.87548828125, -4.236083984375, -3.5966796875, -2.957275390625, -2.31787109375, -1.678466796875, -1.0390625, -0.399658203125, 0.23974609375, 0.879150390625, 1.5185546875, 2.157958984375, 2.79736328125, 3.436767578125, 4.076171875, 4.715576171875, 5.35498046875, 5.994384765625, 6.6337890625, 7.273193359375, 7.91259765625, 8.552001953125, 9.19140625, 9.830810546875, 10.47021484375, 11.109619140625, 11.7490234375, 12.388427734375, 13.02783203125, 13.667236328125, 14.306640625, 14.946044921875, 15.58544921875, 16.224853515625, 16.8642578125, 17.503662109375, 18.14306640625, 18.782470703125, 19.421875]}, "gradients/decoder.transformer.h.7.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 0.0, 4.0, 2.0, 3.0, 4.0, 5.0, 9.0, 9.0, 11.0, 15.0, 16.0, 19.0, 22.0, 25.0, 40.0, 58.0, 53.0, 95.0, 139.0, 230.0, 423.0, 1332.0, 12472.0, 1972207.0, 1146177.0, 9972.0, 1169.0, 422.0, 241.0, 134.0, 79.0, 69.0, 44.0, 49.0, 29.0, 31.0, 20.0, 18.0, 18.0, 15.0, 12.0, 8.0, 7.0, 2.0, 2.0, 1.0, 4.0, 0.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-42.4375, -41.02099609375, -39.6044921875, -38.18798828125, -36.771484375, -35.35498046875, -33.9384765625, -32.52197265625, -31.10546875, -29.68896484375, -28.2724609375, -26.85595703125, -25.439453125, -24.02294921875, -22.6064453125, -21.18994140625, -19.7734375, -18.35693359375, -16.9404296875, -15.52392578125, -14.107421875, -12.69091796875, -11.2744140625, -9.85791015625, -8.44140625, -7.02490234375, -5.6083984375, -4.19189453125, -2.775390625, -1.35888671875, 0.0576171875, 1.47412109375, 2.890625, 4.30712890625, 5.7236328125, 7.14013671875, 8.556640625, 9.97314453125, 11.3896484375, 12.80615234375, 14.22265625, 15.63916015625, 17.0556640625, 18.47216796875, 19.888671875, 21.30517578125, 22.7216796875, 24.13818359375, 25.5546875, 26.97119140625, 28.3876953125, 29.80419921875, 31.220703125, 32.63720703125, 34.0537109375, 35.47021484375, 36.88671875, 38.30322265625, 39.7197265625, 41.13623046875, 42.552734375, 43.96923828125, 45.3857421875, 46.80224609375, 48.21875]}, "gradients/decoder.transformer.h.7.ln_1.weight": {"_type": "histogram", "values": [14.0, 203.0, 575.0, 192.0, 33.0, 3.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.258474349975586, -7.544036388397217, -2.8295984268188477, 1.8848400115966797, 6.599277496337891, 11.313714981079102, 16.028154373168945, 20.742589950561523, 25.457029342651367, 30.171466827392578, 34.88590621948242, 39.600341796875, 44.314781188964844, 49.02922058105469, 53.74365997314453, 58.458091735839844, 63.17253112792969, 67.88697052001953, 72.60140991210938, 77.31584167480469, 82.03028106689453, 86.74472045898438, 91.45915985107422, 96.17359924316406, 100.8880386352539, 105.60247802734375, 110.3169174194336, 115.03135681152344, 119.74578857421875, 124.4602279663086, 129.17466735839844, 133.88909912109375, 138.60353088378906, 143.31796264648438, 148.03240966796875, 152.74684143066406, 157.46128845214844, 162.17572021484375, 166.89016723632812, 171.60459899902344, 176.31903076171875, 181.03346252441406, 185.74790954589844, 190.46234130859375, 195.17678833007812, 199.89122009277344, 204.60565185546875, 209.32009887695312, 214.0345458984375, 218.7489776611328, 223.4634246826172, 228.1778564453125, 232.89230346679688, 237.6067352294922, 242.3211669921875, 247.03561401367188, 251.7500457763672, 256.4644775390625, 261.1789245605469, 265.89337158203125, 270.6077880859375, 275.3222351074219, 280.03668212890625, 284.7510986328125, 289.4655456542969]}, "gradients/decoder.transformer.h.7.ln_1.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 4.0, 4.0, 5.0, 3.0, 3.0, 10.0, 6.0, 18.0, 17.0, 20.0, 17.0, 24.0, 33.0, 18.0, 33.0, 35.0, 31.0, 41.0, 44.0, 37.0, 37.0, 42.0, 40.0, 41.0, 33.0, 39.0, 34.0, 26.0, 37.0, 24.0, 31.0, 33.0, 30.0, 29.0, 25.0, 16.0, 17.0, 13.0, 14.0, 10.0, 5.0, 9.0, 7.0, 5.0, 6.0, 3.0, 4.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-52.386573791503906, -50.8758430480957, -49.3651123046875, -47.85438537597656, -46.34365463256836, -44.832923889160156, -43.32219696044922, -41.811466217041016, -40.30073547363281, -38.79000473022461, -37.279273986816406, -35.76854705810547, -34.257816314697266, -32.74708557128906, -31.236356735229492, -29.725627899169922, -28.21489715576172, -26.704166412353516, -25.193437576293945, -23.682708740234375, -22.171977996826172, -20.66124725341797, -19.1505184173584, -17.639789581298828, -16.129058837890625, -14.618329048156738, -13.107599258422852, -11.596869468688965, -10.086139678955078, -8.575409889221191, -7.064680099487305, -5.553950309753418, -4.043224334716797, -2.53249454498291, -1.0217647552490234, 0.4889650344848633, 1.99969482421875, 3.5104246139526367, 5.021154403686523, 6.53188419342041, 8.042613983154297, 9.553343772888184, 11.06407356262207, 12.574803352355957, 14.085533142089844, 15.59626293182373, 17.106992721557617, 18.617721557617188, 20.12845230102539, 21.639183044433594, 23.149911880493164, 24.660640716552734, 26.171371459960938, 27.68210220336914, 29.19283103942871, 30.70355987548828, 32.214290618896484, 33.72502136230469, 35.235748291015625, 36.74647903442383, 38.25720977783203, 39.767940521240234, 41.27867126464844, 42.789398193359375, 44.30012893676758]}, "gradients/decoder.transformer.h.6.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 0.0, 3.0, 2.0, 0.0, 1.0, 4.0, 2.0, 6.0, 6.0, 3.0, 15.0, 10.0, 11.0, 13.0, 16.0, 25.0, 21.0, 23.0, 33.0, 34.0, 24.0, 32.0, 33.0, 38.0, 36.0, 44.0, 41.0, 45.0, 41.0, 37.0, 34.0, 47.0, 42.0, 32.0, 30.0, 25.0, 35.0, 27.0, 18.0, 16.0, 20.0, 19.0, 12.0, 7.0, 6.0, 8.0, 12.0, 6.0, 3.0, 5.0, 3.0, 3.0, 2.0, 3.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.41015625, -6.20208740234375, -5.9940185546875, -5.78594970703125, -5.577880859375, -5.36981201171875, -5.1617431640625, -4.95367431640625, -4.74560546875, -4.53753662109375, -4.3294677734375, -4.12139892578125, -3.913330078125, -3.70526123046875, -3.4971923828125, -3.28912353515625, -3.0810546875, -2.87298583984375, -2.6649169921875, -2.45684814453125, -2.248779296875, -2.04071044921875, -1.8326416015625, -1.62457275390625, -1.41650390625, -1.20843505859375, -1.0003662109375, -0.79229736328125, -0.584228515625, -0.37615966796875, -0.1680908203125, 0.03997802734375, 0.248046875, 0.45611572265625, 0.6641845703125, 0.87225341796875, 1.080322265625, 1.28839111328125, 1.4964599609375, 1.70452880859375, 1.91259765625, 2.12066650390625, 2.3287353515625, 2.53680419921875, 2.744873046875, 2.95294189453125, 3.1610107421875, 3.36907958984375, 3.5771484375, 3.78521728515625, 3.9932861328125, 4.20135498046875, 4.409423828125, 4.61749267578125, 4.8255615234375, 5.03363037109375, 5.24169921875, 5.44976806640625, 5.6578369140625, 5.86590576171875, 6.073974609375, 6.28204345703125, 6.4901123046875, 6.69818115234375, 6.90625]}, "gradients/decoder.transformer.h.6.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 5.0, 4.0, 0.0, 7.0, 4.0, 3.0, 8.0, 7.0, 8.0, 15.0, 18.0, 13.0, 18.0, 28.0, 37.0, 47.0, 67.0, 132.0, 270.0, 707.0, 2263.0, 8754.0, 43308.0, 258156.0, 1363427.0, 1936906.0, 482649.0, 77406.0, 14707.0, 3468.0, 981.0, 365.0, 170.0, 96.0, 55.0, 40.0, 36.0, 19.0, 8.0, 14.0, 17.0, 12.0, 8.0, 5.0, 5.0, 5.0, 7.0, 2.0, 4.0, 3.0, 2.0, 2.0, 1.0, 0.0, 0.0, 2.0], "bins": [-12.484375, -12.1080322265625, -11.731689453125, -11.3553466796875, -10.97900390625, -10.6026611328125, -10.226318359375, -9.8499755859375, -9.4736328125, -9.0972900390625, -8.720947265625, -8.3446044921875, -7.96826171875, -7.5919189453125, -7.215576171875, -6.8392333984375, -6.462890625, -6.0865478515625, -5.710205078125, -5.3338623046875, -4.95751953125, -4.5811767578125, -4.204833984375, -3.8284912109375, -3.4521484375, -3.0758056640625, -2.699462890625, -2.3231201171875, -1.94677734375, -1.5704345703125, -1.194091796875, -0.8177490234375, -0.44140625, -0.0650634765625, 0.311279296875, 0.6876220703125, 1.06396484375, 1.4403076171875, 1.816650390625, 2.1929931640625, 2.5693359375, 2.9456787109375, 3.322021484375, 3.6983642578125, 4.07470703125, 4.4510498046875, 4.827392578125, 5.2037353515625, 5.580078125, 5.9564208984375, 6.332763671875, 6.7091064453125, 7.08544921875, 7.4617919921875, 7.838134765625, 8.2144775390625, 8.5908203125, 8.9671630859375, 9.343505859375, 9.7198486328125, 10.09619140625, 10.4725341796875, 10.848876953125, 11.2252197265625, 11.6015625]}, "gradients/decoder.transformer.h.6.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 3.0, 2.0, 7.0, 7.0, 10.0, 12.0, 14.0, 31.0, 30.0, 33.0, 65.0, 67.0, 80.0, 115.0, 171.0, 222.0, 273.0, 337.0, 391.0, 401.0, 432.0, 359.0, 256.0, 183.0, 135.0, 113.0, 85.0, 62.0, 40.0, 39.0, 26.0, 9.0, 23.0, 13.0, 8.0, 7.0, 2.0, 8.0, 2.0, 4.0, 2.0, 0.0, 3.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-10.21875, -9.8831787109375, -9.547607421875, -9.2120361328125, -8.87646484375, -8.5408935546875, -8.205322265625, -7.8697509765625, -7.5341796875, -7.1986083984375, -6.863037109375, -6.5274658203125, -6.19189453125, -5.8563232421875, -5.520751953125, -5.1851806640625, -4.849609375, -4.5140380859375, -4.178466796875, -3.8428955078125, -3.50732421875, -3.1717529296875, -2.836181640625, -2.5006103515625, -2.1650390625, -1.8294677734375, -1.493896484375, -1.1583251953125, -0.82275390625, -0.4871826171875, -0.151611328125, 0.1839599609375, 0.51953125, 0.8551025390625, 1.190673828125, 1.5262451171875, 1.86181640625, 2.1973876953125, 2.532958984375, 2.8685302734375, 3.2041015625, 3.5396728515625, 3.875244140625, 4.2108154296875, 4.54638671875, 4.8819580078125, 5.217529296875, 5.5531005859375, 5.888671875, 6.2242431640625, 6.559814453125, 6.8953857421875, 7.23095703125, 7.5665283203125, 7.902099609375, 8.2376708984375, 8.5732421875, 8.9088134765625, 9.244384765625, 9.5799560546875, 9.91552734375, 10.2510986328125, 10.586669921875, 10.9222412109375, 11.2578125]}, "gradients/decoder.transformer.h.6.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 1.0, 0.0, 2.0, 1.0, 2.0, 2.0, 3.0, 0.0, 2.0, 4.0, 11.0, 8.0, 14.0, 26.0, 26.0, 30.0, 49.0, 59.0, 79.0, 100.0, 139.0, 243.0, 347.0, 560.0, 1266.0, 4628.0, 31289.0, 395907.0, 3098244.0, 607835.0, 44020.0, 6021.0, 1492.0, 647.0, 360.0, 250.0, 144.0, 126.0, 96.0, 51.0, 55.0, 46.0, 33.0, 17.0, 15.0, 11.0, 8.0, 5.0, 3.0, 5.0, 6.0, 2.0, 0.0, 0.0, 0.0, 0.0, 4.0, 2.0, 1.0, 1.0, 2.0], "bins": [-24.359375, -23.587890625, -22.81640625, -22.044921875, -21.2734375, -20.501953125, -19.73046875, -18.958984375, -18.1875, -17.416015625, -16.64453125, -15.873046875, -15.1015625, -14.330078125, -13.55859375, -12.787109375, -12.015625, -11.244140625, -10.47265625, -9.701171875, -8.9296875, -8.158203125, -7.38671875, -6.615234375, -5.84375, -5.072265625, -4.30078125, -3.529296875, -2.7578125, -1.986328125, -1.21484375, -0.443359375, 0.328125, 1.099609375, 1.87109375, 2.642578125, 3.4140625, 4.185546875, 4.95703125, 5.728515625, 6.5, 7.271484375, 8.04296875, 8.814453125, 9.5859375, 10.357421875, 11.12890625, 11.900390625, 12.671875, 13.443359375, 14.21484375, 14.986328125, 15.7578125, 16.529296875, 17.30078125, 18.072265625, 18.84375, 19.615234375, 20.38671875, 21.158203125, 21.9296875, 22.701171875, 23.47265625, 24.244140625, 25.015625]}, "gradients/decoder.transformer.h.6.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 5.0, 7.0, 12.0, 24.0, 48.0, 81.0, 123.0, 134.0, 134.0, 142.0, 125.0, 86.0, 50.0, 26.0, 7.0, 5.0, 5.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-76.61207580566406, -74.57588195800781, -72.5396957397461, -70.50350189208984, -68.46731567382812, -66.43112182617188, -64.39492797851562, -62.358741760253906, -60.322547912597656, -58.28635787963867, -56.25016784667969, -54.21397399902344, -52.17778396606445, -50.14159393310547, -48.105403900146484, -46.0692138671875, -44.033023834228516, -41.99683380126953, -39.96064376831055, -37.92445373535156, -35.88825988769531, -33.85206985473633, -31.815879821777344, -29.77968978881836, -27.743497848510742, -25.707307815551758, -23.67111587524414, -21.634925842285156, -19.598735809326172, -17.562543869018555, -15.52635383605957, -13.49016284942627, -11.453975677490234, -9.417784690856934, -7.381594181060791, -5.345403671264648, -3.3092126846313477, -1.2730216979980469, 0.7631683349609375, 2.7993593215942383, 4.835550308227539, 6.87174129486084, 8.90793228149414, 10.944122314453125, 12.980313301086426, 15.016504287719727, 17.05269432067871, 19.088886260986328, 21.125076293945312, 23.161266326904297, 25.197458267211914, 27.2336483001709, 29.269840240478516, 31.3060302734375, 33.342220306396484, 35.37841033935547, 37.41460418701172, 39.4507942199707, 41.48698425292969, 43.52317810058594, 45.55936813354492, 47.595558166503906, 49.63174819946289, 51.667938232421875, 53.70412826538086]}, "gradients/decoder.transformer.h.6.ln_2.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 2.0, 10.0, 4.0, 3.0, 3.0, 12.0, 5.0, 9.0, 18.0, 20.0, 24.0, 27.0, 32.0, 23.0, 29.0, 42.0, 32.0, 38.0, 48.0, 40.0, 52.0, 36.0, 61.0, 43.0, 42.0, 52.0, 42.0, 27.0, 32.0, 32.0, 26.0, 22.0, 17.0, 18.0, 14.0, 17.0, 11.0, 13.0, 12.0, 6.0, 2.0, 4.0, 2.0, 3.0, 1.0, 2.0, 3.0, 0.0, 2.0, 1.0], "bins": [-47.48520278930664, -46.13641357421875, -44.787620544433594, -43.43882751464844, -42.09003829956055, -40.741249084472656, -39.3924560546875, -38.043663024902344, -36.69487380981445, -35.34608459472656, -33.997291564941406, -32.64849853515625, -31.29970932006836, -29.950918197631836, -28.602127075195312, -27.25333595275879, -25.904544830322266, -24.555753707885742, -23.20696258544922, -21.858171463012695, -20.509380340576172, -19.16058921813965, -17.811798095703125, -16.4630069732666, -15.114215850830078, -13.765424728393555, -12.416633605957031, -11.067842483520508, -9.719051361083984, -8.370260238647461, -7.0214691162109375, -5.672677993774414, -4.323883056640625, -2.9750919342041016, -1.6263008117675781, -0.2775096893310547, 1.0712814331054688, 2.420072555541992, 3.7688636779785156, 5.117654800415039, 6.4664459228515625, 7.815237045288086, 9.16402816772461, 10.512819290161133, 11.861610412597656, 13.21040153503418, 14.559192657470703, 15.907983779907227, 17.25677490234375, 18.605566024780273, 19.954357147216797, 21.30314826965332, 22.651939392089844, 24.000730514526367, 25.34952163696289, 26.698312759399414, 28.047103881835938, 29.39589500427246, 30.744686126708984, 32.093475341796875, 33.44226837158203, 34.79106140136719, 36.13985061645508, 37.48863983154297, 38.837432861328125]}, "gradients/decoder.transformer.h.6.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 2.0, 2.0, 3.0, 4.0, 3.0, 5.0, 1.0, 6.0, 6.0, 11.0, 8.0, 11.0, 13.0, 12.0, 17.0, 16.0, 33.0, 23.0, 28.0, 30.0, 29.0, 29.0, 32.0, 41.0, 34.0, 40.0, 49.0, 41.0, 38.0, 38.0, 38.0, 35.0, 26.0, 32.0, 40.0, 34.0, 20.0, 25.0, 26.0, 18.0, 20.0, 17.0, 9.0, 13.0, 13.0, 7.0, 8.0, 7.0, 6.0, 4.0, 0.0, 5.0, 2.0, 3.0, 3.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-5.88671875, -5.6824951171875, -5.478271484375, -5.2740478515625, -5.06982421875, -4.8656005859375, -4.661376953125, -4.4571533203125, -4.2529296875, -4.0487060546875, -3.844482421875, -3.6402587890625, -3.43603515625, -3.2318115234375, -3.027587890625, -2.8233642578125, -2.619140625, -2.4149169921875, -2.210693359375, -2.0064697265625, -1.80224609375, -1.5980224609375, -1.393798828125, -1.1895751953125, -0.9853515625, -0.7811279296875, -0.576904296875, -0.3726806640625, -0.16845703125, 0.0357666015625, 0.239990234375, 0.4442138671875, 0.6484375, 0.8526611328125, 1.056884765625, 1.2611083984375, 1.46533203125, 1.6695556640625, 1.873779296875, 2.0780029296875, 2.2822265625, 2.4864501953125, 2.690673828125, 2.8948974609375, 3.09912109375, 3.3033447265625, 3.507568359375, 3.7117919921875, 3.916015625, 4.1202392578125, 4.324462890625, 4.5286865234375, 4.73291015625, 4.9371337890625, 5.141357421875, 5.3455810546875, 5.5498046875, 5.7540283203125, 5.958251953125, 6.1624755859375, 6.36669921875, 6.5709228515625, 6.775146484375, 6.9793701171875, 7.18359375]}, "gradients/decoder.transformer.h.6.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 3.0, 1.0, 2.0, 1.0, 5.0, 6.0, 10.0, 18.0, 20.0, 41.0, 59.0, 97.0, 157.0, 239.0, 336.0, 587.0, 913.0, 1484.0, 2244.0, 3569.0, 5782.0, 9419.0, 14786.0, 23659.0, 37595.0, 58168.0, 87554.0, 124296.0, 155491.0, 155375.0, 122578.0, 86556.0, 57791.0, 36968.0, 23464.0, 14617.0, 9259.0, 5712.0, 3628.0, 2305.0, 1331.0, 945.0, 542.0, 331.0, 241.0, 120.0, 91.0, 66.0, 34.0, 31.0, 16.0, 12.0, 3.0, 5.0, 7.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.73779296875, -0.7131881713867188, -0.6885833740234375, -0.6639785766601562, -0.639373779296875, -0.6147689819335938, -0.5901641845703125, -0.5655593872070312, -0.54095458984375, -0.5163497924804688, -0.4917449951171875, -0.46714019775390625, -0.442535400390625, -0.41793060302734375, -0.3933258056640625, -0.36872100830078125, -0.3441162109375, -0.31951141357421875, -0.2949066162109375, -0.27030181884765625, -0.245697021484375, -0.22109222412109375, -0.1964874267578125, -0.17188262939453125, -0.14727783203125, -0.12267303466796875, -0.0980682373046875, -0.07346343994140625, -0.048858642578125, -0.02425384521484375, 0.0003509521484375, 0.02495574951171875, 0.049560546875, 0.07416534423828125, 0.0987701416015625, 0.12337493896484375, 0.147979736328125, 0.17258453369140625, 0.1971893310546875, 0.22179412841796875, 0.24639892578125, 0.27100372314453125, 0.2956085205078125, 0.32021331787109375, 0.344818115234375, 0.36942291259765625, 0.3940277099609375, 0.41863250732421875, 0.4432373046875, 0.46784210205078125, 0.4924468994140625, 0.5170516967773438, 0.541656494140625, 0.5662612915039062, 0.5908660888671875, 0.6154708862304688, 0.64007568359375, 0.6646804809570312, 0.6892852783203125, 0.7138900756835938, 0.738494873046875, 0.7630996704101562, 0.7877044677734375, 0.8123092651367188, 0.8369140625]}, "gradients/decoder.transformer.h.6.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 0.0, 1.0, 3.0, 3.0, 1.0, 2.0, 2.0, 1.0, 5.0, 4.0, 7.0, 12.0, 12.0, 12.0, 11.0, 15.0, 17.0, 13.0, 16.0, 24.0, 28.0, 33.0, 22.0, 43.0, 36.0, 52.0, 39.0, 44.0, 44.0, 1063.0, 35.0, 38.0, 31.0, 49.0, 27.0, 31.0, 31.0, 35.0, 25.0, 32.0, 18.0, 21.0, 22.0, 12.0, 18.0, 6.0, 10.0, 8.0, 7.0, 1.0, 3.0, 2.0, 6.0, 3.0, 2.0, 2.0, 2.0, 0.0, 0.0, 1.0, 1.0], "bins": [-4.70703125, -4.56219482421875, -4.4173583984375, -4.27252197265625, -4.127685546875, -3.98284912109375, -3.8380126953125, -3.69317626953125, -3.54833984375, -3.40350341796875, -3.2586669921875, -3.11383056640625, -2.968994140625, -2.82415771484375, -2.6793212890625, -2.53448486328125, -2.3896484375, -2.24481201171875, -2.0999755859375, -1.95513916015625, -1.810302734375, -1.66546630859375, -1.5206298828125, -1.37579345703125, -1.23095703125, -1.08612060546875, -0.9412841796875, -0.79644775390625, -0.651611328125, -0.50677490234375, -0.3619384765625, -0.21710205078125, -0.072265625, 0.07257080078125, 0.2174072265625, 0.36224365234375, 0.507080078125, 0.65191650390625, 0.7967529296875, 0.94158935546875, 1.08642578125, 1.23126220703125, 1.3760986328125, 1.52093505859375, 1.665771484375, 1.81060791015625, 1.9554443359375, 2.10028076171875, 2.2451171875, 2.38995361328125, 2.5347900390625, 2.67962646484375, 2.824462890625, 2.96929931640625, 3.1141357421875, 3.25897216796875, 3.40380859375, 3.54864501953125, 3.6934814453125, 3.83831787109375, 3.983154296875, 4.12799072265625, 4.2728271484375, 4.41766357421875, 4.5625]}, "gradients/decoder.transformer.h.6.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 2.0, 2.0, 0.0, 2.0, 4.0, 10.0, 11.0, 13.0, 34.0, 41.0, 52.0, 96.0, 139.0, 212.0, 292.0, 463.0, 711.0, 991.0, 1731.0, 2577.0, 3938.0, 6224.0, 9622.0, 15243.0, 23582.0, 35791.0, 54172.0, 79040.0, 109199.0, 139143.0, 1191127.0, 126779.0, 96189.0, 68230.0, 46423.0, 30116.0, 19737.0, 12619.0, 8038.0, 5200.0, 3232.0, 2108.0, 1349.0, 850.0, 622.0, 386.0, 260.0, 167.0, 124.0, 81.0, 57.0, 37.0, 23.0, 29.0, 7.0, 8.0, 5.0, 0.0, 3.0, 4.0, 2.0], "bins": [-0.533203125, -0.5171890258789062, -0.5011749267578125, -0.48516082763671875, -0.469146728515625, -0.45313262939453125, -0.4371185302734375, -0.42110443115234375, -0.40509033203125, -0.38907623291015625, -0.3730621337890625, -0.35704803466796875, -0.341033935546875, -0.32501983642578125, -0.3090057373046875, -0.29299163818359375, -0.2769775390625, -0.26096343994140625, -0.2449493408203125, -0.22893524169921875, -0.212921142578125, -0.19690704345703125, -0.1808929443359375, -0.16487884521484375, -0.14886474609375, -0.13285064697265625, -0.1168365478515625, -0.10082244873046875, -0.084808349609375, -0.06879425048828125, -0.0527801513671875, -0.03676605224609375, -0.020751953125, -0.00473785400390625, 0.0112762451171875, 0.02729034423828125, 0.043304443359375, 0.05931854248046875, 0.0753326416015625, 0.09134674072265625, 0.10736083984375, 0.12337493896484375, 0.1393890380859375, 0.15540313720703125, 0.171417236328125, 0.18743133544921875, 0.2034454345703125, 0.21945953369140625, 0.2354736328125, 0.25148773193359375, 0.2675018310546875, 0.28351593017578125, 0.299530029296875, 0.31554412841796875, 0.3315582275390625, 0.34757232666015625, 0.36358642578125, 0.37960052490234375, 0.3956146240234375, 0.41162872314453125, 0.427642822265625, 0.44365692138671875, 0.4596710205078125, 0.47568511962890625, 0.49169921875]}, "gradients/decoder.transformer.h.6.crossattention.q_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 2.0, 2.0, 1.0, 4.0, 1.0, 5.0, 3.0, 8.0, 8.0, 10.0, 10.0, 10.0, 14.0, 10.0, 15.0, 28.0, 24.0, 24.0, 34.0, 51.0, 47.0, 68.0, 86.0, 102.0, 85.0, 79.0, 51.0, 26.0, 35.0, 36.0, 20.0, 16.0, 17.0, 9.0, 12.0, 11.0, 9.0, 7.0, 7.0, 6.0, 3.0, 3.0, 2.0, 2.0, 2.0, 3.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.0109100341796875, -0.010610103607177734, -0.010310173034667969, -0.010010242462158203, -0.009710311889648438, -0.009410381317138672, -0.009110450744628906, -0.00881052017211914, -0.008510589599609375, -0.00821065902709961, -0.007910728454589844, -0.007610797882080078, -0.0073108673095703125, -0.007010936737060547, -0.006711006164550781, -0.006411075592041016, -0.00611114501953125, -0.005811214447021484, -0.005511283874511719, -0.005211353302001953, -0.0049114227294921875, -0.004611492156982422, -0.004311561584472656, -0.004011631011962891, -0.003711700439453125, -0.0034117698669433594, -0.0031118392944335938, -0.002811908721923828, -0.0025119781494140625, -0.002212047576904297, -0.0019121170043945312, -0.0016121864318847656, -0.001312255859375, -0.0010123252868652344, -0.0007123947143554688, -0.0004124641418457031, -0.0001125335693359375, 0.00018739700317382812, 0.00048732757568359375, 0.0007872581481933594, 0.001087188720703125, 0.0013871192932128906, 0.0016870498657226562, 0.001986980438232422, 0.0022869110107421875, 0.002586841583251953, 0.0028867721557617188, 0.0031867027282714844, 0.00348663330078125, 0.0037865638732910156, 0.004086494445800781, 0.004386425018310547, 0.0046863555908203125, 0.004986286163330078, 0.005286216735839844, 0.005586147308349609, 0.005886077880859375, 0.006186008453369141, 0.006485939025878906, 0.006785869598388672, 0.0070858001708984375, 0.007385730743408203, 0.007685661315917969, 0.007985591888427734, 0.0082855224609375]}, "gradients/decoder.transformer.h.6.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 3.0, 2.0, 0.0, 0.0, 5.0, 5.0, 4.0, 6.0, 9.0, 12.0, 13.0, 17.0, 20.0, 19.0, 33.0, 48.0, 65.0, 94.0, 139.0, 200.0, 456.0, 1834.0, 725911.0, 317146.0, 1423.0, 384.0, 219.0, 133.0, 81.0, 65.0, 49.0, 38.0, 28.0, 15.0, 11.0, 15.0, 15.0, 7.0, 6.0, 10.0, 3.0, 6.0, 4.0, 1.0, 5.0, 1.0, 2.0, 3.0, 1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-0.1739501953125, -0.16772842407226562, -0.16150665283203125, -0.15528488159179688, -0.1490631103515625, -0.14284133911132812, -0.13661956787109375, -0.13039779663085938, -0.124176025390625, -0.11795425415039062, -0.11173248291015625, -0.10551071166992188, -0.0992889404296875, -0.09306716918945312, -0.08684539794921875, -0.08062362670898438, -0.07440185546875, -0.06818008422851562, -0.06195831298828125, -0.055736541748046875, -0.0495147705078125, -0.043292999267578125, -0.03707122802734375, -0.030849456787109375, -0.024627685546875, -0.018405914306640625, -0.01218414306640625, -0.005962371826171875, 0.0002593994140625, 0.006481170654296875, 0.01270294189453125, 0.018924713134765625, 0.025146484375, 0.031368255615234375, 0.03759002685546875, 0.043811798095703125, 0.0500335693359375, 0.056255340576171875, 0.06247711181640625, 0.06869888305664062, 0.074920654296875, 0.08114242553710938, 0.08736419677734375, 0.09358596801757812, 0.0998077392578125, 0.10602951049804688, 0.11225128173828125, 0.11847305297851562, 0.12469482421875, 0.13091659545898438, 0.13713836669921875, 0.14336013793945312, 0.1495819091796875, 0.15580368041992188, 0.16202545166015625, 0.16824722290039062, 0.174468994140625, 0.18069076538085938, 0.18691253662109375, 0.19313430786132812, 0.1993560791015625, 0.20557785034179688, 0.21179962158203125, 0.21802139282226562, 0.2242431640625]}, "gradients/decoder.transformer.h.6.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 19.0, 926.0, 69.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.07837981730699539, -0.07418414950370789, -0.06998848170042038, -0.06579281389713287, -0.06159714609384537, -0.05740147829055786, -0.05320580676198006, -0.04901013895869255, -0.044814471155405045, -0.04061880335211754, -0.03642313554883003, -0.03222746402025223, -0.02803179807960987, -0.023836130276322365, -0.01964046061038971, -0.015444792807102203, -0.011249125003814697, -0.007053456734865904, -0.0028577884659171104, 0.0013378802686929703, 0.005533548071980476, 0.009729215875267982, 0.013924885541200638, 0.018120553344488144, 0.02231622114777565, 0.026511888951063156, 0.030707556754350662, 0.03490322828292847, 0.03909889608621597, 0.04329456388950348, 0.047490231692790985, 0.05168589949607849, 0.0558815598487854, 0.060077227652072906, 0.06427289545536041, 0.06846856325864792, 0.07266423106193542, 0.07685989886522293, 0.08105556666851044, 0.08525124192237854, 0.08944690227508545, 0.09364257007837296, 0.09783823788166046, 0.10203390568494797, 0.10622957348823547, 0.11042524129152298, 0.11462090909481049, 0.11881658434867859, 0.1230122521519661, 0.1272079199552536, 0.1314035952091217, 0.1355992555618286, 0.13979493081569672, 0.14399059116840363, 0.14818626642227173, 0.15238192677497864, 0.15657760202884674, 0.16077327728271484, 0.16496893763542175, 0.16916461288928986, 0.17336027324199677, 0.17755594849586487, 0.18175160884857178, 0.18594728410243988, 0.1901429444551468]}, "gradients/decoder.transformer.h.6.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 1.0, 4.0, 4.0, 2.0, 2.0, 3.0, 8.0, 14.0, 18.0, 12.0, 25.0, 23.0, 32.0, 31.0, 39.0, 64.0, 50.0, 66.0, 45.0, 63.0, 54.0, 50.0, 52.0, 45.0, 41.0, 41.0, 36.0, 31.0, 23.0, 31.0, 27.0, 11.0, 18.0, 16.0, 10.0, 9.0, 7.0, 2.0, 3.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0095747709274292, -0.009224273264408112, -0.008873775601387024, -0.008523277938365936, -0.008172780275344849, -0.007822282612323761, -0.007471784949302673, -0.007121287286281586, -0.006770789623260498, -0.00642029196023941, -0.006069794297218323, -0.005719296634197235, -0.0053687989711761475, -0.00501830130815506, -0.004667803645133972, -0.0043173059821128845, -0.003966808319091797, -0.0036163106560707092, -0.0032658129930496216, -0.002915315330028534, -0.0025648176670074463, -0.0022143200039863586, -0.001863822340965271, -0.0015133246779441833, -0.0011628270149230957, -0.0008123293519020081, -0.0004618316888809204, -0.00011133402585983276, 0.00023916363716125488, 0.0005896613001823425, 0.0009401589632034302, 0.0012906566262245178, 0.0016411542892456055, 0.001991651952266693, 0.0023421496152877808, 0.0026926472783088684, 0.003043144941329956, 0.0033936426043510437, 0.0037441402673721313, 0.004094637930393219, 0.004445135593414307, 0.004795633256435394, 0.005146130919456482, 0.00549662858247757, 0.005847126245498657, 0.006197623908519745, 0.0065481215715408325, 0.00689861923456192, 0.007249116897583008, 0.0075996145606040955, 0.007950112223625183, 0.00830060988664627, 0.008651107549667358, 0.009001605212688446, 0.009352102875709534, 0.009702600538730621, 0.010053098201751709, 0.010403595864772797, 0.010754093527793884, 0.011104591190814972, 0.01145508885383606, 0.011805586516857147, 0.012156084179878235, 0.012506581842899323, 0.01285707950592041]}, "gradients/decoder.transformer.h.6.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 2.0, 2.0, 3.0, 4.0, 3.0, 5.0, 1.0, 6.0, 6.0, 11.0, 8.0, 11.0, 13.0, 12.0, 17.0, 16.0, 33.0, 23.0, 28.0, 30.0, 29.0, 29.0, 32.0, 41.0, 34.0, 39.0, 50.0, 41.0, 38.0, 38.0, 38.0, 35.0, 26.0, 32.0, 40.0, 34.0, 20.0, 25.0, 26.0, 18.0, 20.0, 17.0, 9.0, 13.0, 13.0, 7.0, 8.0, 7.0, 6.0, 4.0, 0.0, 5.0, 2.0, 3.0, 3.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-5.88671875, -5.6824951171875, -5.478271484375, -5.2740478515625, -5.06982421875, -4.8656005859375, -4.661376953125, -4.4571533203125, -4.2529296875, -4.0487060546875, -3.844482421875, -3.6402587890625, -3.43603515625, -3.2318115234375, -3.027587890625, -2.8233642578125, -2.619140625, -2.4149169921875, -2.210693359375, -2.0064697265625, -1.80224609375, -1.5980224609375, -1.393798828125, -1.1895751953125, -0.9853515625, -0.7811279296875, -0.576904296875, -0.3726806640625, -0.16845703125, 0.0357666015625, 0.239990234375, 0.4442138671875, 0.6484375, 0.8526611328125, 1.056884765625, 1.2611083984375, 1.46533203125, 1.6695556640625, 1.873779296875, 2.0780029296875, 2.2822265625, 2.4864501953125, 2.690673828125, 2.8948974609375, 3.09912109375, 3.3033447265625, 3.507568359375, 3.7117919921875, 3.916015625, 4.1202392578125, 4.324462890625, 4.5286865234375, 4.73291015625, 4.9371337890625, 5.141357421875, 5.3455810546875, 5.5498046875, 5.7540283203125, 5.958251953125, 6.1624755859375, 6.36669921875, 6.5709228515625, 6.775146484375, 6.9793701171875, 7.18359375]}, "gradients/decoder.transformer.h.6.attn.c_proj.weight": {"_type": "histogram", "values": [5.0, 1.0, 2.0, 4.0, 9.0, 7.0, 6.0, 9.0, 10.0, 21.0, 27.0, 22.0, 32.0, 54.0, 51.0, 62.0, 97.0, 134.0, 149.0, 200.0, 257.0, 346.0, 599.0, 1127.0, 2551.0, 7353.0, 22009.0, 85231.0, 686308.0, 188341.0, 34709.0, 11049.0, 3823.0, 1514.0, 787.0, 424.0, 296.0, 216.0, 136.0, 139.0, 94.0, 76.0, 67.0, 40.0, 29.0, 43.0, 23.0, 12.0, 16.0, 13.0, 13.0, 5.0, 7.0, 6.0, 2.0, 6.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-14.21875, -13.72314453125, -13.2275390625, -12.73193359375, -12.236328125, -11.74072265625, -11.2451171875, -10.74951171875, -10.25390625, -9.75830078125, -9.2626953125, -8.76708984375, -8.271484375, -7.77587890625, -7.2802734375, -6.78466796875, -6.2890625, -5.79345703125, -5.2978515625, -4.80224609375, -4.306640625, -3.81103515625, -3.3154296875, -2.81982421875, -2.32421875, -1.82861328125, -1.3330078125, -0.83740234375, -0.341796875, 0.15380859375, 0.6494140625, 1.14501953125, 1.640625, 2.13623046875, 2.6318359375, 3.12744140625, 3.623046875, 4.11865234375, 4.6142578125, 5.10986328125, 5.60546875, 6.10107421875, 6.5966796875, 7.09228515625, 7.587890625, 8.08349609375, 8.5791015625, 9.07470703125, 9.5703125, 10.06591796875, 10.5615234375, 11.05712890625, 11.552734375, 12.04833984375, 12.5439453125, 13.03955078125, 13.53515625, 14.03076171875, 14.5263671875, 15.02197265625, 15.517578125, 16.01318359375, 16.5087890625, 17.00439453125, 17.5]}, "gradients/decoder.transformer.h.6.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 5.0, 2.0, 3.0, 2.0, 7.0, 8.0, 4.0, 5.0, 8.0, 10.0, 12.0, 19.0, 19.0, 25.0, 32.0, 20.0, 25.0, 36.0, 40.0, 31.0, 40.0, 59.0, 79.0, 195.0, 1462.0, 313.0, 118.0, 79.0, 53.0, 51.0, 51.0, 35.0, 45.0, 26.0, 19.0, 26.0, 21.0, 18.0, 9.0, 9.0, 12.0, 3.0, 10.0, 4.0, 2.0, 7.0, 3.0, 2.0, 1.0, 3.0], "bins": [-24.9375, -24.295166015625, -23.65283203125, -23.010498046875, -22.3681640625, -21.725830078125, -21.08349609375, -20.441162109375, -19.798828125, -19.156494140625, -18.51416015625, -17.871826171875, -17.2294921875, -16.587158203125, -15.94482421875, -15.302490234375, -14.66015625, -14.017822265625, -13.37548828125, -12.733154296875, -12.0908203125, -11.448486328125, -10.80615234375, -10.163818359375, -9.521484375, -8.879150390625, -8.23681640625, -7.594482421875, -6.9521484375, -6.309814453125, -5.66748046875, -5.025146484375, -4.3828125, -3.740478515625, -3.09814453125, -2.455810546875, -1.8134765625, -1.171142578125, -0.52880859375, 0.113525390625, 0.755859375, 1.398193359375, 2.04052734375, 2.682861328125, 3.3251953125, 3.967529296875, 4.60986328125, 5.252197265625, 5.89453125, 6.536865234375, 7.17919921875, 7.821533203125, 8.4638671875, 9.106201171875, 9.74853515625, 10.390869140625, 11.033203125, 11.675537109375, 12.31787109375, 12.960205078125, 13.6025390625, 14.244873046875, 14.88720703125, 15.529541015625, 16.171875]}, "gradients/decoder.transformer.h.6.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 0.0, 5.0, 4.0, 7.0, 4.0, 9.0, 11.0, 8.0, 12.0, 17.0, 12.0, 24.0, 31.0, 26.0, 41.0, 65.0, 70.0, 110.0, 161.0, 299.0, 692.0, 2622.0, 41013.0, 3006496.0, 88260.0, 3859.0, 851.0, 350.0, 171.0, 101.0, 75.0, 55.0, 46.0, 49.0, 29.0, 23.0, 24.0, 15.0, 14.0, 17.0, 10.0, 7.0, 8.0, 4.0, 3.0, 2.0, 3.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-45.59375, -43.87939453125, -42.1650390625, -40.45068359375, -38.736328125, -37.02197265625, -35.3076171875, -33.59326171875, -31.87890625, -30.16455078125, -28.4501953125, -26.73583984375, -25.021484375, -23.30712890625, -21.5927734375, -19.87841796875, -18.1640625, -16.44970703125, -14.7353515625, -13.02099609375, -11.306640625, -9.59228515625, -7.8779296875, -6.16357421875, -4.44921875, -2.73486328125, -1.0205078125, 0.69384765625, 2.408203125, 4.12255859375, 5.8369140625, 7.55126953125, 9.265625, 10.97998046875, 12.6943359375, 14.40869140625, 16.123046875, 17.83740234375, 19.5517578125, 21.26611328125, 22.98046875, 24.69482421875, 26.4091796875, 28.12353515625, 29.837890625, 31.55224609375, 33.2666015625, 34.98095703125, 36.6953125, 38.40966796875, 40.1240234375, 41.83837890625, 43.552734375, 45.26708984375, 46.9814453125, 48.69580078125, 50.41015625, 52.12451171875, 53.8388671875, 55.55322265625, 57.267578125, 58.98193359375, 60.6962890625, 62.41064453125, 64.125]}, "gradients/decoder.transformer.h.6.ln_1.weight": {"_type": "histogram", "values": [7.0, 81.0, 356.0, 417.0, 137.0, 18.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-13.232892990112305, -9.023740768432617, -4.814587593078613, -0.6054344177246094, 3.603717803955078, 7.812870025634766, 12.022024154663086, 16.231176376342773, 20.44032859802246, 24.64948081970215, 28.85863494873047, 33.067787170410156, 37.276939392089844, 41.48609161376953, 45.69524383544922, 49.904396057128906, 54.113548278808594, 58.32270050048828, 62.53185272216797, 66.74100494384766, 70.95015716552734, 75.15930938720703, 79.36846923828125, 83.57762145996094, 87.78677368164062, 91.99592590332031, 96.205078125, 100.41423034667969, 104.62338256835938, 108.83253479003906, 113.04168701171875, 117.25083923339844, 121.46000671386719, 125.66915893554688, 129.87831115722656, 134.08746337890625, 138.29661560058594, 142.50576782226562, 146.7149200439453, 150.924072265625, 155.1332244873047, 159.34237670898438, 163.55152893066406, 167.76068115234375, 171.96983337402344, 176.17898559570312, 180.3881378173828, 184.5972900390625, 188.80645751953125, 193.01560974121094, 197.22476196289062, 201.4339141845703, 205.64306640625, 209.8522186279297, 214.06137084960938, 218.27052307128906, 222.47967529296875, 226.68882751464844, 230.89797973632812, 235.1071319580078, 239.3162841796875, 243.5254364013672, 247.73458862304688, 251.94374084472656, 256.15289306640625]}, "gradients/decoder.transformer.h.6.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 5.0, 2.0, 2.0, 4.0, 1.0, 9.0, 4.0, 8.0, 7.0, 7.0, 10.0, 17.0, 16.0, 23.0, 16.0, 24.0, 24.0, 30.0, 27.0, 28.0, 34.0, 35.0, 38.0, 32.0, 40.0, 36.0, 38.0, 41.0, 36.0, 41.0, 30.0, 30.0, 36.0, 26.0, 34.0, 20.0, 29.0, 16.0, 26.0, 18.0, 20.0, 27.0, 15.0, 13.0, 10.0, 10.0, 8.0, 3.0, 3.0, 5.0, 0.0, 2.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-47.96725082397461, -46.43487548828125, -44.902503967285156, -43.3701286315918, -41.83775329589844, -40.30537796020508, -38.77300262451172, -37.240631103515625, -35.708255767822266, -34.175880432128906, -32.64350891113281, -31.111133575439453, -29.578758239746094, -28.046382904052734, -26.514009475708008, -24.98163604736328, -23.449260711669922, -21.916885375976562, -20.384511947631836, -18.85213851928711, -17.31976318359375, -15.787388801574707, -14.255014419555664, -12.722640037536621, -11.190265655517578, -9.657891273498535, -8.125516891479492, -6.593142509460449, -5.060768127441406, -3.5283937454223633, -1.9960193634033203, -0.46364498138427734, 1.0687255859375, 2.601099967956543, 4.133474349975586, 5.665848731994629, 7.198223114013672, 8.730597496032715, 10.262971878051758, 11.7953462600708, 13.327720642089844, 14.860095024108887, 16.39246940612793, 17.924842834472656, 19.457218170166016, 20.989593505859375, 22.5219669342041, 24.054340362548828, 25.586715698242188, 27.119091033935547, 28.651464462280273, 30.183837890625, 31.71621322631836, 33.24858856201172, 34.78096008300781, 36.31333541870117, 37.84571075439453, 39.37808609008789, 40.91046142578125, 42.442832946777344, 43.9752082824707, 45.50758361816406, 47.039955139160156, 48.572330474853516, 50.104705810546875]}, "gradients/decoder.transformer.h.5.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 2.0, 4.0, 3.0, 3.0, 6.0, 9.0, 4.0, 7.0, 9.0, 11.0, 11.0, 15.0, 12.0, 18.0, 18.0, 27.0, 30.0, 18.0, 44.0, 39.0, 37.0, 39.0, 41.0, 34.0, 38.0, 49.0, 42.0, 43.0, 34.0, 42.0, 31.0, 33.0, 35.0, 23.0, 27.0, 28.0, 31.0, 17.0, 10.0, 24.0, 8.0, 9.0, 6.0, 8.0, 9.0, 9.0, 8.0, 4.0, 1.0, 1.0, 1.0, 2.0, 3.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-6.6640625, -6.43975830078125, -6.2154541015625, -5.99114990234375, -5.766845703125, -5.54254150390625, -5.3182373046875, -5.09393310546875, -4.86962890625, -4.64532470703125, -4.4210205078125, -4.19671630859375, -3.972412109375, -3.74810791015625, -3.5238037109375, -3.29949951171875, -3.0751953125, -2.85089111328125, -2.6265869140625, -2.40228271484375, -2.177978515625, -1.95367431640625, -1.7293701171875, -1.50506591796875, -1.28076171875, -1.05645751953125, -0.8321533203125, -0.60784912109375, -0.383544921875, -0.15924072265625, 0.0650634765625, 0.28936767578125, 0.513671875, 0.73797607421875, 0.9622802734375, 1.18658447265625, 1.410888671875, 1.63519287109375, 1.8594970703125, 2.08380126953125, 2.30810546875, 2.53240966796875, 2.7567138671875, 2.98101806640625, 3.205322265625, 3.42962646484375, 3.6539306640625, 3.87823486328125, 4.1025390625, 4.32684326171875, 4.5511474609375, 4.77545166015625, 4.999755859375, 5.22406005859375, 5.4483642578125, 5.67266845703125, 5.89697265625, 6.12127685546875, 6.3455810546875, 6.56988525390625, 6.794189453125, 7.01849365234375, 7.2427978515625, 7.46710205078125, 7.69140625]}, "gradients/decoder.transformer.h.5.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 2.0, 2.0, 0.0, 3.0, 2.0, 2.0, 4.0, 7.0, 11.0, 7.0, 19.0, 14.0, 17.0, 20.0, 22.0, 28.0, 53.0, 73.0, 109.0, 168.0, 304.0, 579.0, 1382.0, 3603.0, 10775.0, 38377.0, 163390.0, 725926.0, 1874473.0, 1052404.0, 244571.0, 55213.0, 14540.0, 4744.0, 1716.0, 710.0, 396.0, 166.0, 149.0, 69.0, 69.0, 32.0, 27.0, 27.0, 23.0, 14.0, 14.0, 6.0, 8.0, 7.0, 3.0, 6.0, 3.0, 3.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0], "bins": [-10.53125, -10.1983642578125, -9.865478515625, -9.5325927734375, -9.19970703125, -8.8668212890625, -8.533935546875, -8.2010498046875, -7.8681640625, -7.5352783203125, -7.202392578125, -6.8695068359375, -6.53662109375, -6.2037353515625, -5.870849609375, -5.5379638671875, -5.205078125, -4.8721923828125, -4.539306640625, -4.2064208984375, -3.87353515625, -3.5406494140625, -3.207763671875, -2.8748779296875, -2.5419921875, -2.2091064453125, -1.876220703125, -1.5433349609375, -1.21044921875, -0.8775634765625, -0.544677734375, -0.2117919921875, 0.12109375, 0.4539794921875, 0.786865234375, 1.1197509765625, 1.45263671875, 1.7855224609375, 2.118408203125, 2.4512939453125, 2.7841796875, 3.1170654296875, 3.449951171875, 3.7828369140625, 4.11572265625, 4.4486083984375, 4.781494140625, 5.1143798828125, 5.447265625, 5.7801513671875, 6.113037109375, 6.4459228515625, 6.77880859375, 7.1116943359375, 7.444580078125, 7.7774658203125, 8.1103515625, 8.4432373046875, 8.776123046875, 9.1090087890625, 9.44189453125, 9.7747802734375, 10.107666015625, 10.4405517578125, 10.7734375]}, "gradients/decoder.transformer.h.5.mlp.c_fc.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 5.0, 2.0, 2.0, 4.0, 8.0, 4.0, 7.0, 14.0, 15.0, 23.0, 29.0, 32.0, 58.0, 72.0, 104.0, 144.0, 189.0, 281.0, 384.0, 490.0, 480.0, 480.0, 334.0, 259.0, 199.0, 130.0, 87.0, 58.0, 50.0, 37.0, 25.0, 22.0, 13.0, 14.0, 12.0, 5.0, 5.0, 1.0, 4.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.859375, -12.4605712890625, -12.061767578125, -11.6629638671875, -11.26416015625, -10.8653564453125, -10.466552734375, -10.0677490234375, -9.6689453125, -9.2701416015625, -8.871337890625, -8.4725341796875, -8.07373046875, -7.6749267578125, -7.276123046875, -6.8773193359375, -6.478515625, -6.0797119140625, -5.680908203125, -5.2821044921875, -4.88330078125, -4.4844970703125, -4.085693359375, -3.6868896484375, -3.2880859375, -2.8892822265625, -2.490478515625, -2.0916748046875, -1.69287109375, -1.2940673828125, -0.895263671875, -0.4964599609375, -0.09765625, 0.3011474609375, 0.699951171875, 1.0987548828125, 1.49755859375, 1.8963623046875, 2.295166015625, 2.6939697265625, 3.0927734375, 3.4915771484375, 3.890380859375, 4.2891845703125, 4.68798828125, 5.0867919921875, 5.485595703125, 5.8843994140625, 6.283203125, 6.6820068359375, 7.080810546875, 7.4796142578125, 7.87841796875, 8.2772216796875, 8.676025390625, 9.0748291015625, 9.4736328125, 9.8724365234375, 10.271240234375, 10.6700439453125, 11.06884765625, 11.4676513671875, 11.866455078125, 12.2652587890625, 12.6640625]}, "gradients/decoder.transformer.h.5.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 3.0, 3.0, 4.0, 3.0, 10.0, 19.0, 13.0, 22.0, 18.0, 39.0, 58.0, 80.0, 113.0, 171.0, 307.0, 594.0, 1398.0, 4944.0, 42323.0, 969154.0, 3019405.0, 140610.0, 10939.0, 2137.0, 816.0, 372.0, 250.0, 155.0, 95.0, 76.0, 40.0, 29.0, 21.0, 19.0, 13.0, 7.0, 8.0, 7.0, 4.0, 4.0, 4.0, 2.0, 1.0, 0.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-30.125, -29.1279296875, -28.130859375, -27.1337890625, -26.13671875, -25.1396484375, -24.142578125, -23.1455078125, -22.1484375, -21.1513671875, -20.154296875, -19.1572265625, -18.16015625, -17.1630859375, -16.166015625, -15.1689453125, -14.171875, -13.1748046875, -12.177734375, -11.1806640625, -10.18359375, -9.1865234375, -8.189453125, -7.1923828125, -6.1953125, -5.1982421875, -4.201171875, -3.2041015625, -2.20703125, -1.2099609375, -0.212890625, 0.7841796875, 1.78125, 2.7783203125, 3.775390625, 4.7724609375, 5.76953125, 6.7666015625, 7.763671875, 8.7607421875, 9.7578125, 10.7548828125, 11.751953125, 12.7490234375, 13.74609375, 14.7431640625, 15.740234375, 16.7373046875, 17.734375, 18.7314453125, 19.728515625, 20.7255859375, 21.72265625, 22.7197265625, 23.716796875, 24.7138671875, 25.7109375, 26.7080078125, 27.705078125, 28.7021484375, 29.69921875, 30.6962890625, 31.693359375, 32.6904296875, 33.6875]}, "gradients/decoder.transformer.h.5.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 4.0, 31.0, 132.0, 335.0, 358.0, 138.0, 14.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-168.94461059570312, -162.89486694335938, -156.84510803222656, -150.7953643798828, -144.74560546875, -138.69586181640625, -132.6461181640625, -126.59635925292969, -120.5466079711914, -114.49685668945312, -108.44710540771484, -102.39735412597656, -96.34761047363281, -90.2978515625, -84.24810791015625, -78.19835662841797, -72.14860534667969, -66.0988540649414, -60.049102783203125, -53.99935531616211, -47.94960403442383, -41.89985275268555, -35.85010528564453, -29.80035400390625, -23.75060272216797, -17.700851440429688, -11.651102066040039, -5.601352691650391, 0.4483985900878906, 6.498149871826172, 12.547897338867188, 18.59764862060547, 24.647384643554688, 30.69713592529297, 36.74688720703125, 42.796634674072266, 48.84638595581055, 54.89613723754883, 60.945884704589844, 66.99563598632812, 73.0453872680664, 79.09513854980469, 85.14488983154297, 91.19464111328125, 97.244384765625, 103.29414367675781, 109.34388732910156, 115.39363861083984, 121.44338989257812, 127.4931411743164, 133.5428924560547, 139.59263610839844, 145.64239501953125, 151.692138671875, 157.74188232421875, 163.79164123535156, 169.84140014648438, 175.89114379882812, 181.94090270996094, 187.9906463623047, 194.0404052734375, 200.09014892578125, 206.139892578125, 212.1896514892578, 218.23939514160156]}, "gradients/decoder.transformer.h.5.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 2.0, 4.0, 4.0, 6.0, 3.0, 4.0, 9.0, 12.0, 12.0, 12.0, 20.0, 20.0, 11.0, 22.0, 23.0, 32.0, 34.0, 37.0, 31.0, 46.0, 37.0, 32.0, 40.0, 47.0, 33.0, 48.0, 38.0, 28.0, 29.0, 33.0, 41.0, 37.0, 35.0, 14.0, 21.0, 21.0, 27.0, 17.0, 10.0, 16.0, 15.0, 13.0, 10.0, 6.0, 3.0, 5.0, 3.0, 2.0, 3.0, 3.0, 2.0, 2.0, 1.0, 0.0, 2.0, 0.0, 1.0], "bins": [-37.40069580078125, -36.1751708984375, -34.94964599609375, -33.724117279052734, -32.498592376708984, -31.273067474365234, -30.04754066467285, -28.82201385498047, -27.59648895263672, -26.37096405029297, -25.145437240600586, -23.919910430908203, -22.694385528564453, -21.468860626220703, -20.24333381652832, -19.017807006835938, -17.792282104492188, -16.566757202148438, -15.341230392456055, -14.115704536437988, -12.890178680419922, -11.664652824401855, -10.439126968383789, -9.213601112365723, -7.988075256347656, -6.76254940032959, -5.537023544311523, -4.311497688293457, -3.0859718322753906, -1.8604459762573242, -0.6349201202392578, 0.5906057357788086, 1.8161354064941406, 3.041661262512207, 4.267187118530273, 5.49271297454834, 6.718238830566406, 7.943764686584473, 9.169290542602539, 10.394816398620605, 11.620342254638672, 12.845868110656738, 14.071393966674805, 15.296919822692871, 16.522445678710938, 17.747970581054688, 18.97349739074707, 20.199024200439453, 21.424549102783203, 22.650074005126953, 23.875600814819336, 25.10112762451172, 26.32665252685547, 27.55217742919922, 28.7777042388916, 30.003231048583984, 31.228755950927734, 32.454280853271484, 33.6798095703125, 34.90533447265625, 36.130859375, 37.35638427734375, 38.5819091796875, 39.807437896728516, 41.032962799072266]}, "gradients/decoder.transformer.h.5.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 4.0, 3.0, 4.0, 9.0, 10.0, 5.0, 11.0, 10.0, 13.0, 26.0, 22.0, 20.0, 28.0, 21.0, 32.0, 44.0, 35.0, 36.0, 37.0, 56.0, 48.0, 47.0, 48.0, 47.0, 37.0, 46.0, 45.0, 37.0, 32.0, 31.0, 22.0, 19.0, 21.0, 21.0, 18.0, 13.0, 16.0, 10.0, 9.0, 3.0, 4.0, 2.0, 4.0, 0.0, 4.0, 2.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-7.25, -7.006591796875, -6.76318359375, -6.519775390625, -6.2763671875, -6.032958984375, -5.78955078125, -5.546142578125, -5.302734375, -5.059326171875, -4.81591796875, -4.572509765625, -4.3291015625, -4.085693359375, -3.84228515625, -3.598876953125, -3.35546875, -3.112060546875, -2.86865234375, -2.625244140625, -2.3818359375, -2.138427734375, -1.89501953125, -1.651611328125, -1.408203125, -1.164794921875, -0.92138671875, -0.677978515625, -0.4345703125, -0.191162109375, 0.05224609375, 0.295654296875, 0.5390625, 0.782470703125, 1.02587890625, 1.269287109375, 1.5126953125, 1.756103515625, 1.99951171875, 2.242919921875, 2.486328125, 2.729736328125, 2.97314453125, 3.216552734375, 3.4599609375, 3.703369140625, 3.94677734375, 4.190185546875, 4.43359375, 4.677001953125, 4.92041015625, 5.163818359375, 5.4072265625, 5.650634765625, 5.89404296875, 6.137451171875, 6.380859375, 6.624267578125, 6.86767578125, 7.111083984375, 7.3544921875, 7.597900390625, 7.84130859375, 8.084716796875, 8.328125]}, "gradients/decoder.transformer.h.5.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 4.0, 4.0, 8.0, 17.0, 16.0, 30.0, 39.0, 54.0, 93.0, 142.0, 206.0, 287.0, 446.0, 575.0, 925.0, 1405.0, 2110.0, 3034.0, 4723.0, 7320.0, 10912.0, 16870.0, 25890.0, 39162.0, 59454.0, 86754.0, 118581.0, 144067.0, 143976.0, 118776.0, 87437.0, 59737.0, 39730.0, 26043.0, 17293.0, 11091.0, 7257.0, 4662.0, 3038.0, 2075.0, 1340.0, 979.0, 602.0, 414.0, 311.0, 220.0, 131.0, 109.0, 68.0, 39.0, 41.0, 28.0, 11.0, 11.0, 11.0, 7.0, 4.0, 3.0, 0.0, 1.0], "bins": [-0.76416015625, -0.7402877807617188, -0.7164154052734375, -0.6925430297851562, -0.668670654296875, -0.6447982788085938, -0.6209259033203125, -0.5970535278320312, -0.57318115234375, -0.5493087768554688, -0.5254364013671875, -0.5015640258789062, -0.477691650390625, -0.45381927490234375, -0.4299468994140625, -0.40607452392578125, -0.3822021484375, -0.35832977294921875, -0.3344573974609375, -0.31058502197265625, -0.286712646484375, -0.26284027099609375, -0.2389678955078125, -0.21509552001953125, -0.19122314453125, -0.16735076904296875, -0.1434783935546875, -0.11960601806640625, -0.095733642578125, -0.07186126708984375, -0.0479888916015625, -0.02411651611328125, -0.000244140625, 0.02362823486328125, 0.0475006103515625, 0.07137298583984375, 0.095245361328125, 0.11911773681640625, 0.1429901123046875, 0.16686248779296875, 0.19073486328125, 0.21460723876953125, 0.2384796142578125, 0.26235198974609375, 0.286224365234375, 0.31009674072265625, 0.3339691162109375, 0.35784149169921875, 0.3817138671875, 0.40558624267578125, 0.4294586181640625, 0.45333099365234375, 0.477203369140625, 0.5010757446289062, 0.5249481201171875, 0.5488204956054688, 0.57269287109375, 0.5965652465820312, 0.6204376220703125, 0.6443099975585938, 0.668182373046875, 0.6920547485351562, 0.7159271240234375, 0.7397994995117188, 0.763671875]}, "gradients/decoder.transformer.h.5.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 5.0, 1.0, 5.0, 5.0, 3.0, 9.0, 12.0, 6.0, 4.0, 9.0, 9.0, 24.0, 20.0, 17.0, 32.0, 35.0, 35.0, 35.0, 39.0, 38.0, 49.0, 36.0, 1076.0, 44.0, 46.0, 44.0, 38.0, 34.0, 50.0, 29.0, 27.0, 23.0, 27.0, 22.0, 23.0, 24.0, 19.0, 23.0, 13.0, 12.0, 9.0, 1.0, 6.0, 6.0, 3.0, 3.0, 3.0, 2.0, 0.0, 4.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.62109375, -4.47174072265625, -4.3223876953125, -4.17303466796875, -4.023681640625, -3.87432861328125, -3.7249755859375, -3.57562255859375, -3.42626953125, -3.27691650390625, -3.1275634765625, -2.97821044921875, -2.828857421875, -2.67950439453125, -2.5301513671875, -2.38079833984375, -2.2314453125, -2.08209228515625, -1.9327392578125, -1.78338623046875, -1.634033203125, -1.48468017578125, -1.3353271484375, -1.18597412109375, -1.03662109375, -0.88726806640625, -0.7379150390625, -0.58856201171875, -0.439208984375, -0.28985595703125, -0.1405029296875, 0.00885009765625, 0.158203125, 0.30755615234375, 0.4569091796875, 0.60626220703125, 0.755615234375, 0.90496826171875, 1.0543212890625, 1.20367431640625, 1.35302734375, 1.50238037109375, 1.6517333984375, 1.80108642578125, 1.950439453125, 2.09979248046875, 2.2491455078125, 2.39849853515625, 2.5478515625, 2.69720458984375, 2.8465576171875, 2.99591064453125, 3.145263671875, 3.29461669921875, 3.4439697265625, 3.59332275390625, 3.74267578125, 3.89202880859375, 4.0413818359375, 4.19073486328125, 4.340087890625, 4.48944091796875, 4.6387939453125, 4.78814697265625, 4.9375]}, "gradients/decoder.transformer.h.5.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 4.0, 6.0, 2.0, 5.0, 9.0, 29.0, 16.0, 45.0, 83.0, 96.0, 167.0, 239.0, 385.0, 593.0, 940.0, 1454.0, 2237.0, 3452.0, 5451.0, 8836.0, 13881.0, 21530.0, 32749.0, 49931.0, 73228.0, 103316.0, 133520.0, 1195844.0, 132567.0, 102911.0, 72987.0, 49552.0, 32658.0, 21217.0, 13477.0, 8647.0, 5498.0, 3545.0, 2237.0, 1358.0, 872.0, 548.0, 387.0, 220.0, 150.0, 77.0, 59.0, 40.0, 24.0, 18.0, 20.0, 7.0, 6.0, 7.0, 2.0, 1.0, 4.0, 4.0, 0.0, 0.0, 1.0], "bins": [-0.52587890625, -0.5086288452148438, -0.4913787841796875, -0.47412872314453125, -0.456878662109375, -0.43962860107421875, -0.4223785400390625, -0.40512847900390625, -0.38787841796875, -0.37062835693359375, -0.3533782958984375, -0.33612823486328125, -0.318878173828125, -0.30162811279296875, -0.2843780517578125, -0.26712799072265625, -0.2498779296875, -0.23262786865234375, -0.2153778076171875, -0.19812774658203125, -0.180877685546875, -0.16362762451171875, -0.1463775634765625, -0.12912750244140625, -0.11187744140625, -0.09462738037109375, -0.0773773193359375, -0.06012725830078125, -0.042877197265625, -0.02562713623046875, -0.0083770751953125, 0.00887298583984375, 0.026123046875, 0.04337310791015625, 0.0606231689453125, 0.07787322998046875, 0.095123291015625, 0.11237335205078125, 0.1296234130859375, 0.14687347412109375, 0.16412353515625, 0.18137359619140625, 0.1986236572265625, 0.21587371826171875, 0.233123779296875, 0.25037384033203125, 0.2676239013671875, 0.28487396240234375, 0.3021240234375, 0.31937408447265625, 0.3366241455078125, 0.35387420654296875, 0.371124267578125, 0.38837432861328125, 0.4056243896484375, 0.42287445068359375, 0.44012451171875, 0.45737457275390625, 0.4746246337890625, 0.49187469482421875, 0.509124755859375, 0.5263748168945312, 0.5436248779296875, 0.5608749389648438, 0.578125]}, "gradients/decoder.transformer.h.5.crossattention.q_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 3.0, 2.0, 1.0, 1.0, 1.0, 4.0, 4.0, 3.0, 9.0, 6.0, 4.0, 6.0, 20.0, 18.0, 21.0, 17.0, 41.0, 49.0, 65.0, 127.0, 165.0, 142.0, 76.0, 63.0, 35.0, 30.0, 18.0, 23.0, 17.0, 9.0, 7.0, 1.0, 4.0, 2.0, 5.0, 7.0, 2.0, 1.0, 3.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-0.01270294189453125, -0.0122758150100708, -0.011848688125610352, -0.011421561241149902, -0.010994434356689453, -0.010567307472229004, -0.010140180587768555, -0.009713053703308105, -0.009285926818847656, -0.008858799934387207, -0.008431673049926758, -0.008004546165466309, -0.007577419281005859, -0.00715029239654541, -0.006723165512084961, -0.006296038627624512, -0.0058689117431640625, -0.005441784858703613, -0.005014657974243164, -0.004587531089782715, -0.004160404205322266, -0.0037332773208618164, -0.003306150436401367, -0.002879023551940918, -0.0024518966674804688, -0.0020247697830200195, -0.0015976428985595703, -0.001170516014099121, -0.0007433891296386719, -0.00031626224517822266, 0.00011086463928222656, 0.0005379915237426758, 0.000965118408203125, 0.0013922452926635742, 0.0018193721771240234, 0.0022464990615844727, 0.002673625946044922, 0.003100752830505371, 0.0035278797149658203, 0.0039550065994262695, 0.004382133483886719, 0.004809260368347168, 0.005236387252807617, 0.005663514137268066, 0.006090641021728516, 0.006517767906188965, 0.006944894790649414, 0.007372021675109863, 0.0077991485595703125, 0.008226275444030762, 0.008653402328491211, 0.00908052921295166, 0.00950765609741211, 0.009934782981872559, 0.010361909866333008, 0.010789036750793457, 0.011216163635253906, 0.011643290519714355, 0.012070417404174805, 0.012497544288635254, 0.012924671173095703, 0.013351798057556152, 0.013778924942016602, 0.01420605182647705, 0.0146331787109375]}, "gradients/decoder.transformer.h.5.crossattention.q_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 2.0, 2.0, 1.0, 6.0, 6.0, 5.0, 1.0, 10.0, 11.0, 16.0, 27.0, 23.0, 39.0, 71.0, 107.0, 203.0, 473.0, 6216.0, 1038265.0, 2149.0, 428.0, 170.0, 91.0, 67.0, 40.0, 37.0, 23.0, 14.0, 10.0, 6.0, 13.0, 8.0, 6.0, 4.0, 1.0, 1.0, 3.0, 3.0, 2.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-0.3037109375, -0.29488372802734375, -0.2860565185546875, -0.27722930908203125, -0.268402099609375, -0.25957489013671875, -0.2507476806640625, -0.24192047119140625, -0.23309326171875, -0.22426605224609375, -0.2154388427734375, -0.20661163330078125, -0.197784423828125, -0.18895721435546875, -0.1801300048828125, -0.17130279541015625, -0.1624755859375, -0.15364837646484375, -0.1448211669921875, -0.13599395751953125, -0.127166748046875, -0.11833953857421875, -0.1095123291015625, -0.10068511962890625, -0.09185791015625, -0.08303070068359375, -0.0742034912109375, -0.06537628173828125, -0.056549072265625, -0.04772186279296875, -0.0388946533203125, -0.03006744384765625, -0.021240234375, -0.01241302490234375, -0.0035858154296875, 0.00524139404296875, 0.014068603515625, 0.02289581298828125, 0.0317230224609375, 0.04055023193359375, 0.04937744140625, 0.05820465087890625, 0.0670318603515625, 0.07585906982421875, 0.084686279296875, 0.09351348876953125, 0.1023406982421875, 0.11116790771484375, 0.1199951171875, 0.12882232666015625, 0.1376495361328125, 0.14647674560546875, 0.155303955078125, 0.16413116455078125, 0.1729583740234375, 0.18178558349609375, 0.19061279296875, 0.19944000244140625, 0.2082672119140625, 0.21709442138671875, 0.225921630859375, 0.23474884033203125, 0.2435760498046875, 0.25240325927734375, 0.26123046875]}, "gradients/decoder.transformer.h.5.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 10.0, 968.0, 37.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.11654668301343918, -0.11141981184482574, -0.10629294812679291, -0.10116607695817947, -0.09603920578956604, -0.0909123346209526, -0.08578546345233917, -0.08065859973430634, -0.0755317285656929, -0.07040485739707947, -0.06527799367904663, -0.0601511225104332, -0.05502425134181976, -0.04989738017320633, -0.044770512729883194, -0.03964364528656006, -0.034516774117946625, -0.02938990481197834, -0.024263035506010056, -0.01913616620004177, -0.014009296894073486, -0.008882427588105202, -0.003755558282136917, 0.0013713091611862183, 0.006498180329799652, 0.011625049635767937, 0.01675191894173622, 0.021878788247704506, 0.02700565755367279, 0.032132528722286224, 0.03725939616560936, 0.042386263608932495, 0.047513142228126526, 0.05264001339673996, 0.057766880840063095, 0.06289374828338623, 0.06802061945199966, 0.0731474906206131, 0.07827435433864594, 0.08340122550725937, 0.0885280966758728, 0.09365496784448624, 0.09878183901309967, 0.10390870273113251, 0.10903557389974594, 0.11416244506835938, 0.11928930878639221, 0.12441617995500565, 0.12954305112361908, 0.13466991484165192, 0.13979679346084595, 0.14492365717887878, 0.15005052089691162, 0.15517739951610565, 0.1603042632341385, 0.16543114185333252, 0.17055800557136536, 0.1756848692893982, 0.18081174790859222, 0.18593861162662506, 0.1910654902458191, 0.19619235396385193, 0.20131921768188477, 0.2064460813999176, 0.21157296001911163]}, "gradients/decoder.transformer.h.5.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 4.0, 7.0, 9.0, 10.0, 3.0, 16.0, 20.0, 21.0, 21.0, 31.0, 41.0, 44.0, 47.0, 62.0, 47.0, 45.0, 44.0, 57.0, 72.0, 46.0, 58.0, 54.0, 39.0, 34.0, 41.0, 24.0, 24.0, 18.0, 16.0, 14.0, 5.0, 5.0, 6.0, 11.0, 5.0, 2.0, 1.0, 4.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.010081470012664795, -0.00971247348934412, -0.009343476966023445, -0.00897448044270277, -0.008605483919382095, -0.00823648739606142, -0.007867490872740746, -0.007498494349420071, -0.007129497826099396, -0.006760501302778721, -0.006391504779458046, -0.006022508256137371, -0.005653511732816696, -0.005284515209496021, -0.004915518686175346, -0.0045465221628546715, -0.004177525639533997, -0.0038085291162133217, -0.003439532592892647, -0.003070536069571972, -0.002701539546251297, -0.002332543022930622, -0.001963546499609947, -0.0015945499762892723, -0.0012255534529685974, -0.0008565569296479225, -0.0004875604063272476, -0.00011856388300657272, 0.00025043264031410217, 0.0006194291636347771, 0.000988425686955452, 0.0013574222102761269, 0.0017264187335968018, 0.0020954152569174767, 0.0024644117802381516, 0.0028334083035588264, 0.0032024048268795013, 0.0035714013502001762, 0.003940397873520851, 0.004309394396841526, 0.004678390920162201, 0.005047387443482876, 0.005416383966803551, 0.005785380490124226, 0.0061543770134449005, 0.006523373536765575, 0.00689237006008625, 0.007261366583406925, 0.0076303631067276, 0.007999359630048275, 0.00836835615336895, 0.008737352676689625, 0.0091063492000103, 0.009475345723330975, 0.00984434224665165, 0.010213338769972324, 0.010582335293293, 0.010951331816613674, 0.011320328339934349, 0.011689324863255024, 0.012058321386575699, 0.012427317909896374, 0.012796314433217049, 0.013165310956537724, 0.013534307479858398]}, "gradients/decoder.transformer.h.5.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 4.0, 3.0, 4.0, 9.0, 10.0, 5.0, 11.0, 10.0, 13.0, 26.0, 22.0, 20.0, 28.0, 21.0, 32.0, 44.0, 35.0, 36.0, 37.0, 56.0, 48.0, 47.0, 48.0, 47.0, 37.0, 46.0, 45.0, 37.0, 33.0, 30.0, 22.0, 19.0, 21.0, 21.0, 18.0, 13.0, 16.0, 10.0, 9.0, 3.0, 4.0, 2.0, 4.0, 0.0, 4.0, 2.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-7.25, -7.006591796875, -6.76318359375, -6.519775390625, -6.2763671875, -6.032958984375, -5.78955078125, -5.546142578125, -5.302734375, -5.059326171875, -4.81591796875, -4.572509765625, -4.3291015625, -4.085693359375, -3.84228515625, -3.598876953125, -3.35546875, -3.112060546875, -2.86865234375, -2.625244140625, -2.3818359375, -2.138427734375, -1.89501953125, -1.651611328125, -1.408203125, -1.164794921875, -0.92138671875, -0.677978515625, -0.4345703125, -0.191162109375, 0.05224609375, 0.295654296875, 0.5390625, 0.782470703125, 1.02587890625, 1.269287109375, 1.5126953125, 1.756103515625, 1.99951171875, 2.242919921875, 2.486328125, 2.729736328125, 2.97314453125, 3.216552734375, 3.4599609375, 3.703369140625, 3.94677734375, 4.190185546875, 4.43359375, 4.677001953125, 4.92041015625, 5.163818359375, 5.4072265625, 5.650634765625, 5.89404296875, 6.137451171875, 6.380859375, 6.624267578125, 6.86767578125, 7.111083984375, 7.3544921875, 7.597900390625, 7.84130859375, 8.084716796875, 8.328125]}, "gradients/decoder.transformer.h.5.attn.c_proj.weight": {"_type": "histogram", "values": [2.0, 2.0, 3.0, 1.0, 6.0, 4.0, 5.0, 6.0, 17.0, 19.0, 25.0, 37.0, 57.0, 78.0, 98.0, 161.0, 253.0, 398.0, 669.0, 1135.0, 2040.0, 4037.0, 8320.0, 18574.0, 42975.0, 99290.0, 234972.0, 348306.0, 161247.0, 69430.0, 30214.0, 13134.0, 6209.0, 2937.0, 1563.0, 853.0, 530.0, 326.0, 195.0, 131.0, 90.0, 61.0, 50.0, 25.0, 26.0, 17.0, 11.0, 12.0, 5.0, 3.0, 4.0, 4.0, 1.0, 3.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.99609375, -7.70306396484375, -7.4100341796875, -7.11700439453125, -6.823974609375, -6.53094482421875, -6.2379150390625, -5.94488525390625, -5.65185546875, -5.35882568359375, -5.0657958984375, -4.77276611328125, -4.479736328125, -4.18670654296875, -3.8936767578125, -3.60064697265625, -3.3076171875, -3.01458740234375, -2.7215576171875, -2.42852783203125, -2.135498046875, -1.84246826171875, -1.5494384765625, -1.25640869140625, -0.96337890625, -0.67034912109375, -0.3773193359375, -0.08428955078125, 0.208740234375, 0.50177001953125, 0.7947998046875, 1.08782958984375, 1.380859375, 1.67388916015625, 1.9669189453125, 2.25994873046875, 2.552978515625, 2.84600830078125, 3.1390380859375, 3.43206787109375, 3.72509765625, 4.01812744140625, 4.3111572265625, 4.60418701171875, 4.897216796875, 5.19024658203125, 5.4832763671875, 5.77630615234375, 6.0693359375, 6.36236572265625, 6.6553955078125, 6.94842529296875, 7.241455078125, 7.53448486328125, 7.8275146484375, 8.12054443359375, 8.41357421875, 8.70660400390625, 8.9996337890625, 9.29266357421875, 9.585693359375, 9.87872314453125, 10.1717529296875, 10.46478271484375, 10.7578125]}, "gradients/decoder.transformer.h.5.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 4.0, 2.0, 4.0, 3.0, 6.0, 12.0, 6.0, 6.0, 13.0, 15.0, 13.0, 8.0, 20.0, 27.0, 28.0, 28.0, 36.0, 29.0, 37.0, 59.0, 69.0, 66.0, 118.0, 182.0, 1294.0, 274.0, 149.0, 99.0, 64.0, 44.0, 41.0, 43.0, 30.0, 30.0, 17.0, 31.0, 21.0, 15.0, 15.0, 16.0, 17.0, 10.0, 7.0, 10.0, 5.0, 6.0, 4.0, 5.0, 6.0, 5.0, 5.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 2.0], "bins": [-17.234375, -16.677978515625, -16.12158203125, -15.565185546875, -15.0087890625, -14.452392578125, -13.89599609375, -13.339599609375, -12.783203125, -12.226806640625, -11.67041015625, -11.114013671875, -10.5576171875, -10.001220703125, -9.44482421875, -8.888427734375, -8.33203125, -7.775634765625, -7.21923828125, -6.662841796875, -6.1064453125, -5.550048828125, -4.99365234375, -4.437255859375, -3.880859375, -3.324462890625, -2.76806640625, -2.211669921875, -1.6552734375, -1.098876953125, -0.54248046875, 0.013916015625, 0.5703125, 1.126708984375, 1.68310546875, 2.239501953125, 2.7958984375, 3.352294921875, 3.90869140625, 4.465087890625, 5.021484375, 5.577880859375, 6.13427734375, 6.690673828125, 7.2470703125, 7.803466796875, 8.35986328125, 8.916259765625, 9.47265625, 10.029052734375, 10.58544921875, 11.141845703125, 11.6982421875, 12.254638671875, 12.81103515625, 13.367431640625, 13.923828125, 14.480224609375, 15.03662109375, 15.593017578125, 16.1494140625, 16.705810546875, 17.26220703125, 17.818603515625, 18.375]}, "gradients/decoder.transformer.h.5.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 5.0, 3.0, 2.0, 5.0, 9.0, 5.0, 5.0, 1.0, 9.0, 17.0, 19.0, 39.0, 45.0, 64.0, 85.0, 162.0, 209.0, 336.0, 714.0, 2705.0, 43063.0, 2574312.0, 507741.0, 13328.0, 1421.0, 548.0, 286.0, 198.0, 108.0, 77.0, 63.0, 41.0, 21.0, 31.0, 8.0, 11.0, 7.0, 6.0, 3.0, 4.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-42.53125, -41.1943359375, -39.857421875, -38.5205078125, -37.18359375, -35.8466796875, -34.509765625, -33.1728515625, -31.8359375, -30.4990234375, -29.162109375, -27.8251953125, -26.48828125, -25.1513671875, -23.814453125, -22.4775390625, -21.140625, -19.8037109375, -18.466796875, -17.1298828125, -15.79296875, -14.4560546875, -13.119140625, -11.7822265625, -10.4453125, -9.1083984375, -7.771484375, -6.4345703125, -5.09765625, -3.7607421875, -2.423828125, -1.0869140625, 0.25, 1.5869140625, 2.923828125, 4.2607421875, 5.59765625, 6.9345703125, 8.271484375, 9.6083984375, 10.9453125, 12.2822265625, 13.619140625, 14.9560546875, 16.29296875, 17.6298828125, 18.966796875, 20.3037109375, 21.640625, 22.9775390625, 24.314453125, 25.6513671875, 26.98828125, 28.3251953125, 29.662109375, 30.9990234375, 32.3359375, 33.6728515625, 35.009765625, 36.3466796875, 37.68359375, 39.0205078125, 40.357421875, 41.6943359375, 43.03125]}, "gradients/decoder.transformer.h.5.ln_1.weight": {"_type": "histogram", "values": [4.0, 47.0, 322.0, 459.0, 167.0, 15.0, 3.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-16.130380630493164, -11.101810455322266, -6.073239326477051, -1.044668197631836, 3.9839019775390625, 9.012472152709961, 14.041044235229492, 19.069612503051758, 24.09818458557129, 29.126754760742188, 34.15532684326172, 39.18389892578125, 44.212467193603516, 49.24103546142578, 54.26960754394531, 59.298179626464844, 64.32675170898438, 69.3553237915039, 74.38389587402344, 79.41246032714844, 84.44103240966797, 89.4696044921875, 94.49817657470703, 99.52674865722656, 104.55531311035156, 109.5838851928711, 114.61245727539062, 119.64102172851562, 124.66959381103516, 129.6981658935547, 134.72674560546875, 139.75531005859375, 144.7838897705078, 149.8124542236328, 154.84103393554688, 159.86959838867188, 164.89817810058594, 169.92674255371094, 174.955322265625, 179.98388671875, 185.012451171875, 190.041015625, 195.06959533691406, 200.09815979003906, 205.12673950195312, 210.15530395507812, 215.18386840820312, 220.2124481201172, 225.24102783203125, 230.26959228515625, 235.2981719970703, 240.3267364501953, 245.35531616210938, 250.38388061523438, 255.41244506835938, 260.4410095214844, 265.4695739746094, 270.4981384277344, 275.5267028808594, 280.5552978515625, 285.5838623046875, 290.6124267578125, 295.6409912109375, 300.6695556640625, 305.6981506347656]}, "gradients/decoder.transformer.h.5.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 3.0, 3.0, 5.0, 3.0, 7.0, 6.0, 7.0, 4.0, 17.0, 18.0, 16.0, 22.0, 21.0, 16.0, 17.0, 26.0, 25.0, 22.0, 29.0, 37.0, 32.0, 41.0, 54.0, 37.0, 39.0, 51.0, 44.0, 33.0, 46.0, 31.0, 34.0, 35.0, 23.0, 27.0, 19.0, 22.0, 16.0, 19.0, 18.0, 20.0, 9.0, 10.0, 12.0, 8.0, 8.0, 6.0, 5.0, 4.0, 2.0, 2.0, 1.0, 1.0, 2.0], "bins": [-47.337684631347656, -45.98408508300781, -44.630489349365234, -43.27688980102539, -41.92329406738281, -40.56969451904297, -39.216094970703125, -37.86249542236328, -36.5088996887207, -35.15530014038086, -33.80170440673828, -32.44810485839844, -31.094507217407227, -29.740909576416016, -28.387310028076172, -27.03371238708496, -25.68011474609375, -24.32651710510254, -22.972919464111328, -21.619319915771484, -20.265722274780273, -18.912124633789062, -17.55852508544922, -16.204927444458008, -14.851329803466797, -13.497732162475586, -12.144133567810059, -10.790534973144531, -9.43693733215332, -8.08333969116211, -6.729741096496582, -5.376142501831055, -4.022544860839844, -2.6689467430114746, -1.3153486251831055, 0.03824949264526367, 1.3918476104736328, 2.745445728302002, 4.099043846130371, 5.452642440795898, 6.806240081787109, 8.15983772277832, 9.513436317443848, 10.867034912109375, 12.220632553100586, 13.574230194091797, 14.927828788757324, 16.28142738342285, 17.635025024414062, 18.988622665405273, 20.342220306396484, 21.695819854736328, 23.04941749572754, 24.40301513671875, 25.756614685058594, 27.110212326049805, 28.463809967041016, 29.817407608032227, 31.171005249023438, 32.52460479736328, 33.878204345703125, 35.2318000793457, 36.58539962768555, 37.938995361328125, 39.29259490966797]}, "gradients/decoder.transformer.h.4.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 4.0, 4.0, 1.0, 1.0, 6.0, 7.0, 1.0, 7.0, 9.0, 10.0, 10.0, 9.0, 10.0, 21.0, 21.0, 17.0, 23.0, 12.0, 33.0, 29.0, 34.0, 33.0, 39.0, 35.0, 33.0, 52.0, 43.0, 41.0, 39.0, 41.0, 35.0, 33.0, 35.0, 33.0, 29.0, 28.0, 26.0, 19.0, 17.0, 21.0, 23.0, 20.0, 13.0, 17.0, 5.0, 8.0, 9.0, 5.0, 2.0, 5.0, 2.0, 1.0, 1.0, 2.0, 2.0, 1.0, 2.0, 0.0, 1.0, 2.0], "bins": [-6.9140625, -6.69317626953125, -6.4722900390625, -6.25140380859375, -6.030517578125, -5.80963134765625, -5.5887451171875, -5.36785888671875, -5.14697265625, -4.92608642578125, -4.7052001953125, -4.48431396484375, -4.263427734375, -4.04254150390625, -3.8216552734375, -3.60076904296875, -3.3798828125, -3.15899658203125, -2.9381103515625, -2.71722412109375, -2.496337890625, -2.27545166015625, -2.0545654296875, -1.83367919921875, -1.61279296875, -1.39190673828125, -1.1710205078125, -0.95013427734375, -0.729248046875, -0.50836181640625, -0.2874755859375, -0.06658935546875, 0.154296875, 0.37518310546875, 0.5960693359375, 0.81695556640625, 1.037841796875, 1.25872802734375, 1.4796142578125, 1.70050048828125, 1.92138671875, 2.14227294921875, 2.3631591796875, 2.58404541015625, 2.804931640625, 3.02581787109375, 3.2467041015625, 3.46759033203125, 3.6884765625, 3.90936279296875, 4.1302490234375, 4.35113525390625, 4.572021484375, 4.79290771484375, 5.0137939453125, 5.23468017578125, 5.45556640625, 5.67645263671875, 5.8973388671875, 6.11822509765625, 6.339111328125, 6.55999755859375, 6.7808837890625, 7.00177001953125, 7.22265625]}, "gradients/decoder.transformer.h.4.mlp.c_proj.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 3.0, 0.0, 5.0, 4.0, 11.0, 12.0, 10.0, 28.0, 29.0, 50.0, 66.0, 93.0, 138.0, 232.0, 379.0, 631.0, 1166.0, 2361.0, 5043.0, 11858.0, 29863.0, 84292.0, 253912.0, 748805.0, 1484890.0, 1014405.0, 366068.0, 119555.0, 41387.0, 15868.0, 6548.0, 3033.0, 1499.0, 810.0, 442.0, 267.0, 187.0, 107.0, 70.0, 38.0, 40.0, 21.0, 16.0, 18.0, 14.0, 6.0, 4.0, 3.0, 3.0, 1.0, 1.0, 2.0, 0.0, 0.0, 2.0, 0.0, 1.0], "bins": [-8.078125, -7.822998046875, -7.56787109375, -7.312744140625, -7.0576171875, -6.802490234375, -6.54736328125, -6.292236328125, -6.037109375, -5.781982421875, -5.52685546875, -5.271728515625, -5.0166015625, -4.761474609375, -4.50634765625, -4.251220703125, -3.99609375, -3.740966796875, -3.48583984375, -3.230712890625, -2.9755859375, -2.720458984375, -2.46533203125, -2.210205078125, -1.955078125, -1.699951171875, -1.44482421875, -1.189697265625, -0.9345703125, -0.679443359375, -0.42431640625, -0.169189453125, 0.0859375, 0.341064453125, 0.59619140625, 0.851318359375, 1.1064453125, 1.361572265625, 1.61669921875, 1.871826171875, 2.126953125, 2.382080078125, 2.63720703125, 2.892333984375, 3.1474609375, 3.402587890625, 3.65771484375, 3.912841796875, 4.16796875, 4.423095703125, 4.67822265625, 4.933349609375, 5.1884765625, 5.443603515625, 5.69873046875, 5.953857421875, 6.208984375, 6.464111328125, 6.71923828125, 6.974365234375, 7.2294921875, 7.484619140625, 7.73974609375, 7.994873046875, 8.25]}, "gradients/decoder.transformer.h.4.mlp.c_fc.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 4.0, 2.0, 6.0, 9.0, 16.0, 17.0, 19.0, 16.0, 19.0, 40.0, 44.0, 72.0, 92.0, 132.0, 169.0, 204.0, 299.0, 440.0, 458.0, 446.0, 454.0, 326.0, 221.0, 145.0, 112.0, 71.0, 75.0, 41.0, 36.0, 21.0, 23.0, 13.0, 7.0, 13.0, 7.0, 3.0, 4.0, 2.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-12.125, -11.767578125, -11.41015625, -11.052734375, -10.6953125, -10.337890625, -9.98046875, -9.623046875, -9.265625, -8.908203125, -8.55078125, -8.193359375, -7.8359375, -7.478515625, -7.12109375, -6.763671875, -6.40625, -6.048828125, -5.69140625, -5.333984375, -4.9765625, -4.619140625, -4.26171875, -3.904296875, -3.546875, -3.189453125, -2.83203125, -2.474609375, -2.1171875, -1.759765625, -1.40234375, -1.044921875, -0.6875, -0.330078125, 0.02734375, 0.384765625, 0.7421875, 1.099609375, 1.45703125, 1.814453125, 2.171875, 2.529296875, 2.88671875, 3.244140625, 3.6015625, 3.958984375, 4.31640625, 4.673828125, 5.03125, 5.388671875, 5.74609375, 6.103515625, 6.4609375, 6.818359375, 7.17578125, 7.533203125, 7.890625, 8.248046875, 8.60546875, 8.962890625, 9.3203125, 9.677734375, 10.03515625, 10.392578125, 10.75]}, "gradients/decoder.transformer.h.4.mlp.c_fc.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 1.0, 2.0, 0.0, 3.0, 1.0, 3.0, 2.0, 3.0, 9.0, 6.0, 10.0, 20.0, 19.0, 47.0, 52.0, 76.0, 108.0, 181.0, 307.0, 514.0, 1178.0, 2468.0, 6599.0, 20034.0, 71956.0, 339234.0, 1917139.0, 1497872.0, 254024.0, 56424.0, 16075.0, 5439.0, 2172.0, 972.0, 503.0, 299.0, 168.0, 99.0, 88.0, 63.0, 41.0, 23.0, 18.0, 11.0, 11.0, 3.0, 3.0, 0.0, 3.0, 6.0, 2.0, 3.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 2.0], "bins": [-14.453125, -13.969970703125, -13.48681640625, -13.003662109375, -12.5205078125, -12.037353515625, -11.55419921875, -11.071044921875, -10.587890625, -10.104736328125, -9.62158203125, -9.138427734375, -8.6552734375, -8.172119140625, -7.68896484375, -7.205810546875, -6.72265625, -6.239501953125, -5.75634765625, -5.273193359375, -4.7900390625, -4.306884765625, -3.82373046875, -3.340576171875, -2.857421875, -2.374267578125, -1.89111328125, -1.407958984375, -0.9248046875, -0.441650390625, 0.04150390625, 0.524658203125, 1.0078125, 1.490966796875, 1.97412109375, 2.457275390625, 2.9404296875, 3.423583984375, 3.90673828125, 4.389892578125, 4.873046875, 5.356201171875, 5.83935546875, 6.322509765625, 6.8056640625, 7.288818359375, 7.77197265625, 8.255126953125, 8.73828125, 9.221435546875, 9.70458984375, 10.187744140625, 10.6708984375, 11.154052734375, 11.63720703125, 12.120361328125, 12.603515625, 13.086669921875, 13.56982421875, 14.052978515625, 14.5361328125, 15.019287109375, 15.50244140625, 15.985595703125, 16.46875]}, "gradients/decoder.transformer.h.4.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 10.0, 36.0, 222.0, 414.0, 268.0, 56.0, 8.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-226.86776733398438, -220.66490173339844, -214.4620361328125, -208.25917053222656, -202.05630493164062, -195.8534393310547, -189.65057373046875, -183.44769287109375, -177.24484252929688, -171.04197692871094, -164.839111328125, -158.63624572753906, -152.43338012695312, -146.2305145263672, -140.02764892578125, -133.82476806640625, -127.62190246582031, -121.41903686523438, -115.21617126464844, -109.0133056640625, -102.81044006347656, -96.60757446289062, -90.40470123291016, -84.20183563232422, -77.99897003173828, -71.79610443115234, -65.5932388305664, -59.3903694152832, -53.187503814697266, -46.98463821411133, -40.781768798828125, -34.57890319824219, -28.376052856445312, -22.173187255859375, -15.970319747924805, -9.767452239990234, -3.564586639404297, 2.6382789611816406, 8.841148376464844, 15.044013977050781, 21.24687957763672, 27.449745178222656, 33.652610778808594, 39.8554801940918, 46.058345794677734, 52.26121139526367, 58.464080810546875, 64.66694641113281, 70.86981201171875, 77.07267761230469, 83.27554321289062, 89.47840881347656, 95.6812744140625, 101.88414001464844, 108.0870132446289, 114.28987884521484, 120.49274444580078, 126.69561004638672, 132.8984832763672, 139.10134887695312, 145.30421447753906, 151.507080078125, 157.70994567871094, 163.91281127929688, 170.1156768798828]}, "gradients/decoder.transformer.h.4.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 1.0, 5.0, 3.0, 7.0, 7.0, 4.0, 7.0, 12.0, 4.0, 11.0, 17.0, 17.0, 15.0, 22.0, 16.0, 19.0, 20.0, 38.0, 26.0, 34.0, 22.0, 37.0, 45.0, 33.0, 35.0, 32.0, 55.0, 48.0, 39.0, 35.0, 39.0, 30.0, 37.0, 27.0, 20.0, 21.0, 18.0, 14.0, 17.0, 21.0, 13.0, 14.0, 13.0, 8.0, 8.0, 11.0, 4.0, 10.0, 9.0, 2.0, 2.0, 3.0, 1.0, 3.0, 0.0, 5.0, 0.0, 1.0, 1.0, 0.0, 2.0], "bins": [-32.07764434814453, -30.92927360534668, -29.780902862548828, -28.632532119750977, -27.484161376953125, -26.33578872680664, -25.18741798400879, -24.039047241210938, -22.890676498413086, -21.742305755615234, -20.593935012817383, -19.44556427001953, -18.297191619873047, -17.148822784423828, -16.000450134277344, -14.852079391479492, -13.70370864868164, -12.555337905883789, -11.406967163085938, -10.25859546661377, -9.110224723815918, -7.961853981018066, -6.813482761383057, -5.665111541748047, -4.516740798950195, -3.3683698177337646, -2.219998836517334, -1.0716278553009033, 0.07674312591552734, 1.225113868713379, 2.3734850883483887, 3.5218563079833984, 4.670230865478516, 5.818601608276367, 6.966972827911377, 8.115344047546387, 9.263714790344238, 10.41208553314209, 11.560457229614258, 12.70882797241211, 13.857198715209961, 15.005569458007812, 16.153940200805664, 17.302310943603516, 18.45068359375, 19.59905242919922, 20.747425079345703, 21.895795822143555, 23.044166564941406, 24.192537307739258, 25.34090805053711, 26.48927879333496, 27.637649536132812, 28.786022186279297, 29.93439292907715, 31.082763671875, 32.23113250732422, 33.3795051574707, 34.52787399291992, 35.676246643066406, 36.824615478515625, 37.97298812866211, 39.12135696411133, 40.26972961425781, 41.4181022644043]}, "gradients/decoder.transformer.h.4.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 3.0, 2.0, 4.0, 1.0, 7.0, 3.0, 8.0, 8.0, 9.0, 12.0, 7.0, 14.0, 19.0, 21.0, 16.0, 11.0, 20.0, 32.0, 17.0, 46.0, 27.0, 39.0, 21.0, 32.0, 37.0, 47.0, 35.0, 51.0, 42.0, 38.0, 43.0, 34.0, 33.0, 41.0, 26.0, 21.0, 26.0, 25.0, 20.0, 17.0, 16.0, 13.0, 8.0, 13.0, 8.0, 11.0, 2.0, 3.0, 5.0, 4.0, 4.0, 2.0, 6.0, 2.0, 3.0, 0.0, 0.0, 0.0, 1.0, 3.0, 2.0], "bins": [-6.58984375, -6.3702392578125, -6.150634765625, -5.9310302734375, -5.71142578125, -5.4918212890625, -5.272216796875, -5.0526123046875, -4.8330078125, -4.6134033203125, -4.393798828125, -4.1741943359375, -3.95458984375, -3.7349853515625, -3.515380859375, -3.2957763671875, -3.076171875, -2.8565673828125, -2.636962890625, -2.4173583984375, -2.19775390625, -1.9781494140625, -1.758544921875, -1.5389404296875, -1.3193359375, -1.0997314453125, -0.880126953125, -0.6605224609375, -0.44091796875, -0.2213134765625, -0.001708984375, 0.2178955078125, 0.4375, 0.6571044921875, 0.876708984375, 1.0963134765625, 1.31591796875, 1.5355224609375, 1.755126953125, 1.9747314453125, 2.1943359375, 2.4139404296875, 2.633544921875, 2.8531494140625, 3.07275390625, 3.2923583984375, 3.511962890625, 3.7315673828125, 3.951171875, 4.1707763671875, 4.390380859375, 4.6099853515625, 4.82958984375, 5.0491943359375, 5.268798828125, 5.4884033203125, 5.7080078125, 5.9276123046875, 6.147216796875, 6.3668212890625, 6.58642578125, 6.8060302734375, 7.025634765625, 7.2452392578125, 7.46484375]}, "gradients/decoder.transformer.h.4.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 5.0, 8.0, 7.0, 11.0, 15.0, 31.0, 35.0, 55.0, 107.0, 166.0, 250.0, 392.0, 562.0, 1043.0, 1686.0, 2584.0, 4224.0, 6785.0, 10826.0, 17050.0, 26764.0, 42380.0, 66633.0, 100843.0, 141448.0, 168233.0, 149232.0, 108687.0, 72361.0, 46605.0, 29703.0, 18639.0, 11692.0, 7403.0, 4580.0, 2839.0, 1757.0, 1078.0, 714.0, 438.0, 277.0, 150.0, 81.0, 50.0, 43.0, 31.0, 15.0, 20.0, 13.0, 5.0, 5.0, 5.0, 2.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.9716796875, -0.9418716430664062, -0.9120635986328125, -0.8822555541992188, -0.852447509765625, -0.8226394653320312, -0.7928314208984375, -0.7630233764648438, -0.73321533203125, -0.7034072875976562, -0.6735992431640625, -0.6437911987304688, -0.613983154296875, -0.5841751098632812, -0.5543670654296875, -0.5245590209960938, -0.4947509765625, -0.46494293212890625, -0.4351348876953125, -0.40532684326171875, -0.375518798828125, -0.34571075439453125, -0.3159027099609375, -0.28609466552734375, -0.25628662109375, -0.22647857666015625, -0.1966705322265625, -0.16686248779296875, -0.137054443359375, -0.10724639892578125, -0.0774383544921875, -0.04763031005859375, -0.017822265625, 0.01198577880859375, 0.0417938232421875, 0.07160186767578125, 0.101409912109375, 0.13121795654296875, 0.1610260009765625, 0.19083404541015625, 0.22064208984375, 0.25045013427734375, 0.2802581787109375, 0.31006622314453125, 0.339874267578125, 0.36968231201171875, 0.3994903564453125, 0.42929840087890625, 0.4591064453125, 0.48891448974609375, 0.5187225341796875, 0.5485305786132812, 0.578338623046875, 0.6081466674804688, 0.6379547119140625, 0.6677627563476562, 0.69757080078125, 0.7273788452148438, 0.7571868896484375, 0.7869949340820312, 0.816802978515625, 0.8466110229492188, 0.8764190673828125, 0.9062271118164062, 0.93603515625]}, "gradients/decoder.transformer.h.4.crossattention.c_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 3.0, 0.0, 2.0, 2.0, 2.0, 6.0, 5.0, 7.0, 9.0, 11.0, 11.0, 10.0, 13.0, 11.0, 18.0, 16.0, 24.0, 28.0, 31.0, 19.0, 34.0, 38.0, 28.0, 36.0, 29.0, 36.0, 48.0, 1065.0, 34.0, 49.0, 40.0, 39.0, 29.0, 38.0, 28.0, 27.0, 31.0, 32.0, 12.0, 21.0, 18.0, 12.0, 16.0, 19.0, 10.0, 12.0, 5.0, 11.0, 10.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-5.0546875, -4.9056396484375, -4.756591796875, -4.6075439453125, -4.45849609375, -4.3094482421875, -4.160400390625, -4.0113525390625, -3.8623046875, -3.7132568359375, -3.564208984375, -3.4151611328125, -3.26611328125, -3.1170654296875, -2.968017578125, -2.8189697265625, -2.669921875, -2.5208740234375, -2.371826171875, -2.2227783203125, -2.07373046875, -1.9246826171875, -1.775634765625, -1.6265869140625, -1.4775390625, -1.3284912109375, -1.179443359375, -1.0303955078125, -0.88134765625, -0.7322998046875, -0.583251953125, -0.4342041015625, -0.28515625, -0.1361083984375, 0.012939453125, 0.1619873046875, 0.31103515625, 0.4600830078125, 0.609130859375, 0.7581787109375, 0.9072265625, 1.0562744140625, 1.205322265625, 1.3543701171875, 1.50341796875, 1.6524658203125, 1.801513671875, 1.9505615234375, 2.099609375, 2.2486572265625, 2.397705078125, 2.5467529296875, 2.69580078125, 2.8448486328125, 2.993896484375, 3.1429443359375, 3.2919921875, 3.4410400390625, 3.590087890625, 3.7391357421875, 3.88818359375, 4.0372314453125, 4.186279296875, 4.3353271484375, 4.484375]}, "gradients/decoder.transformer.h.4.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 3.0, 6.0, 7.0, 14.0, 13.0, 29.0, 25.0, 62.0, 100.0, 134.0, 206.0, 390.0, 593.0, 909.0, 1470.0, 2217.0, 3710.0, 6072.0, 9382.0, 15040.0, 23703.0, 36821.0, 56293.0, 83106.0, 113300.0, 146818.0, 1186704.0, 125551.0, 95243.0, 66662.0, 44779.0, 28759.0, 18280.0, 11381.0, 7239.0, 4466.0, 2836.0, 1762.0, 1137.0, 678.0, 413.0, 309.0, 164.0, 135.0, 82.0, 37.0, 37.0, 23.0, 17.0, 8.0, 7.0, 4.0, 1.0, 4.0, 3.0, 1.0], "bins": [-0.63330078125, -0.6147918701171875, -0.596282958984375, -0.5777740478515625, -0.55926513671875, -0.5407562255859375, -0.522247314453125, -0.5037384033203125, -0.4852294921875, -0.4667205810546875, -0.448211669921875, -0.4297027587890625, -0.41119384765625, -0.3926849365234375, -0.374176025390625, -0.3556671142578125, -0.337158203125, -0.3186492919921875, -0.300140380859375, -0.2816314697265625, -0.26312255859375, -0.2446136474609375, -0.226104736328125, -0.2075958251953125, -0.1890869140625, -0.1705780029296875, -0.152069091796875, -0.1335601806640625, -0.11505126953125, -0.0965423583984375, -0.078033447265625, -0.0595245361328125, -0.041015625, -0.0225067138671875, -0.003997802734375, 0.0145111083984375, 0.03302001953125, 0.0515289306640625, 0.070037841796875, 0.0885467529296875, 0.1070556640625, 0.1255645751953125, 0.144073486328125, 0.1625823974609375, 0.18109130859375, 0.1996002197265625, 0.218109130859375, 0.2366180419921875, 0.255126953125, 0.2736358642578125, 0.292144775390625, 0.3106536865234375, 0.32916259765625, 0.3476715087890625, 0.366180419921875, 0.3846893310546875, 0.4031982421875, 0.4217071533203125, 0.440216064453125, 0.4587249755859375, 0.47723388671875, 0.4957427978515625, 0.514251708984375, 0.5327606201171875, 0.55126953125]}, "gradients/decoder.transformer.h.4.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 3.0, 0.0, 2.0, 2.0, 1.0, 7.0, 3.0, 4.0, 5.0, 9.0, 12.0, 12.0, 17.0, 32.0, 18.0, 22.0, 34.0, 52.0, 65.0, 122.0, 132.0, 119.0, 79.0, 52.0, 39.0, 30.0, 25.0, 35.0, 18.0, 10.0, 7.0, 11.0, 6.0, 6.0, 2.0, 0.0, 6.0, 1.0, 2.0, 5.0, 3.0, 3.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.015716552734375, -0.015249848365783691, -0.014783143997192383, -0.014316439628601074, -0.013849735260009766, -0.013383030891418457, -0.012916326522827148, -0.01244962215423584, -0.011982917785644531, -0.011516213417053223, -0.011049509048461914, -0.010582804679870605, -0.010116100311279297, -0.009649395942687988, -0.00918269157409668, -0.008715987205505371, -0.008249282836914062, -0.007782578468322754, -0.007315874099731445, -0.006849169731140137, -0.006382465362548828, -0.0059157609939575195, -0.005449056625366211, -0.004982352256774902, -0.004515647888183594, -0.004048943519592285, -0.0035822391510009766, -0.003115534782409668, -0.0026488304138183594, -0.0021821260452270508, -0.0017154216766357422, -0.0012487173080444336, -0.000782012939453125, -0.0003153085708618164, 0.0001513957977294922, 0.0006181001663208008, 0.0010848045349121094, 0.001551508903503418, 0.0020182132720947266, 0.002484917640686035, 0.0029516220092773438, 0.0034183263778686523, 0.003885030746459961, 0.0043517351150512695, 0.004818439483642578, 0.005285143852233887, 0.005751848220825195, 0.006218552589416504, 0.0066852569580078125, 0.007151961326599121, 0.00761866569519043, 0.008085370063781738, 0.008552074432373047, 0.009018778800964355, 0.009485483169555664, 0.009952187538146973, 0.010418891906738281, 0.01088559627532959, 0.011352300643920898, 0.011819005012512207, 0.012285709381103516, 0.012752413749694824, 0.013219118118286133, 0.013685822486877441, 0.01415252685546875]}, "gradients/decoder.transformer.h.4.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 1.0, 5.0, 1.0, 7.0, 2.0, 2.0, 6.0, 4.0, 5.0, 6.0, 16.0, 12.0, 16.0, 17.0, 37.0, 52.0, 63.0, 100.0, 156.0, 271.0, 699.0, 15788.0, 1023506.0, 6503.0, 583.0, 239.0, 129.0, 92.0, 53.0, 47.0, 38.0, 27.0, 21.0, 16.0, 5.0, 6.0, 5.0, 5.0, 4.0, 6.0, 1.0, 4.0, 3.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.27685546875, -0.26775360107421875, -0.2586517333984375, -0.24954986572265625, -0.240447998046875, -0.23134613037109375, -0.2222442626953125, -0.21314239501953125, -0.20404052734375, -0.19493865966796875, -0.1858367919921875, -0.17673492431640625, -0.167633056640625, -0.15853118896484375, -0.1494293212890625, -0.14032745361328125, -0.1312255859375, -0.12212371826171875, -0.1130218505859375, -0.10391998291015625, -0.094818115234375, -0.08571624755859375, -0.0766143798828125, -0.06751251220703125, -0.05841064453125, -0.04930877685546875, -0.0402069091796875, -0.03110504150390625, -0.022003173828125, -0.01290130615234375, -0.0037994384765625, 0.00530242919921875, 0.014404296875, 0.02350616455078125, 0.0326080322265625, 0.04170989990234375, 0.050811767578125, 0.05991363525390625, 0.0690155029296875, 0.07811737060546875, 0.08721923828125, 0.09632110595703125, 0.1054229736328125, 0.11452484130859375, 0.123626708984375, 0.13272857666015625, 0.1418304443359375, 0.15093231201171875, 0.1600341796875, 0.16913604736328125, 0.1782379150390625, 0.18733978271484375, 0.196441650390625, 0.20554351806640625, 0.2146453857421875, 0.22374725341796875, 0.23284912109375, 0.24195098876953125, 0.2510528564453125, 0.26015472412109375, 0.269256591796875, 0.27835845947265625, 0.2874603271484375, 0.29656219482421875, 0.3056640625]}, "gradients/decoder.transformer.h.4.ln_cross_attn.weight": {"_type": "histogram", "values": [4.0, 0.0, 0.0, 0.0, 2.0, 42.0, 971.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.05599676072597504, -0.04718736559152603, -0.038377970457077026, -0.029568571597337723, -0.020759176462888718, -0.011949781328439713, -0.003140382468700409, 0.005669012665748596, 0.014478407800197601, 0.023287802934646606, 0.03209719806909561, 0.040906596928834915, 0.04971599206328392, 0.058525387197732925, 0.06733478605747223, 0.07614418119192123, 0.08495357632637024, 0.09376297146081924, 0.10257236659526825, 0.11138176918029785, 0.12019115686416626, 0.12900055944919586, 0.13780996203422546, 0.14661934971809387, 0.15542873740196228, 0.16423813998699188, 0.1730475276708603, 0.1818569302558899, 0.1906663179397583, 0.1994757205247879, 0.2082851231098175, 0.2170945107936859, 0.22590389847755432, 0.23471330106258392, 0.24352268874645233, 0.25233209133148193, 0.26114147901535034, 0.26995086669921875, 0.27876028418540955, 0.28756967186927795, 0.29637905955314636, 0.30518844723701477, 0.31399786472320557, 0.322807252407074, 0.3316166400909424, 0.3404260277748108, 0.3492354452610016, 0.35804483294487, 0.3668542504310608, 0.3756636381149292, 0.38447305560112, 0.3932824432849884, 0.4020918309688568, 0.4109012186527252, 0.419710636138916, 0.4285200238227844, 0.43732941150665283, 0.44613879919052124, 0.45494821667671204, 0.46375760436058044, 0.47256699204444885, 0.48137637972831726, 0.49018579721450806, 0.49899518489837646, 0.5078045725822449]}, "gradients/decoder.transformer.h.4.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 3.0, 7.0, 7.0, 6.0, 22.0, 13.0, 30.0, 27.0, 30.0, 48.0, 49.0, 55.0, 59.0, 75.0, 66.0, 81.0, 56.0, 60.0, 41.0, 42.0, 46.0, 33.0, 40.0, 32.0, 18.0, 18.0, 15.0, 12.0, 6.0, 9.0, 2.0, 4.0, 0.0, 3.0, 0.0, 1.0, 2.0], "bins": [-0.024235308170318604, -0.023661397397518158, -0.023087484762072563, -0.02251357212662697, -0.021939661353826523, -0.021365750581026077, -0.020791837945580482, -0.020217925310134888, -0.019644014537334442, -0.019070103764533997, -0.018496191129088402, -0.017922278493642807, -0.01734836772084236, -0.016774456948041916, -0.01620054431259632, -0.015626631677150726, -0.01505272090435028, -0.01447880920022726, -0.01390489749610424, -0.01333098579198122, -0.0127570740878582, -0.01218316238373518, -0.01160925067961216, -0.01103533897548914, -0.01046142727136612, -0.0098875155672431, -0.009313603863120079, -0.008739692158997059, -0.008165780454874039, -0.0075918687507510185, -0.007017957046627998, -0.006444045342504978, -0.005870133638381958, -0.005296221934258938, -0.004722310230135918, -0.0041483985260128975, -0.0035744868218898773, -0.003000575117766857, -0.002426663413643837, -0.0018527517095208168, -0.0012788400053977966, -0.0007049283012747765, -0.0001310165971517563, 0.0004428951069712639, 0.001016806811094284, 0.0015907185152173042, 0.0021646302193403244, 0.0027385419234633446, 0.0033124536275863647, 0.003886365331709385, 0.004460277035832405, 0.005034188739955425, 0.0056081004440784454, 0.006182012148201466, 0.006755923852324486, 0.007329835556447506, 0.007903747260570526, 0.008477658964693546, 0.009051570668816566, 0.009625482372939587, 0.010199394077062607, 0.010773305781185627, 0.011347217485308647, 0.011921129189431667, 0.012495040893554688]}, "gradients/decoder.transformer.h.4.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 3.0, 2.0, 4.0, 1.0, 7.0, 4.0, 7.0, 8.0, 9.0, 12.0, 7.0, 14.0, 19.0, 21.0, 16.0, 11.0, 20.0, 32.0, 17.0, 46.0, 29.0, 38.0, 20.0, 32.0, 37.0, 47.0, 35.0, 51.0, 43.0, 39.0, 42.0, 33.0, 35.0, 42.0, 23.0, 21.0, 26.0, 25.0, 21.0, 16.0, 16.0, 13.0, 8.0, 13.0, 8.0, 11.0, 2.0, 3.0, 5.0, 4.0, 4.0, 2.0, 7.0, 1.0, 3.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0], "bins": [-6.58984375, -6.37017822265625, -6.1505126953125, -5.93084716796875, -5.711181640625, -5.49151611328125, -5.2718505859375, -5.05218505859375, -4.83251953125, -4.61285400390625, -4.3931884765625, -4.17352294921875, -3.953857421875, -3.73419189453125, -3.5145263671875, -3.29486083984375, -3.0751953125, -2.85552978515625, -2.6358642578125, -2.41619873046875, -2.196533203125, -1.97686767578125, -1.7572021484375, -1.53753662109375, -1.31787109375, -1.09820556640625, -0.8785400390625, -0.65887451171875, -0.439208984375, -0.21954345703125, 0.0001220703125, 0.21978759765625, 0.439453125, 0.65911865234375, 0.8787841796875, 1.09844970703125, 1.318115234375, 1.53778076171875, 1.7574462890625, 1.97711181640625, 2.19677734375, 2.41644287109375, 2.6361083984375, 2.85577392578125, 3.075439453125, 3.29510498046875, 3.5147705078125, 3.73443603515625, 3.9541015625, 4.17376708984375, 4.3934326171875, 4.61309814453125, 4.832763671875, 5.05242919921875, 5.2720947265625, 5.49176025390625, 5.71142578125, 5.93109130859375, 6.1507568359375, 6.37042236328125, 6.590087890625, 6.80975341796875, 7.0294189453125, 7.24908447265625, 7.46875]}, "gradients/decoder.transformer.h.4.attn.c_proj.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 1.0, 2.0, 3.0, 3.0, 6.0, 5.0, 9.0, 8.0, 18.0, 22.0, 17.0, 28.0, 34.0, 48.0, 51.0, 79.0, 90.0, 125.0, 191.0, 246.0, 409.0, 627.0, 1041.0, 2108.0, 4154.0, 9013.0, 19941.0, 46068.0, 111410.0, 289978.0, 331480.0, 132555.0, 54260.0, 23392.0, 10522.0, 4871.0, 2368.0, 1239.0, 699.0, 435.0, 261.0, 184.0, 127.0, 106.0, 74.0, 63.0, 34.0, 38.0, 27.0, 23.0, 19.0, 11.0, 15.0, 9.0, 8.0, 5.0, 7.0, 0.0, 3.0, 2.0, 1.0], "bins": [-10.5078125, -10.1903076171875, -9.872802734375, -9.5552978515625, -9.23779296875, -8.9202880859375, -8.602783203125, -8.2852783203125, -7.9677734375, -7.6502685546875, -7.332763671875, -7.0152587890625, -6.69775390625, -6.3802490234375, -6.062744140625, -5.7452392578125, -5.427734375, -5.1102294921875, -4.792724609375, -4.4752197265625, -4.15771484375, -3.8402099609375, -3.522705078125, -3.2052001953125, -2.8876953125, -2.5701904296875, -2.252685546875, -1.9351806640625, -1.61767578125, -1.3001708984375, -0.982666015625, -0.6651611328125, -0.34765625, -0.0301513671875, 0.287353515625, 0.6048583984375, 0.92236328125, 1.2398681640625, 1.557373046875, 1.8748779296875, 2.1923828125, 2.5098876953125, 2.827392578125, 3.1448974609375, 3.46240234375, 3.7799072265625, 4.097412109375, 4.4149169921875, 4.732421875, 5.0499267578125, 5.367431640625, 5.6849365234375, 6.00244140625, 6.3199462890625, 6.637451171875, 6.9549560546875, 7.2724609375, 7.5899658203125, 7.907470703125, 8.2249755859375, 8.54248046875, 8.8599853515625, 9.177490234375, 9.4949951171875, 9.8125]}, "gradients/decoder.transformer.h.4.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 1.0, 8.0, 6.0, 0.0, 3.0, 2.0, 11.0, 14.0, 7.0, 11.0, 17.0, 13.0, 16.0, 27.0, 26.0, 20.0, 31.0, 27.0, 39.0, 29.0, 51.0, 58.0, 68.0, 114.0, 201.0, 1279.0, 220.0, 146.0, 117.0, 81.0, 59.0, 51.0, 46.0, 32.0, 29.0, 25.0, 21.0, 25.0, 18.0, 15.0, 14.0, 9.0, 14.0, 17.0, 8.0, 11.0, 5.0, 3.0, 6.0, 3.0, 1.0, 4.0, 3.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 2.0], "bins": [-15.546875, -15.02734375, -14.5078125, -13.98828125, -13.46875, -12.94921875, -12.4296875, -11.91015625, -11.390625, -10.87109375, -10.3515625, -9.83203125, -9.3125, -8.79296875, -8.2734375, -7.75390625, -7.234375, -6.71484375, -6.1953125, -5.67578125, -5.15625, -4.63671875, -4.1171875, -3.59765625, -3.078125, -2.55859375, -2.0390625, -1.51953125, -1.0, -0.48046875, 0.0390625, 0.55859375, 1.078125, 1.59765625, 2.1171875, 2.63671875, 3.15625, 3.67578125, 4.1953125, 4.71484375, 5.234375, 5.75390625, 6.2734375, 6.79296875, 7.3125, 7.83203125, 8.3515625, 8.87109375, 9.390625, 9.91015625, 10.4296875, 10.94921875, 11.46875, 11.98828125, 12.5078125, 13.02734375, 13.546875, 14.06640625, 14.5859375, 15.10546875, 15.625, 16.14453125, 16.6640625, 17.18359375, 17.703125]}, "gradients/decoder.transformer.h.4.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 1.0, 4.0, 5.0, 4.0, 7.0, 8.0, 7.0, 15.0, 18.0, 26.0, 34.0, 63.0, 57.0, 96.0, 143.0, 210.0, 363.0, 649.0, 2226.0, 24402.0, 2365728.0, 736158.0, 12333.0, 1595.0, 595.0, 336.0, 217.0, 119.0, 111.0, 47.0, 40.0, 25.0, 19.0, 13.0, 10.0, 5.0, 5.0, 6.0, 8.0, 4.0, 1.0, 0.0, 1.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-40.09375, -38.65625, -37.21875, -35.78125, -34.34375, -32.90625, -31.46875, -30.03125, -28.59375, -27.15625, -25.71875, -24.28125, -22.84375, -21.40625, -19.96875, -18.53125, -17.09375, -15.65625, -14.21875, -12.78125, -11.34375, -9.90625, -8.46875, -7.03125, -5.59375, -4.15625, -2.71875, -1.28125, 0.15625, 1.59375, 3.03125, 4.46875, 5.90625, 7.34375, 8.78125, 10.21875, 11.65625, 13.09375, 14.53125, 15.96875, 17.40625, 18.84375, 20.28125, 21.71875, 23.15625, 24.59375, 26.03125, 27.46875, 28.90625, 30.34375, 31.78125, 33.21875, 34.65625, 36.09375, 37.53125, 38.96875, 40.40625, 41.84375, 43.28125, 44.71875, 46.15625, 47.59375, 49.03125, 50.46875, 51.90625]}, "gradients/decoder.transformer.h.4.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 549.0, 459.0, 10.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-189.22972106933594, -176.56564331054688, -163.90158081054688, -151.2375030517578, -138.57342529296875, -125.90934753417969, -113.24527740478516, -100.58120727539062, -87.91712951660156, -75.2530517578125, -62.58898162841797, -49.92490768432617, -37.260833740234375, -24.596759796142578, -11.932685852050781, 0.73138427734375, 13.395462036132812, 26.05953598022461, 38.723609924316406, 51.3876838684082, 64.0517578125, 76.71583557128906, 89.3799057006836, 102.04397583007812, 114.70805358886719, 127.37213134765625, 140.03619384765625, 152.7002716064453, 165.36434936523438, 178.02842712402344, 190.6925048828125, 203.3565673828125, 216.0206298828125, 228.68470764160156, 241.34878540039062, 254.01284790039062, 266.67694091796875, 279.34100341796875, 292.00506591796875, 304.66912841796875, 317.3332214355469, 329.9972839355469, 342.661376953125, 355.325439453125, 367.989501953125, 380.6535949707031, 393.3176574707031, 405.98175048828125, 418.64581298828125, 431.30987548828125, 443.9739685058594, 456.6380310058594, 469.3021240234375, 481.9661865234375, 494.6302490234375, 507.2943115234375, 519.9583740234375, 532.6224365234375, 545.2864990234375, 557.9506225585938, 570.6146850585938, 583.2787475585938, 595.9428100585938, 608.6068725585938, 621.27099609375]}, "gradients/decoder.transformer.h.4.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 0.0, 4.0, 4.0, 1.0, 8.0, 9.0, 7.0, 7.0, 7.0, 14.0, 19.0, 21.0, 38.0, 24.0, 33.0, 34.0, 39.0, 25.0, 41.0, 34.0, 42.0, 39.0, 40.0, 41.0, 59.0, 54.0, 43.0, 37.0, 38.0, 38.0, 26.0, 18.0, 17.0, 17.0, 34.0, 22.0, 19.0, 6.0, 11.0, 9.0, 9.0, 7.0, 10.0, 5.0, 2.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-51.2830924987793, -49.833824157714844, -48.38455581665039, -46.93528366088867, -45.48601531982422, -44.036746978759766, -42.58747863769531, -41.138206481933594, -39.68893814086914, -38.23966979980469, -36.790401458740234, -35.341129302978516, -33.89186096191406, -32.44259262084961, -30.993324279785156, -29.54405403137207, -28.094785690307617, -26.645517349243164, -25.196247100830078, -23.746978759765625, -22.29770851135254, -20.848440170288086, -19.399169921875, -17.949901580810547, -16.500633239746094, -15.051363945007324, -13.602094650268555, -12.152826309204102, -10.703556060791016, -9.254287719726562, -7.805018424987793, -6.355749130249023, -4.9064788818359375, -3.457209587097168, -2.0079405307769775, -0.5586714744567871, 0.8905978202819824, 2.339867115020752, 3.7891359329223633, 5.238405227661133, 6.687674522399902, 8.136943817138672, 9.586213111877441, 11.035482406616211, 12.484750747680664, 13.93402099609375, 15.383289337158203, 16.832557678222656, 18.281827926635742, 19.731096267700195, 21.18036651611328, 22.629634857177734, 24.07890510559082, 25.528173446655273, 26.97744369506836, 28.426712036132812, 29.875980377197266, 31.32524871826172, 32.77451705932617, 34.22378921508789, 35.673057556152344, 37.1223258972168, 38.57159423828125, 40.02086639404297, 41.47013473510742]}, "gradients/decoder.transformer.h.3.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 3.0, 2.0, 3.0, 7.0, 7.0, 10.0, 13.0, 3.0, 13.0, 18.0, 16.0, 15.0, 29.0, 30.0, 34.0, 28.0, 28.0, 27.0, 31.0, 44.0, 35.0, 26.0, 48.0, 46.0, 43.0, 48.0, 30.0, 45.0, 32.0, 40.0, 30.0, 31.0, 17.0, 24.0, 19.0, 18.0, 22.0, 11.0, 12.0, 14.0, 10.0, 12.0, 4.0, 8.0, 4.0, 5.0, 4.0, 5.0, 4.0, 3.0, 2.0, 0.0, 1.0, 1.0, 1.0], "bins": [-7.65625, -7.42279052734375, -7.1893310546875, -6.95587158203125, -6.722412109375, -6.48895263671875, -6.2554931640625, -6.02203369140625, -5.78857421875, -5.55511474609375, -5.3216552734375, -5.08819580078125, -4.854736328125, -4.62127685546875, -4.3878173828125, -4.15435791015625, -3.9208984375, -3.68743896484375, -3.4539794921875, -3.22052001953125, -2.987060546875, -2.75360107421875, -2.5201416015625, -2.28668212890625, -2.05322265625, -1.81976318359375, -1.5863037109375, -1.35284423828125, -1.119384765625, -0.88592529296875, -0.6524658203125, -0.41900634765625, -0.185546875, 0.04791259765625, 0.2813720703125, 0.51483154296875, 0.748291015625, 0.98175048828125, 1.2152099609375, 1.44866943359375, 1.68212890625, 1.91558837890625, 2.1490478515625, 2.38250732421875, 2.615966796875, 2.84942626953125, 3.0828857421875, 3.31634521484375, 3.5498046875, 3.78326416015625, 4.0167236328125, 4.25018310546875, 4.483642578125, 4.71710205078125, 4.9505615234375, 5.18402099609375, 5.41748046875, 5.65093994140625, 5.8843994140625, 6.11785888671875, 6.351318359375, 6.58477783203125, 6.8182373046875, 7.05169677734375, 7.28515625]}, "gradients/decoder.transformer.h.3.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 3.0, 3.0, 4.0, 8.0, 5.0, 11.0, 7.0, 15.0, 16.0, 19.0, 18.0, 28.0, 29.0, 43.0, 57.0, 77.0, 97.0, 142.0, 200.0, 227.0, 372.0, 887.0, 40020.0, 4097938.0, 51608.0, 1021.0, 400.0, 260.0, 206.0, 142.0, 98.0, 76.0, 53.0, 43.0, 37.0, 20.0, 20.0, 22.0, 12.0, 11.0, 11.0, 5.0, 8.0, 5.0, 3.0, 2.0, 3.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-67.625, -65.4794921875, -63.333984375, -61.1884765625, -59.04296875, -56.8974609375, -54.751953125, -52.6064453125, -50.4609375, -48.3154296875, -46.169921875, -44.0244140625, -41.87890625, -39.7333984375, -37.587890625, -35.4423828125, -33.296875, -31.1513671875, -29.005859375, -26.8603515625, -24.71484375, -22.5693359375, -20.423828125, -18.2783203125, -16.1328125, -13.9873046875, -11.841796875, -9.6962890625, -7.55078125, -5.4052734375, -3.259765625, -1.1142578125, 1.03125, 3.1767578125, 5.322265625, 7.4677734375, 9.61328125, 11.7587890625, 13.904296875, 16.0498046875, 18.1953125, 20.3408203125, 22.486328125, 24.6318359375, 26.77734375, 28.9228515625, 31.068359375, 33.2138671875, 35.359375, 37.5048828125, 39.650390625, 41.7958984375, 43.94140625, 46.0869140625, 48.232421875, 50.3779296875, 52.5234375, 54.6689453125, 56.814453125, 58.9599609375, 61.10546875, 63.2509765625, 65.396484375, 67.5419921875, 69.6875]}, "gradients/decoder.transformer.h.3.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 2.0, 2.0, 5.0, 3.0, 2.0, 2.0, 8.0, 8.0, 5.0, 22.0, 20.0, 40.0, 38.0, 60.0, 100.0, 134.0, 167.0, 245.0, 379.0, 525.0, 627.0, 530.0, 356.0, 275.0, 157.0, 107.0, 62.0, 62.0, 32.0, 29.0, 23.0, 19.0, 12.0, 5.0, 6.0, 3.0, 6.0, 4.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-16.75, -16.3031005859375, -15.856201171875, -15.4093017578125, -14.96240234375, -14.5155029296875, -14.068603515625, -13.6217041015625, -13.1748046875, -12.7279052734375, -12.281005859375, -11.8341064453125, -11.38720703125, -10.9403076171875, -10.493408203125, -10.0465087890625, -9.599609375, -9.1527099609375, -8.705810546875, -8.2589111328125, -7.81201171875, -7.3651123046875, -6.918212890625, -6.4713134765625, -6.0244140625, -5.5775146484375, -5.130615234375, -4.6837158203125, -4.23681640625, -3.7899169921875, -3.343017578125, -2.8961181640625, -2.44921875, -2.0023193359375, -1.555419921875, -1.1085205078125, -0.66162109375, -0.2147216796875, 0.232177734375, 0.6790771484375, 1.1259765625, 1.5728759765625, 2.019775390625, 2.4666748046875, 2.91357421875, 3.3604736328125, 3.807373046875, 4.2542724609375, 4.701171875, 5.1480712890625, 5.594970703125, 6.0418701171875, 6.48876953125, 6.9356689453125, 7.382568359375, 7.8294677734375, 8.2763671875, 8.7232666015625, 9.170166015625, 9.6170654296875, 10.06396484375, 10.5108642578125, 10.957763671875, 11.4046630859375, 11.8515625]}, "gradients/decoder.transformer.h.3.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 3.0, 0.0, 5.0, 7.0, 3.0, 5.0, 9.0, 6.0, 8.0, 14.0, 17.0, 17.0, 34.0, 37.0, 57.0, 86.0, 130.0, 251.0, 656.0, 2465.0, 17050.0, 259236.0, 3572259.0, 317994.0, 19821.0, 2651.0, 720.0, 252.0, 143.0, 101.0, 52.0, 40.0, 40.0, 30.0, 21.0, 15.0, 13.0, 12.0, 6.0, 6.0, 5.0, 8.0, 4.0, 0.0, 2.0, 2.0, 2.0, 3.0, 1.0], "bins": [-40.8125, -39.72509765625, -38.6376953125, -37.55029296875, -36.462890625, -35.37548828125, -34.2880859375, -33.20068359375, -32.11328125, -31.02587890625, -29.9384765625, -28.85107421875, -27.763671875, -26.67626953125, -25.5888671875, -24.50146484375, -23.4140625, -22.32666015625, -21.2392578125, -20.15185546875, -19.064453125, -17.97705078125, -16.8896484375, -15.80224609375, -14.71484375, -13.62744140625, -12.5400390625, -11.45263671875, -10.365234375, -9.27783203125, -8.1904296875, -7.10302734375, -6.015625, -4.92822265625, -3.8408203125, -2.75341796875, -1.666015625, -0.57861328125, 0.5087890625, 1.59619140625, 2.68359375, 3.77099609375, 4.8583984375, 5.94580078125, 7.033203125, 8.12060546875, 9.2080078125, 10.29541015625, 11.3828125, 12.47021484375, 13.5576171875, 14.64501953125, 15.732421875, 16.81982421875, 17.9072265625, 18.99462890625, 20.08203125, 21.16943359375, 22.2568359375, 23.34423828125, 24.431640625, 25.51904296875, 26.6064453125, 27.69384765625, 28.78125]}, "gradients/decoder.transformer.h.3.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 17.0, 246.0, 630.0, 120.0, 6.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-153.51507568359375, -143.5267333984375, -133.53839111328125, -123.55004119873047, -113.56169891357422, -103.57335662841797, -93.58500671386719, -83.59666442871094, -73.60832214355469, -63.61997985839844, -53.63163375854492, -43.643287658691406, -33.654945373535156, -23.666603088378906, -13.67825698852539, -3.689910888671875, 6.298431396484375, 16.286775588989258, 26.27511978149414, 36.263465881347656, 46.251808166503906, 56.240150451660156, 66.22850036621094, 76.21684265136719, 86.20518493652344, 96.19352722167969, 106.18186950683594, 116.17021942138672, 126.15856170654297, 136.14691162109375, 146.13525390625, 156.12359619140625, 166.11190795898438, 176.10025024414062, 186.08859252929688, 196.07693481445312, 206.06527709960938, 216.05361938476562, 226.04197692871094, 236.0303192138672, 246.01866149902344, 256.00701904296875, 265.995361328125, 275.98370361328125, 285.9720458984375, 295.96038818359375, 305.94873046875, 315.93707275390625, 325.9254150390625, 335.91375732421875, 345.902099609375, 355.89044189453125, 365.8787841796875, 375.86712646484375, 385.85546875, 395.84381103515625, 405.8321533203125, 415.82049560546875, 425.808837890625, 435.79718017578125, 445.7855224609375, 455.77386474609375, 465.76220703125, 475.75054931640625, 485.7389221191406]}, "gradients/decoder.transformer.h.3.ln_2.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 5.0, 2.0, 3.0, 2.0, 4.0, 4.0, 5.0, 6.0, 9.0, 5.0, 15.0, 11.0, 19.0, 14.0, 23.0, 31.0, 25.0, 37.0, 33.0, 26.0, 33.0, 22.0, 43.0, 32.0, 34.0, 56.0, 48.0, 35.0, 34.0, 43.0, 32.0, 33.0, 40.0, 34.0, 30.0, 27.0, 24.0, 18.0, 22.0, 19.0, 13.0, 11.0, 8.0, 7.0, 9.0, 4.0, 12.0, 7.0, 1.0, 2.0, 3.0, 0.0, 3.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-36.44572067260742, -35.20658493041992, -33.967445373535156, -32.728309631347656, -31.48917007446289, -30.25003433227539, -29.010896682739258, -27.771759033203125, -26.532621383666992, -25.29348373413086, -24.054346084594727, -22.815208435058594, -21.576072692871094, -20.336933135986328, -19.097797393798828, -17.858659744262695, -16.619522094726562, -15.38038444519043, -14.141246795654297, -12.90211009979248, -11.662972450256348, -10.423834800720215, -9.184698104858398, -7.945560455322266, -6.706422805786133, -5.46728515625, -4.228147983551025, -2.9890105724334717, -1.749873161315918, -0.5107355117797852, 0.7284016609191895, 1.967538833618164, 3.2066802978515625, 4.445817947387695, 5.68495512008667, 6.9240922927856445, 8.163229942321777, 9.40236759185791, 10.641504287719727, 11.88064193725586, 13.119779586791992, 14.358917236328125, 15.598054885864258, 16.83719253540039, 18.07632827758789, 19.315467834472656, 20.554603576660156, 21.79374122619629, 23.032878875732422, 24.272016525268555, 25.511154174804688, 26.75029182434082, 27.989429473876953, 29.228565216064453, 30.467702865600586, 31.70684051513672, 32.94597625732422, 34.18511199951172, 35.424251556396484, 36.663387298583984, 37.90252685546875, 39.14166259765625, 40.380802154541016, 41.619937896728516, 42.85907745361328]}, "gradients/decoder.transformer.h.3.crossattention.c_proj.bias": {"_type": "histogram", "values": [5.0, 2.0, 1.0, 2.0, 1.0, 0.0, 2.0, 4.0, 4.0, 4.0, 11.0, 8.0, 10.0, 10.0, 16.0, 11.0, 11.0, 22.0, 20.0, 17.0, 15.0, 16.0, 24.0, 30.0, 29.0, 33.0, 31.0, 37.0, 40.0, 38.0, 39.0, 28.0, 46.0, 46.0, 30.0, 35.0, 34.0, 31.0, 25.0, 34.0, 11.0, 22.0, 25.0, 19.0, 17.0, 15.0, 21.0, 10.0, 6.0, 14.0, 8.0, 10.0, 7.0, 6.0, 10.0, 4.0, 3.0, 5.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0], "bins": [-6.6484375, -6.4384765625, -6.228515625, -6.0185546875, -5.80859375, -5.5986328125, -5.388671875, -5.1787109375, -4.96875, -4.7587890625, -4.548828125, -4.3388671875, -4.12890625, -3.9189453125, -3.708984375, -3.4990234375, -3.2890625, -3.0791015625, -2.869140625, -2.6591796875, -2.44921875, -2.2392578125, -2.029296875, -1.8193359375, -1.609375, -1.3994140625, -1.189453125, -0.9794921875, -0.76953125, -0.5595703125, -0.349609375, -0.1396484375, 0.0703125, 0.2802734375, 0.490234375, 0.7001953125, 0.91015625, 1.1201171875, 1.330078125, 1.5400390625, 1.75, 1.9599609375, 2.169921875, 2.3798828125, 2.58984375, 2.7998046875, 3.009765625, 3.2197265625, 3.4296875, 3.6396484375, 3.849609375, 4.0595703125, 4.26953125, 4.4794921875, 4.689453125, 4.8994140625, 5.109375, 5.3193359375, 5.529296875, 5.7392578125, 5.94921875, 6.1591796875, 6.369140625, 6.5791015625, 6.7890625]}, "gradients/decoder.transformer.h.3.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 4.0, 3.0, 6.0, 5.0, 11.0, 21.0, 41.0, 51.0, 79.0, 109.0, 200.0, 303.0, 390.0, 612.0, 834.0, 1342.0, 1867.0, 2852.0, 4086.0, 6084.0, 9082.0, 13143.0, 19290.0, 28728.0, 41712.0, 59500.0, 81957.0, 107321.0, 127261.0, 130379.0, 113479.0, 88718.0, 64492.0, 45983.0, 31610.0, 21315.0, 14619.0, 9954.0, 6811.0, 4628.0, 3208.0, 2111.0, 1357.0, 983.0, 628.0, 473.0, 326.0, 199.0, 147.0, 89.0, 64.0, 39.0, 22.0, 11.0, 13.0, 6.0, 4.0, 3.0, 6.0, 1.0], "bins": [-0.830078125, -0.805023193359375, -0.77996826171875, -0.754913330078125, -0.7298583984375, -0.704803466796875, -0.67974853515625, -0.654693603515625, -0.629638671875, -0.604583740234375, -0.57952880859375, -0.554473876953125, -0.5294189453125, -0.504364013671875, -0.47930908203125, -0.454254150390625, -0.42919921875, -0.404144287109375, -0.37908935546875, -0.354034423828125, -0.3289794921875, -0.303924560546875, -0.27886962890625, -0.253814697265625, -0.228759765625, -0.203704833984375, -0.17864990234375, -0.153594970703125, -0.1285400390625, -0.103485107421875, -0.07843017578125, -0.053375244140625, -0.0283203125, -0.003265380859375, 0.02178955078125, 0.046844482421875, 0.0718994140625, 0.096954345703125, 0.12200927734375, 0.147064208984375, 0.172119140625, 0.197174072265625, 0.22222900390625, 0.247283935546875, 0.2723388671875, 0.297393798828125, 0.32244873046875, 0.347503662109375, 0.37255859375, 0.397613525390625, 0.42266845703125, 0.447723388671875, 0.4727783203125, 0.497833251953125, 0.52288818359375, 0.547943115234375, 0.572998046875, 0.598052978515625, 0.62310791015625, 0.648162841796875, 0.6732177734375, 0.698272705078125, 0.72332763671875, 0.748382568359375, 0.7734375]}, "gradients/decoder.transformer.h.3.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 5.0, 4.0, 7.0, 5.0, 6.0, 15.0, 16.0, 17.0, 33.0, 21.0, 19.0, 36.0, 34.0, 25.0, 29.0, 41.0, 33.0, 42.0, 40.0, 37.0, 33.0, 1055.0, 39.0, 42.0, 45.0, 48.0, 42.0, 37.0, 33.0, 23.0, 27.0, 20.0, 21.0, 17.0, 18.0, 11.0, 17.0, 8.0, 8.0, 6.0, 8.0, 3.0, 6.0, 1.0, 5.0, 3.0, 0.0, 0.0, 1.0, 1.0], "bins": [-5.953125, -5.78448486328125, -5.6158447265625, -5.44720458984375, -5.278564453125, -5.10992431640625, -4.9412841796875, -4.77264404296875, -4.60400390625, -4.43536376953125, -4.2667236328125, -4.09808349609375, -3.929443359375, -3.76080322265625, -3.5921630859375, -3.42352294921875, -3.2548828125, -3.08624267578125, -2.9176025390625, -2.74896240234375, -2.580322265625, -2.41168212890625, -2.2430419921875, -2.07440185546875, -1.90576171875, -1.73712158203125, -1.5684814453125, -1.39984130859375, -1.231201171875, -1.06256103515625, -0.8939208984375, -0.72528076171875, -0.556640625, -0.38800048828125, -0.2193603515625, -0.05072021484375, 0.117919921875, 0.28656005859375, 0.4552001953125, 0.62384033203125, 0.79248046875, 0.96112060546875, 1.1297607421875, 1.29840087890625, 1.467041015625, 1.63568115234375, 1.8043212890625, 1.97296142578125, 2.1416015625, 2.31024169921875, 2.4788818359375, 2.64752197265625, 2.816162109375, 2.98480224609375, 3.1534423828125, 3.32208251953125, 3.49072265625, 3.65936279296875, 3.8280029296875, 3.99664306640625, 4.165283203125, 4.33392333984375, 4.5025634765625, 4.67120361328125, 4.83984375]}, "gradients/decoder.transformer.h.3.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 3.0, 16.0, 17.0, 15.0, 36.0, 39.0, 66.0, 84.0, 126.0, 207.0, 314.0, 527.0, 758.0, 1273.0, 2064.0, 3282.0, 5358.0, 8603.0, 13452.0, 21105.0, 32456.0, 48559.0, 71512.0, 99763.0, 127819.0, 1184187.0, 136895.0, 106249.0, 78649.0, 53664.0, 35781.0, 23394.0, 15084.0, 9636.0, 6097.0, 3841.0, 2393.0, 1460.0, 874.0, 525.0, 350.0, 231.0, 123.0, 104.0, 49.0, 25.0, 31.0, 16.0, 12.0, 7.0, 4.0, 4.0, 2.0, 1.0, 3.0], "bins": [-0.62841796875, -0.6102676391601562, -0.5921173095703125, -0.5739669799804688, -0.555816650390625, -0.5376663208007812, -0.5195159912109375, -0.5013656616210938, -0.48321533203125, -0.46506500244140625, -0.4469146728515625, -0.42876434326171875, -0.410614013671875, -0.39246368408203125, -0.3743133544921875, -0.35616302490234375, -0.3380126953125, -0.31986236572265625, -0.3017120361328125, -0.28356170654296875, -0.265411376953125, -0.24726104736328125, -0.2291107177734375, -0.21096038818359375, -0.19281005859375, -0.17465972900390625, -0.1565093994140625, -0.13835906982421875, -0.120208740234375, -0.10205841064453125, -0.0839080810546875, -0.06575775146484375, -0.047607421875, -0.02945709228515625, -0.0113067626953125, 0.00684356689453125, 0.024993896484375, 0.04314422607421875, 0.0612945556640625, 0.07944488525390625, 0.09759521484375, 0.11574554443359375, 0.1338958740234375, 0.15204620361328125, 0.170196533203125, 0.18834686279296875, 0.2064971923828125, 0.22464752197265625, 0.2427978515625, 0.26094818115234375, 0.2790985107421875, 0.29724884033203125, 0.315399169921875, 0.33354949951171875, 0.3516998291015625, 0.36985015869140625, 0.38800048828125, 0.40615081787109375, 0.4243011474609375, 0.44245147705078125, 0.460601806640625, 0.47875213623046875, 0.4969024658203125, 0.5150527954101562, 0.533203125]}, "gradients/decoder.transformer.h.3.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 2.0, 1.0, 5.0, 4.0, 2.0, 3.0, 4.0, 6.0, 5.0, 7.0, 11.0, 6.0, 12.0, 13.0, 26.0, 24.0, 27.0, 46.0, 106.0, 143.0, 145.0, 131.0, 72.0, 53.0, 28.0, 27.0, 29.0, 15.0, 13.0, 6.0, 6.0, 3.0, 2.0, 3.0, 5.0, 3.0, 2.0, 6.0, 1.0, 3.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 3.0], "bins": [-0.026031494140625, -0.025250673294067383, -0.024469852447509766, -0.02368903160095215, -0.02290821075439453, -0.022127389907836914, -0.021346569061279297, -0.02056574821472168, -0.019784927368164062, -0.019004106521606445, -0.018223285675048828, -0.01744246482849121, -0.016661643981933594, -0.015880823135375977, -0.01510000228881836, -0.014319181442260742, -0.013538360595703125, -0.012757539749145508, -0.01197671890258789, -0.011195898056030273, -0.010415077209472656, -0.009634256362915039, -0.008853435516357422, -0.008072614669799805, -0.0072917938232421875, -0.00651097297668457, -0.005730152130126953, -0.004949331283569336, -0.004168510437011719, -0.0033876895904541016, -0.0026068687438964844, -0.0018260478973388672, -0.00104522705078125, -0.0002644062042236328, 0.0005164146423339844, 0.0012972354888916016, 0.0020780563354492188, 0.002858877182006836, 0.003639698028564453, 0.00442051887512207, 0.0052013397216796875, 0.005982160568237305, 0.006762981414794922, 0.007543802261352539, 0.008324623107910156, 0.009105443954467773, 0.00988626480102539, 0.010667085647583008, 0.011447906494140625, 0.012228727340698242, 0.01300954818725586, 0.013790369033813477, 0.014571189880371094, 0.015352010726928711, 0.016132831573486328, 0.016913652420043945, 0.017694473266601562, 0.01847529411315918, 0.019256114959716797, 0.020036935806274414, 0.02081775665283203, 0.02159857749938965, 0.022379398345947266, 0.023160219192504883, 0.0239410400390625]}, "gradients/decoder.transformer.h.3.crossattention.q_attn.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 1.0, 2.0, 3.0, 2.0, 2.0, 2.0, 6.0, 8.0, 7.0, 8.0, 7.0, 10.0, 7.0, 17.0, 20.0, 25.0, 32.0, 83.0, 104.0, 116.0, 203.0, 459.0, 2844.0, 1032658.0, 10576.0, 625.0, 255.0, 132.0, 90.0, 57.0, 45.0, 25.0, 26.0, 24.0, 14.0, 11.0, 13.0, 10.0, 8.0, 3.0, 7.0, 6.0, 3.0, 2.0, 1.0, 2.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.453125, -0.43833160400390625, -0.4235382080078125, -0.40874481201171875, -0.393951416015625, -0.37915802001953125, -0.3643646240234375, -0.34957122802734375, -0.33477783203125, -0.31998443603515625, -0.3051910400390625, -0.29039764404296875, -0.275604248046875, -0.26081085205078125, -0.2460174560546875, -0.23122406005859375, -0.2164306640625, -0.20163726806640625, -0.1868438720703125, -0.17205047607421875, -0.157257080078125, -0.14246368408203125, -0.1276702880859375, -0.11287689208984375, -0.09808349609375, -0.08329010009765625, -0.0684967041015625, -0.05370330810546875, -0.038909912109375, -0.02411651611328125, -0.0093231201171875, 0.00547027587890625, 0.020263671875, 0.03505706787109375, 0.0498504638671875, 0.06464385986328125, 0.079437255859375, 0.09423065185546875, 0.1090240478515625, 0.12381744384765625, 0.13861083984375, 0.15340423583984375, 0.1681976318359375, 0.18299102783203125, 0.197784423828125, 0.21257781982421875, 0.2273712158203125, 0.24216461181640625, 0.2569580078125, 0.27175140380859375, 0.2865447998046875, 0.30133819580078125, 0.316131591796875, 0.33092498779296875, 0.3457183837890625, 0.36051177978515625, 0.37530517578125, 0.39009857177734375, 0.4048919677734375, 0.41968536376953125, 0.434478759765625, 0.44927215576171875, 0.4640655517578125, 0.47885894775390625, 0.49365234375]}, "gradients/decoder.transformer.h.3.ln_cross_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 957.0, 56.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.08458857983350754, -0.07367686927318573, -0.06276515871286392, -0.05185345187783241, -0.040941741317510605, -0.030030030757188797, -0.019118323922157288, -0.00820661336183548, 0.002705097198486328, 0.013616806827485561, 0.024528516456484795, 0.03544022515416145, 0.04635193571448326, 0.05726364627480507, 0.06817535310983658, 0.07908706367015839, 0.0899987742304802, 0.100910484790802, 0.11182219535112381, 0.12273390591144562, 0.13364560902118683, 0.14455732703208923, 0.15546903014183044, 0.16638073325157166, 0.17729245126247406, 0.18820415437221527, 0.19911587238311768, 0.2100275754928589, 0.2209392935037613, 0.2318509966135025, 0.2427627146244049, 0.2536744177341461, 0.26458612084388733, 0.27549782395362854, 0.28640952706336975, 0.29732125997543335, 0.30823296308517456, 0.31914466619491577, 0.330056369304657, 0.3409680724143982, 0.3518798053264618, 0.362791508436203, 0.3737032115459442, 0.3846149444580078, 0.395526647567749, 0.40643835067749023, 0.41735005378723145, 0.42826175689697266, 0.43917346000671387, 0.4500851631164551, 0.4609968662261963, 0.4719085991382599, 0.4828203022480011, 0.4937320053577423, 0.5046437382698059, 0.5155554413795471, 0.5264671444892883, 0.5373788475990295, 0.5482905507087708, 0.559202253818512, 0.5701139569282532, 0.5810257196426392, 0.5919374227523804, 0.6028491258621216, 0.6137608289718628]}, "gradients/decoder.transformer.h.3.ln_cross_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 2.0, 4.0, 8.0, 8.0, 11.0, 9.0, 12.0, 21.0, 30.0, 32.0, 36.0, 36.0, 54.0, 36.0, 66.0, 65.0, 67.0, 58.0, 62.0, 46.0, 53.0, 54.0, 34.0, 57.0, 28.0, 25.0, 24.0, 25.0, 13.0, 18.0, 7.0, 3.0, 5.0, 0.0, 3.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.03312307596206665, -0.032248929142951965, -0.03137478232383728, -0.030500631779432297, -0.02962648496031761, -0.028752338141202927, -0.027878189459443092, -0.027004040777683258, -0.026129893958568573, -0.025255747139453888, -0.024381598457694054, -0.02350744977593422, -0.022633302956819534, -0.02175915613770485, -0.020885007455945015, -0.02001085877418518, -0.019136711955070496, -0.01826256513595581, -0.017388416454195976, -0.016514267772436142, -0.015640120953321457, -0.014765973202884197, -0.013891825452446938, -0.013017677702009678, -0.012143529951572418, -0.011269382201135159, -0.010395234450697899, -0.00952108670026064, -0.00864693894982338, -0.00777279119938612, -0.00689864344894886, -0.0060244956985116005, -0.005150347948074341, -0.004276200197637081, -0.0034020524471998215, -0.002527904696762562, -0.0016537569463253021, -0.0007796091958880424, 9.453855454921722e-05, 0.0009686863049864769, 0.0018428340554237366, 0.0027169818058609962, 0.003591129556298256, 0.004465277306735516, 0.005339425057172775, 0.006213572807610035, 0.007087720558047295, 0.007961868308484554, 0.008836016058921814, 0.009710163809359074, 0.010584311559796333, 0.011458459310233593, 0.012332607060670853, 0.013206754811108112, 0.014080902561545372, 0.014955050311982632, 0.01582919806241989, 0.016703344881534576, 0.01757749356329441, 0.018451642245054245, 0.01932578906416893, 0.020199935883283615, 0.02107408456504345, 0.021948233246803284, 0.02282238006591797]}, "gradients/decoder.transformer.h.3.attn.c_proj.bias": {"_type": "histogram", "values": [5.0, 2.0, 1.0, 2.0, 1.0, 0.0, 2.0, 4.0, 4.0, 5.0, 10.0, 8.0, 10.0, 10.0, 16.0, 11.0, 11.0, 22.0, 20.0, 17.0, 15.0, 17.0, 23.0, 30.0, 29.0, 33.0, 31.0, 36.0, 40.0, 39.0, 39.0, 28.0, 46.0, 46.0, 29.0, 36.0, 35.0, 30.0, 25.0, 34.0, 11.0, 22.0, 25.0, 19.0, 17.0, 15.0, 20.0, 11.0, 6.0, 13.0, 9.0, 10.0, 8.0, 5.0, 10.0, 4.0, 2.0, 6.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0], "bins": [-6.64453125, -6.4346923828125, -6.224853515625, -6.0150146484375, -5.80517578125, -5.5953369140625, -5.385498046875, -5.1756591796875, -4.9658203125, -4.7559814453125, -4.546142578125, -4.3363037109375, -4.12646484375, -3.9166259765625, -3.706787109375, -3.4969482421875, -3.287109375, -3.0772705078125, -2.867431640625, -2.6575927734375, -2.44775390625, -2.2379150390625, -2.028076171875, -1.8182373046875, -1.6083984375, -1.3985595703125, -1.188720703125, -0.9788818359375, -0.76904296875, -0.5592041015625, -0.349365234375, -0.1395263671875, 0.0703125, 0.2801513671875, 0.489990234375, 0.6998291015625, 0.90966796875, 1.1195068359375, 1.329345703125, 1.5391845703125, 1.7490234375, 1.9588623046875, 2.168701171875, 2.3785400390625, 2.58837890625, 2.7982177734375, 3.008056640625, 3.2178955078125, 3.427734375, 3.6375732421875, 3.847412109375, 4.0572509765625, 4.26708984375, 4.4769287109375, 4.686767578125, 4.8966064453125, 5.1064453125, 5.3162841796875, 5.526123046875, 5.7359619140625, 5.94580078125, 6.1556396484375, 6.365478515625, 6.5753173828125, 6.78515625]}, "gradients/decoder.transformer.h.3.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 3.0, 5.0, 5.0, 4.0, 2.0, 6.0, 12.0, 13.0, 25.0, 25.0, 32.0, 43.0, 54.0, 83.0, 92.0, 122.0, 151.0, 169.0, 239.0, 317.0, 352.0, 457.0, 619.0, 829.0, 1213.0, 2290.0, 5470.0, 16536.0, 54800.0, 171531.0, 405706.0, 257495.0, 86825.0, 26066.0, 8190.0, 3155.0, 1641.0, 916.0, 655.0, 493.0, 403.0, 333.0, 274.0, 221.0, 148.0, 121.0, 87.0, 73.0, 63.0, 48.0, 37.0, 33.0, 24.0, 11.0, 14.0, 9.0, 8.0, 12.0, 5.0, 2.0, 4.0, 2.0], "bins": [-13.25, -12.8447265625, -12.439453125, -12.0341796875, -11.62890625, -11.2236328125, -10.818359375, -10.4130859375, -10.0078125, -9.6025390625, -9.197265625, -8.7919921875, -8.38671875, -7.9814453125, -7.576171875, -7.1708984375, -6.765625, -6.3603515625, -5.955078125, -5.5498046875, -5.14453125, -4.7392578125, -4.333984375, -3.9287109375, -3.5234375, -3.1181640625, -2.712890625, -2.3076171875, -1.90234375, -1.4970703125, -1.091796875, -0.6865234375, -0.28125, 0.1240234375, 0.529296875, 0.9345703125, 1.33984375, 1.7451171875, 2.150390625, 2.5556640625, 2.9609375, 3.3662109375, 3.771484375, 4.1767578125, 4.58203125, 4.9873046875, 5.392578125, 5.7978515625, 6.203125, 6.6083984375, 7.013671875, 7.4189453125, 7.82421875, 8.2294921875, 8.634765625, 9.0400390625, 9.4453125, 9.8505859375, 10.255859375, 10.6611328125, 11.06640625, 11.4716796875, 11.876953125, 12.2822265625, 12.6875]}, "gradients/decoder.transformer.h.3.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0, 1.0, 2.0, 5.0, 5.0, 4.0, 7.0, 7.0, 15.0, 15.0, 15.0, 23.0, 17.0, 29.0, 37.0, 37.0, 39.0, 43.0, 79.0, 86.0, 127.0, 262.0, 1413.0, 226.0, 128.0, 85.0, 56.0, 49.0, 46.0, 32.0, 28.0, 28.0, 19.0, 24.0, 15.0, 12.0, 9.0, 9.0, 3.0, 4.0, 1.0, 6.0, 2.0, 1.0, 2.0, 1.0, 1.0, 4.0, 0.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-20.046875, -19.388427734375, -18.72998046875, -18.071533203125, -17.4130859375, -16.754638671875, -16.09619140625, -15.437744140625, -14.779296875, -14.120849609375, -13.46240234375, -12.803955078125, -12.1455078125, -11.487060546875, -10.82861328125, -10.170166015625, -9.51171875, -8.853271484375, -8.19482421875, -7.536376953125, -6.8779296875, -6.219482421875, -5.56103515625, -4.902587890625, -4.244140625, -3.585693359375, -2.92724609375, -2.268798828125, -1.6103515625, -0.951904296875, -0.29345703125, 0.364990234375, 1.0234375, 1.681884765625, 2.34033203125, 2.998779296875, 3.6572265625, 4.315673828125, 4.97412109375, 5.632568359375, 6.291015625, 6.949462890625, 7.60791015625, 8.266357421875, 8.9248046875, 9.583251953125, 10.24169921875, 10.900146484375, 11.55859375, 12.217041015625, 12.87548828125, 13.533935546875, 14.1923828125, 14.850830078125, 15.50927734375, 16.167724609375, 16.826171875, 17.484619140625, 18.14306640625, 18.801513671875, 19.4599609375, 20.118408203125, 20.77685546875, 21.435302734375, 22.09375]}, "gradients/decoder.transformer.h.3.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 5.0, 6.0, 6.0, 10.0, 11.0, 14.0, 17.0, 31.0, 44.0, 50.0, 86.0, 137.0, 285.0, 517.0, 1069.0, 7230.0, 3129386.0, 4650.0, 1000.0, 447.0, 253.0, 166.0, 82.0, 59.0, 42.0, 29.0, 16.0, 17.0, 14.0, 9.0, 9.0, 1.0, 4.0, 3.0, 1.0, 2.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-125.1875, -121.5537109375, -117.919921875, -114.2861328125, -110.65234375, -107.0185546875, -103.384765625, -99.7509765625, -96.1171875, -92.4833984375, -88.849609375, -85.2158203125, -81.58203125, -77.9482421875, -74.314453125, -70.6806640625, -67.046875, -63.4130859375, -59.779296875, -56.1455078125, -52.51171875, -48.8779296875, -45.244140625, -41.6103515625, -37.9765625, -34.3427734375, -30.708984375, -27.0751953125, -23.44140625, -19.8076171875, -16.173828125, -12.5400390625, -8.90625, -5.2724609375, -1.638671875, 1.9951171875, 5.62890625, 9.2626953125, 12.896484375, 16.5302734375, 20.1640625, 23.7978515625, 27.431640625, 31.0654296875, 34.69921875, 38.3330078125, 41.966796875, 45.6005859375, 49.234375, 52.8681640625, 56.501953125, 60.1357421875, 63.76953125, 67.4033203125, 71.037109375, 74.6708984375, 78.3046875, 81.9384765625, 85.572265625, 89.2060546875, 92.83984375, 96.4736328125, 100.107421875, 103.7412109375, 107.375]}, "gradients/decoder.transformer.h.3.ln_1.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 3.0, 5.0, 18.0, 41.0, 98.0, 179.0, 247.0, 199.0, 119.0, 69.0, 21.0, 8.0, 5.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-71.30699157714844, -68.3917236328125, -65.47645568847656, -62.561187744140625, -59.64592361450195, -56.730655670166016, -53.81538772583008, -50.900123596191406, -47.98485565185547, -45.06958770751953, -42.154319763183594, -39.239051818847656, -36.323787689208984, -33.40851974487305, -30.49325180053711, -27.577985763549805, -24.662715911865234, -21.747447967529297, -18.832181930541992, -15.916913986206055, -13.001646995544434, -10.086380004882812, -7.171112060546875, -4.25584602355957, -1.3405780792236328, 1.5746891498565674, 4.489956378936768, 7.405223846435547, 10.320490837097168, 13.235757827758789, 16.151025772094727, 19.06629180908203, 21.98155975341797, 24.896827697753906, 27.81209373474121, 30.72736167907715, 33.64262771606445, 36.55789566040039, 39.47316360473633, 42.388427734375, 45.30369567871094, 48.218963623046875, 51.13423156738281, 54.04949951171875, 56.96476364135742, 59.88003158569336, 62.7952995300293, 65.71056365966797, 68.62583923339844, 71.54110717773438, 74.45637512207031, 77.37164306640625, 80.28691101074219, 83.20217895507812, 86.11744689941406, 89.03270721435547, 91.9479751586914, 94.86324310302734, 97.77851104736328, 100.69377899169922, 103.60904693603516, 106.52430725097656, 109.4395751953125, 112.35484313964844, 115.27011108398438]}, "gradients/decoder.transformer.h.3.ln_1.bias": {"_type": "histogram", "values": [1.0, 3.0, 1.0, 1.0, 1.0, 1.0, 2.0, 4.0, 5.0, 9.0, 8.0, 6.0, 13.0, 15.0, 14.0, 12.0, 21.0, 25.0, 22.0, 25.0, 25.0, 33.0, 33.0, 42.0, 44.0, 35.0, 36.0, 41.0, 47.0, 47.0, 44.0, 35.0, 31.0, 32.0, 36.0, 38.0, 41.0, 30.0, 29.0, 24.0, 17.0, 15.0, 7.0, 18.0, 11.0, 8.0, 7.0, 6.0, 4.0, 2.0, 5.0, 4.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-49.09221649169922, -47.382896423339844, -45.67357635498047, -43.964256286621094, -42.254940032958984, -40.54561996459961, -38.836299896240234, -37.12697982788086, -35.417659759521484, -33.70833969116211, -31.999021530151367, -30.289701461791992, -28.580381393432617, -26.871063232421875, -25.1617431640625, -23.452423095703125, -21.743104934692383, -20.033784866333008, -18.324466705322266, -16.61514663696289, -14.905826568603516, -13.196507453918457, -11.487188339233398, -9.777868270874023, -8.068549156188965, -6.359229564666748, -4.649909973144531, -2.9405908584594727, -1.2312712669372559, 0.47804832458496094, 2.1873674392700195, 3.8966875076293945, 5.606006622314453, 7.31532621383667, 9.024645805358887, 10.733964920043945, 12.44328498840332, 14.152604103088379, 15.861923217773438, 17.571243286132812, 19.280563354492188, 20.989883422851562, 22.699201583862305, 24.40852165222168, 26.117841720581055, 27.827159881591797, 29.536479949951172, 31.245800018310547, 32.955116271972656, 34.66443634033203, 36.373756408691406, 38.08307647705078, 39.79239273071289, 41.501712799072266, 43.21103286743164, 44.920352935791016, 46.62967300415039, 48.338993072509766, 50.04831314086914, 51.75762939453125, 53.466949462890625, 55.17626953125, 56.885589599609375, 58.59490966796875, 60.304229736328125]}, "gradients/decoder.transformer.h.2.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 4.0, 6.0, 6.0, 10.0, 15.0, 8.0, 7.0, 9.0, 15.0, 21.0, 18.0, 22.0, 22.0, 30.0, 14.0, 37.0, 37.0, 31.0, 41.0, 53.0, 44.0, 29.0, 41.0, 41.0, 34.0, 25.0, 36.0, 38.0, 46.0, 32.0, 22.0, 28.0, 23.0, 16.0, 21.0, 20.0, 25.0, 15.0, 9.0, 8.0, 9.0, 7.0, 6.0, 9.0, 5.0, 4.0, 2.0, 5.0, 7.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0], "bins": [-7.3203125, -7.0911865234375, -6.862060546875, -6.6329345703125, -6.40380859375, -6.1746826171875, -5.945556640625, -5.7164306640625, -5.4873046875, -5.2581787109375, -5.029052734375, -4.7999267578125, -4.57080078125, -4.3416748046875, -4.112548828125, -3.8834228515625, -3.654296875, -3.4251708984375, -3.196044921875, -2.9669189453125, -2.73779296875, -2.5086669921875, -2.279541015625, -2.0504150390625, -1.8212890625, -1.5921630859375, -1.363037109375, -1.1339111328125, -0.90478515625, -0.6756591796875, -0.446533203125, -0.2174072265625, 0.01171875, 0.2408447265625, 0.469970703125, 0.6990966796875, 0.92822265625, 1.1573486328125, 1.386474609375, 1.6156005859375, 1.8447265625, 2.0738525390625, 2.302978515625, 2.5321044921875, 2.76123046875, 2.9903564453125, 3.219482421875, 3.4486083984375, 3.677734375, 3.9068603515625, 4.135986328125, 4.3651123046875, 4.59423828125, 4.8233642578125, 5.052490234375, 5.2816162109375, 5.5107421875, 5.7398681640625, 5.968994140625, 6.1981201171875, 6.42724609375, 6.6563720703125, 6.885498046875, 7.1146240234375, 7.34375]}, "gradients/decoder.transformer.h.2.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 3.0, 0.0, 0.0, 1.0, 2.0, 0.0, 3.0, 4.0, 7.0, 9.0, 11.0, 11.0, 17.0, 35.0, 67.0, 101.0, 185.0, 422.0, 962.0, 2049.0, 5368.0, 19435.0, 147774.0, 1532722.0, 2200989.0, 245471.0, 27587.0, 6747.0, 2347.0, 996.0, 488.0, 231.0, 113.0, 52.0, 29.0, 16.0, 10.0, 10.0, 4.0, 3.0, 7.0, 2.0, 2.0, 2.0, 1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-18.0625, -17.44189453125, -16.8212890625, -16.20068359375, -15.580078125, -14.95947265625, -14.3388671875, -13.71826171875, -13.09765625, -12.47705078125, -11.8564453125, -11.23583984375, -10.615234375, -9.99462890625, -9.3740234375, -8.75341796875, -8.1328125, -7.51220703125, -6.8916015625, -6.27099609375, -5.650390625, -5.02978515625, -4.4091796875, -3.78857421875, -3.16796875, -2.54736328125, -1.9267578125, -1.30615234375, -0.685546875, -0.06494140625, 0.5556640625, 1.17626953125, 1.796875, 2.41748046875, 3.0380859375, 3.65869140625, 4.279296875, 4.89990234375, 5.5205078125, 6.14111328125, 6.76171875, 7.38232421875, 8.0029296875, 8.62353515625, 9.244140625, 9.86474609375, 10.4853515625, 11.10595703125, 11.7265625, 12.34716796875, 12.9677734375, 13.58837890625, 14.208984375, 14.82958984375, 15.4501953125, 16.07080078125, 16.69140625, 17.31201171875, 17.9326171875, 18.55322265625, 19.173828125, 19.79443359375, 20.4150390625, 21.03564453125, 21.65625]}, "gradients/decoder.transformer.h.2.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 4.0, 3.0, 4.0, 10.0, 12.0, 32.0, 35.0, 48.0, 95.0, 127.0, 213.0, 313.0, 499.0, 780.0, 671.0, 462.0, 288.0, 156.0, 116.0, 86.0, 45.0, 21.0, 15.0, 26.0, 17.0, 3.0, 2.0, 1.0, 2.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-13.875, -13.2119140625, -12.548828125, -11.8857421875, -11.22265625, -10.5595703125, -9.896484375, -9.2333984375, -8.5703125, -7.9072265625, -7.244140625, -6.5810546875, -5.91796875, -5.2548828125, -4.591796875, -3.9287109375, -3.265625, -2.6025390625, -1.939453125, -1.2763671875, -0.61328125, 0.0498046875, 0.712890625, 1.3759765625, 2.0390625, 2.7021484375, 3.365234375, 4.0283203125, 4.69140625, 5.3544921875, 6.017578125, 6.6806640625, 7.34375, 8.0068359375, 8.669921875, 9.3330078125, 9.99609375, 10.6591796875, 11.322265625, 11.9853515625, 12.6484375, 13.3115234375, 13.974609375, 14.6376953125, 15.30078125, 15.9638671875, 16.626953125, 17.2900390625, 17.953125, 18.6162109375, 19.279296875, 19.9423828125, 20.60546875, 21.2685546875, 21.931640625, 22.5947265625, 23.2578125, 23.9208984375, 24.583984375, 25.2470703125, 25.91015625, 26.5732421875, 27.236328125, 27.8994140625, 28.5625]}, "gradients/decoder.transformer.h.2.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 8.0, 9.0, 9.0, 17.0, 37.0, 75.0, 124.0, 298.0, 731.0, 2257.0, 19488.0, 1195722.0, 2929368.0, 41258.0, 3253.0, 930.0, 392.0, 157.0, 74.0, 39.0, 25.0, 10.0, 6.0, 4.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-42.25, -40.93408203125, -39.6181640625, -38.30224609375, -36.986328125, -35.67041015625, -34.3544921875, -33.03857421875, -31.72265625, -30.40673828125, -29.0908203125, -27.77490234375, -26.458984375, -25.14306640625, -23.8271484375, -22.51123046875, -21.1953125, -19.87939453125, -18.5634765625, -17.24755859375, -15.931640625, -14.61572265625, -13.2998046875, -11.98388671875, -10.66796875, -9.35205078125, -8.0361328125, -6.72021484375, -5.404296875, -4.08837890625, -2.7724609375, -1.45654296875, -0.140625, 1.17529296875, 2.4912109375, 3.80712890625, 5.123046875, 6.43896484375, 7.7548828125, 9.07080078125, 10.38671875, 11.70263671875, 13.0185546875, 14.33447265625, 15.650390625, 16.96630859375, 18.2822265625, 19.59814453125, 20.9140625, 22.22998046875, 23.5458984375, 24.86181640625, 26.177734375, 27.49365234375, 28.8095703125, 30.12548828125, 31.44140625, 32.75732421875, 34.0732421875, 35.38916015625, 36.705078125, 38.02099609375, 39.3369140625, 40.65283203125, 41.96875]}, "gradients/decoder.transformer.h.2.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 11.0, 54.0, 245.0, 444.0, 216.0, 38.0, 11.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-94.20370483398438, -87.2127685546875, -80.22183990478516, -73.23091125488281, -66.23997497558594, -59.24904251098633, -52.25811004638672, -45.26717758178711, -38.2762451171875, -31.28531265258789, -24.29438018798828, -17.303447723388672, -10.312515258789062, -3.321582794189453, 3.6693496704101562, 10.660282135009766, 17.651214599609375, 24.642147064208984, 31.633079528808594, 38.6240119934082, 45.61494445800781, 52.60587692260742, 59.59680938720703, 66.58773803710938, 73.57867431640625, 80.56961059570312, 87.56053924560547, 94.55146789550781, 101.54240417480469, 108.53334045410156, 115.5242691040039, 122.51519775390625, 129.50613403320312, 136.4970703125, 143.48800659179688, 150.4789276123047, 157.46986389160156, 164.46080017089844, 171.45172119140625, 178.44265747070312, 185.43359375, 192.42453002929688, 199.41546630859375, 206.40638732910156, 213.39732360839844, 220.3882598876953, 227.37918090820312, 234.3701171875, 241.36105346679688, 248.35198974609375, 255.34292602539062, 262.3338623046875, 269.32476806640625, 276.3157043457031, 283.306640625, 290.2975769042969, 297.28851318359375, 304.2794494628906, 311.2703857421875, 318.2613220214844, 325.25225830078125, 332.2431640625, 339.2341003417969, 346.22503662109375, 353.2159729003906]}, "gradients/decoder.transformer.h.2.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 1.0, 4.0, 2.0, 3.0, 6.0, 7.0, 3.0, 7.0, 10.0, 7.0, 11.0, 12.0, 9.0, 17.0, 17.0, 16.0, 23.0, 14.0, 24.0, 21.0, 25.0, 36.0, 29.0, 40.0, 50.0, 39.0, 31.0, 32.0, 38.0, 38.0, 33.0, 32.0, 33.0, 38.0, 29.0, 30.0, 24.0, 28.0, 41.0, 28.0, 26.0, 11.0, 14.0, 14.0, 12.0, 6.0, 14.0, 6.0, 8.0, 6.0, 2.0, 3.0, 3.0, 1.0, 4.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-45.37238311767578, -43.9488639831543, -42.52534484863281, -41.10182571411133, -39.678306579589844, -38.254783630371094, -36.83126449584961, -35.407745361328125, -33.98422622680664, -32.560707092285156, -31.137187957763672, -29.713666915893555, -28.29014778137207, -26.866628646850586, -25.44310760498047, -24.019588470458984, -22.5960693359375, -21.172550201416016, -19.74903106689453, -18.325510025024414, -16.90199089050293, -15.478471755981445, -14.054951667785645, -12.631431579589844, -11.20791244506836, -9.784393310546875, -8.360873222351074, -6.937353610992432, -5.513833999633789, -4.0903143882751465, -2.666794776916504, -1.2432746887207031, 0.18024826049804688, 1.6037678718566895, 3.027287483215332, 4.450807094573975, 5.874326705932617, 7.29784631729126, 8.721365928649902, 10.144886016845703, 11.568405151367188, 12.991924285888672, 14.415444374084473, 15.838964462280273, 17.262483596801758, 18.686002731323242, 20.10952377319336, 21.533042907714844, 22.956562042236328, 24.380081176757812, 25.803600311279297, 27.227121353149414, 28.6506404876709, 30.074159622192383, 31.4976806640625, 32.921199798583984, 34.34471893310547, 35.76823806762695, 37.19175720214844, 38.61527633666992, 40.038795471191406, 41.462318420410156, 42.88583755493164, 44.309356689453125, 45.73287582397461]}, "gradients/decoder.transformer.h.2.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 1.0, 2.0, 4.0, 4.0, 3.0, 6.0, 10.0, 9.0, 9.0, 8.0, 8.0, 23.0, 22.0, 22.0, 35.0, 26.0, 29.0, 35.0, 30.0, 45.0, 42.0, 65.0, 40.0, 40.0, 39.0, 32.0, 35.0, 50.0, 40.0, 47.0, 32.0, 28.0, 28.0, 18.0, 23.0, 24.0, 17.0, 15.0, 8.0, 14.0, 10.0, 12.0, 8.0, 5.0, 1.0, 4.0, 3.0, 4.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.52734375, -6.317138671875, -6.10693359375, -5.896728515625, -5.6865234375, -5.476318359375, -5.26611328125, -5.055908203125, -4.845703125, -4.635498046875, -4.42529296875, -4.215087890625, -4.0048828125, -3.794677734375, -3.58447265625, -3.374267578125, -3.1640625, -2.953857421875, -2.74365234375, -2.533447265625, -2.3232421875, -2.113037109375, -1.90283203125, -1.692626953125, -1.482421875, -1.272216796875, -1.06201171875, -0.851806640625, -0.6416015625, -0.431396484375, -0.22119140625, -0.010986328125, 0.19921875, 0.409423828125, 0.61962890625, 0.829833984375, 1.0400390625, 1.250244140625, 1.46044921875, 1.670654296875, 1.880859375, 2.091064453125, 2.30126953125, 2.511474609375, 2.7216796875, 2.931884765625, 3.14208984375, 3.352294921875, 3.5625, 3.772705078125, 3.98291015625, 4.193115234375, 4.4033203125, 4.613525390625, 4.82373046875, 5.033935546875, 5.244140625, 5.454345703125, 5.66455078125, 5.874755859375, 6.0849609375, 6.295166015625, 6.50537109375, 6.715576171875, 6.92578125]}, "gradients/decoder.transformer.h.2.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 6.0, 3.0, 5.0, 17.0, 24.0, 28.0, 37.0, 59.0, 80.0, 112.0, 191.0, 253.0, 391.0, 554.0, 875.0, 1241.0, 2019.0, 3220.0, 5085.0, 8242.0, 13376.0, 21468.0, 35251.0, 56205.0, 88722.0, 131033.0, 169336.0, 165444.0, 124499.0, 82927.0, 51876.0, 32847.0, 20006.0, 12334.0, 7497.0, 4791.0, 3011.0, 1925.0, 1205.0, 770.0, 544.0, 357.0, 243.0, 140.0, 128.0, 62.0, 46.0, 20.0, 17.0, 22.0, 12.0, 3.0, 3.0, 4.0, 2.0, 2.0, 1.0], "bins": [-0.85791015625, -0.8326187133789062, -0.8073272705078125, -0.7820358276367188, -0.756744384765625, -0.7314529418945312, -0.7061614990234375, -0.6808700561523438, -0.65557861328125, -0.6302871704101562, -0.6049957275390625, -0.5797042846679688, -0.554412841796875, -0.5291213989257812, -0.5038299560546875, -0.47853851318359375, -0.4532470703125, -0.42795562744140625, -0.4026641845703125, -0.37737274169921875, -0.352081298828125, -0.32678985595703125, -0.3014984130859375, -0.27620697021484375, -0.25091552734375, -0.22562408447265625, -0.2003326416015625, -0.17504119873046875, -0.149749755859375, -0.12445831298828125, -0.0991668701171875, -0.07387542724609375, -0.048583984375, -0.02329254150390625, 0.0019989013671875, 0.02729034423828125, 0.052581787109375, 0.07787322998046875, 0.1031646728515625, 0.12845611572265625, 0.15374755859375, 0.17903900146484375, 0.2043304443359375, 0.22962188720703125, 0.254913330078125, 0.28020477294921875, 0.3054962158203125, 0.33078765869140625, 0.3560791015625, 0.38137054443359375, 0.4066619873046875, 0.43195343017578125, 0.457244873046875, 0.48253631591796875, 0.5078277587890625, 0.5331192016601562, 0.55841064453125, 0.5837020874023438, 0.6089935302734375, 0.6342849731445312, 0.659576416015625, 0.6848678588867188, 0.7101593017578125, 0.7354507446289062, 0.7607421875]}, "gradients/decoder.transformer.h.2.crossattention.c_attn.bias": {"_type": "histogram", "values": [4.0, 1.0, 0.0, 0.0, 3.0, 0.0, 2.0, 3.0, 4.0, 4.0, 5.0, 5.0, 5.0, 10.0, 17.0, 15.0, 14.0, 21.0, 19.0, 11.0, 28.0, 23.0, 23.0, 26.0, 32.0, 27.0, 31.0, 27.0, 35.0, 42.0, 42.0, 1072.0, 44.0, 38.0, 24.0, 39.0, 45.0, 29.0, 33.0, 30.0, 34.0, 21.0, 16.0, 21.0, 15.0, 11.0, 16.0, 12.0, 17.0, 10.0, 4.0, 3.0, 3.0, 9.0, 8.0, 2.0, 1.0, 1.0, 4.0, 1.0, 1.0, 2.0, 1.0, 2.0], "bins": [-3.935546875, -3.810943603515625, -3.68634033203125, -3.561737060546875, -3.4371337890625, -3.312530517578125, -3.18792724609375, -3.063323974609375, -2.938720703125, -2.814117431640625, -2.68951416015625, -2.564910888671875, -2.4403076171875, -2.315704345703125, -2.19110107421875, -2.066497802734375, -1.94189453125, -1.817291259765625, -1.69268798828125, -1.568084716796875, -1.4434814453125, -1.318878173828125, -1.19427490234375, -1.069671630859375, -0.945068359375, -0.820465087890625, -0.69586181640625, -0.571258544921875, -0.4466552734375, -0.322052001953125, -0.19744873046875, -0.072845458984375, 0.0517578125, 0.176361083984375, 0.30096435546875, 0.425567626953125, 0.5501708984375, 0.674774169921875, 0.79937744140625, 0.923980712890625, 1.048583984375, 1.173187255859375, 1.29779052734375, 1.422393798828125, 1.5469970703125, 1.671600341796875, 1.79620361328125, 1.920806884765625, 2.04541015625, 2.170013427734375, 2.29461669921875, 2.419219970703125, 2.5438232421875, 2.668426513671875, 2.79302978515625, 2.917633056640625, 3.042236328125, 3.166839599609375, 3.29144287109375, 3.416046142578125, 3.5406494140625, 3.665252685546875, 3.78985595703125, 3.914459228515625, 4.0390625]}, "gradients/decoder.transformer.h.2.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 0.0, 2.0, 4.0, 6.0, 7.0, 19.0, 33.0, 44.0, 50.0, 80.0, 143.0, 252.0, 349.0, 573.0, 783.0, 1367.0, 2137.0, 3435.0, 5495.0, 8810.0, 14154.0, 22242.0, 34876.0, 53576.0, 80568.0, 114251.0, 153252.0, 1194448.0, 130381.0, 94631.0, 64744.0, 42726.0, 27309.0, 17223.0, 10852.0, 6764.0, 4297.0, 2626.0, 1718.0, 1032.0, 657.0, 431.0, 268.0, 172.0, 132.0, 79.0, 45.0, 29.0, 25.0, 14.0, 7.0, 12.0, 10.0, 6.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.50537109375, -0.48918914794921875, -0.4730072021484375, -0.45682525634765625, -0.440643310546875, -0.42446136474609375, -0.4082794189453125, -0.39209747314453125, -0.37591552734375, -0.35973358154296875, -0.3435516357421875, -0.32736968994140625, -0.311187744140625, -0.29500579833984375, -0.2788238525390625, -0.26264190673828125, -0.2464599609375, -0.23027801513671875, -0.2140960693359375, -0.19791412353515625, -0.181732177734375, -0.16555023193359375, -0.1493682861328125, -0.13318634033203125, -0.11700439453125, -0.10082244873046875, -0.0846405029296875, -0.06845855712890625, -0.052276611328125, -0.03609466552734375, -0.0199127197265625, -0.00373077392578125, 0.012451171875, 0.02863311767578125, 0.0448150634765625, 0.06099700927734375, 0.077178955078125, 0.09336090087890625, 0.1095428466796875, 0.12572479248046875, 0.14190673828125, 0.15808868408203125, 0.1742706298828125, 0.19045257568359375, 0.206634521484375, 0.22281646728515625, 0.2389984130859375, 0.25518035888671875, 0.2713623046875, 0.28754425048828125, 0.3037261962890625, 0.31990814208984375, 0.336090087890625, 0.35227203369140625, 0.3684539794921875, 0.38463592529296875, 0.40081787109375, 0.41699981689453125, 0.4331817626953125, 0.44936370849609375, 0.465545654296875, 0.48172760009765625, 0.4979095458984375, 0.5140914916992188, 0.5302734375]}, "gradients/decoder.transformer.h.2.crossattention.q_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 0.0, 0.0, 2.0, 0.0, 3.0, 7.0, 3.0, 3.0, 5.0, 6.0, 10.0, 10.0, 10.0, 18.0, 20.0, 25.0, 37.0, 40.0, 50.0, 76.0, 89.0, 87.0, 104.0, 80.0, 62.0, 55.0, 37.0, 30.0, 23.0, 20.0, 10.0, 15.0, 9.0, 12.0, 11.0, 6.0, 11.0, 6.0, 3.0, 3.0, 3.0, 1.0, 4.0, 1.0, 1.0, 0.0, 1.0, 0.0, 4.0, 1.0, 2.0], "bins": [-0.01259613037109375, -0.012238025665283203, -0.011879920959472656, -0.01152181625366211, -0.011163711547851562, -0.010805606842041016, -0.010447502136230469, -0.010089397430419922, -0.009731292724609375, -0.009373188018798828, -0.009015083312988281, -0.008656978607177734, -0.008298873901367188, -0.00794076919555664, -0.007582664489746094, -0.007224559783935547, -0.006866455078125, -0.006508350372314453, -0.006150245666503906, -0.005792140960693359, -0.0054340362548828125, -0.005075931549072266, -0.004717826843261719, -0.004359722137451172, -0.004001617431640625, -0.003643512725830078, -0.0032854080200195312, -0.0029273033142089844, -0.0025691986083984375, -0.0022110939025878906, -0.0018529891967773438, -0.0014948844909667969, -0.00113677978515625, -0.0007786750793457031, -0.00042057037353515625, -6.246566772460938e-05, 0.0002956390380859375, 0.0006537437438964844, 0.0010118484497070312, 0.0013699531555175781, 0.001728057861328125, 0.002086162567138672, 0.0024442672729492188, 0.0028023719787597656, 0.0031604766845703125, 0.0035185813903808594, 0.0038766860961914062, 0.004234790802001953, 0.0045928955078125, 0.004951000213623047, 0.005309104919433594, 0.005667209625244141, 0.0060253143310546875, 0.006383419036865234, 0.006741523742675781, 0.007099628448486328, 0.007457733154296875, 0.007815837860107422, 0.008173942565917969, 0.008532047271728516, 0.008890151977539062, 0.00924825668334961, 0.009606361389160156, 0.009964466094970703, 0.01032257080078125]}, "gradients/decoder.transformer.h.2.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 4.0, 1.0, 5.0, 1.0, 3.0, 3.0, 1.0, 9.0, 6.0, 6.0, 13.0, 11.0, 27.0, 18.0, 38.0, 17.0, 35.0, 41.0, 48.0, 86.0, 119.0, 198.0, 304.0, 594.0, 5222.0, 1020513.0, 19324.0, 810.0, 379.0, 202.0, 144.0, 99.0, 67.0, 42.0, 41.0, 27.0, 19.0, 22.0, 16.0, 11.0, 9.0, 10.0, 6.0, 2.0, 3.0, 2.0, 0.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 2.0, 0.0, 0.0, 2.0], "bins": [-0.184814453125, -0.1785736083984375, -0.172332763671875, -0.1660919189453125, -0.15985107421875, -0.1536102294921875, -0.147369384765625, -0.1411285400390625, -0.1348876953125, -0.1286468505859375, -0.122406005859375, -0.1161651611328125, -0.10992431640625, -0.1036834716796875, -0.097442626953125, -0.0912017822265625, -0.0849609375, -0.0787200927734375, -0.072479248046875, -0.0662384033203125, -0.05999755859375, -0.0537567138671875, -0.047515869140625, -0.0412750244140625, -0.0350341796875, -0.0287933349609375, -0.022552490234375, -0.0163116455078125, -0.01007080078125, -0.0038299560546875, 0.002410888671875, 0.0086517333984375, 0.014892578125, 0.0211334228515625, 0.027374267578125, 0.0336151123046875, 0.03985595703125, 0.0460968017578125, 0.052337646484375, 0.0585784912109375, 0.0648193359375, 0.0710601806640625, 0.077301025390625, 0.0835418701171875, 0.08978271484375, 0.0960235595703125, 0.102264404296875, 0.1085052490234375, 0.11474609375, 0.1209869384765625, 0.127227783203125, 0.1334686279296875, 0.13970947265625, 0.1459503173828125, 0.152191162109375, 0.1584320068359375, 0.1646728515625, 0.1709136962890625, 0.177154541015625, 0.1833953857421875, 0.18963623046875, 0.1958770751953125, 0.202117919921875, 0.2083587646484375, 0.214599609375]}, "gradients/decoder.transformer.h.2.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 978.0, 34.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.04295317083597183, -0.037378452718257904, -0.031803734600543976, -0.026229020208120346, -0.020654302090406418, -0.01507958397269249, -0.00950486958026886, -0.003930151462554932, 0.0016445666551589966, 0.00721928384155035, 0.012794001027941704, 0.018368717283010483, 0.02394343540072441, 0.02951815351843834, 0.03509286791086197, 0.0406675860285759, 0.046242304146289825, 0.051817022264003754, 0.05739174038171768, 0.06296645104885101, 0.06854116916656494, 0.07411588728427887, 0.0796906054019928, 0.08526532351970673, 0.09084004163742065, 0.09641475975513458, 0.10198947787284851, 0.10756419599056244, 0.11313891410827637, 0.1187136322259903, 0.12428834289312363, 0.12986305356025696, 0.13543778657913208, 0.141012504696846, 0.14658722281455994, 0.15216194093227386, 0.1577366590499878, 0.16331137716770172, 0.16888609528541565, 0.17446079850196838, 0.1800355315208435, 0.18561024963855743, 0.19118496775627136, 0.1967596858739853, 0.20233440399169922, 0.20790912210941315, 0.21348384022712708, 0.2190585434436798, 0.22463326156139374, 0.23020797967910767, 0.2357826977968216, 0.24135741591453552, 0.24693213403224945, 0.2525068521499634, 0.2580815553665161, 0.26365628838539124, 0.26923099160194397, 0.2748056948184967, 0.2803804278373718, 0.28595513105392456, 0.2915298640727997, 0.2971045672893524, 0.30267930030822754, 0.3082540035247803, 0.3138287365436554]}, "gradients/decoder.transformer.h.2.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 3.0, 3.0, 3.0, 4.0, 7.0, 11.0, 20.0, 22.0, 25.0, 30.0, 46.0, 60.0, 47.0, 64.0, 68.0, 71.0, 66.0, 55.0, 57.0, 50.0, 48.0, 50.0, 38.0, 40.0, 41.0, 23.0, 19.0, 11.0, 12.0, 5.0, 8.0, 3.0, 2.0, 0.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.018280208110809326, -0.01781718246638775, -0.01735415682196617, -0.016891131177544594, -0.016428105533123016, -0.01596507988870144, -0.015502054244279861, -0.015039028599858284, -0.014576002955436707, -0.014112977311015129, -0.013649951666593552, -0.013186926022171974, -0.012723900377750397, -0.01226087473332882, -0.011797849088907242, -0.011334823444485664, -0.010871797800064087, -0.01040877215564251, -0.009945746511220932, -0.009482720866799355, -0.009019695222377777, -0.0085566695779562, -0.008093643933534622, -0.007630618289113045, -0.007167592644691467, -0.00670456700026989, -0.006241541355848312, -0.005778515711426735, -0.0053154900670051575, -0.00485246442258358, -0.0043894387781620026, -0.003926413133740425, -0.0034633874893188477, -0.00300036184489727, -0.0025373362004756927, -0.0020743105560541153, -0.0016112849116325378, -0.0011482592672109604, -0.0006852336227893829, -0.00022220797836780548, 0.00024081766605377197, 0.0007038433104753494, 0.0011668689548969269, 0.0016298945993185043, 0.002092920243740082, 0.0025559458881616592, 0.0030189715325832367, 0.003481997177004814, 0.003945022821426392, 0.004408048465847969, 0.0048710741102695465, 0.005334099754691124, 0.005797125399112701, 0.006260151043534279, 0.006723176687955856, 0.007186202332377434, 0.007649227976799011, 0.008112253621220589, 0.008575279265642166, 0.009038304910063744, 0.009501330554485321, 0.009964356198906898, 0.010427381843328476, 0.010890407487750053, 0.01135343313217163]}, "gradients/decoder.transformer.h.2.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 1.0, 2.0, 3.0, 4.0, 4.0, 5.0, 11.0, 9.0, 9.0, 8.0, 8.0, 23.0, 20.0, 24.0, 35.0, 26.0, 29.0, 34.0, 31.0, 45.0, 42.0, 65.0, 37.0, 43.0, 39.0, 32.0, 35.0, 49.0, 41.0, 45.0, 32.0, 30.0, 28.0, 17.0, 24.0, 24.0, 16.0, 15.0, 9.0, 14.0, 9.0, 13.0, 8.0, 4.0, 2.0, 4.0, 3.0, 4.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.53125, -6.321044921875, -6.11083984375, -5.900634765625, -5.6904296875, -5.480224609375, -5.27001953125, -5.059814453125, -4.849609375, -4.639404296875, -4.42919921875, -4.218994140625, -4.0087890625, -3.798583984375, -3.58837890625, -3.378173828125, -3.16796875, -2.957763671875, -2.74755859375, -2.537353515625, -2.3271484375, -2.116943359375, -1.90673828125, -1.696533203125, -1.486328125, -1.276123046875, -1.06591796875, -0.855712890625, -0.6455078125, -0.435302734375, -0.22509765625, -0.014892578125, 0.1953125, 0.405517578125, 0.61572265625, 0.825927734375, 1.0361328125, 1.246337890625, 1.45654296875, 1.666748046875, 1.876953125, 2.087158203125, 2.29736328125, 2.507568359375, 2.7177734375, 2.927978515625, 3.13818359375, 3.348388671875, 3.55859375, 3.768798828125, 3.97900390625, 4.189208984375, 4.3994140625, 4.609619140625, 4.81982421875, 5.030029296875, 5.240234375, 5.450439453125, 5.66064453125, 5.870849609375, 6.0810546875, 6.291259765625, 6.50146484375, 6.711669921875, 6.921875]}, "gradients/decoder.transformer.h.2.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 1.0, 4.0, 2.0, 4.0, 0.0, 8.0, 4.0, 15.0, 18.0, 24.0, 37.0, 48.0, 79.0, 97.0, 125.0, 148.0, 214.0, 311.0, 409.0, 566.0, 726.0, 940.0, 1425.0, 2096.0, 3437.0, 7887.0, 34646.0, 232621.0, 616458.0, 112838.0, 18769.0, 5393.0, 2665.0, 1794.0, 1312.0, 927.0, 647.0, 517.0, 356.0, 270.0, 209.0, 148.0, 102.0, 74.0, 54.0, 36.0, 35.0, 15.0, 20.0, 8.0, 9.0, 8.0, 4.0, 4.0, 2.0, 1.0, 2.0, 1.0, 1.0, 1.0], "bins": [-16.78125, -16.261962890625, -15.74267578125, -15.223388671875, -14.7041015625, -14.184814453125, -13.66552734375, -13.146240234375, -12.626953125, -12.107666015625, -11.58837890625, -11.069091796875, -10.5498046875, -10.030517578125, -9.51123046875, -8.991943359375, -8.47265625, -7.953369140625, -7.43408203125, -6.914794921875, -6.3955078125, -5.876220703125, -5.35693359375, -4.837646484375, -4.318359375, -3.799072265625, -3.27978515625, -2.760498046875, -2.2412109375, -1.721923828125, -1.20263671875, -0.683349609375, -0.1640625, 0.355224609375, 0.87451171875, 1.393798828125, 1.9130859375, 2.432373046875, 2.95166015625, 3.470947265625, 3.990234375, 4.509521484375, 5.02880859375, 5.548095703125, 6.0673828125, 6.586669921875, 7.10595703125, 7.625244140625, 8.14453125, 8.663818359375, 9.18310546875, 9.702392578125, 10.2216796875, 10.740966796875, 11.26025390625, 11.779541015625, 12.298828125, 12.818115234375, 13.33740234375, 13.856689453125, 14.3759765625, 14.895263671875, 15.41455078125, 15.933837890625, 16.453125]}, "gradients/decoder.transformer.h.2.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 0.0, 5.0, 6.0, 2.0, 2.0, 4.0, 9.0, 14.0, 9.0, 17.0, 12.0, 20.0, 22.0, 31.0, 35.0, 28.0, 34.0, 51.0, 50.0, 69.0, 121.0, 209.0, 1396.0, 277.0, 158.0, 93.0, 54.0, 46.0, 45.0, 40.0, 32.0, 27.0, 17.0, 25.0, 16.0, 11.0, 11.0, 9.0, 11.0, 10.0, 6.0, 0.0, 4.0, 6.0, 2.0, 6.0, 2.0, 3.0, 2.0, 1.0, 0.0, 1.0, 1.0], "bins": [-16.59375, -16.1019287109375, -15.610107421875, -15.1182861328125, -14.62646484375, -14.1346435546875, -13.642822265625, -13.1510009765625, -12.6591796875, -12.1673583984375, -11.675537109375, -11.1837158203125, -10.69189453125, -10.2000732421875, -9.708251953125, -9.2164306640625, -8.724609375, -8.2327880859375, -7.740966796875, -7.2491455078125, -6.75732421875, -6.2655029296875, -5.773681640625, -5.2818603515625, -4.7900390625, -4.2982177734375, -3.806396484375, -3.3145751953125, -2.82275390625, -2.3309326171875, -1.839111328125, -1.3472900390625, -0.85546875, -0.3636474609375, 0.128173828125, 0.6199951171875, 1.11181640625, 1.6036376953125, 2.095458984375, 2.5872802734375, 3.0791015625, 3.5709228515625, 4.062744140625, 4.5545654296875, 5.04638671875, 5.5382080078125, 6.030029296875, 6.5218505859375, 7.013671875, 7.5054931640625, 7.997314453125, 8.4891357421875, 8.98095703125, 9.4727783203125, 9.964599609375, 10.4564208984375, 10.9482421875, 11.4400634765625, 11.931884765625, 12.4237060546875, 12.91552734375, 13.4073486328125, 13.899169921875, 14.3909912109375, 14.8828125]}, "gradients/decoder.transformer.h.2.attn.c_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 2.0, 0.0, 2.0, 2.0, 2.0, 6.0, 3.0, 4.0, 9.0, 5.0, 17.0, 21.0, 13.0, 20.0, 26.0, 33.0, 50.0, 82.0, 83.0, 141.0, 250.0, 434.0, 827.0, 1957.0, 11040.0, 3109439.0, 16920.0, 2208.0, 868.0, 436.0, 246.0, 171.0, 109.0, 72.0, 49.0, 46.0, 33.0, 24.0, 16.0, 12.0, 12.0, 5.0, 6.0, 4.0, 4.0, 3.0, 2.0, 3.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-59.25, -57.1748046875, -55.099609375, -53.0244140625, -50.94921875, -48.8740234375, -46.798828125, -44.7236328125, -42.6484375, -40.5732421875, -38.498046875, -36.4228515625, -34.34765625, -32.2724609375, -30.197265625, -28.1220703125, -26.046875, -23.9716796875, -21.896484375, -19.8212890625, -17.74609375, -15.6708984375, -13.595703125, -11.5205078125, -9.4453125, -7.3701171875, -5.294921875, -3.2197265625, -1.14453125, 0.9306640625, 3.005859375, 5.0810546875, 7.15625, 9.2314453125, 11.306640625, 13.3818359375, 15.45703125, 17.5322265625, 19.607421875, 21.6826171875, 23.7578125, 25.8330078125, 27.908203125, 29.9833984375, 32.05859375, 34.1337890625, 36.208984375, 38.2841796875, 40.359375, 42.4345703125, 44.509765625, 46.5849609375, 48.66015625, 50.7353515625, 52.810546875, 54.8857421875, 56.9609375, 59.0361328125, 61.111328125, 63.1865234375, 65.26171875, 67.3369140625, 69.412109375, 71.4873046875, 73.5625]}, "gradients/decoder.transformer.h.2.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 6.0, 23.0, 137.0, 274.0, 299.0, 194.0, 62.0, 15.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-176.9239959716797, -172.71615600585938, -168.50831604003906, -164.3004913330078, -160.0926513671875, -155.8848114013672, -151.67697143554688, -147.46914672851562, -143.2613067626953, -139.053466796875, -134.8456268310547, -130.63780212402344, -126.42996215820312, -122.22212219238281, -118.0142822265625, -113.80644989013672, -109.5986099243164, -105.3907699584961, -101.18293762207031, -96.97509765625, -92.76726531982422, -88.5594253540039, -84.35159301757812, -80.14375305175781, -75.9359130859375, -71.72807312011719, -67.5202407836914, -63.312400817871094, -59.10456848144531, -54.896728515625, -50.68889236450195, -46.481056213378906, -42.273231506347656, -38.06539535522461, -33.85755920410156, -29.649721145629883, -25.441884994506836, -21.23404884338379, -17.02621078491211, -12.818374633789062, -8.610538482666016, -4.4027018547058105, -0.19486522674560547, 4.012971878051758, 8.220808029174805, 12.428644180297852, 16.63648223876953, 20.844318389892578, 25.052154541015625, 29.259990692138672, 33.46782684326172, 37.67566680908203, 41.88349914550781, 46.091339111328125, 50.29917526245117, 54.50701141357422, 58.714847564697266, 62.92268371582031, 67.13052368164062, 71.3383560180664, 75.54619598388672, 79.7540283203125, 83.96186828613281, 88.16970825195312, 92.3775405883789]}, "gradients/decoder.transformer.h.2.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 5.0, 3.0, 3.0, 2.0, 2.0, 1.0, 2.0, 3.0, 5.0, 12.0, 8.0, 10.0, 17.0, 15.0, 18.0, 19.0, 14.0, 17.0, 17.0, 31.0, 25.0, 24.0, 27.0, 32.0, 33.0, 40.0, 50.0, 43.0, 34.0, 32.0, 34.0, 45.0, 43.0, 33.0, 35.0, 28.0, 35.0, 29.0, 28.0, 21.0, 20.0, 26.0, 20.0, 16.0, 11.0, 12.0, 6.0, 6.0, 5.0, 2.0, 6.0, 4.0, 2.0, 3.0, 1.0, 4.0, 1.0, 1.0, 1.0], "bins": [-46.40268325805664, -45.01646423339844, -43.630245208740234, -42.24402618408203, -40.857810974121094, -39.47159194946289, -38.08537292480469, -36.699153900146484, -35.31293487548828, -33.92671585083008, -32.540496826171875, -31.154279708862305, -29.768062591552734, -28.38184356689453, -26.995624542236328, -25.609405517578125, -24.223190307617188, -22.836971282958984, -21.450754165649414, -20.06453514099121, -18.67831802368164, -17.292098999023438, -15.905879974365234, -14.519661903381348, -13.133443832397461, -11.747225761413574, -10.361007690429688, -8.974788665771484, -7.588570594787598, -6.202352523803711, -4.816133499145508, -3.429915428161621, -2.043701171875, -0.6574828624725342, 0.7287354469299316, 2.1149539947509766, 3.5011720657348633, 4.88739013671875, 6.273609161376953, 7.65982723236084, 9.046045303344727, 10.432263374328613, 11.8184814453125, 13.204700469970703, 14.59091854095459, 15.977136611938477, 17.36335563659668, 18.74957275390625, 20.135791778564453, 21.522010803222656, 22.908227920532227, 24.29444694519043, 25.6806640625, 27.066883087158203, 28.453102111816406, 29.83932113647461, 31.22553825378418, 32.61175537109375, 33.99797439575195, 35.384193420410156, 36.77041244506836, 38.15663146972656, 39.5428466796875, 40.9290657043457, 42.315284729003906]}, "gradients/decoder.transformer.h.1.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 2.0, 4.0, 2.0, 2.0, 3.0, 5.0, 3.0, 9.0, 8.0, 11.0, 15.0, 13.0, 16.0, 17.0, 14.0, 22.0, 24.0, 33.0, 22.0, 45.0, 48.0, 41.0, 32.0, 43.0, 44.0, 43.0, 44.0, 41.0, 46.0, 50.0, 35.0, 30.0, 39.0, 34.0, 26.0, 20.0, 22.0, 24.0, 13.0, 9.0, 14.0, 17.0, 5.0, 5.0, 3.0, 8.0, 2.0, 2.0, 3.0, 3.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.7421875, -6.52191162109375, -6.3016357421875, -6.08135986328125, -5.861083984375, -5.64080810546875, -5.4205322265625, -5.20025634765625, -4.97998046875, -4.75970458984375, -4.5394287109375, -4.31915283203125, -4.098876953125, -3.87860107421875, -3.6583251953125, -3.43804931640625, -3.2177734375, -2.99749755859375, -2.7772216796875, -2.55694580078125, -2.336669921875, -2.11639404296875, -1.8961181640625, -1.67584228515625, -1.45556640625, -1.23529052734375, -1.0150146484375, -0.79473876953125, -0.574462890625, -0.35418701171875, -0.1339111328125, 0.08636474609375, 0.306640625, 0.52691650390625, 0.7471923828125, 0.96746826171875, 1.187744140625, 1.40802001953125, 1.6282958984375, 1.84857177734375, 2.06884765625, 2.28912353515625, 2.5093994140625, 2.72967529296875, 2.949951171875, 3.17022705078125, 3.3905029296875, 3.61077880859375, 3.8310546875, 4.05133056640625, 4.2716064453125, 4.49188232421875, 4.712158203125, 4.93243408203125, 5.1527099609375, 5.37298583984375, 5.59326171875, 5.81353759765625, 6.0338134765625, 6.25408935546875, 6.474365234375, 6.69464111328125, 6.9149169921875, 7.13519287109375, 7.35546875]}, "gradients/decoder.transformer.h.1.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 3.0, 6.0, 18.0, 17.0, 28.0, 54.0, 83.0, 171.0, 327.0, 584.0, 1136.0, 2381.0, 5703.0, 17641.0, 104198.0, 1144163.0, 2486860.0, 375153.0, 39207.0, 9504.0, 3680.0, 1618.0, 794.0, 424.0, 234.0, 134.0, 73.0, 38.0, 22.0, 14.0, 6.0, 4.0, 8.0, 0.0, 3.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-14.671875, -14.17041015625, -13.6689453125, -13.16748046875, -12.666015625, -12.16455078125, -11.6630859375, -11.16162109375, -10.66015625, -10.15869140625, -9.6572265625, -9.15576171875, -8.654296875, -8.15283203125, -7.6513671875, -7.14990234375, -6.6484375, -6.14697265625, -5.6455078125, -5.14404296875, -4.642578125, -4.14111328125, -3.6396484375, -3.13818359375, -2.63671875, -2.13525390625, -1.6337890625, -1.13232421875, -0.630859375, -0.12939453125, 0.3720703125, 0.87353515625, 1.375, 1.87646484375, 2.3779296875, 2.87939453125, 3.380859375, 3.88232421875, 4.3837890625, 4.88525390625, 5.38671875, 5.88818359375, 6.3896484375, 6.89111328125, 7.392578125, 7.89404296875, 8.3955078125, 8.89697265625, 9.3984375, 9.89990234375, 10.4013671875, 10.90283203125, 11.404296875, 11.90576171875, 12.4072265625, 12.90869140625, 13.41015625, 13.91162109375, 14.4130859375, 14.91455078125, 15.416015625, 15.91748046875, 16.4189453125, 16.92041015625, 17.421875]}, "gradients/decoder.transformer.h.1.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 4.0, 3.0, 4.0, 8.0, 17.0, 19.0, 21.0, 26.0, 45.0, 68.0, 112.0, 142.0, 222.0, 258.0, 348.0, 470.0, 529.0, 492.0, 397.0, 273.0, 182.0, 138.0, 102.0, 67.0, 46.0, 26.0, 21.0, 18.0, 8.0, 6.0, 7.0, 5.0, 0.0, 2.0, 1.0], "bins": [-18.40625, -18.0013427734375, -17.596435546875, -17.1915283203125, -16.78662109375, -16.3817138671875, -15.976806640625, -15.5718994140625, -15.1669921875, -14.7620849609375, -14.357177734375, -13.9522705078125, -13.54736328125, -13.1424560546875, -12.737548828125, -12.3326416015625, -11.927734375, -11.5228271484375, -11.117919921875, -10.7130126953125, -10.30810546875, -9.9031982421875, -9.498291015625, -9.0933837890625, -8.6884765625, -8.2835693359375, -7.878662109375, -7.4737548828125, -7.06884765625, -6.6639404296875, -6.259033203125, -5.8541259765625, -5.44921875, -5.0443115234375, -4.639404296875, -4.2344970703125, -3.82958984375, -3.4246826171875, -3.019775390625, -2.6148681640625, -2.2099609375, -1.8050537109375, -1.400146484375, -0.9952392578125, -0.59033203125, -0.1854248046875, 0.219482421875, 0.6243896484375, 1.029296875, 1.4342041015625, 1.839111328125, 2.2440185546875, 2.64892578125, 3.0538330078125, 3.458740234375, 3.8636474609375, 4.2685546875, 4.6734619140625, 5.078369140625, 5.4832763671875, 5.88818359375, 6.2930908203125, 6.697998046875, 7.1029052734375, 7.5078125]}, "gradients/decoder.transformer.h.1.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 13.0, 16.0, 16.0, 30.0, 50.0, 84.0, 143.0, 250.0, 468.0, 1150.0, 3681.0, 33689.0, 2580226.0, 1546994.0, 22397.0, 3034.0, 1006.0, 440.0, 232.0, 139.0, 89.0, 49.0, 26.0, 21.0, 17.0, 6.0, 7.0, 1.0, 4.0, 4.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-19.78125, -18.83642578125, -17.8916015625, -16.94677734375, -16.001953125, -15.05712890625, -14.1123046875, -13.16748046875, -12.22265625, -11.27783203125, -10.3330078125, -9.38818359375, -8.443359375, -7.49853515625, -6.5537109375, -5.60888671875, -4.6640625, -3.71923828125, -2.7744140625, -1.82958984375, -0.884765625, 0.06005859375, 1.0048828125, 1.94970703125, 2.89453125, 3.83935546875, 4.7841796875, 5.72900390625, 6.673828125, 7.61865234375, 8.5634765625, 9.50830078125, 10.453125, 11.39794921875, 12.3427734375, 13.28759765625, 14.232421875, 15.17724609375, 16.1220703125, 17.06689453125, 18.01171875, 18.95654296875, 19.9013671875, 20.84619140625, 21.791015625, 22.73583984375, 23.6806640625, 24.62548828125, 25.5703125, 26.51513671875, 27.4599609375, 28.40478515625, 29.349609375, 30.29443359375, 31.2392578125, 32.18408203125, 33.12890625, 34.07373046875, 35.0185546875, 35.96337890625, 36.908203125, 37.85302734375, 38.7978515625, 39.74267578125, 40.6875]}, "gradients/decoder.transformer.h.1.ln_2.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 8.0, 304.0, 647.0, 54.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-136.85989379882812, -124.69395446777344, -112.52802276611328, -100.36209106445312, -88.19615173339844, -76.03021240234375, -63.864280700683594, -51.69834899902344, -39.53240966796875, -27.366474151611328, -15.200538635253906, -3.0346031188964844, 9.131332397460938, 21.29726791381836, 33.46320343017578, 45.62913513183594, 57.795074462890625, 69.96101379394531, 82.12694549560547, 94.29287719726562, 106.45881652832031, 118.624755859375, 130.79067993164062, 142.9566192626953, 155.12255859375, 167.2884979248047, 179.45443725585938, 191.620361328125, 203.7863006591797, 215.95223999023438, 228.1181640625, 240.2841033935547, 252.45001220703125, 264.6159362792969, 276.7818908691406, 288.94781494140625, 301.11376953125, 313.2796936035156, 325.44561767578125, 337.611572265625, 349.7774963378906, 361.94342041015625, 374.109375, 386.2752990722656, 398.44122314453125, 410.607177734375, 422.7731018066406, 434.93902587890625, 447.10498046875, 459.2709045410156, 471.4368591308594, 483.602783203125, 495.76873779296875, 507.9346618652344, 520.1005859375, 532.2665405273438, 544.4324951171875, 556.5984497070312, 568.7643432617188, 580.9302978515625, 593.0962524414062, 605.2621459960938, 617.4281005859375, 629.5940551757812, 641.7599487304688]}, "gradients/decoder.transformer.h.1.ln_2.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 5.0, 4.0, 4.0, 6.0, 11.0, 5.0, 7.0, 12.0, 10.0, 13.0, 18.0, 21.0, 22.0, 22.0, 29.0, 27.0, 27.0, 36.0, 42.0, 36.0, 38.0, 40.0, 48.0, 42.0, 44.0, 46.0, 33.0, 28.0, 46.0, 44.0, 20.0, 30.0, 27.0, 30.0, 21.0, 15.0, 17.0, 16.0, 20.0, 13.0, 4.0, 9.0, 9.0, 4.0, 3.0, 2.0, 2.0, 2.0, 0.0, 4.0, 0.0, 1.0, 3.0, 2.0], "bins": [-42.56147003173828, -41.305564880371094, -40.04965591430664, -38.79375076293945, -37.537845611572266, -36.28194046020508, -35.026031494140625, -33.77012634277344, -32.51422119140625, -31.25831413269043, -30.002408981323242, -28.746501922607422, -27.490596771240234, -26.234689712524414, -24.978782653808594, -23.722877502441406, -22.466970443725586, -21.211063385009766, -19.955158233642578, -18.699251174926758, -17.44334602355957, -16.18743896484375, -14.931532859802246, -13.675626754760742, -12.419720649719238, -11.163814544677734, -9.90790843963623, -8.652002334594727, -7.3960957527160645, -6.1401896476745605, -4.884283065795898, -3.6283769607543945, -2.3724708557128906, -1.1165646314620972, 0.1393415927886963, 1.3952479362487793, 2.651154041290283, 3.907060146331787, 5.162966728210449, 6.418872833251953, 7.674778938293457, 8.930685043334961, 10.186591148376465, 11.442497253417969, 12.698404312133789, 13.954309463500977, 15.210216522216797, 16.466121673583984, 17.722028732299805, 18.977935791015625, 20.233840942382812, 21.489748001098633, 22.74565315246582, 24.00156021118164, 25.257465362548828, 26.51337242126465, 27.76927947998047, 29.02518653869629, 30.281091690063477, 31.536998748779297, 32.792903900146484, 34.04880905151367, 35.304718017578125, 36.56062316894531, 37.8165283203125]}, "gradients/decoder.transformer.h.1.crossattention.c_proj.bias": {"_type": "histogram", "values": [3.0, 1.0, 1.0, 1.0, 1.0, 2.0, 7.0, 4.0, 3.0, 8.0, 6.0, 12.0, 8.0, 11.0, 6.0, 10.0, 12.0, 16.0, 21.0, 16.0, 20.0, 27.0, 28.0, 31.0, 37.0, 36.0, 43.0, 43.0, 40.0, 43.0, 52.0, 47.0, 36.0, 43.0, 31.0, 41.0, 39.0, 36.0, 33.0, 31.0, 17.0, 12.0, 17.0, 13.0, 23.0, 11.0, 9.0, 7.0, 5.0, 7.0, 4.0, 3.0, 1.0, 4.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.93359375, -4.76898193359375, -4.6043701171875, -4.43975830078125, -4.275146484375, -4.11053466796875, -3.9459228515625, -3.78131103515625, -3.61669921875, -3.45208740234375, -3.2874755859375, -3.12286376953125, -2.958251953125, -2.79364013671875, -2.6290283203125, -2.46441650390625, -2.2998046875, -2.13519287109375, -1.9705810546875, -1.80596923828125, -1.641357421875, -1.47674560546875, -1.3121337890625, -1.14752197265625, -0.98291015625, -0.81829833984375, -0.6536865234375, -0.48907470703125, -0.324462890625, -0.15985107421875, 0.0047607421875, 0.16937255859375, 0.333984375, 0.49859619140625, 0.6632080078125, 0.82781982421875, 0.992431640625, 1.15704345703125, 1.3216552734375, 1.48626708984375, 1.65087890625, 1.81549072265625, 1.9801025390625, 2.14471435546875, 2.309326171875, 2.47393798828125, 2.6385498046875, 2.80316162109375, 2.9677734375, 3.13238525390625, 3.2969970703125, 3.46160888671875, 3.626220703125, 3.79083251953125, 3.9554443359375, 4.12005615234375, 4.28466796875, 4.44927978515625, 4.6138916015625, 4.77850341796875, 4.943115234375, 5.10772705078125, 5.2723388671875, 5.43695068359375, 5.6015625]}, "gradients/decoder.transformer.h.1.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 4.0, 4.0, 9.0, 11.0, 11.0, 25.0, 29.0, 55.0, 68.0, 136.0, 214.0, 245.0, 321.0, 531.0, 705.0, 1046.0, 1590.0, 2387.0, 3268.0, 5032.0, 7520.0, 11018.0, 16601.0, 24360.0, 36936.0, 54771.0, 79138.0, 109274.0, 138176.0, 144773.0, 122046.0, 91667.0, 63848.0, 43987.0, 29301.0, 19442.0, 12943.0, 8567.0, 5855.0, 3938.0, 2769.0, 1894.0, 1304.0, 810.0, 634.0, 389.0, 304.0, 200.0, 138.0, 88.0, 64.0, 42.0, 30.0, 22.0, 10.0, 5.0, 6.0, 2.0, 4.0, 5.0], "bins": [-0.52978515625, -0.5138320922851562, -0.4978790283203125, -0.48192596435546875, -0.465972900390625, -0.45001983642578125, -0.4340667724609375, -0.41811370849609375, -0.40216064453125, -0.38620758056640625, -0.3702545166015625, -0.35430145263671875, -0.338348388671875, -0.32239532470703125, -0.3064422607421875, -0.29048919677734375, -0.2745361328125, -0.25858306884765625, -0.2426300048828125, -0.22667694091796875, -0.210723876953125, -0.19477081298828125, -0.1788177490234375, -0.16286468505859375, -0.14691162109375, -0.13095855712890625, -0.1150054931640625, -0.09905242919921875, -0.083099365234375, -0.06714630126953125, -0.0511932373046875, -0.03524017333984375, -0.019287109375, -0.00333404541015625, 0.0126190185546875, 0.02857208251953125, 0.044525146484375, 0.06047821044921875, 0.0764312744140625, 0.09238433837890625, 0.10833740234375, 0.12429046630859375, 0.1402435302734375, 0.15619659423828125, 0.172149658203125, 0.18810272216796875, 0.2040557861328125, 0.22000885009765625, 0.2359619140625, 0.25191497802734375, 0.2678680419921875, 0.28382110595703125, 0.299774169921875, 0.31572723388671875, 0.3316802978515625, 0.34763336181640625, 0.36358642578125, 0.37953948974609375, 0.3954925537109375, 0.41144561767578125, 0.427398681640625, 0.44335174560546875, 0.4593048095703125, 0.47525787353515625, 0.4912109375]}, "gradients/decoder.transformer.h.1.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 2.0, 4.0, 1.0, 5.0, 9.0, 5.0, 12.0, 6.0, 6.0, 12.0, 13.0, 22.0, 27.0, 19.0, 29.0, 27.0, 26.0, 43.0, 39.0, 48.0, 33.0, 46.0, 38.0, 1065.0, 48.0, 45.0, 39.0, 34.0, 30.0, 39.0, 31.0, 39.0, 35.0, 26.0, 32.0, 14.0, 11.0, 12.0, 10.0, 17.0, 9.0, 12.0, 5.0, 2.0, 2.0, 1.0, 1.0, 4.0, 1.0, 0.0, 2.0, 1.0, 1.0, 2.0], "bins": [-3.798828125, -3.685089111328125, -3.57135009765625, -3.457611083984375, -3.3438720703125, -3.230133056640625, -3.11639404296875, -3.002655029296875, -2.888916015625, -2.775177001953125, -2.66143798828125, -2.547698974609375, -2.4339599609375, -2.320220947265625, -2.20648193359375, -2.092742919921875, -1.97900390625, -1.865264892578125, -1.75152587890625, -1.637786865234375, -1.5240478515625, -1.410308837890625, -1.29656982421875, -1.182830810546875, -1.069091796875, -0.955352783203125, -0.84161376953125, -0.727874755859375, -0.6141357421875, -0.500396728515625, -0.38665771484375, -0.272918701171875, -0.1591796875, -0.045440673828125, 0.06829833984375, 0.182037353515625, 0.2957763671875, 0.409515380859375, 0.52325439453125, 0.636993408203125, 0.750732421875, 0.864471435546875, 0.97821044921875, 1.091949462890625, 1.2056884765625, 1.319427490234375, 1.43316650390625, 1.546905517578125, 1.66064453125, 1.774383544921875, 1.88812255859375, 2.001861572265625, 2.1156005859375, 2.229339599609375, 2.34307861328125, 2.456817626953125, 2.570556640625, 2.684295654296875, 2.79803466796875, 2.911773681640625, 3.0255126953125, 3.139251708984375, 3.25299072265625, 3.366729736328125, 3.48046875]}, "gradients/decoder.transformer.h.1.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 5.0, 1.0, 3.0, 6.0, 24.0, 24.0, 36.0, 45.0, 79.0, 121.0, 168.0, 314.0, 407.0, 679.0, 1061.0, 1631.0, 2546.0, 3959.0, 6613.0, 10285.0, 16459.0, 26198.0, 40201.0, 60862.0, 90007.0, 123793.0, 1093859.0, 251701.0, 118902.0, 85366.0, 57879.0, 37934.0, 24233.0, 15553.0, 9800.0, 6125.0, 3701.0, 2413.0, 1500.0, 908.0, 589.0, 386.0, 260.0, 173.0, 116.0, 66.0, 50.0, 40.0, 17.0, 17.0, 9.0, 10.0, 3.0, 4.0, 3.0, 0.0, 1.0, 3.0], "bins": [-0.42822265625, -0.4152183532714844, -0.40221405029296875, -0.3892097473144531, -0.3762054443359375, -0.3632011413574219, -0.35019683837890625, -0.3371925354003906, -0.324188232421875, -0.3111839294433594, -0.29817962646484375, -0.2851753234863281, -0.2721710205078125, -0.2591667175292969, -0.24616241455078125, -0.23315811157226562, -0.22015380859375, -0.20714950561523438, -0.19414520263671875, -0.18114089965820312, -0.1681365966796875, -0.15513229370117188, -0.14212799072265625, -0.12912368774414062, -0.116119384765625, -0.10311508178710938, -0.09011077880859375, -0.07710647583007812, -0.0641021728515625, -0.051097869873046875, -0.03809356689453125, -0.025089263916015625, -0.0120849609375, 0.000919342041015625, 0.01392364501953125, 0.026927947998046875, 0.0399322509765625, 0.052936553955078125, 0.06594085693359375, 0.07894515991210938, 0.091949462890625, 0.10495376586914062, 0.11795806884765625, 0.13096237182617188, 0.1439666748046875, 0.15697097778320312, 0.16997528076171875, 0.18297958374023438, 0.19598388671875, 0.20898818969726562, 0.22199249267578125, 0.23499679565429688, 0.2480010986328125, 0.2610054016113281, 0.27400970458984375, 0.2870140075683594, 0.300018310546875, 0.3130226135253906, 0.32602691650390625, 0.3390312194824219, 0.3520355224609375, 0.3650398254394531, 0.37804412841796875, 0.3910484313964844, 0.404052734375]}, "gradients/decoder.transformer.h.1.crossattention.q_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 2.0, 0.0, 3.0, 0.0, 3.0, 6.0, 3.0, 5.0, 7.0, 12.0, 6.0, 15.0, 15.0, 15.0, 27.0, 34.0, 40.0, 64.0, 61.0, 85.0, 76.0, 81.0, 83.0, 72.0, 66.0, 42.0, 43.0, 38.0, 25.0, 14.0, 17.0, 10.0, 7.0, 4.0, 5.0, 6.0, 2.0, 4.0, 3.0, 3.0, 3.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.01157379150390625, -0.011189818382263184, -0.010805845260620117, -0.01042187213897705, -0.010037899017333984, -0.009653925895690918, -0.009269952774047852, -0.008885979652404785, -0.008502006530761719, -0.008118033409118652, -0.007734060287475586, -0.0073500871658325195, -0.006966114044189453, -0.006582140922546387, -0.00619816780090332, -0.005814194679260254, -0.0054302215576171875, -0.005046248435974121, -0.004662275314331055, -0.004278302192687988, -0.003894329071044922, -0.0035103559494018555, -0.003126382827758789, -0.0027424097061157227, -0.0023584365844726562, -0.00197446346282959, -0.0015904903411865234, -0.001206517219543457, -0.0008225440979003906, -0.0004385709762573242, -5.459785461425781e-05, 0.0003293752670288086, 0.000713348388671875, 0.0010973215103149414, 0.0014812946319580078, 0.0018652677536010742, 0.0022492408752441406, 0.002633213996887207, 0.0030171871185302734, 0.00340116024017334, 0.0037851333618164062, 0.004169106483459473, 0.004553079605102539, 0.0049370527267456055, 0.005321025848388672, 0.005704998970031738, 0.006088972091674805, 0.006472945213317871, 0.0068569183349609375, 0.007240891456604004, 0.00762486457824707, 0.008008837699890137, 0.008392810821533203, 0.00877678394317627, 0.009160757064819336, 0.009544730186462402, 0.009928703308105469, 0.010312676429748535, 0.010696649551391602, 0.011080622673034668, 0.011464595794677734, 0.0118485689163208, 0.012232542037963867, 0.012616515159606934, 0.01300048828125]}, "gradients/decoder.transformer.h.1.crossattention.q_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 2.0, 2.0, 2.0, 1.0, 6.0, 8.0, 9.0, 7.0, 13.0, 13.0, 11.0, 15.0, 27.0, 37.0, 49.0, 74.0, 88.0, 135.0, 211.0, 369.0, 622.0, 1564.0, 956303.0, 86514.0, 1053.0, 513.0, 282.0, 184.0, 131.0, 71.0, 66.0, 33.0, 35.0, 21.0, 27.0, 16.0, 8.0, 10.0, 8.0, 7.0, 1.0, 3.0, 5.0, 3.0, 1.0, 2.0, 4.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0], "bins": [-0.227783203125, -0.2210559844970703, -0.21432876586914062, -0.20760154724121094, -0.20087432861328125, -0.19414710998535156, -0.18741989135742188, -0.1806926727294922, -0.1739654541015625, -0.1672382354736328, -0.16051101684570312, -0.15378379821777344, -0.14705657958984375, -0.14032936096191406, -0.13360214233398438, -0.1268749237060547, -0.120147705078125, -0.11342048645019531, -0.10669326782226562, -0.09996604919433594, -0.09323883056640625, -0.08651161193847656, -0.07978439331054688, -0.07305717468261719, -0.0663299560546875, -0.05960273742675781, -0.052875518798828125, -0.04614830017089844, -0.03942108154296875, -0.03269386291503906, -0.025966644287109375, -0.019239425659179688, -0.01251220703125, -0.0057849884033203125, 0.000942230224609375, 0.0076694488525390625, 0.01439666748046875, 0.021123886108398438, 0.027851104736328125, 0.03457832336425781, 0.0413055419921875, 0.04803276062011719, 0.054759979248046875, 0.06148719787597656, 0.06821441650390625, 0.07494163513183594, 0.08166885375976562, 0.08839607238769531, 0.095123291015625, 0.10185050964355469, 0.10857772827148438, 0.11530494689941406, 0.12203216552734375, 0.12875938415527344, 0.13548660278320312, 0.1422138214111328, 0.1489410400390625, 0.1556682586669922, 0.16239547729492188, 0.16912269592285156, 0.17584991455078125, 0.18257713317871094, 0.18930435180664062, 0.1960315704345703, 0.2027587890625]}, "gradients/decoder.transformer.h.1.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 726.0, 286.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.18916285037994385, -0.184042289853096, -0.17892172932624817, -0.17380118370056152, -0.16868062317371368, -0.16356006264686584, -0.1584395170211792, -0.15331895649433136, -0.14819839596748352, -0.14307783544063568, -0.13795727491378784, -0.1328367292881012, -0.12771616876125336, -0.12259560823440552, -0.11747505515813828, -0.11235450208187103, -0.1072339415550232, -0.10211338102817535, -0.09699282795190811, -0.09187227487564087, -0.08675171434879303, -0.08163115382194519, -0.07651060074567795, -0.0713900476694107, -0.06626948714256287, -0.061148930341005325, -0.056028373539447784, -0.050907816737890244, -0.0457872599363327, -0.04066670313477516, -0.03554614633321762, -0.03042558953166008, -0.02530503273010254, -0.020184475928544998, -0.015063919126987457, -0.009943362325429916, -0.0048228055238723755, 0.0002977512776851654, 0.005418308079242706, 0.010538864880800247, 0.015659421682357788, 0.02077997848391533, 0.02590053528547287, 0.03102109208703041, 0.03614164888858795, 0.04126220569014549, 0.04638276249170303, 0.051503319293260574, 0.056623876094818115, 0.061744432896375656, 0.0668649896979332, 0.07198554277420044, 0.07710610330104828, 0.08222666382789612, 0.08734721690416336, 0.0924677699804306, 0.09758833050727844, 0.10270889103412628, 0.10782944411039352, 0.11294999718666077, 0.1180705577135086, 0.12319111824035645, 0.1283116638660431, 0.13343222439289093, 0.13855278491973877]}, "gradients/decoder.transformer.h.1.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 4.0, 8.0, 5.0, 5.0, 8.0, 17.0, 27.0, 22.0, 42.0, 54.0, 34.0, 60.0, 52.0, 65.0, 71.0, 63.0, 50.0, 70.0, 73.0, 36.0, 56.0, 45.0, 35.0, 27.0, 19.0, 21.0, 13.0, 6.0, 4.0, 6.0, 5.0, 9.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-0.018456220626831055, -0.017981894314289093, -0.01750756800174713, -0.01703324168920517, -0.016558915376663208, -0.016084589064121246, -0.015610262751579285, -0.015135936439037323, -0.014661610126495361, -0.0141872838139534, -0.013712957501411438, -0.013238631188869476, -0.012764304876327515, -0.012289978563785553, -0.011815652251243591, -0.01134132593870163, -0.010866999626159668, -0.010392673313617706, -0.009918347001075745, -0.009444020688533783, -0.008969694375991821, -0.00849536806344986, -0.008021041750907898, -0.007546715438365936, -0.007072389125823975, -0.006598062813282013, -0.006123736500740051, -0.00564941018819809, -0.005175083875656128, -0.004700757563114166, -0.004226431250572205, -0.003752104938030243, -0.0032777786254882812, -0.0028034523129463196, -0.002329126000404358, -0.0018547996878623962, -0.0013804733753204346, -0.0009061470627784729, -0.00043182075023651123, 4.250556230545044e-05, 0.0005168318748474121, 0.0009911581873893738, 0.0014654844999313354, 0.0019398108124732971, 0.002414137125015259, 0.0028884634375572205, 0.003362789750099182, 0.003837116062641144, 0.0043114423751831055, 0.004785768687725067, 0.005260095000267029, 0.0057344213128089905, 0.006208747625350952, 0.006683073937892914, 0.0071574002504348755, 0.007631726562976837, 0.008106052875518799, 0.00858037918806076, 0.009054705500602722, 0.009529031813144684, 0.010003358125686646, 0.010477684438228607, 0.010952010750770569, 0.01142633706331253, 0.011900663375854492]}, "gradients/decoder.transformer.h.1.attn.c_proj.bias": {"_type": "histogram", "values": [3.0, 1.0, 1.0, 1.0, 1.0, 2.0, 7.0, 4.0, 3.0, 7.0, 7.0, 12.0, 8.0, 11.0, 6.0, 10.0, 12.0, 16.0, 21.0, 16.0, 20.0, 27.0, 28.0, 31.0, 37.0, 36.0, 43.0, 43.0, 40.0, 43.0, 52.0, 47.0, 36.0, 43.0, 32.0, 41.0, 38.0, 37.0, 32.0, 31.0, 17.0, 12.0, 17.0, 13.0, 23.0, 11.0, 9.0, 7.0, 5.0, 7.0, 4.0, 3.0, 1.0, 4.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.93359375, -4.76898193359375, -4.6043701171875, -4.43975830078125, -4.275146484375, -4.11053466796875, -3.9459228515625, -3.78131103515625, -3.61669921875, -3.45208740234375, -3.2874755859375, -3.12286376953125, -2.958251953125, -2.79364013671875, -2.6290283203125, -2.46441650390625, -2.2998046875, -2.13519287109375, -1.9705810546875, -1.80596923828125, -1.641357421875, -1.47674560546875, -1.3121337890625, -1.14752197265625, -0.98291015625, -0.81829833984375, -0.6536865234375, -0.48907470703125, -0.324462890625, -0.15985107421875, 0.0047607421875, 0.16937255859375, 0.333984375, 0.49859619140625, 0.6632080078125, 0.82781982421875, 0.992431640625, 1.15704345703125, 1.3216552734375, 1.48626708984375, 1.65087890625, 1.81549072265625, 1.9801025390625, 2.14471435546875, 2.309326171875, 2.47393798828125, 2.6385498046875, 2.80316162109375, 2.9677734375, 3.13238525390625, 3.2969970703125, 3.46160888671875, 3.626220703125, 3.79083251953125, 3.9554443359375, 4.12005615234375, 4.28466796875, 4.44927978515625, 4.6138916015625, 4.77850341796875, 4.943115234375, 5.10772705078125, 5.2723388671875, 5.43695068359375, 5.6015625]}, "gradients/decoder.transformer.h.1.attn.c_proj.weight": {"_type": "histogram", "values": [2.0, 2.0, 0.0, 2.0, 3.0, 8.0, 6.0, 9.0, 12.0, 14.0, 21.0, 31.0, 31.0, 40.0, 26.0, 51.0, 70.0, 77.0, 114.0, 173.0, 196.0, 253.0, 400.0, 493.0, 764.0, 1291.0, 2383.0, 5482.0, 17218.0, 72136.0, 360406.0, 453109.0, 97799.0, 22187.0, 6727.0, 2706.0, 1401.0, 781.0, 529.0, 412.0, 282.0, 197.0, 160.0, 131.0, 117.0, 82.0, 58.0, 45.0, 46.0, 26.0, 19.0, 11.0, 7.0, 8.0, 8.0, 4.0, 1.0, 1.0, 2.0, 0.0, 4.0, 0.0, 0.0, 2.0], "bins": [-8.453125, -8.1812744140625, -7.909423828125, -7.6375732421875, -7.36572265625, -7.0938720703125, -6.822021484375, -6.5501708984375, -6.2783203125, -6.0064697265625, -5.734619140625, -5.4627685546875, -5.19091796875, -4.9190673828125, -4.647216796875, -4.3753662109375, -4.103515625, -3.8316650390625, -3.559814453125, -3.2879638671875, -3.01611328125, -2.7442626953125, -2.472412109375, -2.2005615234375, -1.9287109375, -1.6568603515625, -1.385009765625, -1.1131591796875, -0.84130859375, -0.5694580078125, -0.297607421875, -0.0257568359375, 0.24609375, 0.5179443359375, 0.789794921875, 1.0616455078125, 1.33349609375, 1.6053466796875, 1.877197265625, 2.1490478515625, 2.4208984375, 2.6927490234375, 2.964599609375, 3.2364501953125, 3.50830078125, 3.7801513671875, 4.052001953125, 4.3238525390625, 4.595703125, 4.8675537109375, 5.139404296875, 5.4112548828125, 5.68310546875, 5.9549560546875, 6.226806640625, 6.4986572265625, 6.7705078125, 7.0423583984375, 7.314208984375, 7.5860595703125, 7.85791015625, 8.1297607421875, 8.401611328125, 8.6734619140625, 8.9453125]}, "gradients/decoder.transformer.h.1.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 3.0, 3.0, 5.0, 2.0, 7.0, 8.0, 5.0, 4.0, 13.0, 13.0, 19.0, 21.0, 25.0, 27.0, 36.0, 38.0, 50.0, 66.0, 101.0, 151.0, 399.0, 1460.0, 164.0, 100.0, 64.0, 49.0, 53.0, 28.0, 29.0, 25.0, 14.0, 15.0, 15.0, 7.0, 5.0, 9.0, 4.0, 7.0, 1.0, 5.0, 2.0, 1.0, 2.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 2.0], "bins": [-16.484375, -15.953125, -15.421875, -14.890625, -14.359375, -13.828125, -13.296875, -12.765625, -12.234375, -11.703125, -11.171875, -10.640625, -10.109375, -9.578125, -9.046875, -8.515625, -7.984375, -7.453125, -6.921875, -6.390625, -5.859375, -5.328125, -4.796875, -4.265625, -3.734375, -3.203125, -2.671875, -2.140625, -1.609375, -1.078125, -0.546875, -0.015625, 0.515625, 1.046875, 1.578125, 2.109375, 2.640625, 3.171875, 3.703125, 4.234375, 4.765625, 5.296875, 5.828125, 6.359375, 6.890625, 7.421875, 7.953125, 8.484375, 9.015625, 9.546875, 10.078125, 10.609375, 11.140625, 11.671875, 12.203125, 12.734375, 13.265625, 13.796875, 14.328125, 14.859375, 15.390625, 15.921875, 16.453125, 16.984375, 17.515625]}, "gradients/decoder.transformer.h.1.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 4.0, 1.0, 1.0, 2.0, 3.0, 1.0, 2.0, 1.0, 6.0, 6.0, 9.0, 18.0, 6.0, 19.0, 29.0, 41.0, 48.0, 82.0, 131.0, 134.0, 238.0, 552.0, 1806.0, 25748.0, 3095216.0, 18678.0, 1575.0, 514.0, 254.0, 183.0, 111.0, 85.0, 58.0, 49.0, 21.0, 27.0, 16.0, 11.0, 9.0, 6.0, 4.0, 6.0, 4.0, 2.0, 0.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-38.9375, -37.77392578125, -36.6103515625, -35.44677734375, -34.283203125, -33.11962890625, -31.9560546875, -30.79248046875, -29.62890625, -28.46533203125, -27.3017578125, -26.13818359375, -24.974609375, -23.81103515625, -22.6474609375, -21.48388671875, -20.3203125, -19.15673828125, -17.9931640625, -16.82958984375, -15.666015625, -14.50244140625, -13.3388671875, -12.17529296875, -11.01171875, -9.84814453125, -8.6845703125, -7.52099609375, -6.357421875, -5.19384765625, -4.0302734375, -2.86669921875, -1.703125, -0.53955078125, 0.6240234375, 1.78759765625, 2.951171875, 4.11474609375, 5.2783203125, 6.44189453125, 7.60546875, 8.76904296875, 9.9326171875, 11.09619140625, 12.259765625, 13.42333984375, 14.5869140625, 15.75048828125, 16.9140625, 18.07763671875, 19.2412109375, 20.40478515625, 21.568359375, 22.73193359375, 23.8955078125, 25.05908203125, 26.22265625, 27.38623046875, 28.5498046875, 29.71337890625, 30.876953125, 32.04052734375, 33.2041015625, 34.36767578125, 35.53125]}, "gradients/decoder.transformer.h.1.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 59.0, 455.0, 459.0, 35.0, 4.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-68.66342163085938, -64.0647964477539, -59.46617126464844, -54.8675422668457, -50.268917083740234, -45.670291900634766, -41.07166290283203, -36.47303771972656, -31.874412536621094, -27.275787353515625, -22.677160263061523, -18.078533172607422, -13.479907989501953, -8.881282806396484, -4.282655715942383, 0.31597137451171875, 4.9145965576171875, 9.513222694396973, 14.111848831176758, 18.71047592163086, 23.309101104736328, 27.907726287841797, 32.50635528564453, 37.10498046875, 41.70360565185547, 46.30223083496094, 50.900856018066406, 55.49948501586914, 60.09811019897461, 64.69673156738281, 69.29536437988281, 73.89398956298828, 78.49263000488281, 83.09125518798828, 87.68988037109375, 92.28850555419922, 96.88713073730469, 101.48576354980469, 106.08438873291016, 110.68301391601562, 115.2816390991211, 119.88026428222656, 124.47888946533203, 129.0775146484375, 133.6761474609375, 138.27476501464844, 142.87339782714844, 147.47201538085938, 152.07064819335938, 156.66928100585938, 161.2678985595703, 165.8665313720703, 170.46514892578125, 175.06378173828125, 179.6623992919922, 184.2610321044922, 188.85964965820312, 193.45828247070312, 198.05690002441406, 202.65553283691406, 207.254150390625, 211.852783203125, 216.45140075683594, 221.05003356933594, 225.64866638183594]}, "gradients/decoder.transformer.h.1.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 3.0, 10.0, 4.0, 5.0, 6.0, 9.0, 11.0, 14.0, 21.0, 25.0, 18.0, 26.0, 31.0, 36.0, 28.0, 38.0, 46.0, 44.0, 49.0, 37.0, 40.0, 40.0, 30.0, 36.0, 39.0, 40.0, 41.0, 42.0, 32.0, 36.0, 28.0, 28.0, 15.0, 15.0, 22.0, 14.0, 10.0, 10.0, 4.0, 8.0, 5.0, 4.0, 6.0, 2.0, 4.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-31.162355422973633, -30.059326171875, -28.956296920776367, -27.853267669677734, -26.75023651123047, -25.64720916748047, -24.544178009033203, -23.44114875793457, -22.338119506835938, -21.235090255737305, -20.132061004638672, -19.02903175354004, -17.926002502441406, -16.82297134399414, -15.719942092895508, -14.616912841796875, -13.513883590698242, -12.41085433959961, -11.307825088500977, -10.204794883728027, -9.101765632629395, -7.998736381530762, -6.895706653594971, -5.79267692565918, -4.689647674560547, -3.586618185043335, -2.483588695526123, -1.3805592060089111, -0.2775297164916992, 0.8254995346069336, 1.9285292625427246, 3.0315589904785156, 4.134590148925781, 5.237619400024414, 6.340649127960205, 7.443678855895996, 8.546708106994629, 9.649737358093262, 10.752767562866211, 11.855796813964844, 12.958826065063477, 14.06185531616211, 15.164884567260742, 16.267913818359375, 17.37094497680664, 18.47397232055664, 19.577003479003906, 20.68003273010254, 21.783061981201172, 22.886091232299805, 23.989120483398438, 25.09214973449707, 26.195178985595703, 27.29821014404297, 28.4012393951416, 29.504268646240234, 30.607297897338867, 31.7103271484375, 32.813358306884766, 33.916385650634766, 35.01941680908203, 36.12244415283203, 37.2254753112793, 38.32850646972656, 39.43153381347656]}, "gradients/decoder.transformer.h.0.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 3.0, 3.0, 2.0, 1.0, 3.0, 4.0, 2.0, 4.0, 5.0, 10.0, 5.0, 9.0, 8.0, 14.0, 8.0, 9.0, 18.0, 16.0, 18.0, 20.0, 21.0, 26.0, 38.0, 30.0, 42.0, 34.0, 34.0, 38.0, 49.0, 46.0, 48.0, 50.0, 42.0, 34.0, 27.0, 37.0, 34.0, 26.0, 23.0, 27.0, 19.0, 21.0, 16.0, 14.0, 14.0, 11.0, 8.0, 6.0, 11.0, 6.0, 6.0, 7.0, 2.0, 4.0, 3.0, 1.0, 2.0, 2.0, 0.0, 1.0], "bins": [-6.2265625, -6.04150390625, -5.8564453125, -5.67138671875, -5.486328125, -5.30126953125, -5.1162109375, -4.93115234375, -4.74609375, -4.56103515625, -4.3759765625, -4.19091796875, -4.005859375, -3.82080078125, -3.6357421875, -3.45068359375, -3.265625, -3.08056640625, -2.8955078125, -2.71044921875, -2.525390625, -2.34033203125, -2.1552734375, -1.97021484375, -1.78515625, -1.60009765625, -1.4150390625, -1.22998046875, -1.044921875, -0.85986328125, -0.6748046875, -0.48974609375, -0.3046875, -0.11962890625, 0.0654296875, 0.25048828125, 0.435546875, 0.62060546875, 0.8056640625, 0.99072265625, 1.17578125, 1.36083984375, 1.5458984375, 1.73095703125, 1.916015625, 2.10107421875, 2.2861328125, 2.47119140625, 2.65625, 2.84130859375, 3.0263671875, 3.21142578125, 3.396484375, 3.58154296875, 3.7666015625, 3.95166015625, 4.13671875, 4.32177734375, 4.5068359375, 4.69189453125, 4.876953125, 5.06201171875, 5.2470703125, 5.43212890625, 5.6171875]}, "gradients/decoder.transformer.h.0.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 4.0, 0.0, 0.0, 3.0, 5.0, 5.0, 5.0, 6.0, 5.0, 10.0, 15.0, 13.0, 16.0, 24.0, 25.0, 41.0, 63.0, 81.0, 108.0, 146.0, 184.0, 288.0, 388.0, 548.0, 990.0, 2071.0, 10068.0, 219963.0, 3385504.0, 550807.0, 16845.0, 2713.0, 1160.0, 671.0, 416.0, 292.0, 220.0, 162.0, 102.0, 82.0, 42.0, 45.0, 34.0, 24.0, 24.0, 24.0, 10.0, 7.0, 6.0, 5.0, 9.0, 6.0, 5.0, 3.0, 3.0, 1.0, 2.0], "bins": [-33.5625, -32.6201171875, -31.677734375, -30.7353515625, -29.79296875, -28.8505859375, -27.908203125, -26.9658203125, -26.0234375, -25.0810546875, -24.138671875, -23.1962890625, -22.25390625, -21.3115234375, -20.369140625, -19.4267578125, -18.484375, -17.5419921875, -16.599609375, -15.6572265625, -14.71484375, -13.7724609375, -12.830078125, -11.8876953125, -10.9453125, -10.0029296875, -9.060546875, -8.1181640625, -7.17578125, -6.2333984375, -5.291015625, -4.3486328125, -3.40625, -2.4638671875, -1.521484375, -0.5791015625, 0.36328125, 1.3056640625, 2.248046875, 3.1904296875, 4.1328125, 5.0751953125, 6.017578125, 6.9599609375, 7.90234375, 8.8447265625, 9.787109375, 10.7294921875, 11.671875, 12.6142578125, 13.556640625, 14.4990234375, 15.44140625, 16.3837890625, 17.326171875, 18.2685546875, 19.2109375, 20.1533203125, 21.095703125, 22.0380859375, 22.98046875, 23.9228515625, 24.865234375, 25.8076171875, 26.75]}, "gradients/decoder.transformer.h.0.mlp.c_fc.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 5.0, 5.0, 4.0, 11.0, 20.0, 44.0, 57.0, 59.0, 98.0, 139.0, 202.0, 288.0, 403.0, 442.0, 525.0, 450.0, 374.0, 271.0, 204.0, 131.0, 115.0, 75.0, 54.0, 37.0, 21.0, 13.0, 8.0, 7.0, 4.0, 5.0, 3.0, 1.0, 2.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 2.0], "bins": [-11.875, -11.486328125, -11.09765625, -10.708984375, -10.3203125, -9.931640625, -9.54296875, -9.154296875, -8.765625, -8.376953125, -7.98828125, -7.599609375, -7.2109375, -6.822265625, -6.43359375, -6.044921875, -5.65625, -5.267578125, -4.87890625, -4.490234375, -4.1015625, -3.712890625, -3.32421875, -2.935546875, -2.546875, -2.158203125, -1.76953125, -1.380859375, -0.9921875, -0.603515625, -0.21484375, 0.173828125, 0.5625, 0.951171875, 1.33984375, 1.728515625, 2.1171875, 2.505859375, 2.89453125, 3.283203125, 3.671875, 4.060546875, 4.44921875, 4.837890625, 5.2265625, 5.615234375, 6.00390625, 6.392578125, 6.78125, 7.169921875, 7.55859375, 7.947265625, 8.3359375, 8.724609375, 9.11328125, 9.501953125, 9.890625, 10.279296875, 10.66796875, 11.056640625, 11.4453125, 11.833984375, 12.22265625, 12.611328125, 13.0]}, "gradients/decoder.transformer.h.0.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 1.0, 2.0, 2.0, 4.0, 5.0, 35.0, 58.0, 101.0, 295.0, 766.0, 2912.0, 26519.0, 1357101.0, 2741643.0, 58976.0, 4294.0, 1057.0, 322.0, 116.0, 35.0, 19.0, 10.0, 4.0, 2.0, 2.0, 2.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0], "bins": [-31.78125, -30.96875, -30.15625, -29.34375, -28.53125, -27.71875, -26.90625, -26.09375, -25.28125, -24.46875, -23.65625, -22.84375, -22.03125, -21.21875, -20.40625, -19.59375, -18.78125, -17.96875, -17.15625, -16.34375, -15.53125, -14.71875, -13.90625, -13.09375, -12.28125, -11.46875, -10.65625, -9.84375, -9.03125, -8.21875, -7.40625, -6.59375, -5.78125, -4.96875, -4.15625, -3.34375, -2.53125, -1.71875, -0.90625, -0.09375, 0.71875, 1.53125, 2.34375, 3.15625, 3.96875, 4.78125, 5.59375, 6.40625, 7.21875, 8.03125, 8.84375, 9.65625, 10.46875, 11.28125, 12.09375, 12.90625, 13.71875, 14.53125, 15.34375, 16.15625, 16.96875, 17.78125, 18.59375, 19.40625, 20.21875]}, "gradients/decoder.transformer.h.0.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 4.0, 4.0, 8.0, 10.0, 8.0, 15.0, 20.0, 33.0, 48.0, 40.0, 65.0, 51.0, 66.0, 77.0, 85.0, 72.0, 77.0, 63.0, 52.0, 53.0, 35.0, 26.0, 34.0, 12.0, 17.0, 10.0, 9.0, 5.0, 3.0, 3.0, 3.0, 2.0, 3.0, 2.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-48.35721206665039, -46.113197326660156, -43.869178771972656, -41.62516403198242, -39.38114547729492, -37.13713073730469, -34.89311218261719, -32.64909744262695, -30.405080795288086, -28.16106414794922, -25.91704750061035, -23.673030853271484, -21.42901611328125, -19.18499755859375, -16.940982818603516, -14.696966171264648, -12.452949523925781, -10.208932876586914, -7.964916706085205, -5.720900535583496, -3.476883888244629, -1.2328672409057617, 1.011148452758789, 3.2551651000976562, 5.499181747436523, 7.743198394775391, 9.987215042114258, 12.231230735778809, 14.475247383117676, 16.71926498413086, 18.963279724121094, 21.20729637145996, 23.451309204101562, 25.69532585144043, 27.939342498779297, 30.18335723876953, 32.42737579345703, 34.671390533447266, 36.9154052734375, 39.159423828125, 41.4034423828125, 43.647457122802734, 45.891475677490234, 48.13549041748047, 50.37950897216797, 52.6235237121582, 54.86753845214844, 57.11155700683594, 59.35557174682617, 61.599586486816406, 63.843605041503906, 66.0876235961914, 68.33163452148438, 70.57565307617188, 72.81967163085938, 75.06369018554688, 77.30770111083984, 79.55171966552734, 81.79573059082031, 84.03974914550781, 86.28376770019531, 88.52778625488281, 90.77179718017578, 93.01581573486328, 95.25983428955078]}, "gradients/decoder.transformer.h.0.ln_2.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 0.0, 2.0, 4.0, 4.0, 2.0, 10.0, 8.0, 5.0, 7.0, 12.0, 16.0, 20.0, 12.0, 23.0, 26.0, 32.0, 27.0, 28.0, 37.0, 44.0, 38.0, 50.0, 35.0, 32.0, 29.0, 38.0, 47.0, 38.0, 38.0, 44.0, 37.0, 30.0, 38.0, 30.0, 28.0, 22.0, 15.0, 25.0, 12.0, 8.0, 9.0, 2.0, 7.0, 10.0, 6.0, 2.0, 7.0, 6.0, 3.0, 2.0, 1.0, 2.0, 1.0, 2.0], "bins": [-49.04084014892578, -47.628089904785156, -46.2153434753418, -44.80259704589844, -43.38984680175781, -41.97709655761719, -40.56435012817383, -39.15160369873047, -37.738853454589844, -36.32610321044922, -34.91335678100586, -33.5006103515625, -32.087860107421875, -30.675111770629883, -29.26236343383789, -27.8496150970459, -26.436866760253906, -25.024118423461914, -23.611370086669922, -22.19862174987793, -20.785873413085938, -19.373125076293945, -17.960376739501953, -16.54762840270996, -15.134880065917969, -13.722131729125977, -12.309383392333984, -10.896635055541992, -9.48388671875, -8.071138381958008, -6.658390045166016, -5.245641708374023, -3.8328933715820312, -2.420145034790039, -1.0073966979980469, 0.4053516387939453, 1.8180999755859375, 3.2308483123779297, 4.643596649169922, 6.056344985961914, 7.469093322753906, 8.881841659545898, 10.29458999633789, 11.707338333129883, 13.120086669921875, 14.532835006713867, 15.94558334350586, 17.35833168029785, 18.771080017089844, 20.183828353881836, 21.596576690673828, 23.00932502746582, 24.422073364257812, 25.834821701049805, 27.247570037841797, 28.66031837463379, 30.07306671142578, 31.485815048217773, 32.898563385009766, 34.311309814453125, 35.72406005859375, 37.136810302734375, 38.549556732177734, 39.962303161621094, 41.37505340576172]}, "gradients/decoder.transformer.h.0.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 6.0, 0.0, 4.0, 1.0, 1.0, 4.0, 1.0, 6.0, 5.0, 8.0, 6.0, 13.0, 12.0, 13.0, 14.0, 14.0, 29.0, 19.0, 26.0, 33.0, 31.0, 46.0, 44.0, 39.0, 36.0, 29.0, 38.0, 48.0, 44.0, 46.0, 47.0, 32.0, 34.0, 39.0, 28.0, 23.0, 35.0, 24.0, 23.0, 24.0, 12.0, 16.0, 10.0, 6.0, 4.0, 7.0, 5.0, 3.0, 5.0, 8.0, 3.0, 3.0, 4.0, 3.0, 1.0, 3.0], "bins": [-90.0625, -87.51953125, -84.9765625, -82.43359375, -79.890625, -77.34765625, -74.8046875, -72.26171875, -69.71875, -67.17578125, -64.6328125, -62.08984375, -59.546875, -57.00390625, -54.4609375, -51.91796875, -49.375, -46.83203125, -44.2890625, -41.74609375, -39.203125, -36.66015625, -34.1171875, -31.57421875, -29.03125, -26.48828125, -23.9453125, -21.40234375, -18.859375, -16.31640625, -13.7734375, -11.23046875, -8.6875, -6.14453125, -3.6015625, -1.05859375, 1.484375, 4.02734375, 6.5703125, 9.11328125, 11.65625, 14.19921875, 16.7421875, 19.28515625, 21.828125, 24.37109375, 26.9140625, 29.45703125, 32.0, 34.54296875, 37.0859375, 39.62890625, 42.171875, 44.71484375, 47.2578125, 49.80078125, 52.34375, 54.88671875, 57.4296875, 59.97265625, 62.515625, 65.05859375, 67.6015625, 70.14453125, 72.6875]}, "gradients/decoder.transformer.h.0.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 7.0, 7.0, 9.0, 12.0, 18.0, 32.0, 46.0, 73.0, 124.0, 189.0, 307.0, 470.0, 731.0, 1070.0, 1594.0, 2652.0, 4084.0, 6362.0, 9750.0, 15296.0, 23912.0, 36830.0, 55042.0, 80949.0, 112112.0, 141199.0, 147076.0, 125421.0, 93491.0, 65448.0, 43972.0, 28611.0, 18610.0, 11883.0, 7702.0, 4803.0, 3153.0, 1958.0, 1275.0, 844.0, 491.0, 323.0, 226.0, 141.0, 103.0, 57.0, 39.0, 29.0, 16.0, 7.0, 5.0, 5.0, 4.0, 0.0, 1.0, 1.0, 1.0], "bins": [-7.01953125, -6.80816650390625, -6.5968017578125, -6.38543701171875, -6.174072265625, -5.96270751953125, -5.7513427734375, -5.53997802734375, -5.32861328125, -5.11724853515625, -4.9058837890625, -4.69451904296875, -4.483154296875, -4.27178955078125, -4.0604248046875, -3.84906005859375, -3.6376953125, -3.42633056640625, -3.2149658203125, -3.00360107421875, -2.792236328125, -2.58087158203125, -2.3695068359375, -2.15814208984375, -1.94677734375, -1.73541259765625, -1.5240478515625, -1.31268310546875, -1.101318359375, -0.88995361328125, -0.6785888671875, -0.46722412109375, -0.255859375, -0.04449462890625, 0.1668701171875, 0.37823486328125, 0.589599609375, 0.80096435546875, 1.0123291015625, 1.22369384765625, 1.43505859375, 1.64642333984375, 1.8577880859375, 2.06915283203125, 2.280517578125, 2.49188232421875, 2.7032470703125, 2.91461181640625, 3.1259765625, 3.33734130859375, 3.5487060546875, 3.76007080078125, 3.971435546875, 4.18280029296875, 4.3941650390625, 4.60552978515625, 4.81689453125, 5.02825927734375, 5.2396240234375, 5.45098876953125, 5.662353515625, 5.87371826171875, 6.0850830078125, 6.29644775390625, 6.5078125]}, "gradients/decoder.transformer.h.0.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 0.0, 2.0, 3.0, 1.0, 2.0, 4.0, 2.0, 4.0, 7.0, 8.0, 9.0, 14.0, 12.0, 13.0, 11.0, 29.0, 14.0, 24.0, 21.0, 18.0, 29.0, 28.0, 28.0, 31.0, 31.0, 31.0, 37.0, 38.0, 38.0, 1067.0, 33.0, 30.0, 34.0, 46.0, 22.0, 34.0, 39.0, 36.0, 25.0, 22.0, 17.0, 16.0, 20.0, 13.0, 20.0, 16.0, 11.0, 8.0, 5.0, 9.0, 3.0, 10.0, 1.0, 5.0, 2.0, 4.0, 1.0, 0.0, 4.0, 1.0, 1.0], "bins": [-49.03125, -47.52880859375, -46.0263671875, -44.52392578125, -43.021484375, -41.51904296875, -40.0166015625, -38.51416015625, -37.01171875, -35.50927734375, -34.0068359375, -32.50439453125, -31.001953125, -29.49951171875, -27.9970703125, -26.49462890625, -24.9921875, -23.48974609375, -21.9873046875, -20.48486328125, -18.982421875, -17.47998046875, -15.9775390625, -14.47509765625, -12.97265625, -11.47021484375, -9.9677734375, -8.46533203125, -6.962890625, -5.46044921875, -3.9580078125, -2.45556640625, -0.953125, 0.54931640625, 2.0517578125, 3.55419921875, 5.056640625, 6.55908203125, 8.0615234375, 9.56396484375, 11.06640625, 12.56884765625, 14.0712890625, 15.57373046875, 17.076171875, 18.57861328125, 20.0810546875, 21.58349609375, 23.0859375, 24.58837890625, 26.0908203125, 27.59326171875, 29.095703125, 30.59814453125, 32.1005859375, 33.60302734375, 35.10546875, 36.60791015625, 38.1103515625, 39.61279296875, 41.115234375, 42.61767578125, 44.1201171875, 45.62255859375, 47.125]}, "gradients/decoder.transformer.h.0.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 3.0, 3.0, 4.0, 6.0, 14.0, 23.0, 28.0, 49.0, 58.0, 100.0, 169.0, 287.0, 458.0, 810.0, 1359.0, 2303.0, 4024.0, 6588.0, 11170.0, 18868.0, 30963.0, 51033.0, 80345.0, 119599.0, 158372.0, 1216013.0, 137800.0, 96616.0, 63047.0, 38572.0, 23617.0, 14137.0, 8251.0, 5082.0, 2994.0, 1817.0, 1044.0, 615.0, 337.0, 229.0, 126.0, 87.0, 51.0, 30.0, 16.0, 13.0, 7.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0], "bins": [-6.7890625, -6.584716796875, -6.38037109375, -6.176025390625, -5.9716796875, -5.767333984375, -5.56298828125, -5.358642578125, -5.154296875, -4.949951171875, -4.74560546875, -4.541259765625, -4.3369140625, -4.132568359375, -3.92822265625, -3.723876953125, -3.51953125, -3.315185546875, -3.11083984375, -2.906494140625, -2.7021484375, -2.497802734375, -2.29345703125, -2.089111328125, -1.884765625, -1.680419921875, -1.47607421875, -1.271728515625, -1.0673828125, -0.863037109375, -0.65869140625, -0.454345703125, -0.25, -0.045654296875, 0.15869140625, 0.363037109375, 0.5673828125, 0.771728515625, 0.97607421875, 1.180419921875, 1.384765625, 1.589111328125, 1.79345703125, 1.997802734375, 2.2021484375, 2.406494140625, 2.61083984375, 2.815185546875, 3.01953125, 3.223876953125, 3.42822265625, 3.632568359375, 3.8369140625, 4.041259765625, 4.24560546875, 4.449951171875, 4.654296875, 4.858642578125, 5.06298828125, 5.267333984375, 5.4716796875, 5.676025390625, 5.88037109375, 6.084716796875, 6.2890625]}, "gradients/decoder.transformer.h.0.crossattention.q_attn.bias": {"_type": "histogram", "values": [4.0, 3.0, 1.0, 3.0, 6.0, 3.0, 0.0, 5.0, 9.0, 3.0, 5.0, 4.0, 8.0, 12.0, 15.0, 11.0, 19.0, 18.0, 10.0, 25.0, 27.0, 29.0, 54.0, 56.0, 53.0, 54.0, 63.0, 62.0, 60.0, 63.0, 50.0, 43.0, 26.0, 32.0, 32.0, 23.0, 26.0, 15.0, 12.0, 17.0, 8.0, 9.0, 11.0, 3.0, 10.0, 8.0, 4.0, 2.0, 2.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.040771484375, -0.03925657272338867, -0.037741661071777344, -0.036226749420166016, -0.03471183776855469, -0.03319692611694336, -0.03168201446533203, -0.030167102813720703, -0.028652191162109375, -0.027137279510498047, -0.02562236785888672, -0.02410745620727539, -0.022592544555664062, -0.021077632904052734, -0.019562721252441406, -0.018047809600830078, -0.01653289794921875, -0.015017986297607422, -0.013503074645996094, -0.011988162994384766, -0.010473251342773438, -0.00895833969116211, -0.007443428039550781, -0.005928516387939453, -0.004413604736328125, -0.002898693084716797, -0.0013837814331054688, 0.00013113021850585938, 0.0016460418701171875, 0.0031609535217285156, 0.004675865173339844, 0.006190776824951172, 0.0077056884765625, 0.009220600128173828, 0.010735511779785156, 0.012250423431396484, 0.013765335083007812, 0.01528024673461914, 0.01679515838623047, 0.018310070037841797, 0.019824981689453125, 0.021339893341064453, 0.02285480499267578, 0.02436971664428711, 0.025884628295898438, 0.027399539947509766, 0.028914451599121094, 0.030429363250732422, 0.03194427490234375, 0.03345918655395508, 0.034974098205566406, 0.036489009857177734, 0.03800392150878906, 0.03951883316040039, 0.04103374481201172, 0.04254865646362305, 0.044063568115234375, 0.0455784797668457, 0.04709339141845703, 0.04860830307006836, 0.05012321472167969, 0.051638126373291016, 0.053153038024902344, 0.05466794967651367, 0.056182861328125]}, "gradients/decoder.transformer.h.0.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 6.0, 6.0, 6.0, 13.0, 14.0, 14.0, 25.0, 44.0, 46.0, 49.0, 90.0, 119.0, 188.0, 289.0, 458.0, 740.0, 1475.0, 2551.0, 4933.0, 9980.0, 21692.0, 51267.0, 132629.0, 347913.0, 291011.0, 105107.0, 41829.0, 18193.0, 8411.0, 4200.0, 2141.0, 1215.0, 691.0, 405.0, 246.0, 139.0, 133.0, 72.0, 48.0, 44.0, 35.0, 22.0, 13.0, 13.0, 13.0, 15.0, 7.0, 5.0, 6.0, 1.0, 2.0, 1.0, 0.0, 2.0], "bins": [-0.265625, -0.2577857971191406, -0.24994659423828125, -0.24210739135742188, -0.2342681884765625, -0.22642898559570312, -0.21858978271484375, -0.21075057983398438, -0.202911376953125, -0.19507217407226562, -0.18723297119140625, -0.17939376831054688, -0.1715545654296875, -0.16371536254882812, -0.15587615966796875, -0.14803695678710938, -0.14019775390625, -0.13235855102539062, -0.12451934814453125, -0.11668014526367188, -0.1088409423828125, -0.10100173950195312, -0.09316253662109375, -0.08532333374023438, -0.077484130859375, -0.06964492797851562, -0.06180572509765625, -0.053966522216796875, -0.0461273193359375, -0.038288116455078125, -0.03044891357421875, -0.022609710693359375, -0.0147705078125, -0.006931304931640625, 0.00090789794921875, 0.008747100830078125, 0.0165863037109375, 0.024425506591796875, 0.03226470947265625, 0.040103912353515625, 0.047943115234375, 0.055782318115234375, 0.06362152099609375, 0.07146072387695312, 0.0792999267578125, 0.08713912963867188, 0.09497833251953125, 0.10281753540039062, 0.11065673828125, 0.11849594116210938, 0.12633514404296875, 0.13417434692382812, 0.1420135498046875, 0.14985275268554688, 0.15769195556640625, 0.16553115844726562, 0.173370361328125, 0.18120956420898438, 0.18904876708984375, 0.19688796997070312, 0.2047271728515625, 0.21256637573242188, 0.22040557861328125, 0.22824478149414062, 0.236083984375]}, "gradients/decoder.transformer.h.0.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 4.0, 7.0, 1.0, 9.0, 9.0, 12.0, 16.0, 21.0, 26.0, 43.0, 56.0, 50.0, 60.0, 104.0, 107.0, 77.0, 88.0, 81.0, 64.0, 60.0, 36.0, 28.0, 16.0, 14.0, 9.0, 7.0, 2.0, 1.0, 2.0, 1.0, 3.0], "bins": [-0.12564866244792938, -0.1230572760105133, -0.12046589702367783, -0.11787451058626175, -0.11528313159942627, -0.11269174516201019, -0.11010036617517471, -0.10750897973775864, -0.10491760075092316, -0.10232621431350708, -0.0997348353266716, -0.09714344888925552, -0.09455206990242004, -0.09196068346500397, -0.08936930447816849, -0.08677791804075241, -0.08418653160333633, -0.08159514516592026, -0.07900376617908478, -0.0764123797416687, -0.07382100075483322, -0.07122961431741714, -0.06863823533058167, -0.06604684889316559, -0.06345546245574951, -0.060864079743623734, -0.058272697031497955, -0.05568131431937218, -0.0530899316072464, -0.05049854516983032, -0.047907162457704544, -0.045315779745578766, -0.042724404484033585, -0.040133021771907806, -0.03754163905978203, -0.03495025634765625, -0.03235886991024017, -0.029767489060759544, -0.027176104485988617, -0.02458472177386284, -0.02199333906173706, -0.019401956349611282, -0.016810573637485504, -0.014219189062714577, -0.011627806350588799, -0.00903642363846302, -0.0064450399950146675, -0.0038536563515663147, -0.0012622736394405365, 0.001329109538346529, 0.0039204927161335945, 0.00651187589392066, 0.009103259071707726, 0.011694641783833504, 0.014286025427281857, 0.01687740907073021, 0.019468791782855988, 0.022060174494981766, 0.024651557207107544, 0.02724294178187847, 0.02983432449400425, 0.03242570906877518, 0.035017091780900955, 0.03760847449302673, 0.04019985720515251]}, "gradients/decoder.transformer.h.0.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 4.0, 3.0, 6.0, 9.0, 5.0, 8.0, 7.0, 11.0, 16.0, 17.0, 20.0, 27.0, 22.0, 37.0, 27.0, 30.0, 30.0, 43.0, 49.0, 39.0, 53.0, 45.0, 40.0, 48.0, 43.0, 38.0, 39.0, 30.0, 42.0, 34.0, 35.0, 25.0, 25.0, 21.0, 13.0, 22.0, 14.0, 11.0, 8.0, 6.0, 3.0, 4.0, 7.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.04023468494415283, -0.03889888897538185, -0.03756309300661087, -0.03622729703783989, -0.03489150106906891, -0.03355570510029793, -0.03221990913152695, -0.030884113162755966, -0.029548317193984985, -0.028212521225214005, -0.026876725256443024, -0.025540929287672043, -0.024205133318901062, -0.02286933735013008, -0.0215335413813591, -0.02019774541258812, -0.01886194944381714, -0.017526153475046158, -0.016190357506275177, -0.014854561537504196, -0.013518765568733215, -0.012182969599962234, -0.010847173631191254, -0.009511377662420273, -0.008175581693649292, -0.006839785724878311, -0.00550398975610733, -0.0041681937873363495, -0.0028323978185653687, -0.0014966018497943878, -0.00016080588102340698, 0.0011749900877475739, 0.0025107860565185547, 0.0038465820252895355, 0.005182377994060516, 0.006518173962831497, 0.007853969931602478, 0.009189765900373459, 0.01052556186914444, 0.01186135783791542, 0.013197153806686401, 0.014532949775457382, 0.015868745744228363, 0.017204541712999344, 0.018540337681770325, 0.019876133650541306, 0.021211929619312286, 0.022547725588083267, 0.023883521556854248, 0.02521931752562523, 0.02655511349439621, 0.02789090946316719, 0.02922670543193817, 0.030562501400709152, 0.03189829736948013, 0.033234093338251114, 0.034569889307022095, 0.035905685275793076, 0.037241481244564056, 0.03857727721333504, 0.03991307318210602, 0.041248869150877, 0.04258466511964798, 0.04392046108841896, 0.04525625705718994]}, "gradients/decoder.transformer.h.0.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 6.0, 0.0, 4.0, 1.0, 2.0, 3.0, 1.0, 7.0, 4.0, 8.0, 6.0, 14.0, 10.0, 14.0, 14.0, 15.0, 27.0, 20.0, 28.0, 30.0, 33.0, 45.0, 44.0, 39.0, 37.0, 29.0, 40.0, 47.0, 42.0, 48.0, 47.0, 29.0, 37.0, 37.0, 29.0, 21.0, 37.0, 24.0, 23.0, 24.0, 11.0, 17.0, 9.0, 6.0, 4.0, 7.0, 5.0, 3.0, 5.0, 9.0, 4.0, 1.0, 4.0, 3.0, 1.0, 3.0], "bins": [-90.0, -87.45703125, -84.9140625, -82.37109375, -79.828125, -77.28515625, -74.7421875, -72.19921875, -69.65625, -67.11328125, -64.5703125, -62.02734375, -59.484375, -56.94140625, -54.3984375, -51.85546875, -49.3125, -46.76953125, -44.2265625, -41.68359375, -39.140625, -36.59765625, -34.0546875, -31.51171875, -28.96875, -26.42578125, -23.8828125, -21.33984375, -18.796875, -16.25390625, -13.7109375, -11.16796875, -8.625, -6.08203125, -3.5390625, -0.99609375, 1.546875, 4.08984375, 6.6328125, 9.17578125, 11.71875, 14.26171875, 16.8046875, 19.34765625, 21.890625, 24.43359375, 26.9765625, 29.51953125, 32.0625, 34.60546875, 37.1484375, 39.69140625, 42.234375, 44.77734375, 47.3203125, 49.86328125, 52.40625, 54.94921875, 57.4921875, 60.03515625, 62.578125, 65.12109375, 67.6640625, 70.20703125, 72.75]}, "gradients/decoder.transformer.h.0.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 3.0, 5.0, 3.0, 2.0, 1.0, 4.0, 7.0, 6.0, 10.0, 12.0, 17.0, 23.0, 24.0, 38.0, 53.0, 85.0, 109.0, 169.0, 301.0, 405.0, 722.0, 1619.0, 4029.0, 13151.0, 55800.0, 267513.0, 500104.0, 157433.0, 32785.0, 8368.0, 2868.0, 1162.0, 615.0, 379.0, 212.0, 166.0, 93.0, 74.0, 58.0, 29.0, 25.0, 13.0, 23.0, 9.0, 6.0, 8.0, 5.0, 7.0, 4.0, 3.0, 3.0, 0.0, 2.0, 2.0, 2.0, 1.0], "bins": [-14.9921875, -14.555419921875, -14.11865234375, -13.681884765625, -13.2451171875, -12.808349609375, -12.37158203125, -11.934814453125, -11.498046875, -11.061279296875, -10.62451171875, -10.187744140625, -9.7509765625, -9.314208984375, -8.87744140625, -8.440673828125, -8.00390625, -7.567138671875, -7.13037109375, -6.693603515625, -6.2568359375, -5.820068359375, -5.38330078125, -4.946533203125, -4.509765625, -4.072998046875, -3.63623046875, -3.199462890625, -2.7626953125, -2.325927734375, -1.88916015625, -1.452392578125, -1.015625, -0.578857421875, -0.14208984375, 0.294677734375, 0.7314453125, 1.168212890625, 1.60498046875, 2.041748046875, 2.478515625, 2.915283203125, 3.35205078125, 3.788818359375, 4.2255859375, 4.662353515625, 5.09912109375, 5.535888671875, 5.97265625, 6.409423828125, 6.84619140625, 7.282958984375, 7.7197265625, 8.156494140625, 8.59326171875, 9.030029296875, 9.466796875, 9.903564453125, 10.34033203125, 10.777099609375, 11.2138671875, 11.650634765625, 12.08740234375, 12.524169921875, 12.9609375]}, "gradients/decoder.transformer.h.0.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 1.0, 3.0, 2.0, 2.0, 3.0, 5.0, 1.0, 2.0, 4.0, 20.0, 26.0, 37.0, 51.0, 47.0, 70.0, 87.0, 90.0, 2131.0, 107.0, 86.0, 88.0, 52.0, 37.0, 25.0, 31.0, 15.0, 12.0, 7.0, 5.0, 2.0, 0.0, 3.0, 4.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-257.5, -249.375, -241.25, -233.125, -225.0, -216.875, -208.75, -200.625, -192.5, -184.375, -176.25, -168.125, -160.0, -151.875, -143.75, -135.625, -127.5, -119.375, -111.25, -103.125, -95.0, -86.875, -78.75, -70.625, -62.5, -54.375, -46.25, -38.125, -30.0, -21.875, -13.75, -5.625, 2.5, 10.625, 18.75, 26.875, 35.0, 43.125, 51.25, 59.375, 67.5, 75.625, 83.75, 91.875, 100.0, 108.125, 116.25, 124.375, 132.5, 140.625, 148.75, 156.875, 165.0, 173.125, 181.25, 189.375, 197.5, 205.625, 213.75, 221.875, 230.0, 238.125, 246.25, 254.375, 262.5]}, "gradients/decoder.transformer.h.0.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 2.0, 1.0, 2.0, 3.0, 3.0, 1.0, 1.0, 2.0, 6.0, 3.0, 9.0, 11.0, 15.0, 15.0, 26.0, 32.0, 58.0, 81.0, 124.0, 242.0, 430.0, 962.0, 2660.0, 15009.0, 2790471.0, 323549.0, 8337.0, 1852.0, 815.0, 395.0, 219.0, 111.0, 80.0, 42.0, 37.0, 25.0, 22.0, 13.0, 8.0, 13.0, 6.0, 5.0, 4.0, 5.0, 3.0, 2.0, 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 0.0, 2.0, 1.0], "bins": [-40.5625, -39.365234375, -38.16796875, -36.970703125, -35.7734375, -34.576171875, -33.37890625, -32.181640625, -30.984375, -29.787109375, -28.58984375, -27.392578125, -26.1953125, -24.998046875, -23.80078125, -22.603515625, -21.40625, -20.208984375, -19.01171875, -17.814453125, -16.6171875, -15.419921875, -14.22265625, -13.025390625, -11.828125, -10.630859375, -9.43359375, -8.236328125, -7.0390625, -5.841796875, -4.64453125, -3.447265625, -2.25, -1.052734375, 0.14453125, 1.341796875, 2.5390625, 3.736328125, 4.93359375, 6.130859375, 7.328125, 8.525390625, 9.72265625, 10.919921875, 12.1171875, 13.314453125, 14.51171875, 15.708984375, 16.90625, 18.103515625, 19.30078125, 20.498046875, 21.6953125, 22.892578125, 24.08984375, 25.287109375, 26.484375, 27.681640625, 28.87890625, 30.076171875, 31.2734375, 32.470703125, 33.66796875, 34.865234375, 36.0625]}, "gradients/decoder.transformer.h.0.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 3.0, 1.0, 2.0, 3.0, 3.0, 9.0, 15.0, 26.0, 49.0, 83.0, 121.0, 129.0, 173.0, 144.0, 100.0, 58.0, 24.0, 26.0, 13.0, 5.0, 5.0, 5.0, 3.0, 3.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-180.86422729492188, -174.43331909179688, -168.00241088867188, -161.57150268554688, -155.14060974121094, -148.70970153808594, -142.27879333496094, -135.84788513183594, -129.4169921875, -122.986083984375, -116.55518341064453, -110.12427520751953, -103.69337463378906, -97.26246643066406, -90.83155822753906, -84.40065002441406, -77.96974182128906, -71.53883361816406, -65.1079330444336, -58.677024841308594, -52.24612045288086, -45.815216064453125, -39.384307861328125, -32.95340347290039, -26.522499084472656, -20.091594696044922, -13.660688400268555, -7.2297821044921875, -0.7988777160644531, 5.632026672363281, 12.062934875488281, 18.493839263916016, 24.924728393554688, 31.355632781982422, 37.786537170410156, 44.217445373535156, 50.64834976196289, 57.079254150390625, 63.510162353515625, 69.94107055664062, 76.3719711303711, 82.8028793334961, 89.23377990722656, 95.66468811035156, 102.09559631347656, 108.52649688720703, 114.95740509033203, 121.3883056640625, 127.8192138671875, 134.2501220703125, 140.6810302734375, 147.1119384765625, 153.54283142089844, 159.97373962402344, 166.40464782714844, 172.83555603027344, 179.26644897460938, 185.69735717773438, 192.12826538085938, 198.55917358398438, 204.9900665283203, 211.4209747314453, 217.8518829345703, 224.2827911376953, 230.7136993408203]}, "gradients/decoder.transformer.h.0.ln_1.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 2.0, 1.0, 2.0, 2.0, 1.0, 4.0, 0.0, 2.0, 2.0, 2.0, 4.0, 5.0, 6.0, 13.0, 10.0, 20.0, 20.0, 21.0, 21.0, 20.0, 27.0, 35.0, 38.0, 39.0, 39.0, 32.0, 50.0, 46.0, 48.0, 43.0, 50.0, 63.0, 43.0, 57.0, 25.0, 35.0, 40.0, 20.0, 31.0, 11.0, 13.0, 13.0, 18.0, 7.0, 8.0, 7.0, 3.0, 10.0, 1.0, 3.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 0.0, 1.0, 0.0, 1.0], "bins": [-192.01649475097656, -185.93333435058594, -179.85018920898438, -173.76702880859375, -167.6838836669922, -161.60072326660156, -155.517578125, -149.43441772460938, -143.3512725830078, -137.2681121826172, -131.18496704101562, -125.10181427001953, -119.01866149902344, -112.93550872802734, -106.85235595703125, -100.76919555664062, -94.68604278564453, -88.60289001464844, -82.51973724365234, -76.43658447265625, -70.35343170166016, -64.27027893066406, -58.1871223449707, -52.10396957397461, -46.020816802978516, -39.93766403198242, -33.85451126098633, -27.7713565826416, -21.688203811645508, -15.605051040649414, -9.521896362304688, -3.4387435913085938, 2.6444091796875, 8.727561950683594, 14.810715675354004, 20.893869400024414, 26.977022171020508, 33.06017303466797, 39.14332962036133, 45.22648239135742, 51.309635162353516, 57.39278793334961, 63.4759407043457, 69.55909729003906, 75.64225006103516, 81.72540283203125, 87.80855560302734, 93.89170837402344, 99.97486114501953, 106.05801391601562, 112.14116668701172, 118.22431945800781, 124.3074722290039, 130.390625, 136.47378540039062, 142.5569305419922, 148.6400909423828, 154.72325134277344, 160.806396484375, 166.88955688476562, 172.9727020263672, 179.0558624267578, 185.13900756835938, 191.22216796875, 197.30531311035156]}, "gradients/decoder.transformer.wpe.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 3.0, 2.0, 3.0, 12.0, 6.0, 8.0, 15.0, 25.0, 25.0, 41.0, 62.0, 62.0, 94.0, 144.0, 196.0, 223.0, 334.0, 487.0, 641.0, 838.0, 1250.0, 1628.0, 2150.0, 2960.0, 4443.0, 1015452.0, 5300.0, 3236.0, 2318.0, 1699.0, 1298.0, 996.0, 733.0, 527.0, 376.0, 294.0, 192.0, 144.0, 75.0, 84.0, 45.0, 37.0, 24.0, 24.0, 14.0, 10.0, 14.0, 4.0, 10.0, 3.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 2.0], "bins": [-28.988479614257812, -28.100051879882812, -27.211626052856445, -26.323198318481445, -25.434772491455078, -24.546344757080078, -23.657917022705078, -22.76949119567871, -21.88106346130371, -20.99263572692871, -20.104209899902344, -19.215782165527344, -18.327356338500977, -17.438928604125977, -16.55050277709961, -15.66207504272461, -14.773648262023926, -13.885221481323242, -12.996794700622559, -12.108367919921875, -11.219940185546875, -10.331513404846191, -9.443086624145508, -8.554658889770508, -7.666232585906982, -6.777805805206299, -5.889378547668457, -5.000951766967773, -4.11252498626709, -3.224097728729248, -2.3356709480285645, -1.4472436904907227, -0.5588169097900391, 0.32961004972457886, 1.2180370092391968, 2.10646390914917, 2.9948909282684326, 3.8833179473876953, 4.771744728088379, 5.660171985626221, 6.548598766326904, 7.437025547027588, 8.32545280456543, 9.213879585266113, 10.102306365966797, 10.990734100341797, 11.879159927368164, 12.767587661743164, 13.656014442443848, 14.544441223144531, 15.432868003845215, 16.3212947845459, 17.2097225189209, 18.098148345947266, 18.986576080322266, 19.875003814697266, 20.763429641723633, 21.651857376098633, 22.540283203125, 23.4287109375, 24.317136764526367, 25.205564498901367, 26.093990325927734, 26.982418060302734, 27.870845794677734]}, "gradients/decoder.transformer.wte.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 0.0, 3.0, 2.0, 4.0, 3.0, 2.0, 1.0, 8.0, 10.0, 18.0, 16.0, 34.0, 50.0, 44.0, 82.0, 119.0, 423.0, 51460200.0, 1846.0, 146.0, 57.0, 34.0, 21.0, 15.0, 9.0, 4.0, 6.0, 4.0, 5.0, 1.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1636.43115234375, -1560.9873046875, -1485.5435791015625, -1410.0997314453125, -1334.656005859375, -1259.212158203125, -1183.768310546875, -1108.3245849609375, -1032.880859375, -957.4370727539062, -881.9932861328125, -806.5494384765625, -731.105712890625, -655.661865234375, -580.2180786132812, -504.7742919921875, -429.3304443359375, -353.88665771484375, -278.44287109375, -202.99905395507812, -127.55526733398438, -52.111480712890625, 23.33233642578125, 98.776123046875, 174.21990966796875, 249.6636962890625, 325.10748291015625, 400.5513000488281, 475.9950866699219, 551.4388427734375, 626.8826904296875, 702.3264770507812, 777.770263671875, 853.2140502929688, 928.6578369140625, 1004.1016845703125, 1079.54541015625, 1154.9892578125, 1230.43310546875, 1305.8768310546875, 1381.320556640625, 1456.764404296875, 1532.2081298828125, 1607.6519775390625, 1683.095703125, 1758.53955078125, 1833.9833984375, 1909.4271240234375, 1984.8709716796875, 2060.314697265625, 2135.758544921875, 2211.202392578125, 2286.646240234375, 2362.08984375, 2437.53369140625, 2512.9775390625, 2588.42138671875, 2663.865234375, 2739.30908203125, 2814.752685546875, 2890.196533203125, 2965.640380859375, 3041.084228515625, 3116.52783203125, 3191.9716796875]}, "gradients/encoder.adapter.layers.2.conv.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 3.0, 4.0, 5.0, 6.0, 17.0, 31.0, 47.0, 72.0, 92.0, 133.0, 185.0, 319.0, 503.0, 757.0, 1073.0, 1675.0, 2559.0, 3994.0, 6028.0, 9356.0, 14356.0, 22049.0, 33355.0, 50326.0, 77273.0, 115130.0, 167785.0, 236371.0, 320406.0, 575526.0, 3316191.0, 397875.0, 289824.0, 209603.0, 145909.0, 99920.0, 66447.0, 43667.0, 28727.0, 18625.0, 12305.0, 7843.0, 5284.0, 3354.0, 2192.0, 1506.0, 958.0, 609.0, 426.0, 286.0, 153.0, 110.0, 89.0, 60.0, 21.0, 10.0, 12.0, 7.0, 2.0, 2.0], "bins": [-1.8564453125, -1.8023529052734375, -1.748260498046875, -1.6941680908203125, -1.64007568359375, -1.5859832763671875, -1.531890869140625, -1.4777984619140625, -1.4237060546875, -1.3696136474609375, -1.315521240234375, -1.2614288330078125, -1.20733642578125, -1.1532440185546875, -1.099151611328125, -1.0450592041015625, -0.990966796875, -0.9368743896484375, -0.882781982421875, -0.8286895751953125, -0.77459716796875, -0.7205047607421875, -0.666412353515625, -0.6123199462890625, -0.5582275390625, -0.5041351318359375, -0.450042724609375, -0.3959503173828125, -0.34185791015625, -0.2877655029296875, -0.233673095703125, -0.1795806884765625, -0.12548828125, -0.0713958740234375, -0.017303466796875, 0.0367889404296875, 0.09088134765625, 0.1449737548828125, 0.199066162109375, 0.2531585693359375, 0.3072509765625, 0.3613433837890625, 0.415435791015625, 0.4695281982421875, 0.52362060546875, 0.5777130126953125, 0.631805419921875, 0.6858978271484375, 0.739990234375, 0.7940826416015625, 0.848175048828125, 0.9022674560546875, 0.95635986328125, 1.0104522705078125, 1.064544677734375, 1.1186370849609375, 1.1727294921875, 1.2268218994140625, 1.280914306640625, 1.3350067138671875, 1.38909912109375, 1.4431915283203125, 1.497283935546875, 1.5513763427734375, 1.60546875]}, "gradients/encoder.adapter.layers.2.conv.bias": {"_type": "histogram", "values": [4.0, 4.0, 0.0, 6.0, 4.0, 3.0, 5.0, 3.0, 7.0, 7.0, 8.0, 10.0, 6.0, 12.0, 19.0, 15.0, 17.0, 22.0, 18.0, 12.0, 26.0, 19.0, 29.0, 33.0, 37.0, 24.0, 39.0, 42.0, 46.0, 156.0, 860.0, 102.0, 36.0, 29.0, 29.0, 29.0, 34.0, 33.0, 24.0, 28.0, 24.0, 22.0, 23.0, 11.0, 22.0, 16.0, 12.0, 10.0, 16.0, 7.0, 9.0, 6.0, 5.0, 6.0, 2.0, 3.0, 3.0, 3.0, 2.0, 3.0, 1.0, 3.0, 0.0, 2.0], "bins": [-20.640625, -19.959716796875, -19.27880859375, -18.597900390625, -17.9169921875, -17.236083984375, -16.55517578125, -15.874267578125, -15.193359375, -14.512451171875, -13.83154296875, -13.150634765625, -12.4697265625, -11.788818359375, -11.10791015625, -10.427001953125, -9.74609375, -9.065185546875, -8.38427734375, -7.703369140625, -7.0224609375, -6.341552734375, -5.66064453125, -4.979736328125, -4.298828125, -3.617919921875, -2.93701171875, -2.256103515625, -1.5751953125, -0.894287109375, -0.21337890625, 0.467529296875, 1.1484375, 1.829345703125, 2.51025390625, 3.191162109375, 3.8720703125, 4.552978515625, 5.23388671875, 5.914794921875, 6.595703125, 7.276611328125, 7.95751953125, 8.638427734375, 9.3193359375, 10.000244140625, 10.68115234375, 11.362060546875, 12.04296875, 12.723876953125, 13.40478515625, 14.085693359375, 14.7666015625, 15.447509765625, 16.12841796875, 16.809326171875, 17.490234375, 18.171142578125, 18.85205078125, 19.532958984375, 20.2138671875, 20.894775390625, 21.57568359375, 22.256591796875, 22.9375]}, "gradients/encoder.adapter.layers.1.conv.weight": {"_type": "histogram", "values": [1.0, 0.0, 4.0, 2.0, 0.0, 3.0, 6.0, 4.0, 10.0, 22.0, 32.0, 56.0, 89.0, 138.0, 196.0, 315.0, 465.0, 789.0, 1151.0, 1786.0, 2719.0, 4550.0, 6798.0, 10410.0, 16415.0, 25952.0, 41036.0, 64065.0, 102128.0, 160847.0, 244240.0, 352221.0, 747143.0, 3271704.0, 415378.0, 287014.0, 193855.0, 123902.0, 78912.0, 49802.0, 31807.0, 20089.0, 12541.0, 8074.0, 5198.0, 3462.0, 2217.0, 1403.0, 920.0, 538.0, 383.0, 249.0, 138.0, 111.0, 49.0, 45.0, 20.0, 19.0, 21.0, 4.0, 3.0, 2.0, 2.0, 1.0], "bins": [-1.841796875, -1.786346435546875, -1.73089599609375, -1.675445556640625, -1.6199951171875, -1.564544677734375, -1.50909423828125, -1.453643798828125, -1.398193359375, -1.342742919921875, -1.28729248046875, -1.231842041015625, -1.1763916015625, -1.120941162109375, -1.06549072265625, -1.010040283203125, -0.95458984375, -0.899139404296875, -0.84368896484375, -0.788238525390625, -0.7327880859375, -0.677337646484375, -0.62188720703125, -0.566436767578125, -0.510986328125, -0.455535888671875, -0.40008544921875, -0.344635009765625, -0.2891845703125, -0.233734130859375, -0.17828369140625, -0.122833251953125, -0.0673828125, -0.011932373046875, 0.04351806640625, 0.098968505859375, 0.1544189453125, 0.209869384765625, 0.26531982421875, 0.320770263671875, 0.376220703125, 0.431671142578125, 0.48712158203125, 0.542572021484375, 0.5980224609375, 0.653472900390625, 0.70892333984375, 0.764373779296875, 0.81982421875, 0.875274658203125, 0.93072509765625, 0.986175537109375, 1.0416259765625, 1.097076416015625, 1.15252685546875, 1.207977294921875, 1.263427734375, 1.318878173828125, 1.37432861328125, 1.429779052734375, 1.4852294921875, 1.540679931640625, 1.59613037109375, 1.651580810546875, 1.70703125]}, "gradients/encoder.adapter.layers.1.conv.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 3.0, 4.0, 5.0, 5.0, 11.0, 7.0, 12.0, 16.0, 10.0, 10.0, 11.0, 23.0, 14.0, 20.0, 19.0, 29.0, 28.0, 25.0, 25.0, 43.0, 39.0, 49.0, 47.0, 192.0, 886.0, 52.0, 54.0, 39.0, 31.0, 43.0, 40.0, 41.0, 22.0, 22.0, 22.0, 23.0, 20.0, 17.0, 19.0, 12.0, 11.0, 6.0, 7.0, 7.0, 6.0, 2.0, 4.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-28.390625, -27.559326171875, -26.72802734375, -25.896728515625, -25.0654296875, -24.234130859375, -23.40283203125, -22.571533203125, -21.740234375, -20.908935546875, -20.07763671875, -19.246337890625, -18.4150390625, -17.583740234375, -16.75244140625, -15.921142578125, -15.08984375, -14.258544921875, -13.42724609375, -12.595947265625, -11.7646484375, -10.933349609375, -10.10205078125, -9.270751953125, -8.439453125, -7.608154296875, -6.77685546875, -5.945556640625, -5.1142578125, -4.282958984375, -3.45166015625, -2.620361328125, -1.7890625, -0.957763671875, -0.12646484375, 0.704833984375, 1.5361328125, 2.367431640625, 3.19873046875, 4.030029296875, 4.861328125, 5.692626953125, 6.52392578125, 7.355224609375, 8.1865234375, 9.017822265625, 9.84912109375, 10.680419921875, 11.51171875, 12.343017578125, 13.17431640625, 14.005615234375, 14.8369140625, 15.668212890625, 16.49951171875, 17.330810546875, 18.162109375, 18.993408203125, 19.82470703125, 20.656005859375, 21.4873046875, 22.318603515625, 23.14990234375, 23.981201171875, 24.8125]}, "gradients/encoder.adapter.layers.0.conv.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 3.0, 1.0, 1.0, 7.0, 9.0, 12.0, 17.0, 16.0, 18.0, 20.0, 22.0, 39.0, 52.0, 72.0, 120.0, 158.0, 275.0, 542.0, 1147.0, 2395.0, 6004.0, 14073.0, 35172.0, 96012.0, 5682834.0, 322843.0, 78570.0, 29692.0, 11917.0, 4942.0, 2142.0, 1066.0, 447.0, 258.0, 137.0, 140.0, 62.0, 43.0, 20.0, 22.0, 25.0, 26.0, 26.0, 14.0, 7.0, 9.0, 12.0, 4.0, 1.0, 0.0, 1.0, 1.0, 2.0], "bins": [-10.7421875, -10.4429931640625, -10.143798828125, -9.8446044921875, -9.54541015625, -9.2462158203125, -8.947021484375, -8.6478271484375, -8.3486328125, -8.0494384765625, -7.750244140625, -7.4510498046875, -7.15185546875, -6.8526611328125, -6.553466796875, -6.2542724609375, -5.955078125, -5.6558837890625, -5.356689453125, -5.0574951171875, -4.75830078125, -4.4591064453125, -4.159912109375, -3.8607177734375, -3.5615234375, -3.2623291015625, -2.963134765625, -2.6639404296875, -2.36474609375, -2.0655517578125, -1.766357421875, -1.4671630859375, -1.16796875, -0.8687744140625, -0.569580078125, -0.2703857421875, 0.02880859375, 0.3280029296875, 0.627197265625, 0.9263916015625, 1.2255859375, 1.5247802734375, 1.823974609375, 2.1231689453125, 2.42236328125, 2.7215576171875, 3.020751953125, 3.3199462890625, 3.619140625, 3.9183349609375, 4.217529296875, 4.5167236328125, 4.81591796875, 5.1151123046875, 5.414306640625, 5.7135009765625, 6.0126953125, 6.3118896484375, 6.611083984375, 6.9102783203125, 7.20947265625, 7.5086669921875, 7.807861328125, 8.1070556640625, 8.40625]}, "gradients/encoder.adapter.layers.0.conv.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 5.0, 4.0, 3.0, 11.0, 13.0, 8.0, 16.0, 21.0, 19.0, 11.0, 27.0, 35.0, 30.0, 28.0, 43.0, 34.0, 48.0, 40.0, 39.0, 105.0, 1002.0, 63.0, 45.0, 50.0, 30.0, 41.0, 37.0, 33.0, 30.0, 25.0, 25.0, 20.0, 12.0, 19.0, 11.0, 14.0, 6.0, 9.0, 9.0, 2.0, 7.0, 1.0, 2.0, 3.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-28.234375, -27.267333984375, -26.30029296875, -25.333251953125, -24.3662109375, -23.399169921875, -22.43212890625, -21.465087890625, -20.498046875, -19.531005859375, -18.56396484375, -17.596923828125, -16.6298828125, -15.662841796875, -14.69580078125, -13.728759765625, -12.76171875, -11.794677734375, -10.82763671875, -9.860595703125, -8.8935546875, -7.926513671875, -6.95947265625, -5.992431640625, -5.025390625, -4.058349609375, -3.09130859375, -2.124267578125, -1.1572265625, -0.190185546875, 0.77685546875, 1.743896484375, 2.7109375, 3.677978515625, 4.64501953125, 5.612060546875, 6.5791015625, 7.546142578125, 8.51318359375, 9.480224609375, 10.447265625, 11.414306640625, 12.38134765625, 13.348388671875, 14.3154296875, 15.282470703125, 16.24951171875, 17.216552734375, 18.18359375, 19.150634765625, 20.11767578125, 21.084716796875, 22.0517578125, 23.018798828125, 23.98583984375, 24.952880859375, 25.919921875, 26.886962890625, 27.85400390625, 28.821044921875, 29.7880859375, 30.755126953125, 31.72216796875, 32.689208984375, 33.65625]}, "gradients/encoder.encoder.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 3.0, 9.0, 30.0, 118.0, 491.0, 268.0, 63.0, 18.0, 8.0, 5.0, 2.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-72.91288757324219, -69.02229309082031, -65.13169860839844, -61.2411003112793, -57.35050582885742, -53.45990753173828, -49.569313049316406, -45.67871856689453, -41.788124084472656, -37.89752960205078, -34.00693130493164, -30.116336822509766, -26.22574234008789, -22.335145950317383, -18.444549560546875, -14.553955078125, -10.66335678100586, -6.772761344909668, -2.8821654319763184, 1.0084304809570312, 4.899025917053223, 8.789621353149414, 12.680217742919922, 16.570812225341797, 20.461408615112305, 24.352005004882812, 28.242599487304688, 32.13319396972656, 36.0237922668457, 39.91438674926758, 43.80498504638672, 47.695579528808594, 51.58617401123047, 55.476768493652344, 59.367366790771484, 63.25796127319336, 67.1485595703125, 71.03915405273438, 74.92974853515625, 78.82034301757812, 82.7109375, 86.60153198242188, 90.49212646484375, 94.38272094726562, 98.27332305908203, 102.1639175415039, 106.05451202392578, 109.94510650634766, 113.83570861816406, 117.72630310058594, 121.61689758300781, 125.50749206542969, 129.39808654785156, 133.28868103027344, 137.17929077148438, 141.06988525390625, 144.96046447753906, 148.85105895996094, 152.7416534423828, 156.6322479248047, 160.52284240722656, 164.41343688964844, 168.30404663085938, 172.19464111328125, 176.08523559570312]}, "gradients/encoder.encoder.layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 3.0, 6.0, 6.0, 6.0, 13.0, 7.0, 12.0, 12.0, 36.0, 41.0, 71.0, 77.0, 84.0, 115.0, 109.0, 104.0, 71.0, 63.0, 56.0, 33.0, 29.0, 18.0, 9.0, 7.0, 5.0, 7.0, 3.0, 1.0, 5.0, 2.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-102.44094848632812, -98.71145629882812, -94.98196411132812, -91.25247192382812, -87.52297973632812, -83.79348754882812, -80.06399536132812, -76.33450317382812, -72.60501098632812, -68.87551879882812, -65.14602661132812, -61.416534423828125, -57.687042236328125, -53.957550048828125, -50.22806167602539, -46.49856948852539, -42.769081115722656, -39.039588928222656, -35.310096740722656, -31.58060646057129, -27.85111427307129, -24.12162208557129, -20.392131805419922, -16.662639617919922, -12.933147430419922, -9.203655242919922, -5.474164009094238, -1.7446727752685547, 1.9848194122314453, 5.714311599731445, 9.443801879882812, 13.173294067382812, 16.902786254882812, 20.632278442382812, 24.361770629882812, 28.09126091003418, 31.82075309753418, 35.55024719238281, 39.27973556518555, 43.00922775268555, 46.73871994018555, 50.46821212768555, 54.19770431518555, 57.92719268798828, 61.65668487548828, 65.38617706298828, 69.11566925048828, 72.84516143798828, 76.57465362548828, 80.30414581298828, 84.03363800048828, 87.76313018798828, 91.49262237548828, 95.22211456298828, 98.95159912109375, 102.68109130859375, 106.41058349609375, 110.14007568359375, 113.86956787109375, 117.59906005859375, 121.32855224609375, 125.05804443359375, 128.78753662109375, 132.51702880859375, 136.24652099609375]}, "gradients/encoder.encoder.layers.23.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 3.0, 1.0, 3.0, 2.0, 8.0, 12.0, 16.0, 16.0, 19.0, 35.0, 29.0, 76.0, 77.0, 141.0, 183.0, 285.0, 423.0, 692.0, 1115.0, 1762.0, 2999.0, 5326.0, 10740.0, 25830.0, 99941.0, 3921796.0, 81608.0, 21130.0, 8570.0, 4365.0, 2537.0, 1489.0, 961.0, 624.0, 424.0, 289.0, 235.0, 173.0, 92.0, 71.0, 57.0, 37.0, 34.0, 24.0, 12.0, 4.0, 7.0, 6.0, 2.0, 5.0, 6.0, 2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.57861328125, -0.5589752197265625, -0.539337158203125, -0.5196990966796875, -0.50006103515625, -0.4804229736328125, -0.460784912109375, -0.4411468505859375, -0.4215087890625, -0.4018707275390625, -0.382232666015625, -0.3625946044921875, -0.34295654296875, -0.3233184814453125, -0.303680419921875, -0.2840423583984375, -0.264404296875, -0.2447662353515625, -0.225128173828125, -0.2054901123046875, -0.18585205078125, -0.1662139892578125, -0.146575927734375, -0.1269378662109375, -0.1072998046875, -0.0876617431640625, -0.068023681640625, -0.0483856201171875, -0.02874755859375, -0.0091094970703125, 0.010528564453125, 0.0301666259765625, 0.0498046875, 0.0694427490234375, 0.089080810546875, 0.1087188720703125, 0.12835693359375, 0.1479949951171875, 0.167633056640625, 0.1872711181640625, 0.2069091796875, 0.2265472412109375, 0.246185302734375, 0.2658233642578125, 0.28546142578125, 0.3050994873046875, 0.324737548828125, 0.3443756103515625, 0.364013671875, 0.3836517333984375, 0.403289794921875, 0.4229278564453125, 0.44256591796875, 0.4622039794921875, 0.481842041015625, 0.5014801025390625, 0.5211181640625, 0.5407562255859375, 0.560394287109375, 0.5800323486328125, 0.59967041015625, 0.6193084716796875, 0.638946533203125, 0.6585845947265625, 0.67822265625]}, "gradients/encoder.encoder.layers.23.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 2.0, 3.0, 0.0, 1.0, 2.0, 6.0, 1.0, 1.0, 2.0, 5.0, 1.0, 5.0, 7.0, 8.0, 6.0, 5.0, 8.0, 10.0, 10.0, 18.0, 14.0, 546.0, 247.0, 16.0, 17.0, 16.0, 6.0, 8.0, 7.0, 4.0, 3.0, 3.0, 0.0, 3.0, 5.0, 1.0, 1.0, 6.0, 2.0, 1.0, 4.0, 1.0, 3.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.339111328125, -0.326934814453125, -0.31475830078125, -0.302581787109375, -0.2904052734375, -0.278228759765625, -0.26605224609375, -0.253875732421875, -0.24169921875, -0.229522705078125, -0.21734619140625, -0.205169677734375, -0.1929931640625, -0.180816650390625, -0.16864013671875, -0.156463623046875, -0.144287109375, -0.132110595703125, -0.11993408203125, -0.107757568359375, -0.0955810546875, -0.083404541015625, -0.07122802734375, -0.059051513671875, -0.046875, -0.034698486328125, -0.02252197265625, -0.010345458984375, 0.0018310546875, 0.014007568359375, 0.02618408203125, 0.038360595703125, 0.050537109375, 0.062713623046875, 0.07489013671875, 0.087066650390625, 0.0992431640625, 0.111419677734375, 0.12359619140625, 0.135772705078125, 0.14794921875, 0.160125732421875, 0.17230224609375, 0.184478759765625, 0.1966552734375, 0.208831787109375, 0.22100830078125, 0.233184814453125, 0.245361328125, 0.257537841796875, 0.26971435546875, 0.281890869140625, 0.2940673828125, 0.306243896484375, 0.31842041015625, 0.330596923828125, 0.3427734375, 0.354949951171875, 0.36712646484375, 0.379302978515625, 0.3914794921875, 0.403656005859375, 0.41583251953125, 0.428009033203125, 0.440185546875]}, "gradients/encoder.encoder.layers.23.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 7.0, 3.0, 9.0, 6.0, 10.0, 6.0, 25.0, 28.0, 34.0, 60.0, 88.0, 130.0, 219.0, 315.0, 604.0, 1210.0, 2869.0, 7986.0, 33705.0, 327517.0, 3666475.0, 125632.0, 18520.0, 5136.0, 1713.0, 843.0, 435.0, 255.0, 144.0, 88.0, 65.0, 37.0, 29.0, 14.0, 18.0, 14.0, 13.0, 12.0, 6.0, 4.0, 4.0, 0.0, 6.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.6865234375, -0.6638946533203125, -0.641265869140625, -0.6186370849609375, -0.59600830078125, -0.5733795166015625, -0.550750732421875, -0.5281219482421875, -0.5054931640625, -0.4828643798828125, -0.460235595703125, -0.4376068115234375, -0.41497802734375, -0.3923492431640625, -0.369720458984375, -0.3470916748046875, -0.324462890625, -0.3018341064453125, -0.279205322265625, -0.2565765380859375, -0.23394775390625, -0.2113189697265625, -0.188690185546875, -0.1660614013671875, -0.1434326171875, -0.1208038330078125, -0.098175048828125, -0.0755462646484375, -0.05291748046875, -0.0302886962890625, -0.007659912109375, 0.0149688720703125, 0.03759765625, 0.0602264404296875, 0.082855224609375, 0.1054840087890625, 0.12811279296875, 0.1507415771484375, 0.173370361328125, 0.1959991455078125, 0.2186279296875, 0.2412567138671875, 0.263885498046875, 0.2865142822265625, 0.30914306640625, 0.3317718505859375, 0.354400634765625, 0.3770294189453125, 0.399658203125, 0.4222869873046875, 0.444915771484375, 0.4675445556640625, 0.49017333984375, 0.5128021240234375, 0.535430908203125, 0.5580596923828125, 0.5806884765625, 0.6033172607421875, 0.625946044921875, 0.6485748291015625, 0.67120361328125, 0.6938323974609375, 0.716461181640625, 0.7390899658203125, 0.76171875]}, "gradients/encoder.encoder.layers.23.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 2.0, 4.0, 1.0, 3.0, 2.0, 1.0, 6.0, 4.0, 3.0, 12.0, 17.0, 18.0, 37.0, 50.0, 76.0, 147.0, 251.0, 678.0, 1817.0, 499.0, 178.0, 100.0, 55.0, 40.0, 31.0, 12.0, 8.0, 11.0, 4.0, 2.0, 6.0, 3.0, 2.0, 4.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0], "bins": [-0.4619140625, -0.4484748840332031, -0.43503570556640625, -0.4215965270996094, -0.4081573486328125, -0.3947181701660156, -0.38127899169921875, -0.3678398132324219, -0.354400634765625, -0.3409614562988281, -0.32752227783203125, -0.3140830993652344, -0.3006439208984375, -0.2872047424316406, -0.27376556396484375, -0.2603263854980469, -0.24688720703125, -0.23344802856445312, -0.22000885009765625, -0.20656967163085938, -0.1931304931640625, -0.17969131469726562, -0.16625213623046875, -0.15281295776367188, -0.139373779296875, -0.12593460083007812, -0.11249542236328125, -0.09905624389648438, -0.0856170654296875, -0.07217788696289062, -0.05873870849609375, -0.045299530029296875, -0.0318603515625, -0.018421173095703125, -0.00498199462890625, 0.008457183837890625, 0.0218963623046875, 0.035335540771484375, 0.04877471923828125, 0.062213897705078125, 0.075653076171875, 0.08909225463867188, 0.10253143310546875, 0.11597061157226562, 0.1294097900390625, 0.14284896850585938, 0.15628814697265625, 0.16972732543945312, 0.18316650390625, 0.19660568237304688, 0.21004486083984375, 0.22348403930664062, 0.2369232177734375, 0.2503623962402344, 0.26380157470703125, 0.2772407531738281, 0.290679931640625, 0.3041191101074219, 0.31755828857421875, 0.3309974670410156, 0.3444366455078125, 0.3578758239746094, 0.37131500244140625, 0.3847541809082031, 0.398193359375]}, "gradients/encoder.encoder.layers.23.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 3.0, 7.0, 16.0, 46.0, 398.0, 485.0, 47.0, 6.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.0191986560821533, -1.8531420230865479, -1.687085509300232, -1.521028995513916, -1.3549723625183105, -1.188915729522705, -1.0228592157363892, -0.8568027019500732, -0.6907460689544678, -0.5246894955635071, -0.3586329221725464, -0.1925763487815857, -0.026519775390625, 0.1395367980003357, 0.3055933713912964, 0.4716498851776123, 0.6377065181732178, 0.8037630915641785, 0.9698196649551392, 1.135876178741455, 1.3019328117370605, 1.467989444732666, 1.634045958518982, 1.8001024723052979, 1.9661591053009033, 2.132215738296509, 2.298272132873535, 2.4643287658691406, 2.630385398864746, 2.7964420318603516, 2.962498664855957, 3.1285550594329834, 3.294611930847168, 3.4606685638427734, 3.626725196838379, 3.7927815914154053, 3.9588382244110107, 4.124894618988037, 4.290951251983643, 4.457007884979248, 4.6230645179748535, 4.789121150970459, 4.9551777839660645, 5.12123441696167, 5.287290573120117, 5.453347206115723, 5.619403839111328, 5.785460472106934, 5.951517105102539, 6.1175737380981445, 6.28363037109375, 6.4496870040893555, 6.615743637084961, 6.781799793243408, 6.947856426239014, 7.113913059234619, 7.279969692230225, 7.44602632522583, 7.6120829582214355, 7.778139591217041, 7.944195747375488, 8.110252380371094, 8.2763090133667, 8.442365646362305, 8.60842227935791]}, "gradients/encoder.encoder.layers.23.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 2.0, 5.0, 4.0, 6.0, 7.0, 6.0, 9.0, 15.0, 30.0, 26.0, 54.0, 56.0, 49.0, 75.0, 97.0, 88.0, 110.0, 78.0, 70.0, 61.0, 55.0, 38.0, 28.0, 15.0, 10.0, 1.0, 6.0, 4.0, 2.0, 3.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0], "bins": [-2.696762800216675, -2.6279690265655518, -2.5591752529144287, -2.4903814792633057, -2.4215877056121826, -2.3527939319610596, -2.2840001583099365, -2.2152063846588135, -2.1464126110076904, -2.0776188373565674, -2.0088250637054443, -1.9400312900543213, -1.8712375164031982, -1.8024437427520752, -1.7336499691009521, -1.664856195449829, -1.596062421798706, -1.527268648147583, -1.45847487449646, -1.389681100845337, -1.3208873271942139, -1.2520935535430908, -1.1832997798919678, -1.1145060062408447, -1.0457122325897217, -0.9769184589385986, -0.9081246852874756, -0.8393309116363525, -0.7705371379852295, -0.7017433643341064, -0.6329495906829834, -0.5641558170318604, -0.4953620433807373, -0.42656826972961426, -0.3577744960784912, -0.28898072242736816, -0.22018694877624512, -0.15139317512512207, -0.08259940147399902, -0.013805627822875977, 0.05498814582824707, 0.12378191947937012, 0.19257569313049316, 0.2613694667816162, 0.33016324043273926, 0.3989570140838623, 0.46775078773498535, 0.5365445613861084, 0.6053383350372314, 0.6741321086883545, 0.7429258823394775, 0.8117196559906006, 0.8805134296417236, 0.9493072032928467, 1.0181009769439697, 1.0868947505950928, 1.1556885242462158, 1.2244822978973389, 1.293276071548462, 1.362069845199585, 1.430863618850708, 1.499657392501831, 1.568451166152954, 1.6372449398040771, 1.7060387134552002]}, "gradients/encoder.encoder.layers.23.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 1.0, 4.0, 1.0, 6.0, 10.0, 15.0, 13.0, 30.0, 44.0, 51.0, 82.0, 141.0, 202.0, 281.0, 467.0, 652.0, 993.0, 1552.0, 2510.0, 4151.0, 7419.0, 14816.0, 36958.0, 881914.0, 55205.0, 19055.0, 8979.0, 4834.0, 2898.0, 1820.0, 1189.0, 749.0, 514.0, 337.0, 224.0, 144.0, 87.0, 82.0, 33.0, 31.0, 24.0, 13.0, 15.0, 4.0, 8.0, 2.0, 3.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.6064453125, -1.560089111328125, -1.51373291015625, -1.467376708984375, -1.4210205078125, -1.374664306640625, -1.32830810546875, -1.281951904296875, -1.235595703125, -1.189239501953125, -1.14288330078125, -1.096527099609375, -1.0501708984375, -1.003814697265625, -0.95745849609375, -0.911102294921875, -0.86474609375, -0.818389892578125, -0.77203369140625, -0.725677490234375, -0.6793212890625, -0.632965087890625, -0.58660888671875, -0.540252685546875, -0.493896484375, -0.447540283203125, -0.40118408203125, -0.354827880859375, -0.3084716796875, -0.262115478515625, -0.21575927734375, -0.169403076171875, -0.123046875, -0.076690673828125, -0.03033447265625, 0.016021728515625, 0.0623779296875, 0.108734130859375, 0.15509033203125, 0.201446533203125, 0.247802734375, 0.294158935546875, 0.34051513671875, 0.386871337890625, 0.4332275390625, 0.479583740234375, 0.52593994140625, 0.572296142578125, 0.61865234375, 0.665008544921875, 0.71136474609375, 0.757720947265625, 0.8040771484375, 0.850433349609375, 0.89678955078125, 0.943145751953125, 0.989501953125, 1.035858154296875, 1.08221435546875, 1.128570556640625, 1.1749267578125, 1.221282958984375, 1.26763916015625, 1.313995361328125, 1.3603515625]}, "gradients/encoder.encoder.layers.23.attention.out_proj.bias": {"_type": "histogram", "values": [2.0, 2.0, 1.0, 0.0, 2.0, 2.0, 0.0, 1.0, 3.0, 2.0, 5.0, 1.0, 3.0, 5.0, 0.0, 5.0, 5.0, 6.0, 6.0, 8.0, 10.0, 10.0, 9.0, 13.0, 15.0, 204.0, 583.0, 24.0, 11.0, 20.0, 8.0, 4.0, 9.0, 8.0, 1.0, 4.0, 1.0, 4.0, 3.0, 3.0, 0.0, 2.0, 4.0, 1.0, 6.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.302490234375, -0.2909126281738281, -0.27933502197265625, -0.2677574157714844, -0.2561798095703125, -0.24460220336914062, -0.23302459716796875, -0.22144699096679688, -0.209869384765625, -0.19829177856445312, -0.18671417236328125, -0.17513656616210938, -0.1635589599609375, -0.15198135375976562, -0.14040374755859375, -0.12882614135742188, -0.11724853515625, -0.10567092895507812, -0.09409332275390625, -0.08251571655273438, -0.0709381103515625, -0.059360504150390625, -0.04778289794921875, -0.036205291748046875, -0.024627685546875, -0.013050079345703125, -0.00147247314453125, 0.010105133056640625, 0.0216827392578125, 0.033260345458984375, 0.04483795166015625, 0.056415557861328125, 0.0679931640625, 0.07957077026367188, 0.09114837646484375, 0.10272598266601562, 0.1143035888671875, 0.12588119506835938, 0.13745880126953125, 0.14903640747070312, 0.160614013671875, 0.17219161987304688, 0.18376922607421875, 0.19534683227539062, 0.2069244384765625, 0.21850204467773438, 0.23007965087890625, 0.24165725708007812, 0.25323486328125, 0.2648124694824219, 0.27639007568359375, 0.2879676818847656, 0.2995452880859375, 0.3111228942871094, 0.32270050048828125, 0.3342781066894531, 0.345855712890625, 0.3574333190917969, 0.36901092529296875, 0.3805885314941406, 0.3921661376953125, 0.4037437438964844, 0.41532135009765625, 0.4268989562988281, 0.4384765625]}, "gradients/encoder.encoder.layers.23.attention.v_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 5.0, 7.0, 7.0, 10.0, 13.0, 10.0, 19.0, 31.0, 53.0, 95.0, 141.0, 239.0, 481.0, 1088.0, 3051.0, 12274.0, 73053.0, 627428.0, 286668.0, 33705.0, 6624.0, 1922.0, 735.0, 349.0, 197.0, 129.0, 83.0, 50.0, 33.0, 23.0, 16.0, 12.0, 2.0, 5.0, 2.0, 2.0, 1.0, 3.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.3896484375, -1.345947265625, -1.30224609375, -1.258544921875, -1.21484375, -1.171142578125, -1.12744140625, -1.083740234375, -1.0400390625, -0.996337890625, -0.95263671875, -0.908935546875, -0.865234375, -0.821533203125, -0.77783203125, -0.734130859375, -0.6904296875, -0.646728515625, -0.60302734375, -0.559326171875, -0.515625, -0.471923828125, -0.42822265625, -0.384521484375, -0.3408203125, -0.297119140625, -0.25341796875, -0.209716796875, -0.166015625, -0.122314453125, -0.07861328125, -0.034912109375, 0.0087890625, 0.052490234375, 0.09619140625, 0.139892578125, 0.18359375, 0.227294921875, 0.27099609375, 0.314697265625, 0.3583984375, 0.402099609375, 0.44580078125, 0.489501953125, 0.533203125, 0.576904296875, 0.62060546875, 0.664306640625, 0.7080078125, 0.751708984375, 0.79541015625, 0.839111328125, 0.8828125, 0.926513671875, 0.97021484375, 1.013916015625, 1.0576171875, 1.101318359375, 1.14501953125, 1.188720703125, 1.232421875, 1.276123046875, 1.31982421875, 1.363525390625, 1.4072265625]}, "gradients/encoder.encoder.layers.23.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 1.0, 3.0, 1.0, 6.0, 4.0, 3.0, 10.0, 11.0, 7.0, 12.0, 13.0, 13.0, 23.0, 22.0, 26.0, 27.0, 31.0, 29.0, 35.0, 41.0, 32.0, 41.0, 45.0, 45.0, 35.0, 44.0, 46.0, 47.0, 48.0, 46.0, 32.0, 46.0, 30.0, 29.0, 27.0, 13.0, 16.0, 11.0, 11.0, 10.0, 7.0, 6.0, 5.0, 9.0, 4.0, 4.0, 2.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 0.0, 1.0], "bins": [-1.435546875, -1.392852783203125, -1.35015869140625, -1.307464599609375, -1.2647705078125, -1.222076416015625, -1.17938232421875, -1.136688232421875, -1.093994140625, -1.051300048828125, -1.00860595703125, -0.965911865234375, -0.9232177734375, -0.880523681640625, -0.83782958984375, -0.795135498046875, -0.75244140625, -0.709747314453125, -0.66705322265625, -0.624359130859375, -0.5816650390625, -0.538970947265625, -0.49627685546875, -0.453582763671875, -0.410888671875, -0.368194580078125, -0.32550048828125, -0.282806396484375, -0.2401123046875, -0.197418212890625, -0.15472412109375, -0.112030029296875, -0.0693359375, -0.026641845703125, 0.01605224609375, 0.058746337890625, 0.1014404296875, 0.144134521484375, 0.18682861328125, 0.229522705078125, 0.272216796875, 0.314910888671875, 0.35760498046875, 0.400299072265625, 0.4429931640625, 0.485687255859375, 0.52838134765625, 0.571075439453125, 0.61376953125, 0.656463623046875, 0.69915771484375, 0.741851806640625, 0.7845458984375, 0.827239990234375, 0.86993408203125, 0.912628173828125, 0.955322265625, 0.998016357421875, 1.04071044921875, 1.083404541015625, 1.1260986328125, 1.168792724609375, 1.21148681640625, 1.254180908203125, 1.296875]}, "gradients/encoder.encoder.layers.23.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 4.0, 4.0, 4.0, 6.0, 10.0, 16.0, 35.0, 51.0, 98.0, 269.0, 801.0, 4939.0, 709463.0, 327380.0, 4236.0, 769.0, 256.0, 91.0, 59.0, 32.0, 19.0, 5.0, 6.0, 4.0, 2.0, 2.0, 1.0, 0.0, 3.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-2.88671875, -2.8126373291015625, -2.738555908203125, -2.6644744873046875, -2.59039306640625, -2.5163116455078125, -2.442230224609375, -2.3681488037109375, -2.2940673828125, -2.2199859619140625, -2.145904541015625, -2.0718231201171875, -1.99774169921875, -1.9236602783203125, -1.849578857421875, -1.7754974365234375, -1.701416015625, -1.6273345947265625, -1.553253173828125, -1.4791717529296875, -1.40509033203125, -1.3310089111328125, -1.256927490234375, -1.1828460693359375, -1.1087646484375, -1.0346832275390625, -0.960601806640625, -0.8865203857421875, -0.81243896484375, -0.7383575439453125, -0.664276123046875, -0.5901947021484375, -0.51611328125, -0.4420318603515625, -0.367950439453125, -0.2938690185546875, -0.21978759765625, -0.1457061767578125, -0.071624755859375, 0.0024566650390625, 0.0765380859375, 0.1506195068359375, 0.224700927734375, 0.2987823486328125, 0.37286376953125, 0.4469451904296875, 0.521026611328125, 0.5951080322265625, 0.669189453125, 0.7432708740234375, 0.817352294921875, 0.8914337158203125, 0.96551513671875, 1.0395965576171875, 1.113677978515625, 1.1877593994140625, 1.2618408203125, 1.3359222412109375, 1.410003662109375, 1.4840850830078125, 1.55816650390625, 1.6322479248046875, 1.706329345703125, 1.7804107666015625, 1.8544921875]}, "gradients/encoder.encoder.layers.23.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0, 1.0, 4.0, 1.0, 2.0, 1.0, 8.0, 6.0, 5.0, 10.0, 10.0, 10.0, 19.0, 15.0, 28.0, 38.0, 62.0, 91.0, 81.0, 125.0, 111.0, 104.0, 73.0, 62.0, 49.0, 23.0, 29.0, 11.0, 6.0, 7.0, 6.0, 2.0, 4.0, 0.0, 3.0, 2.0, 1.0, 0.0, 4.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.233287811279297e-05, -5.0693750381469727e-05, -4.9054622650146484e-05, -4.741549491882324e-05, -4.57763671875e-05, -4.413723945617676e-05, -4.2498111724853516e-05, -4.0858983993530273e-05, -3.921985626220703e-05, -3.758072853088379e-05, -3.594160079956055e-05, -3.4302473068237305e-05, -3.266334533691406e-05, -3.102421760559082e-05, -2.9385089874267578e-05, -2.7745962142944336e-05, -2.6106834411621094e-05, -2.446770668029785e-05, -2.282857894897461e-05, -2.1189451217651367e-05, -1.9550323486328125e-05, -1.7911195755004883e-05, -1.627206802368164e-05, -1.4632940292358398e-05, -1.2993812561035156e-05, -1.1354684829711914e-05, -9.715557098388672e-06, -8.07642936706543e-06, -6.4373016357421875e-06, -4.798173904418945e-06, -3.159046173095703e-06, -1.519918441772461e-06, 1.1920928955078125e-07, 1.7583370208740234e-06, 3.3974647521972656e-06, 5.036592483520508e-06, 6.67572021484375e-06, 8.314847946166992e-06, 9.953975677490234e-06, 1.1593103408813477e-05, 1.3232231140136719e-05, 1.4871358871459961e-05, 1.6510486602783203e-05, 1.8149614334106445e-05, 1.9788742065429688e-05, 2.142786979675293e-05, 2.3066997528076172e-05, 2.4706125259399414e-05, 2.6345252990722656e-05, 2.79843807220459e-05, 2.962350845336914e-05, 3.126263618469238e-05, 3.2901763916015625e-05, 3.454089164733887e-05, 3.618001937866211e-05, 3.781914710998535e-05, 3.9458274841308594e-05, 4.1097402572631836e-05, 4.273653030395508e-05, 4.437565803527832e-05, 4.601478576660156e-05, 4.7653913497924805e-05, 4.929304122924805e-05, 5.093216896057129e-05, 5.257129669189453e-05]}, "gradients/encoder.encoder.layers.23.attention.q_proj.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 2.0, 5.0, 4.0, 2.0, 8.0, 9.0, 7.0, 19.0, 38.0, 45.0, 74.0, 159.0, 381.0, 739.0, 2006.0, 8676.0, 114507.0, 871439.0, 42575.0, 5139.0, 1461.0, 613.0, 295.0, 149.0, 63.0, 57.0, 25.0, 18.0, 15.0, 6.0, 8.0, 5.0, 1.0, 0.0, 2.0, 4.0, 3.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.986328125, -0.953826904296875, -0.92132568359375, -0.888824462890625, -0.8563232421875, -0.823822021484375, -0.79132080078125, -0.758819580078125, -0.726318359375, -0.693817138671875, -0.66131591796875, -0.628814697265625, -0.5963134765625, -0.563812255859375, -0.53131103515625, -0.498809814453125, -0.46630859375, -0.433807373046875, -0.40130615234375, -0.368804931640625, -0.3363037109375, -0.303802490234375, -0.27130126953125, -0.238800048828125, -0.206298828125, -0.173797607421875, -0.14129638671875, -0.108795166015625, -0.0762939453125, -0.043792724609375, -0.01129150390625, 0.021209716796875, 0.0537109375, 0.086212158203125, 0.11871337890625, 0.151214599609375, 0.1837158203125, 0.216217041015625, 0.24871826171875, 0.281219482421875, 0.313720703125, 0.346221923828125, 0.37872314453125, 0.411224365234375, 0.4437255859375, 0.476226806640625, 0.50872802734375, 0.541229248046875, 0.57373046875, 0.606231689453125, 0.63873291015625, 0.671234130859375, 0.7037353515625, 0.736236572265625, 0.76873779296875, 0.801239013671875, 0.833740234375, 0.866241455078125, 0.89874267578125, 0.931243896484375, 0.9637451171875, 0.996246337890625, 1.02874755859375, 1.061248779296875, 1.09375]}, "gradients/encoder.encoder.layers.23.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0, 2.0, 5.0, 4.0, 13.0, 23.0, 21.0, 52.0, 82.0, 137.0, 173.0, 167.0, 126.0, 82.0, 36.0, 29.0, 26.0, 7.0, 11.0, 3.0, 4.0, 1.0, 2.0, 3.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.6083984375, -1.5596160888671875, -1.510833740234375, -1.4620513916015625, -1.41326904296875, -1.3644866943359375, -1.315704345703125, -1.2669219970703125, -1.2181396484375, -1.1693572998046875, -1.120574951171875, -1.0717926025390625, -1.02301025390625, -0.9742279052734375, -0.925445556640625, -0.8766632080078125, -0.827880859375, -0.7790985107421875, -0.730316162109375, -0.6815338134765625, -0.63275146484375, -0.5839691162109375, -0.535186767578125, -0.4864044189453125, -0.4376220703125, -0.3888397216796875, -0.340057373046875, -0.2912750244140625, -0.24249267578125, -0.1937103271484375, -0.144927978515625, -0.0961456298828125, -0.04736328125, 0.0014190673828125, 0.050201416015625, 0.0989837646484375, 0.14776611328125, 0.1965484619140625, 0.245330810546875, 0.2941131591796875, 0.3428955078125, 0.3916778564453125, 0.440460205078125, 0.4892425537109375, 0.53802490234375, 0.5868072509765625, 0.635589599609375, 0.6843719482421875, 0.733154296875, 0.7819366455078125, 0.830718994140625, 0.8795013427734375, 0.92828369140625, 0.9770660400390625, 1.025848388671875, 1.0746307373046875, 1.1234130859375, 1.1721954345703125, 1.220977783203125, 1.2697601318359375, 1.31854248046875, 1.3673248291015625, 1.416107177734375, 1.4648895263671875, 1.513671875]}, "gradients/encoder.encoder.layers.23.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 7.0, 5.0, 15.0, 55.0, 301.0, 513.0, 79.0, 15.0, 12.0, 4.0, 1.0, 2.0, 0.0, 3.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-16.141454696655273, -15.250275611877441, -14.35909652709961, -13.467917442321777, -12.576738357543945, -11.685559272766113, -10.794380187988281, -9.90320110321045, -9.012022018432617, -8.120842933654785, -7.229663848876953, -6.338484764099121, -5.447305679321289, -4.556126594543457, -3.664947509765625, -2.773768424987793, -1.882589340209961, -0.9914102554321289, -0.10023117065429688, 0.7909479141235352, 1.6821269989013672, 2.573306083679199, 3.4644851684570312, 4.355664253234863, 5.246843338012695, 6.138022422790527, 7.029201507568359, 7.920380592346191, 8.811559677124023, 9.702738761901855, 10.593917846679688, 11.48509693145752, 12.376274108886719, 13.26745319366455, 14.158632278442383, 15.049811363220215, 15.940990447998047, 16.832168579101562, 17.72334861755371, 18.61452865600586, 19.505706787109375, 20.39688491821289, 21.28806495666504, 22.179244995117188, 23.070423126220703, 23.96160125732422, 24.852781295776367, 25.743961334228516, 26.63513946533203, 27.526317596435547, 28.417497634887695, 29.308677673339844, 30.19985580444336, 31.091033935546875, 31.982213973999023, 32.87339401245117, 33.76457214355469, 34.6557502746582, 35.54692840576172, 36.4381103515625, 37.329288482666016, 38.22046661376953, 39.11164855957031, 40.00282669067383, 40.894004821777344]}, "gradients/encoder.encoder.layers.23.layer_norm.bias": {"_type": "histogram", "values": [4.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 1.0, 2.0, 2.0, 4.0, 4.0, 8.0, 2.0, 5.0, 5.0, 4.0, 6.0, 16.0, 18.0, 31.0, 44.0, 61.0, 93.0, 112.0, 143.0, 115.0, 86.0, 68.0, 42.0, 25.0, 19.0, 15.0, 11.0, 9.0, 7.0, 4.0, 5.0, 3.0, 6.0, 4.0, 1.0, 4.0, 2.0, 1.0, 2.0, 2.0, 2.0, 0.0, 1.0, 2.0, 3.0, 1.0, 0.0, 2.0, 0.0, 2.0], "bins": [-22.497350692749023, -21.807695388793945, -21.118038177490234, -20.428382873535156, -19.738727569580078, -19.049072265625, -18.35941505432129, -17.66975975036621, -16.980104446411133, -16.290449142456055, -15.60079288482666, -14.911136627197266, -14.221481323242188, -13.531825065612793, -12.842168807983398, -12.15251350402832, -11.462857246398926, -10.773200988769531, -10.083545684814453, -9.393889427185059, -8.70423412322998, -8.014577865600586, -7.32492208480835, -6.635266304016113, -5.945610523223877, -5.255954742431641, -4.566298961639404, -3.876642942428589, -3.1869871616363525, -2.497331380844116, -1.8076753616333008, -1.1180195808410645, -0.4283638000488281, 0.261292040348053, 0.9509478807449341, 1.64060378074646, 2.3302595615386963, 3.0199153423309326, 3.709571361541748, 4.399227142333984, 5.088882923126221, 5.778538703918457, 6.468194484710693, 7.15785026550293, 7.847506523132324, 8.537161827087402, 9.226818084716797, 9.916473388671875, 10.60612964630127, 11.295785903930664, 11.985441207885742, 12.675097465515137, 13.364752769470215, 14.05440902709961, 14.744064331054688, 15.433720588684082, 16.123376846313477, 16.813032150268555, 17.502689361572266, 18.192344665527344, 18.881999969482422, 19.5716552734375, 20.26131248474121, 20.95096778869629, 21.640623092651367]}, "gradients/encoder.encoder.layers.22.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 4.0, 1.0, 1.0, 4.0, 5.0, 9.0, 10.0, 7.0, 18.0, 32.0, 43.0, 91.0, 103.0, 228.0, 433.0, 901.0, 2067.0, 6022.0, 25759.0, 3098938.0, 1028853.0, 21645.0, 5324.0, 1841.0, 887.0, 432.0, 243.0, 120.0, 86.0, 61.0, 47.0, 29.0, 7.0, 8.0, 13.0, 5.0, 3.0, 4.0, 3.0, 4.0, 2.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.455078125, -3.335845947265625, -3.21661376953125, -3.097381591796875, -2.9781494140625, -2.858917236328125, -2.73968505859375, -2.620452880859375, -2.501220703125, -2.381988525390625, -2.26275634765625, -2.143524169921875, -2.0242919921875, -1.905059814453125, -1.78582763671875, -1.666595458984375, -1.54736328125, -1.428131103515625, -1.30889892578125, -1.189666748046875, -1.0704345703125, -0.951202392578125, -0.83197021484375, -0.712738037109375, -0.593505859375, -0.474273681640625, -0.35504150390625, -0.235809326171875, -0.1165771484375, 0.002655029296875, 0.12188720703125, 0.241119384765625, 0.3603515625, 0.479583740234375, 0.59881591796875, 0.718048095703125, 0.8372802734375, 0.956512451171875, 1.07574462890625, 1.194976806640625, 1.314208984375, 1.433441162109375, 1.55267333984375, 1.671905517578125, 1.7911376953125, 1.910369873046875, 2.02960205078125, 2.148834228515625, 2.26806640625, 2.387298583984375, 2.50653076171875, 2.625762939453125, 2.7449951171875, 2.864227294921875, 2.98345947265625, 3.102691650390625, 3.221923828125, 3.341156005859375, 3.46038818359375, 3.579620361328125, 3.6988525390625, 3.818084716796875, 3.93731689453125, 4.056549072265625, 4.17578125]}, "gradients/encoder.encoder.layers.22.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 5.0, 3.0, 3.0, 9.0, 6.0, 17.0, 12.0, 13.0, 17.0, 34.0, 100.0, 393.0, 252.0, 52.0, 23.0, 19.0, 12.0, 12.0, 6.0, 5.0, 5.0, 6.0, 1.0, 3.0, 0.0, 1.0, 3.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.3828125, -0.3699302673339844, -0.35704803466796875, -0.3441658020019531, -0.3312835693359375, -0.3184013366699219, -0.30551910400390625, -0.2926368713378906, -0.279754638671875, -0.2668724060058594, -0.25399017333984375, -0.24110794067382812, -0.2282257080078125, -0.21534347534179688, -0.20246124267578125, -0.18957901000976562, -0.17669677734375, -0.16381454467773438, -0.15093231201171875, -0.13805007934570312, -0.1251678466796875, -0.11228561401367188, -0.09940338134765625, -0.08652114868164062, -0.073638916015625, -0.060756683349609375, -0.04787445068359375, -0.034992218017578125, -0.0221099853515625, -0.009227752685546875, 0.00365447998046875, 0.016536712646484375, 0.0294189453125, 0.042301177978515625, 0.05518341064453125, 0.06806564331054688, 0.0809478759765625, 0.09383010864257812, 0.10671234130859375, 0.11959457397460938, 0.132476806640625, 0.14535903930664062, 0.15824127197265625, 0.17112350463867188, 0.1840057373046875, 0.19688796997070312, 0.20977020263671875, 0.22265243530273438, 0.23553466796875, 0.24841690063476562, 0.26129913330078125, 0.2741813659667969, 0.2870635986328125, 0.2999458312988281, 0.31282806396484375, 0.3257102966308594, 0.338592529296875, 0.3514747619628906, 0.36435699462890625, 0.3772392272949219, 0.3901214599609375, 0.4030036926269531, 0.41588592529296875, 0.4287681579589844, 0.441650390625]}, "gradients/encoder.encoder.layers.22.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 6.0, 3.0, 8.0, 13.0, 11.0, 14.0, 25.0, 45.0, 58.0, 127.0, 261.0, 539.0, 1376.0, 4078.0, 21077.0, 551520.0, 3571400.0, 35365.0, 5498.0, 1610.0, 636.0, 264.0, 157.0, 74.0, 47.0, 22.0, 18.0, 10.0, 8.0, 4.0, 6.0, 1.0, 5.0, 0.0, 2.0, 1.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.544921875, -1.478118896484375, -1.41131591796875, -1.344512939453125, -1.2777099609375, -1.210906982421875, -1.14410400390625, -1.077301025390625, -1.010498046875, -0.943695068359375, -0.87689208984375, -0.810089111328125, -0.7432861328125, -0.676483154296875, -0.60968017578125, -0.542877197265625, -0.47607421875, -0.409271240234375, -0.34246826171875, -0.275665283203125, -0.2088623046875, -0.142059326171875, -0.07525634765625, -0.008453369140625, 0.058349609375, 0.125152587890625, 0.19195556640625, 0.258758544921875, 0.3255615234375, 0.392364501953125, 0.45916748046875, 0.525970458984375, 0.5927734375, 0.659576416015625, 0.72637939453125, 0.793182373046875, 0.8599853515625, 0.926788330078125, 0.99359130859375, 1.060394287109375, 1.127197265625, 1.194000244140625, 1.26080322265625, 1.327606201171875, 1.3944091796875, 1.461212158203125, 1.52801513671875, 1.594818115234375, 1.66162109375, 1.728424072265625, 1.79522705078125, 1.862030029296875, 1.9288330078125, 1.995635986328125, 2.06243896484375, 2.129241943359375, 2.196044921875, 2.262847900390625, 2.32965087890625, 2.396453857421875, 2.4632568359375, 2.530059814453125, 2.59686279296875, 2.663665771484375, 2.73046875]}, "gradients/encoder.encoder.layers.22.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 4.0, 6.0, 2.0, 5.0, 13.0, 15.0, 12.0, 14.0, 38.0, 61.0, 68.0, 113.0, 237.0, 613.0, 1817.0, 579.0, 185.0, 103.0, 55.0, 38.0, 25.0, 17.0, 14.0, 11.0, 12.0, 2.0, 8.0, 2.0, 4.0, 2.0, 1.0, 4.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.4072265625, -0.3960838317871094, -0.38494110107421875, -0.3737983703613281, -0.3626556396484375, -0.3515129089355469, -0.34037017822265625, -0.3292274475097656, -0.318084716796875, -0.3069419860839844, -0.29579925537109375, -0.2846565246582031, -0.2735137939453125, -0.2623710632324219, -0.25122833251953125, -0.24008560180664062, -0.22894287109375, -0.21780014038085938, -0.20665740966796875, -0.19551467895507812, -0.1843719482421875, -0.17322921752929688, -0.16208648681640625, -0.15094375610351562, -0.139801025390625, -0.12865829467773438, -0.11751556396484375, -0.10637283325195312, -0.0952301025390625, -0.08408737182617188, -0.07294464111328125, -0.061801910400390625, -0.0506591796875, -0.039516448974609375, -0.02837371826171875, -0.017230987548828125, -0.0060882568359375, 0.005054473876953125, 0.01619720458984375, 0.027339935302734375, 0.038482666015625, 0.049625396728515625, 0.06076812744140625, 0.07191085815429688, 0.0830535888671875, 0.09419631958007812, 0.10533905029296875, 0.11648178100585938, 0.12762451171875, 0.13876724243164062, 0.14990997314453125, 0.16105270385742188, 0.1721954345703125, 0.18333816528320312, 0.19448089599609375, 0.20562362670898438, 0.216766357421875, 0.22790908813476562, 0.23905181884765625, 0.2501945495605469, 0.2613372802734375, 0.2724800109863281, 0.28362274169921875, 0.2947654724121094, 0.305908203125]}, "gradients/encoder.encoder.layers.22.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 3.0, 2.0, 2.0, 4.0, 5.0, 9.0, 15.0, 152.0, 802.0, 18.0, 1.0, 1.0, 2.0, 1.0, 0.0, 1.0], "bins": [-20.02397346496582, -19.668811798095703, -19.313648223876953, -18.958486557006836, -18.60332489013672, -18.24816131591797, -17.89299964904785, -17.537837982177734, -17.182674407958984, -16.827512741088867, -16.472349166870117, -16.1171875, -15.762024879455566, -15.406862258911133, -15.051700592041016, -14.696537971496582, -14.341376304626465, -13.986213684082031, -13.631052017211914, -13.27588939666748, -12.920726776123047, -12.56556510925293, -12.210402488708496, -11.855239868164062, -11.500078201293945, -11.144915580749512, -10.789753913879395, -10.434591293334961, -10.079428672790527, -9.724266052246094, -9.369104385375977, -9.013941764831543, -8.65877914428711, -8.303616523742676, -7.9484543800354, -7.593292236328125, -7.238129615783691, -6.882967472076416, -6.527805328369141, -6.172642707824707, -5.817480087280273, -5.462317943572998, -5.1071553230285645, -4.751993179321289, -4.3968305587768555, -4.04166841506958, -3.6865062713623047, -3.33134388923645, -2.9761815071105957, -2.621019124984741, -2.2658567428588867, -1.9106945991516113, -1.5555322170257568, -1.2003698348999023, -0.845207691192627, -0.49004530906677246, -0.13488292694091797, 0.22027939558029175, 0.5754417181015015, 0.9306039810180664, 1.285766363143921, 1.6409287452697754, 1.9960908889770508, 2.3512532711029053, 2.7064156532287598]}, "gradients/encoder.encoder.layers.22.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 2.0, 2.0, 3.0, 2.0, 3.0, 3.0, 8.0, 6.0, 12.0, 11.0, 13.0, 23.0, 21.0, 36.0, 42.0, 48.0, 56.0, 61.0, 50.0, 56.0, 72.0, 68.0, 73.0, 54.0, 47.0, 57.0, 34.0, 29.0, 25.0, 20.0, 20.0, 9.0, 7.0, 12.0, 5.0, 5.0, 1.0, 2.0, 4.0, 1.0, 1.0, 2.0, 2.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-1.4993104934692383, -1.4471986293792725, -1.3950867652893066, -1.3429749011993408, -1.2908631563186646, -1.2387512922286987, -1.186639428138733, -1.134527564048767, -1.0824156999588013, -1.0303038358688354, -0.9781920313835144, -0.9260801672935486, -0.8739683032035828, -0.8218564987182617, -0.7697446346282959, -0.7176327705383301, -0.665520966053009, -0.6134091019630432, -0.5612972974777222, -0.5091854333877563, -0.4570735692977905, -0.4049617350101471, -0.35284990072250366, -0.30073803663253784, -0.2486262023448944, -0.19651435315608978, -0.14440250396728516, -0.09229066967964172, -0.0401788204908371, 0.01193302869796753, 0.06404486298561096, 0.11615672707557678, 0.16826856136322021, 0.22038041055202484, 0.27249225974082947, 0.3246040940284729, 0.3767159581184387, 0.42882779240608215, 0.4809396266937256, 0.5330514907836914, 0.5851633548736572, 0.637275218963623, 0.6893870234489441, 0.7414988875389099, 0.7936107516288757, 0.8457225561141968, 0.8978344202041626, 0.9499462842941284, 1.0020580291748047, 1.0541698932647705, 1.1062817573547363, 1.1583936214447021, 1.2105053663253784, 1.2626172304153442, 1.31472909450531, 1.3668409585952759, 1.4189528226852417, 1.4710646867752075, 1.5231765508651733, 1.5752882957458496, 1.6274001598358154, 1.6795120239257812, 1.731623888015747, 1.783735752105713, 1.8358476161956787]}, "gradients/encoder.encoder.layers.22.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 3.0, 4.0, 6.0, 5.0, 15.0, 9.0, 17.0, 29.0, 34.0, 43.0, 82.0, 88.0, 136.0, 272.0, 461.0, 766.0, 1510.0, 3280.0, 8532.0, 29035.0, 580104.0, 384297.0, 25708.0, 7742.0, 3082.0, 1436.0, 740.0, 396.0, 243.0, 157.0, 101.0, 65.0, 48.0, 31.0, 25.0, 18.0, 14.0, 9.0, 8.0, 5.0, 2.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 1.0], "bins": [-2.2890625, -2.2196044921875, -2.150146484375, -2.0806884765625, -2.01123046875, -1.9417724609375, -1.872314453125, -1.8028564453125, -1.7333984375, -1.6639404296875, -1.594482421875, -1.5250244140625, -1.45556640625, -1.3861083984375, -1.316650390625, -1.2471923828125, -1.177734375, -1.1082763671875, -1.038818359375, -0.9693603515625, -0.89990234375, -0.8304443359375, -0.760986328125, -0.6915283203125, -0.6220703125, -0.5526123046875, -0.483154296875, -0.4136962890625, -0.34423828125, -0.2747802734375, -0.205322265625, -0.1358642578125, -0.06640625, 0.0030517578125, 0.072509765625, 0.1419677734375, 0.21142578125, 0.2808837890625, 0.350341796875, 0.4197998046875, 0.4892578125, 0.5587158203125, 0.628173828125, 0.6976318359375, 0.76708984375, 0.8365478515625, 0.906005859375, 0.9754638671875, 1.044921875, 1.1143798828125, 1.183837890625, 1.2532958984375, 1.32275390625, 1.3922119140625, 1.461669921875, 1.5311279296875, 1.6005859375, 1.6700439453125, 1.739501953125, 1.8089599609375, 1.87841796875, 1.9478759765625, 2.017333984375, 2.0867919921875, 2.15625]}, "gradients/encoder.encoder.layers.22.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 1.0, 0.0, 0.0, 3.0, 3.0, 3.0, 4.0, 9.0, 7.0, 12.0, 14.0, 19.0, 30.0, 128.0, 341.0, 269.0, 92.0, 24.0, 12.0, 13.0, 7.0, 8.0, 4.0, 3.0, 0.0, 3.0, 3.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.397705078125, -0.3843536376953125, -0.371002197265625, -0.3576507568359375, -0.34429931640625, -0.3309478759765625, -0.317596435546875, -0.3042449951171875, -0.2908935546875, -0.2775421142578125, -0.264190673828125, -0.2508392333984375, -0.23748779296875, -0.2241363525390625, -0.210784912109375, -0.1974334716796875, -0.18408203125, -0.1707305908203125, -0.157379150390625, -0.1440277099609375, -0.13067626953125, -0.1173248291015625, -0.103973388671875, -0.0906219482421875, -0.0772705078125, -0.0639190673828125, -0.050567626953125, -0.0372161865234375, -0.02386474609375, -0.0105133056640625, 0.002838134765625, 0.0161895751953125, 0.029541015625, 0.0428924560546875, 0.056243896484375, 0.0695953369140625, 0.08294677734375, 0.0962982177734375, 0.109649658203125, 0.1230010986328125, 0.1363525390625, 0.1497039794921875, 0.163055419921875, 0.1764068603515625, 0.18975830078125, 0.2031097412109375, 0.216461181640625, 0.2298126220703125, 0.2431640625, 0.2565155029296875, 0.269866943359375, 0.2832183837890625, 0.29656982421875, 0.3099212646484375, 0.323272705078125, 0.3366241455078125, 0.3499755859375, 0.3633270263671875, 0.376678466796875, 0.3900299072265625, 0.40338134765625, 0.4167327880859375, 0.430084228515625, 0.4434356689453125, 0.456787109375]}, "gradients/encoder.encoder.layers.22.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 2.0, 6.0, 2.0, 4.0, 8.0, 8.0, 7.0, 12.0, 19.0, 21.0, 24.0, 44.0, 44.0, 72.0, 85.0, 139.0, 195.0, 330.0, 559.0, 1093.0, 2480.0, 7001.0, 24795.0, 113666.0, 529330.0, 294299.0, 54141.0, 13145.0, 3884.0, 1481.0, 659.0, 340.0, 207.0, 126.0, 100.0, 59.0, 40.0, 45.0, 22.0, 16.0, 17.0, 8.0, 9.0, 7.0, 7.0, 1.0, 5.0, 1.0, 2.0], "bins": [-1.205078125, -1.1747817993164062, -1.1444854736328125, -1.1141891479492188, -1.083892822265625, -1.0535964965820312, -1.0233001708984375, -0.9930038452148438, -0.96270751953125, -0.9324111938476562, -0.9021148681640625, -0.8718185424804688, -0.841522216796875, -0.8112258911132812, -0.7809295654296875, -0.7506332397460938, -0.7203369140625, -0.6900405883789062, -0.6597442626953125, -0.6294479370117188, -0.599151611328125, -0.5688552856445312, -0.5385589599609375, -0.5082626342773438, -0.47796630859375, -0.44766998291015625, -0.4173736572265625, -0.38707733154296875, -0.356781005859375, -0.32648468017578125, -0.2961883544921875, -0.26589202880859375, -0.235595703125, -0.20529937744140625, -0.1750030517578125, -0.14470672607421875, -0.114410400390625, -0.08411407470703125, -0.0538177490234375, -0.02352142333984375, 0.00677490234375, 0.03707122802734375, 0.0673675537109375, 0.09766387939453125, 0.127960205078125, 0.15825653076171875, 0.1885528564453125, 0.21884918212890625, 0.2491455078125, 0.27944183349609375, 0.3097381591796875, 0.34003448486328125, 0.370330810546875, 0.40062713623046875, 0.4309234619140625, 0.46121978759765625, 0.49151611328125, 0.5218124389648438, 0.5521087646484375, 0.5824050903320312, 0.612701416015625, 0.6429977416992188, 0.6732940673828125, 0.7035903930664062, 0.73388671875]}, "gradients/encoder.encoder.layers.22.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 3.0, 2.0, 3.0, 2.0, 5.0, 4.0, 4.0, 7.0, 9.0, 12.0, 11.0, 17.0, 21.0, 26.0, 27.0, 34.0, 39.0, 28.0, 32.0, 37.0, 52.0, 46.0, 36.0, 51.0, 37.0, 45.0, 52.0, 43.0, 50.0, 40.0, 39.0, 27.0, 34.0, 20.0, 20.0, 18.0, 23.0, 13.0, 14.0, 9.0, 5.0, 7.0, 2.0, 7.0, 3.0, 1.0, 1.0], "bins": [-1.0517578125, -1.0261688232421875, -1.000579833984375, -0.9749908447265625, -0.94940185546875, -0.9238128662109375, -0.898223876953125, -0.8726348876953125, -0.8470458984375, -0.8214569091796875, -0.795867919921875, -0.7702789306640625, -0.74468994140625, -0.7191009521484375, -0.693511962890625, -0.6679229736328125, -0.642333984375, -0.6167449951171875, -0.591156005859375, -0.5655670166015625, -0.53997802734375, -0.5143890380859375, -0.488800048828125, -0.4632110595703125, -0.4376220703125, -0.4120330810546875, -0.386444091796875, -0.3608551025390625, -0.33526611328125, -0.3096771240234375, -0.284088134765625, -0.2584991455078125, -0.23291015625, -0.2073211669921875, -0.181732177734375, -0.1561431884765625, -0.13055419921875, -0.1049652099609375, -0.079376220703125, -0.0537872314453125, -0.0281982421875, -0.0026092529296875, 0.022979736328125, 0.0485687255859375, 0.07415771484375, 0.0997467041015625, 0.125335693359375, 0.1509246826171875, 0.176513671875, 0.2021026611328125, 0.227691650390625, 0.2532806396484375, 0.27886962890625, 0.3044586181640625, 0.330047607421875, 0.3556365966796875, 0.3812255859375, 0.4068145751953125, 0.432403564453125, 0.4579925537109375, 0.48358154296875, 0.5091705322265625, 0.534759521484375, 0.5603485107421875, 0.5859375]}, "gradients/encoder.encoder.layers.22.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0, 1.0, 3.0, 1.0, 2.0, 5.0, 6.0, 10.0, 16.0, 23.0, 33.0, 64.0, 157.0, 379.0, 1118.0, 6428.0, 926036.0, 109851.0, 3212.0, 695.0, 250.0, 124.0, 49.0, 30.0, 27.0, 12.0, 7.0, 5.0, 4.0, 1.0, 6.0, 1.0, 2.0, 2.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.314453125, -3.216522216796875, -3.11859130859375, -3.020660400390625, -2.9227294921875, -2.824798583984375, -2.72686767578125, -2.628936767578125, -2.531005859375, -2.433074951171875, -2.33514404296875, -2.237213134765625, -2.1392822265625, -2.041351318359375, -1.94342041015625, -1.845489501953125, -1.74755859375, -1.649627685546875, -1.55169677734375, -1.453765869140625, -1.3558349609375, -1.257904052734375, -1.15997314453125, -1.062042236328125, -0.964111328125, -0.866180419921875, -0.76824951171875, -0.670318603515625, -0.5723876953125, -0.474456787109375, -0.37652587890625, -0.278594970703125, -0.1806640625, -0.082733154296875, 0.01519775390625, 0.113128662109375, 0.2110595703125, 0.308990478515625, 0.40692138671875, 0.504852294921875, 0.602783203125, 0.700714111328125, 0.79864501953125, 0.896575927734375, 0.9945068359375, 1.092437744140625, 1.19036865234375, 1.288299560546875, 1.38623046875, 1.484161376953125, 1.58209228515625, 1.680023193359375, 1.7779541015625, 1.875885009765625, 1.97381591796875, 2.071746826171875, 2.169677734375, 2.267608642578125, 2.36553955078125, 2.463470458984375, 2.5614013671875, 2.659332275390625, 2.75726318359375, 2.855194091796875, 2.953125]}, "gradients/encoder.encoder.layers.22.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 6.0, 8.0, 6.0, 9.0, 11.0, 14.0, 24.0, 57.0, 63.0, 149.0, 298.0, 159.0, 69.0, 46.0, 25.0, 15.0, 11.0, 5.0, 8.0, 3.0, 6.0, 3.0, 4.0, 3.0, 4.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0003046989440917969, -0.0002963319420814514, -0.00028796494007110596, -0.0002795979380607605, -0.00027123093605041504, -0.0002628639340400696, -0.0002544969320297241, -0.00024612993001937866, -0.0002377629280090332, -0.00022939592599868774, -0.00022102892398834229, -0.00021266192197799683, -0.00020429491996765137, -0.0001959279179573059, -0.00018756091594696045, -0.000179193913936615, -0.00017082691192626953, -0.00016245990991592407, -0.0001540929079055786, -0.00014572590589523315, -0.0001373589038848877, -0.00012899190187454224, -0.00012062489986419678, -0.00011225789785385132, -0.00010389089584350586, -9.55238938331604e-05, -8.715689182281494e-05, -7.878988981246948e-05, -7.042288780212402e-05, -6.205588579177856e-05, -5.3688883781433105e-05, -4.5321881771087646e-05, -3.695487976074219e-05, -2.858787775039673e-05, -2.022087574005127e-05, -1.185387372970581e-05, -3.4868717193603516e-06, 4.880130290985107e-06, 1.3247132301330566e-05, 2.1614134311676025e-05, 2.9981136322021484e-05, 3.834813833236694e-05, 4.67151403427124e-05, 5.508214235305786e-05, 6.344914436340332e-05, 7.181614637374878e-05, 8.018314838409424e-05, 8.85501503944397e-05, 9.691715240478516e-05, 0.00010528415441513062, 0.00011365115642547607, 0.00012201815843582153, 0.000130385160446167, 0.00013875216245651245, 0.0001471191644668579, 0.00015548616647720337, 0.00016385316848754883, 0.0001722201704978943, 0.00018058717250823975, 0.0001889541745185852, 0.00019732117652893066, 0.00020568817853927612, 0.00021405518054962158, 0.00022242218255996704, 0.0002307891845703125]}, "gradients/encoder.encoder.layers.22.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 4.0, 1.0, 4.0, 6.0, 12.0, 15.0, 36.0, 47.0, 91.0, 208.0, 464.0, 1209.0, 6105.0, 189533.0, 835427.0, 12638.0, 1755.0, 525.0, 243.0, 106.0, 65.0, 25.0, 17.0, 6.0, 10.0, 2.0, 2.0, 3.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-2.251953125, -2.19134521484375, -2.1307373046875, -2.07012939453125, -2.009521484375, -1.94891357421875, -1.8883056640625, -1.82769775390625, -1.76708984375, -1.70648193359375, -1.6458740234375, -1.58526611328125, -1.524658203125, -1.46405029296875, -1.4034423828125, -1.34283447265625, -1.2822265625, -1.22161865234375, -1.1610107421875, -1.10040283203125, -1.039794921875, -0.97918701171875, -0.9185791015625, -0.85797119140625, -0.79736328125, -0.73675537109375, -0.6761474609375, -0.61553955078125, -0.554931640625, -0.49432373046875, -0.4337158203125, -0.37310791015625, -0.3125, -0.25189208984375, -0.1912841796875, -0.13067626953125, -0.070068359375, -0.00946044921875, 0.0511474609375, 0.11175537109375, 0.17236328125, 0.23297119140625, 0.2935791015625, 0.35418701171875, 0.414794921875, 0.47540283203125, 0.5360107421875, 0.59661865234375, 0.6572265625, 0.71783447265625, 0.7784423828125, 0.83905029296875, 0.899658203125, 0.96026611328125, 1.0208740234375, 1.08148193359375, 1.14208984375, 1.20269775390625, 1.2633056640625, 1.32391357421875, 1.384521484375, 1.44512939453125, 1.5057373046875, 1.56634521484375, 1.626953125]}, "gradients/encoder.encoder.layers.22.attention.q_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 0.0, 3.0, 7.0, 9.0, 16.0, 34.0, 51.0, 78.0, 150.0, 196.0, 161.0, 131.0, 67.0, 38.0, 28.0, 18.0, 10.0, 8.0, 4.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.32421875, -1.283966064453125, -1.24371337890625, -1.203460693359375, -1.1632080078125, -1.122955322265625, -1.08270263671875, -1.042449951171875, -1.002197265625, -0.961944580078125, -0.92169189453125, -0.881439208984375, -0.8411865234375, -0.800933837890625, -0.76068115234375, -0.720428466796875, -0.68017578125, -0.639923095703125, -0.59967041015625, -0.559417724609375, -0.5191650390625, -0.478912353515625, -0.43865966796875, -0.398406982421875, -0.358154296875, -0.317901611328125, -0.27764892578125, -0.237396240234375, -0.1971435546875, -0.156890869140625, -0.11663818359375, -0.076385498046875, -0.0361328125, 0.004119873046875, 0.04437255859375, 0.084625244140625, 0.1248779296875, 0.165130615234375, 0.20538330078125, 0.245635986328125, 0.285888671875, 0.326141357421875, 0.36639404296875, 0.406646728515625, 0.4468994140625, 0.487152099609375, 0.52740478515625, 0.567657470703125, 0.60791015625, 0.648162841796875, 0.68841552734375, 0.728668212890625, 0.7689208984375, 0.809173583984375, 0.84942626953125, 0.889678955078125, 0.929931640625, 0.970184326171875, 1.01043701171875, 1.050689697265625, 1.0909423828125, 1.131195068359375, 1.17144775390625, 1.211700439453125, 1.251953125]}, "gradients/encoder.encoder.layers.22.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 3.0, 9.0, 57.0, 775.0, 148.0, 14.0, 6.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-24.953643798828125, -23.853364944458008, -22.75308609008789, -21.65280532836914, -20.552526473999023, -19.452247619628906, -18.351966857910156, -17.25168800354004, -16.151409149169922, -15.051130294799805, -13.950850486755371, -12.850570678710938, -11.75029182434082, -10.650012969970703, -9.54973316192627, -8.449453353881836, -7.349174499511719, -6.248895168304443, -5.148615837097168, -4.048336505889893, -2.948057174682617, -1.8477778434753418, -0.7474985122680664, 0.352780818939209, 1.4530601501464844, 2.5533394813537598, 3.653618812561035, 4.7538981437683105, 5.854177474975586, 6.954456806182861, 8.054736137390137, 9.15501594543457, 10.255294799804688, 11.355573654174805, 12.455853462219238, 13.556133270263672, 14.656412124633789, 15.756690979003906, 16.856971740722656, 17.957250595092773, 19.05752944946289, 20.157808303833008, 21.258087158203125, 22.358367919921875, 23.458646774291992, 24.55892562866211, 25.65920639038086, 26.759485244750977, 27.859764099121094, 28.96004295349121, 30.060321807861328, 31.160602569580078, 32.26087951660156, 33.36116027832031, 34.46144104003906, 35.56171798706055, 36.6619987487793, 37.76227951049805, 38.86255645751953, 39.96283721923828, 41.06311798095703, 42.163394927978516, 43.263675689697266, 44.36395263671875, 45.4642333984375]}, "gradients/encoder.encoder.layers.22.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 2.0, 1.0, 2.0, 5.0, 1.0, 2.0, 3.0, 2.0, 4.0, 4.0, 6.0, 1.0, 11.0, 11.0, 26.0, 33.0, 42.0, 57.0, 57.0, 90.0, 96.0, 108.0, 92.0, 72.0, 65.0, 52.0, 41.0, 31.0, 25.0, 14.0, 11.0, 7.0, 3.0, 6.0, 8.0, 2.0, 6.0, 2.0, 3.0, 4.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0], "bins": [-10.809890747070312, -10.494063377380371, -10.17823600769043, -9.862409591674805, -9.546582221984863, -9.230754852294922, -8.914928436279297, -8.599101066589355, -8.283273696899414, -7.967446327209473, -7.6516194343566895, -7.335792541503906, -7.019965171813965, -6.704137802124023, -6.38831090927124, -6.072484016418457, -5.756656646728516, -5.440829277038574, -5.125002384185791, -4.809175491333008, -4.493348121643066, -4.177520751953125, -3.861693859100342, -3.5458667278289795, -3.230039596557617, -2.914212465286255, -2.5983853340148926, -2.2825582027435303, -1.966731071472168, -1.6509039402008057, -1.3350768089294434, -1.019249677658081, -0.7034215927124023, -0.38759446144104004, -0.07176733016967773, 0.24405980110168457, 0.5598869323730469, 0.8757140636444092, 1.1915411949157715, 1.5073683261871338, 1.823195457458496, 2.1390225887298584, 2.4548497200012207, 2.770676851272583, 3.0865039825439453, 3.4023311138153076, 3.71815824508667, 4.033985137939453, 4.3498125076293945, 4.665639877319336, 4.981466770172119, 5.297293663024902, 5.613121032714844, 5.928948402404785, 6.244775295257568, 6.560602188110352, 6.876429557800293, 7.192256927490234, 7.508083820343018, 7.823910713195801, 8.139738082885742, 8.455565452575684, 8.771392822265625, 9.08721923828125, 9.403046607971191]}, "gradients/encoder.encoder.layers.21.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 1.0, 5.0, 4.0, 1.0, 9.0, 8.0, 4.0, 5.0, 10.0, 14.0, 11.0, 18.0, 26.0, 47.0, 52.0, 83.0, 99.0, 214.0, 370.0, 736.0, 1884.0, 5449.0, 56606.0, 4106014.0, 16871.0, 3105.0, 1114.0, 567.0, 321.0, 190.0, 156.0, 91.0, 58.0, 32.0, 23.0, 21.0, 11.0, 12.0, 5.0, 4.0, 9.0, 3.0, 5.0, 6.0, 8.0, 4.0, 1.0, 2.0, 3.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-6.7578125, -6.5347900390625, -6.311767578125, -6.0887451171875, -5.86572265625, -5.6427001953125, -5.419677734375, -5.1966552734375, -4.9736328125, -4.7506103515625, -4.527587890625, -4.3045654296875, -4.08154296875, -3.8585205078125, -3.635498046875, -3.4124755859375, -3.189453125, -2.9664306640625, -2.743408203125, -2.5203857421875, -2.29736328125, -2.0743408203125, -1.851318359375, -1.6282958984375, -1.4052734375, -1.1822509765625, -0.959228515625, -0.7362060546875, -0.51318359375, -0.2901611328125, -0.067138671875, 0.1558837890625, 0.37890625, 0.6019287109375, 0.824951171875, 1.0479736328125, 1.27099609375, 1.4940185546875, 1.717041015625, 1.9400634765625, 2.1630859375, 2.3861083984375, 2.609130859375, 2.8321533203125, 3.05517578125, 3.2781982421875, 3.501220703125, 3.7242431640625, 3.947265625, 4.1702880859375, 4.393310546875, 4.6163330078125, 4.83935546875, 5.0623779296875, 5.285400390625, 5.5084228515625, 5.7314453125, 5.9544677734375, 6.177490234375, 6.4005126953125, 6.62353515625, 6.8465576171875, 7.069580078125, 7.2926025390625, 7.515625]}, "gradients/encoder.encoder.layers.21.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 6.0, 2.0, 6.0, 19.0, 20.0, 31.0, 72.0, 237.0, 313.0, 191.0, 52.0, 26.0, 9.0, 9.0, 3.0, 7.0, 1.0, 1.0, 2.0, 1.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.45068359375, -0.435821533203125, -0.42095947265625, -0.406097412109375, -0.3912353515625, -0.376373291015625, -0.36151123046875, -0.346649169921875, -0.331787109375, -0.316925048828125, -0.30206298828125, -0.287200927734375, -0.2723388671875, -0.257476806640625, -0.24261474609375, -0.227752685546875, -0.212890625, -0.198028564453125, -0.18316650390625, -0.168304443359375, -0.1534423828125, -0.138580322265625, -0.12371826171875, -0.108856201171875, -0.093994140625, -0.079132080078125, -0.06427001953125, -0.049407958984375, -0.0345458984375, -0.019683837890625, -0.00482177734375, 0.010040283203125, 0.02490234375, 0.039764404296875, 0.05462646484375, 0.069488525390625, 0.0843505859375, 0.099212646484375, 0.11407470703125, 0.128936767578125, 0.143798828125, 0.158660888671875, 0.17352294921875, 0.188385009765625, 0.2032470703125, 0.218109130859375, 0.23297119140625, 0.247833251953125, 0.2626953125, 0.277557373046875, 0.29241943359375, 0.307281494140625, 0.3221435546875, 0.337005615234375, 0.35186767578125, 0.366729736328125, 0.381591796875, 0.396453857421875, 0.41131591796875, 0.426177978515625, 0.4410400390625, 0.455902099609375, 0.47076416015625, 0.485626220703125, 0.50048828125]}, "gradients/encoder.encoder.layers.21.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 3.0, 6.0, 3.0, 11.0, 5.0, 10.0, 19.0, 21.0, 44.0, 62.0, 151.0, 487.0, 3568.0, 1929681.0, 2255723.0, 3576.0, 561.0, 160.0, 70.0, 50.0, 21.0, 17.0, 7.0, 6.0, 7.0, 3.0, 4.0, 1.0, 4.0, 1.0, 4.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-6.96875, -6.7637939453125, -6.558837890625, -6.3538818359375, -6.14892578125, -5.9439697265625, -5.739013671875, -5.5340576171875, -5.3291015625, -5.1241455078125, -4.919189453125, -4.7142333984375, -4.50927734375, -4.3043212890625, -4.099365234375, -3.8944091796875, -3.689453125, -3.4844970703125, -3.279541015625, -3.0745849609375, -2.86962890625, -2.6646728515625, -2.459716796875, -2.2547607421875, -2.0498046875, -1.8448486328125, -1.639892578125, -1.4349365234375, -1.22998046875, -1.0250244140625, -0.820068359375, -0.6151123046875, -0.41015625, -0.2052001953125, -0.000244140625, 0.2047119140625, 0.40966796875, 0.6146240234375, 0.819580078125, 1.0245361328125, 1.2294921875, 1.4344482421875, 1.639404296875, 1.8443603515625, 2.04931640625, 2.2542724609375, 2.459228515625, 2.6641845703125, 2.869140625, 3.0740966796875, 3.279052734375, 3.4840087890625, 3.68896484375, 3.8939208984375, 4.098876953125, 4.3038330078125, 4.5087890625, 4.7137451171875, 4.918701171875, 5.1236572265625, 5.32861328125, 5.5335693359375, 5.738525390625, 5.9434814453125, 6.1484375]}, "gradients/encoder.encoder.layers.21.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 5.0, 5.0, 1.0, 5.0, 5.0, 11.0, 12.0, 17.0, 38.0, 40.0, 66.0, 116.0, 333.0, 922.0, 1735.0, 388.0, 141.0, 74.0, 48.0, 32.0, 18.0, 16.0, 9.0, 10.0, 7.0, 3.0, 6.0, 6.0, 3.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.392578125, -0.3795814514160156, -0.36658477783203125, -0.3535881042480469, -0.3405914306640625, -0.3275947570800781, -0.31459808349609375, -0.3016014099121094, -0.288604736328125, -0.2756080627441406, -0.26261138916015625, -0.24961471557617188, -0.2366180419921875, -0.22362136840820312, -0.21062469482421875, -0.19762802124023438, -0.18463134765625, -0.17163467407226562, -0.15863800048828125, -0.14564132690429688, -0.1326446533203125, -0.11964797973632812, -0.10665130615234375, -0.09365463256835938, -0.080657958984375, -0.06766128540039062, -0.05466461181640625, -0.041667938232421875, -0.0286712646484375, -0.015674591064453125, -0.00267791748046875, 0.010318756103515625, 0.0233154296875, 0.036312103271484375, 0.04930877685546875, 0.062305450439453125, 0.0753021240234375, 0.08829879760742188, 0.10129547119140625, 0.11429214477539062, 0.127288818359375, 0.14028549194335938, 0.15328216552734375, 0.16627883911132812, 0.1792755126953125, 0.19227218627929688, 0.20526885986328125, 0.21826553344726562, 0.23126220703125, 0.24425888061523438, 0.25725555419921875, 0.2702522277832031, 0.2832489013671875, 0.2962455749511719, 0.30924224853515625, 0.3222389221191406, 0.335235595703125, 0.3482322692871094, 0.36122894287109375, 0.3742256164550781, 0.3872222900390625, 0.4002189636230469, 0.41321563720703125, 0.4262123107910156, 0.439208984375]}, "gradients/encoder.encoder.layers.21.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 0.0, 1.0, 4.0, 4.0, 15.0, 19.0, 87.0, 446.0, 326.0, 78.0, 14.0, 12.0, 4.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.9233076572418213, -2.795870304107666, -2.6684327125549316, -2.5409953594207764, -2.413558006286621, -2.2861204147338867, -2.1586830615997314, -2.031245708465576, -1.9038081169128418, -1.776370644569397, -1.6489331722259521, -1.5214958190917969, -1.394058346748352, -1.2666208744049072, -1.139183521270752, -1.0117460489273071, -0.8843085765838623, -0.7568711042404175, -0.6294336915016174, -0.5019962787628174, -0.37455880641937256, -0.24712133407592773, -0.11968392133712769, 0.007753491401672363, 0.1351909637451172, 0.2626284062862396, 0.39006584882736206, 0.5175032615661621, 0.6449407339096069, 0.7723782062530518, 0.8998156189918518, 1.0272530317306519, 1.1546907424926758, 1.2821282148361206, 1.4095656871795654, 1.5370030403137207, 1.6644405126571655, 1.7918779850006104, 1.9193153381347656, 2.0467529296875, 2.1741902828216553, 2.3016276359558105, 2.429065227508545, 2.5565025806427, 2.6839399337768555, 2.81137752532959, 2.938814878463745, 3.0662522315979004, 3.1936898231506348, 3.32112717628479, 3.4485647678375244, 3.5760021209716797, 3.703439712524414, 3.8308770656585693, 3.9583144187927246, 4.085752010345459, 4.213189125061035, 4.3406267166137695, 4.468063831329346, 4.59550142288208, 4.7229390144348145, 4.850376129150391, 4.977813720703125, 5.105251312255859, 5.232688903808594]}, "gradients/encoder.encoder.layers.21.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 2.0, 1.0, 2.0, 3.0, 6.0, 7.0, 4.0, 10.0, 8.0, 12.0, 17.0, 34.0, 25.0, 41.0, 45.0, 74.0, 65.0, 64.0, 72.0, 72.0, 71.0, 65.0, 67.0, 54.0, 46.0, 39.0, 22.0, 21.0, 18.0, 8.0, 9.0, 5.0, 5.0, 9.0, 2.0, 1.0, 2.0, 4.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.55598783493042, -1.502063512802124, -1.4481391906738281, -1.3942149877548218, -1.3402906656265259, -1.28636634349823, -1.2324421405792236, -1.1785178184509277, -1.1245934963226318, -1.070669174194336, -1.01674485206604, -0.9628206491470337, -0.9088963270187378, -0.8549720048904419, -0.8010477423667908, -0.7471234798431396, -0.6931991577148438, -0.6392748355865479, -0.5853505730628967, -0.5314263105392456, -0.4775019884109497, -0.4235776960849762, -0.3696534037590027, -0.3157291114330292, -0.26180481910705566, -0.20788052678108215, -0.15395623445510864, -0.10003194212913513, -0.04610764980316162, 0.00781664252281189, 0.0617409348487854, 0.11566522717475891, 0.16958951950073242, 0.22351381182670593, 0.27743810415267944, 0.33136239647865295, 0.38528668880462646, 0.4392109811306, 0.4931352734565735, 0.5470595359802246, 0.6009838581085205, 0.6549081802368164, 0.7088324427604675, 0.7627567052841187, 0.8166810274124146, 0.8706053495407104, 0.9245296120643616, 0.9784538745880127, 1.0323781967163086, 1.0863025188446045, 1.1402268409729004, 1.1941510438919067, 1.2480753660202026, 1.3019996881484985, 1.3559238910675049, 1.4098482131958008, 1.4637725353240967, 1.5176968574523926, 1.5716211795806885, 1.6255453824996948, 1.6794697046279907, 1.7333940267562866, 1.787318229675293, 1.8412425518035889, 1.8951668739318848]}, "gradients/encoder.encoder.layers.21.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 7.0, 7.0, 2.0, 10.0, 9.0, 17.0, 30.0, 35.0, 57.0, 94.0, 142.0, 227.0, 475.0, 1068.0, 2968.0, 16447.0, 844299.0, 171051.0, 8016.0, 1917.0, 742.0, 358.0, 205.0, 141.0, 76.0, 50.0, 27.0, 22.0, 18.0, 10.0, 11.0, 5.0, 4.0, 2.0, 4.0, 5.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-3.474609375, -3.37188720703125, -3.2691650390625, -3.16644287109375, -3.063720703125, -2.96099853515625, -2.8582763671875, -2.75555419921875, -2.65283203125, -2.55010986328125, -2.4473876953125, -2.34466552734375, -2.241943359375, -2.13922119140625, -2.0364990234375, -1.93377685546875, -1.8310546875, -1.72833251953125, -1.6256103515625, -1.52288818359375, -1.420166015625, -1.31744384765625, -1.2147216796875, -1.11199951171875, -1.00927734375, -0.90655517578125, -0.8038330078125, -0.70111083984375, -0.598388671875, -0.49566650390625, -0.3929443359375, -0.29022216796875, -0.1875, -0.08477783203125, 0.0179443359375, 0.12066650390625, 0.223388671875, 0.32611083984375, 0.4288330078125, 0.53155517578125, 0.63427734375, 0.73699951171875, 0.8397216796875, 0.94244384765625, 1.045166015625, 1.14788818359375, 1.2506103515625, 1.35333251953125, 1.4560546875, 1.55877685546875, 1.6614990234375, 1.76422119140625, 1.866943359375, 1.96966552734375, 2.0723876953125, 2.17510986328125, 2.27783203125, 2.38055419921875, 2.4832763671875, 2.58599853515625, 2.688720703125, 2.79144287109375, 2.8941650390625, 2.99688720703125, 3.099609375]}, "gradients/encoder.encoder.layers.21.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 4.0, 11.0, 8.0, 14.0, 38.0, 84.0, 242.0, 286.0, 182.0, 80.0, 30.0, 13.0, 4.0, 5.0, 1.0, 2.0, 1.0, 0.0, 3.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.405517578125, -0.3917083740234375, -0.377899169921875, -0.3640899658203125, -0.35028076171875, -0.3364715576171875, -0.322662353515625, -0.3088531494140625, -0.2950439453125, -0.2812347412109375, -0.267425537109375, -0.2536163330078125, -0.23980712890625, -0.2259979248046875, -0.212188720703125, -0.1983795166015625, -0.1845703125, -0.1707611083984375, -0.156951904296875, -0.1431427001953125, -0.12933349609375, -0.1155242919921875, -0.101715087890625, -0.0879058837890625, -0.0740966796875, -0.0602874755859375, -0.046478271484375, -0.0326690673828125, -0.01885986328125, -0.0050506591796875, 0.008758544921875, 0.0225677490234375, 0.036376953125, 0.0501861572265625, 0.063995361328125, 0.0778045654296875, 0.09161376953125, 0.1054229736328125, 0.119232177734375, 0.1330413818359375, 0.1468505859375, 0.1606597900390625, 0.174468994140625, 0.1882781982421875, 0.20208740234375, 0.2158966064453125, 0.229705810546875, 0.2435150146484375, 0.25732421875, 0.2711334228515625, 0.284942626953125, 0.2987518310546875, 0.31256103515625, 0.3263702392578125, 0.340179443359375, 0.3539886474609375, 0.3677978515625, 0.3816070556640625, 0.395416259765625, 0.4092254638671875, 0.42303466796875, 0.4368438720703125, 0.450653076171875, 0.4644622802734375, 0.478271484375]}, "gradients/encoder.encoder.layers.21.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 5.0, 5.0, 7.0, 8.0, 9.0, 10.0, 15.0, 31.0, 31.0, 54.0, 67.0, 103.0, 132.0, 190.0, 306.0, 497.0, 952.0, 2168.0, 6686.0, 28392.0, 198540.0, 676400.0, 106607.0, 18510.0, 4849.0, 1776.0, 797.0, 461.0, 267.0, 187.0, 142.0, 83.0, 75.0, 50.0, 52.0, 29.0, 16.0, 11.0, 11.0, 13.0, 7.0, 3.0, 5.0, 2.0, 2.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0], "bins": [-1.6455078125, -1.5976104736328125, -1.549713134765625, -1.5018157958984375, -1.45391845703125, -1.4060211181640625, -1.358123779296875, -1.3102264404296875, -1.2623291015625, -1.2144317626953125, -1.166534423828125, -1.1186370849609375, -1.07073974609375, -1.0228424072265625, -0.974945068359375, -0.9270477294921875, -0.879150390625, -0.8312530517578125, -0.783355712890625, -0.7354583740234375, -0.68756103515625, -0.6396636962890625, -0.591766357421875, -0.5438690185546875, -0.4959716796875, -0.4480743408203125, -0.400177001953125, -0.3522796630859375, -0.30438232421875, -0.2564849853515625, -0.208587646484375, -0.1606903076171875, -0.11279296875, -0.0648956298828125, -0.016998291015625, 0.0308990478515625, 0.07879638671875, 0.1266937255859375, 0.174591064453125, 0.2224884033203125, 0.2703857421875, 0.3182830810546875, 0.366180419921875, 0.4140777587890625, 0.46197509765625, 0.5098724365234375, 0.557769775390625, 0.6056671142578125, 0.653564453125, 0.7014617919921875, 0.749359130859375, 0.7972564697265625, 0.84515380859375, 0.8930511474609375, 0.940948486328125, 0.9888458251953125, 1.0367431640625, 1.0846405029296875, 1.132537841796875, 1.1804351806640625, 1.22833251953125, 1.2762298583984375, 1.324127197265625, 1.3720245361328125, 1.419921875]}, "gradients/encoder.encoder.layers.21.attention.v_proj.bias": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 4.0, 1.0, 10.0, 3.0, 5.0, 4.0, 8.0, 16.0, 11.0, 10.0, 20.0, 20.0, 29.0, 14.0, 28.0, 24.0, 28.0, 42.0, 35.0, 50.0, 39.0, 51.0, 44.0, 53.0, 40.0, 35.0, 49.0, 44.0, 43.0, 34.0, 31.0, 28.0, 20.0, 25.0, 14.0, 18.0, 12.0, 13.0, 14.0, 12.0, 5.0, 2.0, 6.0, 4.0, 3.0, 3.0, 1.0, 4.0, 3.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.8974609375, -0.8691329956054688, -0.8408050537109375, -0.8124771118164062, -0.784149169921875, -0.7558212280273438, -0.7274932861328125, -0.6991653442382812, -0.67083740234375, -0.6425094604492188, -0.6141815185546875, -0.5858535766601562, -0.557525634765625, -0.5291976928710938, -0.5008697509765625, -0.47254180908203125, -0.4442138671875, -0.41588592529296875, -0.3875579833984375, -0.35923004150390625, -0.330902099609375, -0.30257415771484375, -0.2742462158203125, -0.24591827392578125, -0.21759033203125, -0.18926239013671875, -0.1609344482421875, -0.13260650634765625, -0.104278564453125, -0.07595062255859375, -0.0476226806640625, -0.01929473876953125, 0.009033203125, 0.03736114501953125, 0.0656890869140625, 0.09401702880859375, 0.122344970703125, 0.15067291259765625, 0.1790008544921875, 0.20732879638671875, 0.23565673828125, 0.26398468017578125, 0.2923126220703125, 0.32064056396484375, 0.348968505859375, 0.37729644775390625, 0.4056243896484375, 0.43395233154296875, 0.4622802734375, 0.49060821533203125, 0.5189361572265625, 0.5472640991210938, 0.575592041015625, 0.6039199829101562, 0.6322479248046875, 0.6605758666992188, 0.68890380859375, 0.7172317504882812, 0.7455596923828125, 0.7738876342773438, 0.802215576171875, 0.8305435180664062, 0.8588714599609375, 0.8871994018554688, 0.91552734375]}, "gradients/encoder.encoder.layers.21.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 3.0, 2.0, 1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 1.0, 13.0, 10.0, 21.0, 27.0, 98.0, 171.0, 470.0, 2036.0, 62313.0, 975164.0, 6799.0, 900.0, 291.0, 117.0, 48.0, 28.0, 22.0, 9.0, 7.0, 8.0, 4.0, 2.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.19140625, -2.12347412109375, -2.0555419921875, -1.98760986328125, -1.919677734375, -1.85174560546875, -1.7838134765625, -1.71588134765625, -1.64794921875, -1.58001708984375, -1.5120849609375, -1.44415283203125, -1.376220703125, -1.30828857421875, -1.2403564453125, -1.17242431640625, -1.1044921875, -1.03656005859375, -0.9686279296875, -0.90069580078125, -0.832763671875, -0.76483154296875, -0.6968994140625, -0.62896728515625, -0.56103515625, -0.49310302734375, -0.4251708984375, -0.35723876953125, -0.289306640625, -0.22137451171875, -0.1534423828125, -0.08551025390625, -0.017578125, 0.05035400390625, 0.1182861328125, 0.18621826171875, 0.254150390625, 0.32208251953125, 0.3900146484375, 0.45794677734375, 0.52587890625, 0.59381103515625, 0.6617431640625, 0.72967529296875, 0.797607421875, 0.86553955078125, 0.9334716796875, 1.00140380859375, 1.0693359375, 1.13726806640625, 1.2052001953125, 1.27313232421875, 1.341064453125, 1.40899658203125, 1.4769287109375, 1.54486083984375, 1.61279296875, 1.68072509765625, 1.7486572265625, 1.81658935546875, 1.884521484375, 1.95245361328125, 2.0203857421875, 2.08831787109375, 2.15625]}, "gradients/encoder.encoder.layers.21.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 3.0, 1.0, 3.0, 2.0, 2.0, 3.0, 1.0, 8.0, 10.0, 11.0, 19.0, 12.0, 19.0, 28.0, 42.0, 63.0, 77.0, 122.0, 190.0, 101.0, 67.0, 62.0, 39.0, 32.0, 16.0, 14.0, 16.0, 7.0, 9.0, 11.0, 1.0, 7.0, 4.0, 1.0, 0.0, 4.0, 2.0, 1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.00020933151245117188, -0.00020265765488147736, -0.00019598379731178284, -0.00018930993974208832, -0.0001826360821723938, -0.00017596222460269928, -0.00016928836703300476, -0.00016261450946331024, -0.00015594065189361572, -0.0001492667943239212, -0.00014259293675422668, -0.00013591907918453217, -0.00012924522161483765, -0.00012257136404514313, -0.00011589750647544861, -0.00010922364890575409, -0.00010254979133605957, -9.587593376636505e-05, -8.920207619667053e-05, -8.252821862697601e-05, -7.58543610572815e-05, -6.918050348758698e-05, -6.250664591789246e-05, -5.583278834819794e-05, -4.915893077850342e-05, -4.24850732088089e-05, -3.581121563911438e-05, -2.913735806941986e-05, -2.2463500499725342e-05, -1.5789642930030823e-05, -9.115785360336304e-06, -2.4419277906417847e-06, 4.231929779052734e-06, 1.0905787348747253e-05, 1.7579644918441772e-05, 2.425350248813629e-05, 3.092736005783081e-05, 3.760121762752533e-05, 4.427507519721985e-05, 5.094893276691437e-05, 5.762279033660889e-05, 6.42966479063034e-05, 7.097050547599792e-05, 7.764436304569244e-05, 8.431822061538696e-05, 9.099207818508148e-05, 9.7665935754776e-05, 0.00010433979332447052, 0.00011101365089416504, 0.00011768750846385956, 0.00012436136603355408, 0.0001310352236032486, 0.00013770908117294312, 0.00014438293874263763, 0.00015105679631233215, 0.00015773065388202667, 0.0001644045114517212, 0.0001710783690214157, 0.00017775222659111023, 0.00018442608416080475, 0.00019109994173049927, 0.0001977737993001938, 0.0002044476568698883, 0.00021112151443958282, 0.00021779537200927734]}, "gradients/encoder.encoder.layers.21.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 3.0, 1.0, 4.0, 2.0, 3.0, 6.0, 6.0, 10.0, 9.0, 12.0, 13.0, 25.0, 27.0, 53.0, 64.0, 91.0, 144.0, 307.0, 627.0, 1626.0, 5751.0, 38465.0, 714386.0, 262820.0, 18314.0, 3552.0, 1163.0, 478.0, 226.0, 118.0, 77.0, 38.0, 28.0, 34.0, 14.0, 19.0, 13.0, 6.0, 6.0, 4.0, 7.0, 5.0, 3.0, 2.0, 0.0, 1.0, 1.0, 0.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.109375, -1.07452392578125, -1.0396728515625, -1.00482177734375, -0.969970703125, -0.93511962890625, -0.9002685546875, -0.86541748046875, -0.83056640625, -0.79571533203125, -0.7608642578125, -0.72601318359375, -0.691162109375, -0.65631103515625, -0.6214599609375, -0.58660888671875, -0.5517578125, -0.51690673828125, -0.4820556640625, -0.44720458984375, -0.412353515625, -0.37750244140625, -0.3426513671875, -0.30780029296875, -0.27294921875, -0.23809814453125, -0.2032470703125, -0.16839599609375, -0.133544921875, -0.09869384765625, -0.0638427734375, -0.02899169921875, 0.005859375, 0.04071044921875, 0.0755615234375, 0.11041259765625, 0.145263671875, 0.18011474609375, 0.2149658203125, 0.24981689453125, 0.28466796875, 0.31951904296875, 0.3543701171875, 0.38922119140625, 0.424072265625, 0.45892333984375, 0.4937744140625, 0.52862548828125, 0.5634765625, 0.59832763671875, 0.6331787109375, 0.66802978515625, 0.702880859375, 0.73773193359375, 0.7725830078125, 0.80743408203125, 0.84228515625, 0.87713623046875, 0.9119873046875, 0.94683837890625, 0.981689453125, 1.01654052734375, 1.0513916015625, 1.08624267578125, 1.12109375]}, "gradients/encoder.encoder.layers.21.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 3.0, 6.0, 5.0, 5.0, 6.0, 9.0, 15.0, 22.0, 24.0, 28.0, 35.0, 45.0, 49.0, 83.0, 92.0, 104.0, 100.0, 88.0, 63.0, 48.0, 37.0, 34.0, 28.0, 18.0, 8.0, 9.0, 9.0, 8.0, 6.0, 3.0, 5.0, 1.0, 1.0, 1.0, 1.0, 3.0, 2.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.6201171875, -0.6022491455078125, -0.584381103515625, -0.5665130615234375, -0.54864501953125, -0.5307769775390625, -0.512908935546875, -0.4950408935546875, -0.4771728515625, -0.4593048095703125, -0.441436767578125, -0.4235687255859375, -0.40570068359375, -0.3878326416015625, -0.369964599609375, -0.3520965576171875, -0.334228515625, -0.3163604736328125, -0.298492431640625, -0.2806243896484375, -0.26275634765625, -0.2448883056640625, -0.227020263671875, -0.2091522216796875, -0.1912841796875, -0.1734161376953125, -0.155548095703125, -0.1376800537109375, -0.11981201171875, -0.1019439697265625, -0.084075927734375, -0.0662078857421875, -0.04833984375, -0.0304718017578125, -0.012603759765625, 0.0052642822265625, 0.02313232421875, 0.0410003662109375, 0.058868408203125, 0.0767364501953125, 0.0946044921875, 0.1124725341796875, 0.130340576171875, 0.1482086181640625, 0.16607666015625, 0.1839447021484375, 0.201812744140625, 0.2196807861328125, 0.237548828125, 0.2554168701171875, 0.273284912109375, 0.2911529541015625, 0.30902099609375, 0.3268890380859375, 0.344757080078125, 0.3626251220703125, 0.3804931640625, 0.3983612060546875, 0.416229248046875, 0.4340972900390625, 0.45196533203125, 0.4698333740234375, 0.487701416015625, 0.5055694580078125, 0.5234375]}, "gradients/encoder.encoder.layers.21.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 4.0, 5.0, 8.0, 57.0, 777.0, 156.0, 7.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-21.993803024291992, -20.389400482177734, -18.78499984741211, -17.180599212646484, -15.576196670532227, -13.971795082092285, -12.367393493652344, -10.762991905212402, -9.158590316772461, -7.5541887283325195, -5.949787139892578, -4.345385551452637, -2.7409839630126953, -1.136582374572754, 0.4678192138671875, 2.072220802307129, 3.6766223907470703, 5.281023979187012, 6.885425567626953, 8.489827156066895, 10.094228744506836, 11.698630332946777, 13.303031921386719, 14.90743350982666, 16.5118350982666, 18.11623764038086, 19.720638275146484, 21.32503890991211, 22.929441452026367, 24.533843994140625, 26.13824462890625, 27.742645263671875, 29.3470458984375, 30.951446533203125, 32.55584716796875, 34.16025161743164, 35.764652252197266, 37.36905288696289, 38.97345733642578, 40.577857971191406, 42.18225860595703, 43.786659240722656, 45.39105987548828, 46.99546432495117, 48.5998649597168, 50.20426559448242, 51.80867004394531, 53.41307067871094, 55.01747131347656, 56.62187194824219, 58.22627258300781, 59.8306770324707, 61.43507766723633, 63.03947830200195, 64.64388275146484, 66.24828338623047, 67.8526840209961, 69.45708465576172, 71.06148529052734, 72.66588592529297, 74.27029418945312, 75.87469482421875, 77.47909545898438, 79.08349609375, 80.68789672851562]}, "gradients/encoder.encoder.layers.21.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 3.0, 5.0, 6.0, 2.0, 11.0, 12.0, 31.0, 40.0, 38.0, 51.0, 56.0, 83.0, 91.0, 88.0, 75.0, 93.0, 78.0, 62.0, 42.0, 41.0, 33.0, 20.0, 18.0, 13.0, 9.0, 4.0, 4.0, 2.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-10.905634880065918, -10.500060081481934, -10.094484329223633, -9.688909530639648, -9.283334732055664, -8.87775993347168, -8.472184181213379, -8.066609382629395, -7.661034107208252, -7.255458831787109, -6.849884033203125, -6.444308757781982, -6.03873348236084, -5.6331586837768555, -5.227583408355713, -4.82200813293457, -4.416433334350586, -4.010858058929443, -3.605283260345459, -3.1997079849243164, -2.794132947921753, -2.3885579109191895, -1.9829826354980469, -1.5774075984954834, -1.17183256149292, -0.7662574648857117, -0.3606823682785034, 0.04489278793334961, 0.4504678249359131, 0.8560428619384766, 1.2616181373596191, 1.6671931743621826, 2.0727691650390625, 2.478344202041626, 2.8839192390441895, 3.289494514465332, 3.6950695514678955, 4.100644588470459, 4.506219863891602, 4.911794662475586, 5.3173699378967285, 5.722945213317871, 6.1285200119018555, 6.534095287322998, 6.939670562744141, 7.345245361328125, 7.750820636749268, 8.15639591217041, 8.561970710754395, 8.967545509338379, 9.37312126159668, 9.778696060180664, 10.184270858764648, 10.589845657348633, 10.995421409606934, 11.400996208190918, 11.806571960449219, 12.212146759033203, 12.617722511291504, 13.023297309875488, 13.428872108459473, 13.834447860717773, 14.240022659301758, 14.645597457885742, 15.051172256469727]}, "gradients/encoder.encoder.layers.20.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 5.0, 7.0, 12.0, 24.0, 54.0, 147.0, 539.0, 32199.0, 4159905.0, 1016.0, 252.0, 71.0, 27.0, 12.0, 8.0, 6.0, 2.0, 3.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-22.421875, -21.677734375, -20.93359375, -20.189453125, -19.4453125, -18.701171875, -17.95703125, -17.212890625, -16.46875, -15.724609375, -14.98046875, -14.236328125, -13.4921875, -12.748046875, -12.00390625, -11.259765625, -10.515625, -9.771484375, -9.02734375, -8.283203125, -7.5390625, -6.794921875, -6.05078125, -5.306640625, -4.5625, -3.818359375, -3.07421875, -2.330078125, -1.5859375, -0.841796875, -0.09765625, 0.646484375, 1.390625, 2.134765625, 2.87890625, 3.623046875, 4.3671875, 5.111328125, 5.85546875, 6.599609375, 7.34375, 8.087890625, 8.83203125, 9.576171875, 10.3203125, 11.064453125, 11.80859375, 12.552734375, 13.296875, 14.041015625, 14.78515625, 15.529296875, 16.2734375, 17.017578125, 17.76171875, 18.505859375, 19.25, 19.994140625, 20.73828125, 21.482421875, 22.2265625, 22.970703125, 23.71484375, 24.458984375, 25.203125]}, "gradients/encoder.encoder.layers.20.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 3.0, 4.0, 5.0, 11.0, 20.0, 44.0, 83.0, 168.0, 244.0, 205.0, 113.0, 72.0, 15.0, 8.0, 5.0, 4.0, 3.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.53955078125, -0.5213775634765625, -0.503204345703125, -0.4850311279296875, -0.46685791015625, -0.4486846923828125, -0.430511474609375, -0.4123382568359375, -0.3941650390625, -0.3759918212890625, -0.357818603515625, -0.3396453857421875, -0.32147216796875, -0.3032989501953125, -0.285125732421875, -0.2669525146484375, -0.248779296875, -0.2306060791015625, -0.212432861328125, -0.1942596435546875, -0.17608642578125, -0.1579132080078125, -0.139739990234375, -0.1215667724609375, -0.1033935546875, -0.0852203369140625, -0.067047119140625, -0.0488739013671875, -0.03070068359375, -0.0125274658203125, 0.005645751953125, 0.0238189697265625, 0.0419921875, 0.0601654052734375, 0.078338623046875, 0.0965118408203125, 0.11468505859375, 0.1328582763671875, 0.151031494140625, 0.1692047119140625, 0.1873779296875, 0.2055511474609375, 0.223724365234375, 0.2418975830078125, 0.26007080078125, 0.2782440185546875, 0.296417236328125, 0.3145904541015625, 0.332763671875, 0.3509368896484375, 0.369110107421875, 0.3872833251953125, 0.40545654296875, 0.4236297607421875, 0.441802978515625, 0.4599761962890625, 0.4781494140625, 0.4963226318359375, 0.514495849609375, 0.5326690673828125, 0.55084228515625, 0.5690155029296875, 0.587188720703125, 0.6053619384765625, 0.62353515625]}, "gradients/encoder.encoder.layers.20.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 5.0, 5.0, 15.0, 14.0, 24.0, 44.0, 67.0, 124.0, 310.0, 1080.0, 7145.0, 200989.0, 3960467.0, 20620.0, 2357.0, 527.0, 190.0, 117.0, 74.0, 50.0, 29.0, 14.0, 13.0, 2.0, 4.0, 1.0, 1.0, 2.0, 3.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-3.568359375, -3.460906982421875, -3.35345458984375, -3.246002197265625, -3.1385498046875, -3.031097412109375, -2.92364501953125, -2.816192626953125, -2.708740234375, -2.601287841796875, -2.49383544921875, -2.386383056640625, -2.2789306640625, -2.171478271484375, -2.06402587890625, -1.956573486328125, -1.84912109375, -1.741668701171875, -1.63421630859375, -1.526763916015625, -1.4193115234375, -1.311859130859375, -1.20440673828125, -1.096954345703125, -0.989501953125, -0.882049560546875, -0.77459716796875, -0.667144775390625, -0.5596923828125, -0.452239990234375, -0.34478759765625, -0.237335205078125, -0.1298828125, -0.022430419921875, 0.08502197265625, 0.192474365234375, 0.2999267578125, 0.407379150390625, 0.51483154296875, 0.622283935546875, 0.729736328125, 0.837188720703125, 0.94464111328125, 1.052093505859375, 1.1595458984375, 1.266998291015625, 1.37445068359375, 1.481903076171875, 1.58935546875, 1.696807861328125, 1.80426025390625, 1.911712646484375, 2.0191650390625, 2.126617431640625, 2.23406982421875, 2.341522216796875, 2.448974609375, 2.556427001953125, 2.66387939453125, 2.771331787109375, 2.8787841796875, 2.986236572265625, 3.09368896484375, 3.201141357421875, 3.30859375]}, "gradients/encoder.encoder.layers.20.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 3.0, 3.0, 4.0, 11.0, 11.0, 10.0, 12.0, 22.0, 34.0, 106.0, 262.0, 1172.0, 1838.0, 318.0, 125.0, 54.0, 30.0, 20.0, 18.0, 7.0, 8.0, 5.0, 4.0, 3.0, 2.0, 4.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.349609375, -0.33129119873046875, -0.3129730224609375, -0.29465484619140625, -0.276336669921875, -0.25801849365234375, -0.2397003173828125, -0.22138214111328125, -0.20306396484375, -0.18474578857421875, -0.1664276123046875, -0.14810943603515625, -0.129791259765625, -0.11147308349609375, -0.0931549072265625, -0.07483673095703125, -0.0565185546875, -0.03820037841796875, -0.0198822021484375, -0.00156402587890625, 0.016754150390625, 0.03507232666015625, 0.0533905029296875, 0.07170867919921875, 0.09002685546875, 0.10834503173828125, 0.1266632080078125, 0.14498138427734375, 0.163299560546875, 0.18161773681640625, 0.1999359130859375, 0.21825408935546875, 0.236572265625, 0.25489044189453125, 0.2732086181640625, 0.29152679443359375, 0.309844970703125, 0.32816314697265625, 0.3464813232421875, 0.36479949951171875, 0.38311767578125, 0.40143585205078125, 0.4197540283203125, 0.43807220458984375, 0.456390380859375, 0.47470855712890625, 0.4930267333984375, 0.5113449096679688, 0.5296630859375, 0.5479812622070312, 0.5662994384765625, 0.5846176147460938, 0.602935791015625, 0.6212539672851562, 0.6395721435546875, 0.6578903198242188, 0.67620849609375, 0.6945266723632812, 0.7128448486328125, 0.7311630249023438, 0.749481201171875, 0.7677993774414062, 0.7861175537109375, 0.8044357299804688, 0.82275390625]}, "gradients/encoder.encoder.layers.20.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 3.0, 7.0, 4.0, 11.0, 26.0, 46.0, 87.0, 187.0, 190.0, 185.0, 113.0, 76.0, 33.0, 13.0, 12.0, 6.0, 2.0, 4.0, 1.0, 2.0, 2.0, 0.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.0713757276535034, -1.0100514888763428, -0.9487271308898926, -0.8874028325080872, -0.8260785341262817, -0.7647542357444763, -0.7034299373626709, -0.6421056389808655, -0.5807813405990601, -0.5194570422172546, -0.4581327438354492, -0.3968084454536438, -0.3354841470718384, -0.27415984869003296, -0.21283555030822754, -0.15151125192642212, -0.0901869535446167, -0.02886265516281128, 0.03246164321899414, 0.09378594160079956, 0.15511023998260498, 0.2164345383644104, 0.2777588367462158, 0.33908313512802124, 0.40040743350982666, 0.4617317318916321, 0.5230560302734375, 0.5843803286552429, 0.6457046270370483, 0.7070289254188538, 0.7683532238006592, 0.8296775221824646, 0.8910017013549805, 0.9523259997367859, 1.0136502981185913, 1.074974536895752, 1.1362988948822021, 1.1976232528686523, 1.258947491645813, 1.3202717304229736, 1.3815960884094238, 1.442920446395874, 1.5042446851730347, 1.5655689239501953, 1.6268932819366455, 1.6882176399230957, 1.7495418787002563, 1.810866117477417, 1.8721904754638672, 1.9335148334503174, 1.994839072227478, 2.0561633110046387, 2.117487668991089, 2.178812026977539, 2.24013614654541, 2.3014605045318604, 2.3627848625183105, 2.4241092205047607, 2.485433578491211, 2.546757698059082, 2.6080820560455322, 2.6694064140319824, 2.7307305335998535, 2.7920548915863037, 2.853379249572754]}, "gradients/encoder.encoder.layers.20.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 5.0, 2.0, 1.0, 2.0, 11.0, 15.0, 12.0, 20.0, 23.0, 19.0, 28.0, 29.0, 33.0, 37.0, 44.0, 43.0, 50.0, 60.0, 51.0, 48.0, 53.0, 51.0, 32.0, 49.0, 42.0, 42.0, 44.0, 33.0, 21.0, 21.0, 24.0, 17.0, 14.0, 8.0, 5.0, 6.0, 3.0, 6.0, 2.0, 4.0, 1.0, 2.0, 0.0, 0.0, 0.0, 2.0, 1.0], "bins": [-1.4748361110687256, -1.4331227540969849, -1.3914093971252441, -1.3496960401535034, -1.3079826831817627, -1.266269326210022, -1.2245559692382812, -1.1828426122665405, -1.1411292552947998, -1.099415898323059, -1.0577025413513184, -1.0159891843795776, -0.9742758274078369, -0.9325624704360962, -0.8908491134643555, -0.8491357564926147, -0.8074224591255188, -0.7657091021537781, -0.7239957451820374, -0.6822823882102966, -0.6405690312385559, -0.5988556742668152, -0.5571423768997192, -0.5154290199279785, -0.4737156331539154, -0.4320022761821747, -0.39028891921043396, -0.3485755920410156, -0.3068622350692749, -0.2651488780975342, -0.22343552112579346, -0.18172216415405273, -0.140008807182312, -0.09829545021057129, -0.05658210068941116, -0.014868751168251038, 0.026844605803489685, 0.06855796277523041, 0.11027130484580994, 0.15198466181755066, 0.19369801878929138, 0.2354113757610321, 0.2771247327327728, 0.31883805990219116, 0.3605514168739319, 0.4022647738456726, 0.44397813081741333, 0.48569148778915405, 0.5274048447608948, 0.5691182017326355, 0.6108315587043762, 0.6525449156761169, 0.6942582726478577, 0.7359716296195984, 0.7776849269866943, 0.8193982839584351, 0.8611116409301758, 0.9028249979019165, 0.9445383548736572, 0.986251711845398, 1.0279650688171387, 1.0696784257888794, 1.1113917827606201, 1.1531051397323608, 1.1948184967041016]}, "gradients/encoder.encoder.layers.20.attention.out_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 5.0, 7.0, 5.0, 9.0, 8.0, 8.0, 15.0, 28.0, 34.0, 54.0, 83.0, 172.0, 332.0, 654.0, 1738.0, 6733.0, 198019.0, 821593.0, 14562.0, 2705.0, 865.0, 424.0, 208.0, 119.0, 56.0, 44.0, 26.0, 12.0, 7.0, 9.0, 8.0, 5.0, 6.0, 6.0, 2.0, 1.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0], "bins": [-3.44140625, -3.334442138671875, -3.22747802734375, -3.120513916015625, -3.0135498046875, -2.906585693359375, -2.79962158203125, -2.692657470703125, -2.585693359375, -2.478729248046875, -2.37176513671875, -2.264801025390625, -2.1578369140625, -2.050872802734375, -1.94390869140625, -1.836944580078125, -1.72998046875, -1.623016357421875, -1.51605224609375, -1.409088134765625, -1.3021240234375, -1.195159912109375, -1.08819580078125, -0.981231689453125, -0.874267578125, -0.767303466796875, -0.66033935546875, -0.553375244140625, -0.4464111328125, -0.339447021484375, -0.23248291015625, -0.125518798828125, -0.0185546875, 0.088409423828125, 0.19537353515625, 0.302337646484375, 0.4093017578125, 0.516265869140625, 0.62322998046875, 0.730194091796875, 0.837158203125, 0.944122314453125, 1.05108642578125, 1.158050537109375, 1.2650146484375, 1.371978759765625, 1.47894287109375, 1.585906982421875, 1.69287109375, 1.799835205078125, 1.90679931640625, 2.013763427734375, 2.1207275390625, 2.227691650390625, 2.33465576171875, 2.441619873046875, 2.548583984375, 2.655548095703125, 2.76251220703125, 2.869476318359375, 2.9764404296875, 3.083404541015625, 3.19036865234375, 3.297332763671875, 3.404296875]}, "gradients/encoder.encoder.layers.20.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 4.0, 2.0, 11.0, 16.0, 49.0, 120.0, 201.0, 236.0, 169.0, 108.0, 56.0, 17.0, 5.0, 7.0, 3.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.52587890625, -0.5086288452148438, -0.4913787841796875, -0.47412872314453125, -0.456878662109375, -0.43962860107421875, -0.4223785400390625, -0.40512847900390625, -0.38787841796875, -0.37062835693359375, -0.3533782958984375, -0.33612823486328125, -0.318878173828125, -0.30162811279296875, -0.2843780517578125, -0.26712799072265625, -0.2498779296875, -0.23262786865234375, -0.2153778076171875, -0.19812774658203125, -0.180877685546875, -0.16362762451171875, -0.1463775634765625, -0.12912750244140625, -0.11187744140625, -0.09462738037109375, -0.0773773193359375, -0.06012725830078125, -0.042877197265625, -0.02562713623046875, -0.0083770751953125, 0.00887298583984375, 0.026123046875, 0.04337310791015625, 0.0606231689453125, 0.07787322998046875, 0.095123291015625, 0.11237335205078125, 0.1296234130859375, 0.14687347412109375, 0.16412353515625, 0.18137359619140625, 0.1986236572265625, 0.21587371826171875, 0.233123779296875, 0.25037384033203125, 0.2676239013671875, 0.28487396240234375, 0.3021240234375, 0.31937408447265625, 0.3366241455078125, 0.35387420654296875, 0.371124267578125, 0.38837432861328125, 0.4056243896484375, 0.42287445068359375, 0.44012451171875, 0.45737457275390625, 0.4746246337890625, 0.49187469482421875, 0.509124755859375, 0.5263748168945312, 0.5436248779296875, 0.5608749389648438, 0.578125]}, "gradients/encoder.encoder.layers.20.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 4.0, 3.0, 1.0, 5.0, 7.0, 6.0, 9.0, 11.0, 16.0, 37.0, 63.0, 95.0, 185.0, 341.0, 713.0, 1943.0, 9001.0, 106353.0, 823657.0, 94625.0, 8264.0, 1873.0, 655.0, 319.0, 147.0, 82.0, 55.0, 32.0, 21.0, 8.0, 11.0, 7.0, 4.0, 6.0, 2.0, 2.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-2.296875, -2.226165771484375, -2.15545654296875, -2.084747314453125, -2.0140380859375, -1.943328857421875, -1.87261962890625, -1.801910400390625, -1.731201171875, -1.660491943359375, -1.58978271484375, -1.519073486328125, -1.4483642578125, -1.377655029296875, -1.30694580078125, -1.236236572265625, -1.16552734375, -1.094818115234375, -1.02410888671875, -0.953399658203125, -0.8826904296875, -0.811981201171875, -0.74127197265625, -0.670562744140625, -0.599853515625, -0.529144287109375, -0.45843505859375, -0.387725830078125, -0.3170166015625, -0.246307373046875, -0.17559814453125, -0.104888916015625, -0.0341796875, 0.036529541015625, 0.10723876953125, 0.177947998046875, 0.2486572265625, 0.319366455078125, 0.39007568359375, 0.460784912109375, 0.531494140625, 0.602203369140625, 0.67291259765625, 0.743621826171875, 0.8143310546875, 0.885040283203125, 0.95574951171875, 1.026458740234375, 1.09716796875, 1.167877197265625, 1.23858642578125, 1.309295654296875, 1.3800048828125, 1.450714111328125, 1.52142333984375, 1.592132568359375, 1.662841796875, 1.733551025390625, 1.80426025390625, 1.874969482421875, 1.9456787109375, 2.016387939453125, 2.08709716796875, 2.157806396484375, 2.228515625]}, "gradients/encoder.encoder.layers.20.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0, 3.0, 7.0, 0.0, 7.0, 2.0, 5.0, 3.0, 13.0, 15.0, 21.0, 23.0, 28.0, 41.0, 44.0, 56.0, 68.0, 59.0, 80.0, 61.0, 64.0, 69.0, 61.0, 54.0, 49.0, 39.0, 42.0, 29.0, 20.0, 17.0, 4.0, 5.0, 2.0, 3.0, 3.0, 7.0, 5.0, 2.0, 4.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.5830078125, -1.533172607421875, -1.48333740234375, -1.433502197265625, -1.3836669921875, -1.333831787109375, -1.28399658203125, -1.234161376953125, -1.184326171875, -1.134490966796875, -1.08465576171875, -1.034820556640625, -0.9849853515625, -0.935150146484375, -0.88531494140625, -0.835479736328125, -0.78564453125, -0.735809326171875, -0.68597412109375, -0.636138916015625, -0.5863037109375, -0.536468505859375, -0.48663330078125, -0.436798095703125, -0.386962890625, -0.337127685546875, -0.28729248046875, -0.237457275390625, -0.1876220703125, -0.137786865234375, -0.08795166015625, -0.038116455078125, 0.01171875, 0.061553955078125, 0.11138916015625, 0.161224365234375, 0.2110595703125, 0.260894775390625, 0.31072998046875, 0.360565185546875, 0.410400390625, 0.460235595703125, 0.51007080078125, 0.559906005859375, 0.6097412109375, 0.659576416015625, 0.70941162109375, 0.759246826171875, 0.80908203125, 0.858917236328125, 0.90875244140625, 0.958587646484375, 1.0084228515625, 1.058258056640625, 1.10809326171875, 1.157928466796875, 1.207763671875, 1.257598876953125, 1.30743408203125, 1.357269287109375, 1.4071044921875, 1.456939697265625, 1.50677490234375, 1.556610107421875, 1.6064453125]}, "gradients/encoder.encoder.layers.20.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 2.0, 9.0, 19.0, 15.0, 39.0, 93.0, 187.0, 678.0, 3735.0, 917426.0, 123641.0, 1955.0, 468.0, 145.0, 69.0, 33.0, 20.0, 9.0, 11.0, 1.0, 3.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.181640625, -2.115234375, -2.048828125, -1.982421875, -1.916015625, -1.849609375, -1.783203125, -1.716796875, -1.650390625, -1.583984375, -1.517578125, -1.451171875, -1.384765625, -1.318359375, -1.251953125, -1.185546875, -1.119140625, -1.052734375, -0.986328125, -0.919921875, -0.853515625, -0.787109375, -0.720703125, -0.654296875, -0.587890625, -0.521484375, -0.455078125, -0.388671875, -0.322265625, -0.255859375, -0.189453125, -0.123046875, -0.056640625, 0.009765625, 0.076171875, 0.142578125, 0.208984375, 0.275390625, 0.341796875, 0.408203125, 0.474609375, 0.541015625, 0.607421875, 0.673828125, 0.740234375, 0.806640625, 0.873046875, 0.939453125, 1.005859375, 1.072265625, 1.138671875, 1.205078125, 1.271484375, 1.337890625, 1.404296875, 1.470703125, 1.537109375, 1.603515625, 1.669921875, 1.736328125, 1.802734375, 1.869140625, 1.935546875, 2.001953125, 2.068359375]}, "gradients/encoder.encoder.layers.20.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 7.0, 4.0, 3.0, 5.0, 3.0, 1.0, 7.0, 7.0, 8.0, 15.0, 15.0, 15.0, 14.0, 24.0, 19.0, 22.0, 25.0, 43.0, 69.0, 132.0, 146.0, 117.0, 64.0, 40.0, 26.0, 23.0, 21.0, 30.0, 16.0, 20.0, 9.0, 12.0, 8.0, 4.0, 6.0, 8.0, 3.0, 2.0, 6.0, 3.0, 1.0, 2.0, 1.0, 3.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0], "bins": [-0.00014448165893554688, -0.0001402720808982849, -0.00013606250286102295, -0.00013185292482376099, -0.00012764334678649902, -0.00012343376874923706, -0.0001192241907119751, -0.00011501461267471313, -0.00011080503463745117, -0.00010659545660018921, -0.00010238587856292725, -9.817630052566528e-05, -9.396672248840332e-05, -8.975714445114136e-05, -8.55475664138794e-05, -8.133798837661743e-05, -7.712841033935547e-05, -7.29188323020935e-05, -6.870925426483154e-05, -6.449967622756958e-05, -6.029009819030762e-05, -5.6080520153045654e-05, -5.187094211578369e-05, -4.766136407852173e-05, -4.3451786041259766e-05, -3.92422080039978e-05, -3.503262996673584e-05, -3.082305192947388e-05, -2.6613473892211914e-05, -2.240389585494995e-05, -1.8194317817687988e-05, -1.3984739780426025e-05, -9.775161743164062e-06, -5.5655837059021e-06, -1.3560056686401367e-06, 2.853572368621826e-06, 7.063150405883789e-06, 1.1272728443145752e-05, 1.5482306480407715e-05, 1.9691884517669678e-05, 2.390146255493164e-05, 2.8111040592193604e-05, 3.2320618629455566e-05, 3.653019666671753e-05, 4.073977470397949e-05, 4.4949352741241455e-05, 4.915893077850342e-05, 5.336850881576538e-05, 5.7578086853027344e-05, 6.17876648902893e-05, 6.599724292755127e-05, 7.020682096481323e-05, 7.44163990020752e-05, 7.862597703933716e-05, 8.283555507659912e-05, 8.704513311386108e-05, 9.125471115112305e-05, 9.546428918838501e-05, 9.967386722564697e-05, 0.00010388344526290894, 0.0001080930233001709, 0.00011230260133743286, 0.00011651217937469482, 0.00012072175741195679, 0.00012493133544921875]}, "gradients/encoder.encoder.layers.20.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 0.0, 4.0, 1.0, 3.0, 9.0, 8.0, 9.0, 13.0, 20.0, 32.0, 47.0, 82.0, 138.0, 291.0, 684.0, 2234.0, 12326.0, 421525.0, 592050.0, 15196.0, 2462.0, 776.0, 299.0, 139.0, 85.0, 32.0, 34.0, 28.0, 8.0, 7.0, 6.0, 3.0, 4.0, 4.0, 4.0, 3.0, 2.0, 1.0], "bins": [-1.6513671875, -1.6138763427734375, -1.576385498046875, -1.5388946533203125, -1.50140380859375, -1.4639129638671875, -1.426422119140625, -1.3889312744140625, -1.3514404296875, -1.3139495849609375, -1.276458740234375, -1.2389678955078125, -1.20147705078125, -1.1639862060546875, -1.126495361328125, -1.0890045166015625, -1.051513671875, -1.0140228271484375, -0.976531982421875, -0.9390411376953125, -0.90155029296875, -0.8640594482421875, -0.826568603515625, -0.7890777587890625, -0.7515869140625, -0.7140960693359375, -0.676605224609375, -0.6391143798828125, -0.60162353515625, -0.5641326904296875, -0.526641845703125, -0.4891510009765625, -0.45166015625, -0.4141693115234375, -0.376678466796875, -0.3391876220703125, -0.30169677734375, -0.2642059326171875, -0.226715087890625, -0.1892242431640625, -0.1517333984375, -0.1142425537109375, -0.076751708984375, -0.0392608642578125, -0.00177001953125, 0.0357208251953125, 0.073211669921875, 0.1107025146484375, 0.148193359375, 0.1856842041015625, 0.223175048828125, 0.2606658935546875, 0.29815673828125, 0.3356475830078125, 0.373138427734375, 0.4106292724609375, 0.4481201171875, 0.4856109619140625, 0.523101806640625, 0.5605926513671875, 0.59808349609375, 0.6355743408203125, 0.673065185546875, 0.7105560302734375, 0.748046875]}, "gradients/encoder.encoder.layers.20.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 4.0, 6.0, 4.0, 2.0, 7.0, 12.0, 12.0, 14.0, 25.0, 32.0, 59.0, 81.0, 100.0, 99.0, 140.0, 113.0, 89.0, 67.0, 40.0, 36.0, 19.0, 14.0, 11.0, 9.0, 5.0, 6.0, 0.0, 3.0, 3.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.59716796875, -0.5768051147460938, -0.5564422607421875, -0.5360794067382812, -0.515716552734375, -0.49535369873046875, -0.4749908447265625, -0.45462799072265625, -0.43426513671875, -0.41390228271484375, -0.3935394287109375, -0.37317657470703125, -0.352813720703125, -0.33245086669921875, -0.3120880126953125, -0.29172515869140625, -0.2713623046875, -0.25099945068359375, -0.2306365966796875, -0.21027374267578125, -0.189910888671875, -0.16954803466796875, -0.1491851806640625, -0.12882232666015625, -0.10845947265625, -0.08809661865234375, -0.0677337646484375, -0.04737091064453125, -0.027008056640625, -0.00664520263671875, 0.0137176513671875, 0.03408050537109375, 0.054443359375, 0.07480621337890625, 0.0951690673828125, 0.11553192138671875, 0.135894775390625, 0.15625762939453125, 0.1766204833984375, 0.19698333740234375, 0.21734619140625, 0.23770904541015625, 0.2580718994140625, 0.27843475341796875, 0.298797607421875, 0.31916046142578125, 0.3395233154296875, 0.35988616943359375, 0.3802490234375, 0.40061187744140625, 0.4209747314453125, 0.44133758544921875, 0.461700439453125, 0.48206329345703125, 0.5024261474609375, 0.5227890014648438, 0.54315185546875, 0.5635147094726562, 0.5838775634765625, 0.6042404174804688, 0.624603271484375, 0.6449661254882812, 0.6653289794921875, 0.6856918334960938, 0.7060546875]}, "gradients/encoder.encoder.layers.20.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 4.0, 3.0, 12.0, 51.0, 415.0, 476.0, 47.0, 5.0, 3.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-14.725003242492676, -13.811201095581055, -12.897398948669434, -11.983596801757812, -11.069794654846191, -10.15599250793457, -9.242189407348633, -8.328388214111328, -7.414585590362549, -6.500783443450928, -5.586981296539307, -4.673178672790527, -3.7593767642974854, -2.845574378967285, -1.931772232055664, -1.017970085144043, -0.10416793823242188, 0.809634268283844, 1.7234364748001099, 2.6372387409210205, 3.5510408878326416, 4.464843273162842, 5.378645420074463, 6.292447566986084, 7.206249713897705, 8.120052337646484, 9.033854484558105, 9.947656631469727, 10.861458778381348, 11.775260925292969, 12.68906307220459, 13.602865219116211, 14.516668319702148, 15.43047046661377, 16.34427261352539, 17.258075714111328, 18.171876907348633, 19.08568000793457, 19.999481201171875, 20.913284301757812, 21.827085494995117, 22.740888595581055, 23.65468978881836, 24.568492889404297, 25.4822940826416, 26.39609718322754, 27.309898376464844, 28.22370147705078, 29.13750457763672, 30.051307678222656, 30.96510887145996, 31.8789119720459, 32.7927131652832, 33.70651626586914, 34.62031936645508, 35.53411865234375, 36.44792175292969, 37.361724853515625, 38.27552795410156, 39.189327239990234, 40.10313034057617, 41.01693344116211, 41.93073654174805, 42.84453582763672, 43.758338928222656]}, "gradients/encoder.encoder.layers.20.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 4.0, 2.0, 1.0, 5.0, 1.0, 3.0, 5.0, 7.0, 12.0, 12.0, 17.0, 24.0, 24.0, 23.0, 24.0, 26.0, 21.0, 38.0, 32.0, 40.0, 43.0, 31.0, 42.0, 46.0, 36.0, 52.0, 38.0, 38.0, 43.0, 37.0, 48.0, 32.0, 35.0, 30.0, 26.0, 14.0, 15.0, 18.0, 12.0, 12.0, 14.0, 7.0, 5.0, 3.0, 5.0, 2.0, 2.0, 2.0, 4.0, 1.0, 0.0, 0.0, 3.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-6.249598503112793, -6.043041229248047, -5.836483955383301, -5.629927158355713, -5.423369884490967, -5.216812610626221, -5.010255336761475, -4.803698539733887, -4.597141265869141, -4.3905839920043945, -4.184026718139648, -3.9774696826934814, -3.7709126472473145, -3.5643553733825684, -3.3577980995178223, -3.1512410640716553, -2.944683790206909, -2.738126516342163, -2.531569480895996, -2.32501220703125, -2.118455171585083, -1.911897897720337, -1.7053407430648804, -1.4987835884094238, -1.2922264337539673, -1.0856692790985107, -0.8791121244430542, -0.6725549101829529, -0.46599775552749634, -0.259440541267395, -0.05288338661193848, 0.15367376804351807, 0.3602309226989746, 0.5667880773544312, 0.7733452320098877, 0.979902446269989, 1.1864595413208008, 1.3930168151855469, 1.5995739698410034, 1.80613112449646, 2.012688159942627, 2.219245433807373, 2.42580246925354, 2.632359743118286, 2.838916778564453, 3.045474052429199, 3.2520313262939453, 3.4585883617401123, 3.6651456356048584, 3.8717029094696045, 4.0782599449157715, 4.284817218780518, 4.491374492645264, 4.697931289672852, 4.904488563537598, 5.111045837402344, 5.31760311126709, 5.524160385131836, 5.730717658996582, 5.93727445602417, 6.143831729888916, 6.350389003753662, 6.556946277618408, 6.763503074645996, 6.970060348510742]}, "gradients/encoder.encoder.layers.19.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 2.0, 2.0, 1.0, 0.0, 2.0, 3.0, 13.0, 12.0, 19.0, 31.0, 90.0, 216.0, 679.0, 3989.0, 3669764.0, 514695.0, 3578.0, 741.0, 236.0, 103.0, 43.0, 23.0, 25.0, 6.0, 6.0, 6.0, 4.0, 2.0, 2.0, 0.0, 3.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.88671875, -5.69024658203125, -5.4937744140625, -5.29730224609375, -5.100830078125, -4.90435791015625, -4.7078857421875, -4.51141357421875, -4.31494140625, -4.11846923828125, -3.9219970703125, -3.72552490234375, -3.529052734375, -3.33258056640625, -3.1361083984375, -2.93963623046875, -2.7431640625, -2.54669189453125, -2.3502197265625, -2.15374755859375, -1.957275390625, -1.76080322265625, -1.5643310546875, -1.36785888671875, -1.17138671875, -0.97491455078125, -0.7784423828125, -0.58197021484375, -0.385498046875, -0.18902587890625, 0.0074462890625, 0.20391845703125, 0.400390625, 0.59686279296875, 0.7933349609375, 0.98980712890625, 1.186279296875, 1.38275146484375, 1.5792236328125, 1.77569580078125, 1.97216796875, 2.16864013671875, 2.3651123046875, 2.56158447265625, 2.758056640625, 2.95452880859375, 3.1510009765625, 3.34747314453125, 3.5439453125, 3.74041748046875, 3.9368896484375, 4.13336181640625, 4.329833984375, 4.52630615234375, 4.7227783203125, 4.91925048828125, 5.11572265625, 5.31219482421875, 5.5086669921875, 5.70513916015625, 5.901611328125, 6.09808349609375, 6.2945556640625, 6.49102783203125, 6.6875]}, "gradients/encoder.encoder.layers.19.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 2.0, 0.0, 4.0, 8.0, 19.0, 39.0, 67.0, 103.0, 171.0, 182.0, 163.0, 114.0, 64.0, 38.0, 16.0, 9.0, 7.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.55419921875, -0.5354461669921875, -0.516693115234375, -0.4979400634765625, -0.47918701171875, -0.4604339599609375, -0.441680908203125, -0.4229278564453125, -0.4041748046875, -0.3854217529296875, -0.366668701171875, -0.3479156494140625, -0.32916259765625, -0.3104095458984375, -0.291656494140625, -0.2729034423828125, -0.254150390625, -0.2353973388671875, -0.216644287109375, -0.1978912353515625, -0.17913818359375, -0.1603851318359375, -0.141632080078125, -0.1228790283203125, -0.1041259765625, -0.0853729248046875, -0.066619873046875, -0.0478668212890625, -0.02911376953125, -0.0103607177734375, 0.008392333984375, 0.0271453857421875, 0.0458984375, 0.0646514892578125, 0.083404541015625, 0.1021575927734375, 0.12091064453125, 0.1396636962890625, 0.158416748046875, 0.1771697998046875, 0.1959228515625, 0.2146759033203125, 0.233428955078125, 0.2521820068359375, 0.27093505859375, 0.2896881103515625, 0.308441162109375, 0.3271942138671875, 0.345947265625, 0.3647003173828125, 0.383453369140625, 0.4022064208984375, 0.42095947265625, 0.4397125244140625, 0.458465576171875, 0.4772186279296875, 0.4959716796875, 0.5147247314453125, 0.533477783203125, 0.5522308349609375, 0.57098388671875, 0.5897369384765625, 0.608489990234375, 0.6272430419921875, 0.64599609375]}, "gradients/encoder.encoder.layers.19.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 0.0, 2.0, 1.0, 0.0, 5.0, 3.0, 1.0, 12.0, 17.0, 23.0, 41.0, 78.0, 119.0, 234.0, 686.0, 3731.0, 51739.0, 4087290.0, 45586.0, 3632.0, 639.0, 229.0, 93.0, 55.0, 23.0, 14.0, 15.0, 7.0, 2.0, 4.0, 0.0, 4.0, 0.0, 1.0, 2.0, 1.0, 1.0, 3.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.177734375, -2.09228515625, -2.0068359375, -1.92138671875, -1.8359375, -1.75048828125, -1.6650390625, -1.57958984375, -1.494140625, -1.40869140625, -1.3232421875, -1.23779296875, -1.15234375, -1.06689453125, -0.9814453125, -0.89599609375, -0.810546875, -0.72509765625, -0.6396484375, -0.55419921875, -0.46875, -0.38330078125, -0.2978515625, -0.21240234375, -0.126953125, -0.04150390625, 0.0439453125, 0.12939453125, 0.21484375, 0.30029296875, 0.3857421875, 0.47119140625, 0.556640625, 0.64208984375, 0.7275390625, 0.81298828125, 0.8984375, 0.98388671875, 1.0693359375, 1.15478515625, 1.240234375, 1.32568359375, 1.4111328125, 1.49658203125, 1.58203125, 1.66748046875, 1.7529296875, 1.83837890625, 1.923828125, 2.00927734375, 2.0947265625, 2.18017578125, 2.265625, 2.35107421875, 2.4365234375, 2.52197265625, 2.607421875, 2.69287109375, 2.7783203125, 2.86376953125, 2.94921875, 3.03466796875, 3.1201171875, 3.20556640625, 3.291015625]}, "gradients/encoder.encoder.layers.19.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 5.0, 0.0, 2.0, 4.0, 5.0, 10.0, 10.0, 18.0, 26.0, 31.0, 56.0, 143.0, 442.0, 2241.0, 726.0, 193.0, 66.0, 37.0, 21.0, 11.0, 13.0, 12.0, 4.0, 4.0, 4.0, 1.0, 4.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.7353515625, -0.7181053161621094, -0.7008590698242188, -0.6836128234863281, -0.6663665771484375, -0.6491203308105469, -0.6318740844726562, -0.6146278381347656, -0.597381591796875, -0.5801353454589844, -0.5628890991210938, -0.5456428527832031, -0.5283966064453125, -0.5111503601074219, -0.49390411376953125, -0.4766578674316406, -0.45941162109375, -0.4421653747558594, -0.42491912841796875, -0.4076728820800781, -0.3904266357421875, -0.3731803894042969, -0.35593414306640625, -0.3386878967285156, -0.321441650390625, -0.3041954040527344, -0.28694915771484375, -0.2697029113769531, -0.2524566650390625, -0.23521041870117188, -0.21796417236328125, -0.20071792602539062, -0.1834716796875, -0.16622543334960938, -0.14897918701171875, -0.13173294067382812, -0.1144866943359375, -0.09724044799804688, -0.07999420166015625, -0.06274795532226562, -0.045501708984375, -0.028255462646484375, -0.01100921630859375, 0.006237030029296875, 0.0234832763671875, 0.040729522705078125, 0.05797576904296875, 0.07522201538085938, 0.09246826171875, 0.10971450805664062, 0.12696075439453125, 0.14420700073242188, 0.1614532470703125, 0.17869949340820312, 0.19594573974609375, 0.21319198608398438, 0.230438232421875, 0.24768447875976562, 0.26493072509765625, 0.2821769714355469, 0.2994232177734375, 0.3166694641113281, 0.33391571044921875, 0.3511619567871094, 0.368408203125]}, "gradients/encoder.encoder.layers.19.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 2.0, 1.0, 0.0, 1.0, 2.0, 4.0, 15.0, 76.0, 315.0, 411.0, 146.0, 22.0, 9.0, 3.0, 5.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.168980360031128, -2.042003631591797, -1.9150269031524658, -1.7880500555038452, -1.6610733270645142, -1.534096598625183, -1.4071197509765625, -1.2801430225372314, -1.1531662940979004, -1.0261895656585693, -0.8992127776145935, -0.7722359895706177, -0.6452592611312866, -0.5182825326919556, -0.39130574464797974, -0.2643289566040039, -0.13735222816467285, -0.01037546992301941, 0.11660128831863403, 0.24357804656028748, 0.3705548048019409, 0.497531533241272, 0.6245083212852478, 0.7514851093292236, 0.8784618377685547, 1.0054385662078857, 1.1324152946472168, 1.2593921422958374, 1.3863688707351685, 1.5133455991744995, 1.6403224468231201, 1.7672991752624512, 1.8942756652832031, 2.021252393722534, 2.1482291221618652, 2.2752058506011963, 2.4021825790405273, 2.5291595458984375, 2.6561362743377686, 2.7831130027770996, 2.9100897312164307, 3.0370664596557617, 3.1640431880950928, 3.291019916534424, 3.417996883392334, 3.544973373413086, 3.671950340270996, 3.798927068710327, 3.925903797149658, 4.052880764007568, 4.17985725402832, 4.3068342208862305, 4.433810710906982, 4.560787677764893, 4.6877641677856445, 4.814741134643555, 4.941718101501465, 5.068695068359375, 5.195671558380127, 5.322648525238037, 5.449625015258789, 5.576601982116699, 5.703578472137451, 5.830555438995361, 5.957531929016113]}, "gradients/encoder.encoder.layers.19.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 3.0, 5.0, 10.0, 7.0, 16.0, 13.0, 28.0, 30.0, 38.0, 38.0, 48.0, 53.0, 60.0, 68.0, 73.0, 65.0, 68.0, 55.0, 64.0, 50.0, 52.0, 41.0, 25.0, 26.0, 15.0, 23.0, 14.0, 4.0, 11.0, 3.0, 2.0, 4.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.8195555210113525, -1.7663153409957886, -1.7130751609802246, -1.6598351001739502, -1.6065949201583862, -1.5533547401428223, -1.5001146793365479, -1.4468744993209839, -1.39363431930542, -1.340394139289856, -1.287153959274292, -1.2339138984680176, -1.1806737184524536, -1.1274335384368896, -1.0741934776306152, -1.0209532976150513, -0.9677131175994873, -0.9144729375839233, -0.8612328171730042, -0.807992696762085, -0.754752516746521, -0.701512336730957, -0.6482722163200378, -0.5950320959091187, -0.5417919158935547, -0.4885517656803131, -0.43531161546707153, -0.38207146525382996, -0.3288313150405884, -0.2755911648273468, -0.22235101461410522, -0.16911086440086365, -0.11587071418762207, -0.0626305639743805, -0.009390413761138916, 0.04384973645210266, 0.09708988666534424, 0.15033003687858582, 0.2035701870918274, 0.25681033730506897, 0.31005048751831055, 0.3632906377315521, 0.4165307879447937, 0.4697709381580353, 0.5230110883712769, 0.5762512683868408, 0.62949138879776, 0.6827315092086792, 0.7359716892242432, 0.7892118692398071, 0.8424519896507263, 0.8956921100616455, 0.9489322900772095, 1.0021724700927734, 1.0554125308990479, 1.1086527109146118, 1.1618928909301758, 1.2151330709457397, 1.2683732509613037, 1.3216133117675781, 1.374853491783142, 1.428093671798706, 1.4813337326049805, 1.5345739126205444, 1.5878140926361084]}, "gradients/encoder.encoder.layers.19.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 1.0, 5.0, 7.0, 5.0, 12.0, 15.0, 35.0, 36.0, 45.0, 78.0, 96.0, 158.0, 274.0, 464.0, 764.0, 1701.0, 5331.0, 29863.0, 390853.0, 562894.0, 44876.0, 6750.0, 2062.0, 946.0, 494.0, 267.0, 176.0, 120.0, 70.0, 53.0, 31.0, 22.0, 15.0, 15.0, 11.0, 7.0, 6.0, 3.0, 1.0, 0.0, 0.0, 3.0, 0.0, 3.0, 0.0, 1.0, 1.0], "bins": [-1.7021484375, -1.6549835205078125, -1.607818603515625, -1.5606536865234375, -1.51348876953125, -1.4663238525390625, -1.419158935546875, -1.3719940185546875, -1.3248291015625, -1.2776641845703125, -1.230499267578125, -1.1833343505859375, -1.13616943359375, -1.0890045166015625, -1.041839599609375, -0.9946746826171875, -0.947509765625, -0.9003448486328125, -0.853179931640625, -0.8060150146484375, -0.75885009765625, -0.7116851806640625, -0.664520263671875, -0.6173553466796875, -0.5701904296875, -0.5230255126953125, -0.475860595703125, -0.4286956787109375, -0.38153076171875, -0.3343658447265625, -0.287200927734375, -0.2400360107421875, -0.19287109375, -0.1457061767578125, -0.098541259765625, -0.0513763427734375, -0.00421142578125, 0.0429534912109375, 0.090118408203125, 0.1372833251953125, 0.1844482421875, 0.2316131591796875, 0.278778076171875, 0.3259429931640625, 0.37310791015625, 0.4202728271484375, 0.467437744140625, 0.5146026611328125, 0.561767578125, 0.6089324951171875, 0.656097412109375, 0.7032623291015625, 0.75042724609375, 0.7975921630859375, 0.844757080078125, 0.8919219970703125, 0.9390869140625, 0.9862518310546875, 1.033416748046875, 1.0805816650390625, 1.12774658203125, 1.1749114990234375, 1.222076416015625, 1.2692413330078125, 1.31640625]}, "gradients/encoder.encoder.layers.19.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 11.0, 19.0, 25.0, 67.0, 117.0, 150.0, 172.0, 159.0, 138.0, 73.0, 40.0, 15.0, 8.0, 4.0, 4.0, 1.0, 1.0, 2.0, 0.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.50732421875, -0.4896392822265625, -0.471954345703125, -0.4542694091796875, -0.43658447265625, -0.4188995361328125, -0.401214599609375, -0.3835296630859375, -0.3658447265625, -0.3481597900390625, -0.330474853515625, -0.3127899169921875, -0.29510498046875, -0.2774200439453125, -0.259735107421875, -0.2420501708984375, -0.224365234375, -0.2066802978515625, -0.188995361328125, -0.1713104248046875, -0.15362548828125, -0.1359405517578125, -0.118255615234375, -0.1005706787109375, -0.0828857421875, -0.0652008056640625, -0.047515869140625, -0.0298309326171875, -0.01214599609375, 0.0055389404296875, 0.023223876953125, 0.0409088134765625, 0.05859375, 0.0762786865234375, 0.093963623046875, 0.1116485595703125, 0.12933349609375, 0.1470184326171875, 0.164703369140625, 0.1823883056640625, 0.2000732421875, 0.2177581787109375, 0.235443115234375, 0.2531280517578125, 0.27081298828125, 0.2884979248046875, 0.306182861328125, 0.3238677978515625, 0.341552734375, 0.3592376708984375, 0.376922607421875, 0.3946075439453125, 0.41229248046875, 0.4299774169921875, 0.447662353515625, 0.4653472900390625, 0.4830322265625, 0.5007171630859375, 0.518402099609375, 0.5360870361328125, 0.55377197265625, 0.5714569091796875, 0.589141845703125, 0.6068267822265625, 0.62451171875]}, "gradients/encoder.encoder.layers.19.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 3.0, 1.0, 4.0, 1.0, 1.0, 2.0, 1.0, 4.0, 2.0, 4.0, 14.0, 15.0, 12.0, 16.0, 26.0, 31.0, 31.0, 66.0, 74.0, 103.0, 127.0, 203.0, 298.0, 458.0, 730.0, 1419.0, 3400.0, 9498.0, 36599.0, 198796.0, 580055.0, 169263.0, 31879.0, 8850.0, 3069.0, 1410.0, 770.0, 390.0, 246.0, 192.0, 151.0, 111.0, 59.0, 52.0, 43.0, 25.0, 19.0, 12.0, 3.0, 5.0, 12.0, 8.0, 4.0, 3.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.0712890625, -1.0372314453125, -1.003173828125, -0.9691162109375, -0.93505859375, -0.9010009765625, -0.866943359375, -0.8328857421875, -0.798828125, -0.7647705078125, -0.730712890625, -0.6966552734375, -0.66259765625, -0.6285400390625, -0.594482421875, -0.5604248046875, -0.5263671875, -0.4923095703125, -0.458251953125, -0.4241943359375, -0.39013671875, -0.3560791015625, -0.322021484375, -0.2879638671875, -0.25390625, -0.2198486328125, -0.185791015625, -0.1517333984375, -0.11767578125, -0.0836181640625, -0.049560546875, -0.0155029296875, 0.0185546875, 0.0526123046875, 0.086669921875, 0.1207275390625, 0.15478515625, 0.1888427734375, 0.222900390625, 0.2569580078125, 0.291015625, 0.3250732421875, 0.359130859375, 0.3931884765625, 0.42724609375, 0.4613037109375, 0.495361328125, 0.5294189453125, 0.5634765625, 0.5975341796875, 0.631591796875, 0.6656494140625, 0.69970703125, 0.7337646484375, 0.767822265625, 0.8018798828125, 0.8359375, 0.8699951171875, 0.904052734375, 0.9381103515625, 0.97216796875, 1.0062255859375, 1.040283203125, 1.0743408203125, 1.1083984375]}, "gradients/encoder.encoder.layers.19.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 4.0, 1.0, 1.0, 1.0, 3.0, 3.0, 4.0, 3.0, 4.0, 3.0, 9.0, 12.0, 12.0, 11.0, 17.0, 18.0, 15.0, 22.0, 29.0, 30.0, 44.0, 42.0, 44.0, 52.0, 56.0, 66.0, 47.0, 55.0, 51.0, 44.0, 43.0, 52.0, 35.0, 44.0, 23.0, 25.0, 17.0, 13.0, 12.0, 8.0, 7.0, 12.0, 6.0, 5.0, 2.0, 2.0, 4.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-1.46875, -1.42742919921875, -1.3861083984375, -1.34478759765625, -1.303466796875, -1.26214599609375, -1.2208251953125, -1.17950439453125, -1.13818359375, -1.09686279296875, -1.0555419921875, -1.01422119140625, -0.972900390625, -0.93157958984375, -0.8902587890625, -0.84893798828125, -0.8076171875, -0.76629638671875, -0.7249755859375, -0.68365478515625, -0.642333984375, -0.60101318359375, -0.5596923828125, -0.51837158203125, -0.47705078125, -0.43572998046875, -0.3944091796875, -0.35308837890625, -0.311767578125, -0.27044677734375, -0.2291259765625, -0.18780517578125, -0.146484375, -0.10516357421875, -0.0638427734375, -0.02252197265625, 0.018798828125, 0.06011962890625, 0.1014404296875, 0.14276123046875, 0.18408203125, 0.22540283203125, 0.2667236328125, 0.30804443359375, 0.349365234375, 0.39068603515625, 0.4320068359375, 0.47332763671875, 0.5146484375, 0.55596923828125, 0.5972900390625, 0.63861083984375, 0.679931640625, 0.72125244140625, 0.7625732421875, 0.80389404296875, 0.84521484375, 0.88653564453125, 0.9278564453125, 0.96917724609375, 1.010498046875, 1.05181884765625, 1.0931396484375, 1.13446044921875, 1.17578125]}, "gradients/encoder.encoder.layers.19.attention.k_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 2.0, 1.0, 2.0, 3.0, 1.0, 7.0, 3.0, 3.0, 5.0, 10.0, 11.0, 14.0, 23.0, 37.0, 69.0, 115.0, 278.0, 668.0, 1951.0, 10290.0, 294282.0, 718138.0, 18384.0, 2695.0, 884.0, 312.0, 153.0, 89.0, 40.0, 26.0, 21.0, 10.0, 14.0, 3.0, 6.0, 3.0, 3.0, 2.0, 3.0, 0.0, 1.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.6494140625, -0.6278533935546875, -0.606292724609375, -0.5847320556640625, -0.56317138671875, -0.5416107177734375, -0.520050048828125, -0.4984893798828125, -0.4769287109375, -0.4553680419921875, -0.433807373046875, -0.4122467041015625, -0.39068603515625, -0.3691253662109375, -0.347564697265625, -0.3260040283203125, -0.304443359375, -0.2828826904296875, -0.261322021484375, -0.2397613525390625, -0.21820068359375, -0.1966400146484375, -0.175079345703125, -0.1535186767578125, -0.1319580078125, -0.1103973388671875, -0.088836669921875, -0.0672760009765625, -0.04571533203125, -0.0241546630859375, -0.002593994140625, 0.0189666748046875, 0.04052734375, 0.0620880126953125, 0.083648681640625, 0.1052093505859375, 0.12677001953125, 0.1483306884765625, 0.169891357421875, 0.1914520263671875, 0.2130126953125, 0.2345733642578125, 0.256134033203125, 0.2776947021484375, 0.29925537109375, 0.3208160400390625, 0.342376708984375, 0.3639373779296875, 0.385498046875, 0.4070587158203125, 0.428619384765625, 0.4501800537109375, 0.47174072265625, 0.4933013916015625, 0.514862060546875, 0.5364227294921875, 0.5579833984375, 0.5795440673828125, 0.601104736328125, 0.6226654052734375, 0.64422607421875, 0.6657867431640625, 0.687347412109375, 0.7089080810546875, 0.73046875]}, "gradients/encoder.encoder.layers.19.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 0.0, 2.0, 3.0, 4.0, 4.0, 3.0, 10.0, 13.0, 19.0, 21.0, 39.0, 40.0, 59.0, 119.0, 184.0, 177.0, 100.0, 74.0, 51.0, 32.0, 15.0, 17.0, 13.0, 7.0, 6.0, 1.0, 1.0, 3.0], "bins": [-0.0003383159637451172, -0.00033166538923978806, -0.0003250148147344589, -0.0003183642402291298, -0.00031171366572380066, -0.0003050630912184715, -0.0002984125167131424, -0.00029176194220781326, -0.00028511136770248413, -0.000278460793197155, -0.00027181021869182587, -0.00026515964418649673, -0.0002585090696811676, -0.00025185849517583847, -0.00024520792067050934, -0.0002385573461651802, -0.00023190677165985107, -0.00022525619715452194, -0.0002186056226491928, -0.00021195504814386368, -0.00020530447363853455, -0.00019865389913320541, -0.00019200332462787628, -0.00018535275012254715, -0.00017870217561721802, -0.00017205160111188889, -0.00016540102660655975, -0.00015875045210123062, -0.0001520998775959015, -0.00014544930309057236, -0.00013879872858524323, -0.0001321481540799141, -0.00012549757957458496, -0.00011884700506925583, -0.0001121964305639267, -0.00010554585605859756, -9.889528155326843e-05, -9.22447070479393e-05, -8.559413254261017e-05, -7.894355803728104e-05, -7.22929835319519e-05, -6.564240902662277e-05, -5.899183452129364e-05, -5.234126001596451e-05, -4.5690685510635376e-05, -3.9040111005306244e-05, -3.238953649997711e-05, -2.573896199464798e-05, -1.9088387489318848e-05, -1.2437812983989716e-05, -5.7872384786605835e-06, 8.633360266685486e-07, 7.513910531997681e-06, 1.4164485037326813e-05, 2.0815059542655945e-05, 2.7465634047985077e-05, 3.411620855331421e-05, 4.076678305864334e-05, 4.741735756397247e-05, 5.4067932069301605e-05, 6.071850657463074e-05, 6.736908107995987e-05, 7.4019655585289e-05, 8.067023009061813e-05, 8.732080459594727e-05]}, "gradients/encoder.encoder.layers.19.attention.q_proj.weight": {"_type": "histogram", "values": [2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 3.0, 0.0, 2.0, 5.0, 6.0, 13.0, 14.0, 13.0, 25.0, 40.0, 72.0, 169.0, 382.0, 971.0, 3404.0, 29954.0, 850326.0, 152663.0, 7750.0, 1672.0, 614.0, 241.0, 88.0, 37.0, 36.0, 17.0, 17.0, 7.0, 4.0, 6.0, 6.0, 3.0, 1.0, 3.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.61865234375, -0.5946044921875, -0.570556640625, -0.5465087890625, -0.5224609375, -0.4984130859375, -0.474365234375, -0.4503173828125, -0.42626953125, -0.4022216796875, -0.378173828125, -0.3541259765625, -0.330078125, -0.3060302734375, -0.281982421875, -0.2579345703125, -0.23388671875, -0.2098388671875, -0.185791015625, -0.1617431640625, -0.1376953125, -0.1136474609375, -0.089599609375, -0.0655517578125, -0.04150390625, -0.0174560546875, 0.006591796875, 0.0306396484375, 0.0546875, 0.0787353515625, 0.102783203125, 0.1268310546875, 0.15087890625, 0.1749267578125, 0.198974609375, 0.2230224609375, 0.2470703125, 0.2711181640625, 0.295166015625, 0.3192138671875, 0.34326171875, 0.3673095703125, 0.391357421875, 0.4154052734375, 0.439453125, 0.4635009765625, 0.487548828125, 0.5115966796875, 0.53564453125, 0.5596923828125, 0.583740234375, 0.6077880859375, 0.6318359375, 0.6558837890625, 0.679931640625, 0.7039794921875, 0.72802734375, 0.7520751953125, 0.776123046875, 0.8001708984375, 0.82421875, 0.8482666015625, 0.872314453125, 0.8963623046875, 0.92041015625]}, "gradients/encoder.encoder.layers.19.attention.q_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 3.0, 3.0, 4.0, 9.0, 9.0, 7.0, 8.0, 18.0, 13.0, 26.0, 38.0, 55.0, 75.0, 84.0, 137.0, 135.0, 118.0, 83.0, 49.0, 46.0, 23.0, 15.0, 17.0, 12.0, 8.0, 4.0, 7.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0], "bins": [-0.55517578125, -0.5388450622558594, -0.5225143432617188, -0.5061836242675781, -0.4898529052734375, -0.4735221862792969, -0.45719146728515625, -0.4408607482910156, -0.424530029296875, -0.4081993103027344, -0.39186859130859375, -0.3755378723144531, -0.3592071533203125, -0.3428764343261719, -0.32654571533203125, -0.3102149963378906, -0.29388427734375, -0.2775535583496094, -0.26122283935546875, -0.24489212036132812, -0.2285614013671875, -0.21223068237304688, -0.19589996337890625, -0.17956924438476562, -0.163238525390625, -0.14690780639648438, -0.13057708740234375, -0.11424636840820312, -0.0979156494140625, -0.08158493041992188, -0.06525421142578125, -0.048923492431640625, -0.0325927734375, -0.016262054443359375, 6.866455078125e-05, 0.016399383544921875, 0.0327301025390625, 0.049060821533203125, 0.06539154052734375, 0.08172225952148438, 0.098052978515625, 0.11438369750976562, 0.13071441650390625, 0.14704513549804688, 0.1633758544921875, 0.17970657348632812, 0.19603729248046875, 0.21236801147460938, 0.22869873046875, 0.24502944946289062, 0.26136016845703125, 0.2776908874511719, 0.2940216064453125, 0.3103523254394531, 0.32668304443359375, 0.3430137634277344, 0.359344482421875, 0.3756752014160156, 0.39200592041015625, 0.4083366394042969, 0.4246673583984375, 0.4409980773925781, 0.45732879638671875, 0.4736595153808594, 0.489990234375]}, "gradients/encoder.encoder.layers.19.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 4.0, 4.0, 8.0, 14.0, 49.0, 131.0, 334.0, 309.0, 103.0, 29.0, 13.0, 10.0, 4.0, 3.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-10.298676490783691, -9.85159683227539, -9.404518127441406, -8.957438468933105, -8.510358810424805, -8.06328010559082, -7.6162004470825195, -7.169121265411377, -6.722042083740234, -6.274962902069092, -5.827883720397949, -5.380804061889648, -4.933724880218506, -4.486645698547363, -4.0395660400390625, -3.59248685836792, -3.1454076766967773, -2.6983284950256348, -2.251249074935913, -1.804169774055481, -1.3570904731750488, -0.9100112915039062, -0.46293187141418457, -0.01585245132446289, 0.4312267303466797, 0.8783060312271118, 1.325385332107544, 1.772464632987976, 2.219543933868408, 2.666623115539551, 3.1137025356292725, 3.560781955718994, 4.00786018371582, 4.454939365386963, 4.9020185470581055, 5.349098205566406, 5.796177387237549, 6.243256568908691, 6.690336227416992, 7.137415409088135, 7.584494590759277, 8.031574249267578, 8.478652954101562, 8.925732612609863, 9.372812271118164, 9.819890975952148, 10.26697063446045, 10.71405029296875, 11.161128997802734, 11.608208656311035, 12.05528736114502, 12.50236701965332, 12.949445724487305, 13.396525382995605, 13.843605041503906, 14.29068374633789, 14.737763404846191, 15.184843063354492, 15.631921768188477, 16.07900047302246, 16.526081085205078, 16.973159790039062, 17.420238494873047, 17.867319107055664, 18.31439781188965]}, "gradients/encoder.encoder.layers.19.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 3.0, 0.0, 4.0, 1.0, 2.0, 6.0, 6.0, 5.0, 15.0, 12.0, 16.0, 18.0, 32.0, 24.0, 38.0, 39.0, 31.0, 37.0, 40.0, 55.0, 53.0, 44.0, 49.0, 47.0, 43.0, 43.0, 45.0, 36.0, 29.0, 35.0, 37.0, 30.0, 15.0, 24.0, 24.0, 16.0, 16.0, 13.0, 7.0, 8.0, 5.0, 5.0, 2.0, 4.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.905524253845215, -7.677194595336914, -7.4488654136657715, -7.220535755157471, -6.992206573486328, -6.763876914978027, -6.535547256469727, -6.307218074798584, -6.078888416290283, -5.850558757781982, -5.62222957611084, -5.393899917602539, -5.1655707359313965, -4.937241077423096, -4.708911895751953, -4.480582237243652, -4.252252578735352, -4.023922920227051, -3.795593738555908, -3.5672640800476074, -3.3389346599578857, -3.110605239868164, -2.8822758197784424, -2.6539463996887207, -2.425617218017578, -2.1972877979278564, -1.9689582586288452, -1.7406288385391235, -1.5122992992401123, -1.2839698791503906, -1.055640459060669, -0.8273109197616577, -0.5989813804626465, -0.37065190076828003, -0.14232245087623596, 0.0860069990158081, 0.31433647871017456, 0.542665958404541, 0.7709953784942627, 0.9993249177932739, 1.2276543378829956, 1.4559837579727173, 1.6843132972717285, 1.9126427173614502, 2.140972137451172, 2.3693017959594727, 2.5976309776306152, 2.825960636138916, 3.0542900562286377, 3.2826194763183594, 3.510948896408081, 3.7392783164978027, 3.9676079750061035, 4.195937156677246, 4.424266815185547, 4.652596473693848, 4.88092565536499, 5.109255313873291, 5.337584495544434, 5.565914154052734, 5.794243335723877, 6.022572994232178, 6.25090217590332, 6.479231834411621, 6.707561492919922]}, "gradients/encoder.encoder.layers.18.feed_forward.output_dense.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 2.0, 1.0, 2.0, 6.0, 2.0, 9.0, 13.0, 18.0, 28.0, 46.0, 93.0, 198.0, 380.0, 933.0, 3191.0, 23649.0, 4079324.0, 77949.0, 5890.0, 1431.0, 524.0, 245.0, 123.0, 76.0, 52.0, 35.0, 17.0, 15.0, 9.0, 10.0, 4.0, 2.0, 4.0, 4.0, 2.0, 1.0, 2.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.111328125, -2.0350341796875, -1.958740234375, -1.8824462890625, -1.80615234375, -1.7298583984375, -1.653564453125, -1.5772705078125, -1.5009765625, -1.4246826171875, -1.348388671875, -1.2720947265625, -1.19580078125, -1.1195068359375, -1.043212890625, -0.9669189453125, -0.890625, -0.8143310546875, -0.738037109375, -0.6617431640625, -0.58544921875, -0.5091552734375, -0.432861328125, -0.3565673828125, -0.2802734375, -0.2039794921875, -0.127685546875, -0.0513916015625, 0.02490234375, 0.1011962890625, 0.177490234375, 0.2537841796875, 0.330078125, 0.4063720703125, 0.482666015625, 0.5589599609375, 0.63525390625, 0.7115478515625, 0.787841796875, 0.8641357421875, 0.9404296875, 1.0167236328125, 1.093017578125, 1.1693115234375, 1.24560546875, 1.3218994140625, 1.398193359375, 1.4744873046875, 1.55078125, 1.6270751953125, 1.703369140625, 1.7796630859375, 1.85595703125, 1.9322509765625, 2.008544921875, 2.0848388671875, 2.1611328125, 2.2374267578125, 2.313720703125, 2.3900146484375, 2.46630859375, 2.5426025390625, 2.618896484375, 2.6951904296875, 2.771484375]}, "gradients/encoder.encoder.layers.18.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 2.0, 0.0, 1.0, 3.0, 4.0, 5.0, 14.0, 23.0, 37.0, 64.0, 100.0, 127.0, 170.0, 147.0, 121.0, 68.0, 55.0, 35.0, 18.0, 7.0, 4.0, 3.0, 1.0, 1.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.49169921875, -0.47399139404296875, -0.4562835693359375, -0.43857574462890625, -0.420867919921875, -0.40316009521484375, -0.3854522705078125, -0.36774444580078125, -0.35003662109375, -0.33232879638671875, -0.3146209716796875, -0.29691314697265625, -0.279205322265625, -0.26149749755859375, -0.2437896728515625, -0.22608184814453125, -0.2083740234375, -0.19066619873046875, -0.1729583740234375, -0.15525054931640625, -0.137542724609375, -0.11983489990234375, -0.1021270751953125, -0.08441925048828125, -0.06671142578125, -0.04900360107421875, -0.0312957763671875, -0.01358795166015625, 0.004119873046875, 0.02182769775390625, 0.0395355224609375, 0.05724334716796875, 0.074951171875, 0.09265899658203125, 0.1103668212890625, 0.12807464599609375, 0.145782470703125, 0.16349029541015625, 0.1811981201171875, 0.19890594482421875, 0.21661376953125, 0.23432159423828125, 0.2520294189453125, 0.26973724365234375, 0.287445068359375, 0.30515289306640625, 0.3228607177734375, 0.34056854248046875, 0.3582763671875, 0.37598419189453125, 0.3936920166015625, 0.41139984130859375, 0.429107666015625, 0.44681549072265625, 0.4645233154296875, 0.48223114013671875, 0.49993896484375, 0.5176467895507812, 0.5353546142578125, 0.5530624389648438, 0.570770263671875, 0.5884780883789062, 0.6061859130859375, 0.6238937377929688, 0.6416015625]}, "gradients/encoder.encoder.layers.18.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 0.0, 2.0, 8.0, 5.0, 10.0, 18.0, 16.0, 43.0, 88.0, 214.0, 682.0, 2499.0, 12292.0, 628850.0, 3528463.0, 16903.0, 2867.0, 877.0, 252.0, 98.0, 46.0, 21.0, 12.0, 3.0, 5.0, 1.0, 4.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.7958984375, -1.7399139404296875, -1.683929443359375, -1.6279449462890625, -1.57196044921875, -1.5159759521484375, -1.459991455078125, -1.4040069580078125, -1.3480224609375, -1.2920379638671875, -1.236053466796875, -1.1800689697265625, -1.12408447265625, -1.0680999755859375, -1.012115478515625, -0.9561309814453125, -0.900146484375, -0.8441619873046875, -0.788177490234375, -0.7321929931640625, -0.67620849609375, -0.6202239990234375, -0.564239501953125, -0.5082550048828125, -0.4522705078125, -0.3962860107421875, -0.340301513671875, -0.2843170166015625, -0.22833251953125, -0.1723480224609375, -0.116363525390625, -0.0603790283203125, -0.00439453125, 0.0515899658203125, 0.107574462890625, 0.1635589599609375, 0.21954345703125, 0.2755279541015625, 0.331512451171875, 0.3874969482421875, 0.4434814453125, 0.4994659423828125, 0.555450439453125, 0.6114349365234375, 0.66741943359375, 0.7234039306640625, 0.779388427734375, 0.8353729248046875, 0.891357421875, 0.9473419189453125, 1.003326416015625, 1.0593109130859375, 1.11529541015625, 1.1712799072265625, 1.227264404296875, 1.2832489013671875, 1.3392333984375, 1.3952178955078125, 1.451202392578125, 1.5071868896484375, 1.56317138671875, 1.6191558837890625, 1.675140380859375, 1.7311248779296875, 1.787109375]}, "gradients/encoder.encoder.layers.18.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 4.0, 2.0, 2.0, 2.0, 5.0, 4.0, 5.0, 10.0, 8.0, 10.0, 18.0, 26.0, 39.0, 73.0, 186.0, 606.0, 2106.0, 543.0, 192.0, 87.0, 43.0, 32.0, 15.0, 15.0, 12.0, 8.0, 7.0, 6.0, 3.0, 2.0, 2.0, 2.0, 2.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.3125, -0.30255889892578125, -0.2926177978515625, -0.28267669677734375, -0.272735595703125, -0.26279449462890625, -0.2528533935546875, -0.24291229248046875, -0.23297119140625, -0.22303009033203125, -0.2130889892578125, -0.20314788818359375, -0.193206787109375, -0.18326568603515625, -0.1733245849609375, -0.16338348388671875, -0.1534423828125, -0.14350128173828125, -0.1335601806640625, -0.12361907958984375, -0.113677978515625, -0.10373687744140625, -0.0937957763671875, -0.08385467529296875, -0.07391357421875, -0.06397247314453125, -0.0540313720703125, -0.04409027099609375, -0.034149169921875, -0.02420806884765625, -0.0142669677734375, -0.00432586669921875, 0.005615234375, 0.01555633544921875, 0.0254974365234375, 0.03543853759765625, 0.045379638671875, 0.05532073974609375, 0.0652618408203125, 0.07520294189453125, 0.08514404296875, 0.09508514404296875, 0.1050262451171875, 0.11496734619140625, 0.124908447265625, 0.13484954833984375, 0.1447906494140625, 0.15473175048828125, 0.1646728515625, 0.17461395263671875, 0.1845550537109375, 0.19449615478515625, 0.204437255859375, 0.21437835693359375, 0.2243194580078125, 0.23426055908203125, 0.24420166015625, 0.25414276123046875, 0.2640838623046875, 0.27402496337890625, 0.283966064453125, 0.29390716552734375, 0.3038482666015625, 0.31378936767578125, 0.32373046875]}, "gradients/encoder.encoder.layers.18.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 2.0, 14.0, 39.0, 192.0, 437.0, 269.0, 44.0, 8.0, 3.0, 2.0, 1.0, 3.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.591681480407715, -4.488351821899414, -4.385021686553955, -4.281692028045654, -4.1783623695373535, -4.075032711029053, -3.9717025756835938, -3.868372917175293, -3.765043258666992, -3.6617133617401123, -3.5583837032318115, -3.4550538063049316, -3.351724147796631, -3.248394250869751, -3.145064353942871, -3.0417346954345703, -2.9384047985076904, -2.8350749015808105, -2.7317452430725098, -2.62841534614563, -2.525085687637329, -2.421755790710449, -2.3184261322021484, -2.2150962352752686, -2.1117663383483887, -2.008436441421509, -1.905106782913208, -1.8017768859863281, -1.6984472274780273, -1.5951173305511475, -1.4917875528335571, -1.3884577751159668, -1.285127878189087, -1.1817981004714966, -1.0784683227539062, -0.9751384854316711, -0.8718087077140808, -0.7684789299964905, -0.6651490926742554, -0.561819314956665, -0.4584895372390747, -0.3551597595214844, -0.25182995200157166, -0.14850014448165894, -0.045170366764068604, 0.05815941095352173, 0.16148924827575684, 0.26481902599334717, 0.3681488037109375, 0.47147858142852783, 0.5748083591461182, 0.6781381964683533, 0.7814679741859436, 0.8847977519035339, 0.988127589225769, 1.0914573669433594, 1.1947871446609497, 1.29811692237854, 1.4014467000961304, 1.5047764778137207, 1.6081063747406006, 1.7114360332489014, 1.8147659301757812, 1.9180957078933716, 2.021425485610962]}, "gradients/encoder.encoder.layers.18.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 0.0, 1.0, 4.0, 3.0, 1.0, 6.0, 8.0, 6.0, 11.0, 14.0, 14.0, 17.0, 14.0, 24.0, 37.0, 35.0, 42.0, 46.0, 41.0, 43.0, 44.0, 60.0, 60.0, 52.0, 59.0, 44.0, 41.0, 44.0, 44.0, 27.0, 31.0, 29.0, 20.0, 22.0, 16.0, 13.0, 9.0, 9.0, 7.0, 6.0, 4.0, 3.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.089052438735962, -1.0584359169006348, -1.0278193950653076, -0.9972027540206909, -0.9665862321853638, -0.9359697103500366, -0.9053531289100647, -0.8747365474700928, -0.8441200256347656, -0.8135035037994385, -0.7828869223594666, -0.7522703409194946, -0.7216538190841675, -0.6910372972488403, -0.6604207158088684, -0.6298041343688965, -0.5991876125335693, -0.5685710906982422, -0.5379545092582703, -0.5073379278182983, -0.4767214059829712, -0.44610485434532166, -0.4154883027076721, -0.3848717510700226, -0.35425519943237305, -0.3236386477947235, -0.293022096157074, -0.26240554451942444, -0.2317889928817749, -0.20117244124412537, -0.17055588960647583, -0.1399393379688263, -0.10932278633117676, -0.07870623469352722, -0.048089683055877686, -0.01747313141822815, 0.013143420219421387, 0.04375997185707092, 0.07437652349472046, 0.10499307513237, 0.13560962677001953, 0.16622617840766907, 0.1968427300453186, 0.22745928168296814, 0.2580758333206177, 0.2886923849582672, 0.31930893659591675, 0.3499254882335663, 0.3805420398712158, 0.41115859150886536, 0.4417751431465149, 0.47239169478416443, 0.503008246421814, 0.5336247682571411, 0.564241349697113, 0.594857931137085, 0.6254744529724121, 0.6560909748077393, 0.6867075562477112, 0.7173241376876831, 0.7479406595230103, 0.7785571813583374, 0.8091737627983093, 0.8397903442382812, 0.8704068660736084]}, "gradients/encoder.encoder.layers.18.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 4.0, 1.0, 2.0, 5.0, 5.0, 8.0, 6.0, 28.0, 35.0, 31.0, 56.0, 104.0, 191.0, 319.0, 646.0, 1487.0, 4737.0, 28402.0, 460831.0, 512121.0, 31361.0, 5121.0, 1580.0, 662.0, 331.0, 177.0, 114.0, 68.0, 40.0, 33.0, 18.0, 11.0, 10.0, 4.0, 5.0, 3.0, 1.0, 3.0, 4.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-1.923828125, -1.868896484375, -1.81396484375, -1.759033203125, -1.7041015625, -1.649169921875, -1.59423828125, -1.539306640625, -1.484375, -1.429443359375, -1.37451171875, -1.319580078125, -1.2646484375, -1.209716796875, -1.15478515625, -1.099853515625, -1.044921875, -0.989990234375, -0.93505859375, -0.880126953125, -0.8251953125, -0.770263671875, -0.71533203125, -0.660400390625, -0.60546875, -0.550537109375, -0.49560546875, -0.440673828125, -0.3857421875, -0.330810546875, -0.27587890625, -0.220947265625, -0.166015625, -0.111083984375, -0.05615234375, -0.001220703125, 0.0537109375, 0.108642578125, 0.16357421875, 0.218505859375, 0.2734375, 0.328369140625, 0.38330078125, 0.438232421875, 0.4931640625, 0.548095703125, 0.60302734375, 0.657958984375, 0.712890625, 0.767822265625, 0.82275390625, 0.877685546875, 0.9326171875, 0.987548828125, 1.04248046875, 1.097412109375, 1.15234375, 1.207275390625, 1.26220703125, 1.317138671875, 1.3720703125, 1.427001953125, 1.48193359375, 1.536865234375, 1.591796875]}, "gradients/encoder.encoder.layers.18.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 1.0, 0.0, 1.0, 2.0, 4.0, 4.0, 8.0, 24.0, 24.0, 60.0, 84.0, 132.0, 139.0, 162.0, 119.0, 96.0, 71.0, 41.0, 17.0, 7.0, 7.0, 5.0, 1.0, 0.0, 1.0, 3.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.487060546875, -0.4697456359863281, -0.45243072509765625, -0.4351158142089844, -0.4178009033203125, -0.4004859924316406, -0.38317108154296875, -0.3658561706542969, -0.348541259765625, -0.3312263488769531, -0.31391143798828125, -0.2965965270996094, -0.2792816162109375, -0.2619667053222656, -0.24465179443359375, -0.22733688354492188, -0.21002197265625, -0.19270706176757812, -0.17539215087890625, -0.15807723999023438, -0.1407623291015625, -0.12344741821289062, -0.10613250732421875, -0.08881759643554688, -0.071502685546875, -0.054187774658203125, -0.03687286376953125, -0.019557952880859375, -0.0022430419921875, 0.015071868896484375, 0.03238677978515625, 0.049701690673828125, 0.0670166015625, 0.08433151245117188, 0.10164642333984375, 0.11896133422851562, 0.1362762451171875, 0.15359115600585938, 0.17090606689453125, 0.18822097778320312, 0.205535888671875, 0.22285079956054688, 0.24016571044921875, 0.2574806213378906, 0.2747955322265625, 0.2921104431152344, 0.30942535400390625, 0.3267402648925781, 0.34405517578125, 0.3613700866699219, 0.37868499755859375, 0.3959999084472656, 0.4133148193359375, 0.4306297302246094, 0.44794464111328125, 0.4652595520019531, 0.482574462890625, 0.4998893737792969, 0.5172042846679688, 0.5345191955566406, 0.5518341064453125, 0.5691490173339844, 0.5864639282226562, 0.6037788391113281, 0.62109375]}, "gradients/encoder.encoder.layers.18.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 1.0, 2.0, 0.0, 0.0, 3.0, 4.0, 5.0, 9.0, 3.0, 15.0, 5.0, 16.0, 16.0, 26.0, 30.0, 55.0, 66.0, 107.0, 140.0, 221.0, 339.0, 549.0, 908.0, 1956.0, 4961.0, 17286.0, 85799.0, 460706.0, 387037.0, 65898.0, 14031.0, 4234.0, 1620.0, 883.0, 521.0, 343.0, 213.0, 171.0, 113.0, 78.0, 67.0, 30.0, 26.0, 21.0, 14.0, 10.0, 11.0, 7.0, 6.0, 0.0, 3.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-0.98046875, -0.9497604370117188, -0.9190521240234375, -0.8883438110351562, -0.857635498046875, -0.8269271850585938, -0.7962188720703125, -0.7655105590820312, -0.73480224609375, -0.7040939331054688, -0.6733856201171875, -0.6426773071289062, -0.611968994140625, -0.5812606811523438, -0.5505523681640625, -0.5198440551757812, -0.4891357421875, -0.45842742919921875, -0.4277191162109375, -0.39701080322265625, -0.366302490234375, -0.33559417724609375, -0.3048858642578125, -0.27417755126953125, -0.24346923828125, -0.21276092529296875, -0.1820526123046875, -0.15134429931640625, -0.120635986328125, -0.08992767333984375, -0.0592193603515625, -0.02851104736328125, 0.002197265625, 0.03290557861328125, 0.0636138916015625, 0.09432220458984375, 0.125030517578125, 0.15573883056640625, 0.1864471435546875, 0.21715545654296875, 0.24786376953125, 0.27857208251953125, 0.3092803955078125, 0.33998870849609375, 0.370697021484375, 0.40140533447265625, 0.4321136474609375, 0.46282196044921875, 0.4935302734375, 0.5242385864257812, 0.5549468994140625, 0.5856552124023438, 0.616363525390625, 0.6470718383789062, 0.6777801513671875, 0.7084884643554688, 0.73919677734375, 0.7699050903320312, 0.8006134033203125, 0.8313217163085938, 0.862030029296875, 0.8927383422851562, 0.9234466552734375, 0.9541549682617188, 0.98486328125]}, "gradients/encoder.encoder.layers.18.attention.v_proj.bias": {"_type": "histogram", "values": [4.0, 0.0, 0.0, 2.0, 5.0, 2.0, 2.0, 6.0, 3.0, 4.0, 5.0, 10.0, 6.0, 17.0, 11.0, 13.0, 8.0, 18.0, 25.0, 30.0, 32.0, 33.0, 37.0, 46.0, 50.0, 38.0, 40.0, 50.0, 55.0, 40.0, 48.0, 40.0, 35.0, 38.0, 36.0, 25.0, 26.0, 22.0, 19.0, 15.0, 25.0, 20.0, 17.0, 7.0, 11.0, 8.0, 4.0, 9.0, 6.0, 8.0, 4.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.88818359375, -0.8559799194335938, -0.8237762451171875, -0.7915725708007812, -0.759368896484375, -0.7271652221679688, -0.6949615478515625, -0.6627578735351562, -0.63055419921875, -0.5983505249023438, -0.5661468505859375, -0.5339431762695312, -0.501739501953125, -0.46953582763671875, -0.4373321533203125, -0.40512847900390625, -0.3729248046875, -0.34072113037109375, -0.3085174560546875, -0.27631378173828125, -0.244110107421875, -0.21190643310546875, -0.1797027587890625, -0.14749908447265625, -0.11529541015625, -0.08309173583984375, -0.0508880615234375, -0.01868438720703125, 0.013519287109375, 0.04572296142578125, 0.0779266357421875, 0.11013031005859375, 0.142333984375, 0.17453765869140625, 0.2067413330078125, 0.23894500732421875, 0.271148681640625, 0.30335235595703125, 0.3355560302734375, 0.36775970458984375, 0.39996337890625, 0.43216705322265625, 0.4643707275390625, 0.49657440185546875, 0.528778076171875, 0.5609817504882812, 0.5931854248046875, 0.6253890991210938, 0.6575927734375, 0.6897964477539062, 0.7220001220703125, 0.7542037963867188, 0.786407470703125, 0.8186111450195312, 0.8508148193359375, 0.8830184936523438, 0.91522216796875, 0.9474258422851562, 0.9796295166015625, 1.0118331909179688, 1.044036865234375, 1.0762405395507812, 1.1084442138671875, 1.1406478881835938, 1.1728515625]}, "gradients/encoder.encoder.layers.18.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0, 2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 2.0, 5.0, 9.0, 15.0, 10.0, 20.0, 34.0, 50.0, 82.0, 154.0, 337.0, 902.0, 3412.0, 33519.0, 874066.0, 127379.0, 6265.0, 1288.0, 489.0, 220.0, 120.0, 62.0, 36.0, 28.0, 16.0, 11.0, 8.0, 5.0, 2.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.60107421875, -0.5846824645996094, -0.5682907104492188, -0.5518989562988281, -0.5355072021484375, -0.5191154479980469, -0.5027236938476562, -0.4863319396972656, -0.469940185546875, -0.4535484313964844, -0.43715667724609375, -0.4207649230957031, -0.4043731689453125, -0.3879814147949219, -0.37158966064453125, -0.3551979064941406, -0.33880615234375, -0.3224143981933594, -0.30602264404296875, -0.2896308898925781, -0.2732391357421875, -0.2568473815917969, -0.24045562744140625, -0.22406387329101562, -0.207672119140625, -0.19128036499023438, -0.17488861083984375, -0.15849685668945312, -0.1421051025390625, -0.12571334838867188, -0.10932159423828125, -0.09292984008789062, -0.0765380859375, -0.060146331787109375, -0.04375457763671875, -0.027362823486328125, -0.0109710693359375, 0.005420684814453125, 0.02181243896484375, 0.038204193115234375, 0.054595947265625, 0.07098770141601562, 0.08737945556640625, 0.10377120971679688, 0.1201629638671875, 0.13655471801757812, 0.15294647216796875, 0.16933822631835938, 0.18572998046875, 0.20212173461914062, 0.21851348876953125, 0.23490524291992188, 0.2512969970703125, 0.2676887512207031, 0.28408050537109375, 0.3004722595214844, 0.316864013671875, 0.3332557678222656, 0.34964752197265625, 0.3660392761230469, 0.3824310302734375, 0.3988227844238281, 0.41521453857421875, 0.4316062927246094, 0.447998046875]}, "gradients/encoder.encoder.layers.18.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 4.0, 1.0, 3.0, 1.0, 2.0, 4.0, 8.0, 6.0, 15.0, 6.0, 16.0, 21.0, 25.0, 37.0, 48.0, 64.0, 142.0, 166.0, 143.0, 85.0, 56.0, 38.0, 25.0, 21.0, 18.0, 13.0, 8.0, 9.0, 3.0, 3.0, 5.0, 2.0, 5.0, 1.0, 4.0, 3.0, 0.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.00014090538024902344, -0.0001371670514345169, -0.00013342872262001038, -0.00012969039380550385, -0.00012595206499099731, -0.00012221373617649078, -0.00011847540736198425, -0.00011473707854747772, -0.00011099874973297119, -0.00010726042091846466, -0.00010352209210395813, -9.97837632894516e-05, -9.604543447494507e-05, -9.230710566043854e-05, -8.856877684593201e-05, -8.483044803142548e-05, -8.109211921691895e-05, -7.735379040241241e-05, -7.361546158790588e-05, -6.987713277339935e-05, -6.613880395889282e-05, -6.240047514438629e-05, -5.866214632987976e-05, -5.492381751537323e-05, -5.11854887008667e-05, -4.744715988636017e-05, -4.370883107185364e-05, -3.997050225734711e-05, -3.6232173442840576e-05, -3.2493844628334045e-05, -2.8755515813827515e-05, -2.5017186999320984e-05, -2.1278858184814453e-05, -1.7540529370307922e-05, -1.3802200555801392e-05, -1.006387174129486e-05, -6.32554292678833e-06, -2.5872141122817993e-06, 1.1511147022247314e-06, 4.889443516731262e-06, 8.627772331237793e-06, 1.2366101145744324e-05, 1.6104429960250854e-05, 1.9842758774757385e-05, 2.3581087589263916e-05, 2.7319416403770447e-05, 3.105774521827698e-05, 3.479607403278351e-05, 3.853440284729004e-05, 4.227273166179657e-05, 4.60110604763031e-05, 4.974938929080963e-05, 5.348771810531616e-05, 5.722604691982269e-05, 6.0964375734329224e-05, 6.470270454883575e-05, 6.844103336334229e-05, 7.217936217784882e-05, 7.591769099235535e-05, 7.965601980686188e-05, 8.339434862136841e-05, 8.713267743587494e-05, 9.087100625038147e-05, 9.4609335064888e-05, 9.834766387939453e-05]}, "gradients/encoder.encoder.layers.18.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 3.0, 4.0, 4.0, 6.0, 18.0, 16.0, 29.0, 34.0, 89.0, 186.0, 479.0, 1630.0, 9739.0, 502863.0, 520804.0, 10224.0, 1534.0, 492.0, 205.0, 75.0, 39.0, 40.0, 18.0, 12.0, 7.0, 8.0, 4.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.7138671875, -0.6922378540039062, -0.6706085205078125, -0.6489791870117188, -0.627349853515625, -0.6057205200195312, -0.5840911865234375, -0.5624618530273438, -0.54083251953125, -0.5192031860351562, -0.4975738525390625, -0.47594451904296875, -0.454315185546875, -0.43268585205078125, -0.4110565185546875, -0.38942718505859375, -0.3677978515625, -0.34616851806640625, -0.3245391845703125, -0.30290985107421875, -0.281280517578125, -0.25965118408203125, -0.2380218505859375, -0.21639251708984375, -0.19476318359375, -0.17313385009765625, -0.1515045166015625, -0.12987518310546875, -0.108245849609375, -0.08661651611328125, -0.0649871826171875, -0.04335784912109375, -0.021728515625, -9.918212890625e-05, 0.0215301513671875, 0.04315948486328125, 0.064788818359375, 0.08641815185546875, 0.1080474853515625, 0.12967681884765625, 0.15130615234375, 0.17293548583984375, 0.1945648193359375, 0.21619415283203125, 0.237823486328125, 0.25945281982421875, 0.2810821533203125, 0.30271148681640625, 0.3243408203125, 0.34597015380859375, 0.3675994873046875, 0.38922882080078125, 0.410858154296875, 0.43248748779296875, 0.4541168212890625, 0.47574615478515625, 0.49737548828125, 0.5190048217773438, 0.5406341552734375, 0.5622634887695312, 0.583892822265625, 0.6055221557617188, 0.6271514892578125, 0.6487808227539062, 0.67041015625]}, "gradients/encoder.encoder.layers.18.attention.q_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 1.0, 0.0, 1.0, 2.0, 1.0, 7.0, 6.0, 8.0, 13.0, 30.0, 69.0, 110.0, 130.0, 193.0, 149.0, 126.0, 74.0, 35.0, 14.0, 12.0, 13.0, 5.0, 2.0, 2.0, 3.0, 3.0, 4.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.63037109375, -0.6136627197265625, -0.596954345703125, -0.5802459716796875, -0.56353759765625, -0.5468292236328125, -0.530120849609375, -0.5134124755859375, -0.4967041015625, -0.4799957275390625, -0.463287353515625, -0.4465789794921875, -0.42987060546875, -0.4131622314453125, -0.396453857421875, -0.3797454833984375, -0.363037109375, -0.3463287353515625, -0.329620361328125, -0.3129119873046875, -0.29620361328125, -0.2794952392578125, -0.262786865234375, -0.2460784912109375, -0.2293701171875, -0.2126617431640625, -0.195953369140625, -0.1792449951171875, -0.16253662109375, -0.1458282470703125, -0.129119873046875, -0.1124114990234375, -0.095703125, -0.0789947509765625, -0.062286376953125, -0.0455780029296875, -0.02886962890625, -0.0121612548828125, 0.004547119140625, 0.0212554931640625, 0.0379638671875, 0.0546722412109375, 0.071380615234375, 0.0880889892578125, 0.10479736328125, 0.1215057373046875, 0.138214111328125, 0.1549224853515625, 0.171630859375, 0.1883392333984375, 0.205047607421875, 0.2217559814453125, 0.23846435546875, 0.2551727294921875, 0.271881103515625, 0.2885894775390625, 0.3052978515625, 0.3220062255859375, 0.338714599609375, 0.3554229736328125, 0.37213134765625, 0.3888397216796875, 0.405548095703125, 0.4222564697265625, 0.43896484375]}, "gradients/encoder.encoder.layers.18.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 1.0, 6.0, 5.0, 7.0, 19.0, 38.0, 47.0, 95.0, 155.0, 205.0, 174.0, 128.0, 49.0, 34.0, 14.0, 14.0, 10.0, 5.0, 2.0, 2.0, 0.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-5.40093469619751, -5.198666572570801, -4.996398448944092, -4.794130325317383, -4.591862201690674, -4.389594078063965, -4.187325954437256, -3.985057830810547, -3.782789707183838, -3.580521583557129, -3.37825345993042, -3.175985336303711, -2.973717212677002, -2.771449089050293, -2.569180965423584, -2.366912841796875, -2.164644718170166, -1.962376594543457, -1.760108470916748, -1.557840347290039, -1.35557222366333, -1.153304100036621, -0.9510359764099121, -0.7487678527832031, -0.5464997291564941, -0.34423160552978516, -0.14196348190307617, 0.06030464172363281, 0.2625727653503418, 0.4648408889770508, 0.6671090126037598, 0.8693771362304688, 1.071645736694336, 1.273913860321045, 1.476181983947754, 1.678450107574463, 1.8807182312011719, 2.082986354827881, 2.28525447845459, 2.487522602081299, 2.689790725708008, 2.892058849334717, 3.094326972961426, 3.2965950965881348, 3.4988632202148438, 3.7011313438415527, 3.9033994674682617, 4.105667591094971, 4.30793571472168, 4.510203838348389, 4.712471961975098, 4.914740085601807, 5.117008209228516, 5.319276332855225, 5.521544456481934, 5.723812580108643, 5.926080703735352, 6.1283488273620605, 6.3306169509887695, 6.5328850746154785, 6.7351531982421875, 6.9374213218688965, 7.1396894454956055, 7.3419575691223145, 7.544225692749023]}, "gradients/encoder.encoder.layers.18.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 2.0, 7.0, 5.0, 6.0, 13.0, 17.0, 16.0, 26.0, 21.0, 45.0, 25.0, 58.0, 56.0, 56.0, 66.0, 79.0, 62.0, 69.0, 47.0, 60.0, 59.0, 48.0, 29.0, 27.0, 25.0, 15.0, 20.0, 12.0, 7.0, 5.0, 6.0, 3.0, 3.0, 3.0, 5.0, 3.0, 0.0, 2.0, 1.0, 3.0, 0.0, 1.0, 1.0, 0.0, 2.0], "bins": [-8.962808609008789, -8.708889961242676, -8.454971313476562, -8.20105266571045, -7.947134494781494, -7.693215847015381, -7.439297676086426, -7.1853790283203125, -6.931460380554199, -6.677541732788086, -6.423623085021973, -6.169704914093018, -5.915786266326904, -5.661867618560791, -5.407949447631836, -5.154030799865723, -4.900112152099609, -4.646193504333496, -4.392274856567383, -4.138356685638428, -3.8844380378723145, -3.630519390106201, -3.376600980758667, -3.122682571411133, -2.8687639236450195, -2.6148452758789062, -2.360926866531372, -2.107008457183838, -1.8530898094177246, -1.5991712808609009, -1.3452527523040771, -1.0913342237472534, -0.8374161720275879, -0.5834976434707642, -0.32957911491394043, -0.0756605863571167, 0.17825794219970703, 0.43217647075653076, 0.6860949993133545, 0.9400135278701782, 1.193932056427002, 1.4478505849838257, 1.7017691135406494, 1.9556876420974731, 2.209606170654297, 2.46352481842041, 2.7174432277679443, 2.9713616371154785, 3.225280284881592, 3.479198932647705, 3.7331173419952393, 3.9870357513427734, 4.240954399108887, 4.494873046875, 4.748791694641113, 5.002709865570068, 5.256628513336182, 5.510547161102295, 5.76446533203125, 6.018383979797363, 6.272302627563477, 6.52622127532959, 6.780139923095703, 7.034058094024658, 7.2879767417907715]}, "gradients/encoder.encoder.layers.17.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 4.0, 4.0, 10.0, 11.0, 18.0, 30.0, 33.0, 61.0, 100.0, 148.0, 289.0, 657.0, 1492.0, 4789.0, 25431.0, 2687333.0, 1441119.0, 24294.0, 4948.0, 1753.0, 767.0, 375.0, 228.0, 136.0, 70.0, 49.0, 42.0, 28.0, 20.0, 9.0, 7.0, 6.0, 14.0, 1.0, 4.0, 4.0, 3.0, 2.0, 0.0, 2.0, 3.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.669921875, -1.6080322265625, -1.546142578125, -1.4842529296875, -1.42236328125, -1.3604736328125, -1.298583984375, -1.2366943359375, -1.1748046875, -1.1129150390625, -1.051025390625, -0.9891357421875, -0.92724609375, -0.8653564453125, -0.803466796875, -0.7415771484375, -0.6796875, -0.6177978515625, -0.555908203125, -0.4940185546875, -0.43212890625, -0.3702392578125, -0.308349609375, -0.2464599609375, -0.1845703125, -0.1226806640625, -0.060791015625, 0.0010986328125, 0.06298828125, 0.1248779296875, 0.186767578125, 0.2486572265625, 0.310546875, 0.3724365234375, 0.434326171875, 0.4962158203125, 0.55810546875, 0.6199951171875, 0.681884765625, 0.7437744140625, 0.8056640625, 0.8675537109375, 0.929443359375, 0.9913330078125, 1.05322265625, 1.1151123046875, 1.177001953125, 1.2388916015625, 1.30078125, 1.3626708984375, 1.424560546875, 1.4864501953125, 1.54833984375, 1.6102294921875, 1.672119140625, 1.7340087890625, 1.7958984375, 1.8577880859375, 1.919677734375, 1.9815673828125, 2.04345703125, 2.1053466796875, 2.167236328125, 2.2291259765625, 2.291015625]}, "gradients/encoder.encoder.layers.17.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 1.0, 1.0, 2.0, 3.0, 4.0, 7.0, 15.0, 32.0, 41.0, 88.0, 90.0, 114.0, 136.0, 138.0, 126.0, 75.0, 57.0, 38.0, 13.0, 10.0, 9.0, 1.0, 2.0, 5.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.50146484375, -0.48340606689453125, -0.4653472900390625, -0.44728851318359375, -0.429229736328125, -0.41117095947265625, -0.3931121826171875, -0.37505340576171875, -0.35699462890625, -0.33893585205078125, -0.3208770751953125, -0.30281829833984375, -0.284759521484375, -0.26670074462890625, -0.2486419677734375, -0.23058319091796875, -0.2125244140625, -0.19446563720703125, -0.1764068603515625, -0.15834808349609375, -0.140289306640625, -0.12223052978515625, -0.1041717529296875, -0.08611297607421875, -0.06805419921875, -0.04999542236328125, -0.0319366455078125, -0.01387786865234375, 0.004180908203125, 0.02223968505859375, 0.0402984619140625, 0.05835723876953125, 0.076416015625, 0.09447479248046875, 0.1125335693359375, 0.13059234619140625, 0.148651123046875, 0.16670989990234375, 0.1847686767578125, 0.20282745361328125, 0.22088623046875, 0.23894500732421875, 0.2570037841796875, 0.27506256103515625, 0.293121337890625, 0.31118011474609375, 0.3292388916015625, 0.34729766845703125, 0.3653564453125, 0.38341522216796875, 0.4014739990234375, 0.41953277587890625, 0.437591552734375, 0.45565032958984375, 0.4737091064453125, 0.49176788330078125, 0.50982666015625, 0.5278854370117188, 0.5459442138671875, 0.5640029907226562, 0.582061767578125, 0.6001205444335938, 0.6181793212890625, 0.6362380981445312, 0.654296875]}, "gradients/encoder.encoder.layers.17.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 5.0, 2.0, 2.0, 4.0, 12.0, 26.0, 47.0, 93.0, 177.0, 410.0, 1040.0, 5012.0, 182848.0, 3988846.0, 12714.0, 1866.0, 646.0, 265.0, 115.0, 69.0, 30.0, 18.0, 11.0, 6.0, 6.0, 7.0, 5.0, 2.0, 1.0, 2.0, 2.0, 0.0, 2.0, 0.0, 0.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.30078125, -2.212921142578125, -2.12506103515625, -2.037200927734375, -1.9493408203125, -1.861480712890625, -1.77362060546875, -1.685760498046875, -1.597900390625, -1.510040283203125, -1.42218017578125, -1.334320068359375, -1.2464599609375, -1.158599853515625, -1.07073974609375, -0.982879638671875, -0.89501953125, -0.807159423828125, -0.71929931640625, -0.631439208984375, -0.5435791015625, -0.455718994140625, -0.36785888671875, -0.279998779296875, -0.192138671875, -0.104278564453125, -0.01641845703125, 0.071441650390625, 0.1593017578125, 0.247161865234375, 0.33502197265625, 0.422882080078125, 0.5107421875, 0.598602294921875, 0.68646240234375, 0.774322509765625, 0.8621826171875, 0.950042724609375, 1.03790283203125, 1.125762939453125, 1.213623046875, 1.301483154296875, 1.38934326171875, 1.477203369140625, 1.5650634765625, 1.652923583984375, 1.74078369140625, 1.828643798828125, 1.91650390625, 2.004364013671875, 2.09222412109375, 2.180084228515625, 2.2679443359375, 2.355804443359375, 2.44366455078125, 2.531524658203125, 2.619384765625, 2.707244873046875, 2.79510498046875, 2.882965087890625, 2.9708251953125, 3.058685302734375, 3.14654541015625, 3.234405517578125, 3.322265625]}, "gradients/encoder.encoder.layers.17.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 4.0, 5.0, 8.0, 10.0, 18.0, 28.0, 57.0, 188.0, 1219.0, 2092.0, 294.0, 70.0, 31.0, 26.0, 10.0, 9.0, 4.0, 3.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.98583984375, -0.9541549682617188, -0.9224700927734375, -0.8907852172851562, -0.859100341796875, -0.8274154663085938, -0.7957305908203125, -0.7640457153320312, -0.73236083984375, -0.7006759643554688, -0.6689910888671875, -0.6373062133789062, -0.605621337890625, -0.5739364624023438, -0.5422515869140625, -0.5105667114257812, -0.4788818359375, -0.44719696044921875, -0.4155120849609375, -0.38382720947265625, -0.352142333984375, -0.32045745849609375, -0.2887725830078125, -0.25708770751953125, -0.22540283203125, -0.19371795654296875, -0.1620330810546875, -0.13034820556640625, -0.098663330078125, -0.06697845458984375, -0.0352935791015625, -0.00360870361328125, 0.028076171875, 0.05976104736328125, 0.0914459228515625, 0.12313079833984375, 0.154815673828125, 0.18650054931640625, 0.2181854248046875, 0.24987030029296875, 0.28155517578125, 0.31324005126953125, 0.3449249267578125, 0.37660980224609375, 0.408294677734375, 0.43997955322265625, 0.4716644287109375, 0.5033493041992188, 0.5350341796875, 0.5667190551757812, 0.5984039306640625, 0.6300888061523438, 0.661773681640625, 0.6934585571289062, 0.7251434326171875, 0.7568283081054688, 0.78851318359375, 0.8201980590820312, 0.8518829345703125, 0.8835678100585938, 0.915252685546875, 0.9469375610351562, 0.9786224365234375, 1.0103073120117188, 1.0419921875]}, "gradients/encoder.encoder.layers.17.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 4.0, 2.0, 2.0, 7.0, 3.0, 11.0, 18.0, 57.0, 129.0, 221.0, 250.0, 171.0, 67.0, 32.0, 18.0, 7.0, 5.0, 0.0, 1.0, 2.0, 2.0, 1.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.6571035385131836, -2.562452554702759, -2.467801809310913, -2.3731508255004883, -2.2785000801086426, -2.1838490962982178, -2.089198112487793, -1.9945472478866577, -1.8998963832855225, -1.8052455186843872, -1.710594654083252, -1.6159436702728271, -1.521292805671692, -1.4266419410705566, -1.3319909572601318, -1.2373400926589966, -1.1426892280578613, -1.048038363456726, -0.953387439250946, -0.858736515045166, -0.7640856504440308, -0.6694347858428955, -0.5747838616371155, -0.48013293743133545, -0.3854820728302002, -0.29083117842674255, -0.1961802840232849, -0.10152938961982727, -0.006878495216369629, 0.08777239918708801, 0.18242329359054565, 0.2770742177963257, 0.37172484397888184, 0.4663757383823395, 0.5610266327857971, 0.6556775569915771, 0.7503284215927124, 0.8449792861938477, 0.9396302103996277, 1.0342811346054077, 1.128931999206543, 1.2235828638076782, 1.3182337284088135, 1.4128847122192383, 1.5075355768203735, 1.6021864414215088, 1.6968374252319336, 1.7914882898330688, 1.886139154434204, 1.9807900190353394, 2.0754408836364746, 2.1700918674468994, 2.264742851257324, 2.35939359664917, 2.4540445804595947, 2.5486955642700195, 2.6433463096618652, 2.73799729347229, 2.8326480388641357, 2.9272990226745605, 3.0219497680664062, 3.116600751876831, 3.211251735687256, 3.3059024810791016, 3.4005534648895264]}, "gradients/encoder.encoder.layers.17.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 2.0, 2.0, 2.0, 5.0, 5.0, 13.0, 19.0, 27.0, 35.0, 63.0, 85.0, 118.0, 118.0, 152.0, 82.0, 94.0, 74.0, 39.0, 36.0, 15.0, 11.0, 6.0, 3.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-3.897249698638916, -3.7748000621795654, -3.652350425720215, -3.5299010276794434, -3.4074513912200928, -3.285001754760742, -3.1625521183013916, -3.040102481842041, -2.9176530838012695, -2.795203447341919, -2.6727538108825684, -2.550304412841797, -2.4278547763824463, -2.3054051399230957, -2.182955503463745, -2.0605058670043945, -1.938056230545044, -1.8156065940856934, -1.6931570768356323, -1.5707074403762817, -1.4482579231262207, -1.3258082866668701, -1.2033586502075195, -1.080909013748169, -0.9584594964981079, -0.8360099196434021, -0.7135603427886963, -0.5911107063293457, -0.4686611294746399, -0.3462115526199341, -0.2237619161605835, -0.10131233930587769, 0.021137237548828125, 0.14358682930469513, 0.26603642106056213, 0.38848602771759033, 0.5109356045722961, 0.633385181427002, 0.7558348178863525, 0.8782843947410583, 1.0007339715957642, 1.1231836080551147, 1.2456331253051758, 1.3680827617645264, 1.490532398223877, 1.612981915473938, 1.7354315519332886, 1.8578810691833496, 1.9803307056427002, 2.102780342102051, 2.2252299785614014, 2.347679615020752, 2.4701290130615234, 2.592578649520874, 2.7150282859802246, 2.837477922439575, 2.959927558898926, 3.0823771953582764, 3.204826831817627, 3.3272762298583984, 3.449725866317749, 3.5721755027770996, 3.69462513923645, 3.817074775695801, 3.9395241737365723]}, "gradients/encoder.encoder.layers.17.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 2.0, 0.0, 5.0, 11.0, 9.0, 21.0, 25.0, 38.0, 71.0, 132.0, 266.0, 582.0, 1348.0, 3975.0, 26566.0, 683457.0, 313892.0, 13308.0, 2809.0, 1096.0, 460.0, 221.0, 117.0, 69.0, 39.0, 17.0, 12.0, 4.0, 3.0, 3.0, 3.0, 1.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0], "bins": [-2.63671875, -2.56707763671875, -2.4974365234375, -2.42779541015625, -2.358154296875, -2.28851318359375, -2.2188720703125, -2.14923095703125, -2.07958984375, -2.00994873046875, -1.9403076171875, -1.87066650390625, -1.801025390625, -1.73138427734375, -1.6617431640625, -1.59210205078125, -1.5224609375, -1.45281982421875, -1.3831787109375, -1.31353759765625, -1.243896484375, -1.17425537109375, -1.1046142578125, -1.03497314453125, -0.96533203125, -0.89569091796875, -0.8260498046875, -0.75640869140625, -0.686767578125, -0.61712646484375, -0.5474853515625, -0.47784423828125, -0.408203125, -0.33856201171875, -0.2689208984375, -0.19927978515625, -0.129638671875, -0.05999755859375, 0.0096435546875, 0.07928466796875, 0.14892578125, 0.21856689453125, 0.2882080078125, 0.35784912109375, 0.427490234375, 0.49713134765625, 0.5667724609375, 0.63641357421875, 0.7060546875, 0.77569580078125, 0.8453369140625, 0.91497802734375, 0.984619140625, 1.05426025390625, 1.1239013671875, 1.19354248046875, 1.26318359375, 1.33282470703125, 1.4024658203125, 1.47210693359375, 1.541748046875, 1.61138916015625, 1.6810302734375, 1.75067138671875, 1.8203125]}, "gradients/encoder.encoder.layers.17.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 3.0, 1.0, 4.0, 9.0, 21.0, 37.0, 53.0, 84.0, 103.0, 119.0, 142.0, 129.0, 113.0, 71.0, 53.0, 27.0, 10.0, 7.0, 6.0, 3.0, 5.0, 1.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.474853515625, -0.4575538635253906, -0.44025421142578125, -0.4229545593261719, -0.4056549072265625, -0.3883552551269531, -0.37105560302734375, -0.3537559509277344, -0.336456298828125, -0.3191566467285156, -0.30185699462890625, -0.2845573425292969, -0.2672576904296875, -0.24995803833007812, -0.23265838623046875, -0.21535873413085938, -0.19805908203125, -0.18075942993164062, -0.16345977783203125, -0.14616012573242188, -0.1288604736328125, -0.11156082153320312, -0.09426116943359375, -0.07696151733398438, -0.059661865234375, -0.042362213134765625, -0.02506256103515625, -0.007762908935546875, 0.0095367431640625, 0.026836395263671875, 0.04413604736328125, 0.061435699462890625, 0.0787353515625, 0.09603500366210938, 0.11333465576171875, 0.13063430786132812, 0.1479339599609375, 0.16523361206054688, 0.18253326416015625, 0.19983291625976562, 0.217132568359375, 0.23443222045898438, 0.25173187255859375, 0.2690315246582031, 0.2863311767578125, 0.3036308288574219, 0.32093048095703125, 0.3382301330566406, 0.35552978515625, 0.3728294372558594, 0.39012908935546875, 0.4074287414550781, 0.4247283935546875, 0.4420280456542969, 0.45932769775390625, 0.4766273498535156, 0.493927001953125, 0.5112266540527344, 0.5285263061523438, 0.5458259582519531, 0.5631256103515625, 0.5804252624511719, 0.5977249145507812, 0.6150245666503906, 0.63232421875]}, "gradients/encoder.encoder.layers.17.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 5.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 7.0, 6.0, 3.0, 6.0, 13.0, 9.0, 14.0, 18.0, 34.0, 55.0, 79.0, 124.0, 224.0, 483.0, 1054.0, 3060.0, 14219.0, 131048.0, 714665.0, 161128.0, 16537.0, 3424.0, 1168.0, 501.0, 246.0, 145.0, 91.0, 49.0, 45.0, 27.0, 14.0, 8.0, 12.0, 8.0, 9.0, 2.0, 7.0, 6.0, 3.0, 1.0, 5.0, 2.0, 1.0, 0.0, 0.0, 3.0], "bins": [-1.658203125, -1.612823486328125, -1.56744384765625, -1.522064208984375, -1.4766845703125, -1.431304931640625, -1.38592529296875, -1.340545654296875, -1.295166015625, -1.249786376953125, -1.20440673828125, -1.159027099609375, -1.1136474609375, -1.068267822265625, -1.02288818359375, -0.977508544921875, -0.93212890625, -0.886749267578125, -0.84136962890625, -0.795989990234375, -0.7506103515625, -0.705230712890625, -0.65985107421875, -0.614471435546875, -0.569091796875, -0.523712158203125, -0.47833251953125, -0.432952880859375, -0.3875732421875, -0.342193603515625, -0.29681396484375, -0.251434326171875, -0.2060546875, -0.160675048828125, -0.11529541015625, -0.069915771484375, -0.0245361328125, 0.020843505859375, 0.06622314453125, 0.111602783203125, 0.156982421875, 0.202362060546875, 0.24774169921875, 0.293121337890625, 0.3385009765625, 0.383880615234375, 0.42926025390625, 0.474639892578125, 0.52001953125, 0.565399169921875, 0.61077880859375, 0.656158447265625, 0.7015380859375, 0.746917724609375, 0.79229736328125, 0.837677001953125, 0.883056640625, 0.928436279296875, 0.97381591796875, 1.019195556640625, 1.0645751953125, 1.109954833984375, 1.15533447265625, 1.200714111328125, 1.24609375]}, "gradients/encoder.encoder.layers.17.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 3.0, 0.0, 2.0, 4.0, 1.0, 7.0, 4.0, 8.0, 10.0, 14.0, 9.0, 28.0, 26.0, 29.0, 33.0, 56.0, 48.0, 51.0, 58.0, 53.0, 53.0, 45.0, 59.0, 57.0, 48.0, 50.0, 47.0, 59.0, 32.0, 23.0, 22.0, 21.0, 10.0, 14.0, 3.0, 6.0, 5.0, 4.0, 3.0, 1.0, 2.0, 3.0, 3.0, 2.0, 1.0], "bins": [-1.8564453125, -1.810150146484375, -1.76385498046875, -1.717559814453125, -1.6712646484375, -1.624969482421875, -1.57867431640625, -1.532379150390625, -1.486083984375, -1.439788818359375, -1.39349365234375, -1.347198486328125, -1.3009033203125, -1.254608154296875, -1.20831298828125, -1.162017822265625, -1.11572265625, -1.069427490234375, -1.02313232421875, -0.976837158203125, -0.9305419921875, -0.884246826171875, -0.83795166015625, -0.791656494140625, -0.745361328125, -0.699066162109375, -0.65277099609375, -0.606475830078125, -0.5601806640625, -0.513885498046875, -0.46759033203125, -0.421295166015625, -0.375, -0.328704833984375, -0.28240966796875, -0.236114501953125, -0.1898193359375, -0.143524169921875, -0.09722900390625, -0.050933837890625, -0.004638671875, 0.041656494140625, 0.08795166015625, 0.134246826171875, 0.1805419921875, 0.226837158203125, 0.27313232421875, 0.319427490234375, 0.36572265625, 0.412017822265625, 0.45831298828125, 0.504608154296875, 0.5509033203125, 0.597198486328125, 0.64349365234375, 0.689788818359375, 0.736083984375, 0.782379150390625, 0.82867431640625, 0.874969482421875, 0.9212646484375, 0.967559814453125, 1.01385498046875, 1.060150146484375, 1.1064453125]}, "gradients/encoder.encoder.layers.17.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 2.0, 0.0, 1.0, 2.0, 2.0, 5.0, 8.0, 11.0, 18.0, 32.0, 74.0, 99.0, 267.0, 785.0, 2666.0, 17493.0, 954147.0, 66408.0, 4600.0, 1206.0, 395.0, 181.0, 67.0, 42.0, 17.0, 9.0, 6.0, 6.0, 5.0, 2.0, 3.0, 4.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-1.337890625, -1.3041763305664062, -1.2704620361328125, -1.2367477416992188, -1.203033447265625, -1.1693191528320312, -1.1356048583984375, -1.1018905639648438, -1.06817626953125, -1.0344619750976562, -1.0007476806640625, -0.9670333862304688, -0.933319091796875, -0.8996047973632812, -0.8658905029296875, -0.8321762084960938, -0.7984619140625, -0.7647476196289062, -0.7310333251953125, -0.6973190307617188, -0.663604736328125, -0.6298904418945312, -0.5961761474609375, -0.5624618530273438, -0.52874755859375, -0.49503326416015625, -0.4613189697265625, -0.42760467529296875, -0.393890380859375, -0.36017608642578125, -0.3264617919921875, -0.29274749755859375, -0.259033203125, -0.22531890869140625, -0.1916046142578125, -0.15789031982421875, -0.124176025390625, -0.09046173095703125, -0.0567474365234375, -0.02303314208984375, 0.01068115234375, 0.04439544677734375, 0.0781097412109375, 0.11182403564453125, 0.145538330078125, 0.17925262451171875, 0.2129669189453125, 0.24668121337890625, 0.2803955078125, 0.31410980224609375, 0.3478240966796875, 0.38153839111328125, 0.415252685546875, 0.44896697998046875, 0.4826812744140625, 0.5163955688476562, 0.55010986328125, 0.5838241577148438, 0.6175384521484375, 0.6512527465820312, 0.684967041015625, 0.7186813354492188, 0.7523956298828125, 0.7861099243164062, 0.81982421875]}, "gradients/encoder.encoder.layers.17.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 4.0, 4.0, 5.0, 7.0, 9.0, 14.0, 9.0, 23.0, 16.0, 36.0, 35.0, 73.0, 118.0, 152.0, 164.0, 104.0, 59.0, 32.0, 36.0, 21.0, 16.0, 17.0, 9.0, 12.0, 10.0, 9.0, 4.0, 2.0, 5.0, 3.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0], "bins": [-0.00015914440155029297, -0.00015533901751041412, -0.00015153363347053528, -0.00014772824943065643, -0.0001439228653907776, -0.00014011748135089874, -0.0001363120973110199, -0.00013250671327114105, -0.0001287013292312622, -0.00012489594519138336, -0.00012109056115150452, -0.00011728517711162567, -0.00011347979307174683, -0.00010967440903186798, -0.00010586902499198914, -0.00010206364095211029, -9.825825691223145e-05, -9.44528728723526e-05, -9.064748883247375e-05, -8.684210479259491e-05, -8.303672075271606e-05, -7.923133671283722e-05, -7.542595267295837e-05, -7.162056863307953e-05, -6.781518459320068e-05, -6.400980055332184e-05, -6.020441651344299e-05, -5.639903247356415e-05, -5.25936484336853e-05, -4.878826439380646e-05, -4.498288035392761e-05, -4.117749631404877e-05, -3.737211227416992e-05, -3.356672823429108e-05, -2.976134419441223e-05, -2.5955960154533386e-05, -2.215057611465454e-05, -1.8345192074775696e-05, -1.453980803489685e-05, -1.0734423995018005e-05, -6.92903995513916e-06, -3.123655915260315e-06, 6.817281246185303e-07, 4.4871121644973755e-06, 8.29249620437622e-06, 1.2097880244255066e-05, 1.590326428413391e-05, 1.9708648324012756e-05, 2.35140323638916e-05, 2.7319416403770447e-05, 3.112480044364929e-05, 3.493018448352814e-05, 3.873556852340698e-05, 4.254095256328583e-05, 4.634633660316467e-05, 5.015172064304352e-05, 5.395710468292236e-05, 5.776248872280121e-05, 6.156787276268005e-05, 6.53732568025589e-05, 6.917864084243774e-05, 7.298402488231659e-05, 7.678940892219543e-05, 8.059479296207428e-05, 8.440017700195312e-05]}, "gradients/encoder.encoder.layers.17.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 4.0, 0.0, 2.0, 2.0, 2.0, 11.0, 18.0, 22.0, 29.0, 56.0, 119.0, 261.0, 643.0, 2321.0, 13394.0, 598927.0, 418458.0, 11243.0, 2075.0, 580.0, 195.0, 95.0, 43.0, 20.0, 13.0, 8.0, 9.0, 8.0, 2.0, 1.0, 3.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.5595703125, -0.532867431640625, -0.50616455078125, -0.479461669921875, -0.4527587890625, -0.426055908203125, -0.39935302734375, -0.372650146484375, -0.345947265625, -0.319244384765625, -0.29254150390625, -0.265838623046875, -0.2391357421875, -0.212432861328125, -0.18572998046875, -0.159027099609375, -0.13232421875, -0.105621337890625, -0.07891845703125, -0.052215576171875, -0.0255126953125, 0.001190185546875, 0.02789306640625, 0.054595947265625, 0.081298828125, 0.108001708984375, 0.13470458984375, 0.161407470703125, 0.1881103515625, 0.214813232421875, 0.24151611328125, 0.268218994140625, 0.294921875, 0.321624755859375, 0.34832763671875, 0.375030517578125, 0.4017333984375, 0.428436279296875, 0.45513916015625, 0.481842041015625, 0.508544921875, 0.535247802734375, 0.56195068359375, 0.588653564453125, 0.6153564453125, 0.642059326171875, 0.66876220703125, 0.695465087890625, 0.72216796875, 0.748870849609375, 0.77557373046875, 0.802276611328125, 0.8289794921875, 0.855682373046875, 0.88238525390625, 0.909088134765625, 0.935791015625, 0.962493896484375, 0.98919677734375, 1.015899658203125, 1.0426025390625, 1.069305419921875, 1.09600830078125, 1.122711181640625, 1.1494140625]}, "gradients/encoder.encoder.layers.17.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 2.0, 4.0, 1.0, 3.0, 3.0, 2.0, 3.0, 2.0, 11.0, 15.0, 30.0, 33.0, 51.0, 84.0, 120.0, 155.0, 165.0, 111.0, 69.0, 60.0, 26.0, 14.0, 9.0, 5.0, 5.0, 5.0, 5.0, 5.0, 1.0, 1.0, 2.0, 2.0, 3.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.48291015625, -0.46431732177734375, -0.4457244873046875, -0.42713165283203125, -0.408538818359375, -0.38994598388671875, -0.3713531494140625, -0.35276031494140625, -0.33416748046875, -0.31557464599609375, -0.2969818115234375, -0.27838897705078125, -0.259796142578125, -0.24120330810546875, -0.2226104736328125, -0.20401763916015625, -0.1854248046875, -0.16683197021484375, -0.1482391357421875, -0.12964630126953125, -0.111053466796875, -0.09246063232421875, -0.0738677978515625, -0.05527496337890625, -0.03668212890625, -0.01808929443359375, 0.0005035400390625, 0.01909637451171875, 0.037689208984375, 0.05628204345703125, 0.0748748779296875, 0.09346771240234375, 0.112060546875, 0.13065338134765625, 0.1492462158203125, 0.16783905029296875, 0.186431884765625, 0.20502471923828125, 0.2236175537109375, 0.24221038818359375, 0.26080322265625, 0.27939605712890625, 0.2979888916015625, 0.31658172607421875, 0.335174560546875, 0.35376739501953125, 0.3723602294921875, 0.39095306396484375, 0.4095458984375, 0.42813873291015625, 0.4467315673828125, 0.46532440185546875, 0.483917236328125, 0.5025100708007812, 0.5211029052734375, 0.5396957397460938, 0.55828857421875, 0.5768814086914062, 0.5954742431640625, 0.6140670776367188, 0.632659912109375, 0.6512527465820312, 0.6698455810546875, 0.6884384155273438, 0.70703125]}, "gradients/encoder.encoder.layers.17.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 3.0, 3.0, 12.0, 19.0, 15.0, 51.0, 88.0, 170.0, 202.0, 168.0, 119.0, 65.0, 40.0, 19.0, 15.0, 6.0, 6.0, 2.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 3.0, 1.0, 0.0, 1.0], "bins": [-10.624582290649414, -10.381489753723145, -10.138396263122559, -9.895303726196289, -9.652210235595703, -9.409117698669434, -9.166025161743164, -8.922931671142578, -8.679839134216309, -8.436746597290039, -8.193653106689453, -7.950560569763184, -7.707467555999756, -7.464374542236328, -7.2212815284729, -6.978188514709473, -6.735095500946045, -6.492002487182617, -6.2489094734191895, -6.005816459655762, -5.762723922729492, -5.5196309089660645, -5.276537895202637, -5.033444881439209, -4.790351867675781, -4.5472588539123535, -4.304165840148926, -4.061073303222656, -3.8179802894592285, -3.574887275695801, -3.331794261932373, -3.0887012481689453, -2.845608711242676, -2.602515697479248, -2.3594229221343994, -2.1163299083709717, -1.8732370138168335, -1.6301441192626953, -1.3870511054992676, -1.1439582109451294, -0.9008653163909912, -0.657772421836853, -0.41467946767807007, -0.1715865135192871, 0.07150638103485107, 0.31459927558898926, 0.557692289352417, 0.8007851839065552, 1.0438780784606934, 1.2869709730148315, 1.5300638675689697, 1.7731568813323975, 2.016249656677246, 2.259342670440674, 2.5024356842041016, 2.7455286979675293, 2.988621473312378, 3.2317144870758057, 3.4748072624206543, 3.717900276184082, 3.9609932899475098, 4.2040863037109375, 4.447178840637207, 4.690271854400635, 4.9333648681640625]}, "gradients/encoder.encoder.layers.17.layer_norm.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 0.0, 2.0, 1.0, 1.0, 1.0, 6.0, 3.0, 6.0, 13.0, 10.0, 9.0, 16.0, 16.0, 19.0, 23.0, 39.0, 30.0, 40.0, 39.0, 44.0, 52.0, 66.0, 47.0, 60.0, 68.0, 47.0, 55.0, 51.0, 38.0, 32.0, 28.0, 16.0, 26.0, 18.0, 23.0, 18.0, 8.0, 12.0, 10.0, 1.0, 4.0, 3.0, 4.0, 3.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0], "bins": [-8.529154777526855, -8.273794174194336, -8.0184326171875, -7.763071537017822, -7.5077104568481445, -7.252349853515625, -6.996988773345947, -6.7416276931762695, -6.486266613006592, -6.230905532836914, -5.975544452667236, -5.720183372497559, -5.464822769165039, -5.209461212158203, -4.954100608825684, -4.698739528656006, -4.443378448486328, -4.18801736831665, -3.9326562881469727, -3.677295446395874, -3.4219343662261963, -3.1665732860565186, -2.91121244430542, -2.655851364135742, -2.4004902839660645, -2.1451292037963867, -1.8897682428359985, -1.6344072818756104, -1.3790462017059326, -1.1236851215362549, -0.8683241605758667, -0.6129631996154785, -0.357602596282959, -0.10224157571792603, 0.15311944484710693, 0.4084804654121399, 0.6638414859771729, 0.9192025661468506, 1.1745635271072388, 1.429924488067627, 1.6852855682373047, 1.9406466484069824, 2.19600772857666, 2.451368570327759, 2.7067296504974365, 2.9620907306671143, 3.217451572418213, 3.4728126525878906, 3.7281737327575684, 3.983534812927246, 4.238895893096924, 4.494256973266602, 4.749617576599121, 5.004979133605957, 5.260339736938477, 5.515700817108154, 5.771061897277832, 6.02642297744751, 6.2817840576171875, 6.537145137786865, 6.792506217956543, 7.0478668212890625, 7.30322790145874, 7.558588981628418, 7.813950061798096]}, "gradients/encoder.encoder.layers.16.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 3.0, 4.0, 3.0, 7.0, 14.0, 12.0, 27.0, 54.0, 76.0, 145.0, 236.0, 475.0, 1033.0, 3987.0, 30120.0, 4038069.0, 108748.0, 7807.0, 1909.0, 703.0, 311.0, 217.0, 118.0, 72.0, 54.0, 30.0, 16.0, 14.0, 5.0, 12.0, 4.0, 5.0, 4.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.57421875, -1.512939453125, -1.45166015625, -1.390380859375, -1.3291015625, -1.267822265625, -1.20654296875, -1.145263671875, -1.083984375, -1.022705078125, -0.96142578125, -0.900146484375, -0.8388671875, -0.777587890625, -0.71630859375, -0.655029296875, -0.59375, -0.532470703125, -0.47119140625, -0.409912109375, -0.3486328125, -0.287353515625, -0.22607421875, -0.164794921875, -0.103515625, -0.042236328125, 0.01904296875, 0.080322265625, 0.1416015625, 0.202880859375, 0.26416015625, 0.325439453125, 0.38671875, 0.447998046875, 0.50927734375, 0.570556640625, 0.6318359375, 0.693115234375, 0.75439453125, 0.815673828125, 0.876953125, 0.938232421875, 0.99951171875, 1.060791015625, 1.1220703125, 1.183349609375, 1.24462890625, 1.305908203125, 1.3671875, 1.428466796875, 1.48974609375, 1.551025390625, 1.6123046875, 1.673583984375, 1.73486328125, 1.796142578125, 1.857421875, 1.918701171875, 1.97998046875, 2.041259765625, 2.1025390625, 2.163818359375, 2.22509765625, 2.286376953125, 2.34765625]}, "gradients/encoder.encoder.layers.16.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 5.0, 1.0, 4.0, 7.0, 8.0, 28.0, 33.0, 53.0, 58.0, 80.0, 112.0, 111.0, 118.0, 109.0, 94.0, 59.0, 50.0, 28.0, 12.0, 11.0, 6.0, 5.0, 3.0, 1.0, 3.0, 4.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.455810546875, -0.4388771057128906, -0.42194366455078125, -0.4050102233886719, -0.3880767822265625, -0.3711433410644531, -0.35420989990234375, -0.3372764587402344, -0.320343017578125, -0.3034095764160156, -0.28647613525390625, -0.2695426940917969, -0.2526092529296875, -0.23567581176757812, -0.21874237060546875, -0.20180892944335938, -0.18487548828125, -0.16794204711914062, -0.15100860595703125, -0.13407516479492188, -0.1171417236328125, -0.10020828247070312, -0.08327484130859375, -0.06634140014648438, -0.049407958984375, -0.032474517822265625, -0.01554107666015625, 0.001392364501953125, 0.0183258056640625, 0.035259246826171875, 0.05219268798828125, 0.06912612915039062, 0.0860595703125, 0.10299301147460938, 0.11992645263671875, 0.13685989379882812, 0.1537933349609375, 0.17072677612304688, 0.18766021728515625, 0.20459365844726562, 0.221527099609375, 0.23846054077148438, 0.25539398193359375, 0.2723274230957031, 0.2892608642578125, 0.3061943054199219, 0.32312774658203125, 0.3400611877441406, 0.35699462890625, 0.3739280700683594, 0.39086151123046875, 0.4077949523925781, 0.4247283935546875, 0.4416618347167969, 0.45859527587890625, 0.4755287170410156, 0.492462158203125, 0.5093955993652344, 0.5263290405273438, 0.5432624816894531, 0.5601959228515625, 0.5771293640136719, 0.5940628051757812, 0.6109962463378906, 0.6279296875]}, "gradients/encoder.encoder.layers.16.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 1.0, 1.0, 5.0, 2.0, 3.0, 9.0, 8.0, 21.0, 55.0, 97.0, 210.0, 463.0, 1074.0, 3357.0, 18676.0, 2133484.0, 2013184.0, 18354.0, 3418.0, 1073.0, 419.0, 195.0, 79.0, 40.0, 24.0, 9.0, 10.0, 4.0, 2.0, 3.0, 3.0, 2.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-1.5009765625, -1.4554901123046875, -1.410003662109375, -1.3645172119140625, -1.31903076171875, -1.2735443115234375, -1.228057861328125, -1.1825714111328125, -1.1370849609375, -1.0915985107421875, -1.046112060546875, -1.0006256103515625, -0.95513916015625, -0.9096527099609375, -0.864166259765625, -0.8186798095703125, -0.773193359375, -0.7277069091796875, -0.682220458984375, -0.6367340087890625, -0.59124755859375, -0.5457611083984375, -0.500274658203125, -0.4547882080078125, -0.4093017578125, -0.3638153076171875, -0.318328857421875, -0.2728424072265625, -0.22735595703125, -0.1818695068359375, -0.136383056640625, -0.0908966064453125, -0.04541015625, 7.62939453125e-05, 0.045562744140625, 0.0910491943359375, 0.13653564453125, 0.1820220947265625, 0.227508544921875, 0.2729949951171875, 0.3184814453125, 0.3639678955078125, 0.409454345703125, 0.4549407958984375, 0.50042724609375, 0.5459136962890625, 0.591400146484375, 0.6368865966796875, 0.682373046875, 0.7278594970703125, 0.773345947265625, 0.8188323974609375, 0.86431884765625, 0.9098052978515625, 0.955291748046875, 1.0007781982421875, 1.0462646484375, 1.0917510986328125, 1.137237548828125, 1.1827239990234375, 1.22821044921875, 1.2736968994140625, 1.319183349609375, 1.3646697998046875, 1.41015625]}, "gradients/encoder.encoder.layers.16.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 5.0, 4.0, 4.0, 6.0, 7.0, 13.0, 28.0, 34.0, 63.0, 135.0, 399.0, 2146.0, 821.0, 198.0, 95.0, 48.0, 21.0, 15.0, 12.0, 5.0, 6.0, 4.0, 6.0, 2.0, 3.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.5615234375, -0.5462379455566406, -0.5309524536132812, -0.5156669616699219, -0.5003814697265625, -0.4850959777832031, -0.46981048583984375, -0.4545249938964844, -0.439239501953125, -0.4239540100097656, -0.40866851806640625, -0.3933830261230469, -0.3780975341796875, -0.3628120422363281, -0.34752655029296875, -0.3322410583496094, -0.31695556640625, -0.3016700744628906, -0.28638458251953125, -0.2710990905761719, -0.2558135986328125, -0.24052810668945312, -0.22524261474609375, -0.20995712280273438, -0.194671630859375, -0.17938613891601562, -0.16410064697265625, -0.14881515502929688, -0.1335296630859375, -0.11824417114257812, -0.10295867919921875, -0.08767318725585938, -0.0723876953125, -0.057102203369140625, -0.04181671142578125, -0.026531219482421875, -0.0112457275390625, 0.004039764404296875, 0.01932525634765625, 0.034610748291015625, 0.049896240234375, 0.06518173217773438, 0.08046722412109375, 0.09575271606445312, 0.1110382080078125, 0.12632369995117188, 0.14160919189453125, 0.15689468383789062, 0.17218017578125, 0.18746566772460938, 0.20275115966796875, 0.21803665161132812, 0.2333221435546875, 0.24860763549804688, 0.26389312744140625, 0.2791786193847656, 0.294464111328125, 0.3097496032714844, 0.32503509521484375, 0.3403205871582031, 0.3556060791015625, 0.3708915710449219, 0.38617706298828125, 0.4014625549316406, 0.416748046875]}, "gradients/encoder.encoder.layers.16.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 7.0, 12.0, 32.0, 93.0, 314.0, 349.0, 138.0, 41.0, 12.0, 10.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.765956878662109, -4.652946949005127, -4.5399370193481445, -4.426926612854004, -4.3139166831970215, -4.200906753540039, -4.087896347045898, -3.974886417388916, -3.8618764877319336, -3.748866558074951, -3.6358563899993896, -3.522846221923828, -3.4098362922668457, -3.2968263626098633, -3.1838161945343018, -3.0708060264587402, -2.957796096801758, -2.8447861671447754, -2.731775999069214, -2.6187658309936523, -2.50575590133667, -2.3927459716796875, -2.279735803604126, -2.1667256355285645, -2.053715705871582, -1.94070565700531, -1.827695608139038, -1.7146855592727661, -1.6016755104064941, -1.4886654615402222, -1.3756554126739502, -1.2626453638076782, -1.1496353149414062, -1.0366252660751343, -0.9236152172088623, -0.8106051683425903, -0.6975951194763184, -0.5845850706100464, -0.4715750217437744, -0.35856497287750244, -0.24555492401123047, -0.1325448751449585, -0.019534826278686523, 0.09347522258758545, 0.20648527145385742, 0.3194953203201294, 0.43250536918640137, 0.5455154180526733, 0.6585254669189453, 0.7715355157852173, 0.8845455646514893, 0.9975556135177612, 1.1105656623840332, 1.2235757112503052, 1.3365857601165771, 1.4495958089828491, 1.562605857849121, 1.675615906715393, 1.788625955581665, 1.901636004447937, 2.014646053314209, 2.1276559829711914, 2.240666151046753, 2.3536763191223145, 2.466686248779297]}, "gradients/encoder.encoder.layers.16.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 2.0, 1.0, 0.0, 2.0, 2.0, 12.0, 14.0, 10.0, 26.0, 25.0, 29.0, 47.0, 66.0, 60.0, 69.0, 75.0, 67.0, 76.0, 71.0, 53.0, 47.0, 58.0, 48.0, 45.0, 21.0, 20.0, 15.0, 14.0, 16.0, 11.0, 5.0, 5.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.8561229705810547, -1.8031017780303955, -1.7500805854797363, -1.6970593929290771, -1.644038200378418, -1.5910170078277588, -1.5379959344863892, -1.48497474193573, -1.4319535493850708, -1.3789323568344116, -1.3259111642837524, -1.2728899717330933, -1.2198688983917236, -1.1668477058410645, -1.1138265132904053, -1.060805320739746, -1.007784128189087, -0.9547629356384277, -0.9017417430877686, -0.8487206101417542, -0.795699417591095, -0.7426782250404358, -0.6896570920944214, -0.6366358995437622, -0.583614706993103, -0.5305935144424438, -0.47757235169410706, -0.42455118894577026, -0.3715299963951111, -0.3185088038444519, -0.2654876410961151, -0.21246647834777832, -0.1594454050064087, -0.1064242273569107, -0.05340304970741272, -0.0003818720579147339, 0.05263930559158325, 0.10566049814224243, 0.15868166089057922, 0.21170282363891602, 0.2647240161895752, 0.3177452087402344, 0.37076637148857117, 0.42378753423690796, 0.47680872678756714, 0.5298299193382263, 0.5828510522842407, 0.6358722448348999, 0.6888934373855591, 0.7419146299362183, 0.7949358224868774, 0.8479569554328918, 0.900978147983551, 0.9539993405342102, 1.0070204734802246, 1.0600416660308838, 1.113062858581543, 1.1660840511322021, 1.2191052436828613, 1.2721264362335205, 1.3251476287841797, 1.3781688213348389, 1.4311898946762085, 1.4842110872268677, 1.5372322797775269]}, "gradients/encoder.encoder.layers.16.attention.out_proj.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 2.0, 3.0, 7.0, 7.0, 7.0, 16.0, 26.0, 28.0, 40.0, 64.0, 81.0, 147.0, 187.0, 293.0, 549.0, 956.0, 1683.0, 3755.0, 12152.0, 65324.0, 509301.0, 389009.0, 47855.0, 9997.0, 3341.0, 1556.0, 829.0, 451.0, 265.0, 223.0, 141.0, 76.0, 70.0, 38.0, 26.0, 22.0, 12.0, 11.0, 6.0, 4.0, 0.0, 2.0, 0.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.259765625, -1.2146453857421875, -1.169525146484375, -1.1244049072265625, -1.07928466796875, -1.0341644287109375, -0.989044189453125, -0.9439239501953125, -0.8988037109375, -0.8536834716796875, -0.808563232421875, -0.7634429931640625, -0.71832275390625, -0.6732025146484375, -0.628082275390625, -0.5829620361328125, -0.537841796875, -0.4927215576171875, -0.447601318359375, -0.4024810791015625, -0.35736083984375, -0.3122406005859375, -0.267120361328125, -0.2220001220703125, -0.1768798828125, -0.1317596435546875, -0.086639404296875, -0.0415191650390625, 0.00360107421875, 0.0487213134765625, 0.093841552734375, 0.1389617919921875, 0.18408203125, 0.2292022705078125, 0.274322509765625, 0.3194427490234375, 0.36456298828125, 0.4096832275390625, 0.454803466796875, 0.4999237060546875, 0.5450439453125, 0.5901641845703125, 0.635284423828125, 0.6804046630859375, 0.72552490234375, 0.7706451416015625, 0.815765380859375, 0.8608856201171875, 0.906005859375, 0.9511260986328125, 0.996246337890625, 1.0413665771484375, 1.08648681640625, 1.1316070556640625, 1.176727294921875, 1.2218475341796875, 1.2669677734375, 1.3120880126953125, 1.357208251953125, 1.4023284912109375, 1.44744873046875, 1.4925689697265625, 1.537689208984375, 1.5828094482421875, 1.6279296875]}, "gradients/encoder.encoder.layers.16.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 3.0, 4.0, 3.0, 6.0, 12.0, 29.0, 43.0, 46.0, 66.0, 88.0, 96.0, 122.0, 119.0, 115.0, 85.0, 59.0, 48.0, 22.0, 7.0, 11.0, 6.0, 4.0, 3.0, 2.0, 3.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.473388671875, -0.4563255310058594, -0.43926239013671875, -0.4221992492675781, -0.4051361083984375, -0.3880729675292969, -0.37100982666015625, -0.3539466857910156, -0.336883544921875, -0.3198204040527344, -0.30275726318359375, -0.2856941223144531, -0.2686309814453125, -0.2515678405761719, -0.23450469970703125, -0.21744155883789062, -0.20037841796875, -0.18331527709960938, -0.16625213623046875, -0.14918899536132812, -0.1321258544921875, -0.11506271362304688, -0.09799957275390625, -0.08093643188476562, -0.063873291015625, -0.046810150146484375, -0.02974700927734375, -0.012683868408203125, 0.0043792724609375, 0.021442413330078125, 0.03850555419921875, 0.055568695068359375, 0.0726318359375, 0.08969497680664062, 0.10675811767578125, 0.12382125854492188, 0.1408843994140625, 0.15794754028320312, 0.17501068115234375, 0.19207382202148438, 0.209136962890625, 0.22620010375976562, 0.24326324462890625, 0.2603263854980469, 0.2773895263671875, 0.2944526672363281, 0.31151580810546875, 0.3285789489746094, 0.34564208984375, 0.3627052307128906, 0.37976837158203125, 0.3968315124511719, 0.4138946533203125, 0.4309577941894531, 0.44802093505859375, 0.4650840759277344, 0.482147216796875, 0.4992103576660156, 0.5162734985351562, 0.5333366394042969, 0.5503997802734375, 0.5674629211425781, 0.5845260620117188, 0.6015892028808594, 0.61865234375]}, "gradients/encoder.encoder.layers.16.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 7.0, 4.0, 7.0, 7.0, 10.0, 16.0, 18.0, 45.0, 48.0, 57.0, 87.0, 151.0, 244.0, 376.0, 730.0, 1439.0, 3758.0, 14317.0, 82607.0, 487600.0, 382066.0, 58165.0, 10800.0, 3188.0, 1231.0, 574.0, 343.0, 218.0, 136.0, 100.0, 71.0, 43.0, 38.0, 17.0, 12.0, 11.0, 3.0, 3.0, 8.0, 3.0, 3.0, 4.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.10546875, -1.0697021484375, -1.033935546875, -0.9981689453125, -0.96240234375, -0.9266357421875, -0.890869140625, -0.8551025390625, -0.8193359375, -0.7835693359375, -0.747802734375, -0.7120361328125, -0.67626953125, -0.6405029296875, -0.604736328125, -0.5689697265625, -0.533203125, -0.4974365234375, -0.461669921875, -0.4259033203125, -0.39013671875, -0.3543701171875, -0.318603515625, -0.2828369140625, -0.2470703125, -0.2113037109375, -0.175537109375, -0.1397705078125, -0.10400390625, -0.0682373046875, -0.032470703125, 0.0032958984375, 0.0390625, 0.0748291015625, 0.110595703125, 0.1463623046875, 0.18212890625, 0.2178955078125, 0.253662109375, 0.2894287109375, 0.3251953125, 0.3609619140625, 0.396728515625, 0.4324951171875, 0.46826171875, 0.5040283203125, 0.539794921875, 0.5755615234375, 0.611328125, 0.6470947265625, 0.682861328125, 0.7186279296875, 0.75439453125, 0.7901611328125, 0.825927734375, 0.8616943359375, 0.8974609375, 0.9332275390625, 0.968994140625, 1.0047607421875, 1.04052734375, 1.0762939453125, 1.112060546875, 1.1478271484375, 1.18359375]}, "gradients/encoder.encoder.layers.16.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 5.0, 9.0, 5.0, 5.0, 9.0, 9.0, 16.0, 8.0, 22.0, 25.0, 36.0, 32.0, 55.0, 43.0, 43.0, 52.0, 61.0, 42.0, 58.0, 64.0, 56.0, 42.0, 47.0, 35.0, 45.0, 54.0, 28.0, 20.0, 22.0, 13.0, 8.0, 14.0, 5.0, 6.0, 5.0, 6.0, 3.0, 2.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.51953125, -1.47332763671875, -1.4271240234375, -1.38092041015625, -1.334716796875, -1.28851318359375, -1.2423095703125, -1.19610595703125, -1.14990234375, -1.10369873046875, -1.0574951171875, -1.01129150390625, -0.965087890625, -0.91888427734375, -0.8726806640625, -0.82647705078125, -0.7802734375, -0.73406982421875, -0.6878662109375, -0.64166259765625, -0.595458984375, -0.54925537109375, -0.5030517578125, -0.45684814453125, -0.41064453125, -0.36444091796875, -0.3182373046875, -0.27203369140625, -0.225830078125, -0.17962646484375, -0.1334228515625, -0.08721923828125, -0.041015625, 0.00518798828125, 0.0513916015625, 0.09759521484375, 0.143798828125, 0.19000244140625, 0.2362060546875, 0.28240966796875, 0.32861328125, 0.37481689453125, 0.4210205078125, 0.46722412109375, 0.513427734375, 0.55963134765625, 0.6058349609375, 0.65203857421875, 0.6982421875, 0.74444580078125, 0.7906494140625, 0.83685302734375, 0.883056640625, 0.92926025390625, 0.9754638671875, 1.02166748046875, 1.06787109375, 1.11407470703125, 1.1602783203125, 1.20648193359375, 1.252685546875, 1.29888916015625, 1.3450927734375, 1.39129638671875, 1.4375]}, "gradients/encoder.encoder.layers.16.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 5.0, 3.0, 3.0, 11.0, 11.0, 12.0, 22.0, 40.0, 60.0, 141.0, 276.0, 766.0, 2270.0, 12692.0, 273368.0, 722191.0, 31028.0, 3865.0, 1038.0, 389.0, 179.0, 89.0, 40.0, 23.0, 14.0, 13.0, 7.0, 4.0, 1.0, 0.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0], "bins": [-0.62890625, -0.608734130859375, -0.58856201171875, -0.568389892578125, -0.5482177734375, -0.528045654296875, -0.50787353515625, -0.487701416015625, -0.467529296875, -0.447357177734375, -0.42718505859375, -0.407012939453125, -0.3868408203125, -0.366668701171875, -0.34649658203125, -0.326324462890625, -0.30615234375, -0.285980224609375, -0.26580810546875, -0.245635986328125, -0.2254638671875, -0.205291748046875, -0.18511962890625, -0.164947509765625, -0.144775390625, -0.124603271484375, -0.10443115234375, -0.084259033203125, -0.0640869140625, -0.043914794921875, -0.02374267578125, -0.003570556640625, 0.0166015625, 0.036773681640625, 0.05694580078125, 0.077117919921875, 0.0972900390625, 0.117462158203125, 0.13763427734375, 0.157806396484375, 0.177978515625, 0.198150634765625, 0.21832275390625, 0.238494873046875, 0.2586669921875, 0.278839111328125, 0.29901123046875, 0.319183349609375, 0.33935546875, 0.359527587890625, 0.37969970703125, 0.399871826171875, 0.4200439453125, 0.440216064453125, 0.46038818359375, 0.480560302734375, 0.500732421875, 0.520904541015625, 0.54107666015625, 0.561248779296875, 0.5814208984375, 0.601593017578125, 0.62176513671875, 0.641937255859375, 0.662109375]}, "gradients/encoder.encoder.layers.16.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 0.0, 2.0, 2.0, 1.0, 3.0, 6.0, 1.0, 3.0, 8.0, 7.0, 7.0, 8.0, 13.0, 18.0, 24.0, 34.0, 34.0, 52.0, 56.0, 65.0, 86.0, 98.0, 107.0, 85.0, 56.0, 55.0, 27.0, 22.0, 20.0, 15.0, 14.0, 16.0, 8.0, 6.0, 7.0, 6.0, 4.0, 5.0, 4.0, 8.0, 3.0, 4.0, 1.0, 4.0, 4.0, 0.0, 1.0, 2.0, 2.0], "bins": [-9.21487808227539e-05, -8.963234722614288e-05, -8.711591362953186e-05, -8.459948003292084e-05, -8.208304643630981e-05, -7.956661283969879e-05, -7.705017924308777e-05, -7.453374564647675e-05, -7.201731204986572e-05, -6.95008784532547e-05, -6.698444485664368e-05, -6.446801126003265e-05, -6.195157766342163e-05, -5.943514406681061e-05, -5.6918710470199585e-05, -5.440227687358856e-05, -5.188584327697754e-05, -4.9369409680366516e-05, -4.685297608375549e-05, -4.433654248714447e-05, -4.182010889053345e-05, -3.9303675293922424e-05, -3.67872416973114e-05, -3.427080810070038e-05, -3.1754374504089355e-05, -2.9237940907478333e-05, -2.672150731086731e-05, -2.4205073714256287e-05, -2.1688640117645264e-05, -1.917220652103424e-05, -1.6655772924423218e-05, -1.4139339327812195e-05, -1.1622905731201172e-05, -9.106472134590149e-06, -6.590038537979126e-06, -4.073604941368103e-06, -1.55717134475708e-06, 9.592622518539429e-07, 3.475695848464966e-06, 5.992129445075989e-06, 8.508563041687012e-06, 1.1024996638298035e-05, 1.3541430234909058e-05, 1.605786383152008e-05, 1.8574297428131104e-05, 2.1090731024742126e-05, 2.360716462135315e-05, 2.6123598217964172e-05, 2.8640031814575195e-05, 3.115646541118622e-05, 3.367289900779724e-05, 3.6189332604408264e-05, 3.870576620101929e-05, 4.122219979763031e-05, 4.373863339424133e-05, 4.6255066990852356e-05, 4.877150058746338e-05, 5.12879341840744e-05, 5.3804367780685425e-05, 5.632080137729645e-05, 5.883723497390747e-05, 6.13536685705185e-05, 6.387010216712952e-05, 6.638653576374054e-05, 6.890296936035156e-05]}, "gradients/encoder.encoder.layers.16.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 4.0, 0.0, 0.0, 0.0, 1.0, 3.0, 4.0, 11.0, 5.0, 8.0, 15.0, 24.0, 31.0, 50.0, 82.0, 123.0, 222.0, 430.0, 927.0, 2479.0, 9125.0, 53985.0, 478992.0, 440188.0, 49239.0, 8406.0, 2318.0, 901.0, 391.0, 217.0, 125.0, 81.0, 55.0, 33.0, 29.0, 18.0, 8.0, 5.0, 9.0, 4.0, 7.0, 3.0, 0.0, 4.0, 0.0, 3.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.374267578125, -0.3617820739746094, -0.34929656982421875, -0.3368110656738281, -0.3243255615234375, -0.3118400573730469, -0.29935455322265625, -0.2868690490722656, -0.274383544921875, -0.2618980407714844, -0.24941253662109375, -0.23692703247070312, -0.2244415283203125, -0.21195602416992188, -0.19947052001953125, -0.18698501586914062, -0.17449951171875, -0.16201400756835938, -0.14952850341796875, -0.13704299926757812, -0.1245574951171875, -0.11207199096679688, -0.09958648681640625, -0.08710098266601562, -0.074615478515625, -0.062129974365234375, -0.04964447021484375, -0.037158966064453125, -0.0246734619140625, -0.012187957763671875, 0.00029754638671875, 0.012783050537109375, 0.0252685546875, 0.037754058837890625, 0.05023956298828125, 0.06272506713867188, 0.0752105712890625, 0.08769607543945312, 0.10018157958984375, 0.11266708374023438, 0.125152587890625, 0.13763809204101562, 0.15012359619140625, 0.16260910034179688, 0.1750946044921875, 0.18758010864257812, 0.20006561279296875, 0.21255111694335938, 0.22503662109375, 0.23752212524414062, 0.25000762939453125, 0.2624931335449219, 0.2749786376953125, 0.2874641418457031, 0.29994964599609375, 0.3124351501464844, 0.324920654296875, 0.3374061584472656, 0.34989166259765625, 0.3623771667480469, 0.3748626708984375, 0.3873481750488281, 0.39983367919921875, 0.4123191833496094, 0.4248046875]}, "gradients/encoder.encoder.layers.16.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 2.0, 7.0, 4.0, 3.0, 5.0, 13.0, 8.0, 17.0, 19.0, 22.0, 39.0, 40.0, 58.0, 67.0, 107.0, 98.0, 108.0, 89.0, 75.0, 56.0, 37.0, 35.0, 23.0, 28.0, 8.0, 12.0, 3.0, 6.0, 3.0, 5.0, 6.0, 2.0, 4.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.279052734375, -0.266937255859375, -0.25482177734375, -0.242706298828125, -0.2305908203125, -0.218475341796875, -0.20635986328125, -0.194244384765625, -0.18212890625, -0.170013427734375, -0.15789794921875, -0.145782470703125, -0.1336669921875, -0.121551513671875, -0.10943603515625, -0.097320556640625, -0.085205078125, -0.073089599609375, -0.06097412109375, -0.048858642578125, -0.0367431640625, -0.024627685546875, -0.01251220703125, -0.000396728515625, 0.01171875, 0.023834228515625, 0.03594970703125, 0.048065185546875, 0.0601806640625, 0.072296142578125, 0.08441162109375, 0.096527099609375, 0.108642578125, 0.120758056640625, 0.13287353515625, 0.144989013671875, 0.1571044921875, 0.169219970703125, 0.18133544921875, 0.193450927734375, 0.20556640625, 0.217681884765625, 0.22979736328125, 0.241912841796875, 0.2540283203125, 0.266143798828125, 0.27825927734375, 0.290374755859375, 0.302490234375, 0.314605712890625, 0.32672119140625, 0.338836669921875, 0.3509521484375, 0.363067626953125, 0.37518310546875, 0.387298583984375, 0.3994140625, 0.411529541015625, 0.42364501953125, 0.435760498046875, 0.4478759765625, 0.459991455078125, 0.47210693359375, 0.484222412109375, 0.496337890625]}, "gradients/encoder.encoder.layers.16.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 4.0, 1.0, 4.0, 8.0, 10.0, 18.0, 25.0, 36.0, 57.0, 97.0, 120.0, 163.0, 133.0, 107.0, 77.0, 62.0, 32.0, 17.0, 10.0, 8.0, 9.0, 2.0, 4.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0], "bins": [-7.429243087768555, -7.225990295410156, -7.022737979888916, -6.819485664367676, -6.616232872009277, -6.412980079650879, -6.209727764129639, -6.006475448608398, -5.80322265625, -5.599969863891602, -5.396717548370361, -5.193465232849121, -4.990212440490723, -4.786959648132324, -4.583707332611084, -4.380455017089844, -4.177202224731445, -3.973949670791626, -3.7706971168518066, -3.5674445629119873, -3.364192008972168, -3.1609394550323486, -2.9576869010925293, -2.75443434715271, -2.5511817932128906, -2.3479292392730713, -2.144676685333252, -1.9414241313934326, -1.7381715774536133, -1.534919023513794, -1.3316664695739746, -1.1284139156341553, -0.9251613616943359, -0.7219088077545166, -0.5186562538146973, -0.31540369987487793, -0.1121511459350586, 0.09110140800476074, 0.2943539619445801, 0.4976065158843994, 0.7008590698242188, 0.9041116237640381, 1.1073641777038574, 1.3106167316436768, 1.513869285583496, 1.7171218395233154, 1.9203743934631348, 2.123626947402954, 2.3268795013427734, 2.5301320552825928, 2.733384609222412, 2.9366371631622314, 3.139889717102051, 3.34314227104187, 3.5463948249816895, 3.749647378921509, 3.952899932861328, 4.156152725219727, 4.359405040740967, 4.562657356262207, 4.7659101486206055, 4.969162940979004, 5.172415256500244, 5.375667572021484, 5.578920364379883]}, "gradients/encoder.encoder.layers.16.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 1.0, 8.0, 4.0, 5.0, 9.0, 9.0, 15.0, 14.0, 6.0, 15.0, 23.0, 21.0, 37.0, 35.0, 29.0, 40.0, 42.0, 47.0, 45.0, 41.0, 47.0, 48.0, 55.0, 47.0, 49.0, 32.0, 45.0, 30.0, 31.0, 30.0, 24.0, 21.0, 23.0, 21.0, 12.0, 10.0, 13.0, 5.0, 3.0, 4.0, 2.0, 4.0, 2.0, 2.0, 3.0, 2.0, 1.0, 2.0, 0.0, 0.0, 1.0], "bins": [-8.620080947875977, -8.367776870727539, -8.115472793579102, -7.863168716430664, -7.610864162445068, -7.358560085296631, -7.106256008148193, -6.853951930999756, -6.60164737701416, -6.349343299865723, -6.097039222717285, -5.844735145568848, -5.592430591583252, -5.3401265144348145, -5.087822437286377, -4.8355183601379395, -4.583214282989502, -4.3309102058410645, -4.078606128692627, -3.8263018131256104, -3.5739974975585938, -3.3216934204101562, -3.0693893432617188, -2.8170852661132812, -2.5647809505462646, -2.312476873397827, -2.0601725578308105, -1.807868480682373, -1.555564284324646, -1.303260087966919, -1.0509560108184814, -0.7986518144607544, -0.5463480949401855, -0.2940439283847809, -0.04173976182937622, 0.21056437492370605, 0.4628685712814331, 0.7151727676391602, 0.9674768447875977, 1.2197810411453247, 1.4720852375030518, 1.7243894338607788, 1.9766936302185059, 2.2289977073669434, 2.481301784515381, 2.7336061000823975, 2.985910177230835, 3.2382144927978516, 3.490518569946289, 3.7428226470947266, 3.995126962661743, 4.247430801391602, 4.499735355377197, 4.752039432525635, 5.004343509674072, 5.25664758682251, 5.5089521408081055, 5.761256217956543, 6.0135602951049805, 6.265864372253418, 6.518168926239014, 6.770473003387451, 7.022777080535889, 7.275081157684326, 7.527385234832764]}, "gradients/encoder.encoder.layers.15.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 7.0, 0.0, 0.0, 2.0, 5.0, 3.0, 7.0, 8.0, 16.0, 17.0, 15.0, 35.0, 31.0, 65.0, 79.0, 108.0, 182.0, 351.0, 687.0, 1464.0, 3535.0, 11537.0, 93108.0, 3990981.0, 74893.0, 10438.0, 3533.0, 1463.0, 683.0, 362.0, 212.0, 133.0, 97.0, 63.0, 39.0, 34.0, 23.0, 16.0, 18.0, 9.0, 7.0, 5.0, 7.0, 5.0, 3.0, 2.0, 0.0, 3.0, 2.0, 0.0, 0.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.5966796875, -1.5424957275390625, -1.488311767578125, -1.4341278076171875, -1.37994384765625, -1.3257598876953125, -1.271575927734375, -1.2173919677734375, -1.1632080078125, -1.1090240478515625, -1.054840087890625, -1.0006561279296875, -0.94647216796875, -0.8922882080078125, -0.838104248046875, -0.7839202880859375, -0.729736328125, -0.6755523681640625, -0.621368408203125, -0.5671844482421875, -0.51300048828125, -0.4588165283203125, -0.404632568359375, -0.3504486083984375, -0.2962646484375, -0.2420806884765625, -0.187896728515625, -0.1337127685546875, -0.07952880859375, -0.0253448486328125, 0.028839111328125, 0.0830230712890625, 0.13720703125, 0.1913909912109375, 0.245574951171875, 0.2997589111328125, 0.35394287109375, 0.4081268310546875, 0.462310791015625, 0.5164947509765625, 0.5706787109375, 0.6248626708984375, 0.679046630859375, 0.7332305908203125, 0.78741455078125, 0.8415985107421875, 0.895782470703125, 0.9499664306640625, 1.004150390625, 1.0583343505859375, 1.112518310546875, 1.1667022705078125, 1.22088623046875, 1.2750701904296875, 1.329254150390625, 1.3834381103515625, 1.4376220703125, 1.4918060302734375, 1.545989990234375, 1.6001739501953125, 1.65435791015625, 1.7085418701171875, 1.762725830078125, 1.8169097900390625, 1.87109375]}, "gradients/encoder.encoder.layers.15.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 10.0, 15.0, 29.0, 44.0, 48.0, 68.0, 88.0, 106.0, 104.0, 108.0, 103.0, 89.0, 54.0, 39.0, 32.0, 18.0, 11.0, 8.0, 5.0, 3.0, 2.0, 2.0, 1.0, 0.0, 4.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.60009765625, -0.5811691284179688, -0.5622406005859375, -0.5433120727539062, -0.524383544921875, -0.5054550170898438, -0.4865264892578125, -0.46759796142578125, -0.44866943359375, -0.42974090576171875, -0.4108123779296875, -0.39188385009765625, -0.372955322265625, -0.35402679443359375, -0.3350982666015625, -0.31616973876953125, -0.2972412109375, -0.27831268310546875, -0.2593841552734375, -0.24045562744140625, -0.221527099609375, -0.20259857177734375, -0.1836700439453125, -0.16474151611328125, -0.14581298828125, -0.12688446044921875, -0.1079559326171875, -0.08902740478515625, -0.070098876953125, -0.05117034912109375, -0.0322418212890625, -0.01331329345703125, 0.005615234375, 0.02454376220703125, 0.0434722900390625, 0.06240081787109375, 0.081329345703125, 0.10025787353515625, 0.1191864013671875, 0.13811492919921875, 0.15704345703125, 0.17597198486328125, 0.1949005126953125, 0.21382904052734375, 0.232757568359375, 0.25168609619140625, 0.2706146240234375, 0.28954315185546875, 0.3084716796875, 0.32740020751953125, 0.3463287353515625, 0.36525726318359375, 0.384185791015625, 0.40311431884765625, 0.4220428466796875, 0.44097137451171875, 0.45989990234375, 0.47882843017578125, 0.4977569580078125, 0.5166854858398438, 0.535614013671875, 0.5545425415039062, 0.5734710693359375, 0.5923995971679688, 0.611328125]}, "gradients/encoder.encoder.layers.15.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 3.0, 3.0, 1.0, 2.0, 8.0, 14.0, 39.0, 79.0, 294.0, 994.0, 4326.0, 53940.0, 4089738.0, 39631.0, 3854.0, 940.0, 280.0, 88.0, 33.0, 17.0, 2.0, 0.0, 1.0, 1.0, 1.0, 2.0, 3.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.572265625, -1.50225830078125, -1.4322509765625, -1.36224365234375, -1.292236328125, -1.22222900390625, -1.1522216796875, -1.08221435546875, -1.01220703125, -0.94219970703125, -0.8721923828125, -0.80218505859375, -0.732177734375, -0.66217041015625, -0.5921630859375, -0.52215576171875, -0.4521484375, -0.38214111328125, -0.3121337890625, -0.24212646484375, -0.172119140625, -0.10211181640625, -0.0321044921875, 0.03790283203125, 0.10791015625, 0.17791748046875, 0.2479248046875, 0.31793212890625, 0.387939453125, 0.45794677734375, 0.5279541015625, 0.59796142578125, 0.66796875, 0.73797607421875, 0.8079833984375, 0.87799072265625, 0.947998046875, 1.01800537109375, 1.0880126953125, 1.15802001953125, 1.22802734375, 1.29803466796875, 1.3680419921875, 1.43804931640625, 1.508056640625, 1.57806396484375, 1.6480712890625, 1.71807861328125, 1.7880859375, 1.85809326171875, 1.9281005859375, 1.99810791015625, 2.068115234375, 2.13812255859375, 2.2081298828125, 2.27813720703125, 2.34814453125, 2.41815185546875, 2.4881591796875, 2.55816650390625, 2.628173828125, 2.69818115234375, 2.7681884765625, 2.83819580078125, 2.908203125]}, "gradients/encoder.encoder.layers.15.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 3.0, 1.0, 2.0, 5.0, 5.0, 3.0, 7.0, 3.0, 11.0, 19.0, 28.0, 52.0, 104.0, 224.0, 1195.0, 1769.0, 327.0, 121.0, 71.0, 47.0, 26.0, 15.0, 11.0, 9.0, 4.0, 4.0, 2.0, 4.0, 2.0, 1.0, 1.0, 0.0, 3.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.564453125, -0.54571533203125, -0.5269775390625, -0.50823974609375, -0.489501953125, -0.47076416015625, -0.4520263671875, -0.43328857421875, -0.41455078125, -0.39581298828125, -0.3770751953125, -0.35833740234375, -0.339599609375, -0.32086181640625, -0.3021240234375, -0.28338623046875, -0.2646484375, -0.24591064453125, -0.2271728515625, -0.20843505859375, -0.189697265625, -0.17095947265625, -0.1522216796875, -0.13348388671875, -0.11474609375, -0.09600830078125, -0.0772705078125, -0.05853271484375, -0.039794921875, -0.02105712890625, -0.0023193359375, 0.01641845703125, 0.03515625, 0.05389404296875, 0.0726318359375, 0.09136962890625, 0.110107421875, 0.12884521484375, 0.1475830078125, 0.16632080078125, 0.18505859375, 0.20379638671875, 0.2225341796875, 0.24127197265625, 0.260009765625, 0.27874755859375, 0.2974853515625, 0.31622314453125, 0.3349609375, 0.35369873046875, 0.3724365234375, 0.39117431640625, 0.409912109375, 0.42864990234375, 0.4473876953125, 0.46612548828125, 0.48486328125, 0.50360107421875, 0.5223388671875, 0.54107666015625, 0.559814453125, 0.57855224609375, 0.5972900390625, 0.61602783203125, 0.634765625]}, "gradients/encoder.encoder.layers.15.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 3.0, 16.0, 134.0, 597.0, 234.0, 21.0, 9.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.443396806716919, -2.1563329696655273, -1.8692693710327148, -1.5822056531906128, -1.2951419353485107, -1.0080782175064087, -0.7210144996643066, -0.43395066261291504, -0.14688706398010254, 0.1401766538619995, 0.42724037170410156, 0.7143040895462036, 1.0013678073883057, 1.2884315252304077, 1.5754952430725098, 1.8625590801239014, 2.149622678756714, 2.4366865158081055, 2.723750114440918, 3.0108137130737305, 3.297877550125122, 3.5849413871765137, 3.872004985809326, 4.159069061279297, 4.446132659912109, 4.733196258544922, 5.020259857177734, 5.307323932647705, 5.594387531280518, 5.88145112991333, 6.168515205383301, 6.455578804016113, 6.742642402648926, 7.029706001281738, 7.316769599914551, 7.6038336753845215, 7.890897274017334, 8.177961349487305, 8.465024948120117, 8.75208854675293, 9.039152145385742, 9.326215744018555, 9.613279342651367, 9.90034294128418, 10.187406539916992, 10.474471092224121, 10.761534690856934, 11.048598289489746, 11.335661888122559, 11.622725486755371, 11.909789085388184, 12.196852684020996, 12.483917236328125, 12.770980834960938, 13.05804443359375, 13.345108032226562, 13.632171630859375, 13.919235229492188, 14.206298828125, 14.493362426757812, 14.780426025390625, 15.067490577697754, 15.354554176330566, 15.641617774963379, 15.928681373596191]}, "gradients/encoder.encoder.layers.15.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 2.0, 4.0, 3.0, 0.0, 4.0, 4.0, 5.0, 9.0, 4.0, 5.0, 9.0, 18.0, 16.0, 26.0, 26.0, 31.0, 24.0, 38.0, 37.0, 35.0, 42.0, 55.0, 58.0, 50.0, 36.0, 44.0, 57.0, 45.0, 49.0, 33.0, 33.0, 39.0, 31.0, 24.0, 18.0, 18.0, 13.0, 16.0, 16.0, 9.0, 7.0, 3.0, 7.0, 5.0, 4.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-1.7116096019744873, -1.6607743501663208, -1.6099392175674438, -1.5591039657592773, -1.5082688331604004, -1.4574335813522339, -1.4065983295440674, -1.3557631969451904, -1.304927945137024, -1.2540926933288574, -1.2032575607299805, -1.152422308921814, -1.1015870571136475, -1.0507519245147705, -0.999916672706604, -0.9490814805030823, -0.8982462882995605, -0.8474110960960388, -0.7965759038925171, -0.7457406520843506, -0.6949054598808289, -0.6440702676773071, -0.5932350158691406, -0.5423998236656189, -0.49156463146209717, -0.44072943925857544, -0.3898942172527313, -0.3390589952468872, -0.2882238030433655, -0.23738861083984375, -0.18655338883399963, -0.13571816682815552, -0.08488297462463379, -0.03404776751995087, 0.016787439584732056, 0.06762264668941498, 0.1184578537940979, 0.16929304599761963, 0.22012826800346375, 0.27096349000930786, 0.3217986822128296, 0.3726338744163513, 0.42346909642219543, 0.47430431842803955, 0.5251395106315613, 0.575974702835083, 0.6268099546432495, 0.6776451468467712, 0.728480339050293, 0.7793155312538147, 0.8301507234573364, 0.8809859752655029, 0.9318211674690247, 0.9826563596725464, 1.033491611480713, 1.0843267440795898, 1.1351619958877563, 1.1859972476959229, 1.2368323802947998, 1.2876676321029663, 1.3385028839111328, 1.3893380165100098, 1.4401732683181763, 1.4910085201263428, 1.5418436527252197]}, "gradients/encoder.encoder.layers.15.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 3.0, 4.0, 2.0, 2.0, 5.0, 7.0, 6.0, 13.0, 16.0, 15.0, 30.0, 42.0, 69.0, 84.0, 171.0, 279.0, 542.0, 1181.0, 2723.0, 9471.0, 56675.0, 668246.0, 274137.0, 25540.0, 5519.0, 1905.0, 787.0, 452.0, 233.0, 134.0, 80.0, 66.0, 43.0, 22.0, 6.0, 14.0, 8.0, 10.0, 5.0, 4.0, 5.0, 1.0, 0.0, 3.0, 1.0, 4.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-1.6953125, -1.6402740478515625, -1.585235595703125, -1.5301971435546875, -1.47515869140625, -1.4201202392578125, -1.365081787109375, -1.3100433349609375, -1.2550048828125, -1.1999664306640625, -1.144927978515625, -1.0898895263671875, -1.03485107421875, -0.9798126220703125, -0.924774169921875, -0.8697357177734375, -0.814697265625, -0.7596588134765625, -0.704620361328125, -0.6495819091796875, -0.59454345703125, -0.5395050048828125, -0.484466552734375, -0.4294281005859375, -0.3743896484375, -0.3193511962890625, -0.264312744140625, -0.2092742919921875, -0.15423583984375, -0.0991973876953125, -0.044158935546875, 0.0108795166015625, 0.06591796875, 0.1209564208984375, 0.175994873046875, 0.2310333251953125, 0.28607177734375, 0.3411102294921875, 0.396148681640625, 0.4511871337890625, 0.5062255859375, 0.5612640380859375, 0.616302490234375, 0.6713409423828125, 0.72637939453125, 0.7814178466796875, 0.836456298828125, 0.8914947509765625, 0.946533203125, 1.0015716552734375, 1.056610107421875, 1.1116485595703125, 1.16668701171875, 1.2217254638671875, 1.276763916015625, 1.3318023681640625, 1.3868408203125, 1.4418792724609375, 1.496917724609375, 1.5519561767578125, 1.60699462890625, 1.6620330810546875, 1.717071533203125, 1.7721099853515625, 1.8271484375]}, "gradients/encoder.encoder.layers.15.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 0.0, 4.0, 3.0, 5.0, 8.0, 8.0, 17.0, 40.0, 33.0, 68.0, 77.0, 92.0, 108.0, 124.0, 106.0, 77.0, 82.0, 55.0, 39.0, 22.0, 9.0, 11.0, 6.0, 2.0, 4.0, 3.0, 1.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.64453125, -0.625152587890625, -0.60577392578125, -0.586395263671875, -0.5670166015625, -0.547637939453125, -0.52825927734375, -0.508880615234375, -0.489501953125, -0.470123291015625, -0.45074462890625, -0.431365966796875, -0.4119873046875, -0.392608642578125, -0.37322998046875, -0.353851318359375, -0.33447265625, -0.315093994140625, -0.29571533203125, -0.276336669921875, -0.2569580078125, -0.237579345703125, -0.21820068359375, -0.198822021484375, -0.179443359375, -0.160064697265625, -0.14068603515625, -0.121307373046875, -0.1019287109375, -0.082550048828125, -0.06317138671875, -0.043792724609375, -0.0244140625, -0.005035400390625, 0.01434326171875, 0.033721923828125, 0.0531005859375, 0.072479248046875, 0.09185791015625, 0.111236572265625, 0.130615234375, 0.149993896484375, 0.16937255859375, 0.188751220703125, 0.2081298828125, 0.227508544921875, 0.24688720703125, 0.266265869140625, 0.28564453125, 0.305023193359375, 0.32440185546875, 0.343780517578125, 0.3631591796875, 0.382537841796875, 0.40191650390625, 0.421295166015625, 0.440673828125, 0.460052490234375, 0.47943115234375, 0.498809814453125, 0.5181884765625, 0.537567138671875, 0.55694580078125, 0.576324462890625, 0.595703125]}, "gradients/encoder.encoder.layers.15.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 4.0, 3.0, 5.0, 4.0, 8.0, 4.0, 10.0, 18.0, 19.0, 42.0, 42.0, 68.0, 67.0, 109.0, 159.0, 253.0, 439.0, 832.0, 2176.0, 7868.0, 42560.0, 386552.0, 533299.0, 59074.0, 9975.0, 2687.0, 974.0, 460.0, 275.0, 183.0, 109.0, 94.0, 51.0, 32.0, 31.0, 22.0, 14.0, 12.0, 10.0, 4.0, 5.0, 1.0, 3.0, 4.0, 2.0, 2.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.40234375, -1.359954833984375, -1.31756591796875, -1.275177001953125, -1.2327880859375, -1.190399169921875, -1.14801025390625, -1.105621337890625, -1.063232421875, -1.020843505859375, -0.97845458984375, -0.936065673828125, -0.8936767578125, -0.851287841796875, -0.80889892578125, -0.766510009765625, -0.72412109375, -0.681732177734375, -0.63934326171875, -0.596954345703125, -0.5545654296875, -0.512176513671875, -0.46978759765625, -0.427398681640625, -0.385009765625, -0.342620849609375, -0.30023193359375, -0.257843017578125, -0.2154541015625, -0.173065185546875, -0.13067626953125, -0.088287353515625, -0.0458984375, -0.003509521484375, 0.03887939453125, 0.081268310546875, 0.1236572265625, 0.166046142578125, 0.20843505859375, 0.250823974609375, 0.293212890625, 0.335601806640625, 0.37799072265625, 0.420379638671875, 0.4627685546875, 0.505157470703125, 0.54754638671875, 0.589935302734375, 0.63232421875, 0.674713134765625, 0.71710205078125, 0.759490966796875, 0.8018798828125, 0.844268798828125, 0.88665771484375, 0.929046630859375, 0.971435546875, 1.013824462890625, 1.05621337890625, 1.098602294921875, 1.1409912109375, 1.183380126953125, 1.22576904296875, 1.268157958984375, 1.310546875]}, "gradients/encoder.encoder.layers.15.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 4.0, 4.0, 0.0, 2.0, 3.0, 7.0, 10.0, 12.0, 11.0, 10.0, 15.0, 24.0, 23.0, 25.0, 37.0, 31.0, 28.0, 41.0, 43.0, 35.0, 65.0, 54.0, 45.0, 56.0, 46.0, 58.0, 48.0, 38.0, 43.0, 37.0, 28.0, 30.0, 16.0, 16.0, 5.0, 12.0, 12.0, 13.0, 10.0, 5.0, 3.0, 4.0, 0.0, 4.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.6572265625, -1.610321044921875, -1.56341552734375, -1.516510009765625, -1.4696044921875, -1.422698974609375, -1.37579345703125, -1.328887939453125, -1.281982421875, -1.235076904296875, -1.18817138671875, -1.141265869140625, -1.0943603515625, -1.047454833984375, -1.00054931640625, -0.953643798828125, -0.90673828125, -0.859832763671875, -0.81292724609375, -0.766021728515625, -0.7191162109375, -0.672210693359375, -0.62530517578125, -0.578399658203125, -0.531494140625, -0.484588623046875, -0.43768310546875, -0.390777587890625, -0.3438720703125, -0.296966552734375, -0.25006103515625, -0.203155517578125, -0.15625, -0.109344482421875, -0.06243896484375, -0.015533447265625, 0.0313720703125, 0.078277587890625, 0.12518310546875, 0.172088623046875, 0.218994140625, 0.265899658203125, 0.31280517578125, 0.359710693359375, 0.4066162109375, 0.453521728515625, 0.50042724609375, 0.547332763671875, 0.59423828125, 0.641143798828125, 0.68804931640625, 0.734954833984375, 0.7818603515625, 0.828765869140625, 0.87567138671875, 0.922576904296875, 0.969482421875, 1.016387939453125, 1.06329345703125, 1.110198974609375, 1.1571044921875, 1.204010009765625, 1.25091552734375, 1.297821044921875, 1.3447265625]}, "gradients/encoder.encoder.layers.15.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 1.0, 4.0, 11.0, 30.0, 44.0, 148.0, 708.0, 8281.0, 1005315.0, 32505.0, 1160.0, 260.0, 53.0, 25.0, 7.0, 5.0, 2.0, 3.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.4423828125, -1.40301513671875, -1.3636474609375, -1.32427978515625, -1.284912109375, -1.24554443359375, -1.2061767578125, -1.16680908203125, -1.12744140625, -1.08807373046875, -1.0487060546875, -1.00933837890625, -0.969970703125, -0.93060302734375, -0.8912353515625, -0.85186767578125, -0.8125, -0.77313232421875, -0.7337646484375, -0.69439697265625, -0.655029296875, -0.61566162109375, -0.5762939453125, -0.53692626953125, -0.49755859375, -0.45819091796875, -0.4188232421875, -0.37945556640625, -0.340087890625, -0.30072021484375, -0.2613525390625, -0.22198486328125, -0.1826171875, -0.14324951171875, -0.1038818359375, -0.06451416015625, -0.025146484375, 0.01422119140625, 0.0535888671875, 0.09295654296875, 0.13232421875, 0.17169189453125, 0.2110595703125, 0.25042724609375, 0.289794921875, 0.32916259765625, 0.3685302734375, 0.40789794921875, 0.447265625, 0.48663330078125, 0.5260009765625, 0.56536865234375, 0.604736328125, 0.64410400390625, 0.6834716796875, 0.72283935546875, 0.76220703125, 0.80157470703125, 0.8409423828125, 0.88031005859375, 0.919677734375, 0.95904541015625, 0.9984130859375, 1.03778076171875, 1.0771484375]}, "gradients/encoder.encoder.layers.15.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 2.0, 3.0, 2.0, 2.0, 4.0, 5.0, 12.0, 10.0, 17.0, 23.0, 32.0, 66.0, 94.0, 158.0, 213.0, 145.0, 77.0, 40.0, 32.0, 23.0, 15.0, 11.0, 6.0, 7.0, 4.0, 1.0, 2.0, 4.0, 1.0, 0.0, 2.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00014197826385498047, -0.00013698451220989227, -0.00013199076056480408, -0.00012699700891971588, -0.00012200325727462769, -0.00011700950562953949, -0.0001120157539844513, -0.0001070220023393631, -0.0001020282506942749, -9.70344990491867e-05, -9.204074740409851e-05, -8.704699575901031e-05, -8.205324411392212e-05, -7.705949246883392e-05, -7.206574082374573e-05, -6.707198917865753e-05, -6.207823753356934e-05, -5.708448588848114e-05, -5.2090734243392944e-05, -4.709698259830475e-05, -4.210323095321655e-05, -3.710947930812836e-05, -3.211572766304016e-05, -2.7121976017951965e-05, -2.212822437286377e-05, -1.7134472727775574e-05, -1.2140721082687378e-05, -7.146969437599182e-06, -2.1532177925109863e-06, 2.8405338525772095e-06, 7.834285497665405e-06, 1.2828037142753601e-05, 1.7821788787841797e-05, 2.2815540432929993e-05, 2.780929207801819e-05, 3.2803043723106384e-05, 3.779679536819458e-05, 4.2790547013282776e-05, 4.778429865837097e-05, 5.277805030345917e-05, 5.777180194854736e-05, 6.276555359363556e-05, 6.775930523872375e-05, 7.275305688381195e-05, 7.774680852890015e-05, 8.274056017398834e-05, 8.773431181907654e-05, 9.272806346416473e-05, 9.772181510925293e-05, 0.00010271556675434113, 0.00010770931839942932, 0.00011270307004451752, 0.00011769682168960571, 0.0001226905733346939, 0.0001276843249797821, 0.0001326780766248703, 0.0001376718282699585, 0.0001426655799150467, 0.0001476593315601349, 0.00015265308320522308, 0.00015764683485031128, 0.00016264058649539948, 0.00016763433814048767, 0.00017262808978557587, 0.00017762184143066406]}, "gradients/encoder.encoder.layers.15.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 3.0, 0.0, 2.0, 4.0, 3.0, 7.0, 13.0, 29.0, 52.0, 118.0, 353.0, 1909.0, 307127.0, 735673.0, 2589.0, 424.0, 131.0, 56.0, 37.0, 17.0, 7.0, 4.0, 2.0, 3.0, 2.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.9609375, -0.9192657470703125, -0.877593994140625, -0.8359222412109375, -0.79425048828125, -0.7525787353515625, -0.710906982421875, -0.6692352294921875, -0.6275634765625, -0.5858917236328125, -0.544219970703125, -0.5025482177734375, -0.46087646484375, -0.4192047119140625, -0.377532958984375, -0.3358612060546875, -0.294189453125, -0.2525177001953125, -0.210845947265625, -0.1691741943359375, -0.12750244140625, -0.0858306884765625, -0.044158935546875, -0.0024871826171875, 0.0391845703125, 0.0808563232421875, 0.122528076171875, 0.1641998291015625, 0.20587158203125, 0.2475433349609375, 0.289215087890625, 0.3308868408203125, 0.37255859375, 0.4142303466796875, 0.455902099609375, 0.4975738525390625, 0.53924560546875, 0.5809173583984375, 0.622589111328125, 0.6642608642578125, 0.7059326171875, 0.7476043701171875, 0.789276123046875, 0.8309478759765625, 0.87261962890625, 0.9142913818359375, 0.955963134765625, 0.9976348876953125, 1.039306640625, 1.0809783935546875, 1.122650146484375, 1.1643218994140625, 1.20599365234375, 1.2476654052734375, 1.289337158203125, 1.3310089111328125, 1.3726806640625, 1.4143524169921875, 1.456024169921875, 1.4976959228515625, 1.53936767578125, 1.5810394287109375, 1.622711181640625, 1.6643829345703125, 1.7060546875]}, "gradients/encoder.encoder.layers.15.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 4.0, 3.0, 15.0, 19.0, 35.0, 74.0, 200.0, 299.0, 204.0, 87.0, 41.0, 24.0, 8.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.72021484375, -0.6896591186523438, -0.6591033935546875, -0.6285476684570312, -0.597991943359375, -0.5674362182617188, -0.5368804931640625, -0.5063247680664062, -0.47576904296875, -0.44521331787109375, -0.4146575927734375, -0.38410186767578125, -0.353546142578125, -0.32299041748046875, -0.2924346923828125, -0.26187896728515625, -0.2313232421875, -0.20076751708984375, -0.1702117919921875, -0.13965606689453125, -0.109100341796875, -0.07854461669921875, -0.0479888916015625, -0.01743316650390625, 0.01312255859375, 0.04367828369140625, 0.0742340087890625, 0.10478973388671875, 0.135345458984375, 0.16590118408203125, 0.1964569091796875, 0.22701263427734375, 0.257568359375, 0.28812408447265625, 0.3186798095703125, 0.34923553466796875, 0.379791259765625, 0.41034698486328125, 0.4409027099609375, 0.47145843505859375, 0.50201416015625, 0.5325698852539062, 0.5631256103515625, 0.5936813354492188, 0.624237060546875, 0.6547927856445312, 0.6853485107421875, 0.7159042358398438, 0.7464599609375, 0.7770156860351562, 0.8075714111328125, 0.8381271362304688, 0.868682861328125, 0.8992385864257812, 0.9297943115234375, 0.9603500366210938, 0.99090576171875, 1.0214614868164062, 1.0520172119140625, 1.0825729370117188, 1.113128662109375, 1.1436843872070312, 1.1742401123046875, 1.2047958374023438, 1.2353515625]}, "gradients/encoder.encoder.layers.15.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 3.0, 16.0, 21.0, 42.0, 171.0, 307.0, 257.0, 112.0, 53.0, 18.0, 8.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.163463592529297, -11.769636154174805, -11.375809669494629, -10.981982231140137, -10.588154792785645, -10.194328308105469, -9.800500869750977, -9.406673431396484, -9.012845993041992, -8.6190185546875, -8.225192070007324, -7.831364631652832, -7.43753719329834, -7.043710231781006, -6.649883270263672, -6.25605583190918, -5.862229347229004, -5.46840238571167, -5.074574947357178, -4.680747985839844, -4.286920547485352, -3.8930935859680176, -3.4992666244506836, -3.1054394245147705, -2.7116122245788574, -2.3177850246429443, -1.9239579439163208, -1.5301308631896973, -1.1363036632537842, -0.7424764633178711, -0.3486495018005371, 0.04517769813537598, 0.43900489807128906, 0.8328320384025574, 1.2266591787338257, 1.6204862594604492, 2.0143134593963623, 2.4081406593322754, 2.8019676208496094, 3.1957948207855225, 3.5896220207214355, 3.9834492206573486, 4.377276420593262, 4.771103382110596, 5.16493034362793, 5.558757781982422, 5.952584743499756, 6.34641170501709, 6.740239143371582, 7.134066104888916, 7.527893543243408, 7.921720504760742, 8.315547943115234, 8.709375381469727, 9.103201866149902, 9.497029304504395, 9.89085578918457, 10.284683227539062, 10.678509712219238, 11.07233715057373, 11.466164588928223, 11.859991073608398, 12.25381851196289, 12.647645950317383, 13.041473388671875]}, "gradients/encoder.encoder.layers.15.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 4.0, 1.0, 2.0, 6.0, 6.0, 11.0, 10.0, 12.0, 14.0, 14.0, 18.0, 22.0, 21.0, 22.0, 36.0, 23.0, 51.0, 51.0, 58.0, 49.0, 52.0, 50.0, 50.0, 44.0, 51.0, 43.0, 34.0, 49.0, 34.0, 35.0, 24.0, 16.0, 20.0, 16.0, 10.0, 10.0, 12.0, 10.0, 4.0, 11.0, 3.0, 2.0, 5.0, 1.0, 0.0, 2.0], "bins": [-9.796653747558594, -9.548315048217773, -9.299976348876953, -9.05163860321045, -8.803299903869629, -8.554961204528809, -8.306622505187988, -8.058284759521484, -7.809946060180664, -7.561607360839844, -7.313269138336182, -7.064930438995361, -6.816592216491699, -6.568253517150879, -6.319914817810059, -6.0715765953063965, -5.823237895965576, -5.574899196624756, -5.326560974121094, -5.078222274780273, -4.829884052276611, -4.581545352935791, -4.333207130432129, -4.084868431091309, -3.8365299701690674, -3.588191509246826, -3.339853048324585, -3.0915145874023438, -2.8431758880615234, -2.5948376655578613, -2.346498966217041, -2.0981605052948, -1.8498215675354004, -1.6014831066131592, -1.353144645690918, -1.1048060655593872, -0.856467604637146, -0.6081291437149048, -0.359790563583374, -0.11145210266113281, 0.1368863582611084, 0.385224848985672, 0.6335633397102356, 0.8819018602371216, 1.1302403211593628, 1.378578782081604, 1.6269173622131348, 1.875255823135376, 2.123594284057617, 2.3719327449798584, 2.6202712059020996, 2.86860990524292, 3.116948127746582, 3.3652868270874023, 3.6136252880096436, 3.8619637489318848, 4.110301971435547, 4.358640670776367, 4.606978893280029, 4.85531759262085, 5.103655815124512, 5.351994514465332, 5.600333213806152, 5.8486714363098145, 6.097010135650635]}, "gradients/encoder.encoder.layers.14.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 0.0, 4.0, 4.0, 0.0, 5.0, 2.0, 5.0, 7.0, 9.0, 12.0, 6.0, 19.0, 17.0, 27.0, 37.0, 41.0, 54.0, 57.0, 100.0, 139.0, 273.0, 392.0, 734.0, 1718.0, 3988.0, 11620.0, 54090.0, 3711002.0, 370534.0, 26397.0, 7184.0, 2753.0, 1236.0, 668.0, 375.0, 197.0, 144.0, 117.0, 69.0, 54.0, 42.0, 34.0, 22.0, 30.0, 17.0, 8.0, 9.0, 7.0, 5.0, 5.0, 7.0, 10.0, 2.0, 1.0, 4.0, 1.0, 1.0, 0.0, 1.0, 2.0, 2.0], "bins": [-1.1982421875, -1.1593475341796875, -1.120452880859375, -1.0815582275390625, -1.04266357421875, -1.0037689208984375, -0.964874267578125, -0.9259796142578125, -0.8870849609375, -0.8481903076171875, -0.809295654296875, -0.7704010009765625, -0.73150634765625, -0.6926116943359375, -0.653717041015625, -0.6148223876953125, -0.575927734375, -0.5370330810546875, -0.498138427734375, -0.4592437744140625, -0.42034912109375, -0.3814544677734375, -0.342559814453125, -0.3036651611328125, -0.2647705078125, -0.2258758544921875, -0.186981201171875, -0.1480865478515625, -0.10919189453125, -0.0702972412109375, -0.031402587890625, 0.0074920654296875, 0.04638671875, 0.0852813720703125, 0.124176025390625, 0.1630706787109375, 0.20196533203125, 0.2408599853515625, 0.279754638671875, 0.3186492919921875, 0.3575439453125, 0.3964385986328125, 0.435333251953125, 0.4742279052734375, 0.51312255859375, 0.5520172119140625, 0.590911865234375, 0.6298065185546875, 0.668701171875, 0.7075958251953125, 0.746490478515625, 0.7853851318359375, 0.82427978515625, 0.8631744384765625, 0.902069091796875, 0.9409637451171875, 0.9798583984375, 1.0187530517578125, 1.057647705078125, 1.0965423583984375, 1.13543701171875, 1.1743316650390625, 1.213226318359375, 1.2521209716796875, 1.291015625]}, "gradients/encoder.encoder.layers.14.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 3.0, 1.0, 3.0, 6.0, 9.0, 11.0, 31.0, 56.0, 60.0, 68.0, 76.0, 117.0, 118.0, 109.0, 119.0, 80.0, 50.0, 32.0, 22.0, 13.0, 10.0, 4.0, 5.0, 3.0, 2.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0], "bins": [-0.7685546875, -0.746856689453125, -0.72515869140625, -0.703460693359375, -0.6817626953125, -0.660064697265625, -0.63836669921875, -0.616668701171875, -0.594970703125, -0.573272705078125, -0.55157470703125, -0.529876708984375, -0.5081787109375, -0.486480712890625, -0.46478271484375, -0.443084716796875, -0.42138671875, -0.399688720703125, -0.37799072265625, -0.356292724609375, -0.3345947265625, -0.312896728515625, -0.29119873046875, -0.269500732421875, -0.247802734375, -0.226104736328125, -0.20440673828125, -0.182708740234375, -0.1610107421875, -0.139312744140625, -0.11761474609375, -0.095916748046875, -0.07421875, -0.052520751953125, -0.03082275390625, -0.009124755859375, 0.0125732421875, 0.034271240234375, 0.05596923828125, 0.077667236328125, 0.099365234375, 0.121063232421875, 0.14276123046875, 0.164459228515625, 0.1861572265625, 0.207855224609375, 0.22955322265625, 0.251251220703125, 0.27294921875, 0.294647216796875, 0.31634521484375, 0.338043212890625, 0.3597412109375, 0.381439208984375, 0.40313720703125, 0.424835205078125, 0.446533203125, 0.468231201171875, 0.48992919921875, 0.511627197265625, 0.5333251953125, 0.555023193359375, 0.57672119140625, 0.598419189453125, 0.6201171875]}, "gradients/encoder.encoder.layers.14.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 0.0, 2.0, 0.0, 3.0, 4.0, 13.0, 35.0, 71.0, 120.0, 304.0, 705.0, 2023.0, 13032.0, 3853043.0, 315566.0, 6800.0, 1480.0, 603.0, 254.0, 118.0, 61.0, 30.0, 11.0, 4.0, 1.0, 1.0, 2.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-2.359375, -2.289703369140625, -2.22003173828125, -2.150360107421875, -2.0806884765625, -2.011016845703125, -1.94134521484375, -1.871673583984375, -1.802001953125, -1.732330322265625, -1.66265869140625, -1.592987060546875, -1.5233154296875, -1.453643798828125, -1.38397216796875, -1.314300537109375, -1.24462890625, -1.174957275390625, -1.10528564453125, -1.035614013671875, -0.9659423828125, -0.896270751953125, -0.82659912109375, -0.756927490234375, -0.687255859375, -0.617584228515625, -0.54791259765625, -0.478240966796875, -0.4085693359375, -0.338897705078125, -0.26922607421875, -0.199554443359375, -0.1298828125, -0.060211181640625, 0.00946044921875, 0.079132080078125, 0.1488037109375, 0.218475341796875, 0.28814697265625, 0.357818603515625, 0.427490234375, 0.497161865234375, 0.56683349609375, 0.636505126953125, 0.7061767578125, 0.775848388671875, 0.84552001953125, 0.915191650390625, 0.98486328125, 1.054534912109375, 1.12420654296875, 1.193878173828125, 1.2635498046875, 1.333221435546875, 1.40289306640625, 1.472564697265625, 1.542236328125, 1.611907958984375, 1.68157958984375, 1.751251220703125, 1.8209228515625, 1.890594482421875, 1.96026611328125, 2.029937744140625, 2.099609375]}, "gradients/encoder.encoder.layers.14.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 4.0, 6.0, 6.0, 5.0, 10.0, 17.0, 33.0, 49.0, 167.0, 1073.0, 2208.0, 298.0, 89.0, 45.0, 22.0, 20.0, 9.0, 5.0, 5.0, 5.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.53173828125, -0.5113983154296875, -0.491058349609375, -0.4707183837890625, -0.45037841796875, -0.4300384521484375, -0.409698486328125, -0.3893585205078125, -0.3690185546875, -0.3486785888671875, -0.328338623046875, -0.3079986572265625, -0.28765869140625, -0.2673187255859375, -0.246978759765625, -0.2266387939453125, -0.206298828125, -0.1859588623046875, -0.165618896484375, -0.1452789306640625, -0.12493896484375, -0.1045989990234375, -0.084259033203125, -0.0639190673828125, -0.0435791015625, -0.0232391357421875, -0.002899169921875, 0.0174407958984375, 0.03778076171875, 0.0581207275390625, 0.078460693359375, 0.0988006591796875, 0.119140625, 0.1394805908203125, 0.159820556640625, 0.1801605224609375, 0.20050048828125, 0.2208404541015625, 0.241180419921875, 0.2615203857421875, 0.2818603515625, 0.3022003173828125, 0.322540283203125, 0.3428802490234375, 0.36322021484375, 0.3835601806640625, 0.403900146484375, 0.4242401123046875, 0.444580078125, 0.4649200439453125, 0.485260009765625, 0.5055999755859375, 0.52593994140625, 0.5462799072265625, 0.566619873046875, 0.5869598388671875, 0.6072998046875, 0.6276397705078125, 0.647979736328125, 0.6683197021484375, 0.68865966796875, 0.7089996337890625, 0.729339599609375, 0.7496795654296875, 0.77001953125]}, "gradients/encoder.encoder.layers.14.final_layer_norm.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 7.0, 22.0, 96.0, 328.0, 355.0, 148.0, 38.0, 9.0, 4.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-3.1776537895202637, -3.0316455364227295, -2.885637044906616, -2.739628791809082, -2.5936203002929688, -2.4476120471954346, -2.3016037940979004, -2.155595302581787, -2.009587049484253, -1.8635786771774292, -1.7175703048706055, -1.5715620517730713, -1.4255536794662476, -1.2795453071594238, -1.1335370540618896, -0.9875286817550659, -0.8415203094482422, -0.6955119371414185, -0.5495036244392395, -0.40349528193473816, -0.2574869394302368, -0.11147856712341309, 0.03452974557876587, 0.18053805828094482, 0.32654643058776855, 0.4725547730922699, 0.6185631155967712, 0.7645714282989502, 0.9105798006057739, 1.0565881729125977, 1.2025964260101318, 1.3486047983169556, 1.4946131706237793, 1.640621542930603, 1.7866299152374268, 1.932638168334961, 2.078646659851074, 2.2246549129486084, 2.3706631660461426, 2.516671657562256, 2.66267991065979, 2.808688163757324, 2.9546966552734375, 3.1007049083709717, 3.246713161468506, 3.392721652984619, 3.5387299060821533, 3.6847381591796875, 3.830746650695801, 3.976754903793335, 4.122763156890869, 4.268771648406982, 4.414780139923096, 4.560788154602051, 4.706796646118164, 4.852805137634277, 4.998813629150391, 5.144822120666504, 5.290830135345459, 5.436838626861572, 5.5828471183776855, 5.728855133056641, 5.874863624572754, 6.020872116088867, 6.166880130767822]}, "gradients/encoder.encoder.layers.14.final_layer_norm.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 7.0, 8.0, 13.0, 23.0, 22.0, 36.0, 56.0, 43.0, 58.0, 80.0, 84.0, 74.0, 74.0, 66.0, 61.0, 70.0, 52.0, 44.0, 47.0, 36.0, 17.0, 17.0, 7.0, 4.0, 5.0, 4.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.9207372665405273, -1.852354645729065, -1.783972144126892, -1.7155895233154297, -1.6472070217132568, -1.5788244009017944, -1.510441780090332, -1.4420592784881592, -1.3736766576766968, -1.3052940368652344, -1.2369115352630615, -1.1685289144515991, -1.1001462936401367, -1.0317637920379639, -0.9633811712265015, -0.8949986100196838, -0.8266160488128662, -0.7582334876060486, -0.689850926399231, -0.6214683055877686, -0.5530857443809509, -0.4847031831741333, -0.4163205921649933, -0.34793800115585327, -0.27955543994903564, -0.21117286384105682, -0.142790287733078, -0.07440771162509918, -0.006025135517120361, 0.062357425689697266, 0.13074001669883728, 0.1991226077079773, 0.267505407333374, 0.33588796854019165, 0.40427055954933167, 0.4726531505584717, 0.5410357117652893, 0.6094182729721069, 0.6778008937835693, 0.746183454990387, 0.8145660161972046, 0.8829485774040222, 0.9513311386108398, 1.0197137594223022, 1.0880963802337646, 1.1564788818359375, 1.2248615026474, 1.2932441234588623, 1.3616266250610352, 1.4300092458724976, 1.4983917474746704, 1.5667743682861328, 1.6351568698883057, 1.703539490699768, 1.7719221115112305, 1.8403046131134033, 1.9086872339248657, 1.9770698547363281, 2.045452356338501, 2.113834857940674, 2.182217597961426, 2.2506000995635986, 2.3189826011657715, 2.3873653411865234, 2.4557478427886963]}, "gradients/encoder.encoder.layers.14.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 4.0, 1.0, 0.0, 0.0, 1.0, 2.0, 3.0, 7.0, 7.0, 21.0, 42.0, 132.0, 273.0, 921.0, 4979.0, 184829.0, 840916.0, 14082.0, 1622.0, 440.0, 167.0, 53.0, 36.0, 9.0, 7.0, 2.0, 4.0, 2.0, 5.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-3.63671875, -3.52716064453125, -3.4176025390625, -3.30804443359375, -3.198486328125, -3.08892822265625, -2.9793701171875, -2.86981201171875, -2.76025390625, -2.65069580078125, -2.5411376953125, -2.43157958984375, -2.322021484375, -2.21246337890625, -2.1029052734375, -1.99334716796875, -1.8837890625, -1.77423095703125, -1.6646728515625, -1.55511474609375, -1.445556640625, -1.33599853515625, -1.2264404296875, -1.11688232421875, -1.00732421875, -0.89776611328125, -0.7882080078125, -0.67864990234375, -0.569091796875, -0.45953369140625, -0.3499755859375, -0.24041748046875, -0.130859375, -0.02130126953125, 0.0882568359375, 0.19781494140625, 0.307373046875, 0.41693115234375, 0.5264892578125, 0.63604736328125, 0.74560546875, 0.85516357421875, 0.9647216796875, 1.07427978515625, 1.183837890625, 1.29339599609375, 1.4029541015625, 1.51251220703125, 1.6220703125, 1.73162841796875, 1.8411865234375, 1.95074462890625, 2.060302734375, 2.16986083984375, 2.2794189453125, 2.38897705078125, 2.49853515625, 2.60809326171875, 2.7176513671875, 2.82720947265625, 2.936767578125, 3.04632568359375, 3.1558837890625, 3.26544189453125, 3.375]}, "gradients/encoder.encoder.layers.14.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 3.0, 1.0, 3.0, 1.0, 11.0, 9.0, 25.0, 39.0, 62.0, 62.0, 81.0, 93.0, 125.0, 126.0, 109.0, 93.0, 59.0, 42.0, 25.0, 15.0, 8.0, 7.0, 3.0, 5.0, 2.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-0.80517578125, -0.782684326171875, -0.76019287109375, -0.737701416015625, -0.7152099609375, -0.692718505859375, -0.67022705078125, -0.647735595703125, -0.625244140625, -0.602752685546875, -0.58026123046875, -0.557769775390625, -0.5352783203125, -0.512786865234375, -0.49029541015625, -0.467803955078125, -0.4453125, -0.422821044921875, -0.40032958984375, -0.377838134765625, -0.3553466796875, -0.332855224609375, -0.31036376953125, -0.287872314453125, -0.265380859375, -0.242889404296875, -0.22039794921875, -0.197906494140625, -0.1754150390625, -0.152923583984375, -0.13043212890625, -0.107940673828125, -0.08544921875, -0.062957763671875, -0.04046630859375, -0.017974853515625, 0.0045166015625, 0.027008056640625, 0.04949951171875, 0.071990966796875, 0.094482421875, 0.116973876953125, 0.13946533203125, 0.161956787109375, 0.1844482421875, 0.206939697265625, 0.22943115234375, 0.251922607421875, 0.2744140625, 0.296905517578125, 0.31939697265625, 0.341888427734375, 0.3643798828125, 0.386871337890625, 0.40936279296875, 0.431854248046875, 0.454345703125, 0.476837158203125, 0.49932861328125, 0.521820068359375, 0.5443115234375, 0.566802978515625, 0.58929443359375, 0.611785888671875, 0.63427734375]}, "gradients/encoder.encoder.layers.14.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 3.0, 5.0, 8.0, 12.0, 11.0, 15.0, 20.0, 30.0, 38.0, 59.0, 65.0, 106.0, 136.0, 211.0, 423.0, 1118.0, 3359.0, 13662.0, 87528.0, 663825.0, 238887.0, 29776.0, 6022.0, 1737.0, 666.0, 307.0, 163.0, 108.0, 67.0, 52.0, 42.0, 32.0, 12.0, 16.0, 7.0, 11.0, 9.0, 4.0, 5.0, 1.0, 1.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.0634765625, -1.023681640625, -0.98388671875, -0.944091796875, -0.904296875, -0.864501953125, -0.82470703125, -0.784912109375, -0.7451171875, -0.705322265625, -0.66552734375, -0.625732421875, -0.5859375, -0.546142578125, -0.50634765625, -0.466552734375, -0.4267578125, -0.386962890625, -0.34716796875, -0.307373046875, -0.267578125, -0.227783203125, -0.18798828125, -0.148193359375, -0.1083984375, -0.068603515625, -0.02880859375, 0.010986328125, 0.05078125, 0.090576171875, 0.13037109375, 0.170166015625, 0.2099609375, 0.249755859375, 0.28955078125, 0.329345703125, 0.369140625, 0.408935546875, 0.44873046875, 0.488525390625, 0.5283203125, 0.568115234375, 0.60791015625, 0.647705078125, 0.6875, 0.727294921875, 0.76708984375, 0.806884765625, 0.8466796875, 0.886474609375, 0.92626953125, 0.966064453125, 1.005859375, 1.045654296875, 1.08544921875, 1.125244140625, 1.1650390625, 1.204833984375, 1.24462890625, 1.284423828125, 1.32421875, 1.364013671875, 1.40380859375, 1.443603515625, 1.4833984375]}, "gradients/encoder.encoder.layers.14.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 4.0, 3.0, 1.0, 2.0, 3.0, 6.0, 5.0, 4.0, 10.0, 10.0, 4.0, 12.0, 17.0, 18.0, 17.0, 26.0, 24.0, 37.0, 30.0, 26.0, 23.0, 33.0, 39.0, 53.0, 39.0, 31.0, 57.0, 31.0, 50.0, 47.0, 39.0, 38.0, 36.0, 30.0, 27.0, 34.0, 18.0, 16.0, 13.0, 17.0, 12.0, 14.0, 11.0, 10.0, 8.0, 4.0, 9.0, 7.0, 1.0, 3.0, 1.0, 3.0, 0.0, 1.0, 3.0, 1.0, 1.0, 0.0, 2.0], "bins": [-1.216796875, -1.178497314453125, -1.14019775390625, -1.101898193359375, -1.0635986328125, -1.025299072265625, -0.98699951171875, -0.948699951171875, -0.910400390625, -0.872100830078125, -0.83380126953125, -0.795501708984375, -0.7572021484375, -0.718902587890625, -0.68060302734375, -0.642303466796875, -0.60400390625, -0.565704345703125, -0.52740478515625, -0.489105224609375, -0.4508056640625, -0.412506103515625, -0.37420654296875, -0.335906982421875, -0.297607421875, -0.259307861328125, -0.22100830078125, -0.182708740234375, -0.1444091796875, -0.106109619140625, -0.06781005859375, -0.029510498046875, 0.0087890625, 0.047088623046875, 0.08538818359375, 0.123687744140625, 0.1619873046875, 0.200286865234375, 0.23858642578125, 0.276885986328125, 0.315185546875, 0.353485107421875, 0.39178466796875, 0.430084228515625, 0.4683837890625, 0.506683349609375, 0.54498291015625, 0.583282470703125, 0.62158203125, 0.659881591796875, 0.69818115234375, 0.736480712890625, 0.7747802734375, 0.813079833984375, 0.85137939453125, 0.889678955078125, 0.927978515625, 0.966278076171875, 1.00457763671875, 1.042877197265625, 1.0811767578125, 1.119476318359375, 1.15777587890625, 1.196075439453125, 1.234375]}, "gradients/encoder.encoder.layers.14.attention.k_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 3.0, 2.0, 8.0, 10.0, 12.0, 15.0, 24.0, 40.0, 86.0, 164.0, 347.0, 947.0, 3354.0, 34046.0, 872948.0, 127775.0, 6474.0, 1352.0, 471.0, 215.0, 91.0, 67.0, 36.0, 25.0, 13.0, 8.0, 7.0, 5.0, 3.0, 3.0, 0.0, 0.0, 5.0, 2.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.6865234375, -0.6648483276367188, -0.6431732177734375, -0.6214981079101562, -0.599822998046875, -0.5781478881835938, -0.5564727783203125, -0.5347976684570312, -0.51312255859375, -0.49144744873046875, -0.4697723388671875, -0.44809722900390625, -0.426422119140625, -0.40474700927734375, -0.3830718994140625, -0.36139678955078125, -0.3397216796875, -0.31804656982421875, -0.2963714599609375, -0.27469635009765625, -0.253021240234375, -0.23134613037109375, -0.2096710205078125, -0.18799591064453125, -0.16632080078125, -0.14464569091796875, -0.1229705810546875, -0.10129547119140625, -0.079620361328125, -0.05794525146484375, -0.0362701416015625, -0.01459503173828125, 0.007080078125, 0.02875518798828125, 0.0504302978515625, 0.07210540771484375, 0.093780517578125, 0.11545562744140625, 0.1371307373046875, 0.15880584716796875, 0.18048095703125, 0.20215606689453125, 0.2238311767578125, 0.24550628662109375, 0.267181396484375, 0.28885650634765625, 0.3105316162109375, 0.33220672607421875, 0.3538818359375, 0.37555694580078125, 0.3972320556640625, 0.41890716552734375, 0.440582275390625, 0.46225738525390625, 0.4839324951171875, 0.5056076049804688, 0.52728271484375, 0.5489578247070312, 0.5706329345703125, 0.5923080444335938, 0.613983154296875, 0.6356582641601562, 0.6573333740234375, 0.6790084838867188, 0.70068359375]}, "gradients/encoder.encoder.layers.14.attention.k_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 1.0, 4.0, 2.0, 4.0, 6.0, 8.0, 8.0, 15.0, 6.0, 4.0, 14.0, 11.0, 17.0, 19.0, 36.0, 56.0, 104.0, 154.0, 195.0, 101.0, 69.0, 35.0, 25.0, 18.0, 20.0, 13.0, 13.0, 9.0, 6.0, 5.0, 7.0, 5.0, 3.0, 6.0, 4.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0001609325408935547, -0.000155741348862648, -0.00015055015683174133, -0.00014535896480083466, -0.00014016777276992798, -0.0001349765807390213, -0.00012978538870811462, -0.00012459419667720795, -0.00011940300464630127, -0.00011421181261539459, -0.00010902062058448792, -0.00010382942855358124, -9.863823652267456e-05, -9.344704449176788e-05, -8.82558524608612e-05, -8.306466042995453e-05, -7.787346839904785e-05, -7.268227636814117e-05, -6.74910843372345e-05, -6.229989230632782e-05, -5.710870027542114e-05, -5.1917508244514465e-05, -4.672631621360779e-05, -4.153512418270111e-05, -3.6343932151794434e-05, -3.1152740120887756e-05, -2.596154808998108e-05, -2.0770356059074402e-05, -1.5579164028167725e-05, -1.0387971997261047e-05, -5.19677996635437e-06, -5.587935447692871e-09, 5.185604095458984e-06, 1.0376796126365662e-05, 1.556798815727234e-05, 2.0759180188179016e-05, 2.5950372219085693e-05, 3.114156424999237e-05, 3.633275628089905e-05, 4.1523948311805725e-05, 4.67151403427124e-05, 5.190633237361908e-05, 5.709752440452576e-05, 6.228871643543243e-05, 6.747990846633911e-05, 7.267110049724579e-05, 7.786229252815247e-05, 8.305348455905914e-05, 8.824467658996582e-05, 9.34358686208725e-05, 9.862706065177917e-05, 0.00010381825268268585, 0.00010900944471359253, 0.0001142006367444992, 0.00011939182877540588, 0.00012458302080631256, 0.00012977421283721924, 0.00013496540486812592, 0.0001401565968990326, 0.00014534778892993927, 0.00015053898096084595, 0.00015573017299175262, 0.0001609213650226593, 0.00016611255705356598, 0.00017130374908447266]}, "gradients/encoder.encoder.layers.14.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 5.0, 5.0, 3.0, 2.0, 2.0, 10.0, 9.0, 17.0, 18.0, 21.0, 35.0, 53.0, 86.0, 236.0, 522.0, 1642.0, 6999.0, 72310.0, 849656.0, 105496.0, 8448.0, 1829.0, 606.0, 250.0, 110.0, 69.0, 25.0, 31.0, 17.0, 14.0, 6.0, 5.0, 6.0, 5.0, 5.0, 2.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.434326171875, -0.4173316955566406, -0.40033721923828125, -0.3833427429199219, -0.3663482666015625, -0.3493537902832031, -0.33235931396484375, -0.3153648376464844, -0.298370361328125, -0.2813758850097656, -0.26438140869140625, -0.24738693237304688, -0.2303924560546875, -0.21339797973632812, -0.19640350341796875, -0.17940902709960938, -0.16241455078125, -0.14542007446289062, -0.12842559814453125, -0.11143112182617188, -0.0944366455078125, -0.07744216918945312, -0.06044769287109375, -0.043453216552734375, -0.026458740234375, -0.009464263916015625, 0.00753021240234375, 0.024524688720703125, 0.0415191650390625, 0.058513641357421875, 0.07550811767578125, 0.09250259399414062, 0.1094970703125, 0.12649154663085938, 0.14348602294921875, 0.16048049926757812, 0.1774749755859375, 0.19446945190429688, 0.21146392822265625, 0.22845840454101562, 0.245452880859375, 0.2624473571777344, 0.27944183349609375, 0.2964363098144531, 0.3134307861328125, 0.3304252624511719, 0.34741973876953125, 0.3644142150878906, 0.38140869140625, 0.3984031677246094, 0.41539764404296875, 0.4323921203613281, 0.4493865966796875, 0.4663810729980469, 0.48337554931640625, 0.5003700256347656, 0.517364501953125, 0.5343589782714844, 0.5513534545898438, 0.5683479309082031, 0.5853424072265625, 0.6023368835449219, 0.6193313598632812, 0.6363258361816406, 0.6533203125]}, "gradients/encoder.encoder.layers.14.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 5.0, 2.0, 4.0, 5.0, 5.0, 7.0, 10.0, 20.0, 26.0, 31.0, 57.0, 64.0, 94.0, 139.0, 124.0, 121.0, 80.0, 58.0, 35.0, 41.0, 19.0, 15.0, 12.0, 8.0, 10.0, 3.0, 8.0, 3.0, 2.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.355224609375, -0.3400077819824219, -0.32479095458984375, -0.3095741271972656, -0.2943572998046875, -0.2791404724121094, -0.26392364501953125, -0.24870681762695312, -0.233489990234375, -0.21827316284179688, -0.20305633544921875, -0.18783950805664062, -0.1726226806640625, -0.15740585327148438, -0.14218902587890625, -0.12697219848632812, -0.11175537109375, -0.09653854370117188, -0.08132171630859375, -0.06610488891601562, -0.0508880615234375, -0.035671234130859375, -0.02045440673828125, -0.005237579345703125, 0.009979248046875, 0.025196075439453125, 0.04041290283203125, 0.055629730224609375, 0.0708465576171875, 0.08606338500976562, 0.10128021240234375, 0.11649703979492188, 0.1317138671875, 0.14693069458007812, 0.16214752197265625, 0.17736434936523438, 0.1925811767578125, 0.20779800415039062, 0.22301483154296875, 0.23823165893554688, 0.253448486328125, 0.2686653137207031, 0.28388214111328125, 0.2990989685058594, 0.3143157958984375, 0.3295326232910156, 0.34474945068359375, 0.3599662780761719, 0.37518310546875, 0.3903999328613281, 0.40561676025390625, 0.4208335876464844, 0.4360504150390625, 0.4512672424316406, 0.46648406982421875, 0.4817008972167969, 0.496917724609375, 0.5121345520019531, 0.5273513793945312, 0.5425682067871094, 0.5577850341796875, 0.5730018615722656, 0.5882186889648438, 0.6034355163574219, 0.61865234375]}, "gradients/encoder.encoder.layers.14.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 45.0, 533.0, 381.0, 44.0, 7.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-42.19060516357422, -41.18220901489258, -40.17381286621094, -39.1654167175293, -38.157020568847656, -37.148624420166016, -36.140228271484375, -35.131832122802734, -34.123435974121094, -33.11503982543945, -32.10664367675781, -31.098247528076172, -30.08985137939453, -29.08145523071289, -28.07305908203125, -27.06466293334961, -26.056264877319336, -25.047868728637695, -24.039472579956055, -23.031076431274414, -22.022680282592773, -21.014284133911133, -20.00588607788086, -18.99748992919922, -17.989093780517578, -16.980697631835938, -15.972301483154297, -14.963905334472656, -13.955509185791016, -12.947113037109375, -11.938715934753418, -10.930319786071777, -9.921924591064453, -8.913528442382812, -7.905132293701172, -6.896735668182373, -5.888339519500732, -4.879943370819092, -3.871546745300293, -2.8631505966186523, -1.8547544479370117, -0.8463581800460815, 0.16203808784484863, 1.1704344749450684, 2.178830623626709, 3.1872267723083496, 4.195623397827148, 5.204019546508789, 6.21241569519043, 7.22081184387207, 8.229207992553711, 9.237604141235352, 10.246000289916992, 11.254396438598633, 12.26279354095459, 13.27118968963623, 14.279585838317871, 15.287981986999512, 16.29637908935547, 17.30477523803711, 18.31317138671875, 19.32156753540039, 20.32996368408203, 21.338359832763672, 22.346755981445312]}, "gradients/encoder.encoder.layers.14.layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 3.0, 5.0, 6.0, 6.0, 8.0, 11.0, 8.0, 17.0, 18.0, 13.0, 26.0, 24.0, 35.0, 25.0, 24.0, 32.0, 39.0, 45.0, 36.0, 36.0, 37.0, 43.0, 43.0, 47.0, 41.0, 37.0, 32.0, 40.0, 35.0, 34.0, 31.0, 33.0, 22.0, 13.0, 15.0, 23.0, 12.0, 13.0, 9.0, 9.0, 9.0, 6.0, 7.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0], "bins": [-6.6352081298828125, -6.434624195098877, -6.234039783477783, -6.033455848693848, -5.832871913909912, -5.632287979125977, -5.431703567504883, -5.231119632720947, -5.030535697937012, -4.829951763153076, -4.629367351531982, -4.428783416748047, -4.228199481964111, -4.027615547180176, -3.827031135559082, -3.6264472007751465, -3.4258627891540527, -3.225278615951538, -3.0246946811676025, -2.824110507965088, -2.6235265731811523, -2.4229423999786377, -2.222358226776123, -2.0217742919921875, -1.8211901187896729, -1.6206060647964478, -1.4200220108032227, -1.219437837600708, -1.018853783607483, -0.8182697296142578, -0.6176855564117432, -0.41710150241851807, -0.21651744842529297, -0.015933364629745483, 0.184650719165802, 0.3852348327636719, 0.585818886756897, 0.7864029407501221, 0.9869871139526367, 1.1875711679458618, 1.388155221939087, 1.588739275932312, 1.789323329925537, 1.9899075031280518, 2.1904916763305664, 2.391075611114502, 2.5916597843170166, 2.7922439575195312, 2.992827892303467, 3.1934120655059814, 3.393996000289917, 3.5945801734924316, 3.795164108276367, 3.995748281478882, 4.1963324546813965, 4.396916389465332, 4.597500801086426, 4.798084735870361, 4.998669147491455, 5.199253082275391, 5.399837017059326, 5.600420951843262, 5.8010053634643555, 6.001589298248291, 6.202173233032227]}, "gradients/encoder.encoder.layers.13.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 4.0, 3.0, 4.0, 1.0, 5.0, 3.0, 10.0, 15.0, 32.0, 46.0, 98.0, 134.0, 237.0, 571.0, 1589.0, 6611.0, 54836.0, 4068334.0, 52615.0, 6222.0, 1643.0, 623.0, 279.0, 147.0, 82.0, 51.0, 39.0, 22.0, 11.0, 14.0, 6.0, 5.0, 2.0, 0.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.501953125, -3.39739990234375, -3.2928466796875, -3.18829345703125, -3.083740234375, -2.97918701171875, -2.8746337890625, -2.77008056640625, -2.66552734375, -2.56097412109375, -2.4564208984375, -2.35186767578125, -2.247314453125, -2.14276123046875, -2.0382080078125, -1.93365478515625, -1.8291015625, -1.72454833984375, -1.6199951171875, -1.51544189453125, -1.410888671875, -1.30633544921875, -1.2017822265625, -1.09722900390625, -0.99267578125, -0.88812255859375, -0.7835693359375, -0.67901611328125, -0.574462890625, -0.46990966796875, -0.3653564453125, -0.26080322265625, -0.15625, -0.05169677734375, 0.0528564453125, 0.15740966796875, 0.261962890625, 0.36651611328125, 0.4710693359375, 0.57562255859375, 0.68017578125, 0.78472900390625, 0.8892822265625, 0.99383544921875, 1.098388671875, 1.20294189453125, 1.3074951171875, 1.41204833984375, 1.5166015625, 1.62115478515625, 1.7257080078125, 1.83026123046875, 1.934814453125, 2.03936767578125, 2.1439208984375, 2.24847412109375, 2.35302734375, 2.45758056640625, 2.5621337890625, 2.66668701171875, 2.771240234375, 2.87579345703125, 2.9803466796875, 3.08489990234375, 3.189453125]}, "gradients/encoder.encoder.layers.13.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 2.0, 6.0, 6.0, 7.0, 20.0, 28.0, 40.0, 64.0, 76.0, 80.0, 118.0, 106.0, 128.0, 104.0, 72.0, 53.0, 34.0, 21.0, 15.0, 10.0, 6.0, 7.0, 3.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0], "bins": [-0.82421875, -0.80096435546875, -0.7777099609375, -0.75445556640625, -0.731201171875, -0.70794677734375, -0.6846923828125, -0.66143798828125, -0.63818359375, -0.61492919921875, -0.5916748046875, -0.56842041015625, -0.545166015625, -0.52191162109375, -0.4986572265625, -0.47540283203125, -0.4521484375, -0.42889404296875, -0.4056396484375, -0.38238525390625, -0.359130859375, -0.33587646484375, -0.3126220703125, -0.28936767578125, -0.26611328125, -0.24285888671875, -0.2196044921875, -0.19635009765625, -0.173095703125, -0.14984130859375, -0.1265869140625, -0.10333251953125, -0.080078125, -0.05682373046875, -0.0335693359375, -0.01031494140625, 0.012939453125, 0.03619384765625, 0.0594482421875, 0.08270263671875, 0.10595703125, 0.12921142578125, 0.1524658203125, 0.17572021484375, 0.198974609375, 0.22222900390625, 0.2454833984375, 0.26873779296875, 0.2919921875, 0.31524658203125, 0.3385009765625, 0.36175537109375, 0.385009765625, 0.40826416015625, 0.4315185546875, 0.45477294921875, 0.47802734375, 0.50128173828125, 0.5245361328125, 0.54779052734375, 0.571044921875, 0.59429931640625, 0.6175537109375, 0.64080810546875, 0.6640625]}, "gradients/encoder.encoder.layers.13.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 4.0, 2.0, 10.0, 13.0, 47.0, 134.0, 259.0, 717.0, 1751.0, 12108.0, 4126033.0, 48600.0, 2957.0, 987.0, 379.0, 177.0, 72.0, 21.0, 11.0, 5.0, 5.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.26953125, -4.12066650390625, -3.9718017578125, -3.82293701171875, -3.674072265625, -3.52520751953125, -3.3763427734375, -3.22747802734375, -3.07861328125, -2.92974853515625, -2.7808837890625, -2.63201904296875, -2.483154296875, -2.33428955078125, -2.1854248046875, -2.03656005859375, -1.8876953125, -1.73883056640625, -1.5899658203125, -1.44110107421875, -1.292236328125, -1.14337158203125, -0.9945068359375, -0.84564208984375, -0.69677734375, -0.54791259765625, -0.3990478515625, -0.25018310546875, -0.101318359375, 0.04754638671875, 0.1964111328125, 0.34527587890625, 0.494140625, 0.64300537109375, 0.7918701171875, 0.94073486328125, 1.089599609375, 1.23846435546875, 1.3873291015625, 1.53619384765625, 1.68505859375, 1.83392333984375, 1.9827880859375, 2.13165283203125, 2.280517578125, 2.42938232421875, 2.5782470703125, 2.72711181640625, 2.8759765625, 3.02484130859375, 3.1737060546875, 3.32257080078125, 3.471435546875, 3.62030029296875, 3.7691650390625, 3.91802978515625, 4.06689453125, 4.21575927734375, 4.3646240234375, 4.51348876953125, 4.662353515625, 4.81121826171875, 4.9600830078125, 5.10894775390625, 5.2578125]}, "gradients/encoder.encoder.layers.13.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 4.0, 2.0, 2.0, 1.0, 7.0, 8.0, 12.0, 25.0, 51.0, 144.0, 2746.0, 904.0, 86.0, 38.0, 21.0, 11.0, 6.0, 6.0, 3.0, 2.0, 2.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.240234375, -1.1987457275390625, -1.157257080078125, -1.1157684326171875, -1.07427978515625, -1.0327911376953125, -0.991302490234375, -0.9498138427734375, -0.9083251953125, -0.8668365478515625, -0.825347900390625, -0.7838592529296875, -0.74237060546875, -0.7008819580078125, -0.659393310546875, -0.6179046630859375, -0.576416015625, -0.5349273681640625, -0.493438720703125, -0.4519500732421875, -0.41046142578125, -0.3689727783203125, -0.327484130859375, -0.2859954833984375, -0.2445068359375, -0.2030181884765625, -0.161529541015625, -0.1200408935546875, -0.07855224609375, -0.0370635986328125, 0.004425048828125, 0.0459136962890625, 0.08740234375, 0.1288909912109375, 0.170379638671875, 0.2118682861328125, 0.25335693359375, 0.2948455810546875, 0.336334228515625, 0.3778228759765625, 0.4193115234375, 0.4608001708984375, 0.502288818359375, 0.5437774658203125, 0.58526611328125, 0.6267547607421875, 0.668243408203125, 0.7097320556640625, 0.751220703125, 0.7927093505859375, 0.834197998046875, 0.8756866455078125, 0.91717529296875, 0.9586639404296875, 1.000152587890625, 1.0416412353515625, 1.0831298828125, 1.1246185302734375, 1.166107177734375, 1.2075958251953125, 1.24908447265625, 1.2905731201171875, 1.332061767578125, 1.3735504150390625, 1.4150390625]}, "gradients/encoder.encoder.layers.13.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 1.0, 0.0, 2.0, 1.0, 3.0, 4.0, 18.0, 54.0, 178.0, 397.0, 236.0, 72.0, 23.0, 12.0, 6.0, 3.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.344240188598633, -8.164571762084961, -7.9849042892456055, -7.805236339569092, -7.625568389892578, -7.4459004402160645, -7.266232490539551, -7.086564540863037, -6.906896591186523, -6.72722864151001, -6.547560691833496, -6.367892742156982, -6.188224792480469, -6.008556842803955, -5.828888893127441, -5.649220943450928, -5.469552993774414, -5.2898850440979, -5.110217094421387, -4.930549144744873, -4.750881195068359, -4.571213245391846, -4.391545295715332, -4.211877346038818, -4.0322089195251465, -3.852540969848633, -3.672873020172119, -3.4932050704956055, -3.313537120819092, -3.133869171142578, -2.9542012214660645, -2.774533271789551, -2.594865322113037, -2.4151973724365234, -2.2355294227600098, -2.055861473083496, -1.8761935234069824, -1.6965255737304688, -1.5168575048446655, -1.3371895551681519, -1.1575216054916382, -0.9778536558151245, -0.7981857061386108, -0.6185176968574524, -0.4388497471809387, -0.25918179750442505, -0.0795137882232666, 0.10015416145324707, 0.27982211112976074, 0.4594900608062744, 0.6391580104827881, 0.8188260197639465, 0.9984939694404602, 1.178161859512329, 1.3578299283981323, 1.537497878074646, 1.7171658277511597, 1.8968337774276733, 2.0765018463134766, 2.2561697959899902, 2.435837745666504, 2.6155056953430176, 2.7951736450195312, 2.974841594696045, 3.1545095443725586]}, "gradients/encoder.encoder.layers.13.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 5.0, 5.0, 3.0, 5.0, 5.0, 11.0, 19.0, 25.0, 37.0, 40.0, 44.0, 53.0, 55.0, 76.0, 85.0, 77.0, 64.0, 70.0, 67.0, 45.0, 60.0, 40.0, 30.0, 25.0, 24.0, 19.0, 3.0, 5.0, 7.0, 5.0, 3.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.5115015506744385, -2.4132091999053955, -2.3149166107177734, -2.2166242599487305, -2.1183319091796875, -2.0200395584106445, -1.9217469692230225, -1.8234546184539795, -1.725162148475647, -1.6268696784973145, -1.5285773277282715, -1.430284857749939, -1.3319923877716064, -1.2337000370025635, -1.135407567024231, -1.0371150970458984, -0.9388227462768555, -0.8405303359031677, -0.74223792552948, -0.6439454555511475, -0.5456530451774597, -0.447360634803772, -0.34906816482543945, -0.2507757544517517, -0.15248334407806396, -0.05419091880321503, 0.04410150647163391, 0.14239394664764404, 0.2406863570213318, 0.33897876739501953, 0.43727123737335205, 0.5355636477470398, 0.6338562965393066, 0.7321487069129944, 0.8304411172866821, 0.9287335872650146, 1.0270259380340576, 1.1253184080123901, 1.2236108779907227, 1.3219032287597656, 1.4201956987380981, 1.5184881687164307, 1.6167805194854736, 1.7150729894638062, 1.8133654594421387, 1.9116578102111816, 2.0099501609802246, 2.1082427501678467, 2.2065351009368896, 2.3048274517059326, 2.4031200408935547, 2.5014123916625977, 2.5997047424316406, 2.6979970932006836, 2.7962896823883057, 2.8945820331573486, 2.9928746223449707, 3.0911669731140137, 3.1894595623016357, 3.2877519130706787, 3.3860442638397217, 3.4843368530273438, 3.5826292037963867, 3.6809215545654297, 3.7792139053344727]}, "gradients/encoder.encoder.layers.13.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 4.0, 2.0, 2.0, 7.0, 8.0, 10.0, 13.0, 35.0, 48.0, 89.0, 229.0, 546.0, 1571.0, 6114.0, 67485.0, 834735.0, 125955.0, 8615.0, 1935.0, 640.0, 257.0, 133.0, 63.0, 24.0, 24.0, 6.0, 7.0, 1.0, 2.0, 1.0, 3.0, 1.0, 4.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.00390625, -2.9117431640625, -2.819580078125, -2.7274169921875, -2.63525390625, -2.5430908203125, -2.450927734375, -2.3587646484375, -2.2666015625, -2.1744384765625, -2.082275390625, -1.9901123046875, -1.89794921875, -1.8057861328125, -1.713623046875, -1.6214599609375, -1.529296875, -1.4371337890625, -1.344970703125, -1.2528076171875, -1.16064453125, -1.0684814453125, -0.976318359375, -0.8841552734375, -0.7919921875, -0.6998291015625, -0.607666015625, -0.5155029296875, -0.42333984375, -0.3311767578125, -0.239013671875, -0.1468505859375, -0.0546875, 0.0374755859375, 0.129638671875, 0.2218017578125, 0.31396484375, 0.4061279296875, 0.498291015625, 0.5904541015625, 0.6826171875, 0.7747802734375, 0.866943359375, 0.9591064453125, 1.05126953125, 1.1434326171875, 1.235595703125, 1.3277587890625, 1.419921875, 1.5120849609375, 1.604248046875, 1.6964111328125, 1.78857421875, 1.8807373046875, 1.972900390625, 2.0650634765625, 2.1572265625, 2.2493896484375, 2.341552734375, 2.4337158203125, 2.52587890625, 2.6180419921875, 2.710205078125, 2.8023681640625, 2.89453125]}, "gradients/encoder.encoder.layers.13.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 5.0, 8.0, 15.0, 26.0, 37.0, 51.0, 78.0, 79.0, 107.0, 121.0, 120.0, 111.0, 63.0, 60.0, 43.0, 32.0, 14.0, 14.0, 9.0, 2.0, 3.0, 4.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.85205078125, -0.8275222778320312, -0.8029937744140625, -0.7784652709960938, -0.753936767578125, -0.7294082641601562, -0.7048797607421875, -0.6803512573242188, -0.65582275390625, -0.6312942504882812, -0.6067657470703125, -0.5822372436523438, -0.557708740234375, -0.5331802368164062, -0.5086517333984375, -0.48412322998046875, -0.4595947265625, -0.43506622314453125, -0.4105377197265625, -0.38600921630859375, -0.361480712890625, -0.33695220947265625, -0.3124237060546875, -0.28789520263671875, -0.26336669921875, -0.23883819580078125, -0.2143096923828125, -0.18978118896484375, -0.165252685546875, -0.14072418212890625, -0.1161956787109375, -0.09166717529296875, -0.067138671875, -0.04261016845703125, -0.0180816650390625, 0.00644683837890625, 0.030975341796875, 0.05550384521484375, 0.0800323486328125, 0.10456085205078125, 0.12908935546875, 0.15361785888671875, 0.1781463623046875, 0.20267486572265625, 0.227203369140625, 0.25173187255859375, 0.2762603759765625, 0.30078887939453125, 0.3253173828125, 0.34984588623046875, 0.3743743896484375, 0.39890289306640625, 0.423431396484375, 0.44795989990234375, 0.4724884033203125, 0.49701690673828125, 0.52154541015625, 0.5460739135742188, 0.5706024169921875, 0.5951309204101562, 0.619659423828125, 0.6441879272460938, 0.6687164306640625, 0.6932449340820312, 0.7177734375]}, "gradients/encoder.encoder.layers.13.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 3.0, 1.0, 2.0, 6.0, 5.0, 5.0, 4.0, 4.0, 12.0, 18.0, 23.0, 29.0, 44.0, 47.0, 74.0, 129.0, 228.0, 473.0, 993.0, 2793.0, 12059.0, 110784.0, 802845.0, 102035.0, 11434.0, 2605.0, 906.0, 379.0, 209.0, 138.0, 83.0, 60.0, 31.0, 29.0, 21.0, 8.0, 14.0, 9.0, 7.0, 6.0, 4.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.7890625, -1.7188720703125, -1.648681640625, -1.5784912109375, -1.50830078125, -1.4381103515625, -1.367919921875, -1.2977294921875, -1.2275390625, -1.1573486328125, -1.087158203125, -1.0169677734375, -0.94677734375, -0.8765869140625, -0.806396484375, -0.7362060546875, -0.666015625, -0.5958251953125, -0.525634765625, -0.4554443359375, -0.38525390625, -0.3150634765625, -0.244873046875, -0.1746826171875, -0.1044921875, -0.0343017578125, 0.035888671875, 0.1060791015625, 0.17626953125, 0.2464599609375, 0.316650390625, 0.3868408203125, 0.45703125, 0.5272216796875, 0.597412109375, 0.6676025390625, 0.73779296875, 0.8079833984375, 0.878173828125, 0.9483642578125, 1.0185546875, 1.0887451171875, 1.158935546875, 1.2291259765625, 1.29931640625, 1.3695068359375, 1.439697265625, 1.5098876953125, 1.580078125, 1.6502685546875, 1.720458984375, 1.7906494140625, 1.86083984375, 1.9310302734375, 2.001220703125, 2.0714111328125, 2.1416015625, 2.2117919921875, 2.281982421875, 2.3521728515625, 2.42236328125, 2.4925537109375, 2.562744140625, 2.6329345703125, 2.703125]}, "gradients/encoder.encoder.layers.13.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 4.0, 2.0, 2.0, 1.0, 3.0, 4.0, 4.0, 5.0, 8.0, 12.0, 10.0, 20.0, 16.0, 33.0, 35.0, 32.0, 33.0, 52.0, 59.0, 50.0, 63.0, 75.0, 78.0, 69.0, 38.0, 63.0, 43.0, 42.0, 42.0, 21.0, 12.0, 14.0, 13.0, 13.0, 7.0, 5.0, 3.0, 6.0, 5.0, 3.0, 5.0, 2.0, 2.0, 2.0, 1.0, 2.0], "bins": [-2.9375, -2.86407470703125, -2.7906494140625, -2.71722412109375, -2.643798828125, -2.57037353515625, -2.4969482421875, -2.42352294921875, -2.35009765625, -2.27667236328125, -2.2032470703125, -2.12982177734375, -2.056396484375, -1.98297119140625, -1.9095458984375, -1.83612060546875, -1.7626953125, -1.68927001953125, -1.6158447265625, -1.54241943359375, -1.468994140625, -1.39556884765625, -1.3221435546875, -1.24871826171875, -1.17529296875, -1.10186767578125, -1.0284423828125, -0.95501708984375, -0.881591796875, -0.80816650390625, -0.7347412109375, -0.66131591796875, -0.587890625, -0.51446533203125, -0.4410400390625, -0.36761474609375, -0.294189453125, -0.22076416015625, -0.1473388671875, -0.07391357421875, -0.00048828125, 0.07293701171875, 0.1463623046875, 0.21978759765625, 0.293212890625, 0.36663818359375, 0.4400634765625, 0.51348876953125, 0.5869140625, 0.66033935546875, 0.7337646484375, 0.80718994140625, 0.880615234375, 0.95404052734375, 1.0274658203125, 1.10089111328125, 1.17431640625, 1.24774169921875, 1.3211669921875, 1.39459228515625, 1.468017578125, 1.54144287109375, 1.6148681640625, 1.68829345703125, 1.76171875]}, "gradients/encoder.encoder.layers.13.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 1.0, 1.0, 2.0, 3.0, 3.0, 4.0, 2.0, 7.0, 13.0, 18.0, 21.0, 39.0, 57.0, 94.0, 209.0, 392.0, 718.0, 1878.0, 6803.0, 56840.0, 920025.0, 51514.0, 6550.0, 1809.0, 721.0, 343.0, 209.0, 131.0, 64.0, 36.0, 27.0, 11.0, 7.0, 5.0, 6.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.2060546875, -1.1637115478515625, -1.121368408203125, -1.0790252685546875, -1.03668212890625, -0.9943389892578125, -0.951995849609375, -0.9096527099609375, -0.8673095703125, -0.8249664306640625, -0.782623291015625, -0.7402801513671875, -0.69793701171875, -0.6555938720703125, -0.613250732421875, -0.5709075927734375, -0.528564453125, -0.4862213134765625, -0.443878173828125, -0.4015350341796875, -0.35919189453125, -0.3168487548828125, -0.274505615234375, -0.2321624755859375, -0.1898193359375, -0.1474761962890625, -0.105133056640625, -0.0627899169921875, -0.02044677734375, 0.0218963623046875, 0.064239501953125, 0.1065826416015625, 0.14892578125, 0.1912689208984375, 0.233612060546875, 0.2759552001953125, 0.31829833984375, 0.3606414794921875, 0.402984619140625, 0.4453277587890625, 0.4876708984375, 0.5300140380859375, 0.572357177734375, 0.6147003173828125, 0.65704345703125, 0.6993865966796875, 0.741729736328125, 0.7840728759765625, 0.826416015625, 0.8687591552734375, 0.911102294921875, 0.9534454345703125, 0.99578857421875, 1.0381317138671875, 1.080474853515625, 1.1228179931640625, 1.1651611328125, 1.2075042724609375, 1.249847412109375, 1.2921905517578125, 1.33453369140625, 1.3768768310546875, 1.419219970703125, 1.4615631103515625, 1.50390625]}, "gradients/encoder.encoder.layers.13.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 3.0, 2.0, 2.0, 6.0, 4.0, 4.0, 5.0, 8.0, 7.0, 13.0, 12.0, 16.0, 22.0, 39.0, 49.0, 49.0, 68.0, 116.0, 166.0, 116.0, 73.0, 59.0, 37.0, 31.0, 28.0, 19.0, 14.0, 6.0, 15.0, 4.0, 8.0, 3.0, 2.0, 2.0, 1.0, 2.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00015270709991455078, -0.00014816038310527802, -0.00014361366629600525, -0.00013906694948673248, -0.00013452023267745972, -0.00012997351586818695, -0.00012542679905891418, -0.00012088008224964142, -0.00011633336544036865, -0.00011178664863109589, -0.00010723993182182312, -0.00010269321501255035, -9.814649820327759e-05, -9.359978139400482e-05, -8.905306458473206e-05, -8.450634777545929e-05, -7.995963096618652e-05, -7.541291415691376e-05, -7.086619734764099e-05, -6.631948053836823e-05, -6.177276372909546e-05, -5.722604691982269e-05, -5.267933011054993e-05, -4.813261330127716e-05, -4.3585896492004395e-05, -3.903917968273163e-05, -3.449246287345886e-05, -2.9945746064186096e-05, -2.539902925491333e-05, -2.0852312445640564e-05, -1.6305595636367798e-05, -1.1758878827095032e-05, -7.212162017822266e-06, -2.6654452085494995e-06, 1.8812716007232666e-06, 6.427988409996033e-06, 1.0974705219268799e-05, 1.5521422028541565e-05, 2.006813883781433e-05, 2.4614855647087097e-05, 2.9161572456359863e-05, 3.370828926563263e-05, 3.8255006074905396e-05, 4.280172288417816e-05, 4.734843969345093e-05, 5.1895156502723694e-05, 5.644187331199646e-05, 6.0988590121269226e-05, 6.553530693054199e-05, 7.008202373981476e-05, 7.462874054908752e-05, 7.917545735836029e-05, 8.372217416763306e-05, 8.826889097690582e-05, 9.281560778617859e-05, 9.736232459545135e-05, 0.00010190904140472412, 0.00010645575821399689, 0.00011100247502326965, 0.00011554919183254242, 0.00012009590864181519, 0.00012464262545108795, 0.00012918934226036072, 0.00013373605906963348, 0.00013828277587890625]}, "gradients/encoder.encoder.layers.13.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 5.0, 7.0, 4.0, 5.0, 4.0, 5.0, 9.0, 8.0, 14.0, 22.0, 36.0, 43.0, 81.0, 160.0, 270.0, 577.0, 1162.0, 4131.0, 23635.0, 477978.0, 509012.0, 24647.0, 4230.0, 1319.0, 546.0, 258.0, 154.0, 94.0, 53.0, 30.0, 16.0, 16.0, 14.0, 4.0, 3.0, 5.0, 6.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-1.125, -1.092864990234375, -1.06072998046875, -1.028594970703125, -0.9964599609375, -0.964324951171875, -0.93218994140625, -0.900054931640625, -0.867919921875, -0.835784912109375, -0.80364990234375, -0.771514892578125, -0.7393798828125, -0.707244873046875, -0.67510986328125, -0.642974853515625, -0.61083984375, -0.578704833984375, -0.54656982421875, -0.514434814453125, -0.4822998046875, -0.450164794921875, -0.41802978515625, -0.385894775390625, -0.353759765625, -0.321624755859375, -0.28948974609375, -0.257354736328125, -0.2252197265625, -0.193084716796875, -0.16094970703125, -0.128814697265625, -0.0966796875, -0.064544677734375, -0.03240966796875, -0.000274658203125, 0.0318603515625, 0.063995361328125, 0.09613037109375, 0.128265380859375, 0.160400390625, 0.192535400390625, 0.22467041015625, 0.256805419921875, 0.2889404296875, 0.321075439453125, 0.35321044921875, 0.385345458984375, 0.41748046875, 0.449615478515625, 0.48175048828125, 0.513885498046875, 0.5460205078125, 0.578155517578125, 0.61029052734375, 0.642425537109375, 0.674560546875, 0.706695556640625, 0.73883056640625, 0.770965576171875, 0.8031005859375, 0.835235595703125, 0.86737060546875, 0.899505615234375, 0.931640625]}, "gradients/encoder.encoder.layers.13.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 4.0, 0.0, 1.0, 3.0, 0.0, 3.0, 5.0, 1.0, 7.0, 9.0, 9.0, 22.0, 17.0, 17.0, 48.0, 53.0, 116.0, 181.0, 184.0, 110.0, 69.0, 39.0, 24.0, 15.0, 23.0, 7.0, 17.0, 4.0, 3.0, 3.0, 4.0, 5.0, 10.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-1.44140625, -1.4086990356445312, -1.3759918212890625, -1.3432846069335938, -1.310577392578125, -1.2778701782226562, -1.2451629638671875, -1.2124557495117188, -1.17974853515625, -1.1470413208007812, -1.1143341064453125, -1.0816268920898438, -1.048919677734375, -1.0162124633789062, -0.9835052490234375, -0.9507980346679688, -0.9180908203125, -0.8853836059570312, -0.8526763916015625, -0.8199691772460938, -0.787261962890625, -0.7545547485351562, -0.7218475341796875, -0.6891403198242188, -0.65643310546875, -0.6237258911132812, -0.5910186767578125, -0.5583114624023438, -0.525604248046875, -0.49289703369140625, -0.4601898193359375, -0.42748260498046875, -0.394775390625, -0.36206817626953125, -0.3293609619140625, -0.29665374755859375, -0.263946533203125, -0.23123931884765625, -0.1985321044921875, -0.16582489013671875, -0.13311767578125, -0.10041046142578125, -0.0677032470703125, -0.03499603271484375, -0.002288818359375, 0.03041839599609375, 0.0631256103515625, 0.09583282470703125, 0.1285400390625, 0.16124725341796875, 0.1939544677734375, 0.22666168212890625, 0.259368896484375, 0.29207611083984375, 0.3247833251953125, 0.35749053955078125, 0.39019775390625, 0.42290496826171875, 0.4556121826171875, 0.48831939697265625, 0.521026611328125, 0.5537338256835938, 0.5864410400390625, 0.6191482543945312, 0.65185546875]}, "gradients/encoder.encoder.layers.13.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 3.0, 1.0, 0.0, 6.0, 4.0, 8.0, 14.0, 47.0, 85.0, 188.0, 247.0, 204.0, 110.0, 49.0, 28.0, 10.0, 7.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-23.90314483642578, -23.355697631835938, -22.808250427246094, -22.260805130004883, -21.71335792541504, -21.165910720825195, -20.618465423583984, -20.07101821899414, -19.523571014404297, -18.976123809814453, -18.42867660522461, -17.8812313079834, -17.333784103393555, -16.78633689880371, -16.2388916015625, -15.691444396972656, -15.143997192382812, -14.596549987792969, -14.049103736877441, -13.501657485961914, -12.95421028137207, -12.406763076782227, -11.8593168258667, -11.311870574951172, -10.764423370361328, -10.216976165771484, -9.669529914855957, -9.12208366394043, -8.574636459350586, -8.027189254760742, -7.479743003845215, -6.932296276092529, -6.384847640991211, -5.837400913238525, -5.28995418548584, -4.742507457733154, -4.195060729980469, -3.647614002227783, -3.1001672744750977, -2.552720546722412, -2.0052738189697266, -1.457827091217041, -0.9103803634643555, -0.3629336357116699, 0.18451309204101562, 0.7319598197937012, 1.2794065475463867, 1.8268532752990723, 2.374300003051758, 2.9217467308044434, 3.469193458557129, 4.0166401863098145, 4.5640869140625, 5.1115336418151855, 5.658980369567871, 6.206427097320557, 6.753873825073242, 7.301320552825928, 7.848767280578613, 8.39621353149414, 8.943660736083984, 9.491107940673828, 10.038554191589355, 10.586000442504883, 11.133447647094727]}, "gradients/encoder.encoder.layers.13.layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 5.0, 1.0, 1.0, 4.0, 1.0, 8.0, 3.0, 8.0, 7.0, 5.0, 15.0, 8.0, 16.0, 30.0, 22.0, 27.0, 27.0, 34.0, 22.0, 49.0, 34.0, 42.0, 49.0, 47.0, 54.0, 50.0, 31.0, 35.0, 48.0, 40.0, 47.0, 42.0, 35.0, 20.0, 23.0, 24.0, 15.0, 9.0, 15.0, 13.0, 8.0, 10.0, 13.0, 8.0, 4.0, 2.0, 2.0, 2.0, 0.0, 1.0, 0.0, 2.0, 1.0], "bins": [-13.676092147827148, -13.295989990234375, -12.915887832641602, -12.535785675048828, -12.155682563781738, -11.775580406188965, -11.395478248596191, -11.015376091003418, -10.635273933410645, -10.255171775817871, -9.875069618225098, -9.494966506958008, -9.114864349365234, -8.734762191772461, -8.354660034179688, -7.974557876586914, -7.594455242156982, -7.214353084564209, -6.834250450134277, -6.454148292541504, -6.0740461349487305, -5.693943977355957, -5.313841342926025, -4.933739185333252, -4.55363655090332, -4.173534393310547, -3.7934319972991943, -3.413329601287842, -3.0332274436950684, -2.653125047683716, -2.2730226516723633, -1.8929204940795898, -1.5128183364868164, -1.1327160596847534, -0.7526137232780457, -0.3725113868713379, 0.007590889930725098, 0.3876931667327881, 0.7677955627441406, 1.147897720336914, 1.5280001163482666, 1.9081023931503296, 2.2882046699523926, 2.668307065963745, 3.0484094619750977, 3.428511619567871, 3.8086140155792236, 4.188715934753418, 4.56881856918335, 4.948920726776123, 5.329023361206055, 5.709125518798828, 6.089227676391602, 6.469329833984375, 6.849432468414307, 7.22953462600708, 7.609637260437012, 7.989739418029785, 8.369841575622559, 8.749944686889648, 9.130046844482422, 9.510149002075195, 9.890251159667969, 10.270353317260742, 10.650455474853516]}, "gradients/encoder.encoder.layers.12.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 2.0, 4.0, 1.0, 5.0, 10.0, 14.0, 13.0, 20.0, 30.0, 51.0, 76.0, 128.0, 181.0, 320.0, 621.0, 1274.0, 2716.0, 7260.0, 27701.0, 346602.0, 3727281.0, 60375.0, 12098.0, 3954.0, 1676.0, 798.0, 423.0, 206.0, 153.0, 91.0, 68.0, 37.0, 18.0, 26.0, 17.0, 12.0, 8.0, 5.0, 4.0, 3.0, 1.0, 0.0, 3.0, 3.0, 0.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.193359375, -2.125274658203125, -2.05718994140625, -1.989105224609375, -1.9210205078125, -1.852935791015625, -1.78485107421875, -1.716766357421875, -1.648681640625, -1.580596923828125, -1.51251220703125, -1.444427490234375, -1.3763427734375, -1.308258056640625, -1.24017333984375, -1.172088623046875, -1.10400390625, -1.035919189453125, -0.96783447265625, -0.899749755859375, -0.8316650390625, -0.763580322265625, -0.69549560546875, -0.627410888671875, -0.559326171875, -0.491241455078125, -0.42315673828125, -0.355072021484375, -0.2869873046875, -0.218902587890625, -0.15081787109375, -0.082733154296875, -0.0146484375, 0.053436279296875, 0.12152099609375, 0.189605712890625, 0.2576904296875, 0.325775146484375, 0.39385986328125, 0.461944580078125, 0.530029296875, 0.598114013671875, 0.66619873046875, 0.734283447265625, 0.8023681640625, 0.870452880859375, 0.93853759765625, 1.006622314453125, 1.07470703125, 1.142791748046875, 1.21087646484375, 1.278961181640625, 1.3470458984375, 1.415130615234375, 1.48321533203125, 1.551300048828125, 1.619384765625, 1.687469482421875, 1.75555419921875, 1.823638916015625, 1.8917236328125, 1.959808349609375, 2.02789306640625, 2.095977783203125, 2.1640625]}, "gradients/encoder.encoder.layers.12.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 3.0, 4.0, 2.0, 0.0, 6.0, 10.0, 17.0, 17.0, 25.0, 36.0, 45.0, 59.0, 76.0, 60.0, 93.0, 91.0, 96.0, 84.0, 72.0, 75.0, 33.0, 32.0, 23.0, 11.0, 21.0, 7.0, 5.0, 2.0, 3.0, 2.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.9013671875, -0.8751983642578125, -0.849029541015625, -0.8228607177734375, -0.79669189453125, -0.7705230712890625, -0.744354248046875, -0.7181854248046875, -0.6920166015625, -0.6658477783203125, -0.639678955078125, -0.6135101318359375, -0.58734130859375, -0.5611724853515625, -0.535003662109375, -0.5088348388671875, -0.482666015625, -0.4564971923828125, -0.430328369140625, -0.4041595458984375, -0.37799072265625, -0.3518218994140625, -0.325653076171875, -0.2994842529296875, -0.2733154296875, -0.2471466064453125, -0.220977783203125, -0.1948089599609375, -0.16864013671875, -0.1424713134765625, -0.116302490234375, -0.0901336669921875, -0.06396484375, -0.0377960205078125, -0.011627197265625, 0.0145416259765625, 0.04071044921875, 0.0668792724609375, 0.093048095703125, 0.1192169189453125, 0.1453857421875, 0.1715545654296875, 0.197723388671875, 0.2238922119140625, 0.25006103515625, 0.2762298583984375, 0.302398681640625, 0.3285675048828125, 0.354736328125, 0.3809051513671875, 0.407073974609375, 0.4332427978515625, 0.45941162109375, 0.4855804443359375, 0.511749267578125, 0.5379180908203125, 0.5640869140625, 0.5902557373046875, 0.616424560546875, 0.6425933837890625, 0.66876220703125, 0.6949310302734375, 0.721099853515625, 0.7472686767578125, 0.7734375]}, "gradients/encoder.encoder.layers.12.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 2.0, 2.0, 4.0, 8.0, 15.0, 19.0, 66.0, 102.0, 217.0, 456.0, 1004.0, 2536.0, 7985.0, 46152.0, 3875644.0, 236231.0, 16817.0, 4229.0, 1513.0, 688.0, 298.0, 152.0, 74.0, 31.0, 22.0, 7.0, 4.0, 5.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.1171875, -2.034942626953125, -1.95269775390625, -1.870452880859375, -1.7882080078125, -1.705963134765625, -1.62371826171875, -1.541473388671875, -1.459228515625, -1.376983642578125, -1.29473876953125, -1.212493896484375, -1.1302490234375, -1.048004150390625, -0.96575927734375, -0.883514404296875, -0.80126953125, -0.719024658203125, -0.63677978515625, -0.554534912109375, -0.4722900390625, -0.390045166015625, -0.30780029296875, -0.225555419921875, -0.143310546875, -0.061065673828125, 0.02117919921875, 0.103424072265625, 0.1856689453125, 0.267913818359375, 0.35015869140625, 0.432403564453125, 0.5146484375, 0.596893310546875, 0.67913818359375, 0.761383056640625, 0.8436279296875, 0.925872802734375, 1.00811767578125, 1.090362548828125, 1.172607421875, 1.254852294921875, 1.33709716796875, 1.419342041015625, 1.5015869140625, 1.583831787109375, 1.66607666015625, 1.748321533203125, 1.83056640625, 1.912811279296875, 1.99505615234375, 2.077301025390625, 2.1595458984375, 2.241790771484375, 2.32403564453125, 2.406280517578125, 2.488525390625, 2.570770263671875, 2.65301513671875, 2.735260009765625, 2.8175048828125, 2.899749755859375, 2.98199462890625, 3.064239501953125, 3.146484375]}, "gradients/encoder.encoder.layers.12.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 1.0, 2.0, 3.0, 4.0, 2.0, 5.0, 6.0, 5.0, 5.0, 9.0, 9.0, 18.0, 31.0, 60.0, 103.0, 353.0, 2480.0, 646.0, 162.0, 60.0, 30.0, 23.0, 18.0, 11.0, 6.0, 8.0, 3.0, 5.0, 5.0, 4.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.197265625, -1.15826416015625, -1.1192626953125, -1.08026123046875, -1.041259765625, -1.00225830078125, -0.9632568359375, -0.92425537109375, -0.88525390625, -0.84625244140625, -0.8072509765625, -0.76824951171875, -0.729248046875, -0.69024658203125, -0.6512451171875, -0.61224365234375, -0.5732421875, -0.53424072265625, -0.4952392578125, -0.45623779296875, -0.417236328125, -0.37823486328125, -0.3392333984375, -0.30023193359375, -0.26123046875, -0.22222900390625, -0.1832275390625, -0.14422607421875, -0.105224609375, -0.06622314453125, -0.0272216796875, 0.01177978515625, 0.05078125, 0.08978271484375, 0.1287841796875, 0.16778564453125, 0.206787109375, 0.24578857421875, 0.2847900390625, 0.32379150390625, 0.36279296875, 0.40179443359375, 0.4407958984375, 0.47979736328125, 0.518798828125, 0.55780029296875, 0.5968017578125, 0.63580322265625, 0.6748046875, 0.71380615234375, 0.7528076171875, 0.79180908203125, 0.830810546875, 0.86981201171875, 0.9088134765625, 0.94781494140625, 0.98681640625, 1.02581787109375, 1.0648193359375, 1.10382080078125, 1.142822265625, 1.18182373046875, 1.2208251953125, 1.25982666015625, 1.298828125]}, "gradients/encoder.encoder.layers.12.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 4.0, 14.0, 30.0, 127.0, 367.0, 323.0, 103.0, 33.0, 7.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.952731132507324, -8.632033348083496, -8.311336517333984, -7.990638732910156, -7.669940948486328, -7.349243640899658, -7.028546333312988, -6.70784854888916, -6.38715124130249, -6.06645393371582, -5.745756149291992, -5.425058841705322, -5.104361534118652, -4.783663749694824, -4.462966442108154, -4.142269134521484, -3.8215713500976562, -3.5008738040924072, -3.180176258087158, -2.8594789505004883, -2.5387814044952393, -2.2180838584899902, -1.8973864316940308, -1.5766890048980713, -1.2559914588928223, -0.935293972492218, -0.6145964860916138, -0.2938989996910095, 0.026798486709594727, 0.34749603271484375, 0.6681934595108032, 0.9888908863067627, 1.3095874786376953, 1.6302850246429443, 1.9509824514389038, 2.2716798782348633, 2.5923774242401123, 2.9130749702453613, 3.2337722778320312, 3.5544698238372803, 3.8751673698425293, 4.195864677429199, 4.516562461853027, 4.837259769439697, 5.157957077026367, 5.478654861450195, 5.799352169036865, 6.120049476623535, 6.440747261047363, 6.761444568634033, 7.082142353057861, 7.402839660644531, 7.723537445068359, 8.044235229492188, 8.3649320602417, 8.685629844665527, 9.006326675415039, 9.327024459838867, 9.647721290588379, 9.968419075012207, 10.289116859436035, 10.609813690185547, 10.930511474609375, 11.251209259033203, 11.571907043457031]}, "gradients/encoder.encoder.layers.12.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 4.0, 3.0, 3.0, 10.0, 11.0, 8.0, 10.0, 12.0, 13.0, 23.0, 45.0, 28.0, 27.0, 37.0, 40.0, 48.0, 50.0, 52.0, 56.0, 55.0, 55.0, 64.0, 45.0, 54.0, 38.0, 38.0, 37.0, 26.0, 22.0, 22.0, 20.0, 15.0, 10.0, 7.0, 11.0, 4.0, 3.0, 6.0, 3.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-4.089986801147461, -3.980909585952759, -3.8718323707580566, -3.7627551555633545, -3.6536779403686523, -3.54460072517395, -3.435523509979248, -3.326446056365967, -3.2173690795898438, -3.1082918643951416, -2.9992146492004395, -2.8901374340057373, -2.781060218811035, -2.671983003616333, -2.562905788421631, -2.4538283348083496, -2.3447511196136475, -2.2356739044189453, -2.126596689224243, -2.017519474029541, -1.9084422588348389, -1.7993650436401367, -1.690287709236145, -1.5812104940414429, -1.4721332788467407, -1.3630560636520386, -1.2539788484573364, -1.1449015140533447, -1.0358242988586426, -0.9267471432685852, -0.8176698684692383, -0.7085926532745361, -0.599515438079834, -0.49043822288513184, -0.3813609778881073, -0.27228373289108276, -0.16320651769638062, -0.05412930250167847, 0.05494797229766846, 0.1640251874923706, 0.27310240268707275, 0.3821796178817749, 0.49125686287879944, 0.600334107875824, 0.7094113230705261, 0.8184885382652283, 0.9275658130645752, 1.0366430282592773, 1.1457202434539795, 1.2547974586486816, 1.3638746738433838, 1.472951889038086, 1.582029104232788, 1.6911063194274902, 1.800183653831482, 1.909260869026184, 2.018338203430176, 2.127415418624878, 2.23649263381958, 2.3455698490142822, 2.4546470642089844, 2.5637242794036865, 2.6728014945983887, 2.78187894821167, 2.890955924987793]}, "gradients/encoder.encoder.layers.12.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 2.0, 0.0, 5.0, 4.0, 3.0, 2.0, 15.0, 14.0, 15.0, 35.0, 49.0, 79.0, 129.0, 213.0, 399.0, 732.0, 1482.0, 3655.0, 10186.0, 35659.0, 167572.0, 528251.0, 231787.0, 47536.0, 12652.0, 4374.0, 1803.0, 866.0, 427.0, 233.0, 143.0, 90.0, 56.0, 29.0, 24.0, 19.0, 4.0, 5.0, 8.0, 5.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.7744140625, -1.7216033935546875, -1.668792724609375, -1.6159820556640625, -1.56317138671875, -1.5103607177734375, -1.457550048828125, -1.4047393798828125, -1.3519287109375, -1.2991180419921875, -1.246307373046875, -1.1934967041015625, -1.14068603515625, -1.0878753662109375, -1.035064697265625, -0.9822540283203125, -0.929443359375, -0.8766326904296875, -0.823822021484375, -0.7710113525390625, -0.71820068359375, -0.6653900146484375, -0.612579345703125, -0.5597686767578125, -0.5069580078125, -0.4541473388671875, -0.401336669921875, -0.3485260009765625, -0.29571533203125, -0.2429046630859375, -0.190093994140625, -0.1372833251953125, -0.08447265625, -0.0316619873046875, 0.021148681640625, 0.0739593505859375, 0.12677001953125, 0.1795806884765625, 0.232391357421875, 0.2852020263671875, 0.3380126953125, 0.3908233642578125, 0.443634033203125, 0.4964447021484375, 0.54925537109375, 0.6020660400390625, 0.654876708984375, 0.7076873779296875, 0.760498046875, 0.8133087158203125, 0.866119384765625, 0.9189300537109375, 0.97174072265625, 1.0245513916015625, 1.077362060546875, 1.1301727294921875, 1.1829833984375, 1.2357940673828125, 1.288604736328125, 1.3414154052734375, 1.39422607421875, 1.4470367431640625, 1.499847412109375, 1.5526580810546875, 1.60546875]}, "gradients/encoder.encoder.layers.12.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 4.0, 2.0, 2.0, 5.0, 6.0, 9.0, 16.0, 20.0, 36.0, 42.0, 43.0, 74.0, 68.0, 86.0, 91.0, 82.0, 98.0, 81.0, 64.0, 54.0, 42.0, 28.0, 16.0, 13.0, 11.0, 8.0, 6.0, 3.0, 0.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.923828125, -0.897430419921875, -0.87103271484375, -0.844635009765625, -0.8182373046875, -0.791839599609375, -0.76544189453125, -0.739044189453125, -0.712646484375, -0.686248779296875, -0.65985107421875, -0.633453369140625, -0.6070556640625, -0.580657958984375, -0.55426025390625, -0.527862548828125, -0.50146484375, -0.475067138671875, -0.44866943359375, -0.422271728515625, -0.3958740234375, -0.369476318359375, -0.34307861328125, -0.316680908203125, -0.290283203125, -0.263885498046875, -0.23748779296875, -0.211090087890625, -0.1846923828125, -0.158294677734375, -0.13189697265625, -0.105499267578125, -0.0791015625, -0.052703857421875, -0.02630615234375, 9.1552734375e-05, 0.0264892578125, 0.052886962890625, 0.07928466796875, 0.105682373046875, 0.132080078125, 0.158477783203125, 0.18487548828125, 0.211273193359375, 0.2376708984375, 0.264068603515625, 0.29046630859375, 0.316864013671875, 0.34326171875, 0.369659423828125, 0.39605712890625, 0.422454833984375, 0.4488525390625, 0.475250244140625, 0.50164794921875, 0.528045654296875, 0.554443359375, 0.580841064453125, 0.60723876953125, 0.633636474609375, 0.6600341796875, 0.686431884765625, 0.71282958984375, 0.739227294921875, 0.765625]}, "gradients/encoder.encoder.layers.12.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 3.0, 5.0, 1.0, 3.0, 8.0, 8.0, 10.0, 14.0, 15.0, 25.0, 34.0, 49.0, 55.0, 81.0, 123.0, 224.0, 331.0, 641.0, 1681.0, 5089.0, 22673.0, 165278.0, 694291.0, 131247.0, 19077.0, 4585.0, 1403.0, 645.0, 328.0, 189.0, 144.0, 84.0, 58.0, 40.0, 37.0, 18.0, 12.0, 16.0, 13.0, 5.0, 9.0, 7.0, 0.0, 3.0, 3.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-2.376953125, -2.29888916015625, -2.2208251953125, -2.14276123046875, -2.064697265625, -1.98663330078125, -1.9085693359375, -1.83050537109375, -1.75244140625, -1.67437744140625, -1.5963134765625, -1.51824951171875, -1.440185546875, -1.36212158203125, -1.2840576171875, -1.20599365234375, -1.1279296875, -1.04986572265625, -0.9718017578125, -0.89373779296875, -0.815673828125, -0.73760986328125, -0.6595458984375, -0.58148193359375, -0.50341796875, -0.42535400390625, -0.3472900390625, -0.26922607421875, -0.191162109375, -0.11309814453125, -0.0350341796875, 0.04302978515625, 0.12109375, 0.19915771484375, 0.2772216796875, 0.35528564453125, 0.433349609375, 0.51141357421875, 0.5894775390625, 0.66754150390625, 0.74560546875, 0.82366943359375, 0.9017333984375, 0.97979736328125, 1.057861328125, 1.13592529296875, 1.2139892578125, 1.29205322265625, 1.3701171875, 1.44818115234375, 1.5262451171875, 1.60430908203125, 1.682373046875, 1.76043701171875, 1.8385009765625, 1.91656494140625, 1.99462890625, 2.07269287109375, 2.1507568359375, 2.22882080078125, 2.306884765625, 2.38494873046875, 2.4630126953125, 2.54107666015625, 2.619140625]}, "gradients/encoder.encoder.layers.12.attention.v_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 4.0, 0.0, 3.0, 0.0, 3.0, 3.0, 5.0, 3.0, 4.0, 7.0, 6.0, 12.0, 8.0, 10.0, 22.0, 18.0, 17.0, 21.0, 20.0, 36.0, 45.0, 43.0, 39.0, 39.0, 48.0, 43.0, 38.0, 42.0, 56.0, 45.0, 40.0, 54.0, 35.0, 31.0, 35.0, 32.0, 24.0, 27.0, 18.0, 9.0, 11.0, 12.0, 11.0, 8.0, 6.0, 4.0, 5.0, 4.0, 5.0, 3.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-2.583984375, -2.495147705078125, -2.40631103515625, -2.317474365234375, -2.2286376953125, -2.139801025390625, -2.05096435546875, -1.962127685546875, -1.873291015625, -1.784454345703125, -1.69561767578125, -1.606781005859375, -1.5179443359375, -1.429107666015625, -1.34027099609375, -1.251434326171875, -1.16259765625, -1.073760986328125, -0.98492431640625, -0.896087646484375, -0.8072509765625, -0.718414306640625, -0.62957763671875, -0.540740966796875, -0.451904296875, -0.363067626953125, -0.27423095703125, -0.185394287109375, -0.0965576171875, -0.007720947265625, 0.08111572265625, 0.169952392578125, 0.2587890625, 0.347625732421875, 0.43646240234375, 0.525299072265625, 0.6141357421875, 0.702972412109375, 0.79180908203125, 0.880645751953125, 0.969482421875, 1.058319091796875, 1.14715576171875, 1.235992431640625, 1.3248291015625, 1.413665771484375, 1.50250244140625, 1.591339111328125, 1.68017578125, 1.769012451171875, 1.85784912109375, 1.946685791015625, 2.0355224609375, 2.124359130859375, 2.21319580078125, 2.302032470703125, 2.390869140625, 2.479705810546875, 2.56854248046875, 2.657379150390625, 2.7462158203125, 2.835052490234375, 2.92388916015625, 3.012725830078125, 3.1015625]}, "gradients/encoder.encoder.layers.12.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 4.0, 2.0, 5.0, 9.0, 9.0, 26.0, 23.0, 37.0, 51.0, 118.0, 254.0, 542.0, 1566.0, 7437.0, 111285.0, 873031.0, 47441.0, 4741.0, 1101.0, 422.0, 220.0, 109.0, 55.0, 34.0, 17.0, 9.0, 8.0, 3.0, 5.0, 1.0, 4.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.720703125, -1.6691436767578125, -1.617584228515625, -1.5660247802734375, -1.51446533203125, -1.4629058837890625, -1.411346435546875, -1.3597869873046875, -1.3082275390625, -1.2566680908203125, -1.205108642578125, -1.1535491943359375, -1.10198974609375, -1.0504302978515625, -0.998870849609375, -0.9473114013671875, -0.895751953125, -0.8441925048828125, -0.792633056640625, -0.7410736083984375, -0.68951416015625, -0.6379547119140625, -0.586395263671875, -0.5348358154296875, -0.4832763671875, -0.4317169189453125, -0.380157470703125, -0.3285980224609375, -0.27703857421875, -0.2254791259765625, -0.173919677734375, -0.1223602294921875, -0.07080078125, -0.0192413330078125, 0.032318115234375, 0.0838775634765625, 0.13543701171875, 0.1869964599609375, 0.238555908203125, 0.2901153564453125, 0.3416748046875, 0.3932342529296875, 0.444793701171875, 0.4963531494140625, 0.54791259765625, 0.5994720458984375, 0.651031494140625, 0.7025909423828125, 0.754150390625, 0.8057098388671875, 0.857269287109375, 0.9088287353515625, 0.96038818359375, 1.0119476318359375, 1.063507080078125, 1.1150665283203125, 1.1666259765625, 1.2181854248046875, 1.269744873046875, 1.3213043212890625, 1.37286376953125, 1.4244232177734375, 1.475982666015625, 1.5275421142578125, 1.5791015625]}, "gradients/encoder.encoder.layers.12.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 2.0, 2.0, 1.0, 1.0, 3.0, 4.0, 7.0, 6.0, 9.0, 24.0, 23.0, 33.0, 62.0, 66.0, 99.0, 99.0, 114.0, 100.0, 99.0, 70.0, 50.0, 30.0, 34.0, 20.0, 19.0, 10.0, 6.0, 4.0, 7.0, 2.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0001323223114013672, -0.00012771226465702057, -0.00012310221791267395, -0.00011849217116832733, -0.00011388212442398071, -0.0001092720776796341, -0.00010466203093528748, -0.00010005198419094086, -9.544193744659424e-05, -9.083189070224762e-05, -8.6221843957901e-05, -8.161179721355438e-05, -7.700175046920776e-05, -7.239170372486115e-05, -6.778165698051453e-05, -6.317161023616791e-05, -5.856156349182129e-05, -5.395151674747467e-05, -4.934147000312805e-05, -4.473142325878143e-05, -4.0121376514434814e-05, -3.5511329770088196e-05, -3.090128302574158e-05, -2.629123628139496e-05, -2.168118953704834e-05, -1.707114279270172e-05, -1.2461096048355103e-05, -7.851049304008484e-06, -3.2410025596618652e-06, 1.3690441846847534e-06, 5.979090929031372e-06, 1.058913767337799e-05, 1.519918441772461e-05, 1.9809231162071228e-05, 2.4419277906417847e-05, 2.9029324650764465e-05, 3.3639371395111084e-05, 3.82494181394577e-05, 4.285946488380432e-05, 4.746951162815094e-05, 5.207955837249756e-05, 5.668960511684418e-05, 6.12996518611908e-05, 6.590969860553741e-05, 7.051974534988403e-05, 7.512979209423065e-05, 7.973983883857727e-05, 8.434988558292389e-05, 8.895993232727051e-05, 9.356997907161713e-05, 9.818002581596375e-05, 0.00010279007256031036, 0.00010740011930465698, 0.0001120101660490036, 0.00011662021279335022, 0.00012123025953769684, 0.00012584030628204346, 0.00013045035302639008, 0.0001350603997707367, 0.0001396704465150833, 0.00014428049325942993, 0.00014889054000377655, 0.00015350058674812317, 0.0001581106334924698, 0.0001627206802368164]}, "gradients/encoder.encoder.layers.12.attention.q_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 1.0, 2.0, 1.0, 6.0, 5.0, 3.0, 10.0, 15.0, 26.0, 43.0, 77.0, 134.0, 260.0, 549.0, 1323.0, 4823.0, 32107.0, 525813.0, 448288.0, 28377.0, 4471.0, 1151.0, 529.0, 238.0, 130.0, 75.0, 35.0, 24.0, 12.0, 13.0, 8.0, 4.0, 4.0, 2.0, 2.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.1611328125, -1.1223907470703125, -1.083648681640625, -1.0449066162109375, -1.00616455078125, -0.9674224853515625, -0.928680419921875, -0.8899383544921875, -0.8511962890625, -0.8124542236328125, -0.773712158203125, -0.7349700927734375, -0.69622802734375, -0.6574859619140625, -0.618743896484375, -0.5800018310546875, -0.541259765625, -0.5025177001953125, -0.463775634765625, -0.4250335693359375, -0.38629150390625, -0.3475494384765625, -0.308807373046875, -0.2700653076171875, -0.2313232421875, -0.1925811767578125, -0.153839111328125, -0.1150970458984375, -0.07635498046875, -0.0376129150390625, 0.001129150390625, 0.0398712158203125, 0.07861328125, 0.1173553466796875, 0.156097412109375, 0.1948394775390625, 0.23358154296875, 0.2723236083984375, 0.311065673828125, 0.3498077392578125, 0.3885498046875, 0.4272918701171875, 0.466033935546875, 0.5047760009765625, 0.54351806640625, 0.5822601318359375, 0.621002197265625, 0.6597442626953125, 0.698486328125, 0.7372283935546875, 0.775970458984375, 0.8147125244140625, 0.85345458984375, 0.8921966552734375, 0.930938720703125, 0.9696807861328125, 1.0084228515625, 1.0471649169921875, 1.085906982421875, 1.1246490478515625, 1.16339111328125, 1.2021331787109375, 1.240875244140625, 1.2796173095703125, 1.318359375]}, "gradients/encoder.encoder.layers.12.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 2.0, 5.0, 2.0, 3.0, 2.0, 7.0, 11.0, 10.0, 8.0, 17.0, 26.0, 31.0, 41.0, 59.0, 88.0, 119.0, 137.0, 134.0, 91.0, 55.0, 39.0, 30.0, 20.0, 14.0, 16.0, 11.0, 11.0, 9.0, 5.0, 3.0, 5.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.125, -1.0867919921875, -1.048583984375, -1.0103759765625, -0.97216796875, -0.9339599609375, -0.895751953125, -0.8575439453125, -0.8193359375, -0.7811279296875, -0.742919921875, -0.7047119140625, -0.66650390625, -0.6282958984375, -0.590087890625, -0.5518798828125, -0.513671875, -0.4754638671875, -0.437255859375, -0.3990478515625, -0.36083984375, -0.3226318359375, -0.284423828125, -0.2462158203125, -0.2080078125, -0.1697998046875, -0.131591796875, -0.0933837890625, -0.05517578125, -0.0169677734375, 0.021240234375, 0.0594482421875, 0.09765625, 0.1358642578125, 0.174072265625, 0.2122802734375, 0.25048828125, 0.2886962890625, 0.326904296875, 0.3651123046875, 0.4033203125, 0.4415283203125, 0.479736328125, 0.5179443359375, 0.55615234375, 0.5943603515625, 0.632568359375, 0.6707763671875, 0.708984375, 0.7471923828125, 0.785400390625, 0.8236083984375, 0.86181640625, 0.9000244140625, 0.938232421875, 0.9764404296875, 1.0146484375, 1.0528564453125, 1.091064453125, 1.1292724609375, 1.16748046875, 1.2056884765625, 1.243896484375, 1.2821044921875, 1.3203125]}, "gradients/encoder.encoder.layers.12.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 4.0, 2.0, 0.0, 0.0, 1.0, 2.0, 4.0, 11.0, 30.0, 75.0, 225.0, 329.0, 197.0, 77.0, 28.0, 18.0, 6.0, 3.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-17.165355682373047, -16.234865188598633, -15.304374694824219, -14.373884201049805, -13.44339370727539, -12.512903213500977, -11.582412719726562, -10.651922225952148, -9.721431732177734, -8.79094123840332, -7.860450744628906, -6.929960250854492, -5.999469757080078, -5.068978786468506, -4.138488292694092, -3.2079977989196777, -2.2775068283081055, -1.3470163345336914, -0.4165257215499878, 0.5139648914337158, 1.4444553852081299, 2.374946117401123, 3.305436611175537, 4.235927104949951, 5.166417598724365, 6.096908092498779, 7.027398586273193, 7.957889556884766, 8.88838005065918, 9.818870544433594, 10.749361038208008, 11.679851531982422, 12.610342025756836, 13.54083251953125, 14.471323013305664, 15.401813507080078, 16.332304000854492, 17.262794494628906, 18.19328498840332, 19.123775482177734, 20.05426597595215, 20.984756469726562, 21.915246963500977, 22.84573745727539, 23.776227951049805, 24.70671844482422, 25.637208938598633, 26.567699432373047, 27.498191833496094, 28.428682327270508, 29.359172821044922, 30.289663314819336, 31.22015380859375, 32.1506462097168, 33.08113479614258, 34.011627197265625, 34.942115783691406, 35.87260818481445, 36.803096771240234, 37.73358917236328, 38.66407775878906, 39.59457015991211, 40.52505874633789, 41.45555114746094, 42.38603973388672]}, "gradients/encoder.encoder.layers.12.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 2.0, 1.0, 5.0, 2.0, 2.0, 4.0, 3.0, 12.0, 7.0, 7.0, 12.0, 13.0, 18.0, 18.0, 25.0, 29.0, 34.0, 36.0, 47.0, 41.0, 51.0, 58.0, 66.0, 61.0, 64.0, 61.0, 52.0, 44.0, 46.0, 37.0, 30.0, 29.0, 21.0, 17.0, 11.0, 10.0, 8.0, 5.0, 8.0, 3.0, 4.0, 5.0, 2.0, 3.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-22.446754455566406, -21.841707229614258, -21.23666000366211, -20.631614685058594, -20.026567459106445, -19.421520233154297, -18.81647491455078, -18.211427688598633, -17.606380462646484, -17.001333236694336, -16.396286010742188, -15.791240692138672, -15.186193466186523, -14.581146240234375, -13.976099967956543, -13.371053695678711, -12.766006469726562, -12.160959243774414, -11.555912971496582, -10.95086669921875, -10.345819473266602, -9.740772247314453, -9.135725975036621, -8.530679702758789, -7.925632476806641, -7.32058572769165, -6.71553897857666, -6.11049222946167, -5.50544548034668, -4.9003987312316895, -4.295351982116699, -3.690305233001709, -3.0852584838867188, -2.4802117347717285, -1.8751649856567383, -1.270118236541748, -0.6650714874267578, -0.06002473831176758, 0.5450220108032227, 1.150068759918213, 1.7551155090332031, 2.3601622581481934, 2.9652090072631836, 3.570255756378174, 4.175302505493164, 4.780349254608154, 5.3853960037231445, 5.990442752838135, 6.595489501953125, 7.200536251068115, 7.8055830001831055, 8.410629272460938, 9.015676498413086, 9.620723724365234, 10.225769996643066, 10.830816268920898, 11.435863494873047, 12.040910720825195, 12.645956993103027, 13.25100326538086, 13.856050491333008, 14.461097717285156, 15.066143989562988, 15.67119026184082, 16.27623748779297]}, "gradients/encoder.encoder.layers.11.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 3.0, 5.0, 3.0, 6.0, 2.0, 10.0, 15.0, 17.0, 31.0, 56.0, 89.0, 117.0, 189.0, 327.0, 546.0, 997.0, 1905.0, 4284.0, 11626.0, 48438.0, 3806806.0, 277488.0, 26864.0, 7877.0, 3093.0, 1566.0, 782.0, 441.0, 257.0, 174.0, 86.0, 59.0, 43.0, 21.0, 16.0, 13.0, 13.0, 7.0, 4.0, 8.0, 3.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-1.6259765625, -1.57489013671875, -1.5238037109375, -1.47271728515625, -1.421630859375, -1.37054443359375, -1.3194580078125, -1.26837158203125, -1.21728515625, -1.16619873046875, -1.1151123046875, -1.06402587890625, -1.012939453125, -0.96185302734375, -0.9107666015625, -0.85968017578125, -0.80859375, -0.75750732421875, -0.7064208984375, -0.65533447265625, -0.604248046875, -0.55316162109375, -0.5020751953125, -0.45098876953125, -0.39990234375, -0.34881591796875, -0.2977294921875, -0.24664306640625, -0.195556640625, -0.14447021484375, -0.0933837890625, -0.04229736328125, 0.0087890625, 0.05987548828125, 0.1109619140625, 0.16204833984375, 0.213134765625, 0.26422119140625, 0.3153076171875, 0.36639404296875, 0.41748046875, 0.46856689453125, 0.5196533203125, 0.57073974609375, 0.621826171875, 0.67291259765625, 0.7239990234375, 0.77508544921875, 0.826171875, 0.87725830078125, 0.9283447265625, 0.97943115234375, 1.030517578125, 1.08160400390625, 1.1326904296875, 1.18377685546875, 1.23486328125, 1.28594970703125, 1.3370361328125, 1.38812255859375, 1.439208984375, 1.49029541015625, 1.5413818359375, 1.59246826171875, 1.6435546875]}, "gradients/encoder.encoder.layers.11.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 4.0, 1.0, 2.0, 4.0, 4.0, 4.0, 4.0, 3.0, 8.0, 6.0, 12.0, 20.0, 27.0, 31.0, 33.0, 41.0, 71.0, 75.0, 69.0, 87.0, 81.0, 89.0, 63.0, 67.0, 50.0, 36.0, 35.0, 18.0, 12.0, 17.0, 9.0, 9.0, 7.0, 6.0, 3.0, 6.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.033203125, -1.003662109375, -0.97412109375, -0.944580078125, -0.9150390625, -0.885498046875, -0.85595703125, -0.826416015625, -0.796875, -0.767333984375, -0.73779296875, -0.708251953125, -0.6787109375, -0.649169921875, -0.61962890625, -0.590087890625, -0.560546875, -0.531005859375, -0.50146484375, -0.471923828125, -0.4423828125, -0.412841796875, -0.38330078125, -0.353759765625, -0.32421875, -0.294677734375, -0.26513671875, -0.235595703125, -0.2060546875, -0.176513671875, -0.14697265625, -0.117431640625, -0.087890625, -0.058349609375, -0.02880859375, 0.000732421875, 0.0302734375, 0.059814453125, 0.08935546875, 0.118896484375, 0.1484375, 0.177978515625, 0.20751953125, 0.237060546875, 0.2666015625, 0.296142578125, 0.32568359375, 0.355224609375, 0.384765625, 0.414306640625, 0.44384765625, 0.473388671875, 0.5029296875, 0.532470703125, 0.56201171875, 0.591552734375, 0.62109375, 0.650634765625, 0.68017578125, 0.709716796875, 0.7392578125, 0.768798828125, 0.79833984375, 0.827880859375, 0.857421875]}, "gradients/encoder.encoder.layers.11.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 6.0, 7.0, 6.0, 7.0, 9.0, 15.0, 17.0, 35.0, 50.0, 85.0, 110.0, 147.0, 183.0, 299.0, 496.0, 766.0, 1299.0, 2426.0, 5072.0, 13918.0, 82174.0, 3978668.0, 83305.0, 13977.0, 5292.0, 2389.0, 1260.0, 793.0, 497.0, 301.0, 217.0, 134.0, 83.0, 67.0, 57.0, 45.0, 30.0, 22.0, 12.0, 6.0, 6.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.7265625, -1.665985107421875, -1.60540771484375, -1.544830322265625, -1.4842529296875, -1.423675537109375, -1.36309814453125, -1.302520751953125, -1.241943359375, -1.181365966796875, -1.12078857421875, -1.060211181640625, -0.9996337890625, -0.939056396484375, -0.87847900390625, -0.817901611328125, -0.75732421875, -0.696746826171875, -0.63616943359375, -0.575592041015625, -0.5150146484375, -0.454437255859375, -0.39385986328125, -0.333282470703125, -0.272705078125, -0.212127685546875, -0.15155029296875, -0.090972900390625, -0.0303955078125, 0.030181884765625, 0.09075927734375, 0.151336669921875, 0.2119140625, 0.272491455078125, 0.33306884765625, 0.393646240234375, 0.4542236328125, 0.514801025390625, 0.57537841796875, 0.635955810546875, 0.696533203125, 0.757110595703125, 0.81768798828125, 0.878265380859375, 0.9388427734375, 0.999420166015625, 1.05999755859375, 1.120574951171875, 1.18115234375, 1.241729736328125, 1.30230712890625, 1.362884521484375, 1.4234619140625, 1.484039306640625, 1.54461669921875, 1.605194091796875, 1.665771484375, 1.726348876953125, 1.78692626953125, 1.847503662109375, 1.9080810546875, 1.968658447265625, 2.02923583984375, 2.089813232421875, 2.150390625]}, "gradients/encoder.encoder.layers.11.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 2.0, 0.0, 2.0, 6.0, 9.0, 8.0, 23.0, 28.0, 94.0, 689.0, 3024.0, 105.0, 26.0, 19.0, 16.0, 6.0, 3.0, 4.0, 6.0, 1.0, 0.0, 3.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.64697265625, -0.62615966796875, -0.6053466796875, -0.58453369140625, -0.563720703125, -0.54290771484375, -0.5220947265625, -0.50128173828125, -0.48046875, -0.45965576171875, -0.4388427734375, -0.41802978515625, -0.397216796875, -0.37640380859375, -0.3555908203125, -0.33477783203125, -0.31396484375, -0.29315185546875, -0.2723388671875, -0.25152587890625, -0.230712890625, -0.20989990234375, -0.1890869140625, -0.16827392578125, -0.1474609375, -0.12664794921875, -0.1058349609375, -0.08502197265625, -0.064208984375, -0.04339599609375, -0.0225830078125, -0.00177001953125, 0.01904296875, 0.03985595703125, 0.0606689453125, 0.08148193359375, 0.102294921875, 0.12310791015625, 0.1439208984375, 0.16473388671875, 0.185546875, 0.20635986328125, 0.2271728515625, 0.24798583984375, 0.268798828125, 0.28961181640625, 0.3104248046875, 0.33123779296875, 0.35205078125, 0.37286376953125, 0.3936767578125, 0.41448974609375, 0.435302734375, 0.45611572265625, 0.4769287109375, 0.49774169921875, 0.5185546875, 0.53936767578125, 0.5601806640625, 0.58099365234375, 0.601806640625, 0.62261962890625, 0.6434326171875, 0.66424560546875, 0.68505859375]}, "gradients/encoder.encoder.layers.11.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 7.0, 15.0, 50.0, 146.0, 265.0, 271.0, 149.0, 66.0, 27.0, 9.0, 3.0, 5.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-2.5270354747772217, -2.422767400741577, -2.3184990882873535, -2.214231014251709, -2.1099629402160645, -2.00569486618042, -1.9014266729354858, -1.7971584796905518, -1.6928904056549072, -1.5886223316192627, -1.4843541383743286, -1.3800859451293945, -1.27581787109375, -1.1715497970581055, -1.0672816038131714, -0.9630134701728821, -0.8587453365325928, -0.7544772028923035, -0.6502090692520142, -0.5459409356117249, -0.44167280197143555, -0.33740466833114624, -0.23313653469085693, -0.12886840105056763, -0.02460026741027832, 0.07966786623001099, 0.1839359998703003, 0.2882041335105896, 0.3924722671508789, 0.4967404007911682, 0.6010085344314575, 0.7052766680717468, 0.809544563293457, 0.9138126969337463, 1.0180808305740356, 1.1223490238189697, 1.2266170978546143, 1.3308851718902588, 1.4351533651351929, 1.539421558380127, 1.6436896324157715, 1.747957706451416, 1.85222589969635, 1.9564940929412842, 2.0607621669769287, 2.1650302410125732, 2.269298553466797, 2.3735666275024414, 2.477834701538086, 2.5821027755737305, 2.686370849609375, 2.7906391620635986, 2.894907236099243, 2.9991753101348877, 3.1034436225891113, 3.207711696624756, 3.3119797706604004, 3.416247844696045, 3.5205159187316895, 3.624784231185913, 3.7290523052215576, 3.833320379257202, 3.937588691711426, 4.04185676574707, 4.146124839782715]}, "gradients/encoder.encoder.layers.11.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 2.0, 3.0, 4.0, 5.0, 4.0, 1.0, 16.0, 16.0, 10.0, 15.0, 19.0, 29.0, 22.0, 26.0, 39.0, 27.0, 49.0, 42.0, 38.0, 55.0, 56.0, 55.0, 57.0, 50.0, 40.0, 43.0, 52.0, 18.0, 30.0, 30.0, 29.0, 25.0, 21.0, 16.0, 15.0, 18.0, 7.0, 4.0, 4.0, 2.0, 3.0, 4.0, 2.0, 2.0, 4.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0], "bins": [-1.1421548128128052, -1.1085011959075928, -1.0748475790023804, -1.041193962097168, -1.0075403451919556, -0.9738867282867432, -0.9402331709861755, -0.9065795540809631, -0.8729259371757507, -0.8392723202705383, -0.8056187033653259, -0.7719650864601135, -0.7383115291595459, -0.7046579122543335, -0.6710042953491211, -0.6373506784439087, -0.6036970615386963, -0.5700434446334839, -0.5363898277282715, -0.5027362108230591, -0.46908262372016907, -0.43542900681495667, -0.40177541971206665, -0.36812180280685425, -0.33446818590164185, -0.30081456899642944, -0.26716095209121704, -0.23350736498832703, -0.19985374808311462, -0.16620013117790222, -0.132546529173851, -0.0988929271697998, -0.06523919105529785, -0.031585581600666046, 0.0020680278539657593, 0.035721637308597565, 0.06937524676322937, 0.10302886366844177, 0.13668246567249298, 0.1703360676765442, 0.2039896845817566, 0.237643301486969, 0.2712969183921814, 0.3049505054950714, 0.3386041224002838, 0.3722577393054962, 0.40591132640838623, 0.43956494331359863, 0.47321856021881104, 0.5068721771240234, 0.5405257940292358, 0.5741794109344482, 0.6078330278396606, 0.641486644744873, 0.6751402020454407, 0.7087938189506531, 0.7424474358558655, 0.7761010527610779, 0.8097546696662903, 0.8434082865715027, 0.8770618438720703, 0.9107154607772827, 0.9443690776824951, 0.9780226945877075, 1.01167631149292]}, "gradients/encoder.encoder.layers.11.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 2.0, 1.0, 4.0, 3.0, 3.0, 8.0, 9.0, 14.0, 22.0, 30.0, 52.0, 94.0, 179.0, 359.0, 758.0, 1902.0, 4993.0, 14944.0, 61381.0, 304661.0, 492006.0, 126946.0, 27020.0, 8089.0, 2873.0, 1139.0, 541.0, 237.0, 119.0, 53.0, 42.0, 27.0, 17.0, 10.0, 13.0, 8.0, 5.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.08984375, -2.024993896484375, -1.96014404296875, -1.895294189453125, -1.8304443359375, -1.765594482421875, -1.70074462890625, -1.635894775390625, -1.571044921875, -1.506195068359375, -1.44134521484375, -1.376495361328125, -1.3116455078125, -1.246795654296875, -1.18194580078125, -1.117095947265625, -1.05224609375, -0.987396240234375, -0.92254638671875, -0.857696533203125, -0.7928466796875, -0.727996826171875, -0.66314697265625, -0.598297119140625, -0.533447265625, -0.468597412109375, -0.40374755859375, -0.338897705078125, -0.2740478515625, -0.209197998046875, -0.14434814453125, -0.079498291015625, -0.0146484375, 0.050201416015625, 0.11505126953125, 0.179901123046875, 0.2447509765625, 0.309600830078125, 0.37445068359375, 0.439300537109375, 0.504150390625, 0.569000244140625, 0.63385009765625, 0.698699951171875, 0.7635498046875, 0.828399658203125, 0.89324951171875, 0.958099365234375, 1.02294921875, 1.087799072265625, 1.15264892578125, 1.217498779296875, 1.2823486328125, 1.347198486328125, 1.41204833984375, 1.476898193359375, 1.541748046875, 1.606597900390625, 1.67144775390625, 1.736297607421875, 1.8011474609375, 1.865997314453125, 1.93084716796875, 1.995697021484375, 2.060546875]}, "gradients/encoder.encoder.layers.11.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 1.0, 2.0, 8.0, 2.0, 1.0, 5.0, 11.0, 9.0, 15.0, 17.0, 20.0, 18.0, 51.0, 34.0, 63.0, 71.0, 77.0, 78.0, 82.0, 71.0, 88.0, 60.0, 55.0, 47.0, 31.0, 25.0, 8.0, 11.0, 8.0, 13.0, 11.0, 7.0, 7.0, 3.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.044921875, -1.015228271484375, -0.98553466796875, -0.955841064453125, -0.9261474609375, -0.896453857421875, -0.86676025390625, -0.837066650390625, -0.807373046875, -0.777679443359375, -0.74798583984375, -0.718292236328125, -0.6885986328125, -0.658905029296875, -0.62921142578125, -0.599517822265625, -0.56982421875, -0.540130615234375, -0.51043701171875, -0.480743408203125, -0.4510498046875, -0.421356201171875, -0.39166259765625, -0.361968994140625, -0.332275390625, -0.302581787109375, -0.27288818359375, -0.243194580078125, -0.2135009765625, -0.183807373046875, -0.15411376953125, -0.124420166015625, -0.0947265625, -0.065032958984375, -0.03533935546875, -0.005645751953125, 0.0240478515625, 0.053741455078125, 0.08343505859375, 0.113128662109375, 0.142822265625, 0.172515869140625, 0.20220947265625, 0.231903076171875, 0.2615966796875, 0.291290283203125, 0.32098388671875, 0.350677490234375, 0.38037109375, 0.410064697265625, 0.43975830078125, 0.469451904296875, 0.4991455078125, 0.528839111328125, 0.55853271484375, 0.588226318359375, 0.617919921875, 0.647613525390625, 0.67730712890625, 0.707000732421875, 0.7366943359375, 0.766387939453125, 0.79608154296875, 0.825775146484375, 0.85546875]}, "gradients/encoder.encoder.layers.11.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 3.0, 6.0, 7.0, 4.0, 8.0, 11.0, 19.0, 20.0, 30.0, 47.0, 94.0, 128.0, 223.0, 348.0, 837.0, 2147.0, 7782.0, 52471.0, 691443.0, 264166.0, 21616.0, 4379.0, 1419.0, 601.0, 290.0, 186.0, 85.0, 54.0, 39.0, 25.0, 22.0, 12.0, 14.0, 9.0, 6.0, 6.0, 5.0, 1.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.38671875, -3.26910400390625, -3.1514892578125, -3.03387451171875, -2.916259765625, -2.79864501953125, -2.6810302734375, -2.56341552734375, -2.44580078125, -2.32818603515625, -2.2105712890625, -2.09295654296875, -1.975341796875, -1.85772705078125, -1.7401123046875, -1.62249755859375, -1.5048828125, -1.38726806640625, -1.2696533203125, -1.15203857421875, -1.034423828125, -0.91680908203125, -0.7991943359375, -0.68157958984375, -0.56396484375, -0.44635009765625, -0.3287353515625, -0.21112060546875, -0.093505859375, 0.02410888671875, 0.1417236328125, 0.25933837890625, 0.376953125, 0.49456787109375, 0.6121826171875, 0.72979736328125, 0.847412109375, 0.96502685546875, 1.0826416015625, 1.20025634765625, 1.31787109375, 1.43548583984375, 1.5531005859375, 1.67071533203125, 1.788330078125, 1.90594482421875, 2.0235595703125, 2.14117431640625, 2.2587890625, 2.37640380859375, 2.4940185546875, 2.61163330078125, 2.729248046875, 2.84686279296875, 2.9644775390625, 3.08209228515625, 3.19970703125, 3.31732177734375, 3.4349365234375, 3.55255126953125, 3.670166015625, 3.78778076171875, 3.9053955078125, 4.02301025390625, 4.140625]}, "gradients/encoder.encoder.layers.11.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 1.0, 1.0, 4.0, 4.0, 2.0, 4.0, 7.0, 8.0, 6.0, 5.0, 14.0, 16.0, 18.0, 26.0, 25.0, 39.0, 46.0, 34.0, 53.0, 54.0, 56.0, 51.0, 63.0, 61.0, 62.0, 52.0, 56.0, 47.0, 41.0, 16.0, 28.0, 18.0, 21.0, 16.0, 10.0, 7.0, 11.0, 6.0, 4.0, 6.0, 4.0, 4.0, 3.0, 0.0, 2.0, 1.0, 0.0, 0.0, 2.0], "bins": [-5.44921875, -5.3037109375, -5.158203125, -5.0126953125, -4.8671875, -4.7216796875, -4.576171875, -4.4306640625, -4.28515625, -4.1396484375, -3.994140625, -3.8486328125, -3.703125, -3.5576171875, -3.412109375, -3.2666015625, -3.12109375, -2.9755859375, -2.830078125, -2.6845703125, -2.5390625, -2.3935546875, -2.248046875, -2.1025390625, -1.95703125, -1.8115234375, -1.666015625, -1.5205078125, -1.375, -1.2294921875, -1.083984375, -0.9384765625, -0.79296875, -0.6474609375, -0.501953125, -0.3564453125, -0.2109375, -0.0654296875, 0.080078125, 0.2255859375, 0.37109375, 0.5166015625, 0.662109375, 0.8076171875, 0.953125, 1.0986328125, 1.244140625, 1.3896484375, 1.53515625, 1.6806640625, 1.826171875, 1.9716796875, 2.1171875, 2.2626953125, 2.408203125, 2.5537109375, 2.69921875, 2.8447265625, 2.990234375, 3.1357421875, 3.28125, 3.4267578125, 3.572265625, 3.7177734375, 3.86328125]}, "gradients/encoder.encoder.layers.11.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 4.0, 5.0, 8.0, 27.0, 34.0, 69.0, 220.0, 999.0, 18740.0, 1016897.0, 10406.0, 841.0, 196.0, 58.0, 30.0, 14.0, 8.0, 4.0, 2.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.4453125, -5.2916259765625, -5.137939453125, -4.9842529296875, -4.83056640625, -4.6768798828125, -4.523193359375, -4.3695068359375, -4.2158203125, -4.0621337890625, -3.908447265625, -3.7547607421875, -3.60107421875, -3.4473876953125, -3.293701171875, -3.1400146484375, -2.986328125, -2.8326416015625, -2.678955078125, -2.5252685546875, -2.37158203125, -2.2178955078125, -2.064208984375, -1.9105224609375, -1.7568359375, -1.6031494140625, -1.449462890625, -1.2957763671875, -1.14208984375, -0.9884033203125, -0.834716796875, -0.6810302734375, -0.52734375, -0.3736572265625, -0.219970703125, -0.0662841796875, 0.08740234375, 0.2410888671875, 0.394775390625, 0.5484619140625, 0.7021484375, 0.8558349609375, 1.009521484375, 1.1632080078125, 1.31689453125, 1.4705810546875, 1.624267578125, 1.7779541015625, 1.931640625, 2.0853271484375, 2.239013671875, 2.3927001953125, 2.54638671875, 2.7000732421875, 2.853759765625, 3.0074462890625, 3.1611328125, 3.3148193359375, 3.468505859375, 3.6221923828125, 3.77587890625, 3.9295654296875, 4.083251953125, 4.2369384765625, 4.390625]}, "gradients/encoder.encoder.layers.11.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 3.0, 2.0, 7.0, 11.0, 16.0, 48.0, 72.0, 136.0, 174.0, 193.0, 138.0, 89.0, 51.0, 36.0, 20.0, 8.0, 4.0, 4.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00032901763916015625, -0.00032103434205055237, -0.0003130510449409485, -0.0003050677478313446, -0.0002970844507217407, -0.00028910115361213684, -0.00028111785650253296, -0.0002731345593929291, -0.0002651512622833252, -0.0002571679651737213, -0.00024918466806411743, -0.00024120137095451355, -0.00023321807384490967, -0.00022523477673530579, -0.0002172514796257019, -0.00020926818251609802, -0.00020128488540649414, -0.00019330158829689026, -0.00018531829118728638, -0.0001773349940776825, -0.0001693516969680786, -0.00016136839985847473, -0.00015338510274887085, -0.00014540180563926697, -0.00013741850852966309, -0.0001294352114200592, -0.00012145191431045532, -0.00011346861720085144, -0.00010548532009124756, -9.750202298164368e-05, -8.95187258720398e-05, -8.153542876243591e-05, -7.355213165283203e-05, -6.556883454322815e-05, -5.758553743362427e-05, -4.9602240324020386e-05, -4.1618943214416504e-05, -3.363564610481262e-05, -2.565234899520874e-05, -1.766905188560486e-05, -9.685754776000977e-06, -1.7024576663970947e-06, 6.280839443206787e-06, 1.4264136552810669e-05, 2.224743366241455e-05, 3.0230730772018433e-05, 3.8214027881622314e-05, 4.6197324991226196e-05, 5.418062210083008e-05, 6.216391921043396e-05, 7.014721632003784e-05, 7.813051342964172e-05, 8.61138105392456e-05, 9.409710764884949e-05, 0.00010208040475845337, 0.00011006370186805725, 0.00011804699897766113, 0.00012603029608726501, 0.0001340135931968689, 0.00014199689030647278, 0.00014998018741607666, 0.00015796348452568054, 0.00016594678163528442, 0.0001739300787448883, 0.0001819133758544922]}, "gradients/encoder.encoder.layers.11.attention.q_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 2.0, 3.0, 2.0, 2.0, 3.0, 8.0, 10.0, 15.0, 13.0, 19.0, 24.0, 29.0, 54.0, 61.0, 81.0, 145.0, 293.0, 607.0, 1470.0, 4677.0, 21265.0, 188244.0, 723907.0, 89279.0, 12911.0, 3159.0, 1094.0, 485.0, 245.0, 143.0, 96.0, 56.0, 42.0, 28.0, 18.0, 12.0, 19.0, 13.0, 11.0, 3.0, 3.0, 2.0, 2.0, 2.0, 1.0, 3.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-1.123046875, -1.0883331298828125, -1.053619384765625, -1.0189056396484375, -0.98419189453125, -0.9494781494140625, -0.914764404296875, -0.8800506591796875, -0.8453369140625, -0.8106231689453125, -0.775909423828125, -0.7411956787109375, -0.70648193359375, -0.6717681884765625, -0.637054443359375, -0.6023406982421875, -0.567626953125, -0.5329132080078125, -0.498199462890625, -0.4634857177734375, -0.42877197265625, -0.3940582275390625, -0.359344482421875, -0.3246307373046875, -0.2899169921875, -0.2552032470703125, -0.220489501953125, -0.1857757568359375, -0.15106201171875, -0.1163482666015625, -0.081634521484375, -0.0469207763671875, -0.01220703125, 0.0225067138671875, 0.057220458984375, 0.0919342041015625, 0.12664794921875, 0.1613616943359375, 0.196075439453125, 0.2307891845703125, 0.2655029296875, 0.3002166748046875, 0.334930419921875, 0.3696441650390625, 0.40435791015625, 0.4390716552734375, 0.473785400390625, 0.5084991455078125, 0.543212890625, 0.5779266357421875, 0.612640380859375, 0.6473541259765625, 0.68206787109375, 0.7167816162109375, 0.751495361328125, 0.7862091064453125, 0.8209228515625, 0.8556365966796875, 0.890350341796875, 0.9250640869140625, 0.95977783203125, 0.9944915771484375, 1.029205322265625, 1.0639190673828125, 1.0986328125]}, "gradients/encoder.encoder.layers.11.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 0.0, 4.0, 2.0, 2.0, 3.0, 8.0, 2.0, 2.0, 12.0, 11.0, 15.0, 17.0, 27.0, 22.0, 25.0, 40.0, 84.0, 94.0, 109.0, 127.0, 106.0, 76.0, 58.0, 37.0, 24.0, 20.0, 26.0, 12.0, 9.0, 8.0, 3.0, 3.0, 5.0, 4.0, 3.0, 3.0, 1.0, 2.0, 3.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-1.6123046875, -1.5688934326171875, -1.525482177734375, -1.4820709228515625, -1.43865966796875, -1.3952484130859375, -1.351837158203125, -1.3084259033203125, -1.2650146484375, -1.2216033935546875, -1.178192138671875, -1.1347808837890625, -1.09136962890625, -1.0479583740234375, -1.004547119140625, -0.9611358642578125, -0.917724609375, -0.8743133544921875, -0.830902099609375, -0.7874908447265625, -0.74407958984375, -0.7006683349609375, -0.657257080078125, -0.6138458251953125, -0.5704345703125, -0.5270233154296875, -0.483612060546875, -0.4402008056640625, -0.39678955078125, -0.3533782958984375, -0.309967041015625, -0.2665557861328125, -0.22314453125, -0.1797332763671875, -0.136322021484375, -0.0929107666015625, -0.04949951171875, -0.0060882568359375, 0.037322998046875, 0.0807342529296875, 0.1241455078125, 0.1675567626953125, 0.210968017578125, 0.2543792724609375, 0.29779052734375, 0.3412017822265625, 0.384613037109375, 0.4280242919921875, 0.471435546875, 0.5148468017578125, 0.558258056640625, 0.6016693115234375, 0.64508056640625, 0.6884918212890625, 0.731903076171875, 0.7753143310546875, 0.8187255859375, 0.8621368408203125, 0.905548095703125, 0.9489593505859375, 0.99237060546875, 1.0357818603515625, 1.079193115234375, 1.1226043701171875, 1.166015625]}, "gradients/encoder.encoder.layers.11.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 3.0, 7.0, 4.0, 9.0, 20.0, 41.0, 55.0, 100.0, 187.0, 192.0, 161.0, 92.0, 55.0, 34.0, 19.0, 10.0, 15.0, 3.0, 2.0, 3.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-22.407970428466797, -21.664745330810547, -20.921520233154297, -20.178295135498047, -19.435070037841797, -18.69184684753418, -17.94862174987793, -17.20539665222168, -16.46217155456543, -15.71894645690918, -14.97572135925293, -14.232497215270996, -13.489272117614746, -12.746047019958496, -12.002822875976562, -11.259597778320312, -10.516372680664062, -9.773147583007812, -9.029922485351562, -8.286698341369629, -7.543473243713379, -6.800248146057129, -6.057023525238037, -5.313798904418945, -4.570573806762695, -3.8273489475250244, -3.0841240882873535, -2.3408992290496826, -1.5976743698120117, -0.8544495105743408, -0.11122465133666992, 0.6319999694824219, 1.3752250671386719, 2.1184499263763428, 2.8616747856140137, 3.6048996448516846, 4.3481245040893555, 5.0913496017456055, 5.834574222564697, 6.577798843383789, 7.321023941040039, 8.064249038696289, 8.807474136352539, 9.550698280334473, 10.293923377990723, 11.037148475646973, 11.780372619628906, 12.523597717285156, 13.266822814941406, 14.010047912597656, 14.753273010253906, 15.49649715423584, 16.239723205566406, 16.982946395874023, 17.726171493530273, 18.469396591186523, 19.212621688842773, 19.955846786499023, 20.699071884155273, 21.442296981811523, 22.18552017211914, 22.92874526977539, 23.67197036743164, 24.41519546508789, 25.15842056274414]}, "gradients/encoder.encoder.layers.11.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 4.0, 2.0, 1.0, 3.0, 2.0, 5.0, 5.0, 5.0, 4.0, 9.0, 8.0, 13.0, 12.0, 15.0, 16.0, 26.0, 26.0, 28.0, 32.0, 50.0, 50.0, 54.0, 47.0, 56.0, 60.0, 61.0, 57.0, 46.0, 50.0, 48.0, 31.0, 28.0, 27.0, 17.0, 20.0, 17.0, 17.0, 13.0, 16.0, 10.0, 3.0, 5.0, 2.0, 2.0, 3.0, 3.0, 2.0, 3.0, 2.0, 2.0, 0.0, 0.0, 0.0, 2.0], "bins": [-22.595840454101562, -21.9388484954834, -21.2818546295166, -20.624862670898438, -19.96786880493164, -19.310876846313477, -18.653884887695312, -17.996891021728516, -17.33989906311035, -16.682907104492188, -16.02591323852539, -15.368921279907227, -14.711928367614746, -14.054935455322266, -13.397942543029785, -12.740949630737305, -12.083956718444824, -11.426963806152344, -10.769970893859863, -10.112977981567383, -9.455986022949219, -8.798993110656738, -8.142000198364258, -7.4850077629089355, -6.828014850616455, -6.171021938323975, -5.514029502868652, -4.857036590576172, -4.200043678283691, -3.543051242828369, -2.8860583305358887, -2.2290658950805664, -1.572072982788086, -0.9150802493095398, -0.25808751583099365, 0.39890527725219727, 1.0558979511260986, 1.712890625, 2.3698835372924805, 3.0268759727478027, 3.683868885040283, 4.340861797332764, 4.997854232788086, 5.654847145080566, 6.311840057373047, 6.968832492828369, 7.62582540512085, 8.282817840576172, 8.939810752868652, 9.596803665161133, 10.253796577453613, 10.910789489746094, 11.567781448364258, 12.224774360656738, 12.881767272949219, 13.538759231567383, 14.19575309753418, 14.85274600982666, 15.50973892211914, 16.166730880737305, 16.8237247467041, 17.480716705322266, 18.137710571289062, 18.794702529907227, 19.45169448852539]}, "gradients/encoder.encoder.layers.10.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 3.0, 0.0, 1.0, 2.0, 2.0, 2.0, 2.0, 9.0, 3.0, 10.0, 9.0, 25.0, 25.0, 31.0, 44.0, 52.0, 81.0, 109.0, 173.0, 269.0, 373.0, 585.0, 1061.0, 1975.0, 3685.0, 9147.0, 26680.0, 124950.0, 3776067.0, 194514.0, 33843.0, 10872.0, 4495.0, 2166.0, 1150.0, 616.0, 383.0, 293.0, 166.0, 122.0, 86.0, 61.0, 47.0, 29.0, 24.0, 10.0, 9.0, 9.0, 8.0, 4.0, 6.0, 7.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.455078125, -1.405914306640625, -1.35675048828125, -1.307586669921875, -1.2584228515625, -1.209259033203125, -1.16009521484375, -1.110931396484375, -1.061767578125, -1.012603759765625, -0.96343994140625, -0.914276123046875, -0.8651123046875, -0.815948486328125, -0.76678466796875, -0.717620849609375, -0.66845703125, -0.619293212890625, -0.57012939453125, -0.520965576171875, -0.4718017578125, -0.422637939453125, -0.37347412109375, -0.324310302734375, -0.275146484375, -0.225982666015625, -0.17681884765625, -0.127655029296875, -0.0784912109375, -0.029327392578125, 0.01983642578125, 0.069000244140625, 0.1181640625, 0.167327880859375, 0.21649169921875, 0.265655517578125, 0.3148193359375, 0.363983154296875, 0.41314697265625, 0.462310791015625, 0.511474609375, 0.560638427734375, 0.60980224609375, 0.658966064453125, 0.7081298828125, 0.757293701171875, 0.80645751953125, 0.855621337890625, 0.90478515625, 0.953948974609375, 1.00311279296875, 1.052276611328125, 1.1014404296875, 1.150604248046875, 1.19976806640625, 1.248931884765625, 1.298095703125, 1.347259521484375, 1.39642333984375, 1.445587158203125, 1.4947509765625, 1.543914794921875, 1.59307861328125, 1.642242431640625, 1.69140625]}, "gradients/encoder.encoder.layers.10.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 3.0, 2.0, 0.0, 2.0, 4.0, 15.0, 6.0, 4.0, 8.0, 13.0, 17.0, 30.0, 33.0, 25.0, 46.0, 62.0, 79.0, 84.0, 78.0, 99.0, 82.0, 65.0, 62.0, 43.0, 31.0, 31.0, 22.0, 16.0, 14.0, 8.0, 5.0, 2.0, 8.0, 4.0, 2.0, 1.0, 0.0, 2.0, 2.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.2294921875, -1.195098876953125, -1.16070556640625, -1.126312255859375, -1.0919189453125, -1.057525634765625, -1.02313232421875, -0.988739013671875, -0.954345703125, -0.919952392578125, -0.88555908203125, -0.851165771484375, -0.8167724609375, -0.782379150390625, -0.74798583984375, -0.713592529296875, -0.67919921875, -0.644805908203125, -0.61041259765625, -0.576019287109375, -0.5416259765625, -0.507232666015625, -0.47283935546875, -0.438446044921875, -0.404052734375, -0.369659423828125, -0.33526611328125, -0.300872802734375, -0.2664794921875, -0.232086181640625, -0.19769287109375, -0.163299560546875, -0.12890625, -0.094512939453125, -0.06011962890625, -0.025726318359375, 0.0086669921875, 0.043060302734375, 0.07745361328125, 0.111846923828125, 0.146240234375, 0.180633544921875, 0.21502685546875, 0.249420166015625, 0.2838134765625, 0.318206787109375, 0.35260009765625, 0.386993408203125, 0.42138671875, 0.455780029296875, 0.49017333984375, 0.524566650390625, 0.5589599609375, 0.593353271484375, 0.62774658203125, 0.662139892578125, 0.696533203125, 0.730926513671875, 0.76531982421875, 0.799713134765625, 0.8341064453125, 0.868499755859375, 0.90289306640625, 0.937286376953125, 0.9716796875]}, "gradients/encoder.encoder.layers.10.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 6.0, 1.0, 4.0, 2.0, 5.0, 2.0, 4.0, 9.0, 20.0, 19.0, 21.0, 28.0, 34.0, 54.0, 76.0, 117.0, 185.0, 351.0, 747.0, 1535.0, 4040.0, 12544.0, 53110.0, 1053265.0, 2990178.0, 57388.0, 12978.0, 4244.0, 1687.0, 709.0, 346.0, 194.0, 111.0, 75.0, 53.0, 37.0, 27.0, 15.0, 18.0, 14.0, 8.0, 5.0, 9.0, 5.0, 4.0, 3.0, 2.0, 3.0, 1.0, 4.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.1640625, -2.09429931640625, -2.0245361328125, -1.95477294921875, -1.885009765625, -1.81524658203125, -1.7454833984375, -1.67572021484375, -1.60595703125, -1.53619384765625, -1.4664306640625, -1.39666748046875, -1.326904296875, -1.25714111328125, -1.1873779296875, -1.11761474609375, -1.0478515625, -0.97808837890625, -0.9083251953125, -0.83856201171875, -0.768798828125, -0.69903564453125, -0.6292724609375, -0.55950927734375, -0.48974609375, -0.41998291015625, -0.3502197265625, -0.28045654296875, -0.210693359375, -0.14093017578125, -0.0711669921875, -0.00140380859375, 0.068359375, 0.13812255859375, 0.2078857421875, 0.27764892578125, 0.347412109375, 0.41717529296875, 0.4869384765625, 0.55670166015625, 0.62646484375, 0.69622802734375, 0.7659912109375, 0.83575439453125, 0.905517578125, 0.97528076171875, 1.0450439453125, 1.11480712890625, 1.1845703125, 1.25433349609375, 1.3240966796875, 1.39385986328125, 1.463623046875, 1.53338623046875, 1.6031494140625, 1.67291259765625, 1.74267578125, 1.81243896484375, 1.8822021484375, 1.95196533203125, 2.021728515625, 2.09149169921875, 2.1612548828125, 2.23101806640625, 2.30078125]}, "gradients/encoder.encoder.layers.10.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 3.0, 4.0, 4.0, 11.0, 18.0, 28.0, 67.0, 181.0, 2728.0, 777.0, 136.0, 59.0, 31.0, 16.0, 3.0, 10.0, 6.0, 4.0, 2.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.2998046875, -1.2672500610351562, -1.2346954345703125, -1.2021408081054688, -1.169586181640625, -1.1370315551757812, -1.1044769287109375, -1.0719223022460938, -1.03936767578125, -1.0068130493164062, -0.9742584228515625, -0.9417037963867188, -0.909149169921875, -0.8765945434570312, -0.8440399169921875, -0.8114852905273438, -0.7789306640625, -0.7463760375976562, -0.7138214111328125, -0.6812667846679688, -0.648712158203125, -0.6161575317382812, -0.5836029052734375, -0.5510482788085938, -0.51849365234375, -0.48593902587890625, -0.4533843994140625, -0.42082977294921875, -0.388275146484375, -0.35572052001953125, -0.3231658935546875, -0.29061126708984375, -0.258056640625, -0.22550201416015625, -0.1929473876953125, -0.16039276123046875, -0.127838134765625, -0.09528350830078125, -0.0627288818359375, -0.03017425537109375, 0.00238037109375, 0.03493499755859375, 0.0674896240234375, 0.10004425048828125, 0.132598876953125, 0.16515350341796875, 0.1977081298828125, 0.23026275634765625, 0.2628173828125, 0.29537200927734375, 0.3279266357421875, 0.36048126220703125, 0.393035888671875, 0.42559051513671875, 0.4581451416015625, 0.49069976806640625, 0.52325439453125, 0.5558090209960938, 0.5883636474609375, 0.6209182739257812, 0.653472900390625, 0.6860275268554688, 0.7185821533203125, 0.7511367797851562, 0.78369140625]}, "gradients/encoder.encoder.layers.10.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 2.0, 3.0, 2.0, 3.0, 5.0, 5.0, 5.0, 17.0, 25.0, 27.0, 44.0, 59.0, 41.0, 58.0, 87.0, 76.0, 81.0, 78.0, 65.0, 67.0, 56.0, 45.0, 47.0, 27.0, 28.0, 16.0, 11.0, 7.0, 7.0, 8.0, 4.0, 3.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.822250485420227, -1.7732852697372437, -1.7243200540542603, -1.6753548383712769, -1.626389503479004, -1.5774242877960205, -1.528459072113037, -1.4794938564300537, -1.4305286407470703, -1.381563425064087, -1.3325982093811035, -1.2836329936981201, -1.2346677780151367, -1.1857024431228638, -1.1367372274398804, -1.087772011756897, -1.0388067960739136, -0.9898415803909302, -0.9408763647079468, -0.8919110894203186, -0.8429458737373352, -0.7939806580543518, -0.7450153827667236, -0.6960501670837402, -0.6470849514007568, -0.5981197357177734, -0.54915452003479, -0.5001892447471619, -0.45122402906417847, -0.40225881338119507, -0.3532935678958893, -0.3043283224105835, -0.25536322593688965, -0.20639799535274506, -0.15743276476860046, -0.10846753418445587, -0.05950230360031128, -0.010537073016166687, 0.038428157567977905, 0.08739340305328369, 0.1363586187362671, 0.18532384932041168, 0.23428907990455627, 0.28325432538986206, 0.33221954107284546, 0.38118475675582886, 0.43015000224113464, 0.47911524772644043, 0.5280804634094238, 0.5770456790924072, 0.6260108947753906, 0.6749761700630188, 0.7239413857460022, 0.7729066014289856, 0.8218718767166138, 0.8708370923995972, 0.9198023080825806, 0.968767523765564, 1.0177327394485474, 1.0666979551315308, 1.1156632900238037, 1.164628505706787, 1.2135937213897705, 1.262558937072754, 1.3115241527557373]}, "gradients/encoder.encoder.layers.10.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 3.0, 6.0, 5.0, 4.0, 5.0, 4.0, 6.0, 13.0, 14.0, 11.0, 17.0, 23.0, 19.0, 34.0, 25.0, 39.0, 42.0, 37.0, 43.0, 38.0, 40.0, 59.0, 51.0, 47.0, 39.0, 45.0, 39.0, 36.0, 39.0, 32.0, 31.0, 34.0, 30.0, 20.0, 11.0, 13.0, 11.0, 8.0, 13.0, 4.0, 3.0, 5.0, 2.0, 5.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0], "bins": [-1.4191548824310303, -1.3746458292007446, -1.3301368951797485, -1.285627841949463, -1.2411187887191772, -1.1966097354888916, -1.1521008014678955, -1.1075917482376099, -1.0630826950073242, -1.0185736417770386, -0.9740646481513977, -0.9295556545257568, -0.8850466012954712, -0.8405376076698303, -0.7960286140441895, -0.7515195608139038, -0.7070106267929077, -0.6625016331672668, -0.6179925799369812, -0.5734835863113403, -0.5289745330810547, -0.4844655394554138, -0.43995654582977295, -0.3954475224018097, -0.35093849897384644, -0.3064294755458832, -0.2619204521179199, -0.21741145849227905, -0.1729024350643158, -0.12839341163635254, -0.08388441801071167, -0.03937539458274841, 0.005133628845214844, 0.049642644822597504, 0.09415166079998016, 0.13866066932678223, 0.18316969275474548, 0.22767871618270874, 0.2721877098083496, 0.31669673323631287, 0.3612057566642761, 0.4057147800922394, 0.45022380352020264, 0.4947327971458435, 0.5392417907714844, 0.58375084400177, 0.6282598376274109, 0.6727688312530518, 0.7172778844833374, 0.7617868781089783, 0.8062959313392639, 0.8508049249649048, 0.8953139781951904, 0.9398229718208313, 0.9843319654464722, 1.0288410186767578, 1.073349952697754, 1.1178590059280396, 1.1623679399490356, 1.2068769931793213, 1.251386046409607, 1.2958950996398926, 1.3404040336608887, 1.3849130868911743, 1.42942214012146]}, "gradients/encoder.encoder.layers.10.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 2.0, 1.0, 4.0, 0.0, 7.0, 7.0, 2.0, 9.0, 11.0, 12.0, 19.0, 40.0, 47.0, 93.0, 142.0, 253.0, 587.0, 1356.0, 3827.0, 13567.0, 69348.0, 458122.0, 420698.0, 62146.0, 12193.0, 3609.0, 1286.0, 556.0, 257.0, 126.0, 62.0, 63.0, 23.0, 30.0, 14.0, 10.0, 12.0, 7.0, 2.0, 5.0, 1.0, 4.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 3.0], "bins": [-2.166015625, -2.09124755859375, -2.0164794921875, -1.94171142578125, -1.866943359375, -1.79217529296875, -1.7174072265625, -1.64263916015625, -1.56787109375, -1.49310302734375, -1.4183349609375, -1.34356689453125, -1.268798828125, -1.19403076171875, -1.1192626953125, -1.04449462890625, -0.9697265625, -0.89495849609375, -0.8201904296875, -0.74542236328125, -0.670654296875, -0.59588623046875, -0.5211181640625, -0.44635009765625, -0.37158203125, -0.29681396484375, -0.2220458984375, -0.14727783203125, -0.072509765625, 0.00225830078125, 0.0770263671875, 0.15179443359375, 0.2265625, 0.30133056640625, 0.3760986328125, 0.45086669921875, 0.525634765625, 0.60040283203125, 0.6751708984375, 0.74993896484375, 0.82470703125, 0.89947509765625, 0.9742431640625, 1.04901123046875, 1.123779296875, 1.19854736328125, 1.2733154296875, 1.34808349609375, 1.4228515625, 1.49761962890625, 1.5723876953125, 1.64715576171875, 1.721923828125, 1.79669189453125, 1.8714599609375, 1.94622802734375, 2.02099609375, 2.09576416015625, 2.1705322265625, 2.24530029296875, 2.320068359375, 2.39483642578125, 2.4696044921875, 2.54437255859375, 2.619140625]}, "gradients/encoder.encoder.layers.10.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 2.0, 2.0, 1.0, 5.0, 2.0, 3.0, 4.0, 8.0, 7.0, 15.0, 14.0, 37.0, 20.0, 35.0, 40.0, 54.0, 72.0, 72.0, 73.0, 86.0, 78.0, 81.0, 62.0, 51.0, 50.0, 26.0, 25.0, 21.0, 15.0, 10.0, 11.0, 6.0, 7.0, 5.0, 2.0, 4.0, 0.0, 4.0, 0.0, 1.0, 3.0, 1.0, 0.0, 1.0, 1.0], "bins": [-1.26953125, -1.2364349365234375, -1.203338623046875, -1.1702423095703125, -1.13714599609375, -1.1040496826171875, -1.070953369140625, -1.0378570556640625, -1.0047607421875, -0.9716644287109375, -0.938568115234375, -0.9054718017578125, -0.87237548828125, -0.8392791748046875, -0.806182861328125, -0.7730865478515625, -0.739990234375, -0.7068939208984375, -0.673797607421875, -0.6407012939453125, -0.60760498046875, -0.5745086669921875, -0.541412353515625, -0.5083160400390625, -0.4752197265625, -0.4421234130859375, -0.409027099609375, -0.3759307861328125, -0.34283447265625, -0.3097381591796875, -0.276641845703125, -0.2435455322265625, -0.21044921875, -0.1773529052734375, -0.144256591796875, -0.1111602783203125, -0.07806396484375, -0.0449676513671875, -0.011871337890625, 0.0212249755859375, 0.0543212890625, 0.0874176025390625, 0.120513916015625, 0.1536102294921875, 0.18670654296875, 0.2198028564453125, 0.252899169921875, 0.2859954833984375, 0.319091796875, 0.3521881103515625, 0.385284423828125, 0.4183807373046875, 0.45147705078125, 0.4845733642578125, 0.517669677734375, 0.5507659912109375, 0.5838623046875, 0.6169586181640625, 0.650054931640625, 0.6831512451171875, 0.71624755859375, 0.7493438720703125, 0.782440185546875, 0.8155364990234375, 0.8486328125]}, "gradients/encoder.encoder.layers.10.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 2.0, 6.0, 2.0, 4.0, 3.0, 6.0, 17.0, 12.0, 22.0, 29.0, 42.0, 75.0, 153.0, 325.0, 1022.0, 4938.0, 95001.0, 913593.0, 29230.0, 2781.0, 718.0, 259.0, 118.0, 61.0, 41.0, 27.0, 22.0, 11.0, 9.0, 11.0, 4.0, 5.0, 1.0, 5.0, 3.0, 2.0, 4.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.1171875, -4.94281005859375, -4.7684326171875, -4.59405517578125, -4.419677734375, -4.24530029296875, -4.0709228515625, -3.89654541015625, -3.72216796875, -3.54779052734375, -3.3734130859375, -3.19903564453125, -3.024658203125, -2.85028076171875, -2.6759033203125, -2.50152587890625, -2.3271484375, -2.15277099609375, -1.9783935546875, -1.80401611328125, -1.629638671875, -1.45526123046875, -1.2808837890625, -1.10650634765625, -0.93212890625, -0.75775146484375, -0.5833740234375, -0.40899658203125, -0.234619140625, -0.06024169921875, 0.1141357421875, 0.28851318359375, 0.462890625, 0.63726806640625, 0.8116455078125, 0.98602294921875, 1.160400390625, 1.33477783203125, 1.5091552734375, 1.68353271484375, 1.85791015625, 2.03228759765625, 2.2066650390625, 2.38104248046875, 2.555419921875, 2.72979736328125, 2.9041748046875, 3.07855224609375, 3.2529296875, 3.42730712890625, 3.6016845703125, 3.77606201171875, 3.950439453125, 4.12481689453125, 4.2991943359375, 4.47357177734375, 4.64794921875, 4.82232666015625, 4.9967041015625, 5.17108154296875, 5.345458984375, 5.51983642578125, 5.6942138671875, 5.86859130859375, 6.04296875]}, "gradients/encoder.encoder.layers.10.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 3.0, 6.0, 4.0, 8.0, 9.0, 6.0, 13.0, 19.0, 19.0, 19.0, 31.0, 36.0, 41.0, 47.0, 49.0, 54.0, 60.0, 58.0, 55.0, 57.0, 77.0, 53.0, 38.0, 39.0, 35.0, 33.0, 29.0, 19.0, 12.0, 14.0, 17.0, 11.0, 10.0, 5.0, 3.0, 7.0, 2.0, 5.0, 1.0, 2.0, 3.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.90234375, -3.75994873046875, -3.6175537109375, -3.47515869140625, -3.332763671875, -3.19036865234375, -3.0479736328125, -2.90557861328125, -2.76318359375, -2.62078857421875, -2.4783935546875, -2.33599853515625, -2.193603515625, -2.05120849609375, -1.9088134765625, -1.76641845703125, -1.6240234375, -1.48162841796875, -1.3392333984375, -1.19683837890625, -1.054443359375, -0.91204833984375, -0.7696533203125, -0.62725830078125, -0.48486328125, -0.34246826171875, -0.2000732421875, -0.05767822265625, 0.084716796875, 0.22711181640625, 0.3695068359375, 0.51190185546875, 0.654296875, 0.79669189453125, 0.9390869140625, 1.08148193359375, 1.223876953125, 1.36627197265625, 1.5086669921875, 1.65106201171875, 1.79345703125, 1.93585205078125, 2.0782470703125, 2.22064208984375, 2.363037109375, 2.50543212890625, 2.6478271484375, 2.79022216796875, 2.9326171875, 3.07501220703125, 3.2174072265625, 3.35980224609375, 3.502197265625, 3.64459228515625, 3.7869873046875, 3.92938232421875, 4.07177734375, 4.21417236328125, 4.3565673828125, 4.49896240234375, 4.641357421875, 4.78375244140625, 4.9261474609375, 5.06854248046875, 5.2109375]}, "gradients/encoder.encoder.layers.10.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 2.0, 4.0, 3.0, 3.0, 7.0, 4.0, 15.0, 22.0, 14.0, 20.0, 25.0, 47.0, 56.0, 67.0, 112.0, 202.0, 363.0, 807.0, 2472.0, 15050.0, 420887.0, 585484.0, 18092.0, 2870.0, 928.0, 380.0, 200.0, 125.0, 74.0, 56.0, 36.0, 40.0, 19.0, 11.0, 19.0, 10.0, 7.0, 7.0, 7.0, 2.0, 2.0, 1.0, 3.0, 2.0, 2.0, 1.0, 1.0], "bins": [-2.80078125, -2.7271575927734375, -2.653533935546875, -2.5799102783203125, -2.50628662109375, -2.4326629638671875, -2.359039306640625, -2.2854156494140625, -2.2117919921875, -2.1381683349609375, -2.064544677734375, -1.9909210205078125, -1.91729736328125, -1.8436737060546875, -1.770050048828125, -1.6964263916015625, -1.622802734375, -1.5491790771484375, -1.475555419921875, -1.4019317626953125, -1.32830810546875, -1.2546844482421875, -1.181060791015625, -1.1074371337890625, -1.0338134765625, -0.9601898193359375, -0.886566162109375, -0.8129425048828125, -0.73931884765625, -0.6656951904296875, -0.592071533203125, -0.5184478759765625, -0.44482421875, -0.3712005615234375, -0.297576904296875, -0.2239532470703125, -0.15032958984375, -0.0767059326171875, -0.003082275390625, 0.0705413818359375, 0.1441650390625, 0.2177886962890625, 0.291412353515625, 0.3650360107421875, 0.43865966796875, 0.5122833251953125, 0.585906982421875, 0.6595306396484375, 0.733154296875, 0.8067779541015625, 0.880401611328125, 0.9540252685546875, 1.02764892578125, 1.1012725830078125, 1.174896240234375, 1.2485198974609375, 1.3221435546875, 1.3957672119140625, 1.469390869140625, 1.5430145263671875, 1.61663818359375, 1.6902618408203125, 1.763885498046875, 1.8375091552734375, 1.9111328125]}, "gradients/encoder.encoder.layers.10.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 6.0, 4.0, 9.0, 19.0, 35.0, 43.0, 94.0, 141.0, 177.0, 156.0, 135.0, 96.0, 34.0, 31.0, 10.0, 8.0, 6.0, 3.0, 3.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.0003345012664794922, -0.00032736361026763916, -0.00032022595405578613, -0.0003130882978439331, -0.0003059506416320801, -0.00029881298542022705, -0.000291675329208374, -0.000284537672996521, -0.00027740001678466797, -0.00027026236057281494, -0.0002631247043609619, -0.0002559870481491089, -0.00024884939193725586, -0.00024171173572540283, -0.0002345740795135498, -0.00022743642330169678, -0.00022029876708984375, -0.00021316111087799072, -0.0002060234546661377, -0.00019888579845428467, -0.00019174814224243164, -0.0001846104860305786, -0.00017747282981872559, -0.00017033517360687256, -0.00016319751739501953, -0.0001560598611831665, -0.00014892220497131348, -0.00014178454875946045, -0.00013464689254760742, -0.0001275092363357544, -0.00012037158012390137, -0.00011323392391204834, -0.00010609626770019531, -9.895861148834229e-05, -9.182095527648926e-05, -8.468329906463623e-05, -7.75456428527832e-05, -7.040798664093018e-05, -6.327033042907715e-05, -5.613267421722412e-05, -4.8995018005371094e-05, -4.1857361793518066e-05, -3.471970558166504e-05, -2.7582049369812012e-05, -2.0444393157958984e-05, -1.3306736946105957e-05, -6.16908073425293e-06, 9.685754776000977e-07, 8.106231689453125e-06, 1.5243887901306152e-05, 2.238154411315918e-05, 2.9519200325012207e-05, 3.6656856536865234e-05, 4.379451274871826e-05, 5.093216896057129e-05, 5.8069825172424316e-05, 6.520748138427734e-05, 7.234513759613037e-05, 7.94827938079834e-05, 8.662045001983643e-05, 9.375810623168945e-05, 0.00010089576244354248, 0.00010803341865539551, 0.00011517107486724854, 0.00012230873107910156]}, "gradients/encoder.encoder.layers.10.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 3.0, 3.0, 3.0, 3.0, 5.0, 6.0, 16.0, 28.0, 28.0, 72.0, 136.0, 240.0, 683.0, 2320.0, 22255.0, 937447.0, 79577.0, 4089.0, 915.0, 337.0, 177.0, 74.0, 54.0, 26.0, 14.0, 19.0, 11.0, 5.0, 5.0, 4.0, 2.0, 0.0, 2.0, 1.0, 4.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.69921875, -2.59405517578125, -2.4888916015625, -2.38372802734375, -2.278564453125, -2.17340087890625, -2.0682373046875, -1.96307373046875, -1.85791015625, -1.75274658203125, -1.6475830078125, -1.54241943359375, -1.437255859375, -1.33209228515625, -1.2269287109375, -1.12176513671875, -1.0166015625, -0.91143798828125, -0.8062744140625, -0.70111083984375, -0.595947265625, -0.49078369140625, -0.3856201171875, -0.28045654296875, -0.17529296875, -0.07012939453125, 0.0350341796875, 0.14019775390625, 0.245361328125, 0.35052490234375, 0.4556884765625, 0.56085205078125, 0.666015625, 0.77117919921875, 0.8763427734375, 0.98150634765625, 1.086669921875, 1.19183349609375, 1.2969970703125, 1.40216064453125, 1.50732421875, 1.61248779296875, 1.7176513671875, 1.82281494140625, 1.927978515625, 2.03314208984375, 2.1383056640625, 2.24346923828125, 2.3486328125, 2.45379638671875, 2.5589599609375, 2.66412353515625, 2.769287109375, 2.87445068359375, 2.9796142578125, 3.08477783203125, 3.18994140625, 3.29510498046875, 3.4002685546875, 3.50543212890625, 3.610595703125, 3.71575927734375, 3.8209228515625, 3.92608642578125, 4.03125]}, "gradients/encoder.encoder.layers.10.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 4.0, 3.0, 3.0, 3.0, 8.0, 12.0, 23.0, 32.0, 49.0, 86.0, 147.0, 176.0, 158.0, 117.0, 66.0, 48.0, 31.0, 18.0, 7.0, 10.0, 7.0, 1.0, 3.0, 1.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.09765625, -3.0199737548828125, -2.942291259765625, -2.8646087646484375, -2.78692626953125, -2.7092437744140625, -2.631561279296875, -2.5538787841796875, -2.4761962890625, -2.3985137939453125, -2.320831298828125, -2.2431488037109375, -2.16546630859375, -2.0877838134765625, -2.010101318359375, -1.9324188232421875, -1.854736328125, -1.7770538330078125, -1.699371337890625, -1.6216888427734375, -1.54400634765625, -1.4663238525390625, -1.388641357421875, -1.3109588623046875, -1.2332763671875, -1.1555938720703125, -1.077911376953125, -1.0002288818359375, -0.92254638671875, -0.8448638916015625, -0.767181396484375, -0.6894989013671875, -0.61181640625, -0.5341339111328125, -0.456451416015625, -0.3787689208984375, -0.30108642578125, -0.2234039306640625, -0.145721435546875, -0.0680389404296875, 0.0096435546875, 0.0873260498046875, 0.165008544921875, 0.2426910400390625, 0.32037353515625, 0.3980560302734375, 0.475738525390625, 0.5534210205078125, 0.631103515625, 0.7087860107421875, 0.786468505859375, 0.8641510009765625, 0.94183349609375, 1.0195159912109375, 1.097198486328125, 1.1748809814453125, 1.2525634765625, 1.3302459716796875, 1.407928466796875, 1.4856109619140625, 1.56329345703125, 1.6409759521484375, 1.718658447265625, 1.7963409423828125, 1.8740234375]}, "gradients/encoder.encoder.layers.10.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 5.0, 0.0, 10.0, 8.0, 24.0, 33.0, 65.0, 89.0, 141.0, 150.0, 183.0, 102.0, 78.0, 59.0, 29.0, 11.0, 7.0, 4.0, 3.0, 6.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-20.10253143310547, -19.405010223388672, -18.707489013671875, -18.009967803955078, -17.31244659423828, -16.614925384521484, -15.917405128479004, -15.219884872436523, -14.522363662719727, -13.82484245300293, -13.127321243286133, -12.429800033569336, -11.732279777526855, -11.034758567810059, -10.337237358093262, -9.639717102050781, -8.942194938659668, -8.244673728942871, -7.547152996063232, -6.8496317863464355, -6.152111053466797, -5.45458984375, -4.757068634033203, -4.0595479011535645, -3.3620266914367676, -2.66450572013855, -1.9669846296310425, -1.2694635391235352, -0.5719425678253174, 0.1255784034729004, 0.8230996131896973, 1.520620346069336, 2.218141555786133, 2.9156625270843506, 3.6131834983825684, 4.310704708099365, 5.008225440979004, 5.705746650695801, 6.403267860412598, 7.100788593292236, 7.798309803009033, 8.495830535888672, 9.193351745605469, 9.890872955322266, 10.588394165039062, 11.28591537475586, 11.983436584472656, 12.680956840515137, 13.378478050231934, 14.07599925994873, 14.773520469665527, 15.471040725708008, 16.168561935424805, 16.8660831451416, 17.5636043548584, 18.261125564575195, 18.958646774291992, 19.65616798400879, 20.353689193725586, 21.051210403442383, 21.74873161315918, 22.446250915527344, 23.14377212524414, 23.841293334960938, 24.538814544677734]}, "gradients/encoder.encoder.layers.10.layer_norm.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0, 5.0, 8.0, 4.0, 3.0, 7.0, 5.0, 8.0, 8.0, 13.0, 20.0, 21.0, 23.0, 28.0, 26.0, 45.0, 39.0, 47.0, 41.0, 52.0, 68.0, 47.0, 58.0, 47.0, 58.0, 55.0, 47.0, 37.0, 38.0, 33.0, 17.0, 20.0, 22.0, 9.0, 8.0, 8.0, 7.0, 7.0, 8.0, 2.0, 6.0, 3.0, 1.0, 3.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-15.755983352661133, -15.202385902404785, -14.648787498474121, -14.095190048217773, -13.54159164428711, -12.987994194030762, -12.434396743774414, -11.88079833984375, -11.327200889587402, -10.773603439331055, -10.22000503540039, -9.666407585144043, -9.112810134887695, -8.559211730957031, -8.005614280700684, -7.452016353607178, -6.898418426513672, -6.344820499420166, -5.79122257232666, -5.2376251220703125, -4.684027194976807, -4.130429267883301, -3.576831579208374, -3.0232338905334473, -2.4696359634399414, -1.916038155555725, -1.3624403476715088, -0.8088425397872925, -0.25524473190307617, 0.2983531951904297, 0.8519508838653564, 1.4055485725402832, 1.959146499633789, 2.512744426727295, 3.0663421154022217, 3.6199398040771484, 4.173537731170654, 4.72713565826416, 5.280733108520508, 5.834331035614014, 6.3879289627075195, 6.941526889801025, 7.495124816894531, 8.048722267150879, 8.602319717407227, 9.15591812133789, 9.709515571594238, 10.263113021850586, 10.81671142578125, 11.370308876037598, 11.923907279968262, 12.47750473022461, 13.031103134155273, 13.584700584411621, 14.138298034667969, 14.691896438598633, 15.24549388885498, 15.799091339111328, 16.352689743041992, 16.906288146972656, 17.459884643554688, 18.01348304748535, 18.567081451416016, 19.120677947998047, 19.67427635192871]}, "gradients/encoder.encoder.layers.9.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 5.0, 0.0, 3.0, 8.0, 11.0, 6.0, 11.0, 12.0, 21.0, 33.0, 62.0, 101.0, 176.0, 399.0, 1196.0, 4567.0, 49335.0, 4107947.0, 25557.0, 3233.0, 907.0, 355.0, 167.0, 73.0, 39.0, 27.0, 13.0, 6.0, 7.0, 4.0, 2.0, 2.0, 4.0, 2.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.33984375, -5.18896484375, -5.0380859375, -4.88720703125, -4.736328125, -4.58544921875, -4.4345703125, -4.28369140625, -4.1328125, -3.98193359375, -3.8310546875, -3.68017578125, -3.529296875, -3.37841796875, -3.2275390625, -3.07666015625, -2.92578125, -2.77490234375, -2.6240234375, -2.47314453125, -2.322265625, -2.17138671875, -2.0205078125, -1.86962890625, -1.71875, -1.56787109375, -1.4169921875, -1.26611328125, -1.115234375, -0.96435546875, -0.8134765625, -0.66259765625, -0.51171875, -0.36083984375, -0.2099609375, -0.05908203125, 0.091796875, 0.24267578125, 0.3935546875, 0.54443359375, 0.6953125, 0.84619140625, 0.9970703125, 1.14794921875, 1.298828125, 1.44970703125, 1.6005859375, 1.75146484375, 1.90234375, 2.05322265625, 2.2041015625, 2.35498046875, 2.505859375, 2.65673828125, 2.8076171875, 2.95849609375, 3.109375, 3.26025390625, 3.4111328125, 3.56201171875, 3.712890625, 3.86376953125, 4.0146484375, 4.16552734375, 4.31640625]}, "gradients/encoder.encoder.layers.9.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 4.0, 3.0, 2.0, 7.0, 4.0, 9.0, 13.0, 15.0, 23.0, 32.0, 40.0, 44.0, 65.0, 78.0, 96.0, 100.0, 95.0, 82.0, 63.0, 64.0, 42.0, 33.0, 25.0, 16.0, 10.0, 16.0, 8.0, 3.0, 4.0, 3.0, 3.0, 4.0, 3.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.4951171875, -1.454345703125, -1.41357421875, -1.372802734375, -1.33203125, -1.291259765625, -1.25048828125, -1.209716796875, -1.1689453125, -1.128173828125, -1.08740234375, -1.046630859375, -1.005859375, -0.965087890625, -0.92431640625, -0.883544921875, -0.8427734375, -0.802001953125, -0.76123046875, -0.720458984375, -0.6796875, -0.638916015625, -0.59814453125, -0.557373046875, -0.5166015625, -0.475830078125, -0.43505859375, -0.394287109375, -0.353515625, -0.312744140625, -0.27197265625, -0.231201171875, -0.1904296875, -0.149658203125, -0.10888671875, -0.068115234375, -0.02734375, 0.013427734375, 0.05419921875, 0.094970703125, 0.1357421875, 0.176513671875, 0.21728515625, 0.258056640625, 0.298828125, 0.339599609375, 0.38037109375, 0.421142578125, 0.4619140625, 0.502685546875, 0.54345703125, 0.584228515625, 0.625, 0.665771484375, 0.70654296875, 0.747314453125, 0.7880859375, 0.828857421875, 0.86962890625, 0.910400390625, 0.951171875, 0.991943359375, 1.03271484375, 1.073486328125, 1.1142578125]}, "gradients/encoder.encoder.layers.9.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [4.0, 0.0, 0.0, 1.0, 3.0, 3.0, 4.0, 1.0, 4.0, 8.0, 21.0, 12.0, 22.0, 27.0, 32.0, 49.0, 60.0, 91.0, 136.0, 227.0, 361.0, 650.0, 1336.0, 2869.0, 6836.0, 22212.0, 127820.0, 3912299.0, 89575.0, 18128.0, 6023.0, 2576.0, 1311.0, 610.0, 315.0, 196.0, 137.0, 79.0, 80.0, 35.0, 42.0, 21.0, 29.0, 14.0, 13.0, 7.0, 7.0, 3.0, 2.0, 5.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0], "bins": [-1.71484375, -1.652313232421875, -1.58978271484375, -1.527252197265625, -1.4647216796875, -1.402191162109375, -1.33966064453125, -1.277130126953125, -1.214599609375, -1.152069091796875, -1.08953857421875, -1.027008056640625, -0.9644775390625, -0.901947021484375, -0.83941650390625, -0.776885986328125, -0.71435546875, -0.651824951171875, -0.58929443359375, -0.526763916015625, -0.4642333984375, -0.401702880859375, -0.33917236328125, -0.276641845703125, -0.214111328125, -0.151580810546875, -0.08905029296875, -0.026519775390625, 0.0360107421875, 0.098541259765625, 0.16107177734375, 0.223602294921875, 0.2861328125, 0.348663330078125, 0.41119384765625, 0.473724365234375, 0.5362548828125, 0.598785400390625, 0.66131591796875, 0.723846435546875, 0.786376953125, 0.848907470703125, 0.91143798828125, 0.973968505859375, 1.0364990234375, 1.099029541015625, 1.16156005859375, 1.224090576171875, 1.28662109375, 1.349151611328125, 1.41168212890625, 1.474212646484375, 1.5367431640625, 1.599273681640625, 1.66180419921875, 1.724334716796875, 1.786865234375, 1.849395751953125, 1.91192626953125, 1.974456787109375, 2.0369873046875, 2.099517822265625, 2.16204833984375, 2.224578857421875, 2.287109375]}, "gradients/encoder.encoder.layers.9.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 3.0, 2.0, 7.0, 20.0, 20.0, 53.0, 100.0, 419.0, 3192.0, 152.0, 43.0, 29.0, 24.0, 7.0, 6.0, 2.0, 1.0, 1.0, 1.0, 0.0, 2.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.4462890625, -0.4217529296875, -0.397216796875, -0.3726806640625, -0.34814453125, -0.3236083984375, -0.299072265625, -0.2745361328125, -0.25, -0.2254638671875, -0.200927734375, -0.1763916015625, -0.15185546875, -0.1273193359375, -0.102783203125, -0.0782470703125, -0.0537109375, -0.0291748046875, -0.004638671875, 0.0198974609375, 0.04443359375, 0.0689697265625, 0.093505859375, 0.1180419921875, 0.142578125, 0.1671142578125, 0.191650390625, 0.2161865234375, 0.24072265625, 0.2652587890625, 0.289794921875, 0.3143310546875, 0.3388671875, 0.3634033203125, 0.387939453125, 0.4124755859375, 0.43701171875, 0.4615478515625, 0.486083984375, 0.5106201171875, 0.53515625, 0.5596923828125, 0.584228515625, 0.6087646484375, 0.63330078125, 0.6578369140625, 0.682373046875, 0.7069091796875, 0.7314453125, 0.7559814453125, 0.780517578125, 0.8050537109375, 0.82958984375, 0.8541259765625, 0.878662109375, 0.9031982421875, 0.927734375, 0.9522705078125, 0.976806640625, 1.0013427734375, 1.02587890625, 1.0504150390625, 1.074951171875, 1.0994873046875, 1.1240234375]}, "gradients/encoder.encoder.layers.9.final_layer_norm.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 1.0, 0.0, 2.0, 3.0, 11.0, 12.0, 31.0, 54.0, 98.0, 95.0, 165.0, 166.0, 138.0, 108.0, 54.0, 31.0, 24.0, 8.0, 6.0, 1.0, 0.0, 0.0, 3.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.3960165977478027, -1.3235034942626953, -1.2509905099868774, -1.1784775257110596, -1.1059644222259521, -1.0334513187408447, -0.9609383344650269, -0.8884252905845642, -0.8159122467041016, -0.7433992028236389, -0.6708861589431763, -0.5983731150627136, -0.525860071182251, -0.45334702730178833, -0.3808339834213257, -0.30832093954086304, -0.2358078956604004, -0.16329485177993774, -0.0907818078994751, -0.01826876401901245, 0.054244279861450195, 0.12675732374191284, 0.1992703676223755, 0.27178341150283813, 0.3442964553833008, 0.4168094992637634, 0.4893225431442261, 0.5618355870246887, 0.6343486309051514, 0.706861674785614, 0.7793747186660767, 0.8518877625465393, 0.924400806427002, 0.9969138503074646, 1.0694268941879272, 1.1419398784637451, 1.2144529819488525, 1.28696608543396, 1.3594790697097778, 1.4319920539855957, 1.5045051574707031, 1.5770182609558105, 1.6495312452316284, 1.7220442295074463, 1.7945573329925537, 1.8670704364776611, 1.939583420753479, 2.012096405029297, 2.0846095085144043, 2.1571226119995117, 2.229635715484619, 2.3021485805511475, 2.374661684036255, 2.4471747875213623, 2.5196876525878906, 2.592200756072998, 2.6647138595581055, 2.737226963043213, 2.8097400665283203, 2.8822529315948486, 2.954766035079956, 3.0272791385650635, 3.099792003631592, 3.172305107116699, 3.2448182106018066]}, "gradients/encoder.encoder.layers.9.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 5.0, 2.0, 3.0, 8.0, 4.0, 3.0, 4.0, 7.0, 9.0, 9.0, 14.0, 15.0, 25.0, 24.0, 30.0, 34.0, 48.0, 40.0, 45.0, 44.0, 50.0, 54.0, 61.0, 60.0, 50.0, 55.0, 47.0, 36.0, 42.0, 39.0, 25.0, 20.0, 22.0, 13.0, 16.0, 12.0, 11.0, 9.0, 7.0, 3.0, 4.0, 0.0, 1.0, 1.0, 3.0, 2.0, 3.0, 1.0, 0.0, 1.0], "bins": [-1.322563886642456, -1.2858307361602783, -1.2490975856781006, -1.2123644351959229, -1.1756314039230347, -1.138898253440857, -1.1021651029586792, -1.0654319524765015, -1.0286988019943237, -0.991965651512146, -0.955232560634613, -0.9184994101524353, -0.8817662596702576, -0.8450331687927246, -0.8083000183105469, -0.7715668678283691, -0.7348337769508362, -0.6981006264686584, -0.6613675355911255, -0.6246343851089478, -0.58790123462677, -0.5511680841445923, -0.5144349932670593, -0.4777018427848816, -0.44096872210502625, -0.4042356014251709, -0.36750245094299316, -0.3307693302631378, -0.29403620958328247, -0.25730305910110474, -0.2205699384212494, -0.18383678793907166, -0.1471036672592163, -0.11037053167819977, -0.07363740354776382, -0.03690427541732788, -0.00017113983631134033, 0.0365619957447052, 0.07329511642456055, 0.11002826690673828, 0.14676138758659363, 0.18349452316761017, 0.2202276587486267, 0.25696077942848206, 0.2936939001083374, 0.33042705059051514, 0.3671601712703705, 0.4038933217525482, 0.44062644243240356, 0.4773595631122589, 0.5140926837921143, 0.550825834274292, 0.5875589847564697, 0.6242921352386475, 0.6610252261161804, 0.6977583765983582, 0.7344914674758911, 0.7712246179580688, 0.8079577088356018, 0.8446908593177795, 0.8814240097999573, 0.9181571006774902, 0.954890251159668, 0.9916234016418457, 1.0283565521240234]}, "gradients/encoder.encoder.layers.9.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 6.0, 1.0, 4.0, 11.0, 5.0, 15.0, 23.0, 46.0, 87.0, 131.0, 301.0, 789.0, 2342.0, 11015.0, 94360.0, 738519.0, 178771.0, 16914.0, 3411.0, 1041.0, 381.0, 179.0, 92.0, 42.0, 27.0, 13.0, 10.0, 4.0, 3.0, 7.0, 3.0, 3.0, 4.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.630859375, -3.531707763671875, -3.43255615234375, -3.333404541015625, -3.2342529296875, -3.135101318359375, -3.03594970703125, -2.936798095703125, -2.837646484375, -2.738494873046875, -2.63934326171875, -2.540191650390625, -2.4410400390625, -2.341888427734375, -2.24273681640625, -2.143585205078125, -2.04443359375, -1.945281982421875, -1.84613037109375, -1.746978759765625, -1.6478271484375, -1.548675537109375, -1.44952392578125, -1.350372314453125, -1.251220703125, -1.152069091796875, -1.05291748046875, -0.953765869140625, -0.8546142578125, -0.755462646484375, -0.65631103515625, -0.557159423828125, -0.4580078125, -0.358856201171875, -0.25970458984375, -0.160552978515625, -0.0614013671875, 0.037750244140625, 0.13690185546875, 0.236053466796875, 0.335205078125, 0.434356689453125, 0.53350830078125, 0.632659912109375, 0.7318115234375, 0.830963134765625, 0.93011474609375, 1.029266357421875, 1.12841796875, 1.227569580078125, 1.32672119140625, 1.425872802734375, 1.5250244140625, 1.624176025390625, 1.72332763671875, 1.822479248046875, 1.921630859375, 2.020782470703125, 2.11993408203125, 2.219085693359375, 2.3182373046875, 2.417388916015625, 2.51654052734375, 2.615692138671875, 2.71484375]}, "gradients/encoder.encoder.layers.9.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 0.0, 8.0, 1.0, 4.0, 6.0, 5.0, 10.0, 21.0, 18.0, 24.0, 27.0, 38.0, 53.0, 61.0, 79.0, 84.0, 89.0, 78.0, 67.0, 73.0, 55.0, 48.0, 43.0, 17.0, 26.0, 20.0, 16.0, 10.0, 7.0, 5.0, 3.0, 5.0, 4.0, 1.0, 2.0, 3.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-1.390625, -1.3539276123046875, -1.317230224609375, -1.2805328369140625, -1.24383544921875, -1.2071380615234375, -1.170440673828125, -1.1337432861328125, -1.0970458984375, -1.0603485107421875, -1.023651123046875, -0.9869537353515625, -0.95025634765625, -0.9135589599609375, -0.876861572265625, -0.8401641845703125, -0.803466796875, -0.7667694091796875, -0.730072021484375, -0.6933746337890625, -0.65667724609375, -0.6199798583984375, -0.583282470703125, -0.5465850830078125, -0.5098876953125, -0.4731903076171875, -0.436492919921875, -0.3997955322265625, -0.36309814453125, -0.3264007568359375, -0.289703369140625, -0.2530059814453125, -0.21630859375, -0.1796112060546875, -0.142913818359375, -0.1062164306640625, -0.06951904296875, -0.0328216552734375, 0.003875732421875, 0.0405731201171875, 0.0772705078125, 0.1139678955078125, 0.150665283203125, 0.1873626708984375, 0.22406005859375, 0.2607574462890625, 0.297454833984375, 0.3341522216796875, 0.370849609375, 0.4075469970703125, 0.444244384765625, 0.4809417724609375, 0.51763916015625, 0.5543365478515625, 0.591033935546875, 0.6277313232421875, 0.6644287109375, 0.7011260986328125, 0.737823486328125, 0.7745208740234375, 0.81121826171875, 0.8479156494140625, 0.884613037109375, 0.9213104248046875, 0.9580078125]}, "gradients/encoder.encoder.layers.9.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 3.0, 3.0, 5.0, 7.0, 5.0, 8.0, 12.0, 13.0, 17.0, 27.0, 31.0, 64.0, 96.0, 153.0, 309.0, 565.0, 1699.0, 9291.0, 275303.0, 734775.0, 21884.0, 2618.0, 855.0, 336.0, 162.0, 108.0, 72.0, 43.0, 28.0, 25.0, 20.0, 8.0, 6.0, 7.0, 4.0, 1.0, 2.0, 0.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.388671875, -3.263885498046875, -3.13909912109375, -3.014312744140625, -2.8895263671875, -2.764739990234375, -2.63995361328125, -2.515167236328125, -2.390380859375, -2.265594482421875, -2.14080810546875, -2.016021728515625, -1.8912353515625, -1.766448974609375, -1.64166259765625, -1.516876220703125, -1.39208984375, -1.267303466796875, -1.14251708984375, -1.017730712890625, -0.8929443359375, -0.768157958984375, -0.64337158203125, -0.518585205078125, -0.393798828125, -0.269012451171875, -0.14422607421875, -0.019439697265625, 0.1053466796875, 0.230133056640625, 0.35491943359375, 0.479705810546875, 0.6044921875, 0.729278564453125, 0.85406494140625, 0.978851318359375, 1.1036376953125, 1.228424072265625, 1.35321044921875, 1.477996826171875, 1.602783203125, 1.727569580078125, 1.85235595703125, 1.977142333984375, 2.1019287109375, 2.226715087890625, 2.35150146484375, 2.476287841796875, 2.60107421875, 2.725860595703125, 2.85064697265625, 2.975433349609375, 3.1002197265625, 3.225006103515625, 3.34979248046875, 3.474578857421875, 3.599365234375, 3.724151611328125, 3.84893798828125, 3.973724365234375, 4.0985107421875, 4.223297119140625, 4.34808349609375, 4.472869873046875, 4.59765625]}, "gradients/encoder.encoder.layers.9.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 5.0, 3.0, 5.0, 6.0, 8.0, 9.0, 13.0, 8.0, 11.0, 16.0, 9.0, 19.0, 34.0, 27.0, 36.0, 31.0, 43.0, 43.0, 37.0, 48.0, 49.0, 38.0, 44.0, 48.0, 41.0, 50.0, 42.0, 49.0, 33.0, 32.0, 21.0, 34.0, 31.0, 13.0, 16.0, 12.0, 14.0, 7.0, 10.0, 8.0, 4.0, 2.0, 3.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-3.298828125, -3.19293212890625, -3.0870361328125, -2.98114013671875, -2.875244140625, -2.76934814453125, -2.6634521484375, -2.55755615234375, -2.45166015625, -2.34576416015625, -2.2398681640625, -2.13397216796875, -2.028076171875, -1.92218017578125, -1.8162841796875, -1.71038818359375, -1.6044921875, -1.49859619140625, -1.3927001953125, -1.28680419921875, -1.180908203125, -1.07501220703125, -0.9691162109375, -0.86322021484375, -0.75732421875, -0.65142822265625, -0.5455322265625, -0.43963623046875, -0.333740234375, -0.22784423828125, -0.1219482421875, -0.01605224609375, 0.08984375, 0.19573974609375, 0.3016357421875, 0.40753173828125, 0.513427734375, 0.61932373046875, 0.7252197265625, 0.83111572265625, 0.93701171875, 1.04290771484375, 1.1488037109375, 1.25469970703125, 1.360595703125, 1.46649169921875, 1.5723876953125, 1.67828369140625, 1.7841796875, 1.89007568359375, 1.9959716796875, 2.10186767578125, 2.207763671875, 2.31365966796875, 2.4195556640625, 2.52545166015625, 2.63134765625, 2.73724365234375, 2.8431396484375, 2.94903564453125, 3.054931640625, 3.16082763671875, 3.2667236328125, 3.37261962890625, 3.478515625]}, "gradients/encoder.encoder.layers.9.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 3.0, 2.0, 1.0, 5.0, 8.0, 13.0, 18.0, 31.0, 54.0, 74.0, 131.0, 278.0, 766.0, 2824.0, 27913.0, 923585.0, 86576.0, 4461.0, 1006.0, 385.0, 190.0, 99.0, 56.0, 24.0, 13.0, 8.0, 7.0, 6.0, 7.0, 2.0, 4.0, 4.0, 3.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.138671875, -2.071075439453125, -2.00347900390625, -1.935882568359375, -1.8682861328125, -1.800689697265625, -1.73309326171875, -1.665496826171875, -1.597900390625, -1.530303955078125, -1.46270751953125, -1.395111083984375, -1.3275146484375, -1.259918212890625, -1.19232177734375, -1.124725341796875, -1.05712890625, -0.989532470703125, -0.92193603515625, -0.854339599609375, -0.7867431640625, -0.719146728515625, -0.65155029296875, -0.583953857421875, -0.516357421875, -0.448760986328125, -0.38116455078125, -0.313568115234375, -0.2459716796875, -0.178375244140625, -0.11077880859375, -0.043182373046875, 0.0244140625, 0.092010498046875, 0.15960693359375, 0.227203369140625, 0.2947998046875, 0.362396240234375, 0.42999267578125, 0.497589111328125, 0.565185546875, 0.632781982421875, 0.70037841796875, 0.767974853515625, 0.8355712890625, 0.903167724609375, 0.97076416015625, 1.038360595703125, 1.10595703125, 1.173553466796875, 1.24114990234375, 1.308746337890625, 1.3763427734375, 1.443939208984375, 1.51153564453125, 1.579132080078125, 1.646728515625, 1.714324951171875, 1.78192138671875, 1.849517822265625, 1.9171142578125, 1.984710693359375, 2.05230712890625, 2.119903564453125, 2.1875]}, "gradients/encoder.encoder.layers.9.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 4.0, 1.0, 8.0, 10.0, 15.0, 20.0, 40.0, 75.0, 110.0, 159.0, 159.0, 149.0, 91.0, 78.0, 43.0, 24.0, 11.0, 8.0, 4.0, 4.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0002090930938720703, -0.00020320340991020203, -0.00019731372594833374, -0.00019142404198646545, -0.00018553435802459717, -0.00017964467406272888, -0.0001737549901008606, -0.0001678653061389923, -0.00016197562217712402, -0.00015608593821525574, -0.00015019625425338745, -0.00014430657029151917, -0.00013841688632965088, -0.0001325272023677826, -0.0001266375184059143, -0.00012074783444404602, -0.00011485815048217773, -0.00010896846652030945, -0.00010307878255844116, -9.718909859657288e-05, -9.129941463470459e-05, -8.54097306728363e-05, -7.952004671096802e-05, -7.363036274909973e-05, -6.774067878723145e-05, -6.185099482536316e-05, -5.596131086349487e-05, -5.007162690162659e-05, -4.41819429397583e-05, -3.8292258977890015e-05, -3.240257501602173e-05, -2.6512891054153442e-05, -2.0623207092285156e-05, -1.473352313041687e-05, -8.843839168548584e-06, -2.954155206680298e-06, 2.9355287551879883e-06, 8.825212717056274e-06, 1.471489667892456e-05, 2.0604580640792847e-05, 2.6494264602661133e-05, 3.238394856452942e-05, 3.8273632526397705e-05, 4.416331648826599e-05, 5.005300045013428e-05, 5.5942684412002563e-05, 6.183236837387085e-05, 6.772205233573914e-05, 7.361173629760742e-05, 7.950142025947571e-05, 8.5391104221344e-05, 9.128078818321228e-05, 9.717047214508057e-05, 0.00010306015610694885, 0.00010894984006881714, 0.00011483952403068542, 0.00012072920799255371, 0.000126618891954422, 0.00013250857591629028, 0.00013839825987815857, 0.00014428794384002686, 0.00015017762780189514, 0.00015606731176376343, 0.00016195699572563171, 0.0001678466796875]}, "gradients/encoder.encoder.layers.9.attention.q_proj.weight": {"_type": "histogram", "values": [2.0, 3.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 3.0, 2.0, 1.0, 5.0, 3.0, 4.0, 4.0, 5.0, 14.0, 24.0, 31.0, 54.0, 96.0, 136.0, 295.0, 629.0, 1752.0, 5947.0, 51530.0, 805263.0, 167308.0, 11323.0, 2428.0, 860.0, 396.0, 183.0, 95.0, 55.0, 39.0, 16.0, 17.0, 10.0, 9.0, 4.0, 4.0, 8.0, 2.0, 1.0, 2.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.3291015625, -1.2827606201171875, -1.236419677734375, -1.1900787353515625, -1.14373779296875, -1.0973968505859375, -1.051055908203125, -1.0047149658203125, -0.9583740234375, -0.9120330810546875, -0.865692138671875, -0.8193511962890625, -0.77301025390625, -0.7266693115234375, -0.680328369140625, -0.6339874267578125, -0.587646484375, -0.5413055419921875, -0.494964599609375, -0.4486236572265625, -0.40228271484375, -0.3559417724609375, -0.309600830078125, -0.2632598876953125, -0.2169189453125, -0.1705780029296875, -0.124237060546875, -0.0778961181640625, -0.03155517578125, 0.0147857666015625, 0.061126708984375, 0.1074676513671875, 0.15380859375, 0.2001495361328125, 0.246490478515625, 0.2928314208984375, 0.33917236328125, 0.3855133056640625, 0.431854248046875, 0.4781951904296875, 0.5245361328125, 0.5708770751953125, 0.617218017578125, 0.6635589599609375, 0.70989990234375, 0.7562408447265625, 0.802581787109375, 0.8489227294921875, 0.895263671875, 0.9416046142578125, 0.987945556640625, 1.0342864990234375, 1.08062744140625, 1.1269683837890625, 1.173309326171875, 1.2196502685546875, 1.2659912109375, 1.3123321533203125, 1.358673095703125, 1.4050140380859375, 1.45135498046875, 1.4976959228515625, 1.544036865234375, 1.5903778076171875, 1.63671875]}, "gradients/encoder.encoder.layers.9.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 3.0, 4.0, 4.0, 4.0, 6.0, 13.0, 18.0, 20.0, 30.0, 85.0, 77.0, 115.0, 123.0, 140.0, 115.0, 91.0, 55.0, 33.0, 29.0, 21.0, 9.0, 6.0, 0.0, 7.0, 0.0, 2.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.6328125, -1.584808349609375, -1.53680419921875, -1.488800048828125, -1.4407958984375, -1.392791748046875, -1.34478759765625, -1.296783447265625, -1.248779296875, -1.200775146484375, -1.15277099609375, -1.104766845703125, -1.0567626953125, -1.008758544921875, -0.96075439453125, -0.912750244140625, -0.86474609375, -0.816741943359375, -0.76873779296875, -0.720733642578125, -0.6727294921875, -0.624725341796875, -0.57672119140625, -0.528717041015625, -0.480712890625, -0.432708740234375, -0.38470458984375, -0.336700439453125, -0.2886962890625, -0.240692138671875, -0.19268798828125, -0.144683837890625, -0.0966796875, -0.048675537109375, -0.00067138671875, 0.047332763671875, 0.0953369140625, 0.143341064453125, 0.19134521484375, 0.239349365234375, 0.287353515625, 0.335357666015625, 0.38336181640625, 0.431365966796875, 0.4793701171875, 0.527374267578125, 0.57537841796875, 0.623382568359375, 0.67138671875, 0.719390869140625, 0.76739501953125, 0.815399169921875, 0.8634033203125, 0.911407470703125, 0.95941162109375, 1.007415771484375, 1.055419921875, 1.103424072265625, 1.15142822265625, 1.199432373046875, 1.2474365234375, 1.295440673828125, 1.34344482421875, 1.391448974609375, 1.439453125]}, "gradients/encoder.encoder.layers.9.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 3.0, 12.0, 77.0, 519.0, 365.0, 32.0, 6.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-22.524555206298828, -20.250925064086914, -17.977294921875, -15.703664779663086, -13.430034637451172, -11.156404495239258, -8.882774353027344, -6.60914421081543, -4.335514068603516, -2.0618839263916016, 0.2117462158203125, 2.4853763580322266, 4.759006500244141, 7.032636642456055, 9.306266784667969, 11.579896926879883, 13.853527069091797, 16.12715721130371, 18.400787353515625, 20.67441749572754, 22.948047637939453, 25.221677780151367, 27.49530792236328, 29.768938064575195, 32.04256820678711, 34.316200256347656, 36.58982849121094, 38.86345672607422, 41.137088775634766, 43.41072082519531, 45.684349060058594, 47.957977294921875, 50.23161315917969, 52.50524139404297, 54.778873443603516, 57.05250549316406, 59.326133728027344, 61.599761962890625, 63.87339401245117, 66.14702606201172, 68.420654296875, 70.69428253173828, 72.96791076660156, 75.24154663085938, 77.51517486572266, 79.78880310058594, 82.06243896484375, 84.33606719970703, 86.60969543457031, 88.8833236694336, 91.15695190429688, 93.43058776855469, 95.70421600341797, 97.97784423828125, 100.25148010253906, 102.52510833740234, 104.79873657226562, 107.0723648071289, 109.34599304199219, 111.61962890625, 113.89325714111328, 116.16688537597656, 118.44052124023438, 120.71414947509766, 122.98777770996094]}, "gradients/encoder.encoder.layers.9.layer_norm.bias": {"_type": "histogram", "values": [3.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 3.0, 3.0, 1.0, 7.0, 7.0, 10.0, 22.0, 26.0, 16.0, 21.0, 21.0, 42.0, 54.0, 51.0, 61.0, 55.0, 69.0, 56.0, 62.0, 65.0, 63.0, 41.0, 39.0, 36.0, 31.0, 30.0, 31.0, 16.0, 11.0, 23.0, 8.0, 11.0, 5.0, 4.0, 5.0, 3.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-13.545770645141602, -12.991360664367676, -12.43695068359375, -11.882540702819824, -11.328130722045898, -10.773721694946289, -10.219311714172363, -9.664901733398438, -9.110491752624512, -8.556081771850586, -8.00167179107666, -7.447262287139893, -6.892852306365967, -6.338442325592041, -5.784032821655273, -5.229622840881348, -4.675212860107422, -4.120802879333496, -3.5663931369781494, -3.0119833946228027, -2.457573413848877, -1.9031634330749512, -1.3487536907196045, -0.7943439483642578, -0.23993396759033203, 0.3144758939743042, 0.8688857555389404, 1.4232956171035767, 1.977705478668213, 2.5321154594421387, 3.0865252017974854, 3.640934944152832, 4.195343017578125, 4.749752998352051, 5.304162979125977, 5.858572483062744, 6.41298246383667, 6.967392444610596, 7.521801948547363, 8.076211929321289, 8.630621910095215, 9.18503189086914, 9.739441871643066, 10.293851852416992, 10.848260879516602, 11.402671813964844, 11.957080841064453, 12.511490821838379, 13.065900802612305, 13.62031078338623, 14.174720764160156, 14.729130744934082, 15.283540725708008, 15.837949752807617, 16.39236068725586, 16.94676971435547, 17.501178741455078, 18.055587768554688, 18.60999870300293, 19.16440773010254, 19.71881866455078, 20.27322769165039, 20.827638626098633, 21.382047653198242, 21.936458587646484]}, "gradients/encoder.encoder.layers.8.feed_forward.output_dense.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 4.0, 2.0, 5.0, 4.0, 8.0, 7.0, 10.0, 17.0, 18.0, 27.0, 39.0, 68.0, 105.0, 166.0, 305.0, 449.0, 896.0, 1721.0, 4053.0, 12532.0, 62663.0, 3610412.0, 448723.0, 36775.0, 8850.0, 3195.0, 1476.0, 737.0, 417.0, 210.0, 131.0, 84.0, 55.0, 40.0, 27.0, 27.0, 7.0, 7.0, 5.0, 4.0, 3.0, 0.0, 3.0, 1.0, 2.0, 1.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.69921875, -1.6441497802734375, -1.589080810546875, -1.5340118408203125, -1.47894287109375, -1.4238739013671875, -1.368804931640625, -1.3137359619140625, -1.2586669921875, -1.2035980224609375, -1.148529052734375, -1.0934600830078125, -1.03839111328125, -0.9833221435546875, -0.928253173828125, -0.8731842041015625, -0.818115234375, -0.7630462646484375, -0.707977294921875, -0.6529083251953125, -0.59783935546875, -0.5427703857421875, -0.487701416015625, -0.4326324462890625, -0.3775634765625, -0.3224945068359375, -0.267425537109375, -0.2123565673828125, -0.15728759765625, -0.1022186279296875, -0.047149658203125, 0.0079193115234375, 0.06298828125, 0.1180572509765625, 0.173126220703125, 0.2281951904296875, 0.28326416015625, 0.3383331298828125, 0.393402099609375, 0.4484710693359375, 0.5035400390625, 0.5586090087890625, 0.613677978515625, 0.6687469482421875, 0.72381591796875, 0.7788848876953125, 0.833953857421875, 0.8890228271484375, 0.944091796875, 0.9991607666015625, 1.054229736328125, 1.1092987060546875, 1.16436767578125, 1.2194366455078125, 1.274505615234375, 1.3295745849609375, 1.3846435546875, 1.4397125244140625, 1.494781494140625, 1.5498504638671875, 1.60491943359375, 1.6599884033203125, 1.715057373046875, 1.7701263427734375, 1.8251953125]}, "gradients/encoder.encoder.layers.8.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 1.0, 1.0, 4.0, 6.0, 6.0, 9.0, 7.0, 14.0, 17.0, 24.0, 29.0, 40.0, 35.0, 73.0, 59.0, 84.0, 86.0, 72.0, 74.0, 73.0, 63.0, 53.0, 28.0, 42.0, 21.0, 26.0, 18.0, 13.0, 12.0, 5.0, 4.0, 3.0, 3.0, 3.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.037109375, -1.003997802734375, -0.97088623046875, -0.937774658203125, -0.9046630859375, -0.871551513671875, -0.83843994140625, -0.805328369140625, -0.772216796875, -0.739105224609375, -0.70599365234375, -0.672882080078125, -0.6397705078125, -0.606658935546875, -0.57354736328125, -0.540435791015625, -0.50732421875, -0.474212646484375, -0.44110107421875, -0.407989501953125, -0.3748779296875, -0.341766357421875, -0.30865478515625, -0.275543212890625, -0.242431640625, -0.209320068359375, -0.17620849609375, -0.143096923828125, -0.1099853515625, -0.076873779296875, -0.04376220703125, -0.010650634765625, 0.0224609375, 0.055572509765625, 0.08868408203125, 0.121795654296875, 0.1549072265625, 0.188018798828125, 0.22113037109375, 0.254241943359375, 0.287353515625, 0.320465087890625, 0.35357666015625, 0.386688232421875, 0.4197998046875, 0.452911376953125, 0.48602294921875, 0.519134521484375, 0.55224609375, 0.585357666015625, 0.61846923828125, 0.651580810546875, 0.6846923828125, 0.717803955078125, 0.75091552734375, 0.784027099609375, 0.817138671875, 0.850250244140625, 0.88336181640625, 0.916473388671875, 0.9495849609375, 0.982696533203125, 1.01580810546875, 1.048919677734375, 1.08203125]}, "gradients/encoder.encoder.layers.8.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 3.0, 3.0, 1.0, 7.0, 8.0, 9.0, 12.0, 26.0, 31.0, 55.0, 87.0, 153.0, 297.0, 458.0, 1049.0, 1903.0, 4323.0, 10937.0, 35063.0, 184502.0, 3572591.0, 310045.0, 48790.0, 13958.0, 5170.0, 2372.0, 1101.0, 574.0, 298.0, 197.0, 104.0, 63.0, 36.0, 25.0, 12.0, 7.0, 12.0, 6.0, 1.0, 2.0, 4.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.130859375, -1.0939178466796875, -1.056976318359375, -1.0200347900390625, -0.98309326171875, -0.9461517333984375, -0.909210205078125, -0.8722686767578125, -0.8353271484375, -0.7983856201171875, -0.761444091796875, -0.7245025634765625, -0.68756103515625, -0.6506195068359375, -0.613677978515625, -0.5767364501953125, -0.539794921875, -0.5028533935546875, -0.465911865234375, -0.4289703369140625, -0.39202880859375, -0.3550872802734375, -0.318145751953125, -0.2812042236328125, -0.2442626953125, -0.2073211669921875, -0.170379638671875, -0.1334381103515625, -0.09649658203125, -0.0595550537109375, -0.022613525390625, 0.0143280029296875, 0.05126953125, 0.0882110595703125, 0.125152587890625, 0.1620941162109375, 0.19903564453125, 0.2359771728515625, 0.272918701171875, 0.3098602294921875, 0.3468017578125, 0.3837432861328125, 0.420684814453125, 0.4576263427734375, 0.49456787109375, 0.5315093994140625, 0.568450927734375, 0.6053924560546875, 0.642333984375, 0.6792755126953125, 0.716217041015625, 0.7531585693359375, 0.79010009765625, 0.8270416259765625, 0.863983154296875, 0.9009246826171875, 0.9378662109375, 0.9748077392578125, 1.011749267578125, 1.0486907958984375, 1.08563232421875, 1.1225738525390625, 1.159515380859375, 1.1964569091796875, 1.2333984375]}, "gradients/encoder.encoder.layers.8.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 5.0, 1.0, 2.0, 3.0, 5.0, 6.0, 6.0, 10.0, 11.0, 16.0, 30.0, 38.0, 54.0, 86.0, 148.0, 288.0, 1735.0, 955.0, 278.0, 132.0, 99.0, 49.0, 35.0, 27.0, 25.0, 7.0, 6.0, 8.0, 5.0, 6.0, 5.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.40380859375, -0.3862152099609375, -0.368621826171875, -0.3510284423828125, -0.33343505859375, -0.3158416748046875, -0.298248291015625, -0.2806549072265625, -0.2630615234375, -0.2454681396484375, -0.227874755859375, -0.2102813720703125, -0.19268798828125, -0.1750946044921875, -0.157501220703125, -0.1399078369140625, -0.122314453125, -0.1047210693359375, -0.087127685546875, -0.0695343017578125, -0.05194091796875, -0.0343475341796875, -0.016754150390625, 0.0008392333984375, 0.0184326171875, 0.0360260009765625, 0.053619384765625, 0.0712127685546875, 0.08880615234375, 0.1063995361328125, 0.123992919921875, 0.1415863037109375, 0.1591796875, 0.1767730712890625, 0.194366455078125, 0.2119598388671875, 0.22955322265625, 0.2471466064453125, 0.264739990234375, 0.2823333740234375, 0.2999267578125, 0.3175201416015625, 0.335113525390625, 0.3527069091796875, 0.37030029296875, 0.3878936767578125, 0.405487060546875, 0.4230804443359375, 0.440673828125, 0.4582672119140625, 0.475860595703125, 0.4934539794921875, 0.51104736328125, 0.5286407470703125, 0.546234130859375, 0.5638275146484375, 0.5814208984375, 0.5990142822265625, 0.616607666015625, 0.6342010498046875, 0.65179443359375, 0.6693878173828125, 0.686981201171875, 0.7045745849609375, 0.72216796875]}, "gradients/encoder.encoder.layers.8.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 2.0, 6.0, 2.0, 7.0, 11.0, 16.0, 33.0, 51.0, 76.0, 95.0, 127.0, 138.0, 118.0, 97.0, 81.0, 55.0, 31.0, 27.0, 13.0, 12.0, 3.0, 2.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.822126865386963, -2.7299299240112305, -2.637733221054077, -2.545536518096924, -2.4533395767211914, -2.361142635345459, -2.2689459323883057, -2.1767492294311523, -2.08455228805542, -1.992355465888977, -1.9001586437225342, -1.8079618215560913, -1.7157649993896484, -1.6235681772232056, -1.5313713550567627, -1.4391745328903198, -1.346977710723877, -1.254780888557434, -1.1625840663909912, -1.0703872442245483, -0.9781904220581055, -0.8859935998916626, -0.7937967777252197, -0.7015999555587769, -0.609403133392334, -0.5172063112258911, -0.42500948905944824, -0.33281266689300537, -0.2406158447265625, -0.14841902256011963, -0.05622220039367676, 0.03597462177276611, 0.12817144393920898, 0.22036826610565186, 0.3125650882720947, 0.4047619104385376, 0.49695873260498047, 0.5891555547714233, 0.6813523769378662, 0.7735491991043091, 0.865746021270752, 0.9579428434371948, 1.0501396656036377, 1.1423364877700806, 1.2345333099365234, 1.3267301321029663, 1.4189269542694092, 1.511123776435852, 1.603320598602295, 1.6955174207687378, 1.7877142429351807, 1.8799110651016235, 1.9721078872680664, 2.064304828643799, 2.156501531600952, 2.2486982345581055, 2.340895175933838, 2.4330921173095703, 2.5252888202667236, 2.617485523223877, 2.7096824645996094, 2.801879405975342, 2.894076108932495, 2.9862728118896484, 3.078469753265381]}, "gradients/encoder.encoder.layers.8.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 4.0, 1.0, 6.0, 4.0, 5.0, 5.0, 6.0, 18.0, 13.0, 15.0, 20.0, 20.0, 32.0, 38.0, 32.0, 39.0, 49.0, 48.0, 45.0, 55.0, 57.0, 49.0, 54.0, 58.0, 36.0, 47.0, 44.0, 41.0, 30.0, 27.0, 18.0, 11.0, 16.0, 15.0, 11.0, 10.0, 6.0, 12.0, 5.0, 1.0, 3.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-2.2364330291748047, -2.172072410583496, -2.1077117919921875, -2.043351411819458, -1.9789907932281494, -1.9146301746368408, -1.8502695560455322, -1.7859089374542236, -1.7215484380722046, -1.657187819480896, -1.592827320098877, -1.5284667015075684, -1.4641060829162598, -1.3997455835342407, -1.3353849649429321, -1.271024465560913, -1.2066638469696045, -1.142303228378296, -1.0779427289962769, -1.0135821104049683, -0.9492215514183044, -0.8848609924316406, -0.820500373840332, -0.7561398148536682, -0.6917792558670044, -0.6274186968803406, -0.5630581378936768, -0.49869751930236816, -0.43433696031570435, -0.3699764013290405, -0.3056158125400543, -0.24125522375106812, -0.1768946647644043, -0.11253409087657928, -0.04817351698875427, 0.01618705689907074, 0.08054763078689575, 0.14490818977355957, 0.20926877856254578, 0.273629367351532, 0.3379899263381958, 0.4023504853248596, 0.4667110741138458, 0.531071662902832, 0.5954322218894958, 0.6597927808761597, 0.7241533994674683, 0.7885139584541321, 0.8528745174407959, 0.9172350764274597, 0.9815956354141235, 1.0459562540054321, 1.1103167533874512, 1.1746773719787598, 1.2390379905700684, 1.303398609161377, 1.367759108543396, 1.4321197271347046, 1.4964802265167236, 1.5608408451080322, 1.6252014636993408, 1.6895619630813599, 1.7539225816726685, 1.8182830810546875, 1.882643699645996]}, "gradients/encoder.encoder.layers.8.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 2.0, 0.0, 1.0, 1.0, 5.0, 4.0, 4.0, 12.0, 16.0, 16.0, 42.0, 84.0, 172.0, 471.0, 1796.0, 11020.0, 159562.0, 792191.0, 74647.0, 6623.0, 1241.0, 348.0, 156.0, 56.0, 38.0, 16.0, 11.0, 8.0, 6.0, 2.0, 4.0, 4.0, 1.0, 3.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-2.943359375, -2.835845947265625, -2.72833251953125, -2.620819091796875, -2.5133056640625, -2.405792236328125, -2.29827880859375, -2.190765380859375, -2.083251953125, -1.975738525390625, -1.86822509765625, -1.760711669921875, -1.6531982421875, -1.545684814453125, -1.43817138671875, -1.330657958984375, -1.22314453125, -1.115631103515625, -1.00811767578125, -0.900604248046875, -0.7930908203125, -0.685577392578125, -0.57806396484375, -0.470550537109375, -0.363037109375, -0.255523681640625, -0.14801025390625, -0.040496826171875, 0.0670166015625, 0.174530029296875, 0.28204345703125, 0.389556884765625, 0.4970703125, 0.604583740234375, 0.71209716796875, 0.819610595703125, 0.9271240234375, 1.034637451171875, 1.14215087890625, 1.249664306640625, 1.357177734375, 1.464691162109375, 1.57220458984375, 1.679718017578125, 1.7872314453125, 1.894744873046875, 2.00225830078125, 2.109771728515625, 2.21728515625, 2.324798583984375, 2.43231201171875, 2.539825439453125, 2.6473388671875, 2.754852294921875, 2.86236572265625, 2.969879150390625, 3.077392578125, 3.184906005859375, 3.29241943359375, 3.399932861328125, 3.5074462890625, 3.614959716796875, 3.72247314453125, 3.829986572265625, 3.9375]}, "gradients/encoder.encoder.layers.8.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 7.0, 10.0, 3.0, 12.0, 6.0, 13.0, 13.0, 22.0, 26.0, 26.0, 34.0, 38.0, 51.0, 58.0, 67.0, 68.0, 72.0, 61.0, 67.0, 59.0, 66.0, 36.0, 30.0, 26.0, 28.0, 33.0, 16.0, 18.0, 13.0, 9.0, 5.0, 7.0, 1.0, 3.0, 1.0, 1.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0], "bins": [-0.966796875, -0.9382095336914062, -0.9096221923828125, -0.8810348510742188, -0.852447509765625, -0.8238601684570312, -0.7952728271484375, -0.7666854858398438, -0.73809814453125, -0.7095108032226562, -0.6809234619140625, -0.6523361206054688, -0.623748779296875, -0.5951614379882812, -0.5665740966796875, -0.5379867553710938, -0.5093994140625, -0.48081207275390625, -0.4522247314453125, -0.42363739013671875, -0.395050048828125, -0.36646270751953125, -0.3378753662109375, -0.30928802490234375, -0.28070068359375, -0.25211334228515625, -0.2235260009765625, -0.19493865966796875, -0.166351318359375, -0.13776397705078125, -0.1091766357421875, -0.08058929443359375, -0.052001953125, -0.02341461181640625, 0.0051727294921875, 0.03376007080078125, 0.062347412109375, 0.09093475341796875, 0.1195220947265625, 0.14810943603515625, 0.17669677734375, 0.20528411865234375, 0.2338714599609375, 0.26245880126953125, 0.291046142578125, 0.31963348388671875, 0.3482208251953125, 0.37680816650390625, 0.4053955078125, 0.43398284912109375, 0.4625701904296875, 0.49115753173828125, 0.519744873046875, 0.5483322143554688, 0.5769195556640625, 0.6055068969726562, 0.63409423828125, 0.6626815795898438, 0.6912689208984375, 0.7198562622070312, 0.748443603515625, 0.7770309448242188, 0.8056182861328125, 0.8342056274414062, 0.86279296875]}, "gradients/encoder.encoder.layers.8.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 3.0, 1.0, 4.0, 5.0, 2.0, 4.0, 6.0, 10.0, 10.0, 21.0, 30.0, 41.0, 52.0, 127.0, 180.0, 417.0, 861.0, 2532.0, 14022.0, 321077.0, 677931.0, 25437.0, 3575.0, 1166.0, 484.0, 214.0, 123.0, 80.0, 66.0, 28.0, 17.0, 10.0, 9.0, 7.0, 2.0, 2.0, 4.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0], "bins": [-3.703125, -3.5946044921875, -3.486083984375, -3.3775634765625, -3.26904296875, -3.1605224609375, -3.052001953125, -2.9434814453125, -2.8349609375, -2.7264404296875, -2.617919921875, -2.5093994140625, -2.40087890625, -2.2923583984375, -2.183837890625, -2.0753173828125, -1.966796875, -1.8582763671875, -1.749755859375, -1.6412353515625, -1.53271484375, -1.4241943359375, -1.315673828125, -1.2071533203125, -1.0986328125, -0.9901123046875, -0.881591796875, -0.7730712890625, -0.66455078125, -0.5560302734375, -0.447509765625, -0.3389892578125, -0.23046875, -0.1219482421875, -0.013427734375, 0.0950927734375, 0.20361328125, 0.3121337890625, 0.420654296875, 0.5291748046875, 0.6376953125, 0.7462158203125, 0.854736328125, 0.9632568359375, 1.07177734375, 1.1802978515625, 1.288818359375, 1.3973388671875, 1.505859375, 1.6143798828125, 1.722900390625, 1.8314208984375, 1.93994140625, 2.0484619140625, 2.156982421875, 2.2655029296875, 2.3740234375, 2.4825439453125, 2.591064453125, 2.6995849609375, 2.80810546875, 2.9166259765625, 3.025146484375, 3.1336669921875, 3.2421875]}, "gradients/encoder.encoder.layers.8.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 3.0, 3.0, 4.0, 3.0, 6.0, 6.0, 8.0, 10.0, 11.0, 14.0, 16.0, 15.0, 23.0, 24.0, 13.0, 27.0, 36.0, 42.0, 34.0, 38.0, 39.0, 41.0, 44.0, 40.0, 47.0, 37.0, 38.0, 46.0, 46.0, 31.0, 38.0, 30.0, 25.0, 21.0, 29.0, 25.0, 17.0, 12.0, 13.0, 8.0, 6.0, 6.0, 9.0, 6.0, 6.0, 5.0, 1.0, 3.0, 2.0, 3.0, 4.0, 0.0, 0.0, 1.0, 1.0], "bins": [-2.7265625, -2.6446533203125, -2.562744140625, -2.4808349609375, -2.39892578125, -2.3170166015625, -2.235107421875, -2.1531982421875, -2.0712890625, -1.9893798828125, -1.907470703125, -1.8255615234375, -1.74365234375, -1.6617431640625, -1.579833984375, -1.4979248046875, -1.416015625, -1.3341064453125, -1.252197265625, -1.1702880859375, -1.08837890625, -1.0064697265625, -0.924560546875, -0.8426513671875, -0.7607421875, -0.6788330078125, -0.596923828125, -0.5150146484375, -0.43310546875, -0.3511962890625, -0.269287109375, -0.1873779296875, -0.10546875, -0.0235595703125, 0.058349609375, 0.1402587890625, 0.22216796875, 0.3040771484375, 0.385986328125, 0.4678955078125, 0.5498046875, 0.6317138671875, 0.713623046875, 0.7955322265625, 0.87744140625, 0.9593505859375, 1.041259765625, 1.1231689453125, 1.205078125, 1.2869873046875, 1.368896484375, 1.4508056640625, 1.53271484375, 1.6146240234375, 1.696533203125, 1.7784423828125, 1.8603515625, 1.9422607421875, 2.024169921875, 2.1060791015625, 2.18798828125, 2.2698974609375, 2.351806640625, 2.4337158203125, 2.515625]}, "gradients/encoder.encoder.layers.8.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 2.0, 5.0, 2.0, 3.0, 7.0, 13.0, 23.0, 31.0, 101.0, 205.0, 572.0, 2166.0, 19961.0, 802985.0, 213734.0, 6881.0, 1207.0, 345.0, 150.0, 88.0, 38.0, 19.0, 10.0, 5.0, 5.0, 7.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.525390625, -1.4870681762695312, -1.4487457275390625, -1.4104232788085938, -1.372100830078125, -1.3337783813476562, -1.2954559326171875, -1.2571334838867188, -1.21881103515625, -1.1804885864257812, -1.1421661376953125, -1.1038436889648438, -1.065521240234375, -1.0271987915039062, -0.9888763427734375, -0.9505538940429688, -0.9122314453125, -0.8739089965820312, -0.8355865478515625, -0.7972640991210938, -0.758941650390625, -0.7206192016601562, -0.6822967529296875, -0.6439743041992188, -0.60565185546875, -0.5673294067382812, -0.5290069580078125, -0.49068450927734375, -0.452362060546875, -0.41403961181640625, -0.3757171630859375, -0.33739471435546875, -0.299072265625, -0.26074981689453125, -0.2224273681640625, -0.18410491943359375, -0.145782470703125, -0.10746002197265625, -0.0691375732421875, -0.03081512451171875, 0.00750732421875, 0.04582977294921875, 0.0841522216796875, 0.12247467041015625, 0.160797119140625, 0.19911956787109375, 0.2374420166015625, 0.27576446533203125, 0.3140869140625, 0.35240936279296875, 0.3907318115234375, 0.42905426025390625, 0.467376708984375, 0.5056991577148438, 0.5440216064453125, 0.5823440551757812, 0.62066650390625, 0.6589889526367188, 0.6973114013671875, 0.7356338500976562, 0.773956298828125, 0.8122787475585938, 0.8506011962890625, 0.8889236450195312, 0.92724609375]}, "gradients/encoder.encoder.layers.8.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 2.0, 1.0, 7.0, 4.0, 6.0, 4.0, 15.0, 6.0, 9.0, 13.0, 24.0, 17.0, 25.0, 31.0, 35.0, 44.0, 46.0, 66.0, 80.0, 93.0, 77.0, 65.0, 65.0, 38.0, 52.0, 31.0, 34.0, 18.0, 28.0, 15.0, 11.0, 9.0, 9.0, 6.0, 5.0, 5.0, 5.0, 3.0, 2.0, 3.0, 4.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-8.273124694824219e-05, -8.041225373744965e-05, -7.80932605266571e-05, -7.577426731586456e-05, -7.345527410507202e-05, -7.113628089427948e-05, -6.881728768348694e-05, -6.64982944726944e-05, -6.417930126190186e-05, -6.186030805110931e-05, -5.954131484031677e-05, -5.722232162952423e-05, -5.490332841873169e-05, -5.258433520793915e-05, -5.0265341997146606e-05, -4.7946348786354065e-05, -4.5627355575561523e-05, -4.330836236476898e-05, -4.098936915397644e-05, -3.86703759431839e-05, -3.635138273239136e-05, -3.4032389521598816e-05, -3.1713396310806274e-05, -2.9394403100013733e-05, -2.707540988922119e-05, -2.475641667842865e-05, -2.243742346763611e-05, -2.0118430256843567e-05, -1.7799437046051025e-05, -1.5480443835258484e-05, -1.3161450624465942e-05, -1.0842457413673401e-05, -8.52346420288086e-06, -6.204470992088318e-06, -3.885477781295776e-06, -1.5664845705032349e-06, 7.525086402893066e-07, 3.071501851081848e-06, 5.39049506187439e-06, 7.709488272666931e-06, 1.0028481483459473e-05, 1.2347474694252014e-05, 1.4666467905044556e-05, 1.6985461115837097e-05, 1.930445432662964e-05, 2.162344753742218e-05, 2.394244074821472e-05, 2.6261433959007263e-05, 2.8580427169799805e-05, 3.0899420380592346e-05, 3.321841359138489e-05, 3.553740680217743e-05, 3.785640001296997e-05, 4.017539322376251e-05, 4.2494386434555054e-05, 4.4813379645347595e-05, 4.713237285614014e-05, 4.945136606693268e-05, 5.177035927772522e-05, 5.408935248851776e-05, 5.64083456993103e-05, 5.8727338910102844e-05, 6.104633212089539e-05, 6.336532533168793e-05, 6.568431854248047e-05]}, "gradients/encoder.encoder.layers.8.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 2.0, 2.0, 3.0, 4.0, 6.0, 2.0, 9.0, 11.0, 15.0, 26.0, 31.0, 69.0, 106.0, 221.0, 488.0, 1252.0, 4332.0, 35015.0, 737976.0, 252342.0, 12548.0, 2540.0, 844.0, 352.0, 138.0, 82.0, 46.0, 39.0, 21.0, 13.0, 4.0, 11.0, 4.0, 3.0, 2.0, 1.0, 0.0, 2.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.0791015625, -1.0489578247070312, -1.0188140869140625, -0.9886703491210938, -0.958526611328125, -0.9283828735351562, -0.8982391357421875, -0.8680953979492188, -0.83795166015625, -0.8078079223632812, -0.7776641845703125, -0.7475204467773438, -0.717376708984375, -0.6872329711914062, -0.6570892333984375, -0.6269454956054688, -0.5968017578125, -0.5666580200195312, -0.5365142822265625, -0.5063705444335938, -0.476226806640625, -0.44608306884765625, -0.4159393310546875, -0.38579559326171875, -0.35565185546875, -0.32550811767578125, -0.2953643798828125, -0.26522064208984375, -0.235076904296875, -0.20493316650390625, -0.1747894287109375, -0.14464569091796875, -0.114501953125, -0.08435821533203125, -0.0542144775390625, -0.02407073974609375, 0.006072998046875, 0.03621673583984375, 0.0663604736328125, 0.09650421142578125, 0.12664794921875, 0.15679168701171875, 0.1869354248046875, 0.21707916259765625, 0.247222900390625, 0.27736663818359375, 0.3075103759765625, 0.33765411376953125, 0.3677978515625, 0.39794158935546875, 0.4280853271484375, 0.45822906494140625, 0.488372802734375, 0.5185165405273438, 0.5486602783203125, 0.5788040161132812, 0.60894775390625, 0.6390914916992188, 0.6692352294921875, 0.6993789672851562, 0.729522705078125, 0.7596664428710938, 0.7898101806640625, 0.8199539184570312, 0.85009765625]}, "gradients/encoder.encoder.layers.8.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 5.0, 3.0, 5.0, 9.0, 11.0, 4.0, 14.0, 21.0, 32.0, 39.0, 48.0, 47.0, 77.0, 108.0, 108.0, 102.0, 98.0, 66.0, 65.0, 46.0, 22.0, 20.0, 20.0, 12.0, 9.0, 6.0, 5.0, 5.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.705078125, -0.6812744140625, -0.657470703125, -0.6336669921875, -0.60986328125, -0.5860595703125, -0.562255859375, -0.5384521484375, -0.5146484375, -0.4908447265625, -0.467041015625, -0.4432373046875, -0.41943359375, -0.3956298828125, -0.371826171875, -0.3480224609375, -0.32421875, -0.3004150390625, -0.276611328125, -0.2528076171875, -0.22900390625, -0.2052001953125, -0.181396484375, -0.1575927734375, -0.1337890625, -0.1099853515625, -0.086181640625, -0.0623779296875, -0.03857421875, -0.0147705078125, 0.009033203125, 0.0328369140625, 0.056640625, 0.0804443359375, 0.104248046875, 0.1280517578125, 0.15185546875, 0.1756591796875, 0.199462890625, 0.2232666015625, 0.2470703125, 0.2708740234375, 0.294677734375, 0.3184814453125, 0.34228515625, 0.3660888671875, 0.389892578125, 0.4136962890625, 0.4375, 0.4613037109375, 0.485107421875, 0.5089111328125, 0.53271484375, 0.5565185546875, 0.580322265625, 0.6041259765625, 0.6279296875, 0.6517333984375, 0.675537109375, 0.6993408203125, 0.72314453125, 0.7469482421875, 0.770751953125, 0.7945556640625, 0.818359375]}, "gradients/encoder.encoder.layers.8.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 1.0, 4.0, 8.0, 18.0, 29.0, 53.0, 103.0, 150.0, 204.0, 188.0, 119.0, 71.0, 26.0, 14.0, 10.0, 2.0, 4.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-16.43756866455078, -15.863180160522461, -15.28879165649414, -14.71440315246582, -14.1400146484375, -13.56562614440918, -12.991238594055176, -12.416850090026855, -11.842461585998535, -11.268073081970215, -10.693684577941895, -10.119296073913574, -9.54490852355957, -8.97052001953125, -8.39613151550293, -7.821743011474609, -7.247354507446289, -6.672966003417969, -6.098577499389648, -5.524189472198486, -4.949800968170166, -4.375412464141846, -3.8010241985321045, -3.2266359329223633, -2.652247428894043, -2.0778589248657227, -1.5034706592559814, -0.9290822744369507, -0.3546938896179199, 0.2196946144104004, 0.7940828800201416, 1.3684711456298828, 1.942861557006836, 2.5172500610351562, 3.0916383266448975, 3.6660265922546387, 4.240415096282959, 4.814803600311279, 5.389191627502441, 5.963580131530762, 6.537968635559082, 7.112357139587402, 7.686745643615723, 8.261134147644043, 8.835521697998047, 9.409910202026367, 9.984298706054688, 10.558687210083008, 11.133075714111328, 11.707464218139648, 12.281852722167969, 12.856241226196289, 13.43062973022461, 14.00501823425293, 14.579405784606934, 15.153794288635254, 15.728182792663574, 16.302570343017578, 16.8769588470459, 17.45134735107422, 18.02573585510254, 18.60012435913086, 19.17451286315918, 19.7489013671875, 20.32328987121582]}, "gradients/encoder.encoder.layers.8.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 3.0, 2.0, 4.0, 2.0, 3.0, 7.0, 10.0, 8.0, 15.0, 9.0, 20.0, 13.0, 23.0, 34.0, 25.0, 46.0, 32.0, 48.0, 45.0, 49.0, 59.0, 52.0, 44.0, 63.0, 54.0, 51.0, 41.0, 39.0, 42.0, 28.0, 28.0, 21.0, 9.0, 19.0, 14.0, 8.0, 10.0, 8.0, 3.0, 5.0, 4.0, 6.0, 3.0, 4.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-15.533100128173828, -15.084814071655273, -14.636527061462402, -14.188241004943848, -13.739953994750977, -13.291667938232422, -12.843381881713867, -12.395095825195312, -11.946808815002441, -11.498522758483887, -11.050235748291016, -10.601949691772461, -10.153663635253906, -9.705376625061035, -9.25709056854248, -8.80880355834961, -8.360517501831055, -7.912230968475342, -7.463944435119629, -7.015658378601074, -6.567371845245361, -6.119085311889648, -5.670799255371094, -5.222512722015381, -4.774226188659668, -4.325939655303955, -3.8776533603668213, -3.4293670654296875, -2.9810805320739746, -2.5327939987182617, -2.084507703781128, -1.6362214088439941, -1.1879348754882812, -0.7396484613418579, -0.29136204719543457, 0.15692436695098877, 0.6052107810974121, 1.053497314453125, 1.5017836093902588, 1.9500699043273926, 2.3983564376831055, 2.8466429710388184, 3.294929265975952, 3.743215560913086, 4.191502094268799, 4.639788627624512, 5.088074684143066, 5.536361217498779, 5.984647750854492, 6.432934284210205, 6.881220817565918, 7.329506874084473, 7.7777934074401855, 8.226079940795898, 8.674365997314453, 9.122652053833008, 9.570939064025879, 10.019225120544434, 10.467512130737305, 10.91579818725586, 11.364084243774414, 11.812371253967285, 12.26065731048584, 12.708944320678711, 13.157230377197266]}, "gradients/encoder.encoder.layers.7.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 2.0, 5.0, 1.0, 0.0, 3.0, 8.0, 12.0, 8.0, 19.0, 28.0, 54.0, 84.0, 163.0, 468.0, 1732.0, 12918.0, 598551.0, 3540691.0, 35028.0, 3305.0, 715.0, 238.0, 100.0, 54.0, 35.0, 15.0, 17.0, 12.0, 4.0, 6.0, 3.0, 1.0, 3.0, 2.0, 3.0, 2.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.3125, -2.233245849609375, -2.15399169921875, -2.074737548828125, -1.9954833984375, -1.916229248046875, -1.83697509765625, -1.757720947265625, -1.678466796875, -1.599212646484375, -1.51995849609375, -1.440704345703125, -1.3614501953125, -1.282196044921875, -1.20294189453125, -1.123687744140625, -1.04443359375, -0.965179443359375, -0.88592529296875, -0.806671142578125, -0.7274169921875, -0.648162841796875, -0.56890869140625, -0.489654541015625, -0.410400390625, -0.331146240234375, -0.25189208984375, -0.172637939453125, -0.0933837890625, -0.014129638671875, 0.06512451171875, 0.144378662109375, 0.2236328125, 0.302886962890625, 0.38214111328125, 0.461395263671875, 0.5406494140625, 0.619903564453125, 0.69915771484375, 0.778411865234375, 0.857666015625, 0.936920166015625, 1.01617431640625, 1.095428466796875, 1.1746826171875, 1.253936767578125, 1.33319091796875, 1.412445068359375, 1.49169921875, 1.570953369140625, 1.65020751953125, 1.729461669921875, 1.8087158203125, 1.887969970703125, 1.96722412109375, 2.046478271484375, 2.125732421875, 2.204986572265625, 2.28424072265625, 2.363494873046875, 2.4427490234375, 2.522003173828125, 2.60125732421875, 2.680511474609375, 2.759765625]}, "gradients/encoder.encoder.layers.7.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 4.0, 2.0, 2.0, 7.0, 9.0, 10.0, 15.0, 14.0, 23.0, 39.0, 49.0, 52.0, 55.0, 72.0, 72.0, 62.0, 86.0, 65.0, 62.0, 66.0, 39.0, 59.0, 34.0, 26.0, 20.0, 29.0, 11.0, 13.0, 4.0, 4.0, 1.0, 2.0, 3.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.96728515625, -0.9398193359375, -0.912353515625, -0.8848876953125, -0.857421875, -0.8299560546875, -0.802490234375, -0.7750244140625, -0.74755859375, -0.7200927734375, -0.692626953125, -0.6651611328125, -0.6376953125, -0.6102294921875, -0.582763671875, -0.5552978515625, -0.52783203125, -0.5003662109375, -0.472900390625, -0.4454345703125, -0.41796875, -0.3905029296875, -0.363037109375, -0.3355712890625, -0.30810546875, -0.2806396484375, -0.253173828125, -0.2257080078125, -0.1982421875, -0.1707763671875, -0.143310546875, -0.1158447265625, -0.08837890625, -0.0609130859375, -0.033447265625, -0.0059814453125, 0.021484375, 0.0489501953125, 0.076416015625, 0.1038818359375, 0.13134765625, 0.1588134765625, 0.186279296875, 0.2137451171875, 0.2412109375, 0.2686767578125, 0.296142578125, 0.3236083984375, 0.35107421875, 0.3785400390625, 0.406005859375, 0.4334716796875, 0.4609375, 0.4884033203125, 0.515869140625, 0.5433349609375, 0.57080078125, 0.5982666015625, 0.625732421875, 0.6531982421875, 0.6806640625, 0.7081298828125, 0.735595703125, 0.7630615234375, 0.79052734375]}, "gradients/encoder.encoder.layers.7.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 2.0, 4.0, 2.0, 2.0, 8.0, 10.0, 12.0, 22.0, 29.0, 31.0, 87.0, 144.0, 415.0, 1274.0, 5908.0, 50866.0, 3402375.0, 705490.0, 23158.0, 3261.0, 736.0, 232.0, 78.0, 51.0, 23.0, 11.0, 13.0, 10.0, 5.0, 8.0, 5.0, 6.0, 6.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0], "bins": [-2.28125, -2.21759033203125, -2.1539306640625, -2.09027099609375, -2.026611328125, -1.96295166015625, -1.8992919921875, -1.83563232421875, -1.77197265625, -1.70831298828125, -1.6446533203125, -1.58099365234375, -1.517333984375, -1.45367431640625, -1.3900146484375, -1.32635498046875, -1.2626953125, -1.19903564453125, -1.1353759765625, -1.07171630859375, -1.008056640625, -0.94439697265625, -0.8807373046875, -0.81707763671875, -0.75341796875, -0.68975830078125, -0.6260986328125, -0.56243896484375, -0.498779296875, -0.43511962890625, -0.3714599609375, -0.30780029296875, -0.244140625, -0.18048095703125, -0.1168212890625, -0.05316162109375, 0.010498046875, 0.07415771484375, 0.1378173828125, 0.20147705078125, 0.26513671875, 0.32879638671875, 0.3924560546875, 0.45611572265625, 0.519775390625, 0.58343505859375, 0.6470947265625, 0.71075439453125, 0.7744140625, 0.83807373046875, 0.9017333984375, 0.96539306640625, 1.029052734375, 1.09271240234375, 1.1563720703125, 1.22003173828125, 1.28369140625, 1.34735107421875, 1.4110107421875, 1.47467041015625, 1.538330078125, 1.60198974609375, 1.6656494140625, 1.72930908203125, 1.79296875]}, "gradients/encoder.encoder.layers.7.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 2.0, 1.0, 1.0, 3.0, 2.0, 7.0, 5.0, 1.0, 9.0, 14.0, 18.0, 21.0, 37.0, 71.0, 124.0, 258.0, 512.0, 924.0, 894.0, 549.0, 251.0, 123.0, 82.0, 48.0, 30.0, 22.0, 19.0, 19.0, 11.0, 8.0, 3.0, 2.0, 2.0, 5.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.6767578125, -1.6275177001953125, -1.578277587890625, -1.5290374755859375, -1.47979736328125, -1.4305572509765625, -1.381317138671875, -1.3320770263671875, -1.2828369140625, -1.2335968017578125, -1.184356689453125, -1.1351165771484375, -1.08587646484375, -1.0366363525390625, -0.987396240234375, -0.9381561279296875, -0.888916015625, -0.8396759033203125, -0.790435791015625, -0.7411956787109375, -0.69195556640625, -0.6427154541015625, -0.593475341796875, -0.5442352294921875, -0.4949951171875, -0.4457550048828125, -0.396514892578125, -0.3472747802734375, -0.29803466796875, -0.2487945556640625, -0.199554443359375, -0.1503143310546875, -0.10107421875, -0.0518341064453125, -0.002593994140625, 0.0466461181640625, 0.09588623046875, 0.1451263427734375, 0.194366455078125, 0.2436065673828125, 0.2928466796875, 0.3420867919921875, 0.391326904296875, 0.4405670166015625, 0.48980712890625, 0.5390472412109375, 0.588287353515625, 0.6375274658203125, 0.686767578125, 0.7360076904296875, 0.785247802734375, 0.8344879150390625, 0.88372802734375, 0.9329681396484375, 0.982208251953125, 1.0314483642578125, 1.0806884765625, 1.1299285888671875, 1.179168701171875, 1.2284088134765625, 1.27764892578125, 1.3268890380859375, 1.376129150390625, 1.4253692626953125, 1.474609375]}, "gradients/encoder.encoder.layers.7.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 8.0, 4.0, 19.0, 65.0, 160.0, 295.0, 259.0, 126.0, 30.0, 21.0, 7.0, 5.0, 5.0, 2.0, 1.0, 2.0, 0.0, 1.0, 1.0, 2.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-20.698305130004883, -20.1650333404541, -19.631763458251953, -19.098491668701172, -18.56521987915039, -18.03194808959961, -17.498676300048828, -16.96540641784668, -16.4321346282959, -15.898862838745117, -15.365592002868652, -14.832321166992188, -14.299049377441406, -13.765777587890625, -13.23250675201416, -12.699235916137695, -12.165964126586914, -11.632692337036133, -11.099421501159668, -10.566150665283203, -10.032878875732422, -9.49960708618164, -8.966336250305176, -8.433065414428711, -7.89979362487793, -7.366522312164307, -6.833250999450684, -6.2999796867370605, -5.7667083740234375, -5.2334370613098145, -4.700165748596191, -4.166894435882568, -3.633625030517578, -3.100353717803955, -2.567082405090332, -2.033811092376709, -1.500539779663086, -0.9672684669494629, -0.43399715423583984, 0.0992741584777832, 0.6325454711914062, 1.1658167839050293, 1.6990880966186523, 2.2323594093322754, 2.7656307220458984, 3.2989020347595215, 3.8321733474731445, 4.365444660186768, 4.898715972900391, 5.431987285614014, 5.965258598327637, 6.49852991104126, 7.031801223754883, 7.565072536468506, 8.098343849182129, 8.631614685058594, 9.164886474609375, 9.698158264160156, 10.231429100036621, 10.764699935913086, 11.297971725463867, 11.831243515014648, 12.364514350891113, 12.897785186767578, 13.43105697631836]}, "gradients/encoder.encoder.layers.7.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 4.0, 2.0, 2.0, 3.0, 3.0, 5.0, 7.0, 7.0, 23.0, 11.0, 23.0, 22.0, 25.0, 32.0, 37.0, 49.0, 47.0, 60.0, 59.0, 58.0, 56.0, 46.0, 47.0, 62.0, 50.0, 39.0, 39.0, 32.0, 40.0, 34.0, 20.0, 10.0, 12.0, 13.0, 5.0, 6.0, 7.0, 7.0, 5.0, 2.0, 5.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-7.867152690887451, -7.651735782623291, -7.436319351196289, -7.220902442932129, -7.005485534667969, -6.790068626403809, -6.574651718139648, -6.3592352867126465, -6.143818378448486, -5.928401470184326, -5.712985038757324, -5.497568130493164, -5.282151222229004, -5.066734313964844, -4.851317405700684, -4.635900974273682, -4.4204840660095215, -4.205067157745361, -3.9896504878997803, -3.774233818054199, -3.558816909790039, -3.343400001525879, -3.127983331680298, -2.912566661834717, -2.6971497535705566, -2.4817328453063965, -2.2663161754608154, -2.0508995056152344, -1.8354825973510742, -1.6200658082962036, -1.404649019241333, -1.1892322301864624, -0.9738154411315918, -0.7583986520767212, -0.5429818630218506, -0.32756507396698, -0.11214828491210938, 0.10326850414276123, 0.31868529319763184, 0.5341020822525024, 0.749518871307373, 0.9649356603622437, 1.1803524494171143, 1.3957692384719849, 1.6111860275268555, 1.826602816581726, 2.0420196056365967, 2.2574362754821777, 2.472853183746338, 2.688270092010498, 2.903686761856079, 3.11910343170166, 3.3345203399658203, 3.5499372482299805, 3.7653539180755615, 3.9807705879211426, 4.196187496185303, 4.411604404449463, 4.627020835876465, 4.842437744140625, 5.057854652404785, 5.273271560668945, 5.4886884689331055, 5.704104900360107, 5.919521808624268]}, "gradients/encoder.encoder.layers.7.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 2.0, 2.0, 3.0, 6.0, 12.0, 15.0, 16.0, 28.0, 44.0, 67.0, 138.0, 210.0, 331.0, 615.0, 1095.0, 2040.0, 3989.0, 8894.0, 20999.0, 54873.0, 153538.0, 361619.0, 275041.0, 100494.0, 36803.0, 14600.0, 6372.0, 3132.0, 1570.0, 833.0, 472.0, 297.0, 165.0, 88.0, 60.0, 34.0, 24.0, 12.0, 9.0, 8.0, 6.0, 3.0, 1.0, 5.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.85595703125, -0.8281631469726562, -0.8003692626953125, -0.7725753784179688, -0.744781494140625, -0.7169876098632812, -0.6891937255859375, -0.6613998413085938, -0.63360595703125, -0.6058120727539062, -0.5780181884765625, -0.5502243041992188, -0.522430419921875, -0.49463653564453125, -0.4668426513671875, -0.43904876708984375, -0.4112548828125, -0.38346099853515625, -0.3556671142578125, -0.32787322998046875, -0.300079345703125, -0.27228546142578125, -0.2444915771484375, -0.21669769287109375, -0.18890380859375, -0.16110992431640625, -0.1333160400390625, -0.10552215576171875, -0.077728271484375, -0.04993438720703125, -0.0221405029296875, 0.00565338134765625, 0.033447265625, 0.06124114990234375, 0.0890350341796875, 0.11682891845703125, 0.144622802734375, 0.17241668701171875, 0.2002105712890625, 0.22800445556640625, 0.25579833984375, 0.28359222412109375, 0.3113861083984375, 0.33917999267578125, 0.366973876953125, 0.39476776123046875, 0.4225616455078125, 0.45035552978515625, 0.4781494140625, 0.5059432983398438, 0.5337371826171875, 0.5615310668945312, 0.589324951171875, 0.6171188354492188, 0.6449127197265625, 0.6727066040039062, 0.70050048828125, 0.7282943725585938, 0.7560882568359375, 0.7838821411132812, 0.811676025390625, 0.8394699096679688, 0.8672637939453125, 0.8950576782226562, 0.9228515625]}, "gradients/encoder.encoder.layers.7.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 5.0, 3.0, 5.0, 5.0, 6.0, 10.0, 16.0, 24.0, 25.0, 42.0, 49.0, 64.0, 59.0, 62.0, 83.0, 80.0, 84.0, 81.0, 70.0, 63.0, 47.0, 33.0, 35.0, 14.0, 19.0, 16.0, 6.0, 5.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.98681640625, -0.958221435546875, -0.92962646484375, -0.901031494140625, -0.8724365234375, -0.843841552734375, -0.81524658203125, -0.786651611328125, -0.758056640625, -0.729461669921875, -0.70086669921875, -0.672271728515625, -0.6436767578125, -0.615081787109375, -0.58648681640625, -0.557891845703125, -0.529296875, -0.500701904296875, -0.47210693359375, -0.443511962890625, -0.4149169921875, -0.386322021484375, -0.35772705078125, -0.329132080078125, -0.300537109375, -0.271942138671875, -0.24334716796875, -0.214752197265625, -0.1861572265625, -0.157562255859375, -0.12896728515625, -0.100372314453125, -0.07177734375, -0.043182373046875, -0.01458740234375, 0.014007568359375, 0.0426025390625, 0.071197509765625, 0.09979248046875, 0.128387451171875, 0.156982421875, 0.185577392578125, 0.21417236328125, 0.242767333984375, 0.2713623046875, 0.299957275390625, 0.32855224609375, 0.357147216796875, 0.3857421875, 0.414337158203125, 0.44293212890625, 0.471527099609375, 0.5001220703125, 0.528717041015625, 0.55731201171875, 0.585906982421875, 0.614501953125, 0.643096923828125, 0.67169189453125, 0.700286865234375, 0.7288818359375, 0.757476806640625, 0.78607177734375, 0.814666748046875, 0.84326171875]}, "gradients/encoder.encoder.layers.7.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 4.0, 3.0, 5.0, 3.0, 13.0, 13.0, 30.0, 24.0, 57.0, 60.0, 99.0, 148.0, 249.0, 518.0, 800.0, 1844.0, 6255.0, 55964.0, 813623.0, 152719.0, 10976.0, 2617.0, 1096.0, 557.0, 318.0, 220.0, 118.0, 74.0, 48.0, 40.0, 15.0, 19.0, 11.0, 8.0, 9.0, 1.0, 1.0, 1.0, 3.0, 0.0, 1.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-2.361328125, -2.289031982421875, -2.21673583984375, -2.144439697265625, -2.0721435546875, -1.999847412109375, -1.92755126953125, -1.855255126953125, -1.782958984375, -1.710662841796875, -1.63836669921875, -1.566070556640625, -1.4937744140625, -1.421478271484375, -1.34918212890625, -1.276885986328125, -1.20458984375, -1.132293701171875, -1.05999755859375, -0.987701416015625, -0.9154052734375, -0.843109130859375, -0.77081298828125, -0.698516845703125, -0.626220703125, -0.553924560546875, -0.48162841796875, -0.409332275390625, -0.3370361328125, -0.264739990234375, -0.19244384765625, -0.120147705078125, -0.0478515625, 0.024444580078125, 0.09674072265625, 0.169036865234375, 0.2413330078125, 0.313629150390625, 0.38592529296875, 0.458221435546875, 0.530517578125, 0.602813720703125, 0.67510986328125, 0.747406005859375, 0.8197021484375, 0.891998291015625, 0.96429443359375, 1.036590576171875, 1.10888671875, 1.181182861328125, 1.25347900390625, 1.325775146484375, 1.3980712890625, 1.470367431640625, 1.54266357421875, 1.614959716796875, 1.687255859375, 1.759552001953125, 1.83184814453125, 1.904144287109375, 1.9764404296875, 2.048736572265625, 2.12103271484375, 2.193328857421875, 2.265625]}, "gradients/encoder.encoder.layers.7.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 3.0, 0.0, 1.0, 2.0, 3.0, 5.0, 4.0, 1.0, 8.0, 5.0, 7.0, 10.0, 6.0, 16.0, 10.0, 16.0, 18.0, 15.0, 16.0, 19.0, 30.0, 32.0, 37.0, 33.0, 40.0, 44.0, 46.0, 39.0, 46.0, 42.0, 44.0, 39.0, 38.0, 38.0, 35.0, 32.0, 32.0, 36.0, 26.0, 23.0, 26.0, 17.0, 10.0, 18.0, 12.0, 10.0, 8.0, 3.0, 2.0, 3.0, 3.0, 2.0, 2.0, 1.0, 0.0, 3.0, 1.0, 0.0, 2.0, 2.0, 1.0], "bins": [-2.0078125, -1.944610595703125, -1.88140869140625, -1.818206787109375, -1.7550048828125, -1.691802978515625, -1.62860107421875, -1.565399169921875, -1.502197265625, -1.438995361328125, -1.37579345703125, -1.312591552734375, -1.2493896484375, -1.186187744140625, -1.12298583984375, -1.059783935546875, -0.99658203125, -0.933380126953125, -0.87017822265625, -0.806976318359375, -0.7437744140625, -0.680572509765625, -0.61737060546875, -0.554168701171875, -0.490966796875, -0.427764892578125, -0.36456298828125, -0.301361083984375, -0.2381591796875, -0.174957275390625, -0.11175537109375, -0.048553466796875, 0.0146484375, 0.077850341796875, 0.14105224609375, 0.204254150390625, 0.2674560546875, 0.330657958984375, 0.39385986328125, 0.457061767578125, 0.520263671875, 0.583465576171875, 0.64666748046875, 0.709869384765625, 0.7730712890625, 0.836273193359375, 0.89947509765625, 0.962677001953125, 1.02587890625, 1.089080810546875, 1.15228271484375, 1.215484619140625, 1.2786865234375, 1.341888427734375, 1.40509033203125, 1.468292236328125, 1.531494140625, 1.594696044921875, 1.65789794921875, 1.721099853515625, 1.7843017578125, 1.847503662109375, 1.91070556640625, 1.973907470703125, 2.037109375]}, "gradients/encoder.encoder.layers.7.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 3.0, 0.0, 0.0, 1.0, 2.0, 4.0, 3.0, 4.0, 1.0, 7.0, 19.0, 14.0, 19.0, 23.0, 40.0, 74.0, 138.0, 233.0, 390.0, 994.0, 3136.0, 20259.0, 563784.0, 436430.0, 18105.0, 2948.0, 981.0, 408.0, 199.0, 129.0, 71.0, 39.0, 21.0, 18.0, 15.0, 14.0, 7.0, 6.0, 7.0, 1.0, 2.0, 5.0, 4.0, 6.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 4.0], "bins": [-0.96337890625, -0.9358367919921875, -0.908294677734375, -0.8807525634765625, -0.85321044921875, -0.8256683349609375, -0.798126220703125, -0.7705841064453125, -0.7430419921875, -0.7154998779296875, -0.687957763671875, -0.6604156494140625, -0.63287353515625, -0.6053314208984375, -0.577789306640625, -0.5502471923828125, -0.522705078125, -0.4951629638671875, -0.467620849609375, -0.4400787353515625, -0.41253662109375, -0.3849945068359375, -0.357452392578125, -0.3299102783203125, -0.3023681640625, -0.2748260498046875, -0.247283935546875, -0.2197418212890625, -0.19219970703125, -0.1646575927734375, -0.137115478515625, -0.1095733642578125, -0.08203125, -0.0544891357421875, -0.026947021484375, 0.0005950927734375, 0.02813720703125, 0.0556793212890625, 0.083221435546875, 0.1107635498046875, 0.1383056640625, 0.1658477783203125, 0.193389892578125, 0.2209320068359375, 0.24847412109375, 0.2760162353515625, 0.303558349609375, 0.3311004638671875, 0.358642578125, 0.3861846923828125, 0.413726806640625, 0.4412689208984375, 0.46881103515625, 0.4963531494140625, 0.523895263671875, 0.5514373779296875, 0.5789794921875, 0.6065216064453125, 0.634063720703125, 0.6616058349609375, 0.68914794921875, 0.7166900634765625, 0.744232177734375, 0.7717742919921875, 0.79931640625]}, "gradients/encoder.encoder.layers.7.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 2.0, 4.0, 2.0, 8.0, 6.0, 6.0, 12.0, 5.0, 9.0, 7.0, 13.0, 18.0, 29.0, 25.0, 32.0, 36.0, 52.0, 65.0, 84.0, 91.0, 86.0, 84.0, 66.0, 52.0, 46.0, 26.0, 29.0, 16.0, 16.0, 10.0, 13.0, 9.0, 12.0, 8.0, 9.0, 1.0, 6.0, 1.0, 1.0, 2.0, 2.0, 1.0, 2.0, 0.0, 4.0, 1.0, 0.0, 2.0, 1.0, 1.0], "bins": [-8.064508438110352e-05, -7.827114313840866e-05, -7.58972018957138e-05, -7.352326065301895e-05, -7.11493194103241e-05, -6.877537816762924e-05, -6.640143692493439e-05, -6.402749568223953e-05, -6.165355443954468e-05, -5.927961319684982e-05, -5.690567195415497e-05, -5.4531730711460114e-05, -5.215778946876526e-05, -4.9783848226070404e-05, -4.740990698337555e-05, -4.5035965740680695e-05, -4.266202449798584e-05, -4.0288083255290985e-05, -3.791414201259613e-05, -3.5540200769901276e-05, -3.316625952720642e-05, -3.0792318284511566e-05, -2.841837704181671e-05, -2.6044435799121857e-05, -2.3670494556427002e-05, -2.1296553313732147e-05, -1.8922612071037292e-05, -1.6548670828342438e-05, -1.4174729585647583e-05, -1.1800788342952728e-05, -9.426847100257874e-06, -7.052905857563019e-06, -4.678964614868164e-06, -2.3050233721733093e-06, 6.891787052154541e-08, 2.4428591132164e-06, 4.816800355911255e-06, 7.19074159860611e-06, 9.564682841300964e-06, 1.1938624083995819e-05, 1.4312565326690674e-05, 1.668650656938553e-05, 1.9060447812080383e-05, 2.1434389054775238e-05, 2.3808330297470093e-05, 2.6182271540164948e-05, 2.8556212782859802e-05, 3.093015402555466e-05, 3.330409526824951e-05, 3.5678036510944366e-05, 3.805197775363922e-05, 4.0425918996334076e-05, 4.279986023902893e-05, 4.5173801481723785e-05, 4.754774272441864e-05, 4.9921683967113495e-05, 5.229562520980835e-05, 5.4669566452503204e-05, 5.704350769519806e-05, 5.9417448937892914e-05, 6.179139018058777e-05, 6.416533142328262e-05, 6.653927266597748e-05, 6.891321390867233e-05, 7.128715515136719e-05]}, "gradients/encoder.encoder.layers.7.attention.q_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0, 2.0, 4.0, 2.0, 8.0, 3.0, 5.0, 7.0, 16.0, 18.0, 21.0, 37.0, 56.0, 88.0, 186.0, 277.0, 473.0, 1064.0, 2710.0, 10019.0, 98444.0, 836598.0, 84622.0, 9158.0, 2518.0, 1034.0, 478.0, 274.0, 151.0, 92.0, 64.0, 38.0, 34.0, 24.0, 9.0, 7.0, 8.0, 3.0, 1.0, 3.0, 1.0, 4.0, 1.0, 0.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.7294921875, -0.7038726806640625, -0.678253173828125, -0.6526336669921875, -0.62701416015625, -0.6013946533203125, -0.575775146484375, -0.5501556396484375, -0.5245361328125, -0.4989166259765625, -0.473297119140625, -0.4476776123046875, -0.42205810546875, -0.3964385986328125, -0.370819091796875, -0.3451995849609375, -0.319580078125, -0.2939605712890625, -0.268341064453125, -0.2427215576171875, -0.21710205078125, -0.1914825439453125, -0.165863037109375, -0.1402435302734375, -0.1146240234375, -0.0890045166015625, -0.063385009765625, -0.0377655029296875, -0.01214599609375, 0.0134735107421875, 0.039093017578125, 0.0647125244140625, 0.09033203125, 0.1159515380859375, 0.141571044921875, 0.1671905517578125, 0.19281005859375, 0.2184295654296875, 0.244049072265625, 0.2696685791015625, 0.2952880859375, 0.3209075927734375, 0.346527099609375, 0.3721466064453125, 0.39776611328125, 0.4233856201171875, 0.449005126953125, 0.4746246337890625, 0.500244140625, 0.5258636474609375, 0.551483154296875, 0.5771026611328125, 0.60272216796875, 0.6283416748046875, 0.653961181640625, 0.6795806884765625, 0.7052001953125, 0.7308197021484375, 0.756439208984375, 0.7820587158203125, 0.80767822265625, 0.8332977294921875, 0.858917236328125, 0.8845367431640625, 0.91015625]}, "gradients/encoder.encoder.layers.7.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 4.0, 11.0, 5.0, 6.0, 9.0, 19.0, 17.0, 28.0, 35.0, 32.0, 48.0, 56.0, 65.0, 90.0, 101.0, 79.0, 74.0, 70.0, 56.0, 39.0, 49.0, 31.0, 21.0, 11.0, 18.0, 12.0, 4.0, 7.0, 2.0, 4.0, 2.0, 4.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.54443359375, -0.5223312377929688, -0.5002288818359375, -0.47812652587890625, -0.456024169921875, -0.43392181396484375, -0.4118194580078125, -0.38971710205078125, -0.36761474609375, -0.34551239013671875, -0.3234100341796875, -0.30130767822265625, -0.279205322265625, -0.25710296630859375, -0.2350006103515625, -0.21289825439453125, -0.1907958984375, -0.16869354248046875, -0.1465911865234375, -0.12448883056640625, -0.102386474609375, -0.08028411865234375, -0.0581817626953125, -0.03607940673828125, -0.01397705078125, 0.00812530517578125, 0.0302276611328125, 0.05233001708984375, 0.074432373046875, 0.09653472900390625, 0.1186370849609375, 0.14073944091796875, 0.162841796875, 0.18494415283203125, 0.2070465087890625, 0.22914886474609375, 0.251251220703125, 0.27335357666015625, 0.2954559326171875, 0.31755828857421875, 0.33966064453125, 0.36176300048828125, 0.3838653564453125, 0.40596771240234375, 0.428070068359375, 0.45017242431640625, 0.4722747802734375, 0.49437713623046875, 0.5164794921875, 0.5385818481445312, 0.5606842041015625, 0.5827865600585938, 0.604888916015625, 0.6269912719726562, 0.6490936279296875, 0.6711959838867188, 0.69329833984375, 0.7154006958007812, 0.7375030517578125, 0.7596054077148438, 0.781707763671875, 0.8038101196289062, 0.8259124755859375, 0.8480148315429688, 0.8701171875]}, "gradients/encoder.encoder.layers.7.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 5.0, 2.0, 7.0, 12.0, 28.0, 70.0, 208.0, 360.0, 200.0, 87.0, 18.0, 6.0, 4.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-16.452632904052734, -15.788700103759766, -15.124768257141113, -14.460835456848145, -13.796902656555176, -13.132970809936523, -12.469038009643555, -11.805105209350586, -11.141172409057617, -10.477239608764648, -9.813307762145996, -9.149374961853027, -8.485442161560059, -7.821509838104248, -7.1575775146484375, -6.493644714355469, -5.829712867736816, -5.165780544281006, -4.501847743988037, -3.8379154205322266, -3.173982858657837, -2.5100502967834473, -1.8461179733276367, -1.182185173034668, -0.5182528495788574, 0.14567965269088745, 0.8096121549606323, 1.4735445976257324, 2.137477159500122, 2.8014097213745117, 3.4653420448303223, 4.129274845123291, 4.793207168579102, 5.457139492034912, 6.121072292327881, 6.785004615783691, 7.44893741607666, 8.112869262695312, 8.776802062988281, 9.44073486328125, 10.104667663574219, 10.768600463867188, 11.43253231048584, 12.096465110778809, 12.760397911071777, 13.42432975769043, 14.088262557983398, 14.752195358276367, 15.41612720489502, 16.080059051513672, 16.74399185180664, 17.40792465209961, 18.071857452392578, 18.735790252685547, 19.399723052978516, 20.063655853271484, 20.72758674621582, 21.39151954650879, 22.055452346801758, 22.719383239746094, 23.383316040039062, 24.04724884033203, 24.711181640625, 25.37511444091797, 26.039047241210938]}, "gradients/encoder.encoder.layers.7.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 4.0, 5.0, 2.0, 6.0, 6.0, 7.0, 9.0, 8.0, 5.0, 11.0, 10.0, 16.0, 21.0, 19.0, 15.0, 18.0, 36.0, 33.0, 31.0, 36.0, 38.0, 40.0, 45.0, 50.0, 51.0, 45.0, 43.0, 48.0, 40.0, 36.0, 27.0, 37.0, 27.0, 26.0, 28.0, 22.0, 26.0, 12.0, 16.0, 10.0, 10.0, 5.0, 6.0, 6.0, 3.0, 3.0, 3.0, 4.0, 3.0, 3.0, 1.0, 3.0], "bins": [-9.18872356414795, -8.937575340270996, -8.68642807006836, -8.435279846191406, -8.184131622314453, -7.932984352111816, -7.681836128234863, -7.430688381195068, -7.179540634155273, -6.9283928871154785, -6.677245140075684, -6.4260969161987305, -6.1749491691589355, -5.923801422119141, -5.6726531982421875, -5.421505451202393, -5.170357704162598, -4.919209957122803, -4.668062210083008, -4.416913986206055, -4.16576623916626, -3.914618492126465, -3.663470506668091, -3.412322521209717, -3.161174774169922, -2.910027027130127, -2.658879041671753, -2.407731056213379, -2.156583309173584, -1.9054354429244995, -1.654287576675415, -1.4031397104263306, -1.1519923210144043, -0.9008444547653198, -0.6496965885162354, -0.3985487222671509, -0.1474008560180664, 0.10374701023101807, 0.35489487648010254, 0.606042742729187, 0.8571906089782715, 1.108338475227356, 1.3594863414764404, 1.610634207725525, 1.8617820739746094, 2.1129298210144043, 2.3640778064727783, 2.6152257919311523, 2.8663735389709473, 3.117521286010742, 3.368669271469116, 3.6198172569274902, 3.870965003967285, 4.12211275100708, 4.373260498046875, 4.624408721923828, 4.875556468963623, 5.126704216003418, 5.377852439880371, 5.629000186920166, 5.880147933959961, 6.131295680999756, 6.382443428039551, 6.633591651916504, 6.884739398956299]}, "gradients/encoder.encoder.layers.6.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 2.0, 4.0, 3.0, 4.0, 4.0, 11.0, 11.0, 18.0, 24.0, 45.0, 61.0, 98.0, 152.0, 274.0, 737.0, 4133806.0, 58006.0, 419.0, 219.0, 138.0, 76.0, 58.0, 40.0, 23.0, 16.0, 14.0, 13.0, 4.0, 5.0, 3.0, 4.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.4453125, -8.15283203125, -7.8603515625, -7.56787109375, -7.275390625, -6.98291015625, -6.6904296875, -6.39794921875, -6.10546875, -5.81298828125, -5.5205078125, -5.22802734375, -4.935546875, -4.64306640625, -4.3505859375, -4.05810546875, -3.765625, -3.47314453125, -3.1806640625, -2.88818359375, -2.595703125, -2.30322265625, -2.0107421875, -1.71826171875, -1.42578125, -1.13330078125, -0.8408203125, -0.54833984375, -0.255859375, 0.03662109375, 0.3291015625, 0.62158203125, 0.9140625, 1.20654296875, 1.4990234375, 1.79150390625, 2.083984375, 2.37646484375, 2.6689453125, 2.96142578125, 3.25390625, 3.54638671875, 3.8388671875, 4.13134765625, 4.423828125, 4.71630859375, 5.0087890625, 5.30126953125, 5.59375, 5.88623046875, 6.1787109375, 6.47119140625, 6.763671875, 7.05615234375, 7.3486328125, 7.64111328125, 7.93359375, 8.22607421875, 8.5185546875, 8.81103515625, 9.103515625, 9.39599609375, 9.6884765625, 9.98095703125, 10.2734375]}, "gradients/encoder.encoder.layers.6.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 4.0, 3.0, 3.0, 3.0, 11.0, 9.0, 18.0, 33.0, 25.0, 61.0, 79.0, 80.0, 98.0, 102.0, 125.0, 97.0, 77.0, 65.0, 38.0, 37.0, 18.0, 12.0, 5.0, 5.0, 5.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.42578125, -1.3906097412109375, -1.355438232421875, -1.3202667236328125, -1.28509521484375, -1.2499237060546875, -1.214752197265625, -1.1795806884765625, -1.1444091796875, -1.1092376708984375, -1.074066162109375, -1.0388946533203125, -1.00372314453125, -0.9685516357421875, -0.933380126953125, -0.8982086181640625, -0.863037109375, -0.8278656005859375, -0.792694091796875, -0.7575225830078125, -0.72235107421875, -0.6871795654296875, -0.652008056640625, -0.6168365478515625, -0.5816650390625, -0.5464935302734375, -0.511322021484375, -0.4761505126953125, -0.44097900390625, -0.4058074951171875, -0.370635986328125, -0.3354644775390625, -0.30029296875, -0.2651214599609375, -0.229949951171875, -0.1947784423828125, -0.15960693359375, -0.1244354248046875, -0.089263916015625, -0.0540924072265625, -0.0189208984375, 0.0162506103515625, 0.051422119140625, 0.0865936279296875, 0.12176513671875, 0.1569366455078125, 0.192108154296875, 0.2272796630859375, 0.262451171875, 0.2976226806640625, 0.332794189453125, 0.3679656982421875, 0.40313720703125, 0.4383087158203125, 0.473480224609375, 0.5086517333984375, 0.5438232421875, 0.5789947509765625, 0.614166259765625, 0.6493377685546875, 0.68450927734375, 0.7196807861328125, 0.754852294921875, 0.7900238037109375, 0.8251953125]}, "gradients/encoder.encoder.layers.6.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 3.0, 1.0, 2.0, 5.0, 2.0, 5.0, 7.0, 13.0, 23.0, 44.0, 80.0, 152.0, 396.0, 872.0, 2076.0, 5978.0, 22948.0, 167268.0, 3409878.0, 526373.0, 41951.0, 10107.0, 3489.0, 1463.0, 582.0, 264.0, 132.0, 72.0, 37.0, 21.0, 12.0, 10.0, 4.0, 7.0, 4.0, 7.0, 2.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-1.1318359375, -1.1001129150390625, -1.068389892578125, -1.0366668701171875, -1.00494384765625, -0.9732208251953125, -0.941497802734375, -0.9097747802734375, -0.8780517578125, -0.8463287353515625, -0.814605712890625, -0.7828826904296875, -0.75115966796875, -0.7194366455078125, -0.687713623046875, -0.6559906005859375, -0.624267578125, -0.5925445556640625, -0.560821533203125, -0.5290985107421875, -0.49737548828125, -0.4656524658203125, -0.433929443359375, -0.4022064208984375, -0.3704833984375, -0.3387603759765625, -0.307037353515625, -0.2753143310546875, -0.24359130859375, -0.2118682861328125, -0.180145263671875, -0.1484222412109375, -0.11669921875, -0.0849761962890625, -0.053253173828125, -0.0215301513671875, 0.01019287109375, 0.0419158935546875, 0.073638916015625, 0.1053619384765625, 0.1370849609375, 0.1688079833984375, 0.200531005859375, 0.2322540283203125, 0.26397705078125, 0.2957000732421875, 0.327423095703125, 0.3591461181640625, 0.390869140625, 0.4225921630859375, 0.454315185546875, 0.4860382080078125, 0.51776123046875, 0.5494842529296875, 0.581207275390625, 0.6129302978515625, 0.6446533203125, 0.6763763427734375, 0.708099365234375, 0.7398223876953125, 0.77154541015625, 0.8032684326171875, 0.834991455078125, 0.8667144775390625, 0.8984375]}, "gradients/encoder.encoder.layers.6.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 0.0, 2.0, 1.0, 3.0, 6.0, 3.0, 9.0, 11.0, 11.0, 25.0, 41.0, 48.0, 92.0, 179.0, 317.0, 660.0, 981.0, 796.0, 399.0, 220.0, 99.0, 48.0, 38.0, 25.0, 18.0, 9.0, 9.0, 5.0, 4.0, 6.0, 1.0, 5.0, 5.0, 1.0, 0.0, 0.0, 1.0, 3.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.048828125, -1.01458740234375, -0.9803466796875, -0.94610595703125, -0.911865234375, -0.87762451171875, -0.8433837890625, -0.80914306640625, -0.77490234375, -0.74066162109375, -0.7064208984375, -0.67218017578125, -0.637939453125, -0.60369873046875, -0.5694580078125, -0.53521728515625, -0.5009765625, -0.46673583984375, -0.4324951171875, -0.39825439453125, -0.364013671875, -0.32977294921875, -0.2955322265625, -0.26129150390625, -0.22705078125, -0.19281005859375, -0.1585693359375, -0.12432861328125, -0.090087890625, -0.05584716796875, -0.0216064453125, 0.01263427734375, 0.046875, 0.08111572265625, 0.1153564453125, 0.14959716796875, 0.183837890625, 0.21807861328125, 0.2523193359375, 0.28656005859375, 0.32080078125, 0.35504150390625, 0.3892822265625, 0.42352294921875, 0.457763671875, 0.49200439453125, 0.5262451171875, 0.56048583984375, 0.5947265625, 0.62896728515625, 0.6632080078125, 0.69744873046875, 0.731689453125, 0.76593017578125, 0.8001708984375, 0.83441162109375, 0.86865234375, 0.90289306640625, 0.9371337890625, 0.97137451171875, 1.005615234375, 1.03985595703125, 1.0740966796875, 1.10833740234375, 1.142578125]}, "gradients/encoder.encoder.layers.6.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 3.0, 2.0, 4.0, 12.0, 36.0, 171.0, 362.0, 292.0, 92.0, 21.0, 9.0, 6.0, 0.0, 3.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-19.129640579223633, -18.661569595336914, -18.193498611450195, -17.725427627563477, -17.257356643676758, -16.78928565979004, -16.32121467590332, -15.853143692016602, -15.385072708129883, -14.917001724243164, -14.448930740356445, -13.980859756469727, -13.512788772583008, -13.044717788696289, -12.57664680480957, -12.108575820922852, -11.640504837036133, -11.172433853149414, -10.704362869262695, -10.236291885375977, -9.768220901489258, -9.300149917602539, -8.83207893371582, -8.364007949829102, -7.895937919616699, -7.4278669357299805, -6.959795951843262, -6.491724967956543, -6.023653984069824, -5.5555830001831055, -5.087512016296387, -4.619441032409668, -4.151370048522949, -3.6832990646362305, -3.2152280807495117, -2.747157096862793, -2.279086112976074, -1.8110153675079346, -1.3429443836212158, -0.8748733997344971, -0.4068024158477783, 0.06126853823661804, 0.5293394923210144, 0.9974104166030884, 1.4654814004898071, 1.9335522651672363, 2.401623249053955, 2.869694232940674, 3.3377652168273926, 3.8058362007141113, 4.27390718460083, 4.741978168487549, 5.210049152374268, 5.678119659423828, 6.146190643310547, 6.614261627197266, 7.082332611083984, 7.550403594970703, 8.018474578857422, 8.48654556274414, 8.95461654663086, 9.422687530517578, 9.890758514404297, 10.358829498291016, 10.826900482177734]}, "gradients/encoder.encoder.layers.6.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 4.0, 2.0, 7.0, 5.0, 5.0, 10.0, 6.0, 12.0, 18.0, 13.0, 25.0, 29.0, 40.0, 38.0, 36.0, 37.0, 45.0, 54.0, 56.0, 57.0, 53.0, 55.0, 52.0, 43.0, 52.0, 48.0, 27.0, 32.0, 33.0, 27.0, 20.0, 13.0, 14.0, 12.0, 9.0, 5.0, 6.0, 5.0, 5.0, 1.0, 2.0, 2.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.8907530307769775, -2.758129596710205, -2.6255059242248535, -2.492882490158081, -2.3602588176727295, -2.227635383605957, -2.0950117111206055, -1.962388277053833, -1.829764723777771, -1.697141170501709, -1.564517617225647, -1.431894063949585, -1.2992706298828125, -1.166646957397461, -1.0340235233306885, -0.9013999700546265, -0.7687764167785645, -0.6361528635025024, -0.5035293102264404, -0.3709058165550232, -0.23828226327896118, -0.10565871000289917, 0.026964783668518066, 0.15958833694458008, 0.2922118902206421, 0.4248354434967041, 0.5574589967727661, 0.6900824904441833, 0.8227060437202454, 0.9553295969963074, 1.0879530906677246, 1.2205766439437866, 1.3531999588012695, 1.4858235120773315, 1.6184470653533936, 1.751070499420166, 1.8836941719055176, 2.01631760597229, 2.1489410400390625, 2.281564712524414, 2.4141883850097656, 2.546811819076538, 2.6794354915618896, 2.812058925628662, 2.9446825981140137, 3.077306032180786, 3.2099294662475586, 3.34255313873291, 3.4751765727996826, 3.607800006866455, 3.7404236793518066, 3.873047113418579, 4.005670547485352, 4.138294219970703, 4.270917892456055, 4.403541564941406, 4.5361647605896, 4.668788433074951, 4.8014116287231445, 4.934035301208496, 5.066658973693848, 5.199282646179199, 5.331905841827393, 5.464529514312744, 5.597153186798096]}, "gradients/encoder.encoder.layers.6.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 2.0, 0.0, 2.0, 3.0, 1.0, 1.0, 2.0, 7.0, 10.0, 6.0, 10.0, 14.0, 22.0, 36.0, 54.0, 55.0, 98.0, 131.0, 167.0, 299.0, 472.0, 706.0, 1290.0, 2258.0, 4404.0, 9078.0, 20458.0, 50685.0, 134769.0, 328357.0, 298775.0, 116575.0, 43923.0, 18238.0, 8373.0, 4070.0, 2100.0, 1198.0, 680.0, 386.0, 259.0, 162.0, 137.0, 89.0, 63.0, 47.0, 31.0, 17.0, 9.0, 10.0, 7.0, 6.0, 8.0, 0.0, 6.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.74462890625, -0.722015380859375, -0.69940185546875, -0.676788330078125, -0.6541748046875, -0.631561279296875, -0.60894775390625, -0.586334228515625, -0.563720703125, -0.541107177734375, -0.51849365234375, -0.495880126953125, -0.4732666015625, -0.450653076171875, -0.42803955078125, -0.405426025390625, -0.3828125, -0.360198974609375, -0.33758544921875, -0.314971923828125, -0.2923583984375, -0.269744873046875, -0.24713134765625, -0.224517822265625, -0.201904296875, -0.179290771484375, -0.15667724609375, -0.134063720703125, -0.1114501953125, -0.088836669921875, -0.06622314453125, -0.043609619140625, -0.02099609375, 0.001617431640625, 0.02423095703125, 0.046844482421875, 0.0694580078125, 0.092071533203125, 0.11468505859375, 0.137298583984375, 0.159912109375, 0.182525634765625, 0.20513916015625, 0.227752685546875, 0.2503662109375, 0.272979736328125, 0.29559326171875, 0.318206787109375, 0.3408203125, 0.363433837890625, 0.38604736328125, 0.408660888671875, 0.4312744140625, 0.453887939453125, 0.47650146484375, 0.499114990234375, 0.521728515625, 0.544342041015625, 0.56695556640625, 0.589569091796875, 0.6121826171875, 0.634796142578125, 0.65740966796875, 0.680023193359375, 0.70263671875]}, "gradients/encoder.encoder.layers.6.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 4.0, 0.0, 3.0, 3.0, 1.0, 7.0, 15.0, 14.0, 25.0, 30.0, 43.0, 58.0, 69.0, 83.0, 84.0, 78.0, 100.0, 90.0, 66.0, 65.0, 47.0, 47.0, 37.0, 13.0, 15.0, 4.0, 4.0, 6.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.98583984375, -0.9574356079101562, -0.9290313720703125, -0.9006271362304688, -0.872222900390625, -0.8438186645507812, -0.8154144287109375, -0.7870101928710938, -0.75860595703125, -0.7302017211914062, -0.7017974853515625, -0.6733932495117188, -0.644989013671875, -0.6165847778320312, -0.5881805419921875, -0.5597763061523438, -0.5313720703125, -0.5029678344726562, -0.4745635986328125, -0.44615936279296875, -0.417755126953125, -0.38935089111328125, -0.3609466552734375, -0.33254241943359375, -0.30413818359375, -0.27573394775390625, -0.2473297119140625, -0.21892547607421875, -0.190521240234375, -0.16211700439453125, -0.1337127685546875, -0.10530853271484375, -0.076904296875, -0.04850006103515625, -0.0200958251953125, 0.00830841064453125, 0.036712646484375, 0.06511688232421875, 0.0935211181640625, 0.12192535400390625, 0.15032958984375, 0.17873382568359375, 0.2071380615234375, 0.23554229736328125, 0.263946533203125, 0.29235076904296875, 0.3207550048828125, 0.34915924072265625, 0.3775634765625, 0.40596771240234375, 0.4343719482421875, 0.46277618408203125, 0.491180419921875, 0.5195846557617188, 0.5479888916015625, 0.5763931274414062, 0.60479736328125, 0.6332015991210938, 0.6616058349609375, 0.6900100708007812, 0.718414306640625, 0.7468185424804688, 0.7752227783203125, 0.8036270141601562, 0.83203125]}, "gradients/encoder.encoder.layers.6.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 3.0, 2.0, 3.0, 6.0, 3.0, 22.0, 22.0, 31.0, 47.0, 106.0, 195.0, 407.0, 1006.0, 3431.0, 32453.0, 763599.0, 232417.0, 11430.0, 2021.0, 702.0, 327.0, 138.0, 84.0, 33.0, 25.0, 15.0, 10.0, 7.0, 5.0, 2.0, 4.0, 1.0, 4.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0], "bins": [-2.732421875, -2.658172607421875, -2.58392333984375, -2.509674072265625, -2.4354248046875, -2.361175537109375, -2.28692626953125, -2.212677001953125, -2.138427734375, -2.064178466796875, -1.98992919921875, -1.915679931640625, -1.8414306640625, -1.767181396484375, -1.69293212890625, -1.618682861328125, -1.54443359375, -1.470184326171875, -1.39593505859375, -1.321685791015625, -1.2474365234375, -1.173187255859375, -1.09893798828125, -1.024688720703125, -0.950439453125, -0.876190185546875, -0.80194091796875, -0.727691650390625, -0.6534423828125, -0.579193115234375, -0.50494384765625, -0.430694580078125, -0.3564453125, -0.282196044921875, -0.20794677734375, -0.133697509765625, -0.0594482421875, 0.014801025390625, 0.08905029296875, 0.163299560546875, 0.237548828125, 0.311798095703125, 0.38604736328125, 0.460296630859375, 0.5345458984375, 0.608795166015625, 0.68304443359375, 0.757293701171875, 0.83154296875, 0.905792236328125, 0.98004150390625, 1.054290771484375, 1.1285400390625, 1.202789306640625, 1.27703857421875, 1.351287841796875, 1.425537109375, 1.499786376953125, 1.57403564453125, 1.648284912109375, 1.7225341796875, 1.796783447265625, 1.87103271484375, 1.945281982421875, 2.01953125]}, "gradients/encoder.encoder.layers.6.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 8.0, 3.0, 1.0, 4.0, 9.0, 10.0, 11.0, 9.0, 9.0, 12.0, 15.0, 18.0, 17.0, 19.0, 25.0, 25.0, 33.0, 28.0, 33.0, 38.0, 41.0, 36.0, 44.0, 49.0, 38.0, 40.0, 35.0, 41.0, 47.0, 31.0, 36.0, 27.0, 29.0, 22.0, 24.0, 24.0, 18.0, 16.0, 18.0, 15.0, 11.0, 6.0, 7.0, 6.0, 4.0, 4.0, 4.0, 4.0, 1.0, 3.0, 3.0, 1.0, 3.0, 2.0], "bins": [-1.650390625, -1.6019439697265625, -1.553497314453125, -1.5050506591796875, -1.45660400390625, -1.4081573486328125, -1.359710693359375, -1.3112640380859375, -1.2628173828125, -1.2143707275390625, -1.165924072265625, -1.1174774169921875, -1.06903076171875, -1.0205841064453125, -0.972137451171875, -0.9236907958984375, -0.875244140625, -0.8267974853515625, -0.778350830078125, -0.7299041748046875, -0.68145751953125, -0.6330108642578125, -0.584564208984375, -0.5361175537109375, -0.4876708984375, -0.4392242431640625, -0.390777587890625, -0.3423309326171875, -0.29388427734375, -0.2454376220703125, -0.196990966796875, -0.1485443115234375, -0.10009765625, -0.0516510009765625, -0.003204345703125, 0.0452423095703125, 0.09368896484375, 0.1421356201171875, 0.190582275390625, 0.2390289306640625, 0.2874755859375, 0.3359222412109375, 0.384368896484375, 0.4328155517578125, 0.48126220703125, 0.5297088623046875, 0.578155517578125, 0.6266021728515625, 0.675048828125, 0.7234954833984375, 0.771942138671875, 0.8203887939453125, 0.86883544921875, 0.9172821044921875, 0.965728759765625, 1.0141754150390625, 1.0626220703125, 1.1110687255859375, 1.159515380859375, 1.2079620361328125, 1.25640869140625, 1.3048553466796875, 1.353302001953125, 1.4017486572265625, 1.4501953125]}, "gradients/encoder.encoder.layers.6.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 0.0, 0.0, 2.0, 1.0, 2.0, 2.0, 4.0, 3.0, 6.0, 7.0, 7.0, 23.0, 28.0, 32.0, 75.0, 153.0, 289.0, 764.0, 3296.0, 32004.0, 952363.0, 53777.0, 4036.0, 1004.0, 338.0, 143.0, 76.0, 42.0, 26.0, 14.0, 8.0, 10.0, 12.0, 5.0, 1.0, 3.0, 1.0, 0.0, 3.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.07421875, -1.0378875732421875, -1.001556396484375, -0.9652252197265625, -0.92889404296875, -0.8925628662109375, -0.856231689453125, -0.8199005126953125, -0.7835693359375, -0.7472381591796875, -0.710906982421875, -0.6745758056640625, -0.63824462890625, -0.6019134521484375, -0.565582275390625, -0.5292510986328125, -0.492919921875, -0.4565887451171875, -0.420257568359375, -0.3839263916015625, -0.34759521484375, -0.3112640380859375, -0.274932861328125, -0.2386016845703125, -0.2022705078125, -0.1659393310546875, -0.129608154296875, -0.0932769775390625, -0.05694580078125, -0.0206146240234375, 0.015716552734375, 0.0520477294921875, 0.08837890625, 0.1247100830078125, 0.161041259765625, 0.1973724365234375, 0.23370361328125, 0.2700347900390625, 0.306365966796875, 0.3426971435546875, 0.3790283203125, 0.4153594970703125, 0.451690673828125, 0.4880218505859375, 0.52435302734375, 0.5606842041015625, 0.597015380859375, 0.6333465576171875, 0.669677734375, 0.7060089111328125, 0.742340087890625, 0.7786712646484375, 0.81500244140625, 0.8513336181640625, 0.887664794921875, 0.9239959716796875, 0.9603271484375, 0.9966583251953125, 1.032989501953125, 1.0693206787109375, 1.10565185546875, 1.1419830322265625, 1.178314208984375, 1.2146453857421875, 1.2509765625]}, "gradients/encoder.encoder.layers.6.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 1.0, 2.0, 1.0, 1.0, 1.0, 2.0, 7.0, 5.0, 4.0, 9.0, 8.0, 12.0, 9.0, 18.0, 22.0, 43.0, 65.0, 82.0, 119.0, 137.0, 132.0, 83.0, 68.0, 39.0, 36.0, 24.0, 22.0, 7.0, 12.0, 16.0, 5.0, 10.0, 5.0, 2.0, 3.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.822845458984375e-05, -9.521190077066422e-05, -9.219534695148468e-05, -8.917879313230515e-05, -8.616223931312561e-05, -8.314568549394608e-05, -8.012913167476654e-05, -7.7112577855587e-05, -7.409602403640747e-05, -7.107947021722794e-05, -6.80629163980484e-05, -6.504636257886887e-05, -6.202980875968933e-05, -5.9013254940509796e-05, -5.599670112133026e-05, -5.2980147302150726e-05, -4.996359348297119e-05, -4.6947039663791656e-05, -4.393048584461212e-05, -4.091393202543259e-05, -3.789737820625305e-05, -3.488082438707352e-05, -3.186427056789398e-05, -2.8847716748714447e-05, -2.5831162929534912e-05, -2.2814609110355377e-05, -1.9798055291175842e-05, -1.6781501471996307e-05, -1.3764947652816772e-05, -1.0748393833637238e-05, -7.731840014457703e-06, -4.715286195278168e-06, -1.6987323760986328e-06, 1.317821443080902e-06, 4.334375262260437e-06, 7.350929081439972e-06, 1.0367482900619507e-05, 1.3384036719799042e-05, 1.6400590538978577e-05, 1.941714435815811e-05, 2.2433698177337646e-05, 2.545025199651718e-05, 2.8466805815696716e-05, 3.148335963487625e-05, 3.4499913454055786e-05, 3.751646727323532e-05, 4.0533021092414856e-05, 4.354957491159439e-05, 4.6566128730773926e-05, 4.958268254995346e-05, 5.2599236369132996e-05, 5.561579018831253e-05, 5.8632344007492065e-05, 6.16488978266716e-05, 6.466545164585114e-05, 6.768200546503067e-05, 7.06985592842102e-05, 7.371511310338974e-05, 7.673166692256927e-05, 7.974822074174881e-05, 8.276477456092834e-05, 8.578132838010788e-05, 8.879788219928741e-05, 9.181443601846695e-05, 9.483098983764648e-05]}, "gradients/encoder.encoder.layers.6.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 6.0, 1.0, 3.0, 7.0, 5.0, 11.0, 13.0, 19.0, 39.0, 46.0, 78.0, 134.0, 247.0, 460.0, 1022.0, 2925.0, 12762.0, 167054.0, 803437.0, 50231.0, 6619.0, 1869.0, 757.0, 320.0, 208.0, 94.0, 56.0, 36.0, 28.0, 28.0, 9.0, 11.0, 5.0, 8.0, 1.0, 6.0, 2.0, 3.0, 0.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.7666015625, -0.7448883056640625, -0.723175048828125, -0.7014617919921875, -0.67974853515625, -0.6580352783203125, -0.636322021484375, -0.6146087646484375, -0.5928955078125, -0.5711822509765625, -0.549468994140625, -0.5277557373046875, -0.50604248046875, -0.4843292236328125, -0.462615966796875, -0.4409027099609375, -0.419189453125, -0.3974761962890625, -0.375762939453125, -0.3540496826171875, -0.33233642578125, -0.3106231689453125, -0.288909912109375, -0.2671966552734375, -0.2454833984375, -0.2237701416015625, -0.202056884765625, -0.1803436279296875, -0.15863037109375, -0.1369171142578125, -0.115203857421875, -0.0934906005859375, -0.07177734375, -0.0500640869140625, -0.028350830078125, -0.0066375732421875, 0.01507568359375, 0.0367889404296875, 0.058502197265625, 0.0802154541015625, 0.1019287109375, 0.1236419677734375, 0.145355224609375, 0.1670684814453125, 0.18878173828125, 0.2104949951171875, 0.232208251953125, 0.2539215087890625, 0.275634765625, 0.2973480224609375, 0.319061279296875, 0.3407745361328125, 0.36248779296875, 0.3842010498046875, 0.405914306640625, 0.4276275634765625, 0.4493408203125, 0.4710540771484375, 0.492767333984375, 0.5144805908203125, 0.53619384765625, 0.5579071044921875, 0.579620361328125, 0.6013336181640625, 0.623046875]}, "gradients/encoder.encoder.layers.6.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 4.0, 3.0, 5.0, 9.0, 8.0, 5.0, 19.0, 21.0, 29.0, 37.0, 36.0, 55.0, 67.0, 86.0, 90.0, 88.0, 97.0, 74.0, 59.0, 58.0, 37.0, 26.0, 26.0, 19.0, 22.0, 10.0, 5.0, 3.0, 2.0, 3.0, 3.0, 2.0, 0.0, 3.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.537109375, -0.5209732055664062, -0.5048370361328125, -0.48870086669921875, -0.472564697265625, -0.45642852783203125, -0.4402923583984375, -0.42415618896484375, -0.40802001953125, -0.39188385009765625, -0.3757476806640625, -0.35961151123046875, -0.343475341796875, -0.32733917236328125, -0.3112030029296875, -0.29506683349609375, -0.2789306640625, -0.26279449462890625, -0.2466583251953125, -0.23052215576171875, -0.214385986328125, -0.19824981689453125, -0.1821136474609375, -0.16597747802734375, -0.14984130859375, -0.13370513916015625, -0.1175689697265625, -0.10143280029296875, -0.085296630859375, -0.06916046142578125, -0.0530242919921875, -0.03688812255859375, -0.020751953125, -0.00461578369140625, 0.0115203857421875, 0.02765655517578125, 0.043792724609375, 0.05992889404296875, 0.0760650634765625, 0.09220123291015625, 0.10833740234375, 0.12447357177734375, 0.1406097412109375, 0.15674591064453125, 0.172882080078125, 0.18901824951171875, 0.2051544189453125, 0.22129058837890625, 0.2374267578125, 0.25356292724609375, 0.2696990966796875, 0.28583526611328125, 0.301971435546875, 0.31810760498046875, 0.3342437744140625, 0.35037994384765625, 0.36651611328125, 0.38265228271484375, 0.3987884521484375, 0.41492462158203125, 0.431060791015625, 0.44719696044921875, 0.4633331298828125, 0.47946929931640625, 0.49560546875]}, "gradients/encoder.encoder.layers.6.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 0.0, 0.0, 2.0, 2.0, 4.0, 7.0, 5.0, 10.0, 31.0, 83.0, 263.0, 334.0, 173.0, 56.0, 23.0, 7.0, 4.0, 3.0, 4.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-22.919252395629883, -22.401060104370117, -21.88286590576172, -21.364673614501953, -20.846481323242188, -20.32828712463379, -19.810094833374023, -19.291902542114258, -18.77370834350586, -18.255516052246094, -17.737321853637695, -17.21912956237793, -16.700937271118164, -16.182743072509766, -15.66455078125, -15.146357536315918, -14.628165245056152, -14.10997200012207, -13.591779708862305, -13.073586463928223, -12.55539321899414, -12.037200927734375, -11.519007682800293, -11.000814437866211, -10.482622146606445, -9.964428901672363, -9.446236610412598, -8.928043365478516, -8.409850120544434, -7.89165735244751, -7.373464584350586, -6.855271339416504, -6.337078094482422, -5.818885326385498, -5.300692081451416, -4.782499313354492, -4.26430606842041, -3.7461133003234863, -3.2279205322265625, -2.7097275257110596, -2.1915345191955566, -1.6733415126800537, -1.1551486253738403, -0.636955738067627, -0.11876273155212402, 0.3994302749633789, 0.9176230430603027, 1.4358160495758057, 1.9540090560913086, 2.4722020626068115, 2.9903950691223145, 3.5085878372192383, 4.02678108215332, 4.544973850250244, 5.063166618347168, 5.58135986328125, 6.099552631378174, 6.617745399475098, 7.13593864440918, 7.6541314125061035, 8.172324180603027, 8.69051742553711, 9.208709716796875, 9.726902961730957, 10.245096206665039]}, "gradients/encoder.encoder.layers.6.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 3.0, 1.0, 6.0, 4.0, 4.0, 5.0, 7.0, 8.0, 13.0, 5.0, 20.0, 18.0, 14.0, 21.0, 18.0, 36.0, 40.0, 37.0, 34.0, 42.0, 56.0, 47.0, 67.0, 57.0, 52.0, 44.0, 43.0, 58.0, 24.0, 35.0, 21.0, 15.0, 26.0, 30.0, 18.0, 7.0, 20.0, 7.0, 10.0, 12.0, 4.0, 9.0, 5.0, 1.0, 4.0, 1.0, 3.0, 4.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-6.808499336242676, -6.574554920196533, -6.340610504150391, -6.106666564941406, -5.872722148895264, -5.638777732849121, -5.404833793640137, -5.170889377593994, -4.936944961547852, -4.703000545501709, -4.469056129455566, -4.235112190246582, -4.0011677742004395, -3.767223358154297, -3.5332791805267334, -3.29933500289917, -3.0653905868530273, -2.8314461708068848, -2.5975019931793213, -2.363557815551758, -2.1296133995056152, -1.8956691026687622, -1.6617248058319092, -1.4277805089950562, -1.1938362121582031, -0.9598919153213501, -0.7259476184844971, -0.49200332164764404, -0.258059024810791, -0.02411472797393799, 0.20982956886291504, 0.44377386569976807, 0.6777181625366211, 0.9116624593734741, 1.1456067562103271, 1.3795510530471802, 1.6134953498840332, 1.8474396467208862, 2.0813839435577393, 2.3153281211853027, 2.5492725372314453, 2.783216953277588, 3.0171611309051514, 3.251105308532715, 3.4850497245788574, 3.718994140625, 3.9529383182525635, 4.186882495880127, 4.4208269119262695, 4.654771327972412, 4.888715744018555, 5.122659683227539, 5.356604099273682, 5.590548515319824, 5.824492454528809, 6.058436870574951, 6.292381286621094, 6.526325702667236, 6.760270118713379, 6.994214057922363, 7.228158473968506, 7.462102890014648, 7.696046829223633, 7.929991245269775, 8.163935661315918]}, "gradients/encoder.encoder.layers.5.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 1.0, 1.0, 2.0, 1.0, 5.0, 5.0, 9.0, 7.0, 11.0, 20.0, 32.0, 40.0, 81.0, 128.0, 191.0, 422.0, 1039.0, 3308.0, 16675.0, 192017.0, 3529800.0, 419879.0, 23823.0, 4441.0, 1229.0, 484.0, 229.0, 146.0, 79.0, 53.0, 38.0, 38.0, 17.0, 13.0, 9.0, 9.0, 6.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0], "bins": [-1.2626953125, -1.2308120727539062, -1.1989288330078125, -1.1670455932617188, -1.135162353515625, -1.1032791137695312, -1.0713958740234375, -1.0395126342773438, -1.00762939453125, -0.9757461547851562, -0.9438629150390625, -0.9119796752929688, -0.880096435546875, -0.8482131958007812, -0.8163299560546875, -0.7844467163085938, -0.7525634765625, -0.7206802368164062, -0.6887969970703125, -0.6569137573242188, -0.625030517578125, -0.5931472778320312, -0.5612640380859375, -0.5293807983398438, -0.49749755859375, -0.46561431884765625, -0.4337310791015625, -0.40184783935546875, -0.369964599609375, -0.33808135986328125, -0.3061981201171875, -0.27431488037109375, -0.242431640625, -0.21054840087890625, -0.1786651611328125, -0.14678192138671875, -0.114898681640625, -0.08301544189453125, -0.0511322021484375, -0.01924896240234375, 0.01263427734375, 0.04451751708984375, 0.0764007568359375, 0.10828399658203125, 0.140167236328125, 0.17205047607421875, 0.2039337158203125, 0.23581695556640625, 0.2677001953125, 0.29958343505859375, 0.3314666748046875, 0.36334991455078125, 0.395233154296875, 0.42711639404296875, 0.4589996337890625, 0.49088287353515625, 0.52276611328125, 0.5546493530273438, 0.5865325927734375, 0.6184158325195312, 0.650299072265625, 0.6821823120117188, 0.7140655517578125, 0.7459487915039062, 0.77783203125]}, "gradients/encoder.encoder.layers.5.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 4.0, 2.0, 0.0, 6.0, 6.0, 18.0, 28.0, 26.0, 30.0, 63.0, 62.0, 83.0, 98.0, 103.0, 91.0, 96.0, 85.0, 71.0, 41.0, 38.0, 22.0, 21.0, 8.0, 4.0, 3.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.95263671875, -0.9244003295898438, -0.8961639404296875, -0.8679275512695312, -0.839691162109375, -0.8114547729492188, -0.7832183837890625, -0.7549819946289062, -0.72674560546875, -0.6985092163085938, -0.6702728271484375, -0.6420364379882812, -0.613800048828125, -0.5855636596679688, -0.5573272705078125, -0.5290908813476562, -0.5008544921875, -0.47261810302734375, -0.4443817138671875, -0.41614532470703125, -0.387908935546875, -0.35967254638671875, -0.3314361572265625, -0.30319976806640625, -0.27496337890625, -0.24672698974609375, -0.2184906005859375, -0.19025421142578125, -0.162017822265625, -0.13378143310546875, -0.1055450439453125, -0.07730865478515625, -0.049072265625, -0.02083587646484375, 0.0074005126953125, 0.03563690185546875, 0.063873291015625, 0.09210968017578125, 0.1203460693359375, 0.14858245849609375, 0.17681884765625, 0.20505523681640625, 0.2332916259765625, 0.26152801513671875, 0.289764404296875, 0.31800079345703125, 0.3462371826171875, 0.37447357177734375, 0.4027099609375, 0.43094635009765625, 0.4591827392578125, 0.48741912841796875, 0.515655517578125, 0.5438919067382812, 0.5721282958984375, 0.6003646850585938, 0.62860107421875, 0.6568374633789062, 0.6850738525390625, 0.7133102416992188, 0.741546630859375, 0.7697830200195312, 0.7980194091796875, 0.8262557983398438, 0.8544921875]}, "gradients/encoder.encoder.layers.5.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 1.0, 4.0, 2.0, 11.0, 22.0, 38.0, 63.0, 138.0, 293.0, 772.0, 3267.0, 29310.0, 2471930.0, 1660037.0, 24659.0, 2753.0, 610.0, 224.0, 81.0, 31.0, 22.0, 8.0, 11.0, 2.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.9853515625, -1.9344024658203125, -1.883453369140625, -1.8325042724609375, -1.78155517578125, -1.7306060791015625, -1.679656982421875, -1.6287078857421875, -1.5777587890625, -1.5268096923828125, -1.475860595703125, -1.4249114990234375, -1.37396240234375, -1.3230133056640625, -1.272064208984375, -1.2211151123046875, -1.170166015625, -1.1192169189453125, -1.068267822265625, -1.0173187255859375, -0.96636962890625, -0.9154205322265625, -0.864471435546875, -0.8135223388671875, -0.7625732421875, -0.7116241455078125, -0.660675048828125, -0.6097259521484375, -0.55877685546875, -0.5078277587890625, -0.456878662109375, -0.4059295654296875, -0.35498046875, -0.3040313720703125, -0.253082275390625, -0.2021331787109375, -0.15118408203125, -0.1002349853515625, -0.049285888671875, 0.0016632080078125, 0.0526123046875, 0.1035614013671875, 0.154510498046875, 0.2054595947265625, 0.25640869140625, 0.3073577880859375, 0.358306884765625, 0.4092559814453125, 0.460205078125, 0.5111541748046875, 0.562103271484375, 0.6130523681640625, 0.66400146484375, 0.7149505615234375, 0.765899658203125, 0.8168487548828125, 0.8677978515625, 0.9187469482421875, 0.969696044921875, 1.0206451416015625, 1.07159423828125, 1.1225433349609375, 1.173492431640625, 1.2244415283203125, 1.275390625]}, "gradients/encoder.encoder.layers.5.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 3.0, 3.0, 6.0, 9.0, 10.0, 6.0, 12.0, 18.0, 35.0, 48.0, 69.0, 114.0, 304.0, 598.0, 951.0, 865.0, 477.0, 239.0, 106.0, 68.0, 44.0, 34.0, 19.0, 11.0, 12.0, 8.0, 6.0, 3.0, 1.0, 2.0, 2.0, 0.0, 3.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.4091796875, -1.3698883056640625, -1.330596923828125, -1.2913055419921875, -1.25201416015625, -1.2127227783203125, -1.173431396484375, -1.1341400146484375, -1.0948486328125, -1.0555572509765625, -1.016265869140625, -0.9769744873046875, -0.93768310546875, -0.8983917236328125, -0.859100341796875, -0.8198089599609375, -0.780517578125, -0.7412261962890625, -0.701934814453125, -0.6626434326171875, -0.62335205078125, -0.5840606689453125, -0.544769287109375, -0.5054779052734375, -0.4661865234375, -0.4268951416015625, -0.387603759765625, -0.3483123779296875, -0.30902099609375, -0.2697296142578125, -0.230438232421875, -0.1911468505859375, -0.15185546875, -0.1125640869140625, -0.073272705078125, -0.0339813232421875, 0.00531005859375, 0.0446014404296875, 0.083892822265625, 0.1231842041015625, 0.1624755859375, 0.2017669677734375, 0.241058349609375, 0.2803497314453125, 0.31964111328125, 0.3589324951171875, 0.398223876953125, 0.4375152587890625, 0.476806640625, 0.5160980224609375, 0.555389404296875, 0.5946807861328125, 0.63397216796875, 0.6732635498046875, 0.712554931640625, 0.7518463134765625, 0.7911376953125, 0.8304290771484375, 0.869720458984375, 0.9090118408203125, 0.94830322265625, 0.9875946044921875, 1.026885986328125, 1.0661773681640625, 1.10546875]}, "gradients/encoder.encoder.layers.5.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 2.0, 1.0, 0.0, 2.0, 4.0, 10.0, 21.0, 62.0, 183.0, 344.0, 236.0, 103.0, 19.0, 9.0, 3.0, 6.0, 1.0, 1.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-12.195302963256836, -11.739367485046387, -11.283432960510254, -10.827497482299805, -10.371562957763672, -9.915627479553223, -9.45969295501709, -9.00375747680664, -8.547822952270508, -8.091887474060059, -7.635952949523926, -7.180017948150635, -6.724082946777344, -6.2681474685668945, -5.812212944030762, -5.3562774658203125, -4.9003424644470215, -4.4444074630737305, -3.9884724617004395, -3.5325374603271484, -3.0766024589538574, -2.6206672191619873, -2.1647322177886963, -1.7087972164154053, -1.2528622150421143, -0.7969272136688232, -0.34099215269088745, 0.11494290828704834, 0.5708779096603394, 1.02681303024292, 1.482748031616211, 1.938683032989502, 2.394618034362793, 2.850553035736084, 3.306488037109375, 3.762423038482666, 4.218358039855957, 4.674293518066406, 5.130228042602539, 5.586163520812988, 6.042098045349121, 6.498033046722412, 6.953968048095703, 7.409903049468994, 7.865838050842285, 8.321773529052734, 8.777708053588867, 9.233643531799316, 9.689579010009766, 10.145514488220215, 10.601449012756348, 11.057384490966797, 11.51331901550293, 11.969254493713379, 12.425189018249512, 12.881124496459961, 13.337059020996094, 13.792994499206543, 14.248929023742676, 14.704864501953125, 15.160799026489258, 15.616734504699707, 16.072669982910156, 16.52860450744629, 16.984539031982422]}, "gradients/encoder.encoder.layers.5.final_layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 3.0, 3.0, 6.0, 6.0, 5.0, 6.0, 10.0, 6.0, 19.0, 11.0, 14.0, 19.0, 27.0, 28.0, 43.0, 41.0, 42.0, 56.0, 58.0, 49.0, 66.0, 58.0, 54.0, 55.0, 50.0, 49.0, 40.0, 36.0, 25.0, 17.0, 21.0, 22.0, 14.0, 9.0, 12.0, 8.0, 9.0, 4.0, 2.0, 2.0, 2.0, 4.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.975935935974121, -4.809560775756836, -4.643185615539551, -4.476810932159424, -4.310435771942139, -4.1440606117248535, -3.9776854515075684, -3.811310291290283, -3.644935369491577, -3.478560209274292, -3.312185287475586, -3.145810127258301, -2.9794349670410156, -2.8130600452423096, -2.6466848850250244, -2.4803099632263184, -2.313934803009033, -2.147559642791748, -1.981184720993042, -1.8148095607757568, -1.6484345197677612, -1.4820594787597656, -1.3156843185424805, -1.1493092775344849, -0.9829342365264893, -0.8165591955184937, -0.6501840949058533, -0.4838090240955353, -0.3174339532852173, -0.15105891227722168, 0.015316188335418701, 0.18169128894805908, 0.3480663299560547, 0.5144413709640503, 0.6808164715766907, 0.847191572189331, 1.0135666131973267, 1.1799416542053223, 1.3463168144226074, 1.512691855430603, 1.6790668964385986, 1.8454419374465942, 2.01181697845459, 2.178192138671875, 2.34456729888916, 2.510942220687866, 2.6773173809051514, 2.8436923027038574, 3.0100674629211426, 3.1764426231384277, 3.342817544937134, 3.509192705154419, 3.675567626953125, 3.84194278717041, 4.008317947387695, 4.1746931076049805, 4.341068267822266, 4.507443428039551, 4.673818588256836, 4.840193271636963, 5.006568431854248, 5.172943592071533, 5.339318752288818, 5.5056939125061035, 5.6720685958862305]}, "gradients/encoder.encoder.layers.5.attention.out_proj.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 3.0, 6.0, 5.0, 15.0, 15.0, 14.0, 28.0, 41.0, 56.0, 69.0, 127.0, 186.0, 243.0, 385.0, 681.0, 1165.0, 2219.0, 4604.0, 9824.0, 23457.0, 59631.0, 154248.0, 312223.0, 275130.0, 122370.0, 46872.0, 18565.0, 8027.0, 3794.0, 1885.0, 1036.0, 545.0, 389.0, 225.0, 148.0, 104.0, 65.0, 48.0, 30.0, 37.0, 17.0, 8.0, 8.0, 9.0, 3.0, 2.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.603515625, -0.58331298828125, -0.5631103515625, -0.54290771484375, -0.522705078125, -0.50250244140625, -0.4822998046875, -0.46209716796875, -0.44189453125, -0.42169189453125, -0.4014892578125, -0.38128662109375, -0.361083984375, -0.34088134765625, -0.3206787109375, -0.30047607421875, -0.2802734375, -0.26007080078125, -0.2398681640625, -0.21966552734375, -0.199462890625, -0.17926025390625, -0.1590576171875, -0.13885498046875, -0.11865234375, -0.09844970703125, -0.0782470703125, -0.05804443359375, -0.037841796875, -0.01763916015625, 0.0025634765625, 0.02276611328125, 0.04296875, 0.06317138671875, 0.0833740234375, 0.10357666015625, 0.123779296875, 0.14398193359375, 0.1641845703125, 0.18438720703125, 0.20458984375, 0.22479248046875, 0.2449951171875, 0.26519775390625, 0.285400390625, 0.30560302734375, 0.3258056640625, 0.34600830078125, 0.3662109375, 0.38641357421875, 0.4066162109375, 0.42681884765625, 0.447021484375, 0.46722412109375, 0.4874267578125, 0.50762939453125, 0.52783203125, 0.54803466796875, 0.5682373046875, 0.58843994140625, 0.608642578125, 0.62884521484375, 0.6490478515625, 0.66925048828125, 0.689453125]}, "gradients/encoder.encoder.layers.5.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 3.0, 5.0, 7.0, 7.0, 16.0, 25.0, 26.0, 42.0, 50.0, 67.0, 72.0, 92.0, 97.0, 104.0, 76.0, 79.0, 63.0, 52.0, 49.0, 30.0, 14.0, 14.0, 4.0, 4.0, 5.0, 0.0, 5.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.87060546875, -0.8449172973632812, -0.8192291259765625, -0.7935409545898438, -0.767852783203125, -0.7421646118164062, -0.7164764404296875, -0.6907882690429688, -0.66510009765625, -0.6394119262695312, -0.6137237548828125, -0.5880355834960938, -0.562347412109375, -0.5366592407226562, -0.5109710693359375, -0.48528289794921875, -0.4595947265625, -0.43390655517578125, -0.4082183837890625, -0.38253021240234375, -0.356842041015625, -0.33115386962890625, -0.3054656982421875, -0.27977752685546875, -0.25408935546875, -0.22840118408203125, -0.2027130126953125, -0.17702484130859375, -0.151336669921875, -0.12564849853515625, -0.0999603271484375, -0.07427215576171875, -0.048583984375, -0.02289581298828125, 0.0027923583984375, 0.02848052978515625, 0.054168701171875, 0.07985687255859375, 0.1055450439453125, 0.13123321533203125, 0.15692138671875, 0.18260955810546875, 0.2082977294921875, 0.23398590087890625, 0.259674072265625, 0.28536224365234375, 0.3110504150390625, 0.33673858642578125, 0.3624267578125, 0.38811492919921875, 0.4138031005859375, 0.43949127197265625, 0.465179443359375, 0.49086761474609375, 0.5165557861328125, 0.5422439575195312, 0.56793212890625, 0.5936203002929688, 0.6193084716796875, 0.6449966430664062, 0.670684814453125, 0.6963729858398438, 0.7220611572265625, 0.7477493286132812, 0.7734375]}, "gradients/encoder.encoder.layers.5.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 2.0, 2.0, 0.0, 5.0, 1.0, 8.0, 6.0, 15.0, 16.0, 13.0, 27.0, 31.0, 47.0, 59.0, 79.0, 130.0, 169.0, 242.0, 292.0, 478.0, 730.0, 1209.0, 2458.0, 7311.0, 42912.0, 551185.0, 392141.0, 36717.0, 6575.0, 2326.0, 1181.0, 627.0, 443.0, 316.0, 222.0, 146.0, 115.0, 83.0, 61.0, 39.0, 39.0, 28.0, 16.0, 20.0, 11.0, 7.0, 10.0, 5.0, 6.0, 1.0, 5.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.5517578125, -1.5046844482421875, -1.457611083984375, -1.4105377197265625, -1.36346435546875, -1.3163909912109375, -1.269317626953125, -1.2222442626953125, -1.1751708984375, -1.1280975341796875, -1.081024169921875, -1.0339508056640625, -0.98687744140625, -0.9398040771484375, -0.892730712890625, -0.8456573486328125, -0.798583984375, -0.7515106201171875, -0.704437255859375, -0.6573638916015625, -0.61029052734375, -0.5632171630859375, -0.516143798828125, -0.4690704345703125, -0.4219970703125, -0.3749237060546875, -0.327850341796875, -0.2807769775390625, -0.23370361328125, -0.1866302490234375, -0.139556884765625, -0.0924835205078125, -0.04541015625, 0.0016632080078125, 0.048736572265625, 0.0958099365234375, 0.14288330078125, 0.1899566650390625, 0.237030029296875, 0.2841033935546875, 0.3311767578125, 0.3782501220703125, 0.425323486328125, 0.4723968505859375, 0.51947021484375, 0.5665435791015625, 0.613616943359375, 0.6606903076171875, 0.707763671875, 0.7548370361328125, 0.801910400390625, 0.8489837646484375, 0.89605712890625, 0.9431304931640625, 0.990203857421875, 1.0372772216796875, 1.0843505859375, 1.1314239501953125, 1.178497314453125, 1.2255706787109375, 1.27264404296875, 1.3197174072265625, 1.366790771484375, 1.4138641357421875, 1.4609375]}, "gradients/encoder.encoder.layers.5.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 5.0, 3.0, 8.0, 7.0, 9.0, 5.0, 13.0, 12.0, 7.0, 11.0, 16.0, 20.0, 18.0, 14.0, 25.0, 32.0, 33.0, 33.0, 49.0, 58.0, 43.0, 43.0, 39.0, 42.0, 49.0, 45.0, 48.0, 37.0, 38.0, 39.0, 27.0, 17.0, 24.0, 17.0, 20.0, 19.0, 14.0, 16.0, 13.0, 7.0, 6.0, 10.0, 2.0, 8.0, 4.0, 2.0, 1.0, 5.0, 1.0, 1.0, 0.0, 3.0, 1.0], "bins": [-1.94140625, -1.8848114013671875, -1.828216552734375, -1.7716217041015625, -1.71502685546875, -1.6584320068359375, -1.601837158203125, -1.5452423095703125, -1.4886474609375, -1.4320526123046875, -1.375457763671875, -1.3188629150390625, -1.26226806640625, -1.2056732177734375, -1.149078369140625, -1.0924835205078125, -1.035888671875, -0.9792938232421875, -0.922698974609375, -0.8661041259765625, -0.80950927734375, -0.7529144287109375, -0.696319580078125, -0.6397247314453125, -0.5831298828125, -0.5265350341796875, -0.469940185546875, -0.4133453369140625, -0.35675048828125, -0.3001556396484375, -0.243560791015625, -0.1869659423828125, -0.13037109375, -0.0737762451171875, -0.017181396484375, 0.0394134521484375, 0.09600830078125, 0.1526031494140625, 0.209197998046875, 0.2657928466796875, 0.3223876953125, 0.3789825439453125, 0.435577392578125, 0.4921722412109375, 0.54876708984375, 0.6053619384765625, 0.661956787109375, 0.7185516357421875, 0.775146484375, 0.8317413330078125, 0.888336181640625, 0.9449310302734375, 1.00152587890625, 1.0581207275390625, 1.114715576171875, 1.1713104248046875, 1.2279052734375, 1.2845001220703125, 1.341094970703125, 1.3976898193359375, 1.45428466796875, 1.5108795166015625, 1.567474365234375, 1.6240692138671875, 1.6806640625]}, "gradients/encoder.encoder.layers.5.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 5.0, 3.0, 2.0, 5.0, 5.0, 7.0, 18.0, 14.0, 28.0, 38.0, 52.0, 99.0, 179.0, 271.0, 678.0, 1843.0, 5841.0, 33848.0, 668913.0, 307529.0, 22064.0, 4465.0, 1363.0, 621.0, 317.0, 133.0, 79.0, 44.0, 37.0, 21.0, 18.0, 3.0, 10.0, 6.0, 4.0, 2.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.59912109375, -0.5809097290039062, -0.5626983642578125, -0.5444869995117188, -0.526275634765625, -0.5080642700195312, -0.4898529052734375, -0.47164154052734375, -0.45343017578125, -0.43521881103515625, -0.4170074462890625, -0.39879608154296875, -0.380584716796875, -0.36237335205078125, -0.3441619873046875, -0.32595062255859375, -0.3077392578125, -0.28952789306640625, -0.2713165283203125, -0.25310516357421875, -0.234893798828125, -0.21668243408203125, -0.1984710693359375, -0.18025970458984375, -0.16204833984375, -0.14383697509765625, -0.1256256103515625, -0.10741424560546875, -0.089202880859375, -0.07099151611328125, -0.0527801513671875, -0.03456878662109375, -0.016357421875, 0.00185394287109375, 0.0200653076171875, 0.03827667236328125, 0.056488037109375, 0.07469940185546875, 0.0929107666015625, 0.11112213134765625, 0.12933349609375, 0.14754486083984375, 0.1657562255859375, 0.18396759033203125, 0.202178955078125, 0.22039031982421875, 0.2386016845703125, 0.25681304931640625, 0.2750244140625, 0.29323577880859375, 0.3114471435546875, 0.32965850830078125, 0.347869873046875, 0.36608123779296875, 0.3842926025390625, 0.40250396728515625, 0.42071533203125, 0.43892669677734375, 0.4571380615234375, 0.47534942626953125, 0.493560791015625, 0.5117721557617188, 0.5299835205078125, 0.5481948852539062, 0.56640625]}, "gradients/encoder.encoder.layers.5.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 2.0, 3.0, 4.0, 7.0, 6.0, 9.0, 5.0, 18.0, 23.0, 19.0, 35.0, 38.0, 49.0, 63.0, 91.0, 108.0, 118.0, 79.0, 84.0, 59.0, 33.0, 41.0, 19.0, 22.0, 19.0, 22.0, 12.0, 8.0, 2.0, 2.0, 4.0, 1.0, 2.0, 1.0, 3.0, 2.0, 1.0, 0.0, 1.0, 2.0], "bins": [-0.00011360645294189453, -0.00011077430099248886, -0.00010794214904308319, -0.00010510999709367752, -0.00010227784514427185, -9.944569319486618e-05, -9.661354124546051e-05, -9.378138929605484e-05, -9.094923734664917e-05, -8.81170853972435e-05, -8.528493344783783e-05, -8.245278149843216e-05, -7.962062954902649e-05, -7.678847759962082e-05, -7.395632565021515e-05, -7.112417370080948e-05, -6.829202175140381e-05, -6.545986980199814e-05, -6.262771785259247e-05, -5.97955659031868e-05, -5.696341395378113e-05, -5.413126200437546e-05, -5.129911005496979e-05, -4.846695810556412e-05, -4.563480615615845e-05, -4.280265420675278e-05, -3.997050225734711e-05, -3.713835030794144e-05, -3.4306198358535767e-05, -3.1474046409130096e-05, -2.8641894459724426e-05, -2.5809742510318756e-05, -2.2977590560913086e-05, -2.0145438611507416e-05, -1.7313286662101746e-05, -1.4481134712696075e-05, -1.1648982763290405e-05, -8.816830813884735e-06, -5.984678864479065e-06, -3.1525269150733948e-06, -3.203749656677246e-07, 2.5117769837379456e-06, 5.343928933143616e-06, 8.176080882549286e-06, 1.1008232831954956e-05, 1.3840384781360626e-05, 1.6672536730766296e-05, 1.9504688680171967e-05, 2.2336840629577637e-05, 2.5168992578983307e-05, 2.8001144528388977e-05, 3.083329647779465e-05, 3.366544842720032e-05, 3.649760037660599e-05, 3.932975232601166e-05, 4.216190427541733e-05, 4.4994056224823e-05, 4.782620817422867e-05, 5.065836012363434e-05, 5.349051207304001e-05, 5.632266402244568e-05, 5.915481597185135e-05, 6.198696792125702e-05, 6.481911987066269e-05, 6.765127182006836e-05]}, "gradients/encoder.encoder.layers.5.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 2.0, 1.0, 1.0, 4.0, 6.0, 2.0, 5.0, 13.0, 18.0, 37.0, 44.0, 70.0, 112.0, 207.0, 378.0, 710.0, 1893.0, 6822.0, 56294.0, 863266.0, 104752.0, 9680.0, 2350.0, 883.0, 421.0, 226.0, 137.0, 75.0, 51.0, 43.0, 21.0, 12.0, 13.0, 6.0, 3.0, 2.0, 1.0, 2.0, 1.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.68994140625, -0.670562744140625, -0.65118408203125, -0.631805419921875, -0.6124267578125, -0.593048095703125, -0.57366943359375, -0.554290771484375, -0.534912109375, -0.515533447265625, -0.49615478515625, -0.476776123046875, -0.4573974609375, -0.438018798828125, -0.41864013671875, -0.399261474609375, -0.3798828125, -0.360504150390625, -0.34112548828125, -0.321746826171875, -0.3023681640625, -0.282989501953125, -0.26361083984375, -0.244232177734375, -0.224853515625, -0.205474853515625, -0.18609619140625, -0.166717529296875, -0.1473388671875, -0.127960205078125, -0.10858154296875, -0.089202880859375, -0.06982421875, -0.050445556640625, -0.03106689453125, -0.011688232421875, 0.0076904296875, 0.027069091796875, 0.04644775390625, 0.065826416015625, 0.085205078125, 0.104583740234375, 0.12396240234375, 0.143341064453125, 0.1627197265625, 0.182098388671875, 0.20147705078125, 0.220855712890625, 0.240234375, 0.259613037109375, 0.27899169921875, 0.298370361328125, 0.3177490234375, 0.337127685546875, 0.35650634765625, 0.375885009765625, 0.395263671875, 0.414642333984375, 0.43402099609375, 0.453399658203125, 0.4727783203125, 0.492156982421875, 0.51153564453125, 0.530914306640625, 0.55029296875]}, "gradients/encoder.encoder.layers.5.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 3.0, 7.0, 9.0, 12.0, 18.0, 19.0, 25.0, 34.0, 47.0, 64.0, 98.0, 107.0, 104.0, 107.0, 108.0, 59.0, 47.0, 31.0, 26.0, 15.0, 18.0, 9.0, 8.0, 9.0, 5.0, 2.0, 3.0, 2.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.6044921875, -0.5864791870117188, -0.5684661865234375, -0.5504531860351562, -0.532440185546875, -0.5144271850585938, -0.4964141845703125, -0.47840118408203125, -0.46038818359375, -0.44237518310546875, -0.4243621826171875, -0.40634918212890625, -0.388336181640625, -0.37032318115234375, -0.3523101806640625, -0.33429718017578125, -0.3162841796875, -0.29827117919921875, -0.2802581787109375, -0.26224517822265625, -0.244232177734375, -0.22621917724609375, -0.2082061767578125, -0.19019317626953125, -0.17218017578125, -0.15416717529296875, -0.1361541748046875, -0.11814117431640625, -0.100128173828125, -0.08211517333984375, -0.0641021728515625, -0.04608917236328125, -0.028076171875, -0.01006317138671875, 0.0079498291015625, 0.02596282958984375, 0.043975830078125, 0.06198883056640625, 0.0800018310546875, 0.09801483154296875, 0.11602783203125, 0.13404083251953125, 0.1520538330078125, 0.17006683349609375, 0.188079833984375, 0.20609283447265625, 0.2241058349609375, 0.24211883544921875, 0.2601318359375, 0.27814483642578125, 0.2961578369140625, 0.31417083740234375, 0.332183837890625, 0.35019683837890625, 0.3682098388671875, 0.38622283935546875, 0.40423583984375, 0.42224884033203125, 0.4402618408203125, 0.45827484130859375, 0.476287841796875, 0.49430084228515625, 0.5123138427734375, 0.5303268432617188, 0.54833984375]}, "gradients/encoder.encoder.layers.5.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 3.0, 1.0, 0.0, 2.0, 3.0, 2.0, 2.0, 4.0, 6.0, 7.0, 39.0, 77.0, 212.0, 346.0, 185.0, 66.0, 29.0, 18.0, 5.0, 4.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-22.19564437866211, -21.73833465576172, -21.281023025512695, -20.823713302612305, -20.36640167236328, -19.90909194946289, -19.451780319213867, -18.994470596313477, -18.537158966064453, -18.079849243164062, -17.62253761291504, -17.16522789001465, -16.707916259765625, -16.250606536865234, -15.793295860290527, -15.33598518371582, -14.87867546081543, -14.421364784240723, -13.964054107666016, -13.506743431091309, -13.049432754516602, -12.592123031616211, -12.134812355041504, -11.677501678466797, -11.22019100189209, -10.762880325317383, -10.305569648742676, -9.848258972167969, -9.390949249267578, -8.933637619018555, -8.476327896118164, -8.019017219543457, -7.561707019805908, -7.104396343231201, -6.647086143493652, -6.189775466918945, -5.732464790344238, -5.275154113769531, -4.817843437194824, -4.360533237457275, -3.9032225608825684, -3.4459118843078613, -2.9886014461517334, -2.5312910079956055, -2.0739803314208984, -1.6166696548461914, -1.1593592166900635, -0.7020487785339355, -0.24473810195922852, 0.21257245540618896, 0.6698830127716064, 1.127193570137024, 1.5845041275024414, 2.0418148040771484, 2.4991252422332764, 2.9564356803894043, 3.4137463569641113, 3.8710570335388184, 4.328367233276367, 4.785677909851074, 5.242988586425781, 5.700299263000488, 6.157609939575195, 6.614920139312744, 7.072230815887451]}, "gradients/encoder.encoder.layers.5.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 4.0, 1.0, 5.0, 1.0, 3.0, 0.0, 4.0, 7.0, 11.0, 10.0, 7.0, 17.0, 18.0, 14.0, 15.0, 15.0, 18.0, 33.0, 32.0, 32.0, 24.0, 41.0, 39.0, 45.0, 53.0, 84.0, 53.0, 62.0, 53.0, 41.0, 34.0, 30.0, 36.0, 28.0, 21.0, 24.0, 15.0, 21.0, 12.0, 9.0, 8.0, 9.0, 7.0, 5.0, 2.0, 6.0, 2.0, 1.0, 3.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.471921920776367, -6.248087406158447, -6.024252891540527, -5.800418376922607, -5.5765838623046875, -5.352749824523926, -5.128914833068848, -4.905080795288086, -4.681246280670166, -4.457411766052246, -4.233577251434326, -4.009742736816406, -3.7859084606170654, -3.5620739459991455, -3.3382394313812256, -3.1144051551818848, -2.8905704021453857, -2.666735887527466, -2.442901372909546, -2.219067096710205, -1.9952325820922852, -1.7713980674743652, -1.5475635528564453, -1.323729157447815, -1.099894642829895, -0.8760601878166199, -0.6522257328033447, -0.4283912181854248, -0.20455676317214966, 0.01927769184112549, 0.2431122064590454, 0.4669466018676758, 0.6907811164855957, 0.9146155714988708, 1.138450026512146, 1.362284541130066, 1.5861189365386963, 1.8099534511566162, 2.033787965774536, 2.257622241973877, 2.481456756591797, 2.705291271209717, 2.9291257858276367, 3.1529603004455566, 3.3767945766448975, 3.6006290912628174, 3.8244636058807373, 4.048297882080078, 4.272132873535156, 4.495967388153076, 4.719801902770996, 4.943636417388916, 5.167470932006836, 5.391304969787598, 5.615139961242676, 5.8389739990234375, 6.062808513641357, 6.286643028259277, 6.510477542877197, 6.734312057495117, 6.958146572113037, 7.181981086730957, 7.405815124511719, 7.629649639129639, 7.853484153747559]}, "gradients/encoder.encoder.layers.4.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 1.0, 6.0, 3.0, 5.0, 7.0, 13.0, 9.0, 24.0, 43.0, 73.0, 122.0, 225.0, 473.0, 1012.0, 2842.0, 9793.0, 48000.0, 508167.0, 3087990.0, 477593.0, 44336.0, 8839.0, 2690.0, 976.0, 463.0, 244.0, 146.0, 66.0, 45.0, 30.0, 14.0, 18.0, 9.0, 4.0, 2.0, 2.0, 2.0, 0.0, 2.0, 3.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.73779296875, -0.7170028686523438, -0.6962127685546875, -0.6754226684570312, -0.654632568359375, -0.6338424682617188, -0.6130523681640625, -0.5922622680664062, -0.57147216796875, -0.5506820678710938, -0.5298919677734375, -0.5091018676757812, -0.488311767578125, -0.46752166748046875, -0.4467315673828125, -0.42594146728515625, -0.4051513671875, -0.38436126708984375, -0.3635711669921875, -0.34278106689453125, -0.321990966796875, -0.30120086669921875, -0.2804107666015625, -0.25962066650390625, -0.23883056640625, -0.21804046630859375, -0.1972503662109375, -0.17646026611328125, -0.155670166015625, -0.13488006591796875, -0.1140899658203125, -0.09329986572265625, -0.072509765625, -0.05171966552734375, -0.0309295654296875, -0.01013946533203125, 0.010650634765625, 0.03144073486328125, 0.0522308349609375, 0.07302093505859375, 0.09381103515625, 0.11460113525390625, 0.1353912353515625, 0.15618133544921875, 0.176971435546875, 0.19776153564453125, 0.2185516357421875, 0.23934173583984375, 0.2601318359375, 0.28092193603515625, 0.3017120361328125, 0.32250213623046875, 0.343292236328125, 0.36408233642578125, 0.3848724365234375, 0.40566253662109375, 0.42645263671875, 0.44724273681640625, 0.4680328369140625, 0.48882293701171875, 0.509613037109375, 0.5304031372070312, 0.5511932373046875, 0.5719833374023438, 0.5927734375]}, "gradients/encoder.encoder.layers.4.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 3.0, 3.0, 4.0, 5.0, 9.0, 4.0, 11.0, 30.0, 22.0, 39.0, 53.0, 66.0, 77.0, 90.0, 85.0, 105.0, 75.0, 84.0, 63.0, 54.0, 40.0, 30.0, 19.0, 14.0, 8.0, 7.0, 1.0, 8.0, 2.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.87548828125, -0.8504791259765625, -0.825469970703125, -0.8004608154296875, -0.77545166015625, -0.7504425048828125, -0.725433349609375, -0.7004241943359375, -0.6754150390625, -0.6504058837890625, -0.625396728515625, -0.6003875732421875, -0.57537841796875, -0.5503692626953125, -0.525360107421875, -0.5003509521484375, -0.475341796875, -0.4503326416015625, -0.425323486328125, -0.4003143310546875, -0.37530517578125, -0.3502960205078125, -0.325286865234375, -0.3002777099609375, -0.2752685546875, -0.2502593994140625, -0.225250244140625, -0.2002410888671875, -0.17523193359375, -0.1502227783203125, -0.125213623046875, -0.1002044677734375, -0.0751953125, -0.0501861572265625, -0.025177001953125, -0.0001678466796875, 0.02484130859375, 0.0498504638671875, 0.074859619140625, 0.0998687744140625, 0.1248779296875, 0.1498870849609375, 0.174896240234375, 0.1999053955078125, 0.22491455078125, 0.2499237060546875, 0.274932861328125, 0.2999420166015625, 0.324951171875, 0.3499603271484375, 0.374969482421875, 0.3999786376953125, 0.42498779296875, 0.4499969482421875, 0.475006103515625, 0.5000152587890625, 0.5250244140625, 0.5500335693359375, 0.575042724609375, 0.6000518798828125, 0.62506103515625, 0.6500701904296875, 0.675079345703125, 0.7000885009765625, 0.72509765625]}, "gradients/encoder.encoder.layers.4.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 3.0, 4.0, 6.0, 10.0, 36.0, 55.0, 91.0, 185.0, 532.0, 2272.0, 25967.0, 3783608.0, 371839.0, 8048.0, 1125.0, 310.0, 107.0, 47.0, 24.0, 9.0, 8.0, 5.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.1953125, -2.1372222900390625, -2.079132080078125, -2.0210418701171875, -1.96295166015625, -1.9048614501953125, -1.846771240234375, -1.7886810302734375, -1.7305908203125, -1.6725006103515625, -1.614410400390625, -1.5563201904296875, -1.49822998046875, -1.4401397705078125, -1.382049560546875, -1.3239593505859375, -1.265869140625, -1.2077789306640625, -1.149688720703125, -1.0915985107421875, -1.03350830078125, -0.9754180908203125, -0.917327880859375, -0.8592376708984375, -0.8011474609375, -0.7430572509765625, -0.684967041015625, -0.6268768310546875, -0.56878662109375, -0.5106964111328125, -0.452606201171875, -0.3945159912109375, -0.33642578125, -0.2783355712890625, -0.220245361328125, -0.1621551513671875, -0.10406494140625, -0.0459747314453125, 0.012115478515625, 0.0702056884765625, 0.1282958984375, 0.1863861083984375, 0.244476318359375, 0.3025665283203125, 0.36065673828125, 0.4187469482421875, 0.476837158203125, 0.5349273681640625, 0.593017578125, 0.6511077880859375, 0.709197998046875, 0.7672882080078125, 0.82537841796875, 0.8834686279296875, 0.941558837890625, 0.9996490478515625, 1.0577392578125, 1.1158294677734375, 1.173919677734375, 1.2320098876953125, 1.29010009765625, 1.3481903076171875, 1.406280517578125, 1.4643707275390625, 1.5224609375]}, "gradients/encoder.encoder.layers.4.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 2.0, 3.0, 4.0, 6.0, 2.0, 8.0, 15.0, 19.0, 26.0, 53.0, 88.0, 205.0, 520.0, 1063.0, 1024.0, 485.0, 255.0, 124.0, 65.0, 42.0, 24.0, 17.0, 13.0, 6.0, 6.0, 2.0, 2.0, 0.0, 2.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.6474609375, -1.6063766479492188, -1.5652923583984375, -1.5242080688476562, -1.483123779296875, -1.4420394897460938, -1.4009552001953125, -1.3598709106445312, -1.31878662109375, -1.2777023315429688, -1.2366180419921875, -1.1955337524414062, -1.154449462890625, -1.1133651733398438, -1.0722808837890625, -1.0311965942382812, -0.9901123046875, -0.9490280151367188, -0.9079437255859375, -0.8668594360351562, -0.825775146484375, -0.7846908569335938, -0.7436065673828125, -0.7025222778320312, -0.66143798828125, -0.6203536987304688, -0.5792694091796875, -0.5381851196289062, -0.497100830078125, -0.45601654052734375, -0.4149322509765625, -0.37384796142578125, -0.332763671875, -0.29167938232421875, -0.2505950927734375, -0.20951080322265625, -0.168426513671875, -0.12734222412109375, -0.0862579345703125, -0.04517364501953125, -0.00408935546875, 0.03699493408203125, 0.0780792236328125, 0.11916351318359375, 0.160247802734375, 0.20133209228515625, 0.2424163818359375, 0.28350067138671875, 0.3245849609375, 0.36566925048828125, 0.4067535400390625, 0.44783782958984375, 0.488922119140625, 0.5300064086914062, 0.5710906982421875, 0.6121749877929688, 0.65325927734375, 0.6943435668945312, 0.7354278564453125, 0.7765121459960938, 0.817596435546875, 0.8586807250976562, 0.8997650146484375, 0.9408493041992188, 0.98193359375]}, "gradients/encoder.encoder.layers.4.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 0.0, 2.0, 5.0, 13.0, 44.0, 79.0, 154.0, 239.0, 242.0, 118.0, 38.0, 30.0, 7.0, 10.0, 5.0, 3.0, 2.0, 2.0, 3.0, 5.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-9.334019660949707, -9.047407150268555, -8.760793685913086, -8.474181175231934, -8.187568664550781, -7.900955677032471, -7.61434268951416, -7.327730178833008, -7.041117191314697, -6.754504203796387, -6.467891693115234, -6.181278705596924, -5.894665718078613, -5.608053207397461, -5.32144021987915, -5.03482723236084, -4.7482147216796875, -4.461601734161377, -4.174989223480225, -3.888376235961914, -3.6017634868621826, -3.315150737762451, -3.0285377502441406, -2.741925001144409, -2.4553122520446777, -2.1686995029449463, -1.8820866346359253, -1.5954737663269043, -1.3088610172271729, -1.0222482681274414, -0.7356353998184204, -0.4490225315093994, -0.16241073608398438, 0.12420207262039185, 0.41081488132476807, 0.6974276900291443, 0.9840404987335205, 1.270653247833252, 1.557266116142273, 1.843878984451294, 2.1304917335510254, 2.417104482650757, 2.7037172317504883, 2.990330219268799, 3.2769429683685303, 3.5635557174682617, 3.8501687049865723, 4.136781692504883, 4.423394203186035, 4.710007190704346, 4.996619701385498, 5.283232688903809, 5.569845199584961, 5.8564581871032715, 6.143071174621582, 6.429683685302734, 6.716296672821045, 7.0029096603393555, 7.289522171020508, 7.576135158538818, 7.862748146057129, 8.149360656738281, 8.435973167419434, 8.722586631774902, 9.009199142456055]}, "gradients/encoder.encoder.layers.4.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 3.0, 1.0, 2.0, 3.0, 3.0, 10.0, 5.0, 7.0, 20.0, 14.0, 16.0, 16.0, 19.0, 28.0, 28.0, 37.0, 35.0, 44.0, 38.0, 46.0, 47.0, 62.0, 51.0, 46.0, 55.0, 44.0, 48.0, 38.0, 40.0, 30.0, 29.0, 19.0, 32.0, 19.0, 12.0, 11.0, 16.0, 4.0, 10.0, 3.0, 6.0, 8.0, 2.0, 5.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-4.3766865730285645, -4.241964340209961, -4.107242107391357, -3.972520112991333, -3.8377978801727295, -3.703075647354126, -3.5683536529541016, -3.433631420135498, -3.2989091873168945, -3.164186954498291, -3.0294647216796875, -2.894742727279663, -2.7600204944610596, -2.625298261642456, -2.4905762672424316, -2.355854034423828, -2.2211318016052246, -2.086409568786621, -1.9516874551773071, -1.8169653415679932, -1.6822431087493896, -1.5475208759307861, -1.4127987623214722, -1.2780766487121582, -1.1433544158935547, -1.0086321830749512, -0.8739100694656372, -0.7391878962516785, -0.6044657230377197, -0.469743549823761, -0.33502137660980225, -0.2002992033958435, -0.06557655334472656, 0.06914561986923218, 0.20386779308319092, 0.33858996629714966, 0.4733121395111084, 0.6080343127250671, 0.7427564859390259, 0.8774786591529846, 1.0122008323669434, 1.1469230651855469, 1.2816451787948608, 1.4163672924041748, 1.5510895252227783, 1.6858117580413818, 1.8205338716506958, 1.9552559852600098, 2.0899782180786133, 2.224700450897217, 2.3594226837158203, 2.4941446781158447, 2.6288669109344482, 2.7635891437530518, 2.898311138153076, 3.0330333709716797, 3.167755603790283, 3.3024778366088867, 3.4372000694274902, 3.5719220638275146, 3.706644296646118, 3.8413665294647217, 3.976088523864746, 4.11081075668335, 4.245532989501953]}, "gradients/encoder.encoder.layers.4.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 5.0, 1.0, 2.0, 9.0, 15.0, 20.0, 43.0, 45.0, 88.0, 160.0, 273.0, 607.0, 1091.0, 2639.0, 6787.0, 20665.0, 80607.0, 340930.0, 437857.0, 113879.0, 28102.0, 8621.0, 3290.0, 1401.0, 657.0, 323.0, 179.0, 101.0, 60.0, 38.0, 27.0, 15.0, 7.0, 9.0, 1.0, 6.0, 0.0, 2.0, 1.0, 0.0, 2.0, 0.0, 3.0, 0.0, 2.0], "bins": [-1.05859375, -1.0300750732421875, -1.001556396484375, -0.9730377197265625, -0.94451904296875, -0.9160003662109375, -0.887481689453125, -0.8589630126953125, -0.8304443359375, -0.8019256591796875, -0.773406982421875, -0.7448883056640625, -0.71636962890625, -0.6878509521484375, -0.659332275390625, -0.6308135986328125, -0.602294921875, -0.5737762451171875, -0.545257568359375, -0.5167388916015625, -0.48822021484375, -0.4597015380859375, -0.431182861328125, -0.4026641845703125, -0.3741455078125, -0.3456268310546875, -0.317108154296875, -0.2885894775390625, -0.26007080078125, -0.2315521240234375, -0.203033447265625, -0.1745147705078125, -0.14599609375, -0.1174774169921875, -0.088958740234375, -0.0604400634765625, -0.03192138671875, -0.0034027099609375, 0.025115966796875, 0.0536346435546875, 0.0821533203125, 0.1106719970703125, 0.139190673828125, 0.1677093505859375, 0.19622802734375, 0.2247467041015625, 0.253265380859375, 0.2817840576171875, 0.310302734375, 0.3388214111328125, 0.367340087890625, 0.3958587646484375, 0.42437744140625, 0.4528961181640625, 0.481414794921875, 0.5099334716796875, 0.5384521484375, 0.5669708251953125, 0.595489501953125, 0.6240081787109375, 0.65252685546875, 0.6810455322265625, 0.709564208984375, 0.7380828857421875, 0.7666015625]}, "gradients/encoder.encoder.layers.4.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 7.0, 4.0, 7.0, 9.0, 8.0, 19.0, 28.0, 28.0, 38.0, 54.0, 60.0, 82.0, 110.0, 78.0, 86.0, 85.0, 56.0, 83.0, 43.0, 48.0, 22.0, 21.0, 12.0, 9.0, 7.0, 3.0, 4.0, 0.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.85400390625, -0.8294296264648438, -0.8048553466796875, -0.7802810668945312, -0.755706787109375, -0.7311325073242188, -0.7065582275390625, -0.6819839477539062, -0.65740966796875, -0.6328353881835938, -0.6082611083984375, -0.5836868286132812, -0.559112548828125, -0.5345382690429688, -0.5099639892578125, -0.48538970947265625, -0.4608154296875, -0.43624114990234375, -0.4116668701171875, -0.38709259033203125, -0.362518310546875, -0.33794403076171875, -0.3133697509765625, -0.28879547119140625, -0.26422119140625, -0.23964691162109375, -0.2150726318359375, -0.19049835205078125, -0.165924072265625, -0.14134979248046875, -0.1167755126953125, -0.09220123291015625, -0.067626953125, -0.04305267333984375, -0.0184783935546875, 0.00609588623046875, 0.030670166015625, 0.05524444580078125, 0.0798187255859375, 0.10439300537109375, 0.12896728515625, 0.15354156494140625, 0.1781158447265625, 0.20269012451171875, 0.227264404296875, 0.25183868408203125, 0.2764129638671875, 0.30098724365234375, 0.3255615234375, 0.35013580322265625, 0.3747100830078125, 0.39928436279296875, 0.423858642578125, 0.44843292236328125, 0.4730072021484375, 0.49758148193359375, 0.52215576171875, 0.5467300415039062, 0.5713043212890625, 0.5958786010742188, 0.620452880859375, 0.6450271606445312, 0.6696014404296875, 0.6941757202148438, 0.71875]}, "gradients/encoder.encoder.layers.4.attention.v_proj.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 3.0, 6.0, 5.0, 12.0, 16.0, 26.0, 29.0, 29.0, 62.0, 72.0, 125.0, 215.0, 350.0, 558.0, 1205.0, 3399.0, 22804.0, 630962.0, 365553.0, 17389.0, 3058.0, 1160.0, 605.0, 341.0, 203.0, 130.0, 77.0, 53.0, 40.0, 32.0, 10.0, 9.0, 9.0, 5.0, 5.0, 1.0, 4.0, 2.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.5732421875, -1.5101776123046875, -1.447113037109375, -1.3840484619140625, -1.32098388671875, -1.2579193115234375, -1.194854736328125, -1.1317901611328125, -1.0687255859375, -1.0056610107421875, -0.942596435546875, -0.8795318603515625, -0.81646728515625, -0.7534027099609375, -0.690338134765625, -0.6272735595703125, -0.564208984375, -0.5011444091796875, -0.438079833984375, -0.3750152587890625, -0.31195068359375, -0.2488861083984375, -0.185821533203125, -0.1227569580078125, -0.0596923828125, 0.0033721923828125, 0.066436767578125, 0.1295013427734375, 0.19256591796875, 0.2556304931640625, 0.318695068359375, 0.3817596435546875, 0.44482421875, 0.5078887939453125, 0.570953369140625, 0.6340179443359375, 0.69708251953125, 0.7601470947265625, 0.823211669921875, 0.8862762451171875, 0.9493408203125, 1.0124053955078125, 1.075469970703125, 1.1385345458984375, 1.20159912109375, 1.2646636962890625, 1.327728271484375, 1.3907928466796875, 1.453857421875, 1.5169219970703125, 1.579986572265625, 1.6430511474609375, 1.70611572265625, 1.7691802978515625, 1.832244873046875, 1.8953094482421875, 1.9583740234375, 2.0214385986328125, 2.084503173828125, 2.1475677490234375, 2.21063232421875, 2.2736968994140625, 2.336761474609375, 2.3998260498046875, 2.462890625]}, "gradients/encoder.encoder.layers.4.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0, 1.0, 4.0, 4.0, 4.0, 1.0, 10.0, 7.0, 14.0, 7.0, 17.0, 21.0, 18.0, 36.0, 25.0, 22.0, 33.0, 36.0, 30.0, 28.0, 53.0, 50.0, 59.0, 57.0, 57.0, 61.0, 50.0, 33.0, 35.0, 33.0, 42.0, 23.0, 33.0, 20.0, 22.0, 14.0, 11.0, 13.0, 9.0, 6.0, 3.0, 3.0, 2.0, 2.0, 3.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.6083984375, -1.5524749755859375, -1.496551513671875, -1.4406280517578125, -1.38470458984375, -1.3287811279296875, -1.272857666015625, -1.2169342041015625, -1.1610107421875, -1.1050872802734375, -1.049163818359375, -0.9932403564453125, -0.93731689453125, -0.8813934326171875, -0.825469970703125, -0.7695465087890625, -0.713623046875, -0.6576995849609375, -0.601776123046875, -0.5458526611328125, -0.48992919921875, -0.4340057373046875, -0.378082275390625, -0.3221588134765625, -0.2662353515625, -0.2103118896484375, -0.154388427734375, -0.0984649658203125, -0.04254150390625, 0.0133819580078125, 0.069305419921875, 0.1252288818359375, 0.18115234375, 0.2370758056640625, 0.292999267578125, 0.3489227294921875, 0.40484619140625, 0.4607696533203125, 0.516693115234375, 0.5726165771484375, 0.6285400390625, 0.6844635009765625, 0.740386962890625, 0.7963104248046875, 0.85223388671875, 0.9081573486328125, 0.964080810546875, 1.0200042724609375, 1.075927734375, 1.1318511962890625, 1.187774658203125, 1.2436981201171875, 1.29962158203125, 1.3555450439453125, 1.411468505859375, 1.4673919677734375, 1.5233154296875, 1.5792388916015625, 1.635162353515625, 1.6910858154296875, 1.74700927734375, 1.8029327392578125, 1.858856201171875, 1.9147796630859375, 1.970703125]}, "gradients/encoder.encoder.layers.4.attention.k_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 0.0, 0.0, 5.0, 2.0, 0.0, 2.0, 0.0, 3.0, 1.0, 1.0, 3.0, 11.0, 17.0, 9.0, 19.0, 13.0, 19.0, 32.0, 59.0, 81.0, 146.0, 271.0, 710.0, 1947.0, 10253.0, 175761.0, 824561.0, 28864.0, 3811.0, 1056.0, 412.0, 210.0, 96.0, 53.0, 38.0, 23.0, 21.0, 11.0, 7.0, 9.0, 6.0, 8.0, 8.0, 4.0, 3.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.64990234375, -0.6276626586914062, -0.6054229736328125, -0.5831832885742188, -0.560943603515625, -0.5387039184570312, -0.5164642333984375, -0.49422454833984375, -0.47198486328125, -0.44974517822265625, -0.4275054931640625, -0.40526580810546875, -0.383026123046875, -0.36078643798828125, -0.3385467529296875, -0.31630706787109375, -0.2940673828125, -0.27182769775390625, -0.2495880126953125, -0.22734832763671875, -0.205108642578125, -0.18286895751953125, -0.1606292724609375, -0.13838958740234375, -0.11614990234375, -0.09391021728515625, -0.0716705322265625, -0.04943084716796875, -0.027191162109375, -0.00495147705078125, 0.0172882080078125, 0.03952789306640625, 0.061767578125, 0.08400726318359375, 0.1062469482421875, 0.12848663330078125, 0.150726318359375, 0.17296600341796875, 0.1952056884765625, 0.21744537353515625, 0.23968505859375, 0.26192474365234375, 0.2841644287109375, 0.30640411376953125, 0.328643798828125, 0.35088348388671875, 0.3731231689453125, 0.39536285400390625, 0.4176025390625, 0.43984222412109375, 0.4620819091796875, 0.48432159423828125, 0.506561279296875, 0.5288009643554688, 0.5510406494140625, 0.5732803344726562, 0.59552001953125, 0.6177597045898438, 0.6399993896484375, 0.6622390747070312, 0.684478759765625, 0.7067184448242188, 0.7289581298828125, 0.7511978149414062, 0.7734375]}, "gradients/encoder.encoder.layers.4.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 3.0, 3.0, 1.0, 5.0, 0.0, 3.0, 7.0, 9.0, 8.0, 7.0, 12.0, 10.0, 24.0, 21.0, 24.0, 31.0, 45.0, 69.0, 82.0, 117.0, 103.0, 83.0, 91.0, 58.0, 35.0, 32.0, 25.0, 22.0, 9.0, 11.0, 10.0, 17.0, 7.0, 6.0, 3.0, 5.0, 1.0, 2.0, 3.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0], "bins": [-7.730722427368164e-05, -7.474608719348907e-05, -7.218495011329651e-05, -6.962381303310394e-05, -6.706267595291138e-05, -6.450153887271881e-05, -6.194040179252625e-05, -5.937926471233368e-05, -5.681812763214111e-05, -5.425699055194855e-05, -5.169585347175598e-05, -4.9134716391563416e-05, -4.657357931137085e-05, -4.4012442231178284e-05, -4.145130515098572e-05, -3.889016807079315e-05, -3.6329030990600586e-05, -3.376789391040802e-05, -3.1206756830215454e-05, -2.8645619750022888e-05, -2.6084482669830322e-05, -2.3523345589637756e-05, -2.096220850944519e-05, -1.8401071429252625e-05, -1.583993434906006e-05, -1.3278797268867493e-05, -1.0717660188674927e-05, -8.15652310848236e-06, -5.595386028289795e-06, -3.034248948097229e-06, -4.731118679046631e-07, 2.088025212287903e-06, 4.649162292480469e-06, 7.210299372673035e-06, 9.7714364528656e-06, 1.2332573533058167e-05, 1.4893710613250732e-05, 1.74548476934433e-05, 2.0015984773635864e-05, 2.257712185382843e-05, 2.5138258934020996e-05, 2.7699396014213562e-05, 3.0260533094406128e-05, 3.2821670174598694e-05, 3.538280725479126e-05, 3.7943944334983826e-05, 4.050508141517639e-05, 4.306621849536896e-05, 4.5627355575561523e-05, 4.818849265575409e-05, 5.0749629735946655e-05, 5.331076681613922e-05, 5.587190389633179e-05, 5.843304097652435e-05, 6.099417805671692e-05, 6.355531513690948e-05, 6.611645221710205e-05, 6.867758929729462e-05, 7.123872637748718e-05, 7.379986345767975e-05, 7.636100053787231e-05, 7.892213761806488e-05, 8.148327469825745e-05, 8.404441177845001e-05, 8.660554885864258e-05]}, "gradients/encoder.encoder.layers.4.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 3.0, 6.0, 1.0, 6.0, 8.0, 21.0, 23.0, 35.0, 53.0, 92.0, 175.0, 388.0, 922.0, 2847.0, 19398.0, 784106.0, 227706.0, 9544.0, 1951.0, 641.0, 284.0, 126.0, 87.0, 56.0, 35.0, 16.0, 9.0, 8.0, 9.0, 4.0, 1.0, 3.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.49609375, -0.4743804931640625, -0.452667236328125, -0.4309539794921875, -0.40924072265625, -0.3875274658203125, -0.365814208984375, -0.3441009521484375, -0.3223876953125, -0.3006744384765625, -0.278961181640625, -0.2572479248046875, -0.23553466796875, -0.2138214111328125, -0.192108154296875, -0.1703948974609375, -0.148681640625, -0.1269683837890625, -0.105255126953125, -0.0835418701171875, -0.06182861328125, -0.0401153564453125, -0.018402099609375, 0.0033111572265625, 0.0250244140625, 0.0467376708984375, 0.068450927734375, 0.0901641845703125, 0.11187744140625, 0.1335906982421875, 0.155303955078125, 0.1770172119140625, 0.19873046875, 0.2204437255859375, 0.242156982421875, 0.2638702392578125, 0.28558349609375, 0.3072967529296875, 0.329010009765625, 0.3507232666015625, 0.3724365234375, 0.3941497802734375, 0.415863037109375, 0.4375762939453125, 0.45928955078125, 0.4810028076171875, 0.502716064453125, 0.5244293212890625, 0.546142578125, 0.5678558349609375, 0.589569091796875, 0.6112823486328125, 0.63299560546875, 0.6547088623046875, 0.676422119140625, 0.6981353759765625, 0.7198486328125, 0.7415618896484375, 0.763275146484375, 0.7849884033203125, 0.80670166015625, 0.8284149169921875, 0.850128173828125, 0.8718414306640625, 0.8935546875]}, "gradients/encoder.encoder.layers.4.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 0.0, 5.0, 5.0, 5.0, 3.0, 7.0, 16.0, 18.0, 28.0, 46.0, 58.0, 74.0, 96.0, 125.0, 112.0, 109.0, 90.0, 59.0, 56.0, 28.0, 18.0, 14.0, 7.0, 6.0, 8.0, 5.0, 3.0, 4.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.544921875, -0.5286026000976562, -0.5122833251953125, -0.49596405029296875, -0.479644775390625, -0.46332550048828125, -0.4470062255859375, -0.43068695068359375, -0.41436767578125, -0.39804840087890625, -0.3817291259765625, -0.36540985107421875, -0.349090576171875, -0.33277130126953125, -0.3164520263671875, -0.30013275146484375, -0.2838134765625, -0.26749420166015625, -0.2511749267578125, -0.23485565185546875, -0.218536376953125, -0.20221710205078125, -0.1858978271484375, -0.16957855224609375, -0.15325927734375, -0.13694000244140625, -0.1206207275390625, -0.10430145263671875, -0.087982177734375, -0.07166290283203125, -0.0553436279296875, -0.03902435302734375, -0.022705078125, -0.00638580322265625, 0.0099334716796875, 0.02625274658203125, 0.042572021484375, 0.05889129638671875, 0.0752105712890625, 0.09152984619140625, 0.10784912109375, 0.12416839599609375, 0.1404876708984375, 0.15680694580078125, 0.173126220703125, 0.18944549560546875, 0.2057647705078125, 0.22208404541015625, 0.2384033203125, 0.25472259521484375, 0.2710418701171875, 0.28736114501953125, 0.303680419921875, 0.31999969482421875, 0.3363189697265625, 0.35263824462890625, 0.36895751953125, 0.38527679443359375, 0.4015960693359375, 0.41791534423828125, 0.434234619140625, 0.45055389404296875, 0.4668731689453125, 0.48319244384765625, 0.49951171875]}, "gradients/encoder.encoder.layers.4.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 1.0, 1.0, 1.0, 4.0, 2.0, 3.0, 2.0, 5.0, 4.0, 8.0, 11.0, 33.0, 45.0, 101.0, 165.0, 269.0, 186.0, 84.0, 45.0, 18.0, 10.0, 6.0, 2.0, 0.0, 4.0, 2.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-13.406059265136719, -13.091744422912598, -12.777429580688477, -12.463113784790039, -12.148798942565918, -11.834484100341797, -11.520169258117676, -11.205854415893555, -10.891538619995117, -10.577223777770996, -10.262908935546875, -9.948593139648438, -9.634278297424316, -9.319963455200195, -9.005648612976074, -8.691333770751953, -8.377018928527832, -8.062704086303711, -7.748388767242432, -7.4340739250183105, -7.119758605957031, -6.80544376373291, -6.491128921508789, -6.176814079284668, -5.862498760223389, -5.548183917999268, -5.233868598937988, -4.919553756713867, -4.605238914489746, -4.290923595428467, -3.9766087532043457, -3.6622936725616455, -3.3479795455932617, -3.0336644649505615, -2.7193493843078613, -2.4050345420837402, -2.09071946144104, -1.7764043807983398, -1.4620894193649292, -1.1477744579315186, -0.8334593772888184, -0.5191443562507629, -0.20482933521270752, 0.1094856858253479, 0.4238007068634033, 0.7381157875061035, 1.0524307489395142, 1.3667457103729248, 1.681060791015625, 1.9953758716583252, 2.3096909523010254, 2.6240057945251465, 2.9383208751678467, 3.252635955810547, 3.566950798034668, 3.881265878677368, 4.195580959320068, 4.5098958015441895, 4.824211120605469, 5.13852596282959, 5.452840805053711, 5.76715612411499, 6.081470966339111, 6.395786285400391, 6.710101127624512]}, "gradients/encoder.encoder.layers.4.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 5.0, 3.0, 6.0, 3.0, 5.0, 5.0, 7.0, 7.0, 12.0, 15.0, 19.0, 20.0, 18.0, 20.0, 27.0, 21.0, 33.0, 33.0, 39.0, 42.0, 76.0, 60.0, 69.0, 88.0, 58.0, 46.0, 28.0, 25.0, 24.0, 26.0, 21.0, 35.0, 19.0, 21.0, 12.0, 17.0, 7.0, 5.0, 5.0, 7.0, 4.0, 4.0, 3.0, 6.0, 2.0, 3.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0], "bins": [-7.3751115798950195, -7.153839588165283, -6.932567596435547, -6.7112956047058105, -6.490023612976074, -6.26875114440918, -6.047479152679443, -5.826207160949707, -5.604935169219971, -5.383663177490234, -5.162391185760498, -4.941119194030762, -4.719846725463867, -4.498575210571289, -4.2773027420043945, -4.056030750274658, -3.834758758544922, -3.6134867668151855, -3.392214775085449, -3.170942544937134, -2.9496705532073975, -2.728398561477661, -2.5071263313293457, -2.2858543395996094, -2.064582347869873, -1.8433103561401367, -1.6220382452011108, -1.400766134262085, -1.1794941425323486, -0.9582221508026123, -0.7369500398635864, -0.5156779289245605, -0.2944064140319824, -0.07313436269760132, 0.14813768863677979, 0.3694097399711609, 0.590681791305542, 0.8119537830352783, 1.0332258939743042, 1.25449800491333, 1.4757699966430664, 1.6970419883728027, 1.9183140993118286, 2.1395862102508545, 2.360858201980591, 2.582130193710327, 2.8034024238586426, 3.024674415588379, 3.2459464073181152, 3.4672183990478516, 3.688490390777588, 3.9097626209259033, 4.131034851074219, 4.352306365966797, 4.573578834533691, 4.794850826263428, 5.016122817993164, 5.2373948097229, 5.458666801452637, 5.679938793182373, 5.901210784912109, 6.122483253479004, 6.34375524520874, 6.565027236938477, 6.786299228668213]}, "gradients/encoder.encoder.layers.3.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 3.0, 1.0, 2.0, 12.0, 11.0, 11.0, 29.0, 36.0, 60.0, 120.0, 305.0, 656.0, 1942.0, 9935.0, 149303.0, 3719025.0, 297144.0, 12021.0, 2269.0, 707.0, 279.0, 169.0, 93.0, 53.0, 29.0, 24.0, 16.0, 9.0, 8.0, 5.0, 4.0, 6.0, 3.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.9931640625, -0.9617156982421875, -0.930267333984375, -0.8988189697265625, -0.86737060546875, -0.8359222412109375, -0.804473876953125, -0.7730255126953125, -0.7415771484375, -0.7101287841796875, -0.678680419921875, -0.6472320556640625, -0.61578369140625, -0.5843353271484375, -0.552886962890625, -0.5214385986328125, -0.489990234375, -0.4585418701171875, -0.427093505859375, -0.3956451416015625, -0.36419677734375, -0.3327484130859375, -0.301300048828125, -0.2698516845703125, -0.2384033203125, -0.2069549560546875, -0.175506591796875, -0.1440582275390625, -0.11260986328125, -0.0811614990234375, -0.049713134765625, -0.0182647705078125, 0.01318359375, 0.0446319580078125, 0.076080322265625, 0.1075286865234375, 0.13897705078125, 0.1704254150390625, 0.201873779296875, 0.2333221435546875, 0.2647705078125, 0.2962188720703125, 0.327667236328125, 0.3591156005859375, 0.39056396484375, 0.4220123291015625, 0.453460693359375, 0.4849090576171875, 0.516357421875, 0.5478057861328125, 0.579254150390625, 0.6107025146484375, 0.64215087890625, 0.6735992431640625, 0.705047607421875, 0.7364959716796875, 0.7679443359375, 0.7993927001953125, 0.830841064453125, 0.8622894287109375, 0.89373779296875, 0.9251861572265625, 0.956634521484375, 0.9880828857421875, 1.01953125]}, "gradients/encoder.encoder.layers.3.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 4.0, 3.0, 5.0, 5.0, 9.0, 18.0, 17.0, 30.0, 34.0, 40.0, 64.0, 61.0, 99.0, 86.0, 95.0, 92.0, 82.0, 64.0, 53.0, 49.0, 33.0, 22.0, 10.0, 10.0, 9.0, 7.0, 4.0, 3.0, 1.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.8671875, -0.8426132202148438, -0.8180389404296875, -0.7934646606445312, -0.768890380859375, -0.7443161010742188, -0.7197418212890625, -0.6951675415039062, -0.67059326171875, -0.6460189819335938, -0.6214447021484375, -0.5968704223632812, -0.572296142578125, -0.5477218627929688, -0.5231475830078125, -0.49857330322265625, -0.4739990234375, -0.44942474365234375, -0.4248504638671875, -0.40027618408203125, -0.375701904296875, -0.35112762451171875, -0.3265533447265625, -0.30197906494140625, -0.27740478515625, -0.25283050537109375, -0.2282562255859375, -0.20368194580078125, -0.179107666015625, -0.15453338623046875, -0.1299591064453125, -0.10538482666015625, -0.080810546875, -0.05623626708984375, -0.0316619873046875, -0.00708770751953125, 0.017486572265625, 0.04206085205078125, 0.0666351318359375, 0.09120941162109375, 0.11578369140625, 0.14035797119140625, 0.1649322509765625, 0.18950653076171875, 0.214080810546875, 0.23865509033203125, 0.2632293701171875, 0.28780364990234375, 0.3123779296875, 0.33695220947265625, 0.3615264892578125, 0.38610076904296875, 0.410675048828125, 0.43524932861328125, 0.4598236083984375, 0.48439788818359375, 0.50897216796875, 0.5335464477539062, 0.5581207275390625, 0.5826950073242188, 0.607269287109375, 0.6318435668945312, 0.6564178466796875, 0.6809921264648438, 0.70556640625]}, "gradients/encoder.encoder.layers.3.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 1.0, 1.0, 2.0, 1.0, 4.0, 6.0, 3.0, 5.0, 8.0, 10.0, 6.0, 16.0, 25.0, 32.0, 85.0, 119.0, 262.0, 505.0, 1088.0, 2641.0, 9104.0, 63157.0, 3368683.0, 711049.0, 27958.0, 5567.0, 2077.0, 905.0, 421.0, 244.0, 117.0, 66.0, 43.0, 18.0, 12.0, 13.0, 9.0, 7.0, 6.0, 3.0, 2.0, 3.0, 3.0, 1.0, 0.0, 1.0, 2.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.93115234375, -0.8999252319335938, -0.8686981201171875, -0.8374710083007812, -0.806243896484375, -0.7750167846679688, -0.7437896728515625, -0.7125625610351562, -0.68133544921875, -0.6501083374023438, -0.6188812255859375, -0.5876541137695312, -0.556427001953125, -0.5251998901367188, -0.4939727783203125, -0.46274566650390625, -0.4315185546875, -0.40029144287109375, -0.3690643310546875, -0.33783721923828125, -0.306610107421875, -0.27538299560546875, -0.2441558837890625, -0.21292877197265625, -0.18170166015625, -0.15047454833984375, -0.1192474365234375, -0.08802032470703125, -0.056793212890625, -0.02556610107421875, 0.0056610107421875, 0.03688812255859375, 0.068115234375, 0.09934234619140625, 0.1305694580078125, 0.16179656982421875, 0.193023681640625, 0.22425079345703125, 0.2554779052734375, 0.28670501708984375, 0.31793212890625, 0.34915924072265625, 0.3803863525390625, 0.41161346435546875, 0.442840576171875, 0.47406768798828125, 0.5052947998046875, 0.5365219116210938, 0.5677490234375, 0.5989761352539062, 0.6302032470703125, 0.6614303588867188, 0.692657470703125, 0.7238845825195312, 0.7551116943359375, 0.7863388061523438, 0.81756591796875, 0.8487930297851562, 0.8800201416015625, 0.9112472534179688, 0.942474365234375, 0.9737014770507812, 1.0049285888671875, 1.0361557006835938, 1.0673828125]}, "gradients/encoder.encoder.layers.3.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 1.0, 1.0, 1.0, 1.0, 3.0, 1.0, 0.0, 3.0, 8.0, 9.0, 12.0, 24.0, 22.0, 39.0, 64.0, 132.0, 321.0, 706.0, 1175.0, 828.0, 360.0, 152.0, 89.0, 40.0, 27.0, 26.0, 13.0, 4.0, 8.0, 5.0, 3.0, 0.0, 4.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.0302734375, -0.99658203125, -0.962890625, -0.92919921875, -0.8955078125, -0.86181640625, -0.828125, -0.79443359375, -0.7607421875, -0.72705078125, -0.693359375, -0.65966796875, -0.6259765625, -0.59228515625, -0.55859375, -0.52490234375, -0.4912109375, -0.45751953125, -0.423828125, -0.39013671875, -0.3564453125, -0.32275390625, -0.2890625, -0.25537109375, -0.2216796875, -0.18798828125, -0.154296875, -0.12060546875, -0.0869140625, -0.05322265625, -0.01953125, 0.01416015625, 0.0478515625, 0.08154296875, 0.115234375, 0.14892578125, 0.1826171875, 0.21630859375, 0.25, 0.28369140625, 0.3173828125, 0.35107421875, 0.384765625, 0.41845703125, 0.4521484375, 0.48583984375, 0.51953125, 0.55322265625, 0.5869140625, 0.62060546875, 0.654296875, 0.68798828125, 0.7216796875, 0.75537109375, 0.7890625, 0.82275390625, 0.8564453125, 0.89013671875, 0.923828125, 0.95751953125, 0.9912109375, 1.02490234375, 1.05859375, 1.09228515625, 1.1259765625]}, "gradients/encoder.encoder.layers.3.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 4.0, 4.0, 3.0, 5.0, 10.0, 21.0, 44.0, 119.0, 246.0, 252.0, 152.0, 63.0, 32.0, 16.0, 9.0, 8.0, 8.0, 3.0, 1.0, 1.0, 3.0, 1.0, 3.0, 1.0, 2.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-8.174121856689453, -7.919416427612305, -7.6647114753723145, -7.410006046295166, -7.155301094055176, -6.900595664978027, -6.645890235900879, -6.3911848068237305, -6.13647985458374, -5.881774425506592, -5.627069473266602, -5.372364044189453, -5.117658615112305, -4.8629536628723145, -4.608248233795166, -4.353543281555176, -4.098837852478027, -3.844132661819458, -3.5894274711608887, -3.3347220420837402, -3.080016851425171, -2.8253116607666016, -2.570606231689453, -2.315901041030884, -2.0611958503723145, -1.8064906597137451, -1.5517853498458862, -1.2970800399780273, -1.042374849319458, -0.7876696586608887, -0.5329643487930298, -0.2782590389251709, -0.023553848266601562, 0.23115140199661255, 0.48585665225982666, 0.7405619025230408, 0.9952671527862549, 1.2499723434448242, 1.504677653312683, 1.759382963180542, 2.0140881538391113, 2.2687933444976807, 2.52349853515625, 2.7782039642333984, 3.0329091548919678, 3.287614345550537, 3.5423197746276855, 3.797024965286255, 4.051730155944824, 4.306435585021973, 4.561140537261963, 4.815845966339111, 5.070550918579102, 5.32525634765625, 5.579961776733398, 5.834667205810547, 6.089372158050537, 6.3440775871276855, 6.598782539367676, 6.853487968444824, 7.108193397521973, 7.362898349761963, 7.617603778839111, 7.872308731079102, 8.12701416015625]}, "gradients/encoder.encoder.layers.3.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 2.0, 3.0, 11.0, 14.0, 10.0, 17.0, 24.0, 30.0, 35.0, 45.0, 44.0, 69.0, 56.0, 73.0, 105.0, 77.0, 66.0, 66.0, 58.0, 53.0, 39.0, 32.0, 29.0, 10.0, 16.0, 12.0, 5.0, 5.0, 1.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.137676239013672, -5.969976425170898, -5.802276611328125, -5.634576797485352, -5.466876983642578, -5.299177169799805, -5.131477355957031, -4.963777542114258, -4.796077728271484, -4.628377914428711, -4.4606781005859375, -4.292978286743164, -4.125278472900391, -3.957578659057617, -3.7898788452148438, -3.6221790313720703, -3.454479455947876, -3.2867796421051025, -3.119079828262329, -2.9513800144195557, -2.7836802005767822, -2.615980386734009, -2.4482808113098145, -2.280580997467041, -2.1128811836242676, -1.9451813697814941, -1.7774815559387207, -1.6097817420959473, -1.4420819282531738, -1.2743821144104004, -1.1066824197769165, -0.9389826059341431, -0.7712826728820801, -0.6035828590393066, -0.4358830749988556, -0.26818329095840454, -0.1004834771156311, 0.06721633672714233, 0.234916090965271, 0.40261590480804443, 0.5703157186508179, 0.7380155324935913, 0.9057153463363647, 1.0734150409698486, 1.241114854812622, 1.4088146686553955, 1.576514482498169, 1.7442142963409424, 1.9119141101837158, 2.0796139240264893, 2.2473137378692627, 2.415013551712036, 2.5827133655548096, 2.750413179397583, 2.9181127548217773, 3.085812568664551, 3.253512382507324, 3.4212121963500977, 3.588912010192871, 3.7566118240356445, 3.924311637878418, 4.092011451721191, 4.259711265563965, 4.427411079406738, 4.595110893249512]}, "gradients/encoder.encoder.layers.3.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 2.0, 3.0, 2.0, 2.0, 6.0, 3.0, 6.0, 8.0, 18.0, 27.0, 37.0, 85.0, 115.0, 265.0, 515.0, 1258.0, 3878.0, 15113.0, 86926.0, 505120.0, 366210.0, 53885.0, 10267.0, 2832.0, 1038.0, 459.0, 206.0, 111.0, 57.0, 33.0, 19.0, 11.0, 13.0, 11.0, 3.0, 9.0, 0.0, 3.0, 0.0, 1.0, 1.0, 1.0, 3.0, 0.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.9921875, -0.960052490234375, -0.92791748046875, -0.895782470703125, -0.8636474609375, -0.831512451171875, -0.79937744140625, -0.767242431640625, -0.735107421875, -0.702972412109375, -0.67083740234375, -0.638702392578125, -0.6065673828125, -0.574432373046875, -0.54229736328125, -0.510162353515625, -0.47802734375, -0.445892333984375, -0.41375732421875, -0.381622314453125, -0.3494873046875, -0.317352294921875, -0.28521728515625, -0.253082275390625, -0.220947265625, -0.188812255859375, -0.15667724609375, -0.124542236328125, -0.0924072265625, -0.060272216796875, -0.02813720703125, 0.003997802734375, 0.0361328125, 0.068267822265625, 0.10040283203125, 0.132537841796875, 0.1646728515625, 0.196807861328125, 0.22894287109375, 0.261077880859375, 0.293212890625, 0.325347900390625, 0.35748291015625, 0.389617919921875, 0.4217529296875, 0.453887939453125, 0.48602294921875, 0.518157958984375, 0.55029296875, 0.582427978515625, 0.61456298828125, 0.646697998046875, 0.6788330078125, 0.710968017578125, 0.74310302734375, 0.775238037109375, 0.807373046875, 0.839508056640625, 0.87164306640625, 0.903778076171875, 0.9359130859375, 0.968048095703125, 1.00018310546875, 1.032318115234375, 1.064453125]}, "gradients/encoder.encoder.layers.3.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 2.0, 4.0, 4.0, 7.0, 4.0, 5.0, 11.0, 25.0, 24.0, 33.0, 49.0, 58.0, 84.0, 106.0, 77.0, 77.0, 88.0, 85.0, 66.0, 56.0, 36.0, 34.0, 24.0, 13.0, 14.0, 10.0, 5.0, 7.0, 1.0, 2.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.85986328125, -0.8354644775390625, -0.811065673828125, -0.7866668701171875, -0.76226806640625, -0.7378692626953125, -0.713470458984375, -0.6890716552734375, -0.6646728515625, -0.6402740478515625, -0.615875244140625, -0.5914764404296875, -0.56707763671875, -0.5426788330078125, -0.518280029296875, -0.4938812255859375, -0.469482421875, -0.4450836181640625, -0.420684814453125, -0.3962860107421875, -0.37188720703125, -0.3474884033203125, -0.323089599609375, -0.2986907958984375, -0.2742919921875, -0.2498931884765625, -0.225494384765625, -0.2010955810546875, -0.17669677734375, -0.1522979736328125, -0.127899169921875, -0.1035003662109375, -0.0791015625, -0.0547027587890625, -0.030303955078125, -0.0059051513671875, 0.01849365234375, 0.0428924560546875, 0.067291259765625, 0.0916900634765625, 0.1160888671875, 0.1404876708984375, 0.164886474609375, 0.1892852783203125, 0.21368408203125, 0.2380828857421875, 0.262481689453125, 0.2868804931640625, 0.311279296875, 0.3356781005859375, 0.360076904296875, 0.3844757080078125, 0.40887451171875, 0.4332733154296875, 0.457672119140625, 0.4820709228515625, 0.5064697265625, 0.5308685302734375, 0.555267333984375, 0.5796661376953125, 0.60406494140625, 0.6284637451171875, 0.652862548828125, 0.6772613525390625, 0.70166015625]}, "gradients/encoder.encoder.layers.3.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 3.0, 4.0, 9.0, 2.0, 5.0, 15.0, 18.0, 25.0, 22.0, 32.0, 43.0, 73.0, 143.0, 239.0, 594.0, 2396.0, 17745.0, 288439.0, 696987.0, 36315.0, 3806.0, 906.0, 308.0, 150.0, 74.0, 54.0, 37.0, 35.0, 22.0, 19.0, 15.0, 8.0, 5.0, 7.0, 5.0, 5.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.2724609375, -1.2218475341796875, -1.171234130859375, -1.1206207275390625, -1.07000732421875, -1.0193939208984375, -0.968780517578125, -0.9181671142578125, -0.8675537109375, -0.8169403076171875, -0.766326904296875, -0.7157135009765625, -0.66510009765625, -0.6144866943359375, -0.563873291015625, -0.5132598876953125, -0.462646484375, -0.4120330810546875, -0.361419677734375, -0.3108062744140625, -0.26019287109375, -0.2095794677734375, -0.158966064453125, -0.1083526611328125, -0.0577392578125, -0.0071258544921875, 0.043487548828125, 0.0941009521484375, 0.14471435546875, 0.1953277587890625, 0.245941162109375, 0.2965545654296875, 0.34716796875, 0.3977813720703125, 0.448394775390625, 0.4990081787109375, 0.54962158203125, 0.6002349853515625, 0.650848388671875, 0.7014617919921875, 0.7520751953125, 0.8026885986328125, 0.853302001953125, 0.9039154052734375, 0.95452880859375, 1.0051422119140625, 1.055755615234375, 1.1063690185546875, 1.156982421875, 1.2075958251953125, 1.258209228515625, 1.3088226318359375, 1.35943603515625, 1.4100494384765625, 1.460662841796875, 1.5112762451171875, 1.5618896484375, 1.6125030517578125, 1.663116455078125, 1.7137298583984375, 1.76434326171875, 1.8149566650390625, 1.865570068359375, 1.9161834716796875, 1.966796875]}, "gradients/encoder.encoder.layers.3.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 2.0, 4.0, 3.0, 4.0, 4.0, 5.0, 10.0, 9.0, 16.0, 15.0, 19.0, 18.0, 38.0, 32.0, 41.0, 37.0, 44.0, 55.0, 48.0, 55.0, 59.0, 49.0, 48.0, 53.0, 58.0, 34.0, 34.0, 41.0, 33.0, 33.0, 24.0, 13.0, 11.0, 16.0, 9.0, 7.0, 9.0, 4.0, 5.0, 3.0, 0.0, 1.0, 2.0, 2.0, 2.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-1.64453125, -1.588623046875, -1.53271484375, -1.476806640625, -1.4208984375, -1.364990234375, -1.30908203125, -1.253173828125, -1.197265625, -1.141357421875, -1.08544921875, -1.029541015625, -0.9736328125, -0.917724609375, -0.86181640625, -0.805908203125, -0.75, -0.694091796875, -0.63818359375, -0.582275390625, -0.5263671875, -0.470458984375, -0.41455078125, -0.358642578125, -0.302734375, -0.246826171875, -0.19091796875, -0.135009765625, -0.0791015625, -0.023193359375, 0.03271484375, 0.088623046875, 0.14453125, 0.200439453125, 0.25634765625, 0.312255859375, 0.3681640625, 0.424072265625, 0.47998046875, 0.535888671875, 0.591796875, 0.647705078125, 0.70361328125, 0.759521484375, 0.8154296875, 0.871337890625, 0.92724609375, 0.983154296875, 1.0390625, 1.094970703125, 1.15087890625, 1.206787109375, 1.2626953125, 1.318603515625, 1.37451171875, 1.430419921875, 1.486328125, 1.542236328125, 1.59814453125, 1.654052734375, 1.7099609375, 1.765869140625, 1.82177734375, 1.877685546875, 1.93359375]}, "gradients/encoder.encoder.layers.3.attention.k_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 3.0, 0.0, 4.0, 1.0, 12.0, 13.0, 23.0, 27.0, 57.0, 95.0, 205.0, 458.0, 1018.0, 2797.0, 10517.0, 58005.0, 695990.0, 241905.0, 28067.0, 6076.0, 1884.0, 710.0, 308.0, 169.0, 102.0, 49.0, 27.0, 16.0, 9.0, 5.0, 3.0, 6.0, 3.0, 1.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.270751953125, -0.2593803405761719, -0.24800872802734375, -0.23663711547851562, -0.2252655029296875, -0.21389389038085938, -0.20252227783203125, -0.19115066528320312, -0.179779052734375, -0.16840744018554688, -0.15703582763671875, -0.14566421508789062, -0.1342926025390625, -0.12292098999023438, -0.11154937744140625, -0.10017776489257812, -0.08880615234375, -0.07743453979492188, -0.06606292724609375, -0.054691314697265625, -0.0433197021484375, -0.031948089599609375, -0.02057647705078125, -0.009204864501953125, 0.002166748046875, 0.013538360595703125, 0.02490997314453125, 0.036281585693359375, 0.0476531982421875, 0.059024810791015625, 0.07039642333984375, 0.08176803588867188, 0.0931396484375, 0.10451126098632812, 0.11588287353515625, 0.12725448608398438, 0.1386260986328125, 0.14999771118164062, 0.16136932373046875, 0.17274093627929688, 0.184112548828125, 0.19548416137695312, 0.20685577392578125, 0.21822738647460938, 0.2295989990234375, 0.24097061157226562, 0.25234222412109375, 0.2637138366699219, 0.27508544921875, 0.2864570617675781, 0.29782867431640625, 0.3092002868652344, 0.3205718994140625, 0.3319435119628906, 0.34331512451171875, 0.3546867370605469, 0.366058349609375, 0.3774299621582031, 0.38880157470703125, 0.4001731872558594, 0.4115447998046875, 0.4229164123535156, 0.43428802490234375, 0.4456596374511719, 0.45703125]}, "gradients/encoder.encoder.layers.3.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 3.0, 5.0, 5.0, 4.0, 7.0, 8.0, 4.0, 6.0, 13.0, 26.0, 30.0, 45.0, 55.0, 76.0, 88.0, 111.0, 115.0, 91.0, 70.0, 53.0, 51.0, 25.0, 23.0, 28.0, 22.0, 9.0, 9.0, 7.0, 5.0, 4.0, 2.0, 3.0, 0.0, 1.0, 1.0, 2.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.798173904418945e-05, -4.59868460893631e-05, -4.399195313453674e-05, -4.199706017971039e-05, -4.000216722488403e-05, -3.800727427005768e-05, -3.601238131523132e-05, -3.401748836040497e-05, -3.202259540557861e-05, -3.0027702450752258e-05, -2.8032809495925903e-05, -2.603791654109955e-05, -2.4043023586273193e-05, -2.204813063144684e-05, -2.0053237676620483e-05, -1.805834472179413e-05, -1.6063451766967773e-05, -1.4068558812141418e-05, -1.2073665857315063e-05, -1.0078772902488708e-05, -8.083879947662354e-06, -6.0889869928359985e-06, -4.0940940380096436e-06, -2.0992010831832886e-06, -1.043081283569336e-07, 1.8905848264694214e-06, 3.885477781295776e-06, 5.880370736122131e-06, 7.875263690948486e-06, 9.870156645774841e-06, 1.1865049600601196e-05, 1.3859942555427551e-05, 1.5854835510253906e-05, 1.784972846508026e-05, 1.9844621419906616e-05, 2.183951437473297e-05, 2.3834407329559326e-05, 2.582930028438568e-05, 2.7824193239212036e-05, 2.981908619403839e-05, 3.1813979148864746e-05, 3.38088721036911e-05, 3.5803765058517456e-05, 3.779865801334381e-05, 3.9793550968170166e-05, 4.178844392299652e-05, 4.3783336877822876e-05, 4.577822983264923e-05, 4.7773122787475586e-05, 4.976801574230194e-05, 5.1762908697128296e-05, 5.375780165195465e-05, 5.5752694606781006e-05, 5.774758756160736e-05, 5.9742480516433716e-05, 6.173737347126007e-05, 6.373226642608643e-05, 6.572715938091278e-05, 6.772205233573914e-05, 6.971694529056549e-05, 7.171183824539185e-05, 7.37067312002182e-05, 7.570162415504456e-05, 7.769651710987091e-05, 7.969141006469727e-05]}, "gradients/encoder.encoder.layers.3.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 4.0, 6.0, 6.0, 13.0, 20.0, 24.0, 39.0, 88.0, 143.0, 306.0, 921.0, 4756.0, 51691.0, 842038.0, 137009.0, 9243.0, 1438.0, 418.0, 166.0, 106.0, 45.0, 39.0, 14.0, 11.0, 11.0, 4.0, 1.0, 4.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.364501953125, -0.3490943908691406, -0.33368682861328125, -0.3182792663574219, -0.3028717041015625, -0.2874641418457031, -0.27205657958984375, -0.2566490173339844, -0.241241455078125, -0.22583389282226562, -0.21042633056640625, -0.19501876831054688, -0.1796112060546875, -0.16420364379882812, -0.14879608154296875, -0.13338851928710938, -0.11798095703125, -0.10257339477539062, -0.08716583251953125, -0.07175827026367188, -0.0563507080078125, -0.040943145751953125, -0.02553558349609375, -0.010128021240234375, 0.005279541015625, 0.020687103271484375, 0.03609466552734375, 0.051502227783203125, 0.0669097900390625, 0.08231735229492188, 0.09772491455078125, 0.11313247680664062, 0.1285400390625, 0.14394760131835938, 0.15935516357421875, 0.17476272583007812, 0.1901702880859375, 0.20557785034179688, 0.22098541259765625, 0.23639297485351562, 0.251800537109375, 0.2672080993652344, 0.28261566162109375, 0.2980232238769531, 0.3134307861328125, 0.3288383483886719, 0.34424591064453125, 0.3596534729003906, 0.37506103515625, 0.3904685974121094, 0.40587615966796875, 0.4212837219238281, 0.4366912841796875, 0.4520988464355469, 0.46750640869140625, 0.4829139709472656, 0.498321533203125, 0.5137290954589844, 0.5291366577148438, 0.5445442199707031, 0.5599517822265625, 0.5753593444824219, 0.5907669067382812, 0.6061744689941406, 0.62158203125]}, "gradients/encoder.encoder.layers.3.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 5.0, 0.0, 8.0, 9.0, 10.0, 20.0, 31.0, 32.0, 54.0, 76.0, 109.0, 127.0, 134.0, 106.0, 109.0, 69.0, 36.0, 27.0, 19.0, 15.0, 8.0, 5.0, 3.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.3505859375, -0.3341522216796875, -0.317718505859375, -0.3012847900390625, -0.28485107421875, -0.2684173583984375, -0.251983642578125, -0.2355499267578125, -0.2191162109375, -0.2026824951171875, -0.186248779296875, -0.1698150634765625, -0.15338134765625, -0.1369476318359375, -0.120513916015625, -0.1040802001953125, -0.087646484375, -0.0712127685546875, -0.054779052734375, -0.0383453369140625, -0.02191162109375, -0.0054779052734375, 0.010955810546875, 0.0273895263671875, 0.0438232421875, 0.0602569580078125, 0.076690673828125, 0.0931243896484375, 0.10955810546875, 0.1259918212890625, 0.142425537109375, 0.1588592529296875, 0.17529296875, 0.1917266845703125, 0.208160400390625, 0.2245941162109375, 0.24102783203125, 0.2574615478515625, 0.273895263671875, 0.2903289794921875, 0.3067626953125, 0.3231964111328125, 0.339630126953125, 0.3560638427734375, 0.37249755859375, 0.3889312744140625, 0.405364990234375, 0.4217987060546875, 0.438232421875, 0.4546661376953125, 0.471099853515625, 0.4875335693359375, 0.50396728515625, 0.5204010009765625, 0.536834716796875, 0.5532684326171875, 0.5697021484375, 0.5861358642578125, 0.602569580078125, 0.6190032958984375, 0.63543701171875, 0.6518707275390625, 0.668304443359375, 0.6847381591796875, 0.701171875]}, "gradients/encoder.encoder.layers.3.layer_norm.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 3.0, 0.0, 0.0, 7.0, 3.0, 16.0, 68.0, 171.0, 422.0, 187.0, 66.0, 30.0, 13.0, 8.0, 4.0, 6.0, 1.0, 0.0, 3.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-6.818939208984375, -6.402278423309326, -5.985617637634277, -5.56895637512207, -5.1522955894470215, -4.735634803771973, -4.318974018096924, -3.902312994003296, -3.485652208328247, -3.0689914226531982, -2.6523303985595703, -2.2356696128845215, -1.819008708000183, -1.4023478031158447, -0.9856870174407959, -0.569025993347168, -0.15236520767211914, 0.26429566740989685, 0.6809565424919128, 1.0976173877716064, 1.5142782926559448, 1.9309391975402832, 2.347599983215332, 2.76426100730896, 3.180921792984009, 3.5975825786590576, 4.0142436027526855, 4.430904388427734, 4.847565174102783, 5.264225959777832, 5.680887222290039, 6.097548007965088, 6.514208793640137, 6.9308695793151855, 7.347530364990234, 7.764191627502441, 8.180851936340332, 8.597513198852539, 9.01417350769043, 9.430834770202637, 9.847496032714844, 10.26415729522705, 10.680817604064941, 11.097478866577148, 11.514139175415039, 11.930800437927246, 12.347461700439453, 12.764122009277344, 13.180782318115234, 13.597443580627441, 14.014103889465332, 14.430765151977539, 14.84742546081543, 15.264086723327637, 15.680747985839844, 16.097408294677734, 16.514070510864258, 16.93073081970215, 17.347393035888672, 17.764053344726562, 18.180713653564453, 18.597373962402344, 19.014036178588867, 19.430696487426758, 19.84735679626465]}, "gradients/encoder.encoder.layers.3.layer_norm.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 2.0, 1.0, 2.0, 2.0, 3.0, 2.0, 7.0, 4.0, 8.0, 8.0, 7.0, 9.0, 12.0, 13.0, 26.0, 12.0, 27.0, 26.0, 35.0, 39.0, 44.0, 34.0, 40.0, 55.0, 87.0, 94.0, 54.0, 43.0, 45.0, 35.0, 28.0, 34.0, 22.0, 22.0, 25.0, 19.0, 10.0, 20.0, 7.0, 6.0, 10.0, 5.0, 8.0, 8.0, 2.0, 3.0, 2.0, 1.0, 5.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.6862077713012695, -5.483908176422119, -5.281608581542969, -5.07930850982666, -4.87700891494751, -4.674709320068359, -4.472409725189209, -4.270110130310059, -4.06781005859375, -3.8655104637145996, -3.66321063041687, -3.4609110355377197, -3.2586112022399902, -3.05631160736084, -2.8540120124816895, -2.651712417602539, -2.4494128227233887, -2.2471132278442383, -2.044813394546509, -1.8425137996673584, -1.6402140855789185, -1.4379143714904785, -1.2356147766113281, -1.0333150625228882, -0.8310153484344482, -0.6287156343460083, -0.42641597986221313, -0.22411632537841797, -0.021816611289978027, 0.18048310279846191, 0.3827826976776123, 0.5850824117660522, 0.7873821258544922, 0.9896818399429321, 1.191981554031372, 1.3942811489105225, 1.5965808629989624, 1.7988805770874023, 2.0011801719665527, 2.203479766845703, 2.4057796001434326, 2.608079195022583, 2.8103790283203125, 3.012678623199463, 3.2149782180786133, 3.4172780513763428, 3.619577646255493, 3.8218774795532227, 4.024177074432373, 4.226476669311523, 4.428776264190674, 4.631075859069824, 4.833375930786133, 5.035675525665283, 5.237975120544434, 5.440274715423584, 5.642574310302734, 5.844873905181885, 6.047173500061035, 6.249473571777344, 6.451773166656494, 6.6540727615356445, 6.856372356414795, 7.058671951293945, 7.260972023010254]}, "gradients/encoder.encoder.layers.2.feed_forward.output_dense.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 3.0, 0.0, 1.0, 4.0, 4.0, 10.0, 13.0, 23.0, 33.0, 53.0, 97.0, 152.0, 282.0, 552.0, 1400.0, 3717.0, 11287.0, 60842.0, 688798.0, 2938586.0, 433199.0, 41506.0, 8479.0, 2783.0, 1168.0, 562.0, 266.0, 162.0, 92.0, 67.0, 42.0, 25.0, 27.0, 15.0, 12.0, 12.0, 8.0, 4.0, 3.0, 1.0, 2.0, 1.0, 2.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.480224609375, -0.4633216857910156, -0.44641876220703125, -0.4295158386230469, -0.4126129150390625, -0.3957099914550781, -0.37880706787109375, -0.3619041442871094, -0.345001220703125, -0.3280982971191406, -0.31119537353515625, -0.2942924499511719, -0.2773895263671875, -0.2604866027832031, -0.24358367919921875, -0.22668075561523438, -0.20977783203125, -0.19287490844726562, -0.17597198486328125, -0.15906906127929688, -0.1421661376953125, -0.12526321411132812, -0.10836029052734375, -0.09145736694335938, -0.074554443359375, -0.057651519775390625, -0.04074859619140625, -0.023845672607421875, -0.0069427490234375, 0.009960174560546875, 0.02686309814453125, 0.043766021728515625, 0.0606689453125, 0.07757186889648438, 0.09447479248046875, 0.11137771606445312, 0.1282806396484375, 0.14518356323242188, 0.16208648681640625, 0.17898941040039062, 0.195892333984375, 0.21279525756835938, 0.22969818115234375, 0.24660110473632812, 0.2635040283203125, 0.2804069519042969, 0.29730987548828125, 0.3142127990722656, 0.33111572265625, 0.3480186462402344, 0.36492156982421875, 0.3818244934082031, 0.3987274169921875, 0.4156303405761719, 0.43253326416015625, 0.4494361877441406, 0.466339111328125, 0.4832420349121094, 0.5001449584960938, 0.5170478820800781, 0.5339508056640625, 0.5508537292480469, 0.5677566528320312, 0.5846595764160156, 0.6015625]}, "gradients/encoder.encoder.layers.2.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 3.0, 3.0, 9.0, 10.0, 15.0, 16.0, 37.0, 32.0, 54.0, 59.0, 77.0, 90.0, 85.0, 99.0, 103.0, 78.0, 61.0, 52.0, 41.0, 31.0, 17.0, 11.0, 9.0, 6.0, 7.0, 4.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.904296875, -0.87921142578125, -0.8541259765625, -0.82904052734375, -0.803955078125, -0.77886962890625, -0.7537841796875, -0.72869873046875, -0.70361328125, -0.67852783203125, -0.6534423828125, -0.62835693359375, -0.603271484375, -0.57818603515625, -0.5531005859375, -0.52801513671875, -0.5029296875, -0.47784423828125, -0.4527587890625, -0.42767333984375, -0.402587890625, -0.37750244140625, -0.3524169921875, -0.32733154296875, -0.30224609375, -0.27716064453125, -0.2520751953125, -0.22698974609375, -0.201904296875, -0.17681884765625, -0.1517333984375, -0.12664794921875, -0.1015625, -0.07647705078125, -0.0513916015625, -0.02630615234375, -0.001220703125, 0.02386474609375, 0.0489501953125, 0.07403564453125, 0.09912109375, 0.12420654296875, 0.1492919921875, 0.17437744140625, 0.199462890625, 0.22454833984375, 0.2496337890625, 0.27471923828125, 0.2998046875, 0.32489013671875, 0.3499755859375, 0.37506103515625, 0.400146484375, 0.42523193359375, 0.4503173828125, 0.47540283203125, 0.50048828125, 0.52557373046875, 0.5506591796875, 0.57574462890625, 0.600830078125, 0.62591552734375, 0.6510009765625, 0.67608642578125, 0.701171875]}, "gradients/encoder.encoder.layers.2.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 4.0, 5.0, 1.0, 11.0, 8.0, 9.0, 17.0, 26.0, 58.0, 85.0, 188.0, 403.0, 1202.0, 5193.0, 72500.0, 3958470.0, 146614.0, 7077.0, 1484.0, 469.0, 224.0, 97.0, 55.0, 26.0, 18.0, 10.0, 13.0, 4.0, 5.0, 5.0, 3.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.04296875, -1.005157470703125, -0.96734619140625, -0.929534912109375, -0.8917236328125, -0.853912353515625, -0.81610107421875, -0.778289794921875, -0.740478515625, -0.702667236328125, -0.66485595703125, -0.627044677734375, -0.5892333984375, -0.551422119140625, -0.51361083984375, -0.475799560546875, -0.43798828125, -0.400177001953125, -0.36236572265625, -0.324554443359375, -0.2867431640625, -0.248931884765625, -0.21112060546875, -0.173309326171875, -0.135498046875, -0.097686767578125, -0.05987548828125, -0.022064208984375, 0.0157470703125, 0.053558349609375, 0.09136962890625, 0.129180908203125, 0.1669921875, 0.204803466796875, 0.24261474609375, 0.280426025390625, 0.3182373046875, 0.356048583984375, 0.39385986328125, 0.431671142578125, 0.469482421875, 0.507293701171875, 0.54510498046875, 0.582916259765625, 0.6207275390625, 0.658538818359375, 0.69635009765625, 0.734161376953125, 0.77197265625, 0.809783935546875, 0.84759521484375, 0.885406494140625, 0.9232177734375, 0.961029052734375, 0.99884033203125, 1.036651611328125, 1.074462890625, 1.112274169921875, 1.15008544921875, 1.187896728515625, 1.2257080078125, 1.263519287109375, 1.30133056640625, 1.339141845703125, 1.376953125]}, "gradients/encoder.encoder.layers.2.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 3.0, 3.0, 6.0, 10.0, 15.0, 14.0, 29.0, 43.0, 62.0, 150.0, 230.0, 440.0, 853.0, 889.0, 667.0, 301.0, 161.0, 72.0, 49.0, 25.0, 17.0, 18.0, 2.0, 5.0, 3.0, 4.0, 3.0, 0.0, 2.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.6875, -0.6602020263671875, -0.632904052734375, -0.6056060791015625, -0.57830810546875, -0.5510101318359375, -0.523712158203125, -0.4964141845703125, -0.4691162109375, -0.4418182373046875, -0.414520263671875, -0.3872222900390625, -0.35992431640625, -0.3326263427734375, -0.305328369140625, -0.2780303955078125, -0.250732421875, -0.2234344482421875, -0.196136474609375, -0.1688385009765625, -0.14154052734375, -0.1142425537109375, -0.086944580078125, -0.0596466064453125, -0.0323486328125, -0.0050506591796875, 0.022247314453125, 0.0495452880859375, 0.07684326171875, 0.1041412353515625, 0.131439208984375, 0.1587371826171875, 0.18603515625, 0.2133331298828125, 0.240631103515625, 0.2679290771484375, 0.29522705078125, 0.3225250244140625, 0.349822998046875, 0.3771209716796875, 0.4044189453125, 0.4317169189453125, 0.459014892578125, 0.4863128662109375, 0.51361083984375, 0.5409088134765625, 0.568206787109375, 0.5955047607421875, 0.622802734375, 0.6501007080078125, 0.677398681640625, 0.7046966552734375, 0.73199462890625, 0.7592926025390625, 0.786590576171875, 0.8138885498046875, 0.8411865234375, 0.8684844970703125, 0.895782470703125, 0.9230804443359375, 0.95037841796875, 0.9776763916015625, 1.004974365234375, 1.0322723388671875, 1.0595703125]}, "gradients/encoder.encoder.layers.2.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 4.0, 9.0, 17.0, 81.0, 244.0, 354.0, 203.0, 72.0, 16.0, 3.0, 5.0, 4.0, 1.0, 2.0, 1.0, 2.0], "bins": [-17.57353973388672, -17.244478225708008, -16.915416717529297, -16.586353302001953, -16.257291793823242, -15.928230285644531, -15.59916877746582, -15.27010726928711, -14.941044807434082, -14.611983299255371, -14.282920837402344, -13.953859329223633, -13.624797821044922, -13.295735359191895, -12.966673851013184, -12.637611389160156, -12.308549880981445, -11.979488372802734, -11.650425910949707, -11.321364402770996, -10.992302894592285, -10.663240432739258, -10.334178924560547, -10.005117416381836, -9.676055908203125, -9.346994400024414, -9.017931938171387, -8.688870429992676, -8.359808921813965, -8.030746459960938, -7.701684951782227, -7.372622966766357, -7.043560981750488, -6.714498996734619, -6.385437488555908, -6.056375503540039, -5.72731351852417, -5.398251533508301, -5.06919002532959, -4.740128040313721, -4.41106653213501, -4.082004547119141, -3.7529428005218506, -3.4238810539245605, -3.0948190689086914, -2.7657573223114014, -2.4366955757141113, -2.107633590698242, -1.7785718441009521, -1.4495099782943726, -1.120448112487793, -0.7913863658905029, -0.46232450008392334, -0.13326263427734375, 0.1957991123199463, 0.5248610973358154, 0.8539228439331055, 1.182984709739685, 1.5120465755462646, 1.8411083221435547, 2.170170307159424, 2.499232053756714, 2.828293800354004, 3.157355785369873, 3.486417531967163]}, "gradients/encoder.encoder.layers.2.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 0.0, 5.0, 2.0, 1.0, 6.0, 4.0, 9.0, 14.0, 9.0, 24.0, 19.0, 22.0, 22.0, 37.0, 30.0, 42.0, 49.0, 50.0, 56.0, 63.0, 58.0, 50.0, 82.0, 62.0, 49.0, 49.0, 37.0, 28.0, 20.0, 30.0, 22.0, 9.0, 12.0, 13.0, 10.0, 1.0, 9.0, 4.0, 1.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.4971179962158203, -3.3783347606658936, -3.2595512866973877, -3.140768051147461, -3.021984577178955, -2.9032013416290283, -2.7844181060791016, -2.6656346321105957, -2.546851396560669, -2.428068161010742, -2.3092846870422363, -2.1905014514923096, -2.071718215942383, -1.952934741973877, -1.8341515064239502, -1.7153681516647339, -1.5965847969055176, -1.4778014421463013, -1.359018087387085, -1.2402348518371582, -1.121451497077942, -1.0026681423187256, -0.883884847164154, -0.7651015520095825, -0.6463181972503662, -0.5275348424911499, -0.40875154733657837, -0.28996822237968445, -0.17118489742279053, -0.05240154266357422, 0.06638175249099731, 0.18516504764556885, 0.30394840240478516, 0.4227317273616791, 0.541515052318573, 0.6602983474731445, 0.7790817022323608, 0.8978650569915771, 1.016648292541504, 1.1354316473007202, 1.2542150020599365, 1.3729983568191528, 1.4917817115783691, 1.610564947128296, 1.7293483018875122, 1.8481316566467285, 1.9669148921966553, 2.085698127746582, 2.204481601715088, 2.3232648372650146, 2.4420483112335205, 2.5608315467834473, 2.679615020751953, 2.79839825630188, 2.9171814918518066, 3.0359649658203125, 3.1547482013702393, 3.273531436920166, 3.392314910888672, 3.5110981464385986, 3.6298813819885254, 3.7486648559570312, 3.867448091506958, 3.9862313270568848, 4.105014801025391]}, "gradients/encoder.encoder.layers.2.attention.out_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 4.0, 6.0, 11.0, 13.0, 14.0, 35.0, 51.0, 94.0, 175.0, 310.0, 734.0, 1776.0, 5340.0, 21493.0, 122894.0, 582966.0, 258841.0, 40410.0, 8870.0, 2591.0, 1019.0, 416.0, 203.0, 111.0, 68.0, 54.0, 16.0, 15.0, 12.0, 8.0, 3.0, 5.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.75390625, -0.7245635986328125, -0.695220947265625, -0.6658782958984375, -0.63653564453125, -0.6071929931640625, -0.577850341796875, -0.5485076904296875, -0.5191650390625, -0.4898223876953125, -0.460479736328125, -0.4311370849609375, -0.40179443359375, -0.3724517822265625, -0.343109130859375, -0.3137664794921875, -0.284423828125, -0.2550811767578125, -0.225738525390625, -0.1963958740234375, -0.16705322265625, -0.1377105712890625, -0.108367919921875, -0.0790252685546875, -0.0496826171875, -0.0203399658203125, 0.009002685546875, 0.0383453369140625, 0.06768798828125, 0.0970306396484375, 0.126373291015625, 0.1557159423828125, 0.18505859375, 0.2144012451171875, 0.243743896484375, 0.2730865478515625, 0.30242919921875, 0.3317718505859375, 0.361114501953125, 0.3904571533203125, 0.4197998046875, 0.4491424560546875, 0.478485107421875, 0.5078277587890625, 0.53717041015625, 0.5665130615234375, 0.595855712890625, 0.6251983642578125, 0.654541015625, 0.6838836669921875, 0.713226318359375, 0.7425689697265625, 0.77191162109375, 0.8012542724609375, 0.830596923828125, 0.8599395751953125, 0.8892822265625, 0.9186248779296875, 0.947967529296875, 0.9773101806640625, 1.00665283203125, 1.0359954833984375, 1.065338134765625, 1.0946807861328125, 1.1240234375]}, "gradients/encoder.encoder.layers.2.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 1.0, 4.0, 8.0, 8.0, 15.0, 18.0, 27.0, 33.0, 33.0, 50.0, 82.0, 82.0, 97.0, 88.0, 83.0, 89.0, 69.0, 61.0, 48.0, 38.0, 27.0, 11.0, 20.0, 3.0, 6.0, 4.0, 6.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.8271484375, -0.8032150268554688, -0.7792816162109375, -0.7553482055664062, -0.731414794921875, -0.7074813842773438, -0.6835479736328125, -0.6596145629882812, -0.63568115234375, -0.6117477416992188, -0.5878143310546875, -0.5638809204101562, -0.539947509765625, -0.5160140991210938, -0.4920806884765625, -0.46814727783203125, -0.4442138671875, -0.42028045654296875, -0.3963470458984375, -0.37241363525390625, -0.348480224609375, -0.32454681396484375, -0.3006134033203125, -0.27667999267578125, -0.25274658203125, -0.22881317138671875, -0.2048797607421875, -0.18094635009765625, -0.157012939453125, -0.13307952880859375, -0.1091461181640625, -0.08521270751953125, -0.061279296875, -0.03734588623046875, -0.0134124755859375, 0.01052093505859375, 0.034454345703125, 0.05838775634765625, 0.0823211669921875, 0.10625457763671875, 0.13018798828125, 0.15412139892578125, 0.1780548095703125, 0.20198822021484375, 0.225921630859375, 0.24985504150390625, 0.2737884521484375, 0.29772186279296875, 0.3216552734375, 0.34558868408203125, 0.3695220947265625, 0.39345550537109375, 0.417388916015625, 0.44132232666015625, 0.4652557373046875, 0.48918914794921875, 0.51312255859375, 0.5370559692382812, 0.5609893798828125, 0.5849227905273438, 0.608856201171875, 0.6327896118164062, 0.6567230224609375, 0.6806564331054688, 0.70458984375]}, "gradients/encoder.encoder.layers.2.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 1.0, 4.0, 2.0, 7.0, 5.0, 13.0, 14.0, 10.0, 19.0, 37.0, 42.0, 45.0, 70.0, 110.0, 152.0, 230.0, 374.0, 658.0, 1188.0, 3173.0, 13778.0, 111038.0, 766200.0, 129191.0, 15841.0, 3431.0, 1221.0, 617.0, 364.0, 215.0, 134.0, 97.0, 61.0, 58.0, 37.0, 24.0, 20.0, 27.0, 17.0, 7.0, 10.0, 4.0, 3.0, 3.0, 3.0, 3.0, 1.0, 3.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-1.12109375, -1.0876617431640625, -1.054229736328125, -1.0207977294921875, -0.98736572265625, -0.9539337158203125, -0.920501708984375, -0.8870697021484375, -0.8536376953125, -0.8202056884765625, -0.786773681640625, -0.7533416748046875, -0.71990966796875, -0.6864776611328125, -0.653045654296875, -0.6196136474609375, -0.586181640625, -0.5527496337890625, -0.519317626953125, -0.4858856201171875, -0.45245361328125, -0.4190216064453125, -0.385589599609375, -0.3521575927734375, -0.3187255859375, -0.2852935791015625, -0.251861572265625, -0.2184295654296875, -0.18499755859375, -0.1515655517578125, -0.118133544921875, -0.0847015380859375, -0.05126953125, -0.0178375244140625, 0.015594482421875, 0.0490264892578125, 0.08245849609375, 0.1158905029296875, 0.149322509765625, 0.1827545166015625, 0.2161865234375, 0.2496185302734375, 0.283050537109375, 0.3164825439453125, 0.34991455078125, 0.3833465576171875, 0.416778564453125, 0.4502105712890625, 0.483642578125, 0.5170745849609375, 0.550506591796875, 0.5839385986328125, 0.61737060546875, 0.6508026123046875, 0.684234619140625, 0.7176666259765625, 0.7510986328125, 0.7845306396484375, 0.817962646484375, 0.8513946533203125, 0.88482666015625, 0.9182586669921875, 0.951690673828125, 0.9851226806640625, 1.0185546875]}, "gradients/encoder.encoder.layers.2.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 5.0, 2.0, 5.0, 10.0, 10.0, 10.0, 22.0, 24.0, 39.0, 41.0, 43.0, 48.0, 66.0, 63.0, 60.0, 80.0, 65.0, 65.0, 73.0, 54.0, 45.0, 46.0, 37.0, 19.0, 18.0, 13.0, 10.0, 8.0, 7.0, 7.0, 3.0, 8.0, 1.0, 1.0, 3.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.09765625, -2.0360107421875, -1.974365234375, -1.9127197265625, -1.85107421875, -1.7894287109375, -1.727783203125, -1.6661376953125, -1.6044921875, -1.5428466796875, -1.481201171875, -1.4195556640625, -1.35791015625, -1.2962646484375, -1.234619140625, -1.1729736328125, -1.111328125, -1.0496826171875, -0.988037109375, -0.9263916015625, -0.86474609375, -0.8031005859375, -0.741455078125, -0.6798095703125, -0.6181640625, -0.5565185546875, -0.494873046875, -0.4332275390625, -0.37158203125, -0.3099365234375, -0.248291015625, -0.1866455078125, -0.125, -0.0633544921875, -0.001708984375, 0.0599365234375, 0.12158203125, 0.1832275390625, 0.244873046875, 0.3065185546875, 0.3681640625, 0.4298095703125, 0.491455078125, 0.5531005859375, 0.61474609375, 0.6763916015625, 0.738037109375, 0.7996826171875, 0.861328125, 0.9229736328125, 0.984619140625, 1.0462646484375, 1.10791015625, 1.1695556640625, 1.231201171875, 1.2928466796875, 1.3544921875, 1.4161376953125, 1.477783203125, 1.5394287109375, 1.60107421875, 1.6627197265625, 1.724365234375, 1.7860107421875, 1.84765625]}, "gradients/encoder.encoder.layers.2.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 4.0, 2.0, 3.0, 5.0, 4.0, 5.0, 8.0, 14.0, 11.0, 13.0, 20.0, 25.0, 41.0, 51.0, 94.0, 123.0, 183.0, 280.0, 486.0, 757.0, 1359.0, 2415.0, 4838.0, 10592.0, 26445.0, 79751.0, 581927.0, 242476.0, 58481.0, 20539.0, 8687.0, 4023.0, 1961.0, 1189.0, 616.0, 378.0, 263.0, 164.0, 92.0, 73.0, 53.0, 25.0, 22.0, 24.0, 13.0, 7.0, 6.0, 10.0, 5.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 0.0, 1.0], "bins": [-0.2156982421875, -0.20932960510253906, -0.20296096801757812, -0.1965923309326172, -0.19022369384765625, -0.1838550567626953, -0.17748641967773438, -0.17111778259277344, -0.1647491455078125, -0.15838050842285156, -0.15201187133789062, -0.1456432342529297, -0.13927459716796875, -0.1329059600830078, -0.12653732299804688, -0.12016868591308594, -0.113800048828125, -0.10743141174316406, -0.10106277465820312, -0.09469413757324219, -0.08832550048828125, -0.08195686340332031, -0.07558822631835938, -0.06921958923339844, -0.0628509521484375, -0.05648231506347656, -0.050113677978515625, -0.04374504089355469, -0.03737640380859375, -0.031007766723632812, -0.024639129638671875, -0.018270492553710938, -0.01190185546875, -0.0055332183837890625, 0.000835418701171875, 0.0072040557861328125, 0.01357269287109375, 0.019941329956054688, 0.026309967041015625, 0.03267860412597656, 0.0390472412109375, 0.04541587829589844, 0.051784515380859375, 0.05815315246582031, 0.06452178955078125, 0.07089042663574219, 0.07725906372070312, 0.08362770080566406, 0.089996337890625, 0.09636497497558594, 0.10273361206054688, 0.10910224914550781, 0.11547088623046875, 0.12183952331542969, 0.12820816040039062, 0.13457679748535156, 0.1409454345703125, 0.14731407165527344, 0.15368270874023438, 0.1600513458251953, 0.16641998291015625, 0.1727886199951172, 0.17915725708007812, 0.18552589416503906, 0.19189453125]}, "gradients/encoder.encoder.layers.2.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 3.0, 1.0, 4.0, 2.0, 5.0, 7.0, 9.0, 9.0, 18.0, 18.0, 15.0, 32.0, 34.0, 60.0, 60.0, 89.0, 105.0, 122.0, 104.0, 74.0, 57.0, 39.0, 36.0, 22.0, 19.0, 14.0, 6.0, 7.0, 9.0, 11.0, 3.0, 5.0, 2.0, 1.0, 4.0, 2.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.508827209472656e-05, -6.323400884866714e-05, -6.137974560260773e-05, -5.952548235654831e-05, -5.767121911048889e-05, -5.5816955864429474e-05, -5.3962692618370056e-05, -5.210842937231064e-05, -5.025416612625122e-05, -4.83999028801918e-05, -4.6545639634132385e-05, -4.469137638807297e-05, -4.283711314201355e-05, -4.098284989595413e-05, -3.9128586649894714e-05, -3.72743234038353e-05, -3.542006015777588e-05, -3.356579691171646e-05, -3.1711533665657043e-05, -2.9857270419597626e-05, -2.8003007173538208e-05, -2.614874392747879e-05, -2.4294480681419373e-05, -2.2440217435359955e-05, -2.0585954189300537e-05, -1.873169094324112e-05, -1.68774276971817e-05, -1.5023164451122284e-05, -1.3168901205062866e-05, -1.1314637959003448e-05, -9.46037471294403e-06, -7.606111466884613e-06, -5.751848220825195e-06, -3.897584974765778e-06, -2.04332172870636e-06, -1.8905848264694214e-07, 1.6652047634124756e-06, 3.5194680094718933e-06, 5.373731255531311e-06, 7.227994501590729e-06, 9.082257747650146e-06, 1.0936520993709564e-05, 1.2790784239768982e-05, 1.46450474858284e-05, 1.6499310731887817e-05, 1.8353573977947235e-05, 2.0207837224006653e-05, 2.206210047006607e-05, 2.3916363716125488e-05, 2.5770626962184906e-05, 2.7624890208244324e-05, 2.947915345430374e-05, 3.133341670036316e-05, 3.318767994642258e-05, 3.5041943192481995e-05, 3.689620643854141e-05, 3.875046968460083e-05, 4.060473293066025e-05, 4.2458996176719666e-05, 4.431325942277908e-05, 4.61675226688385e-05, 4.802178591489792e-05, 4.9876049160957336e-05, 5.1730312407016754e-05, 5.358457565307617e-05]}, "gradients/encoder.encoder.layers.2.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 3.0, 2.0, 7.0, 14.0, 17.0, 45.0, 62.0, 147.0, 345.0, 1007.0, 6572.0, 122929.0, 886889.0, 26827.0, 2668.0, 602.0, 205.0, 90.0, 61.0, 24.0, 20.0, 12.0, 6.0, 8.0, 4.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.681640625, -0.6638298034667969, -0.6460189819335938, -0.6282081604003906, -0.6103973388671875, -0.5925865173339844, -0.5747756958007812, -0.5569648742675781, -0.539154052734375, -0.5213432312011719, -0.5035324096679688, -0.4857215881347656, -0.4679107666015625, -0.4500999450683594, -0.43228912353515625, -0.4144783020019531, -0.39666748046875, -0.3788566589355469, -0.36104583740234375, -0.3432350158691406, -0.3254241943359375, -0.3076133728027344, -0.28980255126953125, -0.2719917297363281, -0.254180908203125, -0.23637008666992188, -0.21855926513671875, -0.20074844360351562, -0.1829376220703125, -0.16512680053710938, -0.14731597900390625, -0.12950515747070312, -0.1116943359375, -0.09388351440429688, -0.07607269287109375, -0.058261871337890625, -0.0404510498046875, -0.022640228271484375, -0.00482940673828125, 0.012981414794921875, 0.030792236328125, 0.048603057861328125, 0.06641387939453125, 0.08422470092773438, 0.1020355224609375, 0.11984634399414062, 0.13765716552734375, 0.15546798706054688, 0.17327880859375, 0.19108963012695312, 0.20890045166015625, 0.22671127319335938, 0.2445220947265625, 0.2623329162597656, 0.28014373779296875, 0.2979545593261719, 0.315765380859375, 0.3335762023925781, 0.35138702392578125, 0.3691978454589844, 0.3870086669921875, 0.4048194885253906, 0.42263031005859375, 0.4404411315917969, 0.458251953125]}, "gradients/encoder.encoder.layers.2.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 2.0, 1.0, 10.0, 5.0, 8.0, 15.0, 17.0, 18.0, 38.0, 48.0, 62.0, 59.0, 120.0, 133.0, 123.0, 93.0, 64.0, 54.0, 29.0, 33.0, 23.0, 19.0, 4.0, 10.0, 4.0, 6.0, 4.0, 3.0, 4.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.463623046875, -0.4510002136230469, -0.43837738037109375, -0.4257545471191406, -0.4131317138671875, -0.4005088806152344, -0.38788604736328125, -0.3752632141113281, -0.362640380859375, -0.3500175476074219, -0.33739471435546875, -0.3247718811035156, -0.3121490478515625, -0.2995262145996094, -0.28690338134765625, -0.2742805480957031, -0.26165771484375, -0.24903488159179688, -0.23641204833984375, -0.22378921508789062, -0.2111663818359375, -0.19854354858398438, -0.18592071533203125, -0.17329788208007812, -0.160675048828125, -0.14805221557617188, -0.13542938232421875, -0.12280654907226562, -0.1101837158203125, -0.09756088256835938, -0.08493804931640625, -0.07231521606445312, -0.0596923828125, -0.047069549560546875, -0.03444671630859375, -0.021823883056640625, -0.0092010498046875, 0.003421783447265625, 0.01604461669921875, 0.028667449951171875, 0.041290283203125, 0.053913116455078125, 0.06653594970703125, 0.07915878295898438, 0.0917816162109375, 0.10440444946289062, 0.11702728271484375, 0.12965011596679688, 0.14227294921875, 0.15489578247070312, 0.16751861572265625, 0.18014144897460938, 0.1927642822265625, 0.20538711547851562, 0.21800994873046875, 0.23063278198242188, 0.243255615234375, 0.2558784484863281, 0.26850128173828125, 0.2811241149902344, 0.2937469482421875, 0.3063697814941406, 0.31899261474609375, 0.3316154479980469, 0.34423828125]}, "gradients/encoder.encoder.layers.2.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 4.0, 8.0, 34.0, 229.0, 538.0, 144.0, 34.0, 14.0, 5.0, 4.0, 2.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-11.751058578491211, -11.136977195739746, -10.522895812988281, -9.908814430236816, -9.294733047485352, -8.68065071105957, -8.066569328308105, -7.452487945556641, -6.838406562805176, -6.224325180053711, -5.610243797302246, -4.996161937713623, -4.382080554962158, -3.7679991722106934, -3.1539175510406494, -2.5398359298706055, -1.9257545471191406, -1.3116730451583862, -0.6975915431976318, -0.08351004123687744, 0.530571460723877, 1.1446528434753418, 1.7587344646453857, 2.3728160858154297, 2.9868974685668945, 3.6009788513183594, 4.215060234069824, 4.829142093658447, 5.443223476409912, 6.057304859161377, 6.67138671875, 7.285468101501465, 7.899547576904297, 8.513628959655762, 9.127710342407227, 9.741791725158691, 10.355873107910156, 10.969955444335938, 11.584036827087402, 12.198118209838867, 12.812199592590332, 13.426280975341797, 14.040362358093262, 14.654443740844727, 15.268526077270508, 15.882606506347656, 16.496688842773438, 17.11077117919922, 17.724851608276367, 18.33893394470215, 18.953014373779297, 19.567096710205078, 20.181177139282227, 20.795259475708008, 21.409339904785156, 22.023422241210938, 22.63750457763672, 23.2515869140625, 23.86566734313965, 24.47974967956543, 25.093830108642578, 25.70791244506836, 26.321992874145508, 26.93607521057129, 27.550155639648438]}, "gradients/encoder.encoder.layers.2.layer_norm.bias": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 0.0, 3.0, 3.0, 1.0, 2.0, 1.0, 4.0, 7.0, 6.0, 4.0, 7.0, 1.0, 8.0, 16.0, 16.0, 12.0, 23.0, 21.0, 23.0, 16.0, 32.0, 33.0, 33.0, 39.0, 42.0, 73.0, 85.0, 94.0, 71.0, 45.0, 36.0, 38.0, 18.0, 19.0, 29.0, 18.0, 16.0, 18.0, 9.0, 11.0, 13.0, 15.0, 12.0, 4.0, 4.0, 9.0, 6.0, 6.0, 2.0, 5.0, 2.0, 3.0, 3.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-4.552083969116211, -4.4005937576293945, -4.249103546142578, -4.097613334655762, -3.9461231231689453, -3.794632911682129, -3.6431427001953125, -3.491652488708496, -3.3401622772216797, -3.1886720657348633, -3.037181854248047, -2.8856916427612305, -2.734201431274414, -2.5827112197875977, -2.4312210083007812, -2.279730796813965, -2.1282408237457275, -1.9767506122589111, -1.8252604007720947, -1.6737701892852783, -1.522279977798462, -1.3707897663116455, -1.2192996740341187, -1.0678094625473022, -0.9163192510604858, -0.7648290395736694, -0.613338828086853, -0.4618486762046814, -0.310358464717865, -0.15886825323104858, -0.007378101348876953, 0.14411211013793945, 0.29560232162475586, 0.44709253311157227, 0.5985827445983887, 0.7500728964805603, 0.9015631079673767, 1.053053379058838, 1.2045434713363647, 1.3560336828231812, 1.5075238943099976, 1.659014105796814, 1.8105043172836304, 1.9619944095611572, 2.1134846210479736, 2.26497483253479, 2.4164650440216064, 2.567955255508423, 2.7194454669952393, 2.8709356784820557, 3.022425889968872, 3.1739161014556885, 3.325406312942505, 3.4768965244293213, 3.6283864974975586, 3.779876708984375, 3.9313669204711914, 4.082857131958008, 4.234347343444824, 4.385837554931641, 4.537327766418457, 4.688817977905273, 4.84030818939209, 4.991798400878906, 5.143288612365723]}, "gradients/encoder.encoder.layers.1.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 3.0, 3.0, 3.0, 9.0, 5.0, 17.0, 27.0, 39.0, 43.0, 104.0, 177.0, 317.0, 663.0, 2308.0, 10744.0, 129127.0, 2721995.0, 1273684.0, 46837.0, 5546.0, 1468.0, 501.0, 249.0, 137.0, 96.0, 68.0, 31.0, 20.0, 20.0, 13.0, 8.0, 8.0, 7.0, 3.0, 3.0, 1.0, 2.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.8662109375, -0.842041015625, -0.81787109375, -0.793701171875, -0.76953125, -0.745361328125, -0.72119140625, -0.697021484375, -0.6728515625, -0.648681640625, -0.62451171875, -0.600341796875, -0.576171875, -0.552001953125, -0.52783203125, -0.503662109375, -0.4794921875, -0.455322265625, -0.43115234375, -0.406982421875, -0.3828125, -0.358642578125, -0.33447265625, -0.310302734375, -0.2861328125, -0.261962890625, -0.23779296875, -0.213623046875, -0.189453125, -0.165283203125, -0.14111328125, -0.116943359375, -0.0927734375, -0.068603515625, -0.04443359375, -0.020263671875, 0.00390625, 0.028076171875, 0.05224609375, 0.076416015625, 0.1005859375, 0.124755859375, 0.14892578125, 0.173095703125, 0.197265625, 0.221435546875, 0.24560546875, 0.269775390625, 0.2939453125, 0.318115234375, 0.34228515625, 0.366455078125, 0.390625, 0.414794921875, 0.43896484375, 0.463134765625, 0.4873046875, 0.511474609375, 0.53564453125, 0.559814453125, 0.583984375, 0.608154296875, 0.63232421875, 0.656494140625, 0.6806640625]}, "gradients/encoder.encoder.layers.1.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 2.0, 11.0, 6.0, 10.0, 16.0, 24.0, 36.0, 40.0, 43.0, 66.0, 70.0, 82.0, 101.0, 93.0, 75.0, 75.0, 65.0, 52.0, 37.0, 34.0, 18.0, 17.0, 10.0, 10.0, 4.0, 5.0, 5.0, 2.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.75537109375, -0.7325439453125, -0.709716796875, -0.6868896484375, -0.6640625, -0.6412353515625, -0.618408203125, -0.5955810546875, -0.57275390625, -0.5499267578125, -0.527099609375, -0.5042724609375, -0.4814453125, -0.4586181640625, -0.435791015625, -0.4129638671875, -0.39013671875, -0.3673095703125, -0.344482421875, -0.3216552734375, -0.298828125, -0.2760009765625, -0.253173828125, -0.2303466796875, -0.20751953125, -0.1846923828125, -0.161865234375, -0.1390380859375, -0.1162109375, -0.0933837890625, -0.070556640625, -0.0477294921875, -0.02490234375, -0.0020751953125, 0.020751953125, 0.0435791015625, 0.06640625, 0.0892333984375, 0.112060546875, 0.1348876953125, 0.15771484375, 0.1805419921875, 0.203369140625, 0.2261962890625, 0.2490234375, 0.2718505859375, 0.294677734375, 0.3175048828125, 0.34033203125, 0.3631591796875, 0.385986328125, 0.4088134765625, 0.431640625, 0.4544677734375, 0.477294921875, 0.5001220703125, 0.52294921875, 0.5457763671875, 0.568603515625, 0.5914306640625, 0.6142578125, 0.6370849609375, 0.659912109375, 0.6827392578125, 0.70556640625]}, "gradients/encoder.encoder.layers.1.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 4.0, 1.0, 10.0, 10.0, 40.0, 83.0, 296.0, 1097.0, 17244.0, 4163471.0, 10814.0, 854.0, 270.0, 54.0, 25.0, 11.0, 3.0, 3.0, 2.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.8828125, -2.79400634765625, -2.7052001953125, -2.61639404296875, -2.527587890625, -2.43878173828125, -2.3499755859375, -2.26116943359375, -2.17236328125, -2.08355712890625, -1.9947509765625, -1.90594482421875, -1.817138671875, -1.72833251953125, -1.6395263671875, -1.55072021484375, -1.4619140625, -1.37310791015625, -1.2843017578125, -1.19549560546875, -1.106689453125, -1.01788330078125, -0.9290771484375, -0.84027099609375, -0.75146484375, -0.66265869140625, -0.5738525390625, -0.48504638671875, -0.396240234375, -0.30743408203125, -0.2186279296875, -0.12982177734375, -0.041015625, 0.04779052734375, 0.1365966796875, 0.22540283203125, 0.314208984375, 0.40301513671875, 0.4918212890625, 0.58062744140625, 0.66943359375, 0.75823974609375, 0.8470458984375, 0.93585205078125, 1.024658203125, 1.11346435546875, 1.2022705078125, 1.29107666015625, 1.3798828125, 1.46868896484375, 1.5574951171875, 1.64630126953125, 1.735107421875, 1.82391357421875, 1.9127197265625, 2.00152587890625, 2.09033203125, 2.17913818359375, 2.2679443359375, 2.35675048828125, 2.445556640625, 2.53436279296875, 2.6231689453125, 2.71197509765625, 2.80078125]}, "gradients/encoder.encoder.layers.1.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 2.0, 2.0, 4.0, 2.0, 10.0, 12.0, 19.0, 27.0, 51.0, 105.0, 201.0, 458.0, 889.0, 1069.0, 641.0, 318.0, 126.0, 52.0, 45.0, 19.0, 15.0, 3.0, 6.0, 2.0, 2.0, 1.0, 5.0, 1.0], "bins": [-1.7548828125, -1.7192764282226562, -1.6836700439453125, -1.6480636596679688, -1.612457275390625, -1.5768508911132812, -1.5412445068359375, -1.5056381225585938, -1.47003173828125, -1.4344253540039062, -1.3988189697265625, -1.3632125854492188, -1.327606201171875, -1.2919998168945312, -1.2563934326171875, -1.2207870483398438, -1.1851806640625, -1.1495742797851562, -1.1139678955078125, -1.0783615112304688, -1.042755126953125, -1.0071487426757812, -0.9715423583984375, -0.9359359741210938, -0.90032958984375, -0.8647232055664062, -0.8291168212890625, -0.7935104370117188, -0.757904052734375, -0.7222976684570312, -0.6866912841796875, -0.6510848999023438, -0.615478515625, -0.5798721313476562, -0.5442657470703125, -0.5086593627929688, -0.473052978515625, -0.43744659423828125, -0.4018402099609375, -0.36623382568359375, -0.33062744140625, -0.29502105712890625, -0.2594146728515625, -0.22380828857421875, -0.188201904296875, -0.15259552001953125, -0.1169891357421875, -0.08138275146484375, -0.0457763671875, -0.01016998291015625, 0.0254364013671875, 0.06104278564453125, 0.096649169921875, 0.13225555419921875, 0.1678619384765625, 0.20346832275390625, 0.23907470703125, 0.27468109130859375, 0.3102874755859375, 0.34589385986328125, 0.381500244140625, 0.41710662841796875, 0.4527130126953125, 0.48831939697265625, 0.52392578125]}, "gradients/encoder.encoder.layers.1.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 1.0, 6.0, 16.0, 23.0, 79.0, 253.0, 397.0, 166.0, 41.0, 14.0, 7.0, 4.0, 4.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-16.131092071533203, -15.67730712890625, -15.22352123260498, -14.769736289978027, -14.315950393676758, -13.862165451049805, -13.408380508422852, -12.954595565795898, -12.500809669494629, -12.047024726867676, -11.593238830566406, -11.139453887939453, -10.6856689453125, -10.23188304901123, -9.778098106384277, -9.324312210083008, -8.870527267456055, -8.416742324829102, -7.962956428527832, -7.509171485900879, -7.055386066436768, -6.601600646972656, -6.147815704345703, -5.694030284881592, -5.2402448654174805, -4.786459445953369, -4.332674026489258, -3.8788890838623047, -3.4251036643981934, -2.971318244934082, -2.51753306388855, -2.0637478828430176, -1.6099634170532227, -1.1561781167984009, -0.7023928165435791, -0.24860751628875732, 0.20517778396606445, 0.6589632034301758, 1.112748384475708, 1.5665335655212402, 2.0203189849853516, 2.474104404449463, 2.927889585494995, 3.3816747665405273, 3.8354601860046387, 4.28924560546875, 4.743030548095703, 5.1968159675598145, 5.650601387023926, 6.104386806488037, 6.558172225952148, 7.011957168579102, 7.465742588043213, 7.919528007507324, 8.373312950134277, 8.827098846435547, 9.2808837890625, 9.734668731689453, 10.188454627990723, 10.642239570617676, 11.096025466918945, 11.549810409545898, 12.003595352172852, 12.457380294799805, 12.911166191101074]}, "gradients/encoder.encoder.layers.1.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0, 2.0, 2.0, 2.0, 2.0, 5.0, 7.0, 5.0, 10.0, 12.0, 13.0, 18.0, 29.0, 14.0, 29.0, 17.0, 28.0, 30.0, 45.0, 35.0, 34.0, 45.0, 38.0, 41.0, 41.0, 43.0, 53.0, 38.0, 45.0, 41.0, 38.0, 33.0, 29.0, 33.0, 19.0, 21.0, 27.0, 18.0, 20.0, 11.0, 11.0, 2.0, 8.0, 5.0, 5.0, 2.0, 1.0, 0.0, 2.0, 1.0, 1.0, 1.0, 1.0, 2.0], "bins": [-4.32381010055542, -4.196715354919434, -4.0696210861206055, -3.9425265789031982, -3.815432071685791, -3.688337564468384, -3.5612430572509766, -3.4341483116149902, -3.307054042816162, -3.179959535598755, -3.0528650283813477, -2.9257705211639404, -2.798676013946533, -2.671581506729126, -2.5444869995117188, -2.4173922538757324, -2.290297746658325, -2.163203239440918, -2.0361087322235107, -1.9090142250061035, -1.7819197177886963, -1.654825210571289, -1.5277305841445923, -1.400636076927185, -1.2735415697097778, -1.1464470624923706, -1.0193525552749634, -0.8922579884529114, -0.7651634812355042, -0.6380689740180969, -0.5109744071960449, -0.3838798999786377, -0.25678539276123047, -0.12969087064266205, -0.002596348524093628, 0.12449818849563599, 0.2515926957130432, 0.37868720293045044, 0.5057817697525024, 0.6328762769699097, 0.7599707841873169, 0.8870652914047241, 1.0141597986221313, 1.1412544250488281, 1.2683489322662354, 1.3954434394836426, 1.5225379467010498, 1.649632453918457, 1.7767269611358643, 1.9038214683532715, 2.0309159755706787, 2.158010482788086, 2.285104990005493, 2.4121994972229004, 2.5392942428588867, 2.666388511657715, 2.793483257293701, 2.9205777645111084, 3.0476722717285156, 3.174766778945923, 3.30186128616333, 3.4289557933807373, 3.5560503005981445, 3.683145046234131, 3.810239315032959]}, "gradients/encoder.encoder.layers.1.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0, 5.0, 3.0, 5.0, 11.0, 13.0, 14.0, 19.0, 48.0, 73.0, 140.0, 234.0, 464.0, 1098.0, 2729.0, 7690.0, 26089.0, 122783.0, 544165.0, 271925.0, 50661.0, 13130.0, 4256.0, 1589.0, 658.0, 339.0, 195.0, 89.0, 60.0, 25.0, 15.0, 19.0, 10.0, 7.0, 2.0, 3.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.134765625, -1.106903076171875, -1.07904052734375, -1.051177978515625, -1.0233154296875, -0.995452880859375, -0.96759033203125, -0.939727783203125, -0.911865234375, -0.884002685546875, -0.85614013671875, -0.828277587890625, -0.8004150390625, -0.772552490234375, -0.74468994140625, -0.716827392578125, -0.68896484375, -0.661102294921875, -0.63323974609375, -0.605377197265625, -0.5775146484375, -0.549652099609375, -0.52178955078125, -0.493927001953125, -0.466064453125, -0.438201904296875, -0.41033935546875, -0.382476806640625, -0.3546142578125, -0.326751708984375, -0.29888916015625, -0.271026611328125, -0.2431640625, -0.215301513671875, -0.18743896484375, -0.159576416015625, -0.1317138671875, -0.103851318359375, -0.07598876953125, -0.048126220703125, -0.020263671875, 0.007598876953125, 0.03546142578125, 0.063323974609375, 0.0911865234375, 0.119049072265625, 0.14691162109375, 0.174774169921875, 0.20263671875, 0.230499267578125, 0.25836181640625, 0.286224365234375, 0.3140869140625, 0.341949462890625, 0.36981201171875, 0.397674560546875, 0.425537109375, 0.453399658203125, 0.48126220703125, 0.509124755859375, 0.5369873046875, 0.564849853515625, 0.59271240234375, 0.620574951171875, 0.6484375]}, "gradients/encoder.encoder.layers.1.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 4.0, 5.0, 4.0, 5.0, 7.0, 16.0, 16.0, 22.0, 36.0, 43.0, 62.0, 53.0, 79.0, 68.0, 90.0, 88.0, 75.0, 76.0, 61.0, 48.0, 51.0, 21.0, 29.0, 18.0, 12.0, 9.0, 4.0, 2.0, 5.0, 3.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.75048828125, -0.7277145385742188, -0.7049407958984375, -0.6821670532226562, -0.659393310546875, -0.6366195678710938, -0.6138458251953125, -0.5910720825195312, -0.56829833984375, -0.5455245971679688, -0.5227508544921875, -0.49997711181640625, -0.477203369140625, -0.45442962646484375, -0.4316558837890625, -0.40888214111328125, -0.3861083984375, -0.36333465576171875, -0.3405609130859375, -0.31778717041015625, -0.295013427734375, -0.27223968505859375, -0.2494659423828125, -0.22669219970703125, -0.20391845703125, -0.18114471435546875, -0.1583709716796875, -0.13559722900390625, -0.112823486328125, -0.09004974365234375, -0.0672760009765625, -0.04450225830078125, -0.021728515625, 0.00104522705078125, 0.0238189697265625, 0.04659271240234375, 0.069366455078125, 0.09214019775390625, 0.1149139404296875, 0.13768768310546875, 0.16046142578125, 0.18323516845703125, 0.2060089111328125, 0.22878265380859375, 0.251556396484375, 0.27433013916015625, 0.2971038818359375, 0.31987762451171875, 0.3426513671875, 0.36542510986328125, 0.3881988525390625, 0.41097259521484375, 0.433746337890625, 0.45652008056640625, 0.4792938232421875, 0.5020675659179688, 0.52484130859375, 0.5476150512695312, 0.5703887939453125, 0.5931625366210938, 0.615936279296875, 0.6387100219726562, 0.6614837646484375, 0.6842575073242188, 0.70703125]}, "gradients/encoder.encoder.layers.1.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 3.0, 1.0, 3.0, 2.0, 3.0, 7.0, 6.0, 6.0, 9.0, 8.0, 11.0, 19.0, 20.0, 26.0, 40.0, 63.0, 95.0, 114.0, 175.0, 272.0, 445.0, 748.0, 1544.0, 4413.0, 27706.0, 759643.0, 232671.0, 14419.0, 3064.0, 1191.0, 618.0, 381.0, 254.0, 154.0, 132.0, 91.0, 61.0, 41.0, 30.0, 14.0, 14.0, 10.0, 8.0, 3.0, 7.0, 2.0, 7.0, 1.0, 4.0, 4.0, 1.0, 1.0, 1.0, 0.0, 2.0, 2.0], "bins": [-1.4951171875, -1.450958251953125, -1.40679931640625, -1.362640380859375, -1.3184814453125, -1.274322509765625, -1.23016357421875, -1.186004638671875, -1.141845703125, -1.097686767578125, -1.05352783203125, -1.009368896484375, -0.9652099609375, -0.921051025390625, -0.87689208984375, -0.832733154296875, -0.78857421875, -0.744415283203125, -0.70025634765625, -0.656097412109375, -0.6119384765625, -0.567779541015625, -0.52362060546875, -0.479461669921875, -0.435302734375, -0.391143798828125, -0.34698486328125, -0.302825927734375, -0.2586669921875, -0.214508056640625, -0.17034912109375, -0.126190185546875, -0.08203125, -0.037872314453125, 0.00628662109375, 0.050445556640625, 0.0946044921875, 0.138763427734375, 0.18292236328125, 0.227081298828125, 0.271240234375, 0.315399169921875, 0.35955810546875, 0.403717041015625, 0.4478759765625, 0.492034912109375, 0.53619384765625, 0.580352783203125, 0.62451171875, 0.668670654296875, 0.71282958984375, 0.756988525390625, 0.8011474609375, 0.845306396484375, 0.88946533203125, 0.933624267578125, 0.977783203125, 1.021942138671875, 1.06610107421875, 1.110260009765625, 1.1544189453125, 1.198577880859375, 1.24273681640625, 1.286895751953125, 1.3310546875]}, "gradients/encoder.encoder.layers.1.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 3.0, 1.0, 1.0, 5.0, 2.0, 4.0, 9.0, 12.0, 15.0, 15.0, 27.0, 37.0, 34.0, 47.0, 53.0, 72.0, 67.0, 63.0, 79.0, 77.0, 63.0, 55.0, 64.0, 38.0, 38.0, 34.0, 24.0, 14.0, 12.0, 16.0, 9.0, 6.0, 2.0, 3.0, 1.0, 1.0, 1.0, 0.0, 3.0, 1.0, 2.0, 0.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.275390625, -2.207366943359375, -2.13934326171875, -2.071319580078125, -2.0032958984375, -1.935272216796875, -1.86724853515625, -1.799224853515625, -1.731201171875, -1.663177490234375, -1.59515380859375, -1.527130126953125, -1.4591064453125, -1.391082763671875, -1.32305908203125, -1.255035400390625, -1.18701171875, -1.118988037109375, -1.05096435546875, -0.982940673828125, -0.9149169921875, -0.846893310546875, -0.77886962890625, -0.710845947265625, -0.642822265625, -0.574798583984375, -0.50677490234375, -0.438751220703125, -0.3707275390625, -0.302703857421875, -0.23468017578125, -0.166656494140625, -0.0986328125, -0.030609130859375, 0.03741455078125, 0.105438232421875, 0.1734619140625, 0.241485595703125, 0.30950927734375, 0.377532958984375, 0.445556640625, 0.513580322265625, 0.58160400390625, 0.649627685546875, 0.7176513671875, 0.785675048828125, 0.85369873046875, 0.921722412109375, 0.98974609375, 1.057769775390625, 1.12579345703125, 1.193817138671875, 1.2618408203125, 1.329864501953125, 1.39788818359375, 1.465911865234375, 1.533935546875, 1.601959228515625, 1.66998291015625, 1.738006591796875, 1.8060302734375, 1.874053955078125, 1.94207763671875, 2.010101318359375, 2.078125]}, "gradients/encoder.encoder.layers.1.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0, 0.0, 1.0, 3.0, 2.0, 7.0, 3.0, 7.0, 4.0, 4.0, 4.0, 4.0, 17.0, 11.0, 32.0, 29.0, 59.0, 113.0, 158.0, 304.0, 624.0, 1540.0, 4452.0, 20656.0, 234450.0, 744901.0, 31751.0, 5940.0, 1902.0, 721.0, 337.0, 170.0, 126.0, 67.0, 35.0, 27.0, 26.0, 19.0, 9.0, 5.0, 10.0, 9.0, 5.0, 5.0, 1.0, 1.0, 2.0, 6.0, 2.0, 1.0, 2.0, 2.0, 3.0, 2.0], "bins": [-0.373291015625, -0.3626594543457031, -0.35202789306640625, -0.3413963317871094, -0.3307647705078125, -0.3201332092285156, -0.30950164794921875, -0.2988700866699219, -0.288238525390625, -0.2776069641113281, -0.26697540283203125, -0.2563438415527344, -0.2457122802734375, -0.23508071899414062, -0.22444915771484375, -0.21381759643554688, -0.20318603515625, -0.19255447387695312, -0.18192291259765625, -0.17129135131835938, -0.1606597900390625, -0.15002822875976562, -0.13939666748046875, -0.12876510620117188, -0.118133544921875, -0.10750198364257812, -0.09687042236328125, -0.08623886108398438, -0.0756072998046875, -0.06497573852539062, -0.05434417724609375, -0.043712615966796875, -0.0330810546875, -0.022449493408203125, -0.01181793212890625, -0.001186370849609375, 0.0094451904296875, 0.020076751708984375, 0.03070831298828125, 0.041339874267578125, 0.051971435546875, 0.06260299682617188, 0.07323455810546875, 0.08386611938476562, 0.0944976806640625, 0.10512924194335938, 0.11576080322265625, 0.12639236450195312, 0.13702392578125, 0.14765548706054688, 0.15828704833984375, 0.16891860961914062, 0.1795501708984375, 0.19018173217773438, 0.20081329345703125, 0.21144485473632812, 0.222076416015625, 0.23270797729492188, 0.24333953857421875, 0.2539710998535156, 0.2646026611328125, 0.2752342224121094, 0.28586578369140625, 0.2964973449707031, 0.30712890625]}, "gradients/encoder.encoder.layers.1.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 4.0, 8.0, 2.0, 2.0, 14.0, 12.0, 18.0, 30.0, 48.0, 61.0, 129.0, 185.0, 183.0, 102.0, 69.0, 50.0, 26.0, 17.0, 13.0, 7.0, 10.0, 11.0, 5.0, 0.0, 2.0, 1.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.249282836914062e-05, -7.973518222570419e-05, -7.697753608226776e-05, -7.421988993883133e-05, -7.14622437953949e-05, -6.870459765195847e-05, -6.594695150852203e-05, -6.31893053650856e-05, -6.043165922164917e-05, -5.767401307821274e-05, -5.4916366934776306e-05, -5.2158720791339874e-05, -4.940107464790344e-05, -4.664342850446701e-05, -4.388578236103058e-05, -4.112813621759415e-05, -3.8370490074157715e-05, -3.561284393072128e-05, -3.285519778728485e-05, -3.009755164384842e-05, -2.7339905500411987e-05, -2.4582259356975555e-05, -2.1824613213539124e-05, -1.906696707010269e-05, -1.630932092666626e-05, -1.3551674783229828e-05, -1.0794028639793396e-05, -8.036382496356964e-06, -5.278736352920532e-06, -2.5210902094841003e-06, 2.3655593395233154e-07, 2.9942020773887634e-06, 5.751848220825195e-06, 8.509494364261627e-06, 1.1267140507698059e-05, 1.4024786651134491e-05, 1.6782432794570923e-05, 1.9540078938007355e-05, 2.2297725081443787e-05, 2.505537122488022e-05, 2.781301736831665e-05, 3.057066351175308e-05, 3.3328309655189514e-05, 3.6085955798625946e-05, 3.884360194206238e-05, 4.160124808549881e-05, 4.435889422893524e-05, 4.7116540372371674e-05, 4.9874186515808105e-05, 5.263183265924454e-05, 5.538947880268097e-05, 5.81471249461174e-05, 6.090477108955383e-05, 6.366241723299026e-05, 6.64200633764267e-05, 6.917770951986313e-05, 7.193535566329956e-05, 7.469300180673599e-05, 7.745064795017242e-05, 8.020829409360886e-05, 8.296594023704529e-05, 8.572358638048172e-05, 8.848123252391815e-05, 9.123887866735458e-05, 9.399652481079102e-05]}, "gradients/encoder.encoder.layers.1.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 4.0, 1.0, 5.0, 5.0, 7.0, 9.0, 12.0, 21.0, 27.0, 41.0, 80.0, 136.0, 294.0, 751.0, 2772.0, 17308.0, 780469.0, 232699.0, 10692.0, 2006.0, 634.0, 264.0, 115.0, 87.0, 36.0, 39.0, 18.0, 10.0, 6.0, 7.0, 4.0, 4.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.32177734375, -0.30831146240234375, -0.2948455810546875, -0.28137969970703125, -0.267913818359375, -0.25444793701171875, -0.2409820556640625, -0.22751617431640625, -0.21405029296875, -0.20058441162109375, -0.1871185302734375, -0.17365264892578125, -0.160186767578125, -0.14672088623046875, -0.1332550048828125, -0.11978912353515625, -0.1063232421875, -0.09285736083984375, -0.0793914794921875, -0.06592559814453125, -0.052459716796875, -0.03899383544921875, -0.0255279541015625, -0.01206207275390625, 0.00140380859375, 0.01486968994140625, 0.0283355712890625, 0.04180145263671875, 0.055267333984375, 0.06873321533203125, 0.0821990966796875, 0.09566497802734375, 0.109130859375, 0.12259674072265625, 0.1360626220703125, 0.14952850341796875, 0.162994384765625, 0.17646026611328125, 0.1899261474609375, 0.20339202880859375, 0.21685791015625, 0.23032379150390625, 0.2437896728515625, 0.25725555419921875, 0.270721435546875, 0.28418731689453125, 0.2976531982421875, 0.31111907958984375, 0.3245849609375, 0.33805084228515625, 0.3515167236328125, 0.36498260498046875, 0.378448486328125, 0.39191436767578125, 0.4053802490234375, 0.41884613037109375, 0.43231201171875, 0.44577789306640625, 0.4592437744140625, 0.47270965576171875, 0.486175537109375, 0.49964141845703125, 0.5131072998046875, 0.5265731811523438, 0.5400390625]}, "gradients/encoder.encoder.layers.1.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 3.0, 1.0, 6.0, 6.0, 12.0, 13.0, 17.0, 16.0, 35.0, 46.0, 33.0, 61.0, 86.0, 107.0, 153.0, 106.0, 92.0, 63.0, 41.0, 29.0, 19.0, 17.0, 11.0, 17.0, 4.0, 6.0, 3.0, 3.0, 3.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.295166015625, -0.2839775085449219, -0.27278900146484375, -0.2616004943847656, -0.2504119873046875, -0.23922348022460938, -0.22803497314453125, -0.21684646606445312, -0.205657958984375, -0.19446945190429688, -0.18328094482421875, -0.17209243774414062, -0.1609039306640625, -0.14971542358398438, -0.13852691650390625, -0.12733840942382812, -0.11614990234375, -0.10496139526367188, -0.09377288818359375, -0.08258438110351562, -0.0713958740234375, -0.060207366943359375, -0.04901885986328125, -0.037830352783203125, -0.026641845703125, -0.015453338623046875, -0.00426483154296875, 0.006923675537109375, 0.0181121826171875, 0.029300689697265625, 0.04048919677734375, 0.051677703857421875, 0.0628662109375, 0.07405471801757812, 0.08524322509765625, 0.09643173217773438, 0.1076202392578125, 0.11880874633789062, 0.12999725341796875, 0.14118576049804688, 0.152374267578125, 0.16356277465820312, 0.17475128173828125, 0.18593978881835938, 0.1971282958984375, 0.20831680297851562, 0.21950531005859375, 0.23069381713867188, 0.24188232421875, 0.2530708312988281, 0.26425933837890625, 0.2754478454589844, 0.2866363525390625, 0.2978248596191406, 0.30901336669921875, 0.3202018737792969, 0.331390380859375, 0.3425788879394531, 0.35376739501953125, 0.3649559020996094, 0.3761444091796875, 0.3873329162597656, 0.39852142333984375, 0.4097099304199219, 0.4208984375]}, "gradients/encoder.encoder.layers.1.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 3.0, 2.0, 6.0, 8.0, 20.0, 68.0, 363.0, 449.0, 67.0, 14.0, 5.0, 6.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-17.502649307250977, -16.93880271911621, -16.374954223632812, -15.811107635498047, -15.247261047363281, -14.6834135055542, -14.119565963745117, -13.555719375610352, -12.99187183380127, -12.428024291992188, -11.864177703857422, -11.30033016204834, -10.736482620239258, -10.172636032104492, -9.60878849029541, -9.044940948486328, -8.481094360351562, -7.917247295379639, -7.353400230407715, -6.789552688598633, -6.225705623626709, -5.661858558654785, -5.098011016845703, -4.534163951873779, -3.9703168869018555, -3.4064698219299316, -2.8426225185394287, -2.278775215148926, -1.714928150177002, -1.1510810852050781, -0.5872337818145752, -0.023386478424072266, 0.5404605865478516, 1.104307770729065, 1.6681549549102783, 2.2320022583007812, 2.795849323272705, 3.359696388244629, 3.923543691635132, 4.487390995025635, 5.051238059997559, 5.615085124969482, 6.178932189941406, 6.742779731750488, 7.306626796722412, 7.870473861694336, 8.434321403503418, 8.9981689453125, 9.562015533447266, 10.125863075256348, 10.689709663391113, 11.253557205200195, 11.817403793334961, 12.381251335144043, 12.945098876953125, 13.50894546508789, 14.072793006896973, 14.636640548706055, 15.20048713684082, 15.764334678649902, 16.328182220458984, 16.89202880859375, 17.455875396728516, 18.019723892211914, 18.58357048034668]}, "gradients/encoder.encoder.layers.1.layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 3.0, 1.0, 4.0, 4.0, 4.0, 5.0, 5.0, 11.0, 8.0, 21.0, 12.0, 16.0, 21.0, 19.0, 29.0, 30.0, 27.0, 33.0, 34.0, 51.0, 111.0, 182.0, 74.0, 34.0, 36.0, 30.0, 35.0, 17.0, 18.0, 27.0, 21.0, 15.0, 8.0, 20.0, 13.0, 7.0, 5.0, 4.0, 4.0, 5.0, 2.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.206874847412109, -6.0015082359313965, -5.796141624450684, -5.5907745361328125, -5.3854079246521, -5.180041313171387, -4.974674701690674, -4.769308090209961, -4.56394100189209, -4.358574390411377, -4.153207778930664, -3.947840929031372, -3.74247407913208, -3.537107467651367, -3.3317408561706543, -3.1263740062713623, -2.9210073947906494, -2.7156407833099365, -2.5102739334106445, -2.3049073219299316, -2.0995404720306396, -1.8941738605499268, -1.6888071298599243, -1.4834403991699219, -1.2780736684799194, -1.072706937789917, -0.8673402070999146, -0.6619735360145569, -0.45660680532455444, -0.2512401342391968, -0.045873403549194336, 0.1594933271408081, 0.36486005783081055, 0.570226788520813, 0.7755935192108154, 0.9809601902961731, 1.1863269805908203, 1.3916935920715332, 1.5970603227615356, 1.802427053451538, 2.00779390335083, 2.213160514831543, 2.418527364730835, 2.623893976211548, 2.82926082611084, 3.0346274375915527, 3.2399940490722656, 3.4453608989715576, 3.6507275104522705, 3.8560941219329834, 4.061460971832275, 4.266827583312988, 4.472194194793701, 4.677560806274414, 4.882927894592285, 5.088294506072998, 5.293661117553711, 5.499027729034424, 5.704394340515137, 5.909761428833008, 6.115128040313721, 6.320494651794434, 6.5258612632751465, 6.731227874755859, 6.9365949630737305]}, "gradients/encoder.encoder.layers.0.feed_forward.output_dense.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 6.0, 3.0, 12.0, 9.0, 14.0, 12.0, 29.0, 48.0, 77.0, 139.0, 274.0, 466.0, 921.0, 2229.0, 6480.0, 20959.0, 153581.0, 1384559.0, 2206382.0, 364843.0, 38547.0, 8830.0, 3125.0, 1254.0, 630.0, 340.0, 172.0, 116.0, 68.0, 53.0, 28.0, 20.0, 11.0, 12.0, 9.0, 5.0, 6.0, 7.0, 2.0, 4.0, 2.0, 1.0, 2.0, 0.0, 0.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 2.0, 1.0], "bins": [-0.646484375, -0.62432861328125, -0.6021728515625, -0.58001708984375, -0.557861328125, -0.53570556640625, -0.5135498046875, -0.49139404296875, -0.46923828125, -0.44708251953125, -0.4249267578125, -0.40277099609375, -0.380615234375, -0.35845947265625, -0.3363037109375, -0.31414794921875, -0.2919921875, -0.26983642578125, -0.2476806640625, -0.22552490234375, -0.203369140625, -0.18121337890625, -0.1590576171875, -0.13690185546875, -0.11474609375, -0.09259033203125, -0.0704345703125, -0.04827880859375, -0.026123046875, -0.00396728515625, 0.0181884765625, 0.04034423828125, 0.0625, 0.08465576171875, 0.1068115234375, 0.12896728515625, 0.151123046875, 0.17327880859375, 0.1954345703125, 0.21759033203125, 0.23974609375, 0.26190185546875, 0.2840576171875, 0.30621337890625, 0.328369140625, 0.35052490234375, 0.3726806640625, 0.39483642578125, 0.4169921875, 0.43914794921875, 0.4613037109375, 0.48345947265625, 0.505615234375, 0.52777099609375, 0.5499267578125, 0.57208251953125, 0.59423828125, 0.61639404296875, 0.6385498046875, 0.66070556640625, 0.682861328125, 0.70501708984375, 0.7271728515625, 0.74932861328125, 0.771484375]}, "gradients/encoder.encoder.layers.0.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 3.0, 2.0, 6.0, 5.0, 11.0, 12.0, 18.0, 14.0, 31.0, 37.0, 58.0, 69.0, 84.0, 63.0, 84.0, 91.0, 94.0, 78.0, 57.0, 47.0, 43.0, 31.0, 28.0, 11.0, 16.0, 6.0, 7.0, 2.0, 1.0, 3.0, 0.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.748046875, -0.7253799438476562, -0.7027130126953125, -0.6800460815429688, -0.657379150390625, -0.6347122192382812, -0.6120452880859375, -0.5893783569335938, -0.56671142578125, -0.5440444946289062, -0.5213775634765625, -0.49871063232421875, -0.476043701171875, -0.45337677001953125, -0.4307098388671875, -0.40804290771484375, -0.3853759765625, -0.36270904541015625, -0.3400421142578125, -0.31737518310546875, -0.294708251953125, -0.27204132080078125, -0.2493743896484375, -0.22670745849609375, -0.20404052734375, -0.18137359619140625, -0.1587066650390625, -0.13603973388671875, -0.113372802734375, -0.09070587158203125, -0.0680389404296875, -0.04537200927734375, -0.022705078125, -3.814697265625e-05, 0.0226287841796875, 0.04529571533203125, 0.067962646484375, 0.09062957763671875, 0.1132965087890625, 0.13596343994140625, 0.15863037109375, 0.18129730224609375, 0.2039642333984375, 0.22663116455078125, 0.249298095703125, 0.27196502685546875, 0.2946319580078125, 0.31729888916015625, 0.3399658203125, 0.36263275146484375, 0.3852996826171875, 0.40796661376953125, 0.430633544921875, 0.45330047607421875, 0.4759674072265625, 0.49863433837890625, 0.52130126953125, 0.5439682006835938, 0.5666351318359375, 0.5893020629882812, 0.611968994140625, 0.6346359252929688, 0.6573028564453125, 0.6799697875976562, 0.70263671875]}, "gradients/encoder.encoder.layers.0.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 0.0, 1.0, 1.0, 4.0, 4.0, 10.0, 10.0, 17.0, 27.0, 60.0, 115.0, 309.0, 923.0, 4326.0, 3665484.0, 518611.0, 3225.0, 715.0, 240.0, 98.0, 47.0, 29.0, 16.0, 10.0, 4.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.39453125, -4.25689697265625, -4.1192626953125, -3.98162841796875, -3.843994140625, -3.70635986328125, -3.5687255859375, -3.43109130859375, -3.29345703125, -3.15582275390625, -3.0181884765625, -2.88055419921875, -2.742919921875, -2.60528564453125, -2.4676513671875, -2.33001708984375, -2.1923828125, -2.05474853515625, -1.9171142578125, -1.77947998046875, -1.641845703125, -1.50421142578125, -1.3665771484375, -1.22894287109375, -1.09130859375, -0.95367431640625, -0.8160400390625, -0.67840576171875, -0.540771484375, -0.40313720703125, -0.2655029296875, -0.12786865234375, 0.009765625, 0.14739990234375, 0.2850341796875, 0.42266845703125, 0.560302734375, 0.69793701171875, 0.8355712890625, 0.97320556640625, 1.11083984375, 1.24847412109375, 1.3861083984375, 1.52374267578125, 1.661376953125, 1.79901123046875, 1.9366455078125, 2.07427978515625, 2.2119140625, 2.34954833984375, 2.4871826171875, 2.62481689453125, 2.762451171875, 2.90008544921875, 3.0377197265625, 3.17535400390625, 3.31298828125, 3.45062255859375, 3.5882568359375, 3.72589111328125, 3.863525390625, 4.00115966796875, 4.1387939453125, 4.27642822265625, 4.4140625]}, "gradients/encoder.encoder.layers.0.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 4.0, 6.0, 5.0, 12.0, 10.0, 31.0, 55.0, 108.0, 234.0, 573.0, 1168.0, 1000.0, 459.0, 212.0, 90.0, 43.0, 20.0, 21.0, 10.0, 9.0, 8.0, 4.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.361328125, -3.282989501953125, -3.20465087890625, -3.126312255859375, -3.0479736328125, -2.969635009765625, -2.89129638671875, -2.812957763671875, -2.734619140625, -2.656280517578125, -2.57794189453125, -2.499603271484375, -2.4212646484375, -2.342926025390625, -2.26458740234375, -2.186248779296875, -2.10791015625, -2.029571533203125, -1.95123291015625, -1.872894287109375, -1.7945556640625, -1.716217041015625, -1.63787841796875, -1.559539794921875, -1.481201171875, -1.402862548828125, -1.32452392578125, -1.246185302734375, -1.1678466796875, -1.089508056640625, -1.01116943359375, -0.932830810546875, -0.8544921875, -0.776153564453125, -0.69781494140625, -0.619476318359375, -0.5411376953125, -0.462799072265625, -0.38446044921875, -0.306121826171875, -0.227783203125, -0.149444580078125, -0.07110595703125, 0.007232666015625, 0.0855712890625, 0.163909912109375, 0.24224853515625, 0.320587158203125, 0.39892578125, 0.477264404296875, 0.55560302734375, 0.633941650390625, 0.7122802734375, 0.790618896484375, 0.86895751953125, 0.947296142578125, 1.025634765625, 1.103973388671875, 1.18231201171875, 1.260650634765625, 1.3389892578125, 1.417327880859375, 1.49566650390625, 1.574005126953125, 1.65234375]}, "gradients/encoder.encoder.layers.0.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 3.0, 1.0, 3.0, 0.0, 4.0, 0.0, 6.0, 10.0, 17.0, 19.0, 38.0, 71.0, 152.0, 225.0, 225.0, 123.0, 49.0, 27.0, 11.0, 4.0, 7.0, 4.0, 5.0, 2.0, 0.0, 0.0, 1.0, 3.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-16.177806854248047, -15.654191970825195, -15.130577087402344, -14.606961250305176, -14.083346366882324, -13.559731483459473, -13.036115646362305, -12.512500762939453, -11.988885879516602, -11.46527099609375, -10.941656112670898, -10.41804027557373, -9.894425392150879, -9.370810508728027, -8.84719467163086, -8.323579788208008, -7.799964904785156, -7.276350021362305, -6.752734661102295, -6.229119300842285, -5.705504417419434, -5.181889533996582, -4.658274173736572, -4.1346588134765625, -3.611043930053711, -3.0874288082122803, -2.5638136863708496, -2.040198564529419, -1.5165834426879883, -0.9929683208465576, -0.46935319900512695, 0.05426192283630371, 0.5778770446777344, 1.101492166519165, 1.6251072883605957, 2.1487224102020264, 2.672337532043457, 3.1959526538848877, 3.7195677757263184, 4.243183135986328, 4.76679801940918, 5.290412902832031, 5.814028263092041, 6.337643623352051, 6.861258506774902, 7.384873390197754, 7.908488750457764, 8.432104110717773, 8.955718994140625, 9.479333877563477, 10.002948760986328, 10.526564598083496, 11.050179481506348, 11.5737943649292, 12.097410202026367, 12.621025085449219, 13.14463996887207, 13.668254852294922, 14.191869735717773, 14.715485572814941, 15.239100456237793, 15.762715339660645, 16.286331176757812, 16.809946060180664, 17.333560943603516]}, "gradients/encoder.encoder.layers.0.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 9.0, 5.0, 8.0, 20.0, 28.0, 32.0, 35.0, 47.0, 53.0, 52.0, 67.0, 90.0, 86.0, 73.0, 80.0, 69.0, 48.0, 48.0, 31.0, 22.0, 24.0, 14.0, 19.0, 15.0, 4.0, 4.0, 6.0, 4.0, 4.0, 1.0, 3.0, 1.0, 1.0, 4.0, 0.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.463289260864258, -9.098551750183105, -8.73381519317627, -8.369077682495117, -8.004341125488281, -7.639603614807129, -7.274866580963135, -6.910129547119141, -6.5453925132751465, -6.180655479431152, -5.815918445587158, -5.451181411743164, -5.086443901062012, -4.721707344055176, -4.356969833374023, -3.9922327995300293, -3.627495765686035, -3.262758731842041, -2.898021697998047, -2.5332844257354736, -2.1685473918914795, -1.8038103580474854, -1.439073085784912, -1.074336051940918, -0.7095990180969238, -0.3448619246482849, 0.019875168800354004, 0.3846123218536377, 0.7493493556976318, 1.114086389541626, 1.4788236618041992, 1.8435606956481934, 2.208296775817871, 2.5730338096618652, 2.9377708435058594, 3.3025081157684326, 3.6672451496124268, 4.031982421875, 4.396719455718994, 4.761456489562988, 5.126193523406982, 5.490930557250977, 5.855667591094971, 6.220404624938965, 6.585142135620117, 6.949878692626953, 7.3146162033081055, 7.6793532371521, 8.044090270996094, 8.408827781677246, 8.773564338684082, 9.138301849365234, 9.50303840637207, 9.867775917053223, 10.232513427734375, 10.597249984741211, 10.961986541748047, 11.3267240524292, 11.691460609436035, 12.056198120117188, 12.420934677124023, 12.785672187805176, 13.150409698486328, 13.515146255493164, 13.879883766174316]}, "gradients/encoder.encoder.layers.0.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 5.0, 9.0, 3.0, 10.0, 9.0, 23.0, 22.0, 18.0, 49.0, 78.0, 106.0, 217.0, 404.0, 816.0, 1871.0, 5164.0, 18007.0, 95168.0, 727433.0, 161653.0, 26094.0, 6909.0, 2380.0, 977.0, 467.0, 243.0, 133.0, 99.0, 59.0, 45.0, 31.0, 24.0, 20.0, 8.0, 2.0, 2.0, 2.0, 4.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.9111328125, -0.88232421875, -0.853515625, -0.82470703125, -0.7958984375, -0.76708984375, -0.73828125, -0.70947265625, -0.6806640625, -0.65185546875, -0.623046875, -0.59423828125, -0.5654296875, -0.53662109375, -0.5078125, -0.47900390625, -0.4501953125, -0.42138671875, -0.392578125, -0.36376953125, -0.3349609375, -0.30615234375, -0.27734375, -0.24853515625, -0.2197265625, -0.19091796875, -0.162109375, -0.13330078125, -0.1044921875, -0.07568359375, -0.046875, -0.01806640625, 0.0107421875, 0.03955078125, 0.068359375, 0.09716796875, 0.1259765625, 0.15478515625, 0.18359375, 0.21240234375, 0.2412109375, 0.27001953125, 0.298828125, 0.32763671875, 0.3564453125, 0.38525390625, 0.4140625, 0.44287109375, 0.4716796875, 0.50048828125, 0.529296875, 0.55810546875, 0.5869140625, 0.61572265625, 0.64453125, 0.67333984375, 0.7021484375, 0.73095703125, 0.759765625, 0.78857421875, 0.8173828125, 0.84619140625, 0.875, 0.90380859375, 0.9326171875]}, "gradients/encoder.encoder.layers.0.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 5.0, 0.0, 3.0, 3.0, 6.0, 7.0, 13.0, 27.0, 38.0, 47.0, 67.0, 84.0, 99.0, 100.0, 98.0, 97.0, 87.0, 78.0, 46.0, 37.0, 17.0, 17.0, 15.0, 5.0, 6.0, 3.0, 2.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.021484375, -0.9906158447265625, -0.959747314453125, -0.9288787841796875, -0.89801025390625, -0.8671417236328125, -0.836273193359375, -0.8054046630859375, -0.7745361328125, -0.7436676025390625, -0.712799072265625, -0.6819305419921875, -0.65106201171875, -0.6201934814453125, -0.589324951171875, -0.5584564208984375, -0.527587890625, -0.4967193603515625, -0.465850830078125, -0.4349822998046875, -0.40411376953125, -0.3732452392578125, -0.342376708984375, -0.3115081787109375, -0.2806396484375, -0.2497711181640625, -0.218902587890625, -0.1880340576171875, -0.15716552734375, -0.1262969970703125, -0.095428466796875, -0.0645599365234375, -0.03369140625, -0.0028228759765625, 0.028045654296875, 0.0589141845703125, 0.08978271484375, 0.1206512451171875, 0.151519775390625, 0.1823883056640625, 0.2132568359375, 0.2441253662109375, 0.274993896484375, 0.3058624267578125, 0.33673095703125, 0.3675994873046875, 0.398468017578125, 0.4293365478515625, 0.460205078125, 0.4910736083984375, 0.521942138671875, 0.5528106689453125, 0.58367919921875, 0.6145477294921875, 0.645416259765625, 0.6762847900390625, 0.7071533203125, 0.7380218505859375, 0.768890380859375, 0.7997589111328125, 0.83062744140625, 0.8614959716796875, 0.892364501953125, 0.9232330322265625, 0.9541015625]}, "gradients/encoder.encoder.layers.0.attention.v_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 3.0, 3.0, 3.0, 5.0, 5.0, 8.0, 10.0, 14.0, 21.0, 33.0, 37.0, 63.0, 113.0, 158.0, 314.0, 549.0, 1394.0, 6492.0, 85829.0, 922179.0, 25746.0, 3343.0, 1031.0, 455.0, 259.0, 157.0, 95.0, 54.0, 58.0, 37.0, 25.0, 13.0, 13.0, 9.0, 7.0, 6.0, 5.0, 9.0, 5.0, 4.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.02734375, -0.98828125, -0.94921875, -0.91015625, -0.87109375, -0.83203125, -0.79296875, -0.75390625, -0.71484375, -0.67578125, -0.63671875, -0.59765625, -0.55859375, -0.51953125, -0.48046875, -0.44140625, -0.40234375, -0.36328125, -0.32421875, -0.28515625, -0.24609375, -0.20703125, -0.16796875, -0.12890625, -0.08984375, -0.05078125, -0.01171875, 0.02734375, 0.06640625, 0.10546875, 0.14453125, 0.18359375, 0.22265625, 0.26171875, 0.30078125, 0.33984375, 0.37890625, 0.41796875, 0.45703125, 0.49609375, 0.53515625, 0.57421875, 0.61328125, 0.65234375, 0.69140625, 0.73046875, 0.76953125, 0.80859375, 0.84765625, 0.88671875, 0.92578125, 0.96484375, 1.00390625, 1.04296875, 1.08203125, 1.12109375, 1.16015625, 1.19921875, 1.23828125, 1.27734375, 1.31640625, 1.35546875, 1.39453125, 1.43359375, 1.47265625]}, "gradients/encoder.encoder.layers.0.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 2.0, 4.0, 2.0, 1.0, 5.0, 8.0, 4.0, 7.0, 7.0, 7.0, 21.0, 18.0, 23.0, 13.0, 32.0, 34.0, 44.0, 51.0, 66.0, 68.0, 64.0, 83.0, 72.0, 55.0, 47.0, 35.0, 41.0, 31.0, 23.0, 31.0, 22.0, 19.0, 13.0, 11.0, 11.0, 6.0, 5.0, 4.0, 5.0, 6.0, 2.0, 3.0, 4.0, 3.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.8544921875, -1.7902984619140625, -1.726104736328125, -1.6619110107421875, -1.59771728515625, -1.5335235595703125, -1.469329833984375, -1.4051361083984375, -1.3409423828125, -1.2767486572265625, -1.212554931640625, -1.1483612060546875, -1.08416748046875, -1.0199737548828125, -0.955780029296875, -0.8915863037109375, -0.827392578125, -0.7631988525390625, -0.699005126953125, -0.6348114013671875, -0.57061767578125, -0.5064239501953125, -0.442230224609375, -0.3780364990234375, -0.3138427734375, -0.2496490478515625, -0.185455322265625, -0.1212615966796875, -0.05706787109375, 0.0071258544921875, 0.071319580078125, 0.1355133056640625, 0.19970703125, 0.2639007568359375, 0.328094482421875, 0.3922882080078125, 0.45648193359375, 0.5206756591796875, 0.584869384765625, 0.6490631103515625, 0.7132568359375, 0.7774505615234375, 0.841644287109375, 0.9058380126953125, 0.97003173828125, 1.0342254638671875, 1.098419189453125, 1.1626129150390625, 1.226806640625, 1.2910003662109375, 1.355194091796875, 1.4193878173828125, 1.48358154296875, 1.5477752685546875, 1.611968994140625, 1.6761627197265625, 1.7403564453125, 1.8045501708984375, 1.868743896484375, 1.9329376220703125, 1.99713134765625, 2.0613250732421875, 2.125518798828125, 2.1897125244140625, 2.25390625]}, "gradients/encoder.encoder.layers.0.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 5.0, 3.0, 3.0, 2.0, 3.0, 7.0, 4.0, 8.0, 11.0, 15.0, 17.0, 26.0, 48.0, 61.0, 87.0, 150.0, 289.0, 636.0, 1798.0, 7549.0, 87477.0, 922487.0, 22280.0, 3536.0, 1056.0, 412.0, 221.0, 132.0, 71.0, 45.0, 30.0, 16.0, 15.0, 11.0, 10.0, 12.0, 11.0, 6.0, 5.0, 3.0, 0.0, 2.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.2451171875, -0.2379627227783203, -0.23080825805664062, -0.22365379333496094, -0.21649932861328125, -0.20934486389160156, -0.20219039916992188, -0.1950359344482422, -0.1878814697265625, -0.1807270050048828, -0.17357254028320312, -0.16641807556152344, -0.15926361083984375, -0.15210914611816406, -0.14495468139648438, -0.1378002166748047, -0.130645751953125, -0.12349128723144531, -0.11633682250976562, -0.10918235778808594, -0.10202789306640625, -0.09487342834472656, -0.08771896362304688, -0.08056449890136719, -0.0734100341796875, -0.06625556945800781, -0.059101104736328125, -0.05194664001464844, -0.04479217529296875, -0.03763771057128906, -0.030483245849609375, -0.023328781127929688, -0.01617431640625, -0.009019851684570312, -0.001865386962890625, 0.0052890777587890625, 0.01244354248046875, 0.019598007202148438, 0.026752471923828125, 0.03390693664550781, 0.0410614013671875, 0.04821586608886719, 0.055370330810546875, 0.06252479553222656, 0.06967926025390625, 0.07683372497558594, 0.08398818969726562, 0.09114265441894531, 0.098297119140625, 0.10545158386230469, 0.11260604858398438, 0.11976051330566406, 0.12691497802734375, 0.13406944274902344, 0.14122390747070312, 0.1483783721923828, 0.1555328369140625, 0.1626873016357422, 0.16984176635742188, 0.17699623107910156, 0.18415069580078125, 0.19130516052246094, 0.19845962524414062, 0.2056140899658203, 0.2127685546875]}, "gradients/encoder.encoder.layers.0.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 2.0, 4.0, 1.0, 14.0, 5.0, 5.0, 13.0, 16.0, 25.0, 31.0, 37.0, 52.0, 84.0, 102.0, 109.0, 124.0, 102.0, 69.0, 57.0, 44.0, 24.0, 17.0, 14.0, 10.0, 18.0, 5.0, 10.0, 4.0, 2.0, 2.0, 2.0, 0.0, 2.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.737211227416992e-05, -3.620237112045288e-05, -3.503262996673584e-05, -3.38628888130188e-05, -3.269314765930176e-05, -3.152340650558472e-05, -3.0353665351867676e-05, -2.9183924198150635e-05, -2.8014183044433594e-05, -2.6844441890716553e-05, -2.5674700736999512e-05, -2.450495958328247e-05, -2.333521842956543e-05, -2.216547727584839e-05, -2.0995736122131348e-05, -1.9825994968414307e-05, -1.8656253814697266e-05, -1.7486512660980225e-05, -1.6316771507263184e-05, -1.5147030353546143e-05, -1.3977289199829102e-05, -1.280754804611206e-05, -1.163780689239502e-05, -1.0468065738677979e-05, -9.298324584960938e-06, -8.128583431243896e-06, -6.9588422775268555e-06, -5.7891011238098145e-06, -4.6193599700927734e-06, -3.4496188163757324e-06, -2.2798776626586914e-06, -1.1101365089416504e-06, 5.960464477539063e-08, 1.2293457984924316e-06, 2.3990869522094727e-06, 3.5688281059265137e-06, 4.738569259643555e-06, 5.908310413360596e-06, 7.078051567077637e-06, 8.247792720794678e-06, 9.417533874511719e-06, 1.058727502822876e-05, 1.17570161819458e-05, 1.2926757335662842e-05, 1.4096498489379883e-05, 1.5266239643096924e-05, 1.6435980796813965e-05, 1.7605721950531006e-05, 1.8775463104248047e-05, 1.9945204257965088e-05, 2.111494541168213e-05, 2.228468656539917e-05, 2.345442771911621e-05, 2.4624168872833252e-05, 2.5793910026550293e-05, 2.6963651180267334e-05, 2.8133392333984375e-05, 2.9303133487701416e-05, 3.0472874641418457e-05, 3.16426157951355e-05, 3.281235694885254e-05, 3.398209810256958e-05, 3.515183925628662e-05, 3.632158041000366e-05, 3.74913215637207e-05]}, "gradients/encoder.encoder.layers.0.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 3.0, 1.0, 3.0, 1.0, 1.0, 5.0, 6.0, 9.0, 6.0, 6.0, 11.0, 27.0, 31.0, 43.0, 104.0, 188.0, 341.0, 747.0, 1772.0, 4962.0, 19621.0, 175400.0, 802643.0, 31693.0, 7015.0, 2152.0, 820.0, 401.0, 229.0, 114.0, 75.0, 40.0, 24.0, 17.0, 13.0, 6.0, 5.0, 8.0, 2.0, 3.0, 7.0, 5.0, 4.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.12384033203125, -0.11943912506103516, -0.11503791809082031, -0.11063671112060547, -0.10623550415039062, -0.10183429718017578, -0.09743309020996094, -0.0930318832397461, -0.08863067626953125, -0.0842294692993164, -0.07982826232910156, -0.07542705535888672, -0.07102584838867188, -0.06662464141845703, -0.06222343444824219, -0.057822227478027344, -0.0534210205078125, -0.049019813537597656, -0.04461860656738281, -0.04021739959716797, -0.035816192626953125, -0.03141498565673828, -0.027013778686523438, -0.022612571716308594, -0.01821136474609375, -0.013810157775878906, -0.009408950805664062, -0.005007743835449219, -0.000606536865234375, 0.0037946701049804688, 0.008195877075195312, 0.012597084045410156, 0.016998291015625, 0.021399497985839844, 0.025800704956054688, 0.03020191192626953, 0.034603118896484375, 0.03900432586669922, 0.04340553283691406, 0.047806739807128906, 0.05220794677734375, 0.056609153747558594, 0.06101036071777344, 0.06541156768798828, 0.06981277465820312, 0.07421398162841797, 0.07861518859863281, 0.08301639556884766, 0.0874176025390625, 0.09181880950927734, 0.09622001647949219, 0.10062122344970703, 0.10502243041992188, 0.10942363739013672, 0.11382484436035156, 0.1182260513305664, 0.12262725830078125, 0.1270284652709961, 0.13142967224121094, 0.13583087921142578, 0.14023208618164062, 0.14463329315185547, 0.1490345001220703, 0.15343570709228516, 0.1578369140625]}, "gradients/encoder.encoder.layers.0.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 3.0, 2.0, 3.0, 3.0, 7.0, 5.0, 8.0, 6.0, 9.0, 8.0, 9.0, 17.0, 29.0, 38.0, 45.0, 81.0, 92.0, 122.0, 135.0, 99.0, 77.0, 62.0, 45.0, 26.0, 16.0, 11.0, 9.0, 8.0, 7.0, 2.0, 4.0, 2.0, 3.0, 2.0, 3.0, 1.0, 4.0, 2.0, 4.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.125, -0.119842529296875, -0.11468505859375, -0.109527587890625, -0.1043701171875, -0.099212646484375, -0.09405517578125, -0.088897705078125, -0.083740234375, -0.078582763671875, -0.07342529296875, -0.068267822265625, -0.0631103515625, -0.057952880859375, -0.05279541015625, -0.047637939453125, -0.04248046875, -0.037322998046875, -0.03216552734375, -0.027008056640625, -0.0218505859375, -0.016693115234375, -0.01153564453125, -0.006378173828125, -0.001220703125, 0.003936767578125, 0.00909423828125, 0.014251708984375, 0.0194091796875, 0.024566650390625, 0.02972412109375, 0.034881591796875, 0.0400390625, 0.045196533203125, 0.05035400390625, 0.055511474609375, 0.0606689453125, 0.065826416015625, 0.07098388671875, 0.076141357421875, 0.081298828125, 0.086456298828125, 0.09161376953125, 0.096771240234375, 0.1019287109375, 0.107086181640625, 0.11224365234375, 0.117401123046875, 0.12255859375, 0.127716064453125, 0.13287353515625, 0.138031005859375, 0.1431884765625, 0.148345947265625, 0.15350341796875, 0.158660888671875, 0.163818359375, 0.168975830078125, 0.17413330078125, 0.179290771484375, 0.1844482421875, 0.189605712890625, 0.19476318359375, 0.199920654296875, 0.205078125]}, "gradients/encoder.encoder.layers.0.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 2.0, 4.0, 18.0, 43.0, 121.0, 626.0, 110.0, 49.0, 23.0, 5.0, 2.0, 3.0, 4.0, 2.0, 1.0, 0.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.510433197021484, -9.08431625366211, -8.658199310302734, -8.23208236694336, -7.805965423583984, -7.379848480224609, -6.953731060028076, -6.527614116668701, -6.101497173309326, -5.675380229949951, -5.249263286590576, -4.823145866394043, -4.397028923034668, -3.970912218093872, -3.544795036315918, -3.118678092956543, -2.692561149597168, -2.266444206237793, -1.8403271436691284, -1.4142100811004639, -0.9880931377410889, -0.5619761943817139, -0.13585901260375977, 0.29025793075561523, 0.7163748741149902, 1.1424918174743652, 1.5686088800430298, 1.9947259426116943, 2.4208428859710693, 2.8469598293304443, 3.2730770111083984, 3.6991939544677734, 4.125310897827148, 4.551427841186523, 4.977544784545898, 5.403661727905273, 5.829778671264648, 6.255895614624023, 6.682013034820557, 7.108129978179932, 7.534246921539307, 7.960363864898682, 8.386481285095215, 8.81259822845459, 9.238715171813965, 9.66483211517334, 10.090949058532715, 10.51706600189209, 10.943182945251465, 11.36929988861084, 11.795416831970215, 12.22153377532959, 12.647650718688965, 13.07376766204834, 13.499885559082031, 13.926002502441406, 14.352119445800781, 14.778236389160156, 15.204353332519531, 15.630470275878906, 16.05658721923828, 16.482704162597656, 16.90882110595703, 17.334938049316406, 17.76105499267578]}, "gradients/encoder.encoder.layers.0.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 5.0, 6.0, 14.0, 10.0, 11.0, 13.0, 19.0, 19.0, 32.0, 25.0, 29.0, 41.0, 61.0, 422.0, 76.0, 35.0, 38.0, 23.0, 28.0, 17.0, 18.0, 16.0, 10.0, 9.0, 5.0, 8.0, 12.0, 2.0, 2.0, 3.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-10.549214363098145, -10.295380592346191, -10.041546821594238, -9.787712097167969, -9.533878326416016, -9.280044555664062, -9.02621078491211, -8.772377014160156, -8.518543243408203, -8.26470947265625, -8.010875701904297, -7.7570414543151855, -7.503207683563232, -7.249373435974121, -6.995539665222168, -6.741705894470215, -6.4878716468811035, -6.23403787612915, -5.980203628540039, -5.726369857788086, -5.472536087036133, -5.21870231628418, -4.964868068695068, -4.711034297943115, -4.457200050354004, -4.203366279602051, -3.9495322704315186, -3.6956982612609863, -3.441864490509033, -3.188030481338501, -2.9341964721679688, -2.6803627014160156, -2.4265289306640625, -2.1726949214935303, -1.9188611507415771, -1.665027141571045, -1.4111932516098022, -1.1573593616485596, -0.9035253524780273, -0.6496914625167847, -0.395857572555542, -0.14202365279197693, 0.11181026697158813, 0.3656442165374756, 0.6194781064987183, 0.8733119964599609, 1.1271460056304932, 1.3809798955917358, 1.6348137855529785, 1.8886476755142212, 2.142481565475464, 2.396315574645996, 2.650149345397949, 2.9039833545684814, 3.1578173637390137, 3.411651134490967, 3.665485143661499, 3.9193191528320312, 4.173152923583984, 4.4269866943359375, 4.680820941925049, 4.934654712677002, 5.188488960266113, 5.442322731018066, 5.6961565017700195]}, "gradients/encoder.encoder.pos_conv_embed.conv.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 5.0, 3.0, 6.0, 9.0, 6.0, 16.0, 11.0, 16.0, 15.0, 23.0, 22.0, 33.0, 47.0, 36.0, 113.0, 280.0, 92.0, 43.0, 39.0, 23.0, 25.0, 32.0, 23.0, 15.0, 14.0, 8.0, 8.0, 9.0, 12.0, 7.0, 5.0, 3.0, 5.0, 2.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.580078125, -0.5631637573242188, -0.5462493896484375, -0.5293350219726562, -0.512420654296875, -0.49550628662109375, -0.4785919189453125, -0.46167755126953125, -0.44476318359375, -0.42784881591796875, -0.4109344482421875, -0.39402008056640625, -0.377105712890625, -0.36019134521484375, -0.3432769775390625, -0.32636260986328125, -0.3094482421875, -0.29253387451171875, -0.2756195068359375, -0.25870513916015625, -0.241790771484375, -0.22487640380859375, -0.2079620361328125, -0.19104766845703125, -0.17413330078125, -0.15721893310546875, -0.1403045654296875, -0.12339019775390625, -0.106475830078125, -0.08956146240234375, -0.0726470947265625, -0.05573272705078125, -0.038818359375, -0.02190399169921875, -0.0049896240234375, 0.01192474365234375, 0.028839111328125, 0.04575347900390625, 0.0626678466796875, 0.07958221435546875, 0.09649658203125, 0.11341094970703125, 0.1303253173828125, 0.14723968505859375, 0.164154052734375, 0.18106842041015625, 0.1979827880859375, 0.21489715576171875, 0.2318115234375, 0.24872589111328125, 0.2656402587890625, 0.28255462646484375, 0.299468994140625, 0.31638336181640625, 0.3332977294921875, 0.35021209716796875, 0.36712646484375, 0.38404083251953125, 0.4009552001953125, 0.41786956787109375, 0.434783935546875, 0.45169830322265625, 0.4686126708984375, 0.48552703857421875, 0.50244140625]}, "gradients/encoder.encoder.pos_conv_embed.conv.weight_v": {"_type": "histogram", "values": [1.0, 3.0, 4.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 1.0, 8.0, 10.0, 12.0, 18.0, 21.0, 17.0, 38.0, 59.0, 108.0, 247.0, 567.0, 1998.0, 18255.0, 8357651.0, 7508.0, 1256.0, 401.0, 158.0, 89.0, 64.0, 27.0, 15.0, 5.0, 9.0, 15.0, 3.0, 0.0, 8.0, 1.0, 3.0, 7.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 3.0], "bins": [-5.078289985656738, -4.905268669128418, -4.732247829437256, -4.5592265129089355, -4.386205196380615, -4.213184356689453, -4.040163040161133, -3.8671417236328125, -3.6941206455230713, -3.52109956741333, -3.3480782508850098, -3.1750571727752686, -3.0020360946655273, -2.829014778137207, -2.655993700027466, -2.4829726219177246, -2.3099513053894043, -2.136930227279663, -1.9639089107513428, -1.7908878326416016, -1.6178666353225708, -1.44484543800354, -1.2718243598937988, -1.098803162574768, -0.9257819652557373, -0.7527607679367065, -0.5797396302223206, -0.40671849250793457, -0.2336972951889038, -0.06067609786987305, 0.11234498023986816, 0.2853661775588989, 0.4583878517150879, 0.6314090490341187, 0.8044301867485046, 0.9774513244628906, 1.1504725217819214, 1.3234937191009521, 1.4965147972106934, 1.6695359945297241, 1.8425571918487549, 2.015578269958496, 2.1885995864868164, 2.3616206645965576, 2.534641742706299, 2.707663059234619, 2.8806841373443604, 3.0537052154541016, 3.226726531982422, 3.399747610092163, 3.5727689266204834, 3.7457900047302246, 3.918811321258545, 4.091832160949707, 4.264853477478027, 4.437874794006348, 4.610896110534668, 4.783917427062988, 4.95693826675415, 5.129959583282471, 5.302980899810791, 5.476001739501953, 5.649023056030273, 5.822044372558594, 5.995065212249756]}, "gradients/encoder.encoder.pos_conv_embed.conv.weight_g": {"_type": "histogram", "values": [1.0, 0.0, 4.0, 0.0, 2.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 4.0, 4.0, 2.0, 2.0, 3.0, 4.0, 2.0, 2.0, 2.0, 4.0, 4.0, 7.0, 6.0, 4.0, 0.0, 3.0, 4.0, 5.0, 4.0, 4.0, 4.0, 4.0, 3.0, 6.0, 3.0, 3.0, 1.0, 2.0, 1.0, 0.0, 3.0, 1.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-7.025946140289307, -6.829422950744629, -6.632899761199951, -6.436376571655273, -6.239853858947754, -6.043330669403076, -5.846807479858398, -5.650284290313721, -5.453761100769043, -5.257237911224365, -5.0607147216796875, -4.864192008972168, -4.66766881942749, -4.4711456298828125, -4.274622440338135, -4.078099250793457, -3.8815762996673584, -3.6850531101226807, -3.488530158996582, -3.2920069694519043, -3.0954837799072266, -2.898960590362549, -2.70243763923645, -2.5059144496917725, -2.309391498565674, -2.112868309020996, -1.916345238685608, -1.7198221683502197, -1.523298978805542, -1.3267759084701538, -1.1302528381347656, -0.9337296485900879, -0.7372064590454102, -0.5406833291053772, -0.3441602289676666, -0.14763712882995605, 0.048886001110076904, 0.24540913105010986, 0.44193220138549805, 0.6384553909301758, 0.834978461265564, 1.0315015316009521, 1.2280247211456299, 1.424547791481018, 1.6210708618164062, 1.817594051361084, 2.0141172409057617, 2.2106404304504395, 2.407163381576538, 2.603686571121216, 2.8002095222473145, 2.996732711791992, 3.19325590133667, 3.3897790908813477, 3.5863020420074463, 3.782825231552124, 3.9793481826782227, 4.1758713722229, 4.372394561767578, 4.568917274475098, 4.765440464019775, 4.961963653564453, 5.158486843109131, 5.355010032653809, 5.551533222198486]}, "gradients/encoder.feature_projection.projection.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 4.0, 1.0, 2.0, 1.0, 4.0, 4.0, 8.0, 8.0, 3.0, 10.0, 19.0, 19.0, 29.0, 39.0, 53.0, 81.0, 119.0, 215.0, 350.0, 618.0, 1250.0, 2799.0, 7178.0, 19828.0, 59519.0, 170968.0, 170874.0, 58794.0, 19134.0, 6916.0, 2712.0, 1214.0, 597.0, 336.0, 196.0, 118.0, 88.0, 41.0, 40.0, 26.0, 24.0, 17.0, 4.0, 8.0, 7.0, 3.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-3.150390625, -3.062896728515625, -2.97540283203125, -2.887908935546875, -2.8004150390625, -2.712921142578125, -2.62542724609375, -2.537933349609375, -2.450439453125, -2.362945556640625, -2.27545166015625, -2.187957763671875, -2.1004638671875, -2.012969970703125, -1.92547607421875, -1.837982177734375, -1.75048828125, -1.662994384765625, -1.57550048828125, -1.488006591796875, -1.4005126953125, -1.313018798828125, -1.22552490234375, -1.138031005859375, -1.050537109375, -0.963043212890625, -0.87554931640625, -0.788055419921875, -0.7005615234375, -0.613067626953125, -0.52557373046875, -0.438079833984375, -0.3505859375, -0.263092041015625, -0.17559814453125, -0.088104248046875, -0.0006103515625, 0.086883544921875, 0.17437744140625, 0.261871337890625, 0.349365234375, 0.436859130859375, 0.52435302734375, 0.611846923828125, 0.6993408203125, 0.786834716796875, 0.87432861328125, 0.961822509765625, 1.04931640625, 1.136810302734375, 1.22430419921875, 1.311798095703125, 1.3992919921875, 1.486785888671875, 1.57427978515625, 1.661773681640625, 1.749267578125, 1.836761474609375, 1.92425537109375, 2.011749267578125, 2.0992431640625, 2.186737060546875, 2.27423095703125, 2.361724853515625, 2.44921875]}, "gradients/encoder.feature_projection.projection.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 5.0, 4.0, 6.0, 9.0, 15.0, 21.0, 37.0, 38.0, 65.0, 94.0, 112.0, 157.0, 141.0, 112.0, 60.0, 52.0, 28.0, 23.0, 15.0, 9.0, 3.0, 1.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.1904296875, -1.155120849609375, -1.11981201171875, -1.084503173828125, -1.0491943359375, -1.013885498046875, -0.97857666015625, -0.943267822265625, -0.907958984375, -0.872650146484375, -0.83734130859375, -0.802032470703125, -0.7667236328125, -0.731414794921875, -0.69610595703125, -0.660797119140625, -0.62548828125, -0.590179443359375, -0.55487060546875, -0.519561767578125, -0.4842529296875, -0.448944091796875, -0.41363525390625, -0.378326416015625, -0.343017578125, -0.307708740234375, -0.27239990234375, -0.237091064453125, -0.2017822265625, -0.166473388671875, -0.13116455078125, -0.095855712890625, -0.060546875, -0.025238037109375, 0.01007080078125, 0.045379638671875, 0.0806884765625, 0.115997314453125, 0.15130615234375, 0.186614990234375, 0.221923828125, 0.257232666015625, 0.29254150390625, 0.327850341796875, 0.3631591796875, 0.398468017578125, 0.43377685546875, 0.469085693359375, 0.50439453125, 0.539703369140625, 0.57501220703125, 0.610321044921875, 0.6456298828125, 0.680938720703125, 0.71624755859375, 0.751556396484375, 0.786865234375, 0.822174072265625, 0.85748291015625, 0.892791748046875, 0.9281005859375, 0.963409423828125, 0.99871826171875, 1.034027099609375, 1.0693359375]}, "gradients/encoder.feature_projection.layer_norm.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 3.0, 1.0, 3.0, 3.0, 4.0, 8.0, 3.0, 11.0, 19.0, 33.0, 54.0, 101.0, 104.0, 69.0, 34.0, 13.0, 9.0, 9.0, 2.0, 3.0, 2.0, 3.0, 1.0, 0.0, 4.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.014385223388672, -2.877683162689209, -2.740981101989746, -2.604279041290283, -2.4675769805908203, -2.3308749198913574, -2.1941728591918945, -2.0574707984924316, -1.9207688570022583, -1.7840667963027954, -1.6473647356033325, -1.5106627941131592, -1.3739607334136963, -1.2372586727142334, -1.1005566120147705, -0.9638545513153076, -0.8271524906158447, -0.6904504299163818, -0.553748369216919, -0.41704636812210083, -0.28034430742263794, -0.14364224672317505, -0.006940245628356934, 0.12976181507110596, 0.26646387577056885, 0.40316593647003174, 0.5398679971694946, 0.6765699982643127, 0.8132720589637756, 0.9499741196632385, 1.0866761207580566, 1.2233781814575195, 1.3600802421569824, 1.4967823028564453, 1.6334843635559082, 1.770186424255371, 1.906888484954834, 2.043590545654297, 2.1802926063537598, 2.3169946670532227, 2.4536967277526855, 2.5903987884521484, 2.7271008491516113, 2.863802909851074, 3.000504970550537, 3.13720703125, 3.273909091949463, 3.410611152648926, 3.5473129749298096, 3.6840150356292725, 3.8207170963287354, 3.9574191570281982, 4.094120979309082, 4.230823040008545, 4.367525100708008, 4.504227161407471, 4.640929222106934, 4.7776312828063965, 4.914333343505859, 5.051035404205322, 5.187737464904785, 5.324439525604248, 5.461141586303711, 5.597843647003174, 5.734545707702637]}, "gradients/encoder.feature_projection.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 3.0, 5.0, 2.0, 5.0, 3.0, 3.0, 6.0, 8.0, 3.0, 14.0, 26.0, 63.0, 133.0, 105.0, 51.0, 26.0, 8.0, 5.0, 4.0, 4.0, 3.0, 2.0, 5.0, 1.0, 4.0, 5.0, 0.0, 0.0, 4.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-5.935749530792236, -5.798346042633057, -5.660942554473877, -5.5235395431518555, -5.386136054992676, -5.248732566833496, -5.111329078674316, -4.973925590515137, -4.836522579193115, -4.6991190910339355, -4.561715602874756, -4.424312591552734, -4.286909103393555, -4.149505615234375, -4.012102127075195, -3.8746988773345947, -3.737295389175415, -3.5998919010162354, -3.4624886512756348, -3.325085163116455, -3.1876819133758545, -3.050278425216675, -2.912875175476074, -2.7754716873168945, -2.638068199157715, -2.500664710998535, -2.3632614612579346, -2.225857973098755, -2.0884547233581543, -1.9510512351989746, -1.8136478662490845, -1.6762444972991943, -1.5388410091400146, -1.4014376401901245, -1.2640342712402344, -1.1266307830810547, -0.9892274737358093, -0.8518241047859192, -0.7144206762313843, -0.5770173072814941, -0.439613938331604, -0.30221056938171387, -0.16480717062950134, -0.02740377187728882, 0.10999959707260132, 0.24740296602249146, 0.38480639457702637, 0.5222097635269165, 0.6596131324768066, 0.7970165014266968, 0.9344198703765869, 1.0718233585357666, 1.2092266082763672, 1.3466300964355469, 1.484033465385437, 1.6214368343353271, 1.7588402032852173, 1.8962435722351074, 2.033647060394287, 2.1710503101348877, 2.3084537982940674, 2.445857048034668, 2.5832605361938477, 2.7206640243530273, 2.858067274093628]}, "train/train_runtime": 4795.9642, "train/train_samples_per_second": 5.95, "train/train_steps_per_second": 0.062, "train/total_flos": 0.0, "train/train_loss": 4.303745459225844, "eval/loss": 4.729526996612549, "eval/wer": 2.3516065053550177, "eval/runtime": 946.0728, "eval/samples_per_second": 2.793, "eval/steps_per_second": 0.234, "_wandb": {"runtime": 6147}} \ No newline at end of file diff --git a/wandb/run-20220302_200036-31e4k99c/logs/debug-internal.log b/wandb/run-20220302_200036-31e4k99c/logs/debug-internal.log index b31b10b4629d3dfd8d973c703dc10626939bbc98..ec830189e0a061dd13e19cd8cf31e336da5293f2 100644 --- a/wandb/run-20220302_200036-31e4k99c/logs/debug-internal.log +++ b/wandb/run-20220302_200036-31e4k99c/logs/debug-internal.log @@ -7522,3 +7522,115 @@ 2022-03-02 21:42:43,423 DEBUG HandlerThread:264383 [handler.py:handle_request():131] handle_request: stop_status 2022-03-02 21:42:43,569 DEBUG SenderThread:264383 [sender.py:send_request():249] send_request: stop_status 2022-03-02 21:42:44,190 INFO Thread-8 :264383 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_200036-31e4k99c/files/config.yaml +2022-03-02 21:42:58,627 DEBUG HandlerThread:264383 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:42:58,627 DEBUG SenderThread:264383 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:42:59,195 INFO Thread-8 :264383 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_200036-31e4k99c/files/output.log +2022-03-02 21:43:01,196 INFO Thread-8 :264383 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_200036-31e4k99c/files/output.log +2022-03-02 21:43:02,805 DEBUG SenderThread:264383 [sender.py:send():235] send: stats +2022-03-02 21:43:04,931 DEBUG SenderThread:264383 [sender.py:send():235] send: telemetry +2022-03-02 21:43:04,931 DEBUG HandlerThread:264383 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 21:43:04,931 DEBUG SenderThread:264383 [sender.py:send():235] send: exit +2022-03-02 21:43:04,932 INFO SenderThread:264383 [sender.py:send_exit():371] handling exit code: 1 +2022-03-02 21:43:04,932 INFO SenderThread:264383 [sender.py:send_exit():373] handling runtime: 6147 +2022-03-02 21:43:04,983 INFO SenderThread:264383 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:43:04,983 INFO SenderThread:264383 [sender.py:send_exit():379] send defer +2022-03-02 21:43:04,984 DEBUG SenderThread:264383 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 21:43:04,984 DEBUG HandlerThread:264383 [handler.py:handle_request():131] handle_request: defer +2022-03-02 21:43:04,984 INFO HandlerThread:264383 [handler.py:handle_request_defer():154] handle defer: 0 +2022-03-02 21:43:04,984 DEBUG SenderThread:264383 [sender.py:send_request():249] send_request: defer +2022-03-02 21:43:04,985 INFO SenderThread:264383 [sender.py:send_request_defer():388] handle sender defer: 0 +2022-03-02 21:43:04,985 INFO SenderThread:264383 [sender.py:transition_state():392] send defer: 1 +2022-03-02 21:43:04,985 DEBUG HandlerThread:264383 [handler.py:handle_request():131] handle_request: defer +2022-03-02 21:43:04,985 INFO HandlerThread:264383 [handler.py:handle_request_defer():154] handle defer: 1 +2022-03-02 21:43:05,048 DEBUG SenderThread:264383 [sender.py:send_request():249] send_request: defer +2022-03-02 21:43:05,048 INFO SenderThread:264383 [sender.py:send_request_defer():388] handle sender defer: 1 +2022-03-02 21:43:05,048 INFO SenderThread:264383 [sender.py:transition_state():392] send defer: 2 +2022-03-02 21:43:05,048 DEBUG SenderThread:264383 [sender.py:send():235] send: stats +2022-03-02 21:43:05,049 DEBUG HandlerThread:264383 [handler.py:handle_request():131] handle_request: defer +2022-03-02 21:43:05,049 INFO HandlerThread:264383 [handler.py:handle_request_defer():154] handle defer: 2 +2022-03-02 21:43:05,050 DEBUG SenderThread:264383 [sender.py:send_request():249] send_request: defer +2022-03-02 21:43:05,050 INFO SenderThread:264383 [sender.py:send_request_defer():388] handle sender defer: 2 +2022-03-02 21:43:05,050 INFO SenderThread:264383 [sender.py:transition_state():392] send defer: 3 +2022-03-02 21:43:05,050 DEBUG HandlerThread:264383 [handler.py:handle_request():131] handle_request: defer +2022-03-02 21:43:05,050 INFO HandlerThread:264383 [handler.py:handle_request_defer():154] handle defer: 3 +2022-03-02 21:43:05,108 DEBUG HandlerThread:264383 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 21:43:05,111 DEBUG SenderThread:264383 [sender.py:send():235] send: summary +2022-03-02 21:43:05,195 INFO SenderThread:264383 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:43:05,195 DEBUG SenderThread:264383 [sender.py:send_request():249] send_request: defer +2022-03-02 21:43:05,195 INFO SenderThread:264383 [sender.py:send_request_defer():388] handle sender defer: 3 +2022-03-02 21:43:05,195 INFO SenderThread:264383 [sender.py:transition_state():392] send defer: 4 +2022-03-02 21:43:05,195 DEBUG SenderThread:264383 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 21:43:05,196 DEBUG HandlerThread:264383 [handler.py:handle_request():131] handle_request: defer +2022-03-02 21:43:05,196 INFO HandlerThread:264383 [handler.py:handle_request_defer():154] handle defer: 4 +2022-03-02 21:43:05,196 DEBUG SenderThread:264383 [sender.py:send_request():249] send_request: defer +2022-03-02 21:43:05,196 INFO SenderThread:264383 [sender.py:send_request_defer():388] handle sender defer: 4 +2022-03-02 21:43:05,209 INFO Thread-8 :264383 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_200036-31e4k99c/files/output.log +2022-03-02 21:43:05,209 INFO Thread-8 :264383 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_200036-31e4k99c/files/wandb-summary.json +2022-03-02 21:43:05,297 DEBUG HandlerThread:264383 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 21:43:06,125 INFO SenderThread:264383 [sender.py:transition_state():392] send defer: 5 +2022-03-02 21:43:06,125 DEBUG SenderThread:264383 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 21:43:06,126 DEBUG HandlerThread:264383 [handler.py:handle_request():131] handle_request: defer +2022-03-02 21:43:06,126 INFO HandlerThread:264383 [handler.py:handle_request_defer():154] handle defer: 5 +2022-03-02 21:43:06,126 DEBUG SenderThread:264383 [sender.py:send_request():249] send_request: defer +2022-03-02 21:43:06,126 INFO SenderThread:264383 [sender.py:send_request_defer():388] handle sender defer: 5 +2022-03-02 21:43:06,126 INFO SenderThread:264383 [dir_watcher.py:finish():283] shutting down directory watcher +2022-03-02 21:43:06,209 INFO Thread-8 :264383 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_200036-31e4k99c/files/config.yaml +2022-03-02 21:43:06,209 INFO SenderThread:264383 [dir_watcher.py:finish():313] scan: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_200036-31e4k99c/files +2022-03-02 21:43:06,210 INFO SenderThread:264383 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_200036-31e4k99c/files/wandb-metadata.json wandb-metadata.json +2022-03-02 21:43:06,210 INFO SenderThread:264383 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_200036-31e4k99c/files/output.log output.log +2022-03-02 21:43:06,210 INFO SenderThread:264383 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_200036-31e4k99c/files/wandb-summary.json wandb-summary.json +2022-03-02 21:43:06,210 INFO SenderThread:264383 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_200036-31e4k99c/files/requirements.txt requirements.txt +2022-03-02 21:43:06,211 INFO SenderThread:264383 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_200036-31e4k99c/files/config.yaml config.yaml +2022-03-02 21:43:06,214 INFO SenderThread:264383 [sender.py:transition_state():392] send defer: 6 +2022-03-02 21:43:06,222 DEBUG HandlerThread:264383 [handler.py:handle_request():131] handle_request: defer +2022-03-02 21:43:06,223 INFO HandlerThread:264383 [handler.py:handle_request_defer():154] handle defer: 6 +2022-03-02 21:43:06,223 DEBUG SenderThread:264383 [sender.py:send_request():249] send_request: defer +2022-03-02 21:43:06,223 INFO SenderThread:264383 [sender.py:send_request_defer():388] handle sender defer: 6 +2022-03-02 21:43:06,223 INFO SenderThread:264383 [file_pusher.py:finish():177] shutting down file pusher +2022-03-02 21:43:06,227 DEBUG HandlerThread:264383 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 21:43:06,227 DEBUG SenderThread:264383 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 21:43:06,328 DEBUG HandlerThread:264383 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 21:43:06,329 DEBUG SenderThread:264383 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 21:43:06,430 DEBUG HandlerThread:264383 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 21:43:06,430 DEBUG SenderThread:264383 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 21:43:06,527 INFO Thread-14 :264383 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_200036-31e4k99c/files/requirements.txt +2022-03-02 21:43:06,532 DEBUG HandlerThread:264383 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 21:43:06,532 DEBUG SenderThread:264383 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 21:43:06,540 INFO Thread-12 :264383 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_200036-31e4k99c/files/output.log +2022-03-02 21:43:06,575 INFO Thread-13 :264383 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_200036-31e4k99c/files/wandb-summary.json +2022-03-02 21:43:06,584 INFO Thread-15 :264383 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_200036-31e4k99c/files/config.yaml +2022-03-02 21:43:06,634 DEBUG HandlerThread:264383 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 21:43:06,634 DEBUG SenderThread:264383 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 21:43:06,735 DEBUG HandlerThread:264383 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 21:43:06,736 DEBUG SenderThread:264383 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 21:43:06,784 INFO Thread-7 :264383 [sender.py:transition_state():392] send defer: 7 +2022-03-02 21:43:06,785 DEBUG HandlerThread:264383 [handler.py:handle_request():131] handle_request: defer +2022-03-02 21:43:06,785 INFO HandlerThread:264383 [handler.py:handle_request_defer():154] handle defer: 7 +2022-03-02 21:43:06,785 DEBUG SenderThread:264383 [sender.py:send_request():249] send_request: defer +2022-03-02 21:43:06,785 INFO SenderThread:264383 [sender.py:send_request_defer():388] handle sender defer: 7 +2022-03-02 21:43:06,837 DEBUG HandlerThread:264383 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 21:43:08,117 INFO SenderThread:264383 [sender.py:transition_state():392] send defer: 8 +2022-03-02 21:43:08,117 DEBUG SenderThread:264383 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 21:43:08,118 DEBUG HandlerThread:264383 [handler.py:handle_request():131] handle_request: defer +2022-03-02 21:43:08,118 INFO HandlerThread:264383 [handler.py:handle_request_defer():154] handle defer: 8 +2022-03-02 21:43:08,118 DEBUG SenderThread:264383 [sender.py:send_request():249] send_request: defer +2022-03-02 21:43:08,118 INFO SenderThread:264383 [sender.py:send_request_defer():388] handle sender defer: 8 +2022-03-02 21:43:08,118 INFO SenderThread:264383 [sender.py:transition_state():392] send defer: 9 +2022-03-02 21:43:08,119 DEBUG SenderThread:264383 [sender.py:send():235] send: final +2022-03-02 21:43:08,120 DEBUG SenderThread:264383 [sender.py:send():235] send: footer +2022-03-02 21:43:08,120 DEBUG HandlerThread:264383 [handler.py:handle_request():131] handle_request: defer +2022-03-02 21:43:08,121 INFO HandlerThread:264383 [handler.py:handle_request_defer():154] handle defer: 9 +2022-03-02 21:43:08,121 DEBUG SenderThread:264383 [sender.py:send_request():249] send_request: defer +2022-03-02 21:43:08,121 INFO SenderThread:264383 [sender.py:send_request_defer():388] handle sender defer: 9 +2022-03-02 21:43:08,219 DEBUG HandlerThread:264383 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 21:43:08,219 DEBUG SenderThread:264383 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 21:43:08,219 INFO SenderThread:264383 [file_pusher.py:join():182] waiting for file pusher +2022-03-02 21:43:08,285 DEBUG HandlerThread:264383 [handler.py:handle_request():131] handle_request: get_summary +2022-03-02 21:43:08,384 DEBUG HandlerThread:264383 [handler.py:handle_request():131] handle_request: sampled_history +2022-03-02 21:43:08,387 DEBUG HandlerThread:264383 [handler.py:handle_request():131] handle_request: shutdown +2022-03-02 21:43:08,387 INFO HandlerThread:264383 [handler.py:finish():739] shutting down handler +2022-03-02 21:43:09,120 INFO WriterThread:264383 [datastore.py:close():281] close: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_200036-31e4k99c/run-31e4k99c.wandb +2022-03-02 21:43:09,284 INFO SenderThread:264383 [sender.py:finish():1075] shutting down sender +2022-03-02 21:43:09,284 INFO SenderThread:264383 [file_pusher.py:finish():177] shutting down file pusher +2022-03-02 21:43:09,284 INFO SenderThread:264383 [file_pusher.py:join():182] waiting for file pusher +2022-03-02 21:43:09,291 INFO MainThread:264383 [internal.py:handle_exit():79] Internal process exited diff --git a/wandb/run-20220302_200036-31e4k99c/logs/debug.log b/wandb/run-20220302_200036-31e4k99c/logs/debug.log index 287c88bc32fe629001f3abe14ba6c87c18a387b3..8147468a51c9e963fba1efbcfaca3f7673f54d52 100644 --- a/wandb/run-20220302_200036-31e4k99c/logs/debug.log +++ b/wandb/run-20220302_200036-31e4k99c/logs/debug.log @@ -25,3 +25,101 @@ config: {} 2022-03-02 20:00:37,941 INFO MainThread:264283 [wandb_init.py:init():651] run started, returning control to user process 2022-03-02 20:00:37,944 INFO MainThread:264283 [wandb_run.py:_config_callback():966] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'torch.float32', 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 50, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'architectures': ['SpeechEncoderDecoderModel'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 50256, 'eos_token_id': 50256, 'sep_token_id': None, 'decoder_start_token_id': 50256, 'task_specific_params': None, 'problem_type': None, '_name_or_path': './', 'transformers_version': None, 'decoder': {'vocab_size': 50257, 'n_positions': 1024, 'n_embd': 1024, 'n_layer': 24, 'n_head': 16, 'n_inner': None, 'activation_function': 'gelu_new', 'resid_pdrop': 0.0, 'embd_pdrop': 0.0, 'attn_pdrop': 0.0, 'layer_norm_epsilon': 1e-05, 'initializer_range': 0.02, 'summary_type': 'cls_index', 'summary_use_proj': True, 'summary_activation': None, 'summary_first_dropout': 0.0, 'summary_proj_to_labels': True, 'scale_attn_weights': True, 'use_cache': False, 'scale_attn_by_inverse_layer_idx': False, 'reorder_and_upcast_attn': False, 'bos_token_id': 50256, 'eos_token_id': 50256, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': True, 'cross_attention_hidden_size': None, 'add_cross_attention': True, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'architectures': ['GPT2LMHeadModel'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'pad_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': {'text-generation': {'do_sample': True, 'max_length': 50}}, 'problem_type': None, '_name_or_path': 'gpt2-medium', 'transformers_version': '4.17.0.dev0', 'n_ctx': 1024, 'n_special': 0, 'predict_special_tokens': True, 'model_type': 'gpt2'}, 'encoder': {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'architectures': ['Wav2Vec2ForPreTraining'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 1, 'pad_token_id': 0, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'facebook/wav2vec2-large-lv60', 'transformers_version': '4.17.0.dev0', 'feat_extract_dropout': 0.0, 'gradient_checkpointing': False, 'hidden_dropout_prob': 0.0, 'num_feat_extract_layers': 7, 'hidden_size': 1024, 'feat_extract_norm': 'layer', 'feat_extract_activation': 'gelu', 'conv_dim': [512, 512, 512, 512, 512, 512, 512], 'conv_stride': [5, 2, 2, 2, 2, 2, 2], 'conv_kernel': [10, 3, 3, 3, 3, 2, 2], 'conv_bias': True, 'num_conv_pos_embeddings': 128, 'num_conv_pos_embedding_groups': 16, 'num_hidden_layers': 24, 'intermediate_size': 4096, 'hidden_act': 'gelu', 'num_attention_heads': 16, 'hidden_dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'feat_proj_dropout': 0.0, 'final_dropout': 0.0, 'layerdrop': 0.0, 'layer_norm_eps': 1e-05, 'initializer_range': 0.02, 'vocab_size': 32, 'do_stable_layer_norm': True, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.0, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'num_codevectors_per_group': 320, 'num_codevector_groups': 2, 'contrastive_logits_temperature': 0.1, 'feat_quantizer_dropout': 0.0, 'num_negatives': 100, 'codevector_dim': 768, 'proj_codevector_dim': 768, 'diversity_loss_weight': 0.1, 'ctc_loss_reduction': 'sum', 'ctc_zero_infinity': False, 'add_adapter': True, 'adapter_kernel_size': 3, 'adapter_stride': 2, 'num_adapter_layers': 3, 'output_hidden_size': 1024, 'classifier_proj_size': 256, 'tdnn_dim': [512, 512, 512, 512, 1500], 'tdnn_kernel': [5, 3, 3, 1, 1], 'tdnn_dilation': [1, 2, 3, 1, 1], 'xvector_output_dim': 512, 'model_type': 'wav2vec2'}, 'model_type': 'speech-encoder-decoder', 'processor_class': 'Wav2Vec2Processor', 'use_cache': False, 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 12, 'per_device_eval_batch_size': 12, 'per_gpu_train_batch_size': 'None', 'per_gpu_eval_batch_size': 'None', 'gradient_accumulation_steps': 8, 'eval_accumulation_steps': 'None', 'learning_rate': 0.0003, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1.0, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Mar02_19-59-53_sanchit--v100', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 1, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 500, 'save_total_limit': 1, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'amp', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': 'None', 'local_rank': -1, 'xpu_backend': 'None', 'tpu_num_cores': 'None', 'tpu_metrics_debug': False, 'debug': '[]', 'dataloader_drop_last': False, 'eval_steps': 500, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': 'None', 'load_best_model_at_end': False, 'metric_for_best_model': 'None', 'greater_is_better': 'None', 'ignore_data_skip': False, 'sharded_ddp': '[]', 'deepspeed': 'None', 'label_smoothing_factor': 0.0, 'optim': 'adamw_hf', 'adafactor': False, 'group_by_length': True, 'length_column_name': 'input_length', 'report_to': "['wandb']", 'ddp_find_unused_parameters': 'None', 'ddp_bucket_cap_mb': 'None', 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': 'None', 'hub_model_id': 'None', 'hub_strategy': 'every_save', 'hub_token': '', 'gradient_checkpointing': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': 'None', 'push_to_hub_organization': 'None', 'push_to_hub_token': '', '_n_gpu': 1, 'mp_parameters': '', 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 40, 'generation_num_beams': 1, 'train_batch_size': 12, 'eval_batch_size': 12} 2022-03-02 20:00:37,946 INFO MainThread:264283 [wandb_watch.py:watch():43] Watching +2022-03-02 21:43:02,636 INFO MainThread:264283 [wandb_run.py:_atexit_cleanup():1797] got exitcode: 1 +2022-03-02 21:43:02,640 INFO MainThread:264283 [wandb_run.py:_restore():1769] restore +2022-03-02 21:43:04,984 INFO MainThread:264283 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 2095 + total_bytes: 2095 +} + +2022-03-02 21:43:05,196 INFO MainThread:264283 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 2095 + total_bytes: 2095 +} + +2022-03-02 21:43:06,126 INFO MainThread:264283 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 2095 + total_bytes: 2095 +} + +2022-03-02 21:43:06,227 INFO MainThread:264283 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 2095 + total_bytes: 2674763 +} + +2022-03-02 21:43:06,329 INFO MainThread:264283 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 2041516 + total_bytes: 2674763 +} + +2022-03-02 21:43:06,431 INFO MainThread:264283 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 2674763 + total_bytes: 2674763 +} + +2022-03-02 21:43:06,533 INFO MainThread:264283 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 2674763 + total_bytes: 2674763 +} + +2022-03-02 21:43:06,634 INFO MainThread:264283 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 2674763 + total_bytes: 2674763 +} + +2022-03-02 21:43:06,736 INFO MainThread:264283 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 2674763 + total_bytes: 2674763 +} + +2022-03-02 21:43:08,118 INFO MainThread:264283 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 2674763 + total_bytes: 2674763 +} + +2022-03-02 21:43:08,284 INFO MainThread:264283 [wandb_run.py:_wait_for_finish():1929] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 2674763 + total_bytes: 2674763 +} +local_info { +} + +2022-03-02 21:43:09,438 INFO MainThread:264283 [wandb_run.py:_append_history():2144] rendering history +2022-03-02 21:43:09,439 INFO MainThread:264283 [wandb_run.py:_append_summary():2102] rendering summary +2022-03-02 21:43:09,440 INFO MainThread:264283 [wandb_run.py:_append_files():2194] logging synced files diff --git a/wandb/run-20220302_200036-31e4k99c/run-31e4k99c.wandb b/wandb/run-20220302_200036-31e4k99c/run-31e4k99c.wandb index fc8e06d46a1cfa7dd8f6acd550ac83ddc19da095..1122cb44c4c3eb17868dcf57cebaaf60308e180d 100644 --- a/wandb/run-20220302_200036-31e4k99c/run-31e4k99c.wandb +++ b/wandb/run-20220302_200036-31e4k99c/run-31e4k99c.wandb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fa51a0a26ce5e172e476442812ae6212e91addd95b58f5626c318ed39b664595 -size 36405850 +oid sha256:c7bba159d4dadd3685bda07a160b03aef6a022bcd1ffe05c918057d8d7e27558 +size 37907046 diff --git a/wandb/run-20220302_214437-2u4nhnsf/files/config.yaml b/wandb/run-20220302_214437-2u4nhnsf/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9985cca73e97948b6a6895e022ea2ba6bb33a32d --- /dev/null +++ b/wandb/run-20220302_214437-2u4nhnsf/files/config.yaml @@ -0,0 +1,11321 @@ +wandb_version: 1 + +_n_gpu: + desc: null + value: 1 +_name_or_path: + desc: null + value: ./ +_wandb: + desc: null + value: + cli_version: 0.12.10 + framework: huggingface + huggingface_version: 4.17.0.dev0 + is_jupyter_run: false + is_kaggle_kernel: false + m: + - 1: train/global_step + 6: + - 3 + - 1: train/loss + 5: 1 + 6: + - 1 + - 1: train/learning_rate + 5: 1 + 6: + - 1 + - 1: train/epoch + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.ln_f\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.ln_f\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.ln_f\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.ln_f\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.ln_f\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.ln_f\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.wpe\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.wpe\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.wpe\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.wte\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.wte\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.wte\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.2\.conv\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.2\.conv\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.2\.conv\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.2\.conv\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.2\.conv\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.2\.conv\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.1\.conv\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.1\.conv\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.1\.conv\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.1\.conv\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.1\.conv\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.1\.conv\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.0\.conv\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.0\.conv\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.0\.conv\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.0\.conv\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.0\.conv\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.0\.conv\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.weight_v._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.weight_v.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.weight_v.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.weight_g._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.weight_g.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.weight_g.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.projection\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.projection\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.projection\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.projection\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.projection\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.projection\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + python_version: 3.9.5 + start_time: 1646257477 + t: + 1: + - 1 + - 5 + - 11 + 2: + - 1 + - 5 + - 11 + 3: + - 1 + - 7 + - 13 + 4: 3.9.5 + 5: 0.12.10 + 6: 4.17.0.dev0 + 8: + - 5 +adafactor: + desc: null + value: false +adam_beta1: + desc: null + value: 0.9 +adam_beta2: + desc: null + value: 0.999 +adam_epsilon: + desc: null + value: 1.0e-08 +add_cross_attention: + desc: null + value: false +architectures: + desc: null + value: + - SpeechEncoderDecoderModel +bad_words_ids: + desc: null + value: null +bf16: + desc: null + value: false +bf16_full_eval: + desc: null + value: false +bos_token_id: + desc: null + value: null +chunk_size_feed_forward: + desc: null + value: 0 +cross_attention_hidden_size: + desc: null + value: null +dataloader_drop_last: + desc: null + value: false +dataloader_num_workers: + desc: null + value: 0 +dataloader_pin_memory: + desc: null + value: true +ddp_bucket_cap_mb: + desc: null + value: None +ddp_find_unused_parameters: + desc: null + value: None +debug: + desc: null + value: '[]' +decoder: + desc: null + value: + _name_or_path: gpt2-medium + activation_function: gelu_new + add_cross_attention: true + architectures: + - GPT2LMHeadModel + attn_pdrop: 0.0 + bad_words_ids: null + bos_token_id: 50256 + chunk_size_feed_forward: 0 + cross_attention_hidden_size: null + decoder_start_token_id: null + diversity_penalty: 0.0 + do_sample: false + early_stopping: false + embd_pdrop: 0.0 + encoder_no_repeat_ngram_size: 0 + eos_token_id: 50256 + finetuning_task: null + forced_bos_token_id: null + forced_eos_token_id: null + id2label: + '0': LABEL_0 + '1': LABEL_1 + initializer_range: 0.02 + is_decoder: true + is_encoder_decoder: false + label2id: + LABEL_0: 0 + LABEL_1: 1 + layer_norm_epsilon: 1.0e-05 + length_penalty: 1.0 + max_length: 20 + min_length: 0 + model_type: gpt2 + n_ctx: 1024 + n_embd: 1024 + n_head: 16 + n_inner: null + n_layer: 24 + n_positions: 1024 + n_special: 0 + no_repeat_ngram_size: 0 + num_beam_groups: 1 + num_beams: 1 + num_return_sequences: 1 + output_attentions: false + output_hidden_states: false + output_scores: false + pad_token_id: null + predict_special_tokens: true + prefix: null + problem_type: null + pruned_heads: {} + remove_invalid_values: false + reorder_and_upcast_attn: false + repetition_penalty: 1.0 + resid_pdrop: 0.0 + return_dict: true + return_dict_in_generate: false + scale_attn_by_inverse_layer_idx: false + scale_attn_weights: true + sep_token_id: null + summary_activation: null + summary_first_dropout: 0.0 + summary_proj_to_labels: true + summary_type: cls_index + summary_use_proj: true + task_specific_params: + text-generation: + do_sample: true + max_length: 50 + temperature: 1.0 + tie_encoder_decoder: false + tie_word_embeddings: true + tokenizer_class: null + top_k: 50 + top_p: 1.0 + torch_dtype: null + torchscript: false + transformers_version: 4.17.0.dev0 + use_bfloat16: false + use_cache: false + vocab_size: 50257 +decoder_start_token_id: + desc: null + value: 50256 +deepspeed: + desc: null + value: None +disable_tqdm: + desc: null + value: false +diversity_penalty: + desc: null + value: 0.0 +do_eval: + desc: null + value: true +do_predict: + desc: null + value: false +do_sample: + desc: null + value: false +do_train: + desc: null + value: true +early_stopping: + desc: null + value: false +encoder: + desc: null + value: + _name_or_path: facebook/wav2vec2-large-lv60 + activation_dropout: 0.0 + adapter_kernel_size: 3 + adapter_stride: 2 + add_adapter: true + add_cross_attention: false + apply_spec_augment: false + architectures: + - Wav2Vec2ForPreTraining + attention_dropout: 0.0 + bad_words_ids: null + bos_token_id: 1 + chunk_size_feed_forward: 0 + classifier_proj_size: 256 + codevector_dim: 768 + contrastive_logits_temperature: 0.1 + conv_bias: true + conv_dim: + - 512 + - 512 + - 512 + - 512 + - 512 + - 512 + - 512 + conv_kernel: + - 10 + - 3 + - 3 + - 3 + - 3 + - 2 + - 2 + conv_stride: + - 5 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + cross_attention_hidden_size: null + ctc_loss_reduction: sum + ctc_zero_infinity: false + decoder_start_token_id: null + diversity_loss_weight: 0.1 + diversity_penalty: 0.0 + do_sample: false + do_stable_layer_norm: true + early_stopping: false + encoder_no_repeat_ngram_size: 0 + eos_token_id: 2 + feat_extract_activation: gelu + feat_extract_dropout: 0.0 + feat_extract_norm: layer + feat_proj_dropout: 0.0 + feat_quantizer_dropout: 0.0 + final_dropout: 0.0 + finetuning_task: null + forced_bos_token_id: null + forced_eos_token_id: null + gradient_checkpointing: false + hidden_act: gelu + hidden_dropout: 0.0 + hidden_dropout_prob: 0.0 + hidden_size: 1024 + id2label: + '0': LABEL_0 + '1': LABEL_1 + initializer_range: 0.02 + intermediate_size: 4096 + is_decoder: false + is_encoder_decoder: false + label2id: + LABEL_0: 0 + LABEL_1: 1 + layer_norm_eps: 1.0e-05 + layerdrop: 0.0 + length_penalty: 1.0 + mask_feature_length: 10 + mask_feature_min_masks: 0 + mask_feature_prob: 0.0 + mask_time_length: 10 + mask_time_min_masks: 2 + mask_time_prob: 0.0 + max_length: 20 + min_length: 0 + model_type: wav2vec2 + no_repeat_ngram_size: 0 + num_adapter_layers: 3 + num_attention_heads: 16 + num_beam_groups: 1 + num_beams: 1 + num_codevector_groups: 2 + num_codevectors_per_group: 320 + num_conv_pos_embedding_groups: 16 + num_conv_pos_embeddings: 128 + num_feat_extract_layers: 7 + num_hidden_layers: 24 + num_negatives: 100 + num_return_sequences: 1 + output_attentions: false + output_hidden_size: 1024 + output_hidden_states: false + output_scores: false + pad_token_id: 0 + prefix: null + problem_type: null + proj_codevector_dim: 768 + pruned_heads: {} + remove_invalid_values: false + repetition_penalty: 1.0 + return_dict: true + return_dict_in_generate: false + sep_token_id: null + task_specific_params: null + tdnn_dilation: + - 1 + - 2 + - 3 + - 1 + - 1 + tdnn_dim: + - 512 + - 512 + - 512 + - 512 + - 1500 + tdnn_kernel: + - 5 + - 3 + - 3 + - 1 + - 1 + temperature: 1.0 + tie_encoder_decoder: false + tie_word_embeddings: true + tokenizer_class: null + top_k: 50 + top_p: 1.0 + torch_dtype: null + torchscript: false + transformers_version: 4.17.0.dev0 + use_bfloat16: false + use_weighted_layer_sum: false + vocab_size: 32 + xvector_output_dim: 512 +encoder_no_repeat_ngram_size: + desc: null + value: 0 +eos_token_id: + desc: null + value: 50256 +eval_accumulation_steps: + desc: null + value: None +eval_batch_size: + desc: null + value: 14 +eval_steps: + desc: null + value: 500 +evaluation_strategy: + desc: null + value: steps +finetuning_task: + desc: null + value: null +forced_bos_token_id: + desc: null + value: null +forced_eos_token_id: + desc: null + value: null +fp16: + desc: null + value: true +fp16_backend: + desc: null + value: auto +fp16_full_eval: + desc: null + value: false +fp16_opt_level: + desc: null + value: O1 +generation_max_length: + desc: null + value: 40 +generation_num_beams: + desc: null + value: 1 +gradient_accumulation_steps: + desc: null + value: 2 +gradient_checkpointing: + desc: null + value: true +greater_is_better: + desc: null + value: None +group_by_length: + desc: null + value: true +half_precision_backend: + desc: null + value: amp +hub_model_id: + desc: null + value: None +hub_strategy: + desc: null + value: every_save +hub_token: + desc: null + value: +id2label: + desc: null + value: + '0': LABEL_0 + '1': LABEL_1 +ignore_data_skip: + desc: null + value: false +is_decoder: + desc: null + value: false +is_encoder_decoder: + desc: null + value: true +label2id: + desc: null + value: + LABEL_0: 0 + LABEL_1: 1 +label_names: + desc: null + value: None +label_smoothing_factor: + desc: null + value: 0.0 +learning_rate: + desc: null + value: 0.0003 +length_column_name: + desc: null + value: input_length +length_penalty: + desc: null + value: 1.0 +load_best_model_at_end: + desc: null + value: false +local_rank: + desc: null + value: -1 +log_level: + desc: null + value: -1 +log_level_replica: + desc: null + value: -1 +log_on_each_node: + desc: null + value: true +logging_dir: + desc: null + value: ./runs/Mar02_21-43-55_sanchit--v100 +logging_first_step: + desc: null + value: false +logging_nan_inf_filter: + desc: null + value: true +logging_steps: + desc: null + value: 1 +logging_strategy: + desc: null + value: steps +lr_scheduler_type: + desc: null + value: linear +max_grad_norm: + desc: null + value: 1.0 +max_length: + desc: null + value: 50 +max_steps: + desc: null + value: -1 +metric_for_best_model: + desc: null + value: None +min_length: + desc: null + value: 0 +model_type: + desc: null + value: speech-encoder-decoder +mp_parameters: + desc: null + value: '' +no_cuda: + desc: null + value: false +no_repeat_ngram_size: + desc: null + value: 0 +num_beam_groups: + desc: null + value: 1 +num_beams: + desc: null + value: 1 +num_return_sequences: + desc: null + value: 1 +num_train_epochs: + desc: null + value: 1.0 +optim: + desc: null + value: adamw_hf +output_attentions: + desc: null + value: false +output_dir: + desc: null + value: ./ +output_hidden_states: + desc: null + value: false +output_scores: + desc: null + value: false +overwrite_output_dir: + desc: null + value: true +pad_token_id: + desc: null + value: 50256 +past_index: + desc: null + value: -1 +per_device_eval_batch_size: + desc: null + value: 14 +per_device_train_batch_size: + desc: null + value: 14 +per_gpu_eval_batch_size: + desc: null + value: None +per_gpu_train_batch_size: + desc: null + value: None +predict_with_generate: + desc: null + value: true +prediction_loss_only: + desc: null + value: false +prefix: + desc: null + value: null +problem_type: + desc: null + value: null +processor_class: + desc: null + value: Wav2Vec2Processor +pruned_heads: + desc: null + value: {} +push_to_hub: + desc: null + value: true +push_to_hub_model_id: + desc: null + value: None +push_to_hub_organization: + desc: null + value: None +push_to_hub_token: + desc: null + value: +remove_invalid_values: + desc: null + value: false +remove_unused_columns: + desc: null + value: true +repetition_penalty: + desc: null + value: 1.0 +report_to: + desc: null + value: '[''wandb'']' +resume_from_checkpoint: + desc: null + value: None +return_dict: + desc: null + value: true +return_dict_in_generate: + desc: null + value: false +run_name: + desc: null + value: ./ +save_on_each_node: + desc: null + value: false +save_steps: + desc: null + value: 500 +save_strategy: + desc: null + value: steps +save_total_limit: + desc: null + value: 1 +seed: + desc: null + value: 42 +sep_token_id: + desc: null + value: null +sharded_ddp: + desc: null + value: '[]' +skip_memory_metrics: + desc: null + value: true +sortish_sampler: + desc: null + value: false +task_specific_params: + desc: null + value: null +temperature: + desc: null + value: 1.0 +tf32: + desc: null + value: None +tie_encoder_decoder: + desc: null + value: false +tie_word_embeddings: + desc: null + value: false +tokenizer_class: + desc: null + value: null +top_k: + desc: null + value: 50 +top_p: + desc: null + value: 1.0 +torch_dtype: + desc: null + value: torch.float32 +torchscript: + desc: null + value: false +tpu_metrics_debug: + desc: null + value: false +tpu_num_cores: + desc: null + value: None +train_batch_size: + desc: null + value: 14 +transformers_version: + desc: null + value: null +use_bfloat16: + desc: null + value: false +use_cache: + desc: null + value: false +use_legacy_prediction_loop: + desc: null + value: false +warmup_ratio: + desc: null + value: 0.0 +warmup_steps: + desc: null + value: 500 +weight_decay: + desc: null + value: 0.0 +xpu_backend: + desc: null + value: None diff --git a/wandb/run-20220302_214437-2u4nhnsf/files/output.log b/wandb/run-20220302_214437-2u4nhnsf/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..127e9aac96acd702382608c3403544071c91f757 --- /dev/null +++ b/wandb/run-20220302_214437-2u4nhnsf/files/output.log @@ -0,0 +1,1523 @@ + + + 0%| | 0/1019 [00:00> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:44:46,125 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%| | 1/1019 [00:07<2:03:59, 7.31s/it] + + 0%| | 1/1019 [00:07<2:03:59, 7.31s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:44:49,184 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:44:52,177 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%|▏ | 2/1019 [00:13<1:51:20, 6.57s/it] + + 0%|▏ | 2/1019 [00:13<1:51:20, 6.57s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:44:55,229 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:44:58,140 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%|▏ | 3/1019 [00:19<1:48:26, 6.40s/it] + + 0%|▏ | 3/1019 [00:19<1:48:26, 6.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:45:01,343 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8536, 'learning_rate': 1.2e-06, 'epoch': 0.0} +[WARNING|modeling_utils.py:388] 2022-03-02 21:45:04,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 0%|▎ | 4/1019 [00:25<1:45:16, 6.22s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:45:07,247 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.654, 'learning_rate': 1.8e-06, 'epoch': 0.0} +[WARNING|modeling_utils.py:388] 2022-03-02 21:45:10,104 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 0%|▍ | 5/1019 [00:31<1:42:46, 6.08s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:45:13,114 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9403, 'learning_rate': 2.4e-06, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-02 21:45:16,011 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 1%|▍ | 6/1019 [00:37<1:41:41, 6.02s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:45:18,999 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6693, 'learning_rate': 2.9999999999999997e-06, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-02 21:45:21,856 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 1%|▌ | 7/1019 [00:43<1:40:35, 5.96s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:45:24,800 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:45:27,633 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▋ | 8/1019 [00:48<1:39:28, 5.90s/it] + + 1%|▋ | 8/1019 [00:48<1:39:28, 5.90s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:45:30,492 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:45:33,315 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▋ | 9/1019 [00:54<1:38:13, 5.83s/it] + + 1%|▋ | 9/1019 [00:54<1:38:13, 5.83s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:45:36,252 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:45:39,086 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▊ | 10/1019 [01:00<1:37:47, 5.82s/it] + + 1%|▊ | 10/1019 [01:00<1:37:47, 5.82s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:45:42,023 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7137, 'learning_rate': 5.399999999999999e-06, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-02 21:45:44,799 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 1%|▊ | 11/1019 [01:06<1:37:10, 5.78s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:45:47,715 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6946, 'learning_rate': 5.999999999999999e-06, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-02 21:45:50,513 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 1%|▉ | 12/1019 [01:11<1:36:42, 5.76s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:45:53,461 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:45:56,219 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6247, 'learning_rate': 6.599999999999999e-06, 'epoch': 0.01} + + 1%|█ | 13/1019 [01:17<1:36:19, 5.75s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:45:59,073 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:46:01,853 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|█ | 14/1019 [01:23<1:35:40, 5.71s/it] + + 1%|█ | 14/1019 [01:23<1:35:40, 5.71s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:46:04,741 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.456, 'learning_rate': 7.799999999999998e-06, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-02 21:46:07,453 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 1%|█▏ | 15/1019 [01:28<1:35:00, 5.68s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:46:10,290 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6667, 'learning_rate': 8.4e-06, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-02 21:46:12,989 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 2%|█▏ | 16/1019 [01:34<1:34:12, 5.64s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:46:15,768 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:46:18,418 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▎ | 17/1019 [01:39<1:33:04, 5.57s/it] + + 2%|█▎ | 17/1019 [01:39<1:33:04, 5.57s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:46:21,164 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6961, 'learning_rate': 9.6e-06, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-02 21:46:23,791 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 2%|█▍ | 18/1019 [01:45<1:31:58, 5.51s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:46:26,526 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:46:29,157 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▍ | 19/1019 [01:50<1:31:08, 5.47s/it] + + 2%|█▍ | 19/1019 [01:50<1:31:08, 5.47s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:46:31,919 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:46:34,546 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▌ | 20/1019 [01:55<1:30:39, 5.45s/it] + + 2%|█▌ | 20/1019 [01:55<1:30:39, 5.45s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:46:37,250 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:46:39,806 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▋ | 21/1019 [02:01<1:29:38, 5.39s/it] + + 2%|█▋ | 21/1019 [02:01<1:29:38, 5.39s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:46:42,473 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:46:45,140 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▋ | 22/1019 [02:06<1:29:16, 5.37s/it] + + 2%|█▋ | 22/1019 [02:06<1:29:16, 5.37s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:46:47,832 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:46:50,388 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4451, 'learning_rate': 1.26e-05, 'epoch': 0.02} + + 2%|█▊ | 23/1019 [02:11<1:28:34, 5.34s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:46:53,021 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:46:55,546 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▊ | 24/1019 [02:16<1:27:35, 5.28s/it] + + 2%|█▊ | 24/1019 [02:16<1:27:35, 5.28s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:46:58,147 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3828, 'learning_rate': 1.3799999999999998e-05, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-02 21:47:00,678 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 2%|█▉ | 25/1019 [02:21<1:26:45, 5.24s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:47:03,341 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:47:05,829 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██ | 26/1019 [02:27<1:26:14, 5.21s/it] + + 3%|██ | 26/1019 [02:27<1:26:14, 5.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:47:08,439 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:47:10,907 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██ | 27/1019 [02:32<1:25:29, 5.17s/it] + + 3%|██ | 27/1019 [02:32<1:25:29, 5.17s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:47:13,431 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:47:15,893 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▏ | 28/1019 [02:37<1:24:30, 5.12s/it] + + 3%|██▏ | 28/1019 [02:37<1:24:30, 5.12s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:47:18,508 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:47:20,955 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▏ | 29/1019 [02:42<1:24:08, 5.10s/it] + + 3%|██▏ | 29/1019 [02:42<1:24:08, 5.10s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:47:23,544 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:47:26,030 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▎ | 30/1019 [02:47<1:23:56, 5.09s/it] + + 3%|██▎ | 30/1019 [02:47<1:23:56, 5.09s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:47:28,626 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:47:31,091 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▍ | 31/1019 [02:52<1:23:41, 5.08s/it] + + 3%|██▍ | 31/1019 [02:52<1:23:41, 5.08s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:47:33,727 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:47:36,228 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▍ | 32/1019 [02:57<1:23:52, 5.10s/it] + + 3%|██▍ | 32/1019 [02:57<1:23:52, 5.10s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:47:38,802 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:47:41,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▌ | 33/1019 [03:02<1:23:30, 5.08s/it] + + 3%|██▌ | 33/1019 [03:02<1:23:30, 5.08s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:47:43,802 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:47:46,187 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▋ | 34/1019 [03:07<1:22:37, 5.03s/it] + + 3%|██▋ | 34/1019 [03:07<1:22:37, 5.03s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:47:48,693 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:47:51,005 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▋ | 35/1019 [03:12<1:21:30, 4.97s/it] + + 3%|██▋ | 35/1019 [03:12<1:21:30, 4.97s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:47:53,450 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:47:55,684 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|██▊ | 36/1019 [03:16<1:19:58, 4.88s/it] + + 4%|██▊ | 36/1019 [03:16<1:19:58, 4.88s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:47:58,083 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:48:00,262 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|██▊ | 37/1019 [03:21<1:18:24, 4.79s/it] + + 4%|██▊ | 37/1019 [03:21<1:18:24, 4.79s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:48:02,530 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:48:04,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|██▉ | 38/1019 [03:25<1:16:26, 4.68s/it] + + 4%|██▉ | 38/1019 [03:25<1:16:26, 4.68s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:48:06,869 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:48:08,958 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███ | 39/1019 [03:30<1:14:28, 4.56s/it] + + 4%|███ | 39/1019 [03:30<1:14:28, 4.56s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:48:11,110 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:48:13,136 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███ | 40/1019 [03:34<1:12:31, 4.45s/it] + + 4%|███ | 40/1019 [03:34<1:12:31, 4.45s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:48:15,177 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:48:17,049 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▏ | 41/1019 [03:38<1:09:50, 4.28s/it] + + 4%|███▏ | 41/1019 [03:38<1:09:50, 4.28s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:48:18,943 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3835, 'learning_rate': 2.3999999999999997e-05, 'epoch': 0.04} +[WARNING|modeling_utils.py:388] 2022-03-02 21:48:20,709 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▎ | 42/1019 [03:41<1:06:43, 4.10s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:48:22,498 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:48:24,087 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▎ | 43/1019 [03:45<1:03:08, 3.88s/it] + + 4%|███▎ | 43/1019 [03:45<1:03:08, 3.88s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:48:25,720 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:48:27,177 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▍ | 44/1019 [03:48<59:13, 3.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:48:28,660 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4275, 'learning_rate': 2.52e-05, 'epoch': 0.04} +[WARNING|modeling_utils.py:388] 2022-03-02 21:48:29,876 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▌ | 45/1019 [03:51<54:32, 3.36s/it] + 4%|███▌ | 45/1019 [03:51<54:32, 3.36s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:48:31,134 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:48:32,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▋ | 46/1019 [03:53<49:46, 3.07s/it] + 5%|███▋ | 46/1019 [03:53<49:46, 3.07s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:48:33,478 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:48:34,443 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▋ | 47/1019 [03:55<45:22, 2.80s/it] + 5%|███▋ | 47/1019 [03:55<45:22, 2.80s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:48:35,474 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:48:36,352 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▊ | 48/1019 [03:57<41:00, 2.53s/it] + 5%|███▊ | 48/1019 [03:57<41:00, 2.53s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:48:37,242 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:48:38,000 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▉ | 49/1019 [03:59<36:23, 2.25s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:48:38,744 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.0704, 'learning_rate': 2.7599999999999997e-05, 'epoch': 0.05} +[WARNING|modeling_utils.py:388] 2022-03-02 21:48:39,944 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▉ | 50/1019 [04:01<35:09, 2.18s/it] + 5%|███▉ | 50/1019 [04:01<35:09, 2.18s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:48:43,315 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 51/1019 [04:07<56:07, 3.48s/it]g-point operations will not be computed-02 21:48:43,315 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 51/1019 [04:07<56:07, 3.48s/it]g-point operations will not be computed-02 21:48:43,315 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 51/1019 [04:07<56:07, 3.48s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:48:49,626 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 52/1019 [04:13<1:09:13, 4.30s/it]g-point operations will not be computed-02 21:48:49,626 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 52/1019 [04:13<1:09:13, 4.30s/it]g-point operations will not be computed-02 21:48:49,626 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 52/1019 [04:13<1:09:13, 4.30s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:48:55,791 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 53/1019 [04:19<1:17:42, 4.83s/it]g-point operations will not be computed-02 21:48:55,791 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 53/1019 [04:19<1:17:42, 4.83s/it]g-point operations will not be computed-02 21:48:55,791 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 53/1019 [04:19<1:17:42, 4.83s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:49:02,208 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▏ | 54/1019 [04:26<1:25:49, 5.34s/it]g-point operations will not be computed-02 21:49:02,208 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▏ | 54/1019 [04:26<1:25:49, 5.34s/it]g-point operations will not be computed-02 21:49:02,208 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▏ | 54/1019 [04:26<1:25:49, 5.34s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:49:08,273 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▎ | 55/1019 [04:32<1:28:47, 5.53s/it]g-point operations will not be computed-02 21:49:08,273 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▎ | 55/1019 [04:32<1:28:47, 5.53s/it]g-point operations will not be computed-02 21:49:08,273 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▎ | 55/1019 [04:32<1:28:47, 5.53s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:49:14,372 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▎ | 56/1019 [04:38<1:31:34, 5.71s/it]g-point operations will not be computed-02 21:49:14,372 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▎ | 56/1019 [04:38<1:31:34, 5.71s/it]g-point operations will not be computed-02 21:49:14,372 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▎ | 56/1019 [04:38<1:31:34, 5.71s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:49:20,449 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▍ | 57/1019 [04:44<1:33:03, 5.80s/it]g-point operations will not be computed-02 21:49:20,449 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▍ | 57/1019 [04:44<1:33:03, 5.80s/it]g-point operations will not be computed-02 21:49:20,449 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▍ | 57/1019 [04:44<1:33:03, 5.80s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:49:26,316 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▍ | 58/1019 [04:50<1:32:55, 5.80s/it]g-point operations will not be computed-02 21:49:26,316 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▍ | 58/1019 [04:50<1:32:55, 5.80s/it]g-point operations will not be computed-02 21:49:26,316 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▍ | 58/1019 [04:50<1:32:55, 5.80s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:49:32,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▌ | 59/1019 [04:56<1:32:56, 5.81s/it]g-point operations will not be computed-02 21:49:32,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▌ | 59/1019 [04:56<1:32:56, 5.81s/it]g-point operations will not be computed-02 21:49:32,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▌ | 59/1019 [04:56<1:32:56, 5.81s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:49:37,910 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 60/1019 [05:02<1:32:39, 5.80s/it]g-point operations will not be computed-02 21:49:37,910 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 60/1019 [05:02<1:32:39, 5.80s/it]g-point operations will not be computed-02 21:49:37,910 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 60/1019 [05:02<1:32:39, 5.80s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:49:43,767 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 61/1019 [05:07<1:32:23, 5.79s/it]g-point operations will not be computed-02 21:49:43,767 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 61/1019 [05:07<1:32:23, 5.79s/it]g-point operations will not be computed-02 21:49:43,767 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 61/1019 [05:07<1:32:23, 5.79s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:49:49,420 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▊ | 62/1019 [05:13<1:31:44, 5.75s/it]g-point operations will not be computed-02 21:49:49,420 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▊ | 62/1019 [05:13<1:31:44, 5.75s/it]g-point operations will not be computed-02 21:49:49,420 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▊ | 62/1019 [05:13<1:31:44, 5.75s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:49:55,064 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▉ | 63/1019 [05:19<1:30:55, 5.71s/it]g-point operations will not be computed-02 21:49:55,064 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▉ | 63/1019 [05:19<1:30:55, 5.71s/it]g-point operations will not be computed-02 21:49:55,064 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▉ | 63/1019 [05:19<1:30:55, 5.71s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:50:00,656 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▉ | 64/1019 [05:24<1:29:55, 5.65s/it]g-point operations will not be computed-02 21:50:00,656 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▉ | 64/1019 [05:24<1:29:55, 5.65s/it]g-point operations will not be computed-02 21:50:00,656 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▉ | 64/1019 [05:24<1:29:55, 5.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:50:06,176 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 65/1019 [05:30<1:29:10, 5.61s/it]g-point operations will not be computed-02 21:50:06,176 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 65/1019 [05:30<1:29:10, 5.61s/it]g-point operations will not be computed-02 21:50:06,176 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 65/1019 [05:30<1:29:10, 5.61s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:50:11,628 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 66/1019 [05:35<1:28:17, 5.56s/it]g-point operations will not be computed-02 21:50:11,628 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 66/1019 [05:35<1:28:17, 5.56s/it]g-point operations will not be computed-02 21:50:11,628 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 66/1019 [05:35<1:28:17, 5.56s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:50:17,081 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▏ | 67/1019 [05:41<1:28:09, 5.56s/it]g-point operations will not be computed-02 21:50:17,081 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▏ | 67/1019 [05:41<1:28:09, 5.56s/it]g-point operations will not be computed-02 21:50:17,081 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▏ | 67/1019 [05:41<1:28:09, 5.56s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:50:22,613 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▏ | 67/1019 [05:41<1:28:09, 5.56s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:50:22,613 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▎ | 68/1019 [05:46<1:27:40, 5.53s/it]g-point operations will not be computed-02 21:50:22,613 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▎ | 68/1019 [05:46<1:27:40, 5.53s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:50:28,128 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▎ | 68/1019 [05:46<1:27:40, 5.53s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:50:28,128 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▎ | 69/1019 [05:52<1:27:15, 5.51s/it]g-point operations will not be computed-02 21:50:28,128 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▎ | 69/1019 [05:52<1:27:15, 5.51s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:50:33,676 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▎ | 69/1019 [05:52<1:27:15, 5.51s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:50:33,676 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▍ | 70/1019 [05:57<1:28:23, 5.59s/it]g-point operations will not be computed-02 21:50:33,676 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▍ | 70/1019 [05:57<1:28:23, 5.59s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:50:39,221 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▌ | 71/1019 [06:03<1:27:04, 5.51s/it]g-point operations will not be computed-02 21:50:39,221 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▌ | 71/1019 [06:03<1:27:04, 5.51s/it]g-point operations will not be computed-02 21:50:39,221 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▌ | 71/1019 [06:03<1:27:04, 5.51s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:50:44,677 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▌ | 71/1019 [06:03<1:27:04, 5.51s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:50:44,677 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▌ | 72/1019 [06:08<1:26:47, 5.50s/it]g-point operations will not be computed-02 21:50:44,677 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▌ | 72/1019 [06:08<1:26:47, 5.50s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:50:50,100 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▌ | 72/1019 [06:08<1:26:47, 5.50s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:50:50,100 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 73/1019 [06:13<1:26:14, 5.47s/it]g-point operations will not be computed-02 21:50:50,100 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 73/1019 [06:13<1:26:14, 5.47s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:50:55,436 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 74/1019 [06:19<1:25:06, 5.40s/it]g-point operations will not be computed-02 21:50:55,436 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 74/1019 [06:19<1:25:06, 5.40s/it]g-point operations will not be computed-02 21:50:55,436 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 74/1019 [06:19<1:25:06, 5.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:51:00,650 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 74/1019 [06:19<1:25:06, 5.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:51:00,650 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▊ | 75/1019 [06:24<1:23:53, 5.33s/it]g-point operations will not be computed-02 21:51:00,650 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▊ | 75/1019 [06:24<1:23:53, 5.33s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:51:05,753 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▊ | 75/1019 [06:24<1:23:53, 5.33s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:51:05,753 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▉ | 76/1019 [06:29<1:22:52, 5.27s/it]g-point operations will not be computed-02 21:51:05,753 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▉ | 76/1019 [06:29<1:22:52, 5.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:51:10,919 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▉ | 76/1019 [06:29<1:22:52, 5.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:51:10,919 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|█████▉ | 77/1019 [06:34<1:21:48, 5.21s/it]g-point operations will not be computed-02 21:51:10,919 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|█████▉ | 77/1019 [06:34<1:21:48, 5.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:51:16,017 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|█████▉ | 77/1019 [06:34<1:21:48, 5.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:51:16,017 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████ | 78/1019 [06:39<1:21:15, 5.18s/it]g-point operations will not be computed-02 21:51:16,017 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████ | 78/1019 [06:39<1:21:15, 5.18s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:51:21,044 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████ | 78/1019 [06:39<1:21:15, 5.18s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:51:21,044 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████ | 79/1019 [06:44<1:20:53, 5.16s/it]g-point operations will not be computed-02 21:51:21,044 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████ | 79/1019 [06:44<1:20:53, 5.16s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:51:26,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▏ | 80/1019 [06:49<1:19:59, 5.11s/it]g-point operations will not be computed-02 21:51:26,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▏ | 80/1019 [06:49<1:19:59, 5.11s/it]g-point operations will not be computed-02 21:51:26,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▏ | 80/1019 [06:49<1:19:59, 5.11s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:51:31,118 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▎ | 81/1019 [06:54<1:19:04, 5.06s/it]g-point operations will not be computed-02 21:51:31,118 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▎ | 81/1019 [06:54<1:19:04, 5.06s/it]g-point operations will not be computed-02 21:51:31,118 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▎ | 81/1019 [06:54<1:19:04, 5.06s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:51:36,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▎ | 81/1019 [06:54<1:19:04, 5.06s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:51:36,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▎ | 82/1019 [06:59<1:18:18, 5.01s/it]g-point operations will not be computed-02 21:51:36,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▎ | 82/1019 [06:59<1:18:18, 5.01s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:51:40,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▍ | 83/1019 [07:04<1:17:36, 4.97s/it]g-point operations will not be computed-02 21:51:40,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▍ | 83/1019 [07:04<1:17:36, 4.97s/it]g-point operations will not be computed-02 21:51:40,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▍ | 83/1019 [07:04<1:17:36, 4.97s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:51:45,726 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▌ | 84/1019 [07:09<1:16:08, 4.89s/it]g-point operations will not be computed-02 21:51:45,726 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▌ | 84/1019 [07:09<1:16:08, 4.89s/it]g-point operations will not be computed-02 21:51:45,726 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▌ | 84/1019 [07:09<1:16:08, 4.89s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:51:50,311 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▌ | 84/1019 [07:09<1:16:08, 4.89s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:51:50,311 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▌ | 85/1019 [07:13<1:14:32, 4.79s/it]g-point operations will not be computed-02 21:51:50,311 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▌ | 85/1019 [07:13<1:14:32, 4.79s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:51:54,874 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▋ | 86/1019 [07:18<1:13:24, 4.72s/it]g-point operations will not be computed-02 21:51:54,874 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▋ | 86/1019 [07:18<1:13:24, 4.72s/it]g-point operations will not be computed-02 21:51:54,874 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▋ | 86/1019 [07:18<1:13:24, 4.72s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:51:59,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▋ | 87/1019 [07:22<1:11:53, 4.63s/it]g-point operations will not be computed-02 21:51:59,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▋ | 87/1019 [07:22<1:11:53, 4.63s/it]g-point operations will not be computed-02 21:51:59,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▋ | 87/1019 [07:22<1:11:53, 4.63s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:52:03,743 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▊ | 88/1019 [07:27<1:10:40, 4.56s/it]g-point operations will not be computed-02 21:52:03,743 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▊ | 88/1019 [07:27<1:10:40, 4.56s/it]g-point operations will not be computed-02 21:52:03,743 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▊ | 88/1019 [07:27<1:10:40, 4.56s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:52:08,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▉ | 89/1019 [07:31<1:08:37, 4.43s/it]g-point operations will not be computed-02 21:52:08,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▉ | 89/1019 [07:31<1:08:37, 4.43s/it]g-point operations will not be computed-02 21:52:08,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▉ | 89/1019 [07:31<1:08:37, 4.43s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:52:12,089 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▉ | 90/1019 [07:35<1:06:13, 4.28s/it]g-point operations will not be computed-02 21:52:12,089 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▉ | 90/1019 [07:35<1:06:13, 4.28s/it]g-point operations will not be computed-02 21:52:12,089 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▉ | 90/1019 [07:35<1:06:13, 4.28s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:52:15,895 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████ | 91/1019 [07:38<1:03:29, 4.11s/it]g-point operations will not be computed-02 21:52:15,895 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████ | 91/1019 [07:38<1:03:29, 4.11s/it]g-point operations will not be computed-02 21:52:15,895 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████ | 91/1019 [07:38<1:03:29, 4.11s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:52:19,504 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▏ | 92/1019 [07:42<1:00:37, 3.92s/it]g-point operations will not be computed-02 21:52:19,504 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▏ | 92/1019 [07:42<1:00:37, 3.92s/it]g-point operations will not be computed-02 21:52:19,504 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:52:24,480 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 21:52:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:52:24,480 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 21:52:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▍ | 93/1019 [07:45<57:43, 3.74s/it] + 9%|███████▍ | 93/1019 [07:45<57:43, 3.74s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:52:26,088 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▍ | 94/1019 [07:48<54:30, 3.54s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:52:26,088 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▍ | 94/1019 [07:48<54:30, 3.54s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:52:26,088 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▌ | 95/1019 [07:51<51:04, 3.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:52:29,024 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▌ | 95/1019 [07:51<51:04, 3.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:52:29,024 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▌ | 95/1019 [07:51<51:04, 3.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:52:31,730 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▌ | 95/1019 [07:51<51:04, 3.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:52:31,730 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▋ | 96/1019 [07:54<47:26, 3.08s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:52:34,092 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▋ | 96/1019 [07:54<47:26, 3.08s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:52:34,092 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▋ | 97/1019 [07:56<43:21, 2.82s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:52:36,158 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▋ | 97/1019 [07:56<43:21, 2.82s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:52:36,158 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 99/1019 [08:00<35:31, 2.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:52:38,021 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 99/1019 [08:00<35:31, 2.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:52:38,021 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 100/1019 [08:02<33:54, 2.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:52:39,617 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 100/1019 [08:02<33:54, 2.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:52:39,617 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 100/1019 [08:02<33:54, 2.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:52:44,085 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▉ | 101/1019 [08:08<52:49, 3.45s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:52:44,085 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▉ | 101/1019 [08:08<52:49, 3.45s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:52:44,085 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▉ | 101/1019 [08:08<52:49, 3.45s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:52:50,175 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 102/1019 [08:14<1:04:18, 4.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:52:50,175 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 102/1019 [08:14<1:04:18, 4.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:52:50,175 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 102/1019 [08:14<1:04:18, 4.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:52:56,168 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▉ | 103/1019 [08:20<1:12:36, 4.76s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:52:56,168 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▉ | 103/1019 [08:20<1:12:36, 4.76s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:52:56,168 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▉ | 103/1019 [08:20<1:12:36, 4.76s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:53:02,086 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▉ | 104/1019 [08:26<1:17:24, 5.08s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:53:02,086 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▉ | 104/1019 [08:26<1:17:24, 5.08s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:53:02,086 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▉ | 104/1019 [08:26<1:17:24, 5.08s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:53:07,956 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████ | 105/1019 [08:32<1:21:10, 5.33s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:53:07,956 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████ | 105/1019 [08:32<1:21:10, 5.33s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:53:07,956 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████ | 105/1019 [08:32<1:21:10, 5.33s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:53:13,881 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████ | 106/1019 [08:38<1:23:42, 5.50s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:53:13,881 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████ | 106/1019 [08:38<1:23:42, 5.50s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:53:13,881 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████ | 106/1019 [08:38<1:23:42, 5.50s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:53:19,772 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▏ | 107/1019 [08:43<1:25:05, 5.60s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:53:19,772 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▏ | 107/1019 [08:43<1:25:05, 5.60s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:53:19,772 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▏ | 107/1019 [08:43<1:25:05, 5.60s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:53:25,569 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▏ | 107/1019 [08:43<1:25:05, 5.60s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:53:25,569 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▎ | 108/1019 [08:49<1:25:51, 5.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:53:25,569 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▎ | 108/1019 [08:49<1:25:51, 5.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:53:31,296 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▎ | 109/1019 [08:55<1:26:10, 5.68s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:53:31,296 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▎ | 109/1019 [08:55<1:26:10, 5.68s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:53:31,296 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▎ | 109/1019 [08:55<1:26:10, 5.68s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:53:37,091 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▍ | 110/1019 [09:01<1:26:19, 5.70s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:53:37,091 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▍ | 110/1019 [09:01<1:26:19, 5.70s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:53:37,091 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▍ | 110/1019 [09:01<1:26:19, 5.70s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:53:42,691 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▍ | 110/1019 [09:01<1:26:19, 5.70s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:53:42,691 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▍ | 111/1019 [09:06<1:25:46, 5.67s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:53:42,691 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▍ | 111/1019 [09:06<1:25:46, 5.67s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:53:48,316 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▍ | 111/1019 [09:06<1:25:46, 5.67s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:53:48,316 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 112/1019 [09:12<1:25:26, 5.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:53:48,316 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 112/1019 [09:12<1:25:26, 5.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:53:53,915 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 112/1019 [09:12<1:25:26, 5.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:53:53,915 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▋ | 113/1019 [09:17<1:25:00, 5.63s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:53:53,915 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▋ | 113/1019 [09:17<1:25:00, 5.63s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:53:59,507 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▋ | 113/1019 [09:17<1:25:00, 5.63s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:53:59,507 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▋ | 114/1019 [09:23<1:24:25, 5.60s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:53:59,507 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▋ | 114/1019 [09:23<1:24:25, 5.60s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:54:04,979 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + [WARNING|modeling_utils.py:388] 2022-03-02 21:54:04,979 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + [WARNING|modeling_utils.py:388] 2022-03-02 21:54:04,979 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▊ | 115/1019 [09:28<1:24:02, 5.58s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:54:04,979 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▊ | 115/1019 [09:28<1:24:02, 5.58s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:54:10,514 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▊ | 115/1019 [09:28<1:24:02, 5.58s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:54:10,514 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 116/1019 [09:34<1:23:47, 5.57s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:54:10,514 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 116/1019 [09:34<1:23:47, 5.57s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:54:16,078 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 116/1019 [09:34<1:23:47, 5.57s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:54:16,078 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 117/1019 [09:40<1:23:36, 5.56s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:54:16,078 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 117/1019 [09:40<1:23:36, 5.56s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:54:21,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 117/1019 [09:40<1:23:36, 5.56s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:54:21,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 117/1019 [09:40<1:23:36, 5.56s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:54:21,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████ | 118/1019 [09:45<1:22:32, 5.50s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:54:21,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████ | 118/1019 [09:45<1:22:32, 5.50s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:54:26,810 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████ | 118/1019 [09:45<1:22:32, 5.50s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:54:26,810 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████ | 119/1019 [09:50<1:21:38, 5.44s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:54:26,810 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████ | 119/1019 [09:50<1:21:38, 5.44s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:54:32,208 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████ | 119/1019 [09:50<1:21:38, 5.44s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:54:32,208 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▏ | 120/1019 [09:56<1:21:25, 5.43s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:54:32,208 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▏ | 120/1019 [09:56<1:21:25, 5.43s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:54:37,615 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▎ | 121/1019 [10:01<1:20:48, 5.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:54:37,615 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▎ | 121/1019 [10:01<1:20:48, 5.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:54:37,615 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▎ | 121/1019 [10:01<1:20:48, 5.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:54:42,952 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▎ | 121/1019 [10:01<1:20:48, 5.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:54:42,952 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▎ | 122/1019 [10:06<1:20:47, 5.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:54:42,952 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▎ | 122/1019 [10:06<1:20:47, 5.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:54:48,285 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▎ | 122/1019 [10:06<1:20:47, 5.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:54:48,285 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▎ | 122/1019 [10:06<1:20:47, 5.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:54:48,285 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 123/1019 [10:12<1:20:07, 5.37s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:54:48,285 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 123/1019 [10:12<1:20:07, 5.37s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:54:53,568 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 123/1019 [10:12<1:20:07, 5.37s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:54:53,568 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 124/1019 [10:17<1:19:23, 5.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:54:53,568 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 124/1019 [10:17<1:19:23, 5.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:54:58,749 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▌ | 125/1019 [10:22<1:18:24, 5.26s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:54:58,749 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▌ | 125/1019 [10:22<1:18:24, 5.26s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:54:58,749 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▌ | 125/1019 [10:22<1:18:24, 5.26s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:55:03,837 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▋ | 126/1019 [10:27<1:17:40, 5.22s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:55:03,837 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▋ | 126/1019 [10:27<1:17:40, 5.22s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:55:03,837 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▋ | 126/1019 [10:27<1:17:40, 5.22s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:55:09,025 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▋ | 127/1019 [10:32<1:17:22, 5.20s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:55:09,025 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▋ | 127/1019 [10:32<1:17:22, 5.20s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:55:09,025 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▋ | 127/1019 [10:32<1:17:22, 5.20s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:55:14,067 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▋ | 127/1019 [10:32<1:17:22, 5.20s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:55:14,067 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|█████████▊ | 128/1019 [10:37<1:16:24, 5.14s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:55:14,067 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|█████████▊ | 128/1019 [10:37<1:16:24, 5.14s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:55:19,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|█████████▊ | 129/1019 [10:42<1:15:42, 5.10s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:55:19,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|█████████▊ | 129/1019 [10:42<1:15:42, 5.10s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:55:19,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|█████████▊ | 129/1019 [10:42<1:15:42, 5.10s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:55:24,055 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|█████████▉ | 130/1019 [10:47<1:14:51, 5.05s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:55:24,055 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|█████████▉ | 130/1019 [10:47<1:14:51, 5.05s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:55:24,055 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|█████████▉ | 130/1019 [10:47<1:14:51, 5.05s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:55:28,933 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|█████████▉ | 130/1019 [10:47<1:14:51, 5.05s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:55:28,933 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 131/1019 [10:52<1:13:59, 5.00s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:55:28,933 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 131/1019 [10:52<1:13:59, 5.00s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:55:33,832 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 131/1019 [10:52<1:13:59, 5.00s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:55:33,832 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 132/1019 [10:57<1:12:55, 4.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:55:33,832 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 132/1019 [10:57<1:12:55, 4.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:55:38,539 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▏ | 133/1019 [11:02<1:12:04, 4.88s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:55:38,539 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▏ | 133/1019 [11:02<1:12:04, 4.88s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:55:38,539 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▏ | 133/1019 [11:02<1:12:04, 4.88s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:55:43,313 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▏ | 133/1019 [11:02<1:12:04, 4.88s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:55:43,313 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▎ | 134/1019 [11:06<1:11:07, 4.82s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:55:43,313 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▎ | 134/1019 [11:06<1:11:07, 4.82s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:55:47,905 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▎ | 134/1019 [11:06<1:11:07, 4.82s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:55:47,905 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▎ | 135/1019 [11:11<1:10:00, 4.75s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:55:47,905 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▎ | 135/1019 [11:11<1:10:00, 4.75s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:55:52,459 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▎ | 135/1019 [11:11<1:10:00, 4.75s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:55:52,459 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▍ | 136/1019 [11:15<1:09:06, 4.70s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:55:52,459 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▍ | 136/1019 [11:15<1:09:06, 4.70s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:55:57,013 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▍ | 137/1019 [11:20<1:07:52, 4.62s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:55:57,013 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▍ | 137/1019 [11:20<1:07:52, 4.62s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:55:57,013 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▍ | 137/1019 [11:20<1:07:52, 4.62s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:01,373 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|██████████▌ | 138/1019 [11:24<1:06:15, 4.51s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:01,373 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|██████████▌ | 138/1019 [11:24<1:06:15, 4.51s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:01,373 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|██████████▌ | 138/1019 [11:24<1:06:15, 4.51s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:05,525 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|██████████▋ | 139/1019 [11:28<1:04:26, 4.39s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:05,525 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|██████████▋ | 139/1019 [11:28<1:04:26, 4.39s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:05,525 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|██████████▋ | 139/1019 [11:28<1:04:26, 4.39s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:09,623 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|██████████▋ | 140/1019 [11:32<1:02:36, 4.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:09,623 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|██████████▋ | 140/1019 [11:32<1:02:36, 4.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:09,623 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|██████████▋ | 140/1019 [11:32<1:02:36, 4.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:13,496 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|██████████▋ | 140/1019 [11:32<1:02:36, 4.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:13,496 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|██████████▊ | 141/1019 [11:36<1:00:12, 4.11s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:13,496 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|██████████▊ | 141/1019 [11:36<1:00:12, 4.11s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:17,075 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|██████████▊ | 141/1019 [11:36<1:00:12, 4.11s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:17,075 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▏ | 142/1019 [11:39<57:04, 3.90s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:20,339 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▏ | 143/1019 [11:43<53:45, 3.68s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:20,339 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▏ | 143/1019 [11:43<53:45, 3.68s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:20,339 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▏ | 143/1019 [11:43<53:45, 3.68s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:23,359 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▏ | 143/1019 [11:43<53:45, 3.68s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:23,359 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▎ | 144/1019 [11:46<50:22, 3.45s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:26,134 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▎ | 144/1019 [11:46<50:22, 3.45s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:26,134 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▍ | 145/1019 [11:48<46:25, 3.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:26,134 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▍ | 145/1019 [11:48<46:25, 3.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:26,134 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▍ | 146/1019 [11:50<42:39, 2.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:28,590 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▍ | 146/1019 [11:50<42:39, 2.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:28,590 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▌ | 147/1019 [11:53<39:00, 2.68s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:30,812 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▌ | 147/1019 [11:53<39:00, 2.68s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:30,812 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▌ | 148/1019 [11:54<35:19, 2.43s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:34,526 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▌ | 148/1019 [11:54<35:19, 2.43s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:34,526 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▋ | 149/1019 [11:56<32:14, 2.22s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:36,237 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▋ | 149/1019 [11:56<32:14, 2.22s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:36,237 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▊ | 150/1019 [11:58<31:20, 2.16s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:36,237 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▊ | 150/1019 [11:58<31:20, 2.16s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:40,776 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▊ | 150/1019 [11:58<31:20, 2.16s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:40,776 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▊ | 151/1019 [12:05<49:45, 3.44s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:40,776 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▊ | 151/1019 [12:05<49:45, 3.44s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:46,858 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▊ | 151/1019 [12:05<49:45, 3.44s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:46,858 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▋ | 152/1019 [12:11<1:00:46, 4.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:46,858 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▋ | 152/1019 [12:11<1:00:46, 4.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▋ | 152/1019 [12:11<1:00:46, 4.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▋ | 153/1019 [12:17<1:08:44, 4.76s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▋ | 153/1019 [12:17<1:08:44, 4.76s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▋ | 153/1019 [12:17<1:08:44, 4.76s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▊ | 154/1019 [12:22<1:13:32, 5.10s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▊ | 154/1019 [12:22<1:13:32, 5.10s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▊ | 154/1019 [12:22<1:13:32, 5.10s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▊ | 155/1019 [12:28<1:16:48, 5.33s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▊ | 155/1019 [12:28<1:16:48, 5.33s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▊ | 155/1019 [12:28<1:16:48, 5.33s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▊ | 155/1019 [12:28<1:16:48, 5.33s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3932, 'learning_rate': 9.18e-05, 'epoch': 0.15} + 15%|███████████▊ | 155/1019 [12:28<1:16:48, 5.33s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████ | 157/1019 [12:40<1:19:38, 5.54s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████ | 157/1019 [12:40<1:19:38, 5.54s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3333, 'learning_rate': 9.24e-05, 'epoch': 0.15} + 15%|████████████ | 157/1019 [12:40<1:19:38, 5.54s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████ | 158/1019 [12:46<1:20:50, 5.63s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████ | 158/1019 [12:46<1:20:50, 5.63s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████ | 158/1019 [12:46<1:20:50, 5.63s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▏ | 159/1019 [12:51<1:20:58, 5.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▏ | 159/1019 [12:51<1:20:58, 5.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▏ | 159/1019 [12:51<1:20:58, 5.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▏ | 159/1019 [12:51<1:20:58, 5.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3985, 'learning_rate': 9.419999999999999e-05, 'epoch': 0.16} + 16%|████████████▏ | 159/1019 [12:51<1:20:58, 5.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▎ | 161/1019 [13:03<1:20:27, 5.63s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▎ | 161/1019 [13:03<1:20:27, 5.63s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3598, 'learning_rate': 9.479999999999999e-05, 'epoch': 0.16} + 16%|████████████▎ | 161/1019 [13:03<1:20:27, 5.63s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▍ | 162/1019 [13:08<1:19:49, 5.59s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▍ | 162/1019 [13:08<1:19:49, 5.59s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▍ | 162/1019 [13:08<1:19:49, 5.59s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▍ | 163/1019 [13:14<1:19:31, 5.57s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▍ | 163/1019 [13:14<1:19:31, 5.57s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + [WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + [WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3323, 'learning_rate': 9.659999999999999e-05, 'epoch': 0.16} + [WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▋ | 165/1019 [13:25<1:18:56, 5.55s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▋ | 165/1019 [13:25<1:18:56, 5.55s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3129, 'learning_rate': 9.719999999999999e-05, 'epoch': 0.16} + 16%|████████████▋ | 165/1019 [13:25<1:18:56, 5.55s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▋ | 166/1019 [13:30<1:18:37, 5.53s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▋ | 166/1019 [13:30<1:18:37, 5.53s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▋ | 166/1019 [13:30<1:18:37, 5.53s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▊ | 167/1019 [13:36<1:18:15, 5.51s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▊ | 167/1019 [13:36<1:18:15, 5.51s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▊ | 167/1019 [13:36<1:18:15, 5.51s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▊ | 168/1019 [13:41<1:17:45, 5.48s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▊ | 168/1019 [13:41<1:17:45, 5.48s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▊ | 168/1019 [13:41<1:17:45, 5.48s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|████████████▉ | 169/1019 [13:46<1:16:51, 5.43s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|████████████▉ | 169/1019 [13:46<1:16:51, 5.43s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████ | 170/1019 [13:52<1:16:03, 5.38s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████ | 170/1019 [13:52<1:16:03, 5.38s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2216, 'learning_rate': 0.0001002, 'epoch': 0.17} + 17%|█████████████ | 170/1019 [13:52<1:16:03, 5.38s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████ | 171/1019 [13:57<1:15:08, 5.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████ | 171/1019 [13:57<1:15:08, 5.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▏ | 172/1019 [14:02<1:14:27, 5.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▏ | 172/1019 [14:02<1:14:27, 5.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5884, 'learning_rate': 0.0001014, 'epoch': 0.17} + 17%|█████████████▏ | 172/1019 [14:02<1:14:27, 5.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▏ | 173/1019 [14:07<1:13:49, 5.24s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▏ | 173/1019 [14:07<1:13:49, 5.24s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▎ | 174/1019 [14:12<1:12:50, 5.17s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▎ | 174/1019 [14:12<1:12:50, 5.17s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.397, 'learning_rate': 0.0001026, 'epoch': 0.17} + 17%|█████████████▎ | 174/1019 [14:12<1:12:50, 5.17s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▎ | 174/1019 [14:12<1:12:50, 5.17s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3738, 'learning_rate': 0.00010319999999999999, 'epoch': 0.17} + 17%|█████████████▎ | 174/1019 [14:12<1:12:50, 5.17s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▍ | 176/1019 [14:22<1:11:29, 5.09s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▍ | 176/1019 [14:22<1:11:29, 5.09s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3302, 'learning_rate': 0.00010379999999999999, 'epoch': 0.17} + 17%|█████████████▌ | 177/1019 [14:27<1:11:23, 5.09s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▌ | 177/1019 [14:27<1:11:23, 5.09s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3525, 'learning_rate': 0.00010439999999999999, 'epoch': 0.17} + 17%|█████████████▋ | 178/1019 [14:32<1:10:42, 5.05s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▋ | 178/1019 [14:32<1:10:42, 5.05s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3466, 'learning_rate': 0.00010499999999999999, 'epoch': 0.17} + 18%|█████████████▋ | 179/1019 [14:37<1:09:59, 5.00s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▋ | 179/1019 [14:37<1:09:59, 5.00s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2784, 'learning_rate': 0.00010559999999999998, 'epoch': 0.18} + 18%|█████████████▋ | 179/1019 [14:37<1:09:59, 5.00s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▊ | 180/1019 [14:42<1:09:13, 4.95s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▊ | 180/1019 [14:42<1:09:13, 4.95s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▊ | 181/1019 [14:47<1:08:24, 4.90s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▊ | 181/1019 [14:47<1:08:24, 4.90s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5627, 'learning_rate': 0.00010679999999999998, 'epoch': 0.18} + 18%|█████████████▉ | 182/1019 [14:52<1:07:56, 4.87s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▉ | 182/1019 [14:52<1:07:56, 4.87s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4303, 'learning_rate': 0.00010739999999999998, 'epoch': 0.18} + 18%|█████████████▉ | 182/1019 [14:52<1:07:56, 4.87s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:56:52,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████ | 183/1019 [14:56<1:07:01, 4.81s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:59:37,841 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████ | 184/1019 [15:01<1:06:15, 4.76s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:59:37,841 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████ | 184/1019 [15:01<1:06:15, 4.76s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:59:37,841 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3409, 'learning_rate': 0.00010859999999999998, 'epoch': 0.18} + 18%|██████████████▏ | 185/1019 [15:05<1:05:12, 4.69s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:59:37,841 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▏ | 185/1019 [15:05<1:05:12, 4.69s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:59:37,841 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4081, 'learning_rate': 0.00010919999999999998, 'epoch': 0.18} + 18%|██████████████▏ | 186/1019 [15:10<1:04:08, 4.62s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:59:37,841 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▏ | 186/1019 [15:10<1:04:08, 4.62s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:59:37,841 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4373, 'learning_rate': 0.00010979999999999999, 'epoch': 0.18} + 18%|██████████████▎ | 187/1019 [15:14<1:02:40, 4.52s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:59:37,841 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▎ | 187/1019 [15:14<1:02:40, 4.52s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:59:37,841 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4595, 'learning_rate': 0.00011039999999999999, 'epoch': 0.18} + 18%|██████████████▍ | 188/1019 [15:18<1:01:38, 4.45s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:59:37,841 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▍ | 188/1019 [15:18<1:01:38, 4.45s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:59:37,841 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2412, 'learning_rate': 0.00011099999999999999, 'epoch': 0.18} + 18%|██████████████▍ | 188/1019 [15:18<1:01:38, 4.45s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:59:37,841 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▍ | 189/1019 [15:22<1:00:04, 4.34s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:59:37,841 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▍ | 189/1019 [15:22<1:00:04, 4.34s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:59:37,841 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▍ | 189/1019 [15:22<1:00:04, 4.34s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:59:37,841 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▉ | 190/1019 [15:26<58:29, 4.23s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:59:37,841 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▉ | 190/1019 [15:26<58:29, 4.23s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:59:37,841 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▉ | 190/1019 [15:26<58:29, 4.23s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:59:37,841 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▉ | 191/1019 [15:30<56:25, 4.09s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:59:37,841 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▉ | 191/1019 [15:30<56:25, 4.09s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:59:37,841 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▉ | 191/1019 [15:30<56:25, 4.09s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:59:37,841 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████ | 192/1019 [15:34<54:14, 3.94s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:59:37,841 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████ | 192/1019 [15:34<54:14, 3.94s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:59:37,841 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:00:16,375 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 21:59:37,841 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:00:16,375 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 21:59:37,841 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:00:16,375 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 21:59:37,841 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▏ | 194/1019 [15:40<48:39, 3.54s/it]g-point operations will not be computed-02 21:59:37,841 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▏ | 194/1019 [15:40<48:39, 3.54s/it]g-point operations will not be computed-02 21:59:37,841 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:00:22,209 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 21:59:37,841 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:00:22,209 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 21:59:37,841 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:00:22,209 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 21:59:37,841 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▍ | 196/1019 [15:46<42:18, 3.08s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:26,024 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▍ | 196/1019 [15:46<42:18, 3.08s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:26,024 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▍ | 197/1019 [15:48<39:07, 2.86s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:28,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▍ | 197/1019 [15:48<39:07, 2.86s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:28,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▌ | 198/1019 [15:50<35:50, 2.62s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:30,130 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▌ | 198/1019 [15:50<35:50, 2.62s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:30,130 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▌ | 199/1019 [15:52<32:25, 2.37s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▌ | 199/1019 [15:52<32:25, 2.37s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▋ | 200/1019 [15:54<30:45, 2.25s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▋ | 200/1019 [15:54<30:45, 2.25s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▋ | 200/1019 [15:54<30:45, 2.25s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▋ | 200/1019 [15:54<30:45, 2.25s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6225, 'learning_rate': 0.0001188, 'epoch': 0.2} + 20%|███████████████▋ | 200/1019 [15:54<30:45, 2.25s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▊ | 202/1019 [16:06<57:04, 4.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▊ | 202/1019 [16:06<57:04, 4.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.449, 'learning_rate': 0.0001194, 'epoch': 0.2} + 20%|███████████████▌ | 203/1019 [16:12<1:03:53, 4.70s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▌ | 203/1019 [16:12<1:03:53, 4.70s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5392, 'learning_rate': 0.00011999999999999999, 'epoch': 0.2} + 20%|███████████████▌ | 204/1019 [16:18<1:08:24, 5.04s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▌ | 204/1019 [16:18<1:08:24, 5.04s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.386, 'learning_rate': 0.00012059999999999999, 'epoch': 0.2} + 20%|███████████████▌ | 204/1019 [16:18<1:08:24, 5.04s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▋ | 205/1019 [16:23<1:10:58, 5.23s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▋ | 205/1019 [16:23<1:10:58, 5.23s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▋ | 205/1019 [16:23<1:10:58, 5.23s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▋ | 205/1019 [16:23<1:10:58, 5.23s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5596, 'learning_rate': 0.00012179999999999999, 'epoch': 0.2} + 20%|███████████████▋ | 205/1019 [16:23<1:10:58, 5.23s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▊ | 207/1019 [16:34<1:13:30, 5.43s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▊ | 207/1019 [16:34<1:13:30, 5.43s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3989, 'learning_rate': 0.0001224, 'epoch': 0.2} + 20%|███████████████▉ | 208/1019 [16:40<1:14:14, 5.49s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▉ | 208/1019 [16:40<1:14:14, 5.49s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3484, 'learning_rate': 0.00012299999999999998, 'epoch': 0.2} + 20%|███████████████▉ | 208/1019 [16:40<1:14:14, 5.49s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|███████████████▉ | 209/1019 [16:46<1:14:23, 5.51s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|███████████████▉ | 209/1019 [16:46<1:14:23, 5.51s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|███████████████▉ | 209/1019 [16:46<1:14:23, 5.51s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████ | 210/1019 [16:51<1:14:53, 5.55s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████ | 210/1019 [16:51<1:14:53, 5.55s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▏ | 211/1019 [16:57<1:14:57, 5.57s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▏ | 211/1019 [16:57<1:14:57, 5.57s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4333, 'learning_rate': 0.00012479999999999997, 'epoch': 0.21} + 21%|████████████████▏ | 212/1019 [17:02<1:14:44, 5.56s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▏ | 212/1019 [17:02<1:14:44, 5.56s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2828, 'learning_rate': 0.00012539999999999999, 'epoch': 0.21} + [WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + [WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2647, 'learning_rate': 0.00012599999999999997, 'epoch': 0.21} + [WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▍ | 214/1019 [17:13<1:13:49, 5.50s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▍ | 214/1019 [17:13<1:13:49, 5.50s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8459, 'learning_rate': 0.0001266, 'epoch': 0.21} + 21%|████████████████▍ | 215/1019 [17:19<1:13:18, 5.47s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▍ | 215/1019 [17:19<1:13:18, 5.47s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4322, 'learning_rate': 0.00012719999999999997, 'epoch': 0.21} + 21%|████████████████▌ | 216/1019 [17:24<1:12:29, 5.42s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▌ | 216/1019 [17:24<1:12:29, 5.42s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.006, 'learning_rate': 0.0001278, 'epoch': 0.21} + 21%|████████████████▌ | 217/1019 [17:29<1:12:07, 5.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▌ | 217/1019 [17:29<1:12:07, 5.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4751, 'learning_rate': 0.00012839999999999998, 'epoch': 0.21} + 21%|████████████████▌ | 217/1019 [17:29<1:12:07, 5.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 218/1019 [17:35<1:12:15, 5.41s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 218/1019 [17:35<1:12:15, 5.41s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▊ | 219/1019 [17:40<1:11:47, 5.38s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▊ | 219/1019 [17:40<1:11:47, 5.38s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5713, 'learning_rate': 0.00012959999999999998, 'epoch': 0.21} + 22%|████████████████▊ | 220/1019 [17:45<1:11:08, 5.34s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|████████████████▊ | 220/1019 [17:45<1:11:08, 5.34s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3418, 'learning_rate': 0.0001302, 'epoch': 0.22} + 22%|████████████████▉ | 221/1019 [17:50<1:10:03, 5.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|████████████████▉ | 221/1019 [17:50<1:10:03, 5.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2869, 'learning_rate': 0.00013079999999999998, 'epoch': 0.22} + 22%|████████████████▉ | 222/1019 [17:56<1:09:32, 5.24s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|████████████████▉ | 222/1019 [17:56<1:09:32, 5.24s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5247, 'learning_rate': 0.0001314, 'epoch': 0.22} + 22%|█████████████████ | 223/1019 [18:01<1:09:09, 5.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████ | 223/1019 [18:01<1:09:09, 5.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4046, 'learning_rate': 0.00013199999999999998, 'epoch': 0.22} + 22%|█████████████████▏ | 224/1019 [18:06<1:08:19, 5.16s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▏ | 224/1019 [18:06<1:08:19, 5.16s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6572, 'learning_rate': 0.0001326, 'epoch': 0.22} + 22%|█████████████████▏ | 224/1019 [18:06<1:08:19, 5.16s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▏ | 225/1019 [18:11<1:07:59, 5.14s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▏ | 225/1019 [18:11<1:07:59, 5.14s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▏ | 225/1019 [18:11<1:07:59, 5.14s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▎ | 226/1019 [18:16<1:07:17, 5.09s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▎ | 226/1019 [18:16<1:07:17, 5.09s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▍ | 227/1019 [18:21<1:07:01, 5.08s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▍ | 227/1019 [18:21<1:07:01, 5.08s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.467, 'learning_rate': 0.0001344, 'epoch': 0.22} + 22%|█████████████████▍ | 227/1019 [18:21<1:07:01, 5.08s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▍ | 228/1019 [18:26<1:06:11, 5.02s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▍ | 228/1019 [18:26<1:06:11, 5.02s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▌ | 229/1019 [18:31<1:05:19, 4.96s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▌ | 229/1019 [18:31<1:05:19, 4.96s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.382, 'learning_rate': 0.0001356, 'epoch': 0.22} + 22%|█████████████████▌ | 229/1019 [18:31<1:05:19, 4.96s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▌ | 230/1019 [18:35<1:04:37, 4.91s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▌ | 230/1019 [18:35<1:04:37, 4.91s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▌ | 230/1019 [18:35<1:04:37, 4.91s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▋ | 231/1019 [18:40<1:03:30, 4.84s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▋ | 231/1019 [18:40<1:03:30, 4.84s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▊ | 232/1019 [18:45<1:03:12, 4.82s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▊ | 232/1019 [18:45<1:03:12, 4.82s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3485, 'learning_rate': 0.0001374, 'epoch': 0.23} + 23%|█████████████████▊ | 233/1019 [18:49<1:02:09, 4.75s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▊ | 233/1019 [18:49<1:02:09, 4.75s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4238, 'learning_rate': 0.000138, 'epoch': 0.23} + 23%|█████████████████▉ | 234/1019 [18:54<1:01:33, 4.71s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▉ | 234/1019 [18:54<1:01:33, 4.71s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3862, 'learning_rate': 0.0001386, 'epoch': 0.23} + 23%|█████████████████▉ | 235/1019 [18:59<1:00:54, 4.66s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▉ | 235/1019 [18:59<1:00:54, 4.66s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4833, 'learning_rate': 0.0001392, 'epoch': 0.23} + 23%|██████████████████▌ | 236/1019 [19:03<59:32, 4.56s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 236/1019 [19:03<59:32, 4.56s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.303, 'learning_rate': 0.00013979999999999998, 'epoch': 0.23} + 23%|██████████████████▌ | 237/1019 [19:07<58:34, 4.49s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 237/1019 [19:07<58:34, 4.49s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3132, 'learning_rate': 0.0001404, 'epoch': 0.23} + 23%|██████████████████▋ | 238/1019 [19:12<57:32, 4.42s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▋ | 238/1019 [19:12<57:32, 4.42s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5828, 'learning_rate': 0.00014099999999999998, 'epoch': 0.23} + 23%|██████████████████▊ | 239/1019 [19:16<56:19, 4.33s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▊ | 239/1019 [19:16<56:19, 4.33s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2905, 'learning_rate': 0.00014159999999999997, 'epoch': 0.23} + 24%|██████████████████▊ | 240/1019 [19:20<54:30, 4.20s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▊ | 240/1019 [19:20<54:30, 4.20s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8632, 'learning_rate': 0.0001422, 'epoch': 0.24} + 24%|██████████████████▉ | 241/1019 [19:23<52:43, 4.07s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▉ | 241/1019 [19:23<52:43, 4.07s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3419, 'learning_rate': 0.00014279999999999997, 'epoch': 0.24} + 24%|██████████████████▉ | 241/1019 [19:23<52:43, 4.07s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▉ | 242/1019 [19:27<49:58, 3.86s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:04:09,174 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:04:09,174 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:04:12,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:04:12,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:00:31,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6121, 'learning_rate': 0.0001446, 'epoch': 0.24} + 24%|███████████████████▏ | 245/1019 [19:35<41:01, 3.18s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:15,926 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▏ | 245/1019 [19:35<41:01, 3.18s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:15,926 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▎ | 246/1019 [19:38<37:51, 2.94s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:18,128 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▎ | 246/1019 [19:38<37:51, 2.94s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:18,128 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▍ | 247/1019 [19:40<34:20, 2.67s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:20,053 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▍ | 247/1019 [19:40<34:20, 2.67s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:20,053 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▍ | 248/1019 [19:42<30:55, 2.41s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:21,736 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▍ | 248/1019 [19:42<30:55, 2.41s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:21,736 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.6138, 'learning_rate': 0.000147, 'epoch': 0.24} + 24%|███████████████████▌ | 249/1019 [19:43<27:50, 2.17s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▌ | 249/1019 [19:43<27:50, 2.17s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▋ | 250/1019 [19:45<26:44, 2.09s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▋ | 250/1019 [19:45<26:44, 2.09s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▋ | 250/1019 [19:45<26:44, 2.09s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▋ | 251/1019 [19:51<42:41, 3.34s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▋ | 251/1019 [19:51<42:41, 3.34s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▋ | 251/1019 [19:51<42:41, 3.34s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▊ | 252/1019 [19:57<52:32, 4.11s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▊ | 252/1019 [19:57<52:32, 4.11s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▊ | 252/1019 [19:57<52:32, 4.11s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▊ | 253/1019 [20:03<59:10, 4.63s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▊ | 253/1019 [20:03<59:10, 4.63s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▍ | 254/1019 [20:09<1:03:34, 4.99s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▍ | 254/1019 [20:09<1:03:34, 4.99s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2367, 'learning_rate': 0.00015059999999999997, 'epoch': 0.25} + 25%|███████████████████▌ | 255/1019 [20:15<1:06:27, 5.22s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▌ | 255/1019 [20:15<1:06:27, 5.22s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3365, 'learning_rate': 0.0001512, 'epoch': 0.25} + 25%|███████████████████▌ | 256/1019 [20:21<1:08:35, 5.39s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▌ | 256/1019 [20:21<1:08:35, 5.39s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3775, 'learning_rate': 0.00015179999999999998, 'epoch': 0.25} + 25%|███████████████████▌ | 256/1019 [20:21<1:08:35, 5.39s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▋ | 257/1019 [20:26<1:09:30, 5.47s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▋ | 257/1019 [20:26<1:09:30, 5.47s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▋ | 257/1019 [20:26<1:09:30, 5.47s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▋ | 258/1019 [20:32<1:10:04, 5.53s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▋ | 258/1019 [20:32<1:10:04, 5.53s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▊ | 259/1019 [20:38<1:10:39, 5.58s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▊ | 259/1019 [20:38<1:10:39, 5.58s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3316, 'learning_rate': 0.0001536, 'epoch': 0.25} + 26%|███████████████████▉ | 260/1019 [20:43<1:10:47, 5.60s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|███████████████████▉ | 260/1019 [20:43<1:10:47, 5.60s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6933, 'learning_rate': 0.00015419999999999998, 'epoch': 0.26} + 26%|███████████████████▉ | 261/1019 [20:49<1:10:31, 5.58s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|███████████████████▉ | 261/1019 [20:49<1:10:31, 5.58s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3414, 'learning_rate': 0.0001548, 'epoch': 0.26} + 26%|████████████████████ | 262/1019 [20:54<1:09:56, 5.54s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████ | 262/1019 [20:54<1:09:56, 5.54s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4034, 'learning_rate': 0.00015539999999999998, 'epoch': 0.26} + 26%|████████████████████▏ | 263/1019 [21:00<1:09:55, 5.55s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▏ | 263/1019 [21:00<1:09:55, 5.55s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.383, 'learning_rate': 0.000156, 'epoch': 0.26} + 26%|████████████████████▏ | 264/1019 [21:05<1:09:34, 5.53s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▏ | 264/1019 [21:05<1:09:34, 5.53s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5517, 'learning_rate': 0.00015659999999999998, 'epoch': 0.26} + 26%|████████████████████▎ | 265/1019 [21:11<1:08:54, 5.48s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▎ | 265/1019 [21:11<1:08:54, 5.48s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2632, 'learning_rate': 0.0001572, 'epoch': 0.26} + 26%|████████████████████▎ | 266/1019 [21:16<1:08:36, 5.47s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▎ | 266/1019 [21:16<1:08:36, 5.47s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6418, 'learning_rate': 0.0001578, 'epoch': 0.26} + 26%|████████████████████▎ | 266/1019 [21:16<1:08:36, 5.47s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 267/1019 [21:21<1:08:25, 5.46s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 267/1019 [21:21<1:08:25, 5.46s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▌ | 268/1019 [21:27<1:07:45, 5.41s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▌ | 268/1019 [21:27<1:07:45, 5.41s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6749, 'learning_rate': 0.000159, 'epoch': 0.26} + 26%|████████████████████▌ | 269/1019 [21:32<1:07:24, 5.39s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▌ | 269/1019 [21:32<1:07:24, 5.39s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.546, 'learning_rate': 0.0001596, 'epoch': 0.26} + 26%|████████████████████▌ | 269/1019 [21:32<1:07:24, 5.39s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 270/1019 [21:37<1:07:11, 5.38s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 270/1019 [21:37<1:07:11, 5.38s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|████████████████████▋ | 271/1019 [21:43<1:06:36, 5.34s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|████████████████████▋ | 271/1019 [21:43<1:06:36, 5.34s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3069, 'learning_rate': 0.0001608, 'epoch': 0.27} + 27%|████████████████████▋ | 271/1019 [21:43<1:06:36, 5.34s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|████████████████████▊ | 272/1019 [21:48<1:05:59, 5.30s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|████████████████████▊ | 272/1019 [21:48<1:05:59, 5.30s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|████████████████████▉ | 273/1019 [21:53<1:05:20, 5.26s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|████████████████████▉ | 273/1019 [21:53<1:05:20, 5.26s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3899, 'learning_rate': 0.000162, 'epoch': 0.27} + 27%|████████████████████▉ | 273/1019 [21:53<1:05:20, 5.26s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|████████████████████▉ | 274/1019 [21:58<1:04:55, 5.23s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|████████████████████▉ | 274/1019 [21:58<1:04:55, 5.23s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████ | 275/1019 [22:03<1:04:12, 5.18s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████ | 275/1019 [22:03<1:04:12, 5.18s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4663, 'learning_rate': 0.0001632, 'epoch': 0.27} + 27%|█████████████████████ | 275/1019 [22:03<1:04:12, 5.18s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▏ | 276/1019 [22:08<1:03:56, 5.16s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▏ | 276/1019 [22:08<1:03:56, 5.16s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▏ | 277/1019 [22:13<1:03:07, 5.10s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▏ | 277/1019 [22:13<1:03:07, 5.10s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9303, 'learning_rate': 0.0001644, 'epoch': 0.27} + 27%|█████████████████████▏ | 277/1019 [22:13<1:03:07, 5.10s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▎ | 278/1019 [22:18<1:02:20, 5.05s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▎ | 278/1019 [22:18<1:02:20, 5.05s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▎ | 279/1019 [22:23<1:01:46, 5.01s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▎ | 279/1019 [22:23<1:01:46, 5.01s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.528, 'learning_rate': 0.0001656, 'epoch': 0.27} + 27%|█████████████████████▍ | 280/1019 [22:28<1:01:17, 4.98s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▍ | 280/1019 [22:28<1:01:17, 4.98s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5544, 'learning_rate': 0.0001662, 'epoch': 0.27} + 27%|█████████████████████▍ | 280/1019 [22:28<1:01:17, 4.98s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|█████████████████████▌ | 281/1019 [22:33<1:00:24, 4.91s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|█████████████████████▌ | 281/1019 [22:33<1:00:24, 4.91s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▏ | 282/1019 [22:38<59:55, 4.88s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▏ | 282/1019 [22:38<59:55, 4.88s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4356, 'learning_rate': 0.0001674, 'epoch': 0.28} + 28%|██████████████████████▏ | 283/1019 [22:42<59:32, 4.85s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▏ | 283/1019 [22:42<59:32, 4.85s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3989, 'learning_rate': 0.000168, 'epoch': 0.28} + 28%|██████████████████████▏ | 283/1019 [22:42<59:32, 4.85s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▎ | 284/1019 [22:47<58:34, 4.78s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▎ | 284/1019 [22:47<58:34, 4.78s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▎ | 285/1019 [22:52<57:35, 4.71s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▎ | 285/1019 [22:52<57:35, 4.71s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9425, 'learning_rate': 0.00016919999999999997, 'epoch': 0.28} + 28%|██████████████████████▍ | 286/1019 [22:56<56:37, 4.64s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▍ | 286/1019 [22:56<56:37, 4.64s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.384, 'learning_rate': 0.00016979999999999998, 'epoch': 0.28} + 28%|██████████████████████▌ | 287/1019 [23:00<55:37, 4.56s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▌ | 287/1019 [23:00<55:37, 4.56s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4586, 'learning_rate': 0.00017039999999999997, 'epoch': 0.28} + 28%|██████████████████████▌ | 288/1019 [23:05<54:19, 4.46s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▌ | 288/1019 [23:05<54:19, 4.46s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6842, 'learning_rate': 0.00017099999999999998, 'epoch': 0.28} + 28%|██████████████████████▋ | 289/1019 [23:09<52:47, 4.34s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▋ | 289/1019 [23:09<52:47, 4.34s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4578, 'learning_rate': 0.00017159999999999997, 'epoch': 0.28} + 28%|██████████████████████▊ | 290/1019 [23:13<51:07, 4.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▊ | 290/1019 [23:13<51:07, 4.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8488, 'learning_rate': 0.00017219999999999998, 'epoch': 0.28} + 29%|██████████████████████▊ | 291/1019 [23:16<49:10, 4.05s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|██████████████████████▊ | 291/1019 [23:16<49:10, 4.05s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.846, 'learning_rate': 0.00017279999999999997, 'epoch': 0.29} + 29%|██████████████████████▉ | 292/1019 [23:20<46:59, 3.88s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|██████████████████████▉ | 292/1019 [23:20<46:59, 3.88s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:08:02,296 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:08:02,296 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6816, 'learning_rate': 0.00017399999999999997, 'epoch': 0.29} + 29%|███████████████████████ | 294/1019 [23:26<41:56, 3.47s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████ | 294/1019 [23:26<41:56, 3.47s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:08:08,026 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:08:08,026 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:08:10,463 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:08:10,463 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:08:12,616 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:08:12,616 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:08:14,537 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:08:14,537 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:08:16,179 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:08:16,179 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:08:18,110 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:08:18,110 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6737, 'learning_rate': 0.00017819999999999997, 'epoch': 0.29} + 30%|███████████████████████▋ | 301/1019 [23:45<39:50, 3.33s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▋ | 301/1019 [23:45<39:50, 3.33s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9764, 'learning_rate': 0.00017879999999999998, 'epoch': 0.3} + 30%|███████████████████████▋ | 302/1019 [23:51<49:18, 4.13s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▋ | 302/1019 [23:51<49:18, 4.13s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6205, 'learning_rate': 0.00017939999999999997, 'epoch': 0.3} + 30%|███████████████████████▊ | 303/1019 [23:57<55:23, 4.64s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▊ | 303/1019 [23:57<55:23, 4.64s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7629, 'learning_rate': 0.00017999999999999998, 'epoch': 0.3} + 30%|███████████████████████▊ | 303/1019 [23:57<55:23, 4.64s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▊ | 304/1019 [24:03<59:20, 4.98s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▊ | 304/1019 [24:03<59:20, 4.98s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▎ | 305/1019 [24:08<1:02:10, 5.23s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▎ | 305/1019 [24:08<1:02:10, 5.23s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.353, 'learning_rate': 0.00018119999999999999, 'epoch': 0.3} + 30%|███████████████████████▎ | 305/1019 [24:08<1:02:10, 5.23s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▍ | 306/1019 [24:14<1:03:48, 5.37s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▍ | 306/1019 [24:14<1:03:48, 5.37s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▍ | 306/1019 [24:14<1:03:48, 5.37s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▍ | 307/1019 [24:20<1:05:09, 5.49s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▍ | 307/1019 [24:20<1:05:09, 5.49s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▍ | 307/1019 [24:20<1:05:09, 5.49s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▌ | 308/1019 [24:26<1:05:50, 5.56s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▌ | 308/1019 [24:26<1:05:50, 5.56s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▌ | 308/1019 [24:26<1:05:50, 5.56s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▌ | 308/1019 [24:26<1:05:50, 5.56s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7656, 'learning_rate': 0.0001836, 'epoch': 0.3} + 30%|███████████████████████▌ | 308/1019 [24:26<1:05:50, 5.56s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▋ | 310/1019 [24:37<1:06:17, 5.61s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▋ | 310/1019 [24:37<1:06:17, 5.61s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7829, 'learning_rate': 0.00018419999999999998, 'epoch': 0.3} + 30%|███████████████████████▋ | 310/1019 [24:37<1:06:17, 5.61s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|███████████████████████▊ | 311/1019 [24:42<1:06:04, 5.60s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|███████████████████████▊ | 311/1019 [24:42<1:06:04, 5.60s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|███████████████████████▉ | 312/1019 [24:48<1:05:36, 5.57s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|███████████████████████▉ | 312/1019 [24:48<1:05:36, 5.57s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6093, 'learning_rate': 0.00018539999999999998, 'epoch': 0.31} + 31%|███████████████████████▉ | 312/1019 [24:48<1:05:36, 5.57s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|███████████████████████▉ | 313/1019 [24:53<1:05:06, 5.53s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|███████████████████████▉ | 313/1019 [24:53<1:05:06, 5.53s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.485, 'learning_rate': 0.000186, 'epoch': 0.31} + 31%|███████████████████████▉ | 313/1019 [24:53<1:05:06, 5.53s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████ | 314/1019 [24:59<1:04:42, 5.51s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████ | 314/1019 [24:59<1:04:42, 5.51s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████ | 314/1019 [24:59<1:04:42, 5.51s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████ | 314/1019 [24:59<1:04:42, 5.51s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4405, 'learning_rate': 0.0001872, 'epoch': 0.31} + 31%|████████████████████████ | 314/1019 [24:59<1:04:42, 5.51s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▏ | 316/1019 [25:10<1:04:13, 5.48s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▏ | 316/1019 [25:10<1:04:13, 5.48s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8867, 'learning_rate': 0.00018779999999999998, 'epoch': 0.31} + 31%|████████████████████████▏ | 316/1019 [25:10<1:04:13, 5.48s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▎ | 317/1019 [25:15<1:03:47, 5.45s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▎ | 317/1019 [25:15<1:03:47, 5.45s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▎ | 318/1019 [25:20<1:03:17, 5.42s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▎ | 318/1019 [25:20<1:03:17, 5.42s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5744, 'learning_rate': 0.00018899999999999999, 'epoch': 0.31} + 31%|████████████████████████▍ | 319/1019 [25:26<1:02:49, 5.38s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▍ | 319/1019 [25:26<1:02:49, 5.38s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6423, 'learning_rate': 0.00018959999999999997, 'epoch': 0.31} + 31%|████████████████████████▍ | 320/1019 [25:31<1:02:25, 5.36s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▍ | 320/1019 [25:31<1:02:25, 5.36s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9354, 'learning_rate': 0.0001902, 'epoch': 0.31} + 32%|████████████████████████▌ | 321/1019 [25:36<1:02:01, 5.33s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|████████████████████████▌ | 321/1019 [25:36<1:02:01, 5.33s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3763, 'learning_rate': 0.00019079999999999998, 'epoch': 0.31} + 32%|████████████████████████▌ | 321/1019 [25:36<1:02:01, 5.33s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|████████████████████████▋ | 322/1019 [25:42<1:01:49, 5.32s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|████████████████████████▋ | 322/1019 [25:42<1:01:49, 5.32s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|████████████████████████▋ | 323/1019 [25:47<1:01:02, 5.26s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|████████████████████████▋ | 323/1019 [25:47<1:01:02, 5.26s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3223, 'learning_rate': 0.00019199999999999998, 'epoch': 0.32} + 32%|████████████████████████▊ | 324/1019 [25:52<1:00:10, 5.19s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|████████████████████████▊ | 324/1019 [25:52<1:00:10, 5.19s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3524, 'learning_rate': 0.0001926, 'epoch': 0.32} + 32%|█████████████████████████▌ | 325/1019 [25:57<59:12, 5.12s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▌ | 325/1019 [25:57<59:12, 5.12s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5332, 'learning_rate': 0.00019319999999999998, 'epoch': 0.32} + 32%|█████████████████████████▌ | 326/1019 [26:02<58:54, 5.10s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▌ | 326/1019 [26:02<58:54, 5.10s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5867, 'learning_rate': 0.0001938, 'epoch': 0.32} + 32%|█████████████████████████▋ | 327/1019 [26:07<58:25, 5.07s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▋ | 327/1019 [26:07<58:25, 5.07s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3014, 'learning_rate': 0.00019439999999999998, 'epoch': 0.32} + 32%|█████████████████████████▊ | 328/1019 [26:12<57:49, 5.02s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▊ | 328/1019 [26:12<57:49, 5.02s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5994, 'learning_rate': 0.000195, 'epoch': 0.32} + 32%|█████████████████████████▊ | 328/1019 [26:12<57:49, 5.02s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▊ | 328/1019 [26:12<57:49, 5.02s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▊ | 328/1019 [26:12<57:49, 5.02s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7089, 'learning_rate': 0.00019559999999999998, 'epoch': 0.32} + 32%|█████████████████████████▊ | 328/1019 [26:12<57:49, 5.02s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▉ | 330/1019 [26:21<56:51, 4.95s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▉ | 330/1019 [26:21<56:51, 4.95s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▉ | 331/1019 [26:26<56:29, 4.93s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▉ | 331/1019 [26:26<56:29, 4.93s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3585, 'learning_rate': 0.00019679999999999999, 'epoch': 0.32} + 32%|█████████████████████████▉ | 331/1019 [26:26<56:29, 4.93s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▉ | 331/1019 [26:26<56:29, 4.93s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5627, 'learning_rate': 0.0001974, 'epoch': 0.33} + 32%|█████████████████████████▉ | 331/1019 [26:26<56:29, 4.93s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▉ | 331/1019 [26:26<56:29, 4.93s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▏ | 333/1019 [26:36<54:57, 4.81s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▏ | 333/1019 [26:36<54:57, 4.81s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▏ | 334/1019 [26:40<53:55, 4.72s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▏ | 334/1019 [26:40<53:55, 4.72s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5892, 'learning_rate': 0.0001986, 'epoch': 0.33} + 33%|██████████████████████████▎ | 335/1019 [26:45<53:11, 4.67s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▎ | 335/1019 [26:45<53:11, 4.67s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5918, 'learning_rate': 0.0001992, 'epoch': 0.33} + 33%|██████████████████████████▎ | 335/1019 [26:45<53:11, 4.67s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▍ | 337/1019 [26:53<50:58, 4.49s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:11:36,867 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:11:36,867 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4946, 'learning_rate': 0.000201, 'epoch': 0.33} + 33%|██████████████████████████▌ | 339/1019 [27:02<48:30, 4.28s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▌ | 339/1019 [27:02<48:30, 4.28s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.676, 'learning_rate': 0.0002016, 'epoch': 0.33} + 33%|██████████████████████████▌ | 339/1019 [27:02<48:30, 4.28s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▋ | 340/1019 [27:05<47:02, 4.16s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▋ | 340/1019 [27:05<47:02, 4.16s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▋ | 340/1019 [27:05<47:02, 4.16s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▊ | 341/1019 [27:09<45:09, 4.00s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▊ | 341/1019 [27:09<45:09, 4.00s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▊ | 341/1019 [27:09<45:09, 4.00s/it]g-point operations will not be computed-02 22:04:23,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|██████████████████████████▊ | 342/1019 [27:13<43:20, 3.84s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:11:53,535 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|██████████████████████████▊ | 342/1019 [27:13<43:20, 3.84s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:11:53,535 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|██████████████████████████▉ | 343/1019 [27:16<41:05, 3.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:11:53,535 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|██████████████████████████▉ | 343/1019 [27:16<41:05, 3.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:11:53,535 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|██████████████████████████▉ | 343/1019 [27:16<41:05, 3.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:11:53,535 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████ | 344/1019 [27:19<38:36, 3.43s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████ | 344/1019 [27:19<38:36, 3.43s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████ | 345/1019 [27:21<36:01, 3.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████ | 345/1019 [27:21<36:01, 3.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:12:03,112 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:12:03,112 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:12:05,227 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:12:05,227 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:12:07,100 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:12:07,100 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:12:08,671 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:12:08,671 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:12:10,585 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:12:10,585 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:12:10,585 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▌ | 351/1019 [27:37<37:04, 3.33s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▌ | 351/1019 [27:37<37:04, 3.33s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▌ | 351/1019 [27:37<37:04, 3.33s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▋ | 352/1019 [27:43<45:50, 4.12s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▋ | 352/1019 [27:43<45:50, 4.12s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▋ | 352/1019 [27:43<45:50, 4.12s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▋ | 353/1019 [27:49<51:35, 4.65s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▋ | 353/1019 [27:49<51:35, 4.65s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▋ | 353/1019 [27:49<51:35, 4.65s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▋ | 353/1019 [27:49<51:35, 4.65s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5144, 'learning_rate': 0.00021059999999999997, 'epoch': 0.35} + 35%|███████████████████████████▋ | 353/1019 [27:49<51:35, 4.65s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▊ | 355/1019 [28:01<57:53, 5.23s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▊ | 355/1019 [28:01<57:53, 5.23s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4082, 'learning_rate': 0.00021119999999999996, 'epoch': 0.35} + 35%|███████████████████████████▊ | 355/1019 [28:01<57:53, 5.23s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▉ | 356/1019 [28:07<59:17, 5.37s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▉ | 356/1019 [28:07<59:17, 5.37s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▉ | 356/1019 [28:07<59:17, 5.37s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▎ | 357/1019 [28:12<1:00:54, 5.52s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▎ | 357/1019 [28:12<1:00:54, 5.52s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▎ | 357/1019 [28:12<1:00:54, 5.52s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▍ | 358/1019 [28:18<1:01:29, 5.58s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▍ | 358/1019 [28:18<1:01:29, 5.58s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▍ | 358/1019 [28:18<1:01:29, 5.58s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▍ | 359/1019 [28:24<1:01:44, 5.61s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▍ | 359/1019 [28:24<1:01:44, 5.61s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▍ | 359/1019 [28:24<1:01:44, 5.61s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▌ | 360/1019 [28:30<1:01:48, 5.63s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▌ | 360/1019 [28:30<1:01:48, 5.63s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▌ | 360/1019 [28:30<1:01:48, 5.63s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▋ | 361/1019 [28:35<1:01:34, 5.62s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▋ | 361/1019 [28:35<1:01:34, 5.62s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▋ | 361/1019 [28:35<1:01:34, 5.62s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|███████████████████████████▋ | 362/1019 [28:41<1:01:16, 5.60s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|███████████████████████████▋ | 362/1019 [28:41<1:01:16, 5.60s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|███████████████████████████▋ | 362/1019 [28:41<1:01:16, 5.60s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|███████████████████████████▊ | 363/1019 [28:46<1:00:40, 5.55s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|███████████████████████████▊ | 363/1019 [28:46<1:00:40, 5.55s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|███████████████████████████▊ | 364/1019 [28:52<1:00:10, 5.51s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|███████████████████████████▊ | 364/1019 [28:52<1:00:10, 5.51s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7992, 'learning_rate': 0.00021659999999999998, 'epoch': 0.36} + 36%|███████████████████████████▊ | 364/1019 [28:52<1:00:10, 5.51s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▋ | 365/1019 [28:57<59:51, 5.49s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▋ | 365/1019 [28:57<59:51, 5.49s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▋ | 365/1019 [28:57<59:51, 5.49s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▋ | 365/1019 [28:57<59:51, 5.49s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7664, 'learning_rate': 0.00021779999999999998, 'epoch': 0.36} + 36%|████████████████████████████▋ | 365/1019 [28:57<59:51, 5.49s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▊ | 367/1019 [29:08<59:27, 5.47s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▊ | 367/1019 [29:08<59:27, 5.47s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2414, 'learning_rate': 0.00021839999999999997, 'epoch': 0.36} + 36%|████████████████████████████▊ | 367/1019 [29:08<59:27, 5.47s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▉ | 368/1019 [29:13<58:51, 5.42s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▉ | 368/1019 [29:13<58:51, 5.42s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▉ | 369/1019 [29:19<58:39, 5.42s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▉ | 369/1019 [29:19<58:39, 5.42s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6271, 'learning_rate': 0.00021959999999999997, 'epoch': 0.36} + 36%|████████████████████████████▉ | 369/1019 [29:19<58:39, 5.42s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▉ | 369/1019 [29:19<58:39, 5.42s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████ | 370/1019 [29:24<58:21, 5.39s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████ | 370/1019 [29:24<58:21, 5.39s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████ | 370/1019 [29:24<58:21, 5.39s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████ | 370/1019 [29:24<58:21, 5.39s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8406, 'learning_rate': 0.00022079999999999997, 'epoch': 0.36} + 36%|█████████████████████████████ | 370/1019 [29:24<58:21, 5.39s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▏ | 372/1019 [29:34<57:05, 5.29s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▏ | 372/1019 [29:34<57:05, 5.29s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4773, 'learning_rate': 0.0002214, 'epoch': 0.36} + 37%|█████████████████████████████▏ | 372/1019 [29:34<57:05, 5.29s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▎ | 373/1019 [29:40<56:33, 5.25s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▎ | 373/1019 [29:40<56:33, 5.25s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▎ | 374/1019 [29:45<56:13, 5.23s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▎ | 374/1019 [29:45<56:13, 5.23s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.61, 'learning_rate': 0.0002226, 'epoch': 0.37} + 37%|█████████████████████████████▎ | 374/1019 [29:45<56:13, 5.23s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▍ | 375/1019 [29:50<55:46, 5.20s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▍ | 375/1019 [29:50<55:46, 5.20s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▌ | 376/1019 [29:55<55:10, 5.15s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▌ | 376/1019 [29:55<55:10, 5.15s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6115, 'learning_rate': 0.0002238, 'epoch': 0.37} + g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.382, 'learning_rate': 0.00022439999999999998, 'epoch': 0.37} + g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▋ | 378/1019 [30:05<54:10, 5.07s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▋ | 378/1019 [30:05<54:10, 5.07s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3134, 'learning_rate': 0.000225, 'epoch': 0.37} + 37%|█████████████████████████████▊ | 379/1019 [30:10<53:35, 5.02s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▊ | 379/1019 [30:10<53:35, 5.02s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7434, 'learning_rate': 0.00022559999999999998, 'epoch': 0.37} + 37%|█████████████████████████████▊ | 379/1019 [30:10<53:35, 5.02s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▊ | 380/1019 [30:15<53:14, 5.00s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▊ | 380/1019 [30:15<53:14, 5.00s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▉ | 381/1019 [30:20<52:38, 4.95s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▉ | 381/1019 [30:20<52:38, 4.95s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4288, 'learning_rate': 0.00022679999999999998, 'epoch': 0.37} + 37%|█████████████████████████████▉ | 382/1019 [30:24<51:57, 4.89s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▉ | 382/1019 [30:24<51:57, 4.89s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4034, 'learning_rate': 0.00022739999999999997, 'epoch': 0.37} + 38%|██████████████████████████████ | 383/1019 [30:29<51:11, 4.83s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████ | 383/1019 [30:29<51:11, 4.83s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.715, 'learning_rate': 0.00022799999999999999, 'epoch': 0.38} + 38%|██████████████████████████████▏ | 384/1019 [30:34<50:20, 4.76s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▏ | 384/1019 [30:34<50:20, 4.76s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7009, 'learning_rate': 0.00022859999999999997, 'epoch': 0.38} + 38%|██████████████████████████████▏ | 385/1019 [30:38<49:31, 4.69s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▏ | 385/1019 [30:38<49:31, 4.69s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5989, 'learning_rate': 0.0002292, 'epoch': 0.38} + 38%|██████████████████████████████▎ | 386/1019 [30:43<48:33, 4.60s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▎ | 386/1019 [30:43<48:33, 4.60s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5708, 'learning_rate': 0.00022979999999999997, 'epoch': 0.38} + 38%|██████████████████████████████▍ | 387/1019 [30:47<47:31, 4.51s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▍ | 387/1019 [30:47<47:31, 4.51s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3128, 'learning_rate': 0.0002304, 'epoch': 0.38} + 38%|██████████████████████████████▍ | 388/1019 [30:51<46:36, 4.43s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▍ | 388/1019 [30:51<46:36, 4.43s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4654, 'learning_rate': 0.00023099999999999998, 'epoch': 0.38} + 38%|██████████████████████████████▌ | 389/1019 [30:55<45:29, 4.33s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▌ | 389/1019 [30:55<45:29, 4.33s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.388, 'learning_rate': 0.0002316, 'epoch': 0.38} + 38%|██████████████████████████████▌ | 389/1019 [30:55<45:29, 4.33s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▌ | 390/1019 [30:59<44:02, 4.20s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▌ | 390/1019 [30:59<44:02, 4.20s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▌ | 390/1019 [30:59<44:02, 4.20s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▋ | 391/1019 [31:03<42:35, 4.07s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▋ | 391/1019 [31:03<42:35, 4.07s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▋ | 391/1019 [31:03<42:35, 4.07s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▊ | 392/1019 [31:06<40:45, 3.90s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▊ | 392/1019 [31:06<40:45, 3.90s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:15:48,851 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:15:48,851 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|██████████████████████████████▉ | 394/1019 [31:13<36:18, 3.49s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|██████████████████████████████▉ | 394/1019 [31:13<36:18, 3.49s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:15:54,476 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:15:54,476 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:15:56,804 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:15:56,804 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:15:58,899 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:15:58,899 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:16:00,735 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:16:00,735 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:16:02,360 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:16:02,360 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9028, 'learning_rate': 0.0002376, 'epoch': 0.39} +[WARNING|modeling_utils.py:388] 2022-03-02 22:16:04,279 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:16:04,279 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:16:04,279 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▍ | 401/1019 [31:31<34:34, 3.36s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▍ | 401/1019 [31:31<34:34, 3.36s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▍ | 401/1019 [31:31<34:34, 3.36s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▌ | 402/1019 [31:37<42:37, 4.14s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▌ | 402/1019 [31:37<42:37, 4.14s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▌ | 402/1019 [31:37<42:37, 4.14s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|███████████████████████████████▋ | 403/1019 [31:43<47:31, 4.63s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|███████████████████████████████▋ | 403/1019 [31:43<47:31, 4.63s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|███████████████████████████████▋ | 403/1019 [31:43<47:31, 4.63s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|███████████████████████████████▋ | 404/1019 [31:49<50:38, 4.94s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|███████████████████████████████▋ | 404/1019 [31:49<50:38, 4.94s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|███████████████████████████████▋ | 404/1019 [31:49<50:38, 4.94s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|███████████████████████████████▊ | 405/1019 [31:54<53:01, 5.18s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|███████████████████████████████▊ | 405/1019 [31:54<53:01, 5.18s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|███████████████████████████████▊ | 405/1019 [31:54<53:01, 5.18s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|███████████████████████████████▊ | 406/1019 [32:00<54:39, 5.35s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|███████████████████████████████▊ | 406/1019 [32:00<54:39, 5.35s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|███████████████████████████████▊ | 406/1019 [32:00<54:39, 5.35s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|███████████████████████████████▉ | 407/1019 [32:06<55:42, 5.46s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|███████████████████████████████▉ | 407/1019 [32:06<55:42, 5.46s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|███████████████████████████████▉ | 407/1019 [32:06<55:42, 5.46s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|███████████████████████████████▉ | 407/1019 [32:06<55:42, 5.46s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.1687, 'learning_rate': 0.000243, 'epoch': 0.4} + 40%|███████████████████████████████▉ | 407/1019 [32:06<55:42, 5.46s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|███████████████████████████████▉ | 407/1019 [32:06<55:42, 5.46s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████ | 409/1019 [32:17<56:32, 5.56s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████ | 409/1019 [32:17<56:32, 5.56s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████ | 409/1019 [32:17<56:32, 5.56s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 410/1019 [32:23<56:36, 5.58s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 410/1019 [32:23<56:36, 5.58s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 410/1019 [32:23<56:36, 5.58s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 410/1019 [32:23<56:36, 5.58s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6197, 'learning_rate': 0.0002448, 'epoch': 0.4} + 40%|████████████████████████████████▏ | 410/1019 [32:23<56:36, 5.58s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▎ | 412/1019 [32:34<55:51, 5.52s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▎ | 412/1019 [32:34<55:51, 5.52s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6007, 'learning_rate': 0.00024539999999999995, 'epoch': 0.4} + 40%|████████████████████████████████▎ | 412/1019 [32:34<55:51, 5.52s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▍ | 413/1019 [32:39<55:45, 5.52s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▍ | 413/1019 [32:39<55:45, 5.52s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▌ | 414/1019 [32:45<55:33, 5.51s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▌ | 414/1019 [32:45<55:33, 5.51s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8696, 'learning_rate': 0.0002466, 'epoch': 0.41} + 41%|████████████████████████████████▌ | 415/1019 [32:50<54:54, 5.46s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▌ | 415/1019 [32:50<54:54, 5.46s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.398, 'learning_rate': 0.0002472, 'epoch': 0.41} + 41%|████████████████████████████████▌ | 415/1019 [32:50<54:54, 5.46s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▋ | 416/1019 [32:55<54:34, 5.43s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▋ | 416/1019 [32:55<54:34, 5.43s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▋ | 417/1019 [33:01<54:05, 5.39s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▋ | 417/1019 [33:01<54:05, 5.39s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.412, 'learning_rate': 0.00024839999999999997, 'epoch': 0.41} + 41%|████████████████████████████████▊ | 418/1019 [33:06<53:58, 5.39s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▊ | 418/1019 [33:06<53:58, 5.39s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6433, 'learning_rate': 0.000249, 'epoch': 0.41} + 41%|████████████████████████████████▊ | 418/1019 [33:06<53:58, 5.39s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▊ | 418/1019 [33:06<53:58, 5.39s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6482, 'learning_rate': 0.00024959999999999994, 'epoch': 0.41} + 41%|████████████████████████████████▊ | 418/1019 [33:06<53:58, 5.39s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▉ | 420/1019 [33:17<53:03, 5.31s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▉ | 420/1019 [33:17<53:03, 5.31s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6418, 'learning_rate': 0.00025019999999999996, 'epoch': 0.41} + 41%|████████████████████████████████▉ | 420/1019 [33:17<53:03, 5.31s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████ | 421/1019 [33:22<52:50, 5.30s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████ | 421/1019 [33:22<52:50, 5.30s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▏ | 422/1019 [33:27<52:35, 5.29s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▏ | 422/1019 [33:27<52:35, 5.29s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6701, 'learning_rate': 0.0002514, 'epoch': 0.41} + 41%|█████████████████████████████████▏ | 422/1019 [33:27<52:35, 5.29s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▏ | 423/1019 [33:32<51:54, 5.23s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▏ | 423/1019 [33:32<51:54, 5.23s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▎ | 424/1019 [33:37<51:39, 5.21s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▎ | 424/1019 [33:37<51:39, 5.21s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.0263, 'learning_rate': 0.00025259999999999996, 'epoch': 0.42} + 42%|█████████████████████████████████▎ | 424/1019 [33:37<51:39, 5.21s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▎ | 425/1019 [33:42<51:11, 5.17s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▎ | 425/1019 [33:42<51:11, 5.17s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▍ | 426/1019 [33:48<50:38, 5.12s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▍ | 426/1019 [33:48<50:38, 5.12s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8681, 'learning_rate': 0.0002538, 'epoch': 0.42} + g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2982, 'learning_rate': 0.00025439999999999995, 'epoch': 0.42} + g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▌ | 428/1019 [33:57<49:40, 5.04s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▌ | 428/1019 [33:57<49:40, 5.04s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5738, 'learning_rate': 0.00025499999999999996, 'epoch': 0.42} + 42%|█████████████████████████████████▋ | 429/1019 [34:02<49:07, 5.00s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▋ | 429/1019 [34:02<49:07, 5.00s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.549, 'learning_rate': 0.0002556, 'epoch': 0.42} + 42%|█████████████████████████████████▋ | 429/1019 [34:02<49:07, 5.00s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 430/1019 [34:07<48:48, 4.97s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 430/1019 [34:07<48:48, 4.97s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 431/1019 [34:12<48:13, 4.92s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 431/1019 [34:12<48:13, 4.92s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.426, 'learning_rate': 0.00025679999999999995, 'epoch': 0.42} + 42%|█████████████████████████████████▉ | 432/1019 [34:17<47:30, 4.86s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▉ | 432/1019 [34:17<47:30, 4.86s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5729, 'learning_rate': 0.00025739999999999997, 'epoch': 0.42} + 42%|█████████████████████████████████▉ | 432/1019 [34:17<47:30, 4.86s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▉ | 433/1019 [34:21<46:55, 4.80s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▉ | 433/1019 [34:21<46:55, 4.80s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████ | 434/1019 [34:26<46:06, 4.73s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████ | 434/1019 [34:26<46:06, 4.73s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5049, 'learning_rate': 0.0002586, 'epoch': 0.43} + 43%|██████████████████████████████████▏ | 435/1019 [34:30<45:19, 4.66s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▏ | 435/1019 [34:30<45:19, 4.66s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4762, 'learning_rate': 0.00025919999999999996, 'epoch': 0.43} + 43%|██████████████████████████████████▏ | 436/1019 [34:35<44:22, 4.57s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▏ | 436/1019 [34:35<44:22, 4.57s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8848, 'learning_rate': 0.00025979999999999997, 'epoch': 0.43} + 43%|██████████████████████████████████▎ | 437/1019 [34:39<43:38, 4.50s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▎ | 437/1019 [34:39<43:38, 4.50s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4942, 'learning_rate': 0.0002604, 'epoch': 0.43} + 43%|██████████████████████████████████▎ | 437/1019 [34:39<43:38, 4.50s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▍ | 438/1019 [34:43<42:43, 4.41s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▍ | 438/1019 [34:43<42:43, 4.41s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▍ | 438/1019 [34:43<42:43, 4.41s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▍ | 439/1019 [34:48<41:49, 4.33s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▍ | 439/1019 [34:48<41:49, 4.33s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▍ | 439/1019 [34:48<41:49, 4.33s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▌ | 440/1019 [34:51<40:31, 4.20s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▌ | 440/1019 [34:51<40:31, 4.20s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▌ | 440/1019 [34:51<40:31, 4.20s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▌ | 441/1019 [34:55<39:06, 4.06s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▌ | 441/1019 [34:55<39:06, 4.06s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▌ | 441/1019 [34:55<39:06, 4.06s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▋ | 442/1019 [34:59<37:47, 3.93s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:19:41,403 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:19:41,403 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4947, 'learning_rate': 0.00026399999999999997, 'epoch': 0.43} +[WARNING|modeling_utils.py:388] 2022-03-02 22:19:41,403 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|██████████████████████████████████▊ | 444/1019 [35:05<34:07, 3.56s/it]g-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:19:47,352 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:19:47,352 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:19:49,877 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:19:49,877 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.3024, 'learning_rate': 0.00026579999999999996, 'epoch': 0.44} +[WARNING|modeling_utils.py:388] 2022-03-02 22:19:49,877 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:11:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████ | 447/1019 [35:13<27:06, 2.84s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:19:53,179 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████ | 447/1019 [35:13<27:06, 2.84s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:19:53,179 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▎ | 449/1019 [35:16<21:48, 2.30s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:19:54,969 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▎ | 449/1019 [35:16<21:48, 2.30s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:19:54,969 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▎ | 450/1019 [35:18<20:48, 2.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:19:56,506 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▎ | 450/1019 [35:18<20:48, 2.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:19:56,506 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▎ | 450/1019 [35:18<20:48, 2.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▎ | 450/1019 [35:18<20:48, 2.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 451/1019 [35:25<32:41, 3.45s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 451/1019 [35:25<32:41, 3.45s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 451/1019 [35:25<32:41, 3.45s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 452/1019 [35:31<39:53, 4.22s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 452/1019 [35:31<39:53, 4.22s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▌ | 453/1019 [35:37<44:26, 4.71s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▌ | 453/1019 [35:37<44:26, 4.71s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5276, 'learning_rate': 0.00027, 'epoch': 0.44} + 45%|███████████████████████████████████▋ | 454/1019 [35:43<47:43, 5.07s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|███████████████████████████████████▋ | 454/1019 [35:43<47:43, 5.07s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8693, 'learning_rate': 0.00027059999999999996, 'epoch': 0.45} + 45%|███████████████████████████████████▋ | 454/1019 [35:43<47:43, 5.07s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|███████████████████████████████████▋ | 455/1019 [35:48<49:33, 5.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|███████████████████████████████████▋ | 455/1019 [35:48<49:33, 5.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|███████████████████████████████████▋ | 455/1019 [35:48<49:33, 5.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|███████████████████████████████████▊ | 456/1019 [35:54<50:50, 5.42s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|███████████████████████████████████▊ | 456/1019 [35:54<50:50, 5.42s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|███████████████████████████████████▊ | 456/1019 [35:54<50:50, 5.42s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|███████████████████████████████████▉ | 457/1019 [36:00<51:57, 5.55s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|███████████████████████████████████▉ | 457/1019 [36:00<51:57, 5.55s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|███████████████████████████████████▉ | 457/1019 [36:00<51:57, 5.55s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|███████████████████████████████████▉ | 457/1019 [36:00<51:57, 5.55s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5308, 'learning_rate': 0.00027299999999999997, 'epoch': 0.45} + 45%|███████████████████████████████████▉ | 457/1019 [36:00<51:57, 5.55s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████ | 459/1019 [36:11<52:28, 5.62s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████ | 459/1019 [36:11<52:28, 5.62s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6579, 'learning_rate': 0.0002736, 'epoch': 0.45} + 45%|████████████████████████████████████ | 459/1019 [36:11<52:28, 5.62s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████ | 460/1019 [36:17<52:14, 5.61s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████ | 460/1019 [36:17<52:14, 5.61s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████ | 460/1019 [36:17<52:14, 5.61s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▏ | 461/1019 [36:22<52:00, 5.59s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▏ | 461/1019 [36:22<52:00, 5.59s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 462/1019 [36:28<51:45, 5.58s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 462/1019 [36:28<51:45, 5.58s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9032, 'learning_rate': 0.00027539999999999997, 'epoch': 0.45} + 45%|████████████████████████████████████▎ | 463/1019 [36:34<51:29, 5.56s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 463/1019 [36:34<51:29, 5.56s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9936, 'learning_rate': 0.000276, 'epoch': 0.45} + 45%|████████████████████████████████████▎ | 463/1019 [36:34<51:29, 5.56s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▍ | 464/1019 [36:39<51:16, 5.54s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▍ | 464/1019 [36:39<51:16, 5.54s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▍ | 464/1019 [36:39<51:16, 5.54s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▌ | 465/1019 [36:44<50:45, 5.50s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▌ | 465/1019 [36:44<50:45, 5.50s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▌ | 466/1019 [36:50<50:24, 5.47s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▌ | 466/1019 [36:50<50:24, 5.47s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9673, 'learning_rate': 0.0002778, 'epoch': 0.46} + 46%|████████████████████████████████████▌ | 466/1019 [36:50<50:24, 5.47s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▋ | 467/1019 [36:55<50:16, 5.47s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▋ | 467/1019 [36:55<50:16, 5.47s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▋ | 467/1019 [36:55<50:16, 5.47s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▋ | 467/1019 [36:55<50:16, 5.47s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5706, 'learning_rate': 0.000279, 'epoch': 0.46} + 46%|████████████████████████████████████▋ | 467/1019 [36:55<50:16, 5.47s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▊ | 469/1019 [37:06<49:37, 5.41s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▊ | 469/1019 [37:06<49:37, 5.41s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4252, 'learning_rate': 0.00027959999999999997, 'epoch': 0.46} + 46%|████████████████████████████████████▉ | 470/1019 [37:11<49:19, 5.39s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▉ | 470/1019 [37:11<49:19, 5.39s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.623, 'learning_rate': 0.0002802, 'epoch': 0.46} + 46%|████████████████████████████████████▉ | 471/1019 [37:17<48:58, 5.36s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▉ | 471/1019 [37:17<48:58, 5.36s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5258, 'learning_rate': 0.0002808, 'epoch': 0.46} + 46%|█████████████████████████████████████ | 472/1019 [37:22<48:19, 5.30s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████ | 472/1019 [37:22<48:19, 5.30s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.0276, 'learning_rate': 0.00028139999999999996, 'epoch': 0.46} + 46%|█████████████████████████████████████▏ | 473/1019 [37:27<47:56, 5.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▏ | 473/1019 [37:27<47:56, 5.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6158, 'learning_rate': 0.00028199999999999997, 'epoch': 0.46} + 46%|█████████████████████████████████████▏ | 473/1019 [37:27<47:56, 5.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▏ | 474/1019 [37:32<48:16, 5.31s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▏ | 474/1019 [37:32<48:16, 5.31s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▎ | 475/1019 [37:38<47:58, 5.29s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▎ | 475/1019 [37:38<47:58, 5.29s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.499, 'learning_rate': 0.00028319999999999994, 'epoch': 0.47} + 47%|█████████████████████████████████████▎ | 476/1019 [37:43<47:24, 5.24s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▎ | 476/1019 [37:43<47:24, 5.24s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9739, 'learning_rate': 0.00028379999999999996, 'epoch': 0.47} + 47%|█████████████████████████████████████▍ | 477/1019 [37:48<46:44, 5.17s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▍ | 477/1019 [37:48<46:44, 5.17s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4816, 'learning_rate': 0.0002844, 'epoch': 0.47} + 47%|█████████████████████████████████████▍ | 477/1019 [37:48<46:44, 5.17s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▌ | 478/1019 [37:53<46:02, 5.11s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▌ | 478/1019 [37:53<46:02, 5.11s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▌ | 479/1019 [37:58<45:33, 5.06s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▌ | 479/1019 [37:58<45:33, 5.06s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6396, 'learning_rate': 0.00028559999999999995, 'epoch': 0.47} + 47%|█████████████████████████████████████▋ | 480/1019 [38:03<45:09, 5.03s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▋ | 480/1019 [38:03<45:09, 5.03s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5892, 'learning_rate': 0.00028619999999999996, 'epoch': 0.47} + 47%|█████████████████████████████████████▊ | 481/1019 [38:08<44:36, 4.97s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▊ | 481/1019 [38:08<44:36, 4.97s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7315, 'learning_rate': 0.0002868, 'epoch': 0.47} + 47%|█████████████████████████████████████▊ | 482/1019 [38:12<44:06, 4.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▊ | 482/1019 [38:12<44:06, 4.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6936, 'learning_rate': 0.00028739999999999994, 'epoch': 0.47} + 47%|█████████████████████████████████████▊ | 482/1019 [38:12<44:06, 4.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▊ | 482/1019 [38:12<44:06, 4.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4115, 'learning_rate': 0.00028799999999999995, 'epoch': 0.47} + 47%|█████████████████████████████████████▊ | 482/1019 [38:12<44:06, 4.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▉ | 484/1019 [38:22<43:35, 4.89s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▉ | 484/1019 [38:22<43:35, 4.89s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8432, 'learning_rate': 0.00028859999999999997, 'epoch': 0.47} + 48%|██████████████████████████████████████ | 485/1019 [38:27<42:36, 4.79s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████ | 485/1019 [38:27<42:36, 4.79s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2421, 'learning_rate': 0.0002892, 'epoch': 0.48} + 48%|██████████████████████████████████████ | 485/1019 [38:27<42:36, 4.79s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▏ | 486/1019 [38:31<41:58, 4.72s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▏ | 486/1019 [38:31<41:58, 4.72s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▏ | 486/1019 [38:31<41:58, 4.72s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▏ | 487/1019 [38:36<40:59, 4.62s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▏ | 487/1019 [38:36<40:59, 4.62s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▎ | 488/1019 [38:40<40:07, 4.53s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▎ | 488/1019 [38:40<40:07, 4.53s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5901, 'learning_rate': 0.00029099999999999997, 'epoch': 0.48} + 48%|██████████████████████████████████████▍ | 489/1019 [38:44<39:10, 4.44s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▍ | 489/1019 [38:44<39:10, 4.44s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7235, 'learning_rate': 0.0002916, 'epoch': 0.48} + 48%|██████████████████████████████████████▍ | 490/1019 [38:48<38:12, 4.33s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▍ | 490/1019 [38:48<38:12, 4.33s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4504, 'learning_rate': 0.00029219999999999995, 'epoch': 0.48} + 48%|██████████████████████████████████████▌ | 491/1019 [38:52<36:47, 4.18s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▌ | 491/1019 [38:52<36:47, 4.18s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8153, 'learning_rate': 0.00029279999999999996, 'epoch': 0.48} + 48%|██████████████████████████████████████▌ | 491/1019 [38:52<36:47, 4.18s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▋ | 492/1019 [38:56<35:12, 4.01s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▋ | 492/1019 [38:56<35:12, 4.01s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▋ | 492/1019 [38:56<35:12, 4.01s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:20:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▋ | 493/1019 [38:59<33:16, 3.79s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:23:39,805 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▋ | 493/1019 [38:59<33:16, 3.79s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:23:39,805 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▊ | 494/1019 [39:02<31:23, 3.59s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:23:39,805 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▊ | 494/1019 [39:02<31:23, 3.59s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:23:39,805 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▊ | 494/1019 [39:02<31:23, 3.59s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:23:39,805 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|██████████████████████████████████████▊ | 495/1019 [39:05<29:26, 3.37s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:23:45,562 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|██████████████████████████████████████▊ | 495/1019 [39:05<29:26, 3.37s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:23:45,562 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|██████████████████████████████████████▉ | 496/1019 [39:07<27:22, 3.14s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:23:47,989 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|██████████████████████████████████████▉ | 496/1019 [39:07<27:22, 3.14s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:23:47,989 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████ | 497/1019 [39:10<25:09, 2.89s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:23:50,143 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████ | 497/1019 [39:10<25:09, 2.89s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:23:50,143 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████ | 498/1019 [39:12<22:46, 2.62s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:23:52,037 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████ | 498/1019 [39:12<22:46, 2.62s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:23:52,037 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▏ | 499/1019 [39:14<20:26, 2.36s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:23:53,575 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▏ | 499/1019 [39:14<20:26, 2.36s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:23:53,575 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 22:23:54,903 >> Num examples = 2642 | 500/1019 [39:16<19:15, 2.23s/it][INFO|trainer.py:560] 2022-03-02 22:23:54,900 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. +[INFO|trainer.py:2366] 2022-03-02 22:23:54,903 >> Num examples = 2642 | 500/1019 [39:16<19:15, 2.23s/it][INFO|trainer.py:560] 2022-03-02 22:23:54,900 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. +[INFO|trainer.py:2366] 2022-03-02 22:23:54,903 >> Num examples = 2642 | 500/1019 [39:16<19:15, 2.23s/it][INFO|trainer.py:560] 2022-03-02 22:23:54,900 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 2%|█▎ | 3/189 [00:06<06:55, 2.24s/it][INFO|trainer.py:560] 2022-03-02 22:23:54,900 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 2%|█▊ | 4/189 [00:09<07:47, 2.53s/it][INFO|trainer.py:560] 2022-03-02 22:23:54,900 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 3%|██▏ | 5/189 [00:12<08:59, 2.93s/it][INFO|trainer.py:560] 2022-03-02 22:23:54,900 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 3%|██▋ | 6/189 [00:17<10:11, 3.34s/it][INFO|trainer.py:560] 2022-03-02 22:23:54,900 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 4%|███ | 7/189 [00:20<10:02, 3.31s/it][INFO|trainer.py:560] 2022-03-02 22:23:54,900 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 4%|███▌ | 8/189 [00:23<09:53, 3.28s/it][INFO|trainer.py:560] 2022-03-02 22:23:54,900 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 5%|███▉ | 9/189 [00:28<11:13, 3.74s/it][INFO|trainer.py:560] 2022-03-02 22:23:54,900 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 5%|████▎ | 10/189 [00:32<11:29, 3.85s/it][INFO|trainer.py:560] 2022-03-02 22:23:54,900 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 6%|████▊ | 11/189 [00:35<10:56, 3.69s/it][INFO|trainer.py:560] 2022-03-02 22:23:54,900 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 6%|█████▏ | 12/189 [00:39<10:54, 3.70s/it][INFO|trainer.py:560] 2022-03-02 22:23:54,900 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. +RuntimeError: CUDA out of memory. Tried to allocate 1.69 GiB (GPU 0; 15.78 GiB total capacity; 9.19 GiB already allocated; 1.65 GiB free; 12.44 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONFare not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. +RuntimeError: CUDA out of memory. Tried to allocate 1.69 GiB (GPU 0; 15.78 GiB total capacity; 9.19 GiB already allocated; 1.65 GiB free; 12.44 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONFare not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. \ No newline at end of file diff --git a/wandb/run-20220302_214437-2u4nhnsf/files/requirements.txt b/wandb/run-20220302_214437-2u4nhnsf/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..3974f97a24952deb24d97850f53367da9e7c347d --- /dev/null +++ b/wandb/run-20220302_214437-2u4nhnsf/files/requirements.txt @@ -0,0 +1,184 @@ +absl-py==1.0.0 +aiohttp==3.8.1 +aiosignal==1.2.0 +anyio==3.5.0 +appdirs==1.4.4 +argon2-cffi-bindings==21.2.0 +argon2-cffi==21.3.0 +asttokens==2.0.5 +async-timeout==4.0.2 +attrs==21.4.0 +audioread==2.1.9 +babel==2.9.1 +backcall==0.2.0 +bitsandbytes-cuda113==0.26.0 +black==22.1.0 +bleach==4.1.0 +cachetools==5.0.0 +certifi==2021.10.8 +cffi==1.15.0 +charset-normalizer==2.0.11 +chex==0.1.0 +click==8.0.3 +clldutils==3.10.1 +colorlog==6.6.0 +csvw==1.11.0 +cycler==0.11.0 +datasets==1.18.3 +debugpy==1.5.1 +decorator==5.1.1 +defusedxml==0.7.1 +dill==0.3.4 +dlinfo==1.2.1 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +entrypoints==0.4 +executing==0.8.2 +filelock==3.4.2 +flatbuffers==2.0 +flax==0.4.0 +fonttools==4.29.1 +frozenlist==1.3.0 +fsspec==2022.1.0 +gitdb==4.0.9 +gitpython==3.1.27 +google-auth-oauthlib==0.4.6 +google-auth==2.6.0 +grpcio==1.43.0 +huggingface-hub==0.4.0 +hypothesis==6.36.1 +idna==3.3 +importlib-metadata==4.10.1 +ipykernel==6.8.0 +ipython-genutils==0.2.0 +ipython==8.0.1 +ipywidgets==7.6.5 +isodate==0.6.1 +jax==0.2.28 +jaxlib==0.1.76+cuda11.cudnn82 +jedi==0.18.1 +jinja2==3.0.3 +jiwer==2.3.0 +joblib==1.1.0 +json5==0.9.6 +jsonschema==4.4.0 +jupyter-client==7.1.2 +jupyter-console==6.4.0 +jupyter-core==4.9.1 +jupyter-server==1.13.5 +jupyter==1.0.0 +jupyterlab-pygments==0.1.2 +jupyterlab-server==2.10.3 +jupyterlab-widgets==1.0.2 +jupyterlab==3.2.9 +kiwisolver==1.3.2 +librosa==0.8.1 +llvmlite==0.38.0 +markdown==3.3.6 +markupsafe==2.0.1 +matplotlib-inline==0.1.3 +matplotlib==3.5.1 +mistune==0.8.4 +msgpack==1.0.3 +multidict==6.0.2 +multiprocess==0.70.12.2 +mypy-extensions==0.4.3 +nbclassic==0.3.5 +nbclient==0.5.10 +nbconvert==6.4.1 +nbformat==5.1.3 +nest-asyncio==1.5.4 +notebook==6.4.8 +numba==0.55.1 +numpy==1.21.5 +oauthlib==3.2.0 +opt-einsum==3.3.0 +optax==0.1.0 +packaging==21.3 +pandas==1.4.0 +pandocfilters==1.5.0 +parso==0.8.3 +pathspec==0.9.0 +pathtools==0.1.2 +pexpect==4.8.0 +phonemizer==3.0.1 +pickleshare==0.7.5 +pillow==9.0.0 +pip==22.0.2 +pkg-resources==0.0.0 +platformdirs==2.4.1 +pooch==1.6.0 +prometheus-client==0.13.1 +promise==2.3 +prompt-toolkit==3.0.26 +protobuf==3.19.4 +psutil==5.9.0 +ptyprocess==0.7.0 +pure-eval==0.2.2 +pyarrow==6.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pycparser==2.21 +pyctcdecode==0.3.0 +pygments==2.11.2 +pygtrie==2.4.2 +pyparsing==3.0.7 +pyrsistent==0.18.1 +python-dateutil==2.8.2 +python-levenshtein==0.12.2 +pytz==2021.3 +pyyaml==6.0 +pyzmq==22.3.0 +qtconsole==5.2.2 +qtpy==2.0.1 +regex==2022.1.18 +requests-oauthlib==1.3.1 +requests==2.27.1 +resampy==0.2.2 +rfc3986==2.0.0 +rsa==4.8 +sacremoses==0.0.47 +scikit-learn==1.0.2 +scipy==1.7.3 +segments==2.2.0 +send2trash==1.8.0 +sentry-sdk==1.5.6 +setuptools==44.1.1 +shortuuid==1.0.8 +six==1.16.0 +smmap==5.0.0 +sniffio==1.2.0 +sortedcontainers==2.4.0 +soundfile==0.10.3.post1 +stack-data==0.1.4 +tabulate==0.8.9 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.1 +tensorboard==2.8.0 +termcolor==1.1.0 +terminado==0.13.1 +testpath==0.5.0 +threadpoolctl==3.1.0 +tokenizers==0.11.4 +tomli==2.0.0 +toolz==0.11.2 +torch==1.10.2+cu113 +torchaudio==0.10.2+cu113 +tornado==6.1 +tqdm==4.62.3 +traitlets==5.1.1 +transformers==4.17.0.dev0 +typing-extensions==3.10.0.2 +uritemplate==4.1.1 +urllib3==1.26.8 +wandb==0.12.10 +wcwidth==0.2.5 +webencodings==0.5.1 +websocket-client==1.2.3 +werkzeug==2.0.2 +wheel==0.37.1 +widgetsnbextension==3.5.2 +xxhash==2.0.2 +yarl==1.7.2 +yaspin==2.1.0 +zipp==3.7.0 \ No newline at end of file diff --git a/wandb/run-20220302_214437-2u4nhnsf/files/wandb-metadata.json b/wandb/run-20220302_214437-2u4nhnsf/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..8db51c9dccaaa7da8bd91342e64871fe0b18b4ab --- /dev/null +++ b/wandb/run-20220302_214437-2u4nhnsf/files/wandb-metadata.json @@ -0,0 +1,60 @@ +{ + "os": "Linux-5.11.0-1028-gcp-x86_64-with-glibc2.33", + "python": "3.9.5", + "heartbeatAt": "2022-03-02T21:44:38.711819", + "startedAt": "2022-03-02T21:44:37.567182", + "docker": null, + "gpu": "Tesla V100-SXM2-16GB", + "gpu_count": 2, + "cpu_count": 16, + "cuda": null, + "args": [ + "--dataset_name=librispeech_asr", + "--model_name_or_path=./", + "--tokenizer_name=./", + "--dataset_config_name=clean", + "--train_split_name=train.100", + "--eval_split_name=validation", + "--output_dir=./", + "--preprocessing_num_workers=1", + "--length_column_name=input_length", + "--overwrite_output_dir", + "--num_train_epochs=1", + "--per_device_train_batch_size=14", + "--per_device_eval_batch_size=14", + "--gradient_accumulation_steps=2", + "--generation_max_length=40", + "--generation_num_beams=1", + "--learning_rate=3e-4", + "--warmup_steps=500", + "--evaluation_strategy=steps", + "--text_column_name=text", + "--save_steps=500", + "--eval_steps=500", + "--logging_steps=1", + "--save_total_limit=1", + "--freeze_feature_encoder", + "--gradient_checkpointing", + "--fp16", + "--group_by_length", + "--predict_with_generate", + "--do_lower_case", + "--do_train", + "--do_eval", + "--report_to=wandb", + "--push_to_hub", + "--use_auth_token" + ], + "state": "running", + "program": "/home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/run_speech_recognition_seq2seq.py", + "codePath": "run_speech_recognition_seq2seq.py", + "git": { + "remote": "https://huggingface.co/sanchit-gandhi/wav2vec2-gpt2-wandb-grid-search", + "commit": "8c7181143c175387040dc1a6ac2ddbc9179b550c" + }, + "email": "sanchit@huggingface.co", + "root": "/home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search", + "host": "sanchit--v100", + "username": "sanchit_huggingface_co", + "executable": "/home/sanchit_huggingface_co/gcp/bin/python" +} diff --git a/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json b/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..51988c39602e3b239967b7b459d29c7da51a81a7 --- /dev/null +++ b/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json @@ -0,0 +1 @@ +{"train/loss": 6.632, "train/learning_rate": 0.0002982, "train/epoch": 0.49, "train/global_step": 500, "_runtime": 2357, "_timestamp": 1646259834, "_step": 499, "gradients/decoder.transformer.ln_f.weight": {"_type": "histogram", "values": [4.0, 2.0, 2.0, 7.0, 11.0, 21.0, 48.0, 206.0, 453.0, 126.0, 65.0, 23.0, 21.0, 7.0, 4.0, 4.0, 8.0, 3.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-165.7322998046875, -145.85516357421875, -125.97804260253906, -106.10090637207031, -86.2237777709961, -66.34664916992188, -46.469512939453125, -26.592391967773438, -6.7152557373046875, 13.161874771118164, 33.039005279541016, 52.9161376953125, 72.79326629638672, 92.67039489746094, 112.54753112792969, 132.42465209960938, 152.30178833007812, 172.17892456054688, 192.05604553222656, 211.9331817626953, 231.810302734375, 251.68743896484375, 271.5645751953125, 291.44171142578125, 311.31884765625, 331.19598388671875, 351.0731201171875, 370.95025634765625, 390.8273620605469, 410.7044982910156, 430.5816345214844, 450.458740234375, 470.33587646484375, 490.2130126953125, 510.09014892578125, 529.96728515625, 549.8444213867188, 569.7215576171875, 589.5986328125, 609.4757690429688, 629.3529052734375, 649.2300415039062, 669.107177734375, 688.9843139648438, 708.8614501953125, 728.738525390625, 748.61572265625, 768.4927978515625, 788.3699951171875, 808.2471313476562, 828.124267578125, 848.0014038085938, 867.8785400390625, 887.755615234375, 907.6328125, 927.5098876953125, 947.3870239257812, 967.26416015625, 987.1412963867188, 1007.0184326171875, 1026.8955078125, 1046.772705078125, 1066.6497802734375, 1086.5269775390625, 1106.404052734375]}, "gradients/decoder.transformer.ln_f.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 2.0, 0.0, 3.0, 1.0, 4.0, 5.0, 5.0, 4.0, 9.0, 7.0, 5.0, 12.0, 13.0, 10.0, 15.0, 13.0, 17.0, 21.0, 23.0, 22.0, 28.0, 24.0, 26.0, 43.0, 67.0, 106.0, 124.0, 91.0, 41.0, 32.0, 34.0, 29.0, 24.0, 29.0, 19.0, 13.0, 12.0, 11.0, 15.0, 10.0, 11.0, 4.0, 5.0, 3.0, 3.0, 8.0, 4.0, 4.0, 3.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-919.1222534179688, -892.2859497070312, -865.44970703125, -838.6134033203125, -811.7771606445312, -784.9408569335938, -758.1046142578125, -731.268310546875, -704.4320068359375, -677.595703125, -650.7594604492188, -623.9231567382812, -597.0869140625, -570.2506103515625, -543.414306640625, -516.5780639648438, -489.7417907714844, -462.905517578125, -436.0692443847656, -409.23297119140625, -382.39666748046875, -355.5603942871094, -328.72412109375, -301.8878173828125, -275.05157470703125, -248.21530151367188, -221.37901306152344, -194.54273986816406, -167.70645141601562, -140.87017822265625, -114.03390502929688, -87.19761657714844, -60.361328125, -33.525047302246094, -6.688770294189453, 20.147506713867188, 46.983787536621094, 73.820068359375, 100.65634155273438, 127.49263000488281, 154.3289031982422, 181.16517639160156, 208.00146484375, 234.83773803710938, 261.67401123046875, 288.51031494140625, 315.3465576171875, 342.182861328125, 369.0191345214844, 395.85540771484375, 422.6916809082031, 449.5279541015625, 476.3642578125, 503.2005310058594, 530.0368041992188, 556.8731079101562, 583.7093505859375, 610.545654296875, 637.3818969726562, 664.2182006835938, 691.054443359375, 717.8907470703125, 744.72705078125, 771.5632934570312, 798.3995971679688]}, "gradients/decoder.transformer.h.23.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 3.0, 3.0, 1.0, 2.0, 4.0, 6.0, 3.0, 11.0, 7.0, 8.0, 6.0, 14.0, 14.0, 15.0, 8.0, 23.0, 22.0, 28.0, 24.0, 25.0, 33.0, 39.0, 64.0, 94.0, 125.0, 99.0, 54.0, 36.0, 34.0, 29.0, 31.0, 29.0, 18.0, 12.0, 11.0, 16.0, 15.0, 11.0, 5.0, 4.0, 6.0, 2.0, 8.0, 4.0, 5.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0], "bins": [-26.484375, -25.708251953125, -24.93212890625, -24.156005859375, -23.3798828125, -22.603759765625, -21.82763671875, -21.051513671875, -20.275390625, -19.499267578125, -18.72314453125, -17.947021484375, -17.1708984375, -16.394775390625, -15.61865234375, -14.842529296875, -14.06640625, -13.290283203125, -12.51416015625, -11.738037109375, -10.9619140625, -10.185791015625, -9.40966796875, -8.633544921875, -7.857421875, -7.081298828125, -6.30517578125, -5.529052734375, -4.7529296875, -3.976806640625, -3.20068359375, -2.424560546875, -1.6484375, -0.872314453125, -0.09619140625, 0.679931640625, 1.4560546875, 2.232177734375, 3.00830078125, 3.784423828125, 4.560546875, 5.336669921875, 6.11279296875, 6.888916015625, 7.6650390625, 8.441162109375, 9.21728515625, 9.993408203125, 10.76953125, 11.545654296875, 12.32177734375, 13.097900390625, 13.8740234375, 14.650146484375, 15.42626953125, 16.202392578125, 16.978515625, 17.754638671875, 18.53076171875, 19.306884765625, 20.0830078125, 20.859130859375, 21.63525390625, 22.411376953125, 23.1875]}, "gradients/decoder.transformer.h.23.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 3.0, 3.0, 2.0, 7.0, 8.0, 7.0, 9.0, 11.0, 21.0, 18.0, 35.0, 38.0, 55.0, 56.0, 82.0, 128.0, 154.0, 203.0, 316.0, 456.0, 715.0, 1075.0, 1847.0, 3663.0, 8482.0, 24284.0, 407340.0, 3681098.0, 42518.0, 11499.0, 4491.0, 2157.0, 1182.0, 729.0, 470.0, 320.0, 195.0, 166.0, 134.0, 73.0, 63.0, 35.0, 24.0, 35.0, 19.0, 23.0, 14.0, 11.0, 3.0, 5.0, 3.0, 3.0, 4.0, 3.0, 1.0, 2.0, 1.0], "bins": [-174.125, -169.033203125, -163.94140625, -158.849609375, -153.7578125, -148.666015625, -143.57421875, -138.482421875, -133.390625, -128.298828125, -123.20703125, -118.115234375, -113.0234375, -107.931640625, -102.83984375, -97.748046875, -92.65625, -87.564453125, -82.47265625, -77.380859375, -72.2890625, -67.197265625, -62.10546875, -57.013671875, -51.921875, -46.830078125, -41.73828125, -36.646484375, -31.5546875, -26.462890625, -21.37109375, -16.279296875, -11.1875, -6.095703125, -1.00390625, 4.087890625, 9.1796875, 14.271484375, 19.36328125, 24.455078125, 29.546875, 34.638671875, 39.73046875, 44.822265625, 49.9140625, 55.005859375, 60.09765625, 65.189453125, 70.28125, 75.373046875, 80.46484375, 85.556640625, 90.6484375, 95.740234375, 100.83203125, 105.923828125, 111.015625, 116.107421875, 121.19921875, 126.291015625, 131.3828125, 136.474609375, 141.56640625, 146.658203125, 151.75]}, "gradients/decoder.transformer.h.23.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 2.0, 2.0, 2.0, 3.0, 2.0, 6.0, 4.0, 5.0, 9.0, 15.0, 12.0, 19.0, 21.0, 24.0, 24.0, 29.0, 36.0, 33.0, 43.0, 64.0, 82.0, 132.0, 197.0, 413.0, 1017.0, 878.0, 402.0, 161.0, 107.0, 56.0, 60.0, 39.0, 34.0, 27.0, 19.0, 20.0, 3.0, 16.0, 12.0, 13.0, 9.0, 9.0, 2.0, 8.0, 4.0, 3.0, 2.0, 3.0, 2.0, 0.0, 0.0, 1.0, 3.0], "bins": [-91.5625, -89.01171875, -86.4609375, -83.91015625, -81.359375, -78.80859375, -76.2578125, -73.70703125, -71.15625, -68.60546875, -66.0546875, -63.50390625, -60.953125, -58.40234375, -55.8515625, -53.30078125, -50.75, -48.19921875, -45.6484375, -43.09765625, -40.546875, -37.99609375, -35.4453125, -32.89453125, -30.34375, -27.79296875, -25.2421875, -22.69140625, -20.140625, -17.58984375, -15.0390625, -12.48828125, -9.9375, -7.38671875, -4.8359375, -2.28515625, 0.265625, 2.81640625, 5.3671875, 7.91796875, 10.46875, 13.01953125, 15.5703125, 18.12109375, 20.671875, 23.22265625, 25.7734375, 28.32421875, 30.875, 33.42578125, 35.9765625, 38.52734375, 41.078125, 43.62890625, 46.1796875, 48.73046875, 51.28125, 53.83203125, 56.3828125, 58.93359375, 61.484375, 64.03515625, 66.5859375, 69.13671875, 71.6875]}, "gradients/decoder.transformer.h.23.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 6.0, 3.0, 6.0, 2.0, 10.0, 9.0, 11.0, 17.0, 20.0, 24.0, 33.0, 46.0, 59.0, 64.0, 69.0, 77.0, 111.0, 135.0, 201.0, 368.0, 919.0, 3283.0, 19238.0, 270568.0, 3833671.0, 54853.0, 7303.0, 1623.0, 555.0, 270.0, 157.0, 120.0, 85.0, 67.0, 52.0, 48.0, 48.0, 33.0, 33.0, 25.0, 14.0, 15.0, 7.0, 13.0, 6.0, 4.0, 5.0, 2.0, 1.0, 2.0, 3.0, 0.0, 0.0, 1.0, 1.0], "bins": [-262.0, -254.1171875, -246.234375, -238.3515625, -230.46875, -222.5859375, -214.703125, -206.8203125, -198.9375, -191.0546875, -183.171875, -175.2890625, -167.40625, -159.5234375, -151.640625, -143.7578125, -135.875, -127.9921875, -120.109375, -112.2265625, -104.34375, -96.4609375, -88.578125, -80.6953125, -72.8125, -64.9296875, -57.046875, -49.1640625, -41.28125, -33.3984375, -25.515625, -17.6328125, -9.75, -1.8671875, 6.015625, 13.8984375, 21.78125, 29.6640625, 37.546875, 45.4296875, 53.3125, 61.1953125, 69.078125, 76.9609375, 84.84375, 92.7265625, 100.609375, 108.4921875, 116.375, 124.2578125, 132.140625, 140.0234375, 147.90625, 155.7890625, 163.671875, 171.5546875, 179.4375, 187.3203125, 195.203125, 203.0859375, 210.96875, 218.8515625, 226.734375, 234.6171875, 242.5]}, "gradients/decoder.transformer.h.23.ln_2.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 3.0, 6.0, 13.0, 214.0, 748.0, 29.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-401.31201171875, -346.63958740234375, -291.9671936035156, -237.29478454589844, -182.62237548828125, -127.949951171875, -73.27755737304688, -18.60516357421875, 36.0672607421875, 90.73966979980469, 145.41207885742188, 200.08448791503906, 254.75689697265625, 309.4293212890625, 364.1017150878906, 418.77410888671875, 473.446533203125, 528.1189575195312, 582.7913818359375, 637.4637451171875, 692.1361694335938, 746.80859375, 801.48095703125, 856.1533813476562, 910.8258056640625, 965.4982299804688, 1020.170654296875, 1074.843017578125, 1129.515380859375, 1184.1878662109375, 1238.8602294921875, 1293.53271484375, 1348.205078125, 1402.87744140625, 1457.5499267578125, 1512.2222900390625, 1566.894775390625, 1621.567138671875, 1676.239501953125, 1730.911865234375, 1785.5843505859375, 1840.2567138671875, 1894.92919921875, 1949.6015625, 2004.27392578125, 2058.9462890625, 2113.618896484375, 2168.291259765625, 2222.963623046875, 2277.635986328125, 2332.308349609375, 2386.98095703125, 2441.6533203125, 2496.32568359375, 2550.998046875, 2605.67041015625, 2660.3427734375, 2715.01513671875, 2769.6875, 2824.360107421875, 2879.032470703125, 2933.704833984375, 2988.377197265625, 3043.049560546875, 3097.72216796875]}, "gradients/decoder.transformer.h.23.ln_2.bias": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 0.0, 3.0, 2.0, 0.0, 1.0, 2.0, 1.0, 4.0, 7.0, 4.0, 5.0, 13.0, 12.0, 15.0, 16.0, 17.0, 14.0, 30.0, 33.0, 25.0, 34.0, 45.0, 41.0, 44.0, 29.0, 44.0, 48.0, 31.0, 47.0, 45.0, 44.0, 36.0, 40.0, 32.0, 31.0, 23.0, 29.0, 28.0, 30.0, 25.0, 16.0, 10.0, 14.0, 12.0, 8.0, 5.0, 5.0, 3.0, 2.0, 2.0, 2.0, 0.0, 0.0, 2.0, 2.0, 1.0, 1.0, 2.0, 0.0, 1.0, 2.0], "bins": [-349.6394958496094, -338.2491455078125, -326.8587951660156, -315.46844482421875, -304.0780944824219, -292.687744140625, -281.29742431640625, -269.9070739746094, -258.5167236328125, -247.12637329101562, -235.73602294921875, -224.34567260742188, -212.95533752441406, -201.5649871826172, -190.1746368408203, -178.7843017578125, -167.39393615722656, -156.0035858154297, -144.6132354736328, -133.222900390625, -121.83255004882812, -110.44219970703125, -99.05184936523438, -87.66150665283203, -76.27115631103516, -64.88080596923828, -53.49046325683594, -42.10011291503906, -30.709766387939453, -19.319419860839844, -7.929069519042969, 3.461273193359375, 14.85162353515625, 26.24197006225586, 37.63231658935547, 49.022666931152344, 60.41301345825195, 71.80335998535156, 83.19371032714844, 94.58405303955078, 105.97440338134766, 117.36475372314453, 128.75509643554688, 140.14544677734375, 151.53579711914062, 162.9261474609375, 174.31649780273438, 185.7068328857422, 197.09718322753906, 208.48753356933594, 219.8778839111328, 231.26821899414062, 242.6585693359375, 254.04891967773438, 265.43927001953125, 276.8296203613281, 288.219970703125, 299.6103210449219, 311.00067138671875, 322.3910217285156, 333.7813720703125, 345.17169189453125, 356.56207275390625, 367.952392578125, 379.3427429199219]}, "gradients/decoder.transformer.h.23.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 3.0, 3.0, 3.0, 3.0, 3.0, 5.0, 5.0, 6.0, 9.0, 12.0, 10.0, 13.0, 14.0, 13.0, 20.0, 20.0, 30.0, 29.0, 23.0, 46.0, 44.0, 48.0, 82.0, 76.0, 75.0, 67.0, 55.0, 63.0, 45.0, 32.0, 28.0, 22.0, 18.0, 17.0, 10.0, 10.0, 7.0, 11.0, 8.0, 4.0, 5.0, 4.0, 4.0, 5.0, 3.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-28.078125, -27.263671875, -26.44921875, -25.634765625, -24.8203125, -24.005859375, -23.19140625, -22.376953125, -21.5625, -20.748046875, -19.93359375, -19.119140625, -18.3046875, -17.490234375, -16.67578125, -15.861328125, -15.046875, -14.232421875, -13.41796875, -12.603515625, -11.7890625, -10.974609375, -10.16015625, -9.345703125, -8.53125, -7.716796875, -6.90234375, -6.087890625, -5.2734375, -4.458984375, -3.64453125, -2.830078125, -2.015625, -1.201171875, -0.38671875, 0.427734375, 1.2421875, 2.056640625, 2.87109375, 3.685546875, 4.5, 5.314453125, 6.12890625, 6.943359375, 7.7578125, 8.572265625, 9.38671875, 10.201171875, 11.015625, 11.830078125, 12.64453125, 13.458984375, 14.2734375, 15.087890625, 15.90234375, 16.716796875, 17.53125, 18.345703125, 19.16015625, 19.974609375, 20.7890625, 21.603515625, 22.41796875, 23.232421875, 24.046875]}, "gradients/decoder.transformer.h.23.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 3.0, 1.0, 2.0, 6.0, 10.0, 10.0, 16.0, 18.0, 32.0, 38.0, 58.0, 85.0, 127.0, 186.0, 256.0, 379.0, 573.0, 859.0, 1300.0, 1987.0, 3040.0, 4692.0, 7300.0, 11860.0, 19714.0, 36127.0, 73939.0, 197245.0, 446154.0, 120518.0, 52516.0, 27280.0, 15588.0, 9426.0, 5990.0, 3783.0, 2518.0, 1681.0, 1033.0, 706.0, 505.0, 314.0, 222.0, 135.0, 100.0, 70.0, 50.0, 39.0, 29.0, 15.0, 10.0, 9.0, 4.0, 3.0, 3.0, 2.0, 3.0, 3.0], "bins": [-1.0771484375, -1.0457229614257812, -1.0142974853515625, -0.9828720092773438, -0.951446533203125, -0.9200210571289062, -0.8885955810546875, -0.8571701049804688, -0.82574462890625, -0.7943191528320312, -0.7628936767578125, -0.7314682006835938, -0.700042724609375, -0.6686172485351562, -0.6371917724609375, -0.6057662963867188, -0.5743408203125, -0.5429153442382812, -0.5114898681640625, -0.48006439208984375, -0.448638916015625, -0.41721343994140625, -0.3857879638671875, -0.35436248779296875, -0.32293701171875, -0.29151153564453125, -0.2600860595703125, -0.22866058349609375, -0.197235107421875, -0.16580963134765625, -0.1343841552734375, -0.10295867919921875, -0.071533203125, -0.04010772705078125, -0.0086822509765625, 0.02274322509765625, 0.054168701171875, 0.08559417724609375, 0.1170196533203125, 0.14844512939453125, 0.17987060546875, 0.21129608154296875, 0.2427215576171875, 0.27414703369140625, 0.305572509765625, 0.33699798583984375, 0.3684234619140625, 0.39984893798828125, 0.4312744140625, 0.46269989013671875, 0.4941253662109375, 0.5255508422851562, 0.556976318359375, 0.5884017944335938, 0.6198272705078125, 0.6512527465820312, 0.68267822265625, 0.7141036987304688, 0.7455291748046875, 0.7769546508789062, 0.808380126953125, 0.8398056030273438, 0.8712310791015625, 0.9026565551757812, 0.93408203125]}, "gradients/decoder.transformer.h.23.crossattention.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 3.0, 0.0, 3.0, 0.0, 4.0, 5.0, 2.0, 5.0, 4.0, 6.0, 3.0, 11.0, 8.0, 15.0, 20.0, 10.0, 31.0, 24.0, 21.0, 23.0, 23.0, 23.0, 27.0, 37.0, 38.0, 33.0, 43.0, 34.0, 44.0, 1064.0, 33.0, 40.0, 37.0, 43.0, 29.0, 33.0, 24.0, 20.0, 19.0, 27.0, 19.0, 17.0, 25.0, 27.0, 14.0, 16.0, 10.0, 10.0, 5.0, 4.0, 5.0, 7.0, 7.0, 2.0, 3.0, 1.0, 2.0, 1.0, 1.0, 1.0], "bins": [-11.8671875, -11.5162353515625, -11.165283203125, -10.8143310546875, -10.46337890625, -10.1124267578125, -9.761474609375, -9.4105224609375, -9.0595703125, -8.7086181640625, -8.357666015625, -8.0067138671875, -7.65576171875, -7.3048095703125, -6.953857421875, -6.6029052734375, -6.251953125, -5.9010009765625, -5.550048828125, -5.1990966796875, -4.84814453125, -4.4971923828125, -4.146240234375, -3.7952880859375, -3.4443359375, -3.0933837890625, -2.742431640625, -2.3914794921875, -2.04052734375, -1.6895751953125, -1.338623046875, -0.9876708984375, -0.63671875, -0.2857666015625, 0.065185546875, 0.4161376953125, 0.76708984375, 1.1180419921875, 1.468994140625, 1.8199462890625, 2.1708984375, 2.5218505859375, 2.872802734375, 3.2237548828125, 3.57470703125, 3.9256591796875, 4.276611328125, 4.6275634765625, 4.978515625, 5.3294677734375, 5.680419921875, 6.0313720703125, 6.38232421875, 6.7332763671875, 7.084228515625, 7.4351806640625, 7.7861328125, 8.1370849609375, 8.488037109375, 8.8389892578125, 9.18994140625, 9.5408935546875, 9.891845703125, 10.2427978515625, 10.59375]}, "gradients/decoder.transformer.h.23.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 2.0, 3.0, 4.0, 6.0, 11.0, 11.0, 23.0, 24.0, 42.0, 77.0, 87.0, 114.0, 140.0, 239.0, 326.0, 448.0, 640.0, 959.0, 1384.0, 2031.0, 2996.0, 4145.0, 6323.0, 9218.0, 13765.0, 20089.0, 29807.0, 44978.0, 72009.0, 126919.0, 1352300.0, 163740.0, 85476.0, 52719.0, 34147.0, 22826.0, 15752.0, 10620.0, 7122.0, 4794.0, 3310.0, 2277.0, 1617.0, 1118.0, 759.0, 533.0, 357.0, 262.0, 173.0, 121.0, 87.0, 68.0, 47.0, 34.0, 24.0, 13.0, 12.0, 9.0, 7.0, 0.0, 1.0, 2.0, 3.0], "bins": [-0.475830078125, -0.4608039855957031, -0.44577789306640625, -0.4307518005371094, -0.4157257080078125, -0.4006996154785156, -0.38567352294921875, -0.3706474304199219, -0.355621337890625, -0.3405952453613281, -0.32556915283203125, -0.3105430603027344, -0.2955169677734375, -0.2804908752441406, -0.26546478271484375, -0.2504386901855469, -0.23541259765625, -0.22038650512695312, -0.20536041259765625, -0.19033432006835938, -0.1753082275390625, -0.16028213500976562, -0.14525604248046875, -0.13022994995117188, -0.115203857421875, -0.10017776489257812, -0.08515167236328125, -0.07012557983398438, -0.0550994873046875, -0.040073394775390625, -0.02504730224609375, -0.010021209716796875, 0.0050048828125, 0.020030975341796875, 0.03505706787109375, 0.050083160400390625, 0.0651092529296875, 0.08013534545898438, 0.09516143798828125, 0.11018753051757812, 0.125213623046875, 0.14023971557617188, 0.15526580810546875, 0.17029190063476562, 0.1853179931640625, 0.20034408569335938, 0.21537017822265625, 0.23039627075195312, 0.24542236328125, 0.2604484558105469, 0.27547454833984375, 0.2905006408691406, 0.3055267333984375, 0.3205528259277344, 0.33557891845703125, 0.3506050109863281, 0.365631103515625, 0.3806571960449219, 0.39568328857421875, 0.4107093811035156, 0.4257354736328125, 0.4407615661621094, 0.45578765869140625, 0.4708137512207031, 0.48583984375]}, "gradients/decoder.transformer.h.23.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 3.0, 1.0, 2.0, 4.0, 3.0, 9.0, 2.0, 2.0, 4.0, 5.0, 8.0, 9.0, 15.0, 10.0, 12.0, 9.0, 16.0, 26.0, 37.0, 36.0, 62.0, 82.0, 130.0, 164.0, 77.0, 69.0, 42.0, 33.0, 25.0, 25.0, 23.0, 15.0, 9.0, 9.0, 5.0, 6.0, 5.0, 7.0, 2.0, 5.0, 3.0, 1.0, 1.0, 2.0, 2.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.594160079956055e-05, -3.456044942140579e-05, -3.317929804325104e-05, -3.179814666509628e-05, -3.041699528694153e-05, -2.9035843908786774e-05, -2.765469253063202e-05, -2.6273541152477264e-05, -2.489238977432251e-05, -2.3511238396167755e-05, -2.2130087018013e-05, -2.0748935639858246e-05, -1.936778426170349e-05, -1.7986632883548737e-05, -1.6605481505393982e-05, -1.5224330127239227e-05, -1.3843178749084473e-05, -1.2462027370929718e-05, -1.1080875992774963e-05, -9.699724614620209e-06, -8.318573236465454e-06, -6.9374218583106995e-06, -5.556270480155945e-06, -4.17511910200119e-06, -2.7939677238464355e-06, -1.412816345691681e-06, -3.166496753692627e-08, 1.3494864106178284e-06, 2.730637788772583e-06, 4.111789166927338e-06, 5.492940545082092e-06, 6.874091923236847e-06, 8.255243301391602e-06, 9.636394679546356e-06, 1.101754605770111e-05, 1.2398697435855865e-05, 1.377984881401062e-05, 1.5161000192165375e-05, 1.654215157032013e-05, 1.7923302948474884e-05, 1.930445432662964e-05, 2.0685605704784393e-05, 2.2066757082939148e-05, 2.3447908461093903e-05, 2.4829059839248657e-05, 2.6210211217403412e-05, 2.7591362595558167e-05, 2.897251397371292e-05, 3.0353665351867676e-05, 3.173481673002243e-05, 3.3115968108177185e-05, 3.449711948633194e-05, 3.5878270864486694e-05, 3.725942224264145e-05, 3.8640573620796204e-05, 4.002172499895096e-05, 4.140287637710571e-05, 4.278402775526047e-05, 4.416517913341522e-05, 4.554633051156998e-05, 4.692748188972473e-05, 4.8308633267879486e-05, 4.968978464603424e-05, 5.1070936024188995e-05, 5.245208740234375e-05]}, "gradients/decoder.transformer.h.23.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 4.0, 3.0, 6.0, 9.0, 13.0, 12.0, 15.0, 9.0, 11.0, 29.0, 24.0, 40.0, 46.0, 50.0, 53.0, 103.0, 134.0, 175.0, 349.0, 960.0, 1021326.0, 23734.0, 525.0, 280.0, 169.0, 99.0, 86.0, 71.0, 49.0, 35.0, 30.0, 23.0, 16.0, 17.0, 18.0, 10.0, 8.0, 8.0, 5.0, 4.0, 4.0, 3.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.0008249282836914062, -0.0007972493767738342, -0.0007695704698562622, -0.0007418915629386902, -0.0007142126560211182, -0.0006865337491035461, -0.0006588548421859741, -0.0006311759352684021, -0.0006034970283508301, -0.0005758181214332581, -0.000548139214515686, -0.000520460307598114, -0.000492781400680542, -0.00046510249376296997, -0.00043742358684539795, -0.00040974467992782593, -0.0003820657730102539, -0.0003543868660926819, -0.00032670795917510986, -0.00029902905225753784, -0.0002713501453399658, -0.0002436712384223938, -0.00021599233150482178, -0.00018831342458724976, -0.00016063451766967773, -0.0001329556107521057, -0.00010527670383453369, -7.759779691696167e-05, -4.991888999938965e-05, -2.2239983081817627e-05, 5.4389238357543945e-06, 3.3117830753326416e-05, 6.079673767089844e-05, 8.847564458847046e-05, 0.00011615455150604248, 0.0001438334584236145, 0.00017151236534118652, 0.00019919127225875854, 0.00022687017917633057, 0.0002545490860939026, 0.0002822279930114746, 0.00030990689992904663, 0.00033758580684661865, 0.0003652647137641907, 0.0003929436206817627, 0.0004206225275993347, 0.00044830143451690674, 0.00047598034143447876, 0.0005036592483520508, 0.0005313381552696228, 0.0005590170621871948, 0.0005866959691047668, 0.0006143748760223389, 0.0006420537829399109, 0.0006697326898574829, 0.0006974115967750549, 0.000725090503692627, 0.000752769410610199, 0.000780448317527771, 0.000808127224445343, 0.000835806131362915, 0.0008634850382804871, 0.0008911639451980591, 0.0009188428521156311, 0.0009465217590332031]}, "gradients/decoder.transformer.h.23.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 6.0, 17.0, 68.0, 340.0, 489.0, 69.0, 21.0, 3.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.718855340499431e-05, -7.561613892903551e-05, -7.404372445307672e-05, -7.247131725307554e-05, -7.089890277711675e-05, -6.932648830115795e-05, -6.775407382519916e-05, -6.618165934924036e-05, -6.460925214923918e-05, -6.303683767328039e-05, -6.14644231973216e-05, -5.9892012359341606e-05, -5.831960152136162e-05, -5.6747187045402825e-05, -5.517477256944403e-05, -5.360236173146404e-05, -5.202994725550525e-05, -5.0457532779546455e-05, -4.888512194156647e-05, -4.731270746560767e-05, -4.5740296627627686e-05, -4.416788215166889e-05, -4.2595471313688904e-05, -4.102305683773011e-05, -3.9450642361771315e-05, -3.787822788581252e-05, -3.6305817047832534e-05, -3.473340257187374e-05, -3.316099173389375e-05, -3.158857725793496e-05, -3.0016164600965567e-05, -2.8443751943996176e-05, -2.6871339287026785e-05, -2.5298926630057395e-05, -2.3726513973088004e-05, -2.2154101316118613e-05, -2.058168684015982e-05, -1.900927600217983e-05, -1.7436861526221037e-05, -1.5864448869251646e-05, -1.4292036212282255e-05, -1.2719623555312864e-05, -1.1147210898343474e-05, -9.574797331879381e-06, -8.00238467490999e-06, -6.4299720179405995e-06, -4.857558451476507e-06, -3.285145794507116e-06, -1.7127331375377253e-06, -1.4032025319465902e-07, 1.4320926311484072e-06, 3.004505742865149e-06, 4.57691839983454e-06, 6.1493310568039306e-06, 7.721744623268023e-06, 9.294157280237414e-06, 1.0866569937206805e-05, 1.2438982594176196e-05, 1.4011395251145586e-05, 1.558380972710438e-05, 1.7156220565084368e-05, 1.8728635041043162e-05, 2.0301047698012553e-05, 2.1873460354981944e-05, 2.3445873011951335e-05]}, "gradients/decoder.transformer.h.23.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 0.0, 2.0, 1.0, 1.0, 1.0, 6.0, 1.0, 4.0, 7.0, 9.0, 8.0, 14.0, 11.0, 7.0, 16.0, 16.0, 24.0, 32.0, 26.0, 32.0, 38.0, 33.0, 44.0, 44.0, 55.0, 49.0, 36.0, 48.0, 32.0, 51.0, 50.0, 37.0, 33.0, 44.0, 34.0, 23.0, 31.0, 22.0, 18.0, 16.0, 18.0, 11.0, 3.0, 5.0, 6.0, 4.0, 2.0, 4.0, 3.0, 2.0, 2.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-2.1159648895263672e-05, -2.0544975996017456e-05, -1.993030309677124e-05, -1.9315630197525024e-05, -1.870095729827881e-05, -1.8086284399032593e-05, -1.7471611499786377e-05, -1.685693860054016e-05, -1.6242265701293945e-05, -1.562759280204773e-05, -1.5012919902801514e-05, -1.4398247003555298e-05, -1.3783574104309082e-05, -1.3168901205062866e-05, -1.255422830581665e-05, -1.1939555406570435e-05, -1.1324882507324219e-05, -1.0710209608078003e-05, -1.0095536708831787e-05, -9.480863809585571e-06, -8.866190910339355e-06, -8.25151801109314e-06, -7.636845111846924e-06, -7.022172212600708e-06, -6.407499313354492e-06, -5.792826414108276e-06, -5.1781535148620605e-06, -4.563480615615845e-06, -3.948807716369629e-06, -3.334134817123413e-06, -2.7194619178771973e-06, -2.1047890186309814e-06, -1.4901161193847656e-06, -8.754432201385498e-07, -2.60770320892334e-07, 3.5390257835388184e-07, 9.685754776000977e-07, 1.5832483768463135e-06, 2.1979212760925293e-06, 2.812594175338745e-06, 3.427267074584961e-06, 4.041939973831177e-06, 4.656612873077393e-06, 5.271285772323608e-06, 5.885958671569824e-06, 6.50063157081604e-06, 7.115304470062256e-06, 7.729977369308472e-06, 8.344650268554688e-06, 8.959323167800903e-06, 9.573996067047119e-06, 1.0188668966293335e-05, 1.080334186553955e-05, 1.1418014764785767e-05, 1.2032687664031982e-05, 1.2647360563278198e-05, 1.3262033462524414e-05, 1.387670636177063e-05, 1.4491379261016846e-05, 1.5106052160263062e-05, 1.5720725059509277e-05, 1.6335397958755493e-05, 1.695007085800171e-05, 1.7564743757247925e-05, 1.817941665649414e-05]}, "gradients/decoder.transformer.h.23.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 3.0, 3.0, 3.0, 3.0, 3.0, 5.0, 5.0, 6.0, 9.0, 12.0, 10.0, 13.0, 14.0, 13.0, 20.0, 20.0, 30.0, 29.0, 23.0, 46.0, 44.0, 48.0, 82.0, 76.0, 75.0, 67.0, 55.0, 63.0, 45.0, 32.0, 28.0, 22.0, 18.0, 17.0, 10.0, 10.0, 7.0, 11.0, 8.0, 4.0, 5.0, 4.0, 4.0, 5.0, 3.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-28.078125, -27.263671875, -26.44921875, -25.634765625, -24.8203125, -24.005859375, -23.19140625, -22.376953125, -21.5625, -20.748046875, -19.93359375, -19.119140625, -18.3046875, -17.490234375, -16.67578125, -15.861328125, -15.046875, -14.232421875, -13.41796875, -12.603515625, -11.7890625, -10.974609375, -10.16015625, -9.345703125, -8.53125, -7.716796875, -6.90234375, -6.087890625, -5.2734375, -4.458984375, -3.64453125, -2.830078125, -2.015625, -1.201171875, -0.38671875, 0.427734375, 1.2421875, 2.056640625, 2.87109375, 3.685546875, 4.5, 5.314453125, 6.12890625, 6.943359375, 7.7578125, 8.572265625, 9.38671875, 10.201171875, 11.015625, 11.830078125, 12.64453125, 13.458984375, 14.2734375, 15.087890625, 15.90234375, 16.716796875, 17.53125, 18.345703125, 19.16015625, 19.974609375, 20.7890625, 21.603515625, 22.41796875, 23.232421875, 24.046875]}, "gradients/decoder.transformer.h.23.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 2.0, 3.0, 1.0, 1.0, 5.0, 4.0, 12.0, 10.0, 17.0, 19.0, 23.0, 28.0, 45.0, 60.0, 74.0, 111.0, 144.0, 249.0, 345.0, 455.0, 692.0, 1119.0, 1711.0, 2874.0, 5180.0, 10881.0, 29415.0, 133848.0, 728061.0, 88757.0, 23246.0, 9248.0, 4606.0, 2543.0, 1627.0, 1038.0, 647.0, 427.0, 318.0, 201.0, 164.0, 96.0, 60.0, 55.0, 36.0, 36.0, 28.0, 12.0, 13.0, 8.0, 3.0, 4.0, 2.0, 2.0, 3.0, 0.0, 0.0, 1.0], "bins": [-104.5625, -101.5224609375, -98.482421875, -95.4423828125, -92.40234375, -89.3623046875, -86.322265625, -83.2822265625, -80.2421875, -77.2021484375, -74.162109375, -71.1220703125, -68.08203125, -65.0419921875, -62.001953125, -58.9619140625, -55.921875, -52.8818359375, -49.841796875, -46.8017578125, -43.76171875, -40.7216796875, -37.681640625, -34.6416015625, -31.6015625, -28.5615234375, -25.521484375, -22.4814453125, -19.44140625, -16.4013671875, -13.361328125, -10.3212890625, -7.28125, -4.2412109375, -1.201171875, 1.8388671875, 4.87890625, 7.9189453125, 10.958984375, 13.9990234375, 17.0390625, 20.0791015625, 23.119140625, 26.1591796875, 29.19921875, 32.2392578125, 35.279296875, 38.3193359375, 41.359375, 44.3994140625, 47.439453125, 50.4794921875, 53.51953125, 56.5595703125, 59.599609375, 62.6396484375, 65.6796875, 68.7197265625, 71.759765625, 74.7998046875, 77.83984375, 80.8798828125, 83.919921875, 86.9599609375, 90.0]}, "gradients/decoder.transformer.h.23.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 3.0, 3.0, 2.0, 2.0, 2.0, 6.0, 7.0, 11.0, 10.0, 11.0, 13.0, 14.0, 24.0, 30.0, 30.0, 30.0, 44.0, 33.0, 49.0, 54.0, 59.0, 94.0, 221.0, 1651.0, 153.0, 59.0, 61.0, 43.0, 36.0, 34.0, 32.0, 37.0, 36.0, 28.0, 28.0, 23.0, 8.0, 11.0, 15.0, 16.0, 4.0, 9.0, 9.0, 1.0, 5.0, 5.0, 1.0, 2.0, 1.0, 0.0, 2.0, 2.0], "bins": [-86.0, -83.5673828125, -81.134765625, -78.7021484375, -76.26953125, -73.8369140625, -71.404296875, -68.9716796875, -66.5390625, -64.1064453125, -61.673828125, -59.2412109375, -56.80859375, -54.3759765625, -51.943359375, -49.5107421875, -47.078125, -44.6455078125, -42.212890625, -39.7802734375, -37.34765625, -34.9150390625, -32.482421875, -30.0498046875, -27.6171875, -25.1845703125, -22.751953125, -20.3193359375, -17.88671875, -15.4541015625, -13.021484375, -10.5888671875, -8.15625, -5.7236328125, -3.291015625, -0.8583984375, 1.57421875, 4.0068359375, 6.439453125, 8.8720703125, 11.3046875, 13.7373046875, 16.169921875, 18.6025390625, 21.03515625, 23.4677734375, 25.900390625, 28.3330078125, 30.765625, 33.1982421875, 35.630859375, 38.0634765625, 40.49609375, 42.9287109375, 45.361328125, 47.7939453125, 50.2265625, 52.6591796875, 55.091796875, 57.5244140625, 59.95703125, 62.3896484375, 64.822265625, 67.2548828125, 69.6875]}, "gradients/decoder.transformer.h.23.attn.c_attn.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 5.0, 2.0, 2.0, 4.0, 7.0, 6.0, 16.0, 16.0, 9.0, 7.0, 18.0, 23.0, 28.0, 28.0, 31.0, 39.0, 45.0, 71.0, 157.0, 321.0, 1366.0, 30238.0, 3100125.0, 11588.0, 900.0, 239.0, 111.0, 70.0, 42.0, 38.0, 27.0, 29.0, 19.0, 24.0, 10.0, 10.0, 10.0, 8.0, 9.0, 4.0, 5.0, 1.0, 0.0, 2.0, 3.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0], "bins": [-340.25, -328.265625, -316.28125, -304.296875, -292.3125, -280.328125, -268.34375, -256.359375, -244.375, -232.390625, -220.40625, -208.421875, -196.4375, -184.453125, -172.46875, -160.484375, -148.5, -136.515625, -124.53125, -112.546875, -100.5625, -88.578125, -76.59375, -64.609375, -52.625, -40.640625, -28.65625, -16.671875, -4.6875, 7.296875, 19.28125, 31.265625, 43.25, 55.234375, 67.21875, 79.203125, 91.1875, 103.171875, 115.15625, 127.140625, 139.125, 151.109375, 163.09375, 175.078125, 187.0625, 199.046875, 211.03125, 223.015625, 235.0, 246.984375, 258.96875, 270.953125, 282.9375, 294.921875, 306.90625, 318.890625, 330.875, 342.859375, 354.84375, 366.828125, 378.8125, 390.796875, 402.78125, 414.765625, 426.75]}, "gradients/decoder.transformer.h.23.ln_1.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 14.0, 208.0, 707.0, 78.0, 8.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-808.6544189453125, -781.9524536132812, -755.25048828125, -728.5485229492188, -701.8465576171875, -675.1445922851562, -648.442626953125, -621.7406616210938, -595.0386962890625, -568.3367309570312, -541.634765625, -514.9328002929688, -488.2308349609375, -461.52886962890625, -434.826904296875, -408.12493896484375, -381.4229736328125, -354.72100830078125, -328.01904296875, -301.31707763671875, -274.6151123046875, -247.91314697265625, -221.211181640625, -194.50921630859375, -167.8072509765625, -141.10528564453125, -114.4033203125, -87.70135498046875, -60.9993896484375, -34.29742431640625, -7.595458984375, 19.10650634765625, 45.80841064453125, 72.5103759765625, 99.21234130859375, 125.914306640625, 152.61627197265625, 179.3182373046875, 206.02020263671875, 232.72216796875, 259.42413330078125, 286.1260986328125, 312.82806396484375, 339.530029296875, 366.23199462890625, 392.9339599609375, 419.63592529296875, 446.337890625, 473.03985595703125, 499.7418212890625, 526.4437866210938, 553.145751953125, 579.8477172851562, 606.5496826171875, 633.2516479492188, 659.95361328125, 686.6555786132812, 713.3575439453125, 740.0595092773438, 766.761474609375, 793.4634399414062, 820.1654052734375, 846.8673706054688, 873.5693359375, 900.2713012695312]}, "gradients/decoder.transformer.h.23.ln_1.bias": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 3.0, 1.0, 5.0, 3.0, 4.0, 4.0, 5.0, 5.0, 7.0, 6.0, 5.0, 14.0, 7.0, 13.0, 16.0, 10.0, 21.0, 20.0, 25.0, 29.0, 30.0, 29.0, 18.0, 35.0, 22.0, 21.0, 33.0, 39.0, 39.0, 34.0, 34.0, 30.0, 26.0, 39.0, 37.0, 15.0, 28.0, 29.0, 24.0, 31.0, 24.0, 27.0, 20.0, 17.0, 16.0, 22.0, 14.0, 9.0, 13.0, 10.0, 8.0, 10.0, 4.0, 10.0, 2.0, 4.0, 3.0, 2.0, 4.0, 1.0, 4.0], "bins": [-201.40931701660156, -195.08128356933594, -188.75326538085938, -182.42523193359375, -176.09719848632812, -169.76918029785156, -163.44114685058594, -157.11312866210938, -150.78509521484375, -144.45706176757812, -138.12904357910156, -131.80101013183594, -125.47298431396484, -119.14495849609375, -112.81692504882812, -106.48889923095703, -100.16087341308594, -93.83284759521484, -87.50482177734375, -81.17678833007812, -74.84876251220703, -68.52073669433594, -62.19270706176758, -55.86467742919922, -49.536651611328125, -43.20862579345703, -36.88059616088867, -30.552568435668945, -24.22454071044922, -17.896512985229492, -11.568485260009766, -5.240455627441406, 1.087554931640625, 7.415582656860352, 13.743610382080078, 20.071638107299805, 26.39966583251953, 32.727691650390625, 39.055721282958984, 45.383750915527344, 51.71177673339844, 58.03980255126953, 64.36782836914062, 70.69586181640625, 77.02388763427734, 83.35191345214844, 89.67994689941406, 96.00797271728516, 102.33599853515625, 108.66402435302734, 114.99205017089844, 121.32008361816406, 127.64810943603516, 133.97613525390625, 140.30416870117188, 146.6322021484375, 152.96022033691406, 159.2882537841797, 165.61627197265625, 171.94430541992188, 178.2723388671875, 184.60035705566406, 190.9283905029297, 197.25640869140625, 203.58444213867188]}, "gradients/decoder.transformer.h.22.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 1.0, 3.0, 4.0, 1.0, 2.0, 7.0, 7.0, 2.0, 11.0, 8.0, 12.0, 9.0, 14.0, 13.0, 18.0, 9.0, 32.0, 26.0, 26.0, 45.0, 35.0, 51.0, 54.0, 59.0, 91.0, 51.0, 65.0, 67.0, 58.0, 42.0, 32.0, 25.0, 21.0, 15.0, 15.0, 15.0, 10.0, 7.0, 15.0, 7.0, 6.0, 4.0, 4.0, 1.0, 3.0, 6.0, 2.0, 0.0, 1.0, 2.0, 1.0, 1.0], "bins": [-29.78125, -28.9599609375, -28.138671875, -27.3173828125, -26.49609375, -25.6748046875, -24.853515625, -24.0322265625, -23.2109375, -22.3896484375, -21.568359375, -20.7470703125, -19.92578125, -19.1044921875, -18.283203125, -17.4619140625, -16.640625, -15.8193359375, -14.998046875, -14.1767578125, -13.35546875, -12.5341796875, -11.712890625, -10.8916015625, -10.0703125, -9.2490234375, -8.427734375, -7.6064453125, -6.78515625, -5.9638671875, -5.142578125, -4.3212890625, -3.5, -2.6787109375, -1.857421875, -1.0361328125, -0.21484375, 0.6064453125, 1.427734375, 2.2490234375, 3.0703125, 3.8916015625, 4.712890625, 5.5341796875, 6.35546875, 7.1767578125, 7.998046875, 8.8193359375, 9.640625, 10.4619140625, 11.283203125, 12.1044921875, 12.92578125, 13.7470703125, 14.568359375, 15.3896484375, 16.2109375, 17.0322265625, 17.853515625, 18.6748046875, 19.49609375, 20.3173828125, 21.138671875, 21.9599609375, 22.78125]}, "gradients/decoder.transformer.h.22.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 1.0, 6.0, 4.0, 8.0, 3.0, 7.0, 15.0, 19.0, 18.0, 24.0, 30.0, 43.0, 46.0, 69.0, 89.0, 110.0, 173.0, 271.0, 401.0, 696.0, 1317.0, 2404.0, 4990.0, 11174.0, 40646.0, 3911420.0, 193224.0, 15440.0, 5751.0, 2633.0, 1346.0, 698.0, 382.0, 254.0, 151.0, 99.0, 85.0, 66.0, 29.0, 36.0, 26.0, 24.0, 16.0, 16.0, 8.0, 7.0, 6.0, 3.0, 6.0, 1.0, 2.0, 0.0, 2.0], "bins": [-166.875, -162.3232421875, -157.771484375, -153.2197265625, -148.66796875, -144.1162109375, -139.564453125, -135.0126953125, -130.4609375, -125.9091796875, -121.357421875, -116.8056640625, -112.25390625, -107.7021484375, -103.150390625, -98.5986328125, -94.046875, -89.4951171875, -84.943359375, -80.3916015625, -75.83984375, -71.2880859375, -66.736328125, -62.1845703125, -57.6328125, -53.0810546875, -48.529296875, -43.9775390625, -39.42578125, -34.8740234375, -30.322265625, -25.7705078125, -21.21875, -16.6669921875, -12.115234375, -7.5634765625, -3.01171875, 1.5400390625, 6.091796875, 10.6435546875, 15.1953125, 19.7470703125, 24.298828125, 28.8505859375, 33.40234375, 37.9541015625, 42.505859375, 47.0576171875, 51.609375, 56.1611328125, 60.712890625, 65.2646484375, 69.81640625, 74.3681640625, 78.919921875, 83.4716796875, 88.0234375, 92.5751953125, 97.126953125, 101.6787109375, 106.23046875, 110.7822265625, 115.333984375, 119.8857421875, 124.4375]}, "gradients/decoder.transformer.h.22.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 1.0, 0.0, 0.0, 1.0, 3.0, 2.0, 4.0, 3.0, 9.0, 7.0, 15.0, 11.0, 22.0, 25.0, 35.0, 43.0, 51.0, 122.0, 478.0, 1609.0, 1055.0, 277.0, 92.0, 50.0, 33.0, 37.0, 26.0, 16.0, 19.0, 11.0, 8.0, 4.0, 5.0, 3.0, 2.0, 1.0, 0.0, 2.0, 1.0, 0.0, 3.0, 0.0, 2.0, 3.0], "bins": [-142.375, -138.794921875, -135.21484375, -131.634765625, -128.0546875, -124.474609375, -120.89453125, -117.314453125, -113.734375, -110.154296875, -106.57421875, -102.994140625, -99.4140625, -95.833984375, -92.25390625, -88.673828125, -85.09375, -81.513671875, -77.93359375, -74.353515625, -70.7734375, -67.193359375, -63.61328125, -60.033203125, -56.453125, -52.873046875, -49.29296875, -45.712890625, -42.1328125, -38.552734375, -34.97265625, -31.392578125, -27.8125, -24.232421875, -20.65234375, -17.072265625, -13.4921875, -9.912109375, -6.33203125, -2.751953125, 0.828125, 4.408203125, 7.98828125, 11.568359375, 15.1484375, 18.728515625, 22.30859375, 25.888671875, 29.46875, 33.048828125, 36.62890625, 40.208984375, 43.7890625, 47.369140625, 50.94921875, 54.529296875, 58.109375, 61.689453125, 65.26953125, 68.849609375, 72.4296875, 76.009765625, 79.58984375, 83.169921875, 86.75]}, "gradients/decoder.transformer.h.22.mlp.c_fc.weight": {"_type": "histogram", "values": [2.0, 1.0, 2.0, 1.0, 3.0, 7.0, 1.0, 11.0, 7.0, 20.0, 20.0, 26.0, 35.0, 38.0, 62.0, 125.0, 182.0, 303.0, 432.0, 803.0, 1457.0, 2810.0, 6005.0, 14534.0, 40965.0, 225194.0, 3713759.0, 132644.0, 32270.0, 11858.0, 5093.0, 2512.0, 1274.0, 705.0, 395.0, 264.0, 158.0, 96.0, 80.0, 43.0, 37.0, 21.0, 12.0, 12.0, 6.0, 7.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-127.75, -122.91015625, -118.0703125, -113.23046875, -108.390625, -103.55078125, -98.7109375, -93.87109375, -89.03125, -84.19140625, -79.3515625, -74.51171875, -69.671875, -64.83203125, -59.9921875, -55.15234375, -50.3125, -45.47265625, -40.6328125, -35.79296875, -30.953125, -26.11328125, -21.2734375, -16.43359375, -11.59375, -6.75390625, -1.9140625, 2.92578125, 7.765625, 12.60546875, 17.4453125, 22.28515625, 27.125, 31.96484375, 36.8046875, 41.64453125, 46.484375, 51.32421875, 56.1640625, 61.00390625, 65.84375, 70.68359375, 75.5234375, 80.36328125, 85.203125, 90.04296875, 94.8828125, 99.72265625, 104.5625, 109.40234375, 114.2421875, 119.08203125, 123.921875, 128.76171875, 133.6015625, 138.44140625, 143.28125, 148.12109375, 152.9609375, 157.80078125, 162.640625, 167.48046875, 172.3203125, 177.16015625, 182.0]}, "gradients/decoder.transformer.h.22.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 6.0, 18.0, 58.0, 284.0, 502.0, 106.0, 26.0, 9.0, 5.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-465.0841979980469, -432.31146240234375, -399.53875732421875, -366.7660217285156, -333.9932861328125, -301.2205505371094, -268.44781494140625, -235.67510986328125, -202.90237426757812, -170.129638671875, -137.35691833496094, -104.58419036865234, -71.81146240234375, -39.038726806640625, -6.2660064697265625, 26.5067138671875, 59.279449462890625, 92.05217742919922, 124.82490539550781, 157.59762573242188, 190.370361328125, 223.14309692382812, 255.9158172607422, 288.68853759765625, 321.4612731933594, 354.2340087890625, 387.0067138671875, 419.7794494628906, 452.55218505859375, 485.3249206542969, 518.09765625, 550.870361328125, 583.6431884765625, 616.4158935546875, 649.1886596679688, 681.9613647460938, 714.734130859375, 747.5068359375, 780.279541015625, 813.05224609375, 845.8250122070312, 878.5977172851562, 911.3704833984375, 944.1431884765625, 976.9158935546875, 1009.6886596679688, 1042.46142578125, 1075.234130859375, 1108.0068359375, 1140.779541015625, 1173.55224609375, 1206.3250732421875, 1239.0977783203125, 1271.8704833984375, 1304.6431884765625, 1337.4158935546875, 1370.188720703125, 1402.96142578125, 1435.734130859375, 1468.5069580078125, 1501.2796630859375, 1534.0523681640625, 1566.8250732421875, 1599.5977783203125, 1632.3704833984375]}, "gradients/decoder.transformer.h.22.ln_2.bias": {"_type": "histogram", "values": [3.0, 1.0, 3.0, 1.0, 3.0, 2.0, 1.0, 4.0, 4.0, 10.0, 3.0, 7.0, 8.0, 8.0, 19.0, 16.0, 13.0, 24.0, 28.0, 28.0, 32.0, 22.0, 48.0, 43.0, 23.0, 33.0, 36.0, 30.0, 42.0, 44.0, 40.0, 36.0, 32.0, 32.0, 30.0, 39.0, 30.0, 23.0, 39.0, 31.0, 27.0, 24.0, 11.0, 15.0, 16.0, 13.0, 7.0, 6.0, 11.0, 4.0, 3.0, 1.0, 4.0, 2.0, 0.0, 2.0, 2.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-196.9012908935547, -190.0442657470703, -183.18724060058594, -176.33021545410156, -169.47317504882812, -162.61614990234375, -155.75912475585938, -148.902099609375, -142.04507446289062, -135.18804931640625, -128.33102416992188, -121.47399139404297, -114.6169662475586, -107.75994110107422, -100.90290832519531, -94.04588317871094, -87.18885803222656, -80.33183288574219, -73.47480773925781, -66.6177749633789, -59.76074981689453, -52.903724670410156, -46.046695709228516, -39.189666748046875, -32.3326416015625, -25.475614547729492, -18.618587493896484, -11.761560440063477, -4.904533386230469, 1.9524917602539062, 8.809520721435547, 15.666549682617188, 22.5235595703125, 29.380586624145508, 36.237613677978516, 43.094642639160156, 49.95166778564453, 56.808692932128906, 63.66572189331055, 70.52275085449219, 77.37977600097656, 84.23680114746094, 91.09382629394531, 97.95085906982422, 104.8078842163086, 111.66490936279297, 118.52194213867188, 125.37896728515625, 132.23599243164062, 139.093017578125, 145.95004272460938, 152.80706787109375, 159.66409301757812, 166.5211181640625, 173.37815856933594, 180.2351837158203, 187.0922088623047, 193.94923400878906, 200.80625915527344, 207.6632843017578, 214.52032470703125, 221.37734985351562, 228.234375, 235.09140014648438, 241.94842529296875]}, "gradients/decoder.transformer.h.22.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0, 2.0, 3.0, 4.0, 5.0, 5.0, 3.0, 11.0, 9.0, 7.0, 11.0, 15.0, 10.0, 12.0, 14.0, 26.0, 23.0, 23.0, 29.0, 34.0, 54.0, 50.0, 61.0, 55.0, 61.0, 56.0, 52.0, 71.0, 54.0, 41.0, 44.0, 17.0, 25.0, 25.0, 18.0, 14.0, 16.0, 11.0, 7.0, 6.0, 5.0, 6.0, 6.0, 4.0, 0.0, 3.0, 0.0, 2.0, 1.0, 2.0, 0.0, 0.0, 2.0, 1.0], "bins": [-30.0, -29.124755859375, -28.24951171875, -27.374267578125, -26.4990234375, -25.623779296875, -24.74853515625, -23.873291015625, -22.998046875, -22.122802734375, -21.24755859375, -20.372314453125, -19.4970703125, -18.621826171875, -17.74658203125, -16.871337890625, -15.99609375, -15.120849609375, -14.24560546875, -13.370361328125, -12.4951171875, -11.619873046875, -10.74462890625, -9.869384765625, -8.994140625, -8.118896484375, -7.24365234375, -6.368408203125, -5.4931640625, -4.617919921875, -3.74267578125, -2.867431640625, -1.9921875, -1.116943359375, -0.24169921875, 0.633544921875, 1.5087890625, 2.384033203125, 3.25927734375, 4.134521484375, 5.009765625, 5.885009765625, 6.76025390625, 7.635498046875, 8.5107421875, 9.385986328125, 10.26123046875, 11.136474609375, 12.01171875, 12.886962890625, 13.76220703125, 14.637451171875, 15.5126953125, 16.387939453125, 17.26318359375, 18.138427734375, 19.013671875, 19.888916015625, 20.76416015625, 21.639404296875, 22.5146484375, 23.389892578125, 24.26513671875, 25.140380859375, 26.015625]}, "gradients/decoder.transformer.h.22.crossattention.c_proj.weight": {"_type": "histogram", "values": [4.0, 2.0, 1.0, 5.0, 2.0, 1.0, 9.0, 6.0, 13.0, 19.0, 28.0, 44.0, 41.0, 63.0, 84.0, 131.0, 178.0, 250.0, 367.0, 485.0, 744.0, 1069.0, 1509.0, 2263.0, 3424.0, 5175.0, 8220.0, 13116.0, 21701.0, 38733.0, 77037.0, 191934.0, 406925.0, 133794.0, 59390.0, 31552.0, 18140.0, 11055.0, 7093.0, 4530.0, 2993.0, 2026.0, 1348.0, 939.0, 631.0, 426.0, 304.0, 216.0, 169.0, 115.0, 80.0, 53.0, 43.0, 27.0, 18.0, 7.0, 15.0, 9.0, 6.0, 6.0, 4.0, 1.0, 1.0, 2.0], "bins": [-1.0625, -1.029632568359375, -0.99676513671875, -0.963897705078125, -0.9310302734375, -0.898162841796875, -0.86529541015625, -0.832427978515625, -0.799560546875, -0.766693115234375, -0.73382568359375, -0.700958251953125, -0.6680908203125, -0.635223388671875, -0.60235595703125, -0.569488525390625, -0.53662109375, -0.503753662109375, -0.47088623046875, -0.438018798828125, -0.4051513671875, -0.372283935546875, -0.33941650390625, -0.306549072265625, -0.273681640625, -0.240814208984375, -0.20794677734375, -0.175079345703125, -0.1422119140625, -0.109344482421875, -0.07647705078125, -0.043609619140625, -0.0107421875, 0.022125244140625, 0.05499267578125, 0.087860107421875, 0.1207275390625, 0.153594970703125, 0.18646240234375, 0.219329833984375, 0.252197265625, 0.285064697265625, 0.31793212890625, 0.350799560546875, 0.3836669921875, 0.416534423828125, 0.44940185546875, 0.482269287109375, 0.51513671875, 0.548004150390625, 0.58087158203125, 0.613739013671875, 0.6466064453125, 0.679473876953125, 0.71234130859375, 0.745208740234375, 0.778076171875, 0.810943603515625, 0.84381103515625, 0.876678466796875, 0.9095458984375, 0.942413330078125, 0.97528076171875, 1.008148193359375, 1.041015625]}, "gradients/decoder.transformer.h.22.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 4.0, 3.0, 2.0, 3.0, 6.0, 11.0, 10.0, 18.0, 15.0, 25.0, 16.0, 21.0, 27.0, 29.0, 34.0, 37.0, 39.0, 36.0, 42.0, 46.0, 34.0, 1079.0, 52.0, 31.0, 46.0, 53.0, 46.0, 33.0, 47.0, 35.0, 23.0, 32.0, 10.0, 20.0, 13.0, 10.0, 8.0, 13.0, 8.0, 4.0, 1.0, 7.0, 2.0, 5.0, 1.0, 1.0, 1.0, 2.0, 1.0], "bins": [-17.5625, -17.08154296875, -16.6005859375, -16.11962890625, -15.638671875, -15.15771484375, -14.6767578125, -14.19580078125, -13.71484375, -13.23388671875, -12.7529296875, -12.27197265625, -11.791015625, -11.31005859375, -10.8291015625, -10.34814453125, -9.8671875, -9.38623046875, -8.9052734375, -8.42431640625, -7.943359375, -7.46240234375, -6.9814453125, -6.50048828125, -6.01953125, -5.53857421875, -5.0576171875, -4.57666015625, -4.095703125, -3.61474609375, -3.1337890625, -2.65283203125, -2.171875, -1.69091796875, -1.2099609375, -0.72900390625, -0.248046875, 0.23291015625, 0.7138671875, 1.19482421875, 1.67578125, 2.15673828125, 2.6376953125, 3.11865234375, 3.599609375, 4.08056640625, 4.5615234375, 5.04248046875, 5.5234375, 6.00439453125, 6.4853515625, 6.96630859375, 7.447265625, 7.92822265625, 8.4091796875, 8.89013671875, 9.37109375, 9.85205078125, 10.3330078125, 10.81396484375, 11.294921875, 11.77587890625, 12.2568359375, 12.73779296875, 13.21875]}, "gradients/decoder.transformer.h.22.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 5.0, 5.0, 9.0, 9.0, 15.0, 33.0, 45.0, 54.0, 75.0, 131.0, 201.0, 297.0, 489.0, 761.0, 1218.0, 1937.0, 3077.0, 5040.0, 8036.0, 13234.0, 22463.0, 39488.0, 71083.0, 137288.0, 1409632.0, 178459.0, 88157.0, 47581.0, 27102.0, 16005.0, 9738.0, 5848.0, 3488.0, 2192.0, 1413.0, 890.0, 601.0, 344.0, 224.0, 178.0, 100.0, 69.0, 42.0, 35.0, 21.0, 10.0, 8.0, 2.0, 4.0, 1.0, 3.0, 2.0, 0.0, 0.0, 0.0, 1.0, 2.0], "bins": [-0.70458984375, -0.682342529296875, -0.66009521484375, -0.637847900390625, -0.6156005859375, -0.593353271484375, -0.57110595703125, -0.548858642578125, -0.526611328125, -0.504364013671875, -0.48211669921875, -0.459869384765625, -0.4376220703125, -0.415374755859375, -0.39312744140625, -0.370880126953125, -0.3486328125, -0.326385498046875, -0.30413818359375, -0.281890869140625, -0.2596435546875, -0.237396240234375, -0.21514892578125, -0.192901611328125, -0.170654296875, -0.148406982421875, -0.12615966796875, -0.103912353515625, -0.0816650390625, -0.059417724609375, -0.03717041015625, -0.014923095703125, 0.00732421875, 0.029571533203125, 0.05181884765625, 0.074066162109375, 0.0963134765625, 0.118560791015625, 0.14080810546875, 0.163055419921875, 0.185302734375, 0.207550048828125, 0.22979736328125, 0.252044677734375, 0.2742919921875, 0.296539306640625, 0.31878662109375, 0.341033935546875, 0.36328125, 0.385528564453125, 0.40777587890625, 0.430023193359375, 0.4522705078125, 0.474517822265625, 0.49676513671875, 0.519012451171875, 0.541259765625, 0.563507080078125, 0.58575439453125, 0.608001708984375, 0.6302490234375, 0.652496337890625, 0.67474365234375, 0.696990966796875, 0.71923828125]}, "gradients/decoder.transformer.h.22.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 1.0, 3.0, 2.0, 2.0, 3.0, 5.0, 5.0, 7.0, 5.0, 11.0, 10.0, 15.0, 17.0, 15.0, 25.0, 24.0, 28.0, 41.0, 38.0, 53.0, 61.0, 74.0, 122.0, 67.0, 57.0, 60.0, 34.0, 41.0, 34.0, 28.0, 24.0, 23.0, 11.0, 17.0, 14.0, 9.0, 7.0, 5.0, 6.0, 4.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-6.23464584350586e-05, -6.041862070560455e-05, -5.849078297615051e-05, -5.656294524669647e-05, -5.463510751724243e-05, -5.270726978778839e-05, -5.077943205833435e-05, -4.885159432888031e-05, -4.692375659942627e-05, -4.499591886997223e-05, -4.306808114051819e-05, -4.114024341106415e-05, -3.921240568161011e-05, -3.728456795215607e-05, -3.5356730222702026e-05, -3.3428892493247986e-05, -3.1501054763793945e-05, -2.9573217034339905e-05, -2.7645379304885864e-05, -2.5717541575431824e-05, -2.3789703845977783e-05, -2.1861866116523743e-05, -1.9934028387069702e-05, -1.800619065761566e-05, -1.607835292816162e-05, -1.415051519870758e-05, -1.222267746925354e-05, -1.02948397397995e-05, -8.367002010345459e-06, -6.4391642808914185e-06, -4.511326551437378e-06, -2.5834888219833374e-06, -6.556510925292969e-07, 1.2721866369247437e-06, 3.200024366378784e-06, 5.127862095832825e-06, 7.055699825286865e-06, 8.983537554740906e-06, 1.0911375284194946e-05, 1.2839213013648987e-05, 1.4767050743103027e-05, 1.6694888472557068e-05, 1.862272620201111e-05, 2.055056393146515e-05, 2.247840166091919e-05, 2.440623939037323e-05, 2.633407711982727e-05, 2.826191484928131e-05, 3.018975257873535e-05, 3.211759030818939e-05, 3.404542803764343e-05, 3.597326576709747e-05, 3.7901103496551514e-05, 3.9828941226005554e-05, 4.1756778955459595e-05, 4.3684616684913635e-05, 4.5612454414367676e-05, 4.7540292143821716e-05, 4.946812987327576e-05, 5.13959676027298e-05, 5.332380533218384e-05, 5.525164306163788e-05, 5.717948079109192e-05, 5.910731852054596e-05, 6.103515625e-05]}, "gradients/decoder.transformer.h.22.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 4.0, 1.0, 5.0, 2.0, 2.0, 4.0, 8.0, 6.0, 9.0, 14.0, 13.0, 20.0, 25.0, 30.0, 40.0, 47.0, 59.0, 62.0, 88.0, 100.0, 110.0, 159.0, 284.0, 1162.0, 51577.0, 984820.0, 8411.0, 543.0, 205.0, 136.0, 113.0, 93.0, 77.0, 68.0, 54.0, 34.0, 49.0, 30.0, 26.0, 19.0, 13.0, 12.0, 8.0, 5.0, 4.0, 2.0, 4.0, 5.0, 3.0, 0.0, 2.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0], "bins": [-0.001018524169921875, -0.0009869933128356934, -0.0009554624557495117, -0.0009239315986633301, -0.0008924007415771484, -0.0008608698844909668, -0.0008293390274047852, -0.0007978081703186035, -0.0007662773132324219, -0.0007347464561462402, -0.0007032155990600586, -0.000671684741973877, -0.0006401538848876953, -0.0006086230278015137, -0.000577092170715332, -0.0005455613136291504, -0.0005140304565429688, -0.0004824995994567871, -0.00045096874237060547, -0.00041943788528442383, -0.0003879070281982422, -0.00035637617111206055, -0.0003248453140258789, -0.00029331445693969727, -0.0002617835998535156, -0.00023025274276733398, -0.00019872188568115234, -0.0001671910285949707, -0.00013566017150878906, -0.00010412931442260742, -7.259845733642578e-05, -4.106760025024414e-05, -9.5367431640625e-06, 2.199411392211914e-05, 5.352497100830078e-05, 8.505582809448242e-05, 0.00011658668518066406, 0.0001481175422668457, 0.00017964839935302734, 0.00021117925643920898, 0.00024271011352539062, 0.00027424097061157227, 0.0003057718276977539, 0.00033730268478393555, 0.0003688335418701172, 0.00040036439895629883, 0.00043189525604248047, 0.0004634261131286621, 0.0004949569702148438, 0.0005264878273010254, 0.000558018684387207, 0.0005895495414733887, 0.0006210803985595703, 0.000652611255645752, 0.0006841421127319336, 0.0007156729698181152, 0.0007472038269042969, 0.0007787346839904785, 0.0008102655410766602, 0.0008417963981628418, 0.0008733272552490234, 0.0009048581123352051, 0.0009363889694213867, 0.0009679198265075684, 0.00099945068359375]}, "gradients/decoder.transformer.h.22.ln_cross_attn.weight": {"_type": "histogram", "values": [2.0, 5.0, 6.0, 4.0, 31.0, 58.0, 107.0, 417.0, 248.0, 81.0, 32.0, 15.0, 5.0, 3.0, 4.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.124879574694205e-05, -1.852347304520663e-05, -1.5798148524481803e-05, -1.3072825822746381e-05, -1.0347502211516257e-05, -7.622178600286134e-06, -4.8968558985507116e-06, -2.171531377825886e-06, 5.53791323909536e-07, 3.2791147077659843e-06, 6.004438091622433e-06, 8.729761248105206e-06, 1.145508485933533e-05, 1.4180408470565453e-05, 1.6905731172300875e-05, 1.96310556930257e-05, 2.2356378394761123e-05, 2.5081701096496545e-05, 2.780702561722137e-05, 3.053234831895679e-05, 3.3257671020692214e-05, 3.5982993722427636e-05, 3.8708320062141865e-05, 4.143364276387729e-05, 4.415896546561271e-05, 4.688428816734813e-05, 4.9609610869083554e-05, 5.233493720879778e-05, 5.5060259910533205e-05, 5.778558261226863e-05, 6.051090531400405e-05, 6.323622801573947e-05, 6.59615543554537e-05, 6.868688069516793e-05, 7.141219975892454e-05, 7.413752609863877e-05, 7.686284516239539e-05, 7.958817150210962e-05, 8.231349056586623e-05, 8.503881690558046e-05, 8.776414324529469e-05, 9.048946958500892e-05, 9.321478864876553e-05, 9.594011498847976e-05, 9.866543405223638e-05, 0.00010139076039195061, 0.00010411608673166484, 0.00010684140579542145, 0.00010956672485917807, 0.0001122920511988923, 0.00011501737026264891, 0.00011774269660236314, 0.00012046801566611975, 0.00012319334200583398, 0.0001259186683455482, 0.00012864399468526244, 0.00013136932102497667, 0.0001340946473646909, 0.00013681997370440513, 0.00013954528549220413, 0.00014227061183191836, 0.0001449959381716326, 0.00014772126451134682, 0.00015044659085106105, 0.00015317190263886005]}, "gradients/decoder.transformer.h.22.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 3.0, 4.0, 7.0, 3.0, 8.0, 7.0, 14.0, 9.0, 14.0, 17.0, 25.0, 17.0, 25.0, 27.0, 23.0, 32.0, 32.0, 38.0, 30.0, 40.0, 43.0, 47.0, 37.0, 41.0, 38.0, 30.0, 43.0, 42.0, 25.0, 33.0, 42.0, 31.0, 34.0, 33.0, 20.0, 20.0, 15.0, 12.0, 10.0, 11.0, 4.0, 7.0, 5.0, 6.0, 1.0, 6.0, 3.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 2.0], "bins": [-3.24249267578125e-05, -3.145076334476471e-05, -3.047659993171692e-05, -2.950243651866913e-05, -2.8528273105621338e-05, -2.7554109692573547e-05, -2.6579946279525757e-05, -2.5605782866477966e-05, -2.4631619453430176e-05, -2.3657456040382385e-05, -2.2683292627334595e-05, -2.1709129214286804e-05, -2.0734965801239014e-05, -1.9760802388191223e-05, -1.8786638975143433e-05, -1.7812475562095642e-05, -1.683831214904785e-05, -1.586414873600006e-05, -1.488998532295227e-05, -1.391582190990448e-05, -1.294165849685669e-05, -1.1967495083808899e-05, -1.0993331670761108e-05, -1.0019168257713318e-05, -9.045004844665527e-06, -8.070841431617737e-06, -7.096678018569946e-06, -6.122514605522156e-06, -5.148351192474365e-06, -4.174187779426575e-06, -3.200024366378784e-06, -2.2258609533309937e-06, -1.2516975402832031e-06, -2.775341272354126e-07, 6.966292858123779e-07, 1.6707926988601685e-06, 2.644956111907959e-06, 3.6191195249557495e-06, 4.59328293800354e-06, 5.5674463510513306e-06, 6.541609764099121e-06, 7.515773177146912e-06, 8.489936590194702e-06, 9.464100003242493e-06, 1.0438263416290283e-05, 1.1412426829338074e-05, 1.2386590242385864e-05, 1.3360753655433655e-05, 1.4334917068481445e-05, 1.5309080481529236e-05, 1.6283243894577026e-05, 1.7257407307624817e-05, 1.8231570720672607e-05, 1.9205734133720398e-05, 2.017989754676819e-05, 2.115406095981598e-05, 2.212822437286377e-05, 2.310238778591156e-05, 2.407655119895935e-05, 2.505071461200714e-05, 2.602487802505493e-05, 2.6999041438102722e-05, 2.7973204851150513e-05, 2.8947368264198303e-05, 2.9921531677246094e-05]}, "gradients/decoder.transformer.h.22.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0, 2.0, 3.0, 4.0, 5.0, 5.0, 3.0, 11.0, 9.0, 7.0, 11.0, 15.0, 10.0, 12.0, 14.0, 26.0, 23.0, 23.0, 29.0, 34.0, 54.0, 50.0, 61.0, 55.0, 61.0, 56.0, 52.0, 71.0, 54.0, 41.0, 44.0, 17.0, 25.0, 25.0, 18.0, 14.0, 16.0, 11.0, 7.0, 6.0, 5.0, 6.0, 6.0, 4.0, 0.0, 3.0, 0.0, 2.0, 1.0, 2.0, 0.0, 0.0, 2.0, 1.0], "bins": [-30.0, -29.124755859375, -28.24951171875, -27.374267578125, -26.4990234375, -25.623779296875, -24.74853515625, -23.873291015625, -22.998046875, -22.122802734375, -21.24755859375, -20.372314453125, -19.4970703125, -18.621826171875, -17.74658203125, -16.871337890625, -15.99609375, -15.120849609375, -14.24560546875, -13.370361328125, -12.4951171875, -11.619873046875, -10.74462890625, -9.869384765625, -8.994140625, -8.118896484375, -7.24365234375, -6.368408203125, -5.4931640625, -4.617919921875, -3.74267578125, -2.867431640625, -1.9921875, -1.116943359375, -0.24169921875, 0.633544921875, 1.5087890625, 2.384033203125, 3.25927734375, 4.134521484375, 5.009765625, 5.885009765625, 6.76025390625, 7.635498046875, 8.5107421875, 9.385986328125, 10.26123046875, 11.136474609375, 12.01171875, 12.886962890625, 13.76220703125, 14.637451171875, 15.5126953125, 16.387939453125, 17.26318359375, 18.138427734375, 19.013671875, 19.888916015625, 20.76416015625, 21.639404296875, 22.5146484375, 23.389892578125, 24.26513671875, 25.140380859375, 26.015625]}, "gradients/decoder.transformer.h.22.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 1.0, 1.0, 3.0, 2.0, 1.0, 3.0, 1.0, 6.0, 8.0, 7.0, 12.0, 17.0, 22.0, 20.0, 31.0, 46.0, 53.0, 64.0, 83.0, 104.0, 132.0, 193.0, 291.0, 529.0, 793.0, 1431.0, 2903.0, 6420.0, 15968.0, 47355.0, 215330.0, 598497.0, 107270.0, 29836.0, 10922.0, 4807.0, 2326.0, 1096.0, 633.0, 397.0, 259.0, 183.0, 125.0, 95.0, 74.0, 51.0, 38.0, 34.0, 21.0, 23.0, 10.0, 7.0, 8.0, 5.0, 5.0, 7.0, 4.0, 2.0, 2.0, 4.0, 1.0], "bins": [-26.8125, -26.03076171875, -25.2490234375, -24.46728515625, -23.685546875, -22.90380859375, -22.1220703125, -21.34033203125, -20.55859375, -19.77685546875, -18.9951171875, -18.21337890625, -17.431640625, -16.64990234375, -15.8681640625, -15.08642578125, -14.3046875, -13.52294921875, -12.7412109375, -11.95947265625, -11.177734375, -10.39599609375, -9.6142578125, -8.83251953125, -8.05078125, -7.26904296875, -6.4873046875, -5.70556640625, -4.923828125, -4.14208984375, -3.3603515625, -2.57861328125, -1.796875, -1.01513671875, -0.2333984375, 0.54833984375, 1.330078125, 2.11181640625, 2.8935546875, 3.67529296875, 4.45703125, 5.23876953125, 6.0205078125, 6.80224609375, 7.583984375, 8.36572265625, 9.1474609375, 9.92919921875, 10.7109375, 11.49267578125, 12.2744140625, 13.05615234375, 13.837890625, 14.61962890625, 15.4013671875, 16.18310546875, 16.96484375, 17.74658203125, 18.5283203125, 19.31005859375, 20.091796875, 20.87353515625, 21.6552734375, 22.43701171875, 23.21875]}, "gradients/decoder.transformer.h.22.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 4.0, 8.0, 9.0, 4.0, 5.0, 12.0, 8.0, 9.0, 15.0, 21.0, 17.0, 16.0, 25.0, 22.0, 31.0, 41.0, 36.0, 34.0, 38.0, 33.0, 40.0, 55.0, 257.0, 1844.0, 57.0, 51.0, 34.0, 43.0, 34.0, 35.0, 36.0, 23.0, 27.0, 19.0, 15.0, 14.0, 21.0, 11.0, 6.0, 13.0, 11.0, 3.0, 7.0, 7.0, 3.0, 7.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-84.5625, -81.75, -78.9375, -76.125, -73.3125, -70.5, -67.6875, -64.875, -62.0625, -59.25, -56.4375, -53.625, -50.8125, -48.0, -45.1875, -42.375, -39.5625, -36.75, -33.9375, -31.125, -28.3125, -25.5, -22.6875, -19.875, -17.0625, -14.25, -11.4375, -8.625, -5.8125, -3.0, -0.1875, 2.625, 5.4375, 8.25, 11.0625, 13.875, 16.6875, 19.5, 22.3125, 25.125, 27.9375, 30.75, 33.5625, 36.375, 39.1875, 42.0, 44.8125, 47.625, 50.4375, 53.25, 56.0625, 58.875, 61.6875, 64.5, 67.3125, 70.125, 72.9375, 75.75, 78.5625, 81.375, 84.1875, 87.0, 89.8125, 92.625, 95.4375]}, "gradients/decoder.transformer.h.22.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 6.0, 4.0, 7.0, 6.0, 6.0, 8.0, 14.0, 6.0, 12.0, 22.0, 15.0, 14.0, 22.0, 38.0, 29.0, 61.0, 62.0, 98.0, 154.0, 259.0, 596.0, 1955.0, 3068127.0, 71898.0, 1068.0, 448.0, 224.0, 130.0, 88.0, 58.0, 56.0, 46.0, 27.0, 27.0, 14.0, 23.0, 19.0, 13.0, 7.0, 10.0, 10.0, 6.0, 3.0, 12.0, 5.0, 4.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-217.375, -210.947265625, -204.51953125, -198.091796875, -191.6640625, -185.236328125, -178.80859375, -172.380859375, -165.953125, -159.525390625, -153.09765625, -146.669921875, -140.2421875, -133.814453125, -127.38671875, -120.958984375, -114.53125, -108.103515625, -101.67578125, -95.248046875, -88.8203125, -82.392578125, -75.96484375, -69.537109375, -63.109375, -56.681640625, -50.25390625, -43.826171875, -37.3984375, -30.970703125, -24.54296875, -18.115234375, -11.6875, -5.259765625, 1.16796875, 7.595703125, 14.0234375, 20.451171875, 26.87890625, 33.306640625, 39.734375, 46.162109375, 52.58984375, 59.017578125, 65.4453125, 71.873046875, 78.30078125, 84.728515625, 91.15625, 97.583984375, 104.01171875, 110.439453125, 116.8671875, 123.294921875, 129.72265625, 136.150390625, 142.578125, 149.005859375, 155.43359375, 161.861328125, 168.2890625, 174.716796875, 181.14453125, 187.572265625, 194.0]}, "gradients/decoder.transformer.h.22.ln_1.weight": {"_type": "histogram", "values": [1.0, 1.0, 6.0, 718.0, 289.0, 8.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-79.73577880859375, -59.19568634033203, -38.65559387207031, -18.115501403808594, 2.424591064453125, 22.964683532714844, 43.50477600097656, 64.04486083984375, 84.5849609375, 105.12505340576172, 125.66514587402344, 146.20523071289062, 166.74533081054688, 187.28543090820312, 207.8255157470703, 228.3656005859375, 248.90570068359375, 269.44580078125, 289.98590087890625, 310.5259704589844, 331.0660705566406, 351.6061706542969, 372.146240234375, 392.68634033203125, 413.2264404296875, 433.76654052734375, 454.306640625, 474.8467102050781, 495.3868103027344, 515.9268798828125, 536.4669799804688, 557.007080078125, 577.5472412109375, 598.0873413085938, 618.62744140625, 639.1675415039062, 659.7076416015625, 680.2476806640625, 700.7877807617188, 721.327880859375, 741.8679809570312, 762.4080810546875, 782.9481811523438, 803.48828125, 824.0283203125, 844.5684204101562, 865.1085205078125, 885.6486206054688, 906.188720703125, 926.7288208007812, 947.2689208984375, 967.8090209960938, 988.34912109375, 1008.88916015625, 1029.4293212890625, 1049.9693603515625, 1070.509521484375, 1091.049560546875, 1111.5897216796875, 1132.1297607421875, 1152.669921875, 1173.2099609375, 1193.7501220703125, 1214.2901611328125, 1234.8302001953125]}, "gradients/decoder.transformer.h.22.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 4.0, 1.0, 3.0, 4.0, 4.0, 4.0, 6.0, 8.0, 14.0, 7.0, 13.0, 19.0, 21.0, 20.0, 21.0, 24.0, 23.0, 35.0, 36.0, 32.0, 33.0, 32.0, 35.0, 30.0, 55.0, 40.0, 34.0, 38.0, 37.0, 43.0, 27.0, 27.0, 37.0, 31.0, 24.0, 19.0, 23.0, 27.0, 18.0, 20.0, 14.0, 11.0, 10.0, 7.0, 11.0, 6.0, 5.0, 5.0, 5.0, 4.0, 2.0, 2.0, 1.0, 3.0, 1.0, 1.0, 2.0], "bins": [-263.47332763671875, -255.39288330078125, -247.3124237060547, -239.2319793701172, -231.15151977539062, -223.07107543945312, -214.99063110351562, -206.91017150878906, -198.8297119140625, -190.749267578125, -182.66880798339844, -174.58836364746094, -166.50790405273438, -158.42745971679688, -150.34701538085938, -142.2665557861328, -134.1861114501953, -126.10565948486328, -118.02520751953125, -109.94476318359375, -101.86430358886719, -93.78385925292969, -85.70340728759766, -77.62295532226562, -69.5425033569336, -61.46205139160156, -53.38159942626953, -45.301151275634766, -37.220699310302734, -29.140247344970703, -21.059799194335938, -12.979347229003906, -4.8988800048828125, 3.1815710067749023, 11.262022018432617, 19.342472076416016, 27.422924041748047, 35.50337600708008, 43.583824157714844, 51.664276123046875, 59.744728088378906, 67.82518005371094, 75.90563201904297, 83.986083984375, 92.0665283203125, 100.14698791503906, 108.22743225097656, 116.3078842163086, 124.38833618164062, 132.46878051757812, 140.5492401123047, 148.6296844482422, 156.71014404296875, 164.79058837890625, 172.87103271484375, 180.9514923095703, 189.03195190429688, 197.11239624023438, 205.19285583496094, 213.27330017089844, 221.353759765625, 229.4342041015625, 237.5146484375, 245.59510803222656, 253.67555236816406]}, "gradients/decoder.transformer.h.21.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 1.0, 2.0, 3.0, 3.0, 7.0, 4.0, 6.0, 12.0, 7.0, 10.0, 10.0, 11.0, 14.0, 13.0, 22.0, 19.0, 24.0, 27.0, 33.0, 40.0, 51.0, 55.0, 62.0, 64.0, 52.0, 50.0, 69.0, 57.0, 56.0, 34.0, 34.0, 24.0, 23.0, 22.0, 12.0, 19.0, 13.0, 10.0, 5.0, 8.0, 7.0, 1.0, 8.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 2.0, 0.0, 2.0, 1.0], "bins": [-31.09375, -30.201416015625, -29.30908203125, -28.416748046875, -27.5244140625, -26.632080078125, -25.73974609375, -24.847412109375, -23.955078125, -23.062744140625, -22.17041015625, -21.278076171875, -20.3857421875, -19.493408203125, -18.60107421875, -17.708740234375, -16.81640625, -15.924072265625, -15.03173828125, -14.139404296875, -13.2470703125, -12.354736328125, -11.46240234375, -10.570068359375, -9.677734375, -8.785400390625, -7.89306640625, -7.000732421875, -6.1083984375, -5.216064453125, -4.32373046875, -3.431396484375, -2.5390625, -1.646728515625, -0.75439453125, 0.137939453125, 1.0302734375, 1.922607421875, 2.81494140625, 3.707275390625, 4.599609375, 5.491943359375, 6.38427734375, 7.276611328125, 8.1689453125, 9.061279296875, 9.95361328125, 10.845947265625, 11.73828125, 12.630615234375, 13.52294921875, 14.415283203125, 15.3076171875, 16.199951171875, 17.09228515625, 17.984619140625, 18.876953125, 19.769287109375, 20.66162109375, 21.553955078125, 22.4462890625, 23.338623046875, 24.23095703125, 25.123291015625, 26.015625]}, "gradients/decoder.transformer.h.21.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 4.0, 5.0, 3.0, 4.0, 8.0, 12.0, 11.0, 9.0, 10.0, 15.0, 14.0, 27.0, 35.0, 51.0, 86.0, 131.0, 260.0, 552.0, 1324.0, 3456.0, 12810.0, 3423262.0, 737802.0, 9984.0, 2566.0, 941.0, 366.0, 200.0, 107.0, 61.0, 46.0, 28.0, 23.0, 20.0, 14.0, 7.0, 8.0, 6.0, 9.0, 6.0, 0.0, 2.0, 3.0, 0.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-211.625, -205.38671875, -199.1484375, -192.91015625, -186.671875, -180.43359375, -174.1953125, -167.95703125, -161.71875, -155.48046875, -149.2421875, -143.00390625, -136.765625, -130.52734375, -124.2890625, -118.05078125, -111.8125, -105.57421875, -99.3359375, -93.09765625, -86.859375, -80.62109375, -74.3828125, -68.14453125, -61.90625, -55.66796875, -49.4296875, -43.19140625, -36.953125, -30.71484375, -24.4765625, -18.23828125, -12.0, -5.76171875, 0.4765625, 6.71484375, 12.953125, 19.19140625, 25.4296875, 31.66796875, 37.90625, 44.14453125, 50.3828125, 56.62109375, 62.859375, 69.09765625, 75.3359375, 81.57421875, 87.8125, 94.05078125, 100.2890625, 106.52734375, 112.765625, 119.00390625, 125.2421875, 131.48046875, 137.71875, 143.95703125, 150.1953125, 156.43359375, 162.671875, 168.91015625, 175.1484375, 181.38671875, 187.625]}, "gradients/decoder.transformer.h.21.mlp.c_fc.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 3.0, 1.0, 4.0, 1.0, 2.0, 2.0, 4.0, 3.0, 5.0, 4.0, 6.0, 9.0, 13.0, 12.0, 18.0, 22.0, 22.0, 34.0, 53.0, 104.0, 305.0, 803.0, 1490.0, 664.0, 225.0, 76.0, 42.0, 28.0, 24.0, 17.0, 14.0, 12.0, 7.0, 3.0, 13.0, 1.0, 8.0, 2.0, 3.0, 4.0, 3.0, 3.0, 2.0, 5.0, 5.0, 2.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0], "bins": [-77.5, -74.861328125, -72.22265625, -69.583984375, -66.9453125, -64.306640625, -61.66796875, -59.029296875, -56.390625, -53.751953125, -51.11328125, -48.474609375, -45.8359375, -43.197265625, -40.55859375, -37.919921875, -35.28125, -32.642578125, -30.00390625, -27.365234375, -24.7265625, -22.087890625, -19.44921875, -16.810546875, -14.171875, -11.533203125, -8.89453125, -6.255859375, -3.6171875, -0.978515625, 1.66015625, 4.298828125, 6.9375, 9.576171875, 12.21484375, 14.853515625, 17.4921875, 20.130859375, 22.76953125, 25.408203125, 28.046875, 30.685546875, 33.32421875, 35.962890625, 38.6015625, 41.240234375, 43.87890625, 46.517578125, 49.15625, 51.794921875, 54.43359375, 57.072265625, 59.7109375, 62.349609375, 64.98828125, 67.626953125, 70.265625, 72.904296875, 75.54296875, 78.181640625, 80.8203125, 83.458984375, 86.09765625, 88.736328125, 91.375]}, "gradients/decoder.transformer.h.21.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 6.0, 1.0, 0.0, 6.0, 6.0, 8.0, 9.0, 15.0, 17.0, 23.0, 29.0, 45.0, 74.0, 86.0, 114.0, 152.0, 185.0, 300.0, 409.0, 536.0, 741.0, 1162.0, 1733.0, 2416.0, 3587.0, 5735.0, 9438.0, 17440.0, 37212.0, 140779.0, 3297075.0, 554559.0, 62193.0, 23970.0, 12652.0, 7451.0, 4493.0, 2964.0, 2005.0, 1392.0, 928.0, 643.0, 487.0, 346.0, 238.0, 153.0, 124.0, 104.0, 69.0, 58.0, 41.0, 24.0, 17.0, 14.0, 13.0, 8.0, 3.0, 4.0, 4.0, 2.0, 3.0, 0.0, 2.0], "bins": [-96.1875, -93.1572265625, -90.126953125, -87.0966796875, -84.06640625, -81.0361328125, -78.005859375, -74.9755859375, -71.9453125, -68.9150390625, -65.884765625, -62.8544921875, -59.82421875, -56.7939453125, -53.763671875, -50.7333984375, -47.703125, -44.6728515625, -41.642578125, -38.6123046875, -35.58203125, -32.5517578125, -29.521484375, -26.4912109375, -23.4609375, -20.4306640625, -17.400390625, -14.3701171875, -11.33984375, -8.3095703125, -5.279296875, -2.2490234375, 0.78125, 3.8115234375, 6.841796875, 9.8720703125, 12.90234375, 15.9326171875, 18.962890625, 21.9931640625, 25.0234375, 28.0537109375, 31.083984375, 34.1142578125, 37.14453125, 40.1748046875, 43.205078125, 46.2353515625, 49.265625, 52.2958984375, 55.326171875, 58.3564453125, 61.38671875, 64.4169921875, 67.447265625, 70.4775390625, 73.5078125, 76.5380859375, 79.568359375, 82.5986328125, 85.62890625, 88.6591796875, 91.689453125, 94.7197265625, 97.75]}, "gradients/decoder.transformer.h.21.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 4.0, 4.0, 5.0, 7.0, 13.0, 21.0, 29.0, 42.0, 54.0, 102.0, 202.0, 217.0, 112.0, 65.0, 38.0, 37.0, 17.0, 12.0, 13.0, 6.0, 5.0, 3.0, 4.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-425.1797790527344, -413.6929016113281, -402.2060241699219, -390.7191467285156, -379.2322692871094, -367.7453918457031, -356.258544921875, -344.77166748046875, -333.2847900390625, -321.79791259765625, -310.31103515625, -298.82415771484375, -287.3372802734375, -275.85040283203125, -264.363525390625, -252.8766632080078, -241.3897705078125, -229.90289306640625, -218.416015625, -206.92913818359375, -195.4422607421875, -183.95538330078125, -172.46852111816406, -160.9816436767578, -149.49476623535156, -138.0078887939453, -126.52101135253906, -115.03414154052734, -103.5472640991211, -92.06038665771484, -80.57351684570312, -69.08663940429688, -57.5997314453125, -46.11285400390625, -34.625980377197266, -23.13910675048828, -11.652229309082031, -0.16535186767578125, 11.321517944335938, 22.808395385742188, 34.29527282714844, 45.78215026855469, 57.26902389526367, 68.75589752197266, 80.2427749633789, 91.72965240478516, 103.21652221679688, 114.70339965820312, 126.19027709960938, 137.67715454101562, 149.16403198242188, 160.65090942382812, 172.13778686523438, 183.62466430664062, 195.1115264892578, 206.59840393066406, 218.0852813720703, 229.57215881347656, 241.0590362548828, 252.5458984375, 264.03277587890625, 275.5196533203125, 287.00653076171875, 298.493408203125, 309.98028564453125]}, "gradients/decoder.transformer.h.21.ln_2.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 7.0, 5.0, 5.0, 4.0, 8.0, 12.0, 15.0, 16.0, 13.0, 30.0, 28.0, 29.0, 30.0, 29.0, 40.0, 39.0, 29.0, 39.0, 51.0, 45.0, 54.0, 40.0, 47.0, 42.0, 37.0, 37.0, 37.0, 40.0, 30.0, 29.0, 25.0, 28.0, 16.0, 10.0, 12.0, 12.0, 10.0, 9.0, 6.0, 7.0, 3.0, 4.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 1.0], "bins": [-211.86265563964844, -204.887451171875, -197.9122314453125, -190.93701171875, -183.96180725097656, -176.98660278320312, -170.01138305664062, -163.03616333007812, -156.0609588623047, -149.08575439453125, -142.11053466796875, -135.13531494140625, -128.1601104736328, -121.18489837646484, -114.20968627929688, -107.2344741821289, -100.25926208496094, -93.28404998779297, -86.308837890625, -79.33362579345703, -72.35841369628906, -65.3832015991211, -58.407989501953125, -51.432777404785156, -44.45756530761719, -37.48235321044922, -30.50714111328125, -23.53192901611328, -16.556716918945312, -9.581504821777344, -2.606292724609375, 4.368919372558594, 11.344131469726562, 18.31934356689453, 25.2945556640625, 32.26976776123047, 39.24497985839844, 46.220191955566406, 53.195404052734375, 60.170616149902344, 67.14582824707031, 74.12104034423828, 81.09625244140625, 88.07146453857422, 95.04667663574219, 102.02188873291016, 108.99710083007812, 115.9723129272461, 122.94752502441406, 129.9227294921875, 136.89794921875, 143.8731689453125, 150.84837341308594, 157.82357788085938, 164.79879760742188, 171.77401733398438, 178.7492218017578, 185.72442626953125, 192.69964599609375, 199.67486572265625, 206.6500701904297, 213.62527465820312, 220.60049438476562, 227.57571411132812, 234.55091857910156]}, "gradients/decoder.transformer.h.21.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 2.0, 2.0, 2.0, 3.0, 7.0, 5.0, 4.0, 7.0, 5.0, 5.0, 11.0, 12.0, 8.0, 10.0, 8.0, 18.0, 19.0, 21.0, 33.0, 35.0, 21.0, 32.0, 48.0, 53.0, 48.0, 44.0, 59.0, 45.0, 54.0, 47.0, 51.0, 49.0, 38.0, 29.0, 29.0, 24.0, 17.0, 20.0, 13.0, 20.0, 10.0, 12.0, 6.0, 7.0, 6.0, 4.0, 4.0, 2.0, 0.0, 0.0, 2.0, 3.0, 0.0, 1.0, 1.0, 2.0, 1.0], "bins": [-29.0625, -28.19384765625, -27.3251953125, -26.45654296875, -25.587890625, -24.71923828125, -23.8505859375, -22.98193359375, -22.11328125, -21.24462890625, -20.3759765625, -19.50732421875, -18.638671875, -17.77001953125, -16.9013671875, -16.03271484375, -15.1640625, -14.29541015625, -13.4267578125, -12.55810546875, -11.689453125, -10.82080078125, -9.9521484375, -9.08349609375, -8.21484375, -7.34619140625, -6.4775390625, -5.60888671875, -4.740234375, -3.87158203125, -3.0029296875, -2.13427734375, -1.265625, -0.39697265625, 0.4716796875, 1.34033203125, 2.208984375, 3.07763671875, 3.9462890625, 4.81494140625, 5.68359375, 6.55224609375, 7.4208984375, 8.28955078125, 9.158203125, 10.02685546875, 10.8955078125, 11.76416015625, 12.6328125, 13.50146484375, 14.3701171875, 15.23876953125, 16.107421875, 16.97607421875, 17.8447265625, 18.71337890625, 19.58203125, 20.45068359375, 21.3193359375, 22.18798828125, 23.056640625, 23.92529296875, 24.7939453125, 25.66259765625, 26.53125]}, "gradients/decoder.transformer.h.21.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 4.0, 3.0, 9.0, 8.0, 8.0, 26.0, 32.0, 40.0, 68.0, 96.0, 118.0, 169.0, 213.0, 270.0, 450.0, 608.0, 900.0, 1141.0, 1690.0, 2414.0, 3446.0, 4815.0, 7474.0, 11149.0, 17394.0, 27925.0, 47708.0, 90412.0, 210802.0, 341008.0, 121503.0, 60715.0, 34512.0, 20754.0, 13105.0, 8610.0, 5856.0, 3912.0, 2619.0, 1891.0, 1372.0, 937.0, 657.0, 501.0, 367.0, 267.0, 169.0, 118.0, 97.0, 59.0, 47.0, 35.0, 22.0, 21.0, 10.0, 5.0, 6.0, 4.0, 1.0, 0.0, 1.0], "bins": [-1.0537109375, -1.0209808349609375, -0.988250732421875, -0.9555206298828125, -0.92279052734375, -0.8900604248046875, -0.857330322265625, -0.8246002197265625, -0.7918701171875, -0.7591400146484375, -0.726409912109375, -0.6936798095703125, -0.66094970703125, -0.6282196044921875, -0.595489501953125, -0.5627593994140625, -0.530029296875, -0.4972991943359375, -0.464569091796875, -0.4318389892578125, -0.39910888671875, -0.3663787841796875, -0.333648681640625, -0.3009185791015625, -0.2681884765625, -0.2354583740234375, -0.202728271484375, -0.1699981689453125, -0.13726806640625, -0.1045379638671875, -0.071807861328125, -0.0390777587890625, -0.00634765625, 0.0263824462890625, 0.059112548828125, 0.0918426513671875, 0.12457275390625, 0.1573028564453125, 0.190032958984375, 0.2227630615234375, 0.2554931640625, 0.2882232666015625, 0.320953369140625, 0.3536834716796875, 0.38641357421875, 0.4191436767578125, 0.451873779296875, 0.4846038818359375, 0.517333984375, 0.5500640869140625, 0.582794189453125, 0.6155242919921875, 0.64825439453125, 0.6809844970703125, 0.713714599609375, 0.7464447021484375, 0.7791748046875, 0.8119049072265625, 0.844635009765625, 0.8773651123046875, 0.91009521484375, 0.9428253173828125, 0.975555419921875, 1.0082855224609375, 1.041015625]}, "gradients/decoder.transformer.h.21.crossattention.c_attn.bias": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 1.0, 0.0, 3.0, 1.0, 4.0, 5.0, 3.0, 3.0, 4.0, 5.0, 3.0, 17.0, 7.0, 12.0, 11.0, 11.0, 22.0, 11.0, 25.0, 32.0, 31.0, 27.0, 26.0, 37.0, 33.0, 41.0, 29.0, 37.0, 40.0, 43.0, 1057.0, 38.0, 45.0, 29.0, 29.0, 31.0, 26.0, 29.0, 32.0, 24.0, 21.0, 22.0, 13.0, 23.0, 11.0, 16.0, 13.0, 10.0, 14.0, 6.0, 4.0, 7.0, 5.0, 4.0, 6.0, 1.0, 1.0, 2.0, 1.0, 0.0, 1.0], "bins": [-15.2109375, -14.75048828125, -14.2900390625, -13.82958984375, -13.369140625, -12.90869140625, -12.4482421875, -11.98779296875, -11.52734375, -11.06689453125, -10.6064453125, -10.14599609375, -9.685546875, -9.22509765625, -8.7646484375, -8.30419921875, -7.84375, -7.38330078125, -6.9228515625, -6.46240234375, -6.001953125, -5.54150390625, -5.0810546875, -4.62060546875, -4.16015625, -3.69970703125, -3.2392578125, -2.77880859375, -2.318359375, -1.85791015625, -1.3974609375, -0.93701171875, -0.4765625, -0.01611328125, 0.4443359375, 0.90478515625, 1.365234375, 1.82568359375, 2.2861328125, 2.74658203125, 3.20703125, 3.66748046875, 4.1279296875, 4.58837890625, 5.048828125, 5.50927734375, 5.9697265625, 6.43017578125, 6.890625, 7.35107421875, 7.8115234375, 8.27197265625, 8.732421875, 9.19287109375, 9.6533203125, 10.11376953125, 10.57421875, 11.03466796875, 11.4951171875, 11.95556640625, 12.416015625, 12.87646484375, 13.3369140625, 13.79736328125, 14.2578125]}, "gradients/decoder.transformer.h.21.crossattention.c_attn.weight": {"_type": "histogram", "values": [3.0, 4.0, 5.0, 3.0, 5.0, 12.0, 18.0, 29.0, 38.0, 52.0, 76.0, 104.0, 129.0, 208.0, 287.0, 377.0, 524.0, 770.0, 1067.0, 1569.0, 2244.0, 3148.0, 4552.0, 6679.0, 9515.0, 13876.0, 20287.0, 30397.0, 46526.0, 71992.0, 124037.0, 1343812.0, 162657.0, 86860.0, 54140.0, 35834.0, 23834.0, 15768.0, 10808.0, 7392.0, 5234.0, 3618.0, 2552.0, 1797.0, 1270.0, 901.0, 662.0, 424.0, 299.0, 235.0, 137.0, 116.0, 84.0, 49.0, 37.0, 36.0, 17.0, 20.0, 11.0, 3.0, 2.0, 2.0, 3.0, 5.0], "bins": [-0.6103515625, -0.5910873413085938, -0.5718231201171875, -0.5525588989257812, -0.533294677734375, -0.5140304565429688, -0.4947662353515625, -0.47550201416015625, -0.45623779296875, -0.43697357177734375, -0.4177093505859375, -0.39844512939453125, -0.379180908203125, -0.35991668701171875, -0.3406524658203125, -0.32138824462890625, -0.3021240234375, -0.28285980224609375, -0.2635955810546875, -0.24433135986328125, -0.225067138671875, -0.20580291748046875, -0.1865386962890625, -0.16727447509765625, -0.14801025390625, -0.12874603271484375, -0.1094818115234375, -0.09021759033203125, -0.070953369140625, -0.05168914794921875, -0.0324249267578125, -0.01316070556640625, 0.006103515625, 0.02536773681640625, 0.0446319580078125, 0.06389617919921875, 0.083160400390625, 0.10242462158203125, 0.1216888427734375, 0.14095306396484375, 0.16021728515625, 0.17948150634765625, 0.1987457275390625, 0.21800994873046875, 0.237274169921875, 0.25653839111328125, 0.2758026123046875, 0.29506683349609375, 0.3143310546875, 0.33359527587890625, 0.3528594970703125, 0.37212371826171875, 0.391387939453125, 0.41065216064453125, 0.4299163818359375, 0.44918060302734375, 0.46844482421875, 0.48770904541015625, 0.5069732666015625, 0.5262374877929688, 0.545501708984375, 0.5647659301757812, 0.5840301513671875, 0.6032943725585938, 0.62255859375]}, "gradients/decoder.transformer.h.21.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 3.0, 0.0, 3.0, 4.0, 6.0, 10.0, 6.0, 5.0, 9.0, 12.0, 13.0, 19.0, 21.0, 24.0, 19.0, 23.0, 41.0, 28.0, 50.0, 50.0, 53.0, 97.0, 95.0, 56.0, 58.0, 28.0, 38.0, 35.0, 23.0, 25.0, 27.0, 14.0, 19.0, 12.0, 15.0, 7.0, 10.0, 8.0, 8.0, 5.0, 7.0, 6.0, 8.0, 5.0, 3.0, 3.0, 0.0, 1.0, 1.0, 2.0], "bins": [-6.413459777832031e-05, -6.236415356397629e-05, -6.059370934963226e-05, -5.882326513528824e-05, -5.7052820920944214e-05, -5.528237670660019e-05, -5.3511932492256165e-05, -5.174148827791214e-05, -4.9971044063568115e-05, -4.820059984922409e-05, -4.6430155634880066e-05, -4.465971142053604e-05, -4.2889267206192017e-05, -4.111882299184799e-05, -3.934837877750397e-05, -3.757793456315994e-05, -3.580749034881592e-05, -3.403704613447189e-05, -3.226660192012787e-05, -3.0496157705783844e-05, -2.872571349143982e-05, -2.6955269277095795e-05, -2.518482506275177e-05, -2.3414380848407745e-05, -2.164393663406372e-05, -1.9873492419719696e-05, -1.810304820537567e-05, -1.6332603991031647e-05, -1.4562159776687622e-05, -1.2791715562343597e-05, -1.1021271347999573e-05, -9.250827133655548e-06, -7.4803829193115234e-06, -5.709938704967499e-06, -3.939494490623474e-06, -2.1690502762794495e-06, -3.986060619354248e-07, 1.3718381524085999e-06, 3.1422823667526245e-06, 4.912726581096649e-06, 6.683170795440674e-06, 8.453615009784698e-06, 1.0224059224128723e-05, 1.1994503438472748e-05, 1.3764947652816772e-05, 1.5535391867160797e-05, 1.7305836081504822e-05, 1.9076280295848846e-05, 2.084672451019287e-05, 2.2617168724536896e-05, 2.438761293888092e-05, 2.6158057153224945e-05, 2.792850136756897e-05, 2.9698945581912994e-05, 3.146938979625702e-05, 3.3239834010601044e-05, 3.501027822494507e-05, 3.678072243928909e-05, 3.855116665363312e-05, 4.032161086797714e-05, 4.209205508232117e-05, 4.386249929666519e-05, 4.5632943511009216e-05, 4.740338772535324e-05, 4.9173831939697266e-05]}, "gradients/decoder.transformer.h.21.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 4.0, 2.0, 2.0, 6.0, 10.0, 7.0, 9.0, 5.0, 12.0, 15.0, 21.0, 31.0, 31.0, 36.0, 43.0, 44.0, 56.0, 84.0, 101.0, 109.0, 167.0, 232.0, 871.0, 13233.0, 923992.0, 105140.0, 3017.0, 389.0, 187.0, 126.0, 107.0, 83.0, 63.0, 58.0, 48.0, 37.0, 33.0, 35.0, 25.0, 25.0, 24.0, 14.0, 10.0, 9.0, 4.0, 5.0, 1.0, 2.0, 3.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.0009188652038574219, -0.0008889958262443542, -0.0008591264486312866, -0.000829257071018219, -0.0007993876934051514, -0.0007695183157920837, -0.0007396489381790161, -0.0007097795605659485, -0.0006799101829528809, -0.0006500408053398132, -0.0006201714277267456, -0.000590302050113678, -0.0005604326725006104, -0.0005305632948875427, -0.0005006939172744751, -0.00047082453966140747, -0.00044095516204833984, -0.0004110857844352722, -0.0003812164068222046, -0.00035134702920913696, -0.00032147765159606934, -0.0002916082739830017, -0.0002617388963699341, -0.00023186951875686646, -0.00020200014114379883, -0.0001721307635307312, -0.00014226138591766357, -0.00011239200830459595, -8.252263069152832e-05, -5.265325307846069e-05, -2.2783875465393066e-05, 7.0855021476745605e-06, 3.695487976074219e-05, 6.682425737380981e-05, 9.669363498687744e-05, 0.00012656301259994507, 0.0001564323902130127, 0.00018630176782608032, 0.00021617114543914795, 0.0002460405230522156, 0.0002759099006652832, 0.00030577927827835083, 0.00033564865589141846, 0.0003655180335044861, 0.0003953874111175537, 0.00042525678873062134, 0.00045512616634368896, 0.0004849955439567566, 0.0005148649215698242, 0.0005447342991828918, 0.0005746036767959595, 0.0006044730544090271, 0.0006343424320220947, 0.0006642118096351624, 0.00069408118724823, 0.0007239505648612976, 0.0007538199424743652, 0.0007836893200874329, 0.0008135586977005005, 0.0008434280753135681, 0.0008732974529266357, 0.0009031668305397034, 0.000933036208152771, 0.0009629055857658386, 0.0009927749633789062]}, "gradients/decoder.transformer.h.21.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 12.0, 20.0, 90.0, 595.0, 242.0, 42.0, 11.0, 4.0, 1.0], "bins": [-0.0003470896335784346, -0.00034118382609449327, -0.0003352779895067215, -0.0003293721820227802, -0.0003234663454350084, -0.0003175605379510671, -0.0003116547013632953, -0.000305748893879354, -0.0002998430572915822, -0.0002939372498076409, -0.00028803141321986914, -0.0002821256057359278, -0.00027621976914815605, -0.00027031396166421473, -0.00026440812507644296, -0.00025850231759250164, -0.0002525965101085603, -0.000246690702624619, -0.00024078486603684723, -0.0002348790440009907, -0.00022897322196513414, -0.00022306741448119283, -0.00021716157789342105, -0.00021125577040947974, -0.00020534993382170796, -0.00019944411178585142, -0.00019353828974999487, -0.00018763246771413833, -0.00018172664567828178, -0.00017582083819434047, -0.0001699150016065687, -0.00016400919412262738, -0.00015810337208677083, -0.0001521975500509143, -0.00014629172801505774, -0.0001403859059792012, -0.00013448008394334465, -0.0001285742619074881, -0.0001226684544235468, -0.00011676263238769025, -0.0001108568103518337, -0.00010495098831597716, -9.904516628012061e-05, -9.313934424426407e-05, -8.723352948436514e-05, -8.132770744850859e-05, -7.542188541265205e-05, -6.951607065275311e-05, -6.361024134093896e-05, -5.770441930508241e-05, -5.179860090720467e-05, -4.589277887134813e-05, -3.998696047347039e-05, -3.4081138437613845e-05, -2.81753164017573e-05, -2.2269498003879562e-05, -1.6363675968023017e-05, -1.0457855751155876e-05, -4.5520346247940324e-06, 1.3537865015678108e-06, 7.259606718434952e-06, 1.3165426935302094e-05, 1.907124897115864e-05, 2.4977067369036376e-05, 3.088288940489292e-05]}, "gradients/decoder.transformer.h.21.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 1.0, 5.0, 1.0, 4.0, 5.0, 3.0, 3.0, 11.0, 5.0, 14.0, 19.0, 14.0, 11.0, 14.0, 19.0, 18.0, 23.0, 27.0, 45.0, 28.0, 42.0, 33.0, 34.0, 39.0, 42.0, 35.0, 34.0, 50.0, 39.0, 46.0, 37.0, 40.0, 40.0, 24.0, 21.0, 28.0, 24.0, 19.0, 15.0, 19.0, 14.0, 12.0, 9.0, 8.0, 9.0, 7.0, 6.0, 3.0, 4.0, 3.0, 3.0, 2.0, 2.0, 2.0, 0.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.7298927307128906e-05, -2.636108547449112e-05, -2.5423243641853333e-05, -2.4485401809215546e-05, -2.354755997657776e-05, -2.2609718143939972e-05, -2.1671876311302185e-05, -2.0734034478664398e-05, -1.979619264602661e-05, -1.8858350813388824e-05, -1.7920508980751038e-05, -1.698266714811325e-05, -1.6044825315475464e-05, -1.5106983482837677e-05, -1.416914165019989e-05, -1.3231299817562103e-05, -1.2293457984924316e-05, -1.135561615228653e-05, -1.0417774319648743e-05, -9.479932487010956e-06, -8.542090654373169e-06, -7.604248821735382e-06, -6.666406989097595e-06, -5.728565156459808e-06, -4.7907233238220215e-06, -3.852881491184235e-06, -2.9150396585464478e-06, -1.977197825908661e-06, -1.039355993270874e-06, -1.0151416063308716e-07, 8.363276720046997e-07, 1.7741695046424866e-06, 2.7120113372802734e-06, 3.6498531699180603e-06, 4.587695002555847e-06, 5.525536835193634e-06, 6.463378667831421e-06, 7.401220500469208e-06, 8.339062333106995e-06, 9.276904165744781e-06, 1.0214745998382568e-05, 1.1152587831020355e-05, 1.2090429663658142e-05, 1.3028271496295929e-05, 1.3966113328933716e-05, 1.4903955161571503e-05, 1.584179699420929e-05, 1.6779638826847076e-05, 1.7717480659484863e-05, 1.865532249212265e-05, 1.9593164324760437e-05, 2.0531006157398224e-05, 2.146884799003601e-05, 2.2406689822673798e-05, 2.3344531655311584e-05, 2.428237348794937e-05, 2.5220215320587158e-05, 2.6158057153224945e-05, 2.7095898985862732e-05, 2.803374081850052e-05, 2.8971582651138306e-05, 2.9909424483776093e-05, 3.084726631641388e-05, 3.1785108149051666e-05, 3.272294998168945e-05]}, "gradients/decoder.transformer.h.21.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 2.0, 2.0, 2.0, 3.0, 7.0, 5.0, 4.0, 7.0, 5.0, 5.0, 11.0, 12.0, 8.0, 10.0, 8.0, 18.0, 19.0, 21.0, 33.0, 35.0, 21.0, 32.0, 48.0, 53.0, 48.0, 44.0, 59.0, 45.0, 54.0, 47.0, 51.0, 49.0, 38.0, 29.0, 29.0, 24.0, 17.0, 20.0, 13.0, 20.0, 10.0, 12.0, 6.0, 7.0, 6.0, 4.0, 4.0, 2.0, 0.0, 0.0, 2.0, 3.0, 0.0, 1.0, 1.0, 2.0, 1.0], "bins": [-29.0625, -28.19384765625, -27.3251953125, -26.45654296875, -25.587890625, -24.71923828125, -23.8505859375, -22.98193359375, -22.11328125, -21.24462890625, -20.3759765625, -19.50732421875, -18.638671875, -17.77001953125, -16.9013671875, -16.03271484375, -15.1640625, -14.29541015625, -13.4267578125, -12.55810546875, -11.689453125, -10.82080078125, -9.9521484375, -9.08349609375, -8.21484375, -7.34619140625, -6.4775390625, -5.60888671875, -4.740234375, -3.87158203125, -3.0029296875, -2.13427734375, -1.265625, -0.39697265625, 0.4716796875, 1.34033203125, 2.208984375, 3.07763671875, 3.9462890625, 4.81494140625, 5.68359375, 6.55224609375, 7.4208984375, 8.28955078125, 9.158203125, 10.02685546875, 10.8955078125, 11.76416015625, 12.6328125, 13.50146484375, 14.3701171875, 15.23876953125, 16.107421875, 16.97607421875, 17.8447265625, 18.71337890625, 19.58203125, 20.45068359375, 21.3193359375, 22.18798828125, 23.056640625, 23.92529296875, 24.7939453125, 25.66259765625, 26.53125]}, "gradients/decoder.transformer.h.21.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 3.0, 0.0, 0.0, 2.0, 0.0, 2.0, 3.0, 4.0, 7.0, 4.0, 13.0, 13.0, 27.0, 29.0, 41.0, 59.0, 82.0, 95.0, 149.0, 208.0, 330.0, 543.0, 935.0, 1839.0, 4004.0, 11008.0, 37227.0, 212767.0, 657467.0, 87768.0, 20794.0, 6886.0, 2867.0, 1305.0, 699.0, 428.0, 317.0, 197.0, 128.0, 82.0, 62.0, 52.0, 30.0, 27.0, 14.0, 8.0, 12.0, 7.0, 6.0, 6.0, 5.0, 5.0, 2.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-25.953125, -25.095947265625, -24.23876953125, -23.381591796875, -22.5244140625, -21.667236328125, -20.81005859375, -19.952880859375, -19.095703125, -18.238525390625, -17.38134765625, -16.524169921875, -15.6669921875, -14.809814453125, -13.95263671875, -13.095458984375, -12.23828125, -11.381103515625, -10.52392578125, -9.666748046875, -8.8095703125, -7.952392578125, -7.09521484375, -6.238037109375, -5.380859375, -4.523681640625, -3.66650390625, -2.809326171875, -1.9521484375, -1.094970703125, -0.23779296875, 0.619384765625, 1.4765625, 2.333740234375, 3.19091796875, 4.048095703125, 4.9052734375, 5.762451171875, 6.61962890625, 7.476806640625, 8.333984375, 9.191162109375, 10.04833984375, 10.905517578125, 11.7626953125, 12.619873046875, 13.47705078125, 14.334228515625, 15.19140625, 16.048583984375, 16.90576171875, 17.762939453125, 18.6201171875, 19.477294921875, 20.33447265625, 21.191650390625, 22.048828125, 22.906005859375, 23.76318359375, 24.620361328125, 25.4775390625, 26.334716796875, 27.19189453125, 28.049072265625, 28.90625]}, "gradients/decoder.transformer.h.21.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 2.0, 5.0, 5.0, 5.0, 3.0, 7.0, 6.0, 11.0, 6.0, 10.0, 13.0, 22.0, 18.0, 23.0, 30.0, 20.0, 44.0, 24.0, 39.0, 49.0, 57.0, 42.0, 59.0, 105.0, 1910.0, 110.0, 52.0, 38.0, 43.0, 34.0, 36.0, 34.0, 36.0, 24.0, 26.0, 15.0, 20.0, 24.0, 9.0, 8.0, 7.0, 8.0, 6.0, 5.0, 1.0, 5.0, 1.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-105.125, -101.986328125, -98.84765625, -95.708984375, -92.5703125, -89.431640625, -86.29296875, -83.154296875, -80.015625, -76.876953125, -73.73828125, -70.599609375, -67.4609375, -64.322265625, -61.18359375, -58.044921875, -54.90625, -51.767578125, -48.62890625, -45.490234375, -42.3515625, -39.212890625, -36.07421875, -32.935546875, -29.796875, -26.658203125, -23.51953125, -20.380859375, -17.2421875, -14.103515625, -10.96484375, -7.826171875, -4.6875, -1.548828125, 1.58984375, 4.728515625, 7.8671875, 11.005859375, 14.14453125, 17.283203125, 20.421875, 23.560546875, 26.69921875, 29.837890625, 32.9765625, 36.115234375, 39.25390625, 42.392578125, 45.53125, 48.669921875, 51.80859375, 54.947265625, 58.0859375, 61.224609375, 64.36328125, 67.501953125, 70.640625, 73.779296875, 76.91796875, 80.056640625, 83.1953125, 86.333984375, 89.47265625, 92.611328125, 95.75]}, "gradients/decoder.transformer.h.21.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 1.0, 2.0, 2.0, 3.0, 2.0, 7.0, 6.0, 11.0, 6.0, 10.0, 20.0, 21.0, 17.0, 24.0, 28.0, 44.0, 60.0, 78.0, 135.0, 253.0, 562.0, 1785.0, 13671.0, 3068146.0, 55654.0, 3322.0, 863.0, 368.0, 177.0, 112.0, 64.0, 50.0, 38.0, 34.0, 24.0, 23.0, 20.0, 13.0, 6.0, 9.0, 11.0, 5.0, 9.0, 3.0, 8.0, 4.0, 4.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 2.0], "bins": [-166.375, -160.953125, -155.53125, -150.109375, -144.6875, -139.265625, -133.84375, -128.421875, -123.0, -117.578125, -112.15625, -106.734375, -101.3125, -95.890625, -90.46875, -85.046875, -79.625, -74.203125, -68.78125, -63.359375, -57.9375, -52.515625, -47.09375, -41.671875, -36.25, -30.828125, -25.40625, -19.984375, -14.5625, -9.140625, -3.71875, 1.703125, 7.125, 12.546875, 17.96875, 23.390625, 28.8125, 34.234375, 39.65625, 45.078125, 50.5, 55.921875, 61.34375, 66.765625, 72.1875, 77.609375, 83.03125, 88.453125, 93.875, 99.296875, 104.71875, 110.140625, 115.5625, 120.984375, 126.40625, 131.828125, 137.25, 142.671875, 148.09375, 153.515625, 158.9375, 164.359375, 169.78125, 175.203125, 180.625]}, "gradients/decoder.transformer.h.21.ln_1.weight": {"_type": "histogram", "values": [1.0, 7.0, 324.0, 673.0, 13.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-96.2940444946289, -65.51303100585938, -34.732017517089844, -3.9510040283203125, 26.83000946044922, 57.61101531982422, 88.39203643798828, 119.17305755615234, 149.95407104492188, 180.73507690429688, 211.51609802246094, 242.297119140625, 273.078125, 303.859130859375, 334.64013671875, 365.4211730957031, 396.2021789550781, 426.9831848144531, 457.76422119140625, 488.54522705078125, 519.3262329101562, 550.1072387695312, 580.8882446289062, 611.6693115234375, 642.4502563476562, 673.2312622070312, 704.0122680664062, 734.7932739257812, 765.5743408203125, 796.3553466796875, 827.1363525390625, 857.9173583984375, 888.6983642578125, 919.4793701171875, 950.2603759765625, 981.0413818359375, 1011.8223876953125, 1042.6033935546875, 1073.3843994140625, 1104.16552734375, 1134.946533203125, 1165.7275390625, 1196.508544921875, 1227.28955078125, 1258.070556640625, 1288.8515625, 1319.632568359375, 1350.41357421875, 1381.194580078125, 1411.9755859375, 1442.756591796875, 1473.53759765625, 1504.318603515625, 1535.099609375, 1565.880615234375, 1596.66162109375, 1627.442626953125, 1658.2236328125, 1689.004638671875, 1719.78564453125, 1750.566650390625, 1781.34765625, 1812.128662109375, 1842.90966796875, 1873.6907958984375]}, "gradients/decoder.transformer.h.21.ln_1.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 4.0, 1.0, 2.0, 3.0, 1.0, 5.0, 8.0, 8.0, 6.0, 11.0, 21.0, 20.0, 19.0, 23.0, 12.0, 20.0, 20.0, 23.0, 22.0, 29.0, 48.0, 37.0, 34.0, 39.0, 45.0, 58.0, 33.0, 48.0, 29.0, 31.0, 34.0, 31.0, 40.0, 36.0, 29.0, 24.0, 24.0, 20.0, 16.0, 11.0, 16.0, 19.0, 18.0, 12.0, 2.0, 8.0, 4.0, 3.0, 3.0, 4.0, 2.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-325.37841796875, -315.216064453125, -305.0536804199219, -294.8913269042969, -284.7289733886719, -274.56658935546875, -264.40423583984375, -254.24188232421875, -244.0795135498047, -233.91714477539062, -223.75479125976562, -213.59242248535156, -203.4300537109375, -193.2677001953125, -183.10533142089844, -172.94296264648438, -162.78060913085938, -152.6182403564453, -142.4558868408203, -132.29351806640625, -122.13115692138672, -111.96879577636719, -101.80642700195312, -91.6440658569336, -81.48170471191406, -71.31934356689453, -61.156978607177734, -50.99461364746094, -40.832252502441406, -30.669891357421875, -20.507526397705078, -10.345161437988281, -0.182830810546875, 9.979532241821289, 20.141895294189453, 30.304258346557617, 40.46662139892578, 50.62898254394531, 60.79134750366211, 70.9537124633789, 81.11607360839844, 91.27843475341797, 101.4407958984375, 111.60316467285156, 121.7655258178711, 131.92788696289062, 142.0902557373047, 152.25262451171875, 162.41497802734375, 172.5773468017578, 182.7397003173828, 192.90206909179688, 203.06442260742188, 213.22679138183594, 223.38916015625, 233.551513671875, 243.71388244628906, 253.87625122070312, 264.0386047363281, 274.2009582519531, 284.36334228515625, 294.52569580078125, 304.68804931640625, 314.8504333496094, 325.0127868652344]}, "gradients/decoder.transformer.h.20.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 4.0, 3.0, 5.0, 4.0, 8.0, 7.0, 2.0, 6.0, 9.0, 12.0, 7.0, 7.0, 11.0, 11.0, 22.0, 21.0, 36.0, 22.0, 25.0, 38.0, 41.0, 53.0, 38.0, 46.0, 56.0, 44.0, 57.0, 50.0, 52.0, 42.0, 52.0, 30.0, 22.0, 26.0, 29.0, 18.0, 9.0, 19.0, 15.0, 9.0, 12.0, 4.0, 10.0, 1.0, 5.0, 3.0, 1.0, 2.0, 0.0, 2.0, 3.0, 1.0, 1.0, 1.0, 2.0], "bins": [-29.765625, -28.88720703125, -28.0087890625, -27.13037109375, -26.251953125, -25.37353515625, -24.4951171875, -23.61669921875, -22.73828125, -21.85986328125, -20.9814453125, -20.10302734375, -19.224609375, -18.34619140625, -17.4677734375, -16.58935546875, -15.7109375, -14.83251953125, -13.9541015625, -13.07568359375, -12.197265625, -11.31884765625, -10.4404296875, -9.56201171875, -8.68359375, -7.80517578125, -6.9267578125, -6.04833984375, -5.169921875, -4.29150390625, -3.4130859375, -2.53466796875, -1.65625, -0.77783203125, 0.1005859375, 0.97900390625, 1.857421875, 2.73583984375, 3.6142578125, 4.49267578125, 5.37109375, 6.24951171875, 7.1279296875, 8.00634765625, 8.884765625, 9.76318359375, 10.6416015625, 11.52001953125, 12.3984375, 13.27685546875, 14.1552734375, 15.03369140625, 15.912109375, 16.79052734375, 17.6689453125, 18.54736328125, 19.42578125, 20.30419921875, 21.1826171875, 22.06103515625, 22.939453125, 23.81787109375, 24.6962890625, 25.57470703125, 26.453125]}, "gradients/decoder.transformer.h.20.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 1.0, 5.0, 4.0, 3.0, 7.0, 9.0, 4.0, 3.0, 9.0, 8.0, 12.0, 11.0, 11.0, 14.0, 28.0, 54.0, 94.0, 154.0, 272.0, 469.0, 802.0, 1551.0, 3591.0, 9842.0, 323088.0, 3826396.0, 18821.0, 4943.0, 1986.0, 963.0, 475.0, 233.0, 137.0, 85.0, 57.0, 34.0, 23.0, 30.0, 12.0, 13.0, 7.0, 7.0, 7.0, 3.0, 4.0, 4.0, 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 2.0, 0.0, 2.0], "bins": [-179.125, -173.88671875, -168.6484375, -163.41015625, -158.171875, -152.93359375, -147.6953125, -142.45703125, -137.21875, -131.98046875, -126.7421875, -121.50390625, -116.265625, -111.02734375, -105.7890625, -100.55078125, -95.3125, -90.07421875, -84.8359375, -79.59765625, -74.359375, -69.12109375, -63.8828125, -58.64453125, -53.40625, -48.16796875, -42.9296875, -37.69140625, -32.453125, -27.21484375, -21.9765625, -16.73828125, -11.5, -6.26171875, -1.0234375, 4.21484375, 9.453125, 14.69140625, 19.9296875, 25.16796875, 30.40625, 35.64453125, 40.8828125, 46.12109375, 51.359375, 56.59765625, 61.8359375, 67.07421875, 72.3125, 77.55078125, 82.7890625, 88.02734375, 93.265625, 98.50390625, 103.7421875, 108.98046875, 114.21875, 119.45703125, 124.6953125, 129.93359375, 135.171875, 140.41015625, 145.6484375, 150.88671875, 156.125]}, "gradients/decoder.transformer.h.20.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 3.0, 0.0, 0.0, 4.0, 0.0, 1.0, 4.0, 6.0, 6.0, 9.0, 10.0, 10.0, 15.0, 26.0, 25.0, 43.0, 83.0, 244.0, 927.0, 1775.0, 558.0, 160.0, 55.0, 29.0, 32.0, 16.0, 12.0, 5.0, 3.0, 11.0, 4.0, 4.0, 0.0, 2.0, 1.0, 1.0, 1.0, 2.0, 2.0], "bins": [-149.875, -146.4892578125, -143.103515625, -139.7177734375, -136.33203125, -132.9462890625, -129.560546875, -126.1748046875, -122.7890625, -119.4033203125, -116.017578125, -112.6318359375, -109.24609375, -105.8603515625, -102.474609375, -99.0888671875, -95.703125, -92.3173828125, -88.931640625, -85.5458984375, -82.16015625, -78.7744140625, -75.388671875, -72.0029296875, -68.6171875, -65.2314453125, -61.845703125, -58.4599609375, -55.07421875, -51.6884765625, -48.302734375, -44.9169921875, -41.53125, -38.1455078125, -34.759765625, -31.3740234375, -27.98828125, -24.6025390625, -21.216796875, -17.8310546875, -14.4453125, -11.0595703125, -7.673828125, -4.2880859375, -0.90234375, 2.4833984375, 5.869140625, 9.2548828125, 12.640625, 16.0263671875, 19.412109375, 22.7978515625, 26.18359375, 29.5693359375, 32.955078125, 36.3408203125, 39.7265625, 43.1123046875, 46.498046875, 49.8837890625, 53.26953125, 56.6552734375, 60.041015625, 63.4267578125, 66.8125]}, "gradients/decoder.transformer.h.20.mlp.c_fc.weight": {"_type": "histogram", "values": [2.0, 1.0, 2.0, 1.0, 2.0, 4.0, 4.0, 4.0, 9.0, 8.0, 12.0, 13.0, 15.0, 49.0, 77.0, 107.0, 191.0, 331.0, 675.0, 1430.0, 3269.0, 8129.0, 27118.0, 300504.0, 3772522.0, 57065.0, 13871.0, 4810.0, 2004.0, 980.0, 483.0, 233.0, 131.0, 82.0, 42.0, 37.0, 29.0, 15.0, 9.0, 8.0, 6.0, 3.0, 2.0, 5.0, 4.0, 2.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-158.75, -152.19921875, -145.6484375, -139.09765625, -132.546875, -125.99609375, -119.4453125, -112.89453125, -106.34375, -99.79296875, -93.2421875, -86.69140625, -80.140625, -73.58984375, -67.0390625, -60.48828125, -53.9375, -47.38671875, -40.8359375, -34.28515625, -27.734375, -21.18359375, -14.6328125, -8.08203125, -1.53125, 5.01953125, 11.5703125, 18.12109375, 24.671875, 31.22265625, 37.7734375, 44.32421875, 50.875, 57.42578125, 63.9765625, 70.52734375, 77.078125, 83.62890625, 90.1796875, 96.73046875, 103.28125, 109.83203125, 116.3828125, 122.93359375, 129.484375, 136.03515625, 142.5859375, 149.13671875, 155.6875, 162.23828125, 168.7890625, 175.33984375, 181.890625, 188.44140625, 194.9921875, 201.54296875, 208.09375, 214.64453125, 221.1953125, 227.74609375, 234.296875, 240.84765625, 247.3984375, 253.94921875, 260.5]}, "gradients/decoder.transformer.h.20.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 3.0, 0.0, 1.0, 0.0, 2.0, 4.0, 8.0, 4.0, 6.0, 9.0, 16.0, 24.0, 27.0, 46.0, 54.0, 76.0, 160.0, 198.0, 139.0, 72.0, 49.0, 36.0, 28.0, 13.0, 10.0, 7.0, 3.0, 6.0, 1.0, 6.0, 0.0, 4.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-421.85284423828125, -411.6576843261719, -401.4625244140625, -391.267333984375, -381.0721740722656, -370.87701416015625, -360.6818542480469, -350.4866943359375, -340.29150390625, -330.0963439941406, -319.90118408203125, -309.70599365234375, -299.5108337402344, -289.315673828125, -279.1205139160156, -268.92535400390625, -258.7301940917969, -248.5350341796875, -238.33985900878906, -228.1446990966797, -217.94952392578125, -207.75436401367188, -197.5592041015625, -187.36404418945312, -177.1688690185547, -166.9737091064453, -156.77853393554688, -146.5833740234375, -136.38821411132812, -126.19303894042969, -115.99787902832031, -105.8027114868164, -95.6075439453125, -85.4123764038086, -75.21720886230469, -65.02204895019531, -54.826881408691406, -44.6317138671875, -34.43655014038086, -24.24138641357422, -14.046218872070312, -3.851053237915039, 6.344112396240234, 16.539278030395508, 26.73444366455078, 36.92961120605469, 47.12477493286133, 57.31993865966797, 67.51510620117188, 77.71027374267578, 87.90544128417969, 98.10060119628906, 108.29576873779297, 118.49093627929688, 128.68609619140625, 138.88125610351562, 149.07643127441406, 159.27159118652344, 169.46676635742188, 179.66192626953125, 189.85708618164062, 200.05226135253906, 210.24742126464844, 220.44259643554688, 230.63775634765625]}, "gradients/decoder.transformer.h.20.ln_2.bias": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 1.0, 4.0, 0.0, 2.0, 4.0, 6.0, 5.0, 9.0, 10.0, 9.0, 11.0, 13.0, 13.0, 25.0, 31.0, 27.0, 24.0, 31.0, 35.0, 26.0, 34.0, 45.0, 32.0, 50.0, 34.0, 50.0, 53.0, 44.0, 35.0, 50.0, 30.0, 35.0, 30.0, 22.0, 27.0, 26.0, 26.0, 13.0, 23.0, 14.0, 16.0, 10.0, 4.0, 11.0, 4.0, 3.0, 4.0, 2.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-172.03485107421875, -165.90151977539062, -159.76817321777344, -153.63482666015625, -147.50149536132812, -141.3681640625, -135.2348175048828, -129.10147094726562, -122.9681396484375, -116.83480072021484, -110.70146179199219, -104.56812286376953, -98.43478393554688, -92.30144500732422, -86.16810607910156, -80.0347671508789, -73.90142822265625, -67.7680892944336, -61.63475036621094, -55.50141143798828, -49.368072509765625, -43.23473358154297, -37.10139465332031, -30.968055725097656, -24.834716796875, -18.701377868652344, -12.568038940429688, -6.434700012207031, -0.301361083984375, 5.831977844238281, 11.965316772460938, 18.098655700683594, 24.23199462890625, 30.365333557128906, 36.49867248535156, 42.63201141357422, 48.765350341796875, 54.89868927001953, 61.03202819824219, 67.16536712646484, 73.2987060546875, 79.43204498291016, 85.56538391113281, 91.69872283935547, 97.83206176757812, 103.96540069580078, 110.09873962402344, 116.2320785522461, 122.36541748046875, 128.49874877929688, 134.63209533691406, 140.76544189453125, 146.89877319335938, 153.0321044921875, 159.1654510498047, 165.29879760742188, 171.43212890625, 177.56546020507812, 183.6988067626953, 189.8321533203125, 195.96548461914062, 202.09881591796875, 208.23216247558594, 214.36550903320312, 220.49884033203125]}, "gradients/decoder.transformer.h.20.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0, 6.0, 6.0, 7.0, 2.0, 5.0, 5.0, 8.0, 6.0, 8.0, 9.0, 12.0, 15.0, 14.0, 19.0, 21.0, 25.0, 21.0, 27.0, 36.0, 37.0, 32.0, 46.0, 50.0, 48.0, 43.0, 42.0, 47.0, 70.0, 40.0, 30.0, 43.0, 36.0, 26.0, 25.0, 20.0, 21.0, 14.0, 15.0, 10.0, 22.0, 12.0, 5.0, 3.0, 7.0, 4.0, 3.0, 1.0, 1.0, 4.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0], "bins": [-28.0625, -27.175048828125, -26.28759765625, -25.400146484375, -24.5126953125, -23.625244140625, -22.73779296875, -21.850341796875, -20.962890625, -20.075439453125, -19.18798828125, -18.300537109375, -17.4130859375, -16.525634765625, -15.63818359375, -14.750732421875, -13.86328125, -12.975830078125, -12.08837890625, -11.200927734375, -10.3134765625, -9.426025390625, -8.53857421875, -7.651123046875, -6.763671875, -5.876220703125, -4.98876953125, -4.101318359375, -3.2138671875, -2.326416015625, -1.43896484375, -0.551513671875, 0.3359375, 1.223388671875, 2.11083984375, 2.998291015625, 3.8857421875, 4.773193359375, 5.66064453125, 6.548095703125, 7.435546875, 8.322998046875, 9.21044921875, 10.097900390625, 10.9853515625, 11.872802734375, 12.76025390625, 13.647705078125, 14.53515625, 15.422607421875, 16.31005859375, 17.197509765625, 18.0849609375, 18.972412109375, 19.85986328125, 20.747314453125, 21.634765625, 22.522216796875, 23.40966796875, 24.297119140625, 25.1845703125, 26.072021484375, 26.95947265625, 27.846923828125, 28.734375]}, "gradients/decoder.transformer.h.20.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 3.0, 2.0, 6.0, 6.0, 13.0, 20.0, 30.0, 51.0, 76.0, 109.0, 197.0, 213.0, 400.0, 544.0, 740.0, 1108.0, 1710.0, 2458.0, 3958.0, 6025.0, 9450.0, 14731.0, 24148.0, 41030.0, 75450.0, 173667.0, 394028.0, 138316.0, 64901.0, 35645.0, 21685.0, 13336.0, 8448.0, 5372.0, 3591.0, 2391.0, 1660.0, 992.0, 657.0, 479.0, 285.0, 197.0, 135.0, 98.0, 76.0, 35.0, 25.0, 27.0, 15.0, 14.0, 7.0, 3.0, 2.0, 2.0, 2.0, 0.0, 0.0, 2.0], "bins": [-1.2783203125, -1.2388458251953125, -1.199371337890625, -1.1598968505859375, -1.12042236328125, -1.0809478759765625, -1.041473388671875, -1.0019989013671875, -0.9625244140625, -0.9230499267578125, -0.883575439453125, -0.8441009521484375, -0.80462646484375, -0.7651519775390625, -0.725677490234375, -0.6862030029296875, -0.646728515625, -0.6072540283203125, -0.567779541015625, -0.5283050537109375, -0.48883056640625, -0.4493560791015625, -0.409881591796875, -0.3704071044921875, -0.3309326171875, -0.2914581298828125, -0.251983642578125, -0.2125091552734375, -0.17303466796875, -0.1335601806640625, -0.094085693359375, -0.0546112060546875, -0.01513671875, 0.0243377685546875, 0.063812255859375, 0.1032867431640625, 0.14276123046875, 0.1822357177734375, 0.221710205078125, 0.2611846923828125, 0.3006591796875, 0.3401336669921875, 0.379608154296875, 0.4190826416015625, 0.45855712890625, 0.4980316162109375, 0.537506103515625, 0.5769805908203125, 0.616455078125, 0.6559295654296875, 0.695404052734375, 0.7348785400390625, 0.77435302734375, 0.8138275146484375, 0.853302001953125, 0.8927764892578125, 0.9322509765625, 0.9717254638671875, 1.011199951171875, 1.0506744384765625, 1.09014892578125, 1.1296234130859375, 1.169097900390625, 1.2085723876953125, 1.248046875]}, "gradients/decoder.transformer.h.20.crossattention.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 5.0, 3.0, 4.0, 4.0, 4.0, 5.0, 5.0, 6.0, 9.0, 7.0, 10.0, 18.0, 13.0, 22.0, 24.0, 18.0, 16.0, 28.0, 22.0, 15.0, 35.0, 34.0, 32.0, 37.0, 41.0, 34.0, 43.0, 1061.0, 41.0, 42.0, 34.0, 37.0, 35.0, 39.0, 40.0, 27.0, 33.0, 21.0, 20.0, 16.0, 20.0, 9.0, 14.0, 9.0, 7.0, 11.0, 6.0, 8.0, 4.0, 3.0, 3.0, 1.0, 2.0, 2.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 2.0], "bins": [-15.4453125, -14.9337158203125, -14.422119140625, -13.9105224609375, -13.39892578125, -12.8873291015625, -12.375732421875, -11.8641357421875, -11.3525390625, -10.8409423828125, -10.329345703125, -9.8177490234375, -9.30615234375, -8.7945556640625, -8.282958984375, -7.7713623046875, -7.259765625, -6.7481689453125, -6.236572265625, -5.7249755859375, -5.21337890625, -4.7017822265625, -4.190185546875, -3.6785888671875, -3.1669921875, -2.6553955078125, -2.143798828125, -1.6322021484375, -1.12060546875, -0.6090087890625, -0.097412109375, 0.4141845703125, 0.92578125, 1.4373779296875, 1.948974609375, 2.4605712890625, 2.97216796875, 3.4837646484375, 3.995361328125, 4.5069580078125, 5.0185546875, 5.5301513671875, 6.041748046875, 6.5533447265625, 7.06494140625, 7.5765380859375, 8.088134765625, 8.5997314453125, 9.111328125, 9.6229248046875, 10.134521484375, 10.6461181640625, 11.15771484375, 11.6693115234375, 12.180908203125, 12.6925048828125, 13.2041015625, 13.7156982421875, 14.227294921875, 14.7388916015625, 15.25048828125, 15.7620849609375, 16.273681640625, 16.7852783203125, 17.296875]}, "gradients/decoder.transformer.h.20.crossattention.c_attn.weight": {"_type": "histogram", "values": [3.0, 2.0, 2.0, 1.0, 7.0, 8.0, 15.0, 15.0, 30.0, 35.0, 44.0, 62.0, 97.0, 144.0, 179.0, 277.0, 388.0, 546.0, 766.0, 1070.0, 1571.0, 2128.0, 3156.0, 4610.0, 6645.0, 9815.0, 14421.0, 21705.0, 33534.0, 52836.0, 86389.0, 167280.0, 1357121.0, 127660.0, 72361.0, 44056.0, 28659.0, 18860.0, 12862.0, 8544.0, 5820.0, 4104.0, 2722.0, 1979.0, 1422.0, 958.0, 643.0, 474.0, 312.0, 221.0, 165.0, 125.0, 89.0, 68.0, 37.0, 33.0, 27.0, 19.0, 10.0, 7.0, 4.0, 5.0, 2.0, 2.0], "bins": [-0.7080078125, -0.6861038208007812, -0.6641998291015625, -0.6422958374023438, -0.620391845703125, -0.5984878540039062, -0.5765838623046875, -0.5546798706054688, -0.53277587890625, -0.5108718872070312, -0.4889678955078125, -0.46706390380859375, -0.445159912109375, -0.42325592041015625, -0.4013519287109375, -0.37944793701171875, -0.3575439453125, -0.33563995361328125, -0.3137359619140625, -0.29183197021484375, -0.269927978515625, -0.24802398681640625, -0.2261199951171875, -0.20421600341796875, -0.18231201171875, -0.16040802001953125, -0.1385040283203125, -0.11660003662109375, -0.094696044921875, -0.07279205322265625, -0.0508880615234375, -0.02898406982421875, -0.007080078125, 0.01482391357421875, 0.0367279052734375, 0.05863189697265625, 0.080535888671875, 0.10243988037109375, 0.1243438720703125, 0.14624786376953125, 0.16815185546875, 0.19005584716796875, 0.2119598388671875, 0.23386383056640625, 0.255767822265625, 0.27767181396484375, 0.2995758056640625, 0.32147979736328125, 0.3433837890625, 0.36528778076171875, 0.3871917724609375, 0.40909576416015625, 0.430999755859375, 0.45290374755859375, 0.4748077392578125, 0.49671173095703125, 0.51861572265625, 0.5405197143554688, 0.5624237060546875, 0.5843276977539062, 0.606231689453125, 0.6281356811523438, 0.6500396728515625, 0.6719436645507812, 0.69384765625]}, "gradients/decoder.transformer.h.20.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 4.0, 3.0, 2.0, 2.0, 3.0, 2.0, 6.0, 5.0, 8.0, 6.0, 12.0, 13.0, 18.0, 13.0, 20.0, 31.0, 32.0, 41.0, 47.0, 59.0, 88.0, 86.0, 103.0, 77.0, 55.0, 48.0, 43.0, 40.0, 24.0, 19.0, 26.0, 22.0, 13.0, 12.0, 6.0, 7.0, 4.0, 3.0, 6.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.788969039916992e-05, -6.585568189620972e-05, -6.382167339324951e-05, -6.17876648902893e-05, -5.97536563873291e-05, -5.7719647884368896e-05, -5.568563938140869e-05, -5.3651630878448486e-05, -5.161762237548828e-05, -4.9583613872528076e-05, -4.754960536956787e-05, -4.5515596866607666e-05, -4.348158836364746e-05, -4.1447579860687256e-05, -3.941357135772705e-05, -3.7379562854766846e-05, -3.534555435180664e-05, -3.3311545848846436e-05, -3.127753734588623e-05, -2.9243528842926025e-05, -2.720952033996582e-05, -2.5175511837005615e-05, -2.314150333404541e-05, -2.1107494831085205e-05, -1.9073486328125e-05, -1.7039477825164795e-05, -1.500546932220459e-05, -1.2971460819244385e-05, -1.093745231628418e-05, -8.903443813323975e-06, -6.8694353103637695e-06, -4.8354268074035645e-06, -2.8014183044433594e-06, -7.674098014831543e-07, 1.2665987014770508e-06, 3.300607204437256e-06, 5.334615707397461e-06, 7.368624210357666e-06, 9.402632713317871e-06, 1.1436641216278076e-05, 1.3470649719238281e-05, 1.5504658222198486e-05, 1.753866672515869e-05, 1.9572675228118896e-05, 2.16066837310791e-05, 2.3640692234039307e-05, 2.5674700736999512e-05, 2.7708709239959717e-05, 2.9742717742919922e-05, 3.177672624588013e-05, 3.381073474884033e-05, 3.584474325180054e-05, 3.787875175476074e-05, 3.991276025772095e-05, 4.194676876068115e-05, 4.398077726364136e-05, 4.601478576660156e-05, 4.804879426956177e-05, 5.008280277252197e-05, 5.211681127548218e-05, 5.415081977844238e-05, 5.618482828140259e-05, 5.821883678436279e-05, 6.0252845287323e-05, 6.22868537902832e-05]}, "gradients/decoder.transformer.h.20.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 2.0, 3.0, 4.0, 13.0, 6.0, 4.0, 20.0, 12.0, 24.0, 32.0, 44.0, 64.0, 61.0, 84.0, 121.0, 203.0, 377.0, 1688.0, 22540.0, 956718.0, 62199.0, 3061.0, 523.0, 216.0, 137.0, 104.0, 86.0, 50.0, 29.0, 43.0, 18.0, 21.0, 11.0, 14.0, 7.0, 5.0, 4.0, 4.0, 3.0, 4.0, 2.0, 4.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.0009441375732421875, -0.0009133219718933105, -0.0008825063705444336, -0.0008516907691955566, -0.0008208751678466797, -0.0007900595664978027, -0.0007592439651489258, -0.0007284283638000488, -0.0006976127624511719, -0.0006667971611022949, -0.000635981559753418, -0.000605165958404541, -0.0005743503570556641, -0.0005435347557067871, -0.0005127191543579102, -0.0004819035530090332, -0.00045108795166015625, -0.0004202723503112793, -0.00038945674896240234, -0.0003586411476135254, -0.00032782554626464844, -0.0002970099449157715, -0.00026619434356689453, -0.00023537874221801758, -0.00020456314086914062, -0.00017374753952026367, -0.00014293193817138672, -0.00011211633682250977, -8.130073547363281e-05, -5.048513412475586e-05, -1.9669532775878906e-05, 1.1146068572998047e-05, 4.1961669921875e-05, 7.277727127075195e-05, 0.0001035928726196289, 0.00013440847396850586, 0.0001652240753173828, 0.00019603967666625977, 0.00022685527801513672, 0.00025767087936401367, 0.0002884864807128906, 0.0003193020820617676, 0.00035011768341064453, 0.0003809332847595215, 0.00041174888610839844, 0.0004425644874572754, 0.00047338008880615234, 0.0005041956901550293, 0.0005350112915039062, 0.0005658268928527832, 0.0005966424942016602, 0.0006274580955505371, 0.0006582736968994141, 0.000689089298248291, 0.000719904899597168, 0.0007507205009460449, 0.0007815361022949219, 0.0008123517036437988, 0.0008431673049926758, 0.0008739829063415527, 0.0009047985076904297, 0.0009356141090393066, 0.0009664297103881836, 0.0009972453117370605, 0.0010280609130859375]}, "gradients/decoder.transformer.h.20.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 3.0, 1.0, 3.0, 12.0, 23.0, 40.0, 60.0, 131.0, 342.0, 195.0, 81.0, 54.0, 29.0, 17.0, 14.0, 8.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.198625513003208e-05, -4.95935782964807e-05, -4.7200897824950516e-05, -4.480822099139914e-05, -4.2415540519868955e-05, -4.002286368631758e-05, -3.7630183214787394e-05, -3.523750638123602e-05, -3.284482954768464e-05, -3.045215089514386e-05, -2.805947224260308e-05, -2.56667954090517e-05, -2.3274114937521517e-05, -2.088143810397014e-05, -1.848875945142936e-05, -1.609608079888858e-05, -1.3703400327358395e-05, -1.1310721674817614e-05, -8.918043022276834e-06, -6.525365279230755e-06, -4.132686626689974e-06, -1.7400079741491936e-06, 6.526697688968852e-07, 3.045348421437666e-06, 5.4380270739784464e-06, 7.830705726519227e-06, 1.0223384379060008e-05, 1.2616062122106086e-05, 1.5008740774646867e-05, 1.740142033668235e-05, 1.9794097170233727e-05, 2.2186775822774507e-05, 2.457945083733648e-05, 2.697212948987726e-05, 2.9364808142418042e-05, 3.175748497596942e-05, 3.41501654474996e-05, 3.654284228105098e-05, 3.893551911460236e-05, 4.132819958613254e-05, 4.3720880057662725e-05, 4.61135568912141e-05, 4.850623736274429e-05, 5.0898914196295664e-05, 5.329159466782585e-05, 5.5684271501377225e-05, 5.80769483349286e-05, 6.0469628806458786e-05, 6.286230927798897e-05, 6.525498611154035e-05, 6.764766294509172e-05, 7.004034705460072e-05, 7.243302388815209e-05, 7.482570072170347e-05, 7.721837755525485e-05, 7.961105438880622e-05, 8.20037312223576e-05, 8.439640805590898e-05, 8.678908488946036e-05, 8.918176899896935e-05, 9.157444583252072e-05, 9.39671226660721e-05, 9.635979949962348e-05, 9.875248360913247e-05, 0.00010114516044268385]}, "gradients/decoder.transformer.h.20.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 3.0, 1.0, 6.0, 1.0, 7.0, 6.0, 5.0, 9.0, 6.0, 14.0, 12.0, 16.0, 25.0, 21.0, 25.0, 24.0, 22.0, 37.0, 40.0, 38.0, 47.0, 38.0, 39.0, 39.0, 39.0, 41.0, 36.0, 42.0, 39.0, 37.0, 26.0, 37.0, 30.0, 23.0, 23.0, 32.0, 26.0, 22.0, 15.0, 11.0, 8.0, 11.0, 6.0, 9.0, 4.0, 6.0, 4.0, 4.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0], "bins": [-2.9087066650390625e-05, -2.820882946252823e-05, -2.7330592274665833e-05, -2.6452355086803436e-05, -2.557411789894104e-05, -2.4695880711078644e-05, -2.3817643523216248e-05, -2.293940633535385e-05, -2.2061169147491455e-05, -2.118293195962906e-05, -2.0304694771766663e-05, -1.9426457583904266e-05, -1.854822039604187e-05, -1.7669983208179474e-05, -1.6791746020317078e-05, -1.591350883245468e-05, -1.5035271644592285e-05, -1.4157034456729889e-05, -1.3278797268867493e-05, -1.2400560081005096e-05, -1.15223228931427e-05, -1.0644085705280304e-05, -9.765848517417908e-06, -8.887611329555511e-06, -8.009374141693115e-06, -7.131136953830719e-06, -6.252899765968323e-06, -5.3746625781059265e-06, -4.49642539024353e-06, -3.618188202381134e-06, -2.739951014518738e-06, -1.8617138266563416e-06, -9.834766387939453e-07, -1.0523945093154907e-07, 7.729977369308472e-07, 1.6512349247932434e-06, 2.5294721126556396e-06, 3.407709300518036e-06, 4.285946488380432e-06, 5.164183676242828e-06, 6.042420864105225e-06, 6.920658051967621e-06, 7.798895239830017e-06, 8.677132427692413e-06, 9.55536961555481e-06, 1.0433606803417206e-05, 1.1311843991279602e-05, 1.2190081179141998e-05, 1.3068318367004395e-05, 1.394655555486679e-05, 1.4824792742729187e-05, 1.5703029930591583e-05, 1.658126711845398e-05, 1.7459504306316376e-05, 1.8337741494178772e-05, 1.9215978682041168e-05, 2.0094215869903564e-05, 2.097245305776596e-05, 2.1850690245628357e-05, 2.2728927433490753e-05, 2.360716462135315e-05, 2.4485401809215546e-05, 2.5363638997077942e-05, 2.6241876184940338e-05, 2.7120113372802734e-05]}, "gradients/decoder.transformer.h.20.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0, 6.0, 6.0, 7.0, 2.0, 5.0, 5.0, 8.0, 6.0, 8.0, 9.0, 12.0, 15.0, 14.0, 19.0, 21.0, 25.0, 21.0, 27.0, 36.0, 37.0, 32.0, 46.0, 50.0, 48.0, 43.0, 42.0, 47.0, 70.0, 40.0, 30.0, 43.0, 36.0, 26.0, 25.0, 20.0, 21.0, 14.0, 15.0, 10.0, 22.0, 12.0, 5.0, 3.0, 7.0, 4.0, 3.0, 1.0, 1.0, 4.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0], "bins": [-28.0625, -27.175048828125, -26.28759765625, -25.400146484375, -24.5126953125, -23.625244140625, -22.73779296875, -21.850341796875, -20.962890625, -20.075439453125, -19.18798828125, -18.300537109375, -17.4130859375, -16.525634765625, -15.63818359375, -14.750732421875, -13.86328125, -12.975830078125, -12.08837890625, -11.200927734375, -10.3134765625, -9.426025390625, -8.53857421875, -7.651123046875, -6.763671875, -5.876220703125, -4.98876953125, -4.101318359375, -3.2138671875, -2.326416015625, -1.43896484375, -0.551513671875, 0.3359375, 1.223388671875, 2.11083984375, 2.998291015625, 3.8857421875, 4.773193359375, 5.66064453125, 6.548095703125, 7.435546875, 8.322998046875, 9.21044921875, 10.097900390625, 10.9853515625, 11.872802734375, 12.76025390625, 13.647705078125, 14.53515625, 15.422607421875, 16.31005859375, 17.197509765625, 18.0849609375, 18.972412109375, 19.85986328125, 20.747314453125, 21.634765625, 22.522216796875, 23.40966796875, 24.297119140625, 25.1845703125, 26.072021484375, 26.95947265625, 27.846923828125, 28.734375]}, "gradients/decoder.transformer.h.20.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 3.0, 4.0, 2.0, 4.0, 2.0, 9.0, 5.0, 8.0, 14.0, 10.0, 23.0, 37.0, 33.0, 53.0, 59.0, 76.0, 119.0, 171.0, 206.0, 327.0, 539.0, 820.0, 1371.0, 2531.0, 4958.0, 10593.0, 24702.0, 69137.0, 295966.0, 479075.0, 98380.0, 32107.0, 13325.0, 6165.0, 3160.0, 1712.0, 949.0, 630.0, 358.0, 249.0, 189.0, 135.0, 106.0, 57.0, 52.0, 38.0, 21.0, 20.0, 10.0, 18.0, 10.0, 5.0, 5.0, 2.0, 5.0, 1.0, 1.0, 2.0, 2.0, 0.0, 3.0], "bins": [-15.640625, -15.154052734375, -14.66748046875, -14.180908203125, -13.6943359375, -13.207763671875, -12.72119140625, -12.234619140625, -11.748046875, -11.261474609375, -10.77490234375, -10.288330078125, -9.8017578125, -9.315185546875, -8.82861328125, -8.342041015625, -7.85546875, -7.368896484375, -6.88232421875, -6.395751953125, -5.9091796875, -5.422607421875, -4.93603515625, -4.449462890625, -3.962890625, -3.476318359375, -2.98974609375, -2.503173828125, -2.0166015625, -1.530029296875, -1.04345703125, -0.556884765625, -0.0703125, 0.416259765625, 0.90283203125, 1.389404296875, 1.8759765625, 2.362548828125, 2.84912109375, 3.335693359375, 3.822265625, 4.308837890625, 4.79541015625, 5.281982421875, 5.7685546875, 6.255126953125, 6.74169921875, 7.228271484375, 7.71484375, 8.201416015625, 8.68798828125, 9.174560546875, 9.6611328125, 10.147705078125, 10.63427734375, 11.120849609375, 11.607421875, 12.093994140625, 12.58056640625, 13.067138671875, 13.5537109375, 14.040283203125, 14.52685546875, 15.013427734375, 15.5]}, "gradients/decoder.transformer.h.20.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 6.0, 2.0, 7.0, 6.0, 7.0, 11.0, 8.0, 10.0, 20.0, 29.0, 21.0, 21.0, 37.0, 37.0, 35.0, 33.0, 48.0, 54.0, 45.0, 83.0, 1981.0, 91.0, 58.0, 65.0, 43.0, 37.0, 52.0, 28.0, 25.0, 25.0, 25.0, 22.0, 22.0, 18.0, 12.0, 12.0, 12.0, 5.0, 3.0, 2.0, 2.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-103.5625, -99.9248046875, -96.287109375, -92.6494140625, -89.01171875, -85.3740234375, -81.736328125, -78.0986328125, -74.4609375, -70.8232421875, -67.185546875, -63.5478515625, -59.91015625, -56.2724609375, -52.634765625, -48.9970703125, -45.359375, -41.7216796875, -38.083984375, -34.4462890625, -30.80859375, -27.1708984375, -23.533203125, -19.8955078125, -16.2578125, -12.6201171875, -8.982421875, -5.3447265625, -1.70703125, 1.9306640625, 5.568359375, 9.2060546875, 12.84375, 16.4814453125, 20.119140625, 23.7568359375, 27.39453125, 31.0322265625, 34.669921875, 38.3076171875, 41.9453125, 45.5830078125, 49.220703125, 52.8583984375, 56.49609375, 60.1337890625, 63.771484375, 67.4091796875, 71.046875, 74.6845703125, 78.322265625, 81.9599609375, 85.59765625, 89.2353515625, 92.873046875, 96.5107421875, 100.1484375, 103.7861328125, 107.423828125, 111.0615234375, 114.69921875, 118.3369140625, 121.974609375, 125.6123046875, 129.25]}, "gradients/decoder.transformer.h.20.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 2.0, 3.0, 5.0, 11.0, 11.0, 14.0, 18.0, 21.0, 23.0, 28.0, 37.0, 31.0, 56.0, 113.0, 157.0, 244.0, 442.0, 1193.0, 9487.0, 3113266.0, 17917.0, 1312.0, 521.0, 264.0, 159.0, 91.0, 79.0, 48.0, 25.0, 26.0, 31.0, 22.0, 9.0, 9.0, 10.0, 9.0, 6.0, 6.0, 4.0, 4.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 2.0], "bins": [-196.125, -190.611328125, -185.09765625, -179.583984375, -174.0703125, -168.556640625, -163.04296875, -157.529296875, -152.015625, -146.501953125, -140.98828125, -135.474609375, -129.9609375, -124.447265625, -118.93359375, -113.419921875, -107.90625, -102.392578125, -96.87890625, -91.365234375, -85.8515625, -80.337890625, -74.82421875, -69.310546875, -63.796875, -58.283203125, -52.76953125, -47.255859375, -41.7421875, -36.228515625, -30.71484375, -25.201171875, -19.6875, -14.173828125, -8.66015625, -3.146484375, 2.3671875, 7.880859375, 13.39453125, 18.908203125, 24.421875, 29.935546875, 35.44921875, 40.962890625, 46.4765625, 51.990234375, 57.50390625, 63.017578125, 68.53125, 74.044921875, 79.55859375, 85.072265625, 90.5859375, 96.099609375, 101.61328125, 107.126953125, 112.640625, 118.154296875, 123.66796875, 129.181640625, 134.6953125, 140.208984375, 145.72265625, 151.236328125, 156.75]}, "gradients/decoder.transformer.h.20.ln_1.weight": {"_type": "histogram", "values": [1.0, 1.0, 267.0, 749.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-116.88456726074219, -79.25813293457031, -41.631690979003906, -4.0052490234375, 33.621185302734375, 71.24761962890625, 108.87406921386719, 146.50050354003906, 184.12693786621094, 221.7533721923828, 259.37982177734375, 297.0062561035156, 334.6326904296875, 372.2591247558594, 409.88555908203125, 447.51202392578125, 485.138427734375, 522.764892578125, 560.3912963867188, 598.0177612304688, 635.6441650390625, 673.2706298828125, 710.8970947265625, 748.5234985351562, 786.1499633789062, 823.7764282226562, 861.40283203125, 899.029296875, 936.6557006835938, 974.2821655273438, 1011.9085693359375, 1049.5350341796875, 1087.1614990234375, 1124.7879638671875, 1162.4144287109375, 1200.040771484375, 1237.667236328125, 1275.293701171875, 1312.920166015625, 1350.546630859375, 1388.1729736328125, 1425.7994384765625, 1463.4259033203125, 1501.05224609375, 1538.6787109375, 1576.30517578125, 1613.931640625, 1651.55810546875, 1689.1845703125, 1726.81103515625, 1764.4375, 1802.0638427734375, 1839.6903076171875, 1877.3167724609375, 1914.9432373046875, 1952.569580078125, 1990.196044921875, 2027.822509765625, 2065.448974609375, 2103.075439453125, 2140.701904296875, 2178.328125, 2215.95458984375, 2253.5810546875, 2291.20751953125]}, "gradients/decoder.transformer.h.20.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 2.0, 5.0, 7.0, 9.0, 7.0, 15.0, 17.0, 12.0, 18.0, 18.0, 27.0, 32.0, 33.0, 37.0, 47.0, 36.0, 41.0, 41.0, 38.0, 42.0, 41.0, 49.0, 52.0, 39.0, 49.0, 40.0, 38.0, 23.0, 27.0, 27.0, 29.0, 24.0, 15.0, 12.0, 6.0, 16.0, 7.0, 9.0, 5.0, 6.0, 4.0, 4.0, 2.0, 4.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-378.63482666015625, -367.45819091796875, -356.2815246582031, -345.1048889160156, -333.9282531738281, -322.7515869140625, -311.574951171875, -300.3983154296875, -289.2216796875, -278.0450439453125, -266.8683776855469, -255.69174194335938, -244.51510620117188, -233.3384552001953, -222.16180419921875, -210.98516845703125, -199.80850219726562, -188.63185119628906, -177.45521545410156, -166.278564453125, -155.1019287109375, -143.92527770996094, -132.74862670898438, -121.57198333740234, -110.39533996582031, -99.21869659423828, -88.04205322265625, -76.86540222167969, -65.68875885009766, -54.512115478515625, -43.33546447753906, -32.15882110595703, -20.982147216796875, -9.805501937866211, 1.3711433410644531, 12.54779052734375, 23.72443389892578, 34.90107727050781, 46.077728271484375, 57.254371643066406, 68.43101501464844, 79.60765838623047, 90.7843017578125, 101.96095275878906, 113.1375961303711, 124.31423950195312, 135.4908905029297, 146.66754150390625, 157.84417724609375, 169.0208282470703, 180.1974639892578, 191.37411499023438, 202.55075073242188, 213.72740173339844, 224.904052734375, 236.0806884765625, 247.25733947753906, 258.4339904785156, 269.6106262207031, 280.78729248046875, 291.96392822265625, 303.14056396484375, 314.31719970703125, 325.4938659667969, 336.6705017089844]}, "gradients/decoder.transformer.h.19.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 6.0, 4.0, 6.0, 4.0, 5.0, 4.0, 6.0, 8.0, 7.0, 7.0, 13.0, 18.0, 9.0, 17.0, 27.0, 24.0, 24.0, 18.0, 36.0, 36.0, 35.0, 48.0, 44.0, 54.0, 45.0, 34.0, 43.0, 64.0, 52.0, 33.0, 40.0, 39.0, 23.0, 29.0, 18.0, 18.0, 19.0, 14.0, 14.0, 13.0, 19.0, 4.0, 5.0, 7.0, 5.0, 3.0, 1.0, 2.0, 2.0, 2.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 3.0], "bins": [-28.390625, -27.498046875, -26.60546875, -25.712890625, -24.8203125, -23.927734375, -23.03515625, -22.142578125, -21.25, -20.357421875, -19.46484375, -18.572265625, -17.6796875, -16.787109375, -15.89453125, -15.001953125, -14.109375, -13.216796875, -12.32421875, -11.431640625, -10.5390625, -9.646484375, -8.75390625, -7.861328125, -6.96875, -6.076171875, -5.18359375, -4.291015625, -3.3984375, -2.505859375, -1.61328125, -0.720703125, 0.171875, 1.064453125, 1.95703125, 2.849609375, 3.7421875, 4.634765625, 5.52734375, 6.419921875, 7.3125, 8.205078125, 9.09765625, 9.990234375, 10.8828125, 11.775390625, 12.66796875, 13.560546875, 14.453125, 15.345703125, 16.23828125, 17.130859375, 18.0234375, 18.916015625, 19.80859375, 20.701171875, 21.59375, 22.486328125, 23.37890625, 24.271484375, 25.1640625, 26.056640625, 26.94921875, 27.841796875, 28.734375]}, "gradients/decoder.transformer.h.19.mlp.c_proj.weight": {"_type": "histogram", "values": [3.0, 1.0, 2.0, 4.0, 5.0, 5.0, 16.0, 19.0, 19.0, 23.0, 29.0, 58.0, 61.0, 81.0, 110.0, 144.0, 201.0, 227.0, 371.0, 454.0, 679.0, 912.0, 1266.0, 1772.0, 2546.0, 3801.0, 6014.0, 10273.0, 36984.0, 513258.0, 3348034.0, 218541.0, 23753.0, 8804.0, 5140.0, 3390.0, 2181.0, 1500.0, 1018.0, 693.0, 499.0, 349.0, 279.0, 216.0, 135.0, 117.0, 73.0, 50.0, 47.0, 32.0, 26.0, 14.0, 20.0, 10.0, 10.0, 7.0, 8.0, 3.0, 2.0, 7.0, 4.0, 3.0, 0.0, 1.0], "bins": [-62.9375, -60.86328125, -58.7890625, -56.71484375, -54.640625, -52.56640625, -50.4921875, -48.41796875, -46.34375, -44.26953125, -42.1953125, -40.12109375, -38.046875, -35.97265625, -33.8984375, -31.82421875, -29.75, -27.67578125, -25.6015625, -23.52734375, -21.453125, -19.37890625, -17.3046875, -15.23046875, -13.15625, -11.08203125, -9.0078125, -6.93359375, -4.859375, -2.78515625, -0.7109375, 1.36328125, 3.4375, 5.51171875, 7.5859375, 9.66015625, 11.734375, 13.80859375, 15.8828125, 17.95703125, 20.03125, 22.10546875, 24.1796875, 26.25390625, 28.328125, 30.40234375, 32.4765625, 34.55078125, 36.625, 38.69921875, 40.7734375, 42.84765625, 44.921875, 46.99609375, 49.0703125, 51.14453125, 53.21875, 55.29296875, 57.3671875, 59.44140625, 61.515625, 63.58984375, 65.6640625, 67.73828125, 69.8125]}, "gradients/decoder.transformer.h.19.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 6.0, 3.0, 5.0, 4.0, 5.0, 10.0, 11.0, 17.0, 12.0, 22.0, 31.0, 40.0, 77.0, 227.0, 737.0, 1651.0, 772.0, 207.0, 78.0, 42.0, 27.0, 11.0, 12.0, 9.0, 13.0, 7.0, 9.0, 7.0, 4.0, 2.0, 1.0, 6.0, 7.0, 2.0, 4.0, 3.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-114.375, -111.2412109375, -108.107421875, -104.9736328125, -101.83984375, -98.7060546875, -95.572265625, -92.4384765625, -89.3046875, -86.1708984375, -83.037109375, -79.9033203125, -76.76953125, -73.6357421875, -70.501953125, -67.3681640625, -64.234375, -61.1005859375, -57.966796875, -54.8330078125, -51.69921875, -48.5654296875, -45.431640625, -42.2978515625, -39.1640625, -36.0302734375, -32.896484375, -29.7626953125, -26.62890625, -23.4951171875, -20.361328125, -17.2275390625, -14.09375, -10.9599609375, -7.826171875, -4.6923828125, -1.55859375, 1.5751953125, 4.708984375, 7.8427734375, 10.9765625, 14.1103515625, 17.244140625, 20.3779296875, 23.51171875, 26.6455078125, 29.779296875, 32.9130859375, 36.046875, 39.1806640625, 42.314453125, 45.4482421875, 48.58203125, 51.7158203125, 54.849609375, 57.9833984375, 61.1171875, 64.2509765625, 67.384765625, 70.5185546875, 73.65234375, 76.7861328125, 79.919921875, 83.0537109375, 86.1875]}, "gradients/decoder.transformer.h.19.mlp.c_fc.weight": {"_type": "histogram", "values": [3.0, 1.0, 0.0, 4.0, 1.0, 4.0, 8.0, 7.0, 14.0, 13.0, 19.0, 28.0, 57.0, 66.0, 85.0, 116.0, 210.0, 318.0, 479.0, 740.0, 1116.0, 1710.0, 2698.0, 4330.0, 7267.0, 13141.0, 27867.0, 97445.0, 3269291.0, 664405.0, 55003.0, 20887.0, 10731.0, 6088.0, 3642.0, 2311.0, 1420.0, 916.0, 608.0, 401.0, 281.0, 183.0, 128.0, 88.0, 58.0, 32.0, 20.0, 20.0, 14.0, 9.0, 4.0, 5.0, 3.0, 1.0, 2.0, 3.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-108.75, -104.9765625, -101.203125, -97.4296875, -93.65625, -89.8828125, -86.109375, -82.3359375, -78.5625, -74.7890625, -71.015625, -67.2421875, -63.46875, -59.6953125, -55.921875, -52.1484375, -48.375, -44.6015625, -40.828125, -37.0546875, -33.28125, -29.5078125, -25.734375, -21.9609375, -18.1875, -14.4140625, -10.640625, -6.8671875, -3.09375, 0.6796875, 4.453125, 8.2265625, 12.0, 15.7734375, 19.546875, 23.3203125, 27.09375, 30.8671875, 34.640625, 38.4140625, 42.1875, 45.9609375, 49.734375, 53.5078125, 57.28125, 61.0546875, 64.828125, 68.6015625, 72.375, 76.1484375, 79.921875, 83.6953125, 87.46875, 91.2421875, 95.015625, 98.7890625, 102.5625, 106.3359375, 110.109375, 113.8828125, 117.65625, 121.4296875, 125.203125, 128.9765625, 132.75]}, "gradients/decoder.transformer.h.19.ln_2.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 0.0, 0.0, 5.0, 2.0, 3.0, 1.0, 4.0, 8.0, 12.0, 18.0, 13.0, 24.0, 26.0, 44.0, 73.0, 129.0, 183.0, 155.0, 92.0, 63.0, 32.0, 31.0, 23.0, 18.0, 16.0, 15.0, 10.0, 5.0, 1.0, 2.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-270.86236572265625, -259.8997802734375, -248.93719482421875, -237.97459411621094, -227.0120086669922, -216.04942321777344, -205.08682250976562, -194.12423706054688, -183.16165161132812, -172.19906616210938, -161.23648071289062, -150.2738800048828, -139.31129455566406, -128.3487091064453, -117.38611602783203, -106.42352294921875, -95.4609375, -84.49835205078125, -73.53575897216797, -62.57316970825195, -51.61058044433594, -40.64799118041992, -29.685401916503906, -18.722808837890625, -7.760223388671875, 3.2023658752441406, 14.164955139160156, 25.127544403076172, 36.09013366699219, 47.0527229309082, 58.01531219482422, 68.9779052734375, 79.94046020507812, 90.90304565429688, 101.86563873291016, 112.82823181152344, 123.79081726074219, 134.75340270996094, 145.71600341796875, 156.6785888671875, 167.64117431640625, 178.603759765625, 189.56634521484375, 200.52894592285156, 211.4915313720703, 222.45411682128906, 233.41671752929688, 244.37930297851562, 255.34188842773438, 266.3044738769531, 277.2670593261719, 288.2296447753906, 299.1922607421875, 310.15484619140625, 321.117431640625, 332.08001708984375, 343.0426025390625, 354.00518798828125, 364.9677734375, 375.93035888671875, 386.8929443359375, 397.8555603027344, 408.8181457519531, 419.7807312011719, 430.7433166503906]}, "gradients/decoder.transformer.h.19.ln_2.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 4.0, 7.0, 7.0, 10.0, 6.0, 11.0, 5.0, 12.0, 18.0, 10.0, 9.0, 18.0, 18.0, 23.0, 26.0, 18.0, 38.0, 27.0, 29.0, 44.0, 44.0, 32.0, 32.0, 28.0, 43.0, 30.0, 38.0, 32.0, 41.0, 30.0, 22.0, 35.0, 29.0, 33.0, 29.0, 25.0, 14.0, 27.0, 21.0, 17.0, 10.0, 9.0, 7.0, 10.0, 7.0, 9.0, 6.0, 4.0, 4.0, 5.0, 1.0, 0.0, 1.0, 1.0, 1.0], "bins": [-188.9227752685547, -183.27813720703125, -177.63348388671875, -171.98883056640625, -166.3441925048828, -160.69955444335938, -155.05490112304688, -149.41024780273438, -143.76560974121094, -138.1209716796875, -132.476318359375, -126.83167266845703, -121.18702697753906, -115.5423812866211, -109.89773559570312, -104.25308990478516, -98.60844421386719, -92.96379852294922, -87.31915283203125, -81.67450714111328, -76.02986145019531, -70.38521575927734, -64.74057006835938, -59.095924377441406, -53.45127868652344, -47.80663299560547, -42.1619873046875, -36.51734161376953, -30.872695922851562, -25.228050231933594, -19.583404541015625, -13.938758850097656, -8.29412841796875, -2.6494827270507812, 2.9951629638671875, 8.639808654785156, 14.284454345703125, 19.929100036621094, 25.573745727539062, 31.21839141845703, 36.863037109375, 42.50768280029297, 48.15232849121094, 53.796974182128906, 59.441619873046875, 65.08626556396484, 70.73091125488281, 76.37555694580078, 82.02020263671875, 87.66484832763672, 93.30949401855469, 98.95413970947266, 104.59878540039062, 110.2434310913086, 115.88807678222656, 121.53272247314453, 127.1773681640625, 132.822021484375, 138.46665954589844, 144.11129760742188, 149.75595092773438, 155.40060424804688, 161.0452423095703, 166.68988037109375, 172.33453369140625]}, "gradients/decoder.transformer.h.19.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 3.0, 2.0, 7.0, 6.0, 7.0, 8.0, 2.0, 6.0, 8.0, 8.0, 7.0, 17.0, 16.0, 17.0, 17.0, 17.0, 37.0, 28.0, 26.0, 32.0, 45.0, 48.0, 31.0, 49.0, 53.0, 46.0, 56.0, 40.0, 43.0, 31.0, 39.0, 39.0, 37.0, 25.0, 20.0, 19.0, 21.0, 15.0, 10.0, 16.0, 9.0, 12.0, 10.0, 8.0, 5.0, 5.0, 3.0, 3.0, 1.0, 1.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-29.8125, -28.884033203125, -27.95556640625, -27.027099609375, -26.0986328125, -25.170166015625, -24.24169921875, -23.313232421875, -22.384765625, -21.456298828125, -20.52783203125, -19.599365234375, -18.6708984375, -17.742431640625, -16.81396484375, -15.885498046875, -14.95703125, -14.028564453125, -13.10009765625, -12.171630859375, -11.2431640625, -10.314697265625, -9.38623046875, -8.457763671875, -7.529296875, -6.600830078125, -5.67236328125, -4.743896484375, -3.8154296875, -2.886962890625, -1.95849609375, -1.030029296875, -0.1015625, 0.826904296875, 1.75537109375, 2.683837890625, 3.6123046875, 4.540771484375, 5.46923828125, 6.397705078125, 7.326171875, 8.254638671875, 9.18310546875, 10.111572265625, 11.0400390625, 11.968505859375, 12.89697265625, 13.825439453125, 14.75390625, 15.682373046875, 16.61083984375, 17.539306640625, 18.4677734375, 19.396240234375, 20.32470703125, 21.253173828125, 22.181640625, 23.110107421875, 24.03857421875, 24.967041015625, 25.8955078125, 26.823974609375, 27.75244140625, 28.680908203125, 29.609375]}, "gradients/decoder.transformer.h.19.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 3.0, 2.0, 4.0, 9.0, 8.0, 11.0, 15.0, 26.0, 28.0, 68.0, 82.0, 117.0, 141.0, 217.0, 293.0, 435.0, 638.0, 917.0, 1279.0, 1933.0, 2687.0, 3753.0, 5241.0, 7696.0, 10953.0, 16064.0, 24807.0, 38175.0, 61607.0, 108594.0, 293793.0, 212464.0, 95006.0, 55759.0, 34738.0, 22025.0, 15184.0, 10272.0, 7021.0, 4847.0, 3496.0, 2394.0, 1673.0, 1214.0, 873.0, 582.0, 434.0, 302.0, 225.0, 129.0, 113.0, 62.0, 54.0, 36.0, 17.0, 22.0, 12.0, 8.0, 6.0, 3.0, 2.0, 3.0, 2.0], "bins": [-1.0283203125, -0.9960784912109375, -0.963836669921875, -0.9315948486328125, -0.89935302734375, -0.8671112060546875, -0.834869384765625, -0.8026275634765625, -0.7703857421875, -0.7381439208984375, -0.705902099609375, -0.6736602783203125, -0.64141845703125, -0.6091766357421875, -0.576934814453125, -0.5446929931640625, -0.512451171875, -0.4802093505859375, -0.447967529296875, -0.4157257080078125, -0.38348388671875, -0.3512420654296875, -0.319000244140625, -0.2867584228515625, -0.2545166015625, -0.2222747802734375, -0.190032958984375, -0.1577911376953125, -0.12554931640625, -0.0933074951171875, -0.061065673828125, -0.0288238525390625, 0.00341796875, 0.0356597900390625, 0.067901611328125, 0.1001434326171875, 0.13238525390625, 0.1646270751953125, 0.196868896484375, 0.2291107177734375, 0.2613525390625, 0.2935943603515625, 0.325836181640625, 0.3580780029296875, 0.39031982421875, 0.4225616455078125, 0.454803466796875, 0.4870452880859375, 0.519287109375, 0.5515289306640625, 0.583770751953125, 0.6160125732421875, 0.64825439453125, 0.6804962158203125, 0.712738037109375, 0.7449798583984375, 0.7772216796875, 0.8094635009765625, 0.841705322265625, 0.8739471435546875, 0.90618896484375, 0.9384307861328125, 0.970672607421875, 1.0029144287109375, 1.03515625]}, "gradients/decoder.transformer.h.19.crossattention.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 3.0, 4.0, 1.0, 2.0, 3.0, 3.0, 7.0, 4.0, 6.0, 11.0, 16.0, 12.0, 14.0, 13.0, 11.0, 20.0, 19.0, 23.0, 25.0, 17.0, 18.0, 21.0, 31.0, 35.0, 41.0, 31.0, 37.0, 1064.0, 34.0, 35.0, 39.0, 47.0, 37.0, 28.0, 31.0, 44.0, 30.0, 34.0, 21.0, 24.0, 26.0, 24.0, 12.0, 15.0, 16.0, 8.0, 5.0, 12.0, 10.0, 2.0, 6.0, 5.0, 0.0, 5.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-15.671875, -15.149169921875, -14.62646484375, -14.103759765625, -13.5810546875, -13.058349609375, -12.53564453125, -12.012939453125, -11.490234375, -10.967529296875, -10.44482421875, -9.922119140625, -9.3994140625, -8.876708984375, -8.35400390625, -7.831298828125, -7.30859375, -6.785888671875, -6.26318359375, -5.740478515625, -5.2177734375, -4.695068359375, -4.17236328125, -3.649658203125, -3.126953125, -2.604248046875, -2.08154296875, -1.558837890625, -1.0361328125, -0.513427734375, 0.00927734375, 0.531982421875, 1.0546875, 1.577392578125, 2.10009765625, 2.622802734375, 3.1455078125, 3.668212890625, 4.19091796875, 4.713623046875, 5.236328125, 5.759033203125, 6.28173828125, 6.804443359375, 7.3271484375, 7.849853515625, 8.37255859375, 8.895263671875, 9.41796875, 9.940673828125, 10.46337890625, 10.986083984375, 11.5087890625, 12.031494140625, 12.55419921875, 13.076904296875, 13.599609375, 14.122314453125, 14.64501953125, 15.167724609375, 15.6904296875, 16.213134765625, 16.73583984375, 17.258544921875, 17.78125]}, "gradients/decoder.transformer.h.19.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 2.0, 9.0, 10.0, 13.0, 20.0, 24.0, 41.0, 61.0, 78.0, 129.0, 164.0, 252.0, 368.0, 506.0, 747.0, 1065.0, 1560.0, 2296.0, 3373.0, 4839.0, 7073.0, 10539.0, 15446.0, 23100.0, 34857.0, 53320.0, 86352.0, 162372.0, 1349895.0, 127704.0, 72643.0, 45334.0, 30121.0, 20018.0, 13436.0, 9229.0, 6375.0, 4334.0, 2961.0, 1954.0, 1417.0, 971.0, 669.0, 440.0, 335.0, 206.0, 158.0, 86.0, 77.0, 67.0, 32.0, 19.0, 17.0, 16.0, 9.0, 3.0, 4.0, 1.0, 1.0, 1.0], "bins": [-0.72802734375, -0.7055130004882812, -0.6829986572265625, -0.6604843139648438, -0.637969970703125, -0.6154556274414062, -0.5929412841796875, -0.5704269409179688, -0.54791259765625, -0.5253982543945312, -0.5028839111328125, -0.48036956787109375, -0.457855224609375, -0.43534088134765625, -0.4128265380859375, -0.39031219482421875, -0.3677978515625, -0.34528350830078125, -0.3227691650390625, -0.30025482177734375, -0.277740478515625, -0.25522613525390625, -0.2327117919921875, -0.21019744873046875, -0.18768310546875, -0.16516876220703125, -0.1426544189453125, -0.12014007568359375, -0.097625732421875, -0.07511138916015625, -0.0525970458984375, -0.03008270263671875, -0.007568359375, 0.01494598388671875, 0.0374603271484375, 0.05997467041015625, 0.082489013671875, 0.10500335693359375, 0.1275177001953125, 0.15003204345703125, 0.17254638671875, 0.19506072998046875, 0.2175750732421875, 0.24008941650390625, 0.262603759765625, 0.28511810302734375, 0.3076324462890625, 0.33014678955078125, 0.3526611328125, 0.37517547607421875, 0.3976898193359375, 0.42020416259765625, 0.442718505859375, 0.46523284912109375, 0.4877471923828125, 0.5102615356445312, 0.53277587890625, 0.5552902221679688, 0.5778045654296875, 0.6003189086914062, 0.622833251953125, 0.6453475952148438, 0.6678619384765625, 0.6903762817382812, 0.712890625]}, "gradients/decoder.transformer.h.19.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 2.0, 5.0, 5.0, 3.0, 4.0, 9.0, 6.0, 4.0, 8.0, 8.0, 10.0, 28.0, 40.0, 47.0, 84.0, 162.0, 234.0, 112.0, 75.0, 35.0, 30.0, 18.0, 19.0, 12.0, 12.0, 9.0, 7.0, 5.0, 4.0, 2.0, 3.0, 2.0, 3.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.721517562866211e-05, -9.42060723900795e-05, -9.119696915149689e-05, -8.818786591291428e-05, -8.517876267433167e-05, -8.216965943574905e-05, -7.916055619716644e-05, -7.615145295858383e-05, -7.314234972000122e-05, -7.013324648141861e-05, -6.7124143242836e-05, -6.411504000425339e-05, -6.110593676567078e-05, -5.8096833527088165e-05, -5.5087730288505554e-05, -5.207862704992294e-05, -4.906952381134033e-05, -4.606042057275772e-05, -4.305131733417511e-05, -4.00422140955925e-05, -3.703311085700989e-05, -3.4024007618427277e-05, -3.1014904379844666e-05, -2.8005801141262054e-05, -2.4996697902679443e-05, -2.1987594664096832e-05, -1.897849142551422e-05, -1.596938818693161e-05, -1.2960284948348999e-05, -9.951181709766388e-06, -6.942078471183777e-06, -3.932975232601166e-06, -9.238719940185547e-07, 2.0852312445640564e-06, 5.0943344831466675e-06, 8.103437721729279e-06, 1.111254096031189e-05, 1.41216441988945e-05, 1.7130747437477112e-05, 2.0139850676059723e-05, 2.3148953914642334e-05, 2.6158057153224945e-05, 2.9167160391807556e-05, 3.217626363039017e-05, 3.518536686897278e-05, 3.819447010755539e-05, 4.1203573346138e-05, 4.421267658472061e-05, 4.722177982330322e-05, 5.0230883061885834e-05, 5.3239986300468445e-05, 5.6249089539051056e-05, 5.925819277763367e-05, 6.226729601621628e-05, 6.527639925479889e-05, 6.82855024933815e-05, 7.129460573196411e-05, 7.430370897054672e-05, 7.731281220912933e-05, 8.032191544771194e-05, 8.333101868629456e-05, 8.634012192487717e-05, 8.934922516345978e-05, 9.235832840204239e-05, 9.5367431640625e-05]}, "gradients/decoder.transformer.h.19.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 5.0, 8.0, 4.0, 7.0, 15.0, 18.0, 27.0, 26.0, 51.0, 52.0, 89.0, 240.0, 1020.0, 10788.0, 994002.0, 39261.0, 2196.0, 367.0, 145.0, 80.0, 42.0, 25.0, 17.0, 14.0, 15.0, 10.0, 8.0, 2.0, 6.0, 7.0, 3.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0014514923095703125, -0.0014056861400604248, -0.0013598799705505371, -0.0013140738010406494, -0.0012682676315307617, -0.001222461462020874, -0.0011766552925109863, -0.0011308491230010986, -0.001085042953491211, -0.0010392367839813232, -0.0009934306144714355, -0.0009476244449615479, -0.0009018182754516602, -0.0008560121059417725, -0.0008102059364318848, -0.0007643997669219971, -0.0007185935974121094, -0.0006727874279022217, -0.000626981258392334, -0.0005811750888824463, -0.0005353689193725586, -0.0004895627498626709, -0.0004437565803527832, -0.0003979504108428955, -0.0003521442413330078, -0.0003063380718231201, -0.0002605319023132324, -0.00021472573280334473, -0.00016891956329345703, -0.00012311339378356934, -7.730722427368164e-05, -3.1501054763793945e-05, 1.430511474609375e-05, 6.0111284255981445e-05, 0.00010591745376586914, 0.00015172362327575684, 0.00019752979278564453, 0.00024333596229553223, 0.0002891421318054199, 0.0003349483013153076, 0.0003807544708251953, 0.000426560640335083, 0.0004723668098449707, 0.0005181729793548584, 0.0005639791488647461, 0.0006097853183746338, 0.0006555914878845215, 0.0007013976573944092, 0.0007472038269042969, 0.0007930099964141846, 0.0008388161659240723, 0.00088462233543396, 0.0009304285049438477, 0.0009762346744537354, 0.001022040843963623, 0.0010678470134735107, 0.0011136531829833984, 0.0011594593524932861, 0.0012052655220031738, 0.0012510716915130615, 0.0012968778610229492, 0.001342684030532837, 0.0013884902000427246, 0.0014342963695526123, 0.0014801025390625]}, "gradients/decoder.transformer.h.19.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 4.0, 5.0, 9.0, 21.0, 29.0, 97.0, 190.0, 379.0, 151.0, 60.0, 31.0, 23.0, 10.0, 3.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.83355935709551e-05, -7.511926378356293e-05, -7.190294127212837e-05, -6.86866114847362e-05, -6.547028169734403e-05, -6.225395190995187e-05, -5.903762939851731e-05, -5.582129961112514e-05, -5.260496982373297e-05, -4.938864367431961e-05, -4.617231388692744e-05, -4.295598773751408e-05, -3.973965795012191e-05, -3.652333180070855e-05, -3.3307005651295185e-05, -3.0090675863903016e-05, -2.6874349714489654e-05, -2.3658021746086888e-05, -2.0441693777684122e-05, -1.722536762827076e-05, -1.4009038750373293e-05, -1.0792711691465229e-05, -7.576383723062463e-06, -4.360055754659697e-06, -1.1437277862569317e-06, 2.0725999547721585e-06, 5.288927695801249e-06, 8.505255209456664e-06, 1.172158317785943e-05, 1.4937910236767493e-05, 1.815423820517026e-05, 2.1370566173573025e-05, 2.458689414197579e-05, 2.7803222110378556e-05, 3.101955007878132e-05, 3.4235876228194684e-05, 3.745220601558685e-05, 4.0668532165000215e-05, 4.388485831441358e-05, 4.7101188101805747e-05, 5.0317517889197916e-05, 5.353384403861128e-05, 5.675017382600345e-05, 5.996649997541681e-05, 6.318282976280898e-05, 6.639915227424353e-05, 6.96154820616357e-05, 7.283181184902787e-05, 7.604813436046243e-05, 7.92644641478546e-05, 8.248078665928915e-05, 8.569711644668132e-05, 8.891344623407349e-05, 9.212977602146566e-05, 9.534609853290021e-05, 9.856242832029238e-05, 0.00010177875810768455, 0.00010499508789507672, 0.00010821141040651128, 0.00011142774019390345, 0.00011464406998129562, 0.00011786039976868778, 0.00012107672228012234, 0.00012429305934347212, 0.00012750938185490668]}, "gradients/decoder.transformer.h.19.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 5.0, 2.0, 3.0, 6.0, 4.0, 9.0, 9.0, 12.0, 15.0, 14.0, 18.0, 15.0, 27.0, 32.0, 29.0, 41.0, 25.0, 43.0, 38.0, 36.0, 35.0, 46.0, 50.0, 44.0, 39.0, 40.0, 30.0, 34.0, 40.0, 40.0, 25.0, 31.0, 29.0, 23.0, 28.0, 14.0, 20.0, 15.0, 9.0, 9.0, 8.0, 3.0, 6.0, 2.0, 6.0, 3.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.9802322387695312e-05, -2.8777867555618286e-05, -2.775341272354126e-05, -2.6728957891464233e-05, -2.5704503059387207e-05, -2.468004822731018e-05, -2.3655593395233154e-05, -2.2631138563156128e-05, -2.16066837310791e-05, -2.0582228899002075e-05, -1.955777406692505e-05, -1.8533319234848022e-05, -1.7508864402770996e-05, -1.648440957069397e-05, -1.5459954738616943e-05, -1.4435499906539917e-05, -1.341104507446289e-05, -1.2386590242385864e-05, -1.1362135410308838e-05, -1.0337680578231812e-05, -9.313225746154785e-06, -8.288770914077759e-06, -7.264316082000732e-06, -6.239861249923706e-06, -5.21540641784668e-06, -4.190951585769653e-06, -3.166496753692627e-06, -2.1420419216156006e-06, -1.1175870895385742e-06, -9.313225746154785e-08, 9.313225746154785e-07, 1.955777406692505e-06, 2.9802322387695312e-06, 4.004687070846558e-06, 5.029141902923584e-06, 6.05359673500061e-06, 7.078051567077637e-06, 8.102506399154663e-06, 9.12696123123169e-06, 1.0151416063308716e-05, 1.1175870895385742e-05, 1.2200325727462769e-05, 1.3224780559539795e-05, 1.4249235391616821e-05, 1.5273690223693848e-05, 1.6298145055770874e-05, 1.73225998878479e-05, 1.8347054719924927e-05, 1.9371509552001953e-05, 2.039596438407898e-05, 2.1420419216156006e-05, 2.2444874048233032e-05, 2.346932888031006e-05, 2.4493783712387085e-05, 2.551823854446411e-05, 2.6542693376541138e-05, 2.7567148208618164e-05, 2.859160304069519e-05, 2.9616057872772217e-05, 3.064051270484924e-05, 3.166496753692627e-05, 3.2689422369003296e-05, 3.371387720108032e-05, 3.473833203315735e-05, 3.5762786865234375e-05]}, "gradients/decoder.transformer.h.19.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 3.0, 2.0, 7.0, 6.0, 7.0, 8.0, 2.0, 6.0, 8.0, 8.0, 7.0, 17.0, 16.0, 17.0, 17.0, 17.0, 37.0, 28.0, 26.0, 32.0, 45.0, 48.0, 31.0, 49.0, 53.0, 46.0, 56.0, 40.0, 43.0, 31.0, 39.0, 39.0, 37.0, 25.0, 20.0, 19.0, 21.0, 15.0, 10.0, 16.0, 9.0, 12.0, 10.0, 8.0, 5.0, 5.0, 3.0, 3.0, 1.0, 1.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-29.8125, -28.884033203125, -27.95556640625, -27.027099609375, -26.0986328125, -25.170166015625, -24.24169921875, -23.313232421875, -22.384765625, -21.456298828125, -20.52783203125, -19.599365234375, -18.6708984375, -17.742431640625, -16.81396484375, -15.885498046875, -14.95703125, -14.028564453125, -13.10009765625, -12.171630859375, -11.2431640625, -10.314697265625, -9.38623046875, -8.457763671875, -7.529296875, -6.600830078125, -5.67236328125, -4.743896484375, -3.8154296875, -2.886962890625, -1.95849609375, -1.030029296875, -0.1015625, 0.826904296875, 1.75537109375, 2.683837890625, 3.6123046875, 4.540771484375, 5.46923828125, 6.397705078125, 7.326171875, 8.254638671875, 9.18310546875, 10.111572265625, 11.0400390625, 11.968505859375, 12.89697265625, 13.825439453125, 14.75390625, 15.682373046875, 16.61083984375, 17.539306640625, 18.4677734375, 19.396240234375, 20.32470703125, 21.253173828125, 22.181640625, 23.110107421875, 24.03857421875, 24.967041015625, 25.8955078125, 26.823974609375, 27.75244140625, 28.680908203125, 29.609375]}, "gradients/decoder.transformer.h.19.attn.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 4.0, 2.0, 6.0, 8.0, 5.0, 7.0, 5.0, 6.0, 13.0, 4.0, 24.0, 26.0, 54.0, 48.0, 72.0, 150.0, 209.0, 320.0, 601.0, 1219.0, 2893.0, 7169.0, 19518.0, 61442.0, 233590.0, 521074.0, 138318.0, 39132.0, 13295.0, 4998.0, 2088.0, 997.0, 490.0, 246.0, 153.0, 112.0, 82.0, 53.0, 22.0, 28.0, 19.0, 16.0, 11.0, 10.0, 8.0, 5.0, 4.0, 2.0, 2.0, 3.0, 1.0, 1.0, 2.0, 0.0, 2.0, 0.0, 1.0], "bins": [-14.484375, -14.0361328125, -13.587890625, -13.1396484375, -12.69140625, -12.2431640625, -11.794921875, -11.3466796875, -10.8984375, -10.4501953125, -10.001953125, -9.5537109375, -9.10546875, -8.6572265625, -8.208984375, -7.7607421875, -7.3125, -6.8642578125, -6.416015625, -5.9677734375, -5.51953125, -5.0712890625, -4.623046875, -4.1748046875, -3.7265625, -3.2783203125, -2.830078125, -2.3818359375, -1.93359375, -1.4853515625, -1.037109375, -0.5888671875, -0.140625, 0.3076171875, 0.755859375, 1.2041015625, 1.65234375, 2.1005859375, 2.548828125, 2.9970703125, 3.4453125, 3.8935546875, 4.341796875, 4.7900390625, 5.23828125, 5.6865234375, 6.134765625, 6.5830078125, 7.03125, 7.4794921875, 7.927734375, 8.3759765625, 8.82421875, 9.2724609375, 9.720703125, 10.1689453125, 10.6171875, 11.0654296875, 11.513671875, 11.9619140625, 12.41015625, 12.8583984375, 13.306640625, 13.7548828125, 14.203125]}, "gradients/decoder.transformer.h.19.attn.c_attn.bias": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 2.0, 5.0, 4.0, 1.0, 6.0, 7.0, 2.0, 12.0, 7.0, 17.0, 11.0, 21.0, 12.0, 21.0, 28.0, 25.0, 34.0, 26.0, 32.0, 23.0, 31.0, 34.0, 35.0, 36.0, 45.0, 1838.0, 268.0, 46.0, 41.0, 40.0, 31.0, 30.0, 24.0, 23.0, 33.0, 37.0, 21.0, 21.0, 16.0, 21.0, 17.0, 14.0, 10.0, 11.0, 9.0, 6.0, 4.0, 4.0, 5.0, 2.0, 5.0, 2.0, 3.0, 1.0, 1.0, 1.0, 2.0], "bins": [-96.1875, -93.263671875, -90.33984375, -87.416015625, -84.4921875, -81.568359375, -78.64453125, -75.720703125, -72.796875, -69.873046875, -66.94921875, -64.025390625, -61.1015625, -58.177734375, -55.25390625, -52.330078125, -49.40625, -46.482421875, -43.55859375, -40.634765625, -37.7109375, -34.787109375, -31.86328125, -28.939453125, -26.015625, -23.091796875, -20.16796875, -17.244140625, -14.3203125, -11.396484375, -8.47265625, -5.548828125, -2.625, 0.298828125, 3.22265625, 6.146484375, 9.0703125, 11.994140625, 14.91796875, 17.841796875, 20.765625, 23.689453125, 26.61328125, 29.537109375, 32.4609375, 35.384765625, 38.30859375, 41.232421875, 44.15625, 47.080078125, 50.00390625, 52.927734375, 55.8515625, 58.775390625, 61.69921875, 64.623046875, 67.546875, 70.470703125, 73.39453125, 76.318359375, 79.2421875, 82.166015625, 85.08984375, 88.013671875, 90.9375]}, "gradients/decoder.transformer.h.19.attn.c_attn.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 1.0, 3.0, 3.0, 4.0, 2.0, 4.0, 5.0, 4.0, 9.0, 7.0, 9.0, 11.0, 14.0, 18.0, 24.0, 17.0, 31.0, 33.0, 55.0, 63.0, 71.0, 104.0, 156.0, 220.0, 350.0, 539.0, 1268.0, 236537.0, 2902640.0, 1612.0, 622.0, 343.0, 244.0, 173.0, 113.0, 85.0, 60.0, 51.0, 37.0, 37.0, 26.0, 11.0, 26.0, 11.0, 14.0, 10.0, 12.0, 3.0, 5.0, 7.0, 1.0, 4.0, 4.0, 2.0, 4.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0], "bins": [-142.5, -137.916015625, -133.33203125, -128.748046875, -124.1640625, -119.580078125, -114.99609375, -110.412109375, -105.828125, -101.244140625, -96.66015625, -92.076171875, -87.4921875, -82.908203125, -78.32421875, -73.740234375, -69.15625, -64.572265625, -59.98828125, -55.404296875, -50.8203125, -46.236328125, -41.65234375, -37.068359375, -32.484375, -27.900390625, -23.31640625, -18.732421875, -14.1484375, -9.564453125, -4.98046875, -0.396484375, 4.1875, 8.771484375, 13.35546875, 17.939453125, 22.5234375, 27.107421875, 31.69140625, 36.275390625, 40.859375, 45.443359375, 50.02734375, 54.611328125, 59.1953125, 63.779296875, 68.36328125, 72.947265625, 77.53125, 82.115234375, 86.69921875, 91.283203125, 95.8671875, 100.451171875, 105.03515625, 109.619140625, 114.203125, 118.787109375, 123.37109375, 127.955078125, 132.5390625, 137.123046875, 141.70703125, 146.291015625, 150.875]}, "gradients/decoder.transformer.h.19.ln_1.weight": {"_type": "histogram", "values": [76.0, 942.0, 5.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-33.193641662597656, -7.141706466674805, 18.910228729248047, 44.96216583251953, 71.01409912109375, 97.06603240966797, 123.11797332763672, 149.16989135742188, 175.22183227539062, 201.27377319335938, 227.32569885253906, 253.3776397705078, 279.4295654296875, 305.48150634765625, 331.533447265625, 357.58538818359375, 383.6373291015625, 409.68927001953125, 435.7412109375, 461.79315185546875, 487.8450622558594, 513.89697265625, 539.948974609375, 566.0008544921875, 592.0527954101562, 618.104736328125, 644.1566772460938, 670.2086181640625, 696.2605590820312, 722.3125, 748.3643798828125, 774.4163208007812, 800.4683227539062, 826.520263671875, 852.5722045898438, 878.6241455078125, 904.6760864257812, 930.72802734375, 956.7799072265625, 982.8318481445312, 1008.8837890625, 1034.9356689453125, 1060.9876708984375, 1087.03955078125, 1113.091552734375, 1139.1434326171875, 1165.1954345703125, 1191.247314453125, 1217.29931640625, 1243.3511962890625, 1269.4031982421875, 1295.455078125, 1321.507080078125, 1347.5589599609375, 1373.6109619140625, 1399.662841796875, 1425.7147216796875, 1451.7666015625, 1477.818603515625, 1503.8704833984375, 1529.9224853515625, 1555.974365234375, 1582.0263671875, 1608.0782470703125, 1634.1302490234375]}, "gradients/decoder.transformer.h.19.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 3.0, 2.0, 1.0, 2.0, 7.0, 5.0, 2.0, 4.0, 8.0, 18.0, 12.0, 17.0, 19.0, 27.0, 15.0, 26.0, 22.0, 32.0, 38.0, 40.0, 49.0, 42.0, 43.0, 52.0, 51.0, 49.0, 43.0, 44.0, 42.0, 41.0, 33.0, 28.0, 34.0, 30.0, 27.0, 21.0, 16.0, 13.0, 15.0, 12.0, 4.0, 6.0, 3.0, 1.0, 7.0, 3.0, 5.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-358.9853820800781, -348.2940673828125, -337.6027526855469, -326.91143798828125, -316.22015380859375, -305.5288391113281, -294.8375244140625, -284.1462097167969, -273.45489501953125, -262.7635803222656, -252.072265625, -241.38096618652344, -230.6896514892578, -219.9983367919922, -209.30703735351562, -198.61572265625, -187.92440795898438, -177.23309326171875, -166.54177856445312, -155.85047912597656, -145.15916442871094, -134.4678497314453, -123.77654266357422, -113.08523559570312, -102.3939208984375, -91.70260620117188, -81.01129913330078, -70.31999206542969, -59.62867736816406, -48.9373664855957, -38.246055603027344, -27.55474853515625, -16.8634033203125, -6.172092437744141, 4.519218444824219, 15.210529327392578, 25.901840209960938, 36.5931510925293, 47.284461975097656, 57.97576904296875, 68.66708374023438, 79.3583984375, 90.0497055053711, 100.74101257324219, 111.43232727050781, 122.12364196777344, 132.81494140625, 143.50625610351562, 154.19757080078125, 164.88888549804688, 175.5802001953125, 186.27149963378906, 196.9628143310547, 207.6541290283203, 218.34542846679688, 229.0367431640625, 239.72805786132812, 250.41937255859375, 261.1106872558594, 271.802001953125, 282.4932861328125, 293.1846008300781, 303.87591552734375, 314.5672302246094, 325.258544921875]}, "gradients/decoder.transformer.h.18.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 2.0, 2.0, 7.0, 4.0, 8.0, 6.0, 6.0, 3.0, 8.0, 9.0, 7.0, 9.0, 24.0, 17.0, 15.0, 22.0, 25.0, 29.0, 27.0, 37.0, 30.0, 47.0, 47.0, 41.0, 56.0, 48.0, 48.0, 43.0, 37.0, 41.0, 36.0, 39.0, 36.0, 28.0, 24.0, 19.0, 21.0, 14.0, 10.0, 20.0, 8.0, 12.0, 10.0, 9.0, 6.0, 6.0, 3.0, 2.0, 2.0, 0.0, 3.0, 0.0, 3.0, 0.0, 1.0, 0.0, 1.0], "bins": [-30.234375, -29.301025390625, -28.36767578125, -27.434326171875, -26.5009765625, -25.567626953125, -24.63427734375, -23.700927734375, -22.767578125, -21.834228515625, -20.90087890625, -19.967529296875, -19.0341796875, -18.100830078125, -17.16748046875, -16.234130859375, -15.30078125, -14.367431640625, -13.43408203125, -12.500732421875, -11.5673828125, -10.634033203125, -9.70068359375, -8.767333984375, -7.833984375, -6.900634765625, -5.96728515625, -5.033935546875, -4.1005859375, -3.167236328125, -2.23388671875, -1.300537109375, -0.3671875, 0.566162109375, 1.49951171875, 2.432861328125, 3.3662109375, 4.299560546875, 5.23291015625, 6.166259765625, 7.099609375, 8.032958984375, 8.96630859375, 9.899658203125, 10.8330078125, 11.766357421875, 12.69970703125, 13.633056640625, 14.56640625, 15.499755859375, 16.43310546875, 17.366455078125, 18.2998046875, 19.233154296875, 20.16650390625, 21.099853515625, 22.033203125, 22.966552734375, 23.89990234375, 24.833251953125, 25.7666015625, 26.699951171875, 27.63330078125, 28.566650390625, 29.5]}, "gradients/decoder.transformer.h.18.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 2.0, 1.0, 5.0, 4.0, 7.0, 9.0, 28.0, 19.0, 15.0, 39.0, 46.0, 47.0, 120.0, 136.0, 208.0, 254.0, 366.0, 562.0, 749.0, 1145.0, 1589.0, 2461.0, 3772.0, 6385.0, 11756.0, 47462.0, 1301391.0, 2692568.0, 89722.0, 14079.0, 7329.0, 4138.0, 2616.0, 1663.0, 1159.0, 738.0, 518.0, 347.0, 231.0, 195.0, 117.0, 73.0, 56.0, 45.0, 23.0, 37.0, 23.0, 11.0, 9.0, 7.0, 2.0, 4.0, 2.0, 3.0, 4.0, 0.0, 0.0, 2.0], "bins": [-86.5, -83.8857421875, -81.271484375, -78.6572265625, -76.04296875, -73.4287109375, -70.814453125, -68.2001953125, -65.5859375, -62.9716796875, -60.357421875, -57.7431640625, -55.12890625, -52.5146484375, -49.900390625, -47.2861328125, -44.671875, -42.0576171875, -39.443359375, -36.8291015625, -34.21484375, -31.6005859375, -28.986328125, -26.3720703125, -23.7578125, -21.1435546875, -18.529296875, -15.9150390625, -13.30078125, -10.6865234375, -8.072265625, -5.4580078125, -2.84375, -0.2294921875, 2.384765625, 4.9990234375, 7.61328125, 10.2275390625, 12.841796875, 15.4560546875, 18.0703125, 20.6845703125, 23.298828125, 25.9130859375, 28.52734375, 31.1416015625, 33.755859375, 36.3701171875, 38.984375, 41.5986328125, 44.212890625, 46.8271484375, 49.44140625, 52.0556640625, 54.669921875, 57.2841796875, 59.8984375, 62.5126953125, 65.126953125, 67.7412109375, 70.35546875, 72.9697265625, 75.583984375, 78.1982421875, 80.8125]}, "gradients/decoder.transformer.h.18.mlp.c_fc.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 2.0, 0.0, 2.0, 3.0, 2.0, 0.0, 2.0, 4.0, 8.0, 5.0, 4.0, 5.0, 7.0, 2.0, 13.0, 11.0, 11.0, 21.0, 23.0, 31.0, 48.0, 104.0, 271.0, 765.0, 1419.0, 716.0, 277.0, 104.0, 54.0, 36.0, 14.0, 17.0, 21.0, 12.0, 12.0, 6.0, 11.0, 7.0, 6.0, 2.0, 4.0, 4.0, 4.0, 4.0, 3.0, 4.0, 0.0, 3.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-73.375, -70.703125, -68.03125, -65.359375, -62.6875, -60.015625, -57.34375, -54.671875, -52.0, -49.328125, -46.65625, -43.984375, -41.3125, -38.640625, -35.96875, -33.296875, -30.625, -27.953125, -25.28125, -22.609375, -19.9375, -17.265625, -14.59375, -11.921875, -9.25, -6.578125, -3.90625, -1.234375, 1.4375, 4.109375, 6.78125, 9.453125, 12.125, 14.796875, 17.46875, 20.140625, 22.8125, 25.484375, 28.15625, 30.828125, 33.5, 36.171875, 38.84375, 41.515625, 44.1875, 46.859375, 49.53125, 52.203125, 54.875, 57.546875, 60.21875, 62.890625, 65.5625, 68.234375, 70.90625, 73.578125, 76.25, 78.921875, 81.59375, 84.265625, 86.9375, 89.609375, 92.28125, 94.953125, 97.625]}, "gradients/decoder.transformer.h.18.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 5.0, 5.0, 3.0, 7.0, 10.0, 14.0, 23.0, 22.0, 46.0, 59.0, 79.0, 112.0, 121.0, 197.0, 246.0, 355.0, 534.0, 720.0, 1056.0, 1547.0, 2160.0, 3371.0, 5029.0, 8105.0, 13622.0, 24832.0, 60940.0, 587089.0, 3261498.0, 137941.0, 36598.0, 18092.0, 10334.0, 6497.0, 4177.0, 2674.0, 1883.0, 1251.0, 875.0, 621.0, 389.0, 342.0, 232.0, 164.0, 112.0, 81.0, 56.0, 48.0, 29.0, 19.0, 23.0, 15.0, 17.0, 3.0, 5.0, 2.0, 7.0, 1.0], "bins": [-108.5, -105.33203125, -102.1640625, -98.99609375, -95.828125, -92.66015625, -89.4921875, -86.32421875, -83.15625, -79.98828125, -76.8203125, -73.65234375, -70.484375, -67.31640625, -64.1484375, -60.98046875, -57.8125, -54.64453125, -51.4765625, -48.30859375, -45.140625, -41.97265625, -38.8046875, -35.63671875, -32.46875, -29.30078125, -26.1328125, -22.96484375, -19.796875, -16.62890625, -13.4609375, -10.29296875, -7.125, -3.95703125, -0.7890625, 2.37890625, 5.546875, 8.71484375, 11.8828125, 15.05078125, 18.21875, 21.38671875, 24.5546875, 27.72265625, 30.890625, 34.05859375, 37.2265625, 40.39453125, 43.5625, 46.73046875, 49.8984375, 53.06640625, 56.234375, 59.40234375, 62.5703125, 65.73828125, 68.90625, 72.07421875, 75.2421875, 78.41015625, 81.578125, 84.74609375, 87.9140625, 91.08203125, 94.25]}, "gradients/decoder.transformer.h.18.ln_2.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 1.0, 4.0, 4.0, 6.0, 12.0, 16.0, 17.0, 19.0, 22.0, 46.0, 45.0, 68.0, 127.0, 158.0, 142.0, 90.0, 54.0, 42.0, 32.0, 25.0, 17.0, 14.0, 12.0, 5.0, 8.0, 7.0, 3.0, 5.0, 1.0, 1.0, 5.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-400.8241271972656, -389.8207702636719, -378.8174133300781, -367.8140563964844, -356.8106689453125, -345.80731201171875, -334.803955078125, -323.80059814453125, -312.7972412109375, -301.79388427734375, -290.79052734375, -279.78717041015625, -268.7838134765625, -257.7804260253906, -246.77706909179688, -235.77371215820312, -224.77035522460938, -213.76699829101562, -202.76364135742188, -191.76026916503906, -180.7569122314453, -169.75355529785156, -158.75018310546875, -147.746826171875, -136.74346923828125, -125.7401123046875, -114.73674774169922, -103.73338317871094, -92.73002624511719, -81.72666931152344, -70.72330474853516, -59.719940185546875, -48.71661376953125, -37.713253021240234, -26.70989227294922, -15.706531524658203, -4.7031707763671875, 6.300189971923828, 17.303550720214844, 28.306915283203125, 39.310272216796875, 50.31363296508789, 61.316993713378906, 72.32035827636719, 83.32371520996094, 94.32707214355469, 105.33043670654297, 116.33380126953125, 127.337158203125, 138.34051513671875, 149.3438720703125, 160.3472442626953, 171.35060119628906, 182.3539581298828, 193.35733032226562, 204.36068725585938, 215.36404418945312, 226.36740112304688, 237.37075805664062, 248.37413024902344, 259.37750244140625, 270.380859375, 281.38421630859375, 292.3875732421875, 303.39093017578125]}, "gradients/decoder.transformer.h.18.ln_2.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 3.0, 1.0, 3.0, 1.0, 1.0, 4.0, 6.0, 7.0, 6.0, 8.0, 9.0, 14.0, 18.0, 17.0, 11.0, 20.0, 28.0, 23.0, 29.0, 24.0, 38.0, 40.0, 29.0, 31.0, 28.0, 42.0, 33.0, 29.0, 45.0, 35.0, 32.0, 42.0, 39.0, 36.0, 26.0, 39.0, 33.0, 23.0, 21.0, 24.0, 22.0, 19.0, 15.0, 16.0, 9.0, 9.0, 8.0, 3.0, 1.0, 4.0, 4.0, 3.0, 3.0, 2.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0], "bins": [-207.72976684570312, -201.13792419433594, -194.54608154296875, -187.95423889160156, -181.36239624023438, -174.7705535888672, -168.1787109375, -161.5868682861328, -154.99502563476562, -148.40318298339844, -141.81134033203125, -135.21949768066406, -128.62765502929688, -122.03581237792969, -115.4439697265625, -108.85212707519531, -102.2602767944336, -95.6684341430664, -89.07659149169922, -82.48474884033203, -75.89290618896484, -69.30105590820312, -62.7092170715332, -56.117374420166016, -49.52553176879883, -42.93368911743164, -36.34184646606445, -29.750001907348633, -23.158159255981445, -16.566314697265625, -9.974472045898438, -3.38262939453125, 3.2092132568359375, 9.801055908203125, 16.392898559570312, 22.984743118286133, 29.57658576965332, 36.16843032836914, 42.76027297973633, 49.352115631103516, 55.9439582824707, 62.53580093383789, 69.12764739990234, 75.71949005126953, 82.31133270263672, 88.9031753540039, 95.4950180053711, 102.08686065673828, 108.67870330810547, 115.27054595947266, 121.86238861083984, 128.45423889160156, 135.04608154296875, 141.63792419433594, 148.22976684570312, 154.8216094970703, 161.4134521484375, 168.0052947998047, 174.59713745117188, 181.18898010253906, 187.78082275390625, 194.37266540527344, 200.96450805664062, 207.5563507080078, 214.148193359375]}, "gradients/decoder.transformer.h.18.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 2.0, 0.0, 1.0, 2.0, 2.0, 1.0, 2.0, 3.0, 5.0, 6.0, 5.0, 3.0, 7.0, 6.0, 16.0, 14.0, 11.0, 21.0, 22.0, 18.0, 37.0, 30.0, 31.0, 26.0, 44.0, 33.0, 46.0, 45.0, 55.0, 44.0, 45.0, 56.0, 47.0, 39.0, 27.0, 36.0, 30.0, 24.0, 21.0, 26.0, 20.0, 17.0, 19.0, 14.0, 19.0, 7.0, 6.0, 7.0, 3.0, 5.0, 5.0, 3.0, 3.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-32.34375, -31.3427734375, -30.341796875, -29.3408203125, -28.33984375, -27.3388671875, -26.337890625, -25.3369140625, -24.3359375, -23.3349609375, -22.333984375, -21.3330078125, -20.33203125, -19.3310546875, -18.330078125, -17.3291015625, -16.328125, -15.3271484375, -14.326171875, -13.3251953125, -12.32421875, -11.3232421875, -10.322265625, -9.3212890625, -8.3203125, -7.3193359375, -6.318359375, -5.3173828125, -4.31640625, -3.3154296875, -2.314453125, -1.3134765625, -0.3125, 0.6884765625, 1.689453125, 2.6904296875, 3.69140625, 4.6923828125, 5.693359375, 6.6943359375, 7.6953125, 8.6962890625, 9.697265625, 10.6982421875, 11.69921875, 12.7001953125, 13.701171875, 14.7021484375, 15.703125, 16.7041015625, 17.705078125, 18.7060546875, 19.70703125, 20.7080078125, 21.708984375, 22.7099609375, 23.7109375, 24.7119140625, 25.712890625, 26.7138671875, 27.71484375, 28.7158203125, 29.716796875, 30.7177734375, 31.71875]}, "gradients/decoder.transformer.h.18.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 2.0, 0.0, 3.0, 8.0, 7.0, 11.0, 16.0, 23.0, 33.0, 43.0, 62.0, 104.0, 141.0, 212.0, 308.0, 459.0, 616.0, 931.0, 1400.0, 2054.0, 3134.0, 4696.0, 7305.0, 11316.0, 17631.0, 27969.0, 45875.0, 80778.0, 178821.0, 368343.0, 127352.0, 64303.0, 38112.0, 23528.0, 14804.0, 9639.0, 6245.0, 4065.0, 2661.0, 1844.0, 1176.0, 792.0, 561.0, 366.0, 252.0, 181.0, 121.0, 84.0, 50.0, 42.0, 28.0, 26.0, 15.0, 8.0, 7.0, 5.0, 3.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.30859375, -1.268096923828125, -1.22760009765625, -1.187103271484375, -1.1466064453125, -1.106109619140625, -1.06561279296875, -1.025115966796875, -0.984619140625, -0.944122314453125, -0.90362548828125, -0.863128662109375, -0.8226318359375, -0.782135009765625, -0.74163818359375, -0.701141357421875, -0.66064453125, -0.620147705078125, -0.57965087890625, -0.539154052734375, -0.4986572265625, -0.458160400390625, -0.41766357421875, -0.377166748046875, -0.336669921875, -0.296173095703125, -0.25567626953125, -0.215179443359375, -0.1746826171875, -0.134185791015625, -0.09368896484375, -0.053192138671875, -0.0126953125, 0.027801513671875, 0.06829833984375, 0.108795166015625, 0.1492919921875, 0.189788818359375, 0.23028564453125, 0.270782470703125, 0.311279296875, 0.351776123046875, 0.39227294921875, 0.432769775390625, 0.4732666015625, 0.513763427734375, 0.55426025390625, 0.594757080078125, 0.63525390625, 0.675750732421875, 0.71624755859375, 0.756744384765625, 0.7972412109375, 0.837738037109375, 0.87823486328125, 0.918731689453125, 0.959228515625, 0.999725341796875, 1.04022216796875, 1.080718994140625, 1.1212158203125, 1.161712646484375, 1.20220947265625, 1.242706298828125, 1.283203125]}, "gradients/decoder.transformer.h.18.crossattention.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 3.0, 1.0, 5.0, 5.0, 5.0, 10.0, 10.0, 6.0, 13.0, 14.0, 17.0, 16.0, 20.0, 23.0, 35.0, 31.0, 27.0, 33.0, 39.0, 28.0, 51.0, 36.0, 45.0, 1078.0, 34.0, 36.0, 37.0, 36.0, 47.0, 30.0, 21.0, 37.0, 33.0, 36.0, 30.0, 15.0, 17.0, 16.0, 14.0, 14.0, 10.0, 5.0, 2.0, 4.0, 3.0, 1.0, 3.0, 1.0, 2.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0], "bins": [-19.265625, -18.656494140625, -18.04736328125, -17.438232421875, -16.8291015625, -16.219970703125, -15.61083984375, -15.001708984375, -14.392578125, -13.783447265625, -13.17431640625, -12.565185546875, -11.9560546875, -11.346923828125, -10.73779296875, -10.128662109375, -9.51953125, -8.910400390625, -8.30126953125, -7.692138671875, -7.0830078125, -6.473876953125, -5.86474609375, -5.255615234375, -4.646484375, -4.037353515625, -3.42822265625, -2.819091796875, -2.2099609375, -1.600830078125, -0.99169921875, -0.382568359375, 0.2265625, 0.835693359375, 1.44482421875, 2.053955078125, 2.6630859375, 3.272216796875, 3.88134765625, 4.490478515625, 5.099609375, 5.708740234375, 6.31787109375, 6.927001953125, 7.5361328125, 8.145263671875, 8.75439453125, 9.363525390625, 9.97265625, 10.581787109375, 11.19091796875, 11.800048828125, 12.4091796875, 13.018310546875, 13.62744140625, 14.236572265625, 14.845703125, 15.454833984375, 16.06396484375, 16.673095703125, 17.2822265625, 17.891357421875, 18.50048828125, 19.109619140625, 19.71875]}, "gradients/decoder.transformer.h.18.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 2.0, 3.0, 4.0, 4.0, 9.0, 9.0, 18.0, 21.0, 22.0, 38.0, 59.0, 69.0, 99.0, 129.0, 226.0, 287.0, 415.0, 637.0, 871.0, 1268.0, 1781.0, 2649.0, 4008.0, 6070.0, 9069.0, 13835.0, 21918.0, 33673.0, 53117.0, 88479.0, 165108.0, 1363647.0, 128420.0, 72911.0, 45197.0, 28448.0, 18455.0, 11882.0, 7921.0, 5292.0, 3551.0, 2336.0, 1593.0, 1039.0, 756.0, 543.0, 368.0, 259.0, 188.0, 132.0, 86.0, 61.0, 52.0, 30.0, 21.0, 20.0, 16.0, 11.0, 4.0, 3.0, 4.0, 3.0, 4.0], "bins": [-0.806640625, -0.7816848754882812, -0.7567291259765625, -0.7317733764648438, -0.706817626953125, -0.6818618774414062, -0.6569061279296875, -0.6319503784179688, -0.60699462890625, -0.5820388793945312, -0.5570831298828125, -0.5321273803710938, -0.507171630859375, -0.48221588134765625, -0.4572601318359375, -0.43230438232421875, -0.4073486328125, -0.38239288330078125, -0.3574371337890625, -0.33248138427734375, -0.307525634765625, -0.28256988525390625, -0.2576141357421875, -0.23265838623046875, -0.20770263671875, -0.18274688720703125, -0.1577911376953125, -0.13283538818359375, -0.107879638671875, -0.08292388916015625, -0.0579681396484375, -0.03301239013671875, -0.008056640625, 0.01689910888671875, 0.0418548583984375, 0.06681060791015625, 0.091766357421875, 0.11672210693359375, 0.1416778564453125, 0.16663360595703125, 0.19158935546875, 0.21654510498046875, 0.2415008544921875, 0.26645660400390625, 0.291412353515625, 0.31636810302734375, 0.3413238525390625, 0.36627960205078125, 0.3912353515625, 0.41619110107421875, 0.4411468505859375, 0.46610260009765625, 0.491058349609375, 0.5160140991210938, 0.5409698486328125, 0.5659255981445312, 0.59088134765625, 0.6158370971679688, 0.6407928466796875, 0.6657485961914062, 0.690704345703125, 0.7156600952148438, 0.7406158447265625, 0.7655715942382812, 0.79052734375]}, "gradients/decoder.transformer.h.18.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 1.0, 2.0, 1.0, 1.0, 1.0, 7.0, 7.0, 3.0, 5.0, 4.0, 7.0, 10.0, 13.0, 14.0, 20.0, 17.0, 25.0, 22.0, 40.0, 30.0, 51.0, 47.0, 44.0, 76.0, 106.0, 76.0, 55.0, 39.0, 51.0, 39.0, 27.0, 26.0, 25.0, 21.0, 13.0, 24.0, 13.0, 12.0, 7.0, 11.0, 5.0, 3.0, 5.0, 3.0, 5.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.639957427978516e-05, -6.408244371414185e-05, -6.176531314849854e-05, -5.9448182582855225e-05, -5.7131052017211914e-05, -5.4813921451568604e-05, -5.249679088592529e-05, -5.017966032028198e-05, -4.786252975463867e-05, -4.554539918899536e-05, -4.322826862335205e-05, -4.091113805770874e-05, -3.859400749206543e-05, -3.627687692642212e-05, -3.395974636077881e-05, -3.16426157951355e-05, -2.9325485229492188e-05, -2.7008354663848877e-05, -2.4691224098205566e-05, -2.2374093532562256e-05, -2.0056962966918945e-05, -1.7739832401275635e-05, -1.5422701835632324e-05, -1.3105571269989014e-05, -1.0788440704345703e-05, -8.471310138702393e-06, -6.154179573059082e-06, -3.8370490074157715e-06, -1.519918441772461e-06, 7.972121238708496e-07, 3.11434268951416e-06, 5.431473255157471e-06, 7.748603820800781e-06, 1.0065734386444092e-05, 1.2382864952087402e-05, 1.4699995517730713e-05, 1.7017126083374023e-05, 1.9334256649017334e-05, 2.1651387214660645e-05, 2.3968517780303955e-05, 2.6285648345947266e-05, 2.8602778911590576e-05, 3.091990947723389e-05, 3.32370400428772e-05, 3.555417060852051e-05, 3.787130117416382e-05, 4.018843173980713e-05, 4.250556230545044e-05, 4.482269287109375e-05, 4.713982343673706e-05, 4.945695400238037e-05, 5.177408456802368e-05, 5.409121513366699e-05, 5.64083456993103e-05, 5.872547626495361e-05, 6.104260683059692e-05, 6.335973739624023e-05, 6.567686796188354e-05, 6.799399852752686e-05, 7.031112909317017e-05, 7.262825965881348e-05, 7.494539022445679e-05, 7.72625207901001e-05, 7.957965135574341e-05, 8.189678192138672e-05]}, "gradients/decoder.transformer.h.18.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 0.0, 3.0, 3.0, 4.0, 6.0, 6.0, 9.0, 13.0, 13.0, 18.0, 17.0, 29.0, 40.0, 44.0, 69.0, 102.0, 144.0, 256.0, 680.0, 4878.0, 90499.0, 917280.0, 31007.0, 2309.0, 441.0, 229.0, 117.0, 102.0, 56.0, 45.0, 33.0, 27.0, 21.0, 19.0, 5.0, 9.0, 6.0, 3.0, 3.0, 7.0, 6.0, 3.0, 2.0, 2.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0], "bins": [-0.0012569427490234375, -0.0012213736772537231, -0.0011858046054840088, -0.0011502355337142944, -0.00111466646194458, -0.0010790973901748657, -0.0010435283184051514, -0.001007959246635437, -0.0009723901748657227, -0.0009368211030960083, -0.0009012520313262939, -0.0008656829595565796, -0.0008301138877868652, -0.0007945448160171509, -0.0007589757442474365, -0.0007234066724777222, -0.0006878376007080078, -0.0006522685289382935, -0.0006166994571685791, -0.0005811303853988647, -0.0005455613136291504, -0.000509992241859436, -0.0004744231700897217, -0.0004388540983200073, -0.00040328502655029297, -0.0003677159547805786, -0.00033214688301086426, -0.0002965778112411499, -0.00026100873947143555, -0.0002254396677017212, -0.00018987059593200684, -0.00015430152416229248, -0.00011873245239257812, -8.316338062286377e-05, -4.7594308853149414e-05, -1.2025237083435059e-05, 2.3543834686279297e-05, 5.911290645599365e-05, 9.468197822570801e-05, 0.00013025104999542236, 0.00016582012176513672, 0.00020138919353485107, 0.00023695826530456543, 0.0002725273370742798, 0.00030809640884399414, 0.0003436654806137085, 0.00037923455238342285, 0.0004148036241531372, 0.00045037269592285156, 0.0004859417676925659, 0.0005215108394622803, 0.0005570799112319946, 0.000592648983001709, 0.0006282180547714233, 0.0006637871265411377, 0.000699356198310852, 0.0007349252700805664, 0.0007704943418502808, 0.0008060634136199951, 0.0008416324853897095, 0.0008772015571594238, 0.0009127706289291382, 0.0009483397006988525, 0.000983908772468567, 0.0010194778442382812]}, "gradients/decoder.transformer.h.18.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 7.0, 5.0, 16.0, 30.0, 69.0, 179.0, 463.0, 137.0, 52.0, 26.0, 14.0, 12.0, 2.0, 3.0, 2.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.0002196619170717895, -0.00021538787405006588, -0.00021111383102834225, -0.0002068397734547034, -0.00020256573043297976, -0.00019829168741125613, -0.0001940176443895325, -0.00018974358681589365, -0.00018546954379417002, -0.0001811955007724464, -0.00017692145775072277, -0.0001726474001770839, -0.00016837335715536028, -0.00016409931413363665, -0.00015982527111191303, -0.00015555121353827417, -0.00015127717051655054, -0.0001470031274948269, -0.00014272908447310328, -0.00013845502689946443, -0.0001341809838777408, -0.00012990694085601717, -0.00012563289783429354, -0.0001213588475366123, -0.00011708481179084629, -0.00011281076876912266, -0.00010853671847144142, -0.00010426267544971779, -9.998862515203655e-05, -9.571458213031292e-05, -9.144053910858929e-05, -8.716648881090805e-05, -8.289243851322681e-05, -7.861839549150318e-05, -7.434434519382194e-05, -7.007030217209831e-05, -6.579625187441707e-05, -6.152220885269344e-05, -5.7248162192991003e-05, -5.297411553328857e-05, -4.8700065235607326e-05, -4.442601857590489e-05, -4.0151971916202456e-05, -3.587792889447883e-05, -3.1603878596797585e-05, -2.7329833756084554e-05, -2.3055788915371522e-05, -1.8781742255669087e-05, -1.4507695595966652e-05, -1.0233648936264217e-05, -5.9596031860564835e-06, -1.6855574358487502e-06, 2.588489223853685e-06, 6.86253588355612e-06, 1.1136580724269152e-05, 1.5410627383971587e-05, 1.9684674043674022e-05, 2.3958720703376457e-05, 2.8232767363078892e-05, 3.250681038480252e-05, 3.678086068248376e-05, 4.105490370420739e-05, 4.5328950363909826e-05, 4.960299702361226e-05, 5.3877043683314696e-05]}, "gradients/decoder.transformer.h.18.ln_cross_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 0.0, 0.0, 3.0, 1.0, 1.0, 3.0, 3.0, 8.0, 4.0, 7.0, 6.0, 9.0, 10.0, 9.0, 11.0, 13.0, 17.0, 22.0, 24.0, 23.0, 28.0, 41.0, 30.0, 23.0, 31.0, 49.0, 56.0, 41.0, 42.0, 35.0, 41.0, 42.0, 41.0, 40.0, 33.0, 35.0, 31.0, 28.0, 31.0, 30.0, 15.0, 21.0, 12.0, 13.0, 7.0, 9.0, 4.0, 4.0, 8.0, 3.0, 6.0, 5.0, 4.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-3.6776065826416016e-05, -3.563147038221359e-05, -3.448687493801117e-05, -3.3342279493808746e-05, -3.219768404960632e-05, -3.10530886054039e-05, -2.9908493161201477e-05, -2.8763897716999054e-05, -2.761930227279663e-05, -2.6474706828594208e-05, -2.5330111384391785e-05, -2.418551594018936e-05, -2.304092049598694e-05, -2.1896325051784515e-05, -2.0751729607582092e-05, -1.960713416337967e-05, -1.8462538719177246e-05, -1.7317943274974823e-05, -1.61733478307724e-05, -1.5028752386569977e-05, -1.3884156942367554e-05, -1.273956149816513e-05, -1.1594966053962708e-05, -1.0450370609760284e-05, -9.305775165557861e-06, -8.161179721355438e-06, -7.016584277153015e-06, -5.871988832950592e-06, -4.727393388748169e-06, -3.582797944545746e-06, -2.4382025003433228e-06, -1.2936070561408997e-06, -1.4901161193847656e-07, 9.955838322639465e-07, 2.1401792764663696e-06, 3.2847747206687927e-06, 4.429370164871216e-06, 5.573965609073639e-06, 6.718561053276062e-06, 7.863156497478485e-06, 9.007751941680908e-06, 1.0152347385883331e-05, 1.1296942830085754e-05, 1.2441538274288177e-05, 1.35861337184906e-05, 1.4730729162693024e-05, 1.5875324606895447e-05, 1.701992005109787e-05, 1.8164515495300293e-05, 1.9309110939502716e-05, 2.045370638370514e-05, 2.1598301827907562e-05, 2.2742897272109985e-05, 2.388749271631241e-05, 2.503208816051483e-05, 2.6176683604717255e-05, 2.7321279048919678e-05, 2.84658744931221e-05, 2.9610469937324524e-05, 3.075506538152695e-05, 3.189966082572937e-05, 3.304425626993179e-05, 3.4188851714134216e-05, 3.533344715833664e-05, 3.647804260253906e-05]}, "gradients/decoder.transformer.h.18.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 2.0, 0.0, 1.0, 2.0, 2.0, 1.0, 2.0, 3.0, 5.0, 6.0, 5.0, 3.0, 7.0, 6.0, 16.0, 14.0, 11.0, 21.0, 22.0, 18.0, 37.0, 30.0, 31.0, 26.0, 44.0, 33.0, 46.0, 45.0, 55.0, 44.0, 45.0, 56.0, 47.0, 39.0, 27.0, 36.0, 30.0, 24.0, 21.0, 26.0, 20.0, 17.0, 19.0, 14.0, 19.0, 7.0, 6.0, 7.0, 3.0, 5.0, 5.0, 3.0, 3.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-32.34375, -31.3427734375, -30.341796875, -29.3408203125, -28.33984375, -27.3388671875, -26.337890625, -25.3369140625, -24.3359375, -23.3349609375, -22.333984375, -21.3330078125, -20.33203125, -19.3310546875, -18.330078125, -17.3291015625, -16.328125, -15.3271484375, -14.326171875, -13.3251953125, -12.32421875, -11.3232421875, -10.322265625, -9.3212890625, -8.3203125, -7.3193359375, -6.318359375, -5.3173828125, -4.31640625, -3.3154296875, -2.314453125, -1.3134765625, -0.3125, 0.6884765625, 1.689453125, 2.6904296875, 3.69140625, 4.6923828125, 5.693359375, 6.6943359375, 7.6953125, 8.6962890625, 9.697265625, 10.6982421875, 11.69921875, 12.7001953125, 13.701171875, 14.7021484375, 15.703125, 16.7041015625, 17.705078125, 18.7060546875, 19.70703125, 20.7080078125, 21.708984375, 22.7099609375, 23.7109375, 24.7119140625, 25.712890625, 26.7138671875, 27.71484375, 28.7158203125, 29.716796875, 30.7177734375, 31.71875]}, "gradients/decoder.transformer.h.18.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 2.0, 3.0, 4.0, 2.0, 7.0, 4.0, 11.0, 11.0, 16.0, 21.0, 30.0, 32.0, 58.0, 76.0, 98.0, 144.0, 177.0, 260.0, 393.0, 559.0, 734.0, 1167.0, 1823.0, 2537.0, 4038.0, 6256.0, 10960.0, 55285.0, 887552.0, 47528.0, 10643.0, 6159.0, 3860.0, 2545.0, 1740.0, 1209.0, 795.0, 539.0, 397.0, 232.0, 182.0, 157.0, 85.0, 72.0, 44.0, 34.0, 25.0, 16.0, 11.0, 15.0, 6.0, 5.0, 3.0, 2.0, 3.0, 2.0, 1.0, 0.0, 1.0, 1.0], "bins": [-68.375, -66.26953125, -64.1640625, -62.05859375, -59.953125, -57.84765625, -55.7421875, -53.63671875, -51.53125, -49.42578125, -47.3203125, -45.21484375, -43.109375, -41.00390625, -38.8984375, -36.79296875, -34.6875, -32.58203125, -30.4765625, -28.37109375, -26.265625, -24.16015625, -22.0546875, -19.94921875, -17.84375, -15.73828125, -13.6328125, -11.52734375, -9.421875, -7.31640625, -5.2109375, -3.10546875, -1.0, 1.10546875, 3.2109375, 5.31640625, 7.421875, 9.52734375, 11.6328125, 13.73828125, 15.84375, 17.94921875, 20.0546875, 22.16015625, 24.265625, 26.37109375, 28.4765625, 30.58203125, 32.6875, 34.79296875, 36.8984375, 39.00390625, 41.109375, 43.21484375, 45.3203125, 47.42578125, 49.53125, 51.63671875, 53.7421875, 55.84765625, 57.953125, 60.05859375, 62.1640625, 64.26953125, 66.375]}, "gradients/decoder.transformer.h.18.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 6.0, 4.0, 8.0, 11.0, 11.0, 8.0, 8.0, 15.0, 22.0, 28.0, 22.0, 28.0, 27.0, 37.0, 29.0, 37.0, 43.0, 58.0, 68.0, 369.0, 1706.0, 69.0, 51.0, 49.0, 51.0, 49.0, 46.0, 34.0, 30.0, 21.0, 35.0, 16.0, 10.0, 10.0, 12.0, 10.0, 7.0, 3.0, 2.0, 4.0, 4.0, 1.0, 1.0, 0.0, 3.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-105.5, -101.740234375, -97.98046875, -94.220703125, -90.4609375, -86.701171875, -82.94140625, -79.181640625, -75.421875, -71.662109375, -67.90234375, -64.142578125, -60.3828125, -56.623046875, -52.86328125, -49.103515625, -45.34375, -41.583984375, -37.82421875, -34.064453125, -30.3046875, -26.544921875, -22.78515625, -19.025390625, -15.265625, -11.505859375, -7.74609375, -3.986328125, -0.2265625, 3.533203125, 7.29296875, 11.052734375, 14.8125, 18.572265625, 22.33203125, 26.091796875, 29.8515625, 33.611328125, 37.37109375, 41.130859375, 44.890625, 48.650390625, 52.41015625, 56.169921875, 59.9296875, 63.689453125, 67.44921875, 71.208984375, 74.96875, 78.728515625, 82.48828125, 86.248046875, 90.0078125, 93.767578125, 97.52734375, 101.287109375, 105.046875, 108.806640625, 112.56640625, 116.326171875, 120.0859375, 123.845703125, 127.60546875, 131.365234375, 135.125]}, "gradients/decoder.transformer.h.18.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 2.0, 4.0, 1.0, 0.0, 0.0, 3.0, 6.0, 4.0, 3.0, 5.0, 7.0, 16.0, 26.0, 21.0, 30.0, 52.0, 62.0, 119.0, 214.0, 434.0, 909.0, 2096.0, 5813.0, 23368.0, 3028451.0, 67631.0, 10565.0, 3283.0, 1280.0, 604.0, 256.0, 161.0, 92.0, 43.0, 45.0, 27.0, 20.0, 17.0, 14.0, 13.0, 7.0, 3.0, 5.0, 3.0, 2.0, 1.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0], "bins": [-261.75, -254.625, -247.5, -240.375, -233.25, -226.125, -219.0, -211.875, -204.75, -197.625, -190.5, -183.375, -176.25, -169.125, -162.0, -154.875, -147.75, -140.625, -133.5, -126.375, -119.25, -112.125, -105.0, -97.875, -90.75, -83.625, -76.5, -69.375, -62.25, -55.125, -48.0, -40.875, -33.75, -26.625, -19.5, -12.375, -5.25, 1.875, 9.0, 16.125, 23.25, 30.375, 37.5, 44.625, 51.75, 58.875, 66.0, 73.125, 80.25, 87.375, 94.5, 101.625, 108.75, 115.875, 123.0, 130.125, 137.25, 144.375, 151.5, 158.625, 165.75, 172.875, 180.0, 187.125, 194.25]}, "gradients/decoder.transformer.h.18.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 2.0, 2.0, 2.0, 8.0, 28.0, 71.0, 176.0, 419.0, 189.0, 67.0, 31.0, 15.0, 4.0, 3.0, 0.0, 1.0], "bins": [-999.427734375, -981.4186401367188, -963.4096069335938, -945.4005126953125, -927.3914794921875, -909.3823852539062, -891.373291015625, -873.3642578125, -855.3551635742188, -837.3460693359375, -819.3370361328125, -801.3279418945312, -783.31884765625, -765.309814453125, -747.3007202148438, -729.2916870117188, -711.2825927734375, -693.2734985351562, -675.2644653320312, -657.25537109375, -639.246337890625, -621.2372436523438, -603.2281494140625, -585.2191162109375, -567.2100219726562, -549.200927734375, -531.19189453125, -513.1828002929688, -495.1737365722656, -477.1646728515625, -459.15557861328125, -441.1465148925781, -423.13751220703125, -405.1284484863281, -387.119384765625, -369.11029052734375, -351.1012268066406, -333.0921630859375, -315.08306884765625, -297.0740051269531, -279.06494140625, -261.0558776855469, -243.0467987060547, -225.0377197265625, -207.02865600585938, -189.01959228515625, -171.01051330566406, -153.00143432617188, -134.99237060546875, -116.9832992553711, -98.97422790527344, -80.96515655517578, -62.956085205078125, -44.94701385498047, -26.937942504882812, -8.928871154785156, 9.0802001953125, 27.089271545410156, 45.09834289550781, 63.10741424560547, 81.11648559570312, 99.12555694580078, 117.13462829589844, 135.14370727539062, 153.15277099609375]}, "gradients/decoder.transformer.h.18.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 6.0, 2.0, 4.0, 3.0, 6.0, 6.0, 13.0, 8.0, 9.0, 7.0, 21.0, 19.0, 25.0, 19.0, 20.0, 31.0, 37.0, 40.0, 33.0, 39.0, 46.0, 44.0, 44.0, 38.0, 50.0, 33.0, 52.0, 42.0, 37.0, 48.0, 34.0, 29.0, 22.0, 25.0, 26.0, 23.0, 16.0, 12.0, 7.0, 12.0, 2.0, 4.0, 5.0, 5.0, 1.0, 2.0, 2.0, 1.0, 2.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-340.4291687011719, -329.4747009277344, -318.5202331542969, -307.5657653808594, -296.6112976074219, -285.6568298339844, -274.70233154296875, -263.74786376953125, -252.7934112548828, -241.8389434814453, -230.8844757080078, -219.92999267578125, -208.97552490234375, -198.02105712890625, -187.06658935546875, -176.11212158203125, -165.15765380859375, -154.20318603515625, -143.24871826171875, -132.29425048828125, -121.33977508544922, -110.38530731201172, -99.43083190917969, -88.47636413574219, -77.52189636230469, -66.56742858886719, -55.61295700073242, -44.658485412597656, -33.704017639160156, -22.749549865722656, -11.795074462890625, -0.840606689453125, 10.1138916015625, 21.068361282348633, 32.022830963134766, 42.97730255126953, 53.93177032470703, 64.88623809814453, 75.84071350097656, 86.79518127441406, 97.74964904785156, 108.70411682128906, 119.65858459472656, 130.61306762695312, 141.56753540039062, 152.52200317382812, 163.47647094726562, 174.43093872070312, 185.38540649414062, 196.33987426757812, 207.29434204101562, 218.24880981445312, 229.20327758789062, 240.15774536132812, 251.1122283935547, 262.06671142578125, 273.02117919921875, 283.97564697265625, 294.93011474609375, 305.88458251953125, 316.83905029296875, 327.79351806640625, 338.74798583984375, 349.70245361328125, 360.65692138671875]}, "gradients/decoder.transformer.h.17.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 4.0, 2.0, 3.0, 5.0, 6.0, 4.0, 8.0, 7.0, 12.0, 12.0, 16.0, 19.0, 30.0, 27.0, 23.0, 30.0, 38.0, 34.0, 42.0, 38.0, 50.0, 42.0, 52.0, 40.0, 53.0, 49.0, 38.0, 50.0, 34.0, 39.0, 17.0, 23.0, 21.0, 24.0, 20.0, 19.0, 15.0, 22.0, 10.0, 8.0, 5.0, 5.0, 4.0, 3.0, 6.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-35.3125, -34.271484375, -33.23046875, -32.189453125, -31.1484375, -30.107421875, -29.06640625, -28.025390625, -26.984375, -25.943359375, -24.90234375, -23.861328125, -22.8203125, -21.779296875, -20.73828125, -19.697265625, -18.65625, -17.615234375, -16.57421875, -15.533203125, -14.4921875, -13.451171875, -12.41015625, -11.369140625, -10.328125, -9.287109375, -8.24609375, -7.205078125, -6.1640625, -5.123046875, -4.08203125, -3.041015625, -2.0, -0.958984375, 0.08203125, 1.123046875, 2.1640625, 3.205078125, 4.24609375, 5.287109375, 6.328125, 7.369140625, 8.41015625, 9.451171875, 10.4921875, 11.533203125, 12.57421875, 13.615234375, 14.65625, 15.697265625, 16.73828125, 17.779296875, 18.8203125, 19.861328125, 20.90234375, 21.943359375, 22.984375, 24.025390625, 25.06640625, 26.107421875, 27.1484375, 28.189453125, 29.23046875, 30.271484375, 31.3125]}, "gradients/decoder.transformer.h.17.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 0.0, 3.0, 3.0, 4.0, 4.0, 5.0, 10.0, 13.0, 10.0, 24.0, 23.0, 28.0, 37.0, 51.0, 64.0, 94.0, 142.0, 182.0, 272.0, 321.0, 446.0, 625.0, 969.0, 1393.0, 2062.0, 3301.0, 5230.0, 9683.0, 23608.0, 378711.0, 3491683.0, 230455.0, 21845.0, 8895.0, 4883.0, 2946.0, 1980.0, 1316.0, 861.0, 594.0, 460.0, 307.0, 221.0, 147.0, 107.0, 75.0, 67.0, 44.0, 23.0, 23.0, 17.0, 9.0, 5.0, 5.0, 4.0, 4.0, 1.0, 2.0, 1.0, 1.0, 1.0], "bins": [-93.1875, -90.3994140625, -87.611328125, -84.8232421875, -82.03515625, -79.2470703125, -76.458984375, -73.6708984375, -70.8828125, -68.0947265625, -65.306640625, -62.5185546875, -59.73046875, -56.9423828125, -54.154296875, -51.3662109375, -48.578125, -45.7900390625, -43.001953125, -40.2138671875, -37.42578125, -34.6376953125, -31.849609375, -29.0615234375, -26.2734375, -23.4853515625, -20.697265625, -17.9091796875, -15.12109375, -12.3330078125, -9.544921875, -6.7568359375, -3.96875, -1.1806640625, 1.607421875, 4.3955078125, 7.18359375, 9.9716796875, 12.759765625, 15.5478515625, 18.3359375, 21.1240234375, 23.912109375, 26.7001953125, 29.48828125, 32.2763671875, 35.064453125, 37.8525390625, 40.640625, 43.4287109375, 46.216796875, 49.0048828125, 51.79296875, 54.5810546875, 57.369140625, 60.1572265625, 62.9453125, 65.7333984375, 68.521484375, 71.3095703125, 74.09765625, 76.8857421875, 79.673828125, 82.4619140625, 85.25]}, "gradients/decoder.transformer.h.17.mlp.c_fc.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 1.0, 0.0, 2.0, 6.0, 11.0, 13.0, 10.0, 9.0, 14.0, 13.0, 22.0, 19.0, 30.0, 37.0, 74.0, 188.0, 564.0, 1414.0, 1013.0, 307.0, 121.0, 57.0, 30.0, 28.0, 16.0, 14.0, 20.0, 10.0, 12.0, 6.0, 4.0, 5.0, 2.0, 5.0, 1.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-90.8125, -87.416015625, -84.01953125, -80.623046875, -77.2265625, -73.830078125, -70.43359375, -67.037109375, -63.640625, -60.244140625, -56.84765625, -53.451171875, -50.0546875, -46.658203125, -43.26171875, -39.865234375, -36.46875, -33.072265625, -29.67578125, -26.279296875, -22.8828125, -19.486328125, -16.08984375, -12.693359375, -9.296875, -5.900390625, -2.50390625, 0.892578125, 4.2890625, 7.685546875, 11.08203125, 14.478515625, 17.875, 21.271484375, 24.66796875, 28.064453125, 31.4609375, 34.857421875, 38.25390625, 41.650390625, 45.046875, 48.443359375, 51.83984375, 55.236328125, 58.6328125, 62.029296875, 65.42578125, 68.822265625, 72.21875, 75.615234375, 79.01171875, 82.408203125, 85.8046875, 89.201171875, 92.59765625, 95.994140625, 99.390625, 102.787109375, 106.18359375, 109.580078125, 112.9765625, 116.373046875, 119.76953125, 123.166015625, 126.5625]}, "gradients/decoder.transformer.h.17.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 2.0, 5.0, 6.0, 9.0, 14.0, 8.0, 20.0, 38.0, 31.0, 62.0, 87.0, 121.0, 125.0, 195.0, 331.0, 460.0, 829.0, 1399.0, 2331.0, 3939.0, 7496.0, 14220.0, 29498.0, 88803.0, 2807933.0, 1113668.0, 68605.0, 25639.0, 12539.0, 6622.0, 3784.0, 2072.0, 1170.0, 771.0, 445.0, 324.0, 220.0, 133.0, 102.0, 63.0, 40.0, 34.0, 26.0, 19.0, 11.0, 8.0, 13.0, 9.0, 4.0, 4.0, 5.0, 2.0, 2.0, 0.0, 1.0], "bins": [-141.875, -137.69140625, -133.5078125, -129.32421875, -125.140625, -120.95703125, -116.7734375, -112.58984375, -108.40625, -104.22265625, -100.0390625, -95.85546875, -91.671875, -87.48828125, -83.3046875, -79.12109375, -74.9375, -70.75390625, -66.5703125, -62.38671875, -58.203125, -54.01953125, -49.8359375, -45.65234375, -41.46875, -37.28515625, -33.1015625, -28.91796875, -24.734375, -20.55078125, -16.3671875, -12.18359375, -8.0, -3.81640625, 0.3671875, 4.55078125, 8.734375, 12.91796875, 17.1015625, 21.28515625, 25.46875, 29.65234375, 33.8359375, 38.01953125, 42.203125, 46.38671875, 50.5703125, 54.75390625, 58.9375, 63.12109375, 67.3046875, 71.48828125, 75.671875, 79.85546875, 84.0390625, 88.22265625, 92.40625, 96.58984375, 100.7734375, 104.95703125, 109.140625, 113.32421875, 117.5078125, 121.69140625, 125.875]}, "gradients/decoder.transformer.h.17.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 2.0, 3.0, 0.0, 0.0, 1.0, 2.0, 2.0, 6.0, 4.0, 8.0, 17.0, 18.0, 9.0, 19.0, 34.0, 45.0, 57.0, 80.0, 147.0, 156.0, 137.0, 71.0, 48.0, 39.0, 26.0, 23.0, 17.0, 16.0, 8.0, 6.0, 2.0, 2.0, 2.0, 4.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-380.4411315917969, -368.7041015625, -356.9670715332031, -345.23004150390625, -333.4930114746094, -321.7559814453125, -310.01898193359375, -298.2819519042969, -286.544921875, -274.8078918457031, -263.07086181640625, -251.33383178710938, -239.59681701660156, -227.8597869873047, -216.1227569580078, -204.3857421875, -192.64869689941406, -180.9116668701172, -169.1746368408203, -157.4376220703125, -145.70059204101562, -133.96356201171875, -122.22653198242188, -110.48950958251953, -98.75247955322266, -87.01544952392578, -75.27842712402344, -63.54139709472656, -51.80437088012695, -40.067344665527344, -28.33031463623047, -16.593292236328125, -4.85626220703125, 6.880764961242676, 18.6177921295166, 30.354820251464844, 42.09184646606445, 53.82887268066406, 65.56590270996094, 77.30292510986328, 89.03995513916016, 100.77698516845703, 112.51400756835938, 124.25103759765625, 135.98806762695312, 147.72509765625, 159.46212768554688, 171.1991424560547, 182.93617248535156, 194.67320251464844, 206.4102325439453, 218.14724731445312, 229.88427734375, 241.62130737304688, 253.35833740234375, 265.0953674316406, 276.8323974609375, 288.5694274902344, 300.30645751953125, 312.0434875488281, 323.780517578125, 335.51751708984375, 347.25457763671875, 358.9915771484375, 370.7286071777344]}, "gradients/decoder.transformer.h.17.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 3.0, 2.0, 3.0, 2.0, 8.0, 7.0, 6.0, 4.0, 7.0, 6.0, 12.0, 14.0, 6.0, 6.0, 17.0, 21.0, 33.0, 18.0, 29.0, 34.0, 30.0, 35.0, 33.0, 44.0, 36.0, 44.0, 39.0, 39.0, 36.0, 25.0, 34.0, 41.0, 40.0, 30.0, 30.0, 20.0, 27.0, 30.0, 29.0, 20.0, 17.0, 12.0, 24.0, 16.0, 12.0, 5.0, 5.0, 4.0, 5.0, 6.0, 6.0, 2.0, 1.0, 1.0, 1.0, 2.0, 2.0, 0.0, 1.0], "bins": [-212.4288330078125, -205.9185028076172, -199.40818786621094, -192.89785766601562, -186.38754272460938, -179.87721252441406, -173.3668975830078, -166.8565673828125, -160.34625244140625, -153.83592224121094, -147.3256072998047, -140.81527709960938, -134.30496215820312, -127.79463195800781, -121.28431701660156, -114.77398681640625, -108.26366424560547, -101.75334167480469, -95.2430191040039, -88.73269653320312, -82.22237396240234, -75.71205139160156, -69.20172119140625, -62.691402435302734, -56.18107986450195, -49.67075729370117, -43.16043472290039, -36.650108337402344, -30.139787673950195, -23.62946319580078, -17.119140625, -10.608818054199219, -4.0984954833984375, 2.411827564239502, 8.922150611877441, 15.432474136352539, 21.94279670715332, 28.453121185302734, 34.963443756103516, 41.4737663269043, 47.98408889770508, 54.49441146850586, 61.00473403930664, 67.51506042480469, 74.02538299560547, 80.53570556640625, 87.04602813720703, 93.55635070800781, 100.0666732788086, 106.57699584960938, 113.08731842041016, 119.59764099121094, 126.10796356201172, 132.6182861328125, 139.1286163330078, 145.63893127441406, 152.14926147460938, 158.6595916748047, 165.16990661621094, 171.68023681640625, 178.1905517578125, 184.7008819580078, 191.21119689941406, 197.72152709960938, 204.23184204101562]}, "gradients/decoder.transformer.h.17.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 3.0, 3.0, 2.0, 1.0, 5.0, 3.0, 4.0, 4.0, 6.0, 11.0, 6.0, 11.0, 11.0, 12.0, 18.0, 23.0, 23.0, 29.0, 26.0, 29.0, 47.0, 35.0, 56.0, 46.0, 42.0, 46.0, 38.0, 44.0, 45.0, 50.0, 47.0, 34.0, 38.0, 25.0, 27.0, 20.0, 15.0, 19.0, 22.0, 17.0, 15.0, 13.0, 9.0, 8.0, 14.0, 3.0, 6.0, 2.0, 0.0, 2.0, 1.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-33.5625, -32.52197265625, -31.4814453125, -30.44091796875, -29.400390625, -28.35986328125, -27.3193359375, -26.27880859375, -25.23828125, -24.19775390625, -23.1572265625, -22.11669921875, -21.076171875, -20.03564453125, -18.9951171875, -17.95458984375, -16.9140625, -15.87353515625, -14.8330078125, -13.79248046875, -12.751953125, -11.71142578125, -10.6708984375, -9.63037109375, -8.58984375, -7.54931640625, -6.5087890625, -5.46826171875, -4.427734375, -3.38720703125, -2.3466796875, -1.30615234375, -0.265625, 0.77490234375, 1.8154296875, 2.85595703125, 3.896484375, 4.93701171875, 5.9775390625, 7.01806640625, 8.05859375, 9.09912109375, 10.1396484375, 11.18017578125, 12.220703125, 13.26123046875, 14.3017578125, 15.34228515625, 16.3828125, 17.42333984375, 18.4638671875, 19.50439453125, 20.544921875, 21.58544921875, 22.6259765625, 23.66650390625, 24.70703125, 25.74755859375, 26.7880859375, 27.82861328125, 28.869140625, 29.90966796875, 30.9501953125, 31.99072265625, 33.03125]}, "gradients/decoder.transformer.h.17.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 3.0, 6.0, 9.0, 3.0, 9.0, 7.0, 24.0, 31.0, 33.0, 65.0, 82.0, 123.0, 187.0, 312.0, 486.0, 774.0, 1244.0, 2078.0, 3324.0, 5575.0, 9060.0, 14827.0, 25486.0, 45473.0, 87587.0, 201654.0, 374209.0, 129194.0, 62659.0, 33896.0, 19778.0, 11998.0, 7098.0, 4307.0, 2589.0, 1613.0, 998.0, 658.0, 384.0, 264.0, 171.0, 106.0, 62.0, 40.0, 35.0, 16.0, 16.0, 6.0, 2.0, 2.0, 5.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.59375, -1.544342041015625, -1.49493408203125, -1.445526123046875, -1.3961181640625, -1.346710205078125, -1.29730224609375, -1.247894287109375, -1.198486328125, -1.149078369140625, -1.09967041015625, -1.050262451171875, -1.0008544921875, -0.951446533203125, -0.90203857421875, -0.852630615234375, -0.80322265625, -0.753814697265625, -0.70440673828125, -0.654998779296875, -0.6055908203125, -0.556182861328125, -0.50677490234375, -0.457366943359375, -0.407958984375, -0.358551025390625, -0.30914306640625, -0.259735107421875, -0.2103271484375, -0.160919189453125, -0.11151123046875, -0.062103271484375, -0.0126953125, 0.036712646484375, 0.08612060546875, 0.135528564453125, 0.1849365234375, 0.234344482421875, 0.28375244140625, 0.333160400390625, 0.382568359375, 0.431976318359375, 0.48138427734375, 0.530792236328125, 0.5802001953125, 0.629608154296875, 0.67901611328125, 0.728424072265625, 0.77783203125, 0.827239990234375, 0.87664794921875, 0.926055908203125, 0.9754638671875, 1.024871826171875, 1.07427978515625, 1.123687744140625, 1.173095703125, 1.222503662109375, 1.27191162109375, 1.321319580078125, 1.3707275390625, 1.420135498046875, 1.46954345703125, 1.518951416015625, 1.568359375]}, "gradients/decoder.transformer.h.17.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 1.0, 4.0, 8.0, 4.0, 4.0, 10.0, 11.0, 8.0, 17.0, 17.0, 21.0, 20.0, 23.0, 26.0, 31.0, 35.0, 26.0, 29.0, 33.0, 34.0, 36.0, 46.0, 38.0, 1071.0, 49.0, 42.0, 36.0, 43.0, 32.0, 42.0, 31.0, 20.0, 27.0, 29.0, 19.0, 15.0, 17.0, 15.0, 15.0, 16.0, 8.0, 8.0, 4.0, 8.0, 4.0, 2.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-20.453125, -19.815673828125, -19.17822265625, -18.540771484375, -17.9033203125, -17.265869140625, -16.62841796875, -15.990966796875, -15.353515625, -14.716064453125, -14.07861328125, -13.441162109375, -12.8037109375, -12.166259765625, -11.52880859375, -10.891357421875, -10.25390625, -9.616455078125, -8.97900390625, -8.341552734375, -7.7041015625, -7.066650390625, -6.42919921875, -5.791748046875, -5.154296875, -4.516845703125, -3.87939453125, -3.241943359375, -2.6044921875, -1.967041015625, -1.32958984375, -0.692138671875, -0.0546875, 0.582763671875, 1.22021484375, 1.857666015625, 2.4951171875, 3.132568359375, 3.77001953125, 4.407470703125, 5.044921875, 5.682373046875, 6.31982421875, 6.957275390625, 7.5947265625, 8.232177734375, 8.86962890625, 9.507080078125, 10.14453125, 10.781982421875, 11.41943359375, 12.056884765625, 12.6943359375, 13.331787109375, 13.96923828125, 14.606689453125, 15.244140625, 15.881591796875, 16.51904296875, 17.156494140625, 17.7939453125, 18.431396484375, 19.06884765625, 19.706298828125, 20.34375]}, "gradients/decoder.transformer.h.17.crossattention.c_attn.weight": {"_type": "histogram", "values": [3.0, 4.0, 2.0, 0.0, 2.0, 7.0, 8.0, 14.0, 16.0, 18.0, 29.0, 64.0, 80.0, 95.0, 158.0, 246.0, 352.0, 447.0, 735.0, 994.0, 1549.0, 2261.0, 3434.0, 5123.0, 7700.0, 11386.0, 17238.0, 26041.0, 40176.0, 64234.0, 107813.0, 1328818.0, 211430.0, 97753.0, 58760.0, 37388.0, 24443.0, 16047.0, 10533.0, 7097.0, 4743.0, 3214.0, 2214.0, 1412.0, 950.0, 679.0, 436.0, 308.0, 229.0, 140.0, 93.0, 73.0, 66.0, 28.0, 15.0, 17.0, 10.0, 10.0, 7.0, 1.0, 1.0, 2.0, 3.0, 3.0], "bins": [-0.83251953125, -0.8064346313476562, -0.7803497314453125, -0.7542648315429688, -0.728179931640625, -0.7020950317382812, -0.6760101318359375, -0.6499252319335938, -0.62384033203125, -0.5977554321289062, -0.5716705322265625, -0.5455856323242188, -0.519500732421875, -0.49341583251953125, -0.4673309326171875, -0.44124603271484375, -0.4151611328125, -0.38907623291015625, -0.3629913330078125, -0.33690643310546875, -0.310821533203125, -0.28473663330078125, -0.2586517333984375, -0.23256683349609375, -0.20648193359375, -0.18039703369140625, -0.1543121337890625, -0.12822723388671875, -0.102142333984375, -0.07605743408203125, -0.0499725341796875, -0.02388763427734375, 0.002197265625, 0.02828216552734375, 0.0543670654296875, 0.08045196533203125, 0.106536865234375, 0.13262176513671875, 0.1587066650390625, 0.18479156494140625, 0.21087646484375, 0.23696136474609375, 0.2630462646484375, 0.28913116455078125, 0.315216064453125, 0.34130096435546875, 0.3673858642578125, 0.39347076416015625, 0.4195556640625, 0.44564056396484375, 0.4717254638671875, 0.49781036376953125, 0.523895263671875, 0.5499801635742188, 0.5760650634765625, 0.6021499633789062, 0.62823486328125, 0.6543197631835938, 0.6804046630859375, 0.7064895629882812, 0.732574462890625, 0.7586593627929688, 0.7847442626953125, 0.8108291625976562, 0.8369140625]}, "gradients/decoder.transformer.h.17.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 5.0, 0.0, 2.0, 2.0, 0.0, 4.0, 4.0, 2.0, 8.0, 10.0, 5.0, 15.0, 13.0, 12.0, 25.0, 23.0, 32.0, 40.0, 48.0, 95.0, 259.0, 121.0, 76.0, 58.0, 36.0, 28.0, 20.0, 24.0, 13.0, 7.0, 6.0, 3.0, 9.0, 4.0, 3.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.785724639892578e-05, -8.479133248329163e-05, -8.172541856765747e-05, -7.865950465202332e-05, -7.559359073638916e-05, -7.2527676820755e-05, -6.946176290512085e-05, -6.63958489894867e-05, -6.332993507385254e-05, -6.0264021158218384e-05, -5.719810724258423e-05, -5.413219332695007e-05, -5.106627941131592e-05, -4.800036549568176e-05, -4.493445158004761e-05, -4.186853766441345e-05, -3.88026237487793e-05, -3.573670983314514e-05, -3.2670795917510986e-05, -2.960488200187683e-05, -2.6538968086242676e-05, -2.347305417060852e-05, -2.0407140254974365e-05, -1.734122633934021e-05, -1.4275312423706055e-05, -1.12093985080719e-05, -8.143484592437744e-06, -5.077570676803589e-06, -2.0116567611694336e-06, 1.0542571544647217e-06, 4.120171070098877e-06, 7.186084985733032e-06, 1.0251998901367188e-05, 1.3317912817001343e-05, 1.6383826732635498e-05, 1.9449740648269653e-05, 2.251565456390381e-05, 2.5581568479537964e-05, 2.864748239517212e-05, 3.1713396310806274e-05, 3.477931022644043e-05, 3.7845224142074585e-05, 4.091113805770874e-05, 4.3977051973342896e-05, 4.704296588897705e-05, 5.0108879804611206e-05, 5.317479372024536e-05, 5.6240707635879517e-05, 5.930662155151367e-05, 6.237253546714783e-05, 6.543844938278198e-05, 6.850436329841614e-05, 7.157027721405029e-05, 7.463619112968445e-05, 7.77021050453186e-05, 8.076801896095276e-05, 8.383393287658691e-05, 8.689984679222107e-05, 8.996576070785522e-05, 9.303167462348938e-05, 9.609758853912354e-05, 9.916350245475769e-05, 0.00010222941637039185, 0.000105295330286026, 0.00010836124420166016]}, "gradients/decoder.transformer.h.17.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 1.0, 1.0, 0.0, 2.0, 4.0, 5.0, 7.0, 3.0, 6.0, 16.0, 16.0, 23.0, 35.0, 42.0, 86.0, 169.0, 480.0, 2346.0, 42574.0, 986611.0, 14237.0, 1254.0, 308.0, 110.0, 65.0, 44.0, 24.0, 23.0, 20.0, 10.0, 12.0, 6.0, 9.0, 1.0, 3.0, 2.0, 4.0, 2.0, 2.0, 4.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0016164779663085938, -0.0015707463026046753, -0.0015250146389007568, -0.0014792829751968384, -0.00143355131149292, -0.0013878196477890015, -0.001342087984085083, -0.0012963563203811646, -0.001250624656677246, -0.0012048929929733276, -0.0011591613292694092, -0.0011134296655654907, -0.0010676980018615723, -0.0010219663381576538, -0.0009762346744537354, -0.0009305030107498169, -0.0008847713470458984, -0.00083903968334198, -0.0007933080196380615, -0.0007475763559341431, -0.0007018446922302246, -0.0006561130285263062, -0.0006103813648223877, -0.0005646497011184692, -0.0005189180374145508, -0.0004731863737106323, -0.00042745471000671387, -0.0003817230463027954, -0.00033599138259887695, -0.0002902597188949585, -0.00024452805519104004, -0.00019879639148712158, -0.00015306472778320312, -0.00010733306407928467, -6.160140037536621e-05, -1.5869736671447754e-05, 2.9861927032470703e-05, 7.559359073638916e-05, 0.00012132525444030762, 0.00016705691814422607, 0.00021278858184814453, 0.000258520245552063, 0.00030425190925598145, 0.0003499835729598999, 0.00039571523666381836, 0.0004414469003677368, 0.0004871785640716553, 0.0005329102277755737, 0.0005786418914794922, 0.0006243735551834106, 0.0006701052188873291, 0.0007158368825912476, 0.000761568546295166, 0.0008073002099990845, 0.0008530318737030029, 0.0008987635374069214, 0.0009444952011108398, 0.0009902268648147583, 0.0010359585285186768, 0.0010816901922225952, 0.0011274218559265137, 0.0011731535196304321, 0.0012188851833343506, 0.001264616847038269, 0.0013103485107421875]}, "gradients/decoder.transformer.h.17.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 2.0, 6.0, 4.0, 11.0, 11.0, 18.0, 42.0, 69.0, 141.0, 322.0, 201.0, 87.0, 47.0, 22.0, 15.0, 5.0, 7.0, 1.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.255185533314943e-05, -4.9569531256565824e-05, -4.6587207179982215e-05, -4.3604883103398606e-05, -4.0622559026815e-05, -3.764023495023139e-05, -3.465791087364778e-05, -3.167558679706417e-05, -2.869326272048056e-05, -2.5710938643896952e-05, -2.2728614567313343e-05, -1.9746290490729734e-05, -1.6763966414146125e-05, -1.3781642337562516e-05, -1.0799318260978907e-05, -7.816994184395298e-06, -4.834670107811689e-06, -1.8523460312280804e-06, 1.1299780453555286e-06, 4.112302121939138e-06, 7.0946261985227466e-06, 1.0076950275106356e-05, 1.3059274351689965e-05, 1.6041598428273574e-05, 1.9023922504857183e-05, 2.200624658144079e-05, 2.49885706580244e-05, 2.797089473460801e-05, 3.095321881119162e-05, 3.393554288777523e-05, 3.6917866964358836e-05, 3.9900191040942445e-05, 4.2882515117526054e-05, 4.586483919410966e-05, 4.884716327069327e-05, 5.182948734727688e-05, 5.481181142386049e-05, 5.77941355004441e-05, 6.077645957702771e-05, 6.375878001563251e-05, 6.674110773019493e-05, 6.972343544475734e-05, 7.270575588336214e-05, 7.568807632196695e-05, 7.867040403652936e-05, 8.165273175109178e-05, 8.463505218969658e-05, 8.761737262830138e-05, 9.05997003428638e-05, 9.358202805742621e-05, 9.656434849603102e-05, 9.954666893463582e-05, 0.00010252899664919823, 0.00010551132436376065, 0.00010849364480236545, 0.00011147596524097025, 0.00011445829295553267, 0.00011744062067009509, 0.00012042294110869989, 0.0001234052615473047, 0.0001263875892618671, 0.00012936991697642952, 0.0001323522301390767, 0.00013533455785363913, 0.00013831688556820154]}, "gradients/decoder.transformer.h.17.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 3.0, 4.0, 2.0, 4.0, 6.0, 6.0, 7.0, 14.0, 9.0, 17.0, 18.0, 21.0, 17.0, 21.0, 18.0, 29.0, 37.0, 26.0, 37.0, 31.0, 39.0, 38.0, 42.0, 31.0, 38.0, 39.0, 50.0, 52.0, 35.0, 36.0, 25.0, 34.0, 30.0, 23.0, 21.0, 35.0, 24.0, 15.0, 15.0, 12.0, 13.0, 6.0, 5.0, 7.0, 4.0, 3.0, 5.0, 6.0, 2.0, 0.0, 0.0, 1.0, 5.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.254413604736328e-05, -3.14861536026001e-05, -3.0428171157836914e-05, -2.937018871307373e-05, -2.8312206268310547e-05, -2.7254223823547363e-05, -2.619624137878418e-05, -2.5138258934020996e-05, -2.4080276489257812e-05, -2.302229404449463e-05, -2.1964311599731445e-05, -2.0906329154968262e-05, -1.9848346710205078e-05, -1.8790364265441895e-05, -1.773238182067871e-05, -1.6674399375915527e-05, -1.5616416931152344e-05, -1.455843448638916e-05, -1.3500452041625977e-05, -1.2442469596862793e-05, -1.138448715209961e-05, -1.0326504707336426e-05, -9.268522262573242e-06, -8.210539817810059e-06, -7.152557373046875e-06, -6.094574928283691e-06, -5.036592483520508e-06, -3.978610038757324e-06, -2.9206275939941406e-06, -1.862645149230957e-06, -8.046627044677734e-07, 2.5331974029541016e-07, 1.3113021850585938e-06, 2.3692846298217773e-06, 3.427267074584961e-06, 4.4852495193481445e-06, 5.543231964111328e-06, 6.601214408874512e-06, 7.659196853637695e-06, 8.717179298400879e-06, 9.775161743164062e-06, 1.0833144187927246e-05, 1.189112663269043e-05, 1.2949109077453613e-05, 1.4007091522216797e-05, 1.506507396697998e-05, 1.6123056411743164e-05, 1.7181038856506348e-05, 1.823902130126953e-05, 1.9297003746032715e-05, 2.03549861907959e-05, 2.1412968635559082e-05, 2.2470951080322266e-05, 2.352893352508545e-05, 2.4586915969848633e-05, 2.5644898414611816e-05, 2.6702880859375e-05, 2.7760863304138184e-05, 2.8818845748901367e-05, 2.987682819366455e-05, 3.0934810638427734e-05, 3.199279308319092e-05, 3.30507755279541e-05, 3.4108757972717285e-05, 3.516674041748047e-05]}, "gradients/decoder.transformer.h.17.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 3.0, 3.0, 2.0, 1.0, 5.0, 3.0, 4.0, 4.0, 6.0, 11.0, 6.0, 11.0, 11.0, 12.0, 18.0, 23.0, 23.0, 29.0, 26.0, 29.0, 47.0, 35.0, 56.0, 46.0, 42.0, 46.0, 38.0, 44.0, 45.0, 50.0, 47.0, 34.0, 38.0, 25.0, 27.0, 20.0, 15.0, 19.0, 22.0, 17.0, 15.0, 13.0, 9.0, 8.0, 14.0, 3.0, 6.0, 2.0, 0.0, 2.0, 1.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-33.5625, -32.52197265625, -31.4814453125, -30.44091796875, -29.400390625, -28.35986328125, -27.3193359375, -26.27880859375, -25.23828125, -24.19775390625, -23.1572265625, -22.11669921875, -21.076171875, -20.03564453125, -18.9951171875, -17.95458984375, -16.9140625, -15.87353515625, -14.8330078125, -13.79248046875, -12.751953125, -11.71142578125, -10.6708984375, -9.63037109375, -8.58984375, -7.54931640625, -6.5087890625, -5.46826171875, -4.427734375, -3.38720703125, -2.3466796875, -1.30615234375, -0.265625, 0.77490234375, 1.8154296875, 2.85595703125, 3.896484375, 4.93701171875, 5.9775390625, 7.01806640625, 8.05859375, 9.09912109375, 10.1396484375, 11.18017578125, 12.220703125, 13.26123046875, 14.3017578125, 15.34228515625, 16.3828125, 17.42333984375, 18.4638671875, 19.50439453125, 20.544921875, 21.58544921875, 22.6259765625, 23.66650390625, 24.70703125, 25.74755859375, 26.7880859375, 27.82861328125, 28.869140625, 29.90966796875, 30.9501953125, 31.99072265625, 33.03125]}, "gradients/decoder.transformer.h.17.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 5.0, 2.0, 6.0, 7.0, 4.0, 11.0, 10.0, 29.0, 29.0, 42.0, 54.0, 69.0, 91.0, 133.0, 146.0, 201.0, 303.0, 404.0, 554.0, 827.0, 1146.0, 1743.0, 2532.0, 4372.0, 11026.0, 178529.0, 810441.0, 20620.0, 5600.0, 3022.0, 1971.0, 1321.0, 922.0, 656.0, 487.0, 338.0, 245.0, 172.0, 109.0, 108.0, 67.0, 56.0, 48.0, 23.0, 25.0, 16.0, 13.0, 7.0, 9.0, 6.0, 2.0, 3.0, 2.0, 2.0, 3.0, 1.0, 1.0, 1.0], "bins": [-68.25, -66.12890625, -64.0078125, -61.88671875, -59.765625, -57.64453125, -55.5234375, -53.40234375, -51.28125, -49.16015625, -47.0390625, -44.91796875, -42.796875, -40.67578125, -38.5546875, -36.43359375, -34.3125, -32.19140625, -30.0703125, -27.94921875, -25.828125, -23.70703125, -21.5859375, -19.46484375, -17.34375, -15.22265625, -13.1015625, -10.98046875, -8.859375, -6.73828125, -4.6171875, -2.49609375, -0.375, 1.74609375, 3.8671875, 5.98828125, 8.109375, 10.23046875, 12.3515625, 14.47265625, 16.59375, 18.71484375, 20.8359375, 22.95703125, 25.078125, 27.19921875, 29.3203125, 31.44140625, 33.5625, 35.68359375, 37.8046875, 39.92578125, 42.046875, 44.16796875, 46.2890625, 48.41015625, 50.53125, 52.65234375, 54.7734375, 56.89453125, 59.015625, 61.13671875, 63.2578125, 65.37890625, 67.5]}, "gradients/decoder.transformer.h.17.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 2.0, 3.0, 7.0, 7.0, 9.0, 8.0, 12.0, 15.0, 8.0, 30.0, 18.0, 31.0, 31.0, 49.0, 47.0, 45.0, 46.0, 56.0, 65.0, 104.0, 1792.0, 180.0, 102.0, 45.0, 52.0, 41.0, 37.0, 38.0, 27.0, 21.0, 17.0, 19.0, 21.0, 13.0, 12.0, 9.0, 8.0, 11.0, 4.0, 4.0, 1.0, 2.0, 5.0, 1.0, 4.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-115.25, -111.5009765625, -107.751953125, -104.0029296875, -100.25390625, -96.5048828125, -92.755859375, -89.0068359375, -85.2578125, -81.5087890625, -77.759765625, -74.0107421875, -70.26171875, -66.5126953125, -62.763671875, -59.0146484375, -55.265625, -51.5166015625, -47.767578125, -44.0185546875, -40.26953125, -36.5205078125, -32.771484375, -29.0224609375, -25.2734375, -21.5244140625, -17.775390625, -14.0263671875, -10.27734375, -6.5283203125, -2.779296875, 0.9697265625, 4.71875, 8.4677734375, 12.216796875, 15.9658203125, 19.71484375, 23.4638671875, 27.212890625, 30.9619140625, 34.7109375, 38.4599609375, 42.208984375, 45.9580078125, 49.70703125, 53.4560546875, 57.205078125, 60.9541015625, 64.703125, 68.4521484375, 72.201171875, 75.9501953125, 79.69921875, 83.4482421875, 87.197265625, 90.9462890625, 94.6953125, 98.4443359375, 102.193359375, 105.9423828125, 109.69140625, 113.4404296875, 117.189453125, 120.9384765625, 124.6875]}, "gradients/decoder.transformer.h.17.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 3.0, 2.0, 2.0, 5.0, 3.0, 1.0, 4.0, 7.0, 10.0, 6.0, 12.0, 9.0, 17.0, 30.0, 29.0, 25.0, 26.0, 65.0, 101.0, 159.0, 262.0, 549.0, 1244.0, 3366.0, 14152.0, 2894183.0, 217109.0, 9382.0, 2700.0, 1011.0, 465.0, 252.0, 150.0, 111.0, 73.0, 51.0, 31.0, 25.0, 16.0, 12.0, 13.0, 8.0, 12.0, 6.0, 9.0, 4.0, 4.0, 0.0, 2.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-225.25, -218.185546875, -211.12109375, -204.056640625, -196.9921875, -189.927734375, -182.86328125, -175.798828125, -168.734375, -161.669921875, -154.60546875, -147.541015625, -140.4765625, -133.412109375, -126.34765625, -119.283203125, -112.21875, -105.154296875, -98.08984375, -91.025390625, -83.9609375, -76.896484375, -69.83203125, -62.767578125, -55.703125, -48.638671875, -41.57421875, -34.509765625, -27.4453125, -20.380859375, -13.31640625, -6.251953125, 0.8125, 7.876953125, 14.94140625, 22.005859375, 29.0703125, 36.134765625, 43.19921875, 50.263671875, 57.328125, 64.392578125, 71.45703125, 78.521484375, 85.5859375, 92.650390625, 99.71484375, 106.779296875, 113.84375, 120.908203125, 127.97265625, 135.037109375, 142.1015625, 149.166015625, 156.23046875, 163.294921875, 170.359375, 177.423828125, 184.48828125, 191.552734375, 198.6171875, 205.681640625, 212.74609375, 219.810546875, 226.875]}, "gradients/decoder.transformer.h.17.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 0.0, 2.0, 0.0, 1.0, 1.0, 6.0, 4.0, 8.0, 5.0, 12.0, 7.0, 17.0, 22.0, 24.0, 26.0, 41.0, 43.0, 82.0, 118.0, 131.0, 134.0, 86.0, 65.0, 40.0, 33.0, 17.0, 16.0, 11.0, 20.0, 10.0, 5.0, 6.0, 6.0, 7.0, 3.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-167.5320281982422, -163.5190887451172, -159.5061492919922, -155.4932098388672, -151.4802703857422, -147.4673309326172, -143.4543914794922, -139.4414520263672, -135.4285125732422, -131.4155731201172, -127.40263366699219, -123.38969421386719, -119.37675476074219, -115.36381530761719, -111.35087585449219, -107.33793640136719, -103.32499694824219, -99.31205749511719, -95.29911804199219, -91.28617858886719, -87.27323913574219, -83.26029968261719, -79.24736022949219, -75.23442077636719, -71.22148132324219, -67.20854187011719, -63.19560241699219, -59.18266296386719, -55.16972351074219, -51.15678405761719, -47.14384460449219, -43.13090515136719, -39.11796569824219, -35.10502624511719, -31.092086791992188, -27.079147338867188, -23.066207885742188, -19.053268432617188, -15.040328979492188, -11.027389526367188, -7.0144500732421875, -3.0015106201171875, 1.0114288330078125, 5.0243682861328125, 9.037307739257812, 13.050247192382812, 17.063186645507812, 21.076126098632812, 25.089065551757812, 29.102005004882812, 33.11494445800781, 37.12788391113281, 41.14082336425781, 45.15376281738281, 49.16670227050781, 53.17964172363281, 57.19258117675781, 61.20552062988281, 65.21846008300781, 69.23139953613281, 73.24433898925781, 77.25727844238281, 81.27021789550781, 85.28315734863281, 89.29609680175781]}, "gradients/decoder.transformer.h.17.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 3.0, 1.0, 0.0, 1.0, 2.0, 3.0, 1.0, 5.0, 3.0, 8.0, 5.0, 16.0, 10.0, 12.0, 15.0, 17.0, 20.0, 24.0, 21.0, 26.0, 29.0, 30.0, 29.0, 35.0, 35.0, 43.0, 36.0, 47.0, 45.0, 39.0, 38.0, 49.0, 39.0, 27.0, 32.0, 28.0, 21.0, 23.0, 23.0, 30.0, 22.0, 14.0, 15.0, 19.0, 11.0, 9.0, 13.0, 11.0, 8.0, 5.0, 5.0, 5.0, 2.0, 3.0, 4.0, 1.0, 1.0], "bins": [-304.0479431152344, -295.5722351074219, -287.0965576171875, -278.620849609375, -270.1451721191406, -261.6694641113281, -253.1937713623047, -244.71807861328125, -236.24237060546875, -227.7666778564453, -219.29098510742188, -210.81527709960938, -202.33958435058594, -193.8638916015625, -185.38819885253906, -176.91250610351562, -168.4368133544922, -159.96112060546875, -151.4854278564453, -143.00973510742188, -134.53402709960938, -126.05833435058594, -117.5826416015625, -109.10694885253906, -100.6312484741211, -92.15555572509766, -83.67985534667969, -75.20416259765625, -66.72846984863281, -58.252769470214844, -49.777076721191406, -41.3013801574707, -32.82569885253906, -24.35000228881836, -15.874307632446289, -7.398612976074219, 1.0770835876464844, 9.552780151367188, 18.028472900390625, 26.504169464111328, 34.97986602783203, 43.455562591552734, 51.93125915527344, 60.406951904296875, 68.88264465332031, 77.35834503173828, 85.83403778076172, 94.30973815917969, 102.78543090820312, 111.26112365722656, 119.73682403564453, 128.2125244140625, 136.68821716308594, 145.16390991210938, 153.6396026611328, 162.11529541015625, 170.59100341796875, 179.0666961669922, 187.54238891601562, 196.01809692382812, 204.49378967285156, 212.969482421875, 221.44517517089844, 229.92086791992188, 238.3965606689453]}, "gradients/decoder.transformer.h.16.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 2.0, 4.0, 1.0, 5.0, 5.0, 1.0, 3.0, 5.0, 9.0, 11.0, 8.0, 12.0, 11.0, 22.0, 21.0, 22.0, 26.0, 27.0, 33.0, 39.0, 51.0, 37.0, 44.0, 47.0, 40.0, 38.0, 52.0, 28.0, 51.0, 43.0, 50.0, 37.0, 27.0, 18.0, 24.0, 21.0, 20.0, 21.0, 18.0, 18.0, 12.0, 10.0, 6.0, 13.0, 7.0, 3.0, 2.0, 3.0, 3.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0], "bins": [-32.59375, -31.590087890625, -30.58642578125, -29.582763671875, -28.5791015625, -27.575439453125, -26.57177734375, -25.568115234375, -24.564453125, -23.560791015625, -22.55712890625, -21.553466796875, -20.5498046875, -19.546142578125, -18.54248046875, -17.538818359375, -16.53515625, -15.531494140625, -14.52783203125, -13.524169921875, -12.5205078125, -11.516845703125, -10.51318359375, -9.509521484375, -8.505859375, -7.502197265625, -6.49853515625, -5.494873046875, -4.4912109375, -3.487548828125, -2.48388671875, -1.480224609375, -0.4765625, 0.527099609375, 1.53076171875, 2.534423828125, 3.5380859375, 4.541748046875, 5.54541015625, 6.549072265625, 7.552734375, 8.556396484375, 9.56005859375, 10.563720703125, 11.5673828125, 12.571044921875, 13.57470703125, 14.578369140625, 15.58203125, 16.585693359375, 17.58935546875, 18.593017578125, 19.5966796875, 20.600341796875, 21.60400390625, 22.607666015625, 23.611328125, 24.614990234375, 25.61865234375, 26.622314453125, 27.6259765625, 28.629638671875, 29.63330078125, 30.636962890625, 31.640625]}, "gradients/decoder.transformer.h.16.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 1.0, 10.0, 7.0, 11.0, 9.0, 13.0, 17.0, 29.0, 37.0, 51.0, 51.0, 83.0, 129.0, 159.0, 198.0, 335.0, 413.0, 641.0, 938.0, 1372.0, 2115.0, 3341.0, 5629.0, 10336.0, 24293.0, 302462.0, 3475968.0, 310761.0, 28333.0, 10817.0, 5846.0, 3436.0, 2212.0, 1349.0, 908.0, 585.0, 412.0, 301.0, 175.0, 141.0, 114.0, 85.0, 44.0, 39.0, 25.0, 25.0, 8.0, 12.0, 6.0, 2.0, 3.0, 4.0, 0.0, 3.0, 2.0, 1.0], "bins": [-94.9375, -92.1865234375, -89.435546875, -86.6845703125, -83.93359375, -81.1826171875, -78.431640625, -75.6806640625, -72.9296875, -70.1787109375, -67.427734375, -64.6767578125, -61.92578125, -59.1748046875, -56.423828125, -53.6728515625, -50.921875, -48.1708984375, -45.419921875, -42.6689453125, -39.91796875, -37.1669921875, -34.416015625, -31.6650390625, -28.9140625, -26.1630859375, -23.412109375, -20.6611328125, -17.91015625, -15.1591796875, -12.408203125, -9.6572265625, -6.90625, -4.1552734375, -1.404296875, 1.3466796875, 4.09765625, 6.8486328125, 9.599609375, 12.3505859375, 15.1015625, 17.8525390625, 20.603515625, 23.3544921875, 26.10546875, 28.8564453125, 31.607421875, 34.3583984375, 37.109375, 39.8603515625, 42.611328125, 45.3623046875, 48.11328125, 50.8642578125, 53.615234375, 56.3662109375, 59.1171875, 61.8681640625, 64.619140625, 67.3701171875, 70.12109375, 72.8720703125, 75.623046875, 78.3740234375, 81.125]}, "gradients/decoder.transformer.h.16.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 2.0, 2.0, 3.0, 0.0, 2.0, 5.0, 11.0, 11.0, 8.0, 6.0, 9.0, 14.0, 20.0, 26.0, 29.0, 42.0, 80.0, 193.0, 600.0, 1555.0, 907.0, 269.0, 75.0, 52.0, 43.0, 22.0, 22.0, 13.0, 9.0, 11.0, 10.0, 5.0, 5.0, 8.0, 2.0, 4.0, 5.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-99.25, -95.7841796875, -92.318359375, -88.8525390625, -85.38671875, -81.9208984375, -78.455078125, -74.9892578125, -71.5234375, -68.0576171875, -64.591796875, -61.1259765625, -57.66015625, -54.1943359375, -50.728515625, -47.2626953125, -43.796875, -40.3310546875, -36.865234375, -33.3994140625, -29.93359375, -26.4677734375, -23.001953125, -19.5361328125, -16.0703125, -12.6044921875, -9.138671875, -5.6728515625, -2.20703125, 1.2587890625, 4.724609375, 8.1904296875, 11.65625, 15.1220703125, 18.587890625, 22.0537109375, 25.51953125, 28.9853515625, 32.451171875, 35.9169921875, 39.3828125, 42.8486328125, 46.314453125, 49.7802734375, 53.24609375, 56.7119140625, 60.177734375, 63.6435546875, 67.109375, 70.5751953125, 74.041015625, 77.5068359375, 80.97265625, 84.4384765625, 87.904296875, 91.3701171875, 94.8359375, 98.3017578125, 101.767578125, 105.2333984375, 108.69921875, 112.1650390625, 115.630859375, 119.0966796875, 122.5625]}, "gradients/decoder.transformer.h.16.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 3.0, 1.0, 9.0, 4.0, 11.0, 20.0, 25.0, 40.0, 100.0, 145.0, 316.0, 597.0, 1240.0, 2592.0, 5877.0, 14847.0, 48501.0, 2089864.0, 1956504.0, 47973.0, 14601.0, 5936.0, 2613.0, 1223.0, 598.0, 276.0, 158.0, 77.0, 52.0, 39.0, 18.0, 16.0, 3.0, 2.0, 2.0, 2.0, 4.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-253.875, -246.822265625, -239.76953125, -232.716796875, -225.6640625, -218.611328125, -211.55859375, -204.505859375, -197.453125, -190.400390625, -183.34765625, -176.294921875, -169.2421875, -162.189453125, -155.13671875, -148.083984375, -141.03125, -133.978515625, -126.92578125, -119.873046875, -112.8203125, -105.767578125, -98.71484375, -91.662109375, -84.609375, -77.556640625, -70.50390625, -63.451171875, -56.3984375, -49.345703125, -42.29296875, -35.240234375, -28.1875, -21.134765625, -14.08203125, -7.029296875, 0.0234375, 7.076171875, 14.12890625, 21.181640625, 28.234375, 35.287109375, 42.33984375, 49.392578125, 56.4453125, 63.498046875, 70.55078125, 77.603515625, 84.65625, 91.708984375, 98.76171875, 105.814453125, 112.8671875, 119.919921875, 126.97265625, 134.025390625, 141.078125, 148.130859375, 155.18359375, 162.236328125, 169.2890625, 176.341796875, 183.39453125, 190.447265625, 197.5]}, "gradients/decoder.transformer.h.16.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 0.0, 0.0, 2.0, 1.0, 1.0, 4.0, 5.0, 15.0, 20.0, 29.0, 81.0, 130.0, 334.0, 203.0, 72.0, 45.0, 31.0, 16.0, 11.0, 9.0, 0.0, 3.0, 0.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-627.7488403320312, -603.3330688476562, -578.9172973632812, -554.5015258789062, -530.0857543945312, -505.66998291015625, -481.25421142578125, -456.83843994140625, -432.42266845703125, -408.00689697265625, -383.59112548828125, -359.17535400390625, -334.75958251953125, -310.34381103515625, -285.92803955078125, -261.51226806640625, -237.09646606445312, -212.68069458007812, -188.26492309570312, -163.84915161132812, -139.43338012695312, -115.0176010131836, -90.60182189941406, -66.18605041503906, -41.77027893066406, -17.35450553894043, 7.061267852783203, 31.47704315185547, 55.89281463623047, 80.30858612060547, 104.724365234375, 129.14013671875, 153.555908203125, 177.9716796875, 202.387451171875, 226.80322265625, 251.218994140625, 275.634765625, 300.050537109375, 324.46630859375, 348.882080078125, 373.2978515625, 397.713623046875, 422.12939453125, 446.545166015625, 470.9609375, 495.376708984375, 519.79248046875, 544.208251953125, 568.6240234375, 593.039794921875, 617.45556640625, 641.871337890625, 666.287109375, 690.702880859375, 715.11865234375, 739.5344848632812, 763.9502563476562, 788.3660278320312, 812.7817993164062, 837.1975708007812, 861.6133422851562, 886.0291137695312, 910.4448852539062, 934.8606567382812]}, "gradients/decoder.transformer.h.16.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0, 0.0, 1.0, 2.0, 2.0, 2.0, 2.0, 5.0, 2.0, 8.0, 11.0, 5.0, 16.0, 18.0, 21.0, 21.0, 20.0, 23.0, 35.0, 29.0, 39.0, 30.0, 34.0, 41.0, 47.0, 46.0, 46.0, 44.0, 50.0, 42.0, 50.0, 41.0, 48.0, 38.0, 31.0, 23.0, 24.0, 20.0, 20.0, 12.0, 10.0, 11.0, 15.0, 11.0, 5.0, 6.0, 4.0, 3.0, 1.0, 0.0, 1.0, 1.0], "bins": [-303.9730224609375, -296.0724792480469, -288.17193603515625, -280.2713623046875, -272.3708190917969, -264.47027587890625, -256.5697326660156, -248.669189453125, -240.7686309814453, -232.8680877685547, -224.967529296875, -217.06698608398438, -209.16644287109375, -201.26588439941406, -193.36534118652344, -185.46478271484375, -177.56423950195312, -169.6636962890625, -161.7631378173828, -153.8625946044922, -145.96205139160156, -138.06149291992188, -130.16094970703125, -122.2603988647461, -114.35986328125, -106.45931243896484, -98.55876922607422, -90.65821838378906, -82.7576675415039, -74.85711669921875, -66.95657348632812, -59.05602264404297, -51.15547180175781, -43.25492477416992, -35.354373931884766, -27.453826904296875, -19.55327796936035, -11.652729034423828, -3.7521820068359375, 4.148368835449219, 12.04891586303711, 19.949464797973633, 27.850013732910156, 35.75056076049805, 43.65110778808594, 51.551658630371094, 59.452205657958984, 67.35275268554688, 75.25330352783203, 83.15385437011719, 91.05439758300781, 98.95494842529297, 106.85549926757812, 114.75604248046875, 122.6565933227539, 130.55714416503906, 138.4576873779297, 146.3582305908203, 154.2587890625, 162.15933227539062, 170.05987548828125, 177.96043395996094, 185.86097717285156, 193.76153564453125, 201.66207885742188]}, "gradients/decoder.transformer.h.16.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 3.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 5.0, 1.0, 3.0, 5.0, 3.0, 6.0, 8.0, 7.0, 11.0, 12.0, 24.0, 18.0, 22.0, 24.0, 30.0, 28.0, 31.0, 27.0, 37.0, 44.0, 35.0, 37.0, 44.0, 44.0, 39.0, 41.0, 44.0, 37.0, 31.0, 44.0, 38.0, 27.0, 31.0, 20.0, 21.0, 18.0, 21.0, 21.0, 16.0, 8.0, 15.0, 14.0, 2.0, 2.0, 3.0, 5.0, 3.0, 2.0, 2.0, 0.0, 0.0, 3.0, 1.0], "bins": [-34.59375, -33.603271484375, -32.61279296875, -31.622314453125, -30.6318359375, -29.641357421875, -28.65087890625, -27.660400390625, -26.669921875, -25.679443359375, -24.68896484375, -23.698486328125, -22.7080078125, -21.717529296875, -20.72705078125, -19.736572265625, -18.74609375, -17.755615234375, -16.76513671875, -15.774658203125, -14.7841796875, -13.793701171875, -12.80322265625, -11.812744140625, -10.822265625, -9.831787109375, -8.84130859375, -7.850830078125, -6.8603515625, -5.869873046875, -4.87939453125, -3.888916015625, -2.8984375, -1.907958984375, -0.91748046875, 0.072998046875, 1.0634765625, 2.053955078125, 3.04443359375, 4.034912109375, 5.025390625, 6.015869140625, 7.00634765625, 7.996826171875, 8.9873046875, 9.977783203125, 10.96826171875, 11.958740234375, 12.94921875, 13.939697265625, 14.93017578125, 15.920654296875, 16.9111328125, 17.901611328125, 18.89208984375, 19.882568359375, 20.873046875, 21.863525390625, 22.85400390625, 23.844482421875, 24.8349609375, 25.825439453125, 26.81591796875, 27.806396484375, 28.796875]}, "gradients/decoder.transformer.h.16.crossattention.c_proj.weight": {"_type": "histogram", "values": [3.0, 1.0, 1.0, 5.0, 11.0, 6.0, 11.0, 21.0, 25.0, 44.0, 56.0, 96.0, 134.0, 181.0, 267.0, 406.0, 535.0, 833.0, 1192.0, 1823.0, 2644.0, 3991.0, 5833.0, 8598.0, 12997.0, 19816.0, 30328.0, 47649.0, 77383.0, 141894.0, 316392.0, 151471.0, 81068.0, 49724.0, 31287.0, 20510.0, 13679.0, 9121.0, 6013.0, 4024.0, 2671.0, 1836.0, 1223.0, 830.0, 612.0, 396.0, 286.0, 171.0, 152.0, 103.0, 78.0, 46.0, 27.0, 26.0, 25.0, 6.0, 3.0, 5.0, 2.0, 2.0, 2.0, 0.0, 0.0, 1.0], "bins": [-1.1474609375, -1.109893798828125, -1.07232666015625, -1.034759521484375, -0.9971923828125, -0.959625244140625, -0.92205810546875, -0.884490966796875, -0.846923828125, -0.809356689453125, -0.77178955078125, -0.734222412109375, -0.6966552734375, -0.659088134765625, -0.62152099609375, -0.583953857421875, -0.54638671875, -0.508819580078125, -0.47125244140625, -0.433685302734375, -0.3961181640625, -0.358551025390625, -0.32098388671875, -0.283416748046875, -0.245849609375, -0.208282470703125, -0.17071533203125, -0.133148193359375, -0.0955810546875, -0.058013916015625, -0.02044677734375, 0.017120361328125, 0.0546875, 0.092254638671875, 0.12982177734375, 0.167388916015625, 0.2049560546875, 0.242523193359375, 0.28009033203125, 0.317657470703125, 0.355224609375, 0.392791748046875, 0.43035888671875, 0.467926025390625, 0.5054931640625, 0.543060302734375, 0.58062744140625, 0.618194580078125, 0.65576171875, 0.693328857421875, 0.73089599609375, 0.768463134765625, 0.8060302734375, 0.843597412109375, 0.88116455078125, 0.918731689453125, 0.956298828125, 0.993865966796875, 1.03143310546875, 1.069000244140625, 1.1065673828125, 1.144134521484375, 1.18170166015625, 1.219268798828125, 1.2568359375]}, "gradients/decoder.transformer.h.16.crossattention.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 2.0, 2.0, 1.0, 3.0, 7.0, 8.0, 5.0, 8.0, 5.0, 8.0, 8.0, 21.0, 11.0, 19.0, 17.0, 20.0, 17.0, 28.0, 18.0, 28.0, 23.0, 39.0, 28.0, 44.0, 47.0, 42.0, 32.0, 1061.0, 29.0, 43.0, 45.0, 34.0, 27.0, 37.0, 34.0, 36.0, 21.0, 22.0, 32.0, 22.0, 9.0, 8.0, 22.0, 4.0, 13.0, 9.0, 10.0, 6.0, 7.0, 5.0, 5.0, 1.0, 3.0, 3.0, 3.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-18.703125, -18.10986328125, -17.5166015625, -16.92333984375, -16.330078125, -15.73681640625, -15.1435546875, -14.55029296875, -13.95703125, -13.36376953125, -12.7705078125, -12.17724609375, -11.583984375, -10.99072265625, -10.3974609375, -9.80419921875, -9.2109375, -8.61767578125, -8.0244140625, -7.43115234375, -6.837890625, -6.24462890625, -5.6513671875, -5.05810546875, -4.46484375, -3.87158203125, -3.2783203125, -2.68505859375, -2.091796875, -1.49853515625, -0.9052734375, -0.31201171875, 0.28125, 0.87451171875, 1.4677734375, 2.06103515625, 2.654296875, 3.24755859375, 3.8408203125, 4.43408203125, 5.02734375, 5.62060546875, 6.2138671875, 6.80712890625, 7.400390625, 7.99365234375, 8.5869140625, 9.18017578125, 9.7734375, 10.36669921875, 10.9599609375, 11.55322265625, 12.146484375, 12.73974609375, 13.3330078125, 13.92626953125, 14.51953125, 15.11279296875, 15.7060546875, 16.29931640625, 16.892578125, 17.48583984375, 18.0791015625, 18.67236328125, 19.265625]}, "gradients/decoder.transformer.h.16.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 4.0, 3.0, 3.0, 3.0, 12.0, 18.0, 23.0, 23.0, 36.0, 64.0, 95.0, 110.0, 160.0, 222.0, 328.0, 441.0, 661.0, 921.0, 1253.0, 1851.0, 2605.0, 3709.0, 5238.0, 7413.0, 10633.0, 15393.0, 22907.0, 34487.0, 53106.0, 86190.0, 165012.0, 1342301.0, 128607.0, 72433.0, 45412.0, 29814.0, 19991.0, 13866.0, 9468.0, 6659.0, 4552.0, 3257.0, 2340.0, 1637.0, 1123.0, 814.0, 547.0, 412.0, 289.0, 214.0, 134.0, 104.0, 80.0, 56.0, 35.0, 23.0, 24.0, 13.0, 5.0, 5.0, 3.0, 4.0, 4.0], "bins": [-0.78857421875, -0.7641830444335938, -0.7397918701171875, -0.7154006958007812, -0.691009521484375, -0.6666183471679688, -0.6422271728515625, -0.6178359985351562, -0.59344482421875, -0.5690536499023438, -0.5446624755859375, -0.5202713012695312, -0.495880126953125, -0.47148895263671875, -0.4470977783203125, -0.42270660400390625, -0.3983154296875, -0.37392425537109375, -0.3495330810546875, -0.32514190673828125, -0.300750732421875, -0.27635955810546875, -0.2519683837890625, -0.22757720947265625, -0.20318603515625, -0.17879486083984375, -0.1544036865234375, -0.13001251220703125, -0.105621337890625, -0.08123016357421875, -0.0568389892578125, -0.03244781494140625, -0.008056640625, 0.01633453369140625, 0.0407257080078125, 0.06511688232421875, 0.089508056640625, 0.11389923095703125, 0.1382904052734375, 0.16268157958984375, 0.18707275390625, 0.21146392822265625, 0.2358551025390625, 0.26024627685546875, 0.284637451171875, 0.30902862548828125, 0.3334197998046875, 0.35781097412109375, 0.3822021484375, 0.40659332275390625, 0.4309844970703125, 0.45537567138671875, 0.479766845703125, 0.5041580200195312, 0.5285491943359375, 0.5529403686523438, 0.57733154296875, 0.6017227172851562, 0.6261138916015625, 0.6505050659179688, 0.674896240234375, 0.6992874145507812, 0.7236785888671875, 0.7480697631835938, 0.7724609375]}, "gradients/decoder.transformer.h.16.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 2.0, 0.0, 1.0, 8.0, 2.0, 8.0, 9.0, 11.0, 9.0, 9.0, 10.0, 11.0, 21.0, 21.0, 25.0, 28.0, 48.0, 70.0, 128.0, 139.0, 134.0, 81.0, 53.0, 37.0, 20.0, 25.0, 21.0, 15.0, 13.0, 10.0, 6.0, 6.0, 7.0, 5.0, 8.0, 1.0, 3.0, 2.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-6.389617919921875e-05, -6.187707185745239e-05, -5.9857964515686035e-05, -5.783885717391968e-05, -5.581974983215332e-05, -5.380064249038696e-05, -5.1781535148620605e-05, -4.976242780685425e-05, -4.774332046508789e-05, -4.572421312332153e-05, -4.3705105781555176e-05, -4.168599843978882e-05, -3.966689109802246e-05, -3.7647783756256104e-05, -3.5628676414489746e-05, -3.360956907272339e-05, -3.159046173095703e-05, -2.9571354389190674e-05, -2.7552247047424316e-05, -2.553313970565796e-05, -2.35140323638916e-05, -2.1494925022125244e-05, -1.9475817680358887e-05, -1.745671033859253e-05, -1.5437602996826172e-05, -1.3418495655059814e-05, -1.1399388313293457e-05, -9.3802809715271e-06, -7.361173629760742e-06, -5.342066287994385e-06, -3.3229589462280273e-06, -1.30385160446167e-06, 7.152557373046875e-07, 2.734363079071045e-06, 4.753470420837402e-06, 6.77257776260376e-06, 8.791685104370117e-06, 1.0810792446136475e-05, 1.2829899787902832e-05, 1.484900712966919e-05, 1.6868114471435547e-05, 1.8887221813201904e-05, 2.0906329154968262e-05, 2.292543649673462e-05, 2.4944543838500977e-05, 2.6963651180267334e-05, 2.898275852203369e-05, 3.100186586380005e-05, 3.3020973205566406e-05, 3.5040080547332764e-05, 3.705918788909912e-05, 3.907829523086548e-05, 4.1097402572631836e-05, 4.311650991439819e-05, 4.513561725616455e-05, 4.715472459793091e-05, 4.9173831939697266e-05, 5.119293928146362e-05, 5.321204662322998e-05, 5.523115396499634e-05, 5.7250261306762695e-05, 5.926936864852905e-05, 6.128847599029541e-05, 6.330758333206177e-05, 6.532669067382812e-05]}, "gradients/decoder.transformer.h.16.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 7.0, 7.0, 4.0, 9.0, 6.0, 11.0, 15.0, 15.0, 27.0, 46.0, 63.0, 151.0, 399.0, 2523.0, 53044.0, 971231.0, 18962.0, 1463.0, 242.0, 111.0, 67.0, 33.0, 23.0, 16.0, 14.0, 10.0, 8.0, 10.0, 10.0, 10.0, 1.0, 5.0, 3.0, 1.0, 3.0, 0.0, 2.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-0.0010623931884765625, -0.0010295659303665161, -0.0009967386722564697, -0.0009639114141464233, -0.000931084156036377, -0.0008982568979263306, -0.0008654296398162842, -0.0008326023817062378, -0.0007997751235961914, -0.000766947865486145, -0.0007341206073760986, -0.0007012933492660522, -0.0006684660911560059, -0.0006356388330459595, -0.0006028115749359131, -0.0005699843168258667, -0.0005371570587158203, -0.0005043298006057739, -0.00047150254249572754, -0.00043867528438568115, -0.00040584802627563477, -0.0003730207681655884, -0.000340193510055542, -0.0003073662519454956, -0.0002745389938354492, -0.00024171173572540283, -0.00020888447761535645, -0.00017605721950531006, -0.00014322996139526367, -0.00011040270328521729, -7.75754451751709e-05, -4.474818706512451e-05, -1.1920928955078125e-05, 2.0906329154968262e-05, 5.373358726501465e-05, 8.656084537506104e-05, 0.00011938810348510742, 0.0001522153615951538, 0.0001850426197052002, 0.00021786987781524658, 0.00025069713592529297, 0.00028352439403533936, 0.00031635165214538574, 0.00034917891025543213, 0.0003820061683654785, 0.0004148334264755249, 0.0004476606845855713, 0.0004804879426956177, 0.0005133152008056641, 0.0005461424589157104, 0.0005789697170257568, 0.0006117969751358032, 0.0006446242332458496, 0.000677451491355896, 0.0007102787494659424, 0.0007431060075759888, 0.0007759332656860352, 0.0008087605237960815, 0.0008415877819061279, 0.0008744150400161743, 0.0009072422981262207, 0.0009400695562362671, 0.0009728968143463135, 0.0010057240724563599, 0.0010385513305664062]}, "gradients/decoder.transformer.h.16.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 5.0, 19.0, 30.0, 82.0, 381.0, 348.0, 86.0, 33.0, 20.0, 8.0, 4.0, 1.0, 2.0], "bins": [-0.00021893756638746709, -0.00021502682648133487, -0.00021111610112711787, -0.00020720536122098565, -0.00020329462131485343, -0.0001993838814087212, -0.000195473141502589, -0.000191562416148372, -0.00018765167624223977, -0.00018374093633610755, -0.00017983021098189056, -0.00017591947107575834, -0.00017200873116962612, -0.0001680979912634939, -0.00016418725135736167, -0.00016027652600314468, -0.00015636578609701246, -0.00015245504619088024, -0.00014854432083666325, -0.00014463358093053102, -0.0001407228410243988, -0.00013681210111826658, -0.00013290136121213436, -0.00012899063585791737, -0.00012507989595178515, -0.00012116915604565293, -0.00011725842341547832, -0.00011334769078530371, -0.00010943695087917149, -0.00010552621097303927, -0.00010161547834286466, -9.770474571269006e-05, -9.379399853060022e-05, -8.988326590042561e-05, -8.597252599429339e-05, -8.206178608816117e-05, -7.815105345798656e-05, -7.424032082781196e-05, -7.032958092167974e-05, -6.641884101554751e-05, -6.250810838537291e-05, -5.859737211721949e-05, -5.468663584906608e-05, -5.0775899580912665e-05, -4.686516331275925e-05, -4.2954427044605836e-05, -3.904369077645242e-05, -3.513295450829901e-05, -3.1222218240145594e-05, -2.731148197199218e-05, -2.3400745703838766e-05, -1.949000943568535e-05, -1.5579273167531937e-05, -1.1668536899378523e-05, -7.757800631225109e-06, -3.847064363071695e-06, 6.367190508171916e-08, 3.974408173235133e-06, 7.885144441388547e-06, 1.1795880709541962e-05, 1.5706616977695376e-05, 1.961735324584879e-05, 2.3528089514002204e-05, 2.7438825782155618e-05, 3.134956205030903e-05]}, "gradients/decoder.transformer.h.16.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 2.0, 3.0, 0.0, 4.0, 8.0, 4.0, 5.0, 7.0, 12.0, 6.0, 10.0, 16.0, 13.0, 23.0, 22.0, 18.0, 31.0, 33.0, 31.0, 35.0, 41.0, 49.0, 43.0, 56.0, 30.0, 39.0, 37.0, 38.0, 36.0, 38.0, 46.0, 35.0, 30.0, 27.0, 24.0, 16.0, 26.0, 23.0, 12.0, 16.0, 12.0, 11.0, 11.0, 8.0, 6.0, 4.0, 8.0, 2.0, 3.0, 3.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-2.7835369110107422e-05, -2.7008354663848877e-05, -2.6181340217590332e-05, -2.5354325771331787e-05, -2.4527311325073242e-05, -2.3700296878814697e-05, -2.2873282432556152e-05, -2.2046267986297607e-05, -2.1219253540039062e-05, -2.0392239093780518e-05, -1.9565224647521973e-05, -1.8738210201263428e-05, -1.7911195755004883e-05, -1.7084181308746338e-05, -1.6257166862487793e-05, -1.5430152416229248e-05, -1.4603137969970703e-05, -1.3776123523712158e-05, -1.2949109077453613e-05, -1.2122094631195068e-05, -1.1295080184936523e-05, -1.0468065738677979e-05, -9.641051292419434e-06, -8.814036846160889e-06, -7.987022399902344e-06, -7.160007953643799e-06, -6.332993507385254e-06, -5.505979061126709e-06, -4.678964614868164e-06, -3.851950168609619e-06, -3.0249357223510742e-06, -2.1979212760925293e-06, -1.3709068298339844e-06, -5.438923835754395e-07, 2.8312206268310547e-07, 1.1101365089416504e-06, 1.9371509552001953e-06, 2.7641654014587402e-06, 3.591179847717285e-06, 4.41819429397583e-06, 5.245208740234375e-06, 6.07222318649292e-06, 6.899237632751465e-06, 7.72625207901001e-06, 8.553266525268555e-06, 9.3802809715271e-06, 1.0207295417785645e-05, 1.103430986404419e-05, 1.1861324310302734e-05, 1.268833875656128e-05, 1.3515353202819824e-05, 1.4342367649078369e-05, 1.5169382095336914e-05, 1.599639654159546e-05, 1.6823410987854004e-05, 1.765042543411255e-05, 1.8477439880371094e-05, 1.930445432662964e-05, 2.0131468772888184e-05, 2.095848321914673e-05, 2.1785497665405273e-05, 2.261251211166382e-05, 2.3439526557922363e-05, 2.4266541004180908e-05, 2.5093555450439453e-05]}, "gradients/decoder.transformer.h.16.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 3.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 5.0, 1.0, 3.0, 5.0, 3.0, 6.0, 8.0, 7.0, 11.0, 12.0, 24.0, 18.0, 22.0, 24.0, 30.0, 28.0, 31.0, 27.0, 37.0, 44.0, 35.0, 37.0, 44.0, 44.0, 39.0, 41.0, 44.0, 37.0, 31.0, 44.0, 38.0, 27.0, 31.0, 20.0, 21.0, 18.0, 21.0, 21.0, 16.0, 8.0, 15.0, 14.0, 2.0, 2.0, 3.0, 5.0, 3.0, 2.0, 2.0, 0.0, 0.0, 3.0, 1.0], "bins": [-34.59375, -33.603271484375, -32.61279296875, -31.622314453125, -30.6318359375, -29.641357421875, -28.65087890625, -27.660400390625, -26.669921875, -25.679443359375, -24.68896484375, -23.698486328125, -22.7080078125, -21.717529296875, -20.72705078125, -19.736572265625, -18.74609375, -17.755615234375, -16.76513671875, -15.774658203125, -14.7841796875, -13.793701171875, -12.80322265625, -11.812744140625, -10.822265625, -9.831787109375, -8.84130859375, -7.850830078125, -6.8603515625, -5.869873046875, -4.87939453125, -3.888916015625, -2.8984375, -1.907958984375, -0.91748046875, 0.072998046875, 1.0634765625, 2.053955078125, 3.04443359375, 4.034912109375, 5.025390625, 6.015869140625, 7.00634765625, 7.996826171875, 8.9873046875, 9.977783203125, 10.96826171875, 11.958740234375, 12.94921875, 13.939697265625, 14.93017578125, 15.920654296875, 16.9111328125, 17.901611328125, 18.89208984375, 19.882568359375, 20.873046875, 21.863525390625, 22.85400390625, 23.844482421875, 24.8349609375, 25.825439453125, 26.81591796875, 27.806396484375, 28.796875]}, "gradients/decoder.transformer.h.16.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 5.0, 8.0, 6.0, 9.0, 8.0, 12.0, 17.0, 23.0, 29.0, 44.0, 35.0, 71.0, 99.0, 122.0, 164.0, 233.0, 330.0, 427.0, 596.0, 775.0, 1033.0, 1304.0, 1838.0, 2513.0, 3424.0, 5291.0, 8718.0, 19904.0, 128408.0, 754558.0, 77564.0, 16265.0, 7597.0, 4722.0, 3272.0, 2272.0, 1776.0, 1360.0, 956.0, 690.0, 493.0, 414.0, 308.0, 232.0, 154.0, 134.0, 107.0, 74.0, 47.0, 37.0, 30.0, 16.0, 12.0, 11.0, 9.0, 6.0, 4.0, 3.0, 2.0, 0.0, 1.0], "bins": [-43.0, -41.6728515625, -40.345703125, -39.0185546875, -37.69140625, -36.3642578125, -35.037109375, -33.7099609375, -32.3828125, -31.0556640625, -29.728515625, -28.4013671875, -27.07421875, -25.7470703125, -24.419921875, -23.0927734375, -21.765625, -20.4384765625, -19.111328125, -17.7841796875, -16.45703125, -15.1298828125, -13.802734375, -12.4755859375, -11.1484375, -9.8212890625, -8.494140625, -7.1669921875, -5.83984375, -4.5126953125, -3.185546875, -1.8583984375, -0.53125, 0.7958984375, 2.123046875, 3.4501953125, 4.77734375, 6.1044921875, 7.431640625, 8.7587890625, 10.0859375, 11.4130859375, 12.740234375, 14.0673828125, 15.39453125, 16.7216796875, 18.048828125, 19.3759765625, 20.703125, 22.0302734375, 23.357421875, 24.6845703125, 26.01171875, 27.3388671875, 28.666015625, 29.9931640625, 31.3203125, 32.6474609375, 33.974609375, 35.3017578125, 36.62890625, 37.9560546875, 39.283203125, 40.6103515625, 41.9375]}, "gradients/decoder.transformer.h.16.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 3.0, 2.0, 2.0, 5.0, 7.0, 4.0, 5.0, 8.0, 9.0, 17.0, 14.0, 15.0, 15.0, 24.0, 36.0, 38.0, 30.0, 43.0, 39.0, 58.0, 56.0, 66.0, 189.0, 1810.0, 83.0, 70.0, 65.0, 49.0, 48.0, 37.0, 38.0, 36.0, 26.0, 18.0, 17.0, 12.0, 9.0, 13.0, 12.0, 12.0, 5.0, 5.0, 4.0, 4.0, 0.0, 1.0, 3.0, 0.0, 4.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-96.25, -92.72265625, -89.1953125, -85.66796875, -82.140625, -78.61328125, -75.0859375, -71.55859375, -68.03125, -64.50390625, -60.9765625, -57.44921875, -53.921875, -50.39453125, -46.8671875, -43.33984375, -39.8125, -36.28515625, -32.7578125, -29.23046875, -25.703125, -22.17578125, -18.6484375, -15.12109375, -11.59375, -8.06640625, -4.5390625, -1.01171875, 2.515625, 6.04296875, 9.5703125, 13.09765625, 16.625, 20.15234375, 23.6796875, 27.20703125, 30.734375, 34.26171875, 37.7890625, 41.31640625, 44.84375, 48.37109375, 51.8984375, 55.42578125, 58.953125, 62.48046875, 66.0078125, 69.53515625, 73.0625, 76.58984375, 80.1171875, 83.64453125, 87.171875, 90.69921875, 94.2265625, 97.75390625, 101.28125, 104.80859375, 108.3359375, 111.86328125, 115.390625, 118.91796875, 122.4453125, 125.97265625, 129.5]}, "gradients/decoder.transformer.h.16.attn.c_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 1.0, 3.0, 6.0, 7.0, 8.0, 13.0, 18.0, 34.0, 41.0, 50.0, 93.0, 222.0, 605.0, 2162.0, 14902.0, 3018000.0, 100794.0, 6674.0, 1297.0, 405.0, 164.0, 60.0, 58.0, 27.0, 14.0, 23.0, 8.0, 9.0, 5.0, 3.0, 2.0, 1.0, 3.0, 2.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-364.75, -353.28515625, -341.8203125, -330.35546875, -318.890625, -307.42578125, -295.9609375, -284.49609375, -273.03125, -261.56640625, -250.1015625, -238.63671875, -227.171875, -215.70703125, -204.2421875, -192.77734375, -181.3125, -169.84765625, -158.3828125, -146.91796875, -135.453125, -123.98828125, -112.5234375, -101.05859375, -89.59375, -78.12890625, -66.6640625, -55.19921875, -43.734375, -32.26953125, -20.8046875, -9.33984375, 2.125, 13.58984375, 25.0546875, 36.51953125, 47.984375, 59.44921875, 70.9140625, 82.37890625, 93.84375, 105.30859375, 116.7734375, 128.23828125, 139.703125, 151.16796875, 162.6328125, 174.09765625, 185.5625, 197.02734375, 208.4921875, 219.95703125, 231.421875, 242.88671875, 254.3515625, 265.81640625, 277.28125, 288.74609375, 300.2109375, 311.67578125, 323.140625, 334.60546875, 346.0703125, 357.53515625, 369.0]}, "gradients/decoder.transformer.h.16.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 4.0, 5.0, 10.0, 14.0, 10.0, 52.0, 53.0, 118.0, 233.0, 209.0, 150.0, 90.0, 36.0, 13.0, 8.0, 5.0, 4.0, 6.0, 0.0, 0.0, 2.0], "bins": [-558.7932739257812, -548.0675048828125, -537.341796875, -526.6160278320312, -515.8903198242188, -505.16455078125, -494.4388122558594, -483.71307373046875, -472.9873352050781, -462.2615966796875, -451.5358581542969, -440.81011962890625, -430.0843505859375, -419.3586120605469, -408.63287353515625, -397.9071350097656, -387.181396484375, -376.4556579589844, -365.72991943359375, -355.0041809082031, -344.2784423828125, -333.55267333984375, -322.8269348144531, -312.1011962890625, -301.3754577636719, -290.64971923828125, -279.9239807128906, -269.1982421875, -258.47247314453125, -247.7467498779297, -237.02099609375, -226.29525756835938, -215.56954956054688, -204.84381103515625, -194.11807250976562, -183.39231872558594, -172.6665802001953, -161.9408416748047, -151.215087890625, -140.48934936523438, -129.76361083984375, -119.03787231445312, -108.31212615966797, -97.58638000488281, -86.86064147949219, -76.13490295410156, -65.4091567993164, -54.68341064453125, -43.957672119140625, -33.231929779052734, -22.506187438964844, -11.780445098876953, -1.0547027587890625, 9.671039581298828, 20.39678192138672, 31.122528076171875, 41.8482666015625, 52.57400894165039, 63.29975128173828, 74.02549743652344, 84.75123596191406, 95.47697448730469, 106.20272064208984, 116.928466796875, 127.65420532226562]}, "gradients/decoder.transformer.h.16.ln_1.bias": {"_type": "histogram", "values": [2.0, 1.0, 2.0, 5.0, 2.0, 4.0, 4.0, 7.0, 6.0, 6.0, 9.0, 10.0, 10.0, 10.0, 16.0, 8.0, 16.0, 18.0, 18.0, 21.0, 22.0, 16.0, 27.0, 42.0, 34.0, 23.0, 32.0, 40.0, 39.0, 41.0, 34.0, 40.0, 28.0, 31.0, 36.0, 27.0, 33.0, 23.0, 23.0, 31.0, 27.0, 23.0, 21.0, 32.0, 14.0, 26.0, 15.0, 13.0, 6.0, 6.0, 4.0, 5.0, 5.0, 7.0, 7.0, 1.0, 7.0, 3.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-256.8808898925781, -248.49310302734375, -240.1053009033203, -231.71749877929688, -223.3297119140625, -214.94192504882812, -206.5541229248047, -198.16632080078125, -189.77853393554688, -181.3907470703125, -173.00294494628906, -164.61514282226562, -156.22735595703125, -147.83956909179688, -139.45176696777344, -131.06396484375, -122.67617797851562, -114.28838348388672, -105.90058898925781, -97.5127944946289, -89.125, -80.7372055053711, -72.34941101074219, -63.96161651611328, -55.573822021484375, -47.18602752685547, -38.79823303222656, -30.410438537597656, -22.02264404296875, -13.634849548339844, -5.2470550537109375, 3.1407394409179688, 11.52850341796875, 19.916297912597656, 28.304092407226562, 36.69188690185547, 45.079681396484375, 53.46747589111328, 61.85527038574219, 70.2430648803711, 78.630859375, 87.0186538696289, 95.40644836425781, 103.79424285888672, 112.18203735351562, 120.56983184814453, 128.95762634277344, 137.34542846679688, 145.73321533203125, 154.12100219726562, 162.50880432128906, 170.8966064453125, 179.28439331054688, 187.67218017578125, 196.0599822998047, 204.44778442382812, 212.8355712890625, 221.22335815429688, 229.6111602783203, 237.99896240234375, 246.38674926757812, 254.7745361328125, 263.162353515625, 271.5501403808594, 279.93792724609375]}, "gradients/decoder.transformer.h.15.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 3.0, 0.0, 1.0, 5.0, 3.0, 3.0, 9.0, 3.0, 11.0, 7.0, 13.0, 18.0, 20.0, 20.0, 30.0, 18.0, 21.0, 36.0, 29.0, 35.0, 39.0, 37.0, 34.0, 33.0, 47.0, 37.0, 36.0, 53.0, 43.0, 23.0, 47.0, 33.0, 27.0, 39.0, 23.0, 24.0, 23.0, 21.0, 24.0, 19.0, 11.0, 10.0, 9.0, 8.0, 11.0, 4.0, 2.0, 6.0, 0.0, 5.0, 0.0, 0.0, 2.0, 0.0, 3.0], "bins": [-35.0625, -34.052978515625, -33.04345703125, -32.033935546875, -31.0244140625, -30.014892578125, -29.00537109375, -27.995849609375, -26.986328125, -25.976806640625, -24.96728515625, -23.957763671875, -22.9482421875, -21.938720703125, -20.92919921875, -19.919677734375, -18.91015625, -17.900634765625, -16.89111328125, -15.881591796875, -14.8720703125, -13.862548828125, -12.85302734375, -11.843505859375, -10.833984375, -9.824462890625, -8.81494140625, -7.805419921875, -6.7958984375, -5.786376953125, -4.77685546875, -3.767333984375, -2.7578125, -1.748291015625, -0.73876953125, 0.270751953125, 1.2802734375, 2.289794921875, 3.29931640625, 4.308837890625, 5.318359375, 6.327880859375, 7.33740234375, 8.346923828125, 9.3564453125, 10.365966796875, 11.37548828125, 12.385009765625, 13.39453125, 14.404052734375, 15.41357421875, 16.423095703125, 17.4326171875, 18.442138671875, 19.45166015625, 20.461181640625, 21.470703125, 22.480224609375, 23.48974609375, 24.499267578125, 25.5087890625, 26.518310546875, 27.52783203125, 28.537353515625, 29.546875]}, "gradients/decoder.transformer.h.15.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 3.0, 2.0, 6.0, 3.0, 8.0, 9.0, 16.0, 16.0, 27.0, 21.0, 35.0, 41.0, 61.0, 93.0, 104.0, 146.0, 223.0, 291.0, 385.0, 500.0, 660.0, 876.0, 1323.0, 1875.0, 2636.0, 3778.0, 6198.0, 10447.0, 20423.0, 121684.0, 2164392.0, 1705184.0, 103720.0, 20139.0, 10159.0, 6059.0, 3924.0, 2528.0, 1774.0, 1294.0, 907.0, 619.0, 446.0, 349.0, 252.0, 183.0, 112.0, 109.0, 80.0, 44.0, 27.0, 40.0, 15.0, 12.0, 15.0, 9.0, 7.0, 3.0, 2.0, 4.0, 3.0, 1.0], "bins": [-79.1875, -76.78515625, -74.3828125, -71.98046875, -69.578125, -67.17578125, -64.7734375, -62.37109375, -59.96875, -57.56640625, -55.1640625, -52.76171875, -50.359375, -47.95703125, -45.5546875, -43.15234375, -40.75, -38.34765625, -35.9453125, -33.54296875, -31.140625, -28.73828125, -26.3359375, -23.93359375, -21.53125, -19.12890625, -16.7265625, -14.32421875, -11.921875, -9.51953125, -7.1171875, -4.71484375, -2.3125, 0.08984375, 2.4921875, 4.89453125, 7.296875, 9.69921875, 12.1015625, 14.50390625, 16.90625, 19.30859375, 21.7109375, 24.11328125, 26.515625, 28.91796875, 31.3203125, 33.72265625, 36.125, 38.52734375, 40.9296875, 43.33203125, 45.734375, 48.13671875, 50.5390625, 52.94140625, 55.34375, 57.74609375, 60.1484375, 62.55078125, 64.953125, 67.35546875, 69.7578125, 72.16015625, 74.5625]}, "gradients/decoder.transformer.h.15.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 1.0, 1.0, 0.0, 1.0, 3.0, 4.0, 3.0, 5.0, 8.0, 20.0, 10.0, 14.0, 16.0, 35.0, 35.0, 53.0, 84.0, 224.0, 669.0, 1489.0, 844.0, 277.0, 96.0, 40.0, 32.0, 30.0, 21.0, 13.0, 10.0, 8.0, 7.0, 8.0, 8.0, 1.0, 5.0, 4.0, 0.0, 3.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-91.0, -87.580078125, -84.16015625, -80.740234375, -77.3203125, -73.900390625, -70.48046875, -67.060546875, -63.640625, -60.220703125, -56.80078125, -53.380859375, -49.9609375, -46.541015625, -43.12109375, -39.701171875, -36.28125, -32.861328125, -29.44140625, -26.021484375, -22.6015625, -19.181640625, -15.76171875, -12.341796875, -8.921875, -5.501953125, -2.08203125, 1.337890625, 4.7578125, 8.177734375, 11.59765625, 15.017578125, 18.4375, 21.857421875, 25.27734375, 28.697265625, 32.1171875, 35.537109375, 38.95703125, 42.376953125, 45.796875, 49.216796875, 52.63671875, 56.056640625, 59.4765625, 62.896484375, 66.31640625, 69.736328125, 73.15625, 76.576171875, 79.99609375, 83.416015625, 86.8359375, 90.255859375, 93.67578125, 97.095703125, 100.515625, 103.935546875, 107.35546875, 110.775390625, 114.1953125, 117.615234375, 121.03515625, 124.455078125, 127.875]}, "gradients/decoder.transformer.h.15.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 2.0, 2.0, 6.0, 9.0, 6.0, 12.0, 5.0, 20.0, 30.0, 45.0, 58.0, 93.0, 140.0, 214.0, 309.0, 420.0, 726.0, 1142.0, 1929.0, 3503.0, 6073.0, 12006.0, 27030.0, 80995.0, 2166147.0, 1764416.0, 76593.0, 25996.0, 12013.0, 5889.0, 3240.0, 2037.0, 1138.0, 713.0, 421.0, 300.0, 183.0, 131.0, 105.0, 68.0, 34.0, 30.0, 22.0, 15.0, 17.0, 6.0, 2.0, 0.0, 2.0, 1.0, 4.0], "bins": [-157.75, -153.5966796875, -149.443359375, -145.2900390625, -141.13671875, -136.9833984375, -132.830078125, -128.6767578125, -124.5234375, -120.3701171875, -116.216796875, -112.0634765625, -107.91015625, -103.7568359375, -99.603515625, -95.4501953125, -91.296875, -87.1435546875, -82.990234375, -78.8369140625, -74.68359375, -70.5302734375, -66.376953125, -62.2236328125, -58.0703125, -53.9169921875, -49.763671875, -45.6103515625, -41.45703125, -37.3037109375, -33.150390625, -28.9970703125, -24.84375, -20.6904296875, -16.537109375, -12.3837890625, -8.23046875, -4.0771484375, 0.076171875, 4.2294921875, 8.3828125, 12.5361328125, 16.689453125, 20.8427734375, 24.99609375, 29.1494140625, 33.302734375, 37.4560546875, 41.609375, 45.7626953125, 49.916015625, 54.0693359375, 58.22265625, 62.3759765625, 66.529296875, 70.6826171875, 74.8359375, 78.9892578125, 83.142578125, 87.2958984375, 91.44921875, 95.6025390625, 99.755859375, 103.9091796875, 108.0625]}, "gradients/decoder.transformer.h.15.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0, 3.0, 4.0, 6.0, 3.0, 10.0, 8.0, 8.0, 9.0, 14.0, 21.0, 26.0, 38.0, 44.0, 81.0, 104.0, 185.0, 122.0, 97.0, 55.0, 46.0, 32.0, 15.0, 17.0, 8.0, 14.0, 11.0, 9.0, 5.0, 4.0, 2.0, 4.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-294.21136474609375, -283.2371826171875, -272.2630310058594, -261.28887939453125, -250.314697265625, -239.3405303955078, -228.36636352539062, -217.39219665527344, -206.41802978515625, -195.44386291503906, -184.46969604492188, -173.4955291748047, -162.5213623046875, -151.5471954345703, -140.57302856445312, -129.59886169433594, -118.62469482421875, -107.65052795410156, -96.67636108398438, -85.70219421386719, -74.72802734375, -63.75386047363281, -52.779693603515625, -41.80552673339844, -30.83135986328125, -19.857192993164062, -8.883026123046875, 2.0911407470703125, 13.0653076171875, 24.039474487304688, 35.013641357421875, 45.98780822753906, 56.961944580078125, 67.93611145019531, 78.9102783203125, 89.88444519042969, 100.85861206054688, 111.83277893066406, 122.80694580078125, 133.78111267089844, 144.75527954101562, 155.7294464111328, 166.70361328125, 177.6777801513672, 188.65194702148438, 199.62611389160156, 210.60028076171875, 221.57444763183594, 232.54861450195312, 243.5227813720703, 254.4969482421875, 265.47113037109375, 276.4452819824219, 287.41943359375, 298.39361572265625, 309.3677978515625, 320.3419494628906, 331.31610107421875, 342.290283203125, 353.26446533203125, 364.2386169433594, 375.2127685546875, 386.18695068359375, 397.1611328125, 408.1352844238281]}, "gradients/decoder.transformer.h.15.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 5.0, 3.0, 1.0, 0.0, 2.0, 6.0, 6.0, 1.0, 9.0, 4.0, 9.0, 10.0, 11.0, 5.0, 22.0, 11.0, 23.0, 16.0, 17.0, 23.0, 29.0, 37.0, 40.0, 35.0, 36.0, 39.0, 37.0, 44.0, 40.0, 35.0, 44.0, 30.0, 32.0, 29.0, 43.0, 37.0, 31.0, 34.0, 23.0, 17.0, 30.0, 18.0, 18.0, 19.0, 12.0, 7.0, 5.0, 5.0, 3.0, 5.0, 4.0, 7.0, 1.0, 4.0, 1.0, 2.0, 2.0, 0.0, 2.0, 0.0, 0.0, 2.0], "bins": [-200.75729370117188, -194.22927856445312, -187.70126342773438, -181.17324829101562, -174.64523315429688, -168.11721801757812, -161.58920288085938, -155.06117248535156, -148.5331573486328, -142.00514221191406, -135.4771270751953, -128.94911193847656, -122.42108917236328, -115.89307403564453, -109.36505889892578, -102.8370361328125, -96.30902862548828, -89.78101348876953, -83.25299835205078, -76.7249755859375, -70.19696044921875, -63.6689453125, -57.14093017578125, -50.612911224365234, -44.084896087646484, -37.556880950927734, -31.02886199951172, -24.50084686279297, -17.972829818725586, -11.444812774658203, -4.916797637939453, 1.6112213134765625, 8.139236450195312, 14.667253494262695, 21.195270538330078, 27.723285675048828, 34.251304626464844, 40.779319763183594, 47.307334899902344, 53.83535385131836, 60.36336898803711, 66.89138793945312, 73.41940307617188, 79.94741821289062, 86.47543334960938, 93.00344848632812, 99.53146362304688, 106.05948638916016, 112.5875015258789, 119.11551666259766, 125.6435317993164, 132.1715545654297, 138.69956970214844, 145.2275848388672, 151.75559997558594, 158.2836151123047, 164.81163024902344, 171.3396453857422, 177.86766052246094, 184.3956756591797, 190.92369079589844, 197.45172119140625, 203.979736328125, 210.50775146484375, 217.0357666015625]}, "gradients/decoder.transformer.h.15.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 3.0, 2.0, 1.0, 1.0, 0.0, 2.0, 4.0, 7.0, 6.0, 7.0, 14.0, 19.0, 10.0, 18.0, 18.0, 28.0, 17.0, 26.0, 38.0, 27.0, 27.0, 29.0, 41.0, 38.0, 44.0, 46.0, 45.0, 43.0, 43.0, 36.0, 49.0, 27.0, 31.0, 31.0, 17.0, 28.0, 22.0, 21.0, 25.0, 20.0, 25.0, 16.0, 11.0, 9.0, 11.0, 9.0, 4.0, 4.0, 6.0, 2.0, 2.0, 2.0, 3.0, 0.0, 1.0, 1.0, 0.0, 2.0], "bins": [-33.34375, -32.332275390625, -31.32080078125, -30.309326171875, -29.2978515625, -28.286376953125, -27.27490234375, -26.263427734375, -25.251953125, -24.240478515625, -23.22900390625, -22.217529296875, -21.2060546875, -20.194580078125, -19.18310546875, -18.171630859375, -17.16015625, -16.148681640625, -15.13720703125, -14.125732421875, -13.1142578125, -12.102783203125, -11.09130859375, -10.079833984375, -9.068359375, -8.056884765625, -7.04541015625, -6.033935546875, -5.0224609375, -4.010986328125, -2.99951171875, -1.988037109375, -0.9765625, 0.034912109375, 1.04638671875, 2.057861328125, 3.0693359375, 4.080810546875, 5.09228515625, 6.103759765625, 7.115234375, 8.126708984375, 9.13818359375, 10.149658203125, 11.1611328125, 12.172607421875, 13.18408203125, 14.195556640625, 15.20703125, 16.218505859375, 17.22998046875, 18.241455078125, 19.2529296875, 20.264404296875, 21.27587890625, 22.287353515625, 23.298828125, 24.310302734375, 25.32177734375, 26.333251953125, 27.3447265625, 28.356201171875, 29.36767578125, 30.379150390625, 31.390625]}, "gradients/decoder.transformer.h.15.crossattention.c_proj.weight": {"_type": "histogram", "values": [6.0, 6.0, 15.0, 16.0, 22.0, 25.0, 37.0, 45.0, 80.0, 71.0, 96.0, 146.0, 203.0, 276.0, 381.0, 573.0, 815.0, 1088.0, 1499.0, 2090.0, 2898.0, 4120.0, 5794.0, 8470.0, 12227.0, 17487.0, 25857.0, 37875.0, 57692.0, 97707.0, 226233.0, 257183.0, 102827.0, 59510.0, 38532.0, 26354.0, 18132.0, 12532.0, 8664.0, 6037.0, 4231.0, 3095.0, 2174.0, 1526.0, 1105.0, 764.0, 558.0, 398.0, 306.0, 203.0, 159.0, 107.0, 79.0, 71.0, 60.0, 33.0, 27.0, 20.0, 18.0, 8.0, 8.0, 2.0, 2.0, 1.0], "bins": [-1.052734375, -1.0188140869140625, -0.984893798828125, -0.9509735107421875, -0.91705322265625, -0.8831329345703125, -0.849212646484375, -0.8152923583984375, -0.7813720703125, -0.7474517822265625, -0.713531494140625, -0.6796112060546875, -0.64569091796875, -0.6117706298828125, -0.577850341796875, -0.5439300537109375, -0.510009765625, -0.4760894775390625, -0.442169189453125, -0.4082489013671875, -0.37432861328125, -0.3404083251953125, -0.306488037109375, -0.2725677490234375, -0.2386474609375, -0.2047271728515625, -0.170806884765625, -0.1368865966796875, -0.10296630859375, -0.0690460205078125, -0.035125732421875, -0.0012054443359375, 0.03271484375, 0.0666351318359375, 0.100555419921875, 0.1344757080078125, 0.16839599609375, 0.2023162841796875, 0.236236572265625, 0.2701568603515625, 0.3040771484375, 0.3379974365234375, 0.371917724609375, 0.4058380126953125, 0.43975830078125, 0.4736785888671875, 0.507598876953125, 0.5415191650390625, 0.575439453125, 0.6093597412109375, 0.643280029296875, 0.6772003173828125, 0.71112060546875, 0.7450408935546875, 0.778961181640625, 0.8128814697265625, 0.8468017578125, 0.8807220458984375, 0.914642333984375, 0.9485626220703125, 0.98248291015625, 1.0164031982421875, 1.050323486328125, 1.0842437744140625, 1.1181640625]}, "gradients/decoder.transformer.h.15.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 4.0, 5.0, 5.0, 2.0, 1.0, 3.0, 6.0, 7.0, 13.0, 6.0, 12.0, 8.0, 17.0, 18.0, 9.0, 22.0, 32.0, 26.0, 30.0, 37.0, 34.0, 41.0, 26.0, 39.0, 36.0, 50.0, 1061.0, 39.0, 40.0, 39.0, 29.0, 43.0, 45.0, 24.0, 29.0, 37.0, 29.0, 14.0, 16.0, 19.0, 11.0, 10.0, 9.0, 12.0, 10.0, 11.0, 3.0, 5.0, 5.0, 3.0, 1.0, 3.0, 3.0, 2.0, 1.0, 0.0, 2.0], "bins": [-21.421875, -20.79052734375, -20.1591796875, -19.52783203125, -18.896484375, -18.26513671875, -17.6337890625, -17.00244140625, -16.37109375, -15.73974609375, -15.1083984375, -14.47705078125, -13.845703125, -13.21435546875, -12.5830078125, -11.95166015625, -11.3203125, -10.68896484375, -10.0576171875, -9.42626953125, -8.794921875, -8.16357421875, -7.5322265625, -6.90087890625, -6.26953125, -5.63818359375, -5.0068359375, -4.37548828125, -3.744140625, -3.11279296875, -2.4814453125, -1.85009765625, -1.21875, -0.58740234375, 0.0439453125, 0.67529296875, 1.306640625, 1.93798828125, 2.5693359375, 3.20068359375, 3.83203125, 4.46337890625, 5.0947265625, 5.72607421875, 6.357421875, 6.98876953125, 7.6201171875, 8.25146484375, 8.8828125, 9.51416015625, 10.1455078125, 10.77685546875, 11.408203125, 12.03955078125, 12.6708984375, 13.30224609375, 13.93359375, 14.56494140625, 15.1962890625, 15.82763671875, 16.458984375, 17.09033203125, 17.7216796875, 18.35302734375, 18.984375]}, "gradients/decoder.transformer.h.15.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 2.0, 3.0, 6.0, 3.0, 6.0, 9.0, 23.0, 37.0, 38.0, 47.0, 75.0, 110.0, 135.0, 213.0, 304.0, 443.0, 565.0, 884.0, 1254.0, 1742.0, 2497.0, 3746.0, 5389.0, 7942.0, 11911.0, 17937.0, 27834.0, 44488.0, 73245.0, 129136.0, 1364709.0, 166071.0, 87362.0, 52491.0, 32876.0, 20968.0, 13570.0, 9073.0, 6163.0, 4279.0, 2830.0, 2029.0, 1439.0, 999.0, 650.0, 489.0, 342.0, 220.0, 158.0, 140.0, 86.0, 46.0, 30.0, 31.0, 31.0, 20.0, 11.0, 4.0, 1.0, 1.0, 4.0, 2.0, 2.0], "bins": [-0.859375, -0.8322372436523438, -0.8050994873046875, -0.7779617309570312, -0.750823974609375, -0.7236862182617188, -0.6965484619140625, -0.6694107055664062, -0.64227294921875, -0.6151351928710938, -0.5879974365234375, -0.5608596801757812, -0.533721923828125, -0.5065841674804688, -0.4794464111328125, -0.45230865478515625, -0.4251708984375, -0.39803314208984375, -0.3708953857421875, -0.34375762939453125, -0.316619873046875, -0.28948211669921875, -0.2623443603515625, -0.23520660400390625, -0.20806884765625, -0.18093109130859375, -0.1537933349609375, -0.12665557861328125, -0.099517822265625, -0.07238006591796875, -0.0452423095703125, -0.01810455322265625, 0.009033203125, 0.03617095947265625, 0.0633087158203125, 0.09044647216796875, 0.117584228515625, 0.14472198486328125, 0.1718597412109375, 0.19899749755859375, 0.22613525390625, 0.25327301025390625, 0.2804107666015625, 0.30754852294921875, 0.334686279296875, 0.36182403564453125, 0.3889617919921875, 0.41609954833984375, 0.4432373046875, 0.47037506103515625, 0.4975128173828125, 0.5246505737304688, 0.551788330078125, 0.5789260864257812, 0.6060638427734375, 0.6332015991210938, 0.66033935546875, 0.6874771118164062, 0.7146148681640625, 0.7417526245117188, 0.768890380859375, 0.7960281372070312, 0.8231658935546875, 0.8503036499023438, 0.87744140625]}, "gradients/decoder.transformer.h.15.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 0.0, 0.0, 2.0, 2.0, 2.0, 3.0, 2.0, 7.0, 4.0, 4.0, 14.0, 14.0, 16.0, 28.0, 24.0, 32.0, 46.0, 52.0, 87.0, 185.0, 166.0, 72.0, 53.0, 51.0, 38.0, 30.0, 25.0, 10.0, 10.0, 6.0, 2.0, 8.0, 5.0, 5.0, 2.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.104873657226562e-05, -6.851088255643845e-05, -6.597302854061127e-05, -6.343517452478409e-05, -6.089732050895691e-05, -5.835946649312973e-05, -5.582161247730255e-05, -5.328375846147537e-05, -5.074590444564819e-05, -4.8208050429821014e-05, -4.5670196413993835e-05, -4.3132342398166656e-05, -4.059448838233948e-05, -3.80566343665123e-05, -3.551878035068512e-05, -3.298092633485794e-05, -3.0443072319030762e-05, -2.7905218303203583e-05, -2.5367364287376404e-05, -2.2829510271549225e-05, -2.0291656255722046e-05, -1.7753802239894867e-05, -1.5215948224067688e-05, -1.2678094208240509e-05, -1.014024019241333e-05, -7.602386176586151e-06, -5.064532160758972e-06, -2.5266781449317932e-06, 1.1175870895385742e-08, 2.5490298867225647e-06, 5.086883902549744e-06, 7.624737918376923e-06, 1.0162591934204102e-05, 1.270044595003128e-05, 1.523829996585846e-05, 1.777615398168564e-05, 2.0314007997512817e-05, 2.2851862013339996e-05, 2.5389716029167175e-05, 2.7927570044994354e-05, 3.0465424060821533e-05, 3.300327807664871e-05, 3.554113209247589e-05, 3.807898610830307e-05, 4.061684012413025e-05, 4.315469413995743e-05, 4.569254815578461e-05, 4.8230402171611786e-05, 5.0768256187438965e-05, 5.3306110203266144e-05, 5.584396421909332e-05, 5.83818182349205e-05, 6.091967225074768e-05, 6.345752626657486e-05, 6.599538028240204e-05, 6.853323429822922e-05, 7.10710883140564e-05, 7.360894232988358e-05, 7.614679634571075e-05, 7.868465036153793e-05, 8.122250437736511e-05, 8.376035839319229e-05, 8.629821240901947e-05, 8.883606642484665e-05, 9.137392044067383e-05]}, "gradients/decoder.transformer.h.15.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 3.0, 5.0, 5.0, 5.0, 3.0, 5.0, 10.0, 21.0, 26.0, 55.0, 82.0, 115.0, 607.0, 5637.0, 347447.0, 686985.0, 6493.0, 694.0, 148.0, 61.0, 37.0, 28.0, 26.0, 13.0, 20.0, 10.0, 6.0, 3.0, 1.0, 5.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 2.0, 1.0], "bins": [-0.0014858245849609375, -0.0014445781707763672, -0.0014033317565917969, -0.0013620853424072266, -0.0013208389282226562, -0.001279592514038086, -0.0012383460998535156, -0.0011970996856689453, -0.001155853271484375, -0.0011146068572998047, -0.0010733604431152344, -0.001032114028930664, -0.0009908676147460938, -0.0009496212005615234, -0.0009083747863769531, -0.0008671283721923828, -0.0008258819580078125, -0.0007846355438232422, -0.0007433891296386719, -0.0007021427154541016, -0.0006608963012695312, -0.0006196498870849609, -0.0005784034729003906, -0.0005371570587158203, -0.00049591064453125, -0.0004546642303466797, -0.0004134178161621094, -0.00037217140197753906, -0.00033092498779296875, -0.00028967857360839844, -0.0002484321594238281, -0.0002071857452392578, -0.0001659393310546875, -0.0001246929168701172, -8.344650268554688e-05, -4.220008850097656e-05, -9.5367431640625e-07, 4.029273986816406e-05, 8.153915405273438e-05, 0.0001227855682373047, 0.000164031982421875, 0.0002052783966064453, 0.0002465248107910156, 0.00028777122497558594, 0.00032901763916015625, 0.00037026405334472656, 0.0004115104675292969, 0.0004527568817138672, 0.0004940032958984375, 0.0005352497100830078, 0.0005764961242675781, 0.0006177425384521484, 0.0006589889526367188, 0.0007002353668212891, 0.0007414817810058594, 0.0007827281951904297, 0.000823974609375, 0.0008652210235595703, 0.0009064674377441406, 0.0009477138519287109, 0.0009889602661132812, 0.0010302066802978516, 0.0010714530944824219, 0.0011126995086669922, 0.0011539459228515625]}, "gradients/decoder.transformer.h.15.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 3.0, 2.0, 3.0, 2.0, 6.0, 7.0, 8.0, 10.0, 13.0, 13.0, 15.0, 37.0, 43.0, 56.0, 98.0, 143.0, 174.0, 121.0, 72.0, 52.0, 33.0, 20.0, 18.0, 10.0, 9.0, 11.0, 5.0, 5.0, 6.0, 2.0, 2.0, 4.0, 2.0, 3.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-4.753056418849155e-05, -4.622126289177686e-05, -4.4911961595062166e-05, -4.360266029834747e-05, -4.229335900163278e-05, -4.0984057704918087e-05, -3.967475640820339e-05, -3.83654551114887e-05, -3.7056153814774007e-05, -3.574685251805931e-05, -3.443755122134462e-05, -3.312824992462993e-05, -3.181894862791523e-05, -3.050964733120054e-05, -2.9200344215496443e-05, -2.789104291878175e-05, -2.6581739803077653e-05, -2.527243850636296e-05, -2.3963137209648266e-05, -2.2653835912933573e-05, -2.134453461621888e-05, -2.0035233319504187e-05, -1.872593020380009e-05, -1.7416628907085396e-05, -1.6107327610370703e-05, -1.479802631365601e-05, -1.3488725016941316e-05, -1.2179422810731921e-05, -1.0870121514017228e-05, -9.560820217302535e-06, -8.25151801109314e-06, -6.942216714378446e-06, -5.632911779684946e-06, -4.323610482970253e-06, -3.0143087315082084e-06, -1.7050069800461642e-06, -3.9570568333147094e-07, 9.135956133832224e-07, 2.2228978195926175e-06, 3.5321991163073108e-06, 4.841500413022004e-06, 6.150801709736697e-06, 7.4601034611987416e-06, 8.769405212660786e-06, 1.0078706509375479e-05, 1.1388007806090172e-05, 1.2697310012299567e-05, 1.400661130901426e-05, 1.5315912605728954e-05, 1.6625213902443647e-05, 1.793451519915834e-05, 1.9243816495873034e-05, 2.0553117792587727e-05, 2.186241908930242e-05, 2.3171722205006517e-05, 2.448102350172121e-05, 2.5790324798435904e-05, 2.7099626095150597e-05, 2.840892739186529e-05, 2.9718230507569388e-05, 3.102753180428408e-05, 3.2336833100998774e-05, 3.364613439771347e-05, 3.495543569442816e-05, 3.6264736991142854e-05]}, "gradients/decoder.transformer.h.15.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 4.0, 6.0, 4.0, 4.0, 6.0, 5.0, 8.0, 10.0, 4.0, 4.0, 6.0, 19.0, 7.0, 18.0, 17.0, 32.0, 29.0, 28.0, 24.0, 29.0, 36.0, 26.0, 18.0, 27.0, 44.0, 46.0, 32.0, 50.0, 44.0, 40.0, 39.0, 32.0, 23.0, 29.0, 29.0, 25.0, 29.0, 25.0, 18.0, 18.0, 15.0, 10.0, 14.0, 14.0, 8.0, 16.0, 9.0, 5.0, 8.0, 3.0, 4.0, 3.0, 4.0, 4.0, 3.0, 2.0, 1.0, 2.0], "bins": [-2.6881694793701172e-05, -2.6081688702106476e-05, -2.528168261051178e-05, -2.4481676518917084e-05, -2.3681670427322388e-05, -2.288166433572769e-05, -2.2081658244132996e-05, -2.12816521525383e-05, -2.0481646060943604e-05, -1.9681639969348907e-05, -1.888163387775421e-05, -1.8081627786159515e-05, -1.728162169456482e-05, -1.6481615602970123e-05, -1.5681609511375427e-05, -1.4881603419780731e-05, -1.4081597328186035e-05, -1.3281591236591339e-05, -1.2481585144996643e-05, -1.1681579053401947e-05, -1.0881572961807251e-05, -1.0081566870212555e-05, -9.281560778617859e-06, -8.481554687023163e-06, -7.681548595428467e-06, -6.881542503833771e-06, -6.081536412239075e-06, -5.281530320644379e-06, -4.481524229049683e-06, -3.6815181374549866e-06, -2.8815120458602905e-06, -2.0815059542655945e-06, -1.2814998626708984e-06, -4.814937710762024e-07, 3.1851232051849365e-07, 1.1185184121131897e-06, 1.9185245037078857e-06, 2.7185305953025818e-06, 3.518536686897278e-06, 4.318542778491974e-06, 5.11854887008667e-06, 5.918554961681366e-06, 6.718561053276062e-06, 7.518567144870758e-06, 8.318573236465454e-06, 9.11857932806015e-06, 9.918585419654846e-06, 1.0718591511249542e-05, 1.1518597602844238e-05, 1.2318603694438934e-05, 1.311860978603363e-05, 1.3918615877628326e-05, 1.4718621969223022e-05, 1.551862806081772e-05, 1.6318634152412415e-05, 1.711864024400711e-05, 1.7918646335601807e-05, 1.8718652427196503e-05, 1.95186585187912e-05, 2.0318664610385895e-05, 2.111867070198059e-05, 2.1918676793575287e-05, 2.2718682885169983e-05, 2.351868897676468e-05, 2.4318695068359375e-05]}, "gradients/decoder.transformer.h.15.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 3.0, 2.0, 1.0, 1.0, 0.0, 2.0, 4.0, 7.0, 6.0, 7.0, 14.0, 19.0, 10.0, 18.0, 18.0, 28.0, 17.0, 26.0, 38.0, 27.0, 27.0, 29.0, 41.0, 38.0, 44.0, 46.0, 45.0, 43.0, 43.0, 36.0, 49.0, 27.0, 31.0, 31.0, 17.0, 28.0, 22.0, 21.0, 25.0, 20.0, 25.0, 16.0, 11.0, 9.0, 11.0, 9.0, 4.0, 4.0, 6.0, 2.0, 2.0, 2.0, 3.0, 0.0, 1.0, 1.0, 0.0, 2.0], "bins": [-33.34375, -32.332275390625, -31.32080078125, -30.309326171875, -29.2978515625, -28.286376953125, -27.27490234375, -26.263427734375, -25.251953125, -24.240478515625, -23.22900390625, -22.217529296875, -21.2060546875, -20.194580078125, -19.18310546875, -18.171630859375, -17.16015625, -16.148681640625, -15.13720703125, -14.125732421875, -13.1142578125, -12.102783203125, -11.09130859375, -10.079833984375, -9.068359375, -8.056884765625, -7.04541015625, -6.033935546875, -5.0224609375, -4.010986328125, -2.99951171875, -1.988037109375, -0.9765625, 0.034912109375, 1.04638671875, 2.057861328125, 3.0693359375, 4.080810546875, 5.09228515625, 6.103759765625, 7.115234375, 8.126708984375, 9.13818359375, 10.149658203125, 11.1611328125, 12.172607421875, 13.18408203125, 14.195556640625, 15.20703125, 16.218505859375, 17.22998046875, 18.241455078125, 19.2529296875, 20.264404296875, 21.27587890625, 22.287353515625, 23.298828125, 24.310302734375, 25.32177734375, 26.333251953125, 27.3447265625, 28.356201171875, 29.36767578125, 30.379150390625, 31.390625]}, "gradients/decoder.transformer.h.15.attn.c_proj.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 2.0, 2.0, 1.0, 2.0, 6.0, 1.0, 6.0, 2.0, 5.0, 3.0, 14.0, 11.0, 19.0, 16.0, 27.0, 38.0, 54.0, 75.0, 89.0, 112.0, 191.0, 296.0, 424.0, 643.0, 973.0, 1502.0, 2372.0, 4009.0, 13342.0, 406454.0, 590686.0, 15936.0, 4173.0, 2409.0, 1546.0, 1017.0, 671.0, 427.0, 288.0, 192.0, 143.0, 98.0, 76.0, 60.0, 29.0, 38.0, 21.0, 12.0, 5.0, 18.0, 7.0, 5.0, 2.0, 7.0, 6.0, 1.0, 3.0, 2.0, 1.0, 2.0, 1.0], "bins": [-84.4375, -81.8818359375, -79.326171875, -76.7705078125, -74.21484375, -71.6591796875, -69.103515625, -66.5478515625, -63.9921875, -61.4365234375, -58.880859375, -56.3251953125, -53.76953125, -51.2138671875, -48.658203125, -46.1025390625, -43.546875, -40.9912109375, -38.435546875, -35.8798828125, -33.32421875, -30.7685546875, -28.212890625, -25.6572265625, -23.1015625, -20.5458984375, -17.990234375, -15.4345703125, -12.87890625, -10.3232421875, -7.767578125, -5.2119140625, -2.65625, -0.1005859375, 2.455078125, 5.0107421875, 7.56640625, 10.1220703125, 12.677734375, 15.2333984375, 17.7890625, 20.3447265625, 22.900390625, 25.4560546875, 28.01171875, 30.5673828125, 33.123046875, 35.6787109375, 38.234375, 40.7900390625, 43.345703125, 45.9013671875, 48.45703125, 51.0126953125, 53.568359375, 56.1240234375, 58.6796875, 61.2353515625, 63.791015625, 66.3466796875, 68.90234375, 71.4580078125, 74.013671875, 76.5693359375, 79.125]}, "gradients/decoder.transformer.h.15.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 4.0, 1.0, 8.0, 9.0, 4.0, 10.0, 7.0, 13.0, 16.0, 12.0, 18.0, 15.0, 25.0, 26.0, 22.0, 27.0, 37.0, 42.0, 42.0, 51.0, 50.0, 79.0, 396.0, 1596.0, 91.0, 60.0, 50.0, 38.0, 35.0, 35.0, 35.0, 32.0, 33.0, 23.0, 22.0, 12.0, 11.0, 9.0, 19.0, 10.0, 10.0, 7.0, 4.0, 4.0, 3.0, 1.0, 3.0, 1.0, 2.0, 4.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-102.6875, -99.3837890625, -96.080078125, -92.7763671875, -89.47265625, -86.1689453125, -82.865234375, -79.5615234375, -76.2578125, -72.9541015625, -69.650390625, -66.3466796875, -63.04296875, -59.7392578125, -56.435546875, -53.1318359375, -49.828125, -46.5244140625, -43.220703125, -39.9169921875, -36.61328125, -33.3095703125, -30.005859375, -26.7021484375, -23.3984375, -20.0947265625, -16.791015625, -13.4873046875, -10.18359375, -6.8798828125, -3.576171875, -0.2724609375, 3.03125, 6.3349609375, 9.638671875, 12.9423828125, 16.24609375, 19.5498046875, 22.853515625, 26.1572265625, 29.4609375, 32.7646484375, 36.068359375, 39.3720703125, 42.67578125, 45.9794921875, 49.283203125, 52.5869140625, 55.890625, 59.1943359375, 62.498046875, 65.8017578125, 69.10546875, 72.4091796875, 75.712890625, 79.0166015625, 82.3203125, 85.6240234375, 88.927734375, 92.2314453125, 95.53515625, 98.8388671875, 102.142578125, 105.4462890625, 108.75]}, "gradients/decoder.transformer.h.15.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 4.0, 2.0, 4.0, 1.0, 3.0, 3.0, 4.0, 10.0, 10.0, 12.0, 28.0, 18.0, 20.0, 27.0, 36.0, 60.0, 90.0, 97.0, 143.0, 234.0, 519.0, 1146.0, 3292.0, 15417.0, 3009257.0, 103908.0, 7480.0, 2069.0, 782.0, 364.0, 184.0, 120.0, 78.0, 57.0, 66.0, 35.0, 33.0, 23.0, 20.0, 11.0, 10.0, 7.0, 6.0, 11.0, 5.0, 4.0, 4.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-236.875, -229.423828125, -221.97265625, -214.521484375, -207.0703125, -199.619140625, -192.16796875, -184.716796875, -177.265625, -169.814453125, -162.36328125, -154.912109375, -147.4609375, -140.009765625, -132.55859375, -125.107421875, -117.65625, -110.205078125, -102.75390625, -95.302734375, -87.8515625, -80.400390625, -72.94921875, -65.498046875, -58.046875, -50.595703125, -43.14453125, -35.693359375, -28.2421875, -20.791015625, -13.33984375, -5.888671875, 1.5625, 9.013671875, 16.46484375, 23.916015625, 31.3671875, 38.818359375, 46.26953125, 53.720703125, 61.171875, 68.623046875, 76.07421875, 83.525390625, 90.9765625, 98.427734375, 105.87890625, 113.330078125, 120.78125, 128.232421875, 135.68359375, 143.134765625, 150.5859375, 158.037109375, 165.48828125, 172.939453125, 180.390625, 187.841796875, 195.29296875, 202.744140625, 210.1953125, 217.646484375, 225.09765625, 232.548828125, 240.0]}, "gradients/decoder.transformer.h.15.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 5.0, 3.0, 8.0, 14.0, 29.0, 40.0, 90.0, 190.0, 271.0, 171.0, 100.0, 43.0, 24.0, 14.0, 5.0, 2.0, 1.0, 4.0, 2.0], "bins": [-488.95184326171875, -479.8100891113281, -470.6683349609375, -461.5265808105469, -452.38482666015625, -443.2430725097656, -434.101318359375, -424.9595642089844, -415.81781005859375, -406.6760559082031, -397.5343017578125, -388.3925476074219, -379.25079345703125, -370.1090393066406, -360.96728515625, -351.8255310058594, -342.68377685546875, -333.5420227050781, -324.4002685546875, -315.2585144042969, -306.11676025390625, -296.9750061035156, -287.833251953125, -278.6914978027344, -269.5497741699219, -260.40802001953125, -251.26626586914062, -242.12451171875, -232.98275756835938, -223.84100341796875, -214.69924926757812, -205.5574951171875, -196.41574096679688, -187.27398681640625, -178.13223266601562, -168.990478515625, -159.84872436523438, -150.70697021484375, -141.56521606445312, -132.4234619140625, -123.28172302246094, -114.13996887207031, -104.99821472167969, -95.85646057128906, -86.71470642089844, -77.57295227050781, -68.43120574951172, -59.289451599121094, -50.14769744873047, -41.005943298339844, -31.86419105529785, -22.72243881225586, -13.580684661865234, -4.438930511474609, 4.70281982421875, 13.844573974609375, 22.986328125, 32.128082275390625, 41.26983642578125, 50.41158676147461, 59.553340911865234, 68.69509887695312, 77.83684539794922, 86.97859954833984, 96.12035369873047]}, "gradients/decoder.transformer.h.15.ln_1.bias": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 4.0, 2.0, 0.0, 2.0, 5.0, 4.0, 6.0, 8.0, 6.0, 12.0, 13.0, 10.0, 10.0, 12.0, 18.0, 27.0, 24.0, 24.0, 25.0, 36.0, 45.0, 36.0, 36.0, 43.0, 38.0, 42.0, 50.0, 54.0, 44.0, 46.0, 37.0, 30.0, 30.0, 35.0, 31.0, 30.0, 27.0, 20.0, 16.0, 14.0, 15.0, 18.0, 3.0, 5.0, 3.0, 8.0, 5.0, 5.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-261.15704345703125, -252.15626525878906, -243.15548706054688, -234.1547088623047, -225.1539306640625, -216.1531524658203, -207.15237426757812, -198.15159606933594, -189.15081787109375, -180.15003967285156, -171.14926147460938, -162.1484832763672, -153.147705078125, -144.1469268798828, -135.14614868164062, -126.14537048339844, -117.14459228515625, -108.14381408691406, -99.14303588867188, -90.14225769042969, -81.1414794921875, -72.14070129394531, -63.139923095703125, -54.13914489746094, -45.13836669921875, -36.13758850097656, -27.136810302734375, -18.136032104492188, -9.13525390625, -0.1344757080078125, 8.866302490234375, 17.867080688476562, 26.867889404296875, 35.86866760253906, 44.86944580078125, 53.87022399902344, 62.871002197265625, 71.87178039550781, 80.87255859375, 89.87333679199219, 98.87411499023438, 107.87489318847656, 116.87567138671875, 125.87644958496094, 134.87722778320312, 143.8780059814453, 152.8787841796875, 161.8795623779297, 170.88034057617188, 179.88111877441406, 188.88189697265625, 197.88267517089844, 206.88345336914062, 215.8842315673828, 224.885009765625, 233.8857879638672, 242.88656616210938, 251.88734436035156, 260.88812255859375, 269.888916015625, 278.8896789550781, 287.89044189453125, 296.8912353515625, 305.89202880859375, 314.8927917480469]}, "gradients/decoder.transformer.h.14.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 5.0, 4.0, 5.0, 11.0, 12.0, 9.0, 14.0, 19.0, 15.0, 19.0, 16.0, 25.0, 28.0, 31.0, 30.0, 43.0, 32.0, 36.0, 32.0, 39.0, 47.0, 38.0, 44.0, 56.0, 32.0, 31.0, 33.0, 27.0, 34.0, 27.0, 15.0, 29.0, 22.0, 23.0, 26.0, 17.0, 17.0, 12.0, 13.0, 8.0, 4.0, 5.0, 5.0, 4.0, 6.0, 2.0, 1.0, 1.0, 2.0, 0.0, 3.0, 0.0, 0.0, 2.0], "bins": [-32.4375, -31.41748046875, -30.3974609375, -29.37744140625, -28.357421875, -27.33740234375, -26.3173828125, -25.29736328125, -24.27734375, -23.25732421875, -22.2373046875, -21.21728515625, -20.197265625, -19.17724609375, -18.1572265625, -17.13720703125, -16.1171875, -15.09716796875, -14.0771484375, -13.05712890625, -12.037109375, -11.01708984375, -9.9970703125, -8.97705078125, -7.95703125, -6.93701171875, -5.9169921875, -4.89697265625, -3.876953125, -2.85693359375, -1.8369140625, -0.81689453125, 0.203125, 1.22314453125, 2.2431640625, 3.26318359375, 4.283203125, 5.30322265625, 6.3232421875, 7.34326171875, 8.36328125, 9.38330078125, 10.4033203125, 11.42333984375, 12.443359375, 13.46337890625, 14.4833984375, 15.50341796875, 16.5234375, 17.54345703125, 18.5634765625, 19.58349609375, 20.603515625, 21.62353515625, 22.6435546875, 23.66357421875, 24.68359375, 25.70361328125, 26.7236328125, 27.74365234375, 28.763671875, 29.78369140625, 30.8037109375, 31.82373046875, 32.84375]}, "gradients/decoder.transformer.h.14.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 5.0, 2.0, 5.0, 6.0, 9.0, 7.0, 11.0, 16.0, 14.0, 23.0, 32.0, 40.0, 54.0, 73.0, 97.0, 110.0, 157.0, 206.0, 287.0, 410.0, 526.0, 720.0, 1150.0, 1657.0, 2476.0, 3972.0, 6805.0, 12842.0, 48155.0, 950340.0, 2988085.0, 135308.0, 17735.0, 8577.0, 4849.0, 3089.0, 1961.0, 1247.0, 856.0, 632.0, 460.0, 343.0, 237.0, 182.0, 117.0, 103.0, 74.0, 47.0, 42.0, 40.0, 28.0, 19.0, 16.0, 11.0, 6.0, 11.0, 6.0, 5.0, 4.0, 2.0, 2.0], "bins": [-96.625, -93.7138671875, -90.802734375, -87.8916015625, -84.98046875, -82.0693359375, -79.158203125, -76.2470703125, -73.3359375, -70.4248046875, -67.513671875, -64.6025390625, -61.69140625, -58.7802734375, -55.869140625, -52.9580078125, -50.046875, -47.1357421875, -44.224609375, -41.3134765625, -38.40234375, -35.4912109375, -32.580078125, -29.6689453125, -26.7578125, -23.8466796875, -20.935546875, -18.0244140625, -15.11328125, -12.2021484375, -9.291015625, -6.3798828125, -3.46875, -0.5576171875, 2.353515625, 5.2646484375, 8.17578125, 11.0869140625, 13.998046875, 16.9091796875, 19.8203125, 22.7314453125, 25.642578125, 28.5537109375, 31.46484375, 34.3759765625, 37.287109375, 40.1982421875, 43.109375, 46.0205078125, 48.931640625, 51.8427734375, 54.75390625, 57.6650390625, 60.576171875, 63.4873046875, 66.3984375, 69.3095703125, 72.220703125, 75.1318359375, 78.04296875, 80.9541015625, 83.865234375, 86.7763671875, 89.6875]}, "gradients/decoder.transformer.h.14.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 3.0, 5.0, 8.0, 5.0, 13.0, 22.0, 22.0, 29.0, 42.0, 79.0, 212.0, 924.0, 1820.0, 602.0, 122.0, 46.0, 35.0, 28.0, 17.0, 16.0, 11.0, 7.0, 5.0, 5.0, 2.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-164.875, -160.3359375, -155.796875, -151.2578125, -146.71875, -142.1796875, -137.640625, -133.1015625, -128.5625, -124.0234375, -119.484375, -114.9453125, -110.40625, -105.8671875, -101.328125, -96.7890625, -92.25, -87.7109375, -83.171875, -78.6328125, -74.09375, -69.5546875, -65.015625, -60.4765625, -55.9375, -51.3984375, -46.859375, -42.3203125, -37.78125, -33.2421875, -28.703125, -24.1640625, -19.625, -15.0859375, -10.546875, -6.0078125, -1.46875, 3.0703125, 7.609375, 12.1484375, 16.6875, 21.2265625, 25.765625, 30.3046875, 34.84375, 39.3828125, 43.921875, 48.4609375, 53.0, 57.5390625, 62.078125, 66.6171875, 71.15625, 75.6953125, 80.234375, 84.7734375, 89.3125, 93.8515625, 98.390625, 102.9296875, 107.46875, 112.0078125, 116.546875, 121.0859375, 125.625]}, "gradients/decoder.transformer.h.14.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 5.0, 4.0, 12.0, 13.0, 14.0, 19.0, 18.0, 34.0, 58.0, 96.0, 196.0, 341.0, 551.0, 1164.0, 2705.0, 6947.0, 20817.0, 99405.0, 3870288.0, 152403.0, 25319.0, 7976.0, 3112.0, 1319.0, 670.0, 314.0, 157.0, 113.0, 73.0, 54.0, 39.0, 14.0, 13.0, 3.0, 9.0, 4.0, 5.0, 4.0, 3.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-185.875, -179.134765625, -172.39453125, -165.654296875, -158.9140625, -152.173828125, -145.43359375, -138.693359375, -131.953125, -125.212890625, -118.47265625, -111.732421875, -104.9921875, -98.251953125, -91.51171875, -84.771484375, -78.03125, -71.291015625, -64.55078125, -57.810546875, -51.0703125, -44.330078125, -37.58984375, -30.849609375, -24.109375, -17.369140625, -10.62890625, -3.888671875, 2.8515625, 9.591796875, 16.33203125, 23.072265625, 29.8125, 36.552734375, 43.29296875, 50.033203125, 56.7734375, 63.513671875, 70.25390625, 76.994140625, 83.734375, 90.474609375, 97.21484375, 103.955078125, 110.6953125, 117.435546875, 124.17578125, 130.916015625, 137.65625, 144.396484375, 151.13671875, 157.876953125, 164.6171875, 171.357421875, 178.09765625, 184.837890625, 191.578125, 198.318359375, 205.05859375, 211.798828125, 218.5390625, 225.279296875, 232.01953125, 238.759765625, 245.5]}, "gradients/decoder.transformer.h.14.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 4.0, 4.0, 6.0, 5.0, 5.0, 8.0, 16.0, 26.0, 39.0, 45.0, 87.0, 196.0, 242.0, 125.0, 77.0, 50.0, 22.0, 15.0, 15.0, 10.0, 11.0, 4.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-709.44775390625, -693.0075073242188, -676.5672607421875, -660.126953125, -643.6867065429688, -627.2464599609375, -610.8062133789062, -594.365966796875, -577.9256591796875, -561.4854125976562, -545.045166015625, -528.6048583984375, -512.1646118164062, -495.724365234375, -479.28411865234375, -462.8438415527344, -446.4035949707031, -429.9633483886719, -413.5230712890625, -397.08282470703125, -380.6425476074219, -364.2023010253906, -347.76202392578125, -331.32177734375, -314.88153076171875, -298.4412841796875, -282.0010070800781, -265.5607604980469, -249.1204833984375, -232.68023681640625, -216.23997497558594, -199.79971313476562, -183.35946655273438, -166.91920471191406, -150.47894287109375, -134.0386962890625, -117.59842681884766, -101.15816497802734, -84.71791076660156, -68.27764892578125, -51.83738708496094, -35.397125244140625, -18.956867218017578, -2.5166091918945312, 13.923652648925781, 30.363914489746094, 46.804168701171875, 63.24443054199219, 79.6846923828125, 96.12495422363281, 112.56521606445312, 129.00546264648438, 145.44573974609375, 161.885986328125, 178.3262481689453, 194.76651000976562, 211.20677185058594, 227.64703369140625, 244.08729553222656, 260.5275573730469, 276.9678039550781, 293.4080810546875, 309.84832763671875, 326.28857421875, 342.7288513183594]}, "gradients/decoder.transformer.h.14.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 3.0, 1.0, 1.0, 0.0, 2.0, 4.0, 0.0, 4.0, 4.0, 7.0, 8.0, 9.0, 14.0, 13.0, 13.0, 23.0, 18.0, 20.0, 20.0, 29.0, 35.0, 28.0, 33.0, 34.0, 43.0, 46.0, 43.0, 45.0, 49.0, 40.0, 39.0, 39.0, 43.0, 33.0, 36.0, 43.0, 28.0, 14.0, 22.0, 19.0, 27.0, 14.0, 16.0, 14.0, 13.0, 6.0, 10.0, 4.0, 4.0, 1.0, 2.0, 3.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-237.5008544921875, -230.16421508789062, -222.82757568359375, -215.4909210205078, -208.15428161621094, -200.81764221191406, -193.48098754882812, -186.14434814453125, -178.80770874023438, -171.4710693359375, -164.13442993164062, -156.7977752685547, -149.4611358642578, -142.12449645996094, -134.787841796875, -127.45120239257812, -120.11456298828125, -112.77792358398438, -105.44127655029297, -98.10462951660156, -90.76799011230469, -83.43135070800781, -76.0947036743164, -68.758056640625, -61.421417236328125, -54.084774017333984, -46.748130798339844, -39.4114875793457, -32.07484436035156, -24.738201141357422, -17.40155792236328, -10.06491470336914, -2.7282562255859375, 4.608386993408203, 11.945030212402344, 19.281673431396484, 26.618316650390625, 33.954959869384766, 41.291603088378906, 48.62824630737305, 55.96488952636719, 63.30153274536133, 70.63817596435547, 77.97482299804688, 85.31146240234375, 92.64810180664062, 99.98474884033203, 107.32139587402344, 114.65803527832031, 121.99467468261719, 129.33132934570312, 136.66796875, 144.00460815429688, 151.34124755859375, 158.67788696289062, 166.01454162597656, 173.35118103027344, 180.6878204345703, 188.02447509765625, 195.36111450195312, 202.69775390625, 210.03439331054688, 217.37103271484375, 224.7076873779297, 232.04432678222656]}, "gradients/decoder.transformer.h.14.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 4.0, 0.0, 0.0, 6.0, 1.0, 6.0, 10.0, 11.0, 12.0, 15.0, 8.0, 17.0, 25.0, 23.0, 29.0, 35.0, 22.0, 39.0, 41.0, 46.0, 42.0, 44.0, 46.0, 49.0, 51.0, 29.0, 42.0, 33.0, 42.0, 37.0, 27.0, 38.0, 22.0, 27.0, 28.0, 19.0, 16.0, 13.0, 17.0, 11.0, 9.0, 5.0, 4.0, 4.0, 3.0, 0.0, 2.0, 3.0, 0.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-32.65625, -31.48291015625, -30.3095703125, -29.13623046875, -27.962890625, -26.78955078125, -25.6162109375, -24.44287109375, -23.26953125, -22.09619140625, -20.9228515625, -19.74951171875, -18.576171875, -17.40283203125, -16.2294921875, -15.05615234375, -13.8828125, -12.70947265625, -11.5361328125, -10.36279296875, -9.189453125, -8.01611328125, -6.8427734375, -5.66943359375, -4.49609375, -3.32275390625, -2.1494140625, -0.97607421875, 0.197265625, 1.37060546875, 2.5439453125, 3.71728515625, 4.890625, 6.06396484375, 7.2373046875, 8.41064453125, 9.583984375, 10.75732421875, 11.9306640625, 13.10400390625, 14.27734375, 15.45068359375, 16.6240234375, 17.79736328125, 18.970703125, 20.14404296875, 21.3173828125, 22.49072265625, 23.6640625, 24.83740234375, 26.0107421875, 27.18408203125, 28.357421875, 29.53076171875, 30.7041015625, 31.87744140625, 33.05078125, 34.22412109375, 35.3974609375, 36.57080078125, 37.744140625, 38.91748046875, 40.0908203125, 41.26416015625, 42.4375]}, "gradients/decoder.transformer.h.14.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 3.0, 3.0, 4.0, 7.0, 11.0, 16.0, 20.0, 35.0, 49.0, 79.0, 125.0, 157.0, 252.0, 349.0, 551.0, 842.0, 1275.0, 1966.0, 3041.0, 4568.0, 7180.0, 11439.0, 17759.0, 29327.0, 48954.0, 86574.0, 175210.0, 347696.0, 132517.0, 70825.0, 40765.0, 24408.0, 15214.0, 9732.0, 6155.0, 4021.0, 2594.0, 1718.0, 1051.0, 709.0, 430.0, 307.0, 190.0, 138.0, 106.0, 59.0, 42.0, 36.0, 18.0, 11.0, 15.0, 8.0, 2.0, 5.0, 2.0, 1.0, 1.0, 0.0, 1.0], "bins": [-1.4609375, -1.415740966796875, -1.37054443359375, -1.325347900390625, -1.2801513671875, -1.234954833984375, -1.18975830078125, -1.144561767578125, -1.099365234375, -1.054168701171875, -1.00897216796875, -0.963775634765625, -0.9185791015625, -0.873382568359375, -0.82818603515625, -0.782989501953125, -0.73779296875, -0.692596435546875, -0.64739990234375, -0.602203369140625, -0.5570068359375, -0.511810302734375, -0.46661376953125, -0.421417236328125, -0.376220703125, -0.331024169921875, -0.28582763671875, -0.240631103515625, -0.1954345703125, -0.150238037109375, -0.10504150390625, -0.059844970703125, -0.0146484375, 0.030548095703125, 0.07574462890625, 0.120941162109375, 0.1661376953125, 0.211334228515625, 0.25653076171875, 0.301727294921875, 0.346923828125, 0.392120361328125, 0.43731689453125, 0.482513427734375, 0.5277099609375, 0.572906494140625, 0.61810302734375, 0.663299560546875, 0.70849609375, 0.753692626953125, 0.79888916015625, 0.844085693359375, 0.8892822265625, 0.934478759765625, 0.97967529296875, 1.024871826171875, 1.070068359375, 1.115264892578125, 1.16046142578125, 1.205657958984375, 1.2508544921875, 1.296051025390625, 1.34124755859375, 1.386444091796875, 1.431640625]}, "gradients/decoder.transformer.h.14.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 3.0, 0.0, 1.0, 1.0, 4.0, 2.0, 3.0, 4.0, 4.0, 7.0, 4.0, 6.0, 16.0, 15.0, 13.0, 25.0, 14.0, 20.0, 32.0, 25.0, 34.0, 27.0, 38.0, 56.0, 38.0, 37.0, 36.0, 30.0, 1063.0, 40.0, 44.0, 40.0, 31.0, 33.0, 27.0, 48.0, 30.0, 37.0, 25.0, 14.0, 13.0, 22.0, 16.0, 9.0, 11.0, 9.0, 10.0, 7.0, 4.0, 4.0, 2.0, 3.0, 5.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-21.421875, -20.73681640625, -20.0517578125, -19.36669921875, -18.681640625, -17.99658203125, -17.3115234375, -16.62646484375, -15.94140625, -15.25634765625, -14.5712890625, -13.88623046875, -13.201171875, -12.51611328125, -11.8310546875, -11.14599609375, -10.4609375, -9.77587890625, -9.0908203125, -8.40576171875, -7.720703125, -7.03564453125, -6.3505859375, -5.66552734375, -4.98046875, -4.29541015625, -3.6103515625, -2.92529296875, -2.240234375, -1.55517578125, -0.8701171875, -0.18505859375, 0.5, 1.18505859375, 1.8701171875, 2.55517578125, 3.240234375, 3.92529296875, 4.6103515625, 5.29541015625, 5.98046875, 6.66552734375, 7.3505859375, 8.03564453125, 8.720703125, 9.40576171875, 10.0908203125, 10.77587890625, 11.4609375, 12.14599609375, 12.8310546875, 13.51611328125, 14.201171875, 14.88623046875, 15.5712890625, 16.25634765625, 16.94140625, 17.62646484375, 18.3115234375, 18.99658203125, 19.681640625, 20.36669921875, 21.0517578125, 21.73681640625, 22.421875]}, "gradients/decoder.transformer.h.14.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 2.0, 2.0, 2.0, 2.0, 7.0, 10.0, 12.0, 23.0, 26.0, 35.0, 53.0, 67.0, 128.0, 156.0, 201.0, 293.0, 460.0, 618.0, 887.0, 1318.0, 1867.0, 2826.0, 4182.0, 6216.0, 9323.0, 13754.0, 21394.0, 33832.0, 54343.0, 88826.0, 167610.0, 1350347.0, 132094.0, 75806.0, 45473.0, 29221.0, 18572.0, 12100.0, 8091.0, 5403.0, 3549.0, 2487.0, 1667.0, 1183.0, 827.0, 562.0, 392.0, 273.0, 177.0, 136.0, 89.0, 62.0, 45.0, 40.0, 31.0, 14.0, 7.0, 9.0, 5.0, 5.0, 3.0, 3.0, 2.0], "bins": [-0.91845703125, -0.8900527954101562, -0.8616485595703125, -0.8332443237304688, -0.804840087890625, -0.7764358520507812, -0.7480316162109375, -0.7196273803710938, -0.69122314453125, -0.6628189086914062, -0.6344146728515625, -0.6060104370117188, -0.577606201171875, -0.5492019653320312, -0.5207977294921875, -0.49239349365234375, -0.4639892578125, -0.43558502197265625, -0.4071807861328125, -0.37877655029296875, -0.350372314453125, -0.32196807861328125, -0.2935638427734375, -0.26515960693359375, -0.23675537109375, -0.20835113525390625, -0.1799468994140625, -0.15154266357421875, -0.123138427734375, -0.09473419189453125, -0.0663299560546875, -0.03792572021484375, -0.009521484375, 0.01888275146484375, 0.0472869873046875, 0.07569122314453125, 0.104095458984375, 0.13249969482421875, 0.1609039306640625, 0.18930816650390625, 0.21771240234375, 0.24611663818359375, 0.2745208740234375, 0.30292510986328125, 0.331329345703125, 0.35973358154296875, 0.3881378173828125, 0.41654205322265625, 0.4449462890625, 0.47335052490234375, 0.5017547607421875, 0.5301589965820312, 0.558563232421875, 0.5869674682617188, 0.6153717041015625, 0.6437759399414062, 0.67218017578125, 0.7005844116210938, 0.7289886474609375, 0.7573928833007812, 0.785797119140625, 0.8142013549804688, 0.8426055908203125, 0.8710098266601562, 0.8994140625]}, "gradients/decoder.transformer.h.14.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 0.0, 3.0, 0.0, 4.0, 1.0, 0.0, 1.0, 5.0, 5.0, 9.0, 2.0, 2.0, 16.0, 17.0, 23.0, 19.0, 41.0, 68.0, 209.0, 280.0, 107.0, 55.0, 37.0, 25.0, 19.0, 10.0, 10.0, 9.0, 6.0, 6.0, 8.0, 2.0, 0.0, 4.0, 4.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-9.179115295410156e-05, -8.894223719835281e-05, -8.609332144260406e-05, -8.324440568685532e-05, -8.039548993110657e-05, -7.754657417535782e-05, -7.469765841960907e-05, -7.184874266386032e-05, -6.899982690811157e-05, -6.615091115236282e-05, -6.330199539661407e-05, -6.0453079640865326e-05, -5.760416388511658e-05, -5.475524812936783e-05, -5.190633237361908e-05, -4.905741661787033e-05, -4.620850086212158e-05, -4.335958510637283e-05, -4.0510669350624084e-05, -3.7661753594875336e-05, -3.481283783912659e-05, -3.196392208337784e-05, -2.911500632762909e-05, -2.626609057188034e-05, -2.3417174816131592e-05, -2.0568259060382843e-05, -1.7719343304634094e-05, -1.4870427548885345e-05, -1.2021511793136597e-05, -9.172596037387848e-06, -6.323680281639099e-06, -3.4747645258903503e-06, -6.258487701416016e-07, 2.2230669856071472e-06, 5.071982741355896e-06, 7.920898497104645e-06, 1.0769814252853394e-05, 1.3618730008602142e-05, 1.646764576435089e-05, 1.931656152009964e-05, 2.216547727584839e-05, 2.5014393031597137e-05, 2.7863308787345886e-05, 3.0712224543094635e-05, 3.3561140298843384e-05, 3.641005605459213e-05, 3.925897181034088e-05, 4.210788756608963e-05, 4.495680332183838e-05, 4.780571907758713e-05, 5.0654634833335876e-05, 5.3503550589084625e-05, 5.6352466344833374e-05, 5.920138210058212e-05, 6.205029785633087e-05, 6.489921361207962e-05, 6.774812936782837e-05, 7.059704512357712e-05, 7.344596087932587e-05, 7.629487663507462e-05, 7.914379239082336e-05, 8.199270814657211e-05, 8.484162390232086e-05, 8.769053965806961e-05, 9.053945541381836e-05]}, "gradients/decoder.transformer.h.14.crossattention.q_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 3.0, 0.0, 2.0, 1.0, 4.0, 5.0, 2.0, 7.0, 10.0, 12.0, 14.0, 14.0, 21.0, 24.0, 49.0, 91.0, 273.0, 1210.0, 7499.0, 950305.0, 83957.0, 3943.0, 722.0, 187.0, 72.0, 35.0, 25.0, 19.0, 14.0, 10.0, 7.0, 8.0, 2.0, 4.0, 2.0, 3.0, 2.0, 4.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.0013303756713867188, -0.00128859281539917, -0.001246809959411621, -0.0012050271034240723, -0.0011632442474365234, -0.0011214613914489746, -0.0010796785354614258, -0.001037895679473877, -0.0009961128234863281, -0.0009543299674987793, -0.0009125471115112305, -0.0008707642555236816, -0.0008289813995361328, -0.000787198543548584, -0.0007454156875610352, -0.0007036328315734863, -0.0006618499755859375, -0.0006200671195983887, -0.0005782842636108398, -0.000536501407623291, -0.0004947185516357422, -0.00045293569564819336, -0.00041115283966064453, -0.0003693699836730957, -0.0003275871276855469, -0.00028580427169799805, -0.00024402141571044922, -0.0002022385597229004, -0.00016045570373535156, -0.00011867284774780273, -7.68899917602539e-05, -3.510713577270508e-05, 6.67572021484375e-06, 4.845857620239258e-05, 9.02414321899414e-05, 0.00013202428817749023, 0.00017380714416503906, 0.0002155900001525879, 0.0002573728561401367, 0.00029915571212768555, 0.0003409385681152344, 0.0003827214241027832, 0.00042450428009033203, 0.00046628713607788086, 0.0005080699920654297, 0.0005498528480529785, 0.0005916357040405273, 0.0006334185600280762, 0.000675201416015625, 0.0007169842720031738, 0.0007587671279907227, 0.0008005499839782715, 0.0008423328399658203, 0.0008841156959533691, 0.000925898551940918, 0.0009676814079284668, 0.0010094642639160156, 0.0010512471199035645, 0.0010930299758911133, 0.0011348128318786621, 0.001176595687866211, 0.0012183785438537598, 0.0012601613998413086, 0.0013019442558288574, 0.0013437271118164062]}, "gradients/decoder.transformer.h.14.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 2.0, 6.0, 5.0, 9.0, 7.0, 12.0, 12.0, 20.0, 29.0, 38.0, 72.0, 140.0, 228.0, 162.0, 107.0, 64.0, 34.0, 28.0, 16.0, 7.0, 8.0, 5.0, 2.0, 1.0, 1.0, 2.0], "bins": [-9.389992919750512e-05, -9.204779780702665e-05, -9.019565914059058e-05, -8.834352775011212e-05, -8.649138908367604e-05, -8.463925769319758e-05, -8.278712630271912e-05, -8.093498763628304e-05, -7.908285624580458e-05, -7.723072485532612e-05, -7.537858618889004e-05, -7.352645479841158e-05, -7.16743161319755e-05, -6.982218474149704e-05, -6.797005335101858e-05, -6.61179146845825e-05, -6.426578329410404e-05, -6.241365190362558e-05, -6.05615132371895e-05, -5.870938184671104e-05, -5.685724681825377e-05, -5.50051117897965e-05, -5.315297676133923e-05, -5.130084173288196e-05, -4.9448706704424694e-05, -4.7596571675967425e-05, -4.5744436647510156e-05, -4.3892305257031694e-05, -4.2040170228574425e-05, -4.0188035200117156e-05, -3.833590017165989e-05, -3.6483768781181425e-05, -3.463163739070296e-05, -3.2779502362245694e-05, -3.0927367333788425e-05, -2.907523412432056e-05, -2.7223100914852694e-05, -2.5370965886395425e-05, -2.3518830857938156e-05, -2.166669764847029e-05, -1.9814560801023617e-05, -1.796242577256635e-05, -1.6110292563098483e-05, -1.4258157534641214e-05, -1.2406024325173348e-05, -1.0553889296716079e-05, -8.701755177753512e-06, -6.8496210587909445e-06, -4.997487849323079e-06, -3.1453537303605117e-06, -1.293219384024269e-06, 5.589149623119738e-07, 2.411049081274541e-06, 4.263183654984459e-06, 6.115317773947027e-06, 7.967451892909594e-06, 9.819586011872161e-06, 1.1671720130834728e-05, 1.3523854249797296e-05, 1.5375988368759863e-05, 1.7228123397217132e-05, 1.9080256606684998e-05, 2.0932391635142267e-05, 2.2784526663599536e-05, 2.46366598730674e-05]}, "gradients/decoder.transformer.h.14.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 3.0, 1.0, 5.0, 4.0, 0.0, 0.0, 3.0, 5.0, 8.0, 8.0, 6.0, 5.0, 13.0, 10.0, 19.0, 16.0, 12.0, 29.0, 14.0, 20.0, 18.0, 31.0, 27.0, 46.0, 29.0, 42.0, 29.0, 34.0, 43.0, 35.0, 43.0, 42.0, 47.0, 34.0, 50.0, 35.0, 30.0, 22.0, 38.0, 21.0, 21.0, 24.0, 14.0, 17.0, 6.0, 11.0, 6.0, 15.0, 7.0, 0.0, 8.0, 2.0, 5.0, 0.0, 1.0, 1.0, 2.0, 2.0], "bins": [-3.057718276977539e-05, -2.9704533517360687e-05, -2.8831884264945984e-05, -2.795923501253128e-05, -2.7086585760116577e-05, -2.6213936507701874e-05, -2.534128725528717e-05, -2.4468638002872467e-05, -2.3595988750457764e-05, -2.272333949804306e-05, -2.1850690245628357e-05, -2.0978040993213654e-05, -2.010539174079895e-05, -1.9232742488384247e-05, -1.8360093235969543e-05, -1.748744398355484e-05, -1.6614794731140137e-05, -1.5742145478725433e-05, -1.486949622631073e-05, -1.3996846973896027e-05, -1.3124197721481323e-05, -1.225154846906662e-05, -1.1378899216651917e-05, -1.0506249964237213e-05, -9.63360071182251e-06, -8.760951459407806e-06, -7.888302206993103e-06, -7.0156529545784e-06, -6.143003702163696e-06, -5.270354449748993e-06, -4.3977051973342896e-06, -3.525055944919586e-06, -2.652406692504883e-06, -1.7797574400901794e-06, -9.071081876754761e-07, -3.4458935260772705e-08, 8.381903171539307e-07, 1.710839569568634e-06, 2.5834888219833374e-06, 3.4561380743980408e-06, 4.328787326812744e-06, 5.2014365792274475e-06, 6.074085831642151e-06, 6.946735084056854e-06, 7.819384336471558e-06, 8.692033588886261e-06, 9.564682841300964e-06, 1.0437332093715668e-05, 1.1309981346130371e-05, 1.2182630598545074e-05, 1.3055279850959778e-05, 1.3927929103374481e-05, 1.4800578355789185e-05, 1.5673227608203888e-05, 1.654587686061859e-05, 1.7418526113033295e-05, 1.8291175365447998e-05, 1.91638246178627e-05, 2.0036473870277405e-05, 2.0909123122692108e-05, 2.178177237510681e-05, 2.2654421627521515e-05, 2.3527070879936218e-05, 2.439972013235092e-05, 2.5272369384765625e-05]}, "gradients/decoder.transformer.h.14.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 4.0, 0.0, 0.0, 6.0, 1.0, 6.0, 10.0, 11.0, 12.0, 15.0, 8.0, 17.0, 25.0, 23.0, 29.0, 35.0, 22.0, 39.0, 41.0, 46.0, 42.0, 44.0, 46.0, 49.0, 51.0, 29.0, 42.0, 33.0, 42.0, 37.0, 27.0, 38.0, 22.0, 27.0, 28.0, 19.0, 16.0, 13.0, 17.0, 11.0, 9.0, 5.0, 4.0, 4.0, 3.0, 0.0, 2.0, 3.0, 0.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-32.65625, -31.48291015625, -30.3095703125, -29.13623046875, -27.962890625, -26.78955078125, -25.6162109375, -24.44287109375, -23.26953125, -22.09619140625, -20.9228515625, -19.74951171875, -18.576171875, -17.40283203125, -16.2294921875, -15.05615234375, -13.8828125, -12.70947265625, -11.5361328125, -10.36279296875, -9.189453125, -8.01611328125, -6.8427734375, -5.66943359375, -4.49609375, -3.32275390625, -2.1494140625, -0.97607421875, 0.197265625, 1.37060546875, 2.5439453125, 3.71728515625, 4.890625, 6.06396484375, 7.2373046875, 8.41064453125, 9.583984375, 10.75732421875, 11.9306640625, 13.10400390625, 14.27734375, 15.45068359375, 16.6240234375, 17.79736328125, 18.970703125, 20.14404296875, 21.3173828125, 22.49072265625, 23.6640625, 24.83740234375, 26.0107421875, 27.18408203125, 28.357421875, 29.53076171875, 30.7041015625, 31.87744140625, 33.05078125, 34.22412109375, 35.3974609375, 36.57080078125, 37.744140625, 38.91748046875, 40.0908203125, 41.26416015625, 42.4375]}, "gradients/decoder.transformer.h.14.attn.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 5.0, 5.0, 3.0, 9.0, 14.0, 9.0, 14.0, 17.0, 23.0, 33.0, 44.0, 59.0, 76.0, 96.0, 142.0, 189.0, 250.0, 378.0, 442.0, 624.0, 842.0, 1107.0, 1563.0, 2348.0, 4294.0, 11985.0, 60328.0, 505009.0, 388034.0, 48362.0, 10406.0, 3929.0, 2195.0, 1486.0, 1077.0, 838.0, 572.0, 456.0, 326.0, 263.0, 196.0, 132.0, 104.0, 75.0, 54.0, 43.0, 29.0, 20.0, 14.0, 11.0, 10.0, 8.0, 5.0, 3.0, 6.0, 2.0, 5.0, 1.0, 0.0, 2.0], "bins": [-29.140625, -28.228271484375, -27.31591796875, -26.403564453125, -25.4912109375, -24.578857421875, -23.66650390625, -22.754150390625, -21.841796875, -20.929443359375, -20.01708984375, -19.104736328125, -18.1923828125, -17.280029296875, -16.36767578125, -15.455322265625, -14.54296875, -13.630615234375, -12.71826171875, -11.805908203125, -10.8935546875, -9.981201171875, -9.06884765625, -8.156494140625, -7.244140625, -6.331787109375, -5.41943359375, -4.507080078125, -3.5947265625, -2.682373046875, -1.77001953125, -0.857666015625, 0.0546875, 0.967041015625, 1.87939453125, 2.791748046875, 3.7041015625, 4.616455078125, 5.52880859375, 6.441162109375, 7.353515625, 8.265869140625, 9.17822265625, 10.090576171875, 11.0029296875, 11.915283203125, 12.82763671875, 13.739990234375, 14.65234375, 15.564697265625, 16.47705078125, 17.389404296875, 18.3017578125, 19.214111328125, 20.12646484375, 21.038818359375, 21.951171875, 22.863525390625, 23.77587890625, 24.688232421875, 25.6005859375, 26.512939453125, 27.42529296875, 28.337646484375, 29.25]}, "gradients/decoder.transformer.h.14.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 1.0, 2.0, 0.0, 1.0, 6.0, 4.0, 5.0, 7.0, 17.0, 13.0, 14.0, 10.0, 14.0, 10.0, 16.0, 33.0, 26.0, 37.0, 37.0, 40.0, 30.0, 44.0, 45.0, 64.0, 321.0, 1715.0, 75.0, 50.0, 43.0, 44.0, 48.0, 47.0, 33.0, 37.0, 27.0, 34.0, 16.0, 23.0, 11.0, 9.0, 3.0, 8.0, 13.0, 8.0, 7.0, 4.0, 5.0, 2.0, 0.0, 3.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-105.25, -101.8642578125, -98.478515625, -95.0927734375, -91.70703125, -88.3212890625, -84.935546875, -81.5498046875, -78.1640625, -74.7783203125, -71.392578125, -68.0068359375, -64.62109375, -61.2353515625, -57.849609375, -54.4638671875, -51.078125, -47.6923828125, -44.306640625, -40.9208984375, -37.53515625, -34.1494140625, -30.763671875, -27.3779296875, -23.9921875, -20.6064453125, -17.220703125, -13.8349609375, -10.44921875, -7.0634765625, -3.677734375, -0.2919921875, 3.09375, 6.4794921875, 9.865234375, 13.2509765625, 16.63671875, 20.0224609375, 23.408203125, 26.7939453125, 30.1796875, 33.5654296875, 36.951171875, 40.3369140625, 43.72265625, 47.1083984375, 50.494140625, 53.8798828125, 57.265625, 60.6513671875, 64.037109375, 67.4228515625, 70.80859375, 74.1943359375, 77.580078125, 80.9658203125, 84.3515625, 87.7373046875, 91.123046875, 94.5087890625, 97.89453125, 101.2802734375, 104.666015625, 108.0517578125, 111.4375]}, "gradients/decoder.transformer.h.14.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 3.0, 5.0, 3.0, 10.0, 7.0, 13.0, 9.0, 5.0, 10.0, 11.0, 32.0, 34.0, 52.0, 56.0, 68.0, 75.0, 114.0, 146.0, 194.0, 317.0, 915.0, 5423.0, 2946235.0, 186753.0, 3546.0, 709.0, 257.0, 153.0, 106.0, 83.0, 96.0, 63.0, 51.0, 33.0, 17.0, 20.0, 17.0, 16.0, 10.0, 20.0, 10.0, 4.0, 5.0, 4.0, 2.0, 0.0, 2.0, 1.0, 1.0, 3.0, 0.0, 1.0, 0.0, 1.0], "bins": [-244.0, -236.58203125, -229.1640625, -221.74609375, -214.328125, -206.91015625, -199.4921875, -192.07421875, -184.65625, -177.23828125, -169.8203125, -162.40234375, -154.984375, -147.56640625, -140.1484375, -132.73046875, -125.3125, -117.89453125, -110.4765625, -103.05859375, -95.640625, -88.22265625, -80.8046875, -73.38671875, -65.96875, -58.55078125, -51.1328125, -43.71484375, -36.296875, -28.87890625, -21.4609375, -14.04296875, -6.625, 0.79296875, 8.2109375, 15.62890625, 23.046875, 30.46484375, 37.8828125, 45.30078125, 52.71875, 60.13671875, 67.5546875, 74.97265625, 82.390625, 89.80859375, 97.2265625, 104.64453125, 112.0625, 119.48046875, 126.8984375, 134.31640625, 141.734375, 149.15234375, 156.5703125, 163.98828125, 171.40625, 178.82421875, 186.2421875, 193.66015625, 201.078125, 208.49609375, 215.9140625, 223.33203125, 230.75]}, "gradients/decoder.transformer.h.14.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 4.0, 18.0, 312.0, 556.0, 114.0, 12.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-695.890869140625, -680.5185546875, -665.1463012695312, -649.7739868164062, -634.4016723632812, -619.0294189453125, -603.6571044921875, -588.2847900390625, -572.9125366210938, -557.5402221679688, -542.16796875, -526.795654296875, -511.4233703613281, -496.05108642578125, -480.67877197265625, -465.3064880371094, -449.9341735839844, -434.5618896484375, -419.1895751953125, -403.8172912597656, -388.44500732421875, -373.07269287109375, -357.7004089355469, -342.328125, -326.955810546875, -311.5835266113281, -296.2112121582031, -280.83892822265625, -265.4666442871094, -250.09434509277344, -234.7220458984375, -219.34976196289062, -203.97747802734375, -188.6051788330078, -173.23289489746094, -157.860595703125, -142.48831176757812, -127.11601257324219, -111.74371337890625, -96.37142181396484, -80.99913024902344, -65.62683868408203, -50.25454330444336, -34.88224792480469, -19.50995635986328, -4.137664794921875, 11.234634399414062, 26.60692596435547, 41.979217529296875, 57.35150909423828, 72.72380065917969, 88.09609985351562, 103.46839141845703, 118.84068298339844, 134.21298217773438, 149.58526611328125, 164.9575653076172, 180.32986450195312, 195.7021484375, 211.07444763183594, 226.44674682617188, 241.81903076171875, 257.19134521484375, 272.5636291503906, 287.9359130859375]}, "gradients/decoder.transformer.h.14.ln_1.bias": {"_type": "histogram", "values": [1.0, 1.0, 3.0, 2.0, 1.0, 3.0, 5.0, 2.0, 2.0, 6.0, 6.0, 8.0, 10.0, 14.0, 13.0, 17.0, 23.0, 20.0, 22.0, 28.0, 24.0, 34.0, 24.0, 45.0, 43.0, 46.0, 49.0, 44.0, 46.0, 44.0, 38.0, 46.0, 32.0, 46.0, 45.0, 34.0, 29.0, 30.0, 16.0, 19.0, 17.0, 18.0, 11.0, 13.0, 8.0, 7.0, 7.0, 4.0, 5.0, 2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-295.79742431640625, -285.4211730957031, -275.0448913574219, -264.66864013671875, -254.29237365722656, -243.91610717773438, -233.53985595703125, -223.16358947753906, -212.78732299804688, -202.4110565185547, -192.0347900390625, -181.65853881835938, -171.2822723388672, -160.906005859375, -150.52975463867188, -140.1534881591797, -129.7772216796875, -119.40095520019531, -109.02469635009766, -98.6484375, -88.27217102050781, -77.89590454101562, -67.51964569091797, -57.14338684082031, -46.767120361328125, -36.3908576965332, -26.01459503173828, -15.63833236694336, -5.2620697021484375, 5.114192962646484, 15.490455627441406, 25.866714477539062, 36.242950439453125, 46.61921310424805, 56.99547576904297, 67.37173461914062, 77.74800109863281, 88.124267578125, 98.50052642822266, 108.87678527832031, 119.2530517578125, 129.6293182373047, 140.00558471679688, 150.3818359375, 160.7581024169922, 171.13436889648438, 181.5106201171875, 191.8868865966797, 202.26315307617188, 212.63941955566406, 223.01568603515625, 233.39193725585938, 243.76820373535156, 254.14447021484375, 264.5207214355469, 274.89697265625, 285.27325439453125, 295.6495056152344, 306.0257873535156, 316.40203857421875, 326.7783203125, 337.1545715332031, 347.53082275390625, 357.9071044921875, 368.2833557128906]}, "gradients/decoder.transformer.h.13.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 2.0, 1.0, 2.0, 0.0, 0.0, 3.0, 5.0, 9.0, 5.0, 7.0, 13.0, 14.0, 10.0, 15.0, 22.0, 23.0, 28.0, 27.0, 31.0, 37.0, 31.0, 41.0, 44.0, 45.0, 50.0, 42.0, 47.0, 44.0, 37.0, 37.0, 39.0, 34.0, 31.0, 32.0, 27.0, 29.0, 29.0, 22.0, 16.0, 16.0, 20.0, 8.0, 15.0, 7.0, 5.0, 2.0, 2.0, 4.0, 2.0, 0.0, 3.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-32.3125, -31.13037109375, -29.9482421875, -28.76611328125, -27.583984375, -26.40185546875, -25.2197265625, -24.03759765625, -22.85546875, -21.67333984375, -20.4912109375, -19.30908203125, -18.126953125, -16.94482421875, -15.7626953125, -14.58056640625, -13.3984375, -12.21630859375, -11.0341796875, -9.85205078125, -8.669921875, -7.48779296875, -6.3056640625, -5.12353515625, -3.94140625, -2.75927734375, -1.5771484375, -0.39501953125, 0.787109375, 1.96923828125, 3.1513671875, 4.33349609375, 5.515625, 6.69775390625, 7.8798828125, 9.06201171875, 10.244140625, 11.42626953125, 12.6083984375, 13.79052734375, 14.97265625, 16.15478515625, 17.3369140625, 18.51904296875, 19.701171875, 20.88330078125, 22.0654296875, 23.24755859375, 24.4296875, 25.61181640625, 26.7939453125, 27.97607421875, 29.158203125, 30.34033203125, 31.5224609375, 32.70458984375, 33.88671875, 35.06884765625, 36.2509765625, 37.43310546875, 38.615234375, 39.79736328125, 40.9794921875, 42.16162109375, 43.34375]}, "gradients/decoder.transformer.h.13.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 1.0, 3.0, 2.0, 5.0, 8.0, 6.0, 13.0, 14.0, 22.0, 31.0, 44.0, 57.0, 67.0, 97.0, 149.0, 206.0, 281.0, 414.0, 630.0, 1000.0, 1701.0, 3209.0, 6550.0, 15923.0, 107923.0, 3344770.0, 661933.0, 28622.0, 9761.0, 4614.0, 2312.0, 1280.0, 806.0, 556.0, 368.0, 255.0, 181.0, 128.0, 88.0, 62.0, 53.0, 34.0, 30.0, 24.0, 14.0, 11.0, 8.0, 13.0, 3.0, 5.0, 2.0, 2.0, 3.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-111.25, -107.509765625, -103.76953125, -100.029296875, -96.2890625, -92.548828125, -88.80859375, -85.068359375, -81.328125, -77.587890625, -73.84765625, -70.107421875, -66.3671875, -62.626953125, -58.88671875, -55.146484375, -51.40625, -47.666015625, -43.92578125, -40.185546875, -36.4453125, -32.705078125, -28.96484375, -25.224609375, -21.484375, -17.744140625, -14.00390625, -10.263671875, -6.5234375, -2.783203125, 0.95703125, 4.697265625, 8.4375, 12.177734375, 15.91796875, 19.658203125, 23.3984375, 27.138671875, 30.87890625, 34.619140625, 38.359375, 42.099609375, 45.83984375, 49.580078125, 53.3203125, 57.060546875, 60.80078125, 64.541015625, 68.28125, 72.021484375, 75.76171875, 79.501953125, 83.2421875, 86.982421875, 90.72265625, 94.462890625, 98.203125, 101.943359375, 105.68359375, 109.423828125, 113.1640625, 116.904296875, 120.64453125, 124.384765625, 128.125]}, "gradients/decoder.transformer.h.13.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0, 2.0, 4.0, 4.0, 3.0, 3.0, 6.0, 4.0, 18.0, 12.0, 17.0, 21.0, 30.0, 28.0, 36.0, 43.0, 109.0, 251.0, 673.0, 1360.0, 771.0, 353.0, 120.0, 44.0, 42.0, 26.0, 25.0, 14.0, 12.0, 10.0, 10.0, 4.0, 7.0, 5.0, 6.0, 0.0, 2.0, 1.0, 2.0, 3.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-100.625, -97.4375, -94.25, -91.0625, -87.875, -84.6875, -81.5, -78.3125, -75.125, -71.9375, -68.75, -65.5625, -62.375, -59.1875, -56.0, -52.8125, -49.625, -46.4375, -43.25, -40.0625, -36.875, -33.6875, -30.5, -27.3125, -24.125, -20.9375, -17.75, -14.5625, -11.375, -8.1875, -5.0, -1.8125, 1.375, 4.5625, 7.75, 10.9375, 14.125, 17.3125, 20.5, 23.6875, 26.875, 30.0625, 33.25, 36.4375, 39.625, 42.8125, 46.0, 49.1875, 52.375, 55.5625, 58.75, 61.9375, 65.125, 68.3125, 71.5, 74.6875, 77.875, 81.0625, 84.25, 87.4375, 90.625, 93.8125, 97.0, 100.1875, 103.375]}, "gradients/decoder.transformer.h.13.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 4.0, 2.0, 2.0, 5.0, 11.0, 9.0, 17.0, 14.0, 28.0, 51.0, 86.0, 168.0, 412.0, 1004.0, 2404.0, 7092.0, 25398.0, 185188.0, 3862281.0, 84823.0, 16940.0, 5056.0, 1908.0, 664.0, 332.0, 149.0, 84.0, 52.0, 33.0, 29.0, 11.0, 6.0, 7.0, 5.0, 5.0, 2.0, 4.0, 2.0, 1.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-249.75, -242.03125, -234.3125, -226.59375, -218.875, -211.15625, -203.4375, -195.71875, -188.0, -180.28125, -172.5625, -164.84375, -157.125, -149.40625, -141.6875, -133.96875, -126.25, -118.53125, -110.8125, -103.09375, -95.375, -87.65625, -79.9375, -72.21875, -64.5, -56.78125, -49.0625, -41.34375, -33.625, -25.90625, -18.1875, -10.46875, -2.75, 4.96875, 12.6875, 20.40625, 28.125, 35.84375, 43.5625, 51.28125, 59.0, 66.71875, 74.4375, 82.15625, 89.875, 97.59375, 105.3125, 113.03125, 120.75, 128.46875, 136.1875, 143.90625, 151.625, 159.34375, 167.0625, 174.78125, 182.5, 190.21875, 197.9375, 205.65625, 213.375, 221.09375, 228.8125, 236.53125, 244.25]}, "gradients/decoder.transformer.h.13.ln_2.weight": {"_type": "histogram", "values": [3.0, 3.0, 1.0, 8.0, 8.0, 12.0, 24.0, 18.0, 51.0, 70.0, 140.0, 280.0, 203.0, 89.0, 53.0, 21.0, 14.0, 11.0, 8.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-250.4799346923828, -229.0304412841797, -207.58096313476562, -186.1314697265625, -164.68197631835938, -143.2324981689453, -121.78300476074219, -100.33352661132812, -78.884033203125, -57.434547424316406, -35.98505783081055, -14.535568237304688, 6.913917541503906, 28.3634033203125, 49.812896728515625, 71.26237487792969, 92.71186828613281, 114.1613540649414, 135.61083984375, 157.06033325195312, 178.50982666015625, 199.9593048095703, 221.40879821777344, 242.8582763671875, 264.3077697753906, 285.75726318359375, 307.2067565917969, 328.65625, 350.105712890625, 371.5552062988281, 393.00469970703125, 414.45416259765625, 435.90362548828125, 457.3531188964844, 478.8026123046875, 500.2520751953125, 521.7015991210938, 543.1510620117188, 564.6005859375, 586.050048828125, 607.49951171875, 628.948974609375, 650.3984985351562, 671.8479614257812, 693.2974853515625, 714.7469482421875, 736.1964111328125, 757.6459350585938, 779.095458984375, 800.544921875, 821.9944458007812, 843.4439086914062, 864.8934326171875, 886.3428955078125, 907.7923583984375, 929.2418823242188, 950.6913452148438, 972.1408081054688, 993.59033203125, 1015.039794921875, 1036.4892578125, 1057.938720703125, 1079.3883056640625, 1100.8377685546875, 1122.2872314453125]}, "gradients/decoder.transformer.h.13.ln_2.bias": {"_type": "histogram", "values": [1.0, 2.0, 2.0, 1.0, 0.0, 1.0, 1.0, 2.0, 5.0, 8.0, 6.0, 8.0, 3.0, 8.0, 13.0, 11.0, 9.0, 19.0, 17.0, 13.0, 27.0, 23.0, 31.0, 14.0, 38.0, 43.0, 38.0, 42.0, 40.0, 48.0, 43.0, 42.0, 26.0, 35.0, 35.0, 41.0, 24.0, 38.0, 33.0, 26.0, 29.0, 23.0, 21.0, 19.0, 20.0, 16.0, 16.0, 8.0, 8.0, 10.0, 3.0, 8.0, 0.0, 4.0, 6.0, 3.0, 0.0, 4.0, 4.0, 1.0, 1.0, 1.0, 0.0, 2.0], "bins": [-208.52981567382812, -201.77549743652344, -195.0211639404297, -188.266845703125, -181.5125274658203, -174.75820922851562, -168.00387573242188, -161.2495574951172, -154.4952392578125, -147.7409210205078, -140.98658752441406, -134.23226928710938, -127.47795104980469, -120.72362518310547, -113.96929931640625, -107.21498107910156, -100.46064758300781, -93.7063217163086, -86.9520034790039, -80.19767761230469, -73.443359375, -66.68903350830078, -59.93470764160156, -53.18038558959961, -46.426063537597656, -39.6717414855957, -32.91741943359375, -26.16309356689453, -19.408771514892578, -12.654449462890625, -5.900123596191406, 0.8541984558105469, 7.6085205078125, 14.36284351348877, 21.11716651916504, 27.871490478515625, 34.62581253051758, 41.38013458251953, 48.13446044921875, 54.8887825012207, 61.643104553222656, 68.39743041992188, 75.15174865722656, 81.90607452392578, 88.660400390625, 95.41471862792969, 102.1690444946289, 108.92337036132812, 115.67768859863281, 122.43201446533203, 129.18634033203125, 135.94065856933594, 142.69497680664062, 149.44931030273438, 156.20362854003906, 162.95794677734375, 169.7122802734375, 176.4665985107422, 183.22093200683594, 189.97525024414062, 196.7295684814453, 203.48388671875, 210.23822021484375, 216.99253845214844, 223.74685668945312]}, "gradients/decoder.transformer.h.13.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 2.0, 1.0, 2.0, 2.0, 2.0, 6.0, 9.0, 11.0, 6.0, 8.0, 14.0, 12.0, 15.0, 20.0, 12.0, 20.0, 28.0, 30.0, 29.0, 43.0, 35.0, 30.0, 50.0, 35.0, 35.0, 52.0, 49.0, 43.0, 43.0, 37.0, 27.0, 38.0, 28.0, 34.0, 38.0, 25.0, 25.0, 16.0, 21.0, 17.0, 12.0, 8.0, 11.0, 10.0, 4.0, 4.0, 4.0, 8.0, 3.0, 3.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-33.46875, -32.3173828125, -31.166015625, -30.0146484375, -28.86328125, -27.7119140625, -26.560546875, -25.4091796875, -24.2578125, -23.1064453125, -21.955078125, -20.8037109375, -19.65234375, -18.5009765625, -17.349609375, -16.1982421875, -15.046875, -13.8955078125, -12.744140625, -11.5927734375, -10.44140625, -9.2900390625, -8.138671875, -6.9873046875, -5.8359375, -4.6845703125, -3.533203125, -2.3818359375, -1.23046875, -0.0791015625, 1.072265625, 2.2236328125, 3.375, 4.5263671875, 5.677734375, 6.8291015625, 7.98046875, 9.1318359375, 10.283203125, 11.4345703125, 12.5859375, 13.7373046875, 14.888671875, 16.0400390625, 17.19140625, 18.3427734375, 19.494140625, 20.6455078125, 21.796875, 22.9482421875, 24.099609375, 25.2509765625, 26.40234375, 27.5537109375, 28.705078125, 29.8564453125, 31.0078125, 32.1591796875, 33.310546875, 34.4619140625, 35.61328125, 36.7646484375, 37.916015625, 39.0673828125, 40.21875]}, "gradients/decoder.transformer.h.13.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 0.0, 1.0, 2.0, 3.0, 5.0, 3.0, 4.0, 12.0, 8.0, 11.0, 33.0, 37.0, 59.0, 81.0, 138.0, 190.0, 283.0, 386.0, 601.0, 948.0, 1321.0, 1915.0, 2915.0, 4177.0, 6337.0, 9697.0, 14948.0, 23761.0, 37484.0, 62092.0, 109445.0, 285302.0, 227461.0, 101411.0, 58079.0, 35279.0, 22035.0, 14275.0, 9318.0, 5993.0, 4072.0, 2729.0, 1842.0, 1249.0, 789.0, 576.0, 412.0, 276.0, 185.0, 110.0, 87.0, 53.0, 44.0, 31.0, 23.0, 14.0, 13.0, 7.0, 2.0, 4.0, 4.0], "bins": [-1.4638671875, -1.421966552734375, -1.38006591796875, -1.338165283203125, -1.2962646484375, -1.254364013671875, -1.21246337890625, -1.170562744140625, -1.128662109375, -1.086761474609375, -1.04486083984375, -1.002960205078125, -0.9610595703125, -0.919158935546875, -0.87725830078125, -0.835357666015625, -0.79345703125, -0.751556396484375, -0.70965576171875, -0.667755126953125, -0.6258544921875, -0.583953857421875, -0.54205322265625, -0.500152587890625, -0.458251953125, -0.416351318359375, -0.37445068359375, -0.332550048828125, -0.2906494140625, -0.248748779296875, -0.20684814453125, -0.164947509765625, -0.123046875, -0.081146240234375, -0.03924560546875, 0.002655029296875, 0.0445556640625, 0.086456298828125, 0.12835693359375, 0.170257568359375, 0.212158203125, 0.254058837890625, 0.29595947265625, 0.337860107421875, 0.3797607421875, 0.421661376953125, 0.46356201171875, 0.505462646484375, 0.54736328125, 0.589263916015625, 0.63116455078125, 0.673065185546875, 0.7149658203125, 0.756866455078125, 0.79876708984375, 0.840667724609375, 0.882568359375, 0.924468994140625, 0.96636962890625, 1.008270263671875, 1.0501708984375, 1.092071533203125, 1.13397216796875, 1.175872802734375, 1.2177734375]}, "gradients/decoder.transformer.h.13.crossattention.c_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 2.0, 1.0, 1.0, 3.0, 5.0, 3.0, 7.0, 3.0, 8.0, 11.0, 10.0, 15.0, 15.0, 21.0, 22.0, 14.0, 24.0, 26.0, 22.0, 33.0, 45.0, 34.0, 35.0, 38.0, 39.0, 39.0, 32.0, 1071.0, 45.0, 33.0, 35.0, 32.0, 34.0, 34.0, 28.0, 29.0, 28.0, 21.0, 23.0, 17.0, 17.0, 10.0, 15.0, 14.0, 9.0, 12.0, 6.0, 5.0, 1.0, 0.0, 3.0, 1.0, 3.0, 5.0, 0.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-19.734375, -19.055419921875, -18.37646484375, -17.697509765625, -17.0185546875, -16.339599609375, -15.66064453125, -14.981689453125, -14.302734375, -13.623779296875, -12.94482421875, -12.265869140625, -11.5869140625, -10.907958984375, -10.22900390625, -9.550048828125, -8.87109375, -8.192138671875, -7.51318359375, -6.834228515625, -6.1552734375, -5.476318359375, -4.79736328125, -4.118408203125, -3.439453125, -2.760498046875, -2.08154296875, -1.402587890625, -0.7236328125, -0.044677734375, 0.63427734375, 1.313232421875, 1.9921875, 2.671142578125, 3.35009765625, 4.029052734375, 4.7080078125, 5.386962890625, 6.06591796875, 6.744873046875, 7.423828125, 8.102783203125, 8.78173828125, 9.460693359375, 10.1396484375, 10.818603515625, 11.49755859375, 12.176513671875, 12.85546875, 13.534423828125, 14.21337890625, 14.892333984375, 15.5712890625, 16.250244140625, 16.92919921875, 17.608154296875, 18.287109375, 18.966064453125, 19.64501953125, 20.323974609375, 21.0029296875, 21.681884765625, 22.36083984375, 23.039794921875, 23.71875]}, "gradients/decoder.transformer.h.13.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 1.0, 3.0, 8.0, 9.0, 11.0, 26.0, 25.0, 26.0, 44.0, 64.0, 88.0, 125.0, 198.0, 276.0, 408.0, 640.0, 886.0, 1249.0, 1803.0, 2607.0, 4059.0, 6044.0, 9091.0, 13531.0, 20978.0, 32871.0, 52618.0, 87603.0, 167808.0, 1365369.0, 129052.0, 73164.0, 44669.0, 27761.0, 18165.0, 11810.0, 7914.0, 5244.0, 3470.0, 2345.0, 1566.0, 1090.0, 725.0, 520.0, 346.0, 269.0, 166.0, 112.0, 76.0, 69.0, 49.0, 29.0, 20.0, 16.0, 13.0, 9.0, 3.0, 3.0, 2.0, 1.0, 1.0], "bins": [-0.970703125, -0.940673828125, -0.91064453125, -0.880615234375, -0.8505859375, -0.820556640625, -0.79052734375, -0.760498046875, -0.73046875, -0.700439453125, -0.67041015625, -0.640380859375, -0.6103515625, -0.580322265625, -0.55029296875, -0.520263671875, -0.490234375, -0.460205078125, -0.43017578125, -0.400146484375, -0.3701171875, -0.340087890625, -0.31005859375, -0.280029296875, -0.25, -0.219970703125, -0.18994140625, -0.159912109375, -0.1298828125, -0.099853515625, -0.06982421875, -0.039794921875, -0.009765625, 0.020263671875, 0.05029296875, 0.080322265625, 0.1103515625, 0.140380859375, 0.17041015625, 0.200439453125, 0.23046875, 0.260498046875, 0.29052734375, 0.320556640625, 0.3505859375, 0.380615234375, 0.41064453125, 0.440673828125, 0.470703125, 0.500732421875, 0.53076171875, 0.560791015625, 0.5908203125, 0.620849609375, 0.65087890625, 0.680908203125, 0.7109375, 0.740966796875, 0.77099609375, 0.801025390625, 0.8310546875, 0.861083984375, 0.89111328125, 0.921142578125, 0.951171875]}, "gradients/decoder.transformer.h.13.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 2.0, 2.0, 1.0, 1.0, 2.0, 4.0, 1.0, 5.0, 4.0, 3.0, 9.0, 9.0, 8.0, 26.0, 23.0, 31.0, 42.0, 70.0, 120.0, 332.0, 87.0, 73.0, 31.0, 29.0, 27.0, 14.0, 12.0, 6.0, 9.0, 7.0, 4.0, 3.0, 1.0, 2.0, 5.0, 4.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.987022399902344e-05, -7.731374353170395e-05, -7.475726306438446e-05, -7.220078259706497e-05, -6.964430212974548e-05, -6.7087821662426e-05, -6.45313411951065e-05, -6.197486072778702e-05, -5.941838026046753e-05, -5.686189979314804e-05, -5.430541932582855e-05, -5.1748938858509064e-05, -4.9192458391189575e-05, -4.663597792387009e-05, -4.40794974565506e-05, -4.152301698923111e-05, -3.896653652191162e-05, -3.641005605459213e-05, -3.3853575587272644e-05, -3.1297095119953156e-05, -2.8740614652633667e-05, -2.618413418531418e-05, -2.362765371799469e-05, -2.10711732506752e-05, -1.8514692783355713e-05, -1.5958212316036224e-05, -1.3401731848716736e-05, -1.0845251381397247e-05, -8.288770914077759e-06, -5.73229044675827e-06, -3.1758099794387817e-06, -6.193295121192932e-07, 1.9371509552001953e-06, 4.493631422519684e-06, 7.050111889839172e-06, 9.606592357158661e-06, 1.216307282447815e-05, 1.4719553291797638e-05, 1.7276033759117126e-05, 1.9832514226436615e-05, 2.2388994693756104e-05, 2.4945475161075592e-05, 2.750195562839508e-05, 3.005843609571457e-05, 3.261491656303406e-05, 3.5171397030353546e-05, 3.7727877497673035e-05, 4.028435796499252e-05, 4.284083843231201e-05, 4.53973188996315e-05, 4.795379936695099e-05, 5.051027983427048e-05, 5.3066760301589966e-05, 5.5623240768909454e-05, 5.817972123622894e-05, 6.073620170354843e-05, 6.329268217086792e-05, 6.584916263818741e-05, 6.84056431055069e-05, 7.096212357282639e-05, 7.351860404014587e-05, 7.607508450746536e-05, 7.863156497478485e-05, 8.118804544210434e-05, 8.374452590942383e-05]}, "gradients/decoder.transformer.h.13.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 3.0, 3.0, 2.0, 3.0, 5.0, 6.0, 8.0, 8.0, 10.0, 19.0, 22.0, 34.0, 60.0, 114.0, 555.0, 6154.0, 986418.0, 53136.0, 1549.0, 247.0, 71.0, 34.0, 32.0, 14.0, 11.0, 10.0, 8.0, 3.0, 3.0, 4.0, 5.0, 3.0, 3.0, 1.0, 3.0, 1.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.001468658447265625, -0.0014238208532333374, -0.0013789832592010498, -0.0013341456651687622, -0.0012893080711364746, -0.001244470477104187, -0.0011996328830718994, -0.0011547952890396118, -0.0011099576950073242, -0.0010651201009750366, -0.001020282506942749, -0.0009754449129104614, -0.0009306073188781738, -0.0008857697248458862, -0.0008409321308135986, -0.000796094536781311, -0.0007512569427490234, -0.0007064193487167358, -0.0006615817546844482, -0.0006167441606521606, -0.000571906566619873, -0.0005270689725875854, -0.00048223137855529785, -0.00043739378452301025, -0.00039255619049072266, -0.00034771859645843506, -0.00030288100242614746, -0.00025804340839385986, -0.00021320581436157227, -0.00016836822032928467, -0.00012353062629699707, -7.869303226470947e-05, -3.3855438232421875e-05, 1.0982155799865723e-05, 5.581974983215332e-05, 0.00010065734386444092, 0.00014549493789672852, 0.0001903325319290161, 0.0002351701259613037, 0.0002800077199935913, 0.0003248453140258789, 0.0003696829080581665, 0.0004145205020904541, 0.0004593580961227417, 0.0005041956901550293, 0.0005490332841873169, 0.0005938708782196045, 0.0006387084722518921, 0.0006835460662841797, 0.0007283836603164673, 0.0007732212543487549, 0.0008180588483810425, 0.0008628964424133301, 0.0009077340364456177, 0.0009525716304779053, 0.0009974092245101929, 0.0010422468185424805, 0.001087084412574768, 0.0011319220066070557, 0.0011767596006393433, 0.0012215971946716309, 0.0012664347887039185, 0.001311272382736206, 0.0013561099767684937, 0.0014009475708007812]}, "gradients/decoder.transformer.h.13.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 4.0, 1.0, 5.0, 6.0, 24.0, 42.0, 80.0, 246.0, 388.0, 133.0, 39.0, 25.0, 13.0, 8.0, 2.0, 2.0, 0.0, 0.0, 1.0], "bins": [-0.00015841747517697513, -0.00015544008056167513, -0.0001524626713944599, -0.0001494852767791599, -0.0001465078821638599, -0.0001435304875485599, -0.00014055307838134468, -0.00013757568376604468, -0.00013459828915074468, -0.00013162089453544468, -0.00012864348536822945, -0.00012566609075292945, -0.00012268869613762945, -0.00011971129424637184, -0.00011673389235511422, -0.00011375649773981422, -0.000110779088572599, -0.00010780168668134138, -0.00010482429206604138, -0.00010184689017478377, -9.886949555948377e-05, -9.589209366822615e-05, -9.291469177696854e-05, -8.993729716166854e-05, -8.695990254636854e-05, -8.398250065511093e-05, -8.100510603981093e-05, -7.802770414855331e-05, -7.505030953325331e-05, -7.20729076419957e-05, -6.909550575073808e-05, -6.611811113543808e-05, -6.314070924418047e-05, -6.0163310990901664e-05, -5.718591273762286e-05, -5.420851084636524e-05, -5.1231112593086436e-05, -4.825371433980763e-05, -4.527631608652882e-05, -4.2298917833250016e-05, -3.93215159419924e-05, -3.6344117688713595e-05, -3.336671943543479e-05, -3.0389319363166578e-05, -2.7411919290898368e-05, -2.443452103761956e-05, -2.1457122784340754e-05, -1.8479722712072544e-05, -1.550232627778314e-05, -1.2524927115009632e-05, -9.547527952236123e-06, -6.570129698957317e-06, -3.592730536183808e-06, -6.153313734102994e-07, 2.3620668798685074e-06, 5.339466952136718e-06, 8.316865205415525e-06, 1.1294264368189033e-05, 1.4271663530962542e-05, 1.724906178424135e-05, 2.0226460037520155e-05, 2.3203860109788366e-05, 2.6181258363067172e-05, 2.9158658435335383e-05, 3.213605668861419e-05]}, "gradients/decoder.transformer.h.13.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 2.0, 3.0, 4.0, 5.0, 11.0, 3.0, 11.0, 7.0, 17.0, 7.0, 16.0, 12.0, 17.0, 16.0, 23.0, 23.0, 22.0, 21.0, 29.0, 42.0, 29.0, 34.0, 35.0, 34.0, 38.0, 38.0, 39.0, 34.0, 25.0, 40.0, 39.0, 42.0, 40.0, 29.0, 21.0, 25.0, 22.0, 20.0, 24.0, 17.0, 13.0, 16.0, 13.0, 15.0, 13.0, 4.0, 7.0, 6.0, 2.0, 5.0, 3.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-2.5093555450439453e-05, -2.4335458874702454e-05, -2.3577362298965454e-05, -2.2819265723228455e-05, -2.2061169147491455e-05, -2.1303072571754456e-05, -2.0544975996017456e-05, -1.9786879420280457e-05, -1.9028782844543457e-05, -1.8270686268806458e-05, -1.7512589693069458e-05, -1.675449311733246e-05, -1.599639654159546e-05, -1.523829996585846e-05, -1.448020339012146e-05, -1.372210681438446e-05, -1.2964010238647461e-05, -1.2205913662910461e-05, -1.1447817087173462e-05, -1.0689720511436462e-05, -9.931623935699463e-06, -9.173527359962463e-06, -8.415430784225464e-06, -7.657334208488464e-06, -6.899237632751465e-06, -6.141141057014465e-06, -5.383044481277466e-06, -4.624947905540466e-06, -3.866851329803467e-06, -3.1087547540664673e-06, -2.3506581783294678e-06, -1.5925616025924683e-06, -8.344650268554688e-07, -7.636845111846924e-08, 6.817281246185303e-07, 1.4398247003555298e-06, 2.1979212760925293e-06, 2.956017851829529e-06, 3.7141144275665283e-06, 4.472211003303528e-06, 5.230307579040527e-06, 5.988404154777527e-06, 6.746500730514526e-06, 7.504597306251526e-06, 8.262693881988525e-06, 9.020790457725525e-06, 9.778887033462524e-06, 1.0536983609199524e-05, 1.1295080184936523e-05, 1.2053176760673523e-05, 1.2811273336410522e-05, 1.3569369912147522e-05, 1.4327466487884521e-05, 1.5085563063621521e-05, 1.584365963935852e-05, 1.660175621509552e-05, 1.735985279083252e-05, 1.811794936656952e-05, 1.887604594230652e-05, 1.9634142518043518e-05, 2.0392239093780518e-05, 2.1150335669517517e-05, 2.1908432245254517e-05, 2.2666528820991516e-05, 2.3424625396728516e-05]}, "gradients/decoder.transformer.h.13.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 2.0, 1.0, 2.0, 2.0, 2.0, 6.0, 9.0, 11.0, 6.0, 8.0, 14.0, 12.0, 15.0, 20.0, 12.0, 20.0, 28.0, 30.0, 29.0, 43.0, 35.0, 30.0, 50.0, 35.0, 35.0, 52.0, 49.0, 43.0, 43.0, 37.0, 27.0, 38.0, 28.0, 34.0, 38.0, 25.0, 25.0, 16.0, 21.0, 17.0, 12.0, 8.0, 11.0, 10.0, 4.0, 4.0, 4.0, 8.0, 3.0, 3.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-33.46875, -32.3173828125, -31.166015625, -30.0146484375, -28.86328125, -27.7119140625, -26.560546875, -25.4091796875, -24.2578125, -23.1064453125, -21.955078125, -20.8037109375, -19.65234375, -18.5009765625, -17.349609375, -16.1982421875, -15.046875, -13.8955078125, -12.744140625, -11.5927734375, -10.44140625, -9.2900390625, -8.138671875, -6.9873046875, -5.8359375, -4.6845703125, -3.533203125, -2.3818359375, -1.23046875, -0.0791015625, 1.072265625, 2.2236328125, 3.375, 4.5263671875, 5.677734375, 6.8291015625, 7.98046875, 9.1318359375, 10.283203125, 11.4345703125, 12.5859375, 13.7373046875, 14.888671875, 16.0400390625, 17.19140625, 18.3427734375, 19.494140625, 20.6455078125, 21.796875, 22.9482421875, 24.099609375, 25.2509765625, 26.40234375, 27.5537109375, 28.705078125, 29.8564453125, 31.0078125, 32.1591796875, 33.310546875, 34.4619140625, 35.61328125, 36.7646484375, 37.916015625, 39.0673828125, 40.21875]}, "gradients/decoder.transformer.h.13.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 3.0, 2.0, 7.0, 11.0, 11.0, 15.0, 16.0, 23.0, 29.0, 52.0, 56.0, 109.0, 135.0, 215.0, 274.0, 407.0, 598.0, 894.0, 1182.0, 1875.0, 2788.0, 4336.0, 8806.0, 148015.0, 843968.0, 18932.0, 5490.0, 3434.0, 2205.0, 1464.0, 955.0, 644.0, 486.0, 314.0, 231.0, 166.0, 114.0, 82.0, 56.0, 51.0, 39.0, 17.0, 18.0, 10.0, 7.0, 4.0, 10.0, 3.0, 6.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-76.1875, -73.8935546875, -71.599609375, -69.3056640625, -67.01171875, -64.7177734375, -62.423828125, -60.1298828125, -57.8359375, -55.5419921875, -53.248046875, -50.9541015625, -48.66015625, -46.3662109375, -44.072265625, -41.7783203125, -39.484375, -37.1904296875, -34.896484375, -32.6025390625, -30.30859375, -28.0146484375, -25.720703125, -23.4267578125, -21.1328125, -18.8388671875, -16.544921875, -14.2509765625, -11.95703125, -9.6630859375, -7.369140625, -5.0751953125, -2.78125, -0.4873046875, 1.806640625, 4.1005859375, 6.39453125, 8.6884765625, 10.982421875, 13.2763671875, 15.5703125, 17.8642578125, 20.158203125, 22.4521484375, 24.74609375, 27.0400390625, 29.333984375, 31.6279296875, 33.921875, 36.2158203125, 38.509765625, 40.8037109375, 43.09765625, 45.3916015625, 47.685546875, 49.9794921875, 52.2734375, 54.5673828125, 56.861328125, 59.1552734375, 61.44921875, 63.7431640625, 66.037109375, 68.3310546875, 70.625]}, "gradients/decoder.transformer.h.13.attn.c_attn.bias": {"_type": "histogram", "values": [3.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 5.0, 0.0, 1.0, 5.0, 0.0, 3.0, 5.0, 9.0, 12.0, 11.0, 12.0, 15.0, 17.0, 19.0, 24.0, 19.0, 24.0, 29.0, 30.0, 33.0, 29.0, 39.0, 55.0, 71.0, 117.0, 1910.0, 79.0, 63.0, 48.0, 45.0, 34.0, 26.0, 30.0, 30.0, 23.0, 26.0, 32.0, 22.0, 17.0, 10.0, 8.0, 5.0, 12.0, 11.0, 9.0, 10.0, 7.0, 3.0, 4.0, 3.0, 4.0, 1.0, 2.0, 1.0, 1.0, 0.0, 3.0], "bins": [-101.25, -98.1181640625, -94.986328125, -91.8544921875, -88.72265625, -85.5908203125, -82.458984375, -79.3271484375, -76.1953125, -73.0634765625, -69.931640625, -66.7998046875, -63.66796875, -60.5361328125, -57.404296875, -54.2724609375, -51.140625, -48.0087890625, -44.876953125, -41.7451171875, -38.61328125, -35.4814453125, -32.349609375, -29.2177734375, -26.0859375, -22.9541015625, -19.822265625, -16.6904296875, -13.55859375, -10.4267578125, -7.294921875, -4.1630859375, -1.03125, 2.1005859375, 5.232421875, 8.3642578125, 11.49609375, 14.6279296875, 17.759765625, 20.8916015625, 24.0234375, 27.1552734375, 30.287109375, 33.4189453125, 36.55078125, 39.6826171875, 42.814453125, 45.9462890625, 49.078125, 52.2099609375, 55.341796875, 58.4736328125, 61.60546875, 64.7373046875, 67.869140625, 71.0009765625, 74.1328125, 77.2646484375, 80.396484375, 83.5283203125, 86.66015625, 89.7919921875, 92.923828125, 96.0556640625, 99.1875]}, "gradients/decoder.transformer.h.13.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 5.0, 6.0, 4.0, 13.0, 8.0, 28.0, 34.0, 49.0, 67.0, 93.0, 190.0, 306.0, 759.0, 3523.0, 58204.0, 3073650.0, 6689.0, 1180.0, 400.0, 202.0, 103.0, 76.0, 49.0, 23.0, 15.0, 10.0, 6.0, 6.0, 4.0, 5.0, 4.0, 0.0, 3.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-380.75, -367.6953125, -354.640625, -341.5859375, -328.53125, -315.4765625, -302.421875, -289.3671875, -276.3125, -263.2578125, -250.203125, -237.1484375, -224.09375, -211.0390625, -197.984375, -184.9296875, -171.875, -158.8203125, -145.765625, -132.7109375, -119.65625, -106.6015625, -93.546875, -80.4921875, -67.4375, -54.3828125, -41.328125, -28.2734375, -15.21875, -2.1640625, 10.890625, 23.9453125, 37.0, 50.0546875, 63.109375, 76.1640625, 89.21875, 102.2734375, 115.328125, 128.3828125, 141.4375, 154.4921875, 167.546875, 180.6015625, 193.65625, 206.7109375, 219.765625, 232.8203125, 245.875, 258.9296875, 271.984375, 285.0390625, 298.09375, 311.1484375, 324.203125, 337.2578125, 350.3125, 363.3671875, 376.421875, 389.4765625, 402.53125, 415.5859375, 428.640625, 441.6953125, 454.75]}, "gradients/decoder.transformer.h.13.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 13.0, 150.0, 667.0, 159.0, 20.0, 7.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1370.9456787109375, -1343.770751953125, -1316.5958251953125, -1289.4208984375, -1262.2459716796875, -1235.071044921875, -1207.8961181640625, -1180.72119140625, -1153.5462646484375, -1126.371337890625, -1099.1964111328125, -1072.021484375, -1044.8465576171875, -1017.671630859375, -990.4967041015625, -963.32177734375, -936.1469116210938, -908.9719848632812, -881.7970581054688, -854.6221313476562, -827.4472045898438, -800.2722778320312, -773.097412109375, -745.9224853515625, -718.74755859375, -691.5726318359375, -664.397705078125, -637.2227783203125, -610.0478515625, -582.8729248046875, -555.697998046875, -528.5230712890625, -501.34820556640625, -474.17327880859375, -446.99835205078125, -419.82342529296875, -392.64849853515625, -365.47357177734375, -338.2986755371094, -311.1237487792969, -283.9488220214844, -256.7738952636719, -229.59896850585938, -202.42405700683594, -175.24913024902344, -148.07420349121094, -120.8992919921875, -93.724365234375, -66.5494384765625, -39.374515533447266, -12.199592590332031, 14.975326538085938, 42.15025329589844, 69.32518005371094, 96.50009155273438, 123.67501831054688, 150.84994506835938, 178.02487182617188, 205.19979858398438, 232.3747100830078, 259.54962158203125, 286.72454833984375, 313.89947509765625, 341.07440185546875, 368.24932861328125]}, "gradients/decoder.transformer.h.13.ln_1.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 1.0, 3.0, 4.0, 0.0, 3.0, 5.0, 1.0, 6.0, 12.0, 6.0, 12.0, 15.0, 18.0, 16.0, 21.0, 22.0, 31.0, 32.0, 33.0, 47.0, 38.0, 47.0, 36.0, 31.0, 51.0, 52.0, 41.0, 42.0, 41.0, 42.0, 38.0, 31.0, 30.0, 24.0, 25.0, 26.0, 23.0, 20.0, 14.0, 20.0, 11.0, 7.0, 13.0, 9.0, 4.0, 6.0, 6.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-287.1763610839844, -277.3305969238281, -267.4848327636719, -257.6390686035156, -247.7932891845703, -237.94752502441406, -228.10174560546875, -218.2559814453125, -208.41021728515625, -198.564453125, -188.71868896484375, -178.87290954589844, -169.0271453857422, -159.18138122558594, -149.33560180664062, -139.48983764648438, -129.64407348632812, -119.79830932617188, -109.9525375366211, -100.10676574707031, -90.26100158691406, -80.41523742675781, -70.56946563720703, -60.72369384765625, -50.8779296875, -41.032161712646484, -31.18639373779297, -21.340625762939453, -11.494857788085938, -1.6490898132324219, 8.196678161621094, 18.042449951171875, 27.888214111328125, 37.73398208618164, 47.579750061035156, 57.42551803588867, 67.27128601074219, 77.11705017089844, 86.96282196044922, 96.80859375, 106.65435791015625, 116.5001220703125, 126.34589385986328, 136.19166564941406, 146.0374298095703, 155.88319396972656, 165.72897338867188, 175.57473754882812, 185.42050170898438, 195.26626586914062, 205.11203002929688, 214.9578094482422, 224.80357360839844, 234.6493377685547, 244.4951171875, 254.34088134765625, 264.1866455078125, 274.03240966796875, 283.878173828125, 293.72393798828125, 303.5697021484375, 313.4154968261719, 323.2612609863281, 333.1070251464844, 342.9527893066406]}, "gradients/decoder.transformer.h.12.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 1.0, 4.0, 5.0, 9.0, 10.0, 5.0, 8.0, 14.0, 9.0, 14.0, 23.0, 21.0, 17.0, 35.0, 27.0, 30.0, 28.0, 35.0, 37.0, 42.0, 47.0, 42.0, 41.0, 48.0, 38.0, 32.0, 50.0, 42.0, 36.0, 24.0, 36.0, 27.0, 29.0, 27.0, 18.0, 16.0, 12.0, 12.0, 16.0, 10.0, 7.0, 4.0, 6.0, 4.0, 7.0, 5.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-34.40625, -33.22021484375, -32.0341796875, -30.84814453125, -29.662109375, -28.47607421875, -27.2900390625, -26.10400390625, -24.91796875, -23.73193359375, -22.5458984375, -21.35986328125, -20.173828125, -18.98779296875, -17.8017578125, -16.61572265625, -15.4296875, -14.24365234375, -13.0576171875, -11.87158203125, -10.685546875, -9.49951171875, -8.3134765625, -7.12744140625, -5.94140625, -4.75537109375, -3.5693359375, -2.38330078125, -1.197265625, -0.01123046875, 1.1748046875, 2.36083984375, 3.546875, 4.73291015625, 5.9189453125, 7.10498046875, 8.291015625, 9.47705078125, 10.6630859375, 11.84912109375, 13.03515625, 14.22119140625, 15.4072265625, 16.59326171875, 17.779296875, 18.96533203125, 20.1513671875, 21.33740234375, 22.5234375, 23.70947265625, 24.8955078125, 26.08154296875, 27.267578125, 28.45361328125, 29.6396484375, 30.82568359375, 32.01171875, 33.19775390625, 34.3837890625, 35.56982421875, 36.755859375, 37.94189453125, 39.1279296875, 40.31396484375, 41.5]}, "gradients/decoder.transformer.h.12.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 5.0, 4.0, 6.0, 10.0, 12.0, 20.0, 32.0, 34.0, 64.0, 94.0, 125.0, 153.0, 242.0, 347.0, 408.0, 612.0, 866.0, 1258.0, 1890.0, 3093.0, 4953.0, 8521.0, 18988.0, 108854.0, 2237765.0, 1679946.0, 87767.0, 16560.0, 8287.0, 4617.0, 2875.0, 1793.0, 1211.0, 841.0, 519.0, 390.0, 322.0, 229.0, 153.0, 109.0, 84.0, 70.0, 55.0, 30.0, 17.0, 24.0, 10.0, 7.0, 12.0, 6.0, 0.0, 3.0, 4.0, 1.0, 0.0, 1.0, 2.0], "bins": [-92.9375, -90.029296875, -87.12109375, -84.212890625, -81.3046875, -78.396484375, -75.48828125, -72.580078125, -69.671875, -66.763671875, -63.85546875, -60.947265625, -58.0390625, -55.130859375, -52.22265625, -49.314453125, -46.40625, -43.498046875, -40.58984375, -37.681640625, -34.7734375, -31.865234375, -28.95703125, -26.048828125, -23.140625, -20.232421875, -17.32421875, -14.416015625, -11.5078125, -8.599609375, -5.69140625, -2.783203125, 0.125, 3.033203125, 5.94140625, 8.849609375, 11.7578125, 14.666015625, 17.57421875, 20.482421875, 23.390625, 26.298828125, 29.20703125, 32.115234375, 35.0234375, 37.931640625, 40.83984375, 43.748046875, 46.65625, 49.564453125, 52.47265625, 55.380859375, 58.2890625, 61.197265625, 64.10546875, 67.013671875, 69.921875, 72.830078125, 75.73828125, 78.646484375, 81.5546875, 84.462890625, 87.37109375, 90.279296875, 93.1875]}, "gradients/decoder.transformer.h.12.mlp.c_fc.bias": {"_type": "histogram", "values": [3.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 2.0, 3.0, 3.0, 2.0, 4.0, 5.0, 4.0, 7.0, 5.0, 11.0, 14.0, 14.0, 11.0, 19.0, 21.0, 30.0, 44.0, 76.0, 139.0, 291.0, 689.0, 1135.0, 821.0, 315.0, 150.0, 71.0, 49.0, 37.0, 18.0, 19.0, 10.0, 11.0, 13.0, 2.0, 7.0, 7.0, 2.0, 1.0, 3.0, 2.0, 2.0, 5.0, 3.0, 0.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-92.375, -89.6240234375, -86.873046875, -84.1220703125, -81.37109375, -78.6201171875, -75.869140625, -73.1181640625, -70.3671875, -67.6162109375, -64.865234375, -62.1142578125, -59.36328125, -56.6123046875, -53.861328125, -51.1103515625, -48.359375, -45.6083984375, -42.857421875, -40.1064453125, -37.35546875, -34.6044921875, -31.853515625, -29.1025390625, -26.3515625, -23.6005859375, -20.849609375, -18.0986328125, -15.34765625, -12.5966796875, -9.845703125, -7.0947265625, -4.34375, -1.5927734375, 1.158203125, 3.9091796875, 6.66015625, 9.4111328125, 12.162109375, 14.9130859375, 17.6640625, 20.4150390625, 23.166015625, 25.9169921875, 28.66796875, 31.4189453125, 34.169921875, 36.9208984375, 39.671875, 42.4228515625, 45.173828125, 47.9248046875, 50.67578125, 53.4267578125, 56.177734375, 58.9287109375, 61.6796875, 64.4306640625, 67.181640625, 69.9326171875, 72.68359375, 75.4345703125, 78.185546875, 80.9365234375, 83.6875]}, "gradients/decoder.transformer.h.12.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 1.0, 4.0, 6.0, 4.0, 3.0, 5.0, 5.0, 4.0, 8.0, 8.0, 15.0, 31.0, 38.0, 102.0, 154.0, 230.0, 418.0, 834.0, 1524.0, 3128.0, 7198.0, 19407.0, 85447.0, 3690900.0, 330569.0, 34235.0, 10951.0, 4500.0, 2113.0, 1120.0, 520.0, 337.0, 162.0, 96.0, 62.0, 43.0, 28.0, 18.0, 12.0, 12.0, 6.0, 4.0, 7.0, 4.0, 3.0, 3.0, 2.0, 3.0, 5.0, 2.0, 0.0, 1.0, 2.0, 0.0, 1.0, 3.0], "bins": [-202.25, -195.875, -189.5, -183.125, -176.75, -170.375, -164.0, -157.625, -151.25, -144.875, -138.5, -132.125, -125.75, -119.375, -113.0, -106.625, -100.25, -93.875, -87.5, -81.125, -74.75, -68.375, -62.0, -55.625, -49.25, -42.875, -36.5, -30.125, -23.75, -17.375, -11.0, -4.625, 1.75, 8.125, 14.5, 20.875, 27.25, 33.625, 40.0, 46.375, 52.75, 59.125, 65.5, 71.875, 78.25, 84.625, 91.0, 97.375, 103.75, 110.125, 116.5, 122.875, 129.25, 135.625, 142.0, 148.375, 154.75, 161.125, 167.5, 173.875, 180.25, 186.625, 193.0, 199.375, 205.75]}, "gradients/decoder.transformer.h.12.ln_2.weight": {"_type": "histogram", "values": [2.0, 1.0, 2.0, 8.0, 9.0, 22.0, 35.0, 69.0, 133.0, 292.0, 225.0, 118.0, 56.0, 26.0, 9.0, 5.0, 3.0, 5.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-259.5345458984375, -233.40792846679688, -207.28131103515625, -181.15467834472656, -155.02806091308594, -128.9014434814453, -102.77481079101562, -76.648193359375, -50.521575927734375, -24.394954681396484, 1.7316665649414062, 27.858291625976562, 53.98490905761719, 80.11152648925781, 106.2381591796875, 132.36477661132812, 158.49139404296875, 184.61801147460938, 210.74462890625, 236.8712615966797, 262.99786376953125, 289.12451171875, 315.2511291503906, 341.37774658203125, 367.5043640136719, 393.6309814453125, 419.7575988769531, 445.88421630859375, 472.0108642578125, 498.137451171875, 524.2640991210938, 550.3907470703125, 576.517333984375, 602.6439819335938, 628.7705688476562, 654.897216796875, 681.0238037109375, 707.1504516601562, 733.277099609375, 759.4036865234375, 785.5302734375, 811.6569213867188, 837.7835083007812, 863.91015625, 890.0367431640625, 916.1633911132812, 942.2900390625, 968.4166259765625, 994.5432739257812, 1020.669921875, 1046.7965087890625, 1072.923095703125, 1099.0498046875, 1125.1763916015625, 1151.302978515625, 1177.4296875, 1203.5562744140625, 1229.682861328125, 1255.8095703125, 1281.9361572265625, 1308.062744140625, 1334.1893310546875, 1360.3160400390625, 1386.442626953125, 1412.5692138671875]}, "gradients/decoder.transformer.h.12.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 3.0, 4.0, 5.0, 9.0, 3.0, 7.0, 8.0, 8.0, 17.0, 14.0, 14.0, 24.0, 29.0, 25.0, 34.0, 41.0, 29.0, 37.0, 30.0, 45.0, 42.0, 41.0, 38.0, 50.0, 44.0, 35.0, 46.0, 32.0, 39.0, 33.0, 36.0, 27.0, 21.0, 26.0, 16.0, 22.0, 9.0, 13.0, 9.0, 9.0, 6.0, 8.0, 6.0, 6.0, 4.0, 2.0, 4.0, 1.0, 1.0, 2.0, 0.0, 4.0], "bins": [-263.22503662109375, -255.7518310546875, -248.27862548828125, -240.805419921875, -233.33221435546875, -225.8590087890625, -218.38580322265625, -210.91261291503906, -203.4394073486328, -195.96620178222656, -188.4929962158203, -181.01979064941406, -173.5465850830078, -166.07339477539062, -158.60018920898438, -151.12698364257812, -143.65377807617188, -136.18057250976562, -128.70736694335938, -121.23416137695312, -113.7609634399414, -106.28775787353516, -98.8145523071289, -91.34135437011719, -83.86813354492188, -76.39492797851562, -68.92172241210938, -61.44852066040039, -53.975318908691406, -46.502113342285156, -39.028907775878906, -31.555706024169922, -24.082504272460938, -16.60930061340332, -9.136096000671387, -1.6628913879394531, 5.810312271118164, 13.283515930175781, 20.75672149658203, 28.229923248291016, 35.703128814697266, 43.176334381103516, 50.6495361328125, 58.12274169921875, 65.595947265625, 73.06915283203125, 80.5423583984375, 88.01555633544922, 95.48876190185547, 102.96196746826172, 110.43517303466797, 117.90837097167969, 125.38157653808594, 132.8547821044922, 140.32798767089844, 147.8011932373047, 155.27439880371094, 162.7476043701172, 170.22080993652344, 177.6940155029297, 185.16722106933594, 192.64041137695312, 200.11361694335938, 207.58682250976562, 215.06002807617188]}, "gradients/decoder.transformer.h.12.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 3.0, 1.0, 3.0, 11.0, 6.0, 3.0, 4.0, 11.0, 13.0, 8.0, 9.0, 25.0, 27.0, 19.0, 25.0, 34.0, 28.0, 32.0, 46.0, 43.0, 38.0, 44.0, 57.0, 42.0, 47.0, 33.0, 40.0, 44.0, 36.0, 45.0, 33.0, 26.0, 23.0, 20.0, 25.0, 18.0, 17.0, 14.0, 13.0, 12.0, 9.0, 5.0, 7.0, 5.0, 4.0, 4.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-35.84375, -34.59814453125, -33.3525390625, -32.10693359375, -30.861328125, -29.61572265625, -28.3701171875, -27.12451171875, -25.87890625, -24.63330078125, -23.3876953125, -22.14208984375, -20.896484375, -19.65087890625, -18.4052734375, -17.15966796875, -15.9140625, -14.66845703125, -13.4228515625, -12.17724609375, -10.931640625, -9.68603515625, -8.4404296875, -7.19482421875, -5.94921875, -4.70361328125, -3.4580078125, -2.21240234375, -0.966796875, 0.27880859375, 1.5244140625, 2.77001953125, 4.015625, 5.26123046875, 6.5068359375, 7.75244140625, 8.998046875, 10.24365234375, 11.4892578125, 12.73486328125, 13.98046875, 15.22607421875, 16.4716796875, 17.71728515625, 18.962890625, 20.20849609375, 21.4541015625, 22.69970703125, 23.9453125, 25.19091796875, 26.4365234375, 27.68212890625, 28.927734375, 30.17333984375, 31.4189453125, 32.66455078125, 33.91015625, 35.15576171875, 36.4013671875, 37.64697265625, 38.892578125, 40.13818359375, 41.3837890625, 42.62939453125, 43.875]}, "gradients/decoder.transformer.h.12.crossattention.c_proj.weight": {"_type": "histogram", "values": [4.0, 1.0, 4.0, 3.0, 3.0, 4.0, 14.0, 20.0, 23.0, 34.0, 35.0, 85.0, 107.0, 149.0, 220.0, 358.0, 506.0, 736.0, 1091.0, 1650.0, 2481.0, 3701.0, 5723.0, 8879.0, 13792.0, 21338.0, 34632.0, 57022.0, 100943.0, 221083.0, 293758.0, 112834.0, 63262.0, 37786.0, 23368.0, 14878.0, 9570.0, 6258.0, 4011.0, 2677.0, 1921.0, 1089.0, 832.0, 562.0, 339.0, 243.0, 156.0, 126.0, 75.0, 55.0, 36.0, 23.0, 22.0, 15.0, 11.0, 12.0, 4.0, 6.0, 1.0, 0.0, 3.0, 0.0, 0.0, 2.0], "bins": [-1.43359375, -1.385955810546875, -1.33831787109375, -1.290679931640625, -1.2430419921875, -1.195404052734375, -1.14776611328125, -1.100128173828125, -1.052490234375, -1.004852294921875, -0.95721435546875, -0.909576416015625, -0.8619384765625, -0.814300537109375, -0.76666259765625, -0.719024658203125, -0.67138671875, -0.623748779296875, -0.57611083984375, -0.528472900390625, -0.4808349609375, -0.433197021484375, -0.38555908203125, -0.337921142578125, -0.290283203125, -0.242645263671875, -0.19500732421875, -0.147369384765625, -0.0997314453125, -0.052093505859375, -0.00445556640625, 0.043182373046875, 0.0908203125, 0.138458251953125, 0.18609619140625, 0.233734130859375, 0.2813720703125, 0.329010009765625, 0.37664794921875, 0.424285888671875, 0.471923828125, 0.519561767578125, 0.56719970703125, 0.614837646484375, 0.6624755859375, 0.710113525390625, 0.75775146484375, 0.805389404296875, 0.85302734375, 0.900665283203125, 0.94830322265625, 0.995941162109375, 1.0435791015625, 1.091217041015625, 1.13885498046875, 1.186492919921875, 1.234130859375, 1.281768798828125, 1.32940673828125, 1.377044677734375, 1.4246826171875, 1.472320556640625, 1.51995849609375, 1.567596435546875, 1.615234375]}, "gradients/decoder.transformer.h.12.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 4.0, 2.0, 5.0, 2.0, 6.0, 5.0, 4.0, 9.0, 7.0, 5.0, 12.0, 8.0, 16.0, 21.0, 27.0, 17.0, 22.0, 30.0, 38.0, 24.0, 29.0, 34.0, 38.0, 42.0, 37.0, 31.0, 39.0, 1060.0, 34.0, 36.0, 35.0, 37.0, 32.0, 28.0, 32.0, 35.0, 28.0, 17.0, 23.0, 18.0, 15.0, 19.0, 13.0, 11.0, 13.0, 9.0, 6.0, 8.0, 5.0, 3.0, 3.0, 2.0, 2.0, 2.0, 1.0, 0.0, 3.0, 1.0], "bins": [-23.0625, -22.370361328125, -21.67822265625, -20.986083984375, -20.2939453125, -19.601806640625, -18.90966796875, -18.217529296875, -17.525390625, -16.833251953125, -16.14111328125, -15.448974609375, -14.7568359375, -14.064697265625, -13.37255859375, -12.680419921875, -11.98828125, -11.296142578125, -10.60400390625, -9.911865234375, -9.2197265625, -8.527587890625, -7.83544921875, -7.143310546875, -6.451171875, -5.759033203125, -5.06689453125, -4.374755859375, -3.6826171875, -2.990478515625, -2.29833984375, -1.606201171875, -0.9140625, -0.221923828125, 0.47021484375, 1.162353515625, 1.8544921875, 2.546630859375, 3.23876953125, 3.930908203125, 4.623046875, 5.315185546875, 6.00732421875, 6.699462890625, 7.3916015625, 8.083740234375, 8.77587890625, 9.468017578125, 10.16015625, 10.852294921875, 11.54443359375, 12.236572265625, 12.9287109375, 13.620849609375, 14.31298828125, 15.005126953125, 15.697265625, 16.389404296875, 17.08154296875, 17.773681640625, 18.4658203125, 19.157958984375, 19.85009765625, 20.542236328125, 21.234375]}, "gradients/decoder.transformer.h.12.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 3.0, 4.0, 7.0, 6.0, 5.0, 17.0, 26.0, 31.0, 44.0, 64.0, 91.0, 118.0, 159.0, 241.0, 364.0, 503.0, 682.0, 1006.0, 1402.0, 2053.0, 2872.0, 4264.0, 6293.0, 9172.0, 13536.0, 19917.0, 30335.0, 47117.0, 74390.0, 128081.0, 1342736.0, 161685.0, 88216.0, 54375.0, 35339.0, 23317.0, 15280.0, 10481.0, 6964.0, 4848.0, 3367.0, 2326.0, 1625.0, 1181.0, 775.0, 531.0, 388.0, 253.0, 186.0, 152.0, 92.0, 67.0, 52.0, 30.0, 27.0, 18.0, 17.0, 8.0, 5.0, 1.0, 1.0, 3.0, 2.0], "bins": [-0.9248046875, -0.895599365234375, -0.86639404296875, -0.837188720703125, -0.8079833984375, -0.778778076171875, -0.74957275390625, -0.720367431640625, -0.691162109375, -0.661956787109375, -0.63275146484375, -0.603546142578125, -0.5743408203125, -0.545135498046875, -0.51593017578125, -0.486724853515625, -0.45751953125, -0.428314208984375, -0.39910888671875, -0.369903564453125, -0.3406982421875, -0.311492919921875, -0.28228759765625, -0.253082275390625, -0.223876953125, -0.194671630859375, -0.16546630859375, -0.136260986328125, -0.1070556640625, -0.077850341796875, -0.04864501953125, -0.019439697265625, 0.009765625, 0.038970947265625, 0.06817626953125, 0.097381591796875, 0.1265869140625, 0.155792236328125, 0.18499755859375, 0.214202880859375, 0.243408203125, 0.272613525390625, 0.30181884765625, 0.331024169921875, 0.3602294921875, 0.389434814453125, 0.41864013671875, 0.447845458984375, 0.47705078125, 0.506256103515625, 0.53546142578125, 0.564666748046875, 0.5938720703125, 0.623077392578125, 0.65228271484375, 0.681488037109375, 0.710693359375, 0.739898681640625, 0.76910400390625, 0.798309326171875, 0.8275146484375, 0.856719970703125, 0.88592529296875, 0.915130615234375, 0.9443359375]}, "gradients/decoder.transformer.h.12.crossattention.q_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 1.0, 3.0, 7.0, 5.0, 2.0, 6.0, 8.0, 8.0, 9.0, 17.0, 20.0, 15.0, 13.0, 32.0, 29.0, 48.0, 66.0, 71.0, 137.0, 117.0, 78.0, 72.0, 43.0, 32.0, 36.0, 28.0, 17.0, 9.0, 16.0, 9.0, 12.0, 8.0, 7.0, 6.0, 5.0, 3.0, 2.0, 5.0, 1.0, 3.0, 1.0, 4.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.537271499633789e-05, -5.338899791240692e-05, -5.140528082847595e-05, -4.942156374454498e-05, -4.7437846660614014e-05, -4.5454129576683044e-05, -4.3470412492752075e-05, -4.1486695408821106e-05, -3.950297832489014e-05, -3.751926124095917e-05, -3.55355441570282e-05, -3.355182707309723e-05, -3.156810998916626e-05, -2.958439290523529e-05, -2.760067582130432e-05, -2.5616958737373352e-05, -2.3633241653442383e-05, -2.1649524569511414e-05, -1.9665807485580444e-05, -1.7682090401649475e-05, -1.5698373317718506e-05, -1.3714656233787537e-05, -1.1730939149856567e-05, -9.747222065925598e-06, -7.763504981994629e-06, -5.77978789806366e-06, -3.7960708141326904e-06, -1.8123537302017212e-06, 1.7136335372924805e-07, 2.1550804376602173e-06, 4.1387975215911865e-06, 6.122514605522156e-06, 8.106231689453125e-06, 1.0089948773384094e-05, 1.2073665857315063e-05, 1.4057382941246033e-05, 1.6041100025177002e-05, 1.802481710910797e-05, 2.000853419303894e-05, 2.199225127696991e-05, 2.397596836090088e-05, 2.5959685444831848e-05, 2.7943402528762817e-05, 2.9927119612693787e-05, 3.1910836696624756e-05, 3.3894553780555725e-05, 3.5878270864486694e-05, 3.7861987948417664e-05, 3.984570503234863e-05, 4.18294221162796e-05, 4.381313920021057e-05, 4.579685628414154e-05, 4.778057336807251e-05, 4.976429045200348e-05, 5.174800753593445e-05, 5.373172461986542e-05, 5.571544170379639e-05, 5.7699158787727356e-05, 5.9682875871658325e-05, 6.16665929555893e-05, 6.365031003952026e-05, 6.563402712345123e-05, 6.76177442073822e-05, 6.960146129131317e-05, 7.158517837524414e-05]}, "gradients/decoder.transformer.h.12.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 5.0, 2.0, 4.0, 2.0, 4.0, 1.0, 8.0, 11.0, 13.0, 12.0, 18.0, 21.0, 19.0, 34.0, 36.0, 61.0, 95.0, 192.0, 1055.0, 21005.0, 976526.0, 47007.0, 1859.0, 243.0, 100.0, 58.0, 38.0, 25.0, 24.0, 21.0, 12.0, 5.0, 11.0, 7.0, 4.0, 7.0, 7.0, 3.0, 3.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.0012416839599609375, -0.0012058019638061523, -0.0011699199676513672, -0.001134037971496582, -0.0010981559753417969, -0.0010622739791870117, -0.0010263919830322266, -0.0009905099868774414, -0.0009546279907226562, -0.0009187459945678711, -0.0008828639984130859, -0.0008469820022583008, -0.0008111000061035156, -0.0007752180099487305, -0.0007393360137939453, -0.0007034540176391602, -0.000667572021484375, -0.0006316900253295898, -0.0005958080291748047, -0.0005599260330200195, -0.0005240440368652344, -0.0004881620407104492, -0.00045228004455566406, -0.0004163980484008789, -0.00038051605224609375, -0.0003446340560913086, -0.00030875205993652344, -0.0002728700637817383, -0.00023698806762695312, -0.00020110607147216797, -0.0001652240753173828, -0.00012934207916259766, -9.34600830078125e-05, -5.7578086853027344e-05, -2.1696090698242188e-05, 1.4185905456542969e-05, 5.0067901611328125e-05, 8.594989776611328e-05, 0.00012183189392089844, 0.0001577138900756836, 0.00019359588623046875, 0.0002294778823852539, 0.00026535987854003906, 0.0003012418746948242, 0.0003371238708496094, 0.00037300586700439453, 0.0004088878631591797, 0.00044476985931396484, 0.00048065185546875, 0.0005165338516235352, 0.0005524158477783203, 0.0005882978439331055, 0.0006241798400878906, 0.0006600618362426758, 0.0006959438323974609, 0.0007318258285522461, 0.0007677078247070312, 0.0008035898208618164, 0.0008394718170166016, 0.0008753538131713867, 0.0009112358093261719, 0.000947117805480957, 0.0009829998016357422, 0.0010188817977905273, 0.0010547637939453125]}, "gradients/decoder.transformer.h.12.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 6.0, 8.0, 16.0, 39.0, 98.0, 233.0, 352.0, 157.0, 52.0, 31.0, 14.0, 3.0, 6.0, 1.0, 5.0], "bins": [-0.00018189776164945215, -0.00017860790831036866, -0.00017531805497128516, -0.00017202820163220167, -0.00016873834829311818, -0.00016544849495403469, -0.00016215862706303596, -0.00015886877372395247, -0.00015557892038486898, -0.0001522890670457855, -0.000148999213706702, -0.0001457093603676185, -0.000142419507028535, -0.0001391296391375363, -0.0001358397857984528, -0.0001325499324593693, -0.0001292600791202858, -0.00012597022578120232, -0.00012268037244211882, -0.00011939051182707772, -0.00011610065848799422, -0.00011281080514891073, -0.00010952095180982724, -0.00010623109119478613, -0.00010294124513166025, -9.965139179257676e-05, -9.636153845349327e-05, -9.307167783845216e-05, -8.978182449936867e-05, -8.649197116028517e-05, -8.320211782120168e-05, -7.991226448211819e-05, -7.662240386707708e-05, -7.333255052799359e-05, -7.00426971889101e-05, -6.675283657386899e-05, -6.34629832347855e-05, -6.0173129895702004e-05, -5.688327655661851e-05, -5.359341957955621e-05, -5.030356624047272e-05, -4.7013712901389226e-05, -4.3723855924326926e-05, -4.0434002585243434e-05, -3.7144145608181134e-05, -3.385429226909764e-05, -3.056443529203534e-05, -2.727458195295185e-05, -2.3984726794878952e-05, -2.0694871636806056e-05, -1.740501647873316e-05, -1.4115162230154965e-05, -1.0825307072082069e-05, -7.535452823503874e-06, -4.245597665430978e-06, -9.557425073580816e-07, 2.3341126507148147e-06, 5.623967808787711e-06, 8.913822966860607e-06, 1.2203677215438802e-05, 1.54935332830064e-05, 1.8783386622089893e-05, 2.207324178016279e-05, 2.5363096938235685e-05, 2.865295209630858e-05]}, "gradients/decoder.transformer.h.12.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 2.0, 4.0, 1.0, 2.0, 3.0, 5.0, 1.0, 12.0, 12.0, 7.0, 10.0, 11.0, 12.0, 13.0, 19.0, 32.0, 23.0, 26.0, 29.0, 22.0, 42.0, 32.0, 36.0, 43.0, 34.0, 42.0, 47.0, 33.0, 38.0, 29.0, 37.0, 29.0, 30.0, 42.0, 36.0, 35.0, 20.0, 16.0, 22.0, 22.0, 20.0, 17.0, 11.0, 9.0, 9.0, 5.0, 7.0, 8.0, 9.0, 2.0, 6.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0], "bins": [-2.568960189819336e-05, -2.4870038032531738e-05, -2.4050474166870117e-05, -2.3230910301208496e-05, -2.2411346435546875e-05, -2.1591782569885254e-05, -2.0772218704223633e-05, -1.9952654838562012e-05, -1.913309097290039e-05, -1.831352710723877e-05, -1.749396324157715e-05, -1.6674399375915527e-05, -1.5854835510253906e-05, -1.5035271644592285e-05, -1.4215707778930664e-05, -1.3396143913269043e-05, -1.2576580047607422e-05, -1.17570161819458e-05, -1.093745231628418e-05, -1.0117888450622559e-05, -9.298324584960938e-06, -8.478760719299316e-06, -7.659196853637695e-06, -6.839632987976074e-06, -6.020069122314453e-06, -5.200505256652832e-06, -4.380941390991211e-06, -3.56137752532959e-06, -2.7418136596679688e-06, -1.9222497940063477e-06, -1.1026859283447266e-06, -2.8312206268310547e-07, 5.364418029785156e-07, 1.3560056686401367e-06, 2.175569534301758e-06, 2.995133399963379e-06, 3.814697265625e-06, 4.634261131286621e-06, 5.453824996948242e-06, 6.273388862609863e-06, 7.092952728271484e-06, 7.912516593933105e-06, 8.732080459594727e-06, 9.551644325256348e-06, 1.0371208190917969e-05, 1.119077205657959e-05, 1.2010335922241211e-05, 1.2829899787902832e-05, 1.3649463653564453e-05, 1.4469027519226074e-05, 1.5288591384887695e-05, 1.6108155250549316e-05, 1.6927719116210938e-05, 1.774728298187256e-05, 1.856684684753418e-05, 1.93864107131958e-05, 2.0205974578857422e-05, 2.1025538444519043e-05, 2.1845102310180664e-05, 2.2664666175842285e-05, 2.3484230041503906e-05, 2.4303793907165527e-05, 2.512335777282715e-05, 2.594292163848877e-05, 2.676248550415039e-05]}, "gradients/decoder.transformer.h.12.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 3.0, 1.0, 3.0, 11.0, 6.0, 3.0, 4.0, 11.0, 13.0, 8.0, 9.0, 25.0, 27.0, 19.0, 25.0, 34.0, 28.0, 32.0, 46.0, 43.0, 38.0, 44.0, 57.0, 42.0, 47.0, 33.0, 40.0, 44.0, 36.0, 45.0, 33.0, 26.0, 23.0, 20.0, 25.0, 18.0, 17.0, 14.0, 13.0, 12.0, 9.0, 5.0, 7.0, 5.0, 4.0, 4.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-35.84375, -34.59814453125, -33.3525390625, -32.10693359375, -30.861328125, -29.61572265625, -28.3701171875, -27.12451171875, -25.87890625, -24.63330078125, -23.3876953125, -22.14208984375, -20.896484375, -19.65087890625, -18.4052734375, -17.15966796875, -15.9140625, -14.66845703125, -13.4228515625, -12.17724609375, -10.931640625, -9.68603515625, -8.4404296875, -7.19482421875, -5.94921875, -4.70361328125, -3.4580078125, -2.21240234375, -0.966796875, 0.27880859375, 1.5244140625, 2.77001953125, 4.015625, 5.26123046875, 6.5068359375, 7.75244140625, 8.998046875, 10.24365234375, 11.4892578125, 12.73486328125, 13.98046875, 15.22607421875, 16.4716796875, 17.71728515625, 18.962890625, 20.20849609375, 21.4541015625, 22.69970703125, 23.9453125, 25.19091796875, 26.4365234375, 27.68212890625, 28.927734375, 30.17333984375, 31.4189453125, 32.66455078125, 33.91015625, 35.15576171875, 36.4013671875, 37.64697265625, 38.892578125, 40.13818359375, 41.3837890625, 42.62939453125, 43.875]}, "gradients/decoder.transformer.h.12.attn.c_proj.weight": {"_type": "histogram", "values": [2.0, 2.0, 0.0, 2.0, 1.0, 5.0, 2.0, 3.0, 12.0, 10.0, 10.0, 13.0, 26.0, 37.0, 33.0, 57.0, 83.0, 107.0, 163.0, 241.0, 382.0, 539.0, 845.0, 1421.0, 2307.0, 4389.0, 10803.0, 50042.0, 724192.0, 215026.0, 22566.0, 6812.0, 3322.0, 1874.0, 1101.0, 696.0, 449.0, 309.0, 207.0, 123.0, 93.0, 60.0, 44.0, 48.0, 28.0, 24.0, 17.0, 13.0, 9.0, 6.0, 6.0, 3.0, 2.0, 0.0, 3.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-35.1875, -33.96484375, -32.7421875, -31.51953125, -30.296875, -29.07421875, -27.8515625, -26.62890625, -25.40625, -24.18359375, -22.9609375, -21.73828125, -20.515625, -19.29296875, -18.0703125, -16.84765625, -15.625, -14.40234375, -13.1796875, -11.95703125, -10.734375, -9.51171875, -8.2890625, -7.06640625, -5.84375, -4.62109375, -3.3984375, -2.17578125, -0.953125, 0.26953125, 1.4921875, 2.71484375, 3.9375, 5.16015625, 6.3828125, 7.60546875, 8.828125, 10.05078125, 11.2734375, 12.49609375, 13.71875, 14.94140625, 16.1640625, 17.38671875, 18.609375, 19.83203125, 21.0546875, 22.27734375, 23.5, 24.72265625, 25.9453125, 27.16796875, 28.390625, 29.61328125, 30.8359375, 32.05859375, 33.28125, 34.50390625, 35.7265625, 36.94921875, 38.171875, 39.39453125, 40.6171875, 41.83984375, 43.0625]}, "gradients/decoder.transformer.h.12.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 2.0, 2.0, 0.0, 3.0, 8.0, 4.0, 4.0, 3.0, 8.0, 8.0, 22.0, 12.0, 20.0, 26.0, 20.0, 14.0, 31.0, 24.0, 19.0, 34.0, 27.0, 46.0, 38.0, 45.0, 62.0, 1962.0, 129.0, 54.0, 53.0, 36.0, 46.0, 35.0, 31.0, 35.0, 36.0, 16.0, 30.0, 23.0, 24.0, 11.0, 14.0, 8.0, 6.0, 5.0, 7.0, 7.0, 2.0, 4.0, 4.0, 4.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-112.5, -108.958984375, -105.41796875, -101.876953125, -98.3359375, -94.794921875, -91.25390625, -87.712890625, -84.171875, -80.630859375, -77.08984375, -73.548828125, -70.0078125, -66.466796875, -62.92578125, -59.384765625, -55.84375, -52.302734375, -48.76171875, -45.220703125, -41.6796875, -38.138671875, -34.59765625, -31.056640625, -27.515625, -23.974609375, -20.43359375, -16.892578125, -13.3515625, -9.810546875, -6.26953125, -2.728515625, 0.8125, 4.353515625, 7.89453125, 11.435546875, 14.9765625, 18.517578125, 22.05859375, 25.599609375, 29.140625, 32.681640625, 36.22265625, 39.763671875, 43.3046875, 46.845703125, 50.38671875, 53.927734375, 57.46875, 61.009765625, 64.55078125, 68.091796875, 71.6328125, 75.173828125, 78.71484375, 82.255859375, 85.796875, 89.337890625, 92.87890625, 96.419921875, 99.9609375, 103.501953125, 107.04296875, 110.583984375, 114.125]}, "gradients/decoder.transformer.h.12.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 5.0, 6.0, 6.0, 7.0, 9.0, 10.0, 9.0, 15.0, 18.0, 27.0, 51.0, 54.0, 72.0, 87.0, 128.0, 192.0, 279.0, 638.0, 2638.0, 50425.0, 3083123.0, 5880.0, 960.0, 331.0, 199.0, 135.0, 102.0, 71.0, 53.0, 36.0, 41.0, 21.0, 30.0, 16.0, 12.0, 5.0, 7.0, 4.0, 8.0, 0.0, 5.0, 1.0, 0.0, 3.0, 2.0, 0.0, 2.0, 1.0], "bins": [-329.25, -320.15234375, -311.0546875, -301.95703125, -292.859375, -283.76171875, -274.6640625, -265.56640625, -256.46875, -247.37109375, -238.2734375, -229.17578125, -220.078125, -210.98046875, -201.8828125, -192.78515625, -183.6875, -174.58984375, -165.4921875, -156.39453125, -147.296875, -138.19921875, -129.1015625, -120.00390625, -110.90625, -101.80859375, -92.7109375, -83.61328125, -74.515625, -65.41796875, -56.3203125, -47.22265625, -38.125, -29.02734375, -19.9296875, -10.83203125, -1.734375, 7.36328125, 16.4609375, 25.55859375, 34.65625, 43.75390625, 52.8515625, 61.94921875, 71.046875, 80.14453125, 89.2421875, 98.33984375, 107.4375, 116.53515625, 125.6328125, 134.73046875, 143.828125, 152.92578125, 162.0234375, 171.12109375, 180.21875, 189.31640625, 198.4140625, 207.51171875, 216.609375, 225.70703125, 234.8046875, 243.90234375, 253.0]}, "gradients/decoder.transformer.h.12.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 13.0, 150.0, 682.0, 158.0, 14.0], "bins": [-1178.568359375, -1159.3905029296875, -1140.2127685546875, -1121.034912109375, -1101.857177734375, -1082.6793212890625, -1063.50146484375, -1044.32373046875, -1025.1458740234375, -1005.9680786132812, -986.790283203125, -967.6124267578125, -948.4346313476562, -929.2568359375, -910.0789794921875, -890.9011840820312, -871.723388671875, -852.5455932617188, -833.3677978515625, -814.18994140625, -795.0121459960938, -775.8343505859375, -756.656494140625, -737.4786987304688, -718.3009033203125, -699.1231079101562, -679.9453125, -660.7674560546875, -641.5896606445312, -622.411865234375, -603.2340087890625, -584.0562133789062, -564.8784790039062, -545.70068359375, -526.5228271484375, -507.34503173828125, -488.167236328125, -468.98944091796875, -449.8116149902344, -430.6337890625, -411.45599365234375, -392.2781982421875, -373.1003723144531, -353.92254638671875, -334.7447509765625, -315.56695556640625, -296.3891296386719, -277.2113037109375, -258.03350830078125, -238.85569763183594, -219.67788696289062, -200.5000762939453, -181.322265625, -162.1444549560547, -142.96664428710938, -123.78883361816406, -104.61102294921875, -85.43321228027344, -66.25540161132812, -47.07759094238281, -27.8997802734375, -8.721969604492188, 10.455841064453125, 29.633651733398438, 48.81146240234375]}, "gradients/decoder.transformer.h.12.ln_1.bias": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 1.0, 3.0, 1.0, 0.0, 2.0, 6.0, 6.0, 9.0, 7.0, 10.0, 6.0, 8.0, 25.0, 19.0, 18.0, 21.0, 31.0, 23.0, 33.0, 31.0, 49.0, 42.0, 36.0, 41.0, 39.0, 42.0, 34.0, 43.0, 48.0, 47.0, 40.0, 38.0, 36.0, 34.0, 22.0, 34.0, 15.0, 17.0, 18.0, 18.0, 9.0, 12.0, 11.0, 6.0, 5.0, 7.0, 3.0, 4.0, 1.0, 1.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-281.3603210449219, -271.6579284667969, -261.95550537109375, -252.25311279296875, -242.55072021484375, -232.84832763671875, -223.1459197998047, -213.44351196289062, -203.74111938476562, -194.03872680664062, -184.33631896972656, -174.6339111328125, -164.9315185546875, -155.2291259765625, -145.52671813964844, -135.82431030273438, -126.12191772460938, -116.41951751708984, -106.71711730957031, -97.01471710205078, -87.31231689453125, -77.60991668701172, -67.90751647949219, -58.205116271972656, -48.502716064453125, -38.800315856933594, -29.097915649414062, -19.39551544189453, -9.693115234375, 0.00928497314453125, 9.711685180664062, 19.414085388183594, 29.116485595703125, 38.818885803222656, 48.52128601074219, 58.22368621826172, 67.92608642578125, 77.62848663330078, 87.33088684082031, 97.03328704833984, 106.73568725585938, 116.4380874633789, 126.14048767089844, 135.8428955078125, 145.5452880859375, 155.2476806640625, 164.95008850097656, 174.65249633789062, 184.35488891601562, 194.05728149414062, 203.7596893310547, 213.46209716796875, 223.16448974609375, 232.86688232421875, 242.5692901611328, 252.27169799804688, 261.9740905761719, 271.6764831542969, 281.37890625, 291.081298828125, 300.78369140625, 310.486083984375, 320.1884765625, 329.8908996582031, 339.5932922363281]}, "gradients/decoder.transformer.h.11.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 8.0, 3.0, 8.0, 5.0, 8.0, 8.0, 11.0, 10.0, 21.0, 18.0, 19.0, 29.0, 27.0, 24.0, 39.0, 36.0, 34.0, 49.0, 44.0, 43.0, 48.0, 52.0, 35.0, 40.0, 47.0, 44.0, 38.0, 35.0, 31.0, 17.0, 26.0, 21.0, 23.0, 25.0, 16.0, 10.0, 16.0, 5.0, 12.0, 10.0, 5.0, 3.0, 2.0, 4.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-36.375, -35.0986328125, -33.822265625, -32.5458984375, -31.26953125, -29.9931640625, -28.716796875, -27.4404296875, -26.1640625, -24.8876953125, -23.611328125, -22.3349609375, -21.05859375, -19.7822265625, -18.505859375, -17.2294921875, -15.953125, -14.6767578125, -13.400390625, -12.1240234375, -10.84765625, -9.5712890625, -8.294921875, -7.0185546875, -5.7421875, -4.4658203125, -3.189453125, -1.9130859375, -0.63671875, 0.6396484375, 1.916015625, 3.1923828125, 4.46875, 5.7451171875, 7.021484375, 8.2978515625, 9.57421875, 10.8505859375, 12.126953125, 13.4033203125, 14.6796875, 15.9560546875, 17.232421875, 18.5087890625, 19.78515625, 21.0615234375, 22.337890625, 23.6142578125, 24.890625, 26.1669921875, 27.443359375, 28.7197265625, 29.99609375, 31.2724609375, 32.548828125, 33.8251953125, 35.1015625, 36.3779296875, 37.654296875, 38.9306640625, 40.20703125, 41.4833984375, 42.759765625, 44.0361328125, 45.3125]}, "gradients/decoder.transformer.h.11.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 2.0, 5.0, 1.0, 8.0, 11.0, 9.0, 14.0, 16.0, 28.0, 41.0, 68.0, 108.0, 153.0, 177.0, 287.0, 406.0, 612.0, 1000.0, 1714.0, 2937.0, 5573.0, 11900.0, 51021.0, 886964.0, 3031486.0, 165579.0, 17289.0, 7293.0, 3821.0, 2077.0, 1240.0, 793.0, 508.0, 338.0, 231.0, 163.0, 126.0, 76.0, 63.0, 42.0, 29.0, 18.0, 18.0, 16.0, 10.0, 9.0, 5.0, 6.0, 2.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0], "bins": [-86.5625, -83.4951171875, -80.427734375, -77.3603515625, -74.29296875, -71.2255859375, -68.158203125, -65.0908203125, -62.0234375, -58.9560546875, -55.888671875, -52.8212890625, -49.75390625, -46.6865234375, -43.619140625, -40.5517578125, -37.484375, -34.4169921875, -31.349609375, -28.2822265625, -25.21484375, -22.1474609375, -19.080078125, -16.0126953125, -12.9453125, -9.8779296875, -6.810546875, -3.7431640625, -0.67578125, 2.3916015625, 5.458984375, 8.5263671875, 11.59375, 14.6611328125, 17.728515625, 20.7958984375, 23.86328125, 26.9306640625, 29.998046875, 33.0654296875, 36.1328125, 39.2001953125, 42.267578125, 45.3349609375, 48.40234375, 51.4697265625, 54.537109375, 57.6044921875, 60.671875, 63.7392578125, 66.806640625, 69.8740234375, 72.94140625, 76.0087890625, 79.076171875, 82.1435546875, 85.2109375, 88.2783203125, 91.345703125, 94.4130859375, 97.48046875, 100.5478515625, 103.615234375, 106.6826171875, 109.75]}, "gradients/decoder.transformer.h.11.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 4.0, 0.0, 0.0, 4.0, 1.0, 1.0, 1.0, 3.0, 1.0, 7.0, 4.0, 6.0, 10.0, 18.0, 18.0, 24.0, 26.0, 31.0, 66.0, 141.0, 265.0, 734.0, 1268.0, 775.0, 312.0, 122.0, 64.0, 45.0, 38.0, 18.0, 16.0, 12.0, 14.0, 5.0, 4.0, 3.0, 10.0, 1.0, 0.0, 4.0, 0.0, 2.0, 3.0, 3.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-89.4375, -86.41015625, -83.3828125, -80.35546875, -77.328125, -74.30078125, -71.2734375, -68.24609375, -65.21875, -62.19140625, -59.1640625, -56.13671875, -53.109375, -50.08203125, -47.0546875, -44.02734375, -41.0, -37.97265625, -34.9453125, -31.91796875, -28.890625, -25.86328125, -22.8359375, -19.80859375, -16.78125, -13.75390625, -10.7265625, -7.69921875, -4.671875, -1.64453125, 1.3828125, 4.41015625, 7.4375, 10.46484375, 13.4921875, 16.51953125, 19.546875, 22.57421875, 25.6015625, 28.62890625, 31.65625, 34.68359375, 37.7109375, 40.73828125, 43.765625, 46.79296875, 49.8203125, 52.84765625, 55.875, 58.90234375, 61.9296875, 64.95703125, 67.984375, 71.01171875, 74.0390625, 77.06640625, 80.09375, 83.12109375, 86.1484375, 89.17578125, 92.203125, 95.23046875, 98.2578125, 101.28515625, 104.3125]}, "gradients/decoder.transformer.h.11.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 3.0, 3.0, 4.0, 3.0, 0.0, 0.0, 3.0, 11.0, 4.0, 5.0, 12.0, 16.0, 17.0, 39.0, 70.0, 194.0, 468.0, 1146.0, 3350.0, 13470.0, 136179.0, 3968166.0, 58203.0, 8903.0, 2440.0, 908.0, 370.0, 137.0, 61.0, 33.0, 23.0, 15.0, 8.0, 5.0, 6.0, 2.0, 2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 6.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-347.25, -337.14453125, -327.0390625, -316.93359375, -306.828125, -296.72265625, -286.6171875, -276.51171875, -266.40625, -256.30078125, -246.1953125, -236.08984375, -225.984375, -215.87890625, -205.7734375, -195.66796875, -185.5625, -175.45703125, -165.3515625, -155.24609375, -145.140625, -135.03515625, -124.9296875, -114.82421875, -104.71875, -94.61328125, -84.5078125, -74.40234375, -64.296875, -54.19140625, -44.0859375, -33.98046875, -23.875, -13.76953125, -3.6640625, 6.44140625, 16.546875, 26.65234375, 36.7578125, 46.86328125, 56.96875, 67.07421875, 77.1796875, 87.28515625, 97.390625, 107.49609375, 117.6015625, 127.70703125, 137.8125, 147.91796875, 158.0234375, 168.12890625, 178.234375, 188.33984375, 198.4453125, 208.55078125, 218.65625, 228.76171875, 238.8671875, 248.97265625, 259.078125, 269.18359375, 279.2890625, 289.39453125, 299.5]}, "gradients/decoder.transformer.h.11.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 7.0, 1.0, 11.0, 7.0, 16.0, 24.0, 45.0, 73.0, 176.0, 213.0, 197.0, 97.0, 52.0, 32.0, 21.0, 13.0, 13.0, 5.0, 4.0, 2.0, 1.0, 2.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-411.0187072753906, -394.3304748535156, -377.6422119140625, -360.9539794921875, -344.2657470703125, -327.5775146484375, -310.8892822265625, -294.2010192871094, -277.5127868652344, -260.8245544433594, -244.1363067626953, -227.44805908203125, -210.75982666015625, -194.07159423828125, -177.3833465576172, -160.69509887695312, -144.00686645507812, -127.3186264038086, -110.63038635253906, -93.94214630126953, -77.25390625, -60.56566619873047, -43.87742614746094, -27.189186096191406, -10.500946044921875, 6.187294006347656, 22.875534057617188, 39.56377410888672, 56.25201416015625, 72.94025421142578, 89.62849426269531, 106.31673431396484, 123.0050048828125, 139.6932373046875, 156.38148498535156, 173.06973266601562, 189.75796508789062, 206.44619750976562, 223.1344451904297, 239.82269287109375, 256.51092529296875, 273.19915771484375, 289.88739013671875, 306.5756530761719, 323.2638854980469, 339.9521179199219, 356.640380859375, 373.32861328125, 390.016845703125, 406.705078125, 423.393310546875, 440.0815734863281, 456.7698059082031, 473.4580383300781, 490.14630126953125, 506.83453369140625, 523.5227661132812, 540.2109985351562, 556.8992309570312, 573.5874633789062, 590.2757568359375, 606.9639892578125, 623.6522216796875, 640.3404541015625, 657.0286865234375]}, "gradients/decoder.transformer.h.11.ln_2.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 4.0, 2.0, 2.0, 3.0, 6.0, 9.0, 4.0, 7.0, 12.0, 5.0, 10.0, 11.0, 21.0, 14.0, 20.0, 16.0, 28.0, 37.0, 20.0, 30.0, 34.0, 32.0, 39.0, 37.0, 31.0, 43.0, 43.0, 46.0, 34.0, 37.0, 43.0, 22.0, 31.0, 39.0, 29.0, 25.0, 29.0, 26.0, 28.0, 14.0, 17.0, 14.0, 12.0, 13.0, 11.0, 4.0, 2.0, 6.0, 4.0, 2.0, 4.0, 3.0, 0.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-195.2861328125, -189.0306396484375, -182.775146484375, -176.51966857910156, -170.26417541503906, -164.00868225097656, -157.75320434570312, -151.49771118164062, -145.24221801757812, -138.98672485351562, -132.73123168945312, -126.47575378417969, -120.22026062011719, -113.96476745605469, -107.70928192138672, -101.45379638671875, -95.19830322265625, -88.94281005859375, -82.68732452392578, -76.43183898925781, -70.17634582519531, -63.92085647583008, -57.665367126464844, -51.40987777709961, -45.154388427734375, -38.89889907836914, -32.643409729003906, -26.387920379638672, -20.132431030273438, -13.876941680908203, -7.621452331542969, -1.3659629821777344, 4.8895263671875, 11.145015716552734, 17.40050506591797, 23.655994415283203, 29.911483764648438, 36.16697311401367, 42.422462463378906, 48.67795181274414, 54.933441162109375, 61.18893051147461, 67.44441986083984, 73.69990539550781, 79.95539855957031, 86.21089172363281, 92.46637725830078, 98.72186279296875, 104.97735595703125, 111.23284912109375, 117.48833465576172, 123.74382019042969, 129.9993133544922, 136.2548065185547, 142.51028442382812, 148.76577758789062, 155.02127075195312, 161.27676391601562, 167.53225708007812, 173.78773498535156, 180.04322814941406, 186.29872131347656, 192.55419921875, 198.8096923828125, 205.065185546875]}, "gradients/decoder.transformer.h.11.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 4.0, 3.0, 4.0, 2.0, 2.0, 9.0, 10.0, 18.0, 11.0, 19.0, 15.0, 29.0, 18.0, 40.0, 37.0, 38.0, 28.0, 43.0, 42.0, 53.0, 48.0, 48.0, 45.0, 42.0, 44.0, 39.0, 38.0, 48.0, 35.0, 28.0, 34.0, 19.0, 25.0, 23.0, 18.0, 14.0, 6.0, 11.0, 8.0, 6.0, 4.0, 2.0, 3.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-38.84375, -37.45068359375, -36.0576171875, -34.66455078125, -33.271484375, -31.87841796875, -30.4853515625, -29.09228515625, -27.69921875, -26.30615234375, -24.9130859375, -23.52001953125, -22.126953125, -20.73388671875, -19.3408203125, -17.94775390625, -16.5546875, -15.16162109375, -13.7685546875, -12.37548828125, -10.982421875, -9.58935546875, -8.1962890625, -6.80322265625, -5.41015625, -4.01708984375, -2.6240234375, -1.23095703125, 0.162109375, 1.55517578125, 2.9482421875, 4.34130859375, 5.734375, 7.12744140625, 8.5205078125, 9.91357421875, 11.306640625, 12.69970703125, 14.0927734375, 15.48583984375, 16.87890625, 18.27197265625, 19.6650390625, 21.05810546875, 22.451171875, 23.84423828125, 25.2373046875, 26.63037109375, 28.0234375, 29.41650390625, 30.8095703125, 32.20263671875, 33.595703125, 34.98876953125, 36.3818359375, 37.77490234375, 39.16796875, 40.56103515625, 41.9541015625, 43.34716796875, 44.740234375, 46.13330078125, 47.5263671875, 48.91943359375, 50.3125]}, "gradients/decoder.transformer.h.11.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 2.0, 10.0, 5.0, 15.0, 15.0, 25.0, 41.0, 58.0, 92.0, 135.0, 174.0, 283.0, 448.0, 677.0, 1117.0, 1696.0, 2798.0, 4456.0, 7290.0, 11867.0, 19730.0, 33352.0, 59518.0, 111470.0, 314561.0, 246962.0, 100212.0, 53389.0, 31241.0, 18229.0, 10916.0, 6631.0, 4131.0, 2532.0, 1666.0, 1030.0, 635.0, 418.0, 265.0, 160.0, 106.0, 51.0, 40.0, 34.0, 21.0, 17.0, 12.0, 8.0, 10.0, 3.0, 5.0, 5.0, 2.0, 1.0, 2.0], "bins": [-1.8193359375, -1.7657012939453125, -1.712066650390625, -1.6584320068359375, -1.60479736328125, -1.5511627197265625, -1.497528076171875, -1.4438934326171875, -1.3902587890625, -1.3366241455078125, -1.282989501953125, -1.2293548583984375, -1.17572021484375, -1.1220855712890625, -1.068450927734375, -1.0148162841796875, -0.961181640625, -0.9075469970703125, -0.853912353515625, -0.8002777099609375, -0.74664306640625, -0.6930084228515625, -0.639373779296875, -0.5857391357421875, -0.5321044921875, -0.4784698486328125, -0.424835205078125, -0.3712005615234375, -0.31756591796875, -0.2639312744140625, -0.210296630859375, -0.1566619873046875, -0.10302734375, -0.0493927001953125, 0.004241943359375, 0.0578765869140625, 0.11151123046875, 0.1651458740234375, 0.218780517578125, 0.2724151611328125, 0.3260498046875, 0.3796844482421875, 0.433319091796875, 0.4869537353515625, 0.54058837890625, 0.5942230224609375, 0.647857666015625, 0.7014923095703125, 0.755126953125, 0.8087615966796875, 0.862396240234375, 0.9160308837890625, 0.96966552734375, 1.0233001708984375, 1.076934814453125, 1.1305694580078125, 1.1842041015625, 1.2378387451171875, 1.291473388671875, 1.3451080322265625, 1.39874267578125, 1.4523773193359375, 1.506011962890625, 1.5596466064453125, 1.61328125]}, "gradients/decoder.transformer.h.11.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0, 5.0, 5.0, 4.0, 4.0, 11.0, 8.0, 13.0, 14.0, 14.0, 24.0, 23.0, 13.0, 24.0, 21.0, 29.0, 35.0, 41.0, 32.0, 31.0, 44.0, 52.0, 35.0, 1062.0, 30.0, 37.0, 35.0, 33.0, 45.0, 37.0, 38.0, 29.0, 22.0, 20.0, 28.0, 21.0, 21.0, 13.0, 18.0, 15.0, 12.0, 8.0, 4.0, 3.0, 4.0, 0.0, 3.0, 2.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-24.921875, -24.114013671875, -23.30615234375, -22.498291015625, -21.6904296875, -20.882568359375, -20.07470703125, -19.266845703125, -18.458984375, -17.651123046875, -16.84326171875, -16.035400390625, -15.2275390625, -14.419677734375, -13.61181640625, -12.803955078125, -11.99609375, -11.188232421875, -10.38037109375, -9.572509765625, -8.7646484375, -7.956787109375, -7.14892578125, -6.341064453125, -5.533203125, -4.725341796875, -3.91748046875, -3.109619140625, -2.3017578125, -1.493896484375, -0.68603515625, 0.121826171875, 0.9296875, 1.737548828125, 2.54541015625, 3.353271484375, 4.1611328125, 4.968994140625, 5.77685546875, 6.584716796875, 7.392578125, 8.200439453125, 9.00830078125, 9.816162109375, 10.6240234375, 11.431884765625, 12.23974609375, 13.047607421875, 13.85546875, 14.663330078125, 15.47119140625, 16.279052734375, 17.0869140625, 17.894775390625, 18.70263671875, 19.510498046875, 20.318359375, 21.126220703125, 21.93408203125, 22.741943359375, 23.5498046875, 24.357666015625, 25.16552734375, 25.973388671875, 26.78125]}, "gradients/decoder.transformer.h.11.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 1.0, 3.0, 7.0, 12.0, 15.0, 25.0, 30.0, 34.0, 58.0, 86.0, 121.0, 155.0, 236.0, 357.0, 462.0, 710.0, 994.0, 1455.0, 1986.0, 2909.0, 4337.0, 6584.0, 9809.0, 14701.0, 21995.0, 33799.0, 53570.0, 89443.0, 168485.0, 1348478.0, 130062.0, 73576.0, 45521.0, 28933.0, 18802.0, 12589.0, 8514.0, 5805.0, 3803.0, 2731.0, 1809.0, 1282.0, 906.0, 582.0, 411.0, 291.0, 196.0, 129.0, 96.0, 68.0, 57.0, 38.0, 31.0, 17.0, 16.0, 11.0, 6.0, 3.0, 3.0, 2.0, 1.0], "bins": [-1.095703125, -1.061798095703125, -1.02789306640625, -0.993988037109375, -0.9600830078125, -0.926177978515625, -0.89227294921875, -0.858367919921875, -0.824462890625, -0.790557861328125, -0.75665283203125, -0.722747802734375, -0.6888427734375, -0.654937744140625, -0.62103271484375, -0.587127685546875, -0.55322265625, -0.519317626953125, -0.48541259765625, -0.451507568359375, -0.4176025390625, -0.383697509765625, -0.34979248046875, -0.315887451171875, -0.281982421875, -0.248077392578125, -0.21417236328125, -0.180267333984375, -0.1463623046875, -0.112457275390625, -0.07855224609375, -0.044647216796875, -0.0107421875, 0.023162841796875, 0.05706787109375, 0.090972900390625, 0.1248779296875, 0.158782958984375, 0.19268798828125, 0.226593017578125, 0.260498046875, 0.294403076171875, 0.32830810546875, 0.362213134765625, 0.3961181640625, 0.430023193359375, 0.46392822265625, 0.497833251953125, 0.53173828125, 0.565643310546875, 0.59954833984375, 0.633453369140625, 0.6673583984375, 0.701263427734375, 0.73516845703125, 0.769073486328125, 0.802978515625, 0.836883544921875, 0.87078857421875, 0.904693603515625, 0.9385986328125, 0.972503662109375, 1.00640869140625, 1.040313720703125, 1.07421875]}, "gradients/decoder.transformer.h.11.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 4.0, 2.0, 7.0, 5.0, 7.0, 8.0, 13.0, 10.0, 9.0, 8.0, 25.0, 36.0, 21.0, 38.0, 51.0, 77.0, 216.0, 172.0, 72.0, 45.0, 36.0, 26.0, 27.0, 15.0, 14.0, 13.0, 10.0, 16.0, 5.0, 5.0, 6.0, 5.0, 1.0, 1.0, 4.0, 1.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.759166717529297e-05, -6.540119647979736e-05, -6.321072578430176e-05, -6.102025508880615e-05, -5.882978439331055e-05, -5.663931369781494e-05, -5.4448843002319336e-05, -5.225837230682373e-05, -5.0067901611328125e-05, -4.787743091583252e-05, -4.5686960220336914e-05, -4.349648952484131e-05, -4.13060188293457e-05, -3.91155481338501e-05, -3.692507743835449e-05, -3.473460674285889e-05, -3.254413604736328e-05, -3.0353665351867676e-05, -2.816319465637207e-05, -2.5972723960876465e-05, -2.378225326538086e-05, -2.1591782569885254e-05, -1.940131187438965e-05, -1.7210841178894043e-05, -1.5020370483398438e-05, -1.2829899787902832e-05, -1.0639429092407227e-05, -8.448958396911621e-06, -6.258487701416016e-06, -4.06801700592041e-06, -1.8775463104248047e-06, 3.129243850708008e-07, 2.5033950805664062e-06, 4.693865776062012e-06, 6.884336471557617e-06, 9.074807167053223e-06, 1.1265277862548828e-05, 1.3455748558044434e-05, 1.564621925354004e-05, 1.7836689949035645e-05, 2.002716064453125e-05, 2.2217631340026855e-05, 2.440810203552246e-05, 2.6598572731018066e-05, 2.8789043426513672e-05, 3.097951412200928e-05, 3.316998481750488e-05, 3.536045551300049e-05, 3.7550926208496094e-05, 3.97413969039917e-05, 4.1931867599487305e-05, 4.412233829498291e-05, 4.6312808990478516e-05, 4.850327968597412e-05, 5.0693750381469727e-05, 5.288422107696533e-05, 5.507469177246094e-05, 5.726516246795654e-05, 5.945563316345215e-05, 6.164610385894775e-05, 6.383657455444336e-05, 6.602704524993896e-05, 6.821751594543457e-05, 7.040798664093018e-05, 7.259845733642578e-05]}, "gradients/decoder.transformer.h.11.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 3.0, 1.0, 3.0, 3.0, 8.0, 8.0, 10.0, 18.0, 13.0, 24.0, 15.0, 51.0, 60.0, 90.0, 198.0, 1090.0, 20304.0, 986601.0, 37998.0, 1462.0, 288.0, 93.0, 61.0, 45.0, 20.0, 26.0, 12.0, 16.0, 7.0, 6.0, 6.0, 6.0, 7.0, 8.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0013179779052734375, -0.0012798607349395752, -0.0012417435646057129, -0.0012036263942718506, -0.0011655092239379883, -0.001127392053604126, -0.0010892748832702637, -0.0010511577129364014, -0.001013040542602539, -0.0009749233722686768, -0.0009368062019348145, -0.0008986890316009521, -0.0008605718612670898, -0.0008224546909332275, -0.0007843375205993652, -0.0007462203502655029, -0.0007081031799316406, -0.0006699860095977783, -0.000631868839263916, -0.0005937516689300537, -0.0005556344985961914, -0.0005175173282623291, -0.0004794001579284668, -0.0004412829875946045, -0.0004031658172607422, -0.0003650486469268799, -0.0003269314765930176, -0.0002888143062591553, -0.00025069713592529297, -0.00021257996559143066, -0.00017446279525756836, -0.00013634562492370605, -9.822845458984375e-05, -6.0111284255981445e-05, -2.199411392211914e-05, 1.6123056411743164e-05, 5.424022674560547e-05, 9.235739707946777e-05, 0.00013047456741333008, 0.00016859173774719238, 0.0002067089080810547, 0.000244826078414917, 0.0002829432487487793, 0.0003210604190826416, 0.0003591775894165039, 0.0003972947597503662, 0.0004354119300842285, 0.0004735291004180908, 0.0005116462707519531, 0.0005497634410858154, 0.0005878806114196777, 0.00062599778175354, 0.0006641149520874023, 0.0007022321224212646, 0.000740349292755127, 0.0007784664630889893, 0.0008165836334228516, 0.0008547008037567139, 0.0008928179740905762, 0.0009309351444244385, 0.0009690523147583008, 0.001007169485092163, 0.0010452866554260254, 0.0010834038257598877, 0.00112152099609375]}, "gradients/decoder.transformer.h.11.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 4.0, 1.0, 5.0, 4.0, 8.0, 10.0, 26.0, 45.0, 48.0, 76.0, 161.0, 250.0, 146.0, 92.0, 55.0, 32.0, 18.0, 12.0, 5.0, 11.0, 2.0, 3.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.208517440129071e-05, -9.006063919514418e-05, -8.803609671304002e-05, -8.601155423093587e-05, -8.398701902478933e-05, -8.19624838186428e-05, -7.993794133653864e-05, -7.791339885443449e-05, -7.588886364828795e-05, -7.386432844214141e-05, -7.183978596003726e-05, -6.981524347793311e-05, -6.779070827178657e-05, -6.576617306564003e-05, -6.374163058353588e-05, -6.171708810143173e-05, -5.969255289528519e-05, -5.7668014051159844e-05, -5.56434752070345e-05, -5.361893636290915e-05, -5.159439751878381e-05, -4.956985867465846e-05, -4.754531983053312e-05, -4.552078098640777e-05, -4.3496242142282426e-05, -4.147170329815708e-05, -3.9447164454031736e-05, -3.742262560990639e-05, -3.5398086765781045e-05, -3.33735479216557e-05, -3.1349009077530354e-05, -2.932447023340501e-05, -2.7299927751300856e-05, -2.527538890717551e-05, -2.3250850063050166e-05, -2.122631121892482e-05, -1.9201772374799475e-05, -1.717723353067413e-05, -1.5152694686548784e-05, -1.3128155842423439e-05, -1.1103616998298094e-05, -9.079078154172748e-06, -7.054539310047403e-06, -5.030000465922058e-06, -3.0054616217967123e-06, -9.80922777671367e-07, 1.0436160664539784e-06, 3.0681549105793238e-06, 5.092693754704669e-06, 7.1172325988300145e-06, 9.14177144295536e-06, 1.1166310287080705e-05, 1.319084913120605e-05, 1.5215387975331396e-05, 1.723992681945674e-05, 1.9264465663582087e-05, 2.1289004507707432e-05, 2.3313543351832777e-05, 2.5338082195958123e-05, 2.7362621040083468e-05, 2.9387159884208813e-05, 3.141169872833416e-05, 3.3436237572459504e-05, 3.546077641658485e-05, 3.7485315260710195e-05]}, "gradients/decoder.transformer.h.11.ln_cross_attn.bias": {"_type": "histogram", "values": [4.0, 0.0, 1.0, 1.0, 1.0, 1.0, 3.0, 2.0, 5.0, 4.0, 7.0, 6.0, 7.0, 9.0, 15.0, 8.0, 12.0, 19.0, 8.0, 26.0, 31.0, 23.0, 26.0, 39.0, 27.0, 38.0, 42.0, 28.0, 29.0, 43.0, 38.0, 49.0, 33.0, 36.0, 29.0, 45.0, 27.0, 50.0, 32.0, 18.0, 34.0, 15.0, 23.0, 23.0, 22.0, 14.0, 9.0, 11.0, 14.0, 1.0, 8.0, 6.0, 4.0, 4.0, 2.0, 1.0, 2.0, 2.0, 2.0, 0.0, 4.0, 0.0, 0.0, 1.0], "bins": [-2.485513687133789e-05, -2.406258136034012e-05, -2.3270025849342346e-05, -2.2477470338344574e-05, -2.1684914827346802e-05, -2.089235931634903e-05, -2.0099803805351257e-05, -1.9307248294353485e-05, -1.8514692783355713e-05, -1.772213727235794e-05, -1.692958176136017e-05, -1.6137026250362396e-05, -1.5344470739364624e-05, -1.4551915228366852e-05, -1.375935971736908e-05, -1.2966804206371307e-05, -1.2174248695373535e-05, -1.1381693184375763e-05, -1.058913767337799e-05, -9.796582162380219e-06, -9.004026651382446e-06, -8.211471140384674e-06, -7.418915629386902e-06, -6.62636011838913e-06, -5.833804607391357e-06, -5.041249096393585e-06, -4.248693585395813e-06, -3.4561380743980408e-06, -2.6635825634002686e-06, -1.8710270524024963e-06, -1.0784715414047241e-06, -2.859160304069519e-07, 5.066394805908203e-07, 1.2991949915885925e-06, 2.0917505025863647e-06, 2.884306013584137e-06, 3.676861524581909e-06, 4.469417035579681e-06, 5.261972546577454e-06, 6.054528057575226e-06, 6.847083568572998e-06, 7.63963907957077e-06, 8.432194590568542e-06, 9.224750101566315e-06, 1.0017305612564087e-05, 1.0809861123561859e-05, 1.1602416634559631e-05, 1.2394972145557404e-05, 1.3187527656555176e-05, 1.3980083167552948e-05, 1.477263867855072e-05, 1.5565194189548492e-05, 1.6357749700546265e-05, 1.7150305211544037e-05, 1.794286072254181e-05, 1.873541623353958e-05, 1.9527971744537354e-05, 2.0320527255535126e-05, 2.1113082766532898e-05, 2.190563827753067e-05, 2.2698193788528442e-05, 2.3490749299526215e-05, 2.4283304810523987e-05, 2.507586032152176e-05, 2.586841583251953e-05]}, "gradients/decoder.transformer.h.11.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 4.0, 3.0, 4.0, 2.0, 2.0, 9.0, 10.0, 18.0, 11.0, 19.0, 15.0, 29.0, 18.0, 40.0, 37.0, 38.0, 28.0, 43.0, 42.0, 53.0, 48.0, 48.0, 45.0, 42.0, 44.0, 39.0, 38.0, 48.0, 35.0, 28.0, 34.0, 19.0, 25.0, 23.0, 18.0, 14.0, 6.0, 11.0, 8.0, 6.0, 4.0, 2.0, 3.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-38.84375, -37.45068359375, -36.0576171875, -34.66455078125, -33.271484375, -31.87841796875, -30.4853515625, -29.09228515625, -27.69921875, -26.30615234375, -24.9130859375, -23.52001953125, -22.126953125, -20.73388671875, -19.3408203125, -17.94775390625, -16.5546875, -15.16162109375, -13.7685546875, -12.37548828125, -10.982421875, -9.58935546875, -8.1962890625, -6.80322265625, -5.41015625, -4.01708984375, -2.6240234375, -1.23095703125, 0.162109375, 1.55517578125, 2.9482421875, 4.34130859375, 5.734375, 7.12744140625, 8.5205078125, 9.91357421875, 11.306640625, 12.69970703125, 14.0927734375, 15.48583984375, 16.87890625, 18.27197265625, 19.6650390625, 21.05810546875, 22.451171875, 23.84423828125, 25.2373046875, 26.63037109375, 28.0234375, 29.41650390625, 30.8095703125, 32.20263671875, 33.595703125, 34.98876953125, 36.3818359375, 37.77490234375, 39.16796875, 40.56103515625, 41.9541015625, 43.34716796875, 44.740234375, 46.13330078125, 47.5263671875, 48.91943359375, 50.3125]}, "gradients/decoder.transformer.h.11.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 5.0, 3.0, 3.0, 3.0, 2.0, 13.0, 16.0, 16.0, 19.0, 31.0, 36.0, 76.0, 120.0, 185.0, 325.0, 554.0, 1190.0, 2268.0, 4898.0, 12892.0, 39410.0, 142655.0, 558944.0, 204445.0, 52006.0, 16658.0, 6154.0, 2705.0, 1273.0, 712.0, 353.0, 215.0, 130.0, 73.0, 55.0, 41.0, 30.0, 16.0, 9.0, 9.0, 7.0, 4.0, 2.0, 3.0, 1.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-20.140625, -19.411376953125, -18.68212890625, -17.952880859375, -17.2236328125, -16.494384765625, -15.76513671875, -15.035888671875, -14.306640625, -13.577392578125, -12.84814453125, -12.118896484375, -11.3896484375, -10.660400390625, -9.93115234375, -9.201904296875, -8.47265625, -7.743408203125, -7.01416015625, -6.284912109375, -5.5556640625, -4.826416015625, -4.09716796875, -3.367919921875, -2.638671875, -1.909423828125, -1.18017578125, -0.450927734375, 0.2783203125, 1.007568359375, 1.73681640625, 2.466064453125, 3.1953125, 3.924560546875, 4.65380859375, 5.383056640625, 6.1123046875, 6.841552734375, 7.57080078125, 8.300048828125, 9.029296875, 9.758544921875, 10.48779296875, 11.217041015625, 11.9462890625, 12.675537109375, 13.40478515625, 14.134033203125, 14.86328125, 15.592529296875, 16.32177734375, 17.051025390625, 17.7802734375, 18.509521484375, 19.23876953125, 19.968017578125, 20.697265625, 21.426513671875, 22.15576171875, 22.885009765625, 23.6142578125, 24.343505859375, 25.07275390625, 25.802001953125, 26.53125]}, "gradients/decoder.transformer.h.11.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 4.0, 7.0, 4.0, 7.0, 10.0, 9.0, 15.0, 5.0, 6.0, 21.0, 10.0, 23.0, 33.0, 31.0, 29.0, 32.0, 31.0, 39.0, 49.0, 37.0, 43.0, 132.0, 1929.0, 92.0, 56.0, 41.0, 39.0, 40.0, 38.0, 29.0, 23.0, 35.0, 23.0, 22.0, 18.0, 18.0, 12.0, 16.0, 11.0, 3.0, 8.0, 6.0, 6.0, 3.0, 2.0, 3.0, 4.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0], "bins": [-123.25, -119.560546875, -115.87109375, -112.181640625, -108.4921875, -104.802734375, -101.11328125, -97.423828125, -93.734375, -90.044921875, -86.35546875, -82.666015625, -78.9765625, -75.287109375, -71.59765625, -67.908203125, -64.21875, -60.529296875, -56.83984375, -53.150390625, -49.4609375, -45.771484375, -42.08203125, -38.392578125, -34.703125, -31.013671875, -27.32421875, -23.634765625, -19.9453125, -16.255859375, -12.56640625, -8.876953125, -5.1875, -1.498046875, 2.19140625, 5.880859375, 9.5703125, 13.259765625, 16.94921875, 20.638671875, 24.328125, 28.017578125, 31.70703125, 35.396484375, 39.0859375, 42.775390625, 46.46484375, 50.154296875, 53.84375, 57.533203125, 61.22265625, 64.912109375, 68.6015625, 72.291015625, 75.98046875, 79.669921875, 83.359375, 87.048828125, 90.73828125, 94.427734375, 98.1171875, 101.806640625, 105.49609375, 109.185546875, 112.875]}, "gradients/decoder.transformer.h.11.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 4.0, 4.0, 2.0, 2.0, 6.0, 6.0, 5.0, 7.0, 10.0, 15.0, 20.0, 25.0, 22.0, 37.0, 38.0, 60.0, 62.0, 74.0, 113.0, 136.0, 172.0, 253.0, 496.0, 2205.0, 3085687.0, 53840.0, 1074.0, 353.0, 232.0, 176.0, 119.0, 66.0, 77.0, 66.0, 62.0, 46.0, 31.0, 19.0, 16.0, 12.0, 16.0, 11.0, 9.0, 9.0, 4.0, 6.0, 4.0, 4.0, 2.0, 1.0, 1.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-272.25, -263.41796875, -254.5859375, -245.75390625, -236.921875, -228.08984375, -219.2578125, -210.42578125, -201.59375, -192.76171875, -183.9296875, -175.09765625, -166.265625, -157.43359375, -148.6015625, -139.76953125, -130.9375, -122.10546875, -113.2734375, -104.44140625, -95.609375, -86.77734375, -77.9453125, -69.11328125, -60.28125, -51.44921875, -42.6171875, -33.78515625, -24.953125, -16.12109375, -7.2890625, 1.54296875, 10.375, 19.20703125, 28.0390625, 36.87109375, 45.703125, 54.53515625, 63.3671875, 72.19921875, 81.03125, 89.86328125, 98.6953125, 107.52734375, 116.359375, 125.19140625, 134.0234375, 142.85546875, 151.6875, 160.51953125, 169.3515625, 178.18359375, 187.015625, 195.84765625, 204.6796875, 213.51171875, 222.34375, 231.17578125, 240.0078125, 248.83984375, 257.671875, 266.50390625, 275.3359375, 284.16796875, 293.0]}, "gradients/decoder.transformer.h.11.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 4.0, 171.0, 755.0, 90.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-88.08861541748047, -74.3661880493164, -60.64376449584961, -46.92134094238281, -33.19891357421875, -19.476486206054688, -5.754066467285156, 7.968360900878906, 21.69078826904297, 35.41321563720703, 49.13563919067383, 62.858062744140625, 76.58049011230469, 90.30291748046875, 104.02533721923828, 117.74776458740234, 131.47018432617188, 145.19261169433594, 158.9150390625, 172.637451171875, 186.35989379882812, 200.08230590820312, 213.8047332763672, 227.52716064453125, 241.24960327148438, 254.97203063964844, 268.6944580078125, 282.4168701171875, 296.1393127441406, 309.8617248535156, 323.58416748046875, 337.30657958984375, 351.02899169921875, 364.75140380859375, 378.4738464355469, 392.1962585449219, 405.918701171875, 419.64111328125, 433.363525390625, 447.0859680175781, 460.80841064453125, 474.53082275390625, 488.2532653808594, 501.9756774902344, 515.6981201171875, 529.4205322265625, 543.1429443359375, 556.8653564453125, 570.5877685546875, 584.3101806640625, 598.0325927734375, 611.7550659179688, 625.4774780273438, 639.1998901367188, 652.9223022460938, 666.644775390625, 680.3671875, 694.089599609375, 707.81201171875, 721.5344848632812, 735.2568969726562, 748.9793090820312, 762.7017211914062, 776.4241943359375, 790.1466064453125]}, "gradients/decoder.transformer.h.11.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0, 1.0, 2.0, 2.0, 2.0, 2.0, 8.0, 7.0, 3.0, 6.0, 10.0, 10.0, 10.0, 6.0, 16.0, 21.0, 24.0, 21.0, 23.0, 25.0, 38.0, 33.0, 32.0, 40.0, 43.0, 43.0, 32.0, 42.0, 49.0, 44.0, 49.0, 46.0, 40.0, 42.0, 29.0, 35.0, 35.0, 21.0, 25.0, 21.0, 8.0, 12.0, 10.0, 7.0, 11.0, 11.0, 6.0, 4.0, 1.0, 3.0, 3.0, 1.0, 0.0, 0.0, 3.0, 0.0, 0.0, 1.0], "bins": [-336.6199645996094, -326.78619384765625, -316.95245361328125, -307.1186828613281, -297.284912109375, -287.451171875, -277.6174011230469, -267.78363037109375, -257.94989013671875, -248.1161346435547, -238.28237915039062, -228.4486083984375, -218.61485290527344, -208.78109741210938, -198.94732666015625, -189.1135711669922, -179.27981567382812, -169.44606018066406, -159.6123046875, -149.77853393554688, -139.9447784423828, -130.11102294921875, -120.27725982666016, -110.44349670410156, -100.6097412109375, -90.77598571777344, -80.94222259521484, -71.10845947265625, -61.27470397949219, -51.44094467163086, -41.60718536376953, -31.773422241210938, -21.939697265625, -12.105937957763672, -2.2721786499023438, 7.561580657958984, 17.395339965820312, 27.22909927368164, 37.06285858154297, 46.89662170410156, 56.730377197265625, 66.56413269042969, 76.39789581298828, 86.23165893554688, 96.06541442871094, 105.899169921875, 115.7329330444336, 125.56669616699219, 135.40045166015625, 145.2342071533203, 155.06796264648438, 164.9017333984375, 174.73548889160156, 184.56924438476562, 194.40301513671875, 204.2367706298828, 214.07052612304688, 223.90428161621094, 233.738037109375, 243.57180786132812, 253.4055633544922, 263.23931884765625, 273.0730895996094, 282.9068603515625, 292.7406005859375]}, "gradients/decoder.transformer.h.10.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 1.0, 2.0, 6.0, 2.0, 2.0, 12.0, 9.0, 15.0, 12.0, 17.0, 19.0, 26.0, 32.0, 34.0, 38.0, 35.0, 30.0, 46.0, 40.0, 45.0, 62.0, 44.0, 44.0, 44.0, 41.0, 39.0, 44.0, 43.0, 36.0, 28.0, 28.0, 22.0, 22.0, 30.0, 13.0, 13.0, 8.0, 8.0, 6.0, 6.0, 4.0, 3.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-39.34375, -37.92138671875, -36.4990234375, -35.07666015625, -33.654296875, -32.23193359375, -30.8095703125, -29.38720703125, -27.96484375, -26.54248046875, -25.1201171875, -23.69775390625, -22.275390625, -20.85302734375, -19.4306640625, -18.00830078125, -16.5859375, -15.16357421875, -13.7412109375, -12.31884765625, -10.896484375, -9.47412109375, -8.0517578125, -6.62939453125, -5.20703125, -3.78466796875, -2.3623046875, -0.93994140625, 0.482421875, 1.90478515625, 3.3271484375, 4.74951171875, 6.171875, 7.59423828125, 9.0166015625, 10.43896484375, 11.861328125, 13.28369140625, 14.7060546875, 16.12841796875, 17.55078125, 18.97314453125, 20.3955078125, 21.81787109375, 23.240234375, 24.66259765625, 26.0849609375, 27.50732421875, 28.9296875, 30.35205078125, 31.7744140625, 33.19677734375, 34.619140625, 36.04150390625, 37.4638671875, 38.88623046875, 40.30859375, 41.73095703125, 43.1533203125, 44.57568359375, 45.998046875, 47.42041015625, 48.8427734375, 50.26513671875, 51.6875]}, "gradients/decoder.transformer.h.10.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 2.0, 6.0, 6.0, 19.0, 16.0, 29.0, 28.0, 38.0, 67.0, 78.0, 110.0, 175.0, 219.0, 346.0, 450.0, 690.0, 1039.0, 1605.0, 2485.0, 4243.0, 7760.0, 18114.0, 105513.0, 2385307.0, 1556755.0, 76363.0, 14764.0, 7132.0, 3921.0, 2317.0, 1498.0, 953.0, 664.0, 413.0, 351.0, 228.0, 158.0, 113.0, 79.0, 65.0, 50.0, 42.0, 18.0, 17.0, 16.0, 7.0, 8.0, 3.0, 5.0, 4.0, 0.0, 4.0, 3.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-86.375, -83.283203125, -80.19140625, -77.099609375, -74.0078125, -70.916015625, -67.82421875, -64.732421875, -61.640625, -58.548828125, -55.45703125, -52.365234375, -49.2734375, -46.181640625, -43.08984375, -39.998046875, -36.90625, -33.814453125, -30.72265625, -27.630859375, -24.5390625, -21.447265625, -18.35546875, -15.263671875, -12.171875, -9.080078125, -5.98828125, -2.896484375, 0.1953125, 3.287109375, 6.37890625, 9.470703125, 12.5625, 15.654296875, 18.74609375, 21.837890625, 24.9296875, 28.021484375, 31.11328125, 34.205078125, 37.296875, 40.388671875, 43.48046875, 46.572265625, 49.6640625, 52.755859375, 55.84765625, 58.939453125, 62.03125, 65.123046875, 68.21484375, 71.306640625, 74.3984375, 77.490234375, 80.58203125, 83.673828125, 86.765625, 89.857421875, 92.94921875, 96.041015625, 99.1328125, 102.224609375, 105.31640625, 108.408203125, 111.5]}, "gradients/decoder.transformer.h.10.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 2.0, 1.0, 2.0, 1.0, 1.0, 3.0, 3.0, 2.0, 6.0, 4.0, 11.0, 7.0, 21.0, 29.0, 35.0, 44.0, 56.0, 122.0, 282.0, 747.0, 1340.0, 737.0, 267.0, 118.0, 71.0, 33.0, 37.0, 24.0, 24.0, 10.0, 9.0, 8.0, 8.0, 4.0, 5.0, 7.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-93.6875, -90.5087890625, -87.330078125, -84.1513671875, -80.97265625, -77.7939453125, -74.615234375, -71.4365234375, -68.2578125, -65.0791015625, -61.900390625, -58.7216796875, -55.54296875, -52.3642578125, -49.185546875, -46.0068359375, -42.828125, -39.6494140625, -36.470703125, -33.2919921875, -30.11328125, -26.9345703125, -23.755859375, -20.5771484375, -17.3984375, -14.2197265625, -11.041015625, -7.8623046875, -4.68359375, -1.5048828125, 1.673828125, 4.8525390625, 8.03125, 11.2099609375, 14.388671875, 17.5673828125, 20.74609375, 23.9248046875, 27.103515625, 30.2822265625, 33.4609375, 36.6396484375, 39.818359375, 42.9970703125, 46.17578125, 49.3544921875, 52.533203125, 55.7119140625, 58.890625, 62.0693359375, 65.248046875, 68.4267578125, 71.60546875, 74.7841796875, 77.962890625, 81.1416015625, 84.3203125, 87.4990234375, 90.677734375, 93.8564453125, 97.03515625, 100.2138671875, 103.392578125, 106.5712890625, 109.75]}, "gradients/decoder.transformer.h.10.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 1.0, 1.0, 3.0, 6.0, 4.0, 8.0, 12.0, 17.0, 24.0, 23.0, 68.0, 147.0, 324.0, 781.0, 2211.0, 8742.0, 70267.0, 3993676.0, 103162.0, 10700.0, 2608.0, 850.0, 328.0, 140.0, 87.0, 41.0, 17.0, 13.0, 8.0, 5.0, 4.0, 2.0, 2.0, 4.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-394.25, -383.1640625, -372.078125, -360.9921875, -349.90625, -338.8203125, -327.734375, -316.6484375, -305.5625, -294.4765625, -283.390625, -272.3046875, -261.21875, -250.1328125, -239.046875, -227.9609375, -216.875, -205.7890625, -194.703125, -183.6171875, -172.53125, -161.4453125, -150.359375, -139.2734375, -128.1875, -117.1015625, -106.015625, -94.9296875, -83.84375, -72.7578125, -61.671875, -50.5859375, -39.5, -28.4140625, -17.328125, -6.2421875, 4.84375, 15.9296875, 27.015625, 38.1015625, 49.1875, 60.2734375, 71.359375, 82.4453125, 93.53125, 104.6171875, 115.703125, 126.7890625, 137.875, 148.9609375, 160.046875, 171.1328125, 182.21875, 193.3046875, 204.390625, 215.4765625, 226.5625, 237.6484375, 248.734375, 259.8203125, 270.90625, 281.9921875, 293.078125, 304.1640625, 315.25]}, "gradients/decoder.transformer.h.10.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 3.0, 4.0, 5.0, 6.0, 15.0, 26.0, 34.0, 67.0, 99.0, 160.0, 204.0, 154.0, 92.0, 52.0, 30.0, 24.0, 13.0, 9.0, 5.0, 1.0, 7.0, 4.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-410.4713439941406, -394.8402404785156, -379.2091369628906, -363.5780334472656, -347.9469299316406, -332.3158264160156, -316.6847229003906, -301.0536193847656, -285.4225158691406, -269.7914123535156, -254.16030883789062, -238.52920532226562, -222.89810180664062, -207.26699829101562, -191.63589477539062, -176.00479125976562, -160.37368774414062, -144.74258422851562, -129.11148071289062, -113.48037719726562, -97.84927368164062, -82.21817016601562, -66.58706665039062, -50.955963134765625, -35.324859619140625, -19.693756103515625, -4.062652587890625, 11.568450927734375, 27.199554443359375, 42.830657958984375, 58.461761474609375, 74.09286499023438, 89.7239990234375, 105.3551025390625, 120.9862060546875, 136.6173095703125, 152.2484130859375, 167.8795166015625, 183.5106201171875, 199.1417236328125, 214.7728271484375, 230.4039306640625, 246.0350341796875, 261.6661376953125, 277.2972412109375, 292.9283447265625, 308.5594482421875, 324.1905517578125, 339.8216552734375, 355.4527587890625, 371.0838623046875, 386.7149658203125, 402.3460693359375, 417.9771728515625, 433.6082763671875, 449.2393798828125, 464.8704833984375, 480.5015869140625, 496.1326904296875, 511.7637939453125, 527.3948974609375, 543.0260009765625, 558.6571044921875, 574.2882080078125, 589.9193115234375]}, "gradients/decoder.transformer.h.10.ln_2.bias": {"_type": "histogram", "values": [3.0, 1.0, 0.0, 1.0, 5.0, 1.0, 3.0, 5.0, 4.0, 8.0, 7.0, 14.0, 14.0, 9.0, 8.0, 19.0, 24.0, 13.0, 16.0, 20.0, 25.0, 37.0, 26.0, 34.0, 28.0, 37.0, 32.0, 40.0, 39.0, 37.0, 39.0, 35.0, 28.0, 21.0, 35.0, 29.0, 37.0, 25.0, 29.0, 27.0, 22.0, 27.0, 19.0, 26.0, 20.0, 12.0, 10.0, 18.0, 10.0, 6.0, 11.0, 6.0, 4.0, 4.0, 4.0, 2.0, 2.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 1.0], "bins": [-178.1181640625, -172.29171752929688, -166.4652557373047, -160.63880920410156, -154.81236267089844, -148.98590087890625, -143.15945434570312, -137.3330078125, -131.5065460205078, -125.68009185791016, -119.85364532470703, -114.02719116210938, -108.20073699951172, -102.37428283691406, -96.54783630371094, -90.72138214111328, -84.89493560791016, -79.0684814453125, -73.24203491210938, -67.41558074951172, -61.58912658691406, -55.76267623901367, -49.93622589111328, -44.109771728515625, -38.283321380615234, -32.456871032714844, -26.630416870117188, -20.803966522216797, -14.977514266967773, -9.15106201171875, -3.3246116638183594, 2.501842498779297, 8.328292846679688, 14.154745101928711, 19.981197357177734, 25.807647705078125, 31.63409996032715, 37.46055221557617, 43.28700256347656, 49.11345672607422, 54.93990707397461, 60.766357421875, 66.59281158447266, 72.41926574707031, 78.24571228027344, 84.0721664428711, 89.89862060546875, 95.72506713867188, 101.55152130126953, 107.37797546386719, 113.20442199707031, 119.03087615966797, 124.85733032226562, 130.68377685546875, 136.51022338867188, 142.33668518066406, 148.1631317138672, 153.9895782470703, 159.8160400390625, 165.64248657226562, 171.46893310546875, 177.29539489746094, 183.12184143066406, 188.94830322265625, 194.77474975585938]}, "gradients/decoder.transformer.h.10.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 4.0, 2.0, 7.0, 10.0, 7.0, 8.0, 6.0, 16.0, 18.0, 27.0, 21.0, 27.0, 24.0, 36.0, 29.0, 40.0, 43.0, 40.0, 51.0, 46.0, 48.0, 39.0, 46.0, 58.0, 41.0, 33.0, 42.0, 39.0, 26.0, 24.0, 20.0, 29.0, 20.0, 14.0, 20.0, 14.0, 9.0, 7.0, 10.0, 5.0, 2.0, 5.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-38.625, -37.19775390625, -35.7705078125, -34.34326171875, -32.916015625, -31.48876953125, -30.0615234375, -28.63427734375, -27.20703125, -25.77978515625, -24.3525390625, -22.92529296875, -21.498046875, -20.07080078125, -18.6435546875, -17.21630859375, -15.7890625, -14.36181640625, -12.9345703125, -11.50732421875, -10.080078125, -8.65283203125, -7.2255859375, -5.79833984375, -4.37109375, -2.94384765625, -1.5166015625, -0.08935546875, 1.337890625, 2.76513671875, 4.1923828125, 5.61962890625, 7.046875, 8.47412109375, 9.9013671875, 11.32861328125, 12.755859375, 14.18310546875, 15.6103515625, 17.03759765625, 18.46484375, 19.89208984375, 21.3193359375, 22.74658203125, 24.173828125, 25.60107421875, 27.0283203125, 28.45556640625, 29.8828125, 31.31005859375, 32.7373046875, 34.16455078125, 35.591796875, 37.01904296875, 38.4462890625, 39.87353515625, 41.30078125, 42.72802734375, 44.1552734375, 45.58251953125, 47.009765625, 48.43701171875, 49.8642578125, 51.29150390625, 52.71875]}, "gradients/decoder.transformer.h.10.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 2.0, 4.0, 5.0, 6.0, 5.0, 8.0, 15.0, 19.0, 23.0, 36.0, 49.0, 78.0, 101.0, 187.0, 253.0, 363.0, 608.0, 929.0, 1567.0, 2356.0, 3827.0, 6118.0, 9848.0, 16081.0, 26410.0, 42920.0, 75832.0, 154373.0, 355772.0, 159111.0, 77350.0, 44301.0, 26421.0, 16633.0, 10220.0, 6292.0, 3812.0, 2355.0, 1535.0, 973.0, 643.0, 379.0, 276.0, 190.0, 101.0, 59.0, 47.0, 18.0, 17.0, 16.0, 9.0, 3.0, 2.0, 2.0, 3.0, 0.0, 4.0, 1.0, 1.0, 0.0, 3.0], "bins": [-1.6943359375, -1.6405792236328125, -1.586822509765625, -1.5330657958984375, -1.47930908203125, -1.4255523681640625, -1.371795654296875, -1.3180389404296875, -1.2642822265625, -1.2105255126953125, -1.156768798828125, -1.1030120849609375, -1.04925537109375, -0.9954986572265625, -0.941741943359375, -0.8879852294921875, -0.834228515625, -0.7804718017578125, -0.726715087890625, -0.6729583740234375, -0.61920166015625, -0.5654449462890625, -0.511688232421875, -0.4579315185546875, -0.4041748046875, -0.3504180908203125, -0.296661376953125, -0.2429046630859375, -0.18914794921875, -0.1353912353515625, -0.081634521484375, -0.0278778076171875, 0.02587890625, 0.0796356201171875, 0.133392333984375, 0.1871490478515625, 0.24090576171875, 0.2946624755859375, 0.348419189453125, 0.4021759033203125, 0.4559326171875, 0.5096893310546875, 0.563446044921875, 0.6172027587890625, 0.67095947265625, 0.7247161865234375, 0.778472900390625, 0.8322296142578125, 0.885986328125, 0.9397430419921875, 0.993499755859375, 1.0472564697265625, 1.10101318359375, 1.1547698974609375, 1.208526611328125, 1.2622833251953125, 1.3160400390625, 1.3697967529296875, 1.423553466796875, 1.4773101806640625, 1.53106689453125, 1.5848236083984375, 1.638580322265625, 1.6923370361328125, 1.74609375]}, "gradients/decoder.transformer.h.10.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 4.0, 1.0, 5.0, 4.0, 4.0, 5.0, 6.0, 8.0, 14.0, 20.0, 9.0, 18.0, 18.0, 25.0, 34.0, 25.0, 26.0, 37.0, 31.0, 45.0, 42.0, 43.0, 43.0, 43.0, 1066.0, 49.0, 30.0, 43.0, 34.0, 47.0, 25.0, 33.0, 26.0, 31.0, 26.0, 22.0, 15.0, 17.0, 14.0, 10.0, 3.0, 13.0, 6.0, 1.0, 4.0, 2.0, 5.0, 5.0, 0.0, 5.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-23.8125, -22.92431640625, -22.0361328125, -21.14794921875, -20.259765625, -19.37158203125, -18.4833984375, -17.59521484375, -16.70703125, -15.81884765625, -14.9306640625, -14.04248046875, -13.154296875, -12.26611328125, -11.3779296875, -10.48974609375, -9.6015625, -8.71337890625, -7.8251953125, -6.93701171875, -6.048828125, -5.16064453125, -4.2724609375, -3.38427734375, -2.49609375, -1.60791015625, -0.7197265625, 0.16845703125, 1.056640625, 1.94482421875, 2.8330078125, 3.72119140625, 4.609375, 5.49755859375, 6.3857421875, 7.27392578125, 8.162109375, 9.05029296875, 9.9384765625, 10.82666015625, 11.71484375, 12.60302734375, 13.4912109375, 14.37939453125, 15.267578125, 16.15576171875, 17.0439453125, 17.93212890625, 18.8203125, 19.70849609375, 20.5966796875, 21.48486328125, 22.373046875, 23.26123046875, 24.1494140625, 25.03759765625, 25.92578125, 26.81396484375, 27.7021484375, 28.59033203125, 29.478515625, 30.36669921875, 31.2548828125, 32.14306640625, 33.03125]}, "gradients/decoder.transformer.h.10.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 0.0, 2.0, 4.0, 4.0, 5.0, 5.0, 12.0, 25.0, 25.0, 50.0, 64.0, 92.0, 143.0, 206.0, 325.0, 475.0, 773.0, 1169.0, 1751.0, 2857.0, 4215.0, 6679.0, 10753.0, 17261.0, 29032.0, 48966.0, 87404.0, 177258.0, 1395154.0, 135280.0, 71662.0, 40571.0, 24376.0, 15096.0, 9248.0, 5886.0, 3629.0, 2326.0, 1467.0, 1028.0, 582.0, 427.0, 300.0, 172.0, 124.0, 90.0, 60.0, 39.0, 26.0, 22.0, 9.0, 7.0, 5.0, 2.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0], "bins": [-1.3525390625, -1.310699462890625, -1.26885986328125, -1.227020263671875, -1.1851806640625, -1.143341064453125, -1.10150146484375, -1.059661865234375, -1.017822265625, -0.975982666015625, -0.93414306640625, -0.892303466796875, -0.8504638671875, -0.808624267578125, -0.76678466796875, -0.724945068359375, -0.68310546875, -0.641265869140625, -0.59942626953125, -0.557586669921875, -0.5157470703125, -0.473907470703125, -0.43206787109375, -0.390228271484375, -0.348388671875, -0.306549072265625, -0.26470947265625, -0.222869873046875, -0.1810302734375, -0.139190673828125, -0.09735107421875, -0.055511474609375, -0.013671875, 0.028167724609375, 0.07000732421875, 0.111846923828125, 0.1536865234375, 0.195526123046875, 0.23736572265625, 0.279205322265625, 0.321044921875, 0.362884521484375, 0.40472412109375, 0.446563720703125, 0.4884033203125, 0.530242919921875, 0.57208251953125, 0.613922119140625, 0.65576171875, 0.697601318359375, 0.73944091796875, 0.781280517578125, 0.8231201171875, 0.864959716796875, 0.90679931640625, 0.948638916015625, 0.990478515625, 1.032318115234375, 1.07415771484375, 1.115997314453125, 1.1578369140625, 1.199676513671875, 1.24151611328125, 1.283355712890625, 1.3251953125]}, "gradients/decoder.transformer.h.10.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 3.0, 5.0, 3.0, 1.0, 8.0, 6.0, 8.0, 17.0, 22.0, 24.0, 37.0, 47.0, 58.0, 89.0, 89.0, 102.0, 104.0, 97.0, 60.0, 55.0, 51.0, 29.0, 25.0, 13.0, 15.0, 13.0, 12.0, 7.0, 1.0, 1.0, 2.0, 1.0, 3.0, 1.0, 2.0, 1.0, 3.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.649991989135742e-05, -9.365379810333252e-05, -9.080767631530762e-05, -8.796155452728271e-05, -8.511543273925781e-05, -8.226931095123291e-05, -7.942318916320801e-05, -7.65770673751831e-05, -7.37309455871582e-05, -7.08848237991333e-05, -6.80387020111084e-05, -6.51925802230835e-05, -6.23464584350586e-05, -5.950033664703369e-05, -5.665421485900879e-05, -5.380809307098389e-05, -5.0961971282958984e-05, -4.811584949493408e-05, -4.526972770690918e-05, -4.242360591888428e-05, -3.9577484130859375e-05, -3.673136234283447e-05, -3.388524055480957e-05, -3.103911876678467e-05, -2.8192996978759766e-05, -2.5346875190734863e-05, -2.250075340270996e-05, -1.965463161468506e-05, -1.6808509826660156e-05, -1.3962388038635254e-05, -1.1116266250610352e-05, -8.27014446258545e-06, -5.424022674560547e-06, -2.5779008865356445e-06, 2.682209014892578e-07, 3.11434268951416e-06, 5.9604644775390625e-06, 8.806586265563965e-06, 1.1652708053588867e-05, 1.449882984161377e-05, 1.7344951629638672e-05, 2.0191073417663574e-05, 2.3037195205688477e-05, 2.588331699371338e-05, 2.872943878173828e-05, 3.1575560569763184e-05, 3.4421682357788086e-05, 3.726780414581299e-05, 4.011392593383789e-05, 4.296004772186279e-05, 4.5806169509887695e-05, 4.86522912979126e-05, 5.14984130859375e-05, 5.43445348739624e-05, 5.7190656661987305e-05, 6.003677845001221e-05, 6.288290023803711e-05, 6.572902202606201e-05, 6.857514381408691e-05, 7.142126560211182e-05, 7.426738739013672e-05, 7.711350917816162e-05, 7.995963096618652e-05, 8.280575275421143e-05, 8.565187454223633e-05]}, "gradients/decoder.transformer.h.10.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 3.0, 3.0, 0.0, 0.0, 3.0, 4.0, 2.0, 6.0, 2.0, 7.0, 10.0, 14.0, 15.0, 23.0, 31.0, 47.0, 64.0, 113.0, 213.0, 786.0, 7019.0, 252675.0, 774437.0, 11342.0, 1115.0, 283.0, 106.0, 65.0, 50.0, 37.0, 31.0, 13.0, 17.0, 10.0, 7.0, 0.0, 5.0, 5.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0012454986572265625, -0.0012041032314300537, -0.001162707805633545, -0.0011213123798370361, -0.0010799169540405273, -0.0010385215282440186, -0.0009971261024475098, -0.000955730676651001, -0.0009143352508544922, -0.0008729398250579834, -0.0008315443992614746, -0.0007901489734649658, -0.000748753547668457, -0.0007073581218719482, -0.0006659626960754395, -0.0006245672702789307, -0.0005831718444824219, -0.0005417764186859131, -0.0005003809928894043, -0.0004589855670928955, -0.0004175901412963867, -0.00037619471549987793, -0.00033479928970336914, -0.00029340386390686035, -0.00025200843811035156, -0.00021061301231384277, -0.00016921758651733398, -0.0001278221607208252, -8.64267349243164e-05, -4.503130912780762e-05, -3.635883331298828e-06, 3.775954246520996e-05, 7.915496826171875e-05, 0.00012055039405822754, 0.00016194581985473633, 0.00020334124565124512, 0.0002447366714477539, 0.0002861320972442627, 0.0003275275230407715, 0.0003689229488372803, 0.00041031837463378906, 0.00045171380043029785, 0.0004931092262268066, 0.0005345046520233154, 0.0005759000778198242, 0.000617295503616333, 0.0006586909294128418, 0.0007000863552093506, 0.0007414817810058594, 0.0007828772068023682, 0.000824272632598877, 0.0008656680583953857, 0.0009070634841918945, 0.0009484589099884033, 0.0009898543357849121, 0.001031249761581421, 0.0010726451873779297, 0.0011140406131744385, 0.0011554360389709473, 0.001196831464767456, 0.0012382268905639648, 0.0012796223163604736, 0.0013210177421569824, 0.0013624131679534912, 0.00140380859375]}, "gradients/decoder.transformer.h.10.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 2.0, 2.0, 3.0, 2.0, 3.0, 12.0, 18.0, 34.0, 49.0, 122.0, 287.0, 240.0, 121.0, 59.0, 28.0, 17.0, 11.0, 4.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.24573736381717e-05, -3.8906138797756284e-05, -3.5354907595319673e-05, -3.180367639288306e-05, -2.8252441552467644e-05, -2.470120853104163e-05, -2.1149975509615615e-05, -1.75987424881896e-05, -1.4047509466763586e-05, -1.0496276445337571e-05, -6.945043423911557e-06, -3.393810402485542e-06, 1.574226189404726e-07, 3.708655640366487e-06, 7.259888661792502e-06, 1.0811121683218516e-05, 1.4362354704644531e-05, 1.7913587726070546e-05, 2.146482074749656e-05, 2.5016053768922575e-05, 2.856728679034859e-05, 3.211852163076401e-05, 3.566975283320062e-05, 3.922098403563723e-05, 4.277221887605265e-05, 4.6323453716468066e-05, 4.987468491890468e-05, 5.342591612134129e-05, 5.6977150961756706e-05, 6.0528385802172124e-05, 6.407962064258754e-05, 6.763084820704535e-05, 7.118209032341838e-05, 7.47333251638338e-05, 7.828456000424922e-05, 8.183578756870702e-05, 8.538702240912244e-05, 8.893825724953786e-05, 9.248948481399566e-05, 9.604071965441108e-05, 9.95919544948265e-05, 0.00010314318933524191, 0.00010669442417565733, 0.00011024565174011514, 0.00011379688658053055, 0.00011734812142094597, 0.00012089934898540378, 0.0001244505838258192, 0.0001280018186662346, 0.00013155305350665003, 0.00013510428834706545, 0.00013865552318748087, 0.00014220675802789629, 0.00014575797831639647, 0.0001493092131568119, 0.0001528604479972273, 0.00015641168283764273, 0.00015996291767805815, 0.00016351415251847357, 0.00016706538735888898, 0.00017061660764738917, 0.0001741678424878046, 0.00017771907732822, 0.00018127031216863543, 0.00018482154700905085]}, "gradients/decoder.transformer.h.10.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 4.0, 2.0, 2.0, 6.0, 5.0, 4.0, 5.0, 13.0, 12.0, 11.0, 18.0, 18.0, 11.0, 14.0, 17.0, 35.0, 10.0, 28.0, 32.0, 21.0, 45.0, 42.0, 39.0, 32.0, 43.0, 57.0, 45.0, 32.0, 35.0, 37.0, 44.0, 32.0, 29.0, 36.0, 31.0, 30.0, 22.0, 19.0, 20.0, 16.0, 22.0, 9.0, 5.0, 10.0, 2.0, 2.0, 1.0, 1.0, 5.0, 3.0, 1.0, 0.0, 2.0, 2.0, 0.0, 3.0], "bins": [-3.4868717193603516e-05, -3.383960574865341e-05, -3.281049430370331e-05, -3.1781382858753204e-05, -3.07522714138031e-05, -2.9723159968852997e-05, -2.8694048523902893e-05, -2.766493707895279e-05, -2.6635825634002686e-05, -2.5606714189052582e-05, -2.4577602744102478e-05, -2.3548491299152374e-05, -2.251937985420227e-05, -2.1490268409252167e-05, -2.0461156964302063e-05, -1.943204551935196e-05, -1.8402934074401855e-05, -1.7373822629451752e-05, -1.6344711184501648e-05, -1.5315599739551544e-05, -1.428648829460144e-05, -1.3257376849651337e-05, -1.2228265404701233e-05, -1.1199153959751129e-05, -1.0170042514801025e-05, -9.140931069850922e-06, -8.111819624900818e-06, -7.082708179950714e-06, -6.05359673500061e-06, -5.024485290050507e-06, -3.995373845100403e-06, -2.966262400150299e-06, -1.9371509552001953e-06, -9.080395102500916e-07, 1.210719347000122e-07, 1.150183379650116e-06, 2.1792948246002197e-06, 3.2084062695503235e-06, 4.237517714500427e-06, 5.266629159450531e-06, 6.295740604400635e-06, 7.3248520493507385e-06, 8.353963494300842e-06, 9.383074939250946e-06, 1.041218638420105e-05, 1.1441297829151154e-05, 1.2470409274101257e-05, 1.3499520719051361e-05, 1.4528632164001465e-05, 1.555774360895157e-05, 1.6586855053901672e-05, 1.7615966498851776e-05, 1.864507794380188e-05, 1.9674189388751984e-05, 2.0703300833702087e-05, 2.173241227865219e-05, 2.2761523723602295e-05, 2.37906351685524e-05, 2.4819746613502502e-05, 2.5848858058452606e-05, 2.687796950340271e-05, 2.7907080948352814e-05, 2.8936192393302917e-05, 2.996530383825302e-05, 3.0994415283203125e-05]}, "gradients/decoder.transformer.h.10.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 4.0, 2.0, 7.0, 10.0, 7.0, 8.0, 6.0, 16.0, 18.0, 27.0, 21.0, 27.0, 24.0, 36.0, 29.0, 40.0, 43.0, 40.0, 51.0, 46.0, 48.0, 39.0, 46.0, 58.0, 41.0, 33.0, 42.0, 39.0, 26.0, 24.0, 20.0, 29.0, 20.0, 14.0, 20.0, 14.0, 9.0, 7.0, 10.0, 5.0, 2.0, 5.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-38.625, -37.19775390625, -35.7705078125, -34.34326171875, -32.916015625, -31.48876953125, -30.0615234375, -28.63427734375, -27.20703125, -25.77978515625, -24.3525390625, -22.92529296875, -21.498046875, -20.07080078125, -18.6435546875, -17.21630859375, -15.7890625, -14.36181640625, -12.9345703125, -11.50732421875, -10.080078125, -8.65283203125, -7.2255859375, -5.79833984375, -4.37109375, -2.94384765625, -1.5166015625, -0.08935546875, 1.337890625, 2.76513671875, 4.1923828125, 5.61962890625, 7.046875, 8.47412109375, 9.9013671875, 11.32861328125, 12.755859375, 14.18310546875, 15.6103515625, 17.03759765625, 18.46484375, 19.89208984375, 21.3193359375, 22.74658203125, 24.173828125, 25.60107421875, 27.0283203125, 28.45556640625, 29.8828125, 31.31005859375, 32.7373046875, 34.16455078125, 35.591796875, 37.01904296875, 38.4462890625, 39.87353515625, 41.30078125, 42.72802734375, 44.1552734375, 45.58251953125, 47.009765625, 48.43701171875, 49.8642578125, 51.29150390625, 52.71875]}, "gradients/decoder.transformer.h.10.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 3.0, 3.0, 1.0, 7.0, 4.0, 8.0, 13.0, 11.0, 25.0, 18.0, 33.0, 35.0, 60.0, 70.0, 95.0, 133.0, 202.0, 377.0, 659.0, 1389.0, 3386.0, 12497.0, 100422.0, 799251.0, 109908.0, 13128.0, 3634.0, 1413.0, 680.0, 376.0, 206.0, 134.0, 96.0, 71.0, 48.0, 36.0, 40.0, 27.0, 18.0, 11.0, 18.0, 5.0, 7.0, 4.0, 4.0, 2.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-49.6875, -47.8818359375, -46.076171875, -44.2705078125, -42.46484375, -40.6591796875, -38.853515625, -37.0478515625, -35.2421875, -33.4365234375, -31.630859375, -29.8251953125, -28.01953125, -26.2138671875, -24.408203125, -22.6025390625, -20.796875, -18.9912109375, -17.185546875, -15.3798828125, -13.57421875, -11.7685546875, -9.962890625, -8.1572265625, -6.3515625, -4.5458984375, -2.740234375, -0.9345703125, 0.87109375, 2.6767578125, 4.482421875, 6.2880859375, 8.09375, 9.8994140625, 11.705078125, 13.5107421875, 15.31640625, 17.1220703125, 18.927734375, 20.7333984375, 22.5390625, 24.3447265625, 26.150390625, 27.9560546875, 29.76171875, 31.5673828125, 33.373046875, 35.1787109375, 36.984375, 38.7900390625, 40.595703125, 42.4013671875, 44.20703125, 46.0126953125, 47.818359375, 49.6240234375, 51.4296875, 53.2353515625, 55.041015625, 56.8466796875, 58.65234375, 60.4580078125, 62.263671875, 64.0693359375, 65.875]}, "gradients/decoder.transformer.h.10.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 3.0, 3.0, 0.0, 2.0, 2.0, 3.0, 4.0, 1.0, 7.0, 6.0, 5.0, 8.0, 4.0, 12.0, 11.0, 22.0, 24.0, 26.0, 19.0, 13.0, 24.0, 25.0, 30.0, 34.0, 30.0, 39.0, 36.0, 42.0, 49.0, 313.0, 1759.0, 51.0, 42.0, 37.0, 38.0, 41.0, 38.0, 34.0, 24.0, 19.0, 31.0, 17.0, 19.0, 11.0, 13.0, 14.0, 19.0, 11.0, 7.0, 12.0, 6.0, 5.0, 3.0, 5.0, 3.0, 3.0, 2.0, 0.0, 1.0, 3.0, 3.0], "bins": [-104.0, -100.8623046875, -97.724609375, -94.5869140625, -91.44921875, -88.3115234375, -85.173828125, -82.0361328125, -78.8984375, -75.7607421875, -72.623046875, -69.4853515625, -66.34765625, -63.2099609375, -60.072265625, -56.9345703125, -53.796875, -50.6591796875, -47.521484375, -44.3837890625, -41.24609375, -38.1083984375, -34.970703125, -31.8330078125, -28.6953125, -25.5576171875, -22.419921875, -19.2822265625, -16.14453125, -13.0068359375, -9.869140625, -6.7314453125, -3.59375, -0.4560546875, 2.681640625, 5.8193359375, 8.95703125, 12.0947265625, 15.232421875, 18.3701171875, 21.5078125, 24.6455078125, 27.783203125, 30.9208984375, 34.05859375, 37.1962890625, 40.333984375, 43.4716796875, 46.609375, 49.7470703125, 52.884765625, 56.0224609375, 59.16015625, 62.2978515625, 65.435546875, 68.5732421875, 71.7109375, 74.8486328125, 77.986328125, 81.1240234375, 84.26171875, 87.3994140625, 90.537109375, 93.6748046875, 96.8125]}, "gradients/decoder.transformer.h.10.attn.c_attn.weight": {"_type": "histogram", "values": [3.0, 2.0, 1.0, 1.0, 2.0, 0.0, 4.0, 7.0, 3.0, 1.0, 6.0, 13.0, 15.0, 10.0, 18.0, 20.0, 23.0, 17.0, 35.0, 42.0, 48.0, 65.0, 66.0, 86.0, 146.0, 223.0, 379.0, 656.0, 1395.0, 6298.0, 2947743.0, 181947.0, 3673.0, 1149.0, 557.0, 286.0, 204.0, 108.0, 90.0, 81.0, 54.0, 24.0, 40.0, 42.0, 34.0, 22.0, 14.0, 15.0, 9.0, 13.0, 4.0, 10.0, 3.0, 1.0, 4.0, 2.0, 2.0, 2.0, 1.0, 3.0, 3.0, 1.0, 0.0, 2.0], "bins": [-192.125, -185.900390625, -179.67578125, -173.451171875, -167.2265625, -161.001953125, -154.77734375, -148.552734375, -142.328125, -136.103515625, -129.87890625, -123.654296875, -117.4296875, -111.205078125, -104.98046875, -98.755859375, -92.53125, -86.306640625, -80.08203125, -73.857421875, -67.6328125, -61.408203125, -55.18359375, -48.958984375, -42.734375, -36.509765625, -30.28515625, -24.060546875, -17.8359375, -11.611328125, -5.38671875, 0.837890625, 7.0625, 13.287109375, 19.51171875, 25.736328125, 31.9609375, 38.185546875, 44.41015625, 50.634765625, 56.859375, 63.083984375, 69.30859375, 75.533203125, 81.7578125, 87.982421875, 94.20703125, 100.431640625, 106.65625, 112.880859375, 119.10546875, 125.330078125, 131.5546875, 137.779296875, 144.00390625, 150.228515625, 156.453125, 162.677734375, 168.90234375, 175.126953125, 181.3515625, 187.576171875, 193.80078125, 200.025390625, 206.25]}, "gradients/decoder.transformer.h.10.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 3.0, 11.0, 5.0, 22.0, 35.0, 56.0, 103.0, 164.0, 196.0, 164.0, 103.0, 57.0, 46.0, 27.0, 7.0, 8.0, 3.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-101.73131561279297, -98.40980529785156, -95.08828735351562, -91.76677703857422, -88.44525909423828, -85.12374877929688, -81.80223083496094, -78.48072052001953, -75.15921020507812, -71.83769989013672, -68.51618194580078, -65.19467163085938, -61.87315368652344, -58.55164337158203, -55.23012924194336, -51.90861511230469, -48.58709716796875, -45.26558303833008, -41.944068908691406, -38.62255859375, -35.30104064941406, -31.979528427124023, -28.658016204833984, -25.336502075195312, -22.01498794555664, -18.69347381591797, -15.371960639953613, -12.050447463989258, -8.728933334350586, -5.407419204711914, -2.085906982421875, 1.2356071472167969, 4.55712890625, 7.878642559051514, 11.200156211853027, 14.521669387817383, 17.843183517456055, 21.164697647094727, 24.486209869384766, 27.807723999023438, 31.12923812866211, 34.45075225830078, 37.77226638793945, 41.093780517578125, 44.41529083251953, 47.73680877685547, 51.058319091796875, 54.37983322143555, 57.70134735107422, 61.02286148071289, 64.34437561035156, 67.66588592529297, 70.9874038696289, 74.30891418457031, 77.63043212890625, 80.95194244384766, 84.27345275878906, 87.59496307373047, 90.9164810180664, 94.23799133300781, 97.55950927734375, 100.88101959228516, 104.20252990722656, 107.5240478515625, 110.84556579589844]}, "gradients/decoder.transformer.h.10.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 3.0, 3.0, 3.0, 2.0, 8.0, 7.0, 9.0, 13.0, 11.0, 19.0, 25.0, 30.0, 15.0, 33.0, 33.0, 39.0, 38.0, 41.0, 53.0, 52.0, 52.0, 46.0, 51.0, 48.0, 49.0, 48.0, 52.0, 32.0, 33.0, 35.0, 20.0, 15.0, 18.0, 23.0, 12.0, 13.0, 7.0, 3.0, 11.0, 3.0, 2.0, 2.0, 3.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-435.68597412109375, -423.80145263671875, -411.91693115234375, -400.03240966796875, -388.14788818359375, -376.2633361816406, -364.3788146972656, -352.4942932128906, -340.6097717285156, -328.7252502441406, -316.8407287597656, -304.9562072753906, -293.0716552734375, -281.1871337890625, -269.3026123046875, -257.4180908203125, -245.5335693359375, -233.6490478515625, -221.7645263671875, -209.87998962402344, -197.99546813964844, -186.11094665527344, -174.22640991210938, -162.34188842773438, -150.45736694335938, -138.57284545898438, -126.68831634521484, -114.80378723144531, -102.91926574707031, -91.03474426269531, -79.15021514892578, -67.26568603515625, -55.381134033203125, -43.49660873413086, -31.612083435058594, -19.727558135986328, -7.8430328369140625, 4.041492462158203, 15.926017761230469, 27.810546875, 39.695068359375, 51.579593658447266, 63.46411895751953, 75.34864807128906, 87.23316955566406, 99.11769104003906, 111.0022201538086, 122.88674926757812, 134.77127075195312, 146.65579223632812, 158.54031372070312, 170.4248504638672, 182.3093719482422, 194.1938934326172, 206.07843017578125, 217.96295166015625, 229.84747314453125, 241.73199462890625, 253.61651611328125, 265.50103759765625, 277.38555908203125, 289.2701110839844, 301.1546325683594, 313.0391540527344, 324.9236755371094]}, "gradients/decoder.transformer.h.9.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 4.0, 5.0, 6.0, 7.0, 13.0, 6.0, 12.0, 14.0, 27.0, 27.0, 21.0, 28.0, 38.0, 34.0, 40.0, 39.0, 45.0, 43.0, 48.0, 57.0, 27.0, 51.0, 57.0, 44.0, 41.0, 45.0, 30.0, 21.0, 31.0, 22.0, 30.0, 22.0, 16.0, 11.0, 10.0, 11.0, 9.0, 8.0, 4.0, 6.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-40.15625, -38.6748046875, -37.193359375, -35.7119140625, -34.23046875, -32.7490234375, -31.267578125, -29.7861328125, -28.3046875, -26.8232421875, -25.341796875, -23.8603515625, -22.37890625, -20.8974609375, -19.416015625, -17.9345703125, -16.453125, -14.9716796875, -13.490234375, -12.0087890625, -10.52734375, -9.0458984375, -7.564453125, -6.0830078125, -4.6015625, -3.1201171875, -1.638671875, -0.1572265625, 1.32421875, 2.8056640625, 4.287109375, 5.7685546875, 7.25, 8.7314453125, 10.212890625, 11.6943359375, 13.17578125, 14.6572265625, 16.138671875, 17.6201171875, 19.1015625, 20.5830078125, 22.064453125, 23.5458984375, 25.02734375, 26.5087890625, 27.990234375, 29.4716796875, 30.953125, 32.4345703125, 33.916015625, 35.3974609375, 36.87890625, 38.3603515625, 39.841796875, 41.3232421875, 42.8046875, 44.2861328125, 45.767578125, 47.2490234375, 48.73046875, 50.2119140625, 51.693359375, 53.1748046875, 54.65625]}, "gradients/decoder.transformer.h.9.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 3.0, 7.0, 7.0, 7.0, 17.0, 20.0, 28.0, 33.0, 55.0, 60.0, 84.0, 150.0, 145.0, 287.0, 426.0, 693.0, 1276.0, 2451.0, 4555.0, 9630.0, 29038.0, 718024.0, 3336944.0, 63356.0, 13753.0, 6083.0, 2989.0, 1636.0, 889.0, 516.0, 371.0, 217.0, 158.0, 101.0, 79.0, 54.0, 35.0, 38.0, 22.0, 17.0, 12.0, 11.0, 6.0, 2.0, 7.0, 3.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-129.875, -125.099609375, -120.32421875, -115.548828125, -110.7734375, -105.998046875, -101.22265625, -96.447265625, -91.671875, -86.896484375, -82.12109375, -77.345703125, -72.5703125, -67.794921875, -63.01953125, -58.244140625, -53.46875, -48.693359375, -43.91796875, -39.142578125, -34.3671875, -29.591796875, -24.81640625, -20.041015625, -15.265625, -10.490234375, -5.71484375, -0.939453125, 3.8359375, 8.611328125, 13.38671875, 18.162109375, 22.9375, 27.712890625, 32.48828125, 37.263671875, 42.0390625, 46.814453125, 51.58984375, 56.365234375, 61.140625, 65.916015625, 70.69140625, 75.466796875, 80.2421875, 85.017578125, 89.79296875, 94.568359375, 99.34375, 104.119140625, 108.89453125, 113.669921875, 118.4453125, 123.220703125, 127.99609375, 132.771484375, 137.546875, 142.322265625, 147.09765625, 151.873046875, 156.6484375, 161.423828125, 166.19921875, 170.974609375, 175.75]}, "gradients/decoder.transformer.h.9.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 2.0, 3.0, 1.0, 2.0, 4.0, 6.0, 8.0, 5.0, 8.0, 14.0, 18.0, 27.0, 22.0, 45.0, 56.0, 99.0, 234.0, 497.0, 1144.0, 979.0, 421.0, 181.0, 89.0, 61.0, 29.0, 31.0, 17.0, 18.0, 10.0, 13.0, 4.0, 6.0, 7.0, 5.0, 3.0, 5.0, 3.0, 2.0, 1.0, 2.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-80.8125, -77.69140625, -74.5703125, -71.44921875, -68.328125, -65.20703125, -62.0859375, -58.96484375, -55.84375, -52.72265625, -49.6015625, -46.48046875, -43.359375, -40.23828125, -37.1171875, -33.99609375, -30.875, -27.75390625, -24.6328125, -21.51171875, -18.390625, -15.26953125, -12.1484375, -9.02734375, -5.90625, -2.78515625, 0.3359375, 3.45703125, 6.578125, 9.69921875, 12.8203125, 15.94140625, 19.0625, 22.18359375, 25.3046875, 28.42578125, 31.546875, 34.66796875, 37.7890625, 40.91015625, 44.03125, 47.15234375, 50.2734375, 53.39453125, 56.515625, 59.63671875, 62.7578125, 65.87890625, 69.0, 72.12109375, 75.2421875, 78.36328125, 81.484375, 84.60546875, 87.7265625, 90.84765625, 93.96875, 97.08984375, 100.2109375, 103.33203125, 106.453125, 109.57421875, 112.6953125, 115.81640625, 118.9375]}, "gradients/decoder.transformer.h.9.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 4.0, 4.0, 6.0, 3.0, 4.0, 7.0, 10.0, 12.0, 26.0, 28.0, 57.0, 110.0, 285.0, 747.0, 2287.0, 9763.0, 78423.0, 3986007.0, 101076.0, 11359.0, 2710.0, 777.0, 307.0, 108.0, 64.0, 36.0, 16.0, 17.0, 8.0, 6.0, 5.0, 4.0, 3.0, 1.0, 4.0, 1.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-470.75, -458.2109375, -445.671875, -433.1328125, -420.59375, -408.0546875, -395.515625, -382.9765625, -370.4375, -357.8984375, -345.359375, -332.8203125, -320.28125, -307.7421875, -295.203125, -282.6640625, -270.125, -257.5859375, -245.046875, -232.5078125, -219.96875, -207.4296875, -194.890625, -182.3515625, -169.8125, -157.2734375, -144.734375, -132.1953125, -119.65625, -107.1171875, -94.578125, -82.0390625, -69.5, -56.9609375, -44.421875, -31.8828125, -19.34375, -6.8046875, 5.734375, 18.2734375, 30.8125, 43.3515625, 55.890625, 68.4296875, 80.96875, 93.5078125, 106.046875, 118.5859375, 131.125, 143.6640625, 156.203125, 168.7421875, 181.28125, 193.8203125, 206.359375, 218.8984375, 231.4375, 243.9765625, 256.515625, 269.0546875, 281.59375, 294.1328125, 306.671875, 319.2109375, 331.75]}, "gradients/decoder.transformer.h.9.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 6.0, 5.0, 9.0, 10.0, 19.0, 30.0, 51.0, 102.0, 145.0, 213.0, 139.0, 106.0, 60.0, 40.0, 29.0, 21.0, 13.0, 5.0, 4.0, 3.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-726.8011474609375, -710.0838623046875, -693.3665771484375, -676.6492919921875, -659.9320068359375, -643.2147216796875, -626.4974365234375, -609.7801513671875, -593.0628662109375, -576.3455810546875, -559.6282958984375, -542.9110107421875, -526.1937255859375, -509.4764404296875, -492.7591552734375, -476.0418701171875, -459.3245849609375, -442.6072998046875, -425.8900146484375, -409.1727294921875, -392.4554443359375, -375.7381591796875, -359.0208740234375, -342.3035888671875, -325.5863037109375, -308.8690185546875, -292.1517333984375, -275.4344482421875, -258.7171630859375, -241.9998779296875, -225.2825927734375, -208.5653076171875, -191.84796142578125, -175.13067626953125, -158.41339111328125, -141.69610595703125, -124.97882080078125, -108.26153564453125, -91.54425048828125, -74.82696533203125, -58.10968017578125, -41.39239501953125, -24.67510986328125, -7.95782470703125, 8.75946044921875, 25.47674560546875, 42.19403076171875, 58.91131591796875, 75.62860107421875, 92.34588623046875, 109.06317138671875, 125.78045654296875, 142.49774169921875, 159.21502685546875, 175.93231201171875, 192.64959716796875, 209.36688232421875, 226.08416748046875, 242.80145263671875, 259.51873779296875, 276.23602294921875, 292.95330810546875, 309.67059326171875, 326.38787841796875, 343.10516357421875]}, "gradients/decoder.transformer.h.9.ln_2.bias": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 1.0, 3.0, 1.0, 2.0, 5.0, 3.0, 2.0, 10.0, 9.0, 8.0, 11.0, 11.0, 17.0, 20.0, 17.0, 18.0, 15.0, 28.0, 25.0, 30.0, 31.0, 31.0, 38.0, 41.0, 30.0, 35.0, 44.0, 38.0, 49.0, 49.0, 42.0, 38.0, 31.0, 27.0, 34.0, 31.0, 23.0, 27.0, 15.0, 17.0, 19.0, 10.0, 14.0, 16.0, 7.0, 6.0, 9.0, 6.0, 3.0, 6.0, 4.0, 2.0, 2.0, 1.0, 0.0, 2.0, 2.0, 3.0, 0.0, 0.0, 1.0], "bins": [-212.0902557373047, -205.22824096679688, -198.36622619628906, -191.50421142578125, -184.64219665527344, -177.78018188476562, -170.91818237304688, -164.05615234375, -157.19415283203125, -150.33213806152344, -143.47012329101562, -136.6081085205078, -129.74609375, -122.88407897949219, -116.0220718383789, -109.1600570678711, -102.29803466796875, -95.43601989746094, -88.57400512695312, -81.71199035644531, -74.8499755859375, -67.98796081542969, -61.125953674316406, -54.263938903808594, -47.40192413330078, -40.53990936279297, -33.677894592285156, -26.81588363647461, -19.953868865966797, -13.091854095458984, -6.2298431396484375, 0.632171630859375, 7.49420166015625, 14.356215476989746, 21.218229293823242, 28.080242156982422, 34.942256927490234, 41.80427169799805, 48.666282653808594, 55.528297424316406, 62.39031219482422, 69.25232696533203, 76.11434173583984, 82.97634887695312, 89.83836364746094, 96.70037841796875, 103.56239318847656, 110.42440795898438, 117.28642272949219, 124.1484375, 131.0104522705078, 137.87246704101562, 144.73448181152344, 151.59649658203125, 158.45849609375, 165.32052612304688, 172.18252563476562, 179.04454040527344, 185.90655517578125, 192.76856994628906, 199.63058471679688, 206.4925994873047, 213.3546142578125, 220.21661376953125, 227.07864379882812]}, "gradients/decoder.transformer.h.9.crossattention.c_proj.bias": {"_type": "histogram", "values": [3.0, 2.0, 1.0, 1.0, 4.0, 3.0, 3.0, 8.0, 6.0, 6.0, 8.0, 14.0, 15.0, 15.0, 22.0, 25.0, 28.0, 36.0, 37.0, 34.0, 44.0, 45.0, 37.0, 46.0, 41.0, 35.0, 55.0, 27.0, 38.0, 37.0, 47.0, 43.0, 34.0, 28.0, 25.0, 20.0, 23.0, 22.0, 17.0, 19.0, 8.0, 11.0, 2.0, 13.0, 8.0, 7.0, 7.0, 2.0, 2.0, 1.0, 5.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-37.46875, -36.03125, -34.59375, -33.15625, -31.71875, -30.28125, -28.84375, -27.40625, -25.96875, -24.53125, -23.09375, -21.65625, -20.21875, -18.78125, -17.34375, -15.90625, -14.46875, -13.03125, -11.59375, -10.15625, -8.71875, -7.28125, -5.84375, -4.40625, -2.96875, -1.53125, -0.09375, 1.34375, 2.78125, 4.21875, 5.65625, 7.09375, 8.53125, 9.96875, 11.40625, 12.84375, 14.28125, 15.71875, 17.15625, 18.59375, 20.03125, 21.46875, 22.90625, 24.34375, 25.78125, 27.21875, 28.65625, 30.09375, 31.53125, 32.96875, 34.40625, 35.84375, 37.28125, 38.71875, 40.15625, 41.59375, 43.03125, 44.46875, 45.90625, 47.34375, 48.78125, 50.21875, 51.65625, 53.09375, 54.53125]}, "gradients/decoder.transformer.h.9.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 3.0, 3.0, 1.0, 5.0, 3.0, 13.0, 11.0, 22.0, 21.0, 34.0, 41.0, 84.0, 152.0, 193.0, 348.0, 589.0, 1056.0, 1912.0, 3281.0, 5715.0, 10644.0, 19953.0, 38879.0, 78988.0, 184937.0, 417892.0, 146812.0, 65572.0, 32965.0, 17299.0, 9201.0, 5170.0, 2865.0, 1596.0, 923.0, 586.0, 301.0, 173.0, 130.0, 64.0, 42.0, 25.0, 21.0, 16.0, 6.0, 8.0, 6.0, 2.0, 1.0, 2.0, 2.0, 2.0], "bins": [-2.802734375, -2.727752685546875, -2.65277099609375, -2.577789306640625, -2.5028076171875, -2.427825927734375, -2.35284423828125, -2.277862548828125, -2.202880859375, -2.127899169921875, -2.05291748046875, -1.977935791015625, -1.9029541015625, -1.827972412109375, -1.75299072265625, -1.678009033203125, -1.60302734375, -1.528045654296875, -1.45306396484375, -1.378082275390625, -1.3031005859375, -1.228118896484375, -1.15313720703125, -1.078155517578125, -1.003173828125, -0.928192138671875, -0.85321044921875, -0.778228759765625, -0.7032470703125, -0.628265380859375, -0.55328369140625, -0.478302001953125, -0.4033203125, -0.328338623046875, -0.25335693359375, -0.178375244140625, -0.1033935546875, -0.028411865234375, 0.04656982421875, 0.121551513671875, 0.196533203125, 0.271514892578125, 0.34649658203125, 0.421478271484375, 0.4964599609375, 0.571441650390625, 0.64642333984375, 0.721405029296875, 0.79638671875, 0.871368408203125, 0.94635009765625, 1.021331787109375, 1.0963134765625, 1.171295166015625, 1.24627685546875, 1.321258544921875, 1.396240234375, 1.471221923828125, 1.54620361328125, 1.621185302734375, 1.6961669921875, 1.771148681640625, 1.84613037109375, 1.921112060546875, 1.99609375]}, "gradients/decoder.transformer.h.9.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 4.0, 2.0, 6.0, 8.0, 5.0, 8.0, 7.0, 22.0, 12.0, 17.0, 20.0, 21.0, 17.0, 19.0, 25.0, 33.0, 31.0, 31.0, 35.0, 49.0, 36.0, 36.0, 39.0, 50.0, 1060.0, 34.0, 39.0, 28.0, 31.0, 33.0, 34.0, 33.0, 35.0, 23.0, 28.0, 20.0, 19.0, 13.0, 15.0, 15.0, 12.0, 3.0, 8.0, 6.0, 4.0, 5.0, 1.0, 1.0, 4.0, 3.0, 1.0, 2.0, 0.0, 0.0, 1.0], "bins": [-28.546875, -27.6826171875, -26.818359375, -25.9541015625, -25.08984375, -24.2255859375, -23.361328125, -22.4970703125, -21.6328125, -20.7685546875, -19.904296875, -19.0400390625, -18.17578125, -17.3115234375, -16.447265625, -15.5830078125, -14.71875, -13.8544921875, -12.990234375, -12.1259765625, -11.26171875, -10.3974609375, -9.533203125, -8.6689453125, -7.8046875, -6.9404296875, -6.076171875, -5.2119140625, -4.34765625, -3.4833984375, -2.619140625, -1.7548828125, -0.890625, -0.0263671875, 0.837890625, 1.7021484375, 2.56640625, 3.4306640625, 4.294921875, 5.1591796875, 6.0234375, 6.8876953125, 7.751953125, 8.6162109375, 9.48046875, 10.3447265625, 11.208984375, 12.0732421875, 12.9375, 13.8017578125, 14.666015625, 15.5302734375, 16.39453125, 17.2587890625, 18.123046875, 18.9873046875, 19.8515625, 20.7158203125, 21.580078125, 22.4443359375, 23.30859375, 24.1728515625, 25.037109375, 25.9013671875, 26.765625]}, "gradients/decoder.transformer.h.9.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 3.0, 2.0, 1.0, 2.0, 8.0, 9.0, 18.0, 17.0, 23.0, 43.0, 68.0, 88.0, 121.0, 153.0, 256.0, 417.0, 551.0, 833.0, 1237.0, 1782.0, 2636.0, 3860.0, 5647.0, 8581.0, 12599.0, 19202.0, 29719.0, 46214.0, 73357.0, 127017.0, 1353649.0, 164343.0, 88542.0, 54187.0, 34319.0, 22307.0, 14755.0, 10023.0, 6723.0, 4504.0, 3001.0, 1991.0, 1361.0, 951.0, 607.0, 414.0, 334.0, 218.0, 141.0, 94.0, 77.0, 52.0, 24.0, 24.0, 18.0, 8.0, 9.0, 5.0, 1.0, 1.0, 1.0, 1.0, 2.0], "bins": [-1.1455078125, -1.109344482421875, -1.07318115234375, -1.037017822265625, -1.0008544921875, -0.964691162109375, -0.92852783203125, -0.892364501953125, -0.856201171875, -0.820037841796875, -0.78387451171875, -0.747711181640625, -0.7115478515625, -0.675384521484375, -0.63922119140625, -0.603057861328125, -0.56689453125, -0.530731201171875, -0.49456787109375, -0.458404541015625, -0.4222412109375, -0.386077880859375, -0.34991455078125, -0.313751220703125, -0.277587890625, -0.241424560546875, -0.20526123046875, -0.169097900390625, -0.1329345703125, -0.096771240234375, -0.06060791015625, -0.024444580078125, 0.01171875, 0.047882080078125, 0.08404541015625, 0.120208740234375, 0.1563720703125, 0.192535400390625, 0.22869873046875, 0.264862060546875, 0.301025390625, 0.337188720703125, 0.37335205078125, 0.409515380859375, 0.4456787109375, 0.481842041015625, 0.51800537109375, 0.554168701171875, 0.59033203125, 0.626495361328125, 0.66265869140625, 0.698822021484375, 0.7349853515625, 0.771148681640625, 0.80731201171875, 0.843475341796875, 0.879638671875, 0.915802001953125, 0.95196533203125, 0.988128662109375, 1.0242919921875, 1.060455322265625, 1.09661865234375, 1.132781982421875, 1.1689453125]}, "gradients/decoder.transformer.h.9.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 1.0, 3.0, 11.0, 5.0, 4.0, 5.0, 4.0, 7.0, 6.0, 11.0, 12.0, 18.0, 20.0, 14.0, 23.0, 47.0, 77.0, 141.0, 199.0, 132.0, 63.0, 44.0, 21.0, 28.0, 20.0, 13.0, 10.0, 10.0, 10.0, 7.0, 5.0, 3.0, 4.0, 8.0, 2.0, 2.0, 6.0, 3.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.337331771850586e-05, -7.103756070137024e-05, -6.870180368423462e-05, -6.6366046667099e-05, -6.403028964996338e-05, -6.169453263282776e-05, -5.935877561569214e-05, -5.702301859855652e-05, -5.46872615814209e-05, -5.235150456428528e-05, -5.001574754714966e-05, -4.767999053001404e-05, -4.534423351287842e-05, -4.30084764957428e-05, -4.067271947860718e-05, -3.833696246147156e-05, -3.600120544433594e-05, -3.366544842720032e-05, -3.13296914100647e-05, -2.8993934392929077e-05, -2.6658177375793457e-05, -2.4322420358657837e-05, -2.1986663341522217e-05, -1.9650906324386597e-05, -1.7315149307250977e-05, -1.4979392290115356e-05, -1.2643635272979736e-05, -1.0307878255844116e-05, -7.972121238708496e-06, -5.636364221572876e-06, -3.300607204437256e-06, -9.648501873016357e-07, 1.3709068298339844e-06, 3.7066638469696045e-06, 6.042420864105225e-06, 8.378177881240845e-06, 1.0713934898376465e-05, 1.3049691915512085e-05, 1.5385448932647705e-05, 1.7721205949783325e-05, 2.0056962966918945e-05, 2.2392719984054565e-05, 2.4728477001190186e-05, 2.7064234018325806e-05, 2.9399991035461426e-05, 3.1735748052597046e-05, 3.4071505069732666e-05, 3.6407262086868286e-05, 3.8743019104003906e-05, 4.1078776121139526e-05, 4.3414533138275146e-05, 4.5750290155410767e-05, 4.808604717254639e-05, 5.042180418968201e-05, 5.275756120681763e-05, 5.509331822395325e-05, 5.742907524108887e-05, 5.976483225822449e-05, 6.210058927536011e-05, 6.443634629249573e-05, 6.677210330963135e-05, 6.910786032676697e-05, 7.144361734390259e-05, 7.377937436103821e-05, 7.611513137817383e-05]}, "gradients/decoder.transformer.h.9.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 3.0, 2.0, 5.0, 3.0, 3.0, 9.0, 3.0, 6.0, 8.0, 8.0, 5.0, 22.0, 23.0, 26.0, 40.0, 74.0, 71.0, 219.0, 1326.0, 17663.0, 976298.0, 49673.0, 2390.0, 336.0, 97.0, 68.0, 41.0, 25.0, 26.0, 18.0, 7.0, 13.0, 7.0, 8.0, 6.0, 8.0, 9.0, 2.0, 3.0, 3.0, 0.0, 3.0, 1.0, 1.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.0013227462768554688, -0.0012822151184082031, -0.0012416839599609375, -0.0012011528015136719, -0.0011606216430664062, -0.0011200904846191406, -0.001079559326171875, -0.0010390281677246094, -0.0009984970092773438, -0.0009579658508300781, -0.0009174346923828125, -0.0008769035339355469, -0.0008363723754882812, -0.0007958412170410156, -0.00075531005859375, -0.0007147789001464844, -0.0006742477416992188, -0.0006337165832519531, -0.0005931854248046875, -0.0005526542663574219, -0.0005121231079101562, -0.0004715919494628906, -0.000431060791015625, -0.0003905296325683594, -0.00034999847412109375, -0.0003094673156738281, -0.0002689361572265625, -0.00022840499877929688, -0.00018787384033203125, -0.00014734268188476562, -0.0001068115234375, -6.628036499023438e-05, -2.574920654296875e-05, 1.4781951904296875e-05, 5.53131103515625e-05, 9.584426879882812e-05, 0.00013637542724609375, 0.00017690658569335938, 0.000217437744140625, 0.0002579689025878906, 0.00029850006103515625, 0.0003390312194824219, 0.0003795623779296875, 0.0004200935363769531, 0.00046062469482421875, 0.0005011558532714844, 0.00054168701171875, 0.0005822181701660156, 0.0006227493286132812, 0.0006632804870605469, 0.0007038116455078125, 0.0007443428039550781, 0.0007848739624023438, 0.0008254051208496094, 0.000865936279296875, 0.0009064674377441406, 0.0009469985961914062, 0.0009875297546386719, 0.0010280609130859375, 0.0010685920715332031, 0.0011091232299804688, 0.0011496543884277344, 0.001190185546875, 0.0012307167053222656, 0.0012712478637695312]}, "gradients/decoder.transformer.h.9.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 5.0, 16.0, 73.0, 419.0, 416.0, 73.0, 13.0, 5.0, 1.0, 1.0], "bins": [-0.0004778263682965189, -0.0004695859388448298, -0.0004613454802893102, -0.0004531050508376211, -0.0004448645922821015, -0.0004366241628304124, -0.0004283837042748928, -0.0004201432748232037, -0.0004119028162676841, -0.000403662386815995, -0.0003954219282604754, -0.0003871814988087863, -0.0003789410402532667, -0.00037070061080157757, -0.000362460152246058, -0.00035421972279436886, -0.00034597929334267974, -0.0003377388638909906, -0.00032949840533547103, -0.0003212579758837819, -0.00031301751732826233, -0.0003047770878765732, -0.0002965366293210536, -0.0002882961998693645, -0.0002800557413138449, -0.0002718153118621558, -0.0002635748533066362, -0.0002553344238549471, -0.0002470939652994275, -0.00023885352129582316, -0.0002306130772922188, -0.00022237264784052968, -0.0002141321892850101, -0.00020589174528140575, -0.0001976513012778014, -0.00018941085727419704, -0.0001811704132705927, -0.00017292996926698834, -0.00016468952526338398, -0.00015644909581169486, -0.0001482086518080905, -0.00013996820780448616, -0.0001317277638008818, -0.00012348731979727745, -0.0001152468757936731, -0.00010700643179006875, -9.876599506242201e-05, -9.052555105881765e-05, -8.22851070552133e-05, -7.404466305160895e-05, -6.58042190480046e-05, -5.756377868237905e-05, -4.93233346787747e-05, -4.108289067517035e-05, -3.28424503095448e-05, -2.460200630594045e-05, -1.6361562302336097e-05, -8.121119208226446e-06, 1.1932388588320464e-07, 8.359766070498154e-06, 1.6600210074102506e-05, 2.484065407770686e-05, 3.3081094443332404e-05, 4.1321538446936756e-05, 4.956198245054111e-05]}, "gradients/decoder.transformer.h.9.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 2.0, 3.0, 1.0, 4.0, 4.0, 6.0, 10.0, 12.0, 11.0, 12.0, 20.0, 19.0, 15.0, 28.0, 24.0, 24.0, 33.0, 36.0, 30.0, 31.0, 27.0, 33.0, 30.0, 47.0, 45.0, 31.0, 50.0, 43.0, 52.0, 31.0, 27.0, 25.0, 30.0, 20.0, 24.0, 22.0, 29.0, 16.0, 20.0, 16.0, 12.0, 13.0, 7.0, 8.0, 9.0, 7.0, 4.0, 3.0, 2.0, 3.0, 2.0, 0.0, 0.0, 0.0, 4.0, 1.0], "bins": [-3.1948089599609375e-05, -3.098789602518082e-05, -3.0027702450752258e-05, -2.90675088763237e-05, -2.810731530189514e-05, -2.7147121727466583e-05, -2.6186928153038025e-05, -2.5226734578609467e-05, -2.4266541004180908e-05, -2.330634742975235e-05, -2.234615385532379e-05, -2.1385960280895233e-05, -2.0425766706466675e-05, -1.9465573132038116e-05, -1.8505379557609558e-05, -1.7545185983181e-05, -1.658499240875244e-05, -1.5624798834323883e-05, -1.4664605259895325e-05, -1.3704411685466766e-05, -1.2744218111038208e-05, -1.178402453660965e-05, -1.0823830962181091e-05, -9.863637387752533e-06, -8.903443813323975e-06, -7.943250238895416e-06, -6.983056664466858e-06, -6.0228630900382996e-06, -5.062669515609741e-06, -4.102475941181183e-06, -3.1422823667526245e-06, -2.182088792324066e-06, -1.2218952178955078e-06, -2.6170164346694946e-07, 6.984919309616089e-07, 1.6586855053901672e-06, 2.6188790798187256e-06, 3.579072654247284e-06, 4.539266228675842e-06, 5.499459803104401e-06, 6.459653377532959e-06, 7.419846951961517e-06, 8.380040526390076e-06, 9.340234100818634e-06, 1.0300427675247192e-05, 1.126062124967575e-05, 1.2220814824104309e-05, 1.3181008398532867e-05, 1.4141201972961426e-05, 1.5101395547389984e-05, 1.6061589121818542e-05, 1.70217826962471e-05, 1.798197627067566e-05, 1.8942169845104218e-05, 1.9902363419532776e-05, 2.0862556993961334e-05, 2.1822750568389893e-05, 2.278294414281845e-05, 2.374313771724701e-05, 2.4703331291675568e-05, 2.5663524866104126e-05, 2.6623718440532684e-05, 2.7583912014961243e-05, 2.85441055893898e-05, 2.950429916381836e-05]}, "gradients/decoder.transformer.h.9.attn.c_proj.bias": {"_type": "histogram", "values": [3.0, 2.0, 1.0, 1.0, 4.0, 3.0, 3.0, 8.0, 6.0, 6.0, 8.0, 14.0, 15.0, 15.0, 22.0, 25.0, 28.0, 36.0, 37.0, 34.0, 44.0, 45.0, 37.0, 46.0, 41.0, 35.0, 55.0, 27.0, 38.0, 37.0, 47.0, 43.0, 34.0, 28.0, 25.0, 20.0, 23.0, 22.0, 17.0, 19.0, 8.0, 11.0, 2.0, 13.0, 8.0, 7.0, 7.0, 2.0, 2.0, 1.0, 5.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-37.46875, -36.03125, -34.59375, -33.15625, -31.71875, -30.28125, -28.84375, -27.40625, -25.96875, -24.53125, -23.09375, -21.65625, -20.21875, -18.78125, -17.34375, -15.90625, -14.46875, -13.03125, -11.59375, -10.15625, -8.71875, -7.28125, -5.84375, -4.40625, -2.96875, -1.53125, -0.09375, 1.34375, 2.78125, 4.21875, 5.65625, 7.09375, 8.53125, 9.96875, 11.40625, 12.84375, 14.28125, 15.71875, 17.15625, 18.59375, 20.03125, 21.46875, 22.90625, 24.34375, 25.78125, 27.21875, 28.65625, 30.09375, 31.53125, 32.96875, 34.40625, 35.84375, 37.28125, 38.71875, 40.15625, 41.59375, 43.03125, 44.46875, 45.90625, 47.34375, 48.78125, 50.21875, 51.65625, 53.09375, 54.53125]}, "gradients/decoder.transformer.h.9.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 6.0, 6.0, 8.0, 5.0, 15.0, 19.0, 27.0, 53.0, 51.0, 78.0, 100.0, 139.0, 214.0, 273.0, 387.0, 519.0, 790.0, 1098.0, 1630.0, 2539.0, 4106.0, 7662.0, 18490.0, 98478.0, 764987.0, 107666.0, 19190.0, 7767.0, 4360.0, 2451.0, 1613.0, 1140.0, 758.0, 519.0, 390.0, 291.0, 209.0, 169.0, 109.0, 68.0, 49.0, 41.0, 31.0, 26.0, 7.0, 10.0, 7.0, 8.0, 4.0, 3.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-65.25, -62.9619140625, -60.673828125, -58.3857421875, -56.09765625, -53.8095703125, -51.521484375, -49.2333984375, -46.9453125, -44.6572265625, -42.369140625, -40.0810546875, -37.79296875, -35.5048828125, -33.216796875, -30.9287109375, -28.640625, -26.3525390625, -24.064453125, -21.7763671875, -19.48828125, -17.2001953125, -14.912109375, -12.6240234375, -10.3359375, -8.0478515625, -5.759765625, -3.4716796875, -1.18359375, 1.1044921875, 3.392578125, 5.6806640625, 7.96875, 10.2568359375, 12.544921875, 14.8330078125, 17.12109375, 19.4091796875, 21.697265625, 23.9853515625, 26.2734375, 28.5615234375, 30.849609375, 33.1376953125, 35.42578125, 37.7138671875, 40.001953125, 42.2900390625, 44.578125, 46.8662109375, 49.154296875, 51.4423828125, 53.73046875, 56.0185546875, 58.306640625, 60.5947265625, 62.8828125, 65.1708984375, 67.458984375, 69.7470703125, 72.03515625, 74.3232421875, 76.611328125, 78.8994140625, 81.1875]}, "gradients/decoder.transformer.h.9.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 5.0, 3.0, 4.0, 6.0, 5.0, 1.0, 12.0, 6.0, 11.0, 9.0, 11.0, 19.0, 26.0, 32.0, 18.0, 34.0, 32.0, 41.0, 36.0, 48.0, 47.0, 73.0, 99.0, 232.0, 1678.0, 126.0, 76.0, 58.0, 43.0, 48.0, 26.0, 30.0, 30.0, 26.0, 21.0, 22.0, 10.0, 11.0, 13.0, 14.0, 3.0, 4.0, 2.0, 3.0, 2.0, 2.0, 4.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-158.75, -153.9609375, -149.171875, -144.3828125, -139.59375, -134.8046875, -130.015625, -125.2265625, -120.4375, -115.6484375, -110.859375, -106.0703125, -101.28125, -96.4921875, -91.703125, -86.9140625, -82.125, -77.3359375, -72.546875, -67.7578125, -62.96875, -58.1796875, -53.390625, -48.6015625, -43.8125, -39.0234375, -34.234375, -29.4453125, -24.65625, -19.8671875, -15.078125, -10.2890625, -5.5, -0.7109375, 4.078125, 8.8671875, 13.65625, 18.4453125, 23.234375, 28.0234375, 32.8125, 37.6015625, 42.390625, 47.1796875, 51.96875, 56.7578125, 61.546875, 66.3359375, 71.125, 75.9140625, 80.703125, 85.4921875, 90.28125, 95.0703125, 99.859375, 104.6484375, 109.4375, 114.2265625, 119.015625, 123.8046875, 128.59375, 133.3828125, 138.171875, 142.9609375, 147.75]}, "gradients/decoder.transformer.h.9.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 0.0, 3.0, 1.0, 0.0, 1.0, 3.0, 2.0, 5.0, 1.0, 5.0, 6.0, 13.0, 14.0, 12.0, 19.0, 26.0, 31.0, 37.0, 41.0, 68.0, 99.0, 219.0, 456.0, 1172.0, 4545.0, 66551.0, 3040014.0, 27402.0, 2966.0, 986.0, 420.0, 187.0, 93.0, 64.0, 58.0, 35.0, 28.0, 25.0, 22.0, 17.0, 13.0, 9.0, 8.0, 8.0, 8.0, 4.0, 6.0, 7.0, 3.0, 4.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0], "bins": [-386.25, -373.53515625, -360.8203125, -348.10546875, -335.390625, -322.67578125, -309.9609375, -297.24609375, -284.53125, -271.81640625, -259.1015625, -246.38671875, -233.671875, -220.95703125, -208.2421875, -195.52734375, -182.8125, -170.09765625, -157.3828125, -144.66796875, -131.953125, -119.23828125, -106.5234375, -93.80859375, -81.09375, -68.37890625, -55.6640625, -42.94921875, -30.234375, -17.51953125, -4.8046875, 7.91015625, 20.625, 33.33984375, 46.0546875, 58.76953125, 71.484375, 84.19921875, 96.9140625, 109.62890625, 122.34375, 135.05859375, 147.7734375, 160.48828125, 173.203125, 185.91796875, 198.6328125, 211.34765625, 224.0625, 236.77734375, 249.4921875, 262.20703125, 274.921875, 287.63671875, 300.3515625, 313.06640625, 325.78125, 338.49609375, 351.2109375, 363.92578125, 376.640625, 389.35546875, 402.0703125, 414.78515625, 427.5]}, "gradients/decoder.transformer.h.9.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 11.0, 27.0, 146.0, 527.0, 236.0, 47.0, 16.0, 5.0], "bins": [-1364.74169921875, -1341.8203125, -1318.89892578125, -1295.9775390625, -1273.05615234375, -1250.134765625, -1227.21337890625, -1204.2919921875, -1181.37060546875, -1158.44921875, -1135.52783203125, -1112.6064453125, -1089.68505859375, -1066.763671875, -1043.84228515625, -1020.9208374023438, -997.9993896484375, -975.0780029296875, -952.1566162109375, -929.2352294921875, -906.3138427734375, -883.3923950195312, -860.4710083007812, -837.5496215820312, -814.6282348632812, -791.7068481445312, -768.7854614257812, -745.8640747070312, -722.942626953125, -700.021240234375, -677.099853515625, -654.178466796875, -631.257080078125, -608.335693359375, -585.414306640625, -562.492919921875, -539.571533203125, -516.6500854492188, -493.72869873046875, -470.80731201171875, -447.88592529296875, -424.96453857421875, -402.04315185546875, -379.1217346191406, -356.2003479003906, -333.2789611816406, -310.3575439453125, -287.4361572265625, -264.5147705078125, -241.5933837890625, -218.67198181152344, -195.75057983398438, -172.82919311523438, -149.90780639648438, -126.98640441894531, -104.06500244140625, -81.14360809326172, -58.22221374511719, -35.300819396972656, -12.379425048828125, 10.541969299316406, 33.46336364746094, 56.38475799560547, 79.30615997314453, 102.22754669189453]}, "gradients/decoder.transformer.h.9.ln_1.bias": {"_type": "histogram", "values": [2.0, 1.0, 3.0, 0.0, 0.0, 3.0, 2.0, 5.0, 3.0, 4.0, 5.0, 7.0, 10.0, 5.0, 12.0, 8.0, 11.0, 12.0, 24.0, 15.0, 16.0, 22.0, 20.0, 26.0, 39.0, 25.0, 36.0, 42.0, 33.0, 44.0, 29.0, 36.0, 36.0, 29.0, 42.0, 37.0, 30.0, 35.0, 27.0, 27.0, 31.0, 29.0, 20.0, 21.0, 25.0, 22.0, 19.0, 14.0, 11.0, 8.0, 6.0, 11.0, 9.0, 8.0, 4.0, 2.0, 5.0, 2.0, 2.0, 4.0, 4.0, 0.0, 3.0, 1.0], "bins": [-371.7740173339844, -360.10107421875, -348.4281311035156, -336.75518798828125, -325.0822448730469, -313.4093017578125, -301.73638916015625, -290.06341552734375, -278.3905029296875, -266.7175598144531, -255.04461669921875, -243.37167358398438, -231.69873046875, -220.02578735351562, -208.3528594970703, -196.67991638183594, -185.0069580078125, -173.33401489257812, -161.66107177734375, -149.98812866210938, -138.315185546875, -126.64225006103516, -114.96931457519531, -103.29637145996094, -91.62342834472656, -79.95048522949219, -68.27754211425781, -56.60460662841797, -44.931663513183594, -33.25872039794922, -21.585784912109375, -9.912841796875, 1.7601318359375, 13.433073043823242, 25.106014251708984, 36.778953552246094, 48.45189666748047, 60.124839782714844, 71.79777526855469, 83.47071838378906, 95.14366149902344, 106.81660461425781, 118.48954772949219, 130.1624755859375, 141.83541870117188, 153.50836181640625, 165.18130493164062, 176.854248046875, 188.52719116210938, 200.20013427734375, 211.87307739257812, 223.5460205078125, 235.21896362304688, 246.89190673828125, 258.5648193359375, 270.23779296875, 281.91070556640625, 293.5836486816406, 305.256591796875, 316.9295349121094, 328.60247802734375, 340.2754211425781, 351.9483642578125, 363.62127685546875, 375.29425048828125]}, "gradients/decoder.transformer.h.8.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 0.0, 5.0, 1.0, 3.0, 7.0, 8.0, 5.0, 6.0, 12.0, 17.0, 16.0, 16.0, 20.0, 33.0, 28.0, 29.0, 35.0, 33.0, 42.0, 37.0, 44.0, 43.0, 54.0, 36.0, 41.0, 50.0, 46.0, 36.0, 30.0, 31.0, 37.0, 32.0, 27.0, 30.0, 15.0, 17.0, 13.0, 14.0, 12.0, 10.0, 11.0, 6.0, 10.0, 3.0, 3.0, 6.0, 2.0, 1.0, 0.0, 2.0, 0.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-41.90625, -40.33935546875, -38.7724609375, -37.20556640625, -35.638671875, -34.07177734375, -32.5048828125, -30.93798828125, -29.37109375, -27.80419921875, -26.2373046875, -24.67041015625, -23.103515625, -21.53662109375, -19.9697265625, -18.40283203125, -16.8359375, -15.26904296875, -13.7021484375, -12.13525390625, -10.568359375, -9.00146484375, -7.4345703125, -5.86767578125, -4.30078125, -2.73388671875, -1.1669921875, 0.39990234375, 1.966796875, 3.53369140625, 5.1005859375, 6.66748046875, 8.234375, 9.80126953125, 11.3681640625, 12.93505859375, 14.501953125, 16.06884765625, 17.6357421875, 19.20263671875, 20.76953125, 22.33642578125, 23.9033203125, 25.47021484375, 27.037109375, 28.60400390625, 30.1708984375, 31.73779296875, 33.3046875, 34.87158203125, 36.4384765625, 38.00537109375, 39.572265625, 41.13916015625, 42.7060546875, 44.27294921875, 45.83984375, 47.40673828125, 48.9736328125, 50.54052734375, 52.107421875, 53.67431640625, 55.2412109375, 56.80810546875, 58.375]}, "gradients/decoder.transformer.h.8.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 2.0, 1.0, 2.0, 4.0, 2.0, 8.0, 9.0, 14.0, 32.0, 28.0, 36.0, 56.0, 105.0, 110.0, 175.0, 214.0, 317.0, 445.0, 576.0, 888.0, 1310.0, 1850.0, 2919.0, 4660.0, 8186.0, 16518.0, 71574.0, 818059.0, 2936858.0, 270086.0, 29059.0, 11881.0, 6311.0, 3918.0, 2444.0, 1681.0, 1107.0, 793.0, 588.0, 381.0, 278.0, 232.0, 150.0, 116.0, 83.0, 59.0, 50.0, 39.0, 23.0, 18.0, 13.0, 8.0, 9.0, 5.0, 3.0, 3.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-92.5625, -89.40234375, -86.2421875, -83.08203125, -79.921875, -76.76171875, -73.6015625, -70.44140625, -67.28125, -64.12109375, -60.9609375, -57.80078125, -54.640625, -51.48046875, -48.3203125, -45.16015625, -42.0, -38.83984375, -35.6796875, -32.51953125, -29.359375, -26.19921875, -23.0390625, -19.87890625, -16.71875, -13.55859375, -10.3984375, -7.23828125, -4.078125, -0.91796875, 2.2421875, 5.40234375, 8.5625, 11.72265625, 14.8828125, 18.04296875, 21.203125, 24.36328125, 27.5234375, 30.68359375, 33.84375, 37.00390625, 40.1640625, 43.32421875, 46.484375, 49.64453125, 52.8046875, 55.96484375, 59.125, 62.28515625, 65.4453125, 68.60546875, 71.765625, 74.92578125, 78.0859375, 81.24609375, 84.40625, 87.56640625, 90.7265625, 93.88671875, 97.046875, 100.20703125, 103.3671875, 106.52734375, 109.6875]}, "gradients/decoder.transformer.h.8.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 2.0, 4.0, 5.0, 6.0, 11.0, 11.0, 13.0, 19.0, 20.0, 25.0, 35.0, 44.0, 60.0, 93.0, 192.0, 425.0, 854.0, 1128.0, 579.0, 236.0, 110.0, 62.0, 34.0, 37.0, 20.0, 9.0, 13.0, 7.0, 6.0, 3.0, 9.0, 4.0, 1.0, 1.0, 2.0, 2.0, 0.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-103.6875, -100.138671875, -96.58984375, -93.041015625, -89.4921875, -85.943359375, -82.39453125, -78.845703125, -75.296875, -71.748046875, -68.19921875, -64.650390625, -61.1015625, -57.552734375, -54.00390625, -50.455078125, -46.90625, -43.357421875, -39.80859375, -36.259765625, -32.7109375, -29.162109375, -25.61328125, -22.064453125, -18.515625, -14.966796875, -11.41796875, -7.869140625, -4.3203125, -0.771484375, 2.77734375, 6.326171875, 9.875, 13.423828125, 16.97265625, 20.521484375, 24.0703125, 27.619140625, 31.16796875, 34.716796875, 38.265625, 41.814453125, 45.36328125, 48.912109375, 52.4609375, 56.009765625, 59.55859375, 63.107421875, 66.65625, 70.205078125, 73.75390625, 77.302734375, 80.8515625, 84.400390625, 87.94921875, 91.498046875, 95.046875, 98.595703125, 102.14453125, 105.693359375, 109.2421875, 112.791015625, 116.33984375, 119.888671875, 123.4375]}, "gradients/decoder.transformer.h.8.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 2.0, 1.0, 2.0, 4.0, 1.0, 3.0, 3.0, 7.0, 9.0, 18.0, 15.0, 40.0, 59.0, 92.0, 181.0, 354.0, 737.0, 1859.0, 5568.0, 21110.0, 149051.0, 3867147.0, 120922.0, 18771.0, 5060.0, 1743.0, 708.0, 327.0, 204.0, 110.0, 49.0, 38.0, 28.0, 18.0, 16.0, 11.0, 13.0, 5.0, 4.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 3.0], "bins": [-329.0, -319.72265625, -310.4453125, -301.16796875, -291.890625, -282.61328125, -273.3359375, -264.05859375, -254.78125, -245.50390625, -236.2265625, -226.94921875, -217.671875, -208.39453125, -199.1171875, -189.83984375, -180.5625, -171.28515625, -162.0078125, -152.73046875, -143.453125, -134.17578125, -124.8984375, -115.62109375, -106.34375, -97.06640625, -87.7890625, -78.51171875, -69.234375, -59.95703125, -50.6796875, -41.40234375, -32.125, -22.84765625, -13.5703125, -4.29296875, 4.984375, 14.26171875, 23.5390625, 32.81640625, 42.09375, 51.37109375, 60.6484375, 69.92578125, 79.203125, 88.48046875, 97.7578125, 107.03515625, 116.3125, 125.58984375, 134.8671875, 144.14453125, 153.421875, 162.69921875, 171.9765625, 181.25390625, 190.53125, 199.80859375, 209.0859375, 218.36328125, 227.640625, 236.91796875, 246.1953125, 255.47265625, 264.75]}, "gradients/decoder.transformer.h.8.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 28.0, 263.0, 639.0, 75.0, 7.0, 3.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3861.946044921875, -3776.6220703125, -3691.298095703125, -3605.973876953125, -3520.64990234375, -3435.325927734375, -3350.001953125, -3264.677978515625, -3179.35400390625, -3094.030029296875, -3008.7060546875, -2923.3818359375, -2838.057861328125, -2752.73388671875, -2667.409912109375, -2582.0859375, -2496.76171875, -2411.437744140625, -2326.11376953125, -2240.78955078125, -2155.465576171875, -2070.1416015625, -1984.817626953125, -1899.49365234375, -1814.1695556640625, -1728.8455810546875, -1643.521484375, -1558.197509765625, -1472.87353515625, -1387.5494384765625, -1302.2254638671875, -1216.9013671875, -1131.5771484375, -1046.253173828125, -960.9290771484375, -875.6051025390625, -790.2810668945312, -704.95703125, -619.633056640625, -534.3090209960938, -448.9849853515625, -363.66094970703125, -278.3369445800781, -193.01292419433594, -107.68890380859375, -22.3648681640625, 62.959136962890625, 148.28314208984375, 233.607177734375, 318.93121337890625, 404.2552185058594, 489.5792236328125, 574.9032592773438, 660.227294921875, 745.55126953125, 830.8753051757812, 916.1993408203125, 1001.5233764648438, 1086.847412109375, 1172.17138671875, 1257.495361328125, 1342.8194580078125, 1428.1434326171875, 1513.467529296875, 1598.79150390625]}, "gradients/decoder.transformer.h.8.ln_2.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 3.0, 5.0, 2.0, 3.0, 4.0, 5.0, 13.0, 18.0, 19.0, 21.0, 31.0, 34.0, 28.0, 35.0, 42.0, 53.0, 41.0, 48.0, 54.0, 44.0, 66.0, 52.0, 45.0, 38.0, 43.0, 51.0, 37.0, 40.0, 30.0, 26.0, 22.0, 16.0, 9.0, 7.0, 14.0, 5.0, 4.0, 0.0, 1.0, 4.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-280.37890625, -270.19659423828125, -260.0142517089844, -249.83192443847656, -239.64959716796875, -229.46728515625, -219.2849578857422, -209.10263061523438, -198.92030334472656, -188.73797607421875, -178.55564880371094, -168.37332153320312, -158.19100952148438, -148.0086669921875, -137.82635498046875, -127.64402770996094, -117.46170043945312, -107.27937316894531, -97.0970458984375, -86.91472625732422, -76.7323989868164, -66.5500717163086, -56.36774826049805, -46.1854248046875, -36.00309753417969, -25.820772171020508, -15.638446807861328, -5.456121444702148, 4.726203918457031, 14.908531188964844, 25.09085464477539, 35.27317810058594, 45.455535888671875, 55.63786315917969, 65.8201904296875, 76.00251007080078, 86.1848373413086, 96.3671646118164, 106.54948425292969, 116.7318115234375, 126.91413879394531, 137.09646606445312, 147.27879333496094, 157.46112060546875, 167.6434326171875, 177.82577514648438, 188.00808715820312, 198.19041442871094, 208.37274169921875, 218.55506896972656, 228.73739624023438, 238.9197235107422, 249.10205078125, 259.28436279296875, 269.4667053222656, 279.6490173339844, 289.83135986328125, 300.013671875, 310.1960144042969, 320.3783264160156, 330.5606689453125, 340.74298095703125, 350.9253234863281, 361.1076354980469, 371.2899475097656]}, "gradients/decoder.transformer.h.8.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 3.0, 2.0, 1.0, 4.0, 2.0, 6.0, 6.0, 5.0, 14.0, 15.0, 12.0, 10.0, 16.0, 20.0, 22.0, 22.0, 26.0, 24.0, 37.0, 28.0, 36.0, 43.0, 44.0, 38.0, 48.0, 40.0, 35.0, 30.0, 33.0, 42.0, 45.0, 45.0, 35.0, 17.0, 36.0, 21.0, 20.0, 19.0, 14.0, 16.0, 11.0, 11.0, 9.0, 10.0, 8.0, 10.0, 4.0, 4.0, 4.0, 6.0, 1.0, 1.0, 1.0, 1.0, 4.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-39.96875, -38.54443359375, -37.1201171875, -35.69580078125, -34.271484375, -32.84716796875, -31.4228515625, -29.99853515625, -28.57421875, -27.14990234375, -25.7255859375, -24.30126953125, -22.876953125, -21.45263671875, -20.0283203125, -18.60400390625, -17.1796875, -15.75537109375, -14.3310546875, -12.90673828125, -11.482421875, -10.05810546875, -8.6337890625, -7.20947265625, -5.78515625, -4.36083984375, -2.9365234375, -1.51220703125, -0.087890625, 1.33642578125, 2.7607421875, 4.18505859375, 5.609375, 7.03369140625, 8.4580078125, 9.88232421875, 11.306640625, 12.73095703125, 14.1552734375, 15.57958984375, 17.00390625, 18.42822265625, 19.8525390625, 21.27685546875, 22.701171875, 24.12548828125, 25.5498046875, 26.97412109375, 28.3984375, 29.82275390625, 31.2470703125, 32.67138671875, 34.095703125, 35.52001953125, 36.9443359375, 38.36865234375, 39.79296875, 41.21728515625, 42.6416015625, 44.06591796875, 45.490234375, 46.91455078125, 48.3388671875, 49.76318359375, 51.1875]}, "gradients/decoder.transformer.h.8.crossattention.c_proj.weight": {"_type": "histogram", "values": [3.0, 2.0, 8.0, 12.0, 19.0, 21.0, 36.0, 56.0, 69.0, 102.0, 175.0, 226.0, 324.0, 475.0, 660.0, 967.0, 1432.0, 2052.0, 3039.0, 4422.0, 6514.0, 9768.0, 14783.0, 22506.0, 35497.0, 58800.0, 104776.0, 230998.0, 268616.0, 111786.0, 62457.0, 37240.0, 23542.0, 15413.0, 10190.0, 6788.0, 4649.0, 3179.0, 2118.0, 1476.0, 1031.0, 692.0, 510.0, 311.0, 248.0, 176.0, 107.0, 79.0, 76.0, 36.0, 25.0, 24.0, 21.0, 12.0, 5.0, 5.0, 11.0, 2.0, 3.0, 2.0, 3.0, 0.0, 0.0, 1.0], "bins": [-1.482421875, -1.4295806884765625, -1.376739501953125, -1.3238983154296875, -1.27105712890625, -1.2182159423828125, -1.165374755859375, -1.1125335693359375, -1.0596923828125, -1.0068511962890625, -0.954010009765625, -0.9011688232421875, -0.84832763671875, -0.7954864501953125, -0.742645263671875, -0.6898040771484375, -0.636962890625, -0.5841217041015625, -0.531280517578125, -0.4784393310546875, -0.42559814453125, -0.3727569580078125, -0.319915771484375, -0.2670745849609375, -0.2142333984375, -0.1613922119140625, -0.108551025390625, -0.0557098388671875, -0.00286865234375, 0.0499725341796875, 0.102813720703125, 0.1556549072265625, 0.20849609375, 0.2613372802734375, 0.314178466796875, 0.3670196533203125, 0.41986083984375, 0.4727020263671875, 0.525543212890625, 0.5783843994140625, 0.6312255859375, 0.6840667724609375, 0.736907958984375, 0.7897491455078125, 0.84259033203125, 0.8954315185546875, 0.948272705078125, 1.0011138916015625, 1.053955078125, 1.1067962646484375, 1.159637451171875, 1.2124786376953125, 1.26531982421875, 1.3181610107421875, 1.371002197265625, 1.4238433837890625, 1.4766845703125, 1.5295257568359375, 1.582366943359375, 1.6352081298828125, 1.68804931640625, 1.7408905029296875, 1.793731689453125, 1.8465728759765625, 1.8994140625]}, "gradients/decoder.transformer.h.8.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 0.0, 2.0, 2.0, 3.0, 5.0, 7.0, 5.0, 14.0, 10.0, 14.0, 17.0, 22.0, 13.0, 26.0, 27.0, 29.0, 41.0, 35.0, 42.0, 49.0, 45.0, 45.0, 36.0, 1062.0, 48.0, 46.0, 43.0, 32.0, 38.0, 36.0, 31.0, 25.0, 28.0, 23.0, 22.0, 28.0, 20.0, 13.0, 13.0, 12.0, 7.0, 9.0, 5.0, 4.0, 4.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-33.15625, -32.10888671875, -31.0615234375, -30.01416015625, -28.966796875, -27.91943359375, -26.8720703125, -25.82470703125, -24.77734375, -23.72998046875, -22.6826171875, -21.63525390625, -20.587890625, -19.54052734375, -18.4931640625, -17.44580078125, -16.3984375, -15.35107421875, -14.3037109375, -13.25634765625, -12.208984375, -11.16162109375, -10.1142578125, -9.06689453125, -8.01953125, -6.97216796875, -5.9248046875, -4.87744140625, -3.830078125, -2.78271484375, -1.7353515625, -0.68798828125, 0.359375, 1.40673828125, 2.4541015625, 3.50146484375, 4.548828125, 5.59619140625, 6.6435546875, 7.69091796875, 8.73828125, 9.78564453125, 10.8330078125, 11.88037109375, 12.927734375, 13.97509765625, 15.0224609375, 16.06982421875, 17.1171875, 18.16455078125, 19.2119140625, 20.25927734375, 21.306640625, 22.35400390625, 23.4013671875, 24.44873046875, 25.49609375, 26.54345703125, 27.5908203125, 28.63818359375, 29.685546875, 30.73291015625, 31.7802734375, 32.82763671875, 33.875]}, "gradients/decoder.transformer.h.8.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 2.0, 1.0, 1.0, 0.0, 4.0, 5.0, 5.0, 10.0, 11.0, 14.0, 34.0, 38.0, 59.0, 94.0, 138.0, 203.0, 302.0, 406.0, 627.0, 1002.0, 1466.0, 2340.0, 3555.0, 5451.0, 8455.0, 13199.0, 20497.0, 32032.0, 52411.0, 88782.0, 173436.0, 1367159.0, 131839.0, 73256.0, 43890.0, 27241.0, 17162.0, 11194.0, 7203.0, 4735.0, 3030.0, 2011.0, 1271.0, 884.0, 549.0, 393.0, 230.0, 178.0, 106.0, 73.0, 53.0, 39.0, 22.0, 13.0, 8.0, 11.0, 5.0, 7.0, 1.0, 1.0, 1.0, 2.0, 3.0], "bins": [-1.38671875, -1.34381103515625, -1.3009033203125, -1.25799560546875, -1.215087890625, -1.17218017578125, -1.1292724609375, -1.08636474609375, -1.04345703125, -1.00054931640625, -0.9576416015625, -0.91473388671875, -0.871826171875, -0.82891845703125, -0.7860107421875, -0.74310302734375, -0.7001953125, -0.65728759765625, -0.6143798828125, -0.57147216796875, -0.528564453125, -0.48565673828125, -0.4427490234375, -0.39984130859375, -0.35693359375, -0.31402587890625, -0.2711181640625, -0.22821044921875, -0.185302734375, -0.14239501953125, -0.0994873046875, -0.05657958984375, -0.013671875, 0.02923583984375, 0.0721435546875, 0.11505126953125, 0.157958984375, 0.20086669921875, 0.2437744140625, 0.28668212890625, 0.32958984375, 0.37249755859375, 0.4154052734375, 0.45831298828125, 0.501220703125, 0.54412841796875, 0.5870361328125, 0.62994384765625, 0.6728515625, 0.71575927734375, 0.7586669921875, 0.80157470703125, 0.844482421875, 0.88739013671875, 0.9302978515625, 0.97320556640625, 1.01611328125, 1.05902099609375, 1.1019287109375, 1.14483642578125, 1.187744140625, 1.23065185546875, 1.2735595703125, 1.31646728515625, 1.359375]}, "gradients/decoder.transformer.h.8.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 0.0, 3.0, 1.0, 2.0, 6.0, 11.0, 10.0, 12.0, 9.0, 16.0, 21.0, 35.0, 33.0, 47.0, 57.0, 63.0, 71.0, 202.0, 75.0, 78.0, 49.0, 38.0, 32.0, 30.0, 28.0, 19.0, 16.0, 9.0, 11.0, 3.0, 2.0, 5.0, 3.0, 6.0, 2.0, 4.0, 0.0, 0.0, 3.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.641315460205078e-05, -7.391534745693207e-05, -7.141754031181335e-05, -6.891973316669464e-05, -6.642192602157593e-05, -6.392411887645721e-05, -6.14263117313385e-05, -5.892850458621979e-05, -5.6430697441101074e-05, -5.393289029598236e-05, -5.143508315086365e-05, -4.8937276005744934e-05, -4.643946886062622e-05, -4.394166171550751e-05, -4.1443854570388794e-05, -3.894604742527008e-05, -3.644824028015137e-05, -3.3950433135032654e-05, -3.145262598991394e-05, -2.8954818844795227e-05, -2.6457011699676514e-05, -2.39592045545578e-05, -2.1461397409439087e-05, -1.8963590264320374e-05, -1.646578311920166e-05, -1.3967975974082947e-05, -1.1470168828964233e-05, -8.97236168384552e-06, -6.474554538726807e-06, -3.976747393608093e-06, -1.4789402484893799e-06, 1.0188668966293335e-06, 3.516674041748047e-06, 6.01448118686676e-06, 8.512288331985474e-06, 1.1010095477104187e-05, 1.35079026222229e-05, 1.6005709767341614e-05, 1.8503516912460327e-05, 2.100132405757904e-05, 2.3499131202697754e-05, 2.5996938347816467e-05, 2.849474549293518e-05, 3.0992552638053894e-05, 3.349035978317261e-05, 3.598816692829132e-05, 3.8485974073410034e-05, 4.098378121852875e-05, 4.348158836364746e-05, 4.5979395508766174e-05, 4.847720265388489e-05, 5.09750097990036e-05, 5.3472816944122314e-05, 5.597062408924103e-05, 5.846843123435974e-05, 6.0966238379478455e-05, 6.346404552459717e-05, 6.596185266971588e-05, 6.84596598148346e-05, 7.095746695995331e-05, 7.345527410507202e-05, 7.595308125019073e-05, 7.845088839530945e-05, 8.094869554042816e-05, 8.344650268554688e-05]}, "gradients/decoder.transformer.h.8.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 2.0, 1.0, 1.0, 4.0, 3.0, 3.0, 5.0, 4.0, 9.0, 9.0, 19.0, 15.0, 21.0, 26.0, 34.0, 62.0, 64.0, 136.0, 359.0, 2048.0, 38318.0, 987083.0, 18280.0, 1389.0, 296.0, 108.0, 84.0, 37.0, 38.0, 29.0, 24.0, 14.0, 8.0, 11.0, 8.0, 3.0, 6.0, 1.0, 2.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.0014324188232421875, -0.001389533281326294, -0.0013466477394104004, -0.0013037621974945068, -0.0012608766555786133, -0.0012179911136627197, -0.0011751055717468262, -0.0011322200298309326, -0.001089334487915039, -0.0010464489459991455, -0.001003563404083252, -0.0009606778621673584, -0.0009177923202514648, -0.0008749067783355713, -0.0008320212364196777, -0.0007891356945037842, -0.0007462501525878906, -0.0007033646106719971, -0.0006604790687561035, -0.00061759352684021, -0.0005747079849243164, -0.0005318224430084229, -0.0004889369010925293, -0.00044605135917663574, -0.0004031658172607422, -0.00036028027534484863, -0.0003173947334289551, -0.0002745091915130615, -0.00023162364959716797, -0.00018873810768127441, -0.00014585256576538086, -0.0001029670238494873, -6.008148193359375e-05, -1.7195940017700195e-05, 2.568960189819336e-05, 6.857514381408691e-05, 0.00011146068572998047, 0.00015434622764587402, 0.00019723176956176758, 0.00024011731147766113, 0.0002830028533935547, 0.00032588839530944824, 0.0003687739372253418, 0.00041165947914123535, 0.0004545450210571289, 0.0004974305629730225, 0.000540316104888916, 0.0005832016468048096, 0.0006260871887207031, 0.0006689727306365967, 0.0007118582725524902, 0.0007547438144683838, 0.0007976293563842773, 0.0008405148983001709, 0.0008834004402160645, 0.000926285982131958, 0.0009691715240478516, 0.0010120570659637451, 0.0010549426078796387, 0.0010978281497955322, 0.0011407136917114258, 0.0011835992336273193, 0.0012264847755432129, 0.0012693703174591064, 0.001312255859375]}, "gradients/decoder.transformer.h.8.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 2.0, 4.0, 2.0, 4.0, 4.0, 5.0, 3.0, 3.0, 12.0, 18.0, 7.0, 12.0, 31.0, 24.0, 60.0, 76.0, 86.0, 138.0, 124.0, 101.0, 78.0, 54.0, 39.0, 34.0, 24.0, 18.0, 7.0, 5.0, 6.0, 6.0, 2.0, 7.0, 2.0, 3.0, 2.0, 4.0, 2.0, 0.0, 1.0, 0.0, 0.0, 3.0, 1.0, 1.0], "bins": [-4.7277906560339034e-05, -4.6032557293074206e-05, -4.478720438783057e-05, -4.354185512056574e-05, -4.229650221532211e-05, -4.105115294805728e-05, -3.980580368079245e-05, -3.8560450775548816e-05, -3.731510150828399e-05, -3.606975224101916e-05, -3.4824399335775524e-05, -3.3579050068510696e-05, -3.233369716326706e-05, -3.108834789600223e-05, -2.9842996809748e-05, -2.859764572349377e-05, -2.7352294637239538e-05, -2.6106943550985307e-05, -2.4861592464731075e-05, -2.3616241378476843e-05, -2.2370892111212015e-05, -2.1125541024957784e-05, -1.9880189938703552e-05, -1.8634840671438724e-05, -1.738948776619509e-05, -1.6144136679940857e-05, -1.4898786503181327e-05, -1.3653435416927096e-05, -1.2408085240167566e-05, -1.1162734153913334e-05, -9.917383067659102e-06, -8.672032890899573e-06, -7.426682714140043e-06, -6.181332082633162e-06, -4.935981451126281e-06, -3.6906303648720495e-06, -2.445279733365169e-06, -1.199929101858288e-06, 4.542198439594358e-08, 1.2907721611554734e-06, 2.536123247409705e-06, 3.781473878916586e-06, 5.0268245104234666e-06, 6.272175596677698e-06, 7.517526228184579e-06, 8.76287685969146e-06, 1.0008227945945691e-05, 1.1253578122705221e-05, 1.2498929208959453e-05, 1.3744280295213684e-05, 1.4989630471973214e-05, 1.6234982467722148e-05, 1.7480331734986976e-05, 1.8725682821241207e-05, 1.997103390749544e-05, 2.1216383174760267e-05, 2.2461736080003902e-05, 2.3707087166258134e-05, 2.4952438252512366e-05, 2.6197787519777194e-05, 2.7443138606031425e-05, 2.8688489692285657e-05, 2.993384077853989e-05, 3.117919186479412e-05, 3.242454113205895e-05]}, "gradients/decoder.transformer.h.8.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 4.0, 3.0, 2.0, 3.0, 8.0, 9.0, 4.0, 11.0, 4.0, 9.0, 22.0, 22.0, 25.0, 25.0, 27.0, 23.0, 32.0, 37.0, 33.0, 39.0, 36.0, 42.0, 39.0, 41.0, 48.0, 34.0, 46.0, 41.0, 32.0, 33.0, 48.0, 28.0, 29.0, 19.0, 19.0, 19.0, 21.0, 18.0, 9.0, 17.0, 9.0, 11.0, 11.0, 5.0, 2.0, 3.0, 5.0, 1.0, 1.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0], "bins": [-2.9802322387695312e-05, -2.884306013584137e-05, -2.7883797883987427e-05, -2.6924535632133484e-05, -2.596527338027954e-05, -2.5006011128425598e-05, -2.4046748876571655e-05, -2.3087486624717712e-05, -2.212822437286377e-05, -2.1168962121009827e-05, -2.0209699869155884e-05, -1.925043761730194e-05, -1.8291175365447998e-05, -1.7331913113594055e-05, -1.6372650861740112e-05, -1.541338860988617e-05, -1.4454126358032227e-05, -1.3494864106178284e-05, -1.253560185432434e-05, -1.1576339602470398e-05, -1.0617077350616455e-05, -9.657815098762512e-06, -8.69855284690857e-06, -7.739290595054626e-06, -6.780028343200684e-06, -5.820766091346741e-06, -4.861503839492798e-06, -3.902241587638855e-06, -2.942979335784912e-06, -1.9837170839309692e-06, -1.0244548320770264e-06, -6.51925802230835e-08, 8.940696716308594e-07, 1.8533319234848022e-06, 2.812594175338745e-06, 3.771856427192688e-06, 4.731118679046631e-06, 5.690380930900574e-06, 6.649643182754517e-06, 7.6089054346084595e-06, 8.568167686462402e-06, 9.527429938316345e-06, 1.0486692190170288e-05, 1.1445954442024231e-05, 1.2405216693878174e-05, 1.3364478945732117e-05, 1.432374119758606e-05, 1.5283003449440002e-05, 1.6242265701293945e-05, 1.7201527953147888e-05, 1.816079020500183e-05, 1.9120052456855774e-05, 2.0079314708709717e-05, 2.103857696056366e-05, 2.1997839212417603e-05, 2.2957101464271545e-05, 2.3916363716125488e-05, 2.487562596797943e-05, 2.5834888219833374e-05, 2.6794150471687317e-05, 2.775341272354126e-05, 2.8712674975395203e-05, 2.9671937227249146e-05, 3.063119947910309e-05, 3.159046173095703e-05]}, "gradients/decoder.transformer.h.8.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 3.0, 2.0, 1.0, 4.0, 2.0, 6.0, 6.0, 5.0, 14.0, 15.0, 12.0, 10.0, 16.0, 20.0, 22.0, 22.0, 26.0, 24.0, 37.0, 28.0, 36.0, 43.0, 44.0, 38.0, 48.0, 40.0, 35.0, 30.0, 33.0, 42.0, 45.0, 45.0, 35.0, 17.0, 36.0, 21.0, 20.0, 19.0, 14.0, 16.0, 11.0, 11.0, 9.0, 10.0, 8.0, 10.0, 4.0, 4.0, 4.0, 6.0, 1.0, 1.0, 1.0, 1.0, 4.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-39.96875, -38.54443359375, -37.1201171875, -35.69580078125, -34.271484375, -32.84716796875, -31.4228515625, -29.99853515625, -28.57421875, -27.14990234375, -25.7255859375, -24.30126953125, -22.876953125, -21.45263671875, -20.0283203125, -18.60400390625, -17.1796875, -15.75537109375, -14.3310546875, -12.90673828125, -11.482421875, -10.05810546875, -8.6337890625, -7.20947265625, -5.78515625, -4.36083984375, -2.9365234375, -1.51220703125, -0.087890625, 1.33642578125, 2.7607421875, 4.18505859375, 5.609375, 7.03369140625, 8.4580078125, 9.88232421875, 11.306640625, 12.73095703125, 14.1552734375, 15.57958984375, 17.00390625, 18.42822265625, 19.8525390625, 21.27685546875, 22.701171875, 24.12548828125, 25.5498046875, 26.97412109375, 28.3984375, 29.82275390625, 31.2470703125, 32.67138671875, 34.095703125, 35.52001953125, 36.9443359375, 38.36865234375, 39.79296875, 41.21728515625, 42.6416015625, 44.06591796875, 45.490234375, 46.91455078125, 48.3388671875, 49.76318359375, 51.1875]}, "gradients/decoder.transformer.h.8.attn.c_proj.weight": {"_type": "histogram", "values": [3.0, 4.0, 2.0, 2.0, 9.0, 5.0, 6.0, 11.0, 14.0, 24.0, 25.0, 34.0, 49.0, 62.0, 99.0, 100.0, 136.0, 173.0, 267.0, 356.0, 452.0, 612.0, 771.0, 1101.0, 1579.0, 2589.0, 4505.0, 9867.0, 30216.0, 145480.0, 657475.0, 140492.0, 29572.0, 9703.0, 4395.0, 2454.0, 1603.0, 1073.0, 814.0, 606.0, 443.0, 331.0, 271.0, 203.0, 154.0, 104.0, 87.0, 58.0, 46.0, 44.0, 18.0, 20.0, 12.0, 12.0, 11.0, 5.0, 4.0, 4.0, 2.0, 1.0, 3.0, 0.0, 0.0, 3.0], "bins": [-55.25, -53.43798828125, -51.6259765625, -49.81396484375, -48.001953125, -46.18994140625, -44.3779296875, -42.56591796875, -40.75390625, -38.94189453125, -37.1298828125, -35.31787109375, -33.505859375, -31.69384765625, -29.8818359375, -28.06982421875, -26.2578125, -24.44580078125, -22.6337890625, -20.82177734375, -19.009765625, -17.19775390625, -15.3857421875, -13.57373046875, -11.76171875, -9.94970703125, -8.1376953125, -6.32568359375, -4.513671875, -2.70166015625, -0.8896484375, 0.92236328125, 2.734375, 4.54638671875, 6.3583984375, 8.17041015625, 9.982421875, 11.79443359375, 13.6064453125, 15.41845703125, 17.23046875, 19.04248046875, 20.8544921875, 22.66650390625, 24.478515625, 26.29052734375, 28.1025390625, 29.91455078125, 31.7265625, 33.53857421875, 35.3505859375, 37.16259765625, 38.974609375, 40.78662109375, 42.5986328125, 44.41064453125, 46.22265625, 48.03466796875, 49.8466796875, 51.65869140625, 53.470703125, 55.28271484375, 57.0947265625, 58.90673828125, 60.71875]}, "gradients/decoder.transformer.h.8.attn.c_attn.bias": {"_type": "histogram", "values": [3.0, 2.0, 0.0, 2.0, 1.0, 5.0, 3.0, 5.0, 5.0, 10.0, 8.0, 11.0, 11.0, 17.0, 12.0, 8.0, 17.0, 22.0, 18.0, 24.0, 16.0, 37.0, 27.0, 34.0, 35.0, 40.0, 46.0, 53.0, 157.0, 1812.0, 150.0, 52.0, 50.0, 34.0, 35.0, 32.0, 32.0, 30.0, 34.0, 22.0, 17.0, 15.0, 16.0, 14.0, 15.0, 12.0, 9.0, 6.0, 12.0, 12.0, 8.0, 6.0, 2.0, 8.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0], "bins": [-119.125, -115.080078125, -111.03515625, -106.990234375, -102.9453125, -98.900390625, -94.85546875, -90.810546875, -86.765625, -82.720703125, -78.67578125, -74.630859375, -70.5859375, -66.541015625, -62.49609375, -58.451171875, -54.40625, -50.361328125, -46.31640625, -42.271484375, -38.2265625, -34.181640625, -30.13671875, -26.091796875, -22.046875, -18.001953125, -13.95703125, -9.912109375, -5.8671875, -1.822265625, 2.22265625, 6.267578125, 10.3125, 14.357421875, 18.40234375, 22.447265625, 26.4921875, 30.537109375, 34.58203125, 38.626953125, 42.671875, 46.716796875, 50.76171875, 54.806640625, 58.8515625, 62.896484375, 66.94140625, 70.986328125, 75.03125, 79.076171875, 83.12109375, 87.166015625, 91.2109375, 95.255859375, 99.30078125, 103.345703125, 107.390625, 111.435546875, 115.48046875, 119.525390625, 123.5703125, 127.615234375, 131.66015625, 135.705078125, 139.75]}, "gradients/decoder.transformer.h.8.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 2.0, 0.0, 2.0, 6.0, 4.0, 8.0, 9.0, 10.0, 12.0, 7.0, 13.0, 23.0, 29.0, 26.0, 35.0, 59.0, 64.0, 86.0, 106.0, 141.0, 231.0, 469.0, 1224.0, 5899.0, 106187.0, 3007092.0, 19581.0, 2577.0, 757.0, 338.0, 165.0, 128.0, 92.0, 76.0, 57.0, 51.0, 29.0, 22.0, 16.0, 15.0, 17.0, 14.0, 11.0, 6.0, 4.0, 8.0, 4.0, 2.0, 1.0, 2.0, 1.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-228.75, -221.43359375, -214.1171875, -206.80078125, -199.484375, -192.16796875, -184.8515625, -177.53515625, -170.21875, -162.90234375, -155.5859375, -148.26953125, -140.953125, -133.63671875, -126.3203125, -119.00390625, -111.6875, -104.37109375, -97.0546875, -89.73828125, -82.421875, -75.10546875, -67.7890625, -60.47265625, -53.15625, -45.83984375, -38.5234375, -31.20703125, -23.890625, -16.57421875, -9.2578125, -1.94140625, 5.375, 12.69140625, 20.0078125, 27.32421875, 34.640625, 41.95703125, 49.2734375, 56.58984375, 63.90625, 71.22265625, 78.5390625, 85.85546875, 93.171875, 100.48828125, 107.8046875, 115.12109375, 122.4375, 129.75390625, 137.0703125, 144.38671875, 151.703125, 159.01953125, 166.3359375, 173.65234375, 180.96875, 188.28515625, 195.6015625, 202.91796875, 210.234375, 217.55078125, 224.8671875, 232.18359375, 239.5]}, "gradients/decoder.transformer.h.8.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 19.0, 924.0, 77.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-343.4330139160156, -291.4416198730469, -239.45022583007812, -187.45884704589844, -135.4674530029297, -83.47607421875, -31.48468017578125, 20.5067138671875, 72.49810791015625, 124.489501953125, 176.48089599609375, 228.47227478027344, 280.46368408203125, 332.4550476074219, 384.4464416503906, 436.4378356933594, 488.4292297363281, 540.4205932617188, 592.4119873046875, 644.4033813476562, 696.394775390625, 748.3861694335938, 800.3775634765625, 852.3689575195312, 904.3603515625, 956.3517456054688, 1008.3431396484375, 1060.33447265625, 1112.325927734375, 1164.3172607421875, 1216.3087158203125, 1268.300048828125, 1320.2913818359375, 1372.28271484375, 1424.274169921875, 1476.2655029296875, 1528.2569580078125, 1580.248291015625, 1632.23974609375, 1684.2310791015625, 1736.2225341796875, 1788.2138671875, 1840.205322265625, 1892.1966552734375, 1944.1881103515625, 1996.179443359375, 2048.1708984375, 2100.162353515625, 2152.153564453125, 2204.14501953125, 2256.13623046875, 2308.127685546875, 2360.119140625, 2412.110595703125, 2464.101806640625, 2516.09326171875, 2568.084716796875, 2620.076171875, 2672.0673828125, 2724.058837890625, 2776.05029296875, 2828.041748046875, 2880.032958984375, 2932.0244140625, 2984.015869140625]}, "gradients/decoder.transformer.h.8.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 3.0, 2.0, 2.0, 5.0, 10.0, 6.0, 8.0, 7.0, 6.0, 13.0, 16.0, 13.0, 17.0, 17.0, 20.0, 27.0, 34.0, 28.0, 29.0, 34.0, 38.0, 29.0, 32.0, 37.0, 46.0, 58.0, 45.0, 40.0, 42.0, 28.0, 39.0, 35.0, 31.0, 23.0, 21.0, 24.0, 14.0, 10.0, 22.0, 20.0, 15.0, 8.0, 12.0, 10.0, 7.0, 13.0, 4.0, 2.0, 5.0, 2.0, 1.0, 2.0, 2.0, 1.0, 1.0], "bins": [-393.0500793457031, -381.58319091796875, -370.11627197265625, -358.64935302734375, -347.1824645996094, -335.715576171875, -324.2486572265625, -312.78173828125, -301.3148498535156, -289.84796142578125, -278.38104248046875, -266.91412353515625, -255.44723510742188, -243.98033142089844, -232.513427734375, -221.04652404785156, -209.57962036132812, -198.1127166748047, -186.64581298828125, -175.1789093017578, -163.71200561523438, -152.24510192871094, -140.7781982421875, -129.31129455566406, -117.84439086914062, -106.37748718261719, -94.91058349609375, -83.44367980957031, -71.97677612304688, -60.50987243652344, -49.04296875, -37.57606506347656, -26.109161376953125, -14.642257690429688, -3.17535400390625, 8.291549682617188, 19.758453369140625, 31.225357055664062, 42.6922607421875, 54.15916442871094, 65.62606811523438, 77.09297180175781, 88.55987548828125, 100.02677917480469, 111.49368286132812, 122.96058654785156, 134.427490234375, 145.89439392089844, 157.36129760742188, 168.8282012939453, 180.29510498046875, 191.7620086669922, 203.22891235351562, 214.69581604003906, 226.1627197265625, 237.62962341308594, 249.09652709960938, 260.56341552734375, 272.03033447265625, 283.49725341796875, 294.9641418457031, 306.4310302734375, 317.89794921875, 329.3648681640625, 340.8317565917969]}, "gradients/decoder.transformer.h.7.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 3.0, 3.0, 3.0, 3.0, 4.0, 3.0, 8.0, 10.0, 12.0, 13.0, 10.0, 10.0, 15.0, 24.0, 21.0, 18.0, 25.0, 34.0, 27.0, 36.0, 37.0, 43.0, 38.0, 59.0, 44.0, 23.0, 37.0, 42.0, 33.0, 34.0, 30.0, 49.0, 32.0, 32.0, 23.0, 26.0, 18.0, 20.0, 17.0, 16.0, 9.0, 8.0, 12.0, 14.0, 5.0, 4.0, 6.0, 8.0, 3.0, 2.0, 3.0, 2.0, 2.0, 1.0, 2.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0], "bins": [-40.34375, -38.90087890625, -37.4580078125, -36.01513671875, -34.572265625, -33.12939453125, -31.6865234375, -30.24365234375, -28.80078125, -27.35791015625, -25.9150390625, -24.47216796875, -23.029296875, -21.58642578125, -20.1435546875, -18.70068359375, -17.2578125, -15.81494140625, -14.3720703125, -12.92919921875, -11.486328125, -10.04345703125, -8.6005859375, -7.15771484375, -5.71484375, -4.27197265625, -2.8291015625, -1.38623046875, 0.056640625, 1.49951171875, 2.9423828125, 4.38525390625, 5.828125, 7.27099609375, 8.7138671875, 10.15673828125, 11.599609375, 13.04248046875, 14.4853515625, 15.92822265625, 17.37109375, 18.81396484375, 20.2568359375, 21.69970703125, 23.142578125, 24.58544921875, 26.0283203125, 27.47119140625, 28.9140625, 30.35693359375, 31.7998046875, 33.24267578125, 34.685546875, 36.12841796875, 37.5712890625, 39.01416015625, 40.45703125, 41.89990234375, 43.3427734375, 44.78564453125, 46.228515625, 47.67138671875, 49.1142578125, 50.55712890625, 52.0]}, "gradients/decoder.transformer.h.7.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 4.0, 7.0, 5.0, 12.0, 15.0, 27.0, 24.0, 40.0, 65.0, 68.0, 117.0, 134.0, 159.0, 234.0, 303.0, 377.0, 513.0, 663.0, 891.0, 1228.0, 1743.0, 2424.0, 3718.0, 5806.0, 10246.0, 19695.0, 75021.0, 834474.0, 2969918.0, 199795.0, 29684.0, 13645.0, 7594.0, 4644.0, 3019.0, 2072.0, 1481.0, 1067.0, 791.0, 622.0, 457.0, 345.0, 242.0, 208.0, 158.0, 118.0, 105.0, 70.0, 57.0, 56.0, 27.0, 27.0, 21.0, 13.0, 17.0, 10.0, 7.0, 6.0, 3.0, 3.0, 1.0, 5.0], "bins": [-96.6875, -93.490234375, -90.29296875, -87.095703125, -83.8984375, -80.701171875, -77.50390625, -74.306640625, -71.109375, -67.912109375, -64.71484375, -61.517578125, -58.3203125, -55.123046875, -51.92578125, -48.728515625, -45.53125, -42.333984375, -39.13671875, -35.939453125, -32.7421875, -29.544921875, -26.34765625, -23.150390625, -19.953125, -16.755859375, -13.55859375, -10.361328125, -7.1640625, -3.966796875, -0.76953125, 2.427734375, 5.625, 8.822265625, 12.01953125, 15.216796875, 18.4140625, 21.611328125, 24.80859375, 28.005859375, 31.203125, 34.400390625, 37.59765625, 40.794921875, 43.9921875, 47.189453125, 50.38671875, 53.583984375, 56.78125, 59.978515625, 63.17578125, 66.373046875, 69.5703125, 72.767578125, 75.96484375, 79.162109375, 82.359375, 85.556640625, 88.75390625, 91.951171875, 95.1484375, 98.345703125, 101.54296875, 104.740234375, 107.9375]}, "gradients/decoder.transformer.h.7.mlp.c_fc.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 0.0, 0.0, 3.0, 4.0, 1.0, 3.0, 3.0, 3.0, 12.0, 10.0, 14.0, 19.0, 35.0, 49.0, 73.0, 132.0, 350.0, 910.0, 1457.0, 534.0, 190.0, 92.0, 64.0, 31.0, 25.0, 17.0, 14.0, 8.0, 5.0, 7.0, 6.0, 5.0, 3.0, 4.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-100.25, -95.53515625, -90.8203125, -86.10546875, -81.390625, -76.67578125, -71.9609375, -67.24609375, -62.53125, -57.81640625, -53.1015625, -48.38671875, -43.671875, -38.95703125, -34.2421875, -29.52734375, -24.8125, -20.09765625, -15.3828125, -10.66796875, -5.953125, -1.23828125, 3.4765625, 8.19140625, 12.90625, 17.62109375, 22.3359375, 27.05078125, 31.765625, 36.48046875, 41.1953125, 45.91015625, 50.625, 55.33984375, 60.0546875, 64.76953125, 69.484375, 74.19921875, 78.9140625, 83.62890625, 88.34375, 93.05859375, 97.7734375, 102.48828125, 107.203125, 111.91796875, 116.6328125, 121.34765625, 126.0625, 130.77734375, 135.4921875, 140.20703125, 144.921875, 149.63671875, 154.3515625, 159.06640625, 163.78125, 168.49609375, 173.2109375, 177.92578125, 182.640625, 187.35546875, 192.0703125, 196.78515625, 201.5]}, "gradients/decoder.transformer.h.7.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 4.0, 8.0, 9.0, 8.0, 22.0, 34.0, 42.0, 79.0, 151.0, 271.0, 590.0, 1475.0, 4567.0, 17940.0, 167039.0, 3907541.0, 76902.0, 11962.0, 3404.0, 1155.0, 525.0, 269.0, 123.0, 63.0, 51.0, 22.0, 13.0, 7.0, 8.0, 6.0, 1.0, 0.0, 2.0, 0.0, 2.0, 0.0, 1.0], "bins": [-543.5, -530.671875, -517.84375, -505.015625, -492.1875, -479.359375, -466.53125, -453.703125, -440.875, -428.046875, -415.21875, -402.390625, -389.5625, -376.734375, -363.90625, -351.078125, -338.25, -325.421875, -312.59375, -299.765625, -286.9375, -274.109375, -261.28125, -248.453125, -235.625, -222.796875, -209.96875, -197.140625, -184.3125, -171.484375, -158.65625, -145.828125, -133.0, -120.171875, -107.34375, -94.515625, -81.6875, -68.859375, -56.03125, -43.203125, -30.375, -17.546875, -4.71875, 8.109375, 20.9375, 33.765625, 46.59375, 59.421875, 72.25, 85.078125, 97.90625, 110.734375, 123.5625, 136.390625, 149.21875, 162.046875, 174.875, 187.703125, 200.53125, 213.359375, 226.1875, 239.015625, 251.84375, 264.671875, 277.5]}, "gradients/decoder.transformer.h.7.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 6.0, 2.0, 12.0, 11.0, 15.0, 29.0, 42.0, 73.0, 118.0, 147.0, 181.0, 122.0, 87.0, 63.0, 37.0, 19.0, 16.0, 17.0, 6.0, 5.0, 5.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-500.73388671875, -482.9439392089844, -465.15399169921875, -447.3640441894531, -429.5740966796875, -411.78411865234375, -393.9941711425781, -376.2042236328125, -358.4142761230469, -340.62432861328125, -322.8343811035156, -305.04443359375, -287.25445556640625, -269.46453857421875, -251.674560546875, -233.88461303710938, -216.09466552734375, -198.30471801757812, -180.5147705078125, -162.7248077392578, -144.9348602294922, -127.14491271972656, -109.3549575805664, -91.56500244140625, -73.77505493164062, -55.985103607177734, -38.195152282714844, -20.405200958251953, -2.6152496337890625, 15.174697875976562, 32.96465301513672, 50.754608154296875, 68.54461669921875, 86.33456420898438, 104.12451934814453, 121.91447448730469, 139.7044219970703, 157.49436950683594, 175.28433227539062, 193.07427978515625, 210.86422729492188, 228.6541748046875, 246.44412231445312, 264.23406982421875, 282.0240478515625, 299.81396484375, 317.60394287109375, 335.3938903808594, 353.183837890625, 370.9737854003906, 388.76373291015625, 406.5536804199219, 424.3436279296875, 442.13360595703125, 459.9235534667969, 477.7135009765625, 495.5034484863281, 513.2933959960938, 531.0833740234375, 548.873291015625, 566.6632690429688, 584.4531860351562, 602.2431640625, 620.0330810546875, 637.8230590820312]}, "gradients/decoder.transformer.h.7.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 3.0, 0.0, 2.0, 1.0, 3.0, 4.0, 4.0, 6.0, 4.0, 21.0, 11.0, 12.0, 13.0, 25.0, 12.0, 17.0, 14.0, 28.0, 30.0, 29.0, 39.0, 46.0, 35.0, 44.0, 44.0, 49.0, 50.0, 36.0, 46.0, 38.0, 37.0, 34.0, 36.0, 35.0, 28.0, 23.0, 30.0, 21.0, 27.0, 17.0, 14.0, 11.0, 10.0, 5.0, 7.0, 3.0, 3.0, 1.0, 7.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-278.71942138671875, -269.37646484375, -260.03350830078125, -250.69053649902344, -241.34756469726562, -232.00460815429688, -222.66165161132812, -213.31869506835938, -203.97572326660156, -194.6327667236328, -185.289794921875, -175.94683837890625, -166.6038818359375, -157.2609100341797, -147.91795349121094, -138.57498168945312, -129.23202514648438, -119.8890609741211, -110.54609680175781, -101.20314025878906, -91.86017608642578, -82.5172119140625, -73.17425537109375, -63.83129119873047, -54.48832702636719, -45.145362854003906, -35.80240249633789, -26.459440231323242, -17.116477966308594, -7.7735137939453125, 1.5694465637207031, 10.912406921386719, 20.25537109375, 29.59833335876465, 38.9412956237793, 48.28425598144531, 57.627220153808594, 66.97018432617188, 76.31314086914062, 85.6561050415039, 94.99906921386719, 104.34203338623047, 113.68499755859375, 123.0279541015625, 132.37091064453125, 141.71388244628906, 151.0568389892578, 160.39981079101562, 169.74276733398438, 179.08572387695312, 188.42869567871094, 197.7716522216797, 207.1146240234375, 216.45758056640625, 225.800537109375, 235.14349365234375, 244.48646545410156, 253.8294219970703, 263.1723937988281, 272.5153503417969, 281.8583068847656, 291.2012939453125, 300.54425048828125, 309.88720703125, 319.23016357421875]}, "gradients/decoder.transformer.h.7.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 4.0, 7.0, 3.0, 6.0, 4.0, 2.0, 13.0, 18.0, 12.0, 15.0, 24.0, 29.0, 27.0, 31.0, 34.0, 38.0, 40.0, 47.0, 47.0, 53.0, 42.0, 53.0, 51.0, 51.0, 44.0, 46.0, 38.0, 32.0, 27.0, 26.0, 27.0, 30.0, 17.0, 17.0, 13.0, 11.0, 3.0, 9.0, 7.0, 6.0, 3.0, 5.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-47.3125, -45.361328125, -43.41015625, -41.458984375, -39.5078125, -37.556640625, -35.60546875, -33.654296875, -31.703125, -29.751953125, -27.80078125, -25.849609375, -23.8984375, -21.947265625, -19.99609375, -18.044921875, -16.09375, -14.142578125, -12.19140625, -10.240234375, -8.2890625, -6.337890625, -4.38671875, -2.435546875, -0.484375, 1.466796875, 3.41796875, 5.369140625, 7.3203125, 9.271484375, 11.22265625, 13.173828125, 15.125, 17.076171875, 19.02734375, 20.978515625, 22.9296875, 24.880859375, 26.83203125, 28.783203125, 30.734375, 32.685546875, 34.63671875, 36.587890625, 38.5390625, 40.490234375, 42.44140625, 44.392578125, 46.34375, 48.294921875, 50.24609375, 52.197265625, 54.1484375, 56.099609375, 58.05078125, 60.001953125, 61.953125, 63.904296875, 65.85546875, 67.806640625, 69.7578125, 71.708984375, 73.66015625, 75.611328125, 77.5625]}, "gradients/decoder.transformer.h.7.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 3.0, 5.0, 3.0, 3.0, 8.0, 15.0, 11.0, 26.0, 40.0, 56.0, 84.0, 162.0, 257.0, 422.0, 640.0, 1167.0, 1905.0, 3167.0, 5255.0, 9261.0, 16167.0, 28529.0, 53241.0, 107737.0, 294324.0, 297553.0, 107690.0, 53411.0, 28612.0, 16266.0, 9267.0, 5259.0, 3152.0, 1943.0, 1117.0, 720.0, 433.0, 231.0, 149.0, 93.0, 64.0, 28.0, 21.0, 23.0, 11.0, 7.0, 11.0, 7.0, 3.0, 2.0, 4.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.578125, -2.494964599609375, -2.41180419921875, -2.328643798828125, -2.2454833984375, -2.162322998046875, -2.07916259765625, -1.996002197265625, -1.912841796875, -1.829681396484375, -1.74652099609375, -1.663360595703125, -1.5802001953125, -1.497039794921875, -1.41387939453125, -1.330718994140625, -1.24755859375, -1.164398193359375, -1.08123779296875, -0.998077392578125, -0.9149169921875, -0.831756591796875, -0.74859619140625, -0.665435791015625, -0.582275390625, -0.499114990234375, -0.41595458984375, -0.332794189453125, -0.2496337890625, -0.166473388671875, -0.08331298828125, -0.000152587890625, 0.0830078125, 0.166168212890625, 0.24932861328125, 0.332489013671875, 0.4156494140625, 0.498809814453125, 0.58197021484375, 0.665130615234375, 0.748291015625, 0.831451416015625, 0.91461181640625, 0.997772216796875, 1.0809326171875, 1.164093017578125, 1.24725341796875, 1.330413818359375, 1.41357421875, 1.496734619140625, 1.57989501953125, 1.663055419921875, 1.7462158203125, 1.829376220703125, 1.91253662109375, 1.995697021484375, 2.078857421875, 2.162017822265625, 2.24517822265625, 2.328338623046875, 2.4114990234375, 2.494659423828125, 2.57781982421875, 2.660980224609375, 2.744140625]}, "gradients/decoder.transformer.h.7.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 1.0, 1.0, 4.0, 4.0, 4.0, 2.0, 4.0, 9.0, 5.0, 7.0, 9.0, 13.0, 17.0, 13.0, 24.0, 32.0, 30.0, 22.0, 39.0, 29.0, 40.0, 34.0, 38.0, 39.0, 60.0, 1076.0, 41.0, 44.0, 39.0, 41.0, 39.0, 34.0, 33.0, 36.0, 17.0, 26.0, 24.0, 24.0, 13.0, 18.0, 13.0, 13.0, 5.0, 4.0, 8.0, 4.0, 3.0, 1.0, 2.0, 2.0, 0.0, 2.0, 0.0, 1.0], "bins": [-41.625, -40.44580078125, -39.2666015625, -38.08740234375, -36.908203125, -35.72900390625, -34.5498046875, -33.37060546875, -32.19140625, -31.01220703125, -29.8330078125, -28.65380859375, -27.474609375, -26.29541015625, -25.1162109375, -23.93701171875, -22.7578125, -21.57861328125, -20.3994140625, -19.22021484375, -18.041015625, -16.86181640625, -15.6826171875, -14.50341796875, -13.32421875, -12.14501953125, -10.9658203125, -9.78662109375, -8.607421875, -7.42822265625, -6.2490234375, -5.06982421875, -3.890625, -2.71142578125, -1.5322265625, -0.35302734375, 0.826171875, 2.00537109375, 3.1845703125, 4.36376953125, 5.54296875, 6.72216796875, 7.9013671875, 9.08056640625, 10.259765625, 11.43896484375, 12.6181640625, 13.79736328125, 14.9765625, 16.15576171875, 17.3349609375, 18.51416015625, 19.693359375, 20.87255859375, 22.0517578125, 23.23095703125, 24.41015625, 25.58935546875, 26.7685546875, 27.94775390625, 29.126953125, 30.30615234375, 31.4853515625, 32.66455078125, 33.84375]}, "gradients/decoder.transformer.h.7.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 0.0, 4.0, 4.0, 5.0, 10.0, 13.0, 21.0, 33.0, 46.0, 71.0, 86.0, 131.0, 219.0, 322.0, 438.0, 706.0, 1071.0, 1590.0, 2479.0, 3819.0, 6049.0, 9701.0, 15522.0, 24903.0, 41175.0, 70426.0, 129262.0, 1403433.0, 171482.0, 85571.0, 49511.0, 29644.0, 18121.0, 11217.0, 7001.0, 4514.0, 2963.0, 1943.0, 1191.0, 840.0, 536.0, 344.0, 221.0, 172.0, 110.0, 69.0, 53.0, 38.0, 24.0, 10.0, 11.0, 10.0, 4.0, 4.0, 2.0, 0.0, 0.0, 0.0, 1.0, 2.0], "bins": [-1.669921875, -1.6172027587890625, -1.564483642578125, -1.5117645263671875, -1.45904541015625, -1.4063262939453125, -1.353607177734375, -1.3008880615234375, -1.2481689453125, -1.1954498291015625, -1.142730712890625, -1.0900115966796875, -1.03729248046875, -0.9845733642578125, -0.931854248046875, -0.8791351318359375, -0.826416015625, -0.7736968994140625, -0.720977783203125, -0.6682586669921875, -0.61553955078125, -0.5628204345703125, -0.510101318359375, -0.4573822021484375, -0.4046630859375, -0.3519439697265625, -0.299224853515625, -0.2465057373046875, -0.19378662109375, -0.1410675048828125, -0.088348388671875, -0.0356292724609375, 0.01708984375, 0.0698089599609375, 0.122528076171875, 0.1752471923828125, 0.22796630859375, 0.2806854248046875, 0.333404541015625, 0.3861236572265625, 0.4388427734375, 0.4915618896484375, 0.544281005859375, 0.5970001220703125, 0.64971923828125, 0.7024383544921875, 0.755157470703125, 0.8078765869140625, 0.860595703125, 0.9133148193359375, 0.966033935546875, 1.0187530517578125, 1.07147216796875, 1.1241912841796875, 1.176910400390625, 1.2296295166015625, 1.2823486328125, 1.3350677490234375, 1.387786865234375, 1.4405059814453125, 1.49322509765625, 1.5459442138671875, 1.598663330078125, 1.6513824462890625, 1.7041015625]}, "gradients/decoder.transformer.h.7.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 4.0, 2.0, 1.0, 2.0, 3.0, 14.0, 11.0, 13.0, 25.0, 30.0, 48.0, 66.0, 90.0, 122.0, 159.0, 121.0, 90.0, 59.0, 37.0, 22.0, 26.0, 14.0, 19.0, 5.0, 7.0, 5.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00010418891906738281, -0.0001010717824101448, -9.79546457529068e-05, -9.483750909566879e-05, -9.172037243843079e-05, -8.860323578119278e-05, -8.548609912395477e-05, -8.236896246671677e-05, -7.925182580947876e-05, -7.613468915224075e-05, -7.301755249500275e-05, -6.990041583776474e-05, -6.678327918052673e-05, -6.366614252328873e-05, -6.054900586605072e-05, -5.7431869208812714e-05, -5.431473255157471e-05, -5.11975958943367e-05, -4.8080459237098694e-05, -4.496332257986069e-05, -4.184618592262268e-05, -3.8729049265384674e-05, -3.561191260814667e-05, -3.249477595090866e-05, -2.9377639293670654e-05, -2.6260502636432648e-05, -2.314336597919464e-05, -2.0026229321956635e-05, -1.6909092664718628e-05, -1.3791956007480621e-05, -1.0674819350242615e-05, -7.557682693004608e-06, -4.4405460357666016e-06, -1.323409378528595e-06, 1.7937272787094116e-06, 4.910863935947418e-06, 8.028000593185425e-06, 1.1145137250423431e-05, 1.4262273907661438e-05, 1.7379410564899445e-05, 2.049654722213745e-05, 2.3613683879375458e-05, 2.6730820536613464e-05, 2.984795719385147e-05, 3.296509385108948e-05, 3.6082230508327484e-05, 3.919936716556549e-05, 4.23165038228035e-05, 4.5433640480041504e-05, 4.855077713727951e-05, 5.166791379451752e-05, 5.4785050451755524e-05, 5.790218710899353e-05, 6.101932376623154e-05, 6.413646042346954e-05, 6.725359708070755e-05, 7.037073373794556e-05, 7.348787039518356e-05, 7.660500705242157e-05, 7.972214370965958e-05, 8.283928036689758e-05, 8.595641702413559e-05, 8.90735536813736e-05, 9.21906903386116e-05, 9.530782699584961e-05]}, "gradients/decoder.transformer.h.7.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 2.0, 2.0, 0.0, 1.0, 1.0, 1.0, 3.0, 6.0, 5.0, 8.0, 8.0, 11.0, 24.0, 41.0, 49.0, 81.0, 167.0, 502.0, 15132.0, 1005100.0, 26279.0, 689.0, 191.0, 87.0, 66.0, 37.0, 17.0, 18.0, 5.0, 7.0, 0.0, 5.0, 2.0, 5.0, 2.0, 3.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.001735687255859375, -0.0016788989305496216, -0.0016221106052398682, -0.0015653222799301147, -0.0015085339546203613, -0.001451745629310608, -0.0013949573040008545, -0.001338168978691101, -0.0012813806533813477, -0.0012245923280715942, -0.0011678040027618408, -0.0011110156774520874, -0.001054227352142334, -0.0009974390268325806, -0.0009406507015228271, -0.0008838623762130737, -0.0008270740509033203, -0.0007702857255935669, -0.0007134974002838135, -0.0006567090749740601, -0.0005999207496643066, -0.0005431324243545532, -0.0004863440990447998, -0.0004295557737350464, -0.00037276744842529297, -0.00031597912311553955, -0.00025919079780578613, -0.00020240247249603271, -0.0001456141471862793, -8.882582187652588e-05, -3.203749656677246e-05, 2.4750828742980957e-05, 8.153915405273438e-05, 0.0001383274793624878, 0.0001951158046722412, 0.00025190412998199463, 0.00030869245529174805, 0.00036548078060150146, 0.0004222691059112549, 0.0004790574312210083, 0.0005358457565307617, 0.0005926340818405151, 0.0006494224071502686, 0.000706210732460022, 0.0007629990577697754, 0.0008197873830795288, 0.0008765757083892822, 0.0009333640336990356, 0.000990152359008789, 0.0010469406843185425, 0.001103729009628296, 0.0011605173349380493, 0.0012173056602478027, 0.0012740939855575562, 0.0013308823108673096, 0.001387670636177063, 0.0014444589614868164, 0.0015012472867965698, 0.0015580356121063232, 0.0016148239374160767, 0.00167161226272583, 0.0017284005880355835, 0.001785188913345337, 0.0018419772386550903, 0.0018987655639648438]}, "gradients/decoder.transformer.h.7.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 3.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 3.0, 2.0, 5.0, 6.0, 6.0, 7.0, 8.0, 10.0, 19.0, 19.0, 32.0, 34.0, 47.0, 49.0, 72.0, 108.0, 108.0, 101.0, 79.0, 64.0, 51.0, 43.0, 29.0, 24.0, 22.0, 11.0, 10.0, 9.0, 4.0, 1.0, 7.0, 4.0, 2.0, 1.0, 3.0, 2.0, 1.0, 1.0, 2.0, 0.0, 0.0, 2.0, 1.0], "bins": [-4.5213266275823116e-05, -4.4011838326696306e-05, -4.281040673959069e-05, -4.160897879046388e-05, -4.040755084133707e-05, -3.920611925423145e-05, -3.800469130510464e-05, -3.680326335597783e-05, -3.5601835406851023e-05, -3.4400407457724214e-05, -3.31989758706186e-05, -3.199754792149179e-05, -3.079611997236498e-05, -2.9594690204248764e-05, -2.839326043613255e-05, -2.719183248700574e-05, -2.5990400899900123e-05, -2.478897113178391e-05, -2.35875431826571e-05, -2.2386113414540887e-05, -2.1184685465414077e-05, -1.9983255697297864e-05, -1.878182592918165e-05, -1.758039798005484e-05, -1.6378968211938627e-05, -1.5177539353317115e-05, -1.3976110494695604e-05, -1.277468072657939e-05, -1.1573251867957879e-05, -1.0371823009336367e-05, -9.170393241220154e-06, -7.968964382598642e-06, -6.767535523977131e-06, -5.566106665355619e-06, -4.3646773519867565e-06, -3.1632482659915695e-06, -1.9618191799963824e-06, -7.603903213748708e-07, 4.4103899199399166e-07, 1.6424683053628542e-06, 2.8438971639843658e-06, 4.045326022605877e-06, 5.24675533597474e-06, 6.448184649343602e-06, 7.649613507965114e-06, 8.851042366586626e-06, 1.0052472134702839e-05, 1.125390099332435e-05, 1.2455329851945862e-05, 1.3656758710567374e-05, 1.4858187569188885e-05, 1.60596173373051e-05, 1.726104528643191e-05, 1.8462475054548122e-05, 1.9663904822664335e-05, 2.0865332771791145e-05, 2.206676253990736e-05, 2.3268192308023572e-05, 2.4469620257150382e-05, 2.5671050025266595e-05, 2.687247979338281e-05, 2.807390774250962e-05, 2.9275337510625832e-05, 3.0476767278742045e-05, 3.1678195227868855e-05]}, "gradients/decoder.transformer.h.7.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 2.0, 4.0, 1.0, 2.0, 6.0, 9.0, 9.0, 7.0, 8.0, 10.0, 13.0, 16.0, 15.0, 18.0, 22.0, 26.0, 28.0, 39.0, 39.0, 37.0, 44.0, 49.0, 46.0, 42.0, 36.0, 43.0, 52.0, 41.0, 32.0, 37.0, 31.0, 36.0, 34.0, 24.0, 27.0, 21.0, 20.0, 16.0, 20.0, 9.0, 13.0, 12.0, 3.0, 6.0, 4.0, 4.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-3.629922866821289e-05, -3.52468341588974e-05, -3.419443964958191e-05, -3.314204514026642e-05, -3.208965063095093e-05, -3.103725612163544e-05, -2.9984861612319946e-05, -2.8932467103004456e-05, -2.7880072593688965e-05, -2.6827678084373474e-05, -2.5775283575057983e-05, -2.4722889065742493e-05, -2.3670494556427002e-05, -2.261810004711151e-05, -2.156570553779602e-05, -2.051331102848053e-05, -1.946091651916504e-05, -1.840852200984955e-05, -1.7356127500534058e-05, -1.6303732991218567e-05, -1.5251338481903076e-05, -1.4198943972587585e-05, -1.3146549463272095e-05, -1.2094154953956604e-05, -1.1041760444641113e-05, -9.989365935325623e-06, -8.936971426010132e-06, -7.884576916694641e-06, -6.83218240737915e-06, -5.77978789806366e-06, -4.727393388748169e-06, -3.6749988794326782e-06, -2.6226043701171875e-06, -1.5702098608016968e-06, -5.178153514862061e-07, 5.345791578292847e-07, 1.5869736671447754e-06, 2.639368176460266e-06, 3.691762685775757e-06, 4.7441571950912476e-06, 5.796551704406738e-06, 6.848946213722229e-06, 7.90134072303772e-06, 8.95373523235321e-06, 1.0006129741668701e-05, 1.1058524250984192e-05, 1.2110918760299683e-05, 1.3163313269615173e-05, 1.4215707778930664e-05, 1.5268102288246155e-05, 1.6320496797561646e-05, 1.7372891306877136e-05, 1.8425285816192627e-05, 1.9477680325508118e-05, 2.053007483482361e-05, 2.15824693441391e-05, 2.263486385345459e-05, 2.368725836277008e-05, 2.473965287208557e-05, 2.5792047381401062e-05, 2.6844441890716553e-05, 2.7896836400032043e-05, 2.8949230909347534e-05, 3.0001625418663025e-05, 3.1054019927978516e-05]}, "gradients/decoder.transformer.h.7.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 4.0, 7.0, 3.0, 6.0, 4.0, 2.0, 13.0, 18.0, 12.0, 15.0, 24.0, 29.0, 27.0, 31.0, 34.0, 38.0, 40.0, 47.0, 47.0, 53.0, 42.0, 53.0, 51.0, 51.0, 44.0, 46.0, 38.0, 32.0, 27.0, 26.0, 27.0, 30.0, 17.0, 17.0, 13.0, 11.0, 3.0, 9.0, 7.0, 6.0, 3.0, 5.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-47.3125, -45.361328125, -43.41015625, -41.458984375, -39.5078125, -37.556640625, -35.60546875, -33.654296875, -31.703125, -29.751953125, -27.80078125, -25.849609375, -23.8984375, -21.947265625, -19.99609375, -18.044921875, -16.09375, -14.142578125, -12.19140625, -10.240234375, -8.2890625, -6.337890625, -4.38671875, -2.435546875, -0.484375, 1.466796875, 3.41796875, 5.369140625, 7.3203125, 9.271484375, 11.22265625, 13.173828125, 15.125, 17.076171875, 19.02734375, 20.978515625, 22.9296875, 24.880859375, 26.83203125, 28.783203125, 30.734375, 32.685546875, 34.63671875, 36.587890625, 38.5390625, 40.490234375, 42.44140625, 44.392578125, 46.34375, 48.294921875, 50.24609375, 52.197265625, 54.1484375, 56.099609375, 58.05078125, 60.001953125, 61.953125, 63.904296875, 65.85546875, 67.806640625, 69.7578125, 71.708984375, 73.66015625, 75.611328125, 77.5625]}, "gradients/decoder.transformer.h.7.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 4.0, 1.0, 2.0, 5.0, 7.0, 4.0, 7.0, 8.0, 12.0, 31.0, 35.0, 46.0, 73.0, 135.0, 174.0, 271.0, 425.0, 689.0, 1102.0, 2209.0, 5029.0, 15276.0, 100492.0, 855428.0, 48136.0, 10591.0, 3921.0, 1843.0, 941.0, 565.0, 387.0, 239.0, 176.0, 100.0, 60.0, 47.0, 33.0, 16.0, 12.0, 12.0, 10.0, 3.0, 5.0, 4.0, 4.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-96.625, -92.814453125, -89.00390625, -85.193359375, -81.3828125, -77.572265625, -73.76171875, -69.951171875, -66.140625, -62.330078125, -58.51953125, -54.708984375, -50.8984375, -47.087890625, -43.27734375, -39.466796875, -35.65625, -31.845703125, -28.03515625, -24.224609375, -20.4140625, -16.603515625, -12.79296875, -8.982421875, -5.171875, -1.361328125, 2.44921875, 6.259765625, 10.0703125, 13.880859375, 17.69140625, 21.501953125, 25.3125, 29.123046875, 32.93359375, 36.744140625, 40.5546875, 44.365234375, 48.17578125, 51.986328125, 55.796875, 59.607421875, 63.41796875, 67.228515625, 71.0390625, 74.849609375, 78.66015625, 82.470703125, 86.28125, 90.091796875, 93.90234375, 97.712890625, 101.5234375, 105.333984375, 109.14453125, 112.955078125, 116.765625, 120.576171875, 124.38671875, 128.197265625, 132.0078125, 135.818359375, 139.62890625, 143.439453125, 147.25]}, "gradients/decoder.transformer.h.7.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 2.0, 2.0, 6.0, 7.0, 3.0, 8.0, 5.0, 7.0, 8.0, 14.0, 16.0, 11.0, 17.0, 16.0, 27.0, 32.0, 25.0, 25.0, 40.0, 47.0, 56.0, 61.0, 63.0, 91.0, 1876.0, 88.0, 68.0, 44.0, 45.0, 49.0, 37.0, 38.0, 33.0, 22.0, 31.0, 20.0, 15.0, 19.0, 23.0, 18.0, 9.0, 11.0, 2.0, 10.0, 6.0, 4.0, 1.0, 2.0, 3.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-141.375, -136.23046875, -131.0859375, -125.94140625, -120.796875, -115.65234375, -110.5078125, -105.36328125, -100.21875, -95.07421875, -89.9296875, -84.78515625, -79.640625, -74.49609375, -69.3515625, -64.20703125, -59.0625, -53.91796875, -48.7734375, -43.62890625, -38.484375, -33.33984375, -28.1953125, -23.05078125, -17.90625, -12.76171875, -7.6171875, -2.47265625, 2.671875, 7.81640625, 12.9609375, 18.10546875, 23.25, 28.39453125, 33.5390625, 38.68359375, 43.828125, 48.97265625, 54.1171875, 59.26171875, 64.40625, 69.55078125, 74.6953125, 79.83984375, 84.984375, 90.12890625, 95.2734375, 100.41796875, 105.5625, 110.70703125, 115.8515625, 120.99609375, 126.140625, 131.28515625, 136.4296875, 141.57421875, 146.71875, 151.86328125, 157.0078125, 162.15234375, 167.296875, 172.44140625, 177.5859375, 182.73046875, 187.875]}, "gradients/decoder.transformer.h.7.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0, 2.0, 1.0, 3.0, 6.0, 8.0, 6.0, 12.0, 8.0, 18.0, 16.0, 22.0, 20.0, 29.0, 35.0, 46.0, 76.0, 104.0, 158.0, 238.0, 566.0, 1481.0, 5486.0, 41483.0, 3025471.0, 60406.0, 6896.0, 1688.0, 588.0, 284.0, 168.0, 87.0, 56.0, 62.0, 45.0, 34.0, 26.0, 11.0, 10.0, 15.0, 7.0, 8.0, 5.0, 4.0, 4.0, 5.0, 7.0, 2.0, 3.0, 1.0, 0.0, 2.0], "bins": [-415.5, -404.13671875, -392.7734375, -381.41015625, -370.046875, -358.68359375, -347.3203125, -335.95703125, -324.59375, -313.23046875, -301.8671875, -290.50390625, -279.140625, -267.77734375, -256.4140625, -245.05078125, -233.6875, -222.32421875, -210.9609375, -199.59765625, -188.234375, -176.87109375, -165.5078125, -154.14453125, -142.78125, -131.41796875, -120.0546875, -108.69140625, -97.328125, -85.96484375, -74.6015625, -63.23828125, -51.875, -40.51171875, -29.1484375, -17.78515625, -6.421875, 4.94140625, 16.3046875, 27.66796875, 39.03125, 50.39453125, 61.7578125, 73.12109375, 84.484375, 95.84765625, 107.2109375, 118.57421875, 129.9375, 141.30078125, 152.6640625, 164.02734375, 175.390625, 186.75390625, 198.1171875, 209.48046875, 220.84375, 232.20703125, 243.5703125, 254.93359375, 266.296875, 277.66015625, 289.0234375, 300.38671875, 311.75]}, "gradients/decoder.transformer.h.7.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 10.0, 33.0, 226.0, 593.0, 124.0, 34.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-226.96212768554688, -191.27572631835938, -155.58934020996094, -119.90294647216797, -84.216552734375, -48.5301513671875, -12.843765258789062, 22.842620849609375, 58.529022216796875, 94.21541595458984, 129.9018096923828, 165.58819580078125, 201.27459716796875, 236.96099853515625, 272.64739990234375, 308.3337707519531, 344.0201721191406, 379.7065734863281, 415.3929443359375, 451.079345703125, 486.7657470703125, 522.4521484375, 558.1385498046875, 593.824951171875, 629.5113525390625, 665.19775390625, 700.8841552734375, 736.570556640625, 772.2569580078125, 807.943359375, 843.6296997070312, 879.3161010742188, 915.00244140625, 950.6888427734375, 986.375244140625, 1022.0616455078125, 1057.748046875, 1093.4344482421875, 1129.120849609375, 1164.80712890625, 1200.49365234375, 1236.1800537109375, 1271.866455078125, 1307.5528564453125, 1343.2392578125, 1378.9256591796875, 1414.612060546875, 1450.29833984375, 1485.9847412109375, 1521.671142578125, 1557.3575439453125, 1593.0439453125, 1628.7303466796875, 1664.416748046875, 1700.1031494140625, 1735.78955078125, 1771.475830078125, 1807.1622314453125, 1842.8486328125, 1878.5350341796875, 1914.221435546875, 1949.9078369140625, 1985.59423828125, 2021.280517578125, 2056.967041015625]}, "gradients/decoder.transformer.h.7.ln_1.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 2.0, 2.0, 4.0, 3.0, 3.0, 6.0, 6.0, 6.0, 6.0, 11.0, 10.0, 20.0, 13.0, 17.0, 14.0, 24.0, 26.0, 32.0, 37.0, 34.0, 48.0, 35.0, 42.0, 37.0, 45.0, 31.0, 38.0, 38.0, 43.0, 35.0, 40.0, 33.0, 33.0, 19.0, 29.0, 34.0, 21.0, 18.0, 20.0, 16.0, 22.0, 9.0, 3.0, 10.0, 5.0, 3.0, 3.0, 7.0, 8.0, 8.0, 3.0, 1.0, 1.0, 3.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-391.8494873046875, -378.2784423828125, -364.7074279785156, -351.1363830566406, -337.56536865234375, -323.99432373046875, -310.42327880859375, -296.85223388671875, -283.2812194824219, -269.7101745605469, -256.13916015625, -242.568115234375, -228.99708557128906, -215.42605590820312, -201.85501098632812, -188.2839813232422, -174.71295166015625, -161.1419219970703, -147.57089233398438, -133.99984741210938, -120.42881774902344, -106.8577880859375, -93.28675079345703, -79.71571350097656, -66.14468383789062, -52.57365036010742, -39.00261688232422, -25.431583404541016, -11.860549926757812, 1.710479736328125, 15.281517028808594, 28.852554321289062, 42.423553466796875, 55.99458694458008, 69.56562042236328, 83.13665771484375, 96.70768737792969, 110.27871704101562, 123.8497543334961, 137.42079162597656, 150.9918212890625, 164.56285095214844, 178.13388061523438, 191.70492553710938, 205.2759552001953, 218.84698486328125, 232.41802978515625, 245.9890594482422, 259.5600891113281, 273.1311340332031, 286.7021484375, 300.273193359375, 313.84423828125, 327.4152526855469, 340.9862976074219, 354.55731201171875, 368.12835693359375, 381.69940185546875, 395.2704162597656, 408.8414611816406, 422.4124755859375, 435.9835205078125, 449.5545654296875, 463.1256103515625, 476.6966247558594]}, "gradients/decoder.transformer.h.6.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 2.0, 2.0, 7.0, 5.0, 6.0, 8.0, 5.0, 7.0, 17.0, 11.0, 15.0, 26.0, 29.0, 26.0, 30.0, 38.0, 44.0, 37.0, 46.0, 49.0, 45.0, 49.0, 61.0, 43.0, 51.0, 44.0, 41.0, 41.0, 34.0, 28.0, 25.0, 28.0, 25.0, 15.0, 16.0, 12.0, 15.0, 6.0, 4.0, 4.0, 5.0, 6.0, 2.0, 4.0, 1.0, 4.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-50.1875, -48.19140625, -46.1953125, -44.19921875, -42.203125, -40.20703125, -38.2109375, -36.21484375, -34.21875, -32.22265625, -30.2265625, -28.23046875, -26.234375, -24.23828125, -22.2421875, -20.24609375, -18.25, -16.25390625, -14.2578125, -12.26171875, -10.265625, -8.26953125, -6.2734375, -4.27734375, -2.28125, -0.28515625, 1.7109375, 3.70703125, 5.703125, 7.69921875, 9.6953125, 11.69140625, 13.6875, 15.68359375, 17.6796875, 19.67578125, 21.671875, 23.66796875, 25.6640625, 27.66015625, 29.65625, 31.65234375, 33.6484375, 35.64453125, 37.640625, 39.63671875, 41.6328125, 43.62890625, 45.625, 47.62109375, 49.6171875, 51.61328125, 53.609375, 55.60546875, 57.6015625, 59.59765625, 61.59375, 63.58984375, 65.5859375, 67.58203125, 69.578125, 71.57421875, 73.5703125, 75.56640625, 77.5625]}, "gradients/decoder.transformer.h.6.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 4.0, 2.0, 4.0, 8.0, 11.0, 17.0, 37.0, 42.0, 58.0, 114.0, 161.0, 273.0, 480.0, 751.0, 1197.0, 2056.0, 3639.0, 6587.0, 13314.0, 31853.0, 197945.0, 3677950.0, 197387.0, 31404.0, 13403.0, 6732.0, 3622.0, 2045.0, 1179.0, 734.0, 459.0, 279.0, 177.0, 126.0, 74.0, 60.0, 27.0, 23.0, 12.0, 16.0, 11.0, 4.0, 8.0, 5.0, 3.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-167.5, -161.1796875, -154.859375, -148.5390625, -142.21875, -135.8984375, -129.578125, -123.2578125, -116.9375, -110.6171875, -104.296875, -97.9765625, -91.65625, -85.3359375, -79.015625, -72.6953125, -66.375, -60.0546875, -53.734375, -47.4140625, -41.09375, -34.7734375, -28.453125, -22.1328125, -15.8125, -9.4921875, -3.171875, 3.1484375, 9.46875, 15.7890625, 22.109375, 28.4296875, 34.75, 41.0703125, 47.390625, 53.7109375, 60.03125, 66.3515625, 72.671875, 78.9921875, 85.3125, 91.6328125, 97.953125, 104.2734375, 110.59375, 116.9140625, 123.234375, 129.5546875, 135.875, 142.1953125, 148.515625, 154.8359375, 161.15625, 167.4765625, 173.796875, 180.1171875, 186.4375, 192.7578125, 199.078125, 205.3984375, 211.71875, 218.0390625, 224.359375, 230.6796875, 237.0]}, "gradients/decoder.transformer.h.6.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 3.0, 3.0, 2.0, 3.0, 6.0, 13.0, 7.0, 13.0, 10.0, 17.0, 36.0, 42.0, 59.0, 96.0, 198.0, 526.0, 1382.0, 993.0, 323.0, 132.0, 66.0, 31.0, 19.0, 25.0, 27.0, 9.0, 10.0, 6.0, 5.0, 6.0, 2.0, 2.0, 3.0, 1.0, 2.0, 4.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-138.125, -132.767578125, -127.41015625, -122.052734375, -116.6953125, -111.337890625, -105.98046875, -100.623046875, -95.265625, -89.908203125, -84.55078125, -79.193359375, -73.8359375, -68.478515625, -63.12109375, -57.763671875, -52.40625, -47.048828125, -41.69140625, -36.333984375, -30.9765625, -25.619140625, -20.26171875, -14.904296875, -9.546875, -4.189453125, 1.16796875, 6.525390625, 11.8828125, 17.240234375, 22.59765625, 27.955078125, 33.3125, 38.669921875, 44.02734375, 49.384765625, 54.7421875, 60.099609375, 65.45703125, 70.814453125, 76.171875, 81.529296875, 86.88671875, 92.244140625, 97.6015625, 102.958984375, 108.31640625, 113.673828125, 119.03125, 124.388671875, 129.74609375, 135.103515625, 140.4609375, 145.818359375, 151.17578125, 156.533203125, 161.890625, 167.248046875, 172.60546875, 177.962890625, 183.3203125, 188.677734375, 194.03515625, 199.392578125, 204.75]}, "gradients/decoder.transformer.h.6.mlp.c_fc.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 1.0, 3.0, 2.0, 8.0, 5.0, 11.0, 11.0, 29.0, 47.0, 62.0, 110.0, 209.0, 411.0, 733.0, 1334.0, 2781.0, 6129.0, 15328.0, 49409.0, 405732.0, 3564452.0, 104003.0, 26050.0, 9338.0, 4060.0, 1906.0, 954.0, 535.0, 273.0, 129.0, 106.0, 53.0, 24.0, 14.0, 13.0, 11.0, 9.0, 3.0, 3.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-365.0, -355.19921875, -345.3984375, -335.59765625, -325.796875, -315.99609375, -306.1953125, -296.39453125, -286.59375, -276.79296875, -266.9921875, -257.19140625, -247.390625, -237.58984375, -227.7890625, -217.98828125, -208.1875, -198.38671875, -188.5859375, -178.78515625, -168.984375, -159.18359375, -149.3828125, -139.58203125, -129.78125, -119.98046875, -110.1796875, -100.37890625, -90.578125, -80.77734375, -70.9765625, -61.17578125, -51.375, -41.57421875, -31.7734375, -21.97265625, -12.171875, -2.37109375, 7.4296875, 17.23046875, 27.03125, 36.83203125, 46.6328125, 56.43359375, 66.234375, 76.03515625, 85.8359375, 95.63671875, 105.4375, 115.23828125, 125.0390625, 134.83984375, 144.640625, 154.44140625, 164.2421875, 174.04296875, 183.84375, 193.64453125, 203.4453125, 213.24609375, 223.046875, 232.84765625, 242.6484375, 252.44921875, 262.25]}, "gradients/decoder.transformer.h.6.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 5.0, 5.0, 5.0, 11.0, 14.0, 17.0, 37.0, 50.0, 101.0, 137.0, 166.0, 171.0, 93.0, 73.0, 44.0, 20.0, 20.0, 15.0, 13.0, 5.0, 6.0, 2.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-461.546630859375, -439.4291687011719, -417.3117370605469, -395.19427490234375, -373.07684326171875, -350.9593811035156, -328.8419189453125, -306.7244873046875, -284.6070251464844, -262.48956298828125, -240.37213134765625, -218.25466918945312, -196.13722229003906, -174.019775390625, -151.90231323242188, -129.7848663330078, -107.66741943359375, -85.54997253417969, -63.432518005371094, -41.3150634765625, -19.197616577148438, 2.919830322265625, 25.03729248046875, 47.15473937988281, 69.27218627929688, 91.38963317871094, 113.50708770751953, 135.62454223632812, 157.7419891357422, 179.85943603515625, 201.97689819335938, 224.09434509277344, 246.21173095703125, 268.3291931152344, 290.4466247558594, 312.5640869140625, 334.6815185546875, 356.7989807128906, 378.91644287109375, 401.03387451171875, 423.1513366699219, 445.268798828125, 467.38623046875, 489.5036926269531, 511.62115478515625, 533.7385864257812, 555.8560791015625, 577.9735107421875, 600.0909423828125, 622.2083740234375, 644.3258666992188, 666.4432983398438, 688.5607299804688, 710.67822265625, 732.795654296875, 754.9130859375, 777.030517578125, 799.14794921875, 821.2654418945312, 843.3828735351562, 865.5003051757812, 887.6177978515625, 909.7352294921875, 931.8526611328125, 953.9701538085938]}, "gradients/decoder.transformer.h.6.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 6.0, 5.0, 6.0, 7.0, 6.0, 9.0, 13.0, 12.0, 18.0, 11.0, 24.0, 22.0, 25.0, 23.0, 30.0, 26.0, 40.0, 31.0, 35.0, 37.0, 30.0, 34.0, 40.0, 49.0, 38.0, 50.0, 36.0, 36.0, 33.0, 38.0, 44.0, 23.0, 28.0, 22.0, 20.0, 18.0, 15.0, 13.0, 9.0, 11.0, 9.0, 6.0, 8.0, 7.0, 3.0, 0.0, 1.0, 3.0, 3.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0], "bins": [-311.7288818359375, -301.2641296386719, -290.79937744140625, -280.3345947265625, -269.8698425292969, -259.40509033203125, -248.94033813476562, -238.4755859375, -228.0108184814453, -217.5460662841797, -207.081298828125, -196.61654663085938, -186.15179443359375, -175.68702697753906, -165.22227478027344, -154.75750732421875, -144.29275512695312, -133.8280029296875, -123.36323547363281, -112.89848327636719, -102.43372344970703, -91.96896362304688, -81.50421142578125, -71.0394515991211, -60.57469177246094, -50.10993194580078, -39.64517593383789, -29.180418014526367, -18.715660095214844, -8.250900268554688, 2.213855743408203, 12.678611755371094, 23.143341064453125, 33.60810089111328, 44.07285690307617, 54.53761291503906, 65.00237274169922, 75.46713256835938, 85.931884765625, 96.39664459228516, 106.86140441894531, 117.32616424560547, 127.79092407226562, 138.25567626953125, 148.72042846679688, 159.18519592285156, 169.6499481201172, 180.11471557617188, 190.5794677734375, 201.04421997070312, 211.5089874267578, 221.97373962402344, 232.43850708007812, 242.90325927734375, 253.36801147460938, 263.832763671875, 274.29754638671875, 284.7622985839844, 295.22705078125, 305.69183349609375, 316.1565856933594, 326.621337890625, 337.0860900878906, 347.55084228515625, 358.0155944824219]}, "gradients/decoder.transformer.h.6.crossattention.c_proj.bias": {"_type": "histogram", "values": [5.0, 1.0, 2.0, 3.0, 2.0, 2.0, 2.0, 1.0, 2.0, 8.0, 7.0, 13.0, 6.0, 15.0, 13.0, 12.0, 15.0, 27.0, 20.0, 21.0, 17.0, 28.0, 26.0, 35.0, 43.0, 40.0, 29.0, 34.0, 39.0, 35.0, 38.0, 44.0, 38.0, 48.0, 28.0, 40.0, 23.0, 34.0, 22.0, 22.0, 30.0, 14.0, 24.0, 16.0, 14.0, 21.0, 10.0, 9.0, 8.0, 7.0, 6.0, 6.0, 3.0, 3.0, 2.0, 2.0, 1.0, 4.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-51.09375, -49.39599609375, -47.6982421875, -46.00048828125, -44.302734375, -42.60498046875, -40.9072265625, -39.20947265625, -37.51171875, -35.81396484375, -34.1162109375, -32.41845703125, -30.720703125, -29.02294921875, -27.3251953125, -25.62744140625, -23.9296875, -22.23193359375, -20.5341796875, -18.83642578125, -17.138671875, -15.44091796875, -13.7431640625, -12.04541015625, -10.34765625, -8.64990234375, -6.9521484375, -5.25439453125, -3.556640625, -1.85888671875, -0.1611328125, 1.53662109375, 3.234375, 4.93212890625, 6.6298828125, 8.32763671875, 10.025390625, 11.72314453125, 13.4208984375, 15.11865234375, 16.81640625, 18.51416015625, 20.2119140625, 21.90966796875, 23.607421875, 25.30517578125, 27.0029296875, 28.70068359375, 30.3984375, 32.09619140625, 33.7939453125, 35.49169921875, 37.189453125, 38.88720703125, 40.5849609375, 42.28271484375, 43.98046875, 45.67822265625, 47.3759765625, 49.07373046875, 50.771484375, 52.46923828125, 54.1669921875, 55.86474609375, 57.5625]}, "gradients/decoder.transformer.h.6.crossattention.c_proj.weight": {"_type": "histogram", "values": [5.0, 1.0, 2.0, 3.0, 3.0, 2.0, 4.0, 3.0, 6.0, 23.0, 31.0, 69.0, 83.0, 142.0, 168.0, 262.0, 408.0, 679.0, 1024.0, 1447.0, 2237.0, 3501.0, 5433.0, 8270.0, 12824.0, 19894.0, 31986.0, 53018.0, 97708.0, 227924.0, 314887.0, 110486.0, 59468.0, 35102.0, 21778.0, 13767.0, 9053.0, 5934.0, 3740.0, 2447.0, 1599.0, 1095.0, 729.0, 454.0, 290.0, 211.0, 122.0, 94.0, 57.0, 37.0, 22.0, 19.0, 8.0, 4.0, 2.0, 2.0, 1.0, 4.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-2.20703125, -2.133697509765625, -2.06036376953125, -1.987030029296875, -1.9136962890625, -1.840362548828125, -1.76702880859375, -1.693695068359375, -1.620361328125, -1.547027587890625, -1.47369384765625, -1.400360107421875, -1.3270263671875, -1.253692626953125, -1.18035888671875, -1.107025146484375, -1.03369140625, -0.960357666015625, -0.88702392578125, -0.813690185546875, -0.7403564453125, -0.667022705078125, -0.59368896484375, -0.520355224609375, -0.447021484375, -0.373687744140625, -0.30035400390625, -0.227020263671875, -0.1536865234375, -0.080352783203125, -0.00701904296875, 0.066314697265625, 0.1396484375, 0.212982177734375, 0.28631591796875, 0.359649658203125, 0.4329833984375, 0.506317138671875, 0.57965087890625, 0.652984619140625, 0.726318359375, 0.799652099609375, 0.87298583984375, 0.946319580078125, 1.0196533203125, 1.092987060546875, 1.16632080078125, 1.239654541015625, 1.31298828125, 1.386322021484375, 1.45965576171875, 1.532989501953125, 1.6063232421875, 1.679656982421875, 1.75299072265625, 1.826324462890625, 1.899658203125, 1.972991943359375, 2.04632568359375, 2.119659423828125, 2.1929931640625, 2.266326904296875, 2.33966064453125, 2.412994384765625, 2.486328125]}, "gradients/decoder.transformer.h.6.crossattention.c_attn.bias": {"_type": "histogram", "values": [3.0, 0.0, 2.0, 2.0, 3.0, 7.0, 2.0, 5.0, 8.0, 6.0, 13.0, 16.0, 15.0, 15.0, 21.0, 20.0, 25.0, 30.0, 27.0, 26.0, 31.0, 35.0, 30.0, 30.0, 32.0, 37.0, 30.0, 42.0, 1069.0, 37.0, 31.0, 26.0, 42.0, 40.0, 35.0, 27.0, 24.0, 19.0, 18.0, 24.0, 27.0, 19.0, 18.0, 13.0, 12.0, 12.0, 5.0, 10.0, 7.0, 2.0, 0.0, 2.0, 4.0, 2.0, 4.0, 1.0, 0.0, 0.0, 1.0, 3.0, 0.0, 0.0, 0.0, 1.0], "bins": [-30.28125, -29.2158203125, -28.150390625, -27.0849609375, -26.01953125, -24.9541015625, -23.888671875, -22.8232421875, -21.7578125, -20.6923828125, -19.626953125, -18.5615234375, -17.49609375, -16.4306640625, -15.365234375, -14.2998046875, -13.234375, -12.1689453125, -11.103515625, -10.0380859375, -8.97265625, -7.9072265625, -6.841796875, -5.7763671875, -4.7109375, -3.6455078125, -2.580078125, -1.5146484375, -0.44921875, 0.6162109375, 1.681640625, 2.7470703125, 3.8125, 4.8779296875, 5.943359375, 7.0087890625, 8.07421875, 9.1396484375, 10.205078125, 11.2705078125, 12.3359375, 13.4013671875, 14.466796875, 15.5322265625, 16.59765625, 17.6630859375, 18.728515625, 19.7939453125, 20.859375, 21.9248046875, 22.990234375, 24.0556640625, 25.12109375, 26.1865234375, 27.251953125, 28.3173828125, 29.3828125, 30.4482421875, 31.513671875, 32.5791015625, 33.64453125, 34.7099609375, 35.775390625, 36.8408203125, 37.90625]}, "gradients/decoder.transformer.h.6.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 3.0, 4.0, 7.0, 3.0, 6.0, 20.0, 27.0, 28.0, 42.0, 48.0, 76.0, 149.0, 183.0, 239.0, 411.0, 628.0, 839.0, 1277.0, 1900.0, 2764.0, 4219.0, 6270.0, 9590.0, 14603.0, 22226.0, 34100.0, 53661.0, 88102.0, 163185.0, 1358802.0, 127239.0, 73643.0, 46236.0, 29574.0, 19229.0, 12722.0, 8447.0, 5612.0, 3620.0, 2399.0, 1592.0, 1121.0, 691.0, 488.0, 347.0, 233.0, 169.0, 110.0, 81.0, 48.0, 53.0, 30.0, 17.0, 10.0, 7.0, 8.0, 4.0, 4.0, 1.0, 1.0, 1.0], "bins": [-1.5517578125, -1.503753662109375, -1.45574951171875, -1.407745361328125, -1.3597412109375, -1.311737060546875, -1.26373291015625, -1.215728759765625, -1.167724609375, -1.119720458984375, -1.07171630859375, -1.023712158203125, -0.9757080078125, -0.927703857421875, -0.87969970703125, -0.831695556640625, -0.78369140625, -0.735687255859375, -0.68768310546875, -0.639678955078125, -0.5916748046875, -0.543670654296875, -0.49566650390625, -0.447662353515625, -0.399658203125, -0.351654052734375, -0.30364990234375, -0.255645751953125, -0.2076416015625, -0.159637451171875, -0.11163330078125, -0.063629150390625, -0.015625, 0.032379150390625, 0.08038330078125, 0.128387451171875, 0.1763916015625, 0.224395751953125, 0.27239990234375, 0.320404052734375, 0.368408203125, 0.416412353515625, 0.46441650390625, 0.512420654296875, 0.5604248046875, 0.608428955078125, 0.65643310546875, 0.704437255859375, 0.75244140625, 0.800445556640625, 0.84844970703125, 0.896453857421875, 0.9444580078125, 0.992462158203125, 1.04046630859375, 1.088470458984375, 1.136474609375, 1.184478759765625, 1.23248291015625, 1.280487060546875, 1.3284912109375, 1.376495361328125, 1.42449951171875, 1.472503662109375, 1.5205078125]}, "gradients/decoder.transformer.h.6.crossattention.q_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 2.0, 2.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 1.0, 1.0, 4.0, 4.0, 7.0, 9.0, 8.0, 13.0, 11.0, 19.0, 19.0, 26.0, 35.0, 48.0, 82.0, 132.0, 228.0, 109.0, 49.0, 46.0, 29.0, 33.0, 20.0, 24.0, 12.0, 10.0, 2.0, 3.0, 8.0, 5.0, 3.0, 4.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00011795759201049805, -0.00011380109935998917, -0.00010964460670948029, -0.0001054881140589714, -0.00010133162140846252, -9.717512875795364e-05, -9.301863610744476e-05, -8.886214345693588e-05, -8.4705650806427e-05, -8.054915815591812e-05, -7.639266550540924e-05, -7.223617285490036e-05, -6.807968020439148e-05, -6.39231875538826e-05, -5.976669490337372e-05, -5.561020225286484e-05, -5.145370960235596e-05, -4.7297216951847076e-05, -4.3140724301338196e-05, -3.8984231650829315e-05, -3.4827739000320435e-05, -3.0671246349811554e-05, -2.6514753699302673e-05, -2.2358261048793793e-05, -1.8201768398284912e-05, -1.4045275747776031e-05, -9.888783097267151e-06, -5.73229044675827e-06, -1.5757977962493896e-06, 2.580694854259491e-06, 6.737187504768372e-06, 1.0893680155277252e-05, 1.5050172805786133e-05, 1.9206665456295013e-05, 2.3363158106803894e-05, 2.7519650757312775e-05, 3.1676143407821655e-05, 3.5832636058330536e-05, 3.9989128708839417e-05, 4.41456213593483e-05, 4.830211400985718e-05, 5.245860666036606e-05, 5.661509931087494e-05, 6.077159196138382e-05, 6.49280846118927e-05, 6.908457726240158e-05, 7.324106991291046e-05, 7.739756256341934e-05, 8.155405521392822e-05, 8.57105478644371e-05, 8.986704051494598e-05, 9.402353316545486e-05, 9.818002581596375e-05, 0.00010233651846647263, 0.0001064930111169815, 0.00011064950376749039, 0.00011480599641799927, 0.00011896248906850815, 0.00012311898171901703, 0.0001272754743695259, 0.0001314319670200348, 0.00013558845967054367, 0.00013974495232105255, 0.00014390144497156143, 0.0001480579376220703]}, "gradients/decoder.transformer.h.6.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 4.0, 4.0, 3.0, 2.0, 2.0, 6.0, 9.0, 24.0, 20.0, 27.0, 42.0, 72.0, 91.0, 143.0, 598.0, 11952.0, 996239.0, 37514.0, 1261.0, 208.0, 105.0, 66.0, 37.0, 30.0, 23.0, 19.0, 17.0, 12.0, 9.0, 3.0, 4.0, 2.0, 3.0, 3.0, 2.0, 2.0, 1.0, 2.0, 0.0, 0.0, 3.0, 0.0, 0.0, 1.0, 2.0], "bins": [-0.0025234222412109375, -0.0024526119232177734, -0.0023818016052246094, -0.0023109912872314453, -0.0022401809692382812, -0.002169370651245117, -0.002098560333251953, -0.002027750015258789, -0.001956939697265625, -0.001886129379272461, -0.0018153190612792969, -0.0017445087432861328, -0.0016736984252929688, -0.0016028881072998047, -0.0015320777893066406, -0.0014612674713134766, -0.0013904571533203125, -0.0013196468353271484, -0.0012488365173339844, -0.0011780261993408203, -0.0011072158813476562, -0.0010364055633544922, -0.0009655952453613281, -0.0008947849273681641, -0.000823974609375, -0.0007531642913818359, -0.0006823539733886719, -0.0006115436553955078, -0.0005407333374023438, -0.0004699230194091797, -0.0003991127014160156, -0.00032830238342285156, -0.0002574920654296875, -0.00018668174743652344, -0.00011587142944335938, -4.506111145019531e-05, 2.574920654296875e-05, 9.655952453613281e-05, 0.00016736984252929688, 0.00023818016052246094, 0.000308990478515625, 0.00037980079650878906, 0.0004506111145019531, 0.0005214214324951172, 0.0005922317504882812, 0.0006630420684814453, 0.0007338523864746094, 0.0008046627044677734, 0.0008754730224609375, 0.0009462833404541016, 0.0010170936584472656, 0.0010879039764404297, 0.0011587142944335938, 0.0012295246124267578, 0.0013003349304199219, 0.001371145248413086, 0.00144195556640625, 0.001512765884399414, 0.0015835762023925781, 0.0016543865203857422, 0.0017251968383789062, 0.0017960071563720703, 0.0018668174743652344, 0.0019376277923583984, 0.0020084381103515625]}, "gradients/decoder.transformer.h.6.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 4.0, 3.0, 7.0, 11.0, 14.0, 20.0, 34.0, 36.0, 61.0, 101.0, 182.0, 173.0, 148.0, 75.0, 53.0, 31.0, 18.0, 12.0, 12.0, 8.0, 2.0, 6.0, 2.0, 2.0, 3.0, 0.0, 1.0, 1.0], "bins": [-0.00013911361747886986, -0.00013616749492939562, -0.00013322137237992138, -0.00013027524983044714, -0.0001273291272809729, -0.0001243830192834139, -0.00012143688945798203, -0.0001184907669085078, -0.00011554465163499117, -0.00011259852908551693, -0.00010965240653604269, -0.00010670628398656845, -0.00010376016871305183, -0.00010081404616357759, -9.786792361410335e-05, -9.492180106462911e-05, -9.197567851515487e-05, -8.902955596568063e-05, -8.608343341620639e-05, -8.313731814268976e-05, -8.019119559321553e-05, -7.724507304374129e-05, -7.429895049426705e-05, -7.135282794479281e-05, -6.840670539531857e-05, -6.546058284584433e-05, -6.251446029637009e-05, -5.956834138487466e-05, -5.6622222473379225e-05, -5.3676099923904985e-05, -5.0729977374430746e-05, -4.7783854824956506e-05, -4.483774318941869e-05, -4.189162063994445e-05, -3.894550172844902e-05, -3.599937917897478e-05, -3.305325662950054e-05, -3.0107137718005106e-05, -2.7161015168530867e-05, -2.421489443804603e-05, -2.1268773707561195e-05, -1.832265297707636e-05, -1.5376532246591523e-05, -1.2430409697117284e-05, -9.484288966632448e-06, -6.538168236147612e-06, -3.592045686673373e-06, -6.459249561885372e-07, 2.3001957742962986e-06, 5.246316959528485e-06, 8.192438144760672e-06, 1.113855978474021e-05, 1.4084680515225045e-05, 1.703080124570988e-05, 1.997692379518412e-05, 2.2923044525668956e-05, 2.5869165256153792e-05, 2.8815285986638628e-05, 3.1761406717123464e-05, 3.47075292665977e-05, 3.7653648178093135e-05, 4.0599770727567375e-05, 4.3545893277041614e-05, 4.6492015826515853e-05, 4.9438134738011286e-05]}, "gradients/decoder.transformer.h.6.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 0.0, 4.0, 2.0, 4.0, 2.0, 11.0, 3.0, 4.0, 15.0, 14.0, 11.0, 21.0, 19.0, 26.0, 33.0, 27.0, 23.0, 33.0, 27.0, 42.0, 36.0, 39.0, 40.0, 36.0, 45.0, 28.0, 43.0, 40.0, 32.0, 38.0, 33.0, 37.0, 29.0, 28.0, 28.0, 22.0, 17.0, 20.0, 19.0, 13.0, 10.0, 13.0, 13.0, 3.0, 6.0, 7.0, 7.0, 4.0, 4.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.64320182800293e-05, -4.494842141866684e-05, -4.346482455730438e-05, -4.1981227695941925e-05, -4.049763083457947e-05, -3.901403397321701e-05, -3.753043711185455e-05, -3.6046840250492096e-05, -3.456324338912964e-05, -3.307964652776718e-05, -3.1596049666404724e-05, -3.0112452805042267e-05, -2.862885594367981e-05, -2.7145259082317352e-05, -2.5661662220954895e-05, -2.4178065359592438e-05, -2.269446849822998e-05, -2.1210871636867523e-05, -1.9727274775505066e-05, -1.824367791414261e-05, -1.676008105278015e-05, -1.5276484191417694e-05, -1.3792887330055237e-05, -1.230929046869278e-05, -1.0825693607330322e-05, -9.342096745967865e-06, -7.858499884605408e-06, -6.3749030232429504e-06, -4.891306161880493e-06, -3.407709300518036e-06, -1.9241124391555786e-06, -4.4051557779312134e-07, 1.043081283569336e-06, 2.5266781449317932e-06, 4.0102750062942505e-06, 5.493871867656708e-06, 6.977468729019165e-06, 8.461065590381622e-06, 9.94466245174408e-06, 1.1428259313106537e-05, 1.2911856174468994e-05, 1.4395453035831451e-05, 1.587904989719391e-05, 1.7362646758556366e-05, 1.8846243619918823e-05, 2.032984048128128e-05, 2.1813437342643738e-05, 2.3297034204006195e-05, 2.4780631065368652e-05, 2.626422792673111e-05, 2.7747824788093567e-05, 2.9231421649456024e-05, 3.071501851081848e-05, 3.219861537218094e-05, 3.3682212233543396e-05, 3.516580909490585e-05, 3.664940595626831e-05, 3.813300281763077e-05, 3.9616599678993225e-05, 4.110019654035568e-05, 4.258379340171814e-05, 4.40673902630806e-05, 4.5550987124443054e-05, 4.703458398580551e-05, 4.851818084716797e-05]}, "gradients/decoder.transformer.h.6.attn.c_proj.bias": {"_type": "histogram", "values": [5.0, 1.0, 2.0, 3.0, 2.0, 2.0, 2.0, 1.0, 2.0, 8.0, 7.0, 13.0, 6.0, 15.0, 13.0, 12.0, 15.0, 27.0, 20.0, 21.0, 17.0, 28.0, 26.0, 35.0, 43.0, 40.0, 29.0, 34.0, 39.0, 35.0, 38.0, 44.0, 38.0, 48.0, 28.0, 40.0, 23.0, 34.0, 22.0, 22.0, 30.0, 14.0, 24.0, 16.0, 14.0, 21.0, 10.0, 9.0, 8.0, 7.0, 6.0, 6.0, 3.0, 3.0, 2.0, 2.0, 1.0, 4.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-51.09375, -49.39599609375, -47.6982421875, -46.00048828125, -44.302734375, -42.60498046875, -40.9072265625, -39.20947265625, -37.51171875, -35.81396484375, -34.1162109375, -32.41845703125, -30.720703125, -29.02294921875, -27.3251953125, -25.62744140625, -23.9296875, -22.23193359375, -20.5341796875, -18.83642578125, -17.138671875, -15.44091796875, -13.7431640625, -12.04541015625, -10.34765625, -8.64990234375, -6.9521484375, -5.25439453125, -3.556640625, -1.85888671875, -0.1611328125, 1.53662109375, 3.234375, 4.93212890625, 6.6298828125, 8.32763671875, 10.025390625, 11.72314453125, 13.4208984375, 15.11865234375, 16.81640625, 18.51416015625, 20.2119140625, 21.90966796875, 23.607421875, 25.30517578125, 27.0029296875, 28.70068359375, 30.3984375, 32.09619140625, 33.7939453125, 35.49169921875, 37.189453125, 38.88720703125, 40.5849609375, 42.28271484375, 43.98046875, 45.67822265625, 47.3759765625, 49.07373046875, 50.771484375, 52.46923828125, 54.1669921875, 55.86474609375, 57.5625]}, "gradients/decoder.transformer.h.6.attn.c_proj.weight": {"_type": "histogram", "values": [3.0, 5.0, 5.0, 3.0, 5.0, 7.0, 3.0, 1.0, 5.0, 10.0, 16.0, 24.0, 23.0, 22.0, 30.0, 42.0, 50.0, 77.0, 89.0, 146.0, 186.0, 289.0, 460.0, 779.0, 1259.0, 2178.0, 3874.0, 7653.0, 19279.0, 251311.0, 714271.0, 26164.0, 9276.0, 4603.0, 2489.0, 1448.0, 829.0, 537.0, 323.0, 202.0, 149.0, 114.0, 71.0, 47.0, 51.0, 35.0, 27.0, 19.0, 18.0, 13.0, 11.0, 12.0, 5.0, 6.0, 2.0, 6.0, 2.0, 5.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-117.0, -113.115234375, -109.23046875, -105.345703125, -101.4609375, -97.576171875, -93.69140625, -89.806640625, -85.921875, -82.037109375, -78.15234375, -74.267578125, -70.3828125, -66.498046875, -62.61328125, -58.728515625, -54.84375, -50.958984375, -47.07421875, -43.189453125, -39.3046875, -35.419921875, -31.53515625, -27.650390625, -23.765625, -19.880859375, -15.99609375, -12.111328125, -8.2265625, -4.341796875, -0.45703125, 3.427734375, 7.3125, 11.197265625, 15.08203125, 18.966796875, 22.8515625, 26.736328125, 30.62109375, 34.505859375, 38.390625, 42.275390625, 46.16015625, 50.044921875, 53.9296875, 57.814453125, 61.69921875, 65.583984375, 69.46875, 73.353515625, 77.23828125, 81.123046875, 85.0078125, 88.892578125, 92.77734375, 96.662109375, 100.546875, 104.431640625, 108.31640625, 112.201171875, 116.0859375, 119.970703125, 123.85546875, 127.740234375, 131.625]}, "gradients/decoder.transformer.h.6.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 3.0, 1.0, 2.0, 7.0, 4.0, 7.0, 4.0, 8.0, 14.0, 11.0, 17.0, 19.0, 20.0, 26.0, 25.0, 37.0, 24.0, 39.0, 29.0, 41.0, 56.0, 36.0, 63.0, 1563.0, 473.0, 70.0, 56.0, 41.0, 52.0, 43.0, 18.0, 28.0, 33.0, 29.0, 33.0, 20.0, 19.0, 13.0, 18.0, 13.0, 4.0, 9.0, 7.0, 9.0, 8.0, 5.0, 4.0, 0.0, 1.0, 2.0, 1.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-134.5, -129.515625, -124.53125, -119.546875, -114.5625, -109.578125, -104.59375, -99.609375, -94.625, -89.640625, -84.65625, -79.671875, -74.6875, -69.703125, -64.71875, -59.734375, -54.75, -49.765625, -44.78125, -39.796875, -34.8125, -29.828125, -24.84375, -19.859375, -14.875, -9.890625, -4.90625, 0.078125, 5.0625, 10.046875, 15.03125, 20.015625, 25.0, 29.984375, 34.96875, 39.953125, 44.9375, 49.921875, 54.90625, 59.890625, 64.875, 69.859375, 74.84375, 79.828125, 84.8125, 89.796875, 94.78125, 99.765625, 104.75, 109.734375, 114.71875, 119.703125, 124.6875, 129.671875, 134.65625, 139.640625, 144.625, 149.609375, 154.59375, 159.578125, 164.5625, 169.546875, 174.53125, 179.515625, 184.5]}, "gradients/decoder.transformer.h.6.attn.c_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 2.0, 2.0, 3.0, 1.0, 3.0, 2.0, 10.0, 6.0, 9.0, 11.0, 10.0, 21.0, 25.0, 26.0, 49.0, 63.0, 116.0, 165.0, 302.0, 566.0, 1119.0, 2033.0, 4390.0, 9987.0, 41351.0, 3027413.0, 39278.0, 9998.0, 4257.0, 2141.0, 1043.0, 578.0, 263.0, 158.0, 91.0, 73.0, 36.0, 34.0, 17.0, 19.0, 7.0, 11.0, 7.0, 7.0, 2.0, 0.0, 5.0, 4.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-452.75, -439.23046875, -425.7109375, -412.19140625, -398.671875, -385.15234375, -371.6328125, -358.11328125, -344.59375, -331.07421875, -317.5546875, -304.03515625, -290.515625, -276.99609375, -263.4765625, -249.95703125, -236.4375, -222.91796875, -209.3984375, -195.87890625, -182.359375, -168.83984375, -155.3203125, -141.80078125, -128.28125, -114.76171875, -101.2421875, -87.72265625, -74.203125, -60.68359375, -47.1640625, -33.64453125, -20.125, -6.60546875, 6.9140625, 20.43359375, 33.953125, 47.47265625, 60.9921875, 74.51171875, 88.03125, 101.55078125, 115.0703125, 128.58984375, 142.109375, 155.62890625, 169.1484375, 182.66796875, 196.1875, 209.70703125, 223.2265625, 236.74609375, 250.265625, 263.78515625, 277.3046875, 290.82421875, 304.34375, 317.86328125, 331.3828125, 344.90234375, 358.421875, 371.94140625, 385.4609375, 398.98046875, 412.5]}, "gradients/decoder.transformer.h.6.ln_1.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 2.0, 2.0, 3.0, 2.0, 4.0, 4.0, 4.0, 7.0, 13.0, 11.0, 18.0, 15.0, 26.0, 37.0, 55.0, 69.0, 75.0, 107.0, 146.0, 109.0, 70.0, 66.0, 36.0, 28.0, 27.0, 18.0, 10.0, 15.0, 8.0, 7.0, 4.0, 4.0, 4.0, 2.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-338.05255126953125, -326.4740295410156, -314.8955383300781, -303.3170166015625, -291.738525390625, -280.1600036621094, -268.58148193359375, -257.00299072265625, -245.42446899414062, -233.84596252441406, -222.2674560546875, -210.68893432617188, -199.1104278564453, -187.53192138671875, -175.95339965820312, -164.37489318847656, -152.79638671875, -141.21788024902344, -129.63937377929688, -118.06085205078125, -106.48234558105469, -94.90383911132812, -83.32532501220703, -71.74681091308594, -60.168304443359375, -48.58979415893555, -37.01128387451172, -25.43277359008789, -13.854263305664062, -2.2757530212402344, 9.302757263183594, 20.881271362304688, 32.45977783203125, 44.03828811645508, 55.616798400878906, 67.1953125, 78.77381896972656, 90.35232543945312, 101.93083953857422, 113.50935363769531, 125.08786010742188, 136.66636657714844, 148.244873046875, 159.82339477539062, 171.4019012451172, 182.98040771484375, 194.55892944335938, 206.13743591308594, 217.7159423828125, 229.29444885253906, 240.87295532226562, 252.45147705078125, 264.02996826171875, 275.6084899902344, 287.18701171875, 298.7655029296875, 310.3440246582031, 321.92254638671875, 333.50103759765625, 345.0795593261719, 356.6580810546875, 368.236572265625, 379.8150939941406, 391.39361572265625, 402.97210693359375]}, "gradients/decoder.transformer.h.6.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 2.0, 2.0, 0.0, 4.0, 4.0, 4.0, 5.0, 11.0, 9.0, 12.0, 15.0, 14.0, 24.0, 27.0, 17.0, 36.0, 26.0, 38.0, 39.0, 41.0, 50.0, 41.0, 49.0, 47.0, 42.0, 53.0, 55.0, 35.0, 48.0, 29.0, 42.0, 28.0, 26.0, 14.0, 18.0, 20.0, 14.0, 15.0, 12.0, 10.0, 11.0, 4.0, 8.0, 3.0, 4.0, 3.0, 1.0, 3.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-521.4607543945312, -506.02197265625, -490.58319091796875, -475.1444091796875, -459.70562744140625, -444.266845703125, -428.8280944824219, -413.3893127441406, -397.9505310058594, -382.5117492675781, -367.0729675292969, -351.6341857910156, -336.1954345703125, -320.75665283203125, -305.31787109375, -289.87908935546875, -274.4403076171875, -259.00152587890625, -243.562744140625, -228.1239776611328, -212.68519592285156, -197.2464141845703, -181.80764770507812, -166.36886596679688, -150.93008422851562, -135.49130249023438, -120.05252838134766, -104.61375427246094, -89.17497253417969, -73.73619079589844, -58.29741668701172, -42.858642578125, -27.419891357421875, -11.98111343383789, 3.4576644897460938, 18.896442413330078, 34.33522033691406, 49.77400207519531, 65.21277618408203, 80.65155029296875, 96.09033203125, 111.52911376953125, 126.96788787841797, 142.4066619873047, 157.84544372558594, 173.2842254638672, 188.72299194335938, 204.16177368164062, 219.60055541992188, 235.03933715820312, 250.47811889648438, 265.9169006347656, 281.35565185546875, 296.79443359375, 312.23321533203125, 327.6719970703125, 343.11077880859375, 358.549560546875, 373.98834228515625, 389.4271240234375, 404.86590576171875, 420.3046875, 435.7434387207031, 451.1822204589844, 466.6210021972656]}, "gradients/decoder.transformer.h.5.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 2.0, 1.0, 3.0, 3.0, 4.0, 3.0, 3.0, 3.0, 9.0, 7.0, 9.0, 16.0, 17.0, 20.0, 21.0, 17.0, 23.0, 21.0, 17.0, 28.0, 36.0, 32.0, 44.0, 33.0, 41.0, 41.0, 50.0, 31.0, 43.0, 39.0, 30.0, 38.0, 30.0, 27.0, 34.0, 36.0, 29.0, 29.0, 24.0, 22.0, 14.0, 17.0, 16.0, 9.0, 9.0, 2.0, 8.0, 5.0, 2.0, 5.0, 7.0, 1.0, 4.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-58.75, -56.8779296875, -55.005859375, -53.1337890625, -51.26171875, -49.3896484375, -47.517578125, -45.6455078125, -43.7734375, -41.9013671875, -40.029296875, -38.1572265625, -36.28515625, -34.4130859375, -32.541015625, -30.6689453125, -28.796875, -26.9248046875, -25.052734375, -23.1806640625, -21.30859375, -19.4365234375, -17.564453125, -15.6923828125, -13.8203125, -11.9482421875, -10.076171875, -8.2041015625, -6.33203125, -4.4599609375, -2.587890625, -0.7158203125, 1.15625, 3.0283203125, 4.900390625, 6.7724609375, 8.64453125, 10.5166015625, 12.388671875, 14.2607421875, 16.1328125, 18.0048828125, 19.876953125, 21.7490234375, 23.62109375, 25.4931640625, 27.365234375, 29.2373046875, 31.109375, 32.9814453125, 34.853515625, 36.7255859375, 38.59765625, 40.4697265625, 42.341796875, 44.2138671875, 46.0859375, 47.9580078125, 49.830078125, 51.7021484375, 53.57421875, 55.4462890625, 57.318359375, 59.1904296875, 61.0625]}, "gradients/decoder.transformer.h.5.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 3.0, 2.0, 1.0, 2.0, 2.0, 1.0, 1.0, 5.0, 10.0, 6.0, 13.0, 17.0, 17.0, 33.0, 47.0, 56.0, 69.0, 83.0, 141.0, 186.0, 241.0, 293.0, 430.0, 614.0, 839.0, 1173.0, 1770.0, 2563.0, 3952.0, 6550.0, 10984.0, 20086.0, 46648.0, 431247.0, 3302189.0, 278464.0, 39358.0, 18230.0, 10101.0, 5902.0, 3778.0, 2459.0, 1609.0, 1101.0, 816.0, 550.0, 438.0, 279.0, 262.0, 162.0, 136.0, 87.0, 78.0, 69.0, 36.0, 38.0, 20.0, 12.0, 17.0, 13.0, 3.0, 6.0, 5.0], "bins": [-168.125, -163.37890625, -158.6328125, -153.88671875, -149.140625, -144.39453125, -139.6484375, -134.90234375, -130.15625, -125.41015625, -120.6640625, -115.91796875, -111.171875, -106.42578125, -101.6796875, -96.93359375, -92.1875, -87.44140625, -82.6953125, -77.94921875, -73.203125, -68.45703125, -63.7109375, -58.96484375, -54.21875, -49.47265625, -44.7265625, -39.98046875, -35.234375, -30.48828125, -25.7421875, -20.99609375, -16.25, -11.50390625, -6.7578125, -2.01171875, 2.734375, 7.48046875, 12.2265625, 16.97265625, 21.71875, 26.46484375, 31.2109375, 35.95703125, 40.703125, 45.44921875, 50.1953125, 54.94140625, 59.6875, 64.43359375, 69.1796875, 73.92578125, 78.671875, 83.41796875, 88.1640625, 92.91015625, 97.65625, 102.40234375, 107.1484375, 111.89453125, 116.640625, 121.38671875, 126.1328125, 130.87890625, 135.625]}, "gradients/decoder.transformer.h.5.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 2.0, 1.0, 2.0, 2.0, 1.0, 6.0, 7.0, 9.0, 5.0, 8.0, 17.0, 16.0, 15.0, 34.0, 47.0, 63.0, 97.0, 214.0, 556.0, 1429.0, 874.0, 304.0, 123.0, 71.0, 44.0, 43.0, 21.0, 15.0, 18.0, 8.0, 6.0, 11.0, 5.0, 4.0, 2.0, 2.0, 2.0, 3.0, 2.0, 1.0, 0.0, 2.0], "bins": [-217.625, -212.4052734375, -207.185546875, -201.9658203125, -196.74609375, -191.5263671875, -186.306640625, -181.0869140625, -175.8671875, -170.6474609375, -165.427734375, -160.2080078125, -154.98828125, -149.7685546875, -144.548828125, -139.3291015625, -134.109375, -128.8896484375, -123.669921875, -118.4501953125, -113.23046875, -108.0107421875, -102.791015625, -97.5712890625, -92.3515625, -87.1318359375, -81.912109375, -76.6923828125, -71.47265625, -66.2529296875, -61.033203125, -55.8134765625, -50.59375, -45.3740234375, -40.154296875, -34.9345703125, -29.71484375, -24.4951171875, -19.275390625, -14.0556640625, -8.8359375, -3.6162109375, 1.603515625, 6.8232421875, 12.04296875, 17.2626953125, 22.482421875, 27.7021484375, 32.921875, 38.1416015625, 43.361328125, 48.5810546875, 53.80078125, 59.0205078125, 64.240234375, 69.4599609375, 74.6796875, 79.8994140625, 85.119140625, 90.3388671875, 95.55859375, 100.7783203125, 105.998046875, 111.2177734375, 116.4375]}, "gradients/decoder.transformer.h.5.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 4.0, 3.0, 2.0, 6.0, 8.0, 8.0, 9.0, 15.0, 20.0, 29.0, 34.0, 54.0, 90.0, 126.0, 228.0, 495.0, 1056.0, 2270.0, 5228.0, 13715.0, 46977.0, 627373.0, 3389391.0, 76061.0, 18764.0, 6858.0, 2835.0, 1222.0, 625.0, 306.0, 183.0, 94.0, 52.0, 39.0, 26.0, 22.0, 14.0, 17.0, 11.0, 10.0, 6.0, 3.0, 3.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-327.25, -316.39453125, -305.5390625, -294.68359375, -283.828125, -272.97265625, -262.1171875, -251.26171875, -240.40625, -229.55078125, -218.6953125, -207.83984375, -196.984375, -186.12890625, -175.2734375, -164.41796875, -153.5625, -142.70703125, -131.8515625, -120.99609375, -110.140625, -99.28515625, -88.4296875, -77.57421875, -66.71875, -55.86328125, -45.0078125, -34.15234375, -23.296875, -12.44140625, -1.5859375, 9.26953125, 20.125, 30.98046875, 41.8359375, 52.69140625, 63.546875, 74.40234375, 85.2578125, 96.11328125, 106.96875, 117.82421875, 128.6796875, 139.53515625, 150.390625, 161.24609375, 172.1015625, 182.95703125, 193.8125, 204.66796875, 215.5234375, 226.37890625, 237.234375, 248.08984375, 258.9453125, 269.80078125, 280.65625, 291.51171875, 302.3671875, 313.22265625, 324.078125, 334.93359375, 345.7890625, 356.64453125, 367.5]}, "gradients/decoder.transformer.h.5.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 4.0, 3.0, 10.0, 6.0, 7.0, 12.0, 26.0, 25.0, 43.0, 46.0, 74.0, 114.0, 147.0, 123.0, 123.0, 73.0, 62.0, 34.0, 29.0, 19.0, 13.0, 5.0, 6.0, 5.0, 3.0, 1.0, 2.0, 0.0, 0.0, 1.0], "bins": [-885.46875, -866.6326904296875, -847.796630859375, -828.9605712890625, -810.12451171875, -791.2884521484375, -772.452392578125, -753.6163330078125, -734.7802734375, -715.9442138671875, -697.108154296875, -678.2720947265625, -659.43603515625, -640.5999755859375, -621.763916015625, -602.9278564453125, -584.0918579101562, -565.2557983398438, -546.4197387695312, -527.5836791992188, -508.74761962890625, -489.91156005859375, -471.0755310058594, -452.2394714355469, -433.4034118652344, -414.5673522949219, -395.7312927246094, -376.8952331542969, -358.0592041015625, -339.22314453125, -320.3870849609375, -301.551025390625, -282.71490478515625, -263.87884521484375, -245.04278564453125, -226.2067413330078, -207.3706817626953, -188.5346221923828, -169.69857788085938, -150.86251831054688, -132.02645874023438, -113.19039916992188, -94.3543472290039, -75.51829528808594, -56.68223571777344, -37.84617614746094, -19.01012420654297, -0.174072265625, 18.6619873046875, 37.498043060302734, 56.33409881591797, 75.17015075683594, 94.00621032714844, 112.84226989746094, 131.67831420898438, 150.51437377929688, 169.35043334960938, 188.18649291992188, 207.02255249023438, 225.8585968017578, 244.6946563720703, 263.53070068359375, 282.36676025390625, 301.20281982421875, 320.03887939453125]}, "gradients/decoder.transformer.h.5.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 1.0, 3.0, 2.0, 5.0, 1.0, 3.0, 5.0, 11.0, 11.0, 6.0, 20.0, 15.0, 28.0, 23.0, 22.0, 34.0, 25.0, 34.0, 30.0, 40.0, 38.0, 31.0, 44.0, 31.0, 43.0, 38.0, 47.0, 37.0, 39.0, 39.0, 37.0, 27.0, 21.0, 31.0, 37.0, 26.0, 20.0, 23.0, 12.0, 15.0, 9.0, 7.0, 14.0, 7.0, 2.0, 4.0, 3.0, 4.0, 1.0, 0.0, 3.0, 1.0, 2.0, 3.0, 1.0, 1.0, 1.0], "bins": [-360.9302673339844, -349.7471008300781, -338.5639343261719, -327.3807678222656, -316.1976013183594, -305.0144348144531, -293.831298828125, -282.64813232421875, -271.4649658203125, -260.28179931640625, -249.0986328125, -237.91546630859375, -226.7322998046875, -215.54913330078125, -204.36598205566406, -193.1828155517578, -181.9996337890625, -170.81646728515625, -159.63330078125, -148.45013427734375, -137.2669677734375, -126.08380889892578, -114.90065002441406, -103.71748352050781, -92.53431701660156, -81.35115051269531, -70.16798400878906, -58.984825134277344, -47.801658630371094, -36.618492126464844, -25.435333251953125, -14.252166748046875, -3.069000244140625, 8.114164352416992, 19.29732894897461, 30.480491638183594, 41.663658142089844, 52.846824645996094, 64.02998352050781, 75.21315002441406, 86.39631652832031, 97.57948303222656, 108.76264953613281, 119.94580841064453, 131.12896728515625, 142.3121337890625, 153.49530029296875, 164.678466796875, 175.86163330078125, 187.0447998046875, 198.22796630859375, 209.4111328125, 220.59429931640625, 231.7774658203125, 242.9606170654297, 254.14378356933594, 265.32696533203125, 276.5101318359375, 287.69329833984375, 298.87646484375, 310.05963134765625, 321.2427978515625, 332.42596435546875, 343.609130859375, 354.7922668457031]}, "gradients/decoder.transformer.h.5.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 3.0, 1.0, 1.0, 4.0, 3.0, 1.0, 8.0, 7.0, 10.0, 10.0, 11.0, 15.0, 22.0, 23.0, 25.0, 20.0, 32.0, 30.0, 40.0, 27.0, 30.0, 51.0, 45.0, 56.0, 50.0, 32.0, 51.0, 45.0, 43.0, 32.0, 27.0, 33.0, 33.0, 35.0, 22.0, 24.0, 22.0, 20.0, 15.0, 10.0, 8.0, 9.0, 4.0, 6.0, 5.0, 5.0, 2.0, 2.0, 4.0, 0.0, 0.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-66.8125, -64.716796875, -62.62109375, -60.525390625, -58.4296875, -56.333984375, -54.23828125, -52.142578125, -50.046875, -47.951171875, -45.85546875, -43.759765625, -41.6640625, -39.568359375, -37.47265625, -35.376953125, -33.28125, -31.185546875, -29.08984375, -26.994140625, -24.8984375, -22.802734375, -20.70703125, -18.611328125, -16.515625, -14.419921875, -12.32421875, -10.228515625, -8.1328125, -6.037109375, -3.94140625, -1.845703125, 0.25, 2.345703125, 4.44140625, 6.537109375, 8.6328125, 10.728515625, 12.82421875, 14.919921875, 17.015625, 19.111328125, 21.20703125, 23.302734375, 25.3984375, 27.494140625, 29.58984375, 31.685546875, 33.78125, 35.876953125, 37.97265625, 40.068359375, 42.1640625, 44.259765625, 46.35546875, 48.451171875, 50.546875, 52.642578125, 54.73828125, 56.833984375, 58.9296875, 61.025390625, 63.12109375, 65.216796875, 67.3125]}, "gradients/decoder.transformer.h.5.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 3.0, 1.0, 2.0, 3.0, 3.0, 8.0, 12.0, 19.0, 24.0, 36.0, 48.0, 90.0, 113.0, 162.0, 287.0, 345.0, 568.0, 829.0, 1308.0, 1938.0, 3044.0, 4827.0, 7705.0, 12470.0, 20419.0, 33880.0, 57471.0, 109673.0, 334256.0, 236755.0, 92667.0, 50644.0, 30197.0, 18307.0, 11131.0, 6927.0, 4401.0, 2737.0, 1759.0, 1178.0, 771.0, 482.0, 365.0, 201.0, 164.0, 101.0, 72.0, 60.0, 26.0, 30.0, 17.0, 5.0, 15.0, 5.0, 2.0, 5.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-2.744140625, -2.6580810546875, -2.572021484375, -2.4859619140625, -2.39990234375, -2.3138427734375, -2.227783203125, -2.1417236328125, -2.0556640625, -1.9696044921875, -1.883544921875, -1.7974853515625, -1.71142578125, -1.6253662109375, -1.539306640625, -1.4532470703125, -1.3671875, -1.2811279296875, -1.195068359375, -1.1090087890625, -1.02294921875, -0.9368896484375, -0.850830078125, -0.7647705078125, -0.6787109375, -0.5926513671875, -0.506591796875, -0.4205322265625, -0.33447265625, -0.2484130859375, -0.162353515625, -0.0762939453125, 0.009765625, 0.0958251953125, 0.181884765625, 0.2679443359375, 0.35400390625, 0.4400634765625, 0.526123046875, 0.6121826171875, 0.6982421875, 0.7843017578125, 0.870361328125, 0.9564208984375, 1.04248046875, 1.1285400390625, 1.214599609375, 1.3006591796875, 1.38671875, 1.4727783203125, 1.558837890625, 1.6448974609375, 1.73095703125, 1.8170166015625, 1.903076171875, 1.9891357421875, 2.0751953125, 2.1612548828125, 2.247314453125, 2.3333740234375, 2.41943359375, 2.5054931640625, 2.591552734375, 2.6776123046875, 2.763671875]}, "gradients/decoder.transformer.h.5.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 6.0, 3.0, 6.0, 5.0, 8.0, 6.0, 7.0, 14.0, 8.0, 12.0, 14.0, 25.0, 19.0, 27.0, 29.0, 34.0, 29.0, 28.0, 25.0, 38.0, 41.0, 31.0, 41.0, 33.0, 1057.0, 54.0, 44.0, 48.0, 42.0, 32.0, 36.0, 23.0, 30.0, 32.0, 22.0, 20.0, 17.0, 18.0, 12.0, 7.0, 8.0, 14.0, 9.0, 10.0, 5.0, 4.0, 2.0, 1.0, 3.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-38.21875, -37.01220703125, -35.8056640625, -34.59912109375, -33.392578125, -32.18603515625, -30.9794921875, -29.77294921875, -28.56640625, -27.35986328125, -26.1533203125, -24.94677734375, -23.740234375, -22.53369140625, -21.3271484375, -20.12060546875, -18.9140625, -17.70751953125, -16.5009765625, -15.29443359375, -14.087890625, -12.88134765625, -11.6748046875, -10.46826171875, -9.26171875, -8.05517578125, -6.8486328125, -5.64208984375, -4.435546875, -3.22900390625, -2.0224609375, -0.81591796875, 0.390625, 1.59716796875, 2.8037109375, 4.01025390625, 5.216796875, 6.42333984375, 7.6298828125, 8.83642578125, 10.04296875, 11.24951171875, 12.4560546875, 13.66259765625, 14.869140625, 16.07568359375, 17.2822265625, 18.48876953125, 19.6953125, 20.90185546875, 22.1083984375, 23.31494140625, 24.521484375, 25.72802734375, 26.9345703125, 28.14111328125, 29.34765625, 30.55419921875, 31.7607421875, 32.96728515625, 34.173828125, 35.38037109375, 36.5869140625, 37.79345703125, 39.0]}, "gradients/decoder.transformer.h.5.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 2.0, 2.0, 4.0, 7.0, 8.0, 9.0, 7.0, 25.0, 29.0, 43.0, 61.0, 80.0, 133.0, 175.0, 243.0, 366.0, 498.0, 728.0, 1010.0, 1402.0, 2144.0, 3087.0, 4689.0, 6621.0, 9931.0, 14635.0, 22313.0, 33756.0, 52998.0, 86383.0, 167470.0, 1356335.0, 126259.0, 71855.0, 44794.0, 29346.0, 19439.0, 12798.0, 8494.0, 5940.0, 4022.0, 2813.0, 1913.0, 1308.0, 891.0, 629.0, 424.0, 302.0, 213.0, 164.0, 106.0, 66.0, 47.0, 39.0, 34.0, 21.0, 8.0, 9.0, 5.0, 6.0, 4.0, 4.0, 3.0], "bins": [-1.595703125, -1.54632568359375, -1.4969482421875, -1.44757080078125, -1.398193359375, -1.34881591796875, -1.2994384765625, -1.25006103515625, -1.20068359375, -1.15130615234375, -1.1019287109375, -1.05255126953125, -1.003173828125, -0.95379638671875, -0.9044189453125, -0.85504150390625, -0.8056640625, -0.75628662109375, -0.7069091796875, -0.65753173828125, -0.608154296875, -0.55877685546875, -0.5093994140625, -0.46002197265625, -0.41064453125, -0.36126708984375, -0.3118896484375, -0.26251220703125, -0.213134765625, -0.16375732421875, -0.1143798828125, -0.06500244140625, -0.015625, 0.03375244140625, 0.0831298828125, 0.13250732421875, 0.181884765625, 0.23126220703125, 0.2806396484375, 0.33001708984375, 0.37939453125, 0.42877197265625, 0.4781494140625, 0.52752685546875, 0.576904296875, 0.62628173828125, 0.6756591796875, 0.72503662109375, 0.7744140625, 0.82379150390625, 0.8731689453125, 0.92254638671875, 0.971923828125, 1.02130126953125, 1.0706787109375, 1.12005615234375, 1.16943359375, 1.21881103515625, 1.2681884765625, 1.31756591796875, 1.366943359375, 1.41632080078125, 1.4656982421875, 1.51507568359375, 1.564453125]}, "gradients/decoder.transformer.h.5.crossattention.q_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 3.0, 4.0, 1.0, 2.0, 6.0, 6.0, 11.0, 8.0, 8.0, 16.0, 11.0, 27.0, 27.0, 28.0, 42.0, 51.0, 54.0, 69.0, 150.0, 142.0, 81.0, 45.0, 35.0, 29.0, 25.0, 15.0, 15.0, 19.0, 17.0, 5.0, 12.0, 7.0, 7.0, 7.0, 2.0, 7.0, 4.0, 2.0, 3.0, 2.0, 2.0, 2.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-6.693601608276367e-05, -6.444007158279419e-05, -6.194412708282471e-05, -5.9448182582855225e-05, -5.695223808288574e-05, -5.445629358291626e-05, -5.196034908294678e-05, -4.9464404582977295e-05, -4.696846008300781e-05, -4.447251558303833e-05, -4.197657108306885e-05, -3.9480626583099365e-05, -3.698468208312988e-05, -3.44887375831604e-05, -3.199279308319092e-05, -2.9496848583221436e-05, -2.7000904083251953e-05, -2.450495958328247e-05, -2.2009015083312988e-05, -1.9513070583343506e-05, -1.7017126083374023e-05, -1.4521181583404541e-05, -1.2025237083435059e-05, -9.529292583465576e-06, -7.033348083496094e-06, -4.537403583526611e-06, -2.041459083557129e-06, 4.544854164123535e-07, 2.950429916381836e-06, 5.446374416351318e-06, 7.9423189163208e-06, 1.0438263416290283e-05, 1.2934207916259766e-05, 1.5430152416229248e-05, 1.792609691619873e-05, 2.0422041416168213e-05, 2.2917985916137695e-05, 2.5413930416107178e-05, 2.790987491607666e-05, 3.0405819416046143e-05, 3.2901763916015625e-05, 3.539770841598511e-05, 3.789365291595459e-05, 4.038959741592407e-05, 4.2885541915893555e-05, 4.538148641586304e-05, 4.787743091583252e-05, 5.0373375415802e-05, 5.2869319915771484e-05, 5.536526441574097e-05, 5.786120891571045e-05, 6.035715341567993e-05, 6.285309791564941e-05, 6.53490424156189e-05, 6.784498691558838e-05, 7.034093141555786e-05, 7.283687591552734e-05, 7.533282041549683e-05, 7.782876491546631e-05, 8.032470941543579e-05, 8.282065391540527e-05, 8.531659841537476e-05, 8.781254291534424e-05, 9.030848741531372e-05, 9.28044319152832e-05]}, "gradients/decoder.transformer.h.5.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 4.0, 3.0, 1.0, 1.0, 4.0, 2.0, 2.0, 7.0, 6.0, 6.0, 6.0, 11.0, 7.0, 18.0, 15.0, 15.0, 25.0, 41.0, 31.0, 67.0, 96.0, 219.0, 929.0, 7687.0, 189630.0, 831399.0, 16078.0, 1536.0, 319.0, 145.0, 62.0, 46.0, 35.0, 27.0, 18.0, 23.0, 12.0, 9.0, 4.0, 6.0, 1.0, 0.0, 4.0, 3.0, 2.0, 0.0, 3.0, 0.0, 0.0, 0.0, 1.0, 3.0], "bins": [-0.0017156600952148438, -0.001669466495513916, -0.0016232728958129883, -0.0015770792961120605, -0.0015308856964111328, -0.001484692096710205, -0.0014384984970092773, -0.0013923048973083496, -0.0013461112976074219, -0.0012999176979064941, -0.0012537240982055664, -0.0012075304985046387, -0.001161336898803711, -0.0011151432991027832, -0.0010689496994018555, -0.0010227560997009277, -0.0009765625, -0.0009303689002990723, -0.0008841753005981445, -0.0008379817008972168, -0.0007917881011962891, -0.0007455945014953613, -0.0006994009017944336, -0.0006532073020935059, -0.0006070137023925781, -0.0005608201026916504, -0.0005146265029907227, -0.0004684329032897949, -0.0004222393035888672, -0.00037604570388793945, -0.0003298521041870117, -0.000283658504486084, -0.00023746490478515625, -0.00019127130508422852, -0.00014507770538330078, -9.888410568237305e-05, -5.269050598144531e-05, -6.496906280517578e-06, 3.9696693420410156e-05, 8.589029312133789e-05, 0.00013208389282226562, 0.00017827749252319336, 0.0002244710922241211, 0.00027066469192504883, 0.00031685829162597656, 0.0003630518913269043, 0.00040924549102783203, 0.00045543909072875977, 0.0005016326904296875, 0.0005478262901306152, 0.000594019889831543, 0.0006402134895324707, 0.0006864070892333984, 0.0007326006889343262, 0.0007787942886352539, 0.0008249878883361816, 0.0008711814880371094, 0.0009173750877380371, 0.0009635686874389648, 0.0010097622871398926, 0.0010559558868408203, 0.001102149486541748, 0.0011483430862426758, 0.0011945366859436035, 0.0012407302856445312]}, "gradients/decoder.transformer.h.5.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 3.0, 8.0, 10.0, 19.0, 55.0, 132.0, 330.0, 259.0, 104.0, 47.0, 15.0, 18.0, 7.0, 8.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0002329054696019739, -0.00022747441835235804, -0.0002220433671027422, -0.00021661231585312635, -0.0002111812646035105, -0.00020575022790580988, -0.0002003191621042788, -0.00019488812540657818, -0.00018945707415696234, -0.0001840260229073465, -0.00017859497165773064, -0.0001731639204081148, -0.00016773286915849894, -0.0001623018179088831, -0.00015687078121118248, -0.00015143972996156663, -0.00014600867871195078, -0.00014057762746233493, -0.00013514657621271908, -0.00012971552496310323, -0.0001242844737134874, -0.00011885342973982915, -0.0001134223784902133, -0.00010799133451655507, -0.000102560268715024, -9.712921746540815e-05, -9.16981662157923e-05, -8.626711496617645e-05, -8.083607099251822e-05, -7.540501974290237e-05, -6.997396849328652e-05, -6.454292451962829e-05, -5.9111865994054824e-05, -5.3680814744438976e-05, -4.8249767132801935e-05, -4.281871588318609e-05, -3.7387668271549046e-05, -3.19566170219332e-05, -2.652556577231735e-05, -2.109451816068031e-05, -1.566346691106446e-05, -1.0232417480438016e-05, -4.80136714031687e-06, 6.296831998042762e-07, 6.060732630430721e-06, 1.1491782061057165e-05, 1.6922833310673013e-05, 2.2353880922310054e-05, 2.7784932171925902e-05, 3.321598342154175e-05, 3.864703103317879e-05, 4.407808228279464e-05, 4.950912989443168e-05, 5.494018114404753e-05, 6.0371232393663377e-05, 6.580227636732161e-05, 7.123332761693746e-05, 7.66643788665533e-05, 8.209543011616915e-05, 8.7526481365785e-05, 9.295752533944324e-05, 9.838857658905908e-05, 0.00010381962783867493, 0.00010925067181233317, 0.00011468173033790663]}, "gradients/decoder.transformer.h.5.ln_cross_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 1.0, 3.0, 1.0, 1.0, 2.0, 3.0, 3.0, 7.0, 7.0, 12.0, 13.0, 9.0, 14.0, 17.0, 21.0, 23.0, 21.0, 26.0, 30.0, 31.0, 28.0, 23.0, 50.0, 44.0, 39.0, 32.0, 34.0, 34.0, 45.0, 46.0, 41.0, 33.0, 32.0, 33.0, 31.0, 33.0, 30.0, 29.0, 24.0, 21.0, 10.0, 13.0, 19.0, 9.0, 6.0, 7.0, 4.0, 3.0, 7.0, 1.0, 5.0, 0.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-3.4689903259277344e-05, -3.354158252477646e-05, -3.2393261790275574e-05, -3.124494105577469e-05, -3.0096620321273804e-05, -2.894829958677292e-05, -2.7799978852272034e-05, -2.665165811777115e-05, -2.5503337383270264e-05, -2.435501664876938e-05, -2.3206695914268494e-05, -2.205837517976761e-05, -2.0910054445266724e-05, -1.976173371076584e-05, -1.8613412976264954e-05, -1.746509224176407e-05, -1.6316771507263184e-05, -1.5168450772762299e-05, -1.4020130038261414e-05, -1.2871809303760529e-05, -1.1723488569259644e-05, -1.0575167834758759e-05, -9.426847100257874e-06, -8.278526365756989e-06, -7.1302056312561035e-06, -5.9818848967552185e-06, -4.8335641622543335e-06, -3.6852434277534485e-06, -2.5369226932525635e-06, -1.3886019587516785e-06, -2.4028122425079346e-07, 9.080395102500916e-07, 2.0563602447509766e-06, 3.2046809792518616e-06, 4.353001713752747e-06, 5.501322448253632e-06, 6.649643182754517e-06, 7.797963917255402e-06, 8.946284651756287e-06, 1.0094605386257172e-05, 1.1242926120758057e-05, 1.2391246855258942e-05, 1.3539567589759827e-05, 1.4687888324260712e-05, 1.5836209058761597e-05, 1.698452979326248e-05, 1.8132850527763367e-05, 1.9281171262264252e-05, 2.0429491996765137e-05, 2.1577812731266022e-05, 2.2726133465766907e-05, 2.3874454200267792e-05, 2.5022774934768677e-05, 2.6171095669269562e-05, 2.7319416403770447e-05, 2.8467737138271332e-05, 2.9616057872772217e-05, 3.07643786072731e-05, 3.191269934177399e-05, 3.306102007627487e-05, 3.420934081077576e-05, 3.535766154527664e-05, 3.650598227977753e-05, 3.765430301427841e-05, 3.88026237487793e-05]}, "gradients/decoder.transformer.h.5.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 3.0, 1.0, 1.0, 4.0, 3.0, 1.0, 8.0, 7.0, 10.0, 10.0, 11.0, 15.0, 22.0, 23.0, 25.0, 20.0, 32.0, 30.0, 40.0, 27.0, 30.0, 51.0, 45.0, 56.0, 50.0, 32.0, 51.0, 45.0, 43.0, 32.0, 27.0, 33.0, 33.0, 35.0, 22.0, 24.0, 22.0, 20.0, 15.0, 10.0, 8.0, 9.0, 4.0, 6.0, 5.0, 5.0, 2.0, 2.0, 4.0, 0.0, 0.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-66.8125, -64.716796875, -62.62109375, -60.525390625, -58.4296875, -56.333984375, -54.23828125, -52.142578125, -50.046875, -47.951171875, -45.85546875, -43.759765625, -41.6640625, -39.568359375, -37.47265625, -35.376953125, -33.28125, -31.185546875, -29.08984375, -26.994140625, -24.8984375, -22.802734375, -20.70703125, -18.611328125, -16.515625, -14.419921875, -12.32421875, -10.228515625, -8.1328125, -6.037109375, -3.94140625, -1.845703125, 0.25, 2.345703125, 4.44140625, 6.537109375, 8.6328125, 10.728515625, 12.82421875, 14.919921875, 17.015625, 19.111328125, 21.20703125, 23.302734375, 25.3984375, 27.494140625, 29.58984375, 31.685546875, 33.78125, 35.876953125, 37.97265625, 40.068359375, 42.1640625, 44.259765625, 46.35546875, 48.451171875, 50.546875, 52.642578125, 54.73828125, 56.833984375, 58.9296875, 61.025390625, 63.12109375, 65.216796875, 67.3125]}, "gradients/decoder.transformer.h.5.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 3.0, 2.0, 3.0, 2.0, 4.0, 0.0, 2.0, 2.0, 7.0, 10.0, 10.0, 17.0, 15.0, 37.0, 55.0, 56.0, 93.0, 111.0, 178.0, 235.0, 326.0, 431.0, 726.0, 1119.0, 1782.0, 2962.0, 5496.0, 11577.0, 31714.0, 164171.0, 681321.0, 100603.0, 24099.0, 9394.0, 4550.0, 2643.0, 1647.0, 1018.0, 669.0, 437.0, 295.0, 230.0, 116.0, 110.0, 70.0, 57.0, 38.0, 33.0, 25.0, 17.0, 9.0, 16.0, 5.0, 10.0, 5.0, 4.0, 2.0, 0.0, 2.0, 1.0, 1.0], "bins": [-96.75, -93.8515625, -90.953125, -88.0546875, -85.15625, -82.2578125, -79.359375, -76.4609375, -73.5625, -70.6640625, -67.765625, -64.8671875, -61.96875, -59.0703125, -56.171875, -53.2734375, -50.375, -47.4765625, -44.578125, -41.6796875, -38.78125, -35.8828125, -32.984375, -30.0859375, -27.1875, -24.2890625, -21.390625, -18.4921875, -15.59375, -12.6953125, -9.796875, -6.8984375, -4.0, -1.1015625, 1.796875, 4.6953125, 7.59375, 10.4921875, 13.390625, 16.2890625, 19.1875, 22.0859375, 24.984375, 27.8828125, 30.78125, 33.6796875, 36.578125, 39.4765625, 42.375, 45.2734375, 48.171875, 51.0703125, 53.96875, 56.8671875, 59.765625, 62.6640625, 65.5625, 68.4609375, 71.359375, 74.2578125, 77.15625, 80.0546875, 82.953125, 85.8515625, 88.75]}, "gradients/decoder.transformer.h.5.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 4.0, 4.0, 5.0, 2.0, 6.0, 9.0, 16.0, 5.0, 18.0, 16.0, 19.0, 18.0, 29.0, 19.0, 22.0, 28.0, 26.0, 33.0, 36.0, 40.0, 49.0, 65.0, 79.0, 138.0, 1716.0, 154.0, 67.0, 50.0, 55.0, 44.0, 39.0, 30.0, 26.0, 28.0, 23.0, 25.0, 18.0, 25.0, 12.0, 12.0, 8.0, 10.0, 12.0, 4.0, 7.0, 2.0, 5.0, 2.0, 1.0, 0.0, 0.0, 3.0, 1.0, 1.0, 1.0], "bins": [-163.375, -158.490234375, -153.60546875, -148.720703125, -143.8359375, -138.951171875, -134.06640625, -129.181640625, -124.296875, -119.412109375, -114.52734375, -109.642578125, -104.7578125, -99.873046875, -94.98828125, -90.103515625, -85.21875, -80.333984375, -75.44921875, -70.564453125, -65.6796875, -60.794921875, -55.91015625, -51.025390625, -46.140625, -41.255859375, -36.37109375, -31.486328125, -26.6015625, -21.716796875, -16.83203125, -11.947265625, -7.0625, -2.177734375, 2.70703125, 7.591796875, 12.4765625, 17.361328125, 22.24609375, 27.130859375, 32.015625, 36.900390625, 41.78515625, 46.669921875, 51.5546875, 56.439453125, 61.32421875, 66.208984375, 71.09375, 75.978515625, 80.86328125, 85.748046875, 90.6328125, 95.517578125, 100.40234375, 105.287109375, 110.171875, 115.056640625, 119.94140625, 124.826171875, 129.7109375, 134.595703125, 139.48046875, 144.365234375, 149.25]}, "gradients/decoder.transformer.h.5.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 3.0, 1.0, 0.0, 5.0, 2.0, 2.0, 3.0, 9.0, 10.0, 6.0, 12.0, 23.0, 37.0, 49.0, 70.0, 72.0, 128.0, 191.0, 356.0, 667.0, 1747.0, 5795.0, 25429.0, 422089.0, 2633479.0, 43255.0, 7986.0, 2289.0, 904.0, 404.0, 194.0, 141.0, 104.0, 65.0, 55.0, 37.0, 28.0, 21.0, 15.0, 9.0, 8.0, 8.0, 2.0, 3.0, 0.0, 0.0, 3.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-250.875, -243.734375, -236.59375, -229.453125, -222.3125, -215.171875, -208.03125, -200.890625, -193.75, -186.609375, -179.46875, -172.328125, -165.1875, -158.046875, -150.90625, -143.765625, -136.625, -129.484375, -122.34375, -115.203125, -108.0625, -100.921875, -93.78125, -86.640625, -79.5, -72.359375, -65.21875, -58.078125, -50.9375, -43.796875, -36.65625, -29.515625, -22.375, -15.234375, -8.09375, -0.953125, 6.1875, 13.328125, 20.46875, 27.609375, 34.75, 41.890625, 49.03125, 56.171875, 63.3125, 70.453125, 77.59375, 84.734375, 91.875, 99.015625, 106.15625, 113.296875, 120.4375, 127.578125, 134.71875, 141.859375, 149.0, 156.140625, 163.28125, 170.421875, 177.5625, 184.703125, 191.84375, 198.984375, 206.125]}, "gradients/decoder.transformer.h.5.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 24.0, 897.0, 96.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2094.220947265625, -2001.781005859375, -1909.3411865234375, -1816.9013671875, -1724.46142578125, -1632.021484375, -1539.5816650390625, -1447.141845703125, -1354.701904296875, -1262.261962890625, -1169.8221435546875, -1077.38232421875, -984.9423828125, -892.5025024414062, -800.0626220703125, -707.6227416992188, -615.182861328125, -522.7429809570312, -430.3031005859375, -337.86322021484375, -245.42333984375, -152.98345947265625, -60.5435791015625, 31.89630126953125, 124.336181640625, 216.77606201171875, 309.2159423828125, 401.65582275390625, 494.095703125, 586.5355834960938, 678.9754638671875, 771.4153442382812, 863.855224609375, 956.2951049804688, 1048.7349853515625, 1141.1748046875, 1233.61474609375, 1326.0546875, 1418.4945068359375, 1510.934326171875, 1603.374267578125, 1695.814208984375, 1788.2540283203125, 1880.69384765625, 1973.1337890625, 2065.57373046875, 2158.013671875, 2250.453369140625, 2342.893310546875, 2435.333251953125, 2527.77294921875, 2620.212890625, 2712.65283203125, 2805.0927734375, 2897.53271484375, 2989.972412109375, 3082.412353515625, 3174.852294921875, 3267.2919921875, 3359.73193359375, 3452.171875, 3544.61181640625, 3637.0517578125, 3729.491455078125, 3821.931396484375]}, "gradients/decoder.transformer.h.5.ln_1.bias": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 1.0, 5.0, 3.0, 7.0, 4.0, 4.0, 6.0, 11.0, 10.0, 24.0, 16.0, 17.0, 18.0, 13.0, 38.0, 28.0, 40.0, 23.0, 47.0, 34.0, 31.0, 41.0, 49.0, 40.0, 46.0, 49.0, 49.0, 38.0, 35.0, 38.0, 31.0, 26.0, 26.0, 23.0, 26.0, 16.0, 20.0, 24.0, 11.0, 8.0, 4.0, 5.0, 5.0, 7.0, 3.0, 3.0, 2.0, 2.0, 4.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-418.1995544433594, -405.60662841796875, -393.013671875, -380.4207458496094, -367.82781982421875, -355.23486328125, -342.6419372558594, -330.04901123046875, -317.4560546875, -304.8631286621094, -292.2701721191406, -279.67724609375, -267.0843200683594, -254.49136352539062, -241.8984375, -229.3054962158203, -216.7125701904297, -204.11962890625, -191.52670288085938, -178.9337615966797, -166.3408203125, -153.74789428710938, -141.1549530029297, -128.56201171875, -115.96907806396484, -103.37614440917969, -90.783203125, -78.19026947021484, -65.59733581542969, -53.00439453125, -40.411460876464844, -27.818519592285156, -15.2255859375, -2.6326494216918945, 9.960287094116211, 22.55322265625, 35.14616012573242, 47.739097595214844, 60.33203125, 72.92497253417969, 85.51790618896484, 98.11083984375, 110.70378112792969, 123.29671478271484, 135.8896484375, 148.4825897216797, 161.07553100585938, 173.66845703125, 186.2613983154297, 198.85433959960938, 211.447265625, 224.0402069091797, 236.63314819335938, 249.22607421875, 261.81903076171875, 274.4119567871094, 287.0048828125, 299.5978088378906, 312.1907653808594, 324.78369140625, 337.3766174316406, 349.9695739746094, 362.5625, 375.15545654296875, 387.7483825683594]}, "gradients/decoder.transformer.h.4.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0, 3.0, 3.0, 1.0, 3.0, 5.0, 2.0, 6.0, 8.0, 16.0, 18.0, 15.0, 12.0, 28.0, 26.0, 19.0, 27.0, 30.0, 51.0, 48.0, 47.0, 29.0, 50.0, 41.0, 52.0, 59.0, 44.0, 35.0, 31.0, 40.0, 31.0, 27.0, 29.0, 36.0, 28.0, 20.0, 22.0, 10.0, 7.0, 11.0, 15.0, 8.0, 4.0, 5.0, 3.0, 2.0, 1.0, 4.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-71.8125, -69.5927734375, -67.373046875, -65.1533203125, -62.93359375, -60.7138671875, -58.494140625, -56.2744140625, -54.0546875, -51.8349609375, -49.615234375, -47.3955078125, -45.17578125, -42.9560546875, -40.736328125, -38.5166015625, -36.296875, -34.0771484375, -31.857421875, -29.6376953125, -27.41796875, -25.1982421875, -22.978515625, -20.7587890625, -18.5390625, -16.3193359375, -14.099609375, -11.8798828125, -9.66015625, -7.4404296875, -5.220703125, -3.0009765625, -0.78125, 1.4384765625, 3.658203125, 5.8779296875, 8.09765625, 10.3173828125, 12.537109375, 14.7568359375, 16.9765625, 19.1962890625, 21.416015625, 23.6357421875, 25.85546875, 28.0751953125, 30.294921875, 32.5146484375, 34.734375, 36.9541015625, 39.173828125, 41.3935546875, 43.61328125, 45.8330078125, 48.052734375, 50.2724609375, 52.4921875, 54.7119140625, 56.931640625, 59.1513671875, 61.37109375, 63.5908203125, 65.810546875, 68.0302734375, 70.25]}, "gradients/decoder.transformer.h.4.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 3.0, 4.0, 2.0, 3.0, 2.0, 3.0, 1.0, 10.0, 10.0, 20.0, 23.0, 33.0, 42.0, 69.0, 77.0, 112.0, 162.0, 238.0, 333.0, 499.0, 830.0, 1481.0, 2627.0, 4701.0, 9154.0, 21458.0, 70282.0, 3229283.0, 774018.0, 45478.0, 16270.0, 7540.0, 3998.0, 2114.0, 1258.0, 740.0, 480.0, 281.0, 191.0, 137.0, 90.0, 58.0, 61.0, 21.0, 25.0, 17.0, 16.0, 11.0, 6.0, 10.0, 4.0, 2.0, 2.0, 4.0, 1.0, 0.0, 0.0, 1.0], "bins": [-279.25, -271.0, -262.75, -254.5, -246.25, -238.0, -229.75, -221.5, -213.25, -205.0, -196.75, -188.5, -180.25, -172.0, -163.75, -155.5, -147.25, -139.0, -130.75, -122.5, -114.25, -106.0, -97.75, -89.5, -81.25, -73.0, -64.75, -56.5, -48.25, -40.0, -31.75, -23.5, -15.25, -7.0, 1.25, 9.5, 17.75, 26.0, 34.25, 42.5, 50.75, 59.0, 67.25, 75.5, 83.75, 92.0, 100.25, 108.5, 116.75, 125.0, 133.25, 141.5, 149.75, 158.0, 166.25, 174.5, 182.75, 191.0, 199.25, 207.5, 215.75, 224.0, 232.25, 240.5, 248.75]}, "gradients/decoder.transformer.h.4.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 4.0, 1.0, 4.0, 4.0, 4.0, 10.0, 11.0, 8.0, 13.0, 19.0, 15.0, 23.0, 18.0, 28.0, 41.0, 44.0, 74.0, 98.0, 229.0, 694.0, 1345.0, 733.0, 241.0, 125.0, 58.0, 51.0, 46.0, 27.0, 26.0, 19.0, 12.0, 12.0, 10.0, 8.0, 7.0, 8.0, 1.0, 4.0, 0.0, 4.0, 2.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-132.875, -128.521484375, -124.16796875, -119.814453125, -115.4609375, -111.107421875, -106.75390625, -102.400390625, -98.046875, -93.693359375, -89.33984375, -84.986328125, -80.6328125, -76.279296875, -71.92578125, -67.572265625, -63.21875, -58.865234375, -54.51171875, -50.158203125, -45.8046875, -41.451171875, -37.09765625, -32.744140625, -28.390625, -24.037109375, -19.68359375, -15.330078125, -10.9765625, -6.623046875, -2.26953125, 2.083984375, 6.4375, 10.791015625, 15.14453125, 19.498046875, 23.8515625, 28.205078125, 32.55859375, 36.912109375, 41.265625, 45.619140625, 49.97265625, 54.326171875, 58.6796875, 63.033203125, 67.38671875, 71.740234375, 76.09375, 80.447265625, 84.80078125, 89.154296875, 93.5078125, 97.861328125, 102.21484375, 106.568359375, 110.921875, 115.275390625, 119.62890625, 123.982421875, 128.3359375, 132.689453125, 137.04296875, 141.396484375, 145.75]}, "gradients/decoder.transformer.h.4.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 6.0, 2.0, 7.0, 6.0, 11.0, 15.0, 30.0, 38.0, 54.0, 61.0, 117.0, 159.0, 229.0, 350.0, 459.0, 623.0, 1015.0, 1505.0, 2261.0, 3364.0, 5370.0, 8910.0, 15547.0, 28534.0, 60125.0, 191288.0, 3150198.0, 544294.0, 91006.0, 38433.0, 20142.0, 11173.0, 6645.0, 4182.0, 2706.0, 1709.0, 1123.0, 843.0, 532.0, 367.0, 259.0, 154.0, 142.0, 101.0, 51.0, 46.0, 42.0, 17.0, 15.0, 7.0, 10.0, 5.0, 4.0, 1.0, 2.0, 2.0, 0.0, 1.0], "bins": [-176.0, -170.62109375, -165.2421875, -159.86328125, -154.484375, -149.10546875, -143.7265625, -138.34765625, -132.96875, -127.58984375, -122.2109375, -116.83203125, -111.453125, -106.07421875, -100.6953125, -95.31640625, -89.9375, -84.55859375, -79.1796875, -73.80078125, -68.421875, -63.04296875, -57.6640625, -52.28515625, -46.90625, -41.52734375, -36.1484375, -30.76953125, -25.390625, -20.01171875, -14.6328125, -9.25390625, -3.875, 1.50390625, 6.8828125, 12.26171875, 17.640625, 23.01953125, 28.3984375, 33.77734375, 39.15625, 44.53515625, 49.9140625, 55.29296875, 60.671875, 66.05078125, 71.4296875, 76.80859375, 82.1875, 87.56640625, 92.9453125, 98.32421875, 103.703125, 109.08203125, 114.4609375, 119.83984375, 125.21875, 130.59765625, 135.9765625, 141.35546875, 146.734375, 152.11328125, 157.4921875, 162.87109375, 168.25]}, "gradients/decoder.transformer.h.4.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 4.0, 9.0, 9.0, 15.0, 40.0, 31.0, 71.0, 110.0, 165.0, 170.0, 159.0, 87.0, 56.0, 29.0, 21.0, 14.0, 10.0, 5.0, 2.0, 3.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-530.7828979492188, -506.9376220703125, -483.09234619140625, -459.2470703125, -435.40179443359375, -411.5565185546875, -387.7112731933594, -363.8659973144531, -340.0207214355469, -316.1754455566406, -292.3301696777344, -268.48492431640625, -244.63963317871094, -220.7943572998047, -196.9490966796875, -173.10382080078125, -149.258544921875, -125.41326904296875, -101.56800079345703, -77.72273254394531, -53.87745666503906, -30.032180786132812, -6.186920166015625, 17.658355712890625, 41.503631591796875, 65.34890747070312, 89.19417572021484, 113.03944396972656, 136.8847198486328, 160.72999572753906, 184.57525634765625, 208.4205322265625, 232.2657470703125, 256.11102294921875, 279.956298828125, 303.80157470703125, 327.6468505859375, 351.49212646484375, 375.3373718261719, 399.1826477050781, 423.0279235839844, 446.8731994628906, 470.7184753417969, 494.563720703125, 518.4089965820312, 542.2542724609375, 566.0995483398438, 589.94482421875, 613.7901000976562, 637.6353759765625, 661.4806518554688, 685.325927734375, 709.1712036132812, 733.0164794921875, 756.8616943359375, 780.70703125, 804.55224609375, 828.3975219726562, 852.2427978515625, 876.0880737304688, 899.933349609375, 923.7786254882812, 947.6239013671875, 971.4691162109375, 995.314453125]}, "gradients/decoder.transformer.h.4.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 3.0, 5.0, 3.0, 6.0, 4.0, 12.0, 5.0, 14.0, 12.0, 17.0, 19.0, 22.0, 11.0, 34.0, 23.0, 43.0, 30.0, 52.0, 35.0, 43.0, 41.0, 38.0, 34.0, 42.0, 33.0, 50.0, 34.0, 42.0, 30.0, 24.0, 37.0, 25.0, 31.0, 31.0, 28.0, 18.0, 20.0, 12.0, 9.0, 8.0, 7.0, 3.0, 9.0, 4.0, 7.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-332.7120361328125, -321.55078125, -310.3895263671875, -299.228271484375, -288.0669860839844, -276.9057312011719, -265.7444763183594, -254.58322143554688, -243.42196655273438, -232.26071166992188, -221.0994415283203, -209.9381866455078, -198.7769317626953, -187.61566162109375, -176.45440673828125, -165.29315185546875, -154.1318817138672, -142.9706268310547, -131.80935668945312, -120.64810180664062, -109.48684692382812, -98.3255844116211, -87.16432189941406, -76.00306701660156, -64.84180450439453, -53.680545806884766, -42.519287109375, -31.35802459716797, -20.196765899658203, -9.035507202148438, 2.1257553100585938, 13.287010192871094, 24.448272705078125, 35.60953140258789, 46.770790100097656, 57.93205261230469, 69.09330749511719, 80.25457000732422, 91.41583251953125, 102.57708740234375, 113.73834991455078, 124.89961242675781, 136.0608673095703, 147.22213745117188, 158.38339233398438, 169.54464721679688, 180.70590209960938, 191.86715698242188, 203.02842712402344, 214.18968200683594, 225.3509521484375, 236.51220703125, 247.6734619140625, 258.834716796875, 269.9959716796875, 281.1572265625, 292.3185119628906, 303.4797668457031, 314.6410217285156, 325.80230712890625, 336.96356201171875, 348.12481689453125, 359.28607177734375, 370.44732666015625, 381.60858154296875]}, "gradients/decoder.transformer.h.4.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 3.0, 4.0, 1.0, 6.0, 3.0, 6.0, 7.0, 8.0, 7.0, 11.0, 12.0, 18.0, 16.0, 16.0, 18.0, 25.0, 31.0, 31.0, 30.0, 34.0, 46.0, 35.0, 32.0, 42.0, 40.0, 46.0, 43.0, 30.0, 52.0, 37.0, 31.0, 35.0, 27.0, 25.0, 26.0, 24.0, 20.0, 21.0, 20.0, 13.0, 18.0, 12.0, 13.0, 11.0, 11.0, 1.0, 2.0, 3.0, 3.0, 4.0, 2.0, 0.0, 1.0, 1.0, 2.0, 1.0, 0.0, 1.0], "bins": [-66.875, -64.7587890625, -62.642578125, -60.5263671875, -58.41015625, -56.2939453125, -54.177734375, -52.0615234375, -49.9453125, -47.8291015625, -45.712890625, -43.5966796875, -41.48046875, -39.3642578125, -37.248046875, -35.1318359375, -33.015625, -30.8994140625, -28.783203125, -26.6669921875, -24.55078125, -22.4345703125, -20.318359375, -18.2021484375, -16.0859375, -13.9697265625, -11.853515625, -9.7373046875, -7.62109375, -5.5048828125, -3.388671875, -1.2724609375, 0.84375, 2.9599609375, 5.076171875, 7.1923828125, 9.30859375, 11.4248046875, 13.541015625, 15.6572265625, 17.7734375, 19.8896484375, 22.005859375, 24.1220703125, 26.23828125, 28.3544921875, 30.470703125, 32.5869140625, 34.703125, 36.8193359375, 38.935546875, 41.0517578125, 43.16796875, 45.2841796875, 47.400390625, 49.5166015625, 51.6328125, 53.7490234375, 55.865234375, 57.9814453125, 60.09765625, 62.2138671875, 64.330078125, 66.4462890625, 68.5625]}, "gradients/decoder.transformer.h.4.crossattention.c_proj.weight": {"_type": "histogram", "values": [7.0, 5.0, 3.0, 11.0, 12.0, 18.0, 18.0, 37.0, 41.0, 56.0, 69.0, 90.0, 120.0, 199.0, 266.0, 365.0, 501.0, 703.0, 938.0, 1422.0, 1952.0, 2843.0, 4113.0, 6071.0, 8978.0, 13043.0, 19651.0, 29890.0, 45331.0, 72635.0, 126800.0, 307491.0, 161301.0, 86335.0, 53042.0, 34294.0, 22521.0, 14917.0, 10056.0, 6810.0, 4643.0, 3362.0, 2097.0, 1580.0, 1062.0, 855.0, 571.0, 415.0, 263.0, 231.0, 150.0, 94.0, 67.0, 61.0, 52.0, 37.0, 25.0, 10.0, 15.0, 12.0, 8.0, 3.0, 6.0, 2.0], "bins": [-2.322265625, -2.248931884765625, -2.17559814453125, -2.102264404296875, -2.0289306640625, -1.955596923828125, -1.88226318359375, -1.808929443359375, -1.735595703125, -1.662261962890625, -1.58892822265625, -1.515594482421875, -1.4422607421875, -1.368927001953125, -1.29559326171875, -1.222259521484375, -1.14892578125, -1.075592041015625, -1.00225830078125, -0.928924560546875, -0.8555908203125, -0.782257080078125, -0.70892333984375, -0.635589599609375, -0.562255859375, -0.488922119140625, -0.41558837890625, -0.342254638671875, -0.2689208984375, -0.195587158203125, -0.12225341796875, -0.048919677734375, 0.0244140625, 0.097747802734375, 0.17108154296875, 0.244415283203125, 0.3177490234375, 0.391082763671875, 0.46441650390625, 0.537750244140625, 0.611083984375, 0.684417724609375, 0.75775146484375, 0.831085205078125, 0.9044189453125, 0.977752685546875, 1.05108642578125, 1.124420166015625, 1.19775390625, 1.271087646484375, 1.34442138671875, 1.417755126953125, 1.4910888671875, 1.564422607421875, 1.63775634765625, 1.711090087890625, 1.784423828125, 1.857757568359375, 1.93109130859375, 2.004425048828125, 2.0777587890625, 2.151092529296875, 2.22442626953125, 2.297760009765625, 2.37109375]}, "gradients/decoder.transformer.h.4.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 3.0, 7.0, 8.0, 3.0, 9.0, 7.0, 11.0, 12.0, 18.0, 22.0, 16.0, 23.0, 26.0, 35.0, 39.0, 19.0, 39.0, 44.0, 39.0, 39.0, 50.0, 1071.0, 50.0, 52.0, 50.0, 45.0, 39.0, 31.0, 36.0, 36.0, 23.0, 18.0, 18.0, 15.0, 10.0, 16.0, 11.0, 8.0, 10.0, 5.0, 5.0, 6.0, 7.0, 2.0, 3.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-49.5, -47.97265625, -46.4453125, -44.91796875, -43.390625, -41.86328125, -40.3359375, -38.80859375, -37.28125, -35.75390625, -34.2265625, -32.69921875, -31.171875, -29.64453125, -28.1171875, -26.58984375, -25.0625, -23.53515625, -22.0078125, -20.48046875, -18.953125, -17.42578125, -15.8984375, -14.37109375, -12.84375, -11.31640625, -9.7890625, -8.26171875, -6.734375, -5.20703125, -3.6796875, -2.15234375, -0.625, 0.90234375, 2.4296875, 3.95703125, 5.484375, 7.01171875, 8.5390625, 10.06640625, 11.59375, 13.12109375, 14.6484375, 16.17578125, 17.703125, 19.23046875, 20.7578125, 22.28515625, 23.8125, 25.33984375, 26.8671875, 28.39453125, 29.921875, 31.44921875, 32.9765625, 34.50390625, 36.03125, 37.55859375, 39.0859375, 40.61328125, 42.140625, 43.66796875, 45.1953125, 46.72265625, 48.25]}, "gradients/decoder.transformer.h.4.crossattention.c_attn.weight": {"_type": "histogram", "values": [3.0, 3.0, 2.0, 4.0, 2.0, 6.0, 11.0, 9.0, 18.0, 24.0, 37.0, 59.0, 71.0, 103.0, 158.0, 217.0, 317.0, 446.0, 635.0, 982.0, 1470.0, 2122.0, 3135.0, 4751.0, 7017.0, 10487.0, 16153.0, 25702.0, 41317.0, 69667.0, 129483.0, 1387828.0, 173191.0, 85595.0, 49500.0, 30334.0, 19259.0, 12351.0, 8073.0, 5377.0, 3538.0, 2493.0, 1600.0, 1119.0, 752.0, 531.0, 363.0, 273.0, 168.0, 108.0, 98.0, 63.0, 42.0, 21.0, 18.0, 11.0, 11.0, 10.0, 4.0, 1.0, 1.0, 3.0, 3.0, 2.0], "bins": [-1.9853515625, -1.9226837158203125, -1.860015869140625, -1.7973480224609375, -1.73468017578125, -1.6720123291015625, -1.609344482421875, -1.5466766357421875, -1.4840087890625, -1.4213409423828125, -1.358673095703125, -1.2960052490234375, -1.23333740234375, -1.1706695556640625, -1.108001708984375, -1.0453338623046875, -0.982666015625, -0.9199981689453125, -0.857330322265625, -0.7946624755859375, -0.73199462890625, -0.6693267822265625, -0.606658935546875, -0.5439910888671875, -0.4813232421875, -0.4186553955078125, -0.355987548828125, -0.2933197021484375, -0.23065185546875, -0.1679840087890625, -0.105316162109375, -0.0426483154296875, 0.02001953125, 0.0826873779296875, 0.145355224609375, 0.2080230712890625, 0.27069091796875, 0.3333587646484375, 0.396026611328125, 0.4586944580078125, 0.5213623046875, 0.5840301513671875, 0.646697998046875, 0.7093658447265625, 0.77203369140625, 0.8347015380859375, 0.897369384765625, 0.9600372314453125, 1.022705078125, 1.0853729248046875, 1.148040771484375, 1.2107086181640625, 1.27337646484375, 1.3360443115234375, 1.398712158203125, 1.4613800048828125, 1.5240478515625, 1.5867156982421875, 1.649383544921875, 1.7120513916015625, 1.77471923828125, 1.8373870849609375, 1.900054931640625, 1.9627227783203125, 2.025390625]}, "gradients/decoder.transformer.h.4.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 4.0, 3.0, 2.0, 2.0, 2.0, 3.0, 4.0, 7.0, 5.0, 12.0, 9.0, 10.0, 8.0, 15.0, 18.0, 37.0, 50.0, 84.0, 221.0, 180.0, 109.0, 50.0, 32.0, 22.0, 24.0, 12.0, 17.0, 10.0, 11.0, 9.0, 5.0, 7.0, 9.0, 8.0, 2.0, 4.0, 4.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.00011360645294189453, -0.00010992772877216339, -0.00010624900460243225, -0.00010257028043270111, -9.889155626296997e-05, -9.521283209323883e-05, -9.153410792350769e-05, -8.785538375377655e-05, -8.417665958404541e-05, -8.049793541431427e-05, -7.681921124458313e-05, -7.314048707485199e-05, -6.946176290512085e-05, -6.578303873538971e-05, -6.210431456565857e-05, -5.842559039592743e-05, -5.474686622619629e-05, -5.106814205646515e-05, -4.738941788673401e-05, -4.371069371700287e-05, -4.003196954727173e-05, -3.635324537754059e-05, -3.267452120780945e-05, -2.8995797038078308e-05, -2.5317072868347168e-05, -2.1638348698616028e-05, -1.7959624528884888e-05, -1.4280900359153748e-05, -1.0602176189422607e-05, -6.923452019691467e-06, -3.244727849960327e-06, 4.33996319770813e-07, 4.112720489501953e-06, 7.791444659233093e-06, 1.1470168828964233e-05, 1.5148892998695374e-05, 1.8827617168426514e-05, 2.2506341338157654e-05, 2.6185065507888794e-05, 2.9863789677619934e-05, 3.3542513847351074e-05, 3.7221238017082214e-05, 4.0899962186813354e-05, 4.4578686356544495e-05, 4.8257410526275635e-05, 5.1936134696006775e-05, 5.5614858865737915e-05, 5.9293583035469055e-05, 6.29723072052002e-05, 6.665103137493134e-05, 7.032975554466248e-05, 7.400847971439362e-05, 7.768720388412476e-05, 8.13659280538559e-05, 8.504465222358704e-05, 8.872337639331818e-05, 9.240210056304932e-05, 9.608082473278046e-05, 9.97595489025116e-05, 0.00010343827307224274, 0.00010711699724197388, 0.00011079572141170502, 0.00011447444558143616, 0.0001181531697511673, 0.00012183189392089844]}, "gradients/decoder.transformer.h.4.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 3.0, 4.0, 0.0, 8.0, 11.0, 16.0, 17.0, 10.0, 28.0, 18.0, 34.0, 26.0, 54.0, 86.0, 133.0, 338.0, 3205.0, 158652.0, 878354.0, 6702.0, 465.0, 138.0, 70.0, 41.0, 41.0, 21.0, 16.0, 19.0, 10.0, 10.0, 3.0, 5.0, 6.0, 5.0, 4.0, 3.0, 4.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.0021724700927734375, -0.00210687518119812, -0.0020412802696228027, -0.0019756853580474854, -0.001910090446472168, -0.0018444955348968506, -0.0017789006233215332, -0.0017133057117462158, -0.0016477108001708984, -0.001582115888595581, -0.0015165209770202637, -0.0014509260654449463, -0.001385331153869629, -0.0013197362422943115, -0.0012541413307189941, -0.0011885464191436768, -0.0011229515075683594, -0.001057356595993042, -0.0009917616844177246, -0.0009261667728424072, -0.0008605718612670898, -0.0007949769496917725, -0.0007293820381164551, -0.0006637871265411377, -0.0005981922149658203, -0.0005325973033905029, -0.00046700239181518555, -0.00040140748023986816, -0.0003358125686645508, -0.0002702176570892334, -0.00020462274551391602, -0.00013902783393859863, -7.343292236328125e-05, -7.838010787963867e-06, 5.7756900787353516e-05, 0.0001233518123626709, 0.00018894672393798828, 0.00025454163551330566, 0.00032013654708862305, 0.00038573145866394043, 0.0004513263702392578, 0.0005169212818145752, 0.0005825161933898926, 0.00064811110496521, 0.0007137060165405273, 0.0007793009281158447, 0.0008448958396911621, 0.0009104907512664795, 0.0009760856628417969, 0.0010416805744171143, 0.0011072754859924316, 0.001172870397567749, 0.0012384653091430664, 0.0013040602207183838, 0.0013696551322937012, 0.0014352500438690186, 0.001500844955444336, 0.0015664398670196533, 0.0016320347785949707, 0.001697629690170288, 0.0017632246017456055, 0.0018288195133209229, 0.0018944144248962402, 0.0019600093364715576, 0.002025604248046875]}, "gradients/decoder.transformer.h.4.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 3.0, 3.0, 10.0, 25.0, 60.0, 148.0, 265.0, 297.0, 120.0, 48.0, 18.0, 17.0, 4.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.7190707593690604e-05, -5.1480183174135163e-05, -4.576965875457972e-05, -4.0059130697045475e-05, -3.434860991546884e-05, -2.8638083676923998e-05, -2.2927557438379154e-05, -1.7217033018823713e-05, -1.1506508599268273e-05, -5.795983724965481e-06, -8.545885066268966e-08, 5.625066478387453e-06, 1.1335590897942893e-05, 1.7046115317498334e-05, 2.2756641556043178e-05, 2.846716597559862e-05, 3.417769039515406e-05, 3.98882148147095e-05, 4.559873923426494e-05, 5.130926729179919e-05, 5.701978807337582e-05, 6.273031613091007e-05, 6.844084418844432e-05, 7.415136497002095e-05, 7.986188575159758e-05, 8.557241380913183e-05, 9.128293459070846e-05, 9.699346264824271e-05, 0.00010270398342981935, 0.00010841451148735359, 0.00011412503954488784, 0.00011983556032646447, 0.0001255460811080411, 0.00013125660188961774, 0.0001369671372231096, 0.00014267765800468624, 0.00014838817878626287, 0.0001540986995678395, 0.00015980923490133137, 0.000165519755682908, 0.00017123027646448463, 0.00017694079724606127, 0.00018265133257955313, 0.00018836185336112976, 0.0001940723741427064, 0.00019978289492428303, 0.0002054934302577749, 0.00021120395103935152, 0.00021691448637284338, 0.00022262500715442002, 0.00022833554248791188, 0.0002340460632694885, 0.00023975658405106515, 0.000245467119384557, 0.00025117764016613364, 0.0002568881609477103, 0.0002625986817292869, 0.00026830920251086354, 0.0002740197232924402, 0.0002797302440740168, 0.0002854407939594239, 0.00029115131474100053, 0.00029686183552257717, 0.0003025723563041538, 0.00030828287708573043]}, "gradients/decoder.transformer.h.4.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 4.0, 2.0, 0.0, 2.0, 6.0, 6.0, 8.0, 5.0, 15.0, 14.0, 7.0, 15.0, 12.0, 19.0, 18.0, 15.0, 20.0, 17.0, 22.0, 31.0, 38.0, 38.0, 38.0, 29.0, 36.0, 41.0, 24.0, 34.0, 38.0, 28.0, 40.0, 22.0, 54.0, 36.0, 28.0, 32.0, 27.0, 28.0, 28.0, 22.0, 19.0, 14.0, 13.0, 19.0, 13.0, 14.0, 7.0, 5.0, 4.0, 5.0, 3.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.190206527709961e-05, -4.0628015995025635e-05, -3.935396671295166e-05, -3.8079917430877686e-05, -3.680586814880371e-05, -3.5531818866729736e-05, -3.425776958465576e-05, -3.298372030258179e-05, -3.170967102050781e-05, -3.0435621738433838e-05, -2.9161572456359863e-05, -2.788752317428589e-05, -2.6613473892211914e-05, -2.533942461013794e-05, -2.4065375328063965e-05, -2.279132604598999e-05, -2.1517276763916016e-05, -2.024322748184204e-05, -1.8969178199768066e-05, -1.7695128917694092e-05, -1.6421079635620117e-05, -1.5147030353546143e-05, -1.3872981071472168e-05, -1.2598931789398193e-05, -1.1324882507324219e-05, -1.0050833225250244e-05, -8.77678394317627e-06, -7.502734661102295e-06, -6.22868537902832e-06, -4.954636096954346e-06, -3.680586814880371e-06, -2.4065375328063965e-06, -1.1324882507324219e-06, 1.4156103134155273e-07, 1.4156103134155273e-06, 2.689659595489502e-06, 3.9637088775634766e-06, 5.237758159637451e-06, 6.511807441711426e-06, 7.7858567237854e-06, 9.059906005859375e-06, 1.033395528793335e-05, 1.1608004570007324e-05, 1.2882053852081299e-05, 1.4156103134155273e-05, 1.5430152416229248e-05, 1.6704201698303223e-05, 1.7978250980377197e-05, 1.9252300262451172e-05, 2.0526349544525146e-05, 2.180039882659912e-05, 2.3074448108673096e-05, 2.434849739074707e-05, 2.5622546672821045e-05, 2.689659595489502e-05, 2.8170645236968994e-05, 2.944469451904297e-05, 3.071874380111694e-05, 3.199279308319092e-05, 3.326684236526489e-05, 3.454089164733887e-05, 3.581494092941284e-05, 3.7088990211486816e-05, 3.836303949356079e-05, 3.9637088775634766e-05]}, "gradients/decoder.transformer.h.4.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 3.0, 4.0, 1.0, 6.0, 3.0, 6.0, 7.0, 8.0, 7.0, 11.0, 12.0, 18.0, 16.0, 16.0, 18.0, 25.0, 31.0, 31.0, 30.0, 34.0, 46.0, 35.0, 32.0, 42.0, 40.0, 46.0, 43.0, 30.0, 52.0, 37.0, 31.0, 35.0, 27.0, 25.0, 26.0, 24.0, 20.0, 21.0, 20.0, 13.0, 18.0, 12.0, 13.0, 11.0, 11.0, 1.0, 2.0, 3.0, 3.0, 4.0, 2.0, 0.0, 1.0, 1.0, 2.0, 1.0, 0.0, 1.0], "bins": [-66.875, -64.7587890625, -62.642578125, -60.5263671875, -58.41015625, -56.2939453125, -54.177734375, -52.0615234375, -49.9453125, -47.8291015625, -45.712890625, -43.5966796875, -41.48046875, -39.3642578125, -37.248046875, -35.1318359375, -33.015625, -30.8994140625, -28.783203125, -26.6669921875, -24.55078125, -22.4345703125, -20.318359375, -18.2021484375, -16.0859375, -13.9697265625, -11.853515625, -9.7373046875, -7.62109375, -5.5048828125, -3.388671875, -1.2724609375, 0.84375, 2.9599609375, 5.076171875, 7.1923828125, 9.30859375, 11.4248046875, 13.541015625, 15.6572265625, 17.7734375, 19.8896484375, 22.005859375, 24.1220703125, 26.23828125, 28.3544921875, 30.470703125, 32.5869140625, 34.703125, 36.8193359375, 38.935546875, 41.0517578125, 43.16796875, 45.2841796875, 47.400390625, 49.5166015625, 51.6328125, 53.7490234375, 55.865234375, 57.9814453125, 60.09765625, 62.2138671875, 64.330078125, 66.4462890625, 68.5625]}, "gradients/decoder.transformer.h.4.attn.c_proj.weight": {"_type": "histogram", "values": [3.0, 0.0, 3.0, 2.0, 1.0, 4.0, 7.0, 10.0, 13.0, 20.0, 25.0, 35.0, 47.0, 73.0, 91.0, 147.0, 193.0, 270.0, 376.0, 540.0, 718.0, 1038.0, 1485.0, 2048.0, 3003.0, 4672.0, 6930.0, 11527.0, 19534.0, 39042.0, 137256.0, 628873.0, 106528.0, 34832.0, 17935.0, 10702.0, 6611.0, 4285.0, 2929.0, 1969.0, 1386.0, 955.0, 706.0, 485.0, 360.0, 248.0, 200.0, 138.0, 85.0, 61.0, 53.0, 32.0, 31.0, 15.0, 17.0, 8.0, 6.0, 2.0, 6.0, 2.0, 1.0, 0.0, 0.0, 2.0], "bins": [-101.3125, -98.0888671875, -94.865234375, -91.6416015625, -88.41796875, -85.1943359375, -81.970703125, -78.7470703125, -75.5234375, -72.2998046875, -69.076171875, -65.8525390625, -62.62890625, -59.4052734375, -56.181640625, -52.9580078125, -49.734375, -46.5107421875, -43.287109375, -40.0634765625, -36.83984375, -33.6162109375, -30.392578125, -27.1689453125, -23.9453125, -20.7216796875, -17.498046875, -14.2744140625, -11.05078125, -7.8271484375, -4.603515625, -1.3798828125, 1.84375, 5.0673828125, 8.291015625, 11.5146484375, 14.73828125, 17.9619140625, 21.185546875, 24.4091796875, 27.6328125, 30.8564453125, 34.080078125, 37.3037109375, 40.52734375, 43.7509765625, 46.974609375, 50.1982421875, 53.421875, 56.6455078125, 59.869140625, 63.0927734375, 66.31640625, 69.5400390625, 72.763671875, 75.9873046875, 79.2109375, 82.4345703125, 85.658203125, 88.8818359375, 92.10546875, 95.3291015625, 98.552734375, 101.7763671875, 105.0]}, "gradients/decoder.transformer.h.4.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 4.0, 4.0, 15.0, 17.0, 15.0, 14.0, 11.0, 28.0, 17.0, 27.0, 42.0, 45.0, 47.0, 56.0, 66.0, 94.0, 195.0, 1708.0, 213.0, 69.0, 57.0, 38.0, 37.0, 31.0, 48.0, 27.0, 31.0, 18.0, 24.0, 11.0, 15.0, 9.0, 4.0, 5.0, 5.0, 5.0, 3.0, 0.0, 3.0, 2.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-165.125, -158.908203125, -152.69140625, -146.474609375, -140.2578125, -134.041015625, -127.82421875, -121.607421875, -115.390625, -109.173828125, -102.95703125, -96.740234375, -90.5234375, -84.306640625, -78.08984375, -71.873046875, -65.65625, -59.439453125, -53.22265625, -47.005859375, -40.7890625, -34.572265625, -28.35546875, -22.138671875, -15.921875, -9.705078125, -3.48828125, 2.728515625, 8.9453125, 15.162109375, 21.37890625, 27.595703125, 33.8125, 40.029296875, 46.24609375, 52.462890625, 58.6796875, 64.896484375, 71.11328125, 77.330078125, 83.546875, 89.763671875, 95.98046875, 102.197265625, 108.4140625, 114.630859375, 120.84765625, 127.064453125, 133.28125, 139.498046875, 145.71484375, 151.931640625, 158.1484375, 164.365234375, 170.58203125, 176.798828125, 183.015625, 189.232421875, 195.44921875, 201.666015625, 207.8828125, 214.099609375, 220.31640625, 226.533203125, 232.75]}, "gradients/decoder.transformer.h.4.attn.c_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 5.0, 3.0, 10.0, 12.0, 5.0, 10.0, 16.0, 22.0, 21.0, 45.0, 38.0, 60.0, 118.0, 252.0, 539.0, 1415.0, 5534.0, 29564.0, 2577258.0, 501921.0, 22089.0, 4455.0, 1238.0, 511.0, 209.0, 115.0, 75.0, 52.0, 44.0, 23.0, 23.0, 2.0, 4.0, 5.0, 4.0, 3.0, 6.0, 3.0, 1.0, 2.0, 3.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-310.0, -299.640625, -289.28125, -278.921875, -268.5625, -258.203125, -247.84375, -237.484375, -227.125, -216.765625, -206.40625, -196.046875, -185.6875, -175.328125, -164.96875, -154.609375, -144.25, -133.890625, -123.53125, -113.171875, -102.8125, -92.453125, -82.09375, -71.734375, -61.375, -51.015625, -40.65625, -30.296875, -19.9375, -9.578125, 0.78125, 11.140625, 21.5, 31.859375, 42.21875, 52.578125, 62.9375, 73.296875, 83.65625, 94.015625, 104.375, 114.734375, 125.09375, 135.453125, 145.8125, 156.171875, 166.53125, 176.890625, 187.25, 197.609375, 207.96875, 218.328125, 228.6875, 239.046875, 249.40625, 259.765625, 270.125, 280.484375, 290.84375, 301.203125, 311.5625, 321.921875, 332.28125, 342.640625, 353.0]}, "gradients/decoder.transformer.h.4.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 33.0, 185.0, 486.0, 243.0, 53.0, 11.0, 2.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-704.7377319335938, -670.683837890625, -636.6298828125, -602.5759887695312, -568.5220947265625, -534.4682006835938, -500.4142761230469, -466.3603515625, -432.30645751953125, -398.2525634765625, -364.1986389160156, -330.14471435546875, -296.0908203125, -262.03692626953125, -227.98300170898438, -193.92909240722656, -159.87518310546875, -125.82127380371094, -91.76736450195312, -57.71345520019531, -23.6595458984375, 10.394363403320312, 44.448272705078125, 78.50218200683594, 112.55609130859375, 146.61000061035156, 180.66390991210938, 214.7178192138672, 248.771728515625, 282.82562255859375, 316.8795471191406, 350.9334716796875, 384.9874267578125, 419.04132080078125, 453.0952453613281, 487.149169921875, 521.2030639648438, 555.2569580078125, 589.3109130859375, 623.3648071289062, 657.418701171875, 691.4725952148438, 725.5264892578125, 759.5804443359375, 793.6343383789062, 827.688232421875, 861.7421875, 895.7960815429688, 929.8499755859375, 963.9038696289062, 997.957763671875, 1032.01171875, 1066.065673828125, 1100.1195068359375, 1134.1734619140625, 1168.227294921875, 1202.28125, 1236.335205078125, 1270.3890380859375, 1304.4429931640625, 1338.496826171875, 1372.55078125, 1406.604736328125, 1440.6585693359375, 1474.7125244140625]}, "gradients/decoder.transformer.h.4.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 3.0, 1.0, 2.0, 5.0, 4.0, 4.0, 10.0, 3.0, 12.0, 8.0, 17.0, 11.0, 16.0, 20.0, 14.0, 26.0, 21.0, 24.0, 29.0, 30.0, 34.0, 38.0, 51.0, 42.0, 34.0, 45.0, 43.0, 35.0, 45.0, 41.0, 30.0, 29.0, 31.0, 35.0, 30.0, 40.0, 28.0, 20.0, 21.0, 20.0, 10.0, 12.0, 9.0, 5.0, 6.0, 5.0, 4.0, 2.0, 2.0, 0.0, 3.0, 4.0, 1.0, 2.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-327.2459411621094, -316.4234313964844, -305.60089111328125, -294.77838134765625, -283.95587158203125, -273.13336181640625, -262.31085205078125, -251.48831176757812, -240.66580200195312, -229.84329223632812, -219.02076721191406, -208.1982421875, -197.375732421875, -186.55322265625, -175.73069763183594, -164.90817260742188, -154.08566284179688, -143.26315307617188, -132.4406280517578, -121.61811065673828, -110.79559326171875, -99.97307586669922, -89.15055847167969, -78.32804107666016, -67.50552368164062, -56.683006286621094, -45.86048889160156, -35.03797149658203, -24.2154541015625, -13.392936706542969, -2.5704193115234375, 8.252098083496094, 19.074615478515625, 29.897132873535156, 40.71965026855469, 51.54216766357422, 62.36468505859375, 73.18720245361328, 84.00971984863281, 94.83223724365234, 105.65475463867188, 116.4772720336914, 127.29978942871094, 138.122314453125, 148.94482421875, 159.767333984375, 170.58985900878906, 181.41238403320312, 192.23489379882812, 203.05740356445312, 213.8799285888672, 224.70245361328125, 235.52496337890625, 246.34747314453125, 257.16998291015625, 267.9925231933594, 278.8150329589844, 289.6375427246094, 300.4600830078125, 311.2825927734375, 322.1051025390625, 332.9276123046875, 343.7501220703125, 354.5726623535156, 365.3951721191406]}, "gradients/decoder.transformer.h.3.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 3.0, 2.0, 1.0, 6.0, 1.0, 2.0, 8.0, 9.0, 2.0, 9.0, 10.0, 14.0, 15.0, 16.0, 16.0, 23.0, 22.0, 28.0, 33.0, 33.0, 41.0, 35.0, 35.0, 41.0, 43.0, 41.0, 38.0, 47.0, 55.0, 35.0, 23.0, 36.0, 30.0, 32.0, 33.0, 22.0, 23.0, 17.0, 24.0, 21.0, 16.0, 13.0, 13.0, 13.0, 7.0, 7.0, 3.0, 7.0, 2.0, 2.0, 2.0, 3.0, 0.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-67.8125, -65.6669921875, -63.521484375, -61.3759765625, -59.23046875, -57.0849609375, -54.939453125, -52.7939453125, -50.6484375, -48.5029296875, -46.357421875, -44.2119140625, -42.06640625, -39.9208984375, -37.775390625, -35.6298828125, -33.484375, -31.3388671875, -29.193359375, -27.0478515625, -24.90234375, -22.7568359375, -20.611328125, -18.4658203125, -16.3203125, -14.1748046875, -12.029296875, -9.8837890625, -7.73828125, -5.5927734375, -3.447265625, -1.3017578125, 0.84375, 2.9892578125, 5.134765625, 7.2802734375, 9.42578125, 11.5712890625, 13.716796875, 15.8623046875, 18.0078125, 20.1533203125, 22.298828125, 24.4443359375, 26.58984375, 28.7353515625, 30.880859375, 33.0263671875, 35.171875, 37.3173828125, 39.462890625, 41.6083984375, 43.75390625, 45.8994140625, 48.044921875, 50.1904296875, 52.3359375, 54.4814453125, 56.626953125, 58.7724609375, 60.91796875, 63.0634765625, 65.208984375, 67.3544921875, 69.5]}, "gradients/decoder.transformer.h.3.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 1.0, 1.0, 2.0, 3.0, 6.0, 10.0, 12.0, 5.0, 19.0, 17.0, 19.0, 20.0, 31.0, 35.0, 45.0, 69.0, 79.0, 125.0, 220.0, 413.0, 804.0, 1964.0, 5959.0, 30710.0, 4016625.0, 120930.0, 10551.0, 3045.0, 1181.0, 535.0, 259.0, 177.0, 95.0, 61.0, 55.0, 46.0, 27.0, 28.0, 9.0, 19.0, 17.0, 13.0, 15.0, 7.0, 7.0, 7.0, 7.0, 1.0, 1.0, 2.0, 4.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-590.0, -571.390625, -552.78125, -534.171875, -515.5625, -496.953125, -478.34375, -459.734375, -441.125, -422.515625, -403.90625, -385.296875, -366.6875, -348.078125, -329.46875, -310.859375, -292.25, -273.640625, -255.03125, -236.421875, -217.8125, -199.203125, -180.59375, -161.984375, -143.375, -124.765625, -106.15625, -87.546875, -68.9375, -50.328125, -31.71875, -13.109375, 5.5, 24.109375, 42.71875, 61.328125, 79.9375, 98.546875, 117.15625, 135.765625, 154.375, 172.984375, 191.59375, 210.203125, 228.8125, 247.421875, 266.03125, 284.640625, 303.25, 321.859375, 340.46875, 359.078125, 377.6875, 396.296875, 414.90625, 433.515625, 452.125, 470.734375, 489.34375, 507.953125, 526.5625, 545.171875, 563.78125, 582.390625, 601.0]}, "gradients/decoder.transformer.h.3.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 0.0, 1.0, 2.0, 4.0, 3.0, 5.0, 2.0, 4.0, 9.0, 9.0, 15.0, 19.0, 13.0, 27.0, 17.0, 36.0, 39.0, 66.0, 132.0, 274.0, 820.0, 1318.0, 617.0, 247.0, 108.0, 83.0, 48.0, 41.0, 21.0, 18.0, 21.0, 15.0, 11.0, 5.0, 6.0, 4.0, 6.0, 4.0, 3.0, 0.0, 7.0, 1.0, 2.0, 1.0, 1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-123.25, -118.904296875, -114.55859375, -110.212890625, -105.8671875, -101.521484375, -97.17578125, -92.830078125, -88.484375, -84.138671875, -79.79296875, -75.447265625, -71.1015625, -66.755859375, -62.41015625, -58.064453125, -53.71875, -49.373046875, -45.02734375, -40.681640625, -36.3359375, -31.990234375, -27.64453125, -23.298828125, -18.953125, -14.607421875, -10.26171875, -5.916015625, -1.5703125, 2.775390625, 7.12109375, 11.466796875, 15.8125, 20.158203125, 24.50390625, 28.849609375, 33.1953125, 37.541015625, 41.88671875, 46.232421875, 50.578125, 54.923828125, 59.26953125, 63.615234375, 67.9609375, 72.306640625, 76.65234375, 80.998046875, 85.34375, 89.689453125, 94.03515625, 98.380859375, 102.7265625, 107.072265625, 111.41796875, 115.763671875, 120.109375, 124.455078125, 128.80078125, 133.146484375, 137.4921875, 141.837890625, 146.18359375, 150.529296875, 154.875]}, "gradients/decoder.transformer.h.3.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 3.0, 4.0, 3.0, 7.0, 3.0, 16.0, 13.0, 22.0, 35.0, 47.0, 114.0, 163.0, 314.0, 685.0, 1462.0, 3394.0, 8928.0, 27804.0, 142512.0, 3791198.0, 170852.0, 30358.0, 9591.0, 3727.0, 1541.0, 707.0, 375.0, 178.0, 87.0, 60.0, 26.0, 23.0, 12.0, 8.0, 10.0, 3.0, 3.0, 2.0, 2.0, 3.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-272.0, -261.34765625, -250.6953125, -240.04296875, -229.390625, -218.73828125, -208.0859375, -197.43359375, -186.78125, -176.12890625, -165.4765625, -154.82421875, -144.171875, -133.51953125, -122.8671875, -112.21484375, -101.5625, -90.91015625, -80.2578125, -69.60546875, -58.953125, -48.30078125, -37.6484375, -26.99609375, -16.34375, -5.69140625, 4.9609375, 15.61328125, 26.265625, 36.91796875, 47.5703125, 58.22265625, 68.875, 79.52734375, 90.1796875, 100.83203125, 111.484375, 122.13671875, 132.7890625, 143.44140625, 154.09375, 164.74609375, 175.3984375, 186.05078125, 196.703125, 207.35546875, 218.0078125, 228.66015625, 239.3125, 249.96484375, 260.6171875, 271.26953125, 281.921875, 292.57421875, 303.2265625, 313.87890625, 324.53125, 335.18359375, 345.8359375, 356.48828125, 367.140625, 377.79296875, 388.4453125, 399.09765625, 409.75]}, "gradients/decoder.transformer.h.3.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 4.0, 17.0, 49.0, 69.0, 222.0, 367.0, 185.0, 59.0, 20.0, 14.0, 4.0, 4.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-2131.30419921875, -2089.8017578125, -2048.29931640625, -2006.796875, -1965.29443359375, -1923.7919921875, -1882.28955078125, -1840.787109375, -1799.28466796875, -1757.7822265625, -1716.27978515625, -1674.77734375, -1633.27490234375, -1591.7724609375, -1550.27001953125, -1508.767578125, -1467.2650146484375, -1425.7625732421875, -1384.2601318359375, -1342.7576904296875, -1301.2552490234375, -1259.7528076171875, -1218.2503662109375, -1176.747802734375, -1135.245361328125, -1093.742919921875, -1052.240478515625, -1010.738037109375, -969.235595703125, -927.733154296875, -886.2306518554688, -844.7282104492188, -803.225830078125, -761.723388671875, -720.220947265625, -678.718505859375, -637.216064453125, -595.713623046875, -554.2111206054688, -512.7086791992188, -471.20623779296875, -429.70379638671875, -388.20135498046875, -346.6988830566406, -305.1964416503906, -263.6940002441406, -222.19154357910156, -180.6890869140625, -139.1866455078125, -97.68419647216797, -56.18174743652344, -14.679298400878906, 26.823150634765625, 68.32559204101562, 109.82804870605469, 151.33050537109375, 192.83294677734375, 234.33538818359375, 275.83782958984375, 317.3403015136719, 358.8427429199219, 400.3451843261719, 441.84765625, 483.35009765625, 524.8525390625]}, "gradients/decoder.transformer.h.3.ln_2.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 2.0, 5.0, 5.0, 9.0, 4.0, 5.0, 4.0, 8.0, 10.0, 15.0, 20.0, 18.0, 12.0, 29.0, 23.0, 38.0, 36.0, 30.0, 35.0, 36.0, 35.0, 47.0, 38.0, 45.0, 40.0, 48.0, 38.0, 43.0, 42.0, 37.0, 31.0, 29.0, 26.0, 25.0, 25.0, 15.0, 15.0, 19.0, 11.0, 11.0, 6.0, 9.0, 8.0, 9.0, 5.0, 4.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-319.42138671875, -308.74334716796875, -298.0653076171875, -287.38726806640625, -276.709228515625, -266.03118896484375, -255.3531494140625, -244.67510986328125, -233.9970703125, -223.31903076171875, -212.6409912109375, -201.96295166015625, -191.284912109375, -180.60687255859375, -169.9288330078125, -159.25079345703125, -148.57273864746094, -137.8946990966797, -127.21665954589844, -116.53861999511719, -105.86058044433594, -95.18254089355469, -84.5044937133789, -73.82645416259766, -63.148414611816406, -52.470375061035156, -41.792335510253906, -31.11429214477539, -20.43625259399414, -9.75821304321289, 0.919830322265625, 11.597869873046875, 22.275909423828125, 32.953948974609375, 43.631988525390625, 54.31003189086914, 64.98806762695312, 75.66610717773438, 86.34415435791016, 97.0221939086914, 107.70023345947266, 118.3782730102539, 129.0563201904297, 139.73435974121094, 150.4123992919922, 161.09043884277344, 171.7684783935547, 182.44651794433594, 193.1245574951172, 203.80259704589844, 214.4806365966797, 225.15867614746094, 235.8367156982422, 246.51475524902344, 257.19281005859375, 267.870849609375, 278.54888916015625, 289.2269287109375, 299.90496826171875, 310.5830078125, 321.26104736328125, 331.9390869140625, 342.61712646484375, 353.295166015625, 363.97320556640625]}, "gradients/decoder.transformer.h.3.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 2.0, 3.0, 1.0, 0.0, 0.0, 4.0, 4.0, 2.0, 7.0, 10.0, 9.0, 4.0, 10.0, 5.0, 10.0, 15.0, 23.0, 30.0, 18.0, 33.0, 30.0, 30.0, 35.0, 36.0, 31.0, 38.0, 50.0, 29.0, 44.0, 30.0, 53.0, 37.0, 43.0, 39.0, 44.0, 22.0, 31.0, 32.0, 19.0, 25.0, 22.0, 12.0, 20.0, 7.0, 15.0, 13.0, 7.0, 10.0, 5.0, 5.0, 5.0, 2.0, 3.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-71.125, -68.962890625, -66.80078125, -64.638671875, -62.4765625, -60.314453125, -58.15234375, -55.990234375, -53.828125, -51.666015625, -49.50390625, -47.341796875, -45.1796875, -43.017578125, -40.85546875, -38.693359375, -36.53125, -34.369140625, -32.20703125, -30.044921875, -27.8828125, -25.720703125, -23.55859375, -21.396484375, -19.234375, -17.072265625, -14.91015625, -12.748046875, -10.5859375, -8.423828125, -6.26171875, -4.099609375, -1.9375, 0.224609375, 2.38671875, 4.548828125, 6.7109375, 8.873046875, 11.03515625, 13.197265625, 15.359375, 17.521484375, 19.68359375, 21.845703125, 24.0078125, 26.169921875, 28.33203125, 30.494140625, 32.65625, 34.818359375, 36.98046875, 39.142578125, 41.3046875, 43.466796875, 45.62890625, 47.791015625, 49.953125, 52.115234375, 54.27734375, 56.439453125, 58.6015625, 60.763671875, 62.92578125, 65.087890625, 67.25]}, "gradients/decoder.transformer.h.3.crossattention.c_proj.weight": {"_type": "histogram", "values": [3.0, 2.0, 3.0, 5.0, 7.0, 13.0, 18.0, 24.0, 28.0, 45.0, 70.0, 92.0, 140.0, 196.0, 304.0, 376.0, 548.0, 789.0, 1071.0, 1568.0, 2209.0, 3186.0, 4395.0, 6736.0, 9806.0, 14510.0, 22297.0, 34660.0, 55823.0, 96733.0, 211033.0, 291966.0, 112775.0, 63265.0, 38687.0, 24404.0, 16158.0, 10694.0, 7353.0, 5035.0, 3447.0, 2330.0, 1706.0, 1232.0, 864.0, 543.0, 427.0, 273.0, 198.0, 188.0, 99.0, 65.0, 59.0, 41.0, 18.0, 9.0, 15.0, 13.0, 9.0, 6.0, 2.0, 2.0, 1.0, 2.0], "bins": [-2.517578125, -2.436676025390625, -2.35577392578125, -2.274871826171875, -2.1939697265625, -2.113067626953125, -2.03216552734375, -1.951263427734375, -1.870361328125, -1.789459228515625, -1.70855712890625, -1.627655029296875, -1.5467529296875, -1.465850830078125, -1.38494873046875, -1.304046630859375, -1.22314453125, -1.142242431640625, -1.06134033203125, -0.980438232421875, -0.8995361328125, -0.818634033203125, -0.73773193359375, -0.656829833984375, -0.575927734375, -0.495025634765625, -0.41412353515625, -0.333221435546875, -0.2523193359375, -0.171417236328125, -0.09051513671875, -0.009613037109375, 0.0712890625, 0.152191162109375, 0.23309326171875, 0.313995361328125, 0.3948974609375, 0.475799560546875, 0.55670166015625, 0.637603759765625, 0.718505859375, 0.799407958984375, 0.88031005859375, 0.961212158203125, 1.0421142578125, 1.123016357421875, 1.20391845703125, 1.284820556640625, 1.36572265625, 1.446624755859375, 1.52752685546875, 1.608428955078125, 1.6893310546875, 1.770233154296875, 1.85113525390625, 1.932037353515625, 2.012939453125, 2.093841552734375, 2.17474365234375, 2.255645751953125, 2.3365478515625, 2.417449951171875, 2.49835205078125, 2.579254150390625, 2.66015625]}, "gradients/decoder.transformer.h.3.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 5.0, 1.0, 4.0, 3.0, 4.0, 5.0, 12.0, 6.0, 7.0, 19.0, 15.0, 13.0, 15.0, 21.0, 21.0, 37.0, 36.0, 35.0, 31.0, 39.0, 48.0, 46.0, 51.0, 34.0, 1066.0, 39.0, 37.0, 41.0, 30.0, 29.0, 31.0, 31.0, 28.0, 32.0, 27.0, 21.0, 17.0, 20.0, 10.0, 12.0, 9.0, 9.0, 9.0, 9.0, 6.0, 6.0, 8.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 0.0, 1.0], "bins": [-46.9375, -45.49951171875, -44.0615234375, -42.62353515625, -41.185546875, -39.74755859375, -38.3095703125, -36.87158203125, -35.43359375, -33.99560546875, -32.5576171875, -31.11962890625, -29.681640625, -28.24365234375, -26.8056640625, -25.36767578125, -23.9296875, -22.49169921875, -21.0537109375, -19.61572265625, -18.177734375, -16.73974609375, -15.3017578125, -13.86376953125, -12.42578125, -10.98779296875, -9.5498046875, -8.11181640625, -6.673828125, -5.23583984375, -3.7978515625, -2.35986328125, -0.921875, 0.51611328125, 1.9541015625, 3.39208984375, 4.830078125, 6.26806640625, 7.7060546875, 9.14404296875, 10.58203125, 12.02001953125, 13.4580078125, 14.89599609375, 16.333984375, 17.77197265625, 19.2099609375, 20.64794921875, 22.0859375, 23.52392578125, 24.9619140625, 26.39990234375, 27.837890625, 29.27587890625, 30.7138671875, 32.15185546875, 33.58984375, 35.02783203125, 36.4658203125, 37.90380859375, 39.341796875, 40.77978515625, 42.2177734375, 43.65576171875, 45.09375]}, "gradients/decoder.transformer.h.3.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 3.0, 4.0, 4.0, 8.0, 11.0, 11.0, 10.0, 28.0, 35.0, 57.0, 79.0, 85.0, 144.0, 208.0, 317.0, 377.0, 609.0, 832.0, 1128.0, 1770.0, 2576.0, 3761.0, 5483.0, 8023.0, 11941.0, 18491.0, 28416.0, 44506.0, 72216.0, 130092.0, 1359963.0, 168103.0, 87721.0, 52414.0, 33221.0, 21463.0, 13834.0, 9282.0, 6213.0, 4262.0, 2927.0, 1918.0, 1421.0, 930.0, 682.0, 486.0, 302.0, 223.0, 171.0, 119.0, 84.0, 63.0, 26.0, 22.0, 24.0, 16.0, 14.0, 8.0, 2.0, 3.0, 3.0, 3.0, 2.0], "bins": [-1.8828125, -1.8233642578125, -1.763916015625, -1.7044677734375, -1.64501953125, -1.5855712890625, -1.526123046875, -1.4666748046875, -1.4072265625, -1.3477783203125, -1.288330078125, -1.2288818359375, -1.16943359375, -1.1099853515625, -1.050537109375, -0.9910888671875, -0.931640625, -0.8721923828125, -0.812744140625, -0.7532958984375, -0.69384765625, -0.6343994140625, -0.574951171875, -0.5155029296875, -0.4560546875, -0.3966064453125, -0.337158203125, -0.2777099609375, -0.21826171875, -0.1588134765625, -0.099365234375, -0.0399169921875, 0.01953125, 0.0789794921875, 0.138427734375, 0.1978759765625, 0.25732421875, 0.3167724609375, 0.376220703125, 0.4356689453125, 0.4951171875, 0.5545654296875, 0.614013671875, 0.6734619140625, 0.73291015625, 0.7923583984375, 0.851806640625, 0.9112548828125, 0.970703125, 1.0301513671875, 1.089599609375, 1.1490478515625, 1.20849609375, 1.2679443359375, 1.327392578125, 1.3868408203125, 1.4462890625, 1.5057373046875, 1.565185546875, 1.6246337890625, 1.68408203125, 1.7435302734375, 1.802978515625, 1.8624267578125, 1.921875]}, "gradients/decoder.transformer.h.3.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0, 3.0, 1.0, 0.0, 1.0, 2.0, 2.0, 2.0, 1.0, 6.0, 4.0, 7.0, 5.0, 10.0, 8.0, 8.0, 18.0, 9.0, 17.0, 19.0, 26.0, 33.0, 41.0, 55.0, 77.0, 98.0, 128.0, 98.0, 69.0, 45.0, 42.0, 28.0, 36.0, 15.0, 12.0, 15.0, 10.0, 8.0, 13.0, 8.0, 4.0, 5.0, 6.0, 3.0, 2.0, 3.0, 3.0, 2.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0], "bins": [-9.679794311523438e-05, -9.380653500556946e-05, -9.081512689590454e-05, -8.782371878623962e-05, -8.483231067657471e-05, -8.184090256690979e-05, -7.884949445724487e-05, -7.585808634757996e-05, -7.286667823791504e-05, -6.987527012825012e-05, -6.68838620185852e-05, -6.389245390892029e-05, -6.090104579925537e-05, -5.7909637689590454e-05, -5.491822957992554e-05, -5.192682147026062e-05, -4.89354133605957e-05, -4.5944005250930786e-05, -4.295259714126587e-05, -3.996118903160095e-05, -3.6969780921936035e-05, -3.397837281227112e-05, -3.09869647026062e-05, -2.7995556592941284e-05, -2.5004148483276367e-05, -2.201274037361145e-05, -1.9021332263946533e-05, -1.6029924154281616e-05, -1.30385160446167e-05, -1.0047107934951782e-05, -7.055699825286865e-06, -4.064291715621948e-06, -1.0728836059570312e-06, 1.9185245037078857e-06, 4.909932613372803e-06, 7.90134072303772e-06, 1.0892748832702637e-05, 1.3884156942367554e-05, 1.687556505203247e-05, 1.9866973161697388e-05, 2.2858381271362305e-05, 2.584978938102722e-05, 2.884119749069214e-05, 3.1832605600357056e-05, 3.482401371002197e-05, 3.781542181968689e-05, 4.080682992935181e-05, 4.3798238039016724e-05, 4.678964614868164e-05, 4.978105425834656e-05, 5.2772462368011475e-05, 5.576387047767639e-05, 5.875527858734131e-05, 6.174668669700623e-05, 6.473809480667114e-05, 6.772950291633606e-05, 7.072091102600098e-05, 7.37123191356659e-05, 7.670372724533081e-05, 7.969513535499573e-05, 8.268654346466064e-05, 8.567795157432556e-05, 8.866935968399048e-05, 9.16607677936554e-05, 9.465217590332031e-05]}, "gradients/decoder.transformer.h.3.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 4.0, 1.0, 1.0, 2.0, 4.0, 1.0, 2.0, 2.0, 3.0, 3.0, 6.0, 6.0, 7.0, 8.0, 9.0, 19.0, 16.0, 15.0, 20.0, 25.0, 28.0, 40.0, 54.0, 66.0, 142.0, 220.0, 456.0, 2275.0, 27441.0, 943688.0, 68616.0, 3986.0, 648.0, 235.0, 139.0, 90.0, 52.0, 44.0, 43.0, 25.0, 24.0, 18.0, 12.0, 15.0, 13.0, 12.0, 2.0, 5.0, 6.0, 4.0, 0.0, 2.0, 3.0, 5.0, 2.0, 2.0, 2.0, 2.0, 1.0, 2.0, 0.0, 1.0], "bins": [-0.0015115737915039062, -0.0014637857675552368, -0.0014159977436065674, -0.001368209719657898, -0.0013204216957092285, -0.001272633671760559, -0.0012248456478118896, -0.0011770576238632202, -0.0011292695999145508, -0.0010814815759658813, -0.001033693552017212, -0.0009859055280685425, -0.000938117504119873, -0.0008903294801712036, -0.0008425414562225342, -0.0007947534322738647, -0.0007469654083251953, -0.0006991773843765259, -0.0006513893604278564, -0.000603601336479187, -0.0005558133125305176, -0.0005080252885818481, -0.0004602372646331787, -0.0004124492406845093, -0.00036466121673583984, -0.0003168731927871704, -0.000269085168838501, -0.00022129714488983154, -0.0001735091209411621, -0.00012572109699249268, -7.793307304382324e-05, -3.014504909515381e-05, 1.7642974853515625e-05, 6.543099880218506e-05, 0.00011321902275085449, 0.00016100704669952393, 0.00020879507064819336, 0.0002565830945968628, 0.0003043711185455322, 0.00035215914249420166, 0.0003999471664428711, 0.00044773519039154053, 0.00049552321434021, 0.0005433112382888794, 0.0005910992622375488, 0.0006388872861862183, 0.0006866753101348877, 0.0007344633340835571, 0.0007822513580322266, 0.000830039381980896, 0.0008778274059295654, 0.0009256154298782349, 0.0009734034538269043, 0.0010211914777755737, 0.0010689795017242432, 0.0011167675256729126, 0.001164555549621582, 0.0012123435735702515, 0.001260131597518921, 0.0013079196214675903, 0.0013557076454162598, 0.0014034956693649292, 0.0014512836933135986, 0.001499071717262268, 0.0015468597412109375]}, "gradients/decoder.transformer.h.3.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 4.0, 3.0, 13.0, 29.0, 39.0, 59.0, 116.0, 253.0, 249.0, 124.0, 59.0, 25.0, 18.0, 12.0, 9.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.00020603692973963916, -0.00020183276501484215, -0.00019762858573812991, -0.0001934244210133329, -0.00018922024173662066, -0.00018501607701182365, -0.00018081189773511142, -0.0001766077330103144, -0.00017240355373360217, -0.00016819938900880516, -0.00016399520973209292, -0.0001597910450072959, -0.00015558686573058367, -0.00015138270100578666, -0.00014717852172907442, -0.0001429743570042774, -0.00013877017772756517, -0.00013456601300276816, -0.00013036183372605592, -0.0001261576690012589, -0.00012195348972454667, -0.00011774931772379205, -0.00011354514572303742, -0.00010934098099824041, -0.0001051368162734434, -0.00010093264427268878, -9.672847227193415e-05, -9.252430027117953e-05, -8.83201282704249e-05, -8.411595626967028e-05, -7.991178426891565e-05, -7.570761954411864e-05, -7.150344754336402e-05, -6.72992755426094e-05, -6.309510354185477e-05, -5.8890931541100144e-05, -5.468675954034552e-05, -5.0482587539590895e-05, -4.627841917681508e-05, -4.207424717606045e-05, -3.787007517530583e-05, -3.3665903174551204e-05, -2.946173117379658e-05, -2.5257560992031358e-05, -2.1053388991276734e-05, -1.684921699052211e-05, -1.2645046808756888e-05, -8.440874808002263e-06, -4.236702807247639e-06, -3.2531261240364984e-08, 4.171640284766909e-06, 8.375811376026832e-06, 1.2579983376781456e-05, 1.678415537753608e-05, 2.0988325559301302e-05, 2.5192497560055926e-05, 2.939666956081055e-05, 3.3600841561565176e-05, 3.78050135623198e-05, 4.200918192509562e-05, 4.621335392585024e-05, 5.041752592660487e-05, 5.462169792735949e-05, 5.8825869928114116e-05, 6.303004192886874e-05]}, "gradients/decoder.transformer.h.3.ln_cross_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 2.0, 2.0, 1.0, 1.0, 0.0, 2.0, 9.0, 3.0, 5.0, 6.0, 7.0, 8.0, 7.0, 15.0, 11.0, 17.0, 23.0, 26.0, 22.0, 21.0, 35.0, 26.0, 30.0, 44.0, 32.0, 43.0, 30.0, 41.0, 34.0, 41.0, 35.0, 40.0, 29.0, 37.0, 46.0, 32.0, 29.0, 22.0, 29.0, 28.0, 23.0, 16.0, 17.0, 18.0, 11.0, 11.0, 8.0, 9.0, 9.0, 6.0, 4.0, 3.0, 3.0, 3.0, 4.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-4.202127456665039e-05, -4.06680628657341e-05, -3.931485116481781e-05, -3.796163946390152e-05, -3.660842776298523e-05, -3.525521606206894e-05, -3.390200436115265e-05, -3.254879266023636e-05, -3.119558095932007e-05, -2.9842369258403778e-05, -2.8489157557487488e-05, -2.7135945856571198e-05, -2.5782734155654907e-05, -2.4429522454738617e-05, -2.3076310753822327e-05, -2.1723099052906036e-05, -2.0369887351989746e-05, -1.9016675651073456e-05, -1.7663463950157166e-05, -1.6310252249240875e-05, -1.4957040548324585e-05, -1.3603828847408295e-05, -1.2250617146492004e-05, -1.0897405445575714e-05, -9.544193744659424e-06, -8.190982043743134e-06, -6.837770342826843e-06, -5.484558641910553e-06, -4.131346940994263e-06, -2.7781352400779724e-06, -1.4249235391616821e-06, -7.171183824539185e-08, 1.2814998626708984e-06, 2.6347115635871887e-06, 3.987923264503479e-06, 5.341134965419769e-06, 6.6943466663360596e-06, 8.04755836725235e-06, 9.40077006816864e-06, 1.075398176908493e-05, 1.210719347000122e-05, 1.3460405170917511e-05, 1.4813616871833801e-05, 1.616682857275009e-05, 1.7520040273666382e-05, 1.8873251974582672e-05, 2.0226463675498962e-05, 2.1579675376415253e-05, 2.2932887077331543e-05, 2.4286098778247833e-05, 2.5639310479164124e-05, 2.6992522180080414e-05, 2.8345733880996704e-05, 2.9698945581912994e-05, 3.1052157282829285e-05, 3.2405368983745575e-05, 3.3758580684661865e-05, 3.5111792385578156e-05, 3.6465004086494446e-05, 3.7818215787410736e-05, 3.9171427488327026e-05, 4.052463918924332e-05, 4.187785089015961e-05, 4.32310625910759e-05, 4.458427429199219e-05]}, "gradients/decoder.transformer.h.3.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 2.0, 3.0, 1.0, 0.0, 0.0, 4.0, 4.0, 2.0, 7.0, 10.0, 9.0, 4.0, 10.0, 5.0, 10.0, 15.0, 23.0, 30.0, 18.0, 33.0, 30.0, 30.0, 35.0, 36.0, 31.0, 38.0, 50.0, 29.0, 44.0, 30.0, 53.0, 37.0, 43.0, 39.0, 44.0, 22.0, 31.0, 32.0, 19.0, 25.0, 22.0, 12.0, 20.0, 7.0, 15.0, 13.0, 7.0, 10.0, 5.0, 5.0, 5.0, 2.0, 3.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-71.125, -68.962890625, -66.80078125, -64.638671875, -62.4765625, -60.314453125, -58.15234375, -55.990234375, -53.828125, -51.666015625, -49.50390625, -47.341796875, -45.1796875, -43.017578125, -40.85546875, -38.693359375, -36.53125, -34.369140625, -32.20703125, -30.044921875, -27.8828125, -25.720703125, -23.55859375, -21.396484375, -19.234375, -17.072265625, -14.91015625, -12.748046875, -10.5859375, -8.423828125, -6.26171875, -4.099609375, -1.9375, 0.224609375, 2.38671875, 4.548828125, 6.7109375, 8.873046875, 11.03515625, 13.197265625, 15.359375, 17.521484375, 19.68359375, 21.845703125, 24.0078125, 26.169921875, 28.33203125, 30.494140625, 32.65625, 34.818359375, 36.98046875, 39.142578125, 41.3046875, 43.466796875, 45.62890625, 47.791015625, 49.953125, 52.115234375, 54.27734375, 56.439453125, 58.6015625, 60.763671875, 62.92578125, 65.087890625, 67.25]}, "gradients/decoder.transformer.h.3.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 3.0, 1.0, 7.0, 3.0, 7.0, 9.0, 8.0, 12.0, 20.0, 31.0, 45.0, 70.0, 60.0, 89.0, 120.0, 189.0, 231.0, 298.0, 401.0, 578.0, 789.0, 1198.0, 1670.0, 2524.0, 4132.0, 6749.0, 12457.0, 25729.0, 66023.0, 235868.0, 480805.0, 123767.0, 41666.0, 18003.0, 9303.0, 5341.0, 3282.0, 2044.0, 1462.0, 983.0, 665.0, 508.0, 380.0, 252.0, 231.0, 141.0, 123.0, 80.0, 50.0, 45.0, 36.0, 29.0, 13.0, 16.0, 6.0, 9.0, 5.0, 4.0, 3.0, 0.0, 0.0, 1.0], "bins": [-119.375, -115.67578125, -111.9765625, -108.27734375, -104.578125, -100.87890625, -97.1796875, -93.48046875, -89.78125, -86.08203125, -82.3828125, -78.68359375, -74.984375, -71.28515625, -67.5859375, -63.88671875, -60.1875, -56.48828125, -52.7890625, -49.08984375, -45.390625, -41.69140625, -37.9921875, -34.29296875, -30.59375, -26.89453125, -23.1953125, -19.49609375, -15.796875, -12.09765625, -8.3984375, -4.69921875, -1.0, 2.69921875, 6.3984375, 10.09765625, 13.796875, 17.49609375, 21.1953125, 24.89453125, 28.59375, 32.29296875, 35.9921875, 39.69140625, 43.390625, 47.08984375, 50.7890625, 54.48828125, 58.1875, 61.88671875, 65.5859375, 69.28515625, 72.984375, 76.68359375, 80.3828125, 84.08203125, 87.78125, 91.48046875, 95.1796875, 98.87890625, 102.578125, 106.27734375, 109.9765625, 113.67578125, 117.375]}, "gradients/decoder.transformer.h.3.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 5.0, 5.0, 6.0, 4.0, 7.0, 14.0, 13.0, 14.0, 21.0, 30.0, 33.0, 32.0, 52.0, 42.0, 50.0, 79.0, 132.0, 446.0, 1505.0, 166.0, 81.0, 56.0, 55.0, 46.0, 32.0, 31.0, 18.0, 21.0, 13.0, 16.0, 7.0, 8.0, 4.0, 3.0, 5.0, 2.0, 4.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-198.75, -192.3515625, -185.953125, -179.5546875, -173.15625, -166.7578125, -160.359375, -153.9609375, -147.5625, -141.1640625, -134.765625, -128.3671875, -121.96875, -115.5703125, -109.171875, -102.7734375, -96.375, -89.9765625, -83.578125, -77.1796875, -70.78125, -64.3828125, -57.984375, -51.5859375, -45.1875, -38.7890625, -32.390625, -25.9921875, -19.59375, -13.1953125, -6.796875, -0.3984375, 6.0, 12.3984375, 18.796875, 25.1953125, 31.59375, 37.9921875, 44.390625, 50.7890625, 57.1875, 63.5859375, 69.984375, 76.3828125, 82.78125, 89.1796875, 95.578125, 101.9765625, 108.375, 114.7734375, 121.171875, 127.5703125, 133.96875, 140.3671875, 146.765625, 153.1640625, 159.5625, 165.9609375, 172.359375, 178.7578125, 185.15625, 191.5546875, 197.953125, 204.3515625, 210.75]}, "gradients/decoder.transformer.h.3.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 3.0, 1.0, 0.0, 1.0, 5.0, 2.0, 5.0, 6.0, 10.0, 12.0, 8.0, 18.0, 35.0, 41.0, 56.0, 97.0, 192.0, 583.0, 4001.0, 3062483.0, 75944.0, 1434.0, 356.0, 153.0, 76.0, 48.0, 37.0, 27.0, 25.0, 16.0, 14.0, 8.0, 8.0, 2.0, 0.0, 4.0, 1.0, 3.0, 0.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-767.0, -741.2578125, -715.515625, -689.7734375, -664.03125, -638.2890625, -612.546875, -586.8046875, -561.0625, -535.3203125, -509.578125, -483.8359375, -458.09375, -432.3515625, -406.609375, -380.8671875, -355.125, -329.3828125, -303.640625, -277.8984375, -252.15625, -226.4140625, -200.671875, -174.9296875, -149.1875, -123.4453125, -97.703125, -71.9609375, -46.21875, -20.4765625, 5.265625, 31.0078125, 56.75, 82.4921875, 108.234375, 133.9765625, 159.71875, 185.4609375, 211.203125, 236.9453125, 262.6875, 288.4296875, 314.171875, 339.9140625, 365.65625, 391.3984375, 417.140625, 442.8828125, 468.625, 494.3671875, 520.109375, 545.8515625, 571.59375, 597.3359375, 623.078125, 648.8203125, 674.5625, 700.3046875, 726.046875, 751.7890625, 777.53125, 803.2734375, 829.015625, 854.7578125, 880.5]}, "gradients/decoder.transformer.h.3.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 4.0, 9.0, 35.0, 178.0, 397.0, 270.0, 91.0, 24.0, 6.0, 4.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-430.9970397949219, -399.5403747558594, -368.0837097167969, -336.6270446777344, -305.1703796386719, -273.7137451171875, -242.25706481933594, -210.80039978027344, -179.34373474121094, -147.88706970214844, -116.43040466308594, -84.97374725341797, -53.51708221435547, -22.0604248046875, 9.396240234375, 40.8529052734375, 72.3095703125, 103.7662353515625, 135.222900390625, 166.6795654296875, 198.13623046875, 229.59288024902344, 261.049560546875, 292.5062255859375, 323.962890625, 355.4195556640625, 386.876220703125, 418.3328857421875, 449.78955078125, 481.2462158203125, 512.702880859375, 544.1595458984375, 575.6161499023438, 607.0728149414062, 638.5294799804688, 669.9861450195312, 701.4428100585938, 732.8994750976562, 764.3561401367188, 795.8128051757812, 827.2694702148438, 858.7261352539062, 890.1828002929688, 921.6394653320312, 953.0961303710938, 984.5527954101562, 1016.0094604492188, 1047.466064453125, 1078.9227294921875, 1110.37939453125, 1141.8360595703125, 1173.292724609375, 1204.7493896484375, 1236.2060546875, 1267.6627197265625, 1299.119384765625, 1330.5760498046875, 1362.03271484375, 1393.4893798828125, 1424.946044921875, 1456.4027099609375, 1487.859375, 1519.3160400390625, 1550.772705078125, 1582.2293701171875]}, "gradients/decoder.transformer.h.3.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 2.0, 2.0, 2.0, 1.0, 4.0, 2.0, 7.0, 10.0, 9.0, 10.0, 15.0, 13.0, 18.0, 25.0, 22.0, 19.0, 18.0, 30.0, 36.0, 29.0, 38.0, 36.0, 37.0, 39.0, 41.0, 39.0, 37.0, 44.0, 41.0, 48.0, 37.0, 37.0, 29.0, 30.0, 27.0, 31.0, 19.0, 16.0, 20.0, 20.0, 16.0, 15.0, 14.0, 2.0, 9.0, 5.0, 3.0, 1.0, 2.0, 3.0, 4.0, 2.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-372.13116455078125, -359.1233215332031, -346.115478515625, -333.1076354980469, -320.09979248046875, -307.0919494628906, -294.0841064453125, -281.07623291015625, -268.06842041015625, -255.06057739257812, -242.052734375, -229.04489135742188, -216.03704833984375, -203.02920532226562, -190.02134704589844, -177.0135040283203, -164.00564575195312, -150.997802734375, -137.98995971679688, -124.98210906982422, -111.9742660522461, -98.96642303466797, -85.95857238769531, -72.95072937011719, -59.94288635253906, -46.93504333496094, -33.92719650268555, -20.919349670410156, -7.911506652832031, 5.096336364746094, 18.10418701171875, 31.112030029296875, 44.119873046875, 57.127716064453125, 70.13555908203125, 83.1434097290039, 96.15125274658203, 109.15909576416016, 122.16694641113281, 135.17478942871094, 148.18263244628906, 161.1904754638672, 174.1983184814453, 187.2061767578125, 200.21401977539062, 213.22186279296875, 226.22970581054688, 239.237548828125, 252.24539184570312, 265.25323486328125, 278.2610778808594, 291.2689208984375, 304.2767639160156, 317.28460693359375, 330.29248046875, 343.30029296875, 356.30816650390625, 369.3160095214844, 382.3238525390625, 395.3316955566406, 408.33953857421875, 421.3473815917969, 434.355224609375, 447.36309814453125, 460.37091064453125]}, "gradients/decoder.transformer.h.2.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 1.0, 3.0, 4.0, 4.0, 4.0, 4.0, 10.0, 5.0, 7.0, 8.0, 9.0, 15.0, 19.0, 23.0, 17.0, 25.0, 23.0, 35.0, 31.0, 30.0, 31.0, 43.0, 26.0, 50.0, 37.0, 43.0, 42.0, 39.0, 50.0, 31.0, 40.0, 38.0, 30.0, 29.0, 26.0, 37.0, 12.0, 22.0, 21.0, 15.0, 17.0, 9.0, 6.0, 15.0, 8.0, 4.0, 6.0, 3.0, 2.0, 2.0, 1.0, 2.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-65.5, -63.3134765625, -61.126953125, -58.9404296875, -56.75390625, -54.5673828125, -52.380859375, -50.1943359375, -48.0078125, -45.8212890625, -43.634765625, -41.4482421875, -39.26171875, -37.0751953125, -34.888671875, -32.7021484375, -30.515625, -28.3291015625, -26.142578125, -23.9560546875, -21.76953125, -19.5830078125, -17.396484375, -15.2099609375, -13.0234375, -10.8369140625, -8.650390625, -6.4638671875, -4.27734375, -2.0908203125, 0.095703125, 2.2822265625, 4.46875, 6.6552734375, 8.841796875, 11.0283203125, 13.21484375, 15.4013671875, 17.587890625, 19.7744140625, 21.9609375, 24.1474609375, 26.333984375, 28.5205078125, 30.70703125, 32.8935546875, 35.080078125, 37.2666015625, 39.453125, 41.6396484375, 43.826171875, 46.0126953125, 48.19921875, 50.3857421875, 52.572265625, 54.7587890625, 56.9453125, 59.1318359375, 61.318359375, 63.5048828125, 65.69140625, 67.8779296875, 70.064453125, 72.2509765625, 74.4375]}, "gradients/decoder.transformer.h.2.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 5.0, 5.0, 3.0, 7.0, 8.0, 11.0, 16.0, 23.0, 28.0, 39.0, 52.0, 63.0, 79.0, 131.0, 179.0, 229.0, 350.0, 473.0, 688.0, 1046.0, 1619.0, 2479.0, 4252.0, 7562.0, 15800.0, 49289.0, 419670.0, 2876049.0, 697009.0, 76249.0, 18992.0, 8719.0, 4776.0, 2963.0, 1746.0, 1114.0, 754.0, 535.0, 344.0, 272.0, 170.0, 134.0, 87.0, 84.0, 42.0, 42.0, 37.0, 16.0, 10.0, 20.0, 6.0, 4.0, 5.0, 2.0, 3.0, 1.0, 2.0, 2.0], "bins": [-163.375, -158.513671875, -153.65234375, -148.791015625, -143.9296875, -139.068359375, -134.20703125, -129.345703125, -124.484375, -119.623046875, -114.76171875, -109.900390625, -105.0390625, -100.177734375, -95.31640625, -90.455078125, -85.59375, -80.732421875, -75.87109375, -71.009765625, -66.1484375, -61.287109375, -56.42578125, -51.564453125, -46.703125, -41.841796875, -36.98046875, -32.119140625, -27.2578125, -22.396484375, -17.53515625, -12.673828125, -7.8125, -2.951171875, 1.91015625, 6.771484375, 11.6328125, 16.494140625, 21.35546875, 26.216796875, 31.078125, 35.939453125, 40.80078125, 45.662109375, 50.5234375, 55.384765625, 60.24609375, 65.107421875, 69.96875, 74.830078125, 79.69140625, 84.552734375, 89.4140625, 94.275390625, 99.13671875, 103.998046875, 108.859375, 113.720703125, 118.58203125, 123.443359375, 128.3046875, 133.166015625, 138.02734375, 142.888671875, 147.75]}, "gradients/decoder.transformer.h.2.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 3.0, 1.0, 3.0, 1.0, 4.0, 3.0, 4.0, 10.0, 14.0, 9.0, 20.0, 21.0, 29.0, 52.0, 62.0, 82.0, 171.0, 258.0, 529.0, 899.0, 818.0, 424.0, 254.0, 125.0, 73.0, 47.0, 42.0, 39.0, 21.0, 20.0, 15.0, 8.0, 4.0, 5.0, 4.0, 2.0, 2.0, 4.0, 1.0, 1.0, 0.0, 3.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-131.125, -126.416015625, -121.70703125, -116.998046875, -112.2890625, -107.580078125, -102.87109375, -98.162109375, -93.453125, -88.744140625, -84.03515625, -79.326171875, -74.6171875, -69.908203125, -65.19921875, -60.490234375, -55.78125, -51.072265625, -46.36328125, -41.654296875, -36.9453125, -32.236328125, -27.52734375, -22.818359375, -18.109375, -13.400390625, -8.69140625, -3.982421875, 0.7265625, 5.435546875, 10.14453125, 14.853515625, 19.5625, 24.271484375, 28.98046875, 33.689453125, 38.3984375, 43.107421875, 47.81640625, 52.525390625, 57.234375, 61.943359375, 66.65234375, 71.361328125, 76.0703125, 80.779296875, 85.48828125, 90.197265625, 94.90625, 99.615234375, 104.32421875, 109.033203125, 113.7421875, 118.451171875, 123.16015625, 127.869140625, 132.578125, 137.287109375, 141.99609375, 146.705078125, 151.4140625, 156.123046875, 160.83203125, 165.541015625, 170.25]}, "gradients/decoder.transformer.h.2.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 3.0, 0.0, 3.0, 1.0, 7.0, 3.0, 5.0, 15.0, 15.0, 18.0, 36.0, 60.0, 115.0, 183.0, 330.0, 591.0, 1049.0, 1935.0, 4109.0, 9246.0, 25126.0, 93814.0, 1083417.0, 2784035.0, 135900.0, 32937.0, 11396.0, 4943.0, 2334.0, 1154.0, 626.0, 367.0, 225.0, 109.0, 76.0, 42.0, 25.0, 10.0, 13.0, 9.0, 5.0, 1.0, 3.0, 1.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-245.875, -239.06640625, -232.2578125, -225.44921875, -218.640625, -211.83203125, -205.0234375, -198.21484375, -191.40625, -184.59765625, -177.7890625, -170.98046875, -164.171875, -157.36328125, -150.5546875, -143.74609375, -136.9375, -130.12890625, -123.3203125, -116.51171875, -109.703125, -102.89453125, -96.0859375, -89.27734375, -82.46875, -75.66015625, -68.8515625, -62.04296875, -55.234375, -48.42578125, -41.6171875, -34.80859375, -28.0, -21.19140625, -14.3828125, -7.57421875, -0.765625, 6.04296875, 12.8515625, 19.66015625, 26.46875, 33.27734375, 40.0859375, 46.89453125, 53.703125, 60.51171875, 67.3203125, 74.12890625, 80.9375, 87.74609375, 94.5546875, 101.36328125, 108.171875, 114.98046875, 121.7890625, 128.59765625, 135.40625, 142.21484375, 149.0234375, 155.83203125, 162.640625, 169.44921875, 176.2578125, 183.06640625, 189.875]}, "gradients/decoder.transformer.h.2.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 6.0, 9.0, 29.0, 94.0, 296.0, 381.0, 137.0, 49.0, 9.0, 3.0, 4.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1074.2515869140625, -1020.8753051757812, -967.4990234375, -914.1227416992188, -860.7464599609375, -807.3701782226562, -753.993896484375, -700.6176147460938, -647.2413330078125, -593.8650512695312, -540.48876953125, -487.11248779296875, -433.7362060546875, -380.35992431640625, -326.983642578125, -273.60736083984375, -220.2310791015625, -166.85479736328125, -113.478515625, -60.10223388671875, -6.7259521484375, 46.65032958984375, 100.026611328125, 153.40289306640625, 206.7791748046875, 260.15545654296875, 313.53173828125, 366.90802001953125, 420.2843017578125, 473.66058349609375, 527.036865234375, 580.4131469726562, 633.789306640625, 687.1655883789062, 740.5418701171875, 793.9181518554688, 847.29443359375, 900.6707153320312, 954.0469970703125, 1007.4232788085938, 1060.799560546875, 1114.17578125, 1167.5521240234375, 1220.928466796875, 1274.3046875, 1327.680908203125, 1381.0572509765625, 1434.43359375, 1487.809814453125, 1541.18603515625, 1594.5623779296875, 1647.938720703125, 1701.31494140625, 1754.691162109375, 1808.0675048828125, 1861.44384765625, 1914.820068359375, 1968.1962890625, 2021.5726318359375, 2074.948974609375, 2128.3251953125, 2181.701416015625, 2235.07763671875, 2288.4541015625, 2341.830322265625]}, "gradients/decoder.transformer.h.2.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0, 3.0, 1.0, 0.0, 3.0, 7.0, 7.0, 10.0, 13.0, 15.0, 15.0, 20.0, 23.0, 20.0, 27.0, 36.0, 36.0, 32.0, 46.0, 37.0, 34.0, 48.0, 44.0, 31.0, 35.0, 28.0, 51.0, 31.0, 48.0, 23.0, 32.0, 28.0, 28.0, 21.0, 22.0, 29.0, 19.0, 19.0, 19.0, 11.0, 14.0, 8.0, 10.0, 7.0, 6.0, 4.0, 4.0, 3.0, 4.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-393.8462219238281, -381.8681335449219, -369.8900451660156, -357.9119567871094, -345.9338684082031, -333.9557800292969, -321.9776611328125, -309.99957275390625, -298.021484375, -286.04339599609375, -274.0653076171875, -262.08721923828125, -250.109130859375, -238.13104248046875, -226.15293884277344, -214.1748504638672, -202.19677734375, -190.21868896484375, -178.2406005859375, -166.26251220703125, -154.284423828125, -142.30633544921875, -130.32823181152344, -118.35014343261719, -106.37205505371094, -94.39396667480469, -82.41587829589844, -70.43778228759766, -58.459693908691406, -46.481605529785156, -34.503509521484375, -22.525421142578125, -10.54730224609375, 1.4307880401611328, 13.408878326416016, 25.38697052001953, 37.36505889892578, 49.34314727783203, 61.32124328613281, 73.29933166503906, 85.27742004394531, 97.25550842285156, 109.23359680175781, 121.2116928100586, 133.18978881835938, 145.16787719726562, 157.14596557617188, 169.12405395507812, 181.10214233398438, 193.08023071289062, 205.05831909179688, 217.03640747070312, 229.01449584960938, 240.99258422851562, 252.97068786621094, 264.94879150390625, 276.9268798828125, 288.90496826171875, 300.883056640625, 312.86114501953125, 324.8392333984375, 336.81732177734375, 348.79541015625, 360.77349853515625, 372.7515869140625]}, "gradients/decoder.transformer.h.2.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 3.0, 3.0, 1.0, 4.0, 3.0, 3.0, 9.0, 4.0, 4.0, 7.0, 14.0, 12.0, 14.0, 14.0, 23.0, 21.0, 26.0, 26.0, 25.0, 39.0, 35.0, 40.0, 46.0, 39.0, 38.0, 40.0, 40.0, 32.0, 36.0, 42.0, 45.0, 26.0, 34.0, 30.0, 23.0, 26.0, 26.0, 21.0, 25.0, 26.0, 21.0, 20.0, 14.0, 7.0, 5.0, 3.0, 2.0, 7.0, 3.0, 4.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-58.90625, -56.91943359375, -54.9326171875, -52.94580078125, -50.958984375, -48.97216796875, -46.9853515625, -44.99853515625, -43.01171875, -41.02490234375, -39.0380859375, -37.05126953125, -35.064453125, -33.07763671875, -31.0908203125, -29.10400390625, -27.1171875, -25.13037109375, -23.1435546875, -21.15673828125, -19.169921875, -17.18310546875, -15.1962890625, -13.20947265625, -11.22265625, -9.23583984375, -7.2490234375, -5.26220703125, -3.275390625, -1.28857421875, 0.6982421875, 2.68505859375, 4.671875, 6.65869140625, 8.6455078125, 10.63232421875, 12.619140625, 14.60595703125, 16.5927734375, 18.57958984375, 20.56640625, 22.55322265625, 24.5400390625, 26.52685546875, 28.513671875, 30.50048828125, 32.4873046875, 34.47412109375, 36.4609375, 38.44775390625, 40.4345703125, 42.42138671875, 44.408203125, 46.39501953125, 48.3818359375, 50.36865234375, 52.35546875, 54.34228515625, 56.3291015625, 58.31591796875, 60.302734375, 62.28955078125, 64.2763671875, 66.26318359375, 68.25]}, "gradients/decoder.transformer.h.2.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 5.0, 4.0, 5.0, 10.0, 13.0, 17.0, 30.0, 29.0, 59.0, 95.0, 158.0, 240.0, 315.0, 435.0, 657.0, 1014.0, 1689.0, 2552.0, 3933.0, 6267.0, 10209.0, 15914.0, 26559.0, 43309.0, 75179.0, 141379.0, 327600.0, 172551.0, 89006.0, 49612.0, 30236.0, 18268.0, 11298.0, 7032.0, 4604.0, 2921.0, 1820.0, 1200.0, 792.0, 530.0, 342.0, 212.0, 165.0, 101.0, 69.0, 43.0, 32.0, 21.0, 14.0, 7.0, 8.0, 6.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-2.47265625, -2.38922119140625, -2.3057861328125, -2.22235107421875, -2.138916015625, -2.05548095703125, -1.9720458984375, -1.88861083984375, -1.80517578125, -1.72174072265625, -1.6383056640625, -1.55487060546875, -1.471435546875, -1.38800048828125, -1.3045654296875, -1.22113037109375, -1.1376953125, -1.05426025390625, -0.9708251953125, -0.88739013671875, -0.803955078125, -0.72052001953125, -0.6370849609375, -0.55364990234375, -0.47021484375, -0.38677978515625, -0.3033447265625, -0.21990966796875, -0.136474609375, -0.05303955078125, 0.0303955078125, 0.11383056640625, 0.197265625, 0.28070068359375, 0.3641357421875, 0.44757080078125, 0.531005859375, 0.61444091796875, 0.6978759765625, 0.78131103515625, 0.86474609375, 0.94818115234375, 1.0316162109375, 1.11505126953125, 1.198486328125, 1.28192138671875, 1.3653564453125, 1.44879150390625, 1.5322265625, 1.61566162109375, 1.6990966796875, 1.78253173828125, 1.865966796875, 1.94940185546875, 2.0328369140625, 2.11627197265625, 2.19970703125, 2.28314208984375, 2.3665771484375, 2.45001220703125, 2.533447265625, 2.61688232421875, 2.7003173828125, 2.78375244140625, 2.8671875]}, "gradients/decoder.transformer.h.2.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 3.0, 5.0, 3.0, 4.0, 5.0, 4.0, 8.0, 7.0, 4.0, 9.0, 9.0, 9.0, 10.0, 7.0, 13.0, 12.0, 16.0, 26.0, 24.0, 20.0, 18.0, 30.0, 23.0, 35.0, 39.0, 32.0, 42.0, 28.0, 32.0, 1069.0, 39.0, 35.0, 32.0, 30.0, 28.0, 42.0, 21.0, 32.0, 22.0, 26.0, 30.0, 22.0, 17.0, 19.0, 14.0, 16.0, 14.0, 8.0, 5.0, 9.0, 5.0, 8.0, 9.0, 2.0, 2.0, 3.0, 4.0, 2.0, 3.0], "bins": [-39.46875, -38.33544921875, -37.2021484375, -36.06884765625, -34.935546875, -33.80224609375, -32.6689453125, -31.53564453125, -30.40234375, -29.26904296875, -28.1357421875, -27.00244140625, -25.869140625, -24.73583984375, -23.6025390625, -22.46923828125, -21.3359375, -20.20263671875, -19.0693359375, -17.93603515625, -16.802734375, -15.66943359375, -14.5361328125, -13.40283203125, -12.26953125, -11.13623046875, -10.0029296875, -8.86962890625, -7.736328125, -6.60302734375, -5.4697265625, -4.33642578125, -3.203125, -2.06982421875, -0.9365234375, 0.19677734375, 1.330078125, 2.46337890625, 3.5966796875, 4.72998046875, 5.86328125, 6.99658203125, 8.1298828125, 9.26318359375, 10.396484375, 11.52978515625, 12.6630859375, 13.79638671875, 14.9296875, 16.06298828125, 17.1962890625, 18.32958984375, 19.462890625, 20.59619140625, 21.7294921875, 22.86279296875, 23.99609375, 25.12939453125, 26.2626953125, 27.39599609375, 28.529296875, 29.66259765625, 30.7958984375, 31.92919921875, 33.0625]}, "gradients/decoder.transformer.h.2.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 4.0, 8.0, 12.0, 23.0, 29.0, 31.0, 52.0, 81.0, 133.0, 141.0, 192.0, 310.0, 423.0, 565.0, 816.0, 1158.0, 1645.0, 2339.0, 3191.0, 4647.0, 6492.0, 9524.0, 13792.0, 20438.0, 30012.0, 45952.0, 72958.0, 125294.0, 1349061.0, 158306.0, 86607.0, 53484.0, 34469.0, 23165.0, 15602.0, 10725.0, 7358.0, 5286.0, 3743.0, 2666.0, 1915.0, 1330.0, 989.0, 599.0, 460.0, 339.0, 218.0, 154.0, 122.0, 98.0, 58.0, 40.0, 31.0, 22.0, 21.0, 8.0, 5.0, 3.0, 0.0, 1.0, 2.0], "bins": [-1.583984375, -1.533966064453125, -1.48394775390625, -1.433929443359375, -1.3839111328125, -1.333892822265625, -1.28387451171875, -1.233856201171875, -1.183837890625, -1.133819580078125, -1.08380126953125, -1.033782958984375, -0.9837646484375, -0.933746337890625, -0.88372802734375, -0.833709716796875, -0.78369140625, -0.733673095703125, -0.68365478515625, -0.633636474609375, -0.5836181640625, -0.533599853515625, -0.48358154296875, -0.433563232421875, -0.383544921875, -0.333526611328125, -0.28350830078125, -0.233489990234375, -0.1834716796875, -0.133453369140625, -0.08343505859375, -0.033416748046875, 0.0166015625, 0.066619873046875, 0.11663818359375, 0.166656494140625, 0.2166748046875, 0.266693115234375, 0.31671142578125, 0.366729736328125, 0.416748046875, 0.466766357421875, 0.51678466796875, 0.566802978515625, 0.6168212890625, 0.666839599609375, 0.71685791015625, 0.766876220703125, 0.81689453125, 0.866912841796875, 0.91693115234375, 0.966949462890625, 1.0169677734375, 1.066986083984375, 1.11700439453125, 1.167022705078125, 1.217041015625, 1.267059326171875, 1.31707763671875, 1.367095947265625, 1.4171142578125, 1.467132568359375, 1.51715087890625, 1.567169189453125, 1.6171875]}, "gradients/decoder.transformer.h.2.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 2.0, 4.0, 3.0, 10.0, 4.0, 11.0, 5.0, 15.0, 11.0, 11.0, 26.0, 17.0, 29.0, 29.0, 41.0, 53.0, 63.0, 100.0, 125.0, 100.0, 72.0, 51.0, 50.0, 33.0, 33.0, 18.0, 18.0, 20.0, 10.0, 4.0, 7.0, 7.0, 4.0, 5.0, 5.0, 3.0, 3.0, 4.0, 1.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-9.655952453613281e-05, -9.328126907348633e-05, -9.000301361083984e-05, -8.672475814819336e-05, -8.344650268554688e-05, -8.016824722290039e-05, -7.68899917602539e-05, -7.361173629760742e-05, -7.033348083496094e-05, -6.705522537231445e-05, -6.377696990966797e-05, -6.0498714447021484e-05, -5.7220458984375e-05, -5.3942203521728516e-05, -5.066394805908203e-05, -4.738569259643555e-05, -4.410743713378906e-05, -4.082918167114258e-05, -3.7550926208496094e-05, -3.427267074584961e-05, -3.0994415283203125e-05, -2.771615982055664e-05, -2.4437904357910156e-05, -2.1159648895263672e-05, -1.7881393432617188e-05, -1.4603137969970703e-05, -1.1324882507324219e-05, -8.046627044677734e-06, -4.76837158203125e-06, -1.4901161193847656e-06, 1.7881393432617188e-06, 5.066394805908203e-06, 8.344650268554688e-06, 1.1622905731201172e-05, 1.4901161193847656e-05, 1.817941665649414e-05, 2.1457672119140625e-05, 2.473592758178711e-05, 2.8014183044433594e-05, 3.129243850708008e-05, 3.457069396972656e-05, 3.784894943237305e-05, 4.112720489501953e-05, 4.4405460357666016e-05, 4.76837158203125e-05, 5.0961971282958984e-05, 5.424022674560547e-05, 5.751848220825195e-05, 6.079673767089844e-05, 6.407499313354492e-05, 6.73532485961914e-05, 7.063150405883789e-05, 7.390975952148438e-05, 7.718801498413086e-05, 8.046627044677734e-05, 8.374452590942383e-05, 8.702278137207031e-05, 9.03010368347168e-05, 9.357929229736328e-05, 9.685754776000977e-05, 0.00010013580322265625, 0.00010341405868530273, 0.00010669231414794922, 0.0001099705696105957, 0.00011324882507324219]}, "gradients/decoder.transformer.h.2.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 4.0, 3.0, 0.0, 7.0, 3.0, 9.0, 5.0, 9.0, 9.0, 7.0, 15.0, 15.0, 11.0, 24.0, 49.0, 37.0, 57.0, 94.0, 140.0, 242.0, 546.0, 5198.0, 789773.0, 247943.0, 3208.0, 499.0, 216.0, 113.0, 75.0, 50.0, 47.0, 24.0, 29.0, 25.0, 20.0, 18.0, 12.0, 7.0, 10.0, 3.0, 5.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0], "bins": [-0.001983642578125, -0.0019268393516540527, -0.0018700361251831055, -0.0018132328987121582, -0.001756429672241211, -0.0016996264457702637, -0.0016428232192993164, -0.0015860199928283691, -0.0015292167663574219, -0.0014724135398864746, -0.0014156103134155273, -0.00135880708694458, -0.0013020038604736328, -0.0012452006340026855, -0.0011883974075317383, -0.001131594181060791, -0.0010747909545898438, -0.0010179877281188965, -0.0009611845016479492, -0.000904381275177002, -0.0008475780487060547, -0.0007907748222351074, -0.0007339715957641602, -0.0006771683692932129, -0.0006203651428222656, -0.0005635619163513184, -0.0005067586898803711, -0.00044995546340942383, -0.00039315223693847656, -0.0003363490104675293, -0.00027954578399658203, -0.00022274255752563477, -0.0001659393310546875, -0.00010913610458374023, -5.233287811279297e-05, 4.470348358154297e-06, 6.127357482910156e-05, 0.00011807680130004883, 0.0001748800277709961, 0.00023168325424194336, 0.0002884864807128906, 0.0003452897071838379, 0.00040209293365478516, 0.0004588961601257324, 0.0005156993865966797, 0.000572502613067627, 0.0006293058395385742, 0.0006861090660095215, 0.0007429122924804688, 0.000799715518951416, 0.0008565187454223633, 0.0009133219718933105, 0.0009701251983642578, 0.001026928424835205, 0.0010837316513061523, 0.0011405348777770996, 0.0011973381042480469, 0.0012541413307189941, 0.0013109445571899414, 0.0013677477836608887, 0.001424551010131836, 0.0014813542366027832, 0.0015381574630737305, 0.0015949606895446777, 0.001651763916015625]}, "gradients/decoder.transformer.h.2.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 5.0, 3.0, 8.0, 19.0, 32.0, 90.0, 212.0, 318.0, 183.0, 83.0, 41.0, 16.0, 9.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.824001608649269e-05, -9.366520680487156e-05, -8.909039752325043e-05, -8.45155882416293e-05, -7.994078623596579e-05, -7.536596967838705e-05, -7.079116767272353e-05, -6.62163583911024e-05, -6.164154910948128e-05, -5.706673982786015e-05, -5.249193054623902e-05, -4.79171249025967e-05, -4.334231562097557e-05, -3.876750633935444e-05, -3.419270069571212e-05, -2.961789141409099e-05, -2.5043082132469863e-05, -2.0468272850848734e-05, -1.589346538821701e-05, -1.1318657016090583e-05, -6.7438486439641565e-06, -2.169039362343028e-06, 2.4057681002886966e-06, 6.980575562920421e-06, 1.155538484454155e-05, 1.6130194126162678e-05, 2.0705001588794403e-05, 2.5279809051426128e-05, 2.9854618333047256e-05, 3.4429427614668384e-05, 3.9004233258310705e-05, 4.3579042539931834e-05, 4.815384454559535e-05, 5.2728653827216476e-05, 5.7303463108837605e-05, 6.187827239045873e-05, 6.645308167207986e-05, 7.102788367774338e-05, 7.56026929593645e-05, 8.017750224098563e-05, 8.475231152260676e-05, 8.932712080422789e-05, 9.390193008584902e-05, 9.847673936747015e-05, 0.00010305154137313366, 0.0001076263579307124, 0.00011220115993637592, 0.00011677596921799704, 0.00012135077849961817, 0.0001259255805052817, 0.00013050039706286043, 0.00013507519906852394, 0.00013965001562610269, 0.0001442248176317662, 0.00014879963418934494, 0.00015337443619500846, 0.0001579492527525872, 0.0001625240547582507, 0.00016709887131582946, 0.00017167367332149297, 0.0001762484898790717, 0.00018082329188473523, 0.00018539810844231397, 0.00018997291044797748, 0.000194547712453641]}, "gradients/decoder.transformer.h.2.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 5.0, 3.0, 1.0, 7.0, 7.0, 16.0, 11.0, 14.0, 21.0, 24.0, 20.0, 27.0, 20.0, 28.0, 27.0, 29.0, 37.0, 52.0, 49.0, 45.0, 44.0, 27.0, 43.0, 45.0, 45.0, 46.0, 40.0, 37.0, 33.0, 38.0, 26.0, 25.0, 18.0, 21.0, 16.0, 11.0, 13.0, 12.0, 8.0, 4.0, 5.0, 7.0, 2.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-5.173683166503906e-05, -5.017593502998352e-05, -4.861503839492798e-05, -4.7054141759872437e-05, -4.5493245124816895e-05, -4.393234848976135e-05, -4.237145185470581e-05, -4.081055521965027e-05, -3.9249658584594727e-05, -3.7688761949539185e-05, -3.612786531448364e-05, -3.45669686794281e-05, -3.300607204437256e-05, -3.1445175409317017e-05, -2.9884278774261475e-05, -2.8323382139205933e-05, -2.676248550415039e-05, -2.520158886909485e-05, -2.3640692234039307e-05, -2.2079795598983765e-05, -2.0518898963928223e-05, -1.895800232887268e-05, -1.739710569381714e-05, -1.5836209058761597e-05, -1.4275312423706055e-05, -1.2714415788650513e-05, -1.115351915359497e-05, -9.592622518539429e-06, -8.031725883483887e-06, -6.470829248428345e-06, -4.909932613372803e-06, -3.3490359783172607e-06, -1.7881393432617188e-06, -2.2724270820617676e-07, 1.3336539268493652e-06, 2.8945505619049072e-06, 4.455447196960449e-06, 6.016343832015991e-06, 7.577240467071533e-06, 9.138137102127075e-06, 1.0699033737182617e-05, 1.225993037223816e-05, 1.3820827007293701e-05, 1.5381723642349243e-05, 1.6942620277404785e-05, 1.8503516912460327e-05, 2.006441354751587e-05, 2.162531018257141e-05, 2.3186206817626953e-05, 2.4747103452682495e-05, 2.6308000087738037e-05, 2.786889672279358e-05, 2.942979335784912e-05, 3.099068999290466e-05, 3.2551586627960205e-05, 3.411248326301575e-05, 3.567337989807129e-05, 3.723427653312683e-05, 3.879517316818237e-05, 4.0356069803237915e-05, 4.191696643829346e-05, 4.3477863073349e-05, 4.503875970840454e-05, 4.659965634346008e-05, 4.8160552978515625e-05]}, "gradients/decoder.transformer.h.2.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 3.0, 3.0, 1.0, 4.0, 3.0, 3.0, 9.0, 4.0, 4.0, 7.0, 14.0, 12.0, 14.0, 14.0, 23.0, 21.0, 26.0, 26.0, 25.0, 39.0, 35.0, 40.0, 46.0, 39.0, 38.0, 40.0, 40.0, 32.0, 36.0, 42.0, 45.0, 26.0, 34.0, 30.0, 23.0, 26.0, 26.0, 21.0, 25.0, 26.0, 21.0, 20.0, 14.0, 7.0, 5.0, 3.0, 2.0, 7.0, 3.0, 4.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-58.90625, -56.91943359375, -54.9326171875, -52.94580078125, -50.958984375, -48.97216796875, -46.9853515625, -44.99853515625, -43.01171875, -41.02490234375, -39.0380859375, -37.05126953125, -35.064453125, -33.07763671875, -31.0908203125, -29.10400390625, -27.1171875, -25.13037109375, -23.1435546875, -21.15673828125, -19.169921875, -17.18310546875, -15.1962890625, -13.20947265625, -11.22265625, -9.23583984375, -7.2490234375, -5.26220703125, -3.275390625, -1.28857421875, 0.6982421875, 2.68505859375, 4.671875, 6.65869140625, 8.6455078125, 10.63232421875, 12.619140625, 14.60595703125, 16.5927734375, 18.57958984375, 20.56640625, 22.55322265625, 24.5400390625, 26.52685546875, 28.513671875, 30.50048828125, 32.4873046875, 34.47412109375, 36.4609375, 38.44775390625, 40.4345703125, 42.42138671875, 44.408203125, 46.39501953125, 48.3818359375, 50.36865234375, 52.35546875, 54.34228515625, 56.3291015625, 58.31591796875, 60.302734375, 62.28955078125, 64.2763671875, 66.26318359375, 68.25]}, "gradients/decoder.transformer.h.2.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 4.0, 3.0, 6.0, 14.0, 11.0, 17.0, 16.0, 29.0, 25.0, 38.0, 71.0, 99.0, 135.0, 192.0, 230.0, 361.0, 486.0, 763.0, 1082.0, 1626.0, 2470.0, 4106.0, 7104.0, 15564.0, 43428.0, 195790.0, 600056.0, 115080.0, 31167.0, 12109.0, 6112.0, 3640.0, 2109.0, 1452.0, 942.0, 658.0, 431.0, 322.0, 241.0, 175.0, 115.0, 94.0, 55.0, 37.0, 23.0, 20.0, 22.0, 10.0, 11.0, 3.0, 5.0, 3.0, 2.0, 2.0, 2.0, 0.0, 4.0], "bins": [-138.75, -134.58984375, -130.4296875, -126.26953125, -122.109375, -117.94921875, -113.7890625, -109.62890625, -105.46875, -101.30859375, -97.1484375, -92.98828125, -88.828125, -84.66796875, -80.5078125, -76.34765625, -72.1875, -68.02734375, -63.8671875, -59.70703125, -55.546875, -51.38671875, -47.2265625, -43.06640625, -38.90625, -34.74609375, -30.5859375, -26.42578125, -22.265625, -18.10546875, -13.9453125, -9.78515625, -5.625, -1.46484375, 2.6953125, 6.85546875, 11.015625, 15.17578125, 19.3359375, 23.49609375, 27.65625, 31.81640625, 35.9765625, 40.13671875, 44.296875, 48.45703125, 52.6171875, 56.77734375, 60.9375, 65.09765625, 69.2578125, 73.41796875, 77.578125, 81.73828125, 85.8984375, 90.05859375, 94.21875, 98.37890625, 102.5390625, 106.69921875, 110.859375, 115.01953125, 119.1796875, 123.33984375, 127.5]}, "gradients/decoder.transformer.h.2.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 2.0, 4.0, 6.0, 4.0, 6.0, 4.0, 11.0, 3.0, 12.0, 16.0, 17.0, 29.0, 17.0, 27.0, 23.0, 25.0, 43.0, 29.0, 46.0, 47.0, 71.0, 81.0, 93.0, 316.0, 1460.0, 142.0, 101.0, 59.0, 46.0, 43.0, 42.0, 29.0, 28.0, 24.0, 19.0, 20.0, 21.0, 13.0, 12.0, 8.0, 12.0, 8.0, 10.0, 12.0, 4.0, 2.0, 3.0, 3.0, 2.0, 2.0, 4.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0], "bins": [-124.5625, -120.5615234375, -116.560546875, -112.5595703125, -108.55859375, -104.5576171875, -100.556640625, -96.5556640625, -92.5546875, -88.5537109375, -84.552734375, -80.5517578125, -76.55078125, -72.5498046875, -68.548828125, -64.5478515625, -60.546875, -56.5458984375, -52.544921875, -48.5439453125, -44.54296875, -40.5419921875, -36.541015625, -32.5400390625, -28.5390625, -24.5380859375, -20.537109375, -16.5361328125, -12.53515625, -8.5341796875, -4.533203125, -0.5322265625, 3.46875, 7.4697265625, 11.470703125, 15.4716796875, 19.47265625, 23.4736328125, 27.474609375, 31.4755859375, 35.4765625, 39.4775390625, 43.478515625, 47.4794921875, 51.48046875, 55.4814453125, 59.482421875, 63.4833984375, 67.484375, 71.4853515625, 75.486328125, 79.4873046875, 83.48828125, 87.4892578125, 91.490234375, 95.4912109375, 99.4921875, 103.4931640625, 107.494140625, 111.4951171875, 115.49609375, 119.4970703125, 123.498046875, 127.4990234375, 131.5]}, "gradients/decoder.transformer.h.2.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 4.0, 4.0, 6.0, 5.0, 2.0, 5.0, 6.0, 10.0, 16.0, 26.0, 26.0, 42.0, 61.0, 72.0, 132.0, 208.0, 393.0, 1207.0, 13280.0, 3082144.0, 44937.0, 1897.0, 517.0, 260.0, 143.0, 95.0, 58.0, 49.0, 38.0, 22.0, 16.0, 11.0, 8.0, 8.0, 3.0, 3.0, 1.0, 1.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-478.75, -462.59765625, -446.4453125, -430.29296875, -414.140625, -397.98828125, -381.8359375, -365.68359375, -349.53125, -333.37890625, -317.2265625, -301.07421875, -284.921875, -268.76953125, -252.6171875, -236.46484375, -220.3125, -204.16015625, -188.0078125, -171.85546875, -155.703125, -139.55078125, -123.3984375, -107.24609375, -91.09375, -74.94140625, -58.7890625, -42.63671875, -26.484375, -10.33203125, 5.8203125, 21.97265625, 38.125, 54.27734375, 70.4296875, 86.58203125, 102.734375, 118.88671875, 135.0390625, 151.19140625, 167.34375, 183.49609375, 199.6484375, 215.80078125, 231.953125, 248.10546875, 264.2578125, 280.41015625, 296.5625, 312.71484375, 328.8671875, 345.01953125, 361.171875, 377.32421875, 393.4765625, 409.62890625, 425.78125, 441.93359375, 458.0859375, 474.23828125, 490.390625, 506.54296875, 522.6953125, 538.84765625, 555.0]}, "gradients/decoder.transformer.h.2.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 3.0, 17.0, 157.0, 545.0, 229.0, 53.0, 10.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2238.07177734375, -2177.052734375, -2116.033935546875, -2055.014892578125, -1993.995849609375, -1932.9769287109375, -1871.9578857421875, -1810.93896484375, -1749.919921875, -1688.9010009765625, -1627.8819580078125, -1566.863037109375, -1505.843994140625, -1444.8250732421875, -1383.8060302734375, -1322.787109375, -1261.76806640625, -1200.7491455078125, -1139.7301025390625, -1078.711181640625, -1017.692138671875, -956.6732177734375, -895.6541748046875, -834.63525390625, -773.6163330078125, -712.5973510742188, -651.578369140625, -590.5593872070312, -529.5404052734375, -468.5214538574219, -407.5024719238281, -346.4834899902344, -285.4644775390625, -224.44549560546875, -163.426513671875, -102.40754699707031, -41.38856506347656, 19.630401611328125, 80.64938354492188, 141.66836547851562, 202.68734741210938, 263.7063293457031, 324.7253112792969, 385.7442626953125, 446.76324462890625, 507.7822265625, 568.8012084960938, 629.8201904296875, 690.8391723632812, 751.858154296875, 812.8771362304688, 873.8961181640625, 934.9151000976562, 995.93408203125, 1056.9530029296875, 1117.9720458984375, 1178.990966796875, 1240.0098876953125, 1301.0289306640625, 1362.0478515625, 1423.06689453125, 1484.0858154296875, 1545.1048583984375, 1606.123779296875, 1667.142822265625]}, "gradients/decoder.transformer.h.2.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 4.0, 6.0, 1.0, 4.0, 3.0, 6.0, 2.0, 10.0, 10.0, 14.0, 13.0, 9.0, 14.0, 26.0, 27.0, 23.0, 25.0, 34.0, 28.0, 35.0, 41.0, 49.0, 36.0, 49.0, 44.0, 48.0, 42.0, 40.0, 43.0, 50.0, 40.0, 36.0, 36.0, 31.0, 23.0, 11.0, 17.0, 13.0, 12.0, 9.0, 15.0, 10.0, 7.0, 9.0, 4.0, 3.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-495.6785583496094, -481.4095764160156, -467.1405944824219, -452.8716125488281, -438.6026611328125, -424.33367919921875, -410.064697265625, -395.79571533203125, -381.5267333984375, -367.25775146484375, -352.98876953125, -338.71978759765625, -324.4508056640625, -310.18182373046875, -295.9128723144531, -281.6438903808594, -267.3749084472656, -253.10592651367188, -238.83694458007812, -224.56797790527344, -210.2989959716797, -196.03001403808594, -181.76104736328125, -167.4920654296875, -153.22308349609375, -138.9541015625, -124.68512725830078, -110.41615295410156, -96.14717102050781, -81.87818908691406, -67.60921478271484, -53.340240478515625, -39.071258544921875, -24.80228042602539, -10.533302307128906, 3.735675811767578, 18.004653930664062, 32.27363586425781, 46.54261016845703, 60.81158447265625, 75.08056640625, 89.34954833984375, 103.61852264404297, 117.88749694824219, 132.15647888183594, 146.4254608154297, 160.69442749023438, 174.96340942382812, 189.23239135742188, 203.50137329101562, 217.77035522460938, 232.03932189941406, 246.3083038330078, 260.5772705078125, 274.84625244140625, 289.115234375, 303.38421630859375, 317.6531982421875, 331.92218017578125, 346.191162109375, 360.46014404296875, 374.7291259765625, 388.9980773925781, 403.2670593261719, 417.5360412597656]}, "gradients/decoder.transformer.h.1.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 0.0, 5.0, 1.0, 6.0, 2.0, 6.0, 7.0, 7.0, 12.0, 7.0, 12.0, 14.0, 16.0, 21.0, 22.0, 33.0, 30.0, 52.0, 31.0, 28.0, 42.0, 45.0, 33.0, 42.0, 34.0, 42.0, 42.0, 36.0, 39.0, 44.0, 34.0, 29.0, 39.0, 26.0, 29.0, 26.0, 18.0, 21.0, 14.0, 12.0, 11.0, 10.0, 12.0, 4.0, 3.0, 3.0, 6.0, 5.0, 2.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-63.0625, -60.888671875, -58.71484375, -56.541015625, -54.3671875, -52.193359375, -50.01953125, -47.845703125, -45.671875, -43.498046875, -41.32421875, -39.150390625, -36.9765625, -34.802734375, -32.62890625, -30.455078125, -28.28125, -26.107421875, -23.93359375, -21.759765625, -19.5859375, -17.412109375, -15.23828125, -13.064453125, -10.890625, -8.716796875, -6.54296875, -4.369140625, -2.1953125, -0.021484375, 2.15234375, 4.326171875, 6.5, 8.673828125, 10.84765625, 13.021484375, 15.1953125, 17.369140625, 19.54296875, 21.716796875, 23.890625, 26.064453125, 28.23828125, 30.412109375, 32.5859375, 34.759765625, 36.93359375, 39.107421875, 41.28125, 43.455078125, 45.62890625, 47.802734375, 49.9765625, 52.150390625, 54.32421875, 56.498046875, 58.671875, 60.845703125, 63.01953125, 65.193359375, 67.3671875, 69.541015625, 71.71484375, 73.888671875, 76.0625]}, "gradients/decoder.transformer.h.1.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 3.0, 1.0, 5.0, 8.0, 9.0, 6.0, 17.0, 15.0, 31.0, 45.0, 53.0, 72.0, 112.0, 105.0, 193.0, 261.0, 361.0, 543.0, 836.0, 1362.0, 2377.0, 4494.0, 10679.0, 34702.0, 414038.0, 3352849.0, 319697.0, 30706.0, 10097.0, 4356.0, 2221.0, 1419.0, 847.0, 511.0, 323.0, 241.0, 169.0, 148.0, 110.0, 78.0, 68.0, 37.0, 35.0, 14.0, 14.0, 11.0, 5.0, 3.0, 3.0, 1.0, 4.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-198.25, -191.28515625, -184.3203125, -177.35546875, -170.390625, -163.42578125, -156.4609375, -149.49609375, -142.53125, -135.56640625, -128.6015625, -121.63671875, -114.671875, -107.70703125, -100.7421875, -93.77734375, -86.8125, -79.84765625, -72.8828125, -65.91796875, -58.953125, -51.98828125, -45.0234375, -38.05859375, -31.09375, -24.12890625, -17.1640625, -10.19921875, -3.234375, 3.73046875, 10.6953125, 17.66015625, 24.625, 31.58984375, 38.5546875, 45.51953125, 52.484375, 59.44921875, 66.4140625, 73.37890625, 80.34375, 87.30859375, 94.2734375, 101.23828125, 108.203125, 115.16796875, 122.1328125, 129.09765625, 136.0625, 143.02734375, 149.9921875, 156.95703125, 163.921875, 170.88671875, 177.8515625, 184.81640625, 191.78125, 198.74609375, 205.7109375, 212.67578125, 219.640625, 226.60546875, 233.5703125, 240.53515625, 247.5]}, "gradients/decoder.transformer.h.1.mlp.c_fc.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 0.0, 0.0, 2.0, 4.0, 0.0, 1.0, 5.0, 10.0, 4.0, 13.0, 8.0, 21.0, 19.0, 31.0, 46.0, 66.0, 92.0, 149.0, 253.0, 434.0, 739.0, 926.0, 498.0, 273.0, 159.0, 108.0, 68.0, 58.0, 25.0, 21.0, 12.0, 11.0, 6.0, 6.0, 3.0, 4.0, 4.0, 1.0, 0.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-154.375, -149.724609375, -145.07421875, -140.423828125, -135.7734375, -131.123046875, -126.47265625, -121.822265625, -117.171875, -112.521484375, -107.87109375, -103.220703125, -98.5703125, -93.919921875, -89.26953125, -84.619140625, -79.96875, -75.318359375, -70.66796875, -66.017578125, -61.3671875, -56.716796875, -52.06640625, -47.416015625, -42.765625, -38.115234375, -33.46484375, -28.814453125, -24.1640625, -19.513671875, -14.86328125, -10.212890625, -5.5625, -0.912109375, 3.73828125, 8.388671875, 13.0390625, 17.689453125, 22.33984375, 26.990234375, 31.640625, 36.291015625, 40.94140625, 45.591796875, 50.2421875, 54.892578125, 59.54296875, 64.193359375, 68.84375, 73.494140625, 78.14453125, 82.794921875, 87.4453125, 92.095703125, 96.74609375, 101.396484375, 106.046875, 110.697265625, 115.34765625, 119.998046875, 124.6484375, 129.298828125, 133.94921875, 138.599609375, 143.25]}, "gradients/decoder.transformer.h.1.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 2.0, 3.0, 5.0, 7.0, 12.0, 12.0, 19.0, 32.0, 56.0, 86.0, 147.0, 249.0, 527.0, 1145.0, 2999.0, 10481.0, 61206.0, 2193265.0, 1850114.0, 58523.0, 10029.0, 2999.0, 1172.0, 514.0, 284.0, 148.0, 80.0, 48.0, 40.0, 32.0, 17.0, 15.0, 7.0, 2.0, 4.0, 5.0, 3.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-299.0, -289.3515625, -279.703125, -270.0546875, -260.40625, -250.7578125, -241.109375, -231.4609375, -221.8125, -212.1640625, -202.515625, -192.8671875, -183.21875, -173.5703125, -163.921875, -154.2734375, -144.625, -134.9765625, -125.328125, -115.6796875, -106.03125, -96.3828125, -86.734375, -77.0859375, -67.4375, -57.7890625, -48.140625, -38.4921875, -28.84375, -19.1953125, -9.546875, 0.1015625, 9.75, 19.3984375, 29.046875, 38.6953125, 48.34375, 57.9921875, 67.640625, 77.2890625, 86.9375, 96.5859375, 106.234375, 115.8828125, 125.53125, 135.1796875, 144.828125, 154.4765625, 164.125, 173.7734375, 183.421875, 193.0703125, 202.71875, 212.3671875, 222.015625, 231.6640625, 241.3125, 250.9609375, 260.609375, 270.2578125, 279.90625, 289.5546875, 299.203125, 308.8515625, 318.5]}, "gradients/decoder.transformer.h.1.ln_2.weight": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 2.0, 11.0, 64.0, 398.0, 448.0, 72.0, 17.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-684.424072265625, -588.0590209960938, -491.6939697265625, -395.32891845703125, -298.9638671875, -202.59881591796875, -106.2337646484375, -9.86871337890625, 86.496337890625, 182.86138916015625, 279.2264404296875, 375.59149169921875, 471.95654296875, 568.3215942382812, 664.6866455078125, 761.0516967773438, 857.416748046875, 953.7817993164062, 1050.1468505859375, 1146.511962890625, 1242.876953125, 1339.241943359375, 1435.6070556640625, 1531.97216796875, 1628.337158203125, 1724.7021484375, 1821.0672607421875, 1917.432373046875, 2013.79736328125, 2110.162353515625, 2206.52734375, 2302.892578125, 2399.25732421875, 2495.622314453125, 2591.9873046875, 2688.3525390625, 2784.717529296875, 2881.08251953125, 2977.44775390625, 3073.812744140625, 3170.177734375, 3266.542724609375, 3362.90771484375, 3459.27294921875, 3555.637939453125, 3652.0029296875, 3748.3681640625, 3844.733154296875, 3941.09814453125, 4037.463134765625, 4133.828125, 4230.193359375, 4326.55859375, 4422.92333984375, 4519.28857421875, 4615.6533203125, 4712.0185546875, 4808.3837890625, 4904.74853515625, 5001.11376953125, 5097.478515625, 5193.84375, 5290.208984375, 5386.57373046875, 5482.93896484375]}, "gradients/decoder.transformer.h.1.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 1.0, 1.0, 3.0, 1.0, 3.0, 4.0, 3.0, 4.0, 5.0, 4.0, 8.0, 14.0, 15.0, 17.0, 23.0, 26.0, 19.0, 40.0, 33.0, 35.0, 31.0, 33.0, 40.0, 46.0, 51.0, 49.0, 36.0, 29.0, 56.0, 37.0, 45.0, 48.0, 27.0, 36.0, 32.0, 18.0, 21.0, 16.0, 19.0, 17.0, 15.0, 7.0, 9.0, 10.0, 7.0, 5.0, 3.0, 3.0, 3.0, 4.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-409.103759765625, -396.1347961425781, -383.16583251953125, -370.1968688964844, -357.2279052734375, -344.2589416503906, -331.28997802734375, -318.321044921875, -305.35205078125, -292.3830871582031, -279.41412353515625, -266.4451599121094, -253.4761962890625, -240.50723266601562, -227.5382843017578, -214.56932067871094, -201.60037231445312, -188.63140869140625, -175.66244506835938, -162.6934814453125, -149.72451782226562, -136.75555419921875, -123.78660583496094, -110.81764221191406, -97.84867858886719, -84.87971496582031, -71.91075134277344, -58.941795349121094, -45.97283172607422, -33.003868103027344, -20.034912109375, -7.065948486328125, 5.90301513671875, 18.871976852416992, 31.840938568115234, 44.809898376464844, 57.77886199951172, 70.7478256225586, 83.71678161621094, 96.68574523925781, 109.65470886230469, 122.62367248535156, 135.59263610839844, 148.56158447265625, 161.53054809570312, 174.49951171875, 187.46847534179688, 200.43743896484375, 213.40640258789062, 226.3753662109375, 239.34432983398438, 252.31329345703125, 265.2822570800781, 278.251220703125, 291.22015380859375, 304.18914794921875, 317.1580810546875, 330.1270446777344, 343.09600830078125, 356.0649719238281, 369.033935546875, 382.0028991699219, 394.97186279296875, 407.9407958984375, 420.9097900390625]}, "gradients/decoder.transformer.h.1.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 3.0, 5.0, 4.0, 5.0, 4.0, 16.0, 15.0, 6.0, 19.0, 15.0, 15.0, 15.0, 28.0, 39.0, 27.0, 21.0, 33.0, 49.0, 37.0, 41.0, 35.0, 46.0, 44.0, 41.0, 43.0, 33.0, 42.0, 41.0, 29.0, 37.0, 29.0, 25.0, 30.0, 23.0, 18.0, 15.0, 16.0, 10.0, 6.0, 16.0, 7.0, 9.0, 4.0, 8.0, 3.0, 1.0, 0.0, 1.0, 4.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0], "bins": [-67.0625, -64.95703125, -62.8515625, -60.74609375, -58.640625, -56.53515625, -54.4296875, -52.32421875, -50.21875, -48.11328125, -46.0078125, -43.90234375, -41.796875, -39.69140625, -37.5859375, -35.48046875, -33.375, -31.26953125, -29.1640625, -27.05859375, -24.953125, -22.84765625, -20.7421875, -18.63671875, -16.53125, -14.42578125, -12.3203125, -10.21484375, -8.109375, -6.00390625, -3.8984375, -1.79296875, 0.3125, 2.41796875, 4.5234375, 6.62890625, 8.734375, 10.83984375, 12.9453125, 15.05078125, 17.15625, 19.26171875, 21.3671875, 23.47265625, 25.578125, 27.68359375, 29.7890625, 31.89453125, 34.0, 36.10546875, 38.2109375, 40.31640625, 42.421875, 44.52734375, 46.6328125, 48.73828125, 50.84375, 52.94921875, 55.0546875, 57.16015625, 59.265625, 61.37109375, 63.4765625, 65.58203125, 67.6875]}, "gradients/decoder.transformer.h.1.crossattention.c_proj.weight": {"_type": "histogram", "values": [3.0, 1.0, 4.0, 2.0, 8.0, 6.0, 10.0, 15.0, 21.0, 21.0, 40.0, 56.0, 60.0, 101.0, 158.0, 234.0, 332.0, 471.0, 689.0, 969.0, 1452.0, 2092.0, 3253.0, 4791.0, 7091.0, 10755.0, 16382.0, 25364.0, 40396.0, 66863.0, 117122.0, 298566.0, 198723.0, 96647.0, 56241.0, 34524.0, 22358.0, 14071.0, 9444.0, 6329.0, 4155.0, 2875.0, 1752.0, 1304.0, 844.0, 623.0, 390.0, 281.0, 205.0, 151.0, 87.0, 76.0, 56.0, 33.0, 28.0, 12.0, 9.0, 11.0, 4.0, 4.0, 5.0, 1.0, 4.0, 1.0], "bins": [-2.494140625, -2.415802001953125, -2.33746337890625, -2.259124755859375, -2.1807861328125, -2.102447509765625, -2.02410888671875, -1.945770263671875, -1.867431640625, -1.789093017578125, -1.71075439453125, -1.632415771484375, -1.5540771484375, -1.475738525390625, -1.39739990234375, -1.319061279296875, -1.24072265625, -1.162384033203125, -1.08404541015625, -1.005706787109375, -0.9273681640625, -0.849029541015625, -0.77069091796875, -0.692352294921875, -0.614013671875, -0.535675048828125, -0.45733642578125, -0.378997802734375, -0.3006591796875, -0.222320556640625, -0.14398193359375, -0.065643310546875, 0.0126953125, 0.091033935546875, 0.16937255859375, 0.247711181640625, 0.3260498046875, 0.404388427734375, 0.48272705078125, 0.561065673828125, 0.639404296875, 0.717742919921875, 0.79608154296875, 0.874420166015625, 0.9527587890625, 1.031097412109375, 1.10943603515625, 1.187774658203125, 1.26611328125, 1.344451904296875, 1.42279052734375, 1.501129150390625, 1.5794677734375, 1.657806396484375, 1.73614501953125, 1.814483642578125, 1.892822265625, 1.971160888671875, 2.04949951171875, 2.127838134765625, 2.2061767578125, 2.284515380859375, 2.36285400390625, 2.441192626953125, 2.51953125]}, "gradients/decoder.transformer.h.1.crossattention.c_attn.bias": {"_type": "histogram", "values": [2.0, 4.0, 0.0, 3.0, 2.0, 2.0, 2.0, 4.0, 8.0, 5.0, 7.0, 5.0, 8.0, 12.0, 17.0, 15.0, 11.0, 14.0, 25.0, 21.0, 31.0, 29.0, 35.0, 38.0, 25.0, 40.0, 36.0, 32.0, 33.0, 33.0, 1065.0, 29.0, 41.0, 36.0, 38.0, 34.0, 38.0, 27.0, 23.0, 34.0, 31.0, 29.0, 20.0, 12.0, 20.0, 14.0, 9.0, 6.0, 10.0, 7.0, 4.0, 4.0, 4.0, 5.0, 3.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-39.03125, -37.744140625, -36.45703125, -35.169921875, -33.8828125, -32.595703125, -31.30859375, -30.021484375, -28.734375, -27.447265625, -26.16015625, -24.873046875, -23.5859375, -22.298828125, -21.01171875, -19.724609375, -18.4375, -17.150390625, -15.86328125, -14.576171875, -13.2890625, -12.001953125, -10.71484375, -9.427734375, -8.140625, -6.853515625, -5.56640625, -4.279296875, -2.9921875, -1.705078125, -0.41796875, 0.869140625, 2.15625, 3.443359375, 4.73046875, 6.017578125, 7.3046875, 8.591796875, 9.87890625, 11.166015625, 12.453125, 13.740234375, 15.02734375, 16.314453125, 17.6015625, 18.888671875, 20.17578125, 21.462890625, 22.75, 24.037109375, 25.32421875, 26.611328125, 27.8984375, 29.185546875, 30.47265625, 31.759765625, 33.046875, 34.333984375, 35.62109375, 36.908203125, 38.1953125, 39.482421875, 40.76953125, 42.056640625, 43.34375]}, "gradients/decoder.transformer.h.1.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 2.0, 5.0, 9.0, 12.0, 12.0, 26.0, 26.0, 45.0, 66.0, 77.0, 143.0, 168.0, 267.0, 323.0, 497.0, 744.0, 992.0, 1457.0, 2142.0, 3037.0, 4494.0, 6501.0, 9906.0, 14798.0, 22825.0, 34852.0, 54573.0, 89333.0, 161605.0, 1344669.0, 130111.0, 75586.0, 47272.0, 30339.0, 19637.0, 12977.0, 8718.0, 5864.0, 4023.0, 2733.0, 1920.0, 1308.0, 940.0, 632.0, 443.0, 308.0, 224.0, 138.0, 98.0, 75.0, 71.0, 39.0, 27.0, 19.0, 12.0, 8.0, 9.0, 8.0, 2.0, 1.0, 1.0], "bins": [-1.775390625, -1.720489501953125, -1.66558837890625, -1.610687255859375, -1.5557861328125, -1.500885009765625, -1.44598388671875, -1.391082763671875, -1.336181640625, -1.281280517578125, -1.22637939453125, -1.171478271484375, -1.1165771484375, -1.061676025390625, -1.00677490234375, -0.951873779296875, -0.89697265625, -0.842071533203125, -0.78717041015625, -0.732269287109375, -0.6773681640625, -0.622467041015625, -0.56756591796875, -0.512664794921875, -0.457763671875, -0.402862548828125, -0.34796142578125, -0.293060302734375, -0.2381591796875, -0.183258056640625, -0.12835693359375, -0.073455810546875, -0.0185546875, 0.036346435546875, 0.09124755859375, 0.146148681640625, 0.2010498046875, 0.255950927734375, 0.31085205078125, 0.365753173828125, 0.420654296875, 0.475555419921875, 0.53045654296875, 0.585357666015625, 0.6402587890625, 0.695159912109375, 0.75006103515625, 0.804962158203125, 0.85986328125, 0.914764404296875, 0.96966552734375, 1.024566650390625, 1.0794677734375, 1.134368896484375, 1.18927001953125, 1.244171142578125, 1.299072265625, 1.353973388671875, 1.40887451171875, 1.463775634765625, 1.5186767578125, 1.573577880859375, 1.62847900390625, 1.683380126953125, 1.73828125]}, "gradients/decoder.transformer.h.1.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 4.0, 3.0, 4.0, 0.0, 2.0, 2.0, 5.0, 7.0, 5.0, 8.0, 7.0, 9.0, 17.0, 11.0, 16.0, 19.0, 29.0, 26.0, 60.0, 90.0, 155.0, 171.0, 105.0, 62.0, 32.0, 20.0, 25.0, 24.0, 17.0, 9.0, 12.0, 13.0, 13.0, 8.0, 5.0, 5.0, 3.0, 3.0, 3.0, 1.0, 1.0, 5.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00014853477478027344, -0.0001437664031982422, -0.00013899803161621094, -0.0001342296600341797, -0.00012946128845214844, -0.0001246929168701172, -0.00011992454528808594, -0.00011515617370605469, -0.00011038780212402344, -0.00010561943054199219, -0.00010085105895996094, -9.608268737792969e-05, -9.131431579589844e-05, -8.654594421386719e-05, -8.177757263183594e-05, -7.700920104980469e-05, -7.224082946777344e-05, -6.747245788574219e-05, -6.270408630371094e-05, -5.793571472167969e-05, -5.316734313964844e-05, -4.839897155761719e-05, -4.363059997558594e-05, -3.886222839355469e-05, -3.409385681152344e-05, -2.9325485229492188e-05, -2.4557113647460938e-05, -1.9788742065429688e-05, -1.5020370483398438e-05, -1.0251998901367188e-05, -5.4836273193359375e-06, -7.152557373046875e-07, 4.0531158447265625e-06, 8.821487426757812e-06, 1.3589859008789062e-05, 1.8358230590820312e-05, 2.3126602172851562e-05, 2.7894973754882812e-05, 3.266334533691406e-05, 3.743171691894531e-05, 4.220008850097656e-05, 4.696846008300781e-05, 5.173683166503906e-05, 5.650520324707031e-05, 6.127357482910156e-05, 6.604194641113281e-05, 7.081031799316406e-05, 7.557868957519531e-05, 8.034706115722656e-05, 8.511543273925781e-05, 8.988380432128906e-05, 9.465217590332031e-05, 9.942054748535156e-05, 0.00010418891906738281, 0.00010895729064941406, 0.00011372566223144531, 0.00011849403381347656, 0.0001232624053955078, 0.00012803077697753906, 0.0001327991485595703, 0.00013756752014160156, 0.0001423358917236328, 0.00014710426330566406, 0.0001518726348876953, 0.00015664100646972656]}, "gradients/decoder.transformer.h.1.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 3.0, 0.0, 0.0, 4.0, 2.0, 4.0, 4.0, 6.0, 11.0, 6.0, 13.0, 12.0, 21.0, 20.0, 24.0, 32.0, 34.0, 47.0, 75.0, 83.0, 128.0, 247.0, 620.0, 6626.0, 842121.0, 193892.0, 3375.0, 483.0, 189.0, 130.0, 73.0, 63.0, 42.0, 35.0, 29.0, 15.0, 17.0, 16.0, 14.0, 4.0, 7.0, 6.0, 7.0, 8.0, 5.0, 4.0, 2.0, 1.0, 2.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0], "bins": [-0.00232696533203125, -0.0022562146186828613, -0.0021854639053344727, -0.002114713191986084, -0.0020439624786376953, -0.0019732117652893066, -0.001902461051940918, -0.0018317103385925293, -0.0017609596252441406, -0.001690208911895752, -0.0016194581985473633, -0.0015487074851989746, -0.001477956771850586, -0.0014072060585021973, -0.0013364553451538086, -0.00126570463180542, -0.0011949539184570312, -0.0011242032051086426, -0.001053452491760254, -0.0009827017784118652, -0.0009119510650634766, -0.0008412003517150879, -0.0007704496383666992, -0.0006996989250183105, -0.0006289482116699219, -0.0005581974983215332, -0.00048744678497314453, -0.00041669607162475586, -0.0003459453582763672, -0.0002751946449279785, -0.00020444393157958984, -0.00013369321823120117, -6.29425048828125e-05, 7.808208465576172e-06, 7.855892181396484e-05, 0.00014930963516235352, 0.0002200603485107422, 0.00029081106185913086, 0.00036156177520751953, 0.0004323124885559082, 0.0005030632019042969, 0.0005738139152526855, 0.0006445646286010742, 0.0007153153419494629, 0.0007860660552978516, 0.0008568167686462402, 0.0009275674819946289, 0.0009983181953430176, 0.0010690689086914062, 0.001139819622039795, 0.0012105703353881836, 0.0012813210487365723, 0.001352071762084961, 0.0014228224754333496, 0.0014935731887817383, 0.001564323902130127, 0.0016350746154785156, 0.0017058253288269043, 0.001776576042175293, 0.0018473267555236816, 0.0019180774688720703, 0.001988828182220459, 0.0020595788955688477, 0.0021303296089172363, 0.002201080322265625]}, "gradients/decoder.transformer.h.1.ln_cross_attn.weight": {"_type": "histogram", "values": [4.0, 1.0, 0.0, 1.0, 5.0, 10.0, 31.0, 109.0, 425.0, 333.0, 73.0, 21.0, 8.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.037957352120429e-05, -8.015183266252279e-05, -6.992409180384129e-05, -5.969635822111741e-05, -4.946861736243591e-05, -3.924087650375441e-05, -2.901314292103052e-05, -1.878540206234902e-05, -8.557661203667521e-06, 1.670077836024575e-06, 1.1897816875716671e-05, 2.2125554096419364e-05, 3.2353294955100864e-05, 4.2581035813782364e-05, 5.280876939650625e-05, 6.303651025518775e-05, 7.326425111386925e-05, 8.349199197255075e-05, 9.371973283123225e-05, 0.00010394746641395614, 0.00011417520727263764, 0.00012440295540727675, 0.00013463068171404302, 0.00014485842257272452, 0.00015508616343140602, 0.00016531390429008752, 0.00017554164514876902, 0.0001857693714555353, 0.00019599712686613202, 0.0002062248531728983, 0.0002164525940315798, 0.0002266803348902613, 0.00023690809030085802, 0.0002471358166076243, 0.000257363572018221, 0.0002675912983249873, 0.000277819053735584, 0.0002880467800423503, 0.00029827450634911656, 0.0003085022617597133, 0.00031873001717031, 0.0003289577434770763, 0.000339185498887673, 0.0003494132251944393, 0.000359640980605036, 0.0003698687069118023, 0.00038009643321856856, 0.0003903241886291653, 0.00040055191493593156, 0.00041077964124269783, 0.00042100739665329456, 0.00043123512296006083, 0.00044146287837065756, 0.00045169060467742383, 0.00046191836008802056, 0.00047214608639478683, 0.0004823738127015531, 0.0004926015390083194, 0.0005028292653150856, 0.0005130570498295128, 0.0005232847761362791, 0.0005335125024430454, 0.0005437402287498116, 0.0005539680132642388, 0.0005641957395710051]}, "gradients/decoder.transformer.h.1.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 2.0, 2.0, 4.0, 2.0, 1.0, 1.0, 4.0, 9.0, 2.0, 8.0, 7.0, 10.0, 11.0, 10.0, 13.0, 18.0, 12.0, 23.0, 21.0, 21.0, 33.0, 32.0, 28.0, 26.0, 46.0, 38.0, 34.0, 36.0, 41.0, 45.0, 28.0, 39.0, 39.0, 29.0, 27.0, 27.0, 30.0, 36.0, 37.0, 18.0, 28.0, 27.0, 17.0, 24.0, 14.0, 12.0, 7.0, 10.0, 5.0, 4.0, 2.0, 3.0, 6.0, 1.0, 4.0, 1.0, 5.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-5.412101745605469e-05, -5.235057324171066e-05, -5.058012902736664e-05, -4.8809684813022614e-05, -4.703924059867859e-05, -4.5268796384334564e-05, -4.349835216999054e-05, -4.1727907955646515e-05, -3.995746374130249e-05, -3.8187019526958466e-05, -3.641657531261444e-05, -3.4646131098270416e-05, -3.287568688392639e-05, -3.110524266958237e-05, -2.9334798455238342e-05, -2.7564354240894318e-05, -2.5793910026550293e-05, -2.402346581220627e-05, -2.2253021597862244e-05, -2.048257738351822e-05, -1.8712133169174194e-05, -1.694168895483017e-05, -1.5171244740486145e-05, -1.340080052614212e-05, -1.1630356311798096e-05, -9.859912097454071e-06, -8.089467883110046e-06, -6.319023668766022e-06, -4.548579454421997e-06, -2.7781352400779724e-06, -1.0076910257339478e-06, 7.627531886100769e-07, 2.5331974029541016e-06, 4.303641617298126e-06, 6.074085831642151e-06, 7.844530045986176e-06, 9.6149742603302e-06, 1.1385418474674225e-05, 1.315586268901825e-05, 1.4926306903362274e-05, 1.66967511177063e-05, 1.8467195332050323e-05, 2.0237639546394348e-05, 2.2008083760738373e-05, 2.3778527975082397e-05, 2.5548972189426422e-05, 2.7319416403770447e-05, 2.908986061811447e-05, 3.0860304832458496e-05, 3.263074904680252e-05, 3.4401193261146545e-05, 3.617163747549057e-05, 3.7942081689834595e-05, 3.971252590417862e-05, 4.1482970118522644e-05, 4.325341433286667e-05, 4.502385854721069e-05, 4.679430276155472e-05, 4.856474697589874e-05, 5.033519119024277e-05, 5.210563540458679e-05, 5.387607961893082e-05, 5.564652383327484e-05, 5.7416968047618866e-05, 5.918741226196289e-05]}, "gradients/decoder.transformer.h.1.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 3.0, 5.0, 4.0, 5.0, 4.0, 16.0, 15.0, 6.0, 19.0, 15.0, 15.0, 15.0, 28.0, 39.0, 27.0, 21.0, 33.0, 49.0, 37.0, 41.0, 35.0, 46.0, 44.0, 41.0, 43.0, 33.0, 42.0, 41.0, 29.0, 37.0, 29.0, 25.0, 30.0, 23.0, 18.0, 15.0, 16.0, 10.0, 6.0, 16.0, 7.0, 9.0, 4.0, 8.0, 3.0, 1.0, 0.0, 1.0, 4.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0], "bins": [-67.0625, -64.95703125, -62.8515625, -60.74609375, -58.640625, -56.53515625, -54.4296875, -52.32421875, -50.21875, -48.11328125, -46.0078125, -43.90234375, -41.796875, -39.69140625, -37.5859375, -35.48046875, -33.375, -31.26953125, -29.1640625, -27.05859375, -24.953125, -22.84765625, -20.7421875, -18.63671875, -16.53125, -14.42578125, -12.3203125, -10.21484375, -8.109375, -6.00390625, -3.8984375, -1.79296875, 0.3125, 2.41796875, 4.5234375, 6.62890625, 8.734375, 10.83984375, 12.9453125, 15.05078125, 17.15625, 19.26171875, 21.3671875, 23.47265625, 25.578125, 27.68359375, 29.7890625, 31.89453125, 34.0, 36.10546875, 38.2109375, 40.31640625, 42.421875, 44.52734375, 46.6328125, 48.73828125, 50.84375, 52.94921875, 55.0546875, 57.16015625, 59.265625, 61.37109375, 63.4765625, 65.58203125, 67.6875]}, "gradients/decoder.transformer.h.1.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 2.0, 3.0, 1.0, 4.0, 2.0, 10.0, 10.0, 9.0, 21.0, 33.0, 39.0, 72.0, 79.0, 119.0, 196.0, 213.0, 365.0, 496.0, 835.0, 1164.0, 1821.0, 2988.0, 5188.0, 9718.0, 21885.0, 58892.0, 222395.0, 514626.0, 131692.0, 40514.0, 16045.0, 7648.0, 4280.0, 2400.0, 1586.0, 1056.0, 661.0, 458.0, 300.0, 204.0, 151.0, 106.0, 81.0, 55.0, 40.0, 27.0, 23.0, 14.0, 12.0, 11.0, 2.0, 2.0, 3.0, 8.0, 3.0, 0.0, 0.0, 3.0, 1.0, 0.0, 0.0, 1.0], "bins": [-89.6875, -86.62890625, -83.5703125, -80.51171875, -77.453125, -74.39453125, -71.3359375, -68.27734375, -65.21875, -62.16015625, -59.1015625, -56.04296875, -52.984375, -49.92578125, -46.8671875, -43.80859375, -40.75, -37.69140625, -34.6328125, -31.57421875, -28.515625, -25.45703125, -22.3984375, -19.33984375, -16.28125, -13.22265625, -10.1640625, -7.10546875, -4.046875, -0.98828125, 2.0703125, 5.12890625, 8.1875, 11.24609375, 14.3046875, 17.36328125, 20.421875, 23.48046875, 26.5390625, 29.59765625, 32.65625, 35.71484375, 38.7734375, 41.83203125, 44.890625, 47.94921875, 51.0078125, 54.06640625, 57.125, 60.18359375, 63.2421875, 66.30078125, 69.359375, 72.41796875, 75.4765625, 78.53515625, 81.59375, 84.65234375, 87.7109375, 90.76953125, 93.828125, 96.88671875, 99.9453125, 103.00390625, 106.0625]}, "gradients/decoder.transformer.h.1.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 2.0, 3.0, 2.0, 4.0, 3.0, 3.0, 2.0, 10.0, 14.0, 18.0, 7.0, 15.0, 22.0, 37.0, 34.0, 48.0, 44.0, 53.0, 69.0, 79.0, 141.0, 347.0, 1466.0, 165.0, 82.0, 45.0, 65.0, 45.0, 35.0, 32.0, 39.0, 31.0, 21.0, 13.0, 17.0, 13.0, 8.0, 8.0, 3.0, 2.0, 2.0, 3.0, 2.0, 2.0, 1.0, 1.0, 3.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-160.0, -154.849609375, -149.69921875, -144.548828125, -139.3984375, -134.248046875, -129.09765625, -123.947265625, -118.796875, -113.646484375, -108.49609375, -103.345703125, -98.1953125, -93.044921875, -87.89453125, -82.744140625, -77.59375, -72.443359375, -67.29296875, -62.142578125, -56.9921875, -51.841796875, -46.69140625, -41.541015625, -36.390625, -31.240234375, -26.08984375, -20.939453125, -15.7890625, -10.638671875, -5.48828125, -0.337890625, 4.8125, 9.962890625, 15.11328125, 20.263671875, 25.4140625, 30.564453125, 35.71484375, 40.865234375, 46.015625, 51.166015625, 56.31640625, 61.466796875, 66.6171875, 71.767578125, 76.91796875, 82.068359375, 87.21875, 92.369140625, 97.51953125, 102.669921875, 107.8203125, 112.970703125, 118.12109375, 123.271484375, 128.421875, 133.572265625, 138.72265625, 143.873046875, 149.0234375, 154.173828125, 159.32421875, 164.474609375, 169.625]}, "gradients/decoder.transformer.h.1.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 0.0, 4.0, 3.0, 3.0, 0.0, 1.0, 2.0, 1.0, 4.0, 7.0, 13.0, 13.0, 25.0, 30.0, 40.0, 47.0, 66.0, 83.0, 164.0, 333.0, 698.0, 3265.0, 116082.0, 3009123.0, 13312.0, 1359.0, 415.0, 215.0, 132.0, 80.0, 57.0, 37.0, 25.0, 15.0, 10.0, 19.0, 5.0, 7.0, 4.0, 5.0, 3.0, 1.0, 3.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 3.0, 0.0, 0.0, 1.0, 1.0], "bins": [-428.75, -415.85546875, -402.9609375, -390.06640625, -377.171875, -364.27734375, -351.3828125, -338.48828125, -325.59375, -312.69921875, -299.8046875, -286.91015625, -274.015625, -261.12109375, -248.2265625, -235.33203125, -222.4375, -209.54296875, -196.6484375, -183.75390625, -170.859375, -157.96484375, -145.0703125, -132.17578125, -119.28125, -106.38671875, -93.4921875, -80.59765625, -67.703125, -54.80859375, -41.9140625, -29.01953125, -16.125, -3.23046875, 9.6640625, 22.55859375, 35.453125, 48.34765625, 61.2421875, 74.13671875, 87.03125, 99.92578125, 112.8203125, 125.71484375, 138.609375, 151.50390625, 164.3984375, 177.29296875, 190.1875, 203.08203125, 215.9765625, 228.87109375, 241.765625, 254.66015625, 267.5546875, 280.44921875, 293.34375, 306.23828125, 319.1328125, 332.02734375, 344.921875, 357.81640625, 370.7109375, 383.60546875, 396.5]}, "gradients/decoder.transformer.h.1.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 10.0, 352.0, 631.0, 22.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3949.839599609375, -3846.263427734375, -3742.68701171875, -3639.11083984375, -3535.53466796875, -3431.95849609375, -3328.38232421875, -3224.805908203125, -3121.229736328125, -3017.653564453125, -2914.0771484375, -2810.5009765625, -2706.9248046875, -2603.3486328125, -2499.7724609375, -2396.196044921875, -2292.619873046875, -2189.043701171875, -2085.46728515625, -1981.89111328125, -1878.31494140625, -1774.73876953125, -1671.1624755859375, -1567.586181640625, -1464.010009765625, -1360.433837890625, -1256.8575439453125, -1153.28125, -1049.705078125, -946.1288452148438, -842.5526123046875, -738.9763793945312, -635.39990234375, -531.8236694335938, -428.2474365234375, -324.67120361328125, -221.094970703125, -117.51873779296875, -13.9425048828125, 89.63372802734375, 193.2099609375, 296.78619384765625, 400.3624267578125, 503.93865966796875, 607.514892578125, 711.0911254882812, 814.6673583984375, 918.2435913085938, 1021.81982421875, 1125.39599609375, 1228.9722900390625, 1332.548583984375, 1436.124755859375, 1539.700927734375, 1643.2772216796875, 1746.853515625, 1850.4296875, 1954.005859375, 2057.58203125, 2161.158447265625, 2264.734619140625, 2368.310791015625, 2471.88720703125, 2575.46337890625, 2679.03955078125]}, "gradients/decoder.transformer.h.1.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 1.0, 1.0, 3.0, 3.0, 4.0, 4.0, 10.0, 8.0, 8.0, 14.0, 8.0, 8.0, 20.0, 27.0, 21.0, 31.0, 36.0, 25.0, 31.0, 35.0, 30.0, 38.0, 38.0, 45.0, 33.0, 35.0, 48.0, 54.0, 37.0, 30.0, 29.0, 39.0, 38.0, 27.0, 21.0, 15.0, 16.0, 16.0, 28.0, 20.0, 21.0, 10.0, 15.0, 7.0, 4.0, 5.0, 7.0, 6.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0], "bins": [-396.3738708496094, -385.3954162597656, -374.4169616699219, -363.4384765625, -352.46002197265625, -341.4815673828125, -330.50311279296875, -319.524658203125, -308.54620361328125, -297.5677490234375, -286.58929443359375, -275.61083984375, -264.6323547363281, -253.65390014648438, -242.67544555664062, -231.69699096679688, -220.718505859375, -209.74005126953125, -198.76158142089844, -187.7831268310547, -176.80465698242188, -165.82620239257812, -154.84774780273438, -143.86929321289062, -132.8908233642578, -121.91236114501953, -110.93389892578125, -99.9554443359375, -88.97698211669922, -77.99851989746094, -67.02006530761719, -56.041603088378906, -45.0631103515625, -34.08464813232422, -23.106189727783203, -12.127731323242188, -1.1492691040039062, 9.829193115234375, 20.807647705078125, 31.786109924316406, 42.76457214355469, 53.74303436279297, 64.72149658203125, 75.699951171875, 86.67841339111328, 97.65687561035156, 108.63533020019531, 119.6137924194336, 130.59225463867188, 141.57070922851562, 152.54917907714844, 163.5276336669922, 174.506103515625, 185.48455810546875, 196.4630126953125, 207.44146728515625, 218.41993713378906, 229.3983917236328, 240.37686157226562, 251.35531616210938, 262.3337707519531, 273.312255859375, 284.29071044921875, 295.2691650390625, 306.24761962890625]}, "gradients/decoder.transformer.h.0.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 5.0, 1.0, 0.0, 3.0, 6.0, 11.0, 5.0, 11.0, 14.0, 4.0, 7.0, 23.0, 16.0, 27.0, 22.0, 25.0, 27.0, 40.0, 44.0, 46.0, 39.0, 40.0, 34.0, 44.0, 23.0, 45.0, 40.0, 38.0, 30.0, 43.0, 35.0, 33.0, 38.0, 27.0, 19.0, 27.0, 20.0, 20.0, 10.0, 8.0, 9.0, 10.0, 10.0, 10.0, 3.0, 9.0, 2.0, 3.0, 0.0, 3.0, 3.0, 0.0, 0.0, 2.0, 1.0, 3.0], "bins": [-74.75, -72.4638671875, -70.177734375, -67.8916015625, -65.60546875, -63.3193359375, -61.033203125, -58.7470703125, -56.4609375, -54.1748046875, -51.888671875, -49.6025390625, -47.31640625, -45.0302734375, -42.744140625, -40.4580078125, -38.171875, -35.8857421875, -33.599609375, -31.3134765625, -29.02734375, -26.7412109375, -24.455078125, -22.1689453125, -19.8828125, -17.5966796875, -15.310546875, -13.0244140625, -10.73828125, -8.4521484375, -6.166015625, -3.8798828125, -1.59375, 0.6923828125, 2.978515625, 5.2646484375, 7.55078125, 9.8369140625, 12.123046875, 14.4091796875, 16.6953125, 18.9814453125, 21.267578125, 23.5537109375, 25.83984375, 28.1259765625, 30.412109375, 32.6982421875, 34.984375, 37.2705078125, 39.556640625, 41.8427734375, 44.12890625, 46.4150390625, 48.701171875, 50.9873046875, 53.2734375, 55.5595703125, 57.845703125, 60.1318359375, 62.41796875, 64.7041015625, 66.990234375, 69.2763671875, 71.5625]}, "gradients/decoder.transformer.h.0.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 2.0, 6.0, 1.0, 8.0, 10.0, 12.0, 13.0, 10.0, 21.0, 18.0, 42.0, 43.0, 70.0, 97.0, 131.0, 193.0, 283.0, 484.0, 935.0, 1777.0, 3950.0, 9424.0, 27246.0, 137014.0, 3062775.0, 864917.0, 56348.0, 16562.0, 6066.0, 2626.0, 1282.0, 641.0, 401.0, 248.0, 191.0, 127.0, 78.0, 56.0, 40.0, 32.0, 19.0, 28.0, 20.0, 11.0, 10.0, 11.0, 2.0, 3.0, 2.0, 1.0, 4.0, 2.0, 1.0, 2.0, 1.0, 1.0, 1.0], "bins": [-338.25, -327.6015625, -316.953125, -306.3046875, -295.65625, -285.0078125, -274.359375, -263.7109375, -253.0625, -242.4140625, -231.765625, -221.1171875, -210.46875, -199.8203125, -189.171875, -178.5234375, -167.875, -157.2265625, -146.578125, -135.9296875, -125.28125, -114.6328125, -103.984375, -93.3359375, -82.6875, -72.0390625, -61.390625, -50.7421875, -40.09375, -29.4453125, -18.796875, -8.1484375, 2.5, 13.1484375, 23.796875, 34.4453125, 45.09375, 55.7421875, 66.390625, 77.0390625, 87.6875, 98.3359375, 108.984375, 119.6328125, 130.28125, 140.9296875, 151.578125, 162.2265625, 172.875, 183.5234375, 194.171875, 204.8203125, 215.46875, 226.1171875, 236.765625, 247.4140625, 258.0625, 268.7109375, 279.359375, 290.0078125, 300.65625, 311.3046875, 321.953125, 332.6015625, 343.25]}, "gradients/decoder.transformer.h.0.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 5.0, 2.0, 3.0, 5.0, 4.0, 4.0, 6.0, 8.0, 9.0, 19.0, 17.0, 18.0, 26.0, 23.0, 39.0, 51.0, 59.0, 72.0, 115.0, 152.0, 191.0, 307.0, 450.0, 646.0, 581.0, 382.0, 218.0, 152.0, 118.0, 74.0, 71.0, 56.0, 40.0, 32.0, 23.0, 18.0, 23.0, 13.0, 11.0, 8.0, 6.0, 6.0, 1.0, 4.0, 6.0, 3.0, 5.0, 2.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-183.625, -178.345703125, -173.06640625, -167.787109375, -162.5078125, -157.228515625, -151.94921875, -146.669921875, -141.390625, -136.111328125, -130.83203125, -125.552734375, -120.2734375, -114.994140625, -109.71484375, -104.435546875, -99.15625, -93.876953125, -88.59765625, -83.318359375, -78.0390625, -72.759765625, -67.48046875, -62.201171875, -56.921875, -51.642578125, -46.36328125, -41.083984375, -35.8046875, -30.525390625, -25.24609375, -19.966796875, -14.6875, -9.408203125, -4.12890625, 1.150390625, 6.4296875, 11.708984375, 16.98828125, 22.267578125, 27.546875, 32.826171875, 38.10546875, 43.384765625, 48.6640625, 53.943359375, 59.22265625, 64.501953125, 69.78125, 75.060546875, 80.33984375, 85.619140625, 90.8984375, 96.177734375, 101.45703125, 106.736328125, 112.015625, 117.294921875, 122.57421875, 127.853515625, 133.1328125, 138.412109375, 143.69140625, 148.970703125, 154.25]}, "gradients/decoder.transformer.h.0.mlp.c_fc.weight": {"_type": "histogram", "values": [5.0, 3.0, 7.0, 5.0, 9.0, 8.0, 18.0, 30.0, 25.0, 55.0, 68.0, 91.0, 122.0, 221.0, 308.0, 413.0, 699.0, 1005.0, 1550.0, 2385.0, 3946.0, 6259.0, 10601.0, 18297.0, 34337.0, 70895.0, 171551.0, 618460.0, 2416205.0, 536548.0, 157323.0, 66326.0, 32393.0, 17702.0, 10020.0, 6052.0, 3589.0, 2345.0, 1490.0, 954.0, 642.0, 426.0, 279.0, 203.0, 138.0, 96.0, 63.0, 42.0, 24.0, 21.0, 13.0, 14.0, 9.0, 4.0, 2.0, 1.0, 1.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 1.0], "bins": [-104.6875, -101.0087890625, -97.330078125, -93.6513671875, -89.97265625, -86.2939453125, -82.615234375, -78.9365234375, -75.2578125, -71.5791015625, -67.900390625, -64.2216796875, -60.54296875, -56.8642578125, -53.185546875, -49.5068359375, -45.828125, -42.1494140625, -38.470703125, -34.7919921875, -31.11328125, -27.4345703125, -23.755859375, -20.0771484375, -16.3984375, -12.7197265625, -9.041015625, -5.3623046875, -1.68359375, 1.9951171875, 5.673828125, 9.3525390625, 13.03125, 16.7099609375, 20.388671875, 24.0673828125, 27.74609375, 31.4248046875, 35.103515625, 38.7822265625, 42.4609375, 46.1396484375, 49.818359375, 53.4970703125, 57.17578125, 60.8544921875, 64.533203125, 68.2119140625, 71.890625, 75.5693359375, 79.248046875, 82.9267578125, 86.60546875, 90.2841796875, 93.962890625, 97.6416015625, 101.3203125, 104.9990234375, 108.677734375, 112.3564453125, 116.03515625, 119.7138671875, 123.392578125, 127.0712890625, 130.75]}, "gradients/decoder.transformer.h.0.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 2.0, 6.0, 8.0, 7.0, 8.0, 7.0, 10.0, 11.0, 32.0, 29.0, 44.0, 55.0, 71.0, 123.0, 131.0, 133.0, 100.0, 70.0, 55.0, 26.0, 24.0, 22.0, 13.0, 7.0, 6.0, 7.0, 1.0, 1.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1723.2225341796875, -1669.1153564453125, -1615.0081787109375, -1560.9010009765625, -1506.7939453125, -1452.686767578125, -1398.57958984375, -1344.472412109375, -1290.365234375, -1236.258056640625, -1182.15087890625, -1128.043701171875, -1073.9365234375, -1019.8294067382812, -965.7222900390625, -911.6151123046875, -857.5079345703125, -803.4007568359375, -749.2935791015625, -695.1864624023438, -641.0792846679688, -586.9721069335938, -532.864990234375, -478.7578125, -424.650634765625, -370.54345703125, -316.4363098144531, -262.32916259765625, -208.22198486328125, -154.11480712890625, -100.00765991210938, -45.9005126953125, 8.20654296875, 62.31370544433594, 116.42086791992188, 170.5280303955078, 224.63519287109375, 278.74237060546875, 332.8495178222656, 386.9566650390625, 441.0638427734375, 495.1710205078125, 549.2781982421875, 603.3853149414062, 657.4924926757812, 711.5996704101562, 765.706787109375, 819.81396484375, 873.921142578125, 928.0283203125, 982.135498046875, 1036.24267578125, 1090.349853515625, 1144.45703125, 1198.5640869140625, 1252.6712646484375, 1306.7784423828125, 1360.8856201171875, 1414.9927978515625, 1469.0999755859375, 1523.20703125, 1577.314208984375, 1631.42138671875, 1685.528564453125, 1739.6357421875]}, "gradients/decoder.transformer.h.0.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 1.0, 2.0, 2.0, 1.0, 6.0, 1.0, 9.0, 5.0, 5.0, 10.0, 12.0, 16.0, 16.0, 11.0, 19.0, 20.0, 24.0, 33.0, 27.0, 24.0, 32.0, 48.0, 34.0, 44.0, 32.0, 41.0, 39.0, 40.0, 37.0, 35.0, 50.0, 40.0, 36.0, 45.0, 29.0, 31.0, 25.0, 19.0, 19.0, 18.0, 15.0, 13.0, 16.0, 7.0, 5.0, 3.0, 5.0, 2.0, 4.0, 3.0, 1.0, 1.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-807.42333984375, -781.0552368164062, -754.6870727539062, -728.3189697265625, -701.9508056640625, -675.5827026367188, -649.214599609375, -622.846435546875, -596.4783325195312, -570.1102294921875, -543.7420654296875, -517.3739624023438, -491.0058288574219, -464.6376953125, -438.2695617675781, -411.90142822265625, -385.5332946777344, -359.1651611328125, -332.7970275878906, -306.42889404296875, -280.060791015625, -253.69265747070312, -227.32452392578125, -200.95640563964844, -174.58827209472656, -148.2201385498047, -121.85202026367188, -95.48388671875, -69.11576080322266, -42.74763488769531, -16.379501342773438, 9.988616943359375, 36.35675048828125, 62.724876403808594, 89.09300231933594, 115.46113586425781, 141.82925415039062, 168.1973876953125, 194.56552124023438, 220.9336395263672, 247.30177307128906, 273.6698913574219, 300.03802490234375, 326.4061584472656, 352.7742919921875, 379.14239501953125, 405.51055908203125, 431.878662109375, 458.2467956542969, 484.61492919921875, 510.9830627441406, 537.3511962890625, 563.7192993164062, 590.08740234375, 616.45556640625, 642.8236694335938, 669.1918334960938, 695.5599365234375, 721.9281005859375, 748.2962036132812, 774.6643676757812, 801.032470703125, 827.400634765625, 853.7687377929688, 880.1368408203125]}, "gradients/decoder.transformer.h.0.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 3.0, 0.0, 1.0, 3.0, 4.0, 5.0, 5.0, 6.0, 5.0, 8.0, 14.0, 16.0, 12.0, 16.0, 18.0, 23.0, 16.0, 27.0, 36.0, 32.0, 31.0, 46.0, 37.0, 44.0, 52.0, 38.0, 55.0, 37.0, 47.0, 43.0, 46.0, 45.0, 33.0, 30.0, 33.0, 24.0, 20.0, 20.0, 11.0, 23.0, 9.0, 15.0, 9.0, 4.0, 2.0, 3.0, 4.0, 2.0, 0.0, 1.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1396.0, -1347.921875, -1299.84375, -1251.765625, -1203.6875, -1155.609375, -1107.53125, -1059.453125, -1011.375, -963.296875, -915.21875, -867.140625, -819.0625, -770.984375, -722.90625, -674.828125, -626.75, -578.671875, -530.59375, -482.515625, -434.4375, -386.359375, -338.28125, -290.203125, -242.125, -194.046875, -145.96875, -97.890625, -49.8125, -1.734375, 46.34375, 94.421875, 142.5, 190.578125, 238.65625, 286.734375, 334.8125, 382.890625, 430.96875, 479.046875, 527.125, 575.203125, 623.28125, 671.359375, 719.4375, 767.515625, 815.59375, 863.671875, 911.75, 959.828125, 1007.90625, 1055.984375, 1104.0625, 1152.140625, 1200.21875, 1248.296875, 1296.375, 1344.453125, 1392.53125, 1440.609375, 1488.6875, 1536.765625, 1584.84375, 1632.921875, 1681.0]}, "gradients/decoder.transformer.h.0.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 4.0, 3.0, 5.0, 2.0, 12.0, 20.0, 14.0, 49.0, 46.0, 99.0, 114.0, 209.0, 268.0, 393.0, 578.0, 840.0, 1249.0, 1813.0, 2874.0, 4522.0, 7191.0, 11476.0, 19007.0, 32133.0, 56554.0, 106892.0, 252819.0, 291720.0, 112483.0, 58952.0, 33471.0, 19731.0, 11999.0, 7448.0, 4713.0, 3006.0, 2028.0, 1338.0, 852.0, 512.0, 353.0, 223.0, 157.0, 117.0, 73.0, 51.0, 45.0, 38.0, 21.0, 16.0, 14.0, 7.0, 7.0, 5.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-71.0625, -68.615234375, -66.16796875, -63.720703125, -61.2734375, -58.826171875, -56.37890625, -53.931640625, -51.484375, -49.037109375, -46.58984375, -44.142578125, -41.6953125, -39.248046875, -36.80078125, -34.353515625, -31.90625, -29.458984375, -27.01171875, -24.564453125, -22.1171875, -19.669921875, -17.22265625, -14.775390625, -12.328125, -9.880859375, -7.43359375, -4.986328125, -2.5390625, -0.091796875, 2.35546875, 4.802734375, 7.25, 9.697265625, 12.14453125, 14.591796875, 17.0390625, 19.486328125, 21.93359375, 24.380859375, 26.828125, 29.275390625, 31.72265625, 34.169921875, 36.6171875, 39.064453125, 41.51171875, 43.958984375, 46.40625, 48.853515625, 51.30078125, 53.748046875, 56.1953125, 58.642578125, 61.08984375, 63.537109375, 65.984375, 68.431640625, 70.87890625, 73.326171875, 75.7734375, 78.220703125, 80.66796875, 83.115234375, 85.5625]}, "gradients/decoder.transformer.h.0.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 3.0, 4.0, 8.0, 4.0, 4.0, 7.0, 8.0, 7.0, 12.0, 11.0, 12.0, 22.0, 23.0, 18.0, 23.0, 24.0, 34.0, 30.0, 37.0, 33.0, 40.0, 47.0, 36.0, 36.0, 1056.0, 37.0, 46.0, 43.0, 29.0, 40.0, 25.0, 36.0, 31.0, 30.0, 33.0, 24.0, 30.0, 15.0, 12.0, 16.0, 9.0, 8.0, 5.0, 6.0, 5.0, 1.0, 10.0, 0.0, 3.0, 3.0, 2.0, 0.0, 1.0, 3.0], "bins": [-969.0, -940.859375, -912.71875, -884.578125, -856.4375, -828.296875, -800.15625, -772.015625, -743.875, -715.734375, -687.59375, -659.453125, -631.3125, -603.171875, -575.03125, -546.890625, -518.75, -490.609375, -462.46875, -434.328125, -406.1875, -378.046875, -349.90625, -321.765625, -293.625, -265.484375, -237.34375, -209.203125, -181.0625, -152.921875, -124.78125, -96.640625, -68.5, -40.359375, -12.21875, 15.921875, 44.0625, 72.203125, 100.34375, 128.484375, 156.625, 184.765625, 212.90625, 241.046875, 269.1875, 297.328125, 325.46875, 353.609375, 381.75, 409.890625, 438.03125, 466.171875, 494.3125, 522.453125, 550.59375, 578.734375, 606.875, 635.015625, 663.15625, 691.296875, 719.4375, 747.578125, 775.71875, 803.859375, 832.0]}, "gradients/decoder.transformer.h.0.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 5.0, 9.0, 4.0, 6.0, 23.0, 34.0, 37.0, 71.0, 76.0, 103.0, 170.0, 219.0, 333.0, 521.0, 733.0, 1060.0, 1641.0, 2302.0, 3386.0, 4906.0, 7667.0, 11711.0, 18227.0, 28462.0, 45206.0, 73490.0, 132971.0, 1357958.0, 169950.0, 88683.0, 52970.0, 32825.0, 20867.0, 13698.0, 8892.0, 5741.0, 3900.0, 2625.0, 1826.0, 1209.0, 801.0, 572.0, 347.0, 277.0, 199.0, 140.0, 88.0, 66.0, 48.0, 32.0, 22.0, 14.0, 5.0, 9.0, 7.0, 0.0, 0.0, 0.0, 1.0, 2.0], "bins": [-38.8125, -37.5869140625, -36.361328125, -35.1357421875, -33.91015625, -32.6845703125, -31.458984375, -30.2333984375, -29.0078125, -27.7822265625, -26.556640625, -25.3310546875, -24.10546875, -22.8798828125, -21.654296875, -20.4287109375, -19.203125, -17.9775390625, -16.751953125, -15.5263671875, -14.30078125, -13.0751953125, -11.849609375, -10.6240234375, -9.3984375, -8.1728515625, -6.947265625, -5.7216796875, -4.49609375, -3.2705078125, -2.044921875, -0.8193359375, 0.40625, 1.6318359375, 2.857421875, 4.0830078125, 5.30859375, 6.5341796875, 7.759765625, 8.9853515625, 10.2109375, 11.4365234375, 12.662109375, 13.8876953125, 15.11328125, 16.3388671875, 17.564453125, 18.7900390625, 20.015625, 21.2412109375, 22.466796875, 23.6923828125, 24.91796875, 26.1435546875, 27.369140625, 28.5947265625, 29.8203125, 31.0458984375, 32.271484375, 33.4970703125, 34.72265625, 35.9482421875, 37.173828125, 38.3994140625, 39.625]}, "gradients/decoder.transformer.h.0.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 3.0, 3.0, 3.0, 4.0, 2.0, 4.0, 3.0, 5.0, 3.0, 7.0, 16.0, 11.0, 17.0, 22.0, 20.0, 27.0, 41.0, 25.0, 52.0, 86.0, 131.0, 140.0, 89.0, 57.0, 38.0, 43.0, 23.0, 27.0, 19.0, 13.0, 14.0, 11.0, 10.0, 5.0, 12.0, 9.0, 5.0, 3.0, 3.0, 3.0, 2.0, 0.0, 0.0, 1.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0052490234375, -0.005040526390075684, -0.004832029342651367, -0.004623532295227051, -0.004415035247802734, -0.004206538200378418, -0.0039980411529541016, -0.003789544105529785, -0.0035810470581054688, -0.0033725500106811523, -0.003164052963256836, -0.0029555559158325195, -0.002747058868408203, -0.0025385618209838867, -0.0023300647735595703, -0.002121567726135254, -0.0019130706787109375, -0.001704573631286621, -0.0014960765838623047, -0.0012875795364379883, -0.0010790824890136719, -0.0008705854415893555, -0.0006620883941650391, -0.00045359134674072266, -0.00024509429931640625, -3.6597251892089844e-05, 0.00017189979553222656, 0.00038039684295654297, 0.0005888938903808594, 0.0007973909378051758, 0.0010058879852294922, 0.0012143850326538086, 0.001422882080078125, 0.0016313791275024414, 0.0018398761749267578, 0.0020483732223510742, 0.0022568702697753906, 0.002465367317199707, 0.0026738643646240234, 0.00288236141204834, 0.0030908584594726562, 0.0032993555068969727, 0.003507852554321289, 0.0037163496017456055, 0.003924846649169922, 0.004133343696594238, 0.004341840744018555, 0.004550337791442871, 0.0047588348388671875, 0.004967331886291504, 0.00517582893371582, 0.005384325981140137, 0.005592823028564453, 0.0058013200759887695, 0.006009817123413086, 0.006218314170837402, 0.006426811218261719, 0.006635308265686035, 0.0068438053131103516, 0.007052302360534668, 0.007260799407958984, 0.007469296455383301, 0.007677793502807617, 0.007886290550231934, 0.00809478759765625]}, "gradients/decoder.transformer.h.0.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 2.0, 1.0, 3.0, 2.0, 2.0, 4.0, 3.0, 6.0, 7.0, 11.0, 9.0, 12.0, 26.0, 39.0, 61.0, 95.0, 129.0, 210.0, 317.0, 489.0, 815.0, 1336.0, 2238.0, 3819.0, 7210.0, 14145.0, 29843.0, 72721.0, 300976.0, 463580.0, 83654.0, 33265.0, 15344.0, 7681.0, 4175.0, 2400.0, 1528.0, 878.0, 568.0, 325.0, 186.0, 139.0, 89.0, 61.0, 53.0, 29.0, 23.0, 20.0, 17.0, 9.0, 5.0, 2.0, 4.0, 2.0, 2.0], "bins": [-0.03375244140625, -0.03286576271057129, -0.03197908401489258, -0.031092405319213867, -0.030205726623535156, -0.029319047927856445, -0.028432369232177734, -0.027545690536499023, -0.026659011840820312, -0.0257723331451416, -0.02488565444946289, -0.02399897575378418, -0.02311229705810547, -0.022225618362426758, -0.021338939666748047, -0.020452260971069336, -0.019565582275390625, -0.018678903579711914, -0.017792224884033203, -0.016905546188354492, -0.01601886749267578, -0.01513218879699707, -0.01424551010131836, -0.013358831405639648, -0.012472152709960938, -0.011585474014282227, -0.010698795318603516, -0.009812116622924805, -0.008925437927246094, -0.008038759231567383, -0.007152080535888672, -0.006265401840209961, -0.00537872314453125, -0.004492044448852539, -0.003605365753173828, -0.002718687057495117, -0.0018320083618164062, -0.0009453296661376953, -5.8650970458984375e-05, 0.0008280277252197266, 0.0017147064208984375, 0.0026013851165771484, 0.0034880638122558594, 0.00437474250793457, 0.005261421203613281, 0.006148099899291992, 0.007034778594970703, 0.007921457290649414, 0.008808135986328125, 0.009694814682006836, 0.010581493377685547, 0.011468172073364258, 0.012354850769042969, 0.01324152946472168, 0.01412820816040039, 0.015014886856079102, 0.015901565551757812, 0.016788244247436523, 0.017674922943115234, 0.018561601638793945, 0.019448280334472656, 0.020334959030151367, 0.021221637725830078, 0.02210831642150879, 0.0229949951171875]}, "gradients/decoder.transformer.h.0.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 3.0, 2.0, 3.0, 2.0, 4.0, 2.0, 2.0, 6.0, 5.0, 6.0, 16.0, 12.0, 17.0, 17.0, 26.0, 25.0, 47.0, 61.0, 108.0, 202.0, 221.0, 102.0, 43.0, 33.0, 23.0, 13.0, 8.0, 2.0, 2.0, 0.0, 2.0], "bins": [-0.025583025068044662, -0.025102611631155014, -0.024622198194265366, -0.024141782894730568, -0.02366136945784092, -0.02318095602095127, -0.022700542584061623, -0.022220127284526825, -0.021739713847637177, -0.021259300410747528, -0.02077888697385788, -0.020298471674323082, -0.019818058237433434, -0.019337644800543785, -0.018857231363654137, -0.01837681606411934, -0.01789640262722969, -0.017415989190340042, -0.016935575753450394, -0.016455160453915596, -0.015974747017025948, -0.0154943335801363, -0.01501392014324665, -0.014533505775034428, -0.014053093269467354, -0.013572679832577705, -0.013092265464365482, -0.012611852027475834, -0.01213143765926361, -0.011651024222373962, -0.011170610785484314, -0.010690196417272091, -0.010209782049059868, -0.00972936861217022, -0.009248954243957996, -0.008768540807068348, -0.008288126438856125, -0.0078077130019664764, -0.007327299099415541, -0.006846885196864605, -0.0063664717599749565, -0.005886057857424021, -0.005405643954873085, -0.004925230517983437, -0.0044448161497712135, -0.003964402712881565, -0.0034839888103306293, -0.0030035749077796936, -0.002523161005228758, -0.002042747102677822, -0.0015623333165422082, -0.0010819195304065943, -0.0006015056278556585, -0.00012109172530472279, 0.0003593219444155693, 0.000839735846966505, 0.0013201497495174408, 0.0018005636520683765, 0.0022809775546193123, 0.0027613912243396044, 0.00324180512689054, 0.003722219029441476, 0.004202632699161768, 0.004683046601712704, 0.0051634605042636395]}, "gradients/decoder.transformer.h.0.ln_cross_attn.bias": {"_type": "histogram", "values": [3.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 3.0, 6.0, 14.0, 9.0, 9.0, 11.0, 13.0, 14.0, 13.0, 19.0, 21.0, 18.0, 26.0, 28.0, 28.0, 41.0, 41.0, 31.0, 42.0, 31.0, 40.0, 51.0, 38.0, 47.0, 44.0, 43.0, 40.0, 39.0, 37.0, 24.0, 28.0, 19.0, 35.0, 17.0, 12.0, 19.0, 13.0, 7.0, 6.0, 3.0, 4.0, 6.0, 3.0, 3.0, 3.0, 2.0, 2.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.005986213684082031, -0.005795221775770187, -0.0056042298674583435, -0.0054132379591465, -0.005222246050834656, -0.005031254142522812, -0.004840262234210968, -0.004649270325899124, -0.00445827841758728, -0.004267286509275436, -0.0040762946009635925, -0.0038853026926517487, -0.003694310784339905, -0.003503318876028061, -0.003312326967716217, -0.003121335059404373, -0.0029303431510925293, -0.0027393512427806854, -0.0025483593344688416, -0.0023573674261569977, -0.002166375517845154, -0.00197538360953331, -0.001784391701221466, -0.0015933997929096222, -0.0014024078845977783, -0.0012114159762859344, -0.0010204240679740906, -0.0008294321596622467, -0.0006384402513504028, -0.00044744834303855896, -0.0002564564347267151, -6.546452641487122e-05, 0.00012552738189697266, 0.00031651929020881653, 0.0005075111985206604, 0.0006985031068325043, 0.0008894950151443481, 0.001080486923456192, 0.0012714788317680359, 0.0014624707400798798, 0.0016534626483917236, 0.0018444545567035675, 0.0020354464650154114, 0.0022264383733272552, 0.002417430281639099, 0.002608422189950943, 0.002799414098262787, 0.0029904060065746307, 0.0031813979148864746, 0.0033723898231983185, 0.0035633817315101624, 0.0037543736398220062, 0.00394536554813385, 0.004136357456445694, 0.004327349364757538, 0.004518341273069382, 0.004709333181381226, 0.0049003250896930695, 0.005091316998004913, 0.005282308906316757, 0.005473300814628601, 0.005664292722940445, 0.005855284631252289, 0.006046276539564133, 0.0062372684478759766]}, "gradients/decoder.transformer.h.0.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 3.0, 0.0, 1.0, 3.0, 4.0, 5.0, 5.0, 6.0, 5.0, 8.0, 14.0, 16.0, 12.0, 16.0, 18.0, 23.0, 16.0, 27.0, 36.0, 32.0, 31.0, 46.0, 37.0, 44.0, 52.0, 38.0, 55.0, 37.0, 47.0, 43.0, 46.0, 45.0, 33.0, 30.0, 33.0, 24.0, 20.0, 20.0, 11.0, 23.0, 9.0, 15.0, 9.0, 4.0, 2.0, 3.0, 4.0, 2.0, 0.0, 1.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1396.0, -1347.921875, -1299.84375, -1251.765625, -1203.6875, -1155.609375, -1107.53125, -1059.453125, -1011.375, -963.296875, -915.21875, -867.140625, -819.0625, -770.984375, -722.90625, -674.828125, -626.75, -578.671875, -530.59375, -482.515625, -434.4375, -386.359375, -338.28125, -290.203125, -242.125, -194.046875, -145.96875, -97.890625, -49.8125, -1.734375, 46.34375, 94.421875, 142.5, 190.578125, 238.65625, 286.734375, 334.8125, 382.890625, 430.96875, 479.046875, 527.125, 575.203125, 623.28125, 671.359375, 719.4375, 767.515625, 815.59375, 863.671875, 911.75, 959.828125, 1007.90625, 1055.984375, 1104.0625, 1152.140625, 1200.21875, 1248.296875, 1296.375, 1344.453125, 1392.53125, 1440.609375, 1488.6875, 1536.765625, 1584.84375, 1632.921875, 1681.0]}, "gradients/decoder.transformer.h.0.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 4.0, 0.0, 4.0, 2.0, 8.0, 5.0, 8.0, 13.0, 20.0, 24.0, 32.0, 42.0, 45.0, 88.0, 123.0, 163.0, 274.0, 465.0, 828.0, 1656.0, 3468.0, 8176.0, 21839.0, 70155.0, 336815.0, 465742.0, 92891.0, 27262.0, 10084.0, 4064.0, 1865.0, 976.0, 506.0, 328.0, 185.0, 133.0, 84.0, 47.0, 43.0, 28.0, 22.0, 12.0, 10.0, 7.0, 8.0, 2.0, 4.0, 2.0, 3.0, 3.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-222.25, -214.61328125, -206.9765625, -199.33984375, -191.703125, -184.06640625, -176.4296875, -168.79296875, -161.15625, -153.51953125, -145.8828125, -138.24609375, -130.609375, -122.97265625, -115.3359375, -107.69921875, -100.0625, -92.42578125, -84.7890625, -77.15234375, -69.515625, -61.87890625, -54.2421875, -46.60546875, -38.96875, -31.33203125, -23.6953125, -16.05859375, -8.421875, -0.78515625, 6.8515625, 14.48828125, 22.125, 29.76171875, 37.3984375, 45.03515625, 52.671875, 60.30859375, 67.9453125, 75.58203125, 83.21875, 90.85546875, 98.4921875, 106.12890625, 113.765625, 121.40234375, 129.0390625, 136.67578125, 144.3125, 151.94921875, 159.5859375, 167.22265625, 174.859375, 182.49609375, 190.1328125, 197.76953125, 205.40625, 213.04296875, 220.6796875, 228.31640625, 235.953125, 243.58984375, 251.2265625, 258.86328125, 266.5]}, "gradients/decoder.transformer.h.0.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 1.0, 6.0, 6.0, 9.0, 6.0, 36.0, 20.0, 36.0, 48.0, 55.0, 49.0, 84.0, 81.0, 2096.0, 102.0, 72.0, 68.0, 68.0, 56.0, 45.0, 35.0, 18.0, 18.0, 18.0, 15.0, 5.0, 2.0, 1.0, 2.0, 3.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3062.0, -2968.40625, -2874.8125, -2781.21875, -2687.625, -2594.03125, -2500.4375, -2406.84375, -2313.25, -2219.65625, -2126.0625, -2032.46875, -1938.875, -1845.28125, -1751.6875, -1658.09375, -1564.5, -1470.90625, -1377.3125, -1283.71875, -1190.125, -1096.53125, -1002.9375, -909.34375, -815.75, -722.15625, -628.5625, -534.96875, -441.375, -347.78125, -254.1875, -160.59375, -67.0, 26.59375, 120.1875, 213.78125, 307.375, 400.96875, 494.5625, 588.15625, 681.75, 775.34375, 868.9375, 962.53125, 1056.125, 1149.71875, 1243.3125, 1336.90625, 1430.5, 1524.09375, 1617.6875, 1711.28125, 1804.875, 1898.46875, 1992.0625, 2085.65625, 2179.25, 2272.84375, 2366.4375, 2460.03125, 2553.625, 2647.21875, 2740.8125, 2834.40625, 2928.0]}, "gradients/decoder.transformer.h.0.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 0.0, 1.0, 5.0, 1.0, 5.0, 7.0, 8.0, 13.0, 26.0, 33.0, 40.0, 63.0, 90.0, 139.0, 219.0, 337.0, 683.0, 1282.0, 3822.0, 19973.0, 241562.0, 2816872.0, 49588.0, 6943.0, 1990.0, 824.0, 482.0, 263.0, 137.0, 95.0, 74.0, 33.0, 23.0, 32.0, 14.0, 14.0, 2.0, 5.0, 7.0, 3.0, 5.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-391.75, -379.953125, -368.15625, -356.359375, -344.5625, -332.765625, -320.96875, -309.171875, -297.375, -285.578125, -273.78125, -261.984375, -250.1875, -238.390625, -226.59375, -214.796875, -203.0, -191.203125, -179.40625, -167.609375, -155.8125, -144.015625, -132.21875, -120.421875, -108.625, -96.828125, -85.03125, -73.234375, -61.4375, -49.640625, -37.84375, -26.046875, -14.25, -2.453125, 9.34375, 21.140625, 32.9375, 44.734375, 56.53125, 68.328125, 80.125, 91.921875, 103.71875, 115.515625, 127.3125, 139.109375, 150.90625, 162.703125, 174.5, 186.296875, 198.09375, 209.890625, 221.6875, 233.484375, 245.28125, 257.078125, 268.875, 280.671875, 292.46875, 304.265625, 316.0625, 327.859375, 339.65625, 351.453125, 363.25]}, "gradients/decoder.transformer.h.0.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 3.0, 2.0, 9.0, 4.0, 20.0, 30.0, 50.0, 89.0, 201.0, 302.0, 183.0, 61.0, 26.0, 9.0, 9.0, 5.0, 8.0, 1.0, 2.0, 1.0], "bins": [-9103.0927734375, -8932.533203125, -8761.974609375, -8591.4150390625, -8420.85546875, -8250.2958984375, -8079.73681640625, -7909.177734375, -7738.6181640625, -7568.05859375, -7397.49951171875, -7226.9404296875, -7056.380859375, -6885.8212890625, -6715.26220703125, -6544.703125, -6374.1435546875, -6203.583984375, -6033.02490234375, -5862.4658203125, -5691.90625, -5521.3466796875, -5350.78759765625, -5180.228515625, -5009.6689453125, -4839.109375, -4668.55029296875, -4497.9912109375, -4327.431640625, -4156.8720703125, -3986.31298828125, -3815.753662109375, -3645.19482421875, -3474.635498046875, -3304.076171875, -3133.516845703125, -2962.95751953125, -2792.398193359375, -2621.8388671875, -2451.279541015625, -2280.72021484375, -2110.160888671875, -1939.6015625, -1769.042236328125, -1598.48291015625, -1427.923583984375, -1257.3642578125, -1086.804931640625, -916.2454833984375, -745.6861572265625, -575.1268310546875, -404.5675048828125, -234.0081787109375, -63.4488525390625, 107.1104736328125, 277.6697998046875, 448.2291259765625, 618.7884521484375, 789.3477783203125, 959.9071044921875, 1130.4664306640625, 1301.0257568359375, 1471.5850830078125, 1642.1444091796875, 1812.7037353515625]}, "gradients/decoder.transformer.h.0.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 4.0, 0.0, 2.0, 5.0, 6.0, 12.0, 10.0, 10.0, 18.0, 17.0, 11.0, 24.0, 24.0, 27.0, 45.0, 47.0, 38.0, 33.0, 40.0, 45.0, 43.0, 50.0, 45.0, 38.0, 47.0, 57.0, 45.0, 43.0, 39.0, 32.0, 21.0, 23.0, 16.0, 17.0, 18.0, 15.0, 16.0, 12.0, 6.0, 4.0, 4.0, 2.0, 2.0, 3.0, 0.0, 0.0, 0.0, 2.0], "bins": [-3429.6953125, -3339.20166015625, -3248.707763671875, -3158.2138671875, -3067.72021484375, -2977.2265625, -2886.732666015625, -2796.23876953125, -2705.7451171875, -2615.25146484375, -2524.757568359375, -2434.263671875, -2343.77001953125, -2253.2763671875, -2162.782470703125, -2072.28857421875, -1981.794921875, -1891.3011474609375, -1800.807373046875, -1710.3135986328125, -1619.81982421875, -1529.3260498046875, -1438.832275390625, -1348.3385009765625, -1257.8447265625, -1167.3509521484375, -1076.857177734375, -986.3634033203125, -895.86962890625, -805.3758544921875, -714.882080078125, -624.3883056640625, -533.89453125, -443.4007568359375, -352.906982421875, -262.4132080078125, -171.91943359375, -81.4256591796875, 9.068115234375, 99.5618896484375, 190.0556640625, 280.5494384765625, 371.043212890625, 461.5369873046875, 552.03076171875, 642.5245361328125, 733.018310546875, 823.5120849609375, 914.005859375, 1004.4996337890625, 1094.993408203125, 1185.4871826171875, 1275.98095703125, 1366.4747314453125, 1456.968505859375, 1547.4622802734375, 1637.9560546875, 1728.4498291015625, 1818.943603515625, 1909.4373779296875, 1999.93115234375, 2090.4248046875, 2180.918701171875, 2271.41259765625, 2361.90625]}, "gradients/decoder.transformer.wpe.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 4.0, 3.0, 1.0, 4.0, 4.0, 2.0, 9.0, 13.0, 10.0, 20.0, 18.0, 14.0, 16.0, 32.0, 52.0, 43.0, 44.0, 107.0, 199.0, 339.0, 545.0, 950.0, 1044146.0, 707.0, 510.0, 271.0, 140.0, 95.0, 60.0, 39.0, 40.0, 25.0, 24.0, 19.0, 12.0, 10.0, 7.0, 9.0, 8.0, 5.0, 0.0, 2.0, 4.0, 4.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1432.5977783203125, -1383.76611328125, -1334.9344482421875, -1286.102783203125, -1237.2711181640625, -1188.439453125, -1139.60791015625, -1090.776123046875, -1041.944580078125, -993.1129150390625, -944.28125, -895.4495849609375, -846.617919921875, -797.7862548828125, -748.9546508789062, -700.1229858398438, -651.291259765625, -602.4595947265625, -553.6279296875, -504.7962951660156, -455.9646301269531, -407.1329650878906, -358.30133056640625, -309.46966552734375, -260.63800048828125, -211.80633544921875, -162.9746856689453, -114.14303588867188, -65.31137084960938, -16.479705810546875, 32.3519287109375, 81.18359375, 130.015380859375, 178.8470458984375, 227.67869567871094, 276.5103454589844, 325.3420104980469, 374.1736755371094, 423.00531005859375, 471.83697509765625, 520.6686401367188, 569.5003051757812, 618.3319702148438, 667.16357421875, 715.9952392578125, 764.826904296875, 813.6585693359375, 862.490234375, 911.3218994140625, 960.153564453125, 1008.9852294921875, 1057.81689453125, 1106.6485595703125, 1155.480224609375, 1204.311767578125, 1253.1435546875, 1301.97509765625, 1350.8067626953125, 1399.638427734375, 1448.4700927734375, 1497.3017578125, 1546.1334228515625, 1594.965087890625, 1643.796630859375, 1692.62841796875]}, "gradients/decoder.transformer.wte.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 3.0, 7.0, 13.0, 8.0, 21.0, 36.0, 47.0, 54.0, 98.0, 139.0, 377.0, 1467.0, 51457376.0, 2561.0, 468.0, 176.0, 94.0, 64.0, 40.0, 30.0, 22.0, 8.0, 15.0, 6.0, 10.0, 8.0, 1.0, 1.0, 4.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-3951.513916015625, -3849.203857421875, -3746.89404296875, -3644.583984375, -3542.27392578125, -3439.9638671875, -3337.65380859375, -3235.343994140625, -3133.033935546875, -3030.723876953125, -2928.4140625, -2826.10400390625, -2723.7939453125, -2621.48388671875, -2519.173828125, -2416.864013671875, -2314.553955078125, -2212.243896484375, -2109.93408203125, -2007.6240234375, -1905.31396484375, -1803.00390625, -1700.6939697265625, -1598.384033203125, -1496.073974609375, -1393.763916015625, -1291.4539794921875, -1189.14404296875, -1086.833984375, -984.5239868164062, -882.2139892578125, -779.9039916992188, -677.59375, -575.2837524414062, -472.9737548828125, -370.66375732421875, -268.353759765625, -166.04376220703125, -63.7337646484375, 38.57623291015625, 140.88623046875, 243.19622802734375, 345.5062255859375, 447.81622314453125, 550.126220703125, 652.4362182617188, 754.7462158203125, 857.0562133789062, 959.3662109375, 1061.67626953125, 1163.9862060546875, 1266.296142578125, 1368.606201171875, 1470.916259765625, 1573.2261962890625, 1675.5361328125, 1777.84619140625, 1880.15625, 1982.4661865234375, 2084.776123046875, 2187.086181640625, 2289.396240234375, 2391.7060546875, 2494.01611328125, 2596.326171875]}, "gradients/encoder.adapter.layers.2.conv.weight": {"_type": "histogram", "values": [3.0, 1.0, 4.0, 5.0, 5.0, 4.0, 11.0, 19.0, 35.0, 60.0, 62.0, 108.0, 170.0, 237.0, 349.0, 499.0, 763.0, 1140.0, 1665.0, 2487.0, 3814.0, 5758.0, 8176.0, 12453.0, 18950.0, 28759.0, 44160.0, 68936.0, 108437.0, 181774.0, 347086.0, 4070095.0, 669297.0, 286061.0, 158396.0, 94800.0, 60131.0, 39252.0, 25806.0, 17385.0, 11588.0, 7515.0, 5053.0, 3281.0, 2232.0, 1506.0, 1004.0, 729.0, 470.0, 305.0, 184.0, 148.0, 100.0, 71.0, 36.0, 24.0, 21.0, 19.0, 7.0, 4.0, 3.0, 1.0, 0.0, 2.0], "bins": [-12.890625, -12.486083984375, -12.08154296875, -11.677001953125, -11.2724609375, -10.867919921875, -10.46337890625, -10.058837890625, -9.654296875, -9.249755859375, -8.84521484375, -8.440673828125, -8.0361328125, -7.631591796875, -7.22705078125, -6.822509765625, -6.41796875, -6.013427734375, -5.60888671875, -5.204345703125, -4.7998046875, -4.395263671875, -3.99072265625, -3.586181640625, -3.181640625, -2.777099609375, -2.37255859375, -1.968017578125, -1.5634765625, -1.158935546875, -0.75439453125, -0.349853515625, 0.0546875, 0.459228515625, 0.86376953125, 1.268310546875, 1.6728515625, 2.077392578125, 2.48193359375, 2.886474609375, 3.291015625, 3.695556640625, 4.10009765625, 4.504638671875, 4.9091796875, 5.313720703125, 5.71826171875, 6.122802734375, 6.52734375, 6.931884765625, 7.33642578125, 7.740966796875, 8.1455078125, 8.550048828125, 8.95458984375, 9.359130859375, 9.763671875, 10.168212890625, 10.57275390625, 10.977294921875, 11.3818359375, 11.786376953125, 12.19091796875, 12.595458984375, 13.0]}, "gradients/encoder.adapter.layers.2.conv.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 6.0, 3.0, 2.0, 1.0, 6.0, 6.0, 9.0, 9.0, 5.0, 14.0, 17.0, 22.0, 20.0, 12.0, 20.0, 28.0, 23.0, 20.0, 25.0, 28.0, 35.0, 38.0, 43.0, 39.0, 37.0, 100.0, 1001.0, 43.0, 34.0, 44.0, 31.0, 35.0, 29.0, 25.0, 33.0, 26.0, 21.0, 26.0, 20.0, 16.0, 11.0, 10.0, 10.0, 11.0, 3.0, 8.0, 11.0, 5.0, 7.0, 5.0, 3.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 2.0], "bins": [-312.5, -302.7734375, -293.046875, -283.3203125, -273.59375, -263.8671875, -254.140625, -244.4140625, -234.6875, -224.9609375, -215.234375, -205.5078125, -195.78125, -186.0546875, -176.328125, -166.6015625, -156.875, -147.1484375, -137.421875, -127.6953125, -117.96875, -108.2421875, -98.515625, -88.7890625, -79.0625, -69.3359375, -59.609375, -49.8828125, -40.15625, -30.4296875, -20.703125, -10.9765625, -1.25, 8.4765625, 18.203125, 27.9296875, 37.65625, 47.3828125, 57.109375, 66.8359375, 76.5625, 86.2890625, 96.015625, 105.7421875, 115.46875, 125.1953125, 134.921875, 144.6484375, 154.375, 164.1015625, 173.828125, 183.5546875, 193.28125, 203.0078125, 212.734375, 222.4609375, 232.1875, 241.9140625, 251.640625, 261.3671875, 271.09375, 280.8203125, 290.546875, 300.2734375, 310.0]}, "gradients/encoder.adapter.layers.1.conv.weight": {"_type": "histogram", "values": [3.0, 3.0, 0.0, 0.0, 3.0, 5.0, 12.0, 12.0, 13.0, 20.0, 33.0, 46.0, 54.0, 81.0, 109.0, 162.0, 226.0, 290.0, 454.0, 706.0, 1041.0, 1602.0, 2572.0, 4056.0, 6915.0, 12870.0, 25712.0, 60955.0, 189637.0, 4716862.0, 1000804.0, 159848.0, 54009.0, 23405.0, 11696.0, 6351.0, 3960.0, 2316.0, 1468.0, 993.0, 682.0, 440.0, 295.0, 219.0, 167.0, 106.0, 60.0, 62.0, 26.0, 26.0, 19.0, 11.0, 10.0, 6.0, 6.0, 6.0, 2.0, 4.0, 2.0, 0.0, 0.0, 0.0, 0.0, 3.0], "bins": [-26.265625, -25.38818359375, -24.5107421875, -23.63330078125, -22.755859375, -21.87841796875, -21.0009765625, -20.12353515625, -19.24609375, -18.36865234375, -17.4912109375, -16.61376953125, -15.736328125, -14.85888671875, -13.9814453125, -13.10400390625, -12.2265625, -11.34912109375, -10.4716796875, -9.59423828125, -8.716796875, -7.83935546875, -6.9619140625, -6.08447265625, -5.20703125, -4.32958984375, -3.4521484375, -2.57470703125, -1.697265625, -0.81982421875, 0.0576171875, 0.93505859375, 1.8125, 2.68994140625, 3.5673828125, 4.44482421875, 5.322265625, 6.19970703125, 7.0771484375, 7.95458984375, 8.83203125, 9.70947265625, 10.5869140625, 11.46435546875, 12.341796875, 13.21923828125, 14.0966796875, 14.97412109375, 15.8515625, 16.72900390625, 17.6064453125, 18.48388671875, 19.361328125, 20.23876953125, 21.1162109375, 21.99365234375, 22.87109375, 23.74853515625, 24.6259765625, 25.50341796875, 26.380859375, 27.25830078125, 28.1357421875, 29.01318359375, 29.890625]}, "gradients/encoder.adapter.layers.1.conv.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 3.0, 0.0, 2.0, 1.0, 4.0, 3.0, 10.0, 7.0, 10.0, 13.0, 11.0, 14.0, 20.0, 26.0, 25.0, 32.0, 32.0, 42.0, 34.0, 32.0, 51.0, 41.0, 41.0, 349.0, 766.0, 47.0, 39.0, 42.0, 51.0, 37.0, 34.0, 40.0, 28.0, 29.0, 19.0, 25.0, 11.0, 15.0, 11.0, 8.0, 8.0, 8.0, 7.0, 2.0, 2.0, 2.0, 4.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-376.5, -365.4296875, -354.359375, -343.2890625, -332.21875, -321.1484375, -310.078125, -299.0078125, -287.9375, -276.8671875, -265.796875, -254.7265625, -243.65625, -232.5859375, -221.515625, -210.4453125, -199.375, -188.3046875, -177.234375, -166.1640625, -155.09375, -144.0234375, -132.953125, -121.8828125, -110.8125, -99.7421875, -88.671875, -77.6015625, -66.53125, -55.4609375, -44.390625, -33.3203125, -22.25, -11.1796875, -0.109375, 10.9609375, 22.03125, 33.1015625, 44.171875, 55.2421875, 66.3125, 77.3828125, 88.453125, 99.5234375, 110.59375, 121.6640625, 132.734375, 143.8046875, 154.875, 165.9453125, 177.015625, 188.0859375, 199.15625, 210.2265625, 221.296875, 232.3671875, 243.4375, 254.5078125, 265.578125, 276.6484375, 287.71875, 298.7890625, 309.859375, 320.9296875, 332.0]}, "gradients/encoder.adapter.layers.0.conv.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 3.0, 0.0, 3.0, 0.0, 0.0, 7.0, 2.0, 16.0, 16.0, 13.0, 27.0, 39.0, 28.0, 49.0, 107.0, 129.0, 210.0, 285.0, 524.0, 771.0, 1559.0, 3212.0, 8410.0, 33381.0, 6034746.0, 177264.0, 19344.0, 5804.0, 2511.0, 1147.0, 676.0, 421.0, 217.0, 144.0, 104.0, 78.0, 39.0, 35.0, 40.0, 24.0, 11.0, 11.0, 11.0, 3.0, 5.0, 3.0, 4.0, 3.0, 2.0, 0.0, 3.0, 3.0, 0.0, 1.0, 2.0, 3.0], "bins": [-220.5, -213.783203125, -207.06640625, -200.349609375, -193.6328125, -186.916015625, -180.19921875, -173.482421875, -166.765625, -160.048828125, -153.33203125, -146.615234375, -139.8984375, -133.181640625, -126.46484375, -119.748046875, -113.03125, -106.314453125, -99.59765625, -92.880859375, -86.1640625, -79.447265625, -72.73046875, -66.013671875, -59.296875, -52.580078125, -45.86328125, -39.146484375, -32.4296875, -25.712890625, -18.99609375, -12.279296875, -5.5625, 1.154296875, 7.87109375, 14.587890625, 21.3046875, 28.021484375, 34.73828125, 41.455078125, 48.171875, 54.888671875, 61.60546875, 68.322265625, 75.0390625, 81.755859375, 88.47265625, 95.189453125, 101.90625, 108.623046875, 115.33984375, 122.056640625, 128.7734375, 135.490234375, 142.20703125, 148.923828125, 155.640625, 162.357421875, 169.07421875, 175.791015625, 182.5078125, 189.224609375, 195.94140625, 202.658203125, 209.375]}, "gradients/encoder.adapter.layers.0.conv.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 3.0, 1.0, 4.0, 2.0, 1.0, 4.0, 6.0, 3.0, 6.0, 6.0, 7.0, 17.0, 22.0, 22.0, 27.0, 38.0, 53.0, 68.0, 97.0, 133.0, 877.0, 231.0, 98.0, 83.0, 53.0, 46.0, 36.0, 18.0, 19.0, 15.0, 12.0, 6.0, 6.0, 5.0, 5.0, 2.0, 1.0, 4.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-93.8125, -91.5458984375, -89.279296875, -87.0126953125, -84.74609375, -82.4794921875, -80.212890625, -77.9462890625, -75.6796875, -73.4130859375, -71.146484375, -68.8798828125, -66.61328125, -64.3466796875, -62.080078125, -59.8134765625, -57.546875, -55.2802734375, -53.013671875, -50.7470703125, -48.48046875, -46.2138671875, -43.947265625, -41.6806640625, -39.4140625, -37.1474609375, -34.880859375, -32.6142578125, -30.34765625, -28.0810546875, -25.814453125, -23.5478515625, -21.28125, -19.0146484375, -16.748046875, -14.4814453125, -12.21484375, -9.9482421875, -7.681640625, -5.4150390625, -3.1484375, -0.8818359375, 1.384765625, 3.6513671875, 5.91796875, 8.1845703125, 10.451171875, 12.7177734375, 14.984375, 17.2509765625, 19.517578125, 21.7841796875, 24.05078125, 26.3173828125, 28.583984375, 30.8505859375, 33.1171875, 35.3837890625, 37.650390625, 39.9169921875, 42.18359375, 44.4501953125, 46.716796875, 48.9833984375, 51.25]}, "gradients/encoder.encoder.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 4.0, 1.0, 7.0, 2.0, 9.0, 8.0, 15.0, 15.0, 28.0, 55.0, 104.0, 360.0, 239.0, 64.0, 35.0, 19.0, 13.0, 8.0, 8.0, 6.0, 3.0, 5.0, 0.0, 0.0, 2.0, 1.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-679.916748046875, -663.2799072265625, -646.6430053710938, -630.0061645507812, -613.3692626953125, -596.732421875, -580.0955810546875, -563.4586791992188, -546.8218383789062, -530.1849975585938, -513.548095703125, -496.9112548828125, -480.2743835449219, -463.63751220703125, -447.0006408691406, -430.36376953125, -413.7268981933594, -397.09002685546875, -380.4531555175781, -363.8162841796875, -347.179443359375, -330.5425720214844, -313.90570068359375, -297.2688293457031, -280.6319580078125, -263.9950866699219, -247.3582305908203, -230.7213592529297, -214.08450317382812, -197.4476318359375, -180.81076049804688, -164.17388916015625, -147.53701782226562, -130.900146484375, -114.26329040527344, -97.62641906738281, -80.98955535888672, -64.35269165039062, -47.7158203125, -31.078956604003906, -14.442092895507812, 2.194772720336914, 18.83163833618164, 35.468505859375, 52.105369567871094, 68.74223327636719, 85.37910461425781, 102.0159683227539, 118.65283203125, 135.28970336914062, 151.9265594482422, 168.5634307861328, 185.20028686523438, 201.837158203125, 218.47402954101562, 235.11090087890625, 251.7477569580078, 268.3846130371094, 285.021484375, 301.6583557128906, 318.29522705078125, 334.93206787109375, 351.5689697265625, 368.205810546875, 384.8426818847656]}, "gradients/encoder.encoder.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 3.0, 3.0, 3.0, 5.0, 4.0, 4.0, 7.0, 6.0, 10.0, 4.0, 20.0, 16.0, 46.0, 98.0, 214.0, 238.0, 153.0, 51.0, 34.0, 21.0, 10.0, 16.0, 11.0, 9.0, 2.0, 2.0, 4.0, 7.0, 4.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-748.8748168945312, -727.6260375976562, -706.3772583007812, -685.1284790039062, -663.8796997070312, -642.6309814453125, -621.3822021484375, -600.1334228515625, -578.8846435546875, -557.6358642578125, -536.3870849609375, -515.1383056640625, -493.8895568847656, -472.6407775878906, -451.3919982910156, -430.14324951171875, -408.8944396972656, -387.6456604003906, -366.3968811035156, -345.14813232421875, -323.89935302734375, -302.65057373046875, -281.40179443359375, -260.15301513671875, -238.9042510986328, -217.6554718017578, -196.40670776367188, -175.15792846679688, -153.90914916992188, -132.66038513183594, -111.41160583496094, -90.162841796875, -68.9140625, -47.66529083251953, -26.416515350341797, -5.1677398681640625, 16.081031799316406, 37.329803466796875, 58.578582763671875, 79.82734680175781, 101.07612609863281, 122.32489776611328, 143.57366943359375, 164.82244873046875, 186.07122802734375, 207.3199920654297, 228.5687713623047, 249.81753540039062, 271.0663146972656, 292.3150939941406, 313.5638732910156, 334.8126220703125, 356.0614013671875, 377.3101806640625, 398.5589599609375, 419.8077392578125, 441.0565185546875, 462.3052978515625, 483.5540771484375, 504.8028564453125, 526.0516357421875, 547.3004150390625, 568.5491943359375, 589.7979125976562, 611.0466918945312]}, "gradients/encoder.encoder.layers.23.feed_forward.output_dense.weight": {"_type": "histogram", "values": [2.0, 3.0, 3.0, 1.0, 2.0, 4.0, 1.0, 4.0, 7.0, 8.0, 13.0, 18.0, 8.0, 18.0, 19.0, 42.0, 53.0, 65.0, 83.0, 126.0, 170.0, 238.0, 369.0, 553.0, 850.0, 1329.0, 2140.0, 3940.0, 8095.0, 24864.0, 4026051.0, 98095.0, 13004.0, 5812.0, 3111.0, 1841.0, 1069.0, 717.0, 458.0, 314.0, 228.0, 143.0, 105.0, 88.0, 48.0, 41.0, 33.0, 34.0, 21.0, 11.0, 9.0, 6.0, 9.0, 5.0, 5.0, 4.0, 2.0, 2.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0], "bins": [-0.1392822265625, -0.13475608825683594, -0.13022994995117188, -0.1257038116455078, -0.12117767333984375, -0.11665153503417969, -0.11212539672851562, -0.10759925842285156, -0.1030731201171875, -0.09854698181152344, -0.09402084350585938, -0.08949470520019531, -0.08496856689453125, -0.08044242858886719, -0.07591629028320312, -0.07139015197753906, -0.066864013671875, -0.06233787536621094, -0.057811737060546875, -0.05328559875488281, -0.04875946044921875, -0.04423332214355469, -0.039707183837890625, -0.03518104553222656, -0.0306549072265625, -0.026128768920898438, -0.021602630615234375, -0.017076492309570312, -0.01255035400390625, -0.008024215698242188, -0.003498077392578125, 0.0010280609130859375, 0.00555419921875, 0.010080337524414062, 0.014606475830078125, 0.019132614135742188, 0.02365875244140625, 0.028184890747070312, 0.032711029052734375, 0.03723716735839844, 0.0417633056640625, 0.04628944396972656, 0.050815582275390625, 0.05534172058105469, 0.05986785888671875, 0.06439399719238281, 0.06892013549804688, 0.07344627380371094, 0.077972412109375, 0.08249855041503906, 0.08702468872070312, 0.09155082702636719, 0.09607696533203125, 0.10060310363769531, 0.10512924194335938, 0.10965538024902344, 0.1141815185546875, 0.11870765686035156, 0.12323379516601562, 0.1277599334716797, 0.13228607177734375, 0.1368122100830078, 0.14133834838867188, 0.14586448669433594, 0.150390625]}, "gradients/encoder.encoder.layers.23.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 2.0, 1.0, 0.0, 1.0, 2.0, 2.0, 2.0, 3.0, 4.0, 2.0, 1.0, 7.0, 3.0, 8.0, 10.0, 9.0, 12.0, 16.0, 59.0, 157.0, 257.0, 245.0, 101.0, 33.0, 15.0, 10.0, 5.0, 4.0, 3.0, 5.0, 8.0, 5.0, 3.0, 5.0, 1.0, 0.0, 3.0, 2.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.0083770751953125, -0.008104920387268066, -0.007832765579223633, -0.007560610771179199, -0.007288455963134766, -0.007016301155090332, -0.0067441463470458984, -0.006471991539001465, -0.006199836730957031, -0.005927681922912598, -0.005655527114868164, -0.0053833723068237305, -0.005111217498779297, -0.004839062690734863, -0.00456690788269043, -0.004294753074645996, -0.0040225982666015625, -0.003750443458557129, -0.0034782886505126953, -0.0032061338424682617, -0.002933979034423828, -0.0026618242263793945, -0.002389669418334961, -0.0021175146102905273, -0.0018453598022460938, -0.0015732049942016602, -0.0013010501861572266, -0.001028895378112793, -0.0007567405700683594, -0.0004845857620239258, -0.0002124309539794922, 5.9723854064941406e-05, 0.000331878662109375, 0.0006040334701538086, 0.0008761882781982422, 0.0011483430862426758, 0.0014204978942871094, 0.001692652702331543, 0.0019648075103759766, 0.00223696231842041, 0.0025091171264648438, 0.0027812719345092773, 0.003053426742553711, 0.0033255815505981445, 0.003597736358642578, 0.0038698911666870117, 0.004142045974731445, 0.004414200782775879, 0.0046863555908203125, 0.004958510398864746, 0.00523066520690918, 0.005502820014953613, 0.005774974822998047, 0.0060471296310424805, 0.006319284439086914, 0.006591439247131348, 0.006863594055175781, 0.007135748863220215, 0.0074079036712646484, 0.007680058479309082, 0.007952213287353516, 0.00822436809539795, 0.008496522903442383, 0.008768677711486816, 0.00904083251953125]}, "gradients/encoder.encoder.layers.23.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 0.0, 0.0, 0.0, 2.0, 3.0, 1.0, 11.0, 4.0, 7.0, 16.0, 18.0, 27.0, 41.0, 78.0, 79.0, 108.0, 224.0, 288.0, 527.0, 961.0, 2241.0, 7147.0, 44810.0, 3932220.0, 179173.0, 18520.0, 4260.0, 1575.0, 836.0, 391.0, 229.0, 163.0, 107.0, 74.0, 44.0, 31.0, 21.0, 16.0, 11.0, 13.0, 9.0, 5.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.20361328125, -0.19739723205566406, -0.19118118286132812, -0.1849651336669922, -0.17874908447265625, -0.1725330352783203, -0.16631698608398438, -0.16010093688964844, -0.1538848876953125, -0.14766883850097656, -0.14145278930664062, -0.1352367401123047, -0.12902069091796875, -0.12280464172363281, -0.11658859252929688, -0.11037254333496094, -0.104156494140625, -0.09794044494628906, -0.09172439575195312, -0.08550834655761719, -0.07929229736328125, -0.07307624816894531, -0.06686019897460938, -0.06064414978027344, -0.0544281005859375, -0.04821205139160156, -0.041996002197265625, -0.03577995300292969, -0.02956390380859375, -0.023347854614257812, -0.017131805419921875, -0.010915756225585938, -0.00469970703125, 0.0015163421630859375, 0.007732391357421875, 0.013948440551757812, 0.02016448974609375, 0.026380538940429688, 0.032596588134765625, 0.03881263732910156, 0.0450286865234375, 0.05124473571777344, 0.057460784912109375, 0.06367683410644531, 0.06989288330078125, 0.07610893249511719, 0.08232498168945312, 0.08854103088378906, 0.094757080078125, 0.10097312927246094, 0.10718917846679688, 0.11340522766113281, 0.11962127685546875, 0.1258373260498047, 0.13205337524414062, 0.13826942443847656, 0.1444854736328125, 0.15070152282714844, 0.15691757202148438, 0.1631336212158203, 0.16934967041015625, 0.1755657196044922, 0.18178176879882812, 0.18799781799316406, 0.1942138671875]}, "gradients/encoder.encoder.layers.23.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 3.0, 2.0, 1.0, 3.0, 1.0, 11.0, 8.0, 5.0, 8.0, 11.0, 22.0, 20.0, 29.0, 29.0, 33.0, 50.0, 73.0, 374.0, 2797.0, 181.0, 67.0, 39.0, 47.0, 44.0, 33.0, 37.0, 36.0, 25.0, 22.0, 23.0, 8.0, 10.0, 12.0, 5.0, 10.0, 5.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0], "bins": [-0.0246734619140625, -0.02397298812866211, -0.02327251434326172, -0.022572040557861328, -0.021871566772460938, -0.021171092987060547, -0.020470619201660156, -0.019770145416259766, -0.019069671630859375, -0.018369197845458984, -0.017668724060058594, -0.016968250274658203, -0.016267776489257812, -0.015567302703857422, -0.014866828918457031, -0.01416635513305664, -0.01346588134765625, -0.01276540756225586, -0.012064933776855469, -0.011364459991455078, -0.010663986206054688, -0.009963512420654297, -0.009263038635253906, -0.008562564849853516, -0.007862091064453125, -0.007161617279052734, -0.006461143493652344, -0.005760669708251953, -0.0050601959228515625, -0.004359722137451172, -0.0036592483520507812, -0.0029587745666503906, -0.00225830078125, -0.0015578269958496094, -0.0008573532104492188, -0.00015687942504882812, 0.0005435943603515625, 0.0012440681457519531, 0.0019445419311523438, 0.0026450157165527344, 0.003345489501953125, 0.004045963287353516, 0.004746437072753906, 0.005446910858154297, 0.0061473846435546875, 0.006847858428955078, 0.007548332214355469, 0.00824880599975586, 0.00894927978515625, 0.00964975357055664, 0.010350227355957031, 0.011050701141357422, 0.011751174926757812, 0.012451648712158203, 0.013152122497558594, 0.013852596282958984, 0.014553070068359375, 0.015253543853759766, 0.015954017639160156, 0.016654491424560547, 0.017354965209960938, 0.018055438995361328, 0.01875591278076172, 0.01945638656616211, 0.0201568603515625]}, "gradients/encoder.encoder.layers.23.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 2.0, 5.0, 4.0, 10.0, 35.0, 431.0, 431.0, 77.0, 13.0, 7.0, 3.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.20509663224220276, -0.17570139467716217, -0.14630615711212158, -0.11691092699766159, -0.087515689432621, -0.05812045931816101, -0.028725221753120422, 0.000670015811920166, 0.030065253376960754, 0.05946049094200134, 0.08885572850704193, 0.11825095862150192, 0.14764618873596191, 0.1770414263010025, 0.2064366638660431, 0.23583190143108368, 0.26522713899612427, 0.29462236166000366, 0.32401761412620544, 0.35341283679008484, 0.3828080892562866, 0.412203311920166, 0.4415985345840454, 0.4709937870502472, 0.500389039516449, 0.5297842621803284, 0.5591794848442078, 0.5885747671127319, 0.6179699897766113, 0.6473652124404907, 0.6767604351043701, 0.7061556577682495, 0.7355509400367737, 0.7649461627006531, 0.7943413853645325, 0.8237366676330566, 0.853131890296936, 0.8825271129608154, 0.9119223356246948, 0.9413175582885742, 0.9707128405570984, 1.0001081228256226, 1.029503345489502, 1.0588985681533813, 1.0882937908172607, 1.1176890134811401, 1.1470842361450195, 1.176479458808899, 1.2058746814727783, 1.2352699041366577, 1.264665126800537, 1.2940603494644165, 1.323455572128296, 1.3528509140014648, 1.3822461366653442, 1.4116413593292236, 1.441036581993103, 1.4704318046569824, 1.4998270273208618, 1.5292222499847412, 1.5586174726486206, 1.5880126953125, 1.617408037185669, 1.6468032598495483, 1.6761984825134277]}, "gradients/encoder.encoder.layers.23.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 5.0, 1.0, 4.0, 3.0, 8.0, 8.0, 10.0, 19.0, 32.0, 27.0, 42.0, 47.0, 59.0, 51.0, 63.0, 68.0, 94.0, 67.0, 62.0, 61.0, 63.0, 56.0, 37.0, 32.0, 24.0, 17.0, 12.0, 13.0, 7.0, 7.0, 7.0, 4.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0], "bins": [-0.16973507404327393, -0.1655745804309845, -0.16141408681869507, -0.15725359320640564, -0.1530930995941162, -0.14893262088298798, -0.14477212727069855, -0.14061163365840912, -0.1364511400461197, -0.13229064643383026, -0.12813015282154083, -0.123969666659832, -0.11980917304754257, -0.11564867943525314, -0.11148819327354431, -0.10732769966125488, -0.10316720604896545, -0.09900671243667603, -0.0948462188243866, -0.09068573266267776, -0.08652523905038834, -0.08236474543809891, -0.07820425927639008, -0.07404376566410065, -0.06988327205181122, -0.06572277843952179, -0.06156228855252266, -0.05740179866552353, -0.0532413050532341, -0.04908081144094467, -0.04492032155394554, -0.04075983166694641, -0.03659933805465698, -0.032438844442367554, -0.028278354555368423, -0.024117862805724144, -0.019957371056079865, -0.015796879306435585, -0.011636387556791306, -0.007475895807147026, -0.0033154040575027466, 0.0008450876921415329, 0.005005579441785812, 0.009166071191430092, 0.013326562941074371, 0.01748705469071865, 0.02164754644036293, 0.02580803819000721, 0.02996852993965149, 0.03412902355194092, 0.03828951343894005, 0.04245000332593918, 0.04661049693822861, 0.050770990550518036, 0.054931480437517166, 0.059091970324516296, 0.06325246393680573, 0.06741295754909515, 0.07157345116138458, 0.07573393732309341, 0.07989443093538284, 0.08405492454767227, 0.0882154107093811, 0.09237590432167053, 0.09653639793395996]}, "gradients/encoder.encoder.layers.23.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 2.0, 4.0, 7.0, 7.0, 7.0, 10.0, 10.0, 19.0, 15.0, 25.0, 35.0, 44.0, 51.0, 75.0, 106.0, 127.0, 161.0, 227.0, 318.0, 436.0, 599.0, 871.0, 1180.0, 1673.0, 2580.0, 3955.0, 6539.0, 12930.0, 41748.0, 779580.0, 149733.0, 20865.0, 8953.0, 5196.0, 3218.0, 2112.0, 1513.0, 967.0, 688.0, 516.0, 377.0, 268.0, 195.0, 152.0, 106.0, 77.0, 79.0, 54.0, 35.0, 24.0, 28.0, 15.0, 15.0, 11.0, 10.0, 7.0, 5.0, 5.0, 3.0, 1.0, 3.0], "bins": [-0.10260009765625, -0.09946823120117188, -0.09633636474609375, -0.09320449829101562, -0.0900726318359375, -0.08694076538085938, -0.08380889892578125, -0.08067703247070312, -0.077545166015625, -0.07441329956054688, -0.07128143310546875, -0.06814956665039062, -0.0650177001953125, -0.061885833740234375, -0.05875396728515625, -0.055622100830078125, -0.052490234375, -0.049358367919921875, -0.04622650146484375, -0.043094635009765625, -0.0399627685546875, -0.036830902099609375, -0.03369903564453125, -0.030567169189453125, -0.027435302734375, -0.024303436279296875, -0.02117156982421875, -0.018039703369140625, -0.0149078369140625, -0.011775970458984375, -0.00864410400390625, -0.005512237548828125, -0.00238037109375, 0.000751495361328125, 0.00388336181640625, 0.007015228271484375, 0.0101470947265625, 0.013278961181640625, 0.01641082763671875, 0.019542694091796875, 0.022674560546875, 0.025806427001953125, 0.02893829345703125, 0.032070159912109375, 0.0352020263671875, 0.038333892822265625, 0.04146575927734375, 0.044597625732421875, 0.0477294921875, 0.050861358642578125, 0.05399322509765625, 0.057125091552734375, 0.0602569580078125, 0.06338882446289062, 0.06652069091796875, 0.06965255737304688, 0.072784423828125, 0.07591629028320312, 0.07904815673828125, 0.08218002319335938, 0.0853118896484375, 0.08844375610351562, 0.09157562255859375, 0.09470748901367188, 0.09783935546875]}, "gradients/encoder.encoder.layers.23.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 2.0, 1.0, 0.0, 1.0, 2.0, 2.0, 2.0, 4.0, 3.0, 2.0, 1.0, 7.0, 3.0, 8.0, 10.0, 11.0, 10.0, 16.0, 64.0, 171.0, 263.0, 230.0, 100.0, 24.0, 16.0, 9.0, 5.0, 4.0, 5.0, 3.0, 9.0, 4.0, 3.0, 5.0, 1.0, 0.0, 3.0, 3.0, 2.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.00836181640625, -0.008089661598205566, -0.007817506790161133, -0.007545351982116699, -0.007273197174072266, -0.007001042366027832, -0.0067288875579833984, -0.006456732749938965, -0.006184577941894531, -0.005912423133850098, -0.005640268325805664, -0.0053681135177612305, -0.005095958709716797, -0.004823803901672363, -0.00455164909362793, -0.004279494285583496, -0.0040073394775390625, -0.003735184669494629, -0.0034630298614501953, -0.0031908750534057617, -0.002918720245361328, -0.0026465654373168945, -0.002374410629272461, -0.0021022558212280273, -0.0018301010131835938, -0.0015579462051391602, -0.0012857913970947266, -0.001013636589050293, -0.0007414817810058594, -0.0004693269729614258, -0.0001971721649169922, 7.49826431274414e-05, 0.000347137451171875, 0.0006192922592163086, 0.0008914470672607422, 0.0011636018753051758, 0.0014357566833496094, 0.001707911491394043, 0.0019800662994384766, 0.00225222110748291, 0.0025243759155273438, 0.0027965307235717773, 0.003068685531616211, 0.0033408403396606445, 0.003612995147705078, 0.0038851499557495117, 0.004157304763793945, 0.004429459571838379, 0.0047016143798828125, 0.004973769187927246, 0.00524592399597168, 0.005518078804016113, 0.005790233612060547, 0.0060623884201049805, 0.006334543228149414, 0.006606698036193848, 0.006878852844238281, 0.007151007652282715, 0.0074231624603271484, 0.007695317268371582, 0.007967472076416016, 0.00823962688446045, 0.008511781692504883, 0.008783936500549316, 0.00905609130859375]}, "gradients/encoder.encoder.layers.23.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 4.0, 4.0, 0.0, 6.0, 5.0, 12.0, 16.0, 12.0, 25.0, 57.0, 46.0, 89.0, 100.0, 159.0, 262.0, 348.0, 534.0, 936.0, 1518.0, 3229.0, 7652.0, 32971.0, 598535.0, 360095.0, 27901.0, 7175.0, 2882.0, 1521.0, 815.0, 562.0, 354.0, 220.0, 139.0, 116.0, 88.0, 55.0, 33.0, 29.0, 16.0, 13.0, 9.0, 5.0, 8.0, 5.0, 3.0, 2.0, 1.0, 3.0, 0.0, 0.0, 1.0], "bins": [-0.17919921875, -0.17421531677246094, -0.16923141479492188, -0.1642475128173828, -0.15926361083984375, -0.1542797088623047, -0.14929580688476562, -0.14431190490722656, -0.1393280029296875, -0.13434410095214844, -0.12936019897460938, -0.12437629699707031, -0.11939239501953125, -0.11440849304199219, -0.10942459106445312, -0.10444068908691406, -0.099456787109375, -0.09447288513183594, -0.08948898315429688, -0.08450508117675781, -0.07952117919921875, -0.07453727722167969, -0.06955337524414062, -0.06456947326660156, -0.0595855712890625, -0.05460166931152344, -0.049617767333984375, -0.04463386535644531, -0.03964996337890625, -0.03466606140136719, -0.029682159423828125, -0.024698257446289062, -0.01971435546875, -0.014730453491210938, -0.009746551513671875, -0.0047626495361328125, 0.00022125244140625, 0.0052051544189453125, 0.010189056396484375, 0.015172958374023438, 0.0201568603515625, 0.025140762329101562, 0.030124664306640625, 0.03510856628417969, 0.04009246826171875, 0.04507637023925781, 0.050060272216796875, 0.05504417419433594, 0.060028076171875, 0.06501197814941406, 0.06999588012695312, 0.07497978210449219, 0.07996368408203125, 0.08494758605957031, 0.08993148803710938, 0.09491539001464844, 0.0998992919921875, 0.10488319396972656, 0.10986709594726562, 0.11485099792480469, 0.11983489990234375, 0.12481880187988281, 0.12980270385742188, 0.13478660583496094, 0.1397705078125]}, "gradients/encoder.encoder.layers.23.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 3.0, 3.0, 0.0, 5.0, 4.0, 10.0, 11.0, 6.0, 9.0, 20.0, 9.0, 24.0, 27.0, 23.0, 28.0, 30.0, 23.0, 42.0, 30.0, 41.0, 46.0, 56.0, 52.0, 54.0, 42.0, 42.0, 36.0, 41.0, 38.0, 45.0, 30.0, 29.0, 26.0, 26.0, 19.0, 19.0, 15.0, 17.0, 3.0, 6.0, 7.0, 3.0, 8.0, 4.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.0421142578125, -0.040943145751953125, -0.03977203369140625, -0.038600921630859375, -0.0374298095703125, -0.036258697509765625, -0.03508758544921875, -0.033916473388671875, -0.032745361328125, -0.031574249267578125, -0.03040313720703125, -0.029232025146484375, -0.0280609130859375, -0.026889801025390625, -0.02571868896484375, -0.024547576904296875, -0.02337646484375, -0.022205352783203125, -0.02103424072265625, -0.019863128662109375, -0.0186920166015625, -0.017520904541015625, -0.01634979248046875, -0.015178680419921875, -0.014007568359375, -0.012836456298828125, -0.01166534423828125, -0.010494232177734375, -0.0093231201171875, -0.008152008056640625, -0.00698089599609375, -0.005809783935546875, -0.004638671875, -0.003467559814453125, -0.00229644775390625, -0.001125335693359375, 4.57763671875e-05, 0.001216888427734375, 0.00238800048828125, 0.003559112548828125, 0.004730224609375, 0.005901336669921875, 0.00707244873046875, 0.008243560791015625, 0.0094146728515625, 0.010585784912109375, 0.01175689697265625, 0.012928009033203125, 0.01409912109375, 0.015270233154296875, 0.01644134521484375, 0.017612457275390625, 0.0187835693359375, 0.019954681396484375, 0.02112579345703125, 0.022296905517578125, 0.023468017578125, 0.024639129638671875, 0.02581024169921875, 0.026981353759765625, 0.0281524658203125, 0.029323577880859375, 0.03049468994140625, 0.031665802001953125, 0.0328369140625]}, "gradients/encoder.encoder.layers.23.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 2.0, 4.0, 1.0, 1.0, 10.0, 2.0, 3.0, 9.0, 17.0, 15.0, 21.0, 27.0, 26.0, 59.0, 71.0, 94.0, 171.0, 268.0, 507.0, 1067.0, 2957.0, 13419.0, 657751.0, 354995.0, 11931.0, 2885.0, 966.0, 498.0, 261.0, 159.0, 111.0, 68.0, 52.0, 29.0, 23.0, 26.0, 15.0, 7.0, 8.0, 4.0, 5.0, 4.0, 4.0, 3.0, 3.0, 2.0, 2.0, 1.0, 1.0, 2.0], "bins": [-3.975629806518555e-05, -3.868062049150467e-05, -3.760494291782379e-05, -3.6529265344142914e-05, -3.5453587770462036e-05, -3.437791019678116e-05, -3.330223262310028e-05, -3.22265550494194e-05, -3.1150877475738525e-05, -3.0075199902057648e-05, -2.899952232837677e-05, -2.7923844754695892e-05, -2.6848167181015015e-05, -2.5772489607334137e-05, -2.469681203365326e-05, -2.362113445997238e-05, -2.2545456886291504e-05, -2.1469779312610626e-05, -2.039410173892975e-05, -1.931842416524887e-05, -1.8242746591567993e-05, -1.7167069017887115e-05, -1.6091391444206238e-05, -1.501571387052536e-05, -1.3940036296844482e-05, -1.2864358723163605e-05, -1.1788681149482727e-05, -1.071300357580185e-05, -9.637326002120972e-06, -8.561648428440094e-06, -7.485970854759216e-06, -6.410293281078339e-06, -5.334615707397461e-06, -4.258938133716583e-06, -3.1832605600357056e-06, -2.107582986354828e-06, -1.0319054126739502e-06, 4.377216100692749e-08, 1.1194497346878052e-06, 2.195127308368683e-06, 3.2708048820495605e-06, 4.346482455730438e-06, 5.422160029411316e-06, 6.497837603092194e-06, 7.573515176773071e-06, 8.649192750453949e-06, 9.724870324134827e-06, 1.0800547897815704e-05, 1.1876225471496582e-05, 1.295190304517746e-05, 1.4027580618858337e-05, 1.5103258192539215e-05, 1.6178935766220093e-05, 1.725461333990097e-05, 1.8330290913581848e-05, 1.9405968487262726e-05, 2.0481646060943604e-05, 2.155732363462448e-05, 2.263300120830536e-05, 2.3708678781986237e-05, 2.4784356355667114e-05, 2.5860033929347992e-05, 2.693571150302887e-05, 2.8011389076709747e-05, 2.9087066650390625e-05]}, "gradients/encoder.encoder.layers.23.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 4.0, 1.0, 0.0, 1.0, 0.0, 4.0, 6.0, 1.0, 2.0, 4.0, 9.0, 8.0, 9.0, 15.0, 14.0, 16.0, 54.0, 49.0, 74.0, 105.0, 160.0, 129.0, 104.0, 51.0, 43.0, 43.0, 25.0, 24.0, 11.0, 14.0, 10.0, 6.0, 2.0, 2.0, 5.0, 4.0, 4.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0], "bins": [-9.417533874511719e-06, -9.181909263134003e-06, -8.946284651756287e-06, -8.71066004037857e-06, -8.475035429000854e-06, -8.239410817623138e-06, -8.003786206245422e-06, -7.768161594867706e-06, -7.53253698348999e-06, -7.296912372112274e-06, -7.061287760734558e-06, -6.825663149356842e-06, -6.590038537979126e-06, -6.35441392660141e-06, -6.118789315223694e-06, -5.883164703845978e-06, -5.647540092468262e-06, -5.411915481090546e-06, -5.17629086971283e-06, -4.9406662583351135e-06, -4.7050416469573975e-06, -4.469417035579681e-06, -4.233792424201965e-06, -3.998167812824249e-06, -3.762543201446533e-06, -3.526918590068817e-06, -3.291293978691101e-06, -3.055669367313385e-06, -2.820044755935669e-06, -2.584420144557953e-06, -2.348795533180237e-06, -2.1131709218025208e-06, -1.8775463104248047e-06, -1.6419216990470886e-06, -1.4062970876693726e-06, -1.1706724762916565e-06, -9.350478649139404e-07, -6.994232535362244e-07, -4.637986421585083e-07, -2.2817403078079224e-07, 7.450580596923828e-09, 2.430751919746399e-07, 4.78699803352356e-07, 7.14324414730072e-07, 9.499490261077881e-07, 1.1855736374855042e-06, 1.4211982488632202e-06, 1.6568228602409363e-06, 1.8924474716186523e-06, 2.1280720829963684e-06, 2.3636966943740845e-06, 2.5993213057518005e-06, 2.8349459171295166e-06, 3.0705705285072327e-06, 3.3061951398849487e-06, 3.541819751262665e-06, 3.777444362640381e-06, 4.013068974018097e-06, 4.248693585395813e-06, 4.484318196773529e-06, 4.719942808151245e-06, 4.955567419528961e-06, 5.191192030906677e-06, 5.426816642284393e-06, 5.662441253662109e-06]}, "gradients/encoder.encoder.layers.23.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 3.0, 2.0, 6.0, 9.0, 2.0, 6.0, 9.0, 11.0, 14.0, 34.0, 33.0, 62.0, 67.0, 118.0, 214.0, 402.0, 837.0, 2571.0, 11907.0, 973414.0, 51541.0, 4674.0, 1338.0, 565.0, 255.0, 157.0, 104.0, 66.0, 40.0, 23.0, 18.0, 22.0, 3.0, 6.0, 5.0, 7.0, 6.0, 6.0, 3.0, 3.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.16312026977539e-05, -5.9689395129680634e-05, -5.774758756160736e-05, -5.580577999353409e-05, -5.3863972425460815e-05, -5.192216485738754e-05, -4.998035728931427e-05, -4.8038549721241e-05, -4.6096742153167725e-05, -4.415493458509445e-05, -4.221312701702118e-05, -4.0271319448947906e-05, -3.8329511880874634e-05, -3.638770431280136e-05, -3.444589674472809e-05, -3.2504089176654816e-05, -3.056228160858154e-05, -2.862047404050827e-05, -2.6678666472434998e-05, -2.4736858904361725e-05, -2.2795051336288452e-05, -2.085324376821518e-05, -1.8911436200141907e-05, -1.6969628632068634e-05, -1.5027821063995361e-05, -1.3086013495922089e-05, -1.1144205927848816e-05, -9.202398359775543e-06, -7.2605907917022705e-06, -5.318783223628998e-06, -3.376975655555725e-06, -1.4351680874824524e-06, 5.066394805908203e-07, 2.448447048664093e-06, 4.390254616737366e-06, 6.3320621848106384e-06, 8.273869752883911e-06, 1.0215677320957184e-05, 1.2157484889030457e-05, 1.409929245710373e-05, 1.6041100025177002e-05, 1.7982907593250275e-05, 1.9924715161323547e-05, 2.186652272939682e-05, 2.3808330297470093e-05, 2.5750137865543365e-05, 2.7691945433616638e-05, 2.963375300168991e-05, 3.1575560569763184e-05, 3.3517368137836456e-05, 3.545917570590973e-05, 3.7400983273983e-05, 3.9342790842056274e-05, 4.128459841012955e-05, 4.322640597820282e-05, 4.516821354627609e-05, 4.7110021114349365e-05, 4.905182868242264e-05, 5.099363625049591e-05, 5.293544381856918e-05, 5.4877251386642456e-05, 5.681905895471573e-05, 5.8760866522789e-05, 6.0702674090862274e-05, 6.264448165893555e-05]}, "gradients/encoder.encoder.layers.23.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 1.0, 1.0, 3.0, 5.0, 2.0, 7.0, 8.0, 5.0, 6.0, 17.0, 14.0, 23.0, 37.0, 50.0, 89.0, 362.0, 173.0, 68.0, 33.0, 43.0, 20.0, 13.0, 9.0, 3.0, 3.0, 2.0, 3.0, 4.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.4483928680419922e-05, -1.4027580618858337e-05, -1.3571232557296753e-05, -1.3114884495735168e-05, -1.2658536434173584e-05, -1.2202188372612e-05, -1.1745840311050415e-05, -1.128949224948883e-05, -1.0833144187927246e-05, -1.0376796126365662e-05, -9.920448064804077e-06, -9.464100003242493e-06, -9.007751941680908e-06, -8.551403880119324e-06, -8.09505581855774e-06, -7.638707756996155e-06, -7.18235969543457e-06, -6.726011633872986e-06, -6.269663572311401e-06, -5.813315510749817e-06, -5.356967449188232e-06, -4.900619387626648e-06, -4.4442713260650635e-06, -3.987923264503479e-06, -3.5315752029418945e-06, -3.07522714138031e-06, -2.6188790798187256e-06, -2.162531018257141e-06, -1.7061829566955566e-06, -1.2498348951339722e-06, -7.934868335723877e-07, -3.371387720108032e-07, 1.1920928955078125e-07, 5.755573511123657e-07, 1.0319054126739502e-06, 1.4882534742355347e-06, 1.944601535797119e-06, 2.4009495973587036e-06, 2.857297658920288e-06, 3.3136457204818726e-06, 3.769993782043457e-06, 4.2263418436050415e-06, 4.682689905166626e-06, 5.1390379667282104e-06, 5.595386028289795e-06, 6.051734089851379e-06, 6.508082151412964e-06, 6.964430212974548e-06, 7.420778274536133e-06, 7.877126336097717e-06, 8.333474397659302e-06, 8.789822459220886e-06, 9.24617052078247e-06, 9.702518582344055e-06, 1.015886664390564e-05, 1.0615214705467224e-05, 1.1071562767028809e-05, 1.1527910828590393e-05, 1.1984258890151978e-05, 1.2440606951713562e-05, 1.2896955013275146e-05, 1.3353303074836731e-05, 1.3809651136398315e-05, 1.42659991979599e-05, 1.4722347259521484e-05]}, "gradients/encoder.encoder.layers.23.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 3.0, 1.0, 0.0, 2.0, 0.0, 2.0, 1.0, 1.0, 9.0, 9.0, 14.0, 23.0, 99.0, 508.0, 229.0, 47.0, 18.0, 16.0, 7.0, 3.0, 4.0, 2.0, 5.0, 1.0, 3.0, 0.0, 1.0, 3.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-1.525599718093872, -1.4792274236679077, -1.4328551292419434, -1.3864829540252686, -1.3401106595993042, -1.2937383651733398, -1.247366189956665, -1.2009938955307007, -1.1546216011047363, -1.108249306678772, -1.0618770122528076, -1.0155048370361328, -0.9691325426101685, -0.9227602481842041, -0.8763880133628845, -0.8300157785415649, -0.7836434841156006, -0.7372711896896362, -0.6908989548683167, -0.6445267200469971, -0.5981544256210327, -0.5517821311950684, -0.5054098963737488, -0.4590376317501068, -0.41266536712646484, -0.3662931025028229, -0.3199208378791809, -0.27354857325553894, -0.22717630863189697, -0.180804044008255, -0.13443177938461304, -0.08805951476097107, -0.0416872501373291, 0.004685014486312866, 0.051057279109954834, 0.0974295437335968, 0.14380180835723877, 0.19017407298088074, 0.2365463376045227, 0.2829186022281647, 0.32929086685180664, 0.3756631314754486, 0.4220353960990906, 0.46840766072273254, 0.5147799253463745, 0.5611522197723389, 0.6075244545936584, 0.653896689414978, 0.7002689838409424, 0.7466412782669067, 0.7930135130882263, 0.8393857479095459, 0.8857580423355103, 0.9321303367614746, 0.9785025715827942, 1.0248748064041138, 1.0712471008300781, 1.1176193952560425, 1.1639916896820068, 1.2103638648986816, 1.256736159324646, 1.3031084537506104, 1.3494806289672852, 1.3958529233932495, 1.4422252178192139]}, "gradients/encoder.encoder.layers.23.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 4.0, 3.0, 1.0, 3.0, 3.0, 0.0, 1.0, 4.0, 7.0, 3.0, 3.0, 11.0, 6.0, 8.0, 9.0, 18.0, 23.0, 36.0, 61.0, 67.0, 116.0, 123.0, 113.0, 103.0, 85.0, 54.0, 44.0, 18.0, 16.0, 9.0, 14.0, 9.0, 5.0, 6.0, 3.0, 3.0, 2.0, 1.0, 5.0, 2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0], "bins": [-0.5926666855812073, -0.5752219557762146, -0.5577771663665771, -0.5403324365615845, -0.522887647151947, -0.5054429173469543, -0.4879981577396393, -0.4705533981323242, -0.45310863852500916, -0.4356638789176941, -0.41821911931037903, -0.40077435970306396, -0.3833296298980713, -0.36588484048843384, -0.34844011068344116, -0.3309953510761261, -0.31355059146881104, -0.29610583186149597, -0.2786610722541809, -0.26121631264686584, -0.24377156794071198, -0.2263268083333969, -0.20888206362724304, -0.19143730401992798, -0.17399254441261292, -0.15654778480529785, -0.1391030251979828, -0.12165828049182892, -0.10421352088451385, -0.08676876127719879, -0.06932400912046432, -0.05187925696372986, -0.03443443775177002, -0.016989681869745255, 0.0004550740122795105, 0.017899829894304276, 0.03534458577632904, 0.052789345383644104, 0.07023409754037857, 0.08767884969711304, 0.1051236093044281, 0.12256836891174316, 0.14001312851905823, 0.1574578732252121, 0.17490263283252716, 0.19234739243984222, 0.2097921371459961, 0.22723689675331116, 0.24468165636062622, 0.2621264159679413, 0.27957117557525635, 0.2970159351825714, 0.3144606947898865, 0.33190542459487915, 0.3493501842021942, 0.3667949438095093, 0.38423970341682434, 0.4016844630241394, 0.41912922263145447, 0.43657398223876953, 0.4540187120437622, 0.47146350145339966, 0.48890823125839233, 0.5063530206680298, 0.5237977504730225]}, "gradients/encoder.encoder.layers.22.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 2.0, 1.0, 1.0, 0.0, 3.0, 3.0, 4.0, 2.0, 6.0, 8.0, 3.0, 11.0, 8.0, 19.0, 17.0, 25.0, 32.0, 67.0, 267.0, 3679.0, 4148563.0, 40270.0, 996.0, 124.0, 50.0, 26.0, 24.0, 16.0, 7.0, 11.0, 10.0, 10.0, 4.0, 5.0, 3.0, 2.0, 3.0, 4.0, 3.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-16.59375, -16.0546875, -15.515625, -14.9765625, -14.4375, -13.8984375, -13.359375, -12.8203125, -12.28125, -11.7421875, -11.203125, -10.6640625, -10.125, -9.5859375, -9.046875, -8.5078125, -7.96875, -7.4296875, -6.890625, -6.3515625, -5.8125, -5.2734375, -4.734375, -4.1953125, -3.65625, -3.1171875, -2.578125, -2.0390625, -1.5, -0.9609375, -0.421875, 0.1171875, 0.65625, 1.1953125, 1.734375, 2.2734375, 2.8125, 3.3515625, 3.890625, 4.4296875, 4.96875, 5.5078125, 6.046875, 6.5859375, 7.125, 7.6640625, 8.203125, 8.7421875, 9.28125, 9.8203125, 10.359375, 10.8984375, 11.4375, 11.9765625, 12.515625, 13.0546875, 13.59375, 14.1328125, 14.671875, 15.2109375, 15.75, 16.2890625, 16.828125, 17.3671875, 17.90625]}, "gradients/encoder.encoder.layers.22.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 2.0, 1.0, 1.0, 0.0, 2.0, 2.0, 3.0, 2.0, 4.0, 2.0, 1.0, 7.0, 3.0, 10.0, 9.0, 10.0, 8.0, 17.0, 57.0, 155.0, 255.0, 247.0, 105.0, 34.0, 15.0, 10.0, 4.0, 4.0, 5.0, 4.0, 6.0, 7.0, 3.0, 3.0, 2.0, 1.0, 3.0, 3.0, 2.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.00823974609375, -0.007972240447998047, -0.007704734802246094, -0.007437229156494141, -0.0071697235107421875, -0.006902217864990234, -0.006634712219238281, -0.006367206573486328, -0.006099700927734375, -0.005832195281982422, -0.005564689636230469, -0.005297183990478516, -0.0050296783447265625, -0.004762172698974609, -0.004494667053222656, -0.004227161407470703, -0.00395965576171875, -0.003692150115966797, -0.0034246444702148438, -0.0031571388244628906, -0.0028896331787109375, -0.0026221275329589844, -0.0023546218872070312, -0.002087116241455078, -0.001819610595703125, -0.0015521049499511719, -0.0012845993041992188, -0.0010170936584472656, -0.0007495880126953125, -0.0004820823669433594, -0.00021457672119140625, 5.2928924560546875e-05, 0.0003204345703125, 0.0005879402160644531, 0.0008554458618164062, 0.0011229515075683594, 0.0013904571533203125, 0.0016579627990722656, 0.0019254684448242188, 0.002192974090576172, 0.002460479736328125, 0.002727985382080078, 0.0029954910278320312, 0.0032629966735839844, 0.0035305023193359375, 0.0037980079650878906, 0.004065513610839844, 0.004333019256591797, 0.00460052490234375, 0.004868030548095703, 0.005135536193847656, 0.005403041839599609, 0.0056705474853515625, 0.005938053131103516, 0.006205558776855469, 0.006473064422607422, 0.006740570068359375, 0.007008075714111328, 0.007275581359863281, 0.007543087005615234, 0.0078105926513671875, 0.00807809829711914, 0.008345603942871094, 0.008613109588623047, 0.008880615234375]}, "gradients/encoder.encoder.layers.22.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 3.0, 8.0, 5.0, 3.0, 6.0, 5.0, 6.0, 14.0, 12.0, 18.0, 12.0, 20.0, 18.0, 29.0, 36.0, 44.0, 43.0, 57.0, 58.0, 64.0, 102.0, 298.0, 7418.0, 4157579.0, 26331.0, 976.0, 182.0, 134.0, 89.0, 85.0, 85.0, 75.0, 68.0, 58.0, 56.0, 56.0, 43.0, 36.0, 30.0, 19.0, 22.0, 15.0, 15.0, 7.0, 10.0, 8.0, 12.0, 5.0, 7.0, 6.0, 3.0, 2.0, 2.0, 0.0, 1.0, 2.0, 1.0], "bins": [-2.310546875, -2.235107421875, -2.15966796875, -2.084228515625, -2.0087890625, -1.933349609375, -1.85791015625, -1.782470703125, -1.70703125, -1.631591796875, -1.55615234375, -1.480712890625, -1.4052734375, -1.329833984375, -1.25439453125, -1.178955078125, -1.103515625, -1.028076171875, -0.95263671875, -0.877197265625, -0.8017578125, -0.726318359375, -0.65087890625, -0.575439453125, -0.5, -0.424560546875, -0.34912109375, -0.273681640625, -0.1982421875, -0.122802734375, -0.04736328125, 0.028076171875, 0.103515625, 0.178955078125, 0.25439453125, 0.329833984375, 0.4052734375, 0.480712890625, 0.55615234375, 0.631591796875, 0.70703125, 0.782470703125, 0.85791015625, 0.933349609375, 1.0087890625, 1.084228515625, 1.15966796875, 1.235107421875, 1.310546875, 1.385986328125, 1.46142578125, 1.536865234375, 1.6123046875, 1.687744140625, 1.76318359375, 1.838623046875, 1.9140625, 1.989501953125, 2.06494140625, 2.140380859375, 2.2158203125, 2.291259765625, 2.36669921875, 2.442138671875, 2.517578125]}, "gradients/encoder.encoder.layers.22.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 3.0, 8.0, 5.0, 5.0, 4.0, 5.0, 6.0, 14.0, 12.0, 18.0, 12.0, 20.0, 19.0, 29.0, 35.0, 44.0, 44.0, 57.0, 58.0, 61.0, 77.0, 103.0, 139.0, 1948.0, 199.0, 134.0, 100.0, 116.0, 87.0, 87.0, 83.0, 75.0, 70.0, 57.0, 55.0, 56.0, 43.0, 36.0, 30.0, 19.0, 22.0, 15.0, 15.0, 7.0, 10.0, 8.0, 12.0, 5.0, 7.0, 6.0, 3.0, 2.0, 2.0, 0.0, 1.0, 2.0, 1.0], "bins": [-0.0181732177734375, -0.01757979393005371, -0.016986370086669922, -0.016392946243286133, -0.015799522399902344, -0.015206098556518555, -0.014612674713134766, -0.014019250869750977, -0.013425827026367188, -0.012832403182983398, -0.01223897933959961, -0.01164555549621582, -0.011052131652832031, -0.010458707809448242, -0.009865283966064453, -0.009271860122680664, -0.008678436279296875, -0.008085012435913086, -0.007491588592529297, -0.006898164749145508, -0.006304740905761719, -0.00571131706237793, -0.005117893218994141, -0.0045244693756103516, -0.0039310455322265625, -0.0033376216888427734, -0.0027441978454589844, -0.0021507740020751953, -0.0015573501586914062, -0.0009639263153076172, -0.0003705024719238281, 0.00022292137145996094, 0.00081634521484375, 0.001409769058227539, 0.002003192901611328, 0.002596616744995117, 0.0031900405883789062, 0.0037834644317626953, 0.004376888275146484, 0.0049703121185302734, 0.0055637359619140625, 0.0061571598052978516, 0.006750583648681641, 0.00734400749206543, 0.007937431335449219, 0.008530855178833008, 0.009124279022216797, 0.009717702865600586, 0.010311126708984375, 0.010904550552368164, 0.011497974395751953, 0.012091398239135742, 0.012684822082519531, 0.01327824592590332, 0.01387166976928711, 0.014465093612670898, 0.015058517456054688, 0.015651941299438477, 0.016245365142822266, 0.016838788986206055, 0.017432212829589844, 0.018025636672973633, 0.018619060516357422, 0.01921248435974121, 0.019805908203125]}, "gradients/encoder.encoder.layers.22.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 4.0, 0.0, 4.0, 14.0, 20.0, 117.0, 522.0, 161.0, 77.0, 35.0, 27.0, 11.0, 7.0, 6.0, 3.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.8999651670455933, -0.8597136735916138, -0.8194621205329895, -0.77921062707901, -0.7389590740203857, -0.6987075805664062, -0.6584560871124268, -0.6182045936584473, -0.577953040599823, -0.5377015471458435, -0.49744999408721924, -0.45719850063323975, -0.41694697737693787, -0.376695454120636, -0.3364439606666565, -0.2961924374103546, -0.25594091415405273, -0.21568939089775085, -0.17543788254261017, -0.13518637418746948, -0.0949348509311676, -0.05468332767486572, -0.014431819319725037, 0.02581968903541565, 0.06607121229171753, 0.10632272809743881, 0.1465742439031601, 0.18682575225830078, 0.22707727551460266, 0.26732879877090454, 0.30758029222488403, 0.3478318154811859, 0.38808345794677734, 0.4283349812030792, 0.4685865044593811, 0.5088379979133606, 0.5490895509719849, 0.5893410444259644, 0.6295925378799438, 0.6698440313339233, 0.7100955843925476, 0.7503470778465271, 0.7905986309051514, 0.8308501243591309, 0.8711016178131104, 0.9113531708717346, 0.9516046643257141, 0.9918562173843384, 1.0321077108383179, 1.0723592042922974, 1.1126106977462769, 1.152862310409546, 1.1931138038635254, 1.2333652973175049, 1.2736167907714844, 1.3138682842254639, 1.3541197776794434, 1.3943712711334229, 1.4346227645874023, 1.4748743772506714, 1.5151258707046509, 1.5553773641586304, 1.5956288576126099, 1.6358803510665894, 1.6761319637298584]}, "gradients/encoder.encoder.layers.22.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 2.0, 4.0, 4.0, 7.0, 9.0, 8.0, 13.0, 8.0, 16.0, 16.0, 36.0, 38.0, 25.0, 35.0, 61.0, 70.0, 63.0, 61.0, 74.0, 74.0, 54.0, 57.0, 47.0, 43.0, 34.0, 40.0, 30.0, 15.0, 15.0, 11.0, 7.0, 7.0, 4.0, 6.0, 2.0, 0.0, 2.0, 2.0, 2.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.4819045066833496, -0.46891212463378906, -0.4559197425842285, -0.4429273307323456, -0.42993494868278503, -0.4169425666332245, -0.40395015478134155, -0.390957772731781, -0.37796539068222046, -0.3649730086326599, -0.35198062658309937, -0.33898821473121643, -0.3259958326816559, -0.31300345063209534, -0.3000110387802124, -0.28701865673065186, -0.2740262746810913, -0.26103389263153076, -0.24804149568080902, -0.23504909873008728, -0.22205671668052673, -0.2090643346309662, -0.19607193768024445, -0.1830795407295227, -0.17008715867996216, -0.1570947766304016, -0.14410237967967987, -0.13110998272895813, -0.11811760067939758, -0.10512521117925644, -0.0921328216791153, -0.07914043217897415, -0.06614804267883301, -0.053155653178691864, -0.04016326367855072, -0.027170874178409576, -0.014178484678268433, -0.0011860951781272888, 0.011806294322013855, 0.024798683822155, 0.03779107332229614, 0.050783462822437286, 0.06377585232257843, 0.07676824182271957, 0.08976063132286072, 0.10275302082300186, 0.115745410323143, 0.12873780727386475, 0.1417301893234253, 0.15472257137298584, 0.16771496832370758, 0.18070736527442932, 0.19369974732398987, 0.20669212937355042, 0.21968452632427216, 0.2326769232749939, 0.24566930532455444, 0.258661687374115, 0.27165406942367554, 0.28464648127555847, 0.297638863325119, 0.31063124537467957, 0.3236236572265625, 0.33661603927612305, 0.3496084213256836]}, "gradients/encoder.encoder.layers.22.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 0.0, 2.0, 5.0, 2.0, 10.0, 8.0, 9.0, 17.0, 17.0, 16.0, 30.0, 43.0, 60.0, 71.0, 117.0, 159.0, 189.0, 265.0, 363.0, 492.0, 732.0, 995.0, 1401.0, 2005.0, 3269.0, 4983.0, 8418.0, 17614.0, 88312.0, 804837.0, 74639.0, 16594.0, 8169.0, 4784.0, 3109.0, 1984.0, 1356.0, 971.0, 669.0, 506.0, 346.0, 248.0, 215.0, 137.0, 106.0, 86.0, 63.0, 25.0, 34.0, 17.0, 19.0, 16.0, 9.0, 12.0, 4.0, 3.0, 6.0, 2.0, 0.0, 2.0], "bins": [-0.182861328125, -0.1773967742919922, -0.17193222045898438, -0.16646766662597656, -0.16100311279296875, -0.15553855895996094, -0.15007400512695312, -0.1446094512939453, -0.1391448974609375, -0.1336803436279297, -0.12821578979492188, -0.12275123596191406, -0.11728668212890625, -0.11182212829589844, -0.10635757446289062, -0.10089302062988281, -0.095428466796875, -0.08996391296386719, -0.08449935913085938, -0.07903480529785156, -0.07357025146484375, -0.06810569763183594, -0.06264114379882812, -0.05717658996582031, -0.0517120361328125, -0.04624748229980469, -0.040782928466796875, -0.03531837463378906, -0.02985382080078125, -0.024389266967773438, -0.018924713134765625, -0.013460159301757812, -0.00799560546875, -0.0025310516357421875, 0.002933502197265625, 0.008398056030273438, 0.01386260986328125, 0.019327163696289062, 0.024791717529296875, 0.030256271362304688, 0.0357208251953125, 0.04118537902832031, 0.046649932861328125, 0.05211448669433594, 0.05757904052734375, 0.06304359436035156, 0.06850814819335938, 0.07397270202636719, 0.079437255859375, 0.08490180969238281, 0.09036636352539062, 0.09583091735839844, 0.10129547119140625, 0.10676002502441406, 0.11222457885742188, 0.11768913269042969, 0.1231536865234375, 0.1286182403564453, 0.13408279418945312, 0.13954734802246094, 0.14501190185546875, 0.15047645568847656, 0.15594100952148438, 0.1614055633544922, 0.1668701171875]}, "gradients/encoder.encoder.layers.22.attention.out_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 3.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 4.0, 2.0, 5.0, 2.0, 2.0, 5.0, 6.0, 10.0, 7.0, 12.0, 10.0, 21.0, 98.0, 172.0, 265.0, 200.0, 82.0, 31.0, 15.0, 7.0, 3.0, 6.0, 4.0, 3.0, 7.0, 8.0, 3.0, 2.0, 2.0, 2.0, 1.0, 3.0, 1.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.00798797607421875, -0.007726311683654785, -0.00746464729309082, -0.0072029829025268555, -0.006941318511962891, -0.006679654121398926, -0.006417989730834961, -0.006156325340270996, -0.005894660949707031, -0.005632996559143066, -0.0053713321685791016, -0.005109667778015137, -0.004848003387451172, -0.004586338996887207, -0.004324674606323242, -0.004063010215759277, -0.0038013458251953125, -0.0035396814346313477, -0.003278017044067383, -0.003016352653503418, -0.002754688262939453, -0.0024930238723754883, -0.0022313594818115234, -0.0019696950912475586, -0.0017080307006835938, -0.001446366310119629, -0.001184701919555664, -0.0009230375289916992, -0.0006613731384277344, -0.00039970874786376953, -0.0001380443572998047, 0.00012362003326416016, 0.000385284423828125, 0.0006469488143920898, 0.0009086132049560547, 0.0011702775955200195, 0.0014319419860839844, 0.0016936063766479492, 0.001955270767211914, 0.002216935157775879, 0.0024785995483398438, 0.0027402639389038086, 0.0030019283294677734, 0.0032635927200317383, 0.003525257110595703, 0.003786921501159668, 0.004048585891723633, 0.004310250282287598, 0.0045719146728515625, 0.004833579063415527, 0.005095243453979492, 0.005356907844543457, 0.005618572235107422, 0.005880236625671387, 0.0061419010162353516, 0.006403565406799316, 0.006665229797363281, 0.006926894187927246, 0.007188558578491211, 0.007450222969055176, 0.007711887359619141, 0.007973551750183105, 0.00823521614074707, 0.008496880531311035, 0.008758544921875]}, "gradients/encoder.encoder.layers.22.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 2.0, 1.0, 4.0, 8.0, 3.0, 3.0, 5.0, 1.0, 7.0, 8.0, 7.0, 14.0, 13.0, 9.0, 16.0, 12.0, 19.0, 19.0, 23.0, 27.0, 31.0, 41.0, 43.0, 74.0, 239.0, 1543.0, 17898.0, 1000128.0, 25642.0, 2021.0, 261.0, 81.0, 54.0, 30.0, 32.0, 35.0, 26.0, 28.0, 26.0, 23.0, 12.0, 16.0, 15.0, 19.0, 7.0, 10.0, 3.0, 7.0, 8.0, 2.0, 7.0, 5.0, 0.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.48583984375, -0.4704399108886719, -0.45503997802734375, -0.4396400451660156, -0.4242401123046875, -0.4088401794433594, -0.39344024658203125, -0.3780403137207031, -0.362640380859375, -0.3472404479980469, -0.33184051513671875, -0.3164405822753906, -0.3010406494140625, -0.2856407165527344, -0.27024078369140625, -0.2548408508300781, -0.23944091796875, -0.22404098510742188, -0.20864105224609375, -0.19324111938476562, -0.1778411865234375, -0.16244125366210938, -0.14704132080078125, -0.13164138793945312, -0.116241455078125, -0.10084152221679688, -0.08544158935546875, -0.07004165649414062, -0.0546417236328125, -0.039241790771484375, -0.02384185791015625, -0.008441925048828125, 0.0069580078125, 0.022357940673828125, 0.03775787353515625, 0.053157806396484375, 0.0685577392578125, 0.08395767211914062, 0.09935760498046875, 0.11475753784179688, 0.130157470703125, 0.14555740356445312, 0.16095733642578125, 0.17635726928710938, 0.1917572021484375, 0.20715713500976562, 0.22255706787109375, 0.23795700073242188, 0.25335693359375, 0.2687568664550781, 0.28415679931640625, 0.2995567321777344, 0.3149566650390625, 0.3303565979003906, 0.34575653076171875, 0.3611564636230469, 0.376556396484375, 0.3919563293457031, 0.40735626220703125, 0.4227561950683594, 0.4381561279296875, 0.4535560607910156, 0.46895599365234375, 0.4843559265136719, 0.499755859375]}, "gradients/encoder.encoder.layers.22.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 2.0, 1.0, 4.0, 8.0, 3.0, 3.0, 5.0, 1.0, 7.0, 8.0, 7.0, 14.0, 13.0, 9.0, 16.0, 12.0, 19.0, 19.0, 23.0, 27.0, 31.0, 40.0, 29.0, 42.0, 36.0, 41.0, 33.0, 51.0, 38.0, 42.0, 33.0, 44.0, 45.0, 29.0, 32.0, 35.0, 26.0, 28.0, 26.0, 23.0, 12.0, 16.0, 15.0, 19.0, 7.0, 10.0, 3.0, 7.0, 8.0, 2.0, 7.0, 5.0, 0.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0242156982421875, -0.02344822883605957, -0.02268075942993164, -0.02191329002380371, -0.02114582061767578, -0.02037835121154785, -0.019610881805419922, -0.018843412399291992, -0.018075942993164062, -0.017308473587036133, -0.016541004180908203, -0.015773534774780273, -0.015006065368652344, -0.014238595962524414, -0.013471126556396484, -0.012703657150268555, -0.011936187744140625, -0.011168718338012695, -0.010401248931884766, -0.009633779525756836, -0.008866310119628906, -0.008098840713500977, -0.007331371307373047, -0.006563901901245117, -0.0057964324951171875, -0.005028963088989258, -0.004261493682861328, -0.0034940242767333984, -0.0027265548706054688, -0.001959085464477539, -0.0011916160583496094, -0.0004241466522216797, 0.00034332275390625, 0.0011107921600341797, 0.0018782615661621094, 0.002645730972290039, 0.0034132003784179688, 0.0041806697845458984, 0.004948139190673828, 0.005715608596801758, 0.0064830780029296875, 0.007250547409057617, 0.008018016815185547, 0.008785486221313477, 0.009552955627441406, 0.010320425033569336, 0.011087894439697266, 0.011855363845825195, 0.012622833251953125, 0.013390302658081055, 0.014157772064208984, 0.014925241470336914, 0.015692710876464844, 0.016460180282592773, 0.017227649688720703, 0.017995119094848633, 0.018762588500976562, 0.019530057907104492, 0.020297527313232422, 0.02106499671936035, 0.02183246612548828, 0.02259993553161621, 0.02336740493774414, 0.02413487434387207, 0.02490234375]}, "gradients/encoder.encoder.layers.22.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 3.0, 5.0, 9.0, 13.0, 20.0, 43.0, 106.0, 356.0, 7916.0, 1038186.0, 1521.0, 194.0, 75.0, 37.0, 31.0, 16.0, 12.0, 3.0, 5.0, 0.0, 1.0, 2.0, 2.0, 3.0, 2.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.0006136894226074219, -0.0005934387445449829, -0.0005731880664825439, -0.000552937388420105, -0.000532686710357666, -0.000512436032295227, -0.0004921853542327881, -0.0004719346761703491, -0.00045168399810791016, -0.0004314333200454712, -0.0004111826419830322, -0.00039093196392059326, -0.0003706812858581543, -0.00035043060779571533, -0.00033017992973327637, -0.0003099292516708374, -0.00028967857360839844, -0.00026942789554595947, -0.0002491772174835205, -0.00022892653942108154, -0.00020867586135864258, -0.0001884251832962036, -0.00016817450523376465, -0.00014792382717132568, -0.00012767314910888672, -0.00010742247104644775, -8.717179298400879e-05, -6.692111492156982e-05, -4.667043685913086e-05, -2.6419758796691895e-05, -6.16908073425293e-06, 1.4081597328186035e-05, 3.4332275390625e-05, 5.4582953453063965e-05, 7.483363151550293e-05, 9.50843095779419e-05, 0.00011533498764038086, 0.00013558566570281982, 0.0001558363437652588, 0.00017608702182769775, 0.00019633769989013672, 0.00021658837795257568, 0.00023683905601501465, 0.0002570897340774536, 0.0002773404121398926, 0.00029759109020233154, 0.0003178417682647705, 0.00033809244632720947, 0.00035834312438964844, 0.0003785938024520874, 0.00039884448051452637, 0.00041909515857696533, 0.0004393458366394043, 0.00045959651470184326, 0.0004798471927642822, 0.0005000978708267212, 0.0005203485488891602, 0.0005405992269515991, 0.0005608499050140381, 0.000581100583076477, 0.000601351261138916, 0.000621601939201355, 0.0006418526172637939, 0.0006621032953262329, 0.0006823539733886719]}, "gradients/encoder.encoder.layers.22.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 3.0, 6.0, 9.0, 11.0, 20.0, 34.0, 65.0, 98.0, 184.0, 202.0, 149.0, 69.0, 52.0, 33.0, 26.0, 18.0, 12.0, 3.0, 5.0, 0.0, 1.0, 2.0, 2.0, 3.0, 2.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-3.039836883544922e-05, -2.939533442258835e-05, -2.8392300009727478e-05, -2.7389265596866608e-05, -2.6386231184005737e-05, -2.5383196771144867e-05, -2.4380162358283997e-05, -2.3377127945423126e-05, -2.2374093532562256e-05, -2.1371059119701385e-05, -2.0368024706840515e-05, -1.9364990293979645e-05, -1.8361955881118774e-05, -1.7358921468257904e-05, -1.6355887055397034e-05, -1.5352852642536163e-05, -1.4349818229675293e-05, -1.3346783816814423e-05, -1.2343749403953552e-05, -1.1340714991092682e-05, -1.0337680578231812e-05, -9.334646165370941e-06, -8.33161175251007e-06, -7.3285773396492004e-06, -6.32554292678833e-06, -5.32250851392746e-06, -4.319474101066589e-06, -3.316439688205719e-06, -2.3134052753448486e-06, -1.3103708624839783e-06, -3.073364496231079e-07, 6.956979632377625e-07, 1.6987323760986328e-06, 2.701766788959503e-06, 3.7048012018203735e-06, 4.707835614681244e-06, 5.710870027542114e-06, 6.713904440402985e-06, 7.716938853263855e-06, 8.719973266124725e-06, 9.723007678985596e-06, 1.0726042091846466e-05, 1.1729076504707336e-05, 1.2732110917568207e-05, 1.3735145330429077e-05, 1.4738179743289948e-05, 1.5741214156150818e-05, 1.6744248569011688e-05, 1.774728298187256e-05, 1.875031739473343e-05, 1.97533518075943e-05, 2.075638622045517e-05, 2.175942063331604e-05, 2.276245504617691e-05, 2.376548945903778e-05, 2.476852387189865e-05, 2.577155828475952e-05, 2.6774592697620392e-05, 2.7777627110481262e-05, 2.8780661523342133e-05, 2.9783695936203003e-05, 3.078673034906387e-05, 3.1789764761924744e-05, 3.2792799174785614e-05, 3.3795833587646484e-05]}, "gradients/encoder.encoder.layers.22.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 4.0, 3.0, 8.0, 7.0, 16.0, 17.0, 25.0, 48.0, 127.0, 685.0, 379675.0, 666836.0, 806.0, 132.0, 63.0, 26.0, 22.0, 19.0, 7.0, 7.0, 2.0, 8.0, 2.0, 6.0, 3.0, 2.0, 0.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.0006337165832519531, -0.0006139129400253296, -0.0005941092967987061, -0.0005743056535720825, -0.000554502010345459, -0.0005346983671188354, -0.0005148947238922119, -0.0004950910806655884, -0.00047528743743896484, -0.0004554837942123413, -0.0004356801509857178, -0.00041587650775909424, -0.0003960728645324707, -0.00037626922130584717, -0.00035646557807922363, -0.0003366619348526001, -0.00031685829162597656, -0.00029705464839935303, -0.0002772510051727295, -0.00025744736194610596, -0.00023764371871948242, -0.0002178400754928589, -0.00019803643226623535, -0.00017823278903961182, -0.00015842914581298828, -0.00013862550258636475, -0.00011882185935974121, -9.901821613311768e-05, -7.921457290649414e-05, -5.9410929679870605e-05, -3.960728645324707e-05, -1.9803643226623535e-05, 0.0, 1.9803643226623535e-05, 3.960728645324707e-05, 5.9410929679870605e-05, 7.921457290649414e-05, 9.901821613311768e-05, 0.00011882185935974121, 0.00013862550258636475, 0.00015842914581298828, 0.00017823278903961182, 0.00019803643226623535, 0.0002178400754928589, 0.00023764371871948242, 0.00025744736194610596, 0.0002772510051727295, 0.00029705464839935303, 0.00031685829162597656, 0.0003366619348526001, 0.00035646557807922363, 0.00037626922130584717, 0.0003960728645324707, 0.00041587650775909424, 0.0004356801509857178, 0.0004554837942123413, 0.00047528743743896484, 0.0004950910806655884, 0.0005148947238922119, 0.0005346983671188354, 0.000554502010345459, 0.0005743056535720825, 0.0005941092967987061, 0.0006139129400253296, 0.0006337165832519531]}, "gradients/encoder.encoder.layers.22.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 4.0, 3.0, 8.0, 7.0, 15.0, 17.0, 22.0, 35.0, 55.0, 66.0, 168.0, 331.0, 73.0, 53.0, 46.0, 25.0, 21.0, 18.0, 7.0, 7.0, 2.0, 8.0, 2.0, 6.0, 3.0, 2.0, 0.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-3.159046173095703e-05, -3.0603259801864624e-05, -2.9616057872772217e-05, -2.862885594367981e-05, -2.7641654014587402e-05, -2.6654452085494995e-05, -2.5667250156402588e-05, -2.468004822731018e-05, -2.3692846298217773e-05, -2.2705644369125366e-05, -2.171844244003296e-05, -2.0731240510940552e-05, -1.9744038581848145e-05, -1.8756836652755737e-05, -1.776963472366333e-05, -1.6782432794570923e-05, -1.5795230865478516e-05, -1.4808028936386108e-05, -1.3820827007293701e-05, -1.2833625078201294e-05, -1.1846423149108887e-05, -1.085922122001648e-05, -9.872019290924072e-06, -8.884817361831665e-06, -7.897615432739258e-06, -6.910413503646851e-06, -5.923211574554443e-06, -4.936009645462036e-06, -3.948807716369629e-06, -2.9616057872772217e-06, -1.9744038581848145e-06, -9.872019290924072e-07, 0.0, 9.872019290924072e-07, 1.9744038581848145e-06, 2.9616057872772217e-06, 3.948807716369629e-06, 4.936009645462036e-06, 5.923211574554443e-06, 6.910413503646851e-06, 7.897615432739258e-06, 8.884817361831665e-06, 9.872019290924072e-06, 1.085922122001648e-05, 1.1846423149108887e-05, 1.2833625078201294e-05, 1.3820827007293701e-05, 1.4808028936386108e-05, 1.5795230865478516e-05, 1.6782432794570923e-05, 1.776963472366333e-05, 1.8756836652755737e-05, 1.9744038581848145e-05, 2.0731240510940552e-05, 2.171844244003296e-05, 2.2705644369125366e-05, 2.3692846298217773e-05, 2.468004822731018e-05, 2.5667250156402588e-05, 2.6654452085494995e-05, 2.7641654014587402e-05, 2.862885594367981e-05, 2.9616057872772217e-05, 3.0603259801864624e-05, 3.159046173095703e-05]}, "gradients/encoder.encoder.layers.22.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 1.0, 3.0, 4.0, 3.0, 11.0, 18.0, 43.0, 158.0, 620.0, 92.0, 19.0, 14.0, 9.0, 6.0, 2.0, 3.0, 4.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.8127468824386597, -0.7807133197784424, -0.7486797571182251, -0.7166461944580078, -0.6846126317977905, -0.6525790691375732, -0.620545506477356, -0.5885119438171387, -0.5564783811569214, -0.5244448184967041, -0.4924112558364868, -0.46037769317626953, -0.42834413051605225, -0.39631056785583496, -0.3642770051956177, -0.3322434425354004, -0.3002098798751831, -0.2681763172149658, -0.23614275455474854, -0.20410919189453125, -0.17207562923431396, -0.14004206657409668, -0.1080085039138794, -0.07597494125366211, -0.043941378593444824, -0.011907815933227539, 0.020125746726989746, 0.05215930938720703, 0.08419287204742432, 0.1162264347076416, 0.1482599973678589, 0.18029356002807617, 0.2123270034790039, 0.2443605661392212, 0.2763941287994385, 0.30842769145965576, 0.34046125411987305, 0.37249481678009033, 0.4045283794403076, 0.4365619421005249, 0.4685955047607422, 0.5006290674209595, 0.5326626300811768, 0.564696192741394, 0.5967297554016113, 0.6287633180618286, 0.6607968807220459, 0.6928304433822632, 0.7248640060424805, 0.7568975687026978, 0.788931131362915, 0.8209646940231323, 0.8529982566833496, 0.8850318193435669, 0.9170653820037842, 0.9490989446640015, 0.9811325073242188, 1.013166069984436, 1.0451996326446533, 1.0772331953048706, 1.109266757965088, 1.1413003206253052, 1.1733338832855225, 1.2053674459457397, 1.237401008605957]}, "gradients/encoder.encoder.layers.22.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 5.0, 4.0, 2.0, 6.0, 5.0, 6.0, 5.0, 16.0, 11.0, 21.0, 41.0, 57.0, 66.0, 71.0, 74.0, 68.0, 107.0, 82.0, 58.0, 65.0, 63.0, 36.0, 35.0, 17.0, 21.0, 14.0, 13.0, 6.0, 9.0, 2.0, 4.0, 2.0, 5.0, 3.0, 3.0, 4.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.2577208876609802, -0.2492363303899765, -0.24075177311897278, -0.23226720094680786, -0.22378264367580414, -0.21529808640480042, -0.2068135291337967, -0.19832897186279297, -0.18984439969062805, -0.18135984241962433, -0.1728752851486206, -0.1643907129764557, -0.15590615570545197, -0.14742159843444824, -0.13893704116344452, -0.1304524838924408, -0.12196792662143707, -0.11348336935043335, -0.10499880462884903, -0.0965142473578453, -0.08802968263626099, -0.07954512536525726, -0.07106056809425354, -0.06257601082324982, -0.0540914461016655, -0.045606885105371475, -0.037122324109077454, -0.02863776683807373, -0.02015320584177971, -0.011668644845485687, -0.003184087574481964, 0.0053004734218120575, 0.013785034418106079, 0.0222695954144001, 0.030754154548048973, 0.039238713681697845, 0.04772327467799187, 0.05620783567428589, 0.06469239294528961, 0.07317695021629333, 0.08166151493787766, 0.09014607220888138, 0.0986306369304657, 0.10711519420146942, 0.11559975147247314, 0.12408431619405746, 0.1325688660144806, 0.1410534381866455, 0.14953799545764923, 0.15802255272865295, 0.16650710999965668, 0.1749916672706604, 0.18347623944282532, 0.19196079671382904, 0.20044535398483276, 0.2089299112558365, 0.2174144685268402, 0.22589902579784393, 0.23438358306884766, 0.24286815524101257, 0.2513526976108551, 0.25983726978302, 0.26832181215286255, 0.27680638432502747, 0.2852909564971924]}, "gradients/encoder.encoder.layers.21.feed_forward.output_dense.weight": {"_type": "histogram", "values": [4.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 2.0, 2.0, 6.0, 0.0, 2.0, 2.0, 2.0, 4.0, 0.0, 8.0, 4.0, 10.0, 4.0, 4.0, 12.0, 12.0, 16.0, 22.0, 22.0, 14.0, 56.0, 200.0, 378.0, 4192723.0, 417.0, 162.0, 58.0, 32.0, 12.0, 10.0, 10.0, 6.0, 8.0, 16.0, 14.0, 6.0, 4.0, 2.0, 4.0, 4.0, 6.0, 2.0, 4.0, 4.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0], "bins": [-11.5234375, -11.1453857421875, -10.767333984375, -10.3892822265625, -10.01123046875, -9.6331787109375, -9.255126953125, -8.8770751953125, -8.4990234375, -8.1209716796875, -7.742919921875, -7.3648681640625, -6.98681640625, -6.6087646484375, -6.230712890625, -5.8526611328125, -5.474609375, -5.0965576171875, -4.718505859375, -4.3404541015625, -3.96240234375, -3.5843505859375, -3.206298828125, -2.8282470703125, -2.4501953125, -2.0721435546875, -1.694091796875, -1.3160400390625, -0.93798828125, -0.5599365234375, -0.181884765625, 0.1961669921875, 0.57421875, 0.9522705078125, 1.330322265625, 1.7083740234375, 2.08642578125, 2.4644775390625, 2.842529296875, 3.2205810546875, 3.5986328125, 3.9766845703125, 4.354736328125, 4.7327880859375, 5.11083984375, 5.4888916015625, 5.866943359375, 6.2449951171875, 6.623046875, 7.0010986328125, 7.379150390625, 7.7572021484375, 8.13525390625, 8.5133056640625, 8.891357421875, 9.2694091796875, 9.6474609375, 10.0255126953125, 10.403564453125, 10.7816162109375, 11.15966796875, 11.5377197265625, 11.915771484375, 12.2938232421875, 12.671875]}, "gradients/encoder.encoder.layers.21.feed_forward.output_dense.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 4.0, 2.0, 5.0, 2.0, 2.0, 6.0, 6.0, 8.0, 11.0, 11.0, 7.0, 28.0, 101.0, 175.0, 262.0, 194.0, 80.0, 29.0, 16.0, 6.0, 5.0, 5.0, 3.0, 4.0, 8.0, 7.0, 3.0, 2.0, 1.0, 2.0, 2.0, 3.0, 1.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.007808685302734375, -0.007552444934844971, -0.007296204566955566, -0.007039964199066162, -0.006783723831176758, -0.0065274834632873535, -0.006271243095397949, -0.006015002727508545, -0.005758762359619141, -0.005502521991729736, -0.005246281623840332, -0.004990041255950928, -0.0047338008880615234, -0.004477560520172119, -0.004221320152282715, -0.0039650797843933105, -0.0037088394165039062, -0.003452599048614502, -0.0031963586807250977, -0.0029401183128356934, -0.002683877944946289, -0.0024276375770568848, -0.0021713972091674805, -0.0019151568412780762, -0.0016589164733886719, -0.0014026761054992676, -0.0011464357376098633, -0.000890195369720459, -0.0006339550018310547, -0.0003777146339416504, -0.0001214742660522461, 0.0001347661018371582, 0.0003910064697265625, 0.0006472468376159668, 0.0009034872055053711, 0.0011597275733947754, 0.0014159679412841797, 0.001672208309173584, 0.0019284486770629883, 0.0021846890449523926, 0.002440929412841797, 0.002697169780731201, 0.0029534101486206055, 0.0032096505165100098, 0.003465890884399414, 0.0037221312522888184, 0.003978371620178223, 0.004234611988067627, 0.004490852355957031, 0.0047470927238464355, 0.00500333309173584, 0.005259573459625244, 0.0055158138275146484, 0.005772054195404053, 0.006028294563293457, 0.006284534931182861, 0.006540775299072266, 0.00679701566696167, 0.007053256034851074, 0.0073094964027404785, 0.007565736770629883, 0.007821977138519287, 0.008078217506408691, 0.008334457874298096, 0.0085906982421875]}, "gradients/encoder.encoder.layers.21.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 5.0, 6.0, 5.0, 9.0, 10.0, 16.0, 14.0, 22.0, 36.0, 47.0, 60.0, 82.0, 79.0, 91.0, 133.0, 163.0, 109575.0, 4083120.0, 174.0, 132.0, 100.0, 105.0, 60.0, 68.0, 54.0, 29.0, 17.0, 23.0, 16.0, 6.0, 10.0, 2.0, 5.0, 6.0, 1.0, 2.0, 1.0, 1.0, 2.0, 3.0, 1.0, 0.0, 1.0], "bins": [-4.171875, -4.062286376953125, -3.95269775390625, -3.843109130859375, -3.7335205078125, -3.623931884765625, -3.51434326171875, -3.404754638671875, -3.295166015625, -3.185577392578125, -3.07598876953125, -2.966400146484375, -2.8568115234375, -2.747222900390625, -2.63763427734375, -2.528045654296875, -2.41845703125, -2.308868408203125, -2.19927978515625, -2.089691162109375, -1.9801025390625, -1.870513916015625, -1.76092529296875, -1.651336669921875, -1.541748046875, -1.432159423828125, -1.32257080078125, -1.212982177734375, -1.1033935546875, -0.993804931640625, -0.88421630859375, -0.774627685546875, -0.6650390625, -0.555450439453125, -0.44586181640625, -0.336273193359375, -0.2266845703125, -0.117095947265625, -0.00750732421875, 0.102081298828125, 0.211669921875, 0.321258544921875, 0.43084716796875, 0.540435791015625, 0.6500244140625, 0.759613037109375, 0.86920166015625, 0.978790283203125, 1.08837890625, 1.197967529296875, 1.30755615234375, 1.417144775390625, 1.5267333984375, 1.636322021484375, 1.74591064453125, 1.855499267578125, 1.965087890625, 2.074676513671875, 2.18426513671875, 2.293853759765625, 2.4034423828125, 2.513031005859375, 2.62261962890625, 2.732208251953125, 2.841796875]}, "gradients/encoder.encoder.layers.21.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 0.0, 2.0, 1.0, 1.0, 1.0, 5.0, 6.0, 5.0, 9.0, 10.0, 16.0, 14.0, 22.0, 36.0, 47.0, 60.0, 82.0, 79.0, 91.0, 133.0, 163.0, 400.0, 2091.0, 170.0, 132.0, 102.0, 103.0, 59.0, 69.0, 54.0, 29.0, 17.0, 23.0, 16.0, 6.0, 10.0, 2.0, 5.0, 6.0, 1.0, 2.0, 1.0, 1.0, 2.0, 3.0, 1.0, 0.0, 1.0], "bins": [-0.029632568359375, -0.0288541316986084, -0.028075695037841797, -0.027297258377075195, -0.026518821716308594, -0.025740385055541992, -0.02496194839477539, -0.02418351173400879, -0.023405075073242188, -0.022626638412475586, -0.021848201751708984, -0.021069765090942383, -0.02029132843017578, -0.01951289176940918, -0.018734455108642578, -0.017956018447875977, -0.017177581787109375, -0.016399145126342773, -0.015620708465576172, -0.01484227180480957, -0.014063835144042969, -0.013285398483276367, -0.012506961822509766, -0.011728525161743164, -0.010950088500976562, -0.010171651840209961, -0.00939321517944336, -0.008614778518676758, -0.007836341857910156, -0.007057905197143555, -0.006279468536376953, -0.0055010318756103516, -0.00472259521484375, -0.0039441585540771484, -0.003165721893310547, -0.0023872852325439453, -0.0016088485717773438, -0.0008304119110107422, -5.1975250244140625e-05, 0.0007264614105224609, 0.0015048980712890625, 0.002283334732055664, 0.0030617713928222656, 0.003840208053588867, 0.004618644714355469, 0.00539708137512207, 0.006175518035888672, 0.0069539546966552734, 0.007732391357421875, 0.008510828018188477, 0.009289264678955078, 0.01006770133972168, 0.010846138000488281, 0.011624574661254883, 0.012403011322021484, 0.013181447982788086, 0.013959884643554688, 0.014738321304321289, 0.01551675796508789, 0.016295194625854492, 0.017073631286621094, 0.017852067947387695, 0.018630504608154297, 0.0194089412689209, 0.0201873779296875]}, "gradients/encoder.encoder.layers.21.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 4.0, 174.0, 841.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.8255228996276855, -6.7060160636901855, -6.5865092277526855, -6.467002868652344, -6.347496032714844, -6.227989196777344, -6.108482360839844, -5.988975524902344, -5.869469165802002, -5.749962329864502, -5.630455493927002, -5.51094913482666, -5.39144229888916, -5.27193546295166, -5.15242862701416, -5.03292179107666, -4.91341495513916, -4.79390811920166, -4.67440128326416, -4.554894924163818, -4.435388088226318, -4.315881252288818, -4.196374416351318, -4.076867580413818, -3.9573612213134766, -3.8378543853759766, -3.7183477878570557, -3.5988409519195557, -3.4793343544006348, -3.3598275184631348, -3.2403206825256348, -3.120814085006714, -3.001307249069214, -2.881800413131714, -2.762293815612793, -2.642786979675293, -2.523280382156372, -2.403773546218872, -2.284266948699951, -2.164760112762451, -2.045253276824951, -1.9257465600967407, -1.8062398433685303, -1.6867330074310303, -1.5672262907028198, -1.4477195739746094, -1.328212857246399, -1.2087061405181885, -1.0891993045806885, -0.969692587852478, -0.8501858115196228, -0.7306790947914124, -0.6111723184585571, -0.4916656017303467, -0.37215888500213623, -0.252652108669281, -0.13314545154571533, -0.013638712465763092, 0.10586802661418915, 0.2253747582435608, 0.34488150477409363, 0.46438825130462646, 0.5838949680328369, 0.7034017443656921, 0.8229084610939026]}, "gradients/encoder.encoder.layers.21.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 2.0, 1.0, 4.0, 6.0, 5.0, 3.0, 18.0, 27.0, 38.0, 78.0, 77.0, 124.0, 107.0, 134.0, 124.0, 91.0, 62.0, 45.0, 33.0, 8.0, 12.0, 4.0, 4.0, 6.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0], "bins": [-0.4184265732765198, -0.40945902466773987, -0.4004914462566376, -0.39152389764785767, -0.38255631923675537, -0.37358877062797546, -0.36462122201919556, -0.35565364360809326, -0.34668609499931335, -0.33771854639053345, -0.32875096797943115, -0.31978341937065125, -0.31081584095954895, -0.30184829235076904, -0.29288071393966675, -0.28391316533088684, -0.27494561672210693, -0.265978068113327, -0.25701048970222473, -0.24804294109344482, -0.23907537758350372, -0.23010781407356262, -0.22114025056362152, -0.21217268705368042, -0.20320510864257812, -0.19423754513263702, -0.18526998162269592, -0.17630243301391602, -0.16733486950397491, -0.1583673059940338, -0.1493997424840927, -0.1404321789741516, -0.1314646303653717, -0.1224970668554306, -0.1135295107960701, -0.104561947286129, -0.0955943912267685, -0.08662682771682739, -0.07765926420688629, -0.06869170814752579, -0.059724144637584686, -0.050756584852933884, -0.04178902506828308, -0.03282146155834198, -0.023853901773691177, -0.014886341989040375, -0.005918778479099274, 0.0030487775802612305, 0.012016341090202332, 0.020983900874853134, 0.029951462522149086, 0.03891902416944504, 0.04788658395409584, 0.05685414373874664, 0.06582170724868774, 0.07478926330804825, 0.08375682681798935, 0.09272439032793045, 0.10169194638729095, 0.11065950989723206, 0.11962707340717316, 0.12859463691711426, 0.13756218552589417, 0.14652974903583527, 0.15549731254577637]}, "gradients/encoder.encoder.layers.21.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 2.0, 1.0, 4.0, 2.0, 4.0, 3.0, 5.0, 12.0, 9.0, 18.0, 25.0, 39.0, 59.0, 93.0, 161.0, 348.0, 931.0, 3544.0, 25354.0, 947420.0, 62840.0, 5413.0, 1352.0, 445.0, 191.0, 101.0, 56.0, 33.0, 23.0, 22.0, 13.0, 11.0, 3.0, 6.0, 7.0, 3.0, 3.0, 1.0, 4.0, 1.0, 1.0, 3.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-0.265625, -0.25817108154296875, -0.2507171630859375, -0.24326324462890625, -0.235809326171875, -0.22835540771484375, -0.2209014892578125, -0.21344757080078125, -0.20599365234375, -0.19853973388671875, -0.1910858154296875, -0.18363189697265625, -0.176177978515625, -0.16872406005859375, -0.1612701416015625, -0.15381622314453125, -0.1463623046875, -0.13890838623046875, -0.1314544677734375, -0.12400054931640625, -0.116546630859375, -0.10909271240234375, -0.1016387939453125, -0.09418487548828125, -0.08673095703125, -0.07927703857421875, -0.0718231201171875, -0.06436920166015625, -0.056915283203125, -0.04946136474609375, -0.0420074462890625, -0.03455352783203125, -0.027099609375, -0.01964569091796875, -0.0121917724609375, -0.00473785400390625, 0.002716064453125, 0.01016998291015625, 0.0176239013671875, 0.02507781982421875, 0.03253173828125, 0.03998565673828125, 0.0474395751953125, 0.05489349365234375, 0.062347412109375, 0.06980133056640625, 0.0772552490234375, 0.08470916748046875, 0.0921630859375, 0.09961700439453125, 0.1070709228515625, 0.11452484130859375, 0.121978759765625, 0.12943267822265625, 0.1368865966796875, 0.14434051513671875, 0.15179443359375, 0.15924835205078125, 0.1667022705078125, 0.17415618896484375, 0.181610107421875, 0.18906402587890625, 0.1965179443359375, 0.20397186279296875, 0.21142578125]}, "gradients/encoder.encoder.layers.21.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 3.0, 4.0, 3.0, 3.0, 7.0, 7.0, 13.0, 50.0, 100.0, 172.0, 243.0, 203.0, 105.0, 54.0, 15.0, 12.0, 6.0, 4.0, 5.0, 1.0, 2.0, 0.0, 1.0, 2.0, 2.0, 0.0, 1.0], "bins": [-0.0198516845703125, -0.019433975219726562, -0.019016265869140625, -0.018598556518554688, -0.01818084716796875, -0.017763137817382812, -0.017345428466796875, -0.016927719116210938, -0.016510009765625, -0.016092300415039062, -0.015674591064453125, -0.015256881713867188, -0.01483917236328125, -0.014421463012695312, -0.014003753662109375, -0.013586044311523438, -0.0131683349609375, -0.012750625610351562, -0.012332916259765625, -0.011915206909179688, -0.01149749755859375, -0.011079788208007812, -0.010662078857421875, -0.010244369506835938, -0.00982666015625, -0.009408950805664062, -0.008991241455078125, -0.008573532104492188, -0.00815582275390625, -0.0077381134033203125, -0.007320404052734375, -0.0069026947021484375, -0.0064849853515625, -0.0060672760009765625, -0.005649566650390625, -0.0052318572998046875, -0.00481414794921875, -0.0043964385986328125, -0.003978729248046875, -0.0035610198974609375, -0.003143310546875, -0.0027256011962890625, -0.002307891845703125, -0.0018901824951171875, -0.00147247314453125, -0.0010547637939453125, -0.000637054443359375, -0.0002193450927734375, 0.0001983642578125, 0.0006160736083984375, 0.001033782958984375, 0.0014514923095703125, 0.00186920166015625, 0.0022869110107421875, 0.002704620361328125, 0.0031223297119140625, 0.0035400390625, 0.0039577484130859375, 0.004375457763671875, 0.0047931671142578125, 0.00521087646484375, 0.0056285858154296875, 0.006046295166015625, 0.0064640045166015625, 0.0068817138671875]}, "gradients/encoder.encoder.layers.21.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 5.0, 5.0, 5.0, 4.0, 3.0, 6.0, 6.0, 2.0, 8.0, 12.0, 12.0, 15.0, 25.0, 10.0, 24.0, 35.0, 59.0, 88.0, 146.0, 298.0, 697.0, 1495.0, 3783.0, 9890.0, 29452.0, 103396.0, 604003.0, 217674.0, 51499.0, 15951.0, 5685.0, 2264.0, 900.0, 470.0, 247.0, 128.0, 62.0, 41.0, 37.0, 33.0, 19.0, 12.0, 11.0, 8.0, 7.0, 10.0, 5.0, 8.0, 5.0, 3.0, 0.0, 2.0, 1.0, 2.0, 0.0, 1.0, 2.0, 0.0, 2.0], "bins": [-0.07879638671875, -0.07631301879882812, -0.07382965087890625, -0.07134628295898438, -0.0688629150390625, -0.06637954711914062, -0.06389617919921875, -0.061412811279296875, -0.058929443359375, -0.056446075439453125, -0.05396270751953125, -0.051479339599609375, -0.0489959716796875, -0.046512603759765625, -0.04402923583984375, -0.041545867919921875, -0.0390625, -0.036579132080078125, -0.03409576416015625, -0.031612396240234375, -0.0291290283203125, -0.026645660400390625, -0.02416229248046875, -0.021678924560546875, -0.019195556640625, -0.016712188720703125, -0.01422882080078125, -0.011745452880859375, -0.0092620849609375, -0.006778717041015625, -0.00429534912109375, -0.001811981201171875, 0.00067138671875, 0.003154754638671875, 0.00563812255859375, 0.008121490478515625, 0.0106048583984375, 0.013088226318359375, 0.01557159423828125, 0.018054962158203125, 0.020538330078125, 0.023021697998046875, 0.02550506591796875, 0.027988433837890625, 0.0304718017578125, 0.032955169677734375, 0.03543853759765625, 0.037921905517578125, 0.0404052734375, 0.042888641357421875, 0.04537200927734375, 0.047855377197265625, 0.0503387451171875, 0.052822113037109375, 0.05530548095703125, 0.057788848876953125, 0.060272216796875, 0.06275558471679688, 0.06523895263671875, 0.06772232055664062, 0.0702056884765625, 0.07268905639648438, 0.07517242431640625, 0.07765579223632812, 0.08013916015625]}, "gradients/encoder.encoder.layers.21.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 5.0, 5.0, 5.0, 4.0, 3.0, 6.0, 6.0, 2.0, 8.0, 12.0, 12.0, 13.0, 22.0, 10.0, 17.0, 25.0, 22.0, 22.0, 24.0, 25.0, 30.0, 23.0, 37.0, 38.0, 37.0, 38.0, 42.0, 40.0, 45.0, 38.0, 35.0, 46.0, 33.0, 24.0, 37.0, 39.0, 24.0, 24.0, 22.0, 25.0, 18.0, 11.0, 11.0, 8.0, 7.0, 10.0, 5.0, 8.0, 5.0, 3.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 2.0, 0.0, 2.0], "bins": [-0.022247314453125, -0.021546125411987305, -0.02084493637084961, -0.020143747329711914, -0.01944255828857422, -0.018741369247436523, -0.018040180206298828, -0.017338991165161133, -0.016637802124023438, -0.015936613082885742, -0.015235424041748047, -0.014534235000610352, -0.013833045959472656, -0.013131856918334961, -0.012430667877197266, -0.01172947883605957, -0.011028289794921875, -0.01032710075378418, -0.009625911712646484, -0.008924722671508789, -0.008223533630371094, -0.0075223445892333984, -0.006821155548095703, -0.006119966506958008, -0.0054187774658203125, -0.004717588424682617, -0.004016399383544922, -0.0033152103424072266, -0.0026140213012695312, -0.001912832260131836, -0.0012116432189941406, -0.0005104541778564453, 0.00019073486328125, 0.0008919239044189453, 0.0015931129455566406, 0.002294301986694336, 0.0029954910278320312, 0.0036966800689697266, 0.004397869110107422, 0.005099058151245117, 0.0058002471923828125, 0.006501436233520508, 0.007202625274658203, 0.007903814315795898, 0.008605003356933594, 0.009306192398071289, 0.010007381439208984, 0.01070857048034668, 0.011409759521484375, 0.01211094856262207, 0.012812137603759766, 0.013513326644897461, 0.014214515686035156, 0.014915704727172852, 0.015616893768310547, 0.016318082809448242, 0.017019271850585938, 0.017720460891723633, 0.018421649932861328, 0.019122838973999023, 0.01982402801513672, 0.020525217056274414, 0.02122640609741211, 0.021927595138549805, 0.0226287841796875]}, "gradients/encoder.encoder.layers.21.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 4.0, 2.0, 1.0, 1.0, 4.0, 5.0, 5.0, 4.0, 5.0, 20.0, 13.0, 16.0, 24.0, 28.0, 32.0, 50.0, 103.0, 170.0, 270.0, 545.0, 1216.0, 3301.0, 11413.0, 69054.0, 889960.0, 56734.0, 10213.0, 3010.0, 1143.0, 483.0, 244.0, 157.0, 95.0, 72.0, 36.0, 29.0, 28.0, 16.0, 21.0, 12.0, 8.0, 6.0, 4.0, 0.0, 1.0, 3.0, 1.0, 3.0, 2.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.0010280609130859375, -0.0009943097829818726, -0.0009605586528778076, -0.0009268075227737427, -0.0008930563926696777, -0.0008593052625656128, -0.0008255541324615479, -0.0007918030023574829, -0.000758051872253418, -0.000724300742149353, -0.0006905496120452881, -0.0006567984819412231, -0.0006230473518371582, -0.0005892962217330933, -0.0005555450916290283, -0.0005217939615249634, -0.00048804283142089844, -0.0004542917013168335, -0.00042054057121276855, -0.0003867894411087036, -0.00035303831100463867, -0.00031928718090057373, -0.0002855360507965088, -0.00025178492069244385, -0.0002180337905883789, -0.00018428266048431396, -0.00015053153038024902, -0.00011678040027618408, -8.302927017211914e-05, -4.92781400680542e-05, -1.5527009963989258e-05, 1.8224120140075684e-05, 5.1975250244140625e-05, 8.572638034820557e-05, 0.00011947751045227051, 0.00015322864055633545, 0.0001869797706604004, 0.00022073090076446533, 0.0002544820308685303, 0.0002882331609725952, 0.00032198429107666016, 0.0003557354211807251, 0.00038948655128479004, 0.000423237681388855, 0.0004569888114929199, 0.0004907399415969849, 0.0005244910717010498, 0.0005582422018051147, 0.0005919933319091797, 0.0006257444620132446, 0.0006594955921173096, 0.0006932467222213745, 0.0007269978523254395, 0.0007607489824295044, 0.0007945001125335693, 0.0008282512426376343, 0.0008620023727416992, 0.0008957535028457642, 0.0009295046329498291, 0.000963255763053894, 0.000997006893157959, 0.001030758023262024, 0.0010645091533660889, 0.0010982602834701538, 0.0011320114135742188]}, "gradients/encoder.encoder.layers.21.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 2.0, 6.0, 7.0, 3.0, 7.0, 6.0, 9.0, 5.0, 10.0, 13.0, 14.0, 18.0, 28.0, 35.0, 53.0, 76.0, 136.0, 165.0, 120.0, 81.0, 47.0, 32.0, 28.0, 22.0, 15.0, 14.0, 13.0, 10.0, 9.0, 2.0, 7.0, 6.0, 2.0, 5.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-2.1278858184814453e-05, -2.0643696188926697e-05, -2.000853419303894e-05, -1.9373372197151184e-05, -1.8738210201263428e-05, -1.810304820537567e-05, -1.7467886209487915e-05, -1.683272421360016e-05, -1.6197562217712402e-05, -1.5562400221824646e-05, -1.492723822593689e-05, -1.4292076230049133e-05, -1.3656914234161377e-05, -1.302175223827362e-05, -1.2386590242385864e-05, -1.1751428246498108e-05, -1.1116266250610352e-05, -1.0481104254722595e-05, -9.845942258834839e-06, -9.210780262947083e-06, -8.575618267059326e-06, -7.94045627117157e-06, -7.3052942752838135e-06, -6.670132279396057e-06, -6.034970283508301e-06, -5.3998082876205444e-06, -4.764646291732788e-06, -4.129484295845032e-06, -3.4943222999572754e-06, -2.859160304069519e-06, -2.2239983081817627e-06, -1.5888363122940063e-06, -9.5367431640625e-07, -3.1851232051849365e-07, 3.166496753692627e-07, 9.51811671257019e-07, 1.5869736671447754e-06, 2.2221356630325317e-06, 2.857297658920288e-06, 3.4924596548080444e-06, 4.127621650695801e-06, 4.762783646583557e-06, 5.3979456424713135e-06, 6.03310763835907e-06, 6.668269634246826e-06, 7.3034316301345825e-06, 7.938593626022339e-06, 8.573755621910095e-06, 9.208917617797852e-06, 9.844079613685608e-06, 1.0479241609573364e-05, 1.111440360546112e-05, 1.1749565601348877e-05, 1.2384727597236633e-05, 1.301988959312439e-05, 1.3655051589012146e-05, 1.4290213584899902e-05, 1.4925375580787659e-05, 1.5560537576675415e-05, 1.619569957256317e-05, 1.6830861568450928e-05, 1.7466023564338684e-05, 1.810118556022644e-05, 1.8736347556114197e-05, 1.9371509552001953e-05]}, "gradients/encoder.encoder.layers.21.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 2.0, 1.0, 5.0, 7.0, 6.0, 5.0, 16.0, 21.0, 36.0, 77.0, 134.0, 314.0, 823.0, 2428.0, 9851.0, 71284.0, 886228.0, 64191.0, 9436.0, 2345.0, 749.0, 299.0, 134.0, 61.0, 27.0, 24.0, 15.0, 15.0, 7.0, 8.0, 7.0, 3.0, 1.0, 1.0, 3.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0010442733764648438, -0.0010164082050323486, -0.0009885430335998535, -0.0009606778621673584, -0.0009328126907348633, -0.0009049475193023682, -0.000877082347869873, -0.0008492171764373779, -0.0008213520050048828, -0.0007934868335723877, -0.0007656216621398926, -0.0007377564907073975, -0.0007098913192749023, -0.0006820261478424072, -0.0006541609764099121, -0.000626295804977417, -0.0005984306335449219, -0.0005705654621124268, -0.0005427002906799316, -0.0005148351192474365, -0.0004869699478149414, -0.0004591047763824463, -0.00043123960494995117, -0.00040337443351745605, -0.00037550926208496094, -0.0003476440906524658, -0.0003197789192199707, -0.0002919137477874756, -0.00026404857635498047, -0.00023618340492248535, -0.00020831823348999023, -0.00018045306205749512, -0.000152587890625, -0.00012472271919250488, -9.685754776000977e-05, -6.899237632751465e-05, -4.112720489501953e-05, -1.3262033462524414e-05, 1.4603137969970703e-05, 4.246830940246582e-05, 7.033348083496094e-05, 9.819865226745605e-05, 0.00012606382369995117, 0.0001539289951324463, 0.0001817941665649414, 0.00020965933799743652, 0.00023752450942993164, 0.00026538968086242676, 0.0002932548522949219, 0.000321120023727417, 0.0003489851951599121, 0.0003768503665924072, 0.00040471553802490234, 0.00043258070945739746, 0.0004604458808898926, 0.0004883110523223877, 0.0005161762237548828, 0.0005440413951873779, 0.000571906566619873, 0.0005997717380523682, 0.0006276369094848633, 0.0006555020809173584, 0.0006833672523498535, 0.0007112324237823486, 0.0007390975952148438]}, "gradients/encoder.encoder.layers.21.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 2.0, 0.0, 5.0, 5.0, 5.0, 4.0, 6.0, 7.0, 13.0, 18.0, 14.0, 39.0, 36.0, 51.0, 109.0, 114.0, 120.0, 108.0, 97.0, 72.0, 50.0, 27.0, 30.0, 15.0, 12.0, 11.0, 9.0, 10.0, 3.0, 7.0, 6.0, 3.0, 1.0, 1.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0002949237823486328, -0.000287054106593132, -0.0002791844308376312, -0.00027131475508213043, -0.00026344507932662964, -0.00025557540357112885, -0.00024770572781562805, -0.00023983605206012726, -0.00023196637630462646, -0.00022409670054912567, -0.00021622702479362488, -0.00020835734903812408, -0.0002004876732826233, -0.0001926179975271225, -0.0001847483217716217, -0.0001768786460161209, -0.00016900897026062012, -0.00016113929450511932, -0.00015326961874961853, -0.00014539994299411774, -0.00013753026723861694, -0.00012966059148311615, -0.00012179091572761536, -0.00011392123997211456, -0.00010605156421661377, -9.818188846111298e-05, -9.031221270561218e-05, -8.244253695011139e-05, -7.45728611946106e-05, -6.67031854391098e-05, -5.883350968360901e-05, -5.0963833928108215e-05, -4.309415817260742e-05, -3.522448241710663e-05, -2.7354806661605835e-05, -1.948513090610504e-05, -1.1615455150604248e-05, -3.7457793951034546e-06, 4.123896360397339e-06, 1.1993572115898132e-05, 1.9863247871398926e-05, 2.773292362689972e-05, 3.560259938240051e-05, 4.3472275137901306e-05, 5.13419508934021e-05, 5.921162664890289e-05, 6.708130240440369e-05, 7.495097815990448e-05, 8.282065391540527e-05, 9.069032967090607e-05, 9.856000542640686e-05, 0.00010642968118190765, 0.00011429935693740845, 0.00012216903269290924, 0.00013003870844841003, 0.00013790838420391083, 0.00014577805995941162, 0.00015364773571491241, 0.0001615174114704132, 0.000169387087225914, 0.0001772567629814148, 0.0001851264387369156, 0.00019299611449241638, 0.00020086579024791718, 0.00020873546600341797]}, "gradients/encoder.encoder.layers.21.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 10.0, 118.0, 705.0, 127.0, 34.0, 16.0, 4.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.6601436138153076, -1.6071665287017822, -1.5541894435882568, -1.5012123584747314, -1.4482353925704956, -1.3952583074569702, -1.3422812223434448, -1.2893041372299194, -1.236327052116394, -1.1833499670028687, -1.1303728818893433, -1.0773959159851074, -1.024418830871582, -0.9714417457580566, -0.9184646606445312, -0.8654875755310059, -0.8125105500221252, -0.7595334649085999, -0.7065564393997192, -0.6535793542861938, -0.6006022691726685, -0.5476251840591431, -0.49464815855026245, -0.44167107343673706, -0.38869401812553406, -0.33571696281433105, -0.28273987770080566, -0.22976282238960266, -0.17678575217723846, -0.12380868196487427, -0.07083162665367126, -0.017854541540145874, 0.03512251377105713, 0.08809958398342133, 0.14107665419578552, 0.19405370950698853, 0.24703077971935272, 0.3000078499317169, 0.3529849052429199, 0.4059619903564453, 0.4589390456676483, 0.5119161009788513, 0.5648931860923767, 0.6178702116012573, 0.6708472967147827, 0.7238243818283081, 0.7768014669418335, 0.8297785520553589, 0.8827555775642395, 0.9357326626777649, 0.9887096881866455, 1.041686773300171, 1.0946638584136963, 1.1476409435272217, 1.200618028640747, 1.2535951137542725, 1.3065720796585083, 1.3595491647720337, 1.412526249885559, 1.465503215789795, 1.5184803009033203, 1.5714573860168457, 1.624434471130371, 1.6774115562438965, 1.7303886413574219]}, "gradients/encoder.encoder.layers.21.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 3.0, 2.0, 5.0, 2.0, 8.0, 4.0, 15.0, 21.0, 10.0, 22.0, 23.0, 22.0, 34.0, 35.0, 36.0, 45.0, 54.0, 44.0, 37.0, 55.0, 54.0, 72.0, 49.0, 41.0, 50.0, 38.0, 46.0, 36.0, 33.0, 17.0, 25.0, 21.0, 12.0, 13.0, 10.0, 10.0, 3.0, 1.0, 3.0, 2.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.2528589963912964, -0.2457127422094345, -0.23856650292873383, -0.23142024874687195, -0.22427399456501007, -0.2171277403831482, -0.2099815011024475, -0.20283524692058563, -0.19568899273872375, -0.18854273855686188, -0.1813964992761612, -0.17425024509429932, -0.16710399091243744, -0.15995773673057556, -0.15281149744987488, -0.145665243268013, -0.13851900398731232, -0.13137274980545044, -0.12422650307416916, -0.11708025634288788, -0.109934002161026, -0.10278775542974472, -0.09564150869846344, -0.08849525451660156, -0.08134900778532028, -0.074202761054039, -0.06705650687217712, -0.059910260140895844, -0.052764009684324265, -0.045617759227752686, -0.038471512496471405, -0.031325262039899826, -0.024179011583328247, -0.017032761126756668, -0.009886512532830238, -0.0027402639389038086, 0.00440598651766777, 0.01155223697423935, 0.01869848370552063, 0.02584473416209221, 0.03299098461866379, 0.04013723507523537, 0.047283485531806946, 0.054429732263088226, 0.061575982719659805, 0.06872223317623138, 0.07586847990751266, 0.08301472663879395, 0.09016098082065582, 0.0973072275519371, 0.10445348173379898, 0.11159972846508026, 0.11874598264694214, 0.12589222192764282, 0.1330384761095047, 0.14018473029136658, 0.14733096957206726, 0.15447722375392914, 0.16162346303462982, 0.1687697172164917, 0.17591597139835358, 0.18306222558021545, 0.19020846486091614, 0.19735471904277802, 0.2045009732246399]}, "gradients/encoder.encoder.layers.20.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 4.0, 5.0, 11.0, 9.0, 14.0, 24.0, 38.0, 92.0, 217.0, 546.0, 4835.0, 4181364.0, 6393.0, 490.0, 130.0, 53.0, 27.0, 12.0, 8.0, 6.0, 3.0, 2.0, 5.0, 3.0, 1.0, 2.0, 1.0], "bins": [-1.583984375, -1.5513458251953125, -1.518707275390625, -1.4860687255859375, -1.45343017578125, -1.4207916259765625, -1.388153076171875, -1.3555145263671875, -1.3228759765625, -1.2902374267578125, -1.257598876953125, -1.2249603271484375, -1.19232177734375, -1.1596832275390625, -1.127044677734375, -1.0944061279296875, -1.061767578125, -1.0291290283203125, -0.996490478515625, -0.9638519287109375, -0.93121337890625, -0.8985748291015625, -0.865936279296875, -0.8332977294921875, -0.8006591796875, -0.7680206298828125, -0.735382080078125, -0.7027435302734375, -0.67010498046875, -0.6374664306640625, -0.604827880859375, -0.5721893310546875, -0.53955078125, -0.5069122314453125, -0.474273681640625, -0.4416351318359375, -0.40899658203125, -0.3763580322265625, -0.343719482421875, -0.3110809326171875, -0.2784423828125, -0.2458038330078125, -0.213165283203125, -0.1805267333984375, -0.14788818359375, -0.1152496337890625, -0.082611083984375, -0.0499725341796875, -0.017333984375, 0.0153045654296875, 0.047943115234375, 0.0805816650390625, 0.11322021484375, 0.1458587646484375, 0.178497314453125, 0.2111358642578125, 0.2437744140625, 0.2764129638671875, 0.309051513671875, 0.3416900634765625, 0.37432861328125, 0.4069671630859375, 0.439605712890625, 0.4722442626953125, 0.5048828125]}, "gradients/encoder.encoder.layers.20.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 3.0, 3.0, 3.0, 3.0, 9.0, 4.0, 21.0, 45.0, 100.0, 173.0, 241.0, 202.0, 111.0, 48.0, 18.0, 13.0, 5.0, 4.0, 4.0, 2.0, 0.0, 2.0, 3.0, 0.0, 1.0, 1.0], "bins": [-0.0208587646484375, -0.020429015159606934, -0.019999265670776367, -0.0195695161819458, -0.019139766693115234, -0.018710017204284668, -0.0182802677154541, -0.017850518226623535, -0.01742076873779297, -0.016991019248962402, -0.016561269760131836, -0.01613152027130127, -0.015701770782470703, -0.015272021293640137, -0.01484227180480957, -0.014412522315979004, -0.013982772827148438, -0.013553023338317871, -0.013123273849487305, -0.012693524360656738, -0.012263774871826172, -0.011834025382995605, -0.011404275894165039, -0.010974526405334473, -0.010544776916503906, -0.01011502742767334, -0.009685277938842773, -0.009255528450012207, -0.00882577896118164, -0.008396029472351074, -0.007966279983520508, -0.007536530494689941, -0.007106781005859375, -0.006677031517028809, -0.006247282028198242, -0.005817532539367676, -0.005387783050537109, -0.004958033561706543, -0.0045282840728759766, -0.00409853458404541, -0.0036687850952148438, -0.0032390356063842773, -0.002809286117553711, -0.0023795366287231445, -0.0019497871398925781, -0.0015200376510620117, -0.0010902881622314453, -0.0006605386734008789, -0.0002307891845703125, 0.0001989603042602539, 0.0006287097930908203, 0.0010584592819213867, 0.0014882087707519531, 0.0019179582595825195, 0.002347707748413086, 0.0027774572372436523, 0.0032072067260742188, 0.003636956214904785, 0.0040667057037353516, 0.004496455192565918, 0.004926204681396484, 0.005355954170227051, 0.005785703659057617, 0.006215453147888184, 0.00664520263671875]}, "gradients/encoder.encoder.layers.20.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 8.0, 6.0, 13.0, 25.0, 43.0, 79.0, 127.0, 168.0, 279.0, 753.0, 112165.0, 4079000.0, 992.0, 261.0, 136.0, 92.0, 66.0, 34.0, 23.0, 13.0, 6.0, 3.0, 0.0, 2.0, 2.0], "bins": [-1.6279296875, -1.5954322814941406, -1.5629348754882812, -1.5304374694824219, -1.4979400634765625, -1.4654426574707031, -1.4329452514648438, -1.4004478454589844, -1.367950439453125, -1.3354530334472656, -1.3029556274414062, -1.2704582214355469, -1.2379608154296875, -1.2054634094238281, -1.1729660034179688, -1.1404685974121094, -1.10797119140625, -1.0754737854003906, -1.0429763793945312, -1.0104789733886719, -0.9779815673828125, -0.9454841613769531, -0.9129867553710938, -0.8804893493652344, -0.847991943359375, -0.8154945373535156, -0.7829971313476562, -0.7504997253417969, -0.7180023193359375, -0.6855049133300781, -0.6530075073242188, -0.6205101013183594, -0.5880126953125, -0.5555152893066406, -0.5230178833007812, -0.4905204772949219, -0.4580230712890625, -0.4255256652832031, -0.39302825927734375, -0.3605308532714844, -0.328033447265625, -0.2955360412597656, -0.26303863525390625, -0.23054122924804688, -0.1980438232421875, -0.16554641723632812, -0.13304901123046875, -0.10055160522460938, -0.06805419921875, -0.035556793212890625, -0.00305938720703125, 0.029438018798828125, 0.0619354248046875, 0.09443283081054688, 0.12693023681640625, 0.15942764282226562, 0.191925048828125, 0.22442245483398438, 0.25691986083984375, 0.2894172668457031, 0.3219146728515625, 0.3544120788574219, 0.38690948486328125, 0.4194068908691406, 0.451904296875]}, "gradients/encoder.encoder.layers.20.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 13.0, 24.0, 29.0, 73.0, 126.0, 3367.0, 288.0, 87.0, 50.0, 15.0, 7.0, 4.0, 3.0, 2.0], "bins": [-0.111572265625, -0.10957574844360352, -0.10757923126220703, -0.10558271408081055, -0.10358619689941406, -0.10158967971801758, -0.0995931625366211, -0.09759664535522461, -0.09560012817382812, -0.09360361099243164, -0.09160709381103516, -0.08961057662963867, -0.08761405944824219, -0.0856175422668457, -0.08362102508544922, -0.08162450790405273, -0.07962799072265625, -0.07763147354125977, -0.07563495635986328, -0.0736384391784668, -0.07164192199707031, -0.06964540481567383, -0.06764888763427734, -0.06565237045288086, -0.06365585327148438, -0.06165933609008789, -0.059662818908691406, -0.05766630172729492, -0.05566978454589844, -0.05367326736450195, -0.05167675018310547, -0.049680233001708984, -0.0476837158203125, -0.045687198638916016, -0.04369068145751953, -0.04169416427612305, -0.03969764709472656, -0.03770112991333008, -0.035704612731933594, -0.03370809555053711, -0.031711578369140625, -0.02971506118774414, -0.027718544006347656, -0.025722026824951172, -0.023725509643554688, -0.021728992462158203, -0.01973247528076172, -0.017735958099365234, -0.01573944091796875, -0.013742923736572266, -0.011746406555175781, -0.009749889373779297, -0.0077533721923828125, -0.005756855010986328, -0.0037603378295898438, -0.0017638206481933594, 0.000232696533203125, 0.0022292137145996094, 0.004225730895996094, 0.006222248077392578, 0.008218765258789062, 0.010215282440185547, 0.012211799621582031, 0.014208316802978516, 0.016204833984375]}, "gradients/encoder.encoder.layers.20.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 2.0, 7.0, 5.0, 31.0, 68.0, 417.0, 461.0, 19.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.4453599452972412, -1.405264139175415, -1.3651682138442993, -1.3250724077224731, -1.2849764823913574, -1.2448806762695312, -1.204784870147705, -1.1646889448165894, -1.1245931386947632, -1.084497332572937, -1.0444014072418213, -1.0043056011199951, -0.9642097353935242, -0.9241138696670532, -0.8840180039405823, -0.8439221382141113, -0.8038262724876404, -0.7637304067611694, -0.7236345410346985, -0.6835386753082275, -0.6434428691864014, -0.6033470034599304, -0.5632511377334595, -0.5231553316116333, -0.48305943608283997, -0.442963570356369, -0.40286773443222046, -0.3627718687057495, -0.32267600297927856, -0.28258016705513, -0.24248430132865906, -0.2023884654045105, -0.16229259967803955, -0.1221967488527298, -0.08210089057683945, -0.0420050323009491, -0.0019091814756393433, 0.03818666934967041, 0.07828253507614136, 0.11837837100028992, 0.15847423672676086, 0.19857008755207062, 0.23866593837738037, 0.2787618041038513, 0.31885766983032227, 0.3589535057544708, 0.3990493714809418, 0.43914520740509033, 0.4792410731315613, 0.5193369388580322, 0.5594328045845032, 0.5995286703109741, 0.6396244764328003, 0.6797203421592712, 0.7198162078857422, 0.7599120140075684, 0.8000079393386841, 0.840103805065155, 0.880199670791626, 0.9202954769134521, 0.9603913426399231, 1.000487208366394, 1.0405831336975098, 1.080678939819336, 1.120774745941162]}, "gradients/encoder.encoder.layers.20.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 3.0, 3.0, 0.0, 3.0, 2.0, 2.0, 13.0, 25.0, 34.0, 58.0, 100.0, 115.0, 135.0, 149.0, 124.0, 95.0, 65.0, 37.0, 28.0, 13.0, 7.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.374489963054657, -0.36227187514305115, -0.3500537872314453, -0.3378356993198395, -0.32561761140823364, -0.3133995532989502, -0.301181435585022, -0.2889633774757385, -0.2767452895641327, -0.26452720165252686, -0.252309113740921, -0.24009102582931519, -0.22787295281887054, -0.2156548649072647, -0.20343677699565887, -0.19121870398521423, -0.1790006011724472, -0.16678251326084137, -0.15456442534923553, -0.1423463523387909, -0.13012826442718506, -0.11791017651557922, -0.10569208860397339, -0.09347400814294815, -0.08125592023134232, -0.06903783231973648, -0.05681975185871124, -0.04460166394710541, -0.03238357976078987, -0.020165495574474335, -0.0079474076628685, 0.004270672798156738, 0.016488760709762573, 0.02870684489607811, 0.040924929082393646, 0.05314301699399948, 0.06536109745502472, 0.07757918536663055, 0.08979727327823639, 0.10201535373926163, 0.11423344165086746, 0.1264515221118927, 0.13866961002349854, 0.15088769793510437, 0.1631057858467102, 0.17532387375831604, 0.18754196166992188, 0.19976003468036652, 0.21197812259197235, 0.22419621050357819, 0.23641429841518402, 0.24863237142562866, 0.2608504593372345, 0.27306854724884033, 0.28528663516044617, 0.297504723072052, 0.30972281098365784, 0.32194089889526367, 0.3341589868068695, 0.34637707471847534, 0.3585951626300812, 0.370813250541687, 0.38303130865097046, 0.3952493965625763, 0.40746748447418213]}, "gradients/encoder.encoder.layers.20.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 4.0, 1.0, 1.0, 7.0, 3.0, 7.0, 10.0, 11.0, 20.0, 22.0, 31.0, 49.0, 68.0, 134.0, 290.0, 763.0, 3234.0, 28523.0, 947709.0, 61214.0, 4706.0, 1009.0, 341.0, 145.0, 91.0, 52.0, 40.0, 16.0, 23.0, 6.0, 8.0, 5.0, 6.0, 0.0, 5.0, 4.0, 4.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.271484375, -0.26340675354003906, -0.2553291320800781, -0.2472515106201172, -0.23917388916015625, -0.2310962677001953, -0.22301864624023438, -0.21494102478027344, -0.2068634033203125, -0.19878578186035156, -0.19070816040039062, -0.1826305389404297, -0.17455291748046875, -0.1664752960205078, -0.15839767456054688, -0.15032005310058594, -0.142242431640625, -0.13416481018066406, -0.12608718872070312, -0.11800956726074219, -0.10993194580078125, -0.10185432434082031, -0.09377670288085938, -0.08569908142089844, -0.0776214599609375, -0.06954383850097656, -0.061466217041015625, -0.05338859558105469, -0.04531097412109375, -0.03723335266113281, -0.029155731201171875, -0.021078109741210938, -0.01300048828125, -0.0049228668212890625, 0.003154754638671875, 0.011232376098632812, 0.01930999755859375, 0.027387619018554688, 0.035465240478515625, 0.04354286193847656, 0.0516204833984375, 0.05969810485839844, 0.06777572631835938, 0.07585334777832031, 0.08393096923828125, 0.09200859069824219, 0.10008621215820312, 0.10816383361816406, 0.116241455078125, 0.12431907653808594, 0.13239669799804688, 0.1404743194580078, 0.14855194091796875, 0.1566295623779297, 0.16470718383789062, 0.17278480529785156, 0.1808624267578125, 0.18894004821777344, 0.19701766967773438, 0.2050952911376953, 0.21317291259765625, 0.2212505340576172, 0.22932815551757812, 0.23740577697753906, 0.2454833984375]}, "gradients/encoder.encoder.layers.20.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 6.0, 1.0, 4.0, 8.0, 12.0, 21.0, 44.0, 112.0, 190.0, 231.0, 169.0, 106.0, 66.0, 22.0, 9.0, 4.0, 4.0, 1.0, 0.0, 5.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.020721435546875, -0.020275235176086426, -0.01982903480529785, -0.019382834434509277, -0.018936634063720703, -0.01849043369293213, -0.018044233322143555, -0.01759803295135498, -0.017151832580566406, -0.016705632209777832, -0.016259431838989258, -0.015813231468200684, -0.01536703109741211, -0.014920830726623535, -0.014474630355834961, -0.014028429985046387, -0.013582229614257812, -0.013136029243469238, -0.012689828872680664, -0.01224362850189209, -0.011797428131103516, -0.011351227760314941, -0.010905027389526367, -0.010458827018737793, -0.010012626647949219, -0.009566426277160645, -0.00912022590637207, -0.008674025535583496, -0.008227825164794922, -0.007781624794006348, -0.0073354244232177734, -0.006889224052429199, -0.006443023681640625, -0.005996823310852051, -0.0055506229400634766, -0.005104422569274902, -0.004658222198486328, -0.004212021827697754, -0.0037658214569091797, -0.0033196210861206055, -0.0028734207153320312, -0.002427220344543457, -0.001981019973754883, -0.0015348196029663086, -0.0010886192321777344, -0.0006424188613891602, -0.00019621849060058594, 0.0002499818801879883, 0.0006961822509765625, 0.0011423826217651367, 0.001588582992553711, 0.002034783363342285, 0.0024809837341308594, 0.0029271841049194336, 0.003373384475708008, 0.003819584846496582, 0.004265785217285156, 0.0047119855880737305, 0.005158185958862305, 0.005604386329650879, 0.006050586700439453, 0.006496787071228027, 0.0069429874420166016, 0.007389187812805176, 0.00783538818359375]}, "gradients/encoder.encoder.layers.20.attention.v_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 6.0, 1.0, 3.0, 8.0, 4.0, 7.0, 11.0, 12.0, 24.0, 15.0, 45.0, 40.0, 80.0, 108.0, 207.0, 386.0, 759.0, 1653.0, 3976.0, 9955.0, 28078.0, 94414.0, 559444.0, 258111.0, 59372.0, 19168.0, 7130.0, 2918.0, 1228.0, 644.0, 293.0, 149.0, 77.0, 61.0, 34.0, 24.0, 31.0, 18.0, 17.0, 11.0, 5.0, 6.0, 5.0, 6.0, 5.0, 6.0, 3.0, 2.0, 3.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-0.08184814453125, -0.07935428619384766, -0.07686042785644531, -0.07436656951904297, -0.07187271118164062, -0.06937885284423828, -0.06688499450683594, -0.0643911361694336, -0.06189727783203125, -0.059403419494628906, -0.05690956115722656, -0.05441570281982422, -0.051921844482421875, -0.04942798614501953, -0.04693412780761719, -0.044440269470214844, -0.0419464111328125, -0.039452552795410156, -0.03695869445800781, -0.03446483612060547, -0.031970977783203125, -0.02947711944580078, -0.026983261108398438, -0.024489402770996094, -0.02199554443359375, -0.019501686096191406, -0.017007827758789062, -0.014513969421386719, -0.012020111083984375, -0.009526252746582031, -0.0070323944091796875, -0.004538536071777344, -0.002044677734375, 0.00044918060302734375, 0.0029430389404296875, 0.005436897277832031, 0.007930755615234375, 0.010424613952636719, 0.012918472290039062, 0.015412330627441406, 0.01790618896484375, 0.020400047302246094, 0.022893905639648438, 0.02538776397705078, 0.027881622314453125, 0.03037548065185547, 0.03286933898925781, 0.035363197326660156, 0.0378570556640625, 0.040350914001464844, 0.04284477233886719, 0.04533863067626953, 0.047832489013671875, 0.05032634735107422, 0.05282020568847656, 0.055314064025878906, 0.05780792236328125, 0.060301780700683594, 0.06279563903808594, 0.06528949737548828, 0.06778335571289062, 0.07027721405029297, 0.07277107238769531, 0.07526493072509766, 0.0777587890625]}, "gradients/encoder.encoder.layers.20.attention.v_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 6.0, 1.0, 3.0, 8.0, 4.0, 6.0, 9.0, 7.0, 20.0, 10.0, 27.0, 19.0, 26.0, 22.0, 31.0, 32.0, 36.0, 44.0, 42.0, 33.0, 34.0, 31.0, 51.0, 49.0, 33.0, 37.0, 36.0, 44.0, 40.0, 41.0, 30.0, 25.0, 21.0, 22.0, 13.0, 17.0, 25.0, 13.0, 16.0, 8.0, 3.0, 6.0, 5.0, 7.0, 4.0, 6.0, 3.0, 2.0, 3.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-0.0263214111328125, -0.025519371032714844, -0.024717330932617188, -0.02391529083251953, -0.023113250732421875, -0.02231121063232422, -0.021509170532226562, -0.020707130432128906, -0.01990509033203125, -0.019103050231933594, -0.018301010131835938, -0.01749897003173828, -0.016696929931640625, -0.01589488983154297, -0.015092849731445312, -0.014290809631347656, -0.01348876953125, -0.012686729431152344, -0.011884689331054688, -0.011082649230957031, -0.010280609130859375, -0.009478569030761719, -0.008676528930664062, -0.007874488830566406, -0.00707244873046875, -0.006270408630371094, -0.0054683685302734375, -0.004666328430175781, -0.003864288330078125, -0.0030622482299804688, -0.0022602081298828125, -0.0014581680297851562, -0.0006561279296875, 0.00014591217041015625, 0.0009479522705078125, 0.0017499923706054688, 0.002552032470703125, 0.0033540725708007812, 0.0041561126708984375, 0.004958152770996094, 0.00576019287109375, 0.006562232971191406, 0.0073642730712890625, 0.008166313171386719, 0.008968353271484375, 0.009770393371582031, 0.010572433471679688, 0.011374473571777344, 0.012176513671875, 0.012978553771972656, 0.013780593872070312, 0.014582633972167969, 0.015384674072265625, 0.01618671417236328, 0.016988754272460938, 0.017790794372558594, 0.01859283447265625, 0.019394874572753906, 0.020196914672851562, 0.02099895477294922, 0.021800994873046875, 0.02260303497314453, 0.023405075073242188, 0.024207115173339844, 0.0250091552734375]}, "gradients/encoder.encoder.layers.20.attention.k_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 3.0, 5.0, 2.0, 3.0, 3.0, 9.0, 11.0, 5.0, 9.0, 12.0, 19.0, 27.0, 29.0, 44.0, 64.0, 93.0, 106.0, 205.0, 357.0, 580.0, 1312.0, 3229.0, 8514.0, 31899.0, 773018.0, 196006.0, 21579.0, 6485.0, 2514.0, 1069.0, 555.0, 287.0, 167.0, 110.0, 66.0, 42.0, 26.0, 23.0, 22.0, 13.0, 8.0, 10.0, 4.0, 9.0, 3.0, 2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 2.0, 2.0], "bins": [-0.0015649795532226562, -0.0015213489532470703, -0.0014777183532714844, -0.0014340877532958984, -0.0013904571533203125, -0.0013468265533447266, -0.0013031959533691406, -0.0012595653533935547, -0.0012159347534179688, -0.0011723041534423828, -0.0011286735534667969, -0.001085042953491211, -0.001041412353515625, -0.000997781753540039, -0.0009541511535644531, -0.0009105205535888672, -0.0008668899536132812, -0.0008232593536376953, -0.0007796287536621094, -0.0007359981536865234, -0.0006923675537109375, -0.0006487369537353516, -0.0006051063537597656, -0.0005614757537841797, -0.0005178451538085938, -0.0004742145538330078, -0.0004305839538574219, -0.00038695335388183594, -0.00034332275390625, -0.00029969215393066406, -0.0002560615539550781, -0.0002124309539794922, -0.00016880035400390625, -0.0001251697540283203, -8.153915405273438e-05, -3.790855407714844e-05, 5.7220458984375e-06, 4.935264587402344e-05, 9.298324584960938e-05, 0.0001366138458251953, 0.00018024444580078125, 0.0002238750457763672, 0.0002675056457519531, 0.00031113624572753906, 0.000354766845703125, 0.00039839744567871094, 0.0004420280456542969, 0.0004856586456298828, 0.0005292892456054688, 0.0005729198455810547, 0.0006165504455566406, 0.0006601810455322266, 0.0007038116455078125, 0.0007474422454833984, 0.0007910728454589844, 0.0008347034454345703, 0.0008783340454101562, 0.0009219646453857422, 0.0009655952453613281, 0.001009225845336914, 0.0010528564453125, 0.001096487045288086, 0.0011401176452636719, 0.0011837482452392578, 0.0012273788452148438]}, "gradients/encoder.encoder.layers.20.attention.k_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 4.0, 6.0, 5.0, 4.0, 5.0, 9.0, 11.0, 13.0, 13.0, 33.0, 26.0, 51.0, 78.0, 148.0, 208.0, 136.0, 56.0, 50.0, 49.0, 20.0, 17.0, 16.0, 13.0, 7.0, 7.0, 6.0, 2.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 1.0, 2.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-2.9087066650390625e-05, -2.8135254979133606e-05, -2.7183443307876587e-05, -2.6231631636619568e-05, -2.527981996536255e-05, -2.432800829410553e-05, -2.337619662284851e-05, -2.2424384951591492e-05, -2.1472573280334473e-05, -2.0520761609077454e-05, -1.9568949937820435e-05, -1.8617138266563416e-05, -1.7665326595306396e-05, -1.6713514924049377e-05, -1.576170325279236e-05, -1.480989158153534e-05, -1.385807991027832e-05, -1.2906268239021301e-05, -1.1954456567764282e-05, -1.1002644896507263e-05, -1.0050833225250244e-05, -9.099021553993225e-06, -8.147209882736206e-06, -7.195398211479187e-06, -6.243586540222168e-06, -5.291774868965149e-06, -4.33996319770813e-06, -3.388151526451111e-06, -2.436339855194092e-06, -1.4845281839370728e-06, -5.327165126800537e-07, 4.1909515857696533e-07, 1.3709068298339844e-06, 2.3227185010910034e-06, 3.2745301723480225e-06, 4.2263418436050415e-06, 5.1781535148620605e-06, 6.12996518611908e-06, 7.081776857376099e-06, 8.033588528633118e-06, 8.985400199890137e-06, 9.937211871147156e-06, 1.0889023542404175e-05, 1.1840835213661194e-05, 1.2792646884918213e-05, 1.3744458556175232e-05, 1.4696270227432251e-05, 1.564808189868927e-05, 1.659989356994629e-05, 1.7551705241203308e-05, 1.8503516912460327e-05, 1.9455328583717346e-05, 2.0407140254974365e-05, 2.1358951926231384e-05, 2.2310763597488403e-05, 2.3262575268745422e-05, 2.421438694000244e-05, 2.516619861125946e-05, 2.611801028251648e-05, 2.70698219537735e-05, 2.8021633625030518e-05, 2.8973445296287537e-05, 2.9925256967544556e-05, 3.0877068638801575e-05, 3.1828880310058594e-05]}, "gradients/encoder.encoder.layers.20.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0, 1.0, 1.0, 3.0, 12.0, 16.0, 16.0, 35.0, 33.0, 69.0, 93.0, 196.0, 355.0, 784.0, 2371.0, 10225.0, 161442.0, 851314.0, 16424.0, 3262.0, 962.0, 418.0, 210.0, 123.0, 76.0, 43.0, 26.0, 20.0, 12.0, 5.0, 8.0, 3.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 3.0, 2.0], "bins": [-0.002716064453125, -0.002650022506713867, -0.0025839805603027344, -0.0025179386138916016, -0.0024518966674804688, -0.002385854721069336, -0.002319812774658203, -0.0022537708282470703, -0.0021877288818359375, -0.0021216869354248047, -0.002055644989013672, -0.001989603042602539, -0.0019235610961914062, -0.0018575191497802734, -0.0017914772033691406, -0.0017254352569580078, -0.001659393310546875, -0.0015933513641357422, -0.0015273094177246094, -0.0014612674713134766, -0.0013952255249023438, -0.001329183578491211, -0.0012631416320800781, -0.0011970996856689453, -0.0011310577392578125, -0.0010650157928466797, -0.0009989738464355469, -0.0009329319000244141, -0.0008668899536132812, -0.0008008480072021484, -0.0007348060607910156, -0.0006687641143798828, -0.00060272216796875, -0.0005366802215576172, -0.0004706382751464844, -0.00040459632873535156, -0.00033855438232421875, -0.00027251243591308594, -0.00020647048950195312, -0.0001404285430908203, -7.43865966796875e-05, -8.344650268554688e-06, 5.7697296142578125e-05, 0.00012373924255371094, 0.00018978118896484375, 0.00025582313537597656, 0.0003218650817871094, 0.0003879070281982422, 0.000453948974609375, 0.0005199909210205078, 0.0005860328674316406, 0.0006520748138427734, 0.0007181167602539062, 0.0007841587066650391, 0.0008502006530761719, 0.0009162425994873047, 0.0009822845458984375, 0.0010483264923095703, 0.0011143684387207031, 0.001180410385131836, 0.0012464523315429688, 0.0013124942779541016, 0.0013785362243652344, 0.0014445781707763672, 0.0015106201171875]}, "gradients/encoder.encoder.layers.20.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 2.0, 4.0, 2.0, 8.0, 5.0, 5.0, 10.0, 10.0, 14.0, 30.0, 54.0, 104.0, 223.0, 249.0, 127.0, 55.0, 34.0, 20.0, 14.0, 11.0, 7.0, 12.0, 3.0, 3.0, 1.0, 2.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0], "bins": [-0.0008740425109863281, -0.0008527897298336029, -0.0008315369486808777, -0.0008102841675281525, -0.0007890313863754272, -0.000767778605222702, -0.0007465258240699768, -0.0007252730429172516, -0.0007040202617645264, -0.0006827674806118011, -0.0006615146994590759, -0.0006402619183063507, -0.0006190091371536255, -0.0005977563560009003, -0.000576503574848175, -0.0005552507936954498, -0.0005339980125427246, -0.0005127452313899994, -0.0004914924502372742, -0.00047023966908454895, -0.00044898688793182373, -0.0004277341067790985, -0.0004064813256263733, -0.00038522854447364807, -0.00036397576332092285, -0.00034272298216819763, -0.0003214702010154724, -0.0003002174198627472, -0.00027896463871002197, -0.00025771185755729675, -0.00023645907640457153, -0.0002152062952518463, -0.0001939535140991211, -0.00017270073294639587, -0.00015144795179367065, -0.00013019517064094543, -0.00010894238948822021, -8.7689608335495e-05, -6.643682718276978e-05, -4.5184046030044556e-05, -2.3931264877319336e-05, -2.678483724594116e-06, 1.8574297428131104e-05, 3.982707858085632e-05, 6.107985973358154e-05, 8.233264088630676e-05, 0.00010358542203903198, 0.0001248382031917572, 0.00014609098434448242, 0.00016734376549720764, 0.00018859654664993286, 0.00020984932780265808, 0.0002311021089553833, 0.0002523548901081085, 0.00027360767126083374, 0.00029486045241355896, 0.0003161132335662842, 0.0003373660147190094, 0.0003586187958717346, 0.00037987157702445984, 0.00040112435817718506, 0.0004223771393299103, 0.0004436299204826355, 0.0004648827016353607, 0.00048613548278808594]}, "gradients/encoder.encoder.layers.20.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 3.0, 20.0, 166.0, 661.0, 111.0, 35.0, 13.0, 7.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.40777289867401123, -0.3510960042476654, -0.2944191098213196, -0.23774221539497375, -0.18106532096862793, -0.1243884265422821, -0.06771153211593628, -0.011034637689590454, 0.04564225673675537, 0.1023191511631012, 0.15899604558944702, 0.21567294001579285, 0.27234983444213867, 0.3290267288684845, 0.3857036232948303, 0.44238051772117615, 0.499057412147522, 0.5557342767715454, 0.6124112010002136, 0.6690881252288818, 0.7257649898529053, 0.7824418544769287, 0.8391187787055969, 0.8957957029342651, 0.9524725675582886, 1.009149432182312, 1.065826416015625, 1.1225032806396484, 1.1791801452636719, 1.2358570098876953, 1.2925338745117188, 1.3492108583450317, 1.4058876037597656, 1.462564468383789, 1.5192413330078125, 1.5759183168411255, 1.632595181465149, 1.6892720460891724, 1.7459490299224854, 1.8026258945465088, 1.8593027591705322, 1.9159796237945557, 1.972656488418579, 2.0293333530426025, 2.086010456085205, 2.1426873207092285, 2.199364185333252, 2.2560410499572754, 2.312717914581299, 2.3693947792053223, 2.4260716438293457, 2.482748508453369, 2.5394253730773926, 2.596102476119995, 2.6527793407440186, 2.709456205368042, 2.7661330699920654, 2.822809934616089, 2.8794867992401123, 2.9361636638641357, 2.9928407669067383, 3.0495176315307617, 3.106194496154785, 3.1628713607788086, 3.219548225402832]}, "gradients/encoder.encoder.layers.20.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 3.0, 2.0, 1.0, 3.0, 2.0, 6.0, 4.0, 10.0, 9.0, 9.0, 16.0, 21.0, 32.0, 37.0, 46.0, 34.0, 32.0, 51.0, 50.0, 54.0, 46.0, 57.0, 50.0, 61.0, 60.0, 46.0, 45.0, 36.0, 45.0, 19.0, 15.0, 17.0, 21.0, 17.0, 13.0, 8.0, 11.0, 9.0, 4.0, 5.0, 3.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-0.2274121642112732, -0.2198096662759781, -0.21220716834068298, -0.20460467040538788, -0.19700217247009277, -0.18939965963363647, -0.18179716169834137, -0.17419466376304626, -0.16659216582775116, -0.15898966789245605, -0.15138716995716095, -0.14378467202186584, -0.13618215918540955, -0.12857967615127563, -0.12097716331481934, -0.11337466537952423, -0.10577216744422913, -0.09816966950893402, -0.09056717157363892, -0.08296466618776321, -0.07536216825246811, -0.067759670317173, -0.0601571686565876, -0.0525546669960022, -0.04495216906070709, -0.03734967112541199, -0.029747169464826584, -0.02214466966688633, -0.014542169868946075, -0.0069396719336509705, 0.000662829726934433, 0.008265331387519836, 0.01586782932281494, 0.023470329120755196, 0.03107282891869545, 0.03867533057928085, 0.04627782851457596, 0.05388032644987106, 0.06148282811045647, 0.06908532977104187, 0.07668782770633698, 0.08429032564163208, 0.09189282357692719, 0.09949532896280289, 0.10709782689809799, 0.1147003248333931, 0.1223028302192688, 0.1299053281545639, 0.137507826089859, 0.1451103240251541, 0.15271282196044922, 0.16031531989574432, 0.16791781783103943, 0.17552033066749573, 0.18312282860279083, 0.19072532653808594, 0.19832782447338104, 0.20593032240867615, 0.21353282034397125, 0.22113531827926636, 0.22873783111572266, 0.23634031414985657, 0.24394282698631287, 0.2515453100204468, 0.2591478228569031]}, "gradients/encoder.encoder.layers.19.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 5.0, 3.0, 5.0, 4.0, 8.0, 11.0, 9.0, 24.0, 24.0, 39.0, 80.0, 129.0, 259.0, 713.0, 3036.0, 60119.0, 4122958.0, 5300.0, 1048.0, 288.0, 108.0, 48.0, 24.0, 19.0, 14.0, 5.0, 5.0, 2.0, 1.0, 2.0, 0.0, 1.0], "bins": [-0.59716796875, -0.5847625732421875, -0.572357177734375, -0.5599517822265625, -0.54754638671875, -0.5351409912109375, -0.522735595703125, -0.5103302001953125, -0.4979248046875, -0.4855194091796875, -0.473114013671875, -0.4607086181640625, -0.44830322265625, -0.4358978271484375, -0.423492431640625, -0.4110870361328125, -0.398681640625, -0.3862762451171875, -0.373870849609375, -0.3614654541015625, -0.34906005859375, -0.3366546630859375, -0.324249267578125, -0.3118438720703125, -0.2994384765625, -0.2870330810546875, -0.274627685546875, -0.2622222900390625, -0.24981689453125, -0.2374114990234375, -0.225006103515625, -0.2126007080078125, -0.2001953125, -0.1877899169921875, -0.175384521484375, -0.1629791259765625, -0.15057373046875, -0.1381683349609375, -0.125762939453125, -0.1133575439453125, -0.1009521484375, -0.0885467529296875, -0.076141357421875, -0.0637359619140625, -0.05133056640625, -0.0389251708984375, -0.026519775390625, -0.0141143798828125, -0.001708984375, 0.0106964111328125, 0.023101806640625, 0.0355072021484375, 0.04791259765625, 0.0603179931640625, 0.072723388671875, 0.0851287841796875, 0.0975341796875, 0.1099395751953125, 0.122344970703125, 0.1347503662109375, 0.14715576171875, 0.1595611572265625, 0.171966552734375, 0.1843719482421875, 0.19677734375]}, "gradients/encoder.encoder.layers.19.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 5.0, 2.0, 5.0, 9.0, 17.0, 29.0, 64.0, 125.0, 217.0, 204.0, 157.0, 95.0, 49.0, 18.0, 5.0, 5.0, 3.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.02166748046875, -0.02121734619140625, -0.0207672119140625, -0.02031707763671875, -0.019866943359375, -0.01941680908203125, -0.0189666748046875, -0.01851654052734375, -0.01806640625, -0.01761627197265625, -0.0171661376953125, -0.01671600341796875, -0.016265869140625, -0.01581573486328125, -0.0153656005859375, -0.01491546630859375, -0.01446533203125, -0.01401519775390625, -0.0135650634765625, -0.01311492919921875, -0.012664794921875, -0.01221466064453125, -0.0117645263671875, -0.01131439208984375, -0.0108642578125, -0.01041412353515625, -0.0099639892578125, -0.00951385498046875, -0.009063720703125, -0.00861358642578125, -0.0081634521484375, -0.00771331787109375, -0.00726318359375, -0.00681304931640625, -0.0063629150390625, -0.00591278076171875, -0.005462646484375, -0.00501251220703125, -0.0045623779296875, -0.00411224365234375, -0.003662109375, -0.00321197509765625, -0.0027618408203125, -0.00231170654296875, -0.001861572265625, -0.00141143798828125, -0.0009613037109375, -0.00051116943359375, -6.103515625e-05, 0.00038909912109375, 0.0008392333984375, 0.00128936767578125, 0.001739501953125, 0.00218963623046875, 0.0026397705078125, 0.00308990478515625, 0.0035400390625, 0.00399017333984375, 0.0044403076171875, 0.00489044189453125, 0.005340576171875, 0.00579071044921875, 0.0062408447265625, 0.00669097900390625, 0.00714111328125]}, "gradients/encoder.encoder.layers.19.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 6.0, 14.0, 11.0, 38.0, 65.0, 151.0, 341.0, 1562.0, 25243.0, 4156941.0, 8725.0, 800.0, 198.0, 89.0, 52.0, 14.0, 23.0, 7.0, 3.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.783203125, -0.7655448913574219, -0.7478866577148438, -0.7302284240722656, -0.7125701904296875, -0.6949119567871094, -0.6772537231445312, -0.6595954895019531, -0.641937255859375, -0.6242790222167969, -0.6066207885742188, -0.5889625549316406, -0.5713043212890625, -0.5536460876464844, -0.5359878540039062, -0.5183296203613281, -0.50067138671875, -0.4830131530761719, -0.46535491943359375, -0.4476966857910156, -0.4300384521484375, -0.4123802185058594, -0.39472198486328125, -0.3770637512207031, -0.359405517578125, -0.3417472839355469, -0.32408905029296875, -0.3064308166503906, -0.2887725830078125, -0.2711143493652344, -0.25345611572265625, -0.23579788208007812, -0.2181396484375, -0.20048141479492188, -0.18282318115234375, -0.16516494750976562, -0.1475067138671875, -0.12984848022460938, -0.11219024658203125, -0.09453201293945312, -0.076873779296875, -0.059215545654296875, -0.04155731201171875, -0.023899078369140625, -0.0062408447265625, 0.011417388916015625, 0.02907562255859375, 0.046733856201171875, 0.06439208984375, 0.08205032348632812, 0.09970855712890625, 0.11736679077148438, 0.1350250244140625, 0.15268325805664062, 0.17034149169921875, 0.18799972534179688, 0.205657958984375, 0.22331619262695312, 0.24097442626953125, 0.2586326599121094, 0.2762908935546875, 0.2939491271972656, 0.31160736083984375, 0.3292655944824219, 0.346923828125]}, "gradients/encoder.encoder.layers.19.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 2.0, 2.0, 7.0, 4.0, 12.0, 10.0, 23.0, 28.0, 49.0, 114.0, 3627.0, 61.0, 51.0, 22.0, 25.0, 14.0, 5.0, 18.0, 6.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.057220458984375, -0.055930376052856445, -0.05464029312133789, -0.053350210189819336, -0.05206012725830078, -0.05077004432678223, -0.04947996139526367, -0.04818987846374512, -0.04689979553222656, -0.04560971260070801, -0.04431962966918945, -0.0430295467376709, -0.041739463806152344, -0.04044938087463379, -0.039159297943115234, -0.03786921501159668, -0.036579132080078125, -0.03528904914855957, -0.033998966217041016, -0.03270888328552246, -0.031418800354003906, -0.03012871742248535, -0.028838634490966797, -0.027548551559448242, -0.026258468627929688, -0.024968385696411133, -0.023678302764892578, -0.022388219833374023, -0.02109813690185547, -0.019808053970336914, -0.01851797103881836, -0.017227888107299805, -0.01593780517578125, -0.014647722244262695, -0.01335763931274414, -0.012067556381225586, -0.010777473449707031, -0.009487390518188477, -0.008197307586669922, -0.006907224655151367, -0.0056171417236328125, -0.004327058792114258, -0.003036975860595703, -0.0017468929290771484, -0.00045680999755859375, 0.0008332729339599609, 0.0021233558654785156, 0.0034134387969970703, 0.004703521728515625, 0.00599360466003418, 0.007283687591552734, 0.008573770523071289, 0.009863853454589844, 0.011153936386108398, 0.012444019317626953, 0.013734102249145508, 0.015024185180664062, 0.016314268112182617, 0.017604351043701172, 0.018894433975219727, 0.02018451690673828, 0.021474599838256836, 0.02276468276977539, 0.024054765701293945, 0.0253448486328125]}, "gradients/encoder.encoder.layers.19.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 3.0, 5.0, 4.0, 5.0, 10.0, 11.0, 20.0, 27.0, 50.0, 76.0, 207.0, 383.0, 103.0, 54.0, 29.0, 14.0, 8.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.3843596279621124, -0.3758871853351593, -0.3674147129058838, -0.35894227027893066, -0.35046982765197754, -0.3419973850250244, -0.3335249125957489, -0.3250524699687958, -0.31657999753952026, -0.30810755491256714, -0.2996350824832916, -0.2911626398563385, -0.2826901972293854, -0.27421772480010986, -0.26574528217315674, -0.2572728395462036, -0.2488003969192505, -0.24032793939113617, -0.23185549676418304, -0.22338303923606873, -0.2149105966091156, -0.20643813908100128, -0.19796568155288696, -0.18949323892593384, -0.18102078139781952, -0.1725483238697052, -0.16407588124275208, -0.15560342371463776, -0.14713096618652344, -0.1386585235595703, -0.130186066031456, -0.12171361595392227, -0.11324116587638855, -0.10476871579885483, -0.0962962657213211, -0.08782380819320679, -0.07935135811567307, -0.07087890803813934, -0.06240645423531532, -0.0539340004324913, -0.04546155035495758, -0.03698910027742386, -0.028516646474599838, -0.020044194534420967, -0.011571742594242096, -0.003099292516708374, 0.005373161286115646, 0.013845615088939667, 0.02231806516647339, 0.03079051710665226, 0.03926296904683113, 0.04773542284965515, 0.05620787292718887, 0.0646803230047226, 0.07315278053283691, 0.08162523061037064, 0.09009768068790436, 0.09857013076543808, 0.1070425808429718, 0.11551503837108612, 0.12398748844861984, 0.13245993852615356, 0.14093239605426788, 0.1494048535823822, 0.15787729620933533]}, "gradients/encoder.encoder.layers.19.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 5.0, 1.0, 2.0, 3.0, 4.0, 10.0, 16.0, 35.0, 36.0, 64.0, 73.0, 89.0, 78.0, 116.0, 96.0, 109.0, 79.0, 67.0, 53.0, 28.0, 24.0, 14.0, 5.0, 5.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.18515360355377197, -0.1793646365404129, -0.17357566952705383, -0.16778670251369476, -0.1619977355003357, -0.15620876848697662, -0.15041980147361755, -0.14463083446025848, -0.13884186744689941, -0.13305290043354034, -0.12726393342018127, -0.1214749664068222, -0.11568599939346313, -0.10989703238010406, -0.104108065366745, -0.09831909835338593, -0.09253013134002686, -0.08674116432666779, -0.08095219731330872, -0.07516323029994965, -0.06937426328659058, -0.0635852962732315, -0.057796329259872437, -0.05200736224651337, -0.0462183952331543, -0.04042942821979523, -0.03464046120643616, -0.028851494193077087, -0.023062527179718018, -0.017273560166358948, -0.011484593152999878, -0.005695626139640808, 9.334087371826172e-05, 0.0058823078870773315, 0.011671274900436401, 0.01746024191379547, 0.02324920892715454, 0.02903817594051361, 0.03482714295387268, 0.04061610996723175, 0.04640507698059082, 0.05219404399394989, 0.05798301100730896, 0.06377197802066803, 0.0695609450340271, 0.07534991204738617, 0.08113887906074524, 0.08692784607410431, 0.09271681308746338, 0.09850578010082245, 0.10429474711418152, 0.11008371412754059, 0.11587268114089966, 0.12166164815425873, 0.1274506151676178, 0.13323958218097687, 0.13902854919433594, 0.144817516207695, 0.15060648322105408, 0.15639545023441315, 0.16218441724777222, 0.1679733842611313, 0.17376235127449036, 0.17955131828784943, 0.1853402853012085]}, "gradients/encoder.encoder.layers.19.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 3.0, 1.0, 2.0, 1.0, 4.0, 6.0, 8.0, 8.0, 10.0, 10.0, 10.0, 19.0, 20.0, 29.0, 36.0, 53.0, 59.0, 115.0, 229.0, 537.0, 1519.0, 6412.0, 51413.0, 868462.0, 106172.0, 9896.0, 2109.0, 705.0, 284.0, 128.0, 83.0, 42.0, 36.0, 26.0, 19.0, 18.0, 16.0, 13.0, 10.0, 6.0, 5.0, 5.0, 4.0, 6.0, 7.0, 2.0, 4.0, 1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.134765625, -0.13050460815429688, -0.12624359130859375, -0.12198257446289062, -0.1177215576171875, -0.11346054077148438, -0.10919952392578125, -0.10493850708007812, -0.100677490234375, -0.09641647338867188, -0.09215545654296875, -0.08789443969726562, -0.0836334228515625, -0.07937240600585938, -0.07511138916015625, -0.07085037231445312, -0.06658935546875, -0.062328338623046875, -0.05806732177734375, -0.053806304931640625, -0.0495452880859375, -0.045284271240234375, -0.04102325439453125, -0.036762237548828125, -0.032501220703125, -0.028240203857421875, -0.02397918701171875, -0.019718170166015625, -0.0154571533203125, -0.011196136474609375, -0.00693511962890625, -0.002674102783203125, 0.0015869140625, 0.005847930908203125, 0.01010894775390625, 0.014369964599609375, 0.0186309814453125, 0.022891998291015625, 0.02715301513671875, 0.031414031982421875, 0.035675048828125, 0.039936065673828125, 0.04419708251953125, 0.048458099365234375, 0.0527191162109375, 0.056980133056640625, 0.06124114990234375, 0.06550216674804688, 0.06976318359375, 0.07402420043945312, 0.07828521728515625, 0.08254623413085938, 0.0868072509765625, 0.09106826782226562, 0.09532928466796875, 0.09959030151367188, 0.103851318359375, 0.10811233520507812, 0.11237335205078125, 0.11663436889648438, 0.1208953857421875, 0.12515640258789062, 0.12941741943359375, 0.13367843627929688, 0.137939453125]}, "gradients/encoder.encoder.layers.19.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 7.0, 0.0, 8.0, 14.0, 21.0, 53.0, 100.0, 152.0, 210.0, 188.0, 122.0, 65.0, 44.0, 13.0, 6.0, 5.0, 2.0, 3.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0209197998046875, -0.020471692085266113, -0.020023584365844727, -0.01957547664642334, -0.019127368927001953, -0.018679261207580566, -0.01823115348815918, -0.017783045768737793, -0.017334938049316406, -0.01688683032989502, -0.016438722610473633, -0.015990614891052246, -0.01554250717163086, -0.015094399452209473, -0.014646291732788086, -0.0141981840133667, -0.013750076293945312, -0.013301968574523926, -0.012853860855102539, -0.012405753135681152, -0.011957645416259766, -0.011509537696838379, -0.011061429977416992, -0.010613322257995605, -0.010165214538574219, -0.009717106819152832, -0.009268999099731445, -0.008820891380310059, -0.008372783660888672, -0.007924675941467285, -0.0074765682220458984, -0.007028460502624512, -0.006580352783203125, -0.006132245063781738, -0.0056841373443603516, -0.005236029624938965, -0.004787921905517578, -0.004339814186096191, -0.0038917064666748047, -0.003443598747253418, -0.0029954910278320312, -0.0025473833084106445, -0.002099275588989258, -0.001651167869567871, -0.0012030601501464844, -0.0007549524307250977, -0.00030684471130371094, 0.00014126300811767578, 0.0005893707275390625, 0.0010374784469604492, 0.001485586166381836, 0.0019336938858032227, 0.0023818016052246094, 0.002829909324645996, 0.003278017044067383, 0.0037261247634887695, 0.004174232482910156, 0.004622340202331543, 0.00507044792175293, 0.005518555641174316, 0.005966663360595703, 0.00641477108001709, 0.0068628787994384766, 0.007310986518859863, 0.00775909423828125]}, "gradients/encoder.encoder.layers.19.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 4.0, 3.0, 4.0, 4.0, 7.0, 11.0, 17.0, 22.0, 27.0, 49.0, 50.0, 93.0, 141.0, 239.0, 343.0, 660.0, 1120.0, 1937.0, 3654.0, 7006.0, 14020.0, 30534.0, 73964.0, 230030.0, 481056.0, 118338.0, 44746.0, 19899.0, 9566.0, 4897.0, 2627.0, 1455.0, 782.0, 458.0, 277.0, 167.0, 124.0, 67.0, 51.0, 43.0, 19.0, 14.0, 6.0, 6.0, 10.0, 6.0, 2.0, 3.0, 1.0, 1.0, 4.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.04144287109375, -0.0401158332824707, -0.038788795471191406, -0.03746175765991211, -0.03613471984863281, -0.034807682037353516, -0.03348064422607422, -0.03215360641479492, -0.030826568603515625, -0.029499530792236328, -0.02817249298095703, -0.026845455169677734, -0.025518417358398438, -0.02419137954711914, -0.022864341735839844, -0.021537303924560547, -0.02021026611328125, -0.018883228302001953, -0.017556190490722656, -0.01622915267944336, -0.014902114868164062, -0.013575077056884766, -0.012248039245605469, -0.010921001434326172, -0.009593963623046875, -0.008266925811767578, -0.006939888000488281, -0.005612850189208984, -0.0042858123779296875, -0.0029587745666503906, -0.0016317367553710938, -0.0003046989440917969, 0.0010223388671875, 0.002349376678466797, 0.0036764144897460938, 0.005003452301025391, 0.0063304901123046875, 0.007657527923583984, 0.008984565734863281, 0.010311603546142578, 0.011638641357421875, 0.012965679168701172, 0.014292716979980469, 0.015619754791259766, 0.016946792602539062, 0.01827383041381836, 0.019600868225097656, 0.020927906036376953, 0.02225494384765625, 0.023581981658935547, 0.024909019470214844, 0.02623605728149414, 0.027563095092773438, 0.028890132904052734, 0.03021717071533203, 0.03154420852661133, 0.032871246337890625, 0.03419828414916992, 0.03552532196044922, 0.036852359771728516, 0.03817939758300781, 0.03950643539428711, 0.040833473205566406, 0.0421605110168457, 0.043487548828125]}, "gradients/encoder.encoder.layers.19.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 1.0, 1.0, 5.0, 6.0, 12.0, 9.0, 12.0, 11.0, 14.0, 16.0, 20.0, 28.0, 38.0, 35.0, 28.0, 39.0, 30.0, 51.0, 48.0, 37.0, 48.0, 45.0, 51.0, 33.0, 52.0, 34.0, 41.0, 34.0, 35.0, 23.0, 35.0, 15.0, 17.0, 23.0, 18.0, 15.0, 9.0, 8.0, 7.0, 4.0, 4.0, 6.0, 4.0, 2.0, 3.0, 1.0, 0.0, 3.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-0.0235137939453125, -0.022761106491088867, -0.022008419036865234, -0.0212557315826416, -0.02050304412841797, -0.019750356674194336, -0.018997669219970703, -0.01824498176574707, -0.017492294311523438, -0.016739606857299805, -0.015986919403076172, -0.015234231948852539, -0.014481544494628906, -0.013728857040405273, -0.01297616958618164, -0.012223482131958008, -0.011470794677734375, -0.010718107223510742, -0.00996541976928711, -0.009212732315063477, -0.008460044860839844, -0.007707357406616211, -0.006954669952392578, -0.006201982498168945, -0.0054492950439453125, -0.00469660758972168, -0.003943920135498047, -0.003191232681274414, -0.0024385452270507812, -0.0016858577728271484, -0.0009331703186035156, -0.0001804828643798828, 0.00057220458984375, 0.0013248920440673828, 0.0020775794982910156, 0.0028302669525146484, 0.0035829544067382812, 0.004335641860961914, 0.005088329315185547, 0.00584101676940918, 0.0065937042236328125, 0.007346391677856445, 0.008099079132080078, 0.008851766586303711, 0.009604454040527344, 0.010357141494750977, 0.01110982894897461, 0.011862516403198242, 0.012615203857421875, 0.013367891311645508, 0.01412057876586914, 0.014873266220092773, 0.015625953674316406, 0.01637864112854004, 0.017131328582763672, 0.017884016036987305, 0.018636703491210938, 0.01938939094543457, 0.020142078399658203, 0.020894765853881836, 0.02164745330810547, 0.0224001407623291, 0.023152828216552734, 0.023905515670776367, 0.024658203125]}, "gradients/encoder.encoder.layers.19.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 1.0, 2.0, 2.0, 3.0, 6.0, 7.0, 10.0, 19.0, 18.0, 28.0, 47.0, 100.0, 190.0, 345.0, 750.0, 1576.0, 3852.0, 12165.0, 79908.0, 873450.0, 59385.0, 10332.0, 3564.0, 1414.0, 637.0, 343.0, 168.0, 84.0, 40.0, 26.0, 24.0, 18.0, 11.0, 7.0, 7.0, 9.0, 4.0, 6.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0], "bins": [-0.0010423660278320312, -0.0010121017694473267, -0.000981837511062622, -0.0009515732526779175, -0.0009213089942932129, -0.0008910447359085083, -0.0008607804775238037, -0.0008305162191390991, -0.0008002519607543945, -0.0007699877023696899, -0.0007397234439849854, -0.0007094591856002808, -0.0006791949272155762, -0.0006489306688308716, -0.000618666410446167, -0.0005884021520614624, -0.0005581378936767578, -0.0005278736352920532, -0.0004976093769073486, -0.00046734511852264404, -0.00043708086013793945, -0.00040681660175323486, -0.0003765523433685303, -0.0003462880849838257, -0.0003160238265991211, -0.0002857595682144165, -0.0002554953098297119, -0.00022523105144500732, -0.00019496679306030273, -0.00016470253467559814, -0.00013443827629089355, -0.00010417401790618896, -7.390975952148438e-05, -4.3645501136779785e-05, -1.3381242752075195e-05, 1.6883015632629395e-05, 4.7147274017333984e-05, 7.741153240203857e-05, 0.00010767579078674316, 0.00013794004917144775, 0.00016820430755615234, 0.00019846856594085693, 0.00022873282432556152, 0.0002589970827102661, 0.0002892613410949707, 0.0003195255994796753, 0.0003497898578643799, 0.00038005411624908447, 0.00041031837463378906, 0.00044058263301849365, 0.00047084689140319824, 0.0005011111497879028, 0.0005313754081726074, 0.000561639666557312, 0.0005919039249420166, 0.0006221681833267212, 0.0006524324417114258, 0.0006826967000961304, 0.000712960958480835, 0.0007432252168655396, 0.0007734894752502441, 0.0008037537336349487, 0.0008340179920196533, 0.0008642822504043579, 0.0008945465087890625]}, "gradients/encoder.encoder.layers.19.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 3.0, 0.0, 0.0, 3.0, 2.0, 5.0, 7.0, 9.0, 12.0, 10.0, 13.0, 23.0, 32.0, 66.0, 167.0, 337.0, 136.0, 62.0, 37.0, 25.0, 17.0, 11.0, 8.0, 5.0, 2.0, 4.0, 4.0, 1.0, 2.0, 2.0, 1.0, 0.0, 3.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.273653030395508e-05, -4.1330233216285706e-05, -3.992393612861633e-05, -3.851763904094696e-05, -3.711134195327759e-05, -3.5705044865608215e-05, -3.429874777793884e-05, -3.289245069026947e-05, -3.14861536026001e-05, -3.0079856514930725e-05, -2.8673559427261353e-05, -2.726726233959198e-05, -2.5860965251922607e-05, -2.4454668164253235e-05, -2.3048371076583862e-05, -2.164207398891449e-05, -2.0235776901245117e-05, -1.8829479813575745e-05, -1.7423182725906372e-05, -1.6016885638237e-05, -1.4610588550567627e-05, -1.3204291462898254e-05, -1.1797994375228882e-05, -1.039169728755951e-05, -8.985400199890137e-06, -7.579103112220764e-06, -6.172806024551392e-06, -4.766508936882019e-06, -3.3602118492126465e-06, -1.953914761543274e-06, -5.476176738739014e-07, 8.586794137954712e-07, 2.2649765014648438e-06, 3.6712735891342163e-06, 5.077570676803589e-06, 6.4838677644729614e-06, 7.890164852142334e-06, 9.296461939811707e-06, 1.0702759027481079e-05, 1.2109056115150452e-05, 1.3515353202819824e-05, 1.4921650290489197e-05, 1.632794737815857e-05, 1.7734244465827942e-05, 1.9140541553497314e-05, 2.0546838641166687e-05, 2.195313572883606e-05, 2.3359432816505432e-05, 2.4765729904174805e-05, 2.6172026991844177e-05, 2.757832407951355e-05, 2.8984621167182922e-05, 3.0390918254852295e-05, 3.179721534252167e-05, 3.320351243019104e-05, 3.460980951786041e-05, 3.6016106605529785e-05, 3.742240369319916e-05, 3.882870078086853e-05, 4.02349978685379e-05, 4.1641294956207275e-05, 4.304759204387665e-05, 4.445388913154602e-05, 4.586018621921539e-05, 4.7266483306884766e-05]}, "gradients/encoder.encoder.layers.19.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 1.0, 6.0, 2.0, 4.0, 9.0, 13.0, 19.0, 25.0, 35.0, 58.0, 121.0, 210.0, 433.0, 986.0, 2499.0, 8343.0, 51888.0, 891901.0, 76472.0, 10352.0, 3020.0, 1134.0, 487.0, 233.0, 122.0, 66.0, 42.0, 25.0, 17.0, 14.0, 12.0, 4.0, 5.0, 5.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0010385513305664062, -0.0010076165199279785, -0.0009766817092895508, -0.000945746898651123, -0.0009148120880126953, -0.0008838772773742676, -0.0008529424667358398, -0.0008220076560974121, -0.0007910728454589844, -0.0007601380348205566, -0.0007292032241821289, -0.0006982684135437012, -0.0006673336029052734, -0.0006363987922668457, -0.000605463981628418, -0.0005745291709899902, -0.0005435943603515625, -0.0005126595497131348, -0.00048172473907470703, -0.0004507899284362793, -0.00041985511779785156, -0.00038892030715942383, -0.0003579854965209961, -0.00032705068588256836, -0.0002961158752441406, -0.0002651810646057129, -0.00023424625396728516, -0.00020331144332885742, -0.0001723766326904297, -0.00014144182205200195, -0.00011050701141357422, -7.957220077514648e-05, -4.863739013671875e-05, -1.7702579498291016e-05, 1.3232231140136719e-05, 4.416704177856445e-05, 7.510185241699219e-05, 0.00010603666305541992, 0.00013697147369384766, 0.0001679062843322754, 0.00019884109497070312, 0.00022977590560913086, 0.0002607107162475586, 0.00029164552688598633, 0.00032258033752441406, 0.0003535151481628418, 0.00038444995880126953, 0.00041538476943969727, 0.000446319580078125, 0.00047725439071655273, 0.0005081892013549805, 0.0005391240119934082, 0.0005700588226318359, 0.0006009936332702637, 0.0006319284439086914, 0.0006628632545471191, 0.0006937980651855469, 0.0007247328758239746, 0.0007556676864624023, 0.0007866024971008301, 0.0008175373077392578, 0.0008484721183776855, 0.0008794069290161133, 0.000910341739654541, 0.0009412765502929688]}, "gradients/encoder.encoder.layers.19.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 5.0, 3.0, 1.0, 5.0, 5.0, 7.0, 10.0, 12.0, 25.0, 32.0, 39.0, 64.0, 124.0, 183.0, 179.0, 122.0, 76.0, 47.0, 30.0, 6.0, 11.0, 8.0, 3.0, 5.0, 2.0, 3.0, 1.0, 3.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.0005903244018554688, -0.0005762539803981781, -0.0005621835589408875, -0.0005481131374835968, -0.0005340427160263062, -0.0005199722945690155, -0.0005059018731117249, -0.0004918314516544342, -0.00047776103019714355, -0.0004636906087398529, -0.00044962018728256226, -0.0004355497658252716, -0.00042147934436798096, -0.0004074089229106903, -0.00039333850145339966, -0.000379268079996109, -0.00036519765853881836, -0.0003511272370815277, -0.00033705681562423706, -0.0003229863941669464, -0.00030891597270965576, -0.0002948455512523651, -0.00028077512979507446, -0.0002667047083377838, -0.00025263428688049316, -0.00023856386542320251, -0.00022449344396591187, -0.00021042302250862122, -0.00019635260105133057, -0.00018228217959403992, -0.00016821175813674927, -0.00015414133667945862, -0.00014007091522216797, -0.00012600049376487732, -0.00011193007230758667, -9.785965085029602e-05, -8.378922939300537e-05, -6.971880793571472e-05, -5.564838647842407e-05, -4.157796502113342e-05, -2.7507543563842773e-05, -1.3437122106552124e-05, 6.332993507385254e-07, 1.4703720808029175e-05, 2.8774142265319824e-05, 4.2844563722610474e-05, 5.691498517990112e-05, 7.098540663719177e-05, 8.505582809448242e-05, 9.912624955177307e-05, 0.00011319667100906372, 0.00012726709246635437, 0.00014133751392364502, 0.00015540793538093567, 0.00016947835683822632, 0.00018354877829551697, 0.00019761919975280762, 0.00021168962121009827, 0.00022576004266738892, 0.00023983046412467957, 0.0002539008855819702, 0.00026797130703926086, 0.0002820417284965515, 0.00029611214995384216, 0.0003101825714111328]}, "gradients/encoder.encoder.layers.19.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 5.0, 9.0, 67.0, 302.0, 445.0, 102.0, 43.0, 24.0, 13.0, 6.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.7805824279785156, -0.7527408599853516, -0.7248993515968323, -0.697057843208313, -0.6692162752151489, -0.6413747072219849, -0.6135331988334656, -0.5856916904449463, -0.5578501224517822, -0.5300085544586182, -0.5021670460700989, -0.4743255078792572, -0.4464839696884155, -0.41864243149757385, -0.3908008933067322, -0.3629593551158905, -0.33511781692504883, -0.30727627873420715, -0.2794347405433655, -0.2515932023525238, -0.22375166416168213, -0.19591012597084045, -0.16806858777999878, -0.1402270495891571, -0.11238551139831543, -0.08454397320747375, -0.05670243501663208, -0.028860896825790405, -0.0010193586349487305, 0.026822179555892944, 0.05466371774673462, 0.0825052559375763, 0.11034679412841797, 0.13818833231925964, 0.16602987051010132, 0.193871408700943, 0.22171294689178467, 0.24955448508262634, 0.277396023273468, 0.3052375614643097, 0.33307909965515137, 0.36092063784599304, 0.3887621760368347, 0.4166037142276764, 0.44444525241851807, 0.47228679060935974, 0.5001283288002014, 0.5279698371887207, 0.5558114051818848, 0.5836529731750488, 0.6114944815635681, 0.6393359899520874, 0.6671775579452515, 0.6950191259384155, 0.7228606343269348, 0.7507021427154541, 0.7785437107086182, 0.8063852787017822, 0.8342267870903015, 0.8620682954788208, 0.8899098634719849, 0.9177514314651489, 0.9455929398536682, 0.9734344482421875, 1.0012760162353516]}, "gradients/encoder.encoder.layers.19.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 3.0, 5.0, 4.0, 7.0, 13.0, 8.0, 16.0, 21.0, 19.0, 19.0, 40.0, 31.0, 43.0, 44.0, 49.0, 61.0, 69.0, 60.0, 66.0, 63.0, 52.0, 42.0, 44.0, 32.0, 33.0, 35.0, 24.0, 24.0, 19.0, 18.0, 14.0, 9.0, 5.0, 8.0, 5.0, 1.0, 2.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.18730640411376953, -0.18169750273227692, -0.1760886013507843, -0.1704796999692917, -0.16487079858779907, -0.15926188230514526, -0.15365298092365265, -0.14804407954216003, -0.14243517816066742, -0.1368262767791748, -0.1312173753976822, -0.12560847401618958, -0.11999956518411636, -0.11439066380262375, -0.10878175497055054, -0.10317285358905792, -0.09756395220756531, -0.09195505082607269, -0.08634614944458008, -0.08073724061250687, -0.07512833923101425, -0.06951943784952164, -0.06391052901744843, -0.05830162763595581, -0.052692726254463196, -0.04708382487297058, -0.04147491976618767, -0.035866014659404755, -0.03025711327791214, -0.024648210033774376, -0.019039306789636612, -0.013430401682853699, -0.007821500301361084, -0.00221259705722332, 0.003396306186914444, 0.009005209431052208, 0.014614112675189972, 0.020223015919327736, 0.0258319191634655, 0.03144082427024841, 0.03704972565174103, 0.04265862703323364, 0.048267532140016556, 0.05387643724679947, 0.059485338628292084, 0.0650942400097847, 0.07070314884185791, 0.07631205022335052, 0.08192095160484314, 0.08752985298633575, 0.09313875436782837, 0.09874766319990158, 0.1043565645813942, 0.10996546596288681, 0.11557437479496002, 0.12118327617645264, 0.12679217755794525, 0.13240107893943787, 0.13800998032093048, 0.1436188817024231, 0.1492277979850769, 0.15483669936656952, 0.16044560074806213, 0.16605450212955475, 0.17166340351104736]}, "gradients/encoder.encoder.layers.18.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 0.0, 4.0, 2.0, 1.0, 5.0, 3.0, 4.0, 6.0, 7.0, 4.0, 10.0, 15.0, 20.0, 44.0, 58.0, 135.0, 338.0, 1274.0, 11045.0, 4167738.0, 11750.0, 1281.0, 329.0, 125.0, 51.0, 22.0, 7.0, 8.0, 4.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.529296875, -0.5181541442871094, -0.5070114135742188, -0.4958686828613281, -0.4847259521484375, -0.4735832214355469, -0.46244049072265625, -0.4512977600097656, -0.440155029296875, -0.4290122985839844, -0.41786956787109375, -0.4067268371582031, -0.3955841064453125, -0.3844413757324219, -0.37329864501953125, -0.3621559143066406, -0.35101318359375, -0.3398704528808594, -0.32872772216796875, -0.3175849914550781, -0.3064422607421875, -0.2952995300292969, -0.28415679931640625, -0.2730140686035156, -0.261871337890625, -0.2507286071777344, -0.23958587646484375, -0.22844314575195312, -0.2173004150390625, -0.20615768432617188, -0.19501495361328125, -0.18387222290039062, -0.1727294921875, -0.16158676147460938, -0.15044403076171875, -0.13930130004882812, -0.1281585693359375, -0.11701583862304688, -0.10587310791015625, -0.09473037719726562, -0.083587646484375, -0.07244491577148438, -0.06130218505859375, -0.050159454345703125, -0.0390167236328125, -0.027873992919921875, -0.01673126220703125, -0.005588531494140625, 0.00555419921875, 0.016696929931640625, 0.02783966064453125, 0.038982391357421875, 0.0501251220703125, 0.061267852783203125, 0.07241058349609375, 0.08355331420898438, 0.094696044921875, 0.10583877563476562, 0.11698150634765625, 0.12812423706054688, 0.1392669677734375, 0.15040969848632812, 0.16155242919921875, 0.17269515991210938, 0.183837890625]}, "gradients/encoder.encoder.layers.18.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 5.0, 4.0, 8.0, 14.0, 21.0, 60.0, 109.0, 172.0, 206.0, 176.0, 114.0, 61.0, 35.0, 15.0, 6.0, 3.0, 3.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.0214385986328125, -0.020987331867218018, -0.020536065101623535, -0.020084798336029053, -0.01963353157043457, -0.019182264804840088, -0.018730998039245605, -0.018279731273651123, -0.01782846450805664, -0.017377197742462158, -0.016925930976867676, -0.016474664211273193, -0.01602339744567871, -0.015572130680084229, -0.015120863914489746, -0.014669597148895264, -0.014218330383300781, -0.013767063617706299, -0.013315796852111816, -0.012864530086517334, -0.012413263320922852, -0.01196199655532837, -0.011510729789733887, -0.011059463024139404, -0.010608196258544922, -0.01015692949295044, -0.009705662727355957, -0.009254395961761475, -0.008803129196166992, -0.00835186243057251, -0.007900595664978027, -0.007449328899383545, -0.0069980621337890625, -0.00654679536819458, -0.006095528602600098, -0.005644261837005615, -0.005192995071411133, -0.00474172830581665, -0.004290461540222168, -0.0038391947746276855, -0.003387928009033203, -0.0029366612434387207, -0.0024853944778442383, -0.002034127712249756, -0.0015828609466552734, -0.001131594181060791, -0.0006803274154663086, -0.00022906064987182617, 0.00022220611572265625, 0.0006734728813171387, 0.001124739646911621, 0.0015760064125061035, 0.002027273178100586, 0.0024785399436950684, 0.0029298067092895508, 0.003381073474884033, 0.0038323402404785156, 0.004283607006072998, 0.0047348737716674805, 0.005186140537261963, 0.005637407302856445, 0.006088674068450928, 0.00653994083404541, 0.006991207599639893, 0.007442474365234375]}, "gradients/encoder.encoder.layers.18.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 1.0, 2.0, 4.0, 6.0, 7.0, 15.0, 18.0, 28.0, 46.0, 80.0, 152.0, 361.0, 1719.0, 43167.0, 4142542.0, 5084.0, 549.0, 198.0, 108.0, 68.0, 36.0, 34.0, 27.0, 10.0, 15.0, 5.0, 6.0, 7.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.27685546875, -0.2643890380859375, -0.251922607421875, -0.2394561767578125, -0.22698974609375, -0.2145233154296875, -0.202056884765625, -0.1895904541015625, -0.1771240234375, -0.1646575927734375, -0.152191162109375, -0.1397247314453125, -0.12725830078125, -0.1147918701171875, -0.102325439453125, -0.0898590087890625, -0.077392578125, -0.0649261474609375, -0.052459716796875, -0.0399932861328125, -0.02752685546875, -0.0150604248046875, -0.002593994140625, 0.0098724365234375, 0.0223388671875, 0.0348052978515625, 0.047271728515625, 0.0597381591796875, 0.07220458984375, 0.0846710205078125, 0.097137451171875, 0.1096038818359375, 0.1220703125, 0.1345367431640625, 0.147003173828125, 0.1594696044921875, 0.17193603515625, 0.1844024658203125, 0.196868896484375, 0.2093353271484375, 0.2218017578125, 0.2342681884765625, 0.246734619140625, 0.2592010498046875, 0.27166748046875, 0.2841339111328125, 0.296600341796875, 0.3090667724609375, 0.321533203125, 0.3339996337890625, 0.346466064453125, 0.3589324951171875, 0.37139892578125, 0.3838653564453125, 0.396331787109375, 0.4087982177734375, 0.4212646484375, 0.4337310791015625, 0.446197509765625, 0.4586639404296875, 0.47113037109375, 0.4835968017578125, 0.496063232421875, 0.5085296630859375, 0.52099609375]}, "gradients/encoder.encoder.layers.18.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 3.0, 2.0, 9.0, 14.0, 17.0, 29.0, 87.0, 3783.0, 57.0, 36.0, 23.0, 12.0, 6.0, 7.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0271759033203125, -0.02561354637145996, -0.024051189422607422, -0.022488832473754883, -0.020926475524902344, -0.019364118576049805, -0.017801761627197266, -0.016239404678344727, -0.014677047729492188, -0.013114690780639648, -0.01155233383178711, -0.00998997688293457, -0.008427619934082031, -0.006865262985229492, -0.005302906036376953, -0.003740549087524414, -0.002178192138671875, -0.0006158351898193359, 0.0009465217590332031, 0.002508878707885742, 0.004071235656738281, 0.00563359260559082, 0.007195949554443359, 0.008758306503295898, 0.010320663452148438, 0.011883020401000977, 0.013445377349853516, 0.015007734298706055, 0.016570091247558594, 0.018132448196411133, 0.019694805145263672, 0.02125716209411621, 0.02281951904296875, 0.02438187599182129, 0.025944232940673828, 0.027506589889526367, 0.029068946838378906, 0.030631303787231445, 0.032193660736083984, 0.03375601768493652, 0.03531837463378906, 0.0368807315826416, 0.03844308853149414, 0.04000544548034668, 0.04156780242919922, 0.04313015937805176, 0.0446925163269043, 0.046254873275756836, 0.047817230224609375, 0.049379587173461914, 0.05094194412231445, 0.05250430107116699, 0.05406665802001953, 0.05562901496887207, 0.05719137191772461, 0.05875372886657715, 0.06031608581542969, 0.06187844276428223, 0.06344079971313477, 0.0650031566619873, 0.06656551361083984, 0.06812787055969238, 0.06969022750854492, 0.07125258445739746, 0.07281494140625]}, "gradients/encoder.encoder.layers.18.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 4.0, 13.0, 170.0, 774.0, 49.0, 5.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.9727463722229004, -0.9497187733650208, -0.9266911745071411, -0.9036635756492615, -0.8806359767913818, -0.8576083779335022, -0.8345807790756226, -0.8115531802177429, -0.7885255813598633, -0.7654979825019836, -0.742470383644104, -0.7194427847862244, -0.6964151859283447, -0.6733875870704651, -0.6503599882125854, -0.6273323893547058, -0.6043047904968262, -0.5812771916389465, -0.5582495927810669, -0.5352219939231873, -0.5121943950653076, -0.489166796207428, -0.46613919734954834, -0.4431115984916687, -0.42008399963378906, -0.3970564007759094, -0.3740288019180298, -0.35100120306015015, -0.3279736042022705, -0.30494600534439087, -0.28191840648651123, -0.2588908076286316, -0.23586320877075195, -0.21283560991287231, -0.18980801105499268, -0.16678041219711304, -0.1437528133392334, -0.12072521448135376, -0.09769761562347412, -0.07467001676559448, -0.051642417907714844, -0.028614819049835205, -0.005587220191955566, 0.017440378665924072, 0.04046797752380371, 0.06349557638168335, 0.08652317523956299, 0.10955077409744263, 0.13257837295532227, 0.1556059718132019, 0.17863357067108154, 0.20166116952896118, 0.22468876838684082, 0.24771636724472046, 0.2707439661026001, 0.29377156496047974, 0.3167991638183594, 0.339826762676239, 0.36285436153411865, 0.3858819603919983, 0.40890955924987793, 0.43193715810775757, 0.4549647569656372, 0.47799235582351685, 0.5010199546813965]}, "gradients/encoder.encoder.layers.18.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 4.0, 8.0, 18.0, 31.0, 26.0, 50.0, 56.0, 79.0, 69.0, 89.0, 101.0, 97.0, 80.0, 86.0, 62.0, 51.0, 35.0, 30.0, 12.0, 15.0, 3.0, 5.0, 1.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.10517275333404541, -0.1022692397236824, -0.0993657261133194, -0.09646221250295639, -0.09355869889259338, -0.09065519273281097, -0.08775167912244797, -0.08484816551208496, -0.08194465190172195, -0.07904113829135895, -0.07613762468099594, -0.07323411107063293, -0.07033060491085052, -0.06742709130048752, -0.06452357769012451, -0.061620064079761505, -0.0587165504693985, -0.05581303685903549, -0.052909523248672485, -0.05000601336359978, -0.04710249975323677, -0.044198986142873764, -0.041295476257801056, -0.03839196264743805, -0.03548844903707504, -0.032584935426712036, -0.02968142367899418, -0.02677791193127632, -0.023874398320913315, -0.020970884710550308, -0.01806737296283245, -0.015163861215114594, -0.012260347604751587, -0.009356834925711155, -0.006453322246670723, -0.003549809567630291, -0.000646296888589859, 0.002257215790450573, 0.005160728469491005, 0.008064240217208862, 0.010967753827571869, 0.013871266506612301, 0.016774779185652733, 0.01967829093337059, 0.022581804543733597, 0.025485318154096603, 0.02838882990181446, 0.03129234164953232, 0.034195855259895325, 0.03709936887025833, 0.04000288248062134, 0.042906392365694046, 0.04580990597605705, 0.04871341958642006, 0.05161692947149277, 0.054520443081855774, 0.05742395669221878, 0.06032747030258179, 0.0632309839129448, 0.0661344975233078, 0.06903800368309021, 0.07194151729345322, 0.07484503090381622, 0.07774854451417923, 0.08065205812454224]}, "gradients/encoder.encoder.layers.18.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 4.0, 0.0, 2.0, 7.0, 10.0, 8.0, 8.0, 17.0, 19.0, 18.0, 25.0, 34.0, 31.0, 48.0, 75.0, 120.0, 250.0, 592.0, 2091.0, 9668.0, 106344.0, 872347.0, 48434.0, 5864.0, 1486.0, 489.0, 212.0, 115.0, 57.0, 39.0, 34.0, 25.0, 19.0, 12.0, 10.0, 10.0, 5.0, 6.0, 7.0, 5.0, 2.0, 3.0, 1.0, 3.0, 1.0, 1.0, 1.0, 0.0, 3.0, 0.0, 1.0, 2.0, 0.0, 3.0], "bins": [-0.1236572265625, -0.11971473693847656, -0.11577224731445312, -0.11182975769042969, -0.10788726806640625, -0.10394477844238281, -0.10000228881835938, -0.09605979919433594, -0.0921173095703125, -0.08817481994628906, -0.08423233032226562, -0.08028984069824219, -0.07634735107421875, -0.07240486145019531, -0.06846237182617188, -0.06451988220214844, -0.060577392578125, -0.05663490295410156, -0.052692413330078125, -0.04874992370605469, -0.04480743408203125, -0.04086494445800781, -0.036922454833984375, -0.03297996520996094, -0.0290374755859375, -0.025094985961914062, -0.021152496337890625, -0.017210006713867188, -0.01326751708984375, -0.009325027465820312, -0.005382537841796875, -0.0014400482177734375, 0.00250244140625, 0.0064449310302734375, 0.010387420654296875, 0.014329910278320312, 0.01827239990234375, 0.022214889526367188, 0.026157379150390625, 0.030099868774414062, 0.0340423583984375, 0.03798484802246094, 0.041927337646484375, 0.04586982727050781, 0.04981231689453125, 0.05375480651855469, 0.057697296142578125, 0.06163978576660156, 0.065582275390625, 0.06952476501464844, 0.07346725463867188, 0.07740974426269531, 0.08135223388671875, 0.08529472351074219, 0.08923721313476562, 0.09317970275878906, 0.0971221923828125, 0.10106468200683594, 0.10500717163085938, 0.10894966125488281, 0.11289215087890625, 0.11683464050292969, 0.12077713012695312, 0.12471961975097656, 0.128662109375]}, "gradients/encoder.encoder.layers.18.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 23.0, 15.0, 62.0, 117.0, 161.0, 196.0, 175.0, 119.0, 58.0, 38.0, 17.0, 7.0, 4.0, 3.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.020660400390625, -0.020225465297698975, -0.01979053020477295, -0.019355595111846924, -0.0189206600189209, -0.018485724925994873, -0.018050789833068848, -0.017615854740142822, -0.017180919647216797, -0.01674598455429077, -0.016311049461364746, -0.01587611436843872, -0.015441179275512695, -0.01500624418258667, -0.014571309089660645, -0.01413637399673462, -0.013701438903808594, -0.013266503810882568, -0.012831568717956543, -0.012396633625030518, -0.011961698532104492, -0.011526763439178467, -0.011091828346252441, -0.010656893253326416, -0.01022195816040039, -0.009787023067474365, -0.00935208797454834, -0.008917152881622314, -0.008482217788696289, -0.008047282695770264, -0.007612347602844238, -0.007177412509918213, -0.0067424774169921875, -0.006307542324066162, -0.005872607231140137, -0.005437672138214111, -0.005002737045288086, -0.0045678019523620605, -0.004132866859436035, -0.0036979317665100098, -0.0032629966735839844, -0.002828061580657959, -0.0023931264877319336, -0.001958191394805908, -0.0015232563018798828, -0.0010883212089538574, -0.000653386116027832, -0.00021845102310180664, 0.00021648406982421875, 0.0006514191627502441, 0.0010863542556762695, 0.001521289348602295, 0.0019562244415283203, 0.0023911595344543457, 0.002826094627380371, 0.0032610297203063965, 0.003695964813232422, 0.004130899906158447, 0.004565834999084473, 0.005000770092010498, 0.0054357051849365234, 0.005870640277862549, 0.006305575370788574, 0.0067405104637146, 0.007175445556640625]}, "gradients/encoder.encoder.layers.18.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 3.0, 1.0, 5.0, 6.0, 9.0, 11.0, 16.0, 32.0, 41.0, 65.0, 102.0, 139.0, 258.0, 445.0, 835.0, 1632.0, 3451.0, 7389.0, 17549.0, 48306.0, 167559.0, 578338.0, 148535.0, 44223.0, 16190.0, 6752.0, 3182.0, 1572.0, 818.0, 426.0, 259.0, 136.0, 95.0, 62.0, 43.0, 25.0, 20.0, 10.0, 9.0, 5.0, 4.0, 4.0, 3.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0469970703125, -0.04545402526855469, -0.043910980224609375, -0.04236793518066406, -0.04082489013671875, -0.03928184509277344, -0.037738800048828125, -0.03619575500488281, -0.0346527099609375, -0.03310966491699219, -0.031566619873046875, -0.030023574829101562, -0.02848052978515625, -0.026937484741210938, -0.025394439697265625, -0.023851394653320312, -0.022308349609375, -0.020765304565429688, -0.019222259521484375, -0.017679214477539062, -0.01613616943359375, -0.014593124389648438, -0.013050079345703125, -0.011507034301757812, -0.0099639892578125, -0.008420944213867188, -0.006877899169921875, -0.0053348541259765625, -0.00379180908203125, -0.0022487640380859375, -0.000705718994140625, 0.0008373260498046875, 0.00238037109375, 0.0039234161376953125, 0.005466461181640625, 0.0070095062255859375, 0.00855255126953125, 0.010095596313476562, 0.011638641357421875, 0.013181686401367188, 0.0147247314453125, 0.016267776489257812, 0.017810821533203125, 0.019353866577148438, 0.02089691162109375, 0.022439956665039062, 0.023983001708984375, 0.025526046752929688, 0.027069091796875, 0.028612136840820312, 0.030155181884765625, 0.03169822692871094, 0.03324127197265625, 0.03478431701660156, 0.036327362060546875, 0.03787040710449219, 0.0394134521484375, 0.04095649719238281, 0.042499542236328125, 0.04404258728027344, 0.04558563232421875, 0.04712867736816406, 0.048671722412109375, 0.05021476745605469, 0.0517578125]}, "gradients/encoder.encoder.layers.18.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 0.0, 4.0, 3.0, 5.0, 1.0, 9.0, 11.0, 8.0, 11.0, 9.0, 17.0, 22.0, 32.0, 33.0, 45.0, 34.0, 41.0, 60.0, 53.0, 53.0, 48.0, 61.0, 46.0, 38.0, 46.0, 50.0, 35.0, 40.0, 37.0, 26.0, 25.0, 19.0, 26.0, 19.0, 10.0, 10.0, 8.0, 4.0, 4.0, 4.0, 4.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.03204345703125, -0.031200885772705078, -0.030358314514160156, -0.029515743255615234, -0.028673171997070312, -0.02783060073852539, -0.02698802947998047, -0.026145458221435547, -0.025302886962890625, -0.024460315704345703, -0.02361774444580078, -0.02277517318725586, -0.021932601928710938, -0.021090030670166016, -0.020247459411621094, -0.019404888153076172, -0.01856231689453125, -0.017719745635986328, -0.016877174377441406, -0.016034603118896484, -0.015192031860351562, -0.01434946060180664, -0.013506889343261719, -0.012664318084716797, -0.011821746826171875, -0.010979175567626953, -0.010136604309082031, -0.00929403305053711, -0.008451461791992188, -0.007608890533447266, -0.006766319274902344, -0.005923748016357422, -0.0050811767578125, -0.004238605499267578, -0.0033960342407226562, -0.0025534629821777344, -0.0017108917236328125, -0.0008683204650878906, -2.574920654296875e-05, 0.0008168220520019531, 0.001659393310546875, 0.002501964569091797, 0.0033445358276367188, 0.004187107086181641, 0.0050296783447265625, 0.005872249603271484, 0.006714820861816406, 0.007557392120361328, 0.00839996337890625, 0.009242534637451172, 0.010085105895996094, 0.010927677154541016, 0.011770248413085938, 0.01261281967163086, 0.013455390930175781, 0.014297962188720703, 0.015140533447265625, 0.015983104705810547, 0.01682567596435547, 0.01766824722290039, 0.018510818481445312, 0.019353389739990234, 0.020195960998535156, 0.021038532257080078, 0.021881103515625]}, "gradients/encoder.encoder.layers.18.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 4.0, 3.0, 4.0, 7.0, 4.0, 17.0, 22.0, 23.0, 27.0, 61.0, 97.0, 137.0, 208.0, 388.0, 754.0, 1554.0, 3561.0, 10290.0, 44229.0, 713450.0, 233285.0, 27357.0, 7463.0, 2860.0, 1256.0, 621.0, 349.0, 177.0, 118.0, 75.0, 57.0, 27.0, 23.0, 16.0, 8.0, 8.0, 4.0, 3.0, 4.0, 3.0, 3.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0006628036499023438, -0.0006413236260414124, -0.000619843602180481, -0.0005983635783195496, -0.0005768835544586182, -0.0005554035305976868, -0.0005339235067367554, -0.000512443482875824, -0.0004909634590148926, -0.0004694834351539612, -0.0004480034112930298, -0.0004265233874320984, -0.000405043363571167, -0.0003835633397102356, -0.0003620833158493042, -0.0003406032919883728, -0.0003191232681274414, -0.00029764324426651, -0.0002761632204055786, -0.0002546831965446472, -0.00023320317268371582, -0.00021172314882278442, -0.00019024312496185303, -0.00016876310110092163, -0.00014728307723999023, -0.00012580305337905884, -0.00010432302951812744, -8.284300565719604e-05, -6.136298179626465e-05, -3.988295793533325e-05, -1.8402934074401855e-05, 3.077089786529541e-06, 2.4557113647460938e-05, 4.6037137508392334e-05, 6.751716136932373e-05, 8.899718523025513e-05, 0.00011047720909118652, 0.00013195723295211792, 0.00015343725681304932, 0.0001749172806739807, 0.0001963973045349121, 0.0002178773283958435, 0.0002393573522567749, 0.0002608373761177063, 0.0002823173999786377, 0.0003037974238395691, 0.0003252774477005005, 0.0003467574715614319, 0.0003682374954223633, 0.0003897175192832947, 0.0004111975431442261, 0.00043267756700515747, 0.00045415759086608887, 0.00047563761472702026, 0.0004971176385879517, 0.0005185976624488831, 0.0005400776863098145, 0.0005615577101707458, 0.0005830377340316772, 0.0006045177578926086, 0.00062599778175354, 0.0006474778056144714, 0.0006689578294754028, 0.0006904378533363342, 0.0007119178771972656]}, "gradients/encoder.encoder.layers.18.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 2.0, 0.0, 2.0, 3.0, 5.0, 4.0, 5.0, 11.0, 10.0, 16.0, 13.0, 20.0, 29.0, 38.0, 51.0, 71.0, 160.0, 164.0, 129.0, 80.0, 55.0, 34.0, 30.0, 17.0, 12.0, 9.0, 9.0, 9.0, 3.0, 6.0, 8.0, 3.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.141164779663086e-05, -3.0411407351493835e-05, -2.941116690635681e-05, -2.8410926461219788e-05, -2.7410686016082764e-05, -2.641044557094574e-05, -2.5410205125808716e-05, -2.4409964680671692e-05, -2.3409724235534668e-05, -2.2409483790397644e-05, -2.140924334526062e-05, -2.0409002900123596e-05, -1.9408762454986572e-05, -1.840852200984955e-05, -1.7408281564712524e-05, -1.64080411195755e-05, -1.5407800674438477e-05, -1.4407560229301453e-05, -1.3407319784164429e-05, -1.2407079339027405e-05, -1.1406838893890381e-05, -1.0406598448753357e-05, -9.406358003616333e-06, -8.406117558479309e-06, -7.405877113342285e-06, -6.405636668205261e-06, -5.405396223068237e-06, -4.405155777931213e-06, -3.4049153327941895e-06, -2.4046748876571655e-06, -1.4044344425201416e-06, -4.041939973831177e-07, 5.960464477539062e-07, 1.5962868928909302e-06, 2.596527338027954e-06, 3.596767783164978e-06, 4.597008228302002e-06, 5.597248673439026e-06, 6.59748911857605e-06, 7.597729563713074e-06, 8.597970008850098e-06, 9.598210453987122e-06, 1.0598450899124146e-05, 1.159869134426117e-05, 1.2598931789398193e-05, 1.3599172234535217e-05, 1.4599412679672241e-05, 1.5599653124809265e-05, 1.659989356994629e-05, 1.7600134015083313e-05, 1.8600374460220337e-05, 1.960061490535736e-05, 2.0600855350494385e-05, 2.160109579563141e-05, 2.2601336240768433e-05, 2.3601576685905457e-05, 2.460181713104248e-05, 2.5602057576179504e-05, 2.660229802131653e-05, 2.7602538466453552e-05, 2.8602778911590576e-05, 2.96030193567276e-05, 3.0603259801864624e-05, 3.160350024700165e-05, 3.260374069213867e-05]}, "gradients/encoder.encoder.layers.18.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 5.0, 5.0, 2.0, 5.0, 8.0, 17.0, 17.0, 32.0, 38.0, 57.0, 93.0, 172.0, 265.0, 513.0, 1108.0, 3263.0, 14200.0, 141803.0, 836419.0, 40320.0, 6685.0, 1838.0, 755.0, 362.0, 206.0, 128.0, 74.0, 53.0, 39.0, 27.0, 15.0, 14.0, 7.0, 7.0, 3.0, 4.0, 4.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0007157325744628906, -0.0006920844316482544, -0.0006684362888336182, -0.0006447881460189819, -0.0006211400032043457, -0.0005974918603897095, -0.0005738437175750732, -0.000550195574760437, -0.0005265474319458008, -0.0005028992891311646, -0.0004792511463165283, -0.0004556030035018921, -0.00043195486068725586, -0.00040830671787261963, -0.0003846585750579834, -0.00036101043224334717, -0.00033736228942871094, -0.0003137141466140747, -0.0002900660037994385, -0.00026641786098480225, -0.00024276971817016602, -0.00021912157535552979, -0.00019547343254089355, -0.00017182528972625732, -0.0001481771469116211, -0.00012452900409698486, -0.00010088086128234863, -7.72327184677124e-05, -5.358457565307617e-05, -2.993643283843994e-05, -6.288290023803711e-06, 1.735985279083252e-05, 4.100799560546875e-05, 6.465613842010498e-05, 8.830428123474121e-05, 0.00011195242404937744, 0.00013560056686401367, 0.0001592487096786499, 0.00018289685249328613, 0.00020654499530792236, 0.0002301931381225586, 0.0002538412809371948, 0.00027748942375183105, 0.0003011375665664673, 0.0003247857093811035, 0.00034843385219573975, 0.000372081995010376, 0.0003957301378250122, 0.00041937828063964844, 0.00044302642345428467, 0.0004666745662689209, 0.0004903227090835571, 0.0005139708518981934, 0.0005376189947128296, 0.0005612671375274658, 0.000584915280342102, 0.0006085634231567383, 0.0006322115659713745, 0.0006558597087860107, 0.000679507851600647, 0.0007031559944152832, 0.0007268041372299194, 0.0007504522800445557, 0.0007741004228591919, 0.0007977485656738281]}, "gradients/encoder.encoder.layers.18.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 2.0, 4.0, 14.0, 17.0, 24.0, 40.0, 54.0, 96.0, 144.0, 184.0, 160.0, 83.0, 57.0, 51.0, 25.0, 18.0, 12.0, 10.0, 4.0, 4.0, 0.0, 2.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.0004892349243164062, -0.00047710537910461426, -0.00046497583389282227, -0.0004528462886810303, -0.0004407167434692383, -0.0004285871982574463, -0.0004164576530456543, -0.0004043281078338623, -0.0003921985626220703, -0.0003800690174102783, -0.00036793947219848633, -0.00035580992698669434, -0.00034368038177490234, -0.00033155083656311035, -0.00031942129135131836, -0.00030729174613952637, -0.0002951622009277344, -0.0002830326557159424, -0.0002709031105041504, -0.0002587735652923584, -0.0002466440200805664, -0.00023451447486877441, -0.00022238492965698242, -0.00021025538444519043, -0.00019812583923339844, -0.00018599629402160645, -0.00017386674880981445, -0.00016173720359802246, -0.00014960765838623047, -0.00013747811317443848, -0.00012534856796264648, -0.00011321902275085449, -0.0001010894775390625, -8.895993232727051e-05, -7.683038711547852e-05, -6.470084190368652e-05, -5.257129669189453e-05, -4.044175148010254e-05, -2.8312206268310547e-05, -1.6182661056518555e-05, -4.0531158447265625e-06, 8.07642936706543e-06, 2.0205974578857422e-05, 3.2335519790649414e-05, 4.4465065002441406e-05, 5.65946102142334e-05, 6.872415542602539e-05, 8.085370063781738e-05, 9.298324584960938e-05, 0.00010511279106140137, 0.00011724233627319336, 0.00012937188148498535, 0.00014150142669677734, 0.00015363097190856934, 0.00016576051712036133, 0.00017789006233215332, 0.0001900196075439453, 0.0002021491527557373, 0.0002142786979675293, 0.0002264082431793213, 0.00023853778839111328, 0.0002506673336029053, 0.00026279687881469727, 0.00027492642402648926, 0.00028705596923828125]}, "gradients/encoder.encoder.layers.18.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 5.0, 7.0, 12.0, 22.0, 52.0, 113.0, 399.0, 197.0, 83.0, 48.0, 25.0, 15.0, 15.0, 10.0, 4.0, 4.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.47009068727493286, -0.45615702867507935, -0.44222334027290344, -0.42828965187072754, -0.414355993270874, -0.4004223346710205, -0.3864886462688446, -0.3725549578666687, -0.3586212992668152, -0.34468764066696167, -0.33075395226478577, -0.31682026386260986, -0.30288660526275635, -0.28895294666290283, -0.27501925826072693, -0.261085569858551, -0.2471519112586975, -0.2332182377576828, -0.2192845642566681, -0.20535089075565338, -0.19141721725463867, -0.17748354375362396, -0.16354987025260925, -0.14961619675159454, -0.13568252325057983, -0.12174884974956512, -0.10781517624855042, -0.0938815027475357, -0.079947829246521, -0.06601415574550629, -0.05208048224449158, -0.03814680874347687, -0.024213165044784546, -0.010279491543769836, 0.003654181957244873, 0.017587855458259583, 0.03152152895927429, 0.045455202460289, 0.05938887596130371, 0.07332254946231842, 0.08725622296333313, 0.10118989646434784, 0.11512356996536255, 0.12905724346637726, 0.14299091696739197, 0.15692459046840668, 0.1708582639694214, 0.1847919374704361, 0.1987256109714508, 0.21265928447246552, 0.22659295797348022, 0.24052663147449493, 0.25446030497550964, 0.26839399337768555, 0.28232765197753906, 0.2962613105773926, 0.3101949989795685, 0.3241286873817444, 0.3380623459815979, 0.3519960045814514, 0.3659296929836273, 0.3798633813858032, 0.39379703998565674, 0.40773069858551025, 0.42166438698768616]}, "gradients/encoder.encoder.layers.18.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 5.0, 1.0, 5.0, 12.0, 12.0, 10.0, 18.0, 13.0, 21.0, 24.0, 36.0, 46.0, 45.0, 54.0, 54.0, 63.0, 66.0, 59.0, 62.0, 52.0, 50.0, 52.0, 52.0, 38.0, 33.0, 36.0, 26.0, 17.0, 14.0, 6.0, 9.0, 8.0, 4.0, 1.0, 4.0, 2.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.18377745151519775, -0.17875464260578156, -0.17373183369636536, -0.16870903968811035, -0.16368623077869415, -0.15866342186927795, -0.15364061295986176, -0.14861780405044556, -0.14359501004219055, -0.13857220113277435, -0.13354939222335815, -0.12852659821510315, -0.12350378930568695, -0.11848098039627075, -0.11345817148685455, -0.10843536257743835, -0.10341255366802216, -0.09838974475860596, -0.09336694329977036, -0.08834413439035416, -0.08332133293151855, -0.07829852402210236, -0.07327571511268616, -0.06825290620326996, -0.06323010474443436, -0.058207299560308456, -0.053184494376182556, -0.04816168546676636, -0.04313888028264046, -0.03811607509851456, -0.03309326618909836, -0.028070461004972458, -0.023047655820846558, -0.018024850636720657, -0.013002043589949608, -0.007979237474501133, -0.002956431359052658, 0.002066373825073242, 0.007089180871844292, 0.012111987918615341, 0.01713479310274124, 0.02215759828686714, 0.02718040533363819, 0.03220321238040924, 0.03722601756453514, 0.04224882274866104, 0.04727163165807724, 0.05229443684220314, 0.05731724202632904, 0.06234004721045494, 0.06736285239458084, 0.07238566130399704, 0.07740846276283264, 0.08243127167224884, 0.08745408058166504, 0.09247688949108124, 0.09749969094991684, 0.10252249985933304, 0.10754530131816864, 0.11256811022758484, 0.11759091913700104, 0.12261372059583664, 0.12763652205467224, 0.13265933096408844, 0.13768213987350464]}, "gradients/encoder.encoder.layers.17.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 3.0, 2.0, 1.0, 3.0, 2.0, 3.0, 5.0, 11.0, 5.0, 5.0, 6.0, 8.0, 7.0, 6.0, 7.0, 10.0, 11.0, 12.0, 15.0, 26.0, 19.0, 34.0, 51.0, 96.0, 176.0, 435.0, 972.0, 3148.0, 13576.0, 3647162.0, 509823.0, 13845.0, 3049.0, 963.0, 378.0, 187.0, 92.0, 61.0, 24.0, 20.0, 14.0, 5.0, 6.0, 4.0, 1.0, 2.0], "bins": [-0.2239990234375, -0.2193317413330078, -0.21466445922851562, -0.20999717712402344, -0.20532989501953125, -0.20066261291503906, -0.19599533081054688, -0.1913280487060547, -0.1866607666015625, -0.1819934844970703, -0.17732620239257812, -0.17265892028808594, -0.16799163818359375, -0.16332435607910156, -0.15865707397460938, -0.1539897918701172, -0.149322509765625, -0.1446552276611328, -0.13998794555664062, -0.13532066345214844, -0.13065338134765625, -0.12598609924316406, -0.12131881713867188, -0.11665153503417969, -0.1119842529296875, -0.10731697082519531, -0.10264968872070312, -0.09798240661621094, -0.09331512451171875, -0.08864784240722656, -0.08398056030273438, -0.07931327819824219, -0.07464599609375, -0.06997871398925781, -0.06531143188476562, -0.06064414978027344, -0.05597686767578125, -0.05130958557128906, -0.046642303466796875, -0.04197502136230469, -0.0373077392578125, -0.03264045715332031, -0.027973175048828125, -0.023305892944335938, -0.01863861083984375, -0.013971328735351562, -0.009304046630859375, -0.0046367645263671875, 3.0517578125e-05, 0.0046977996826171875, 0.009365081787109375, 0.014032363891601562, 0.01869964599609375, 0.023366928100585938, 0.028034210205078125, 0.03270149230957031, 0.0373687744140625, 0.04203605651855469, 0.046703338623046875, 0.05137062072753906, 0.05603790283203125, 0.06070518493652344, 0.06537246704101562, 0.07003974914550781, 0.07470703125]}, "gradients/encoder.encoder.layers.17.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 4.0, 6.0, 11.0, 22.0, 35.0, 86.0, 146.0, 197.0, 177.0, 158.0, 84.0, 49.0, 16.0, 11.0, 5.0, 2.0, 2.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0], "bins": [-0.0209197998046875, -0.020483970642089844, -0.020048141479492188, -0.01961231231689453, -0.019176483154296875, -0.01874065399169922, -0.018304824829101562, -0.017868995666503906, -0.01743316650390625, -0.016997337341308594, -0.016561508178710938, -0.01612567901611328, -0.015689849853515625, -0.015254020690917969, -0.014818191528320312, -0.014382362365722656, -0.013946533203125, -0.013510704040527344, -0.013074874877929688, -0.012639045715332031, -0.012203216552734375, -0.011767387390136719, -0.011331558227539062, -0.010895729064941406, -0.01045989990234375, -0.010024070739746094, -0.009588241577148438, -0.009152412414550781, -0.008716583251953125, -0.008280754089355469, -0.007844924926757812, -0.007409095764160156, -0.0069732666015625, -0.006537437438964844, -0.0061016082763671875, -0.005665779113769531, -0.005229949951171875, -0.004794120788574219, -0.0043582916259765625, -0.003922462463378906, -0.00348663330078125, -0.0030508041381835938, -0.0026149749755859375, -0.0021791458129882812, -0.001743316650390625, -0.0013074874877929688, -0.0008716583251953125, -0.00043582916259765625, 0.0, 0.00043582916259765625, 0.0008716583251953125, 0.0013074874877929688, 0.001743316650390625, 0.0021791458129882812, 0.0026149749755859375, 0.0030508041381835938, 0.00348663330078125, 0.003922462463378906, 0.0043582916259765625, 0.004794120788574219, 0.005229949951171875, 0.005665779113769531, 0.0061016082763671875, 0.006537437438964844, 0.0069732666015625]}, "gradients/encoder.encoder.layers.17.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 8.0, 12.0, 6.0, 7.0, 9.0, 16.0, 22.0, 29.0, 74.0, 101.0, 164.0, 250.0, 517.0, 944.0, 2029.0, 5988.0, 28885.0, 4015062.0, 120767.0, 13358.0, 3451.0, 1228.0, 599.0, 307.0, 156.0, 106.0, 66.0, 51.0, 33.0, 17.0, 12.0, 10.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.141845703125, -0.13788318634033203, -0.13392066955566406, -0.1299581527709961, -0.12599563598632812, -0.12203311920166016, -0.11807060241699219, -0.11410808563232422, -0.11014556884765625, -0.10618305206298828, -0.10222053527832031, -0.09825801849365234, -0.09429550170898438, -0.0903329849243164, -0.08637046813964844, -0.08240795135498047, -0.0784454345703125, -0.07448291778564453, -0.07052040100097656, -0.0665578842163086, -0.06259536743164062, -0.058632850646972656, -0.05467033386230469, -0.05070781707763672, -0.04674530029296875, -0.04278278350830078, -0.03882026672363281, -0.034857749938964844, -0.030895233154296875, -0.026932716369628906, -0.022970199584960938, -0.01900768280029297, -0.015045166015625, -0.011082649230957031, -0.0071201324462890625, -0.0031576156616210938, 0.000804901123046875, 0.004767417907714844, 0.008729934692382812, 0.012692451477050781, 0.01665496826171875, 0.02061748504638672, 0.024580001831054688, 0.028542518615722656, 0.032505035400390625, 0.036467552185058594, 0.04043006896972656, 0.04439258575439453, 0.0483551025390625, 0.05231761932373047, 0.05628013610839844, 0.060242652893066406, 0.06420516967773438, 0.06816768646240234, 0.07213020324707031, 0.07609272003173828, 0.08005523681640625, 0.08401775360107422, 0.08798027038574219, 0.09194278717041016, 0.09590530395507812, 0.0998678207397461, 0.10383033752441406, 0.10779285430908203, 0.11175537109375]}, "gradients/encoder.encoder.layers.17.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 3.0, 3.0, 1.0, 1.0, 2.0, 3.0, 2.0, 4.0, 4.0, 11.0, 12.0, 24.0, 26.0, 29.0, 42.0, 70.0, 3283.0, 385.0, 55.0, 41.0, 29.0, 17.0, 14.0, 8.0, 7.0, 7.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.036346435546875, -0.035331010818481445, -0.03431558609008789, -0.033300161361694336, -0.03228473663330078, -0.03126931190490723, -0.030253887176513672, -0.029238462448120117, -0.028223037719726562, -0.027207612991333008, -0.026192188262939453, -0.0251767635345459, -0.024161338806152344, -0.02314591407775879, -0.022130489349365234, -0.02111506462097168, -0.020099639892578125, -0.01908421516418457, -0.018068790435791016, -0.01705336570739746, -0.016037940979003906, -0.015022516250610352, -0.014007091522216797, -0.012991666793823242, -0.011976242065429688, -0.010960817337036133, -0.009945392608642578, -0.008929967880249023, -0.007914543151855469, -0.006899118423461914, -0.005883693695068359, -0.004868268966674805, -0.00385284423828125, -0.0028374195098876953, -0.0018219947814941406, -0.0008065700531005859, 0.00020885467529296875, 0.0012242794036865234, 0.002239704132080078, 0.003255128860473633, 0.0042705535888671875, 0.005285978317260742, 0.006301403045654297, 0.0073168277740478516, 0.008332252502441406, 0.009347677230834961, 0.010363101959228516, 0.01137852668762207, 0.012393951416015625, 0.01340937614440918, 0.014424800872802734, 0.015440225601196289, 0.016455650329589844, 0.0174710750579834, 0.018486499786376953, 0.019501924514770508, 0.020517349243164062, 0.021532773971557617, 0.022548198699951172, 0.023563623428344727, 0.02457904815673828, 0.025594472885131836, 0.02660989761352539, 0.027625322341918945, 0.0286407470703125]}, "gradients/encoder.encoder.layers.17.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 8.0, 9.0, 5.0, 43.0, 226.0, 633.0, 67.0, 20.0, 5.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.8044624328613281, -0.7858608961105347, -0.7672593593597412, -0.7486578226089478, -0.7300562858581543, -0.7114547491073608, -0.6928532123565674, -0.6742516756057739, -0.6556501388549805, -0.637048602104187, -0.6184470653533936, -0.5998455286026001, -0.5812439918518066, -0.5626424551010132, -0.5440409183502197, -0.5254393815994263, -0.5068378448486328, -0.48823630809783936, -0.4696347713470459, -0.45103323459625244, -0.432431697845459, -0.4138301610946655, -0.39522862434387207, -0.3766270875930786, -0.3580254912376404, -0.3394239544868469, -0.32082241773605347, -0.30222088098526, -0.28361934423446655, -0.2650178074836731, -0.24641625583171844, -0.227814719080925, -0.20921319723129272, -0.19061166048049927, -0.1720101237297058, -0.15340858697891235, -0.1348070502281189, -0.11620550602674484, -0.09760396182537079, -0.07900242507457733, -0.060400888323783875, -0.04179935157299042, -0.023197811096906662, -0.0045962706208229065, 0.01400526612997055, 0.03260680288076401, 0.05120834708213806, 0.06980988383293152, 0.08841142058372498, 0.10701295733451843, 0.1256144940853119, 0.14421603083610535, 0.1628175675868988, 0.18141910433769226, 0.2000206559896469, 0.21862219274044037, 0.23722372949123383, 0.2558252811431885, 0.27442681789398193, 0.2930283546447754, 0.31162989139556885, 0.3302314281463623, 0.34883296489715576, 0.3674345016479492, 0.3860360383987427]}, "gradients/encoder.encoder.layers.17.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 3.0, 3.0, 6.0, 13.0, 15.0, 21.0, 31.0, 47.0, 100.0, 103.0, 101.0, 93.0, 101.0, 104.0, 98.0, 56.0, 40.0, 30.0, 16.0, 12.0, 9.0, 3.0, 3.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.15818160772323608, -0.1537822037935257, -0.1493828147649765, -0.1449834108352661, -0.14058400690555573, -0.13618460297584534, -0.13178521394729614, -0.12738581001758575, -0.12298641353845596, -0.11858701705932617, -0.11418761312961578, -0.10978821665048599, -0.1053888201713562, -0.10098941624164581, -0.09659001976251602, -0.09219062328338623, -0.08779121935367584, -0.08339182287454605, -0.07899241894483566, -0.07459302246570587, -0.07019361853599548, -0.06579422205686569, -0.0613948255777359, -0.05699542537331581, -0.05259602516889572, -0.04819662496447563, -0.04379722476005554, -0.03939782828092575, -0.03499842807650566, -0.03059902787208557, -0.02619962953031063, -0.02180023118853569, -0.0174008309841156, -0.013001431711018085, -0.00860203243792057, -0.004202633164823055, 0.00019676610827445984, 0.0045961663126945496, 0.00899556465446949, 0.01339496299624443, 0.01779436320066452, 0.02219376340508461, 0.02659316174685955, 0.03099256008863449, 0.03539196029305458, 0.03979136049747467, 0.04419075697660446, 0.04859015718102455, 0.05298955738544464, 0.05738895758986473, 0.06178835779428482, 0.06618775427341461, 0.070587158203125, 0.07498655468225479, 0.07938595116138458, 0.08378535509109497, 0.08818475157022476, 0.09258414804935455, 0.09698355197906494, 0.10138294845819473, 0.10578234493732452, 0.11018174886703491, 0.1145811453461647, 0.1189805418252945, 0.12337994575500488]}, "gradients/encoder.encoder.layers.17.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 7.0, 4.0, 8.0, 6.0, 8.0, 8.0, 8.0, 8.0, 14.0, 15.0, 17.0, 20.0, 29.0, 31.0, 30.0, 58.0, 95.0, 182.0, 401.0, 1111.0, 3604.0, 18781.0, 191196.0, 772612.0, 49517.0, 7563.0, 1871.0, 611.0, 279.0, 131.0, 70.0, 43.0, 28.0, 32.0, 30.0, 13.0, 21.0, 16.0, 18.0, 12.0, 8.0, 13.0, 9.0, 6.0, 3.0, 6.0, 3.0, 3.0, 2.0, 0.0, 3.0, 2.0, 0.0, 2.0, 1.0], "bins": [-0.09942626953125, -0.09634208679199219, -0.09325790405273438, -0.09017372131347656, -0.08708953857421875, -0.08400535583496094, -0.08092117309570312, -0.07783699035644531, -0.0747528076171875, -0.07166862487792969, -0.06858444213867188, -0.06550025939941406, -0.06241607666015625, -0.05933189392089844, -0.056247711181640625, -0.05316352844238281, -0.050079345703125, -0.04699516296386719, -0.043910980224609375, -0.04082679748535156, -0.03774261474609375, -0.03465843200683594, -0.031574249267578125, -0.028490066528320312, -0.0254058837890625, -0.022321701049804688, -0.019237518310546875, -0.016153335571289062, -0.01306915283203125, -0.009984970092773438, -0.006900787353515625, -0.0038166046142578125, -0.000732421875, 0.0023517608642578125, 0.005435943603515625, 0.008520126342773438, 0.01160430908203125, 0.014688491821289062, 0.017772674560546875, 0.020856857299804688, 0.0239410400390625, 0.027025222778320312, 0.030109405517578125, 0.03319358825683594, 0.03627777099609375, 0.03936195373535156, 0.042446136474609375, 0.04553031921386719, 0.048614501953125, 0.05169868469238281, 0.054782867431640625, 0.05786705017089844, 0.06095123291015625, 0.06403541564941406, 0.06711959838867188, 0.07020378112792969, 0.0732879638671875, 0.07637214660644531, 0.07945632934570312, 0.08254051208496094, 0.08562469482421875, 0.08870887756347656, 0.09179306030273438, 0.09487724304199219, 0.09796142578125]}, "gradients/encoder.encoder.layers.17.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 3.0, 6.0, 5.0, 7.0, 22.0, 27.0, 78.0, 120.0, 179.0, 188.0, 161.0, 101.0, 63.0, 25.0, 18.0, 5.0, 3.0, 2.0, 2.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.01995849609375, -0.019536495208740234, -0.01911449432373047, -0.018692493438720703, -0.018270492553710938, -0.017848491668701172, -0.017426490783691406, -0.01700448989868164, -0.016582489013671875, -0.01616048812866211, -0.015738487243652344, -0.015316486358642578, -0.014894485473632812, -0.014472484588623047, -0.014050483703613281, -0.013628482818603516, -0.01320648193359375, -0.012784481048583984, -0.012362480163574219, -0.011940479278564453, -0.011518478393554688, -0.011096477508544922, -0.010674476623535156, -0.01025247573852539, -0.009830474853515625, -0.00940847396850586, -0.008986473083496094, -0.008564472198486328, -0.008142471313476562, -0.007720470428466797, -0.007298469543457031, -0.006876468658447266, -0.0064544677734375, -0.006032466888427734, -0.005610466003417969, -0.005188465118408203, -0.0047664642333984375, -0.004344463348388672, -0.003922462463378906, -0.0035004615783691406, -0.003078460693359375, -0.0026564598083496094, -0.0022344589233398438, -0.0018124580383300781, -0.0013904571533203125, -0.0009684562683105469, -0.0005464553833007812, -0.00012445449829101562, 0.00029754638671875, 0.0007195472717285156, 0.0011415481567382812, 0.0015635490417480469, 0.0019855499267578125, 0.002407550811767578, 0.0028295516967773438, 0.0032515525817871094, 0.003673553466796875, 0.004095554351806641, 0.004517555236816406, 0.004939556121826172, 0.0053615570068359375, 0.005783557891845703, 0.006205558776855469, 0.006627559661865234, 0.007049560546875]}, "gradients/encoder.encoder.layers.17.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 0.0, 1.0, 2.0, 3.0, 2.0, 6.0, 8.0, 13.0, 17.0, 26.0, 34.0, 49.0, 78.0, 124.0, 176.0, 316.0, 565.0, 903.0, 1489.0, 2551.0, 4677.0, 8808.0, 16884.0, 33702.0, 72401.0, 176101.0, 449428.0, 151829.0, 64655.0, 30520.0, 15069.0, 7986.0, 4299.0, 2411.0, 1381.0, 780.0, 466.0, 305.0, 188.0, 110.0, 67.0, 38.0, 30.0, 17.0, 14.0, 6.0, 9.0, 7.0, 5.0, 3.0, 2.0, 5.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.02862548828125, -0.027652740478515625, -0.02667999267578125, -0.025707244873046875, -0.0247344970703125, -0.023761749267578125, -0.02278900146484375, -0.021816253662109375, -0.020843505859375, -0.019870758056640625, -0.01889801025390625, -0.017925262451171875, -0.0169525146484375, -0.015979766845703125, -0.01500701904296875, -0.014034271240234375, -0.0130615234375, -0.012088775634765625, -0.01111602783203125, -0.010143280029296875, -0.0091705322265625, -0.008197784423828125, -0.00722503662109375, -0.006252288818359375, -0.005279541015625, -0.004306793212890625, -0.00333404541015625, -0.002361297607421875, -0.0013885498046875, -0.000415802001953125, 0.00055694580078125, 0.001529693603515625, 0.00250244140625, 0.003475189208984375, 0.00444793701171875, 0.005420684814453125, 0.0063934326171875, 0.007366180419921875, 0.00833892822265625, 0.009311676025390625, 0.010284423828125, 0.011257171630859375, 0.01222991943359375, 0.013202667236328125, 0.0141754150390625, 0.015148162841796875, 0.01612091064453125, 0.017093658447265625, 0.01806640625, 0.019039154052734375, 0.02001190185546875, 0.020984649658203125, 0.0219573974609375, 0.022930145263671875, 0.02390289306640625, 0.024875640869140625, 0.025848388671875, 0.026821136474609375, 0.02779388427734375, 0.028766632080078125, 0.0297393798828125, 0.030712127685546875, 0.03168487548828125, 0.032657623291015625, 0.03363037109375]}, "gradients/encoder.encoder.layers.17.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 0.0, 0.0, 2.0, 2.0, 2.0, 3.0, 5.0, 7.0, 9.0, 8.0, 14.0, 8.0, 12.0, 23.0, 25.0, 23.0, 28.0, 24.0, 32.0, 34.0, 27.0, 49.0, 36.0, 39.0, 30.0, 37.0, 44.0, 49.0, 40.0, 34.0, 41.0, 29.0, 37.0, 36.0, 26.0, 29.0, 29.0, 16.0, 27.0, 17.0, 19.0, 7.0, 13.0, 7.0, 5.0, 5.0, 5.0, 6.0, 4.0, 3.0, 2.0, 5.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0186920166015625, -0.018056392669677734, -0.01742076873779297, -0.016785144805908203, -0.016149520874023438, -0.015513896942138672, -0.014878273010253906, -0.01424264907836914, -0.013607025146484375, -0.01297140121459961, -0.012335777282714844, -0.011700153350830078, -0.011064529418945312, -0.010428905487060547, -0.009793281555175781, -0.009157657623291016, -0.00852203369140625, -0.007886409759521484, -0.007250785827636719, -0.006615161895751953, -0.0059795379638671875, -0.005343914031982422, -0.004708290100097656, -0.004072666168212891, -0.003437042236328125, -0.0028014183044433594, -0.0021657943725585938, -0.0015301704406738281, -0.0008945465087890625, -0.0002589225769042969, 0.00037670135498046875, 0.0010123252868652344, 0.00164794921875, 0.0022835731506347656, 0.0029191970825195312, 0.003554821014404297, 0.0041904449462890625, 0.004826068878173828, 0.005461692810058594, 0.006097316741943359, 0.006732940673828125, 0.007368564605712891, 0.008004188537597656, 0.008639812469482422, 0.009275436401367188, 0.009911060333251953, 0.010546684265136719, 0.011182308197021484, 0.01181793212890625, 0.012453556060791016, 0.013089179992675781, 0.013724803924560547, 0.014360427856445312, 0.014996051788330078, 0.015631675720214844, 0.01626729965209961, 0.016902923583984375, 0.01753854751586914, 0.018174171447753906, 0.018809795379638672, 0.019445419311523438, 0.020081043243408203, 0.02071666717529297, 0.021352291107177734, 0.0219879150390625]}, "gradients/encoder.encoder.layers.17.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 2.0, 5.0, 2.0, 10.0, 17.0, 25.0, 35.0, 60.0, 99.0, 179.0, 331.0, 632.0, 1284.0, 2787.0, 6795.0, 20071.0, 96606.0, 770329.0, 113529.0, 22623.0, 7361.0, 2926.0, 1364.0, 666.0, 359.0, 171.0, 102.0, 58.0, 29.0, 28.0, 19.0, 8.0, 7.0, 7.0, 5.0, 2.0, 2.0, 4.0, 5.0, 0.0, 2.0, 0.0, 3.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0005059242248535156, -0.0004898831248283386, -0.0004738420248031616, -0.0004578009247779846, -0.0004417598247528076, -0.0004257187247276306, -0.0004096776247024536, -0.0003936365246772766, -0.0003775954246520996, -0.0003615543246269226, -0.0003455132246017456, -0.0003294721245765686, -0.0003134310245513916, -0.0002973899245262146, -0.0002813488245010376, -0.0002653077244758606, -0.0002492666244506836, -0.0002332255244255066, -0.0002171844244003296, -0.0002011433243751526, -0.00018510222434997559, -0.00016906112432479858, -0.00015302002429962158, -0.00013697892427444458, -0.00012093782424926758, -0.00010489672422409058, -8.885562419891357e-05, -7.281452417373657e-05, -5.677342414855957e-05, -4.073232412338257e-05, -2.4691224098205566e-05, -8.650124073028564e-06, 7.3909759521484375e-06, 2.343207597732544e-05, 3.947317600250244e-05, 5.551427602767944e-05, 7.155537605285645e-05, 8.759647607803345e-05, 0.00010363757610321045, 0.00011967867612838745, 0.00013571977615356445, 0.00015176087617874146, 0.00016780197620391846, 0.00018384307622909546, 0.00019988417625427246, 0.00021592527627944946, 0.00023196637630462646, 0.00024800747632980347, 0.00026404857635498047, 0.00028008967638015747, 0.00029613077640533447, 0.0003121718764305115, 0.0003282129764556885, 0.0003442540764808655, 0.0003602951765060425, 0.0003763362765312195, 0.0003923773765563965, 0.0004084184765815735, 0.0004244595766067505, 0.0004405006766319275, 0.0004565417766571045, 0.0004725828766822815, 0.0004886239767074585, 0.0005046650767326355, 0.0005207061767578125]}, "gradients/encoder.encoder.layers.17.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 4.0, 4.0, 7.0, 12.0, 10.0, 15.0, 12.0, 31.0, 35.0, 69.0, 111.0, 221.0, 197.0, 97.0, 76.0, 45.0, 32.0, 8.0, 10.0, 11.0, 5.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-6.67572021484375e-05, -6.513018161058426e-05, -6.350316107273102e-05, -6.187614053487778e-05, -6.0249119997024536e-05, -5.8622099459171295e-05, -5.6995078921318054e-05, -5.536805838346481e-05, -5.374103784561157e-05, -5.211401730775833e-05, -5.048699676990509e-05, -4.885997623205185e-05, -4.723295569419861e-05, -4.560593515634537e-05, -4.3978914618492126e-05, -4.2351894080638885e-05, -4.0724873542785645e-05, -3.9097853004932404e-05, -3.747083246707916e-05, -3.584381192922592e-05, -3.421679139137268e-05, -3.258977085351944e-05, -3.09627503156662e-05, -2.9335729777812958e-05, -2.7708709239959717e-05, -2.6081688702106476e-05, -2.4454668164253235e-05, -2.2827647626399994e-05, -2.1200627088546753e-05, -1.9573606550693512e-05, -1.794658601284027e-05, -1.631956547498703e-05, -1.4692544937133789e-05, -1.3065524399280548e-05, -1.1438503861427307e-05, -9.811483323574066e-06, -8.184462785720825e-06, -6.557442247867584e-06, -4.930421710014343e-06, -3.3034011721611023e-06, -1.6763806343078613e-06, -4.936009645462036e-08, 1.5776604413986206e-06, 3.2046809792518616e-06, 4.8317015171051025e-06, 6.4587220549583435e-06, 8.085742592811584e-06, 9.712763130664825e-06, 1.1339783668518066e-05, 1.2966804206371307e-05, 1.4593824744224548e-05, 1.622084528207779e-05, 1.784786581993103e-05, 1.947488635778427e-05, 2.1101906895637512e-05, 2.2728927433490753e-05, 2.4355947971343994e-05, 2.5982968509197235e-05, 2.7609989047050476e-05, 2.9237009584903717e-05, 3.086403012275696e-05, 3.24910506606102e-05, 3.411807119846344e-05, 3.574509173631668e-05, 3.737211227416992e-05]}, "gradients/encoder.encoder.layers.17.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 2.0, 2.0, 10.0, 15.0, 9.0, 8.0, 22.0, 37.0, 64.0, 86.0, 146.0, 314.0, 570.0, 1255.0, 3260.0, 9901.0, 40275.0, 489860.0, 447760.0, 39417.0, 9807.0, 3232.0, 1271.0, 531.0, 288.0, 151.0, 115.0, 43.0, 36.0, 17.0, 18.0, 12.0, 9.0, 7.0, 5.0, 3.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00043654441833496094, -0.00042037293314933777, -0.0004042014479637146, -0.00038802996277809143, -0.00037185847759246826, -0.0003556869924068451, -0.0003395155072212219, -0.00032334402203559875, -0.0003071725368499756, -0.0002910010516643524, -0.00027482956647872925, -0.0002586580812931061, -0.0002424865961074829, -0.00022631511092185974, -0.00021014362573623657, -0.0001939721405506134, -0.00017780065536499023, -0.00016162917017936707, -0.0001454576849937439, -0.00012928619980812073, -0.00011311471462249756, -9.694322943687439e-05, -8.077174425125122e-05, -6.460025906562805e-05, -4.842877388000488e-05, -3.2257288694381714e-05, -1.6085803508758545e-05, 8.568167686462402e-08, 1.6257166862487793e-05, 3.242865204811096e-05, 4.860013723373413e-05, 6.47716224193573e-05, 8.094310760498047e-05, 9.711459279060364e-05, 0.0001132860779762268, 0.00012945756316184998, 0.00014562904834747314, 0.0001618005335330963, 0.00017797201871871948, 0.00019414350390434265, 0.00021031498908996582, 0.000226486474275589, 0.00024265795946121216, 0.00025882944464683533, 0.0002750009298324585, 0.00029117241501808167, 0.00030734390020370483, 0.000323515385389328, 0.00033968687057495117, 0.00035585835576057434, 0.0003720298409461975, 0.0003882013261318207, 0.00040437281131744385, 0.000420544296503067, 0.0004367157816886902, 0.00045288726687431335, 0.0004690587520599365, 0.0004852302372455597, 0.0005014017224311829, 0.000517573207616806, 0.0005337446928024292, 0.0005499161779880524, 0.0005660876631736755, 0.0005822591483592987, 0.0005984306335449219]}, "gradients/encoder.encoder.layers.17.attention.q_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 4.0, 6.0, 9.0, 7.0, 20.0, 20.0, 27.0, 32.0, 34.0, 72.0, 115.0, 142.0, 145.0, 101.0, 82.0, 66.0, 40.0, 21.0, 21.0, 12.0, 6.0, 12.0, 5.0, 3.0, 3.0, 1.0, 5.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00022268295288085938, -0.00021306052803993225, -0.00020343810319900513, -0.000193815678358078, -0.00018419325351715088, -0.00017457082867622375, -0.00016494840383529663, -0.0001553259789943695, -0.00014570355415344238, -0.00013608112931251526, -0.00012645870447158813, -0.00011683627963066101, -0.00010721385478973389, -9.759142994880676e-05, -8.796900510787964e-05, -7.834658026695251e-05, -6.872415542602539e-05, -5.9101730585098267e-05, -4.947930574417114e-05, -3.985688090324402e-05, -3.0234456062316895e-05, -2.061203122138977e-05, -1.0989606380462646e-05, -1.3671815395355225e-06, 8.255243301391602e-06, 1.7877668142318726e-05, 2.750009298324585e-05, 3.7122517824172974e-05, 4.67449426651001e-05, 5.636736750602722e-05, 6.598979234695435e-05, 7.561221718788147e-05, 8.52346420288086e-05, 9.485706686973572e-05, 0.00010447949171066284, 0.00011410191655158997, 0.0001237243413925171, 0.00013334676623344421, 0.00014296919107437134, 0.00015259161591529846, 0.00016221404075622559, 0.0001718364655971527, 0.00018145889043807983, 0.00019108131527900696, 0.00020070374011993408, 0.0002103261649608612, 0.00021994858980178833, 0.00022957101464271545, 0.00023919343948364258, 0.0002488158643245697, 0.0002584382891654968, 0.00026806071400642395, 0.0002776831388473511, 0.0002873055636882782, 0.0002969279885292053, 0.00030655041337013245, 0.00031617283821105957, 0.0003257952630519867, 0.0003354176878929138, 0.00034504011273384094, 0.00035466253757476807, 0.0003642849624156952, 0.0003739073872566223, 0.00038352981209754944, 0.00039315223693847656]}, "gradients/encoder.encoder.layers.17.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 7.0, 24.0, 90.0, 562.0, 202.0, 70.0, 35.0, 17.0, 6.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.5318701267242432, -0.5061094760894775, -0.4803488254547119, -0.4545882046222687, -0.42882755398750305, -0.4030669033527374, -0.3773062825202942, -0.35154563188552856, -0.32578498125076294, -0.3000243306159973, -0.2742636799812317, -0.24850305914878845, -0.22274240851402283, -0.1969817578792572, -0.17122112214565277, -0.14546048641204834, -0.11969983577728271, -0.09393919259309769, -0.06817854940891266, -0.04241790622472763, -0.016657263040542603, 0.009103387594223022, 0.034864023327827454, 0.060624659061431885, 0.08638530969619751, 0.11214595288038254, 0.13790659606456757, 0.163667231798172, 0.18942788243293762, 0.21518853306770325, 0.24094916880130768, 0.2667098045349121, 0.29247045516967773, 0.31823110580444336, 0.343991756439209, 0.3697523772716522, 0.39551302790641785, 0.42127367854118347, 0.4470342993736267, 0.47279495000839233, 0.49855560064315796, 0.5243162512779236, 0.5500769019126892, 0.5758375525474548, 0.6015981435775757, 0.6273587942123413, 0.6531194448471069, 0.6788800954818726, 0.7046407461166382, 0.7304013967514038, 0.7561620473861694, 0.7819226980209351, 0.8076833486557007, 0.8334439992904663, 0.8592045903205872, 0.8849652409553528, 0.9107258915901184, 0.936486542224884, 0.9622471928596497, 0.9880078434944153, 1.0137684345245361, 1.0395290851593018, 1.0652897357940674, 1.091050386428833, 1.1168110370635986]}, "gradients/encoder.encoder.layers.17.layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 2.0, 6.0, 5.0, 10.0, 13.0, 21.0, 24.0, 33.0, 37.0, 51.0, 51.0, 62.0, 53.0, 74.0, 81.0, 66.0, 70.0, 70.0, 58.0, 48.0, 43.0, 38.0, 38.0, 17.0, 12.0, 16.0, 8.0, 3.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.1306578516960144, -0.12466008216142654, -0.11866230517625809, -0.11266453564167023, -0.10666675865650177, -0.10066898912191391, -0.09467121958732605, -0.08867344260215759, -0.08267567306756973, -0.07667790353298187, -0.07068012654781342, -0.06468235701322556, -0.0586845837533474, -0.05268681049346924, -0.04668904095888138, -0.04069126769900322, -0.03469349443912506, -0.028695721179246902, -0.022697949782013893, -0.016700178384780884, -0.010702405124902725, -0.004704631865024567, 0.0012931376695632935, 0.007290910929441452, 0.01328868418931961, 0.01928645744919777, 0.02528422884643078, 0.03128200024366379, 0.037279773503541946, 0.043277546763420105, 0.049275316298007965, 0.055273089557886124, 0.06127086281776428, 0.06726863235235214, 0.0732664093375206, 0.07926417887210846, 0.08526195585727692, 0.09125972539186478, 0.09725749492645264, 0.1032552719116211, 0.10925304144620895, 0.11525081098079681, 0.12124858796596527, 0.12724635004997253, 0.133244127035141, 0.13924190402030945, 0.1452396810054779, 0.15123744308948517, 0.15723522007465363, 0.16323299705982208, 0.16923075914382935, 0.1752285361289978, 0.18122631311416626, 0.18722409009933472, 0.19322185218334198, 0.19921962916851044, 0.2052173912525177, 0.21121516823768616, 0.21721293032169342, 0.22321070730686188, 0.22920848429203033, 0.2352062463760376, 0.24120402336120605, 0.2472018003463745, 0.25319957733154297]}, "gradients/encoder.encoder.layers.16.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 5.0, 4.0, 6.0, 3.0, 3.0, 4.0, 4.0, 3.0, 6.0, 2.0, 4.0, 7.0, 3.0, 19.0, 17.0, 29.0, 55.0, 99.0, 270.0, 561.0, 1694.0, 5927.0, 46566.0, 4123963.0, 11067.0, 2600.0, 730.0, 300.0, 161.0, 72.0, 43.0, 23.0, 15.0, 8.0, 4.0, 5.0, 1.0, 3.0, 1.0, 1.0], "bins": [-0.1563720703125, -0.15305709838867188, -0.14974212646484375, -0.14642715454101562, -0.1431121826171875, -0.13979721069335938, -0.13648223876953125, -0.13316726684570312, -0.129852294921875, -0.12653732299804688, -0.12322235107421875, -0.11990737915039062, -0.1165924072265625, -0.11327743530273438, -0.10996246337890625, -0.10664749145507812, -0.10333251953125, -0.10001754760742188, -0.09670257568359375, -0.09338760375976562, -0.0900726318359375, -0.08675765991210938, -0.08344268798828125, -0.08012771606445312, -0.076812744140625, -0.07349777221679688, -0.07018280029296875, -0.06686782836914062, -0.0635528564453125, -0.060237884521484375, -0.05692291259765625, -0.053607940673828125, -0.05029296875, -0.046977996826171875, -0.04366302490234375, -0.040348052978515625, -0.0370330810546875, -0.033718109130859375, -0.03040313720703125, -0.027088165283203125, -0.023773193359375, -0.020458221435546875, -0.01714324951171875, -0.013828277587890625, -0.0105133056640625, -0.007198333740234375, -0.00388336181640625, -0.000568389892578125, 0.00274658203125, 0.006061553955078125, 0.00937652587890625, 0.012691497802734375, 0.0160064697265625, 0.019321441650390625, 0.02263641357421875, 0.025951385498046875, 0.029266357421875, 0.032581329345703125, 0.03589630126953125, 0.039211273193359375, 0.0425262451171875, 0.045841217041015625, 0.04915618896484375, 0.052471160888671875, 0.0557861328125]}, "gradients/encoder.encoder.layers.16.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 3.0, 7.0, 3.0, 12.0, 26.0, 33.0, 78.0, 130.0, 177.0, 166.0, 155.0, 101.0, 59.0, 29.0, 20.0, 8.0, 3.0, 1.0, 1.0, 3.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.018798828125, -0.01840066909790039, -0.01800251007080078, -0.017604351043701172, -0.017206192016601562, -0.016808032989501953, -0.016409873962402344, -0.016011714935302734, -0.015613555908203125, -0.015215396881103516, -0.014817237854003906, -0.014419078826904297, -0.014020919799804688, -0.013622760772705078, -0.013224601745605469, -0.01282644271850586, -0.01242828369140625, -0.01203012466430664, -0.011631965637207031, -0.011233806610107422, -0.010835647583007812, -0.010437488555908203, -0.010039329528808594, -0.009641170501708984, -0.009243011474609375, -0.008844852447509766, -0.008446693420410156, -0.008048534393310547, -0.0076503753662109375, -0.007252216339111328, -0.006854057312011719, -0.006455898284912109, -0.0060577392578125, -0.005659580230712891, -0.005261421203613281, -0.004863262176513672, -0.0044651031494140625, -0.004066944122314453, -0.0036687850952148438, -0.0032706260681152344, -0.002872467041015625, -0.0024743080139160156, -0.0020761489868164062, -0.0016779899597167969, -0.0012798309326171875, -0.0008816719055175781, -0.00048351287841796875, -8.535385131835938e-05, 0.00031280517578125, 0.0007109642028808594, 0.0011091232299804688, 0.0015072822570800781, 0.0019054412841796875, 0.002303600311279297, 0.0027017593383789062, 0.0030999183654785156, 0.003498077392578125, 0.0038962364196777344, 0.004294395446777344, 0.004692554473876953, 0.0050907135009765625, 0.005488872528076172, 0.005887031555175781, 0.006285190582275391, 0.006683349609375]}, "gradients/encoder.encoder.layers.16.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 4.0, 1.0, 1.0, 4.0, 10.0, 4.0, 11.0, 15.0, 18.0, 22.0, 41.0, 54.0, 73.0, 89.0, 171.0, 271.0, 552.0, 1215.0, 3280.0, 11191.0, 67549.0, 4071508.0, 27511.0, 6513.0, 2133.0, 887.0, 422.0, 240.0, 149.0, 86.0, 87.0, 63.0, 26.0, 22.0, 25.0, 11.0, 14.0, 7.0, 8.0, 0.0, 3.0, 4.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0611572265625, -0.05873870849609375, -0.0563201904296875, -0.05390167236328125, -0.051483154296875, -0.04906463623046875, -0.0466461181640625, -0.04422760009765625, -0.04180908203125, -0.03939056396484375, -0.0369720458984375, -0.03455352783203125, -0.032135009765625, -0.02971649169921875, -0.0272979736328125, -0.02487945556640625, -0.0224609375, -0.02004241943359375, -0.0176239013671875, -0.01520538330078125, -0.012786865234375, -0.01036834716796875, -0.0079498291015625, -0.00553131103515625, -0.00311279296875, -0.00069427490234375, 0.0017242431640625, 0.00414276123046875, 0.006561279296875, 0.00897979736328125, 0.0113983154296875, 0.01381683349609375, 0.0162353515625, 0.01865386962890625, 0.0210723876953125, 0.02349090576171875, 0.025909423828125, 0.02832794189453125, 0.0307464599609375, 0.03316497802734375, 0.03558349609375, 0.03800201416015625, 0.0404205322265625, 0.04283905029296875, 0.045257568359375, 0.04767608642578125, 0.0500946044921875, 0.05251312255859375, 0.054931640625, 0.05735015869140625, 0.0597686767578125, 0.06218719482421875, 0.064605712890625, 0.06702423095703125, 0.0694427490234375, 0.07186126708984375, 0.07427978515625, 0.07669830322265625, 0.0791168212890625, 0.08153533935546875, 0.083953857421875, 0.08637237548828125, 0.0887908935546875, 0.09120941162109375, 0.0936279296875]}, "gradients/encoder.encoder.layers.16.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 2.0, 2.0, 5.0, 2.0, 3.0, 6.0, 5.0, 16.0, 12.0, 11.0, 20.0, 37.0, 292.0, 3465.0, 96.0, 26.0, 25.0, 15.0, 14.0, 9.0, 4.0, 6.0, 3.0, 5.0, 2.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.01042938232421875, -0.009822249412536621, -0.009215116500854492, -0.008607983589172363, -0.008000850677490234, -0.0073937177658081055, -0.0067865848541259766, -0.006179451942443848, -0.005572319030761719, -0.00496518611907959, -0.004358053207397461, -0.003750920295715332, -0.003143787384033203, -0.0025366544723510742, -0.0019295215606689453, -0.0013223886489868164, -0.0007152557373046875, -0.0001081228256225586, 0.0004990100860595703, 0.0011061429977416992, 0.0017132759094238281, 0.002320408821105957, 0.002927541732788086, 0.003534674644470215, 0.004141807556152344, 0.004748940467834473, 0.0053560733795166016, 0.0059632062911987305, 0.006570339202880859, 0.007177472114562988, 0.007784605026245117, 0.008391737937927246, 0.008998870849609375, 0.009606003761291504, 0.010213136672973633, 0.010820269584655762, 0.01142740249633789, 0.01203453540802002, 0.012641668319702148, 0.013248801231384277, 0.013855934143066406, 0.014463067054748535, 0.015070199966430664, 0.015677332878112793, 0.016284465789794922, 0.01689159870147705, 0.01749873161315918, 0.01810586452484131, 0.018712997436523438, 0.019320130348205566, 0.019927263259887695, 0.020534396171569824, 0.021141529083251953, 0.021748661994934082, 0.02235579490661621, 0.02296292781829834, 0.02357006072998047, 0.024177193641662598, 0.024784326553344727, 0.025391459465026855, 0.025998592376708984, 0.026605725288391113, 0.027212858200073242, 0.02781999111175537, 0.0284271240234375]}, "gradients/encoder.encoder.layers.16.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 5.0, 14.0, 54.0, 349.0, 499.0, 70.0, 15.0, 6.0, 2.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.3638467788696289, -0.3534621596336365, -0.34307754039764404, -0.332692950963974, -0.32230833172798157, -0.31192371249198914, -0.3015391230583191, -0.29115450382232666, -0.28076988458633423, -0.2703852653503418, -0.26000064611434937, -0.24961605668067932, -0.2392314374446869, -0.22884681820869446, -0.21846221387386322, -0.20807760953903198, -0.19769299030303955, -0.18730837106704712, -0.17692376673221588, -0.16653916239738464, -0.1561545431613922, -0.14576992392539978, -0.13538531959056854, -0.1250007152557373, -0.11461609601974487, -0.10423148423433304, -0.0938468724489212, -0.08346226066350937, -0.07307764887809753, -0.0626930370926857, -0.052308425307273865, -0.04192381352186203, -0.03153923153877258, -0.02115461975336075, -0.010770007967948914, -0.00038539618253707886, 0.009999215602874756, 0.02038382738828659, 0.030768439173698425, 0.04115305095911026, 0.051537662744522095, 0.06192227452993393, 0.07230688631534576, 0.0826914981007576, 0.09307610988616943, 0.10346072167158127, 0.1138453334569931, 0.12422994524240494, 0.13461455702781677, 0.1449991762638092, 0.15538378059864044, 0.16576838493347168, 0.1761530041694641, 0.18653762340545654, 0.19692222774028778, 0.20730683207511902, 0.21769145131111145, 0.22807607054710388, 0.23846067488193512, 0.24884527921676636, 0.2592298984527588, 0.2696145176887512, 0.27999913692474365, 0.2903837263584137, 0.30076834559440613]}, "gradients/encoder.encoder.layers.16.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 4.0, 9.0, 11.0, 17.0, 35.0, 52.0, 57.0, 69.0, 89.0, 111.0, 93.0, 105.0, 86.0, 86.0, 67.0, 45.0, 23.0, 20.0, 17.0, 6.0, 6.0, 4.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.07523179054260254, -0.07285727560520172, -0.0704827532172203, -0.06810823827981949, -0.06573371589183807, -0.06335920095443726, -0.06098468601703644, -0.05861016735434532, -0.056235648691654205, -0.05386113002896309, -0.05148661136627197, -0.049112096428871155, -0.04673757776618004, -0.04436305910348892, -0.041988544166088104, -0.03961402550339699, -0.03723950684070587, -0.034864988178014755, -0.03249046951532364, -0.03011595457792282, -0.027741435915231705, -0.02536691725254059, -0.02299240045249462, -0.020617883652448654, -0.018243364989757538, -0.01586884632706642, -0.013494329527020454, -0.011119811795651913, -0.008745294064283371, -0.006370776332914829, -0.0039962586015462875, -0.0016217418015003204, 0.0007527768611907959, 0.0031272945925593376, 0.005501812323927879, 0.007876330055296421, 0.010250847786664963, 0.012625365518033504, 0.014999883249402046, 0.017374400049448013, 0.01974891871213913, 0.022123437374830246, 0.024497954174876213, 0.02687247097492218, 0.029246989637613297, 0.03162150830030441, 0.03399602323770523, 0.03637054190039635, 0.03874506056308746, 0.04111957922577858, 0.043494097888469696, 0.045868612825870514, 0.04824313148856163, 0.05061765015125275, 0.052992165088653564, 0.05536668375134468, 0.0577412024140358, 0.06011572107672691, 0.06249023973941803, 0.06486475467681885, 0.06723926961421967, 0.06961379200220108, 0.0719883069396019, 0.07436282932758331, 0.07673734426498413]}, "gradients/encoder.encoder.layers.16.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0, 4.0, 5.0, 7.0, 9.0, 9.0, 13.0, 19.0, 23.0, 25.0, 36.0, 45.0, 75.0, 101.0, 267.0, 914.0, 4492.0, 47941.0, 900360.0, 85828.0, 6500.0, 1144.0, 356.0, 136.0, 66.0, 41.0, 29.0, 34.0, 23.0, 17.0, 14.0, 14.0, 4.0, 6.0, 5.0, 2.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.1392822265625, -0.1352558135986328, -0.13122940063476562, -0.12720298767089844, -0.12317657470703125, -0.11915016174316406, -0.11512374877929688, -0.11109733581542969, -0.1070709228515625, -0.10304450988769531, -0.09901809692382812, -0.09499168395996094, -0.09096527099609375, -0.08693885803222656, -0.08291244506835938, -0.07888603210449219, -0.074859619140625, -0.07083320617675781, -0.06680679321289062, -0.06278038024902344, -0.05875396728515625, -0.05472755432128906, -0.050701141357421875, -0.04667472839355469, -0.0426483154296875, -0.03862190246582031, -0.034595489501953125, -0.030569076538085938, -0.02654266357421875, -0.022516250610351562, -0.018489837646484375, -0.014463424682617188, -0.01043701171875, -0.0064105987548828125, -0.002384185791015625, 0.0016422271728515625, 0.00566864013671875, 0.009695053100585938, 0.013721466064453125, 0.017747879028320312, 0.0217742919921875, 0.025800704956054688, 0.029827117919921875, 0.03385353088378906, 0.03787994384765625, 0.04190635681152344, 0.045932769775390625, 0.04995918273925781, 0.053985595703125, 0.05801200866699219, 0.062038421630859375, 0.06606483459472656, 0.07009124755859375, 0.07411766052246094, 0.07814407348632812, 0.08217048645019531, 0.0861968994140625, 0.09022331237792969, 0.09424972534179688, 0.09827613830566406, 0.10230255126953125, 0.10632896423339844, 0.11035537719726562, 0.11438179016113281, 0.118408203125]}, "gradients/encoder.encoder.layers.16.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 8.0, 5.0, 16.0, 20.0, 33.0, 81.0, 110.0, 185.0, 171.0, 157.0, 93.0, 68.0, 30.0, 19.0, 10.0, 3.0, 1.0, 1.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.018524169921875, -0.018132269382476807, -0.017740368843078613, -0.01734846830368042, -0.016956567764282227, -0.016564667224884033, -0.01617276668548584, -0.015780866146087646, -0.015388965606689453, -0.01499706506729126, -0.014605164527893066, -0.014213263988494873, -0.01382136344909668, -0.013429462909698486, -0.013037562370300293, -0.0126456618309021, -0.012253761291503906, -0.011861860752105713, -0.01146996021270752, -0.011078059673309326, -0.010686159133911133, -0.01029425859451294, -0.009902358055114746, -0.009510457515716553, -0.00911855697631836, -0.008726656436920166, -0.008334755897521973, -0.00794285535812378, -0.007550954818725586, -0.007159054279327393, -0.006767153739929199, -0.006375253200531006, -0.0059833526611328125, -0.005591452121734619, -0.005199551582336426, -0.004807651042938232, -0.004415750503540039, -0.004023849964141846, -0.0036319494247436523, -0.003240048885345459, -0.0028481483459472656, -0.0024562478065490723, -0.002064347267150879, -0.0016724467277526855, -0.0012805461883544922, -0.0008886456489562988, -0.0004967451095581055, -0.00010484457015991211, 0.00028705596923828125, 0.0006789565086364746, 0.001070857048034668, 0.0014627575874328613, 0.0018546581268310547, 0.002246558666229248, 0.0026384592056274414, 0.0030303597450256348, 0.003422260284423828, 0.0038141608238220215, 0.004206061363220215, 0.004597961902618408, 0.0049898624420166016, 0.005381762981414795, 0.005773663520812988, 0.006165564060211182, 0.006557464599609375]}, "gradients/encoder.encoder.layers.16.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 1.0, 4.0, 3.0, 2.0, 2.0, 4.0, 15.0, 6.0, 25.0, 29.0, 44.0, 58.0, 96.0, 165.0, 235.0, 394.0, 628.0, 1059.0, 1857.0, 3219.0, 5924.0, 10923.0, 21362.0, 43779.0, 96783.0, 268809.0, 380221.0, 111627.0, 49878.0, 23954.0, 12303.0, 6486.0, 3593.0, 2013.0, 1190.0, 707.0, 426.0, 253.0, 172.0, 103.0, 73.0, 48.0, 24.0, 29.0, 13.0, 8.0, 5.0, 11.0, 2.0, 1.0, 4.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0251617431640625, -0.02432537078857422, -0.023488998413085938, -0.022652626037597656, -0.021816253662109375, -0.020979881286621094, -0.020143508911132812, -0.01930713653564453, -0.01847076416015625, -0.01763439178466797, -0.016798019409179688, -0.015961647033691406, -0.015125274658203125, -0.014288902282714844, -0.013452529907226562, -0.012616157531738281, -0.01177978515625, -0.010943412780761719, -0.010107040405273438, -0.009270668029785156, -0.008434295654296875, -0.007597923278808594, -0.0067615509033203125, -0.005925178527832031, -0.00508880615234375, -0.004252433776855469, -0.0034160614013671875, -0.0025796890258789062, -0.001743316650390625, -0.0009069442749023438, -7.05718994140625e-05, 0.0007658004760742188, 0.0016021728515625, 0.0024385452270507812, 0.0032749176025390625, 0.004111289978027344, 0.004947662353515625, 0.005784034729003906, 0.0066204071044921875, 0.007456779479980469, 0.00829315185546875, 0.009129524230957031, 0.009965896606445312, 0.010802268981933594, 0.011638641357421875, 0.012475013732910156, 0.013311386108398438, 0.014147758483886719, 0.014984130859375, 0.01582050323486328, 0.016656875610351562, 0.017493247985839844, 0.018329620361328125, 0.019165992736816406, 0.020002365112304688, 0.02083873748779297, 0.02167510986328125, 0.02251148223876953, 0.023347854614257812, 0.024184226989746094, 0.025020599365234375, 0.025856971740722656, 0.026693344116210938, 0.02752971649169922, 0.0283660888671875]}, "gradients/encoder.encoder.layers.16.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 0.0, 2.0, 3.0, 5.0, 4.0, 8.0, 7.0, 8.0, 13.0, 19.0, 18.0, 20.0, 22.0, 21.0, 34.0, 35.0, 31.0, 37.0, 42.0, 49.0, 45.0, 33.0, 40.0, 50.0, 49.0, 36.0, 36.0, 39.0, 46.0, 42.0, 35.0, 31.0, 25.0, 28.0, 13.0, 19.0, 13.0, 13.0, 9.0, 8.0, 8.0, 3.0, 3.0, 2.0, 5.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.019073486328125, -0.018439531326293945, -0.01780557632446289, -0.017171621322631836, -0.01653766632080078, -0.015903711318969727, -0.015269756317138672, -0.014635801315307617, -0.014001846313476562, -0.013367891311645508, -0.012733936309814453, -0.012099981307983398, -0.011466026306152344, -0.010832071304321289, -0.010198116302490234, -0.00956416130065918, -0.008930206298828125, -0.00829625129699707, -0.007662296295166016, -0.007028341293334961, -0.006394386291503906, -0.0057604312896728516, -0.005126476287841797, -0.004492521286010742, -0.0038585662841796875, -0.003224611282348633, -0.002590656280517578, -0.0019567012786865234, -0.0013227462768554688, -0.0006887912750244141, -5.4836273193359375e-05, 0.0005791187286376953, 0.00121307373046875, 0.0018470287322998047, 0.0024809837341308594, 0.003114938735961914, 0.0037488937377929688, 0.0043828487396240234, 0.005016803741455078, 0.005650758743286133, 0.0062847137451171875, 0.006918668746948242, 0.007552623748779297, 0.008186578750610352, 0.008820533752441406, 0.009454488754272461, 0.010088443756103516, 0.01072239875793457, 0.011356353759765625, 0.01199030876159668, 0.012624263763427734, 0.013258218765258789, 0.013892173767089844, 0.014526128768920898, 0.015160083770751953, 0.015794038772583008, 0.016427993774414062, 0.017061948776245117, 0.017695903778076172, 0.018329858779907227, 0.01896381378173828, 0.019597768783569336, 0.02023172378540039, 0.020865678787231445, 0.0214996337890625]}, "gradients/encoder.encoder.layers.16.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 3.0, 5.0, 5.0, 6.0, 14.0, 20.0, 31.0, 50.0, 98.0, 137.0, 249.0, 448.0, 827.0, 1722.0, 3827.0, 10276.0, 38301.0, 501310.0, 436740.0, 37111.0, 9967.0, 3810.0, 1708.0, 859.0, 414.0, 249.0, 142.0, 78.0, 58.0, 27.0, 20.0, 12.0, 13.0, 6.0, 6.0, 5.0, 4.0, 2.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.0004763603210449219, -0.0004619210958480835, -0.0004474818706512451, -0.00043304264545440674, -0.00041860342025756836, -0.00040416419506073, -0.0003897249698638916, -0.0003752857446670532, -0.00036084651947021484, -0.00034640729427337646, -0.0003319680690765381, -0.0003175288438796997, -0.00030308961868286133, -0.00028865039348602295, -0.00027421116828918457, -0.0002597719430923462, -0.0002453327178955078, -0.00023089349269866943, -0.00021645426750183105, -0.00020201504230499268, -0.0001875758171081543, -0.00017313659191131592, -0.00015869736671447754, -0.00014425814151763916, -0.00012981891632080078, -0.0001153796911239624, -0.00010094046592712402, -8.650124073028564e-05, -7.206201553344727e-05, -5.762279033660889e-05, -4.318356513977051e-05, -2.874433994293213e-05, -1.430511474609375e-05, 1.341104507446289e-07, 1.4573335647583008e-05, 2.9012560844421387e-05, 4.3451786041259766e-05, 5.7891011238098145e-05, 7.233023643493652e-05, 8.67694616317749e-05, 0.00010120868682861328, 0.00011564791202545166, 0.00013008713722229004, 0.00014452636241912842, 0.0001589655876159668, 0.00017340481281280518, 0.00018784403800964355, 0.00020228326320648193, 0.0002167224884033203, 0.0002311617136001587, 0.00024560093879699707, 0.00026004016399383545, 0.00027447938919067383, 0.0002889186143875122, 0.0003033578395843506, 0.00031779706478118896, 0.00033223628997802734, 0.0003466755151748657, 0.0003611147403717041, 0.0003755539655685425, 0.00038999319076538086, 0.00040443241596221924, 0.0004188716411590576, 0.000433310866355896, 0.0004477500915527344]}, "gradients/encoder.encoder.layers.16.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 0.0, 4.0, 3.0, 7.0, 5.0, 15.0, 12.0, 16.0, 16.0, 16.0, 32.0, 49.0, 116.0, 156.0, 191.0, 147.0, 75.0, 51.0, 35.0, 10.0, 16.0, 6.0, 6.0, 4.0, 6.0, 6.0, 7.0, 3.0, 1.0, 0.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.5822391510009766e-05, -3.4681521356105804e-05, -3.354065120220184e-05, -3.239978104829788e-05, -3.125891089439392e-05, -3.011804074048996e-05, -2.8977170586586e-05, -2.7836300432682037e-05, -2.6695430278778076e-05, -2.5554560124874115e-05, -2.4413689970970154e-05, -2.3272819817066193e-05, -2.213194966316223e-05, -2.099107950925827e-05, -1.985020935535431e-05, -1.8709339201450348e-05, -1.7568469047546387e-05, -1.6427598893642426e-05, -1.5286728739738464e-05, -1.4145858585834503e-05, -1.3004988431930542e-05, -1.186411827802658e-05, -1.072324812412262e-05, -9.582377970218658e-06, -8.441507816314697e-06, -7.300637662410736e-06, -6.159767508506775e-06, -5.018897354602814e-06, -3.8780272006988525e-06, -2.7371570467948914e-06, -1.5962868928909302e-06, -4.55416738986969e-07, 6.854534149169922e-07, 1.8263235688209534e-06, 2.9671937227249146e-06, 4.108063876628876e-06, 5.248934030532837e-06, 6.389804184436798e-06, 7.530674338340759e-06, 8.67154449224472e-06, 9.812414646148682e-06, 1.0953284800052643e-05, 1.2094154953956604e-05, 1.3235025107860565e-05, 1.4375895261764526e-05, 1.5516765415668488e-05, 1.665763556957245e-05, 1.779850572347641e-05, 1.893937587738037e-05, 2.0080246031284332e-05, 2.1221116185188293e-05, 2.2361986339092255e-05, 2.3502856492996216e-05, 2.4643726646900177e-05, 2.5784596800804138e-05, 2.69254669547081e-05, 2.806633710861206e-05, 2.9207207262516022e-05, 3.0348077416419983e-05, 3.1488947570323944e-05, 3.2629817724227905e-05, 3.3770687878131866e-05, 3.491155803203583e-05, 3.605242818593979e-05, 3.719329833984375e-05]}, "gradients/encoder.encoder.layers.16.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 4.0, 2.0, 2.0, 7.0, 7.0, 12.0, 12.0, 16.0, 17.0, 38.0, 53.0, 74.0, 145.0, 281.0, 708.0, 1841.0, 5756.0, 21897.0, 142248.0, 791841.0, 64253.0, 13286.0, 3750.0, 1243.0, 500.0, 251.0, 121.0, 62.0, 33.0, 23.0, 25.0, 16.0, 13.0, 7.0, 6.0, 5.0, 3.0, 5.0, 0.0, 3.0, 1.0, 0.0, 0.0, 1.0, 3.0, 0.0, 0.0, 1.0], "bins": [-0.0005502700805664062, -0.0005346909165382385, -0.0005191117525100708, -0.0005035325884819031, -0.00048795342445373535, -0.0004723742604255676, -0.0004567950963973999, -0.0004412159323692322, -0.00042563676834106445, -0.00041005760431289673, -0.000394478440284729, -0.0003788992762565613, -0.00036332011222839355, -0.00034774094820022583, -0.0003321617841720581, -0.0003165826201438904, -0.00030100345611572266, -0.00028542429208755493, -0.0002698451280593872, -0.0002542659640312195, -0.00023868680000305176, -0.00022310763597488403, -0.0002075284719467163, -0.00019194930791854858, -0.00017637014389038086, -0.00016079097986221313, -0.0001452118158340454, -0.00012963265180587769, -0.00011405348777770996, -9.847432374954224e-05, -8.289515972137451e-05, -6.731599569320679e-05, -5.173683166503906e-05, -3.615766763687134e-05, -2.0578503608703613e-05, -4.999339580535889e-06, 1.0579824447631836e-05, 2.615898847579956e-05, 4.1738152503967285e-05, 5.731731653213501e-05, 7.289648056030273e-05, 8.847564458847046e-05, 0.00010405480861663818, 0.00011963397264480591, 0.00013521313667297363, 0.00015079230070114136, 0.00016637146472930908, 0.0001819506287574768, 0.00019752979278564453, 0.00021310895681381226, 0.00022868812084197998, 0.0002442672848701477, 0.00025984644889831543, 0.00027542561292648315, 0.0002910047769546509, 0.0003065839409828186, 0.00032216310501098633, 0.00033774226903915405, 0.0003533214330673218, 0.0003689005970954895, 0.0003844797611236572, 0.00040005892515182495, 0.0004156380891799927, 0.0004312172532081604, 0.0004467964172363281]}, "gradients/encoder.encoder.layers.16.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 3.0, 5.0, 5.0, 7.0, 5.0, 22.0, 17.0, 23.0, 33.0, 51.0, 74.0, 92.0, 144.0, 174.0, 123.0, 77.0, 41.0, 35.0, 16.0, 18.0, 19.0, 14.0, 7.0, 4.0, 0.0, 2.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.00041866302490234375, -0.0004091579467058182, -0.0003996528685092926, -0.00039014779031276703, -0.00038064271211624146, -0.0003711376339197159, -0.0003616325557231903, -0.00035212747752666473, -0.00034262239933013916, -0.0003331173211336136, -0.000323612242937088, -0.00031410716474056244, -0.00030460208654403687, -0.0002950970083475113, -0.0002855919301509857, -0.00027608685195446014, -0.00026658177375793457, -0.000257076695561409, -0.0002475716173648834, -0.00023806653916835785, -0.00022856146097183228, -0.0002190563827753067, -0.00020955130457878113, -0.00020004622638225555, -0.00019054114818572998, -0.0001810360699892044, -0.00017153099179267883, -0.00016202591359615326, -0.00015252083539962769, -0.0001430157572031021, -0.00013351067900657654, -0.00012400560081005096, -0.00011450052261352539, -0.00010499544441699982, -9.549036622047424e-05, -8.598528802394867e-05, -7.64802098274231e-05, -6.697513163089752e-05, -5.747005343437195e-05, -4.7964975237846375e-05, -3.84598970413208e-05, -2.8954818844795227e-05, -1.9449740648269653e-05, -9.94466245174408e-06, -4.3958425521850586e-07, 9.065493941307068e-06, 1.857057213783264e-05, 2.8075650334358215e-05, 3.758072853088379e-05, 4.708580672740936e-05, 5.6590884923934937e-05, 6.609596312046051e-05, 7.560104131698608e-05, 8.510611951351166e-05, 9.461119771003723e-05, 0.0001041162759065628, 0.00011362135410308838, 0.00012312643229961395, 0.00013263151049613953, 0.0001421365886926651, 0.00015164166688919067, 0.00016114674508571625, 0.00017065182328224182, 0.0001801569014787674, 0.00018966197967529297]}, "gradients/encoder.encoder.layers.16.layer_norm.weight": {"_type": "histogram", "values": [3.0, 1.0, 5.0, 5.0, 10.0, 11.0, 22.0, 62.0, 142.0, 379.0, 147.0, 80.0, 50.0, 33.0, 19.0, 15.0, 11.0, 10.0, 6.0, 2.0, 1.0, 1.0, 6.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.10913528501987457, -0.09747326374053955, -0.08581123501062393, -0.07414921373128891, -0.06248718872666359, -0.05082516372203827, -0.03916314244270325, -0.027501113712787628, -0.015839092433452606, -0.00417706836014986, 0.0074849557131528854, 0.019146978855133057, 0.030809003859758377, 0.0424710288643837, 0.05413305014371872, 0.06579507887363434, 0.07745710015296936, 0.08911912143230438, 0.10078115016222, 0.11244317144155502, 0.12410520017147064, 0.13576722145080566, 0.14742924273014069, 0.1590912640094757, 0.17075330018997192, 0.18241532146930695, 0.19407734274864197, 0.20573937892913818, 0.2174014002084732, 0.22906342148780823, 0.24072544276714325, 0.25238746404647827, 0.2640495002269745, 0.2757115364074707, 0.28737354278564453, 0.29903557896614075, 0.3106975853443146, 0.3223596215248108, 0.3340216279029846, 0.34568366408348083, 0.35734570026397705, 0.36900773644447327, 0.3806697428226471, 0.3923317790031433, 0.40399378538131714, 0.41565582156181335, 0.42731785774230957, 0.4389798641204834, 0.4506418704986572, 0.46230390667915344, 0.47396591305732727, 0.4856279492378235, 0.4972899556159973, 0.5089520215988159, 0.5206140279769897, 0.5322760343551636, 0.5439381003379822, 0.555600106716156, 0.5672621726989746, 0.5789241790771484, 0.5905861854553223, 0.6022481918334961, 0.6139102578163147, 0.6255722641944885, 0.6372342705726624]}, "gradients/encoder.encoder.layers.16.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 4.0, 2.0, 5.0, 5.0, 7.0, 11.0, 19.0, 16.0, 16.0, 23.0, 18.0, 38.0, 37.0, 40.0, 53.0, 43.0, 62.0, 61.0, 67.0, 63.0, 57.0, 44.0, 65.0, 56.0, 32.0, 32.0, 32.0, 26.0, 17.0, 21.0, 9.0, 10.0, 6.0, 9.0, 4.0, 2.0, 0.0, 0.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.11584603786468506, -0.11133427172899246, -0.10682250559329987, -0.10231074690818787, -0.09779898077249527, -0.09328721463680267, -0.08877545595169067, -0.08426368981599808, -0.07975192368030548, -0.07524015754461288, -0.07072839140892029, -0.06621663272380829, -0.06170486658811569, -0.057193100452423096, -0.0526813380420208, -0.0481695756316185, -0.0436578094959259, -0.03914604336023331, -0.03463428094983101, -0.03012251667678356, -0.025610752403736115, -0.021098988130688667, -0.01658722385764122, -0.012075461447238922, -0.007563695311546326, -0.0030519310384988785, 0.0014598332345485687, 0.005971597507596016, 0.010483361780643463, 0.01499512605369091, 0.019506890326738358, 0.024018652737140656, 0.028530418872833252, 0.03304218500852585, 0.037553947418928146, 0.042065709829330444, 0.04657747596502304, 0.05108924210071564, 0.055601004511117935, 0.06011276692152023, 0.06462453305721283, 0.06913629919290543, 0.07364806532859802, 0.07815982401371002, 0.08267159014940262, 0.08718335628509521, 0.09169511497020721, 0.09620688110589981, 0.10071864724159241, 0.105230413377285, 0.1097421795129776, 0.1142539381980896, 0.1187657043337822, 0.12327747046947479, 0.1277892291545868, 0.13230100274085999, 0.13681276142597198, 0.14132452011108398, 0.14583629369735718, 0.15034805238246918, 0.15485981106758118, 0.15937158465385437, 0.16388334333896637, 0.16839510202407837, 0.17290687561035156]}, "gradients/encoder.encoder.layers.15.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 2.0, 0.0, 0.0, 2.0, 4.0, 5.0, 7.0, 9.0, 11.0, 11.0, 18.0, 21.0, 39.0, 70.0, 166.0, 535.0, 2294.0, 20844.0, 4158936.0, 9361.0, 1301.0, 400.0, 133.0, 55.0, 33.0, 17.0, 6.0, 5.0, 3.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0], "bins": [-0.20068359375, -0.19644451141357422, -0.19220542907714844, -0.18796634674072266, -0.18372726440429688, -0.1794881820678711, -0.1752490997314453, -0.17101001739501953, -0.16677093505859375, -0.16253185272216797, -0.1582927703857422, -0.1540536880493164, -0.14981460571289062, -0.14557552337646484, -0.14133644104003906, -0.13709735870361328, -0.1328582763671875, -0.12861919403076172, -0.12438011169433594, -0.12014102935791016, -0.11590194702148438, -0.1116628646850586, -0.10742378234863281, -0.10318470001220703, -0.09894561767578125, -0.09470653533935547, -0.09046745300292969, -0.0862283706665039, -0.08198928833007812, -0.07775020599365234, -0.07351112365722656, -0.06927204132080078, -0.065032958984375, -0.06079387664794922, -0.05655479431152344, -0.052315711975097656, -0.048076629638671875, -0.043837547302246094, -0.03959846496582031, -0.03535938262939453, -0.03112030029296875, -0.02688121795654297, -0.022642135620117188, -0.018403053283691406, -0.014163970947265625, -0.009924888610839844, -0.0056858062744140625, -0.0014467239379882812, 0.0027923583984375, 0.007031440734863281, 0.011270523071289062, 0.015509605407714844, 0.019748687744140625, 0.023987770080566406, 0.028226852416992188, 0.03246593475341797, 0.03670501708984375, 0.04094409942626953, 0.04518318176269531, 0.049422264099121094, 0.053661346435546875, 0.057900428771972656, 0.06213951110839844, 0.06637859344482422, 0.07061767578125]}, "gradients/encoder.encoder.layers.15.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 8.0, 5.0, 16.0, 18.0, 41.0, 77.0, 108.0, 162.0, 184.0, 152.0, 94.0, 71.0, 36.0, 19.0, 12.0, 5.0, 0.0, 2.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0], "bins": [-0.0176849365234375, -0.017311394214630127, -0.016937851905822754, -0.01656430959701538, -0.016190767288208008, -0.015817224979400635, -0.015443682670593262, -0.015070140361785889, -0.014696598052978516, -0.014323055744171143, -0.01394951343536377, -0.013575971126556396, -0.013202428817749023, -0.01282888650894165, -0.012455344200134277, -0.012081801891326904, -0.011708259582519531, -0.011334717273712158, -0.010961174964904785, -0.010587632656097412, -0.010214090347290039, -0.009840548038482666, -0.009467005729675293, -0.00909346342086792, -0.008719921112060547, -0.008346378803253174, -0.0079728364944458, -0.007599294185638428, -0.007225751876831055, -0.006852209568023682, -0.006478667259216309, -0.0061051249504089355, -0.0057315826416015625, -0.0053580403327941895, -0.004984498023986816, -0.004610955715179443, -0.00423741340637207, -0.0038638710975646973, -0.0034903287887573242, -0.003116786479949951, -0.002743244171142578, -0.002369701862335205, -0.001996159553527832, -0.001622617244720459, -0.001249074935913086, -0.0008755326271057129, -0.0005019903182983398, -0.0001284480094909668, 0.00024509429931640625, 0.0006186366081237793, 0.0009921789169311523, 0.0013657212257385254, 0.0017392635345458984, 0.0021128058433532715, 0.0024863481521606445, 0.0028598904609680176, 0.0032334327697753906, 0.0036069750785827637, 0.003980517387390137, 0.00435405969619751, 0.004727602005004883, 0.005101144313812256, 0.005474686622619629, 0.005848228931427002, 0.006221771240234375]}, "gradients/encoder.encoder.layers.15.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [3.0, 6.0, 3.0, 3.0, 4.0, 8.0, 15.0, 16.0, 15.0, 20.0, 25.0, 30.0, 64.0, 84.0, 143.0, 163.0, 225.0, 354.0, 566.0, 865.0, 1270.0, 2008.0, 3471.0, 5856.0, 11110.0, 23079.0, 97705.0, 3953120.0, 53347.0, 18316.0, 9122.0, 4975.0, 2947.0, 1820.0, 1198.0, 758.0, 468.0, 307.0, 242.0, 164.0, 108.0, 76.0, 65.0, 33.0, 28.0, 30.0, 20.0, 12.0, 11.0, 5.0, 7.0, 6.0, 1.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.0253143310546875, -0.024389028549194336, -0.023463726043701172, -0.022538423538208008, -0.021613121032714844, -0.02068781852722168, -0.019762516021728516, -0.01883721351623535, -0.017911911010742188, -0.016986608505249023, -0.01606130599975586, -0.015136003494262695, -0.014210700988769531, -0.013285398483276367, -0.012360095977783203, -0.011434793472290039, -0.010509490966796875, -0.009584188461303711, -0.008658885955810547, -0.007733583450317383, -0.006808280944824219, -0.005882978439331055, -0.004957675933837891, -0.0040323734283447266, -0.0031070709228515625, -0.0021817684173583984, -0.0012564659118652344, -0.0003311634063720703, 0.0005941390991210938, 0.0015194416046142578, 0.002444744110107422, 0.003370046615600586, 0.00429534912109375, 0.005220651626586914, 0.006145954132080078, 0.007071256637573242, 0.007996559143066406, 0.00892186164855957, 0.009847164154052734, 0.010772466659545898, 0.011697769165039062, 0.012623071670532227, 0.01354837417602539, 0.014473676681518555, 0.015398979187011719, 0.016324281692504883, 0.017249584197998047, 0.01817488670349121, 0.019100189208984375, 0.02002549171447754, 0.020950794219970703, 0.021876096725463867, 0.02280139923095703, 0.023726701736450195, 0.02465200424194336, 0.025577306747436523, 0.026502609252929688, 0.02742791175842285, 0.028353214263916016, 0.02927851676940918, 0.030203819274902344, 0.031129121780395508, 0.03205442428588867, 0.032979726791381836, 0.033905029296875]}, "gradients/encoder.encoder.layers.15.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 1.0, 1.0, 1.0, 4.0, 3.0, 3.0, 3.0, 3.0, 8.0, 9.0, 9.0, 6.0, 5.0, 16.0, 15.0, 20.0, 34.0, 117.0, 3214.0, 389.0, 98.0, 20.0, 20.0, 19.0, 14.0, 7.0, 9.0, 6.0, 1.0, 3.0, 7.0, 5.0, 4.0, 4.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.01140594482421875, -0.010980725288391113, -0.010555505752563477, -0.01013028621673584, -0.009705066680908203, -0.009279847145080566, -0.00885462760925293, -0.008429408073425293, -0.008004188537597656, -0.0075789690017700195, -0.007153749465942383, -0.006728529930114746, -0.006303310394287109, -0.005878090858459473, -0.005452871322631836, -0.005027651786804199, -0.0046024322509765625, -0.004177212715148926, -0.003751993179321289, -0.0033267736434936523, -0.0029015541076660156, -0.002476334571838379, -0.002051115036010742, -0.0016258955001831055, -0.0012006759643554688, -0.000775456428527832, -0.0003502368927001953, 7.49826431274414e-05, 0.0005002021789550781, 0.0009254217147827148, 0.0013506412506103516, 0.0017758607864379883, 0.002201080322265625, 0.0026262998580932617, 0.0030515193939208984, 0.003476738929748535, 0.003901958465576172, 0.004327178001403809, 0.004752397537231445, 0.005177617073059082, 0.005602836608886719, 0.0060280561447143555, 0.006453275680541992, 0.006878495216369629, 0.007303714752197266, 0.007728934288024902, 0.008154153823852539, 0.008579373359680176, 0.009004592895507812, 0.00942981243133545, 0.009855031967163086, 0.010280251502990723, 0.01070547103881836, 0.011130690574645996, 0.011555910110473633, 0.01198112964630127, 0.012406349182128906, 0.012831568717956543, 0.01325678825378418, 0.013682007789611816, 0.014107227325439453, 0.01453244686126709, 0.014957666397094727, 0.015382885932922363, 0.01580810546875]}, "gradients/encoder.encoder.layers.15.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 1.0, 3.0, 2.0, 1.0, 13.0, 27.0, 37.0, 86.0, 230.0, 357.0, 124.0, 64.0, 29.0, 13.0, 6.0, 6.0, 3.0, 1.0, 1.0, 2.0, 2.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.07526051253080368, -0.07132313400506973, -0.06738575547933578, -0.06344837695360184, -0.05951100215315819, -0.05557362362742424, -0.05163624882698059, -0.04769887030124664, -0.043761491775512695, -0.03982411324977875, -0.0358867347240448, -0.03194935992360115, -0.028011981397867203, -0.024074602872133255, -0.020137226209044456, -0.016199849545955658, -0.01226247102022171, -0.008325093425810337, -0.004387715831398964, -0.0004503382369875908, 0.0034870393574237823, 0.00742441788315773, 0.011361794546246529, 0.015299171209335327, 0.019236549735069275, 0.023173928260803223, 0.02711130492389202, 0.03104868158698082, 0.03498606011271477, 0.038923438638448715, 0.042860813438892365, 0.04679819196462631, 0.05073556303977966, 0.05467294156551361, 0.05861032009124756, 0.0625476986169815, 0.06648507714271545, 0.0704224556684494, 0.07435982674360275, 0.0782972052693367, 0.08223458379507065, 0.0861719623208046, 0.09010934084653854, 0.09404671937227249, 0.09798409044742584, 0.10192146897315979, 0.10585884749889374, 0.10979622602462769, 0.11373360455036163, 0.11767098307609558, 0.12160836160182953, 0.12554574012756348, 0.12948311865329742, 0.13342049717903137, 0.13735787570476532, 0.14129525423049927, 0.14523261785507202, 0.14916999638080597, 0.15310737490653992, 0.15704475343227386, 0.1609821319580078, 0.16491951048374176, 0.1688568890094757, 0.17279425263404846, 0.1767316460609436]}, "gradients/encoder.encoder.layers.15.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 3.0, 1.0, 0.0, 4.0, 13.0, 8.0, 18.0, 16.0, 19.0, 27.0, 43.0, 52.0, 40.0, 57.0, 66.0, 65.0, 54.0, 73.0, 72.0, 54.0, 59.0, 49.0, 41.0, 40.0, 49.0, 16.0, 27.0, 13.0, 9.0, 6.0, 7.0, 4.0, 3.0, 2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.04995453357696533, -0.04853484407067299, -0.047115154564380646, -0.0456954650580883, -0.04427577555179596, -0.042856086045503616, -0.04143639653921127, -0.04001670703291893, -0.03859701752662659, -0.037177328020334244, -0.0357576385140419, -0.03433794900774956, -0.032918259501457214, -0.03149856999516487, -0.030078880488872528, -0.028659190982580185, -0.027239501476287842, -0.0258198119699955, -0.024400122463703156, -0.022980432957410812, -0.02156074345111847, -0.020141053944826126, -0.018721364438533783, -0.01730167493224144, -0.015881985425949097, -0.014462295919656754, -0.01304260641336441, -0.011622916907072067, -0.010203227400779724, -0.008783537894487381, -0.007363848388195038, -0.005944158881902695, -0.0045244693756103516, -0.0031047798693180084, -0.0016850903630256653, -0.00026540085673332214, 0.001154288649559021, 0.002573978155851364, 0.003993667662143707, 0.00541335716843605, 0.0068330466747283936, 0.008252736181020737, 0.00967242568731308, 0.011092115193605423, 0.012511804699897766, 0.01393149420619011, 0.015351183712482452, 0.016770873218774796, 0.01819056272506714, 0.019610252231359482, 0.021029941737651825, 0.022449631243944168, 0.02386932075023651, 0.025289010256528854, 0.026708699762821198, 0.02812838926911354, 0.029548078775405884, 0.030967768281698227, 0.03238745778799057, 0.03380714729428291, 0.035226836800575256, 0.0366465263068676, 0.03806621581315994, 0.039485905319452286, 0.04090559482574463]}, "gradients/encoder.encoder.layers.15.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 4.0, 4.0, 8.0, 10.0, 3.0, 11.0, 9.0, 9.0, 14.0, 19.0, 19.0, 26.0, 29.0, 33.0, 55.0, 76.0, 164.0, 368.0, 1095.0, 3883.0, 21029.0, 249599.0, 722679.0, 40600.0, 6211.0, 1487.0, 524.0, 211.0, 113.0, 68.0, 41.0, 29.0, 25.0, 28.0, 11.0, 10.0, 16.0, 10.0, 9.0, 8.0, 5.0, 7.0, 1.0, 1.0, 3.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.0819091796875, -0.0793619155883789, -0.07681465148925781, -0.07426738739013672, -0.07172012329101562, -0.06917285919189453, -0.06662559509277344, -0.06407833099365234, -0.06153106689453125, -0.058983802795410156, -0.05643653869628906, -0.05388927459716797, -0.051342010498046875, -0.04879474639892578, -0.04624748229980469, -0.043700218200683594, -0.0411529541015625, -0.038605690002441406, -0.03605842590332031, -0.03351116180419922, -0.030963897705078125, -0.02841663360595703, -0.025869369506835938, -0.023322105407714844, -0.02077484130859375, -0.018227577209472656, -0.015680313110351562, -0.013133049011230469, -0.010585784912109375, -0.008038520812988281, -0.0054912567138671875, -0.0029439926147460938, -0.000396728515625, 0.0021505355834960938, 0.0046977996826171875, 0.007245063781738281, 0.009792327880859375, 0.012339591979980469, 0.014886856079101562, 0.017434120178222656, 0.01998138427734375, 0.022528648376464844, 0.025075912475585938, 0.02762317657470703, 0.030170440673828125, 0.03271770477294922, 0.03526496887207031, 0.037812232971191406, 0.0403594970703125, 0.042906761169433594, 0.04545402526855469, 0.04800128936767578, 0.050548553466796875, 0.05309581756591797, 0.05564308166503906, 0.058190345764160156, 0.06073760986328125, 0.06328487396240234, 0.06583213806152344, 0.06837940216064453, 0.07092666625976562, 0.07347393035888672, 0.07602119445800781, 0.0785684585571289, 0.08111572265625]}, "gradients/encoder.encoder.layers.15.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 7.0, 7.0, 14.0, 19.0, 47.0, 73.0, 110.0, 171.0, 172.0, 155.0, 92.0, 69.0, 38.0, 14.0, 13.0, 5.0, 1.0, 1.0, 2.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0], "bins": [-0.0174407958984375, -0.017072081565856934, -0.016703367233276367, -0.0163346529006958, -0.015965938568115234, -0.015597224235534668, -0.015228509902954102, -0.014859795570373535, -0.014491081237792969, -0.014122366905212402, -0.013753652572631836, -0.01338493824005127, -0.013016223907470703, -0.012647509574890137, -0.01227879524230957, -0.011910080909729004, -0.011541366577148438, -0.011172652244567871, -0.010803937911987305, -0.010435223579406738, -0.010066509246826172, -0.009697794914245605, -0.009329080581665039, -0.008960366249084473, -0.008591651916503906, -0.00822293758392334, -0.007854223251342773, -0.007485508918762207, -0.007116794586181641, -0.006748080253601074, -0.006379365921020508, -0.006010651588439941, -0.005641937255859375, -0.005273222923278809, -0.004904508590698242, -0.004535794258117676, -0.004167079925537109, -0.003798365592956543, -0.0034296512603759766, -0.00306093692779541, -0.0026922225952148438, -0.0023235082626342773, -0.001954793930053711, -0.0015860795974731445, -0.0012173652648925781, -0.0008486509323120117, -0.0004799365997314453, -0.0001112222671508789, 0.0002574920654296875, 0.0006262063980102539, 0.0009949207305908203, 0.0013636350631713867, 0.0017323493957519531, 0.0021010637283325195, 0.002469778060913086, 0.0028384923934936523, 0.0032072067260742188, 0.003575921058654785, 0.0039446353912353516, 0.004313349723815918, 0.004682064056396484, 0.005050778388977051, 0.005419492721557617, 0.005788207054138184, 0.00615692138671875]}, "gradients/encoder.encoder.layers.15.attention.v_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 3.0, 1.0, 5.0, 5.0, 5.0, 8.0, 13.0, 18.0, 26.0, 34.0, 46.0, 83.0, 109.0, 157.0, 188.0, 282.0, 441.0, 663.0, 961.0, 1420.0, 2181.0, 3327.0, 5211.0, 8522.0, 13461.0, 22885.0, 39665.0, 72626.0, 154317.0, 400625.0, 151348.0, 71551.0, 38893.0, 22635.0, 13435.0, 8359.0, 5174.0, 3301.0, 2162.0, 1417.0, 934.0, 625.0, 446.0, 316.0, 206.0, 128.0, 118.0, 68.0, 35.0, 46.0, 27.0, 15.0, 13.0, 9.0, 8.0, 6.0, 1.0, 4.0, 2.0, 0.0, 3.0], "bins": [-0.018798828125, -0.018220186233520508, -0.017641544342041016, -0.017062902450561523, -0.01648426055908203, -0.01590561866760254, -0.015326976776123047, -0.014748334884643555, -0.014169692993164062, -0.01359105110168457, -0.013012409210205078, -0.012433767318725586, -0.011855125427246094, -0.011276483535766602, -0.01069784164428711, -0.010119199752807617, -0.009540557861328125, -0.008961915969848633, -0.00838327407836914, -0.0078046321868896484, -0.007225990295410156, -0.006647348403930664, -0.006068706512451172, -0.00549006462097168, -0.0049114227294921875, -0.004332780838012695, -0.003754138946533203, -0.003175497055053711, -0.0025968551635742188, -0.0020182132720947266, -0.0014395713806152344, -0.0008609294891357422, -0.00028228759765625, 0.0002963542938232422, 0.0008749961853027344, 0.0014536380767822266, 0.0020322799682617188, 0.002610921859741211, 0.003189563751220703, 0.0037682056427001953, 0.0043468475341796875, 0.00492548942565918, 0.005504131317138672, 0.006082773208618164, 0.006661415100097656, 0.0072400569915771484, 0.00781869888305664, 0.008397340774536133, 0.008975982666015625, 0.009554624557495117, 0.01013326644897461, 0.010711908340454102, 0.011290550231933594, 0.011869192123413086, 0.012447834014892578, 0.01302647590637207, 0.013605117797851562, 0.014183759689331055, 0.014762401580810547, 0.015341043472290039, 0.01591968536376953, 0.016498327255249023, 0.017076969146728516, 0.017655611038208008, 0.0182342529296875]}, "gradients/encoder.encoder.layers.15.attention.v_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 2.0, 2.0, 4.0, 3.0, 7.0, 1.0, 11.0, 8.0, 11.0, 11.0, 12.0, 12.0, 14.0, 19.0, 18.0, 17.0, 29.0, 36.0, 32.0, 37.0, 31.0, 35.0, 36.0, 44.0, 37.0, 45.0, 47.0, 41.0, 45.0, 34.0, 39.0, 23.0, 31.0, 23.0, 28.0, 27.0, 20.0, 24.0, 23.0, 14.0, 9.0, 11.0, 10.0, 13.0, 9.0, 6.0, 4.0, 4.0, 2.0, 4.0, 0.0, 4.0, 3.0, 0.0, 2.0, 2.0], "bins": [-0.0178070068359375, -0.017276525497436523, -0.016746044158935547, -0.01621556282043457, -0.015685081481933594, -0.015154600143432617, -0.01462411880493164, -0.014093637466430664, -0.013563156127929688, -0.013032674789428711, -0.012502193450927734, -0.011971712112426758, -0.011441230773925781, -0.010910749435424805, -0.010380268096923828, -0.009849786758422852, -0.009319305419921875, -0.008788824081420898, -0.008258342742919922, -0.007727861404418945, -0.007197380065917969, -0.006666898727416992, -0.006136417388916016, -0.005605936050415039, -0.0050754547119140625, -0.004544973373413086, -0.004014492034912109, -0.003484010696411133, -0.0029535293579101562, -0.0024230480194091797, -0.0018925666809082031, -0.0013620853424072266, -0.00083160400390625, -0.00030112266540527344, 0.00022935867309570312, 0.0007598400115966797, 0.0012903213500976562, 0.0018208026885986328, 0.0023512840270996094, 0.002881765365600586, 0.0034122467041015625, 0.003942728042602539, 0.004473209381103516, 0.005003690719604492, 0.005534172058105469, 0.006064653396606445, 0.006595134735107422, 0.0071256160736083984, 0.007656097412109375, 0.008186578750610352, 0.008717060089111328, 0.009247541427612305, 0.009778022766113281, 0.010308504104614258, 0.010838985443115234, 0.011369466781616211, 0.011899948120117188, 0.012430429458618164, 0.01296091079711914, 0.013491392135620117, 0.014021873474121094, 0.01455235481262207, 0.015082836151123047, 0.015613317489624023, 0.016143798828125]}, "gradients/encoder.encoder.layers.15.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 0.0, 2.0, 1.0, 3.0, 1.0, 1.0, 3.0, 9.0, 8.0, 11.0, 18.0, 37.0, 64.0, 114.0, 201.0, 363.0, 685.0, 1436.0, 3017.0, 7874.0, 31131.0, 625046.0, 340634.0, 25354.0, 7061.0, 2795.0, 1282.0, 648.0, 332.0, 190.0, 95.0, 59.0, 30.0, 22.0, 14.0, 6.0, 5.0, 5.0, 3.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00042319297790527344, -0.00040905922651290894, -0.00039492547512054443, -0.00038079172372817993, -0.00036665797233581543, -0.00035252422094345093, -0.0003383904695510864, -0.0003242567181587219, -0.0003101229667663574, -0.0002959892153739929, -0.0002818554639816284, -0.0002677217125892639, -0.0002535879611968994, -0.0002394542098045349, -0.0002253204584121704, -0.0002111867070198059, -0.0001970529556274414, -0.0001829192042350769, -0.0001687854528427124, -0.0001546517014503479, -0.0001405179500579834, -0.0001263841986656189, -0.0001122504472732544, -9.811669588088989e-05, -8.398294448852539e-05, -6.984919309616089e-05, -5.571544170379639e-05, -4.1581690311431885e-05, -2.7447938919067383e-05, -1.3314187526702881e-05, 8.195638656616211e-07, 1.4953315258026123e-05, 2.9087066650390625e-05, 4.322081804275513e-05, 5.735456943511963e-05, 7.148832082748413e-05, 8.562207221984863e-05, 9.975582361221313e-05, 0.00011388957500457764, 0.00012802332639694214, 0.00014215707778930664, 0.00015629082918167114, 0.00017042458057403564, 0.00018455833196640015, 0.00019869208335876465, 0.00021282583475112915, 0.00022695958614349365, 0.00024109333753585815, 0.00025522708892822266, 0.00026936084032058716, 0.00028349459171295166, 0.00029762834310531616, 0.00031176209449768066, 0.00032589584589004517, 0.00034002959728240967, 0.00035416334867477417, 0.00036829710006713867, 0.0003824308514595032, 0.0003965646028518677, 0.0004106983542442322, 0.0004248321056365967, 0.0004389658570289612, 0.0004530996084213257, 0.0004672333598136902, 0.0004813671112060547]}, "gradients/encoder.encoder.layers.15.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 4.0, 0.0, 2.0, 1.0, 2.0, 0.0, 1.0, 4.0, 1.0, 4.0, 3.0, 5.0, 2.0, 6.0, 6.0, 8.0, 14.0, 20.0, 34.0, 43.0, 59.0, 128.0, 184.0, 180.0, 114.0, 52.0, 25.0, 28.0, 26.0, 19.0, 9.0, 2.0, 4.0, 4.0, 3.0, 6.0, 2.0, 0.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 3.0, 0.0, 0.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.792213439941406e-05, -4.6319328248500824e-05, -4.4716522097587585e-05, -4.311371594667435e-05, -4.151090979576111e-05, -3.990810364484787e-05, -3.830529749393463e-05, -3.670249134302139e-05, -3.5099685192108154e-05, -3.3496879041194916e-05, -3.189407289028168e-05, -3.029126673936844e-05, -2.86884605884552e-05, -2.708565443754196e-05, -2.5482848286628723e-05, -2.3880042135715485e-05, -2.2277235984802246e-05, -2.0674429833889008e-05, -1.907162368297577e-05, -1.746881753206253e-05, -1.5866011381149292e-05, -1.4263205230236053e-05, -1.2660399079322815e-05, -1.1057592928409576e-05, -9.454786777496338e-06, -7.8519806265831e-06, -6.249174475669861e-06, -4.646368324756622e-06, -3.043562173843384e-06, -1.4407560229301453e-06, 1.6205012798309326e-07, 1.7648562788963318e-06, 3.3676624298095703e-06, 4.970468580722809e-06, 6.573274731636047e-06, 8.176080882549286e-06, 9.778887033462524e-06, 1.1381693184375763e-05, 1.2984499335289001e-05, 1.458730548620224e-05, 1.619011163711548e-05, 1.7792917788028717e-05, 1.9395723938941956e-05, 2.0998530089855194e-05, 2.2601336240768433e-05, 2.420414239168167e-05, 2.580694854259491e-05, 2.7409754693508148e-05, 2.9012560844421387e-05, 3.0615366995334625e-05, 3.2218173146247864e-05, 3.38209792971611e-05, 3.542378544807434e-05, 3.702659159898758e-05, 3.862939774990082e-05, 4.0232203900814056e-05, 4.1835010051727295e-05, 4.3437816202640533e-05, 4.504062235355377e-05, 4.664342850446701e-05, 4.824623465538025e-05, 4.984904080629349e-05, 5.1451846957206726e-05, 5.3054653108119965e-05, 5.46574592590332e-05]}, "gradients/encoder.encoder.layers.15.attention.q_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 2.0, 0.0, 2.0, 3.0, 1.0, 5.0, 6.0, 5.0, 11.0, 9.0, 20.0, 21.0, 33.0, 39.0, 52.0, 72.0, 105.0, 162.0, 201.0, 329.0, 472.0, 707.0, 1231.0, 2001.0, 3647.0, 7495.0, 18097.0, 59091.0, 439480.0, 423268.0, 57682.0, 17847.0, 7423.0, 3642.0, 1993.0, 1166.0, 721.0, 452.0, 320.0, 219.0, 157.0, 97.0, 80.0, 55.0, 38.0, 33.0, 19.0, 17.0, 10.0, 11.0, 5.0, 4.0, 5.0, 2.0, 3.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0], "bins": [-0.00023794174194335938, -0.00023050419986248016, -0.00022306665778160095, -0.00021562911570072174, -0.00020819157361984253, -0.00020075403153896332, -0.0001933164894580841, -0.0001858789473772049, -0.00017844140529632568, -0.00017100386321544647, -0.00016356632113456726, -0.00015612877905368805, -0.00014869123697280884, -0.00014125369489192963, -0.00013381615281105042, -0.0001263786107301712, -0.00011894106864929199, -0.00011150352656841278, -0.00010406598448753357, -9.662844240665436e-05, -8.919090032577515e-05, -8.175335824489594e-05, -7.431581616401672e-05, -6.687827408313751e-05, -5.94407320022583e-05, -5.200318992137909e-05, -4.456564784049988e-05, -3.7128105759620667e-05, -2.9690563678741455e-05, -2.2253021597862244e-05, -1.4815479516983032e-05, -7.377937436103821e-06, 5.960464477539063e-08, 7.497146725654602e-06, 1.4934688806533813e-05, 2.2372230887413025e-05, 2.9809772968292236e-05, 3.724731504917145e-05, 4.468485713005066e-05, 5.212239921092987e-05, 5.955994129180908e-05, 6.69974833726883e-05, 7.44350254535675e-05, 8.187256753444672e-05, 8.931010961532593e-05, 9.674765169620514e-05, 0.00010418519377708435, 0.00011162273585796356, 0.00011906027793884277, 0.00012649782001972198, 0.0001339353621006012, 0.0001413729041814804, 0.00014881044626235962, 0.00015624798834323883, 0.00016368553042411804, 0.00017112307250499725, 0.00017856061458587646, 0.00018599815666675568, 0.0001934356987476349, 0.0002008732408285141, 0.0002083107829093933, 0.00021574832499027252, 0.00022318586707115173, 0.00023062340915203094, 0.00023806095123291016]}, "gradients/encoder.encoder.layers.15.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 2.0, 1.0, 3.0, 0.0, 4.0, 7.0, 4.0, 10.0, 6.0, 12.0, 17.0, 21.0, 45.0, 50.0, 65.0, 109.0, 140.0, 127.0, 107.0, 87.0, 71.0, 39.0, 29.0, 10.0, 5.0, 11.0, 10.0, 2.0, 5.0, 3.0, 4.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.00022721290588378906, -0.00022012554109096527, -0.00021303817629814148, -0.0002059508115053177, -0.0001988634467124939, -0.0001917760819196701, -0.0001846887171268463, -0.00017760135233402252, -0.00017051398754119873, -0.00016342662274837494, -0.00015633925795555115, -0.00014925189316272736, -0.00014216452836990356, -0.00013507716357707977, -0.00012798979878425598, -0.00012090243399143219, -0.0001138150691986084, -0.00010672770440578461, -9.964033961296082e-05, -9.255297482013702e-05, -8.546561002731323e-05, -7.837824523448944e-05, -7.129088044166565e-05, -6.420351564884186e-05, -5.7116150856018066e-05, -5.0028786063194275e-05, -4.2941421270370483e-05, -3.585405647754669e-05, -2.87666916847229e-05, -2.167932689189911e-05, -1.4591962099075317e-05, -7.504597306251526e-06, -4.172325134277344e-07, 6.670132279396057e-06, 1.3757497072219849e-05, 2.084486186504364e-05, 2.793222665786743e-05, 3.501959145069122e-05, 4.2106956243515015e-05, 4.9194321036338806e-05, 5.62816858291626e-05, 6.336905062198639e-05, 7.045641541481018e-05, 7.754378020763397e-05, 8.463114500045776e-05, 9.171850979328156e-05, 9.880587458610535e-05, 0.00010589323937892914, 0.00011298060417175293, 0.00012006796896457672, 0.0001271553337574005, 0.0001342426985502243, 0.0001413300633430481, 0.0001484174281358719, 0.00015550479292869568, 0.00016259215772151947, 0.00016967952251434326, 0.00017676688730716705, 0.00018385425209999084, 0.00019094161689281464, 0.00019802898168563843, 0.00020511634647846222, 0.000212203711271286, 0.0002192910760641098, 0.0002263784408569336]}, "gradients/encoder.encoder.layers.15.layer_norm.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 2.0, 0.0, 3.0, 5.0, 9.0, 18.0, 21.0, 55.0, 132.0, 368.0, 154.0, 85.0, 48.0, 32.0, 28.0, 16.0, 12.0, 9.0, 6.0, 4.0, 4.0, 1.0, 0.0, 3.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.13259384036064148, -0.12279576063156128, -0.11299768835306168, -0.10319961607456207, -0.09340153634548187, -0.08360345661640167, -0.07380538433790207, -0.06400731205940247, -0.054209232330322266, -0.044411156326532364, -0.03461308032274246, -0.02481500431895256, -0.015016928315162659, -0.005218852311372757, 0.004579223692417145, 0.014377295970916748, 0.024175375699996948, 0.03397345170378685, 0.04377152770757675, 0.05356960371136665, 0.06336767971515656, 0.07316575944423676, 0.08296383172273636, 0.09276190400123596, 0.10255998373031616, 0.11235806345939636, 0.12215613573789597, 0.13195420801639557, 0.14175228774547577, 0.15155036747455597, 0.16134843230247498, 0.17114651203155518, 0.18094459176063538, 0.19074267148971558, 0.20054075121879578, 0.21033881604671478, 0.22013689577579498, 0.22993497550487518, 0.2397330403327942, 0.2495311200618744, 0.2593291997909546, 0.2691272795200348, 0.278925359249115, 0.2887234389781952, 0.2985215187072754, 0.3083195686340332, 0.3181176483631134, 0.3279157280921936, 0.3377138078212738, 0.347511887550354, 0.3573099672794342, 0.3671080470085144, 0.3769060969352722, 0.3867041766643524, 0.3965022563934326, 0.4063003361225128, 0.416098415851593, 0.4258964955806732, 0.4356945753097534, 0.4454926550388336, 0.4552907347679138, 0.46508878469467163, 0.47488686442375183, 0.48468494415283203, 0.49448302388191223]}, "gradients/encoder.encoder.layers.15.layer_norm.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 2.0, 1.0, 2.0, 1.0, 3.0, 2.0, 14.0, 6.0, 13.0, 16.0, 16.0, 21.0, 31.0, 39.0, 37.0, 43.0, 40.0, 54.0, 57.0, 55.0, 71.0, 74.0, 50.0, 67.0, 48.0, 38.0, 39.0, 42.0, 27.0, 23.0, 22.0, 13.0, 19.0, 7.0, 6.0, 4.0, 7.0, 2.0, 4.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.10395467281341553, -0.09981619566679001, -0.09567771852016449, -0.09153924882411957, -0.08740077167749405, -0.08326229453086853, -0.07912382483482361, -0.07498534768819809, -0.07084687054157257, -0.06670839339494705, -0.06256991624832153, -0.05843144655227661, -0.05429296940565109, -0.050154492259025574, -0.04601601883769035, -0.04187754541635513, -0.037739068269729614, -0.033600591123104095, -0.029462117701768875, -0.025323642417788506, -0.021185167133808136, -0.017046691849827766, -0.012908216565847397, -0.008769741281867027, -0.004631265997886658, -0.0004927907139062881, 0.0036456845700740814, 0.007784159854054451, 0.01192263513803482, 0.01606111042201519, 0.02019958570599556, 0.02433806098997593, 0.0284765362739563, 0.03261501342058182, 0.03675348684191704, 0.04089196026325226, 0.04503043740987778, 0.049168914556503296, 0.053307387977838516, 0.05744586139917374, 0.061584338545799255, 0.06572281569242477, 0.06986129283905029, 0.07399976253509521, 0.07813823968172073, 0.08227671682834625, 0.08641518652439117, 0.0905536636710167, 0.09469214081764221, 0.09883061796426773, 0.10296909511089325, 0.10710756480693817, 0.11124604195356369, 0.11538451910018921, 0.11952298879623413, 0.12366146594285965, 0.12779994308948517, 0.1319384127855301, 0.1360768973827362, 0.14021536707878113, 0.14435383677482605, 0.14849232137203217, 0.1526307910680771, 0.1567692756652832, 0.16090774536132812]}, "gradients/encoder.encoder.layers.14.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 4.0, 0.0, 2.0, 1.0, 1.0, 1.0, 2.0, 3.0, 5.0, 2.0, 10.0, 14.0, 20.0, 52.0, 90.0, 226.0, 667.0, 2674.0, 261496.0, 3925126.0, 2808.0, 661.0, 220.0, 98.0, 51.0, 29.0, 12.0, 9.0, 2.0, 3.0, 3.0, 1.0, 0.0, 1.0, 0.0, 2.0], "bins": [-0.1260986328125, -0.12341594696044922, -0.12073326110839844, -0.11805057525634766, -0.11536788940429688, -0.1126852035522461, -0.11000251770019531, -0.10731983184814453, -0.10463714599609375, -0.10195446014404297, -0.09927177429199219, -0.0965890884399414, -0.09390640258789062, -0.09122371673583984, -0.08854103088378906, -0.08585834503173828, -0.0831756591796875, -0.08049297332763672, -0.07781028747558594, -0.07512760162353516, -0.07244491577148438, -0.0697622299194336, -0.06707954406738281, -0.06439685821533203, -0.06171417236328125, -0.05903148651123047, -0.05634880065917969, -0.053666114807128906, -0.050983428955078125, -0.048300743103027344, -0.04561805725097656, -0.04293537139892578, -0.040252685546875, -0.03756999969482422, -0.03488731384277344, -0.032204627990722656, -0.029521942138671875, -0.026839256286621094, -0.024156570434570312, -0.02147388458251953, -0.01879119873046875, -0.01610851287841797, -0.013425827026367188, -0.010743141174316406, -0.008060455322265625, -0.005377769470214844, -0.0026950836181640625, -1.239776611328125e-05, 0.0026702880859375, 0.005352973937988281, 0.008035659790039062, 0.010718345642089844, 0.013401031494140625, 0.016083717346191406, 0.018766403198242188, 0.02144908905029297, 0.02413177490234375, 0.02681446075439453, 0.029497146606445312, 0.032179832458496094, 0.034862518310546875, 0.037545204162597656, 0.04022789001464844, 0.04291057586669922, 0.04559326171875]}, "gradients/encoder.encoder.layers.14.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 5.0, 7.0, 6.0, 21.0, 23.0, 51.0, 82.0, 127.0, 180.0, 151.0, 152.0, 77.0, 63.0, 31.0, 16.0, 13.0, 3.0, 1.0, 2.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-0.01666259765625, -0.016308367252349854, -0.015954136848449707, -0.01559990644454956, -0.015245676040649414, -0.014891445636749268, -0.014537215232849121, -0.014182984828948975, -0.013828754425048828, -0.013474524021148682, -0.013120293617248535, -0.012766063213348389, -0.012411832809448242, -0.012057602405548096, -0.01170337200164795, -0.011349141597747803, -0.010994911193847656, -0.01064068078994751, -0.010286450386047363, -0.009932219982147217, -0.00957798957824707, -0.009223759174346924, -0.008869528770446777, -0.00851529836654663, -0.008161067962646484, -0.007806837558746338, -0.007452607154846191, -0.007098376750946045, -0.0067441463470458984, -0.006389915943145752, -0.0060356855392456055, -0.005681455135345459, -0.0053272247314453125, -0.004972994327545166, -0.0046187639236450195, -0.004264533519744873, -0.0039103031158447266, -0.00355607271194458, -0.0032018423080444336, -0.002847611904144287, -0.0024933815002441406, -0.002139151096343994, -0.0017849206924438477, -0.0014306902885437012, -0.0010764598846435547, -0.0007222294807434082, -0.0003679990768432617, -1.3768672943115234e-05, 0.00034046173095703125, 0.0006946921348571777, 0.0010489225387573242, 0.0014031529426574707, 0.0017573833465576172, 0.0021116137504577637, 0.00246584415435791, 0.0028200745582580566, 0.003174304962158203, 0.0035285353660583496, 0.003882765769958496, 0.004236996173858643, 0.004591226577758789, 0.0049454569816589355, 0.005299687385559082, 0.0056539177894592285, 0.006008148193359375]}, "gradients/encoder.encoder.layers.14.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 3.0, 4.0, 3.0, 10.0, 6.0, 5.0, 6.0, 5.0, 12.0, 15.0, 19.0, 24.0, 35.0, 49.0, 83.0, 83.0, 125.0, 210.0, 342.0, 651.0, 1142.0, 2365.0, 5404.0, 19290.0, 4123117.0, 28558.0, 6520.0, 2818.0, 1314.0, 774.0, 456.0, 233.0, 156.0, 114.0, 86.0, 41.0, 63.0, 34.0, 26.0, 22.0, 17.0, 13.0, 10.0, 3.0, 6.0, 5.0, 3.0, 4.0, 5.0, 6.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.038543701171875, -0.03714609146118164, -0.03574848175048828, -0.03435087203979492, -0.03295326232910156, -0.0315556526184082, -0.030158042907714844, -0.028760433197021484, -0.027362823486328125, -0.025965213775634766, -0.024567604064941406, -0.023169994354248047, -0.021772384643554688, -0.020374774932861328, -0.01897716522216797, -0.01757955551147461, -0.01618194580078125, -0.01478433609008789, -0.013386726379394531, -0.011989116668701172, -0.010591506958007812, -0.009193897247314453, -0.007796287536621094, -0.006398677825927734, -0.005001068115234375, -0.0036034584045410156, -0.0022058486938476562, -0.0008082389831542969, 0.0005893707275390625, 0.001986980438232422, 0.0033845901489257812, 0.004782199859619141, 0.0061798095703125, 0.007577419281005859, 0.008975028991699219, 0.010372638702392578, 0.011770248413085938, 0.013167858123779297, 0.014565467834472656, 0.015963077545166016, 0.017360687255859375, 0.018758296966552734, 0.020155906677246094, 0.021553516387939453, 0.022951126098632812, 0.024348735809326172, 0.02574634552001953, 0.02714395523071289, 0.02854156494140625, 0.02993917465209961, 0.03133678436279297, 0.03273439407348633, 0.03413200378417969, 0.03552961349487305, 0.036927223205566406, 0.038324832916259766, 0.039722442626953125, 0.041120052337646484, 0.042517662048339844, 0.0439152717590332, 0.04531288146972656, 0.04671049118041992, 0.04810810089111328, 0.04950571060180664, 0.0509033203125]}, "gradients/encoder.encoder.layers.14.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [2.0, 2.0, 2.0, 1.0, 0.0, 2.0, 2.0, 1.0, 6.0, 5.0, 5.0, 18.0, 63.0, 3744.0, 147.0, 33.0, 14.0, 9.0, 7.0, 4.0, 6.0, 1.0, 1.0, 4.0, 4.0, 3.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.006134033203125, -0.005685091018676758, -0.005236148834228516, -0.0047872066497802734, -0.004338264465332031, -0.003889322280883789, -0.003440380096435547, -0.0029914379119873047, -0.0025424957275390625, -0.0020935535430908203, -0.0016446113586425781, -0.001195669174194336, -0.0007467269897460938, -0.00029778480529785156, 0.00015115737915039062, 0.0006000995635986328, 0.001049041748046875, 0.0014979839324951172, 0.0019469261169433594, 0.0023958683013916016, 0.0028448104858398438, 0.003293752670288086, 0.003742694854736328, 0.00419163703918457, 0.0046405792236328125, 0.005089521408081055, 0.005538463592529297, 0.005987405776977539, 0.006436347961425781, 0.0068852901458740234, 0.007334232330322266, 0.007783174514770508, 0.00823211669921875, 0.008681058883666992, 0.009130001068115234, 0.009578943252563477, 0.010027885437011719, 0.010476827621459961, 0.010925769805908203, 0.011374711990356445, 0.011823654174804688, 0.01227259635925293, 0.012721538543701172, 0.013170480728149414, 0.013619422912597656, 0.014068365097045898, 0.01451730728149414, 0.014966249465942383, 0.015415191650390625, 0.015864133834838867, 0.01631307601928711, 0.01676201820373535, 0.017210960388183594, 0.017659902572631836, 0.018108844757080078, 0.01855778694152832, 0.019006729125976562, 0.019455671310424805, 0.019904613494873047, 0.02035355567932129, 0.02080249786376953, 0.021251440048217773, 0.021700382232666016, 0.022149324417114258, 0.0225982666015625]}, "gradients/encoder.encoder.layers.14.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 2.0, 0.0, 4.0, 4.0, 15.0, 25.0, 67.0, 176.0, 444.0, 119.0, 62.0, 28.0, 22.0, 9.0, 8.0, 6.0, 5.0, 5.0, 4.0, 3.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.07073025405406952, -0.06793145835399628, -0.06513265520334244, -0.062333859503269196, -0.05953506380319595, -0.05673626437783241, -0.05393746495246887, -0.05113866925239563, -0.04833986982703209, -0.04554107040166855, -0.042742274701595306, -0.039943475276231766, -0.037144675850868225, -0.03434588015079498, -0.03154708072543144, -0.02874828316271305, -0.02594948559999466, -0.023150688037276268, -0.020351890474557877, -0.017553091049194336, -0.014754293486475945, -0.011955495923757553, -0.009156696498394012, -0.006357898935675621, -0.0035591013729572296, -0.0007603033445775509, 0.002038494683802128, 0.004837293177843094, 0.007636090740561485, 0.010434888303279877, 0.013233687728643417, 0.01603248529136181, 0.018831275403499603, 0.021630072966217995, 0.024428870528936386, 0.027227669954299927, 0.030026467517018318, 0.03282526507973671, 0.03562406450510025, 0.03842286020517349, 0.04122165963053703, 0.044020459055900574, 0.046819254755973816, 0.04961805418133736, 0.0524168536067009, 0.05521564930677414, 0.05801444873213768, 0.06081324815750122, 0.06361204385757446, 0.0664108395576477, 0.06920964270830154, 0.07200843840837479, 0.07480723410844803, 0.07760603725910187, 0.08040483295917511, 0.08320362865924835, 0.0860024243593216, 0.08880122005939484, 0.09160002321004868, 0.09439881891012192, 0.09719761461019516, 0.099996417760849, 0.10279521346092224, 0.10559400916099548, 0.10839281231164932]}, "gradients/encoder.encoder.layers.14.final_layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 3.0, 4.0, 6.0, 6.0, 6.0, 11.0, 9.0, 18.0, 32.0, 21.0, 40.0, 38.0, 63.0, 56.0, 71.0, 62.0, 73.0, 77.0, 82.0, 60.0, 61.0, 46.0, 40.0, 28.0, 21.0, 18.0, 17.0, 13.0, 13.0, 3.0, 3.0, 1.0, 2.0, 4.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.02626824378967285, -0.02526085451245308, -0.024253465235233307, -0.023246075958013535, -0.022238686680793762, -0.02123129740357399, -0.020223908126354218, -0.019216518849134445, -0.018209129571914673, -0.0172017402946949, -0.016194351017475128, -0.015186961740255356, -0.014179572463035583, -0.013172183185815811, -0.012164793908596039, -0.011157404631376266, -0.010150015354156494, -0.009142626076936722, -0.00813523679971695, -0.007127847522497177, -0.006120458245277405, -0.0051130689680576324, -0.00410567969083786, -0.0030982904136180878, -0.0020909011363983154, -0.001083511859178543, -7.612258195877075e-05, 0.0009312666952610016, 0.001938655972480774, 0.0029460452497005463, 0.003953434526920319, 0.004960823804140091, 0.005968213081359863, 0.006975602358579636, 0.007982991635799408, 0.00899038091301918, 0.009997770190238953, 0.011005159467458725, 0.012012548744678497, 0.01301993802189827, 0.014027327299118042, 0.015034716576337814, 0.016042105853557587, 0.01704949513077736, 0.01805688440799713, 0.019064273685216904, 0.020071662962436676, 0.02107905223965645, 0.02208644151687622, 0.023093830794095993, 0.024101220071315765, 0.025108609348535538, 0.02611599862575531, 0.027123387902975082, 0.028130777180194855, 0.029138166457414627, 0.0301455557346344, 0.031152945011854172, 0.032160334289073944, 0.033167723566293716, 0.03417511284351349, 0.03518250212073326, 0.03618989139795303, 0.037197280675172806, 0.03820466995239258]}, "gradients/encoder.encoder.layers.14.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 2.0, 3.0, 5.0, 6.0, 5.0, 6.0, 7.0, 10.0, 15.0, 15.0, 19.0, 21.0, 28.0, 29.0, 46.0, 62.0, 124.0, 216.0, 525.0, 1570.0, 5578.0, 29341.0, 355221.0, 607628.0, 38414.0, 6594.0, 1744.0, 601.0, 270.0, 125.0, 76.0, 52.0, 46.0, 32.0, 24.0, 15.0, 14.0, 18.0, 8.0, 6.0, 8.0, 8.0, 6.0, 5.0, 2.0, 2.0, 4.0, 2.0, 3.0, 0.0, 0.0, 3.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.0628662109375, -0.06084251403808594, -0.058818817138671875, -0.05679512023925781, -0.05477142333984375, -0.05274772644042969, -0.050724029541015625, -0.04870033264160156, -0.0466766357421875, -0.04465293884277344, -0.042629241943359375, -0.04060554504394531, -0.03858184814453125, -0.03655815124511719, -0.034534454345703125, -0.03251075744628906, -0.030487060546875, -0.028463363647460938, -0.026439666748046875, -0.024415969848632812, -0.02239227294921875, -0.020368576049804688, -0.018344879150390625, -0.016321182250976562, -0.0142974853515625, -0.012273788452148438, -0.010250091552734375, -0.008226394653320312, -0.00620269775390625, -0.0041790008544921875, -0.002155303955078125, -0.0001316070556640625, 0.00189208984375, 0.0039157867431640625, 0.005939483642578125, 0.007963180541992188, 0.00998687744140625, 0.012010574340820312, 0.014034271240234375, 0.016057968139648438, 0.0180816650390625, 0.020105361938476562, 0.022129058837890625, 0.024152755737304688, 0.02617645263671875, 0.028200149536132812, 0.030223846435546875, 0.03224754333496094, 0.034271240234375, 0.03629493713378906, 0.038318634033203125, 0.04034233093261719, 0.04236602783203125, 0.04438972473144531, 0.046413421630859375, 0.04843711853027344, 0.0504608154296875, 0.05248451232910156, 0.054508209228515625, 0.05653190612792969, 0.05855560302734375, 0.06057929992675781, 0.06260299682617188, 0.06462669372558594, 0.066650390625]}, "gradients/encoder.encoder.layers.14.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 5.0, 7.0, 6.0, 22.0, 15.0, 55.0, 80.0, 115.0, 172.0, 146.0, 160.0, 92.0, 63.0, 37.0, 14.0, 17.0, 1.0, 3.0, 2.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0], "bins": [-0.016632080078125, -0.016279637813568115, -0.01592719554901123, -0.015574753284454346, -0.015222311019897461, -0.014869868755340576, -0.014517426490783691, -0.014164984226226807, -0.013812541961669922, -0.013460099697113037, -0.013107657432556152, -0.012755215167999268, -0.012402772903442383, -0.012050330638885498, -0.011697888374328613, -0.011345446109771729, -0.010993003845214844, -0.010640561580657959, -0.010288119316101074, -0.00993567705154419, -0.009583234786987305, -0.00923079252243042, -0.008878350257873535, -0.00852590799331665, -0.008173465728759766, -0.00782102346420288, -0.007468581199645996, -0.007116138935089111, -0.0067636966705322266, -0.006411254405975342, -0.006058812141418457, -0.005706369876861572, -0.0053539276123046875, -0.005001485347747803, -0.004649043083190918, -0.004296600818634033, -0.0039441585540771484, -0.0035917162895202637, -0.003239274024963379, -0.002886831760406494, -0.0025343894958496094, -0.0021819472312927246, -0.0018295049667358398, -0.001477062702178955, -0.0011246204376220703, -0.0007721781730651855, -0.0004197359085083008, -6.729364395141602e-05, 0.00028514862060546875, 0.0006375908851623535, 0.0009900331497192383, 0.001342475414276123, 0.0016949176788330078, 0.0020473599433898926, 0.0023998022079467773, 0.002752244472503662, 0.003104686737060547, 0.0034571290016174316, 0.0038095712661743164, 0.004162013530731201, 0.004514455795288086, 0.004866898059844971, 0.0052193403244018555, 0.00557178258895874, 0.005924224853515625]}, "gradients/encoder.encoder.layers.14.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 4.0, 1.0, 9.0, 9.0, 23.0, 24.0, 24.0, 46.0, 66.0, 108.0, 155.0, 247.0, 372.0, 567.0, 820.0, 1380.0, 2206.0, 3455.0, 5764.0, 9485.0, 15755.0, 27945.0, 49823.0, 93439.0, 211462.0, 355976.0, 122116.0, 62960.0, 34931.0, 19863.0, 11314.0, 6884.0, 4127.0, 2548.0, 1627.0, 1032.0, 657.0, 459.0, 290.0, 225.0, 129.0, 75.0, 59.0, 29.0, 28.0, 20.0, 7.0, 10.0, 7.0, 2.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0], "bins": [-0.0165557861328125, -0.01604151725769043, -0.01552724838256836, -0.015012979507446289, -0.014498710632324219, -0.013984441757202148, -0.013470172882080078, -0.012955904006958008, -0.012441635131835938, -0.011927366256713867, -0.011413097381591797, -0.010898828506469727, -0.010384559631347656, -0.009870290756225586, -0.009356021881103516, -0.008841753005981445, -0.008327484130859375, -0.007813215255737305, -0.007298946380615234, -0.006784677505493164, -0.006270408630371094, -0.0057561397552490234, -0.005241870880126953, -0.004727602005004883, -0.0042133331298828125, -0.003699064254760742, -0.003184795379638672, -0.0026705265045166016, -0.0021562576293945312, -0.001641988754272461, -0.0011277198791503906, -0.0006134510040283203, -9.918212890625e-05, 0.0004150867462158203, 0.0009293556213378906, 0.001443624496459961, 0.0019578933715820312, 0.0024721622467041016, 0.002986431121826172, 0.003500699996948242, 0.0040149688720703125, 0.004529237747192383, 0.005043506622314453, 0.0055577754974365234, 0.006072044372558594, 0.006586313247680664, 0.007100582122802734, 0.007614850997924805, 0.008129119873046875, 0.008643388748168945, 0.009157657623291016, 0.009671926498413086, 0.010186195373535156, 0.010700464248657227, 0.011214733123779297, 0.011729001998901367, 0.012243270874023438, 0.012757539749145508, 0.013271808624267578, 0.013786077499389648, 0.014300346374511719, 0.014814615249633789, 0.01532888412475586, 0.01584315299987793, 0.016357421875]}, "gradients/encoder.encoder.layers.14.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 3.0, 3.0, 5.0, 6.0, 4.0, 5.0, 7.0, 10.0, 11.0, 22.0, 14.0, 19.0, 24.0, 27.0, 28.0, 32.0, 32.0, 44.0, 37.0, 38.0, 51.0, 37.0, 50.0, 37.0, 39.0, 35.0, 37.0, 52.0, 32.0, 46.0, 37.0, 24.0, 29.0, 28.0, 23.0, 23.0, 16.0, 11.0, 9.0, 5.0, 6.0, 3.0, 2.0, 3.0, 5.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.016265869140625, -0.015728235244750977, -0.015190601348876953, -0.01465296745300293, -0.014115333557128906, -0.013577699661254883, -0.01304006576538086, -0.012502431869506836, -0.011964797973632812, -0.011427164077758789, -0.010889530181884766, -0.010351896286010742, -0.009814262390136719, -0.009276628494262695, -0.008738994598388672, -0.008201360702514648, -0.007663726806640625, -0.0071260929107666016, -0.006588459014892578, -0.006050825119018555, -0.005513191223144531, -0.004975557327270508, -0.004437923431396484, -0.003900289535522461, -0.0033626556396484375, -0.002825021743774414, -0.0022873878479003906, -0.0017497539520263672, -0.0012121200561523438, -0.0006744861602783203, -0.00013685226440429688, 0.00040078163146972656, 0.00093841552734375, 0.0014760494232177734, 0.002013683319091797, 0.0025513172149658203, 0.0030889511108398438, 0.003626585006713867, 0.004164218902587891, 0.004701852798461914, 0.0052394866943359375, 0.005777120590209961, 0.006314754486083984, 0.006852388381958008, 0.007390022277832031, 0.007927656173706055, 0.008465290069580078, 0.009002923965454102, 0.009540557861328125, 0.010078191757202148, 0.010615825653076172, 0.011153459548950195, 0.011691093444824219, 0.012228727340698242, 0.012766361236572266, 0.013303995132446289, 0.013841629028320312, 0.014379262924194336, 0.01491689682006836, 0.015454530715942383, 0.015992164611816406, 0.01652979850769043, 0.017067432403564453, 0.017605066299438477, 0.0181427001953125]}, "gradients/encoder.encoder.layers.14.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 3.0, 3.0, 1.0, 6.0, 7.0, 11.0, 21.0, 39.0, 33.0, 70.0, 124.0, 239.0, 401.0, 956.0, 2325.0, 6648.0, 22746.0, 132681.0, 773197.0, 83559.0, 16848.0, 5105.0, 1883.0, 793.0, 396.0, 183.0, 106.0, 63.0, 37.0, 34.0, 14.0, 10.0, 7.0, 9.0, 1.0, 1.0, 3.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0], "bins": [-0.00041937828063964844, -0.0004087314009666443, -0.00039808452129364014, -0.000387437641620636, -0.00037679076194763184, -0.0003661438822746277, -0.00035549700260162354, -0.0003448501229286194, -0.00033420324325561523, -0.0003235563635826111, -0.00031290948390960693, -0.0003022626042366028, -0.00029161572456359863, -0.0002809688448905945, -0.00027032196521759033, -0.0002596750855445862, -0.00024902820587158203, -0.00023838132619857788, -0.00022773444652557373, -0.00021708756685256958, -0.00020644068717956543, -0.00019579380750656128, -0.00018514692783355713, -0.00017450004816055298, -0.00016385316848754883, -0.00015320628881454468, -0.00014255940914154053, -0.00013191252946853638, -0.00012126564979553223, -0.00011061877012252808, -9.997189044952393e-05, -8.932501077651978e-05, -7.867813110351562e-05, -6.803125143051147e-05, -5.7384371757507324e-05, -4.6737492084503174e-05, -3.6090612411499023e-05, -2.5443732738494873e-05, -1.4796853065490723e-05, -4.149973392486572e-06, 6.496906280517578e-06, 1.714378595352173e-05, 2.779066562652588e-05, 3.843754529953003e-05, 4.908442497253418e-05, 5.973130464553833e-05, 7.037818431854248e-05, 8.102506399154663e-05, 9.167194366455078e-05, 0.00010231882333755493, 0.00011296570301055908, 0.00012361258268356323, 0.00013425946235656738, 0.00014490634202957153, 0.00015555322170257568, 0.00016620010137557983, 0.00017684698104858398, 0.00018749386072158813, 0.00019814074039459229, 0.00020878762006759644, 0.00021943449974060059, 0.00023008137941360474, 0.0002407282590866089, 0.00025137513875961304, 0.0002620220184326172]}, "gradients/encoder.encoder.layers.14.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 1.0, 2.0, 0.0, 4.0, 3.0, 4.0, 4.0, 7.0, 7.0, 12.0, 15.0, 20.0, 22.0, 41.0, 46.0, 63.0, 93.0, 166.0, 152.0, 93.0, 67.0, 52.0, 43.0, 28.0, 20.0, 12.0, 8.0, 5.0, 4.0, 2.0, 4.0, 3.0, 4.0, 0.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.594160079956055e-05, -3.4918077290058136e-05, -3.3894553780555725e-05, -3.2871030271053314e-05, -3.18475067615509e-05, -3.082398325204849e-05, -2.980045974254608e-05, -2.877693623304367e-05, -2.775341272354126e-05, -2.672988921403885e-05, -2.5706365704536438e-05, -2.4682842195034027e-05, -2.3659318685531616e-05, -2.2635795176029205e-05, -2.1612271666526794e-05, -2.0588748157024384e-05, -1.9565224647521973e-05, -1.8541701138019562e-05, -1.751817762851715e-05, -1.649465411901474e-05, -1.547113060951233e-05, -1.4447607100009918e-05, -1.3424083590507507e-05, -1.2400560081005096e-05, -1.1377036571502686e-05, -1.0353513062000275e-05, -9.329989552497864e-06, -8.306466042995453e-06, -7.282942533493042e-06, -6.259419023990631e-06, -5.23589551448822e-06, -4.212372004985809e-06, -3.1888484954833984e-06, -2.1653249859809875e-06, -1.1418014764785767e-06, -1.1827796697616577e-07, 9.052455425262451e-07, 1.928769052028656e-06, 2.952292561531067e-06, 3.975816071033478e-06, 4.999339580535889e-06, 6.0228630900382996e-06, 7.0463865995407104e-06, 8.069910109043121e-06, 9.093433618545532e-06, 1.0116957128047943e-05, 1.1140480637550354e-05, 1.2164004147052765e-05, 1.3187527656555176e-05, 1.4211051166057587e-05, 1.5234574675559998e-05, 1.625809818506241e-05, 1.728162169456482e-05, 1.830514520406723e-05, 1.932866871356964e-05, 2.0352192223072052e-05, 2.1375715732574463e-05, 2.2399239242076874e-05, 2.3422762751579285e-05, 2.4446286261081696e-05, 2.5469809770584106e-05, 2.6493333280086517e-05, 2.7516856789588928e-05, 2.854038029909134e-05, 2.956390380859375e-05]}, "gradients/encoder.encoder.layers.14.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 5.0, 2.0, 6.0, 5.0, 8.0, 4.0, 11.0, 12.0, 19.0, 19.0, 26.0, 43.0, 68.0, 72.0, 115.0, 197.0, 302.0, 608.0, 1361.0, 3839.0, 13713.0, 66733.0, 749198.0, 175046.0, 26383.0, 6700.0, 2107.0, 869.0, 392.0, 232.0, 114.0, 78.0, 75.0, 45.0, 37.0, 32.0, 23.0, 20.0, 12.0, 10.0, 10.0, 3.0, 4.0, 3.0, 2.0, 4.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.00036072731018066406, -0.00034970417618751526, -0.00033868104219436646, -0.00032765790820121765, -0.00031663477420806885, -0.00030561164021492004, -0.00029458850622177124, -0.00028356537222862244, -0.00027254223823547363, -0.00026151910424232483, -0.000250495970249176, -0.00023947283625602722, -0.00022844970226287842, -0.00021742656826972961, -0.0002064034342765808, -0.000195380300283432, -0.0001843571662902832, -0.0001733340322971344, -0.0001623108983039856, -0.0001512877643108368, -0.000140264630317688, -0.00012924149632453918, -0.00011821836233139038, -0.00010719522833824158, -9.617209434509277e-05, -8.514896035194397e-05, -7.412582635879517e-05, -6.310269236564636e-05, -5.207955837249756e-05, -4.1056424379348755e-05, -3.003329038619995e-05, -1.9010156393051147e-05, -7.987022399902344e-06, 3.03611159324646e-06, 1.4059245586395264e-05, 2.5082379579544067e-05, 3.610551357269287e-05, 4.7128647565841675e-05, 5.815178155899048e-05, 6.917491555213928e-05, 8.019804954528809e-05, 9.122118353843689e-05, 0.0001022443175315857, 0.0001132674515247345, 0.0001242905855178833, 0.0001353137195110321, 0.0001463368535041809, 0.0001573599874973297, 0.00016838312149047852, 0.00017940625548362732, 0.00019042938947677612, 0.00020145252346992493, 0.00021247565746307373, 0.00022349879145622253, 0.00023452192544937134, 0.00024554505944252014, 0.00025656819343566895, 0.00026759132742881775, 0.00027861446142196655, 0.00028963759541511536, 0.00030066072940826416, 0.00031168386340141296, 0.00032270699739456177, 0.00033373013138771057, 0.0003447532653808594]}, "gradients/encoder.encoder.layers.14.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 0.0, 3.0, 3.0, 7.0, 13.0, 28.0, 39.0, 62.0, 94.0, 127.0, 202.0, 166.0, 104.0, 62.0, 32.0, 30.0, 14.0, 10.0, 8.0, 5.0, 2.0, 0.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.000274658203125, -0.00026436150074005127, -0.00025406479835510254, -0.0002437680959701538, -0.00023347139358520508, -0.00022317469120025635, -0.00021287798881530762, -0.0002025812864303589, -0.00019228458404541016, -0.00018198788166046143, -0.0001716911792755127, -0.00016139447689056396, -0.00015109777450561523, -0.0001408010721206665, -0.00013050436973571777, -0.00012020766735076904, -0.00010991096496582031, -9.961426258087158e-05, -8.931756019592285e-05, -7.902085781097412e-05, -6.872415542602539e-05, -5.842745304107666e-05, -4.813075065612793e-05, -3.78340482711792e-05, -2.753734588623047e-05, -1.7240643501281738e-05, -6.943941116333008e-06, 3.3527612686157227e-06, 1.3649463653564453e-05, 2.3946166038513184e-05, 3.4242868423461914e-05, 4.4539570808410645e-05, 5.4836273193359375e-05, 6.51329755783081e-05, 7.542967796325684e-05, 8.572638034820557e-05, 9.60230827331543e-05, 0.00010631978511810303, 0.00011661648750305176, 0.0001269131898880005, 0.00013720989227294922, 0.00014750659465789795, 0.00015780329704284668, 0.0001680999994277954, 0.00017839670181274414, 0.00018869340419769287, 0.0001989901065826416, 0.00020928680896759033, 0.00021958351135253906, 0.0002298802137374878, 0.00024017691612243652, 0.00025047361850738525, 0.000260770320892334, 0.0002710670232772827, 0.00028136372566223145, 0.0002916604280471802, 0.0003019571304321289, 0.00031225383281707764, 0.00032255053520202637, 0.0003328472375869751, 0.00034314393997192383, 0.00035344064235687256, 0.0003637373447418213, 0.00037403404712677, 0.00038433074951171875]}, "gradients/encoder.encoder.layers.14.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 4.0, 3.0, 6.0, 12.0, 13.0, 18.0, 32.0, 61.0, 110.0, 272.0, 199.0, 91.0, 58.0, 31.0, 35.0, 16.0, 15.0, 11.0, 6.0, 6.0, 6.0, 2.0, 3.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.1721349060535431, -0.16547891497612, -0.1588229387998581, -0.152166947722435, -0.1455109715461731, -0.13885498046875, -0.1321989893913269, -0.125543013215065, -0.1188870221376419, -0.11223103851079941, -0.10557505488395691, -0.09891906380653381, -0.09226308017969131, -0.08560709655284882, -0.07895111292600632, -0.07229512929916382, -0.06563914567232132, -0.05898316204547882, -0.052327174693346024, -0.045671191066503525, -0.03901520371437073, -0.03235922008752823, -0.02570323646068573, -0.019047249108552933, -0.012391265481710434, -0.005735280457884073, 0.0009207045659422874, 0.007576689124107361, 0.014232674613595009, 0.020888660103082657, 0.027544643729925156, 0.03420063108205795, 0.04085661470890045, 0.04751259833574295, 0.05416858568787575, 0.060824569314718246, 0.06748055666685104, 0.07413654029369354, 0.08079252392053604, 0.08744850754737854, 0.09410449862480164, 0.10076048225164413, 0.10741646587848663, 0.11407245695590973, 0.12072844058275223, 0.12738442420959473, 0.13404041528701782, 0.14069639146327972, 0.14735236763954163, 0.15400835871696472, 0.16066433489322662, 0.16732032597064972, 0.17397630214691162, 0.18063229322433472, 0.1872882843017578, 0.19394426047801971, 0.2006002515554428, 0.2072562426328659, 0.2139122188091278, 0.2205682098865509, 0.2272241860628128, 0.2338801771402359, 0.2405361533164978, 0.2471921443939209, 0.253848135471344]}, "gradients/encoder.encoder.layers.14.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 3.0, 5.0, 8.0, 8.0, 9.0, 10.0, 13.0, 16.0, 18.0, 18.0, 18.0, 30.0, 34.0, 34.0, 36.0, 42.0, 30.0, 40.0, 48.0, 47.0, 53.0, 47.0, 50.0, 55.0, 43.0, 42.0, 39.0, 32.0, 27.0, 16.0, 22.0, 19.0, 12.0, 24.0, 14.0, 7.0, 9.0, 11.0, 5.0, 9.0, 3.0, 1.0, 4.0, 4.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.07878965139389038, -0.0760628879070282, -0.07333612442016602, -0.07060935348272324, -0.06788258999586105, -0.06515582650899887, -0.06242905929684639, -0.05970229208469391, -0.056975528597831726, -0.05424876511096954, -0.05152199789881706, -0.04879523068666458, -0.0460684671998024, -0.043341703712940216, -0.040614936500787735, -0.037888169288635254, -0.03516140580177307, -0.03243464231491089, -0.029707875102758408, -0.026981109753251076, -0.024254344403743744, -0.021527579054236412, -0.01880081370472908, -0.01607404835522175, -0.013347283005714417, -0.010620517656207085, -0.007893752306699753, -0.005166986957192421, -0.002440221607685089, 0.00028654374182224274, 0.0030133090913295746, 0.005740074440836906, 0.008466839790344238, 0.01119360513985157, 0.013920370489358902, 0.016647135838866234, 0.019373901188373566, 0.022100666537880898, 0.02482743188738823, 0.02755419723689556, 0.030280962586402893, 0.033007726073265076, 0.03573449328541756, 0.03846126049757004, 0.04118802398443222, 0.0439147874712944, 0.046641554683446884, 0.049368321895599365, 0.05209508538246155, 0.05482184886932373, 0.05754861608147621, 0.06027538329362869, 0.06300214678049088, 0.06572891026735306, 0.06845568120479584, 0.07118244469165802, 0.0739092081785202, 0.07663597166538239, 0.07936273515224457, 0.08208950608968735, 0.08481626957654953, 0.08754303306341171, 0.09026980400085449, 0.09299656748771667, 0.09572333097457886]}, "gradients/encoder.encoder.layers.13.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 3.0, 0.0, 0.0, 0.0, 1.0, 3.0, 0.0, 2.0, 0.0, 0.0, 0.0, 3.0, 1.0, 0.0, 2.0, 5.0, 3.0, 3.0, 8.0, 16.0, 32.0, 50.0, 121.0, 218.0, 498.0, 1306.0, 3994.0, 4165003.0, 18324.0, 2910.0, 1024.0, 395.0, 166.0, 85.0, 43.0, 27.0, 11.0, 15.0, 10.0, 6.0, 1.0, 2.0, 2.0, 2.0, 2.0], "bins": [-0.0994873046875, -0.09736394882202148, -0.09524059295654297, -0.09311723709106445, -0.09099388122558594, -0.08887052536010742, -0.0867471694946289, -0.08462381362915039, -0.08250045776367188, -0.08037710189819336, -0.07825374603271484, -0.07613039016723633, -0.07400703430175781, -0.0718836784362793, -0.06976032257080078, -0.06763696670532227, -0.06551361083984375, -0.06339025497436523, -0.06126689910888672, -0.0591435432434082, -0.05702018737792969, -0.05489683151245117, -0.052773475646972656, -0.05065011978149414, -0.048526763916015625, -0.04640340805053711, -0.044280052185058594, -0.04215669631958008, -0.04003334045410156, -0.03790998458862305, -0.03578662872314453, -0.033663272857666016, -0.0315399169921875, -0.029416561126708984, -0.02729320526123047, -0.025169849395751953, -0.023046493530273438, -0.020923137664794922, -0.018799781799316406, -0.01667642593383789, -0.014553070068359375, -0.01242971420288086, -0.010306358337402344, -0.008183002471923828, -0.0060596466064453125, -0.003936290740966797, -0.0018129348754882812, 0.0003104209899902344, 0.00243377685546875, 0.004557132720947266, 0.006680488586425781, 0.008803844451904297, 0.010927200317382812, 0.013050556182861328, 0.015173912048339844, 0.01729726791381836, 0.019420623779296875, 0.02154397964477539, 0.023667335510253906, 0.025790691375732422, 0.027914047241210938, 0.030037403106689453, 0.03216075897216797, 0.034284114837646484, 0.036407470703125]}, "gradients/encoder.encoder.layers.13.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 3.0, 5.0, 8.0, 13.0, 14.0, 39.0, 45.0, 93.0, 139.0, 168.0, 152.0, 136.0, 76.0, 50.0, 38.0, 20.0, 9.0, 3.0, 0.0, 2.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0], "bins": [-0.0161285400390625, -0.015784502029418945, -0.01544046401977539, -0.015096426010131836, -0.014752388000488281, -0.014408349990844727, -0.014064311981201172, -0.013720273971557617, -0.013376235961914062, -0.013032197952270508, -0.012688159942626953, -0.012344121932983398, -0.012000083923339844, -0.011656045913696289, -0.011312007904052734, -0.01096796989440918, -0.010623931884765625, -0.01027989387512207, -0.009935855865478516, -0.009591817855834961, -0.009247779846191406, -0.008903741836547852, -0.008559703826904297, -0.008215665817260742, -0.007871627807617188, -0.007527589797973633, -0.007183551788330078, -0.0068395137786865234, -0.006495475769042969, -0.006151437759399414, -0.005807399749755859, -0.005463361740112305, -0.00511932373046875, -0.004775285720825195, -0.004431247711181641, -0.004087209701538086, -0.0037431716918945312, -0.0033991336822509766, -0.003055095672607422, -0.002711057662963867, -0.0023670196533203125, -0.002022981643676758, -0.0016789436340332031, -0.0013349056243896484, -0.0009908676147460938, -0.0006468296051025391, -0.0003027915954589844, 4.124641418457031e-05, 0.000385284423828125, 0.0007293224334716797, 0.0010733604431152344, 0.001417398452758789, 0.0017614364624023438, 0.0021054744720458984, 0.002449512481689453, 0.002793550491333008, 0.0031375885009765625, 0.003481626510620117, 0.003825664520263672, 0.0041697025299072266, 0.004513740539550781, 0.004857778549194336, 0.005201816558837891, 0.005545854568481445, 0.005889892578125]}, "gradients/encoder.encoder.layers.13.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [3.0, 1.0, 3.0, 3.0, 6.0, 9.0, 15.0, 18.0, 23.0, 29.0, 36.0, 41.0, 52.0, 61.0, 91.0, 100.0, 117.0, 182.0, 217.0, 250.0, 356.0, 429.0, 554.0, 786.0, 931.0, 1284.0, 1712.0, 2466.0, 4002.0, 7339.0, 21642.0, 4061054.0, 62206.0, 11324.0, 5289.0, 3118.0, 2091.0, 1504.0, 1123.0, 858.0, 646.0, 507.0, 387.0, 305.0, 244.0, 184.0, 149.0, 133.0, 83.0, 61.0, 74.0, 56.0, 33.0, 31.0, 23.0, 25.0, 13.0, 5.0, 6.0, 7.0, 3.0, 2.0, 1.0, 1.0], "bins": [-0.01861572265625, -0.018029212951660156, -0.017442703247070312, -0.01685619354248047, -0.016269683837890625, -0.01568317413330078, -0.015096664428710938, -0.014510154724121094, -0.01392364501953125, -0.013337135314941406, -0.012750625610351562, -0.012164115905761719, -0.011577606201171875, -0.010991096496582031, -0.010404586791992188, -0.009818077087402344, -0.0092315673828125, -0.008645057678222656, -0.008058547973632812, -0.007472038269042969, -0.006885528564453125, -0.006299018859863281, -0.0057125091552734375, -0.005125999450683594, -0.00453948974609375, -0.003952980041503906, -0.0033664703369140625, -0.0027799606323242188, -0.002193450927734375, -0.0016069412231445312, -0.0010204315185546875, -0.00043392181396484375, 0.000152587890625, 0.0007390975952148438, 0.0013256072998046875, 0.0019121170043945312, 0.002498626708984375, 0.0030851364135742188, 0.0036716461181640625, 0.004258155822753906, 0.00484466552734375, 0.005431175231933594, 0.0060176849365234375, 0.006604194641113281, 0.007190704345703125, 0.007777214050292969, 0.008363723754882812, 0.008950233459472656, 0.0095367431640625, 0.010123252868652344, 0.010709762573242188, 0.011296272277832031, 0.011882781982421875, 0.012469291687011719, 0.013055801391601562, 0.013642311096191406, 0.01422882080078125, 0.014815330505371094, 0.015401840209960938, 0.01598834991455078, 0.016574859619140625, 0.01716136932373047, 0.017747879028320312, 0.018334388732910156, 0.0189208984375]}, "gradients/encoder.encoder.layers.13.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 1.0, 1.0, 3.0, 4.0, 4.0, 6.0, 10.0, 16.0, 74.0, 3765.0, 101.0, 31.0, 14.0, 10.0, 7.0, 4.0, 5.0, 1.0, 1.0, 2.0, 3.0, 0.0, 3.0, 2.0, 1.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.0084228515625, -0.008157014846801758, -0.007891178131103516, -0.0076253414154052734, -0.007359504699707031, -0.007093667984008789, -0.006827831268310547, -0.006561994552612305, -0.0062961578369140625, -0.00603032112121582, -0.005764484405517578, -0.005498647689819336, -0.005232810974121094, -0.0049669742584228516, -0.004701137542724609, -0.004435300827026367, -0.004169464111328125, -0.003903627395629883, -0.0036377906799316406, -0.0033719539642333984, -0.0031061172485351562, -0.002840280532836914, -0.002574443817138672, -0.0023086071014404297, -0.0020427703857421875, -0.0017769336700439453, -0.0015110969543457031, -0.001245260238647461, -0.0009794235229492188, -0.0007135868072509766, -0.0004477500915527344, -0.0001819133758544922, 8.392333984375e-05, 0.0003497600555419922, 0.0006155967712402344, 0.0008814334869384766, 0.0011472702026367188, 0.001413106918334961, 0.0016789436340332031, 0.0019447803497314453, 0.0022106170654296875, 0.0024764537811279297, 0.002742290496826172, 0.003008127212524414, 0.0032739639282226562, 0.0035398006439208984, 0.0038056373596191406, 0.004071474075317383, 0.004337310791015625, 0.004603147506713867, 0.004868984222412109, 0.0051348209381103516, 0.005400657653808594, 0.005666494369506836, 0.005932331085205078, 0.00619816780090332, 0.0064640045166015625, 0.006729841232299805, 0.006995677947998047, 0.007261514663696289, 0.007527351379394531, 0.0077931880950927734, 0.008059024810791016, 0.008324861526489258, 0.0085906982421875]}, "gradients/encoder.encoder.layers.13.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 4.0, 1.0, 6.0, 5.0, 5.0, 6.0, 9.0, 15.0, 20.0, 41.0, 56.0, 118.0, 309.0, 183.0, 93.0, 54.0, 28.0, 14.0, 9.0, 13.0, 6.0, 5.0, 5.0, 5.0, 2.0, 2.0, 2.0, 0.0, 0.0, 2.0], "bins": [-0.06678162515163422, -0.06535442918539047, -0.06392724066972733, -0.06250004470348358, -0.061072852462530136, -0.05964566022157669, -0.05821846425533295, -0.0567912720143795, -0.055364079773426056, -0.05393688753247261, -0.052509695291519165, -0.05108249932527542, -0.049655307084321976, -0.04822811484336853, -0.046800918877124786, -0.04537372663617134, -0.043946534395217896, -0.04251934215426445, -0.041092149913311005, -0.03966495394706726, -0.038237761706113815, -0.03681056946516037, -0.035383373498916626, -0.03395618125796318, -0.032528989017009735, -0.03110179677605629, -0.029674602672457695, -0.0282474085688591, -0.026820216327905655, -0.02539302408695221, -0.023965829983353615, -0.02253863587975502, -0.021111439913511276, -0.01968424767255783, -0.018257053568959236, -0.01682985946536064, -0.015402667224407196, -0.013975474052131176, -0.012548280879855156, -0.011121087707579136, -0.009693894535303116, -0.008266701363027096, -0.006839508190751076, -0.005412315018475056, -0.003985121846199036, -0.0025579286739230156, -0.0011307355016469955, 0.0002964576706290245, 0.0017236508429050446, 0.0031508440151810646, 0.004578037187457085, 0.006005230359733105, 0.007432423532009125, 0.008859616704285145, 0.010286809876561165, 0.011714003048837185, 0.013141196221113205, 0.014568389393389225, 0.015995582565665245, 0.01742277666926384, 0.018849968910217285, 0.02027716115117073, 0.021704355254769325, 0.02313154935836792, 0.024558741599321365]}, "gradients/encoder.encoder.layers.13.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 0.0, 1.0, 3.0, 5.0, 0.0, 6.0, 9.0, 7.0, 10.0, 8.0, 16.0, 22.0, 21.0, 24.0, 24.0, 40.0, 43.0, 25.0, 54.0, 41.0, 64.0, 48.0, 46.0, 51.0, 60.0, 46.0, 58.0, 34.0, 48.0, 28.0, 24.0, 27.0, 21.0, 19.0, 14.0, 16.0, 13.0, 8.0, 7.0, 2.0, 6.0, 4.0, 4.0, 0.0, 3.0, 3.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.018233835697174072, -0.01767660304903984, -0.01711937040090561, -0.016562137752771378, -0.016004905104637146, -0.015447672456502914, -0.014890439808368683, -0.014333207160234451, -0.01377597451210022, -0.013218741863965988, -0.012661509215831757, -0.012104276567697525, -0.011547043919563293, -0.010989811271429062, -0.01043257862329483, -0.009875345975160599, -0.009318113327026367, -0.008760880678892136, -0.008203648030757904, -0.0076464153826236725, -0.007089182734489441, -0.006531950086355209, -0.005974717438220978, -0.005417484790086746, -0.004860252141952515, -0.004303019493818283, -0.0037457868456840515, -0.00318855419754982, -0.0026313215494155884, -0.002074088901281357, -0.0015168562531471252, -0.0009596236050128937, -0.0004023909568786621, 0.00015484169125556946, 0.000712074339389801, 0.0012693069875240326, 0.0018265396356582642, 0.0023837722837924957, 0.0029410049319267273, 0.003498237580060959, 0.00405547022819519, 0.004612702876329422, 0.0051699355244636536, 0.005727168172597885, 0.006284400820732117, 0.006841633468866348, 0.00739886611700058, 0.007956098765134811, 0.008513331413269043, 0.009070564061403275, 0.009627796709537506, 0.010185029357671738, 0.01074226200580597, 0.0112994946539402, 0.011856727302074432, 0.012413959950208664, 0.012971192598342896, 0.013528425246477127, 0.014085657894611359, 0.01464289054274559, 0.015200123190879822, 0.015757355839014053, 0.016314588487148285, 0.016871821135282516, 0.017429053783416748]}, "gradients/encoder.encoder.layers.13.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 2.0, 3.0, 4.0, 4.0, 11.0, 11.0, 4.0, 15.0, 17.0, 28.0, 35.0, 49.0, 76.0, 131.0, 380.0, 1102.0, 5249.0, 51688.0, 877786.0, 101161.0, 8422.0, 1518.0, 400.0, 172.0, 83.0, 54.0, 38.0, 27.0, 31.0, 16.0, 17.0, 6.0, 9.0, 6.0, 5.0, 3.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0848388671875, -0.08187389373779297, -0.07890892028808594, -0.0759439468383789, -0.07297897338867188, -0.07001399993896484, -0.06704902648925781, -0.06408405303955078, -0.06111907958984375, -0.05815410614013672, -0.05518913269042969, -0.052224159240722656, -0.049259185791015625, -0.046294212341308594, -0.04332923889160156, -0.04036426544189453, -0.0373992919921875, -0.03443431854248047, -0.03146934509277344, -0.028504371643066406, -0.025539398193359375, -0.022574424743652344, -0.019609451293945312, -0.01664447784423828, -0.01367950439453125, -0.010714530944824219, -0.0077495574951171875, -0.004784584045410156, -0.001819610595703125, 0.0011453628540039062, 0.0041103363037109375, 0.007075309753417969, 0.010040283203125, 0.013005256652832031, 0.015970230102539062, 0.018935203552246094, 0.021900177001953125, 0.024865150451660156, 0.027830123901367188, 0.03079509735107422, 0.03376007080078125, 0.03672504425048828, 0.03969001770019531, 0.042654991149902344, 0.045619964599609375, 0.048584938049316406, 0.05154991149902344, 0.05451488494873047, 0.0574798583984375, 0.06044483184814453, 0.06340980529785156, 0.0663747787475586, 0.06933975219726562, 0.07230472564697266, 0.07526969909667969, 0.07823467254638672, 0.08119964599609375, 0.08416461944580078, 0.08712959289550781, 0.09009456634521484, 0.09305953979492188, 0.0960245132446289, 0.09898948669433594, 0.10195446014404297, 0.10491943359375]}, "gradients/encoder.encoder.layers.13.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 6.0, 7.0, 13.0, 17.0, 39.0, 45.0, 106.0, 138.0, 164.0, 146.0, 134.0, 78.0, 49.0, 37.0, 17.0, 8.0, 3.0, 1.0, 2.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0], "bins": [-0.01605224609375, -0.015709340572357178, -0.015366435050964355, -0.015023529529571533, -0.014680624008178711, -0.014337718486785889, -0.013994812965393066, -0.013651907444000244, -0.013309001922607422, -0.0129660964012146, -0.012623190879821777, -0.012280285358428955, -0.011937379837036133, -0.01159447431564331, -0.011251568794250488, -0.010908663272857666, -0.010565757751464844, -0.010222852230072021, -0.0098799467086792, -0.009537041187286377, -0.009194135665893555, -0.008851230144500732, -0.00850832462310791, -0.008165419101715088, -0.007822513580322266, -0.007479608058929443, -0.007136702537536621, -0.006793797016143799, -0.0064508914947509766, -0.006107985973358154, -0.005765080451965332, -0.00542217493057251, -0.0050792694091796875, -0.004736363887786865, -0.004393458366394043, -0.004050552845001221, -0.0037076473236083984, -0.003364741802215576, -0.003021836280822754, -0.0026789307594299316, -0.0023360252380371094, -0.001993119716644287, -0.0016502141952514648, -0.0013073086738586426, -0.0009644031524658203, -0.000621497631072998, -0.0002785921096801758, 6.431341171264648e-05, 0.00040721893310546875, 0.000750124454498291, 0.0010930299758911133, 0.0014359354972839355, 0.0017788410186767578, 0.00212174654006958, 0.0024646520614624023, 0.0028075575828552246, 0.003150463104248047, 0.003493368625640869, 0.0038362741470336914, 0.004179179668426514, 0.004522085189819336, 0.004864990711212158, 0.0052078962326049805, 0.005550801753997803, 0.005893707275390625]}, "gradients/encoder.encoder.layers.13.attention.v_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 0.0, 5.0, 3.0, 6.0, 2.0, 9.0, 22.0, 26.0, 48.0, 41.0, 67.0, 120.0, 163.0, 243.0, 379.0, 614.0, 912.0, 1531.0, 2450.0, 3927.0, 6747.0, 11417.0, 19397.0, 34899.0, 65991.0, 138496.0, 416367.0, 172730.0, 77180.0, 40269.0, 22282.0, 12963.0, 7487.0, 4582.0, 2651.0, 1656.0, 991.0, 683.0, 401.0, 257.0, 180.0, 115.0, 77.0, 57.0, 34.0, 24.0, 16.0, 14.0, 9.0, 9.0, 5.0, 4.0, 2.0, 0.0, 4.0, 0.0, 2.0, 0.0, 0.0, 4.0, 2.0], "bins": [-0.01629638671875, -0.015746116638183594, -0.015195846557617188, -0.014645576477050781, -0.014095306396484375, -0.013545036315917969, -0.012994766235351562, -0.012444496154785156, -0.01189422607421875, -0.011343955993652344, -0.010793685913085938, -0.010243415832519531, -0.009693145751953125, -0.009142875671386719, -0.008592605590820312, -0.008042335510253906, -0.0074920654296875, -0.006941795349121094, -0.0063915252685546875, -0.005841255187988281, -0.005290985107421875, -0.004740715026855469, -0.0041904449462890625, -0.0036401748657226562, -0.00308990478515625, -0.0025396347045898438, -0.0019893646240234375, -0.0014390945434570312, -0.000888824462890625, -0.00033855438232421875, 0.0002117156982421875, 0.0007619857788085938, 0.001312255859375, 0.0018625259399414062, 0.0024127960205078125, 0.0029630661010742188, 0.003513336181640625, 0.004063606262207031, 0.0046138763427734375, 0.005164146423339844, 0.00571441650390625, 0.006264686584472656, 0.0068149566650390625, 0.007365226745605469, 0.007915496826171875, 0.008465766906738281, 0.009016036987304688, 0.009566307067871094, 0.0101165771484375, 0.010666847229003906, 0.011217117309570312, 0.011767387390136719, 0.012317657470703125, 0.012867927551269531, 0.013418197631835938, 0.013968467712402344, 0.01451873779296875, 0.015069007873535156, 0.015619277954101562, 0.01616954803466797, 0.016719818115234375, 0.01727008819580078, 0.017820358276367188, 0.018370628356933594, 0.0189208984375]}, "gradients/encoder.encoder.layers.13.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 2.0, 3.0, 3.0, 3.0, 5.0, 4.0, 3.0, 4.0, 13.0, 7.0, 13.0, 12.0, 13.0, 29.0, 22.0, 16.0, 26.0, 20.0, 28.0, 47.0, 35.0, 37.0, 46.0, 33.0, 45.0, 47.0, 50.0, 42.0, 38.0, 37.0, 40.0, 47.0, 30.0, 29.0, 31.0, 25.0, 29.0, 19.0, 15.0, 11.0, 16.0, 8.0, 6.0, 8.0, 7.0, 6.0, 1.0, 0.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.0185546875, -0.018015384674072266, -0.01747608184814453, -0.016936779022216797, -0.016397476196289062, -0.015858173370361328, -0.015318870544433594, -0.01477956771850586, -0.014240264892578125, -0.01370096206665039, -0.013161659240722656, -0.012622356414794922, -0.012083053588867188, -0.011543750762939453, -0.011004447937011719, -0.010465145111083984, -0.00992584228515625, -0.009386539459228516, -0.008847236633300781, -0.008307933807373047, -0.0077686309814453125, -0.007229328155517578, -0.006690025329589844, -0.006150722503662109, -0.005611419677734375, -0.005072116851806641, -0.004532814025878906, -0.003993511199951172, -0.0034542083740234375, -0.002914905548095703, -0.0023756027221679688, -0.0018362998962402344, -0.0012969970703125, -0.0007576942443847656, -0.00021839141845703125, 0.0003209114074707031, 0.0008602142333984375, 0.0013995170593261719, 0.0019388198852539062, 0.0024781227111816406, 0.003017425537109375, 0.0035567283630371094, 0.004096031188964844, 0.004635334014892578, 0.0051746368408203125, 0.005713939666748047, 0.006253242492675781, 0.006792545318603516, 0.00733184814453125, 0.007871150970458984, 0.008410453796386719, 0.008949756622314453, 0.009489059448242188, 0.010028362274169922, 0.010567665100097656, 0.01110696792602539, 0.011646270751953125, 0.01218557357788086, 0.012724876403808594, 0.013264179229736328, 0.013803482055664062, 0.014342784881591797, 0.014882087707519531, 0.015421390533447266, 0.015960693359375]}, "gradients/encoder.encoder.layers.13.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 3.0, 8.0, 4.0, 6.0, 3.0, 3.0, 1.0, 10.0, 16.0, 20.0, 33.0, 36.0, 79.0, 97.0, 187.0, 295.0, 526.0, 990.0, 1949.0, 4529.0, 11483.0, 39247.0, 303514.0, 611247.0, 50348.0, 13924.0, 5175.0, 2237.0, 1100.0, 595.0, 342.0, 207.0, 127.0, 87.0, 33.0, 26.0, 22.0, 21.0, 13.0, 6.0, 5.0, 2.0, 1.0, 0.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 3.0, 0.0, 1.0, 1.0, 0.0, 3.0], "bins": [-0.0002655982971191406, -0.0002567693591117859, -0.00024794042110443115, -0.00023911148309707642, -0.00023028254508972168, -0.00022145360708236694, -0.0002126246690750122, -0.00020379573106765747, -0.00019496679306030273, -0.000186137855052948, -0.00017730891704559326, -0.00016847997903823853, -0.0001596510410308838, -0.00015082210302352905, -0.00014199316501617432, -0.00013316422700881958, -0.00012433528900146484, -0.00011550635099411011, -0.00010667741298675537, -9.784847497940063e-05, -8.90195369720459e-05, -8.019059896469116e-05, -7.136166095733643e-05, -6.253272294998169e-05, -5.370378494262695e-05, -4.487484693527222e-05, -3.604590892791748e-05, -2.7216970920562744e-05, -1.8388032913208008e-05, -9.559094905853271e-06, -7.301568984985352e-07, 8.098781108856201e-06, 1.6927719116210938e-05, 2.5756657123565674e-05, 3.458559513092041e-05, 4.3414533138275146e-05, 5.224347114562988e-05, 6.107240915298462e-05, 6.990134716033936e-05, 7.873028516769409e-05, 8.755922317504883e-05, 9.638816118240356e-05, 0.0001052170991897583, 0.00011404603719711304, 0.00012287497520446777, 0.0001317039132118225, 0.00014053285121917725, 0.00014936178922653198, 0.00015819072723388672, 0.00016701966524124146, 0.0001758486032485962, 0.00018467754125595093, 0.00019350647926330566, 0.0002023354172706604, 0.00021116435527801514, 0.00021999329328536987, 0.0002288222312927246, 0.00023765116930007935, 0.0002464801073074341, 0.0002553090453147888, 0.00026413798332214355, 0.0002729669213294983, 0.00028179585933685303, 0.00029062479734420776, 0.0002994537353515625]}, "gradients/encoder.encoder.layers.13.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 2.0, 2.0, 0.0, 3.0, 1.0, 3.0, 4.0, 7.0, 4.0, 5.0, 9.0, 7.0, 18.0, 28.0, 27.0, 24.0, 35.0, 49.0, 55.0, 94.0, 118.0, 119.0, 101.0, 78.0, 54.0, 26.0, 33.0, 28.0, 10.0, 17.0, 9.0, 8.0, 7.0, 4.0, 3.0, 2.0, 2.0, 4.0, 3.0, 2.0, 2.0, 2.0, 3.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.1696090698242188e-05, -2.103857696056366e-05, -2.0381063222885132e-05, -1.9723549485206604e-05, -1.9066035747528076e-05, -1.840852200984955e-05, -1.775100827217102e-05, -1.7093494534492493e-05, -1.6435980796813965e-05, -1.5778467059135437e-05, -1.512095332145691e-05, -1.4463439583778381e-05, -1.3805925846099854e-05, -1.3148412108421326e-05, -1.2490898370742798e-05, -1.183338463306427e-05, -1.1175870895385742e-05, -1.0518357157707214e-05, -9.860843420028687e-06, -9.203329682350159e-06, -8.545815944671631e-06, -7.888302206993103e-06, -7.230788469314575e-06, -6.573274731636047e-06, -5.9157609939575195e-06, -5.258247256278992e-06, -4.600733518600464e-06, -3.943219780921936e-06, -3.285706043243408e-06, -2.6281923055648804e-06, -1.9706785678863525e-06, -1.3131648302078247e-06, -6.556510925292969e-07, 1.862645149230957e-09, 6.593763828277588e-07, 1.3168901205062866e-06, 1.9744038581848145e-06, 2.6319175958633423e-06, 3.28943133354187e-06, 3.946945071220398e-06, 4.604458808898926e-06, 5.261972546577454e-06, 5.9194862842559814e-06, 6.577000021934509e-06, 7.234513759613037e-06, 7.892027497291565e-06, 8.549541234970093e-06, 9.20705497264862e-06, 9.864568710327148e-06, 1.0522082448005676e-05, 1.1179596185684204e-05, 1.1837109923362732e-05, 1.249462366104126e-05, 1.3152137398719788e-05, 1.3809651136398315e-05, 1.4467164874076843e-05, 1.5124678611755371e-05, 1.57821923494339e-05, 1.6439706087112427e-05, 1.7097219824790955e-05, 1.7754733562469482e-05, 1.841224730014801e-05, 1.9069761037826538e-05, 1.9727274775505066e-05, 2.0384788513183594e-05]}, "gradients/encoder.encoder.layers.13.attention.q_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 3.0, 4.0, 4.0, 8.0, 11.0, 14.0, 21.0, 16.0, 30.0, 35.0, 52.0, 88.0, 159.0, 347.0, 647.0, 1227.0, 2528.0, 5457.0, 12950.0, 36790.0, 175159.0, 692687.0, 80868.0, 22749.0, 8829.0, 3893.0, 1937.0, 951.0, 464.0, 246.0, 149.0, 72.0, 46.0, 37.0, 22.0, 11.0, 13.0, 13.0, 8.0, 4.0, 3.0, 8.0, 2.0, 2.0, 1.0, 1.0, 0.0, 2.0], "bins": [-0.00029730796813964844, -0.0002893339842557907, -0.000281360000371933, -0.00027338601648807526, -0.00026541203260421753, -0.0002574380487203598, -0.0002494640648365021, -0.00024149008095264435, -0.00023351609706878662, -0.0002255421131849289, -0.00021756812930107117, -0.00020959414541721344, -0.0002016201615333557, -0.00019364617764949799, -0.00018567219376564026, -0.00017769820988178253, -0.0001697242259979248, -0.00016175024211406708, -0.00015377625823020935, -0.00014580227434635162, -0.0001378282904624939, -0.00012985430657863617, -0.00012188032269477844, -0.00011390633881092072, -0.00010593235492706299, -9.795837104320526e-05, -8.998438715934753e-05, -8.201040327548981e-05, -7.403641939163208e-05, -6.606243550777435e-05, -5.8088451623916626e-05, -5.01144677400589e-05, -4.214048385620117e-05, -3.4166499972343445e-05, -2.6192516088485718e-05, -1.821853220462799e-05, -1.0244548320770264e-05, -2.2705644369125366e-06, 5.7034194469451904e-06, 1.3677403330802917e-05, 2.1651387214660645e-05, 2.962537109851837e-05, 3.75993549823761e-05, 4.5573338866233826e-05, 5.354732275009155e-05, 6.152130663394928e-05, 6.949529051780701e-05, 7.746927440166473e-05, 8.544325828552246e-05, 9.341724216938019e-05, 0.00010139122605323792, 0.00010936520993709564, 0.00011733919382095337, 0.0001253131777048111, 0.00013328716158866882, 0.00014126114547252655, 0.00014923512935638428, 0.000157209113240242, 0.00016518309712409973, 0.00017315708100795746, 0.00018113106489181519, 0.0001891050487756729, 0.00019707903265953064, 0.00020505301654338837, 0.0002130270004272461]}, "gradients/encoder.encoder.layers.13.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 2.0, 4.0, 3.0, 5.0, 10.0, 7.0, 6.0, 16.0, 19.0, 23.0, 40.0, 44.0, 70.0, 93.0, 173.0, 109.0, 106.0, 77.0, 59.0, 26.0, 36.0, 14.0, 14.0, 14.0, 13.0, 5.0, 5.0, 4.0, 2.0, 6.0, 3.0, 5.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00015163421630859375, -0.00014471635222434998, -0.0001377984881401062, -0.00013088062405586243, -0.00012396275997161865, -0.00011704489588737488, -0.0001101270318031311, -0.00010320916771888733, -9.629130363464355e-05, -8.937343955039978e-05, -8.2455575466156e-05, -7.553771138191223e-05, -6.861984729766846e-05, -6.170198321342468e-05, -5.478411912918091e-05, -4.7866255044937134e-05, -4.094839096069336e-05, -3.4030526876449585e-05, -2.711266279220581e-05, -2.0194798707962036e-05, -1.3276934623718262e-05, -6.359070539474487e-06, 5.587935447692871e-07, 7.4766576290130615e-06, 1.4394521713256836e-05, 2.131238579750061e-05, 2.8230249881744385e-05, 3.514811396598816e-05, 4.2065978050231934e-05, 4.898384213447571e-05, 5.590170621871948e-05, 6.281957030296326e-05, 6.973743438720703e-05, 7.66552984714508e-05, 8.357316255569458e-05, 9.049102663993835e-05, 9.740889072418213e-05, 0.0001043267548084259, 0.00011124461889266968, 0.00011816248297691345, 0.00012508034706115723, 0.000131998211145401, 0.00013891607522964478, 0.00014583393931388855, 0.00015275180339813232, 0.0001596696674823761, 0.00016658753156661987, 0.00017350539565086365, 0.00018042325973510742, 0.0001873411238193512, 0.00019425898790359497, 0.00020117685198783875, 0.00020809471607208252, 0.0002150125801563263, 0.00022193044424057007, 0.00022884830832481384, 0.00023576617240905762, 0.0002426840364933014, 0.00024960190057754517, 0.00025651976466178894, 0.0002634376287460327, 0.0002703554928302765, 0.00027727335691452026, 0.00028419122099876404, 0.0002911090850830078]}, "gradients/encoder.encoder.layers.13.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 4.0, 10.0, 5.0, 12.0, 15.0, 22.0, 39.0, 73.0, 151.0, 293.0, 122.0, 76.0, 46.0, 34.0, 31.0, 21.0, 18.0, 11.0, 10.0, 6.0, 2.0, 4.0, 1.0, 4.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.14017176628112793, -0.1336183100938797, -0.12706485390663147, -0.12051139026880264, -0.11395792663097382, -0.10740447044372559, -0.10085101425647736, -0.09429755806922913, -0.0877440944314003, -0.08119063824415207, -0.07463717460632324, -0.06808371841907501, -0.061530258506536484, -0.054976798593997955, -0.048423342406749725, -0.0418698824942112, -0.03531642258167267, -0.02876296266913414, -0.02220950461924076, -0.01565604656934738, -0.009102586656808853, -0.0025491267442703247, 0.004004329442977905, 0.010557789355516434, 0.017111249268054962, 0.02366470918059349, 0.03021816723048687, 0.03677162528038025, 0.04332508519291878, 0.049878545105457306, 0.056432001292705536, 0.06298546493053436, 0.06953892111778259, 0.07609237730503082, 0.08264584094285965, 0.08919929713010788, 0.0957527607679367, 0.10230621695518494, 0.10885967314243317, 0.1154131293296814, 0.12196659296751022, 0.12852005660533905, 0.13507351279258728, 0.1416269689798355, 0.14818042516708374, 0.15473389625549316, 0.1612873375415802, 0.16784080862998962, 0.17439426481723785, 0.18094772100448608, 0.18750117719173431, 0.19405463337898254, 0.20060810446739197, 0.2071615606546402, 0.21371501684188843, 0.22026847302913666, 0.2268219292163849, 0.23337538540363312, 0.23992884159088135, 0.24648231267929077, 0.2530357539653778, 0.25958922505378723, 0.26614266633987427, 0.2726961374282837, 0.2792496085166931]}, "gradients/encoder.encoder.layers.13.layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 2.0, 2.0, 3.0, 9.0, 2.0, 6.0, 10.0, 7.0, 16.0, 24.0, 25.0, 32.0, 30.0, 38.0, 45.0, 48.0, 50.0, 69.0, 63.0, 65.0, 66.0, 59.0, 53.0, 45.0, 42.0, 36.0, 33.0, 25.0, 19.0, 20.0, 13.0, 13.0, 11.0, 11.0, 5.0, 4.0, 1.0, 4.0, 4.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.09801137447357178, -0.09446129202842712, -0.09091120958328247, -0.08736112713813782, -0.08381104469299316, -0.08026096224784851, -0.07671087235212326, -0.07316078990697861, -0.06961070746183395, -0.0660606250166893, -0.06251054257154465, -0.058960456401109695, -0.05541037395596504, -0.05186029151082039, -0.04831020534038544, -0.044760122895240784, -0.04121004045009613, -0.03765995800495148, -0.034109875559806824, -0.030559789389371872, -0.02700970694422722, -0.023459624499082565, -0.019909540191292763, -0.01635945588350296, -0.012809373438358307, -0.009259290061891079, -0.005709206685423851, -0.002159123308956623, 0.0013909600675106049, 0.004941042512655258, 0.00849112682044506, 0.012041211128234863, 0.015591293573379517, 0.01914137601852417, 0.022691460326313972, 0.026241544634103775, 0.02979162707924843, 0.03334170952439308, 0.03689179569482803, 0.04044187813997269, 0.04399196058511734, 0.04754204303026199, 0.05109212547540665, 0.0546422116458416, 0.05819229409098625, 0.061742376536130905, 0.06529246270656586, 0.06884254515171051, 0.07239262759685516, 0.07594271004199982, 0.07949279248714447, 0.08304287493228912, 0.08659295737743378, 0.09014303982257843, 0.09369312971830368, 0.09724321216344833, 0.10079329460859299, 0.10434337705373764, 0.1078934594988823, 0.11144354194402695, 0.1149936318397522, 0.11854371428489685, 0.1220937967300415, 0.12564387917518616, 0.1291939616203308]}, "gradients/encoder.encoder.layers.12.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 2.0, 4.0, 4.0, 2.0, 10.0, 22.0, 32.0, 53.0, 122.0, 272.0, 648.0, 2028.0, 13411.0, 4172109.0, 3808.0, 1018.0, 407.0, 156.0, 79.0, 39.0, 17.0, 16.0, 8.0, 7.0, 5.0, 2.0, 1.0, 3.0, 1.0, 0.0, 2.0], "bins": [-0.095458984375, -0.09339284896850586, -0.09132671356201172, -0.08926057815551758, -0.08719444274902344, -0.0851283073425293, -0.08306217193603516, -0.08099603652954102, -0.07892990112304688, -0.07686376571655273, -0.0747976303100586, -0.07273149490356445, -0.07066535949707031, -0.06859922409057617, -0.06653308868408203, -0.06446695327758789, -0.06240081787109375, -0.06033468246459961, -0.05826854705810547, -0.05620241165161133, -0.05413627624511719, -0.05207014083862305, -0.050004005432128906, -0.047937870025634766, -0.045871734619140625, -0.043805599212646484, -0.041739463806152344, -0.0396733283996582, -0.03760719299316406, -0.03554105758666992, -0.03347492218017578, -0.03140878677368164, -0.0293426513671875, -0.02727651596069336, -0.02521038055419922, -0.023144245147705078, -0.021078109741210938, -0.019011974334716797, -0.016945838928222656, -0.014879703521728516, -0.012813568115234375, -0.010747432708740234, -0.008681297302246094, -0.006615161895751953, -0.0045490264892578125, -0.002482891082763672, -0.00041675567626953125, 0.0016493797302246094, 0.00371551513671875, 0.005781650543212891, 0.007847785949707031, 0.009913921356201172, 0.011980056762695312, 0.014046192169189453, 0.016112327575683594, 0.018178462982177734, 0.020244598388671875, 0.022310733795166016, 0.024376869201660156, 0.026443004608154297, 0.028509140014648438, 0.030575275421142578, 0.03264141082763672, 0.03470754623413086, 0.036773681640625]}, "gradients/encoder.encoder.layers.12.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 4.0, 9.0, 13.0, 16.0, 35.0, 49.0, 92.0, 135.0, 165.0, 136.0, 146.0, 81.0, 53.0, 43.0, 17.0, 8.0, 5.0, 2.0, 1.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.0153350830078125, -0.015001475811004639, -0.014667868614196777, -0.014334261417388916, -0.014000654220581055, -0.013667047023773193, -0.013333439826965332, -0.01299983263015747, -0.01266622543334961, -0.012332618236541748, -0.011999011039733887, -0.011665403842926025, -0.011331796646118164, -0.010998189449310303, -0.010664582252502441, -0.01033097505569458, -0.009997367858886719, -0.009663760662078857, -0.009330153465270996, -0.008996546268463135, -0.008662939071655273, -0.008329331874847412, -0.00799572467803955, -0.0076621174812316895, -0.007328510284423828, -0.006994903087615967, -0.0066612958908081055, -0.006327688694000244, -0.005994081497192383, -0.0056604743003845215, -0.00532686710357666, -0.004993259906768799, -0.0046596527099609375, -0.004326045513153076, -0.003992438316345215, -0.0036588311195373535, -0.003325223922729492, -0.002991616725921631, -0.0026580095291137695, -0.002324402332305908, -0.001990795135498047, -0.0016571879386901855, -0.0013235807418823242, -0.0009899735450744629, -0.0006563663482666016, -0.00032275915145874023, 1.0848045349121094e-05, 0.0003444552421569824, 0.0006780624389648438, 0.001011669635772705, 0.0013452768325805664, 0.0016788840293884277, 0.002012491226196289, 0.0023460984230041504, 0.0026797056198120117, 0.003013312816619873, 0.0033469200134277344, 0.0036805272102355957, 0.004014134407043457, 0.004347741603851318, 0.00468134880065918, 0.005014955997467041, 0.005348563194274902, 0.005682170391082764, 0.006015777587890625]}, "gradients/encoder.encoder.layers.12.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 4.0, 6.0, 3.0, 6.0, 4.0, 8.0, 11.0, 11.0, 9.0, 13.0, 16.0, 27.0, 28.0, 37.0, 45.0, 70.0, 162.0, 269.0, 570.0, 1055.0, 2129.0, 5006.0, 20478.0, 4140119.0, 15628.0, 4400.0, 2005.0, 978.0, 487.0, 249.0, 147.0, 79.0, 58.0, 43.0, 20.0, 22.0, 11.0, 17.0, 7.0, 17.0, 9.0, 5.0, 1.0, 6.0, 2.0, 7.0, 3.0, 1.0, 4.0, 1.0, 3.0, 2.0], "bins": [-0.043853759765625, -0.04261636734008789, -0.04137897491455078, -0.04014158248901367, -0.03890419006347656, -0.03766679763793945, -0.036429405212402344, -0.035192012786865234, -0.033954620361328125, -0.032717227935791016, -0.031479835510253906, -0.030242443084716797, -0.029005050659179688, -0.027767658233642578, -0.02653026580810547, -0.02529287338256836, -0.02405548095703125, -0.02281808853149414, -0.02158069610595703, -0.020343303680419922, -0.019105911254882812, -0.017868518829345703, -0.016631126403808594, -0.015393733978271484, -0.014156341552734375, -0.012918949127197266, -0.011681556701660156, -0.010444164276123047, -0.009206771850585938, -0.007969379425048828, -0.006731986999511719, -0.005494594573974609, -0.0042572021484375, -0.0030198097229003906, -0.0017824172973632812, -0.0005450248718261719, 0.0006923675537109375, 0.0019297599792480469, 0.0031671524047851562, 0.004404544830322266, 0.005641937255859375, 0.006879329681396484, 0.008116722106933594, 0.009354114532470703, 0.010591506958007812, 0.011828899383544922, 0.013066291809082031, 0.01430368423461914, 0.01554107666015625, 0.01677846908569336, 0.01801586151123047, 0.019253253936767578, 0.020490646362304688, 0.021728038787841797, 0.022965431213378906, 0.024202823638916016, 0.025440216064453125, 0.026677608489990234, 0.027915000915527344, 0.029152393341064453, 0.030389785766601562, 0.03162717819213867, 0.03286457061767578, 0.03410196304321289, 0.03533935546875]}, "gradients/encoder.encoder.layers.12.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 3.0, 2.0, 4.0, 2.0, 2.0, 3.0, 3.0, 10.0, 14.0, 85.0, 3840.0, 78.0, 13.0, 7.0, 6.0, 2.0, 3.0, 2.0, 1.0, 0.0, 4.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00949859619140625, -0.009011626243591309, -0.008524656295776367, -0.008037686347961426, -0.007550716400146484, -0.007063746452331543, -0.0065767765045166016, -0.00608980655670166, -0.005602836608886719, -0.005115866661071777, -0.004628896713256836, -0.0041419267654418945, -0.003654956817626953, -0.0031679868698120117, -0.0026810169219970703, -0.002194046974182129, -0.0017070770263671875, -0.001220107078552246, -0.0007331371307373047, -0.0002461671829223633, 0.00024080276489257812, 0.0007277727127075195, 0.001214742660522461, 0.0017017126083374023, 0.0021886825561523438, 0.002675652503967285, 0.0031626224517822266, 0.003649592399597168, 0.004136562347412109, 0.004623532295227051, 0.005110502243041992, 0.005597472190856934, 0.006084442138671875, 0.006571412086486816, 0.007058382034301758, 0.007545351982116699, 0.00803232192993164, 0.008519291877746582, 0.009006261825561523, 0.009493231773376465, 0.009980201721191406, 0.010467171669006348, 0.010954141616821289, 0.01144111156463623, 0.011928081512451172, 0.012415051460266113, 0.012902021408081055, 0.013388991355895996, 0.013875961303710938, 0.014362931251525879, 0.01484990119934082, 0.015336871147155762, 0.015823841094970703, 0.016310811042785645, 0.016797780990600586, 0.017284750938415527, 0.01777172088623047, 0.01825869083404541, 0.01874566078186035, 0.019232630729675293, 0.019719600677490234, 0.020206570625305176, 0.020693540573120117, 0.02118051052093506, 0.02166748046875]}, "gradients/encoder.encoder.layers.12.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 3.0, 0.0, 0.0, 3.0, 0.0, 2.0, 3.0, 6.0, 4.0, 11.0, 14.0, 19.0, 18.0, 23.0, 37.0, 66.0, 78.0, 119.0, 236.0, 112.0, 76.0, 40.0, 33.0, 20.0, 31.0, 22.0, 5.0, 4.0, 9.0, 3.0, 4.0, 3.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0], "bins": [-0.03700931370258331, -0.03596419841051102, -0.03491908311843872, -0.03387397155165672, -0.03282885625958443, -0.03178374096751213, -0.030738625675439835, -0.029693512246012688, -0.02864839881658554, -0.027603283524513245, -0.026558170095086098, -0.0255130548030138, -0.024467941373586655, -0.02342282608151436, -0.022377710789442062, -0.021332597360014915, -0.02028748206794262, -0.019242366775870323, -0.018197253346443176, -0.01715213805437088, -0.016107024624943733, -0.015061909332871437, -0.014016794972121716, -0.012971680611371994, -0.011926566250622272, -0.010881451889872551, -0.00983633752912283, -0.008791223168373108, -0.007746108341962099, -0.0067009939812123775, -0.005655879154801369, -0.004610764794051647, -0.003565652295947075, -0.0025205379351973534, -0.0014754233416169882, -0.000430308748036623, 0.0006148056127130985, 0.00165991997346282, 0.002705034799873829, 0.0037501491606235504, 0.004795263521373272, 0.0058403778821229935, 0.006885492242872715, 0.007930606603622437, 0.008975721895694733, 0.01002083532512188, 0.011065950617194176, 0.012111064977943897, 0.013156179338693619, 0.01420129369944334, 0.015246408060193062, 0.016291523352265358, 0.017336636781692505, 0.0183817520737648, 0.019426867365837097, 0.020471980795264244, 0.02151709422469139, 0.022562209516763687, 0.023607322946190834, 0.02465243823826313, 0.025697551667690277, 0.026742666959762573, 0.02778778225183487, 0.028832895681262016, 0.029878010973334312]}, "gradients/encoder.encoder.layers.12.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 4.0, 4.0, 4.0, 3.0, 3.0, 3.0, 4.0, 7.0, 12.0, 11.0, 14.0, 18.0, 13.0, 21.0, 24.0, 18.0, 33.0, 33.0, 31.0, 25.0, 42.0, 53.0, 33.0, 40.0, 35.0, 35.0, 44.0, 32.0, 45.0, 34.0, 36.0, 38.0, 35.0, 34.0, 24.0, 22.0, 24.0, 27.0, 19.0, 13.0, 11.0, 9.0, 11.0, 9.0, 9.0, 5.0, 6.0, 2.0, 1.0, 0.0, 2.0, 1.0, 0.0, 3.0], "bins": [-0.017805039882659912, -0.017306458204984665, -0.016807876527309418, -0.01630929484963417, -0.015810713171958923, -0.015312131494283676, -0.014813549816608429, -0.014314968138933182, -0.013816386461257935, -0.013317804783582687, -0.01281922310590744, -0.012320641428232193, -0.011822059750556946, -0.011323478072881699, -0.010824896395206451, -0.010326314717531204, -0.009827733039855957, -0.00932915136218071, -0.008830569684505463, -0.008331988006830215, -0.007833406329154968, -0.007334824651479721, -0.006836242973804474, -0.006337661296129227, -0.0058390796184539795, -0.005340497940778732, -0.004841916263103485, -0.004343334585428238, -0.0038447529077529907, -0.0033461712300777435, -0.0028475895524024963, -0.002349007874727249, -0.001850426197052002, -0.0013518445193767548, -0.0008532628417015076, -0.0003546811640262604, 0.00014390051364898682, 0.000642482191324234, 0.0011410638689994812, 0.0016396455466747284, 0.0021382272243499756, 0.0026368089020252228, 0.00313539057970047, 0.003633972257375717, 0.004132553935050964, 0.0046311356127262115, 0.005129717290401459, 0.005628298968076706, 0.006126880645751953, 0.0066254623234272, 0.0071240440011024475, 0.007622625678777695, 0.008121207356452942, 0.008619789034128189, 0.009118370711803436, 0.009616952389478683, 0.01011553406715393, 0.010614115744829178, 0.011112697422504425, 0.011611279100179672, 0.01210986077785492, 0.012608442455530167, 0.013107024133205414, 0.013605605810880661, 0.014104187488555908]}, "gradients/encoder.encoder.layers.12.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 5.0, 2.0, 3.0, 4.0, 5.0, 6.0, 13.0, 14.0, 8.0, 6.0, 12.0, 20.0, 28.0, 40.0, 31.0, 50.0, 84.0, 149.0, 293.0, 711.0, 2137.0, 8592.0, 49654.0, 704470.0, 248186.0, 26176.0, 5267.0, 1428.0, 509.0, 223.0, 141.0, 78.0, 50.0, 31.0, 29.0, 22.0, 11.0, 16.0, 13.0, 9.0, 8.0, 8.0, 5.0, 8.0, 3.0, 3.0, 1.0, 2.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.06402587890625, -0.06201457977294922, -0.06000328063964844, -0.057991981506347656, -0.055980682373046875, -0.053969383239746094, -0.05195808410644531, -0.04994678497314453, -0.04793548583984375, -0.04592418670654297, -0.04391288757324219, -0.041901588439941406, -0.039890289306640625, -0.037878990173339844, -0.03586769104003906, -0.03385639190673828, -0.0318450927734375, -0.02983379364013672, -0.027822494506835938, -0.025811195373535156, -0.023799896240234375, -0.021788597106933594, -0.019777297973632812, -0.01776599884033203, -0.01575469970703125, -0.013743400573730469, -0.011732101440429688, -0.009720802307128906, -0.007709503173828125, -0.005698204040527344, -0.0036869049072265625, -0.0016756057739257812, 0.000335693359375, 0.0023469924926757812, 0.0043582916259765625, 0.006369590759277344, 0.008380889892578125, 0.010392189025878906, 0.012403488159179688, 0.014414787292480469, 0.01642608642578125, 0.01843738555908203, 0.020448684692382812, 0.022459983825683594, 0.024471282958984375, 0.026482582092285156, 0.028493881225585938, 0.03050518035888672, 0.0325164794921875, 0.03452777862548828, 0.03653907775878906, 0.038550376892089844, 0.040561676025390625, 0.042572975158691406, 0.04458427429199219, 0.04659557342529297, 0.04860687255859375, 0.05061817169189453, 0.05262947082519531, 0.054640769958496094, 0.056652069091796875, 0.058663368225097656, 0.06067466735839844, 0.06268596649169922, 0.064697265625]}, "gradients/encoder.encoder.layers.12.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 6.0, 7.0, 14.0, 15.0, 33.0, 56.0, 95.0, 132.0, 164.0, 147.0, 134.0, 80.0, 57.0, 40.0, 14.0, 8.0, 5.0, 2.0, 1.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.015289306640625, -0.014956295490264893, -0.014623284339904785, -0.014290273189544678, -0.01395726203918457, -0.013624250888824463, -0.013291239738464355, -0.012958228588104248, -0.01262521743774414, -0.012292206287384033, -0.011959195137023926, -0.011626183986663818, -0.011293172836303711, -0.010960161685943604, -0.010627150535583496, -0.010294139385223389, -0.009961128234863281, -0.009628117084503174, -0.009295105934143066, -0.008962094783782959, -0.008629083633422852, -0.008296072483062744, -0.007963061332702637, -0.007630050182342529, -0.007297039031982422, -0.0069640278816223145, -0.006631016731262207, -0.0062980055809021, -0.005964994430541992, -0.005631983280181885, -0.005298972129821777, -0.00496596097946167, -0.0046329498291015625, -0.004299938678741455, -0.003966927528381348, -0.0036339163780212402, -0.003300905227661133, -0.0029678940773010254, -0.002634882926940918, -0.0023018717765808105, -0.001968860626220703, -0.0016358494758605957, -0.0013028383255004883, -0.0009698271751403809, -0.0006368160247802734, -0.000303804874420166, 2.9206275939941406e-05, 0.00036221742630004883, 0.0006952285766601562, 0.0010282397270202637, 0.001361250877380371, 0.0016942620277404785, 0.002027273178100586, 0.0023602843284606934, 0.0026932954788208008, 0.003026306629180908, 0.0033593177795410156, 0.003692328929901123, 0.0040253400802612305, 0.004358351230621338, 0.004691362380981445, 0.005024373531341553, 0.00535738468170166, 0.005690395832061768, 0.006023406982421875]}, "gradients/encoder.encoder.layers.12.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 4.0, 8.0, 11.0, 2.0, 8.0, 16.0, 23.0, 57.0, 52.0, 84.0, 182.0, 342.0, 692.0, 1217.0, 2533.0, 5101.0, 10876.0, 24884.0, 61214.0, 177679.0, 529174.0, 143136.0, 51588.0, 21100.0, 9539.0, 4374.0, 2185.0, 1126.0, 591.0, 290.0, 153.0, 119.0, 71.0, 42.0, 33.0, 23.0, 12.0, 4.0, 4.0, 8.0, 3.0, 2.0, 4.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0251617431640625, -0.02436065673828125, -0.0235595703125, -0.02275848388671875, -0.0219573974609375, -0.02115631103515625, -0.020355224609375, -0.01955413818359375, -0.0187530517578125, -0.01795196533203125, -0.01715087890625, -0.01634979248046875, -0.0155487060546875, -0.01474761962890625, -0.013946533203125, -0.01314544677734375, -0.0123443603515625, -0.01154327392578125, -0.0107421875, -0.00994110107421875, -0.0091400146484375, -0.00833892822265625, -0.007537841796875, -0.00673675537109375, -0.0059356689453125, -0.00513458251953125, -0.00433349609375, -0.00353240966796875, -0.0027313232421875, -0.00193023681640625, -0.001129150390625, -0.00032806396484375, 0.0004730224609375, 0.00127410888671875, 0.0020751953125, 0.00287628173828125, 0.0036773681640625, 0.00447845458984375, 0.005279541015625, 0.00608062744140625, 0.0068817138671875, 0.00768280029296875, 0.00848388671875, 0.00928497314453125, 0.0100860595703125, 0.01088714599609375, 0.011688232421875, 0.01248931884765625, 0.0132904052734375, 0.01409149169921875, 0.014892578125, 0.01569366455078125, 0.0164947509765625, 0.01729583740234375, 0.018096923828125, 0.01889801025390625, 0.0196990966796875, 0.02050018310546875, 0.02130126953125, 0.02210235595703125, 0.0229034423828125, 0.02370452880859375, 0.024505615234375, 0.02530670166015625, 0.0261077880859375]}, "gradients/encoder.encoder.layers.12.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 6.0, 4.0, 4.0, 2.0, 11.0, 15.0, 9.0, 10.0, 17.0, 24.0, 19.0, 32.0, 20.0, 33.0, 43.0, 38.0, 45.0, 51.0, 41.0, 66.0, 49.0, 51.0, 45.0, 51.0, 44.0, 43.0, 35.0, 39.0, 40.0, 28.0, 22.0, 16.0, 11.0, 9.0, 9.0, 9.0, 4.0, 1.0, 5.0, 9.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.0201568603515625, -0.01953864097595215, -0.018920421600341797, -0.018302202224731445, -0.017683982849121094, -0.017065763473510742, -0.01644754409790039, -0.01582932472229004, -0.015211105346679688, -0.014592885971069336, -0.013974666595458984, -0.013356447219848633, -0.012738227844238281, -0.01212000846862793, -0.011501789093017578, -0.010883569717407227, -0.010265350341796875, -0.009647130966186523, -0.009028911590576172, -0.00841069221496582, -0.007792472839355469, -0.007174253463745117, -0.006556034088134766, -0.005937814712524414, -0.0053195953369140625, -0.004701375961303711, -0.004083156585693359, -0.003464937210083008, -0.0028467178344726562, -0.0022284984588623047, -0.0016102790832519531, -0.0009920597076416016, -0.00037384033203125, 0.00024437904357910156, 0.0008625984191894531, 0.0014808177947998047, 0.0020990371704101562, 0.002717256546020508, 0.0033354759216308594, 0.003953695297241211, 0.0045719146728515625, 0.005190134048461914, 0.005808353424072266, 0.006426572799682617, 0.007044792175292969, 0.00766301155090332, 0.008281230926513672, 0.008899450302124023, 0.009517669677734375, 0.010135889053344727, 0.010754108428955078, 0.01137232780456543, 0.011990547180175781, 0.012608766555786133, 0.013226985931396484, 0.013845205307006836, 0.014463424682617188, 0.015081644058227539, 0.01569986343383789, 0.016318082809448242, 0.016936302185058594, 0.017554521560668945, 0.018172740936279297, 0.01879096031188965, 0.0194091796875]}, "gradients/encoder.encoder.layers.12.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 3.0, 2.0, 2.0, 2.0, 4.0, 4.0, 9.0, 10.0, 11.0, 24.0, 32.0, 66.0, 108.0, 197.0, 340.0, 728.0, 1495.0, 3532.0, 9364.0, 30597.0, 183959.0, 720734.0, 69993.0, 16986.0, 5812.0, 2364.0, 1069.0, 479.0, 254.0, 159.0, 71.0, 60.0, 36.0, 22.0, 13.0, 9.0, 6.0, 4.0, 5.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0003070831298828125, -0.00029582157731056213, -0.00028456002473831177, -0.0002732984721660614, -0.00026203691959381104, -0.00025077536702156067, -0.0002395138144493103, -0.00022825226187705994, -0.00021699070930480957, -0.0002057291567325592, -0.00019446760416030884, -0.00018320605158805847, -0.0001719444990158081, -0.00016068294644355774, -0.00014942139387130737, -0.000138159841299057, -0.00012689828872680664, -0.00011563673615455627, -0.00010437518358230591, -9.311363101005554e-05, -8.185207843780518e-05, -7.059052586555481e-05, -5.932897329330444e-05, -4.806742072105408e-05, -3.680586814880371e-05, -2.5544315576553345e-05, -1.4282763004302979e-05, -3.0212104320526123e-06, 8.240342140197754e-06, 1.950189471244812e-05, 3.0763447284698486e-05, 4.202499985694885e-05, 5.328655242919922e-05, 6.454810500144958e-05, 7.580965757369995e-05, 8.707121014595032e-05, 9.833276271820068e-05, 0.00010959431529045105, 0.00012085586786270142, 0.00013211742043495178, 0.00014337897300720215, 0.00015464052557945251, 0.00016590207815170288, 0.00017716363072395325, 0.0001884251832962036, 0.00019968673586845398, 0.00021094828844070435, 0.0002222098410129547, 0.00023347139358520508, 0.00024473294615745544, 0.0002559944987297058, 0.0002672560513019562, 0.00027851760387420654, 0.0002897791564464569, 0.0003010407090187073, 0.00031230226159095764, 0.000323563814163208, 0.0003348253667354584, 0.00034608691930770874, 0.0003573484718799591, 0.00036861002445220947, 0.00037987157702445984, 0.0003911331295967102, 0.00040239468216896057, 0.00041365623474121094]}, "gradients/encoder.encoder.layers.12.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 5.0, 0.0, 5.0, 1.0, 7.0, 7.0, 7.0, 7.0, 7.0, 11.0, 16.0, 15.0, 20.0, 17.0, 13.0, 15.0, 36.0, 45.0, 46.0, 65.0, 113.0, 114.0, 101.0, 72.0, 39.0, 43.0, 33.0, 23.0, 15.0, 15.0, 22.0, 12.0, 8.0, 17.0, 7.0, 8.0, 4.0, 8.0, 1.0, 4.0, 3.0, 2.0, 2.0, 2.0, 1.0, 0.0, 1.0, 2.0, 1.0], "bins": [-3.0934810638427734e-05, -3.0083581805229187e-05, -2.923235297203064e-05, -2.8381124138832092e-05, -2.7529895305633545e-05, -2.6678666472434998e-05, -2.582743763923645e-05, -2.4976208806037903e-05, -2.4124979972839355e-05, -2.3273751139640808e-05, -2.242252230644226e-05, -2.1571293473243713e-05, -2.0720064640045166e-05, -1.986883580684662e-05, -1.901760697364807e-05, -1.8166378140449524e-05, -1.7315149307250977e-05, -1.646392047405243e-05, -1.5612691640853882e-05, -1.4761462807655334e-05, -1.3910233974456787e-05, -1.305900514125824e-05, -1.2207776308059692e-05, -1.1356547474861145e-05, -1.0505318641662598e-05, -9.65408980846405e-06, -8.802860975265503e-06, -7.951632142066956e-06, -7.100403308868408e-06, -6.249174475669861e-06, -5.3979456424713135e-06, -4.546716809272766e-06, -3.6954879760742188e-06, -2.8442591428756714e-06, -1.993030309677124e-06, -1.1418014764785767e-06, -2.905726432800293e-07, 5.606561899185181e-07, 1.4118850231170654e-06, 2.263113856315613e-06, 3.11434268951416e-06, 3.9655715227127075e-06, 4.816800355911255e-06, 5.668029189109802e-06, 6.51925802230835e-06, 7.370486855506897e-06, 8.221715688705444e-06, 9.072944521903992e-06, 9.924173355102539e-06, 1.0775402188301086e-05, 1.1626631021499634e-05, 1.2477859854698181e-05, 1.3329088687896729e-05, 1.4180317521095276e-05, 1.5031546354293823e-05, 1.588277518749237e-05, 1.6734004020690918e-05, 1.7585232853889465e-05, 1.8436461687088013e-05, 1.928769052028656e-05, 2.0138919353485107e-05, 2.0990148186683655e-05, 2.1841377019882202e-05, 2.269260585308075e-05, 2.3543834686279297e-05]}, "gradients/encoder.encoder.layers.12.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 5.0, 4.0, 9.0, 14.0, 12.0, 15.0, 24.0, 58.0, 79.0, 161.0, 212.0, 342.0, 705.0, 1503.0, 3169.0, 7403.0, 19202.0, 62978.0, 480768.0, 382358.0, 58254.0, 18357.0, 6954.0, 2926.0, 1388.0, 743.0, 375.0, 201.0, 138.0, 69.0, 41.0, 21.0, 21.0, 15.0, 15.0, 10.0, 9.0, 1.0, 2.0, 0.0, 2.0, 4.0, 1.0, 3.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00019693374633789062, -0.0001887194812297821, -0.00018050521612167358, -0.00017229095101356506, -0.00016407668590545654, -0.00015586242079734802, -0.0001476481556892395, -0.00013943389058113098, -0.00013121962547302246, -0.00012300536036491394, -0.00011479109525680542, -0.0001065768301486969, -9.836256504058838e-05, -9.014829993247986e-05, -8.193403482437134e-05, -7.371976971626282e-05, -6.55055046081543e-05, -5.7291239500045776e-05, -4.9076974391937256e-05, -4.0862709283828735e-05, -3.2648444175720215e-05, -2.4434179067611694e-05, -1.6219913959503174e-05, -8.005648851394653e-06, 2.086162567138672e-07, 8.422881364822388e-06, 1.6637146472930908e-05, 2.485141158103943e-05, 3.306567668914795e-05, 4.127994179725647e-05, 4.949420690536499e-05, 5.770847201347351e-05, 6.592273712158203e-05, 7.413700222969055e-05, 8.235126733779907e-05, 9.056553244590759e-05, 9.877979755401611e-05, 0.00010699406266212463, 0.00011520832777023315, 0.00012342259287834167, 0.0001316368579864502, 0.00013985112309455872, 0.00014806538820266724, 0.00015627965331077576, 0.00016449391841888428, 0.0001727081835269928, 0.00018092244863510132, 0.00018913671374320984, 0.00019735097885131836, 0.00020556524395942688, 0.0002137795090675354, 0.00022199377417564392, 0.00023020803928375244, 0.00023842230439186096, 0.0002466365694999695, 0.000254850834608078, 0.0002630650997161865, 0.00027127936482429504, 0.00027949362993240356, 0.0002877078950405121, 0.0002959221601486206, 0.0003041364252567291, 0.00031235069036483765, 0.00032056495547294617, 0.0003287792205810547]}, "gradients/encoder.encoder.layers.12.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 0.0, 2.0, 3.0, 0.0, 1.0, 0.0, 2.0, 3.0, 0.0, 4.0, 3.0, 6.0, 1.0, 4.0, 9.0, 20.0, 19.0, 20.0, 24.0, 35.0, 42.0, 44.0, 57.0, 83.0, 85.0, 95.0, 83.0, 83.0, 66.0, 46.0, 37.0, 32.0, 25.0, 11.0, 15.0, 11.0, 8.0, 13.0, 4.0, 7.0, 5.0, 4.0, 1.0, 4.0, 1.0, 0.0, 1.0], "bins": [-0.0002536773681640625, -0.00024774298071861267, -0.00024180859327316284, -0.000235874205827713, -0.00022993981838226318, -0.00022400543093681335, -0.00021807104349136353, -0.0002121366560459137, -0.00020620226860046387, -0.00020026788115501404, -0.0001943334937095642, -0.00018839910626411438, -0.00018246471881866455, -0.00017653033137321472, -0.0001705959439277649, -0.00016466155648231506, -0.00015872716903686523, -0.0001527927815914154, -0.00014685839414596558, -0.00014092400670051575, -0.00013498961925506592, -0.0001290552318096161, -0.00012312084436416626, -0.00011718645691871643, -0.0001112520694732666, -0.00010531768202781677, -9.938329458236694e-05, -9.344890713691711e-05, -8.751451969146729e-05, -8.158013224601746e-05, -7.564574480056763e-05, -6.97113573551178e-05, -6.377696990966797e-05, -5.784258246421814e-05, -5.190819501876831e-05, -4.597380757331848e-05, -4.003942012786865e-05, -3.410503268241882e-05, -2.8170645236968994e-05, -2.2236257791519165e-05, -1.6301870346069336e-05, -1.0367482900619507e-05, -4.433095455169678e-06, 1.5012919902801514e-06, 7.4356794357299805e-06, 1.337006688117981e-05, 1.930445432662964e-05, 2.5238841772079468e-05, 3.11732292175293e-05, 3.7107616662979126e-05, 4.3042004108428955e-05, 4.8976391553878784e-05, 5.491077899932861e-05, 6.084516644477844e-05, 6.677955389022827e-05, 7.27139413356781e-05, 7.864832878112793e-05, 8.458271622657776e-05, 9.051710367202759e-05, 9.645149111747742e-05, 0.00010238587856292725, 0.00010832026600837708, 0.0001142546534538269, 0.00012018904089927673, 0.00012612342834472656]}, "gradients/encoder.encoder.layers.12.layer_norm.weight": {"_type": "histogram", "values": [3.0, 2.0, 2.0, 1.0, 3.0, 3.0, 7.0, 2.0, 7.0, 5.0, 9.0, 9.0, 14.0, 16.0, 27.0, 25.0, 49.0, 50.0, 79.0, 120.0, 166.0, 74.0, 62.0, 48.0, 41.0, 34.0, 30.0, 26.0, 21.0, 13.0, 11.0, 6.0, 7.0, 8.0, 4.0, 4.0, 4.0, 4.0, 0.0, 5.0, 2.0, 5.0, 2.0, 1.0, 4.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0], "bins": [-0.06922060251235962, -0.0657728835940361, -0.062325168401002884, -0.058877453207969666, -0.05542973801493645, -0.05198202282190323, -0.04853430390357971, -0.045086588710546494, -0.041638873517513275, -0.03819115832448006, -0.03474343940615654, -0.03129572421312332, -0.027848009020090103, -0.024400291964411736, -0.020952574908733368, -0.01750485971570015, -0.014057140797376633, -0.01060942467302084, -0.007161708083003759, -0.003713991492986679, -0.0002662753686308861, 0.003181440755724907, 0.0066291578114032745, 0.010076873004436493, 0.01352459006011486, 0.016972307115793228, 0.020420022308826447, 0.023867739364504814, 0.027315456420183182, 0.0307631716132164, 0.03421089053153992, 0.03765860199928284, 0.041106320917606354, 0.04455403611063957, 0.04800175502896309, 0.05144947022199631, 0.054897185415029526, 0.058344900608062744, 0.06179261952638626, 0.06524033844470978, 0.0686880499124527, 0.07213576883077621, 0.07558348029851913, 0.07903119921684265, 0.08247891813516617, 0.08592662960290909, 0.0893743485212326, 0.09282205998897552, 0.09626978635787964, 0.09971750527620316, 0.10316521674394608, 0.10661293566226959, 0.11006065458059311, 0.11350836604833603, 0.11695608496665955, 0.12040379643440247, 0.12385151535272598, 0.1272992342710495, 0.13074694573879242, 0.13419467210769653, 0.13764238357543945, 0.14109009504318237, 0.1445378065109253, 0.1479855328798294, 0.15143324434757233]}, "gradients/encoder.encoder.layers.12.layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 4.0, 1.0, 4.0, 6.0, 8.0, 12.0, 6.0, 9.0, 12.0, 8.0, 17.0, 17.0, 27.0, 23.0, 26.0, 40.0, 37.0, 48.0, 39.0, 48.0, 53.0, 53.0, 68.0, 45.0, 46.0, 43.0, 44.0, 41.0, 25.0, 27.0, 33.0, 19.0, 24.0, 18.0, 21.0, 16.0, 14.0, 13.0, 9.0, 2.0, 3.0, 3.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-0.10321676731109619, -0.10011403262615204, -0.09701129794120789, -0.09390856325626373, -0.09080582857131958, -0.08770310133695602, -0.08460036665201187, -0.08149763196706772, -0.07839489728212357, -0.07529216259717941, -0.07218942791223526, -0.06908669322729111, -0.06598396599292755, -0.0628812313079834, -0.059778496623039246, -0.05667576193809509, -0.05357302725315094, -0.05047029256820679, -0.047367557883262634, -0.04426482692360878, -0.04116209223866463, -0.038059357553720474, -0.03495662659406662, -0.03185389190912247, -0.028751157224178314, -0.02564842253923416, -0.022545689716935158, -0.019442956894636154, -0.016340222209692, -0.013237488456070423, -0.010134754702448845, -0.007032021880149841, -0.0039292871952056885, -0.0008265534415841103, 0.002276180312037468, 0.005378914065659046, 0.008481647819280624, 0.011584381572902203, 0.01468711532652378, 0.017789848148822784, 0.020892582833766937, 0.02399531751871109, 0.027098050341010094, 0.030200783163309097, 0.03330351784825325, 0.0364062525331974, 0.03950898349285126, 0.04261171817779541, 0.04571445286273956, 0.048817187547683716, 0.05191992223262787, 0.05502265319228172, 0.058125387877225876, 0.06122812256217003, 0.06433085352182388, 0.06743358820676804, 0.07053632289171219, 0.07363905757665634, 0.0767417922616005, 0.07984452694654465, 0.0829472541809082, 0.08604998886585236, 0.08915272355079651, 0.09225545823574066, 0.09535819292068481]}, "gradients/encoder.encoder.layers.11.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 6.0, 8.0, 15.0, 23.0, 39.0, 73.0, 115.0, 208.0, 446.0, 4192303.0, 502.0, 245.0, 129.0, 89.0, 46.0, 21.0, 9.0, 4.0, 6.0, 1.0, 3.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 2.0], "bins": [-0.08258056640625, -0.08076810836791992, -0.07895565032958984, -0.07714319229125977, -0.07533073425292969, -0.07351827621459961, -0.07170581817626953, -0.06989336013793945, -0.06808090209960938, -0.0662684440612793, -0.06445598602294922, -0.06264352798461914, -0.06083106994628906, -0.059018611907958984, -0.057206153869628906, -0.05539369583129883, -0.05358123779296875, -0.05176877975463867, -0.049956321716308594, -0.048143863677978516, -0.04633140563964844, -0.04451894760131836, -0.04270648956298828, -0.0408940315246582, -0.039081573486328125, -0.03726911544799805, -0.03545665740966797, -0.03364419937133789, -0.03183174133300781, -0.030019283294677734, -0.028206825256347656, -0.026394367218017578, -0.0245819091796875, -0.022769451141357422, -0.020956993103027344, -0.019144535064697266, -0.017332077026367188, -0.01551961898803711, -0.013707160949707031, -0.011894702911376953, -0.010082244873046875, -0.008269786834716797, -0.006457328796386719, -0.004644870758056641, -0.0028324127197265625, -0.0010199546813964844, 0.0007925033569335938, 0.002604961395263672, 0.00441741943359375, 0.006229877471923828, 0.008042335510253906, 0.009854793548583984, 0.011667251586914062, 0.01347970962524414, 0.015292167663574219, 0.017104625701904297, 0.018917083740234375, 0.020729541778564453, 0.02254199981689453, 0.02435445785522461, 0.026166915893554688, 0.027979373931884766, 0.029791831970214844, 0.03160429000854492, 0.033416748046875]}, "gradients/encoder.encoder.layers.11.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 5.0, 7.0, 9.0, 15.0, 26.0, 48.0, 61.0, 116.0, 156.0, 144.0, 154.0, 112.0, 54.0, 55.0, 24.0, 15.0, 6.0, 2.0, 3.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.01470947265625, -0.014385521411895752, -0.014061570167541504, -0.013737618923187256, -0.013413667678833008, -0.01308971643447876, -0.012765765190124512, -0.012441813945770264, -0.012117862701416016, -0.011793911457061768, -0.01146996021270752, -0.011146008968353271, -0.010822057723999023, -0.010498106479644775, -0.010174155235290527, -0.00985020399093628, -0.009526252746582031, -0.009202301502227783, -0.008878350257873535, -0.008554399013519287, -0.008230447769165039, -0.007906496524810791, -0.007582545280456543, -0.007258594036102295, -0.006934642791748047, -0.006610691547393799, -0.006286740303039551, -0.005962789058685303, -0.005638837814331055, -0.005314886569976807, -0.004990935325622559, -0.0046669840812683105, -0.0043430328369140625, -0.0040190815925598145, -0.0036951303482055664, -0.0033711791038513184, -0.0030472278594970703, -0.0027232766151428223, -0.0023993253707885742, -0.002075374126434326, -0.0017514228820800781, -0.00142747163772583, -0.001103520393371582, -0.000779569149017334, -0.00045561790466308594, -0.0001316666603088379, 0.00019228458404541016, 0.0005162358283996582, 0.0008401870727539062, 0.0011641383171081543, 0.0014880895614624023, 0.0018120408058166504, 0.0021359920501708984, 0.0024599432945251465, 0.0027838945388793945, 0.0031078457832336426, 0.0034317970275878906, 0.0037557482719421387, 0.004079699516296387, 0.004403650760650635, 0.004727602005004883, 0.005051553249359131, 0.005375504493713379, 0.005699455738067627, 0.006023406982421875]}, "gradients/encoder.encoder.layers.11.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 1.0, 4.0, 2.0, 3.0, 9.0, 10.0, 8.0, 19.0, 15.0, 21.0, 26.0, 19.0, 28.0, 38.0, 47.0, 72.0, 72.0, 91.0, 106.0, 177.0, 254.0, 565.0, 2820.0, 4188066.0, 755.0, 310.0, 184.0, 121.0, 88.0, 61.0, 64.0, 41.0, 34.0, 36.0, 25.0, 23.0, 11.0, 17.0, 10.0, 15.0, 3.0, 2.0, 3.0, 6.0, 4.0, 3.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00641632080078125, -0.006216764450073242, -0.006017208099365234, -0.0058176517486572266, -0.005618095397949219, -0.005418539047241211, -0.005218982696533203, -0.005019426345825195, -0.0048198699951171875, -0.00462031364440918, -0.004420757293701172, -0.004221200942993164, -0.004021644592285156, -0.0038220882415771484, -0.0036225318908691406, -0.003422975540161133, -0.003223419189453125, -0.003023862838745117, -0.0028243064880371094, -0.0026247501373291016, -0.0024251937866210938, -0.002225637435913086, -0.002026081085205078, -0.0018265247344970703, -0.0016269683837890625, -0.0014274120330810547, -0.0012278556823730469, -0.001028299331665039, -0.0008287429809570312, -0.0006291866302490234, -0.0004296302795410156, -0.0002300739288330078, -3.0517578125e-05, 0.0001690387725830078, 0.0003685951232910156, 0.0005681514739990234, 0.0007677078247070312, 0.0009672641754150391, 0.0011668205261230469, 0.0013663768768310547, 0.0015659332275390625, 0.0017654895782470703, 0.001965045928955078, 0.002164602279663086, 0.0023641586303710938, 0.0025637149810791016, 0.0027632713317871094, 0.002962827682495117, 0.003162384033203125, 0.003361940383911133, 0.0035614967346191406, 0.0037610530853271484, 0.003960609436035156, 0.004160165786743164, 0.004359722137451172, 0.00455927848815918, 0.0047588348388671875, 0.004958391189575195, 0.005157947540283203, 0.005357503890991211, 0.005557060241699219, 0.0057566165924072266, 0.005956172943115234, 0.006155729293823242, 0.00635528564453125]}, "gradients/encoder.encoder.layers.11.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 4084.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0013742446899414062, -0.0013476237654685974, -0.0013210028409957886, -0.0012943819165229797, -0.001267760992050171, -0.001241140067577362, -0.0012145191431045532, -0.0011878982186317444, -0.0011612772941589355, -0.0011346563696861267, -0.0011080354452133179, -0.001081414520740509, -0.0010547935962677002, -0.0010281726717948914, -0.0010015517473220825, -0.0009749308228492737, -0.0009483098983764648, -0.000921688973903656, -0.0008950680494308472, -0.0008684471249580383, -0.0008418262004852295, -0.0008152052760124207, -0.0007885843515396118, -0.000761963427066803, -0.0007353425025939941, -0.0007087215781211853, -0.0006821006536483765, -0.0006554797291755676, -0.0006288588047027588, -0.00060223788022995, -0.0005756169557571411, -0.0005489960312843323, -0.0005223751068115234, -0.0004957541823387146, -0.00046913325786590576, -0.0004425123333930969, -0.0004158914089202881, -0.00038927048444747925, -0.0003626495599746704, -0.00033602863550186157, -0.00030940771102905273, -0.0002827867865562439, -0.00025616586208343506, -0.00022954493761062622, -0.00020292401313781738, -0.00017630308866500854, -0.0001496821641921997, -0.00012306123971939087, -9.644031524658203e-05, -6.98193907737732e-05, -4.3198466300964355e-05, -1.6577541828155518e-05, 1.004338264465332e-05, 3.666430711746216e-05, 6.3285231590271e-05, 8.990615606307983e-05, 0.00011652708053588867, 0.0001431480050086975, 0.00016976892948150635, 0.00019638985395431519, 0.00022301077842712402, 0.00024963170289993286, 0.0002762526273727417, 0.00030287355184555054, 0.0003294944763183594]}, "gradients/encoder.encoder.layers.11.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 1.0, 2.0, 7.0, 2.0, 5.0, 2.0, 9.0, 10.0, 10.0, 13.0, 22.0, 31.0, 25.0, 42.0, 77.0, 132.0, 252.0, 125.0, 86.0, 41.0, 34.0, 30.0, 11.0, 10.0, 7.0, 5.0, 6.0, 5.0, 3.0, 1.0, 3.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0013026318047195673, -0.0012625500094145536, -0.00122246821410954, -0.0011823864188045263, -0.0011423046234995127, -0.001102222828194499, -0.0010621410328894854, -0.0010220592375844717, -0.000981977442279458, -0.0009418956469744444, -0.0009018138516694307, -0.0008617320563644171, -0.0008216502610594034, -0.0007815684657543898, -0.0007414866122417152, -0.0007014048169367015, -0.000661322963424027, -0.0006212411681190133, -0.0005811593728139997, -0.000541077577508986, -0.0005009957822039723, -0.0004609139577951282, -0.0004208321333862841, -0.00038075033808127046, -0.0003406685427762568, -0.00030058674747124314, -0.0002605049521662295, -0.00022042312775738537, -0.00018034133245237172, -0.00014025953714735806, -0.00010017772729042917, -6.009591743350029e-05, -2.0014005713164806e-05, 2.0067796867806464e-05, 6.0149599448777735e-05, 0.000100231402029749, 0.00014031320461072028, 0.00018039499991573393, 0.00022047680977266282, 0.0002605586196295917, 0.00030064041493460536, 0.000340722210239619, 0.0003808040055446327, 0.0004208858299534768, 0.00046096762525849044, 0.0005010494496673346, 0.0005411312449723482, 0.0005812130402773619, 0.0006212948355823755, 0.0006613766308873892, 0.0007014584261924028, 0.0007415402214974165, 0.0007816220168024302, 0.0008217038121074438, 0.0008617856656201184, 0.000901867460925132, 0.0009419492562301457, 0.0009820311097428203, 0.001022112905047834, 0.0010621947003528476, 0.0011022764956578612, 0.0011423582909628749, 0.0011824400862678885, 0.0012225218815729022, 0.0012626036768779159]}, "gradients/encoder.encoder.layers.11.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 3.0, 3.0, 4.0, 4.0, 2.0, 2.0, 6.0, 10.0, 13.0, 14.0, 15.0, 27.0, 33.0, 31.0, 32.0, 50.0, 28.0, 35.0, 45.0, 49.0, 59.0, 58.0, 62.0, 45.0, 49.0, 52.0, 40.0, 37.0, 41.0, 14.0, 22.0, 19.0, 25.0, 17.0, 12.0, 9.0, 11.0, 6.0, 8.0, 6.0, 2.0, 3.0, 2.0, 3.0, 1.0, 3.0, 3.0, 0.0, 1.0, 1.0, 1.0], "bins": [-0.0007683038711547852, -0.000745994970202446, -0.0007236860692501068, -0.0007013771682977676, -0.0006790682673454285, -0.0006567593663930893, -0.0006344504654407501, -0.000612141564488411, -0.0005898326635360718, -0.0005675237625837326, -0.0005452148616313934, -0.0005229059606790543, -0.0005005970597267151, -0.0004782881587743759, -0.00045597925782203674, -0.00043367035686969757, -0.0004113614559173584, -0.0003890525549650192, -0.00036674365401268005, -0.0003444347530603409, -0.0003221258521080017, -0.00029981695115566254, -0.00027750805020332336, -0.0002551991492509842, -0.00023289024829864502, -0.00021058134734630585, -0.00018827244639396667, -0.0001659635454416275, -0.00014365464448928833, -0.00012134574353694916, -9.903684258460999e-05, -7.672794163227081e-05, -5.441904067993164e-05, -3.211013972759247e-05, -9.801238775253296e-06, 1.2507662177085876e-05, 3.481656312942505e-05, 5.712546408176422e-05, 7.94343650341034e-05, 0.00010174326598644257, 0.00012405216693878174, 0.0001463610678911209, 0.00016866996884346008, 0.00019097886979579926, 0.00021328777074813843, 0.0002355966717004776, 0.00025790557265281677, 0.00028021447360515594, 0.0003025233745574951, 0.0003248322755098343, 0.00034714117646217346, 0.00036945007741451263, 0.0003917589783668518, 0.000414067879319191, 0.00043637678027153015, 0.0004586856812238693, 0.0004809945821762085, 0.0005033034831285477, 0.0005256123840808868, 0.000547921285033226, 0.0005702301859855652, 0.0005925390869379044, 0.0006148479878902435, 0.0006371568888425827, 0.0006594657897949219]}, "gradients/encoder.encoder.layers.11.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 1.0, 2.0, 2.0, 3.0, 2.0, 1.0, 3.0, 7.0, 6.0, 4.0, 3.0, 7.0, 15.0, 10.0, 16.0, 17.0, 19.0, 19.0, 44.0, 44.0, 52.0, 72.0, 127.0, 248.0, 564.0, 1369.0, 4366.0, 17615.0, 102174.0, 764607.0, 128539.0, 20725.0, 4978.0, 1541.0, 590.0, 268.0, 145.0, 85.0, 63.0, 43.0, 34.0, 26.0, 24.0, 19.0, 10.0, 10.0, 11.0, 4.0, 6.0, 10.0, 3.0, 3.0, 3.0, 3.0, 3.0, 2.0, 1.0, 2.0, 0.0, 1.0], "bins": [-0.05377197265625, -0.0521693229675293, -0.050566673278808594, -0.04896402359008789, -0.04736137390136719, -0.045758724212646484, -0.04415607452392578, -0.04255342483520508, -0.040950775146484375, -0.03934812545776367, -0.03774547576904297, -0.036142826080322266, -0.03454017639160156, -0.03293752670288086, -0.031334877014160156, -0.029732227325439453, -0.02812957763671875, -0.026526927947998047, -0.024924278259277344, -0.02332162857055664, -0.021718978881835938, -0.020116329193115234, -0.01851367950439453, -0.016911029815673828, -0.015308380126953125, -0.013705730438232422, -0.012103080749511719, -0.010500431060791016, -0.008897781372070312, -0.007295131683349609, -0.005692481994628906, -0.004089832305908203, -0.0024871826171875, -0.0008845329284667969, 0.0007181167602539062, 0.0023207664489746094, 0.0039234161376953125, 0.005526065826416016, 0.007128715515136719, 0.008731365203857422, 0.010334014892578125, 0.011936664581298828, 0.013539314270019531, 0.015141963958740234, 0.016744613647460938, 0.01834726333618164, 0.019949913024902344, 0.021552562713623047, 0.02315521240234375, 0.024757862091064453, 0.026360511779785156, 0.02796316146850586, 0.029565811157226562, 0.031168460845947266, 0.03277111053466797, 0.03437376022338867, 0.035976409912109375, 0.03757905960083008, 0.03918170928955078, 0.040784358978271484, 0.04238700866699219, 0.04398965835571289, 0.045592308044433594, 0.0471949577331543, 0.048797607421875]}, "gradients/encoder.encoder.layers.11.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 5.0, 7.0, 9.0, 15.0, 27.0, 47.0, 61.0, 117.0, 157.0, 144.0, 153.0, 111.0, 55.0, 53.0, 25.0, 15.0, 6.0, 2.0, 3.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.01470184326171875, -0.014378011226654053, -0.014054179191589355, -0.013730347156524658, -0.013406515121459961, -0.013082683086395264, -0.012758851051330566, -0.01243501901626587, -0.012111186981201172, -0.011787354946136475, -0.011463522911071777, -0.01113969087600708, -0.010815858840942383, -0.010492026805877686, -0.010168194770812988, -0.009844362735748291, -0.009520530700683594, -0.009196698665618896, -0.0088728666305542, -0.008549034595489502, -0.008225202560424805, -0.007901370525360107, -0.00757753849029541, -0.007253706455230713, -0.006929874420166016, -0.006606042385101318, -0.006282210350036621, -0.005958378314971924, -0.0056345462799072266, -0.005310714244842529, -0.004986882209777832, -0.004663050174713135, -0.0043392181396484375, -0.00401538610458374, -0.003691554069519043, -0.0033677220344543457, -0.0030438899993896484, -0.002720057964324951, -0.002396225929260254, -0.0020723938941955566, -0.0017485618591308594, -0.0014247298240661621, -0.0011008977890014648, -0.0007770657539367676, -0.0004532337188720703, -0.00012940168380737305, 0.00019443035125732422, 0.0005182623863220215, 0.0008420944213867188, 0.001165926456451416, 0.0014897584915161133, 0.0018135905265808105, 0.002137422561645508, 0.002461254596710205, 0.0027850866317749023, 0.0031089186668395996, 0.003432750701904297, 0.003756582736968994, 0.004080414772033691, 0.004404246807098389, 0.004728078842163086, 0.005051910877227783, 0.0053757429122924805, 0.005699574947357178, 0.006023406982421875]}, "gradients/encoder.encoder.layers.11.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 3.0, 2.0, 2.0, 8.0, 6.0, 8.0, 7.0, 8.0, 14.0, 25.0, 22.0, 42.0, 66.0, 117.0, 231.0, 452.0, 989.0, 2147.0, 5119.0, 12595.0, 32997.0, 95336.0, 504491.0, 278019.0, 72425.0, 25877.0, 9967.0, 4140.0, 1802.0, 796.0, 386.0, 186.0, 90.0, 66.0, 30.0, 26.0, 17.0, 11.0, 14.0, 9.0, 2.0, 3.0, 5.0, 1.0, 0.0, 4.0, 1.0, 2.0, 0.0, 0.0, 1.0, 2.0], "bins": [-0.0281219482421875, -0.027315616607666016, -0.02650928497314453, -0.025702953338623047, -0.024896621704101562, -0.024090290069580078, -0.023283958435058594, -0.02247762680053711, -0.021671295166015625, -0.02086496353149414, -0.020058631896972656, -0.019252300262451172, -0.018445968627929688, -0.017639636993408203, -0.01683330535888672, -0.016026973724365234, -0.01522064208984375, -0.014414310455322266, -0.013607978820800781, -0.012801647186279297, -0.011995315551757812, -0.011188983917236328, -0.010382652282714844, -0.00957632064819336, -0.008769989013671875, -0.00796365737915039, -0.007157325744628906, -0.006350994110107422, -0.0055446624755859375, -0.004738330841064453, -0.003931999206542969, -0.0031256675720214844, -0.0023193359375, -0.0015130043029785156, -0.0007066726684570312, 9.965896606445312e-05, 0.0009059906005859375, 0.0017123222351074219, 0.0025186538696289062, 0.0033249855041503906, 0.004131317138671875, 0.004937648773193359, 0.005743980407714844, 0.006550312042236328, 0.0073566436767578125, 0.008162975311279297, 0.008969306945800781, 0.009775638580322266, 0.01058197021484375, 0.011388301849365234, 0.012194633483886719, 0.013000965118408203, 0.013807296752929688, 0.014613628387451172, 0.015419960021972656, 0.01622629165649414, 0.017032623291015625, 0.01783895492553711, 0.018645286560058594, 0.019451618194580078, 0.020257949829101562, 0.021064281463623047, 0.02187061309814453, 0.022676944732666016, 0.0234832763671875]}, "gradients/encoder.encoder.layers.11.attention.v_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 4.0, 0.0, 2.0, 5.0, 2.0, 3.0, 10.0, 12.0, 8.0, 14.0, 17.0, 20.0, 26.0, 27.0, 28.0, 36.0, 32.0, 38.0, 42.0, 44.0, 36.0, 51.0, 50.0, 36.0, 43.0, 40.0, 42.0, 44.0, 37.0, 34.0, 42.0, 28.0, 28.0, 22.0, 22.0, 12.0, 21.0, 12.0, 7.0, 5.0, 10.0, 4.0, 8.0, 2.0, 2.0, 3.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.01557159423828125, -0.01503598690032959, -0.01450037956237793, -0.01396477222442627, -0.01342916488647461, -0.01289355754852295, -0.012357950210571289, -0.011822342872619629, -0.011286735534667969, -0.010751128196716309, -0.010215520858764648, -0.009679913520812988, -0.009144306182861328, -0.008608698844909668, -0.008073091506958008, -0.007537484169006348, -0.0070018768310546875, -0.006466269493103027, -0.005930662155151367, -0.005395054817199707, -0.004859447479248047, -0.004323840141296387, -0.0037882328033447266, -0.0032526254653930664, -0.0027170181274414062, -0.002181410789489746, -0.001645803451538086, -0.0011101961135864258, -0.0005745887756347656, -3.898143768310547e-05, 0.0004966259002685547, 0.0010322332382202148, 0.001567840576171875, 0.002103447914123535, 0.0026390552520751953, 0.0031746625900268555, 0.0037102699279785156, 0.004245877265930176, 0.004781484603881836, 0.005317091941833496, 0.005852699279785156, 0.006388306617736816, 0.0069239139556884766, 0.007459521293640137, 0.007995128631591797, 0.008530735969543457, 0.009066343307495117, 0.009601950645446777, 0.010137557983398438, 0.010673165321350098, 0.011208772659301758, 0.011744379997253418, 0.012279987335205078, 0.012815594673156738, 0.013351202011108398, 0.013886809349060059, 0.014422416687011719, 0.014958024024963379, 0.015493631362915039, 0.0160292387008667, 0.01656484603881836, 0.01710045337677002, 0.01763606071472168, 0.01817166805267334, 0.018707275390625]}, "gradients/encoder.encoder.layers.11.attention.k_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 4.0, 2.0, 3.0, 5.0, 7.0, 8.0, 10.0, 15.0, 14.0, 31.0, 43.0, 56.0, 71.0, 124.0, 194.0, 340.0, 538.0, 1006.0, 1919.0, 3980.0, 9443.0, 28239.0, 153148.0, 747631.0, 71012.0, 17612.0, 6664.0, 2940.0, 1502.0, 795.0, 426.0, 277.0, 162.0, 103.0, 79.0, 46.0, 42.0, 24.0, 13.0, 10.0, 10.0, 5.0, 1.0, 3.0, 4.0, 2.0, 0.0, 2.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-0.00021350383758544922, -0.00020689517259597778, -0.00020028650760650635, -0.0001936778426170349, -0.00018706917762756348, -0.00018046051263809204, -0.0001738518476486206, -0.00016724318265914917, -0.00016063451766967773, -0.0001540258526802063, -0.00014741718769073486, -0.00014080852270126343, -0.000134199857711792, -0.00012759119272232056, -0.00012098252773284912, -0.00011437386274337769, -0.00010776519775390625, -0.00010115653276443481, -9.454786777496338e-05, -8.793920278549194e-05, -8.133053779602051e-05, -7.472187280654907e-05, -6.811320781707764e-05, -6.15045428276062e-05, -5.4895877838134766e-05, -4.828721284866333e-05, -4.1678547859191895e-05, -3.506988286972046e-05, -2.8461217880249023e-05, -2.1852552890777588e-05, -1.5243887901306152e-05, -8.635222911834717e-06, -2.0265579223632812e-06, 4.582107067108154e-06, 1.119077205657959e-05, 1.7799437046051025e-05, 2.440810203552246e-05, 3.1016767024993896e-05, 3.762543201446533e-05, 4.423409700393677e-05, 5.08427619934082e-05, 5.745142698287964e-05, 6.406009197235107e-05, 7.066875696182251e-05, 7.727742195129395e-05, 8.388608694076538e-05, 9.049475193023682e-05, 9.710341691970825e-05, 0.00010371208190917969, 0.00011032074689865112, 0.00011692941188812256, 0.000123538076877594, 0.00013014674186706543, 0.00013675540685653687, 0.0001433640718460083, 0.00014997273683547974, 0.00015658140182495117, 0.0001631900668144226, 0.00016979873180389404, 0.00017640739679336548, 0.00018301606178283691, 0.00018962472677230835, 0.00019623339176177979, 0.00020284205675125122, 0.00020945072174072266]}, "gradients/encoder.encoder.layers.11.attention.k_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 1.0, 3.0, 2.0, 3.0, 3.0, 4.0, 3.0, 7.0, 5.0, 11.0, 9.0, 7.0, 13.0, 26.0, 35.0, 36.0, 54.0, 84.0, 107.0, 122.0, 134.0, 77.0, 73.0, 51.0, 26.0, 30.0, 12.0, 18.0, 7.0, 8.0, 3.0, 9.0, 2.0, 2.0, 5.0, 8.0, 5.0, 5.0, 1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-1.9371509552001953e-05, -1.870095729827881e-05, -1.8030405044555664e-05, -1.735985279083252e-05, -1.6689300537109375e-05, -1.601874828338623e-05, -1.5348196029663086e-05, -1.4677643775939941e-05, -1.4007091522216797e-05, -1.3336539268493652e-05, -1.2665987014770508e-05, -1.1995434761047363e-05, -1.1324882507324219e-05, -1.0654330253601074e-05, -9.98377799987793e-06, -9.313225746154785e-06, -8.64267349243164e-06, -7.972121238708496e-06, -7.3015689849853516e-06, -6.631016731262207e-06, -5.9604644775390625e-06, -5.289912223815918e-06, -4.6193599700927734e-06, -3.948807716369629e-06, -3.2782554626464844e-06, -2.60770320892334e-06, -1.9371509552001953e-06, -1.2665987014770508e-06, -5.960464477539062e-07, 7.450580596923828e-08, 7.450580596923828e-07, 1.4156103134155273e-06, 2.086162567138672e-06, 2.7567148208618164e-06, 3.427267074584961e-06, 4.0978193283081055e-06, 4.76837158203125e-06, 5.4389238357543945e-06, 6.109476089477539e-06, 6.780028343200684e-06, 7.450580596923828e-06, 8.121132850646973e-06, 8.791685104370117e-06, 9.462237358093262e-06, 1.0132789611816406e-05, 1.080334186553955e-05, 1.1473894119262695e-05, 1.214444637298584e-05, 1.2814998626708984e-05, 1.3485550880432129e-05, 1.4156103134155273e-05, 1.4826655387878418e-05, 1.5497207641601562e-05, 1.6167759895324707e-05, 1.683831214904785e-05, 1.7508864402770996e-05, 1.817941665649414e-05, 1.8849968910217285e-05, 1.952052116394043e-05, 2.0191073417663574e-05, 2.086162567138672e-05, 2.1532177925109863e-05, 2.2202730178833008e-05, 2.2873282432556152e-05, 2.3543834686279297e-05]}, "gradients/encoder.encoder.layers.11.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 6.0, 4.0, 12.0, 15.0, 43.0, 55.0, 128.0, 236.0, 416.0, 882.0, 1872.0, 4613.0, 13911.0, 85613.0, 848803.0, 71396.0, 12767.0, 4215.0, 1812.0, 851.0, 426.0, 212.0, 108.0, 73.0, 31.0, 23.0, 11.0, 7.0, 4.0, 3.0, 2.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00032591819763183594, -0.0003152191638946533, -0.0003045201301574707, -0.0002938210964202881, -0.00028312206268310547, -0.00027242302894592285, -0.00026172399520874023, -0.0002510249614715576, -0.000240325927734375, -0.00022962689399719238, -0.00021892786026000977, -0.00020822882652282715, -0.00019752979278564453, -0.00018683075904846191, -0.0001761317253112793, -0.00016543269157409668, -0.00015473365783691406, -0.00014403462409973145, -0.00013333559036254883, -0.0001226365566253662, -0.0001119375228881836, -0.00010123848915100098, -9.053945541381836e-05, -7.984042167663574e-05, -6.914138793945312e-05, -5.844235420227051e-05, -4.774332046508789e-05, -3.7044286727905273e-05, -2.6345252990722656e-05, -1.564621925354004e-05, -4.947185516357422e-06, 5.751848220825195e-06, 1.6450881958007812e-05, 2.714991569519043e-05, 3.784894943237305e-05, 4.8547983169555664e-05, 5.924701690673828e-05, 6.99460506439209e-05, 8.064508438110352e-05, 9.134411811828613e-05, 0.00010204315185546875, 0.00011274218559265137, 0.00012344121932983398, 0.0001341402530670166, 0.00014483928680419922, 0.00015553832054138184, 0.00016623735427856445, 0.00017693638801574707, 0.0001876354217529297, 0.0001983344554901123, 0.00020903348922729492, 0.00021973252296447754, 0.00023043155670166016, 0.00024113059043884277, 0.0002518296241760254, 0.000262528657913208, 0.0002732276916503906, 0.00028392672538757324, 0.00029462575912475586, 0.0003053247928619385, 0.0003160238265991211, 0.0003267228603363037, 0.00033742189407348633, 0.00034812092781066895, 0.00035881996154785156]}, "gradients/encoder.encoder.layers.11.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 2.0, 0.0, 3.0, 3.0, 3.0, 8.0, 5.0, 13.0, 11.0, 9.0, 21.0, 26.0, 49.0, 57.0, 100.0, 132.0, 137.0, 134.0, 99.0, 57.0, 49.0, 27.0, 13.0, 12.0, 8.0, 5.0, 4.0, 2.0, 5.0, 1.0, 5.0, 4.0, 0.0, 2.0, 1.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00023818016052246094, -0.0002310723066329956, -0.00022396445274353027, -0.00021685659885406494, -0.0002097487449645996, -0.00020264089107513428, -0.00019553303718566895, -0.0001884251832962036, -0.00018131732940673828, -0.00017420947551727295, -0.00016710162162780762, -0.00015999376773834229, -0.00015288591384887695, -0.00014577805995941162, -0.0001386702060699463, -0.00013156235218048096, -0.00012445449829101562, -0.00011734664440155029, -0.00011023879051208496, -0.00010313093662261963, -9.60230827331543e-05, -8.891522884368896e-05, -8.180737495422363e-05, -7.46995210647583e-05, -6.759166717529297e-05, -6.048381328582764e-05, -5.3375959396362305e-05, -4.626810550689697e-05, -3.916025161743164e-05, -3.205239772796631e-05, -2.4944543838500977e-05, -1.7836689949035645e-05, -1.0728836059570312e-05, -3.6209821701049805e-06, 3.4868717193603516e-06, 1.0594725608825684e-05, 1.7702579498291016e-05, 2.4810433387756348e-05, 3.191828727722168e-05, 3.902614116668701e-05, 4.6133995056152344e-05, 5.3241848945617676e-05, 6.034970283508301e-05, 6.745755672454834e-05, 7.456541061401367e-05, 8.1673264503479e-05, 8.878111839294434e-05, 9.588897228240967e-05, 0.000102996826171875, 0.00011010468006134033, 0.00011721253395080566, 0.000124320387840271, 0.00013142824172973633, 0.00013853609561920166, 0.000145643949508667, 0.00015275180339813232, 0.00015985965728759766, 0.000166967511177063, 0.00017407536506652832, 0.00018118321895599365, 0.00018829107284545898, 0.00019539892673492432, 0.00020250678062438965, 0.00020961463451385498, 0.0002167224884033203]}, "gradients/encoder.encoder.layers.11.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 4.0, 2.0, 7.0, 7.0, 7.0, 11.0, 12.0, 30.0, 38.0, 51.0, 99.0, 266.0, 191.0, 87.0, 58.0, 27.0, 33.0, 24.0, 21.0, 11.0, 10.0, 5.0, 4.0, 3.0, 2.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.11800666153430939, -0.11201057583093643, -0.10601448267698288, -0.10001839697360992, -0.09402230381965637, -0.08802621811628342, -0.08203013241291046, -0.07603403925895691, -0.07003795355558395, -0.064041867852211, -0.058045774698257446, -0.05204968899488449, -0.04605359956622124, -0.04005751013755798, -0.03406142443418503, -0.028065335005521774, -0.02206924557685852, -0.016073156148195267, -0.010077068582177162, -0.004080981016159058, 0.0019151084125041962, 0.00791119784116745, 0.013907283544540405, 0.01990337297320366, 0.025899462401866913, 0.03189555183053017, 0.03789164125919342, 0.043887726962566376, 0.04988381639122963, 0.05587990581989288, 0.06187599152326584, 0.0678720772266388, 0.07386815547943115, 0.07986424118280411, 0.08586033433675766, 0.09185642004013062, 0.09785251319408417, 0.10384859889745712, 0.10984468460083008, 0.11584077775478363, 0.12183686345815659, 0.12783294916152954, 0.1338290423154831, 0.13982513546943665, 0.145821213722229, 0.15181730687618256, 0.1578134000301361, 0.16380947828292847, 0.16980557143688202, 0.17580166459083557, 0.18179774284362793, 0.18779383599758148, 0.19378992915153503, 0.1997860074043274, 0.20578210055828094, 0.2117781937122345, 0.21777427196502686, 0.2237703651189804, 0.22976644337177277, 0.23576253652572632, 0.24175862967967987, 0.24775472283363342, 0.2537508010864258, 0.25974687933921814, 0.2657429873943329]}, "gradients/encoder.encoder.layers.11.layer_norm.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 4.0, 1.0, 4.0, 2.0, 14.0, 8.0, 12.0, 13.0, 20.0, 30.0, 35.0, 40.0, 50.0, 50.0, 54.0, 54.0, 83.0, 63.0, 79.0, 64.0, 58.0, 38.0, 57.0, 39.0, 31.0, 24.0, 27.0, 15.0, 14.0, 9.0, 9.0, 4.0, 3.0, 1.0, 4.0, 4.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0791899561882019, -0.07535061240196228, -0.07151126116514206, -0.06767190992832184, -0.06383256614208221, -0.05999321863055229, -0.05615387111902237, -0.05231452360749245, -0.048475176095962524, -0.0446358285844326, -0.04079648107290268, -0.03695713356137276, -0.033117786049842834, -0.029278438538312912, -0.02543909102678299, -0.021599743515253067, -0.017760396003723145, -0.013921048492193222, -0.0100817009806633, -0.006242353469133377, -0.0024030059576034546, 0.001436341553926468, 0.00527568906545639, 0.009115036576986313, 0.012954384088516235, 0.016793731600046158, 0.02063307911157608, 0.024472426623106003, 0.028311774134635925, 0.03215112164616585, 0.03599046915769577, 0.03982981666922569, 0.043669164180755615, 0.04750851169228554, 0.05134785920381546, 0.05518720671534538, 0.059026554226875305, 0.06286589801311493, 0.06670524924993515, 0.07054460048675537, 0.074383944272995, 0.07822328805923462, 0.08206263929605484, 0.08590199053287506, 0.08974133431911469, 0.09358067810535431, 0.09742002934217453, 0.10125938057899475, 0.10509872436523438, 0.108938068151474, 0.11277741938829422, 0.11661677062511444, 0.12045611441135406, 0.12429545819759369, 0.1281348168849945, 0.13197416067123413, 0.13581350445747375, 0.13965284824371338, 0.143492192029953, 0.14733155071735382, 0.15117089450359344, 0.15501023828983307, 0.1588495969772339, 0.1626889407634735, 0.16652828454971313]}, "gradients/encoder.encoder.layers.10.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 3.0, 3.0, 9.0, 11.0, 24.0, 36.0, 71.0, 151.0, 306.0, 682.0, 4190710.0, 1404.0, 453.0, 210.0, 97.0, 52.0, 27.0, 19.0, 11.0, 2.0, 5.0, 3.0, 1.0, 2.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.02984619140625, -0.029147982597351074, -0.02844977378845215, -0.027751564979553223, -0.027053356170654297, -0.02635514736175537, -0.025656938552856445, -0.02495872974395752, -0.024260520935058594, -0.023562312126159668, -0.022864103317260742, -0.022165894508361816, -0.02146768569946289, -0.020769476890563965, -0.02007126808166504, -0.019373059272766113, -0.018674850463867188, -0.01797664165496826, -0.017278432846069336, -0.01658022403717041, -0.015882015228271484, -0.015183806419372559, -0.014485597610473633, -0.013787388801574707, -0.013089179992675781, -0.012390971183776855, -0.01169276237487793, -0.010994553565979004, -0.010296344757080078, -0.009598135948181152, -0.008899927139282227, -0.0082017183303833, -0.007503509521484375, -0.006805300712585449, -0.0061070919036865234, -0.005408883094787598, -0.004710674285888672, -0.004012465476989746, -0.0033142566680908203, -0.0026160478591918945, -0.0019178390502929688, -0.001219630241394043, -0.0005214214324951172, 0.0001767873764038086, 0.0008749961853027344, 0.0015732049942016602, 0.002271413803100586, 0.0029696226119995117, 0.0036678314208984375, 0.004366040229797363, 0.005064249038696289, 0.005762457847595215, 0.006460666656494141, 0.007158875465393066, 0.007857084274291992, 0.008555293083190918, 0.009253501892089844, 0.00995171070098877, 0.010649919509887695, 0.011348128318786621, 0.012046337127685547, 0.012744545936584473, 0.013442754745483398, 0.014140963554382324, 0.01483917236328125]}, "gradients/encoder.encoder.layers.10.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 6.0, 14.0, 12.0, 27.0, 39.0, 70.0, 118.0, 144.0, 143.0, 147.0, 118.0, 53.0, 58.0, 27.0, 16.0, 7.0, 5.0, 2.0, 2.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.0137481689453125, -0.01343923807144165, -0.0131303071975708, -0.012821376323699951, -0.012512445449829102, -0.012203514575958252, -0.011894583702087402, -0.011585652828216553, -0.011276721954345703, -0.010967791080474854, -0.010658860206604004, -0.010349929332733154, -0.010040998458862305, -0.009732067584991455, -0.009423136711120605, -0.009114205837249756, -0.008805274963378906, -0.008496344089508057, -0.008187413215637207, -0.007878482341766357, -0.007569551467895508, -0.007260620594024658, -0.006951689720153809, -0.006642758846282959, -0.006333827972412109, -0.00602489709854126, -0.00571596622467041, -0.0054070353507995605, -0.005098104476928711, -0.004789173603057861, -0.004480242729187012, -0.004171311855316162, -0.0038623809814453125, -0.003553450107574463, -0.0032445192337036133, -0.0029355883598327637, -0.002626657485961914, -0.0023177266120910645, -0.002008795738220215, -0.0016998648643493652, -0.0013909339904785156, -0.001082003116607666, -0.0007730722427368164, -0.0004641413688659668, -0.0001552104949951172, 0.00015372037887573242, 0.00046265125274658203, 0.0007715821266174316, 0.0010805130004882812, 0.0013894438743591309, 0.0016983747482299805, 0.00200730562210083, 0.0023162364959716797, 0.0026251673698425293, 0.002934098243713379, 0.0032430291175842285, 0.003551959991455078, 0.0038608908653259277, 0.004169821739196777, 0.004478752613067627, 0.0047876834869384766, 0.005096614360809326, 0.005405545234680176, 0.005714476108551025, 0.006023406982421875]}, "gradients/encoder.encoder.layers.10.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 0.0, 1.0, 5.0, 6.0, 1.0, 6.0, 9.0, 17.0, 16.0, 18.0, 39.0, 40.0, 50.0, 73.0, 105.0, 116.0, 155.0, 194.0, 246.0, 313.0, 438.0, 924.0, 4183281.0, 5930.0, 654.0, 382.0, 266.0, 244.0, 164.0, 138.0, 134.0, 78.0, 66.0, 48.0, 30.0, 37.0, 21.0, 10.0, 10.0, 8.0, 8.0, 4.0, 3.0, 3.0, 4.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0164031982421875, -0.015768766403198242, -0.015134334564208984, -0.014499902725219727, -0.013865470886230469, -0.013231039047241211, -0.012596607208251953, -0.011962175369262695, -0.011327743530273438, -0.01069331169128418, -0.010058879852294922, -0.009424448013305664, -0.008790016174316406, -0.008155584335327148, -0.007521152496337891, -0.006886720657348633, -0.006252288818359375, -0.005617856979370117, -0.004983425140380859, -0.0043489933013916016, -0.0037145614624023438, -0.003080129623413086, -0.002445697784423828, -0.0018112659454345703, -0.0011768341064453125, -0.0005424022674560547, 9.202957153320312e-05, 0.0007264614105224609, 0.0013608932495117188, 0.0019953250885009766, 0.0026297569274902344, 0.003264188766479492, 0.00389862060546875, 0.004533052444458008, 0.005167484283447266, 0.0058019161224365234, 0.006436347961425781, 0.007070779800415039, 0.007705211639404297, 0.008339643478393555, 0.008974075317382812, 0.00960850715637207, 0.010242938995361328, 0.010877370834350586, 0.011511802673339844, 0.012146234512329102, 0.01278066635131836, 0.013415098190307617, 0.014049530029296875, 0.014683961868286133, 0.01531839370727539, 0.01595282554626465, 0.016587257385253906, 0.017221689224243164, 0.017856121063232422, 0.01849055290222168, 0.019124984741210938, 0.019759416580200195, 0.020393848419189453, 0.02102828025817871, 0.02166271209716797, 0.022297143936157227, 0.022931575775146484, 0.023566007614135742, 0.024200439453125]}, "gradients/encoder.encoder.layers.10.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 8.0, 4065.0, 11.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00484466552734375, -0.004720181226730347, -0.004595696926116943, -0.00447121262550354, -0.004346728324890137, -0.004222244024276733, -0.00409775972366333, -0.003973275423049927, -0.0038487911224365234, -0.00372430682182312, -0.003599822521209717, -0.0034753382205963135, -0.00335085391998291, -0.003226369619369507, -0.0031018853187561035, -0.0029774010181427, -0.002852916717529297, -0.0027284324169158936, -0.0026039481163024902, -0.002479463815689087, -0.0023549795150756836, -0.0022304952144622803, -0.002106010913848877, -0.0019815266132354736, -0.0018570423126220703, -0.001732558012008667, -0.0016080737113952637, -0.0014835894107818604, -0.001359105110168457, -0.0012346208095550537, -0.0011101365089416504, -0.000985652208328247, -0.0008611679077148438, -0.0007366836071014404, -0.0006121993064880371, -0.0004877150058746338, -0.00036323070526123047, -0.00023874640464782715, -0.00011426210403442383, 1.0222196578979492e-05, 0.0001347064971923828, 0.00025919079780578613, 0.00038367509841918945, 0.0005081593990325928, 0.0006326436996459961, 0.0007571280002593994, 0.0008816123008728027, 0.001006096601486206, 0.0011305809020996094, 0.0012550652027130127, 0.001379549503326416, 0.0015040338039398193, 0.0016285181045532227, 0.001753002405166626, 0.0018774867057800293, 0.0020019710063934326, 0.002126455307006836, 0.0022509396076202393, 0.0023754239082336426, 0.002499908208847046, 0.0026243925094604492, 0.0027488768100738525, 0.002873361110687256, 0.002997845411300659, 0.0031223297119140625]}, "gradients/encoder.encoder.layers.10.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 4.0, 2.0, 0.0, 3.0, 8.0, 3.0, 12.0, 7.0, 13.0, 19.0, 20.0, 15.0, 50.0, 59.0, 92.0, 154.0, 250.0, 108.0, 52.0, 53.0, 23.0, 14.0, 14.0, 12.0, 5.0, 6.0, 10.0, 5.0, 3.0, 1.0, 2.0, 1.0], "bins": [-0.01142862904816866, -0.011192009784281254, -0.010955390520393848, -0.010718772187829018, -0.010482152923941612, -0.010245533660054207, -0.010008914396166801, -0.009772295132279396, -0.00953567586839199, -0.009299056604504585, -0.00906243734061718, -0.008825818076729774, -0.008589199744164944, -0.008352580480277538, -0.008115961216390133, -0.007879341952502728, -0.007642723154276609, -0.007406103890389204, -0.007169485092163086, -0.0069328658282756805, -0.006696246564388275, -0.00645962730050087, -0.006223008502274752, -0.005986389238387346, -0.005749770440161228, -0.005513151176273823, -0.005276532378047705, -0.005039913114160299, -0.004803293850272894, -0.004566675052046776, -0.00433005578815937, -0.004093436524271965, -0.0038568174932152033, -0.0036201984621584415, -0.003383579198271036, -0.0031469601672142744, -0.0029103411361575127, -0.0026737218722701073, -0.0024371028412133455, -0.002200483810156584, -0.0019638645462691784, -0.0017272453987970948, -0.0014906262513250113, -0.0012540072202682495, -0.001017388072796166, -0.0007807689253240824, -0.0005441498942673206, -0.00030753074679523706, -7.09115993231535e-05, 0.00016570751904509962, 0.00040232663741335273, 0.0006389457266777754, 0.000875564874149859, 0.0011121840216219425, 0.0013488030526787043, 0.0015854222001507878, 0.0018220413476228714, 0.002058660378679633, 0.0022952796425670385, 0.0025318986736238003, 0.0027685179375112057, 0.0030051369685679674, 0.003241755999624729, 0.0034783752635121346, 0.0037149942945688963]}, "gradients/encoder.encoder.layers.10.final_layer_norm.bias": {"_type": "histogram", "values": [2.0, 1.0, 2.0, 3.0, 0.0, 1.0, 5.0, 5.0, 8.0, 8.0, 8.0, 7.0, 6.0, 18.0, 14.0, 17.0, 17.0, 17.0, 20.0, 22.0, 32.0, 37.0, 27.0, 29.0, 44.0, 36.0, 40.0, 38.0, 48.0, 38.0, 42.0, 40.0, 38.0, 40.0, 42.0, 37.0, 29.0, 20.0, 27.0, 27.0, 22.0, 17.0, 16.0, 16.0, 9.0, 6.0, 6.0, 5.0, 12.0, 1.0, 3.0, 2.0, 5.0, 2.0, 3.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.0029709339141845703, -0.002868802286684513, -0.002766670659184456, -0.0026645390316843987, -0.0025624074041843414, -0.002460275776684284, -0.002358144149184227, -0.0022560125216841698, -0.0021538808941841125, -0.0020517492666840553, -0.001949617639183998, -0.0018474860116839409, -0.0017453543841838837, -0.0016432227566838264, -0.0015410911291837692, -0.001438959501683712, -0.0013368278741836548, -0.0012346962466835976, -0.0011325646191835403, -0.0010304329916834831, -0.0009283013641834259, -0.0008261697366833687, -0.0007240381091833115, -0.0006219064816832542, -0.000519774854183197, -0.0004176432266831398, -0.0003155115991830826, -0.00021337997168302536, -0.00011124834418296814, -9.11671668291092e-06, 9.30149108171463e-05, 0.00019514653831720352, 0.00029727816581726074, 0.00039940979331731796, 0.0005015414208173752, 0.0006036730483174324, 0.0007058046758174896, 0.0008079363033175468, 0.0009100679308176041, 0.0010121995583176613, 0.0011143311858177185, 0.0012164628133177757, 0.001318594440817833, 0.0014207260683178902, 0.0015228576958179474, 0.0016249893233180046, 0.0017271209508180618, 0.001829252578318119, 0.0019313842058181763, 0.0020335158333182335, 0.0021356474608182907, 0.002237779088318348, 0.002339910715818405, 0.0024420423433184624, 0.0025441739708185196, 0.002646305598318577, 0.002748437225818634, 0.0028505688533186913, 0.0029527004808187485, 0.0030548321083188057, 0.003156963735818863, 0.00325909536331892, 0.0033612269908189774, 0.0034633586183190346, 0.003565490245819092]}, "gradients/encoder.encoder.layers.10.attention.out_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 1.0, 0.0, 6.0, 2.0, 1.0, 3.0, 4.0, 6.0, 17.0, 13.0, 13.0, 14.0, 24.0, 28.0, 41.0, 57.0, 65.0, 120.0, 146.0, 344.0, 720.0, 2004.0, 7181.0, 33353.0, 267701.0, 660562.0, 59791.0, 11322.0, 2921.0, 1059.0, 426.0, 194.0, 139.0, 58.0, 54.0, 29.0, 28.0, 20.0, 18.0, 10.0, 10.0, 12.0, 10.0, 12.0, 7.0, 5.0, 3.0, 6.0, 2.0, 4.0, 0.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.0406494140625, -0.03920602798461914, -0.03776264190673828, -0.03631925582885742, -0.03487586975097656, -0.0334324836730957, -0.031989097595214844, -0.030545711517333984, -0.029102325439453125, -0.027658939361572266, -0.026215553283691406, -0.024772167205810547, -0.023328781127929688, -0.021885395050048828, -0.02044200897216797, -0.01899862289428711, -0.01755523681640625, -0.01611185073852539, -0.014668464660644531, -0.013225078582763672, -0.011781692504882812, -0.010338306427001953, -0.008894920349121094, -0.007451534271240234, -0.006008148193359375, -0.004564762115478516, -0.0031213760375976562, -0.0016779899597167969, -0.0002346038818359375, 0.0012087821960449219, 0.0026521682739257812, 0.004095554351806641, 0.0055389404296875, 0.006982326507568359, 0.008425712585449219, 0.009869098663330078, 0.011312484741210938, 0.012755870819091797, 0.014199256896972656, 0.015642642974853516, 0.017086029052734375, 0.018529415130615234, 0.019972801208496094, 0.021416187286376953, 0.022859573364257812, 0.024302959442138672, 0.02574634552001953, 0.02718973159790039, 0.02863311767578125, 0.03007650375366211, 0.03151988983154297, 0.03296327590942383, 0.03440666198730469, 0.03585004806518555, 0.037293434143066406, 0.038736820220947266, 0.040180206298828125, 0.041623592376708984, 0.043066978454589844, 0.0445103645324707, 0.04595375061035156, 0.04739713668823242, 0.04884052276611328, 0.05028390884399414, 0.051727294921875]}, "gradients/encoder.encoder.layers.10.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 4.0, 5.0, 13.0, 13.0, 29.0, 39.0, 69.0, 119.0, 145.0, 142.0, 152.0, 112.0, 54.0, 57.0, 27.0, 16.0, 5.0, 6.0, 2.0, 2.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.01372528076171875, -0.013416707515716553, -0.013108134269714355, -0.012799561023712158, -0.012490987777709961, -0.012182414531707764, -0.011873841285705566, -0.01156526803970337, -0.011256694793701172, -0.010948121547698975, -0.010639548301696777, -0.01033097505569458, -0.010022401809692383, -0.009713828563690186, -0.009405255317687988, -0.009096682071685791, -0.008788108825683594, -0.008479535579681396, -0.0081709623336792, -0.007862389087677002, -0.007553815841674805, -0.007245242595672607, -0.00693666934967041, -0.006628096103668213, -0.006319522857666016, -0.006010949611663818, -0.005702376365661621, -0.005393803119659424, -0.0050852298736572266, -0.004776656627655029, -0.004468083381652832, -0.004159510135650635, -0.0038509368896484375, -0.0035423636436462402, -0.003233790397644043, -0.0029252171516418457, -0.0026166439056396484, -0.002308070659637451, -0.001999497413635254, -0.0016909241676330566, -0.0013823509216308594, -0.0010737776756286621, -0.0007652044296264648, -0.0004566311836242676, -0.0001480579376220703, 0.00016051530838012695, 0.0004690885543823242, 0.0007776618003845215, 0.0010862350463867188, 0.001394808292388916, 0.0017033815383911133, 0.0020119547843933105, 0.002320528030395508, 0.002629101276397705, 0.0029376745223999023, 0.0032462477684020996, 0.003554821014404297, 0.003863394260406494, 0.004171967506408691, 0.004480540752410889, 0.004789113998413086, 0.005097687244415283, 0.0054062604904174805, 0.005714833736419678, 0.006023406982421875]}, "gradients/encoder.encoder.layers.10.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 3.0, 2.0, 1.0, 3.0, 4.0, 3.0, 9.0, 5.0, 12.0, 12.0, 14.0, 31.0, 27.0, 45.0, 66.0, 105.0, 275.0, 625.0, 1350.0, 3284.0, 8301.0, 22350.0, 69154.0, 357827.0, 469990.0, 75670.0, 24197.0, 8915.0, 3521.0, 1473.0, 632.0, 281.0, 139.0, 69.0, 34.0, 37.0, 27.0, 14.0, 14.0, 12.0, 12.0, 5.0, 8.0, 6.0, 3.0, 1.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0191497802734375, -0.018384933471679688, -0.017620086669921875, -0.016855239868164062, -0.01609039306640625, -0.015325546264648438, -0.014560699462890625, -0.013795852661132812, -0.013031005859375, -0.012266159057617188, -0.011501312255859375, -0.010736465454101562, -0.00997161865234375, -0.009206771850585938, -0.008441925048828125, -0.0076770782470703125, -0.0069122314453125, -0.0061473846435546875, -0.005382537841796875, -0.0046176910400390625, -0.00385284423828125, -0.0030879974365234375, -0.002323150634765625, -0.0015583038330078125, -0.00079345703125, -2.86102294921875e-05, 0.000736236572265625, 0.0015010833740234375, 0.00226593017578125, 0.0030307769775390625, 0.003795623779296875, 0.0045604705810546875, 0.0053253173828125, 0.0060901641845703125, 0.006855010986328125, 0.0076198577880859375, 0.00838470458984375, 0.009149551391601562, 0.009914398193359375, 0.010679244995117188, 0.011444091796875, 0.012208938598632812, 0.012973785400390625, 0.013738632202148438, 0.01450347900390625, 0.015268325805664062, 0.016033172607421875, 0.016798019409179688, 0.0175628662109375, 0.018327713012695312, 0.019092559814453125, 0.019857406616210938, 0.02062225341796875, 0.021387100219726562, 0.022151947021484375, 0.022916793823242188, 0.023681640625, 0.024446487426757812, 0.025211334228515625, 0.025976181030273438, 0.02674102783203125, 0.027505874633789062, 0.028270721435546875, 0.029035568237304688, 0.0298004150390625]}, "gradients/encoder.encoder.layers.10.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 2.0, 5.0, 8.0, 5.0, 11.0, 10.0, 14.0, 9.0, 19.0, 22.0, 19.0, 17.0, 31.0, 39.0, 33.0, 44.0, 41.0, 44.0, 33.0, 43.0, 46.0, 37.0, 56.0, 49.0, 49.0, 43.0, 34.0, 44.0, 36.0, 24.0, 20.0, 25.0, 17.0, 23.0, 13.0, 8.0, 12.0, 4.0, 9.0, 2.0, 6.0, 3.0, 1.0, 2.0, 2.0, 2.0], "bins": [-0.019287109375, -0.018793940544128418, -0.018300771713256836, -0.017807602882385254, -0.017314434051513672, -0.01682126522064209, -0.016328096389770508, -0.015834927558898926, -0.015341758728027344, -0.014848589897155762, -0.01435542106628418, -0.013862252235412598, -0.013369083404541016, -0.012875914573669434, -0.012382745742797852, -0.01188957691192627, -0.011396408081054688, -0.010903239250183105, -0.010410070419311523, -0.009916901588439941, -0.00942373275756836, -0.008930563926696777, -0.008437395095825195, -0.007944226264953613, -0.007451057434082031, -0.006957888603210449, -0.006464719772338867, -0.005971550941467285, -0.005478382110595703, -0.004985213279724121, -0.004492044448852539, -0.003998875617980957, -0.003505706787109375, -0.003012537956237793, -0.002519369125366211, -0.002026200294494629, -0.0015330314636230469, -0.0010398626327514648, -0.0005466938018798828, -5.352497100830078e-05, 0.00043964385986328125, 0.0009328126907348633, 0.0014259815216064453, 0.0019191503524780273, 0.0024123191833496094, 0.0029054880142211914, 0.0033986568450927734, 0.0038918256759643555, 0.0043849945068359375, 0.0048781633377075195, 0.0053713321685791016, 0.005864500999450684, 0.006357669830322266, 0.006850838661193848, 0.00734400749206543, 0.007837176322937012, 0.008330345153808594, 0.008823513984680176, 0.009316682815551758, 0.00980985164642334, 0.010303020477294922, 0.010796189308166504, 0.011289358139038086, 0.011782526969909668, 0.01227569580078125]}, "gradients/encoder.encoder.layers.10.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 3.0, 4.0, 2.0, 3.0, 9.0, 11.0, 23.0, 20.0, 34.0, 69.0, 101.0, 201.0, 371.0, 941.0, 2613.0, 9302.0, 57128.0, 865194.0, 94546.0, 12700.0, 3159.0, 1137.0, 479.0, 221.0, 108.0, 81.0, 29.0, 26.0, 19.0, 8.0, 2.0, 8.0, 7.0, 2.0, 4.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.0002770423889160156, -0.0002692602574825287, -0.00026147812604904175, -0.0002536959946155548, -0.00024591386318206787, -0.00023813173174858093, -0.000230349600315094, -0.00022256746888160706, -0.00021478533744812012, -0.00020700320601463318, -0.00019922107458114624, -0.0001914389431476593, -0.00018365681171417236, -0.00017587468028068542, -0.00016809254884719849, -0.00016031041741371155, -0.0001525282859802246, -0.00014474615454673767, -0.00013696402311325073, -0.0001291818916797638, -0.00012139976024627686, -0.00011361762881278992, -0.00010583549737930298, -9.805336594581604e-05, -9.02712345123291e-05, -8.248910307884216e-05, -7.470697164535522e-05, -6.692484021186829e-05, -5.914270877838135e-05, -5.136057734489441e-05, -4.357844591140747e-05, -3.579631447792053e-05, -2.8014183044433594e-05, -2.0232051610946655e-05, -1.2449920177459717e-05, -4.667788743972778e-06, 3.11434268951416e-06, 1.0896474123001099e-05, 1.8678605556488037e-05, 2.6460736989974976e-05, 3.4242868423461914e-05, 4.202499985694885e-05, 4.980713129043579e-05, 5.758926272392273e-05, 6.537139415740967e-05, 7.31535255908966e-05, 8.093565702438354e-05, 8.871778845787048e-05, 9.649991989135742e-05, 0.00010428205132484436, 0.0001120641827583313, 0.00011984631419181824, 0.00012762844562530518, 0.00013541057705879211, 0.00014319270849227905, 0.000150974839925766, 0.00015875697135925293, 0.00016653910279273987, 0.0001743212342262268, 0.00018210336565971375, 0.00018988549709320068, 0.00019766762852668762, 0.00020544975996017456, 0.0002132318913936615, 0.00022101402282714844]}, "gradients/encoder.encoder.layers.10.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 2.0, 1.0, 2.0, 2.0, 4.0, 0.0, 4.0, 4.0, 3.0, 4.0, 5.0, 10.0, 14.0, 11.0, 26.0, 31.0, 39.0, 62.0, 95.0, 148.0, 147.0, 111.0, 91.0, 55.0, 40.0, 24.0, 14.0, 12.0, 12.0, 3.0, 12.0, 6.0, 3.0, 5.0, 2.0, 3.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.9669532775878906e-05, -1.9078142940998077e-05, -1.848675310611725e-05, -1.789536327123642e-05, -1.730397343635559e-05, -1.6712583601474762e-05, -1.6121193766593933e-05, -1.5529803931713104e-05, -1.4938414096832275e-05, -1.4347024261951447e-05, -1.3755634427070618e-05, -1.3164244592189789e-05, -1.257285475730896e-05, -1.1981464922428131e-05, -1.1390075087547302e-05, -1.0798685252666473e-05, -1.0207295417785645e-05, -9.615905582904816e-06, -9.024515748023987e-06, -8.433125913143158e-06, -7.841736078262329e-06, -7.2503462433815e-06, -6.658956408500671e-06, -6.0675665736198425e-06, -5.476176738739014e-06, -4.884786903858185e-06, -4.293397068977356e-06, -3.702007234096527e-06, -3.1106173992156982e-06, -2.5192275643348694e-06, -1.9278377294540405e-06, -1.3364478945732117e-06, -7.450580596923828e-07, -1.5366822481155396e-07, 4.377216100692749e-07, 1.0291114449501038e-06, 1.6205012798309326e-06, 2.2118911147117615e-06, 2.8032809495925903e-06, 3.394670784473419e-06, 3.986060619354248e-06, 4.577450454235077e-06, 5.168840289115906e-06, 5.760230123996735e-06, 6.3516199588775635e-06, 6.943009793758392e-06, 7.534399628639221e-06, 8.12578946352005e-06, 8.717179298400879e-06, 9.308569133281708e-06, 9.899958968162537e-06, 1.0491348803043365e-05, 1.1082738637924194e-05, 1.1674128472805023e-05, 1.2265518307685852e-05, 1.2856908142566681e-05, 1.344829797744751e-05, 1.4039687812328339e-05, 1.4631077647209167e-05, 1.5222467482089996e-05, 1.5813857316970825e-05, 1.6405247151851654e-05, 1.6996636986732483e-05, 1.7588026821613312e-05, 1.817941665649414e-05]}, "gradients/encoder.encoder.layers.10.attention.q_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 5.0, 5.0, 1.0, 7.0, 10.0, 14.0, 12.0, 18.0, 29.0, 40.0, 74.0, 141.0, 360.0, 1129.0, 4910.0, 32497.0, 892389.0, 104238.0, 9747.0, 1955.0, 534.0, 192.0, 95.0, 39.0, 36.0, 27.0, 19.0, 13.0, 9.0, 9.0, 5.0, 6.0, 3.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00020456314086914062, -0.00019362568855285645, -0.00018268823623657227, -0.00017175078392028809, -0.0001608133316040039, -0.00014987587928771973, -0.00013893842697143555, -0.00012800097465515137, -0.00011706352233886719, -0.00010612607002258301, -9.518861770629883e-05, -8.425116539001465e-05, -7.331371307373047e-05, -6.237626075744629e-05, -5.143880844116211e-05, -4.050135612487793e-05, -2.956390380859375e-05, -1.862645149230957e-05, -7.68899917602539e-06, 3.248453140258789e-06, 1.4185905456542969e-05, 2.512335777282715e-05, 3.606081008911133e-05, 4.699826240539551e-05, 5.793571472167969e-05, 6.887316703796387e-05, 7.981061935424805e-05, 9.074807167053223e-05, 0.0001016855239868164, 0.00011262297630310059, 0.00012356042861938477, 0.00013449788093566895, 0.00014543533325195312, 0.0001563727855682373, 0.00016731023788452148, 0.00017824769020080566, 0.00018918514251708984, 0.00020012259483337402, 0.0002110600471496582, 0.00022199749946594238, 0.00023293495178222656, 0.00024387240409851074, 0.0002548098564147949, 0.0002657473087310791, 0.0002766847610473633, 0.00028762221336364746, 0.00029855966567993164, 0.0003094971179962158, 0.0003204345703125, 0.0003313720226287842, 0.00034230947494506836, 0.00035324692726135254, 0.0003641843795776367, 0.0003751218318939209, 0.0003860592842102051, 0.00039699673652648926, 0.00040793418884277344, 0.0004188716411590576, 0.0004298090934753418, 0.000440746545791626, 0.00045168399810791016, 0.00046262145042419434, 0.0004735589027404785, 0.0004844963550567627, 0.0004954338073730469]}, "gradients/encoder.encoder.layers.10.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 4.0, 5.0, 9.0, 7.0, 9.0, 14.0, 14.0, 23.0, 36.0, 67.0, 102.0, 173.0, 153.0, 138.0, 93.0, 57.0, 38.0, 24.0, 19.0, 10.0, 6.0, 4.0, 3.0, 3.0, 1.0, 2.0, 1.0, 1.0], "bins": [-0.0003204345703125, -0.0003137849271297455, -0.00030713528394699097, -0.00030048564076423645, -0.00029383599758148193, -0.0002871863543987274, -0.0002805367112159729, -0.0002738870680332184, -0.00026723742485046387, -0.00026058778166770935, -0.00025393813848495483, -0.0002472884953022003, -0.0002406388521194458, -0.00023398920893669128, -0.00022733956575393677, -0.00022068992257118225, -0.00021404027938842773, -0.00020739063620567322, -0.0002007409930229187, -0.00019409134984016418, -0.00018744170665740967, -0.00018079206347465515, -0.00017414242029190063, -0.00016749277710914612, -0.0001608431339263916, -0.00015419349074363708, -0.00014754384756088257, -0.00014089420437812805, -0.00013424456119537354, -0.00012759491801261902, -0.0001209452748298645, -0.00011429563164710999, -0.00010764598846435547, -0.00010099634528160095, -9.434670209884644e-05, -8.769705891609192e-05, -8.10474157333374e-05, -7.439777255058289e-05, -6.774812936782837e-05, -6.109848618507385e-05, -5.4448843002319336e-05, -4.779919981956482e-05, -4.11495566368103e-05, -3.4499913454055786e-05, -2.785027027130127e-05, -2.1200627088546753e-05, -1.4550983905792236e-05, -7.90134072303772e-06, -1.2516975402832031e-06, 5.3979456424713135e-06, 1.204758882522583e-05, 1.8697232007980347e-05, 2.5346875190734863e-05, 3.199651837348938e-05, 3.8646161556243896e-05, 4.529580473899841e-05, 5.194544792175293e-05, 5.8595091104507446e-05, 6.524473428726196e-05, 7.189437747001648e-05, 7.8544020652771e-05, 8.519366383552551e-05, 9.184330701828003e-05, 9.849295020103455e-05, 0.00010514259338378906]}, "gradients/encoder.encoder.layers.10.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 1.0, 2.0, 2.0, 6.0, 2.0, 4.0, 8.0, 10.0, 11.0, 10.0, 16.0, 32.0, 19.0, 42.0, 72.0, 96.0, 230.0, 134.0, 87.0, 51.0, 35.0, 38.0, 25.0, 14.0, 11.0, 11.0, 9.0, 10.0, 0.0, 3.0, 5.0, 4.0, 4.0, 4.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.11983984708786011, -0.11593308299779892, -0.11202631890773773, -0.10811954736709595, -0.10421278327703476, -0.10030601918697357, -0.09639924764633179, -0.0924924835562706, -0.08858571946620941, -0.08467895537614822, -0.08077219128608704, -0.07686541974544525, -0.07295865565538406, -0.06905189156532288, -0.06514512002468109, -0.061238355934619904, -0.057331591844558716, -0.05342482775449753, -0.04951805993914604, -0.045611292123794556, -0.04170452803373337, -0.03779776394367218, -0.033890996128320694, -0.029984230175614357, -0.02607746422290802, -0.022170698270201683, -0.018263932317495346, -0.014357166364789009, -0.010450400412082672, -0.006543634459376335, -0.002636868506669998, 0.0012698974460363388, 0.005176663398742676, 0.009083429351449013, 0.01299019530415535, 0.016896961256861687, 0.020803727209568024, 0.02471049316227436, 0.028617259114980698, 0.032524026930332184, 0.03643079102039337, 0.04033755511045456, 0.044244322925806046, 0.04815109074115753, 0.05205785483121872, 0.05596461892127991, 0.05987138673663139, 0.06377815455198288, 0.06768491864204407, 0.07159168273210526, 0.07549844682216644, 0.07940521836280823, 0.08331198245286942, 0.0872187465429306, 0.09112551808357239, 0.09503228217363358, 0.09893904626369476, 0.10284581035375595, 0.10675257444381714, 0.11065934598445892, 0.11456611007452011, 0.1184728741645813, 0.12237964570522308, 0.12628640234470367, 0.13019317388534546]}, "gradients/encoder.encoder.layers.10.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 4.0, 3.0, 5.0, 9.0, 11.0, 6.0, 8.0, 13.0, 11.0, 19.0, 36.0, 37.0, 36.0, 36.0, 54.0, 54.0, 64.0, 59.0, 67.0, 67.0, 52.0, 57.0, 46.0, 45.0, 45.0, 35.0, 26.0, 24.0, 22.0, 20.0, 8.0, 9.0, 12.0, 6.0, 1.0, 4.0, 1.0, 3.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.0989154577255249, -0.09587566554546356, -0.09283588081598282, -0.08979608863592148, -0.08675630390644073, -0.0837165117263794, -0.08067671954631805, -0.07763692736625671, -0.07459714263677597, -0.07155735045671463, -0.06851756572723389, -0.06547777354717255, -0.062437985092401505, -0.05939819663763046, -0.05635840445756912, -0.05331861600279808, -0.05027882754802704, -0.047239039093256, -0.044199250638484955, -0.041159458458423615, -0.03811967000365257, -0.03507988154888153, -0.03204008936882019, -0.02900030091404915, -0.025960512459278107, -0.022920724004507065, -0.019880933687090874, -0.016841143369674683, -0.01380135491490364, -0.010761565528810024, -0.007721776142716408, -0.004681985825300217, -0.0016421973705291748, 0.0013975920155644417, 0.004437381401658058, 0.007477170787751675, 0.010516960173845291, 0.013556749559938908, 0.016596538946032524, 0.019636329263448715, 0.022676117718219757, 0.0257159061729908, 0.02875569649040699, 0.03179548680782318, 0.03483527526259422, 0.037875063717365265, 0.040914855897426605, 0.04395464435219765, 0.04699443280696869, 0.05003422126173973, 0.05307400971651077, 0.05611380189657211, 0.059153590351343155, 0.0621933788061142, 0.06523317098617554, 0.06827296316623688, 0.07131274789571762, 0.07435254007577896, 0.0773923248052597, 0.08043211698532104, 0.08347190916538239, 0.08651169389486313, 0.08955148607492447, 0.09259127080440521, 0.09563106298446655]}, "gradients/encoder.encoder.layers.9.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 3.0, 4.0, 12.0, 8.0, 33.0, 49.0, 98.0, 170.0, 350.0, 634.0, 1368.0, 4188926.0, 1270.0, 620.0, 344.0, 196.0, 92.0, 47.0, 21.0, 13.0, 7.0, 6.0, 7.0, 5.0, 0.0, 4.0, 2.0, 0.0, 1.0, 0.0, 3.0], "bins": [-0.10223388671875, -0.09993505477905273, -0.09763622283935547, -0.0953373908996582, -0.09303855895996094, -0.09073972702026367, -0.0884408950805664, -0.08614206314086914, -0.08384323120117188, -0.08154439926147461, -0.07924556732177734, -0.07694673538208008, -0.07464790344238281, -0.07234907150268555, -0.07005023956298828, -0.06775140762329102, -0.06545257568359375, -0.06315374374389648, -0.06085491180419922, -0.05855607986450195, -0.05625724792480469, -0.05395841598510742, -0.051659584045410156, -0.04936075210571289, -0.047061920166015625, -0.04476308822631836, -0.042464256286621094, -0.04016542434692383, -0.03786659240722656, -0.0355677604675293, -0.03326892852783203, -0.030970096588134766, -0.0286712646484375, -0.026372432708740234, -0.02407360076904297, -0.021774768829345703, -0.019475936889648438, -0.017177104949951172, -0.014878273010253906, -0.01257944107055664, -0.010280609130859375, -0.00798177719116211, -0.005682945251464844, -0.003384113311767578, -0.0010852813720703125, 0.0012135505676269531, 0.0035123825073242188, 0.005811214447021484, 0.00811004638671875, 0.010408878326416016, 0.012707710266113281, 0.015006542205810547, 0.017305374145507812, 0.019604206085205078, 0.021903038024902344, 0.02420186996459961, 0.026500701904296875, 0.02879953384399414, 0.031098365783691406, 0.03339719772338867, 0.03569602966308594, 0.0379948616027832, 0.04029369354248047, 0.042592525482177734, 0.044891357421875]}, "gradients/encoder.encoder.layers.9.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 3.0, 5.0, 10.0, 10.0, 24.0, 36.0, 53.0, 81.0, 141.0, 152.0, 144.0, 131.0, 78.0, 47.0, 52.0, 20.0, 14.0, 6.0, 2.0, 1.0, 3.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.01323699951171875, -0.012936055660247803, -0.012635111808776855, -0.012334167957305908, -0.012033224105834961, -0.011732280254364014, -0.011431336402893066, -0.01113039255142212, -0.010829448699951172, -0.010528504848480225, -0.010227560997009277, -0.00992661714553833, -0.009625673294067383, -0.009324729442596436, -0.009023785591125488, -0.008722841739654541, -0.008421897888183594, -0.008120954036712646, -0.0078200101852417, -0.007519066333770752, -0.007218122482299805, -0.006917178630828857, -0.00661623477935791, -0.006315290927886963, -0.006014347076416016, -0.005713403224945068, -0.005412459373474121, -0.005111515522003174, -0.0048105716705322266, -0.004509627819061279, -0.004208683967590332, -0.003907740116119385, -0.0036067962646484375, -0.0033058524131774902, -0.003004908561706543, -0.0027039647102355957, -0.0024030208587646484, -0.002102077007293701, -0.001801133155822754, -0.0015001893043518066, -0.0011992454528808594, -0.0008983016014099121, -0.0005973577499389648, -0.0002964138984680176, 4.5299530029296875e-06, 0.00030547380447387695, 0.0006064176559448242, 0.0009073615074157715, 0.0012083053588867188, 0.001509249210357666, 0.0018101930618286133, 0.0021111369132995605, 0.002412080764770508, 0.002713024616241455, 0.0030139684677124023, 0.0033149123191833496, 0.003615856170654297, 0.003916800022125244, 0.004217743873596191, 0.004518687725067139, 0.004819631576538086, 0.005120575428009033, 0.0054215192794799805, 0.005722463130950928, 0.006023406982421875]}, "gradients/encoder.encoder.layers.9.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 3.0, 4.0, 1.0, 3.0, 7.0, 6.0, 17.0, 13.0, 15.0, 26.0, 36.0, 33.0, 75.0, 84.0, 135.0, 200.0, 245.0, 378.0, 495.0, 776.0, 2198.0, 4183250.0, 3328.0, 955.0, 570.0, 391.0, 306.0, 216.0, 147.0, 119.0, 65.0, 68.0, 35.0, 26.0, 18.0, 20.0, 3.0, 8.0, 10.0, 5.0, 3.0, 1.0, 3.0, 1.0, 0.0, 3.0], "bins": [-0.01739501953125, -0.016956090927124023, -0.016517162322998047, -0.01607823371887207, -0.015639305114746094, -0.015200376510620117, -0.01476144790649414, -0.014322519302368164, -0.013883590698242188, -0.013444662094116211, -0.013005733489990234, -0.012566804885864258, -0.012127876281738281, -0.011688947677612305, -0.011250019073486328, -0.010811090469360352, -0.010372161865234375, -0.009933233261108398, -0.009494304656982422, -0.009055376052856445, -0.008616447448730469, -0.008177518844604492, -0.007738590240478516, -0.007299661636352539, -0.0068607330322265625, -0.006421804428100586, -0.005982875823974609, -0.005543947219848633, -0.005105018615722656, -0.00466609001159668, -0.004227161407470703, -0.0037882328033447266, -0.00334930419921875, -0.0029103755950927734, -0.002471446990966797, -0.0020325183868408203, -0.0015935897827148438, -0.0011546611785888672, -0.0007157325744628906, -0.00027680397033691406, 0.0001621246337890625, 0.0006010532379150391, 0.0010399818420410156, 0.0014789104461669922, 0.0019178390502929688, 0.0023567676544189453, 0.002795696258544922, 0.0032346248626708984, 0.003673553466796875, 0.0041124820709228516, 0.004551410675048828, 0.004990339279174805, 0.005429267883300781, 0.005868196487426758, 0.006307125091552734, 0.006746053695678711, 0.0071849822998046875, 0.007623910903930664, 0.00806283950805664, 0.008501768112182617, 0.008940696716308594, 0.00937962532043457, 0.009818553924560547, 0.010257482528686523, 0.0106964111328125]}, "gradients/encoder.encoder.layers.9.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 4.0, 1.0, 4072.0, 5.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0019664764404296875, -0.0018892288208007812, -0.001811981201171875, -0.0017347335815429688, -0.0016574859619140625, -0.0015802383422851562, -0.00150299072265625, -0.0014257431030273438, -0.0013484954833984375, -0.0012712478637695312, -0.001194000244140625, -0.0011167526245117188, -0.0010395050048828125, -0.0009622573852539062, -0.000885009765625, -0.0008077621459960938, -0.0007305145263671875, -0.0006532669067382812, -0.000576019287109375, -0.0004987716674804688, -0.0004215240478515625, -0.00034427642822265625, -0.00026702880859375, -0.00018978118896484375, -0.0001125335693359375, -3.528594970703125e-05, 4.1961669921875e-05, 0.00011920928955078125, 0.0001964569091796875, 0.00027370452880859375, 0.0003509521484375, 0.00042819976806640625, 0.0005054473876953125, 0.0005826950073242188, 0.000659942626953125, 0.0007371902465820312, 0.0008144378662109375, 0.0008916854858398438, 0.00096893310546875, 0.0010461807250976562, 0.0011234283447265625, 0.0012006759643554688, 0.001277923583984375, 0.0013551712036132812, 0.0014324188232421875, 0.0015096664428710938, 0.0015869140625, 0.0016641616821289062, 0.0017414093017578125, 0.0018186569213867188, 0.001895904541015625, 0.0019731521606445312, 0.0020503997802734375, 0.0021276473999023438, 0.00220489501953125, 0.0022821426391601562, 0.0023593902587890625, 0.0024366378784179688, 0.002513885498046875, 0.0025911331176757812, 0.0026683807373046875, 0.0027456283569335938, 0.0028228759765625, 0.0029001235961914062, 0.0029773712158203125]}, "gradients/encoder.encoder.layers.9.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 4.0, 1.0, 2.0, 5.0, 3.0, 9.0, 10.0, 5.0, 15.0, 14.0, 27.0, 27.0, 41.0, 82.0, 118.0, 226.0, 129.0, 71.0, 49.0, 40.0, 28.0, 25.0, 16.0, 20.0, 8.0, 9.0, 7.0, 2.0, 3.0, 2.0, 3.0, 4.0, 1.0, 1.0, 6.0, 1.0, 3.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0033214713912457228, -0.0031969347037374973, -0.0030723982490599155, -0.00294786156155169, -0.0028233248740434647, -0.002698788419365883, -0.0025742517318576574, -0.002449715044349432, -0.00232517858967185, -0.0022006419021636248, -0.002076105447486043, -0.0019515687599778175, -0.001827032072469592, -0.0017024955013766885, -0.0015779589302837849, -0.0014534222427755594, -0.001328885555267334, -0.0012043489841744304, -0.001079812296666205, -0.0009552757255733013, -0.0008307390962727368, -0.0007062024669721723, -0.0005816658958792686, -0.0004571292665787041, -0.0003325926372781396, -0.0002080560225294903, -8.351940778084099e-05, 4.101719241589308e-05, 0.0001655538217164576, 0.00029009045101702213, 0.00041462702210992575, 0.0005391636514104903, 0.0006637000478804111, 0.0007882366771809757, 0.0009127733064815402, 0.0010373098775744438, 0.0011618465650826693, 0.0012863831361755729, 0.0014109197072684765, 0.001535456394776702, 0.0016599929658696055, 0.0017845295369625092, 0.0019090662244707346, 0.00203360291197896, 0.002158139366656542, 0.0022826760541647673, 0.0024072127416729927, 0.0025317491963505745, 0.0026562858838588, 0.0027808225713670254, 0.002905359026044607, 0.0030298957135528326, 0.003154432401061058, 0.00327896885573864, 0.0034035055432468653, 0.0035280422307550907, 0.0036525786854326725, 0.003777115372940898, 0.0039016518276184797, 0.004026188515126705, 0.004150724969804287, 0.004275261890143156, 0.004399798344820738, 0.00452433479949832, 0.004648871719837189]}, "gradients/encoder.encoder.layers.9.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 4.0, 3.0, 3.0, 2.0, 4.0, 12.0, 7.0, 4.0, 10.0, 10.0, 11.0, 14.0, 17.0, 15.0, 22.0, 30.0, 22.0, 27.0, 36.0, 42.0, 40.0, 44.0, 39.0, 45.0, 48.0, 36.0, 42.0, 37.0, 33.0, 34.0, 31.0, 37.0, 32.0, 30.0, 27.0, 19.0, 26.0, 24.0, 17.0, 16.0, 9.0, 11.0, 5.0, 12.0, 5.0, 7.0, 4.0, 2.0, 4.0, 3.0, 0.0, 2.0, 2.0, 1.0], "bins": [-0.0022591352462768555, -0.0021954048424959183, -0.002131674438714981, -0.002067944034934044, -0.0020042136311531067, -0.0019404832273721695, -0.0018767528235912323, -0.001813022419810295, -0.001749292016029358, -0.0016855616122484207, -0.0016218312084674835, -0.0015581008046865463, -0.0014943704009056091, -0.001430639997124672, -0.0013669095933437347, -0.0013031791895627975, -0.0012394487857818604, -0.0011757183820009232, -0.001111987978219986, -0.0010482575744390488, -0.0009845271706581116, -0.0009207967668771744, -0.0008570663630962372, -0.0007933359593153, -0.0007296055555343628, -0.0006658751517534256, -0.0006021447479724884, -0.0005384143441915512, -0.000474683940410614, -0.0004109535366296768, -0.0003472231328487396, -0.00028349272906780243, -0.00021976232528686523, -0.00015603192150592804, -9.230151772499084e-05, -2.857111394405365e-05, 3.5159289836883545e-05, 9.888969361782074e-05, 0.00016262009739875793, 0.00022635050117969513, 0.0002900809049606323, 0.0003538113087415695, 0.0004175417125225067, 0.0004812721163034439, 0.0005450025200843811, 0.0006087329238653183, 0.0006724633276462555, 0.0007361937314271927, 0.0007999241352081299, 0.0008636545389890671, 0.0009273849427700043, 0.0009911153465509415, 0.0010548457503318787, 0.0011185761541128159, 0.001182306557893753, 0.0012460369616746902, 0.0013097673654556274, 0.0013734977692365646, 0.0014372281730175018, 0.001500958576798439, 0.0015646889805793762, 0.0016284193843603134, 0.0016921497881412506, 0.0017558801919221878, 0.001819610595703125]}, "gradients/encoder.encoder.layers.9.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 4.0, 4.0, 3.0, 4.0, 10.0, 10.0, 12.0, 9.0, 15.0, 18.0, 25.0, 38.0, 46.0, 57.0, 133.0, 212.0, 455.0, 1088.0, 3445.0, 15578.0, 106363.0, 795482.0, 104426.0, 15571.0, 3453.0, 1076.0, 439.0, 188.0, 127.0, 70.0, 50.0, 33.0, 28.0, 13.0, 19.0, 17.0, 10.0, 12.0, 8.0, 4.0, 1.0, 4.0, 2.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.051025390625, -0.04945516586303711, -0.04788494110107422, -0.04631471633911133, -0.04474449157714844, -0.04317426681518555, -0.041604042053222656, -0.040033817291259766, -0.038463592529296875, -0.036893367767333984, -0.035323143005371094, -0.0337529182434082, -0.03218269348144531, -0.030612468719482422, -0.02904224395751953, -0.02747201919555664, -0.02590179443359375, -0.02433156967163086, -0.02276134490966797, -0.021191120147705078, -0.019620895385742188, -0.018050670623779297, -0.016480445861816406, -0.014910221099853516, -0.013339996337890625, -0.011769771575927734, -0.010199546813964844, -0.008629322052001953, -0.0070590972900390625, -0.005488872528076172, -0.003918647766113281, -0.0023484230041503906, -0.0007781982421875, 0.0007920265197753906, 0.0023622512817382812, 0.003932476043701172, 0.0055027008056640625, 0.007072925567626953, 0.008643150329589844, 0.010213375091552734, 0.011783599853515625, 0.013353824615478516, 0.014924049377441406, 0.016494274139404297, 0.018064498901367188, 0.019634723663330078, 0.02120494842529297, 0.02277517318725586, 0.02434539794921875, 0.02591562271118164, 0.02748584747314453, 0.029056072235107422, 0.030626296997070312, 0.0321965217590332, 0.033766746520996094, 0.035336971282958984, 0.036907196044921875, 0.038477420806884766, 0.040047645568847656, 0.04161787033081055, 0.04318809509277344, 0.04475831985473633, 0.04632854461669922, 0.04789876937866211, 0.049468994140625]}, "gradients/encoder.encoder.layers.9.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 3.0, 5.0, 10.0, 10.0, 27.0, 34.0, 56.0, 79.0, 138.0, 158.0, 145.0, 130.0, 73.0, 46.0, 52.0, 21.0, 14.0, 6.0, 2.0, 1.0, 3.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.013214111328125, -0.012913644313812256, -0.012613177299499512, -0.012312710285186768, -0.012012243270874023, -0.01171177625656128, -0.011411309242248535, -0.011110842227935791, -0.010810375213623047, -0.010509908199310303, -0.010209441184997559, -0.009908974170684814, -0.00960850715637207, -0.009308040142059326, -0.009007573127746582, -0.008707106113433838, -0.008406639099121094, -0.00810617208480835, -0.0078057050704956055, -0.007505238056182861, -0.007204771041870117, -0.006904304027557373, -0.006603837013244629, -0.006303369998931885, -0.006002902984619141, -0.0057024359703063965, -0.005401968955993652, -0.005101501941680908, -0.004801034927368164, -0.00450056791305542, -0.004200100898742676, -0.0038996338844299316, -0.0035991668701171875, -0.0032986998558044434, -0.0029982328414916992, -0.002697765827178955, -0.002397298812866211, -0.002096831798553467, -0.0017963647842407227, -0.0014958977699279785, -0.0011954307556152344, -0.0008949637413024902, -0.0005944967269897461, -0.00029402971267700195, 6.4373016357421875e-06, 0.00030690431594848633, 0.0006073713302612305, 0.0009078383445739746, 0.0012083053588867188, 0.0015087723731994629, 0.001809239387512207, 0.002109706401824951, 0.0024101734161376953, 0.0027106404304504395, 0.0030111074447631836, 0.0033115744590759277, 0.003612041473388672, 0.003912508487701416, 0.00421297550201416, 0.004513442516326904, 0.0048139095306396484, 0.005114376544952393, 0.005414843559265137, 0.005715310573577881, 0.006015777587890625]}, "gradients/encoder.encoder.layers.9.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 4.0, 2.0, 2.0, 2.0, 3.0, 4.0, 6.0, 4.0, 4.0, 10.0, 11.0, 14.0, 16.0, 20.0, 19.0, 26.0, 37.0, 59.0, 110.0, 207.0, 359.0, 755.0, 1837.0, 4633.0, 12652.0, 39816.0, 176740.0, 671153.0, 98667.0, 26504.0, 8864.0, 3296.0, 1387.0, 633.0, 292.0, 123.0, 75.0, 40.0, 32.0, 28.0, 16.0, 15.0, 19.0, 13.0, 10.0, 8.0, 7.0, 9.0, 9.0, 1.0, 8.0, 4.0, 3.0, 3.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.024566650390625, -0.023699522018432617, -0.022832393646240234, -0.02196526527404785, -0.02109813690185547, -0.020231008529663086, -0.019363880157470703, -0.01849675178527832, -0.017629623413085938, -0.016762495040893555, -0.015895366668701172, -0.015028238296508789, -0.014161109924316406, -0.013293981552124023, -0.01242685317993164, -0.011559724807739258, -0.010692596435546875, -0.009825468063354492, -0.00895833969116211, -0.008091211318969727, -0.007224082946777344, -0.006356954574584961, -0.005489826202392578, -0.004622697830200195, -0.0037555694580078125, -0.0028884410858154297, -0.002021312713623047, -0.001154184341430664, -0.00028705596923828125, 0.0005800724029541016, 0.0014472007751464844, 0.002314329147338867, 0.00318145751953125, 0.004048585891723633, 0.004915714263916016, 0.0057828426361083984, 0.006649971008300781, 0.007517099380493164, 0.008384227752685547, 0.00925135612487793, 0.010118484497070312, 0.010985612869262695, 0.011852741241455078, 0.012719869613647461, 0.013586997985839844, 0.014454126358032227, 0.01532125473022461, 0.016188383102416992, 0.017055511474609375, 0.017922639846801758, 0.01878976821899414, 0.019656896591186523, 0.020524024963378906, 0.02139115333557129, 0.022258281707763672, 0.023125410079956055, 0.023992538452148438, 0.02485966682434082, 0.025726795196533203, 0.026593923568725586, 0.02746105194091797, 0.02832818031311035, 0.029195308685302734, 0.030062437057495117, 0.0309295654296875]}, "gradients/encoder.encoder.layers.9.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 4.0, 4.0, 8.0, 2.0, 8.0, 8.0, 8.0, 9.0, 9.0, 13.0, 18.0, 14.0, 15.0, 23.0, 21.0, 16.0, 25.0, 25.0, 33.0, 36.0, 30.0, 38.0, 38.0, 37.0, 40.0, 43.0, 40.0, 33.0, 31.0, 34.0, 56.0, 25.0, 30.0, 38.0, 28.0, 20.0, 20.0, 19.0, 17.0, 18.0, 17.0, 15.0, 10.0, 10.0, 5.0, 4.0, 6.0, 3.0, 4.0, 1.0, 3.0, 1.0, 4.0, 2.0], "bins": [-0.01488494873046875, -0.014469146728515625, -0.0140533447265625, -0.013637542724609375, -0.01322174072265625, -0.012805938720703125, -0.01239013671875, -0.011974334716796875, -0.01155853271484375, -0.011142730712890625, -0.0107269287109375, -0.010311126708984375, -0.00989532470703125, -0.009479522705078125, -0.009063720703125, -0.008647918701171875, -0.00823211669921875, -0.007816314697265625, -0.0074005126953125, -0.006984710693359375, -0.00656890869140625, -0.006153106689453125, -0.0057373046875, -0.005321502685546875, -0.00490570068359375, -0.004489898681640625, -0.0040740966796875, -0.003658294677734375, -0.00324249267578125, -0.002826690673828125, -0.002410888671875, -0.001995086669921875, -0.00157928466796875, -0.001163482666015625, -0.0007476806640625, -0.000331878662109375, 8.392333984375e-05, 0.000499725341796875, 0.00091552734375, 0.001331329345703125, 0.00174713134765625, 0.002162933349609375, 0.0025787353515625, 0.002994537353515625, 0.00341033935546875, 0.003826141357421875, 0.004241943359375, 0.004657745361328125, 0.00507354736328125, 0.005489349365234375, 0.0059051513671875, 0.006320953369140625, 0.00673675537109375, 0.007152557373046875, 0.007568359375, 0.007984161376953125, 0.00839996337890625, 0.008815765380859375, 0.0092315673828125, 0.009647369384765625, 0.01006317138671875, 0.010478973388671875, 0.010894775390625, 0.011310577392578125, 0.01172637939453125]}, "gradients/encoder.encoder.layers.9.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 4.0, 3.0, 5.0, 9.0, 10.0, 14.0, 16.0, 25.0, 50.0, 66.0, 83.0, 118.0, 201.0, 367.0, 732.0, 1552.0, 3476.0, 10729.0, 46841.0, 746321.0, 200501.0, 25544.0, 6769.0, 2584.0, 1106.0, 595.0, 307.0, 176.0, 118.0, 72.0, 53.0, 33.0, 28.0, 13.0, 13.0, 9.0, 3.0, 9.0, 2.0, 3.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0], "bins": [-0.00019037723541259766, -0.00018419884145259857, -0.0001780204474925995, -0.0001718420535326004, -0.00016566365957260132, -0.00015948526561260223, -0.00015330687165260315, -0.00014712847769260406, -0.00014095008373260498, -0.0001347716897726059, -0.0001285932958126068, -0.00012241490185260773, -0.00011623650789260864, -0.00011005811393260956, -0.00010387971997261047, -9.770132601261139e-05, -9.15229320526123e-05, -8.534453809261322e-05, -7.916614413261414e-05, -7.298775017261505e-05, -6.680935621261597e-05, -6.063096225261688e-05, -5.44525682926178e-05, -4.827417433261871e-05, -4.209578037261963e-05, -3.5917386412620544e-05, -2.973899245262146e-05, -2.3560598492622375e-05, -1.738220453262329e-05, -1.1203810572624207e-05, -5.025416612625122e-06, 1.1529773473739624e-06, 7.331371307373047e-06, 1.3509765267372131e-05, 1.9688159227371216e-05, 2.58665531873703e-05, 3.2044947147369385e-05, 3.822334110736847e-05, 4.4401735067367554e-05, 5.058012902736664e-05, 5.675852298736572e-05, 6.293691694736481e-05, 6.911531090736389e-05, 7.529370486736298e-05, 8.147209882736206e-05, 8.765049278736115e-05, 9.382888674736023e-05, 0.00010000728070735931, 0.0001061856746673584, 0.00011236406862735748, 0.00011854246258735657, 0.00012472085654735565, 0.00013089925050735474, 0.00013707764446735382, 0.0001432560384273529, 0.000149434432387352, 0.00015561282634735107, 0.00016179122030735016, 0.00016796961426734924, 0.00017414800822734833, 0.0001803264021873474, 0.0001865047961473465, 0.00019268319010734558, 0.00019886158406734467, 0.00020503997802734375]}, "gradients/encoder.encoder.layers.9.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0, 2.0, 2.0, 3.0, 6.0, 5.0, 4.0, 2.0, 10.0, 5.0, 10.0, 10.0, 10.0, 10.0, 17.0, 12.0, 24.0, 34.0, 47.0, 46.0, 60.0, 84.0, 136.0, 109.0, 69.0, 53.0, 50.0, 37.0, 43.0, 20.0, 16.0, 9.0, 20.0, 9.0, 9.0, 8.0, 8.0, 2.0, 1.0, 1.0, 1.0, 2.0, 6.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-1.8656253814697266e-05, -1.8149614334106445e-05, -1.7642974853515625e-05, -1.7136335372924805e-05, -1.6629695892333984e-05, -1.6123056411743164e-05, -1.5616416931152344e-05, -1.5109777450561523e-05, -1.4603137969970703e-05, -1.4096498489379883e-05, -1.3589859008789062e-05, -1.3083219528198242e-05, -1.2576580047607422e-05, -1.2069940567016602e-05, -1.1563301086425781e-05, -1.1056661605834961e-05, -1.055002212524414e-05, -1.004338264465332e-05, -9.5367431640625e-06, -9.03010368347168e-06, -8.52346420288086e-06, -8.016824722290039e-06, -7.510185241699219e-06, -7.0035457611083984e-06, -6.496906280517578e-06, -5.990266799926758e-06, -5.4836273193359375e-06, -4.976987838745117e-06, -4.470348358154297e-06, -3.9637088775634766e-06, -3.4570693969726562e-06, -2.950429916381836e-06, -2.4437904357910156e-06, -1.9371509552001953e-06, -1.430511474609375e-06, -9.238719940185547e-07, -4.172325134277344e-07, 8.940696716308594e-08, 5.960464477539062e-07, 1.1026859283447266e-06, 1.6093254089355469e-06, 2.115964889526367e-06, 2.6226043701171875e-06, 3.129243850708008e-06, 3.635883331298828e-06, 4.1425228118896484e-06, 4.649162292480469e-06, 5.155801773071289e-06, 5.662441253662109e-06, 6.16908073425293e-06, 6.67572021484375e-06, 7.18235969543457e-06, 7.68899917602539e-06, 8.195638656616211e-06, 8.702278137207031e-06, 9.208917617797852e-06, 9.715557098388672e-06, 1.0222196578979492e-05, 1.0728836059570312e-05, 1.1235475540161133e-05, 1.1742115020751953e-05, 1.2248754501342773e-05, 1.2755393981933594e-05, 1.3262033462524414e-05, 1.3768672943115234e-05]}, "gradients/encoder.encoder.layers.9.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 3.0, 1.0, 1.0, 2.0, 2.0, 2.0, 1.0, 3.0, 3.0, 3.0, 6.0, 8.0, 7.0, 13.0, 15.0, 27.0, 57.0, 92.0, 159.0, 323.0, 645.0, 1552.0, 4290.0, 14916.0, 76702.0, 803787.0, 117586.0, 19535.0, 5353.0, 1912.0, 773.0, 345.0, 152.0, 87.0, 60.0, 37.0, 33.0, 18.0, 16.0, 14.0, 2.0, 2.0, 7.0, 5.0, 2.0, 3.0, 2.0, 0.0, 1.0, 0.0, 3.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.00021064281463623047, -0.00020397640764713287, -0.00019731000065803528, -0.00019064359366893768, -0.0001839771866798401, -0.0001773107796907425, -0.0001706443727016449, -0.0001639779657125473, -0.0001573115587234497, -0.0001506451517343521, -0.00014397874474525452, -0.00013731233775615692, -0.00013064593076705933, -0.00012397952377796173, -0.00011731311678886414, -0.00011064670979976654, -0.00010398030281066895, -9.731389582157135e-05, -9.064748883247375e-05, -8.398108184337616e-05, -7.731467485427856e-05, -7.064826786518097e-05, -6.398186087608337e-05, -5.731545388698578e-05, -5.0649046897888184e-05, -4.398263990879059e-05, -3.731623291969299e-05, -3.06498259305954e-05, -2.3983418941497803e-05, -1.7317011952400208e-05, -1.0650604963302612e-05, -3.984197974205017e-06, 2.682209014892578e-06, 9.348616003990173e-06, 1.601502299308777e-05, 2.2681429982185364e-05, 2.934783697128296e-05, 3.6014243960380554e-05, 4.268065094947815e-05, 4.9347057938575745e-05, 5.601346492767334e-05, 6.267987191677094e-05, 6.934627890586853e-05, 7.601268589496613e-05, 8.267909288406372e-05, 8.934549987316132e-05, 9.601190686225891e-05, 0.0001026783138513565, 0.0001093447208404541, 0.0001160111278295517, 0.0001226775348186493, 0.0001293439418077469, 0.00013601034879684448, 0.00014267675578594208, 0.00014934316277503967, 0.00015600956976413727, 0.00016267597675323486, 0.00016934238374233246, 0.00017600879073143005, 0.00018267519772052765, 0.00018934160470962524, 0.00019600801169872284, 0.00020267441868782043, 0.00020934082567691803, 0.00021600723266601562]}, "gradients/encoder.encoder.layers.9.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 3.0, 0.0, 1.0, 0.0, 2.0, 2.0, 3.0, 4.0, 7.0, 2.0, 3.0, 13.0, 8.0, 15.0, 18.0, 18.0, 15.0, 22.0, 30.0, 34.0, 46.0, 47.0, 51.0, 58.0, 77.0, 80.0, 58.0, 56.0, 71.0, 52.0, 42.0, 30.0, 20.0, 23.0, 29.0, 18.0, 9.0, 9.0, 6.0, 5.0, 8.0, 2.0, 4.0, 2.0, 2.0, 1.0, 3.0, 2.0, 0.0, 2.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0], "bins": [-0.00010389089584350586, -0.00010070577263832092, -9.752064943313599e-05, -9.433552622795105e-05, -9.115040302276611e-05, -8.796527981758118e-05, -8.478015661239624e-05, -8.15950334072113e-05, -7.840991020202637e-05, -7.522478699684143e-05, -7.20396637916565e-05, -6.885454058647156e-05, -6.566941738128662e-05, -6.248429417610168e-05, -5.929917097091675e-05, -5.611404776573181e-05, -5.2928924560546875e-05, -4.974380135536194e-05, -4.6558678150177e-05, -4.3373554944992065e-05, -4.018843173980713e-05, -3.700330853462219e-05, -3.3818185329437256e-05, -3.063306212425232e-05, -2.7447938919067383e-05, -2.4262815713882446e-05, -2.107769250869751e-05, -1.7892569303512573e-05, -1.4707446098327637e-05, -1.15223228931427e-05, -8.337199687957764e-06, -5.152076482772827e-06, -1.9669532775878906e-06, 1.218169927597046e-06, 4.403293132781982e-06, 7.588416337966919e-06, 1.0773539543151855e-05, 1.3958662748336792e-05, 1.714378595352173e-05, 2.0328909158706665e-05, 2.35140323638916e-05, 2.6699155569076538e-05, 2.9884278774261475e-05, 3.306940197944641e-05, 3.625452518463135e-05, 3.9439648389816284e-05, 4.262477159500122e-05, 4.580989480018616e-05, 4.8995018005371094e-05, 5.218014121055603e-05, 5.536526441574097e-05, 5.85503876209259e-05, 6.173551082611084e-05, 6.492063403129578e-05, 6.810575723648071e-05, 7.129088044166565e-05, 7.447600364685059e-05, 7.766112685203552e-05, 8.084625005722046e-05, 8.40313732624054e-05, 8.721649646759033e-05, 9.040161967277527e-05, 9.35867428779602e-05, 9.677186608314514e-05, 9.995698928833008e-05]}, "gradients/encoder.encoder.layers.9.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 4.0, 0.0, 1.0, 1.0, 7.0, 5.0, 10.0, 16.0, 13.0, 22.0, 28.0, 38.0, 62.0, 88.0, 184.0, 197.0, 90.0, 60.0, 38.0, 30.0, 22.0, 21.0, 19.0, 8.0, 6.0, 10.0, 5.0, 7.0, 7.0, 1.0, 4.0, 1.0, 1.0, 0.0, 2.0, 3.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-0.12331752479076385, -0.1196894645690918, -0.11606140434741974, -0.11243334412574768, -0.10880528390407562, -0.10517722368240356, -0.10154915601015091, -0.09792109578847885, -0.0942930355668068, -0.09066497534513474, -0.08703691512346268, -0.08340885490179062, -0.07978078722953796, -0.0761527270078659, -0.07252466678619385, -0.06889660656452179, -0.06526854634284973, -0.06164048612117767, -0.058012425899505615, -0.05438436195254326, -0.0507563017308712, -0.04712824150919914, -0.043500177562236786, -0.03987211734056473, -0.03624405711889267, -0.03261599689722061, -0.028987934812903404, -0.025359872728586197, -0.02173181250691414, -0.01810375228524208, -0.014475690200924873, -0.010847628116607666, -0.007219560444355011, -0.0035914992913603783, 3.6561861634254456e-05, 0.003664623014628887, 0.00729268416762352, 0.010920744389295578, 0.014548806473612785, 0.018176868557929993, 0.02180492877960205, 0.02543298900127411, 0.029061051085591316, 0.032689113169908524, 0.03631717339158058, 0.03994523361325264, 0.043573297560214996, 0.047201357781887054, 0.05082941800355911, 0.05445747822523117, 0.05808553844690323, 0.061713602393865585, 0.06534166634082794, 0.0689697265625, 0.07259778678417206, 0.07622584700584412, 0.07985390722751617, 0.08348196744918823, 0.08711002767086029, 0.09073808789253235, 0.0943661481142044, 0.09799420833587646, 0.10162227600812912, 0.10525033622980118, 0.10887839645147324]}, "gradients/encoder.encoder.layers.9.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 3.0, 2.0, 3.0, 2.0, 2.0, 13.0, 12.0, 14.0, 15.0, 19.0, 25.0, 19.0, 37.0, 36.0, 30.0, 48.0, 42.0, 58.0, 52.0, 61.0, 60.0, 64.0, 55.0, 53.0, 36.0, 40.0, 37.0, 30.0, 29.0, 26.0, 17.0, 16.0, 12.0, 12.0, 8.0, 9.0, 5.0, 6.0, 5.0, 0.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.07691287994384766, -0.07434757053852081, -0.07178226858377457, -0.06921695917844772, -0.06665165722370148, -0.06408634781837463, -0.06152103841304779, -0.058955732733011246, -0.0563904270529747, -0.053825121372938156, -0.05125981569290161, -0.04869450628757477, -0.04612920060753822, -0.04356389492750168, -0.040998585522174835, -0.03843327984213829, -0.035867974162101746, -0.0333026684820652, -0.030737360939383507, -0.028172053396701813, -0.025606747716665268, -0.023041442036628723, -0.02047613449394703, -0.017910826951265335, -0.01534552127122879, -0.012780214659869671, -0.010214908048510551, -0.007649601437151432, -0.005084294825792313, -0.002518988214433193, 4.631839692592621e-05, 0.0026116259396076202, 0.005176931619644165, 0.0077422382310032845, 0.010307544842362404, 0.012872851453721523, 0.015438158065080643, 0.018003463745117188, 0.02056877128779888, 0.023134078830480576, 0.02569938451051712, 0.028264690190553665, 0.03082999773323536, 0.03339530527591705, 0.0359606109559536, 0.03852591663599014, 0.041091226041316986, 0.04365653172135353, 0.046221837401390076, 0.04878714308142662, 0.051352448761463165, 0.05391775816679001, 0.05648306384682655, 0.0590483695268631, 0.06161367893218994, 0.06417898833751678, 0.06674429029226303, 0.06930959969758987, 0.07187490165233612, 0.07444021105766296, 0.07700552046298981, 0.07957082241773605, 0.0821361318230629, 0.08470143377780914, 0.08726674318313599]}, "gradients/encoder.encoder.layers.8.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 6.0, 2.0, 2.0, 6.0, 5.0, 11.0, 23.0, 30.0, 61.0, 121.0, 276.0, 779.0, 3012.0, 4183585.0, 4618.0, 1100.0, 321.0, 151.0, 81.0, 54.0, 19.0, 10.0, 6.0, 5.0, 4.0, 2.0, 1.0, 0.0, 3.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.08831787109375, -0.08629274368286133, -0.08426761627197266, -0.08224248886108398, -0.08021736145019531, -0.07819223403930664, -0.07616710662841797, -0.0741419792175293, -0.07211685180664062, -0.07009172439575195, -0.06806659698486328, -0.06604146957397461, -0.06401634216308594, -0.061991214752197266, -0.059966087341308594, -0.05794095993041992, -0.05591583251953125, -0.05389070510864258, -0.051865577697753906, -0.049840450286865234, -0.04781532287597656, -0.04579019546508789, -0.04376506805419922, -0.04173994064331055, -0.039714813232421875, -0.0376896858215332, -0.03566455841064453, -0.03363943099975586, -0.03161430358886719, -0.029589176177978516, -0.027564048767089844, -0.025538921356201172, -0.0235137939453125, -0.021488666534423828, -0.019463539123535156, -0.017438411712646484, -0.015413284301757812, -0.01338815689086914, -0.011363029479980469, -0.009337902069091797, -0.007312774658203125, -0.005287647247314453, -0.0032625198364257812, -0.0012373924255371094, 0.0007877349853515625, 0.0028128623962402344, 0.004837989807128906, 0.006863117218017578, 0.00888824462890625, 0.010913372039794922, 0.012938499450683594, 0.014963626861572266, 0.016988754272460938, 0.01901388168334961, 0.02103900909423828, 0.023064136505126953, 0.025089263916015625, 0.027114391326904297, 0.02913951873779297, 0.03116464614868164, 0.03318977355957031, 0.035214900970458984, 0.037240028381347656, 0.03926515579223633, 0.041290283203125]}, "gradients/encoder.encoder.layers.8.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 4.0, 7.0, 2.0, 13.0, 20.0, 29.0, 45.0, 57.0, 119.0, 156.0, 148.0, 129.0, 106.0, 62.0, 46.0, 34.0, 17.0, 13.0, 3.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.01268768310546875, -0.01239544153213501, -0.01210319995880127, -0.01181095838546753, -0.011518716812133789, -0.011226475238800049, -0.010934233665466309, -0.010641992092132568, -0.010349750518798828, -0.010057508945465088, -0.009765267372131348, -0.009473025798797607, -0.009180784225463867, -0.008888542652130127, -0.008596301078796387, -0.008304059505462646, -0.008011817932128906, -0.007719576358795166, -0.007427334785461426, -0.0071350932121276855, -0.006842851638793945, -0.006550610065460205, -0.006258368492126465, -0.005966126918792725, -0.005673885345458984, -0.005381643772125244, -0.005089402198791504, -0.004797160625457764, -0.0045049190521240234, -0.004212677478790283, -0.003920435905456543, -0.0036281943321228027, -0.0033359527587890625, -0.0030437111854553223, -0.002751469612121582, -0.002459228038787842, -0.0021669864654541016, -0.0018747448921203613, -0.001582503318786621, -0.0012902617454528809, -0.0009980201721191406, -0.0007057785987854004, -0.00041353702545166016, -0.00012129545211791992, 0.0001709461212158203, 0.00046318769454956055, 0.0007554292678833008, 0.001047670841217041, 0.0013399124145507812, 0.0016321539878845215, 0.0019243955612182617, 0.002216637134552002, 0.002508878707885742, 0.0028011202812194824, 0.0030933618545532227, 0.003385603427886963, 0.003677845001220703, 0.003970086574554443, 0.004262328147888184, 0.004554569721221924, 0.004846811294555664, 0.005139052867889404, 0.0054312944412231445, 0.005723536014556885, 0.006015777587890625]}, "gradients/encoder.encoder.layers.8.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 3.0, 5.0, 4.0, 3.0, 3.0, 6.0, 7.0, 27.0, 24.0, 32.0, 49.0, 71.0, 102.0, 163.0, 278.0, 476.0, 1145.0, 3562.0, 24316.0, 4155127.0, 5707.0, 1655.0, 658.0, 326.0, 204.0, 117.0, 69.0, 54.0, 31.0, 31.0, 11.0, 12.0, 8.0, 5.0, 2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0217437744140625, -0.020763635635375977, -0.019783496856689453, -0.01880335807800293, -0.017823219299316406, -0.016843080520629883, -0.01586294174194336, -0.014882802963256836, -0.013902664184570312, -0.012922525405883789, -0.011942386627197266, -0.010962247848510742, -0.009982109069824219, -0.009001970291137695, -0.008021831512451172, -0.0070416927337646484, -0.006061553955078125, -0.0050814151763916016, -0.004101276397705078, -0.0031211376190185547, -0.0021409988403320312, -0.0011608600616455078, -0.00018072128295898438, 0.0007994174957275391, 0.0017795562744140625, 0.002759695053100586, 0.0037398338317871094, 0.004719972610473633, 0.005700111389160156, 0.00668025016784668, 0.007660388946533203, 0.008640527725219727, 0.00962066650390625, 0.010600805282592773, 0.011580944061279297, 0.01256108283996582, 0.013541221618652344, 0.014521360397338867, 0.01550149917602539, 0.016481637954711914, 0.017461776733398438, 0.01844191551208496, 0.019422054290771484, 0.020402193069458008, 0.02138233184814453, 0.022362470626831055, 0.023342609405517578, 0.0243227481842041, 0.025302886962890625, 0.02628302574157715, 0.027263164520263672, 0.028243303298950195, 0.02922344207763672, 0.030203580856323242, 0.031183719635009766, 0.03216385841369629, 0.03314399719238281, 0.034124135971069336, 0.03510427474975586, 0.03608441352844238, 0.037064552307128906, 0.03804469108581543, 0.03902482986450195, 0.04000496864318848, 0.040985107421875]}, "gradients/encoder.encoder.layers.8.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 5.0, 4.0, 0.0, 2.0, 11.0, 37.0, 3955.0, 33.0, 14.0, 6.0, 2.0, 1.0, 3.0, 5.0, 3.0, 1.0, 3.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.009918212890625, -0.009700655937194824, -0.009483098983764648, -0.009265542030334473, -0.009047985076904297, -0.008830428123474121, -0.008612871170043945, -0.00839531421661377, -0.008177757263183594, -0.007960200309753418, -0.007742643356323242, -0.007525086402893066, -0.007307529449462891, -0.007089972496032715, -0.006872415542602539, -0.006654858589172363, -0.0064373016357421875, -0.006219744682312012, -0.006002187728881836, -0.00578463077545166, -0.005567073822021484, -0.005349516868591309, -0.005131959915161133, -0.004914402961730957, -0.004696846008300781, -0.0044792890548706055, -0.00426173210144043, -0.004044175148010254, -0.003826618194580078, -0.0036090612411499023, -0.0033915042877197266, -0.0031739473342895508, -0.002956390380859375, -0.0027388334274291992, -0.0025212764739990234, -0.0023037195205688477, -0.002086162567138672, -0.001868605613708496, -0.0016510486602783203, -0.0014334917068481445, -0.0012159347534179688, -0.000998377799987793, -0.0007808208465576172, -0.0005632638931274414, -0.0003457069396972656, -0.00012814998626708984, 8.940696716308594e-05, 0.0003069639205932617, 0.0005245208740234375, 0.0007420778274536133, 0.0009596347808837891, 0.0011771917343139648, 0.0013947486877441406, 0.0016123056411743164, 0.0018298625946044922, 0.002047419548034668, 0.0022649765014648438, 0.0024825334548950195, 0.0027000904083251953, 0.002917647361755371, 0.003135204315185547, 0.0033527612686157227, 0.0035703182220458984, 0.0037878751754760742, 0.00400543212890625]}, "gradients/encoder.encoder.layers.8.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 3.0, 2.0, 2.0, 2.0, 5.0, 4.0, 8.0, 12.0, 11.0, 32.0, 36.0, 55.0, 86.0, 195.0, 286.0, 105.0, 65.0, 36.0, 23.0, 19.0, 9.0, 5.0, 4.0, 2.0, 2.0, 5.0, 0.0, 1.0, 1.0, 3.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.032492250204086304, -0.03175739571452141, -0.031022541224956512, -0.030287686735391617, -0.02955283224582672, -0.028817977756261826, -0.02808312326669693, -0.027348268777132034, -0.02661341428756714, -0.025878559798002243, -0.025143705308437347, -0.024408850818872452, -0.023673996329307556, -0.02293914183974266, -0.022204287350177765, -0.02146943286061287, -0.020734576508402824, -0.01999972201883793, -0.019264867529273033, -0.018530013039708138, -0.017795158550143242, -0.017060304060578346, -0.0163254477083683, -0.01559059415012598, -0.014855739660561085, -0.014120885170996189, -0.013386030681431293, -0.012651175260543823, -0.011916320770978928, -0.011181466281414032, -0.010446611791849136, -0.00971175730228424, -0.00897690374404192, -0.008242049254477024, -0.0075071947649121284, -0.0067723398096859455, -0.00603748532012105, -0.005302630830556154, -0.004567775875329971, -0.0038329213857650757, -0.00309806689620018, -0.0023632124066352844, -0.0016283576842397451, -0.0008935029618442059, -0.00015864847227931023, 0.0005762060172855854, 0.0013110609725117683, 0.002045915462076664, 0.0027807699516415596, 0.0035156244412064552, 0.004250478930771351, 0.004985333885997534, 0.005720188375562429, 0.006455042865127325, 0.007189897820353508, 0.007924752309918404, 0.0086596067994833, 0.009394461289048195, 0.01012931577861309, 0.010864170268177986, 0.011599024757742882, 0.012333879247307777, 0.013068734668195248, 0.013803589157760143, 0.014538443647325039]}, "gradients/encoder.encoder.layers.8.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 1.0, 2.0, 6.0, 6.0, 16.0, 17.0, 24.0, 33.0, 41.0, 46.0, 74.0, 92.0, 73.0, 94.0, 86.0, 77.0, 70.0, 61.0, 55.0, 49.0, 28.0, 19.0, 13.0, 7.0, 7.0, 5.0, 8.0, 1.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.016002535820007324, -0.015486223623156548, -0.014969911426305771, -0.014453599229454994, -0.013937287032604218, -0.01342097483575344, -0.012904662638902664, -0.012388350442051888, -0.01187203824520111, -0.011355726048350334, -0.010839413851499557, -0.01032310165464878, -0.009806789457798004, -0.009290477260947227, -0.00877416506409645, -0.008257852867245674, -0.0077415406703948975, -0.007225228473544121, -0.006708916276693344, -0.0061926040798425674, -0.005676291882991791, -0.005159979686141014, -0.004643667489290237, -0.004127355292439461, -0.003611043095588684, -0.0030947308987379074, -0.0025784187018871307, -0.002062106505036354, -0.0015457943081855774, -0.0010294821113348007, -0.000513169914484024, 3.1422823667526245e-06, 0.0005194544792175293, 0.001035766676068306, 0.0015520788729190826, 0.0020683910697698593, 0.002584703266620636, 0.0031010154634714127, 0.0036173276603221893, 0.004133639857172966, 0.004649952054023743, 0.005166264250874519, 0.005682576447725296, 0.006198888644576073, 0.006715200841426849, 0.007231513038277626, 0.007747825235128403, 0.00826413743197918, 0.008780449628829956, 0.009296761825680733, 0.00981307402253151, 0.010329386219382286, 0.010845698416233063, 0.01136201061308384, 0.011878322809934616, 0.012394635006785393, 0.01291094720363617, 0.013427259400486946, 0.013943571597337723, 0.0144598837941885, 0.014976195991039276, 0.015492508187890053, 0.01600882038474083, 0.016525132581591606, 0.017041444778442383]}, "gradients/encoder.encoder.layers.8.attention.out_proj.weight": {"_type": "histogram", "values": [2.0, 2.0, 4.0, 1.0, 4.0, 4.0, 4.0, 6.0, 4.0, 7.0, 13.0, 8.0, 15.0, 24.0, 31.0, 42.0, 80.0, 130.0, 229.0, 574.0, 1588.0, 5980.0, 35461.0, 662902.0, 309768.0, 24720.0, 4656.0, 1275.0, 434.0, 215.0, 125.0, 78.0, 53.0, 44.0, 22.0, 18.0, 7.0, 11.0, 14.0, 3.0, 5.0, 1.0, 2.0, 5.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0390625, -0.037428855895996094, -0.03579521179199219, -0.03416156768798828, -0.032527923583984375, -0.03089427947998047, -0.029260635375976562, -0.027626991271972656, -0.02599334716796875, -0.024359703063964844, -0.022726058959960938, -0.02109241485595703, -0.019458770751953125, -0.01782512664794922, -0.016191482543945312, -0.014557838439941406, -0.0129241943359375, -0.011290550231933594, -0.009656906127929688, -0.008023262023925781, -0.006389617919921875, -0.004755973815917969, -0.0031223297119140625, -0.0014886856079101562, 0.00014495849609375, 0.0017786026000976562, 0.0034122467041015625, 0.005045890808105469, 0.006679534912109375, 0.008313179016113281, 0.009946823120117188, 0.011580467224121094, 0.013214111328125, 0.014847755432128906, 0.016481399536132812, 0.01811504364013672, 0.019748687744140625, 0.02138233184814453, 0.023015975952148438, 0.024649620056152344, 0.02628326416015625, 0.027916908264160156, 0.029550552368164062, 0.03118419647216797, 0.032817840576171875, 0.03445148468017578, 0.03608512878417969, 0.037718772888183594, 0.0393524169921875, 0.040986061096191406, 0.04261970520019531, 0.04425334930419922, 0.045886993408203125, 0.04752063751220703, 0.04915428161621094, 0.050787925720214844, 0.05242156982421875, 0.054055213928222656, 0.05568885803222656, 0.05732250213623047, 0.058956146240234375, 0.06058979034423828, 0.06222343444824219, 0.0638570785522461, 0.06549072265625]}, "gradients/encoder.encoder.layers.8.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 4.0, 5.0, 4.0, 12.0, 19.0, 28.0, 41.0, 58.0, 118.0, 152.0, 148.0, 135.0, 107.0, 63.0, 46.0, 36.0, 18.0, 13.0, 3.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.01274871826171875, -0.012455523014068604, -0.012162327766418457, -0.01186913251876831, -0.011575937271118164, -0.011282742023468018, -0.010989546775817871, -0.010696351528167725, -0.010403156280517578, -0.010109961032867432, -0.009816765785217285, -0.009523570537567139, -0.009230375289916992, -0.008937180042266846, -0.0086439847946167, -0.008350789546966553, -0.008057594299316406, -0.00776439905166626, -0.007471203804016113, -0.007178008556365967, -0.00688481330871582, -0.006591618061065674, -0.006298422813415527, -0.006005227565765381, -0.005712032318115234, -0.005418837070465088, -0.005125641822814941, -0.004832446575164795, -0.0045392513275146484, -0.004246056079864502, -0.0039528608322143555, -0.003659665584564209, -0.0033664703369140625, -0.003073275089263916, -0.0027800798416137695, -0.002486884593963623, -0.0021936893463134766, -0.00190049409866333, -0.0016072988510131836, -0.0013141036033630371, -0.0010209083557128906, -0.0007277131080627441, -0.00043451786041259766, -0.00014132261276245117, 0.0001518726348876953, 0.0004450678825378418, 0.0007382631301879883, 0.0010314583778381348, 0.0013246536254882812, 0.0016178488731384277, 0.0019110441207885742, 0.0022042393684387207, 0.002497434616088867, 0.0027906298637390137, 0.00308382511138916, 0.0033770203590393066, 0.003670215606689453, 0.0039634108543396, 0.004256606101989746, 0.004549801349639893, 0.004842996597290039, 0.0051361918449401855, 0.005429387092590332, 0.0057225823402404785, 0.006015777587890625]}, "gradients/encoder.encoder.layers.8.attention.v_proj.weight": {"_type": "histogram", "values": [3.0, 1.0, 1.0, 2.0, 1.0, 3.0, 2.0, 1.0, 5.0, 6.0, 10.0, 5.0, 12.0, 15.0, 23.0, 30.0, 22.0, 40.0, 60.0, 90.0, 169.0, 344.0, 644.0, 1411.0, 3304.0, 8286.0, 23952.0, 87123.0, 645774.0, 209662.0, 44208.0, 13984.0, 5206.0, 2108.0, 969.0, 460.0, 220.0, 150.0, 67.0, 58.0, 32.0, 30.0, 17.0, 9.0, 6.0, 14.0, 8.0, 5.0, 4.0, 6.0, 2.0, 1.0, 1.0, 3.0, 0.0, 0.0, 1.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.0222320556640625, -0.021458864212036133, -0.020685672760009766, -0.0199124813079834, -0.01913928985595703, -0.018366098403930664, -0.017592906951904297, -0.01681971549987793, -0.016046524047851562, -0.015273332595825195, -0.014500141143798828, -0.013726949691772461, -0.012953758239746094, -0.012180566787719727, -0.01140737533569336, -0.010634183883666992, -0.009860992431640625, -0.009087800979614258, -0.00831460952758789, -0.0075414180755615234, -0.006768226623535156, -0.005995035171508789, -0.005221843719482422, -0.004448652267456055, -0.0036754608154296875, -0.0029022693634033203, -0.002129077911376953, -0.001355886459350586, -0.0005826950073242188, 0.00019049644470214844, 0.0009636878967285156, 0.0017368793487548828, 0.00251007080078125, 0.003283262252807617, 0.004056453704833984, 0.0048296451568603516, 0.005602836608886719, 0.006376028060913086, 0.007149219512939453, 0.00792241096496582, 0.008695602416992188, 0.009468793869018555, 0.010241985321044922, 0.011015176773071289, 0.011788368225097656, 0.012561559677124023, 0.01333475112915039, 0.014107942581176758, 0.014881134033203125, 0.015654325485229492, 0.01642751693725586, 0.017200708389282227, 0.017973899841308594, 0.01874709129333496, 0.019520282745361328, 0.020293474197387695, 0.021066665649414062, 0.02183985710144043, 0.022613048553466797, 0.023386240005493164, 0.02415943145751953, 0.0249326229095459, 0.025705814361572266, 0.026479005813598633, 0.027252197265625]}, "gradients/encoder.encoder.layers.8.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 3.0, 1.0, 1.0, 0.0, 0.0, 3.0, 2.0, 0.0, 3.0, 6.0, 3.0, 5.0, 8.0, 13.0, 6.0, 11.0, 13.0, 20.0, 20.0, 27.0, 18.0, 25.0, 25.0, 33.0, 36.0, 44.0, 51.0, 46.0, 42.0, 43.0, 34.0, 37.0, 32.0, 49.0, 44.0, 43.0, 27.0, 36.0, 31.0, 27.0, 27.0, 13.0, 15.0, 16.0, 19.0, 15.0, 9.0, 5.0, 10.0, 7.0, 4.0, 1.0, 2.0, 3.0, 1.0, 3.0, 0.0, 1.0, 3.0], "bins": [-0.01445770263671875, -0.014046192169189453, -0.013634681701660156, -0.01322317123413086, -0.012811660766601562, -0.012400150299072266, -0.011988639831542969, -0.011577129364013672, -0.011165618896484375, -0.010754108428955078, -0.010342597961425781, -0.009931087493896484, -0.009519577026367188, -0.00910806655883789, -0.008696556091308594, -0.008285045623779297, -0.00787353515625, -0.007462024688720703, -0.007050514221191406, -0.006639003753662109, -0.0062274932861328125, -0.005815982818603516, -0.005404472351074219, -0.004992961883544922, -0.004581451416015625, -0.004169940948486328, -0.0037584304809570312, -0.0033469200134277344, -0.0029354095458984375, -0.0025238990783691406, -0.0021123886108398438, -0.0017008781433105469, -0.00128936767578125, -0.0008778572082519531, -0.00046634674072265625, -5.4836273193359375e-05, 0.0003566741943359375, 0.0007681846618652344, 0.0011796951293945312, 0.0015912055969238281, 0.002002716064453125, 0.002414226531982422, 0.0028257369995117188, 0.0032372474670410156, 0.0036487579345703125, 0.004060268402099609, 0.004471778869628906, 0.004883289337158203, 0.0052947998046875, 0.005706310272216797, 0.006117820739746094, 0.006529331207275391, 0.0069408416748046875, 0.007352352142333984, 0.007763862609863281, 0.008175373077392578, 0.008586883544921875, 0.008998394012451172, 0.009409904479980469, 0.009821414947509766, 0.010232925415039062, 0.01064443588256836, 0.011055946350097656, 0.011467456817626953, 0.01187896728515625]}, "gradients/encoder.encoder.layers.8.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 3.0, 2.0, 3.0, 1.0, 4.0, 9.0, 6.0, 11.0, 13.0, 26.0, 30.0, 38.0, 56.0, 70.0, 135.0, 177.0, 269.0, 409.0, 654.0, 1242.0, 2056.0, 4167.0, 9197.0, 25575.0, 112620.0, 768749.0, 84691.0, 21261.0, 8348.0, 3722.0, 2056.0, 1111.0, 668.0, 400.0, 260.0, 163.0, 107.0, 80.0, 51.0, 40.0, 30.0, 17.0, 8.0, 5.0, 8.0, 5.0, 7.0, 3.0, 4.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-9.548664093017578e-05, -9.224191308021545e-05, -8.899718523025513e-05, -8.57524573802948e-05, -8.250772953033447e-05, -7.926300168037415e-05, -7.601827383041382e-05, -7.277354598045349e-05, -6.952881813049316e-05, -6.628409028053284e-05, -6.303936243057251e-05, -5.979463458061218e-05, -5.6549906730651855e-05, -5.330517888069153e-05, -5.00604510307312e-05, -4.6815723180770874e-05, -4.357099533081055e-05, -4.032626748085022e-05, -3.708153963088989e-05, -3.3836811780929565e-05, -3.059208393096924e-05, -2.734735608100891e-05, -2.4102628231048584e-05, -2.0857900381088257e-05, -1.761317253112793e-05, -1.4368444681167603e-05, -1.1123716831207275e-05, -7.878988981246948e-06, -4.634261131286621e-06, -1.389533281326294e-06, 1.8551945686340332e-06, 5.09992241859436e-06, 8.344650268554688e-06, 1.1589378118515015e-05, 1.4834105968475342e-05, 1.807883381843567e-05, 2.1323561668395996e-05, 2.4568289518356323e-05, 2.781301736831665e-05, 3.105774521827698e-05, 3.4302473068237305e-05, 3.754720091819763e-05, 4.079192876815796e-05, 4.4036656618118286e-05, 4.728138446807861e-05, 5.052611231803894e-05, 5.377084016799927e-05, 5.7015568017959595e-05, 6.026029586791992e-05, 6.350502371788025e-05, 6.674975156784058e-05, 6.99944794178009e-05, 7.323920726776123e-05, 7.648393511772156e-05, 7.972866296768188e-05, 8.297339081764221e-05, 8.621811866760254e-05, 8.946284651756287e-05, 9.27075743675232e-05, 9.595230221748352e-05, 9.919703006744385e-05, 0.00010244175791740417, 0.0001056864857673645, 0.00010893121361732483, 0.00011217594146728516]}, "gradients/encoder.encoder.layers.8.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 2.0, 5.0, 5.0, 4.0, 6.0, 5.0, 9.0, 5.0, 17.0, 23.0, 12.0, 22.0, 50.0, 51.0, 99.0, 120.0, 161.0, 126.0, 83.0, 64.0, 35.0, 25.0, 14.0, 15.0, 9.0, 13.0, 6.0, 3.0, 3.0, 3.0, 1.0, 3.0, 3.0, 1.0, 2.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 2.0, 1.0], "bins": [-1.8656253814697266e-05, -1.8156133592128754e-05, -1.7656013369560242e-05, -1.715589314699173e-05, -1.6655772924423218e-05, -1.6155652701854706e-05, -1.5655532479286194e-05, -1.5155412256717682e-05, -1.465529203414917e-05, -1.4155171811580658e-05, -1.3655051589012146e-05, -1.3154931366443634e-05, -1.2654811143875122e-05, -1.215469092130661e-05, -1.1654570698738098e-05, -1.1154450476169586e-05, -1.0654330253601074e-05, -1.0154210031032562e-05, -9.65408980846405e-06, -9.153969585895538e-06, -8.653849363327026e-06, -8.153729140758514e-06, -7.653608918190002e-06, -7.1534886956214905e-06, -6.6533684730529785e-06, -6.1532482504844666e-06, -5.653128027915955e-06, -5.153007805347443e-06, -4.652887582778931e-06, -4.152767360210419e-06, -3.6526471376419067e-06, -3.1525269150733948e-06, -2.652406692504883e-06, -2.152286469936371e-06, -1.6521662473678589e-06, -1.152046024799347e-06, -6.51925802230835e-07, -1.51805579662323e-07, 3.4831464290618896e-07, 8.484348654747009e-07, 1.3485550880432129e-06, 1.8486753106117249e-06, 2.348795533180237e-06, 2.8489157557487488e-06, 3.3490359783172607e-06, 3.849156200885773e-06, 4.349276423454285e-06, 4.849396646022797e-06, 5.349516868591309e-06, 5.8496370911598206e-06, 6.3497573137283325e-06, 6.8498775362968445e-06, 7.3499977588653564e-06, 7.850117981433868e-06, 8.35023820400238e-06, 8.850358426570892e-06, 9.350478649139404e-06, 9.850598871707916e-06, 1.0350719094276428e-05, 1.085083931684494e-05, 1.1350959539413452e-05, 1.1851079761981964e-05, 1.2351199984550476e-05, 1.2851320207118988e-05, 1.33514404296875e-05]}, "gradients/encoder.encoder.layers.8.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 2.0, 2.0, 4.0, 6.0, 11.0, 12.0, 24.0, 27.0, 45.0, 107.0, 195.0, 446.0, 1336.0, 4693.0, 25701.0, 896418.0, 105299.0, 10410.0, 2446.0, 764.0, 287.0, 125.0, 78.0, 54.0, 18.0, 20.0, 12.0, 5.0, 8.0, 4.0, 5.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00032806396484375, -0.0003188885748386383, -0.0003097131848335266, -0.0003005377948284149, -0.0002913624048233032, -0.00028218701481819153, -0.00027301162481307983, -0.00026383623480796814, -0.00025466084480285645, -0.00024548545479774475, -0.00023631006479263306, -0.00022713467478752136, -0.00021795928478240967, -0.00020878389477729797, -0.00019960850477218628, -0.00019043311476707458, -0.0001812577247619629, -0.0001720823347568512, -0.0001629069447517395, -0.0001537315547466278, -0.0001445561647415161, -0.00013538077473640442, -0.00012620538473129272, -0.00011702999472618103, -0.00010785460472106934, -9.867921471595764e-05, -8.950382471084595e-05, -8.032843470573425e-05, -7.115304470062256e-05, -6.197765469551086e-05, -5.280226469039917e-05, -4.3626874685287476e-05, -3.445148468017578e-05, -2.5276094675064087e-05, -1.6100704669952393e-05, -6.925314664840698e-06, 2.250075340270996e-06, 1.142546534538269e-05, 2.0600855350494385e-05, 2.977624535560608e-05, 3.8951635360717773e-05, 4.812702536582947e-05, 5.730241537094116e-05, 6.647780537605286e-05, 7.565319538116455e-05, 8.482858538627625e-05, 9.400397539138794e-05, 0.00010317936539649963, 0.00011235475540161133, 0.00012153014540672302, 0.00013070553541183472, 0.0001398809254169464, 0.0001490563154220581, 0.0001582317054271698, 0.0001674070954322815, 0.0001765824854373932, 0.00018575787544250488, 0.00019493326544761658, 0.00020410865545272827, 0.00021328404545783997, 0.00022245943546295166, 0.00023163482546806335, 0.00024081021547317505, 0.00024998560547828674, 0.00025916099548339844]}, "gradients/encoder.encoder.layers.8.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 4.0, 0.0, 4.0, 2.0, 7.0, 14.0, 6.0, 10.0, 13.0, 11.0, 25.0, 30.0, 51.0, 93.0, 166.0, 209.0, 118.0, 81.0, 48.0, 34.0, 16.0, 24.0, 15.0, 4.0, 8.0, 5.0, 5.0, 5.0, 4.0, 1.0, 1.0, 3.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00013828277587890625, -0.0001334063708782196, -0.00012852996587753296, -0.0001236535608768463, -0.00011877715587615967, -0.00011390075087547302, -0.00010902434587478638, -0.00010414794087409973, -9.927153587341309e-05, -9.439513087272644e-05, -8.95187258720398e-05, -8.464232087135315e-05, -7.97659158706665e-05, -7.488951086997986e-05, -7.001310586929321e-05, -6.513670086860657e-05, -6.026029586791992e-05, -5.5383890867233276e-05, -5.050748586654663e-05, -4.5631080865859985e-05, -4.075467586517334e-05, -3.5878270864486694e-05, -3.100186586380005e-05, -2.6125460863113403e-05, -2.1249055862426758e-05, -1.6372650861740112e-05, -1.1496245861053467e-05, -6.619840860366821e-06, -1.7434358596801758e-06, 3.1329691410064697e-06, 8.009374141693115e-06, 1.288577914237976e-05, 1.7762184143066406e-05, 2.2638589143753052e-05, 2.7514994144439697e-05, 3.239139914512634e-05, 3.726780414581299e-05, 4.2144209146499634e-05, 4.702061414718628e-05, 5.1897019147872925e-05, 5.677342414855957e-05, 6.164982914924622e-05, 6.652623414993286e-05, 7.140263915061951e-05, 7.627904415130615e-05, 8.11554491519928e-05, 8.603185415267944e-05, 9.090825915336609e-05, 9.578466415405273e-05, 0.00010066106915473938, 0.00010553747415542603, 0.00011041387915611267, 0.00011529028415679932, 0.00012016668915748596, 0.0001250430941581726, 0.00012991949915885925, 0.0001347959041595459, 0.00013967230916023254, 0.0001445487141609192, 0.00014942511916160583, 0.00015430152416229248, 0.00015917792916297913, 0.00016405433416366577, 0.00016893073916435242, 0.00017380714416503906]}, "gradients/encoder.encoder.layers.8.layer_norm.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 5.0, 1.0, 4.0, 3.0, 5.0, 5.0, 9.0, 9.0, 10.0, 22.0, 29.0, 22.0, 32.0, 68.0, 108.0, 203.0, 139.0, 95.0, 70.0, 36.0, 41.0, 26.0, 20.0, 13.0, 12.0, 6.0, 4.0, 3.0, 5.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.09341301023960114, -0.08993567526340485, -0.08645833283662796, -0.08298099040985107, -0.07950365543365479, -0.0760263204574585, -0.07254897803068161, -0.06907163560390472, -0.06559430062770844, -0.06211696192622185, -0.05863962322473526, -0.05516228452324867, -0.051684945821762085, -0.0482076071202755, -0.04473026841878891, -0.04125292971730232, -0.037775591015815735, -0.03429825231432915, -0.03082091361284256, -0.027343574911355972, -0.023866236209869385, -0.020388897508382797, -0.01691155880689621, -0.013434220105409622, -0.009956881403923035, -0.006479542702436447, -0.0030022040009498596, 0.0004751347005367279, 0.003952473402023315, 0.007429812103509903, 0.01090715080499649, 0.014384489506483078, 0.017861828207969666, 0.021339166909456253, 0.02481650561094284, 0.028293844312429428, 0.031771183013916016, 0.0352485217154026, 0.03872586041688919, 0.04220319911837578, 0.045680537819862366, 0.04915787652134895, 0.05263521522283554, 0.05611255392432213, 0.059589892625808716, 0.063067227602005, 0.06654457002878189, 0.07002191245555878, 0.07349924743175507, 0.07697658240795135, 0.08045392483472824, 0.08393126726150513, 0.08740860223770142, 0.0908859372138977, 0.09436327964067459, 0.09784062206745148, 0.10131795704364777, 0.10479529201984406, 0.10827263444662094, 0.11174997687339783, 0.11522731184959412, 0.1187046468257904, 0.12218198925256729, 0.12565933167934418, 0.12913666665554047]}, "gradients/encoder.encoder.layers.8.layer_norm.bias": {"_type": "histogram", "values": [1.0, 3.0, 3.0, 4.0, 6.0, 7.0, 17.0, 9.0, 16.0, 19.0, 17.0, 32.0, 39.0, 35.0, 44.0, 35.0, 59.0, 82.0, 51.0, 51.0, 59.0, 55.0, 72.0, 38.0, 47.0, 38.0, 49.0, 30.0, 17.0, 26.0, 20.0, 13.0, 12.0, 3.0, 4.0, 4.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.05144846439361572, -0.048815228044986725, -0.04618199169635773, -0.04354875534772873, -0.04091551899909973, -0.038282282650470734, -0.035649046301841736, -0.03301580995321274, -0.03038257360458374, -0.027749337255954742, -0.025116100907325745, -0.022482864558696747, -0.01984962821006775, -0.01721639186143875, -0.014583155512809753, -0.011949919164180756, -0.009316682815551758, -0.00668344646692276, -0.004050210118293762, -0.0014169737696647644, 0.0012162625789642334, 0.003849498927593231, 0.006482735276222229, 0.009115971624851227, 0.011749207973480225, 0.014382444322109222, 0.01701568067073822, 0.019648917019367218, 0.022282153367996216, 0.024915389716625214, 0.02754862606525421, 0.03018186241388321, 0.03281509876251221, 0.035448335111141205, 0.0380815714597702, 0.0407148078083992, 0.0433480441570282, 0.045981280505657196, 0.048614516854286194, 0.05124775320291519, 0.05388098955154419, 0.05651422590017319, 0.059147462248802185, 0.06178069859743118, 0.06441393494606018, 0.06704717129468918, 0.06968040764331818, 0.07231364399194717, 0.07494688034057617, 0.07758011668920517, 0.08021335303783417, 0.08284658938646317, 0.08547982573509216, 0.08811306208372116, 0.09074629843235016, 0.09337953478097916, 0.09601277112960815, 0.09864600747823715, 0.10127924382686615, 0.10391248017549515, 0.10654571652412415, 0.10917895287275314, 0.11181218922138214, 0.11444542557001114, 0.11707866191864014]}, "gradients/encoder.encoder.layers.7.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 2.0, 1.0, 2.0, 1.0, 2.0, 2.0, 7.0, 4.0, 5.0, 12.0, 13.0, 28.0, 36.0, 66.0, 115.0, 241.0, 421.0, 942.0, 2312.0, 9857.0, 3856913.0, 312319.0, 6658.0, 2559.0, 895.0, 416.0, 194.0, 102.0, 49.0, 25.0, 26.0, 17.0, 14.0, 4.0, 5.0, 8.0, 5.0, 1.0, 3.0, 0.0, 4.0, 1.0], "bins": [-0.030670166015625, -0.029955267906188965, -0.02924036979675293, -0.028525471687316895, -0.02781057357788086, -0.027095675468444824, -0.02638077735900879, -0.025665879249572754, -0.02495098114013672, -0.024236083030700684, -0.02352118492126465, -0.022806286811828613, -0.022091388702392578, -0.021376490592956543, -0.020661592483520508, -0.019946694374084473, -0.019231796264648438, -0.018516898155212402, -0.017802000045776367, -0.017087101936340332, -0.016372203826904297, -0.01565730571746826, -0.014942407608032227, -0.014227509498596191, -0.013512611389160156, -0.012797713279724121, -0.012082815170288086, -0.01136791706085205, -0.010653018951416016, -0.00993812084197998, -0.009223222732543945, -0.00850832462310791, -0.007793426513671875, -0.00707852840423584, -0.006363630294799805, -0.0056487321853637695, -0.004933834075927734, -0.004218935966491699, -0.003504037857055664, -0.002789139747619629, -0.0020742416381835938, -0.0013593435287475586, -0.0006444454193115234, 7.045269012451172e-05, 0.0007853507995605469, 0.001500248908996582, 0.002215147018432617, 0.0029300451278686523, 0.0036449432373046875, 0.004359841346740723, 0.005074739456176758, 0.005789637565612793, 0.006504535675048828, 0.007219433784484863, 0.007934331893920898, 0.008649230003356934, 0.009364128112792969, 0.010079026222229004, 0.010793924331665039, 0.011508822441101074, 0.01222372055053711, 0.012938618659973145, 0.01365351676940918, 0.014368414878845215, 0.01508331298828125]}, "gradients/encoder.encoder.layers.7.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0, 4.0, 14.0, 17.0, 23.0, 39.0, 58.0, 93.0, 157.0, 144.0, 136.0, 115.0, 69.0, 47.0, 43.0, 22.0, 14.0, 6.0, 1.0, 1.0, 4.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.0120697021484375, -0.011787116527557373, -0.011504530906677246, -0.01122194528579712, -0.010939359664916992, -0.010656774044036865, -0.010374188423156738, -0.010091602802276611, -0.009809017181396484, -0.009526431560516357, -0.00924384593963623, -0.008961260318756104, -0.008678674697875977, -0.00839608907699585, -0.008113503456115723, -0.007830917835235596, -0.007548332214355469, -0.007265746593475342, -0.006983160972595215, -0.006700575351715088, -0.006417989730834961, -0.006135404109954834, -0.005852818489074707, -0.00557023286819458, -0.005287647247314453, -0.005005061626434326, -0.004722476005554199, -0.004439890384674072, -0.004157304763793945, -0.0038747191429138184, -0.0035921335220336914, -0.0033095479011535645, -0.0030269622802734375, -0.0027443766593933105, -0.0024617910385131836, -0.0021792054176330566, -0.0018966197967529297, -0.0016140341758728027, -0.0013314485549926758, -0.0010488629341125488, -0.0007662773132324219, -0.0004836916923522949, -0.00020110607147216797, 8.147954940795898e-05, 0.00036406517028808594, 0.0006466507911682129, 0.0009292364120483398, 0.0012118220329284668, 0.0014944076538085938, 0.0017769932746887207, 0.0020595788955688477, 0.0023421645164489746, 0.0026247501373291016, 0.0029073357582092285, 0.0031899213790893555, 0.0034725069999694824, 0.0037550926208496094, 0.004037678241729736, 0.004320263862609863, 0.00460284948348999, 0.004885435104370117, 0.005168020725250244, 0.005450606346130371, 0.005733191967010498, 0.006015777587890625]}, "gradients/encoder.encoder.layers.7.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 4.0, 0.0, 2.0, 6.0, 4.0, 4.0, 6.0, 10.0, 9.0, 32.0, 58.0, 84.0, 111.0, 212.0, 323.0, 637.0, 1036.0, 1805.0, 3388.0, 7177.0, 23617.0, 3731841.0, 393772.0, 17299.0, 5940.0, 2930.0, 1674.0, 865.0, 542.0, 367.0, 205.0, 115.0, 74.0, 38.0, 29.0, 25.0, 18.0, 9.0, 9.0, 8.0, 1.0, 3.0, 0.0, 1.0, 3.0, 2.0, 2.0], "bins": [-0.0205230712890625, -0.019995450973510742, -0.019467830657958984, -0.018940210342407227, -0.01841259002685547, -0.01788496971130371, -0.017357349395751953, -0.016829729080200195, -0.016302108764648438, -0.01577448844909668, -0.015246868133544922, -0.014719247817993164, -0.014191627502441406, -0.013664007186889648, -0.01313638687133789, -0.012608766555786133, -0.012081146240234375, -0.011553525924682617, -0.01102590560913086, -0.010498285293579102, -0.009970664978027344, -0.009443044662475586, -0.008915424346923828, -0.00838780403137207, -0.007860183715820312, -0.007332563400268555, -0.006804943084716797, -0.006277322769165039, -0.005749702453613281, -0.0052220821380615234, -0.004694461822509766, -0.004166841506958008, -0.00363922119140625, -0.003111600875854492, -0.0025839805603027344, -0.0020563602447509766, -0.0015287399291992188, -0.001001119613647461, -0.0004734992980957031, 5.412101745605469e-05, 0.0005817413330078125, 0.0011093616485595703, 0.0016369819641113281, 0.002164602279663086, 0.0026922225952148438, 0.0032198429107666016, 0.0037474632263183594, 0.004275083541870117, 0.004802703857421875, 0.005330324172973633, 0.005857944488525391, 0.0063855648040771484, 0.006913185119628906, 0.007440805435180664, 0.007968425750732422, 0.00849604606628418, 0.009023666381835938, 0.009551286697387695, 0.010078907012939453, 0.010606527328491211, 0.011134147644042969, 0.011661767959594727, 0.012189388275146484, 0.012717008590698242, 0.01324462890625]}, "gradients/encoder.encoder.layers.7.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [2.0, 2.0, 3.0, 1.0, 0.0, 1.0, 0.0, 1.0, 4.0, 2.0, 3.0, 3.0, 1.0, 1.0, 1.0, 3.0, 8.0, 9.0, 3.0, 8.0, 18.0, 22.0, 47.0, 195.0, 741.0, 2466.0, 348.0, 91.0, 32.0, 18.0, 14.0, 7.0, 5.0, 2.0, 5.0, 2.0, 3.0, 0.0, 2.0, 2.0, 2.0, 0.0, 3.0, 4.0, 2.0, 0.0, 3.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.008697509765625, -0.008352041244506836, -0.008006572723388672, -0.007661104202270508, -0.007315635681152344, -0.00697016716003418, -0.006624698638916016, -0.0062792301177978516, -0.0059337615966796875, -0.0055882930755615234, -0.005242824554443359, -0.004897356033325195, -0.004551887512207031, -0.004206418991088867, -0.003860950469970703, -0.003515481948852539, -0.003170013427734375, -0.002824544906616211, -0.002479076385498047, -0.002133607864379883, -0.0017881393432617188, -0.0014426708221435547, -0.0010972023010253906, -0.0007517337799072266, -0.0004062652587890625, -6.079673767089844e-05, 0.0002846717834472656, 0.0006301403045654297, 0.0009756088256835938, 0.0013210773468017578, 0.0016665458679199219, 0.002012014389038086, 0.00235748291015625, 0.002702951431274414, 0.003048419952392578, 0.003393888473510742, 0.0037393569946289062, 0.00408482551574707, 0.004430294036865234, 0.0047757625579833984, 0.0051212310791015625, 0.0054666996002197266, 0.005812168121337891, 0.006157636642456055, 0.006503105163574219, 0.006848573684692383, 0.007194042205810547, 0.007539510726928711, 0.007884979248046875, 0.008230447769165039, 0.008575916290283203, 0.008921384811401367, 0.009266853332519531, 0.009612321853637695, 0.00995779037475586, 0.010303258895874023, 0.010648727416992188, 0.010994195938110352, 0.011339664459228516, 0.01168513298034668, 0.012030601501464844, 0.012376070022583008, 0.012721538543701172, 0.013067007064819336, 0.0134124755859375]}, "gradients/encoder.encoder.layers.7.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 3.0, 1.0, 1.0, 4.0, 3.0, 5.0, 6.0, 5.0, 12.0, 19.0, 18.0, 26.0, 40.0, 52.0, 59.0, 128.0, 231.0, 115.0, 69.0, 60.0, 22.0, 28.0, 21.0, 17.0, 19.0, 13.0, 9.0, 6.0, 3.0, 2.0, 9.0, 1.0, 3.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0], "bins": [-0.06320729851722717, -0.06151523068547249, -0.059823162853717804, -0.05813109502196312, -0.056439027190208435, -0.05474695935845375, -0.053054891526699066, -0.05136282369494438, -0.0496707558631897, -0.04797868803143501, -0.04628662019968033, -0.044594552367925644, -0.04290248453617096, -0.041210416704416275, -0.03951834887266159, -0.037826281040906906, -0.03613421320915222, -0.03444214537739754, -0.03275007754564285, -0.03105800971388817, -0.029365941882133484, -0.0276738740503788, -0.025981806218624115, -0.02428973838686943, -0.022597670555114746, -0.02090560272336006, -0.019213534891605377, -0.017521467059850693, -0.01582939922809601, -0.014137331396341324, -0.01244526356458664, -0.010753195732831955, -0.009061131626367569, -0.0073690637946128845, -0.0056769959628582, -0.003984928131103516, -0.002292860299348831, -0.0006007924675941467, 0.0010912753641605377, 0.002783343195915222, 0.004475411027669907, 0.006167478859424591, 0.007859546691179276, 0.00955161452293396, 0.011243682354688644, 0.012935750186443329, 0.014627818018198013, 0.016319885849952698, 0.018011953681707382, 0.019704021513462067, 0.02139608934521675, 0.023088157176971436, 0.02478022500872612, 0.026472292840480804, 0.02816436067223549, 0.029856428503990173, 0.03154849633574486, 0.03324056416749954, 0.03493263199925423, 0.03662469983100891, 0.038316767662763596, 0.04000883549451828, 0.041700903326272964, 0.04339297115802765, 0.04508503898978233]}, "gradients/encoder.encoder.layers.7.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 0.0, 2.0, 0.0, 2.0, 1.0, 7.0, 3.0, 3.0, 5.0, 14.0, 7.0, 8.0, 10.0, 17.0, 11.0, 26.0, 18.0, 23.0, 21.0, 29.0, 35.0, 39.0, 43.0, 48.0, 46.0, 52.0, 36.0, 51.0, 41.0, 49.0, 47.0, 37.0, 28.0, 33.0, 29.0, 21.0, 30.0, 23.0, 23.0, 21.0, 18.0, 14.0, 15.0, 9.0, 7.0, 2.0, 4.0, 5.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0], "bins": [-0.030657470226287842, -0.029674887657165527, -0.028692305088043213, -0.0277097225189209, -0.026727139949798584, -0.02574455738067627, -0.024761974811553955, -0.02377939224243164, -0.022796809673309326, -0.02181422710418701, -0.020831644535064697, -0.019849061965942383, -0.01886647939682007, -0.017883896827697754, -0.01690131425857544, -0.015918731689453125, -0.01493614912033081, -0.013953566551208496, -0.012970983982086182, -0.011988401412963867, -0.011005818843841553, -0.010023236274719238, -0.009040653705596924, -0.00805807113647461, -0.007075488567352295, -0.0060929059982299805, -0.005110323429107666, -0.0041277408599853516, -0.003145158290863037, -0.0021625757217407227, -0.0011799931526184082, -0.00019741058349609375, 0.0007851719856262207, 0.0017677545547485352, 0.0027503371238708496, 0.003732919692993164, 0.0047155022621154785, 0.005698084831237793, 0.006680667400360107, 0.007663249969482422, 0.008645832538604736, 0.00962841510772705, 0.010610997676849365, 0.01159358024597168, 0.012576162815093994, 0.013558745384216309, 0.014541327953338623, 0.015523910522460938, 0.016506493091583252, 0.017489075660705566, 0.01847165822982788, 0.019454240798950195, 0.02043682336807251, 0.021419405937194824, 0.02240198850631714, 0.023384571075439453, 0.024367153644561768, 0.025349736213684082, 0.026332318782806396, 0.02731490135192871, 0.028297483921051025, 0.02928006649017334, 0.030262649059295654, 0.03124523162841797, 0.03222781419754028]}, "gradients/encoder.encoder.layers.7.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 3.0, 0.0, 1.0, 1.0, 4.0, 4.0, 7.0, 4.0, 9.0, 9.0, 12.0, 22.0, 22.0, 18.0, 27.0, 34.0, 77.0, 148.0, 229.0, 470.0, 1193.0, 3497.0, 15180.0, 96535.0, 793993.0, 113791.0, 17115.0, 3828.0, 1195.0, 461.0, 249.0, 149.0, 66.0, 65.0, 44.0, 26.0, 13.0, 17.0, 12.0, 9.0, 17.0, 4.0, 5.0, 1.0, 0.0, 0.0, 0.0, 3.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.037200927734375, -0.035897254943847656, -0.03459358215332031, -0.03328990936279297, -0.031986236572265625, -0.03068256378173828, -0.029378890991210938, -0.028075218200683594, -0.02677154541015625, -0.025467872619628906, -0.024164199829101562, -0.02286052703857422, -0.021556854248046875, -0.02025318145751953, -0.018949508666992188, -0.017645835876464844, -0.0163421630859375, -0.015038490295410156, -0.013734817504882812, -0.012431144714355469, -0.011127471923828125, -0.009823799133300781, -0.008520126342773438, -0.007216453552246094, -0.00591278076171875, -0.004609107971191406, -0.0033054351806640625, -0.0020017623901367188, -0.000698089599609375, 0.0006055831909179688, 0.0019092559814453125, 0.0032129287719726562, 0.0045166015625, 0.005820274353027344, 0.0071239471435546875, 0.008427619934082031, 0.009731292724609375, 0.011034965515136719, 0.012338638305664062, 0.013642311096191406, 0.01494598388671875, 0.016249656677246094, 0.017553329467773438, 0.01885700225830078, 0.020160675048828125, 0.02146434783935547, 0.022768020629882812, 0.024071693420410156, 0.0253753662109375, 0.026679039001464844, 0.027982711791992188, 0.02928638458251953, 0.030590057373046875, 0.03189373016357422, 0.03319740295410156, 0.034501075744628906, 0.03580474853515625, 0.037108421325683594, 0.03841209411621094, 0.03971576690673828, 0.041019439697265625, 0.04232311248779297, 0.04362678527832031, 0.044930458068847656, 0.046234130859375]}, "gradients/encoder.encoder.layers.7.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 12.0, 16.0, 28.0, 40.0, 58.0, 103.0, 152.0, 146.0, 134.0, 110.0, 69.0, 45.0, 42.0, 22.0, 14.0, 5.0, 1.0, 2.0, 3.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.01202392578125, -0.011742055416107178, -0.011460185050964355, -0.011178314685821533, -0.010896444320678711, -0.010614573955535889, -0.010332703590393066, -0.010050833225250244, -0.009768962860107422, -0.0094870924949646, -0.009205222129821777, -0.008923351764678955, -0.008641481399536133, -0.00835961103439331, -0.008077740669250488, -0.007795870304107666, -0.007513999938964844, -0.0072321295738220215, -0.006950259208679199, -0.006668388843536377, -0.006386518478393555, -0.006104648113250732, -0.00582277774810791, -0.005540907382965088, -0.005259037017822266, -0.004977166652679443, -0.004695296287536621, -0.004413425922393799, -0.0041315555572509766, -0.0038496851921081543, -0.003567814826965332, -0.0032859444618225098, -0.0030040740966796875, -0.0027222037315368652, -0.002440333366394043, -0.0021584630012512207, -0.0018765926361083984, -0.0015947222709655762, -0.001312851905822754, -0.0010309815406799316, -0.0007491111755371094, -0.0004672408103942871, -0.00018537044525146484, 9.649991989135742e-05, 0.0003783702850341797, 0.000660240650177002, 0.0009421110153198242, 0.0012239813804626465, 0.0015058517456054688, 0.001787722110748291, 0.0020695924758911133, 0.0023514628410339355, 0.002633333206176758, 0.00291520357131958, 0.0031970739364624023, 0.0034789443016052246, 0.003760814666748047, 0.004042685031890869, 0.004324555397033691, 0.004606425762176514, 0.004888296127319336, 0.005170166492462158, 0.0054520368576049805, 0.005733907222747803, 0.006015777587890625]}, "gradients/encoder.encoder.layers.7.attention.v_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 1.0, 2.0, 3.0, 2.0, 2.0, 6.0, 4.0, 4.0, 6.0, 8.0, 17.0, 11.0, 17.0, 17.0, 31.0, 51.0, 66.0, 109.0, 157.0, 262.0, 492.0, 950.0, 1857.0, 3936.0, 8370.0, 19496.0, 52860.0, 213564.0, 602734.0, 90879.0, 29748.0, 11996.0, 5359.0, 2678.0, 1292.0, 645.0, 330.0, 187.0, 132.0, 84.0, 50.0, 37.0, 21.0, 23.0, 16.0, 12.0, 9.0, 9.0, 6.0, 11.0, 2.0, 4.0, 1.0, 0.0, 3.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.02056884765625, -0.019910097122192383, -0.019251346588134766, -0.01859259605407715, -0.01793384552001953, -0.017275094985961914, -0.016616344451904297, -0.01595759391784668, -0.015298843383789062, -0.014640092849731445, -0.013981342315673828, -0.013322591781616211, -0.012663841247558594, -0.012005090713500977, -0.01134634017944336, -0.010687589645385742, -0.010028839111328125, -0.009370088577270508, -0.00871133804321289, -0.008052587509155273, -0.007393836975097656, -0.006735086441040039, -0.006076335906982422, -0.005417585372924805, -0.0047588348388671875, -0.00410008430480957, -0.003441333770751953, -0.002782583236694336, -0.0021238327026367188, -0.0014650821685791016, -0.0008063316345214844, -0.0001475811004638672, 0.00051116943359375, 0.0011699199676513672, 0.0018286705017089844, 0.0024874210357666016, 0.0031461715698242188, 0.003804922103881836, 0.004463672637939453, 0.00512242317199707, 0.0057811737060546875, 0.006439924240112305, 0.007098674774169922, 0.007757425308227539, 0.008416175842285156, 0.009074926376342773, 0.00973367691040039, 0.010392427444458008, 0.011051177978515625, 0.011709928512573242, 0.01236867904663086, 0.013027429580688477, 0.013686180114746094, 0.014344930648803711, 0.015003681182861328, 0.015662431716918945, 0.016321182250976562, 0.01697993278503418, 0.017638683319091797, 0.018297433853149414, 0.01895618438720703, 0.01961493492126465, 0.020273685455322266, 0.020932435989379883, 0.0215911865234375]}, "gradients/encoder.encoder.layers.7.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 5.0, 3.0, 9.0, 6.0, 8.0, 10.0, 9.0, 13.0, 12.0, 11.0, 14.0, 13.0, 29.0, 28.0, 19.0, 28.0, 31.0, 32.0, 33.0, 36.0, 39.0, 44.0, 34.0, 35.0, 44.0, 42.0, 45.0, 31.0, 37.0, 48.0, 29.0, 33.0, 21.0, 17.0, 26.0, 28.0, 18.0, 12.0, 12.0, 9.0, 9.0, 15.0, 6.0, 6.0, 4.0, 4.0, 6.0, 4.0, 2.0, 2.0, 1.0, 1.0, 0.0, 1.0, 2.0], "bins": [-0.0126953125, -0.012306928634643555, -0.01191854476928711, -0.011530160903930664, -0.011141777038574219, -0.010753393173217773, -0.010365009307861328, -0.009976625442504883, -0.009588241577148438, -0.009199857711791992, -0.008811473846435547, -0.008423089981079102, -0.008034706115722656, -0.007646322250366211, -0.007257938385009766, -0.00686955451965332, -0.006481170654296875, -0.00609278678894043, -0.005704402923583984, -0.005316019058227539, -0.004927635192871094, -0.0045392513275146484, -0.004150867462158203, -0.003762483596801758, -0.0033740997314453125, -0.002985715866088867, -0.002597332000732422, -0.0022089481353759766, -0.0018205642700195312, -0.001432180404663086, -0.0010437965393066406, -0.0006554126739501953, -0.00026702880859375, 0.00012135505676269531, 0.0005097389221191406, 0.0008981227874755859, 0.0012865066528320312, 0.0016748905181884766, 0.002063274383544922, 0.002451658248901367, 0.0028400421142578125, 0.003228425979614258, 0.003616809844970703, 0.0040051937103271484, 0.004393577575683594, 0.004781961441040039, 0.005170345306396484, 0.00555872917175293, 0.005947113037109375, 0.00633549690246582, 0.006723880767822266, 0.007112264633178711, 0.007500648498535156, 0.007889032363891602, 0.008277416229248047, 0.008665800094604492, 0.009054183959960938, 0.009442567825317383, 0.009830951690673828, 0.010219335556030273, 0.010607719421386719, 0.010996103286743164, 0.01138448715209961, 0.011772871017456055, 0.0121612548828125]}, "gradients/encoder.encoder.layers.7.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 4.0, 2.0, 3.0, 7.0, 4.0, 7.0, 4.0, 16.0, 17.0, 26.0, 28.0, 53.0, 78.0, 104.0, 160.0, 231.0, 391.0, 605.0, 1056.0, 2071.0, 4793.0, 15846.0, 122320.0, 840176.0, 44127.0, 9253.0, 3364.0, 1554.0, 813.0, 477.0, 295.0, 177.0, 145.0, 87.0, 77.0, 51.0, 28.0, 30.0, 16.0, 22.0, 7.0, 18.0, 5.0, 4.0, 6.0, 3.0, 4.0, 0.0, 1.0], "bins": [-0.00022220611572265625, -0.00021640397608280182, -0.0002106018364429474, -0.00020479969680309296, -0.00019899755716323853, -0.0001931954175233841, -0.00018739327788352966, -0.00018159113824367523, -0.0001757889986038208, -0.00016998685896396637, -0.00016418471932411194, -0.0001583825796842575, -0.00015258044004440308, -0.00014677830040454865, -0.00014097616076469421, -0.00013517402112483978, -0.00012937188148498535, -0.00012356974184513092, -0.00011776760220527649, -0.00011196546256542206, -0.00010616332292556763, -0.0001003611832857132, -9.455904364585876e-05, -8.875690400600433e-05, -8.29547643661499e-05, -7.715262472629547e-05, -7.135048508644104e-05, -6.554834544658661e-05, -5.974620580673218e-05, -5.3944066166877747e-05, -4.8141926527023315e-05, -4.2339786887168884e-05, -3.653764724731445e-05, -3.073550760746002e-05, -2.493336796760559e-05, -1.913122832775116e-05, -1.3329088687896729e-05, -7.526949048042297e-06, -1.7248094081878662e-06, 4.077330231666565e-06, 9.879469871520996e-06, 1.5681609511375427e-05, 2.148374915122986e-05, 2.728588879108429e-05, 3.308802843093872e-05, 3.889016807079315e-05, 4.469230771064758e-05, 5.0494447350502014e-05, 5.6296586990356445e-05, 6.209872663021088e-05, 6.790086627006531e-05, 7.370300590991974e-05, 7.950514554977417e-05, 8.53072851896286e-05, 9.110942482948303e-05, 9.691156446933746e-05, 0.0001027137041091919, 0.00010851584374904633, 0.00011431798338890076, 0.00012012012302875519, 0.00012592226266860962, 0.00013172440230846405, 0.00013752654194831848, 0.0001433286815881729, 0.00014913082122802734]}, "gradients/encoder.encoder.layers.7.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 2.0, 2.0, 0.0, 1.0, 2.0, 2.0, 6.0, 1.0, 2.0, 6.0, 5.0, 7.0, 10.0, 19.0, 16.0, 28.0, 33.0, 67.0, 132.0, 134.0, 158.0, 120.0, 79.0, 58.0, 43.0, 24.0, 10.0, 10.0, 7.0, 5.0, 7.0, 2.0, 5.0, 2.0, 1.0, 1.0, 0.0, 4.0, 2.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.7418136596679688e-05, -2.6565976440906525e-05, -2.5713816285133362e-05, -2.48616561293602e-05, -2.4009495973587036e-05, -2.3157335817813873e-05, -2.230517566204071e-05, -2.1453015506267548e-05, -2.0600855350494385e-05, -1.9748695194721222e-05, -1.889653503894806e-05, -1.8044374883174896e-05, -1.7192214727401733e-05, -1.634005457162857e-05, -1.5487894415855408e-05, -1.4635734260082245e-05, -1.3783574104309082e-05, -1.293141394853592e-05, -1.2079253792762756e-05, -1.1227093636989594e-05, -1.037493348121643e-05, -9.522773325443268e-06, -8.670613169670105e-06, -7.818453013896942e-06, -6.966292858123779e-06, -6.1141327023506165e-06, -5.261972546577454e-06, -4.409812390804291e-06, -3.557652235031128e-06, -2.705492079257965e-06, -1.8533319234848022e-06, -1.0011717677116394e-06, -1.4901161193847656e-07, 7.031485438346863e-07, 1.5553086996078491e-06, 2.407468855381012e-06, 3.259629011154175e-06, 4.111789166927338e-06, 4.9639493227005005e-06, 5.816109478473663e-06, 6.668269634246826e-06, 7.520429790019989e-06, 8.372589945793152e-06, 9.224750101566315e-06, 1.0076910257339478e-05, 1.092907041311264e-05, 1.1781230568885803e-05, 1.2633390724658966e-05, 1.3485550880432129e-05, 1.4337711036205292e-05, 1.5189871191978455e-05, 1.6042031347751617e-05, 1.689419150352478e-05, 1.7746351659297943e-05, 1.8598511815071106e-05, 1.945067197084427e-05, 2.030283212661743e-05, 2.1154992282390594e-05, 2.2007152438163757e-05, 2.285931259393692e-05, 2.3711472749710083e-05, 2.4563632905483246e-05, 2.541579306125641e-05, 2.626795321702957e-05, 2.7120113372802734e-05]}, "gradients/encoder.encoder.layers.7.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 1.0, 0.0, 3.0, 5.0, 6.0, 11.0, 17.0, 29.0, 41.0, 66.0, 134.0, 207.0, 444.0, 927.0, 2134.0, 6855.0, 296182.0, 729632.0, 7596.0, 2305.0, 956.0, 452.0, 257.0, 114.0, 79.0, 43.0, 16.0, 20.0, 10.0, 5.0, 2.0, 3.0, 4.0, 4.0, 2.0, 1.0, 0.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0007357597351074219, -0.0007147565484046936, -0.0006937533617019653, -0.0006727501749992371, -0.0006517469882965088, -0.0006307438015937805, -0.0006097406148910522, -0.000588737428188324, -0.0005677342414855957, -0.0005467310547828674, -0.0005257278680801392, -0.0005047246813774109, -0.0004837214946746826, -0.00046271830797195435, -0.0004417151212692261, -0.0004207119345664978, -0.00039970874786376953, -0.00037870556116104126, -0.000357702374458313, -0.0003366991877555847, -0.00031569600105285645, -0.0002946928143501282, -0.0002736896276473999, -0.00025268644094467163, -0.00023168325424194336, -0.0002106800675392151, -0.00018967688083648682, -0.00016867369413375854, -0.00014767050743103027, -0.000126667320728302, -0.00010566413402557373, -8.466094732284546e-05, -6.365776062011719e-05, -4.2654573917388916e-05, -2.1651387214660645e-05, -6.48200511932373e-07, 2.03549861907959e-05, 4.135817289352417e-05, 6.236135959625244e-05, 8.336454629898071e-05, 0.00010436773300170898, 0.00012537091970443726, 0.00014637410640716553, 0.0001673772931098938, 0.00018838047981262207, 0.00020938366651535034, 0.0002303868532180786, 0.0002513900399208069, 0.00027239322662353516, 0.00029339641332626343, 0.0003143996000289917, 0.00033540278673171997, 0.00035640597343444824, 0.0003774091601371765, 0.0003984123468399048, 0.00041941553354263306, 0.00044041872024536133, 0.0004614219069480896, 0.00048242509365081787, 0.0005034282803535461, 0.0005244314670562744, 0.0005454346537590027, 0.000566437840461731, 0.0005874410271644592, 0.0006084442138671875]}, "gradients/encoder.encoder.layers.7.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 3.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 2.0, 3.0, 4.0, 5.0, 4.0, 17.0, 49.0, 133.0, 267.0, 280.0, 139.0, 48.0, 18.0, 8.0, 4.0, 6.0, 2.0, 1.0, 2.0, 4.0, 4.0, 1.0, 2.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0003552436828613281, -0.0003429800271987915, -0.0003307163715362549, -0.00031845271587371826, -0.00030618906021118164, -0.000293925404548645, -0.0002816617488861084, -0.0002693980932235718, -0.00025713443756103516, -0.00024487078189849854, -0.00023260712623596191, -0.0002203434705734253, -0.00020807981491088867, -0.00019581615924835205, -0.00018355250358581543, -0.0001712888479232788, -0.0001590251922607422, -0.00014676153659820557, -0.00013449788093566895, -0.00012223422527313232, -0.0001099705696105957, -9.770691394805908e-05, -8.544325828552246e-05, -7.317960262298584e-05, -6.091594696044922e-05, -4.86522912979126e-05, -3.6388635635375977e-05, -2.4124979972839355e-05, -1.1861324310302734e-05, 4.023313522338867e-07, 1.2665987014770508e-05, 2.492964267730713e-05, 3.719329833984375e-05, 4.945695400238037e-05, 6.172060966491699e-05, 7.398426532745361e-05, 8.624792098999023e-05, 9.851157665252686e-05, 0.00011077523231506348, 0.0001230388879776001, 0.00013530254364013672, 0.00014756619930267334, 0.00015982985496520996, 0.00017209351062774658, 0.0001843571662902832, 0.00019662082195281982, 0.00020888447761535645, 0.00022114813327789307, 0.0002334117889404297, 0.0002456754446029663, 0.00025793910026550293, 0.00027020275592803955, 0.00028246641159057617, 0.0002947300672531128, 0.0003069937229156494, 0.00031925737857818604, 0.00033152103424072266, 0.0003437846899032593, 0.0003560483455657959, 0.0003683120012283325, 0.00038057565689086914, 0.00039283931255340576, 0.0004051029682159424, 0.000417366623878479, 0.0004296302795410156]}, "gradients/encoder.encoder.layers.7.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 3.0, 3.0, 4.0, 4.0, 4.0, 5.0, 8.0, 5.0, 6.0, 13.0, 26.0, 27.0, 34.0, 54.0, 82.0, 127.0, 252.0, 114.0, 48.0, 50.0, 40.0, 23.0, 26.0, 10.0, 9.0, 8.0, 6.0, 3.0, 6.0, 3.0, 3.0, 1.0, 3.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.08876732736825943, -0.08564143627882004, -0.08251554518938065, -0.07938964664936066, -0.07626375555992126, -0.07313786447048187, -0.07001197338104248, -0.06688608229160309, -0.0637601912021637, -0.060634300112724304, -0.057508405297994614, -0.05438251420855522, -0.05125661939382553, -0.04813072830438614, -0.04500483721494675, -0.041878946125507355, -0.038753047585487366, -0.035627156496047974, -0.03250126168131828, -0.02937537059187889, -0.02624947763979435, -0.02312358468770981, -0.019997693598270416, -0.016871800646185875, -0.013745907694101334, -0.010620014742016792, -0.007494122721254826, -0.004368230700492859, -0.0012423377484083176, 0.0018835552036762238, 0.005009446293115616, 0.008135339245200157, 0.011261239647865295, 0.014387132599949837, 0.017513025552034378, 0.02063891664147377, 0.02376480959355831, 0.026890702545642853, 0.030016593635082245, 0.03314248472452164, 0.03626837953925133, 0.03939427062869072, 0.04252016544342041, 0.0456460565328598, 0.048771947622299194, 0.051897842437028885, 0.05502373352646828, 0.05814962834119797, 0.06127551943063736, 0.06440141052007675, 0.06752730160951614, 0.07065320014953613, 0.07377909123897552, 0.07690498232841492, 0.08003087341785431, 0.0831567645072937, 0.0862826555967331, 0.08940854668617249, 0.09253443777561188, 0.09566032886505127, 0.09878622740507126, 0.10191211849451065, 0.10503800958395004, 0.10816390067338943, 0.11128979921340942]}, "gradients/encoder.encoder.layers.7.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0, 1.0, 6.0, 9.0, 11.0, 19.0, 10.0, 31.0, 33.0, 33.0, 43.0, 47.0, 42.0, 78.0, 50.0, 75.0, 57.0, 65.0, 66.0, 53.0, 55.0, 39.0, 45.0, 22.0, 35.0, 18.0, 14.0, 13.0, 8.0, 9.0, 9.0, 4.0, 8.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.05976712703704834, -0.05738585442304611, -0.055004578083753586, -0.05262330174446106, -0.05024202913045883, -0.047860756516456604, -0.04547948017716408, -0.04309820383787155, -0.040716931223869324, -0.038335658609867096, -0.03595438227057457, -0.03357310593128204, -0.031191833317279816, -0.02881055884063244, -0.02642928436398506, -0.024048009887337685, -0.021666735410690308, -0.01928546093404293, -0.016904186457395554, -0.014522911980748177, -0.0121416375041008, -0.009760363027453423, -0.0073790885508060455, -0.0049978140741586685, -0.0026165395975112915, -0.0002352651208639145, 0.0021460093557834625, 0.0045272838324308395, 0.0069085583090782166, 0.009289832785725594, 0.01167110726237297, 0.014052381739020348, 0.016433656215667725, 0.0188149306923151, 0.02119620516896248, 0.023577479645609856, 0.025958754122257233, 0.02834002859890461, 0.030721303075551987, 0.03310257941484451, 0.03548385202884674, 0.03786512464284897, 0.040246400982141495, 0.04262767732143402, 0.04500894993543625, 0.04739022254943848, 0.049771498888731, 0.05215277522802353, 0.05453404784202576, 0.056915320456027985, 0.05929659679532051, 0.06167787313461304, 0.06405914574861526, 0.06644041836261749, 0.06882169842720032, 0.07120297104120255, 0.07358424365520477, 0.075965516269207, 0.07834678888320923, 0.08072806894779205, 0.08310934156179428, 0.08549061417579651, 0.08787189424037933, 0.09025316685438156, 0.09263443946838379]}, "gradients/encoder.encoder.layers.6.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 2.0, 1.0, 0.0, 2.0, 2.0, 2.0, 3.0, 1.0, 3.0, 2.0, 2.0, 9.0, 7.0, 4.0, 14.0, 24.0, 44.0, 67.0, 98.0, 221.0, 432.0, 801.0, 1783.0, 5506.0, 77302.0, 4085961.0, 15023.0, 3743.0, 1796.0, 691.0, 337.0, 162.0, 73.0, 45.0, 37.0, 24.0, 12.0, 11.0, 18.0, 2.0, 3.0, 7.0, 4.0, 3.0, 3.0, 2.0, 1.0], "bins": [-0.02392578125, -0.023360252380371094, -0.022794723510742188, -0.02222919464111328, -0.021663665771484375, -0.02109813690185547, -0.020532608032226562, -0.019967079162597656, -0.01940155029296875, -0.018836021423339844, -0.018270492553710938, -0.01770496368408203, -0.017139434814453125, -0.01657390594482422, -0.016008377075195312, -0.015442848205566406, -0.0148773193359375, -0.014311790466308594, -0.013746261596679688, -0.013180732727050781, -0.012615203857421875, -0.012049674987792969, -0.011484146118164062, -0.010918617248535156, -0.01035308837890625, -0.009787559509277344, -0.009222030639648438, -0.008656501770019531, -0.008090972900390625, -0.007525444030761719, -0.0069599151611328125, -0.006394386291503906, -0.005828857421875, -0.005263328552246094, -0.0046977996826171875, -0.004132270812988281, -0.003566741943359375, -0.0030012130737304688, -0.0024356842041015625, -0.0018701553344726562, -0.00130462646484375, -0.0007390975952148438, -0.0001735687255859375, 0.00039196014404296875, 0.000957489013671875, 0.0015230178833007812, 0.0020885467529296875, 0.0026540756225585938, 0.0032196044921875, 0.0037851333618164062, 0.0043506622314453125, 0.004916191101074219, 0.005481719970703125, 0.006047248840332031, 0.0066127777099609375, 0.007178306579589844, 0.00774383544921875, 0.008309364318847656, 0.008874893188476562, 0.009440422058105469, 0.010005950927734375, 0.010571479797363281, 0.011137008666992188, 0.011702537536621094, 0.01226806640625]}, "gradients/encoder.encoder.layers.6.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 4.0, 4.0, 4.0, 7.0, 19.0, 26.0, 28.0, 50.0, 98.0, 140.0, 146.0, 137.0, 110.0, 93.0, 43.0, 43.0, 30.0, 16.0, 8.0, 3.0, 0.0, 4.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.01143646240234375, -0.011163830757141113, -0.010891199111938477, -0.01061856746673584, -0.010345935821533203, -0.010073304176330566, -0.00980067253112793, -0.009528040885925293, -0.009255409240722656, -0.00898277759552002, -0.008710145950317383, -0.008437514305114746, -0.00816488265991211, -0.007892251014709473, -0.007619619369506836, -0.007346987724304199, -0.0070743560791015625, -0.006801724433898926, -0.006529092788696289, -0.006256461143493652, -0.005983829498291016, -0.005711197853088379, -0.005438566207885742, -0.0051659345626831055, -0.004893302917480469, -0.004620671272277832, -0.004348039627075195, -0.004075407981872559, -0.003802776336669922, -0.003530144691467285, -0.0032575130462646484, -0.0029848814010620117, -0.002712249755859375, -0.0024396181106567383, -0.0021669864654541016, -0.0018943548202514648, -0.0016217231750488281, -0.0013490915298461914, -0.0010764598846435547, -0.000803828239440918, -0.0005311965942382812, -0.00025856494903564453, 1.4066696166992188e-05, 0.0002866983413696289, 0.0005593299865722656, 0.0008319616317749023, 0.001104593276977539, 0.0013772249221801758, 0.0016498565673828125, 0.0019224882125854492, 0.002195119857788086, 0.0024677515029907227, 0.0027403831481933594, 0.003013014793395996, 0.003285646438598633, 0.0035582780838012695, 0.0038309097290039062, 0.004103541374206543, 0.00437617301940918, 0.004648804664611816, 0.004921436309814453, 0.00519406795501709, 0.0054666996002197266, 0.005739331245422363, 0.006011962890625]}, "gradients/encoder.encoder.layers.6.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 0.0, 1.0, 4.0, 4.0, 2.0, 11.0, 17.0, 13.0, 24.0, 27.0, 41.0, 81.0, 100.0, 246.0, 700.0, 2797.0, 34131.0, 4145258.0, 8203.0, 1584.0, 466.0, 221.0, 121.0, 62.0, 42.0, 43.0, 22.0, 24.0, 17.0, 6.0, 9.0, 4.0, 1.0, 3.0, 7.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.053070068359375, -0.05164623260498047, -0.05022239685058594, -0.048798561096191406, -0.047374725341796875, -0.045950889587402344, -0.04452705383300781, -0.04310321807861328, -0.04167938232421875, -0.04025554656982422, -0.03883171081542969, -0.037407875061035156, -0.035984039306640625, -0.034560203552246094, -0.03313636779785156, -0.03171253204345703, -0.0302886962890625, -0.02886486053466797, -0.027441024780273438, -0.026017189025878906, -0.024593353271484375, -0.023169517517089844, -0.021745681762695312, -0.02032184600830078, -0.01889801025390625, -0.01747417449951172, -0.016050338745117188, -0.014626502990722656, -0.013202667236328125, -0.011778831481933594, -0.010354995727539062, -0.008931159973144531, -0.00750732421875, -0.006083488464355469, -0.0046596527099609375, -0.0032358169555664062, -0.001811981201171875, -0.00038814544677734375, 0.0010356903076171875, 0.0024595260620117188, 0.00388336181640625, 0.005307197570800781, 0.0067310333251953125, 0.008154869079589844, 0.009578704833984375, 0.011002540588378906, 0.012426376342773438, 0.013850212097167969, 0.0152740478515625, 0.01669788360595703, 0.018121719360351562, 0.019545555114746094, 0.020969390869140625, 0.022393226623535156, 0.023817062377929688, 0.02524089813232422, 0.02666473388671875, 0.02808856964111328, 0.029512405395507812, 0.030936241149902344, 0.032360076904296875, 0.033783912658691406, 0.03520774841308594, 0.03663158416748047, 0.038055419921875]}, "gradients/encoder.encoder.layers.6.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 5.0, 6.0, 2.0, 12.0, 31.0, 171.0, 3399.0, 382.0, 40.0, 12.0, 5.0, 4.0, 5.0, 5.0, 4.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.01165008544921875, -0.010950684547424316, -0.010251283645629883, -0.00955188274383545, -0.008852481842041016, -0.008153080940246582, -0.0074536800384521484, -0.006754279136657715, -0.006054878234863281, -0.005355477333068848, -0.004656076431274414, -0.0039566755294799805, -0.003257274627685547, -0.0025578737258911133, -0.0018584728240966797, -0.001159071922302246, -0.0004596710205078125, 0.0002397298812866211, 0.0009391307830810547, 0.0016385316848754883, 0.002337932586669922, 0.0030373334884643555, 0.003736734390258789, 0.004436135292053223, 0.005135536193847656, 0.00583493709564209, 0.0065343379974365234, 0.007233738899230957, 0.00793313980102539, 0.008632540702819824, 0.009331941604614258, 0.010031342506408691, 0.010730743408203125, 0.011430144309997559, 0.012129545211791992, 0.012828946113586426, 0.01352834701538086, 0.014227747917175293, 0.014927148818969727, 0.01562654972076416, 0.016325950622558594, 0.017025351524353027, 0.01772475242614746, 0.018424153327941895, 0.019123554229736328, 0.01982295513153076, 0.020522356033325195, 0.02122175693511963, 0.021921157836914062, 0.022620558738708496, 0.02331995964050293, 0.024019360542297363, 0.024718761444091797, 0.02541816234588623, 0.026117563247680664, 0.026816964149475098, 0.02751636505126953, 0.028215765953063965, 0.0289151668548584, 0.029614567756652832, 0.030313968658447266, 0.0310133695602417, 0.03171277046203613, 0.032412171363830566, 0.033111572265625]}, "gradients/encoder.encoder.layers.6.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 2.0, 4.0, 0.0, 3.0, 0.0, 5.0, 5.0, 11.0, 14.0, 16.0, 21.0, 42.0, 56.0, 123.0, 322.0, 155.0, 94.0, 53.0, 30.0, 22.0, 6.0, 9.0, 10.0, 3.0, 1.0, 5.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.12957608699798584, -0.12130890786647797, -0.1130417212843895, -0.10477453470230103, -0.09650735557079315, -0.08824017643928528, -0.07997298985719681, -0.07170580327510834, -0.06343862414360046, -0.05517144128680229, -0.04690425843000412, -0.03863707557320595, -0.030369892716407776, -0.022102709859609604, -0.013835527002811432, -0.00556834414601326, 0.002698838710784912, 0.010966021567583084, 0.019233204424381256, 0.027500387281179428, 0.0357675701379776, 0.04403475299477577, 0.052301935851573944, 0.060569118708372116, 0.06883630156517029, 0.07710348069667816, 0.08537066727876663, 0.0936378538608551, 0.10190503299236298, 0.11017221212387085, 0.11843939870595932, 0.1267065852880478, 0.13497373461723328, 0.14324091374874115, 0.15150809288024902, 0.1597752869129181, 0.16804246604442596, 0.17630964517593384, 0.1845768392086029, 0.19284401834011078, 0.20111119747161865, 0.20937837660312653, 0.2176455557346344, 0.22591274976730347, 0.23417992889881134, 0.2424471080303192, 0.2507143020629883, 0.25898146629333496, 0.26724866032600403, 0.2755158543586731, 0.2837830185890198, 0.29205021262168884, 0.3003174066543579, 0.3085845708847046, 0.31685176491737366, 0.32511892914772034, 0.3333861231803894, 0.34165331721305847, 0.34992048144340515, 0.3581876754760742, 0.3664548397064209, 0.37472203373908997, 0.38298922777175903, 0.3912563920021057, 0.3995235860347748]}, "gradients/encoder.encoder.layers.6.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 2.0, 3.0, 3.0, 0.0, 4.0, 3.0, 7.0, 9.0, 8.0, 16.0, 15.0, 19.0, 32.0, 39.0, 56.0, 59.0, 81.0, 90.0, 95.0, 96.0, 85.0, 73.0, 44.0, 45.0, 36.0, 30.0, 22.0, 12.0, 4.0, 9.0, 6.0, 5.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.22167569398880005, -0.21580113470554352, -0.209926575422287, -0.20405200123786926, -0.19817744195461273, -0.1923028826713562, -0.18642832338809967, -0.18055376410484314, -0.17467918992042542, -0.16880463063716888, -0.16293007135391235, -0.15705549716949463, -0.1511809378862381, -0.14530637860298157, -0.13943181931972504, -0.1335572600364685, -0.12768270075321198, -0.12180814146995544, -0.11593357473611832, -0.11005901545286179, -0.10418444871902466, -0.09830988943576813, -0.0924353301525116, -0.08656077086925507, -0.08068620413541794, -0.07481164485216141, -0.06893707811832428, -0.06306251883506775, -0.05718795582652092, -0.05131339281797409, -0.04543883353471756, -0.03956427052617073, -0.0336897075176239, -0.027815144509077072, -0.021940583363175392, -0.016066022217273712, -0.010191459208726883, -0.004316896200180054, 0.001557663083076477, 0.007432226091623306, 0.013306789100170135, 0.019181352108716965, 0.025055913254618645, 0.030930474400520325, 0.036805037409067154, 0.04267960041761398, 0.048554159700870514, 0.05442872270941734, 0.06030328571796417, 0.0661778450012207, 0.07205241173505783, 0.07792697101831436, 0.08380153775215149, 0.08967609703540802, 0.09555065631866455, 0.10142521560192108, 0.10729978233575821, 0.11317434161901474, 0.11904890835285187, 0.1249234676361084, 0.13079802691936493, 0.13667258620262146, 0.14254716038703918, 0.14842171967029572, 0.15429627895355225]}, "gradients/encoder.encoder.layers.6.attention.out_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 1.0, 3.0, 5.0, 5.0, 4.0, 14.0, 12.0, 15.0, 26.0, 33.0, 31.0, 51.0, 73.0, 123.0, 221.0, 438.0, 1051.0, 2917.0, 11042.0, 55707.0, 656697.0, 274847.0, 34033.0, 7337.0, 2200.0, 757.0, 375.0, 182.0, 117.0, 62.0, 38.0, 39.0, 26.0, 18.0, 15.0, 19.0, 12.0, 1.0, 4.0, 2.0, 1.0, 2.0, 2.0, 0.0, 2.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0340576171875, -0.032877445220947266, -0.03169727325439453, -0.030517101287841797, -0.029336929321289062, -0.028156757354736328, -0.026976585388183594, -0.02579641342163086, -0.024616241455078125, -0.02343606948852539, -0.022255897521972656, -0.021075725555419922, -0.019895553588867188, -0.018715381622314453, -0.01753520965576172, -0.016355037689208984, -0.01517486572265625, -0.013994693756103516, -0.012814521789550781, -0.011634349822998047, -0.010454177856445312, -0.009274005889892578, -0.008093833923339844, -0.006913661956787109, -0.005733489990234375, -0.004553318023681641, -0.0033731460571289062, -0.002192974090576172, -0.0010128021240234375, 0.00016736984252929688, 0.0013475418090820312, 0.0025277137756347656, 0.0037078857421875, 0.004888057708740234, 0.006068229675292969, 0.007248401641845703, 0.008428573608398438, 0.009608745574951172, 0.010788917541503906, 0.01196908950805664, 0.013149261474609375, 0.01432943344116211, 0.015509605407714844, 0.016689777374267578, 0.017869949340820312, 0.019050121307373047, 0.02023029327392578, 0.021410465240478516, 0.02259063720703125, 0.023770809173583984, 0.02495098114013672, 0.026131153106689453, 0.027311325073242188, 0.028491497039794922, 0.029671669006347656, 0.03085184097290039, 0.032032012939453125, 0.03321218490600586, 0.034392356872558594, 0.03557252883911133, 0.03675270080566406, 0.0379328727722168, 0.03911304473876953, 0.040293216705322266, 0.041473388671875]}, "gradients/encoder.encoder.layers.6.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 4.0, 4.0, 4.0, 7.0, 18.0, 23.0, 31.0, 49.0, 83.0, 130.0, 132.0, 124.0, 134.0, 94.0, 56.0, 48.0, 30.0, 20.0, 13.0, 4.0, 2.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.01151275634765625, -0.011240065097808838, -0.010967373847961426, -0.010694682598114014, -0.010421991348266602, -0.01014930009841919, -0.009876608848571777, -0.009603917598724365, -0.009331226348876953, -0.009058535099029541, -0.008785843849182129, -0.008513152599334717, -0.008240461349487305, -0.007967770099639893, -0.0076950788497924805, -0.007422387599945068, -0.007149696350097656, -0.006877005100250244, -0.006604313850402832, -0.00633162260055542, -0.006058931350708008, -0.005786240100860596, -0.005513548851013184, -0.0052408576011657715, -0.004968166351318359, -0.004695475101470947, -0.004422783851623535, -0.004150092601776123, -0.003877401351928711, -0.003604710102081299, -0.0033320188522338867, -0.0030593276023864746, -0.0027866363525390625, -0.0025139451026916504, -0.0022412538528442383, -0.001968562602996826, -0.001695871353149414, -0.001423180103302002, -0.0011504888534545898, -0.0008777976036071777, -0.0006051063537597656, -0.0003324151039123535, -5.9723854064941406e-05, 0.0002129673957824707, 0.0004856586456298828, 0.0007583498954772949, 0.001031041145324707, 0.0013037323951721191, 0.0015764236450195312, 0.0018491148948669434, 0.0021218061447143555, 0.0023944973945617676, 0.0026671886444091797, 0.002939879894256592, 0.003212571144104004, 0.003485262393951416, 0.003757953643798828, 0.00403064489364624, 0.004303336143493652, 0.0045760273933410645, 0.0048487186431884766, 0.005121409893035889, 0.005394101142883301, 0.005666792392730713, 0.005939483642578125]}, "gradients/encoder.encoder.layers.6.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 3.0, 1.0, 5.0, 4.0, 7.0, 8.0, 10.0, 10.0, 22.0, 19.0, 32.0, 45.0, 70.0, 124.0, 223.0, 422.0, 936.0, 2074.0, 4786.0, 12032.0, 33266.0, 111657.0, 651499.0, 162030.0, 43532.0, 15086.0, 5880.0, 2510.0, 1060.0, 545.0, 253.0, 139.0, 89.0, 61.0, 33.0, 21.0, 16.0, 15.0, 11.0, 4.0, 7.0, 6.0, 6.0, 2.0, 1.0, 3.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.0236663818359375, -0.022917985916137695, -0.02216958999633789, -0.021421194076538086, -0.02067279815673828, -0.019924402236938477, -0.019176006317138672, -0.018427610397338867, -0.017679214477539062, -0.016930818557739258, -0.016182422637939453, -0.015434026718139648, -0.014685630798339844, -0.013937234878540039, -0.013188838958740234, -0.01244044303894043, -0.011692047119140625, -0.01094365119934082, -0.010195255279541016, -0.009446859359741211, -0.008698463439941406, -0.007950067520141602, -0.007201671600341797, -0.006453275680541992, -0.0057048797607421875, -0.004956483840942383, -0.004208087921142578, -0.0034596920013427734, -0.0027112960815429688, -0.001962900161743164, -0.0012145042419433594, -0.0004661083221435547, 0.00028228759765625, 0.0010306835174560547, 0.0017790794372558594, 0.002527475357055664, 0.0032758712768554688, 0.0040242671966552734, 0.004772663116455078, 0.005521059036254883, 0.0062694549560546875, 0.007017850875854492, 0.007766246795654297, 0.008514642715454102, 0.009263038635253906, 0.010011434555053711, 0.010759830474853516, 0.01150822639465332, 0.012256622314453125, 0.01300501823425293, 0.013753414154052734, 0.014501810073852539, 0.015250205993652344, 0.01599860191345215, 0.016746997833251953, 0.017495393753051758, 0.018243789672851562, 0.018992185592651367, 0.019740581512451172, 0.020488977432250977, 0.02123737335205078, 0.021985769271850586, 0.02273416519165039, 0.023482561111450195, 0.02423095703125]}, "gradients/encoder.encoder.layers.6.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 4.0, 2.0, 1.0, 5.0, 7.0, 6.0, 3.0, 6.0, 15.0, 12.0, 12.0, 17.0, 23.0, 33.0, 38.0, 34.0, 41.0, 40.0, 34.0, 44.0, 45.0, 40.0, 54.0, 43.0, 50.0, 40.0, 43.0, 43.0, 26.0, 39.0, 40.0, 27.0, 23.0, 24.0, 16.0, 13.0, 14.0, 17.0, 7.0, 8.0, 7.0, 5.0, 6.0, 4.0, 1.0, 3.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.01381683349609375, -0.01339268684387207, -0.01296854019165039, -0.012544393539428711, -0.012120246887207031, -0.011696100234985352, -0.011271953582763672, -0.010847806930541992, -0.010423660278320312, -0.009999513626098633, -0.009575366973876953, -0.009151220321655273, -0.008727073669433594, -0.008302927017211914, -0.007878780364990234, -0.007454633712768555, -0.007030487060546875, -0.006606340408325195, -0.006182193756103516, -0.005758047103881836, -0.005333900451660156, -0.0049097537994384766, -0.004485607147216797, -0.004061460494995117, -0.0036373138427734375, -0.003213167190551758, -0.002789020538330078, -0.0023648738861083984, -0.0019407272338867188, -0.001516580581665039, -0.0010924339294433594, -0.0006682872772216797, -0.000244140625, 0.0001800060272216797, 0.0006041526794433594, 0.001028299331665039, 0.0014524459838867188, 0.0018765926361083984, 0.002300739288330078, 0.002724885940551758, 0.0031490325927734375, 0.003573179244995117, 0.003997325897216797, 0.0044214725494384766, 0.004845619201660156, 0.005269765853881836, 0.005693912506103516, 0.006118059158325195, 0.006542205810546875, 0.006966352462768555, 0.007390499114990234, 0.007814645767211914, 0.008238792419433594, 0.008662939071655273, 0.009087085723876953, 0.009511232376098633, 0.009935379028320312, 0.010359525680541992, 0.010783672332763672, 0.011207818984985352, 0.011631965637207031, 0.012056112289428711, 0.01248025894165039, 0.01290440559387207, 0.01332855224609375]}, "gradients/encoder.encoder.layers.6.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 3.0, 8.0, 7.0, 8.0, 14.0, 14.0, 28.0, 37.0, 54.0, 80.0, 159.0, 265.0, 414.0, 821.0, 1444.0, 3230.0, 8408.0, 28140.0, 409247.0, 552396.0, 28673.0, 8454.0, 3302.0, 1545.0, 761.0, 397.0, 256.0, 115.0, 101.0, 64.0, 45.0, 25.0, 9.0, 12.0, 13.0, 6.0, 2.0, 2.0, 3.0, 4.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0002887248992919922, -0.00027877092361450195, -0.0002688169479370117, -0.0002588629722595215, -0.00024890899658203125, -0.00023895502090454102, -0.00022900104522705078, -0.00021904706954956055, -0.0002090930938720703, -0.00019913911819458008, -0.00018918514251708984, -0.0001792311668395996, -0.00016927719116210938, -0.00015932321548461914, -0.0001493692398071289, -0.00013941526412963867, -0.00012946128845214844, -0.0001195073127746582, -0.00010955333709716797, -9.959936141967773e-05, -8.96453857421875e-05, -7.969141006469727e-05, -6.973743438720703e-05, -5.97834587097168e-05, -4.982948303222656e-05, -3.987550735473633e-05, -2.9921531677246094e-05, -1.996755599975586e-05, -1.0013580322265625e-05, -5.960464477539063e-08, 9.894371032714844e-06, 1.9848346710205078e-05, 2.9802322387695312e-05, 3.975629806518555e-05, 4.971027374267578e-05, 5.9664249420166016e-05, 6.961822509765625e-05, 7.957220077514648e-05, 8.952617645263672e-05, 9.948015213012695e-05, 0.00010943412780761719, 0.00011938810348510742, 0.00012934207916259766, 0.0001392960548400879, 0.00014925003051757812, 0.00015920400619506836, 0.0001691579818725586, 0.00017911195755004883, 0.00018906593322753906, 0.0001990199089050293, 0.00020897388458251953, 0.00021892786026000977, 0.0002288818359375, 0.00023883581161499023, 0.00024878978729248047, 0.0002587437629699707, 0.00026869773864746094, 0.00027865171432495117, 0.0002886056900024414, 0.00029855966567993164, 0.0003085136413574219, 0.0003184676170349121, 0.00032842159271240234, 0.0003383755683898926, 0.0003483295440673828]}, "gradients/encoder.encoder.layers.6.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0, 1.0, 9.0, 13.0, 8.0, 13.0, 11.0, 21.0, 31.0, 38.0, 70.0, 103.0, 91.0, 143.0, 121.0, 84.0, 90.0, 42.0, 34.0, 24.0, 11.0, 22.0, 7.0, 9.0, 9.0, 3.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.8312206268310547e-05, -2.7497299015522003e-05, -2.668239176273346e-05, -2.5867484509944916e-05, -2.5052577257156372e-05, -2.423767000436783e-05, -2.3422762751579285e-05, -2.260785549879074e-05, -2.1792948246002197e-05, -2.0978040993213654e-05, -2.016313374042511e-05, -1.9348226487636566e-05, -1.8533319234848022e-05, -1.771841198205948e-05, -1.6903504729270935e-05, -1.608859747648239e-05, -1.5273690223693848e-05, -1.4458782970905304e-05, -1.364387571811676e-05, -1.2828968465328217e-05, -1.2014061212539673e-05, -1.1199153959751129e-05, -1.0384246706962585e-05, -9.569339454174042e-06, -8.754432201385498e-06, -7.939524948596954e-06, -7.124617695808411e-06, -6.309710443019867e-06, -5.494803190231323e-06, -4.6798959374427795e-06, -3.864988684654236e-06, -3.050081431865692e-06, -2.2351741790771484e-06, -1.4202669262886047e-06, -6.05359673500061e-07, 2.0954757928848267e-07, 1.0244548320770264e-06, 1.83936208486557e-06, 2.6542693376541138e-06, 3.4691765904426575e-06, 4.284083843231201e-06, 5.098991096019745e-06, 5.9138983488082886e-06, 6.728805601596832e-06, 7.543712854385376e-06, 8.35862010717392e-06, 9.173527359962463e-06, 9.988434612751007e-06, 1.080334186553955e-05, 1.1618249118328094e-05, 1.2433156371116638e-05, 1.3248063623905182e-05, 1.4062970876693726e-05, 1.487787812948227e-05, 1.5692785382270813e-05, 1.6507692635059357e-05, 1.73225998878479e-05, 1.8137507140636444e-05, 1.8952414393424988e-05, 1.976732164621353e-05, 2.0582228899002075e-05, 2.139713615179062e-05, 2.2212043404579163e-05, 2.3026950657367706e-05, 2.384185791015625e-05]}, "gradients/encoder.encoder.layers.6.attention.q_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 3.0, 5.0, 3.0, 4.0, 8.0, 6.0, 10.0, 20.0, 24.0, 42.0, 85.0, 145.0, 269.0, 512.0, 965.0, 2078.0, 4705.0, 12752.0, 51965.0, 855955.0, 90841.0, 17108.0, 5993.0, 2514.0, 1204.0, 596.0, 318.0, 176.0, 83.0, 64.0, 34.0, 25.0, 17.0, 6.0, 5.0, 6.0, 5.0, 3.0, 4.0, 1.0, 1.0, 0.0, 2.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.000507354736328125, -0.000491313636302948, -0.000475272536277771, -0.000459231436252594, -0.000443190336227417, -0.00042714923620224, -0.000411108136177063, -0.000395067036151886, -0.000379025936126709, -0.000362984836101532, -0.000346943736076355, -0.000330902636051178, -0.000314861536026001, -0.000298820436000824, -0.00028277933597564697, -0.00026673823595046997, -0.00025069713592529297, -0.00023465603590011597, -0.00021861493587493896, -0.00020257383584976196, -0.00018653273582458496, -0.00017049163579940796, -0.00015445053577423096, -0.00013840943574905396, -0.00012236833572387695, -0.00010632723569869995, -9.028613567352295e-05, -7.424503564834595e-05, -5.8203935623168945e-05, -4.216283559799194e-05, -2.612173557281494e-05, -1.008063554763794e-05, 5.9604644775390625e-06, 2.2001564502716064e-05, 3.8042664527893066e-05, 5.408376455307007e-05, 7.012486457824707e-05, 8.616596460342407e-05, 0.00010220706462860107, 0.00011824816465377808, 0.00013428926467895508, 0.00015033036470413208, 0.00016637146472930908, 0.00018241256475448608, 0.00019845366477966309, 0.0002144947648048401, 0.0002305358648300171, 0.0002465769648551941, 0.0002626180648803711, 0.0002786591649055481, 0.0002947002649307251, 0.0003107413649559021, 0.0003267824649810791, 0.0003428235650062561, 0.0003588646650314331, 0.0003749057650566101, 0.0003909468650817871, 0.0004069879651069641, 0.0004230290651321411, 0.0004390701651573181, 0.0004551112651824951, 0.0004711523652076721, 0.0004871934652328491, 0.0005032345652580261, 0.0005192756652832031]}, "gradients/encoder.encoder.layers.6.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 3.0, 2.0, 3.0, 3.0, 2.0, 4.0, 7.0, 6.0, 5.0, 12.0, 8.0, 9.0, 16.0, 18.0, 33.0, 33.0, 64.0, 85.0, 144.0, 161.0, 145.0, 65.0, 32.0, 28.0, 20.0, 23.0, 11.0, 13.0, 13.0, 4.0, 7.0, 6.0, 4.0, 3.0, 4.0, 2.0, 2.0, 5.0, 3.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0], "bins": [-0.0002949237823486328, -0.0002858005464076996, -0.00027667731046676636, -0.00026755407452583313, -0.0002584308385848999, -0.0002493076026439667, -0.00024018436670303345, -0.00023106113076210022, -0.000221937894821167, -0.00021281465888023376, -0.00020369142293930054, -0.0001945681869983673, -0.00018544495105743408, -0.00017632171511650085, -0.00016719847917556763, -0.0001580752432346344, -0.00014895200729370117, -0.00013982877135276794, -0.00013070553541183472, -0.00012158229947090149, -0.00011245906352996826, -0.00010333582758903503, -9.42125916481018e-05, -8.508935570716858e-05, -7.596611976623535e-05, -6.684288382530212e-05, -5.7719647884368896e-05, -4.859641194343567e-05, -3.947317600250244e-05, -3.0349940061569214e-05, -2.1226704120635986e-05, -1.2103468179702759e-05, -2.9802322387695312e-06, 6.143003702163696e-06, 1.5266239643096924e-05, 2.438947558403015e-05, 3.351271152496338e-05, 4.2635947465896606e-05, 5.1759183406829834e-05, 6.088241934776306e-05, 7.000565528869629e-05, 7.912889122962952e-05, 8.825212717056274e-05, 9.737536311149597e-05, 0.0001064985990524292, 0.00011562183499336243, 0.00012474507093429565, 0.00013386830687522888, 0.0001429915428161621, 0.00015211477875709534, 0.00016123801469802856, 0.0001703612506389618, 0.00017948448657989502, 0.00018860772252082825, 0.00019773095846176147, 0.0002068541944026947, 0.00021597743034362793, 0.00022510066628456116, 0.00023422390222549438, 0.0002433471381664276, 0.00025247037410736084, 0.00026159361004829407, 0.0002707168459892273, 0.0002798400819301605, 0.00028896331787109375]}, "gradients/encoder.encoder.layers.6.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 2.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 5.0, 8.0, 7.0, 4.0, 8.0, 10.0, 20.0, 18.0, 36.0, 65.0, 75.0, 113.0, 267.0, 107.0, 60.0, 55.0, 32.0, 34.0, 18.0, 7.0, 12.0, 8.0, 6.0, 4.0, 7.0, 5.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.12252210080623627, -0.11933580785989761, -0.11614950746297836, -0.11296321451663971, -0.10977691411972046, -0.1065906211733818, -0.10340432822704315, -0.1002180278301239, -0.09703173488378525, -0.0938454419374466, -0.09065914154052734, -0.08747284859418869, -0.08428655564785004, -0.08110025525093079, -0.07791396230459213, -0.07472766935825348, -0.07154136896133423, -0.06835507601499557, -0.06516877561807632, -0.06198248267173767, -0.05879618600010872, -0.05560988932847977, -0.05242359638214111, -0.04923729971051216, -0.04605100303888321, -0.04286470636725426, -0.039678409695625305, -0.03649211674928665, -0.0333058200776577, -0.030119523406028748, -0.026933228597044945, -0.023746933788061142, -0.020560629665851593, -0.01737433299422264, -0.014188038185238838, -0.01100174244493246, -0.007815446704626083, -0.004629150032997131, -0.0014428552240133286, 0.0017434395849704742, 0.004929736256599426, 0.008116031996905804, 0.011302327737212181, 0.014488623477518559, 0.017674919217824936, 0.020861215889453888, 0.02404751069843769, 0.027233805507421494, 0.030420102179050446, 0.0336063988506794, 0.03679269552230835, 0.039978988468647, 0.043165285140275955, 0.04635158181190491, 0.04953787475824356, 0.05272417142987251, 0.055910468101501465, 0.05909676477313042, 0.06228306144475937, 0.06546935439109802, 0.06865565478801727, 0.07184194773435593, 0.07502824068069458, 0.07821454107761383, 0.08140083402395248]}, "gradients/encoder.encoder.layers.6.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 2.0, 4.0, 1.0, 7.0, 8.0, 1.0, 8.0, 8.0, 12.0, 19.0, 23.0, 30.0, 22.0, 37.0, 35.0, 47.0, 55.0, 49.0, 80.0, 76.0, 63.0, 62.0, 57.0, 51.0, 38.0, 42.0, 26.0, 36.0, 22.0, 20.0, 16.0, 17.0, 11.0, 7.0, 7.0, 4.0, 5.0, 4.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.05956077575683594, -0.05723828822374344, -0.05491579696536064, -0.052593305706977844, -0.050270818173885345, -0.04794833064079285, -0.04562583938241005, -0.04330334812402725, -0.04098086059093475, -0.038658373057842255, -0.03633588179945946, -0.03401339054107666, -0.03169090300798416, -0.029368413612246513, -0.027045924216508865, -0.024723434820771217, -0.02240094542503357, -0.02007845602929592, -0.017755966633558273, -0.015433477237820625, -0.013110987842082977, -0.01078849844634533, -0.008466009050607681, -0.006143519654870033, -0.0038210302591323853, -0.0014985408633947372, 0.0008239485323429108, 0.0031464379280805588, 0.005468927323818207, 0.007791416719555855, 0.010113906115293503, 0.01243639551103115, 0.014758884906768799, 0.017081374302506447, 0.019403863698244095, 0.021726353093981743, 0.02404884248971939, 0.02637133188545704, 0.028693821281194687, 0.031016310676932335, 0.03333880007266998, 0.03566128760576248, 0.03798377886414528, 0.040306270122528076, 0.042628757655620575, 0.044951245188713074, 0.04727373644709587, 0.04959622770547867, 0.05191871523857117, 0.054241202771663666, 0.05656369403004646, 0.05888618528842926, 0.06120867282152176, 0.06353116035461426, 0.06585365533828735, 0.06817614287137985, 0.07049863040447235, 0.07282111793756485, 0.07514360547065735, 0.07746610045433044, 0.07978858798742294, 0.08211107552051544, 0.08443357050418854, 0.08675605803728104, 0.08907854557037354]}, "gradients/encoder.encoder.layers.5.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 4.0, 2.0, 0.0, 0.0, 0.0, 2.0, 2.0, 5.0, 2.0, 3.0, 7.0, 7.0, 4.0, 5.0, 8.0, 7.0, 11.0, 11.0, 14.0, 25.0, 39.0, 91.0, 120.0, 267.0, 555.0, 1258.0, 3234.0, 12865.0, 369970.0, 3773310.0, 22324.0, 6082.0, 2515.0, 751.0, 346.0, 167.0, 103.0, 55.0, 38.0, 25.0, 23.0, 10.0, 4.0, 11.0, 5.0, 3.0, 2.0, 3.0, 1.0, 0.0, 2.0], "bins": [-0.0252685546875, -0.024669647216796875, -0.02407073974609375, -0.023471832275390625, -0.0228729248046875, -0.022274017333984375, -0.02167510986328125, -0.021076202392578125, -0.020477294921875, -0.019878387451171875, -0.01927947998046875, -0.018680572509765625, -0.0180816650390625, -0.017482757568359375, -0.01688385009765625, -0.016284942626953125, -0.01568603515625, -0.015087127685546875, -0.01448822021484375, -0.013889312744140625, -0.0132904052734375, -0.012691497802734375, -0.01209259033203125, -0.011493682861328125, -0.010894775390625, -0.010295867919921875, -0.00969696044921875, -0.009098052978515625, -0.0084991455078125, -0.007900238037109375, -0.00730133056640625, -0.006702423095703125, -0.006103515625, -0.005504608154296875, -0.00490570068359375, -0.004306793212890625, -0.0037078857421875, -0.003108978271484375, -0.00251007080078125, -0.001911163330078125, -0.001312255859375, -0.000713348388671875, -0.00011444091796875, 0.000484466552734375, 0.0010833740234375, 0.001682281494140625, 0.00228118896484375, 0.002880096435546875, 0.00347900390625, 0.004077911376953125, 0.00467681884765625, 0.005275726318359375, 0.0058746337890625, 0.006473541259765625, 0.00707244873046875, 0.007671356201171875, 0.008270263671875, 0.008869171142578125, 0.00946807861328125, 0.010066986083984375, 0.0106658935546875, 0.011264801025390625, 0.01186370849609375, 0.012462615966796875, 0.0130615234375]}, "gradients/encoder.encoder.layers.5.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 4.0, 1.0, 7.0, 4.0, 12.0, 23.0, 27.0, 40.0, 66.0, 105.0, 142.0, 132.0, 126.0, 122.0, 62.0, 51.0, 42.0, 18.0, 17.0, 7.0, 2.0, 2.0, 3.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.01104736328125, -0.010781943798065186, -0.010516524314880371, -0.010251104831695557, -0.009985685348510742, -0.009720265865325928, -0.009454846382141113, -0.009189426898956299, -0.008924007415771484, -0.00865858793258667, -0.008393168449401855, -0.008127748966217041, -0.007862329483032227, -0.007596909999847412, -0.007331490516662598, -0.007066071033477783, -0.006800651550292969, -0.006535232067108154, -0.00626981258392334, -0.006004393100738525, -0.005738973617553711, -0.0054735541343688965, -0.005208134651184082, -0.004942715167999268, -0.004677295684814453, -0.004411876201629639, -0.004146456718444824, -0.0038810372352600098, -0.0036156177520751953, -0.003350198268890381, -0.0030847787857055664, -0.002819359302520752, -0.0025539398193359375, -0.002288520336151123, -0.0020231008529663086, -0.0017576813697814941, -0.0014922618865966797, -0.0012268424034118652, -0.0009614229202270508, -0.0006960034370422363, -0.0004305839538574219, -0.00016516447067260742, 0.00010025501251220703, 0.0003656744956970215, 0.0006310939788818359, 0.0008965134620666504, 0.0011619329452514648, 0.0014273524284362793, 0.0016927719116210938, 0.001958191394805908, 0.0022236108779907227, 0.002489030361175537, 0.0027544498443603516, 0.003019869327545166, 0.0032852888107299805, 0.003550708293914795, 0.0038161277770996094, 0.004081547260284424, 0.004346966743469238, 0.004612386226654053, 0.004877805709838867, 0.005143225193023682, 0.005408644676208496, 0.0056740641593933105, 0.005939483642578125]}, "gradients/encoder.encoder.layers.5.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 5.0, 6.0, 5.0, 7.0, 7.0, 20.0, 22.0, 27.0, 48.0, 64.0, 82.0, 162.0, 291.0, 738.0, 2027.0, 8410.0, 59336.0, 4032553.0, 76474.0, 10107.0, 2475.0, 817.0, 307.0, 128.0, 64.0, 46.0, 14.0, 18.0, 14.0, 7.0, 5.0, 5.0, 1.0, 3.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.021820068359375, -0.021027803421020508, -0.020235538482666016, -0.019443273544311523, -0.01865100860595703, -0.01785874366760254, -0.017066478729248047, -0.016274213790893555, -0.015481948852539062, -0.01468968391418457, -0.013897418975830078, -0.013105154037475586, -0.012312889099121094, -0.011520624160766602, -0.01072835922241211, -0.009936094284057617, -0.009143829345703125, -0.008351564407348633, -0.007559299468994141, -0.0067670345306396484, -0.005974769592285156, -0.005182504653930664, -0.004390239715576172, -0.0035979747772216797, -0.0028057098388671875, -0.0020134449005126953, -0.0012211799621582031, -0.00042891502380371094, 0.00036334991455078125, 0.0011556148529052734, 0.0019478797912597656, 0.002740144729614258, 0.00353240966796875, 0.004324674606323242, 0.005116939544677734, 0.0059092044830322266, 0.006701469421386719, 0.007493734359741211, 0.008285999298095703, 0.009078264236450195, 0.009870529174804688, 0.01066279411315918, 0.011455059051513672, 0.012247323989868164, 0.013039588928222656, 0.013831853866577148, 0.01462411880493164, 0.015416383743286133, 0.016208648681640625, 0.017000913619995117, 0.01779317855834961, 0.0185854434967041, 0.019377708435058594, 0.020169973373413086, 0.020962238311767578, 0.02175450325012207, 0.022546768188476562, 0.023339033126831055, 0.024131298065185547, 0.02492356300354004, 0.02571582794189453, 0.026508092880249023, 0.027300357818603516, 0.028092622756958008, 0.0288848876953125]}, "gradients/encoder.encoder.layers.5.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 3.0, 4.0, 6.0, 5.0, 13.0, 23.0, 33.0, 29.0, 60.0, 189.0, 1054.0, 2073.0, 350.0, 80.0, 51.0, 33.0, 22.0, 14.0, 12.0, 10.0, 4.0, 4.0, 4.0, 4.0, 2.0, 2.0, 2.0, 0.0, 1.0], "bins": [-0.02191162109375, -0.021437406539916992, -0.020963191986083984, -0.020488977432250977, -0.02001476287841797, -0.01954054832458496, -0.019066333770751953, -0.018592119216918945, -0.018117904663085938, -0.01764369010925293, -0.017169475555419922, -0.016695261001586914, -0.016221046447753906, -0.0157468318939209, -0.01527261734008789, -0.014798402786254883, -0.014324188232421875, -0.013849973678588867, -0.01337575912475586, -0.012901544570922852, -0.012427330017089844, -0.011953115463256836, -0.011478900909423828, -0.01100468635559082, -0.010530471801757812, -0.010056257247924805, -0.009582042694091797, -0.009107828140258789, -0.008633613586425781, -0.008159399032592773, -0.007685184478759766, -0.007210969924926758, -0.00673675537109375, -0.006262540817260742, -0.005788326263427734, -0.0053141117095947266, -0.004839897155761719, -0.004365682601928711, -0.003891468048095703, -0.0034172534942626953, -0.0029430389404296875, -0.0024688243865966797, -0.001994609832763672, -0.001520395278930664, -0.0010461807250976562, -0.0005719661712646484, -9.775161743164062e-05, 0.0003764629364013672, 0.000850677490234375, 0.0013248920440673828, 0.0017991065979003906, 0.0022733211517333984, 0.0027475357055664062, 0.003221750259399414, 0.003695964813232422, 0.00417017936706543, 0.0046443939208984375, 0.005118608474731445, 0.005592823028564453, 0.006067037582397461, 0.006541252136230469, 0.0070154666900634766, 0.007489681243896484, 0.007963895797729492, 0.0084381103515625]}, "gradients/encoder.encoder.layers.5.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 2.0, 6.0, 4.0, 10.0, 13.0, 21.0, 37.0, 60.0, 82.0, 212.0, 257.0, 110.0, 63.0, 36.0, 27.0, 21.0, 15.0, 14.0, 5.0, 6.0, 5.0, 3.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.06460139900445938, -0.06191864609718323, -0.059235893189907074, -0.05655314028263092, -0.05387038737535477, -0.05118763446807861, -0.04850488156080246, -0.045822128653526306, -0.04313937574625015, -0.040456622838974, -0.037773869931697845, -0.03509111702442169, -0.03240836411714554, -0.029725611209869385, -0.027042856439948082, -0.02436010353267193, -0.021677348762750626, -0.018994595855474472, -0.01631184294819832, -0.01362908910959959, -0.010946336202323437, -0.008263583295047283, -0.005580829456448555, -0.0028980765491724014, -0.00021532364189624786, 0.0024674294982105494, 0.005150182638317347, 0.007832936011254787, 0.010515688918530941, 0.013198441825807095, 0.015881195664405823, 0.018563948571681976, 0.02124670147895813, 0.023929454386234283, 0.026612207293510437, 0.02929496020078659, 0.031977713108062744, 0.0346604660153389, 0.03734321892261505, 0.040025971829891205, 0.04270872473716736, 0.04539147764444351, 0.048074230551719666, 0.05075698345899582, 0.05343973636627197, 0.056122489273548126, 0.05880524218082428, 0.06148799508810043, 0.06417074799537659, 0.06685350090265274, 0.0695362538099289, 0.07221900671720505, 0.0749017596244812, 0.07758451253175735, 0.08026726543903351, 0.08295001834630966, 0.08563277870416641, 0.08831553161144257, 0.09099828451871872, 0.09368103742599487, 0.09636379033327103, 0.09904654324054718, 0.10172929614782333, 0.10441204905509949, 0.10709480196237564]}, "gradients/encoder.encoder.layers.5.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0, 1.0, 2.0, 2.0, 6.0, 5.0, 7.0, 4.0, 14.0, 10.0, 13.0, 17.0, 21.0, 39.0, 38.0, 39.0, 45.0, 55.0, 52.0, 40.0, 50.0, 37.0, 62.0, 58.0, 55.0, 56.0, 43.0, 38.0, 32.0, 28.0, 30.0, 23.0, 29.0, 16.0, 9.0, 10.0, 10.0, 6.0, 5.0, 2.0, 2.0, 5.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.038012802600860596, -0.03679495304822922, -0.03557710349559784, -0.03435925394296646, -0.03314140439033508, -0.031923554837703705, -0.030705705285072327, -0.02948785573244095, -0.02827000617980957, -0.027052156627178192, -0.025834307074546814, -0.024616457521915436, -0.023398607969284058, -0.02218075841665268, -0.0209629088640213, -0.019745059311389923, -0.018527209758758545, -0.017309360206127167, -0.01609151065349579, -0.01487366110086441, -0.013655811548233032, -0.012437961995601654, -0.011220112442970276, -0.010002262890338898, -0.00878441333770752, -0.007566563785076141, -0.006348714232444763, -0.005130864679813385, -0.003913015127182007, -0.0026951655745506287, -0.0014773160219192505, -0.0002594664692878723, 0.0009583830833435059, 0.002176232635974884, 0.003394082188606262, 0.00461193174123764, 0.0058297812938690186, 0.007047630846500397, 0.008265480399131775, 0.009483329951763153, 0.010701179504394531, 0.01191902905702591, 0.013136878609657288, 0.014354728162288666, 0.015572577714920044, 0.016790427267551422, 0.0180082768201828, 0.01922612637281418, 0.020443975925445557, 0.021661825478076935, 0.022879675030708313, 0.02409752458333969, 0.02531537413597107, 0.026533223688602448, 0.027751073241233826, 0.028968922793865204, 0.030186772346496582, 0.03140462189912796, 0.03262247145175934, 0.03384032100439072, 0.035058170557022095, 0.03627602010965347, 0.03749386966228485, 0.03871171921491623, 0.03992956876754761]}, "gradients/encoder.encoder.layers.5.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0, 2.0, 3.0, 4.0, 8.0, 8.0, 10.0, 20.0, 14.0, 26.0, 36.0, 45.0, 64.0, 88.0, 151.0, 218.0, 439.0, 964.0, 2752.0, 9304.0, 40208.0, 325420.0, 595322.0, 55645.0, 11942.0, 3389.0, 1221.0, 525.0, 273.0, 146.0, 86.0, 64.0, 39.0, 25.0, 22.0, 13.0, 14.0, 13.0, 5.0, 9.0, 6.0, 5.0, 5.0, 3.0, 5.0, 1.0, 3.0, 0.0, 0.0, 1.0, 1.0, 2.0], "bins": [-0.034515380859375, -0.033503055572509766, -0.03249073028564453, -0.0314784049987793, -0.030466079711914062, -0.029453754425048828, -0.028441429138183594, -0.02742910385131836, -0.026416778564453125, -0.02540445327758789, -0.024392127990722656, -0.023379802703857422, -0.022367477416992188, -0.021355152130126953, -0.02034282684326172, -0.019330501556396484, -0.01831817626953125, -0.017305850982666016, -0.01629352569580078, -0.015281200408935547, -0.014268875122070312, -0.013256549835205078, -0.012244224548339844, -0.01123189926147461, -0.010219573974609375, -0.00920724868774414, -0.008194923400878906, -0.007182598114013672, -0.0061702728271484375, -0.005157947540283203, -0.004145622253417969, -0.0031332969665527344, -0.0021209716796875, -0.0011086463928222656, -9.632110595703125e-05, 0.0009160041809082031, 0.0019283294677734375, 0.002940654754638672, 0.003952980041503906, 0.004965305328369141, 0.005977630615234375, 0.006989955902099609, 0.008002281188964844, 0.009014606475830078, 0.010026931762695312, 0.011039257049560547, 0.012051582336425781, 0.013063907623291016, 0.01407623291015625, 0.015088558197021484, 0.01610088348388672, 0.017113208770751953, 0.018125534057617188, 0.019137859344482422, 0.020150184631347656, 0.02116250991821289, 0.022174835205078125, 0.02318716049194336, 0.024199485778808594, 0.025211811065673828, 0.026224136352539062, 0.027236461639404297, 0.02824878692626953, 0.029261112213134766, 0.0302734375]}, "gradients/encoder.encoder.layers.5.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 4.0, 1.0, 6.0, 5.0, 12.0, 24.0, 26.0, 41.0, 61.0, 103.0, 142.0, 125.0, 134.0, 119.0, 67.0, 53.0, 41.0, 20.0, 18.0, 6.0, 3.0, 1.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.0110321044921875, -0.01076728105545044, -0.010502457618713379, -0.010237634181976318, -0.009972810745239258, -0.009707987308502197, -0.009443163871765137, -0.009178340435028076, -0.008913516998291016, -0.008648693561553955, -0.008383870124816895, -0.008119046688079834, -0.007854223251342773, -0.007589399814605713, -0.007324576377868652, -0.007059752941131592, -0.006794929504394531, -0.006530106067657471, -0.00626528263092041, -0.00600045919418335, -0.005735635757446289, -0.0054708123207092285, -0.005205988883972168, -0.004941165447235107, -0.004676342010498047, -0.004411518573760986, -0.004146695137023926, -0.0038818717002868652, -0.0036170482635498047, -0.003352224826812744, -0.0030874013900756836, -0.002822577953338623, -0.0025577545166015625, -0.002292931079864502, -0.0020281076431274414, -0.0017632842063903809, -0.0014984607696533203, -0.0012336373329162598, -0.0009688138961791992, -0.0007039904594421387, -0.0004391670227050781, -0.00017434358596801758, 9.047985076904297e-05, 0.0003553032875061035, 0.0006201267242431641, 0.0008849501609802246, 0.0011497735977172852, 0.0014145970344543457, 0.0016794204711914062, 0.0019442439079284668, 0.0022090673446655273, 0.002473890781402588, 0.0027387142181396484, 0.003003537654876709, 0.0032683610916137695, 0.00353318452835083, 0.0037980079650878906, 0.004062831401824951, 0.004327654838562012, 0.004592478275299072, 0.004857301712036133, 0.005122125148773193, 0.005386948585510254, 0.0056517720222473145, 0.005916595458984375]}, "gradients/encoder.encoder.layers.5.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 6.0, 9.0, 5.0, 3.0, 17.0, 14.0, 16.0, 39.0, 43.0, 67.0, 104.0, 168.0, 302.0, 507.0, 878.0, 1674.0, 3499.0, 6902.0, 14523.0, 32606.0, 84512.0, 549925.0, 237325.0, 65020.0, 26285.0, 12208.0, 5769.0, 2787.0, 1433.0, 806.0, 442.0, 253.0, 143.0, 102.0, 57.0, 48.0, 18.0, 18.0, 6.0, 6.0, 10.0, 2.0, 3.0, 4.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.0167236328125, -0.01618194580078125, -0.0156402587890625, -0.01509857177734375, -0.014556884765625, -0.01401519775390625, -0.0134735107421875, -0.01293182373046875, -0.01239013671875, -0.01184844970703125, -0.0113067626953125, -0.01076507568359375, -0.010223388671875, -0.00968170166015625, -0.0091400146484375, -0.00859832763671875, -0.008056640625, -0.00751495361328125, -0.0069732666015625, -0.00643157958984375, -0.005889892578125, -0.00534820556640625, -0.0048065185546875, -0.00426483154296875, -0.00372314453125, -0.00318145751953125, -0.0026397705078125, -0.00209808349609375, -0.001556396484375, -0.00101470947265625, -0.0004730224609375, 6.866455078125e-05, 0.0006103515625, 0.00115203857421875, 0.0016937255859375, 0.00223541259765625, 0.002777099609375, 0.00331878662109375, 0.0038604736328125, 0.00440216064453125, 0.00494384765625, 0.00548553466796875, 0.0060272216796875, 0.00656890869140625, 0.007110595703125, 0.00765228271484375, 0.0081939697265625, 0.00873565673828125, 0.00927734375, 0.00981903076171875, 0.0103607177734375, 0.01090240478515625, 0.011444091796875, 0.01198577880859375, 0.0125274658203125, 0.01306915283203125, 0.01361083984375, 0.01415252685546875, 0.0146942138671875, 0.01523590087890625, 0.015777587890625, 0.01631927490234375, 0.0168609619140625, 0.01740264892578125, 0.0179443359375]}, "gradients/encoder.encoder.layers.5.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 4.0, 3.0, 0.0, 4.0, 1.0, 1.0, 2.0, 4.0, 4.0, 9.0, 8.0, 21.0, 17.0, 19.0, 25.0, 19.0, 24.0, 27.0, 34.0, 32.0, 36.0, 40.0, 45.0, 49.0, 49.0, 49.0, 41.0, 52.0, 39.0, 40.0, 36.0, 44.0, 31.0, 29.0, 21.0, 26.0, 25.0, 23.0, 16.0, 11.0, 9.0, 9.0, 9.0, 6.0, 5.0, 6.0, 5.0, 1.0, 2.0, 4.0, 1.0], "bins": [-0.0157623291015625, -0.015355944633483887, -0.014949560165405273, -0.01454317569732666, -0.014136791229248047, -0.013730406761169434, -0.01332402229309082, -0.012917637825012207, -0.012511253356933594, -0.01210486888885498, -0.011698484420776367, -0.011292099952697754, -0.01088571548461914, -0.010479331016540527, -0.010072946548461914, -0.0096665620803833, -0.009260177612304688, -0.008853793144226074, -0.008447408676147461, -0.008041024208068848, -0.007634639739990234, -0.007228255271911621, -0.006821870803833008, -0.0064154863357543945, -0.006009101867675781, -0.005602717399597168, -0.005196332931518555, -0.004789948463439941, -0.004383563995361328, -0.003977179527282715, -0.0035707950592041016, -0.0031644105911254883, -0.002758026123046875, -0.0023516416549682617, -0.0019452571868896484, -0.0015388727188110352, -0.0011324882507324219, -0.0007261037826538086, -0.0003197193145751953, 8.666515350341797e-05, 0.0004930496215820312, 0.0008994340896606445, 0.0013058185577392578, 0.001712203025817871, 0.0021185874938964844, 0.0025249719619750977, 0.002931356430053711, 0.0033377408981323242, 0.0037441253662109375, 0.004150509834289551, 0.004556894302368164, 0.004963278770446777, 0.005369663238525391, 0.005776047706604004, 0.006182432174682617, 0.0065888166427612305, 0.006995201110839844, 0.007401585578918457, 0.00780797004699707, 0.008214354515075684, 0.008620738983154297, 0.00902712345123291, 0.009433507919311523, 0.009839892387390137, 0.01024627685546875]}, "gradients/encoder.encoder.layers.5.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 3.0, 5.0, 7.0, 10.0, 9.0, 10.0, 11.0, 31.0, 36.0, 63.0, 88.0, 108.0, 175.0, 226.0, 344.0, 563.0, 830.0, 1416.0, 2616.0, 5973.0, 17593.0, 139491.0, 831511.0, 30559.0, 8466.0, 3619.0, 1823.0, 1027.0, 667.0, 407.0, 272.0, 180.0, 121.0, 89.0, 51.0, 38.0, 36.0, 21.0, 20.0, 12.0, 9.0, 9.0, 9.0, 1.0, 4.0, 1.0, 4.0, 1.0, 1.0, 1.0, 2.0], "bins": [-0.0002884864807128906, -0.0002802889794111252, -0.00027209147810935974, -0.0002638939768075943, -0.00025569647550582886, -0.0002474989742040634, -0.00023930147290229797, -0.00023110397160053253, -0.0002229064702987671, -0.00021470896899700165, -0.0002065114676952362, -0.00019831396639347076, -0.00019011646509170532, -0.00018191896378993988, -0.00017372146248817444, -0.000165523961186409, -0.00015732645988464355, -0.0001491289585828781, -0.00014093145728111267, -0.00013273395597934723, -0.0001245364546775818, -0.00011633895337581635, -0.0001081414520740509, -9.994395077228546e-05, -9.174644947052002e-05, -8.354894816875458e-05, -7.535144686698914e-05, -6.71539455652237e-05, -5.895644426345825e-05, -5.075894296169281e-05, -4.256144165992737e-05, -3.4363940358161926e-05, -2.6166439056396484e-05, -1.7968937754631042e-05, -9.7714364528656e-06, -1.5739351511001587e-06, 6.623566150665283e-06, 1.4821067452430725e-05, 2.3018568754196167e-05, 3.121607005596161e-05, 3.941357135772705e-05, 4.761107265949249e-05, 5.5808573961257935e-05, 6.400607526302338e-05, 7.220357656478882e-05, 8.040107786655426e-05, 8.85985791683197e-05, 9.679608047008514e-05, 0.00010499358177185059, 0.00011319108307361603, 0.00012138858437538147, 0.0001295860856771469, 0.00013778358697891235, 0.0001459810882806778, 0.00015417858958244324, 0.00016237609088420868, 0.00017057359218597412, 0.00017877109348773956, 0.000186968594789505, 0.00019516609609127045, 0.0002033635973930359, 0.00021156109869480133, 0.00021975859999656677, 0.00022795610129833221, 0.00023615360260009766]}, "gradients/encoder.encoder.layers.5.attention.k_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 4.0, 3.0, 1.0, 6.0, 4.0, 1.0, 3.0, 8.0, 4.0, 6.0, 21.0, 14.0, 16.0, 27.0, 41.0, 41.0, 77.0, 52.0, 65.0, 100.0, 105.0, 85.0, 69.0, 47.0, 53.0, 33.0, 28.0, 21.0, 19.0, 16.0, 8.0, 5.0, 12.0, 5.0, 3.0, 2.0, 3.0, 3.0, 4.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.775161743164062e-06, -9.39890742301941e-06, -9.022653102874756e-06, -8.646398782730103e-06, -8.27014446258545e-06, -7.893890142440796e-06, -7.517635822296143e-06, -7.141381502151489e-06, -6.765127182006836e-06, -6.388872861862183e-06, -6.012618541717529e-06, -5.636364221572876e-06, -5.260109901428223e-06, -4.883855581283569e-06, -4.507601261138916e-06, -4.131346940994263e-06, -3.7550926208496094e-06, -3.378838300704956e-06, -3.0025839805603027e-06, -2.6263296604156494e-06, -2.250075340270996e-06, -1.8738210201263428e-06, -1.4975666999816895e-06, -1.1213123798370361e-06, -7.450580596923828e-07, -3.688037395477295e-07, 7.450580596923828e-09, 3.8370490074157715e-07, 7.599592208862305e-07, 1.1362135410308838e-06, 1.5124678611755371e-06, 1.8887221813201904e-06, 2.2649765014648438e-06, 2.641230821609497e-06, 3.0174851417541504e-06, 3.3937394618988037e-06, 3.769993782043457e-06, 4.14624810218811e-06, 4.522502422332764e-06, 4.898756742477417e-06, 5.27501106262207e-06, 5.651265382766724e-06, 6.027519702911377e-06, 6.40377402305603e-06, 6.780028343200684e-06, 7.156282663345337e-06, 7.53253698348999e-06, 7.908791303634644e-06, 8.285045623779297e-06, 8.66129994392395e-06, 9.037554264068604e-06, 9.413808584213257e-06, 9.79006290435791e-06, 1.0166317224502563e-05, 1.0542571544647217e-05, 1.091882586479187e-05, 1.1295080184936523e-05, 1.1671334505081177e-05, 1.204758882522583e-05, 1.2423843145370483e-05, 1.2800097465515137e-05, 1.317635178565979e-05, 1.3552606105804443e-05, 1.3928860425949097e-05, 1.430511474609375e-05]}, "gradients/encoder.encoder.layers.5.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 6.0, 0.0, 5.0, 0.0, 2.0, 5.0, 6.0, 9.0, 21.0, 24.0, 41.0, 58.0, 114.0, 173.0, 269.0, 350.0, 514.0, 843.0, 1733.0, 3907.0, 12768.0, 140424.0, 856317.0, 20666.0, 5323.0, 2085.0, 1089.0, 608.0, 405.0, 244.0, 184.0, 136.0, 90.0, 52.0, 31.0, 20.0, 17.0, 9.0, 5.0, 2.0, 2.0, 3.0, 5.0, 1.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0004756450653076172, -0.0004603751003742218, -0.0004451051354408264, -0.00042983517050743103, -0.00041456520557403564, -0.00039929524064064026, -0.0003840252757072449, -0.0003687553107738495, -0.0003534853458404541, -0.0003382153809070587, -0.00032294541597366333, -0.00030767545104026794, -0.00029240548610687256, -0.00027713552117347717, -0.0002618655562400818, -0.0002465955913066864, -0.00023132562637329102, -0.00021605566143989563, -0.00020078569650650024, -0.00018551573157310486, -0.00017024576663970947, -0.0001549758017063141, -0.0001397058367729187, -0.00012443587183952332, -0.00010916590690612793, -9.389594197273254e-05, -7.862597703933716e-05, -6.335601210594177e-05, -4.808604717254639e-05, -3.2816082239151e-05, -1.7546117305755615e-05, -2.2761523723602295e-06, 1.2993812561035156e-05, 2.8263777494430542e-05, 4.353374242782593e-05, 5.8803707361221313e-05, 7.40736722946167e-05, 8.934363722801208e-05, 0.00010461360216140747, 0.00011988356709480286, 0.00013515353202819824, 0.00015042349696159363, 0.00016569346189498901, 0.0001809634268283844, 0.00019623339176177979, 0.00021150335669517517, 0.00022677332162857056, 0.00024204328656196594, 0.00025731325149536133, 0.0002725832164287567, 0.0002878531813621521, 0.0003031231462955475, 0.00031839311122894287, 0.00033366307616233826, 0.00034893304109573364, 0.00036420300602912903, 0.0003794729709625244, 0.0003947429358959198, 0.0004100129008293152, 0.00042528286576271057, 0.00044055283069610596, 0.00045582279562950134, 0.00047109276056289673, 0.0004863627254962921, 0.0005016326904296875]}, "gradients/encoder.encoder.layers.5.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 4.0, 1.0, 6.0, 2.0, 10.0, 12.0, 23.0, 27.0, 58.0, 177.0, 320.0, 202.0, 67.0, 30.0, 15.0, 18.0, 10.0, 5.0, 4.0, 5.0, 5.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00044536590576171875, -0.0004324689507484436, -0.00041957199573516846, -0.0004066750407218933, -0.00039377808570861816, -0.000380881130695343, -0.00036798417568206787, -0.0003550872206687927, -0.0003421902656555176, -0.00032929331064224243, -0.0003163963556289673, -0.00030349940061569214, -0.000290602445602417, -0.00027770549058914185, -0.0002648085355758667, -0.00025191158056259155, -0.0002390146255493164, -0.00022611767053604126, -0.0002132207155227661, -0.00020032376050949097, -0.00018742680549621582, -0.00017452985048294067, -0.00016163289546966553, -0.00014873594045639038, -0.00013583898544311523, -0.0001229420304298401, -0.00011004507541656494, -9.71481204032898e-05, -8.425116539001465e-05, -7.13542103767395e-05, -5.8457255363464355e-05, -4.556030035018921e-05, -3.266334533691406e-05, -1.9766390323638916e-05, -6.8694353103637695e-06, 6.027519702911377e-06, 1.8924474716186523e-05, 3.182142972946167e-05, 4.4718384742736816e-05, 5.761533975601196e-05, 7.051229476928711e-05, 8.340924978256226e-05, 9.63062047958374e-05, 0.00010920315980911255, 0.0001221001148223877, 0.00013499706983566284, 0.000147894024848938, 0.00016079097986221313, 0.00017368793487548828, 0.00018658488988876343, 0.00019948184490203857, 0.00021237879991531372, 0.00022527575492858887, 0.00023817270994186401, 0.00025106966495513916, 0.0002639666199684143, 0.00027686357498168945, 0.0002897605299949646, 0.00030265748500823975, 0.0003155544400215149, 0.00032845139503479004, 0.0003413483500480652, 0.00035424530506134033, 0.0003671422600746155, 0.0003800392150878906]}, "gradients/encoder.encoder.layers.5.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 3.0, 2.0, 4.0, 1.0, 4.0, 3.0, 9.0, 10.0, 6.0, 12.0, 10.0, 15.0, 23.0, 32.0, 42.0, 58.0, 125.0, 287.0, 110.0, 67.0, 47.0, 28.0, 27.0, 17.0, 19.0, 10.0, 6.0, 8.0, 7.0, 9.0, 4.0, 2.0, 2.0, 1.0, 1.0, 3.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.10940819978713989, -0.10623476654291153, -0.10306132584810257, -0.0998878926038742, -0.09671445190906525, -0.09354101866483688, -0.09036758542060852, -0.08719414472579956, -0.0840207040309906, -0.08084727078676224, -0.07767383009195328, -0.07450039684772491, -0.07132695615291595, -0.06815352290868759, -0.06498008966445923, -0.06180664896965027, -0.058633215725421906, -0.055459778755903244, -0.05228634178638458, -0.04911290854215622, -0.04593946784734726, -0.042766034603118896, -0.039592597633600235, -0.036419160664081573, -0.03324572369456291, -0.03007228672504425, -0.02689884975552559, -0.023725414648652077, -0.020551977679133415, -0.017378540709614754, -0.014205105602741241, -0.01103166863322258, -0.007858231663703918, -0.004684795159846544, -0.00151135865598917, 0.0016620773822069168, 0.004835514351725578, 0.00800895132124424, 0.011182386428117752, 0.014355823397636414, 0.017529260367155075, 0.020702697336673737, 0.023876134306192398, 0.02704956941306591, 0.030223006382584572, 0.03339644521474838, 0.036569878458976746, 0.03974331542849541, 0.04291675239801407, 0.04609018936753273, 0.04926362633705139, 0.052437059581279755, 0.055610500276088715, 0.05878393352031708, 0.06195737048983574, 0.0651308074593544, 0.06830424070358276, 0.07147767394781113, 0.07465111464262009, 0.07782454788684845, 0.08099798858165741, 0.08417142182588577, 0.08734485507011414, 0.0905182957649231, 0.09369173645973206]}, "gradients/encoder.encoder.layers.5.layer_norm.bias": {"_type": "histogram", "values": [3.0, 1.0, 0.0, 1.0, 0.0, 3.0, 5.0, 2.0, 5.0, 5.0, 2.0, 7.0, 5.0, 9.0, 8.0, 11.0, 12.0, 23.0, 13.0, 13.0, 27.0, 22.0, 23.0, 20.0, 30.0, 44.0, 40.0, 29.0, 36.0, 47.0, 57.0, 71.0, 53.0, 42.0, 41.0, 29.0, 27.0, 30.0, 20.0, 24.0, 16.0, 21.0, 23.0, 20.0, 12.0, 16.0, 9.0, 9.0, 12.0, 15.0, 5.0, 3.0, 3.0, 4.0, 1.0, 4.0, 2.0, 2.0, 3.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.04677075147628784, -0.045248404145240784, -0.043726056814193726, -0.042203713208436966, -0.04068136587738991, -0.03915901854634285, -0.03763667494058609, -0.03611432760953903, -0.034591980278491974, -0.033069632947444916, -0.03154728561639786, -0.030024942010641098, -0.02850259467959404, -0.026980247348546982, -0.025457901880145073, -0.023935556411743164, -0.022413209080696106, -0.020890861749649048, -0.01936851628124714, -0.01784617081284523, -0.016323823481798172, -0.014801477082073689, -0.013279130682349205, -0.011756784282624722, -0.010234437882900238, -0.008712091483175755, -0.007189745083451271, -0.005667398683726788, -0.004145052284002304, -0.0026227058842778206, -0.001100359484553337, 0.0004219869151711464, 0.0019443333148956299, 0.0034666797146201134, 0.004989026114344597, 0.00651137251406908, 0.008033718913793564, 0.009556065313518047, 0.01107841171324253, 0.012600758112967014, 0.014123104512691498, 0.015645451843738556, 0.017167797312140465, 0.018690142780542374, 0.020212490111589432, 0.02173483744263649, 0.0232571829110384, 0.024779528379440308, 0.026301875710487366, 0.027824223041534424, 0.029346568509936333, 0.03086891397833824, 0.0323912613093853, 0.03391360864043236, 0.03543595224618912, 0.036958299577236176, 0.038480646908283234, 0.04000299423933029, 0.04152534157037735, 0.04304768517613411, 0.04457003250718117, 0.046092379838228226, 0.047614723443984985, 0.04913707077503204, 0.0506594181060791]}, "gradients/encoder.encoder.layers.4.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 3.0, 2.0, 1.0, 2.0, 3.0, 4.0, 4.0, 3.0, 13.0, 17.0, 16.0, 22.0, 39.0, 81.0, 142.0, 382.0, 1017.0, 3465.0, 20911.0, 4143631.0, 19287.0, 3527.0, 997.0, 365.0, 158.0, 80.0, 51.0, 22.0, 18.0, 10.0, 6.0, 5.0, 4.0, 4.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.048797607421875, -0.04762125015258789, -0.04644489288330078, -0.04526853561401367, -0.04409217834472656, -0.04291582107543945, -0.041739463806152344, -0.040563106536865234, -0.039386749267578125, -0.038210391998291016, -0.037034034729003906, -0.0358576774597168, -0.03468132019042969, -0.03350496292114258, -0.03232860565185547, -0.03115224838256836, -0.02997589111328125, -0.02879953384399414, -0.02762317657470703, -0.026446819305419922, -0.025270462036132812, -0.024094104766845703, -0.022917747497558594, -0.021741390228271484, -0.020565032958984375, -0.019388675689697266, -0.018212318420410156, -0.017035961151123047, -0.015859603881835938, -0.014683246612548828, -0.013506889343261719, -0.01233053207397461, -0.0111541748046875, -0.00997781753540039, -0.008801460266113281, -0.007625102996826172, -0.0064487457275390625, -0.005272388458251953, -0.004096031188964844, -0.0029196739196777344, -0.001743316650390625, -0.0005669593811035156, 0.0006093978881835938, 0.0017857551574707031, 0.0029621124267578125, 0.004138469696044922, 0.005314826965332031, 0.006491184234619141, 0.00766754150390625, 0.00884389877319336, 0.010020256042480469, 0.011196613311767578, 0.012372970581054688, 0.013549327850341797, 0.014725685119628906, 0.015902042388916016, 0.017078399658203125, 0.018254756927490234, 0.019431114196777344, 0.020607471466064453, 0.021783828735351562, 0.022960186004638672, 0.02413654327392578, 0.02531290054321289, 0.0264892578125]}, "gradients/encoder.encoder.layers.4.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 5.0, 2.0, 4.0, 5.0, 17.0, 24.0, 30.0, 38.0, 71.0, 114.0, 147.0, 137.0, 131.0, 101.0, 61.0, 51.0, 34.0, 20.0, 15.0, 2.0, 3.0, 2.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.01084136962890625, -0.010579526424407959, -0.010317683219909668, -0.010055840015411377, -0.009793996810913086, -0.009532153606414795, -0.009270310401916504, -0.009008467197418213, -0.008746623992919922, -0.00848478078842163, -0.00822293758392334, -0.007961094379425049, -0.007699251174926758, -0.007437407970428467, -0.007175564765930176, -0.006913721561431885, -0.006651878356933594, -0.006390035152435303, -0.006128191947937012, -0.005866348743438721, -0.00560450553894043, -0.005342662334442139, -0.005080819129943848, -0.004818975925445557, -0.004557132720947266, -0.004295289516448975, -0.004033446311950684, -0.0037716031074523926, -0.0035097599029541016, -0.0032479166984558105, -0.0029860734939575195, -0.0027242302894592285, -0.0024623870849609375, -0.0022005438804626465, -0.0019387006759643555, -0.0016768574714660645, -0.0014150142669677734, -0.0011531710624694824, -0.0008913278579711914, -0.0006294846534729004, -0.0003676414489746094, -0.00010579824447631836, 0.00015604496002197266, 0.00041788816452026367, 0.0006797313690185547, 0.0009415745735168457, 0.0012034177780151367, 0.0014652609825134277, 0.0017271041870117188, 0.0019889473915100098, 0.0022507905960083008, 0.002512633800506592, 0.002774477005004883, 0.003036320209503174, 0.003298163414001465, 0.003560006618499756, 0.003821849822998047, 0.004083693027496338, 0.004345536231994629, 0.00460737943649292, 0.004869222640991211, 0.005131065845489502, 0.005392909049987793, 0.005654752254486084, 0.005916595458984375]}, "gradients/encoder.encoder.layers.4.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 4.0, 2.0, 7.0, 8.0, 8.0, 10.0, 15.0, 28.0, 31.0, 58.0, 70.0, 134.0, 194.0, 270.0, 437.0, 689.0, 1247.0, 2000.0, 3978.0, 8546.0, 22610.0, 192629.0, 3899395.0, 38024.0, 11618.0, 5213.0, 2736.0, 1570.0, 958.0, 570.0, 401.0, 274.0, 170.0, 124.0, 69.0, 64.0, 38.0, 30.0, 22.0, 21.0, 1.0, 6.0, 5.0, 5.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.01296234130859375, -0.012546777725219727, -0.012131214141845703, -0.01171565055847168, -0.011300086975097656, -0.010884523391723633, -0.01046895980834961, -0.010053396224975586, -0.009637832641601562, -0.009222269058227539, -0.008806705474853516, -0.008391141891479492, -0.007975578308105469, -0.007560014724731445, -0.007144451141357422, -0.0067288875579833984, -0.006313323974609375, -0.0058977603912353516, -0.005482196807861328, -0.005066633224487305, -0.004651069641113281, -0.004235506057739258, -0.0038199424743652344, -0.003404378890991211, -0.0029888153076171875, -0.002573251724243164, -0.0021576881408691406, -0.0017421245574951172, -0.0013265609741210938, -0.0009109973907470703, -0.0004954338073730469, -7.987022399902344e-05, 0.000335693359375, 0.0007512569427490234, 0.0011668205261230469, 0.0015823841094970703, 0.0019979476928710938, 0.002413511276245117, 0.0028290748596191406, 0.003244638442993164, 0.0036602020263671875, 0.004075765609741211, 0.004491329193115234, 0.004906892776489258, 0.005322456359863281, 0.005738019943237305, 0.006153583526611328, 0.0065691471099853516, 0.006984710693359375, 0.0074002742767333984, 0.007815837860107422, 0.008231401443481445, 0.008646965026855469, 0.009062528610229492, 0.009478092193603516, 0.009893655776977539, 0.010309219360351562, 0.010724782943725586, 0.01114034652709961, 0.011555910110473633, 0.011971473693847656, 0.01238703727722168, 0.012802600860595703, 0.013218164443969727, 0.01363372802734375]}, "gradients/encoder.encoder.layers.4.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 2.0, 3.0, 1.0, 4.0, 7.0, 5.0, 8.0, 7.0, 4.0, 17.0, 15.0, 22.0, 33.0, 78.0, 213.0, 3073.0, 347.0, 116.0, 37.0, 19.0, 13.0, 7.0, 8.0, 10.0, 6.0, 3.0, 4.0, 4.0, 3.0, 2.0, 0.0, 3.0, 2.0, 1.0, 2.0, 1.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0], "bins": [-0.0108642578125, -0.010541200637817383, -0.010218143463134766, -0.009895086288452148, -0.009572029113769531, -0.009248971939086914, -0.008925914764404297, -0.00860285758972168, -0.008279800415039062, -0.007956743240356445, -0.007633686065673828, -0.007310628890991211, -0.006987571716308594, -0.0066645145416259766, -0.006341457366943359, -0.006018400192260742, -0.005695343017578125, -0.005372285842895508, -0.005049228668212891, -0.0047261714935302734, -0.004403114318847656, -0.004080057144165039, -0.003756999969482422, -0.0034339427947998047, -0.0031108856201171875, -0.0027878284454345703, -0.002464771270751953, -0.002141714096069336, -0.0018186569213867188, -0.0014955997467041016, -0.0011725425720214844, -0.0008494853973388672, -0.00052642822265625, -0.0002033710479736328, 0.00011968612670898438, 0.00044274330139160156, 0.0007658004760742188, 0.001088857650756836, 0.0014119148254394531, 0.0017349720001220703, 0.0020580291748046875, 0.0023810863494873047, 0.002704143524169922, 0.003027200698852539, 0.0033502578735351562, 0.0036733150482177734, 0.003996372222900391, 0.004319429397583008, 0.004642486572265625, 0.004965543746948242, 0.005288600921630859, 0.0056116580963134766, 0.005934715270996094, 0.006257772445678711, 0.006580829620361328, 0.006903886795043945, 0.0072269439697265625, 0.00755000114440918, 0.007873058319091797, 0.008196115493774414, 0.008519172668457031, 0.008842229843139648, 0.009165287017822266, 0.009488344192504883, 0.0098114013671875]}, "gradients/encoder.encoder.layers.4.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 1.0, 0.0, 4.0, 2.0, 5.0, 1.0, 8.0, 6.0, 9.0, 7.0, 13.0, 19.0, 23.0, 40.0, 50.0, 98.0, 222.0, 199.0, 98.0, 66.0, 40.0, 29.0, 14.0, 12.0, 11.0, 10.0, 8.0, 7.0, 1.0, 6.0, 3.0, 1.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.04882966727018356, -0.0472487136721611, -0.04566775634884834, -0.04408680275082588, -0.04250584542751312, -0.04092489182949066, -0.0393439382314682, -0.03776298090815544, -0.03618202358484268, -0.03460106998682022, -0.03302011266350746, -0.031439159065485, -0.02985820174217224, -0.02827724814414978, -0.02669629268348217, -0.02511533722281456, -0.0235343836247921, -0.02195342816412449, -0.02037247270345688, -0.018791519105434418, -0.01721056178212166, -0.015629608184099197, -0.014048652723431587, -0.012467697262763977, -0.010886741802096367, -0.009305786341428757, -0.007724831346422434, -0.006143876351416111, -0.004562920890748501, -0.0029819654300808907, -0.001401010900735855, 0.00017994455993175507, 0.0017608962953090668, 0.0033418515231460333, 0.004922806750983, 0.006503761745989323, 0.008084717206656933, 0.009665672667324543, 0.011246627196669579, 0.012827582657337189, 0.014408538118004799, 0.01598949357867241, 0.01757044903934002, 0.01915140450000763, 0.02073235809803009, 0.02231331542134285, 0.02389426901936531, 0.02547522448003292, 0.02705617994070053, 0.02863713540136814, 0.03021809086203575, 0.03179904446005821, 0.03338000178337097, 0.03496095538139343, 0.036541908979415894, 0.03812286630272865, 0.03970382362604141, 0.04128477722406387, 0.04286573454737663, 0.044446688145399094, 0.04602764546871185, 0.047608599066734314, 0.049189552664756775, 0.050770509988069534, 0.052351463586091995]}, "gradients/encoder.encoder.layers.4.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 3.0, 3.0, 2.0, 0.0, 3.0, 3.0, 6.0, 8.0, 8.0, 10.0, 14.0, 13.0, 16.0, 12.0, 21.0, 30.0, 28.0, 23.0, 31.0, 31.0, 31.0, 43.0, 40.0, 50.0, 40.0, 57.0, 46.0, 44.0, 40.0, 38.0, 47.0, 38.0, 37.0, 27.0, 22.0, 23.0, 16.0, 18.0, 18.0, 18.0, 12.0, 5.0, 6.0, 10.0, 5.0, 4.0, 5.0, 3.0, 0.0, 4.0, 1.0, 3.0, 0.0, 0.0, 1.0], "bins": [-0.025822997093200684, -0.02509630098938942, -0.024369603022933006, -0.023642905056476593, -0.02291620895266533, -0.022189512848854065, -0.02146281488239765, -0.02073611691594124, -0.020009420812129974, -0.01928272470831871, -0.018556026741862297, -0.017829328775405884, -0.01710263267159462, -0.016375936567783356, -0.015649238601326942, -0.014922541566193104, -0.014195844531059265, -0.013469147495925426, -0.012742450460791588, -0.01201575342565775, -0.01128905639052391, -0.010562359355390072, -0.009835662320256233, -0.009108965285122395, -0.008382268249988556, -0.007655571214854717, -0.006928874179720879, -0.00620217714458704, -0.005475480109453201, -0.004748783074319363, -0.004022086039185524, -0.0032953890040516853, -0.0025686919689178467, -0.001841994933784008, -0.0011152978986501694, -0.0003886008635163307, 0.00033809617161750793, 0.0010647932067513466, 0.0017914902418851852, 0.002518187277019024, 0.0032448843121528625, 0.003971581347286701, 0.00469827838242054, 0.0054249754175543785, 0.006151672452688217, 0.006878369487822056, 0.0076050665229558945, 0.008331763558089733, 0.009058460593223572, 0.00978515762835741, 0.010511854663491249, 0.011238551698625088, 0.011965248733758926, 0.012691945768892765, 0.013418642804026604, 0.014145339839160442, 0.014872036874294281, 0.01559873390942812, 0.01632543094456196, 0.01705212891101837, 0.017778825014829636, 0.0185055211186409, 0.019232219085097313, 0.019958917051553726, 0.02068561315536499]}, "gradients/encoder.encoder.layers.4.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 4.0, 1.0, 2.0, 3.0, 0.0, 1.0, 1.0, 4.0, 6.0, 3.0, 5.0, 16.0, 15.0, 17.0, 20.0, 35.0, 39.0, 50.0, 95.0, 163.0, 278.0, 521.0, 1238.0, 3521.0, 13161.0, 65029.0, 722318.0, 203885.0, 27540.0, 6781.0, 2102.0, 786.0, 351.0, 178.0, 131.0, 76.0, 52.0, 27.0, 21.0, 17.0, 23.0, 13.0, 9.0, 9.0, 6.0, 4.0, 5.0, 4.0, 2.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.049713134765625, -0.04804229736328125, -0.0463714599609375, -0.04470062255859375, -0.04302978515625, -0.04135894775390625, -0.0396881103515625, -0.03801727294921875, -0.036346435546875, -0.03467559814453125, -0.0330047607421875, -0.03133392333984375, -0.0296630859375, -0.02799224853515625, -0.0263214111328125, -0.02465057373046875, -0.022979736328125, -0.02130889892578125, -0.0196380615234375, -0.01796722412109375, -0.01629638671875, -0.01462554931640625, -0.0129547119140625, -0.01128387451171875, -0.009613037109375, -0.00794219970703125, -0.0062713623046875, -0.00460052490234375, -0.0029296875, -0.00125885009765625, 0.0004119873046875, 0.00208282470703125, 0.003753662109375, 0.00542449951171875, 0.0070953369140625, 0.00876617431640625, 0.01043701171875, 0.01210784912109375, 0.0137786865234375, 0.01544952392578125, 0.017120361328125, 0.01879119873046875, 0.0204620361328125, 0.02213287353515625, 0.0238037109375, 0.02547454833984375, 0.0271453857421875, 0.02881622314453125, 0.030487060546875, 0.03215789794921875, 0.0338287353515625, 0.03549957275390625, 0.03717041015625, 0.03884124755859375, 0.0405120849609375, 0.04218292236328125, 0.043853759765625, 0.04552459716796875, 0.0471954345703125, 0.04886627197265625, 0.050537109375, 0.05220794677734375, 0.0538787841796875, 0.05554962158203125, 0.057220458984375]}, "gradients/encoder.encoder.layers.4.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 4.0, 3.0, 3.0, 4.0, 18.0, 23.0, 29.0, 44.0, 73.0, 119.0, 148.0, 128.0, 130.0, 99.0, 64.0, 46.0, 35.0, 24.0, 12.0, 2.0, 3.0, 2.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.01082611083984375, -0.01056438684463501, -0.01030266284942627, -0.01004093885421753, -0.009779214859008789, -0.009517490863800049, -0.009255766868591309, -0.008994042873382568, -0.008732318878173828, -0.008470594882965088, -0.008208870887756348, -0.007947146892547607, -0.007685422897338867, -0.007423698902130127, -0.007161974906921387, -0.0069002509117126465, -0.006638526916503906, -0.006376802921295166, -0.006115078926086426, -0.0058533549308776855, -0.005591630935668945, -0.005329906940460205, -0.005068182945251465, -0.004806458950042725, -0.004544734954833984, -0.004283010959625244, -0.004021286964416504, -0.0037595629692077637, -0.0034978389739990234, -0.003236114978790283, -0.002974390983581543, -0.0027126669883728027, -0.0024509429931640625, -0.0021892189979553223, -0.001927495002746582, -0.0016657710075378418, -0.0014040470123291016, -0.0011423230171203613, -0.0008805990219116211, -0.0006188750267028809, -0.0003571510314941406, -9.542703628540039e-05, 0.00016629695892333984, 0.0004280209541320801, 0.0006897449493408203, 0.0009514689445495605, 0.0012131929397583008, 0.001474916934967041, 0.0017366409301757812, 0.0019983649253845215, 0.0022600889205932617, 0.002521812915802002, 0.002783536911010742, 0.0030452609062194824, 0.0033069849014282227, 0.003568708896636963, 0.003830432891845703, 0.004092156887054443, 0.004353880882263184, 0.004615604877471924, 0.004877328872680664, 0.005139052867889404, 0.0054007768630981445, 0.005662500858306885, 0.005924224853515625]}, "gradients/encoder.encoder.layers.4.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 2.0, 0.0, 2.0, 10.0, 7.0, 9.0, 13.0, 33.0, 46.0, 62.0, 102.0, 127.0, 254.0, 441.0, 883.0, 1737.0, 3857.0, 9049.0, 22194.0, 64474.0, 503238.0, 342447.0, 61795.0, 21677.0, 8723.0, 3716.0, 1747.0, 844.0, 457.0, 211.0, 156.0, 73.0, 60.0, 39.0, 18.0, 17.0, 15.0, 9.0, 3.0, 11.0, 2.0, 2.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-0.024444580078125, -0.023725271224975586, -0.023005962371826172, -0.022286653518676758, -0.021567344665527344, -0.02084803581237793, -0.020128726959228516, -0.0194094181060791, -0.018690109252929688, -0.017970800399780273, -0.01725149154663086, -0.016532182693481445, -0.01581287384033203, -0.015093564987182617, -0.014374256134033203, -0.013654947280883789, -0.012935638427734375, -0.012216329574584961, -0.011497020721435547, -0.010777711868286133, -0.010058403015136719, -0.009339094161987305, -0.00861978530883789, -0.007900476455688477, -0.0071811676025390625, -0.0064618587493896484, -0.005742549896240234, -0.00502324104309082, -0.004303932189941406, -0.003584623336791992, -0.002865314483642578, -0.002146005630493164, -0.00142669677734375, -0.0007073879241943359, 1.1920928955078125e-05, 0.0007312297821044922, 0.0014505386352539062, 0.0021698474884033203, 0.0028891563415527344, 0.0036084651947021484, 0.0043277740478515625, 0.0050470829010009766, 0.005766391754150391, 0.006485700607299805, 0.007205009460449219, 0.007924318313598633, 0.008643627166748047, 0.009362936019897461, 0.010082244873046875, 0.010801553726196289, 0.011520862579345703, 0.012240171432495117, 0.012959480285644531, 0.013678789138793945, 0.01439809799194336, 0.015117406845092773, 0.015836715698242188, 0.0165560245513916, 0.017275333404541016, 0.01799464225769043, 0.018713951110839844, 0.019433259963989258, 0.020152568817138672, 0.020871877670288086, 0.0215911865234375]}, "gradients/encoder.encoder.layers.4.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 4.0, 6.0, 1.0, 8.0, 8.0, 12.0, 7.0, 15.0, 12.0, 16.0, 30.0, 35.0, 24.0, 35.0, 35.0, 48.0, 44.0, 61.0, 56.0, 51.0, 65.0, 52.0, 44.0, 50.0, 52.0, 31.0, 31.0, 36.0, 24.0, 21.0, 28.0, 16.0, 18.0, 14.0, 5.0, 4.0, 4.0, 7.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.013671875, -0.013216137886047363, -0.012760400772094727, -0.01230466365814209, -0.011848926544189453, -0.011393189430236816, -0.01093745231628418, -0.010481715202331543, -0.010025978088378906, -0.00957024097442627, -0.009114503860473633, -0.008658766746520996, -0.00820302963256836, -0.007747292518615723, -0.007291555404663086, -0.006835818290710449, -0.0063800811767578125, -0.005924344062805176, -0.005468606948852539, -0.005012869834899902, -0.004557132720947266, -0.004101395606994629, -0.003645658493041992, -0.0031899213790893555, -0.0027341842651367188, -0.002278447151184082, -0.0018227100372314453, -0.0013669729232788086, -0.0009112358093261719, -0.00045549869537353516, 2.384185791015625e-07, 0.0004559755325317383, 0.000911712646484375, 0.0013674497604370117, 0.0018231868743896484, 0.002278923988342285, 0.002734661102294922, 0.0031903982162475586, 0.0036461353302001953, 0.004101872444152832, 0.004557609558105469, 0.0050133466720581055, 0.005469083786010742, 0.005924820899963379, 0.006380558013916016, 0.006836295127868652, 0.007292032241821289, 0.007747769355773926, 0.008203506469726562, 0.0086592435836792, 0.009114980697631836, 0.009570717811584473, 0.01002645492553711, 0.010482192039489746, 0.010937929153442383, 0.01139366626739502, 0.011849403381347656, 0.012305140495300293, 0.01276087760925293, 0.013216614723205566, 0.013672351837158203, 0.01412808895111084, 0.014583826065063477, 0.015039563179016113, 0.01549530029296875]}, "gradients/encoder.encoder.layers.4.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 2.0, 1.0, 2.0, 4.0, 2.0, 3.0, 1.0, 8.0, 14.0, 16.0, 20.0, 32.0, 62.0, 74.0, 98.0, 203.0, 322.0, 547.0, 890.0, 1618.0, 2848.0, 5729.0, 12287.0, 28897.0, 86375.0, 703989.0, 135542.0, 38559.0, 15291.0, 7156.0, 3538.0, 1879.0, 970.0, 641.0, 348.0, 206.0, 117.0, 87.0, 59.0, 46.0, 36.0, 15.0, 13.0, 5.0, 4.0, 8.0, 3.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.00022220611572265625, -0.00021645613014698029, -0.00021070614457130432, -0.00020495615899562836, -0.0001992061734199524, -0.00019345618784427643, -0.00018770620226860046, -0.0001819562166929245, -0.00017620623111724854, -0.00017045624554157257, -0.0001647062599658966, -0.00015895627439022064, -0.00015320628881454468, -0.0001474563032388687, -0.00014170631766319275, -0.00013595633208751678, -0.00013020634651184082, -0.00012445636093616486, -0.00011870637536048889, -0.00011295638978481293, -0.00010720640420913696, -0.000101456418633461, -9.570643305778503e-05, -8.995644748210907e-05, -8.42064619064331e-05, -7.845647633075714e-05, -7.270649075508118e-05, -6.695650517940521e-05, -6.120651960372925e-05, -5.5456534028053284e-05, -4.970654845237732e-05, -4.3956562876701355e-05, -3.820657730102539e-05, -3.2456591725349426e-05, -2.6706606149673462e-05, -2.0956620573997498e-05, -1.5206634998321533e-05, -9.456649422645569e-06, -3.7066638469696045e-06, 2.04332172870636e-06, 7.793307304382324e-06, 1.3543292880058289e-05, 1.9293278455734253e-05, 2.5043264031410217e-05, 3.079324960708618e-05, 3.6543235182762146e-05, 4.229322075843811e-05, 4.8043206334114075e-05, 5.379319190979004e-05, 5.9543177485466003e-05, 6.529316306114197e-05, 7.104314863681793e-05, 7.67931342124939e-05, 8.254311978816986e-05, 8.829310536384583e-05, 9.404309093952179e-05, 9.979307651519775e-05, 0.00010554306209087372, 0.00011129304766654968, 0.00011704303324222565, 0.0001227930188179016, 0.00012854300439357758, 0.00013429298996925354, 0.0001400429755449295, 0.00014579296112060547]}, "gradients/encoder.encoder.layers.4.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 2.0, 1.0, 2.0, 1.0, 3.0, 2.0, 3.0, 3.0, 1.0, 1.0, 4.0, 9.0, 6.0, 5.0, 3.0, 4.0, 8.0, 24.0, 25.0, 28.0, 59.0, 159.0, 237.0, 179.0, 88.0, 51.0, 19.0, 16.0, 9.0, 8.0, 11.0, 6.0, 3.0, 3.0, 5.0, 1.0, 4.0, 4.0, 1.0, 3.0, 2.0, 4.0, 2.0, 3.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-3.5762786865234375e-05, -3.473181277513504e-05, -3.3700838685035706e-05, -3.266986459493637e-05, -3.1638890504837036e-05, -3.06079164147377e-05, -2.9576942324638367e-05, -2.8545968234539032e-05, -2.7514994144439697e-05, -2.6484020054340363e-05, -2.5453045964241028e-05, -2.4422071874141693e-05, -2.339109778404236e-05, -2.2360123693943024e-05, -2.132914960384369e-05, -2.0298175513744354e-05, -1.926720142364502e-05, -1.8236227333545685e-05, -1.720525324344635e-05, -1.6174279153347015e-05, -1.514330506324768e-05, -1.4112330973148346e-05, -1.3081356883049011e-05, -1.2050382792949677e-05, -1.1019408702850342e-05, -9.988434612751007e-06, -8.957460522651672e-06, -7.926486432552338e-06, -6.895512342453003e-06, -5.864538252353668e-06, -4.8335641622543335e-06, -3.8025900721549988e-06, -2.771615982055664e-06, -1.7406418919563293e-06, -7.096678018569946e-07, 3.213062882423401e-07, 1.3522803783416748e-06, 2.3832544684410095e-06, 3.4142285585403442e-06, 4.445202648639679e-06, 5.476176738739014e-06, 6.507150828838348e-06, 7.538124918937683e-06, 8.569099009037018e-06, 9.600073099136353e-06, 1.0631047189235687e-05, 1.1662021279335022e-05, 1.2692995369434357e-05, 1.3723969459533691e-05, 1.4754943549633026e-05, 1.578591763973236e-05, 1.6816891729831696e-05, 1.784786581993103e-05, 1.8878839910030365e-05, 1.99098140001297e-05, 2.0940788090229034e-05, 2.197176218032837e-05, 2.3002736270427704e-05, 2.403371036052704e-05, 2.5064684450626373e-05, 2.6095658540725708e-05, 2.7126632630825043e-05, 2.8157606720924377e-05, 2.9188580811023712e-05, 3.0219554901123047e-05]}, "gradients/encoder.encoder.layers.4.attention.q_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 1.0, 4.0, 1.0, 5.0, 10.0, 19.0, 17.0, 22.0, 36.0, 42.0, 88.0, 161.0, 240.0, 459.0, 934.0, 1978.0, 4792.0, 12180.0, 39294.0, 379940.0, 546461.0, 40714.0, 12350.0, 4689.0, 2078.0, 915.0, 489.0, 256.0, 131.0, 78.0, 58.0, 41.0, 20.0, 17.0, 12.0, 14.0, 8.0, 4.0, 2.0, 2.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 3.0], "bins": [-0.00032901763916015625, -0.0003201272338628769, -0.00031123682856559753, -0.0003023464232683182, -0.0002934560179710388, -0.00028456561267375946, -0.0002756752073764801, -0.00026678480207920074, -0.0002578943967819214, -0.00024900399148464203, -0.00024011358618736267, -0.0002312231808900833, -0.00022233277559280396, -0.0002134423702955246, -0.00020455196499824524, -0.00019566155970096588, -0.00018677115440368652, -0.00017788074910640717, -0.0001689903438091278, -0.00016009993851184845, -0.0001512095332145691, -0.00014231912791728973, -0.00013342872262001038, -0.00012453831732273102, -0.00011564791202545166, -0.0001067575067281723, -9.786710143089294e-05, -8.897669613361359e-05, -8.008629083633423e-05, -7.119588553905487e-05, -6.230548024177551e-05, -5.3415074944496155e-05, -4.45246696472168e-05, -3.563426434993744e-05, -2.674385905265808e-05, -1.7853453755378723e-05, -8.963048458099365e-06, -7.264316082000732e-08, 8.81776213645935e-06, 1.770816743373871e-05, 2.6598572731018066e-05, 3.5488978028297424e-05, 4.437938332557678e-05, 5.326978862285614e-05, 6.21601939201355e-05, 7.105059921741486e-05, 7.994100451469421e-05, 8.883140981197357e-05, 9.772181510925293e-05, 0.00010661222040653229, 0.00011550262570381165, 0.000124393031001091, 0.00013328343629837036, 0.00014217384159564972, 0.00015106424689292908, 0.00015995465219020844, 0.0001688450574874878, 0.00017773546278476715, 0.0001866258680820465, 0.00019551627337932587, 0.00020440667867660522, 0.00021329708397388458, 0.00022218748927116394, 0.0002310778945684433, 0.00023996829986572266]}, "gradients/encoder.encoder.layers.4.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 1.0, 4.0, 5.0, 3.0, 7.0, 2.0, 10.0, 13.0, 11.0, 15.0, 13.0, 20.0, 28.0, 40.0, 37.0, 56.0, 83.0, 110.0, 130.0, 105.0, 67.0, 57.0, 35.0, 31.0, 29.0, 17.0, 12.0, 25.0, 10.0, 6.0, 7.0, 4.0, 7.0, 6.0, 3.0, 2.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.00014734268188476562, -0.0001417696475982666, -0.00013619661331176758, -0.00013062357902526855, -0.00012505054473876953, -0.00011947751045227051, -0.00011390447616577148, -0.00010833144187927246, -0.00010275840759277344, -9.718537330627441e-05, -9.161233901977539e-05, -8.603930473327637e-05, -8.046627044677734e-05, -7.489323616027832e-05, -6.93202018737793e-05, -6.374716758728027e-05, -5.817413330078125e-05, -5.2601099014282227e-05, -4.70280647277832e-05, -4.145503044128418e-05, -3.5881996154785156e-05, -3.0308961868286133e-05, -2.473592758178711e-05, -1.9162893295288086e-05, -1.3589859008789062e-05, -8.016824722290039e-06, -2.4437904357910156e-06, 3.129243850708008e-06, 8.702278137207031e-06, 1.4275312423706055e-05, 1.9848346710205078e-05, 2.54213809967041e-05, 3.0994415283203125e-05, 3.656744956970215e-05, 4.214048385620117e-05, 4.7713518142700195e-05, 5.328655242919922e-05, 5.885958671569824e-05, 6.443262100219727e-05, 7.000565528869629e-05, 7.557868957519531e-05, 8.115172386169434e-05, 8.672475814819336e-05, 9.229779243469238e-05, 9.78708267211914e-05, 0.00010344386100769043, 0.00010901689529418945, 0.00011458992958068848, 0.0001201629638671875, 0.00012573599815368652, 0.00013130903244018555, 0.00013688206672668457, 0.0001424551010131836, 0.00014802813529968262, 0.00015360116958618164, 0.00015917420387268066, 0.0001647472381591797, 0.0001703202724456787, 0.00017589330673217773, 0.00018146634101867676, 0.00018703937530517578, 0.0001926124095916748, 0.00019818544387817383, 0.00020375847816467285, 0.00020933151245117188]}, "gradients/encoder.encoder.layers.4.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 4.0, 7.0, 8.0, 11.0, 18.0, 24.0, 32.0, 81.0, 201.0, 339.0, 91.0, 53.0, 36.0, 21.0, 20.0, 22.0, 8.0, 9.0, 10.0, 7.0, 3.0, 0.0, 5.0, 1.0, 0.0, 1.0, 2.0, 2.0, 0.0, 1.0], "bins": [-0.17582225799560547, -0.17174933850765228, -0.1676764190196991, -0.16360348463058472, -0.15953056514263153, -0.15545764565467834, -0.15138471126556396, -0.14731179177761078, -0.1432388722896576, -0.1391659528017044, -0.13509303331375122, -0.13102009892463684, -0.12694717943668365, -0.12287425994873047, -0.11880133301019669, -0.1147284060716629, -0.11065548658370972, -0.10658256709575653, -0.10250964015722275, -0.09843671321868896, -0.09436379373073578, -0.09029087424278259, -0.08621794730424881, -0.08214502036571503, -0.07807210087776184, -0.07399918138980865, -0.06992625445127487, -0.06585332751274109, -0.0617804080247879, -0.05770748481154442, -0.053634561598300934, -0.04956163838505745, -0.045488715171813965, -0.04141579195857048, -0.037342868745326996, -0.03326994553208351, -0.029197022318840027, -0.025124099105596542, -0.021051175892353058, -0.016978252679109573, -0.012905329465866089, -0.008832406252622604, -0.00475948303937912, -0.0006865598261356354, 0.003386363387107849, 0.007459286600351334, 0.011532209813594818, 0.015605133026838303, 0.019678056240081787, 0.02375097945332527, 0.027823902666568756, 0.03189682587981224, 0.035969749093055725, 0.04004267230629921, 0.044115595519542694, 0.04818851873278618, 0.05226144194602966, 0.05633436515927315, 0.06040728837251663, 0.06448021531105042, 0.0685531347990036, 0.07262605428695679, 0.07669898122549057, 0.08077190816402435, 0.08484482765197754]}, "gradients/encoder.encoder.layers.4.layer_norm.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 2.0, 2.0, 3.0, 1.0, 5.0, 3.0, 7.0, 5.0, 8.0, 11.0, 12.0, 9.0, 13.0, 20.0, 19.0, 25.0, 28.0, 31.0, 39.0, 38.0, 48.0, 75.0, 91.0, 87.0, 55.0, 63.0, 45.0, 43.0, 41.0, 32.0, 19.0, 24.0, 15.0, 17.0, 18.0, 15.0, 9.0, 6.0, 9.0, 6.0, 7.0, 6.0, 0.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.04868197441101074, -0.04681331291794777, -0.044944651424884796, -0.043075986206531525, -0.04120732471346855, -0.03933866322040558, -0.03746999800205231, -0.035601336508989334, -0.03373267501592636, -0.03186401352286339, -0.029995350167155266, -0.028126686811447144, -0.02625802531838417, -0.024389363825321198, -0.022520700469613075, -0.020652037113904953, -0.01878337562084198, -0.016914714127779007, -0.015046050772070885, -0.013177388347685337, -0.01130872592329979, -0.009440063498914242, -0.007571401074528694, -0.0057027386501431465, -0.003834076225757599, -0.0019654138013720512, -9.67513769865036e-05, 0.001771911047399044, 0.0036405734717845917, 0.005509235896170139, 0.007377898320555687, 0.009246560744941235, 0.011115223169326782, 0.01298388559371233, 0.014852548018097878, 0.016721211373806, 0.018589872866868973, 0.020458534359931946, 0.022327197715640068, 0.02419586107134819, 0.026064522564411163, 0.027933184057474136, 0.02980184741318226, 0.03167051076889038, 0.033539172261953354, 0.03540783375501633, 0.0372764989733696, 0.03914516046643257, 0.041013821959495544, 0.04288248345255852, 0.04475114494562149, 0.04661981016397476, 0.048488471657037735, 0.05035713315010071, 0.05222579836845398, 0.05409445986151695, 0.055963121354579926, 0.0578317828476429, 0.05970044434070587, 0.06156910955905914, 0.06343777477741241, 0.06530643254518509, 0.06717509776353836, 0.06904375553131104, 0.0709124207496643]}, "gradients/encoder.encoder.layers.3.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 3.0, 1.0, 5.0, 6.0, 12.0, 15.0, 22.0, 16.0, 22.0, 37.0, 51.0, 57.0, 68.0, 90.0, 106.0, 126.0, 131.0, 174.0, 210.0, 196.0, 323.0, 515.0, 1040.0, 2375.0, 6238.0, 19409.0, 69699.0, 340741.0, 3222740.0, 412410.0, 83659.0, 20775.0, 6464.0, 2715.0, 1538.0, 879.0, 608.0, 314.0, 211.0, 112.0, 75.0, 54.0, 25.0, 12.0, 4.0, 3.0, 5.0, 1.0, 3.0, 1.0, 0.0, 1.0], "bins": [-0.0804443359375, -0.0784602165222168, -0.0764760971069336, -0.07449197769165039, -0.07250785827636719, -0.07052373886108398, -0.06853961944580078, -0.06655550003051758, -0.06457138061523438, -0.06258726119995117, -0.06060314178466797, -0.058619022369384766, -0.05663490295410156, -0.05465078353881836, -0.052666664123535156, -0.05068254470825195, -0.04869842529296875, -0.04671430587768555, -0.044730186462402344, -0.04274606704711914, -0.04076194763183594, -0.038777828216552734, -0.03679370880126953, -0.03480958938598633, -0.032825469970703125, -0.030841350555419922, -0.02885723114013672, -0.026873111724853516, -0.024888992309570312, -0.02290487289428711, -0.020920753479003906, -0.018936634063720703, -0.0169525146484375, -0.014968395233154297, -0.012984275817871094, -0.01100015640258789, -0.009016036987304688, -0.007031917572021484, -0.005047798156738281, -0.003063678741455078, -0.001079559326171875, 0.0009045600891113281, 0.0028886795043945312, 0.004872798919677734, 0.0068569183349609375, 0.00884103775024414, 0.010825157165527344, 0.012809276580810547, 0.01479339599609375, 0.016777515411376953, 0.018761634826660156, 0.02074575424194336, 0.022729873657226562, 0.024713993072509766, 0.02669811248779297, 0.028682231903076172, 0.030666351318359375, 0.03265047073364258, 0.03463459014892578, 0.036618709564208984, 0.03860282897949219, 0.04058694839477539, 0.042571067810058594, 0.0445551872253418, 0.046539306640625]}, "gradients/encoder.encoder.layers.3.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 1.0, 5.0, 4.0, 8.0, 25.0, 26.0, 37.0, 61.0, 107.0, 132.0, 141.0, 124.0, 117.0, 80.0, 51.0, 38.0, 21.0, 21.0, 6.0, 2.0, 2.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.0103912353515625, -0.010136306285858154, -0.009881377220153809, -0.009626448154449463, -0.009371519088745117, -0.009116590023040771, -0.008861660957336426, -0.00860673189163208, -0.008351802825927734, -0.008096873760223389, -0.007841944694519043, -0.007587015628814697, -0.0073320865631103516, -0.007077157497406006, -0.00682222843170166, -0.0065672993659973145, -0.006312370300292969, -0.006057441234588623, -0.005802512168884277, -0.005547583103179932, -0.005292654037475586, -0.00503772497177124, -0.0047827959060668945, -0.004527866840362549, -0.004272937774658203, -0.004018008708953857, -0.0037630796432495117, -0.003508150577545166, -0.0032532215118408203, -0.0029982924461364746, -0.002743363380432129, -0.002488434314727783, -0.0022335052490234375, -0.001978576183319092, -0.001723647117614746, -0.0014687180519104004, -0.0012137889862060547, -0.000958859920501709, -0.0007039308547973633, -0.0004490017890930176, -0.00019407272338867188, 6.085634231567383e-05, 0.00031578540802001953, 0.0005707144737243652, 0.0008256435394287109, 0.0010805726051330566, 0.0013355016708374023, 0.001590430736541748, 0.0018453598022460938, 0.0021002888679504395, 0.002355217933654785, 0.002610146999359131, 0.0028650760650634766, 0.0031200051307678223, 0.003374934196472168, 0.0036298632621765137, 0.0038847923278808594, 0.004139721393585205, 0.004394650459289551, 0.0046495795249938965, 0.004904508590698242, 0.005159437656402588, 0.005414366722106934, 0.005669295787811279, 0.005924224853515625]}, "gradients/encoder.encoder.layers.3.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 6.0, 1.0, 2.0, 7.0, 9.0, 14.0, 20.0, 23.0, 33.0, 71.0, 96.0, 144.0, 217.0, 339.0, 538.0, 831.0, 1447.0, 2512.0, 4424.0, 7740.0, 14163.0, 28252.0, 59541.0, 140278.0, 415498.0, 2391915.0, 744813.0, 213457.0, 85198.0, 39023.0, 19512.0, 10225.0, 5690.0, 3234.0, 1896.0, 1195.0, 700.0, 461.0, 275.0, 165.0, 114.0, 72.0, 51.0, 32.0, 22.0, 11.0, 10.0, 8.0, 5.0, 7.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-0.01555633544921875, -0.015107989311218262, -0.014659643173217773, -0.014211297035217285, -0.013762950897216797, -0.013314604759216309, -0.01286625862121582, -0.012417912483215332, -0.011969566345214844, -0.011521220207214355, -0.011072874069213867, -0.010624527931213379, -0.01017618179321289, -0.009727835655212402, -0.009279489517211914, -0.008831143379211426, -0.008382797241210938, -0.00793445110321045, -0.007486104965209961, -0.007037758827209473, -0.006589412689208984, -0.006141066551208496, -0.005692720413208008, -0.0052443742752075195, -0.004796028137207031, -0.004347681999206543, -0.0038993358612060547, -0.0034509897232055664, -0.003002643585205078, -0.00255429744720459, -0.0021059513092041016, -0.0016576051712036133, -0.001209259033203125, -0.0007609128952026367, -0.00031256675720214844, 0.00013577938079833984, 0.0005841255187988281, 0.0010324716567993164, 0.0014808177947998047, 0.001929163932800293, 0.0023775100708007812, 0.0028258562088012695, 0.003274202346801758, 0.003722548484802246, 0.004170894622802734, 0.004619240760803223, 0.005067586898803711, 0.005515933036804199, 0.0059642791748046875, 0.006412625312805176, 0.006860971450805664, 0.007309317588806152, 0.007757663726806641, 0.008206009864807129, 0.008654356002807617, 0.009102702140808105, 0.009551048278808594, 0.009999394416809082, 0.01044774055480957, 0.010896086692810059, 0.011344432830810547, 0.011792778968811035, 0.012241125106811523, 0.012689471244812012, 0.0131378173828125]}, "gradients/encoder.encoder.layers.3.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 0.0, 3.0, 3.0, 0.0, 4.0, 6.0, 6.0, 7.0, 10.0, 14.0, 26.0, 42.0, 36.0, 50.0, 54.0, 69.0, 84.0, 99.0, 122.0, 142.0, 210.0, 262.0, 288.0, 476.0, 450.0, 260.0, 232.0, 182.0, 167.0, 147.0, 129.0, 92.0, 95.0, 64.0, 52.0, 43.0, 34.0, 32.0, 22.0, 23.0, 7.0, 13.0, 7.0, 9.0, 2.0, 5.0, 2.0, 2.0, 0.0, 2.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0137481689453125, -0.013237953186035156, -0.012727737426757812, -0.012217521667480469, -0.011707305908203125, -0.011197090148925781, -0.010686874389648438, -0.010176658630371094, -0.00966644287109375, -0.009156227111816406, -0.008646011352539062, -0.008135795593261719, -0.007625579833984375, -0.007115364074707031, -0.0066051483154296875, -0.006094932556152344, -0.005584716796875, -0.005074501037597656, -0.0045642852783203125, -0.004054069519042969, -0.003543853759765625, -0.0030336380004882812, -0.0025234222412109375, -0.0020132064819335938, -0.00150299072265625, -0.0009927749633789062, -0.0004825592041015625, 2.765655517578125e-05, 0.000537872314453125, 0.0010480880737304688, 0.0015583038330078125, 0.0020685195922851562, 0.0025787353515625, 0.0030889511108398438, 0.0035991668701171875, 0.004109382629394531, 0.004619598388671875, 0.005129814147949219, 0.0056400299072265625, 0.006150245666503906, 0.00666046142578125, 0.007170677185058594, 0.0076808929443359375, 0.008191108703613281, 0.008701324462890625, 0.009211540222167969, 0.009721755981445312, 0.010231971740722656, 0.0107421875, 0.011252403259277344, 0.011762619018554688, 0.012272834777832031, 0.012783050537109375, 0.013293266296386719, 0.013803482055664062, 0.014313697814941406, 0.01482391357421875, 0.015334129333496094, 0.015844345092773438, 0.01635456085205078, 0.016864776611328125, 0.01737499237060547, 0.017885208129882812, 0.018395423889160156, 0.0189056396484375]}, "gradients/encoder.encoder.layers.3.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 1.0, 1.0, 4.0, 6.0, 12.0, 22.0, 27.0, 76.0, 130.0, 238.0, 128.0, 75.0, 68.0, 61.0, 39.0, 29.0, 15.0, 22.0, 5.0, 9.0, 7.0, 6.0, 6.0, 6.0, 7.0, 4.0, 2.0, 2.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.29051944613456726, -0.2802116572856903, -0.26990389823913574, -0.2595961093902588, -0.24928833544254303, -0.23898056149482727, -0.22867277264595032, -0.21836499869823456, -0.2080572247505188, -0.19774945080280304, -0.1874416619539261, -0.17713388800621033, -0.16682611405849457, -0.1565183401107788, -0.14621055126190186, -0.1359027773141861, -0.12559498846530914, -0.11528720706701279, -0.10497943311929703, -0.09467165172100067, -0.08436387777328491, -0.07405609637498856, -0.0637483149766922, -0.05344054102897644, -0.043132759630680084, -0.032824981957674026, -0.02251720242202282, -0.012209422886371613, -0.0019016452133655548, 0.008406132459640503, 0.01871391385793686, 0.02902168780565262, 0.039329469203948975, 0.04963724687695503, 0.05994502454996109, 0.07025280594825745, 0.0805605798959732, 0.09086836129426956, 0.10117614269256592, 0.11148391664028168, 0.12179169803857803, 0.1320994794368744, 0.14240725338459015, 0.1527150273323059, 0.16302281618118286, 0.17333059012889862, 0.18363836407661438, 0.19394615292549133, 0.2042539268732071, 0.21456170082092285, 0.2248694896697998, 0.23517726361751556, 0.24548503756523132, 0.2557928264141083, 0.26610058546066284, 0.2764083743095398, 0.28671616315841675, 0.2970239520072937, 0.30733171105384827, 0.3176394999027252, 0.3279472887516022, 0.33825504779815674, 0.3485628366470337, 0.35887062549591064, 0.3691783845424652]}, "gradients/encoder.encoder.layers.3.final_layer_norm.bias": {"_type": "histogram", "values": [2.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 7.0, 6.0, 9.0, 6.0, 13.0, 13.0, 14.0, 13.0, 11.0, 14.0, 26.0, 25.0, 30.0, 42.0, 32.0, 51.0, 40.0, 58.0, 33.0, 49.0, 38.0, 38.0, 28.0, 29.0, 44.0, 32.0, 30.0, 40.0, 17.0, 28.0, 32.0, 24.0, 21.0, 12.0, 15.0, 18.0, 8.0, 14.0, 7.0, 12.0, 8.0, 4.0, 7.0, 4.0, 2.0, 1.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-0.14641225337982178, -0.14178502559661865, -0.13715781271457672, -0.1325305998325348, -0.12790337204933167, -0.12327615171670914, -0.11864893138408661, -0.11402171105146408, -0.10939449071884155, -0.10476727038621902, -0.1001400500535965, -0.09551282972097397, -0.09088560938835144, -0.08625838905572891, -0.08163116872310638, -0.07700394839048386, -0.07237672805786133, -0.0677495077252388, -0.06312228739261627, -0.058495067059993744, -0.053867846727371216, -0.04924062639474869, -0.04461340606212616, -0.03998618572950363, -0.035358965396881104, -0.030731745064258575, -0.026104524731636047, -0.02147730439901352, -0.01685008406639099, -0.012222863733768463, -0.007595643401145935, -0.002968423068523407, 0.001658797264099121, 0.006286017596721649, 0.010913237929344177, 0.015540458261966705, 0.020167678594589233, 0.02479489892721176, 0.02942211925983429, 0.03404933959245682, 0.038676559925079346, 0.043303780257701874, 0.0479310005903244, 0.05255822092294693, 0.05718544125556946, 0.061812661588191986, 0.06643988192081451, 0.07106710225343704, 0.07569432258605957, 0.0803215429186821, 0.08494876325130463, 0.08957598358392715, 0.09420320391654968, 0.09883042424917221, 0.10345764458179474, 0.10808486491441727, 0.1127120852470398, 0.11733930557966232, 0.12196652591228485, 0.12659373879432678, 0.1312209665775299, 0.13584819436073303, 0.14047540724277496, 0.1451026201248169, 0.14972984790802002]}, "gradients/encoder.encoder.layers.3.attention.out_proj.weight": {"_type": "histogram", "values": [3.0, 2.0, 4.0, 4.0, 2.0, 4.0, 3.0, 6.0, 9.0, 8.0, 7.0, 11.0, 14.0, 27.0, 37.0, 43.0, 74.0, 162.0, 304.0, 517.0, 1212.0, 2905.0, 8628.0, 29832.0, 137025.0, 683987.0, 139587.0, 30023.0, 8651.0, 3103.0, 1135.0, 519.0, 282.0, 149.0, 91.0, 56.0, 36.0, 21.0, 17.0, 17.0, 14.0, 12.0, 8.0, 4.0, 3.0, 2.0, 3.0, 2.0, 1.0, 4.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.0361328125, -0.034716129302978516, -0.03329944610595703, -0.03188276290893555, -0.030466079711914062, -0.029049396514892578, -0.027632713317871094, -0.02621603012084961, -0.024799346923828125, -0.02338266372680664, -0.021965980529785156, -0.020549297332763672, -0.019132614135742188, -0.017715930938720703, -0.01629924774169922, -0.014882564544677734, -0.01346588134765625, -0.012049198150634766, -0.010632514953613281, -0.009215831756591797, -0.0077991485595703125, -0.006382465362548828, -0.004965782165527344, -0.0035490989685058594, -0.002132415771484375, -0.0007157325744628906, 0.0007009506225585938, 0.002117633819580078, 0.0035343170166015625, 0.004951000213623047, 0.006367683410644531, 0.007784366607666016, 0.0092010498046875, 0.010617733001708984, 0.012034416198730469, 0.013451099395751953, 0.014867782592773438, 0.016284465789794922, 0.017701148986816406, 0.01911783218383789, 0.020534515380859375, 0.02195119857788086, 0.023367881774902344, 0.024784564971923828, 0.026201248168945312, 0.027617931365966797, 0.02903461456298828, 0.030451297760009766, 0.03186798095703125, 0.033284664154052734, 0.03470134735107422, 0.0361180305480957, 0.03753471374511719, 0.03895139694213867, 0.040368080139160156, 0.04178476333618164, 0.043201446533203125, 0.04461812973022461, 0.046034812927246094, 0.04745149612426758, 0.04886817932128906, 0.05028486251831055, 0.05170154571533203, 0.053118228912353516, 0.054534912109375]}, "gradients/encoder.encoder.layers.3.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 2.0, 2.0, 4.0, 7.0, 7.0, 11.0, 16.0, 36.0, 35.0, 67.0, 78.0, 81.0, 87.0, 107.0, 93.0, 104.0, 71.0, 60.0, 51.0, 28.0, 22.0, 25.0, 7.0, 2.0, 1.0, 2.0, 5.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.0117340087890625, -0.011465132236480713, -0.011196255683898926, -0.010927379131317139, -0.010658502578735352, -0.010389626026153564, -0.010120749473571777, -0.00985187292098999, -0.009582996368408203, -0.009314119815826416, -0.009045243263244629, -0.008776366710662842, -0.008507490158081055, -0.008238613605499268, -0.00796973705291748, -0.007700860500335693, -0.007431983947753906, -0.007163107395172119, -0.006894230842590332, -0.006625354290008545, -0.006356477737426758, -0.006087601184844971, -0.005818724632263184, -0.0055498480796813965, -0.005280971527099609, -0.005012094974517822, -0.004743218421936035, -0.004474341869354248, -0.004205465316772461, -0.003936588764190674, -0.0036677122116088867, -0.0033988356590270996, -0.0031299591064453125, -0.0028610825538635254, -0.0025922060012817383, -0.002323329448699951, -0.002054452896118164, -0.001785576343536377, -0.0015166997909545898, -0.0012478232383728027, -0.0009789466857910156, -0.0007100701332092285, -0.0004411935806274414, -0.0001723170280456543, 9.655952453613281e-05, 0.0003654360771179199, 0.000634312629699707, 0.0009031891822814941, 0.0011720657348632812, 0.0014409422874450684, 0.0017098188400268555, 0.0019786953926086426, 0.0022475719451904297, 0.002516448497772217, 0.002785325050354004, 0.003054201602935791, 0.003323078155517578, 0.0035919547080993652, 0.0038608312606811523, 0.0041297078132629395, 0.0043985843658447266, 0.004667460918426514, 0.004936337471008301, 0.005205214023590088, 0.005474090576171875]}, "gradients/encoder.encoder.layers.3.attention.v_proj.weight": {"_type": "histogram", "values": [3.0, 1.0, 2.0, 3.0, 5.0, 8.0, 3.0, 16.0, 21.0, 29.0, 40.0, 56.0, 87.0, 126.0, 187.0, 268.0, 396.0, 608.0, 966.0, 1577.0, 2525.0, 4453.0, 7843.0, 15500.0, 33622.0, 85363.0, 537667.0, 233521.0, 67091.0, 27000.0, 12937.0, 6776.0, 3768.0, 2177.0, 1344.0, 875.0, 539.0, 361.0, 260.0, 181.0, 111.0, 71.0, 61.0, 38.0, 28.0, 23.0, 7.0, 8.0, 3.0, 6.0, 7.0, 0.0, 1.0, 3.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.018524169921875, -0.017834901809692383, -0.017145633697509766, -0.01645636558532715, -0.01576709747314453, -0.015077829360961914, -0.014388561248779297, -0.01369929313659668, -0.013010025024414062, -0.012320756912231445, -0.011631488800048828, -0.010942220687866211, -0.010252952575683594, -0.009563684463500977, -0.00887441635131836, -0.008185148239135742, -0.007495880126953125, -0.006806612014770508, -0.006117343902587891, -0.0054280757904052734, -0.004738807678222656, -0.004049539566040039, -0.003360271453857422, -0.0026710033416748047, -0.0019817352294921875, -0.0012924671173095703, -0.0006031990051269531, 8.606910705566406e-05, 0.0007753372192382812, 0.0014646053314208984, 0.0021538734436035156, 0.002843141555786133, 0.00353240966796875, 0.004221677780151367, 0.004910945892333984, 0.0056002140045166016, 0.006289482116699219, 0.006978750228881836, 0.007668018341064453, 0.00835728645324707, 0.009046554565429688, 0.009735822677612305, 0.010425090789794922, 0.011114358901977539, 0.011803627014160156, 0.012492895126342773, 0.01318216323852539, 0.013871431350708008, 0.014560699462890625, 0.015249967575073242, 0.01593923568725586, 0.016628503799438477, 0.017317771911621094, 0.01800704002380371, 0.018696308135986328, 0.019385576248168945, 0.020074844360351562, 0.02076411247253418, 0.021453380584716797, 0.022142648696899414, 0.02283191680908203, 0.02352118492126465, 0.024210453033447266, 0.024899721145629883, 0.0255889892578125]}, "gradients/encoder.encoder.layers.3.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 1.0, 2.0, 3.0, 3.0, 2.0, 6.0, 3.0, 7.0, 8.0, 7.0, 12.0, 23.0, 18.0, 25.0, 28.0, 34.0, 46.0, 55.0, 36.0, 56.0, 50.0, 61.0, 48.0, 56.0, 44.0, 44.0, 46.0, 45.0, 47.0, 29.0, 31.0, 24.0, 15.0, 15.0, 19.0, 13.0, 11.0, 9.0, 2.0, 9.0, 4.0, 5.0, 5.0, 3.0, 2.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.017791748046875, -0.017177343368530273, -0.016562938690185547, -0.01594853401184082, -0.015334129333496094, -0.014719724655151367, -0.01410531997680664, -0.013490915298461914, -0.012876510620117188, -0.012262105941772461, -0.011647701263427734, -0.011033296585083008, -0.010418891906738281, -0.009804487228393555, -0.009190082550048828, -0.008575677871704102, -0.007961273193359375, -0.0073468685150146484, -0.006732463836669922, -0.006118059158325195, -0.005503654479980469, -0.004889249801635742, -0.004274845123291016, -0.003660440444946289, -0.0030460357666015625, -0.002431631088256836, -0.0018172264099121094, -0.0012028217315673828, -0.0005884170532226562, 2.5987625122070312e-05, 0.0006403923034667969, 0.0012547969818115234, 0.00186920166015625, 0.0024836063385009766, 0.003098011016845703, 0.0037124156951904297, 0.004326820373535156, 0.004941225051879883, 0.005555629730224609, 0.006170034408569336, 0.0067844390869140625, 0.007398843765258789, 0.008013248443603516, 0.008627653121948242, 0.009242057800292969, 0.009856462478637695, 0.010470867156982422, 0.011085271835327148, 0.011699676513671875, 0.012314081192016602, 0.012928485870361328, 0.013542890548706055, 0.014157295227050781, 0.014771699905395508, 0.015386104583740234, 0.01600050926208496, 0.016614913940429688, 0.017229318618774414, 0.01784372329711914, 0.018458127975463867, 0.019072532653808594, 0.01968693733215332, 0.020301342010498047, 0.020915746688842773, 0.0215301513671875]}, "gradients/encoder.encoder.layers.3.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 1.0, 2.0, 4.0, 4.0, 7.0, 11.0, 17.0, 20.0, 27.0, 36.0, 77.0, 96.0, 152.0, 324.0, 490.0, 863.0, 1658.0, 3172.0, 6699.0, 15956.0, 48091.0, 250540.0, 619127.0, 64962.0, 20167.0, 7999.0, 3621.0, 1904.0, 1026.0, 566.0, 394.0, 191.0, 120.0, 79.0, 53.0, 34.0, 23.0, 15.0, 11.0, 9.0, 5.0, 7.0, 3.0, 1.0, 0.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.0013227462768554688, -0.0012839734554290771, -0.0012452006340026855, -0.001206427812576294, -0.0011676549911499023, -0.0011288821697235107, -0.0010901093482971191, -0.0010513365268707275, -0.001012563705444336, -0.0009737908840179443, -0.0009350180625915527, -0.0008962452411651611, -0.0008574724197387695, -0.0008186995983123779, -0.0007799267768859863, -0.0007411539554595947, -0.0007023811340332031, -0.0006636083126068115, -0.0006248354911804199, -0.0005860626697540283, -0.0005472898483276367, -0.0005085170269012451, -0.0004697442054748535, -0.0004309713840484619, -0.0003921985626220703, -0.0003534257411956787, -0.0003146529197692871, -0.0002758800983428955, -0.0002371072769165039, -0.0001983344554901123, -0.0001595616340637207, -0.0001207888126373291, -8.20159912109375e-05, -4.32431697845459e-05, -4.470348358154297e-06, 3.4302473068237305e-05, 7.30752944946289e-05, 0.00011184811592102051, 0.0001506209373474121, 0.0001893937587738037, 0.0002281665802001953, 0.0002669394016265869, 0.0003057122230529785, 0.0003444850444793701, 0.0003832578659057617, 0.0004220306873321533, 0.0004608035087585449, 0.0004995763301849365, 0.0005383491516113281, 0.0005771219730377197, 0.0006158947944641113, 0.0006546676158905029, 0.0006934404373168945, 0.0007322132587432861, 0.0007709860801696777, 0.0008097589015960693, 0.0008485317230224609, 0.0008873045444488525, 0.0009260773658752441, 0.0009648501873016357, 0.0010036230087280273, 0.001042395830154419, 0.0010811686515808105, 0.0011199414730072021, 0.0011587142944335938]}, "gradients/encoder.encoder.layers.3.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 5.0, 1.0, 4.0, 5.0, 5.0, 5.0, 10.0, 14.0, 15.0, 19.0, 15.0, 21.0, 41.0, 34.0, 57.0, 83.0, 146.0, 127.0, 120.0, 70.0, 44.0, 33.0, 34.0, 25.0, 22.0, 16.0, 6.0, 13.0, 5.0, 1.0, 5.0, 1.0, 2.0, 2.0, 2.0, 4.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.7583370208740234e-05, -1.6963109374046326e-05, -1.6342848539352417e-05, -1.5722587704658508e-05, -1.51023268699646e-05, -1.4482066035270691e-05, -1.3861805200576782e-05, -1.3241544365882874e-05, -1.2621283531188965e-05, -1.2001022696495056e-05, -1.1380761861801147e-05, -1.0760501027107239e-05, -1.014024019241333e-05, -9.519979357719421e-06, -8.899718523025513e-06, -8.279457688331604e-06, -7.659196853637695e-06, -7.038936018943787e-06, -6.418675184249878e-06, -5.798414349555969e-06, -5.1781535148620605e-06, -4.557892680168152e-06, -3.937631845474243e-06, -3.3173710107803345e-06, -2.6971101760864258e-06, -2.076849341392517e-06, -1.4565885066986084e-06, -8.363276720046997e-07, -2.1606683731079102e-07, 4.041939973831177e-07, 1.0244548320770264e-06, 1.644715666770935e-06, 2.2649765014648438e-06, 2.8852373361587524e-06, 3.505498170852661e-06, 4.12575900554657e-06, 4.7460198402404785e-06, 5.366280674934387e-06, 5.986541509628296e-06, 6.606802344322205e-06, 7.227063179016113e-06, 7.847324013710022e-06, 8.46758484840393e-06, 9.08784568309784e-06, 9.708106517791748e-06, 1.0328367352485657e-05, 1.0948628187179565e-05, 1.1568889021873474e-05, 1.2189149856567383e-05, 1.2809410691261292e-05, 1.34296715259552e-05, 1.4049932360649109e-05, 1.4670193195343018e-05, 1.5290454030036926e-05, 1.5910714864730835e-05, 1.6530975699424744e-05, 1.7151236534118652e-05, 1.777149736881256e-05, 1.839175820350647e-05, 1.901201903820038e-05, 1.9632279872894287e-05, 2.0252540707588196e-05, 2.0872801542282104e-05, 2.1493062376976013e-05, 2.2113323211669922e-05]}, "gradients/encoder.encoder.layers.3.attention.q_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 2.0, 1.0, 1.0, 5.0, 3.0, 3.0, 6.0, 7.0, 11.0, 13.0, 22.0, 21.0, 31.0, 47.0, 59.0, 85.0, 116.0, 161.0, 206.0, 260.0, 362.0, 542.0, 835.0, 1085.0, 1715.0, 2925.0, 4833.0, 8534.0, 16129.0, 37564.0, 114727.0, 642011.0, 134266.0, 41517.0, 17741.0, 9028.0, 4789.0, 3039.0, 1862.0, 1199.0, 847.0, 537.0, 395.0, 233.0, 185.0, 166.0, 121.0, 69.0, 57.0, 45.0, 33.0, 29.0, 26.0, 21.0, 13.0, 8.0, 11.0, 4.0, 5.0, 4.0, 2.0], "bins": [-0.0009531974792480469, -0.0009256079792976379, -0.000898018479347229, -0.0008704289793968201, -0.0008428394794464111, -0.0008152499794960022, -0.0007876604795455933, -0.0007600709795951843, -0.0007324814796447754, -0.0007048919796943665, -0.0006773024797439575, -0.0006497129797935486, -0.0006221234798431396, -0.0005945339798927307, -0.0005669444799423218, -0.0005393549799919128, -0.0005117654800415039, -0.00048417598009109497, -0.00045658648014068604, -0.0004289969801902771, -0.00040140748023986816, -0.00037381798028945923, -0.0003462284803390503, -0.00031863898038864136, -0.0002910494804382324, -0.0002634599804878235, -0.00023587048053741455, -0.00020828098058700562, -0.00018069148063659668, -0.00015310198068618774, -0.0001255124807357788, -9.792298078536987e-05, -7.033348083496094e-05, -4.2743980884552e-05, -1.5154480934143066e-05, 1.2435019016265869e-05, 4.0024518966674805e-05, 6.761401891708374e-05, 9.520351886749268e-05, 0.0001227930188179016, 0.00015038251876831055, 0.00017797201871871948, 0.00020556151866912842, 0.00023315101861953735, 0.0002607405185699463, 0.0002883300185203552, 0.00031591951847076416, 0.0003435090184211731, 0.00037109851837158203, 0.00039868801832199097, 0.0004262775182723999, 0.00045386701822280884, 0.0004814565181732178, 0.0005090460181236267, 0.0005366355180740356, 0.0005642250180244446, 0.0005918145179748535, 0.0006194040179252625, 0.0006469935178756714, 0.0006745830178260803, 0.0007021725177764893, 0.0007297620177268982, 0.0007573515176773071, 0.0007849410176277161, 0.000812530517578125]}, "gradients/encoder.encoder.layers.3.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 1.0, 4.0, 4.0, 7.0, 4.0, 4.0, 7.0, 8.0, 7.0, 6.0, 13.0, 18.0, 21.0, 31.0, 34.0, 45.0, 53.0, 55.0, 63.0, 81.0, 82.0, 78.0, 68.0, 68.0, 41.0, 36.0, 39.0, 19.0, 25.0, 15.0, 18.0, 7.0, 11.0, 6.0, 4.0, 3.0, 2.0, 2.0, 2.0, 3.0, 4.0, 4.0, 2.0, 0.0, 0.0, 2.0, 0.0, 4.0, 0.0, 3.0], "bins": [-0.0009312629699707031, -0.000904373824596405, -0.0008774846792221069, -0.0008505955338478088, -0.0008237063884735107, -0.0007968172430992126, -0.0007699280977249146, -0.0007430389523506165, -0.0007161498069763184, -0.0006892606616020203, -0.0006623715162277222, -0.0006354823708534241, -0.000608593225479126, -0.0005817040801048279, -0.0005548149347305298, -0.0005279257893562317, -0.0005010366439819336, -0.0004741474986076355, -0.0004472583532333374, -0.0004203692078590393, -0.0003934800624847412, -0.0003665909171104431, -0.000339701771736145, -0.0003128126263618469, -0.00028592348098754883, -0.00025903433561325073, -0.00023214519023895264, -0.00020525604486465454, -0.00017836689949035645, -0.00015147775411605835, -0.00012458860874176025, -9.769946336746216e-05, -7.081031799316406e-05, -4.392117261886597e-05, -1.703202724456787e-05, 9.857118129730225e-06, 3.674626350402832e-05, 6.363540887832642e-05, 9.052455425262451e-05, 0.00011741369962692261, 0.0001443028450012207, 0.0001711919903755188, 0.0001980811357498169, 0.000224970281124115, 0.0002518594264984131, 0.0002787485718727112, 0.0003056377172470093, 0.0003325268626213074, 0.00035941600799560547, 0.00038630515336990356, 0.00041319429874420166, 0.00044008344411849976, 0.00046697258949279785, 0.000493861734867096, 0.000520750880241394, 0.0005476400256156921, 0.0005745291709899902, 0.0006014183163642883, 0.0006283074617385864, 0.0006551966071128845, 0.0006820857524871826, 0.0007089748978614807, 0.0007358640432357788, 0.0007627531886100769, 0.000789642333984375]}, "gradients/encoder.encoder.layers.3.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 2.0, 4.0, 5.0, 5.0, 2.0, 3.0, 4.0, 7.0, 12.0, 1.0, 16.0, 20.0, 21.0, 19.0, 23.0, 23.0, 43.0, 48.0, 71.0, 177.0, 236.0, 79.0, 55.0, 36.0, 23.0, 17.0, 9.0, 11.0, 7.0, 5.0, 6.0, 6.0, 3.0, 2.0, 2.0, 2.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.1368698924779892, -0.1337774097919464, -0.13068494200706482, -0.12759245932102203, -0.12449998408555984, -0.12140750885009766, -0.11831503361463547, -0.11522255837917328, -0.1121300756931305, -0.1090376004576683, -0.10594512522220612, -0.10285264253616333, -0.09976016730070114, -0.09666769206523895, -0.09357521682977676, -0.09048274159431458, -0.08739026635885239, -0.0842977911233902, -0.08120531588792801, -0.07811284065246582, -0.07502035796642303, -0.07192788273096085, -0.06883540749549866, -0.06574293226003647, -0.06265045702457428, -0.05955798178911209, -0.056465502828359604, -0.053373027592897415, -0.05028054863214493, -0.04718807339668274, -0.04409559816122055, -0.04100312292575836, -0.03791063651442528, -0.03481816127896309, -0.0317256823182106, -0.028633207082748413, -0.025540729984641075, -0.022448252886533737, -0.01935577765107155, -0.01626330055296421, -0.013170823454856873, -0.010078346356749535, -0.006985870189964771, -0.003893394023180008, -0.00080091692507267, 0.002291560173034668, 0.005384035408496857, 0.008476512506604195, 0.011568989604711533, 0.01466146670281887, 0.01775394380092621, 0.020846419036388397, 0.023938896134495735, 0.027031373232603073, 0.030123848468065262, 0.03321632742881775, 0.03630880266427994, 0.039401277899742126, 0.042493756860494614, 0.0455862320959568, 0.04867871105670929, 0.05177118629217148, 0.05486366152763367, 0.057956136763095856, 0.06104861572384834]}, "gradients/encoder.encoder.layers.3.layer_norm.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 1.0, 4.0, 0.0, 4.0, 7.0, 3.0, 8.0, 7.0, 9.0, 15.0, 17.0, 14.0, 17.0, 15.0, 24.0, 28.0, 37.0, 39.0, 63.0, 52.0, 79.0, 90.0, 98.0, 57.0, 46.0, 36.0, 29.0, 29.0, 33.0, 28.0, 25.0, 27.0, 17.0, 14.0, 7.0, 8.0, 5.0, 8.0, 5.0, 5.0, 4.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.06350511312484741, -0.06090283393859863, -0.058300554752349854, -0.05569827929139137, -0.05309600010514259, -0.050493720918893814, -0.04789144545793533, -0.045289166271686554, -0.042686887085437775, -0.040084607899188995, -0.037482328712940216, -0.034880053251981735, -0.032277774065732956, -0.029675494879484177, -0.027073217555880547, -0.024470940232276917, -0.021868661046028137, -0.019266381859779358, -0.016664104536175728, -0.014061826281249523, -0.011459548026323318, -0.008857269771397114, -0.006254991516470909, -0.003652714192867279, -0.0010504350066184998, 0.001551843248307705, 0.00415412150323391, 0.006756399758160114, 0.009358678013086319, 0.011960956268012524, 0.014563234522938728, 0.01716551184654236, 0.019767791032791138, 0.022370070219039917, 0.024972347542643547, 0.027574624866247177, 0.030176904052495956, 0.032779183238744736, 0.03538145869970322, 0.037983737885951996, 0.040586017072200775, 0.043188296258449554, 0.045790575444698334, 0.048392850905656815, 0.050995130091905594, 0.05359740927815437, 0.056199684739112854, 0.05880196392536163, 0.06140424311161041, 0.06400652229785919, 0.06660880148410797, 0.06921108067035675, 0.07181335985660553, 0.07441563159227371, 0.07701791077852249, 0.07962018996477127, 0.08222246915102005, 0.08482474833726883, 0.08742702752351761, 0.09002930670976639, 0.09263157844543457, 0.09523385763168335, 0.09783613681793213, 0.10043841600418091, 0.10304069519042969]}, "gradients/encoder.encoder.layers.2.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 4.0, 12.0, 10.0, 11.0, 18.0, 17.0, 27.0, 58.0, 73.0, 109.0, 198.0, 357.0, 609.0, 1467.0, 5523.0, 52320.0, 4075666.0, 49762.0, 5284.0, 1410.0, 563.0, 283.0, 194.0, 118.0, 88.0, 45.0, 13.0, 11.0, 4.0, 10.0, 7.0, 4.0, 5.0, 3.0, 3.0, 0.0, 3.0, 3.0, 1.0], "bins": [-0.10009765625, -0.09768486022949219, -0.09527206420898438, -0.09285926818847656, -0.09044647216796875, -0.08803367614746094, -0.08562088012695312, -0.08320808410644531, -0.0807952880859375, -0.07838249206542969, -0.07596969604492188, -0.07355690002441406, -0.07114410400390625, -0.06873130798339844, -0.06631851196289062, -0.06390571594238281, -0.061492919921875, -0.05908012390136719, -0.056667327880859375, -0.05425453186035156, -0.05184173583984375, -0.04942893981933594, -0.047016143798828125, -0.04460334777832031, -0.0421905517578125, -0.03977775573730469, -0.037364959716796875, -0.03495216369628906, -0.03253936767578125, -0.030126571655273438, -0.027713775634765625, -0.025300979614257812, -0.02288818359375, -0.020475387573242188, -0.018062591552734375, -0.015649795532226562, -0.01323699951171875, -0.010824203491210938, -0.008411407470703125, -0.0059986114501953125, -0.0035858154296875, -0.0011730194091796875, 0.001239776611328125, 0.0036525726318359375, 0.00606536865234375, 0.008478164672851562, 0.010890960693359375, 0.013303756713867188, 0.015716552734375, 0.018129348754882812, 0.020542144775390625, 0.022954940795898438, 0.02536773681640625, 0.027780532836914062, 0.030193328857421875, 0.03260612487792969, 0.0350189208984375, 0.03743171691894531, 0.039844512939453125, 0.04225730895996094, 0.04467010498046875, 0.04708290100097656, 0.049495697021484375, 0.05190849304199219, 0.0543212890625]}, "gradients/encoder.encoder.layers.2.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 4.0, 1.0, 2.0, 1.0, 2.0, 3.0, 11.0, 9.0, 14.0, 24.0, 43.0, 43.0, 62.0, 89.0, 85.0, 98.0, 92.0, 89.0, 85.0, 80.0, 50.0, 34.0, 32.0, 19.0, 17.0, 15.0, 5.0, 2.0, 1.0, 0.0, 4.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.00975799560546875, -0.009521186351776123, -0.009284377098083496, -0.00904756784439087, -0.008810758590698242, -0.008573949337005615, -0.008337140083312988, -0.008100330829620361, -0.007863521575927734, -0.007626712322235107, -0.0073899030685424805, -0.0071530938148498535, -0.0069162845611572266, -0.0066794753074646, -0.006442666053771973, -0.006205856800079346, -0.005969047546386719, -0.005732238292694092, -0.005495429039001465, -0.005258619785308838, -0.005021810531616211, -0.004785001277923584, -0.004548192024230957, -0.00431138277053833, -0.004074573516845703, -0.003837764263153076, -0.0036009550094604492, -0.0033641457557678223, -0.0031273365020751953, -0.0028905272483825684, -0.0026537179946899414, -0.0024169087409973145, -0.0021800994873046875, -0.0019432902336120605, -0.0017064809799194336, -0.0014696717262268066, -0.0012328624725341797, -0.0009960532188415527, -0.0007592439651489258, -0.0005224347114562988, -0.0002856254577636719, -4.881620407104492e-05, 0.00018799304962158203, 0.000424802303314209, 0.0006616115570068359, 0.0008984208106994629, 0.0011352300643920898, 0.0013720393180847168, 0.0016088485717773438, 0.0018456578254699707, 0.0020824670791625977, 0.0023192763328552246, 0.0025560855865478516, 0.0027928948402404785, 0.0030297040939331055, 0.0032665133476257324, 0.0035033226013183594, 0.0037401318550109863, 0.003976941108703613, 0.00421375036239624, 0.004450559616088867, 0.004687368869781494, 0.004924178123474121, 0.005160987377166748, 0.005397796630859375]}, "gradients/encoder.encoder.layers.2.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 3.0, 5.0, 6.0, 13.0, 14.0, 29.0, 50.0, 79.0, 101.0, 150.0, 240.0, 470.0, 927.0, 2627.0, 10008.0, 59898.0, 3487424.0, 585848.0, 35985.0, 6716.0, 1866.0, 720.0, 422.0, 234.0, 150.0, 99.0, 59.0, 49.0, 24.0, 29.0, 18.0, 14.0, 4.0, 2.0, 3.0, 2.0, 5.0, 3.0], "bins": [-0.04779052734375, -0.046675920486450195, -0.04556131362915039, -0.044446706771850586, -0.04333209991455078, -0.04221749305725098, -0.04110288619995117, -0.03998827934265137, -0.03887367248535156, -0.03775906562805176, -0.03664445877075195, -0.03552985191345215, -0.034415245056152344, -0.03330063819885254, -0.032186031341552734, -0.03107142448425293, -0.029956817626953125, -0.02884221076965332, -0.027727603912353516, -0.02661299705505371, -0.025498390197753906, -0.0243837833404541, -0.023269176483154297, -0.022154569625854492, -0.021039962768554688, -0.019925355911254883, -0.018810749053955078, -0.017696142196655273, -0.01658153533935547, -0.015466928482055664, -0.01435232162475586, -0.013237714767456055, -0.01212310791015625, -0.011008501052856445, -0.00989389419555664, -0.008779287338256836, -0.007664680480957031, -0.0065500736236572266, -0.005435466766357422, -0.004320859909057617, -0.0032062530517578125, -0.002091646194458008, -0.0009770393371582031, 0.00013756752014160156, 0.0012521743774414062, 0.002366781234741211, 0.0034813880920410156, 0.00459599494934082, 0.005710601806640625, 0.00682520866394043, 0.007939815521240234, 0.009054422378540039, 0.010169029235839844, 0.011283636093139648, 0.012398242950439453, 0.013512849807739258, 0.014627456665039062, 0.015742063522338867, 0.016856670379638672, 0.017971277236938477, 0.01908588409423828, 0.020200490951538086, 0.02131509780883789, 0.022429704666137695, 0.0235443115234375]}, "gradients/encoder.encoder.layers.2.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 7.0, 4.0, 7.0, 11.0, 6.0, 19.0, 22.0, 42.0, 46.0, 60.0, 119.0, 149.0, 288.0, 1108.0, 1519.0, 279.0, 139.0, 88.0, 53.0, 41.0, 20.0, 18.0, 19.0, 6.0, 4.0, 4.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.019744873046875, -0.018808364868164062, -0.017871856689453125, -0.016935348510742188, -0.01599884033203125, -0.015062332153320312, -0.014125823974609375, -0.013189315795898438, -0.0122528076171875, -0.011316299438476562, -0.010379791259765625, -0.009443283081054688, -0.00850677490234375, -0.0075702667236328125, -0.006633758544921875, -0.0056972503662109375, -0.0047607421875, -0.0038242340087890625, -0.002887725830078125, -0.0019512176513671875, -0.00101470947265625, -7.82012939453125e-05, 0.000858306884765625, 0.0017948150634765625, 0.0027313232421875, 0.0036678314208984375, 0.004604339599609375, 0.0055408477783203125, 0.00647735595703125, 0.0074138641357421875, 0.008350372314453125, 0.009286880493164062, 0.010223388671875, 0.011159896850585938, 0.012096405029296875, 0.013032913208007812, 0.01396942138671875, 0.014905929565429688, 0.015842437744140625, 0.016778945922851562, 0.0177154541015625, 0.018651962280273438, 0.019588470458984375, 0.020524978637695312, 0.02146148681640625, 0.022397994995117188, 0.023334503173828125, 0.024271011352539062, 0.02520751953125, 0.026144027709960938, 0.027080535888671875, 0.028017044067382812, 0.02895355224609375, 0.029890060424804688, 0.030826568603515625, 0.03176307678222656, 0.0326995849609375, 0.03363609313964844, 0.034572601318359375, 0.03550910949707031, 0.03644561767578125, 0.03738212585449219, 0.038318634033203125, 0.03925514221191406, 0.040191650390625]}, "gradients/encoder.encoder.layers.2.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 3.0, 2.0, 1.0, 2.0, 2.0, 6.0, 3.0, 8.0, 8.0, 13.0, 18.0, 14.0, 25.0, 45.0, 88.0, 150.0, 225.0, 150.0, 92.0, 50.0, 36.0, 24.0, 17.0, 10.0, 7.0, 5.0, 5.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.2947234511375427, -0.2876898944377899, -0.2806563377380371, -0.2736227810382843, -0.2665892541408539, -0.2595556974411011, -0.25252214074134827, -0.24548858404159546, -0.23845502734184265, -0.23142147064208984, -0.22438792884349823, -0.21735437214374542, -0.21032081544399261, -0.203287273645401, -0.1962537169456482, -0.18922016024589539, -0.18218660354614258, -0.17515304684638977, -0.16811950504779816, -0.16108594834804535, -0.15405239164829254, -0.14701884984970093, -0.13998529314994812, -0.1329517364501953, -0.1259181946516037, -0.11888464540243149, -0.11185108870267868, -0.10481753945350647, -0.09778398275375366, -0.09075043350458145, -0.08371688425540924, -0.07668332755565643, -0.06964977085590363, -0.06261622160673141, -0.05558266490697861, -0.048549115657806396, -0.04151555895805359, -0.03448200970888138, -0.02744845673441887, -0.02041490375995636, -0.01338135078549385, -0.006347798276692629, 0.000685754232108593, 0.0077193062752485275, 0.014752859249711037, 0.021786410361528397, 0.028819963335990906, 0.035853516310453415, 0.042887069284915924, 0.04992062225937843, 0.05695417523384094, 0.06398772448301315, 0.07102128118276596, 0.07805483043193817, 0.08508838713169098, 0.09212193638086319, 0.0991554856300354, 0.10618903487920761, 0.11322259157896042, 0.12025614082813263, 0.12728969752788544, 0.13432323932647705, 0.14135679602622986, 0.14839035272598267, 0.15542390942573547]}, "gradients/encoder.encoder.layers.2.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 7.0, 7.0, 8.0, 10.0, 15.0, 28.0, 28.0, 33.0, 45.0, 43.0, 43.0, 48.0, 52.0, 57.0, 72.0, 55.0, 72.0, 62.0, 68.0, 57.0, 47.0, 37.0, 31.0, 22.0, 16.0, 14.0, 14.0, 10.0, 6.0, 3.0, 4.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.12745237350463867, -0.12164591997861862, -0.11583946645259857, -0.11003301292657852, -0.10422655940055847, -0.09842010587453842, -0.09261365234851837, -0.08680719882249832, -0.08100074529647827, -0.07519429177045822, -0.06938783824443817, -0.06358138471841812, -0.05777493119239807, -0.05196847766637802, -0.04616202414035797, -0.04035557061433792, -0.03454911708831787, -0.02874266356229782, -0.02293621003627777, -0.01712975651025772, -0.011323302984237671, -0.005516849458217621, 0.0002896040678024292, 0.006096057593822479, 0.01190251111984253, 0.01770896464586258, 0.02351541817188263, 0.02932187169790268, 0.03512832522392273, 0.04093477874994278, 0.04674123227596283, 0.05254768580198288, 0.05835413932800293, 0.06416059285402298, 0.06996704638004303, 0.07577349990606308, 0.08157995343208313, 0.08738640695810318, 0.09319286048412323, 0.09899931401014328, 0.10480576753616333, 0.11061222106218338, 0.11641867458820343, 0.12222512811422348, 0.12803158164024353, 0.13383802771568298, 0.13964448869228363, 0.14545094966888428, 0.15125739574432373, 0.15706384181976318, 0.16287030279636383, 0.16867676377296448, 0.17448320984840393, 0.18028965592384338, 0.18609611690044403, 0.19190257787704468, 0.19770902395248413, 0.20351547002792358, 0.20932193100452423, 0.21512839198112488, 0.22093483805656433, 0.22674128413200378, 0.23254774510860443, 0.23835420608520508, 0.24416065216064453]}, "gradients/encoder.encoder.layers.2.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 4.0, 1.0, 1.0, 3.0, 0.0, 2.0, 5.0, 2.0, 1.0, 13.0, 20.0, 26.0, 19.0, 31.0, 58.0, 95.0, 193.0, 393.0, 591.0, 1080.0, 2171.0, 4353.0, 9138.0, 22222.0, 60503.0, 230221.0, 542679.0, 111568.0, 36184.0, 14255.0, 6350.0, 2955.0, 1526.0, 826.0, 413.0, 220.0, 168.0, 110.0, 62.0, 38.0, 19.0, 12.0, 9.0, 5.0, 10.0, 5.0, 6.0, 1.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0213470458984375, -0.02059149742126465, -0.019835948944091797, -0.019080400466918945, -0.018324851989746094, -0.017569303512573242, -0.01681375503540039, -0.01605820655822754, -0.015302658081054688, -0.014547109603881836, -0.013791561126708984, -0.013036012649536133, -0.012280464172363281, -0.01152491569519043, -0.010769367218017578, -0.010013818740844727, -0.009258270263671875, -0.008502721786499023, -0.007747173309326172, -0.00699162483215332, -0.006236076354980469, -0.005480527877807617, -0.004724979400634766, -0.003969430923461914, -0.0032138824462890625, -0.002458333969116211, -0.0017027854919433594, -0.0009472370147705078, -0.00019168853759765625, 0.0005638599395751953, 0.0013194084167480469, 0.0020749568939208984, 0.00283050537109375, 0.0035860538482666016, 0.004341602325439453, 0.005097150802612305, 0.005852699279785156, 0.006608247756958008, 0.007363796234130859, 0.008119344711303711, 0.008874893188476562, 0.009630441665649414, 0.010385990142822266, 0.011141538619995117, 0.011897087097167969, 0.01265263557434082, 0.013408184051513672, 0.014163732528686523, 0.014919281005859375, 0.015674829483032227, 0.016430377960205078, 0.01718592643737793, 0.01794147491455078, 0.018697023391723633, 0.019452571868896484, 0.020208120346069336, 0.020963668823242188, 0.02171921730041504, 0.02247476577758789, 0.023230314254760742, 0.023985862731933594, 0.024741411209106445, 0.025496959686279297, 0.02625250816345215, 0.027008056640625]}, "gradients/encoder.encoder.layers.2.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 4.0, 6.0, 7.0, 14.0, 10.0, 14.0, 17.0, 25.0, 36.0, 31.0, 42.0, 55.0, 66.0, 61.0, 58.0, 88.0, 86.0, 66.0, 71.0, 54.0, 43.0, 37.0, 30.0, 19.0, 19.0, 13.0, 7.0, 10.0, 7.0, 4.0, 4.0, 4.0, 6.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.01108551025390625, -0.010790646076202393, -0.010495781898498535, -0.010200917720794678, -0.00990605354309082, -0.009611189365386963, -0.009316325187683105, -0.009021461009979248, -0.00872659683227539, -0.008431732654571533, -0.008136868476867676, -0.007842004299163818, -0.007547140121459961, -0.0072522759437561035, -0.006957411766052246, -0.006662547588348389, -0.006367683410644531, -0.006072819232940674, -0.005777955055236816, -0.005483090877532959, -0.0051882266998291016, -0.004893362522125244, -0.004598498344421387, -0.004303634166717529, -0.004008769989013672, -0.0037139058113098145, -0.003419041633605957, -0.0031241774559020996, -0.002829313278198242, -0.0025344491004943848, -0.0022395849227905273, -0.00194472074508667, -0.0016498565673828125, -0.001354992389678955, -0.0010601282119750977, -0.0007652640342712402, -0.0004703998565673828, -0.0001755356788635254, 0.00011932849884033203, 0.00041419267654418945, 0.0007090568542480469, 0.0010039210319519043, 0.0012987852096557617, 0.0015936493873596191, 0.0018885135650634766, 0.002183377742767334, 0.0024782419204711914, 0.002773106098175049, 0.0030679702758789062, 0.0033628344535827637, 0.003657698631286621, 0.0039525628089904785, 0.004247426986694336, 0.004542291164398193, 0.004837155342102051, 0.005132019519805908, 0.005426883697509766, 0.005721747875213623, 0.0060166120529174805, 0.006311476230621338, 0.006606340408325195, 0.006901204586029053, 0.00719606876373291, 0.007490932941436768, 0.007785797119140625]}, "gradients/encoder.encoder.layers.2.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 2.0, 5.0, 4.0, 6.0, 5.0, 15.0, 17.0, 31.0, 43.0, 47.0, 100.0, 132.0, 229.0, 313.0, 472.0, 877.0, 1559.0, 2823.0, 5536.0, 11562.0, 25252.0, 66575.0, 274916.0, 526088.0, 77063.0, 28633.0, 12629.0, 6333.0, 3145.0, 1635.0, 983.0, 553.0, 345.0, 210.0, 131.0, 87.0, 66.0, 46.0, 30.0, 22.0, 16.0, 13.0, 6.0, 3.0, 2.0, 1.0, 4.0, 2.0, 1.0], "bins": [-0.022705078125, -0.02210867404937744, -0.021512269973754883, -0.020915865898132324, -0.020319461822509766, -0.019723057746887207, -0.01912665367126465, -0.01853024959564209, -0.01793384552001953, -0.017337441444396973, -0.016741037368774414, -0.016144633293151855, -0.015548229217529297, -0.014951825141906738, -0.01435542106628418, -0.013759016990661621, -0.013162612915039062, -0.012566208839416504, -0.011969804763793945, -0.011373400688171387, -0.010776996612548828, -0.01018059253692627, -0.009584188461303711, -0.008987784385681152, -0.008391380310058594, -0.007794976234436035, -0.0071985721588134766, -0.006602168083190918, -0.006005764007568359, -0.005409359931945801, -0.004812955856323242, -0.004216551780700684, -0.003620147705078125, -0.0030237436294555664, -0.002427339553833008, -0.0018309354782104492, -0.0012345314025878906, -0.000638127326965332, -4.172325134277344e-05, 0.0005546808242797852, 0.0011510848999023438, 0.0017474889755249023, 0.002343893051147461, 0.0029402971267700195, 0.003536701202392578, 0.004133105278015137, 0.004729509353637695, 0.005325913429260254, 0.0059223175048828125, 0.006518721580505371, 0.00711512565612793, 0.007711529731750488, 0.008307933807373047, 0.008904337882995605, 0.009500741958618164, 0.010097146034240723, 0.010693550109863281, 0.01128995418548584, 0.011886358261108398, 0.012482762336730957, 0.013079166412353516, 0.013675570487976074, 0.014271974563598633, 0.014868378639221191, 0.01546478271484375]}, "gradients/encoder.encoder.layers.2.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 2.0, 3.0, 4.0, 7.0, 5.0, 14.0, 13.0, 8.0, 12.0, 8.0, 15.0, 21.0, 28.0, 19.0, 36.0, 28.0, 52.0, 42.0, 59.0, 56.0, 68.0, 37.0, 55.0, 42.0, 35.0, 35.0, 55.0, 45.0, 35.0, 27.0, 28.0, 20.0, 13.0, 22.0, 12.0, 6.0, 7.0, 11.0, 7.0, 6.0, 5.0, 3.0, 1.0, 1.0, 3.0, 3.0, 2.0, 1.0], "bins": [-0.0335693359375, -0.03267979621887207, -0.03179025650024414, -0.03090071678161621, -0.03001117706298828, -0.02912163734436035, -0.028232097625732422, -0.027342557907104492, -0.026453018188476562, -0.025563478469848633, -0.024673938751220703, -0.023784399032592773, -0.022894859313964844, -0.022005319595336914, -0.021115779876708984, -0.020226240158081055, -0.019336700439453125, -0.018447160720825195, -0.017557621002197266, -0.016668081283569336, -0.015778541564941406, -0.014889001846313477, -0.013999462127685547, -0.013109922409057617, -0.012220382690429688, -0.011330842971801758, -0.010441303253173828, -0.009551763534545898, -0.008662223815917969, -0.007772684097290039, -0.006883144378662109, -0.00599360466003418, -0.00510406494140625, -0.00421452522277832, -0.0033249855041503906, -0.002435445785522461, -0.0015459060668945312, -0.0006563663482666016, 0.00023317337036132812, 0.0011227130889892578, 0.0020122528076171875, 0.002901792526245117, 0.003791332244873047, 0.0046808719635009766, 0.005570411682128906, 0.006459951400756836, 0.007349491119384766, 0.008239030838012695, 0.009128570556640625, 0.010018110275268555, 0.010907649993896484, 0.011797189712524414, 0.012686729431152344, 0.013576269149780273, 0.014465808868408203, 0.015355348587036133, 0.016244888305664062, 0.017134428024291992, 0.018023967742919922, 0.01891350746154785, 0.01980304718017578, 0.02069258689880371, 0.02158212661743164, 0.02247166633605957, 0.0233612060546875]}, "gradients/encoder.encoder.layers.2.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 2.0, 5.0, 7.0, 8.0, 15.0, 14.0, 22.0, 47.0, 57.0, 101.0, 143.0, 234.0, 389.0, 687.0, 1304.0, 2465.0, 5213.0, 12442.0, 38914.0, 301589.0, 616301.0, 43550.0, 13733.0, 5468.0, 2597.0, 1397.0, 731.0, 405.0, 282.0, 148.0, 92.0, 72.0, 53.0, 21.0, 18.0, 11.0, 10.0, 4.0, 6.0, 5.0, 3.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.006641387939453125, -0.00646209716796875, -0.006282806396484375, -0.006103515625, -0.005924224853515625, -0.00574493408203125, -0.005565643310546875, -0.0053863525390625, -0.005207061767578125, -0.00502777099609375, -0.004848480224609375, -0.004669189453125, -0.004489898681640625, -0.00431060791015625, -0.004131317138671875, -0.0039520263671875, -0.003772735595703125, -0.00359344482421875, -0.003414154052734375, -0.00323486328125, -0.003055572509765625, -0.00287628173828125, -0.002696990966796875, -0.0025177001953125, -0.002338409423828125, -0.00215911865234375, -0.001979827880859375, -0.001800537109375, -0.001621246337890625, -0.00144195556640625, -0.001262664794921875, -0.0010833740234375, -0.000904083251953125, -0.00072479248046875, -0.000545501708984375, -0.0003662109375, -0.000186920166015625, -7.62939453125e-06, 0.000171661376953125, 0.0003509521484375, 0.000530242919921875, 0.00070953369140625, 0.000888824462890625, 0.001068115234375, 0.001247406005859375, 0.00142669677734375, 0.001605987548828125, 0.0017852783203125, 0.001964569091796875, 0.00214385986328125, 0.002323150634765625, 0.00250244140625, 0.002681732177734375, 0.00286102294921875, 0.003040313720703125, 0.0032196044921875, 0.003398895263671875, 0.00357818603515625, 0.003757476806640625, 0.003936767578125, 0.004116058349609375, 0.00429534912109375, 0.004474639892578125, 0.0046539306640625, 0.004833221435546875]}, "gradients/encoder.encoder.layers.2.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 11.0, 13.0, 16.0, 15.0, 36.0, 33.0, 59.0, 72.0, 98.0, 100.0, 115.0, 88.0, 87.0, 77.0, 61.0, 45.0, 23.0, 20.0, 20.0, 7.0, 7.0, 3.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.927417755126953e-06, -7.65826553106308e-06, -7.3891133069992065e-06, -7.119961082935333e-06, -6.85080885887146e-06, -6.581656634807587e-06, -6.312504410743713e-06, -6.04335218667984e-06, -5.774199962615967e-06, -5.5050477385520935e-06, -5.23589551448822e-06, -4.966743290424347e-06, -4.697591066360474e-06, -4.4284388422966e-06, -4.159286618232727e-06, -3.890134394168854e-06, -3.6209821701049805e-06, -3.351829946041107e-06, -3.082677721977234e-06, -2.8135254979133606e-06, -2.5443732738494873e-06, -2.275221049785614e-06, -2.0060688257217407e-06, -1.7369166016578674e-06, -1.4677643775939941e-06, -1.1986121535301208e-06, -9.294599294662476e-07, -6.603077054023743e-07, -3.91155481338501e-07, -1.2200325727462769e-07, 1.471489667892456e-07, 4.163011908531189e-07, 6.854534149169922e-07, 9.546056389808655e-07, 1.2237578630447388e-06, 1.492910087108612e-06, 1.7620623111724854e-06, 2.0312145352363586e-06, 2.300366759300232e-06, 2.5695189833641052e-06, 2.8386712074279785e-06, 3.107823431491852e-06, 3.376975655555725e-06, 3.6461278796195984e-06, 3.915280103683472e-06, 4.184432327747345e-06, 4.453584551811218e-06, 4.7227367758750916e-06, 4.991888999938965e-06, 5.261041224002838e-06, 5.5301934480667114e-06, 5.799345672130585e-06, 6.068497896194458e-06, 6.337650120258331e-06, 6.606802344322205e-06, 6.875954568386078e-06, 7.145106792449951e-06, 7.4142590165138245e-06, 7.683411240577698e-06, 7.952563464641571e-06, 8.221715688705444e-06, 8.490867912769318e-06, 8.760020136833191e-06, 9.029172360897064e-06, 9.298324584960938e-06]}, "gradients/encoder.encoder.layers.2.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 0.0, 2.0, 4.0, 2.0, 6.0, 4.0, 2.0, 8.0, 21.0, 17.0, 25.0, 31.0, 36.0, 82.0, 97.0, 133.0, 222.0, 411.0, 589.0, 1026.0, 1686.0, 3006.0, 5609.0, 11718.0, 27525.0, 83032.0, 693249.0, 149611.0, 38714.0, 15665.0, 7146.0, 3687.0, 1979.0, 1155.0, 693.0, 473.0, 273.0, 204.0, 138.0, 84.0, 67.0, 33.0, 28.0, 19.0, 17.0, 11.0, 7.0, 6.0, 4.0, 4.0, 2.0, 0.0, 3.0, 2.0, 0.0, 0.0, 2.0], "bins": [-0.00420379638671875, -0.004078984260559082, -0.003954172134399414, -0.003829360008239746, -0.003704547882080078, -0.00357973575592041, -0.003454923629760742, -0.0033301115036010742, -0.0032052993774414062, -0.0030804872512817383, -0.0029556751251220703, -0.0028308629989624023, -0.0027060508728027344, -0.0025812387466430664, -0.0024564266204833984, -0.0023316144943237305, -0.0022068023681640625, -0.0020819902420043945, -0.0019571781158447266, -0.0018323659896850586, -0.0017075538635253906, -0.0015827417373657227, -0.0014579296112060547, -0.0013331174850463867, -0.0012083053588867188, -0.0010834932327270508, -0.0009586811065673828, -0.0008338689804077148, -0.0007090568542480469, -0.0005842447280883789, -0.00045943260192871094, -0.00033462047576904297, -0.000209808349609375, -8.499622344970703e-05, 3.981590270996094e-05, 0.0001646280288696289, 0.0002894401550292969, 0.00041425228118896484, 0.0005390644073486328, 0.0006638765335083008, 0.0007886886596679688, 0.0009135007858276367, 0.0010383129119873047, 0.0011631250381469727, 0.0012879371643066406, 0.0014127492904663086, 0.0015375614166259766, 0.0016623735427856445, 0.0017871856689453125, 0.0019119977951049805, 0.0020368099212646484, 0.0021616220474243164, 0.0022864341735839844, 0.0024112462997436523, 0.0025360584259033203, 0.0026608705520629883, 0.0027856826782226562, 0.0029104948043823242, 0.003035306930541992, 0.00316011905670166, 0.003284931182861328, 0.003409743309020996, 0.003534555435180664, 0.003659367561340332, 0.0037841796875]}, "gradients/encoder.encoder.layers.2.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 0.0, 1.0, 2.0, 2.0, 2.0, 1.0, 0.0, 4.0, 2.0, 4.0, 1.0, 6.0, 4.0, 6.0, 4.0, 16.0, 15.0, 18.0, 21.0, 29.0, 25.0, 38.0, 38.0, 61.0, 91.0, 117.0, 110.0, 73.0, 51.0, 50.0, 32.0, 34.0, 21.0, 21.0, 23.0, 15.0, 20.0, 9.0, 8.0, 8.0, 7.0, 5.0, 3.0, 7.0, 3.0, 2.0, 3.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0], "bins": [-0.005954742431640625, -0.005779385566711426, -0.0056040287017822266, -0.005428671836853027, -0.005253314971923828, -0.005077958106994629, -0.00490260124206543, -0.0047272443771362305, -0.004551887512207031, -0.004376530647277832, -0.004201173782348633, -0.004025816917419434, -0.0038504600524902344, -0.003675103187561035, -0.003499746322631836, -0.0033243894577026367, -0.0031490325927734375, -0.0029736757278442383, -0.002798318862915039, -0.00262296199798584, -0.0024476051330566406, -0.0022722482681274414, -0.002096891403198242, -0.001921534538269043, -0.0017461776733398438, -0.0015708208084106445, -0.0013954639434814453, -0.001220107078552246, -0.0010447502136230469, -0.0008693933486938477, -0.0006940364837646484, -0.0005186796188354492, -0.00034332275390625, -0.00016796588897705078, 7.3909759521484375e-06, 0.00018274784088134766, 0.0003581047058105469, 0.0005334615707397461, 0.0007088184356689453, 0.0008841753005981445, 0.0010595321655273438, 0.001234889030456543, 0.0014102458953857422, 0.0015856027603149414, 0.0017609596252441406, 0.0019363164901733398, 0.002111673355102539, 0.0022870302200317383, 0.0024623870849609375, 0.0026377439498901367, 0.002813100814819336, 0.002988457679748535, 0.0031638145446777344, 0.0033391714096069336, 0.003514528274536133, 0.003689885139465332, 0.0038652420043945312, 0.0040405988693237305, 0.00421595573425293, 0.004391312599182129, 0.004566669464111328, 0.004742026329040527, 0.0049173831939697266, 0.005092740058898926, 0.005268096923828125]}, "gradients/encoder.encoder.layers.2.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 4.0, 11.0, 6.0, 14.0, 28.0, 52.0, 104.0, 289.0, 194.0, 97.0, 75.0, 40.0, 19.0, 23.0, 10.0, 10.0, 4.0, 9.0, 1.0, 6.0, 5.0, 2.0, 4.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.2167155146598816, -0.21127481758594513, -0.20583412051200867, -0.200393408536911, -0.19495271146297455, -0.18951201438903809, -0.18407130241394043, -0.17863060534000397, -0.1731899082660675, -0.16774921119213104, -0.16230851411819458, -0.15686780214309692, -0.15142710506916046, -0.145986407995224, -0.14054569602012634, -0.13510499894618988, -0.12966430187225342, -0.12422360479831696, -0.1187829002737999, -0.11334219574928284, -0.10790149867534637, -0.10246080160140991, -0.09702009707689285, -0.0915793925523758, -0.08613869547843933, -0.08069799840450287, -0.07525729387998581, -0.06981658935546875, -0.06437589228153229, -0.05893519148230553, -0.053494490683078766, -0.048053789883852005, -0.04261307418346405, -0.03717237338423729, -0.03173167258501053, -0.026290971785783768, -0.020850270986557007, -0.015409570187330246, -0.009968869388103485, -0.004528168588876724, 0.0009125322103500366, 0.0063532330095767975, 0.011793933808803558, 0.01723463460803032, 0.02267533540725708, 0.02811603620648384, 0.0335567370057106, 0.03899743780493736, 0.044438138604164124, 0.049878839403390884, 0.055319540202617645, 0.060760241001844406, 0.06620094180107117, 0.07164163887500763, 0.07708234339952469, 0.08252304792404175, 0.08796374499797821, 0.09340444207191467, 0.09884514659643173, 0.10428585112094879, 0.10972654819488525, 0.11516724526882172, 0.12060794979333878, 0.12604865431785583, 0.1314893513917923]}, "gradients/encoder.encoder.layers.2.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 2.0, 1.0, 3.0, 3.0, 3.0, 8.0, 6.0, 7.0, 7.0, 7.0, 9.0, 13.0, 12.0, 16.0, 15.0, 16.0, 31.0, 34.0, 34.0, 37.0, 53.0, 66.0, 96.0, 101.0, 82.0, 43.0, 40.0, 36.0, 33.0, 33.0, 24.0, 27.0, 24.0, 18.0, 20.0, 11.0, 8.0, 7.0, 4.0, 8.0, 6.0, 4.0, 4.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0, 1.0], "bins": [-0.1530930995941162, -0.14896145462989807, -0.14482980966567993, -0.14069817960262299, -0.13656653463840485, -0.1324348896741867, -0.12830324470996857, -0.12417159974575043, -0.12003996223211288, -0.11590831726789474, -0.1117766797542572, -0.10764503479003906, -0.10351338982582092, -0.09938175231218338, -0.09525010734796524, -0.0911184698343277, -0.08698682487010956, -0.08285517990589142, -0.07872354239225388, -0.07459189742803574, -0.0704602599143982, -0.06632861495018005, -0.062196969985961914, -0.05806532874703407, -0.05393368750810623, -0.04980204626917839, -0.04567040503025055, -0.04153876006603241, -0.03740711882710457, -0.03327547758817673, -0.029143834486603737, -0.025012191385030746, -0.020880550146102905, -0.016748908907175064, -0.012617265805602074, -0.008485623635351658, -0.004353981465101242, -0.00022234022617340088, 0.0039093028753995895, 0.00804094597697258, 0.012172587215900421, 0.016304228454828262, 0.020435871556401253, 0.024567514657974243, 0.028699155896902084, 0.032830797135829926, 0.036962442100048065, 0.041094083338975906, 0.04522572457790375, 0.04935736581683159, 0.05348900705575943, 0.05762065201997757, 0.06175229325890541, 0.06588393449783325, 0.07001557946205139, 0.07414722442626953, 0.07827886193990707, 0.08241050690412521, 0.08654214441776276, 0.0906737893819809, 0.09480543434619904, 0.09893707185983658, 0.10306871682405472, 0.10720035433769226, 0.1113319993019104]}, "gradients/encoder.encoder.layers.1.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 5.0, 1.0, 1.0, 4.0, 1.0, 3.0, 7.0, 12.0, 12.0, 10.0, 22.0, 18.0, 26.0, 35.0, 47.0, 57.0, 101.0, 139.0, 236.0, 351.0, 675.0, 1690.0, 15671.0, 949522.0, 3110162.0, 107553.0, 5195.0, 1356.0, 545.0, 290.0, 185.0, 124.0, 69.0, 46.0, 39.0, 22.0, 19.0, 14.0, 8.0, 9.0, 7.0, 1.0, 2.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.0165557861328125, -0.016133666038513184, -0.015711545944213867, -0.01528942584991455, -0.014867305755615234, -0.014445185661315918, -0.014023065567016602, -0.013600945472717285, -0.013178825378417969, -0.012756705284118652, -0.012334585189819336, -0.01191246509552002, -0.011490345001220703, -0.011068224906921387, -0.01064610481262207, -0.010223984718322754, -0.009801864624023438, -0.009379744529724121, -0.008957624435424805, -0.008535504341125488, -0.008113384246826172, -0.0076912641525268555, -0.007269144058227539, -0.006847023963928223, -0.006424903869628906, -0.00600278377532959, -0.0055806636810302734, -0.005158543586730957, -0.004736423492431641, -0.004314303398132324, -0.003892183303833008, -0.0034700632095336914, -0.003047943115234375, -0.0026258230209350586, -0.002203702926635742, -0.0017815828323364258, -0.0013594627380371094, -0.000937342643737793, -0.0005152225494384766, -9.310245513916016e-05, 0.00032901763916015625, 0.0007511377334594727, 0.001173257827758789, 0.0015953779220581055, 0.002017498016357422, 0.0024396181106567383, 0.0028617382049560547, 0.003283858299255371, 0.0037059783935546875, 0.004128098487854004, 0.00455021858215332, 0.004972338676452637, 0.005394458770751953, 0.0058165788650512695, 0.006238698959350586, 0.006660819053649902, 0.007082939147949219, 0.007505059242248535, 0.007927179336547852, 0.008349299430847168, 0.008771419525146484, 0.0091935396194458, 0.009615659713745117, 0.010037779808044434, 0.01045989990234375]}, "gradients/encoder.encoder.layers.1.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 4.0, 6.0, 5.0, 4.0, 10.0, 10.0, 12.0, 20.0, 17.0, 20.0, 18.0, 36.0, 37.0, 43.0, 31.0, 48.0, 45.0, 45.0, 59.0, 60.0, 55.0, 42.0, 67.0, 44.0, 48.0, 34.0, 37.0, 34.0, 24.0, 23.0, 16.0, 9.0, 8.0, 17.0, 4.0, 7.0, 4.0, 5.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.00839996337890625, -0.008180379867553711, -0.007960796356201172, -0.007741212844848633, -0.007521629333496094, -0.007302045822143555, -0.007082462310791016, -0.0068628787994384766, -0.0066432952880859375, -0.0064237117767333984, -0.006204128265380859, -0.00598454475402832, -0.005764961242675781, -0.005545377731323242, -0.005325794219970703, -0.005106210708618164, -0.004886627197265625, -0.004667043685913086, -0.004447460174560547, -0.004227876663208008, -0.004008293151855469, -0.0037887096405029297, -0.0035691261291503906, -0.0033495426177978516, -0.0031299591064453125, -0.0029103755950927734, -0.0026907920837402344, -0.0024712085723876953, -0.0022516250610351562, -0.002032041549682617, -0.0018124580383300781, -0.001592874526977539, -0.001373291015625, -0.001153707504272461, -0.0009341239929199219, -0.0007145404815673828, -0.0004949569702148438, -0.0002753734588623047, -5.5789947509765625e-05, 0.00016379356384277344, 0.0003833770751953125, 0.0006029605865478516, 0.0008225440979003906, 0.0010421276092529297, 0.0012617111206054688, 0.0014812946319580078, 0.0017008781433105469, 0.001920461654663086, 0.002140045166015625, 0.002359628677368164, 0.002579212188720703, 0.002798795700073242, 0.0030183792114257812, 0.0032379627227783203, 0.0034575462341308594, 0.0036771297454833984, 0.0038967132568359375, 0.0041162967681884766, 0.004335880279541016, 0.004555463790893555, 0.004775047302246094, 0.004994630813598633, 0.005214214324951172, 0.005433797836303711, 0.00565338134765625]}, "gradients/encoder.encoder.layers.1.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 2.0, 3.0, 5.0, 5.0, 8.0, 14.0, 29.0, 54.0, 121.0, 244.0, 631.0, 1724.0, 8791.0, 1956783.0, 2213537.0, 9393.0, 1798.0, 651.0, 251.0, 113.0, 62.0, 34.0, 13.0, 7.0, 3.0, 6.0, 4.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0], "bins": [-0.0292205810546875, -0.028430938720703125, -0.02764129638671875, -0.026851654052734375, -0.02606201171875, -0.025272369384765625, -0.02448272705078125, -0.023693084716796875, -0.0229034423828125, -0.022113800048828125, -0.02132415771484375, -0.020534515380859375, -0.019744873046875, -0.018955230712890625, -0.01816558837890625, -0.017375946044921875, -0.0165863037109375, -0.015796661376953125, -0.01500701904296875, -0.014217376708984375, -0.013427734375, -0.012638092041015625, -0.01184844970703125, -0.011058807373046875, -0.0102691650390625, -0.009479522705078125, -0.00868988037109375, -0.007900238037109375, -0.007110595703125, -0.006320953369140625, -0.00553131103515625, -0.004741668701171875, -0.0039520263671875, -0.003162384033203125, -0.00237274169921875, -0.001583099365234375, -0.00079345703125, -3.814697265625e-06, 0.00078582763671875, 0.001575469970703125, 0.0023651123046875, 0.003154754638671875, 0.00394439697265625, 0.004734039306640625, 0.005523681640625, 0.006313323974609375, 0.00710296630859375, 0.007892608642578125, 0.0086822509765625, 0.009471893310546875, 0.01026153564453125, 0.011051177978515625, 0.0118408203125, 0.012630462646484375, 0.01342010498046875, 0.014209747314453125, 0.0149993896484375, 0.015789031982421875, 0.01657867431640625, 0.017368316650390625, 0.018157958984375, 0.018947601318359375, 0.01973724365234375, 0.020526885986328125, 0.0213165283203125]}, "gradients/encoder.encoder.layers.1.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 3.0, 3.0, 3.0, 3.0, 5.0, 12.0, 23.0, 47.0, 117.0, 425.0, 1082.0, 1265.0, 620.0, 246.0, 99.0, 47.0, 21.0, 16.0, 14.0, 11.0, 4.0, 6.0, 4.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-0.0165863037109375, -0.016056060791015625, -0.01552581787109375, -0.014995574951171875, -0.01446533203125, -0.013935089111328125, -0.01340484619140625, -0.012874603271484375, -0.0123443603515625, -0.011814117431640625, -0.01128387451171875, -0.010753631591796875, -0.010223388671875, -0.009693145751953125, -0.00916290283203125, -0.008632659912109375, -0.0081024169921875, -0.007572174072265625, -0.00704193115234375, -0.006511688232421875, -0.0059814453125, -0.005451202392578125, -0.00492095947265625, -0.004390716552734375, -0.0038604736328125, -0.003330230712890625, -0.00279998779296875, -0.002269744873046875, -0.001739501953125, -0.001209259033203125, -0.00067901611328125, -0.000148773193359375, 0.0003814697265625, 0.000911712646484375, 0.00144195556640625, 0.001972198486328125, 0.00250244140625, 0.003032684326171875, 0.00356292724609375, 0.004093170166015625, 0.0046234130859375, 0.005153656005859375, 0.00568389892578125, 0.006214141845703125, 0.006744384765625, 0.007274627685546875, 0.00780487060546875, 0.008335113525390625, 0.0088653564453125, 0.009395599365234375, 0.00992584228515625, 0.010456085205078125, 0.010986328125, 0.011516571044921875, 0.01204681396484375, 0.012577056884765625, 0.0131072998046875, 0.013637542724609375, 0.01416778564453125, 0.014698028564453125, 0.015228271484375, 0.015758514404296875, 0.01628875732421875, 0.016819000244140625, 0.0173492431640625]}, "gradients/encoder.encoder.layers.1.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 6.0, 6.0, 5.0, 7.0, 24.0, 43.0, 64.0, 113.0, 181.0, 194.0, 131.0, 79.0, 65.0, 32.0, 14.0, 11.0, 8.0, 6.0, 6.0, 5.0, 2.0, 2.0, 1.0, 3.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.027537288144230843, -0.024989455938339233, -0.022441623732447624, -0.019893791526556015, -0.017345959320664406, -0.014798127114772797, -0.012250294908881187, -0.009702462702989578, -0.007154630497097969, -0.00460679829120636, -0.0020589660853147507, 0.0004888661205768585, 0.0030366983264684677, 0.005584530532360077, 0.008132362738251686, 0.010680194944143295, 0.013228027150034904, 0.015775859355926514, 0.018323691561818123, 0.020871523767709732, 0.02341935597360134, 0.02596718817949295, 0.02851502038538456, 0.03106285259127617, 0.03361068665981293, 0.03615851700305939, 0.038706351071596146, 0.041254185140132904, 0.043802015483379364, 0.046349845826625824, 0.04889767989516258, 0.05144551396369934, 0.0539933443069458, 0.05654117465019226, 0.05908900871872902, 0.06163684278726578, 0.06418467313051224, 0.0667325034737587, 0.06928034126758575, 0.07182817161083221, 0.07437600195407867, 0.07692383229732513, 0.0794716626405716, 0.08201950043439865, 0.08456733077764511, 0.08711516112089157, 0.08966299891471863, 0.09221082925796509, 0.09475865960121155, 0.09730648994445801, 0.09985432028770447, 0.10240215808153152, 0.10494998842477798, 0.10749781876802444, 0.1100456565618515, 0.11259348690509796, 0.11514131724834442, 0.11768914759159088, 0.12023697793483734, 0.1227848157286644, 0.12533265352249146, 0.12788048386573792, 0.13042831420898438, 0.13297614455223083, 0.1355239748954773]}, "gradients/encoder.encoder.layers.1.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 4.0, 4.0, 8.0, 2.0, 7.0, 9.0, 18.0, 16.0, 17.0, 21.0, 29.0, 27.0, 38.0, 24.0, 36.0, 45.0, 77.0, 55.0, 45.0, 55.0, 60.0, 63.0, 66.0, 43.0, 40.0, 35.0, 23.0, 36.0, 14.0, 16.0, 20.0, 11.0, 9.0, 10.0, 9.0, 5.0, 4.0, 2.0, 4.0, 3.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.05511254072189331, -0.05345321074128151, -0.05179388076066971, -0.05013454705476761, -0.04847521707415581, -0.046815887093544006, -0.04515655338764191, -0.043497223407030106, -0.041837893426418304, -0.0401785634458065, -0.0385192334651947, -0.0368598997592926, -0.0352005697786808, -0.033541239798069, -0.0318819060921669, -0.0302225761115551, -0.0285632461309433, -0.026903916150331497, -0.025244584307074547, -0.023585252463817596, -0.021925922483205795, -0.020266592502593994, -0.018607260659337044, -0.016947928816080093, -0.015288598835468292, -0.013629267923533916, -0.01196993701159954, -0.010310606099665165, -0.00865127518773079, -0.006991944275796413, -0.005332613363862038, -0.003673282451927662, -0.002013951539993286, -0.00035462062805891037, 0.0013047102838754654, 0.002964041195809841, 0.004623372107744217, 0.006282703019678593, 0.007942033931612968, 0.009601364843547344, 0.01126069575548172, 0.012920026667416096, 0.014579357579350471, 0.016238689422607422, 0.017898019403219223, 0.019557349383831024, 0.021216681227087975, 0.022876013070344925, 0.024535343050956726, 0.026194673031568527, 0.027854004874825478, 0.029513336718082428, 0.03117266669869423, 0.03283199667930603, 0.03449133038520813, 0.03615066036581993, 0.03780999034643173, 0.03946932032704353, 0.041128650307655334, 0.042787984013557434, 0.044447313994169235, 0.046106643974781036, 0.047765977680683136, 0.04942530766129494, 0.05108463764190674]}, "gradients/encoder.encoder.layers.1.attention.out_proj.weight": {"_type": "histogram", "values": [4.0, 1.0, 3.0, 6.0, 3.0, 2.0, 9.0, 4.0, 16.0, 22.0, 25.0, 39.0, 50.0, 81.0, 108.0, 243.0, 361.0, 618.0, 1065.0, 1912.0, 3487.0, 7133.0, 15811.0, 41133.0, 142995.0, 560142.0, 187298.0, 50402.0, 18457.0, 8199.0, 3963.0, 2127.0, 1174.0, 666.0, 372.0, 229.0, 141.0, 79.0, 68.0, 32.0, 28.0, 15.0, 13.0, 14.0, 6.0, 2.0, 2.0, 7.0, 2.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.01082611083984375, -0.010403037071228027, -0.009979963302612305, -0.009556889533996582, -0.00913381576538086, -0.008710741996765137, -0.008287668228149414, -0.007864594459533691, -0.007441520690917969, -0.007018446922302246, -0.0065953731536865234, -0.006172299385070801, -0.005749225616455078, -0.0053261518478393555, -0.004903078079223633, -0.00448000431060791, -0.0040569305419921875, -0.003633856773376465, -0.003210783004760742, -0.0027877092361450195, -0.002364635467529297, -0.0019415616989135742, -0.0015184879302978516, -0.001095414161682129, -0.0006723403930664062, -0.0002492666244506836, 0.00017380714416503906, 0.0005968809127807617, 0.0010199546813964844, 0.001443028450012207, 0.0018661022186279297, 0.0022891759872436523, 0.002712249755859375, 0.0031353235244750977, 0.0035583972930908203, 0.003981471061706543, 0.004404544830322266, 0.004827618598937988, 0.005250692367553711, 0.005673766136169434, 0.006096839904785156, 0.006519913673400879, 0.0069429874420166016, 0.007366061210632324, 0.007789134979248047, 0.00821220874786377, 0.008635282516479492, 0.009058356285095215, 0.009481430053710938, 0.00990450382232666, 0.010327577590942383, 0.010750651359558105, 0.011173725128173828, 0.01159679889678955, 0.012019872665405273, 0.012442946434020996, 0.012866020202636719, 0.013289093971252441, 0.013712167739868164, 0.014135241508483887, 0.01455831527709961, 0.014981389045715332, 0.015404462814331055, 0.015827536582946777, 0.0162506103515625]}, "gradients/encoder.encoder.layers.1.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 3.0, 2.0, 3.0, 0.0, 5.0, 4.0, 8.0, 5.0, 13.0, 13.0, 15.0, 8.0, 16.0, 22.0, 28.0, 35.0, 37.0, 32.0, 44.0, 42.0, 48.0, 77.0, 46.0, 57.0, 38.0, 53.0, 60.0, 47.0, 36.0, 33.0, 28.0, 29.0, 31.0, 14.0, 21.0, 9.0, 14.0, 7.0, 10.0, 6.0, 5.0, 3.0, 1.0, 1.0, 4.0, 2.0, 1.0, 2.0], "bins": [-0.00836181640625, -0.008147239685058594, -0.007932662963867188, -0.007718086242675781, -0.007503509521484375, -0.007288932800292969, -0.0070743560791015625, -0.006859779357910156, -0.00664520263671875, -0.006430625915527344, -0.0062160491943359375, -0.006001472473144531, -0.005786895751953125, -0.005572319030761719, -0.0053577423095703125, -0.005143165588378906, -0.0049285888671875, -0.004714012145996094, -0.0044994354248046875, -0.004284858703613281, -0.004070281982421875, -0.0038557052612304688, -0.0036411285400390625, -0.0034265518188476562, -0.00321197509765625, -0.0029973983764648438, -0.0027828216552734375, -0.0025682449340820312, -0.002353668212890625, -0.0021390914916992188, -0.0019245147705078125, -0.0017099380493164062, -0.001495361328125, -0.0012807846069335938, -0.0010662078857421875, -0.0008516311645507812, -0.000637054443359375, -0.00042247772216796875, -0.0002079010009765625, 6.67572021484375e-06, 0.00022125244140625, 0.00043582916259765625, 0.0006504058837890625, 0.0008649826049804688, 0.001079559326171875, 0.0012941360473632812, 0.0015087127685546875, 0.0017232894897460938, 0.0019378662109375, 0.0021524429321289062, 0.0023670196533203125, 0.0025815963745117188, 0.002796173095703125, 0.0030107498168945312, 0.0032253265380859375, 0.0034399032592773438, 0.00365447998046875, 0.0038690567016601562, 0.0040836334228515625, 0.004298210144042969, 0.004512786865234375, 0.004727363586425781, 0.0049419403076171875, 0.005156517028808594, 0.00537109375]}, "gradients/encoder.encoder.layers.1.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 1.0, 5.0, 1.0, 2.0, 7.0, 2.0, 12.0, 16.0, 11.0, 21.0, 22.0, 26.0, 43.0, 57.0, 66.0, 89.0, 111.0, 214.0, 288.0, 510.0, 846.0, 1838.0, 4786.0, 16964.0, 109973.0, 831234.0, 62410.0, 11792.0, 3667.0, 1468.0, 706.0, 437.0, 268.0, 179.0, 138.0, 100.0, 55.0, 42.0, 43.0, 21.0, 17.0, 23.0, 14.0, 18.0, 4.0, 4.0, 6.0, 4.0, 1.0, 5.0, 3.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0213470458984375, -0.020620346069335938, -0.019893646240234375, -0.019166946411132812, -0.01844024658203125, -0.017713546752929688, -0.016986846923828125, -0.016260147094726562, -0.015533447265625, -0.014806747436523438, -0.014080047607421875, -0.013353347778320312, -0.01262664794921875, -0.011899948120117188, -0.011173248291015625, -0.010446548461914062, -0.0097198486328125, -0.008993148803710938, -0.008266448974609375, -0.0075397491455078125, -0.00681304931640625, -0.0060863494873046875, -0.005359649658203125, -0.0046329498291015625, -0.00390625, -0.0031795501708984375, -0.002452850341796875, -0.0017261505126953125, -0.00099945068359375, -0.0002727508544921875, 0.000453948974609375, 0.0011806488037109375, 0.0019073486328125, 0.0026340484619140625, 0.003360748291015625, 0.0040874481201171875, 0.00481414794921875, 0.0055408477783203125, 0.006267547607421875, 0.0069942474365234375, 0.007720947265625, 0.008447647094726562, 0.009174346923828125, 0.009901046752929688, 0.01062774658203125, 0.011354446411132812, 0.012081146240234375, 0.012807846069335938, 0.0135345458984375, 0.014261245727539062, 0.014987945556640625, 0.015714645385742188, 0.01644134521484375, 0.017168045043945312, 0.017894744873046875, 0.018621444702148438, 0.01934814453125, 0.020074844360351562, 0.020801544189453125, 0.021528244018554688, 0.02225494384765625, 0.022981643676757812, 0.023708343505859375, 0.024435043334960938, 0.0251617431640625]}, "gradients/encoder.encoder.layers.1.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 4.0, 3.0, 3.0, 7.0, 4.0, 6.0, 4.0, 7.0, 10.0, 7.0, 15.0, 9.0, 13.0, 27.0, 30.0, 33.0, 44.0, 40.0, 61.0, 49.0, 58.0, 71.0, 65.0, 62.0, 57.0, 57.0, 37.0, 44.0, 31.0, 25.0, 32.0, 17.0, 13.0, 13.0, 15.0, 5.0, 8.0, 1.0, 6.0, 5.0, 7.0, 1.0, 6.0, 0.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0277862548828125, -0.02688884735107422, -0.025991439819335938, -0.025094032287597656, -0.024196624755859375, -0.023299217224121094, -0.022401809692382812, -0.02150440216064453, -0.02060699462890625, -0.01970958709716797, -0.018812179565429688, -0.017914772033691406, -0.017017364501953125, -0.016119956970214844, -0.015222549438476562, -0.014325141906738281, -0.013427734375, -0.012530326843261719, -0.011632919311523438, -0.010735511779785156, -0.009838104248046875, -0.008940696716308594, -0.008043289184570312, -0.007145881652832031, -0.00624847412109375, -0.005351066589355469, -0.0044536590576171875, -0.0035562515258789062, -0.002658843994140625, -0.0017614364624023438, -0.0008640289306640625, 3.337860107421875e-05, 0.0009307861328125, 0.0018281936645507812, 0.0027256011962890625, 0.0036230087280273438, 0.004520416259765625, 0.005417823791503906, 0.0063152313232421875, 0.007212638854980469, 0.00811004638671875, 0.009007453918457031, 0.009904861450195312, 0.010802268981933594, 0.011699676513671875, 0.012597084045410156, 0.013494491577148438, 0.014391899108886719, 0.015289306640625, 0.01618671417236328, 0.017084121704101562, 0.017981529235839844, 0.018878936767578125, 0.019776344299316406, 0.020673751831054688, 0.02157115936279297, 0.02246856689453125, 0.02336597442626953, 0.024263381958007812, 0.025160789489746094, 0.026058197021484375, 0.026955604553222656, 0.027853012084960938, 0.02875041961669922, 0.0296478271484375]}, "gradients/encoder.encoder.layers.1.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 5.0, 6.0, 5.0, 6.0, 16.0, 15.0, 30.0, 28.0, 44.0, 76.0, 129.0, 228.0, 434.0, 922.0, 2631.0, 10050.0, 97971.0, 891277.0, 35532.0, 5895.0, 1779.0, 666.0, 335.0, 168.0, 106.0, 71.0, 43.0, 30.0, 16.0, 9.0, 12.0, 4.0, 9.0, 5.0, 2.0, 2.0, 2.0, 3.0, 1.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.0038604736328125, -0.003728806972503662, -0.0035971403121948242, -0.0034654736518859863, -0.0033338069915771484, -0.0032021403312683105, -0.0030704736709594727, -0.0029388070106506348, -0.002807140350341797, -0.002675473690032959, -0.002543807029724121, -0.002412140369415283, -0.0022804737091064453, -0.0021488070487976074, -0.0020171403884887695, -0.0018854737281799316, -0.0017538070678710938, -0.0016221404075622559, -0.001490473747253418, -0.00135880708694458, -0.0012271404266357422, -0.0010954737663269043, -0.0009638071060180664, -0.0008321404457092285, -0.0007004737854003906, -0.0005688071250915527, -0.00043714046478271484, -0.00030547380447387695, -0.00017380714416503906, -4.214048385620117e-05, 8.952617645263672e-05, 0.0002211928367614746, 0.0003528594970703125, 0.0004845261573791504, 0.0006161928176879883, 0.0007478594779968262, 0.0008795261383056641, 0.001011192798614502, 0.0011428594589233398, 0.0012745261192321777, 0.0014061927795410156, 0.0015378594398498535, 0.0016695261001586914, 0.0018011927604675293, 0.0019328594207763672, 0.002064526081085205, 0.002196192741394043, 0.002327859401702881, 0.0024595260620117188, 0.0025911927223205566, 0.0027228593826293945, 0.0028545260429382324, 0.0029861927032470703, 0.003117859363555908, 0.003249526023864746, 0.003381192684173584, 0.003512859344482422, 0.0036445260047912598, 0.0037761926651000977, 0.0039078593254089355, 0.0040395259857177734, 0.004171192646026611, 0.004302859306335449, 0.004434525966644287, 0.004566192626953125]}, "gradients/encoder.encoder.layers.1.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 7.0, 7.0, 5.0, 5.0, 10.0, 18.0, 31.0, 39.0, 56.0, 59.0, 78.0, 114.0, 124.0, 111.0, 85.0, 71.0, 54.0, 43.0, 30.0, 11.0, 11.0, 10.0, 6.0, 4.0, 6.0, 7.0, 2.0, 2.0, 3.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-1.0609626770019531e-05, -1.0263174772262573e-05, -9.916722774505615e-06, -9.570270776748657e-06, -9.2238187789917e-06, -8.877366781234741e-06, -8.530914783477783e-06, -8.184462785720825e-06, -7.838010787963867e-06, -7.491558790206909e-06, -7.145106792449951e-06, -6.798654794692993e-06, -6.452202796936035e-06, -6.105750799179077e-06, -5.759298801422119e-06, -5.412846803665161e-06, -5.066394805908203e-06, -4.719942808151245e-06, -4.373490810394287e-06, -4.027038812637329e-06, -3.680586814880371e-06, -3.334134817123413e-06, -2.987682819366455e-06, -2.641230821609497e-06, -2.294778823852539e-06, -1.948326826095581e-06, -1.601874828338623e-06, -1.255422830581665e-06, -9.08970832824707e-07, -5.62518835067749e-07, -2.1606683731079102e-07, 1.30385160446167e-07, 4.76837158203125e-07, 8.23289155960083e-07, 1.169741153717041e-06, 1.516193151473999e-06, 1.862645149230957e-06, 2.209097146987915e-06, 2.555549144744873e-06, 2.902001142501831e-06, 3.248453140258789e-06, 3.594905138015747e-06, 3.941357135772705e-06, 4.287809133529663e-06, 4.634261131286621e-06, 4.980713129043579e-06, 5.327165126800537e-06, 5.673617124557495e-06, 6.020069122314453e-06, 6.366521120071411e-06, 6.712973117828369e-06, 7.059425115585327e-06, 7.405877113342285e-06, 7.752329111099243e-06, 8.098781108856201e-06, 8.44523310661316e-06, 8.791685104370117e-06, 9.138137102127075e-06, 9.484589099884033e-06, 9.831041097640991e-06, 1.017749309539795e-05, 1.0523945093154907e-05, 1.0870397090911865e-05, 1.1216849088668823e-05, 1.1563301086425781e-05]}, "gradients/encoder.encoder.layers.1.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 2.0, 3.0, 3.0, 3.0, 4.0, 6.0, 11.0, 14.0, 17.0, 28.0, 37.0, 51.0, 74.0, 124.0, 223.0, 447.0, 1030.0, 2806.0, 10114.0, 99083.0, 888324.0, 36391.0, 6136.0, 1950.0, 806.0, 355.0, 172.0, 113.0, 74.0, 42.0, 28.0, 23.0, 17.0, 16.0, 7.0, 9.0, 4.0, 3.0, 3.0, 2.0, 5.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.004917144775390625, -0.004781782627105713, -0.004646420478820801, -0.004511058330535889, -0.0043756961822509766, -0.0042403340339660645, -0.004104971885681152, -0.00396960973739624, -0.003834247589111328, -0.003698885440826416, -0.003563523292541504, -0.003428161144256592, -0.0032927989959716797, -0.0031574368476867676, -0.0030220746994018555, -0.0028867125511169434, -0.0027513504028320312, -0.002615988254547119, -0.002480626106262207, -0.002345263957977295, -0.002209901809692383, -0.0020745396614074707, -0.0019391775131225586, -0.0018038153648376465, -0.0016684532165527344, -0.0015330910682678223, -0.0013977289199829102, -0.001262366771697998, -0.001127004623413086, -0.0009916424751281738, -0.0008562803268432617, -0.0007209181785583496, -0.0005855560302734375, -0.0004501938819885254, -0.0003148317337036133, -0.00017946958541870117, -4.410743713378906e-05, 9.125471115112305e-05, 0.00022661685943603516, 0.00036197900772094727, 0.0004973411560058594, 0.0006327033042907715, 0.0007680654525756836, 0.0009034276008605957, 0.0010387897491455078, 0.00117415189743042, 0.001309514045715332, 0.0014448761940002441, 0.0015802383422851562, 0.0017156004905700684, 0.0018509626388549805, 0.0019863247871398926, 0.0021216869354248047, 0.002257049083709717, 0.002392411231994629, 0.002527773380279541, 0.002663135528564453, 0.0027984976768493652, 0.0029338598251342773, 0.0030692219734191895, 0.0032045841217041016, 0.0033399462699890137, 0.0034753084182739258, 0.003610670566558838, 0.00374603271484375]}, "gradients/encoder.encoder.layers.1.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 2.0, 4.0, 2.0, 4.0, 0.0, 5.0, 8.0, 6.0, 8.0, 6.0, 18.0, 19.0, 50.0, 57.0, 84.0, 111.0, 161.0, 147.0, 88.0, 69.0, 47.0, 36.0, 30.0, 14.0, 13.0, 4.0, 5.0, 3.0, 2.0, 1.0, 1.0, 0.0, 4.0, 1.0, 2.0, 0.0, 2.0, 1.0, 1.0], "bins": [-0.00667572021484375, -0.0065155029296875, -0.00635528564453125, -0.006195068359375, -0.00603485107421875, -0.0058746337890625, -0.00571441650390625, -0.00555419921875, -0.00539398193359375, -0.0052337646484375, -0.00507354736328125, -0.004913330078125, -0.00475311279296875, -0.0045928955078125, -0.00443267822265625, -0.0042724609375, -0.00411224365234375, -0.0039520263671875, -0.00379180908203125, -0.003631591796875, -0.00347137451171875, -0.0033111572265625, -0.00315093994140625, -0.00299072265625, -0.00283050537109375, -0.0026702880859375, -0.00251007080078125, -0.002349853515625, -0.00218963623046875, -0.0020294189453125, -0.00186920166015625, -0.001708984375, -0.00154876708984375, -0.0013885498046875, -0.00122833251953125, -0.001068115234375, -0.00090789794921875, -0.0007476806640625, -0.00058746337890625, -0.00042724609375, -0.00026702880859375, -0.0001068115234375, 5.340576171875e-05, 0.000213623046875, 0.00037384033203125, 0.0005340576171875, 0.00069427490234375, 0.0008544921875, 0.00101470947265625, 0.0011749267578125, 0.00133514404296875, 0.001495361328125, 0.00165557861328125, 0.0018157958984375, 0.00197601318359375, 0.00213623046875, 0.00229644775390625, 0.0024566650390625, 0.00261688232421875, 0.002777099609375, 0.00293731689453125, 0.0030975341796875, 0.00325775146484375, 0.00341796875, 0.00357818603515625]}, "gradients/encoder.encoder.layers.1.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 8.0, 18.0, 50.0, 110.0, 535.0, 168.0, 56.0, 26.0, 18.0, 7.0, 5.0, 5.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.18286588788032532, -0.17570938169956207, -0.16855286061763763, -0.1613963544368744, -0.15423984825611115, -0.1470833271741867, -0.13992682099342346, -0.13277029991149902, -0.12561379373073578, -0.11845728009939194, -0.11130077391862869, -0.10414426028728485, -0.09698774665594101, -0.08983123302459717, -0.08267472684383392, -0.07551821321249008, -0.06836170703172684, -0.061205197125673294, -0.05404868349432945, -0.04689217358827591, -0.03973565995693207, -0.032579150050878525, -0.02542264014482498, -0.01826612651348114, -0.011109616607427597, -0.003953105304390192, 0.003203405998647213, 0.01035991683602333, 0.017516428604722023, 0.024672940373420715, 0.03182945027947426, 0.0389859639108181, 0.04614247381687164, 0.053298983722925186, 0.06045549735426903, 0.06761200726032257, 0.07476852089166641, 0.08192503452301025, 0.0890815407037735, 0.09623805433511734, 0.10339456796646118, 0.11055108159780502, 0.11770758777856827, 0.12486410140991211, 0.13202060759067535, 0.1391771286725998, 0.14633363485336304, 0.15349015593528748, 0.16064664721488953, 0.16780315339565277, 0.1749596744775772, 0.18211618065834045, 0.1892726868391037, 0.19642920792102814, 0.20358571410179138, 0.21074223518371582, 0.21789874136447906, 0.2250552475452423, 0.23221176862716675, 0.23936827480793, 0.24652478098869324, 0.2536813020706177, 0.2608377933502197, 0.26799431443214417, 0.2751508355140686]}, "gradients/encoder.encoder.layers.1.layer_norm.bias": {"_type": "histogram", "values": [2.0, 2.0, 1.0, 0.0, 4.0, 4.0, 1.0, 3.0, 5.0, 6.0, 9.0, 8.0, 16.0, 17.0, 17.0, 21.0, 17.0, 29.0, 31.0, 32.0, 28.0, 48.0, 64.0, 139.0, 135.0, 67.0, 43.0, 29.0, 34.0, 41.0, 37.0, 22.0, 16.0, 18.0, 14.0, 9.0, 14.0, 4.0, 10.0, 7.0, 6.0, 3.0, 2.0, 2.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.10062956809997559, -0.09644424915313721, -0.09225893020629883, -0.08807360380887985, -0.08388828486204147, -0.0797029659152031, -0.07551763951778412, -0.07133232057094574, -0.06714700162410736, -0.06296168267726898, -0.058776360005140305, -0.05459103733301163, -0.05040571838617325, -0.04622039943933487, -0.04203507676720619, -0.037849754095077515, -0.033664435148239136, -0.029479114338755608, -0.02529379352927208, -0.02110847271978855, -0.016923151910305023, -0.012737831100821495, -0.008552510291337967, -0.004367189481854439, -0.00018186867237091064, 0.0040034521371126175, 0.008188772946596146, 0.012374093756079674, 0.016559414565563202, 0.02074473537504673, 0.024930056184530258, 0.029115376994013786, 0.033300697803497314, 0.03748601675033569, 0.04167133942246437, 0.04585666209459305, 0.05004198104143143, 0.054227299988269806, 0.05841262266039848, 0.06259794533252716, 0.06678326427936554, 0.07096858322620392, 0.0751539021730423, 0.07933922857046127, 0.08352454751729965, 0.08770986646413803, 0.091895192861557, 0.09608051180839539, 0.10026583075523376, 0.10445114970207214, 0.10863646864891052, 0.1128217950463295, 0.11700711399316788, 0.12119243294000626, 0.12537775933742523, 0.1295630782842636, 0.133748397231102, 0.13793371617794037, 0.14211903512477875, 0.14630435407161713, 0.1504896879196167, 0.15467500686645508, 0.15886032581329346, 0.16304564476013184, 0.16723096370697021]}, "gradients/encoder.encoder.layers.0.feed_forward.output_dense.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 3.0, 1.0, 2.0, 2.0, 4.0, 4.0, 10.0, 6.0, 7.0, 23.0, 27.0, 38.0, 46.0, 53.0, 81.0, 84.0, 146.0, 207.0, 257.0, 466.0, 686.0, 1222.0, 2895.0, 13627.0, 80651.0, 514790.0, 1881935.0, 1344911.0, 284094.0, 52130.0, 9792.0, 2541.0, 1372.0, 770.0, 469.0, 292.0, 202.0, 128.0, 91.0, 60.0, 52.0, 31.0, 24.0, 18.0, 13.0, 11.0, 7.0, 8.0, 2.0, 1.0, 2.0, 1.0, 1.0], "bins": [-0.0074005126953125, -0.007204949855804443, -0.007009387016296387, -0.00681382417678833, -0.0066182613372802734, -0.006422698497772217, -0.00622713565826416, -0.0060315728187561035, -0.005836009979248047, -0.00564044713973999, -0.005444884300231934, -0.005249321460723877, -0.00505375862121582, -0.004858195781707764, -0.004662632942199707, -0.00446707010269165, -0.004271507263183594, -0.004075944423675537, -0.0038803815841674805, -0.003684818744659424, -0.003489255905151367, -0.0032936930656433105, -0.003098130226135254, -0.0029025673866271973, -0.0027070045471191406, -0.002511441707611084, -0.0023158788681030273, -0.0021203160285949707, -0.001924753189086914, -0.0017291903495788574, -0.0015336275100708008, -0.0013380646705627441, -0.0011425018310546875, -0.0009469389915466309, -0.0007513761520385742, -0.0005558133125305176, -0.00036025047302246094, -0.0001646876335144043, 3.0875205993652344e-05, 0.00022643804550170898, 0.0004220008850097656, 0.0006175637245178223, 0.0008131265640258789, 0.0010086894035339355, 0.0012042522430419922, 0.0013998150825500488, 0.0015953779220581055, 0.0017909407615661621, 0.0019865036010742188, 0.0021820664405822754, 0.002377629280090332, 0.0025731921195983887, 0.0027687549591064453, 0.002964317798614502, 0.0031598806381225586, 0.0033554434776306152, 0.003551006317138672, 0.0037465691566467285, 0.003942131996154785, 0.004137694835662842, 0.0043332576751708984, 0.004528820514678955, 0.004724383354187012, 0.004919946193695068, 0.005115509033203125]}, "gradients/encoder.encoder.layers.0.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 2.0, 3.0, 2.0, 0.0, 5.0, 6.0, 4.0, 10.0, 5.0, 7.0, 11.0, 13.0, 16.0, 21.0, 27.0, 28.0, 33.0, 38.0, 33.0, 49.0, 47.0, 53.0, 44.0, 59.0, 53.0, 48.0, 47.0, 35.0, 47.0, 38.0, 37.0, 26.0, 27.0, 32.0, 31.0, 19.0, 10.0, 11.0, 6.0, 7.0, 8.0, 5.0, 2.0, 6.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 2.0], "bins": [-0.007171630859375, -0.006976902484893799, -0.006782174110412598, -0.0065874457359313965, -0.006392717361450195, -0.006197988986968994, -0.006003260612487793, -0.005808532238006592, -0.005613803863525391, -0.0054190754890441895, -0.005224347114562988, -0.005029618740081787, -0.004834890365600586, -0.004640161991119385, -0.004445433616638184, -0.004250705242156982, -0.004055976867675781, -0.00386124849319458, -0.003666520118713379, -0.0034717917442321777, -0.0032770633697509766, -0.0030823349952697754, -0.0028876066207885742, -0.002692878246307373, -0.002498149871826172, -0.0023034214973449707, -0.0021086931228637695, -0.0019139647483825684, -0.0017192363739013672, -0.001524507999420166, -0.0013297796249389648, -0.0011350512504577637, -0.0009403228759765625, -0.0007455945014953613, -0.0005508661270141602, -0.000356137752532959, -0.0001614093780517578, 3.331899642944336e-05, 0.00022804737091064453, 0.0004227757453918457, 0.0006175041198730469, 0.000812232494354248, 0.0010069608688354492, 0.0012016892433166504, 0.0013964176177978516, 0.0015911459922790527, 0.001785874366760254, 0.001980602741241455, 0.0021753311157226562, 0.0023700594902038574, 0.0025647878646850586, 0.0027595162391662598, 0.002954244613647461, 0.003148972988128662, 0.0033437013626098633, 0.0035384297370910645, 0.0037331581115722656, 0.003927886486053467, 0.004122614860534668, 0.004317343235015869, 0.00451207160949707, 0.0047067999839782715, 0.004901528358459473, 0.005096256732940674, 0.005290985107421875]}, "gradients/encoder.encoder.layers.0.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 3.0, 2.0, 4.0, 10.0, 14.0, 31.0, 47.0, 118.0, 278.0, 670.0, 2669.0, 46313.0, 4121568.0, 19607.0, 1905.0, 563.0, 213.0, 122.0, 57.0, 35.0, 18.0, 16.0, 7.0, 11.0, 3.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.031494140625, -0.0305788516998291, -0.029663562774658203, -0.028748273849487305, -0.027832984924316406, -0.026917695999145508, -0.02600240707397461, -0.02508711814880371, -0.024171829223632812, -0.023256540298461914, -0.022341251373291016, -0.021425962448120117, -0.02051067352294922, -0.01959538459777832, -0.018680095672607422, -0.017764806747436523, -0.016849517822265625, -0.015934228897094727, -0.015018939971923828, -0.01410365104675293, -0.013188362121582031, -0.012273073196411133, -0.011357784271240234, -0.010442495346069336, -0.009527206420898438, -0.008611917495727539, -0.007696628570556641, -0.006781339645385742, -0.005866050720214844, -0.004950761795043945, -0.004035472869873047, -0.0031201839447021484, -0.00220489501953125, -0.0012896060943603516, -0.0003743171691894531, 0.0005409717559814453, 0.0014562606811523438, 0.002371549606323242, 0.0032868385314941406, 0.004202127456665039, 0.0051174163818359375, 0.006032705307006836, 0.006947994232177734, 0.007863283157348633, 0.008778572082519531, 0.00969386100769043, 0.010609149932861328, 0.011524438858032227, 0.012439727783203125, 0.013355016708374023, 0.014270305633544922, 0.01518559455871582, 0.01610088348388672, 0.017016172409057617, 0.017931461334228516, 0.018846750259399414, 0.019762039184570312, 0.02067732810974121, 0.02159261703491211, 0.022507905960083008, 0.023423194885253906, 0.024338483810424805, 0.025253772735595703, 0.0261690616607666, 0.0270843505859375]}, "gradients/encoder.encoder.layers.0.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 4.0, 1.0, 4.0, 7.0, 12.0, 18.0, 29.0, 34.0, 61.0, 100.0, 194.0, 403.0, 993.0, 1107.0, 599.0, 251.0, 120.0, 64.0, 30.0, 19.0, 10.0, 9.0, 7.0, 5.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0120849609375, -0.01148843765258789, -0.010891914367675781, -0.010295391082763672, -0.009698867797851562, -0.009102344512939453, -0.008505821228027344, -0.007909297943115234, -0.007312774658203125, -0.006716251373291016, -0.006119728088378906, -0.005523204803466797, -0.0049266815185546875, -0.004330158233642578, -0.0037336349487304688, -0.0031371116638183594, -0.00254058837890625, -0.0019440650939941406, -0.0013475418090820312, -0.0007510185241699219, -0.0001544952392578125, 0.0004420280456542969, 0.0010385513305664062, 0.0016350746154785156, 0.002231597900390625, 0.0028281211853027344, 0.0034246444702148438, 0.004021167755126953, 0.0046176910400390625, 0.005214214324951172, 0.005810737609863281, 0.006407260894775391, 0.0070037841796875, 0.007600307464599609, 0.008196830749511719, 0.008793354034423828, 0.009389877319335938, 0.009986400604248047, 0.010582923889160156, 0.011179447174072266, 0.011775970458984375, 0.012372493743896484, 0.012969017028808594, 0.013565540313720703, 0.014162063598632812, 0.014758586883544922, 0.015355110168457031, 0.01595163345336914, 0.01654815673828125, 0.01714468002319336, 0.01774120330810547, 0.018337726593017578, 0.018934249877929688, 0.019530773162841797, 0.020127296447753906, 0.020723819732666016, 0.021320343017578125, 0.021916866302490234, 0.022513389587402344, 0.023109912872314453, 0.023706436157226562, 0.024302959442138672, 0.02489948272705078, 0.02549600601196289, 0.026092529296875]}, "gradients/encoder.encoder.layers.0.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 1.0, 0.0, 5.0, 2.0, 4.0, 3.0, 5.0, 6.0, 20.0, 39.0, 67.0, 120.0, 222.0, 238.0, 136.0, 68.0, 27.0, 19.0, 11.0, 4.0, 2.0, 8.0, 2.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.12647980451583862, -0.12232302129268646, -0.1181662380695343, -0.11400945484638214, -0.10985267162322998, -0.10569588840007782, -0.10153909772634506, -0.0973823145031929, -0.09322553128004074, -0.08906874805688858, -0.08491196483373642, -0.08075518161058426, -0.0765983909368515, -0.07244160771369934, -0.06828482449054718, -0.06412804126739502, -0.05997125804424286, -0.0558144748210907, -0.05165769159793854, -0.04750090464949608, -0.04334412142634392, -0.03918733820319176, -0.0350305512547493, -0.030873768031597137, -0.026716984808444977, -0.022560201585292816, -0.018403416499495506, -0.014246632345020771, -0.010089848190546036, -0.005933064967393875, -0.0017762798815965652, 0.0023805052042007446, 0.006537273526191711, 0.010694057680666447, 0.014850841835141182, 0.019007626920938492, 0.023164410144090652, 0.027321193367242813, 0.03147798031568527, 0.03563476353883743, 0.039791546761989594, 0.043948329985141754, 0.048105113208293915, 0.052261900156736374, 0.056418683379888535, 0.060575466603040695, 0.06473225355148315, 0.06888903677463531, 0.07304581999778748, 0.07720260322093964, 0.0813593864440918, 0.08551616966724396, 0.08967295289039612, 0.09382973611354828, 0.09798652678728104, 0.1021433100104332, 0.10630009323358536, 0.11045687645673752, 0.11461365967988968, 0.11877044290304184, 0.1229272335767746, 0.12708401679992676, 0.13124080002307892, 0.13539758324623108, 0.13955436646938324]}, "gradients/encoder.encoder.layers.0.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 4.0, 2.0, 4.0, 9.0, 10.0, 11.0, 14.0, 15.0, 23.0, 18.0, 25.0, 42.0, 34.0, 38.0, 41.0, 44.0, 50.0, 71.0, 80.0, 58.0, 62.0, 51.0, 34.0, 43.0, 41.0, 45.0, 24.0, 25.0, 17.0, 24.0, 10.0, 8.0, 10.0, 7.0, 4.0, 2.0, 3.0, 2.0, 1.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.07072579860687256, -0.06849884241819382, -0.06627188622951508, -0.06404492259025574, -0.061817966401576996, -0.059591010212898254, -0.057364050298929214, -0.055137090384960175, -0.05291013419628143, -0.05068317800760269, -0.04845621809363365, -0.04622925817966461, -0.04400230199098587, -0.04177534580230713, -0.03954838588833809, -0.03732142597436905, -0.03509446978569031, -0.032867513597011566, -0.030640553683042526, -0.028413595631718636, -0.026186637580394745, -0.023959679529070854, -0.021732721477746964, -0.019505763426423073, -0.017278805375099182, -0.015051847323775291, -0.0128248892724514, -0.01059793122112751, -0.00837097316980362, -0.006144015118479729, -0.003917057067155838, -0.0016900990158319473, 0.0005368590354919434, 0.002763817086815834, 0.004990775138139725, 0.007217733189463615, 0.009444691240787506, 0.011671649292111397, 0.013898607343435287, 0.016125565394759178, 0.01835252344608307, 0.02057948149740696, 0.02280643954873085, 0.02503339760005474, 0.02726035565137863, 0.029487313702702522, 0.03171427175402641, 0.03394123166799545, 0.036168187856674194, 0.038395144045352936, 0.040622103959321976, 0.042849063873291016, 0.04507602006196976, 0.0473029762506485, 0.04952993616461754, 0.05175689607858658, 0.05398385226726532, 0.05621080845594406, 0.0584377683699131, 0.06066472828388214, 0.06289168447256088, 0.06511864066123962, 0.06734560430049896, 0.0695725604891777, 0.07179951667785645]}, "gradients/encoder.encoder.layers.0.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 3.0, 2.0, 6.0, 4.0, 4.0, 9.0, 16.0, 17.0, 23.0, 44.0, 74.0, 86.0, 127.0, 192.0, 319.0, 524.0, 841.0, 1498.0, 2751.0, 5316.0, 11827.0, 30395.0, 101223.0, 619051.0, 195815.0, 46289.0, 16701.0, 7284.0, 3505.0, 1872.0, 1072.0, 551.0, 384.0, 263.0, 154.0, 92.0, 67.0, 47.0, 35.0, 28.0, 16.0, 12.0, 7.0, 6.0, 11.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.0091400146484375, -0.00885164737701416, -0.00856328010559082, -0.00827491283416748, -0.00798654556274414, -0.007698178291320801, -0.007409811019897461, -0.007121443748474121, -0.006833076477050781, -0.006544709205627441, -0.0062563419342041016, -0.005967974662780762, -0.005679607391357422, -0.005391240119934082, -0.005102872848510742, -0.004814505577087402, -0.0045261383056640625, -0.004237771034240723, -0.003949403762817383, -0.003661036491394043, -0.003372669219970703, -0.0030843019485473633, -0.0027959346771240234, -0.0025075674057006836, -0.0022192001342773438, -0.001930832862854004, -0.001642465591430664, -0.0013540983200073242, -0.0010657310485839844, -0.0007773637771606445, -0.0004889965057373047, -0.00020062923431396484, 8.7738037109375e-05, 0.00037610530853271484, 0.0006644725799560547, 0.0009528398513793945, 0.0012412071228027344, 0.0015295743942260742, 0.001817941665649414, 0.002106308937072754, 0.0023946762084960938, 0.0026830434799194336, 0.0029714107513427734, 0.0032597780227661133, 0.003548145294189453, 0.003836512565612793, 0.004124879837036133, 0.004413247108459473, 0.0047016143798828125, 0.004989981651306152, 0.005278348922729492, 0.005566716194152832, 0.005855083465576172, 0.006143450736999512, 0.0064318180084228516, 0.006720185279846191, 0.007008552551269531, 0.007296919822692871, 0.007585287094116211, 0.00787365436553955, 0.00816202163696289, 0.00845038890838623, 0.00873875617980957, 0.00902712345123291, 0.00931549072265625]}, "gradients/encoder.encoder.layers.0.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 0.0, 0.0, 3.0, 1.0, 3.0, 6.0, 9.0, 13.0, 6.0, 10.0, 15.0, 18.0, 32.0, 28.0, 43.0, 39.0, 60.0, 48.0, 63.0, 51.0, 69.0, 84.0, 61.0, 41.0, 55.0, 41.0, 42.0, 33.0, 36.0, 25.0, 19.0, 14.0, 7.0, 16.0, 6.0, 8.0, 4.0, 1.0, 3.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00765228271484375, -0.007403135299682617, -0.007153987884521484, -0.0069048404693603516, -0.006655693054199219, -0.006406545639038086, -0.006157398223876953, -0.00590825080871582, -0.0056591033935546875, -0.005409955978393555, -0.005160808563232422, -0.004911661148071289, -0.004662513732910156, -0.0044133663177490234, -0.004164218902587891, -0.003915071487426758, -0.003665924072265625, -0.003416776657104492, -0.0031676292419433594, -0.0029184818267822266, -0.0026693344116210938, -0.002420186996459961, -0.002171039581298828, -0.0019218921661376953, -0.0016727447509765625, -0.0014235973358154297, -0.0011744499206542969, -0.0009253025054931641, -0.0006761550903320312, -0.00042700767517089844, -0.00017786026000976562, 7.128715515136719e-05, 0.0003204345703125, 0.0005695819854736328, 0.0008187294006347656, 0.0010678768157958984, 0.0013170242309570312, 0.001566171646118164, 0.0018153190612792969, 0.0020644664764404297, 0.0023136138916015625, 0.0025627613067626953, 0.002811908721923828, 0.003061056137084961, 0.0033102035522460938, 0.0035593509674072266, 0.0038084983825683594, 0.004057645797729492, 0.004306793212890625, 0.004555940628051758, 0.004805088043212891, 0.0050542354583740234, 0.005303382873535156, 0.005552530288696289, 0.005801677703857422, 0.006050825119018555, 0.0062999725341796875, 0.00654911994934082, 0.006798267364501953, 0.007047414779663086, 0.007296562194824219, 0.0075457096099853516, 0.007794857025146484, 0.008044004440307617, 0.00829315185546875]}, "gradients/encoder.encoder.layers.0.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 2.0, 2.0, 5.0, 5.0, 4.0, 8.0, 7.0, 12.0, 25.0, 28.0, 40.0, 38.0, 63.0, 90.0, 149.0, 227.0, 340.0, 701.0, 1327.0, 3380.0, 11558.0, 70911.0, 898600.0, 47286.0, 8767.0, 2640.0, 1074.0, 512.0, 277.0, 160.0, 119.0, 61.0, 38.0, 34.0, 21.0, 16.0, 9.0, 8.0, 9.0, 3.0, 3.0, 1.0, 1.0, 4.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.01036834716796875, -0.009960293769836426, -0.009552240371704102, -0.009144186973571777, -0.008736133575439453, -0.008328080177307129, -0.007920026779174805, -0.0075119733810424805, -0.007103919982910156, -0.006695866584777832, -0.006287813186645508, -0.005879759788513184, -0.005471706390380859, -0.005063652992248535, -0.004655599594116211, -0.004247546195983887, -0.0038394927978515625, -0.0034314393997192383, -0.003023386001586914, -0.00261533260345459, -0.0022072792053222656, -0.0017992258071899414, -0.0013911724090576172, -0.000983119010925293, -0.0005750656127929688, -0.00016701221466064453, 0.0002410411834716797, 0.0006490945816040039, 0.0010571479797363281, 0.0014652013778686523, 0.0018732547760009766, 0.0022813081741333008, 0.002689361572265625, 0.0030974149703979492, 0.0035054683685302734, 0.003913521766662598, 0.004321575164794922, 0.004729628562927246, 0.00513768196105957, 0.0055457353591918945, 0.005953788757324219, 0.006361842155456543, 0.006769895553588867, 0.007177948951721191, 0.007586002349853516, 0.00799405574798584, 0.008402109146118164, 0.008810162544250488, 0.009218215942382812, 0.009626269340515137, 0.010034322738647461, 0.010442376136779785, 0.01085042953491211, 0.011258482933044434, 0.011666536331176758, 0.012074589729309082, 0.012482643127441406, 0.01289069652557373, 0.013298749923706055, 0.013706803321838379, 0.014114856719970703, 0.014522910118103027, 0.014930963516235352, 0.015339016914367676, 0.0157470703125]}, "gradients/encoder.encoder.layers.0.attention.v_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 4.0, 4.0, 3.0, 6.0, 6.0, 12.0, 14.0, 18.0, 22.0, 30.0, 25.0, 34.0, 38.0, 49.0, 85.0, 91.0, 105.0, 96.0, 76.0, 79.0, 44.0, 36.0, 26.0, 26.0, 22.0, 14.0, 13.0, 15.0, 6.0, 2.0, 3.0, 2.0, 3.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.027374267578125, -0.026322364807128906, -0.025270462036132812, -0.02421855926513672, -0.023166656494140625, -0.02211475372314453, -0.021062850952148438, -0.020010948181152344, -0.01895904541015625, -0.017907142639160156, -0.016855239868164062, -0.01580333709716797, -0.014751434326171875, -0.013699531555175781, -0.012647628784179688, -0.011595726013183594, -0.0105438232421875, -0.009491920471191406, -0.008440017700195312, -0.007388114929199219, -0.006336212158203125, -0.005284309387207031, -0.0042324066162109375, -0.0031805038452148438, -0.00212860107421875, -0.0010766983032226562, -2.47955322265625e-05, 0.0010271072387695312, 0.002079010009765625, 0.0031309127807617188, 0.0041828155517578125, 0.005234718322753906, 0.00628662109375, 0.007338523864746094, 0.008390426635742188, 0.009442329406738281, 0.010494232177734375, 0.011546134948730469, 0.012598037719726562, 0.013649940490722656, 0.01470184326171875, 0.015753746032714844, 0.016805648803710938, 0.01785755157470703, 0.018909454345703125, 0.01996135711669922, 0.021013259887695312, 0.022065162658691406, 0.0231170654296875, 0.024168968200683594, 0.025220870971679688, 0.02627277374267578, 0.027324676513671875, 0.02837657928466797, 0.029428482055664062, 0.030480384826660156, 0.03153228759765625, 0.032584190368652344, 0.03363609313964844, 0.03468799591064453, 0.035739898681640625, 0.03679180145263672, 0.03784370422363281, 0.038895606994628906, 0.039947509765625]}, "gradients/encoder.encoder.layers.0.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 0.0, 2.0, 2.0, 4.0, 0.0, 8.0, 3.0, 7.0, 2.0, 3.0, 7.0, 9.0, 11.0, 14.0, 20.0, 19.0, 26.0, 34.0, 68.0, 73.0, 116.0, 207.0, 308.0, 632.0, 1370.0, 3756.0, 18401.0, 913850.0, 95960.0, 8909.0, 2542.0, 914.0, 495.0, 258.0, 172.0, 89.0, 70.0, 45.0, 39.0, 20.0, 18.0, 16.0, 16.0, 10.0, 6.0, 4.0, 4.0, 4.0, 4.0, 5.0, 4.0, 6.0, 1.0, 1.0, 2.0, 2.0, 2.0, 0.0, 0.0, 2.0], "bins": [-0.0038471221923828125, -0.0037261545658111572, -0.003605186939239502, -0.0034842193126678467, -0.0033632516860961914, -0.003242284059524536, -0.003121316432952881, -0.0030003488063812256, -0.0028793811798095703, -0.002758413553237915, -0.0026374459266662598, -0.0025164783000946045, -0.0023955106735229492, -0.002274543046951294, -0.0021535754203796387, -0.0020326077938079834, -0.0019116401672363281, -0.0017906725406646729, -0.0016697049140930176, -0.0015487372875213623, -0.001427769660949707, -0.0013068020343780518, -0.0011858344078063965, -0.0010648667812347412, -0.0009438991546630859, -0.0008229315280914307, -0.0007019639015197754, -0.0005809962749481201, -0.00046002864837646484, -0.00033906102180480957, -0.0002180933952331543, -9.712576866149902e-05, 2.384185791015625e-05, 0.00014480948448181152, 0.0002657771110534668, 0.00038674473762512207, 0.0005077123641967773, 0.0006286799907684326, 0.0007496476173400879, 0.0008706152439117432, 0.0009915828704833984, 0.0011125504970550537, 0.001233518123626709, 0.0013544857501983643, 0.0014754533767700195, 0.0015964210033416748, 0.00171738862991333, 0.0018383562564849854, 0.0019593238830566406, 0.002080291509628296, 0.002201259136199951, 0.0023222267627716064, 0.0024431943893432617, 0.002564162015914917, 0.0026851296424865723, 0.0028060972690582275, 0.002927064895629883, 0.003048032522201538, 0.0031690001487731934, 0.0032899677753448486, 0.003410935401916504, 0.003531903028488159, 0.0036528706550598145, 0.0037738382816314697, 0.003894805908203125]}, "gradients/encoder.encoder.layers.0.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 5.0, 3.0, 1.0, 3.0, 4.0, 3.0, 2.0, 3.0, 4.0, 6.0, 2.0, 3.0, 4.0, 2.0, 3.0, 7.0, 12.0, 6.0, 13.0, 8.0, 9.0, 16.0, 24.0, 30.0, 42.0, 34.0, 49.0, 72.0, 83.0, 97.0, 86.0, 82.0, 52.0, 49.0, 37.0, 27.0, 22.0, 19.0, 11.0, 11.0, 12.0, 7.0, 7.0, 5.0, 8.0, 5.0, 5.0, 5.0, 3.0, 2.0, 3.0, 1.0, 1.0, 1.0, 1.0, 4.0, 0.0, 1.0, 1.0, 2.0, 2.0], "bins": [-1.1742115020751953e-05, -1.1382624506950378e-05, -1.1023133993148804e-05, -1.0663643479347229e-05, -1.0304152965545654e-05, -9.94466245174408e-06, -9.585171937942505e-06, -9.22568142414093e-06, -8.866190910339355e-06, -8.50670039653778e-06, -8.147209882736206e-06, -7.787719368934631e-06, -7.428228855133057e-06, -7.068738341331482e-06, -6.709247827529907e-06, -6.3497573137283325e-06, -5.990266799926758e-06, -5.630776286125183e-06, -5.271285772323608e-06, -4.911795258522034e-06, -4.552304744720459e-06, -4.192814230918884e-06, -3.8333237171173096e-06, -3.473833203315735e-06, -3.11434268951416e-06, -2.7548521757125854e-06, -2.3953616619110107e-06, -2.035871148109436e-06, -1.6763806343078613e-06, -1.3168901205062866e-06, -9.57399606704712e-07, -5.979090929031372e-07, -2.384185791015625e-07, 1.210719347000122e-07, 4.805624485015869e-07, 8.400529623031616e-07, 1.1995434761047363e-06, 1.559033989906311e-06, 1.9185245037078857e-06, 2.2780150175094604e-06, 2.637505531311035e-06, 2.99699604511261e-06, 3.3564865589141846e-06, 3.7159770727157593e-06, 4.075467586517334e-06, 4.434958100318909e-06, 4.794448614120483e-06, 5.153939127922058e-06, 5.513429641723633e-06, 5.8729201555252075e-06, 6.232410669326782e-06, 6.591901183128357e-06, 6.951391696929932e-06, 7.310882210731506e-06, 7.670372724533081e-06, 8.029863238334656e-06, 8.38935375213623e-06, 8.748844265937805e-06, 9.10833477973938e-06, 9.467825293540955e-06, 9.82731580734253e-06, 1.0186806321144104e-05, 1.0546296834945679e-05, 1.0905787348747253e-05, 1.1265277862548828e-05]}, "gradients/encoder.encoder.layers.0.attention.q_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 6.0, 3.0, 3.0, 5.0, 11.0, 14.0, 11.0, 27.0, 24.0, 27.0, 49.0, 65.0, 112.0, 174.0, 269.0, 509.0, 916.0, 1764.0, 4011.0, 10164.0, 34974.0, 264190.0, 672020.0, 39512.0, 11240.0, 4305.0, 1855.0, 919.0, 527.0, 288.0, 179.0, 112.0, 84.0, 46.0, 34.0, 29.0, 23.0, 7.0, 17.0, 10.0, 10.0, 6.0, 3.0, 2.0, 4.0, 3.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.0011920928955078125, -0.0011570751667022705, -0.0011220574378967285, -0.0010870397090911865, -0.0010520219802856445, -0.0010170042514801025, -0.0009819865226745605, -0.0009469687938690186, -0.0009119510650634766, -0.0008769333362579346, -0.0008419156074523926, -0.0008068978786468506, -0.0007718801498413086, -0.0007368624210357666, -0.0007018446922302246, -0.0006668269634246826, -0.0006318092346191406, -0.0005967915058135986, -0.0005617737770080566, -0.0005267560482025146, -0.0004917383193969727, -0.00045672059059143066, -0.00042170286178588867, -0.0003866851329803467, -0.0003516674041748047, -0.0003166496753692627, -0.0002816319465637207, -0.0002466142177581787, -0.00021159648895263672, -0.00017657876014709473, -0.00014156103134155273, -0.00010654330253601074, -7.152557373046875e-05, -3.650784492492676e-05, -1.4901161193847656e-06, 3.3527612686157227e-05, 6.854534149169922e-05, 0.00010356307029724121, 0.0001385807991027832, 0.0001735985279083252, 0.0002086162567138672, 0.00024363398551940918, 0.00027865171432495117, 0.00031366944313049316, 0.00034868717193603516, 0.00038370490074157715, 0.00041872262954711914, 0.00045374035835266113, 0.0004887580871582031, 0.0005237758159637451, 0.0005587935447692871, 0.0005938112735748291, 0.0006288290023803711, 0.0006638467311859131, 0.0006988644599914551, 0.0007338821887969971, 0.0007688999176025391, 0.0008039176464080811, 0.000838935375213623, 0.000873953104019165, 0.000908970832824707, 0.000943988561630249, 0.000979006290435791, 0.001014024019241333, 0.001049041748046875]}, "gradients/encoder.encoder.layers.0.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 2.0, 3.0, 1.0, 0.0, 2.0, 0.0, 1.0, 4.0, 7.0, 7.0, 11.0, 15.0, 29.0, 39.0, 48.0, 66.0, 108.0, 180.0, 192.0, 97.0, 54.0, 39.0, 29.0, 22.0, 12.0, 10.0, 11.0, 3.0, 5.0, 3.0, 0.0, 2.0, 4.0, 1.0, 1.0, 1.0, 1.0, 3.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.004425048828125, -0.004294693470001221, -0.004164338111877441, -0.004033982753753662, -0.003903627395629883, -0.0037732720375061035, -0.0036429166793823242, -0.003512561321258545, -0.0033822059631347656, -0.0032518506050109863, -0.003121495246887207, -0.0029911398887634277, -0.0028607845306396484, -0.002730429172515869, -0.00260007381439209, -0.0024697184562683105, -0.0023393630981445312, -0.002209007740020752, -0.0020786523818969727, -0.0019482970237731934, -0.001817941665649414, -0.0016875863075256348, -0.0015572309494018555, -0.0014268755912780762, -0.0012965202331542969, -0.0011661648750305176, -0.0010358095169067383, -0.000905454158782959, -0.0007750988006591797, -0.0006447434425354004, -0.0005143880844116211, -0.0003840327262878418, -0.0002536773681640625, -0.0001233220100402832, 7.033348083496094e-06, 0.0001373887062072754, 0.0002677440643310547, 0.000398099422454834, 0.0005284547805786133, 0.0006588101387023926, 0.0007891654968261719, 0.0009195208549499512, 0.0010498762130737305, 0.0011802315711975098, 0.001310586929321289, 0.0014409422874450684, 0.0015712976455688477, 0.001701653003692627, 0.0018320083618164062, 0.0019623637199401855, 0.002092719078063965, 0.002223074436187744, 0.0023534297943115234, 0.0024837851524353027, 0.002614140510559082, 0.0027444958686828613, 0.0028748512268066406, 0.00300520658493042, 0.0031355619430541992, 0.0032659173011779785, 0.003396272659301758, 0.003526628017425537, 0.0036569833755493164, 0.0037873387336730957, 0.003917694091796875]}, "gradients/encoder.encoder.layers.0.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 1.0, 0.0, 5.0, 1.0, 5.0, 6.0, 19.0, 33.0, 81.0, 489.0, 248.0, 63.0, 26.0, 15.0, 5.0, 5.0, 2.0, 3.0, 2.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.10401590168476105, -0.09946416318416595, -0.09491242468357086, -0.09036068618297577, -0.08580894768238068, -0.08125720918178558, -0.0767054632306099, -0.0721537247300148, -0.06760198622941971, -0.06305024772882462, -0.05849850922822952, -0.05394676700234413, -0.04939502850174904, -0.044843290001153946, -0.040291547775268555, -0.03573980927467346, -0.03118807077407837, -0.026636332273483276, -0.022084591910243034, -0.017532851547002792, -0.0129811130464077, -0.008429374545812607, -0.003877634182572365, 0.0006741061806678772, 0.00522584468126297, 0.009777584113180637, 0.014329323545098305, 0.018881063908338547, 0.02343280240893364, 0.027984540909528732, 0.032536283135414124, 0.037088021636009216, 0.0416397750377655, 0.046191513538360596, 0.05074325203895569, 0.05529499426484108, 0.05984673276543617, 0.06439846754074097, 0.06895021349191666, 0.07350195199251175, 0.07805369049310684, 0.08260542899370193, 0.08715716749429703, 0.09170890599489212, 0.09626065194606781, 0.1008123904466629, 0.105364128947258, 0.10991586744785309, 0.11446760594844818, 0.11901934444904327, 0.12357108294963837, 0.12812282145023346, 0.13267455995082855, 0.13722629845142365, 0.14177803695201874, 0.14632979035377502, 0.15088152885437012, 0.1554332673549652, 0.1599850058555603, 0.1645367443561554, 0.1690884828567505, 0.17364022135734558, 0.17819195985794067, 0.18274369835853577, 0.18729543685913086]}, "gradients/encoder.encoder.layers.0.layer_norm.bias": {"_type": "histogram", "values": [2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 6.0, 3.0, 3.0, 16.0, 10.0, 10.0, 11.0, 18.0, 19.0, 23.0, 24.0, 18.0, 33.0, 39.0, 90.0, 187.0, 191.0, 78.0, 34.0, 23.0, 31.0, 30.0, 15.0, 18.0, 10.0, 10.0, 14.0, 19.0, 6.0, 5.0, 3.0, 2.0, 1.0, 1.0, 1.0, 1.0, 3.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.07081735134124756, -0.06819088757038116, -0.06556441634893417, -0.06293795257806778, -0.06031148508191109, -0.057685017585754395, -0.055058553814888, -0.05243208631873131, -0.049805618822574615, -0.04717915132641792, -0.04455268383026123, -0.041926220059394836, -0.039299752563238144, -0.03667328506708145, -0.03404682129621506, -0.031420353800058365, -0.028793886303901672, -0.02616741880774498, -0.023540953174233437, -0.020914487540721893, -0.0182880200445652, -0.01566155254840851, -0.013035086914896965, -0.010408621281385422, -0.007782153785228729, -0.005155687220394611, -0.0025292206555604935, 9.724590927362442e-05, 0.0027237124741077423, 0.00535017903894186, 0.007976645603775978, 0.010603111237287521, 0.013229578733444214, 0.015856046229600906, 0.01848251186311245, 0.021108977496623993, 0.023735444992780685, 0.026361912488937378, 0.02898837812244892, 0.031614843755960464, 0.03424131125211716, 0.03686777874827385, 0.03949424624443054, 0.042120710015296936, 0.04474717751145363, 0.04737364500761032, 0.050000108778476715, 0.05262657627463341, 0.0552530437707901, 0.05787951126694679, 0.060505978763103485, 0.06313244253396988, 0.06575891375541687, 0.06838537752628326, 0.07101184129714966, 0.07363830506801605, 0.07626477628946304, 0.07889124006032944, 0.08151771128177643, 0.08414417505264282, 0.08677063882350922, 0.08939711004495621, 0.0920235738158226, 0.09465004503726959, 0.09727650880813599]}, "gradients/encoder.encoder.pos_conv_embed.conv.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 1.0, 3.0, 2.0, 1.0, 5.0, 1.0, 4.0, 3.0, 10.0, 9.0, 12.0, 9.0, 17.0, 16.0, 19.0, 27.0, 23.0, 28.0, 32.0, 46.0, 74.0, 153.0, 204.0, 77.0, 44.0, 28.0, 31.0, 28.0, 18.0, 14.0, 11.0, 7.0, 7.0, 9.0, 7.0, 7.0, 3.0, 6.0, 7.0, 5.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0], "bins": [-0.005344390869140625, -0.005177795886993408, -0.005011200904846191, -0.004844605922698975, -0.004678010940551758, -0.004511415958404541, -0.004344820976257324, -0.004178225994110107, -0.004011631011962891, -0.003845036029815674, -0.003678441047668457, -0.0035118460655212402, -0.0033452510833740234, -0.0031786561012268066, -0.00301206111907959, -0.002845466136932373, -0.0026788711547851562, -0.0025122761726379395, -0.0023456811904907227, -0.002179086208343506, -0.002012491226196289, -0.0018458962440490723, -0.0016793012619018555, -0.0015127062797546387, -0.0013461112976074219, -0.001179516315460205, -0.0010129213333129883, -0.0008463263511657715, -0.0006797313690185547, -0.0005131363868713379, -0.0003465414047241211, -0.0001799464225769043, -1.33514404296875e-05, 0.0001532435417175293, 0.0003198385238647461, 0.0004864335060119629, 0.0006530284881591797, 0.0008196234703063965, 0.0009862184524536133, 0.00115281343460083, 0.0013194084167480469, 0.0014860033988952637, 0.0016525983810424805, 0.0018191933631896973, 0.001985788345336914, 0.002152383327484131, 0.0023189783096313477, 0.0024855732917785645, 0.0026521682739257812, 0.002818763256072998, 0.002985358238220215, 0.0031519532203674316, 0.0033185482025146484, 0.0034851431846618652, 0.003651738166809082, 0.003818333148956299, 0.003984928131103516, 0.004151523113250732, 0.004318118095397949, 0.004484713077545166, 0.004651308059692383, 0.0048179030418396, 0.004984498023986816, 0.005151093006134033, 0.00531768798828125]}, "gradients/encoder.encoder.pos_conv_embed.conv.weight_v": {"_type": "histogram", "values": [2.0, 2.0, 4.0, 4.0, 1.0, 4.0, 1.0, 0.0, 3.0, 3.0, 2.0, 4.0, 6.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 4.0, 3.0, 0.0, 8.0, 19.0, 21.0, 20.0, 33.0, 42.0, 40.0, 91.0, 193.0, 544.0, 2096.0, 16875.0, 8357129.0, 9252.0, 1361.0, 379.0, 179.0, 56.0, 78.0, 39.0, 27.0, 13.0, 11.0, 5.0, 3.0, 1.0, 6.0, 4.0, 2.0, 2.0, 10.0, 5.0, 3.0, 1.0, 4.0, 5.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0], "bins": [-0.04955712705850601, -0.04811601713299751, -0.046674903482198715, -0.045233793556690216, -0.04379267990589142, -0.04235156998038292, -0.04091046005487442, -0.03946934640407562, -0.03802823647856712, -0.036587126553058624, -0.03514601290225983, -0.03370490297675133, -0.03226379305124283, -0.03082267940044403, -0.02938156947493553, -0.027940457686781883, -0.026499345898628235, -0.025058234110474586, -0.023617122322320938, -0.02217601239681244, -0.02073490060865879, -0.019293788820505142, -0.017852678894996643, -0.016411567106842995, -0.014970455318689346, -0.013529343530535698, -0.012088232673704624, -0.01064712181687355, -0.009206010028719902, -0.007764898706227541, -0.00632378738373518, -0.004882676526904106, -0.0034415684640407562, -0.002000457141548395, -0.0005593458190560341, 0.000881765503436327, 0.002322876825928688, 0.003763988148421049, 0.00520509947091341, 0.006646210327744484, 0.008087322115898132, 0.00952843390405178, 0.010969544760882854, 0.012410655617713928, 0.013851767405867577, 0.015292879194021225, 0.016733989119529724, 0.018175100907683372, 0.01961621269583702, 0.02105732448399067, 0.022498436272144318, 0.023939546197652817, 0.025380657985806465, 0.026821769773960114, 0.028262879699468613, 0.02970399148762226, 0.03114510327577591, 0.03258621320128441, 0.034027326852083206, 0.035468436777591705, 0.0369095504283905, 0.038350660353899, 0.0397917702794075, 0.0412328839302063, 0.0426739938557148]}, "gradients/encoder.encoder.pos_conv_embed.conv.weight_g": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 6.0, 2.0, 1.0, 3.0, 6.0, 4.0, 0.0, 2.0, 6.0, 1.0, 2.0, 4.0, 5.0, 3.0, 4.0, 4.0, 3.0, 7.0, 4.0, 3.0, 3.0, 8.0, 7.0, 6.0, 1.0, 5.0, 1.0, 1.0, 2.0, 3.0, 2.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.013770464807748795, -0.013081957586109638, -0.012393450364470482, -0.0117049440741539, -0.011016436852514744, -0.010327929630875587, -0.009639423340559006, -0.00895091611891985, -0.008262408897280693, -0.007573901675641537, -0.006885394919663668, -0.006196888163685799, -0.005508380942046642, -0.004819873720407486, -0.004131366964429617, -0.003442860208451748, -0.0027543529868125916, -0.002065845998004079, -0.0013773390091955662, -0.0006888320203870535, -3.25031578540802e-07, 0.0006881819572299719, 0.0013766889460384846, 0.0020651957020163536, 0.00275370292365551, 0.0034422099124640226, 0.004130716901272535, 0.004819223657250404, 0.005507730878889561, 0.006196238100528717, 0.006884744856506586, 0.007573251612484455, 0.008261756971478462, 0.008950264193117619, 0.009638771414756775, 0.010327277705073357, 0.011015784926712513, 0.01170429214835167, 0.012392798438668251, 0.013081305660307407, 0.013769812881946564, 0.01445832010358572, 0.015146827325224876, 0.015835333615541458, 0.01652384176850319, 0.01721234805881977, 0.017900854349136353, 0.018589362502098083, 0.019277868792414665, 0.019966375082731247, 0.020654883235692978, 0.02134338952600956, 0.02203189581632614, 0.022720403969287872, 0.023408910259604454, 0.024097416549921036, 0.024785924702882767, 0.02547443099319935, 0.02616293914616108, 0.02685144543647766, 0.027539953589439392, 0.028228459879755974, 0.028916966170072556, 0.029605474323034286, 0.030293980613350868]}, "gradients/encoder.feature_projection.projection.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0, 0.0, 2.0, 4.0, 7.0, 9.0, 9.0, 7.0, 22.0, 25.0, 39.0, 48.0, 62.0, 135.0, 190.0, 345.0, 718.0, 1259.0, 2547.0, 5973.0, 17263.0, 64874.0, 224668.0, 148850.0, 37303.0, 11351.0, 4367.0, 1953.0, 947.0, 541.0, 281.0, 157.0, 102.0, 70.0, 54.0, 33.0, 18.0, 11.0, 11.0, 4.0, 8.0, 4.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.03369140625, -0.03275012969970703, -0.03180885314941406, -0.030867576599121094, -0.029926300048828125, -0.028985023498535156, -0.028043746948242188, -0.02710247039794922, -0.02616119384765625, -0.02521991729736328, -0.024278640747070312, -0.023337364196777344, -0.022396087646484375, -0.021454811096191406, -0.020513534545898438, -0.01957225799560547, -0.0186309814453125, -0.01768970489501953, -0.016748428344726562, -0.015807151794433594, -0.014865875244140625, -0.013924598693847656, -0.012983322143554688, -0.012042045593261719, -0.01110076904296875, -0.010159492492675781, -0.009218215942382812, -0.008276939392089844, -0.007335662841796875, -0.006394386291503906, -0.0054531097412109375, -0.004511833190917969, -0.003570556640625, -0.0026292800903320312, -0.0016880035400390625, -0.0007467269897460938, 0.000194549560546875, 0.0011358261108398438, 0.0020771026611328125, 0.0030183792114257812, 0.00395965576171875, 0.004900932312011719, 0.0058422088623046875, 0.006783485412597656, 0.007724761962890625, 0.008666038513183594, 0.009607315063476562, 0.010548591613769531, 0.0114898681640625, 0.012431144714355469, 0.013372421264648438, 0.014313697814941406, 0.015254974365234375, 0.016196250915527344, 0.017137527465820312, 0.01807880401611328, 0.01902008056640625, 0.01996135711669922, 0.020902633666992188, 0.021843910217285156, 0.022785186767578125, 0.023726463317871094, 0.024667739868164062, 0.02560901641845703, 0.02655029296875]}, "gradients/encoder.feature_projection.projection.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 3.0, 2.0, 5.0, 1.0, 1.0, 6.0, 15.0, 7.0, 13.0, 14.0, 14.0, 15.0, 24.0, 29.0, 32.0, 52.0, 41.0, 61.0, 51.0, 71.0, 64.0, 49.0, 72.0, 43.0, 47.0, 31.0, 46.0, 37.0, 31.0, 27.0, 26.0, 6.0, 11.0, 18.0, 5.0, 8.0, 5.0, 7.0, 7.0, 4.0, 4.0, 3.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.0062255859375, -0.006035566329956055, -0.005845546722412109, -0.005655527114868164, -0.005465507507324219, -0.0052754878997802734, -0.005085468292236328, -0.004895448684692383, -0.0047054290771484375, -0.004515409469604492, -0.004325389862060547, -0.0041353702545166016, -0.003945350646972656, -0.003755331039428711, -0.0035653114318847656, -0.0033752918243408203, -0.003185272216796875, -0.0029952526092529297, -0.0028052330017089844, -0.002615213394165039, -0.0024251937866210938, -0.0022351741790771484, -0.002045154571533203, -0.0018551349639892578, -0.0016651153564453125, -0.0014750957489013672, -0.0012850761413574219, -0.0010950565338134766, -0.0009050369262695312, -0.0007150173187255859, -0.0005249977111816406, -0.0003349781036376953, -0.00014495849609375, 4.506111145019531e-05, 0.00023508071899414062, 0.00042510032653808594, 0.0006151199340820312, 0.0008051395416259766, 0.0009951591491699219, 0.0011851787567138672, 0.0013751983642578125, 0.0015652179718017578, 0.0017552375793457031, 0.0019452571868896484, 0.0021352767944335938, 0.002325296401977539, 0.0025153160095214844, 0.0027053356170654297, 0.002895355224609375, 0.0030853748321533203, 0.0032753944396972656, 0.003465414047241211, 0.0036554336547851562, 0.0038454532623291016, 0.004035472869873047, 0.004225492477416992, 0.0044155120849609375, 0.004605531692504883, 0.004795551300048828, 0.0049855709075927734, 0.005175590515136719, 0.005365610122680664, 0.005555629730224609, 0.005745649337768555, 0.0059356689453125]}, "gradients/encoder.feature_projection.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 3.0, 4.0, 5.0, 9.0, 7.0, 12.0, 19.0, 69.0, 191.0, 121.0, 27.0, 13.0, 5.0, 6.0, 4.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.09296739846467972, -0.0908062532544136, -0.0886451005935669, -0.08648395538330078, -0.08432281017303467, -0.08216165751218796, -0.08000051230192184, -0.07783936709165573, -0.07567821443080902, -0.07351706922054291, -0.0713559165596962, -0.06919477134943008, -0.06703362613916397, -0.06487247347831726, -0.06271132826805115, -0.060550179332494736, -0.05838903412222862, -0.05622788518667221, -0.0540667399764061, -0.051905591040849686, -0.049744442105293274, -0.04758329689502716, -0.04542214795947075, -0.04326099902391434, -0.041099853813648224, -0.03893870487809181, -0.0367775596678257, -0.03461641073226929, -0.032455261796712875, -0.030294114723801613, -0.02813296765089035, -0.02597181871533394, -0.023810673505067825, -0.021649526432156563, -0.01948837749660015, -0.01732723042368889, -0.015166082419455051, -0.013004934415221214, -0.010843787342309952, -0.008682639338076115, -0.0065214913338422775, -0.00436034332960844, -0.0021991957910358906, -3.804825246334076e-05, 0.0021230997517704964, 0.0042842477560043335, 0.006445394828915596, 0.008606542833149433, 0.01076769083738327, 0.012928838841617107, 0.015089986845850945, 0.017251133918762207, 0.01941228285431862, 0.02157342992722988, 0.023734577000141144, 0.025895725935697556, 0.028056873008608818, 0.03021802008152008, 0.03237916901707649, 0.034540317952632904, 0.03670146316289902, 0.03886261209845543, 0.04102376103401184, 0.043184906244277954, 0.045346055179834366]}, "gradients/encoder.feature_projection.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 3.0, 1.0, 3.0, 0.0, 3.0, 6.0, 2.0, 6.0, 6.0, 6.0, 16.0, 40.0, 103.0, 152.0, 73.0, 27.0, 11.0, 9.0, 5.0, 8.0, 4.0, 2.0, 3.0, 2.0, 3.0, 4.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.040926337242126465, -0.03939737379550934, -0.03786841034889221, -0.036339450627565384, -0.03481048718094826, -0.03328152373433113, -0.0317525640130043, -0.030223600566387177, -0.02869463711977005, -0.027165673673152924, -0.025636712089180946, -0.02410775050520897, -0.022578787058591843, -0.021049823611974716, -0.01952086202800274, -0.01799190044403076, -0.016462936997413635, -0.014933974482119083, -0.013405011966824532, -0.01187604945152998, -0.010347086936235428, -0.008818124420940876, -0.007289161905646324, -0.005760199390351772, -0.0042312368750572205, -0.0027022743597626686, -0.0011733118444681168, 0.0003556506708264351, 0.001884613186120987, 0.003413575701415539, 0.004942538216710091, 0.0064715007320046425, 0.008000463247299194, 0.009529425762593746, 0.011058388277888298, 0.01258735079318285, 0.014116313308477402, 0.015645276755094528, 0.017174238339066505, 0.018703199923038483, 0.02023216336965561, 0.021761126816272736, 0.023290088400244713, 0.02481904998421669, 0.026348013430833817, 0.027876976877450943, 0.02940593846142292, 0.030934900045394897, 0.032463863492012024, 0.03399282693862915, 0.03552179038524628, 0.037050750106573105, 0.03857971355319023, 0.04010867699980736, 0.041637636721134186, 0.04316660016775131, 0.04469556361436844, 0.046224527060985565, 0.04775349050760269, 0.04928245022892952, 0.050811413675546646, 0.05234037712216377, 0.0538693368434906, 0.05539830029010773, 0.056927263736724854]}, "_wandb": {"runtime": 2401}} \ No newline at end of file diff --git a/wandb/run-20220302_214437-2u4nhnsf/logs/debug-internal.log b/wandb/run-20220302_214437-2u4nhnsf/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..777e6b33fcf03e967e54de620cb07f5e6df7903a --- /dev/null +++ b/wandb/run-20220302_214437-2u4nhnsf/logs/debug-internal.log @@ -0,0 +1,6411 @@ +2022-03-02 21:44:38,516 INFO MainThread:265689 [internal.py:wandb_internal():89] W&B internal server running at pid: 265689, started at: 2022-03-02 21:44:38.516518 +2022-03-02 21:44:38,519 INFO WriterThread:265689 [datastore.py:open_for_write():77] open: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/run-2u4nhnsf.wandb +2022-03-02 21:44:38,519 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: check_version +2022-03-02 21:44:38,521 DEBUG SenderThread:265689 [sender.py:send():235] send: header +2022-03-02 21:44:38,521 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: check_version +2022-03-02 21:44:38,596 DEBUG SenderThread:265689 [sender.py:send():235] send: run +2022-03-02 21:44:38,704 INFO SenderThread:265689 [dir_watcher.py:__init__():169] watching files in: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files +2022-03-02 21:44:38,704 INFO SenderThread:265689 [sender.py:_start_run_threads():809] run started: 2u4nhnsf with start time 1646257477 +2022-03-02 21:44:38,704 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:44:38,704 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:44:38,705 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: run_start +2022-03-02 21:44:38,711 DEBUG HandlerThread:265689 [meta.py:__init__():36] meta init +2022-03-02 21:44:38,711 DEBUG HandlerThread:265689 [meta.py:__init__():50] meta init done +2022-03-02 21:44:38,711 DEBUG HandlerThread:265689 [meta.py:probe():210] probe +2022-03-02 21:44:38,718 DEBUG HandlerThread:265689 [meta.py:_setup_git():200] setup git +2022-03-02 21:44:38,732 DEBUG HandlerThread:265689 [meta.py:_setup_git():207] setup git done +2022-03-02 21:44:38,732 DEBUG HandlerThread:265689 [meta.py:_save_pip():54] save pip +2022-03-02 21:44:38,733 DEBUG HandlerThread:265689 [meta.py:_save_pip():68] save pip done +2022-03-02 21:44:38,733 DEBUG HandlerThread:265689 [meta.py:probe():248] probe done +2022-03-02 21:44:38,840 DEBUG SenderThread:265689 [sender.py:send():235] send: files +2022-03-02 21:44:38,840 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-metadata.json with policy now +2022-03-02 21:44:38,844 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:44:38,845 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:44:38,906 DEBUG SenderThread:265689 [sender.py:send():235] send: config +2022-03-02 21:44:38,907 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:44:38,907 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:44:38,907 WARNING SenderThread:265689 [sender.py:send_metric():902] Seen metric with glob (shouldnt happen) +2022-03-02 21:44:39,110 INFO Thread-11 :265689 [upload_job.py:push():137] Uploaded file /tmp/tmpgshvafp1wandb/3ha445jj-wandb-metadata.json +2022-03-02 21:44:39,706 INFO Thread-8 :265689 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:44:39,706 INFO Thread-8 :265689 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-metadata.json +2022-03-02 21:44:39,707 INFO Thread-8 :265689 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/requirements.txt +2022-03-02 21:44:39,707 INFO Thread-8 :265689 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:44:41,704 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:44:45,706 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:44:46,177 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:44:46,177 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:44:46,177 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:44:46,177 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:44:46,177 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:44:46,178 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:44:46,706 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:44:47,707 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:44:51,708 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:44:52,228 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:44:52,229 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:44:52,229 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:44:52,709 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:44:53,709 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:44:54,088 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:44:54,088 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:44:57,710 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:44:58,435 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:44:58,435 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:44:58,437 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:44:58,711 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:44:59,711 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:45:03,712 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:45:04,381 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:45:04,381 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:45:04,382 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:45:04,713 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:45:05,713 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:45:07,129 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 21:45:09,136 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:45:09,283 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:45:09,714 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/config.yaml +2022-03-02 21:45:09,715 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:45:10,211 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:45:10,211 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:45:10,212 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:45:10,715 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:45:11,715 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:45:13,716 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:45:16,121 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:45:16,121 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:45:16,122 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:45:16,717 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:45:17,717 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:45:20,718 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:45:21,963 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:45:21,964 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:45:21,965 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:45:22,719 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:45:23,719 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:45:24,323 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:45:24,324 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:45:24,720 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:45:26,720 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:45:27,737 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:45:27,738 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:45:27,738 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:45:28,721 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:45:28,721 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:45:29,722 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:45:32,723 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:45:33,421 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:45:33,422 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:45:33,422 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:45:33,723 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:45:34,724 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:45:35,724 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:45:37,551 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 21:45:38,725 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:45:39,193 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:45:39,194 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:45:39,194 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:45:39,532 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:45:39,532 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:45:39,726 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:45:40,726 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:45:41,727 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:45:42,727 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:45:44,906 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:45:44,907 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:45:44,908 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:45:45,728 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:45:46,728 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:45:48,729 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:45:50,620 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:45:50,621 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:45:50,621 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:45:50,730 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:45:52,730 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:45:54,683 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:45:54,684 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:45:54,731 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:45:56,326 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:45:56,326 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:45:56,328 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:45:56,732 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:45:57,732 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:45:58,733 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:45:59,733 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:46:01,734 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:46:01,961 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:46:01,962 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:46:01,962 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:46:02,734 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:46:03,735 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:46:04,735 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:46:07,559 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:46:07,559 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:46:07,560 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:46:07,736 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:46:07,736 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:46:07,973 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 21:46:08,737 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:46:09,737 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:46:09,757 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:46:09,757 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:46:11,738 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:46:13,096 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:46:13,096 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:46:13,097 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:46:13,739 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:46:14,739 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:46:15,739 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:46:17,740 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:46:18,523 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:46:18,524 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:46:18,524 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:46:18,740 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:46:19,741 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:46:20,741 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:46:21,742 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:46:23,899 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:46:23,899 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:46:23,900 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:46:24,743 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:46:24,743 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:46:25,038 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:46:25,038 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:46:25,743 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:46:27,744 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:46:29,263 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:46:29,264 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:46:29,264 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:46:29,745 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:46:29,745 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:46:30,745 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:46:34,654 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:46:34,654 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:46:34,656 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:46:34,746 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:46:34,746 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:46:36,747 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:46:38,340 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 21:46:38,748 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:46:39,912 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:46:39,913 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:46:39,914 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:46:40,236 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:46:40,236 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:46:40,748 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:46:40,749 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:46:44,750 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:46:45,245 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:46:45,246 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:46:45,246 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:46:45,750 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:46:46,751 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:46:47,751 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:46:48,751 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:46:50,495 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:46:50,496 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:46:50,496 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:46:50,752 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:46:50,752 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:46:51,752 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:46:52,753 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:46:54,753 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:46:55,557 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:46:55,557 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:46:55,654 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:46:55,655 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:46:55,655 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:46:55,754 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:46:57,754 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:46:59,755 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:47:00,785 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:47:00,786 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:47:00,786 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:47:01,756 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:47:01,756 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:47:03,756 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:47:05,757 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:47:05,934 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:47:05,935 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:47:05,935 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:47:06,757 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:47:07,758 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:47:08,733 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 21:47:09,758 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:47:10,593 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:47:10,593 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:47:11,014 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:47:11,014 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:47:11,015 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:47:11,759 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:47:13,760 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:47:15,760 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:47:16,002 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:47:16,002 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:47:16,003 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:47:16,761 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:47:17,761 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:47:19,762 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:47:21,063 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:47:21,064 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:47:21,096 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:47:21,763 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:47:21,763 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:47:25,656 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:47:25,657 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:47:25,764 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:47:26,139 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:47:26,139 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:47:26,201 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:47:26,764 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:47:27,765 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:47:28,765 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:47:29,766 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:47:31,198 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:47:31,198 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:47:31,199 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:47:31,766 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:47:31,767 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:47:32,767 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:47:35,768 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:47:36,335 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:47:36,335 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:47:36,336 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:47:36,768 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:47:37,768 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:47:38,769 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:47:39,134 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 21:47:40,697 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:47:40,698 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:47:40,770 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:47:41,375 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:47:41,376 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:47:41,376 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:47:41,770 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:47:42,770 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:47:46,295 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:47:46,295 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:47:46,295 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:47:46,772 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:47:46,772 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:47:48,773 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:47:50,773 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:47:51,118 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:47:51,118 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:47:51,119 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:47:51,774 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:47:52,774 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:47:54,775 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:47:55,755 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:47:55,755 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:47:55,792 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:47:55,792 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:47:55,793 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:47:56,776 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:47:56,776 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:47:58,776 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:48:00,370 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:48:00,370 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:48:00,373 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:48:00,777 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:48:00,777 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:48:04,778 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:48:04,778 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:48:04,779 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:48:04,779 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:48:04,780 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:48:06,780 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:48:07,780 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:48:08,780 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:48:09,067 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:48:09,068 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:48:09,068 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:48:09,523 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 21:48:09,781 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:48:10,781 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:48:10,930 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:48:10,930 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:48:11,782 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:48:12,782 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:48:13,245 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:48:13,246 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:48:13,247 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:48:13,782 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:48:14,783 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:48:15,783 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:48:16,783 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:48:17,155 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:48:17,155 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:48:17,156 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:48:17,784 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:48:18,784 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:48:19,784 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:48:20,785 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:48:20,815 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:48:20,816 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:48:20,816 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:48:21,785 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:48:21,785 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:48:23,786 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:48:24,194 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:48:24,194 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:48:24,195 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:48:24,786 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:48:25,786 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:48:26,058 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:48:26,058 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:48:27,287 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:48:27,288 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:48:27,323 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:48:27,787 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:48:27,787 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:48:29,788 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:48:29,982 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:48:29,982 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:48:29,983 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:48:30,788 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:48:31,789 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:48:32,374 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:48:32,375 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:48:32,375 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:48:32,789 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:48:33,789 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:48:34,549 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:48:34,550 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:48:34,550 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:48:34,790 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:48:35,790 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:48:36,461 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:48:36,461 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:48:36,462 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:48:36,790 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:48:37,791 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:48:38,052 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:48:38,052 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:48:38,053 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:48:38,791 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:48:39,792 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:48:39,909 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 21:48:40,164 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,165 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,165 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,165 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,165 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,165 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,170 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,170 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,176 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,181 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,186 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,191 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,192 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,197 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,202 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,208 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,213 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,213 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,213 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,213 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,213 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,213 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,213 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,223 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,223 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,223 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,223 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,223 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,223 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,224 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,224 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,224 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,224 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,224 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,224 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,224 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,227 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,227 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,227 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,227 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,227 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,228 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,228 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,228 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,228 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,228 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,228 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,228 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,228 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,228 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,228 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,228 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,239 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,239 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,239 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,239 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,239 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,239 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,239 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,239 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,240 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,240 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,240 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,240 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,240 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,240 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,240 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,240 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,245 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,246 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,246 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,246 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,246 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,251 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,251 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,262 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,262 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,262 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,262 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,262 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,262 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,262 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,262 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,268 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,268 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,273 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,279 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,279 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,279 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,279 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,279 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,279 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,279 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,279 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,279 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,279 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,279 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,279 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,285 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,295 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,295 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,295 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,295 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,296 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,296 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,296 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,296 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,296 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,296 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,296 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,301 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,301 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,301 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,302 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,302 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,302 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,302 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,302 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,302 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,302 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,302 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,307 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,308 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,308 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,308 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,308 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,308 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,308 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,308 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,308 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,308 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,308 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,308 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,314 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,314 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,314 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,314 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,314 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,314 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,314 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,314 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,314 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,320 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,320 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,320 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,320 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,320 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,320 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,320 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,320 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,323 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,323 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,324 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,324 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,324 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,324 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,324 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,324 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,324 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,324 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,324 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,324 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,324 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,325 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,325 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,325 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,325 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,325 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,325 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,325 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,325 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,325 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,325 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,325 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,325 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,326 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,326 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,326 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,326 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,326 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,326 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,326 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,326 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,326 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,326 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,326 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,326 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,327 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,327 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,327 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,327 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,327 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,327 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,327 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,327 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,327 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,327 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,327 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,327 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,328 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,328 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,328 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,328 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,328 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,328 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,328 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,328 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,328 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,328 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,328 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,329 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,329 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,329 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,329 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,329 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,329 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,329 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,329 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,329 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,329 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,329 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,329 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,330 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,330 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,330 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,330 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,330 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,330 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,330 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,330 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,330 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,330 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,330 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,330 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,331 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,331 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,331 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,331 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,331 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,331 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,331 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,331 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,331 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,331 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,331 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,332 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,332 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,332 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,332 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,332 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,332 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,332 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,332 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,332 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,332 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,332 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,332 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,333 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,333 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,333 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,333 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,333 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,333 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,333 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,333 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,333 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,333 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,333 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,334 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,334 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,334 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,334 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,334 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,334 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,334 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,334 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,334 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,334 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,334 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,334 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,334 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,335 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,335 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,335 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,335 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,335 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,335 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,335 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,335 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,335 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,335 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,335 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,335 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,335 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,335 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,335 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,335 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,336 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,336 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,336 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,336 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,336 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,336 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,336 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,336 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,336 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,336 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,336 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,336 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,336 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,336 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,336 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,336 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,336 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,337 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,337 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,337 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,337 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,337 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,337 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,337 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,337 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,337 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,337 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,337 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,337 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,337 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,337 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,337 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,337 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,338 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,338 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,338 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,338 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,338 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,338 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,338 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,338 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,338 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,338 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,338 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,338 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,338 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,338 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,339 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,339 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,339 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,339 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,339 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,339 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,339 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,339 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,339 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,339 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,339 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,339 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,339 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,339 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,339 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,339 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,339 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,340 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,340 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,340 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,340 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,340 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,340 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,340 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,340 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,340 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,340 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,340 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,340 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,340 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,340 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,341 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,341 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,341 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,341 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,341 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,341 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,341 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,341 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,341 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,341 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,341 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,341 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,341 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,341 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,341 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,342 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,342 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,342 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,342 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,342 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,342 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,342 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,342 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,342 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,342 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,342 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,342 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,342 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,342 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,343 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,343 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,343 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,343 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,343 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,343 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,343 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,343 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,343 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,343 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,343 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,343 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,343 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,343 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,343 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,344 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,344 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,344 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,344 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,344 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,344 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,344 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,344 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,344 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,344 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,344 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,344 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,345 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,345 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,345 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,345 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,345 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,345 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,345 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,345 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,345 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,345 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,345 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,345 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,345 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,345 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,345 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,346 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,346 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,346 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,346 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,346 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,346 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,346 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,346 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,346 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,346 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,346 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,346 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,346 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,346 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,346 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,346 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,347 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,347 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,347 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,347 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,347 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,347 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,347 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,347 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,347 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,347 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,347 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,347 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,347 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,347 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,347 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,347 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,348 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,348 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,348 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,348 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,348 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,348 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,348 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,348 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,348 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,348 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,348 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,348 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,348 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,348 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,348 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,348 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,349 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,349 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,349 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,349 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,349 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,349 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,349 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,349 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,349 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,349 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,349 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,349 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,349 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,349 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,349 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,349 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,350 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,350 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,350 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,350 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,350 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,350 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,350 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,350 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,350 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,350 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,350 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,350 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,350 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,350 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,351 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,351 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,351 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,351 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,351 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,351 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,351 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,351 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,351 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,351 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,351 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,351 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,351 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,351 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,351 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,352 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,352 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,352 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,352 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,352 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,352 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,352 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,352 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,352 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,352 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,352 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,352 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,352 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,352 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,352 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,353 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,353 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,353 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,353 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,353 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,353 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,353 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,353 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,353 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,353 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,353 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,353 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,353 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,353 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,353 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,353 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,354 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,354 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,354 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,354 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,354 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,354 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,354 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,354 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,354 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,354 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,354 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,354 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,354 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,354 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,355 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,355 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,355 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,355 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,355 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,355 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,355 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,355 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,355 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,355 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,355 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,355 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,355 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,355 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,355 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,355 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,355 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,356 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,356 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,356 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,356 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,356 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,356 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,356 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,356 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,356 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,356 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,356 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,356 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,356 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,356 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,356 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,356 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,356 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,357 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,357 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,357 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,357 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,357 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,357 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,357 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,357 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,357 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,357 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,357 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,357 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,357 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,357 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,357 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,357 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,358 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,358 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,358 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,358 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,358 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,358 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,358 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,358 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,358 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,358 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,358 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,358 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,358 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,358 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,358 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,359 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,359 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,359 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,359 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,359 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,359 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,359 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,359 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,359 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,359 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,359 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,359 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,359 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,359 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,359 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,359 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,360 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,360 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,360 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,360 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,360 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,360 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,360 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,360 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,360 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,360 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,360 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,360 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,360 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,360 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,360 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,360 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,361 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,361 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,361 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,361 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,361 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,361 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,361 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,361 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,361 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,361 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,361 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,361 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,361 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,361 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,361 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,362 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,362 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,362 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,362 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,362 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,362 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,362 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,362 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,362 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,362 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,362 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,362 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,362 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,362 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,363 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,363 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,363 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,363 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,363 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,363 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,363 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,363 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,363 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,363 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,363 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,363 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,363 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,363 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,363 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,363 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,364 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,364 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,364 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,364 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,364 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,364 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,364 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,364 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,364 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,364 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,364 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,364 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,364 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,364 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,364 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,364 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,365 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,365 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,365 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,365 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,365 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,365 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,365 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,365 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,365 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,365 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,365 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,365 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,365 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,365 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,366 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,366 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,366 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,366 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,366 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,366 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,366 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,366 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,366 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,366 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,366 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,366 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,366 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,366 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,366 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,366 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,367 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,367 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,367 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,367 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,367 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,367 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,367 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,367 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,367 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,367 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,367 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,367 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,367 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,367 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,367 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,367 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,368 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,368 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,368 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,368 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,368 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,368 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,368 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,368 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,368 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,368 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,368 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,368 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,368 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,368 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,368 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,368 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,369 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,369 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,369 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,369 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,369 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,369 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,369 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,369 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,369 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,369 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,369 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,369 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,369 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,369 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,369 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,369 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,370 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,370 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,370 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,370 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,370 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,370 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,370 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,370 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,370 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,370 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,370 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,370 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,370 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,370 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,370 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,371 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,371 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,371 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,371 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,371 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,371 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,371 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,371 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,371 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,371 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,371 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,371 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,371 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,371 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,371 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,372 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,372 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,372 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,372 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,372 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,372 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,372 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,372 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,372 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,372 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,372 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,372 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,372 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,372 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,372 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,373 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,373 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,373 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,373 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,373 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,373 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,373 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,373 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,373 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,373 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,373 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,373 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,373 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,373 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,373 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,373 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,373 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,374 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,374 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,374 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,374 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,374 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,374 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,374 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,374 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,374 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,374 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,374 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,374 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,374 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,374 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,374 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,375 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,375 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,375 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,375 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,375 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,375 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,375 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,375 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,375 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,375 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,375 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,375 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,375 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,375 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,375 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,375 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,375 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,376 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,376 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,376 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,376 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,376 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,376 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,376 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,376 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,376 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,376 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,376 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,376 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,376 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,376 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,376 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,376 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,376 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,377 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,377 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,377 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,377 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,377 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,377 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,377 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,377 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,377 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,377 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,377 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,377 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,377 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,377 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,377 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,377 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,377 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,378 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,378 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,378 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,378 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,378 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,378 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,378 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,378 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,378 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,378 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,378 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,378 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,378 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,378 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,378 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,379 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,379 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,379 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,379 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,379 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,379 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,379 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,379 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,379 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,379 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,379 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,379 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,379 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,379 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,379 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,379 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,379 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,380 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,380 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,380 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,380 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,380 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,380 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,380 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,380 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,380 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,380 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,380 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,380 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,380 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,380 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,380 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,380 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,380 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,381 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,381 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,381 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,381 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,381 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,381 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,381 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,381 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,381 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,381 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,381 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,381 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,381 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,381 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,381 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,382 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,382 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,382 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,382 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,382 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,382 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,382 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,382 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,382 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,382 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,382 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,382 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,382 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,382 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,382 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,383 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,383 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,383 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,383 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,383 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,383 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,383 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,383 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,383 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,383 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,383 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,383 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,383 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,383 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,383 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,383 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,384 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,384 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,384 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,384 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,384 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,384 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,384 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,384 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,384 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,384 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,384 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,384 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,384 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,384 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,384 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,385 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,385 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,385 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,385 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,385 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,385 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,385 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,385 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,385 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,385 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,385 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,385 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,385 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,385 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,385 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,385 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,385 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,386 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,386 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,386 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,386 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,386 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,386 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,386 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,386 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,386 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,386 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,386 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,386 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,386 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,386 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,386 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,386 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,387 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,387 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,387 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,387 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,387 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,387 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,387 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,387 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,387 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,387 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,387 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,387 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,387 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,387 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,387 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,387 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,387 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,388 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,388 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,388 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,388 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,388 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,388 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,388 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,388 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,388 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,388 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,388 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,388 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,388 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,388 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,388 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,388 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,388 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,389 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,389 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,389 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,389 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,389 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,389 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,389 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,389 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,389 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,389 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,389 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,389 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,389 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,389 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,389 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,389 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,390 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,390 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,390 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,390 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,390 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,390 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,390 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,390 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,390 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,390 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,390 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,390 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,390 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,390 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,390 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,391 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,391 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,391 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,391 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,391 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,391 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,391 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,391 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,391 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,391 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,391 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,391 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,391 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,391 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,391 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,392 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,392 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,392 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,392 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,392 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,392 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,392 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,392 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,392 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,392 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,392 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,392 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,392 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,392 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,392 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,392 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,393 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,393 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,393 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,393 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,393 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,393 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,393 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,393 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,393 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,393 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,393 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,393 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,393 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,393 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,393 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,394 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,394 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,394 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,394 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,394 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,394 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,394 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,394 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,394 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,394 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,394 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,394 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,394 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,394 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,394 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,395 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,395 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,395 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,395 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,395 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,395 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,395 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,395 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,395 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,395 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,395 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,395 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,395 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,395 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,395 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,396 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,396 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,396 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,396 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,396 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,396 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,396 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,396 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,396 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,396 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,396 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,396 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,396 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,396 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,396 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,396 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,396 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,396 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,397 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,397 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,397 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,397 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,397 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,397 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,397 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,397 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,397 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,397 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,397 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,397 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,397 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,397 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,397 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,397 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,398 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,398 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,398 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,398 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,398 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,398 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,398 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,398 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,398 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,398 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,398 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,398 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,398 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,398 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,398 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,399 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,399 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,399 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,399 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,399 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,399 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,399 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,399 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,399 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,399 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,399 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,399 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,399 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,399 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,399 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,399 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,400 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,400 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,400 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,400 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,400 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,400 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,400 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,400 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,400 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,400 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,400 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,400 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,400 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,400 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,400 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,400 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,401 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,401 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,401 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,401 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,401 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,401 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,401 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,401 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,401 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,401 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,401 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,401 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,401 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,401 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,401 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,402 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,402 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,402 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,402 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,402 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,402 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,402 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,402 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,402 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,402 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,402 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,402 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,402 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,402 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,403 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,403 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,403 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,403 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,403 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,403 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,403 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,403 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,403 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,403 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,403 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,403 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,403 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,403 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,403 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,404 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,404 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,404 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,404 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,404 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,404 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,404 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,404 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,404 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,404 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,404 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,404 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,404 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,404 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,404 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,405 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,405 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,405 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,405 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,405 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,405 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,405 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,405 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,405 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,405 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,405 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,405 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,405 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,405 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,405 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,406 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,406 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,406 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,406 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,406 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,406 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,406 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,406 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,406 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,406 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,406 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,406 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,406 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,406 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,406 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,406 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,407 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,407 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,407 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,407 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,407 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,407 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,407 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,407 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,407 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,407 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,407 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,407 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,407 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,407 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,407 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,407 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,407 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,407 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,407 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,408 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,408 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,408 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,408 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,408 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,408 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,408 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,408 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,408 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,408 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,408 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,408 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,408 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,408 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,408 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,408 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,409 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,409 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,409 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,409 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,409 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,409 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,409 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,409 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,409 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,409 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,409 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,409 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,409 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,409 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,409 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,409 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,410 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,410 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,410 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,410 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,410 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,410 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,410 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,410 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,410 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,410 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,410 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,410 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,410 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,410 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,410 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,411 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,411 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,411 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,411 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,411 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,411 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,411 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,411 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,411 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,411 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,411 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,411 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,411 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,411 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,411 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,412 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,412 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,412 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,412 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,412 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,412 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,412 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,412 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,412 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,412 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,412 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,412 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,412 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,412 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,412 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,413 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,413 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,413 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,413 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,413 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,413 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,413 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,413 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,413 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,413 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,413 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,413 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,413 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,413 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,413 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,413 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,414 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,414 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,414 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,414 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,414 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,414 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,414 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,414 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,414 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,414 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,414 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,414 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,414 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,414 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,414 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,415 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,415 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,415 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,415 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,415 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,415 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,415 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,415 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,415 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,415 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,415 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,415 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,415 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,415 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,416 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,416 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,416 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,416 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,416 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,416 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,416 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,416 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,416 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,416 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,416 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,416 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,416 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,416 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,416 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,416 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,417 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,417 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,417 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,417 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,417 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,417 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,417 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,417 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,417 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,417 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,417 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,417 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,417 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,417 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,417 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,417 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,417 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,418 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,418 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,418 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,418 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,418 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,418 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,418 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,418 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,418 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,418 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,418 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,418 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,418 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,418 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,418 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,419 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,419 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,419 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,419 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,419 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,419 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,419 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,419 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,419 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,419 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,419 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,419 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,419 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,419 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,419 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,419 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,420 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,420 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,420 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,420 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,420 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,420 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,420 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,420 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,420 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,420 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,420 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,420 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,420 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,420 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,421 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,421 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,421 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,421 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,421 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,421 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,421 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,421 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,421 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,421 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,421 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,421 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,421 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,421 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,421 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,422 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,422 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,422 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,422 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,422 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,422 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,422 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,422 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,422 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,422 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,422 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,422 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,422 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,422 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,423 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,423 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,423 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,423 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,423 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,423 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,423 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,423 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,423 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,423 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,423 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,423 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,423 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,423 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,423 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,423 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,424 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,424 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,424 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,424 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,424 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,424 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,424 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,424 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,424 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,424 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,424 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,424 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,424 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,424 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,424 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,424 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,425 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,425 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,425 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,425 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,425 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,425 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,425 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,425 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,425 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,425 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,425 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,425 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,425 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,425 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,425 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,426 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,426 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,426 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,426 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,426 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,426 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,426 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,426 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,426 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,426 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,426 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,426 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,426 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,426 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,426 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,426 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,427 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,427 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,427 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,427 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,427 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,427 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,427 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,427 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,427 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,427 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,427 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,427 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,427 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,427 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,427 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,427 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,428 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,428 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,428 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,428 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,428 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,428 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,428 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,428 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,428 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,428 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,428 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,428 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,428 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,428 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,428 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,428 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,428 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,429 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,429 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,429 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,429 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,429 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,429 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,429 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,429 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,429 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,429 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,429 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,429 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,429 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,429 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,429 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,429 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,430 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,430 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,430 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,430 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,430 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,430 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,430 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,430 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,430 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,430 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,430 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,430 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,430 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,430 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,430 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,431 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,431 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,431 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,431 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,431 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,431 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,431 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,431 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,431 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,431 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,431 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,431 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,431 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,431 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,431 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,432 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,432 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,432 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,432 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,432 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,432 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,432 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,432 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,432 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,432 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,432 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,432 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,432 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,432 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,432 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,433 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,433 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,433 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,433 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,433 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,433 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,433 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,433 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,433 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,433 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,433 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,433 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,433 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,433 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,433 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,434 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,434 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,434 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,434 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,434 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,434 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,434 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,434 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,434 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,434 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,434 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,434 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,434 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,434 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,434 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,435 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,435 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,435 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,435 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,435 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,435 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,435 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,435 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,435 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,435 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,435 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,435 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,435 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,435 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,435 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,436 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,436 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,436 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,436 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,436 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,436 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,436 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,436 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,436 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,436 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,436 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,436 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,436 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,436 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,436 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,437 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,437 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,437 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,437 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,437 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,437 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,437 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,437 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,437 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,437 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,437 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,437 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,437 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,437 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,437 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,438 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,438 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,438 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,438 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,438 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,438 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,438 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,438 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,438 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,438 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,438 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,438 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,438 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,438 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,438 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,439 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,439 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,439 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,439 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,439 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,439 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,439 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,439 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,439 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,439 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,439 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,439 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,439 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,439 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,439 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,439 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,440 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,440 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,440 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,440 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,440 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,440 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,440 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,440 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,440 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,440 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,440 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,440 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,440 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,440 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,440 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,440 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,441 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,441 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,441 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,441 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,441 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,441 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,441 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,441 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,441 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,441 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,441 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,441 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,441 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,441 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,441 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,442 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,442 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,442 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,442 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,442 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,442 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,442 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,442 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,442 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,442 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,442 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,442 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,442 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,442 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,443 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,443 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,443 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,443 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,443 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,443 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,443 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,443 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,443 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,443 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,443 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,443 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,443 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,443 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,443 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,444 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,444 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,444 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,444 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,444 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,444 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,444 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,444 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,444 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,444 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,444 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,444 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,444 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,444 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,444 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,444 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,444 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,445 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,445 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,445 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,445 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,445 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,445 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,445 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,445 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,445 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,445 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,445 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,445 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,445 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,445 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,445 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,445 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,446 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,446 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,446 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,446 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,446 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,446 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,446 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,446 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,446 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,446 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,446 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,446 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,446 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,446 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,446 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,447 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,447 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,447 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,447 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,447 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,447 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,447 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,447 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,447 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,447 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,447 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,447 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,447 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,447 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,447 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,447 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,447 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,448 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,448 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,448 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,448 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,448 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,448 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,448 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,448 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,448 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,448 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,448 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,448 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,448 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,448 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,448 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,448 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,449 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,449 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,449 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,449 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,449 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,449 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,449 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,449 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,449 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,449 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,449 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,449 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,449 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,449 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,449 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,449 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,449 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,450 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,450 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,450 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,450 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,450 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,450 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,450 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,450 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,450 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,450 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,450 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,450 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,450 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,450 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,450 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,451 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,451 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,451 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,451 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,451 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,451 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,451 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,451 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,451 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,451 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,451 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,451 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,451 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,451 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,451 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,452 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,452 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,452 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,452 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,452 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,452 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,452 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,452 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,452 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,452 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,452 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,452 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,452 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,452 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,452 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,453 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,453 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,453 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,453 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,453 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,453 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,453 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,453 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,453 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,453 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,453 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,453 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,453 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,453 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,453 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,453 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,454 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,454 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,454 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,454 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,454 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,454 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,454 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,454 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,454 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,454 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,454 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,454 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,454 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,454 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,455 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,455 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,455 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,455 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,455 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,455 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,455 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,455 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,455 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,455 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,455 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,455 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,455 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,455 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,456 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,456 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,456 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,456 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,456 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,456 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,456 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,456 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,456 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,456 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,456 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,456 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,456 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,456 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,456 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,457 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,457 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,457 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,457 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,457 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,457 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,457 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,457 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,457 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,457 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,457 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,457 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,457 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,457 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,457 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,457 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,458 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,458 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,458 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,458 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,458 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,458 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,458 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,458 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,458 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,458 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,458 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,458 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,458 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,458 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,458 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,458 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,459 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,459 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,459 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,459 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,459 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,459 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,459 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,459 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,459 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,459 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,459 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,459 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,459 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,459 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,459 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,460 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,460 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,460 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,460 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,460 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,460 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,460 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,460 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,460 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,460 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,460 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,460 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,460 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,460 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,461 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,461 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,461 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,461 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,461 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,461 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,461 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,461 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,461 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,461 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,461 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,461 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,461 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,461 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,461 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,462 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,462 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,462 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,462 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,462 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,462 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,462 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,462 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,462 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,462 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,462 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,462 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,462 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,462 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,463 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,463 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,463 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,463 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,463 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,463 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,463 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,463 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,463 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,463 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,463 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,463 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,463 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,463 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,463 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,463 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,464 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,464 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,464 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,464 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,464 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,464 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,464 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,464 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,464 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,464 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,464 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,464 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,464 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,464 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,464 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,464 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,465 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,465 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,465 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,465 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,465 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,465 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,465 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,465 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,465 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,465 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,465 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,465 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,465 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,465 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,465 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,466 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,466 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,466 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,466 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,466 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,466 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,466 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,466 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,466 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,466 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,466 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,466 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,466 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,466 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,466 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,467 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,467 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,467 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,467 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,467 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,467 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,467 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,467 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,467 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,467 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,467 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,467 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,467 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,467 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,467 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,467 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,468 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,468 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,468 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,468 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,468 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,468 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,468 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,468 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,468 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,468 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,468 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,468 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,468 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,468 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,468 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,468 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,469 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,469 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,469 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,469 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,469 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,469 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,469 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,469 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,469 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,469 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,469 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,469 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,469 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,469 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,469 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,469 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,469 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,470 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,470 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,470 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,470 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,470 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,470 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,470 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,470 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,470 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,470 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,470 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,470 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,470 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,470 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,470 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,471 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,471 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,471 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,471 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,471 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,471 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,471 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,471 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,471 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,471 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,471 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,471 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,471 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,471 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,471 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,472 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,472 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,472 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,472 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,472 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,472 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,472 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,472 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,472 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,472 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,472 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,472 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,472 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,472 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,472 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,472 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,473 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,473 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,473 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,473 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,473 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,473 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,473 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,473 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,473 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,473 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,473 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,473 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,473 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,473 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,473 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,473 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,474 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,474 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,474 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,474 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,474 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,474 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,474 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,474 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,474 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,474 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,474 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,474 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,474 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,474 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,475 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,475 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,475 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,475 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,475 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,475 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,475 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,475 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,475 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,475 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,475 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,475 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,475 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,475 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,475 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,475 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,476 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,476 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,476 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,476 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,476 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,476 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,476 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,476 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,476 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,476 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,476 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,476 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,476 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,476 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,476 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,477 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,477 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,477 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,477 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,477 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,477 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,477 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,477 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,477 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,477 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,477 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,477 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,477 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,477 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,477 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,477 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,478 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,478 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,478 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,478 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,478 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,478 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,478 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,478 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,478 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,478 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,478 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,478 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,478 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,478 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,478 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,478 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,479 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,479 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,479 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,479 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,479 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,479 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,479 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,479 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,479 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,479 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,479 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,479 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,479 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,479 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,479 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,480 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,480 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,480 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,480 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,480 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,480 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,480 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,480 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,480 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,480 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,480 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,480 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,480 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,480 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,480 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,480 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,481 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,481 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,481 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,481 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,481 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,481 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,481 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,481 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,481 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,481 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,481 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,481 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,481 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,481 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,481 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,482 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,482 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,482 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,482 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,482 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,482 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,482 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,482 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,482 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,482 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,482 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,482 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,482 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,483 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,483 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,483 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,483 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,483 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,483 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,483 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,483 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,483 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,483 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,483 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,483 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,483 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,483 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,484 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,484 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,484 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,484 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,484 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,484 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,484 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,484 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,484 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,484 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,484 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,484 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,484 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,484 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,484 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,484 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,485 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,485 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,485 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,485 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,485 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,485 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,485 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,485 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,485 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,485 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,485 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,485 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,485 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,485 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,485 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,485 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,486 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,486 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,486 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,486 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,486 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,486 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,486 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,486 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,486 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,486 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,486 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,486 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,486 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,486 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,487 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,487 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,487 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,487 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,487 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,487 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,487 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,487 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,487 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,487 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,487 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,487 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,487 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,487 DEBUG SenderThread:265689 [sender.py:send():235] send: metric +2022-03-02 21:48:40,487 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:48:40,581 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:48:40,670 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:48:40,792 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:48:41,306 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:48:41,839 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:48:41,970 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:48:42,829 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/config.yaml +2022-03-02 21:48:45,830 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:48:46,576 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:48:46,632 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:48:46,749 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:48:46,831 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:48:47,831 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:48:51,833 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:48:52,873 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:48:52,927 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:48:53,010 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:48:53,833 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:48:53,834 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:48:54,834 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:48:57,038 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:48:57,038 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:48:57,835 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:48:58,844 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:48:58,897 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:48:58,985 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:48:59,836 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:48:59,836 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:49:00,837 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:49:03,843 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:49:05,369 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:49:05,421 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:49:05,514 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:49:05,844 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:49:06,844 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:49:10,494 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 21:49:10,846 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:49:11,338 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:49:11,391 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:49:11,484 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:49:11,846 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:49:12,394 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:49:12,395 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:49:12,846 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:49:16,848 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:49:17,463 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:49:17,518 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:49:17,603 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:49:17,848 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:49:18,849 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:49:22,850 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:49:23,497 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:49:23,550 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:49:23,642 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:49:23,850 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:49:24,851 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:49:27,636 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:49:27,636 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:49:28,852 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:49:29,292 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:49:29,346 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:49:29,435 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:49:29,853 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:49:30,853 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:49:31,853 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:49:32,854 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:49:35,117 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:49:35,170 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:49:35,257 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:49:35,855 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:49:36,855 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:49:37,856 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:49:39,857 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:49:40,890 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:49:40,944 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:49:41,029 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:49:41,075 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 21:49:41,858 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:49:42,708 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:49:42,709 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:49:43,858 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:49:45,859 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:49:46,650 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:49:46,705 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:49:46,795 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:49:46,859 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:49:47,860 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:49:51,861 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:49:52,320 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:49:52,375 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:49:52,465 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:49:52,861 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:49:53,862 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:49:57,764 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:49:57,765 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:49:57,863 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:49:57,922 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:49:57,978 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:49:58,069 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:49:58,864 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:49:59,864 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:50:01,865 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:50:03,437 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:50:03,490 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:50:03,579 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:50:03,865 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:50:05,866 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:50:07,867 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:50:08,952 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:50:09,004 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:50:09,095 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:50:09,868 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:50:09,868 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:50:11,492 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 21:50:12,853 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:50:12,854 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:50:13,869 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:50:14,395 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:50:14,448 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:50:14,540 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:50:14,869 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:50:15,870 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:50:17,870 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:50:19,943 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:50:20,000 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:50:20,090 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:50:20,872 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:50:21,872 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:50:23,873 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:50:25,419 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:50:25,473 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:50:25,555 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:50:25,873 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:50:26,874 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:50:27,963 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:50:27,964 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:50:30,875 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:50:30,882 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:50:30,956 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:50:31,047 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:50:31,876 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:50:32,876 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:50:34,877 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:50:36,649 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:50:36,703 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:50:36,791 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:50:36,878 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:50:38,879 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:50:40,880 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:50:41,982 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:50:42,041 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:50:42,138 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:50:42,140 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 21:50:42,880 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:50:42,881 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:50:43,169 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:50:43,170 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:50:44,881 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:50:46,882 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:50:47,450 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:50:47,507 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:50:47,617 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:50:47,882 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:50:48,883 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:50:50,883 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:50:52,853 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:50:52,906 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:50:52,993 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:50:53,917 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:50:54,918 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:50:56,918 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:50:58,102 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:50:58,154 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:50:58,240 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:50:58,411 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:50:58,413 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:50:58,919 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:50:59,920 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:51:00,920 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:51:01,920 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:51:03,266 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:51:03,321 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:51:03,406 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:51:03,921 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:51:04,922 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:51:05,922 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:51:07,923 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:51:08,403 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:51:08,488 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:51:08,569 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:51:08,923 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:51:08,923 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:51:09,924 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:51:11,924 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:51:12,659 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 21:51:13,468 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:51:13,522 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:51:13,544 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:51:13,614 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:51:13,616 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:51:13,925 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:51:14,925 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:51:15,926 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:51:17,926 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:51:18,579 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:51:18,633 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:51:18,718 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:51:18,927 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:51:18,927 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:51:19,927 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:51:21,928 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:51:23,703 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:51:23,757 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:51:23,841 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:51:23,929 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:51:25,929 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:51:27,930 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:51:28,692 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:51:28,746 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:51:28,831 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:51:28,838 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:51:28,839 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:51:28,930 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:51:29,931 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:51:31,931 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:51:32,932 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:51:33,627 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:51:33,680 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:51:33,770 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:51:33,932 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:51:34,932 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:51:35,933 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:51:38,538 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:51:38,592 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:51:38,692 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:51:38,934 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:51:38,934 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:51:39,935 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:51:40,935 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:51:42,936 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:51:43,188 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 21:51:43,421 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:51:43,473 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:51:43,564 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:51:43,936 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:51:44,119 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:51:44,120 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:51:44,936 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:51:45,937 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:51:46,937 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:51:48,101 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:51:48,157 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:51:48,245 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:51:48,938 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:51:48,938 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:51:49,938 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:51:50,939 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:51:52,662 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:51:52,715 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:51:52,806 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:51:52,939 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:51:53,940 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:51:54,940 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:51:56,941 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:51:57,226 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:51:57,278 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:51:57,364 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:51:57,941 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:51:58,942 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:51:59,193 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:51:59,194 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:51:59,942 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:52:00,943 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:52:01,636 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:52:01,699 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:52:01,806 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:52:01,943 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:52:02,943 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:52:03,944 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:52:04,944 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:52:06,022 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:52:06,077 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:52:06,164 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:52:06,945 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:52:06,945 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:52:07,945 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:52:08,946 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:52:10,151 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:52:10,226 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:52:10,318 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:52:10,946 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:52:10,947 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:52:11,947 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:52:13,601 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 21:52:13,947 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:52:14,079 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:52:14,131 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:52:14,217 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:52:14,349 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:52:14,351 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:52:14,948 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:52:15,948 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:52:17,781 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:52:17,836 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:52:17,926 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:52:17,949 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:52:17,949 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:52:19,949 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:52:21,283 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:52:21,337 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:52:21,425 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:52:21,950 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:52:21,950 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:52:23,951 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:52:24,595 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:52:24,674 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:52:24,805 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:52:24,951 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:52:25,952 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:52:27,651 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:52:27,704 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:52:27,796 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:52:27,952 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:52:27,953 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:52:29,577 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:52:29,578 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:52:29,953 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:52:30,458 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:52:30,512 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:52:30,595 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:52:30,954 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:52:31,954 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:52:32,999 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:52:33,062 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:52:33,153 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:52:33,955 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:52:33,955 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:52:35,210 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:52:35,265 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:52:35,351 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:52:35,956 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:52:35,956 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:52:37,176 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:52:37,229 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:52:37,318 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:52:37,956 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:52:37,957 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:52:38,915 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:52:38,968 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:52:39,055 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:52:39,985 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:52:39,985 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:52:40,990 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:52:41,171 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:52:41,254 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:52:42,039 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:52:42,039 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:52:44,214 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 21:52:44,725 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:52:44,726 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:52:46,040 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:52:47,229 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:52:47,283 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:52:47,368 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:52:48,041 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:52:48,042 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:52:52,043 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:52:53,200 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:52:53,253 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:52:53,343 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:52:54,044 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:52:54,044 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:52:55,044 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:52:58,045 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:52:59,234 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:52:59,287 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:52:59,373 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:53:00,046 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:53:00,046 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:53:00,112 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:53:00,113 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:53:01,046 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:53:05,048 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:53:05,057 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:53:05,112 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:53:05,205 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:53:06,048 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:53:07,048 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:53:10,977 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:53:11,033 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:53:11,115 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:53:11,122 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:53:12,116 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:53:13,116 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:53:14,680 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 21:53:15,117 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:53:15,306 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:53:15,307 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:53:16,878 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:53:16,931 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:53:17,034 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:53:17,117 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:53:19,118 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:53:21,119 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:53:22,704 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:53:22,757 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:53:22,848 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:53:23,120 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:53:25,121 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:53:27,122 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:53:28,491 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:53:28,546 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:53:28,630 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:53:29,123 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:53:29,123 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:53:30,368 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:53:30,370 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:53:33,124 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:53:34,236 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:53:34,296 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:53:34,379 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:53:35,125 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:53:35,125 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:53:39,972 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:53:40,025 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:53:40,109 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:53:40,127 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:53:40,127 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:53:42,127 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:53:44,128 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:53:45,194 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 21:53:45,417 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:53:45,419 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:53:45,570 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:53:45,624 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:53:45,707 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:53:46,129 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:53:48,130 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:53:50,130 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:53:51,188 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:53:51,243 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:53:51,333 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:53:52,131 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:53:52,131 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:53:56,133 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:53:56,765 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:53:56,819 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:53:56,912 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:53:57,133 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:53:58,133 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:54:00,477 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:54:00,479 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:54:02,135 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:54:02,285 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:54:02,338 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:54:02,435 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:54:03,135 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:54:04,136 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:54:06,137 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:54:07,819 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:54:07,875 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:54:07,965 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:54:08,137 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:54:09,138 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:54:10,138 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:54:11,139 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:54:13,139 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:54:13,363 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:54:13,418 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:54:13,504 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:54:14,140 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:54:14,140 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:54:15,140 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:54:15,661 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:54:15,663 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:54:15,800 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 21:54:17,141 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:54:18,908 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:54:18,960 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:54:19,045 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:54:19,141 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:54:20,142 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:54:21,142 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:54:23,143 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:54:24,255 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:54:24,305 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:54:24,407 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:54:25,144 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:54:25,144 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:54:26,144 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:54:27,144 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:54:29,145 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:54:29,571 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:54:29,626 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:54:29,711 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:54:30,146 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:54:30,146 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:54:30,765 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:54:30,767 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:54:31,146 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:54:33,147 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:54:34,985 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:54:35,038 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:54:35,120 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:54:35,147 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:54:37,148 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:54:39,149 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:54:40,304 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:54:40,358 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:54:40,449 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:54:41,150 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:54:41,150 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:54:43,150 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:54:44,151 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:54:45,720 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:54:45,774 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:54:45,894 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:54:46,061 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:54:46,062 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:54:46,151 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:54:46,335 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 21:54:47,152 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:54:48,152 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:54:50,153 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:54:50,995 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:54:51,048 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:54:51,136 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:54:51,153 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:54:52,154 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:54:53,154 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:54:54,154 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:54:56,155 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:54:56,217 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:54:56,271 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:54:56,355 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:54:57,156 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:54:57,156 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:54:58,156 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:55:00,157 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:55:01,139 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:55:01,141 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:55:01,340 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:55:01,393 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:55:01,480 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:55:02,157 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:55:02,158 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:55:03,158 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:55:04,158 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:55:06,457 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:55:06,521 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:55:06,606 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:55:07,159 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:55:09,160 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:55:11,161 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:55:11,626 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:55:11,682 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:55:11,767 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:55:12,161 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:55:13,161 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:55:15,162 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:55:16,183 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:55:16,184 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:55:16,633 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:55:16,685 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:55:16,779 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:55:16,911 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 21:55:17,163 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:55:17,163 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:55:19,163 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:55:21,164 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:55:21,642 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:55:21,698 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:55:21,788 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:55:22,165 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:55:23,165 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:55:25,166 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:55:26,573 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:55:26,624 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:55:26,713 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:55:27,166 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:55:27,167 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:55:29,167 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:55:31,168 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:55:31,231 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:55:31,232 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:55:31,451 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:55:31,505 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:55:31,597 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:55:32,168 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:55:33,168 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:55:35,169 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:55:36,229 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:55:36,282 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:55:36,367 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:55:37,170 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:55:37,170 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:55:40,171 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:55:40,987 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:55:41,059 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:55:41,153 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:55:41,171 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:55:42,172 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:55:43,172 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:55:45,672 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:55:45,729 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:55:45,812 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:55:46,173 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:55:46,173 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:55:46,437 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:55:46,438 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:55:47,173 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:55:47,357 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 21:55:48,174 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:55:50,174 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:55:50,258 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:55:50,312 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:55:50,450 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:55:51,175 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:55:52,175 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:55:54,176 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:55:54,826 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:55:54,904 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:55:54,992 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:55:55,176 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:55:56,177 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:55:58,177 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:55:59,257 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:55:59,310 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:55:59,401 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:56:00,178 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:56:00,178 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:56:01,632 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:56:01,634 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:56:02,179 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:56:03,525 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:56:03,579 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:56:03,669 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:56:04,179 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:56:04,180 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:56:06,180 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:56:07,642 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:56:07,705 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:56:07,821 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:56:08,181 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:56:09,181 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:56:10,181 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:56:11,182 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:56:11,635 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:56:11,711 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:56:11,795 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:56:12,182 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:56:13,182 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:56:14,183 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:56:15,183 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:56:15,380 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:56:15,434 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:56:15,518 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:56:16,184 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:56:16,184 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:56:16,697 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:56:16,698 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:56:17,184 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:56:18,005 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 21:56:18,794 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:56:18,848 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:56:18,943 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:56:19,185 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:56:19,185 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:56:20,185 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:56:21,185 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:56:21,957 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:56:22,011 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:56:22,106 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:56:22,186 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:56:23,186 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:56:24,186 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:56:24,878 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:56:24,934 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:56:25,022 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:56:25,187 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:56:25,187 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:56:26,187 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:56:27,187 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:56:27,444 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:56:27,499 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:56:27,582 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:56:28,188 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:56:28,188 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:56:29,188 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:56:29,778 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:56:29,831 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:56:29,918 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:56:30,189 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:56:30,189 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:56:31,189 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:56:31,759 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:56:31,760 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:56:31,885 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:56:31,936 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:56:32,028 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:56:32,189 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:56:33,190 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:56:33,732 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:56:33,785 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:56:33,870 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:56:34,190 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:56:35,191 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:56:35,467 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:56:35,521 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:56:35,634 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:56:36,191 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:56:37,191 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:56:37,605 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:56:37,864 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:56:37,962 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:56:38,192 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:56:39,192 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:56:41,193 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:56:43,908 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:56:43,965 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:56:44,073 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:56:44,194 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:56:45,194 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:56:46,814 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:56:46,815 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:56:48,195 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:56:48,669 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 21:56:49,901 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:56:49,957 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:56:50,049 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:56:50,196 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:56:51,197 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:56:52,197 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:56:54,198 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:56:55,963 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:56:56,018 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:56:56,102 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:56:56,199 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:56:57,199 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:56:58,199 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:57:00,200 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:57:01,856 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:57:01,909 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:57:01,998 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:57:02,196 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:57:02,198 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:57:02,201 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:57:03,201 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:57:04,202 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:57:06,203 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:57:07,732 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:57:07,788 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:57:07,879 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:57:08,204 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:57:10,204 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:57:12,205 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:57:13,574 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:57:13,628 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:57:13,713 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:57:14,206 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:57:14,206 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:57:16,206 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:57:17,461 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:57:17,462 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:57:18,207 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:57:19,142 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 21:57:19,249 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:57:19,305 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:57:19,394 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:57:20,208 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:57:20,208 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:57:22,209 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:57:24,210 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:57:25,094 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:57:25,148 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:57:25,240 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:57:26,233 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:57:26,234 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:57:29,234 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:57:30,782 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:57:30,836 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:57:30,926 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:57:31,235 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:57:32,235 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:57:32,577 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:57:32,578 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:57:33,236 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:57:35,236 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:57:36,494 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:57:36,548 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:57:36,642 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:57:37,237 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:57:37,237 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:57:38,237 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:57:39,238 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:57:41,239 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:57:42,022 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:57:42,075 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:57:42,170 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:57:42,239 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:57:43,239 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:57:44,240 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:57:47,241 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:57:47,523 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:57:47,577 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:57:47,666 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:57:47,698 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:57:47,700 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:57:48,241 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:57:48,241 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:57:49,242 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:57:49,727 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 21:57:51,242 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:57:53,063 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:57:53,118 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:57:53,207 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:57:53,243 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:57:54,244 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:57:55,244 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:57:57,245 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:57:58,566 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:57:58,622 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:57:58,717 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:57:59,245 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:57:59,245 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:58:01,246 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:58:02,778 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:58:02,780 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:58:03,247 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:58:04,095 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:58:04,150 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:58:04,244 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:58:04,247 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:58:06,248 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:58:07,248 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:58:08,249 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:58:09,592 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:58:09,651 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:58:09,741 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:58:10,250 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:58:11,250 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:58:12,250 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:58:14,251 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:58:15,294 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:58:15,351 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:58:15,437 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:58:16,252 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:58:17,252 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:58:17,959 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:58:17,961 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:58:18,252 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:58:20,253 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:58:20,322 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 21:58:20,473 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:58:20,526 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:58:20,610 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:58:21,254 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:58:21,254 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:58:22,254 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:58:24,255 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:58:25,765 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:58:25,818 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:58:25,906 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:58:26,255 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:58:27,256 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:58:28,256 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:58:30,257 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:58:31,023 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:58:31,077 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:58:31,185 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:58:31,257 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:58:32,258 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:58:33,122 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:58:33,123 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:58:33,258 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:58:34,258 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:58:36,204 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:58:36,260 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:58:36,344 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:58:37,281 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:58:37,281 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:58:38,281 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:58:40,282 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:58:41,378 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:58:41,434 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:58:41,519 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:58:42,283 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:58:42,283 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:58:43,283 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:58:45,284 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:58:46,524 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:58:46,580 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:58:46,665 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:58:47,284 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:58:48,285 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:58:48,305 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:58:48,306 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:58:49,285 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:58:50,762 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 21:58:51,286 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:58:51,549 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:58:51,604 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:58:51,698 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:58:52,286 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:58:53,287 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:58:54,287 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:58:55,288 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:58:56,548 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:58:56,608 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:58:56,700 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:58:57,289 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:58:57,289 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:58:58,289 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:58:59,289 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:59:01,290 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:59:01,560 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:59:01,615 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:59:01,707 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:59:02,291 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:59:03,291 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:59:03,360 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:59:03,361 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:59:04,291 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:59:05,292 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:59:06,647 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:59:06,704 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:59:06,838 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:59:07,292 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:59:07,293 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:59:08,293 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:59:11,593 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:59:11,647 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:59:11,736 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:59:12,294 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:59:12,295 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:59:14,295 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:59:16,296 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:59:16,486 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:59:16,537 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:59:16,619 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:59:17,296 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:59:18,297 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:59:18,421 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:59:18,422 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:59:20,297 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:59:21,303 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 21:59:21,319 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:59:21,372 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:59:21,475 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:59:22,298 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:59:22,298 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:59:24,299 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:59:26,097 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:59:26,150 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:59:26,235 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:59:26,299 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:59:26,299 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:59:28,300 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:59:30,301 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:59:30,902 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:59:30,962 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:59:31,050 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:59:31,301 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:59:32,301 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:59:33,476 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:59:33,478 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:59:35,574 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:59:35,627 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:59:35,712 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:59:36,303 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:59:36,303 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:59:38,304 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:59:40,221 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:59:40,274 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:59:40,367 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:59:41,358 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:59:42,358 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:59:44,359 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:59:44,748 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:59:44,801 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:59:44,894 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:59:45,359 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:59:46,360 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:59:48,361 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:59:48,691 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 21:59:48,693 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 21:59:49,201 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:59:49,253 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:59:49,341 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:59:49,361 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:59:50,361 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:59:51,362 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:59:51,748 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 21:59:52,362 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:59:53,488 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:59:53,564 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:59:53,656 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:59:54,363 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:59:54,363 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:59:55,363 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:59:56,363 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 21:59:57,776 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 21:59:57,831 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 21:59:57,914 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 21:59:58,364 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 21:59:59,364 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:00:01,365 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:00:01,869 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:00:01,954 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:00:02,049 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:00:02,366 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:00:03,366 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:00:03,744 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:00:03,745 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:00:05,367 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:00:05,845 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:00:05,900 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:00:06,009 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:00:06,367 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:00:07,367 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:00:09,368 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:00:09,598 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:00:09,653 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:00:09,743 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:00:10,369 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:00:11,369 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:00:13,174 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:00:13,230 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:00:13,325 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:00:13,370 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:00:13,370 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:00:15,370 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:00:16,489 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:00:16,544 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:00:16,637 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:00:17,371 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:00:17,371 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:00:18,817 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:00:18,818 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:00:19,372 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:00:19,535 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:00:19,587 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:00:19,677 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:00:20,372 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:00:21,372 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:00:22,322 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:00:22,375 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:00:22,470 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:00:22,502 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 22:00:23,461 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:00:23,461 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:00:24,873 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:00:24,926 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:00:25,017 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:00:25,462 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:00:25,462 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:00:27,196 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:00:27,248 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:00:27,335 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:00:27,463 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:00:27,463 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:00:29,263 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:00:29,317 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:00:29,406 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:00:29,463 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:00:29,463 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:00:31,059 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:00:31,109 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:00:31,204 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:00:31,464 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:00:31,464 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:00:33,240 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:00:33,420 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:00:33,507 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:00:33,510 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:00:34,023 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:00:34,024 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:00:34,507 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:00:35,507 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:00:37,508 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:00:39,183 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:00:39,239 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:00:39,324 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:00:39,509 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:00:40,509 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:00:41,510 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:00:42,510 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:00:44,511 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:00:45,172 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:00:45,227 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:00:45,323 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:00:45,511 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:00:46,512 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:00:47,512 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:00:49,273 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:00:49,274 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:00:50,513 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:00:51,052 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:00:51,108 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:00:51,190 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:00:51,513 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:00:52,514 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:00:53,031 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 22:00:53,514 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:00:54,514 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:00:56,879 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:00:56,933 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:00:57,026 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:00:57,515 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:00:58,516 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:00:59,516 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:01:00,516 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:01:02,564 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:01:02,623 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:01:02,713 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:01:03,518 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:01:03,518 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:01:04,350 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:01:04,351 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:01:04,518 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:01:06,519 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:01:08,264 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:01:08,315 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:01:08,405 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:01:08,519 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:01:09,520 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:01:10,520 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:01:11,520 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:01:13,521 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:01:13,833 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:01:13,897 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:01:13,978 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:01:14,521 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:01:15,522 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:01:16,522 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:01:19,461 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:01:19,467 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:01:19,526 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:01:19,568 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:01:19,568 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:01:19,654 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:01:20,527 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:01:21,527 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:01:22,527 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:01:23,462 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 22:01:23,528 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:01:25,021 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:01:25,072 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:01:25,160 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:01:25,528 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:01:26,529 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:01:27,529 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:01:29,530 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:01:30,677 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:01:30,732 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:01:30,825 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:01:31,531 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:01:32,531 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:01:33,532 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:01:34,733 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:01:34,735 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:01:35,532 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:01:36,270 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:01:36,351 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:01:36,464 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:01:36,533 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:01:37,533 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:01:38,533 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:01:41,535 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:01:41,806 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:01:41,862 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:01:41,953 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:01:42,535 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:01:43,535 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:01:44,536 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:01:45,536 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:01:47,328 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:01:47,382 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:01:47,471 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:01:47,537 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:01:47,537 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:01:48,537 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:01:49,826 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:01:49,828 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:01:50,538 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:01:52,539 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:01:52,726 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:01:52,781 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:01:52,903 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:01:53,539 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:01:53,936 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 22:01:54,540 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:01:58,127 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:01:58,187 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:01:58,275 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:01:58,541 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:01:58,541 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:02:00,542 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:02:02,543 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:02:03,415 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:02:03,467 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:02:03,550 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:02:03,551 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:02:04,550 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:02:04,902 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:02:04,904 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:02:08,551 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:02:08,762 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:02:08,816 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:02:08,905 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:02:09,551 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:02:10,552 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:02:12,552 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:02:14,213 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:02:14,279 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:02:14,367 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:02:14,553 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:02:15,554 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:02:16,554 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:02:18,555 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:02:19,533 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:02:19,592 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:02:19,720 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:02:20,069 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:02:20,071 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:02:20,592 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:02:20,592 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:02:21,592 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:02:24,353 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 22:02:24,593 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:02:24,778 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:02:24,834 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:02:24,924 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:02:25,594 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:02:26,594 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:02:27,594 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:02:28,595 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:02:29,872 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:02:29,924 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:02:30,017 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:02:30,595 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:02:30,596 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:02:31,596 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:02:34,597 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:02:35,031 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:02:35,118 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:02:35,212 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:02:35,446 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:02:35,447 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:02:35,597 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:02:36,598 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:02:37,598 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:02:38,598 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:02:40,191 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:02:40,247 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:02:40,341 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:02:40,599 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:02:40,599 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:02:41,599 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:02:44,600 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:02:45,218 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:02:45,273 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:02:45,388 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:02:45,601 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:02:46,601 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:02:47,602 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:02:48,602 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:02:50,313 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:02:50,364 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:02:50,454 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:02:50,603 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:02:50,674 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:02:50,675 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:02:51,603 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:02:53,604 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:02:54,837 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 22:02:55,296 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:02:55,357 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:02:55,448 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:02:55,604 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:02:55,605 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:02:57,605 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:02:59,606 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:03:00,341 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:03:00,397 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:03:00,492 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:03:00,606 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:03:01,607 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:03:02,607 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:03:05,228 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:03:05,285 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:03:05,399 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:03:05,608 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:03:05,608 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:03:06,155 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:03:06,156 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:03:06,608 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:03:07,609 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:03:09,609 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:03:10,052 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:03:10,106 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:03:10,201 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:03:10,610 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:03:11,610 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:03:12,611 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:03:13,611 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:03:14,858 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:03:14,921 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:03:15,010 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:03:15,612 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:03:16,612 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:03:17,613 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:03:19,509 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:03:19,564 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:03:19,671 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:03:19,678 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:03:20,671 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:03:20,671 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:03:21,240 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:03:21,241 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:03:21,672 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:03:23,672 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:03:24,286 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:03:24,341 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:03:24,427 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:03:24,673 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:03:25,366 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 22:03:25,673 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:03:26,673 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:03:27,674 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:03:28,862 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:03:28,917 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:03:29,028 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:03:29,675 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:03:29,675 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:03:30,675 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:03:31,675 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:03:33,473 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:03:33,528 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:03:33,621 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:03:33,676 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:03:34,676 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:03:36,365 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:03:36,366 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:03:38,033 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:03:38,090 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:03:38,183 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:03:38,678 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:03:38,678 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:03:40,678 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:03:42,364 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:03:42,418 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:03:42,510 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:03:42,679 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:03:42,679 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:03:44,680 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:03:46,680 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:03:46,700 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:03:46,755 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:03:46,864 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:03:47,681 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:03:48,681 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:03:50,682 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:03:50,947 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:03:51,021 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:03:51,106 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:03:51,660 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:03:51,661 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:03:51,682 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:03:52,683 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:03:54,683 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:03:55,074 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:03:55,128 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:03:55,218 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:03:55,684 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:03:55,954 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 22:03:56,684 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:03:58,685 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:03:58,958 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:03:59,029 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:03:59,118 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:03:59,685 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:04:00,685 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:04:02,686 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:04:02,717 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:04:02,771 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:04:02,864 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:04:03,686 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:04:04,687 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:04:06,091 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:04:06,167 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:04:06,263 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:04:06,687 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:04:06,688 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:04:06,811 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:04:06,812 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:04:08,688 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:04:09,287 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:04:09,367 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:04:09,454 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:04:09,689 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:04:10,689 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:04:11,690 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:04:12,142 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:04:12,204 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:04:12,300 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:04:12,690 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:04:12,691 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:04:13,691 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:04:14,691 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:04:14,765 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:04:14,820 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:04:14,912 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:04:15,691 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:04:16,692 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:04:17,139 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:04:17,195 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:04:17,292 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:04:17,692 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:04:17,692 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:04:18,692 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:04:19,180 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:04:19,234 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:04:19,328 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:04:19,693 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:04:19,693 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:04:20,693 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:04:20,973 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:04:21,029 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:04:21,118 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:04:21,694 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:04:21,694 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:04:21,943 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:04:21,944 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:04:22,599 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:04:22,646 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:04:22,733 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:04:22,734 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:04:23,733 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:04:23,733 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:04:24,591 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:04:24,787 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:04:24,787 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:04:24,877 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:04:25,777 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:04:25,777 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:04:26,777 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:04:26,934 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 22:04:28,778 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:04:30,732 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:04:30,789 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:04:30,876 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:04:31,821 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:04:31,821 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:04:32,821 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:04:34,822 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:04:36,648 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:04:36,702 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:04:36,792 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:04:36,823 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:04:37,351 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:04:37,352 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:04:37,823 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:04:38,823 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:04:40,824 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:04:42,509 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:04:42,565 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:04:42,652 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:04:42,825 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:04:43,825 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:04:44,826 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:04:46,827 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:04:48,315 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:04:48,369 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:04:48,461 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:04:48,827 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:04:48,827 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:04:49,828 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:04:52,550 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:04:52,552 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:04:53,829 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:04:54,079 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:04:54,134 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:04:54,221 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:04:54,829 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:04:55,830 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:04:57,411 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 22:04:59,831 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:04:59,880 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:04:59,956 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:05:00,049 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:05:00,831 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:05:01,832 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:05:03,833 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:05:05,538 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:05:05,593 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:05:05,679 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:05:05,833 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:05:05,834 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:05:07,728 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:05:07,730 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:05:07,834 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:05:09,835 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:05:11,184 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:05:11,242 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:05:11,330 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:05:11,835 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:05:12,836 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:05:13,836 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:05:15,837 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:05:16,889 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:05:16,953 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:05:17,094 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:05:17,838 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:05:17,838 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:05:18,838 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:05:21,839 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:05:22,525 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:05:22,577 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:05:22,682 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:05:22,840 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:05:22,859 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:05:22,860 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:05:23,840 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:05:24,841 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:05:25,841 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:05:27,965 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 22:05:28,074 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:05:28,151 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:05:28,297 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:05:28,842 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:05:29,842 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:05:30,843 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:05:32,843 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:05:33,530 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:05:33,607 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:05:33,695 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:05:33,844 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:05:34,844 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:05:38,018 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:05:38,020 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:05:38,846 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:05:39,090 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:05:39,147 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:05:39,260 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:05:39,846 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:05:40,846 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:05:44,572 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:05:44,639 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:05:44,727 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:05:44,848 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:05:44,848 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:05:46,848 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:05:48,849 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:05:49,949 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:05:50,005 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:05:50,094 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:05:50,850 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:05:50,850 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:05:53,155 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:05:53,157 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:05:54,851 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:05:55,375 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:05:55,431 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:05:55,522 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:05:55,851 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:05:56,852 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:05:58,483 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 22:06:00,819 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:06:00,876 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:06:00,892 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:06:00,965 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:06:01,887 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:06:01,888 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:06:02,888 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:06:04,888 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:06:06,126 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:06:06,181 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:06:06,270 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:06:06,889 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:06:06,889 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:06:07,890 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:06:08,323 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:06:08,325 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:06:10,891 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:06:11,471 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:06:11,528 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:06:11,621 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:06:11,891 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:06:12,892 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:06:13,892 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:06:14,892 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:06:16,827 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:06:16,884 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:06:16,978 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:06:17,972 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:06:17,973 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:06:18,973 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:06:20,973 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:06:22,078 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:06:22,132 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:06:22,217 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:06:22,974 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:06:22,974 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:06:23,386 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:06:23,387 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:06:23,974 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:06:25,975 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:06:27,282 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:06:27,337 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:06:27,431 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:06:27,976 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:06:27,976 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:06:28,965 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 22:06:29,976 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:06:31,977 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:06:32,431 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:06:32,486 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:06:32,580 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:06:32,978 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:06:33,978 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:06:37,597 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:06:37,654 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:06:37,745 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:06:37,979 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:06:37,980 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:06:38,660 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:06:38,661 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:06:38,980 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:06:39,980 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:06:41,981 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:06:42,655 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:06:42,716 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:06:42,817 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:06:42,981 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:06:43,982 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:06:44,982 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:06:47,784 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:06:47,840 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:06:47,929 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:06:47,983 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:06:47,984 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:06:48,984 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:06:49,984 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:06:51,985 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:06:52,751 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:06:52,807 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:06:52,897 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:06:52,985 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:06:53,833 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:06:53,834 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:06:53,986 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:06:54,986 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:06:55,987 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:06:57,667 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:06:57,721 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:06:57,808 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:06:57,987 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:06:58,987 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:06:59,447 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 22:06:59,988 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:07:01,989 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:07:02,585 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:07:02,640 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:07:02,763 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:07:02,989 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:07:03,989 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:07:04,990 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:07:05,990 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:07:07,486 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:07:07,540 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:07:07,627 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:07:07,991 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:07:07,991 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:07:08,910 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:07:08,911 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:07:08,991 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:07:11,992 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:07:12,243 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:07:12,302 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:07:12,405 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:07:12,993 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:07:12,993 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:07:13,993 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:07:15,993 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:07:17,047 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:07:17,106 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:07:17,198 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:07:17,994 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:07:18,995 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:07:20,995 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:07:21,842 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:07:21,898 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:07:21,986 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:07:21,996 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:07:22,996 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:07:23,974 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:07:23,975 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:07:26,461 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:07:26,508 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:07:26,601 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:07:26,998 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:07:26,998 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:07:27,998 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:07:28,998 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:07:30,157 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 22:07:30,991 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:07:31,044 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:07:31,045 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:07:31,157 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:07:32,034 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:07:33,034 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:07:34,035 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:07:35,035 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:07:35,458 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:07:35,539 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:07:35,633 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:07:36,035 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:07:37,036 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:07:38,036 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:07:39,030 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:07:39,032 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:07:39,037 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:07:39,840 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:07:39,897 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:07:39,989 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:07:40,037 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:07:41,037 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:07:42,038 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:07:43,038 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:07:44,066 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:07:44,118 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:07:44,213 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:07:45,039 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:07:45,039 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:07:46,039 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:07:47,040 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:07:48,125 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:07:48,181 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:07:48,277 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:07:49,040 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:07:49,041 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:07:50,041 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:07:51,041 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:07:52,026 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:07:52,082 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:07:52,164 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:07:53,082 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:07:53,082 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:07:54,082 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:07:54,119 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:07:54,120 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:07:55,083 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:07:55,715 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:07:55,770 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:07:55,859 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:07:56,083 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:07:57,084 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:07:58,084 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:07:59,084 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:07:59,198 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:07:59,243 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:07:59,337 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:08:00,085 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:08:00,671 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 22:08:01,085 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:08:02,086 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:08:02,407 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:08:02,462 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:08:02,554 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:08:03,086 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:08:04,086 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:08:05,387 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:08:05,442 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:08:05,533 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:08:06,087 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:08:06,087 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:08:08,088 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:08:08,140 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:08:08,193 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:08:08,285 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:08:09,088 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:08:09,281 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:08:09,282 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:08:10,088 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:08:10,577 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:08:10,630 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:08:10,723 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:08:11,089 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:08:12,089 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:08:12,728 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:08:12,784 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:08:12,877 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:08:13,090 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:08:14,090 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:08:14,650 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:08:14,703 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:08:14,794 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:08:15,090 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:08:16,091 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:08:16,290 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:08:16,347 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:08:16,437 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:08:17,091 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:08:18,092 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:08:18,337 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:08:18,511 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:08:18,597 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:08:19,092 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:08:20,092 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:08:24,094 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:08:24,304 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:08:24,358 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:08:24,444 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:08:24,447 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:08:24,448 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:08:25,094 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:08:26,094 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:08:30,096 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:08:30,289 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:08:30,342 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:08:30,433 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:08:31,096 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:08:31,184 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 22:08:32,097 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:08:34,097 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:08:36,131 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:08:36,187 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:08:36,323 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:08:37,099 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:08:38,099 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:08:39,668 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:08:39,669 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:08:40,100 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:08:41,903 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:08:41,956 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:08:42,046 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:08:42,100 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:08:44,101 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:08:46,102 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:08:47,701 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:08:47,774 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:08:47,859 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:08:48,102 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:08:49,103 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:08:50,103 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:08:53,104 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:08:53,414 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:08:53,465 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:08:53,562 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:08:54,104 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:08:54,105 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:08:54,783 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:08:54,784 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:08:55,105 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:08:57,105 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:08:59,182 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:08:59,239 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:08:59,333 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:09:00,106 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:09:01,107 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:09:01,699 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 22:09:03,107 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:09:04,892 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:09:04,947 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:09:05,038 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:09:05,108 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:09:07,109 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:09:09,110 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:09:09,853 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:09:09,854 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:09:10,550 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:09:10,606 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:09:10,693 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:09:11,111 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:09:11,111 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:09:13,111 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:09:14,112 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:09:16,112 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:09:16,215 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:09:16,271 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:09:16,379 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:09:17,113 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:09:18,113 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:09:19,113 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:09:20,114 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:09:21,791 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:09:21,843 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:09:21,935 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:09:22,115 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:09:23,115 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:09:24,115 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:09:25,058 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:09:25,059 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:09:26,116 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:09:27,285 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:09:27,338 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:09:27,426 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:09:28,117 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:09:28,117 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:09:29,117 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:09:30,117 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:09:32,118 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:09:32,127 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 22:09:32,737 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:09:32,796 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:09:32,888 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:09:33,119 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:09:34,119 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:09:35,119 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:09:36,120 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:09:38,183 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:09:38,238 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:09:38,325 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:09:39,121 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:09:39,121 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:09:40,121 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:09:40,222 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:09:40,223 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:09:42,122 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:09:43,655 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:09:43,713 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:09:43,801 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:09:44,123 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:09:44,123 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:09:45,123 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:09:47,124 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:09:49,099 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:09:49,154 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:09:49,165 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:09:49,243 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:09:50,155 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:09:51,156 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:09:53,156 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:09:54,485 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:09:54,540 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:09:54,628 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:09:55,157 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:09:55,157 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:09:55,420 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:09:55,422 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:09:57,158 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:09:59,158 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:09:59,823 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:09:59,879 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:09:59,973 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:10:00,159 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:10:01,159 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:10:02,773 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 22:10:03,160 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:10:05,129 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:10:05,186 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:10:05,275 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:10:06,186 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:10:07,187 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:10:08,187 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:10:09,188 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:10:10,429 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:10:10,486 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:10:10,527 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:10:10,596 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:10:10,597 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:10:11,188 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:10:11,189 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:10:12,189 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:10:15,190 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:10:15,698 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:10:15,753 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:10:15,843 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:10:16,190 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:10:17,191 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:10:18,191 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:10:20,192 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:10:20,998 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:10:21,055 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:10:21,142 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:10:21,192 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:10:22,193 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:10:24,194 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:10:25,762 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:10:25,763 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:10:26,122 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:10:26,170 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:10:26,280 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:10:26,282 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:10:27,281 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:10:28,281 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:10:30,282 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:10:31,156 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:10:31,213 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:10:31,305 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:10:32,299 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:10:32,300 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:10:33,385 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 22:10:36,099 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:10:36,164 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:10:36,256 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:10:36,301 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:10:36,301 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:10:38,302 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:10:40,302 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:10:40,847 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:10:40,848 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:10:41,156 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:10:41,214 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:10:41,303 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:10:41,305 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:10:42,303 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:10:46,139 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:10:46,195 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:10:46,290 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:10:46,304 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:10:46,305 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:10:48,305 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:10:50,306 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:10:51,057 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:10:51,110 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:10:51,233 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:10:51,306 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:10:52,307 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:10:54,307 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:10:55,897 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:10:55,898 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:10:56,002 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:10:56,014 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:10:56,103 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:10:56,308 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:10:56,308 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:10:58,309 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:11:00,309 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:11:00,830 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:11:00,883 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:11:00,975 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:11:01,310 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:11:02,310 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:11:03,853 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 22:11:04,311 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:11:05,700 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:11:05,756 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:11:05,844 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:11:06,312 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:11:07,312 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:11:09,313 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:11:10,446 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:11:10,502 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:11:10,588 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:11:11,209 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:11:11,210 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:11:11,313 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:11:11,314 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:11:13,314 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:11:15,097 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:11:15,180 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:11:15,272 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:11:15,315 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:11:15,315 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:11:17,316 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:11:19,317 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:11:19,628 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:11:19,684 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:11:19,775 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:11:20,317 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:11:21,317 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:11:23,318 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:11:24,161 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:11:24,217 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:11:24,310 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:11:24,318 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:11:25,319 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:11:26,468 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:11:26,469 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:11:27,320 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:11:28,501 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:11:28,566 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:11:28,656 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:11:29,320 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:11:29,320 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:11:31,321 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:11:32,791 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:11:32,847 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:11:32,941 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:11:33,322 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:11:34,307 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 22:11:35,322 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:11:36,979 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:11:37,034 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:11:37,125 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:11:37,323 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:11:38,323 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:11:40,324 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:11:40,991 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:11:41,046 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:11:41,140 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:11:41,324 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:11:41,823 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:11:41,824 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:11:42,325 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:11:44,325 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:11:44,858 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:11:44,915 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:11:45,009 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:11:45,326 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:11:46,326 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:11:48,327 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:11:48,478 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:11:48,535 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:11:48,666 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:11:49,328 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:11:50,328 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:11:51,959 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:11:52,042 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:11:52,155 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:11:52,329 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:11:52,329 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:11:54,329 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:11:55,155 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:11:55,239 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:11:55,330 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:11:55,335 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:11:56,330 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:11:56,959 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:11:56,960 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:11:58,122 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:11:58,176 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:11:58,273 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:11:58,331 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:11:58,331 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:12:00,332 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:12:00,764 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:12:00,844 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:12:00,938 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:12:01,332 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:12:02,332 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:12:03,224 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:12:03,279 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:12:03,371 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:12:04,365 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:12:04,365 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:12:05,021 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 22:12:05,338 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:12:05,392 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:12:05,540 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:12:06,392 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:12:06,393 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:12:07,211 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:12:07,287 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:12:07,379 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:12:07,393 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:12:08,393 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:12:08,783 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:12:08,837 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:12:08,926 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:12:09,393 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:12:10,394 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:12:10,898 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:12:11,076 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:12:11,159 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:12:11,394 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:12:12,089 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:12:12,090 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:12:12,395 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:12:14,395 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:12:16,954 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:12:17,007 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:12:17,088 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:12:17,396 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:12:18,397 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:12:22,398 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:12:22,843 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:12:22,923 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:12:23,008 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:12:23,398 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:12:24,399 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:12:27,189 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:12:27,191 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:12:27,400 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:12:28,713 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:12:28,769 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:12:28,860 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:12:29,400 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:12:30,401 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:12:31,401 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:12:33,402 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:12:34,525 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:12:34,577 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:12:34,666 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:12:35,403 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:12:35,403 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:12:35,511 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 22:12:36,403 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:12:37,403 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:12:39,404 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:12:40,303 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:12:40,356 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:12:40,440 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:12:41,439 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:12:41,439 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:12:42,256 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:12:42,257 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:12:42,439 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:12:45,440 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:12:45,993 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:12:46,037 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:12:46,154 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:12:46,441 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:12:47,441 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:12:51,442 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:12:51,862 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:12:51,917 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:12:52,011 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:12:52,443 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:12:53,443 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:12:57,303 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:12:57,304 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:12:57,445 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:12:57,588 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:12:57,641 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:12:57,730 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:12:58,445 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:12:59,445 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:13:01,446 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:13:03,272 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:13:03,326 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:13:03,412 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:13:03,447 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:13:05,448 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:13:05,915 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 22:13:07,448 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:13:08,936 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:13:08,989 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:13:09,080 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:13:09,449 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:13:11,450 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:13:12,505 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:13:12,506 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:13:13,451 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:13:14,521 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:13:14,576 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:13:14,666 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:13:15,451 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:13:15,452 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:13:19,453 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:13:20,075 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:13:20,155 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:13:20,239 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:13:20,453 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:13:21,454 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:13:23,455 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:13:25,514 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:13:25,567 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:13:25,658 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:13:26,456 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:13:27,456 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:13:27,670 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:13:27,671 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:13:28,456 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:13:30,457 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:13:30,938 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:13:30,993 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:13:31,077 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:13:31,457 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:13:32,458 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:13:33,458 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:13:36,384 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:13:36,438 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:13:36,480 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:13:36,552 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:13:36,597 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 22:13:37,480 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:13:37,480 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:13:38,480 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:13:40,481 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:13:41,907 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:13:41,961 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:13:42,050 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:13:42,482 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:13:42,483 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:13:42,883 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:13:42,885 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:13:43,483 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:13:44,483 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:13:46,484 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:13:47,309 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:13:47,364 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:13:47,455 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:13:47,484 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:13:48,484 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:13:50,485 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:13:52,626 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:13:52,678 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:13:52,768 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:13:53,486 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:13:54,487 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:13:56,488 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:13:57,966 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:13:57,967 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:13:58,063 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:13:58,075 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:13:58,159 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:13:58,488 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:13:58,489 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:14:00,489 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:14:01,489 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:14:03,364 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:14:03,418 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:14:03,501 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:14:03,503 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:14:04,501 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:14:04,502 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:14:05,502 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:14:07,068 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 22:14:07,503 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:14:08,604 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:14:08,654 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:14:08,736 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:14:09,503 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:14:09,504 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:14:10,504 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:14:11,504 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:14:13,181 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:14:13,183 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:14:13,505 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:14:13,771 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:14:13,824 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:14:13,910 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:14:14,505 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:14:15,505 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:14:16,506 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:14:17,506 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:14:18,930 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:14:18,984 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:14:19,071 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:14:19,507 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:14:20,507 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:14:21,508 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:14:23,508 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:14:24,108 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:14:24,162 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:14:24,250 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:14:24,509 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:14:25,509 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:14:26,510 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:14:27,510 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:14:28,238 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:14:28,240 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:14:29,222 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:14:29,277 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:14:29,361 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:14:29,511 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:14:30,511 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:14:31,511 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:14:33,512 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:14:34,260 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:14:34,334 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:14:34,419 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:14:34,512 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:14:35,513 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:14:36,513 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:14:37,500 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 22:14:38,514 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:14:39,302 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:14:39,355 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:14:39,446 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:14:39,514 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:14:40,515 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:14:42,515 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:14:43,305 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:14:43,306 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:14:44,268 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:14:44,320 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:14:44,429 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:14:44,516 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:14:44,517 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:14:46,517 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:14:48,518 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:14:49,184 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:14:49,237 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:14:49,328 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:14:49,518 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:14:50,519 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:14:52,519 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:14:54,122 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:14:54,173 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:14:54,263 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:14:54,520 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:14:55,520 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:14:56,521 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:14:58,359 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:14:58,360 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:14:58,522 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:14:58,961 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:14:59,014 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:14:59,103 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:14:59,522 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:15:00,522 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:15:01,523 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:15:02,523 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:15:03,723 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:15:03,784 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:15:03,869 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:15:04,524 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:15:04,524 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:15:05,524 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:15:07,525 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:15:07,987 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 22:15:08,404 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:15:08,457 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:15:08,542 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:15:08,542 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:15:08,543 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:15:11,543 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:15:12,991 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:15:13,045 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:15:13,134 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:15:13,465 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:15:13,466 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:15:13,544 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:15:13,544 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:15:15,544 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:15:17,513 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:15:17,567 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:15:17,578 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:15:17,659 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:15:18,568 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:15:19,568 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:15:21,569 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:15:21,920 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:15:21,973 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:15:22,063 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:15:22,569 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:15:23,570 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:15:25,570 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:15:26,221 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:15:26,296 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:15:26,383 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:15:26,571 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:15:27,571 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:15:28,695 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:15:28,696 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:15:29,572 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:15:30,465 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:15:30,517 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:15:30,609 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:15:31,603 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:15:31,603 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:15:33,604 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:15:34,565 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:15:34,626 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:15:34,753 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:15:35,620 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:15:35,620 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:15:37,620 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:15:38,461 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:15:38,516 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:15:38,607 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:15:38,621 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:15:38,655 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 22:15:39,621 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:15:41,622 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:15:42,220 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:15:42,272 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:15:42,358 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:15:42,622 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:15:43,622 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:15:43,823 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:15:43,825 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:15:45,623 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:15:45,729 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:15:45,783 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:15:45,866 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:15:46,623 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:15:47,624 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:15:48,962 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:15:49,016 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:15:49,104 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:15:49,625 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:15:49,625 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:15:51,625 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:15:51,948 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:15:51,999 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:15:52,090 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:15:52,626 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:15:53,626 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:15:54,590 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:15:54,647 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:15:54,647 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:15:54,737 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:15:55,648 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:15:55,648 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:15:56,648 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:15:56,919 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:15:56,971 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:15:57,064 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:15:57,649 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:15:58,649 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:15:58,872 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:15:58,874 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:15:59,012 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:15:59,066 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:15:59,151 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:15:59,649 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:16:00,650 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:16:00,848 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:16:00,901 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:16:00,989 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:16:01,650 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:16:02,474 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:16:02,529 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:16:02,612 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:16:02,650 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:16:02,651 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:16:04,512 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:16:04,741 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:16:04,741 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:16:04,842 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:16:05,741 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:16:06,742 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:16:09,417 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 22:16:10,648 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:16:10,707 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:16:10,796 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:16:10,803 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:16:11,797 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:16:12,797 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:16:14,001 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:16:14,003 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:16:14,798 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:16:16,633 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:16:16,712 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:16:16,807 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:16:17,801 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:16:18,802 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:16:20,802 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:16:22,395 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:16:22,450 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:16:22,540 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:16:22,803 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:16:24,804 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:16:26,805 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:16:28,060 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:16:28,116 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:16:28,213 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:16:28,805 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:16:29,208 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:16:29,209 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:16:30,806 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:16:32,807 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:16:33,806 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:16:33,883 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:16:33,973 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:16:34,864 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:16:34,865 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:16:38,866 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:16:39,549 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:16:39,608 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:16:39,697 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:16:39,866 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:16:39,878 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 22:16:40,867 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:16:42,867 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:16:44,282 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:16:44,284 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:16:45,270 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:16:45,329 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:16:45,422 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:16:45,869 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:16:46,869 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:16:48,870 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:16:50,883 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:16:50,941 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:16:51,037 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:16:51,871 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:16:51,871 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:16:52,871 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:16:53,872 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:16:55,873 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:16:56,573 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:16:56,631 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:16:56,728 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:16:56,873 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:16:57,874 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:16:59,341 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:16:59,342 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:16:59,874 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:17:02,185 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:17:02,241 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:17:02,339 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:17:02,875 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:17:03,876 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:17:05,876 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:17:07,660 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:17:07,718 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:17:07,811 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:17:07,877 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:17:07,877 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:17:09,878 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:17:10,467 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 22:17:11,878 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:17:13,126 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:17:13,182 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:17:13,279 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:17:13,879 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:17:13,880 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:17:14,535 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:17:14,536 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:17:15,880 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:17:16,880 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:17:18,640 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:17:18,698 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:17:18,794 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:17:18,881 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:17:19,882 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:17:20,882 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:17:22,883 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:17:24,129 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:17:24,185 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:17:24,281 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:17:24,884 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:17:24,884 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:17:25,884 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:17:28,885 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:17:29,455 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:17:29,512 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:17:29,609 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:17:29,691 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:17:29,693 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:17:29,885 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:17:30,886 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:17:31,886 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:17:34,825 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:17:34,883 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:17:34,970 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:17:34,977 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:17:35,971 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:17:35,971 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:17:36,971 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:17:38,972 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:17:40,129 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:17:40,196 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:17:40,292 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:17:40,925 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 22:17:40,972 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:17:40,973 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:17:41,973 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:17:44,852 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:17:44,853 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:17:44,974 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:17:45,512 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:17:45,571 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:17:45,669 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:17:45,974 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:17:46,975 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:17:48,976 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:17:50,804 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:17:50,862 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:17:50,959 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:17:50,976 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:17:51,977 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:17:52,977 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:17:53,978 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:17:55,978 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:17:56,010 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:17:56,067 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:17:56,164 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:17:56,979 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:17:57,979 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:17:58,979 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:17:59,929 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:17:59,931 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:17:59,980 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:18:01,285 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:18:01,346 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:18:01,439 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:18:01,980 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:18:02,981 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:18:03,981 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:18:05,982 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:18:06,531 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:18:06,589 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:18:06,684 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:18:06,982 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:18:07,983 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:18:08,983 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:18:09,983 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:18:11,376 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 22:18:11,618 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:18:11,692 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:18:11,792 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:18:11,984 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:18:12,984 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:18:13,985 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:18:15,113 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:18:15,115 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:18:15,985 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:18:16,788 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:18:16,846 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:18:16,944 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:18:16,986 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:18:17,986 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:18:18,986 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:18:19,987 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:18:21,869 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:18:21,925 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:18:22,015 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:18:23,014 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:18:23,014 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:18:24,015 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:18:26,015 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:18:26,891 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:18:26,944 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:18:27,043 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:18:28,035 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:18:28,035 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:18:29,035 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:18:30,036 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:18:30,433 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:18:30,434 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:18:31,899 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:18:31,957 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:18:32,046 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:18:32,053 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:18:33,047 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:18:33,047 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:18:35,047 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:18:36,830 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:18:36,888 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:18:36,983 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:18:37,048 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:18:37,048 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:18:39,049 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:18:41,049 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:18:41,718 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:18:41,785 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:18:41,877 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:18:41,915 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 22:18:42,050 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:18:43,050 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:18:45,537 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:18:45,538 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:18:46,630 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:18:46,688 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:18:46,783 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:18:47,051 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:18:47,052 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:18:49,052 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:18:51,053 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:18:51,443 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:18:51,491 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:18:51,625 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:18:52,053 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:18:53,053 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:18:55,054 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:18:56,139 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:18:56,197 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:18:56,287 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:18:57,055 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:18:57,055 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:18:59,056 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:19:00,586 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:19:00,587 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:19:00,821 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:19:00,877 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:19:00,973 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:19:01,056 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:19:02,057 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:19:03,057 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:19:05,058 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:19:05,373 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:19:05,431 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:19:05,530 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:19:06,058 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:19:07,058 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:19:08,059 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:19:09,059 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:19:09,862 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:19:09,920 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:19:10,011 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:19:10,059 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:19:11,060 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:19:12,060 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:19:12,377 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 22:19:13,060 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:19:14,220 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:19:14,279 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:19:14,375 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:19:15,061 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:19:15,061 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:19:15,656 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:19:15,657 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:19:16,062 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:19:18,062 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:19:18,558 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:19:18,614 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:19:18,712 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:19:19,063 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:19:20,063 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:19:22,064 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:19:22,772 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:19:22,828 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:19:22,924 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:19:23,064 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:19:24,064 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:19:26,065 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:19:26,900 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:19:26,959 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:19:27,051 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:19:27,066 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:19:28,066 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:19:30,067 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:19:30,801 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:19:30,857 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:19:30,858 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:19:30,953 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:19:30,954 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:19:31,067 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:19:32,067 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:19:34,068 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:19:34,534 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:19:34,617 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:19:34,736 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:19:35,068 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:19:36,069 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:19:38,069 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:19:38,160 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:19:38,216 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:19:38,311 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:19:39,070 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:19:40,070 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:19:41,611 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:19:41,669 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:19:41,768 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:19:42,071 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:19:42,071 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:19:43,004 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 22:19:44,072 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:19:44,617 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:19:44,676 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:19:44,770 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:19:45,072 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:19:46,072 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:19:46,143 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:19:46,144 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:19:47,073 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:19:47,466 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:19:47,527 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:19:47,625 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:19:48,073 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:19:49,074 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:19:49,990 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:19:50,049 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:19:50,141 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:19:50,149 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:19:51,141 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:19:51,141 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:19:52,141 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:19:52,237 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:19:52,297 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:19:52,393 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:19:53,142 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:19:53,142 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:19:54,142 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:19:54,171 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:19:54,228 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:19:54,323 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:19:55,143 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:19:55,143 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:19:55,823 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:19:55,881 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:19:55,991 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:19:56,143 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:19:56,143 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:19:57,143 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:19:57,902 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:19:58,088 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:19:58,178 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:19:58,181 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:19:59,179 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:19:59,179 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:20:00,179 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:20:01,277 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:20:01,277 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:20:03,180 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:20:04,169 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:20:04,229 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:20:04,328 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:20:05,227 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:20:06,228 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:20:07,228 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:20:09,229 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:20:10,186 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:20:10,245 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:20:10,343 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:20:11,251 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:20:12,252 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:20:13,252 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:20:13,600 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 22:20:15,253 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:20:16,038 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:20:16,117 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:20:16,209 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:20:16,253 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:20:16,544 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:20:16,546 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:20:17,254 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:20:18,254 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:20:21,255 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:20:21,941 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:20:22,000 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:20:22,090 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:20:22,255 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:20:23,256 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:20:24,256 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:20:27,257 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:20:27,691 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:20:27,749 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:20:27,846 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:20:28,257 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:20:28,258 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:20:29,258 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:20:31,259 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:20:31,746 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:20:31,747 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:20:33,461 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:20:33,544 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:20:33,659 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:20:34,260 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:20:34,260 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:20:35,261 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:20:37,261 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:20:39,298 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:20:39,355 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:20:39,457 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:20:40,263 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:20:41,263 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:20:44,148 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 22:20:44,264 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:20:45,163 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:20:45,221 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:20:45,320 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:20:46,312 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:20:46,312 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:20:46,873 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:20:46,874 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:20:47,312 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:20:48,312 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:20:50,313 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:20:50,739 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:20:50,795 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:20:50,892 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:20:51,314 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:20:52,314 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:20:53,315 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:20:56,311 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:20:56,353 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:20:56,370 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:20:56,487 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:20:57,353 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:20:57,354 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:20:58,354 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:21:00,355 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:21:01,866 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:21:01,924 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:21:01,954 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:21:02,019 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:21:02,020 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:21:02,355 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:21:03,356 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:21:04,356 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:21:06,357 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:21:07,406 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:21:07,463 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:21:07,562 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:21:08,357 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:21:08,358 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:21:09,358 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:21:12,359 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:21:12,918 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:21:12,976 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:21:13,072 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:21:13,359 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:21:14,360 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:21:14,832 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 22:21:15,360 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:21:16,360 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:21:17,117 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:21:17,118 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:21:18,428 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:21:18,510 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:21:18,604 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:21:19,362 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:21:19,362 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:21:20,362 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:21:22,363 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:21:23,816 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:21:23,876 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:21:23,968 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:21:24,363 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:21:25,364 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:21:26,364 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:21:28,365 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:21:29,224 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:21:29,280 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:21:29,370 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:21:29,376 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:21:30,370 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:21:32,371 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:21:32,478 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:21:32,480 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:21:33,371 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:21:34,677 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:21:34,736 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:21:34,833 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:21:35,372 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:21:36,372 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:21:37,372 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:21:39,373 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:21:40,015 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:21:40,073 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:21:40,191 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:21:40,373 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:21:41,374 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:21:42,374 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:21:43,375 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:21:45,301 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 22:21:45,375 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:21:45,395 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:21:45,455 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:21:45,572 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:21:46,376 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:21:47,376 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:21:47,640 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:21:47,641 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:21:48,377 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:21:49,377 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:21:50,732 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:21:50,804 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:21:50,895 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:21:51,378 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:21:51,378 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:21:52,378 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:21:55,379 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:21:56,031 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:21:56,088 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:21:56,184 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:21:56,380 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:21:57,380 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:21:58,381 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:21:59,381 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:22:01,185 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:22:01,266 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:22:01,363 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:22:01,382 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:22:01,382 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:22:02,382 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:22:02,715 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:22:02,716 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:22:05,383 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:22:06,381 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:22:06,439 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:22:06,578 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:22:07,434 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:22:07,434 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:22:08,434 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:22:10,435 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:22:11,803 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:22:11,863 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:22:11,957 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:22:12,436 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:22:12,436 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:22:14,436 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:22:15,824 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 22:22:16,437 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:22:17,043 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:22:17,099 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:22:17,197 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:22:17,437 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:22:17,840 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:22:17,841 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:22:18,438 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:22:22,157 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:22:22,213 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:22:22,307 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:22:22,439 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:22:22,439 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:22:24,440 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:22:26,440 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:22:27,182 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:22:27,238 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:22:27,336 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:22:27,441 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:22:28,441 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:22:30,442 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:22:32,128 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:22:32,185 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:22:32,274 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:22:32,442 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:22:33,006 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:22:33,008 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:22:33,443 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:22:34,443 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:22:36,444 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:22:37,085 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:22:37,145 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:22:37,241 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:22:37,444 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:22:38,444 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:22:39,445 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:22:40,445 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:22:42,032 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:22:42,090 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:22:42,187 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:22:42,446 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:22:42,446 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:22:43,446 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:22:46,273 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 22:22:46,884 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:22:46,940 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:22:47,058 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:22:47,448 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:22:47,448 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:22:48,167 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:22:48,169 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:22:49,448 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:22:51,449 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:22:51,706 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:22:51,764 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:22:51,862 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:22:52,450 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:22:53,450 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:22:55,451 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:22:56,632 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:22:56,693 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:22:56,790 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:22:57,452 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:22:57,452 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:22:59,452 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:23:01,433 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:23:01,492 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:23:01,492 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:23:01,589 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:23:02,490 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:23:03,282 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:23:03,283 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:23:03,490 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:23:05,491 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:23:05,979 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:23:06,037 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:23:06,130 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:23:06,491 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:23:07,491 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:23:09,492 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:23:10,559 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:23:10,638 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:23:10,741 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:23:11,493 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:23:11,493 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:23:13,494 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:23:14,946 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:23:15,003 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:23:15,095 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:23:15,494 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:23:15,495 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:23:16,844 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 22:23:18,339 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:23:18,341 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:23:18,495 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:23:19,269 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:23:19,330 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:23:19,426 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:23:19,496 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:23:20,496 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:23:22,497 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:23:23,477 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:23:23,536 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:23:23,635 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:23:24,537 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:23:24,537 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:23:26,537 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:23:27,571 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:23:27,628 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:23:27,722 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:23:28,538 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:23:28,538 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:23:30,539 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:23:31,395 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:23:31,455 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:23:31,566 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:23:32,564 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:23:32,565 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:23:33,524 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:23:33,525 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:23:34,565 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:23:35,007 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:23:35,087 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:23:35,180 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:23:35,565 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:23:36,566 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:23:38,299 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:23:38,358 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:23:38,475 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:23:38,567 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:23:38,567 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:23:40,567 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:23:41,531 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:23:41,590 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:23:41,682 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:23:42,591 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:23:42,591 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:23:44,267 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:23:44,326 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:23:44,446 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:23:44,591 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:23:44,592 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:23:46,592 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:23:46,868 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:23:46,932 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:23:47,027 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:23:47,457 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 22:23:47,593 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:23:48,593 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:23:48,708 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:23:48,710 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:23:49,183 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:23:49,241 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:23:49,338 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:23:49,593 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:23:50,594 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:23:51,176 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:23:51,235 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:23:51,328 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:23:51,594 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:23:52,594 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:23:52,917 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:23:52,974 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:23:53,074 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:23:53,595 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:23:54,595 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:23:54,952 DEBUG SenderThread:265689 [sender.py:send():235] send: history +2022-03-02 22:23:55,135 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:23:55,222 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:23:55,596 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:23:56,596 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:23:58,596 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:24:02,598 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:24:03,775 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:24:03,776 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:24:04,599 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:24:08,600 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:24:12,601 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:24:16,603 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:24:17,961 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 22:24:18,603 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:24:18,886 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:24:18,887 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:24:22,605 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:24:27,606 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:24:31,607 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:24:33,946 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:24:33,947 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:24:35,609 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:24:39,610 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:24:40,678 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 22:24:40,679 DEBUG SenderThread:265689 [sender.py:send():235] send: telemetry +2022-03-02 22:24:40,679 DEBUG SenderThread:265689 [sender.py:send():235] send: exit +2022-03-02 22:24:40,679 INFO SenderThread:265689 [sender.py:send_exit():371] handling exit code: 1 +2022-03-02 22:24:40,679 INFO SenderThread:265689 [sender.py:send_exit():373] handling runtime: 2401 +2022-03-02 22:24:40,736 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:24:40,736 INFO SenderThread:265689 [sender.py:send_exit():379] send defer +2022-03-02 22:24:40,736 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 22:24:40,737 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: defer +2022-03-02 22:24:40,737 INFO HandlerThread:265689 [handler.py:handle_request_defer():154] handle defer: 0 +2022-03-02 22:24:40,737 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: defer +2022-03-02 22:24:40,737 INFO SenderThread:265689 [sender.py:send_request_defer():388] handle sender defer: 0 +2022-03-02 22:24:40,737 INFO SenderThread:265689 [sender.py:transition_state():392] send defer: 1 +2022-03-02 22:24:40,738 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: defer +2022-03-02 22:24:40,738 INFO HandlerThread:265689 [handler.py:handle_request_defer():154] handle defer: 1 +2022-03-02 22:24:40,854 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: defer +2022-03-02 22:24:40,854 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 22:24:40,854 INFO SenderThread:265689 [sender.py:send_request_defer():388] handle sender defer: 1 +2022-03-02 22:24:40,854 INFO SenderThread:265689 [sender.py:transition_state():392] send defer: 2 +2022-03-02 22:24:40,855 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 22:24:40,855 DEBUG SenderThread:265689 [sender.py:send():235] send: stats +2022-03-02 22:24:40,855 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: defer +2022-03-02 22:24:40,855 INFO HandlerThread:265689 [handler.py:handle_request_defer():154] handle defer: 2 +2022-03-02 22:24:40,856 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: defer +2022-03-02 22:24:40,856 INFO SenderThread:265689 [sender.py:send_request_defer():388] handle sender defer: 2 +2022-03-02 22:24:40,856 INFO SenderThread:265689 [sender.py:transition_state():392] send defer: 3 +2022-03-02 22:24:40,856 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: defer +2022-03-02 22:24:40,856 INFO HandlerThread:265689 [handler.py:handle_request_defer():154] handle defer: 3 +2022-03-02 22:24:40,913 DEBUG SenderThread:265689 [sender.py:send():235] send: summary +2022-03-02 22:24:41,000 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 22:24:41,003 INFO SenderThread:265689 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:24:41,003 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: defer +2022-03-02 22:24:41,004 INFO SenderThread:265689 [sender.py:send_request_defer():388] handle sender defer: 3 +2022-03-02 22:24:41,004 INFO SenderThread:265689 [sender.py:transition_state():392] send defer: 4 +2022-03-02 22:24:41,004 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 22:24:41,004 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: defer +2022-03-02 22:24:41,004 INFO HandlerThread:265689 [handler.py:handle_request_defer():154] handle defer: 4 +2022-03-02 22:24:41,005 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: defer +2022-03-02 22:24:41,005 INFO SenderThread:265689 [sender.py:send_request_defer():388] handle sender defer: 4 +2022-03-02 22:24:41,106 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 22:24:41,651 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:24:41,657 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:24:41,903 INFO SenderThread:265689 [sender.py:transition_state():392] send defer: 5 +2022-03-02 22:24:41,903 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 22:24:41,904 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: defer +2022-03-02 22:24:41,904 INFO HandlerThread:265689 [handler.py:handle_request_defer():154] handle defer: 5 +2022-03-02 22:24:41,904 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: defer +2022-03-02 22:24:41,904 INFO SenderThread:265689 [sender.py:send_request_defer():388] handle sender defer: 5 +2022-03-02 22:24:41,904 INFO SenderThread:265689 [dir_watcher.py:finish():283] shutting down directory watcher +2022-03-02 22:24:42,005 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 22:24:42,642 INFO Thread-8 :265689 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/config.yaml +2022-03-02 22:24:42,642 INFO SenderThread:265689 [dir_watcher.py:finish():313] scan: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files +2022-03-02 22:24:42,643 INFO SenderThread:265689 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-metadata.json wandb-metadata.json +2022-03-02 22:24:42,643 INFO SenderThread:265689 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log output.log +2022-03-02 22:24:42,643 INFO SenderThread:265689 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json wandb-summary.json +2022-03-02 22:24:42,646 INFO SenderThread:265689 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/requirements.txt requirements.txt +2022-03-02 22:24:42,646 INFO SenderThread:265689 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/config.yaml config.yaml +2022-03-02 22:24:42,647 INFO SenderThread:265689 [sender.py:transition_state():392] send defer: 6 +2022-03-02 22:24:42,647 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 22:24:42,650 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: defer +2022-03-02 22:24:42,650 INFO HandlerThread:265689 [handler.py:handle_request_defer():154] handle defer: 6 +2022-03-02 22:24:42,650 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: defer +2022-03-02 22:24:42,650 INFO SenderThread:265689 [sender.py:send_request_defer():388] handle sender defer: 6 +2022-03-02 22:24:42,650 INFO SenderThread:265689 [file_pusher.py:finish():177] shutting down file pusher +2022-03-02 22:24:42,749 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 22:24:42,749 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 22:24:42,851 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 22:24:42,851 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 22:24:42,952 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 22:24:42,953 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 22:24:42,981 INFO Thread-12 :265689 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/output.log +2022-03-02 22:24:42,997 INFO Thread-15 :265689 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/config.yaml +2022-03-02 22:24:43,005 INFO Thread-13 :265689 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/wandb-summary.json +2022-03-02 22:24:43,054 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 22:24:43,054 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 22:24:43,156 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 22:24:43,156 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 22:24:43,167 INFO Thread-14 :265689 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/files/requirements.txt +2022-03-02 22:24:43,258 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 22:24:43,258 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 22:24:43,359 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 22:24:43,360 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 22:24:43,367 INFO Thread-7 :265689 [sender.py:transition_state():392] send defer: 7 +2022-03-02 22:24:43,368 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: defer +2022-03-02 22:24:43,368 INFO HandlerThread:265689 [handler.py:handle_request_defer():154] handle defer: 7 +2022-03-02 22:24:43,368 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: defer +2022-03-02 22:24:43,368 INFO SenderThread:265689 [sender.py:send_request_defer():388] handle sender defer: 7 +2022-03-02 22:24:43,461 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 22:24:44,698 INFO SenderThread:265689 [sender.py:transition_state():392] send defer: 8 +2022-03-02 22:24:44,698 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 22:24:44,699 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: defer +2022-03-02 22:24:44,699 INFO HandlerThread:265689 [handler.py:handle_request_defer():154] handle defer: 8 +2022-03-02 22:24:44,699 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: defer +2022-03-02 22:24:44,699 INFO SenderThread:265689 [sender.py:send_request_defer():388] handle sender defer: 8 +2022-03-02 22:24:44,699 INFO SenderThread:265689 [sender.py:transition_state():392] send defer: 9 +2022-03-02 22:24:44,700 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: defer +2022-03-02 22:24:44,700 INFO HandlerThread:265689 [handler.py:handle_request_defer():154] handle defer: 9 +2022-03-02 22:24:44,701 DEBUG SenderThread:265689 [sender.py:send():235] send: final +2022-03-02 22:24:44,701 DEBUG SenderThread:265689 [sender.py:send():235] send: footer +2022-03-02 22:24:44,701 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: defer +2022-03-02 22:24:44,701 INFO SenderThread:265689 [sender.py:send_request_defer():388] handle sender defer: 9 +2022-03-02 22:24:44,800 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 22:24:44,801 DEBUG SenderThread:265689 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 22:24:44,801 INFO SenderThread:265689 [file_pusher.py:join():182] waiting for file pusher +2022-03-02 22:24:44,868 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: get_summary +2022-03-02 22:24:44,976 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: sampled_history +2022-03-02 22:24:44,980 DEBUG HandlerThread:265689 [handler.py:handle_request():131] handle_request: shutdown +2022-03-02 22:24:44,980 INFO HandlerThread:265689 [handler.py:finish():739] shutting down handler +2022-03-02 22:24:45,701 INFO WriterThread:265689 [datastore.py:close():281] close: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/run-2u4nhnsf.wandb +2022-03-02 22:24:45,867 INFO SenderThread:265689 [sender.py:finish():1075] shutting down sender +2022-03-02 22:24:45,867 INFO SenderThread:265689 [file_pusher.py:finish():177] shutting down file pusher +2022-03-02 22:24:45,867 INFO SenderThread:265689 [file_pusher.py:join():182] waiting for file pusher +2022-03-02 22:24:45,875 INFO MainThread:265689 [internal.py:handle_exit():79] Internal process exited diff --git a/wandb/run-20220302_214437-2u4nhnsf/logs/debug.log b/wandb/run-20220302_214437-2u4nhnsf/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..1f2c0140c53c12dd0facc81c125bf2fe68162bcd --- /dev/null +++ b/wandb/run-20220302_214437-2u4nhnsf/logs/debug.log @@ -0,0 +1,149 @@ +2022-03-02 21:44:37,569 INFO MainThread:265590 [wandb_setup.py:_flush():75] Loading settings from /home/sanchit_huggingface_co/.config/wandb/settings +2022-03-02 21:44:37,569 INFO MainThread:265590 [wandb_setup.py:_flush():75] Loading settings from /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/settings +2022-03-02 21:44:37,570 INFO MainThread:265590 [wandb_setup.py:_flush():75] Loading settings from environment variables: {} +2022-03-02 21:44:37,570 INFO MainThread:265590 [wandb_setup.py:_flush():75] Inferring run settings from compute environment: {'program_relpath': 'run_speech_recognition_seq2seq.py', 'program': '/home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/run_speech_recognition_seq2seq.py'} +2022-03-02 21:44:37,570 INFO MainThread:265590 [wandb_init.py:_log_setup():386] Logging user logs to /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/logs/debug.log +2022-03-02 21:44:37,570 INFO MainThread:265590 [wandb_init.py:_log_setup():387] Logging internal logs to /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_214437-2u4nhnsf/logs/debug-internal.log +2022-03-02 21:44:37,570 INFO MainThread:265590 [wandb_init.py:init():420] calling init triggers +2022-03-02 21:44:37,570 INFO MainThread:265590 [wandb_init.py:init():425] wandb.init called with sweep_config: {} +config: {} +2022-03-02 21:44:37,570 INFO MainThread:265590 [wandb_init.py:init():471] starting backend +2022-03-02 21:44:37,570 INFO MainThread:265590 [backend.py:_multiprocessing_setup():99] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2022-03-02 21:44:37,641 INFO MainThread:265590 [backend.py:ensure_launched():219] starting backend process... +2022-03-02 21:44:37,709 INFO MainThread:265590 [backend.py:ensure_launched():224] started backend process with pid: 265689 +2022-03-02 21:44:37,711 INFO MainThread:265590 [wandb_init.py:init():480] backend started and connected +2022-03-02 21:44:37,721 INFO MainThread:265590 [wandb_init.py:init():550] updated telemetry +2022-03-02 21:44:37,884 INFO MainThread:265590 [wandb_init.py:init():581] communicating current version +2022-03-02 21:44:38,595 INFO MainThread:265590 [wandb_init.py:init():586] got version response upgrade_message: "wandb version 0.12.11 is available! To upgrade, please run:\n $ pip install wandb --upgrade" + +2022-03-02 21:44:38,595 INFO MainThread:265590 [wandb_init.py:init():596] communicating run to backend with 30 second timeout +2022-03-02 21:44:38,705 INFO MainThread:265590 [wandb_init.py:init():624] starting run threads in backend +2022-03-02 21:44:38,844 INFO MainThread:265590 [wandb_run.py:_console_start():1827] atexit reg +2022-03-02 21:44:38,844 INFO MainThread:265590 [wandb_run.py:_redirect():1701] redirect: SettingsConsole.REDIRECT +2022-03-02 21:44:38,845 INFO MainThread:265590 [wandb_run.py:_redirect():1706] Redirecting console. +2022-03-02 21:44:38,846 INFO MainThread:265590 [wandb_run.py:_redirect():1762] Redirects installed. +2022-03-02 21:44:38,847 INFO MainThread:265590 [wandb_init.py:init():651] run started, returning control to user process +2022-03-02 21:44:38,849 INFO MainThread:265590 [wandb_run.py:_config_callback():966] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'torch.float32', 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 50, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'architectures': ['SpeechEncoderDecoderModel'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 50256, 'eos_token_id': 50256, 'sep_token_id': None, 'decoder_start_token_id': 50256, 'task_specific_params': None, 'problem_type': None, '_name_or_path': './', 'transformers_version': None, 'decoder': {'vocab_size': 50257, 'n_positions': 1024, 'n_embd': 1024, 'n_layer': 24, 'n_head': 16, 'n_inner': None, 'activation_function': 'gelu_new', 'resid_pdrop': 0.0, 'embd_pdrop': 0.0, 'attn_pdrop': 0.0, 'layer_norm_epsilon': 1e-05, 'initializer_range': 0.02, 'summary_type': 'cls_index', 'summary_use_proj': True, 'summary_activation': None, 'summary_first_dropout': 0.0, 'summary_proj_to_labels': True, 'scale_attn_weights': True, 'use_cache': False, 'scale_attn_by_inverse_layer_idx': False, 'reorder_and_upcast_attn': False, 'bos_token_id': 50256, 'eos_token_id': 50256, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': True, 'cross_attention_hidden_size': None, 'add_cross_attention': True, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'architectures': ['GPT2LMHeadModel'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'pad_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': {'text-generation': {'do_sample': True, 'max_length': 50}}, 'problem_type': None, '_name_or_path': 'gpt2-medium', 'transformers_version': '4.17.0.dev0', 'n_ctx': 1024, 'n_special': 0, 'predict_special_tokens': True, 'model_type': 'gpt2'}, 'encoder': {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'architectures': ['Wav2Vec2ForPreTraining'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 1, 'pad_token_id': 0, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'facebook/wav2vec2-large-lv60', 'transformers_version': '4.17.0.dev0', 'feat_extract_dropout': 0.0, 'gradient_checkpointing': False, 'hidden_dropout_prob': 0.0, 'num_feat_extract_layers': 7, 'hidden_size': 1024, 'feat_extract_norm': 'layer', 'feat_extract_activation': 'gelu', 'conv_dim': [512, 512, 512, 512, 512, 512, 512], 'conv_stride': [5, 2, 2, 2, 2, 2, 2], 'conv_kernel': [10, 3, 3, 3, 3, 2, 2], 'conv_bias': True, 'num_conv_pos_embeddings': 128, 'num_conv_pos_embedding_groups': 16, 'num_hidden_layers': 24, 'intermediate_size': 4096, 'hidden_act': 'gelu', 'num_attention_heads': 16, 'hidden_dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'feat_proj_dropout': 0.0, 'final_dropout': 0.0, 'layerdrop': 0.0, 'layer_norm_eps': 1e-05, 'initializer_range': 0.02, 'vocab_size': 32, 'do_stable_layer_norm': True, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.0, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'num_codevectors_per_group': 320, 'num_codevector_groups': 2, 'contrastive_logits_temperature': 0.1, 'feat_quantizer_dropout': 0.0, 'num_negatives': 100, 'codevector_dim': 768, 'proj_codevector_dim': 768, 'diversity_loss_weight': 0.1, 'ctc_loss_reduction': 'sum', 'ctc_zero_infinity': False, 'add_adapter': True, 'adapter_kernel_size': 3, 'adapter_stride': 2, 'num_adapter_layers': 3, 'output_hidden_size': 1024, 'classifier_proj_size': 256, 'tdnn_dim': [512, 512, 512, 512, 1500], 'tdnn_kernel': [5, 3, 3, 1, 1], 'tdnn_dilation': [1, 2, 3, 1, 1], 'xvector_output_dim': 512, 'model_type': 'wav2vec2'}, 'model_type': 'speech-encoder-decoder', 'processor_class': 'Wav2Vec2Processor', 'use_cache': False, 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 14, 'per_device_eval_batch_size': 14, 'per_gpu_train_batch_size': 'None', 'per_gpu_eval_batch_size': 'None', 'gradient_accumulation_steps': 2, 'eval_accumulation_steps': 'None', 'learning_rate': 0.0003, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1.0, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Mar02_21-43-55_sanchit--v100', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 1, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 500, 'save_total_limit': 1, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'amp', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': 'None', 'local_rank': -1, 'xpu_backend': 'None', 'tpu_num_cores': 'None', 'tpu_metrics_debug': False, 'debug': '[]', 'dataloader_drop_last': False, 'eval_steps': 500, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': 'None', 'load_best_model_at_end': False, 'metric_for_best_model': 'None', 'greater_is_better': 'None', 'ignore_data_skip': False, 'sharded_ddp': '[]', 'deepspeed': 'None', 'label_smoothing_factor': 0.0, 'optim': 'adamw_hf', 'adafactor': False, 'group_by_length': True, 'length_column_name': 'input_length', 'report_to': "['wandb']", 'ddp_find_unused_parameters': 'None', 'ddp_bucket_cap_mb': 'None', 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': 'None', 'hub_model_id': 'None', 'hub_strategy': 'every_save', 'hub_token': '', 'gradient_checkpointing': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': 'None', 'push_to_hub_organization': 'None', 'push_to_hub_token': '', '_n_gpu': 1, 'mp_parameters': '', 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 40, 'generation_num_beams': 1, 'train_batch_size': 14, 'eval_batch_size': 14} +2022-03-02 21:44:38,852 INFO MainThread:265590 [wandb_watch.py:watch():43] Watching +2022-03-02 22:24:38,308 INFO MainThread:265590 [wandb_run.py:_atexit_cleanup():1797] got exitcode: 1 +2022-03-02 22:24:38,309 INFO MainThread:265590 [wandb_run.py:_restore():1769] restore +2022-03-02 22:24:40,737 INFO MainThread:265590 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 2095 + total_bytes: 2095 +} + +2022-03-02 22:24:40,855 INFO MainThread:265590 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 2095 + total_bytes: 2095 +} + +2022-03-02 22:24:41,005 INFO MainThread:265590 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 2095 + total_bytes: 2095 +} + +2022-03-02 22:24:41,904 INFO MainThread:265590 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 2095 + total_bytes: 2095 +} + +2022-03-02 22:24:42,648 INFO MainThread:265590 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 2095 + total_bytes: 2248104 +} + +2022-03-02 22:24:42,750 INFO MainThread:265590 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 2095 + total_bytes: 2248104 +} + +2022-03-02 22:24:42,851 INFO MainThread:265590 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 2244867 + total_bytes: 2248104 +} + +2022-03-02 22:24:42,953 INFO MainThread:265590 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 2248104 + total_bytes: 2248104 +} + +2022-03-02 22:24:43,055 INFO MainThread:265590 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 2248104 + total_bytes: 2248104 +} + +2022-03-02 22:24:43,157 INFO MainThread:265590 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 2248104 + total_bytes: 2248104 +} + +2022-03-02 22:24:43,258 INFO MainThread:265590 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 2248104 + total_bytes: 2248104 +} + +2022-03-02 22:24:43,360 INFO MainThread:265590 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 2248104 + total_bytes: 2248104 +} + +2022-03-02 22:24:44,699 INFO MainThread:265590 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 2248104 + total_bytes: 2248104 +} + +2022-03-02 22:24:44,867 INFO MainThread:265590 [wandb_run.py:_wait_for_finish():1929] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 2248104 + total_bytes: 2248104 +} +local_info { +} + +2022-03-02 22:24:46,025 INFO MainThread:265590 [wandb_run.py:_append_history():2144] rendering history +2022-03-02 22:24:46,026 INFO MainThread:265590 [wandb_run.py:_append_summary():2102] rendering summary +2022-03-02 22:24:46,027 INFO MainThread:265590 [wandb_run.py:_append_files():2194] logging synced files diff --git a/wandb/run-20220302_214437-2u4nhnsf/run-2u4nhnsf.wandb b/wandb/run-20220302_214437-2u4nhnsf/run-2u4nhnsf.wandb new file mode 100644 index 0000000000000000000000000000000000000000..a15cfdb31f6148a13dac730ebea06f3fc51b3a92 --- /dev/null +++ b/wandb/run-20220302_214437-2u4nhnsf/run-2u4nhnsf.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2097f4493d43dc5464038250e585dd89e0b00de7e3c675d9afcbfe58a143e87 +size 17687292 diff --git a/wandb/run-20220302_222605-10glutwr/files/config.yaml b/wandb/run-20220302_222605-10glutwr/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fa993865270d35796abb99eab34bb3d198ebde22 --- /dev/null +++ b/wandb/run-20220302_222605-10glutwr/files/config.yaml @@ -0,0 +1,11321 @@ +wandb_version: 1 + +_n_gpu: + desc: null + value: 1 +_name_or_path: + desc: null + value: ./ +_wandb: + desc: null + value: + cli_version: 0.12.10 + framework: huggingface + huggingface_version: 4.17.0.dev0 + is_jupyter_run: false + is_kaggle_kernel: false + m: + - 1: train/global_step + 6: + - 3 + - 1: train/loss + 5: 1 + 6: + - 1 + - 1: train/learning_rate + 5: 1 + 6: + - 1 + - 1: train/epoch + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.ln_f\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.ln_f\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.ln_f\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.ln_f\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.ln_f\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.ln_f\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.wpe\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.wpe\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.wpe\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.wte\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.wte\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.wte\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.2\.conv\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.2\.conv\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.2\.conv\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.2\.conv\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.2\.conv\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.2\.conv\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.1\.conv\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.1\.conv\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.1\.conv\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.1\.conv\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.1\.conv\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.1\.conv\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.0\.conv\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.0\.conv\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.0\.conv\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.0\.conv\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.0\.conv\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.0\.conv\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.weight_v._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.weight_v.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.weight_v.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.weight_g._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.weight_g.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.weight_g.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.projection\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.projection\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.projection\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.projection\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.projection\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.projection\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + python_version: 3.9.5 + start_time: 1646259965 + t: + 1: + - 1 + - 5 + - 11 + 2: + - 1 + - 5 + - 11 + 3: + - 1 + - 7 + - 13 + 4: 3.9.5 + 5: 0.12.10 + 6: 4.17.0.dev0 + 8: + - 5 +adafactor: + desc: null + value: false +adam_beta1: + desc: null + value: 0.9 +adam_beta2: + desc: null + value: 0.999 +adam_epsilon: + desc: null + value: 1.0e-08 +add_cross_attention: + desc: null + value: false +architectures: + desc: null + value: + - SpeechEncoderDecoderModel +bad_words_ids: + desc: null + value: null +bf16: + desc: null + value: false +bf16_full_eval: + desc: null + value: false +bos_token_id: + desc: null + value: null +chunk_size_feed_forward: + desc: null + value: 0 +cross_attention_hidden_size: + desc: null + value: null +dataloader_drop_last: + desc: null + value: false +dataloader_num_workers: + desc: null + value: 0 +dataloader_pin_memory: + desc: null + value: true +ddp_bucket_cap_mb: + desc: null + value: None +ddp_find_unused_parameters: + desc: null + value: None +debug: + desc: null + value: '[]' +decoder: + desc: null + value: + _name_or_path: gpt2-medium + activation_function: gelu_new + add_cross_attention: true + architectures: + - GPT2LMHeadModel + attn_pdrop: 0.0 + bad_words_ids: null + bos_token_id: 50256 + chunk_size_feed_forward: 0 + cross_attention_hidden_size: null + decoder_start_token_id: null + diversity_penalty: 0.0 + do_sample: false + early_stopping: false + embd_pdrop: 0.0 + encoder_no_repeat_ngram_size: 0 + eos_token_id: 50256 + finetuning_task: null + forced_bos_token_id: null + forced_eos_token_id: null + id2label: + '0': LABEL_0 + '1': LABEL_1 + initializer_range: 0.02 + is_decoder: true + is_encoder_decoder: false + label2id: + LABEL_0: 0 + LABEL_1: 1 + layer_norm_epsilon: 1.0e-05 + length_penalty: 1.0 + max_length: 20 + min_length: 0 + model_type: gpt2 + n_ctx: 1024 + n_embd: 1024 + n_head: 16 + n_inner: null + n_layer: 24 + n_positions: 1024 + n_special: 0 + no_repeat_ngram_size: 0 + num_beam_groups: 1 + num_beams: 1 + num_return_sequences: 1 + output_attentions: false + output_hidden_states: false + output_scores: false + pad_token_id: null + predict_special_tokens: true + prefix: null + problem_type: null + pruned_heads: {} + remove_invalid_values: false + reorder_and_upcast_attn: false + repetition_penalty: 1.0 + resid_pdrop: 0.0 + return_dict: true + return_dict_in_generate: false + scale_attn_by_inverse_layer_idx: false + scale_attn_weights: true + sep_token_id: null + summary_activation: null + summary_first_dropout: 0.0 + summary_proj_to_labels: true + summary_type: cls_index + summary_use_proj: true + task_specific_params: + text-generation: + do_sample: true + max_length: 50 + temperature: 1.0 + tie_encoder_decoder: false + tie_word_embeddings: true + tokenizer_class: null + top_k: 50 + top_p: 1.0 + torch_dtype: null + torchscript: false + transformers_version: 4.17.0.dev0 + use_bfloat16: false + use_cache: false + vocab_size: 50257 +decoder_start_token_id: + desc: null + value: 50256 +deepspeed: + desc: null + value: None +disable_tqdm: + desc: null + value: false +diversity_penalty: + desc: null + value: 0.0 +do_eval: + desc: null + value: true +do_predict: + desc: null + value: false +do_sample: + desc: null + value: false +do_train: + desc: null + value: true +early_stopping: + desc: null + value: false +encoder: + desc: null + value: + _name_or_path: facebook/wav2vec2-large-lv60 + activation_dropout: 0.0 + adapter_kernel_size: 3 + adapter_stride: 2 + add_adapter: true + add_cross_attention: false + apply_spec_augment: false + architectures: + - Wav2Vec2ForPreTraining + attention_dropout: 0.0 + bad_words_ids: null + bos_token_id: 1 + chunk_size_feed_forward: 0 + classifier_proj_size: 256 + codevector_dim: 768 + contrastive_logits_temperature: 0.1 + conv_bias: true + conv_dim: + - 512 + - 512 + - 512 + - 512 + - 512 + - 512 + - 512 + conv_kernel: + - 10 + - 3 + - 3 + - 3 + - 3 + - 2 + - 2 + conv_stride: + - 5 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + cross_attention_hidden_size: null + ctc_loss_reduction: sum + ctc_zero_infinity: false + decoder_start_token_id: null + diversity_loss_weight: 0.1 + diversity_penalty: 0.0 + do_sample: false + do_stable_layer_norm: true + early_stopping: false + encoder_no_repeat_ngram_size: 0 + eos_token_id: 2 + feat_extract_activation: gelu + feat_extract_dropout: 0.0 + feat_extract_norm: layer + feat_proj_dropout: 0.0 + feat_quantizer_dropout: 0.0 + final_dropout: 0.0 + finetuning_task: null + forced_bos_token_id: null + forced_eos_token_id: null + gradient_checkpointing: false + hidden_act: gelu + hidden_dropout: 0.0 + hidden_dropout_prob: 0.0 + hidden_size: 1024 + id2label: + '0': LABEL_0 + '1': LABEL_1 + initializer_range: 0.02 + intermediate_size: 4096 + is_decoder: false + is_encoder_decoder: false + label2id: + LABEL_0: 0 + LABEL_1: 1 + layer_norm_eps: 1.0e-05 + layerdrop: 0.0 + length_penalty: 1.0 + mask_feature_length: 10 + mask_feature_min_masks: 0 + mask_feature_prob: 0.0 + mask_time_length: 10 + mask_time_min_masks: 2 + mask_time_prob: 0.0 + max_length: 20 + min_length: 0 + model_type: wav2vec2 + no_repeat_ngram_size: 0 + num_adapter_layers: 3 + num_attention_heads: 16 + num_beam_groups: 1 + num_beams: 1 + num_codevector_groups: 2 + num_codevectors_per_group: 320 + num_conv_pos_embedding_groups: 16 + num_conv_pos_embeddings: 128 + num_feat_extract_layers: 7 + num_hidden_layers: 24 + num_negatives: 100 + num_return_sequences: 1 + output_attentions: false + output_hidden_size: 1024 + output_hidden_states: false + output_scores: false + pad_token_id: 0 + prefix: null + problem_type: null + proj_codevector_dim: 768 + pruned_heads: {} + remove_invalid_values: false + repetition_penalty: 1.0 + return_dict: true + return_dict_in_generate: false + sep_token_id: null + task_specific_params: null + tdnn_dilation: + - 1 + - 2 + - 3 + - 1 + - 1 + tdnn_dim: + - 512 + - 512 + - 512 + - 512 + - 1500 + tdnn_kernel: + - 5 + - 3 + - 3 + - 1 + - 1 + temperature: 1.0 + tie_encoder_decoder: false + tie_word_embeddings: true + tokenizer_class: null + top_k: 50 + top_p: 1.0 + torch_dtype: null + torchscript: false + transformers_version: 4.17.0.dev0 + use_bfloat16: false + use_weighted_layer_sum: false + vocab_size: 32 + xvector_output_dim: 512 +encoder_no_repeat_ngram_size: + desc: null + value: 0 +eos_token_id: + desc: null + value: 50256 +eval_accumulation_steps: + desc: null + value: None +eval_batch_size: + desc: null + value: 14 +eval_steps: + desc: null + value: 500 +evaluation_strategy: + desc: null + value: steps +finetuning_task: + desc: null + value: null +forced_bos_token_id: + desc: null + value: null +forced_eos_token_id: + desc: null + value: null +fp16: + desc: null + value: true +fp16_backend: + desc: null + value: auto +fp16_full_eval: + desc: null + value: false +fp16_opt_level: + desc: null + value: O1 +generation_max_length: + desc: null + value: 40 +generation_num_beams: + desc: null + value: 1 +gradient_accumulation_steps: + desc: null + value: 4 +gradient_checkpointing: + desc: null + value: true +greater_is_better: + desc: null + value: None +group_by_length: + desc: null + value: true +half_precision_backend: + desc: null + value: amp +hub_model_id: + desc: null + value: None +hub_strategy: + desc: null + value: every_save +hub_token: + desc: null + value: +id2label: + desc: null + value: + '0': LABEL_0 + '1': LABEL_1 +ignore_data_skip: + desc: null + value: false +is_decoder: + desc: null + value: false +is_encoder_decoder: + desc: null + value: true +label2id: + desc: null + value: + LABEL_0: 0 + LABEL_1: 1 +label_names: + desc: null + value: None +label_smoothing_factor: + desc: null + value: 0.0 +learning_rate: + desc: null + value: 0.0003 +length_column_name: + desc: null + value: input_length +length_penalty: + desc: null + value: 1.0 +load_best_model_at_end: + desc: null + value: false +local_rank: + desc: null + value: -1 +log_level: + desc: null + value: -1 +log_level_replica: + desc: null + value: -1 +log_on_each_node: + desc: null + value: true +logging_dir: + desc: null + value: ./runs/Mar02_22-25-22_sanchit--v100 +logging_first_step: + desc: null + value: false +logging_nan_inf_filter: + desc: null + value: true +logging_steps: + desc: null + value: 1 +logging_strategy: + desc: null + value: steps +lr_scheduler_type: + desc: null + value: linear +max_grad_norm: + desc: null + value: 1.0 +max_length: + desc: null + value: 50 +max_steps: + desc: null + value: -1 +metric_for_best_model: + desc: null + value: None +min_length: + desc: null + value: 0 +model_type: + desc: null + value: speech-encoder-decoder +mp_parameters: + desc: null + value: '' +no_cuda: + desc: null + value: false +no_repeat_ngram_size: + desc: null + value: 0 +num_beam_groups: + desc: null + value: 1 +num_beams: + desc: null + value: 1 +num_return_sequences: + desc: null + value: 1 +num_train_epochs: + desc: null + value: 1.0 +optim: + desc: null + value: adamw_hf +output_attentions: + desc: null + value: false +output_dir: + desc: null + value: ./ +output_hidden_states: + desc: null + value: false +output_scores: + desc: null + value: false +overwrite_output_dir: + desc: null + value: true +pad_token_id: + desc: null + value: 50256 +past_index: + desc: null + value: -1 +per_device_eval_batch_size: + desc: null + value: 14 +per_device_train_batch_size: + desc: null + value: 14 +per_gpu_eval_batch_size: + desc: null + value: None +per_gpu_train_batch_size: + desc: null + value: None +predict_with_generate: + desc: null + value: true +prediction_loss_only: + desc: null + value: false +prefix: + desc: null + value: null +problem_type: + desc: null + value: null +processor_class: + desc: null + value: Wav2Vec2Processor +pruned_heads: + desc: null + value: {} +push_to_hub: + desc: null + value: true +push_to_hub_model_id: + desc: null + value: None +push_to_hub_organization: + desc: null + value: None +push_to_hub_token: + desc: null + value: +remove_invalid_values: + desc: null + value: false +remove_unused_columns: + desc: null + value: true +repetition_penalty: + desc: null + value: 1.0 +report_to: + desc: null + value: '[''wandb'']' +resume_from_checkpoint: + desc: null + value: None +return_dict: + desc: null + value: true +return_dict_in_generate: + desc: null + value: false +run_name: + desc: null + value: ./ +save_on_each_node: + desc: null + value: false +save_steps: + desc: null + value: 500 +save_strategy: + desc: null + value: steps +save_total_limit: + desc: null + value: 1 +seed: + desc: null + value: 42 +sep_token_id: + desc: null + value: null +sharded_ddp: + desc: null + value: '[]' +skip_memory_metrics: + desc: null + value: true +sortish_sampler: + desc: null + value: false +task_specific_params: + desc: null + value: null +temperature: + desc: null + value: 1.0 +tf32: + desc: null + value: None +tie_encoder_decoder: + desc: null + value: false +tie_word_embeddings: + desc: null + value: false +tokenizer_class: + desc: null + value: null +top_k: + desc: null + value: 50 +top_p: + desc: null + value: 1.0 +torch_dtype: + desc: null + value: torch.float32 +torchscript: + desc: null + value: false +tpu_metrics_debug: + desc: null + value: false +tpu_num_cores: + desc: null + value: None +train_batch_size: + desc: null + value: 14 +transformers_version: + desc: null + value: null +use_bfloat16: + desc: null + value: false +use_cache: + desc: null + value: false +use_legacy_prediction_loop: + desc: null + value: false +warmup_ratio: + desc: null + value: 0.0 +warmup_steps: + desc: null + value: 500 +weight_decay: + desc: null + value: 0.0 +xpu_backend: + desc: null + value: None diff --git a/wandb/run-20220302_222605-10glutwr/files/output.log b/wandb/run-20220302_222605-10glutwr/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..23f9c1d3be8c92fe0c02f9c4405c4341f2c030be --- /dev/null +++ b/wandb/run-20220302_222605-10glutwr/files/output.log @@ -0,0 +1,2163 @@ + + + 0%| | 0/509 [00:00> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:26:13,115 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:26:16,126 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8074, 'learning_rate': 0.0, 'epoch': 0.0} +[WARNING|modeling_utils.py:388] 2022-03-02 22:26:19,106 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 0%|▏ | 1/509 [00:12<1:48:51, 12.86s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:26:22,182 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:26:25,160 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:26:28,129 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:26:31,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%|▎ | 2/509 [00:25<1:45:28, 12.48s/it] + + 0%|▎ | 2/509 [00:25<1:45:28, 12.48s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:26:34,361 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:26:37,355 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:26:40,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8886, 'learning_rate': 1.2e-06, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-02 22:26:43,229 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 1%|▍ | 3/509 [00:37<1:43:14, 12.24s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:26:46,240 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:26:49,228 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:26:52,127 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8226, 'learning_rate': 1.8e-06, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-02 22:26:55,048 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 1%|▋ | 4/509 [00:48<1:41:38, 12.08s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:26:58,072 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:27:00,951 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:27:03,888 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7892, 'learning_rate': 2.4e-06, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-02 22:27:06,726 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 1%|▊ | 5/509 [01:00<1:40:13, 11.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:27:09,757 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:27:12,657 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:27:15,524 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8227, 'learning_rate': 2.9999999999999997e-06, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-02 22:27:18,428 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 1%|▉ | 6/509 [01:12<1:39:22, 11.85s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:27:21,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:27:24,292 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:27:27,082 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7757, 'learning_rate': 3.6e-06, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-02 22:27:29,917 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 1%|█ | 7/509 [01:23<1:38:10, 11.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:27:32,853 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:27:35,716 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:27:38,545 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:27:41,342 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▎ | 8/509 [01:35<1:37:09, 11.64s/it] + + 2%|█▎ | 8/509 [01:35<1:37:09, 11.64s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:27:44,291 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:27:47,088 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:27:49,941 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7576, 'learning_rate': 4.8e-06, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-02 22:27:52,795 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 2%|█▍ | 9/509 [01:46<1:36:29, 11.58s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:27:55,737 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:27:58,593 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:28:01,422 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:28:04,208 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▌ | 10/509 [01:58<1:35:52, 11.53s/it] + + 2%|█▌ | 10/509 [01:58<1:35:52, 11.53s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:28:07,138 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:28:09,861 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:28:12,659 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:28:15,406 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▋ | 11/509 [02:09<1:34:50, 11.43s/it] + + 2%|█▋ | 11/509 [02:09<1:34:50, 11.43s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:28:18,302 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:28:21,026 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:28:23,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:28:26,530 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▉ | 12/509 [02:20<1:33:53, 11.33s/it] + + 2%|█▉ | 12/509 [02:20<1:33:53, 11.33s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:28:29,367 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:28:32,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:28:34,851 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:28:37,523 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██ | 13/509 [02:31<1:32:50, 11.23s/it] + + 3%|██ | 13/509 [02:31<1:32:50, 11.23s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:28:40,378 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:28:43,066 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:28:45,771 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5102, 'learning_rate': 7.799999999999998e-06, 'epoch': 0.03} +[WARNING|modeling_utils.py:388] 2022-03-02 22:28:48,501 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 3%|██▏ | 14/509 [02:42<1:32:01, 11.15s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:28:51,352 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:28:54,109 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:28:56,784 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.416, 'learning_rate': 8.4e-06, 'epoch': 0.03} +[WARNING|modeling_utils.py:388] 2022-03-02 22:28:59,425 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 3%|██▎ | 15/509 [02:53<1:31:16, 11.09s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:29:02,174 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:29:04,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:29:07,492 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5563, 'learning_rate': 8.999999999999999e-06, 'epoch': 0.03} +[WARNING|modeling_utils.py:388] 2022-03-02 22:29:10,108 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 3%|██▌ | 16/509 [03:03<1:30:05, 10.96s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:29:12,847 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:29:15,514 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:29:18,184 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4879, 'learning_rate': 9.6e-06, 'epoch': 0.03} +[WARNING|modeling_utils.py:388] 2022-03-02 22:29:20,818 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 3%|██▋ | 17/509 [03:14<1:29:17, 10.89s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:29:23,549 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:29:26,143 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:29:28,840 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:29:31,488 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|██▊ | 18/509 [03:25<1:28:33, 10.82s/it] + + 4%|██▊ | 18/509 [03:25<1:28:33, 10.82s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:29:34,262 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:29:36,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:29:39,510 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:29:42,064 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.351, 'learning_rate': 1.0799999999999998e-05, 'epoch': 0.04} + + 4%|██▉ | 19/509 [03:35<1:27:46, 10.75s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:29:44,783 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:29:47,347 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:29:49,934 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:29:52,538 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▏ | 20/509 [03:46<1:26:55, 10.67s/it] + + 4%|███▏ | 20/509 [03:46<1:26:55, 10.67s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:29:55,219 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:29:57,872 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:30:00,439 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4766, 'learning_rate': 1.1999999999999999e-05, 'epoch': 0.04} +[WARNING|modeling_utils.py:388] 2022-03-02 22:30:03,054 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 4%|███▎ | 21/509 [03:56<1:26:23, 10.62s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:30:05,693 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:30:08,241 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:30:10,841 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:30:13,378 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.422, 'learning_rate': 1.26e-05, 'epoch': 0.04} + + 4%|███▍ | 22/509 [04:07<1:25:29, 10.53s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:30:15,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:30:18,490 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:30:21,059 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4017, 'learning_rate': 1.3199999999999997e-05, 'epoch': 0.05} +[WARNING|modeling_utils.py:388] 2022-03-02 22:30:23,596 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 5%|███▌ | 23/509 [04:17<1:24:32, 10.44s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:30:26,325 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:30:28,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:30:31,400 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.473, 'learning_rate': 1.3799999999999998e-05, 'epoch': 0.05} +[WARNING|modeling_utils.py:388] 2022-03-02 22:30:33,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 5%|███▊ | 24/509 [04:27<1:24:11, 10.42s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:30:36,567 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:30:39,075 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:30:41,547 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:30:44,578 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▉ | 25/509 [04:38<1:24:30, 10.48s/it] + 5%|███▉ | 25/509 [04:38<1:24:30, 10.48s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:30:47,244 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:30:49,752 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:30:47,244 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:30:52,203 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:30:47,244 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:30:52,203 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:30:47,244 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 26/509 [04:48<1:23:28, 10.37s/it]g-point operations will not be computed-02 22:30:47,244 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 26/509 [04:48<1:23:28, 10.37s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:30:57,252 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:30:59,762 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:30:57,252 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:31:02,206 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:30:57,252 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▏ | 27/509 [04:58<1:22:16, 10.24s/it]g-point operations will not be computed-02 22:30:57,252 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▏ | 27/509 [04:58<1:22:16, 10.24s/it]g-point operations will not be computed-02 22:30:57,252 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▏ | 27/509 [04:58<1:22:16, 10.24s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:31:07,208 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:31:09,650 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:31:07,208 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:31:12,082 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:31:07,208 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▍ | 28/509 [05:08<1:21:11, 10.13s/it]g-point operations will not be computed-02 22:31:07,208 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▍ | 28/509 [05:08<1:21:11, 10.13s/it]g-point operations will not be computed-02 22:31:07,208 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▍ | 28/509 [05:08<1:21:11, 10.13s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:31:17,056 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:31:19,511 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:31:17,056 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:31:21,936 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:31:17,056 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▌ | 29/509 [05:18<1:20:15, 10.03s/it]g-point operations will not be computed-02 22:31:17,056 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▌ | 29/509 [05:18<1:20:15, 10.03s/it]g-point operations will not be computed-02 22:31:17,056 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▌ | 29/509 [05:18<1:20:15, 10.03s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:31:26,832 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:31:29,286 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:31:26,832 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:31:31,680 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:31:26,832 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:31:31,680 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:31:26,832 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 30/509 [05:27<1:19:29, 9.96s/it]g-point operations will not be computed-02 22:31:26,832 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 30/509 [05:27<1:19:29, 9.96s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:31:36,604 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:31:38,947 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:31:36,604 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:31:41,304 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:31:36,604 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▊ | 31/509 [05:37<1:18:24, 9.84s/it]g-point operations will not be computed-02 22:31:36,604 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▊ | 31/509 [05:37<1:18:24, 9.84s/it]g-point operations will not be computed-02 22:31:36,604 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▊ | 31/509 [05:37<1:18:24, 9.84s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:31:46,148 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:31:48,508 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:31:46,148 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:31:50,881 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:31:46,148 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:31:50,881 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:31:46,148 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 32/509 [05:46<1:17:25, 9.74s/it]g-point operations will not be computed-02 22:31:46,148 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 32/509 [05:46<1:17:25, 9.74s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:31:55,607 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:31:57,899 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:31:55,607 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:32:00,193 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:31:55,607 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:32:00,193 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:31:55,607 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████▏ | 33/509 [05:56<1:16:22, 9.63s/it]g-point operations will not be computed-02 22:31:55,607 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████▏ | 33/509 [05:56<1:16:22, 9.63s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:32:04,901 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:32:07,189 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:32:04,901 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:32:09,495 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:32:04,901 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:32:09,495 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:32:04,901 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▎ | 34/509 [06:05<1:15:15, 9.51s/it]g-point operations will not be computed-02 22:32:04,901 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▎ | 34/509 [06:05<1:15:15, 9.51s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:32:14,150 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:32:16,346 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:32:14,150 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:32:18,527 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:32:14,150 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:32:20,755 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:32:14,150 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:32:20,755 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:32:14,150 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▌ | 35/509 [06:14<1:13:53, 9.35s/it]g-point operations will not be computed-02 22:32:14,150 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▌ | 35/509 [06:14<1:13:53, 9.35s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:32:23,044 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:32:25,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:32:23,044 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:32:27,405 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:32:23,044 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:32:27,405 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:32:23,044 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 36/509 [06:23<1:12:26, 9.19s/it]g-point operations will not be computed-02 22:32:23,044 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 36/509 [06:23<1:12:26, 9.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:32:31,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:32:33,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:32:31,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:32:36,055 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:32:31,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▊ | 37/509 [06:31<1:10:49, 9.00s/it]g-point operations will not be computed-02 22:32:31,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▊ | 37/509 [06:31<1:10:49, 9.00s/it]g-point operations will not be computed-02 22:32:31,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▊ | 37/509 [06:31<1:10:49, 9.00s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:32:40,338 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:32:42,461 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:32:40,338 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:32:44,565 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:32:40,338 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▉ | 38/509 [06:40<1:09:30, 8.85s/it]g-point operations will not be computed-02 22:32:40,338 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▉ | 38/509 [06:40<1:09:30, 8.85s/it]g-point operations will not be computed-02 22:32:40,338 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▉ | 38/509 [06:40<1:09:30, 8.85s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:32:48,769 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:32:50,760 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:32:48,769 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:32:52,769 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:32:48,769 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▏ | 39/509 [06:48<1:07:38, 8.63s/it]g-point operations will not be computed-02 22:32:48,769 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▏ | 39/509 [06:48<1:07:38, 8.63s/it]g-point operations will not be computed-02 22:32:48,769 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▏ | 39/509 [06:48<1:07:38, 8.63s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:32:56,825 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:32:58,752 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:32:56,825 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:33:00,685 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:32:56,825 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:33:00,685 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:32:56,825 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▎ | 40/509 [06:56<1:05:24, 8.37s/it]g-point operations will not be computed-02 22:32:56,825 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▎ | 40/509 [06:56<1:05:24, 8.37s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:33:04,471 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:33:06,298 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:33:04,471 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:33:08,065 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:33:04,471 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:33:08,065 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:33:04,471 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▍ | 41/509 [07:03<1:02:50, 8.06s/it]g-point operations will not be computed-02 22:33:04,471 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:33:13,395 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:33:11,649 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:33:15,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:33:11,649 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:33:15,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:33:11,649 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▌ | 42/509 [07:10<1:00:11, 7.73s/it]g-point operations will not be computed-02 22:33:11,649 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▌ | 42/509 [07:10<1:00:11, 7.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:33:18,592 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:33:20,196 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:33:18,592 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:33:20,196 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:33:18,592 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:33:23,330 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:33:18,592 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:33:23,330 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:33:18,592 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▉ | 43/509 [07:17<57:13, 7.37s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:33:24,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▉ | 43/509 [07:17<57:13, 7.37s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:33:24,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▉ | 43/509 [07:17<57:13, 7.37s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:33:24,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████ | 44/509 [07:22<53:35, 6.92s/it]g-point operations will not be computed-02 22:33:24,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████ | 44/509 [07:22<53:35, 6.92s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:33:30,649 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:33:33,249 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:33:30,649 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:33:33,249 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:33:30,649 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▏ | 45/509 [07:28<49:44, 6.43s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:33:35,834 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▏ | 45/509 [07:28<49:44, 6.43s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:33:35,834 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▍ | 46/509 [07:33<45:45, 5.93s/it]g-point operations will not be computed-02 22:33:35,834 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▍ | 46/509 [07:33<45:45, 5.93s/it]g-point operations will not be computed-02 22:33:35,834 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▍ | 46/509 [07:33<45:45, 5.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:33:40,461 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▌ | 47/509 [07:37<41:36, 5.40s/it]g-point operations will not be computed-02 22:33:40,461 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▌ | 47/509 [07:37<41:36, 5.40s/it]g-point operations will not be computed-02 22:33:40,461 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▌ | 47/509 [07:37<41:36, 5.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:33:44,460 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▌ | 47/509 [07:37<41:36, 5.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:33:44,460 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▋ | 48/509 [07:40<37:31, 4.88s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:33:48,063 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:33:49,581 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:33:48,063 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:33:49,581 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:33:48,063 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▉ | 49/509 [07:44<33:35, 4.38s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:33:51,140 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████ | 50/509 [07:47<30:39, 4.01s/it]g-point operations will not be computed-02 22:33:51,140 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████ | 50/509 [07:47<30:39, 4.01s/it]g-point operations will not be computed-02 22:33:51,140 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████ | 50/509 [07:47<30:39, 4.01s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:33:56,738 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████ | 50/509 [07:47<30:39, 4.01s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:33:56,738 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:34:02,826 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:33:56,738 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:34:02,826 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:33:56,738 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▏ | 51/509 [07:59<49:46, 6.52s/it]g-point operations will not be computed-02 22:33:56,738 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▏ | 51/509 [07:59<49:46, 6.52s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:34:08,930 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▏ | 51/509 [07:59<49:46, 6.52s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:34:08,930 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:34:14,799 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:34:08,930 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:34:14,799 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:34:08,930 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▏ | 52/509 [08:11<1:01:54, 8.13s/it]g-point operations will not be computed-02 22:34:08,930 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▏ | 52/509 [08:11<1:01:54, 8.13s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:34:20,776 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▏ | 52/509 [08:11<1:01:54, 8.13s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:34:20,776 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:34:26,643 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:34:20,776 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:34:26,643 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:34:20,776 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▎ | 53/509 [08:23<1:10:11, 9.24s/it]g-point operations will not be computed-02 22:34:20,776 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▎ | 53/509 [08:23<1:10:11, 9.24s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:34:32,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▎ | 53/509 [08:23<1:10:11, 9.24s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:34:32,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:34:38,399 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:34:32,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▍ | 54/509 [08:35<1:15:44, 9.99s/it]g-point operations will not be computed-02 22:34:32,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▍ | 54/509 [08:35<1:15:44, 9.99s/it]g-point operations will not be computed-02 22:34:32,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▍ | 54/509 [08:35<1:15:44, 9.99s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:34:44,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▍ | 54/509 [08:35<1:15:44, 9.99s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:34:44,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:34:49,874 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:34:44,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▋ | 55/509 [08:46<1:18:50, 10.42s/it]g-point operations will not be computed-02 22:34:44,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▋ | 55/509 [08:46<1:18:50, 10.42s/it]g-point operations will not be computed-02 22:34:44,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▋ | 55/509 [08:46<1:18:50, 10.42s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:34:55,603 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▋ | 55/509 [08:46<1:18:50, 10.42s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:34:55,603 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:35:01,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:34:55,603 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▊ | 56/509 [08:57<1:20:43, 10.69s/it]g-point operations will not be computed-02 22:34:55,603 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▊ | 56/509 [08:57<1:20:43, 10.69s/it]g-point operations will not be computed-02 22:34:55,603 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▊ | 56/509 [08:57<1:20:43, 10.69s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:35:06,925 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▊ | 56/509 [08:57<1:20:43, 10.69s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:35:06,925 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:35:12,464 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:35:06,925 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:35:12,464 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:35:06,925 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 57/509 [09:09<1:21:46, 10.85s/it]g-point operations will not be computed-02 22:35:06,925 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 57/509 [09:09<1:21:46, 10.85s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:35:18,162 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 57/509 [09:09<1:21:46, 10.85s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:35:18,162 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:35:23,780 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:35:18,162 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:35:23,780 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:35:18,162 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████ | 58/509 [09:20<1:22:37, 10.99s/it]g-point operations will not be computed-02 22:35:18,162 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████ | 58/509 [09:20<1:22:37, 10.99s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:35:29,485 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████ | 58/509 [09:20<1:22:37, 10.99s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:35:29,485 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:35:34,991 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:35:29,485 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:35:34,991 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:35:29,485 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▎ | 59/509 [09:31<1:22:45, 11.03s/it]g-point operations will not be computed-02 22:35:29,485 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▎ | 59/509 [09:31<1:22:45, 11.03s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:35:40,626 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▎ | 59/509 [09:31<1:22:45, 11.03s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:35:40,626 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:35:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:35:40,626 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:35:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:35:40,626 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 60/509 [09:42<1:22:42, 11.05s/it]g-point operations will not be computed-02 22:35:40,626 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 60/509 [09:42<1:22:42, 11.05s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:35:51,541 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 60/509 [09:42<1:22:42, 11.05s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:35:51,541 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:35:56,946 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:35:51,541 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▌ | 61/509 [09:53<1:22:10, 11.01s/it]g-point operations will not be computed-02 22:35:51,541 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▌ | 61/509 [09:53<1:22:10, 11.01s/it]g-point operations will not be computed-02 22:35:51,541 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▌ | 61/509 [09:53<1:22:10, 11.01s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:36:02,452 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▌ | 61/509 [09:53<1:22:10, 11.01s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:36:02,452 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:36:07,842 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:36:02,452 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 22:36:02,452 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 22:36:02,452 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▋ | 62/509 [10:04<1:21:26, 10.93s/it]g-point operations will not be computed-02 22:36:02,452 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▋ | 62/509 [10:04<1:21:26, 10.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:36:13,247 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▋ | 62/509 [10:04<1:21:26, 10.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:36:13,247 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:36:18,570 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:36:13,247 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:36:18,570 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:36:13,247 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▉ | 63/509 [10:15<1:21:02, 10.90s/it]g-point operations will not be computed-02 22:36:13,247 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▉ | 63/509 [10:15<1:21:02, 10.90s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:36:24,071 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▉ | 63/509 [10:15<1:21:02, 10.90s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:36:24,071 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:36:29,388 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:36:24,071 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:36:29,388 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:36:24,071 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 64/509 [10:25<1:20:24, 10.84s/it]g-point operations will not be computed-02 22:36:24,071 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 64/509 [10:25<1:20:24, 10.84s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:36:34,781 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 64/509 [10:25<1:20:24, 10.84s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:36:34,781 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:36:40,069 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:36:34,781 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▏ | 65/509 [10:36<1:19:49, 10.79s/it]g-point operations will not be computed-02 22:36:34,781 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▏ | 65/509 [10:36<1:19:49, 10.79s/it]g-point operations will not be computed-02 22:36:34,781 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▏ | 65/509 [10:36<1:19:49, 10.79s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:36:45,369 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▏ | 65/509 [10:36<1:19:49, 10.79s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:36:45,369 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:36:50,591 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:36:45,369 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▎ | 66/509 [10:46<1:18:59, 10.70s/it]g-point operations will not be computed-02 22:36:45,369 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▎ | 66/509 [10:46<1:18:59, 10.70s/it]g-point operations will not be computed-02 22:36:45,369 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▎ | 66/509 [10:46<1:18:59, 10.70s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:36:55,899 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▎ | 66/509 [10:46<1:18:59, 10.70s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:36:55,899 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:37:01,147 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:36:55,899 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:37:01,147 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:36:55,899 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:37:01,147 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:36:55,899 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▌ | 67/509 [10:57<1:18:38, 10.67s/it]g-point operations will not be computed-02 22:36:55,899 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▌ | 67/509 [10:57<1:18:38, 10.67s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:37:06,442 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▌ | 67/509 [10:57<1:18:38, 10.67s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:37:06,442 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:37:11,568 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:37:06,442 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:37:11,568 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:37:06,442 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▋ | 68/509 [11:07<1:17:55, 10.60s/it]g-point operations will not be computed-02 22:37:06,442 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▋ | 68/509 [11:07<1:17:55, 10.60s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:37:16,877 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▋ | 68/509 [11:07<1:17:55, 10.60s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:37:16,877 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:37:22,117 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:37:16,877 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|██████████▊ | 69/509 [11:18<1:17:41, 10.59s/it]g-point operations will not be computed-02 22:37:16,877 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|██████████▊ | 69/509 [11:18<1:17:41, 10.59s/it]g-point operations will not be computed-02 22:37:16,877 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|██████████▊ | 69/509 [11:18<1:17:41, 10.59s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:37:27,462 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|██████████▊ | 69/509 [11:18<1:17:41, 10.59s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:37:27,462 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:37:32,646 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:37:27,462 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:37:32,646 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:37:27,462 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████ | 70/509 [11:29<1:17:15, 10.56s/it]g-point operations will not be computed-02 22:37:27,462 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████ | 70/509 [11:29<1:17:15, 10.56s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:37:37,979 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████ | 70/509 [11:29<1:17:15, 10.56s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:37:37,979 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:37:43,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:37:37,979 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▏ | 71/509 [11:39<1:16:55, 10.54s/it]g-point operations will not be computed-02 22:37:37,979 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▏ | 71/509 [11:39<1:16:55, 10.54s/it]g-point operations will not be computed-02 22:37:37,979 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▏ | 71/509 [11:39<1:16:55, 10.54s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:37:48,439 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▏ | 71/509 [11:39<1:16:55, 10.54s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:37:48,439 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:37:53,546 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:37:48,439 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▎ | 72/509 [11:49<1:16:25, 10.49s/it]g-point operations will not be computed-02 22:37:48,439 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▎ | 72/509 [11:49<1:16:25, 10.49s/it]g-point operations will not be computed-02 22:37:48,439 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▎ | 72/509 [11:49<1:16:25, 10.49s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:37:58,755 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▎ | 72/509 [11:49<1:16:25, 10.49s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:37:58,755 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:38:03,861 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:37:58,755 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:38:03,861 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:37:58,755 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▍ | 73/509 [12:00<1:15:50, 10.44s/it]g-point operations will not be computed-02 22:37:58,755 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▍ | 73/509 [12:00<1:15:50, 10.44s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▍ | 73/509 [12:00<1:15:50, 10.44s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▍ | 73/509 [12:00<1:15:50, 10.44s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▍ | 73/509 [12:00<1:15:50, 10.44s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▋ | 74/509 [12:10<1:15:25, 10.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▋ | 74/509 [12:10<1:15:25, 10.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▋ | 74/509 [12:10<1:15:25, 10.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▋ | 74/509 [12:10<1:15:25, 10.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▋ | 74/509 [12:10<1:15:25, 10.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▊ | 75/509 [12:21<1:15:32, 10.44s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▊ | 75/509 [12:21<1:15:32, 10.44s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▊ | 75/509 [12:21<1:15:32, 10.44s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▊ | 75/509 [12:21<1:15:32, 10.44s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▊ | 75/509 [12:21<1:15:32, 10.44s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▉ | 76/509 [12:31<1:14:41, 10.35s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▉ | 76/509 [12:31<1:14:41, 10.35s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▉ | 76/509 [12:31<1:14:41, 10.35s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▉ | 76/509 [12:31<1:14:41, 10.35s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▉ | 76/509 [12:31<1:14:41, 10.35s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████ | 77/509 [12:41<1:13:31, 10.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████ | 77/509 [12:41<1:13:31, 10.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████ | 77/509 [12:41<1:13:31, 10.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████ | 77/509 [12:41<1:13:31, 10.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████▎ | 78/509 [12:51<1:12:40, 10.12s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████▎ | 78/509 [12:51<1:12:40, 10.12s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2623, 'learning_rate': 4.56e-05, 'epoch': 0.15} + 15%|████████████▎ | 78/509 [12:51<1:12:40, 10.12s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████▎ | 78/509 [12:51<1:12:40, 10.12s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▍ | 79/509 [13:00<1:11:33, 9.98s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▍ | 79/509 [13:00<1:11:33, 9.98s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1395, 'learning_rate': 4.62e-05, 'epoch': 0.15} + 16%|████████████▍ | 79/509 [13:00<1:11:33, 9.98s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▍ | 79/509 [13:00<1:11:33, 9.98s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▍ | 79/509 [13:00<1:11:33, 9.98s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▌ | 80/509 [13:10<1:10:19, 9.84s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▌ | 80/509 [13:10<1:10:19, 9.84s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▌ | 80/509 [13:10<1:10:19, 9.84s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▌ | 80/509 [13:10<1:10:19, 9.84s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▋ | 81/509 [13:19<1:09:36, 9.76s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▋ | 81/509 [13:19<1:09:36, 9.76s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2962, 'learning_rate': 4.7399999999999993e-05, 'epoch': 0.16} + 16%|████████████▋ | 81/509 [13:19<1:09:36, 9.76s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▋ | 81/509 [13:19<1:09:36, 9.76s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▉ | 82/509 [13:29<1:08:27, 9.62s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▉ | 82/509 [13:29<1:08:27, 9.62s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2672, 'learning_rate': 4.7999999999999994e-05, 'epoch': 0.16} + 16%|████████████▉ | 82/509 [13:29<1:08:27, 9.62s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▉ | 82/509 [13:29<1:08:27, 9.62s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▉ | 82/509 [13:29<1:08:27, 9.62s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▉ | 82/509 [13:29<1:08:27, 9.62s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3444, 'learning_rate': 4.8599999999999995e-05, 'epoch': 0.16} + 16%|████████████▉ | 82/509 [13:29<1:08:27, 9.62s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▉ | 82/509 [13:29<1:08:27, 9.62s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▏ | 84/509 [13:47<1:06:37, 9.41s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▏ | 84/509 [13:47<1:06:37, 9.41s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2708, 'learning_rate': 4.9199999999999997e-05, 'epoch': 0.16} + 17%|█████████████▏ | 84/509 [13:47<1:06:37, 9.41s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▏ | 84/509 [13:47<1:06:37, 9.41s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▎ | 85/509 [13:56<1:05:22, 9.25s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▎ | 85/509 [13:56<1:05:22, 9.25s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1827, 'learning_rate': 4.98e-05, 'epoch': 0.17} + 17%|█████████████▎ | 85/509 [13:56<1:05:22, 9.25s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▎ | 85/509 [13:56<1:05:22, 9.25s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▎ | 85/509 [13:56<1:05:22, 9.25s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▌ | 86/509 [14:05<1:04:28, 9.14s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▌ | 86/509 [14:05<1:04:28, 9.14s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:40:18,052 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:40:18,052 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▋ | 87/509 [14:13<1:03:21, 9.01s/it]g-point operations will not be computed-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▋ | 87/509 [14:13<1:03:21, 9.01s/it]g-point operations will not be computed-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▋ | 87/509 [14:13<1:03:21, 9.01s/it]g-point operations will not be computed-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▋ | 87/509 [14:13<1:03:21, 9.01s/it]g-point operations will not be computed-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▋ | 87/509 [14:13<1:03:21, 9.01s/it]g-point operations will not be computed-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▊ | 88/509 [14:22<1:02:01, 8.84s/it]g-point operations will not be computed-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▊ | 88/509 [14:22<1:02:01, 8.84s/it]g-point operations will not be computed-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▊ | 88/509 [14:22<1:02:01, 8.84s/it]g-point operations will not be computed-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▊ | 88/509 [14:22<1:02:01, 8.84s/it]g-point operations will not be computed-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▊ | 88/509 [14:22<1:02:01, 8.84s/it]g-point operations will not be computed-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▉ | 89/509 [14:30<1:00:36, 8.66s/it]g-point operations will not be computed-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▉ | 89/509 [14:30<1:00:36, 8.66s/it]g-point operations will not be computed-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▉ | 89/509 [14:30<1:00:36, 8.66s/it]g-point operations will not be computed-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▉ | 89/509 [14:30<1:00:36, 8.66s/it]g-point operations will not be computed-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▉ | 89/509 [14:30<1:00:36, 8.66s/it]g-point operations will not be computed-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▍ | 90/509 [14:38<58:43, 8.41s/it]g-point operations will not be computed-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▍ | 90/509 [14:38<58:43, 8.41s/it]g-point operations will not be computed-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:40:50,187 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:40:50,187 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▋ | 91/509 [14:45<56:10, 8.06s/it]g-point operations will not be computed-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▋ | 91/509 [14:45<56:10, 8.06s/it]g-point operations will not be computed-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▋ | 91/509 [14:45<56:10, 8.06s/it]g-point operations will not be computed-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▋ | 91/509 [14:45<56:10, 8.06s/it]g-point operations will not be computed-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▋ | 91/509 [14:45<56:10, 8.06s/it]g-point operations will not be computed-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▊ | 92/509 [14:52<53:15, 7.66s/it]g-point operations will not be computed-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:41:01,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:41:01,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:41:01,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:38:09,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▉ | 93/509 [14:58<50:06, 7.23s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▉ | 93/509 [14:58<50:06, 7.23s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:41:10,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:41:10,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4964, 'learning_rate': 5.519999999999999e-05, 'epoch': 0.18} +[WARNING|modeling_utils.py:388] 2022-03-02 22:41:14,386 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:41:14,386 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▎ | 95/509 [15:09<43:10, 6.26s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:41:18,051 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:41:20,373 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:41:20,373 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:41:22,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:41:24,602 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:41:24,602 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:41:26,582 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:41:28,328 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:41:28,328 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:41:30,071 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:41:30,071 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:41:33,012 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:41:34,717 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:41:34,717 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2287, 'learning_rate': 5.88e-05, 'epoch': 0.2} +[WARNING|modeling_utils.py:388] 2022-03-02 22:41:40,971 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:41:40,971 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████ | 101/509 [15:40<43:46, 6.44s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████ | 101/509 [15:40<43:46, 6.44s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2066, 'learning_rate': 5.94e-05, 'epoch': 0.2} + 20%|████████████████ | 101/509 [15:40<43:46, 6.44s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████ | 101/509 [15:40<43:46, 6.44s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▏ | 102/509 [15:52<54:26, 8.03s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▏ | 102/509 [15:52<54:26, 8.03s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2299, 'learning_rate': 5.9999999999999995e-05, 'epoch': 0.2} + 20%|████████████████▏ | 102/509 [15:52<54:26, 8.03s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▏ | 102/509 [15:52<54:26, 8.03s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▏ | 102/509 [15:52<54:26, 8.03s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▉ | 103/509 [16:04<1:01:45, 9.13s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▉ | 103/509 [16:04<1:01:45, 9.13s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▉ | 103/509 [16:04<1:01:45, 9.13s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▉ | 103/509 [16:04<1:01:45, 9.13s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▏ | 104/509 [16:15<1:06:28, 9.85s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▏ | 104/509 [16:15<1:06:28, 9.85s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0859, 'learning_rate': 6.12e-05, 'epoch': 0.2} + 20%|████████████████▏ | 104/509 [16:15<1:06:28, 9.85s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▏ | 104/509 [16:15<1:06:28, 9.85s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▎ | 105/509 [16:26<1:09:10, 10.27s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▎ | 105/509 [16:26<1:09:10, 10.27s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0669, 'learning_rate': 6.18e-05, 'epoch': 0.21} + 21%|████████████████▎ | 105/509 [16:26<1:09:10, 10.27s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▎ | 105/509 [16:26<1:09:10, 10.27s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▍ | 106/509 [16:38<1:11:11, 10.60s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▍ | 106/509 [16:38<1:11:11, 10.60s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0643, 'learning_rate': 6.239999999999999e-05, 'epoch': 0.21} + 21%|████████████████▍ | 106/509 [16:38<1:11:11, 10.60s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▍ | 106/509 [16:38<1:11:11, 10.60s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▌ | 107/509 [16:49<1:12:19, 10.79s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▌ | 107/509 [16:49<1:12:19, 10.79s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1299, 'learning_rate': 6.299999999999999e-05, 'epoch': 0.21} + 21%|████████████████▌ | 107/509 [16:49<1:12:19, 10.79s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▌ | 107/509 [16:49<1:12:19, 10.79s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▊ | 108/509 [17:00<1:12:58, 10.92s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▊ | 108/509 [17:00<1:12:58, 10.92s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1738, 'learning_rate': 6.359999999999999e-05, 'epoch': 0.21} + 21%|████████████████▊ | 108/509 [17:00<1:12:58, 10.92s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▊ | 108/509 [17:00<1:12:58, 10.92s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▉ | 109/509 [17:11<1:12:59, 10.95s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▉ | 109/509 [17:11<1:12:59, 10.95s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2388, 'learning_rate': 6.419999999999999e-05, 'epoch': 0.21} + 21%|████████████████▉ | 109/509 [17:11<1:12:59, 10.95s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▉ | 109/509 [17:11<1:12:59, 10.95s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████ | 110/509 [17:22<1:13:08, 11.00s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████ | 110/509 [17:22<1:13:08, 11.00s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1986, 'learning_rate': 6.479999999999999e-05, 'epoch': 0.22} + 22%|█████████████████ | 110/509 [17:22<1:13:08, 11.00s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████ | 110/509 [17:22<1:13:08, 11.00s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████ | 110/509 [17:22<1:13:08, 11.00s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▏ | 111/509 [17:33<1:12:49, 10.98s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▏ | 111/509 [17:33<1:12:49, 10.98s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▏ | 111/509 [17:33<1:12:49, 10.98s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▏ | 111/509 [17:33<1:12:49, 10.98s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▏ | 111/509 [17:33<1:12:49, 10.98s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▍ | 112/509 [17:44<1:12:30, 10.96s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▍ | 112/509 [17:44<1:12:30, 10.96s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▍ | 112/509 [17:44<1:12:30, 10.96s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▍ | 112/509 [17:44<1:12:30, 10.96s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▍ | 112/509 [17:44<1:12:30, 10.96s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▌ | 113/509 [17:55<1:12:10, 10.93s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▌ | 113/509 [17:55<1:12:10, 10.93s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▌ | 113/509 [17:55<1:12:10, 10.93s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▌ | 113/509 [17:55<1:12:10, 10.93s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▋ | 114/509 [18:06<1:11:35, 10.87s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▋ | 114/509 [18:06<1:11:35, 10.87s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1904, 'learning_rate': 6.72e-05, 'epoch': 0.22} + 22%|█████████████████▋ | 114/509 [18:06<1:11:35, 10.87s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▋ | 114/509 [18:06<1:11:35, 10.87s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▋ | 114/509 [18:06<1:11:35, 10.87s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▋ | 114/509 [18:06<1:11:35, 10.87s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1663, 'learning_rate': 6.78e-05, 'epoch': 0.23} + 22%|█████████████████▋ | 114/509 [18:06<1:11:35, 10.87s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▋ | 114/509 [18:06<1:11:35, 10.87s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▋ | 114/509 [18:06<1:11:35, 10.87s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▋ | 114/509 [18:06<1:11:35, 10.87s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████ | 116/509 [18:27<1:10:39, 10.79s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████ | 116/509 [18:27<1:10:39, 10.79s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████ | 116/509 [18:27<1:10:39, 10.79s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████ | 116/509 [18:27<1:10:39, 10.79s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▏ | 117/509 [18:38<1:10:15, 10.75s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▏ | 117/509 [18:38<1:10:15, 10.75s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2877, 'learning_rate': 6.9e-05, 'epoch': 0.23} + 23%|██████████████████▏ | 117/509 [18:38<1:10:15, 10.75s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▏ | 117/509 [18:38<1:10:15, 10.75s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▎ | 118/509 [18:48<1:09:46, 10.71s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▎ | 118/509 [18:48<1:09:46, 10.71s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2288, 'learning_rate': 6.96e-05, 'epoch': 0.23} + 23%|██████████████████▎ | 118/509 [18:48<1:09:46, 10.71s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▎ | 118/509 [18:48<1:09:46, 10.71s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▍ | 119/509 [18:59<1:09:16, 10.66s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▍ | 119/509 [18:59<1:09:16, 10.66s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1526, 'learning_rate': 7.02e-05, 'epoch': 0.23} + 23%|██████████████████▍ | 119/509 [18:59<1:09:16, 10.66s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▍ | 119/509 [18:59<1:09:16, 10.66s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▍ | 119/509 [18:59<1:09:16, 10.66s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▌ | 120/509 [19:09<1:08:41, 10.60s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▌ | 120/509 [19:09<1:08:41, 10.60s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▌ | 120/509 [19:09<1:08:41, 10.60s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▌ | 120/509 [19:09<1:08:41, 10.60s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▌ | 120/509 [19:09<1:08:41, 10.60s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▊ | 121/509 [19:20<1:08:12, 10.55s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▊ | 121/509 [19:20<1:08:12, 10.55s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▊ | 121/509 [19:20<1:08:12, 10.55s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▊ | 121/509 [19:20<1:08:12, 10.55s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▊ | 121/509 [19:20<1:08:12, 10.55s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▉ | 122/509 [19:30<1:07:38, 10.49s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▉ | 122/509 [19:30<1:07:38, 10.49s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▉ | 122/509 [19:30<1:07:38, 10.49s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▉ | 122/509 [19:30<1:07:38, 10.49s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▉ | 122/509 [19:30<1:07:38, 10.49s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████ | 123/509 [19:41<1:07:06, 10.43s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████ | 123/509 [19:41<1:07:06, 10.43s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████ | 123/509 [19:41<1:07:06, 10.43s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████ | 123/509 [19:41<1:07:06, 10.43s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████ | 123/509 [19:41<1:07:06, 10.43s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▏ | 124/509 [19:51<1:06:12, 10.32s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▏ | 124/509 [19:51<1:06:12, 10.32s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▏ | 124/509 [19:51<1:06:12, 10.32s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▏ | 124/509 [19:51<1:06:12, 10.32s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▏ | 124/509 [19:51<1:06:12, 10.32s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▍ | 125/509 [20:01<1:06:18, 10.36s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▍ | 125/509 [20:01<1:06:18, 10.36s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▍ | 125/509 [20:01<1:06:18, 10.36s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▍ | 125/509 [20:01<1:06:18, 10.36s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▍ | 125/509 [20:01<1:06:18, 10.36s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▌ | 126/509 [20:11<1:05:31, 10.26s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▌ | 126/509 [20:11<1:05:31, 10.26s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▌ | 126/509 [20:11<1:05:31, 10.26s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▌ | 126/509 [20:11<1:05:31, 10.26s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▌ | 126/509 [20:11<1:05:31, 10.26s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▋ | 127/509 [20:21<1:04:34, 10.14s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▋ | 127/509 [20:21<1:04:34, 10.14s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▋ | 127/509 [20:21<1:04:34, 10.14s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▋ | 127/509 [20:21<1:04:34, 10.14s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▋ | 127/509 [20:21<1:04:34, 10.14s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▋ | 127/509 [20:21<1:04:34, 10.14s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.162, 'learning_rate': 7.56e-05, 'epoch': 0.25} + 25%|███████████████████▋ | 127/509 [20:21<1:04:34, 10.14s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▋ | 127/509 [20:21<1:04:34, 10.14s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▋ | 127/509 [20:21<1:04:34, 10.14s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████ | 129/509 [20:41<1:02:59, 9.95s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████ | 129/509 [20:41<1:02:59, 9.95s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1419, 'learning_rate': 7.62e-05, 'epoch': 0.25} + 25%|████████████████████ | 129/509 [20:41<1:02:59, 9.95s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████ | 129/509 [20:41<1:02:59, 9.95s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▏ | 130/509 [20:50<1:02:03, 9.82s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▏ | 130/509 [20:50<1:02:03, 9.82s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.338, 'learning_rate': 7.68e-05, 'epoch': 0.26} + 26%|████████████████████▏ | 130/509 [20:50<1:02:03, 9.82s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▏ | 130/509 [20:50<1:02:03, 9.82s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1503, 'learning_rate': 7.74e-05, 'epoch': 0.26} + g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 132/509 [21:09<1:00:22, 9.61s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 132/509 [21:09<1:00:22, 9.61s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.163, 'learning_rate': 7.8e-05, 'epoch': 0.26} + 26%|████████████████████▍ | 132/509 [21:09<1:00:22, 9.61s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 132/509 [21:09<1:00:22, 9.61s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|█████████████████████▏ | 133/509 [21:18<59:22, 9.47s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|█████████████████████▏ | 133/509 [21:18<59:22, 9.47s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1842, 'learning_rate': 7.86e-05, 'epoch': 0.26} + 26%|█████████████████████▏ | 133/509 [21:18<59:22, 9.47s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:47:33,724 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:47:33,724 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3043, 'learning_rate': 7.92e-05, 'epoch': 0.26} +[WARNING|modeling_utils.py:388] 2022-03-02 22:47:33,724 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:47:33,724 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:47:33,724 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:47:33,724 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▍ | 135/509 [21:36<57:15, 9.18s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▍ | 135/509 [21:36<57:15, 9.18s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▍ | 135/509 [21:36<57:15, 9.18s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▍ | 135/509 [21:36<57:15, 9.18s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▋ | 136/509 [21:45<56:27, 9.08s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▋ | 136/509 [21:45<56:27, 9.08s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2666, 'learning_rate': 8.04e-05, 'epoch': 0.27} + 27%|█████████████████████▋ | 136/509 [21:45<56:27, 9.08s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▋ | 136/509 [21:45<56:27, 9.08s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▊ | 137/509 [21:53<55:09, 8.90s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▊ | 137/509 [21:53<55:09, 8.90s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2247, 'learning_rate': 8.1e-05, 'epoch': 0.27} + 27%|█████████████████████▊ | 137/509 [21:53<55:09, 8.90s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▊ | 137/509 [21:53<55:09, 8.90s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▉ | 138/509 [22:01<53:49, 8.70s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▉ | 138/509 [22:01<53:49, 8.70s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2554, 'learning_rate': 8.16e-05, 'epoch': 0.27} + 27%|█████████████████████▉ | 138/509 [22:01<53:49, 8.70s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▉ | 138/509 [22:01<53:49, 8.70s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▉ | 138/509 [22:01<53:49, 8.70s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|██████████████████████ | 139/509 [22:09<52:18, 8.48s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|██████████████████████ | 139/509 [22:09<52:18, 8.48s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|██████████████████████ | 139/509 [22:09<52:18, 8.48s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|██████████████████████ | 139/509 [22:09<52:18, 8.48s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|██████████████████████ | 139/509 [22:09<52:18, 8.48s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▎ | 140/509 [22:17<50:27, 8.20s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▎ | 140/509 [22:17<50:27, 8.20s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▎ | 140/509 [22:17<50:27, 8.20s/it]g-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:48:30,811 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:48:30,811 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3292, 'learning_rate': 8.34e-05, 'epoch': 0.28} +[WARNING|modeling_utils.py:388] 2022-03-02 22:48:30,811 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:48:30,811 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:48:30,811 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:41:06,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▌ | 142/509 [22:31<46:04, 7.53s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:48:39,190 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▌ | 142/509 [22:31<46:04, 7.53s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:48:39,190 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▌ | 142/509 [22:31<46:04, 7.53s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:48:39,190 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▌ | 142/509 [22:31<46:04, 7.53s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:48:39,190 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▊ | 143/509 [22:37<43:29, 7.13s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▊ | 143/509 [22:37<43:29, 7.13s/it][WARNING|modeling_utils.py:388] 2022-03-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:48:49,359 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:48:49,359 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2392, 'learning_rate': 8.519999999999998e-05, 'epoch': 0.28} +[WARNING|modeling_utils.py:388] 2022-03-02 22:48:53,299 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:48:53,299 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|███████████████████████ | 145/509 [22:48<37:48, 6.23s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:48:56,969 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:48:59,157 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:48:59,157 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:49:01,265 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:49:03,157 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:49:03,157 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:49:05,026 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:49:05,026 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:49:06,639 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:49:09,683 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:49:09,683 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:49:11,117 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:49:11,117 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:49:12,840 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:49:12,840 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:49:19,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:49:19,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████ | 151/509 [23:18<38:16, 6.41s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████ | 151/509 [23:18<38:16, 6.41s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3844, 'learning_rate': 8.939999999999999e-05, 'epoch': 0.3} + 30%|████████████████████████ | 151/509 [23:18<38:16, 6.41s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████ | 151/509 [23:18<38:16, 6.41s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████ | 151/509 [23:18<38:16, 6.41s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▏ | 152/509 [23:30<47:37, 8.00s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▏ | 152/509 [23:30<47:37, 8.00s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▏ | 152/509 [23:30<47:37, 8.00s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▏ | 152/509 [23:30<47:37, 8.00s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2272, 'learning_rate': 9.059999999999999e-05, 'epoch': 0.3} + g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▌ | 154/509 [23:53<58:04, 9.82s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▌ | 154/509 [23:53<58:04, 9.82s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2883, 'learning_rate': 9.12e-05, 'epoch': 0.3} + 30%|████████████████████████▌ | 154/509 [23:53<58:04, 9.82s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▌ | 154/509 [23:53<58:04, 9.82s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▌ | 154/509 [23:53<58:04, 9.82s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████ | 155/509 [24:05<1:00:53, 10.32s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████ | 155/509 [24:05<1:00:53, 10.32s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████ | 155/509 [24:05<1:00:53, 10.32s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████ | 155/509 [24:05<1:00:53, 10.32s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▏ | 156/509 [24:16<1:02:35, 10.64s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▏ | 156/509 [24:16<1:02:35, 10.64s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1388, 'learning_rate': 9.24e-05, 'epoch': 0.31} + 31%|████████████████████████▏ | 156/509 [24:16<1:02:35, 10.64s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▏ | 156/509 [24:16<1:02:35, 10.64s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▏ | 156/509 [24:16<1:02:35, 10.64s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▎ | 157/509 [24:27<1:03:24, 10.81s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▎ | 157/509 [24:27<1:03:24, 10.81s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▎ | 157/509 [24:27<1:03:24, 10.81s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▎ | 157/509 [24:27<1:03:24, 10.81s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▌ | 158/509 [24:39<1:03:52, 10.92s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▌ | 158/509 [24:39<1:03:52, 10.92s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.32, 'learning_rate': 9.36e-05, 'epoch': 0.31} + 31%|████████████████████████▌ | 158/509 [24:39<1:03:52, 10.92s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▌ | 158/509 [24:39<1:03:52, 10.92s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▋ | 159/509 [24:50<1:04:17, 11.02s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▋ | 159/509 [24:50<1:04:17, 11.02s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2029, 'learning_rate': 9.419999999999999e-05, 'epoch': 0.31} + 31%|████████████████████████▋ | 159/509 [24:50<1:04:17, 11.02s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▋ | 159/509 [24:50<1:04:17, 11.02s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▊ | 160/509 [25:01<1:04:26, 11.08s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▊ | 160/509 [25:01<1:04:26, 11.08s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3667, 'learning_rate': 9.479999999999999e-05, 'epoch': 0.31} + 31%|████████████████████████▊ | 160/509 [25:01<1:04:26, 11.08s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▊ | 160/509 [25:01<1:04:26, 11.08s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▊ | 160/509 [25:01<1:04:26, 11.08s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|████████████████████████▉ | 161/509 [25:12<1:04:12, 11.07s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|████████████████████████▉ | 161/509 [25:12<1:04:12, 11.07s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|████████████████████████▉ | 161/509 [25:12<1:04:12, 11.07s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|████████████████████████▉ | 161/509 [25:12<1:04:12, 11.07s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▏ | 162/509 [25:23<1:03:39, 11.01s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▏ | 162/509 [25:23<1:03:39, 11.01s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2337, 'learning_rate': 9.599999999999999e-05, 'epoch': 0.32} + 32%|█████████████████████████▏ | 162/509 [25:23<1:03:39, 11.01s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▏ | 162/509 [25:23<1:03:39, 11.01s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▏ | 162/509 [25:23<1:03:39, 11.01s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▎ | 163/509 [25:34<1:03:21, 10.99s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▎ | 163/509 [25:34<1:03:21, 10.99s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▎ | 163/509 [25:34<1:03:21, 10.99s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▎ | 163/509 [25:34<1:03:21, 10.99s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▎ | 163/509 [25:34<1:03:21, 10.99s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▍ | 164/509 [25:45<1:02:53, 10.94s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▍ | 164/509 [25:45<1:02:53, 10.94s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▍ | 164/509 [25:45<1:02:53, 10.94s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▍ | 164/509 [25:45<1:02:53, 10.94s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▌ | 165/509 [25:56<1:02:32, 10.91s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▌ | 165/509 [25:56<1:02:32, 10.91s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1561, 'learning_rate': 9.779999999999999e-05, 'epoch': 0.32} + 32%|█████████████████████████▌ | 165/509 [25:56<1:02:32, 10.91s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▌ | 165/509 [25:56<1:02:32, 10.91s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▌ | 165/509 [25:56<1:02:32, 10.91s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|█████████████████████████▊ | 166/509 [26:06<1:02:09, 10.87s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|█████████████████████████▊ | 166/509 [26:06<1:02:09, 10.87s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|█████████████████████████▊ | 166/509 [26:06<1:02:09, 10.87s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|█████████████████████████▊ | 166/509 [26:06<1:02:09, 10.87s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|█████████████████████████▊ | 166/509 [26:06<1:02:09, 10.87s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|█████████████████████████▉ | 167/509 [26:17<1:01:39, 10.82s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|█████████████████████████▉ | 167/509 [26:17<1:01:39, 10.82s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|█████████████████████████▉ | 167/509 [26:17<1:01:39, 10.82s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|█████████████████████████▉ | 167/509 [26:17<1:01:39, 10.82s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|█████████████████████████▉ | 167/509 [26:17<1:01:39, 10.82s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████ | 168/509 [26:28<1:01:02, 10.74s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████ | 168/509 [26:28<1:01:02, 10.74s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████ | 168/509 [26:28<1:01:02, 10.74s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████ | 168/509 [26:28<1:01:02, 10.74s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▏ | 169/509 [26:38<1:00:32, 10.68s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▏ | 169/509 [26:38<1:00:32, 10.68s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2635, 'learning_rate': 0.0001002, 'epoch': 0.33} + 33%|██████████████████████████▏ | 169/509 [26:38<1:00:32, 10.68s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▏ | 169/509 [26:38<1:00:32, 10.68s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████ | 170/509 [26:48<59:44, 10.57s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████ | 170/509 [26:48<59:44, 10.57s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2645, 'learning_rate': 0.0001008, 'epoch': 0.33} + 33%|███████████████████████████ | 170/509 [26:48<59:44, 10.57s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████ | 170/509 [26:48<59:44, 10.57s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▏ | 171/509 [26:59<59:04, 10.49s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▏ | 171/509 [26:59<59:04, 10.49s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2651, 'learning_rate': 0.0001014, 'epoch': 0.34} + 34%|███████████████████████████▏ | 171/509 [26:59<59:04, 10.49s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▏ | 171/509 [26:59<59:04, 10.49s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▎ | 172/509 [27:09<58:22, 10.39s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▎ | 172/509 [27:09<58:22, 10.39s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1348, 'learning_rate': 0.000102, 'epoch': 0.34} + 34%|███████████████████████████▎ | 172/509 [27:09<58:22, 10.39s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▎ | 172/509 [27:09<58:22, 10.39s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▌ | 173/509 [27:19<57:47, 10.32s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▌ | 173/509 [27:19<57:47, 10.32s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1364, 'learning_rate': 0.0001026, 'epoch': 0.34} + 34%|███████████████████████████▌ | 173/509 [27:19<57:47, 10.32s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▌ | 173/509 [27:19<57:47, 10.32s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▌ | 173/509 [27:19<57:47, 10.32s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▋ | 174/509 [27:29<57:12, 10.25s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▋ | 174/509 [27:29<57:12, 10.25s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▋ | 174/509 [27:29<57:12, 10.25s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▋ | 174/509 [27:29<57:12, 10.25s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▋ | 174/509 [27:29<57:12, 10.25s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▊ | 175/509 [27:40<57:21, 10.30s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▊ | 175/509 [27:40<57:21, 10.30s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▊ | 175/509 [27:40<57:21, 10.30s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▊ | 175/509 [27:40<57:21, 10.30s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▊ | 175/509 [27:40<57:21, 10.30s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████ | 176/509 [27:49<56:23, 10.16s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████ | 176/509 [27:49<56:23, 10.16s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████ | 176/509 [27:49<56:23, 10.16s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████ | 176/509 [27:49<56:23, 10.16s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▏ | 177/509 [27:59<55:25, 10.02s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▏ | 177/509 [27:59<55:25, 10.02s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1352, 'learning_rate': 0.00010499999999999999, 'epoch': 0.35} + 35%|████████████████████████████▏ | 177/509 [27:59<55:25, 10.02s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▏ | 177/509 [27:59<55:25, 10.02s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▎ | 178/509 [28:09<54:44, 9.92s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▎ | 178/509 [28:09<54:44, 9.92s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2304, 'learning_rate': 0.00010559999999999998, 'epoch': 0.35} + 35%|████████████████████████████▎ | 178/509 [28:09<54:44, 9.92s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▎ | 178/509 [28:09<54:44, 9.92s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▎ | 178/509 [28:09<54:44, 9.92s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▍ | 179/509 [28:18<54:01, 9.82s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▍ | 179/509 [28:18<54:01, 9.82s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▍ | 179/509 [28:18<54:01, 9.82s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▍ | 179/509 [28:18<54:01, 9.82s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 180/509 [28:28<53:05, 9.68s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 180/509 [28:28<53:05, 9.68s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3465, 'learning_rate': 0.00010679999999999998, 'epoch': 0.35} + 35%|████████████████████████████▋ | 180/509 [28:28<53:05, 9.68s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 180/509 [28:28<53:05, 9.68s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▊ | 181/509 [28:37<52:35, 9.62s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▊ | 181/509 [28:37<52:35, 9.62s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1655, 'learning_rate': 0.00010739999999999998, 'epoch': 0.36} + 36%|████████████████████████████▊ | 181/509 [28:37<52:35, 9.62s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▊ | 181/509 [28:37<52:35, 9.62s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▉ | 182/509 [28:47<51:56, 9.53s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▉ | 182/509 [28:47<51:56, 9.53s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:54:57,849 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:54:57,849 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████ | 183/509 [28:56<50:58, 9.38s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████ | 183/509 [28:56<50:58, 9.38s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1614, 'learning_rate': 0.00010859999999999998, 'epoch': 0.36} + 36%|█████████████████████████████ | 183/509 [28:56<50:58, 9.38s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████ | 183/509 [28:56<50:58, 9.38s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▎ | 184/509 [29:04<50:02, 9.24s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▎ | 184/509 [29:04<50:02, 9.24s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2064, 'learning_rate': 0.00010919999999999998, 'epoch': 0.36} + 36%|█████████████████████████████▎ | 184/509 [29:04<50:02, 9.24s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▎ | 184/509 [29:04<50:02, 9.24s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▎ | 184/509 [29:04<50:02, 9.24s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.326, 'learning_rate': 0.00010979999999999999, 'epoch': 0.36} + 36%|█████████████████████████████▎ | 184/509 [29:04<50:02, 9.24s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▎ | 184/509 [29:04<50:02, 9.24s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:55:26,531 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▌ | 186/509 [29:22<48:25, 8.99s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▌ | 186/509 [29:22<48:25, 8.99s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1589, 'learning_rate': 0.00011039999999999999, 'epoch': 0.36} + 37%|█████████████████████████████▌ | 186/509 [29:22<48:25, 8.99s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:55:37,037 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:55:37,037 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2655, 'learning_rate': 0.00011099999999999999, 'epoch': 0.37} +[WARNING|modeling_utils.py:388] 2022-03-02 22:55:37,037 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:55:37,037 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▉ | 188/509 [29:39<46:03, 8.61s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▉ | 188/509 [29:39<46:03, 8.61s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2708, 'learning_rate': 0.00011159999999999999, 'epoch': 0.37} + 37%|█████████████████████████████▉ | 188/509 [29:39<46:03, 8.61s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▉ | 188/509 [29:39<46:03, 8.61s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|██████████████████████████████ | 189/509 [29:46<44:48, 8.40s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|██████████████████████████████ | 189/509 [29:46<44:48, 8.40s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3677, 'learning_rate': 0.00011219999999999999, 'epoch': 0.37} + 37%|██████████████████████████████ | 189/509 [29:46<44:48, 8.40s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|██████████████████████████████ | 189/509 [29:46<44:48, 8.40s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|██████████████████████████████▏ | 190/509 [29:54<43:17, 8.14s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|██████████████████████████████▏ | 190/509 [29:54<43:17, 8.14s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2505, 'learning_rate': 0.00011279999999999999, 'epoch': 0.37} +[WARNING|modeling_utils.py:388] 2022-03-02 22:56:05,975 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▍ | 191/509 [30:01<41:21, 7.80s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▍ | 191/509 [30:01<41:21, 7.80s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4483, 'learning_rate': 0.00011339999999999999, 'epoch': 0.37} + 38%|██████████████████████████████▍ | 191/509 [30:01<41:21, 7.80s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▍ | 191/509 [30:01<41:21, 7.80s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▍ | 191/509 [30:01<41:21, 7.80s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▌ | 192/509 [30:08<39:21, 7.45s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:56:17,427 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:56:17,427 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:56:17,427 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▋ | 193/509 [30:14<37:07, 7.05s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:56:23,375 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:56:26,076 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:56:26,076 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3805, 'learning_rate': 0.0001152, 'epoch': 0.38} +[WARNING|modeling_utils.py:388] 2022-03-02 22:56:30,055 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|███████████████████████████████ | 195/509 [30:25<32:28, 6.21s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|███████████████████████████████ | 195/509 [30:25<32:28, 6.21s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:56:33,678 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:56:35,829 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:56:35,829 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:56:37,925 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:56:39,840 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:56:39,840 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:56:41,734 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:56:43,445 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:56:43,445 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:56:45,090 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:56:45,090 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:56:47,888 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:56:49,568 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:56:49,568 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6075, 'learning_rate': 0.0001188, 'epoch': 0.39} +[WARNING|modeling_utils.py:388] 2022-03-02 22:56:55,818 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 22:56:55,818 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▉ | 201/509 [30:55<32:51, 6.40s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▉ | 201/509 [30:55<32:51, 6.40s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3892, 'learning_rate': 0.0001194, 'epoch': 0.39} + 39%|███████████████████████████████▉ | 201/509 [30:55<32:51, 6.40s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▉ | 201/509 [30:55<32:51, 6.40s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 202/509 [31:07<40:58, 8.01s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 202/509 [31:07<40:58, 8.01s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3396, 'learning_rate': 0.00011999999999999999, 'epoch': 0.4} + 40%|████████████████████████████████▏ | 202/509 [31:07<40:58, 8.01s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 202/509 [31:07<40:58, 8.01s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▎ | 203/509 [31:18<46:14, 9.07s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▎ | 203/509 [31:18<46:14, 9.07s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3076, 'learning_rate': 0.00012059999999999999, 'epoch': 0.4} + 40%|████████████████████████████████▎ | 203/509 [31:18<46:14, 9.07s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▎ | 203/509 [31:18<46:14, 9.07s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▍ | 204/509 [31:30<49:41, 9.78s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▍ | 204/509 [31:30<49:41, 9.78s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3174, 'learning_rate': 0.00012119999999999999, 'epoch': 0.4} + 40%|████████████████████████████████▍ | 204/509 [31:30<49:41, 9.78s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▍ | 204/509 [31:30<49:41, 9.78s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▍ | 204/509 [31:30<49:41, 9.78s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▌ | 205/509 [31:41<51:50, 10.23s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▌ | 205/509 [31:41<51:50, 10.23s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▌ | 205/509 [31:41<51:50, 10.23s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▌ | 205/509 [31:41<51:50, 10.23s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▌ | 205/509 [31:41<51:50, 10.23s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▊ | 206/509 [31:53<53:20, 10.56s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▊ | 206/509 [31:53<53:20, 10.56s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▊ | 206/509 [31:53<53:20, 10.56s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▊ | 206/509 [31:53<53:20, 10.56s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▊ | 206/509 [31:53<53:20, 10.56s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▊ | 206/509 [31:53<53:20, 10.56s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1905, 'learning_rate': 0.00012299999999999998, 'epoch': 0.41} + 40%|████████████████████████████████▊ | 206/509 [31:53<53:20, 10.56s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▊ | 206/509 [31:53<53:20, 10.56s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▊ | 206/509 [31:53<53:20, 10.56s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▊ | 206/509 [31:53<53:20, 10.56s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████ | 208/509 [32:15<54:26, 10.85s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████ | 208/509 [32:15<54:26, 10.85s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████ | 208/509 [32:15<54:26, 10.85s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████ | 208/509 [32:15<54:26, 10.85s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▎ | 209/509 [32:26<54:33, 10.91s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▎ | 209/509 [32:26<54:33, 10.91s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2853, 'learning_rate': 0.00012419999999999998, 'epoch': 0.41} + 41%|█████████████████████████████████▎ | 209/509 [32:26<54:33, 10.91s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▎ | 209/509 [32:26<54:33, 10.91s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▎ | 209/509 [32:26<54:33, 10.91s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▎ | 209/509 [32:26<54:33, 10.91s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4103, 'learning_rate': 0.00012479999999999997, 'epoch': 0.41} + 41%|█████████████████████████████████▎ | 209/509 [32:26<54:33, 10.91s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▎ | 209/509 [32:26<54:33, 10.91s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▎ | 209/509 [32:26<54:33, 10.91s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▎ | 209/509 [32:26<54:33, 10.91s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▌ | 211/509 [32:48<54:17, 10.93s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▌ | 211/509 [32:48<54:17, 10.93s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▌ | 211/509 [32:48<54:17, 10.93s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▌ | 211/509 [32:48<54:17, 10.93s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▋ | 212/509 [32:59<53:52, 10.88s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▋ | 212/509 [32:59<53:52, 10.88s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2824, 'learning_rate': 0.00012599999999999997, 'epoch': 0.42} + 42%|█████████████████████████████████▋ | 212/509 [32:59<53:52, 10.88s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▋ | 212/509 [32:59<53:52, 10.88s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▉ | 213/509 [33:09<53:31, 10.85s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▉ | 213/509 [33:09<53:31, 10.85s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3676, 'learning_rate': 0.0001266, 'epoch': 0.42} + 42%|█████████████████████████████████▉ | 213/509 [33:09<53:31, 10.85s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▉ | 213/509 [33:09<53:31, 10.85s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████ | 214/509 [33:20<53:09, 10.81s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████ | 214/509 [33:20<53:09, 10.81s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3831, 'learning_rate': 0.00012719999999999997, 'epoch': 0.42} + 42%|██████████████████████████████████ | 214/509 [33:20<53:09, 10.81s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████ | 214/509 [33:20<53:09, 10.81s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████ | 214/509 [33:20<53:09, 10.81s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████ | 214/509 [33:20<53:09, 10.81s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████ | 214/509 [33:20<53:09, 10.81s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2179, 'learning_rate': 0.0001278, 'epoch': 0.42} + 42%|██████████████████████████████████ | 214/509 [33:20<53:09, 10.81s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████ | 214/509 [33:20<53:09, 10.81s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████ | 214/509 [33:20<53:09, 10.81s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████▎ | 216/509 [33:41<52:14, 10.70s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████▎ | 216/509 [33:41<52:14, 10.70s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████▎ | 216/509 [33:41<52:14, 10.70s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████▎ | 216/509 [33:41<52:14, 10.70s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████▎ | 216/509 [33:41<52:14, 10.70s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▌ | 217/509 [33:52<51:45, 10.63s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▌ | 217/509 [33:52<51:45, 10.63s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▌ | 217/509 [33:52<51:45, 10.63s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▌ | 217/509 [33:52<51:45, 10.63s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▌ | 217/509 [33:52<51:45, 10.63s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▋ | 218/509 [34:02<51:13, 10.56s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▋ | 218/509 [34:02<51:13, 10.56s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▋ | 218/509 [34:02<51:13, 10.56s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▋ | 218/509 [34:02<51:13, 10.56s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▋ | 218/509 [34:02<51:13, 10.56s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▊ | 219/509 [34:12<50:43, 10.50s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▊ | 219/509 [34:12<50:43, 10.50s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▊ | 219/509 [34:12<50:43, 10.50s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▊ | 219/509 [34:12<50:43, 10.50s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▊ | 219/509 [34:12<50:43, 10.50s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████ | 220/509 [34:23<50:20, 10.45s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████ | 220/509 [34:23<50:20, 10.45s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████ | 220/509 [34:23<50:20, 10.45s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████ | 220/509 [34:23<50:20, 10.45s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████ | 220/509 [34:23<50:20, 10.45s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████▏ | 221/509 [34:33<49:44, 10.36s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████▏ | 221/509 [34:33<49:44, 10.36s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████▏ | 221/509 [34:33<49:44, 10.36s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████▏ | 221/509 [34:33<49:44, 10.36s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████▏ | 221/509 [34:33<49:44, 10.36s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▎ | 222/509 [34:43<49:11, 10.28s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▎ | 222/509 [34:43<49:11, 10.28s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▎ | 222/509 [34:43<49:11, 10.28s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▎ | 222/509 [34:43<49:11, 10.28s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▎ | 222/509 [34:43<49:11, 10.28s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 223/509 [34:53<48:47, 10.24s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 223/509 [34:53<48:47, 10.24s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 223/509 [34:53<48:47, 10.24s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 223/509 [34:53<48:47, 10.24s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▋ | 224/509 [35:03<48:18, 10.17s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▋ | 224/509 [35:03<48:18, 10.17s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3202, 'learning_rate': 0.00013319999999999999, 'epoch': 0.44} + 44%|███████████████████████████████████▋ | 224/509 [35:03<48:18, 10.17s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▋ | 224/509 [35:03<48:18, 10.17s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▊ | 225/509 [35:14<48:42, 10.29s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▊ | 225/509 [35:14<48:42, 10.29s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4132, 'learning_rate': 0.0001338, 'epoch': 0.44} + 44%|███████████████████████████████████▊ | 225/509 [35:14<48:42, 10.29s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▊ | 225/509 [35:14<48:42, 10.29s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▊ | 225/509 [35:14<48:42, 10.29s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▊ | 225/509 [35:14<48:42, 10.29s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▉ | 226/509 [35:24<48:03, 10.19s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▉ | 226/509 [35:24<48:03, 10.19s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▉ | 226/509 [35:24<48:03, 10.19s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▉ | 226/509 [35:24<48:03, 10.19s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████ | 227/509 [35:33<47:18, 10.07s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████ | 227/509 [35:33<47:18, 10.07s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4208, 'learning_rate': 0.000135, 'epoch': 0.45} + 45%|████████████████████████████████████ | 227/509 [35:33<47:18, 10.07s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████ | 227/509 [35:33<47:18, 10.07s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 228/509 [35:43<46:40, 9.97s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 228/509 [35:43<46:40, 9.97s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1902, 'learning_rate': 0.0001356, 'epoch': 0.45} + 45%|████████████████████████████████████▎ | 228/509 [35:43<46:40, 9.97s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 228/509 [35:43<46:40, 9.97s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▍ | 229/509 [35:53<46:02, 9.86s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▍ | 229/509 [35:53<46:02, 9.86s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4058, 'learning_rate': 0.0001362, 'epoch': 0.45} + 45%|████████████████████████████████████▍ | 229/509 [35:53<46:02, 9.86s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▍ | 229/509 [35:53<46:02, 9.86s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▌ | 230/509 [36:02<45:27, 9.78s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▌ | 230/509 [36:02<45:27, 9.78s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4925, 'learning_rate': 0.0001368, 'epoch': 0.45} + 45%|████████████████████████████████████▌ | 230/509 [36:02<45:27, 9.78s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▌ | 230/509 [36:02<45:27, 9.78s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▊ | 231/509 [36:12<44:47, 9.67s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▊ | 231/509 [36:12<44:47, 9.67s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3905, 'learning_rate': 0.0001374, 'epoch': 0.45} + 45%|████████████████████████████████████▊ | 231/509 [36:12<44:47, 9.67s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▊ | 231/509 [36:12<44:47, 9.67s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▊ | 231/509 [36:12<44:47, 9.67s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▉ | 232/509 [36:21<44:07, 9.56s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▉ | 232/509 [36:21<44:07, 9.56s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▉ | 232/509 [36:21<44:07, 9.56s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▉ | 232/509 [36:21<44:07, 9.56s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▉ | 232/509 [36:21<44:07, 9.56s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████ | 233/509 [36:30<43:40, 9.50s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████ | 233/509 [36:30<43:40, 9.50s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████ | 233/509 [36:30<43:40, 9.50s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████ | 233/509 [36:30<43:40, 9.50s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▏ | 234/509 [36:40<42:58, 9.38s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▏ | 234/509 [36:40<42:58, 9.38s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1011, 'learning_rate': 0.0001392, 'epoch': 0.46} + 46%|█████████████████████████████████████▏ | 234/509 [36:40<42:58, 9.38s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▏ | 234/509 [36:40<42:58, 9.38s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▏ | 234/509 [36:40<42:58, 9.38s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▍ | 235/509 [36:49<42:19, 9.27s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▍ | 235/509 [36:49<42:19, 9.27s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▍ | 235/509 [36:49<42:19, 9.27s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▍ | 235/509 [36:49<42:19, 9.27s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▍ | 235/509 [36:49<42:19, 9.27s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▌ | 236/509 [36:57<41:38, 9.15s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▌ | 236/509 [36:57<41:38, 9.15s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▌ | 236/509 [36:57<41:38, 9.15s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▌ | 236/509 [36:57<41:38, 9.15s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▋ | 237/509 [37:06<40:46, 8.99s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▋ | 237/509 [37:06<40:46, 8.99s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3442, 'learning_rate': 0.00014099999999999998, 'epoch': 0.46} + 47%|█████████████████████████████████████▋ | 237/509 [37:06<40:46, 8.99s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▋ | 237/509 [37:06<40:46, 8.99s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▊ | 238/509 [37:14<39:43, 8.80s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▊ | 238/509 [37:14<39:43, 8.80s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1006, 'learning_rate': 0.00014159999999999997, 'epoch': 0.47} + 47%|█████████████████████████████████████▊ | 238/509 [37:14<39:43, 8.80s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▊ | 238/509 [37:14<39:43, 8.80s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|██████████████████████████████████████ | 239/509 [37:23<38:39, 8.59s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|██████████████████████████████████████ | 239/509 [37:23<38:39, 8.59s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4731, 'learning_rate': 0.0001422, 'epoch': 0.47} + 47%|██████████████████████████████████████ | 239/509 [37:23<38:39, 8.59s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|██████████████████████████████████████ | 239/509 [37:23<38:39, 8.59s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|██████████████████████████████████████▏ | 240/509 [37:30<37:22, 8.34s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|██████████████████████████████████████▏ | 240/509 [37:30<37:22, 8.34s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3852, 'learning_rate': 0.00014279999999999997, 'epoch': 0.47} + 47%|██████████████████████████████████████▏ | 240/509 [37:30<37:22, 8.34s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|██████████████████████████████████████▏ | 240/509 [37:30<37:22, 8.34s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|██████████████████████████████████████▏ | 240/509 [37:30<37:22, 8.34s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|██████████████████████████████████████▎ | 241/509 [37:38<35:55, 8.04s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:03:47,964 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:03:47,964 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▌ | 242/509 [37:45<34:20, 7.72s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▌ | 242/509 [37:45<34:20, 7.72s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3491, 'learning_rate': 0.00014399999999999998, 'epoch': 0.47} +[WARNING|modeling_utils.py:388] 2022-03-02 23:03:56,168 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▋ | 243/509 [37:51<32:32, 7.34s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▋ | 243/509 [37:51<32:32, 7.34s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.382, 'learning_rate': 0.0001446, 'epoch': 0.48} + 48%|██████████████████████████████████████▋ | 243/509 [37:51<32:32, 7.34s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:04:03,824 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:04:03,824 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3693, 'learning_rate': 0.00014519999999999998, 'epoch': 0.48} +[WARNING|modeling_utils.py:388] 2022-03-02 23:04:08,041 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:04:08,041 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▉ | 245/509 [38:03<28:42, 6.52s/it]g-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:04:11,943 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:04:14,261 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:04:14,261 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3659, 'learning_rate': 0.00014639999999999998, 'epoch': 0.48} +[WARNING|modeling_utils.py:388] 2022-03-02 23:04:17,631 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:04:17,631 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 22:48:45,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▎ | 247/509 [38:12<24:12, 5.55s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:19,755 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:04:21,577 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:04:19,755 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:04:21,577 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:04:19,755 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▍ | 248/509 [38:16<21:51, 5.02s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:23,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▍ | 248/509 [38:16<21:51, 5.02s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:23,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▌ | 249/509 [38:19<19:24, 4.48s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:26,450 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:04:27,697 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:04:26,450 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:04:27,697 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:04:26,450 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▊ | 250/509 [38:22<17:37, 4.08s/it]g-point operations will not be computed-02 23:04:26,450 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▊ | 250/509 [38:22<17:37, 4.08s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▊ | 250/509 [38:22<17:37, 4.08s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▊ | 250/509 [38:22<17:37, 4.08s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▊ | 250/509 [38:22<17:37, 4.08s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▉ | 251/509 [38:34<28:05, 6.53s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▉ | 251/509 [38:34<28:05, 6.53s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▉ | 251/509 [38:34<28:05, 6.53s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▉ | 251/509 [38:34<28:05, 6.53s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████ | 252/509 [38:46<34:46, 8.12s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████ | 252/509 [38:46<34:46, 8.12s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4688, 'learning_rate': 0.00015, 'epoch': 0.49} + 50%|████████████████████████████████████████ | 252/509 [38:46<34:46, 8.12s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████ | 252/509 [38:46<34:46, 8.12s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▎ | 253/509 [38:58<39:09, 9.18s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▎ | 253/509 [38:58<39:09, 9.18s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4845, 'learning_rate': 0.00015059999999999997, 'epoch': 0.5} + 50%|████████████████████████████████████████▎ | 253/509 [38:58<39:09, 9.18s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▎ | 253/509 [38:58<39:09, 9.18s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▎ | 253/509 [38:58<39:09, 9.18s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▍ | 254/509 [39:10<42:14, 9.94s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▍ | 254/509 [39:10<42:14, 9.94s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▍ | 254/509 [39:10<42:14, 9.94s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▍ | 254/509 [39:10<42:14, 9.94s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▌ | 255/509 [39:21<44:02, 10.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▌ | 255/509 [39:21<44:02, 10.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4156, 'learning_rate': 0.00015179999999999998, 'epoch': 0.5} + 50%|████████████████████████████████████████▌ | 255/509 [39:21<44:02, 10.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▌ | 255/509 [39:21<44:02, 10.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▌ | 255/509 [39:21<44:02, 10.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▋ | 256/509 [39:32<45:10, 10.71s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▋ | 256/509 [39:32<45:10, 10.71s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▋ | 256/509 [39:32<45:10, 10.71s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▋ | 256/509 [39:32<45:10, 10.71s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▋ | 256/509 [39:32<45:10, 10.71s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▉ | 257/509 [39:44<45:49, 10.91s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▉ | 257/509 [39:44<45:49, 10.91s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▉ | 257/509 [39:44<45:49, 10.91s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▉ | 257/509 [39:44<45:49, 10.91s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + [WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + [WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4527, 'learning_rate': 0.0001536, 'epoch': 0.51} + [WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + [WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + [WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + [WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▏ | 259/509 [40:06<46:03, 11.06s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▏ | 259/509 [40:06<46:03, 11.06s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▏ | 259/509 [40:06<46:03, 11.06s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▏ | 259/509 [40:06<46:03, 11.06s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▍ | 260/509 [40:17<46:00, 11.09s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▍ | 260/509 [40:17<46:00, 11.09s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4085, 'learning_rate': 0.0001548, 'epoch': 0.51} + 51%|█████████████████████████████████████████▍ | 260/509 [40:17<46:00, 11.09s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▍ | 260/509 [40:17<46:00, 11.09s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▍ | 260/509 [40:17<46:00, 11.09s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▌ | 261/509 [40:28<45:36, 11.03s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▌ | 261/509 [40:28<45:36, 11.03s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▌ | 261/509 [40:28<45:36, 11.03s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▌ | 261/509 [40:28<45:36, 11.03s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▋ | 262/509 [40:39<45:24, 11.03s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▋ | 262/509 [40:39<45:24, 11.03s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2291, 'learning_rate': 0.000156, 'epoch': 0.51} + 51%|█████████████████████████████████████████▋ | 262/509 [40:39<45:24, 11.03s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▋ | 262/509 [40:39<45:24, 11.03s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + [WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + [WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3193, 'learning_rate': 0.00015659999999999998, 'epoch': 0.52} + [WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + [WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + [WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + [WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████ | 264/509 [41:01<44:50, 10.98s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████ | 264/509 [41:01<44:50, 10.98s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████ | 264/509 [41:01<44:50, 10.98s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████ | 264/509 [41:01<44:50, 10.98s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▏ | 265/509 [41:12<44:24, 10.92s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▏ | 265/509 [41:12<44:24, 10.92s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4207, 'learning_rate': 0.0001578, 'epoch': 0.52} + 52%|██████████████████████████████████████████▏ | 265/509 [41:12<44:24, 10.92s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▏ | 265/509 [41:12<44:24, 10.92s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▏ | 265/509 [41:12<44:24, 10.92s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▎ | 266/509 [41:23<44:07, 10.89s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▎ | 266/509 [41:23<44:07, 10.89s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▎ | 266/509 [41:23<44:07, 10.89s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▎ | 266/509 [41:23<44:07, 10.89s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▎ | 266/509 [41:23<44:07, 10.89s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▍ | 267/509 [41:34<43:40, 10.83s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▍ | 267/509 [41:34<43:40, 10.83s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▍ | 267/509 [41:34<43:40, 10.83s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▍ | 267/509 [41:34<43:40, 10.83s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▍ | 267/509 [41:34<43:40, 10.83s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▍ | 267/509 [41:34<43:40, 10.83s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3651, 'learning_rate': 0.0001596, 'epoch': 0.53} + 52%|██████████████████████████████████████████▍ | 267/509 [41:34<43:40, 10.83s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▍ | 267/509 [41:34<43:40, 10.83s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▍ | 267/509 [41:34<43:40, 10.83s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▍ | 267/509 [41:34<43:40, 10.83s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▊ | 269/509 [41:55<42:54, 10.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▊ | 269/509 [41:55<42:54, 10.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▊ | 269/509 [41:55<42:54, 10.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▊ | 269/509 [41:55<42:54, 10.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▊ | 269/509 [41:55<42:54, 10.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▉ | 270/509 [42:05<42:24, 10.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▉ | 270/509 [42:05<42:24, 10.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▉ | 270/509 [42:05<42:24, 10.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▉ | 270/509 [42:05<42:24, 10.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▉ | 270/509 [42:05<42:24, 10.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|███████████████████████████████████████████▏ | 271/509 [42:16<41:50, 10.55s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|███████████████████████████████████████████▏ | 271/509 [42:16<41:50, 10.55s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|███████████████████████████████████████████▏ | 271/509 [42:16<41:50, 10.55s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|███████████████████████████████████████████▏ | 271/509 [42:16<41:50, 10.55s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|███████████████████████████████████████████▏ | 271/509 [42:16<41:50, 10.55s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|███████████████████████████████████████████▎ | 272/509 [42:26<41:22, 10.48s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|███████████████████████████████████████████▎ | 272/509 [42:26<41:22, 10.48s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|███████████████████████████████████████████▎ | 272/509 [42:26<41:22, 10.48s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|███████████████████████████████████████████▎ | 272/509 [42:26<41:22, 10.48s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|███████████████████████████████████████████▎ | 272/509 [42:26<41:22, 10.48s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▍ | 273/509 [42:36<40:55, 10.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▍ | 273/509 [42:36<40:55, 10.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▍ | 273/509 [42:36<40:55, 10.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▍ | 273/509 [42:36<40:55, 10.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▍ | 273/509 [42:36<40:55, 10.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▌ | 274/509 [42:46<40:22, 10.31s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▌ | 274/509 [42:46<40:22, 10.31s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▌ | 274/509 [42:46<40:22, 10.31s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▌ | 274/509 [42:46<40:22, 10.31s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▊ | 275/509 [42:57<40:37, 10.42s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▊ | 275/509 [42:57<40:37, 10.42s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4815, 'learning_rate': 0.0001638, 'epoch': 0.54} + 54%|███████████████████████████████████████████▊ | 275/509 [42:57<40:37, 10.42s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▊ | 275/509 [42:57<40:37, 10.42s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▊ | 275/509 [42:57<40:37, 10.42s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▉ | 276/509 [43:07<40:01, 10.30s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▉ | 276/509 [43:07<40:01, 10.30s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▉ | 276/509 [43:07<40:01, 10.30s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▉ | 276/509 [43:07<40:01, 10.30s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|████████████████████████████████████████████ | 277/509 [43:17<39:28, 10.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|████████████████████████████████████████████ | 277/509 [43:17<39:28, 10.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6488, 'learning_rate': 0.000165, 'epoch': 0.54} + 54%|████████████████████████████████████████████ | 277/509 [43:17<39:28, 10.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|████████████████████████████████████████████ | 277/509 [43:17<39:28, 10.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|████████████████████████████████████████████ | 277/509 [43:17<39:28, 10.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|████████████████████████████████████████████ | 277/509 [43:17<39:28, 10.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4677, 'learning_rate': 0.0001656, 'epoch': 0.55} + 54%|████████████████████████████████████████████ | 277/509 [43:17<39:28, 10.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|████████████████████████████████████████████ | 277/509 [43:17<39:28, 10.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|████████████████████████████████████████████ | 277/509 [43:17<39:28, 10.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▍ | 279/509 [43:37<38:25, 10.02s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▍ | 279/509 [43:37<38:25, 10.02s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▍ | 279/509 [43:37<38:25, 10.02s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▍ | 279/509 [43:37<38:25, 10.02s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▍ | 279/509 [43:37<38:25, 10.02s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▌ | 280/509 [43:46<37:46, 9.90s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▌ | 280/509 [43:46<37:46, 9.90s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▌ | 280/509 [43:46<37:46, 9.90s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▌ | 280/509 [43:46<37:46, 9.90s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▋ | 281/509 [43:56<37:17, 9.81s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▋ | 281/509 [43:56<37:17, 9.81s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4123, 'learning_rate': 0.0001674, 'epoch': 0.55} + 55%|████████████████████████████████████████████▋ | 281/509 [43:56<37:17, 9.81s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▋ | 281/509 [43:56<37:17, 9.81s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▋ | 281/509 [43:56<37:17, 9.81s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▉ | 282/509 [44:05<36:36, 9.68s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▉ | 282/509 [44:05<36:36, 9.68s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▉ | 282/509 [44:05<36:36, 9.68s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▉ | 282/509 [44:05<36:36, 9.68s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████ | 283/509 [44:15<35:56, 9.54s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████ | 283/509 [44:15<35:56, 9.54s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5083, 'learning_rate': 0.0001686, 'epoch': 0.56} + 56%|█████████████████████████████████████████████ | 283/509 [44:15<35:56, 9.54s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████ | 283/509 [44:15<35:56, 9.54s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████ | 283/509 [44:15<35:56, 9.54s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▏ | 284/509 [44:24<35:18, 9.41s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▏ | 284/509 [44:24<35:18, 9.41s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▏ | 284/509 [44:24<35:18, 9.41s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▏ | 284/509 [44:24<35:18, 9.41s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▎ | 285/509 [44:33<34:41, 9.29s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▎ | 285/509 [44:33<34:41, 9.29s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4569, 'learning_rate': 0.00016979999999999998, 'epoch': 0.56} + 56%|█████████████████████████████████████████████▎ | 285/509 [44:33<34:41, 9.29s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▎ | 285/509 [44:33<34:41, 9.29s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▌ | 286/509 [44:41<34:00, 9.15s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▌ | 286/509 [44:41<34:00, 9.15s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4092, 'learning_rate': 0.00017039999999999997, 'epoch': 0.56} + 56%|█████████████████████████████████████████████▌ | 286/509 [44:41<34:00, 9.15s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▌ | 286/509 [44:41<34:00, 9.15s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▋ | 287/509 [44:50<33:16, 9.00s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▋ | 287/509 [44:50<33:16, 9.00s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4716, 'learning_rate': 0.00017099999999999998, 'epoch': 0.56} + 56%|█████████████████████████████████████████████▋ | 287/509 [44:50<33:16, 9.00s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▋ | 287/509 [44:50<33:16, 9.00s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▋ | 287/509 [44:50<33:16, 9.00s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|█████████████████████████████████████████████▊ | 288/509 [44:58<32:24, 8.80s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|█████████████████████████████████████████████▊ | 288/509 [44:58<32:24, 8.80s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|█████████████████████████████████████████████▊ | 288/509 [44:58<32:24, 8.80s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|█████████████████████████████████████████████▊ | 288/509 [44:58<32:24, 8.80s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|█████████████████████████████████████████████▊ | 288/509 [44:58<32:24, 8.80s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|█████████████████████████████████████████████▉ | 289/509 [45:06<31:19, 8.54s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|█████████████████████████████████████████████▉ | 289/509 [45:06<31:19, 8.54s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|█████████████████████████████████████████████▉ | 289/509 [45:06<31:19, 8.54s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|█████████████████████████████████████████████▉ | 289/509 [45:06<31:19, 8.54s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|█████████████████████████████████████████████▉ | 289/509 [45:06<31:19, 8.54s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|██████████████████████████████████████████████▏ | 290/509 [45:14<29:58, 8.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|██████████████████████████████████████████████▏ | 290/509 [45:14<29:58, 8.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:11:25,846 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|██████████████████████████████████████████████▎ | 291/509 [45:21<28:33, 7.86s/it]g-point operations will not be computed-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|██████████████████████████████████████████████▎ | 291/509 [45:21<28:33, 7.86s/it]g-point operations will not be computed-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2536, 'learning_rate': 0.00017339999999999996, 'epoch': 0.57} + 57%|██████████████████████████████████████████████▎ | 291/509 [45:21<28:33, 7.86s/it]g-point operations will not be computed-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:11:34,188 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:11:34,188 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5786, 'learning_rate': 0.00017399999999999997, 'epoch': 0.57} +[WARNING|modeling_utils.py:388] 2022-03-02 23:11:34,188 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:11:34,188 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:11:34,188 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:04:32,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|██████████████████████████████████████████████▋ | 293/509 [45:34<25:27, 7.07s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:11:41,787 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|██████████████████████████████████████████████▋ | 293/509 [45:34<25:27, 7.07s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:11:41,787 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|██████████████████████████████████████████████▋ | 293/509 [45:34<25:27, 7.07s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:11:41,787 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:11:45,822 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:11:41,787 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:11:45,822 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:11:41,787 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:11:49,536 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:11:41,787 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:11:49,536 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:11:41,787 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|██████████████████████████████████████████████▉ | 295/509 [45:44<21:43, 6.09s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:11:51,892 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:11:53,983 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:11:51,892 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:11:53,983 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:11:51,892 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|███████████████████████████████████████████████ | 296/509 [45:48<19:44, 5.56s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:11:56,092 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:11:58,004 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:11:56,092 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:11:58,004 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:11:56,092 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|███████████████████████████████████████████████▎ | 297/509 [45:52<17:54, 5.07s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:11:59,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:12:01,588 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:11:59,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:12:01,588 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:11:59,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▍ | 298/509 [45:56<16:07, 4.59s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:03,253 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▍ | 298/509 [45:56<16:07, 4.59s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:03,253 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▌ | 299/509 [45:59<14:21, 4.10s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:06,144 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:12:07,345 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:12:06,144 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:12:07,345 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:12:06,144 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▋ | 300/509 [46:02<13:16, 3.81s/it]g-point operations will not be computed-02 23:12:06,144 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▋ | 300/509 [46:02<13:16, 3.81s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▋ | 300/509 [46:02<13:16, 3.81s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▋ | 300/509 [46:02<13:16, 3.81s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▉ | 301/509 [46:14<21:53, 6.31s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▉ | 301/509 [46:14<21:53, 6.31s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5427, 'learning_rate': 0.00017939999999999997, 'epoch': 0.59} + 59%|███████████████████████████████████████████████▉ | 301/509 [46:14<21:53, 6.31s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▉ | 301/509 [46:14<21:53, 6.31s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▉ | 301/509 [46:14<21:53, 6.31s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████ | 302/509 [46:26<27:13, 7.89s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████ | 302/509 [46:26<27:13, 7.89s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████ | 302/509 [46:26<27:13, 7.89s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████ | 302/509 [46:26<27:13, 7.89s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████ | 302/509 [46:26<27:13, 7.89s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████ | 302/509 [46:26<27:13, 7.89s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.543, 'learning_rate': 0.00018059999999999997, 'epoch': 0.59} + 59%|████████████████████████████████████████████████ | 302/509 [46:26<27:13, 7.89s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████ | 302/509 [46:26<27:13, 7.89s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████ | 302/509 [46:26<27:13, 7.89s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▍ | 304/509 [46:48<33:10, 9.71s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▍ | 304/509 [46:48<33:10, 9.71s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4208, 'learning_rate': 0.00018119999999999999, 'epoch': 0.6} + 60%|████████████████████████████████████████████████▍ | 304/509 [46:48<33:10, 9.71s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▍ | 304/509 [46:48<33:10, 9.71s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▌ | 305/509 [47:00<34:46, 10.23s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▌ | 305/509 [47:00<34:46, 10.23s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2905, 'learning_rate': 0.00018179999999999997, 'epoch': 0.6} + 60%|████████████████████████████████████████████████▌ | 305/509 [47:00<34:46, 10.23s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▌ | 305/509 [47:00<34:46, 10.23s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▌ | 305/509 [47:00<34:46, 10.23s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▋ | 306/509 [47:11<35:39, 10.54s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▋ | 306/509 [47:11<35:39, 10.54s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▋ | 306/509 [47:11<35:39, 10.54s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▋ | 306/509 [47:11<35:39, 10.54s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▋ | 306/509 [47:11<35:39, 10.54s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▊ | 307/509 [47:22<36:16, 10.77s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▊ | 307/509 [47:22<36:16, 10.77s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▊ | 307/509 [47:22<36:16, 10.77s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▊ | 307/509 [47:22<36:16, 10.77s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▊ | 307/509 [47:22<36:16, 10.77s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 308/509 [47:34<36:25, 10.87s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 308/509 [47:34<36:25, 10.87s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 308/509 [47:34<36:25, 10.87s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 308/509 [47:34<36:25, 10.87s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▏ | 309/509 [47:45<36:31, 10.96s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▏ | 309/509 [47:45<36:31, 10.96s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3185, 'learning_rate': 0.00018419999999999998, 'epoch': 0.61} + 61%|█████████████████████████████████████████████████▏ | 309/509 [47:45<36:31, 10.96s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▏ | 309/509 [47:45<36:31, 10.96s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▎ | 310/509 [47:56<36:27, 10.99s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▎ | 310/509 [47:56<36:27, 10.99s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4775, 'learning_rate': 0.0001848, 'epoch': 0.61} + 61%|█████████████████████████████████████████████████▎ | 310/509 [47:56<36:27, 10.99s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▎ | 310/509 [47:56<36:27, 10.99s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▎ | 310/509 [47:56<36:27, 10.99s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▎ | 310/509 [47:56<36:27, 10.99s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3612, 'learning_rate': 0.00018539999999999998, 'epoch': 0.61} + 61%|█████████████████████████████████████████████████▎ | 310/509 [47:56<36:27, 10.99s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▎ | 310/509 [47:56<36:27, 10.99s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▎ | 310/509 [47:56<36:27, 10.99s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 312/509 [48:18<35:53, 10.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 312/509 [48:18<35:53, 10.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.591, 'learning_rate': 0.000186, 'epoch': 0.61} + 61%|█████████████████████████████████████████████████▋ | 312/509 [48:18<35:53, 10.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 312/509 [48:18<35:53, 10.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▊ | 313/509 [48:28<35:40, 10.92s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▊ | 313/509 [48:28<35:40, 10.92s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4811, 'learning_rate': 0.00018659999999999998, 'epoch': 0.61} + 61%|█████████████████████████████████████████████████▊ | 313/509 [48:28<35:40, 10.92s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▊ | 313/509 [48:28<35:40, 10.92s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|█████████████████████████████████████████████████▉ | 314/509 [48:39<35:21, 10.88s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|█████████████████████████████████████████████████▉ | 314/509 [48:39<35:21, 10.88s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.369, 'learning_rate': 0.0001872, 'epoch': 0.62} + 62%|█████████████████████████████████████████████████▉ | 314/509 [48:39<35:21, 10.88s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|█████████████████████████████████████████████████▉ | 314/509 [48:39<35:21, 10.88s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▏ | 315/509 [48:50<35:02, 10.84s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▏ | 315/509 [48:50<35:02, 10.84s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.611, 'learning_rate': 0.00018779999999999998, 'epoch': 0.62} + 62%|██████████████████████████████████████████████████▏ | 315/509 [48:50<35:02, 10.84s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▏ | 315/509 [48:50<35:02, 10.84s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▏ | 315/509 [48:50<35:02, 10.84s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▎ | 316/509 [49:01<34:34, 10.75s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▎ | 316/509 [49:01<34:34, 10.75s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▎ | 316/509 [49:01<34:34, 10.75s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▎ | 316/509 [49:01<34:34, 10.75s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▎ | 316/509 [49:01<34:34, 10.75s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 317/509 [49:11<34:16, 10.71s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 317/509 [49:11<34:16, 10.71s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 317/509 [49:11<34:16, 10.71s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 317/509 [49:11<34:16, 10.71s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 317/509 [49:11<34:16, 10.71s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▌ | 318/509 [49:22<33:57, 10.67s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▌ | 318/509 [49:22<33:57, 10.67s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▌ | 318/509 [49:22<33:57, 10.67s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▌ | 318/509 [49:22<33:57, 10.67s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▌ | 318/509 [49:22<33:57, 10.67s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▊ | 319/509 [49:32<33:35, 10.61s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▊ | 319/509 [49:32<33:35, 10.61s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▊ | 319/509 [49:32<33:35, 10.61s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▊ | 319/509 [49:32<33:35, 10.61s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▉ | 320/509 [49:43<33:13, 10.55s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▉ | 320/509 [49:43<33:13, 10.55s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.79, 'learning_rate': 0.00019079999999999998, 'epoch': 0.63} + 63%|██████████████████████████████████████████████████▉ | 320/509 [49:43<33:13, 10.55s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▉ | 320/509 [49:43<33:13, 10.55s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▉ | 320/509 [49:43<33:13, 10.55s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████ | 321/509 [49:53<32:46, 10.46s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████ | 321/509 [49:53<32:46, 10.46s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████ | 321/509 [49:53<32:46, 10.46s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████ | 321/509 [49:53<32:46, 10.46s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████ | 321/509 [49:53<32:46, 10.46s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▏ | 322/509 [50:03<32:24, 10.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▏ | 322/509 [50:03<32:24, 10.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▏ | 322/509 [50:03<32:24, 10.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▏ | 322/509 [50:03<32:24, 10.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▏ | 322/509 [50:03<32:24, 10.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▍ | 323/509 [50:13<32:07, 10.36s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▍ | 323/509 [50:13<32:07, 10.36s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▍ | 323/509 [50:13<32:07, 10.36s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▍ | 323/509 [50:13<32:07, 10.36s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▌ | 324/509 [50:24<31:43, 10.29s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▌ | 324/509 [50:24<31:43, 10.29s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7002, 'learning_rate': 0.00019319999999999998, 'epoch': 0.64} + 64%|███████████████████████████████████████████████████▌ | 324/509 [50:24<31:43, 10.29s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▌ | 324/509 [50:24<31:43, 10.29s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▋ | 325/509 [50:34<31:51, 10.39s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▋ | 325/509 [50:34<31:51, 10.39s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6622, 'learning_rate': 0.0001938, 'epoch': 0.64} + 64%|███████████████████████████████████████████████████▋ | 325/509 [50:34<31:51, 10.39s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▋ | 325/509 [50:34<31:51, 10.39s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▉ | 326/509 [50:44<31:24, 10.30s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▉ | 326/509 [50:44<31:24, 10.30s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.502, 'learning_rate': 0.00019439999999999998, 'epoch': 0.64} + 64%|███████████████████████████████████████████████████▉ | 326/509 [50:44<31:24, 10.30s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▉ | 326/509 [50:44<31:24, 10.30s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▉ | 326/509 [50:44<31:24, 10.30s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|████████████████████████████████████████████████████ | 327/509 [50:54<30:49, 10.16s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|████████████████████████████████████████████████████ | 327/509 [50:54<30:49, 10.16s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|████████████████████████████████████████████████████ | 327/509 [50:54<30:49, 10.16s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|████████████████████████████████████████████████████ | 327/509 [50:54<30:49, 10.16s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|████████████████████████████████████████████████████ | 327/509 [50:54<30:49, 10.16s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|████████████████████████████████████████████████████▏ | 328/509 [51:04<30:11, 10.01s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|████████████████████████████████████████████████████▏ | 328/509 [51:04<30:11, 10.01s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|████████████████████████████████████████████████████▏ | 328/509 [51:04<30:11, 10.01s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|████████████████████████████████████████████████████▏ | 328/509 [51:04<30:11, 10.01s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|████████████████████████████████████████████████████▏ | 328/509 [51:04<30:11, 10.01s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▎ | 329/509 [51:14<29:50, 9.95s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▎ | 329/509 [51:14<29:50, 9.95s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▎ | 329/509 [51:14<29:50, 9.95s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▎ | 329/509 [51:14<29:50, 9.95s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▌ | 330/509 [51:23<29:21, 9.84s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▌ | 330/509 [51:23<29:21, 9.84s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5706, 'learning_rate': 0.00019679999999999999, 'epoch': 0.65} + 65%|████████████████████████████████████████████████████▌ | 330/509 [51:23<29:21, 9.84s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▌ | 330/509 [51:23<29:21, 9.84s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▌ | 330/509 [51:23<29:21, 9.84s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▋ | 331/509 [51:33<28:52, 9.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▋ | 331/509 [51:33<28:52, 9.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▋ | 331/509 [51:33<28:52, 9.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▋ | 331/509 [51:33<28:52, 9.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▊ | 332/509 [51:42<28:25, 9.64s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▊ | 332/509 [51:42<28:25, 9.64s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4544, 'learning_rate': 0.000198, 'epoch': 0.65} + 65%|████████████████████████████████████████████████████▊ | 332/509 [51:42<28:25, 9.64s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▊ | 332/509 [51:42<28:25, 9.64s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▉ | 333/509 [51:51<27:54, 9.51s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▉ | 333/509 [51:51<27:54, 9.51s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.489, 'learning_rate': 0.0001986, 'epoch': 0.65} + 65%|████████████████████████████████████████████████████▉ | 333/509 [51:51<27:54, 9.51s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▉ | 333/509 [51:51<27:54, 9.51s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▏ | 334/509 [52:00<27:20, 9.37s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▏ | 334/509 [52:00<27:20, 9.37s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4184, 'learning_rate': 0.0001992, 'epoch': 0.66} + 66%|█████████████████████████████████████████████████████▏ | 334/509 [52:00<27:20, 9.37s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▏ | 334/509 [52:00<27:20, 9.37s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▏ | 334/509 [52:00<27:20, 9.37s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▎ | 335/509 [52:09<26:47, 9.24s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▎ | 335/509 [52:09<26:47, 9.24s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▎ | 335/509 [52:09<26:47, 9.24s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▎ | 335/509 [52:09<26:47, 9.24s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▎ | 335/509 [52:09<26:47, 9.24s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▍ | 336/509 [52:18<26:20, 9.14s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▍ | 336/509 [52:18<26:20, 9.14s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▍ | 336/509 [52:18<26:20, 9.14s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▍ | 336/509 [52:18<26:20, 9.14s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▍ | 336/509 [52:18<26:20, 9.14s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▋ | 337/509 [52:27<25:48, 9.00s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▋ | 337/509 [52:27<25:48, 9.00s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▋ | 337/509 [52:27<25:48, 9.00s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▋ | 337/509 [52:27<25:48, 9.00s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▋ | 337/509 [52:27<25:48, 9.00s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▊ | 338/509 [52:35<25:15, 8.86s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▊ | 338/509 [52:35<25:15, 8.86s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▊ | 338/509 [52:35<25:15, 8.86s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▊ | 338/509 [52:35<25:15, 8.86s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▊ | 338/509 [52:35<25:15, 8.86s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|█████████████████████████████████████████████████████▉ | 339/509 [52:43<24:29, 8.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|█████████████████████████████████████████████████████▉ | 339/509 [52:43<24:29, 8.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|█████████████████████████████████████████████████████▉ | 339/509 [52:43<24:29, 8.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|█████████████████████████████████████████████████████▉ | 339/509 [52:43<24:29, 8.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|█████████████████████████████████████████████████████▉ | 339/509 [52:43<24:29, 8.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████ | 340/509 [52:51<23:43, 8.42s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████ | 340/509 [52:51<23:43, 8.42s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████ | 340/509 [52:51<23:43, 8.42s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████ | 340/509 [52:51<23:43, 8.42s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████ | 340/509 [52:51<23:43, 8.42s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▎ | 341/509 [52:59<22:41, 8.10s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:19:08,975 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:19:08,975 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:19:08,975 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▍ | 342/509 [53:06<21:32, 7.74s/it]g-point operations will not be computed-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▍ | 342/509 [53:06<21:32, 7.74s/it]g-point operations will not be computed-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:19:17,118 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:19:17,118 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▌ | 343/509 [53:12<20:12, 7.30s/it]g-point operations will not be computed-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▌ | 343/509 [53:12<20:12, 7.30s/it]g-point operations will not be computed-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:19:23,003 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:19:23,003 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▋ | 344/509 [53:18<18:48, 6.84s/it]g-point operations will not be computed-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:19:27,086 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:19:29,556 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:19:29,556 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8199, 'learning_rate': 0.0002058, 'epoch': 0.68} +[WARNING|modeling_utils.py:388] 2022-03-02 23:19:33,132 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:19:33,132 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:12:11,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████ | 346/509 [53:28<15:52, 5.84s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:35,374 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:19:37,382 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:19:35,374 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:19:37,382 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:19:35,374 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▏ | 347/509 [53:32<14:22, 5.33s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:39,377 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:19:41,131 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:19:39,377 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:19:41,131 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:19:39,377 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▍ | 348/509 [53:35<12:54, 4.81s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:42,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▍ | 348/509 [53:35<12:54, 4.81s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:42,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▌ | 349/509 [53:38<11:28, 4.30s/it]g-point operations will not be computed-02 23:19:42,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:19:47,055 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:19:45,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:19:47,055 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:19:45,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▋ | 350/509 [53:42<10:26, 3.94s/it]g-point operations will not be computed-02 23:19:45,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▋ | 350/509 [53:42<10:26, 3.94s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▋ | 350/509 [53:42<10:26, 3.94s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▋ | 350/509 [53:42<10:26, 3.94s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▋ | 350/509 [53:42<10:26, 3.94s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▊ | 351/509 [53:54<17:08, 6.51s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▊ | 351/509 [53:54<17:08, 6.51s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▊ | 351/509 [53:54<17:08, 6.51s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▊ | 351/509 [53:54<17:08, 6.51s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▊ | 351/509 [53:54<17:08, 6.51s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████ | 352/509 [54:06<21:08, 8.08s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████ | 352/509 [54:06<21:08, 8.08s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████ | 352/509 [54:06<21:08, 8.08s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████ | 352/509 [54:06<21:08, 8.08s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████ | 352/509 [54:06<21:08, 8.08s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 353/509 [54:17<23:39, 9.10s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 353/509 [54:17<23:39, 9.10s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 353/509 [54:17<23:39, 9.10s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 353/509 [54:17<23:39, 9.10s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 353/509 [54:17<23:39, 9.10s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▎ | 354/509 [54:29<25:17, 9.79s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▎ | 354/509 [54:29<25:17, 9.79s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▎ | 354/509 [54:29<25:17, 9.79s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▎ | 354/509 [54:29<25:17, 9.79s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▍ | 355/509 [54:40<26:28, 10.31s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▍ | 355/509 [54:40<26:28, 10.31s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5034, 'learning_rate': 0.00021179999999999997, 'epoch': 0.7} + 70%|████████████████████████████████████████████████████████▍ | 355/509 [54:40<26:28, 10.31s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▍ | 355/509 [54:40<26:28, 10.31s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▍ | 355/509 [54:40<26:28, 10.31s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▋ | 356/509 [54:51<27:03, 10.61s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▋ | 356/509 [54:51<27:03, 10.61s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▋ | 356/509 [54:51<27:03, 10.61s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▋ | 356/509 [54:51<27:03, 10.61s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▊ | 357/509 [55:03<27:24, 10.82s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▊ | 357/509 [55:03<27:24, 10.82s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6016, 'learning_rate': 0.00021299999999999997, 'epoch': 0.7} + 70%|████████████████████████████████████████████████████████▊ | 357/509 [55:03<27:24, 10.82s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▊ | 357/509 [55:03<27:24, 10.82s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▊ | 357/509 [55:03<27:24, 10.82s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▉ | 358/509 [55:14<27:33, 10.95s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▉ | 358/509 [55:14<27:33, 10.95s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3612, 'learning_rate': 0.00021359999999999996, 'epoch': 0.7} + 70%|████████████████████████████████████████████████████████▉ | 358/509 [55:14<27:33, 10.95s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▉ | 358/509 [55:14<27:33, 10.95s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▏ | 359/509 [55:25<27:26, 10.98s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▏ | 359/509 [55:25<27:26, 10.98s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6466, 'learning_rate': 0.00021419999999999998, 'epoch': 0.7} + 71%|█████████████████████████████████████████████████████████▏ | 359/509 [55:25<27:26, 10.98s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▏ | 359/509 [55:25<27:26, 10.98s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▎ | 360/509 [55:36<27:26, 11.05s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▎ | 360/509 [55:36<27:26, 11.05s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6199, 'learning_rate': 0.00021479999999999996, 'epoch': 0.71} + 71%|█████████████████████████████████████████████████████████▎ | 360/509 [55:36<27:26, 11.05s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▎ | 360/509 [55:36<27:26, 11.05s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▍ | 361/509 [55:47<27:17, 11.07s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▍ | 361/509 [55:47<27:17, 11.07s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3133, 'learning_rate': 0.00021539999999999998, 'epoch': 0.71} + 71%|█████████████████████████████████████████████████████████▍ | 361/509 [55:47<27:17, 11.07s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▍ | 361/509 [55:47<27:17, 11.07s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▌ | 362/509 [55:58<27:00, 11.03s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▌ | 362/509 [55:58<27:00, 11.03s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5717, 'learning_rate': 0.00021599999999999996, 'epoch': 0.71} + 71%|█████████████████████████████████████████████████████████▌ | 362/509 [55:58<27:00, 11.03s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▌ | 362/509 [55:58<27:00, 11.03s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▊ | 363/509 [56:09<26:44, 10.99s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▊ | 363/509 [56:09<26:44, 10.99s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7201, 'learning_rate': 0.00021659999999999998, 'epoch': 0.71} + 71%|█████████████████████████████████████████████████████████▊ | 363/509 [56:09<26:44, 10.99s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▊ | 363/509 [56:09<26:44, 10.99s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|█████████████████████████████████████████████████████████▉ | 364/509 [56:20<26:24, 10.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|█████████████████████████████████████████████████████████▉ | 364/509 [56:20<26:24, 10.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2984, 'learning_rate': 0.00021719999999999997, 'epoch': 0.71} + 72%|█████████████████████████████████████████████████████████▉ | 364/509 [56:20<26:24, 10.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|█████████████████████████████████████████████████████████▉ | 364/509 [56:20<26:24, 10.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 365/509 [56:31<26:07, 10.89s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 365/509 [56:31<26:07, 10.89s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6171, 'learning_rate': 0.00021779999999999998, 'epoch': 0.72} + 72%|██████████████████████████████████████████████████████████ | 365/509 [56:31<26:07, 10.89s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 365/509 [56:31<26:07, 10.89s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 365/509 [56:31<26:07, 10.89s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 365/509 [56:31<26:07, 10.89s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4188, 'learning_rate': 0.00021839999999999997, 'epoch': 0.72} + 72%|██████████████████████████████████████████████████████████ | 365/509 [56:31<26:07, 10.89s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 365/509 [56:31<26:07, 10.89s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 365/509 [56:31<26:07, 10.89s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 365/509 [56:31<26:07, 10.89s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▍ | 367/509 [56:52<25:33, 10.80s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▍ | 367/509 [56:52<25:33, 10.80s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▍ | 367/509 [56:52<25:33, 10.80s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▍ | 367/509 [56:52<25:33, 10.80s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▍ | 367/509 [56:52<25:33, 10.80s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▌ | 368/509 [57:03<25:12, 10.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▌ | 368/509 [57:03<25:12, 10.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▌ | 368/509 [57:03<25:12, 10.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▌ | 368/509 [57:03<25:12, 10.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▌ | 368/509 [57:03<25:12, 10.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▋ | 369/509 [57:13<24:53, 10.66s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▋ | 369/509 [57:13<24:53, 10.66s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▋ | 369/509 [57:13<24:53, 10.66s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▋ | 369/509 [57:13<24:53, 10.66s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▋ | 369/509 [57:13<24:53, 10.66s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|██████████████████████████████████████████████████████████▉ | 370/509 [57:24<24:33, 10.60s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|██████████████████████████████████████████████████████████▉ | 370/509 [57:24<24:33, 10.60s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|██████████████████████████████████████████████████████████▉ | 370/509 [57:24<24:33, 10.60s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|██████████████████████████████████████████████████████████▉ | 370/509 [57:24<24:33, 10.60s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████ | 371/509 [57:34<24:16, 10.55s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████ | 371/509 [57:34<24:16, 10.55s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4769, 'learning_rate': 0.0002214, 'epoch': 0.73} + 73%|███████████████████████████████████████████████████████████ | 371/509 [57:34<24:16, 10.55s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████ | 371/509 [57:34<24:16, 10.55s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▏ | 372/509 [57:44<23:53, 10.47s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▏ | 372/509 [57:44<23:53, 10.47s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2707, 'learning_rate': 0.00022199999999999998, 'epoch': 0.73} + 73%|███████████████████████████████████████████████████████████▏ | 372/509 [57:44<23:53, 10.47s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▏ | 372/509 [57:44<23:53, 10.47s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▎ | 373/509 [57:55<23:34, 10.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▎ | 373/509 [57:55<23:34, 10.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6927, 'learning_rate': 0.0002226, 'epoch': 0.73} + 73%|███████████████████████████████████████████████████████████▎ | 373/509 [57:55<23:34, 10.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▎ | 373/509 [57:55<23:34, 10.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▌ | 374/509 [58:05<23:11, 10.30s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▌ | 374/509 [58:05<23:11, 10.30s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4139, 'learning_rate': 0.00022319999999999998, 'epoch': 0.73} + 73%|███████████████████████████████████████████████████████████▌ | 374/509 [58:05<23:11, 10.30s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▌ | 374/509 [58:05<23:11, 10.30s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▋ | 375/509 [58:15<23:14, 10.41s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▋ | 375/509 [58:15<23:14, 10.41s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5554, 'learning_rate': 0.0002238, 'epoch': 0.74} + 74%|███████████████████████████████████████████████████████████▋ | 375/509 [58:15<23:14, 10.41s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▋ | 375/509 [58:15<23:14, 10.41s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▊ | 376/509 [58:25<22:48, 10.29s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▊ | 376/509 [58:25<22:48, 10.29s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5681, 'learning_rate': 0.00022439999999999998, 'epoch': 0.74} + 74%|███████████████████████████████████████████████████████████▊ | 376/509 [58:25<22:48, 10.29s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▊ | 376/509 [58:25<22:48, 10.29s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▉ | 377/509 [58:35<22:23, 10.18s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▉ | 377/509 [58:35<22:23, 10.18s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4412, 'learning_rate': 0.000225, 'epoch': 0.74} + 74%|███████████████████████████████████████████████████████████▉ | 377/509 [58:35<22:23, 10.18s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▉ | 377/509 [58:35<22:23, 10.18s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▉ | 377/509 [58:35<22:23, 10.18s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████▏ | 378/509 [58:46<22:27, 10.28s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████▏ | 378/509 [58:46<22:27, 10.28s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████▏ | 378/509 [58:46<22:27, 10.28s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████▏ | 378/509 [58:46<22:27, 10.28s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████▏ | 378/509 [58:46<22:27, 10.28s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████▎ | 379/509 [58:56<21:53, 10.11s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████▎ | 379/509 [58:56<21:53, 10.11s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████▎ | 379/509 [58:56<21:53, 10.11s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████▎ | 379/509 [58:56<21:53, 10.11s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|████████████████████████████████████████████████████████████▍ | 380/509 [59:05<21:28, 9.99s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|████████████████████████████████████████████████████████████▍ | 380/509 [59:05<21:28, 9.99s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5773, 'learning_rate': 0.00022679999999999998, 'epoch': 0.75} + 75%|████████████████████████████████████████████████████████████▍ | 380/509 [59:05<21:28, 9.99s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|████████████████████████████████████████████████████████████▍ | 380/509 [59:05<21:28, 9.99s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|████████████████████████████████████████████████████████████▍ | 380/509 [59:05<21:28, 9.99s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|████████████████████████████████████████████████████████████▋ | 381/509 [59:15<21:01, 9.86s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|████████████████████████████████████████████████████████████▋ | 381/509 [59:15<21:01, 9.86s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|████████████████████████████████████████████████████████████▋ | 381/509 [59:15<21:01, 9.86s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|████████████████████████████████████████████████████████████▋ | 381/509 [59:15<21:01, 9.86s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|████████████████████████████████████████████████████████████▋ | 381/509 [59:15<21:01, 9.86s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|████████████████████████████████████████████████████████████▊ | 382/509 [59:24<20:40, 9.77s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|████████████████████████████████████████████████████████████▊ | 382/509 [59:24<20:40, 9.77s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|████████████████████████████████████████████████████████████▊ | 382/509 [59:24<20:40, 9.77s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|████████████████████████████████████████████████████████████▊ | 382/509 [59:24<20:40, 9.77s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|████████████████████████████████████████████████████████████▉ | 383/509 [59:34<20:17, 9.66s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|████████████████████████████████████████████████████████████▉ | 383/509 [59:34<20:17, 9.66s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7158, 'learning_rate': 0.00022859999999999997, 'epoch': 0.75} + 75%|████████████████████████████████████████████████████████████▉ | 383/509 [59:34<20:17, 9.66s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|████████████████████████████████████████████████████████████▉ | 383/509 [59:34<20:17, 9.66s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|████████████████████████████████████████████████████████████▉ | 383/509 [59:34<20:17, 9.66s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|█████████████████████████████████████████████████████████████ | 384/509 [59:43<19:56, 9.57s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|█████████████████████████████████████████████████████████████ | 384/509 [59:43<19:56, 9.57s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|█████████████████████████████████████████████████████████████ | 384/509 [59:43<19:56, 9.57s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|█████████████████████████████████████████████████████████████ | 384/509 [59:43<19:56, 9.57s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|█████████████████████████████████████████████████████████████▎ | 385/509 [59:52<19:36, 9.49s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|█████████████████████████████████████████████████████████████▎ | 385/509 [59:52<19:36, 9.49s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6043, 'learning_rate': 0.00022979999999999997, 'epoch': 0.76} + 76%|█████████████████████████████████████████████████████████████▎ | 385/509 [59:52<19:36, 9.49s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|█████████████████████████████████████████████████████████████▎ | 385/509 [59:52<19:36, 9.49s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|█████████████████████████████████████████████████████████████▎ | 385/509 [59:52<19:36, 9.49s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|███████████████████████████████████████████████████████████▉ | 386/509 [1:00:01<19:05, 9.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|███████████████████████████████████████████████████████████▉ | 386/509 [1:00:01<19:05, 9.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|███████████████████████████████████████████████████████████▉ | 386/509 [1:00:01<19:05, 9.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|███████████████████████████████████████████████████████████▉ | 386/509 [1:00:01<19:05, 9.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|████████████████████████████████████████████████████████████ | 387/509 [1:00:10<18:38, 9.17s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|████████████████████████████████████████████████████████████ | 387/509 [1:00:10<18:38, 9.17s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4141, 'learning_rate': 0.00023099999999999998, 'epoch': 0.76} + 76%|████████████████████████████████████████████████████████████ | 387/509 [1:00:10<18:38, 9.17s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|████████████████████████████████████████████████████████████ | 387/509 [1:00:10<18:38, 9.17s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|████████████████████████████████████████████████████████████▏ | 388/509 [1:00:19<18:08, 9.00s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|████████████████████████████████████████████████████████████▏ | 388/509 [1:00:19<18:08, 9.00s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4335, 'learning_rate': 0.0002316, 'epoch': 0.76} + 76%|████████████████████████████████████████████████████████████▏ | 388/509 [1:00:19<18:08, 9.00s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|████████████████████████████████████████████████████████████▏ | 388/509 [1:00:19<18:08, 9.00s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|████████████████████████████████████████████████████████████▍ | 389/509 [1:00:27<17:37, 8.81s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|████████████████████████████████████████████████████████████▍ | 389/509 [1:00:27<17:37, 8.81s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.469, 'learning_rate': 0.00023219999999999998, 'epoch': 0.76} + 76%|████████████████████████████████████████████████████████████▍ | 389/509 [1:00:27<17:37, 8.81s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|████████████████████████████████████████████████████████████▍ | 389/509 [1:00:27<17:37, 8.81s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▌ | 390/509 [1:00:35<17:00, 8.57s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▌ | 390/509 [1:00:35<17:00, 8.57s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5875, 'learning_rate': 0.0002328, 'epoch': 0.77} + 77%|████████████████████████████████████████████████████████████▌ | 390/509 [1:00:35<17:00, 8.57s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▌ | 390/509 [1:00:35<17:00, 8.57s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▋ | 391/509 [1:00:43<16:19, 8.30s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▋ | 391/509 [1:00:43<16:19, 8.30s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3555, 'learning_rate': 0.00023339999999999998, 'epoch': 0.77} + 77%|████████████████████████████████████████████████████████████▋ | 391/509 [1:00:43<16:19, 8.30s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▋ | 391/509 [1:00:43<16:19, 8.30s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▋ | 391/509 [1:00:43<16:19, 8.30s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:19:51,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▊ | 392/509 [1:00:50<15:32, 7.97s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:26:58,526 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▊ | 392/509 [1:00:50<15:32, 7.97s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:26:58,526 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▊ | 392/509 [1:00:50<15:32, 7.97s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:26:58,526 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▉ | 393/509 [1:00:57<14:35, 7.55s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:26:58,526 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▉ | 393/509 [1:00:57<14:35, 7.55s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:26:58,526 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:27:06,614 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:26:58,526 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:27:06,614 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:26:58,526 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|█████████████████████████████████████████████████████████████▏ | 394/509 [1:01:03<13:43, 7.16s/it]g-point operations will not be computed-02 23:26:58,526 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|█████████████████████████████████████████████████████████████▏ | 394/509 [1:01:03<13:43, 7.16s/it]g-point operations will not be computed-02 23:26:58,526 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:27:12,442 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:26:58,526 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:27:15,089 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:26:58,526 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:27:15,089 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:26:58,526 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5762, 'learning_rate': 0.00023579999999999999, 'epoch': 0.77} +[WARNING|modeling_utils.py:388] 2022-03-02 23:27:18,801 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:26:58,526 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▍ | 396/509 [1:01:13<11:31, 6.12s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▍ | 396/509 [1:01:13<11:31, 6.12s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.475, 'learning_rate': 0.0002364, 'epoch': 0.78} +[WARNING|modeling_utils.py:388] 2022-03-02 23:27:24,370 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:27:24,370 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:27:26,434 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:27:28,235 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:27:28,235 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:27:29,999 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:27:29,999 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:27:32,828 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:27:34,530 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:27:34,530 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4873, 'learning_rate': 0.0002388, 'epoch': 0.78} +[WARNING|modeling_utils.py:388] 2022-03-02 23:27:40,991 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:27:40,991 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▏ | 401/509 [1:01:40<11:54, 6.62s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▏ | 401/509 [1:01:40<11:54, 6.62s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4953, 'learning_rate': 0.0002394, 'epoch': 0.79} + 79%|██████████████████████████████████████████████████████████████▏ | 401/509 [1:01:40<11:54, 6.62s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▏ | 401/509 [1:01:40<11:54, 6.62s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▏ | 401/509 [1:01:40<11:54, 6.62s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▍ | 402/509 [1:01:52<14:33, 8.17s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▍ | 402/509 [1:01:52<14:33, 8.17s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▍ | 402/509 [1:01:52<14:33, 8.17s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▍ | 402/509 [1:01:52<14:33, 8.17s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▌ | 403/509 [1:02:04<16:16, 9.21s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▌ | 403/509 [1:02:04<16:16, 9.21s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3974, 'learning_rate': 0.0002406, 'epoch': 0.79} + 79%|██████████████████████████████████████████████████████████████▌ | 403/509 [1:02:04<16:16, 9.21s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▌ | 403/509 [1:02:04<16:16, 9.21s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▌ | 403/509 [1:02:04<16:16, 9.21s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▋ | 404/509 [1:02:15<17:23, 9.94s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▋ | 404/509 [1:02:15<17:23, 9.94s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▋ | 404/509 [1:02:15<17:23, 9.94s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▋ | 404/509 [1:02:15<17:23, 9.94s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▋ | 404/509 [1:02:15<17:23, 9.94s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|██████████████████████████████████████████████████████████████▊ | 405/509 [1:02:27<18:05, 10.43s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|██████████████████████████████████████████████████████████████▊ | 405/509 [1:02:27<18:05, 10.43s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|██████████████████████████████████████████████████████████████▊ | 405/509 [1:02:27<18:05, 10.43s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|██████████████████████████████████████████████████████████████▊ | 405/509 [1:02:27<18:05, 10.43s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|██████████████████████████████████████████████████████████████▊ | 405/509 [1:02:27<18:05, 10.43s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|██████████████████████████████████████████████████████████████▊ | 405/509 [1:02:27<18:05, 10.43s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6551, 'learning_rate': 0.00024239999999999998, 'epoch': 0.8} + 80%|██████████████████████████████████████████████████████████████▊ | 405/509 [1:02:27<18:05, 10.43s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|██████████████████████████████████████████████████████████████▊ | 405/509 [1:02:27<18:05, 10.43s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|██████████████████████████████████████████████████████████████▊ | 405/509 [1:02:27<18:05, 10.43s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|███████████████████████████████████████████████████████████████▏ | 407/509 [1:02:50<18:31, 10.89s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|███████████████████████████████████████████████████████████████▏ | 407/509 [1:02:50<18:31, 10.89s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6615, 'learning_rate': 0.000243, 'epoch': 0.8} + 80%|███████████████████████████████████████████████████████████████▏ | 407/509 [1:02:50<18:31, 10.89s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|███████████████████████████████████████████████████████████████▏ | 407/509 [1:02:50<18:31, 10.89s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|███████████████████████████████████████████████████████████████▎ | 408/509 [1:03:01<18:30, 10.99s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|███████████████████████████████████████████████████████████████▎ | 408/509 [1:03:01<18:30, 10.99s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5641, 'learning_rate': 0.00024359999999999999, 'epoch': 0.8} + 80%|███████████████████████████████████████████████████████████████▎ | 408/509 [1:03:01<18:30, 10.99s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|███████████████████████████████████████████████████████████████▎ | 408/509 [1:03:01<18:30, 10.99s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|███████████████████████████████████████████████████████████████▍ | 409/509 [1:03:12<18:25, 11.05s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|███████████████████████████████████████████████████████████████▍ | 409/509 [1:03:12<18:25, 11.05s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5003, 'learning_rate': 0.00024419999999999997, 'epoch': 0.8} + 80%|███████████████████████████████████████████████████████████████▍ | 409/509 [1:03:12<18:25, 11.05s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|███████████████████████████████████████████████████████████████▍ | 409/509 [1:03:12<18:25, 11.05s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|███████████████████████████████████████████████████████████████▋ | 410/509 [1:03:23<18:15, 11.07s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|███████████████████████████████████████████████████████████████▋ | 410/509 [1:03:23<18:15, 11.07s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5603, 'learning_rate': 0.0002448, 'epoch': 0.8} + 81%|███████████████████████████████████████████████████████████████▋ | 410/509 [1:03:23<18:15, 11.07s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|███████████████████████████████████████████████████████████████▋ | 410/509 [1:03:23<18:15, 11.07s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|███████████████████████████████████████████████████████████████▋ | 410/509 [1:03:23<18:15, 11.07s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|███████████████████████████████████████████████████████████████▊ | 411/509 [1:03:34<17:59, 11.01s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|███████████████████████████████████████████████████████████████▊ | 411/509 [1:03:34<17:59, 11.01s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|███████████████████████████████████████████████████████████████▊ | 411/509 [1:03:34<17:59, 11.01s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|███████████████████████████████████████████████████████████████▊ | 411/509 [1:03:34<17:59, 11.01s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|███████████████████████████████████████████████████████████████▊ | 411/509 [1:03:34<17:59, 11.01s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|███████████████████████████████████████████████████████████████▉ | 412/509 [1:03:45<17:44, 10.98s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|███████████████████████████████████████████████████████████████▉ | 412/509 [1:03:45<17:44, 10.98s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|███████████████████████████████████████████████████████████████▉ | 412/509 [1:03:45<17:44, 10.98s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|███████████████████████████████████████████████████████████████▉ | 412/509 [1:03:45<17:44, 10.98s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|███████████████████████████████████████████████████████████████▉ | 412/509 [1:03:45<17:44, 10.98s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|████████████████████████████████████████████████████████████████ | 413/509 [1:03:56<17:28, 10.93s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|████████████████████████████████████████████████████████████████ | 413/509 [1:03:56<17:28, 10.93s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|████████████████████████████████████████████████████████████████ | 413/509 [1:03:56<17:28, 10.93s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|████████████████████████████████████████████████████████████████ | 413/509 [1:03:56<17:28, 10.93s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|████████████████████████████████████████████████████████████████▎ | 414/509 [1:04:07<17:12, 10.86s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|████████████████████████████████████████████████████████████████▎ | 414/509 [1:04:07<17:12, 10.86s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4854, 'learning_rate': 0.0002472, 'epoch': 0.81} + 81%|████████████████████████████████████████████████████████████████▎ | 414/509 [1:04:07<17:12, 10.86s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|████████████████████████████████████████████████████████████████▎ | 414/509 [1:04:07<17:12, 10.86s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|████████████████████████████████████████████████████████████████▎ | 414/509 [1:04:07<17:12, 10.86s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 82%|████████████████████████████████████████████████████████████████▍ | 415/509 [1:04:17<16:56, 10.81s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 82%|████████████████████████████████████████████████████████████████▍ | 415/509 [1:04:17<16:56, 10.81s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 82%|████████████████████████████████████████████████████████████████▍ | 415/509 [1:04:17<16:56, 10.81s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 82%|████████████████████████████████████████████████████████████████▍ | 415/509 [1:04:17<16:56, 10.81s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 82%|████████████████████████████████████████████████████████████████▍ | 415/509 [1:04:17<16:56, 10.81s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 82%|████████████████████████████████████████████████████████████████▌ | 416/509 [1:04:28<16:42, 10.78s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 82%|████████████████████████████████████████████████████████████████▌ | 416/509 [1:04:28<16:42, 10.78s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 82%|████████████████████████████████████████████████████████████████▌ | 416/509 [1:04:28<16:42, 10.78s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 82%|████████████████████████████████████████████████████████████████▌ | 416/509 [1:04:28<16:42, 10.78s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 82%|████████████████████████████████████████████████████████████████▌ | 416/509 [1:04:28<16:42, 10.78s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 82%|████████████████████████████████████████████████████████████████▋ | 417/509 [1:04:39<16:27, 10.74s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 82%|████████████████████████████████████████████████████████████████▋ | 417/509 [1:04:39<16:27, 10.74s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 82%|████████████████████████████████████████████████████████████████▋ | 417/509 [1:04:39<16:27, 10.74s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 82%|████████████████████████████████████████████████████████████████▋ | 417/509 [1:04:39<16:27, 10.74s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 82%|████████████████████████████████████████████████████████████████▉ | 418/509 [1:04:49<16:12, 10.69s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 82%|████████████████████████████████████████████████████████████████▉ | 418/509 [1:04:49<16:12, 10.69s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5617, 'learning_rate': 0.00024959999999999994, 'epoch': 0.82} + 82%|████████████████████████████████████████████████████████████████▉ | 418/509 [1:04:49<16:12, 10.69s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 82%|████████████████████████████████████████████████████████████████▉ | 418/509 [1:04:49<16:12, 10.69s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 82%|████████████████████████████████████████████████████████████████▉ | 418/509 [1:04:49<16:12, 10.69s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 82%|████████████████████████████████████████████████████████████████▉ | 418/509 [1:04:49<16:12, 10.69s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8485, 'learning_rate': 0.00025019999999999996, 'epoch': 0.82} + 82%|████████████████████████████████████████████████████████████████▉ | 418/509 [1:04:49<16:12, 10.69s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 82%|████████████████████████████████████████████████████████████████▉ | 418/509 [1:04:49<16:12, 10.69s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 83%|█████████████████████████████████████████████████████████████████▏ | 420/509 [1:05:10<15:41, 10.58s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 83%|█████████████████████████████████████████████████████████████████▏ | 420/509 [1:05:10<15:41, 10.58s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4427, 'learning_rate': 0.00025079999999999997, 'epoch': 0.82} + 83%|█████████████████████████████████████████████████████████████████▏ | 420/509 [1:05:10<15:41, 10.58s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 83%|█████████████████████████████████████████████████████████████████▏ | 420/509 [1:05:10<15:41, 10.58s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 83%|█████████████████████████████████████████████████████████████████▏ | 420/509 [1:05:10<15:41, 10.58s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 83%|█████████████████████████████████████████████████████████████████▏ | 420/509 [1:05:10<15:41, 10.58s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3531, 'learning_rate': 0.0002514, 'epoch': 0.83} + 83%|█████████████████████████████████████████████████████████████████▏ | 420/509 [1:05:10<15:41, 10.58s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 83%|█████████████████████████████████████████████████████████████████▏ | 420/509 [1:05:10<15:41, 10.58s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 83%|█████████████████████████████████████████████████████████████████▏ | 420/509 [1:05:10<15:41, 10.58s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 83%|█████████████████████████████████████████████████████████████████▍ | 422/509 [1:05:31<15:10, 10.47s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 83%|█████████████████████████████████████████████████████████████████▍ | 422/509 [1:05:31<15:10, 10.47s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4535, 'learning_rate': 0.00025199999999999995, 'epoch': 0.83} + 83%|█████████████████████████████████████████████████████████████████▍ | 422/509 [1:05:31<15:10, 10.47s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 83%|█████████████████████████████████████████████████████████████████▍ | 422/509 [1:05:31<15:10, 10.47s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 83%|█████████████████████████████████████████████████████████████████▋ | 423/509 [1:05:41<14:54, 10.41s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 83%|█████████████████████████████████████████████████████████████████▋ | 423/509 [1:05:41<14:54, 10.41s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5116, 'learning_rate': 0.00025259999999999996, 'epoch': 0.83} + 83%|█████████████████████████████████████████████████████████████████▋ | 423/509 [1:05:41<14:54, 10.41s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 83%|█████████████████████████████████████████████████████████████████▋ | 423/509 [1:05:41<14:54, 10.41s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 83%|█████████████████████████████████████████████████████████████████▊ | 424/509 [1:05:51<14:35, 10.30s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 83%|█████████████████████████████████████████████████████████████████▊ | 424/509 [1:05:51<14:35, 10.30s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2711, 'learning_rate': 0.0002532, 'epoch': 0.83} + 83%|█████████████████████████████████████████████████████████████████▊ | 424/509 [1:05:51<14:35, 10.30s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 83%|█████████████████████████████████████████████████████████████████▊ | 424/509 [1:05:51<14:35, 10.30s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 83%|█████████████████████████████████████████████████████████████████▊ | 424/509 [1:05:51<14:35, 10.30s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 83%|█████████████████████████████████████████████████████████████████▊ | 424/509 [1:05:51<14:35, 10.30s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4944, 'learning_rate': 0.0002538, 'epoch': 0.83} + 83%|█████████████████████████████████████████████████████████████████▊ | 424/509 [1:05:51<14:35, 10.30s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 83%|█████████████████████████████████████████████████████████████████▊ | 424/509 [1:05:51<14:35, 10.30s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 83%|█████████████████████████████████████████████████████████████████▊ | 424/509 [1:05:51<14:35, 10.30s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 84%|██████████████████████████████████████████████████████████████████ | 426/509 [1:06:12<14:20, 10.37s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 84%|██████████████████████████████████████████████████████████████████ | 426/509 [1:06:12<14:20, 10.37s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4421, 'learning_rate': 0.00025439999999999995, 'epoch': 0.84} + 84%|██████████████████████████████████████████████████████████████████ | 426/509 [1:06:12<14:20, 10.37s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 84%|██████████████████████████████████████████████████████████████████ | 426/509 [1:06:12<14:20, 10.37s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 84%|██████████████████████████████████████████████████████████████████▎ | 427/509 [1:06:22<13:58, 10.22s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 84%|██████████████████████████████████████████████████████████████████▎ | 427/509 [1:06:22<13:58, 10.22s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4307, 'learning_rate': 0.00025499999999999996, 'epoch': 0.84} + 84%|██████████████████████████████████████████████████████████████████▎ | 427/509 [1:06:22<13:58, 10.22s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 84%|██████████████████████████████████████████████████████████████████▎ | 427/509 [1:06:22<13:58, 10.22s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 84%|██████████████████████████████████████████████████████████████████▍ | 428/509 [1:06:32<13:36, 10.08s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 84%|██████████████████████████████████████████████████████████████████▍ | 428/509 [1:06:32<13:36, 10.08s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4475, 'learning_rate': 0.0002556, 'epoch': 0.84} + 84%|██████████████████████████████████████████████████████████████████▍ | 428/509 [1:06:32<13:36, 10.08s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 84%|██████████████████████████████████████████████████████████████████▍ | 428/509 [1:06:32<13:36, 10.08s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 84%|██████████████████████████████████████████████████████████████████▍ | 428/509 [1:06:32<13:36, 10.08s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 84%|██████████████████████████████████████████████████████████████████▍ | 428/509 [1:06:32<13:36, 10.08s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4769, 'learning_rate': 0.0002562, 'epoch': 0.84} + 84%|██████████████████████████████████████████████████████████████████▍ | 428/509 [1:06:32<13:36, 10.08s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 84%|██████████████████████████████████████████████████████████████████▍ | 428/509 [1:06:32<13:36, 10.08s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 84%|██████████████████████████████████████████████████████████████████▍ | 428/509 [1:06:32<13:36, 10.08s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 84%|██████████████████████████████████████████████████████████████████▋ | 430/509 [1:06:51<12:59, 9.87s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 84%|██████████████████████████████████████████████████████████████████▋ | 430/509 [1:06:51<12:59, 9.87s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4074, 'learning_rate': 0.00025679999999999995, 'epoch': 0.84} + 84%|██████████████████████████████████████████████████████████████████▋ | 430/509 [1:06:51<12:59, 9.87s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 84%|██████████████████████████████████████████████████████████████████▋ | 430/509 [1:06:51<12:59, 9.87s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 84%|██████████████████████████████████████████████████████████████████▋ | 430/509 [1:06:51<12:59, 9.87s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|██████████████████████████████████████████████████████████████████▉ | 431/509 [1:07:01<12:42, 9.78s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|██████████████████████████████████████████████████████████████████▉ | 431/509 [1:07:01<12:42, 9.78s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|██████████████████████████████████████████████████████████████████▉ | 431/509 [1:07:01<12:42, 9.78s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|██████████████████████████████████████████████████████████████████▉ | 431/509 [1:07:01<12:42, 9.78s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|██████████████████████████████████████████████████████████████████▉ | 431/509 [1:07:01<12:42, 9.78s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|███████████████████████████████████████████████████████████████████ | 432/509 [1:07:10<12:27, 9.71s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|███████████████████████████████████████████████████████████████████ | 432/509 [1:07:10<12:27, 9.71s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|███████████████████████████████████████████████████████████████████ | 432/509 [1:07:10<12:27, 9.71s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|███████████████████████████████████████████████████████████████████ | 432/509 [1:07:10<12:27, 9.71s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|███████████████████████████████████████████████████████████████████▏ | 433/509 [1:07:20<12:09, 9.60s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|███████████████████████████████████████████████████████████████████▏ | 433/509 [1:07:20<12:09, 9.60s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4432, 'learning_rate': 0.0002586, 'epoch': 0.85} + 85%|███████████████████████████████████████████████████████████████████▏ | 433/509 [1:07:20<12:09, 9.60s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|███████████████████████████████████████████████████████████████████▏ | 433/509 [1:07:20<12:09, 9.60s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|███████████████████████████████████████████████████████████████████▎ | 434/509 [1:07:29<11:48, 9.45s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|███████████████████████████████████████████████████████████████████▎ | 434/509 [1:07:29<11:48, 9.45s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5023, 'learning_rate': 0.00025919999999999996, 'epoch': 0.85} + 85%|███████████████████████████████████████████████████████████████████▎ | 434/509 [1:07:29<11:48, 9.45s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|███████████████████████████████████████████████████████████████████▎ | 434/509 [1:07:29<11:48, 9.45s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|███████████████████████████████████████████████████████████████████▌ | 435/509 [1:07:38<11:29, 9.32s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|███████████████████████████████████████████████████████████████████▌ | 435/509 [1:07:38<11:29, 9.32s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4265, 'learning_rate': 0.00025979999999999997, 'epoch': 0.85} + 85%|███████████████████████████████████████████████████████████████████▌ | 435/509 [1:07:38<11:29, 9.32s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|███████████████████████████████████████████████████████████████████▌ | 435/509 [1:07:38<11:29, 9.32s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|███████████████████████████████████████████████████████████████████▋ | 436/509 [1:07:46<11:08, 9.16s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|███████████████████████████████████████████████████████████████████▋ | 436/509 [1:07:46<11:08, 9.16s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3814, 'learning_rate': 0.0002604, 'epoch': 0.86} + 86%|███████████████████████████████████████████████████████████████████▋ | 436/509 [1:07:46<11:08, 9.16s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|███████████████████████████████████████████████████████████████████▋ | 436/509 [1:07:46<11:08, 9.16s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|███████████████████████████████████████████████████████████████████▊ | 437/509 [1:07:55<10:47, 8.99s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|███████████████████████████████████████████████████████████████████▊ | 437/509 [1:07:55<10:47, 8.99s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7153, 'learning_rate': 0.000261, 'epoch': 0.86} + 86%|███████████████████████████████████████████████████████████████████▊ | 437/509 [1:07:55<10:47, 8.99s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|███████████████████████████████████████████████████████████████████▊ | 437/509 [1:07:55<10:47, 8.99s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|███████████████████████████████████████████████████████████████████▊ | 437/509 [1:07:55<10:47, 8.99s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|███████████████████████████████████████████████████████████████████▉ | 438/509 [1:08:03<10:25, 8.81s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|███████████████████████████████████████████████████████████████████▉ | 438/509 [1:08:03<10:25, 8.81s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|███████████████████████████████████████████████████████████████████▉ | 438/509 [1:08:03<10:25, 8.81s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|███████████████████████████████████████████████████████████████████▉ | 438/509 [1:08:03<10:25, 8.81s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|███████████████████████████████████████████████████████████████████▉ | 438/509 [1:08:03<10:25, 8.81s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|████████████████████████████████████████████████████████████████████▏ | 439/509 [1:08:12<10:02, 8.61s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|████████████████████████████████████████████████████████████████████▏ | 439/509 [1:08:12<10:02, 8.61s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|████████████████████████████████████████████████████████████████████▏ | 439/509 [1:08:12<10:02, 8.61s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|████████████████████████████████████████████████████████████████████▏ | 439/509 [1:08:12<10:02, 8.61s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|████████████████████████████████████████████████████████████████████▏ | 439/509 [1:08:12<10:02, 8.61s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|████████████████████████████████████████████████████████████████████▎ | 440/509 [1:08:19<09:37, 8.37s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|████████████████████████████████████████████████████████████████████▎ | 440/509 [1:08:19<09:37, 8.37s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|████████████████████████████████████████████████████████████████████▎ | 440/509 [1:08:19<09:37, 8.37s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|████████████████████████████████████████████████████████████████████▎ | 440/509 [1:08:19<09:37, 8.37s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|████████████████████████████████████████████████████████████████████▎ | 440/509 [1:08:19<09:37, 8.37s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▍ | 441/509 [1:08:27<09:10, 8.10s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▍ | 441/509 [1:08:27<09:10, 8.10s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:34:38,914 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:34:38,914 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▌ | 442/509 [1:08:34<08:42, 7.79s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▌ | 442/509 [1:08:34<08:42, 7.79s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▌ | 442/509 [1:08:34<08:42, 7.79s/it]g-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:34:47,402 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:34:47,402 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2913, 'learning_rate': 0.0002646, 'epoch': 0.87} +[WARNING|modeling_utils.py:388] 2022-03-02 23:34:47,402 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:34:47,402 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:34:47,402 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:27:21,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▉ | 444/509 [1:08:47<07:42, 7.12s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:34:55,179 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▉ | 444/509 [1:08:47<07:42, 7.12s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:34:55,179 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:34:59,199 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:34:55,179 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:34:59,199 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:34:55,179 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6385, 'learning_rate': 0.00026579999999999996, 'epoch': 0.87} +[WARNING|modeling_utils.py:388] 2022-03-02 23:35:02,993 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:34:55,179 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:35:02,993 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:34:55,179 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▏ | 446/509 [1:08:57<06:26, 6.14s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:35:05,335 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:35:07,431 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:35:05,335 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:35:07,431 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:35:05,335 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▍ | 447/509 [1:09:02<05:45, 5.58s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:35:09,499 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:35:11,336 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:35:09,499 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:35:11,336 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:35:09,499 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▌ | 448/509 [1:09:06<05:07, 5.04s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:35:13,155 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▋ | 449/509 [1:09:09<04:30, 4.51s/it]g-point operations will not be computed-02 23:35:13,155 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▋ | 449/509 [1:09:09<04:30, 4.51s/it]g-point operations will not be computed-02 23:35:13,155 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:35:17,652 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:35:16,324 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:35:17,652 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:35:16,324 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▊ | 450/509 [1:09:12<04:04, 4.15s/it]g-point operations will not be computed-02 23:35:16,324 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + return torch.layer_norm(input, normalized_shape, weight, bias, eps, torch.backends.cudnn.enabled)2<04:04, 4.15s/it]Traceback (most recent call last):puted-02 23:35:16,324 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + return torch.layer_norm(input, normalized_shape, weight, bias, eps, torch.backends.cudnn.enabled)2<04:04, 4.15s/it]Traceback (most recent call last):puted-02 23:35:16,324 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + return torch.layer_norm(input, normalized_shape, weight, bias, eps, torch.backends.cudnn.enabled)2<04:04, 4.15s/it]Traceback (most recent call last):puted-02 23:35:16,324 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed \ No newline at end of file diff --git a/wandb/run-20220302_222605-10glutwr/files/requirements.txt b/wandb/run-20220302_222605-10glutwr/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..3974f97a24952deb24d97850f53367da9e7c347d --- /dev/null +++ b/wandb/run-20220302_222605-10glutwr/files/requirements.txt @@ -0,0 +1,184 @@ +absl-py==1.0.0 +aiohttp==3.8.1 +aiosignal==1.2.0 +anyio==3.5.0 +appdirs==1.4.4 +argon2-cffi-bindings==21.2.0 +argon2-cffi==21.3.0 +asttokens==2.0.5 +async-timeout==4.0.2 +attrs==21.4.0 +audioread==2.1.9 +babel==2.9.1 +backcall==0.2.0 +bitsandbytes-cuda113==0.26.0 +black==22.1.0 +bleach==4.1.0 +cachetools==5.0.0 +certifi==2021.10.8 +cffi==1.15.0 +charset-normalizer==2.0.11 +chex==0.1.0 +click==8.0.3 +clldutils==3.10.1 +colorlog==6.6.0 +csvw==1.11.0 +cycler==0.11.0 +datasets==1.18.3 +debugpy==1.5.1 +decorator==5.1.1 +defusedxml==0.7.1 +dill==0.3.4 +dlinfo==1.2.1 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +entrypoints==0.4 +executing==0.8.2 +filelock==3.4.2 +flatbuffers==2.0 +flax==0.4.0 +fonttools==4.29.1 +frozenlist==1.3.0 +fsspec==2022.1.0 +gitdb==4.0.9 +gitpython==3.1.27 +google-auth-oauthlib==0.4.6 +google-auth==2.6.0 +grpcio==1.43.0 +huggingface-hub==0.4.0 +hypothesis==6.36.1 +idna==3.3 +importlib-metadata==4.10.1 +ipykernel==6.8.0 +ipython-genutils==0.2.0 +ipython==8.0.1 +ipywidgets==7.6.5 +isodate==0.6.1 +jax==0.2.28 +jaxlib==0.1.76+cuda11.cudnn82 +jedi==0.18.1 +jinja2==3.0.3 +jiwer==2.3.0 +joblib==1.1.0 +json5==0.9.6 +jsonschema==4.4.0 +jupyter-client==7.1.2 +jupyter-console==6.4.0 +jupyter-core==4.9.1 +jupyter-server==1.13.5 +jupyter==1.0.0 +jupyterlab-pygments==0.1.2 +jupyterlab-server==2.10.3 +jupyterlab-widgets==1.0.2 +jupyterlab==3.2.9 +kiwisolver==1.3.2 +librosa==0.8.1 +llvmlite==0.38.0 +markdown==3.3.6 +markupsafe==2.0.1 +matplotlib-inline==0.1.3 +matplotlib==3.5.1 +mistune==0.8.4 +msgpack==1.0.3 +multidict==6.0.2 +multiprocess==0.70.12.2 +mypy-extensions==0.4.3 +nbclassic==0.3.5 +nbclient==0.5.10 +nbconvert==6.4.1 +nbformat==5.1.3 +nest-asyncio==1.5.4 +notebook==6.4.8 +numba==0.55.1 +numpy==1.21.5 +oauthlib==3.2.0 +opt-einsum==3.3.0 +optax==0.1.0 +packaging==21.3 +pandas==1.4.0 +pandocfilters==1.5.0 +parso==0.8.3 +pathspec==0.9.0 +pathtools==0.1.2 +pexpect==4.8.0 +phonemizer==3.0.1 +pickleshare==0.7.5 +pillow==9.0.0 +pip==22.0.2 +pkg-resources==0.0.0 +platformdirs==2.4.1 +pooch==1.6.0 +prometheus-client==0.13.1 +promise==2.3 +prompt-toolkit==3.0.26 +protobuf==3.19.4 +psutil==5.9.0 +ptyprocess==0.7.0 +pure-eval==0.2.2 +pyarrow==6.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pycparser==2.21 +pyctcdecode==0.3.0 +pygments==2.11.2 +pygtrie==2.4.2 +pyparsing==3.0.7 +pyrsistent==0.18.1 +python-dateutil==2.8.2 +python-levenshtein==0.12.2 +pytz==2021.3 +pyyaml==6.0 +pyzmq==22.3.0 +qtconsole==5.2.2 +qtpy==2.0.1 +regex==2022.1.18 +requests-oauthlib==1.3.1 +requests==2.27.1 +resampy==0.2.2 +rfc3986==2.0.0 +rsa==4.8 +sacremoses==0.0.47 +scikit-learn==1.0.2 +scipy==1.7.3 +segments==2.2.0 +send2trash==1.8.0 +sentry-sdk==1.5.6 +setuptools==44.1.1 +shortuuid==1.0.8 +six==1.16.0 +smmap==5.0.0 +sniffio==1.2.0 +sortedcontainers==2.4.0 +soundfile==0.10.3.post1 +stack-data==0.1.4 +tabulate==0.8.9 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.1 +tensorboard==2.8.0 +termcolor==1.1.0 +terminado==0.13.1 +testpath==0.5.0 +threadpoolctl==3.1.0 +tokenizers==0.11.4 +tomli==2.0.0 +toolz==0.11.2 +torch==1.10.2+cu113 +torchaudio==0.10.2+cu113 +tornado==6.1 +tqdm==4.62.3 +traitlets==5.1.1 +transformers==4.17.0.dev0 +typing-extensions==3.10.0.2 +uritemplate==4.1.1 +urllib3==1.26.8 +wandb==0.12.10 +wcwidth==0.2.5 +webencodings==0.5.1 +websocket-client==1.2.3 +werkzeug==2.0.2 +wheel==0.37.1 +widgetsnbextension==3.5.2 +xxhash==2.0.2 +yarl==1.7.2 +yaspin==2.1.0 +zipp==3.7.0 \ No newline at end of file diff --git a/wandb/run-20220302_222605-10glutwr/files/wandb-metadata.json b/wandb/run-20220302_222605-10glutwr/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..c423126033ca110c025a092430c6f4e49e0946cf --- /dev/null +++ b/wandb/run-20220302_222605-10glutwr/files/wandb-metadata.json @@ -0,0 +1,60 @@ +{ + "os": "Linux-5.11.0-1028-gcp-x86_64-with-glibc2.33", + "python": "3.9.5", + "heartbeatAt": "2022-03-02T22:26:06.164288", + "startedAt": "2022-03-02T22:26:05.008436", + "docker": null, + "gpu": "Tesla V100-SXM2-16GB", + "gpu_count": 2, + "cpu_count": 16, + "cuda": null, + "args": [ + "--dataset_name=librispeech_asr", + "--model_name_or_path=./", + "--tokenizer_name=./", + "--dataset_config_name=clean", + "--train_split_name=train.100", + "--eval_split_name=validation", + "--output_dir=./", + "--preprocessing_num_workers=1", + "--length_column_name=input_length", + "--overwrite_output_dir", + "--num_train_epochs=1", + "--per_device_train_batch_size=14", + "--per_device_eval_batch_size=14", + "--gradient_accumulation_steps=4", + "--generation_max_length=40", + "--generation_num_beams=1", + "--learning_rate=3e-4", + "--warmup_steps=500", + "--evaluation_strategy=steps", + "--text_column_name=text", + "--save_steps=500", + "--eval_steps=500", + "--logging_steps=1", + "--save_total_limit=1", + "--freeze_feature_encoder", + "--gradient_checkpointing", + "--fp16", + "--group_by_length", + "--predict_with_generate", + "--do_lower_case", + "--do_train", + "--do_eval", + "--report_to=wandb", + "--push_to_hub", + "--use_auth_token" + ], + "state": "running", + "program": "/home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/run_speech_recognition_seq2seq.py", + "codePath": "run_speech_recognition_seq2seq.py", + "git": { + "remote": "https://huggingface.co/sanchit-gandhi/wav2vec2-gpt2-wandb-grid-search", + "commit": "8c7181143c175387040dc1a6ac2ddbc9179b550c" + }, + "email": "sanchit@huggingface.co", + "root": "/home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search", + "host": "sanchit--v100", + "username": "sanchit_huggingface_co", + "executable": "/home/sanchit_huggingface_co/gcp/bin/python" +} diff --git a/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json b/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..6f90a33eddfd1f291534db0b1045e139af878773 --- /dev/null +++ b/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json @@ -0,0 +1 @@ +{"train/loss": 4.706, "train/learning_rate": 0.0002688, "train/epoch": 0.88, "train/global_step": 450, "_runtime": 4153, "_timestamp": 1646264118, "_step": 449, "gradients/decoder.transformer.ln_f.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 5.0, 2.0, 6.0, 8.0, 7.0, 13.0, 14.0, 21.0, 25.0, 20.0, 40.0, 70.0, 102.0, 153.0, 157.0, 83.0, 73.0, 48.0, 33.0, 18.0, 28.0, 16.0, 20.0, 7.0, 6.0, 8.0, 6.0, 3.0, 4.0, 1.0, 5.0, 2.0, 1.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-269.8289489746094, -260.6154479980469, -251.4019317626953, -242.18841552734375, -232.97491455078125, -223.76141357421875, -214.5478973388672, -205.33438110351562, -196.12088012695312, -186.90737915039062, -177.69386291503906, -168.4803466796875, -159.266845703125, -150.0533447265625, -140.83982849121094, -131.62631225585938, -122.41281127929688, -113.19930267333984, -103.98579406738281, -94.77228546142578, -85.55877685546875, -76.34526824951172, -67.13175964355469, -57.918251037597656, -48.704742431640625, -39.491233825683594, -30.277725219726562, -21.06421661376953, -11.8507080078125, -2.6371994018554688, 6.5763092041015625, 15.789817810058594, 25.00335693359375, 34.21686553955078, 43.43037414550781, 52.643882751464844, 61.857391357421875, 71.0708999633789, 80.28440856933594, 89.49791717529297, 98.71142578125, 107.92493438720703, 117.13844299316406, 126.3519515991211, 135.56546020507812, 144.77896118164062, 153.9924774169922, 163.20599365234375, 172.41949462890625, 181.63299560546875, 190.8465118408203, 200.06002807617188, 209.27352905273438, 218.48703002929688, 227.70054626464844, 236.9140625, 246.1275634765625, 255.341064453125, 264.5545654296875, 273.7680969238281, 282.9815979003906, 292.1950988769531, 301.40863037109375, 310.62213134765625, 319.83563232421875]}, "gradients/decoder.transformer.ln_f.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 2.0, 1.0, 2.0, 1.0, 0.0, 1.0, 4.0, 2.0, 7.0, 1.0, 5.0, 2.0, 2.0, 5.0, 9.0, 11.0, 22.0, 26.0, 38.0, 46.0, 59.0, 92.0, 104.0, 129.0, 81.0, 76.0, 62.0, 47.0, 35.0, 22.0, 26.0, 16.0, 15.0, 10.0, 7.0, 8.0, 11.0, 7.0, 1.0, 5.0, 2.0, 4.0, 2.0, 3.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1637.169677734375, -1582.742919921875, -1528.3162841796875, -1473.8895263671875, -1419.462890625, -1365.0361328125, -1310.609375, -1256.1827392578125, -1201.7559814453125, -1147.3292236328125, -1092.902587890625, -1038.475830078125, -984.0491333007812, -929.6224365234375, -875.1957397460938, -820.76904296875, -766.3423461914062, -711.9156494140625, -657.4889526367188, -603.062255859375, -548.635498046875, -494.20880126953125, -439.7821044921875, -385.3553771972656, -330.9286804199219, -276.5019836425781, -222.07525634765625, -167.6485595703125, -113.22184753417969, -58.795135498046875, -4.368438720703125, 50.05828857421875, 104.4849853515625, 158.9116973876953, 213.33840942382812, 267.7651062011719, 322.19183349609375, 376.6185302734375, 431.04522705078125, 485.4719543457031, 539.898681640625, 594.3253784179688, 648.7520751953125, 703.1788330078125, 757.6055297851562, 812.0322265625, 866.4589233398438, 920.8856201171875, 975.3123168945312, 1029.739013671875, 1084.165771484375, 1138.5924072265625, 1193.0191650390625, 1247.44580078125, 1301.87255859375, 1356.29931640625, 1410.7259521484375, 1465.1527099609375, 1519.579345703125, 1574.006103515625, 1628.4327392578125, 1682.8594970703125, 1737.2861328125, 1791.712890625, 1846.1396484375]}, "gradients/decoder.transformer.h.23.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 1.0, 2.0, 3.0, 1.0, 4.0, 5.0, 1.0, 4.0, 4.0, 7.0, 2.0, 6.0, 10.0, 11.0, 23.0, 28.0, 18.0, 32.0, 35.0, 61.0, 70.0, 67.0, 73.0, 73.0, 63.0, 75.0, 39.0, 47.0, 44.0, 37.0, 19.0, 14.0, 26.0, 21.0, 13.0, 15.0, 14.0, 13.0, 5.0, 6.0, 4.0, 5.0, 4.0, 2.0, 2.0, 1.0, 1.0, 0.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-36.75, -35.56005859375, -34.3701171875, -33.18017578125, -31.990234375, -30.80029296875, -29.6103515625, -28.42041015625, -27.23046875, -26.04052734375, -24.8505859375, -23.66064453125, -22.470703125, -21.28076171875, -20.0908203125, -18.90087890625, -17.7109375, -16.52099609375, -15.3310546875, -14.14111328125, -12.951171875, -11.76123046875, -10.5712890625, -9.38134765625, -8.19140625, -7.00146484375, -5.8115234375, -4.62158203125, -3.431640625, -2.24169921875, -1.0517578125, 0.13818359375, 1.328125, 2.51806640625, 3.7080078125, 4.89794921875, 6.087890625, 7.27783203125, 8.4677734375, 9.65771484375, 10.84765625, 12.03759765625, 13.2275390625, 14.41748046875, 15.607421875, 16.79736328125, 17.9873046875, 19.17724609375, 20.3671875, 21.55712890625, 22.7470703125, 23.93701171875, 25.126953125, 26.31689453125, 27.5068359375, 28.69677734375, 29.88671875, 31.07666015625, 32.2666015625, 33.45654296875, 34.646484375, 35.83642578125, 37.0263671875, 38.21630859375, 39.40625]}, "gradients/decoder.transformer.h.23.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 2.0, 1.0, 3.0, 3.0, 4.0, 5.0, 6.0, 3.0, 9.0, 11.0, 15.0, 19.0, 33.0, 39.0, 58.0, 69.0, 87.0, 129.0, 188.0, 255.0, 382.0, 655.0, 1048.0, 1970.0, 4109.0, 10478.0, 41507.0, 3491079.0, 594508.0, 29934.0, 8952.0, 3760.0, 1949.0, 1092.0, 622.0, 420.0, 256.0, 196.0, 126.0, 89.0, 62.0, 43.0, 33.0, 25.0, 14.0, 15.0, 9.0, 7.0, 6.0, 4.0, 2.0, 3.0, 1.0, 1.0, 0.0, 4.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-273.5, -264.640625, -255.78125, -246.921875, -238.0625, -229.203125, -220.34375, -211.484375, -202.625, -193.765625, -184.90625, -176.046875, -167.1875, -158.328125, -149.46875, -140.609375, -131.75, -122.890625, -114.03125, -105.171875, -96.3125, -87.453125, -78.59375, -69.734375, -60.875, -52.015625, -43.15625, -34.296875, -25.4375, -16.578125, -7.71875, 1.140625, 10.0, 18.859375, 27.71875, 36.578125, 45.4375, 54.296875, 63.15625, 72.015625, 80.875, 89.734375, 98.59375, 107.453125, 116.3125, 125.171875, 134.03125, 142.890625, 151.75, 160.609375, 169.46875, 178.328125, 187.1875, 196.046875, 204.90625, 213.765625, 222.625, 231.484375, 240.34375, 249.203125, 258.0625, 266.921875, 275.78125, 284.640625, 293.5]}, "gradients/decoder.transformer.h.23.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 4.0, 2.0, 6.0, 4.0, 5.0, 6.0, 9.0, 18.0, 16.0, 26.0, 32.0, 49.0, 56.0, 78.0, 84.0, 169.0, 215.0, 411.0, 798.0, 938.0, 436.0, 210.0, 126.0, 90.0, 65.0, 65.0, 36.0, 25.0, 24.0, 17.0, 14.0, 14.0, 10.0, 11.0, 5.0, 3.0, 3.0, 5.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-156.25, -151.669921875, -147.08984375, -142.509765625, -137.9296875, -133.349609375, -128.76953125, -124.189453125, -119.609375, -115.029296875, -110.44921875, -105.869140625, -101.2890625, -96.708984375, -92.12890625, -87.548828125, -82.96875, -78.388671875, -73.80859375, -69.228515625, -64.6484375, -60.068359375, -55.48828125, -50.908203125, -46.328125, -41.748046875, -37.16796875, -32.587890625, -28.0078125, -23.427734375, -18.84765625, -14.267578125, -9.6875, -5.107421875, -0.52734375, 4.052734375, 8.6328125, 13.212890625, 17.79296875, 22.373046875, 26.953125, 31.533203125, 36.11328125, 40.693359375, 45.2734375, 49.853515625, 54.43359375, 59.013671875, 63.59375, 68.173828125, 72.75390625, 77.333984375, 81.9140625, 86.494140625, 91.07421875, 95.654296875, 100.234375, 104.814453125, 109.39453125, 113.974609375, 118.5546875, 123.134765625, 127.71484375, 132.294921875, 136.875]}, "gradients/decoder.transformer.h.23.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 4.0, 3.0, 4.0, 5.0, 8.0, 2.0, 3.0, 8.0, 11.0, 15.0, 16.0, 16.0, 22.0, 34.0, 37.0, 60.0, 82.0, 133.0, 191.0, 326.0, 727.0, 2196.0, 14517.0, 579509.0, 3565348.0, 25867.0, 3095.0, 916.0, 370.0, 238.0, 159.0, 85.0, 70.0, 51.0, 43.0, 19.0, 27.0, 22.0, 18.0, 13.0, 7.0, 6.0, 5.0, 2.0, 3.0, 0.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-462.0, -446.65625, -431.3125, -415.96875, -400.625, -385.28125, -369.9375, -354.59375, -339.25, -323.90625, -308.5625, -293.21875, -277.875, -262.53125, -247.1875, -231.84375, -216.5, -201.15625, -185.8125, -170.46875, -155.125, -139.78125, -124.4375, -109.09375, -93.75, -78.40625, -63.0625, -47.71875, -32.375, -17.03125, -1.6875, 13.65625, 29.0, 44.34375, 59.6875, 75.03125, 90.375, 105.71875, 121.0625, 136.40625, 151.75, 167.09375, 182.4375, 197.78125, 213.125, 228.46875, 243.8125, 259.15625, 274.5, 289.84375, 305.1875, 320.53125, 335.875, 351.21875, 366.5625, 381.90625, 397.25, 412.59375, 427.9375, 443.28125, 458.625, 473.96875, 489.3125, 504.65625, 520.0]}, "gradients/decoder.transformer.h.23.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 3.0, 22.0, 63.0, 266.0, 498.0, 130.0, 21.0, 9.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-684.9306640625, -649.278564453125, -613.62646484375, -577.974365234375, -542.3222045898438, -506.67010498046875, -471.01800537109375, -435.36590576171875, -399.7137756347656, -364.0616760253906, -328.4095458984375, -292.7574462890625, -257.1053466796875, -221.45321655273438, -185.80111694335938, -150.1490020751953, -114.49688720703125, -78.84477233886719, -43.192665100097656, -7.540557861328125, 28.111557006835938, 63.763671875, 99.415771484375, 135.06788635253906, 170.72000122070312, 206.3721160888672, 242.02423095703125, 277.67633056640625, 313.32843017578125, 348.9805603027344, 384.6326599121094, 420.2847900390625, 455.9368896484375, 491.5889892578125, 527.2410888671875, 562.8931884765625, 598.5453491210938, 634.1974487304688, 669.8495483398438, 705.5016479492188, 741.15380859375, 776.805908203125, 812.4580078125, 848.110107421875, 883.7622680664062, 919.4143676757812, 955.0664672851562, 990.7185668945312, 1026.37060546875, 1062.022705078125, 1097.6748046875, 1133.326904296875, 1168.97900390625, 1204.631103515625, 1240.283203125, 1275.9354248046875, 1311.5875244140625, 1347.2396240234375, 1382.8917236328125, 1418.5438232421875, 1454.1959228515625, 1489.84814453125, 1525.500244140625, 1561.15234375, 1596.804443359375]}, "gradients/decoder.transformer.h.23.ln_2.bias": {"_type": "histogram", "values": [2.0, 3.0, 1.0, 0.0, 2.0, 1.0, 2.0, 4.0, 5.0, 3.0, 3.0, 10.0, 12.0, 10.0, 15.0, 16.0, 19.0, 19.0, 22.0, 20.0, 25.0, 23.0, 25.0, 34.0, 33.0, 23.0, 41.0, 28.0, 33.0, 43.0, 35.0, 40.0, 51.0, 32.0, 33.0, 32.0, 35.0, 35.0, 25.0, 23.0, 25.0, 28.0, 25.0, 19.0, 14.0, 15.0, 12.0, 5.0, 15.0, 10.0, 8.0, 6.0, 7.0, 6.0, 0.0, 2.0, 2.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0], "bins": [-369.4579162597656, -357.46026611328125, -345.46258544921875, -333.4649353027344, -321.46728515625, -309.4696044921875, -297.4719543457031, -285.47430419921875, -273.47662353515625, -261.4789733886719, -249.48129272460938, -237.483642578125, -225.48597717285156, -213.48831176757812, -201.49066162109375, -189.4929962158203, -177.49533081054688, -165.49766540527344, -153.5, -141.50234985351562, -129.5046844482422, -117.50701904296875, -105.50936126708984, -93.51170349121094, -81.5140380859375, -69.51637268066406, -57.518714904785156, -45.521053314208984, -33.52339172363281, -21.52573013305664, -9.528068542480469, 2.4695892333984375, 14.46728515625, 26.464946746826172, 38.462608337402344, 50.460269927978516, 62.45793151855469, 74.45559692382812, 86.45325469970703, 98.45091247558594, 110.44857788085938, 122.44624328613281, 134.44390869140625, 146.44155883789062, 158.43922424316406, 170.4368896484375, 182.43453979492188, 194.4322052001953, 206.42987060546875, 218.4275360107422, 230.42520141601562, 242.4228515625, 254.42051696777344, 266.4181823730469, 278.41583251953125, 290.41351318359375, 302.4111633300781, 314.4088134765625, 326.406494140625, 338.4041442871094, 350.40179443359375, 362.39947509765625, 374.3971252441406, 386.394775390625, 398.3924560546875]}, "gradients/decoder.transformer.h.23.crossattention.c_proj.bias": {"_type": "histogram", "values": [3.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 2.0, 2.0, 2.0, 5.0, 2.0, 2.0, 5.0, 6.0, 5.0, 4.0, 11.0, 10.0, 18.0, 18.0, 19.0, 23.0, 29.0, 39.0, 49.0, 50.0, 59.0, 70.0, 79.0, 59.0, 52.0, 70.0, 45.0, 41.0, 24.0, 40.0, 25.0, 20.0, 18.0, 17.0, 16.0, 10.0, 10.0, 14.0, 10.0, 3.0, 6.0, 7.0, 4.0, 4.0, 3.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-34.4375, -33.3056640625, -32.173828125, -31.0419921875, -29.91015625, -28.7783203125, -27.646484375, -26.5146484375, -25.3828125, -24.2509765625, -23.119140625, -21.9873046875, -20.85546875, -19.7236328125, -18.591796875, -17.4599609375, -16.328125, -15.1962890625, -14.064453125, -12.9326171875, -11.80078125, -10.6689453125, -9.537109375, -8.4052734375, -7.2734375, -6.1416015625, -5.009765625, -3.8779296875, -2.74609375, -1.6142578125, -0.482421875, 0.6494140625, 1.78125, 2.9130859375, 4.044921875, 5.1767578125, 6.30859375, 7.4404296875, 8.572265625, 9.7041015625, 10.8359375, 11.9677734375, 13.099609375, 14.2314453125, 15.36328125, 16.4951171875, 17.626953125, 18.7587890625, 19.890625, 21.0224609375, 22.154296875, 23.2861328125, 24.41796875, 25.5498046875, 26.681640625, 27.8134765625, 28.9453125, 30.0771484375, 31.208984375, 32.3408203125, 33.47265625, 34.6044921875, 35.736328125, 36.8681640625, 38.0]}, "gradients/decoder.transformer.h.23.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 3.0, 2.0, 3.0, 4.0, 2.0, 6.0, 7.0, 12.0, 12.0, 14.0, 41.0, 46.0, 84.0, 146.0, 239.0, 397.0, 683.0, 1235.0, 2254.0, 4792.0, 12840.0, 78165.0, 837398.0, 87100.0, 13159.0, 4802.0, 2282.0, 1168.0, 680.0, 387.0, 227.0, 146.0, 82.0, 52.0, 33.0, 24.0, 10.0, 6.0, 3.0, 5.0, 5.0, 3.0, 2.0, 1.0, 2.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 2.0], "bins": [-21.0, -20.3916015625, -19.783203125, -19.1748046875, -18.56640625, -17.9580078125, -17.349609375, -16.7412109375, -16.1328125, -15.5244140625, -14.916015625, -14.3076171875, -13.69921875, -13.0908203125, -12.482421875, -11.8740234375, -11.265625, -10.6572265625, -10.048828125, -9.4404296875, -8.83203125, -8.2236328125, -7.615234375, -7.0068359375, -6.3984375, -5.7900390625, -5.181640625, -4.5732421875, -3.96484375, -3.3564453125, -2.748046875, -2.1396484375, -1.53125, -0.9228515625, -0.314453125, 0.2939453125, 0.90234375, 1.5107421875, 2.119140625, 2.7275390625, 3.3359375, 3.9443359375, 4.552734375, 5.1611328125, 5.76953125, 6.3779296875, 6.986328125, 7.5947265625, 8.203125, 8.8115234375, 9.419921875, 10.0283203125, 10.63671875, 11.2451171875, 11.853515625, 12.4619140625, 13.0703125, 13.6787109375, 14.287109375, 14.8955078125, 15.50390625, 16.1123046875, 16.720703125, 17.3291015625, 17.9375]}, "gradients/decoder.transformer.h.23.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 0.0, 2.0, 2.0, 1.0, 2.0, 4.0, 5.0, 13.0, 8.0, 8.0, 10.0, 12.0, 13.0, 17.0, 13.0, 15.0, 18.0, 22.0, 26.0, 20.0, 25.0, 26.0, 50.0, 33.0, 44.0, 37.0, 39.0, 31.0, 1062.0, 40.0, 35.0, 31.0, 41.0, 36.0, 39.0, 31.0, 29.0, 29.0, 28.0, 21.0, 17.0, 18.0, 11.0, 18.0, 11.0, 4.0, 7.0, 6.0, 2.0, 8.0, 7.0, 1.0, 1.0, 4.0, 4.0, 0.0, 1.0, 0.0, 3.0, 2.0, 1.0], "bins": [-18.96875, -18.36181640625, -17.7548828125, -17.14794921875, -16.541015625, -15.93408203125, -15.3271484375, -14.72021484375, -14.11328125, -13.50634765625, -12.8994140625, -12.29248046875, -11.685546875, -11.07861328125, -10.4716796875, -9.86474609375, -9.2578125, -8.65087890625, -8.0439453125, -7.43701171875, -6.830078125, -6.22314453125, -5.6162109375, -5.00927734375, -4.40234375, -3.79541015625, -3.1884765625, -2.58154296875, -1.974609375, -1.36767578125, -0.7607421875, -0.15380859375, 0.453125, 1.06005859375, 1.6669921875, 2.27392578125, 2.880859375, 3.48779296875, 4.0947265625, 4.70166015625, 5.30859375, 5.91552734375, 6.5224609375, 7.12939453125, 7.736328125, 8.34326171875, 8.9501953125, 9.55712890625, 10.1640625, 10.77099609375, 11.3779296875, 11.98486328125, 12.591796875, 13.19873046875, 13.8056640625, 14.41259765625, 15.01953125, 15.62646484375, 16.2333984375, 16.84033203125, 17.447265625, 18.05419921875, 18.6611328125, 19.26806640625, 19.875]}, "gradients/decoder.transformer.h.23.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 3.0, 1.0, 3.0, 2.0, 5.0, 3.0, 5.0, 7.0, 12.0, 19.0, 24.0, 37.0, 51.0, 72.0, 110.0, 153.0, 210.0, 241.0, 351.0, 532.0, 713.0, 1068.0, 1580.0, 2582.0, 4338.0, 7781.0, 15516.0, 35654.0, 100121.0, 496936.0, 1228684.0, 120043.0, 41323.0, 17411.0, 8558.0, 4701.0, 2772.0, 1801.0, 1165.0, 763.0, 498.0, 351.0, 234.0, 169.0, 138.0, 101.0, 73.0, 75.0, 45.0, 30.0, 20.0, 12.0, 14.0, 9.0, 9.0, 5.0, 3.0, 5.0, 3.0, 2.0, 1.0, 1.0, 1.0], "bins": [-4.28125, -4.14349365234375, -4.0057373046875, -3.86798095703125, -3.730224609375, -3.59246826171875, -3.4547119140625, -3.31695556640625, -3.17919921875, -3.04144287109375, -2.9036865234375, -2.76593017578125, -2.628173828125, -2.49041748046875, -2.3526611328125, -2.21490478515625, -2.0771484375, -1.93939208984375, -1.8016357421875, -1.66387939453125, -1.526123046875, -1.38836669921875, -1.2506103515625, -1.11285400390625, -0.97509765625, -0.83734130859375, -0.6995849609375, -0.56182861328125, -0.424072265625, -0.28631591796875, -0.1485595703125, -0.01080322265625, 0.126953125, 0.26470947265625, 0.4024658203125, 0.54022216796875, 0.677978515625, 0.81573486328125, 0.9534912109375, 1.09124755859375, 1.22900390625, 1.36676025390625, 1.5045166015625, 1.64227294921875, 1.780029296875, 1.91778564453125, 2.0555419921875, 2.19329833984375, 2.3310546875, 2.46881103515625, 2.6065673828125, 2.74432373046875, 2.882080078125, 3.01983642578125, 3.1575927734375, 3.29534912109375, 3.43310546875, 3.57086181640625, 3.7086181640625, 3.84637451171875, 3.984130859375, 4.12188720703125, 4.2596435546875, 4.39739990234375, 4.53515625]}, "gradients/decoder.transformer.h.23.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 3.0, 1.0, 0.0, 0.0, 2.0, 2.0, 3.0, 7.0, 4.0, 3.0, 3.0, 2.0, 6.0, 6.0, 5.0, 7.0, 10.0, 8.0, 9.0, 10.0, 12.0, 21.0, 18.0, 43.0, 54.0, 62.0, 77.0, 103.0, 129.0, 82.0, 62.0, 52.0, 30.0, 30.0, 27.0, 20.0, 18.0, 12.0, 11.0, 10.0, 10.0, 3.0, 7.0, 9.0, 2.0, 4.0, 2.0, 4.0, 3.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 1.0, 4.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.7470703125, -0.7230682373046875, -0.699066162109375, -0.6750640869140625, -0.65106201171875, -0.6270599365234375, -0.603057861328125, -0.5790557861328125, -0.5550537109375, -0.5310516357421875, -0.507049560546875, -0.4830474853515625, -0.45904541015625, -0.4350433349609375, -0.411041259765625, -0.3870391845703125, -0.363037109375, -0.3390350341796875, -0.315032958984375, -0.2910308837890625, -0.26702880859375, -0.2430267333984375, -0.219024658203125, -0.1950225830078125, -0.1710205078125, -0.1470184326171875, -0.123016357421875, -0.0990142822265625, -0.07501220703125, -0.0510101318359375, -0.027008056640625, -0.0030059814453125, 0.02099609375, 0.0449981689453125, 0.069000244140625, 0.0930023193359375, 0.11700439453125, 0.1410064697265625, 0.165008544921875, 0.1890106201171875, 0.2130126953125, 0.2370147705078125, 0.261016845703125, 0.2850189208984375, 0.30902099609375, 0.3330230712890625, 0.357025146484375, 0.3810272216796875, 0.405029296875, 0.4290313720703125, 0.453033447265625, 0.4770355224609375, 0.50103759765625, 0.5250396728515625, 0.549041748046875, 0.5730438232421875, 0.5970458984375, 0.6210479736328125, 0.645050048828125, 0.6690521240234375, 0.69305419921875, 0.7170562744140625, 0.741058349609375, 0.7650604248046875, 0.7890625]}, "gradients/decoder.transformer.h.23.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 4.0, 1.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 3.0, 3.0, 3.0, 4.0, 2.0, 9.0, 6.0, 4.0, 9.0, 11.0, 11.0, 13.0, 21.0, 35.0, 50.0, 71.0, 96.0, 151.0, 220.0, 586.0, 934147.0, 111831.0, 554.0, 244.0, 161.0, 99.0, 49.0, 39.0, 32.0, 12.0, 10.0, 10.0, 10.0, 7.0, 5.0, 5.0, 7.0, 2.0, 2.0, 4.0, 4.0, 6.0, 4.0, 1.0, 3.0, 0.0, 0.0, 1.0, 2.0, 2.0, 2.0, 1.0], "bins": [-21.96875, -21.30224609375, -20.6357421875, -19.96923828125, -19.302734375, -18.63623046875, -17.9697265625, -17.30322265625, -16.63671875, -15.97021484375, -15.3037109375, -14.63720703125, -13.970703125, -13.30419921875, -12.6376953125, -11.97119140625, -11.3046875, -10.63818359375, -9.9716796875, -9.30517578125, -8.638671875, -7.97216796875, -7.3056640625, -6.63916015625, -5.97265625, -5.30615234375, -4.6396484375, -3.97314453125, -3.306640625, -2.64013671875, -1.9736328125, -1.30712890625, -0.640625, 0.02587890625, 0.6923828125, 1.35888671875, 2.025390625, 2.69189453125, 3.3583984375, 4.02490234375, 4.69140625, 5.35791015625, 6.0244140625, 6.69091796875, 7.357421875, 8.02392578125, 8.6904296875, 9.35693359375, 10.0234375, 10.68994140625, 11.3564453125, 12.02294921875, 12.689453125, 13.35595703125, 14.0224609375, 14.68896484375, 15.35546875, 16.02197265625, 16.6884765625, 17.35498046875, 18.021484375, 18.68798828125, 19.3544921875, 20.02099609375, 20.6875]}, "gradients/decoder.transformer.h.23.ln_cross_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 31.0, 982.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-11.8468017578125, -11.068922996520996, -10.291044235229492, -9.513166427612305, -8.735286712646484, -7.957408428192139, -7.179530143737793, -6.401651382446289, -5.623772621154785, -4.845893859863281, -4.068015098571777, -3.2901368141174316, -2.5122580528259277, -1.7343792915344238, -0.9565010070800781, -0.17862224578857422, 0.5992565155029297, 1.377135157585144, 2.1550137996673584, 2.932892322540283, 3.710771083831787, 4.488649845123291, 5.266528129577637, 6.044406890869141, 6.8222856521606445, 7.600164413452148, 8.378043174743652, 9.155921936035156, 9.933799743652344, 10.711679458618164, 11.489557266235352, 12.267436027526855, 13.04531478881836, 13.823193550109863, 14.601072311401367, 15.378950119018555, 16.156829833984375, 16.934707641601562, 17.71258544921875, 18.49046516418457, 19.26834487915039, 20.046222686767578, 20.8241024017334, 21.601980209350586, 22.379859924316406, 23.157737731933594, 23.93561553955078, 24.7134952545166, 25.49137306213379, 26.269250869750977, 27.047130584716797, 27.825008392333984, 28.602888107299805, 29.380765914916992, 30.158645629882812, 30.9365234375, 31.714401245117188, 32.492279052734375, 33.27015686035156, 34.048038482666016, 34.8259162902832, 35.60379409790039, 36.38167190551758, 37.15955352783203, 37.93743133544922]}, "gradients/decoder.transformer.h.23.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 6.0, 1.0, 4.0, 2.0, 2.0, 6.0, 12.0, 13.0, 19.0, 36.0, 32.0, 42.0, 38.0, 39.0, 59.0, 65.0, 68.0, 72.0, 59.0, 57.0, 63.0, 51.0, 55.0, 35.0, 29.0, 42.0, 26.0, 16.0, 19.0, 12.0, 10.0, 8.0, 6.0, 3.0, 4.0, 2.0, 5.0, 0.0, 1.0, 1.0], "bins": [-3.3585169315338135, -3.2775042057037354, -3.196491241455078, -3.115478515625, -3.0344655513763428, -2.9534528255462646, -2.8724398612976074, -2.7914271354675293, -2.710414409637451, -2.629401683807373, -2.548388719558716, -2.4673759937286377, -2.3863630294799805, -2.3053503036499023, -2.224337339401245, -2.143324613571167, -2.0623116493225098, -1.981298804283142, -1.9002859592437744, -1.8192731142044067, -1.738260269165039, -1.657247543334961, -1.5762346982955933, -1.4952218532562256, -1.414209008216858, -1.3331961631774902, -1.2521833181381226, -1.1711704730987549, -1.0901577472686768, -1.0091447830200195, -0.9281320571899414, -0.8471192121505737, -0.7661066055297852, -0.6850937604904175, -0.6040809154510498, -0.5230681300163269, -0.44205528497695923, -0.36104243993759155, -0.28002962470054626, -0.19901680946350098, -0.1180039644241333, -0.03699113428592682, 0.04402169585227966, 0.12503452599048615, 0.20604735612869263, 0.2870602011680603, 0.3680730164051056, 0.4490858316421509, 0.5300986766815186, 0.6111115217208862, 0.6921243667602539, 0.7731371521949768, 0.8541499972343445, 0.9351628422737122, 1.016175627708435, 1.0971884727478027, 1.1782013177871704, 1.259214162826538, 1.3402270078659058, 1.4212398529052734, 1.5022525787353516, 1.5832655429840088, 1.664278268814087, 1.7452911138534546, 1.8263039588928223]}, "gradients/decoder.transformer.h.23.attn.c_proj.bias": {"_type": "histogram", "values": [3.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 2.0, 2.0, 2.0, 5.0, 2.0, 2.0, 5.0, 6.0, 5.0, 4.0, 11.0, 10.0, 18.0, 18.0, 19.0, 23.0, 29.0, 39.0, 48.0, 51.0, 58.0, 72.0, 78.0, 58.0, 52.0, 70.0, 46.0, 41.0, 23.0, 40.0, 25.0, 20.0, 19.0, 17.0, 16.0, 10.0, 10.0, 14.0, 8.0, 5.0, 6.0, 7.0, 4.0, 4.0, 3.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-34.4375, -33.30615234375, -32.1748046875, -31.04345703125, -29.912109375, -28.78076171875, -27.6494140625, -26.51806640625, -25.38671875, -24.25537109375, -23.1240234375, -21.99267578125, -20.861328125, -19.72998046875, -18.5986328125, -17.46728515625, -16.3359375, -15.20458984375, -14.0732421875, -12.94189453125, -11.810546875, -10.67919921875, -9.5478515625, -8.41650390625, -7.28515625, -6.15380859375, -5.0224609375, -3.89111328125, -2.759765625, -1.62841796875, -0.4970703125, 0.63427734375, 1.765625, 2.89697265625, 4.0283203125, 5.15966796875, 6.291015625, 7.42236328125, 8.5537109375, 9.68505859375, 10.81640625, 11.94775390625, 13.0791015625, 14.21044921875, 15.341796875, 16.47314453125, 17.6044921875, 18.73583984375, 19.8671875, 20.99853515625, 22.1298828125, 23.26123046875, 24.392578125, 25.52392578125, 26.6552734375, 27.78662109375, 28.91796875, 30.04931640625, 31.1806640625, 32.31201171875, 33.443359375, 34.57470703125, 35.7060546875, 36.83740234375, 37.96875]}, "gradients/decoder.transformer.h.23.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 3.0, 3.0, 4.0, 5.0, 10.0, 7.0, 10.0, 17.0, 19.0, 23.0, 35.0, 50.0, 44.0, 73.0, 96.0, 156.0, 187.0, 251.0, 364.0, 570.0, 859.0, 1718.0, 3600.0, 9531.0, 48710.0, 882839.0, 78184.0, 12086.0, 4105.0, 1946.0, 1017.0, 623.0, 400.0, 265.0, 188.0, 137.0, 107.0, 72.0, 58.0, 47.0, 39.0, 24.0, 18.0, 17.0, 17.0, 7.0, 9.0, 6.0, 3.0, 4.0, 2.0, 2.0, 0.0, 2.0, 1.0, 0.0, 1.0], "bins": [-282.0, -273.34765625, -264.6953125, -256.04296875, -247.390625, -238.73828125, -230.0859375, -221.43359375, -212.78125, -204.12890625, -195.4765625, -186.82421875, -178.171875, -169.51953125, -160.8671875, -152.21484375, -143.5625, -134.91015625, -126.2578125, -117.60546875, -108.953125, -100.30078125, -91.6484375, -82.99609375, -74.34375, -65.69140625, -57.0390625, -48.38671875, -39.734375, -31.08203125, -22.4296875, -13.77734375, -5.125, 3.52734375, 12.1796875, 20.83203125, 29.484375, 38.13671875, 46.7890625, 55.44140625, 64.09375, 72.74609375, 81.3984375, 90.05078125, 98.703125, 107.35546875, 116.0078125, 124.66015625, 133.3125, 141.96484375, 150.6171875, 159.26953125, 167.921875, 176.57421875, 185.2265625, 193.87890625, 202.53125, 211.18359375, 219.8359375, 228.48828125, 237.140625, 245.79296875, 254.4453125, 263.09765625, 271.75]}, "gradients/decoder.transformer.h.23.attn.c_attn.bias": {"_type": "histogram", "values": [3.0, 1.0, 0.0, 0.0, 0.0, 6.0, 3.0, 2.0, 1.0, 7.0, 4.0, 6.0, 7.0, 8.0, 6.0, 9.0, 10.0, 15.0, 12.0, 22.0, 21.0, 24.0, 27.0, 26.0, 27.0, 31.0, 33.0, 43.0, 44.0, 55.0, 50.0, 90.0, 1862.0, 130.0, 55.0, 37.0, 50.0, 33.0, 30.0, 36.0, 21.0, 30.0, 28.0, 18.0, 25.0, 17.0, 25.0, 15.0, 14.0, 11.0, 9.0, 7.0, 5.0, 4.0, 2.0, 4.0, 4.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-103.375, -100.2138671875, -97.052734375, -93.8916015625, -90.73046875, -87.5693359375, -84.408203125, -81.2470703125, -78.0859375, -74.9248046875, -71.763671875, -68.6025390625, -65.44140625, -62.2802734375, -59.119140625, -55.9580078125, -52.796875, -49.6357421875, -46.474609375, -43.3134765625, -40.15234375, -36.9912109375, -33.830078125, -30.6689453125, -27.5078125, -24.3466796875, -21.185546875, -18.0244140625, -14.86328125, -11.7021484375, -8.541015625, -5.3798828125, -2.21875, 0.9423828125, 4.103515625, 7.2646484375, 10.42578125, 13.5869140625, 16.748046875, 19.9091796875, 23.0703125, 26.2314453125, 29.392578125, 32.5537109375, 35.71484375, 38.8759765625, 42.037109375, 45.1982421875, 48.359375, 51.5205078125, 54.681640625, 57.8427734375, 61.00390625, 64.1650390625, 67.326171875, 70.4873046875, 73.6484375, 76.8095703125, 79.970703125, 83.1318359375, 86.29296875, 89.4541015625, 92.615234375, 95.7763671875, 98.9375]}, "gradients/decoder.transformer.h.23.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 6.0, 1.0, 2.0, 3.0, 5.0, 8.0, 9.0, 11.0, 8.0, 16.0, 22.0, 18.0, 17.0, 17.0, 26.0, 29.0, 29.0, 39.0, 55.0, 73.0, 182.0, 456.0, 2233.0, 109923.0, 3024656.0, 6349.0, 781.0, 258.0, 113.0, 68.0, 45.0, 34.0, 32.0, 22.0, 22.0, 25.0, 24.0, 16.0, 11.0, 12.0, 8.0, 7.0, 6.0, 8.0, 5.0, 5.0, 7.0, 5.0, 1.0, 2.0, 3.0, 4.0, 0.0, 0.0, 0.0, 1.0, 3.0], "bins": [-486.0, -470.42578125, -454.8515625, -439.27734375, -423.703125, -408.12890625, -392.5546875, -376.98046875, -361.40625, -345.83203125, -330.2578125, -314.68359375, -299.109375, -283.53515625, -267.9609375, -252.38671875, -236.8125, -221.23828125, -205.6640625, -190.08984375, -174.515625, -158.94140625, -143.3671875, -127.79296875, -112.21875, -96.64453125, -81.0703125, -65.49609375, -49.921875, -34.34765625, -18.7734375, -3.19921875, 12.375, 27.94921875, 43.5234375, 59.09765625, 74.671875, 90.24609375, 105.8203125, 121.39453125, 136.96875, 152.54296875, 168.1171875, 183.69140625, 199.265625, 214.83984375, 230.4140625, 245.98828125, 261.5625, 277.13671875, 292.7109375, 308.28515625, 323.859375, 339.43359375, 355.0078125, 370.58203125, 386.15625, 401.73046875, 417.3046875, 432.87890625, 448.453125, 464.02734375, 479.6015625, 495.17578125, 510.75]}, "gradients/decoder.transformer.h.23.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 94.0, 913.0, 8.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-3376.05322265625, -3290.26806640625, -3204.483154296875, -3118.697998046875, -3032.912841796875, -2947.127685546875, -2861.3427734375, -2775.5576171875, -2689.7724609375, -2603.9873046875, -2518.202392578125, -2432.417236328125, -2346.632080078125, -2260.846923828125, -2175.06201171875, -2089.27685546875, -2003.4918212890625, -1917.706787109375, -1831.921630859375, -1746.1365966796875, -1660.3514404296875, -1574.56640625, -1488.78125, -1402.9962158203125, -1317.211181640625, -1231.4261474609375, -1145.6409912109375, -1059.85595703125, -974.07080078125, -888.2857666015625, -802.5006713867188, -716.715576171875, -630.930419921875, -545.1453247070312, -459.3602294921875, -373.5751647949219, -287.7900695800781, -202.00497436523438, -116.21990966796875, -30.434814453125, 55.35028076171875, 141.1353759765625, 226.9204559326172, 312.7055358886719, 398.4906311035156, 484.2757263183594, 570.060791015625, 655.8458862304688, 741.6309814453125, 827.4160766601562, 913.201171875, 998.9862060546875, 1084.7713623046875, 1170.556396484375, 1256.341552734375, 1342.1265869140625, 1427.91162109375, 1513.6966552734375, 1599.4818115234375, 1685.266845703125, 1771.052001953125, 1856.8370361328125, 1942.6220703125, 2028.4072265625, 2114.1923828125]}, "gradients/decoder.transformer.h.23.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 3.0, 0.0, 2.0, 2.0, 0.0, 3.0, 1.0, 6.0, 6.0, 8.0, 6.0, 11.0, 13.0, 15.0, 21.0, 26.0, 25.0, 30.0, 23.0, 23.0, 35.0, 41.0, 29.0, 39.0, 49.0, 47.0, 39.0, 30.0, 35.0, 42.0, 34.0, 44.0, 34.0, 32.0, 37.0, 24.0, 39.0, 19.0, 21.0, 20.0, 30.0, 10.0, 18.0, 8.0, 10.0, 5.0, 10.0, 3.0, 2.0, 3.0, 1.0, 0.0, 0.0, 2.0, 2.0, 2.0, 1.0, 0.0, 0.0, 2.0], "bins": [-437.02105712890625, -424.54473876953125, -412.0684509277344, -399.5921325683594, -387.1158447265625, -374.6395263671875, -362.1632385253906, -349.6869201660156, -337.21063232421875, -324.73431396484375, -312.2580261230469, -299.7817077636719, -287.305419921875, -274.8291015625, -262.3528137207031, -249.87649536132812, -237.4001922607422, -224.92388916015625, -212.4475860595703, -199.97128295898438, -187.49497985839844, -175.0186767578125, -162.5423583984375, -150.06607055664062, -137.58975219726562, -125.11344909667969, -112.63714599609375, -100.16084289550781, -87.68453979492188, -75.20823669433594, -62.73192596435547, -50.25562286376953, -37.779327392578125, -25.303024291992188, -12.826719284057617, -0.3504142761230469, 12.12588882446289, 24.602191925048828, 37.07849884033203, 49.55480194091797, 62.031105041503906, 74.50740814208984, 86.98371124267578, 99.46002197265625, 111.93632507324219, 124.41262817382812, 136.88893127441406, 149.365234375, 161.84153747558594, 174.31784057617188, 186.7941436767578, 199.27044677734375, 211.7467498779297, 224.22305297851562, 236.69937133789062, 249.1756591796875, 261.6519775390625, 274.1282958984375, 286.6045837402344, 299.0809020996094, 311.55718994140625, 324.03350830078125, 336.5097961425781, 348.9861145019531, 361.46240234375]}, "gradients/decoder.transformer.h.22.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 2.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 6.0, 6.0, 4.0, 2.0, 7.0, 5.0, 19.0, 10.0, 15.0, 28.0, 24.0, 27.0, 36.0, 46.0, 54.0, 63.0, 56.0, 67.0, 59.0, 67.0, 65.0, 46.0, 45.0, 29.0, 37.0, 20.0, 21.0, 25.0, 24.0, 11.0, 13.0, 9.0, 16.0, 7.0, 7.0, 5.0, 5.0, 5.0, 3.0, 6.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-37.0, -35.79150390625, -34.5830078125, -33.37451171875, -32.166015625, -30.95751953125, -29.7490234375, -28.54052734375, -27.33203125, -26.12353515625, -24.9150390625, -23.70654296875, -22.498046875, -21.28955078125, -20.0810546875, -18.87255859375, -17.6640625, -16.45556640625, -15.2470703125, -14.03857421875, -12.830078125, -11.62158203125, -10.4130859375, -9.20458984375, -7.99609375, -6.78759765625, -5.5791015625, -4.37060546875, -3.162109375, -1.95361328125, -0.7451171875, 0.46337890625, 1.671875, 2.88037109375, 4.0888671875, 5.29736328125, 6.505859375, 7.71435546875, 8.9228515625, 10.13134765625, 11.33984375, 12.54833984375, 13.7568359375, 14.96533203125, 16.173828125, 17.38232421875, 18.5908203125, 19.79931640625, 21.0078125, 22.21630859375, 23.4248046875, 24.63330078125, 25.841796875, 27.05029296875, 28.2587890625, 29.46728515625, 30.67578125, 31.88427734375, 33.0927734375, 34.30126953125, 35.509765625, 36.71826171875, 37.9267578125, 39.13525390625, 40.34375]}, "gradients/decoder.transformer.h.22.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 4.0, 1.0, 3.0, 7.0, 8.0, 8.0, 13.0, 14.0, 16.0, 31.0, 48.0, 64.0, 121.0, 182.0, 281.0, 549.0, 1113.0, 2358.0, 5481.0, 17064.0, 90252.0, 3897356.0, 142836.0, 23188.0, 7426.0, 2927.0, 1313.0, 677.0, 345.0, 214.0, 134.0, 67.0, 60.0, 40.0, 25.0, 17.0, 12.0, 9.0, 4.0, 11.0, 5.0, 1.0, 1.0, 2.0, 4.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-249.0, -240.58203125, -232.1640625, -223.74609375, -215.328125, -206.91015625, -198.4921875, -190.07421875, -181.65625, -173.23828125, -164.8203125, -156.40234375, -147.984375, -139.56640625, -131.1484375, -122.73046875, -114.3125, -105.89453125, -97.4765625, -89.05859375, -80.640625, -72.22265625, -63.8046875, -55.38671875, -46.96875, -38.55078125, -30.1328125, -21.71484375, -13.296875, -4.87890625, 3.5390625, 11.95703125, 20.375, 28.79296875, 37.2109375, 45.62890625, 54.046875, 62.46484375, 70.8828125, 79.30078125, 87.71875, 96.13671875, 104.5546875, 112.97265625, 121.390625, 129.80859375, 138.2265625, 146.64453125, 155.0625, 163.48046875, 171.8984375, 180.31640625, 188.734375, 197.15234375, 205.5703125, 213.98828125, 222.40625, 230.82421875, 239.2421875, 247.66015625, 256.078125, 264.49609375, 272.9140625, 281.33203125, 289.75]}, "gradients/decoder.transformer.h.22.mlp.c_fc.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 3.0, 5.0, 1.0, 5.0, 6.0, 10.0, 10.0, 12.0, 23.0, 15.0, 30.0, 28.0, 36.0, 63.0, 44.0, 82.0, 118.0, 134.0, 200.0, 375.0, 643.0, 793.0, 435.0, 255.0, 183.0, 138.0, 93.0, 58.0, 53.0, 51.0, 33.0, 19.0, 24.0, 22.0, 13.0, 13.0, 13.0, 8.0, 6.0, 6.0, 2.0, 2.0, 3.0, 3.0, 4.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-108.625, -105.0361328125, -101.447265625, -97.8583984375, -94.26953125, -90.6806640625, -87.091796875, -83.5029296875, -79.9140625, -76.3251953125, -72.736328125, -69.1474609375, -65.55859375, -61.9697265625, -58.380859375, -54.7919921875, -51.203125, -47.6142578125, -44.025390625, -40.4365234375, -36.84765625, -33.2587890625, -29.669921875, -26.0810546875, -22.4921875, -18.9033203125, -15.314453125, -11.7255859375, -8.13671875, -4.5478515625, -0.958984375, 2.6298828125, 6.21875, 9.8076171875, 13.396484375, 16.9853515625, 20.57421875, 24.1630859375, 27.751953125, 31.3408203125, 34.9296875, 38.5185546875, 42.107421875, 45.6962890625, 49.28515625, 52.8740234375, 56.462890625, 60.0517578125, 63.640625, 67.2294921875, 70.818359375, 74.4072265625, 77.99609375, 81.5849609375, 85.173828125, 88.7626953125, 92.3515625, 95.9404296875, 99.529296875, 103.1181640625, 106.70703125, 110.2958984375, 113.884765625, 117.4736328125, 121.0625]}, "gradients/decoder.transformer.h.22.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 3.0, 1.0, 3.0, 5.0, 9.0, 6.0, 9.0, 9.0, 10.0, 22.0, 29.0, 32.0, 51.0, 66.0, 110.0, 218.0, 366.0, 892.0, 2637.0, 9895.0, 58842.0, 3970596.0, 128183.0, 16159.0, 3771.0, 1194.0, 476.0, 235.0, 135.0, 79.0, 60.0, 46.0, 21.0, 27.0, 17.0, 11.0, 17.0, 11.0, 14.0, 4.0, 6.0, 0.0, 6.0, 3.0, 2.0, 0.0, 1.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 2.0], "bins": [-474.0, -459.0234375, -444.046875, -429.0703125, -414.09375, -399.1171875, -384.140625, -369.1640625, -354.1875, -339.2109375, -324.234375, -309.2578125, -294.28125, -279.3046875, -264.328125, -249.3515625, -234.375, -219.3984375, -204.421875, -189.4453125, -174.46875, -159.4921875, -144.515625, -129.5390625, -114.5625, -99.5859375, -84.609375, -69.6328125, -54.65625, -39.6796875, -24.703125, -9.7265625, 5.25, 20.2265625, 35.203125, 50.1796875, 65.15625, 80.1328125, 95.109375, 110.0859375, 125.0625, 140.0390625, 155.015625, 169.9921875, 184.96875, 199.9453125, 214.921875, 229.8984375, 244.875, 259.8515625, 274.828125, 289.8046875, 304.78125, 319.7578125, 334.734375, 349.7109375, 364.6875, 379.6640625, 394.640625, 409.6171875, 424.59375, 439.5703125, 454.546875, 469.5234375, 484.5]}, "gradients/decoder.transformer.h.22.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 4.0, 88.0, 765.0, 147.0, 9.0, 2.0, 2.0, 1.0], "bins": [-5609.19091796875, -5513.380859375, -5417.5712890625, -5321.76123046875, -5225.95166015625, -5130.1416015625, -5034.33203125, -4938.52197265625, -4842.7119140625, -4746.90185546875, -4651.09228515625, -4555.2822265625, -4459.47265625, -4363.66259765625, -4267.85302734375, -4172.04296875, -4076.2333984375, -3980.423583984375, -3884.61376953125, -3788.803955078125, -3692.994140625, -3597.18408203125, -3501.374267578125, -3405.564453125, -3309.754638671875, -3213.94482421875, -3118.135009765625, -3022.3251953125, -2926.51513671875, -2830.705322265625, -2734.8955078125, -2639.085693359375, -2543.276123046875, -2447.46630859375, -2351.656494140625, -2255.8466796875, -2160.03662109375, -2064.226806640625, -1968.4169921875, -1872.607177734375, -1776.79736328125, -1680.987548828125, -1585.177734375, -1489.3677978515625, -1393.5579833984375, -1297.7481689453125, -1201.938232421875, -1106.12841796875, -1010.318603515625, -914.5087890625, -818.6989135742188, -722.8890380859375, -627.0792236328125, -531.2694091796875, -435.45953369140625, -339.649658203125, -243.83984375, -148.02999877929688, -52.22015380859375, 43.589691162109375, 139.3995361328125, 235.20938110351562, 331.01922607421875, 426.8291015625, 522.638916015625]}, "gradients/decoder.transformer.h.22.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 4.0, 4.0, 4.0, 6.0, 6.0, 9.0, 8.0, 10.0, 11.0, 18.0, 20.0, 26.0, 26.0, 27.0, 20.0, 24.0, 26.0, 27.0, 25.0, 37.0, 38.0, 41.0, 44.0, 36.0, 39.0, 43.0, 32.0, 40.0, 31.0, 39.0, 29.0, 31.0, 25.0, 25.0, 31.0, 28.0, 15.0, 14.0, 15.0, 17.0, 14.0, 7.0, 6.0, 7.0, 5.0, 4.0, 6.0, 7.0, 5.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-379.9024658203125, -367.69268798828125, -355.4829406738281, -343.2731628417969, -331.06341552734375, -318.8536376953125, -306.64385986328125, -294.4341125488281, -282.224365234375, -270.01458740234375, -257.8048400878906, -245.59506225585938, -233.38531494140625, -221.175537109375, -208.9657745361328, -196.75601196289062, -184.54623413085938, -172.3364715576172, -160.126708984375, -147.91693115234375, -135.70718383789062, -123.4974136352539, -111.28764343261719, -99.077880859375, -86.86811828613281, -74.65835571289062, -62.44858932495117, -50.23882293701172, -38.02906036376953, -25.819297790527344, -13.609527587890625, -1.3997650146484375, 10.80999755859375, 23.01976203918457, 35.22952651977539, 47.439292907714844, 59.64905548095703, 71.85881805419922, 84.06858825683594, 96.27835083007812, 108.48811340332031, 120.6978759765625, 132.9076385498047, 145.11740112304688, 157.32717895507812, 169.53692626953125, 181.7467041015625, 193.9564666748047, 206.16622924804688, 218.37599182128906, 230.58575439453125, 242.7955322265625, 255.00527954101562, 267.2150573730469, 279.4248046875, 291.63458251953125, 303.8443603515625, 316.05413818359375, 328.2638854980469, 340.4736633300781, 352.68341064453125, 364.8931884765625, 377.10296630859375, 389.3127136230469, 401.5224609375]}, "gradients/decoder.transformer.h.22.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 3.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 3.0, 3.0, 5.0, 12.0, 4.0, 4.0, 9.0, 19.0, 13.0, 11.0, 21.0, 29.0, 35.0, 39.0, 41.0, 44.0, 50.0, 55.0, 63.0, 53.0, 64.0, 49.0, 55.0, 34.0, 50.0, 35.0, 27.0, 25.0, 27.0, 19.0, 20.0, 12.0, 12.0, 10.0, 12.0, 8.0, 12.0, 4.0, 7.0, 5.0, 1.0, 3.0, 1.0, 3.0, 0.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-36.09375, -34.85400390625, -33.6142578125, -32.37451171875, -31.134765625, -29.89501953125, -28.6552734375, -27.41552734375, -26.17578125, -24.93603515625, -23.6962890625, -22.45654296875, -21.216796875, -19.97705078125, -18.7373046875, -17.49755859375, -16.2578125, -15.01806640625, -13.7783203125, -12.53857421875, -11.298828125, -10.05908203125, -8.8193359375, -7.57958984375, -6.33984375, -5.10009765625, -3.8603515625, -2.62060546875, -1.380859375, -0.14111328125, 1.0986328125, 2.33837890625, 3.578125, 4.81787109375, 6.0576171875, 7.29736328125, 8.537109375, 9.77685546875, 11.0166015625, 12.25634765625, 13.49609375, 14.73583984375, 15.9755859375, 17.21533203125, 18.455078125, 19.69482421875, 20.9345703125, 22.17431640625, 23.4140625, 24.65380859375, 25.8935546875, 27.13330078125, 28.373046875, 29.61279296875, 30.8525390625, 32.09228515625, 33.33203125, 34.57177734375, 35.8115234375, 37.05126953125, 38.291015625, 39.53076171875, 40.7705078125, 42.01025390625, 43.25]}, "gradients/decoder.transformer.h.22.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 4.0, 1.0, 1.0, 2.0, 3.0, 11.0, 9.0, 13.0, 13.0, 18.0, 38.0, 54.0, 64.0, 78.0, 122.0, 155.0, 217.0, 314.0, 411.0, 678.0, 1183.0, 2084.0, 4265.0, 9582.0, 31402.0, 402554.0, 539122.0, 35686.0, 10511.0, 4373.0, 2204.0, 1166.0, 689.0, 451.0, 303.0, 212.0, 152.0, 105.0, 90.0, 62.0, 46.0, 32.0, 19.0, 12.0, 18.0, 19.0, 5.0, 8.0, 3.0, 2.0, 0.0, 1.0, 3.0, 0.0, 2.0], "bins": [-23.78125, -23.1025390625, -22.423828125, -21.7451171875, -21.06640625, -20.3876953125, -19.708984375, -19.0302734375, -18.3515625, -17.6728515625, -16.994140625, -16.3154296875, -15.63671875, -14.9580078125, -14.279296875, -13.6005859375, -12.921875, -12.2431640625, -11.564453125, -10.8857421875, -10.20703125, -9.5283203125, -8.849609375, -8.1708984375, -7.4921875, -6.8134765625, -6.134765625, -5.4560546875, -4.77734375, -4.0986328125, -3.419921875, -2.7412109375, -2.0625, -1.3837890625, -0.705078125, -0.0263671875, 0.65234375, 1.3310546875, 2.009765625, 2.6884765625, 3.3671875, 4.0458984375, 4.724609375, 5.4033203125, 6.08203125, 6.7607421875, 7.439453125, 8.1181640625, 8.796875, 9.4755859375, 10.154296875, 10.8330078125, 11.51171875, 12.1904296875, 12.869140625, 13.5478515625, 14.2265625, 14.9052734375, 15.583984375, 16.2626953125, 16.94140625, 17.6201171875, 18.298828125, 18.9775390625, 19.65625]}, "gradients/decoder.transformer.h.22.crossattention.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 4.0, 4.0, 7.0, 8.0, 11.0, 6.0, 12.0, 13.0, 20.0, 16.0, 27.0, 22.0, 29.0, 32.0, 32.0, 45.0, 37.0, 51.0, 38.0, 51.0, 32.0, 1072.0, 36.0, 38.0, 47.0, 35.0, 40.0, 31.0, 32.0, 33.0, 36.0, 22.0, 13.0, 17.0, 12.0, 17.0, 13.0, 13.0, 11.0, 7.0, 5.0, 4.0, 3.0, 1.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-25.265625, -24.4775390625, -23.689453125, -22.9013671875, -22.11328125, -21.3251953125, -20.537109375, -19.7490234375, -18.9609375, -18.1728515625, -17.384765625, -16.5966796875, -15.80859375, -15.0205078125, -14.232421875, -13.4443359375, -12.65625, -11.8681640625, -11.080078125, -10.2919921875, -9.50390625, -8.7158203125, -7.927734375, -7.1396484375, -6.3515625, -5.5634765625, -4.775390625, -3.9873046875, -3.19921875, -2.4111328125, -1.623046875, -0.8349609375, -0.046875, 0.7412109375, 1.529296875, 2.3173828125, 3.10546875, 3.8935546875, 4.681640625, 5.4697265625, 6.2578125, 7.0458984375, 7.833984375, 8.6220703125, 9.41015625, 10.1982421875, 10.986328125, 11.7744140625, 12.5625, 13.3505859375, 14.138671875, 14.9267578125, 15.71484375, 16.5029296875, 17.291015625, 18.0791015625, 18.8671875, 19.6552734375, 20.443359375, 21.2314453125, 22.01953125, 22.8076171875, 23.595703125, 24.3837890625, 25.171875]}, "gradients/decoder.transformer.h.22.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 2.0, 2.0, 1.0, 3.0, 10.0, 5.0, 9.0, 15.0, 16.0, 25.0, 33.0, 56.0, 69.0, 98.0, 148.0, 188.0, 266.0, 370.0, 497.0, 673.0, 995.0, 1506.0, 2260.0, 3689.0, 6094.0, 11106.0, 22304.0, 53718.0, 164841.0, 1439925.0, 255505.0, 71741.0, 28165.0, 13181.0, 7069.0, 4211.0, 2754.0, 1779.0, 1124.0, 818.0, 516.0, 381.0, 256.0, 212.0, 144.0, 110.0, 73.0, 54.0, 31.0, 26.0, 19.0, 11.0, 13.0, 13.0, 6.0, 3.0, 5.0, 2.0, 2.0, 1.0, 0.0, 1.0, 1.0], "bins": [-5.55859375, -5.377197265625, -5.19580078125, -5.014404296875, -4.8330078125, -4.651611328125, -4.47021484375, -4.288818359375, -4.107421875, -3.926025390625, -3.74462890625, -3.563232421875, -3.3818359375, -3.200439453125, -3.01904296875, -2.837646484375, -2.65625, -2.474853515625, -2.29345703125, -2.112060546875, -1.9306640625, -1.749267578125, -1.56787109375, -1.386474609375, -1.205078125, -1.023681640625, -0.84228515625, -0.660888671875, -0.4794921875, -0.298095703125, -0.11669921875, 0.064697265625, 0.24609375, 0.427490234375, 0.60888671875, 0.790283203125, 0.9716796875, 1.153076171875, 1.33447265625, 1.515869140625, 1.697265625, 1.878662109375, 2.06005859375, 2.241455078125, 2.4228515625, 2.604248046875, 2.78564453125, 2.967041015625, 3.1484375, 3.329833984375, 3.51123046875, 3.692626953125, 3.8740234375, 4.055419921875, 4.23681640625, 4.418212890625, 4.599609375, 4.781005859375, 4.96240234375, 5.143798828125, 5.3251953125, 5.506591796875, 5.68798828125, 5.869384765625, 6.05078125]}, "gradients/decoder.transformer.h.22.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 3.0, 2.0, 6.0, 3.0, 3.0, 6.0, 12.0, 8.0, 10.0, 15.0, 9.0, 24.0, 18.0, 15.0, 18.0, 36.0, 34.0, 45.0, 42.0, 40.0, 34.0, 45.0, 176.0, 44.0, 44.0, 57.0, 38.0, 37.0, 35.0, 23.0, 18.0, 11.0, 11.0, 14.0, 15.0, 9.0, 8.0, 8.0, 6.0, 9.0, 5.0, 3.0, 4.0, 2.0, 3.0, 3.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.59716796875, -0.5760955810546875, -0.555023193359375, -0.5339508056640625, -0.51287841796875, -0.4918060302734375, -0.470733642578125, -0.4496612548828125, -0.4285888671875, -0.4075164794921875, -0.386444091796875, -0.3653717041015625, -0.34429931640625, -0.3232269287109375, -0.302154541015625, -0.2810821533203125, -0.260009765625, -0.2389373779296875, -0.217864990234375, -0.1967926025390625, -0.17572021484375, -0.1546478271484375, -0.133575439453125, -0.1125030517578125, -0.0914306640625, -0.0703582763671875, -0.049285888671875, -0.0282135009765625, -0.00714111328125, 0.0139312744140625, 0.035003662109375, 0.0560760498046875, 0.0771484375, 0.0982208251953125, 0.119293212890625, 0.1403656005859375, 0.16143798828125, 0.1825103759765625, 0.203582763671875, 0.2246551513671875, 0.2457275390625, 0.2667999267578125, 0.287872314453125, 0.3089447021484375, 0.33001708984375, 0.3510894775390625, 0.372161865234375, 0.3932342529296875, 0.414306640625, 0.4353790283203125, 0.456451416015625, 0.4775238037109375, 0.49859619140625, 0.5196685791015625, 0.540740966796875, 0.5618133544921875, 0.5828857421875, 0.6039581298828125, 0.625030517578125, 0.6461029052734375, 0.66717529296875, 0.6882476806640625, 0.709320068359375, 0.7303924560546875, 0.75146484375]}, "gradients/decoder.transformer.h.22.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 3.0, 3.0, 3.0, 5.0, 1.0, 7.0, 7.0, 6.0, 10.0, 6.0, 11.0, 14.0, 13.0, 11.0, 12.0, 16.0, 26.0, 37.0, 47.0, 59.0, 94.0, 107.0, 307.0, 1046656.0, 517.0, 144.0, 93.0, 66.0, 56.0, 37.0, 38.0, 15.0, 16.0, 18.0, 24.0, 10.0, 15.0, 9.0, 8.0, 14.0, 5.0, 2.0, 3.0, 5.0, 4.0, 3.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0], "bins": [-23.4375, -22.7802734375, -22.123046875, -21.4658203125, -20.80859375, -20.1513671875, -19.494140625, -18.8369140625, -18.1796875, -17.5224609375, -16.865234375, -16.2080078125, -15.55078125, -14.8935546875, -14.236328125, -13.5791015625, -12.921875, -12.2646484375, -11.607421875, -10.9501953125, -10.29296875, -9.6357421875, -8.978515625, -8.3212890625, -7.6640625, -7.0068359375, -6.349609375, -5.6923828125, -5.03515625, -4.3779296875, -3.720703125, -3.0634765625, -2.40625, -1.7490234375, -1.091796875, -0.4345703125, 0.22265625, 0.8798828125, 1.537109375, 2.1943359375, 2.8515625, 3.5087890625, 4.166015625, 4.8232421875, 5.48046875, 6.1376953125, 6.794921875, 7.4521484375, 8.109375, 8.7666015625, 9.423828125, 10.0810546875, 10.73828125, 11.3955078125, 12.052734375, 12.7099609375, 13.3671875, 14.0244140625, 14.681640625, 15.3388671875, 15.99609375, 16.6533203125, 17.310546875, 17.9677734375, 18.625]}, "gradients/decoder.transformer.h.22.ln_cross_attn.weight": {"_type": "histogram", "values": [2.0, 251.0, 769.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.7698224782943726, -0.8929613828659058, -0.016100287437438965, 0.8607608079910278, 1.7376219034194946, 2.614482879638672, 3.4913439750671387, 4.3682050704956055, 5.245066165924072, 6.121927261352539, 6.998788356781006, 7.875649452209473, 8.752510070800781, 9.629371643066406, 10.506233215332031, 11.38309383392334, 12.259955406188965, 13.136816024780273, 14.013677597045898, 14.890539169311523, 15.767399787902832, 16.64426040649414, 17.521121978759766, 18.39798355102539, 19.274843215942383, 20.151704788208008, 21.028564453125, 21.905426025390625, 22.78228759765625, 23.659149169921875, 24.5360107421875, 25.412870407104492, 26.28973388671875, 27.166595458984375, 28.04345703125, 28.920316696166992, 29.797178268432617, 30.674039840698242, 31.550899505615234, 32.42776107788086, 33.304622650146484, 34.18148422241211, 35.058345794677734, 35.93520736694336, 36.81206512451172, 37.688926696777344, 38.56578826904297, 39.442649841308594, 40.31951141357422, 41.196372985839844, 42.07323455810547, 42.950096130371094, 43.82695770263672, 44.70381546020508, 45.5806770324707, 46.45753860473633, 47.33440017700195, 48.21126174926758, 49.0881233215332, 49.96498489379883, 50.84184265136719, 51.71870422363281, 52.59556579589844, 53.47242736816406, 54.34928894042969]}, "gradients/decoder.transformer.h.22.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 1.0, 2.0, 4.0, 6.0, 7.0, 5.0, 7.0, 16.0, 16.0, 15.0, 16.0, 11.0, 24.0, 28.0, 27.0, 29.0, 36.0, 43.0, 35.0, 30.0, 49.0, 48.0, 32.0, 41.0, 36.0, 49.0, 45.0, 46.0, 34.0, 30.0, 29.0, 27.0, 29.0, 21.0, 21.0, 21.0, 23.0, 15.0, 15.0, 7.0, 7.0, 10.0, 3.0, 5.0, 3.0, 3.0, 6.0, 1.0, 1.0, 2.0], "bins": [-2.155302047729492, -2.0971593856811523, -2.0390167236328125, -1.9808741807937622, -1.9227315187454224, -1.864588975906372, -1.8064463138580322, -1.7483036518096924, -1.6901609897613525, -1.6320183277130127, -1.5738757848739624, -1.5157331228256226, -1.4575904607772827, -1.3994479179382324, -1.3413052558898926, -1.2831625938415527, -1.2250200510025024, -1.1668773889541626, -1.1087348461151123, -1.0505921840667725, -0.9924495220184326, -0.9343069195747375, -0.8761643171310425, -0.8180216550827026, -0.7598790526390076, -0.7017364501953125, -0.6435937881469727, -0.5854511857032776, -0.5273085832595825, -0.4691659212112427, -0.4110233187675476, -0.35288068652153015, -0.29473793506622314, -0.2365953028202057, -0.17845268547534943, -0.12031006813049316, -0.06216743588447571, -0.004024803638458252, 0.054117798805236816, 0.11226043105125427, 0.17040306329727173, 0.22854569554328918, 0.28668832778930664, 0.3448309302330017, 0.40297356247901917, 0.4611161947250366, 0.5192587971687317, 0.5774013996124268, 0.6355440616607666, 0.6936866641044617, 0.7518293261528015, 0.8099719285964966, 0.8681145906448364, 0.9262571930885315, 0.9843997955322266, 1.0425424575805664, 1.1006851196289062, 1.158827781677246, 1.2169703245162964, 1.2751129865646362, 1.333255648612976, 1.3913981914520264, 1.4495408535003662, 1.507683515548706, 1.5658260583877563]}, "gradients/decoder.transformer.h.22.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 3.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 3.0, 3.0, 5.0, 12.0, 4.0, 4.0, 9.0, 19.0, 13.0, 11.0, 21.0, 29.0, 35.0, 39.0, 41.0, 44.0, 50.0, 55.0, 63.0, 53.0, 64.0, 49.0, 55.0, 34.0, 50.0, 35.0, 27.0, 25.0, 27.0, 19.0, 20.0, 13.0, 11.0, 10.0, 12.0, 8.0, 12.0, 4.0, 7.0, 5.0, 1.0, 3.0, 1.0, 3.0, 0.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-36.09375, -34.85400390625, -33.6142578125, -32.37451171875, -31.134765625, -29.89501953125, -28.6552734375, -27.41552734375, -26.17578125, -24.93603515625, -23.6962890625, -22.45654296875, -21.216796875, -19.97705078125, -18.7373046875, -17.49755859375, -16.2578125, -15.01806640625, -13.7783203125, -12.53857421875, -11.298828125, -10.05908203125, -8.8193359375, -7.57958984375, -6.33984375, -5.10009765625, -3.8603515625, -2.62060546875, -1.380859375, -0.14111328125, 1.0986328125, 2.33837890625, 3.578125, 4.81787109375, 6.0576171875, 7.29736328125, 8.537109375, 9.77685546875, 11.0166015625, 12.25634765625, 13.49609375, 14.73583984375, 15.9755859375, 17.21533203125, 18.455078125, 19.69482421875, 20.9345703125, 22.17431640625, 23.4140625, 24.65380859375, 25.8935546875, 27.13330078125, 28.373046875, 29.61279296875, 30.8525390625, 32.09228515625, 33.33203125, 34.57177734375, 35.8115234375, 37.05126953125, 38.291015625, 39.53076171875, 40.7705078125, 42.01025390625, 43.25]}, "gradients/decoder.transformer.h.22.attn.c_proj.weight": {"_type": "histogram", "values": [3.0, 3.0, 7.0, 4.0, 0.0, 3.0, 6.0, 2.0, 3.0, 10.0, 8.0, 20.0, 29.0, 38.0, 31.0, 45.0, 63.0, 107.0, 149.0, 204.0, 307.0, 580.0, 958.0, 1816.0, 3734.0, 8563.0, 22420.0, 76403.0, 482076.0, 352800.0, 63800.0, 19667.0, 7450.0, 3352.0, 1577.0, 815.0, 460.0, 310.0, 208.0, 133.0, 86.0, 61.0, 68.0, 49.0, 42.0, 23.0, 15.0, 13.0, 11.0, 9.0, 11.0, 5.0, 7.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 2.0], "bins": [-30.375, -29.3251953125, -28.275390625, -27.2255859375, -26.17578125, -25.1259765625, -24.076171875, -23.0263671875, -21.9765625, -20.9267578125, -19.876953125, -18.8271484375, -17.77734375, -16.7275390625, -15.677734375, -14.6279296875, -13.578125, -12.5283203125, -11.478515625, -10.4287109375, -9.37890625, -8.3291015625, -7.279296875, -6.2294921875, -5.1796875, -4.1298828125, -3.080078125, -2.0302734375, -0.98046875, 0.0693359375, 1.119140625, 2.1689453125, 3.21875, 4.2685546875, 5.318359375, 6.3681640625, 7.41796875, 8.4677734375, 9.517578125, 10.5673828125, 11.6171875, 12.6669921875, 13.716796875, 14.7666015625, 15.81640625, 16.8662109375, 17.916015625, 18.9658203125, 20.015625, 21.0654296875, 22.115234375, 23.1650390625, 24.21484375, 25.2646484375, 26.314453125, 27.3642578125, 28.4140625, 29.4638671875, 30.513671875, 31.5634765625, 32.61328125, 33.6630859375, 34.712890625, 35.7626953125, 36.8125]}, "gradients/decoder.transformer.h.22.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 4.0, 2.0, 1.0, 0.0, 3.0, 4.0, 5.0, 5.0, 15.0, 6.0, 12.0, 10.0, 14.0, 20.0, 23.0, 29.0, 33.0, 26.0, 28.0, 35.0, 47.0, 41.0, 39.0, 41.0, 41.0, 69.0, 2047.0, 47.0, 41.0, 46.0, 31.0, 37.0, 28.0, 24.0, 24.0, 28.0, 26.0, 16.0, 20.0, 17.0, 15.0, 19.0, 12.0, 5.0, 7.0, 3.0, 0.0, 8.0, 4.0, 4.0, 2.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-123.0625, -118.9853515625, -114.908203125, -110.8310546875, -106.75390625, -102.6767578125, -98.599609375, -94.5224609375, -90.4453125, -86.3681640625, -82.291015625, -78.2138671875, -74.13671875, -70.0595703125, -65.982421875, -61.9052734375, -57.828125, -53.7509765625, -49.673828125, -45.5966796875, -41.51953125, -37.4423828125, -33.365234375, -29.2880859375, -25.2109375, -21.1337890625, -17.056640625, -12.9794921875, -8.90234375, -4.8251953125, -0.748046875, 3.3291015625, 7.40625, 11.4833984375, 15.560546875, 19.6376953125, 23.71484375, 27.7919921875, 31.869140625, 35.9462890625, 40.0234375, 44.1005859375, 48.177734375, 52.2548828125, 56.33203125, 60.4091796875, 64.486328125, 68.5634765625, 72.640625, 76.7177734375, 80.794921875, 84.8720703125, 88.94921875, 93.0263671875, 97.103515625, 101.1806640625, 105.2578125, 109.3349609375, 113.412109375, 117.4892578125, 121.56640625, 125.6435546875, 129.720703125, 133.7978515625, 137.875]}, "gradients/decoder.transformer.h.22.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 2.0, 4.0, 3.0, 9.0, 0.0, 3.0, 7.0, 4.0, 14.0, 19.0, 15.0, 18.0, 18.0, 20.0, 30.0, 41.0, 30.0, 41.0, 74.0, 102.0, 154.0, 325.0, 663.0, 1935.0, 3086531.0, 53201.0, 1145.0, 499.0, 236.0, 144.0, 96.0, 61.0, 47.0, 35.0, 34.0, 36.0, 25.0, 19.0, 13.0, 11.0, 11.0, 8.0, 13.0, 6.0, 4.0, 4.0, 3.0, 0.0, 1.0, 3.0, 3.0, 1.0, 0.0, 0.0, 1.0], "bins": [-305.0, -295.9765625, -286.953125, -277.9296875, -268.90625, -259.8828125, -250.859375, -241.8359375, -232.8125, -223.7890625, -214.765625, -205.7421875, -196.71875, -187.6953125, -178.671875, -169.6484375, -160.625, -151.6015625, -142.578125, -133.5546875, -124.53125, -115.5078125, -106.484375, -97.4609375, -88.4375, -79.4140625, -70.390625, -61.3671875, -52.34375, -43.3203125, -34.296875, -25.2734375, -16.25, -7.2265625, 1.796875, 10.8203125, 19.84375, 28.8671875, 37.890625, 46.9140625, 55.9375, 64.9609375, 73.984375, 83.0078125, 92.03125, 101.0546875, 110.078125, 119.1015625, 128.125, 137.1484375, 146.171875, 155.1953125, 164.21875, 173.2421875, 182.265625, 191.2890625, 200.3125, 209.3359375, 218.359375, 227.3828125, 236.40625, 245.4296875, 254.453125, 263.4765625, 272.5]}, "gradients/decoder.transformer.h.22.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 89.0, 933.0], "bins": [-4926.447265625, -4848.41015625, -4770.373046875, -4692.3359375, -4614.298828125, -4536.26171875, -4458.224609375, -4380.1875, -4302.150390625, -4224.11328125, -4146.076171875, -4068.0390625, -3990.001953125, -3911.965087890625, -3833.927978515625, -3755.890869140625, -3677.85400390625, -3599.81689453125, -3521.77978515625, -3443.74267578125, -3365.70556640625, -3287.668701171875, -3209.631591796875, -3131.594482421875, -3053.557373046875, -2975.520263671875, -2897.483154296875, -2819.446044921875, -2741.4091796875, -2663.3720703125, -2585.3349609375, -2507.2978515625, -2429.2607421875, -2351.2236328125, -2273.1865234375, -2195.1494140625, -2117.1123046875, -2039.0753173828125, -1961.038330078125, -1883.001220703125, -1804.9642333984375, -1726.9271240234375, -1648.89013671875, -1570.85302734375, -1492.81591796875, -1414.77880859375, -1336.74169921875, -1258.7047119140625, -1180.6676025390625, -1102.6304931640625, -1024.593505859375, -946.556396484375, -868.519287109375, -790.482177734375, -712.4451293945312, -634.4080810546875, -556.3709106445312, -478.3338317871094, -400.2967529296875, -322.2596740722656, -244.22259521484375, -166.18551635742188, -88.1484375, -10.11138916015625, 67.92569732666016]}, "gradients/decoder.transformer.h.22.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 3.0, 2.0, 2.0, 5.0, 5.0, 7.0, 6.0, 18.0, 4.0, 14.0, 13.0, 16.0, 26.0, 20.0, 31.0, 29.0, 34.0, 35.0, 30.0, 36.0, 38.0, 38.0, 39.0, 45.0, 36.0, 44.0, 39.0, 49.0, 52.0, 33.0, 27.0, 41.0, 33.0, 22.0, 24.0, 15.0, 13.0, 15.0, 12.0, 12.0, 13.0, 8.0, 9.0, 5.0, 1.0, 6.0, 4.0, 1.0, 3.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-427.7646789550781, -413.6469421386719, -399.52923583984375, -385.4114990234375, -371.29376220703125, -357.1760559082031, -343.0583190917969, -328.94061279296875, -314.8228759765625, -300.70513916015625, -286.5874328613281, -272.4696960449219, -258.35198974609375, -244.2342529296875, -230.11651611328125, -215.99879455566406, -201.88107299804688, -187.7633514404297, -173.6456298828125, -159.52789306640625, -145.41017150878906, -131.29244995117188, -117.17472076416016, -103.05699157714844, -88.93927001953125, -74.82154846191406, -60.703819274902344, -46.58609390258789, -32.46836853027344, -18.35064697265625, -4.232917785644531, 9.884811401367188, 24.00250244140625, 38.1202278137207, 52.237953186035156, 66.35568237304688, 80.47340393066406, 94.59112548828125, 108.70885467529297, 122.82658386230469, 136.94430541992188, 151.06202697753906, 165.17974853515625, 179.2974853515625, 193.4152069091797, 207.53292846679688, 221.65066528320312, 235.7683868408203, 249.8861083984375, 264.00384521484375, 278.1215515136719, 292.2392883300781, 306.35699462890625, 320.4747314453125, 334.59246826171875, 348.710205078125, 362.8279113769531, 376.9456481933594, 391.0633544921875, 405.18109130859375, 419.298828125, 433.4165344238281, 447.5342712402344, 461.6519775390625, 475.76971435546875]}, "gradients/decoder.transformer.h.21.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 3.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 5.0, 2.0, 3.0, 11.0, 7.0, 2.0, 14.0, 20.0, 10.0, 14.0, 21.0, 36.0, 40.0, 31.0, 45.0, 42.0, 48.0, 59.0, 62.0, 70.0, 49.0, 51.0, 49.0, 42.0, 48.0, 32.0, 19.0, 28.0, 27.0, 21.0, 15.0, 14.0, 10.0, 10.0, 11.0, 10.0, 8.0, 6.0, 4.0, 5.0, 3.0, 1.0, 3.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-36.90625, -35.62646484375, -34.3466796875, -33.06689453125, -31.787109375, -30.50732421875, -29.2275390625, -27.94775390625, -26.66796875, -25.38818359375, -24.1083984375, -22.82861328125, -21.548828125, -20.26904296875, -18.9892578125, -17.70947265625, -16.4296875, -15.14990234375, -13.8701171875, -12.59033203125, -11.310546875, -10.03076171875, -8.7509765625, -7.47119140625, -6.19140625, -4.91162109375, -3.6318359375, -2.35205078125, -1.072265625, 0.20751953125, 1.4873046875, 2.76708984375, 4.046875, 5.32666015625, 6.6064453125, 7.88623046875, 9.166015625, 10.44580078125, 11.7255859375, 13.00537109375, 14.28515625, 15.56494140625, 16.8447265625, 18.12451171875, 19.404296875, 20.68408203125, 21.9638671875, 23.24365234375, 24.5234375, 25.80322265625, 27.0830078125, 28.36279296875, 29.642578125, 30.92236328125, 32.2021484375, 33.48193359375, 34.76171875, 36.04150390625, 37.3212890625, 38.60107421875, 39.880859375, 41.16064453125, 42.4404296875, 43.72021484375, 45.0]}, "gradients/decoder.transformer.h.21.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 3.0, 6.0, 8.0, 18.0, 15.0, 10.0, 26.0, 38.0, 41.0, 58.0, 82.0, 139.0, 171.0, 326.0, 472.0, 800.0, 1302.0, 2265.0, 4041.0, 7636.0, 16458.0, 57762.0, 1159893.0, 2806168.0, 95094.0, 19781.0, 9216.0, 4989.0, 2833.0, 1665.0, 1079.0, 637.0, 414.0, 259.0, 173.0, 105.0, 84.0, 53.0, 40.0, 34.0, 28.0, 19.0, 13.0, 9.0, 9.0, 10.0, 4.0, 5.0, 3.0, 1.0, 1.0, 1.0], "bins": [-125.6875, -122.1083984375, -118.529296875, -114.9501953125, -111.37109375, -107.7919921875, -104.212890625, -100.6337890625, -97.0546875, -93.4755859375, -89.896484375, -86.3173828125, -82.73828125, -79.1591796875, -75.580078125, -72.0009765625, -68.421875, -64.8427734375, -61.263671875, -57.6845703125, -54.10546875, -50.5263671875, -46.947265625, -43.3681640625, -39.7890625, -36.2099609375, -32.630859375, -29.0517578125, -25.47265625, -21.8935546875, -18.314453125, -14.7353515625, -11.15625, -7.5771484375, -3.998046875, -0.4189453125, 3.16015625, 6.7392578125, 10.318359375, 13.8974609375, 17.4765625, 21.0556640625, 24.634765625, 28.2138671875, 31.79296875, 35.3720703125, 38.951171875, 42.5302734375, 46.109375, 49.6884765625, 53.267578125, 56.8466796875, 60.42578125, 64.0048828125, 67.583984375, 71.1630859375, 74.7421875, 78.3212890625, 81.900390625, 85.4794921875, 89.05859375, 92.6376953125, 96.216796875, 99.7958984375, 103.375]}, "gradients/decoder.transformer.h.21.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 4.0, 3.0, 3.0, 3.0, 7.0, 3.0, 3.0, 12.0, 12.0, 14.0, 23.0, 25.0, 38.0, 49.0, 57.0, 83.0, 118.0, 163.0, 297.0, 543.0, 888.0, 683.0, 370.0, 206.0, 136.0, 98.0, 67.0, 50.0, 36.0, 16.0, 15.0, 14.0, 8.0, 12.0, 6.0, 7.0, 6.0, 4.0, 0.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-118.9375, -114.7939453125, -110.650390625, -106.5068359375, -102.36328125, -98.2197265625, -94.076171875, -89.9326171875, -85.7890625, -81.6455078125, -77.501953125, -73.3583984375, -69.21484375, -65.0712890625, -60.927734375, -56.7841796875, -52.640625, -48.4970703125, -44.353515625, -40.2099609375, -36.06640625, -31.9228515625, -27.779296875, -23.6357421875, -19.4921875, -15.3486328125, -11.205078125, -7.0615234375, -2.91796875, 1.2255859375, 5.369140625, 9.5126953125, 13.65625, 17.7998046875, 21.943359375, 26.0869140625, 30.23046875, 34.3740234375, 38.517578125, 42.6611328125, 46.8046875, 50.9482421875, 55.091796875, 59.2353515625, 63.37890625, 67.5224609375, 71.666015625, 75.8095703125, 79.953125, 84.0966796875, 88.240234375, 92.3837890625, 96.52734375, 100.6708984375, 104.814453125, 108.9580078125, 113.1015625, 117.2451171875, 121.388671875, 125.5322265625, 129.67578125, 133.8193359375, 137.962890625, 142.1064453125, 146.25]}, "gradients/decoder.transformer.h.21.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 3.0, 3.0, 2.0, 4.0, 10.0, 10.0, 11.0, 20.0, 27.0, 45.0, 77.0, 102.0, 209.0, 396.0, 757.0, 1661.0, 4227.0, 12525.0, 48051.0, 3179239.0, 890164.0, 39170.0, 10715.0, 3771.0, 1489.0, 734.0, 369.0, 176.0, 107.0, 73.0, 52.0, 29.0, 15.0, 16.0, 9.0, 2.0, 6.0, 3.0, 6.0, 1.0, 2.0, 1.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-356.0, -344.8515625, -333.703125, -322.5546875, -311.40625, -300.2578125, -289.109375, -277.9609375, -266.8125, -255.6640625, -244.515625, -233.3671875, -222.21875, -211.0703125, -199.921875, -188.7734375, -177.625, -166.4765625, -155.328125, -144.1796875, -133.03125, -121.8828125, -110.734375, -99.5859375, -88.4375, -77.2890625, -66.140625, -54.9921875, -43.84375, -32.6953125, -21.546875, -10.3984375, 0.75, 11.8984375, 23.046875, 34.1953125, 45.34375, 56.4921875, 67.640625, 78.7890625, 89.9375, 101.0859375, 112.234375, 123.3828125, 134.53125, 145.6796875, 156.828125, 167.9765625, 179.125, 190.2734375, 201.421875, 212.5703125, 223.71875, 234.8671875, 246.015625, 257.1640625, 268.3125, 279.4609375, 290.609375, 301.7578125, 312.90625, 324.0546875, 335.203125, 346.3515625, 357.5]}, "gradients/decoder.transformer.h.21.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 1.0, 9.0, 7.0, 14.0, 37.0, 70.0, 175.0, 395.0, 174.0, 81.0, 23.0, 18.0, 6.0, 3.0, 1.0, 0.0, 2.0, 1.0], "bins": [-1845.301025390625, -1810.799560546875, -1776.2979736328125, -1741.79638671875, -1707.294921875, -1672.79345703125, -1638.2918701171875, -1603.790283203125, -1569.288818359375, -1534.787353515625, -1500.2857666015625, -1465.7841796875, -1431.28271484375, -1396.78125, -1362.2796630859375, -1327.778076171875, -1293.276611328125, -1258.775146484375, -1224.2735595703125, -1189.77197265625, -1155.2705078125, -1120.76904296875, -1086.2674560546875, -1051.765869140625, -1017.264404296875, -982.7628784179688, -948.2613525390625, -913.7598266601562, -879.25830078125, -844.7567749023438, -810.2552490234375, -775.7537231445312, -741.2521362304688, -706.7506103515625, -672.2490844726562, -637.74755859375, -603.2460327148438, -568.7445068359375, -534.2429809570312, -499.741455078125, -465.23992919921875, -430.7384033203125, -396.23687744140625, -361.7353515625, -327.23382568359375, -292.7322998046875, -258.23077392578125, -223.729248046875, -189.22772216796875, -154.7261962890625, -120.22467041015625, -85.72314453125, -51.22161865234375, -16.7200927734375, 17.78143310546875, 52.282958984375, 86.78448486328125, 121.2860107421875, 155.78753662109375, 190.2890625, 224.79058837890625, 259.2921142578125, 293.79364013671875, 328.295166015625, 362.79669189453125]}, "gradients/decoder.transformer.h.21.ln_2.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 4.0, 2.0, 2.0, 5.0, 4.0, 4.0, 10.0, 6.0, 5.0, 17.0, 13.0, 15.0, 13.0, 15.0, 13.0, 18.0, 24.0, 26.0, 47.0, 23.0, 29.0, 37.0, 38.0, 34.0, 38.0, 31.0, 44.0, 42.0, 40.0, 28.0, 31.0, 32.0, 28.0, 25.0, 28.0, 34.0, 23.0, 23.0, 18.0, 22.0, 28.0, 14.0, 17.0, 11.0, 10.0, 5.0, 10.0, 4.0, 6.0, 6.0, 6.0, 1.0, 3.0, 1.0, 2.0, 1.0], "bins": [-365.4508361816406, -354.922607421875, -344.3944091796875, -333.8661804199219, -323.3379821777344, -312.80975341796875, -302.28155517578125, -291.7533264160156, -281.22509765625, -270.6968688964844, -260.1686706542969, -249.6404571533203, -239.11224365234375, -228.58401489257812, -218.05580139160156, -207.527587890625, -196.9993896484375, -186.47117614746094, -175.94296264648438, -165.4147491455078, -154.88653564453125, -144.35830688476562, -133.83009338378906, -123.3018798828125, -112.77366638183594, -102.24545288085938, -91.71723937988281, -81.18901824951172, -70.66080474853516, -60.132591247558594, -49.604373931884766, -39.07615661621094, -28.54791259765625, -18.019697189331055, -7.491481781005859, 3.036733627319336, 13.564949035644531, 24.093162536621094, 34.62137985229492, 45.14959716796875, 55.67781066894531, 66.20602416992188, 76.73423767089844, 87.26245880126953, 97.7906723022461, 108.31888580322266, 118.84710693359375, 129.3753204345703, 139.90353393554688, 150.43174743652344, 160.9599609375, 171.48817443847656, 182.01638793945312, 192.54461669921875, 203.0728302001953, 213.60104370117188, 224.12925720214844, 234.657470703125, 245.18568420410156, 255.71389770507812, 266.24212646484375, 276.77032470703125, 287.2985534667969, 297.8267822265625, 308.35498046875]}, "gradients/decoder.transformer.h.21.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 1.0, 1.0, 5.0, 1.0, 4.0, 10.0, 5.0, 4.0, 13.0, 12.0, 13.0, 20.0, 20.0, 17.0, 27.0, 33.0, 34.0, 46.0, 47.0, 56.0, 67.0, 64.0, 57.0, 40.0, 57.0, 53.0, 39.0, 35.0, 31.0, 28.0, 27.0, 28.0, 20.0, 8.0, 21.0, 7.0, 17.0, 11.0, 8.0, 6.0, 3.0, 3.0, 6.0, 2.0, 2.0, 2.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-39.9375, -38.5888671875, -37.240234375, -35.8916015625, -34.54296875, -33.1943359375, -31.845703125, -30.4970703125, -29.1484375, -27.7998046875, -26.451171875, -25.1025390625, -23.75390625, -22.4052734375, -21.056640625, -19.7080078125, -18.359375, -17.0107421875, -15.662109375, -14.3134765625, -12.96484375, -11.6162109375, -10.267578125, -8.9189453125, -7.5703125, -6.2216796875, -4.873046875, -3.5244140625, -2.17578125, -0.8271484375, 0.521484375, 1.8701171875, 3.21875, 4.5673828125, 5.916015625, 7.2646484375, 8.61328125, 9.9619140625, 11.310546875, 12.6591796875, 14.0078125, 15.3564453125, 16.705078125, 18.0537109375, 19.40234375, 20.7509765625, 22.099609375, 23.4482421875, 24.796875, 26.1455078125, 27.494140625, 28.8427734375, 30.19140625, 31.5400390625, 32.888671875, 34.2373046875, 35.5859375, 36.9345703125, 38.283203125, 39.6318359375, 40.98046875, 42.3291015625, 43.677734375, 45.0263671875, 46.375]}, "gradients/decoder.transformer.h.21.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 6.0, 5.0, 10.0, 9.0, 12.0, 15.0, 13.0, 22.0, 35.0, 61.0, 64.0, 93.0, 92.0, 155.0, 200.0, 288.0, 415.0, 653.0, 934.0, 1702.0, 3028.0, 6468.0, 17642.0, 115315.0, 793041.0, 79881.0, 15193.0, 5763.0, 2850.0, 1589.0, 962.0, 593.0, 392.0, 273.0, 228.0, 140.0, 123.0, 63.0, 66.0, 40.0, 37.0, 26.0, 24.0, 15.0, 6.0, 6.0, 10.0, 2.0, 0.0, 4.0, 1.0, 2.0, 1.0, 1.0, 2.0], "bins": [-23.359375, -22.66064453125, -21.9619140625, -21.26318359375, -20.564453125, -19.86572265625, -19.1669921875, -18.46826171875, -17.76953125, -17.07080078125, -16.3720703125, -15.67333984375, -14.974609375, -14.27587890625, -13.5771484375, -12.87841796875, -12.1796875, -11.48095703125, -10.7822265625, -10.08349609375, -9.384765625, -8.68603515625, -7.9873046875, -7.28857421875, -6.58984375, -5.89111328125, -5.1923828125, -4.49365234375, -3.794921875, -3.09619140625, -2.3974609375, -1.69873046875, -1.0, -0.30126953125, 0.3974609375, 1.09619140625, 1.794921875, 2.49365234375, 3.1923828125, 3.89111328125, 4.58984375, 5.28857421875, 5.9873046875, 6.68603515625, 7.384765625, 8.08349609375, 8.7822265625, 9.48095703125, 10.1796875, 10.87841796875, 11.5771484375, 12.27587890625, 12.974609375, 13.67333984375, 14.3720703125, 15.07080078125, 15.76953125, 16.46826171875, 17.1669921875, 17.86572265625, 18.564453125, 19.26318359375, 19.9619140625, 20.66064453125, 21.359375]}, "gradients/decoder.transformer.h.21.crossattention.c_attn.bias": {"_type": "histogram", "values": [2.0, 2.0, 2.0, 4.0, 2.0, 6.0, 4.0, 7.0, 6.0, 8.0, 8.0, 13.0, 14.0, 11.0, 16.0, 16.0, 20.0, 23.0, 22.0, 28.0, 20.0, 38.0, 37.0, 42.0, 45.0, 51.0, 42.0, 1068.0, 43.0, 39.0, 38.0, 41.0, 30.0, 39.0, 36.0, 26.0, 30.0, 38.0, 23.0, 18.0, 16.0, 9.0, 12.0, 13.0, 5.0, 11.0, 2.0, 4.0, 3.0, 4.0, 2.0, 0.0, 1.0, 1.0, 2.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-20.65625, -19.9033203125, -19.150390625, -18.3974609375, -17.64453125, -16.8916015625, -16.138671875, -15.3857421875, -14.6328125, -13.8798828125, -13.126953125, -12.3740234375, -11.62109375, -10.8681640625, -10.115234375, -9.3623046875, -8.609375, -7.8564453125, -7.103515625, -6.3505859375, -5.59765625, -4.8447265625, -4.091796875, -3.3388671875, -2.5859375, -1.8330078125, -1.080078125, -0.3271484375, 0.42578125, 1.1787109375, 1.931640625, 2.6845703125, 3.4375, 4.1904296875, 4.943359375, 5.6962890625, 6.44921875, 7.2021484375, 7.955078125, 8.7080078125, 9.4609375, 10.2138671875, 10.966796875, 11.7197265625, 12.47265625, 13.2255859375, 13.978515625, 14.7314453125, 15.484375, 16.2373046875, 16.990234375, 17.7431640625, 18.49609375, 19.2490234375, 20.001953125, 20.7548828125, 21.5078125, 22.2607421875, 23.013671875, 23.7666015625, 24.51953125, 25.2724609375, 26.025390625, 26.7783203125, 27.53125]}, "gradients/decoder.transformer.h.21.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 3.0, 1.0, 1.0, 2.0, 5.0, 6.0, 8.0, 4.0, 15.0, 13.0, 30.0, 32.0, 53.0, 55.0, 81.0, 124.0, 192.0, 313.0, 435.0, 644.0, 970.0, 1410.0, 2340.0, 4215.0, 7722.0, 15397.0, 36596.0, 112409.0, 1426430.0, 352622.0, 78627.0, 28164.0, 12291.0, 6347.0, 3682.0, 2060.0, 1233.0, 853.0, 521.0, 380.0, 245.0, 163.0, 137.0, 89.0, 60.0, 50.0, 37.0, 31.0, 17.0, 11.0, 6.0, 3.0, 5.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0], "bins": [-6.53125, -6.33245849609375, -6.1336669921875, -5.93487548828125, -5.736083984375, -5.53729248046875, -5.3385009765625, -5.13970947265625, -4.94091796875, -4.74212646484375, -4.5433349609375, -4.34454345703125, -4.145751953125, -3.94696044921875, -3.7481689453125, -3.54937744140625, -3.3505859375, -3.15179443359375, -2.9530029296875, -2.75421142578125, -2.555419921875, -2.35662841796875, -2.1578369140625, -1.95904541015625, -1.76025390625, -1.56146240234375, -1.3626708984375, -1.16387939453125, -0.965087890625, -0.76629638671875, -0.5675048828125, -0.36871337890625, -0.169921875, 0.02886962890625, 0.2276611328125, 0.42645263671875, 0.625244140625, 0.82403564453125, 1.0228271484375, 1.22161865234375, 1.42041015625, 1.61920166015625, 1.8179931640625, 2.01678466796875, 2.215576171875, 2.41436767578125, 2.6131591796875, 2.81195068359375, 3.0107421875, 3.20953369140625, 3.4083251953125, 3.60711669921875, 3.805908203125, 4.00469970703125, 4.2034912109375, 4.40228271484375, 4.60107421875, 4.79986572265625, 4.9986572265625, 5.19744873046875, 5.396240234375, 5.59503173828125, 5.7938232421875, 5.99261474609375, 6.19140625]}, "gradients/decoder.transformer.h.21.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 2.0, 3.0, 2.0, 6.0, 4.0, 8.0, 8.0, 12.0, 10.0, 19.0, 27.0, 25.0, 29.0, 43.0, 61.0, 103.0, 121.0, 156.0, 101.0, 62.0, 45.0, 28.0, 26.0, 23.0, 23.0, 12.0, 6.0, 6.0, 9.0, 6.0, 3.0, 2.0, 2.0, 2.0, 2.0, 1.0, 1.0, 4.0, 4.0, 4.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.67431640625, -0.6525726318359375, -0.630828857421875, -0.6090850830078125, -0.58734130859375, -0.5655975341796875, -0.543853759765625, -0.5221099853515625, -0.5003662109375, -0.4786224365234375, -0.456878662109375, -0.4351348876953125, -0.41339111328125, -0.3916473388671875, -0.369903564453125, -0.3481597900390625, -0.326416015625, -0.3046722412109375, -0.282928466796875, -0.2611846923828125, -0.23944091796875, -0.2176971435546875, -0.195953369140625, -0.1742095947265625, -0.1524658203125, -0.1307220458984375, -0.108978271484375, -0.0872344970703125, -0.06549072265625, -0.0437469482421875, -0.022003173828125, -0.0002593994140625, 0.021484375, 0.0432281494140625, 0.064971923828125, 0.0867156982421875, 0.10845947265625, 0.1302032470703125, 0.151947021484375, 0.1736907958984375, 0.1954345703125, 0.2171783447265625, 0.238922119140625, 0.2606658935546875, 0.28240966796875, 0.3041534423828125, 0.325897216796875, 0.3476409912109375, 0.369384765625, 0.3911285400390625, 0.412872314453125, 0.4346160888671875, 0.45635986328125, 0.4781036376953125, 0.499847412109375, 0.5215911865234375, 0.5433349609375, 0.5650787353515625, 0.586822509765625, 0.6085662841796875, 0.63031005859375, 0.6520538330078125, 0.673797607421875, 0.6955413818359375, 0.71728515625]}, "gradients/decoder.transformer.h.21.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 4.0, 2.0, 2.0, 2.0, 0.0, 2.0, 4.0, 3.0, 2.0, 5.0, 6.0, 6.0, 5.0, 7.0, 9.0, 26.0, 18.0, 30.0, 39.0, 48.0, 86.0, 174.0, 588.0, 1046877.0, 248.0, 126.0, 55.0, 46.0, 25.0, 29.0, 15.0, 21.0, 9.0, 13.0, 9.0, 3.0, 6.0, 2.0, 4.0, 3.0, 3.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-22.59375, -21.9140625, -21.234375, -20.5546875, -19.875, -19.1953125, -18.515625, -17.8359375, -17.15625, -16.4765625, -15.796875, -15.1171875, -14.4375, -13.7578125, -13.078125, -12.3984375, -11.71875, -11.0390625, -10.359375, -9.6796875, -9.0, -8.3203125, -7.640625, -6.9609375, -6.28125, -5.6015625, -4.921875, -4.2421875, -3.5625, -2.8828125, -2.203125, -1.5234375, -0.84375, -0.1640625, 0.515625, 1.1953125, 1.875, 2.5546875, 3.234375, 3.9140625, 4.59375, 5.2734375, 5.953125, 6.6328125, 7.3125, 7.9921875, 8.671875, 9.3515625, 10.03125, 10.7109375, 11.390625, 12.0703125, 12.75, 13.4296875, 14.109375, 14.7890625, 15.46875, 16.1484375, 16.828125, 17.5078125, 18.1875, 18.8671875, 19.546875, 20.2265625, 20.90625]}, "gradients/decoder.transformer.h.21.ln_cross_attn.weight": {"_type": "histogram", "values": [413.0, 609.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.49994513392448425, -0.004762709140777588, 0.4904197156429291, 0.9856021404266357, 1.48078453540802, 1.9759669303894043, 2.471149444580078, 2.966331720352173, 3.4615142345428467, 3.9566967487335205, 4.451879024505615, 4.947061538696289, 5.442244052886963, 5.937426567077637, 6.432608604431152, 6.927791118621826, 7.4229736328125, 7.918156147003174, 8.413338661193848, 8.908520698547363, 9.403703689575195, 9.898885726928711, 10.394067764282227, 10.889250755310059, 11.384432792663574, 11.87961483001709, 12.374797821044922, 12.869979858398438, 13.36516284942627, 13.860344886779785, 14.355527877807617, 14.850709915161133, 15.345892906188965, 15.84107494354248, 16.336257934570312, 16.831439971923828, 17.326622009277344, 17.82180404663086, 18.316987991333008, 18.812170028686523, 19.30735206604004, 19.802534103393555, 20.29771614074707, 20.79290008544922, 21.288082122802734, 21.78326416015625, 22.278446197509766, 22.77362823486328, 23.26881217956543, 23.763994216918945, 24.25917625427246, 24.75436019897461, 25.249542236328125, 25.74472427368164, 26.239906311035156, 26.735088348388672, 27.230270385742188, 27.725452423095703, 28.22063446044922, 28.715818405151367, 29.211000442504883, 29.7061824798584, 30.201364517211914, 30.69654655456543, 31.191730499267578]}, "gradients/decoder.transformer.h.21.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 3.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 5.0, 5.0, 4.0, 11.0, 7.0, 6.0, 12.0, 15.0, 15.0, 16.0, 25.0, 13.0, 17.0, 24.0, 30.0, 38.0, 36.0, 21.0, 33.0, 38.0, 41.0, 36.0, 32.0, 44.0, 49.0, 33.0, 31.0, 33.0, 36.0, 25.0, 22.0, 25.0, 25.0, 28.0, 25.0, 21.0, 22.0, 19.0, 12.0, 14.0, 9.0, 14.0, 5.0, 6.0, 3.0, 7.0, 1.0, 3.0, 6.0, 6.0, 2.0, 3.0, 1.0, 1.0, 1.0, 2.0], "bins": [-1.2812654972076416, -1.2406190633773804, -1.1999726295471191, -1.1593260765075684, -1.1186796426773071, -1.078033208847046, -1.0373867750167847, -0.9967403411865234, -0.9560938477516174, -0.9154474139213562, -0.8748009204864502, -0.834154486656189, -0.7935080528259277, -0.7528615593910217, -0.7122151255607605, -0.6715686321258545, -0.6309221982955933, -0.590275764465332, -0.549629271030426, -0.5089828372001648, -0.4683363735675812, -0.42768990993499756, -0.38704347610473633, -0.3463970124721527, -0.3057505488395691, -0.2651040852069855, -0.22445763647556305, -0.18381118774414062, -0.143164724111557, -0.10251826047897339, -0.061871811747550964, -0.02122536301612854, 0.019420981407165527, 0.06006743758916855, 0.10071389377117157, 0.141360342502594, 0.1820068061351776, 0.22265326976776123, 0.26329970359802246, 0.3039461672306061, 0.3445926308631897, 0.3852390944957733, 0.42588555812835693, 0.46653199195861816, 0.5071784257888794, 0.5478249192237854, 0.5884713530540466, 0.6291178464889526, 0.6697642803192139, 0.7104107141494751, 0.7510572075843811, 0.7917036414146423, 0.8323501348495483, 0.8729965686798096, 0.9136430025100708, 0.954289436340332, 0.994935929775238, 1.035582423210144, 1.0762288570404053, 1.1168752908706665, 1.1575217247009277, 1.1981682777404785, 1.2388147115707397, 1.279461145401001, 1.3201075792312622]}, "gradients/decoder.transformer.h.21.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 1.0, 1.0, 5.0, 1.0, 4.0, 10.0, 5.0, 4.0, 13.0, 12.0, 13.0, 20.0, 20.0, 17.0, 27.0, 33.0, 34.0, 46.0, 47.0, 56.0, 67.0, 64.0, 57.0, 40.0, 57.0, 53.0, 39.0, 35.0, 31.0, 28.0, 27.0, 28.0, 20.0, 8.0, 21.0, 7.0, 17.0, 11.0, 8.0, 6.0, 3.0, 3.0, 6.0, 2.0, 2.0, 2.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-39.9375, -38.5888671875, -37.240234375, -35.8916015625, -34.54296875, -33.1943359375, -31.845703125, -30.4970703125, -29.1484375, -27.7998046875, -26.451171875, -25.1025390625, -23.75390625, -22.4052734375, -21.056640625, -19.7080078125, -18.359375, -17.0107421875, -15.662109375, -14.3134765625, -12.96484375, -11.6162109375, -10.267578125, -8.9189453125, -7.5703125, -6.2216796875, -4.873046875, -3.5244140625, -2.17578125, -0.8271484375, 0.521484375, 1.8701171875, 3.21875, 4.5673828125, 5.916015625, 7.2646484375, 8.61328125, 9.9619140625, 11.310546875, 12.6591796875, 14.0078125, 15.3564453125, 16.705078125, 18.0537109375, 19.40234375, 20.7509765625, 22.099609375, 23.4482421875, 24.796875, 26.1455078125, 27.494140625, 28.8427734375, 30.19140625, 31.5400390625, 32.888671875, 34.2373046875, 35.5859375, 36.9345703125, 38.283203125, 39.6318359375, 40.98046875, 42.3291015625, 43.677734375, 45.0263671875, 46.375]}, "gradients/decoder.transformer.h.21.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 4.0, 0.0, 4.0, 4.0, 8.0, 8.0, 17.0, 22.0, 29.0, 30.0, 58.0, 97.0, 167.0, 240.0, 466.0, 824.0, 1519.0, 3293.0, 6824.0, 16059.0, 43105.0, 148433.0, 542751.0, 196281.0, 53491.0, 19209.0, 8142.0, 3552.0, 1809.0, 923.0, 452.0, 286.0, 168.0, 93.0, 53.0, 40.0, 28.0, 23.0, 19.0, 9.0, 3.0, 8.0, 4.0, 6.0, 2.0, 2.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-20.765625, -20.06396484375, -19.3623046875, -18.66064453125, -17.958984375, -17.25732421875, -16.5556640625, -15.85400390625, -15.15234375, -14.45068359375, -13.7490234375, -13.04736328125, -12.345703125, -11.64404296875, -10.9423828125, -10.24072265625, -9.5390625, -8.83740234375, -8.1357421875, -7.43408203125, -6.732421875, -6.03076171875, -5.3291015625, -4.62744140625, -3.92578125, -3.22412109375, -2.5224609375, -1.82080078125, -1.119140625, -0.41748046875, 0.2841796875, 0.98583984375, 1.6875, 2.38916015625, 3.0908203125, 3.79248046875, 4.494140625, 5.19580078125, 5.8974609375, 6.59912109375, 7.30078125, 8.00244140625, 8.7041015625, 9.40576171875, 10.107421875, 10.80908203125, 11.5107421875, 12.21240234375, 12.9140625, 13.61572265625, 14.3173828125, 15.01904296875, 15.720703125, 16.42236328125, 17.1240234375, 17.82568359375, 18.52734375, 19.22900390625, 19.9306640625, 20.63232421875, 21.333984375, 22.03564453125, 22.7373046875, 23.43896484375, 24.140625]}, "gradients/decoder.transformer.h.21.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 2.0, 2.0, 3.0, 2.0, 0.0, 1.0, 5.0, 3.0, 7.0, 8.0, 10.0, 13.0, 13.0, 12.0, 12.0, 15.0, 22.0, 16.0, 24.0, 23.0, 42.0, 34.0, 27.0, 36.0, 42.0, 38.0, 36.0, 60.0, 2001.0, 102.0, 49.0, 30.0, 39.0, 33.0, 29.0, 32.0, 31.0, 31.0, 27.0, 19.0, 16.0, 16.0, 16.0, 16.0, 10.0, 13.0, 6.0, 10.0, 6.0, 5.0, 2.0, 6.0, 6.0, 2.0, 1.0, 4.0, 1.0], "bins": [-142.125, -138.142578125, -134.16015625, -130.177734375, -126.1953125, -122.212890625, -118.23046875, -114.248046875, -110.265625, -106.283203125, -102.30078125, -98.318359375, -94.3359375, -90.353515625, -86.37109375, -82.388671875, -78.40625, -74.423828125, -70.44140625, -66.458984375, -62.4765625, -58.494140625, -54.51171875, -50.529296875, -46.546875, -42.564453125, -38.58203125, -34.599609375, -30.6171875, -26.634765625, -22.65234375, -18.669921875, -14.6875, -10.705078125, -6.72265625, -2.740234375, 1.2421875, 5.224609375, 9.20703125, 13.189453125, 17.171875, 21.154296875, 25.13671875, 29.119140625, 33.1015625, 37.083984375, 41.06640625, 45.048828125, 49.03125, 53.013671875, 56.99609375, 60.978515625, 64.9609375, 68.943359375, 72.92578125, 76.908203125, 80.890625, 84.873046875, 88.85546875, 92.837890625, 96.8203125, 100.802734375, 104.78515625, 108.767578125, 112.75]}, "gradients/decoder.transformer.h.21.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 3.0, 2.0, 2.0, 4.0, 7.0, 3.0, 5.0, 6.0, 9.0, 6.0, 14.0, 8.0, 14.0, 19.0, 18.0, 11.0, 21.0, 35.0, 48.0, 56.0, 86.0, 99.0, 166.0, 283.0, 500.0, 1036.0, 25152.0, 3112123.0, 3963.0, 756.0, 426.0, 252.0, 159.0, 96.0, 59.0, 62.0, 34.0, 27.0, 21.0, 25.0, 15.0, 12.0, 14.0, 10.0, 12.0, 12.0, 7.0, 6.0, 3.0, 5.0, 1.0, 0.0, 2.0, 3.0, 2.0, 2.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-193.125, -186.30859375, -179.4921875, -172.67578125, -165.859375, -159.04296875, -152.2265625, -145.41015625, -138.59375, -131.77734375, -124.9609375, -118.14453125, -111.328125, -104.51171875, -97.6953125, -90.87890625, -84.0625, -77.24609375, -70.4296875, -63.61328125, -56.796875, -49.98046875, -43.1640625, -36.34765625, -29.53125, -22.71484375, -15.8984375, -9.08203125, -2.265625, 4.55078125, 11.3671875, 18.18359375, 25.0, 31.81640625, 38.6328125, 45.44921875, 52.265625, 59.08203125, 65.8984375, 72.71484375, 79.53125, 86.34765625, 93.1640625, 99.98046875, 106.796875, 113.61328125, 120.4296875, 127.24609375, 134.0625, 140.87890625, 147.6953125, 154.51171875, 161.328125, 168.14453125, 174.9609375, 181.77734375, 188.59375, 195.41015625, 202.2265625, 209.04296875, 215.859375, 222.67578125, 229.4921875, 236.30859375, 243.125]}, "gradients/decoder.transformer.h.21.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 7.0, 150.0, 818.0, 44.0, 2.0], "bins": [-1352.915771484375, -1330.8555908203125, -1308.7955322265625, -1286.7353515625, -1264.67529296875, -1242.6151123046875, -1220.554931640625, -1198.494873046875, -1176.4346923828125, -1154.37451171875, -1132.314453125, -1110.2542724609375, -1088.194091796875, -1066.134033203125, -1044.0738525390625, -1022.0137329101562, -999.95361328125, -977.8934936523438, -955.8333740234375, -933.773193359375, -911.7130737304688, -889.6529541015625, -867.5927734375, -845.5326538085938, -823.4725341796875, -801.4124145507812, -779.352294921875, -757.2921142578125, -735.2319946289062, -713.171875, -691.1116943359375, -669.0515747070312, -646.9913940429688, -624.9312744140625, -602.87109375, -580.8109741210938, -558.7508544921875, -536.6907348632812, -514.630615234375, -492.5704345703125, -470.5103454589844, -448.4501953125, -426.39007568359375, -404.3299560546875, -382.2698059082031, -360.20965576171875, -338.1495361328125, -316.08941650390625, -294.0292663574219, -271.9691162109375, -249.90899658203125, -227.84886169433594, -205.78872680664062, -183.7285919189453, -161.66845703125, -139.6083221435547, -117.54818725585938, -95.48805236816406, -73.42791748046875, -51.36778259277344, -29.307647705078125, -7.2475128173828125, 14.8126220703125, 36.87275695800781, 58.93289566040039]}, "gradients/decoder.transformer.h.21.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 3.0, 1.0, 3.0, 8.0, 9.0, 13.0, 5.0, 11.0, 12.0, 11.0, 12.0, 25.0, 24.0, 29.0, 20.0, 31.0, 33.0, 44.0, 26.0, 40.0, 32.0, 56.0, 35.0, 48.0, 40.0, 40.0, 38.0, 31.0, 37.0, 38.0, 41.0, 30.0, 21.0, 22.0, 24.0, 15.0, 22.0, 11.0, 12.0, 16.0, 5.0, 7.0, 9.0, 6.0, 7.0, 6.0, 2.0, 2.0, 1.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-456.897705078125, -442.4989013671875, -428.10009765625, -413.7012939453125, -399.302490234375, -384.9036865234375, -370.5048828125, -356.1060791015625, -341.707275390625, -327.3084716796875, -312.90966796875, -298.5108642578125, -284.112060546875, -269.7132568359375, -255.314453125, -240.9156494140625, -226.51683044433594, -212.11802673339844, -197.71922302246094, -183.32041931152344, -168.92161560058594, -154.52279663085938, -140.12399291992188, -125.7251968383789, -111.3263931274414, -96.9275894165039, -82.5287857055664, -68.12997436523438, -53.73117446899414, -39.332366943359375, -24.933563232421875, -10.534759521484375, 3.864044189453125, 18.262847900390625, 32.661651611328125, 47.06045913696289, 61.45926284790039, 75.85807037353516, 90.25687408447266, 104.65567779541016, 119.05448150634766, 133.4532928466797, 147.8520965576172, 162.2509002685547, 176.6497039794922, 191.0485076904297, 205.4473114013672, 219.8461151123047, 234.2449188232422, 248.6437225341797, 263.04254150390625, 277.44134521484375, 291.84014892578125, 306.23895263671875, 320.63775634765625, 335.03656005859375, 349.43536376953125, 363.83416748046875, 378.23297119140625, 392.63177490234375, 407.03057861328125, 421.42938232421875, 435.82818603515625, 450.22698974609375, 464.62579345703125]}, "gradients/decoder.transformer.h.20.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 2.0, 4.0, 2.0, 6.0, 5.0, 8.0, 3.0, 13.0, 10.0, 12.0, 23.0, 21.0, 14.0, 22.0, 33.0, 38.0, 43.0, 40.0, 53.0, 60.0, 70.0, 53.0, 45.0, 47.0, 60.0, 34.0, 41.0, 33.0, 26.0, 31.0, 26.0, 24.0, 15.0, 13.0, 13.0, 13.0, 14.0, 10.0, 6.0, 6.0, 3.0, 5.0, 5.0, 0.0, 3.0, 1.0, 2.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-38.6875, -37.35205078125, -36.0166015625, -34.68115234375, -33.345703125, -32.01025390625, -30.6748046875, -29.33935546875, -28.00390625, -26.66845703125, -25.3330078125, -23.99755859375, -22.662109375, -21.32666015625, -19.9912109375, -18.65576171875, -17.3203125, -15.98486328125, -14.6494140625, -13.31396484375, -11.978515625, -10.64306640625, -9.3076171875, -7.97216796875, -6.63671875, -5.30126953125, -3.9658203125, -2.63037109375, -1.294921875, 0.04052734375, 1.3759765625, 2.71142578125, 4.046875, 5.38232421875, 6.7177734375, 8.05322265625, 9.388671875, 10.72412109375, 12.0595703125, 13.39501953125, 14.73046875, 16.06591796875, 17.4013671875, 18.73681640625, 20.072265625, 21.40771484375, 22.7431640625, 24.07861328125, 25.4140625, 26.74951171875, 28.0849609375, 29.42041015625, 30.755859375, 32.09130859375, 33.4267578125, 34.76220703125, 36.09765625, 37.43310546875, 38.7685546875, 40.10400390625, 41.439453125, 42.77490234375, 44.1103515625, 45.44580078125, 46.78125]}, "gradients/decoder.transformer.h.20.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 4.0, 2.0, 3.0, 4.0, 3.0, 5.0, 2.0, 1.0, 6.0, 13.0, 6.0, 30.0, 58.0, 109.0, 266.0, 537.0, 1191.0, 2902.0, 8685.0, 38719.0, 3865235.0, 252192.0, 15825.0, 4989.0, 1906.0, 842.0, 368.0, 177.0, 104.0, 35.0, 17.0, 8.0, 3.0, 3.0, 5.0, 6.0, 2.0, 3.0, 8.0, 1.0, 2.0, 3.0, 5.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0], "bins": [-227.25, -219.853515625, -212.45703125, -205.060546875, -197.6640625, -190.267578125, -182.87109375, -175.474609375, -168.078125, -160.681640625, -153.28515625, -145.888671875, -138.4921875, -131.095703125, -123.69921875, -116.302734375, -108.90625, -101.509765625, -94.11328125, -86.716796875, -79.3203125, -71.923828125, -64.52734375, -57.130859375, -49.734375, -42.337890625, -34.94140625, -27.544921875, -20.1484375, -12.751953125, -5.35546875, 2.041015625, 9.4375, 16.833984375, 24.23046875, 31.626953125, 39.0234375, 46.419921875, 53.81640625, 61.212890625, 68.609375, 76.005859375, 83.40234375, 90.798828125, 98.1953125, 105.591796875, 112.98828125, 120.384765625, 127.78125, 135.177734375, 142.57421875, 149.970703125, 157.3671875, 164.763671875, 172.16015625, 179.556640625, 186.953125, 194.349609375, 201.74609375, 209.142578125, 216.5390625, 223.935546875, 231.33203125, 238.728515625, 246.125]}, "gradients/decoder.transformer.h.20.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 0.0, 1.0, 1.0, 5.0, 4.0, 4.0, 5.0, 6.0, 11.0, 6.0, 18.0, 18.0, 19.0, 17.0, 35.0, 62.0, 71.0, 124.0, 185.0, 356.0, 724.0, 1021.0, 581.0, 278.0, 158.0, 89.0, 75.0, 54.0, 42.0, 30.0, 17.0, 20.0, 9.0, 13.0, 7.0, 5.0, 5.0, 3.0, 1.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-124.4375, -120.4697265625, -116.501953125, -112.5341796875, -108.56640625, -104.5986328125, -100.630859375, -96.6630859375, -92.6953125, -88.7275390625, -84.759765625, -80.7919921875, -76.82421875, -72.8564453125, -68.888671875, -64.9208984375, -60.953125, -56.9853515625, -53.017578125, -49.0498046875, -45.08203125, -41.1142578125, -37.146484375, -33.1787109375, -29.2109375, -25.2431640625, -21.275390625, -17.3076171875, -13.33984375, -9.3720703125, -5.404296875, -1.4365234375, 2.53125, 6.4990234375, 10.466796875, 14.4345703125, 18.40234375, 22.3701171875, 26.337890625, 30.3056640625, 34.2734375, 38.2412109375, 42.208984375, 46.1767578125, 50.14453125, 54.1123046875, 58.080078125, 62.0478515625, 66.015625, 69.9833984375, 73.951171875, 77.9189453125, 81.88671875, 85.8544921875, 89.822265625, 93.7900390625, 97.7578125, 101.7255859375, 105.693359375, 109.6611328125, 113.62890625, 117.5966796875, 121.564453125, 125.5322265625, 129.5]}, "gradients/decoder.transformer.h.20.mlp.c_fc.weight": {"_type": "histogram", "values": [3.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 2.0, 0.0, 4.0, 2.0, 5.0, 15.0, 12.0, 19.0, 28.0, 36.0, 51.0, 65.0, 102.0, 139.0, 238.0, 412.0, 801.0, 1535.0, 3423.0, 8628.0, 25426.0, 144956.0, 3886028.0, 88906.0, 20267.0, 7126.0, 2956.0, 1343.0, 701.0, 384.0, 233.0, 127.0, 90.0, 72.0, 38.0, 24.0, 26.0, 16.0, 9.0, 15.0, 6.0, 4.0, 5.0, 2.0, 4.0, 1.0, 1.0, 3.0, 0.0, 2.0, 2.0, 1.0, 1.0, 2.0, 0.0, 1.0, 1.0], "bins": [-279.0, -269.51171875, -260.0234375, -250.53515625, -241.046875, -231.55859375, -222.0703125, -212.58203125, -203.09375, -193.60546875, -184.1171875, -174.62890625, -165.140625, -155.65234375, -146.1640625, -136.67578125, -127.1875, -117.69921875, -108.2109375, -98.72265625, -89.234375, -79.74609375, -70.2578125, -60.76953125, -51.28125, -41.79296875, -32.3046875, -22.81640625, -13.328125, -3.83984375, 5.6484375, 15.13671875, 24.625, 34.11328125, 43.6015625, 53.08984375, 62.578125, 72.06640625, 81.5546875, 91.04296875, 100.53125, 110.01953125, 119.5078125, 128.99609375, 138.484375, 147.97265625, 157.4609375, 166.94921875, 176.4375, 185.92578125, 195.4140625, 204.90234375, 214.390625, 223.87890625, 233.3671875, 242.85546875, 252.34375, 261.83203125, 271.3203125, 280.80859375, 290.296875, 299.78515625, 309.2734375, 318.76171875, 328.25]}, "gradients/decoder.transformer.h.20.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 4.0, 1.0, 4.0, 4.0, 10.0, 13.0, 19.0, 35.0, 62.0, 88.0, 201.0, 257.0, 138.0, 76.0, 33.0, 21.0, 18.0, 7.0, 8.0, 3.0, 3.0, 3.0, 1.0, 3.0, 2.0, 1.0, 1.0, 0.0, 1.0], "bins": [-953.1666259765625, -932.5780029296875, -911.9893798828125, -891.4008178710938, -870.8121948242188, -850.2235717773438, -829.6349487304688, -809.0463256835938, -788.457763671875, -767.869140625, -747.280517578125, -726.6919555664062, -706.1033325195312, -685.5147094726562, -664.9260864257812, -644.3374633789062, -623.7488403320312, -603.1602172851562, -582.5715942382812, -561.9830322265625, -541.3944091796875, -520.8057861328125, -500.2171630859375, -479.6285400390625, -459.0399475097656, -438.4513244628906, -417.86273193359375, -397.27410888671875, -376.68548583984375, -356.0968933105469, -335.5082702636719, -314.919677734375, -294.3310546875, -273.742431640625, -253.15383911132812, -232.56521606445312, -211.9766082763672, -191.38800048828125, -170.79937744140625, -150.2107696533203, -129.62216186523438, -109.03355407714844, -88.44493865966797, -67.8563232421875, -47.26771545410156, -26.679107666015625, -6.090492248535156, 14.498123168945312, 35.08673095703125, 55.67534255981445, 76.26395416259766, 96.85256958007812, 117.44117736816406, 138.02978515625, 158.618408203125, 179.20701599121094, 199.79562377929688, 220.3842315673828, 240.97283935546875, 261.56146240234375, 282.15008544921875, 302.7386779785156, 323.3273010253906, 343.9158935546875, 364.5045166015625]}, "gradients/decoder.transformer.h.20.ln_2.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 4.0, 3.0, 4.0, 7.0, 3.0, 5.0, 16.0, 9.0, 9.0, 10.0, 18.0, 22.0, 15.0, 20.0, 18.0, 29.0, 30.0, 36.0, 36.0, 35.0, 45.0, 41.0, 28.0, 37.0, 45.0, 46.0, 42.0, 29.0, 45.0, 43.0, 36.0, 32.0, 32.0, 26.0, 20.0, 26.0, 20.0, 17.0, 14.0, 12.0, 9.0, 5.0, 5.0, 5.0, 6.0, 7.0, 1.0, 2.0, 5.0, 4.0, 1.0, 0.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-326.9327392578125, -316.22528076171875, -305.5177917480469, -294.8103332519531, -284.10284423828125, -273.3953857421875, -262.68792724609375, -251.98045349121094, -241.27297973632812, -230.5655059814453, -219.8580322265625, -209.15057373046875, -198.44309997558594, -187.73562622070312, -177.02816772460938, -166.32069396972656, -155.61322021484375, -144.90574645996094, -134.19827270507812, -123.49081420898438, -112.78334045410156, -102.07586669921875, -91.36840057373047, -80.66093444824219, -69.95346069335938, -59.24599075317383, -48.53852081298828, -37.831050872802734, -27.123580932617188, -16.41611099243164, -5.708641052246094, 4.9988250732421875, 15.706298828125, 26.413768768310547, 37.121238708496094, 47.82870864868164, 58.53617858886719, 69.24365234375, 79.95111846923828, 90.65858459472656, 101.36605834960938, 112.07353210449219, 122.78099822998047, 133.48846435546875, 144.19593811035156, 154.90341186523438, 165.61087036132812, 176.31834411621094, 187.02581787109375, 197.73329162597656, 208.44076538085938, 219.14822387695312, 229.85569763183594, 240.56317138671875, 251.2706298828125, 261.97808837890625, 272.6855773925781, 283.3930358886719, 294.10052490234375, 304.8079833984375, 315.51544189453125, 326.2229309082031, 336.9303894042969, 347.63787841796875, 358.3453369140625]}, "gradients/decoder.transformer.h.20.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 7.0, 2.0, 2.0, 3.0, 2.0, 8.0, 10.0, 11.0, 18.0, 20.0, 19.0, 26.0, 30.0, 28.0, 23.0, 41.0, 51.0, 52.0, 40.0, 64.0, 49.0, 62.0, 45.0, 44.0, 42.0, 42.0, 39.0, 35.0, 31.0, 24.0, 24.0, 22.0, 14.0, 21.0, 8.0, 5.0, 10.0, 10.0, 6.0, 4.0, 4.0, 5.0, 1.0, 3.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0], "bins": [-43.4375, -42.04541015625, -40.6533203125, -39.26123046875, -37.869140625, -36.47705078125, -35.0849609375, -33.69287109375, -32.30078125, -30.90869140625, -29.5166015625, -28.12451171875, -26.732421875, -25.34033203125, -23.9482421875, -22.55615234375, -21.1640625, -19.77197265625, -18.3798828125, -16.98779296875, -15.595703125, -14.20361328125, -12.8115234375, -11.41943359375, -10.02734375, -8.63525390625, -7.2431640625, -5.85107421875, -4.458984375, -3.06689453125, -1.6748046875, -0.28271484375, 1.109375, 2.50146484375, 3.8935546875, 5.28564453125, 6.677734375, 8.06982421875, 9.4619140625, 10.85400390625, 12.24609375, 13.63818359375, 15.0302734375, 16.42236328125, 17.814453125, 19.20654296875, 20.5986328125, 21.99072265625, 23.3828125, 24.77490234375, 26.1669921875, 27.55908203125, 28.951171875, 30.34326171875, 31.7353515625, 33.12744140625, 34.51953125, 35.91162109375, 37.3037109375, 38.69580078125, 40.087890625, 41.47998046875, 42.8720703125, 44.26416015625, 45.65625]}, "gradients/decoder.transformer.h.20.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 4.0, 2.0, 7.0, 13.0, 7.0, 12.0, 23.0, 23.0, 39.0, 52.0, 65.0, 76.0, 103.0, 144.0, 212.0, 281.0, 403.0, 547.0, 896.0, 1332.0, 2312.0, 4083.0, 8429.0, 23832.0, 109449.0, 656775.0, 183144.0, 33028.0, 10867.0, 4953.0, 2612.0, 1608.0, 983.0, 641.0, 453.0, 301.0, 237.0, 157.0, 126.0, 82.0, 62.0, 52.0, 45.0, 25.0, 19.0, 13.0, 13.0, 4.0, 7.0, 9.0, 3.0, 2.0, 2.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-14.7265625, -14.260986328125, -13.79541015625, -13.329833984375, -12.8642578125, -12.398681640625, -11.93310546875, -11.467529296875, -11.001953125, -10.536376953125, -10.07080078125, -9.605224609375, -9.1396484375, -8.674072265625, -8.20849609375, -7.742919921875, -7.27734375, -6.811767578125, -6.34619140625, -5.880615234375, -5.4150390625, -4.949462890625, -4.48388671875, -4.018310546875, -3.552734375, -3.087158203125, -2.62158203125, -2.156005859375, -1.6904296875, -1.224853515625, -0.75927734375, -0.293701171875, 0.171875, 0.637451171875, 1.10302734375, 1.568603515625, 2.0341796875, 2.499755859375, 2.96533203125, 3.430908203125, 3.896484375, 4.362060546875, 4.82763671875, 5.293212890625, 5.7587890625, 6.224365234375, 6.68994140625, 7.155517578125, 7.62109375, 8.086669921875, 8.55224609375, 9.017822265625, 9.4833984375, 9.948974609375, 10.41455078125, 10.880126953125, 11.345703125, 11.811279296875, 12.27685546875, 12.742431640625, 13.2080078125, 13.673583984375, 14.13916015625, 14.604736328125, 15.0703125]}, "gradients/decoder.transformer.h.20.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 2.0, 7.0, 4.0, 4.0, 1.0, 8.0, 11.0, 13.0, 18.0, 17.0, 23.0, 19.0, 37.0, 30.0, 36.0, 39.0, 39.0, 43.0, 38.0, 51.0, 49.0, 1069.0, 40.0, 51.0, 42.0, 31.0, 43.0, 45.0, 48.0, 28.0, 47.0, 20.0, 13.0, 20.0, 12.0, 12.0, 5.0, 13.0, 3.0, 4.0, 2.0, 2.0, 0.0, 0.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-24.0, -23.0341796875, -22.068359375, -21.1025390625, -20.13671875, -19.1708984375, -18.205078125, -17.2392578125, -16.2734375, -15.3076171875, -14.341796875, -13.3759765625, -12.41015625, -11.4443359375, -10.478515625, -9.5126953125, -8.546875, -7.5810546875, -6.615234375, -5.6494140625, -4.68359375, -3.7177734375, -2.751953125, -1.7861328125, -0.8203125, 0.1455078125, 1.111328125, 2.0771484375, 3.04296875, 4.0087890625, 4.974609375, 5.9404296875, 6.90625, 7.8720703125, 8.837890625, 9.8037109375, 10.76953125, 11.7353515625, 12.701171875, 13.6669921875, 14.6328125, 15.5986328125, 16.564453125, 17.5302734375, 18.49609375, 19.4619140625, 20.427734375, 21.3935546875, 22.359375, 23.3251953125, 24.291015625, 25.2568359375, 26.22265625, 27.1884765625, 28.154296875, 29.1201171875, 30.0859375, 31.0517578125, 32.017578125, 32.9833984375, 33.94921875, 34.9150390625, 35.880859375, 36.8466796875, 37.8125]}, "gradients/decoder.transformer.h.20.crossattention.c_attn.weight": {"_type": "histogram", "values": [3.0, 1.0, 1.0, 1.0, 3.0, 4.0, 4.0, 2.0, 7.0, 13.0, 14.0, 23.0, 45.0, 41.0, 68.0, 95.0, 119.0, 171.0, 231.0, 315.0, 496.0, 793.0, 1175.0, 2017.0, 3363.0, 6049.0, 11102.0, 23554.0, 54323.0, 139714.0, 1339248.0, 335449.0, 99037.0, 39948.0, 18282.0, 9020.0, 4814.0, 2691.0, 1680.0, 1094.0, 708.0, 439.0, 291.0, 187.0, 123.0, 118.0, 68.0, 63.0, 34.0, 28.0, 24.0, 11.0, 9.0, 11.0, 7.0, 5.0, 2.0, 5.0, 3.0, 3.0, 0.0, 1.0, 0.0, 2.0], "bins": [-4.47265625, -4.32757568359375, -4.1824951171875, -4.03741455078125, -3.892333984375, -3.74725341796875, -3.6021728515625, -3.45709228515625, -3.31201171875, -3.16693115234375, -3.0218505859375, -2.87677001953125, -2.731689453125, -2.58660888671875, -2.4415283203125, -2.29644775390625, -2.1513671875, -2.00628662109375, -1.8612060546875, -1.71612548828125, -1.571044921875, -1.42596435546875, -1.2808837890625, -1.13580322265625, -0.99072265625, -0.84564208984375, -0.7005615234375, -0.55548095703125, -0.410400390625, -0.26531982421875, -0.1202392578125, 0.02484130859375, 0.169921875, 0.31500244140625, 0.4600830078125, 0.60516357421875, 0.750244140625, 0.89532470703125, 1.0404052734375, 1.18548583984375, 1.33056640625, 1.47564697265625, 1.6207275390625, 1.76580810546875, 1.910888671875, 2.05596923828125, 2.2010498046875, 2.34613037109375, 2.4912109375, 2.63629150390625, 2.7813720703125, 2.92645263671875, 3.071533203125, 3.21661376953125, 3.3616943359375, 3.50677490234375, 3.65185546875, 3.79693603515625, 3.9420166015625, 4.08709716796875, 4.232177734375, 4.37725830078125, 4.5223388671875, 4.66741943359375, 4.8125]}, "gradients/decoder.transformer.h.20.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 3.0, 6.0, 8.0, 7.0, 3.0, 13.0, 7.0, 11.0, 6.0, 17.0, 21.0, 16.0, 18.0, 21.0, 21.0, 36.0, 29.0, 31.0, 51.0, 72.0, 101.0, 101.0, 75.0, 61.0, 39.0, 34.0, 28.0, 24.0, 19.0, 24.0, 19.0, 18.0, 9.0, 2.0, 11.0, 6.0, 10.0, 5.0, 4.0, 4.0, 3.0, 2.0, 7.0, 1.0, 3.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 1.0], "bins": [-0.453857421875, -0.4396781921386719, -0.42549896240234375, -0.4113197326660156, -0.3971405029296875, -0.3829612731933594, -0.36878204345703125, -0.3546028137207031, -0.340423583984375, -0.3262443542480469, -0.31206512451171875, -0.2978858947753906, -0.2837066650390625, -0.2695274353027344, -0.25534820556640625, -0.24116897583007812, -0.22698974609375, -0.21281051635742188, -0.19863128662109375, -0.18445205688476562, -0.1702728271484375, -0.15609359741210938, -0.14191436767578125, -0.12773513793945312, -0.113555908203125, -0.09937667846679688, -0.08519744873046875, -0.07101821899414062, -0.0568389892578125, -0.042659759521484375, -0.02848052978515625, -0.014301300048828125, -0.0001220703125, 0.014057159423828125, 0.02823638916015625, 0.042415618896484375, 0.0565948486328125, 0.07077407836914062, 0.08495330810546875, 0.09913253784179688, 0.113311767578125, 0.12749099731445312, 0.14167022705078125, 0.15584945678710938, 0.1700286865234375, 0.18420791625976562, 0.19838714599609375, 0.21256637573242188, 0.22674560546875, 0.24092483520507812, 0.25510406494140625, 0.2692832946777344, 0.2834625244140625, 0.2976417541503906, 0.31182098388671875, 0.3260002136230469, 0.340179443359375, 0.3543586730957031, 0.36853790283203125, 0.3827171325683594, 0.3968963623046875, 0.4110755920410156, 0.42525482177734375, 0.4394340515136719, 0.45361328125]}, "gradients/decoder.transformer.h.20.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 2.0, 2.0, 2.0, 4.0, 5.0, 3.0, 3.0, 4.0, 5.0, 7.0, 7.0, 6.0, 13.0, 2.0, 10.0, 17.0, 18.0, 26.0, 19.0, 28.0, 33.0, 54.0, 74.0, 109.0, 192.0, 17887.0, 1029355.0, 203.0, 117.0, 80.0, 42.0, 43.0, 29.0, 19.0, 18.0, 18.0, 17.0, 20.0, 7.0, 8.0, 11.0, 8.0, 8.0, 5.0, 10.0, 4.0, 3.0, 4.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0], "bins": [-14.28125, -13.835693359375, -13.39013671875, -12.944580078125, -12.4990234375, -12.053466796875, -11.60791015625, -11.162353515625, -10.716796875, -10.271240234375, -9.82568359375, -9.380126953125, -8.9345703125, -8.489013671875, -8.04345703125, -7.597900390625, -7.15234375, -6.706787109375, -6.26123046875, -5.815673828125, -5.3701171875, -4.924560546875, -4.47900390625, -4.033447265625, -3.587890625, -3.142333984375, -2.69677734375, -2.251220703125, -1.8056640625, -1.360107421875, -0.91455078125, -0.468994140625, -0.0234375, 0.422119140625, 0.86767578125, 1.313232421875, 1.7587890625, 2.204345703125, 2.64990234375, 3.095458984375, 3.541015625, 3.986572265625, 4.43212890625, 4.877685546875, 5.3232421875, 5.768798828125, 6.21435546875, 6.659912109375, 7.10546875, 7.551025390625, 7.99658203125, 8.442138671875, 8.8876953125, 9.333251953125, 9.77880859375, 10.224365234375, 10.669921875, 11.115478515625, 11.56103515625, 12.006591796875, 12.4521484375, 12.897705078125, 13.34326171875, 13.788818359375, 14.234375]}, "gradients/decoder.transformer.h.20.ln_cross_attn.weight": {"_type": "histogram", "values": [873.0, 148.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.32646626234054565, 0.021451056003570557, 0.36936837434768677, 0.717285692691803, 1.0652029514312744, 1.4131202697753906, 1.7610375881195068, 2.108954906463623, 2.4568722248077393, 2.8047895431518555, 3.1527068614959717, 3.500624179840088, 3.848541498184204, 4.19645881652832, 4.544376373291016, 4.892293453216553, 5.240211009979248, 5.588128089904785, 5.9360456466674805, 6.283963203430176, 6.631880283355713, 6.97979736328125, 7.327714920043945, 7.675632476806641, 8.02354907989502, 8.371466636657715, 8.719383239746094, 9.067300796508789, 9.415218353271484, 9.76313591003418, 10.111053466796875, 10.458970069885254, 10.806888580322266, 11.154806137084961, 11.502723693847656, 11.850640296936035, 12.19855785369873, 12.546475410461426, 12.894392013549805, 13.2423095703125, 13.590227127075195, 13.93814468383789, 14.286062240600586, 14.633978843688965, 14.98189640045166, 15.329813957214355, 15.677730560302734, 16.02564811706543, 16.373565673828125, 16.72148323059082, 17.069400787353516, 17.41731834411621, 17.765235900878906, 18.11315155029297, 18.461069107055664, 18.80898666381836, 19.156904220581055, 19.50482177734375, 19.852739334106445, 20.20065689086914, 20.548572540283203, 20.8964900970459, 21.244407653808594, 21.59232521057129, 21.940242767333984]}, "gradients/decoder.transformer.h.20.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 2.0, 1.0, 0.0, 2.0, 2.0, 4.0, 6.0, 7.0, 10.0, 9.0, 13.0, 16.0, 10.0, 20.0, 17.0, 19.0, 25.0, 26.0, 32.0, 31.0, 33.0, 42.0, 24.0, 41.0, 39.0, 28.0, 32.0, 38.0, 37.0, 38.0, 35.0, 33.0, 25.0, 29.0, 32.0, 29.0, 32.0, 18.0, 23.0, 20.0, 14.0, 13.0, 19.0, 21.0, 15.0, 10.0, 6.0, 4.0, 9.0, 5.0, 5.0, 5.0, 4.0, 1.0, 1.0, 4.0, 4.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.1879254579544067, -1.1490732431411743, -1.1102209091186523, -1.07136869430542, -1.032516360282898, -0.9936641454696655, -0.9548118710517883, -0.9159595966339111, -0.8771073222160339, -0.8382550477981567, -0.7994027733802795, -0.7605504989624023, -0.7216982841491699, -0.682845950126648, -0.6439937353134155, -0.6051414608955383, -0.5662891864776611, -0.5274369120597839, -0.48858463764190674, -0.44973239302635193, -0.41088011860847473, -0.37202784419059753, -0.3331755995750427, -0.2943233251571655, -0.25547105073928833, -0.21661877632141113, -0.17776651680469513, -0.13891425728797913, -0.10006198287010193, -0.06120970845222473, -0.022357448935508728, 0.016494810581207275, 0.05534708499908447, 0.09419935196638107, 0.13305161893367767, 0.17190387845039368, 0.21075615286827087, 0.24960842728614807, 0.2884606719017029, 0.3273129463195801, 0.3661652207374573, 0.4050174951553345, 0.44386976957321167, 0.4827220141887665, 0.5215742588043213, 0.5604265928268433, 0.5992788076400757, 0.6381310820579529, 0.6769833564758301, 0.7158356308937073, 0.7546879053115845, 0.7935401797294617, 0.8323924541473389, 0.8712446689605713, 0.9100969433784485, 0.9489492177963257, 0.9878014922142029, 1.02665376663208, 1.0655059814453125, 1.1043583154678345, 1.143210530281067, 1.1820628643035889, 1.2209150791168213, 1.2597672939300537, 1.2986196279525757]}, "gradients/decoder.transformer.h.20.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 7.0, 2.0, 2.0, 3.0, 2.0, 8.0, 10.0, 11.0, 18.0, 20.0, 19.0, 26.0, 30.0, 28.0, 23.0, 41.0, 51.0, 52.0, 40.0, 64.0, 49.0, 62.0, 45.0, 44.0, 42.0, 42.0, 39.0, 35.0, 31.0, 24.0, 24.0, 22.0, 14.0, 21.0, 8.0, 5.0, 10.0, 10.0, 6.0, 4.0, 4.0, 5.0, 1.0, 3.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0], "bins": [-43.4375, -42.04541015625, -40.6533203125, -39.26123046875, -37.869140625, -36.47705078125, -35.0849609375, -33.69287109375, -32.30078125, -30.90869140625, -29.5166015625, -28.12451171875, -26.732421875, -25.34033203125, -23.9482421875, -22.55615234375, -21.1640625, -19.77197265625, -18.3798828125, -16.98779296875, -15.595703125, -14.20361328125, -12.8115234375, -11.41943359375, -10.02734375, -8.63525390625, -7.2431640625, -5.85107421875, -4.458984375, -3.06689453125, -1.6748046875, -0.28271484375, 1.109375, 2.50146484375, 3.8935546875, 5.28564453125, 6.677734375, 8.06982421875, 9.4619140625, 10.85400390625, 12.24609375, 13.63818359375, 15.0302734375, 16.42236328125, 17.814453125, 19.20654296875, 20.5986328125, 21.99072265625, 23.3828125, 24.77490234375, 26.1669921875, 27.55908203125, 28.951171875, 30.34326171875, 31.7353515625, 33.12744140625, 34.51953125, 35.91162109375, 37.3037109375, 38.69580078125, 40.087890625, 41.47998046875, 42.8720703125, 44.26416015625, 45.65625]}, "gradients/decoder.transformer.h.20.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 3.0, 4.0, 7.0, 2.0, 5.0, 10.0, 6.0, 20.0, 19.0, 30.0, 40.0, 72.0, 89.0, 141.0, 175.0, 251.0, 407.0, 628.0, 985.0, 1690.0, 3018.0, 5719.0, 11984.0, 27182.0, 71188.0, 240770.0, 462951.0, 137304.0, 46177.0, 18761.0, 8692.0, 4316.0, 2317.0, 1322.0, 791.0, 459.0, 324.0, 210.0, 161.0, 95.0, 67.0, 49.0, 29.0, 32.0, 19.0, 12.0, 7.0, 3.0, 10.0, 4.0, 4.0, 3.0, 1.0, 4.0, 1.0, 1.0, 1.0, 0.0, 2.0], "bins": [-17.40625, -16.849853515625, -16.29345703125, -15.737060546875, -15.1806640625, -14.624267578125, -14.06787109375, -13.511474609375, -12.955078125, -12.398681640625, -11.84228515625, -11.285888671875, -10.7294921875, -10.173095703125, -9.61669921875, -9.060302734375, -8.50390625, -7.947509765625, -7.39111328125, -6.834716796875, -6.2783203125, -5.721923828125, -5.16552734375, -4.609130859375, -4.052734375, -3.496337890625, -2.93994140625, -2.383544921875, -1.8271484375, -1.270751953125, -0.71435546875, -0.157958984375, 0.3984375, 0.954833984375, 1.51123046875, 2.067626953125, 2.6240234375, 3.180419921875, 3.73681640625, 4.293212890625, 4.849609375, 5.406005859375, 5.96240234375, 6.518798828125, 7.0751953125, 7.631591796875, 8.18798828125, 8.744384765625, 9.30078125, 9.857177734375, 10.41357421875, 10.969970703125, 11.5263671875, 12.082763671875, 12.63916015625, 13.195556640625, 13.751953125, 14.308349609375, 14.86474609375, 15.421142578125, 15.9775390625, 16.533935546875, 17.09033203125, 17.646728515625, 18.203125]}, "gradients/decoder.transformer.h.20.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 4.0, 1.0, 0.0, 2.0, 3.0, 2.0, 4.0, 9.0, 13.0, 8.0, 14.0, 16.0, 15.0, 27.0, 25.0, 21.0, 27.0, 32.0, 50.0, 35.0, 29.0, 40.0, 39.0, 68.0, 1836.0, 228.0, 60.0, 40.0, 50.0, 39.0, 43.0, 32.0, 27.0, 41.0, 27.0, 23.0, 10.0, 20.0, 21.0, 15.0, 13.0, 6.0, 8.0, 12.0, 6.0, 7.0, 4.0, 3.0, 4.0, 0.0, 2.0, 2.0, 0.0, 0.0, 1.0, 1.0], "bins": [-141.875, -137.564453125, -133.25390625, -128.943359375, -124.6328125, -120.322265625, -116.01171875, -111.701171875, -107.390625, -103.080078125, -98.76953125, -94.458984375, -90.1484375, -85.837890625, -81.52734375, -77.216796875, -72.90625, -68.595703125, -64.28515625, -59.974609375, -55.6640625, -51.353515625, -47.04296875, -42.732421875, -38.421875, -34.111328125, -29.80078125, -25.490234375, -21.1796875, -16.869140625, -12.55859375, -8.248046875, -3.9375, 0.373046875, 4.68359375, 8.994140625, 13.3046875, 17.615234375, 21.92578125, 26.236328125, 30.546875, 34.857421875, 39.16796875, 43.478515625, 47.7890625, 52.099609375, 56.41015625, 60.720703125, 65.03125, 69.341796875, 73.65234375, 77.962890625, 82.2734375, 86.583984375, 90.89453125, 95.205078125, 99.515625, 103.826171875, 108.13671875, 112.447265625, 116.7578125, 121.068359375, 125.37890625, 129.689453125, 134.0]}, "gradients/decoder.transformer.h.20.attn.c_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 2.0, 2.0, 1.0, 3.0, 3.0, 6.0, 6.0, 8.0, 11.0, 7.0, 12.0, 10.0, 14.0, 22.0, 24.0, 19.0, 34.0, 46.0, 72.0, 83.0, 117.0, 157.0, 240.0, 405.0, 953.0, 5032.0, 3018718.0, 115763.0, 2250.0, 631.0, 313.0, 225.0, 127.0, 124.0, 55.0, 36.0, 33.0, 30.0, 33.0, 15.0, 16.0, 17.0, 8.0, 13.0, 7.0, 4.0, 1.0, 4.0, 1.0, 1.0, 1.0, 4.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-203.125, -196.533203125, -189.94140625, -183.349609375, -176.7578125, -170.166015625, -163.57421875, -156.982421875, -150.390625, -143.798828125, -137.20703125, -130.615234375, -124.0234375, -117.431640625, -110.83984375, -104.248046875, -97.65625, -91.064453125, -84.47265625, -77.880859375, -71.2890625, -64.697265625, -58.10546875, -51.513671875, -44.921875, -38.330078125, -31.73828125, -25.146484375, -18.5546875, -11.962890625, -5.37109375, 1.220703125, 7.8125, 14.404296875, 20.99609375, 27.587890625, 34.1796875, 40.771484375, 47.36328125, 53.955078125, 60.546875, 67.138671875, 73.73046875, 80.322265625, 86.9140625, 93.505859375, 100.09765625, 106.689453125, 113.28125, 119.873046875, 126.46484375, 133.056640625, 139.6484375, 146.240234375, 152.83203125, 159.423828125, 166.015625, 172.607421875, 179.19921875, 185.791015625, 192.3828125, 198.974609375, 205.56640625, 212.158203125, 218.75]}, "gradients/decoder.transformer.h.20.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 167.0, 853.0], "bins": [-3870.767578125, -3809.46533203125, -3748.1630859375, -3686.86083984375, -3625.558349609375, -3564.256103515625, -3502.953857421875, -3441.651611328125, -3380.349365234375, -3319.047119140625, -3257.744873046875, -3196.4423828125, -3135.14013671875, -3073.837890625, -3012.53564453125, -2951.2333984375, -2889.930908203125, -2828.628662109375, -2767.326416015625, -2706.02392578125, -2644.7216796875, -2583.41943359375, -2522.1171875, -2460.81494140625, -2399.5126953125, -2338.21044921875, -2276.908203125, -2215.60595703125, -2154.303466796875, -2093.001220703125, -2031.698974609375, -1970.396728515625, -1909.0943603515625, -1847.7921142578125, -1786.48974609375, -1725.1875, -1663.88525390625, -1602.5828857421875, -1541.2806396484375, -1479.978271484375, -1418.676025390625, -1357.373779296875, -1296.0714111328125, -1234.7691650390625, -1173.4669189453125, -1112.16455078125, -1050.8623046875, -989.5599975585938, -928.2577514648438, -866.9554443359375, -805.6531982421875, -744.3508911132812, -683.048583984375, -621.746337890625, -560.4440307617188, -499.1417236328125, -437.8394775390625, -376.5372009277344, -315.2348937988281, -253.9326171875, -192.6303253173828, -131.32803344726562, -70.0257568359375, -8.72344970703125, 52.57883071899414]}, "gradients/decoder.transformer.h.20.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 3.0, 4.0, 2.0, 4.0, 8.0, 0.0, 4.0, 4.0, 2.0, 13.0, 12.0, 18.0, 13.0, 11.0, 9.0, 26.0, 23.0, 30.0, 39.0, 34.0, 41.0, 38.0, 30.0, 31.0, 51.0, 36.0, 40.0, 42.0, 53.0, 45.0, 32.0, 32.0, 36.0, 39.0, 28.0, 30.0, 27.0, 20.0, 19.0, 14.0, 11.0, 12.0, 6.0, 9.0, 6.0, 7.0, 5.0, 5.0, 2.0, 2.0, 2.0, 5.0, 2.0, 1.0, 2.0], "bins": [-498.8547058105469, -484.73089599609375, -470.6070861816406, -456.4832763671875, -442.3594665527344, -428.23565673828125, -414.1118469238281, -399.988037109375, -385.8642272949219, -371.74041748046875, -357.6166076660156, -343.4927978515625, -329.3689880371094, -315.24517822265625, -301.1213684082031, -286.99755859375, -272.8737487792969, -258.74993896484375, -244.62612915039062, -230.5023193359375, -216.37850952148438, -202.25469970703125, -188.13088989257812, -174.007080078125, -159.88327026367188, -145.75946044921875, -131.63565063476562, -117.5118408203125, -103.38803100585938, -89.26422119140625, -75.14041137695312, -61.0166015625, -46.892791748046875, -32.76898193359375, -18.645172119140625, -4.5213623046875, 9.602447509765625, 23.72625732421875, 37.850067138671875, 51.973876953125, 66.09768676757812, 80.22149658203125, 94.34530639648438, 108.4691162109375, 122.59292602539062, 136.71673583984375, 150.84054565429688, 164.96435546875, 179.08816528320312, 193.21197509765625, 207.33578491210938, 221.4595947265625, 235.58340454101562, 249.70721435546875, 263.8310241699219, 277.954833984375, 292.0786437988281, 306.20245361328125, 320.3262634277344, 334.4500732421875, 348.5738830566406, 362.69769287109375, 376.8215026855469, 390.9453125, 405.0691223144531]}, "gradients/decoder.transformer.h.19.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 5.0, 4.0, 2.0, 1.0, 2.0, 7.0, 9.0, 13.0, 12.0, 14.0, 20.0, 20.0, 26.0, 32.0, 27.0, 34.0, 33.0, 51.0, 47.0, 50.0, 52.0, 59.0, 54.0, 39.0, 41.0, 44.0, 40.0, 39.0, 34.0, 35.0, 19.0, 22.0, 29.0, 12.0, 15.0, 13.0, 6.0, 9.0, 9.0, 7.0, 6.0, 2.0, 5.0, 4.0, 2.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0], "bins": [-42.28125, -40.9208984375, -39.560546875, -38.2001953125, -36.83984375, -35.4794921875, -34.119140625, -32.7587890625, -31.3984375, -30.0380859375, -28.677734375, -27.3173828125, -25.95703125, -24.5966796875, -23.236328125, -21.8759765625, -20.515625, -19.1552734375, -17.794921875, -16.4345703125, -15.07421875, -13.7138671875, -12.353515625, -10.9931640625, -9.6328125, -8.2724609375, -6.912109375, -5.5517578125, -4.19140625, -2.8310546875, -1.470703125, -0.1103515625, 1.25, 2.6103515625, 3.970703125, 5.3310546875, 6.69140625, 8.0517578125, 9.412109375, 10.7724609375, 12.1328125, 13.4931640625, 14.853515625, 16.2138671875, 17.57421875, 18.9345703125, 20.294921875, 21.6552734375, 23.015625, 24.3759765625, 25.736328125, 27.0966796875, 28.45703125, 29.8173828125, 31.177734375, 32.5380859375, 33.8984375, 35.2587890625, 36.619140625, 37.9794921875, 39.33984375, 40.7001953125, 42.060546875, 43.4208984375, 44.78125]}, "gradients/decoder.transformer.h.19.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 1.0, 5.0, 3.0, 6.0, 1.0, 15.0, 14.0, 21.0, 28.0, 49.0, 68.0, 97.0, 100.0, 188.0, 259.0, 433.0, 637.0, 1005.0, 1646.0, 2935.0, 5331.0, 10364.0, 21521.0, 104199.0, 3002057.0, 960778.0, 46089.0, 16595.0, 8570.0, 4484.0, 2530.0, 1487.0, 912.0, 617.0, 402.0, 253.0, 186.0, 104.0, 87.0, 59.0, 47.0, 38.0, 21.0, 14.0, 12.0, 7.0, 5.0, 3.0, 5.0, 2.0, 1.0, 0.0, 4.0, 0.0, 1.0, 0.0, 1.0], "bins": [-122.0, -118.16796875, -114.3359375, -110.50390625, -106.671875, -102.83984375, -99.0078125, -95.17578125, -91.34375, -87.51171875, -83.6796875, -79.84765625, -76.015625, -72.18359375, -68.3515625, -64.51953125, -60.6875, -56.85546875, -53.0234375, -49.19140625, -45.359375, -41.52734375, -37.6953125, -33.86328125, -30.03125, -26.19921875, -22.3671875, -18.53515625, -14.703125, -10.87109375, -7.0390625, -3.20703125, 0.625, 4.45703125, 8.2890625, 12.12109375, 15.953125, 19.78515625, 23.6171875, 27.44921875, 31.28125, 35.11328125, 38.9453125, 42.77734375, 46.609375, 50.44140625, 54.2734375, 58.10546875, 61.9375, 65.76953125, 69.6015625, 73.43359375, 77.265625, 81.09765625, 84.9296875, 88.76171875, 92.59375, 96.42578125, 100.2578125, 104.08984375, 107.921875, 111.75390625, 115.5859375, 119.41796875, 123.25]}, "gradients/decoder.transformer.h.19.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 3.0, 0.0, 4.0, 4.0, 5.0, 5.0, 9.0, 7.0, 12.0, 20.0, 11.0, 25.0, 21.0, 51.0, 62.0, 96.0, 122.0, 209.0, 393.0, 746.0, 899.0, 532.0, 282.0, 180.0, 98.0, 83.0, 48.0, 40.0, 23.0, 16.0, 14.0, 12.0, 10.0, 6.0, 13.0, 5.0, 3.0, 5.0, 4.0, 1.0, 1.0, 1.0, 1.0, 0.0, 3.0, 4.0, 0.0, 0.0, 1.0], "bins": [-140.75, -136.7587890625, -132.767578125, -128.7763671875, -124.78515625, -120.7939453125, -116.802734375, -112.8115234375, -108.8203125, -104.8291015625, -100.837890625, -96.8466796875, -92.85546875, -88.8642578125, -84.873046875, -80.8818359375, -76.890625, -72.8994140625, -68.908203125, -64.9169921875, -60.92578125, -56.9345703125, -52.943359375, -48.9521484375, -44.9609375, -40.9697265625, -36.978515625, -32.9873046875, -28.99609375, -25.0048828125, -21.013671875, -17.0224609375, -13.03125, -9.0400390625, -5.048828125, -1.0576171875, 2.93359375, 6.9248046875, 10.916015625, 14.9072265625, 18.8984375, 22.8896484375, 26.880859375, 30.8720703125, 34.86328125, 38.8544921875, 42.845703125, 46.8369140625, 50.828125, 54.8193359375, 58.810546875, 62.8017578125, 66.79296875, 70.7841796875, 74.775390625, 78.7666015625, 82.7578125, 86.7490234375, 90.740234375, 94.7314453125, 98.72265625, 102.7138671875, 106.705078125, 110.6962890625, 114.6875]}, "gradients/decoder.transformer.h.19.mlp.c_fc.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 5.0, 3.0, 4.0, 6.0, 1.0, 2.0, 7.0, 16.0, 15.0, 12.0, 25.0, 43.0, 47.0, 101.0, 148.0, 307.0, 455.0, 952.0, 1955.0, 4450.0, 11342.0, 35397.0, 315060.0, 3723587.0, 69843.0, 18140.0, 6679.0, 2838.0, 1328.0, 631.0, 317.0, 225.0, 110.0, 71.0, 51.0, 23.0, 22.0, 18.0, 11.0, 9.0, 12.0, 10.0, 4.0, 3.0, 2.0, 4.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0], "bins": [-306.5, -296.68359375, -286.8671875, -277.05078125, -267.234375, -257.41796875, -247.6015625, -237.78515625, -227.96875, -218.15234375, -208.3359375, -198.51953125, -188.703125, -178.88671875, -169.0703125, -159.25390625, -149.4375, -139.62109375, -129.8046875, -119.98828125, -110.171875, -100.35546875, -90.5390625, -80.72265625, -70.90625, -61.08984375, -51.2734375, -41.45703125, -31.640625, -21.82421875, -12.0078125, -2.19140625, 7.625, 17.44140625, 27.2578125, 37.07421875, 46.890625, 56.70703125, 66.5234375, 76.33984375, 86.15625, 95.97265625, 105.7890625, 115.60546875, 125.421875, 135.23828125, 145.0546875, 154.87109375, 164.6875, 174.50390625, 184.3203125, 194.13671875, 203.953125, 213.76953125, 223.5859375, 233.40234375, 243.21875, 253.03515625, 262.8515625, 272.66796875, 282.484375, 292.30078125, 302.1171875, 311.93359375, 321.75]}, "gradients/decoder.transformer.h.19.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 3.0, 6.0, 7.0, 9.0, 29.0, 39.0, 59.0, 138.0, 306.0, 189.0, 95.0, 53.0, 32.0, 17.0, 7.0, 10.0, 5.0, 2.0, 6.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-717.5640258789062, -690.7764892578125, -663.989013671875, -637.2014770507812, -610.4140014648438, -583.62646484375, -556.8389892578125, -530.0514526367188, -503.26397705078125, -476.4764709472656, -449.68896484375, -422.9014587402344, -396.11395263671875, -369.326416015625, -342.5389404296875, -315.75140380859375, -288.9638977050781, -262.1763916015625, -235.38888549804688, -208.60137939453125, -181.81387329101562, -155.02635192871094, -128.2388458251953, -101.45133972167969, -74.66383361816406, -47.87632751464844, -21.088817596435547, 5.698692321777344, 32.48619842529297, 59.273712158203125, 86.06121826171875, 112.84872436523438, 139.63623046875, 166.42373657226562, 193.21124267578125, 219.99874877929688, 246.7862548828125, 273.57379150390625, 300.36126708984375, 327.1488037109375, 353.936279296875, 380.7237854003906, 407.51129150390625, 434.2987976074219, 461.0863037109375, 487.87384033203125, 514.6613159179688, 541.4488525390625, 568.236328125, 595.0238647460938, 621.8113403320312, 648.598876953125, 675.3863525390625, 702.1738891601562, 728.9613647460938, 755.7489013671875, 782.5364379882812, 809.323974609375, 836.1114501953125, 862.8989868164062, 889.6864624023438, 916.4739990234375, 943.261474609375, 970.0490112304688, 996.8364868164062]}, "gradients/decoder.transformer.h.19.ln_2.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 4.0, 6.0, 10.0, 5.0, 9.0, 8.0, 9.0, 14.0, 14.0, 14.0, 33.0, 26.0, 20.0, 16.0, 19.0, 26.0, 25.0, 35.0, 43.0, 57.0, 44.0, 42.0, 51.0, 39.0, 40.0, 50.0, 39.0, 33.0, 30.0, 35.0, 29.0, 31.0, 22.0, 25.0, 23.0, 14.0, 17.0, 14.0, 9.0, 5.0, 8.0, 3.0, 5.0, 7.0, 4.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-326.17828369140625, -315.122802734375, -304.0672912597656, -293.0118103027344, -281.956298828125, -270.90081787109375, -259.8453369140625, -248.7898406982422, -237.73434448242188, -226.67884826660156, -215.62335205078125, -204.56787109375, -193.5123748779297, -182.45687866210938, -171.40139770507812, -160.3459014892578, -149.2904052734375, -138.2349090576172, -127.1794204711914, -116.12393188476562, -105.06843566894531, -94.012939453125, -82.95745086669922, -71.90196228027344, -60.846466064453125, -49.79097366333008, -38.73548126220703, -27.679988861083984, -16.624496459960938, -5.569004058837891, 5.486488342285156, 16.541976928710938, 27.597442626953125, 38.65293502807617, 49.70842742919922, 60.763919830322266, 71.81941223144531, 82.87490844726562, 93.9303970336914, 104.98588562011719, 116.0413818359375, 127.09687805175781, 138.15237426757812, 149.20785522460938, 160.2633514404297, 171.31884765625, 182.37432861328125, 193.42982482910156, 204.48532104492188, 215.5408172607422, 226.5963134765625, 237.65179443359375, 248.70729064941406, 259.7627868652344, 270.8182678222656, 281.873779296875, 292.92926025390625, 303.9847412109375, 315.0402526855469, 326.0957336425781, 337.1512451171875, 348.20672607421875, 359.26220703125, 370.31768798828125, 381.3731994628906]}, "gradients/decoder.transformer.h.19.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 1.0, 2.0, 3.0, 3.0, 3.0, 5.0, 6.0, 9.0, 4.0, 11.0, 13.0, 18.0, 15.0, 24.0, 29.0, 33.0, 37.0, 41.0, 42.0, 58.0, 35.0, 64.0, 45.0, 51.0, 45.0, 54.0, 53.0, 40.0, 32.0, 44.0, 27.0, 23.0, 21.0, 19.0, 20.0, 17.0, 17.0, 6.0, 7.0, 6.0, 8.0, 4.0, 6.0, 3.0, 4.0, 2.0, 0.0, 5.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-43.125, -41.65576171875, -40.1865234375, -38.71728515625, -37.248046875, -35.77880859375, -34.3095703125, -32.84033203125, -31.37109375, -29.90185546875, -28.4326171875, -26.96337890625, -25.494140625, -24.02490234375, -22.5556640625, -21.08642578125, -19.6171875, -18.14794921875, -16.6787109375, -15.20947265625, -13.740234375, -12.27099609375, -10.8017578125, -9.33251953125, -7.86328125, -6.39404296875, -4.9248046875, -3.45556640625, -1.986328125, -0.51708984375, 0.9521484375, 2.42138671875, 3.890625, 5.35986328125, 6.8291015625, 8.29833984375, 9.767578125, 11.23681640625, 12.7060546875, 14.17529296875, 15.64453125, 17.11376953125, 18.5830078125, 20.05224609375, 21.521484375, 22.99072265625, 24.4599609375, 25.92919921875, 27.3984375, 28.86767578125, 30.3369140625, 31.80615234375, 33.275390625, 34.74462890625, 36.2138671875, 37.68310546875, 39.15234375, 40.62158203125, 42.0908203125, 43.56005859375, 45.029296875, 46.49853515625, 47.9677734375, 49.43701171875, 50.90625]}, "gradients/decoder.transformer.h.19.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 3.0, 0.0, 5.0, 5.0, 9.0, 11.0, 13.0, 21.0, 30.0, 33.0, 47.0, 70.0, 72.0, 117.0, 158.0, 244.0, 271.0, 441.0, 607.0, 837.0, 1301.0, 1836.0, 2741.0, 4458.0, 7690.0, 16226.0, 44122.0, 186020.0, 573111.0, 138293.0, 35955.0, 14173.0, 7042.0, 4212.0, 2622.0, 1684.0, 1177.0, 833.0, 607.0, 407.0, 274.0, 206.0, 187.0, 111.0, 94.0, 46.0, 36.0, 38.0, 26.0, 12.0, 5.0, 7.0, 10.0, 7.0, 2.0, 4.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0], "bins": [-12.4296875, -12.02099609375, -11.6123046875, -11.20361328125, -10.794921875, -10.38623046875, -9.9775390625, -9.56884765625, -9.16015625, -8.75146484375, -8.3427734375, -7.93408203125, -7.525390625, -7.11669921875, -6.7080078125, -6.29931640625, -5.890625, -5.48193359375, -5.0732421875, -4.66455078125, -4.255859375, -3.84716796875, -3.4384765625, -3.02978515625, -2.62109375, -2.21240234375, -1.8037109375, -1.39501953125, -0.986328125, -0.57763671875, -0.1689453125, 0.23974609375, 0.6484375, 1.05712890625, 1.4658203125, 1.87451171875, 2.283203125, 2.69189453125, 3.1005859375, 3.50927734375, 3.91796875, 4.32666015625, 4.7353515625, 5.14404296875, 5.552734375, 5.96142578125, 6.3701171875, 6.77880859375, 7.1875, 7.59619140625, 8.0048828125, 8.41357421875, 8.822265625, 9.23095703125, 9.6396484375, 10.04833984375, 10.45703125, 10.86572265625, 11.2744140625, 11.68310546875, 12.091796875, 12.50048828125, 12.9091796875, 13.31787109375, 13.7265625]}, "gradients/decoder.transformer.h.19.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 5.0, 1.0, 2.0, 4.0, 4.0, 4.0, 4.0, 7.0, 18.0, 17.0, 17.0, 14.0, 24.0, 21.0, 25.0, 25.0, 24.0, 32.0, 39.0, 42.0, 40.0, 49.0, 46.0, 40.0, 1059.0, 37.0, 38.0, 43.0, 39.0, 46.0, 28.0, 20.0, 35.0, 31.0, 20.0, 21.0, 16.0, 19.0, 11.0, 7.0, 12.0, 11.0, 8.0, 4.0, 8.0, 6.0, 5.0, 4.0, 4.0, 2.0, 2.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-26.125, -25.272705078125, -24.42041015625, -23.568115234375, -22.7158203125, -21.863525390625, -21.01123046875, -20.158935546875, -19.306640625, -18.454345703125, -17.60205078125, -16.749755859375, -15.8974609375, -15.045166015625, -14.19287109375, -13.340576171875, -12.48828125, -11.635986328125, -10.78369140625, -9.931396484375, -9.0791015625, -8.226806640625, -7.37451171875, -6.522216796875, -5.669921875, -4.817626953125, -3.96533203125, -3.113037109375, -2.2607421875, -1.408447265625, -0.55615234375, 0.296142578125, 1.1484375, 2.000732421875, 2.85302734375, 3.705322265625, 4.5576171875, 5.409912109375, 6.26220703125, 7.114501953125, 7.966796875, 8.819091796875, 9.67138671875, 10.523681640625, 11.3759765625, 12.228271484375, 13.08056640625, 13.932861328125, 14.78515625, 15.637451171875, 16.48974609375, 17.342041015625, 18.1943359375, 19.046630859375, 19.89892578125, 20.751220703125, 21.603515625, 22.455810546875, 23.30810546875, 24.160400390625, 25.0126953125, 25.864990234375, 26.71728515625, 27.569580078125, 28.421875]}, "gradients/decoder.transformer.h.19.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 3.0, 2.0, 2.0, 1.0, 4.0, 9.0, 9.0, 15.0, 20.0, 31.0, 34.0, 59.0, 76.0, 119.0, 206.0, 258.0, 379.0, 584.0, 951.0, 1541.0, 2471.0, 3943.0, 6681.0, 11895.0, 21792.0, 43180.0, 94139.0, 269078.0, 1309775.0, 179498.0, 72456.0, 35130.0, 18101.0, 9919.0, 5656.0, 3330.0, 2013.0, 1292.0, 880.0, 505.0, 350.0, 237.0, 185.0, 85.0, 85.0, 51.0, 38.0, 30.0, 11.0, 16.0, 6.0, 5.0, 4.0, 0.0, 4.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0], "bins": [-4.375, -4.23541259765625, -4.0958251953125, -3.95623779296875, -3.816650390625, -3.67706298828125, -3.5374755859375, -3.39788818359375, -3.25830078125, -3.11871337890625, -2.9791259765625, -2.83953857421875, -2.699951171875, -2.56036376953125, -2.4207763671875, -2.28118896484375, -2.1416015625, -2.00201416015625, -1.8624267578125, -1.72283935546875, -1.583251953125, -1.44366455078125, -1.3040771484375, -1.16448974609375, -1.02490234375, -0.88531494140625, -0.7457275390625, -0.60614013671875, -0.466552734375, -0.32696533203125, -0.1873779296875, -0.04779052734375, 0.091796875, 0.23138427734375, 0.3709716796875, 0.51055908203125, 0.650146484375, 0.78973388671875, 0.9293212890625, 1.06890869140625, 1.20849609375, 1.34808349609375, 1.4876708984375, 1.62725830078125, 1.766845703125, 1.90643310546875, 2.0460205078125, 2.18560791015625, 2.3251953125, 2.46478271484375, 2.6043701171875, 2.74395751953125, 2.883544921875, 3.02313232421875, 3.1627197265625, 3.30230712890625, 3.44189453125, 3.58148193359375, 3.7210693359375, 3.86065673828125, 4.000244140625, 4.13983154296875, 4.2794189453125, 4.41900634765625, 4.55859375]}, "gradients/decoder.transformer.h.19.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 3.0, 0.0, 4.0, 2.0, 7.0, 7.0, 6.0, 9.0, 11.0, 13.0, 12.0, 16.0, 24.0, 30.0, 30.0, 42.0, 41.0, 63.0, 73.0, 71.0, 80.0, 72.0, 90.0, 56.0, 51.0, 43.0, 34.0, 30.0, 21.0, 10.0, 6.0, 8.0, 8.0, 8.0, 9.0, 3.0, 5.0, 5.0, 2.0, 5.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.83154296875, -0.8021163940429688, -0.7726898193359375, -0.7432632446289062, -0.713836669921875, -0.6844100952148438, -0.6549835205078125, -0.6255569458007812, -0.59613037109375, -0.5667037963867188, -0.5372772216796875, -0.5078506469726562, -0.478424072265625, -0.44899749755859375, -0.4195709228515625, -0.39014434814453125, -0.3607177734375, -0.33129119873046875, -0.3018646240234375, -0.27243804931640625, -0.243011474609375, -0.21358489990234375, -0.1841583251953125, -0.15473175048828125, -0.12530517578125, -0.09587860107421875, -0.0664520263671875, -0.03702545166015625, -0.007598876953125, 0.02182769775390625, 0.0512542724609375, 0.08068084716796875, 0.110107421875, 0.13953399658203125, 0.1689605712890625, 0.19838714599609375, 0.227813720703125, 0.25724029541015625, 0.2866668701171875, 0.31609344482421875, 0.34552001953125, 0.37494659423828125, 0.4043731689453125, 0.43379974365234375, 0.463226318359375, 0.49265289306640625, 0.5220794677734375, 0.5515060424804688, 0.5809326171875, 0.6103591918945312, 0.6397857666015625, 0.6692123413085938, 0.698638916015625, 0.7280654907226562, 0.7574920654296875, 0.7869186401367188, 0.81634521484375, 0.8457717895507812, 0.8751983642578125, 0.9046249389648438, 0.934051513671875, 0.9634780883789062, 0.9929046630859375, 1.0223312377929688, 1.0517578125]}, "gradients/decoder.transformer.h.19.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 4.0, 3.0, 2.0, 8.0, 2.0, 2.0, 11.0, 8.0, 6.0, 11.0, 8.0, 10.0, 21.0, 32.0, 39.0, 48.0, 72.0, 83.0, 140.0, 264.0, 1046913.0, 331.0, 162.0, 105.0, 56.0, 46.0, 33.0, 28.0, 27.0, 15.0, 11.0, 11.0, 12.0, 9.0, 8.0, 6.0, 6.0, 3.0, 3.0, 1.0, 2.0, 3.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-33.34375, -32.408447265625, -31.47314453125, -30.537841796875, -29.6025390625, -28.667236328125, -27.73193359375, -26.796630859375, -25.861328125, -24.926025390625, -23.99072265625, -23.055419921875, -22.1201171875, -21.184814453125, -20.24951171875, -19.314208984375, -18.37890625, -17.443603515625, -16.50830078125, -15.572998046875, -14.6376953125, -13.702392578125, -12.76708984375, -11.831787109375, -10.896484375, -9.961181640625, -9.02587890625, -8.090576171875, -7.1552734375, -6.219970703125, -5.28466796875, -4.349365234375, -3.4140625, -2.478759765625, -1.54345703125, -0.608154296875, 0.3271484375, 1.262451171875, 2.19775390625, 3.133056640625, 4.068359375, 5.003662109375, 5.93896484375, 6.874267578125, 7.8095703125, 8.744873046875, 9.68017578125, 10.615478515625, 11.55078125, 12.486083984375, 13.42138671875, 14.356689453125, 15.2919921875, 16.227294921875, 17.16259765625, 18.097900390625, 19.033203125, 19.968505859375, 20.90380859375, 21.839111328125, 22.7744140625, 23.709716796875, 24.64501953125, 25.580322265625, 26.515625]}, "gradients/decoder.transformer.h.19.ln_cross_attn.weight": {"_type": "histogram", "values": [1021.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.34220564365386963, 0.515373945236206, 1.3729535341262817, 2.2305331230163574, 3.0881128311157227, 3.945692539215088, 4.803271770477295, 5.66085147857666, 6.518431186676025, 7.376010894775391, 8.233590126037598, 9.091169357299805, 9.948749542236328, 10.806329727172852, 11.663908004760742, 12.521488189697266, 13.379067420959473, 14.23664665222168, 15.094226837158203, 15.95180606842041, 16.809385299682617, 17.66696548461914, 18.52454376220703, 19.382123947143555, 20.239704132080078, 21.0972843170166, 21.954862594604492, 22.812442779541016, 23.67002296447754, 24.527603149414062, 25.385181427001953, 26.242761611938477, 27.100341796875, 27.957921981811523, 28.815500259399414, 29.673080444335938, 30.53066062927246, 31.388240814208984, 32.245819091796875, 33.103397369384766, 33.96097946166992, 34.81855773925781, 35.67613983154297, 36.53371810913086, 37.39129638671875, 38.248878479003906, 39.1064567565918, 39.96403503417969, 40.821617126464844, 41.679195404052734, 42.53677749633789, 43.39435577392578, 44.25193405151367, 45.10951614379883, 45.96709442138672, 46.82467269897461, 47.6822509765625, 48.53982925415039, 49.39741134643555, 50.25498962402344, 51.11256790161133, 51.970149993896484, 52.827728271484375, 53.685306549072266, 54.54288864135742]}, "gradients/decoder.transformer.h.19.ln_cross_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 6.0, 5.0, 8.0, 8.0, 11.0, 16.0, 14.0, 17.0, 25.0, 31.0, 18.0, 23.0, 31.0, 29.0, 35.0, 37.0, 40.0, 41.0, 32.0, 37.0, 44.0, 46.0, 34.0, 34.0, 36.0, 26.0, 36.0, 32.0, 33.0, 27.0, 27.0, 24.0, 26.0, 23.0, 15.0, 17.0, 9.0, 17.0, 9.0, 8.0, 3.0, 2.0, 6.0, 2.0, 2.0, 6.0, 0.0, 4.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.2174978256225586, -2.1358795166015625, -2.0542612075805664, -1.9726428985595703, -1.8910247087478638, -1.8094063997268677, -1.7277880907058716, -1.646169900894165, -1.564551591873169, -1.4829332828521729, -1.4013149738311768, -1.3196966648101807, -1.2380784749984741, -1.156460165977478, -1.074841856956482, -0.9932236075401306, -0.9116052389144897, -0.8299869298934937, -0.7483686804771423, -0.6667503714561462, -0.5851321220397949, -0.5035138130187988, -0.42189550399780273, -0.3402772545814514, -0.2586589455604553, -0.17704066634178162, -0.09542237222194672, -0.013804078102111816, 0.06781420111656189, 0.1494324803352356, 0.2310507893562317, 0.312669038772583, 0.3942873477935791, 0.4759056270122528, 0.5575239062309265, 0.6391422152519226, 0.7207604646682739, 0.80237877368927, 0.8839970827102661, 0.9656153321266174, 1.0472335815429688, 1.1288518905639648, 1.210470199584961, 1.292088508605957, 1.3737066984176636, 1.4553250074386597, 1.5369433164596558, 1.6185615062713623, 1.700179934501648, 1.781798243522644, 1.8634165525436401, 1.9450347423553467, 2.0266530513763428, 2.108271360397339, 2.189889669418335, 2.271507978439331, 2.353126287460327, 2.4347445964813232, 2.5163629055023193, 2.5979812145233154, 2.6795995235443115, 2.7612175941467285, 2.8428359031677246, 2.9244542121887207, 3.006072521209717]}, "gradients/decoder.transformer.h.19.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 1.0, 2.0, 3.0, 3.0, 3.0, 5.0, 6.0, 9.0, 4.0, 11.0, 13.0, 18.0, 15.0, 24.0, 29.0, 33.0, 37.0, 41.0, 41.0, 59.0, 35.0, 64.0, 45.0, 51.0, 45.0, 54.0, 53.0, 40.0, 32.0, 44.0, 27.0, 23.0, 21.0, 19.0, 20.0, 17.0, 17.0, 6.0, 7.0, 6.0, 8.0, 4.0, 6.0, 3.0, 4.0, 2.0, 0.0, 5.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-43.125, -41.65576171875, -40.1865234375, -38.71728515625, -37.248046875, -35.77880859375, -34.3095703125, -32.84033203125, -31.37109375, -29.90185546875, -28.4326171875, -26.96337890625, -25.494140625, -24.02490234375, -22.5556640625, -21.08642578125, -19.6171875, -18.14794921875, -16.6787109375, -15.20947265625, -13.740234375, -12.27099609375, -10.8017578125, -9.33251953125, -7.86328125, -6.39404296875, -4.9248046875, -3.45556640625, -1.986328125, -0.51708984375, 0.9521484375, 2.42138671875, 3.890625, 5.35986328125, 6.8291015625, 8.29833984375, 9.767578125, 11.23681640625, 12.7060546875, 14.17529296875, 15.64453125, 17.11376953125, 18.5830078125, 20.05224609375, 21.521484375, 22.99072265625, 24.4599609375, 25.92919921875, 27.3984375, 28.86767578125, 30.3369140625, 31.80615234375, 33.275390625, 34.74462890625, 36.2138671875, 37.68310546875, 39.15234375, 40.62158203125, 42.0908203125, 43.56005859375, 45.029296875, 46.49853515625, 47.9677734375, 49.43701171875, 50.90625]}, "gradients/decoder.transformer.h.19.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 3.0, 1.0, 3.0, 1.0, 4.0, 6.0, 11.0, 9.0, 7.0, 16.0, 22.0, 33.0, 44.0, 84.0, 146.0, 272.0, 581.0, 1234.0, 2907.0, 6903.0, 18870.0, 60798.0, 286971.0, 526434.0, 98987.0, 27430.0, 9909.0, 3704.0, 1663.0, 733.0, 328.0, 173.0, 82.0, 52.0, 33.0, 38.0, 12.0, 12.0, 7.0, 7.0, 11.0, 5.0, 6.0, 4.0, 4.0, 2.0, 0.0, 4.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-20.96875, -20.25, -19.53125, -18.8125, -18.09375, -17.375, -16.65625, -15.9375, -15.21875, -14.5, -13.78125, -13.0625, -12.34375, -11.625, -10.90625, -10.1875, -9.46875, -8.75, -8.03125, -7.3125, -6.59375, -5.875, -5.15625, -4.4375, -3.71875, -3.0, -2.28125, -1.5625, -0.84375, -0.125, 0.59375, 1.3125, 2.03125, 2.75, 3.46875, 4.1875, 4.90625, 5.625, 6.34375, 7.0625, 7.78125, 8.5, 9.21875, 9.9375, 10.65625, 11.375, 12.09375, 12.8125, 13.53125, 14.25, 14.96875, 15.6875, 16.40625, 17.125, 17.84375, 18.5625, 19.28125, 20.0, 20.71875, 21.4375, 22.15625, 22.875, 23.59375, 24.3125, 25.03125]}, "gradients/decoder.transformer.h.19.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 3.0, 1.0, 1.0, 3.0, 5.0, 2.0, 4.0, 5.0, 6.0, 7.0, 5.0, 8.0, 13.0, 12.0, 13.0, 21.0, 19.0, 23.0, 23.0, 25.0, 15.0, 31.0, 19.0, 40.0, 34.0, 44.0, 25.0, 41.0, 74.0, 2000.0, 68.0, 44.0, 43.0, 38.0, 34.0, 32.0, 24.0, 36.0, 25.0, 26.0, 29.0, 14.0, 15.0, 17.0, 20.0, 7.0, 16.0, 10.0, 7.0, 6.0, 5.0, 10.0, 4.0, 4.0, 4.0, 0.0, 3.0, 4.0, 2.0, 0.0, 1.0, 0.0, 1.0], "bins": [-121.0625, -117.0927734375, -113.123046875, -109.1533203125, -105.18359375, -101.2138671875, -97.244140625, -93.2744140625, -89.3046875, -85.3349609375, -81.365234375, -77.3955078125, -73.42578125, -69.4560546875, -65.486328125, -61.5166015625, -57.546875, -53.5771484375, -49.607421875, -45.6376953125, -41.66796875, -37.6982421875, -33.728515625, -29.7587890625, -25.7890625, -21.8193359375, -17.849609375, -13.8798828125, -9.91015625, -5.9404296875, -1.970703125, 1.9990234375, 5.96875, 9.9384765625, 13.908203125, 17.8779296875, 21.84765625, 25.8173828125, 29.787109375, 33.7568359375, 37.7265625, 41.6962890625, 45.666015625, 49.6357421875, 53.60546875, 57.5751953125, 61.544921875, 65.5146484375, 69.484375, 73.4541015625, 77.423828125, 81.3935546875, 85.36328125, 89.3330078125, 93.302734375, 97.2724609375, 101.2421875, 105.2119140625, 109.181640625, 113.1513671875, 117.12109375, 121.0908203125, 125.060546875, 129.0302734375, 133.0]}, "gradients/decoder.transformer.h.19.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 6.0, 3.0, 0.0, 4.0, 3.0, 5.0, 7.0, 8.0, 5.0, 7.0, 10.0, 15.0, 10.0, 19.0, 17.0, 19.0, 23.0, 35.0, 44.0, 53.0, 76.0, 86.0, 141.0, 194.0, 321.0, 514.0, 1320.0, 14334.0, 3116730.0, 9021.0, 1219.0, 435.0, 293.0, 198.0, 145.0, 75.0, 61.0, 41.0, 36.0, 40.0, 24.0, 19.0, 23.0, 14.0, 11.0, 15.0, 5.0, 5.0, 7.0, 5.0, 6.0, 4.0, 3.0, 5.0, 2.0, 0.0, 1.0, 3.0, 1.0], "bins": [-221.625, -214.998046875, -208.37109375, -201.744140625, -195.1171875, -188.490234375, -181.86328125, -175.236328125, -168.609375, -161.982421875, -155.35546875, -148.728515625, -142.1015625, -135.474609375, -128.84765625, -122.220703125, -115.59375, -108.966796875, -102.33984375, -95.712890625, -89.0859375, -82.458984375, -75.83203125, -69.205078125, -62.578125, -55.951171875, -49.32421875, -42.697265625, -36.0703125, -29.443359375, -22.81640625, -16.189453125, -9.5625, -2.935546875, 3.69140625, 10.318359375, 16.9453125, 23.572265625, 30.19921875, 36.826171875, 43.453125, 50.080078125, 56.70703125, 63.333984375, 69.9609375, 76.587890625, 83.21484375, 89.841796875, 96.46875, 103.095703125, 109.72265625, 116.349609375, 122.9765625, 129.603515625, 136.23046875, 142.857421875, 149.484375, 156.111328125, 162.73828125, 169.365234375, 175.9921875, 182.619140625, 189.24609375, 195.873046875, 202.5]}, "gradients/decoder.transformer.h.19.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 23.0, 919.0, 76.0, 0.0, 2.0], "bins": [-1884.5546875, -1853.460693359375, -1822.36669921875, -1791.272705078125, -1760.1785888671875, -1729.0845947265625, -1697.9906005859375, -1666.8966064453125, -1635.8026123046875, -1604.7086181640625, -1573.6146240234375, -1542.5205078125, -1511.426513671875, -1480.33251953125, -1449.238525390625, -1418.14453125, -1387.0504150390625, -1355.9564208984375, -1324.8624267578125, -1293.768310546875, -1262.67431640625, -1231.580322265625, -1200.486328125, -1169.392333984375, -1138.29833984375, -1107.204345703125, -1076.1103515625, -1045.016357421875, -1013.9223022460938, -982.8282470703125, -951.7342529296875, -920.6402587890625, -889.5462036132812, -858.4522094726562, -827.358154296875, -796.26416015625, -765.170166015625, -734.0761108398438, -702.9821166992188, -671.8880615234375, -640.7940673828125, -609.7000732421875, -578.6060180664062, -547.5120239257812, -516.4180297851562, -485.323974609375, -454.22998046875, -423.1359558105469, -392.04193115234375, -360.9479064941406, -329.8539123535156, -298.7598876953125, -267.6658630371094, -236.5718536376953, -205.47784423828125, -174.38381958007812, -143.28982543945312, -112.19580841064453, -81.10179138183594, -50.007781982421875, -18.91376495361328, 12.180252075195312, 43.274261474609375, 74.3682861328125, 105.46229553222656]}, "gradients/decoder.transformer.h.19.ln_1.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 0.0, 2.0, 2.0, 2.0, 2.0, 3.0, 8.0, 14.0, 14.0, 7.0, 13.0, 14.0, 13.0, 17.0, 18.0, 12.0, 16.0, 22.0, 26.0, 28.0, 33.0, 32.0, 29.0, 42.0, 49.0, 24.0, 33.0, 39.0, 39.0, 39.0, 28.0, 33.0, 40.0, 36.0, 40.0, 37.0, 18.0, 32.0, 22.0, 12.0, 17.0, 20.0, 16.0, 23.0, 7.0, 12.0, 6.0, 3.0, 5.0, 3.0, 5.0, 4.0, 2.0, 0.0, 2.0, 3.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-398.93206787109375, -385.97369384765625, -373.0153503417969, -360.0569763183594, -347.0986328125, -334.1402587890625, -321.1819152832031, -308.2235412597656, -295.26519775390625, -282.30682373046875, -269.3484802246094, -256.3901062011719, -243.4317626953125, -230.473388671875, -217.51504516601562, -204.55667114257812, -191.5983123779297, -178.63995361328125, -165.6815948486328, -152.72323608398438, -139.76487731933594, -126.80651092529297, -113.84815216064453, -100.8897933959961, -87.93143463134766, -74.97307586669922, -62.01471710205078, -49.05635452270508, -36.09799575805664, -23.139633178710938, -10.1812744140625, 2.7770843505859375, 15.735443115234375, 28.693801879882812, 41.65216064453125, 54.61052322387695, 67.56887817382812, 80.5272445678711, 93.48560333251953, 106.44396209716797, 119.4023208618164, 132.36068725585938, 145.3190460205078, 158.27740478515625, 171.2357635498047, 184.19412231445312, 197.15248107910156, 210.11083984375, 223.06919860839844, 236.02755737304688, 248.9859161376953, 261.94427490234375, 274.90264892578125, 287.8609924316406, 300.8193664550781, 313.7777099609375, 326.736083984375, 339.6944580078125, 352.6528015136719, 365.6111755371094, 378.56951904296875, 391.52789306640625, 404.4862365722656, 417.4446105957031, 430.4029541015625]}, "gradients/decoder.transformer.h.18.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 2.0, 0.0, 3.0, 3.0, 6.0, 5.0, 7.0, 5.0, 5.0, 17.0, 13.0, 15.0, 23.0, 24.0, 30.0, 35.0, 38.0, 37.0, 49.0, 47.0, 50.0, 59.0, 44.0, 47.0, 45.0, 59.0, 51.0, 37.0, 36.0, 37.0, 22.0, 20.0, 24.0, 19.0, 17.0, 21.0, 9.0, 9.0, 7.0, 6.0, 9.0, 5.0, 3.0, 3.0, 4.0, 1.0, 2.0, 4.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-42.65625, -41.18798828125, -39.7197265625, -38.25146484375, -36.783203125, -35.31494140625, -33.8466796875, -32.37841796875, -30.91015625, -29.44189453125, -27.9736328125, -26.50537109375, -25.037109375, -23.56884765625, -22.1005859375, -20.63232421875, -19.1640625, -17.69580078125, -16.2275390625, -14.75927734375, -13.291015625, -11.82275390625, -10.3544921875, -8.88623046875, -7.41796875, -5.94970703125, -4.4814453125, -3.01318359375, -1.544921875, -0.07666015625, 1.3916015625, 2.85986328125, 4.328125, 5.79638671875, 7.2646484375, 8.73291015625, 10.201171875, 11.66943359375, 13.1376953125, 14.60595703125, 16.07421875, 17.54248046875, 19.0107421875, 20.47900390625, 21.947265625, 23.41552734375, 24.8837890625, 26.35205078125, 27.8203125, 29.28857421875, 30.7568359375, 32.22509765625, 33.693359375, 35.16162109375, 36.6298828125, 38.09814453125, 39.56640625, 41.03466796875, 42.5029296875, 43.97119140625, 45.439453125, 46.90771484375, 48.3759765625, 49.84423828125, 51.3125]}, "gradients/decoder.transformer.h.18.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 5.0, 3.0, 6.0, 5.0, 11.0, 7.0, 14.0, 22.0, 21.0, 36.0, 43.0, 62.0, 104.0, 125.0, 191.0, 273.0, 376.0, 538.0, 826.0, 1199.0, 1829.0, 2925.0, 4477.0, 7594.0, 13617.0, 37410.0, 389934.0, 3309270.0, 356255.0, 32184.0, 13382.0, 7507.0, 4684.0, 2942.0, 1950.0, 1353.0, 880.0, 620.0, 462.0, 322.0, 212.0, 200.0, 98.0, 92.0, 61.0, 46.0, 25.0, 15.0, 26.0, 15.0, 12.0, 7.0, 9.0, 7.0, 3.0, 2.0, 1.0, 2.0, 1.0, 0.0, 2.0], "bins": [-97.9375, -94.7255859375, -91.513671875, -88.3017578125, -85.08984375, -81.8779296875, -78.666015625, -75.4541015625, -72.2421875, -69.0302734375, -65.818359375, -62.6064453125, -59.39453125, -56.1826171875, -52.970703125, -49.7587890625, -46.546875, -43.3349609375, -40.123046875, -36.9111328125, -33.69921875, -30.4873046875, -27.275390625, -24.0634765625, -20.8515625, -17.6396484375, -14.427734375, -11.2158203125, -8.00390625, -4.7919921875, -1.580078125, 1.6318359375, 4.84375, 8.0556640625, 11.267578125, 14.4794921875, 17.69140625, 20.9033203125, 24.115234375, 27.3271484375, 30.5390625, 33.7509765625, 36.962890625, 40.1748046875, 43.38671875, 46.5986328125, 49.810546875, 53.0224609375, 56.234375, 59.4462890625, 62.658203125, 65.8701171875, 69.08203125, 72.2939453125, 75.505859375, 78.7177734375, 81.9296875, 85.1416015625, 88.353515625, 91.5654296875, 94.77734375, 97.9892578125, 101.201171875, 104.4130859375, 107.625]}, "gradients/decoder.transformer.h.18.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 4.0, 1.0, 2.0, 5.0, 4.0, 4.0, 5.0, 7.0, 9.0, 10.0, 14.0, 22.0, 40.0, 43.0, 49.0, 77.0, 152.0, 260.0, 493.0, 917.0, 831.0, 459.0, 250.0, 147.0, 73.0, 47.0, 47.0, 30.0, 20.0, 14.0, 15.0, 9.0, 8.0, 3.0, 4.0, 1.0, 2.0, 3.0, 3.0, 3.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-149.875, -145.45703125, -141.0390625, -136.62109375, -132.203125, -127.78515625, -123.3671875, -118.94921875, -114.53125, -110.11328125, -105.6953125, -101.27734375, -96.859375, -92.44140625, -88.0234375, -83.60546875, -79.1875, -74.76953125, -70.3515625, -65.93359375, -61.515625, -57.09765625, -52.6796875, -48.26171875, -43.84375, -39.42578125, -35.0078125, -30.58984375, -26.171875, -21.75390625, -17.3359375, -12.91796875, -8.5, -4.08203125, 0.3359375, 4.75390625, 9.171875, 13.58984375, 18.0078125, 22.42578125, 26.84375, 31.26171875, 35.6796875, 40.09765625, 44.515625, 48.93359375, 53.3515625, 57.76953125, 62.1875, 66.60546875, 71.0234375, 75.44140625, 79.859375, 84.27734375, 88.6953125, 93.11328125, 97.53125, 101.94921875, 106.3671875, 110.78515625, 115.203125, 119.62109375, 124.0390625, 128.45703125, 132.875]}, "gradients/decoder.transformer.h.18.mlp.c_fc.weight": {"_type": "histogram", "values": [2.0, 3.0, 1.0, 1.0, 4.0, 3.0, 5.0, 7.0, 5.0, 4.0, 9.0, 6.0, 12.0, 16.0, 24.0, 27.0, 41.0, 56.0, 103.0, 130.0, 236.0, 424.0, 647.0, 1277.0, 2489.0, 5448.0, 13222.0, 40056.0, 361877.0, 3654445.0, 77703.0, 20607.0, 8040.0, 3448.0, 1657.0, 912.0, 488.0, 298.0, 200.0, 114.0, 55.0, 54.0, 44.0, 26.0, 12.0, 15.0, 6.0, 8.0, 12.0, 5.0, 8.0, 3.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0], "bins": [-253.75, -245.06640625, -236.3828125, -227.69921875, -219.015625, -210.33203125, -201.6484375, -192.96484375, -184.28125, -175.59765625, -166.9140625, -158.23046875, -149.546875, -140.86328125, -132.1796875, -123.49609375, -114.8125, -106.12890625, -97.4453125, -88.76171875, -80.078125, -71.39453125, -62.7109375, -54.02734375, -45.34375, -36.66015625, -27.9765625, -19.29296875, -10.609375, -1.92578125, 6.7578125, 15.44140625, 24.125, 32.80859375, 41.4921875, 50.17578125, 58.859375, 67.54296875, 76.2265625, 84.91015625, 93.59375, 102.27734375, 110.9609375, 119.64453125, 128.328125, 137.01171875, 145.6953125, 154.37890625, 163.0625, 171.74609375, 180.4296875, 189.11328125, 197.796875, 206.48046875, 215.1640625, 223.84765625, 232.53125, 241.21484375, 249.8984375, 258.58203125, 267.265625, 275.94921875, 284.6328125, 293.31640625, 302.0]}, "gradients/decoder.transformer.h.18.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 2.0, 5.0, 10.0, 8.0, 8.0, 20.0, 21.0, 28.0, 62.0, 96.0, 205.0, 224.0, 115.0, 69.0, 43.0, 22.0, 20.0, 15.0, 13.0, 8.0, 9.0, 2.0, 7.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-502.72283935546875, -481.8905334472656, -461.0582275390625, -440.2259216308594, -419.39361572265625, -398.56134033203125, -377.7290344238281, -356.896728515625, -336.0644226074219, -315.23211669921875, -294.3998107910156, -273.5675048828125, -252.73521423339844, -231.9029083251953, -211.07061767578125, -190.23831176757812, -169.406005859375, -148.57369995117188, -127.74140167236328, -106.90910339355469, -86.07679748535156, -65.24449157714844, -44.412193298339844, -23.57989501953125, -2.747589111328125, 18.084712982177734, 38.917015075683594, 59.74931716918945, 80.58161926269531, 101.41392517089844, 122.24622344970703, 143.07852172851562, 163.91082763671875, 184.74313354492188, 205.575439453125, 226.40773010253906, 247.2400360107422, 268.07232666015625, 288.9046325683594, 309.7369384765625, 330.5692443847656, 351.40155029296875, 372.2338562011719, 393.066162109375, 413.8984375, 434.73077392578125, 455.56304931640625, 476.3953552246094, 497.2276611328125, 518.0599365234375, 538.8922729492188, 559.7245483398438, 580.556884765625, 601.38916015625, 622.2214965820312, 643.0537719726562, 663.8861083984375, 684.7183837890625, 705.5507202148438, 726.3829956054688, 747.21533203125, 768.047607421875, 788.8799438476562, 809.7122192382812, 830.5444946289062]}, "gradients/decoder.transformer.h.18.ln_2.bias": {"_type": "histogram", "values": [2.0, 2.0, 0.0, 0.0, 1.0, 2.0, 2.0, 2.0, 4.0, 3.0, 3.0, 6.0, 7.0, 4.0, 5.0, 9.0, 6.0, 13.0, 17.0, 22.0, 24.0, 22.0, 28.0, 26.0, 19.0, 29.0, 24.0, 34.0, 32.0, 36.0, 31.0, 48.0, 42.0, 35.0, 32.0, 39.0, 31.0, 35.0, 41.0, 35.0, 33.0, 16.0, 19.0, 24.0, 26.0, 23.0, 16.0, 16.0, 19.0, 17.0, 10.0, 10.0, 7.0, 9.0, 3.0, 3.0, 6.0, 3.0, 3.0, 3.0, 3.0, 0.0, 1.0, 1.0], "bins": [-324.1053466796875, -314.41790771484375, -304.7304382324219, -295.0429992675781, -285.35552978515625, -275.6680908203125, -265.9806213378906, -256.2931823730469, -246.60572814941406, -236.91827392578125, -227.23081970214844, -217.54336547851562, -207.85592651367188, -198.16845703125, -188.48101806640625, -178.79356384277344, -169.10610961914062, -159.4186553955078, -149.731201171875, -140.0437469482422, -130.35629272460938, -120.6688461303711, -110.98139953613281, -101.2939453125, -91.60649108886719, -81.91903686523438, -72.23158264160156, -62.54413604736328, -52.85668182373047, -43.169227600097656, -33.48177719116211, -23.794326782226562, -14.106903076171875, -4.419450759887695, 5.268001556396484, 14.955453872680664, 24.642906188964844, 34.330360412597656, 44.0178108215332, 53.70526123046875, 63.39271545410156, 73.08016967773438, 82.76762390136719, 92.45507049560547, 102.14252471923828, 111.8299789428711, 121.51742553710938, 131.2048797607422, 140.892333984375, 150.5797882080078, 160.26724243164062, 169.95469665527344, 179.64215087890625, 189.32958984375, 199.0170440673828, 208.70449829101562, 218.39195251464844, 228.07940673828125, 237.76686096191406, 247.45431518554688, 257.1417541503906, 266.8292236328125, 276.51666259765625, 286.2041015625, 295.8915710449219]}, "gradients/decoder.transformer.h.18.crossattention.c_proj.bias": {"_type": "histogram", "values": [3.0, 1.0, 0.0, 0.0, 0.0, 5.0, 2.0, 1.0, 6.0, 3.0, 7.0, 3.0, 6.0, 11.0, 9.0, 21.0, 14.0, 17.0, 23.0, 25.0, 34.0, 26.0, 36.0, 34.0, 41.0, 56.0, 45.0, 48.0, 48.0, 45.0, 55.0, 46.0, 41.0, 38.0, 38.0, 33.0, 30.0, 23.0, 23.0, 16.0, 19.0, 9.0, 17.0, 12.0, 5.0, 6.0, 10.0, 7.0, 5.0, 6.0, 3.0, 6.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-41.15625, -39.7314453125, -38.306640625, -36.8818359375, -35.45703125, -34.0322265625, -32.607421875, -31.1826171875, -29.7578125, -28.3330078125, -26.908203125, -25.4833984375, -24.05859375, -22.6337890625, -21.208984375, -19.7841796875, -18.359375, -16.9345703125, -15.509765625, -14.0849609375, -12.66015625, -11.2353515625, -9.810546875, -8.3857421875, -6.9609375, -5.5361328125, -4.111328125, -2.6865234375, -1.26171875, 0.1630859375, 1.587890625, 3.0126953125, 4.4375, 5.8623046875, 7.287109375, 8.7119140625, 10.13671875, 11.5615234375, 12.986328125, 14.4111328125, 15.8359375, 17.2607421875, 18.685546875, 20.1103515625, 21.53515625, 22.9599609375, 24.384765625, 25.8095703125, 27.234375, 28.6591796875, 30.083984375, 31.5087890625, 32.93359375, 34.3583984375, 35.783203125, 37.2080078125, 38.6328125, 40.0576171875, 41.482421875, 42.9072265625, 44.33203125, 45.7568359375, 47.181640625, 48.6064453125, 50.03125]}, "gradients/decoder.transformer.h.18.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 1.0, 8.0, 5.0, 8.0, 11.0, 15.0, 30.0, 33.0, 41.0, 72.0, 108.0, 159.0, 234.0, 332.0, 468.0, 778.0, 1184.0, 1883.0, 3253.0, 6366.0, 15482.0, 53985.0, 330350.0, 522274.0, 74928.0, 19543.0, 7458.0, 3613.0, 2162.0, 1267.0, 842.0, 527.0, 379.0, 240.0, 170.0, 111.0, 69.0, 59.0, 35.0, 27.0, 16.0, 16.0, 5.0, 4.0, 7.0, 4.0, 1.0, 2.0, 2.0, 0.0, 2.0, 0.0, 1.0], "bins": [-16.625, -16.1375732421875, -15.650146484375, -15.1627197265625, -14.67529296875, -14.1878662109375, -13.700439453125, -13.2130126953125, -12.7255859375, -12.2381591796875, -11.750732421875, -11.2633056640625, -10.77587890625, -10.2884521484375, -9.801025390625, -9.3135986328125, -8.826171875, -8.3387451171875, -7.851318359375, -7.3638916015625, -6.87646484375, -6.3890380859375, -5.901611328125, -5.4141845703125, -4.9267578125, -4.4393310546875, -3.951904296875, -3.4644775390625, -2.97705078125, -2.4896240234375, -2.002197265625, -1.5147705078125, -1.02734375, -0.5399169921875, -0.052490234375, 0.4349365234375, 0.92236328125, 1.4097900390625, 1.897216796875, 2.3846435546875, 2.8720703125, 3.3594970703125, 3.846923828125, 4.3343505859375, 4.82177734375, 5.3092041015625, 5.796630859375, 6.2840576171875, 6.771484375, 7.2589111328125, 7.746337890625, 8.2337646484375, 8.72119140625, 9.2086181640625, 9.696044921875, 10.1834716796875, 10.6708984375, 11.1583251953125, 11.645751953125, 12.1331787109375, 12.62060546875, 13.1080322265625, 13.595458984375, 14.0828857421875, 14.5703125]}, "gradients/decoder.transformer.h.18.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 2.0, 2.0, 1.0, 4.0, 4.0, 9.0, 8.0, 10.0, 12.0, 11.0, 18.0, 21.0, 20.0, 16.0, 27.0, 38.0, 32.0, 40.0, 36.0, 29.0, 40.0, 34.0, 43.0, 33.0, 1064.0, 39.0, 41.0, 34.0, 35.0, 50.0, 32.0, 47.0, 32.0, 29.0, 27.0, 22.0, 20.0, 15.0, 15.0, 9.0, 9.0, 8.0, 6.0, 8.0, 4.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-28.421875, -27.5126953125, -26.603515625, -25.6943359375, -24.78515625, -23.8759765625, -22.966796875, -22.0576171875, -21.1484375, -20.2392578125, -19.330078125, -18.4208984375, -17.51171875, -16.6025390625, -15.693359375, -14.7841796875, -13.875, -12.9658203125, -12.056640625, -11.1474609375, -10.23828125, -9.3291015625, -8.419921875, -7.5107421875, -6.6015625, -5.6923828125, -4.783203125, -3.8740234375, -2.96484375, -2.0556640625, -1.146484375, -0.2373046875, 0.671875, 1.5810546875, 2.490234375, 3.3994140625, 4.30859375, 5.2177734375, 6.126953125, 7.0361328125, 7.9453125, 8.8544921875, 9.763671875, 10.6728515625, 11.58203125, 12.4912109375, 13.400390625, 14.3095703125, 15.21875, 16.1279296875, 17.037109375, 17.9462890625, 18.85546875, 19.7646484375, 20.673828125, 21.5830078125, 22.4921875, 23.4013671875, 24.310546875, 25.2197265625, 26.12890625, 27.0380859375, 27.947265625, 28.8564453125, 29.765625]}, "gradients/decoder.transformer.h.18.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0, 5.0, 3.0, 4.0, 8.0, 4.0, 15.0, 17.0, 21.0, 51.0, 56.0, 96.0, 123.0, 184.0, 315.0, 435.0, 708.0, 1086.0, 1823.0, 3189.0, 5712.0, 11119.0, 24128.0, 58953.0, 163388.0, 1417802.0, 260006.0, 83226.0, 32915.0, 14837.0, 7086.0, 3867.0, 2248.0, 1326.0, 819.0, 534.0, 340.0, 205.0, 158.0, 110.0, 73.0, 37.0, 31.0, 26.0, 17.0, 12.0, 11.0, 4.0, 4.0, 4.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-5.6171875, -5.4453125, -5.2734375, -5.1015625, -4.9296875, -4.7578125, -4.5859375, -4.4140625, -4.2421875, -4.0703125, -3.8984375, -3.7265625, -3.5546875, -3.3828125, -3.2109375, -3.0390625, -2.8671875, -2.6953125, -2.5234375, -2.3515625, -2.1796875, -2.0078125, -1.8359375, -1.6640625, -1.4921875, -1.3203125, -1.1484375, -0.9765625, -0.8046875, -0.6328125, -0.4609375, -0.2890625, -0.1171875, 0.0546875, 0.2265625, 0.3984375, 0.5703125, 0.7421875, 0.9140625, 1.0859375, 1.2578125, 1.4296875, 1.6015625, 1.7734375, 1.9453125, 2.1171875, 2.2890625, 2.4609375, 2.6328125, 2.8046875, 2.9765625, 3.1484375, 3.3203125, 3.4921875, 3.6640625, 3.8359375, 4.0078125, 4.1796875, 4.3515625, 4.5234375, 4.6953125, 4.8671875, 5.0390625, 5.2109375, 5.3828125]}, "gradients/decoder.transformer.h.18.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 3.0, 2.0, 1.0, 1.0, 4.0, 1.0, 7.0, 3.0, 8.0, 6.0, 6.0, 9.0, 13.0, 7.0, 13.0, 13.0, 18.0, 26.0, 25.0, 21.0, 34.0, 51.0, 68.0, 120.0, 153.0, 103.0, 41.0, 39.0, 36.0, 24.0, 19.0, 24.0, 11.0, 22.0, 14.0, 7.0, 9.0, 5.0, 7.0, 9.0, 7.0, 6.0, 3.0, 4.0, 4.0, 2.0, 1.0, 2.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.59423828125, -0.5758209228515625, -0.557403564453125, -0.5389862060546875, -0.52056884765625, -0.5021514892578125, -0.483734130859375, -0.4653167724609375, -0.4468994140625, -0.4284820556640625, -0.410064697265625, -0.3916473388671875, -0.37322998046875, -0.3548126220703125, -0.336395263671875, -0.3179779052734375, -0.299560546875, -0.2811431884765625, -0.262725830078125, -0.2443084716796875, -0.22589111328125, -0.2074737548828125, -0.189056396484375, -0.1706390380859375, -0.1522216796875, -0.1338043212890625, -0.115386962890625, -0.0969696044921875, -0.07855224609375, -0.0601348876953125, -0.041717529296875, -0.0233001708984375, -0.0048828125, 0.0135345458984375, 0.031951904296875, 0.0503692626953125, 0.06878662109375, 0.0872039794921875, 0.105621337890625, 0.1240386962890625, 0.1424560546875, 0.1608734130859375, 0.179290771484375, 0.1977081298828125, 0.21612548828125, 0.2345428466796875, 0.252960205078125, 0.2713775634765625, 0.289794921875, 0.3082122802734375, 0.326629638671875, 0.3450469970703125, 0.36346435546875, 0.3818817138671875, 0.400299072265625, 0.4187164306640625, 0.4371337890625, 0.4555511474609375, 0.473968505859375, 0.4923858642578125, 0.51080322265625, 0.5292205810546875, 0.547637939453125, 0.5660552978515625, 0.58447265625]}, "gradients/decoder.transformer.h.18.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 3.0, 1.0, 1.0, 5.0, 5.0, 3.0, 5.0, 7.0, 10.0, 7.0, 4.0, 8.0, 8.0, 18.0, 13.0, 16.0, 23.0, 22.0, 34.0, 52.0, 63.0, 99.0, 199.0, 1043114.0, 4290.0, 183.0, 84.0, 66.0, 37.0, 32.0, 32.0, 18.0, 13.0, 12.0, 8.0, 15.0, 9.0, 8.0, 5.0, 7.0, 2.0, 10.0, 2.0, 1.0, 2.0, 3.0, 1.0, 4.0, 1.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-18.65625, -18.071533203125, -17.48681640625, -16.902099609375, -16.3173828125, -15.732666015625, -15.14794921875, -14.563232421875, -13.978515625, -13.393798828125, -12.80908203125, -12.224365234375, -11.6396484375, -11.054931640625, -10.47021484375, -9.885498046875, -9.30078125, -8.716064453125, -8.13134765625, -7.546630859375, -6.9619140625, -6.377197265625, -5.79248046875, -5.207763671875, -4.623046875, -4.038330078125, -3.45361328125, -2.868896484375, -2.2841796875, -1.699462890625, -1.11474609375, -0.530029296875, 0.0546875, 0.639404296875, 1.22412109375, 1.808837890625, 2.3935546875, 2.978271484375, 3.56298828125, 4.147705078125, 4.732421875, 5.317138671875, 5.90185546875, 6.486572265625, 7.0712890625, 7.656005859375, 8.24072265625, 8.825439453125, 9.41015625, 9.994873046875, 10.57958984375, 11.164306640625, 11.7490234375, 12.333740234375, 12.91845703125, 13.503173828125, 14.087890625, 14.672607421875, 15.25732421875, 15.842041015625, 16.4267578125, 17.011474609375, 17.59619140625, 18.180908203125, 18.765625]}, "gradients/decoder.transformer.h.18.ln_cross_attn.weight": {"_type": "histogram", "values": [1019.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.28519344329833984, 0.21804159879684448, 0.7212766408920288, 1.2245116233825684, 1.7277467250823975, 2.2309818267822266, 2.7342166900634766, 3.2374517917633057, 3.7406868934631348, 4.243921756744385, 4.747157096862793, 5.250391960144043, 5.753626823425293, 6.256862163543701, 6.760097026824951, 7.263332366943359, 7.766567230224609, 8.26980209350586, 8.77303695678711, 9.27627182006836, 9.779507637023926, 10.282742500305176, 10.785977363586426, 11.289212226867676, 11.792448043823242, 12.295682907104492, 12.798917770385742, 13.302152633666992, 13.805388450622559, 14.308623313903809, 14.811858177185059, 15.315093040466309, 15.818326950073242, 16.321561813354492, 16.824796676635742, 17.328031539916992, 17.831266403198242, 18.334503173828125, 18.837738037109375, 19.340972900390625, 19.844207763671875, 20.347442626953125, 20.850677490234375, 21.353912353515625, 21.857147216796875, 22.360382080078125, 22.863616943359375, 23.366853713989258, 23.870086669921875, 24.373321533203125, 24.876556396484375, 25.379791259765625, 25.883026123046875, 26.386260986328125, 26.889495849609375, 27.392732620239258, 27.895967483520508, 28.399202346801758, 28.902437210083008, 29.405672073364258, 29.908906936645508, 30.41214370727539, 30.91537857055664, 31.41861343383789, 31.92184829711914]}, "gradients/decoder.transformer.h.18.ln_cross_attn.bias": {"_type": "histogram", "values": [2.0, 2.0, 5.0, 5.0, 3.0, 4.0, 9.0, 5.0, 9.0, 8.0, 18.0, 20.0, 14.0, 16.0, 13.0, 28.0, 31.0, 24.0, 19.0, 41.0, 30.0, 36.0, 36.0, 27.0, 25.0, 33.0, 37.0, 43.0, 41.0, 35.0, 31.0, 30.0, 34.0, 30.0, 34.0, 27.0, 23.0, 24.0, 16.0, 26.0, 30.0, 11.0, 13.0, 14.0, 3.0, 11.0, 7.0, 8.0, 7.0, 5.0, 4.0, 2.0, 0.0, 3.0, 5.0, 0.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-1.2132807970046997, -1.1686363220214844, -1.1239919662475586, -1.0793474912643433, -1.0347031354904175, -0.9900586605072021, -0.9454142451286316, -0.900769829750061, -0.8561253547668457, -0.8114809393882751, -0.7668365240097046, -0.7221920490264893, -0.6775476336479187, -0.6329032182693481, -0.5882588028907776, -0.543614387512207, -0.4989699721336365, -0.4543255567550659, -0.409681111574173, -0.3650366961956024, -0.3203922510147095, -0.2757478356361389, -0.23110342025756836, -0.18645897507667542, -0.14181455969810486, -0.09717012941837311, -0.052525706589221954, -0.0078812837600708, 0.03676314651966095, 0.0814075767993927, 0.12605199217796326, 0.1706964373588562, 0.21534085273742676, 0.2599852681159973, 0.30462971329689026, 0.3492741286754608, 0.39391857385635376, 0.4385629892349243, 0.4832074046134949, 0.5278518199920654, 0.5724962949752808, 0.6171407103538513, 0.6617851257324219, 0.7064296007156372, 0.7510740160942078, 0.7957184314727783, 0.8403628468513489, 0.8850072622299194, 0.92965167760849, 0.9742960929870605, 1.0189405679702759, 1.0635849237442017, 1.108229398727417, 1.1528737545013428, 1.197518229484558, 1.2421627044677734, 1.2868070602416992, 1.3314515352249146, 1.3760958909988403, 1.4207403659820557, 1.4653847217559814, 1.5100291967391968, 1.554673671722412, 1.599318027496338, 1.6439625024795532]}, "gradients/decoder.transformer.h.18.attn.c_proj.bias": {"_type": "histogram", "values": [3.0, 1.0, 0.0, 0.0, 0.0, 5.0, 2.0, 1.0, 6.0, 3.0, 7.0, 3.0, 6.0, 11.0, 9.0, 21.0, 14.0, 17.0, 23.0, 25.0, 34.0, 26.0, 36.0, 34.0, 41.0, 55.0, 46.0, 48.0, 48.0, 45.0, 55.0, 46.0, 41.0, 38.0, 38.0, 33.0, 30.0, 23.0, 23.0, 16.0, 19.0, 9.0, 17.0, 12.0, 5.0, 6.0, 10.0, 7.0, 5.0, 6.0, 3.0, 6.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-41.15625, -39.7314453125, -38.306640625, -36.8818359375, -35.45703125, -34.0322265625, -32.607421875, -31.1826171875, -29.7578125, -28.3330078125, -26.908203125, -25.4833984375, -24.05859375, -22.6337890625, -21.208984375, -19.7841796875, -18.359375, -16.9345703125, -15.509765625, -14.0849609375, -12.66015625, -11.2353515625, -9.810546875, -8.3857421875, -6.9609375, -5.5361328125, -4.111328125, -2.6865234375, -1.26171875, 0.1630859375, 1.587890625, 3.0126953125, 4.4375, 5.8623046875, 7.287109375, 8.7119140625, 10.13671875, 11.5615234375, 12.986328125, 14.4111328125, 15.8359375, 17.2607421875, 18.685546875, 20.1103515625, 21.53515625, 22.9599609375, 24.384765625, 25.8095703125, 27.234375, 28.6591796875, 30.083984375, 31.5087890625, 32.93359375, 34.3583984375, 35.783203125, 37.2080078125, 38.6328125, 40.0576171875, 41.482421875, 42.9072265625, 44.33203125, 45.7568359375, 47.181640625, 48.6064453125, 50.03125]}, "gradients/decoder.transformer.h.18.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 2.0, 2.0, 1.0, 4.0, 2.0, 8.0, 10.0, 9.0, 22.0, 31.0, 23.0, 41.0, 50.0, 79.0, 122.0, 141.0, 196.0, 250.0, 358.0, 549.0, 704.0, 1061.0, 1611.0, 2588.0, 5088.0, 20042.0, 549453.0, 435437.0, 17986.0, 4935.0, 2542.0, 1633.0, 993.0, 748.0, 518.0, 354.0, 242.0, 190.0, 146.0, 111.0, 80.0, 55.0, 39.0, 25.0, 24.0, 20.0, 17.0, 8.0, 7.0, 3.0, 3.0, 3.0, 0.0, 4.0, 0.0, 0.0, 1.0], "bins": [-80.5625, -78.19140625, -75.8203125, -73.44921875, -71.078125, -68.70703125, -66.3359375, -63.96484375, -61.59375, -59.22265625, -56.8515625, -54.48046875, -52.109375, -49.73828125, -47.3671875, -44.99609375, -42.625, -40.25390625, -37.8828125, -35.51171875, -33.140625, -30.76953125, -28.3984375, -26.02734375, -23.65625, -21.28515625, -18.9140625, -16.54296875, -14.171875, -11.80078125, -9.4296875, -7.05859375, -4.6875, -2.31640625, 0.0546875, 2.42578125, 4.796875, 7.16796875, 9.5390625, 11.91015625, 14.28125, 16.65234375, 19.0234375, 21.39453125, 23.765625, 26.13671875, 28.5078125, 30.87890625, 33.25, 35.62109375, 37.9921875, 40.36328125, 42.734375, 45.10546875, 47.4765625, 49.84765625, 52.21875, 54.58984375, 56.9609375, 59.33203125, 61.703125, 64.07421875, 66.4453125, 68.81640625, 71.1875]}, "gradients/decoder.transformer.h.18.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 2.0, 3.0, 3.0, 3.0, 5.0, 4.0, 2.0, 1.0, 16.0, 10.0, 11.0, 20.0, 24.0, 15.0, 25.0, 19.0, 17.0, 31.0, 26.0, 24.0, 43.0, 33.0, 33.0, 30.0, 39.0, 58.0, 193.0, 1884.0, 70.0, 29.0, 31.0, 37.0, 48.0, 43.0, 30.0, 29.0, 22.0, 22.0, 18.0, 13.0, 18.0, 18.0, 16.0, 10.0, 2.0, 7.0, 10.0, 3.0, 4.0, 3.0, 3.0, 4.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-127.9375, -123.7060546875, -119.474609375, -115.2431640625, -111.01171875, -106.7802734375, -102.548828125, -98.3173828125, -94.0859375, -89.8544921875, -85.623046875, -81.3916015625, -77.16015625, -72.9287109375, -68.697265625, -64.4658203125, -60.234375, -56.0029296875, -51.771484375, -47.5400390625, -43.30859375, -39.0771484375, -34.845703125, -30.6142578125, -26.3828125, -22.1513671875, -17.919921875, -13.6884765625, -9.45703125, -5.2255859375, -0.994140625, 3.2373046875, 7.46875, 11.7001953125, 15.931640625, 20.1630859375, 24.39453125, 28.6259765625, 32.857421875, 37.0888671875, 41.3203125, 45.5517578125, 49.783203125, 54.0146484375, 58.24609375, 62.4775390625, 66.708984375, 70.9404296875, 75.171875, 79.4033203125, 83.634765625, 87.8662109375, 92.09765625, 96.3291015625, 100.560546875, 104.7919921875, 109.0234375, 113.2548828125, 117.486328125, 121.7177734375, 125.94921875, 130.1806640625, 134.412109375, 138.6435546875, 142.875]}, "gradients/decoder.transformer.h.18.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 5.0, 4.0, 5.0, 4.0, 10.0, 7.0, 5.0, 17.0, 22.0, 27.0, 30.0, 51.0, 53.0, 98.0, 185.0, 330.0, 730.0, 1636.0, 4461.0, 16418.0, 3093897.0, 19484.0, 4690.0, 1700.0, 804.0, 376.0, 221.0, 131.0, 71.0, 52.0, 42.0, 33.0, 35.0, 21.0, 10.0, 18.0, 5.0, 2.0, 5.0, 6.0, 5.0, 4.0, 2.0, 1.0, 3.0, 2.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-249.375, -240.318359375, -231.26171875, -222.205078125, -213.1484375, -204.091796875, -195.03515625, -185.978515625, -176.921875, -167.865234375, -158.80859375, -149.751953125, -140.6953125, -131.638671875, -122.58203125, -113.525390625, -104.46875, -95.412109375, -86.35546875, -77.298828125, -68.2421875, -59.185546875, -50.12890625, -41.072265625, -32.015625, -22.958984375, -13.90234375, -4.845703125, 4.2109375, 13.267578125, 22.32421875, 31.380859375, 40.4375, 49.494140625, 58.55078125, 67.607421875, 76.6640625, 85.720703125, 94.77734375, 103.833984375, 112.890625, 121.947265625, 131.00390625, 140.060546875, 149.1171875, 158.173828125, 167.23046875, 176.287109375, 185.34375, 194.400390625, 203.45703125, 212.513671875, 221.5703125, 230.626953125, 239.68359375, 248.740234375, 257.796875, 266.853515625, 275.91015625, 284.966796875, 294.0234375, 303.080078125, 312.13671875, 321.193359375, 330.25]}, "gradients/decoder.transformer.h.18.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 29.0, 633.0, 347.0, 11.0], "bins": [-2950.283203125, -2902.640625, -2854.998046875, -2807.35546875, -2759.712646484375, -2712.070068359375, -2664.427490234375, -2616.784912109375, -2569.142333984375, -2521.499755859375, -2473.857177734375, -2426.214599609375, -2378.57177734375, -2330.92919921875, -2283.28662109375, -2235.64404296875, -2188.00146484375, -2140.35888671875, -2092.71630859375, -2045.0736083984375, -1997.4310302734375, -1949.7884521484375, -1902.145751953125, -1854.503173828125, -1806.8604736328125, -1759.2178955078125, -1711.5751953125, -1663.9326171875, -1616.2900390625, -1568.6474609375, -1521.0047607421875, -1473.3621826171875, -1425.7197265625, -1378.0771484375, -1330.4344482421875, -1282.7918701171875, -1235.1492919921875, -1187.5067138671875, -1139.864013671875, -1092.221435546875, -1044.578857421875, -996.9362182617188, -949.2936401367188, -901.6510009765625, -854.0084228515625, -806.3657836914062, -758.72314453125, -711.08056640625, -663.4379272460938, -615.7952880859375, -568.1527099609375, -520.5100708007812, -472.86749267578125, -425.224853515625, -377.5822448730469, -329.93963623046875, -282.2970275878906, -234.6544189453125, -187.01181030273438, -139.3691864013672, -91.72657775878906, -44.08396911621094, 3.55865478515625, 51.201263427734375, 98.84387969970703]}, "gradients/decoder.transformer.h.18.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 0.0, 2.0, 0.0, 7.0, 4.0, 5.0, 12.0, 9.0, 20.0, 19.0, 23.0, 19.0, 33.0, 27.0, 38.0, 30.0, 38.0, 37.0, 36.0, 40.0, 34.0, 39.0, 55.0, 48.0, 46.0, 39.0, 43.0, 48.0, 38.0, 46.0, 23.0, 28.0, 22.0, 23.0, 14.0, 13.0, 11.0, 11.0, 10.0, 4.0, 5.0, 3.0, 4.0, 3.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-507.0089416503906, -491.6144104003906, -476.2198791503906, -460.82537841796875, -445.43084716796875, -430.03631591796875, -414.64178466796875, -399.24725341796875, -383.8527526855469, -368.4582214355469, -353.0636901855469, -337.669189453125, -322.274658203125, -306.880126953125, -291.485595703125, -276.091064453125, -260.696533203125, -245.302001953125, -229.90748596191406, -214.51295471191406, -199.11843872070312, -183.72390747070312, -168.32937622070312, -152.9348602294922, -137.54034423828125, -122.14582061767578, -106.75129699707031, -91.35676574707031, -75.96224975585938, -60.567718505859375, -45.173194885253906, -29.778671264648438, -14.3841552734375, 1.0103693008422852, 16.40489387512207, 31.799419403076172, 47.19394302368164, 62.588470458984375, 77.98299407958984, 93.37751770019531, 108.77204132080078, 124.16656494140625, 139.56109619140625, 154.9556121826172, 170.3501434326172, 185.74465942382812, 201.13919067382812, 216.53372192382812, 231.92823791503906, 247.32276916503906, 262.71728515625, 278.11181640625, 293.50634765625, 308.90087890625, 324.2953796386719, 339.6899108886719, 355.0844421386719, 370.4789733886719, 385.8735046386719, 401.26800537109375, 416.66253662109375, 432.05706787109375, 447.45159912109375, 462.84613037109375, 478.2406311035156]}, "gradients/decoder.transformer.h.17.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 0.0, 1.0, 3.0, 2.0, 4.0, 4.0, 8.0, 2.0, 5.0, 9.0, 7.0, 18.0, 19.0, 15.0, 23.0, 19.0, 23.0, 39.0, 28.0, 46.0, 31.0, 48.0, 49.0, 41.0, 59.0, 44.0, 60.0, 41.0, 35.0, 40.0, 36.0, 42.0, 32.0, 23.0, 27.0, 18.0, 16.0, 16.0, 16.0, 15.0, 6.0, 4.0, 7.0, 10.0, 7.0, 5.0, 3.0, 3.0, 4.0, 5.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-41.4375, -39.9765625, -38.515625, -37.0546875, -35.59375, -34.1328125, -32.671875, -31.2109375, -29.75, -28.2890625, -26.828125, -25.3671875, -23.90625, -22.4453125, -20.984375, -19.5234375, -18.0625, -16.6015625, -15.140625, -13.6796875, -12.21875, -10.7578125, -9.296875, -7.8359375, -6.375, -4.9140625, -3.453125, -1.9921875, -0.53125, 0.9296875, 2.390625, 3.8515625, 5.3125, 6.7734375, 8.234375, 9.6953125, 11.15625, 12.6171875, 14.078125, 15.5390625, 17.0, 18.4609375, 19.921875, 21.3828125, 22.84375, 24.3046875, 25.765625, 27.2265625, 28.6875, 30.1484375, 31.609375, 33.0703125, 34.53125, 35.9921875, 37.453125, 38.9140625, 40.375, 41.8359375, 43.296875, 44.7578125, 46.21875, 47.6796875, 49.140625, 50.6015625, 52.0625]}, "gradients/decoder.transformer.h.17.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 4.0, 5.0, 4.0, 12.0, 18.0, 11.0, 22.0, 36.0, 48.0, 70.0, 100.0, 118.0, 153.0, 210.0, 289.0, 381.0, 501.0, 702.0, 984.0, 1375.0, 1711.0, 2548.0, 3537.0, 5037.0, 7798.0, 13361.0, 37158.0, 273932.0, 2604198.0, 1080139.0, 107831.0, 19370.0, 10031.0, 6383.0, 4462.0, 3149.0, 2176.0, 1692.0, 1118.0, 897.0, 698.0, 499.0, 381.0, 265.0, 208.0, 157.0, 119.0, 114.0, 82.0, 48.0, 34.0, 29.0, 20.0, 22.0, 13.0, 14.0, 8.0, 9.0, 4.0, 4.0, 1.0, 1.0], "bins": [-78.3125, -75.76953125, -73.2265625, -70.68359375, -68.140625, -65.59765625, -63.0546875, -60.51171875, -57.96875, -55.42578125, -52.8828125, -50.33984375, -47.796875, -45.25390625, -42.7109375, -40.16796875, -37.625, -35.08203125, -32.5390625, -29.99609375, -27.453125, -24.91015625, -22.3671875, -19.82421875, -17.28125, -14.73828125, -12.1953125, -9.65234375, -7.109375, -4.56640625, -2.0234375, 0.51953125, 3.0625, 5.60546875, 8.1484375, 10.69140625, 13.234375, 15.77734375, 18.3203125, 20.86328125, 23.40625, 25.94921875, 28.4921875, 31.03515625, 33.578125, 36.12109375, 38.6640625, 41.20703125, 43.75, 46.29296875, 48.8359375, 51.37890625, 53.921875, 56.46484375, 59.0078125, 61.55078125, 64.09375, 66.63671875, 69.1796875, 71.72265625, 74.265625, 76.80859375, 79.3515625, 81.89453125, 84.4375]}, "gradients/decoder.transformer.h.17.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 2.0, 6.0, 4.0, 4.0, 1.0, 7.0, 3.0, 9.0, 11.0, 14.0, 7.0, 11.0, 15.0, 25.0, 37.0, 47.0, 59.0, 104.0, 120.0, 246.0, 403.0, 720.0, 785.0, 568.0, 262.0, 177.0, 132.0, 72.0, 60.0, 33.0, 40.0, 21.0, 11.0, 8.0, 13.0, 9.0, 6.0, 6.0, 3.0, 3.0, 6.0, 5.0, 2.0, 3.0, 1.0, 0.0, 2.0, 2.0, 0.0, 1.0, 2.0, 1.0], "bins": [-127.75, -124.0224609375, -120.294921875, -116.5673828125, -112.83984375, -109.1123046875, -105.384765625, -101.6572265625, -97.9296875, -94.2021484375, -90.474609375, -86.7470703125, -83.01953125, -79.2919921875, -75.564453125, -71.8369140625, -68.109375, -64.3818359375, -60.654296875, -56.9267578125, -53.19921875, -49.4716796875, -45.744140625, -42.0166015625, -38.2890625, -34.5615234375, -30.833984375, -27.1064453125, -23.37890625, -19.6513671875, -15.923828125, -12.1962890625, -8.46875, -4.7412109375, -1.013671875, 2.7138671875, 6.44140625, 10.1689453125, 13.896484375, 17.6240234375, 21.3515625, 25.0791015625, 28.806640625, 32.5341796875, 36.26171875, 39.9892578125, 43.716796875, 47.4443359375, 51.171875, 54.8994140625, 58.626953125, 62.3544921875, 66.08203125, 69.8095703125, 73.537109375, 77.2646484375, 80.9921875, 84.7197265625, 88.447265625, 92.1748046875, 95.90234375, 99.6298828125, 103.357421875, 107.0849609375, 110.8125]}, "gradients/decoder.transformer.h.17.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 3.0, 4.0, 5.0, 4.0, 4.0, 12.0, 10.0, 11.0, 11.0, 28.0, 35.0, 50.0, 57.0, 84.0, 182.0, 283.0, 541.0, 1093.0, 2418.0, 5485.0, 14331.0, 47760.0, 1405980.0, 2636025.0, 53302.0, 15593.0, 5872.0, 2486.0, 1194.0, 582.0, 322.0, 180.0, 99.0, 58.0, 43.0, 34.0, 18.0, 19.0, 11.0, 16.0, 6.0, 12.0, 7.0, 4.0, 7.0, 6.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0], "bins": [-317.5, -308.171875, -298.84375, -289.515625, -280.1875, -270.859375, -261.53125, -252.203125, -242.875, -233.546875, -224.21875, -214.890625, -205.5625, -196.234375, -186.90625, -177.578125, -168.25, -158.921875, -149.59375, -140.265625, -130.9375, -121.609375, -112.28125, -102.953125, -93.625, -84.296875, -74.96875, -65.640625, -56.3125, -46.984375, -37.65625, -28.328125, -19.0, -9.671875, -0.34375, 8.984375, 18.3125, 27.640625, 36.96875, 46.296875, 55.625, 64.953125, 74.28125, 83.609375, 92.9375, 102.265625, 111.59375, 120.921875, 130.25, 139.578125, 148.90625, 158.234375, 167.5625, 176.890625, 186.21875, 195.546875, 204.875, 214.203125, 223.53125, 232.859375, 242.1875, 251.515625, 260.84375, 270.171875, 279.5]}, "gradients/decoder.transformer.h.17.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 1.0, 3.0, 9.0, 7.0, 5.0, 9.0, 3.0, 9.0, 8.0, 9.0, 16.0, 32.0, 44.0, 71.0, 102.0, 167.0, 173.0, 103.0, 77.0, 43.0, 28.0, 23.0, 20.0, 12.0, 9.0, 11.0, 3.0, 4.0, 3.0, 2.0, 1.0, 3.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-393.4310607910156, -377.6955871582031, -361.96014404296875, -346.22467041015625, -330.48919677734375, -314.7537536621094, -299.0182800292969, -283.2828369140625, -267.54736328125, -251.81190490722656, -236.07644653320312, -220.34097290039062, -204.6055145263672, -188.87005615234375, -173.13458251953125, -157.3991241455078, -141.66366577148438, -125.92820739746094, -110.19274139404297, -94.457275390625, -78.72181701660156, -62.986358642578125, -47.250892639160156, -31.515426635742188, -15.77996826171875, -0.044506072998046875, 15.690956115722656, 31.42641830444336, 47.16188049316406, 62.8973388671875, 78.63280487060547, 94.36827087402344, 110.103759765625, 125.83921813964844, 141.57467651367188, 157.31015014648438, 173.0456085205078, 188.78106689453125, 204.51654052734375, 220.2519989013672, 235.98745727539062, 251.72291564941406, 267.4583740234375, 283.19384765625, 298.9293212890625, 314.6647644042969, 330.4002380371094, 346.13568115234375, 361.87115478515625, 377.60662841796875, 393.3420715332031, 409.0775451660156, 424.81298828125, 440.5484619140625, 456.283935546875, 472.0194091796875, 487.7548522949219, 503.4903259277344, 519.2257690429688, 534.9612426757812, 550.6967163085938, 566.43212890625, 582.1676025390625, 597.903076171875, 613.6385498046875]}, "gradients/decoder.transformer.h.17.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 4.0, 3.0, 9.0, 5.0, 8.0, 5.0, 8.0, 15.0, 16.0, 13.0, 16.0, 21.0, 23.0, 26.0, 34.0, 37.0, 36.0, 40.0, 36.0, 39.0, 39.0, 38.0, 40.0, 43.0, 42.0, 37.0, 44.0, 34.0, 36.0, 26.0, 28.0, 33.0, 34.0, 27.0, 19.0, 23.0, 12.0, 14.0, 13.0, 7.0, 9.0, 6.0, 2.0, 6.0, 2.0, 3.0, 1.0, 3.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-350.2659606933594, -339.25714111328125, -328.2483215332031, -317.239501953125, -306.23065185546875, -295.2218322753906, -284.2130126953125, -273.2041931152344, -262.19537353515625, -251.18655395507812, -240.17771911621094, -229.1688995361328, -218.1600799560547, -207.1512451171875, -196.14242553710938, -185.13360595703125, -174.12477111816406, -163.11595153808594, -152.10711669921875, -141.09829711914062, -130.0894775390625, -119.08065032958984, -108.07182312011719, -97.06300354003906, -86.0541763305664, -75.04534912109375, -64.03652954101562, -53.02770233154297, -42.01887893676758, -31.010055541992188, -20.00122833251953, -8.992408752441406, 2.01641845703125, 13.025242805480957, 24.034067153930664, 35.04289245605469, 46.05171585083008, 57.06053924560547, 68.06936645507812, 79.07818603515625, 90.0870132446289, 101.09584045410156, 112.10466003417969, 123.11348724365234, 134.122314453125, 145.13113403320312, 156.13995361328125, 167.14877319335938, 178.15760803222656, 189.1664276123047, 200.17526245117188, 211.18408203125, 222.19290161132812, 233.20172119140625, 244.21055603027344, 255.21937561035156, 266.22821044921875, 277.2370300292969, 288.245849609375, 299.25469970703125, 310.2635192871094, 321.2723388671875, 332.2811584472656, 343.28997802734375, 354.2987976074219]}, "gradients/decoder.transformer.h.17.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 2.0, 4.0, 1.0, 3.0, 4.0, 7.0, 2.0, 2.0, 6.0, 6.0, 11.0, 16.0, 10.0, 14.0, 21.0, 13.0, 25.0, 32.0, 34.0, 41.0, 31.0, 55.0, 51.0, 51.0, 44.0, 35.0, 51.0, 53.0, 41.0, 35.0, 29.0, 36.0, 46.0, 32.0, 22.0, 21.0, 20.0, 14.0, 16.0, 8.0, 6.0, 17.0, 11.0, 7.0, 6.0, 5.0, 4.0, 7.0, 2.0, 4.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0], "bins": [-45.625, -44.19140625, -42.7578125, -41.32421875, -39.890625, -38.45703125, -37.0234375, -35.58984375, -34.15625, -32.72265625, -31.2890625, -29.85546875, -28.421875, -26.98828125, -25.5546875, -24.12109375, -22.6875, -21.25390625, -19.8203125, -18.38671875, -16.953125, -15.51953125, -14.0859375, -12.65234375, -11.21875, -9.78515625, -8.3515625, -6.91796875, -5.484375, -4.05078125, -2.6171875, -1.18359375, 0.25, 1.68359375, 3.1171875, 4.55078125, 5.984375, 7.41796875, 8.8515625, 10.28515625, 11.71875, 13.15234375, 14.5859375, 16.01953125, 17.453125, 18.88671875, 20.3203125, 21.75390625, 23.1875, 24.62109375, 26.0546875, 27.48828125, 28.921875, 30.35546875, 31.7890625, 33.22265625, 34.65625, 36.08984375, 37.5234375, 38.95703125, 40.390625, 41.82421875, 43.2578125, 44.69140625, 46.125]}, "gradients/decoder.transformer.h.17.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 2.0, 2.0, 0.0, 3.0, 3.0, 6.0, 9.0, 12.0, 19.0, 17.0, 45.0, 42.0, 57.0, 105.0, 146.0, 233.0, 295.0, 477.0, 682.0, 1023.0, 1591.0, 2606.0, 4349.0, 8575.0, 21662.0, 88365.0, 636567.0, 216233.0, 38029.0, 12547.0, 5854.0, 3163.0, 2009.0, 1290.0, 833.0, 568.0, 350.0, 225.0, 181.0, 109.0, 99.0, 60.0, 27.0, 32.0, 21.0, 16.0, 6.0, 10.0, 5.0, 4.0, 4.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-20.515625, -19.825439453125, -19.13525390625, -18.445068359375, -17.7548828125, -17.064697265625, -16.37451171875, -15.684326171875, -14.994140625, -14.303955078125, -13.61376953125, -12.923583984375, -12.2333984375, -11.543212890625, -10.85302734375, -10.162841796875, -9.47265625, -8.782470703125, -8.09228515625, -7.402099609375, -6.7119140625, -6.021728515625, -5.33154296875, -4.641357421875, -3.951171875, -3.260986328125, -2.57080078125, -1.880615234375, -1.1904296875, -0.500244140625, 0.18994140625, 0.880126953125, 1.5703125, 2.260498046875, 2.95068359375, 3.640869140625, 4.3310546875, 5.021240234375, 5.71142578125, 6.401611328125, 7.091796875, 7.781982421875, 8.47216796875, 9.162353515625, 9.8525390625, 10.542724609375, 11.23291015625, 11.923095703125, 12.61328125, 13.303466796875, 13.99365234375, 14.683837890625, 15.3740234375, 16.064208984375, 16.75439453125, 17.444580078125, 18.134765625, 18.824951171875, 19.51513671875, 20.205322265625, 20.8955078125, 21.585693359375, 22.27587890625, 22.966064453125, 23.65625]}, "gradients/decoder.transformer.h.17.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 3.0, 1.0, 2.0, 6.0, 2.0, 4.0, 9.0, 11.0, 11.0, 15.0, 14.0, 20.0, 20.0, 24.0, 20.0, 26.0, 31.0, 23.0, 27.0, 37.0, 43.0, 29.0, 35.0, 46.0, 1067.0, 43.0, 52.0, 42.0, 42.0, 31.0, 24.0, 32.0, 38.0, 23.0, 27.0, 26.0, 15.0, 19.0, 19.0, 4.0, 12.0, 19.0, 10.0, 7.0, 7.0, 3.0, 4.0, 0.0, 8.0, 3.0, 1.0, 1.0, 2.0, 1.0, 1.0], "bins": [-29.5625, -28.69140625, -27.8203125, -26.94921875, -26.078125, -25.20703125, -24.3359375, -23.46484375, -22.59375, -21.72265625, -20.8515625, -19.98046875, -19.109375, -18.23828125, -17.3671875, -16.49609375, -15.625, -14.75390625, -13.8828125, -13.01171875, -12.140625, -11.26953125, -10.3984375, -9.52734375, -8.65625, -7.78515625, -6.9140625, -6.04296875, -5.171875, -4.30078125, -3.4296875, -2.55859375, -1.6875, -0.81640625, 0.0546875, 0.92578125, 1.796875, 2.66796875, 3.5390625, 4.41015625, 5.28125, 6.15234375, 7.0234375, 7.89453125, 8.765625, 9.63671875, 10.5078125, 11.37890625, 12.25, 13.12109375, 13.9921875, 14.86328125, 15.734375, 16.60546875, 17.4765625, 18.34765625, 19.21875, 20.08984375, 20.9609375, 21.83203125, 22.703125, 23.57421875, 24.4453125, 25.31640625, 26.1875]}, "gradients/decoder.transformer.h.17.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 3.0, 2.0, 10.0, 7.0, 17.0, 14.0, 19.0, 35.0, 48.0, 59.0, 106.0, 164.0, 273.0, 370.0, 674.0, 1134.0, 2055.0, 3603.0, 6621.0, 14289.0, 33856.0, 97855.0, 553942.0, 1206044.0, 107822.0, 36656.0, 15321.0, 7174.0, 3787.0, 2045.0, 1210.0, 715.0, 433.0, 294.0, 161.0, 122.0, 73.0, 44.0, 28.0, 18.0, 14.0, 7.0, 6.0, 4.0, 4.0, 2.0, 2.0, 3.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.13671875, -6.90679931640625, -6.6768798828125, -6.44696044921875, -6.217041015625, -5.98712158203125, -5.7572021484375, -5.52728271484375, -5.29736328125, -5.06744384765625, -4.8375244140625, -4.60760498046875, -4.377685546875, -4.14776611328125, -3.9178466796875, -3.68792724609375, -3.4580078125, -3.22808837890625, -2.9981689453125, -2.76824951171875, -2.538330078125, -2.30841064453125, -2.0784912109375, -1.84857177734375, -1.61865234375, -1.38873291015625, -1.1588134765625, -0.92889404296875, -0.698974609375, -0.46905517578125, -0.2391357421875, -0.00921630859375, 0.220703125, 0.45062255859375, 0.6805419921875, 0.91046142578125, 1.140380859375, 1.37030029296875, 1.6002197265625, 1.83013916015625, 2.06005859375, 2.28997802734375, 2.5198974609375, 2.74981689453125, 2.979736328125, 3.20965576171875, 3.4395751953125, 3.66949462890625, 3.8994140625, 4.12933349609375, 4.3592529296875, 4.58917236328125, 4.819091796875, 5.04901123046875, 5.2789306640625, 5.50885009765625, 5.73876953125, 5.96868896484375, 6.1986083984375, 6.42852783203125, 6.658447265625, 6.88836669921875, 7.1182861328125, 7.34820556640625, 7.578125]}, "gradients/decoder.transformer.h.17.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 4.0, 3.0, 4.0, 1.0, 3.0, 4.0, 8.0, 3.0, 5.0, 4.0, 3.0, 3.0, 6.0, 4.0, 9.0, 14.0, 15.0, 21.0, 16.0, 27.0, 34.0, 47.0, 68.0, 184.0, 198.0, 75.0, 55.0, 27.0, 25.0, 14.0, 19.0, 9.0, 19.0, 12.0, 12.0, 9.0, 9.0, 9.0, 5.0, 4.0, 5.0, 5.0, 1.0, 1.0, 3.0, 2.0, 4.0, 0.0, 2.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0], "bins": [-1.0986328125, -1.064361572265625, -1.03009033203125, -0.995819091796875, -0.9615478515625, -0.927276611328125, -0.89300537109375, -0.858734130859375, -0.824462890625, -0.790191650390625, -0.75592041015625, -0.721649169921875, -0.6873779296875, -0.653106689453125, -0.61883544921875, -0.584564208984375, -0.55029296875, -0.516021728515625, -0.48175048828125, -0.447479248046875, -0.4132080078125, -0.378936767578125, -0.34466552734375, -0.310394287109375, -0.276123046875, -0.241851806640625, -0.20758056640625, -0.173309326171875, -0.1390380859375, -0.104766845703125, -0.07049560546875, -0.036224365234375, -0.001953125, 0.032318115234375, 0.06658935546875, 0.100860595703125, 0.1351318359375, 0.169403076171875, 0.20367431640625, 0.237945556640625, 0.272216796875, 0.306488037109375, 0.34075927734375, 0.375030517578125, 0.4093017578125, 0.443572998046875, 0.47784423828125, 0.512115478515625, 0.54638671875, 0.580657958984375, 0.61492919921875, 0.649200439453125, 0.6834716796875, 0.717742919921875, 0.75201416015625, 0.786285400390625, 0.820556640625, 0.854827880859375, 0.88909912109375, 0.923370361328125, 0.9576416015625, 0.991912841796875, 1.02618408203125, 1.060455322265625, 1.0947265625]}, "gradients/decoder.transformer.h.17.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 3.0, 3.0, 3.0, 0.0, 4.0, 5.0, 2.0, 5.0, 7.0, 7.0, 9.0, 7.0, 15.0, 11.0, 19.0, 12.0, 22.0, 19.0, 37.0, 65.0, 110.0, 159.0, 1022565.0, 24972.0, 183.0, 68.0, 59.0, 43.0, 33.0, 17.0, 18.0, 14.0, 10.0, 6.0, 5.0, 2.0, 4.0, 4.0, 4.0, 5.0, 6.0, 6.0, 3.0, 1.0, 3.0, 2.0, 4.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0], "bins": [-35.0, -33.90478515625, -32.8095703125, -31.71435546875, -30.619140625, -29.52392578125, -28.4287109375, -27.33349609375, -26.23828125, -25.14306640625, -24.0478515625, -22.95263671875, -21.857421875, -20.76220703125, -19.6669921875, -18.57177734375, -17.4765625, -16.38134765625, -15.2861328125, -14.19091796875, -13.095703125, -12.00048828125, -10.9052734375, -9.81005859375, -8.71484375, -7.61962890625, -6.5244140625, -5.42919921875, -4.333984375, -3.23876953125, -2.1435546875, -1.04833984375, 0.046875, 1.14208984375, 2.2373046875, 3.33251953125, 4.427734375, 5.52294921875, 6.6181640625, 7.71337890625, 8.80859375, 9.90380859375, 10.9990234375, 12.09423828125, 13.189453125, 14.28466796875, 15.3798828125, 16.47509765625, 17.5703125, 18.66552734375, 19.7607421875, 20.85595703125, 21.951171875, 23.04638671875, 24.1416015625, 25.23681640625, 26.33203125, 27.42724609375, 28.5224609375, 29.61767578125, 30.712890625, 31.80810546875, 32.9033203125, 33.99853515625, 35.09375]}, "gradients/decoder.transformer.h.17.ln_cross_attn.weight": {"_type": "histogram", "values": [1019.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.4909548759460449, 0.4196431040763855, 1.330241084098816, 2.2408390045166016, 3.1514370441436768, 4.062035083770752, 4.972632884979248, 5.883231163024902, 6.793828964233398, 7.7044267654418945, 8.61502456665039, 9.525623321533203, 10.4362211227417, 11.346818923950195, 12.257416725158691, 13.168014526367188, 14.07861328125, 14.989211082458496, 15.899808883666992, 16.810407638549805, 17.721004486083984, 18.631603240966797, 19.54220199584961, 20.45279884338379, 21.36339569091797, 22.27399444580078, 23.18459129333496, 24.095190048217773, 25.005786895751953, 25.916385650634766, 26.826984405517578, 27.737581253051758, 28.64818000793457, 29.558778762817383, 30.469375610351562, 31.379974365234375, 32.29057312011719, 33.201171875, 34.11176681518555, 35.02236557006836, 35.93296432495117, 36.843563079833984, 37.7541618347168, 38.664756774902344, 39.575355529785156, 40.48595428466797, 41.39655303955078, 42.307151794433594, 43.21774673461914, 44.12834548950195, 45.038944244384766, 45.94953918457031, 46.860137939453125, 47.77073669433594, 48.68133544921875, 49.59193420410156, 50.502532958984375, 51.41313171386719, 52.32373046875, 53.23432540893555, 54.14492416381836, 55.05552291870117, 55.966121673583984, 56.8767204284668, 57.787315368652344]}, "gradients/decoder.transformer.h.17.ln_cross_attn.bias": {"_type": "histogram", "values": [3.0, 2.0, 3.0, 1.0, 4.0, 4.0, 6.0, 3.0, 7.0, 8.0, 10.0, 11.0, 16.0, 17.0, 18.0, 24.0, 31.0, 29.0, 25.0, 26.0, 37.0, 32.0, 36.0, 32.0, 37.0, 28.0, 38.0, 24.0, 33.0, 47.0, 34.0, 48.0, 37.0, 40.0, 40.0, 24.0, 25.0, 20.0, 21.0, 23.0, 19.0, 16.0, 11.0, 13.0, 8.0, 4.0, 7.0, 4.0, 8.0, 5.0, 3.0, 4.0, 8.0, 2.0, 2.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-2.4653775691986084, -2.3763115406036377, -2.287245273590088, -2.198179244995117, -2.1091132164001465, -2.020047187805176, -1.930980920791626, -1.8419148921966553, -1.752848744392395, -1.6637825965881348, -1.574716567993164, -1.4856504201889038, -1.3965842723846436, -1.3075182437896729, -1.2184520959854126, -1.1293859481811523, -1.0403199195861816, -0.9512538313865662, -0.8621877431869507, -0.7731215953826904, -0.684055507183075, -0.5949894189834595, -0.5059232711791992, -0.41685718297958374, -0.32779109477996826, -0.2387249916791916, -0.14965888857841492, -0.06059277057647705, 0.028473317623138428, 0.1175394058227539, 0.20660555362701416, 0.29567164182662964, 0.384737491607666, 0.4738035798072815, 0.562869668006897, 0.6519358158111572, 0.7410019040107727, 0.8300679922103882, 0.9191341400146484, 1.0082001686096191, 1.0972663164138794, 1.1863324642181396, 1.2753984928131104, 1.3644646406173706, 1.4535307884216309, 1.5425968170166016, 1.6316629648208618, 1.720729112625122, 1.8097951412200928, 1.898861289024353, 1.9879273176193237, 2.076993465423584, 2.1660594940185547, 2.2551255226135254, 2.344191789627075, 2.433257818222046, 2.5223240852355957, 2.6113901138305664, 2.700456380844116, 2.789522409439087, 2.8785884380340576, 2.9676547050476074, 3.056720733642578, 3.145786762237549, 3.2348527908325195]}, "gradients/decoder.transformer.h.17.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 4.0, 1.0, 3.0, 3.0, 8.0, 2.0, 2.0, 6.0, 6.0, 11.0, 16.0, 10.0, 13.0, 22.0, 13.0, 25.0, 31.0, 35.0, 40.0, 32.0, 54.0, 50.0, 52.0, 45.0, 35.0, 50.0, 52.0, 43.0, 35.0, 29.0, 35.0, 47.0, 32.0, 22.0, 21.0, 20.0, 14.0, 17.0, 7.0, 6.0, 17.0, 11.0, 7.0, 6.0, 5.0, 4.0, 7.0, 2.0, 4.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0], "bins": [-45.65625, -44.22216796875, -42.7880859375, -41.35400390625, -39.919921875, -38.48583984375, -37.0517578125, -35.61767578125, -34.18359375, -32.74951171875, -31.3154296875, -29.88134765625, -28.447265625, -27.01318359375, -25.5791015625, -24.14501953125, -22.7109375, -21.27685546875, -19.8427734375, -18.40869140625, -16.974609375, -15.54052734375, -14.1064453125, -12.67236328125, -11.23828125, -9.80419921875, -8.3701171875, -6.93603515625, -5.501953125, -4.06787109375, -2.6337890625, -1.19970703125, 0.234375, 1.66845703125, 3.1025390625, 4.53662109375, 5.970703125, 7.40478515625, 8.8388671875, 10.27294921875, 11.70703125, 13.14111328125, 14.5751953125, 16.00927734375, 17.443359375, 18.87744140625, 20.3115234375, 21.74560546875, 23.1796875, 24.61376953125, 26.0478515625, 27.48193359375, 28.916015625, 30.35009765625, 31.7841796875, 33.21826171875, 34.65234375, 36.08642578125, 37.5205078125, 38.95458984375, 40.388671875, 41.82275390625, 43.2568359375, 44.69091796875, 46.125]}, "gradients/decoder.transformer.h.17.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 3.0, 3.0, 1.0, 4.0, 3.0, 3.0, 7.0, 13.0, 6.0, 14.0, 10.0, 11.0, 27.0, 26.0, 36.0, 49.0, 67.0, 95.0, 145.0, 223.0, 351.0, 607.0, 1313.0, 2557.0, 6024.0, 15537.0, 47509.0, 188545.0, 549225.0, 167659.0, 43141.0, 14325.0, 5835.0, 2423.0, 1100.0, 604.0, 343.0, 201.0, 133.0, 101.0, 56.0, 50.0, 41.0, 31.0, 25.0, 14.0, 13.0, 19.0, 12.0, 9.0, 4.0, 4.0, 5.0, 3.0, 2.0, 1.0, 3.0, 1.0, 1.0, 1.0], "bins": [-23.59375, -22.866943359375, -22.14013671875, -21.413330078125, -20.6865234375, -19.959716796875, -19.23291015625, -18.506103515625, -17.779296875, -17.052490234375, -16.32568359375, -15.598876953125, -14.8720703125, -14.145263671875, -13.41845703125, -12.691650390625, -11.96484375, -11.238037109375, -10.51123046875, -9.784423828125, -9.0576171875, -8.330810546875, -7.60400390625, -6.877197265625, -6.150390625, -5.423583984375, -4.69677734375, -3.969970703125, -3.2431640625, -2.516357421875, -1.78955078125, -1.062744140625, -0.3359375, 0.390869140625, 1.11767578125, 1.844482421875, 2.5712890625, 3.298095703125, 4.02490234375, 4.751708984375, 5.478515625, 6.205322265625, 6.93212890625, 7.658935546875, 8.3857421875, 9.112548828125, 9.83935546875, 10.566162109375, 11.29296875, 12.019775390625, 12.74658203125, 13.473388671875, 14.2001953125, 14.927001953125, 15.65380859375, 16.380615234375, 17.107421875, 17.834228515625, 18.56103515625, 19.287841796875, 20.0146484375, 20.741455078125, 21.46826171875, 22.195068359375, 22.921875]}, "gradients/decoder.transformer.h.17.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 3.0, 2.0, 2.0, 6.0, 1.0, 8.0, 7.0, 9.0, 13.0, 9.0, 12.0, 9.0, 14.0, 15.0, 14.0, 26.0, 35.0, 22.0, 37.0, 38.0, 27.0, 37.0, 40.0, 44.0, 54.0, 74.0, 1867.0, 170.0, 58.0, 41.0, 34.0, 39.0, 29.0, 40.0, 26.0, 29.0, 28.0, 23.0, 20.0, 8.0, 13.0, 16.0, 11.0, 15.0, 13.0, 6.0, 4.0, 4.0, 2.0, 2.0, 1.0, 2.0, 1.0, 2.0, 2.0, 1.0, 0.0, 2.0], "bins": [-135.625, -131.48046875, -127.3359375, -123.19140625, -119.046875, -114.90234375, -110.7578125, -106.61328125, -102.46875, -98.32421875, -94.1796875, -90.03515625, -85.890625, -81.74609375, -77.6015625, -73.45703125, -69.3125, -65.16796875, -61.0234375, -56.87890625, -52.734375, -48.58984375, -44.4453125, -40.30078125, -36.15625, -32.01171875, -27.8671875, -23.72265625, -19.578125, -15.43359375, -11.2890625, -7.14453125, -3.0, 1.14453125, 5.2890625, 9.43359375, 13.578125, 17.72265625, 21.8671875, 26.01171875, 30.15625, 34.30078125, 38.4453125, 42.58984375, 46.734375, 50.87890625, 55.0234375, 59.16796875, 63.3125, 67.45703125, 71.6015625, 75.74609375, 79.890625, 84.03515625, 88.1796875, 92.32421875, 96.46875, 100.61328125, 104.7578125, 108.90234375, 113.046875, 117.19140625, 121.3359375, 125.48046875, 129.625]}, "gradients/decoder.transformer.h.17.attn.c_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 3.0, 2.0, 1.0, 1.0, 1.0, 2.0, 3.0, 3.0, 5.0, 9.0, 14.0, 13.0, 12.0, 18.0, 23.0, 16.0, 20.0, 33.0, 55.0, 84.0, 75.0, 109.0, 136.0, 181.0, 346.0, 732.0, 3885.0, 2430475.0, 704096.0, 3389.0, 764.0, 339.0, 199.0, 132.0, 93.0, 92.0, 87.0, 58.0, 49.0, 24.0, 23.0, 22.0, 13.0, 14.0, 12.0, 10.0, 13.0, 8.0, 7.0, 7.0, 4.0, 2.0, 2.0, 1.0, 3.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0], "bins": [-256.5, -248.21875, -239.9375, -231.65625, -223.375, -215.09375, -206.8125, -198.53125, -190.25, -181.96875, -173.6875, -165.40625, -157.125, -148.84375, -140.5625, -132.28125, -124.0, -115.71875, -107.4375, -99.15625, -90.875, -82.59375, -74.3125, -66.03125, -57.75, -49.46875, -41.1875, -32.90625, -24.625, -16.34375, -8.0625, 0.21875, 8.5, 16.78125, 25.0625, 33.34375, 41.625, 49.90625, 58.1875, 66.46875, 74.75, 83.03125, 91.3125, 99.59375, 107.875, 116.15625, 124.4375, 132.71875, 141.0, 149.28125, 157.5625, 165.84375, 174.125, 182.40625, 190.6875, 198.96875, 207.25, 215.53125, 223.8125, 232.09375, 240.375, 248.65625, 256.9375, 265.21875, 273.5]}, "gradients/decoder.transformer.h.17.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 75.0, 774.0, 162.0, 9.0], "bins": [-1643.2232666015625, -1616.5428466796875, -1589.8623046875, -1563.181884765625, -1536.50146484375, -1509.8209228515625, -1483.1405029296875, -1456.4600830078125, -1429.779541015625, -1403.09912109375, -1376.4185791015625, -1349.7381591796875, -1323.0577392578125, -1296.377197265625, -1269.69677734375, -1243.016357421875, -1216.3359375, -1189.655517578125, -1162.9749755859375, -1136.2945556640625, -1109.6141357421875, -1082.93359375, -1056.253173828125, -1029.57275390625, -1002.8922119140625, -976.2117309570312, -949.5313110351562, -922.850830078125, -896.1703491210938, -869.4898681640625, -842.8094482421875, -816.1289672851562, -789.4484252929688, -762.7679443359375, -736.0875244140625, -709.4070434570312, -682.7265625, -656.046142578125, -629.3656616210938, -602.6851806640625, -576.0047607421875, -549.3242797851562, -522.6438598632812, -495.96337890625, -469.28289794921875, -442.6024475097656, -415.9219970703125, -389.24151611328125, -362.56103515625, -335.8805847167969, -309.2001037597656, -282.5196533203125, -255.8391876220703, -229.15872192382812, -202.478271484375, -175.7978057861328, -149.11734008789062, -122.43687438964844, -95.75641632080078, -69.07595825195312, -42.39549255371094, -15.71502685546875, 10.965423583984375, 37.64588928222656, 64.32635498046875]}, "gradients/decoder.transformer.h.17.ln_1.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 3.0, 3.0, 4.0, 4.0, 2.0, 6.0, 7.0, 5.0, 8.0, 8.0, 8.0, 17.0, 16.0, 18.0, 15.0, 25.0, 23.0, 30.0, 24.0, 33.0, 40.0, 33.0, 47.0, 42.0, 62.0, 42.0, 51.0, 36.0, 45.0, 45.0, 55.0, 22.0, 39.0, 28.0, 35.0, 23.0, 20.0, 20.0, 20.0, 10.0, 8.0, 15.0, 6.0, 4.0, 5.0, 1.0, 3.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-367.45843505859375, -354.465576171875, -341.47271728515625, -328.4798278808594, -315.4869689941406, -302.4941101074219, -289.501220703125, -276.50836181640625, -263.5155029296875, -250.52264404296875, -237.52976989746094, -224.53689575195312, -211.54403686523438, -198.55117797851562, -185.5583038330078, -172.5654296875, -159.57257080078125, -146.5797119140625, -133.5868377685547, -120.5939712524414, -107.60110473632812, -94.60823822021484, -81.61537170410156, -68.62250518798828, -55.629638671875, -42.63677215576172, -29.643905639648438, -16.651039123535156, -3.658172607421875, 9.334693908691406, 22.327560424804688, 35.32042694091797, 48.31329345703125, 61.30615997314453, 74.29902648925781, 87.2918930053711, 100.28475952148438, 113.27762603759766, 126.27049255371094, 139.26336669921875, 152.2562255859375, 165.24908447265625, 178.24195861816406, 191.23483276367188, 204.22769165039062, 217.22055053710938, 230.2134246826172, 243.206298828125, 256.19915771484375, 269.1920166015625, 282.18487548828125, 295.1777648925781, 308.1706237792969, 321.1634826660156, 334.1563720703125, 347.14923095703125, 360.14208984375, 373.13494873046875, 386.1278076171875, 399.1206970214844, 412.1135559082031, 425.1064147949219, 438.09930419921875, 451.0921630859375, 464.08502197265625]}, "gradients/decoder.transformer.h.16.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 3.0, 2.0, 0.0, 3.0, 3.0, 5.0, 5.0, 2.0, 3.0, 5.0, 8.0, 7.0, 13.0, 9.0, 12.0, 17.0, 20.0, 18.0, 28.0, 24.0, 42.0, 38.0, 46.0, 52.0, 42.0, 53.0, 39.0, 41.0, 50.0, 48.0, 44.0, 35.0, 23.0, 50.0, 35.0, 26.0, 21.0, 22.0, 21.0, 19.0, 10.0, 6.0, 8.0, 14.0, 14.0, 8.0, 3.0, 3.0, 6.0, 2.0, 7.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0], "bins": [-44.84375, -43.419921875, -41.99609375, -40.572265625, -39.1484375, -37.724609375, -36.30078125, -34.876953125, -33.453125, -32.029296875, -30.60546875, -29.181640625, -27.7578125, -26.333984375, -24.91015625, -23.486328125, -22.0625, -20.638671875, -19.21484375, -17.791015625, -16.3671875, -14.943359375, -13.51953125, -12.095703125, -10.671875, -9.248046875, -7.82421875, -6.400390625, -4.9765625, -3.552734375, -2.12890625, -0.705078125, 0.71875, 2.142578125, 3.56640625, 4.990234375, 6.4140625, 7.837890625, 9.26171875, 10.685546875, 12.109375, 13.533203125, 14.95703125, 16.380859375, 17.8046875, 19.228515625, 20.65234375, 22.076171875, 23.5, 24.923828125, 26.34765625, 27.771484375, 29.1953125, 30.619140625, 32.04296875, 33.466796875, 34.890625, 36.314453125, 37.73828125, 39.162109375, 40.5859375, 42.009765625, 43.43359375, 44.857421875, 46.28125]}, "gradients/decoder.transformer.h.16.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 3.0, 1.0, 0.0, 1.0, 2.0, 2.0, 1.0, 2.0, 2.0, 3.0, 2.0, 2.0, 3.0, 6.0, 9.0, 12.0, 18.0, 27.0, 51.0, 76.0, 142.0, 244.0, 432.0, 794.0, 1621.0, 3413.0, 7902.0, 22609.0, 533535.0, 3550957.0, 49267.0, 12948.0, 5300.0, 2422.0, 1107.0, 618.0, 302.0, 177.0, 103.0, 50.0, 39.0, 31.0, 13.0, 10.0, 13.0, 5.0, 5.0, 1.0, 3.0, 4.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 3.0, 0.0, 1.0], "bins": [-192.875, -186.494140625, -180.11328125, -173.732421875, -167.3515625, -160.970703125, -154.58984375, -148.208984375, -141.828125, -135.447265625, -129.06640625, -122.685546875, -116.3046875, -109.923828125, -103.54296875, -97.162109375, -90.78125, -84.400390625, -78.01953125, -71.638671875, -65.2578125, -58.876953125, -52.49609375, -46.115234375, -39.734375, -33.353515625, -26.97265625, -20.591796875, -14.2109375, -7.830078125, -1.44921875, 4.931640625, 11.3125, 17.693359375, 24.07421875, 30.455078125, 36.8359375, 43.216796875, 49.59765625, 55.978515625, 62.359375, 68.740234375, 75.12109375, 81.501953125, 87.8828125, 94.263671875, 100.64453125, 107.025390625, 113.40625, 119.787109375, 126.16796875, 132.548828125, 138.9296875, 145.310546875, 151.69140625, 158.072265625, 164.453125, 170.833984375, 177.21484375, 183.595703125, 189.9765625, 196.357421875, 202.73828125, 209.119140625, 215.5]}, "gradients/decoder.transformer.h.16.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 1.0, 2.0, 2.0, 2.0, 5.0, 1.0, 7.0, 9.0, 10.0, 7.0, 11.0, 16.0, 15.0, 25.0, 29.0, 47.0, 85.0, 131.0, 297.0, 581.0, 1059.0, 844.0, 393.0, 194.0, 101.0, 68.0, 36.0, 29.0, 17.0, 18.0, 12.0, 7.0, 4.0, 3.0, 6.0, 3.0, 4.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0], "bins": [-178.625, -174.0166015625, -169.408203125, -164.7998046875, -160.19140625, -155.5830078125, -150.974609375, -146.3662109375, -141.7578125, -137.1494140625, -132.541015625, -127.9326171875, -123.32421875, -118.7158203125, -114.107421875, -109.4990234375, -104.890625, -100.2822265625, -95.673828125, -91.0654296875, -86.45703125, -81.8486328125, -77.240234375, -72.6318359375, -68.0234375, -63.4150390625, -58.806640625, -54.1982421875, -49.58984375, -44.9814453125, -40.373046875, -35.7646484375, -31.15625, -26.5478515625, -21.939453125, -17.3310546875, -12.72265625, -8.1142578125, -3.505859375, 1.1025390625, 5.7109375, 10.3193359375, 14.927734375, 19.5361328125, 24.14453125, 28.7529296875, 33.361328125, 37.9697265625, 42.578125, 47.1865234375, 51.794921875, 56.4033203125, 61.01171875, 65.6201171875, 70.228515625, 74.8369140625, 79.4453125, 84.0537109375, 88.662109375, 93.2705078125, 97.87890625, 102.4873046875, 107.095703125, 111.7041015625, 116.3125]}, "gradients/decoder.transformer.h.16.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 1.0, 3.0, 4.0, 4.0, 6.0, 8.0, 5.0, 13.0, 17.0, 22.0, 21.0, 34.0, 50.0, 75.0, 115.0, 214.0, 447.0, 983.0, 2125.0, 5760.0, 18489.0, 94836.0, 3918236.0, 121286.0, 20627.0, 6464.0, 2351.0, 917.0, 498.0, 235.0, 156.0, 83.0, 53.0, 49.0, 31.0, 24.0, 12.0, 8.0, 7.0, 6.0, 5.0, 8.0, 2.0, 2.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-292.75, -281.72265625, -270.6953125, -259.66796875, -248.640625, -237.61328125, -226.5859375, -215.55859375, -204.53125, -193.50390625, -182.4765625, -171.44921875, -160.421875, -149.39453125, -138.3671875, -127.33984375, -116.3125, -105.28515625, -94.2578125, -83.23046875, -72.203125, -61.17578125, -50.1484375, -39.12109375, -28.09375, -17.06640625, -6.0390625, 4.98828125, 16.015625, 27.04296875, 38.0703125, 49.09765625, 60.125, 71.15234375, 82.1796875, 93.20703125, 104.234375, 115.26171875, 126.2890625, 137.31640625, 148.34375, 159.37109375, 170.3984375, 181.42578125, 192.453125, 203.48046875, 214.5078125, 225.53515625, 236.5625, 247.58984375, 258.6171875, 269.64453125, 280.671875, 291.69921875, 302.7265625, 313.75390625, 324.78125, 335.80859375, 346.8359375, 357.86328125, 368.890625, 379.91796875, 390.9453125, 401.97265625, 413.0]}, "gradients/decoder.transformer.h.16.ln_2.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 3.0, 1.0, 1.0, 2.0, 1.0, 3.0, 7.0, 7.0, 13.0, 14.0, 29.0, 34.0, 42.0, 86.0, 146.0, 219.0, 188.0, 91.0, 60.0, 25.0, 18.0, 9.0, 6.0, 4.0, 3.0, 4.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-602.813232421875, -578.3146362304688, -553.8159790039062, -529.3173828125, -504.8187255859375, -480.32012939453125, -455.8215026855469, -431.3228759765625, -406.8242492675781, -382.32562255859375, -357.8269958496094, -333.328369140625, -308.82977294921875, -284.33111572265625, -259.83251953125, -235.33389282226562, -210.83526611328125, -186.33663940429688, -161.8380126953125, -137.3394012451172, -112.84077453613281, -88.34214782714844, -63.843536376953125, -39.34490966796875, -14.846282958984375, 9.652339935302734, 34.150962829589844, 58.64958190917969, 83.14820861816406, 107.64683532714844, 132.14544677734375, 156.64407348632812, 181.1427001953125, 205.64132690429688, 230.13995361328125, 254.63856506347656, 279.13720703125, 303.63580322265625, 328.1344299316406, 352.633056640625, 377.1316833496094, 401.63031005859375, 426.1289367675781, 450.6275634765625, 475.12615966796875, 499.62481689453125, 524.1234130859375, 548.6220703125, 573.1206665039062, 597.6192626953125, 622.117919921875, 646.6165161132812, 671.1151733398438, 695.61376953125, 720.1124267578125, 744.6110229492188, 769.109619140625, 793.6082153320312, 818.1068725585938, 842.60546875, 867.1041259765625, 891.6027221679688, 916.101318359375, 940.5999755859375, 965.0986328125]}, "gradients/decoder.transformer.h.16.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 3.0, 8.0, 8.0, 14.0, 12.0, 15.0, 15.0, 17.0, 28.0, 14.0, 22.0, 45.0, 37.0, 31.0, 33.0, 51.0, 52.0, 34.0, 46.0, 40.0, 30.0, 44.0, 52.0, 41.0, 37.0, 46.0, 38.0, 40.0, 31.0, 30.0, 19.0, 24.0, 12.0, 8.0, 11.0, 10.0, 3.0, 4.0, 3.0, 1.0, 2.0, 3.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-343.0869445800781, -331.10101318359375, -319.1151123046875, -307.1291809082031, -295.14324951171875, -283.1573181152344, -271.17138671875, -259.18548583984375, -247.19955444335938, -235.213623046875, -223.2277069091797, -211.24179077148438, -199.255859375, -187.26992797851562, -175.2840118408203, -163.298095703125, -151.31216430664062, -139.32623291015625, -127.34031677246094, -115.3543930053711, -103.36846923828125, -91.3825454711914, -79.39662170410156, -67.41069793701172, -55.424774169921875, -43.43885040283203, -31.452926635742188, -19.467002868652344, -7.4810791015625, 4.504844665527344, 16.490768432617188, 28.47669219970703, 40.462615966796875, 52.44853973388672, 64.43446350097656, 76.4203872680664, 88.40631103515625, 100.3922348022461, 112.37815856933594, 124.36408233642578, 136.35000610351562, 148.3359375, 160.3218536376953, 172.30776977539062, 184.293701171875, 196.27963256835938, 208.2655487060547, 220.25146484375, 232.23739624023438, 244.22332763671875, 256.209228515625, 268.1951599121094, 280.18109130859375, 292.1670227050781, 304.1529541015625, 316.13885498046875, 328.1247863769531, 340.1107177734375, 352.09661865234375, 364.0825500488281, 376.0684814453125, 388.0544128417969, 400.04034423828125, 412.0262451171875, 424.0121765136719]}, "gradients/decoder.transformer.h.16.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 2.0, 4.0, 2.0, 4.0, 0.0, 2.0, 1.0, 2.0, 6.0, 6.0, 8.0, 11.0, 14.0, 21.0, 15.0, 21.0, 27.0, 16.0, 28.0, 34.0, 35.0, 35.0, 58.0, 39.0, 55.0, 45.0, 50.0, 61.0, 46.0, 42.0, 36.0, 43.0, 40.0, 27.0, 26.0, 23.0, 23.0, 16.0, 13.0, 14.0, 10.0, 10.0, 12.0, 9.0, 7.0, 2.0, 4.0, 3.0, 2.0, 5.0, 1.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-46.53125, -45.04833984375, -43.5654296875, -42.08251953125, -40.599609375, -39.11669921875, -37.6337890625, -36.15087890625, -34.66796875, -33.18505859375, -31.7021484375, -30.21923828125, -28.736328125, -27.25341796875, -25.7705078125, -24.28759765625, -22.8046875, -21.32177734375, -19.8388671875, -18.35595703125, -16.873046875, -15.39013671875, -13.9072265625, -12.42431640625, -10.94140625, -9.45849609375, -7.9755859375, -6.49267578125, -5.009765625, -3.52685546875, -2.0439453125, -0.56103515625, 0.921875, 2.40478515625, 3.8876953125, 5.37060546875, 6.853515625, 8.33642578125, 9.8193359375, 11.30224609375, 12.78515625, 14.26806640625, 15.7509765625, 17.23388671875, 18.716796875, 20.19970703125, 21.6826171875, 23.16552734375, 24.6484375, 26.13134765625, 27.6142578125, 29.09716796875, 30.580078125, 32.06298828125, 33.5458984375, 35.02880859375, 36.51171875, 37.99462890625, 39.4775390625, 40.96044921875, 42.443359375, 43.92626953125, 45.4091796875, 46.89208984375, 48.375]}, "gradients/decoder.transformer.h.16.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 3.0, 3.0, 6.0, 3.0, 7.0, 12.0, 14.0, 12.0, 20.0, 37.0, 51.0, 63.0, 83.0, 119.0, 158.0, 211.0, 292.0, 434.0, 609.0, 861.0, 1400.0, 2559.0, 5935.0, 20107.0, 103970.0, 708431.0, 160341.0, 27381.0, 7521.0, 3071.0, 1634.0, 967.0, 639.0, 450.0, 305.0, 238.0, 157.0, 144.0, 79.0, 65.0, 47.0, 33.0, 26.0, 24.0, 16.0, 10.0, 8.0, 4.0, 4.0, 2.0, 0.0, 1.0, 2.0, 1.0, 3.0], "bins": [-20.03125, -19.452392578125, -18.87353515625, -18.294677734375, -17.7158203125, -17.136962890625, -16.55810546875, -15.979248046875, -15.400390625, -14.821533203125, -14.24267578125, -13.663818359375, -13.0849609375, -12.506103515625, -11.92724609375, -11.348388671875, -10.76953125, -10.190673828125, -9.61181640625, -9.032958984375, -8.4541015625, -7.875244140625, -7.29638671875, -6.717529296875, -6.138671875, -5.559814453125, -4.98095703125, -4.402099609375, -3.8232421875, -3.244384765625, -2.66552734375, -2.086669921875, -1.5078125, -0.928955078125, -0.35009765625, 0.228759765625, 0.8076171875, 1.386474609375, 1.96533203125, 2.544189453125, 3.123046875, 3.701904296875, 4.28076171875, 4.859619140625, 5.4384765625, 6.017333984375, 6.59619140625, 7.175048828125, 7.75390625, 8.332763671875, 8.91162109375, 9.490478515625, 10.0693359375, 10.648193359375, 11.22705078125, 11.805908203125, 12.384765625, 12.963623046875, 13.54248046875, 14.121337890625, 14.7001953125, 15.279052734375, 15.85791015625, 16.436767578125, 17.015625]}, "gradients/decoder.transformer.h.16.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 3.0, 4.0, 6.0, 5.0, 4.0, 7.0, 8.0, 6.0, 10.0, 10.0, 16.0, 18.0, 30.0, 13.0, 19.0, 34.0, 25.0, 26.0, 25.0, 32.0, 29.0, 35.0, 54.0, 47.0, 37.0, 1055.0, 42.0, 45.0, 45.0, 33.0, 42.0, 28.0, 31.0, 36.0, 25.0, 15.0, 19.0, 14.0, 17.0, 19.0, 9.0, 11.0, 13.0, 9.0, 4.0, 6.0, 3.0, 4.0, 4.0, 2.0, 3.0, 4.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-24.796875, -23.91943359375, -23.0419921875, -22.16455078125, -21.287109375, -20.40966796875, -19.5322265625, -18.65478515625, -17.77734375, -16.89990234375, -16.0224609375, -15.14501953125, -14.267578125, -13.39013671875, -12.5126953125, -11.63525390625, -10.7578125, -9.88037109375, -9.0029296875, -8.12548828125, -7.248046875, -6.37060546875, -5.4931640625, -4.61572265625, -3.73828125, -2.86083984375, -1.9833984375, -1.10595703125, -0.228515625, 0.64892578125, 1.5263671875, 2.40380859375, 3.28125, 4.15869140625, 5.0361328125, 5.91357421875, 6.791015625, 7.66845703125, 8.5458984375, 9.42333984375, 10.30078125, 11.17822265625, 12.0556640625, 12.93310546875, 13.810546875, 14.68798828125, 15.5654296875, 16.44287109375, 17.3203125, 18.19775390625, 19.0751953125, 19.95263671875, 20.830078125, 21.70751953125, 22.5849609375, 23.46240234375, 24.33984375, 25.21728515625, 26.0947265625, 26.97216796875, 27.849609375, 28.72705078125, 29.6044921875, 30.48193359375, 31.359375]}, "gradients/decoder.transformer.h.16.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 2.0, 1.0, 4.0, 4.0, 4.0, 6.0, 7.0, 20.0, 29.0, 29.0, 44.0, 60.0, 58.0, 102.0, 142.0, 179.0, 267.0, 378.0, 625.0, 940.0, 1476.0, 2473.0, 4506.0, 8457.0, 17865.0, 40836.0, 103329.0, 397564.0, 1297408.0, 128425.0, 48629.0, 20763.0, 9863.0, 5054.0, 2774.0, 1648.0, 1044.0, 617.0, 493.0, 297.0, 223.0, 147.0, 85.0, 81.0, 48.0, 39.0, 27.0, 17.0, 12.0, 13.0, 12.0, 2.0, 7.0, 3.0, 4.0, 1.0, 3.0, 1.0, 0.0, 0.0, 2.0], "bins": [-4.53515625, -4.38934326171875, -4.2435302734375, -4.09771728515625, -3.951904296875, -3.80609130859375, -3.6602783203125, -3.51446533203125, -3.36865234375, -3.22283935546875, -3.0770263671875, -2.93121337890625, -2.785400390625, -2.63958740234375, -2.4937744140625, -2.34796142578125, -2.2021484375, -2.05633544921875, -1.9105224609375, -1.76470947265625, -1.618896484375, -1.47308349609375, -1.3272705078125, -1.18145751953125, -1.03564453125, -0.88983154296875, -0.7440185546875, -0.59820556640625, -0.452392578125, -0.30657958984375, -0.1607666015625, -0.01495361328125, 0.130859375, 0.27667236328125, 0.4224853515625, 0.56829833984375, 0.714111328125, 0.85992431640625, 1.0057373046875, 1.15155029296875, 1.29736328125, 1.44317626953125, 1.5889892578125, 1.73480224609375, 1.880615234375, 2.02642822265625, 2.1722412109375, 2.31805419921875, 2.4638671875, 2.60968017578125, 2.7554931640625, 2.90130615234375, 3.047119140625, 3.19293212890625, 3.3387451171875, 3.48455810546875, 3.63037109375, 3.77618408203125, 3.9219970703125, 4.06781005859375, 4.213623046875, 4.35943603515625, 4.5052490234375, 4.65106201171875, 4.796875]}, "gradients/decoder.transformer.h.16.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 4.0, 1.0, 4.0, 1.0, 3.0, 2.0, 3.0, 3.0, 6.0, 7.0, 2.0, 7.0, 6.0, 10.0, 9.0, 17.0, 38.0, 43.0, 39.0, 89.0, 225.0, 212.0, 80.0, 51.0, 30.0, 26.0, 17.0, 19.0, 12.0, 8.0, 11.0, 3.0, 7.0, 2.0, 2.0, 3.0, 6.0, 5.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0], "bins": [-0.9453125, -0.9182662963867188, -0.8912200927734375, -0.8641738891601562, -0.837127685546875, -0.8100814819335938, -0.7830352783203125, -0.7559890747070312, -0.72894287109375, -0.7018966674804688, -0.6748504638671875, -0.6478042602539062, -0.620758056640625, -0.5937118530273438, -0.5666656494140625, -0.5396194458007812, -0.5125732421875, -0.48552703857421875, -0.4584808349609375, -0.43143463134765625, -0.404388427734375, -0.37734222412109375, -0.3502960205078125, -0.32324981689453125, -0.29620361328125, -0.26915740966796875, -0.2421112060546875, -0.21506500244140625, -0.188018798828125, -0.16097259521484375, -0.1339263916015625, -0.10688018798828125, -0.079833984375, -0.05278778076171875, -0.0257415771484375, 0.00130462646484375, 0.028350830078125, 0.05539703369140625, 0.0824432373046875, 0.10948944091796875, 0.13653564453125, 0.16358184814453125, 0.1906280517578125, 0.21767425537109375, 0.244720458984375, 0.27176666259765625, 0.2988128662109375, 0.32585906982421875, 0.3529052734375, 0.37995147705078125, 0.4069976806640625, 0.43404388427734375, 0.461090087890625, 0.48813629150390625, 0.5151824951171875, 0.5422286987304688, 0.56927490234375, 0.5963211059570312, 0.6233673095703125, 0.6504135131835938, 0.677459716796875, 0.7045059204101562, 0.7315521240234375, 0.7585983276367188, 0.78564453125]}, "gradients/decoder.transformer.h.16.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 4.0, 3.0, 4.0, 4.0, 4.0, 3.0, 5.0, 6.0, 8.0, 10.0, 13.0, 18.0, 24.0, 23.0, 54.0, 73.0, 157.0, 21887.0, 1025849.0, 157.0, 61.0, 65.0, 38.0, 20.0, 13.0, 11.0, 4.0, 5.0, 6.0, 4.0, 5.0, 7.0, 3.0, 4.0, 2.0, 2.0, 1.0, 3.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-25.265625, -24.3955078125, -23.525390625, -22.6552734375, -21.78515625, -20.9150390625, -20.044921875, -19.1748046875, -18.3046875, -17.4345703125, -16.564453125, -15.6943359375, -14.82421875, -13.9541015625, -13.083984375, -12.2138671875, -11.34375, -10.4736328125, -9.603515625, -8.7333984375, -7.86328125, -6.9931640625, -6.123046875, -5.2529296875, -4.3828125, -3.5126953125, -2.642578125, -1.7724609375, -0.90234375, -0.0322265625, 0.837890625, 1.7080078125, 2.578125, 3.4482421875, 4.318359375, 5.1884765625, 6.05859375, 6.9287109375, 7.798828125, 8.6689453125, 9.5390625, 10.4091796875, 11.279296875, 12.1494140625, 13.01953125, 13.8896484375, 14.759765625, 15.6298828125, 16.5, 17.3701171875, 18.240234375, 19.1103515625, 19.98046875, 20.8505859375, 21.720703125, 22.5908203125, 23.4609375, 24.3310546875, 25.201171875, 26.0712890625, 26.94140625, 27.8115234375, 28.681640625, 29.5517578125, 30.421875]}, "gradients/decoder.transformer.h.16.ln_cross_attn.weight": {"_type": "histogram", "values": [2.0, 613.0, 405.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.7463743686676025, -0.3703273832798004, 0.005719602108001709, 0.38176655769348145, 0.757813572883606, 1.1338605880737305, 1.5099074840545654, 1.8859546184539795, 2.2620015144348145, 2.6380484104156494, 3.0140955448150635, 3.3901424407958984, 3.7661895751953125, 4.142236709594727, 4.518283367156982, 4.8943305015563965, 5.270377159118652, 5.646424293518066, 6.022470951080322, 6.398518085479736, 6.77456521987915, 7.150611877441406, 7.52665901184082, 7.902706146240234, 8.278753280639648, 8.654800415039062, 9.030847549438477, 9.40689468383789, 9.782940864562988, 10.158987998962402, 10.535035133361816, 10.91108226776123, 11.287129402160645, 11.663176536560059, 12.039223670959473, 12.41526985168457, 12.791316986083984, 13.167364120483398, 13.543411254882812, 13.919458389282227, 14.29550552368164, 14.671552658081055, 15.047599792480469, 15.423646926879883, 15.79969310760498, 16.17574119567871, 16.551788330078125, 16.927833557128906, 17.30388069152832, 17.679927825927734, 18.05597496032715, 18.432022094726562, 18.808069229125977, 19.18411636352539, 19.560161590576172, 19.93621063232422, 20.312257766723633, 20.688304901123047, 21.06435203552246, 21.440399169921875, 21.81644630432129, 22.192493438720703, 22.568538665771484, 22.94458770751953, 23.320632934570312]}, "gradients/decoder.transformer.h.16.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 3.0, 1.0, 2.0, 2.0, 1.0, 4.0, 10.0, 8.0, 8.0, 13.0, 15.0, 19.0, 17.0, 28.0, 19.0, 19.0, 26.0, 27.0, 38.0, 41.0, 34.0, 28.0, 47.0, 29.0, 33.0, 31.0, 47.0, 50.0, 49.0, 33.0, 38.0, 20.0, 37.0, 31.0, 26.0, 28.0, 12.0, 18.0, 21.0, 17.0, 16.0, 13.0, 11.0, 10.0, 7.0, 5.0, 3.0, 8.0, 2.0, 2.0, 6.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-1.2013076543807983, -1.1610857248306274, -1.120863914489746, -1.0806419849395752, -1.0404200553894043, -1.0001981258392334, -0.9599762558937073, -0.9197543859481812, -0.8795324563980103, -0.8393105268478394, -0.7990886569023132, -0.7588667869567871, -0.7186448574066162, -0.6784229278564453, -0.6382010579109192, -0.5979791879653931, -0.5577572584152222, -0.5175353288650513, -0.47731345891952515, -0.43709155917167664, -0.3968696594238281, -0.3566477596759796, -0.3164258599281311, -0.2762039601802826, -0.23598206043243408, -0.19576016068458557, -0.15553826093673706, -0.11531636118888855, -0.07509446144104004, -0.03487256169319153, 0.005349338054656982, 0.04557123780250549, 0.08579325675964355, 0.12601515650749207, 0.16623705625534058, 0.2064589560031891, 0.2466808557510376, 0.2869027554988861, 0.3271246552467346, 0.36734655499458313, 0.40756845474243164, 0.44779035449028015, 0.48801225423812866, 0.5282341241836548, 0.5684560537338257, 0.6086779832839966, 0.6488998532295227, 0.6891217231750488, 0.7293436527252197, 0.7695655822753906, 0.8097874522209167, 0.8500093221664429, 0.8902312517166138, 0.9304531812667847, 0.9706750512123108, 1.010896921157837, 1.0511188507080078, 1.0913407802581787, 1.1315627098083496, 1.171784520149231, 1.2120064496994019, 1.2522283792495728, 1.292450189590454, 1.332672119140625, 1.372894048690796]}, "gradients/decoder.transformer.h.16.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 2.0, 4.0, 2.0, 4.0, 0.0, 2.0, 1.0, 2.0, 6.0, 6.0, 8.0, 10.0, 15.0, 21.0, 15.0, 21.0, 27.0, 16.0, 28.0, 34.0, 35.0, 35.0, 58.0, 39.0, 55.0, 45.0, 50.0, 61.0, 46.0, 42.0, 36.0, 43.0, 40.0, 27.0, 26.0, 23.0, 23.0, 16.0, 13.0, 14.0, 10.0, 10.0, 12.0, 9.0, 7.0, 2.0, 4.0, 3.0, 2.0, 5.0, 1.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-46.53125, -45.04833984375, -43.5654296875, -42.08251953125, -40.599609375, -39.11669921875, -37.6337890625, -36.15087890625, -34.66796875, -33.18505859375, -31.7021484375, -30.21923828125, -28.736328125, -27.25341796875, -25.7705078125, -24.28759765625, -22.8046875, -21.32177734375, -19.8388671875, -18.35595703125, -16.873046875, -15.39013671875, -13.9072265625, -12.42431640625, -10.94140625, -9.45849609375, -7.9755859375, -6.49267578125, -5.009765625, -3.52685546875, -2.0439453125, -0.56103515625, 0.921875, 2.40478515625, 3.8876953125, 5.37060546875, 6.853515625, 8.33642578125, 9.8193359375, 11.30224609375, 12.78515625, 14.26806640625, 15.7509765625, 17.23388671875, 18.716796875, 20.19970703125, 21.6826171875, 23.16552734375, 24.6484375, 26.13134765625, 27.6142578125, 29.09716796875, 30.580078125, 32.06298828125, 33.5458984375, 35.02880859375, 36.51171875, 37.99462890625, 39.4775390625, 40.96044921875, 42.443359375, 43.92626953125, 45.4091796875, 46.89208984375, 48.375]}, "gradients/decoder.transformer.h.16.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 3.0, 1.0, 4.0, 7.0, 10.0, 9.0, 6.0, 16.0, 29.0, 30.0, 55.0, 41.0, 92.0, 119.0, 183.0, 275.0, 431.0, 654.0, 1131.0, 1862.0, 3487.0, 6686.0, 14390.0, 47747.0, 875243.0, 63597.0, 16082.0, 7277.0, 3760.0, 2076.0, 1167.0, 695.0, 478.0, 276.0, 194.0, 141.0, 97.0, 55.0, 42.0, 28.0, 20.0, 14.0, 13.0, 15.0, 11.0, 7.0, 3.0, 2.0, 3.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-113.4375, -110.0576171875, -106.677734375, -103.2978515625, -99.91796875, -96.5380859375, -93.158203125, -89.7783203125, -86.3984375, -83.0185546875, -79.638671875, -76.2587890625, -72.87890625, -69.4990234375, -66.119140625, -62.7392578125, -59.359375, -55.9794921875, -52.599609375, -49.2197265625, -45.83984375, -42.4599609375, -39.080078125, -35.7001953125, -32.3203125, -28.9404296875, -25.560546875, -22.1806640625, -18.80078125, -15.4208984375, -12.041015625, -8.6611328125, -5.28125, -1.9013671875, 1.478515625, 4.8583984375, 8.23828125, 11.6181640625, 14.998046875, 18.3779296875, 21.7578125, 25.1376953125, 28.517578125, 31.8974609375, 35.27734375, 38.6572265625, 42.037109375, 45.4169921875, 48.796875, 52.1767578125, 55.556640625, 58.9365234375, 62.31640625, 65.6962890625, 69.076171875, 72.4560546875, 75.8359375, 79.2158203125, 82.595703125, 85.9755859375, 89.35546875, 92.7353515625, 96.115234375, 99.4951171875, 102.875]}, "gradients/decoder.transformer.h.16.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 2.0, 1.0, 4.0, 3.0, 8.0, 1.0, 8.0, 4.0, 4.0, 8.0, 12.0, 15.0, 14.0, 25.0, 20.0, 28.0, 23.0, 39.0, 28.0, 48.0, 43.0, 55.0, 43.0, 69.0, 96.0, 1815.0, 137.0, 66.0, 66.0, 47.0, 36.0, 38.0, 43.0, 24.0, 30.0, 21.0, 23.0, 17.0, 14.0, 13.0, 12.0, 10.0, 17.0, 6.0, 5.0, 6.0, 2.0, 4.0, 2.0, 1.0, 4.0, 1.0, 2.0, 1.0, 0.0, 2.0], "bins": [-154.75, -150.166015625, -145.58203125, -140.998046875, -136.4140625, -131.830078125, -127.24609375, -122.662109375, -118.078125, -113.494140625, -108.91015625, -104.326171875, -99.7421875, -95.158203125, -90.57421875, -85.990234375, -81.40625, -76.822265625, -72.23828125, -67.654296875, -63.0703125, -58.486328125, -53.90234375, -49.318359375, -44.734375, -40.150390625, -35.56640625, -30.982421875, -26.3984375, -21.814453125, -17.23046875, -12.646484375, -8.0625, -3.478515625, 1.10546875, 5.689453125, 10.2734375, 14.857421875, 19.44140625, 24.025390625, 28.609375, 33.193359375, 37.77734375, 42.361328125, 46.9453125, 51.529296875, 56.11328125, 60.697265625, 65.28125, 69.865234375, 74.44921875, 79.033203125, 83.6171875, 88.201171875, 92.78515625, 97.369140625, 101.953125, 106.537109375, 111.12109375, 115.705078125, 120.2890625, 124.873046875, 129.45703125, 134.041015625, 138.625]}, "gradients/decoder.transformer.h.16.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 4.0, 2.0, 2.0, 0.0, 3.0, 5.0, 3.0, 8.0, 12.0, 5.0, 15.0, 19.0, 28.0, 33.0, 43.0, 68.0, 108.0, 227.0, 629.0, 2419.0, 12444.0, 202910.0, 2902310.0, 19234.0, 3588.0, 897.0, 297.0, 124.0, 83.0, 47.0, 36.0, 26.0, 16.0, 12.0, 15.0, 11.0, 5.0, 5.0, 5.0, 8.0, 2.0, 2.0, 2.0, 0.0, 1.0, 4.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-447.5, -433.56640625, -419.6328125, -405.69921875, -391.765625, -377.83203125, -363.8984375, -349.96484375, -336.03125, -322.09765625, -308.1640625, -294.23046875, -280.296875, -266.36328125, -252.4296875, -238.49609375, -224.5625, -210.62890625, -196.6953125, -182.76171875, -168.828125, -154.89453125, -140.9609375, -127.02734375, -113.09375, -99.16015625, -85.2265625, -71.29296875, -57.359375, -43.42578125, -29.4921875, -15.55859375, -1.625, 12.30859375, 26.2421875, 40.17578125, 54.109375, 68.04296875, 81.9765625, 95.91015625, 109.84375, 123.77734375, 137.7109375, 151.64453125, 165.578125, 179.51171875, 193.4453125, 207.37890625, 221.3125, 235.24609375, 249.1796875, 263.11328125, 277.046875, 290.98046875, 304.9140625, 318.84765625, 332.78125, 346.71484375, 360.6484375, 374.58203125, 388.515625, 402.44921875, 416.3828125, 430.31640625, 444.25]}, "gradients/decoder.transformer.h.16.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 17.0, 88.0, 455.0, 360.0, 76.0, 17.0, 4.0], "bins": [-2734.4541015625, -2688.796875, -2643.139404296875, -2597.482177734375, -2551.824951171875, -2506.16748046875, -2460.51025390625, -2414.85302734375, -2369.195556640625, -2323.538330078125, -2277.880859375, -2232.2236328125, -2186.56640625, -2140.908935546875, -2095.251708984375, -2049.594482421875, -2003.9371337890625, -1958.27978515625, -1912.62255859375, -1866.9652099609375, -1821.307861328125, -1775.650634765625, -1729.9932861328125, -1684.3359375, -1638.6787109375, -1593.0213623046875, -1547.3641357421875, -1501.706787109375, -1456.0494384765625, -1410.39208984375, -1364.73486328125, -1319.0775146484375, -1273.420166015625, -1227.7628173828125, -1182.1055908203125, -1136.4482421875, -1090.7908935546875, -1045.133544921875, -999.476318359375, -953.8189697265625, -908.1616821289062, -862.50439453125, -816.8470458984375, -771.1897583007812, -725.532470703125, -679.8751220703125, -634.2178344726562, -588.560546875, -542.9031982421875, -497.2458801269531, -451.58856201171875, -405.9312744140625, -360.2739562988281, -314.61663818359375, -268.9593505859375, -223.30203247070312, -177.6447296142578, -131.9874267578125, -86.33010864257812, -40.67280578613281, 4.9845123291015625, 50.64183044433594, 96.29911804199219, 141.95643615722656, 187.61375427246094]}, "gradients/decoder.transformer.h.16.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 4.0, 2.0, 2.0, 5.0, 7.0, 3.0, 8.0, 10.0, 13.0, 11.0, 10.0, 19.0, 24.0, 24.0, 34.0, 29.0, 37.0, 43.0, 35.0, 43.0, 55.0, 58.0, 46.0, 55.0, 44.0, 47.0, 41.0, 37.0, 34.0, 37.0, 37.0, 33.0, 29.0, 21.0, 17.0, 12.0, 6.0, 12.0, 13.0, 4.0, 6.0, 3.0, 6.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-533.1029663085938, -517.2779541015625, -501.452880859375, -485.62786865234375, -469.8028259277344, -453.977783203125, -438.1527404785156, -422.32769775390625, -406.502685546875, -390.6776428222656, -374.85260009765625, -359.027587890625, -343.2025451660156, -327.37750244140625, -311.5524597167969, -295.7274169921875, -279.9023742675781, -264.07733154296875, -248.25230407714844, -232.42726135253906, -216.60223388671875, -200.77719116210938, -184.9521484375, -169.12710571289062, -153.3020782470703, -137.47703552246094, -121.65200805664062, -105.82696533203125, -90.0019302368164, -74.17689514160156, -58.35185241699219, -42.526817321777344, -26.701751708984375, -10.876714706420898, 4.948322296142578, 20.773361206054688, 36.59839630126953, 52.423431396484375, 68.24847412109375, 84.0735092163086, 99.89854431152344, 115.72357940673828, 131.54861450195312, 147.3736572265625, 163.19869995117188, 179.0237274169922, 194.84877014160156, 210.67379760742188, 226.49884033203125, 242.32388305664062, 258.14892578125, 273.97393798828125, 289.7989807128906, 305.6240234375, 321.4490661621094, 337.27410888671875, 353.09912109375, 368.9241638183594, 384.74920654296875, 400.57421875, 416.3992614746094, 432.22430419921875, 448.0493469238281, 463.8743896484375, 479.6994323730469]}, "gradients/decoder.transformer.h.15.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 2.0, 5.0, 3.0, 3.0, 1.0, 1.0, 2.0, 8.0, 5.0, 7.0, 12.0, 16.0, 16.0, 18.0, 18.0, 21.0, 14.0, 21.0, 27.0, 42.0, 46.0, 41.0, 43.0, 40.0, 57.0, 48.0, 56.0, 52.0, 39.0, 47.0, 39.0, 33.0, 24.0, 26.0, 29.0, 28.0, 20.0, 22.0, 13.0, 12.0, 10.0, 10.0, 7.0, 5.0, 9.0, 3.0, 6.0, 3.0, 2.0, 3.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-48.15625, -46.59716796875, -45.0380859375, -43.47900390625, -41.919921875, -40.36083984375, -38.8017578125, -37.24267578125, -35.68359375, -34.12451171875, -32.5654296875, -31.00634765625, -29.447265625, -27.88818359375, -26.3291015625, -24.77001953125, -23.2109375, -21.65185546875, -20.0927734375, -18.53369140625, -16.974609375, -15.41552734375, -13.8564453125, -12.29736328125, -10.73828125, -9.17919921875, -7.6201171875, -6.06103515625, -4.501953125, -2.94287109375, -1.3837890625, 0.17529296875, 1.734375, 3.29345703125, 4.8525390625, 6.41162109375, 7.970703125, 9.52978515625, 11.0888671875, 12.64794921875, 14.20703125, 15.76611328125, 17.3251953125, 18.88427734375, 20.443359375, 22.00244140625, 23.5615234375, 25.12060546875, 26.6796875, 28.23876953125, 29.7978515625, 31.35693359375, 32.916015625, 34.47509765625, 36.0341796875, 37.59326171875, 39.15234375, 40.71142578125, 42.2705078125, 43.82958984375, 45.388671875, 46.94775390625, 48.5068359375, 50.06591796875, 51.625]}, "gradients/decoder.transformer.h.15.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 3.0, 2.0, 1.0, 5.0, 3.0, 8.0, 5.0, 10.0, 13.0, 23.0, 19.0, 43.0, 56.0, 78.0, 92.0, 175.0, 281.0, 431.0, 687.0, 1123.0, 1962.0, 3420.0, 6595.0, 14468.0, 51786.0, 1587943.0, 2426542.0, 66099.0, 15745.0, 7358.0, 3899.0, 2123.0, 1204.0, 772.0, 455.0, 296.0, 185.0, 114.0, 84.0, 57.0, 30.0, 25.0, 25.0, 6.0, 11.0, 14.0, 5.0, 5.0, 2.0, 1.0, 2.0, 3.0, 3.0, 1.0, 0.0, 1.0], "bins": [-156.625, -152.025390625, -147.42578125, -142.826171875, -138.2265625, -133.626953125, -129.02734375, -124.427734375, -119.828125, -115.228515625, -110.62890625, -106.029296875, -101.4296875, -96.830078125, -92.23046875, -87.630859375, -83.03125, -78.431640625, -73.83203125, -69.232421875, -64.6328125, -60.033203125, -55.43359375, -50.833984375, -46.234375, -41.634765625, -37.03515625, -32.435546875, -27.8359375, -23.236328125, -18.63671875, -14.037109375, -9.4375, -4.837890625, -0.23828125, 4.361328125, 8.9609375, 13.560546875, 18.16015625, 22.759765625, 27.359375, 31.958984375, 36.55859375, 41.158203125, 45.7578125, 50.357421875, 54.95703125, 59.556640625, 64.15625, 68.755859375, 73.35546875, 77.955078125, 82.5546875, 87.154296875, 91.75390625, 96.353515625, 100.953125, 105.552734375, 110.15234375, 114.751953125, 119.3515625, 123.951171875, 128.55078125, 133.150390625, 137.75]}, "gradients/decoder.transformer.h.15.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 4.0, 5.0, 1.0, 11.0, 16.0, 20.0, 34.0, 36.0, 71.0, 140.0, 261.0, 704.0, 1349.0, 808.0, 284.0, 137.0, 80.0, 40.0, 29.0, 19.0, 6.0, 9.0, 6.0, 3.0, 4.0, 5.0, 5.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-259.25, -253.16015625, -247.0703125, -240.98046875, -234.890625, -228.80078125, -222.7109375, -216.62109375, -210.53125, -204.44140625, -198.3515625, -192.26171875, -186.171875, -180.08203125, -173.9921875, -167.90234375, -161.8125, -155.72265625, -149.6328125, -143.54296875, -137.453125, -131.36328125, -125.2734375, -119.18359375, -113.09375, -107.00390625, -100.9140625, -94.82421875, -88.734375, -82.64453125, -76.5546875, -70.46484375, -64.375, -58.28515625, -52.1953125, -46.10546875, -40.015625, -33.92578125, -27.8359375, -21.74609375, -15.65625, -9.56640625, -3.4765625, 2.61328125, 8.703125, 14.79296875, 20.8828125, 26.97265625, 33.0625, 39.15234375, 45.2421875, 51.33203125, 57.421875, 63.51171875, 69.6015625, 75.69140625, 81.78125, 87.87109375, 93.9609375, 100.05078125, 106.140625, 112.23046875, 118.3203125, 124.41015625, 130.5]}, "gradients/decoder.transformer.h.15.mlp.c_fc.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 1.0, 3.0, 1.0, 4.0, 4.0, 8.0, 7.0, 9.0, 10.0, 12.0, 14.0, 21.0, 15.0, 28.0, 30.0, 40.0, 51.0, 83.0, 104.0, 151.0, 280.0, 516.0, 1159.0, 3071.0, 9541.0, 40129.0, 1737747.0, 2343235.0, 42357.0, 9685.0, 3232.0, 1247.0, 560.0, 297.0, 184.0, 122.0, 75.0, 58.0, 46.0, 38.0, 26.0, 25.0, 16.0, 14.0, 14.0, 11.0, 5.0, 4.0, 1.0, 2.0, 0.0, 2.0, 0.0, 1.0, 2.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-319.0, -308.71484375, -298.4296875, -288.14453125, -277.859375, -267.57421875, -257.2890625, -247.00390625, -236.71875, -226.43359375, -216.1484375, -205.86328125, -195.578125, -185.29296875, -175.0078125, -164.72265625, -154.4375, -144.15234375, -133.8671875, -123.58203125, -113.296875, -103.01171875, -92.7265625, -82.44140625, -72.15625, -61.87109375, -51.5859375, -41.30078125, -31.015625, -20.73046875, -10.4453125, -0.16015625, 10.125, 20.41015625, 30.6953125, 40.98046875, 51.265625, 61.55078125, 71.8359375, 82.12109375, 92.40625, 102.69140625, 112.9765625, 123.26171875, 133.546875, 143.83203125, 154.1171875, 164.40234375, 174.6875, 184.97265625, 195.2578125, 205.54296875, 215.828125, 226.11328125, 236.3984375, 246.68359375, 256.96875, 267.25390625, 277.5390625, 287.82421875, 298.109375, 308.39453125, 318.6796875, 328.96484375, 339.25]}, "gradients/decoder.transformer.h.15.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 5.0, 8.0, 8.0, 8.0, 11.0, 28.0, 26.0, 40.0, 71.0, 130.0, 194.0, 169.0, 111.0, 60.0, 44.0, 28.0, 17.0, 13.0, 19.0, 5.0, 1.0, 8.0, 1.0, 2.0, 1.0, 3.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-419.8833312988281, -399.8243103027344, -379.7652587890625, -359.70623779296875, -339.647216796875, -319.58819580078125, -299.5291442871094, -279.4701232910156, -259.41107177734375, -239.35203552246094, -219.2930145263672, -199.23397827148438, -179.17495727539062, -159.1159210205078, -139.056884765625, -118.99786376953125, -98.9388427734375, -78.87981414794922, -58.82078170776367, -38.761749267578125, -18.702720642089844, 1.3563079833984375, 21.41534423828125, 41.474365234375, 61.53340148925781, 81.5924301147461, 101.65145874023438, 121.71049499511719, 141.76953125, 161.82855224609375, 181.88758850097656, 201.9466094970703, 222.005615234375, 242.0646514892578, 262.1236877441406, 282.1827087402344, 302.2417297363281, 322.30078125, 342.35980224609375, 362.4188232421875, 382.47784423828125, 402.536865234375, 422.5959167480469, 442.6549377441406, 462.7139587402344, 482.77301025390625, 502.83203125, 522.8910522460938, 542.9500732421875, 563.0090942382812, 583.068115234375, 603.127197265625, 623.1862182617188, 643.2452392578125, 663.3042602539062, 683.36328125, 703.42236328125, 723.4813842773438, 743.5404052734375, 763.5994873046875, 783.6585083007812, 803.717529296875, 823.7765502929688, 843.8355712890625, 863.8945922851562]}, "gradients/decoder.transformer.h.15.ln_2.bias": {"_type": "histogram", "values": [4.0, 1.0, 1.0, 2.0, 2.0, 5.0, 2.0, 3.0, 6.0, 9.0, 10.0, 9.0, 9.0, 17.0, 11.0, 19.0, 18.0, 16.0, 18.0, 25.0, 22.0, 28.0, 34.0, 31.0, 35.0, 21.0, 35.0, 36.0, 37.0, 30.0, 46.0, 33.0, 24.0, 38.0, 35.0, 36.0, 28.0, 25.0, 32.0, 24.0, 29.0, 21.0, 23.0, 19.0, 17.0, 13.0, 20.0, 6.0, 10.0, 7.0, 11.0, 8.0, 4.0, 7.0, 1.0, 2.0, 2.0, 2.0, 1.0, 0.0, 0.0, 3.0, 0.0, 1.0], "bins": [-271.6313781738281, -262.5910339355469, -253.55068969726562, -244.51036071777344, -235.4700164794922, -226.42967224121094, -217.38934326171875, -208.3489990234375, -199.30865478515625, -190.268310546875, -181.22796630859375, -172.18763732910156, -163.1472930908203, -154.10694885253906, -145.06661987304688, -136.02627563476562, -126.98593139648438, -117.94558715820312, -108.9052505493164, -99.86491394042969, -90.82456970214844, -81.78422546386719, -72.74388885498047, -63.703548431396484, -54.6632080078125, -45.622867584228516, -36.58252716064453, -27.542186737060547, -18.501846313476562, -9.461505889892578, -0.42116546630859375, 8.61917495727539, 17.65948486328125, 26.699825286865234, 35.74016571044922, 44.7805061340332, 53.82084655761719, 62.86118698120117, 71.90152740478516, 80.94186401367188, 89.98220825195312, 99.02255249023438, 108.0628890991211, 117.10322570800781, 126.14356994628906, 135.1839141845703, 144.2242431640625, 153.26458740234375, 162.304931640625, 171.34527587890625, 180.3856201171875, 189.4259490966797, 198.46629333496094, 207.5066375732422, 216.54696655273438, 225.58731079101562, 234.62765502929688, 243.66799926757812, 252.70834350585938, 261.7486877441406, 270.78900146484375, 279.829345703125, 288.86968994140625, 297.9100341796875, 306.95037841796875]}, "gradients/decoder.transformer.h.15.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 2.0, 5.0, 2.0, 3.0, 4.0, 2.0, 5.0, 2.0, 5.0, 8.0, 19.0, 18.0, 17.0, 17.0, 25.0, 28.0, 23.0, 30.0, 31.0, 43.0, 43.0, 40.0, 53.0, 44.0, 55.0, 56.0, 50.0, 54.0, 38.0, 38.0, 27.0, 42.0, 27.0, 21.0, 14.0, 27.0, 18.0, 17.0, 9.0, 9.0, 11.0, 7.0, 6.0, 4.0, 5.0, 4.0, 4.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-44.5, -42.8916015625, -41.283203125, -39.6748046875, -38.06640625, -36.4580078125, -34.849609375, -33.2412109375, -31.6328125, -30.0244140625, -28.416015625, -26.8076171875, -25.19921875, -23.5908203125, -21.982421875, -20.3740234375, -18.765625, -17.1572265625, -15.548828125, -13.9404296875, -12.33203125, -10.7236328125, -9.115234375, -7.5068359375, -5.8984375, -4.2900390625, -2.681640625, -1.0732421875, 0.53515625, 2.1435546875, 3.751953125, 5.3603515625, 6.96875, 8.5771484375, 10.185546875, 11.7939453125, 13.40234375, 15.0107421875, 16.619140625, 18.2275390625, 19.8359375, 21.4443359375, 23.052734375, 24.6611328125, 26.26953125, 27.8779296875, 29.486328125, 31.0947265625, 32.703125, 34.3115234375, 35.919921875, 37.5283203125, 39.13671875, 40.7451171875, 42.353515625, 43.9619140625, 45.5703125, 47.1787109375, 48.787109375, 50.3955078125, 52.00390625, 53.6123046875, 55.220703125, 56.8291015625, 58.4375]}, "gradients/decoder.transformer.h.15.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 4.0, 3.0, 5.0, 2.0, 4.0, 6.0, 19.0, 15.0, 22.0, 17.0, 31.0, 64.0, 81.0, 99.0, 169.0, 233.0, 455.0, 660.0, 1221.0, 2402.0, 5301.0, 13121.0, 40953.0, 174907.0, 594000.0, 155189.0, 37205.0, 12269.0, 4826.0, 2257.0, 1173.0, 684.0, 376.0, 247.0, 170.0, 106.0, 80.0, 54.0, 34.0, 32.0, 20.0, 16.0, 11.0, 10.0, 1.0, 3.0, 2.0, 4.0, 4.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-13.71875, -13.28271484375, -12.8466796875, -12.41064453125, -11.974609375, -11.53857421875, -11.1025390625, -10.66650390625, -10.23046875, -9.79443359375, -9.3583984375, -8.92236328125, -8.486328125, -8.05029296875, -7.6142578125, -7.17822265625, -6.7421875, -6.30615234375, -5.8701171875, -5.43408203125, -4.998046875, -4.56201171875, -4.1259765625, -3.68994140625, -3.25390625, -2.81787109375, -2.3818359375, -1.94580078125, -1.509765625, -1.07373046875, -0.6376953125, -0.20166015625, 0.234375, 0.67041015625, 1.1064453125, 1.54248046875, 1.978515625, 2.41455078125, 2.8505859375, 3.28662109375, 3.72265625, 4.15869140625, 4.5947265625, 5.03076171875, 5.466796875, 5.90283203125, 6.3388671875, 6.77490234375, 7.2109375, 7.64697265625, 8.0830078125, 8.51904296875, 8.955078125, 9.39111328125, 9.8271484375, 10.26318359375, 10.69921875, 11.13525390625, 11.5712890625, 12.00732421875, 12.443359375, 12.87939453125, 13.3154296875, 13.75146484375, 14.1875]}, "gradients/decoder.transformer.h.15.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 3.0, 0.0, 3.0, 4.0, 2.0, 3.0, 13.0, 12.0, 8.0, 10.0, 12.0, 16.0, 21.0, 24.0, 25.0, 22.0, 32.0, 34.0, 32.0, 30.0, 38.0, 31.0, 32.0, 42.0, 42.0, 1058.0, 40.0, 37.0, 29.0, 38.0, 30.0, 41.0, 27.0, 28.0, 32.0, 30.0, 23.0, 12.0, 23.0, 16.0, 16.0, 8.0, 11.0, 9.0, 8.0, 6.0, 5.0, 7.0, 6.0, 3.0, 2.0, 2.0, 1.0, 3.0, 1.0, 0.0, 2.0], "bins": [-30.71875, -29.78564453125, -28.8525390625, -27.91943359375, -26.986328125, -26.05322265625, -25.1201171875, -24.18701171875, -23.25390625, -22.32080078125, -21.3876953125, -20.45458984375, -19.521484375, -18.58837890625, -17.6552734375, -16.72216796875, -15.7890625, -14.85595703125, -13.9228515625, -12.98974609375, -12.056640625, -11.12353515625, -10.1904296875, -9.25732421875, -8.32421875, -7.39111328125, -6.4580078125, -5.52490234375, -4.591796875, -3.65869140625, -2.7255859375, -1.79248046875, -0.859375, 0.07373046875, 1.0068359375, 1.93994140625, 2.873046875, 3.80615234375, 4.7392578125, 5.67236328125, 6.60546875, 7.53857421875, 8.4716796875, 9.40478515625, 10.337890625, 11.27099609375, 12.2041015625, 13.13720703125, 14.0703125, 15.00341796875, 15.9365234375, 16.86962890625, 17.802734375, 18.73583984375, 19.6689453125, 20.60205078125, 21.53515625, 22.46826171875, 23.4013671875, 24.33447265625, 25.267578125, 26.20068359375, 27.1337890625, 28.06689453125, 29.0]}, "gradients/decoder.transformer.h.15.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 1.0, 0.0, 4.0, 2.0, 9.0, 8.0, 14.0, 23.0, 34.0, 35.0, 63.0, 90.0, 116.0, 193.0, 249.0, 338.0, 540.0, 773.0, 1236.0, 1899.0, 3018.0, 4752.0, 7770.0, 12928.0, 22229.0, 39487.0, 72856.0, 141735.0, 1104096.0, 406431.0, 124952.0, 64349.0, 35431.0, 20247.0, 11916.0, 7137.0, 4200.0, 2733.0, 1762.0, 1130.0, 741.0, 504.0, 360.0, 236.0, 150.0, 119.0, 64.0, 54.0, 44.0, 27.0, 16.0, 9.0, 19.0, 6.0, 1.0, 4.0, 2.0, 2.0, 0.0, 3.0], "bins": [-3.216796875, -3.119049072265625, -3.02130126953125, -2.923553466796875, -2.8258056640625, -2.728057861328125, -2.63031005859375, -2.532562255859375, -2.434814453125, -2.337066650390625, -2.23931884765625, -2.141571044921875, -2.0438232421875, -1.946075439453125, -1.84832763671875, -1.750579833984375, -1.65283203125, -1.555084228515625, -1.45733642578125, -1.359588623046875, -1.2618408203125, -1.164093017578125, -1.06634521484375, -0.968597412109375, -0.870849609375, -0.773101806640625, -0.67535400390625, -0.577606201171875, -0.4798583984375, -0.382110595703125, -0.28436279296875, -0.186614990234375, -0.0888671875, 0.008880615234375, 0.10662841796875, 0.204376220703125, 0.3021240234375, 0.399871826171875, 0.49761962890625, 0.595367431640625, 0.693115234375, 0.790863037109375, 0.88861083984375, 0.986358642578125, 1.0841064453125, 1.181854248046875, 1.27960205078125, 1.377349853515625, 1.47509765625, 1.572845458984375, 1.67059326171875, 1.768341064453125, 1.8660888671875, 1.963836669921875, 2.06158447265625, 2.159332275390625, 2.257080078125, 2.354827880859375, 2.45257568359375, 2.550323486328125, 2.6480712890625, 2.745819091796875, 2.84356689453125, 2.941314697265625, 3.0390625]}, "gradients/decoder.transformer.h.15.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 3.0, 1.0, 1.0, 0.0, 2.0, 6.0, 6.0, 4.0, 6.0, 5.0, 3.0, 10.0, 14.0, 14.0, 24.0, 25.0, 18.0, 29.0, 29.0, 43.0, 62.0, 83.0, 163.0, 114.0, 62.0, 60.0, 44.0, 29.0, 26.0, 23.0, 21.0, 12.0, 9.0, 9.0, 9.0, 11.0, 7.0, 5.0, 0.0, 3.0, 5.0, 4.0, 3.0, 1.0, 3.0, 2.0, 3.0, 0.0, 0.0, 2.0], "bins": [-0.8662109375, -0.8425979614257812, -0.8189849853515625, -0.7953720092773438, -0.771759033203125, -0.7481460571289062, -0.7245330810546875, -0.7009201049804688, -0.67730712890625, -0.6536941528320312, -0.6300811767578125, -0.6064682006835938, -0.582855224609375, -0.5592422485351562, -0.5356292724609375, -0.5120162963867188, -0.4884033203125, -0.46479034423828125, -0.4411773681640625, -0.41756439208984375, -0.393951416015625, -0.37033843994140625, -0.3467254638671875, -0.32311248779296875, -0.29949951171875, -0.27588653564453125, -0.2522735595703125, -0.22866058349609375, -0.205047607421875, -0.18143463134765625, -0.1578216552734375, -0.13420867919921875, -0.110595703125, -0.08698272705078125, -0.0633697509765625, -0.03975677490234375, -0.016143798828125, 0.00746917724609375, 0.0310821533203125, 0.05469512939453125, 0.07830810546875, 0.10192108154296875, 0.1255340576171875, 0.14914703369140625, 0.172760009765625, 0.19637298583984375, 0.2199859619140625, 0.24359893798828125, 0.2672119140625, 0.29082489013671875, 0.3144378662109375, 0.33805084228515625, 0.361663818359375, 0.38527679443359375, 0.4088897705078125, 0.43250274658203125, 0.45611572265625, 0.47972869873046875, 0.5033416748046875, 0.5269546508789062, 0.550567626953125, 0.5741806030273438, 0.5977935791015625, 0.6214065551757812, 0.64501953125]}, "gradients/decoder.transformer.h.15.crossattention.q_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 4.0, 2.0, 2.0, 2.0, 3.0, 6.0, 3.0, 1.0, 4.0, 7.0, 8.0, 11.0, 8.0, 8.0, 13.0, 15.0, 24.0, 23.0, 40.0, 45.0, 83.0, 101.0, 226.0, 1046862.0, 570.0, 156.0, 85.0, 57.0, 38.0, 23.0, 22.0, 25.0, 21.0, 15.0, 10.0, 8.0, 4.0, 5.0, 8.0, 2.0, 8.0, 2.0, 2.0, 0.0, 1.0, 4.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-21.03125, -20.2744140625, -19.517578125, -18.7607421875, -18.00390625, -17.2470703125, -16.490234375, -15.7333984375, -14.9765625, -14.2197265625, -13.462890625, -12.7060546875, -11.94921875, -11.1923828125, -10.435546875, -9.6787109375, -8.921875, -8.1650390625, -7.408203125, -6.6513671875, -5.89453125, -5.1376953125, -4.380859375, -3.6240234375, -2.8671875, -2.1103515625, -1.353515625, -0.5966796875, 0.16015625, 0.9169921875, 1.673828125, 2.4306640625, 3.1875, 3.9443359375, 4.701171875, 5.4580078125, 6.21484375, 6.9716796875, 7.728515625, 8.4853515625, 9.2421875, 9.9990234375, 10.755859375, 11.5126953125, 12.26953125, 13.0263671875, 13.783203125, 14.5400390625, 15.296875, 16.0537109375, 16.810546875, 17.5673828125, 18.32421875, 19.0810546875, 19.837890625, 20.5947265625, 21.3515625, 22.1083984375, 22.865234375, 23.6220703125, 24.37890625, 25.1357421875, 25.892578125, 26.6494140625, 27.40625]}, "gradients/decoder.transformer.h.15.ln_cross_attn.weight": {"_type": "histogram", "values": [7.0, 1012.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.46293744444847107, -0.1248236894607544, 0.21329006552696228, 0.5514037609100342, 0.8895175457000732, 1.2276313304901123, 1.5657449960708618, 1.9038587808609009, 2.2419726848602295, 2.5800864696502686, 2.9182002544403076, 3.2563138008117676, 3.5944275856018066, 3.9325413703918457, 4.270655155181885, 4.608768939971924, 4.946882724761963, 5.284996509552002, 5.623110294342041, 5.96122407913208, 6.299337863922119, 6.637451648712158, 6.975564956665039, 7.313678741455078, 7.651792526245117, 7.989906311035156, 8.328020095825195, 8.666133880615234, 9.004247665405273, 9.342361450195312, 9.680475234985352, 10.01858901977539, 10.356701850891113, 10.694815635681152, 11.032929420471191, 11.37104320526123, 11.70915699005127, 12.047270774841309, 12.385384559631348, 12.723498344421387, 13.061612129211426, 13.399725914001465, 13.737839698791504, 14.075953483581543, 14.414067268371582, 14.752181053161621, 15.09029483795166, 15.4284086227417, 15.766521453857422, 16.10463523864746, 16.4427490234375, 16.78086280822754, 17.118976593017578, 17.457090377807617, 17.795204162597656, 18.133317947387695, 18.471431732177734, 18.809545516967773, 19.147659301757812, 19.48577308654785, 19.82388687133789, 20.16200065612793, 20.50011444091797, 20.838228225708008, 21.176342010498047]}, "gradients/decoder.transformer.h.15.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 4.0, 2.0, 3.0, 8.0, 6.0, 9.0, 20.0, 11.0, 10.0, 19.0, 21.0, 32.0, 47.0, 25.0, 30.0, 28.0, 42.0, 41.0, 37.0, 35.0, 42.0, 50.0, 39.0, 40.0, 38.0, 35.0, 39.0, 35.0, 23.0, 35.0, 20.0, 35.0, 24.0, 28.0, 17.0, 7.0, 20.0, 10.0, 9.0, 13.0, 6.0, 4.0, 7.0, 2.0, 4.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.9654772281646729, -1.9001182317733765, -1.83475923538208, -1.7694002389907837, -1.7040412425994873, -1.6386821269989014, -1.573323130607605, -1.5079641342163086, -1.4426051378250122, -1.3772461414337158, -1.3118871450424194, -1.246528148651123, -1.181169033050537, -1.1158101558685303, -1.0504510402679443, -0.985092043876648, -0.9197330474853516, -0.8543740510940552, -0.7890150547027588, -0.7236559987068176, -0.6582970023155212, -0.5929380059242249, -0.5275789499282837, -0.4622199535369873, -0.3968609571456909, -0.33150196075439453, -0.26614293456077576, -0.20078392326831818, -0.1354249119758606, -0.07006591558456421, -0.004706889390945435, 0.06065213680267334, 0.12601137161254883, 0.1913703829050064, 0.256729394197464, 0.32208842039108276, 0.38744741678237915, 0.45280641317367554, 0.5181654691696167, 0.5835244655609131, 0.6488834619522095, 0.7142424583435059, 0.7796014547348022, 0.8449605107307434, 0.9103195071220398, 0.9756785035133362, 1.0410375595092773, 1.1063965559005737, 1.1717555522918701, 1.2371145486831665, 1.302473545074463, 1.3678325414657593, 1.4331915378570557, 1.4985506534576416, 1.563909649848938, 1.6292686462402344, 1.6946276426315308, 1.7599866390228271, 1.8253456354141235, 1.89070463180542, 1.9560637474060059, 2.0214226245880127, 2.0867817401885986, 2.1521406173706055, 2.2174997329711914]}, "gradients/decoder.transformer.h.15.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 2.0, 5.0, 2.0, 4.0, 3.0, 2.0, 5.0, 2.0, 5.0, 8.0, 19.0, 18.0, 18.0, 16.0, 25.0, 28.0, 23.0, 31.0, 30.0, 43.0, 43.0, 40.0, 53.0, 44.0, 55.0, 56.0, 50.0, 54.0, 38.0, 39.0, 26.0, 42.0, 27.0, 21.0, 14.0, 28.0, 17.0, 17.0, 10.0, 8.0, 11.0, 7.0, 6.0, 4.0, 5.0, 4.0, 4.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-44.46875, -42.86083984375, -41.2529296875, -39.64501953125, -38.037109375, -36.42919921875, -34.8212890625, -33.21337890625, -31.60546875, -29.99755859375, -28.3896484375, -26.78173828125, -25.173828125, -23.56591796875, -21.9580078125, -20.35009765625, -18.7421875, -17.13427734375, -15.5263671875, -13.91845703125, -12.310546875, -10.70263671875, -9.0947265625, -7.48681640625, -5.87890625, -4.27099609375, -2.6630859375, -1.05517578125, 0.552734375, 2.16064453125, 3.7685546875, 5.37646484375, 6.984375, 8.59228515625, 10.2001953125, 11.80810546875, 13.416015625, 15.02392578125, 16.6318359375, 18.23974609375, 19.84765625, 21.45556640625, 23.0634765625, 24.67138671875, 26.279296875, 27.88720703125, 29.4951171875, 31.10302734375, 32.7109375, 34.31884765625, 35.9267578125, 37.53466796875, 39.142578125, 40.75048828125, 42.3583984375, 43.96630859375, 45.57421875, 47.18212890625, 48.7900390625, 50.39794921875, 52.005859375, 53.61376953125, 55.2216796875, 56.82958984375, 58.4375]}, "gradients/decoder.transformer.h.15.attn.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 2.0, 5.0, 4.0, 3.0, 10.0, 13.0, 18.0, 19.0, 34.0, 40.0, 67.0, 84.0, 151.0, 221.0, 281.0, 425.0, 727.0, 1150.0, 1793.0, 2818.0, 4758.0, 8208.0, 15881.0, 36256.0, 157698.0, 673274.0, 87195.0, 27122.0, 12634.0, 6956.0, 3958.0, 2414.0, 1526.0, 947.0, 621.0, 407.0, 263.0, 183.0, 126.0, 85.0, 52.0, 45.0, 32.0, 21.0, 15.0, 6.0, 7.0, 5.0, 2.0, 5.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-60.90625, -58.83056640625, -56.7548828125, -54.67919921875, -52.603515625, -50.52783203125, -48.4521484375, -46.37646484375, -44.30078125, -42.22509765625, -40.1494140625, -38.07373046875, -35.998046875, -33.92236328125, -31.8466796875, -29.77099609375, -27.6953125, -25.61962890625, -23.5439453125, -21.46826171875, -19.392578125, -17.31689453125, -15.2412109375, -13.16552734375, -11.08984375, -9.01416015625, -6.9384765625, -4.86279296875, -2.787109375, -0.71142578125, 1.3642578125, 3.43994140625, 5.515625, 7.59130859375, 9.6669921875, 11.74267578125, 13.818359375, 15.89404296875, 17.9697265625, 20.04541015625, 22.12109375, 24.19677734375, 26.2724609375, 28.34814453125, 30.423828125, 32.49951171875, 34.5751953125, 36.65087890625, 38.7265625, 40.80224609375, 42.8779296875, 44.95361328125, 47.029296875, 49.10498046875, 51.1806640625, 53.25634765625, 55.33203125, 57.40771484375, 59.4833984375, 61.55908203125, 63.634765625, 65.71044921875, 67.7861328125, 69.86181640625, 71.9375]}, "gradients/decoder.transformer.h.15.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 3.0, 1.0, 0.0, 1.0, 1.0, 4.0, 4.0, 8.0, 7.0, 12.0, 7.0, 11.0, 17.0, 13.0, 19.0, 28.0, 24.0, 32.0, 43.0, 43.0, 55.0, 61.0, 72.0, 99.0, 213.0, 1709.0, 107.0, 74.0, 76.0, 51.0, 50.0, 50.0, 26.0, 22.0, 30.0, 16.0, 18.0, 14.0, 14.0, 9.0, 6.0, 3.0, 2.0, 2.0, 4.0, 3.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-156.0, -150.0390625, -144.078125, -138.1171875, -132.15625, -126.1953125, -120.234375, -114.2734375, -108.3125, -102.3515625, -96.390625, -90.4296875, -84.46875, -78.5078125, -72.546875, -66.5859375, -60.625, -54.6640625, -48.703125, -42.7421875, -36.78125, -30.8203125, -24.859375, -18.8984375, -12.9375, -6.9765625, -1.015625, 4.9453125, 10.90625, 16.8671875, 22.828125, 28.7890625, 34.75, 40.7109375, 46.671875, 52.6328125, 58.59375, 64.5546875, 70.515625, 76.4765625, 82.4375, 88.3984375, 94.359375, 100.3203125, 106.28125, 112.2421875, 118.203125, 124.1640625, 130.125, 136.0859375, 142.046875, 148.0078125, 153.96875, 159.9296875, 165.890625, 171.8515625, 177.8125, 183.7734375, 189.734375, 195.6953125, 201.65625, 207.6171875, 213.578125, 219.5390625, 225.5]}, "gradients/decoder.transformer.h.15.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 3.0, 5.0, 3.0, 9.0, 10.0, 10.0, 24.0, 24.0, 30.0, 28.0, 53.0, 89.0, 119.0, 212.0, 368.0, 1117.0, 5204.0, 47509.0, 2996302.0, 84454.0, 7490.0, 1485.0, 482.0, 216.0, 136.0, 102.0, 59.0, 37.0, 34.0, 32.0, 18.0, 18.0, 8.0, 5.0, 7.0, 4.0, 2.0, 3.0, 1.0, 1.0, 3.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-486.5, -472.01953125, -457.5390625, -443.05859375, -428.578125, -414.09765625, -399.6171875, -385.13671875, -370.65625, -356.17578125, -341.6953125, -327.21484375, -312.734375, -298.25390625, -283.7734375, -269.29296875, -254.8125, -240.33203125, -225.8515625, -211.37109375, -196.890625, -182.41015625, -167.9296875, -153.44921875, -138.96875, -124.48828125, -110.0078125, -95.52734375, -81.046875, -66.56640625, -52.0859375, -37.60546875, -23.125, -8.64453125, 5.8359375, 20.31640625, 34.796875, 49.27734375, 63.7578125, 78.23828125, 92.71875, 107.19921875, 121.6796875, 136.16015625, 150.640625, 165.12109375, 179.6015625, 194.08203125, 208.5625, 223.04296875, 237.5234375, 252.00390625, 266.484375, 280.96484375, 295.4453125, 309.92578125, 324.40625, 338.88671875, 353.3671875, 367.84765625, 382.328125, 396.80859375, 411.2890625, 425.76953125, 440.25]}, "gradients/decoder.transformer.h.15.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 77.0, 905.0, 41.0], "bins": [-8190.32373046875, -8059.06640625, -7927.80908203125, -7796.5517578125, -7665.29443359375, -7534.037109375, -7402.77978515625, -7271.5224609375, -7140.26513671875, -7009.0078125, -6877.75048828125, -6746.4931640625, -6615.23583984375, -6483.978515625, -6352.72119140625, -6221.4638671875, -6090.20703125, -5958.94970703125, -5827.6923828125, -5696.43505859375, -5565.177734375, -5433.92041015625, -5302.6630859375, -5171.40576171875, -5040.1484375, -4908.89111328125, -4777.6337890625, -4646.37646484375, -4515.119140625, -4383.86181640625, -4252.6044921875, -4121.34716796875, -3990.08935546875, -3858.83203125, -3727.57470703125, -3596.3173828125, -3465.06005859375, -3333.802734375, -3202.54541015625, -3071.2880859375, -2940.031005859375, -2808.773681640625, -2677.516357421875, -2546.259033203125, -2415.001708984375, -2283.744384765625, -2152.4873046875, -2021.2298583984375, -1889.9725341796875, -1758.7152099609375, -1627.4578857421875, -1496.20068359375, -1364.943359375, -1233.68603515625, -1102.4287109375, -971.17138671875, -839.9140625, -708.65673828125, -577.3994140625, -446.14215087890625, -314.88482666015625, -183.62750244140625, -52.3702392578125, 78.8870849609375, 210.1444091796875]}, "gradients/decoder.transformer.h.15.ln_1.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 3.0, 1.0, 1.0, 1.0, 0.0, 3.0, 3.0, 5.0, 3.0, 9.0, 4.0, 12.0, 9.0, 15.0, 12.0, 14.0, 17.0, 18.0, 22.0, 31.0, 31.0, 33.0, 34.0, 37.0, 33.0, 35.0, 41.0, 32.0, 42.0, 28.0, 49.0, 37.0, 36.0, 31.0, 36.0, 30.0, 25.0, 38.0, 20.0, 22.0, 23.0, 24.0, 20.0, 10.0, 13.0, 14.0, 10.0, 8.0, 10.0, 6.0, 3.0, 8.0, 4.0, 8.0, 2.0, 0.0, 2.0, 0.0, 3.0, 0.0, 1.0], "bins": [-363.2686767578125, -351.8797912597656, -340.49090576171875, -329.1020202636719, -317.713134765625, -306.32421875, -294.9353332519531, -283.54644775390625, -272.1575622558594, -260.7686767578125, -249.37979125976562, -237.9908905029297, -226.6020050048828, -215.21311950683594, -203.82421875, -192.43533325195312, -181.04644775390625, -169.65756225585938, -158.2686767578125, -146.87977600097656, -135.4908905029297, -124.10200500488281, -112.7131118774414, -101.32421875, -89.93533325195312, -78.54644775390625, -67.15755462646484, -55.7686653137207, -44.37977600097656, -32.99088668823242, -21.60199737548828, -10.213104248046875, 1.17578125, 12.56467056274414, 23.95355987548828, 35.34244918823242, 46.73133850097656, 58.1202278137207, 69.50911712646484, 80.89801025390625, 92.28689575195312, 103.67578125, 115.0646743774414, 126.45356750488281, 137.8424530029297, 149.23133850097656, 160.6202392578125, 172.00912475585938, 183.39801025390625, 194.78689575195312, 206.17578125, 217.56468200683594, 228.9535675048828, 240.3424530029297, 251.73135375976562, 263.1202392578125, 274.5091247558594, 285.89801025390625, 297.2868957519531, 308.67578125, 320.064697265625, 331.4535827636719, 342.84246826171875, 354.2313537597656, 365.6202392578125]}, "gradients/decoder.transformer.h.14.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 3.0, 2.0, 1.0, 3.0, 5.0, 7.0, 2.0, 4.0, 5.0, 7.0, 10.0, 9.0, 10.0, 13.0, 25.0, 22.0, 27.0, 19.0, 28.0, 37.0, 31.0, 43.0, 28.0, 45.0, 58.0, 44.0, 41.0, 53.0, 56.0, 35.0, 42.0, 38.0, 33.0, 34.0, 30.0, 25.0, 14.0, 19.0, 10.0, 18.0, 10.0, 15.0, 11.0, 7.0, 9.0, 5.0, 6.0, 5.0, 5.0, 2.0, 3.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-45.59375, -43.9951171875, -42.396484375, -40.7978515625, -39.19921875, -37.6005859375, -36.001953125, -34.4033203125, -32.8046875, -31.2060546875, -29.607421875, -28.0087890625, -26.41015625, -24.8115234375, -23.212890625, -21.6142578125, -20.015625, -18.4169921875, -16.818359375, -15.2197265625, -13.62109375, -12.0224609375, -10.423828125, -8.8251953125, -7.2265625, -5.6279296875, -4.029296875, -2.4306640625, -0.83203125, 0.7666015625, 2.365234375, 3.9638671875, 5.5625, 7.1611328125, 8.759765625, 10.3583984375, 11.95703125, 13.5556640625, 15.154296875, 16.7529296875, 18.3515625, 19.9501953125, 21.548828125, 23.1474609375, 24.74609375, 26.3447265625, 27.943359375, 29.5419921875, 31.140625, 32.7392578125, 34.337890625, 35.9365234375, 37.53515625, 39.1337890625, 40.732421875, 42.3310546875, 43.9296875, 45.5283203125, 47.126953125, 48.7255859375, 50.32421875, 51.9228515625, 53.521484375, 55.1201171875, 56.71875]}, "gradients/decoder.transformer.h.14.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 2.0, 1.0, 3.0, 4.0, 4.0, 5.0, 3.0, 6.0, 8.0, 8.0, 8.0, 15.0, 14.0, 23.0, 30.0, 53.0, 72.0, 120.0, 219.0, 344.0, 604.0, 1127.0, 2278.0, 5163.0, 13063.0, 71325.0, 3426351.0, 633047.0, 24821.0, 8179.0, 3495.0, 1745.0, 838.0, 523.0, 312.0, 148.0, 107.0, 57.0, 42.0, 32.0, 20.0, 19.0, 5.0, 11.0, 5.0, 8.0, 8.0, 5.0, 5.0, 3.0, 4.0, 3.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-179.25, -173.228515625, -167.20703125, -161.185546875, -155.1640625, -149.142578125, -143.12109375, -137.099609375, -131.078125, -125.056640625, -119.03515625, -113.013671875, -106.9921875, -100.970703125, -94.94921875, -88.927734375, -82.90625, -76.884765625, -70.86328125, -64.841796875, -58.8203125, -52.798828125, -46.77734375, -40.755859375, -34.734375, -28.712890625, -22.69140625, -16.669921875, -10.6484375, -4.626953125, 1.39453125, 7.416015625, 13.4375, 19.458984375, 25.48046875, 31.501953125, 37.5234375, 43.544921875, 49.56640625, 55.587890625, 61.609375, 67.630859375, 73.65234375, 79.673828125, 85.6953125, 91.716796875, 97.73828125, 103.759765625, 109.78125, 115.802734375, 121.82421875, 127.845703125, 133.8671875, 139.888671875, 145.91015625, 151.931640625, 157.953125, 163.974609375, 169.99609375, 176.017578125, 182.0390625, 188.060546875, 194.08203125, 200.103515625, 206.125]}, "gradients/decoder.transformer.h.14.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 4.0, 1.0, 6.0, 7.0, 10.0, 17.0, 19.0, 34.0, 71.0, 142.0, 346.0, 885.0, 1442.0, 585.0, 260.0, 97.0, 56.0, 36.0, 24.0, 16.0, 9.0, 8.0, 3.0, 3.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-210.0, -203.509765625, -197.01953125, -190.529296875, -184.0390625, -177.548828125, -171.05859375, -164.568359375, -158.078125, -151.587890625, -145.09765625, -138.607421875, -132.1171875, -125.626953125, -119.13671875, -112.646484375, -106.15625, -99.666015625, -93.17578125, -86.685546875, -80.1953125, -73.705078125, -67.21484375, -60.724609375, -54.234375, -47.744140625, -41.25390625, -34.763671875, -28.2734375, -21.783203125, -15.29296875, -8.802734375, -2.3125, 4.177734375, 10.66796875, 17.158203125, 23.6484375, 30.138671875, 36.62890625, 43.119140625, 49.609375, 56.099609375, 62.58984375, 69.080078125, 75.5703125, 82.060546875, 88.55078125, 95.041015625, 101.53125, 108.021484375, 114.51171875, 121.001953125, 127.4921875, 133.982421875, 140.47265625, 146.962890625, 153.453125, 159.943359375, 166.43359375, 172.923828125, 179.4140625, 185.904296875, 192.39453125, 198.884765625, 205.375]}, "gradients/decoder.transformer.h.14.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 3.0, 5.0, 0.0, 2.0, 4.0, 7.0, 5.0, 14.0, 12.0, 24.0, 33.0, 44.0, 59.0, 80.0, 113.0, 188.0, 280.0, 592.0, 1375.0, 3984.0, 17229.0, 271074.0, 3845174.0, 42543.0, 7278.0, 2158.0, 851.0, 393.0, 272.0, 146.0, 98.0, 72.0, 43.0, 37.0, 18.0, 27.0, 14.0, 15.0, 9.0, 5.0, 3.0, 2.0, 4.0, 2.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-391.5, -378.546875, -365.59375, -352.640625, -339.6875, -326.734375, -313.78125, -300.828125, -287.875, -274.921875, -261.96875, -249.015625, -236.0625, -223.109375, -210.15625, -197.203125, -184.25, -171.296875, -158.34375, -145.390625, -132.4375, -119.484375, -106.53125, -93.578125, -80.625, -67.671875, -54.71875, -41.765625, -28.8125, -15.859375, -2.90625, 10.046875, 23.0, 35.953125, 48.90625, 61.859375, 74.8125, 87.765625, 100.71875, 113.671875, 126.625, 139.578125, 152.53125, 165.484375, 178.4375, 191.390625, 204.34375, 217.296875, 230.25, 243.203125, 256.15625, 269.109375, 282.0625, 295.015625, 307.96875, 320.921875, 333.875, 346.828125, 359.78125, 372.734375, 385.6875, 398.640625, 411.59375, 424.546875, 437.5]}, "gradients/decoder.transformer.h.14.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 3.0, 2.0, 4.0, 5.0, 3.0, 5.0, 8.0, 16.0, 19.0, 16.0, 36.0, 64.0, 75.0, 125.0, 143.0, 120.0, 117.0, 73.0, 54.0, 38.0, 21.0, 16.0, 12.0, 4.0, 10.0, 7.0, 2.0, 5.0, 0.0, 2.0, 5.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-459.1439208984375, -443.13702392578125, -427.130126953125, -411.1232604980469, -395.1163635253906, -379.1094665527344, -363.10260009765625, -347.095703125, -331.08880615234375, -315.0819091796875, -299.07501220703125, -283.0681457519531, -267.0612487792969, -251.05435180664062, -235.04747009277344, -219.04058837890625, -203.03369140625, -187.02679443359375, -171.01991271972656, -155.01303100585938, -139.00613403320312, -122.9992446899414, -106.99235534667969, -90.98546600341797, -74.97857666015625, -58.97168731689453, -42.96479797363281, -26.957908630371094, -10.951019287109375, 5.055870056152344, 21.062759399414062, 37.06964874267578, 53.07647705078125, 69.08336639404297, 85.09025573730469, 101.0971450805664, 117.10403442382812, 133.11093139648438, 149.11781311035156, 165.12469482421875, 181.131591796875, 197.13848876953125, 213.14537048339844, 229.15225219726562, 245.15914916992188, 261.1660461425781, 277.17291259765625, 293.1798095703125, 309.18670654296875, 325.193603515625, 341.20050048828125, 357.2073669433594, 373.2142639160156, 389.2211608886719, 405.22802734375, 421.23492431640625, 437.2418212890625, 453.24871826171875, 469.255615234375, 485.2624816894531, 501.2693786621094, 517.2762451171875, 533.2831420898438, 549.2900390625, 565.2969360351562]}, "gradients/decoder.transformer.h.14.ln_2.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 0.0, 2.0, 3.0, 4.0, 12.0, 8.0, 12.0, 5.0, 14.0, 13.0, 19.0, 22.0, 23.0, 26.0, 25.0, 32.0, 27.0, 33.0, 37.0, 42.0, 55.0, 40.0, 49.0, 47.0, 49.0, 50.0, 37.0, 41.0, 39.0, 33.0, 32.0, 33.0, 25.0, 21.0, 19.0, 17.0, 10.0, 9.0, 17.0, 8.0, 7.0, 5.0, 2.0, 3.0, 3.0, 1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-350.869384765625, -339.45782470703125, -328.0462951660156, -316.6347351074219, -305.22320556640625, -293.8116455078125, -282.40008544921875, -270.9885559082031, -259.5770263671875, -248.1654815673828, -236.75393676757812, -225.34237670898438, -213.93084716796875, -202.519287109375, -191.1077423095703, -179.69619750976562, -168.28463745117188, -156.8730926513672, -145.4615478515625, -134.04998779296875, -122.6384506225586, -111.2269058227539, -99.81535339355469, -88.40380859375, -76.99226379394531, -65.58071899414062, -54.16917037963867, -42.75762176513672, -31.34607696533203, -19.934532165527344, -8.522979736328125, 2.8885650634765625, 14.30010986328125, 25.71165657043457, 37.12320327758789, 48.534751892089844, 59.94629669189453, 71.35784149169922, 82.76939392089844, 94.18093872070312, 105.59248352050781, 117.0040283203125, 128.4155731201172, 139.82711791992188, 151.23867797851562, 162.65020751953125, 174.061767578125, 185.4733123779297, 196.88485717773438, 208.29640197753906, 219.70794677734375, 231.1195068359375, 242.53103637695312, 253.94259643554688, 265.3541259765625, 276.76568603515625, 288.17724609375, 299.58880615234375, 311.0003356933594, 322.4118957519531, 333.82342529296875, 345.2349853515625, 356.64654541015625, 368.0580749511719, 379.4696044921875]}, "gradients/decoder.transformer.h.14.crossattention.c_proj.bias": {"_type": "histogram", "values": [3.0, 2.0, 2.0, 3.0, 3.0, 3.0, 3.0, 4.0, 4.0, 3.0, 12.0, 18.0, 11.0, 11.0, 10.0, 14.0, 30.0, 25.0, 20.0, 37.0, 38.0, 44.0, 47.0, 48.0, 46.0, 47.0, 43.0, 54.0, 50.0, 41.0, 48.0, 40.0, 36.0, 29.0, 32.0, 19.0, 21.0, 23.0, 12.0, 14.0, 14.0, 15.0, 8.0, 7.0, 5.0, 8.0, 5.0, 1.0, 4.0, 2.0, 0.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-45.8125, -44.0888671875, -42.365234375, -40.6416015625, -38.91796875, -37.1943359375, -35.470703125, -33.7470703125, -32.0234375, -30.2998046875, -28.576171875, -26.8525390625, -25.12890625, -23.4052734375, -21.681640625, -19.9580078125, -18.234375, -16.5107421875, -14.787109375, -13.0634765625, -11.33984375, -9.6162109375, -7.892578125, -6.1689453125, -4.4453125, -2.7216796875, -0.998046875, 0.7255859375, 2.44921875, 4.1728515625, 5.896484375, 7.6201171875, 9.34375, 11.0673828125, 12.791015625, 14.5146484375, 16.23828125, 17.9619140625, 19.685546875, 21.4091796875, 23.1328125, 24.8564453125, 26.580078125, 28.3037109375, 30.02734375, 31.7509765625, 33.474609375, 35.1982421875, 36.921875, 38.6455078125, 40.369140625, 42.0927734375, 43.81640625, 45.5400390625, 47.263671875, 48.9873046875, 50.7109375, 52.4345703125, 54.158203125, 55.8818359375, 57.60546875, 59.3291015625, 61.052734375, 62.7763671875, 64.5]}, "gradients/decoder.transformer.h.14.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 6.0, 7.0, 18.0, 23.0, 22.0, 28.0, 37.0, 40.0, 64.0, 95.0, 141.0, 167.0, 276.0, 422.0, 702.0, 1003.0, 1743.0, 3440.0, 7990.0, 23805.0, 96224.0, 541140.0, 288118.0, 55234.0, 15476.0, 5761.0, 2565.0, 1458.0, 795.0, 538.0, 384.0, 217.0, 161.0, 161.0, 102.0, 61.0, 45.0, 26.0, 21.0, 15.0, 9.0, 10.0, 5.0, 1.0, 2.0, 3.0, 3.0, 2.0, 4.0, 0.0, 1.0], "bins": [-18.984375, -18.4390869140625, -17.893798828125, -17.3485107421875, -16.80322265625, -16.2579345703125, -15.712646484375, -15.1673583984375, -14.6220703125, -14.0767822265625, -13.531494140625, -12.9862060546875, -12.44091796875, -11.8956298828125, -11.350341796875, -10.8050537109375, -10.259765625, -9.7144775390625, -9.169189453125, -8.6239013671875, -8.07861328125, -7.5333251953125, -6.988037109375, -6.4427490234375, -5.8974609375, -5.3521728515625, -4.806884765625, -4.2615966796875, -3.71630859375, -3.1710205078125, -2.625732421875, -2.0804443359375, -1.53515625, -0.9898681640625, -0.444580078125, 0.1007080078125, 0.64599609375, 1.1912841796875, 1.736572265625, 2.2818603515625, 2.8271484375, 3.3724365234375, 3.917724609375, 4.4630126953125, 5.00830078125, 5.5535888671875, 6.098876953125, 6.6441650390625, 7.189453125, 7.7347412109375, 8.280029296875, 8.8253173828125, 9.37060546875, 9.9158935546875, 10.461181640625, 11.0064697265625, 11.5517578125, 12.0970458984375, 12.642333984375, 13.1876220703125, 13.73291015625, 14.2781982421875, 14.823486328125, 15.3687744140625, 15.9140625]}, "gradients/decoder.transformer.h.14.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 5.0, 0.0, 2.0, 2.0, 7.0, 9.0, 6.0, 7.0, 9.0, 16.0, 13.0, 15.0, 25.0, 25.0, 23.0, 34.0, 41.0, 27.0, 39.0, 40.0, 53.0, 49.0, 33.0, 1082.0, 41.0, 52.0, 43.0, 44.0, 30.0, 42.0, 26.0, 27.0, 22.0, 24.0, 20.0, 24.0, 13.0, 15.0, 16.0, 9.0, 10.0, 3.0, 3.0, 2.0, 8.0, 4.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-37.8125, -36.64111328125, -35.4697265625, -34.29833984375, -33.126953125, -31.95556640625, -30.7841796875, -29.61279296875, -28.44140625, -27.27001953125, -26.0986328125, -24.92724609375, -23.755859375, -22.58447265625, -21.4130859375, -20.24169921875, -19.0703125, -17.89892578125, -16.7275390625, -15.55615234375, -14.384765625, -13.21337890625, -12.0419921875, -10.87060546875, -9.69921875, -8.52783203125, -7.3564453125, -6.18505859375, -5.013671875, -3.84228515625, -2.6708984375, -1.49951171875, -0.328125, 0.84326171875, 2.0146484375, 3.18603515625, 4.357421875, 5.52880859375, 6.7001953125, 7.87158203125, 9.04296875, 10.21435546875, 11.3857421875, 12.55712890625, 13.728515625, 14.89990234375, 16.0712890625, 17.24267578125, 18.4140625, 19.58544921875, 20.7568359375, 21.92822265625, 23.099609375, 24.27099609375, 25.4423828125, 26.61376953125, 27.78515625, 28.95654296875, 30.1279296875, 31.29931640625, 32.470703125, 33.64208984375, 34.8134765625, 35.98486328125, 37.15625]}, "gradients/decoder.transformer.h.14.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 3.0, 3.0, 6.0, 6.0, 13.0, 14.0, 25.0, 29.0, 43.0, 71.0, 110.0, 147.0, 219.0, 349.0, 515.0, 850.0, 1478.0, 2437.0, 4478.0, 8231.0, 16373.0, 33618.0, 74385.0, 185385.0, 1358979.0, 238190.0, 90542.0, 39996.0, 18952.0, 9588.0, 5139.0, 2792.0, 1546.0, 965.0, 545.0, 399.0, 226.0, 169.0, 113.0, 58.0, 45.0, 28.0, 22.0, 19.0, 10.0, 10.0, 5.0, 7.0, 2.0, 6.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0], "bins": [-4.60546875, -4.45977783203125, -4.3140869140625, -4.16839599609375, -4.022705078125, -3.87701416015625, -3.7313232421875, -3.58563232421875, -3.43994140625, -3.29425048828125, -3.1485595703125, -3.00286865234375, -2.857177734375, -2.71148681640625, -2.5657958984375, -2.42010498046875, -2.2744140625, -2.12872314453125, -1.9830322265625, -1.83734130859375, -1.691650390625, -1.54595947265625, -1.4002685546875, -1.25457763671875, -1.10888671875, -0.96319580078125, -0.8175048828125, -0.67181396484375, -0.526123046875, -0.38043212890625, -0.2347412109375, -0.08905029296875, 0.056640625, 0.20233154296875, 0.3480224609375, 0.49371337890625, 0.639404296875, 0.78509521484375, 0.9307861328125, 1.07647705078125, 1.22216796875, 1.36785888671875, 1.5135498046875, 1.65924072265625, 1.804931640625, 1.95062255859375, 2.0963134765625, 2.24200439453125, 2.3876953125, 2.53338623046875, 2.6790771484375, 2.82476806640625, 2.970458984375, 3.11614990234375, 3.2618408203125, 3.40753173828125, 3.55322265625, 3.69891357421875, 3.8446044921875, 3.99029541015625, 4.135986328125, 4.28167724609375, 4.4273681640625, 4.57305908203125, 4.71875]}, "gradients/decoder.transformer.h.14.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 3.0, 1.0, 6.0, 8.0, 7.0, 5.0, 13.0, 12.0, 25.0, 30.0, 36.0, 57.0, 61.0, 85.0, 144.0, 188.0, 82.0, 54.0, 45.0, 42.0, 19.0, 21.0, 9.0, 13.0, 8.0, 9.0, 7.0, 4.0, 1.0, 4.0, 5.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.98095703125, -0.9494552612304688, -0.9179534912109375, -0.8864517211914062, -0.854949951171875, -0.8234481811523438, -0.7919464111328125, -0.7604446411132812, -0.72894287109375, -0.6974411010742188, -0.6659393310546875, -0.6344375610351562, -0.602935791015625, -0.5714340209960938, -0.5399322509765625, -0.5084304809570312, -0.4769287109375, -0.44542694091796875, -0.4139251708984375, -0.38242340087890625, -0.350921630859375, -0.31941986083984375, -0.2879180908203125, -0.25641632080078125, -0.22491455078125, -0.19341278076171875, -0.1619110107421875, -0.13040924072265625, -0.098907470703125, -0.06740570068359375, -0.0359039306640625, -0.00440216064453125, 0.027099609375, 0.05860137939453125, 0.0901031494140625, 0.12160491943359375, 0.153106689453125, 0.18460845947265625, 0.2161102294921875, 0.24761199951171875, 0.27911376953125, 0.31061553955078125, 0.3421173095703125, 0.37361907958984375, 0.405120849609375, 0.43662261962890625, 0.4681243896484375, 0.49962615966796875, 0.5311279296875, 0.5626296997070312, 0.5941314697265625, 0.6256332397460938, 0.657135009765625, 0.6886367797851562, 0.7201385498046875, 0.7516403198242188, 0.78314208984375, 0.8146438598632812, 0.8461456298828125, 0.8776473999023438, 0.909149169921875, 0.9406509399414062, 0.9721527099609375, 1.0036544799804688, 1.03515625]}, "gradients/decoder.transformer.h.14.crossattention.q_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 5.0, 4.0, 2.0, 5.0, 6.0, 8.0, 9.0, 12.0, 11.0, 25.0, 19.0, 49.0, 58.0, 98.0, 199.0, 1046968.0, 618.0, 159.0, 95.0, 56.0, 40.0, 31.0, 26.0, 12.0, 13.0, 4.0, 6.0, 12.0, 4.0, 1.0, 2.0, 0.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-34.3125, -33.26611328125, -32.2197265625, -31.17333984375, -30.126953125, -29.08056640625, -28.0341796875, -26.98779296875, -25.94140625, -24.89501953125, -23.8486328125, -22.80224609375, -21.755859375, -20.70947265625, -19.6630859375, -18.61669921875, -17.5703125, -16.52392578125, -15.4775390625, -14.43115234375, -13.384765625, -12.33837890625, -11.2919921875, -10.24560546875, -9.19921875, -8.15283203125, -7.1064453125, -6.06005859375, -5.013671875, -3.96728515625, -2.9208984375, -1.87451171875, -0.828125, 0.21826171875, 1.2646484375, 2.31103515625, 3.357421875, 4.40380859375, 5.4501953125, 6.49658203125, 7.54296875, 8.58935546875, 9.6357421875, 10.68212890625, 11.728515625, 12.77490234375, 13.8212890625, 14.86767578125, 15.9140625, 16.96044921875, 18.0068359375, 19.05322265625, 20.099609375, 21.14599609375, 22.1923828125, 23.23876953125, 24.28515625, 25.33154296875, 26.3779296875, 27.42431640625, 28.470703125, 29.51708984375, 30.5634765625, 31.60986328125, 32.65625]}, "gradients/decoder.transformer.h.14.ln_cross_attn.weight": {"_type": "histogram", "values": [4.0, 1015.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.7138117551803589, -0.24255841970443726, 0.22869491577148438, 0.6999481916427612, 1.1712015867233276, 1.642454981803894, 2.113708019256592, 2.584961414337158, 3.0562148094177246, 3.527468204498291, 3.9987215995788574, 4.469974517822266, 4.941227912902832, 5.412481307983398, 5.883734703063965, 6.354988098144531, 6.826241493225098, 7.297494888305664, 7.7687482833862305, 8.240001678466797, 8.711255073547363, 9.18250846862793, 9.65376091003418, 10.125015258789062, 10.596268653869629, 11.067522048950195, 11.538775444030762, 12.010028839111328, 12.481282234191895, 12.952535629272461, 13.423788070678711, 13.895042419433594, 14.366294860839844, 14.83754825592041, 15.308801651000977, 15.780055046081543, 16.25130844116211, 16.72256088256836, 17.193815231323242, 17.665067672729492, 18.136322021484375, 18.607574462890625, 19.078828811645508, 19.550081253051758, 20.02133560180664, 20.49258804321289, 20.963842391967773, 21.435094833374023, 21.906349182128906, 22.377601623535156, 22.84885597229004, 23.32010841369629, 23.791362762451172, 24.262615203857422, 24.733869552612305, 25.205121994018555, 25.676374435424805, 26.147626876831055, 26.618881225585938, 27.090133666992188, 27.56138801574707, 28.03264045715332, 28.503894805908203, 28.975147247314453, 29.446401596069336]}, "gradients/decoder.transformer.h.14.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 4.0, 2.0, 5.0, 2.0, 9.0, 8.0, 7.0, 10.0, 19.0, 10.0, 17.0, 26.0, 19.0, 27.0, 30.0, 28.0, 39.0, 26.0, 33.0, 25.0, 46.0, 36.0, 51.0, 37.0, 39.0, 38.0, 39.0, 30.0, 31.0, 30.0, 40.0, 34.0, 29.0, 35.0, 24.0, 20.0, 16.0, 11.0, 5.0, 11.0, 20.0, 11.0, 9.0, 7.0, 5.0, 4.0, 2.0, 4.0, 2.0, 2.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-2.0322227478027344, -1.9668182134628296, -1.9014136791229248, -1.8360092639923096, -1.7706047296524048, -1.7052001953125, -1.6397957801818848, -1.57439124584198, -1.5089867115020752, -1.4435821771621704, -1.3781776428222656, -1.3127732276916504, -1.2473686933517456, -1.1819641590118408, -1.1165597438812256, -1.0511552095413208, -0.985750675201416, -0.9203461408615112, -0.8549416661262512, -0.7895371913909912, -0.7241326570510864, -0.6587281227111816, -0.5933236479759216, -0.5279191732406616, -0.46251463890075684, -0.39711013436317444, -0.33170562982559204, -0.26630112528800964, -0.20089662075042725, -0.13549211621284485, -0.07008761167526245, -0.004683107137680054, 0.060721397399902344, 0.12612590193748474, 0.19153040647506714, 0.25693491101264954, 0.32233941555023193, 0.38774392008781433, 0.45314842462539673, 0.5185528993606567, 0.5839574337005615, 0.6493619680404663, 0.7147664427757263, 0.7801709175109863, 0.8455754518508911, 0.9109799861907959, 0.9763844609260559, 1.041788935661316, 1.1071934700012207, 1.1725980043411255, 1.2380025386810303, 1.3034069538116455, 1.3688114881515503, 1.434216022491455, 1.4996204376220703, 1.565024971961975, 1.6304295063018799, 1.6958340406417847, 1.7612385749816895, 1.8266429901123047, 1.8920475244522095, 1.9574520587921143, 2.0228564739227295, 2.088261127471924, 2.153665542602539]}, "gradients/decoder.transformer.h.14.attn.c_proj.bias": {"_type": "histogram", "values": [3.0, 2.0, 2.0, 3.0, 3.0, 3.0, 3.0, 4.0, 4.0, 3.0, 12.0, 18.0, 11.0, 11.0, 10.0, 14.0, 30.0, 25.0, 20.0, 37.0, 39.0, 43.0, 47.0, 48.0, 46.0, 47.0, 43.0, 54.0, 49.0, 42.0, 48.0, 40.0, 36.0, 29.0, 32.0, 19.0, 21.0, 23.0, 12.0, 14.0, 14.0, 15.0, 8.0, 7.0, 5.0, 8.0, 5.0, 1.0, 4.0, 2.0, 0.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-45.8125, -44.0888671875, -42.365234375, -40.6416015625, -38.91796875, -37.1943359375, -35.470703125, -33.7470703125, -32.0234375, -30.2998046875, -28.576171875, -26.8525390625, -25.12890625, -23.4052734375, -21.681640625, -19.9580078125, -18.234375, -16.5107421875, -14.787109375, -13.0634765625, -11.33984375, -9.6162109375, -7.892578125, -6.1689453125, -4.4453125, -2.7216796875, -0.998046875, 0.7255859375, 2.44921875, 4.1728515625, 5.896484375, 7.6201171875, 9.34375, 11.0673828125, 12.791015625, 14.5146484375, 16.23828125, 17.9619140625, 19.685546875, 21.4091796875, 23.1328125, 24.8564453125, 26.580078125, 28.3037109375, 30.02734375, 31.7509765625, 33.474609375, 35.1982421875, 36.921875, 38.6455078125, 40.369140625, 42.0927734375, 43.81640625, 45.5400390625, 47.263671875, 48.9873046875, 50.7109375, 52.4345703125, 54.158203125, 55.8818359375, 57.60546875, 59.3291015625, 61.052734375, 62.7763671875, 64.5]}, "gradients/decoder.transformer.h.14.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 2.0, 2.0, 6.0, 4.0, 13.0, 11.0, 13.0, 28.0, 23.0, 26.0, 44.0, 66.0, 118.0, 166.0, 232.0, 396.0, 590.0, 930.0, 1596.0, 3101.0, 7632.0, 30350.0, 762044.0, 212255.0, 17192.0, 5630.0, 2487.0, 1250.0, 841.0, 491.0, 328.0, 235.0, 149.0, 101.0, 58.0, 43.0, 23.0, 21.0, 13.0, 15.0, 11.0, 13.0, 3.0, 4.0, 2.0, 2.0, 1.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-96.0, -92.88671875, -89.7734375, -86.66015625, -83.546875, -80.43359375, -77.3203125, -74.20703125, -71.09375, -67.98046875, -64.8671875, -61.75390625, -58.640625, -55.52734375, -52.4140625, -49.30078125, -46.1875, -43.07421875, -39.9609375, -36.84765625, -33.734375, -30.62109375, -27.5078125, -24.39453125, -21.28125, -18.16796875, -15.0546875, -11.94140625, -8.828125, -5.71484375, -2.6015625, 0.51171875, 3.625, 6.73828125, 9.8515625, 12.96484375, 16.078125, 19.19140625, 22.3046875, 25.41796875, 28.53125, 31.64453125, 34.7578125, 37.87109375, 40.984375, 44.09765625, 47.2109375, 50.32421875, 53.4375, 56.55078125, 59.6640625, 62.77734375, 65.890625, 69.00390625, 72.1171875, 75.23046875, 78.34375, 81.45703125, 84.5703125, 87.68359375, 90.796875, 93.91015625, 97.0234375, 100.13671875, 103.25]}, "gradients/decoder.transformer.h.14.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 3.0, 2.0, 6.0, 5.0, 3.0, 14.0, 11.0, 16.0, 14.0, 21.0, 24.0, 25.0, 25.0, 35.0, 44.0, 31.0, 60.0, 60.0, 63.0, 115.0, 1808.0, 150.0, 76.0, 68.0, 42.0, 46.0, 58.0, 39.0, 25.0, 29.0, 32.0, 24.0, 15.0, 15.0, 14.0, 10.0, 8.0, 6.0, 7.0, 5.0, 2.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 3.0], "bins": [-214.75, -208.88671875, -203.0234375, -197.16015625, -191.296875, -185.43359375, -179.5703125, -173.70703125, -167.84375, -161.98046875, -156.1171875, -150.25390625, -144.390625, -138.52734375, -132.6640625, -126.80078125, -120.9375, -115.07421875, -109.2109375, -103.34765625, -97.484375, -91.62109375, -85.7578125, -79.89453125, -74.03125, -68.16796875, -62.3046875, -56.44140625, -50.578125, -44.71484375, -38.8515625, -32.98828125, -27.125, -21.26171875, -15.3984375, -9.53515625, -3.671875, 2.19140625, 8.0546875, 13.91796875, 19.78125, 25.64453125, 31.5078125, 37.37109375, 43.234375, 49.09765625, 54.9609375, 60.82421875, 66.6875, 72.55078125, 78.4140625, 84.27734375, 90.140625, 96.00390625, 101.8671875, 107.73046875, 113.59375, 119.45703125, 125.3203125, 131.18359375, 137.046875, 142.91015625, 148.7734375, 154.63671875, 160.5]}, "gradients/decoder.transformer.h.14.attn.c_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 2.0, 0.0, 2.0, 2.0, 5.0, 2.0, 1.0, 6.0, 6.0, 8.0, 15.0, 17.0, 21.0, 23.0, 32.0, 42.0, 70.0, 110.0, 178.0, 293.0, 651.0, 1800.0, 7665.0, 173711.0, 2942835.0, 13852.0, 2559.0, 857.0, 373.0, 194.0, 121.0, 69.0, 48.0, 34.0, 28.0, 22.0, 21.0, 17.0, 10.0, 4.0, 5.0, 1.0, 3.0, 2.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-390.5, -376.1171875, -361.734375, -347.3515625, -332.96875, -318.5859375, -304.203125, -289.8203125, -275.4375, -261.0546875, -246.671875, -232.2890625, -217.90625, -203.5234375, -189.140625, -174.7578125, -160.375, -145.9921875, -131.609375, -117.2265625, -102.84375, -88.4609375, -74.078125, -59.6953125, -45.3125, -30.9296875, -16.546875, -2.1640625, 12.21875, 26.6015625, 40.984375, 55.3671875, 69.75, 84.1328125, 98.515625, 112.8984375, 127.28125, 141.6640625, 156.046875, 170.4296875, 184.8125, 199.1953125, 213.578125, 227.9609375, 242.34375, 256.7265625, 271.109375, 285.4921875, 299.875, 314.2578125, 328.640625, 343.0234375, 357.40625, 371.7890625, 386.171875, 400.5546875, 414.9375, 429.3203125, 443.703125, 458.0859375, 472.46875, 486.8515625, 501.234375, 515.6171875, 530.0]}, "gradients/decoder.transformer.h.14.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 11.0, 926.0, 83.0, 1.0], "bins": [-10219.6171875, -10053.9287109375, -9888.2392578125, -9722.55078125, -9556.8623046875, -9391.1728515625, -9225.484375, -9059.794921875, -8894.1064453125, -8728.41796875, -8562.728515625, -8397.0400390625, -8231.3515625, -8065.662109375, -7899.9736328125, -7734.28466796875, -7568.595703125, -7402.90673828125, -7237.21826171875, -7071.529296875, -6905.84033203125, -6740.1513671875, -6574.462890625, -6408.77392578125, -6243.08544921875, -6077.396484375, -5911.7080078125, -5746.01904296875, -5580.330078125, -5414.64111328125, -5248.95263671875, -5083.263671875, -4917.5751953125, -4751.88623046875, -4586.19775390625, -4420.5087890625, -4254.81982421875, -4089.131103515625, -3923.4423828125, -3757.75341796875, -3592.064453125, -3426.375732421875, -3260.686767578125, -3094.998046875, -2929.30908203125, -2763.620361328125, -2597.931640625, -2432.24267578125, -2266.553955078125, -2100.865234375, -1935.17626953125, -1769.487548828125, -1603.798583984375, -1438.10986328125, -1272.4210205078125, -1106.732177734375, -941.0432739257812, -775.3544311523438, -609.6656494140625, -443.976806640625, -278.2879638671875, -112.59912109375, 53.08966064453125, 218.77850341796875, 384.46734619140625]}, "gradients/decoder.transformer.h.14.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 3.0, 4.0, 4.0, 3.0, 3.0, 4.0, 9.0, 11.0, 13.0, 13.0, 16.0, 15.0, 20.0, 19.0, 18.0, 31.0, 25.0, 37.0, 38.0, 31.0, 45.0, 26.0, 49.0, 44.0, 45.0, 35.0, 31.0, 41.0, 34.0, 42.0, 32.0, 42.0, 24.0, 28.0, 25.0, 25.0, 20.0, 22.0, 16.0, 15.0, 12.0, 13.0, 4.0, 6.0, 5.0, 5.0, 2.0, 4.0, 4.0, 4.0, 2.0, 0.0, 2.0], "bins": [-561.266845703125, -545.1923217773438, -529.1177978515625, -513.0432739257812, -496.9687194824219, -480.8941955566406, -464.8196716308594, -448.7451477050781, -432.67059326171875, -416.5960693359375, -400.52154541015625, -384.447021484375, -368.3724670410156, -352.2979431152344, -336.2234191894531, -320.1488952636719, -304.0743713378906, -287.9998474121094, -271.9253234863281, -255.8507843017578, -239.7762451171875, -223.70172119140625, -207.627197265625, -191.55267333984375, -175.47813415527344, -159.4036102294922, -143.32907104492188, -127.25454711914062, -111.18001556396484, -95.10548400878906, -79.03096008300781, -62.95642852783203, -46.881927490234375, -30.807397842407227, -14.732868194580078, 1.3416595458984375, 17.41619110107422, 33.49072265625, 49.56524658203125, 65.63977813720703, 81.71430969238281, 97.7888412475586, 113.86337280273438, 129.93789672851562, 146.01242065429688, 162.0869598388672, 178.16148376464844, 194.23602294921875, 210.310546875, 226.38507080078125, 242.45960998535156, 258.53411865234375, 274.6086730957031, 290.6831970214844, 306.7577209472656, 322.8322448730469, 338.90679931640625, 354.9813232421875, 371.05584716796875, 387.13037109375, 403.2049255371094, 419.2794494628906, 435.3539733886719, 451.4284973144531, 467.5030212402344]}, "gradients/decoder.transformer.h.13.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 3.0, 1.0, 2.0, 2.0, 4.0, 6.0, 3.0, 9.0, 4.0, 11.0, 10.0, 10.0, 13.0, 15.0, 14.0, 26.0, 25.0, 26.0, 31.0, 37.0, 45.0, 47.0, 45.0, 54.0, 53.0, 46.0, 43.0, 51.0, 41.0, 45.0, 31.0, 40.0, 34.0, 31.0, 23.0, 25.0, 14.0, 13.0, 16.0, 18.0, 11.0, 7.0, 3.0, 12.0, 4.0, 5.0, 3.0, 2.0, 2.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-52.5625, -50.71875, -48.875, -47.03125, -45.1875, -43.34375, -41.5, -39.65625, -37.8125, -35.96875, -34.125, -32.28125, -30.4375, -28.59375, -26.75, -24.90625, -23.0625, -21.21875, -19.375, -17.53125, -15.6875, -13.84375, -12.0, -10.15625, -8.3125, -6.46875, -4.625, -2.78125, -0.9375, 0.90625, 2.75, 4.59375, 6.4375, 8.28125, 10.125, 11.96875, 13.8125, 15.65625, 17.5, 19.34375, 21.1875, 23.03125, 24.875, 26.71875, 28.5625, 30.40625, 32.25, 34.09375, 35.9375, 37.78125, 39.625, 41.46875, 43.3125, 45.15625, 47.0, 48.84375, 50.6875, 52.53125, 54.375, 56.21875, 58.0625, 59.90625, 61.75, 63.59375, 65.4375]}, "gradients/decoder.transformer.h.13.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 1.0, 2.0, 4.0, 3.0, 3.0, 7.0, 8.0, 13.0, 13.0, 23.0, 22.0, 41.0, 40.0, 47.0, 51.0, 84.0, 131.0, 256.0, 446.0, 799.0, 1409.0, 2968.0, 6583.0, 18766.0, 162901.0, 3649898.0, 311209.0, 23042.0, 8152.0, 3569.0, 1651.0, 860.0, 459.0, 256.0, 155.0, 110.0, 77.0, 62.0, 39.0, 43.0, 27.0, 16.0, 17.0, 7.0, 7.0, 7.0, 5.0, 3.0, 4.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-174.0, -168.119140625, -162.23828125, -156.357421875, -150.4765625, -144.595703125, -138.71484375, -132.833984375, -126.953125, -121.072265625, -115.19140625, -109.310546875, -103.4296875, -97.548828125, -91.66796875, -85.787109375, -79.90625, -74.025390625, -68.14453125, -62.263671875, -56.3828125, -50.501953125, -44.62109375, -38.740234375, -32.859375, -26.978515625, -21.09765625, -15.216796875, -9.3359375, -3.455078125, 2.42578125, 8.306640625, 14.1875, 20.068359375, 25.94921875, 31.830078125, 37.7109375, 43.591796875, 49.47265625, 55.353515625, 61.234375, 67.115234375, 72.99609375, 78.876953125, 84.7578125, 90.638671875, 96.51953125, 102.400390625, 108.28125, 114.162109375, 120.04296875, 125.923828125, 131.8046875, 137.685546875, 143.56640625, 149.447265625, 155.328125, 161.208984375, 167.08984375, 172.970703125, 178.8515625, 184.732421875, 190.61328125, 196.494140625, 202.375]}, "gradients/decoder.transformer.h.13.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0, 4.0, 7.0, 7.0, 8.0, 9.0, 14.0, 21.0, 22.0, 25.0, 42.0, 73.0, 97.0, 201.0, 384.0, 993.0, 1115.0, 511.0, 225.0, 124.0, 72.0, 34.0, 24.0, 25.0, 14.0, 10.0, 5.0, 3.0, 1.0, 2.0, 2.0, 0.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0], "bins": [-172.25, -167.048828125, -161.84765625, -156.646484375, -151.4453125, -146.244140625, -141.04296875, -135.841796875, -130.640625, -125.439453125, -120.23828125, -115.037109375, -109.8359375, -104.634765625, -99.43359375, -94.232421875, -89.03125, -83.830078125, -78.62890625, -73.427734375, -68.2265625, -63.025390625, -57.82421875, -52.623046875, -47.421875, -42.220703125, -37.01953125, -31.818359375, -26.6171875, -21.416015625, -16.21484375, -11.013671875, -5.8125, -0.611328125, 4.58984375, 9.791015625, 14.9921875, 20.193359375, 25.39453125, 30.595703125, 35.796875, 40.998046875, 46.19921875, 51.400390625, 56.6015625, 61.802734375, 67.00390625, 72.205078125, 77.40625, 82.607421875, 87.80859375, 93.009765625, 98.2109375, 103.412109375, 108.61328125, 113.814453125, 119.015625, 124.216796875, 129.41796875, 134.619140625, 139.8203125, 145.021484375, 150.22265625, 155.423828125, 160.625]}, "gradients/decoder.transformer.h.13.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 3.0, 3.0, 5.0, 8.0, 13.0, 10.0, 11.0, 13.0, 22.0, 25.0, 48.0, 60.0, 95.0, 118.0, 204.0, 338.0, 659.0, 1305.0, 3613.0, 12240.0, 75915.0, 3881795.0, 189803.0, 19416.0, 4908.0, 1661.0, 752.0, 405.0, 254.0, 164.0, 99.0, 82.0, 57.0, 49.0, 40.0, 15.0, 12.0, 18.0, 15.0, 8.0, 5.0, 5.0, 6.0, 4.0, 3.0, 0.0, 1.0, 1.0, 2.0, 3.0, 2.0, 0.0, 0.0, 1.0], "bins": [-378.0, -366.05859375, -354.1171875, -342.17578125, -330.234375, -318.29296875, -306.3515625, -294.41015625, -282.46875, -270.52734375, -258.5859375, -246.64453125, -234.703125, -222.76171875, -210.8203125, -198.87890625, -186.9375, -174.99609375, -163.0546875, -151.11328125, -139.171875, -127.23046875, -115.2890625, -103.34765625, -91.40625, -79.46484375, -67.5234375, -55.58203125, -43.640625, -31.69921875, -19.7578125, -7.81640625, 4.125, 16.06640625, 28.0078125, 39.94921875, 51.890625, 63.83203125, 75.7734375, 87.71484375, 99.65625, 111.59765625, 123.5390625, 135.48046875, 147.421875, 159.36328125, 171.3046875, 183.24609375, 195.1875, 207.12890625, 219.0703125, 231.01171875, 242.953125, 254.89453125, 266.8359375, 278.77734375, 290.71875, 302.66015625, 314.6015625, 326.54296875, 338.484375, 350.42578125, 362.3671875, 374.30859375, 386.25]}, "gradients/decoder.transformer.h.13.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 5.0, 10.0, 11.0, 17.0, 21.0, 48.0, 84.0, 145.0, 225.0, 209.0, 97.0, 58.0, 26.0, 28.0, 13.0, 8.0, 2.0, 3.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-889.161376953125, -860.4528198242188, -831.7442626953125, -803.03564453125, -774.3270874023438, -745.6185302734375, -716.9099731445312, -688.201416015625, -659.4927978515625, -630.7842407226562, -602.07568359375, -573.3670654296875, -544.6585083007812, -515.949951171875, -487.24139404296875, -458.5328369140625, -429.82427978515625, -401.11572265625, -372.4071350097656, -343.6985778808594, -314.989990234375, -286.28143310546875, -257.5728759765625, -228.8643035888672, -200.15573120117188, -171.44715881347656, -142.73858642578125, -114.030029296875, -85.32145690917969, -56.612884521484375, -27.904327392578125, 0.8042449951171875, 29.51275634765625, 58.2213249206543, 86.92989349365234, 115.63845825195312, 144.34703063964844, 173.05560302734375, 201.76416015625, 230.4727325439453, 259.1813049316406, 287.8898620605469, 316.59844970703125, 345.3070068359375, 374.01556396484375, 402.7241516113281, 431.4327087402344, 460.14129638671875, 488.849853515625, 517.5584106445312, 546.2669677734375, 574.9755859375, 603.6841430664062, 632.3927001953125, 661.1012573242188, 689.809814453125, 718.5184326171875, 747.2269897460938, 775.935546875, 804.6441650390625, 833.3527221679688, 862.061279296875, 890.7698364257812, 919.4783935546875, 948.1869506835938]}, "gradients/decoder.transformer.h.13.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 1.0, 2.0, 4.0, 2.0, 2.0, 3.0, 9.0, 8.0, 6.0, 7.0, 16.0, 14.0, 16.0, 19.0, 23.0, 13.0, 28.0, 28.0, 35.0, 43.0, 47.0, 40.0, 32.0, 39.0, 45.0, 38.0, 48.0, 44.0, 40.0, 47.0, 33.0, 39.0, 34.0, 33.0, 26.0, 23.0, 26.0, 20.0, 16.0, 13.0, 17.0, 11.0, 4.0, 8.0, 1.0, 4.0, 4.0, 4.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-343.57000732421875, -331.92864990234375, -320.2873229980469, -308.6459655761719, -297.0046081542969, -285.3632507324219, -273.721923828125, -262.08056640625, -250.439208984375, -238.79786682128906, -227.15650939941406, -215.51516723632812, -203.87380981445312, -192.2324676513672, -180.59112548828125, -168.94976806640625, -157.3084259033203, -145.66708374023438, -134.02572631835938, -122.38438415527344, -110.74302673339844, -99.1016845703125, -87.46033477783203, -75.81898498535156, -64.1776351928711, -52.536285400390625, -40.894935607910156, -29.253589630126953, -17.612239837646484, -5.970890045166016, 5.6704559326171875, 17.311805725097656, 28.953155517578125, 40.594505310058594, 52.23585510253906, 63.877201080322266, 75.5185546875, 87.15989685058594, 98.8012466430664, 110.44259643554688, 122.08394622802734, 133.7252960205078, 145.36663818359375, 157.00799560546875, 168.6493377685547, 180.2906951904297, 191.93203735351562, 203.57339477539062, 215.21473693847656, 226.8560791015625, 238.4974365234375, 250.13877868652344, 261.7801208496094, 273.4214782714844, 285.0628356933594, 296.70416259765625, 308.34552001953125, 319.98687744140625, 331.6282043457031, 343.2695617675781, 354.9109191894531, 366.5522766113281, 378.193603515625, 389.8349609375, 401.476318359375]}, "gradients/decoder.transformer.h.13.crossattention.c_proj.bias": {"_type": "histogram", "values": [3.0, 1.0, 1.0, 2.0, 1.0, 2.0, 2.0, 3.0, 5.0, 7.0, 4.0, 7.0, 6.0, 12.0, 7.0, 14.0, 15.0, 21.0, 26.0, 17.0, 32.0, 26.0, 45.0, 35.0, 46.0, 34.0, 30.0, 54.0, 56.0, 39.0, 51.0, 38.0, 51.0, 41.0, 27.0, 40.0, 34.0, 29.0, 19.0, 20.0, 17.0, 18.0, 15.0, 15.0, 8.0, 11.0, 12.0, 4.0, 4.0, 3.0, 4.0, 3.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-51.8125, -50.0302734375, -48.248046875, -46.4658203125, -44.68359375, -42.9013671875, -41.119140625, -39.3369140625, -37.5546875, -35.7724609375, -33.990234375, -32.2080078125, -30.42578125, -28.6435546875, -26.861328125, -25.0791015625, -23.296875, -21.5146484375, -19.732421875, -17.9501953125, -16.16796875, -14.3857421875, -12.603515625, -10.8212890625, -9.0390625, -7.2568359375, -5.474609375, -3.6923828125, -1.91015625, -0.1279296875, 1.654296875, 3.4365234375, 5.21875, 7.0009765625, 8.783203125, 10.5654296875, 12.34765625, 14.1298828125, 15.912109375, 17.6943359375, 19.4765625, 21.2587890625, 23.041015625, 24.8232421875, 26.60546875, 28.3876953125, 30.169921875, 31.9521484375, 33.734375, 35.5166015625, 37.298828125, 39.0810546875, 40.86328125, 42.6455078125, 44.427734375, 46.2099609375, 47.9921875, 49.7744140625, 51.556640625, 53.3388671875, 55.12109375, 56.9033203125, 58.685546875, 60.4677734375, 62.25]}, "gradients/decoder.transformer.h.13.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 2.0, 1.0, 5.0, 3.0, 5.0, 8.0, 7.0, 19.0, 19.0, 23.0, 28.0, 47.0, 60.0, 87.0, 145.0, 183.0, 210.0, 346.0, 513.0, 743.0, 1141.0, 1869.0, 3227.0, 5603.0, 11050.0, 23022.0, 52237.0, 138704.0, 418007.0, 242274.0, 81129.0, 33514.0, 15529.0, 7929.0, 4255.0, 2298.0, 1418.0, 922.0, 596.0, 410.0, 280.0, 148.0, 165.0, 113.0, 63.0, 61.0, 40.0, 36.0, 22.0, 14.0, 16.0, 9.0, 6.0, 2.0, 5.0, 2.0, 1.0, 1.0, 1.0, 1.0], "bins": [-10.890625, -10.5579833984375, -10.225341796875, -9.8927001953125, -9.56005859375, -9.2274169921875, -8.894775390625, -8.5621337890625, -8.2294921875, -7.8968505859375, -7.564208984375, -7.2315673828125, -6.89892578125, -6.5662841796875, -6.233642578125, -5.9010009765625, -5.568359375, -5.2357177734375, -4.903076171875, -4.5704345703125, -4.23779296875, -3.9051513671875, -3.572509765625, -3.2398681640625, -2.9072265625, -2.5745849609375, -2.241943359375, -1.9093017578125, -1.57666015625, -1.2440185546875, -0.911376953125, -0.5787353515625, -0.24609375, 0.0865478515625, 0.419189453125, 0.7518310546875, 1.08447265625, 1.4171142578125, 1.749755859375, 2.0823974609375, 2.4150390625, 2.7476806640625, 3.080322265625, 3.4129638671875, 3.74560546875, 4.0782470703125, 4.410888671875, 4.7435302734375, 5.076171875, 5.4088134765625, 5.741455078125, 6.0740966796875, 6.40673828125, 6.7393798828125, 7.072021484375, 7.4046630859375, 7.7373046875, 8.0699462890625, 8.402587890625, 8.7352294921875, 9.06787109375, 9.4005126953125, 9.733154296875, 10.0657958984375, 10.3984375]}, "gradients/decoder.transformer.h.13.crossattention.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 3.0, 4.0, 0.0, 3.0, 6.0, 3.0, 10.0, 4.0, 13.0, 7.0, 17.0, 9.0, 20.0, 24.0, 19.0, 23.0, 17.0, 33.0, 29.0, 35.0, 33.0, 34.0, 23.0, 41.0, 38.0, 51.0, 1064.0, 33.0, 41.0, 45.0, 44.0, 37.0, 31.0, 27.0, 28.0, 27.0, 23.0, 17.0, 15.0, 16.0, 11.0, 16.0, 14.0, 10.0, 7.0, 5.0, 8.0, 5.0, 3.0, 5.0, 2.0, 4.0, 2.0, 1.0, 1.0, 1.0], "bins": [-36.46875, -35.40185546875, -34.3349609375, -33.26806640625, -32.201171875, -31.13427734375, -30.0673828125, -29.00048828125, -27.93359375, -26.86669921875, -25.7998046875, -24.73291015625, -23.666015625, -22.59912109375, -21.5322265625, -20.46533203125, -19.3984375, -18.33154296875, -17.2646484375, -16.19775390625, -15.130859375, -14.06396484375, -12.9970703125, -11.93017578125, -10.86328125, -9.79638671875, -8.7294921875, -7.66259765625, -6.595703125, -5.52880859375, -4.4619140625, -3.39501953125, -2.328125, -1.26123046875, -0.1943359375, 0.87255859375, 1.939453125, 3.00634765625, 4.0732421875, 5.14013671875, 6.20703125, 7.27392578125, 8.3408203125, 9.40771484375, 10.474609375, 11.54150390625, 12.6083984375, 13.67529296875, 14.7421875, 15.80908203125, 16.8759765625, 17.94287109375, 19.009765625, 20.07666015625, 21.1435546875, 22.21044921875, 23.27734375, 24.34423828125, 25.4111328125, 26.47802734375, 27.544921875, 28.61181640625, 29.6787109375, 30.74560546875, 31.8125]}, "gradients/decoder.transformer.h.13.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 3.0, 4.0, 6.0, 11.0, 7.0, 12.0, 27.0, 34.0, 38.0, 58.0, 82.0, 139.0, 201.0, 370.0, 660.0, 1277.0, 2552.0, 5305.0, 12103.0, 28194.0, 70867.0, 192746.0, 1458954.0, 198393.0, 73072.0, 28645.0, 12245.0, 5580.0, 2574.0, 1273.0, 703.0, 395.0, 218.0, 129.0, 82.0, 50.0, 39.0, 24.0, 24.0, 14.0, 12.0, 4.0, 6.0, 4.0, 0.0, 2.0, 4.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-5.6875, -5.50701904296875, -5.3265380859375, -5.14605712890625, -4.965576171875, -4.78509521484375, -4.6046142578125, -4.42413330078125, -4.24365234375, -4.06317138671875, -3.8826904296875, -3.70220947265625, -3.521728515625, -3.34124755859375, -3.1607666015625, -2.98028564453125, -2.7998046875, -2.61932373046875, -2.4388427734375, -2.25836181640625, -2.077880859375, -1.89739990234375, -1.7169189453125, -1.53643798828125, -1.35595703125, -1.17547607421875, -0.9949951171875, -0.81451416015625, -0.634033203125, -0.45355224609375, -0.2730712890625, -0.09259033203125, 0.087890625, 0.26837158203125, 0.4488525390625, 0.62933349609375, 0.809814453125, 0.99029541015625, 1.1707763671875, 1.35125732421875, 1.53173828125, 1.71221923828125, 1.8927001953125, 2.07318115234375, 2.253662109375, 2.43414306640625, 2.6146240234375, 2.79510498046875, 2.9755859375, 3.15606689453125, 3.3365478515625, 3.51702880859375, 3.697509765625, 3.87799072265625, 4.0584716796875, 4.23895263671875, 4.41943359375, 4.59991455078125, 4.7803955078125, 4.96087646484375, 5.141357421875, 5.32183837890625, 5.5023193359375, 5.68280029296875, 5.86328125]}, "gradients/decoder.transformer.h.13.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 1.0, 3.0, 0.0, 3.0, 0.0, 2.0, 3.0, 5.0, 10.0, 6.0, 14.0, 12.0, 11.0, 23.0, 17.0, 32.0, 42.0, 54.0, 65.0, 92.0, 99.0, 120.0, 110.0, 65.0, 50.0, 40.0, 28.0, 23.0, 22.0, 12.0, 13.0, 10.0, 3.0, 10.0, 6.0, 2.0, 0.0, 4.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.6259765625, -0.609100341796875, -0.59222412109375, -0.575347900390625, -0.5584716796875, -0.541595458984375, -0.52471923828125, -0.507843017578125, -0.490966796875, -0.474090576171875, -0.45721435546875, -0.440338134765625, -0.4234619140625, -0.406585693359375, -0.38970947265625, -0.372833251953125, -0.35595703125, -0.339080810546875, -0.32220458984375, -0.305328369140625, -0.2884521484375, -0.271575927734375, -0.25469970703125, -0.237823486328125, -0.220947265625, -0.204071044921875, -0.18719482421875, -0.170318603515625, -0.1534423828125, -0.136566162109375, -0.11968994140625, -0.102813720703125, -0.0859375, -0.069061279296875, -0.05218505859375, -0.035308837890625, -0.0184326171875, -0.001556396484375, 0.01531982421875, 0.032196044921875, 0.049072265625, 0.065948486328125, 0.08282470703125, 0.099700927734375, 0.1165771484375, 0.133453369140625, 0.15032958984375, 0.167205810546875, 0.18408203125, 0.200958251953125, 0.21783447265625, 0.234710693359375, 0.2515869140625, 0.268463134765625, 0.28533935546875, 0.302215576171875, 0.319091796875, 0.335968017578125, 0.35284423828125, 0.369720458984375, 0.3865966796875, 0.403472900390625, 0.42034912109375, 0.437225341796875, 0.4541015625]}, "gradients/decoder.transformer.h.13.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 4.0, 1.0, 0.0, 2.0, 6.0, 9.0, 3.0, 8.0, 10.0, 16.0, 18.0, 25.0, 23.0, 42.0, 66.0, 97.0, 209.0, 1044303.0, 3204.0, 162.0, 97.0, 82.0, 36.0, 36.0, 22.0, 16.0, 13.0, 12.0, 11.0, 5.0, 10.0, 5.0, 3.0, 2.0, 0.0, 3.0, 0.0, 3.0, 1.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-14.8671875, -14.3148193359375, -13.762451171875, -13.2100830078125, -12.65771484375, -12.1053466796875, -11.552978515625, -11.0006103515625, -10.4482421875, -9.8958740234375, -9.343505859375, -8.7911376953125, -8.23876953125, -7.6864013671875, -7.134033203125, -6.5816650390625, -6.029296875, -5.4769287109375, -4.924560546875, -4.3721923828125, -3.81982421875, -3.2674560546875, -2.715087890625, -2.1627197265625, -1.6103515625, -1.0579833984375, -0.505615234375, 0.0467529296875, 0.59912109375, 1.1514892578125, 1.703857421875, 2.2562255859375, 2.80859375, 3.3609619140625, 3.913330078125, 4.4656982421875, 5.01806640625, 5.5704345703125, 6.122802734375, 6.6751708984375, 7.2275390625, 7.7799072265625, 8.332275390625, 8.8846435546875, 9.43701171875, 9.9893798828125, 10.541748046875, 11.0941162109375, 11.646484375, 12.1988525390625, 12.751220703125, 13.3035888671875, 13.85595703125, 14.4083251953125, 14.960693359375, 15.5130615234375, 16.0654296875, 16.6177978515625, 17.170166015625, 17.7225341796875, 18.27490234375, 18.8272705078125, 19.379638671875, 19.9320068359375, 20.484375]}, "gradients/decoder.transformer.h.13.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 51.0, 960.0, 6.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.5375372171401978, -0.37334200739860535, -0.20914679765701294, -0.04495158791542053, 0.11924362182617188, 0.2834388017654419, 0.4476340413093567, 0.6118292808532715, 0.7760244607925415, 0.9402196407318115, 1.104414939880371, 1.2686101198196411, 1.4328052997589111, 1.5970004796981812, 1.7611956596374512, 1.9253909587860107, 2.0895862579345703, 2.25378155708313, 2.4179766178131104, 2.58217191696167, 2.7463669776916504, 2.91056227684021, 3.0747575759887695, 3.23895263671875, 3.4031476974487305, 3.56734299659729, 3.7315380573272705, 3.89573335647583, 4.0599284172058105, 4.224123954772949, 4.38831901550293, 4.55251407623291, 4.716709136962891, 4.880904197692871, 5.04509973526001, 5.20929479598999, 5.373489856719971, 5.537685394287109, 5.70188045501709, 5.86607551574707, 6.030270576477051, 6.194465637207031, 6.35866117477417, 6.52285623550415, 6.687051296234131, 6.8512468338012695, 7.01544189453125, 7.1796369552612305, 7.343832492828369, 7.50802755355835, 7.672223091125488, 7.836418151855469, 8.00061321258545, 8.16480827331543, 8.32900333404541, 8.493199348449707, 8.657394409179688, 8.821589469909668, 8.985784530639648, 9.149979591369629, 9.314175605773926, 9.478370666503906, 9.642565727233887, 9.806760787963867, 9.970955848693848]}, "gradients/decoder.transformer.h.13.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 2.0, 0.0, 1.0, 5.0, 5.0, 6.0, 5.0, 10.0, 8.0, 14.0, 16.0, 22.0, 14.0, 26.0, 27.0, 34.0, 34.0, 31.0, 36.0, 33.0, 36.0, 39.0, 39.0, 51.0, 34.0, 43.0, 44.0, 36.0, 31.0, 37.0, 42.0, 27.0, 33.0, 28.0, 23.0, 24.0, 18.0, 20.0, 14.0, 8.0, 7.0, 13.0, 9.0, 2.0, 10.0, 4.0, 2.0, 5.0, 3.0, 3.0, 0.0, 1.0, 0.0, 0.0, 3.0, 0.0, 0.0, 1.0], "bins": [-0.6692379117012024, -0.6474929451942444, -0.6257479190826416, -0.6040029525756836, -0.5822579264640808, -0.5605129599571228, -0.53876793384552, -0.517022967338562, -0.4952779710292816, -0.4735329747200012, -0.4517879784107208, -0.43004298210144043, -0.4082980155944824, -0.38655298948287964, -0.36480802297592163, -0.34306302666664124, -0.32131803035736084, -0.29957303404808044, -0.27782803773880005, -0.25608307123184204, -0.23433806002140045, -0.21259306371212006, -0.19084808230400085, -0.16910308599472046, -0.14735808968544006, -0.12561309337615967, -0.10386810451745987, -0.08212311565876007, -0.060378119349479675, -0.03863312304019928, -0.016888141632080078, 0.004856854677200317, 0.026601791381835938, 0.048346783965826035, 0.07009177654981613, 0.09183676540851593, 0.11358176171779633, 0.13532675802707672, 0.15707173943519592, 0.17881673574447632, 0.2005617320537567, 0.2223067283630371, 0.2440517246723175, 0.2657967209815979, 0.2875416874885559, 0.3092867136001587, 0.3310316801071167, 0.3527766764163971, 0.3745216727256775, 0.3962666690349579, 0.4180116653442383, 0.4397566318511963, 0.4615016579627991, 0.4832466244697571, 0.5049916505813599, 0.5267366170883179, 0.5484815835952759, 0.5702265501022339, 0.5919715762138367, 0.6137165427207947, 0.6354615688323975, 0.6572065353393555, 0.6789515018463135, 0.7006965279579163, 0.722441554069519]}, "gradients/decoder.transformer.h.13.attn.c_proj.bias": {"_type": "histogram", "values": [3.0, 1.0, 1.0, 2.0, 1.0, 2.0, 2.0, 3.0, 5.0, 7.0, 4.0, 7.0, 6.0, 12.0, 6.0, 14.0, 16.0, 21.0, 26.0, 17.0, 32.0, 26.0, 44.0, 36.0, 46.0, 34.0, 30.0, 54.0, 55.0, 40.0, 51.0, 38.0, 50.0, 42.0, 28.0, 39.0, 34.0, 29.0, 20.0, 19.0, 17.0, 18.0, 14.0, 16.0, 8.0, 11.0, 12.0, 4.0, 4.0, 3.0, 5.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-51.8125, -50.0302734375, -48.248046875, -46.4658203125, -44.68359375, -42.9013671875, -41.119140625, -39.3369140625, -37.5546875, -35.7724609375, -33.990234375, -32.2080078125, -30.42578125, -28.6435546875, -26.861328125, -25.0791015625, -23.296875, -21.5146484375, -19.732421875, -17.9501953125, -16.16796875, -14.3857421875, -12.603515625, -10.8212890625, -9.0390625, -7.2568359375, -5.474609375, -3.6923828125, -1.91015625, -0.1279296875, 1.654296875, 3.4365234375, 5.21875, 7.0009765625, 8.783203125, 10.5654296875, 12.34765625, 14.1298828125, 15.912109375, 17.6943359375, 19.4765625, 21.2587890625, 23.041015625, 24.8232421875, 26.60546875, 28.3876953125, 30.169921875, 31.9521484375, 33.734375, 35.5166015625, 37.298828125, 39.0810546875, 40.86328125, 42.6455078125, 44.427734375, 46.2099609375, 47.9921875, 49.7744140625, 51.556640625, 53.3388671875, 55.12109375, 56.9033203125, 58.685546875, 60.4677734375, 62.25]}, "gradients/decoder.transformer.h.13.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 2.0, 3.0, 4.0, 5.0, 7.0, 6.0, 9.0, 25.0, 24.0, 26.0, 46.0, 40.0, 93.0, 132.0, 191.0, 275.0, 395.0, 584.0, 898.0, 1467.0, 2387.0, 3939.0, 6927.0, 14675.0, 66619.0, 870562.0, 49299.0, 13437.0, 6656.0, 3616.0, 2136.0, 1364.0, 884.0, 591.0, 437.0, 246.0, 178.0, 89.0, 72.0, 57.0, 42.0, 38.0, 24.0, 17.0, 11.0, 9.0, 10.0, 6.0, 3.0, 6.0, 0.0, 1.0, 1.0], "bins": [-119.4375, -116.16015625, -112.8828125, -109.60546875, -106.328125, -103.05078125, -99.7734375, -96.49609375, -93.21875, -89.94140625, -86.6640625, -83.38671875, -80.109375, -76.83203125, -73.5546875, -70.27734375, -67.0, -63.72265625, -60.4453125, -57.16796875, -53.890625, -50.61328125, -47.3359375, -44.05859375, -40.78125, -37.50390625, -34.2265625, -30.94921875, -27.671875, -24.39453125, -21.1171875, -17.83984375, -14.5625, -11.28515625, -8.0078125, -4.73046875, -1.453125, 1.82421875, 5.1015625, 8.37890625, 11.65625, 14.93359375, 18.2109375, 21.48828125, 24.765625, 28.04296875, 31.3203125, 34.59765625, 37.875, 41.15234375, 44.4296875, 47.70703125, 50.984375, 54.26171875, 57.5390625, 60.81640625, 64.09375, 67.37109375, 70.6484375, 73.92578125, 77.203125, 80.48046875, 83.7578125, 87.03515625, 90.3125]}, "gradients/decoder.transformer.h.13.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 5.0, 3.0, 5.0, 5.0, 10.0, 9.0, 8.0, 9.0, 19.0, 23.0, 27.0, 24.0, 28.0, 28.0, 38.0, 41.0, 53.0, 44.0, 68.0, 80.0, 319.0, 1679.0, 76.0, 66.0, 60.0, 51.0, 48.0, 35.0, 24.0, 25.0, 30.0, 36.0, 23.0, 21.0, 10.0, 11.0, 8.0, 2.0, 3.0, 3.0, 4.0, 2.0, 1.0, 1.0, 2.0, 0.0, 2.0], "bins": [-241.625, -235.27734375, -228.9296875, -222.58203125, -216.234375, -209.88671875, -203.5390625, -197.19140625, -190.84375, -184.49609375, -178.1484375, -171.80078125, -165.453125, -159.10546875, -152.7578125, -146.41015625, -140.0625, -133.71484375, -127.3671875, -121.01953125, -114.671875, -108.32421875, -101.9765625, -95.62890625, -89.28125, -82.93359375, -76.5859375, -70.23828125, -63.890625, -57.54296875, -51.1953125, -44.84765625, -38.5, -32.15234375, -25.8046875, -19.45703125, -13.109375, -6.76171875, -0.4140625, 5.93359375, 12.28125, 18.62890625, 24.9765625, 31.32421875, 37.671875, 44.01953125, 50.3671875, 56.71484375, 63.0625, 69.41015625, 75.7578125, 82.10546875, 88.453125, 94.80078125, 101.1484375, 107.49609375, 113.84375, 120.19140625, 126.5390625, 132.88671875, 139.234375, 145.58203125, 151.9296875, 158.27734375, 164.625]}, "gradients/decoder.transformer.h.13.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 3.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 3.0, 3.0, 6.0, 3.0, 14.0, 11.0, 19.0, 20.0, 37.0, 77.0, 94.0, 126.0, 222.0, 526.0, 1933.0, 12350.0, 2946461.0, 173575.0, 7797.0, 1427.0, 403.0, 181.0, 128.0, 95.0, 61.0, 31.0, 25.0, 23.0, 18.0, 10.0, 6.0, 2.0, 5.0, 3.0, 1.0, 3.0, 2.0, 3.0, 1.0, 2.0, 1.0, 1.0, 3.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-627.0, -607.9375, -588.875, -569.8125, -550.75, -531.6875, -512.625, -493.5625, -474.5, -455.4375, -436.375, -417.3125, -398.25, -379.1875, -360.125, -341.0625, -322.0, -302.9375, -283.875, -264.8125, -245.75, -226.6875, -207.625, -188.5625, -169.5, -150.4375, -131.375, -112.3125, -93.25, -74.1875, -55.125, -36.0625, -17.0, 2.0625, 21.125, 40.1875, 59.25, 78.3125, 97.375, 116.4375, 135.5, 154.5625, 173.625, 192.6875, 211.75, 230.8125, 249.875, 268.9375, 288.0, 307.0625, 326.125, 345.1875, 364.25, 383.3125, 402.375, 421.4375, 440.5, 459.5625, 478.625, 497.6875, 516.75, 535.8125, 554.875, 573.9375, 593.0]}, "gradients/decoder.transformer.h.13.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 8.0, 273.0, 712.0, 29.0], "bins": [-6241.05419921875, -6140.65478515625, -6040.25537109375, -5939.85595703125, -5839.45654296875, -5739.05712890625, -5638.65771484375, -5538.25830078125, -5437.85888671875, -5337.45947265625, -5237.06005859375, -5136.66064453125, -5036.26123046875, -4935.86181640625, -4835.46240234375, -4735.06298828125, -4634.66357421875, -4534.26416015625, -4433.86474609375, -4333.46533203125, -4233.06591796875, -4132.66650390625, -4032.26708984375, -3931.86767578125, -3831.46826171875, -3731.06884765625, -3630.66943359375, -3530.27001953125, -3429.87060546875, -3329.47119140625, -3229.07177734375, -3128.67236328125, -3028.272705078125, -2927.873291015625, -2827.473876953125, -2727.074462890625, -2626.675048828125, -2526.275634765625, -2425.876220703125, -2325.476806640625, -2225.077392578125, -2124.677978515625, -2024.278564453125, -1923.879150390625, -1823.479736328125, -1723.080322265625, -1622.680908203125, -1522.281494140625, -1421.882080078125, -1321.482666015625, -1221.083251953125, -1120.683837890625, -1020.284423828125, -919.885009765625, -819.485595703125, -719.086181640625, -618.6868286132812, -518.2874145507812, -417.88800048828125, -317.48858642578125, -217.08917236328125, -116.68975830078125, -16.29034423828125, 84.10906982421875, 184.50848388671875]}, "gradients/decoder.transformer.h.13.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 4.0, 0.0, 1.0, 2.0, 3.0, 3.0, 2.0, 2.0, 7.0, 7.0, 7.0, 11.0, 13.0, 11.0, 17.0, 17.0, 18.0, 24.0, 22.0, 27.0, 29.0, 27.0, 40.0, 41.0, 35.0, 36.0, 35.0, 35.0, 35.0, 35.0, 52.0, 46.0, 37.0, 41.0, 31.0, 40.0, 30.0, 19.0, 26.0, 26.0, 21.0, 17.0, 13.0, 13.0, 9.0, 11.0, 10.0, 4.0, 7.0, 5.0, 5.0, 4.0, 3.0, 1.0, 3.0, 2.0], "bins": [-521.9562377929688, -507.3730163574219, -492.7898254394531, -478.20660400390625, -463.6233825683594, -449.0401611328125, -434.45697021484375, -419.8737487792969, -405.29052734375, -390.7073059082031, -376.1241149902344, -361.5408935546875, -346.9576721191406, -332.37445068359375, -317.791259765625, -303.2080383300781, -288.62481689453125, -274.0415954589844, -259.4584045410156, -244.87518310546875, -230.29196166992188, -215.70875549316406, -201.12554931640625, -186.54232788085938, -171.95913696289062, -157.3759307861328, -142.79270935058594, -128.20950317382812, -113.62628173828125, -99.04307556152344, -84.4598617553711, -69.87664794921875, -55.293426513671875, -40.71021270751953, -26.12700080871582, -11.54378890991211, 3.0394248962402344, 17.622634887695312, 32.205848693847656, 46.7890625, 61.372276306152344, 75.95549011230469, 90.53870391845703, 105.12191772460938, 119.70512390136719, 134.288330078125, 148.87155151367188, 163.45477294921875, 178.03797912597656, 192.62118530273438, 207.20440673828125, 221.78761291503906, 236.37083435058594, 250.95404052734375, 265.5372619628906, 280.1204833984375, 294.70367431640625, 309.2868957519531, 323.8700866699219, 338.45330810546875, 353.0365295410156, 367.6197509765625, 382.20294189453125, 396.7861633300781, 411.369384765625]}, "gradients/decoder.transformer.h.12.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 2.0, 2.0, 6.0, 4.0, 2.0, 7.0, 7.0, 5.0, 9.0, 8.0, 14.0, 12.0, 19.0, 30.0, 28.0, 29.0, 31.0, 31.0, 38.0, 34.0, 43.0, 42.0, 55.0, 40.0, 44.0, 55.0, 45.0, 45.0, 43.0, 41.0, 30.0, 27.0, 27.0, 21.0, 23.0, 19.0, 16.0, 16.0, 15.0, 7.0, 10.0, 7.0, 4.0, 5.0, 4.0, 4.0, 5.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-62.1875, -60.24755859375, -58.3076171875, -56.36767578125, -54.427734375, -52.48779296875, -50.5478515625, -48.60791015625, -46.66796875, -44.72802734375, -42.7880859375, -40.84814453125, -38.908203125, -36.96826171875, -35.0283203125, -33.08837890625, -31.1484375, -29.20849609375, -27.2685546875, -25.32861328125, -23.388671875, -21.44873046875, -19.5087890625, -17.56884765625, -15.62890625, -13.68896484375, -11.7490234375, -9.80908203125, -7.869140625, -5.92919921875, -3.9892578125, -2.04931640625, -0.109375, 1.83056640625, 3.7705078125, 5.71044921875, 7.650390625, 9.59033203125, 11.5302734375, 13.47021484375, 15.41015625, 17.35009765625, 19.2900390625, 21.22998046875, 23.169921875, 25.10986328125, 27.0498046875, 28.98974609375, 30.9296875, 32.86962890625, 34.8095703125, 36.74951171875, 38.689453125, 40.62939453125, 42.5693359375, 44.50927734375, 46.44921875, 48.38916015625, 50.3291015625, 52.26904296875, 54.208984375, 56.14892578125, 58.0888671875, 60.02880859375, 61.96875]}, "gradients/decoder.transformer.h.12.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 1.0, 2.0, 4.0, 3.0, 6.0, 5.0, 9.0, 6.0, 13.0, 11.0, 21.0, 40.0, 63.0, 102.0, 129.0, 161.0, 207.0, 281.0, 388.0, 586.0, 988.0, 1637.0, 2738.0, 5034.0, 11042.0, 36254.0, 499846.0, 3310410.0, 275397.0, 28174.0, 9392.0, 4478.0, 2358.0, 1469.0, 976.0, 644.0, 412.0, 283.0, 161.0, 124.0, 113.0, 75.0, 81.0, 67.0, 35.0, 26.0, 18.0, 6.0, 10.0, 2.0, 4.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 3.0, 1.0, 2.0], "bins": [-137.125, -132.615234375, -128.10546875, -123.595703125, -119.0859375, -114.576171875, -110.06640625, -105.556640625, -101.046875, -96.537109375, -92.02734375, -87.517578125, -83.0078125, -78.498046875, -73.98828125, -69.478515625, -64.96875, -60.458984375, -55.94921875, -51.439453125, -46.9296875, -42.419921875, -37.91015625, -33.400390625, -28.890625, -24.380859375, -19.87109375, -15.361328125, -10.8515625, -6.341796875, -1.83203125, 2.677734375, 7.1875, 11.697265625, 16.20703125, 20.716796875, 25.2265625, 29.736328125, 34.24609375, 38.755859375, 43.265625, 47.775390625, 52.28515625, 56.794921875, 61.3046875, 65.814453125, 70.32421875, 74.833984375, 79.34375, 83.853515625, 88.36328125, 92.873046875, 97.3828125, 101.892578125, 106.40234375, 110.912109375, 115.421875, 119.931640625, 124.44140625, 128.951171875, 133.4609375, 137.970703125, 142.48046875, 146.990234375, 151.5]}, "gradients/decoder.transformer.h.12.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 1.0, 1.0, 1.0, 1.0, 6.0, 1.0, 3.0, 5.0, 4.0, 6.0, 15.0, 13.0, 15.0, 17.0, 26.0, 36.0, 50.0, 58.0, 108.0, 156.0, 286.0, 582.0, 941.0, 762.0, 399.0, 200.0, 125.0, 73.0, 60.0, 31.0, 27.0, 11.0, 7.0, 5.0, 13.0, 6.0, 9.0, 7.0, 5.0, 1.0, 3.0, 3.0, 2.0, 1.0, 0.0, 0.0, 3.0, 0.0, 1.0, 1.0], "bins": [-145.5, -141.5380859375, -137.576171875, -133.6142578125, -129.65234375, -125.6904296875, -121.728515625, -117.7666015625, -113.8046875, -109.8427734375, -105.880859375, -101.9189453125, -97.95703125, -93.9951171875, -90.033203125, -86.0712890625, -82.109375, -78.1474609375, -74.185546875, -70.2236328125, -66.26171875, -62.2998046875, -58.337890625, -54.3759765625, -50.4140625, -46.4521484375, -42.490234375, -38.5283203125, -34.56640625, -30.6044921875, -26.642578125, -22.6806640625, -18.71875, -14.7568359375, -10.794921875, -6.8330078125, -2.87109375, 1.0908203125, 5.052734375, 9.0146484375, 12.9765625, 16.9384765625, 20.900390625, 24.8623046875, 28.82421875, 32.7861328125, 36.748046875, 40.7099609375, 44.671875, 48.6337890625, 52.595703125, 56.5576171875, 60.51953125, 64.4814453125, 68.443359375, 72.4052734375, 76.3671875, 80.3291015625, 84.291015625, 88.2529296875, 92.21484375, 96.1767578125, 100.138671875, 104.1005859375, 108.0625]}, "gradients/decoder.transformer.h.12.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 3.0, 2.0, 1.0, 8.0, 9.0, 16.0, 14.0, 12.0, 12.0, 19.0, 31.0, 33.0, 45.0, 63.0, 79.0, 116.0, 193.0, 403.0, 1061.0, 3515.0, 15641.0, 134235.0, 3926803.0, 94352.0, 12577.0, 3041.0, 992.0, 353.0, 172.0, 108.0, 99.0, 61.0, 35.0, 51.0, 22.0, 25.0, 23.0, 19.0, 13.0, 14.0, 7.0, 3.0, 4.0, 4.0, 3.0, 3.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-414.0, -401.23828125, -388.4765625, -375.71484375, -362.953125, -350.19140625, -337.4296875, -324.66796875, -311.90625, -299.14453125, -286.3828125, -273.62109375, -260.859375, -248.09765625, -235.3359375, -222.57421875, -209.8125, -197.05078125, -184.2890625, -171.52734375, -158.765625, -146.00390625, -133.2421875, -120.48046875, -107.71875, -94.95703125, -82.1953125, -69.43359375, -56.671875, -43.91015625, -31.1484375, -18.38671875, -5.625, 7.13671875, 19.8984375, 32.66015625, 45.421875, 58.18359375, 70.9453125, 83.70703125, 96.46875, 109.23046875, 121.9921875, 134.75390625, 147.515625, 160.27734375, 173.0390625, 185.80078125, 198.5625, 211.32421875, 224.0859375, 236.84765625, 249.609375, 262.37109375, 275.1328125, 287.89453125, 300.65625, 313.41796875, 326.1796875, 338.94140625, 351.703125, 364.46484375, 377.2265625, 389.98828125, 402.75]}, "gradients/decoder.transformer.h.12.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 1.0, 3.0, 5.0, 2.0, 2.0, 3.0, 9.0, 9.0, 7.0, 15.0, 17.0, 27.0, 41.0, 35.0, 54.0, 71.0, 81.0, 103.0, 110.0, 78.0, 85.0, 65.0, 40.0, 42.0, 26.0, 14.0, 13.0, 13.0, 8.0, 8.0, 8.0, 5.0, 5.0, 2.0, 1.0, 2.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0], "bins": [-402.7503967285156, -390.7337951660156, -378.7171936035156, -366.7005920410156, -354.6839904785156, -342.6673889160156, -330.6507873535156, -318.6341857910156, -306.6175842285156, -294.6009826660156, -282.5843811035156, -270.5677795410156, -258.5511779785156, -246.53457641601562, -234.51797485351562, -222.50137329101562, -210.48477172851562, -198.46817016601562, -186.45156860351562, -174.43496704101562, -162.41836547851562, -150.40176391601562, -138.38516235351562, -126.36856079101562, -114.35195922851562, -102.33535766601562, -90.31875610351562, -78.30215454101562, -66.28555297851562, -54.268951416015625, -42.252349853515625, -30.235748291015625, -18.21917724609375, -6.20257568359375, 5.81402587890625, 17.83062744140625, 29.84722900390625, 41.86383056640625, 53.88043212890625, 65.89703369140625, 77.91363525390625, 89.93023681640625, 101.94683837890625, 113.96343994140625, 125.98004150390625, 137.99664306640625, 150.01324462890625, 162.02984619140625, 174.04644775390625, 186.06304931640625, 198.07965087890625, 210.09625244140625, 222.11285400390625, 234.12945556640625, 246.14605712890625, 258.16265869140625, 270.17926025390625, 282.19586181640625, 294.21246337890625, 306.22906494140625, 318.24566650390625, 330.26226806640625, 342.27886962890625, 354.29547119140625, 366.31207275390625]}, "gradients/decoder.transformer.h.12.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 3.0, 7.0, 3.0, 3.0, 5.0, 12.0, 6.0, 12.0, 11.0, 17.0, 16.0, 11.0, 14.0, 25.0, 27.0, 30.0, 31.0, 31.0, 36.0, 27.0, 41.0, 44.0, 39.0, 39.0, 41.0, 41.0, 26.0, 45.0, 31.0, 34.0, 40.0, 38.0, 32.0, 21.0, 16.0, 26.0, 22.0, 15.0, 15.0, 14.0, 16.0, 7.0, 5.0, 4.0, 9.0, 5.0, 8.0, 6.0, 3.0, 5.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-283.37322998046875, -274.0124206542969, -264.6515808105469, -255.29075622558594, -245.929931640625, -236.56912231445312, -227.2082977294922, -217.84747314453125, -208.4866485595703, -199.12582397460938, -189.76499938964844, -180.4041748046875, -171.04336547851562, -161.68252563476562, -152.32171630859375, -142.9608917236328, -133.60006713867188, -124.23924255371094, -114.87841796875, -105.5176010131836, -96.15677642822266, -86.79595184326172, -77.43513488769531, -68.07431030273438, -58.71348571777344, -49.3526611328125, -39.99184036254883, -30.631017684936523, -21.27019500732422, -11.909370422363281, -2.5485496520996094, 6.8122711181640625, 16.173095703125, 25.533918380737305, 34.89474105834961, 44.25556182861328, 53.61638641357422, 62.977210998535156, 72.33802795410156, 81.6988525390625, 91.05967712402344, 100.42050170898438, 109.78132629394531, 119.14214324951172, 128.50296020507812, 137.86380004882812, 147.224609375, 156.58543395996094, 165.94625854492188, 175.3070831298828, 184.66790771484375, 194.0287322998047, 203.38955688476562, 212.7503662109375, 222.11119079589844, 231.47201538085938, 240.8328399658203, 250.19366455078125, 259.5544738769531, 268.9153137207031, 278.276123046875, 287.636962890625, 296.9977722167969, 306.35858154296875, 315.71942138671875]}, "gradients/decoder.transformer.h.12.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 1.0, 1.0, 2.0, 2.0, 5.0, 2.0, 3.0, 6.0, 6.0, 6.0, 11.0, 10.0, 9.0, 16.0, 15.0, 24.0, 39.0, 26.0, 34.0, 42.0, 33.0, 47.0, 51.0, 36.0, 49.0, 65.0, 39.0, 47.0, 46.0, 40.0, 42.0, 34.0, 32.0, 37.0, 30.0, 19.0, 24.0, 8.0, 17.0, 9.0, 16.0, 11.0, 4.0, 2.0, 6.0, 1.0, 3.0, 6.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0], "bins": [-63.96875, -62.00341796875, -60.0380859375, -58.07275390625, -56.107421875, -54.14208984375, -52.1767578125, -50.21142578125, -48.24609375, -46.28076171875, -44.3154296875, -42.35009765625, -40.384765625, -38.41943359375, -36.4541015625, -34.48876953125, -32.5234375, -30.55810546875, -28.5927734375, -26.62744140625, -24.662109375, -22.69677734375, -20.7314453125, -18.76611328125, -16.80078125, -14.83544921875, -12.8701171875, -10.90478515625, -8.939453125, -6.97412109375, -5.0087890625, -3.04345703125, -1.078125, 0.88720703125, 2.8525390625, 4.81787109375, 6.783203125, 8.74853515625, 10.7138671875, 12.67919921875, 14.64453125, 16.60986328125, 18.5751953125, 20.54052734375, 22.505859375, 24.47119140625, 26.4365234375, 28.40185546875, 30.3671875, 32.33251953125, 34.2978515625, 36.26318359375, 38.228515625, 40.19384765625, 42.1591796875, 44.12451171875, 46.08984375, 48.05517578125, 50.0205078125, 51.98583984375, 53.951171875, 55.91650390625, 57.8818359375, 59.84716796875, 61.8125]}, "gradients/decoder.transformer.h.12.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 3.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 2.0, 2.0, 8.0, 6.0, 6.0, 16.0, 17.0, 49.0, 52.0, 64.0, 136.0, 188.0, 291.0, 501.0, 893.0, 1785.0, 3697.0, 9122.0, 26910.0, 101286.0, 525509.0, 288923.0, 59171.0, 17320.0, 6467.0, 2815.0, 1350.0, 780.0, 439.0, 285.0, 162.0, 135.0, 41.0, 45.0, 32.0, 18.0, 5.0, 11.0, 9.0, 3.0, 4.0, 5.0, 0.0, 3.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-15.1875, -14.660400390625, -14.13330078125, -13.606201171875, -13.0791015625, -12.552001953125, -12.02490234375, -11.497802734375, -10.970703125, -10.443603515625, -9.91650390625, -9.389404296875, -8.8623046875, -8.335205078125, -7.80810546875, -7.281005859375, -6.75390625, -6.226806640625, -5.69970703125, -5.172607421875, -4.6455078125, -4.118408203125, -3.59130859375, -3.064208984375, -2.537109375, -2.010009765625, -1.48291015625, -0.955810546875, -0.4287109375, 0.098388671875, 0.62548828125, 1.152587890625, 1.6796875, 2.206787109375, 2.73388671875, 3.260986328125, 3.7880859375, 4.315185546875, 4.84228515625, 5.369384765625, 5.896484375, 6.423583984375, 6.95068359375, 7.477783203125, 8.0048828125, 8.531982421875, 9.05908203125, 9.586181640625, 10.11328125, 10.640380859375, 11.16748046875, 11.694580078125, 12.2216796875, 12.748779296875, 13.27587890625, 13.802978515625, 14.330078125, 14.857177734375, 15.38427734375, 15.911376953125, 16.4384765625, 16.965576171875, 17.49267578125, 18.019775390625, 18.546875]}, "gradients/decoder.transformer.h.12.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 4.0, 1.0, 2.0, 7.0, 4.0, 12.0, 7.0, 9.0, 12.0, 9.0, 27.0, 21.0, 21.0, 21.0, 26.0, 32.0, 36.0, 48.0, 49.0, 41.0, 48.0, 37.0, 43.0, 1062.0, 34.0, 40.0, 41.0, 39.0, 31.0, 34.0, 26.0, 28.0, 32.0, 27.0, 17.0, 15.0, 12.0, 18.0, 11.0, 10.0, 10.0, 11.0, 4.0, 4.0, 2.0, 5.0, 0.0, 2.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0], "bins": [-39.125, -37.92724609375, -36.7294921875, -35.53173828125, -34.333984375, -33.13623046875, -31.9384765625, -30.74072265625, -29.54296875, -28.34521484375, -27.1474609375, -25.94970703125, -24.751953125, -23.55419921875, -22.3564453125, -21.15869140625, -19.9609375, -18.76318359375, -17.5654296875, -16.36767578125, -15.169921875, -13.97216796875, -12.7744140625, -11.57666015625, -10.37890625, -9.18115234375, -7.9833984375, -6.78564453125, -5.587890625, -4.39013671875, -3.1923828125, -1.99462890625, -0.796875, 0.40087890625, 1.5986328125, 2.79638671875, 3.994140625, 5.19189453125, 6.3896484375, 7.58740234375, 8.78515625, 9.98291015625, 11.1806640625, 12.37841796875, 13.576171875, 14.77392578125, 15.9716796875, 17.16943359375, 18.3671875, 19.56494140625, 20.7626953125, 21.96044921875, 23.158203125, 24.35595703125, 25.5537109375, 26.75146484375, 27.94921875, 29.14697265625, 30.3447265625, 31.54248046875, 32.740234375, 33.93798828125, 35.1357421875, 36.33349609375, 37.53125]}, "gradients/decoder.transformer.h.12.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 4.0, 2.0, 7.0, 7.0, 13.0, 22.0, 29.0, 40.0, 46.0, 93.0, 136.0, 200.0, 328.0, 520.0, 705.0, 1169.0, 1816.0, 2956.0, 5044.0, 8563.0, 15092.0, 27637.0, 52066.0, 104100.0, 245789.0, 1302335.0, 161205.0, 76882.0, 39446.0, 21632.0, 11871.0, 6801.0, 4006.0, 2358.0, 1472.0, 949.0, 616.0, 399.0, 237.0, 189.0, 120.0, 68.0, 54.0, 38.0, 27.0, 20.0, 11.0, 10.0, 5.0, 6.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-3.900390625, -3.775726318359375, -3.65106201171875, -3.526397705078125, -3.4017333984375, -3.277069091796875, -3.15240478515625, -3.027740478515625, -2.903076171875, -2.778411865234375, -2.65374755859375, -2.529083251953125, -2.4044189453125, -2.279754638671875, -2.15509033203125, -2.030426025390625, -1.90576171875, -1.781097412109375, -1.65643310546875, -1.531768798828125, -1.4071044921875, -1.282440185546875, -1.15777587890625, -1.033111572265625, -0.908447265625, -0.783782958984375, -0.65911865234375, -0.534454345703125, -0.4097900390625, -0.285125732421875, -0.16046142578125, -0.035797119140625, 0.0888671875, 0.213531494140625, 0.33819580078125, 0.462860107421875, 0.5875244140625, 0.712188720703125, 0.83685302734375, 0.961517333984375, 1.086181640625, 1.210845947265625, 1.33551025390625, 1.460174560546875, 1.5848388671875, 1.709503173828125, 1.83416748046875, 1.958831787109375, 2.08349609375, 2.208160400390625, 2.33282470703125, 2.457489013671875, 2.5821533203125, 2.706817626953125, 2.83148193359375, 2.956146240234375, 3.080810546875, 3.205474853515625, 3.33013916015625, 3.454803466796875, 3.5794677734375, 3.704132080078125, 3.82879638671875, 3.953460693359375, 4.078125]}, "gradients/decoder.transformer.h.12.crossattention.q_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 6.0, 4.0, 3.0, 2.0, 6.0, 1.0, 7.0, 3.0, 5.0, 5.0, 11.0, 5.0, 6.0, 14.0, 19.0, 25.0, 31.0, 33.0, 46.0, 100.0, 191.0, 156.0, 91.0, 46.0, 36.0, 34.0, 26.0, 13.0, 16.0, 8.0, 11.0, 12.0, 8.0, 5.0, 4.0, 7.0, 2.0, 6.0, 2.0, 1.0, 2.0, 0.0, 2.0, 1.0, 1.0, 0.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.68115234375, -0.6558990478515625, -0.630645751953125, -0.6053924560546875, -0.58013916015625, -0.5548858642578125, -0.529632568359375, -0.5043792724609375, -0.4791259765625, -0.4538726806640625, -0.428619384765625, -0.4033660888671875, -0.37811279296875, -0.3528594970703125, -0.327606201171875, -0.3023529052734375, -0.277099609375, -0.2518463134765625, -0.226593017578125, -0.2013397216796875, -0.17608642578125, -0.1508331298828125, -0.125579833984375, -0.1003265380859375, -0.0750732421875, -0.0498199462890625, -0.024566650390625, 0.0006866455078125, 0.02593994140625, 0.0511932373046875, 0.076446533203125, 0.1016998291015625, 0.126953125, 0.1522064208984375, 0.177459716796875, 0.2027130126953125, 0.22796630859375, 0.2532196044921875, 0.278472900390625, 0.3037261962890625, 0.3289794921875, 0.3542327880859375, 0.379486083984375, 0.4047393798828125, 0.42999267578125, 0.4552459716796875, 0.480499267578125, 0.5057525634765625, 0.531005859375, 0.5562591552734375, 0.581512451171875, 0.6067657470703125, 0.63201904296875, 0.6572723388671875, 0.682525634765625, 0.7077789306640625, 0.7330322265625, 0.7582855224609375, 0.783538818359375, 0.8087921142578125, 0.83404541015625, 0.8592987060546875, 0.884552001953125, 0.9098052978515625, 0.93505859375]}, "gradients/decoder.transformer.h.12.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 2.0, 4.0, 3.0, 3.0, 8.0, 5.0, 6.0, 5.0, 9.0, 7.0, 16.0, 16.0, 14.0, 34.0, 38.0, 57.0, 83.0, 207.0, 1046555.0, 1065.0, 151.0, 59.0, 52.0, 45.0, 22.0, 20.0, 9.0, 9.0, 5.0, 9.0, 4.0, 4.0, 4.0, 7.0, 1.0, 6.0, 2.0, 4.0, 3.0, 6.0, 2.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-30.265625, -29.44384765625, -28.6220703125, -27.80029296875, -26.978515625, -26.15673828125, -25.3349609375, -24.51318359375, -23.69140625, -22.86962890625, -22.0478515625, -21.22607421875, -20.404296875, -19.58251953125, -18.7607421875, -17.93896484375, -17.1171875, -16.29541015625, -15.4736328125, -14.65185546875, -13.830078125, -13.00830078125, -12.1865234375, -11.36474609375, -10.54296875, -9.72119140625, -8.8994140625, -8.07763671875, -7.255859375, -6.43408203125, -5.6123046875, -4.79052734375, -3.96875, -3.14697265625, -2.3251953125, -1.50341796875, -0.681640625, 0.14013671875, 0.9619140625, 1.78369140625, 2.60546875, 3.42724609375, 4.2490234375, 5.07080078125, 5.892578125, 6.71435546875, 7.5361328125, 8.35791015625, 9.1796875, 10.00146484375, 10.8232421875, 11.64501953125, 12.466796875, 13.28857421875, 14.1103515625, 14.93212890625, 15.75390625, 16.57568359375, 17.3974609375, 18.21923828125, 19.041015625, 19.86279296875, 20.6845703125, 21.50634765625, 22.328125]}, "gradients/decoder.transformer.h.12.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 64.0, 954.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.0713794231414795, -0.7348067760467529, -0.39823412895202637, -0.061661481857299805, 0.27491116523742676, 0.6114838123321533, 0.9480564594268799, 1.2846291065216064, 1.621201753616333, 1.9577744007110596, 2.294347047805786, 2.6309196949005127, 2.9674923419952393, 3.304064989089966, 3.6406376361846924, 3.977210283279419, 4.313782691955566, 4.650355339050293, 4.9869279861450195, 5.323500633239746, 5.660073280334473, 5.996645927429199, 6.333218574523926, 6.669791221618652, 7.006363868713379, 7.3429365158081055, 7.679509162902832, 8.016081809997559, 8.352654457092285, 8.689227104187012, 9.025799751281738, 9.362372398376465, 9.698945999145508, 10.035518646240234, 10.372091293334961, 10.708663940429688, 11.045236587524414, 11.38180923461914, 11.718381881713867, 12.054954528808594, 12.39152717590332, 12.728099822998047, 13.064672470092773, 13.4012451171875, 13.737817764282227, 14.074390411376953, 14.41096305847168, 14.747535705566406, 15.084108352661133, 15.42068099975586, 15.757253646850586, 16.093826293945312, 16.43039894104004, 16.766971588134766, 17.103544235229492, 17.44011688232422, 17.776689529418945, 18.113262176513672, 18.4498348236084, 18.786407470703125, 19.12298011779785, 19.459552764892578, 19.796125411987305, 20.13269805908203, 20.469270706176758]}, "gradients/decoder.transformer.h.12.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 4.0, 1.0, 2.0, 2.0, 5.0, 7.0, 11.0, 12.0, 13.0, 13.0, 22.0, 24.0, 18.0, 33.0, 27.0, 31.0, 56.0, 43.0, 31.0, 28.0, 41.0, 57.0, 46.0, 48.0, 43.0, 33.0, 40.0, 44.0, 39.0, 32.0, 36.0, 24.0, 24.0, 20.0, 22.0, 16.0, 12.0, 13.0, 5.0, 10.0, 8.0, 7.0, 4.0, 1.0, 3.0, 1.0, 2.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.2212004661560059, -1.1785672903060913, -1.1359341144561768, -1.0933010578155518, -1.0506678819656372, -1.0080347061157227, -0.9654015898704529, -0.9227684736251831, -0.8801352977752686, -0.837502121925354, -0.7948690056800842, -0.7522358894348145, -0.7096027135848999, -0.6669695377349854, -0.6243364214897156, -0.5817033052444458, -0.5390701293945312, -0.4964369833469391, -0.4538038372993469, -0.41117069125175476, -0.3685375452041626, -0.32590439915657043, -0.28327125310897827, -0.2406381070613861, -0.19800496101379395, -0.15537181496620178, -0.11273866891860962, -0.07010552287101746, -0.027472376823425293, 0.01516076922416687, 0.05779391527175903, 0.1004270613193512, 0.14306020736694336, 0.18569335341453552, 0.22832649946212769, 0.27095964550971985, 0.313592791557312, 0.3562259376049042, 0.39885908365249634, 0.4414922297000885, 0.48412537574768066, 0.5267585515975952, 0.569391667842865, 0.6120247840881348, 0.6546579599380493, 0.6972911357879639, 0.7399242520332336, 0.7825573682785034, 0.825190544128418, 0.8678237199783325, 0.9104568362236023, 0.9530899524688721, 0.9957231283187866, 1.0383563041687012, 1.0809893608093262, 1.1236225366592407, 1.1662557125091553, 1.2088888883590698, 1.2515220642089844, 1.2941551208496094, 1.336788296699524, 1.3794214725494385, 1.4220545291900635, 1.464687705039978, 1.5073208808898926]}, "gradients/decoder.transformer.h.12.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 1.0, 1.0, 2.0, 2.0, 5.0, 2.0, 3.0, 6.0, 6.0, 6.0, 11.0, 10.0, 9.0, 16.0, 15.0, 24.0, 39.0, 26.0, 34.0, 42.0, 34.0, 45.0, 52.0, 36.0, 49.0, 65.0, 39.0, 48.0, 45.0, 40.0, 42.0, 34.0, 32.0, 37.0, 30.0, 19.0, 24.0, 8.0, 17.0, 9.0, 16.0, 11.0, 4.0, 2.0, 6.0, 1.0, 3.0, 6.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 2.0], "bins": [-63.9375, -61.97314453125, -60.0087890625, -58.04443359375, -56.080078125, -54.11572265625, -52.1513671875, -50.18701171875, -48.22265625, -46.25830078125, -44.2939453125, -42.32958984375, -40.365234375, -38.40087890625, -36.4365234375, -34.47216796875, -32.5078125, -30.54345703125, -28.5791015625, -26.61474609375, -24.650390625, -22.68603515625, -20.7216796875, -18.75732421875, -16.79296875, -14.82861328125, -12.8642578125, -10.89990234375, -8.935546875, -6.97119140625, -5.0068359375, -3.04248046875, -1.078125, 0.88623046875, 2.8505859375, 4.81494140625, 6.779296875, 8.74365234375, 10.7080078125, 12.67236328125, 14.63671875, 16.60107421875, 18.5654296875, 20.52978515625, 22.494140625, 24.45849609375, 26.4228515625, 28.38720703125, 30.3515625, 32.31591796875, 34.2802734375, 36.24462890625, 38.208984375, 40.17333984375, 42.1376953125, 44.10205078125, 46.06640625, 48.03076171875, 49.9951171875, 51.95947265625, 53.923828125, 55.88818359375, 57.8525390625, 59.81689453125, 61.78125]}, "gradients/decoder.transformer.h.12.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 4.0, 2.0, 2.0, 6.0, 3.0, 6.0, 8.0, 10.0, 12.0, 13.0, 22.0, 20.0, 46.0, 43.0, 55.0, 80.0, 127.0, 167.0, 245.0, 348.0, 482.0, 836.0, 1648.0, 3322.0, 8077.0, 26375.0, 212184.0, 716748.0, 54750.0, 12860.0, 4740.0, 2154.0, 1130.0, 666.0, 413.0, 273.0, 184.0, 115.0, 95.0, 58.0, 59.0, 49.0, 39.0, 26.0, 11.0, 14.0, 12.0, 7.0, 9.0, 4.0, 2.0, 0.0, 4.0, 3.0, 1.0, 1.0, 1.0, 1.0, 2.0], "bins": [-59.65625, -57.8046875, -55.953125, -54.1015625, -52.25, -50.3984375, -48.546875, -46.6953125, -44.84375, -42.9921875, -41.140625, -39.2890625, -37.4375, -35.5859375, -33.734375, -31.8828125, -30.03125, -28.1796875, -26.328125, -24.4765625, -22.625, -20.7734375, -18.921875, -17.0703125, -15.21875, -13.3671875, -11.515625, -9.6640625, -7.8125, -5.9609375, -4.109375, -2.2578125, -0.40625, 1.4453125, 3.296875, 5.1484375, 7.0, 8.8515625, 10.703125, 12.5546875, 14.40625, 16.2578125, 18.109375, 19.9609375, 21.8125, 23.6640625, 25.515625, 27.3671875, 29.21875, 31.0703125, 32.921875, 34.7734375, 36.625, 38.4765625, 40.328125, 42.1796875, 44.03125, 45.8828125, 47.734375, 49.5859375, 51.4375, 53.2890625, 55.140625, 56.9921875, 58.84375]}, "gradients/decoder.transformer.h.12.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 2.0, 0.0, 5.0, 1.0, 3.0, 5.0, 5.0, 4.0, 7.0, 11.0, 9.0, 18.0, 23.0, 19.0, 15.0, 22.0, 31.0, 25.0, 27.0, 32.0, 39.0, 38.0, 48.0, 74.0, 103.0, 301.0, 1698.0, 84.0, 53.0, 55.0, 38.0, 41.0, 30.0, 29.0, 28.0, 27.0, 18.0, 20.0, 8.0, 6.0, 19.0, 10.0, 8.0, 7.0, 0.0, 2.0, 3.0, 7.0, 5.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-215.625, -209.291015625, -202.95703125, -196.623046875, -190.2890625, -183.955078125, -177.62109375, -171.287109375, -164.953125, -158.619140625, -152.28515625, -145.951171875, -139.6171875, -133.283203125, -126.94921875, -120.615234375, -114.28125, -107.947265625, -101.61328125, -95.279296875, -88.9453125, -82.611328125, -76.27734375, -69.943359375, -63.609375, -57.275390625, -50.94140625, -44.607421875, -38.2734375, -31.939453125, -25.60546875, -19.271484375, -12.9375, -6.603515625, -0.26953125, 6.064453125, 12.3984375, 18.732421875, 25.06640625, 31.400390625, 37.734375, 44.068359375, 50.40234375, 56.736328125, 63.0703125, 69.404296875, 75.73828125, 82.072265625, 88.40625, 94.740234375, 101.07421875, 107.408203125, 113.7421875, 120.076171875, 126.41015625, 132.744140625, 139.078125, 145.412109375, 151.74609375, 158.080078125, 164.4140625, 170.748046875, 177.08203125, 183.416015625, 189.75]}, "gradients/decoder.transformer.h.12.attn.c_attn.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 9.0, 4.0, 3.0, 3.0, 5.0, 10.0, 12.0, 18.0, 17.0, 17.0, 34.0, 48.0, 58.0, 69.0, 88.0, 126.0, 190.0, 402.0, 1252.0, 6707.0, 909151.0, 2218127.0, 6900.0, 1232.0, 375.0, 194.0, 148.0, 115.0, 93.0, 59.0, 52.0, 37.0, 25.0, 34.0, 25.0, 15.0, 12.0, 7.0, 10.0, 8.0, 4.0, 3.0, 5.0, 3.0, 1.0, 1.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-397.5, -383.30859375, -369.1171875, -354.92578125, -340.734375, -326.54296875, -312.3515625, -298.16015625, -283.96875, -269.77734375, -255.5859375, -241.39453125, -227.203125, -213.01171875, -198.8203125, -184.62890625, -170.4375, -156.24609375, -142.0546875, -127.86328125, -113.671875, -99.48046875, -85.2890625, -71.09765625, -56.90625, -42.71484375, -28.5234375, -14.33203125, -0.140625, 14.05078125, 28.2421875, 42.43359375, 56.625, 70.81640625, 85.0078125, 99.19921875, 113.390625, 127.58203125, 141.7734375, 155.96484375, 170.15625, 184.34765625, 198.5390625, 212.73046875, 226.921875, 241.11328125, 255.3046875, 269.49609375, 283.6875, 297.87890625, 312.0703125, 326.26171875, 340.453125, 354.64453125, 368.8359375, 383.02734375, 397.21875, 411.41015625, 425.6015625, 439.79296875, 453.984375, 468.17578125, 482.3671875, 496.55859375, 510.75]}, "gradients/decoder.transformer.h.12.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 10.0, 112.0, 656.0, 219.0, 20.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1361.9676513671875, -1330.0133056640625, -1298.058837890625, -1266.1044921875, -1234.1500244140625, -1202.1956787109375, -1170.2412109375, -1138.286865234375, -1106.33251953125, -1074.378173828125, -1042.4237060546875, -1010.4692993164062, -978.514892578125, -946.560546875, -914.6061401367188, -882.6517333984375, -850.697265625, -818.7428588867188, -786.7884521484375, -754.8340454101562, -722.879638671875, -690.92529296875, -658.9708862304688, -627.0164794921875, -595.0620727539062, -563.107666015625, -531.1532592773438, -499.1988830566406, -467.2444763183594, -435.2900695800781, -403.335693359375, -371.38128662109375, -339.42694091796875, -307.4725341796875, -275.51812744140625, -243.56375122070312, -211.60934448242188, -179.65493774414062, -147.70054626464844, -115.74615478515625, -83.791748046875, -51.83734893798828, -19.882949829101562, 12.071449279785156, 44.025848388671875, 75.98025512695312, 107.93464660644531, 139.8890380859375, 171.84344482421875, 203.7978515625, 235.7522430419922, 267.7066345214844, 299.6610412597656, 331.6154479980469, 363.56982421875, 395.52423095703125, 427.4786376953125, 459.43304443359375, 491.387451171875, 523.3418579101562, 555.2962646484375, 587.2506103515625, 619.2050170898438, 651.159423828125, 683.1138305664062]}, "gradients/decoder.transformer.h.12.ln_1.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 4.0, 8.0, 12.0, 13.0, 7.0, 16.0, 8.0, 15.0, 23.0, 21.0, 22.0, 26.0, 27.0, 38.0, 32.0, 43.0, 37.0, 43.0, 35.0, 39.0, 41.0, 36.0, 32.0, 36.0, 39.0, 42.0, 39.0, 30.0, 30.0, 37.0, 20.0, 33.0, 19.0, 15.0, 12.0, 14.0, 16.0, 12.0, 5.0, 7.0, 3.0, 3.0, 4.0, 8.0, 1.0, 4.0, 3.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-497.30914306640625, -481.456298828125, -465.60345458984375, -449.7505798339844, -433.8977355957031, -418.0448913574219, -402.1920166015625, -386.33917236328125, -370.486328125, -354.63348388671875, -338.7806396484375, -322.9277648925781, -307.0749206542969, -291.2220764160156, -275.36920166015625, -259.516357421875, -243.66351318359375, -227.8106689453125, -211.9578094482422, -196.10494995117188, -180.25210571289062, -164.39926147460938, -148.54640197753906, -132.69354248046875, -116.8406982421875, -100.98784637451172, -85.13499450683594, -69.28214263916016, -53.429290771484375, -37.576438903808594, -21.723587036132812, -5.870735168457031, 9.98211669921875, 25.83496856689453, 41.68782043457031, 57.540672302246094, 73.39352416992188, 89.24637603759766, 105.09922790527344, 120.95207977294922, 136.804931640625, 152.65777587890625, 168.51063537597656, 184.36349487304688, 200.21633911132812, 216.06918334960938, 231.9220428466797, 247.77490234375, 263.62774658203125, 279.4805908203125, 295.33343505859375, 311.1863098144531, 327.0391540527344, 342.8919982910156, 358.744873046875, 374.59771728515625, 390.4505615234375, 406.30340576171875, 422.15625, 438.0091247558594, 453.8619689941406, 469.7148132324219, 485.56768798828125, 501.4205322265625, 517.2733764648438]}, "gradients/decoder.transformer.h.11.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 1.0, 1.0, 5.0, 3.0, 1.0, 2.0, 3.0, 6.0, 12.0, 7.0, 12.0, 4.0, 16.0, 15.0, 18.0, 38.0, 22.0, 34.0, 42.0, 37.0, 45.0, 37.0, 52.0, 49.0, 46.0, 48.0, 54.0, 46.0, 42.0, 37.0, 37.0, 35.0, 26.0, 34.0, 30.0, 25.0, 11.0, 16.0, 9.0, 13.0, 10.0, 8.0, 9.0, 5.0, 1.0, 2.0, 6.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0], "bins": [-64.625, -62.6064453125, -60.587890625, -58.5693359375, -56.55078125, -54.5322265625, -52.513671875, -50.4951171875, -48.4765625, -46.4580078125, -44.439453125, -42.4208984375, -40.40234375, -38.3837890625, -36.365234375, -34.3466796875, -32.328125, -30.3095703125, -28.291015625, -26.2724609375, -24.25390625, -22.2353515625, -20.216796875, -18.1982421875, -16.1796875, -14.1611328125, -12.142578125, -10.1240234375, -8.10546875, -6.0869140625, -4.068359375, -2.0498046875, -0.03125, 1.9873046875, 4.005859375, 6.0244140625, 8.04296875, 10.0615234375, 12.080078125, 14.0986328125, 16.1171875, 18.1357421875, 20.154296875, 22.1728515625, 24.19140625, 26.2099609375, 28.228515625, 30.2470703125, 32.265625, 34.2841796875, 36.302734375, 38.3212890625, 40.33984375, 42.3583984375, 44.376953125, 46.3955078125, 48.4140625, 50.4326171875, 52.451171875, 54.4697265625, 56.48828125, 58.5068359375, 60.525390625, 62.5439453125, 64.5625]}, "gradients/decoder.transformer.h.11.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 3.0, 3.0, 4.0, 8.0, 10.0, 15.0, 16.0, 24.0, 43.0, 48.0, 68.0, 70.0, 115.0, 165.0, 235.0, 342.0, 456.0, 701.0, 1064.0, 1715.0, 2768.0, 4995.0, 9826.0, 27637.0, 227777.0, 2725481.0, 1070936.0, 84881.0, 16586.0, 7490.0, 4013.0, 2314.0, 1486.0, 910.0, 620.0, 430.0, 283.0, 212.0, 159.0, 105.0, 70.0, 53.0, 41.0, 36.0, 27.0, 11.0, 12.0, 7.0, 7.0, 5.0, 4.0, 0.0, 6.0, 2.0, 1.0, 1.0], "bins": [-124.0625, -120.3916015625, -116.720703125, -113.0498046875, -109.37890625, -105.7080078125, -102.037109375, -98.3662109375, -94.6953125, -91.0244140625, -87.353515625, -83.6826171875, -80.01171875, -76.3408203125, -72.669921875, -68.9990234375, -65.328125, -61.6572265625, -57.986328125, -54.3154296875, -50.64453125, -46.9736328125, -43.302734375, -39.6318359375, -35.9609375, -32.2900390625, -28.619140625, -24.9482421875, -21.27734375, -17.6064453125, -13.935546875, -10.2646484375, -6.59375, -2.9228515625, 0.748046875, 4.4189453125, 8.08984375, 11.7607421875, 15.431640625, 19.1025390625, 22.7734375, 26.4443359375, 30.115234375, 33.7861328125, 37.45703125, 41.1279296875, 44.798828125, 48.4697265625, 52.140625, 55.8115234375, 59.482421875, 63.1533203125, 66.82421875, 70.4951171875, 74.166015625, 77.8369140625, 81.5078125, 85.1787109375, 88.849609375, 92.5205078125, 96.19140625, 99.8623046875, 103.533203125, 107.2041015625, 110.875]}, "gradients/decoder.transformer.h.11.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 0.0, 2.0, 1.0, 3.0, 2.0, 5.0, 5.0, 7.0, 5.0, 7.0, 9.0, 17.0, 16.0, 31.0, 45.0, 74.0, 110.0, 145.0, 332.0, 751.0, 1151.0, 645.0, 290.0, 150.0, 93.0, 66.0, 30.0, 30.0, 16.0, 8.0, 15.0, 7.0, 7.0, 4.0, 0.0, 2.0, 3.0, 2.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-139.625, -134.876953125, -130.12890625, -125.380859375, -120.6328125, -115.884765625, -111.13671875, -106.388671875, -101.640625, -96.892578125, -92.14453125, -87.396484375, -82.6484375, -77.900390625, -73.15234375, -68.404296875, -63.65625, -58.908203125, -54.16015625, -49.412109375, -44.6640625, -39.916015625, -35.16796875, -30.419921875, -25.671875, -20.923828125, -16.17578125, -11.427734375, -6.6796875, -1.931640625, 2.81640625, 7.564453125, 12.3125, 17.060546875, 21.80859375, 26.556640625, 31.3046875, 36.052734375, 40.80078125, 45.548828125, 50.296875, 55.044921875, 59.79296875, 64.541015625, 69.2890625, 74.037109375, 78.78515625, 83.533203125, 88.28125, 93.029296875, 97.77734375, 102.525390625, 107.2734375, 112.021484375, 116.76953125, 121.517578125, 126.265625, 131.013671875, 135.76171875, 140.509765625, 145.2578125, 150.005859375, 154.75390625, 159.501953125, 164.25]}, "gradients/decoder.transformer.h.11.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 4.0, 3.0, 1.0, 9.0, 6.0, 5.0, 9.0, 16.0, 22.0, 29.0, 31.0, 41.0, 58.0, 102.0, 134.0, 196.0, 334.0, 816.0, 2798.0, 15145.0, 270505.0, 3854799.0, 41241.0, 5353.0, 1249.0, 510.0, 269.0, 155.0, 112.0, 82.0, 53.0, 56.0, 34.0, 23.0, 22.0, 20.0, 12.0, 4.0, 10.0, 5.0, 10.0, 5.0, 2.0, 3.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-411.75, -397.16015625, -382.5703125, -367.98046875, -353.390625, -338.80078125, -324.2109375, -309.62109375, -295.03125, -280.44140625, -265.8515625, -251.26171875, -236.671875, -222.08203125, -207.4921875, -192.90234375, -178.3125, -163.72265625, -149.1328125, -134.54296875, -119.953125, -105.36328125, -90.7734375, -76.18359375, -61.59375, -47.00390625, -32.4140625, -17.82421875, -3.234375, 11.35546875, 25.9453125, 40.53515625, 55.125, 69.71484375, 84.3046875, 98.89453125, 113.484375, 128.07421875, 142.6640625, 157.25390625, 171.84375, 186.43359375, 201.0234375, 215.61328125, 230.203125, 244.79296875, 259.3828125, 273.97265625, 288.5625, 303.15234375, 317.7421875, 332.33203125, 346.921875, 361.51171875, 376.1015625, 390.69140625, 405.28125, 419.87109375, 434.4609375, 449.05078125, 463.640625, 478.23046875, 492.8203125, 507.41015625, 522.0]}, "gradients/decoder.transformer.h.11.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 8.0, 9.0, 25.0, 36.0, 115.0, 261.0, 298.0, 142.0, 69.0, 27.0, 8.0, 10.0, 2.0, 1.0, 1.0, 1.0, 2.0, 1.0, 0.0, 1.0], "bins": [-1765.814697265625, -1730.64111328125, -1695.467529296875, -1660.2939453125, -1625.120361328125, -1589.94677734375, -1554.773193359375, -1519.5994873046875, -1484.4259033203125, -1449.2523193359375, -1414.0787353515625, -1378.9051513671875, -1343.7315673828125, -1308.557861328125, -1273.38427734375, -1238.210693359375, -1203.037109375, -1167.863525390625, -1132.68994140625, -1097.516357421875, -1062.3427734375, -1027.169189453125, -991.9955444335938, -956.8219604492188, -921.6484375, -886.474853515625, -851.30126953125, -816.127685546875, -780.9540405273438, -745.7804565429688, -710.6068725585938, -675.4332885742188, -640.2595825195312, -605.0859985351562, -569.9124145507812, -534.73876953125, -499.565185546875, -464.3916015625, -429.218017578125, -394.04443359375, -358.8708190917969, -323.6972351074219, -288.52362060546875, -253.35003662109375, -218.1764373779297, -183.00283813476562, -147.82925415039062, -112.65565490722656, -77.4820556640625, -42.3084602355957, -7.134864807128906, 28.038726806640625, 63.21232604980469, 98.38592529296875, 133.55950927734375, 168.7331085205078, 203.90670776367188, 239.08030700683594, 274.25390625, 309.427490234375, 344.60107421875, 379.7746887207031, 414.9482727050781, 450.12188720703125, 485.29547119140625]}, "gradients/decoder.transformer.h.11.ln_2.bias": {"_type": "histogram", "values": [1.0, 1.0, 4.0, 3.0, 3.0, 4.0, 2.0, 5.0, 4.0, 4.0, 8.0, 15.0, 12.0, 7.0, 11.0, 14.0, 24.0, 22.0, 25.0, 25.0, 25.0, 33.0, 29.0, 36.0, 46.0, 51.0, 35.0, 45.0, 29.0, 29.0, 34.0, 39.0, 31.0, 32.0, 40.0, 25.0, 34.0, 31.0, 27.0, 20.0, 19.0, 21.0, 13.0, 16.0, 18.0, 11.0, 12.0, 10.0, 11.0, 8.0, 4.0, 3.0, 2.0, 2.0, 0.0, 2.0, 0.0, 3.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-269.7220764160156, -260.63629150390625, -251.55050659179688, -242.46470642089844, -233.37892150878906, -224.2931365966797, -215.20733642578125, -206.12155151367188, -197.0357666015625, -187.94998168945312, -178.86419677734375, -169.7783966064453, -160.69261169433594, -151.60682678222656, -142.52102661132812, -133.43524169921875, -124.34945678710938, -115.263671875, -106.1778793334961, -97.09208679199219, -88.00630187988281, -78.92051696777344, -69.83472442626953, -60.74893569946289, -51.66314697265625, -42.57735824584961, -33.49156951904297, -24.405780792236328, -15.319992065429688, -6.234203338623047, 2.8515853881835938, 11.937374114990234, 21.02313232421875, 30.10892105102539, 39.19470977783203, 48.28049850463867, 57.36628723144531, 66.45207214355469, 75.5378646850586, 84.6236572265625, 93.70944213867188, 102.79522705078125, 111.88101959228516, 120.96681213378906, 130.05259704589844, 139.1383819580078, 148.22418212890625, 157.30996704101562, 166.395751953125, 175.48153686523438, 184.56732177734375, 193.6531219482422, 202.73890686035156, 211.82469177246094, 220.91049194335938, 229.99627685546875, 239.08206176757812, 248.1678466796875, 257.2536315917969, 266.33941650390625, 275.42523193359375, 284.5110168457031, 293.5968017578125, 302.6825866699219, 311.76837158203125]}, "gradients/decoder.transformer.h.11.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 3.0, 4.0, 6.0, 4.0, 1.0, 4.0, 12.0, 7.0, 23.0, 13.0, 22.0, 25.0, 25.0, 25.0, 41.0, 37.0, 36.0, 41.0, 54.0, 51.0, 58.0, 55.0, 59.0, 45.0, 50.0, 34.0, 46.0, 33.0, 31.0, 26.0, 25.0, 22.0, 17.0, 24.0, 10.0, 12.0, 8.0, 3.0, 11.0, 4.0, 2.0, 2.0, 4.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-67.0, -64.8623046875, -62.724609375, -60.5869140625, -58.44921875, -56.3115234375, -54.173828125, -52.0361328125, -49.8984375, -47.7607421875, -45.623046875, -43.4853515625, -41.34765625, -39.2099609375, -37.072265625, -34.9345703125, -32.796875, -30.6591796875, -28.521484375, -26.3837890625, -24.24609375, -22.1083984375, -19.970703125, -17.8330078125, -15.6953125, -13.5576171875, -11.419921875, -9.2822265625, -7.14453125, -5.0068359375, -2.869140625, -0.7314453125, 1.40625, 3.5439453125, 5.681640625, 7.8193359375, 9.95703125, 12.0947265625, 14.232421875, 16.3701171875, 18.5078125, 20.6455078125, 22.783203125, 24.9208984375, 27.05859375, 29.1962890625, 31.333984375, 33.4716796875, 35.609375, 37.7470703125, 39.884765625, 42.0224609375, 44.16015625, 46.2978515625, 48.435546875, 50.5732421875, 52.7109375, 54.8486328125, 56.986328125, 59.1240234375, 61.26171875, 63.3994140625, 65.537109375, 67.6748046875, 69.8125]}, "gradients/decoder.transformer.h.11.crossattention.c_proj.weight": {"_type": "histogram", "values": [3.0, 2.0, 4.0, 2.0, 3.0, 1.0, 3.0, 5.0, 7.0, 8.0, 15.0, 15.0, 23.0, 35.0, 50.0, 82.0, 112.0, 163.0, 253.0, 467.0, 791.0, 1512.0, 2894.0, 5985.0, 12692.0, 28451.0, 71052.0, 198245.0, 449582.0, 167468.0, 60850.0, 25317.0, 11303.0, 5318.0, 2586.0, 1349.0, 742.0, 423.0, 267.0, 147.0, 104.0, 61.0, 37.0, 52.0, 26.0, 16.0, 15.0, 9.0, 9.0, 5.0, 4.0, 4.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-8.3203125, -8.027587890625, -7.73486328125, -7.442138671875, -7.1494140625, -6.856689453125, -6.56396484375, -6.271240234375, -5.978515625, -5.685791015625, -5.39306640625, -5.100341796875, -4.8076171875, -4.514892578125, -4.22216796875, -3.929443359375, -3.63671875, -3.343994140625, -3.05126953125, -2.758544921875, -2.4658203125, -2.173095703125, -1.88037109375, -1.587646484375, -1.294921875, -1.002197265625, -0.70947265625, -0.416748046875, -0.1240234375, 0.168701171875, 0.46142578125, 0.754150390625, 1.046875, 1.339599609375, 1.63232421875, 1.925048828125, 2.2177734375, 2.510498046875, 2.80322265625, 3.095947265625, 3.388671875, 3.681396484375, 3.97412109375, 4.266845703125, 4.5595703125, 4.852294921875, 5.14501953125, 5.437744140625, 5.73046875, 6.023193359375, 6.31591796875, 6.608642578125, 6.9013671875, 7.194091796875, 7.48681640625, 7.779541015625, 8.072265625, 8.364990234375, 8.65771484375, 8.950439453125, 9.2431640625, 9.535888671875, 9.82861328125, 10.121337890625, 10.4140625]}, "gradients/decoder.transformer.h.11.crossattention.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 0.0, 2.0, 2.0, 5.0, 3.0, 10.0, 3.0, 11.0, 8.0, 12.0, 15.0, 10.0, 11.0, 17.0, 21.0, 14.0, 43.0, 23.0, 21.0, 42.0, 41.0, 38.0, 38.0, 38.0, 40.0, 1058.0, 43.0, 32.0, 49.0, 37.0, 40.0, 28.0, 34.0, 31.0, 26.0, 22.0, 20.0, 12.0, 27.0, 21.0, 21.0, 16.0, 15.0, 7.0, 6.0, 7.0, 2.0, 9.0, 1.0, 0.0, 4.0, 2.0, 2.0, 0.0, 2.0], "bins": [-39.75, -38.61083984375, -37.4716796875, -36.33251953125, -35.193359375, -34.05419921875, -32.9150390625, -31.77587890625, -30.63671875, -29.49755859375, -28.3583984375, -27.21923828125, -26.080078125, -24.94091796875, -23.8017578125, -22.66259765625, -21.5234375, -20.38427734375, -19.2451171875, -18.10595703125, -16.966796875, -15.82763671875, -14.6884765625, -13.54931640625, -12.41015625, -11.27099609375, -10.1318359375, -8.99267578125, -7.853515625, -6.71435546875, -5.5751953125, -4.43603515625, -3.296875, -2.15771484375, -1.0185546875, 0.12060546875, 1.259765625, 2.39892578125, 3.5380859375, 4.67724609375, 5.81640625, 6.95556640625, 8.0947265625, 9.23388671875, 10.373046875, 11.51220703125, 12.6513671875, 13.79052734375, 14.9296875, 16.06884765625, 17.2080078125, 18.34716796875, 19.486328125, 20.62548828125, 21.7646484375, 22.90380859375, 24.04296875, 25.18212890625, 26.3212890625, 27.46044921875, 28.599609375, 29.73876953125, 30.8779296875, 32.01708984375, 33.15625]}, "gradients/decoder.transformer.h.11.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 1.0, 3.0, 2.0, 2.0, 4.0, 8.0, 8.0, 9.0, 26.0, 33.0, 42.0, 42.0, 88.0, 126.0, 168.0, 310.0, 378.0, 606.0, 907.0, 1316.0, 2054.0, 3185.0, 4946.0, 7672.0, 12122.0, 19278.0, 31611.0, 54117.0, 96298.0, 183120.0, 1303952.0, 160863.0, 86433.0, 49405.0, 29213.0, 17764.0, 11090.0, 6808.0, 4460.0, 2902.0, 1929.0, 1231.0, 807.0, 585.0, 352.0, 292.0, 188.0, 121.0, 95.0, 48.0, 42.0, 24.0, 23.0, 15.0, 6.0, 4.0, 7.0, 2.0, 3.0, 1.0, 0.0, 1.0, 2.0], "bins": [-2.87890625, -2.787261962890625, -2.69561767578125, -2.603973388671875, -2.5123291015625, -2.420684814453125, -2.32904052734375, -2.237396240234375, -2.145751953125, -2.054107666015625, -1.96246337890625, -1.870819091796875, -1.7791748046875, -1.687530517578125, -1.59588623046875, -1.504241943359375, -1.41259765625, -1.320953369140625, -1.22930908203125, -1.137664794921875, -1.0460205078125, -0.954376220703125, -0.86273193359375, -0.771087646484375, -0.679443359375, -0.587799072265625, -0.49615478515625, -0.404510498046875, -0.3128662109375, -0.221221923828125, -0.12957763671875, -0.037933349609375, 0.0537109375, 0.145355224609375, 0.23699951171875, 0.328643798828125, 0.4202880859375, 0.511932373046875, 0.60357666015625, 0.695220947265625, 0.786865234375, 0.878509521484375, 0.97015380859375, 1.061798095703125, 1.1534423828125, 1.245086669921875, 1.33673095703125, 1.428375244140625, 1.52001953125, 1.611663818359375, 1.70330810546875, 1.794952392578125, 1.8865966796875, 1.978240966796875, 2.06988525390625, 2.161529541015625, 2.253173828125, 2.344818115234375, 2.43646240234375, 2.528106689453125, 2.6197509765625, 2.711395263671875, 2.80303955078125, 2.894683837890625, 2.986328125]}, "gradients/decoder.transformer.h.11.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 4.0, 1.0, 2.0, 2.0, 4.0, 7.0, 12.0, 8.0, 13.0, 16.0, 17.0, 13.0, 38.0, 42.0, 63.0, 102.0, 198.0, 167.0, 108.0, 57.0, 40.0, 33.0, 20.0, 7.0, 7.0, 9.0, 4.0, 2.0, 1.0, 4.0, 2.0, 1.0, 2.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.4306640625, -0.41710662841796875, -0.4035491943359375, -0.38999176025390625, -0.376434326171875, -0.36287689208984375, -0.3493194580078125, -0.33576202392578125, -0.32220458984375, -0.30864715576171875, -0.2950897216796875, -0.28153228759765625, -0.267974853515625, -0.25441741943359375, -0.2408599853515625, -0.22730255126953125, -0.2137451171875, -0.20018768310546875, -0.1866302490234375, -0.17307281494140625, -0.159515380859375, -0.14595794677734375, -0.1324005126953125, -0.11884307861328125, -0.10528564453125, -0.09172821044921875, -0.0781707763671875, -0.06461334228515625, -0.051055908203125, -0.03749847412109375, -0.0239410400390625, -0.01038360595703125, 0.003173828125, 0.01673126220703125, 0.0302886962890625, 0.04384613037109375, 0.057403564453125, 0.07096099853515625, 0.0845184326171875, 0.09807586669921875, 0.11163330078125, 0.12519073486328125, 0.1387481689453125, 0.15230560302734375, 0.165863037109375, 0.17942047119140625, 0.1929779052734375, 0.20653533935546875, 0.2200927734375, 0.23365020751953125, 0.2472076416015625, 0.26076507568359375, 0.274322509765625, 0.28787994384765625, 0.3014373779296875, 0.31499481201171875, 0.32855224609375, 0.34210968017578125, 0.3556671142578125, 0.36922454833984375, 0.382781982421875, 0.39633941650390625, 0.4098968505859375, 0.42345428466796875, 0.43701171875]}, "gradients/decoder.transformer.h.11.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 2.0, 2.0, 3.0, 2.0, 1.0, 3.0, 1.0, 9.0, 6.0, 11.0, 14.0, 34.0, 35.0, 69.0, 132.0, 523.0, 1047213.0, 212.0, 101.0, 52.0, 33.0, 21.0, 15.0, 16.0, 10.0, 12.0, 9.0, 6.0, 3.0, 2.0, 2.0, 1.0, 4.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-14.0546875, -13.6192626953125, -13.183837890625, -12.7484130859375, -12.31298828125, -11.8775634765625, -11.442138671875, -11.0067138671875, -10.5712890625, -10.1358642578125, -9.700439453125, -9.2650146484375, -8.82958984375, -8.3941650390625, -7.958740234375, -7.5233154296875, -7.087890625, -6.6524658203125, -6.217041015625, -5.7816162109375, -5.34619140625, -4.9107666015625, -4.475341796875, -4.0399169921875, -3.6044921875, -3.1690673828125, -2.733642578125, -2.2982177734375, -1.86279296875, -1.4273681640625, -0.991943359375, -0.5565185546875, -0.12109375, 0.3143310546875, 0.749755859375, 1.1851806640625, 1.62060546875, 2.0560302734375, 2.491455078125, 2.9268798828125, 3.3623046875, 3.7977294921875, 4.233154296875, 4.6685791015625, 5.10400390625, 5.5394287109375, 5.974853515625, 6.4102783203125, 6.845703125, 7.2811279296875, 7.716552734375, 8.1519775390625, 8.58740234375, 9.0228271484375, 9.458251953125, 9.8936767578125, 10.3291015625, 10.7645263671875, 11.199951171875, 11.6353759765625, 12.07080078125, 12.5062255859375, 12.941650390625, 13.3770751953125, 13.8125]}, "gradients/decoder.transformer.h.11.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 11.0, 980.0, 28.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.36365607380867004, -0.28428715467453003, -0.20491823554039001, -0.12554931640625, -0.046180397272109985, 0.03318852186203003, 0.11255744099617004, 0.19192633032798767, 0.2712952792644501, 0.3506641983985901, 0.4300331175327301, 0.5094020366668701, 0.5887709856033325, 0.6681398749351501, 0.7475087642669678, 0.8268777132034302, 0.9062466621398926, 0.985615611076355, 1.0649845600128174, 1.1443533897399902, 1.2237223386764526, 1.303091287612915, 1.382460117340088, 1.4618290662765503, 1.5411980152130127, 1.620566964149475, 1.6999359130859375, 1.7793047428131104, 1.8586736917495728, 1.9380426406860352, 2.017411470413208, 2.096780300140381, 2.176149368286133, 2.2555181980133057, 2.3348872661590576, 2.4142560958862305, 2.4936251640319824, 2.5729939937591553, 2.652362823486328, 2.73173189163208, 2.811100721359253, 2.890469551086426, 2.9698386192321777, 3.0492074489593506, 3.1285762786865234, 3.2079453468322754, 3.2873141765594482, 3.366683006286621, 3.446052074432373, 3.525420904159546, 3.604789972305298, 3.6841588020324707, 3.7635278701782227, 3.8428966999053955, 3.9222655296325684, 4.00163459777832, 4.081003189086914, 4.160372257232666, 4.23974084854126, 4.319109916687012, 4.398478984832764, 4.477847576141357, 4.557216644287109, 4.636585712432861, 4.715954780578613]}, "gradients/decoder.transformer.h.11.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 3.0, 2.0, 4.0, 4.0, 2.0, 4.0, 5.0, 5.0, 10.0, 13.0, 16.0, 16.0, 20.0, 20.0, 25.0, 32.0, 33.0, 42.0, 27.0, 37.0, 43.0, 40.0, 36.0, 38.0, 42.0, 42.0, 54.0, 47.0, 42.0, 41.0, 33.0, 38.0, 27.0, 25.0, 30.0, 16.0, 17.0, 18.0, 17.0, 10.0, 5.0, 8.0, 5.0, 8.0, 5.0, 4.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.22160261869430542, -0.2148842215538025, -0.20816582441329956, -0.20144742727279663, -0.1947290301322937, -0.18801063299179077, -0.18129223585128784, -0.1745738387107849, -0.16785544157028198, -0.16113704442977905, -0.15441864728927612, -0.1477002501487732, -0.14098185300827026, -0.13426345586776733, -0.1275450587272644, -0.12082665413618088, -0.11410824954509735, -0.10738985240459442, -0.10067145526409149, -0.09395305812358856, -0.08723466098308563, -0.0805162638425827, -0.07379785925149918, -0.06707946211099625, -0.06036106497049332, -0.05364266782999039, -0.04692427068948746, -0.04020586982369423, -0.0334874726831913, -0.02676907554268837, -0.02005067467689514, -0.013332277536392212, -0.006613880395889282, 0.00010451767593622208, 0.006822915747761726, 0.013541314750909805, 0.020259711891412735, 0.026978109031915665, 0.03369650989770889, 0.04041490703821182, 0.04713330417871475, 0.05385170131921768, 0.06057009845972061, 0.06728850305080414, 0.07400690019130707, 0.08072529733181, 0.08744369447231293, 0.09416209161281586, 0.10088048875331879, 0.10759888589382172, 0.11431728303432465, 0.12103568017482758, 0.1277540773153305, 0.13447247445583344, 0.14119088649749756, 0.1479092836380005, 0.15462768077850342, 0.16134607791900635, 0.16806447505950928, 0.1747828722000122, 0.18150126934051514, 0.18821966648101807, 0.194938063621521, 0.20165646076202393, 0.20837485790252686]}, "gradients/decoder.transformer.h.11.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 3.0, 4.0, 6.0, 4.0, 1.0, 4.0, 12.0, 7.0, 23.0, 13.0, 22.0, 25.0, 25.0, 25.0, 41.0, 37.0, 36.0, 41.0, 54.0, 51.0, 58.0, 55.0, 59.0, 45.0, 50.0, 34.0, 47.0, 32.0, 31.0, 26.0, 25.0, 22.0, 17.0, 24.0, 10.0, 12.0, 8.0, 3.0, 11.0, 4.0, 2.0, 2.0, 4.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-67.0, -64.8623046875, -62.724609375, -60.5869140625, -58.44921875, -56.3115234375, -54.173828125, -52.0361328125, -49.8984375, -47.7607421875, -45.623046875, -43.4853515625, -41.34765625, -39.2099609375, -37.072265625, -34.9345703125, -32.796875, -30.6591796875, -28.521484375, -26.3837890625, -24.24609375, -22.1083984375, -19.970703125, -17.8330078125, -15.6953125, -13.5576171875, -11.419921875, -9.2822265625, -7.14453125, -5.0068359375, -2.869140625, -0.7314453125, 1.40625, 3.5439453125, 5.681640625, 7.8193359375, 9.95703125, 12.0947265625, 14.232421875, 16.3701171875, 18.5078125, 20.6455078125, 22.783203125, 24.9208984375, 27.05859375, 29.1962890625, 31.333984375, 33.4716796875, 35.609375, 37.7470703125, 39.884765625, 42.0224609375, 44.16015625, 46.2978515625, 48.435546875, 50.5732421875, 52.7109375, 54.8486328125, 56.986328125, 59.1240234375, 61.26171875, 63.3994140625, 65.537109375, 67.6748046875, 69.8125]}, "gradients/decoder.transformer.h.11.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 2.0, 1.0, 4.0, 6.0, 9.0, 10.0, 13.0, 17.0, 25.0, 43.0, 50.0, 69.0, 101.0, 143.0, 222.0, 282.0, 437.0, 706.0, 1043.0, 1713.0, 3013.0, 6548.0, 36024.0, 873632.0, 104686.0, 10285.0, 3679.0, 2107.0, 1258.0, 775.0, 528.0, 337.0, 240.0, 154.0, 120.0, 65.0, 60.0, 45.0, 34.0, 23.0, 14.0, 8.0, 13.0, 6.0, 6.0, 2.0, 4.0, 2.0, 0.0, 3.0, 1.0, 1.0], "bins": [-141.0, -137.0478515625, -133.095703125, -129.1435546875, -125.19140625, -121.2392578125, -117.287109375, -113.3349609375, -109.3828125, -105.4306640625, -101.478515625, -97.5263671875, -93.57421875, -89.6220703125, -85.669921875, -81.7177734375, -77.765625, -73.8134765625, -69.861328125, -65.9091796875, -61.95703125, -58.0048828125, -54.052734375, -50.1005859375, -46.1484375, -42.1962890625, -38.244140625, -34.2919921875, -30.33984375, -26.3876953125, -22.435546875, -18.4833984375, -14.53125, -10.5791015625, -6.626953125, -2.6748046875, 1.27734375, 5.2294921875, 9.181640625, 13.1337890625, 17.0859375, 21.0380859375, 24.990234375, 28.9423828125, 32.89453125, 36.8466796875, 40.798828125, 44.7509765625, 48.703125, 52.6552734375, 56.607421875, 60.5595703125, 64.51171875, 68.4638671875, 72.416015625, 76.3681640625, 80.3203125, 84.2724609375, 88.224609375, 92.1767578125, 96.12890625, 100.0810546875, 104.033203125, 107.9853515625, 111.9375]}, "gradients/decoder.transformer.h.11.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 4.0, 5.0, 3.0, 4.0, 5.0, 5.0, 8.0, 7.0, 7.0, 20.0, 16.0, 20.0, 26.0, 26.0, 28.0, 38.0, 40.0, 34.0, 53.0, 55.0, 63.0, 138.0, 1753.0, 191.0, 96.0, 49.0, 50.0, 48.0, 37.0, 31.0, 32.0, 36.0, 25.0, 18.0, 11.0, 11.0, 15.0, 5.0, 12.0, 10.0, 4.0, 7.0, 3.0, 3.0, 4.0, 5.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-192.375, -185.669921875, -178.96484375, -172.259765625, -165.5546875, -158.849609375, -152.14453125, -145.439453125, -138.734375, -132.029296875, -125.32421875, -118.619140625, -111.9140625, -105.208984375, -98.50390625, -91.798828125, -85.09375, -78.388671875, -71.68359375, -64.978515625, -58.2734375, -51.568359375, -44.86328125, -38.158203125, -31.453125, -24.748046875, -18.04296875, -11.337890625, -4.6328125, 2.072265625, 8.77734375, 15.482421875, 22.1875, 28.892578125, 35.59765625, 42.302734375, 49.0078125, 55.712890625, 62.41796875, 69.123046875, 75.828125, 82.533203125, 89.23828125, 95.943359375, 102.6484375, 109.353515625, 116.05859375, 122.763671875, 129.46875, 136.173828125, 142.87890625, 149.583984375, 156.2890625, 162.994140625, 169.69921875, 176.404296875, 183.109375, 189.814453125, 196.51953125, 203.224609375, 209.9296875, 216.634765625, 223.33984375, 230.044921875, 236.75]}, "gradients/decoder.transformer.h.11.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 11.0, 3.0, 4.0, 5.0, 15.0, 8.0, 12.0, 14.0, 23.0, 22.0, 30.0, 52.0, 56.0, 85.0, 103.0, 115.0, 244.0, 465.0, 1714.0, 13386.0, 3027979.0, 95049.0, 4416.0, 891.0, 359.0, 188.0, 108.0, 103.0, 61.0, 49.0, 38.0, 31.0, 25.0, 15.0, 6.0, 5.0, 7.0, 5.0, 4.0, 4.0, 5.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-569.0, -551.6328125, -534.265625, -516.8984375, -499.53125, -482.1640625, -464.796875, -447.4296875, -430.0625, -412.6953125, -395.328125, -377.9609375, -360.59375, -343.2265625, -325.859375, -308.4921875, -291.125, -273.7578125, -256.390625, -239.0234375, -221.65625, -204.2890625, -186.921875, -169.5546875, -152.1875, -134.8203125, -117.453125, -100.0859375, -82.71875, -65.3515625, -47.984375, -30.6171875, -13.25, 4.1171875, 21.484375, 38.8515625, 56.21875, 73.5859375, 90.953125, 108.3203125, 125.6875, 143.0546875, 160.421875, 177.7890625, 195.15625, 212.5234375, 229.890625, 247.2578125, 264.625, 281.9921875, 299.359375, 316.7265625, 334.09375, 351.4609375, 368.828125, 386.1953125, 403.5625, 420.9296875, 438.296875, 455.6640625, 473.03125, 490.3984375, 507.765625, 525.1328125, 542.5]}, "gradients/decoder.transformer.h.11.ln_1.weight": {"_type": "histogram", "values": [4.0, 168.0, 819.0, 28.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-206.09202575683594, -115.2530288696289, -24.414031982421875, 66.42497253417969, 157.2639617919922, 248.1029510498047, 338.94195556640625, 429.78094482421875, 520.6199340820312, 611.4589233398438, 702.2979125976562, 793.136962890625, 883.9759521484375, 974.81494140625, 1065.6539306640625, 1156.492919921875, 1247.3319091796875, 1338.1708984375, 1429.0098876953125, 1519.848876953125, 1610.6878662109375, 1701.52685546875, 1792.365966796875, 1883.204833984375, 1974.0439453125, 2064.883056640625, 2155.721923828125, 2246.56103515625, 2337.39990234375, 2428.239013671875, 2519.077880859375, 2609.9169921875, 2700.756103515625, 2791.59521484375, 2882.43408203125, 2973.273193359375, 3064.112060546875, 3154.951171875, 3245.7900390625, 3336.629150390625, 3427.468017578125, 3518.30712890625, 3609.14599609375, 3699.985107421875, 3790.823974609375, 3881.6630859375, 3972.501953125, 4063.341064453125, 4154.18017578125, 4245.01904296875, 4335.8583984375, 4426.697265625, 4517.5361328125, 4608.375, 4699.21435546875, 4790.05322265625, 4880.89208984375, 4971.73095703125, 5062.5703125, 5153.4091796875, 5244.248046875, 5335.0869140625, 5425.92626953125, 5516.76513671875, 5607.60400390625]}, "gradients/decoder.transformer.h.11.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 4.0, 3.0, 2.0, 5.0, 7.0, 5.0, 11.0, 3.0, 9.0, 12.0, 14.0, 12.0, 26.0, 22.0, 20.0, 20.0, 16.0, 23.0, 25.0, 28.0, 27.0, 36.0, 30.0, 40.0, 38.0, 33.0, 43.0, 32.0, 47.0, 30.0, 39.0, 27.0, 29.0, 35.0, 31.0, 33.0, 23.0, 24.0, 22.0, 14.0, 13.0, 8.0, 5.0, 19.0, 16.0, 6.0, 8.0, 9.0, 7.0, 7.0, 6.0, 4.0, 5.0, 0.0, 0.0, 2.0, 2.0], "bins": [-448.8605041503906, -435.37518310546875, -421.8898620605469, -408.404541015625, -394.9192199707031, -381.43389892578125, -367.9485778808594, -354.4632568359375, -340.9779357910156, -327.49261474609375, -314.0072937011719, -300.52197265625, -287.0366516113281, -273.55133056640625, -260.0660095214844, -246.5806884765625, -233.09535217285156, -219.6100311279297, -206.1247100830078, -192.63938903808594, -179.15406799316406, -165.66873168945312, -152.18341064453125, -138.69808959960938, -125.21277618408203, -111.72745513916016, -98.24213409423828, -84.75680541992188, -71.271484375, -57.786163330078125, -44.30084228515625, -30.815521240234375, -17.3302001953125, -3.8448781967163086, 9.640443801879883, 23.12576675415039, 36.611087799072266, 50.096412658691406, 63.58173370361328, 77.06705474853516, 90.55237579345703, 104.0376968383789, 117.52301788330078, 131.0083465576172, 144.49366760253906, 157.97898864746094, 171.4643096923828, 184.9496307373047, 198.43495178222656, 211.92027282714844, 225.4055938720703, 238.8909149169922, 252.37623596191406, 265.861572265625, 279.3468933105469, 292.83221435546875, 306.3175354003906, 319.8028564453125, 333.2881774902344, 346.77349853515625, 360.2588195800781, 373.744140625, 387.2294616699219, 400.71478271484375, 414.2001037597656]}, "gradients/decoder.transformer.h.10.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 4.0, 1.0, 3.0, 3.0, 4.0, 6.0, 5.0, 10.0, 12.0, 18.0, 19.0, 27.0, 19.0, 33.0, 23.0, 36.0, 34.0, 41.0, 46.0, 48.0, 50.0, 49.0, 55.0, 58.0, 60.0, 37.0, 33.0, 45.0, 28.0, 40.0, 26.0, 19.0, 20.0, 23.0, 18.0, 20.0, 12.0, 4.0, 8.0, 6.0, 6.0, 0.0, 2.0, 4.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-68.125, -65.9462890625, -63.767578125, -61.5888671875, -59.41015625, -57.2314453125, -55.052734375, -52.8740234375, -50.6953125, -48.5166015625, -46.337890625, -44.1591796875, -41.98046875, -39.8017578125, -37.623046875, -35.4443359375, -33.265625, -31.0869140625, -28.908203125, -26.7294921875, -24.55078125, -22.3720703125, -20.193359375, -18.0146484375, -15.8359375, -13.6572265625, -11.478515625, -9.2998046875, -7.12109375, -4.9423828125, -2.763671875, -0.5849609375, 1.59375, 3.7724609375, 5.951171875, 8.1298828125, 10.30859375, 12.4873046875, 14.666015625, 16.8447265625, 19.0234375, 21.2021484375, 23.380859375, 25.5595703125, 27.73828125, 29.9169921875, 32.095703125, 34.2744140625, 36.453125, 38.6318359375, 40.810546875, 42.9892578125, 45.16796875, 47.3466796875, 49.525390625, 51.7041015625, 53.8828125, 56.0615234375, 58.240234375, 60.4189453125, 62.59765625, 64.7763671875, 66.955078125, 69.1337890625, 71.3125]}, "gradients/decoder.transformer.h.10.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 4.0, 4.0, 6.0, 4.0, 10.0, 15.0, 20.0, 21.0, 38.0, 51.0, 61.0, 91.0, 127.0, 186.0, 336.0, 537.0, 980.0, 2185.0, 5374.0, 15777.0, 113178.0, 3575949.0, 440036.0, 25354.0, 7750.0, 2972.0, 1390.0, 656.0, 394.0, 247.0, 149.0, 110.0, 80.0, 54.0, 40.0, 28.0, 20.0, 15.0, 8.0, 11.0, 7.0, 6.0, 4.0, 2.0, 2.0, 1.0, 2.0, 1.0, 2.0, 2.0], "bins": [-233.75, -227.197265625, -220.64453125, -214.091796875, -207.5390625, -200.986328125, -194.43359375, -187.880859375, -181.328125, -174.775390625, -168.22265625, -161.669921875, -155.1171875, -148.564453125, -142.01171875, -135.458984375, -128.90625, -122.353515625, -115.80078125, -109.248046875, -102.6953125, -96.142578125, -89.58984375, -83.037109375, -76.484375, -69.931640625, -63.37890625, -56.826171875, -50.2734375, -43.720703125, -37.16796875, -30.615234375, -24.0625, -17.509765625, -10.95703125, -4.404296875, 2.1484375, 8.701171875, 15.25390625, 21.806640625, 28.359375, 34.912109375, 41.46484375, 48.017578125, 54.5703125, 61.123046875, 67.67578125, 74.228515625, 80.78125, 87.333984375, 93.88671875, 100.439453125, 106.9921875, 113.544921875, 120.09765625, 126.650390625, 133.203125, 139.755859375, 146.30859375, 152.861328125, 159.4140625, 165.966796875, 172.51953125, 179.072265625, 185.625]}, "gradients/decoder.transformer.h.10.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 3.0, 5.0, 2.0, 8.0, 7.0, 5.0, 19.0, 24.0, 33.0, 59.0, 87.0, 160.0, 339.0, 739.0, 1230.0, 658.0, 289.0, 168.0, 93.0, 34.0, 33.0, 19.0, 15.0, 11.0, 8.0, 5.0, 6.0, 10.0, 4.0, 2.0, 3.0, 3.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-142.875, -137.47265625, -132.0703125, -126.66796875, -121.265625, -115.86328125, -110.4609375, -105.05859375, -99.65625, -94.25390625, -88.8515625, -83.44921875, -78.046875, -72.64453125, -67.2421875, -61.83984375, -56.4375, -51.03515625, -45.6328125, -40.23046875, -34.828125, -29.42578125, -24.0234375, -18.62109375, -13.21875, -7.81640625, -2.4140625, 2.98828125, 8.390625, 13.79296875, 19.1953125, 24.59765625, 30.0, 35.40234375, 40.8046875, 46.20703125, 51.609375, 57.01171875, 62.4140625, 67.81640625, 73.21875, 78.62109375, 84.0234375, 89.42578125, 94.828125, 100.23046875, 105.6328125, 111.03515625, 116.4375, 121.83984375, 127.2421875, 132.64453125, 138.046875, 143.44921875, 148.8515625, 154.25390625, 159.65625, 165.05859375, 170.4609375, 175.86328125, 181.265625, 186.66796875, 192.0703125, 197.47265625, 202.875]}, "gradients/decoder.transformer.h.10.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 7.0, 6.0, 11.0, 9.0, 12.0, 13.0, 27.0, 30.0, 41.0, 41.0, 66.0, 118.0, 132.0, 233.0, 428.0, 985.0, 3123.0, 15538.0, 254604.0, 3865870.0, 43337.0, 6298.0, 1695.0, 662.0, 336.0, 205.0, 125.0, 84.0, 71.0, 39.0, 36.0, 29.0, 17.0, 13.0, 17.0, 6.0, 7.0, 7.0, 1.0, 4.0, 0.0, 2.0, 1.0, 1.0, 2.0, 1.0, 0.0, 1.0], "bins": [-544.5, -529.046875, -513.59375, -498.140625, -482.6875, -467.234375, -451.78125, -436.328125, -420.875, -405.421875, -389.96875, -374.515625, -359.0625, -343.609375, -328.15625, -312.703125, -297.25, -281.796875, -266.34375, -250.890625, -235.4375, -219.984375, -204.53125, -189.078125, -173.625, -158.171875, -142.71875, -127.265625, -111.8125, -96.359375, -80.90625, -65.453125, -50.0, -34.546875, -19.09375, -3.640625, 11.8125, 27.265625, 42.71875, 58.171875, 73.625, 89.078125, 104.53125, 119.984375, 135.4375, 150.890625, 166.34375, 181.796875, 197.25, 212.703125, 228.15625, 243.609375, 259.0625, 274.515625, 289.96875, 305.421875, 320.875, 336.328125, 351.78125, 367.234375, 382.6875, 398.140625, 413.59375, 429.046875, 444.5]}, "gradients/decoder.transformer.h.10.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 4.0, 2.0, 7.0, 10.0, 11.0, 20.0, 27.0, 54.0, 76.0, 147.0, 183.0, 143.0, 125.0, 76.0, 45.0, 29.0, 20.0, 10.0, 7.0, 8.0, 3.0, 5.0, 1.0, 2.0, 0.0, 1.0, 1.0, 2.0], "bins": [-1035.982177734375, -1013.8694458007812, -991.7567138671875, -969.6439208984375, -947.5311889648438, -925.41845703125, -903.3056640625, -881.1929321289062, -859.0802001953125, -836.9674682617188, -814.854736328125, -792.741943359375, -770.6292114257812, -748.5164794921875, -726.4036865234375, -704.2909545898438, -682.17822265625, -660.0654907226562, -637.9527587890625, -615.8399658203125, -593.7272338867188, -571.614501953125, -549.501708984375, -527.3889770507812, -505.2762451171875, -483.16351318359375, -461.0507507324219, -438.93798828125, -416.82525634765625, -394.7125244140625, -372.5997619628906, -350.48699951171875, -328.37420654296875, -306.261474609375, -284.1487121582031, -262.03594970703125, -239.9232177734375, -217.8104705810547, -195.69772338867188, -173.58497619628906, -151.47222900390625, -129.35948181152344, -107.24673461914062, -85.13398742675781, -63.021240234375, -40.90849304199219, -18.795745849609375, 3.3170013427734375, 25.42974853515625, 47.54249572753906, 69.65524291992188, 91.76799011230469, 113.8807373046875, 135.9934844970703, 158.10623168945312, 180.21897888183594, 202.33172607421875, 224.44447326660156, 246.55722045898438, 268.66998291015625, 290.78271484375, 312.89544677734375, 335.0082092285156, 357.1209716796875, 379.23370361328125]}, "gradients/decoder.transformer.h.10.ln_2.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 0.0, 2.0, 3.0, 2.0, 0.0, 2.0, 2.0, 4.0, 2.0, 5.0, 3.0, 5.0, 13.0, 7.0, 9.0, 9.0, 16.0, 24.0, 20.0, 21.0, 28.0, 29.0, 30.0, 44.0, 36.0, 33.0, 37.0, 52.0, 39.0, 45.0, 44.0, 46.0, 26.0, 41.0, 22.0, 34.0, 27.0, 25.0, 24.0, 30.0, 25.0, 22.0, 26.0, 22.0, 17.0, 11.0, 12.0, 7.0, 7.0, 8.0, 4.0, 5.0, 3.0, 4.0, 3.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-360.80126953125, -350.17242431640625, -339.5435485839844, -328.9147033691406, -318.2858581542969, -307.6570129394531, -297.02813720703125, -286.3992919921875, -275.77044677734375, -265.1416015625, -254.5127410888672, -243.88388061523438, -233.25503540039062, -222.6261749267578, -211.997314453125, -201.36846923828125, -190.73960876464844, -180.11074829101562, -169.48190307617188, -158.85304260253906, -148.2241973876953, -137.5953369140625, -126.96648406982422, -116.33763122558594, -105.70877838134766, -95.07992553710938, -84.4510726928711, -73.82221984863281, -63.193363189697266, -52.564510345458984, -41.93565368652344, -31.306800842285156, -20.677947998046875, -10.049094200134277, 0.5797595977783203, 11.208614349365234, 21.837467193603516, 32.4663200378418, 43.095176696777344, 53.724029541015625, 64.3528823852539, 74.98173522949219, 85.61058807373047, 96.23944091796875, 106.86830139160156, 117.49714660644531, 128.12600708007812, 138.75485229492188, 149.3837127685547, 160.0125732421875, 170.64141845703125, 181.27027893066406, 191.8991241455078, 202.52798461914062, 213.15682983398438, 223.7856903076172, 234.41455078125, 245.0434112548828, 255.67225646972656, 266.3011169433594, 276.9299621582031, 287.5588073730469, 298.18768310546875, 308.8165283203125, 319.44537353515625]}, "gradients/decoder.transformer.h.10.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 2.0, 4.0, 3.0, 3.0, 5.0, 5.0, 6.0, 5.0, 12.0, 18.0, 25.0, 15.0, 27.0, 26.0, 24.0, 34.0, 36.0, 48.0, 41.0, 55.0, 51.0, 47.0, 55.0, 49.0, 46.0, 41.0, 44.0, 46.0, 39.0, 31.0, 22.0, 23.0, 24.0, 22.0, 15.0, 14.0, 15.0, 11.0, 5.0, 4.0, 7.0, 3.0, 2.0, 2.0, 0.0, 0.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-65.875, -63.7060546875, -61.537109375, -59.3681640625, -57.19921875, -55.0302734375, -52.861328125, -50.6923828125, -48.5234375, -46.3544921875, -44.185546875, -42.0166015625, -39.84765625, -37.6787109375, -35.509765625, -33.3408203125, -31.171875, -29.0029296875, -26.833984375, -24.6650390625, -22.49609375, -20.3271484375, -18.158203125, -15.9892578125, -13.8203125, -11.6513671875, -9.482421875, -7.3134765625, -5.14453125, -2.9755859375, -0.806640625, 1.3623046875, 3.53125, 5.7001953125, 7.869140625, 10.0380859375, 12.20703125, 14.3759765625, 16.544921875, 18.7138671875, 20.8828125, 23.0517578125, 25.220703125, 27.3896484375, 29.55859375, 31.7275390625, 33.896484375, 36.0654296875, 38.234375, 40.4033203125, 42.572265625, 44.7412109375, 46.91015625, 49.0791015625, 51.248046875, 53.4169921875, 55.5859375, 57.7548828125, 59.923828125, 62.0927734375, 64.26171875, 66.4306640625, 68.599609375, 70.7685546875, 72.9375]}, "gradients/decoder.transformer.h.10.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 2.0, 2.0, 4.0, 4.0, 7.0, 9.0, 9.0, 24.0, 24.0, 26.0, 61.0, 92.0, 116.0, 193.0, 307.0, 477.0, 745.0, 1216.0, 1931.0, 3237.0, 5497.0, 9337.0, 16141.0, 28378.0, 52331.0, 104004.0, 236664.0, 308312.0, 132980.0, 65083.0, 34317.0, 19188.0, 11218.0, 6611.0, 3922.0, 2278.0, 1408.0, 876.0, 530.0, 344.0, 222.0, 137.0, 104.0, 66.0, 43.0, 32.0, 17.0, 17.0, 11.0, 3.0, 7.0, 4.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.96875, -5.7772216796875, -5.585693359375, -5.3941650390625, -5.20263671875, -5.0111083984375, -4.819580078125, -4.6280517578125, -4.4365234375, -4.2449951171875, -4.053466796875, -3.8619384765625, -3.67041015625, -3.4788818359375, -3.287353515625, -3.0958251953125, -2.904296875, -2.7127685546875, -2.521240234375, -2.3297119140625, -2.13818359375, -1.9466552734375, -1.755126953125, -1.5635986328125, -1.3720703125, -1.1805419921875, -0.989013671875, -0.7974853515625, -0.60595703125, -0.4144287109375, -0.222900390625, -0.0313720703125, 0.16015625, 0.3516845703125, 0.543212890625, 0.7347412109375, 0.92626953125, 1.1177978515625, 1.309326171875, 1.5008544921875, 1.6923828125, 1.8839111328125, 2.075439453125, 2.2669677734375, 2.45849609375, 2.6500244140625, 2.841552734375, 3.0330810546875, 3.224609375, 3.4161376953125, 3.607666015625, 3.7991943359375, 3.99072265625, 4.1822509765625, 4.373779296875, 4.5653076171875, 4.7568359375, 4.9483642578125, 5.139892578125, 5.3314208984375, 5.52294921875, 5.7144775390625, 5.906005859375, 6.0975341796875, 6.2890625]}, "gradients/decoder.transformer.h.10.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 2.0, 2.0, 0.0, 2.0, 6.0, 3.0, 6.0, 9.0, 2.0, 10.0, 9.0, 14.0, 11.0, 10.0, 16.0, 16.0, 22.0, 26.0, 26.0, 21.0, 26.0, 40.0, 20.0, 37.0, 40.0, 43.0, 44.0, 39.0, 1069.0, 26.0, 45.0, 36.0, 28.0, 38.0, 32.0, 37.0, 27.0, 27.0, 22.0, 20.0, 25.0, 21.0, 15.0, 9.0, 11.0, 12.0, 13.0, 7.0, 8.0, 2.0, 2.0, 2.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 3.0, 1.0], "bins": [-37.0625, -35.91455078125, -34.7666015625, -33.61865234375, -32.470703125, -31.32275390625, -30.1748046875, -29.02685546875, -27.87890625, -26.73095703125, -25.5830078125, -24.43505859375, -23.287109375, -22.13916015625, -20.9912109375, -19.84326171875, -18.6953125, -17.54736328125, -16.3994140625, -15.25146484375, -14.103515625, -12.95556640625, -11.8076171875, -10.65966796875, -9.51171875, -8.36376953125, -7.2158203125, -6.06787109375, -4.919921875, -3.77197265625, -2.6240234375, -1.47607421875, -0.328125, 0.81982421875, 1.9677734375, 3.11572265625, 4.263671875, 5.41162109375, 6.5595703125, 7.70751953125, 8.85546875, 10.00341796875, 11.1513671875, 12.29931640625, 13.447265625, 14.59521484375, 15.7431640625, 16.89111328125, 18.0390625, 19.18701171875, 20.3349609375, 21.48291015625, 22.630859375, 23.77880859375, 24.9267578125, 26.07470703125, 27.22265625, 28.37060546875, 29.5185546875, 30.66650390625, 31.814453125, 32.96240234375, 34.1103515625, 35.25830078125, 36.40625]}, "gradients/decoder.transformer.h.10.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 5.0, 8.0, 8.0, 6.0, 23.0, 28.0, 31.0, 44.0, 65.0, 92.0, 149.0, 205.0, 293.0, 421.0, 658.0, 939.0, 1310.0, 1944.0, 2760.0, 4087.0, 6080.0, 8997.0, 13593.0, 20703.0, 32314.0, 51367.0, 82969.0, 137964.0, 1266599.0, 182550.0, 102953.0, 63368.0, 39496.0, 25479.0, 16042.0, 10892.0, 7141.0, 4886.0, 3365.0, 2276.0, 1534.0, 1085.0, 767.0, 497.0, 352.0, 224.0, 167.0, 143.0, 82.0, 57.0, 47.0, 29.0, 18.0, 16.0, 6.0, 4.0, 7.0, 1.0, 1.0, 0.0, 2.0], "bins": [-2.537109375, -2.457122802734375, -2.37713623046875, -2.297149658203125, -2.2171630859375, -2.137176513671875, -2.05718994140625, -1.977203369140625, -1.897216796875, -1.817230224609375, -1.73724365234375, -1.657257080078125, -1.5772705078125, -1.497283935546875, -1.41729736328125, -1.337310791015625, -1.25732421875, -1.177337646484375, -1.09735107421875, -1.017364501953125, -0.9373779296875, -0.857391357421875, -0.77740478515625, -0.697418212890625, -0.617431640625, -0.537445068359375, -0.45745849609375, -0.377471923828125, -0.2974853515625, -0.217498779296875, -0.13751220703125, -0.057525634765625, 0.0224609375, 0.102447509765625, 0.18243408203125, 0.262420654296875, 0.3424072265625, 0.422393798828125, 0.50238037109375, 0.582366943359375, 0.662353515625, 0.742340087890625, 0.82232666015625, 0.902313232421875, 0.9822998046875, 1.062286376953125, 1.14227294921875, 1.222259521484375, 1.30224609375, 1.382232666015625, 1.46221923828125, 1.542205810546875, 1.6221923828125, 1.702178955078125, 1.78216552734375, 1.862152099609375, 1.942138671875, 2.022125244140625, 2.10211181640625, 2.182098388671875, 2.2620849609375, 2.342071533203125, 2.42205810546875, 2.502044677734375, 2.58203125]}, "gradients/decoder.transformer.h.10.crossattention.q_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 2.0, 2.0, 0.0, 1.0, 1.0, 3.0, 1.0, 2.0, 4.0, 5.0, 4.0, 8.0, 15.0, 13.0, 24.0, 39.0, 63.0, 129.0, 270.0, 184.0, 98.0, 57.0, 22.0, 22.0, 12.0, 7.0, 8.0, 5.0, 2.0, 3.0, 1.0, 1.0, 2.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.252197265625, -0.24341964721679688, -0.23464202880859375, -0.22586441040039062, -0.2170867919921875, -0.20830917358398438, -0.19953155517578125, -0.19075393676757812, -0.181976318359375, -0.17319869995117188, -0.16442108154296875, -0.15564346313476562, -0.1468658447265625, -0.13808822631835938, -0.12931060791015625, -0.12053298950195312, -0.11175537109375, -0.10297775268554688, -0.09420013427734375, -0.08542251586914062, -0.0766448974609375, -0.06786727905273438, -0.05908966064453125, -0.050312042236328125, -0.041534423828125, -0.032756805419921875, -0.02397918701171875, -0.015201568603515625, -0.0064239501953125, 0.002353668212890625, 0.01113128662109375, 0.019908905029296875, 0.0286865234375, 0.037464141845703125, 0.04624176025390625, 0.055019378662109375, 0.0637969970703125, 0.07257461547851562, 0.08135223388671875, 0.09012985229492188, 0.098907470703125, 0.10768508911132812, 0.11646270751953125, 0.12524032592773438, 0.1340179443359375, 0.14279556274414062, 0.15157318115234375, 0.16035079956054688, 0.16912841796875, 0.17790603637695312, 0.18668365478515625, 0.19546127319335938, 0.2042388916015625, 0.21301651000976562, 0.22179412841796875, 0.23057174682617188, 0.239349365234375, 0.24812698364257812, 0.25690460205078125, 0.2656822204589844, 0.2744598388671875, 0.2832374572753906, 0.29201507568359375, 0.3007926940917969, 0.3095703125]}, "gradients/decoder.transformer.h.10.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 2.0, 1.0, 1.0, 3.0, 0.0, 6.0, 4.0, 11.0, 10.0, 19.0, 30.0, 61.0, 126.0, 475.0, 1047395.0, 200.0, 83.0, 44.0, 31.0, 18.0, 12.0, 6.0, 4.0, 6.0, 4.0, 0.0, 1.0, 3.0, 2.0, 0.0, 0.0, 3.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0], "bins": [-9.65625, -9.3824462890625, -9.108642578125, -8.8348388671875, -8.56103515625, -8.2872314453125, -8.013427734375, -7.7396240234375, -7.4658203125, -7.1920166015625, -6.918212890625, -6.6444091796875, -6.37060546875, -6.0968017578125, -5.822998046875, -5.5491943359375, -5.275390625, -5.0015869140625, -4.727783203125, -4.4539794921875, -4.18017578125, -3.9063720703125, -3.632568359375, -3.3587646484375, -3.0849609375, -2.8111572265625, -2.537353515625, -2.2635498046875, -1.98974609375, -1.7159423828125, -1.442138671875, -1.1683349609375, -0.89453125, -0.6207275390625, -0.346923828125, -0.0731201171875, 0.20068359375, 0.4744873046875, 0.748291015625, 1.0220947265625, 1.2958984375, 1.5697021484375, 1.843505859375, 2.1173095703125, 2.39111328125, 2.6649169921875, 2.938720703125, 3.2125244140625, 3.486328125, 3.7601318359375, 4.033935546875, 4.3077392578125, 4.58154296875, 4.8553466796875, 5.129150390625, 5.4029541015625, 5.6767578125, 5.9505615234375, 6.224365234375, 6.4981689453125, 6.77197265625, 7.0457763671875, 7.319580078125, 7.5933837890625, 7.8671875]}, "gradients/decoder.transformer.h.10.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 499.0, 519.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.1394769251346588, -0.09296469390392303, -0.04645247012376785, 5.97536563873291e-05, 0.04657198488712311, 0.09308421611785889, 0.13959643244743347, 0.18610867857933044, 0.23262089490890503, 0.2791331112384796, 0.3256453573703766, 0.37215757369995117, 0.41866981983184814, 0.46518203616142273, 0.5116942524909973, 0.5582064986228943, 0.6047186851501465, 0.6512309312820435, 0.6977431178092957, 0.7442553639411926, 0.7907676100730896, 0.8372797966003418, 0.8837920427322388, 0.9303042888641357, 0.9768165349960327, 1.0233287811279297, 1.0698410272598267, 1.1163532733917236, 1.162865400314331, 1.209377646446228, 1.255889892578125, 1.302402138710022, 1.3489142656326294, 1.3954265117645264, 1.4419387578964233, 1.4884510040283203, 1.5349631309509277, 1.5814753770828247, 1.6279876232147217, 1.6744998693466187, 1.7210121154785156, 1.7675243616104126, 1.8140366077423096, 1.860548734664917, 1.907060980796814, 1.953573226928711, 2.0000853538513184, 2.046597719192505, 2.0931098461151123, 2.1396219730377197, 2.1861343383789062, 2.2326464653015137, 2.2791588306427, 2.3256709575653076, 2.372183322906494, 2.4186954498291016, 2.465207815170288, 2.5117199420928955, 2.558232307434082, 2.6047444343566895, 2.651256799697876, 2.6977689266204834, 2.74428129196167, 2.7907934188842773, 2.8373055458068848]}, "gradients/decoder.transformer.h.10.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 1.0, 3.0, 4.0, 2.0, 1.0, 2.0, 4.0, 7.0, 4.0, 8.0, 9.0, 13.0, 9.0, 16.0, 18.0, 16.0, 16.0, 23.0, 23.0, 26.0, 39.0, 28.0, 36.0, 37.0, 33.0, 38.0, 39.0, 48.0, 37.0, 29.0, 39.0, 31.0, 47.0, 28.0, 30.0, 34.0, 33.0, 36.0, 23.0, 22.0, 16.0, 20.0, 14.0, 13.0, 11.0, 9.0, 11.0, 8.0, 6.0, 2.0, 4.0, 1.0, 5.0, 2.0, 1.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0], "bins": [-0.10227209329605103, -0.0990079864859581, -0.09574387967586517, -0.09247976541519165, -0.08921565860509872, -0.0859515517950058, -0.08268743753433228, -0.07942333072423935, -0.07615922391414642, -0.0728951171040535, -0.06963101029396057, -0.06636689603328705, -0.06310278922319412, -0.059838682413101196, -0.05657457187771797, -0.05331046134233475, -0.05004635453224182, -0.046782247722148895, -0.04351813718676567, -0.040254026651382446, -0.03698991984128952, -0.033725813031196594, -0.03046170249581337, -0.027197593823075294, -0.02393348515033722, -0.020669376477599144, -0.01740526780486107, -0.014141159132122993, -0.010877050459384918, -0.007612941786646843, -0.004348833113908768, -0.0010847244411706924, 0.002179384231567383, 0.005443492904305458, 0.008707601577043533, 0.011971710249781609, 0.015235818922519684, 0.01849992759525776, 0.021764036267995834, 0.02502814494073391, 0.028292253613471985, 0.03155636042356491, 0.034820470958948135, 0.03808458149433136, 0.041348688304424286, 0.04461279511451721, 0.047876905649900436, 0.05114101618528366, 0.05440512299537659, 0.05766922980546951, 0.06093334034085274, 0.06419745087623596, 0.06746155768632889, 0.07072566449642181, 0.07398977875709534, 0.07725388556718826, 0.08051799237728119, 0.08378209918737411, 0.08704620599746704, 0.09031032025814056, 0.09357442706823349, 0.09683853387832642, 0.10010264813899994, 0.10336675494909286, 0.10663086175918579]}, "gradients/decoder.transformer.h.10.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 2.0, 4.0, 3.0, 3.0, 5.0, 5.0, 6.0, 5.0, 12.0, 18.0, 25.0, 15.0, 27.0, 26.0, 24.0, 34.0, 35.0, 49.0, 41.0, 55.0, 51.0, 47.0, 55.0, 48.0, 47.0, 41.0, 44.0, 46.0, 39.0, 31.0, 22.0, 23.0, 24.0, 22.0, 15.0, 14.0, 15.0, 11.0, 5.0, 4.0, 7.0, 3.0, 2.0, 2.0, 0.0, 0.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-65.875, -63.7060546875, -61.537109375, -59.3681640625, -57.19921875, -55.0302734375, -52.861328125, -50.6923828125, -48.5234375, -46.3544921875, -44.185546875, -42.0166015625, -39.84765625, -37.6787109375, -35.509765625, -33.3408203125, -31.171875, -29.0029296875, -26.833984375, -24.6650390625, -22.49609375, -20.3271484375, -18.158203125, -15.9892578125, -13.8203125, -11.6513671875, -9.482421875, -7.3134765625, -5.14453125, -2.9755859375, -0.806640625, 1.3623046875, 3.53125, 5.7001953125, 7.869140625, 10.0380859375, 12.20703125, 14.3759765625, 16.544921875, 18.7138671875, 20.8828125, 23.0517578125, 25.220703125, 27.3896484375, 29.55859375, 31.7275390625, 33.896484375, 36.0654296875, 38.234375, 40.4033203125, 42.572265625, 44.7412109375, 46.91015625, 49.0791015625, 51.248046875, 53.4169921875, 55.5859375, 57.7548828125, 59.923828125, 62.0927734375, 64.26171875, 66.4306640625, 68.599609375, 70.7685546875, 72.9375]}, "gradients/decoder.transformer.h.10.attn.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 4.0, 6.0, 4.0, 10.0, 9.0, 13.0, 20.0, 25.0, 48.0, 55.0, 86.0, 133.0, 146.0, 278.0, 417.0, 579.0, 867.0, 1253.0, 1912.0, 2903.0, 4417.0, 6943.0, 11785.0, 23658.0, 88198.0, 723602.0, 119874.0, 26972.0, 12876.0, 7493.0, 4657.0, 3107.0, 2030.0, 1342.0, 918.0, 638.0, 413.0, 246.0, 191.0, 139.0, 97.0, 57.0, 48.0, 18.0, 26.0, 20.0, 11.0, 7.0, 5.0, 3.0, 3.0, 2.0, 2.0, 2.0, 2.0], "bins": [-83.625, -81.2060546875, -78.787109375, -76.3681640625, -73.94921875, -71.5302734375, -69.111328125, -66.6923828125, -64.2734375, -61.8544921875, -59.435546875, -57.0166015625, -54.59765625, -52.1787109375, -49.759765625, -47.3408203125, -44.921875, -42.5029296875, -40.083984375, -37.6650390625, -35.24609375, -32.8271484375, -30.408203125, -27.9892578125, -25.5703125, -23.1513671875, -20.732421875, -18.3134765625, -15.89453125, -13.4755859375, -11.056640625, -8.6376953125, -6.21875, -3.7998046875, -1.380859375, 1.0380859375, 3.45703125, 5.8759765625, 8.294921875, 10.7138671875, 13.1328125, 15.5517578125, 17.970703125, 20.3896484375, 22.80859375, 25.2275390625, 27.646484375, 30.0654296875, 32.484375, 34.9033203125, 37.322265625, 39.7412109375, 42.16015625, 44.5791015625, 46.998046875, 49.4169921875, 51.8359375, 54.2548828125, 56.673828125, 59.0927734375, 61.51171875, 63.9306640625, 66.349609375, 68.7685546875, 71.1875]}, "gradients/decoder.transformer.h.10.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 1.0, 4.0, 3.0, 4.0, 4.0, 6.0, 6.0, 9.0, 8.0, 10.0, 10.0, 21.0, 23.0, 25.0, 15.0, 24.0, 38.0, 40.0, 44.0, 41.0, 49.0, 52.0, 69.0, 210.0, 1785.0, 98.0, 48.0, 60.0, 56.0, 47.0, 28.0, 39.0, 36.0, 25.0, 23.0, 17.0, 11.0, 19.0, 9.0, 7.0, 6.0, 7.0, 6.0, 4.0, 2.0, 1.0, 4.0, 3.0, 2.0, 1.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-194.125, -187.896484375, -181.66796875, -175.439453125, -169.2109375, -162.982421875, -156.75390625, -150.525390625, -144.296875, -138.068359375, -131.83984375, -125.611328125, -119.3828125, -113.154296875, -106.92578125, -100.697265625, -94.46875, -88.240234375, -82.01171875, -75.783203125, -69.5546875, -63.326171875, -57.09765625, -50.869140625, -44.640625, -38.412109375, -32.18359375, -25.955078125, -19.7265625, -13.498046875, -7.26953125, -1.041015625, 5.1875, 11.416015625, 17.64453125, 23.873046875, 30.1015625, 36.330078125, 42.55859375, 48.787109375, 55.015625, 61.244140625, 67.47265625, 73.701171875, 79.9296875, 86.158203125, 92.38671875, 98.615234375, 104.84375, 111.072265625, 117.30078125, 123.529296875, 129.7578125, 135.986328125, 142.21484375, 148.443359375, 154.671875, 160.900390625, 167.12890625, 173.357421875, 179.5859375, 185.814453125, 192.04296875, 198.271484375, 204.5]}, "gradients/decoder.transformer.h.10.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 2.0, 3.0, 5.0, 2.0, 6.0, 8.0, 10.0, 15.0, 17.0, 31.0, 50.0, 96.0, 199.0, 669.0, 2814.0, 22879.0, 3062961.0, 49999.0, 4429.0, 930.0, 264.0, 133.0, 61.0, 36.0, 25.0, 25.0, 15.0, 10.0, 6.0, 5.0, 0.0, 2.0, 4.0, 2.0, 2.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-984.5, -954.328125, -924.15625, -893.984375, -863.8125, -833.640625, -803.46875, -773.296875, -743.125, -712.953125, -682.78125, -652.609375, -622.4375, -592.265625, -562.09375, -531.921875, -501.75, -471.578125, -441.40625, -411.234375, -381.0625, -350.890625, -320.71875, -290.546875, -260.375, -230.203125, -200.03125, -169.859375, -139.6875, -109.515625, -79.34375, -49.171875, -19.0, 11.171875, 41.34375, 71.515625, 101.6875, 131.859375, 162.03125, 192.203125, 222.375, 252.546875, 282.71875, 312.890625, 343.0625, 373.234375, 403.40625, 433.578125, 463.75, 493.921875, 524.09375, 554.265625, 584.4375, 614.609375, 644.78125, 674.953125, 705.125, 735.296875, 765.46875, 795.640625, 825.8125, 855.984375, 886.15625, 916.328125, 946.5]}, "gradients/decoder.transformer.h.10.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 8.0, 132.0, 792.0, 76.0, 8.0, 3.0], "bins": [-7885.18798828125, -7754.6591796875, -7624.13037109375, -7493.6015625, -7363.072265625, -7232.54345703125, -7102.0146484375, -6971.48583984375, -6840.95703125, -6710.42822265625, -6579.8994140625, -6449.3701171875, -6318.84130859375, -6188.3125, -6057.78369140625, -5927.2548828125, -5796.7255859375, -5666.19677734375, -5535.66796875, -5405.138671875, -5274.60986328125, -5144.0810546875, -5013.55224609375, -4883.0234375, -4752.49462890625, -4621.9658203125, -4491.43701171875, -4360.908203125, -4230.37890625, -4099.85009765625, -3969.3212890625, -3838.79248046875, -3708.26416015625, -3577.7353515625, -3447.206298828125, -3316.677490234375, -3186.148681640625, -3055.61962890625, -2925.0908203125, -2794.56201171875, -2664.032958984375, -2533.504150390625, -2402.97509765625, -2272.4462890625, -2141.91748046875, -2011.3885498046875, -1880.859619140625, -1750.330810546875, -1619.802001953125, -1489.2730712890625, -1358.7442626953125, -1228.21533203125, -1097.6865234375, -967.1575927734375, -836.628662109375, -706.0997924804688, -575.5709228515625, -445.04205322265625, -314.5131530761719, -183.9842529296875, -53.45538330078125, 77.073486328125, 207.6024169921875, 338.13128662109375, 468.6601867675781]}, "gradients/decoder.transformer.h.10.ln_1.bias": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 0.0, 5.0, 5.0, 3.0, 5.0, 6.0, 6.0, 9.0, 6.0, 16.0, 13.0, 22.0, 18.0, 17.0, 20.0, 23.0, 39.0, 40.0, 27.0, 36.0, 25.0, 27.0, 42.0, 30.0, 44.0, 42.0, 31.0, 37.0, 48.0, 44.0, 39.0, 30.0, 35.0, 36.0, 34.0, 12.0, 29.0, 16.0, 16.0, 14.0, 17.0, 11.0, 7.0, 8.0, 7.0, 4.0, 7.0, 1.0, 2.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-421.03778076171875, -405.5781555175781, -390.1184997558594, -374.65887451171875, -359.19921875, -343.7395935058594, -328.27996826171875, -312.8203125, -297.3606872558594, -281.90106201171875, -266.44140625, -250.98178100585938, -235.5221405029297, -220.0625, -204.60287475585938, -189.1432342529297, -173.68359375, -158.2239532470703, -142.76431274414062, -127.3046875, -111.84504699707031, -96.38540649414062, -80.92577362060547, -65.46614074707031, -50.006500244140625, -34.5468635559082, -19.08722686767578, -3.6275901794433594, 11.832046508789062, 27.29168701171875, 42.751319885253906, 58.21095275878906, 73.67059326171875, 89.13023376464844, 104.5898666381836, 120.04949951171875, 135.50914001464844, 150.96878051757812, 166.42840576171875, 181.88804626464844, 197.34768676757812, 212.8073272705078, 228.2669677734375, 243.72659301757812, 259.18621826171875, 274.6458740234375, 290.1054992675781, 305.56512451171875, 321.0247802734375, 336.4844055175781, 351.9440612792969, 367.4036865234375, 382.86334228515625, 398.3229675292969, 413.7825927734375, 429.24224853515625, 444.7018737792969, 460.1614990234375, 475.62115478515625, 491.0807800292969, 506.5404052734375, 522.0000610351562, 537.459716796875, 552.9193115234375, 568.3789672851562]}, "gradients/decoder.transformer.h.9.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 3.0, 4.0, 3.0, 4.0, 4.0, 3.0, 10.0, 4.0, 10.0, 10.0, 19.0, 16.0, 21.0, 25.0, 23.0, 38.0, 39.0, 26.0, 32.0, 51.0, 45.0, 48.0, 55.0, 49.0, 57.0, 45.0, 46.0, 32.0, 41.0, 39.0, 26.0, 28.0, 32.0, 26.0, 20.0, 16.0, 11.0, 10.0, 13.0, 16.0, 2.0, 3.0, 3.0, 2.0, 3.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-64.25, -61.9033203125, -59.556640625, -57.2099609375, -54.86328125, -52.5166015625, -50.169921875, -47.8232421875, -45.4765625, -43.1298828125, -40.783203125, -38.4365234375, -36.08984375, -33.7431640625, -31.396484375, -29.0498046875, -26.703125, -24.3564453125, -22.009765625, -19.6630859375, -17.31640625, -14.9697265625, -12.623046875, -10.2763671875, -7.9296875, -5.5830078125, -3.236328125, -0.8896484375, 1.45703125, 3.8037109375, 6.150390625, 8.4970703125, 10.84375, 13.1904296875, 15.537109375, 17.8837890625, 20.23046875, 22.5771484375, 24.923828125, 27.2705078125, 29.6171875, 31.9638671875, 34.310546875, 36.6572265625, 39.00390625, 41.3505859375, 43.697265625, 46.0439453125, 48.390625, 50.7373046875, 53.083984375, 55.4306640625, 57.77734375, 60.1240234375, 62.470703125, 64.8173828125, 67.1640625, 69.5107421875, 71.857421875, 74.2041015625, 76.55078125, 78.8974609375, 81.244140625, 83.5908203125, 85.9375]}, "gradients/decoder.transformer.h.9.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 3.0, 3.0, 1.0, 2.0, 3.0, 8.0, 8.0, 19.0, 20.0, 34.0, 61.0, 75.0, 95.0, 161.0, 240.0, 357.0, 573.0, 958.0, 1659.0, 3226.0, 6864.0, 17609.0, 141823.0, 3487828.0, 487166.0, 27367.0, 8979.0, 3984.0, 2068.0, 1211.0, 651.0, 405.0, 263.0, 178.0, 125.0, 76.0, 58.0, 41.0, 30.0, 14.0, 15.0, 13.0, 7.0, 6.0, 2.0, 2.0, 4.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-178.625, -172.603515625, -166.58203125, -160.560546875, -154.5390625, -148.517578125, -142.49609375, -136.474609375, -130.453125, -124.431640625, -118.41015625, -112.388671875, -106.3671875, -100.345703125, -94.32421875, -88.302734375, -82.28125, -76.259765625, -70.23828125, -64.216796875, -58.1953125, -52.173828125, -46.15234375, -40.130859375, -34.109375, -28.087890625, -22.06640625, -16.044921875, -10.0234375, -4.001953125, 2.01953125, 8.041015625, 14.0625, 20.083984375, 26.10546875, 32.126953125, 38.1484375, 44.169921875, 50.19140625, 56.212890625, 62.234375, 68.255859375, 74.27734375, 80.298828125, 86.3203125, 92.341796875, 98.36328125, 104.384765625, 110.40625, 116.427734375, 122.44921875, 128.470703125, 134.4921875, 140.513671875, 146.53515625, 152.556640625, 158.578125, 164.599609375, 170.62109375, 176.642578125, 182.6640625, 188.685546875, 194.70703125, 200.728515625, 206.75]}, "gradients/decoder.transformer.h.9.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 3.0, 3.0, 1.0, 3.0, 11.0, 7.0, 13.0, 20.0, 30.0, 40.0, 70.0, 83.0, 231.0, 524.0, 1295.0, 995.0, 401.0, 143.0, 85.0, 39.0, 16.0, 25.0, 7.0, 13.0, 9.0, 10.0, 5.0, 6.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-138.125, -131.802734375, -125.48046875, -119.158203125, -112.8359375, -106.513671875, -100.19140625, -93.869140625, -87.546875, -81.224609375, -74.90234375, -68.580078125, -62.2578125, -55.935546875, -49.61328125, -43.291015625, -36.96875, -30.646484375, -24.32421875, -18.001953125, -11.6796875, -5.357421875, 0.96484375, 7.287109375, 13.609375, 19.931640625, 26.25390625, 32.576171875, 38.8984375, 45.220703125, 51.54296875, 57.865234375, 64.1875, 70.509765625, 76.83203125, 83.154296875, 89.4765625, 95.798828125, 102.12109375, 108.443359375, 114.765625, 121.087890625, 127.41015625, 133.732421875, 140.0546875, 146.376953125, 152.69921875, 159.021484375, 165.34375, 171.666015625, 177.98828125, 184.310546875, 190.6328125, 196.955078125, 203.27734375, 209.599609375, 215.921875, 222.244140625, 228.56640625, 234.888671875, 241.2109375, 247.533203125, 253.85546875, 260.177734375, 266.5]}, "gradients/decoder.transformer.h.9.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 3.0, 2.0, 6.0, 11.0, 10.0, 15.0, 33.0, 44.0, 55.0, 108.0, 143.0, 296.0, 771.0, 3681.0, 43783.0, 4076398.0, 62654.0, 4554.0, 895.0, 338.0, 180.0, 97.0, 85.0, 44.0, 37.0, 23.0, 12.0, 9.0, 2.0, 1.0, 5.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-1044.0, -1018.875, -993.75, -968.625, -943.5, -918.375, -893.25, -868.125, -843.0, -817.875, -792.75, -767.625, -742.5, -717.375, -692.25, -667.125, -642.0, -616.875, -591.75, -566.625, -541.5, -516.375, -491.25, -466.125, -441.0, -415.875, -390.75, -365.625, -340.5, -315.375, -290.25, -265.125, -240.0, -214.875, -189.75, -164.625, -139.5, -114.375, -89.25, -64.125, -39.0, -13.875, 11.25, 36.375, 61.5, 86.625, 111.75, 136.875, 162.0, 187.125, 212.25, 237.375, 262.5, 287.625, 312.75, 337.875, 363.0, 388.125, 413.25, 438.375, 463.5, 488.625, 513.75, 538.875, 564.0]}, "gradients/decoder.transformer.h.9.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 2.0, 5.0, 14.0, 7.0, 17.0, 23.0, 35.0, 62.0, 88.0, 109.0, 191.0, 167.0, 90.0, 78.0, 43.0, 23.0, 15.0, 15.0, 10.0, 7.0, 4.0, 2.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-940.9987182617188, -920.4614868164062, -899.9242553710938, -879.3870239257812, -858.8497924804688, -838.3125610351562, -817.7753295898438, -797.2381591796875, -776.700927734375, -756.1636962890625, -735.62646484375, -715.0892333984375, -694.552001953125, -674.0147705078125, -653.4775390625, -632.9403076171875, -612.403076171875, -591.8658447265625, -571.32861328125, -550.7913818359375, -530.254150390625, -509.7169189453125, -489.1797180175781, -468.6424865722656, -448.1052551269531, -427.5680236816406, -407.0307922363281, -386.4935607910156, -365.95635986328125, -345.41912841796875, -324.88189697265625, -304.34466552734375, -283.8074035644531, -263.2701721191406, -242.73294067382812, -222.1957244873047, -201.6584930419922, -181.1212615966797, -160.58404541015625, -140.04681396484375, -119.50958251953125, -98.97235107421875, -78.43512725830078, -57.89789962768555, -37.36067199707031, -16.823440551757812, 3.7137832641601562, 24.251007080078125, 44.788238525390625, 65.32546997070312, 85.8626937866211, 106.39991760253906, 126.93714904785156, 147.47438049316406, 168.0115966796875, 188.548828125, 209.0860595703125, 229.623291015625, 250.1605224609375, 270.69775390625, 291.2349853515625, 311.772216796875, 332.3094177246094, 352.8466491699219, 373.3838806152344]}, "gradients/decoder.transformer.h.9.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 4.0, 3.0, 4.0, 5.0, 3.0, 1.0, 2.0, 4.0, 6.0, 13.0, 11.0, 20.0, 26.0, 24.0, 18.0, 13.0, 18.0, 28.0, 39.0, 44.0, 39.0, 41.0, 51.0, 35.0, 49.0, 41.0, 39.0, 46.0, 41.0, 41.0, 38.0, 45.0, 31.0, 32.0, 21.0, 16.0, 23.0, 20.0, 20.0, 8.0, 9.0, 13.0, 10.0, 5.0, 12.0, 4.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-401.249267578125, -389.5586853027344, -377.8680725097656, -366.177490234375, -354.48687744140625, -342.7962951660156, -331.105712890625, -319.41510009765625, -307.7245178222656, -296.033935546875, -284.34332275390625, -272.6527404785156, -260.962158203125, -249.27154541015625, -237.58096313476562, -225.89036560058594, -214.19976806640625, -202.50917053222656, -190.81857299804688, -179.12799072265625, -167.43739318847656, -155.74679565429688, -144.05621337890625, -132.36561584472656, -120.67501831054688, -108.98442077636719, -97.29383087158203, -85.60324096679688, -73.91264343261719, -62.222049713134766, -50.531455993652344, -38.84086608886719, -27.1502685546875, -15.459674835205078, -3.7690811157226562, 7.921512603759766, 19.612106323242188, 31.30270004272461, 42.99329376220703, 54.68388366699219, 66.37448120117188, 78.06507873535156, 89.75566864013672, 101.44625854492188, 113.13685607910156, 124.82745361328125, 136.51803588867188, 148.20863342285156, 159.89923095703125, 171.58982849121094, 183.28042602539062, 194.97100830078125, 206.66160583496094, 218.35220336914062, 230.04278564453125, 241.73338317871094, 253.42398071289062, 265.11456298828125, 276.80517578125, 288.4957580566406, 300.18634033203125, 311.876953125, 323.5675354003906, 335.25811767578125, 346.94873046875]}, "gradients/decoder.transformer.h.9.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 3.0, 0.0, 4.0, 6.0, 1.0, 5.0, 2.0, 4.0, 3.0, 13.0, 12.0, 5.0, 13.0, 14.0, 21.0, 26.0, 24.0, 26.0, 39.0, 48.0, 38.0, 35.0, 55.0, 47.0, 43.0, 54.0, 51.0, 37.0, 40.0, 38.0, 45.0, 26.0, 34.0, 32.0, 18.0, 33.0, 25.0, 18.0, 18.0, 10.0, 5.0, 16.0, 7.0, 8.0, 5.0, 3.0, 1.0, 3.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-63.3125, -61.0986328125, -58.884765625, -56.6708984375, -54.45703125, -52.2431640625, -50.029296875, -47.8154296875, -45.6015625, -43.3876953125, -41.173828125, -38.9599609375, -36.74609375, -34.5322265625, -32.318359375, -30.1044921875, -27.890625, -25.6767578125, -23.462890625, -21.2490234375, -19.03515625, -16.8212890625, -14.607421875, -12.3935546875, -10.1796875, -7.9658203125, -5.751953125, -3.5380859375, -1.32421875, 0.8896484375, 3.103515625, 5.3173828125, 7.53125, 9.7451171875, 11.958984375, 14.1728515625, 16.38671875, 18.6005859375, 20.814453125, 23.0283203125, 25.2421875, 27.4560546875, 29.669921875, 31.8837890625, 34.09765625, 36.3115234375, 38.525390625, 40.7392578125, 42.953125, 45.1669921875, 47.380859375, 49.5947265625, 51.80859375, 54.0224609375, 56.236328125, 58.4501953125, 60.6640625, 62.8779296875, 65.091796875, 67.3056640625, 69.51953125, 71.7333984375, 73.947265625, 76.1611328125, 78.375]}, "gradients/decoder.transformer.h.9.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 4.0, 7.0, 6.0, 6.0, 17.0, 16.0, 34.0, 30.0, 52.0, 52.0, 79.0, 117.0, 155.0, 230.0, 325.0, 461.0, 714.0, 1177.0, 1863.0, 3277.0, 6073.0, 11461.0, 23822.0, 52694.0, 130821.0, 395140.0, 253861.0, 88874.0, 38126.0, 17944.0, 9007.0, 4906.0, 2662.0, 1664.0, 958.0, 633.0, 411.0, 254.0, 161.0, 129.0, 95.0, 64.0, 50.0, 39.0, 23.0, 15.0, 16.0, 13.0, 7.0, 8.0, 7.0, 4.0, 1.0, 3.0, 2.0, 1.0], "bins": [-11.5625, -11.220458984375, -10.87841796875, -10.536376953125, -10.1943359375, -9.852294921875, -9.51025390625, -9.168212890625, -8.826171875, -8.484130859375, -8.14208984375, -7.800048828125, -7.4580078125, -7.115966796875, -6.77392578125, -6.431884765625, -6.08984375, -5.747802734375, -5.40576171875, -5.063720703125, -4.7216796875, -4.379638671875, -4.03759765625, -3.695556640625, -3.353515625, -3.011474609375, -2.66943359375, -2.327392578125, -1.9853515625, -1.643310546875, -1.30126953125, -0.959228515625, -0.6171875, -0.275146484375, 0.06689453125, 0.408935546875, 0.7509765625, 1.093017578125, 1.43505859375, 1.777099609375, 2.119140625, 2.461181640625, 2.80322265625, 3.145263671875, 3.4873046875, 3.829345703125, 4.17138671875, 4.513427734375, 4.85546875, 5.197509765625, 5.53955078125, 5.881591796875, 6.2236328125, 6.565673828125, 6.90771484375, 7.249755859375, 7.591796875, 7.933837890625, 8.27587890625, 8.617919921875, 8.9599609375, 9.302001953125, 9.64404296875, 9.986083984375, 10.328125]}, "gradients/decoder.transformer.h.9.crossattention.c_attn.bias": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 4.0, 4.0, 4.0, 5.0, 9.0, 6.0, 9.0, 7.0, 19.0, 13.0, 14.0, 15.0, 23.0, 15.0, 27.0, 22.0, 22.0, 25.0, 42.0, 29.0, 26.0, 25.0, 39.0, 35.0, 1058.0, 56.0, 34.0, 46.0, 32.0, 38.0, 33.0, 29.0, 36.0, 22.0, 35.0, 27.0, 14.0, 26.0, 14.0, 18.0, 7.0, 21.0, 11.0, 5.0, 6.0, 5.0, 7.0, 4.0, 6.0, 3.0, 3.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0], "bins": [-38.65625, -37.44091796875, -36.2255859375, -35.01025390625, -33.794921875, -32.57958984375, -31.3642578125, -30.14892578125, -28.93359375, -27.71826171875, -26.5029296875, -25.28759765625, -24.072265625, -22.85693359375, -21.6416015625, -20.42626953125, -19.2109375, -17.99560546875, -16.7802734375, -15.56494140625, -14.349609375, -13.13427734375, -11.9189453125, -10.70361328125, -9.48828125, -8.27294921875, -7.0576171875, -5.84228515625, -4.626953125, -3.41162109375, -2.1962890625, -0.98095703125, 0.234375, 1.44970703125, 2.6650390625, 3.88037109375, 5.095703125, 6.31103515625, 7.5263671875, 8.74169921875, 9.95703125, 11.17236328125, 12.3876953125, 13.60302734375, 14.818359375, 16.03369140625, 17.2490234375, 18.46435546875, 19.6796875, 20.89501953125, 22.1103515625, 23.32568359375, 24.541015625, 25.75634765625, 26.9716796875, 28.18701171875, 29.40234375, 30.61767578125, 31.8330078125, 33.04833984375, 34.263671875, 35.47900390625, 36.6943359375, 37.90966796875, 39.125]}, "gradients/decoder.transformer.h.9.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 3.0, 3.0, 3.0, 10.0, 14.0, 18.0, 18.0, 45.0, 68.0, 90.0, 122.0, 184.0, 293.0, 417.0, 689.0, 1014.0, 1582.0, 2450.0, 3958.0, 6318.0, 10027.0, 16391.0, 27223.0, 46047.0, 79056.0, 143782.0, 1246009.0, 243599.0, 110950.0, 62377.0, 36580.0, 22021.0, 13398.0, 8129.0, 5156.0, 3168.0, 2076.0, 1336.0, 864.0, 599.0, 344.0, 247.0, 158.0, 103.0, 61.0, 56.0, 27.0, 15.0, 15.0, 18.0, 3.0, 2.0, 4.0, 3.0, 2.0, 1.0, 0.0, 2.0], "bins": [-3.677734375, -3.565582275390625, -3.45343017578125, -3.341278076171875, -3.2291259765625, -3.116973876953125, -3.00482177734375, -2.892669677734375, -2.780517578125, -2.668365478515625, -2.55621337890625, -2.444061279296875, -2.3319091796875, -2.219757080078125, -2.10760498046875, -1.995452880859375, -1.88330078125, -1.771148681640625, -1.65899658203125, -1.546844482421875, -1.4346923828125, -1.322540283203125, -1.21038818359375, -1.098236083984375, -0.986083984375, -0.873931884765625, -0.76177978515625, -0.649627685546875, -0.5374755859375, -0.425323486328125, -0.31317138671875, -0.201019287109375, -0.0888671875, 0.023284912109375, 0.13543701171875, 0.247589111328125, 0.3597412109375, 0.471893310546875, 0.58404541015625, 0.696197509765625, 0.808349609375, 0.920501708984375, 1.03265380859375, 1.144805908203125, 1.2569580078125, 1.369110107421875, 1.48126220703125, 1.593414306640625, 1.70556640625, 1.817718505859375, 1.92987060546875, 2.042022705078125, 2.1541748046875, 2.266326904296875, 2.37847900390625, 2.490631103515625, 2.602783203125, 2.714935302734375, 2.82708740234375, 2.939239501953125, 3.0513916015625, 3.163543701171875, 3.27569580078125, 3.387847900390625, 3.5]}, "gradients/decoder.transformer.h.9.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 2.0, 4.0, 3.0, 1.0, 3.0, 4.0, 11.0, 12.0, 21.0, 31.0, 57.0, 122.0, 364.0, 159.0, 76.0, 45.0, 23.0, 15.0, 15.0, 10.0, 4.0, 5.0, 5.0, 2.0, 4.0, 1.0, 1.0, 3.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.82421875, -0.7933197021484375, -0.762420654296875, -0.7315216064453125, -0.70062255859375, -0.6697235107421875, -0.638824462890625, -0.6079254150390625, -0.5770263671875, -0.5461273193359375, -0.515228271484375, -0.4843292236328125, -0.45343017578125, -0.4225311279296875, -0.391632080078125, -0.3607330322265625, -0.329833984375, -0.2989349365234375, -0.268035888671875, -0.2371368408203125, -0.20623779296875, -0.1753387451171875, -0.144439697265625, -0.1135406494140625, -0.0826416015625, -0.0517425537109375, -0.020843505859375, 0.0100555419921875, 0.04095458984375, 0.0718536376953125, 0.102752685546875, 0.1336517333984375, 0.16455078125, 0.1954498291015625, 0.226348876953125, 0.2572479248046875, 0.28814697265625, 0.3190460205078125, 0.349945068359375, 0.3808441162109375, 0.4117431640625, 0.4426422119140625, 0.473541259765625, 0.5044403076171875, 0.53533935546875, 0.5662384033203125, 0.597137451171875, 0.6280364990234375, 0.658935546875, 0.6898345947265625, 0.720733642578125, 0.7516326904296875, 0.78253173828125, 0.8134307861328125, 0.844329833984375, 0.8752288818359375, 0.9061279296875, 0.9370269775390625, 0.967926025390625, 0.9988250732421875, 1.02972412109375, 1.0606231689453125, 1.091522216796875, 1.1224212646484375, 1.1533203125]}, "gradients/decoder.transformer.h.9.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 3.0, 0.0, 2.0, 0.0, 0.0, 0.0, 3.0, 1.0, 1.0, 3.0, 4.0, 4.0, 5.0, 7.0, 9.0, 13.0, 21.0, 25.0, 53.0, 108.0, 315.0, 1047629.0, 184.0, 71.0, 33.0, 21.0, 14.0, 13.0, 4.0, 4.0, 1.0, 3.0, 4.0, 2.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-36.40625, -35.4306640625, -34.455078125, -33.4794921875, -32.50390625, -31.5283203125, -30.552734375, -29.5771484375, -28.6015625, -27.6259765625, -26.650390625, -25.6748046875, -24.69921875, -23.7236328125, -22.748046875, -21.7724609375, -20.796875, -19.8212890625, -18.845703125, -17.8701171875, -16.89453125, -15.9189453125, -14.943359375, -13.9677734375, -12.9921875, -12.0166015625, -11.041015625, -10.0654296875, -9.08984375, -8.1142578125, -7.138671875, -6.1630859375, -5.1875, -4.2119140625, -3.236328125, -2.2607421875, -1.28515625, -0.3095703125, 0.666015625, 1.6416015625, 2.6171875, 3.5927734375, 4.568359375, 5.5439453125, 6.51953125, 7.4951171875, 8.470703125, 9.4462890625, 10.421875, 11.3974609375, 12.373046875, 13.3486328125, 14.32421875, 15.2998046875, 16.275390625, 17.2509765625, 18.2265625, 19.2021484375, 20.177734375, 21.1533203125, 22.12890625, 23.1044921875, 24.080078125, 25.0556640625, 26.03125]}, "gradients/decoder.transformer.h.9.ln_cross_attn.weight": {"_type": "histogram", "values": [948.0, 74.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.21622787415981293, 0.04361291229724884, 0.3034536838531494, 0.5632944703102112, 0.823135256767273, 1.08297598361969, 1.3428168296813965, 1.602657675743103, 1.86249840259552, 2.1223392486572266, 2.3821799755096436, 2.6420207023620605, 2.9018616676330566, 3.1617021560668945, 3.4215431213378906, 3.6813840866088867, 3.9412248134613037, 4.201065540313721, 4.460906505584717, 4.720746994018555, 4.980587959289551, 5.240428924560547, 5.500269412994385, 5.760110378265381, 6.019950866699219, 6.279791831970215, 6.539632320404053, 6.799473285675049, 7.059313774108887, 7.319154739379883, 7.578995704650879, 7.838836669921875, 8.098676681518555, 8.35851764678955, 8.618358612060547, 8.878198623657227, 9.138039588928223, 9.397880554199219, 9.657721519470215, 9.917562484741211, 10.17740249633789, 10.437243461608887, 10.697084426879883, 10.956924438476562, 11.216765403747559, 11.476606369018555, 11.73644733428955, 11.996288299560547, 12.256129264831543, 12.515970230102539, 12.775811195373535, 13.035651206970215, 13.295492172241211, 13.555333137512207, 13.815174102783203, 14.0750150680542, 14.334856033325195, 14.594696998596191, 14.854537963867188, 15.114377975463867, 15.374218940734863, 15.63405990600586, 15.893900871276855, 16.15374183654785, 16.41358184814453]}, "gradients/decoder.transformer.h.9.ln_cross_attn.bias": {"_type": "histogram", "values": [3.0, 1.0, 1.0, 0.0, 2.0, 3.0, 2.0, 1.0, 4.0, 5.0, 6.0, 6.0, 6.0, 7.0, 7.0, 12.0, 17.0, 17.0, 20.0, 19.0, 18.0, 25.0, 25.0, 27.0, 23.0, 39.0, 36.0, 30.0, 29.0, 37.0, 52.0, 43.0, 42.0, 40.0, 39.0, 33.0, 32.0, 30.0, 26.0, 38.0, 34.0, 22.0, 19.0, 27.0, 16.0, 11.0, 10.0, 11.0, 11.0, 11.0, 7.0, 5.0, 3.0, 7.0, 6.0, 3.0, 5.0, 1.0, 2.0, 2.0, 1.0, 5.0, 0.0, 2.0], "bins": [-0.88971346616745, -0.8618452548980713, -0.8339771032333374, -0.8061088919639587, -0.7782406806945801, -0.7503724694252014, -0.7225042581558228, -0.6946361064910889, -0.6667678952217102, -0.6388996839523315, -0.6110315322875977, -0.583163321018219, -0.5552951097488403, -0.5274268984794617, -0.4995587170124054, -0.4716905355453491, -0.44382232427597046, -0.4159541130065918, -0.3880859315395355, -0.36021775007247925, -0.3323495388031006, -0.3044813275337219, -0.27661314606666565, -0.24874494969844818, -0.2208767533302307, -0.19300855696201324, -0.16514036059379578, -0.1372721642255783, -0.10940396785736084, -0.08153577148914337, -0.0536675751209259, -0.025799378752708435, 0.002068758010864258, 0.029936954379081726, 0.057805150747299194, 0.08567334711551666, 0.11354154348373413, 0.1414097398519516, 0.16927793622016907, 0.19714613258838654, 0.225014328956604, 0.25288254022598267, 0.28075072169303894, 0.3086189031600952, 0.3364871144294739, 0.36435532569885254, 0.3922235071659088, 0.4200916886329651, 0.44795989990234375, 0.4758281111717224, 0.5036963224411011, 0.531564474105835, 0.5594326853752136, 0.5873008966445923, 0.6151690483093262, 0.6430372595787048, 0.6709054708480835, 0.6987736821174622, 0.7266418933868408, 0.7545100450515747, 0.7823782563209534, 0.810246467590332, 0.8381146192550659, 0.8659828305244446, 0.8938510417938232]}, "gradients/decoder.transformer.h.9.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 3.0, 0.0, 4.0, 6.0, 1.0, 5.0, 2.0, 4.0, 3.0, 13.0, 12.0, 5.0, 13.0, 14.0, 21.0, 26.0, 24.0, 26.0, 39.0, 48.0, 38.0, 35.0, 55.0, 47.0, 43.0, 54.0, 51.0, 37.0, 40.0, 38.0, 45.0, 26.0, 34.0, 32.0, 18.0, 33.0, 25.0, 18.0, 18.0, 10.0, 5.0, 16.0, 7.0, 8.0, 5.0, 3.0, 1.0, 3.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-63.3125, -61.0986328125, -58.884765625, -56.6708984375, -54.45703125, -52.2431640625, -50.029296875, -47.8154296875, -45.6015625, -43.3876953125, -41.173828125, -38.9599609375, -36.74609375, -34.5322265625, -32.318359375, -30.1044921875, -27.890625, -25.6767578125, -23.462890625, -21.2490234375, -19.03515625, -16.8212890625, -14.607421875, -12.3935546875, -10.1796875, -7.9658203125, -5.751953125, -3.5380859375, -1.32421875, 0.8896484375, 3.103515625, 5.3173828125, 7.53125, 9.7451171875, 11.958984375, 14.1728515625, 16.38671875, 18.6005859375, 20.814453125, 23.0283203125, 25.2421875, 27.4560546875, 29.669921875, 31.8837890625, 34.09765625, 36.3115234375, 38.525390625, 40.7392578125, 42.953125, 45.1669921875, 47.380859375, 49.5947265625, 51.80859375, 54.0224609375, 56.236328125, 58.4501953125, 60.6640625, 62.8779296875, 65.091796875, 67.3056640625, 69.51953125, 71.7333984375, 73.947265625, 76.1611328125, 78.375]}, "gradients/decoder.transformer.h.9.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 13.0, 10.0, 16.0, 20.0, 37.0, 32.0, 56.0, 61.0, 77.0, 110.0, 147.0, 222.0, 229.0, 358.0, 489.0, 641.0, 851.0, 1218.0, 1692.0, 2282.0, 3347.0, 5457.0, 14902.0, 203004.0, 765938.0, 26932.0, 7110.0, 3834.0, 2578.0, 1863.0, 1244.0, 968.0, 701.0, 545.0, 398.0, 282.0, 220.0, 161.0, 131.0, 95.0, 65.0, 63.0, 38.0, 27.0, 29.0, 17.0, 8.0, 16.0, 5.0, 6.0, 5.0, 4.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0], "bins": [-120.375, -116.384765625, -112.39453125, -108.404296875, -104.4140625, -100.423828125, -96.43359375, -92.443359375, -88.453125, -84.462890625, -80.47265625, -76.482421875, -72.4921875, -68.501953125, -64.51171875, -60.521484375, -56.53125, -52.541015625, -48.55078125, -44.560546875, -40.5703125, -36.580078125, -32.58984375, -28.599609375, -24.609375, -20.619140625, -16.62890625, -12.638671875, -8.6484375, -4.658203125, -0.66796875, 3.322265625, 7.3125, 11.302734375, 15.29296875, 19.283203125, 23.2734375, 27.263671875, 31.25390625, 35.244140625, 39.234375, 43.224609375, 47.21484375, 51.205078125, 55.1953125, 59.185546875, 63.17578125, 67.166015625, 71.15625, 75.146484375, 79.13671875, 83.126953125, 87.1171875, 91.107421875, 95.09765625, 99.087890625, 103.078125, 107.068359375, 111.05859375, 115.048828125, 119.0390625, 123.029296875, 127.01953125, 131.009765625, 135.0]}, "gradients/decoder.transformer.h.9.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 4.0, 5.0, 7.0, 2.0, 7.0, 6.0, 11.0, 8.0, 12.0, 18.0, 15.0, 15.0, 25.0, 27.0, 22.0, 33.0, 36.0, 50.0, 44.0, 54.0, 55.0, 105.0, 1914.0, 103.0, 61.0, 52.0, 70.0, 42.0, 36.0, 17.0, 24.0, 26.0, 25.0, 30.0, 15.0, 15.0, 16.0, 11.0, 8.0, 9.0, 3.0, 3.0, 5.0, 5.0, 3.0, 2.0, 5.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-229.5, -222.646484375, -215.79296875, -208.939453125, -202.0859375, -195.232421875, -188.37890625, -181.525390625, -174.671875, -167.818359375, -160.96484375, -154.111328125, -147.2578125, -140.404296875, -133.55078125, -126.697265625, -119.84375, -112.990234375, -106.13671875, -99.283203125, -92.4296875, -85.576171875, -78.72265625, -71.869140625, -65.015625, -58.162109375, -51.30859375, -44.455078125, -37.6015625, -30.748046875, -23.89453125, -17.041015625, -10.1875, -3.333984375, 3.51953125, 10.373046875, 17.2265625, 24.080078125, 30.93359375, 37.787109375, 44.640625, 51.494140625, 58.34765625, 65.201171875, 72.0546875, 78.908203125, 85.76171875, 92.615234375, 99.46875, 106.322265625, 113.17578125, 120.029296875, 126.8828125, 133.736328125, 140.58984375, 147.443359375, 154.296875, 161.150390625, 168.00390625, 174.857421875, 181.7109375, 188.564453125, 195.41796875, 202.271484375, 209.125]}, "gradients/decoder.transformer.h.9.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 2.0, 3.0, 3.0, 1.0, 5.0, 9.0, 1.0, 7.0, 10.0, 8.0, 19.0, 19.0, 19.0, 27.0, 32.0, 59.0, 56.0, 101.0, 247.0, 594.0, 1414.0, 3802.0, 16874.0, 3047374.0, 63055.0, 7866.0, 2322.0, 882.0, 356.0, 228.0, 78.0, 55.0, 54.0, 25.0, 18.0, 17.0, 16.0, 15.0, 5.0, 4.0, 5.0, 6.0, 11.0, 4.0, 2.0, 4.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-679.0, -658.28125, -637.5625, -616.84375, -596.125, -575.40625, -554.6875, -533.96875, -513.25, -492.53125, -471.8125, -451.09375, -430.375, -409.65625, -388.9375, -368.21875, -347.5, -326.78125, -306.0625, -285.34375, -264.625, -243.90625, -223.1875, -202.46875, -181.75, -161.03125, -140.3125, -119.59375, -98.875, -78.15625, -57.4375, -36.71875, -16.0, 4.71875, 25.4375, 46.15625, 66.875, 87.59375, 108.3125, 129.03125, 149.75, 170.46875, 191.1875, 211.90625, 232.625, 253.34375, 274.0625, 294.78125, 315.5, 336.21875, 356.9375, 377.65625, 398.375, 419.09375, 439.8125, 460.53125, 481.25, 501.96875, 522.6875, 543.40625, 564.125, 584.84375, 605.5625, 626.28125, 647.0]}, "gradients/decoder.transformer.h.9.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 3.0, 7.0, 3.0, 7.0, 14.0, 20.0, 38.0, 60.0, 91.0, 152.0, 172.0, 180.0, 97.0, 59.0, 46.0, 27.0, 16.0, 7.0, 5.0, 1.0, 3.0, 1.0, 2.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-542.3075561523438, -526.6310424804688, -510.95452880859375, -495.27801513671875, -479.6015319824219, -463.9250183105469, -448.2485046386719, -432.5719909667969, -416.8955078125, -401.218994140625, -385.54248046875, -369.865966796875, -354.1894836425781, -338.5129699707031, -322.8364562988281, -307.1599426269531, -291.4834289550781, -275.8069152832031, -260.1304016113281, -244.4539031982422, -228.77740478515625, -213.10089111328125, -197.42437744140625, -181.74786376953125, -166.0713653564453, -150.3948516845703, -134.71835327148438, -119.04183959960938, -103.3653335571289, -87.68882751464844, -72.01231384277344, -56.33580780029297, -40.6593017578125, -24.9827938079834, -9.306285858154297, 6.3702239990234375, 22.046730041503906, 37.723236083984375, 53.399749755859375, 69.07625579833984, 84.75276184082031, 100.42926788330078, 116.10577392578125, 131.78228759765625, 147.45880126953125, 163.1352996826172, 178.8118133544922, 194.48831176757812, 210.16482543945312, 225.84133911132812, 241.51783752441406, 257.1943359375, 272.870849609375, 288.54736328125, 304.223876953125, 319.900390625, 335.576904296875, 351.25341796875, 366.929931640625, 382.6064453125, 398.2829284667969, 413.9594421386719, 429.6359558105469, 445.3124694824219, 460.98895263671875]}, "gradients/decoder.transformer.h.9.ln_1.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 2.0, 2.0, 2.0, 4.0, 8.0, 8.0, 8.0, 19.0, 13.0, 12.0, 13.0, 17.0, 22.0, 23.0, 22.0, 21.0, 25.0, 36.0, 35.0, 37.0, 38.0, 36.0, 39.0, 53.0, 48.0, 42.0, 51.0, 34.0, 37.0, 35.0, 47.0, 35.0, 29.0, 27.0, 24.0, 26.0, 19.0, 11.0, 11.0, 13.0, 8.0, 3.0, 5.0, 5.0, 4.0, 2.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-574.0869140625, -555.5037231445312, -536.9205932617188, -518.33740234375, -499.7542419433594, -481.17108154296875, -462.587890625, -444.0047302246094, -425.42156982421875, -406.8384094238281, -388.2552490234375, -369.67205810546875, -351.0888977050781, -332.5057373046875, -313.92254638671875, -295.3393859863281, -276.7562255859375, -258.1730651855469, -239.5898895263672, -221.0067138671875, -202.42355346679688, -183.84039306640625, -165.25721740722656, -146.67404174804688, -128.09088134765625, -109.5077133178711, -90.92454528808594, -72.34137725830078, -53.758209228515625, -35.17504119873047, -16.591873168945312, 1.991302490234375, 20.57452392578125, 39.157691955566406, 57.74085998535156, 76.32402801513672, 94.90719604492188, 113.49036407470703, 132.0735321044922, 150.65670776367188, 169.2398681640625, 187.82302856445312, 206.4062042236328, 224.9893798828125, 243.57254028320312, 262.15570068359375, 280.7388916015625, 299.3220520019531, 317.90521240234375, 336.4883728027344, 355.071533203125, 373.65472412109375, 392.2378845214844, 410.821044921875, 429.40423583984375, 447.9873962402344, 466.570556640625, 485.1537170410156, 503.73687744140625, 522.320068359375, 540.9031982421875, 559.4863891601562, 578.069580078125, 596.6527099609375, 615.2359008789062]}, "gradients/decoder.transformer.h.8.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 2.0, 2.0, 1.0, 3.0, 4.0, 5.0, 5.0, 3.0, 5.0, 1.0, 15.0, 9.0, 5.0, 12.0, 16.0, 20.0, 19.0, 27.0, 36.0, 26.0, 44.0, 34.0, 40.0, 37.0, 44.0, 44.0, 50.0, 36.0, 52.0, 44.0, 40.0, 41.0, 34.0, 29.0, 20.0, 28.0, 33.0, 23.0, 21.0, 21.0, 18.0, 13.0, 10.0, 9.0, 9.0, 7.0, 7.0, 2.0, 4.0, 2.0, 3.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-67.625, -65.3828125, -63.140625, -60.8984375, -58.65625, -56.4140625, -54.171875, -51.9296875, -49.6875, -47.4453125, -45.203125, -42.9609375, -40.71875, -38.4765625, -36.234375, -33.9921875, -31.75, -29.5078125, -27.265625, -25.0234375, -22.78125, -20.5390625, -18.296875, -16.0546875, -13.8125, -11.5703125, -9.328125, -7.0859375, -4.84375, -2.6015625, -0.359375, 1.8828125, 4.125, 6.3671875, 8.609375, 10.8515625, 13.09375, 15.3359375, 17.578125, 19.8203125, 22.0625, 24.3046875, 26.546875, 28.7890625, 31.03125, 33.2734375, 35.515625, 37.7578125, 40.0, 42.2421875, 44.484375, 46.7265625, 48.96875, 51.2109375, 53.453125, 55.6953125, 57.9375, 60.1796875, 62.421875, 64.6640625, 66.90625, 69.1484375, 71.390625, 73.6328125, 75.875]}, "gradients/decoder.transformer.h.8.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 3.0, 4.0, 1.0, 14.0, 17.0, 26.0, 38.0, 43.0, 89.0, 111.0, 158.0, 240.0, 398.0, 626.0, 1046.0, 1869.0, 3302.0, 6191.0, 12461.0, 30777.0, 228409.0, 2920192.0, 882692.0, 67863.0, 18637.0, 8553.0, 4409.0, 2444.0, 1347.0, 809.0, 518.0, 334.0, 199.0, 140.0, 97.0, 70.0, 55.0, 33.0, 20.0, 21.0, 13.0, 10.0, 9.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-160.375, -155.4765625, -150.578125, -145.6796875, -140.78125, -135.8828125, -130.984375, -126.0859375, -121.1875, -116.2890625, -111.390625, -106.4921875, -101.59375, -96.6953125, -91.796875, -86.8984375, -82.0, -77.1015625, -72.203125, -67.3046875, -62.40625, -57.5078125, -52.609375, -47.7109375, -42.8125, -37.9140625, -33.015625, -28.1171875, -23.21875, -18.3203125, -13.421875, -8.5234375, -3.625, 1.2734375, 6.171875, 11.0703125, 15.96875, 20.8671875, 25.765625, 30.6640625, 35.5625, 40.4609375, 45.359375, 50.2578125, 55.15625, 60.0546875, 64.953125, 69.8515625, 74.75, 79.6484375, 84.546875, 89.4453125, 94.34375, 99.2421875, 104.140625, 109.0390625, 113.9375, 118.8359375, 123.734375, 128.6328125, 133.53125, 138.4296875, 143.328125, 148.2265625, 153.125]}, "gradients/decoder.transformer.h.8.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 1.0, 3.0, 6.0, 5.0, 6.0, 10.0, 13.0, 15.0, 16.0, 43.0, 78.0, 115.0, 238.0, 432.0, 957.0, 1107.0, 524.0, 223.0, 109.0, 77.0, 34.0, 24.0, 14.0, 10.0, 1.0, 4.0, 3.0, 4.0, 3.0, 2.0, 1.0, 6.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-245.125, -237.93359375, -230.7421875, -223.55078125, -216.359375, -209.16796875, -201.9765625, -194.78515625, -187.59375, -180.40234375, -173.2109375, -166.01953125, -158.828125, -151.63671875, -144.4453125, -137.25390625, -130.0625, -122.87109375, -115.6796875, -108.48828125, -101.296875, -94.10546875, -86.9140625, -79.72265625, -72.53125, -65.33984375, -58.1484375, -50.95703125, -43.765625, -36.57421875, -29.3828125, -22.19140625, -15.0, -7.80859375, -0.6171875, 6.57421875, 13.765625, 20.95703125, 28.1484375, 35.33984375, 42.53125, 49.72265625, 56.9140625, 64.10546875, 71.296875, 78.48828125, 85.6796875, 92.87109375, 100.0625, 107.25390625, 114.4453125, 121.63671875, 128.828125, 136.01953125, 143.2109375, 150.40234375, 157.59375, 164.78515625, 171.9765625, 179.16796875, 186.359375, 193.55078125, 200.7421875, 207.93359375, 215.125]}, "gradients/decoder.transformer.h.8.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0, 2.0, 4.0, 2.0, 3.0, 4.0, 10.0, 16.0, 19.0, 25.0, 43.0, 81.0, 126.0, 227.0, 411.0, 905.0, 3268.0, 21418.0, 3478581.0, 670476.0, 14394.0, 2414.0, 870.0, 436.0, 207.0, 138.0, 74.0, 54.0, 29.0, 15.0, 16.0, 4.0, 3.0, 9.0, 2.0, 4.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-699.0, -677.1015625, -655.203125, -633.3046875, -611.40625, -589.5078125, -567.609375, -545.7109375, -523.8125, -501.9140625, -480.015625, -458.1171875, -436.21875, -414.3203125, -392.421875, -370.5234375, -348.625, -326.7265625, -304.828125, -282.9296875, -261.03125, -239.1328125, -217.234375, -195.3359375, -173.4375, -151.5390625, -129.640625, -107.7421875, -85.84375, -63.9453125, -42.046875, -20.1484375, 1.75, 23.6484375, 45.546875, 67.4453125, 89.34375, 111.2421875, 133.140625, 155.0390625, 176.9375, 198.8359375, 220.734375, 242.6328125, 264.53125, 286.4296875, 308.328125, 330.2265625, 352.125, 374.0234375, 395.921875, 417.8203125, 439.71875, 461.6171875, 483.515625, 505.4140625, 527.3125, 549.2109375, 571.109375, 593.0078125, 614.90625, 636.8046875, 658.703125, 680.6015625, 702.5]}, "gradients/decoder.transformer.h.8.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 4.0, 9.0, 13.0, 16.0, 25.0, 71.0, 141.0, 206.0, 253.0, 137.0, 72.0, 43.0, 11.0, 9.0, 5.0, 1.0, 0.0, 1.0, 2.0], "bins": [-1969.292236328125, -1932.192138671875, -1895.092041015625, -1857.991943359375, -1820.891845703125, -1783.791748046875, -1746.691650390625, -1709.591552734375, -1672.491455078125, -1635.391357421875, -1598.291259765625, -1561.191162109375, -1524.091064453125, -1486.990966796875, -1449.890869140625, -1412.790771484375, -1375.690673828125, -1338.590576171875, -1301.490478515625, -1264.390380859375, -1227.290283203125, -1190.190185546875, -1153.090087890625, -1115.989990234375, -1078.8897705078125, -1041.7896728515625, -1004.6895751953125, -967.5894775390625, -930.4893798828125, -893.3892822265625, -856.2891845703125, -819.1890869140625, -782.0889892578125, -744.9888916015625, -707.8887939453125, -670.7886962890625, -633.6885986328125, -596.5885009765625, -559.4884033203125, -522.3883056640625, -485.2882080078125, -448.1881103515625, -411.0880126953125, -373.9879150390625, -336.8878173828125, -299.7877197265625, -262.6875915527344, -225.58749389648438, -188.48739624023438, -151.38729858398438, -114.28719329833984, -77.18708801269531, -40.08699035644531, -2.9868927001953125, 34.11322021484375, 71.21331787109375, 108.31341552734375, 145.41351318359375, 182.51361083984375, 219.6137237548828, 256.71380615234375, 293.81390380859375, 330.9140319824219, 368.0141296386719, 405.1142272949219]}, "gradients/decoder.transformer.h.8.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 4.0, 2.0, 2.0, 4.0, 6.0, 4.0, 6.0, 8.0, 6.0, 12.0, 11.0, 14.0, 13.0, 19.0, 17.0, 25.0, 22.0, 28.0, 34.0, 37.0, 38.0, 42.0, 37.0, 41.0, 34.0, 53.0, 46.0, 43.0, 44.0, 32.0, 42.0, 39.0, 35.0, 33.0, 25.0, 33.0, 13.0, 19.0, 20.0, 14.0, 11.0, 11.0, 9.0, 9.0, 5.0, 5.0, 3.0, 4.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-400.17364501953125, -385.8074035644531, -371.441162109375, -357.0749206542969, -342.70867919921875, -328.34246826171875, -313.9761962890625, -299.6099853515625, -285.2437438964844, -270.87750244140625, -256.5112609863281, -242.14501953125, -227.77879333496094, -213.4125518798828, -199.0463104248047, -184.68008422851562, -170.31382751464844, -155.9475860595703, -141.5813446044922, -127.2151107788086, -112.848876953125, -98.48263549804688, -84.11639404296875, -69.75016021728516, -55.38391876220703, -41.01768112182617, -26.65144157409668, -12.285202026367188, 2.081035614013672, 16.44727325439453, 30.813514709472656, 45.17974853515625, 59.545989990234375, 73.9122314453125, 88.2784652709961, 102.64470672607422, 117.01094055175781, 131.37718200683594, 145.74342346191406, 160.10964965820312, 174.47589111328125, 188.84213256835938, 203.2083740234375, 217.57461547851562, 231.9408416748047, 246.3070831298828, 260.67333984375, 275.03955078125, 289.40582275390625, 303.7720642089844, 318.1383056640625, 332.5045471191406, 346.87078857421875, 361.23699951171875, 375.603271484375, 389.969482421875, 404.3357238769531, 418.70196533203125, 433.0682067871094, 447.4344482421875, 461.8006896972656, 476.16693115234375, 490.53314208984375, 504.8993835449219, 519.265625]}, "gradients/decoder.transformer.h.8.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 2.0, 6.0, 1.0, 3.0, 2.0, 7.0, 4.0, 7.0, 6.0, 8.0, 10.0, 11.0, 16.0, 14.0, 21.0, 31.0, 24.0, 30.0, 41.0, 41.0, 41.0, 42.0, 47.0, 36.0, 57.0, 52.0, 44.0, 38.0, 58.0, 40.0, 41.0, 26.0, 28.0, 23.0, 20.0, 22.0, 20.0, 16.0, 21.0, 16.0, 11.0, 6.0, 6.0, 7.0, 3.0, 6.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-75.6875, -73.244140625, -70.80078125, -68.357421875, -65.9140625, -63.470703125, -61.02734375, -58.583984375, -56.140625, -53.697265625, -51.25390625, -48.810546875, -46.3671875, -43.923828125, -41.48046875, -39.037109375, -36.59375, -34.150390625, -31.70703125, -29.263671875, -26.8203125, -24.376953125, -21.93359375, -19.490234375, -17.046875, -14.603515625, -12.16015625, -9.716796875, -7.2734375, -4.830078125, -2.38671875, 0.056640625, 2.5, 4.943359375, 7.38671875, 9.830078125, 12.2734375, 14.716796875, 17.16015625, 19.603515625, 22.046875, 24.490234375, 26.93359375, 29.376953125, 31.8203125, 34.263671875, 36.70703125, 39.150390625, 41.59375, 44.037109375, 46.48046875, 48.923828125, 51.3671875, 53.810546875, 56.25390625, 58.697265625, 61.140625, 63.583984375, 66.02734375, 68.470703125, 70.9140625, 73.357421875, 75.80078125, 78.244140625, 80.6875]}, "gradients/decoder.transformer.h.8.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 6.0, 1.0, 8.0, 10.0, 7.0, 14.0, 14.0, 33.0, 30.0, 57.0, 54.0, 85.0, 125.0, 170.0, 255.0, 361.0, 488.0, 786.0, 1204.0, 2019.0, 3429.0, 6329.0, 12182.0, 24633.0, 54877.0, 135188.0, 385734.0, 251039.0, 90778.0, 38690.0, 18475.0, 9021.0, 4916.0, 2801.0, 1659.0, 991.0, 668.0, 389.0, 303.0, 196.0, 146.0, 121.0, 73.0, 52.0, 46.0, 28.0, 18.0, 13.0, 13.0, 10.0, 11.0, 6.0, 1.0, 2.0, 1.0, 1.0, 2.0, 2.0], "bins": [-11.8203125, -11.4598388671875, -11.099365234375, -10.7388916015625, -10.37841796875, -10.0179443359375, -9.657470703125, -9.2969970703125, -8.9365234375, -8.5760498046875, -8.215576171875, -7.8551025390625, -7.49462890625, -7.1341552734375, -6.773681640625, -6.4132080078125, -6.052734375, -5.6922607421875, -5.331787109375, -4.9713134765625, -4.61083984375, -4.2503662109375, -3.889892578125, -3.5294189453125, -3.1689453125, -2.8084716796875, -2.447998046875, -2.0875244140625, -1.72705078125, -1.3665771484375, -1.006103515625, -0.6456298828125, -0.28515625, 0.0753173828125, 0.435791015625, 0.7962646484375, 1.15673828125, 1.5172119140625, 1.877685546875, 2.2381591796875, 2.5986328125, 2.9591064453125, 3.319580078125, 3.6800537109375, 4.04052734375, 4.4010009765625, 4.761474609375, 5.1219482421875, 5.482421875, 5.8428955078125, 6.203369140625, 6.5638427734375, 6.92431640625, 7.2847900390625, 7.645263671875, 8.0057373046875, 8.3662109375, 8.7266845703125, 9.087158203125, 9.4476318359375, 9.80810546875, 10.1685791015625, 10.529052734375, 10.8895263671875, 11.25]}, "gradients/decoder.transformer.h.8.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 2.0, 5.0, 3.0, 8.0, 7.0, 15.0, 16.0, 24.0, 18.0, 23.0, 30.0, 29.0, 28.0, 40.0, 38.0, 39.0, 50.0, 33.0, 46.0, 41.0, 1061.0, 47.0, 37.0, 44.0, 41.0, 53.0, 31.0, 38.0, 24.0, 21.0, 25.0, 28.0, 22.0, 16.0, 9.0, 8.0, 7.0, 3.0, 5.0, 6.0, 3.0, 4.0, 2.0, 3.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-50.96875, -49.341796875, -47.71484375, -46.087890625, -44.4609375, -42.833984375, -41.20703125, -39.580078125, -37.953125, -36.326171875, -34.69921875, -33.072265625, -31.4453125, -29.818359375, -28.19140625, -26.564453125, -24.9375, -23.310546875, -21.68359375, -20.056640625, -18.4296875, -16.802734375, -15.17578125, -13.548828125, -11.921875, -10.294921875, -8.66796875, -7.041015625, -5.4140625, -3.787109375, -2.16015625, -0.533203125, 1.09375, 2.720703125, 4.34765625, 5.974609375, 7.6015625, 9.228515625, 10.85546875, 12.482421875, 14.109375, 15.736328125, 17.36328125, 18.990234375, 20.6171875, 22.244140625, 23.87109375, 25.498046875, 27.125, 28.751953125, 30.37890625, 32.005859375, 33.6328125, 35.259765625, 36.88671875, 38.513671875, 40.140625, 41.767578125, 43.39453125, 45.021484375, 46.6484375, 48.275390625, 49.90234375, 51.529296875, 53.15625]}, "gradients/decoder.transformer.h.8.crossattention.c_attn.weight": {"_type": "histogram", "values": [3.0, 1.0, 1.0, 1.0, 5.0, 2.0, 5.0, 8.0, 17.0, 19.0, 22.0, 50.0, 63.0, 67.0, 114.0, 152.0, 247.0, 348.0, 467.0, 745.0, 1064.0, 1593.0, 2527.0, 3809.0, 5880.0, 9478.0, 15516.0, 25678.0, 44113.0, 77546.0, 141837.0, 1276081.0, 230275.0, 108053.0, 60726.0, 34664.0, 20874.0, 12721.0, 7914.0, 4964.0, 3249.0, 2102.0, 1335.0, 864.0, 638.0, 416.0, 277.0, 178.0, 128.0, 99.0, 65.0, 52.0, 32.0, 21.0, 16.0, 8.0, 5.0, 5.0, 4.0, 2.0, 0.0, 3.0, 1.0, 2.0], "bins": [-3.91015625, -3.787139892578125, -3.66412353515625, -3.541107177734375, -3.4180908203125, -3.295074462890625, -3.17205810546875, -3.049041748046875, -2.926025390625, -2.803009033203125, -2.67999267578125, -2.556976318359375, -2.4339599609375, -2.310943603515625, -2.18792724609375, -2.064910888671875, -1.94189453125, -1.818878173828125, -1.69586181640625, -1.572845458984375, -1.4498291015625, -1.326812744140625, -1.20379638671875, -1.080780029296875, -0.957763671875, -0.834747314453125, -0.71173095703125, -0.588714599609375, -0.4656982421875, -0.342681884765625, -0.21966552734375, -0.096649169921875, 0.0263671875, 0.149383544921875, 0.27239990234375, 0.395416259765625, 0.5184326171875, 0.641448974609375, 0.76446533203125, 0.887481689453125, 1.010498046875, 1.133514404296875, 1.25653076171875, 1.379547119140625, 1.5025634765625, 1.625579833984375, 1.74859619140625, 1.871612548828125, 1.99462890625, 2.117645263671875, 2.24066162109375, 2.363677978515625, 2.4866943359375, 2.609710693359375, 2.73272705078125, 2.855743408203125, 2.978759765625, 3.101776123046875, 3.22479248046875, 3.347808837890625, 3.4708251953125, 3.593841552734375, 3.71685791015625, 3.839874267578125, 3.962890625]}, "gradients/decoder.transformer.h.8.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 4.0, 2.0, 2.0, 4.0, 4.0, 1.0, 4.0, 1.0, 2.0, 9.0, 10.0, 13.0, 8.0, 15.0, 5.0, 18.0, 8.0, 21.0, 38.0, 56.0, 136.0, 304.0, 119.0, 45.0, 31.0, 22.0, 19.0, 21.0, 8.0, 12.0, 9.0, 11.0, 8.0, 9.0, 10.0, 2.0, 4.0, 3.0, 2.0, 2.0, 1.0, 4.0, 2.0, 0.0, 3.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0], "bins": [-0.346435546875, -0.3354148864746094, -0.32439422607421875, -0.3133735656738281, -0.3023529052734375, -0.2913322448730469, -0.28031158447265625, -0.2692909240722656, -0.258270263671875, -0.24724960327148438, -0.23622894287109375, -0.22520828247070312, -0.2141876220703125, -0.20316696166992188, -0.19214630126953125, -0.18112564086914062, -0.17010498046875, -0.15908432006835938, -0.14806365966796875, -0.13704299926757812, -0.1260223388671875, -0.11500167846679688, -0.10398101806640625, -0.09296035766601562, -0.081939697265625, -0.07091903686523438, -0.05989837646484375, -0.048877716064453125, -0.0378570556640625, -0.026836395263671875, -0.01581573486328125, -0.004795074462890625, 0.0062255859375, 0.017246246337890625, 0.02826690673828125, 0.039287567138671875, 0.0503082275390625, 0.061328887939453125, 0.07234954833984375, 0.08337020874023438, 0.094390869140625, 0.10541152954101562, 0.11643218994140625, 0.12745285034179688, 0.1384735107421875, 0.14949417114257812, 0.16051483154296875, 0.17153549194335938, 0.18255615234375, 0.19357681274414062, 0.20459747314453125, 0.21561813354492188, 0.2266387939453125, 0.23765945434570312, 0.24868011474609375, 0.2597007751464844, 0.270721435546875, 0.2817420959472656, 0.29276275634765625, 0.3037834167480469, 0.3148040771484375, 0.3258247375488281, 0.33684539794921875, 0.3478660583496094, 0.35888671875]}, "gradients/decoder.transformer.h.8.crossattention.q_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 3.0, 2.0, 2.0, 2.0, 2.0, 3.0, 4.0, 2.0, 10.0, 6.0, 11.0, 11.0, 7.0, 12.0, 6.0, 18.0, 23.0, 33.0, 49.0, 89.0, 190.0, 1046769.0, 964.0, 114.0, 68.0, 35.0, 25.0, 13.0, 10.0, 13.0, 9.0, 11.0, 8.0, 8.0, 6.0, 2.0, 2.0, 2.0, 4.0, 3.0, 3.0, 2.0, 3.0, 3.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-11.109375, -10.770751953125, -10.43212890625, -10.093505859375, -9.7548828125, -9.416259765625, -9.07763671875, -8.739013671875, -8.400390625, -8.061767578125, -7.72314453125, -7.384521484375, -7.0458984375, -6.707275390625, -6.36865234375, -6.030029296875, -5.69140625, -5.352783203125, -5.01416015625, -4.675537109375, -4.3369140625, -3.998291015625, -3.65966796875, -3.321044921875, -2.982421875, -2.643798828125, -2.30517578125, -1.966552734375, -1.6279296875, -1.289306640625, -0.95068359375, -0.612060546875, -0.2734375, 0.065185546875, 0.40380859375, 0.742431640625, 1.0810546875, 1.419677734375, 1.75830078125, 2.096923828125, 2.435546875, 2.774169921875, 3.11279296875, 3.451416015625, 3.7900390625, 4.128662109375, 4.46728515625, 4.805908203125, 5.14453125, 5.483154296875, 5.82177734375, 6.160400390625, 6.4990234375, 6.837646484375, 7.17626953125, 7.514892578125, 7.853515625, 8.192138671875, 8.53076171875, 8.869384765625, 9.2080078125, 9.546630859375, 9.88525390625, 10.223876953125, 10.5625]}, "gradients/decoder.transformer.h.8.ln_cross_attn.weight": {"_type": "histogram", "values": [45.0, 937.0, 38.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.10511351376771927, -0.034976959228515625, 0.03515959531068802, 0.10529615730047226, 0.1754327118396759, 0.24556925892829895, 0.3157058358192444, 0.38584238290786743, 0.4559789299964905, 0.5261154770851135, 0.5962520241737366, 0.6663886308670044, 0.7365251779556274, 0.8066617250442505, 0.8767982721328735, 0.9469348192214966, 1.0170713663101196, 1.0872079133987427, 1.1573444604873657, 1.2274810075759888, 1.2976175546646118, 1.3677541017532349, 1.4378907680511475, 1.5080273151397705, 1.578163743019104, 1.648300290107727, 1.71843683719635, 1.7885733842849731, 1.8587099313735962, 1.9288464784622192, 1.9989831447601318, 2.069119691848755, 2.139256238937378, 2.209392786026001, 2.279529333114624, 2.349665880203247, 2.41980242729187, 2.489938974380493, 2.560075521469116, 2.6302120685577393, 2.7003486156463623, 2.7704851627349854, 2.8406217098236084, 2.9107582569122314, 2.9808948040008545, 3.0510313510894775, 3.1211678981781006, 3.1913044452667236, 3.261441230773926, 3.331577777862549, 3.401714324951172, 3.471850872039795, 3.541987419128418, 3.612123966217041, 3.682260513305664, 3.752397060394287, 3.82253360748291, 3.892670154571533, 3.9628067016601562, 4.032943248748779, 4.103079795837402, 4.173216342926025, 4.243352890014648, 4.3134894371032715, 4.3836259841918945]}, "gradients/decoder.transformer.h.8.ln_cross_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 4.0, 2.0, 1.0, 5.0, 8.0, 9.0, 8.0, 11.0, 19.0, 11.0, 14.0, 19.0, 22.0, 33.0, 31.0, 36.0, 26.0, 29.0, 41.0, 56.0, 49.0, 37.0, 41.0, 36.0, 35.0, 43.0, 31.0, 41.0, 43.0, 30.0, 39.0, 27.0, 37.0, 25.0, 25.0, 13.0, 17.0, 10.0, 9.0, 11.0, 7.0, 8.0, 6.0, 1.0, 3.0, 3.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.22266161441802979, -0.21496491134166718, -0.20726822316646576, -0.19957152009010315, -0.19187483191490173, -0.18417812883853912, -0.1764814257621765, -0.1687847375869751, -0.1610880345106125, -0.15339133143424988, -0.14569464325904846, -0.13799794018268585, -0.13030123710632324, -0.12260454893112183, -0.11490784585475922, -0.1072111502289772, -0.09951445460319519, -0.09181775897741318, -0.08412106335163116, -0.07642436027526855, -0.06872766464948654, -0.06103096902370453, -0.05333426967263222, -0.045637570321559906, -0.03794087469577789, -0.03024417720735073, -0.02254747971892357, -0.014850782230496407, -0.007154084742069244, 0.0005426108837127686, 0.00823931023478508, 0.01593600958585739, 0.023632705211639404, 0.03132940083742142, 0.03902610018849373, 0.04672279953956604, 0.05441949516534805, 0.062116190791130066, 0.06981289386749268, 0.07750958949327469, 0.0852062851190567, 0.09290298074483871, 0.10059967637062073, 0.10829637944698334, 0.11599307507276535, 0.12368977069854736, 0.13138647377490997, 0.13908317685127258, 0.146779865026474, 0.1544765681028366, 0.16217325627803802, 0.16986995935440063, 0.17756664752960205, 0.18526335060596466, 0.19296005368232727, 0.2006567418575287, 0.2083534449338913, 0.2160501480102539, 0.22374683618545532, 0.23144353926181793, 0.23914024233818054, 0.24683693051338196, 0.2545336186885834, 0.2622303366661072, 0.2699270248413086]}, "gradients/decoder.transformer.h.8.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 2.0, 6.0, 1.0, 3.0, 2.0, 7.0, 4.0, 7.0, 6.0, 8.0, 9.0, 12.0, 16.0, 14.0, 21.0, 30.0, 25.0, 30.0, 41.0, 41.0, 39.0, 44.0, 47.0, 36.0, 56.0, 51.0, 45.0, 39.0, 57.0, 39.0, 43.0, 26.0, 28.0, 22.0, 21.0, 22.0, 19.0, 17.0, 21.0, 16.0, 10.0, 7.0, 6.0, 7.0, 3.0, 6.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-75.6875, -73.2451171875, -70.802734375, -68.3603515625, -65.91796875, -63.4755859375, -61.033203125, -58.5908203125, -56.1484375, -53.7060546875, -51.263671875, -48.8212890625, -46.37890625, -43.9365234375, -41.494140625, -39.0517578125, -36.609375, -34.1669921875, -31.724609375, -29.2822265625, -26.83984375, -24.3974609375, -21.955078125, -19.5126953125, -17.0703125, -14.6279296875, -12.185546875, -9.7431640625, -7.30078125, -4.8583984375, -2.416015625, 0.0263671875, 2.46875, 4.9111328125, 7.353515625, 9.7958984375, 12.23828125, 14.6806640625, 17.123046875, 19.5654296875, 22.0078125, 24.4501953125, 26.892578125, 29.3349609375, 31.77734375, 34.2197265625, 36.662109375, 39.1044921875, 41.546875, 43.9892578125, 46.431640625, 48.8740234375, 51.31640625, 53.7587890625, 56.201171875, 58.6435546875, 61.0859375, 63.5283203125, 65.970703125, 68.4130859375, 70.85546875, 73.2978515625, 75.740234375, 78.1826171875, 80.625]}, "gradients/decoder.transformer.h.8.attn.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 4.0, 1.0, 2.0, 4.0, 3.0, 7.0, 22.0, 20.0, 20.0, 22.0, 27.0, 45.0, 73.0, 95.0, 153.0, 219.0, 346.0, 433.0, 656.0, 1128.0, 1643.0, 2817.0, 5015.0, 9688.0, 23484.0, 96914.0, 722181.0, 131229.0, 27688.0, 11036.0, 5267.0, 3031.0, 1814.0, 1153.0, 746.0, 509.0, 331.0, 201.0, 153.0, 113.0, 76.0, 57.0, 48.0, 26.0, 16.0, 10.0, 11.0, 9.0, 7.0, 4.0, 6.0, 0.0, 2.0, 6.0, 1.0, 1.0], "bins": [-118.8125, -115.3759765625, -111.939453125, -108.5029296875, -105.06640625, -101.6298828125, -98.193359375, -94.7568359375, -91.3203125, -87.8837890625, -84.447265625, -81.0107421875, -77.57421875, -74.1376953125, -70.701171875, -67.2646484375, -63.828125, -60.3916015625, -56.955078125, -53.5185546875, -50.08203125, -46.6455078125, -43.208984375, -39.7724609375, -36.3359375, -32.8994140625, -29.462890625, -26.0263671875, -22.58984375, -19.1533203125, -15.716796875, -12.2802734375, -8.84375, -5.4072265625, -1.970703125, 1.4658203125, 4.90234375, 8.3388671875, 11.775390625, 15.2119140625, 18.6484375, 22.0849609375, 25.521484375, 28.9580078125, 32.39453125, 35.8310546875, 39.267578125, 42.7041015625, 46.140625, 49.5771484375, 53.013671875, 56.4501953125, 59.88671875, 63.3232421875, 66.759765625, 70.1962890625, 73.6328125, 77.0693359375, 80.505859375, 83.9423828125, 87.37890625, 90.8154296875, 94.251953125, 97.6884765625, 101.125]}, "gradients/decoder.transformer.h.8.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 2.0, 2.0, 0.0, 2.0, 3.0, 3.0, 0.0, 1.0, 6.0, 3.0, 7.0, 4.0, 11.0, 12.0, 21.0, 15.0, 23.0, 21.0, 15.0, 18.0, 30.0, 36.0, 35.0, 33.0, 43.0, 47.0, 58.0, 121.0, 1568.0, 336.0, 119.0, 57.0, 49.0, 37.0, 38.0, 38.0, 43.0, 22.0, 26.0, 20.0, 17.0, 22.0, 19.0, 12.0, 12.0, 15.0, 12.0, 8.0, 5.0, 4.0, 4.0, 4.0, 3.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0], "bins": [-237.5, -230.287109375, -223.07421875, -215.861328125, -208.6484375, -201.435546875, -194.22265625, -187.009765625, -179.796875, -172.583984375, -165.37109375, -158.158203125, -150.9453125, -143.732421875, -136.51953125, -129.306640625, -122.09375, -114.880859375, -107.66796875, -100.455078125, -93.2421875, -86.029296875, -78.81640625, -71.603515625, -64.390625, -57.177734375, -49.96484375, -42.751953125, -35.5390625, -28.326171875, -21.11328125, -13.900390625, -6.6875, 0.525390625, 7.73828125, 14.951171875, 22.1640625, 29.376953125, 36.58984375, 43.802734375, 51.015625, 58.228515625, 65.44140625, 72.654296875, 79.8671875, 87.080078125, 94.29296875, 101.505859375, 108.71875, 115.931640625, 123.14453125, 130.357421875, 137.5703125, 144.783203125, 151.99609375, 159.208984375, 166.421875, 173.634765625, 180.84765625, 188.060546875, 195.2734375, 202.486328125, 209.69921875, 216.912109375, 224.125]}, "gradients/decoder.transformer.h.8.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 2.0, 2.0, 0.0, 1.0, 2.0, 1.0, 7.0, 5.0, 3.0, 8.0, 8.0, 13.0, 12.0, 16.0, 18.0, 28.0, 32.0, 33.0, 53.0, 71.0, 95.0, 186.0, 266.0, 470.0, 929.0, 2293.0, 6443.0, 32071.0, 2946091.0, 138352.0, 11673.0, 3549.0, 1413.0, 626.0, 295.0, 223.0, 118.0, 70.0, 52.0, 35.0, 35.0, 28.0, 23.0, 14.0, 11.0, 10.0, 8.0, 5.0, 6.0, 2.0, 1.0, 2.0, 4.0, 0.0, 2.0, 3.0, 1.0, 3.0, 0.0, 2.0], "bins": [-399.0, -386.80859375, -374.6171875, -362.42578125, -350.234375, -338.04296875, -325.8515625, -313.66015625, -301.46875, -289.27734375, -277.0859375, -264.89453125, -252.703125, -240.51171875, -228.3203125, -216.12890625, -203.9375, -191.74609375, -179.5546875, -167.36328125, -155.171875, -142.98046875, -130.7890625, -118.59765625, -106.40625, -94.21484375, -82.0234375, -69.83203125, -57.640625, -45.44921875, -33.2578125, -21.06640625, -8.875, 3.31640625, 15.5078125, 27.69921875, 39.890625, 52.08203125, 64.2734375, 76.46484375, 88.65625, 100.84765625, 113.0390625, 125.23046875, 137.421875, 149.61328125, 161.8046875, 173.99609375, 186.1875, 198.37890625, 210.5703125, 222.76171875, 234.953125, 247.14453125, 259.3359375, 271.52734375, 283.71875, 295.91015625, 308.1015625, 320.29296875, 332.484375, 344.67578125, 356.8671875, 369.05859375, 381.25]}, "gradients/decoder.transformer.h.8.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 29.0, 700.0, 277.0, 10.0, 1.0], "bins": [-5922.0849609375, -5824.642578125, -5727.2001953125, -5629.7578125, -5532.3154296875, -5434.873046875, -5337.4306640625, -5239.98828125, -5142.5458984375, -5045.103515625, -4947.6611328125, -4850.21875, -4752.7763671875, -4655.333984375, -4557.8916015625, -4460.44921875, -4363.00634765625, -4265.56396484375, -4168.12158203125, -4070.67919921875, -3973.23681640625, -3875.79443359375, -3778.35205078125, -3680.909423828125, -3583.46728515625, -3486.02490234375, -3388.58251953125, -3291.14013671875, -3193.69775390625, -3096.25537109375, -2998.81298828125, -2901.370361328125, -2803.927978515625, -2706.485595703125, -2609.043212890625, -2511.600830078125, -2414.158447265625, -2316.7158203125, -2219.2734375, -2121.8310546875, -2024.3887939453125, -1926.9464111328125, -1829.5040283203125, -1732.0615234375, -1634.619140625, -1537.1767578125, -1439.734375, -1342.2919921875, -1244.8494873046875, -1147.4071044921875, -1049.9647216796875, -952.5222778320312, -855.079833984375, -757.637451171875, -660.195068359375, -562.7526245117188, -465.3102722167969, -367.86785888671875, -270.42547607421875, -172.98306274414062, -75.5406494140625, 21.901763916015625, 119.34414672851562, 216.78659057617188, 314.2289733886719]}, "gradients/decoder.transformer.h.8.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 2.0, 2.0, 2.0, 3.0, 3.0, 4.0, 13.0, 5.0, 13.0, 7.0, 16.0, 22.0, 19.0, 25.0, 21.0, 18.0, 29.0, 26.0, 24.0, 25.0, 42.0, 32.0, 38.0, 44.0, 50.0, 41.0, 42.0, 43.0, 44.0, 39.0, 31.0, 36.0, 33.0, 31.0, 33.0, 21.0, 19.0, 23.0, 19.0, 8.0, 14.0, 16.0, 12.0, 10.0, 1.0, 5.0, 5.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0], "bins": [-632.8162841796875, -614.3766479492188, -595.93701171875, -577.4973754882812, -559.0577392578125, -540.6181030273438, -522.178466796875, -503.7388610839844, -485.2992248535156, -466.8595886230469, -448.4199523925781, -429.9803161621094, -411.54071044921875, -393.10107421875, -374.66143798828125, -356.2218017578125, -337.78216552734375, -319.342529296875, -300.90289306640625, -282.4632568359375, -264.02362060546875, -245.58399963378906, -227.14437866210938, -208.70474243164062, -190.26510620117188, -171.82546997070312, -153.38583374023438, -134.9462127685547, -116.50657653808594, -98.06694030761719, -79.62731170654297, -61.18768310546875, -42.74810791015625, -24.308475494384766, -5.868843078613281, 12.570789337158203, 31.010421752929688, 49.45005798339844, 67.88968658447266, 86.32931518554688, 104.76895141601562, 123.20858764648438, 141.64822387695312, 160.0878448486328, 178.52748107910156, 196.9671173095703, 215.40673828125, 233.84637451171875, 252.2860107421875, 270.72564697265625, 289.165283203125, 307.60491943359375, 326.0445556640625, 344.48419189453125, 362.9237976074219, 381.3634338378906, 399.8030700683594, 418.2427062988281, 436.6823425292969, 455.1219787597656, 473.56158447265625, 492.001220703125, 510.44085693359375, 528.8804931640625, 547.3201293945312]}, "gradients/decoder.transformer.h.7.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 3.0, 2.0, 2.0, 4.0, 5.0, 6.0, 1.0, 4.0, 5.0, 10.0, 12.0, 15.0, 14.0, 20.0, 18.0, 27.0, 33.0, 35.0, 29.0, 43.0, 49.0, 35.0, 47.0, 41.0, 57.0, 63.0, 46.0, 43.0, 38.0, 52.0, 38.0, 24.0, 30.0, 24.0, 26.0, 15.0, 17.0, 18.0, 15.0, 12.0, 7.0, 11.0, 5.0, 4.0, 4.0, 3.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-84.375, -81.70703125, -79.0390625, -76.37109375, -73.703125, -71.03515625, -68.3671875, -65.69921875, -63.03125, -60.36328125, -57.6953125, -55.02734375, -52.359375, -49.69140625, -47.0234375, -44.35546875, -41.6875, -39.01953125, -36.3515625, -33.68359375, -31.015625, -28.34765625, -25.6796875, -23.01171875, -20.34375, -17.67578125, -15.0078125, -12.33984375, -9.671875, -7.00390625, -4.3359375, -1.66796875, 1.0, 3.66796875, 6.3359375, 9.00390625, 11.671875, 14.33984375, 17.0078125, 19.67578125, 22.34375, 25.01171875, 27.6796875, 30.34765625, 33.015625, 35.68359375, 38.3515625, 41.01953125, 43.6875, 46.35546875, 49.0234375, 51.69140625, 54.359375, 57.02734375, 59.6953125, 62.36328125, 65.03125, 67.69921875, 70.3671875, 73.03515625, 75.703125, 78.37109375, 81.0390625, 83.70703125, 86.375]}, "gradients/decoder.transformer.h.7.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 2.0, 4.0, 5.0, 9.0, 9.0, 9.0, 13.0, 22.0, 31.0, 41.0, 61.0, 76.0, 132.0, 176.0, 283.0, 426.0, 665.0, 1192.0, 2312.0, 4312.0, 9224.0, 22179.0, 93473.0, 1797371.0, 2109501.0, 109283.0, 23369.0, 9680.0, 4597.0, 2299.0, 1375.0, 749.0, 463.0, 289.0, 185.0, 144.0, 100.0, 74.0, 43.0, 33.0, 27.0, 12.0, 15.0, 6.0, 2.0, 7.0, 5.0, 1.0, 4.0, 1.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0], "bins": [-176.25, -170.74609375, -165.2421875, -159.73828125, -154.234375, -148.73046875, -143.2265625, -137.72265625, -132.21875, -126.71484375, -121.2109375, -115.70703125, -110.203125, -104.69921875, -99.1953125, -93.69140625, -88.1875, -82.68359375, -77.1796875, -71.67578125, -66.171875, -60.66796875, -55.1640625, -49.66015625, -44.15625, -38.65234375, -33.1484375, -27.64453125, -22.140625, -16.63671875, -11.1328125, -5.62890625, -0.125, 5.37890625, 10.8828125, 16.38671875, 21.890625, 27.39453125, 32.8984375, 38.40234375, 43.90625, 49.41015625, 54.9140625, 60.41796875, 65.921875, 71.42578125, 76.9296875, 82.43359375, 87.9375, 93.44140625, 98.9453125, 104.44921875, 109.953125, 115.45703125, 120.9609375, 126.46484375, 131.96875, 137.47265625, 142.9765625, 148.48046875, 153.984375, 159.48828125, 164.9921875, 170.49609375, 176.0]}, "gradients/decoder.transformer.h.7.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 4.0, 6.0, 12.0, 18.0, 20.0, 32.0, 58.0, 100.0, 300.0, 875.0, 1771.0, 507.0, 193.0, 68.0, 42.0, 29.0, 19.0, 9.0, 6.0, 8.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-263.0, -251.71875, -240.4375, -229.15625, -217.875, -206.59375, -195.3125, -184.03125, -172.75, -161.46875, -150.1875, -138.90625, -127.625, -116.34375, -105.0625, -93.78125, -82.5, -71.21875, -59.9375, -48.65625, -37.375, -26.09375, -14.8125, -3.53125, 7.75, 19.03125, 30.3125, 41.59375, 52.875, 64.15625, 75.4375, 86.71875, 98.0, 109.28125, 120.5625, 131.84375, 143.125, 154.40625, 165.6875, 176.96875, 188.25, 199.53125, 210.8125, 222.09375, 233.375, 244.65625, 255.9375, 267.21875, 278.5, 289.78125, 301.0625, 312.34375, 323.625, 334.90625, 346.1875, 357.46875, 368.75, 380.03125, 391.3125, 402.59375, 413.875, 425.15625, 436.4375, 447.71875, 459.0]}, "gradients/decoder.transformer.h.7.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 4.0, 4.0, 9.0, 24.0, 18.0, 44.0, 82.0, 136.0, 255.0, 619.0, 2449.0, 25177.0, 4015719.0, 141493.0, 6288.0, 1138.0, 399.0, 214.0, 87.0, 60.0, 32.0, 15.0, 10.0, 11.0, 1.0, 4.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1361.0, -1325.8671875, -1290.734375, -1255.6015625, -1220.46875, -1185.3359375, -1150.203125, -1115.0703125, -1079.9375, -1044.8046875, -1009.671875, -974.5390625, -939.40625, -904.2734375, -869.140625, -834.0078125, -798.875, -763.7421875, -728.609375, -693.4765625, -658.34375, -623.2109375, -588.078125, -552.9453125, -517.8125, -482.6796875, -447.546875, -412.4140625, -377.28125, -342.1484375, -307.015625, -271.8828125, -236.75, -201.6171875, -166.484375, -131.3515625, -96.21875, -61.0859375, -25.953125, 9.1796875, 44.3125, 79.4453125, 114.578125, 149.7109375, 184.84375, 219.9765625, 255.109375, 290.2421875, 325.375, 360.5078125, 395.640625, 430.7734375, 465.90625, 501.0390625, 536.171875, 571.3046875, 606.4375, 641.5703125, 676.703125, 711.8359375, 746.96875, 782.1015625, 817.234375, 852.3671875, 887.5]}, "gradients/decoder.transformer.h.7.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 6.0, 5.0, 2.0, 17.0, 18.0, 26.0, 61.0, 100.0, 167.0, 222.0, 177.0, 108.0, 57.0, 16.0, 24.0, 4.0, 4.0, 0.0, 4.0, 2.0, 0.0, 0.0, 1.0], "bins": [-1824.9630126953125, -1788.8408203125, -1752.7186279296875, -1716.596435546875, -1680.474365234375, -1644.3521728515625, -1608.22998046875, -1572.1077880859375, -1535.985595703125, -1499.8634033203125, -1463.7412109375, -1427.6190185546875, -1391.496826171875, -1355.374755859375, -1319.2525634765625, -1283.13037109375, -1247.0081787109375, -1210.885986328125, -1174.7637939453125, -1138.6416015625, -1102.51953125, -1066.3973388671875, -1030.275146484375, -994.1529541015625, -958.03076171875, -921.9085693359375, -885.786376953125, -849.6642456054688, -813.5420532226562, -777.4198608398438, -741.2977294921875, -705.175537109375, -669.053466796875, -632.9312744140625, -596.80908203125, -560.6869506835938, -524.5647583007812, -488.44256591796875, -452.3204040527344, -416.1982421875, -380.0760498046875, -343.953857421875, -307.8316955566406, -271.70953369140625, -235.58734130859375, -199.4651641845703, -163.34298706054688, -127.22080993652344, -91.0986328125, -54.97645568847656, -18.854278564453125, 17.267898559570312, 53.39007568359375, 89.51225280761719, 125.63442993164062, 161.75660705566406, 197.8787841796875, 234.00096130371094, 270.1231384277344, 306.24530029296875, 342.36749267578125, 378.48968505859375, 414.6118469238281, 450.7340087890625, 486.856201171875]}, "gradients/decoder.transformer.h.7.ln_2.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 5.0, 8.0, 3.0, 8.0, 7.0, 9.0, 15.0, 9.0, 12.0, 16.0, 16.0, 25.0, 26.0, 28.0, 30.0, 20.0, 21.0, 31.0, 39.0, 31.0, 27.0, 33.0, 38.0, 34.0, 35.0, 46.0, 42.0, 28.0, 35.0, 36.0, 36.0, 27.0, 25.0, 25.0, 24.0, 18.0, 22.0, 30.0, 8.0, 17.0, 9.0, 8.0, 10.0, 9.0, 4.0, 9.0, 5.0, 2.0, 4.0, 2.0, 4.0, 1.0, 2.0, 2.0, 0.0, 1.0], "bins": [-462.4507751464844, -447.8729553222656, -433.29510498046875, -418.71728515625, -404.13946533203125, -389.5616455078125, -374.98382568359375, -360.4059753417969, -345.8281555175781, -331.2503356933594, -316.6724853515625, -302.09466552734375, -287.516845703125, -272.93902587890625, -258.3612060546875, -243.78335571289062, -229.20553588867188, -214.62771606445312, -200.0498809814453, -185.4720458984375, -170.89422607421875, -156.31640625, -141.7385711669922, -127.1607437133789, -112.58291625976562, -98.00508880615234, -83.42726135253906, -68.84943389892578, -54.2716064453125, -39.69377899169922, -25.115951538085938, -10.538124084472656, 4.0396728515625, 18.61750030517578, 33.19532775878906, 47.773155212402344, 62.350982666015625, 76.9288101196289, 91.50663757324219, 106.08446502685547, 120.66229248046875, 135.2401123046875, 149.8179473876953, 164.39578247070312, 178.97360229492188, 193.55142211914062, 208.12925720214844, 222.70709228515625, 237.284912109375, 251.86273193359375, 266.4405517578125, 281.0184020996094, 295.5962219238281, 310.1740417480469, 324.75189208984375, 339.3297119140625, 353.90753173828125, 368.4853515625, 383.06317138671875, 397.6410217285156, 412.2188415527344, 426.7966613769531, 441.37451171875, 455.95233154296875, 470.5301513671875]}, "gradients/decoder.transformer.h.7.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 3.0, 3.0, 2.0, 3.0, 3.0, 4.0, 7.0, 17.0, 8.0, 6.0, 11.0, 11.0, 19.0, 20.0, 25.0, 25.0, 29.0, 32.0, 36.0, 39.0, 34.0, 51.0, 51.0, 36.0, 51.0, 36.0, 50.0, 50.0, 38.0, 45.0, 37.0, 32.0, 32.0, 25.0, 21.0, 21.0, 17.0, 19.0, 16.0, 9.0, 12.0, 6.0, 3.0, 8.0, 4.0, 3.0, 0.0, 3.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-84.8125, -82.03515625, -79.2578125, -76.48046875, -73.703125, -70.92578125, -68.1484375, -65.37109375, -62.59375, -59.81640625, -57.0390625, -54.26171875, -51.484375, -48.70703125, -45.9296875, -43.15234375, -40.375, -37.59765625, -34.8203125, -32.04296875, -29.265625, -26.48828125, -23.7109375, -20.93359375, -18.15625, -15.37890625, -12.6015625, -9.82421875, -7.046875, -4.26953125, -1.4921875, 1.28515625, 4.0625, 6.83984375, 9.6171875, 12.39453125, 15.171875, 17.94921875, 20.7265625, 23.50390625, 26.28125, 29.05859375, 31.8359375, 34.61328125, 37.390625, 40.16796875, 42.9453125, 45.72265625, 48.5, 51.27734375, 54.0546875, 56.83203125, 59.609375, 62.38671875, 65.1640625, 67.94140625, 70.71875, 73.49609375, 76.2734375, 79.05078125, 81.828125, 84.60546875, 87.3828125, 90.16015625, 92.9375]}, "gradients/decoder.transformer.h.7.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 3.0, 2.0, 3.0, 8.0, 10.0, 19.0, 17.0, 20.0, 32.0, 33.0, 48.0, 77.0, 126.0, 177.0, 243.0, 390.0, 691.0, 1114.0, 2006.0, 3437.0, 6414.0, 11988.0, 23828.0, 50778.0, 119138.0, 345655.0, 288821.0, 103221.0, 44447.0, 21317.0, 10925.0, 5725.0, 3231.0, 1787.0, 1039.0, 624.0, 382.0, 261.0, 147.0, 114.0, 76.0, 53.0, 30.0, 45.0, 14.0, 19.0, 6.0, 4.0, 6.0, 5.0, 8.0, 0.0, 3.0, 1.0, 2.0], "bins": [-13.3125, -12.9312744140625, -12.550048828125, -12.1688232421875, -11.78759765625, -11.4063720703125, -11.025146484375, -10.6439208984375, -10.2626953125, -9.8814697265625, -9.500244140625, -9.1190185546875, -8.73779296875, -8.3565673828125, -7.975341796875, -7.5941162109375, -7.212890625, -6.8316650390625, -6.450439453125, -6.0692138671875, -5.68798828125, -5.3067626953125, -4.925537109375, -4.5443115234375, -4.1630859375, -3.7818603515625, -3.400634765625, -3.0194091796875, -2.63818359375, -2.2569580078125, -1.875732421875, -1.4945068359375, -1.11328125, -0.7320556640625, -0.350830078125, 0.0303955078125, 0.41162109375, 0.7928466796875, 1.174072265625, 1.5552978515625, 1.9365234375, 2.3177490234375, 2.698974609375, 3.0802001953125, 3.46142578125, 3.8426513671875, 4.223876953125, 4.6051025390625, 4.986328125, 5.3675537109375, 5.748779296875, 6.1300048828125, 6.51123046875, 6.8924560546875, 7.273681640625, 7.6549072265625, 8.0361328125, 8.4173583984375, 8.798583984375, 9.1798095703125, 9.56103515625, 9.9422607421875, 10.323486328125, 10.7047119140625, 11.0859375]}, "gradients/decoder.transformer.h.7.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 2.0, 1.0, 5.0, 6.0, 6.0, 5.0, 9.0, 10.0, 15.0, 15.0, 20.0, 20.0, 19.0, 15.0, 30.0, 31.0, 31.0, 36.0, 34.0, 38.0, 32.0, 33.0, 50.0, 38.0, 1056.0, 52.0, 54.0, 39.0, 24.0, 41.0, 31.0, 32.0, 24.0, 28.0, 25.0, 16.0, 26.0, 20.0, 14.0, 3.0, 8.0, 12.0, 6.0, 6.0, 5.0, 6.0, 2.0, 4.0, 0.0, 0.0, 3.0, 0.0, 0.0, 2.0], "bins": [-58.59375, -56.88134765625, -55.1689453125, -53.45654296875, -51.744140625, -50.03173828125, -48.3193359375, -46.60693359375, -44.89453125, -43.18212890625, -41.4697265625, -39.75732421875, -38.044921875, -36.33251953125, -34.6201171875, -32.90771484375, -31.1953125, -29.48291015625, -27.7705078125, -26.05810546875, -24.345703125, -22.63330078125, -20.9208984375, -19.20849609375, -17.49609375, -15.78369140625, -14.0712890625, -12.35888671875, -10.646484375, -8.93408203125, -7.2216796875, -5.50927734375, -3.796875, -2.08447265625, -0.3720703125, 1.34033203125, 3.052734375, 4.76513671875, 6.4775390625, 8.18994140625, 9.90234375, 11.61474609375, 13.3271484375, 15.03955078125, 16.751953125, 18.46435546875, 20.1767578125, 21.88916015625, 23.6015625, 25.31396484375, 27.0263671875, 28.73876953125, 30.451171875, 32.16357421875, 33.8759765625, 35.58837890625, 37.30078125, 39.01318359375, 40.7255859375, 42.43798828125, 44.150390625, 45.86279296875, 47.5751953125, 49.28759765625, 51.0]}, "gradients/decoder.transformer.h.7.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 3.0, 6.0, 8.0, 15.0, 16.0, 20.0, 35.0, 58.0, 61.0, 106.0, 171.0, 245.0, 364.0, 516.0, 771.0, 1192.0, 1798.0, 2739.0, 4338.0, 6710.0, 10655.0, 16938.0, 27243.0, 45345.0, 76074.0, 140036.0, 1240165.0, 255333.0, 107085.0, 61044.0, 36708.0, 22519.0, 13904.0, 8717.0, 5504.0, 3750.0, 2410.0, 1484.0, 1002.0, 693.0, 433.0, 315.0, 203.0, 139.0, 74.0, 62.0, 45.0, 37.0, 17.0, 14.0, 9.0, 6.0, 7.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-4.19921875, -4.067138671875, -3.93505859375, -3.802978515625, -3.6708984375, -3.538818359375, -3.40673828125, -3.274658203125, -3.142578125, -3.010498046875, -2.87841796875, -2.746337890625, -2.6142578125, -2.482177734375, -2.35009765625, -2.218017578125, -2.0859375, -1.953857421875, -1.82177734375, -1.689697265625, -1.5576171875, -1.425537109375, -1.29345703125, -1.161376953125, -1.029296875, -0.897216796875, -0.76513671875, -0.633056640625, -0.5009765625, -0.368896484375, -0.23681640625, -0.104736328125, 0.02734375, 0.159423828125, 0.29150390625, 0.423583984375, 0.5556640625, 0.687744140625, 0.81982421875, 0.951904296875, 1.083984375, 1.216064453125, 1.34814453125, 1.480224609375, 1.6123046875, 1.744384765625, 1.87646484375, 2.008544921875, 2.140625, 2.272705078125, 2.40478515625, 2.536865234375, 2.6689453125, 2.801025390625, 2.93310546875, 3.065185546875, 3.197265625, 3.329345703125, 3.46142578125, 3.593505859375, 3.7255859375, 3.857666015625, 3.98974609375, 4.121826171875, 4.25390625]}, "gradients/decoder.transformer.h.7.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 1.0, 1.0, 6.0, 2.0, 3.0, 3.0, 6.0, 11.0, 14.0, 17.0, 13.0, 21.0, 31.0, 28.0, 50.0, 98.0, 243.0, 169.0, 96.0, 51.0, 35.0, 21.0, 17.0, 10.0, 15.0, 13.0, 6.0, 8.0, 9.0, 2.0, 3.0, 2.0, 3.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.93798828125, -0.9102935791015625, -0.882598876953125, -0.8549041748046875, -0.82720947265625, -0.7995147705078125, -0.771820068359375, -0.7441253662109375, -0.7164306640625, -0.6887359619140625, -0.661041259765625, -0.6333465576171875, -0.60565185546875, -0.5779571533203125, -0.550262451171875, -0.5225677490234375, -0.494873046875, -0.4671783447265625, -0.439483642578125, -0.4117889404296875, -0.38409423828125, -0.3563995361328125, -0.328704833984375, -0.3010101318359375, -0.2733154296875, -0.2456207275390625, -0.217926025390625, -0.1902313232421875, -0.16253662109375, -0.1348419189453125, -0.107147216796875, -0.0794525146484375, -0.0517578125, -0.0240631103515625, 0.003631591796875, 0.0313262939453125, 0.05902099609375, 0.0867156982421875, 0.114410400390625, 0.1421051025390625, 0.1697998046875, 0.1974945068359375, 0.225189208984375, 0.2528839111328125, 0.28057861328125, 0.3082733154296875, 0.335968017578125, 0.3636627197265625, 0.391357421875, 0.4190521240234375, 0.446746826171875, 0.4744415283203125, 0.50213623046875, 0.5298309326171875, 0.557525634765625, 0.5852203369140625, 0.6129150390625, 0.6406097412109375, 0.668304443359375, 0.6959991455078125, 0.72369384765625, 0.7513885498046875, 0.779083251953125, 0.8067779541015625, 0.83447265625]}, "gradients/decoder.transformer.h.7.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 3.0, 2.0, 3.0, 2.0, 9.0, 9.0, 5.0, 13.0, 16.0, 10.0, 18.0, 24.0, 45.0, 71.0, 150.0, 586.0, 1047151.0, 185.0, 85.0, 37.0, 33.0, 25.0, 13.0, 16.0, 18.0, 9.0, 6.0, 3.0, 3.0, 2.0, 6.0, 1.0, 1.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-26.03125, -25.16796875, -24.3046875, -23.44140625, -22.578125, -21.71484375, -20.8515625, -19.98828125, -19.125, -18.26171875, -17.3984375, -16.53515625, -15.671875, -14.80859375, -13.9453125, -13.08203125, -12.21875, -11.35546875, -10.4921875, -9.62890625, -8.765625, -7.90234375, -7.0390625, -6.17578125, -5.3125, -4.44921875, -3.5859375, -2.72265625, -1.859375, -0.99609375, -0.1328125, 0.73046875, 1.59375, 2.45703125, 3.3203125, 4.18359375, 5.046875, 5.91015625, 6.7734375, 7.63671875, 8.5, 9.36328125, 10.2265625, 11.08984375, 11.953125, 12.81640625, 13.6796875, 14.54296875, 15.40625, 16.26953125, 17.1328125, 17.99609375, 18.859375, 19.72265625, 20.5859375, 21.44921875, 22.3125, 23.17578125, 24.0390625, 24.90234375, 25.765625, 26.62890625, 27.4921875, 28.35546875, 29.21875]}, "gradients/decoder.transformer.h.7.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 40.0, 961.0, 20.0], "bins": [-4.400049686431885, -4.329604625701904, -4.259159564971924, -4.188714981079102, -4.118269920349121, -4.047824859619141, -3.9773800373077393, -3.906934976577759, -3.8364901542663574, -3.766045093536377, -3.6956002712249756, -3.625155210494995, -3.5547103881835938, -3.4842653274536133, -3.413820505142212, -3.3433754444122314, -3.272930383682251, -3.2024853229522705, -3.132040500640869, -3.0615954399108887, -2.9911506175994873, -2.920705556869507, -2.8502607345581055, -2.779815673828125, -2.7093706130981445, -2.638925552368164, -2.5684807300567627, -2.4980356693267822, -2.427590847015381, -2.3571457862854004, -2.286700963973999, -2.2162559032440186, -2.145811080932617, -2.0753660202026367, -2.0049211978912354, -1.9344762563705444, -1.8640313148498535, -1.793586254119873, -1.7231413125991821, -1.6526963710784912, -1.5822515487670898, -1.511806607246399, -1.441361665725708, -1.370916724205017, -1.3004717826843262, -1.2300267219543457, -1.1595817804336548, -1.0891368389129639, -1.018691897392273, -0.948246955871582, -0.8778020143508911, -0.8073570132255554, -0.7369120717048645, -0.6664671301841736, -0.5960221290588379, -0.525577187538147, -0.4551321864128113, -0.38468724489212036, -0.31424227356910706, -0.24379731714725494, -0.17335236072540283, -0.10290741920471191, -0.03246244788169861, 0.0379825234413147, 0.10842745751142502]}, "gradients/decoder.transformer.h.7.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 4.0, 5.0, 3.0, 3.0, 9.0, 5.0, 7.0, 8.0, 12.0, 15.0, 16.0, 20.0, 16.0, 18.0, 28.0, 24.0, 30.0, 19.0, 28.0, 17.0, 36.0, 30.0, 29.0, 24.0, 35.0, 28.0, 31.0, 38.0, 37.0, 41.0, 31.0, 49.0, 32.0, 28.0, 19.0, 22.0, 23.0, 28.0, 29.0, 18.0, 18.0, 15.0, 21.0, 13.0, 9.0, 8.0, 6.0, 7.0, 6.0, 7.0, 2.0, 1.0, 5.0, 1.0, 2.0, 1.0, 2.0, 1.0], "bins": [-0.4317131042480469, -0.41850537061691284, -0.4052976369857788, -0.3920899033546448, -0.37888216972351074, -0.3656744360923767, -0.3524667024612427, -0.33925899863243103, -0.326051265001297, -0.31284353137016296, -0.29963579773902893, -0.2864280641078949, -0.27322033047676086, -0.2600126266479492, -0.246804878115654, -0.23359715938568115, -0.22038941085338593, -0.2071816772222519, -0.19397394359111786, -0.18076622486114502, -0.167558491230011, -0.15435075759887695, -0.14114302396774292, -0.1279352903366089, -0.11472756415605545, -0.10151983052492142, -0.08831210434436798, -0.07510437071323395, -0.06189664080739021, -0.04868891090154648, -0.035481177270412445, -0.02227345108985901, -0.009065717458724976, 0.004142013378441334, 0.017349744215607643, 0.030557475984096527, 0.04376520588994026, 0.056972935795784, 0.07018066942691803, 0.08338839560747147, 0.0965961292386055, 0.10980386286973953, 0.12301158905029297, 0.136219322681427, 0.14942705631256104, 0.16263478994369507, 0.1758425235748291, 0.18905024230480194, 0.20225797593593597, 0.21546570956707, 0.22867344319820404, 0.24188116192817688, 0.2550888955593109, 0.26829662919044495, 0.281504362821579, 0.294712096452713, 0.30791983008384705, 0.3211275637149811, 0.3343352973461151, 0.34754303097724915, 0.3607507646083832, 0.3739584684371948, 0.38716620206832886, 0.4003739356994629, 0.4135816693305969]}, "gradients/decoder.transformer.h.7.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 3.0, 3.0, 2.0, 3.0, 3.0, 4.0, 7.0, 17.0, 8.0, 6.0, 11.0, 11.0, 19.0, 20.0, 25.0, 25.0, 29.0, 32.0, 36.0, 39.0, 34.0, 51.0, 51.0, 36.0, 51.0, 36.0, 50.0, 50.0, 38.0, 45.0, 37.0, 32.0, 32.0, 25.0, 21.0, 21.0, 17.0, 19.0, 16.0, 9.0, 12.0, 6.0, 3.0, 8.0, 4.0, 3.0, 0.0, 3.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-84.8125, -82.03515625, -79.2578125, -76.48046875, -73.703125, -70.92578125, -68.1484375, -65.37109375, -62.59375, -59.81640625, -57.0390625, -54.26171875, -51.484375, -48.70703125, -45.9296875, -43.15234375, -40.375, -37.59765625, -34.8203125, -32.04296875, -29.265625, -26.48828125, -23.7109375, -20.93359375, -18.15625, -15.37890625, -12.6015625, -9.82421875, -7.046875, -4.26953125, -1.4921875, 1.28515625, 4.0625, 6.83984375, 9.6171875, 12.39453125, 15.171875, 17.94921875, 20.7265625, 23.50390625, 26.28125, 29.05859375, 31.8359375, 34.61328125, 37.390625, 40.16796875, 42.9453125, 45.72265625, 48.5, 51.27734375, 54.0546875, 56.83203125, 59.609375, 62.38671875, 65.1640625, 67.94140625, 70.71875, 73.49609375, 76.2734375, 79.05078125, 81.828125, 84.60546875, 87.3828125, 90.16015625, 92.9375]}, "gradients/decoder.transformer.h.7.attn.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 3.0, 5.0, 7.0, 6.0, 4.0, 11.0, 23.0, 26.0, 32.0, 34.0, 69.0, 103.0, 119.0, 162.0, 236.0, 361.0, 453.0, 697.0, 1046.0, 1560.0, 2557.0, 4744.0, 9950.0, 48798.0, 899165.0, 55561.0, 10247.0, 4794.0, 2715.0, 1629.0, 1081.0, 697.0, 476.0, 329.0, 229.0, 189.0, 121.0, 103.0, 61.0, 38.0, 40.0, 25.0, 16.0, 15.0, 9.0, 8.0, 4.0, 5.0, 2.0, 0.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-165.25, -159.8359375, -154.421875, -149.0078125, -143.59375, -138.1796875, -132.765625, -127.3515625, -121.9375, -116.5234375, -111.109375, -105.6953125, -100.28125, -94.8671875, -89.453125, -84.0390625, -78.625, -73.2109375, -67.796875, -62.3828125, -56.96875, -51.5546875, -46.140625, -40.7265625, -35.3125, -29.8984375, -24.484375, -19.0703125, -13.65625, -8.2421875, -2.828125, 2.5859375, 8.0, 13.4140625, 18.828125, 24.2421875, 29.65625, 35.0703125, 40.484375, 45.8984375, 51.3125, 56.7265625, 62.140625, 67.5546875, 72.96875, 78.3828125, 83.796875, 89.2109375, 94.625, 100.0390625, 105.453125, 110.8671875, 116.28125, 121.6953125, 127.109375, 132.5234375, 137.9375, 143.3515625, 148.765625, 154.1796875, 159.59375, 165.0078125, 170.421875, 175.8359375, 181.25]}, "gradients/decoder.transformer.h.7.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 3.0, 8.0, 8.0, 9.0, 9.0, 15.0, 11.0, 7.0, 12.0, 23.0, 34.0, 35.0, 25.0, 46.0, 49.0, 44.0, 36.0, 62.0, 115.0, 1960.0, 114.0, 63.0, 63.0, 36.0, 42.0, 39.0, 36.0, 37.0, 25.0, 22.0, 13.0, 11.0, 16.0, 9.0, 7.0, 5.0, 3.0, 2.0, 5.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-344.25, -333.98046875, -323.7109375, -313.44140625, -303.171875, -292.90234375, -282.6328125, -272.36328125, -262.09375, -251.82421875, -241.5546875, -231.28515625, -221.015625, -210.74609375, -200.4765625, -190.20703125, -179.9375, -169.66796875, -159.3984375, -149.12890625, -138.859375, -128.58984375, -118.3203125, -108.05078125, -97.78125, -87.51171875, -77.2421875, -66.97265625, -56.703125, -46.43359375, -36.1640625, -25.89453125, -15.625, -5.35546875, 4.9140625, 15.18359375, 25.453125, 35.72265625, 45.9921875, 56.26171875, 66.53125, 76.80078125, 87.0703125, 97.33984375, 107.609375, 117.87890625, 128.1484375, 138.41796875, 148.6875, 158.95703125, 169.2265625, 179.49609375, 189.765625, 200.03515625, 210.3046875, 220.57421875, 230.84375, 241.11328125, 251.3828125, 261.65234375, 271.921875, 282.19140625, 292.4609375, 302.73046875, 313.0]}, "gradients/decoder.transformer.h.7.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 5.0, 3.0, 3.0, 4.0, 6.0, 11.0, 11.0, 18.0, 14.0, 22.0, 27.0, 34.0, 52.0, 61.0, 98.0, 186.0, 452.0, 1391.0, 6558.0, 2975274.0, 154694.0, 4763.0, 1088.0, 362.0, 192.0, 95.0, 65.0, 56.0, 39.0, 34.0, 18.0, 7.0, 14.0, 14.0, 5.0, 16.0, 9.0, 8.0, 4.0, 5.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-704.0, -681.2265625, -658.453125, -635.6796875, -612.90625, -590.1328125, -567.359375, -544.5859375, -521.8125, -499.0390625, -476.265625, -453.4921875, -430.71875, -407.9453125, -385.171875, -362.3984375, -339.625, -316.8515625, -294.078125, -271.3046875, -248.53125, -225.7578125, -202.984375, -180.2109375, -157.4375, -134.6640625, -111.890625, -89.1171875, -66.34375, -43.5703125, -20.796875, 1.9765625, 24.75, 47.5234375, 70.296875, 93.0703125, 115.84375, 138.6171875, 161.390625, 184.1640625, 206.9375, 229.7109375, 252.484375, 275.2578125, 298.03125, 320.8046875, 343.578125, 366.3515625, 389.125, 411.8984375, 434.671875, 457.4453125, 480.21875, 502.9921875, 525.765625, 548.5390625, 571.3125, 594.0859375, 616.859375, 639.6328125, 662.40625, 685.1796875, 707.953125, 730.7265625, 753.5]}, "gradients/decoder.transformer.h.7.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 2.0, 3.0, 7.0, 12.0, 23.0, 28.0, 40.0, 60.0, 108.0, 130.0, 138.0, 139.0, 105.0, 74.0, 51.0, 26.0, 22.0, 10.0, 15.0, 5.0, 8.0, 3.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-176.393310546875, -168.86355590820312, -161.3338165283203, -153.80406188964844, -146.27432250976562, -138.74456787109375, -131.21481323242188, -123.68507385253906, -116.15532684326172, -108.62557983398438, -101.09583282470703, -93.56608581542969, -86.03633117675781, -78.506591796875, -70.97683715820312, -63.44709014892578, -55.91734313964844, -48.387596130371094, -40.85784912109375, -33.32809829711914, -25.798351287841797, -18.268604278564453, -10.738853454589844, -3.2091064453125, 4.320640563964844, 11.850388526916504, 19.380136489868164, 26.90988540649414, 34.439632415771484, 41.96937942504883, 49.49913024902344, 57.02887725830078, 64.55862426757812, 72.08837127685547, 79.61811828613281, 87.14787292480469, 94.6776123046875, 102.20736694335938, 109.73711395263672, 117.26686096191406, 124.7966079711914, 132.32635498046875, 139.85610961914062, 147.38584899902344, 154.9156036376953, 162.44534301757812, 169.97509765625, 177.50485229492188, 185.0345916748047, 192.56434631347656, 200.09408569335938, 207.62384033203125, 215.15357971191406, 222.68333435058594, 230.21307373046875, 237.74282836914062, 245.2725830078125, 252.80233764648438, 260.33209228515625, 267.86181640625, 275.3915710449219, 282.92132568359375, 290.4510803222656, 297.9808349609375, 305.51055908203125]}, "gradients/decoder.transformer.h.7.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 7.0, 4.0, 4.0, 3.0, 9.0, 12.0, 13.0, 18.0, 14.0, 22.0, 31.0, 18.0, 19.0, 24.0, 34.0, 32.0, 34.0, 35.0, 37.0, 40.0, 39.0, 44.0, 37.0, 44.0, 50.0, 39.0, 33.0, 31.0, 38.0, 29.0, 32.0, 25.0, 19.0, 23.0, 24.0, 12.0, 21.0, 15.0, 10.0, 9.0, 9.0, 2.0, 3.0, 4.0, 7.0, 0.0, 4.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-676.5153198242188, -655.5403442382812, -634.5654296875, -613.5904541015625, -592.6155395507812, -571.6405639648438, -550.6656494140625, -529.690673828125, -508.7156982421875, -487.7407531738281, -466.76580810546875, -445.79083251953125, -424.8158874511719, -403.8409423828125, -382.8659973144531, -361.89105224609375, -340.9161071777344, -319.941162109375, -298.9662170410156, -277.99127197265625, -257.01629638671875, -236.04135131835938, -215.06640625, -194.09144592285156, -173.1165008544922, -152.1415557861328, -131.16659545898438, -110.191650390625, -89.2166976928711, -68.24174499511719, -47.26679992675781, -26.291839599609375, -5.31689453125, 15.658056259155273, 36.63300704956055, 57.60795593261719, 78.5829086303711, 99.557861328125, 120.53280639648438, 141.5077667236328, 162.4827117919922, 183.45765686035156, 204.4326171875, 225.40756225585938, 246.38250732421875, 267.35748291015625, 288.3323974609375, 309.307373046875, 330.2823181152344, 351.25726318359375, 372.2322082519531, 393.2071533203125, 414.18212890625, 435.1570739746094, 456.13201904296875, 477.10699462890625, 498.0819091796875, 519.056884765625, 540.0317993164062, 561.0067749023438, 581.981689453125, 602.9566650390625, 623.931640625, 644.9065551757812, 665.8815307617188]}, "gradients/decoder.transformer.h.6.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 6.0, 2.0, 0.0, 4.0, 5.0, 6.0, 8.0, 6.0, 7.0, 10.0, 11.0, 14.0, 15.0, 18.0, 26.0, 20.0, 31.0, 29.0, 25.0, 35.0, 34.0, 35.0, 44.0, 51.0, 46.0, 37.0, 46.0, 43.0, 47.0, 44.0, 40.0, 32.0, 38.0, 24.0, 27.0, 26.0, 17.0, 26.0, 13.0, 14.0, 7.0, 9.0, 10.0, 6.0, 6.0, 6.0, 3.0, 2.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-86.5625, -83.798828125, -81.03515625, -78.271484375, -75.5078125, -72.744140625, -69.98046875, -67.216796875, -64.453125, -61.689453125, -58.92578125, -56.162109375, -53.3984375, -50.634765625, -47.87109375, -45.107421875, -42.34375, -39.580078125, -36.81640625, -34.052734375, -31.2890625, -28.525390625, -25.76171875, -22.998046875, -20.234375, -17.470703125, -14.70703125, -11.943359375, -9.1796875, -6.416015625, -3.65234375, -0.888671875, 1.875, 4.638671875, 7.40234375, 10.166015625, 12.9296875, 15.693359375, 18.45703125, 21.220703125, 23.984375, 26.748046875, 29.51171875, 32.275390625, 35.0390625, 37.802734375, 40.56640625, 43.330078125, 46.09375, 48.857421875, 51.62109375, 54.384765625, 57.1484375, 59.912109375, 62.67578125, 65.439453125, 68.203125, 70.966796875, 73.73046875, 76.494140625, 79.2578125, 82.021484375, 84.78515625, 87.548828125, 90.3125]}, "gradients/decoder.transformer.h.6.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 2.0, 5.0, 4.0, 11.0, 16.0, 19.0, 22.0, 24.0, 62.0, 87.0, 135.0, 219.0, 285.0, 426.0, 613.0, 883.0, 1393.0, 2106.0, 3409.0, 5554.0, 9771.0, 18524.0, 51122.0, 439481.0, 3046692.0, 507892.0, 59987.0, 19450.0, 10224.0, 5835.0, 3493.0, 2136.0, 1445.0, 896.0, 630.0, 442.0, 329.0, 230.0, 144.0, 90.0, 44.0, 39.0, 33.0, 26.0, 20.0, 13.0, 10.0, 11.0, 7.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-153.125, -148.109375, -143.09375, -138.078125, -133.0625, -128.046875, -123.03125, -118.015625, -113.0, -107.984375, -102.96875, -97.953125, -92.9375, -87.921875, -82.90625, -77.890625, -72.875, -67.859375, -62.84375, -57.828125, -52.8125, -47.796875, -42.78125, -37.765625, -32.75, -27.734375, -22.71875, -17.703125, -12.6875, -7.671875, -2.65625, 2.359375, 7.375, 12.390625, 17.40625, 22.421875, 27.4375, 32.453125, 37.46875, 42.484375, 47.5, 52.515625, 57.53125, 62.546875, 67.5625, 72.578125, 77.59375, 82.609375, 87.625, 92.640625, 97.65625, 102.671875, 107.6875, 112.703125, 117.71875, 122.734375, 127.75, 132.765625, 137.78125, 142.796875, 147.8125, 152.828125, 157.84375, 162.859375, 167.875]}, "gradients/decoder.transformer.h.6.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 4.0, 5.0, 6.0, 2.0, 5.0, 10.0, 12.0, 11.0, 9.0, 13.0, 27.0, 27.0, 39.0, 53.0, 93.0, 183.0, 323.0, 706.0, 1104.0, 656.0, 336.0, 151.0, 88.0, 62.0, 32.0, 25.0, 24.0, 16.0, 12.0, 8.0, 8.0, 7.0, 6.0, 4.0, 3.0, 2.0, 2.0, 1.0, 0.0, 4.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0], "bins": [-231.0, -224.666015625, -218.33203125, -211.998046875, -205.6640625, -199.330078125, -192.99609375, -186.662109375, -180.328125, -173.994140625, -167.66015625, -161.326171875, -154.9921875, -148.658203125, -142.32421875, -135.990234375, -129.65625, -123.322265625, -116.98828125, -110.654296875, -104.3203125, -97.986328125, -91.65234375, -85.318359375, -78.984375, -72.650390625, -66.31640625, -59.982421875, -53.6484375, -47.314453125, -40.98046875, -34.646484375, -28.3125, -21.978515625, -15.64453125, -9.310546875, -2.9765625, 3.357421875, 9.69140625, 16.025390625, 22.359375, 28.693359375, 35.02734375, 41.361328125, 47.6953125, 54.029296875, 60.36328125, 66.697265625, 73.03125, 79.365234375, 85.69921875, 92.033203125, 98.3671875, 104.701171875, 111.03515625, 117.369140625, 123.703125, 130.037109375, 136.37109375, 142.705078125, 149.0390625, 155.373046875, 161.70703125, 168.041015625, 174.375]}, "gradients/decoder.transformer.h.6.mlp.c_fc.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 0.0, 1.0, 3.0, 4.0, 9.0, 4.0, 5.0, 11.0, 12.0, 19.0, 28.0, 31.0, 44.0, 70.0, 95.0, 147.0, 259.0, 442.0, 958.0, 2227.0, 6264.0, 20727.0, 112391.0, 3811048.0, 198400.0, 27993.0, 7950.0, 2690.0, 1133.0, 552.0, 273.0, 168.0, 94.0, 73.0, 47.0, 40.0, 21.0, 14.0, 12.0, 14.0, 6.0, 2.0, 5.0, 4.0, 2.0, 0.0, 0.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-412.75, -397.23828125, -381.7265625, -366.21484375, -350.703125, -335.19140625, -319.6796875, -304.16796875, -288.65625, -273.14453125, -257.6328125, -242.12109375, -226.609375, -211.09765625, -195.5859375, -180.07421875, -164.5625, -149.05078125, -133.5390625, -118.02734375, -102.515625, -87.00390625, -71.4921875, -55.98046875, -40.46875, -24.95703125, -9.4453125, 6.06640625, 21.578125, 37.08984375, 52.6015625, 68.11328125, 83.625, 99.13671875, 114.6484375, 130.16015625, 145.671875, 161.18359375, 176.6953125, 192.20703125, 207.71875, 223.23046875, 238.7421875, 254.25390625, 269.765625, 285.27734375, 300.7890625, 316.30078125, 331.8125, 347.32421875, 362.8359375, 378.34765625, 393.859375, 409.37109375, 424.8828125, 440.39453125, 455.90625, 471.41796875, 486.9296875, 502.44140625, 517.953125, 533.46484375, 548.9765625, 564.48828125, 580.0]}, "gradients/decoder.transformer.h.6.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 16.0, 31.0, 178.0, 537.0, 215.0, 34.0, 9.0, 3.0], "bins": [-5717.24072265625, -5621.240234375, -5525.23974609375, -5429.2392578125, -5333.23876953125, -5237.23828125, -5141.2373046875, -5045.2373046875, -4949.236328125, -4853.23583984375, -4757.2353515625, -4661.23486328125, -4565.234375, -4469.23388671875, -4373.2333984375, -4277.232421875, -4181.232421875, -4085.23193359375, -3989.2314453125, -3893.23095703125, -3797.230224609375, -3701.229736328125, -3605.229248046875, -3509.228759765625, -3413.22802734375, -3317.2275390625, -3221.22705078125, -3125.2265625, -3029.225830078125, -2933.225341796875, -2837.224853515625, -2741.224365234375, -2645.223876953125, -2549.223388671875, -2453.222900390625, -2357.22216796875, -2261.2216796875, -2165.22119140625, -2069.220703125, -1973.22021484375, -1877.2196044921875, -1781.2191162109375, -1685.218505859375, -1589.218017578125, -1493.217529296875, -1397.217041015625, -1301.2164306640625, -1205.2159423828125, -1109.2154541015625, -1013.2149047851562, -917.2144165039062, -821.2138671875, -725.21337890625, -629.2128295898438, -533.2122802734375, -437.2117919921875, -341.2112121582031, -245.210693359375, -149.2101593017578, -53.209625244140625, 42.7908935546875, 138.79141235351562, 234.79196166992188, 330.7924499511719, 426.7929992675781]}, "gradients/decoder.transformer.h.6.ln_2.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 1.0, 2.0, 0.0, 4.0, 3.0, 9.0, 10.0, 9.0, 15.0, 10.0, 15.0, 17.0, 17.0, 24.0, 27.0, 32.0, 33.0, 34.0, 40.0, 26.0, 40.0, 34.0, 44.0, 48.0, 41.0, 51.0, 39.0, 39.0, 36.0, 37.0, 40.0, 45.0, 24.0, 20.0, 30.0, 23.0, 22.0, 15.0, 11.0, 9.0, 9.0, 5.0, 8.0, 2.0, 6.0, 2.0, 3.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 1.0], "bins": [-530.5936889648438, -514.7981567382812, -499.0025939941406, -483.20703125, -467.4114990234375, -451.6159362792969, -435.82037353515625, -420.02484130859375, -404.22930908203125, -388.4337463378906, -372.6382141113281, -356.8426513671875, -341.047119140625, -325.2515563964844, -309.45599365234375, -293.66046142578125, -277.8648986816406, -262.0693359375, -246.2738037109375, -230.47824096679688, -214.68270874023438, -198.88714599609375, -183.0915985107422, -167.29605102539062, -151.50050354003906, -135.7049560546875, -119.90940856933594, -104.11385345458984, -88.31830596923828, -72.52275848388672, -56.727203369140625, -40.93165588378906, -25.1361083984375, -9.340559005737305, 6.454990386962891, 22.25054168701172, 38.04608917236328, 53.841636657714844, 69.63719177246094, 85.4327392578125, 101.22828674316406, 117.02383422851562, 132.8193817138672, 148.61492919921875, 164.41049194335938, 180.20602416992188, 196.0015869140625, 211.79713439941406, 227.59268188476562, 243.3882293701172, 259.18377685546875, 274.9793395996094, 290.7748718261719, 306.5704345703125, 322.365966796875, 338.1615295410156, 353.95709228515625, 369.7526550292969, 385.5481872558594, 401.34375, 417.1392822265625, 432.9348449707031, 448.73040771484375, 464.52593994140625, 480.32147216796875]}, "gradients/decoder.transformer.h.6.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 2.0, 1.0, 0.0, 2.0, 2.0, 3.0, 4.0, 9.0, 4.0, 5.0, 14.0, 14.0, 9.0, 12.0, 12.0, 17.0, 23.0, 19.0, 24.0, 24.0, 26.0, 37.0, 43.0, 33.0, 34.0, 41.0, 34.0, 39.0, 51.0, 32.0, 42.0, 38.0, 40.0, 45.0, 40.0, 33.0, 28.0, 32.0, 27.0, 17.0, 25.0, 18.0, 9.0, 10.0, 9.0, 5.0, 6.0, 7.0, 5.0, 4.0, 2.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0], "bins": [-81.375, -78.7626953125, -76.150390625, -73.5380859375, -70.92578125, -68.3134765625, -65.701171875, -63.0888671875, -60.4765625, -57.8642578125, -55.251953125, -52.6396484375, -50.02734375, -47.4150390625, -44.802734375, -42.1904296875, -39.578125, -36.9658203125, -34.353515625, -31.7412109375, -29.12890625, -26.5166015625, -23.904296875, -21.2919921875, -18.6796875, -16.0673828125, -13.455078125, -10.8427734375, -8.23046875, -5.6181640625, -3.005859375, -0.3935546875, 2.21875, 4.8310546875, 7.443359375, 10.0556640625, 12.66796875, 15.2802734375, 17.892578125, 20.5048828125, 23.1171875, 25.7294921875, 28.341796875, 30.9541015625, 33.56640625, 36.1787109375, 38.791015625, 41.4033203125, 44.015625, 46.6279296875, 49.240234375, 51.8525390625, 54.46484375, 57.0771484375, 59.689453125, 62.3017578125, 64.9140625, 67.5263671875, 70.138671875, 72.7509765625, 75.36328125, 77.9755859375, 80.587890625, 83.2001953125, 85.8125]}, "gradients/decoder.transformer.h.6.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 4.0, 3.0, 2.0, 3.0, 5.0, 16.0, 13.0, 15.0, 28.0, 36.0, 60.0, 109.0, 110.0, 216.0, 372.0, 651.0, 1101.0, 2099.0, 3972.0, 7918.0, 16104.0, 33571.0, 74333.0, 181069.0, 405177.0, 180365.0, 74389.0, 34012.0, 16052.0, 7964.0, 3939.0, 2114.0, 1128.0, 626.0, 377.0, 213.0, 136.0, 93.0, 61.0, 31.0, 25.0, 22.0, 12.0, 7.0, 6.0, 2.0, 2.0, 0.0, 4.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-10.90625, -10.5706787109375, -10.235107421875, -9.8995361328125, -9.56396484375, -9.2283935546875, -8.892822265625, -8.5572509765625, -8.2216796875, -7.8861083984375, -7.550537109375, -7.2149658203125, -6.87939453125, -6.5438232421875, -6.208251953125, -5.8726806640625, -5.537109375, -5.2015380859375, -4.865966796875, -4.5303955078125, -4.19482421875, -3.8592529296875, -3.523681640625, -3.1881103515625, -2.8525390625, -2.5169677734375, -2.181396484375, -1.8458251953125, -1.51025390625, -1.1746826171875, -0.839111328125, -0.5035400390625, -0.16796875, 0.1676025390625, 0.503173828125, 0.8387451171875, 1.17431640625, 1.5098876953125, 1.845458984375, 2.1810302734375, 2.5166015625, 2.8521728515625, 3.187744140625, 3.5233154296875, 3.85888671875, 4.1944580078125, 4.530029296875, 4.8656005859375, 5.201171875, 5.5367431640625, 5.872314453125, 6.2078857421875, 6.54345703125, 6.8790283203125, 7.214599609375, 7.5501708984375, 7.8857421875, 8.2213134765625, 8.556884765625, 8.8924560546875, 9.22802734375, 9.5635986328125, 9.899169921875, 10.2347412109375, 10.5703125]}, "gradients/decoder.transformer.h.6.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 6.0, 2.0, 4.0, 1.0, 10.0, 5.0, 11.0, 9.0, 11.0, 15.0, 10.0, 21.0, 16.0, 28.0, 12.0, 24.0, 21.0, 40.0, 26.0, 19.0, 39.0, 35.0, 46.0, 39.0, 30.0, 1063.0, 41.0, 39.0, 48.0, 42.0, 41.0, 29.0, 28.0, 27.0, 30.0, 23.0, 21.0, 25.0, 15.0, 16.0, 11.0, 8.0, 14.0, 9.0, 8.0, 6.0, 5.0, 1.0, 6.0, 1.0, 1.0, 0.0, 3.0, 2.0], "bins": [-61.125, -59.40478515625, -57.6845703125, -55.96435546875, -54.244140625, -52.52392578125, -50.8037109375, -49.08349609375, -47.36328125, -45.64306640625, -43.9228515625, -42.20263671875, -40.482421875, -38.76220703125, -37.0419921875, -35.32177734375, -33.6015625, -31.88134765625, -30.1611328125, -28.44091796875, -26.720703125, -25.00048828125, -23.2802734375, -21.56005859375, -19.83984375, -18.11962890625, -16.3994140625, -14.67919921875, -12.958984375, -11.23876953125, -9.5185546875, -7.79833984375, -6.078125, -4.35791015625, -2.6376953125, -0.91748046875, 0.802734375, 2.52294921875, 4.2431640625, 5.96337890625, 7.68359375, 9.40380859375, 11.1240234375, 12.84423828125, 14.564453125, 16.28466796875, 18.0048828125, 19.72509765625, 21.4453125, 23.16552734375, 24.8857421875, 26.60595703125, 28.326171875, 30.04638671875, 31.7666015625, 33.48681640625, 35.20703125, 36.92724609375, 38.6474609375, 40.36767578125, 42.087890625, 43.80810546875, 45.5283203125, 47.24853515625, 48.96875]}, "gradients/decoder.transformer.h.6.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 2.0, 2.0, 1.0, 3.0, 4.0, 3.0, 10.0, 12.0, 23.0, 29.0, 33.0, 70.0, 67.0, 137.0, 173.0, 279.0, 377.0, 549.0, 863.0, 1323.0, 1948.0, 3006.0, 4666.0, 7256.0, 11450.0, 18164.0, 29952.0, 50996.0, 89540.0, 172562.0, 1323332.0, 166077.0, 86488.0, 49299.0, 29167.0, 17767.0, 11082.0, 6975.0, 4547.0, 2973.0, 1944.0, 1323.0, 887.0, 609.0, 378.0, 238.0, 183.0, 117.0, 86.0, 61.0, 38.0, 21.0, 20.0, 13.0, 6.0, 7.0, 3.0, 3.0, 2.0, 2.0, 2.0, 0.0, 1.0], "bins": [-4.3125, -4.17547607421875, -4.0384521484375, -3.90142822265625, -3.764404296875, -3.62738037109375, -3.4903564453125, -3.35333251953125, -3.21630859375, -3.07928466796875, -2.9422607421875, -2.80523681640625, -2.668212890625, -2.53118896484375, -2.3941650390625, -2.25714111328125, -2.1201171875, -1.98309326171875, -1.8460693359375, -1.70904541015625, -1.572021484375, -1.43499755859375, -1.2979736328125, -1.16094970703125, -1.02392578125, -0.88690185546875, -0.7498779296875, -0.61285400390625, -0.475830078125, -0.33880615234375, -0.2017822265625, -0.06475830078125, 0.072265625, 0.20928955078125, 0.3463134765625, 0.48333740234375, 0.620361328125, 0.75738525390625, 0.8944091796875, 1.03143310546875, 1.16845703125, 1.30548095703125, 1.4425048828125, 1.57952880859375, 1.716552734375, 1.85357666015625, 1.9906005859375, 2.12762451171875, 2.2646484375, 2.40167236328125, 2.5386962890625, 2.67572021484375, 2.812744140625, 2.94976806640625, 3.0867919921875, 3.22381591796875, 3.36083984375, 3.49786376953125, 3.6348876953125, 3.77191162109375, 3.908935546875, 4.04595947265625, 4.1829833984375, 4.32000732421875, 4.45703125]}, "gradients/decoder.transformer.h.6.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 3.0, 4.0, 1.0, 1.0, 3.0, 3.0, 2.0, 8.0, 4.0, 11.0, 7.0, 11.0, 14.0, 12.0, 12.0, 11.0, 20.0, 24.0, 27.0, 32.0, 77.0, 147.0, 189.0, 118.0, 54.0, 41.0, 24.0, 25.0, 15.0, 15.0, 14.0, 5.0, 11.0, 10.0, 8.0, 11.0, 7.0, 4.0, 3.0, 5.0, 5.0, 3.0, 4.0, 1.0, 1.0, 0.0, 3.0, 1.0, 0.0, 3.0, 0.0, 1.0, 1.0, 1.0], "bins": [-0.251708984375, -0.24391746520996094, -0.23612594604492188, -0.2283344268798828, -0.22054290771484375, -0.2127513885498047, -0.20495986938476562, -0.19716835021972656, -0.1893768310546875, -0.18158531188964844, -0.17379379272460938, -0.1660022735595703, -0.15821075439453125, -0.1504192352294922, -0.14262771606445312, -0.13483619689941406, -0.127044677734375, -0.11925315856933594, -0.11146163940429688, -0.10367012023925781, -0.09587860107421875, -0.08808708190917969, -0.08029556274414062, -0.07250404357910156, -0.0647125244140625, -0.05692100524902344, -0.049129486083984375, -0.04133796691894531, -0.03354644775390625, -0.025754928588867188, -0.017963409423828125, -0.010171890258789062, -0.00238037109375, 0.0054111480712890625, 0.013202667236328125, 0.020994186401367188, 0.02878570556640625, 0.03657722473144531, 0.044368743896484375, 0.05216026306152344, 0.0599517822265625, 0.06774330139160156, 0.07553482055664062, 0.08332633972167969, 0.09111785888671875, 0.09890937805175781, 0.10670089721679688, 0.11449241638183594, 0.122283935546875, 0.13007545471191406, 0.13786697387695312, 0.1456584930419922, 0.15345001220703125, 0.1612415313720703, 0.16903305053710938, 0.17682456970214844, 0.1846160888671875, 0.19240760803222656, 0.20019912719726562, 0.2079906463623047, 0.21578216552734375, 0.2235736846923828, 0.23136520385742188, 0.23915672302246094, 0.2469482421875]}, "gradients/decoder.transformer.h.6.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0, 0.0, 3.0, 2.0, 0.0, 2.0, 3.0, 4.0, 4.0, 5.0, 4.0, 4.0, 7.0, 9.0, 9.0, 7.0, 14.0, 6.0, 13.0, 15.0, 18.0, 30.0, 46.0, 68.0, 103.0, 214.0, 1047083.0, 488.0, 123.0, 79.0, 38.0, 33.0, 20.0, 15.0, 11.0, 15.0, 10.0, 11.0, 6.0, 11.0, 5.0, 8.0, 4.0, 2.0, 2.0, 1.0, 2.0, 3.0, 3.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-7.6796875, -7.44482421875, -7.2099609375, -6.97509765625, -6.740234375, -6.50537109375, -6.2705078125, -6.03564453125, -5.80078125, -5.56591796875, -5.3310546875, -5.09619140625, -4.861328125, -4.62646484375, -4.3916015625, -4.15673828125, -3.921875, -3.68701171875, -3.4521484375, -3.21728515625, -2.982421875, -2.74755859375, -2.5126953125, -2.27783203125, -2.04296875, -1.80810546875, -1.5732421875, -1.33837890625, -1.103515625, -0.86865234375, -0.6337890625, -0.39892578125, -0.1640625, 0.07080078125, 0.3056640625, 0.54052734375, 0.775390625, 1.01025390625, 1.2451171875, 1.47998046875, 1.71484375, 1.94970703125, 2.1845703125, 2.41943359375, 2.654296875, 2.88916015625, 3.1240234375, 3.35888671875, 3.59375, 3.82861328125, 4.0634765625, 4.29833984375, 4.533203125, 4.76806640625, 5.0029296875, 5.23779296875, 5.47265625, 5.70751953125, 5.9423828125, 6.17724609375, 6.412109375, 6.64697265625, 6.8818359375, 7.11669921875, 7.3515625]}, "gradients/decoder.transformer.h.6.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 49.0, 817.0, 140.0, 10.0, 1.0], "bins": [-1.6375054121017456, -1.6105188131332397, -1.5835323333740234, -1.5565457344055176, -1.5295592546463013, -1.5025726556777954, -1.475586175918579, -1.4485995769500732, -1.421613097190857, -1.394626498222351, -1.3676400184631348, -1.340653419494629, -1.3136669397354126, -1.2866803407669067, -1.2596938610076904, -1.2327072620391846, -1.2057206630706787, -1.1787340641021729, -1.1517475843429565, -1.1247609853744507, -1.0977745056152344, -1.0707879066467285, -1.0438014268875122, -1.0168148279190063, -0.98982834815979, -0.962841808795929, -0.9358552694320679, -0.9088687300682068, -0.8818821907043457, -0.8548956513404846, -0.8279091119766235, -0.8009225130081177, -0.7739360332489014, -0.7469494938850403, -0.7199629545211792, -0.6929764151573181, -0.665989875793457, -0.639003336429596, -0.6120167970657349, -0.585030198097229, -0.5580437183380127, -0.5310571789741516, -0.5040706396102905, -0.47708410024642944, -0.45009756088256836, -0.4231110215187073, -0.3961244523525238, -0.3691379129886627, -0.34215137362480164, -0.31516483426094055, -0.28817829489707947, -0.261191725730896, -0.2342052012681961, -0.20721866190433502, -0.18023210763931274, -0.15324556827545166, -0.12625902891159058, -0.09927248954772949, -0.07228594273328781, -0.04529939591884613, -0.018312856554985046, 0.008673682808876038, 0.035660237073898315, 0.0626467764377594, 0.08963332325220108]}, "gradients/decoder.transformer.h.6.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 4.0, 3.0, 2.0, 0.0, 5.0, 6.0, 6.0, 7.0, 5.0, 8.0, 11.0, 10.0, 11.0, 13.0, 13.0, 30.0, 22.0, 20.0, 26.0, 28.0, 43.0, 21.0, 38.0, 28.0, 41.0, 22.0, 42.0, 39.0, 48.0, 48.0, 53.0, 36.0, 31.0, 28.0, 29.0, 34.0, 26.0, 24.0, 32.0, 23.0, 15.0, 10.0, 12.0, 6.0, 10.0, 8.0, 6.0, 7.0, 7.0, 3.0, 6.0, 6.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0], "bins": [-0.3662654161453247, -0.35448646545410156, -0.3427075147628784, -0.3309285640716553, -0.3191496431827545, -0.30737069249153137, -0.2955917418003082, -0.2838127911090851, -0.2720338702201843, -0.2602549195289612, -0.24847598373889923, -0.2366970330476761, -0.22491809725761414, -0.213139146566391, -0.20136019587516785, -0.1895812451839447, -0.17780229449272156, -0.1660233438014984, -0.15424440801143646, -0.14246545732021332, -0.13068652153015137, -0.11890757083892822, -0.10712862014770508, -0.09534967690706253, -0.08357073366641998, -0.07179179042577744, -0.06001284345984459, -0.04823389649391174, -0.036454953253269196, -0.024676010012626648, -0.012897059321403503, -0.0011181160807609558, 0.010660827159881592, 0.02243977226316929, 0.034218717366456985, 0.04599766433238983, 0.05777660757303238, 0.06955555081367493, 0.08133450150489807, 0.09311344474554062, 0.10489238798618317, 0.11667133122682571, 0.12845027446746826, 0.1402292251586914, 0.15200817584991455, 0.1637871116399765, 0.17556606233119965, 0.1873449981212616, 0.19912394881248474, 0.21090289950370789, 0.22268183529376984, 0.23446078598499298, 0.24623972177505493, 0.2580186724662781, 0.2697976231575012, 0.28157657384872437, 0.2933555245399475, 0.30513447523117065, 0.3169134259223938, 0.32869237661361694, 0.3404712975025177, 0.35225024819374084, 0.364029198884964, 0.37580814957618713, 0.3875870704650879]}, "gradients/decoder.transformer.h.6.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 2.0, 1.0, 0.0, 2.0, 2.0, 3.0, 4.0, 9.0, 4.0, 5.0, 14.0, 14.0, 9.0, 12.0, 12.0, 17.0, 23.0, 19.0, 24.0, 24.0, 26.0, 37.0, 43.0, 33.0, 34.0, 41.0, 34.0, 39.0, 51.0, 32.0, 42.0, 38.0, 40.0, 45.0, 40.0, 33.0, 28.0, 32.0, 27.0, 17.0, 25.0, 18.0, 9.0, 10.0, 9.0, 5.0, 6.0, 7.0, 5.0, 4.0, 2.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0], "bins": [-81.375, -78.7626953125, -76.150390625, -73.5380859375, -70.92578125, -68.3134765625, -65.701171875, -63.0888671875, -60.4765625, -57.8642578125, -55.251953125, -52.6396484375, -50.02734375, -47.4150390625, -44.802734375, -42.1904296875, -39.578125, -36.9658203125, -34.353515625, -31.7412109375, -29.12890625, -26.5166015625, -23.904296875, -21.2919921875, -18.6796875, -16.0673828125, -13.455078125, -10.8427734375, -8.23046875, -5.6181640625, -3.005859375, -0.3935546875, 2.21875, 4.8310546875, 7.443359375, 10.0556640625, 12.66796875, 15.2802734375, 17.892578125, 20.5048828125, 23.1171875, 25.7294921875, 28.341796875, 30.9541015625, 33.56640625, 36.1787109375, 38.791015625, 41.4033203125, 44.015625, 46.6279296875, 49.240234375, 51.8525390625, 54.46484375, 57.0771484375, 59.689453125, 62.3017578125, 64.9140625, 67.5263671875, 70.138671875, 72.7509765625, 75.36328125, 77.9755859375, 80.587890625, 83.2001953125, 85.8125]}, "gradients/decoder.transformer.h.6.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 3.0, 0.0, 4.0, 3.0, 0.0, 3.0, 4.0, 7.0, 8.0, 16.0, 10.0, 17.0, 24.0, 32.0, 43.0, 48.0, 74.0, 84.0, 146.0, 189.0, 278.0, 442.0, 669.0, 1092.0, 1963.0, 3368.0, 6643.0, 13776.0, 33703.0, 198790.0, 702714.0, 48399.0, 17717.0, 8137.0, 4159.0, 2256.0, 1321.0, 807.0, 512.0, 345.0, 231.0, 131.0, 130.0, 79.0, 46.0, 38.0, 26.0, 17.0, 15.0, 15.0, 13.0, 7.0, 4.0, 2.0, 1.0, 2.0, 2.0, 1.0, 2.0, 2.0, 1.0, 2.0, 2.0], "bins": [-189.0, -182.93359375, -176.8671875, -170.80078125, -164.734375, -158.66796875, -152.6015625, -146.53515625, -140.46875, -134.40234375, -128.3359375, -122.26953125, -116.203125, -110.13671875, -104.0703125, -98.00390625, -91.9375, -85.87109375, -79.8046875, -73.73828125, -67.671875, -61.60546875, -55.5390625, -49.47265625, -43.40625, -37.33984375, -31.2734375, -25.20703125, -19.140625, -13.07421875, -7.0078125, -0.94140625, 5.125, 11.19140625, 17.2578125, 23.32421875, 29.390625, 35.45703125, 41.5234375, 47.58984375, 53.65625, 59.72265625, 65.7890625, 71.85546875, 77.921875, 83.98828125, 90.0546875, 96.12109375, 102.1875, 108.25390625, 114.3203125, 120.38671875, 126.453125, 132.51953125, 138.5859375, 144.65234375, 150.71875, 156.78515625, 162.8515625, 168.91796875, 174.984375, 181.05078125, 187.1171875, 193.18359375, 199.25]}, "gradients/decoder.transformer.h.6.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 5.0, 5.0, 4.0, 8.0, 7.0, 5.0, 9.0, 10.0, 13.0, 15.0, 16.0, 16.0, 26.0, 31.0, 27.0, 40.0, 46.0, 42.0, 51.0, 77.0, 89.0, 136.0, 1745.0, 125.0, 86.0, 51.0, 51.0, 44.0, 28.0, 41.0, 23.0, 22.0, 28.0, 26.0, 23.0, 15.0, 12.0, 13.0, 8.0, 13.0, 3.0, 3.0, 4.0, 7.0, 4.0, 2.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0], "bins": [-216.75, -209.09765625, -201.4453125, -193.79296875, -186.140625, -178.48828125, -170.8359375, -163.18359375, -155.53125, -147.87890625, -140.2265625, -132.57421875, -124.921875, -117.26953125, -109.6171875, -101.96484375, -94.3125, -86.66015625, -79.0078125, -71.35546875, -63.703125, -56.05078125, -48.3984375, -40.74609375, -33.09375, -25.44140625, -17.7890625, -10.13671875, -2.484375, 5.16796875, 12.8203125, 20.47265625, 28.125, 35.77734375, 43.4296875, 51.08203125, 58.734375, 66.38671875, 74.0390625, 81.69140625, 89.34375, 96.99609375, 104.6484375, 112.30078125, 119.953125, 127.60546875, 135.2578125, 142.91015625, 150.5625, 158.21484375, 165.8671875, 173.51953125, 181.171875, 188.82421875, 196.4765625, 204.12890625, 211.78125, 219.43359375, 227.0859375, 234.73828125, 242.390625, 250.04296875, 257.6953125, 265.34765625, 273.0]}, "gradients/decoder.transformer.h.6.attn.c_attn.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 5.0, 5.0, 5.0, 3.0, 7.0, 12.0, 11.0, 10.0, 20.0, 15.0, 26.0, 39.0, 33.0, 42.0, 42.0, 76.0, 113.0, 243.0, 578.0, 1951.0, 10522.0, 144785.0, 2945198.0, 35287.0, 4638.0, 1143.0, 378.0, 154.0, 98.0, 58.0, 31.0, 37.0, 33.0, 15.0, 22.0, 18.0, 13.0, 11.0, 10.0, 8.0, 2.0, 6.0, 5.0, 2.0, 4.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-690.0, -669.8671875, -649.734375, -629.6015625, -609.46875, -589.3359375, -569.203125, -549.0703125, -528.9375, -508.8046875, -488.671875, -468.5390625, -448.40625, -428.2734375, -408.140625, -388.0078125, -367.875, -347.7421875, -327.609375, -307.4765625, -287.34375, -267.2109375, -247.078125, -226.9453125, -206.8125, -186.6796875, -166.546875, -146.4140625, -126.28125, -106.1484375, -86.015625, -65.8828125, -45.75, -25.6171875, -5.484375, 14.6484375, 34.78125, 54.9140625, 75.046875, 95.1796875, 115.3125, 135.4453125, 155.578125, 175.7109375, 195.84375, 215.9765625, 236.109375, 256.2421875, 276.375, 296.5078125, 316.640625, 336.7734375, 356.90625, 377.0390625, 397.171875, 417.3046875, 437.4375, 457.5703125, 477.703125, 497.8359375, 517.96875, 538.1015625, 558.234375, 578.3671875, 598.5]}, "gradients/decoder.transformer.h.6.ln_1.weight": {"_type": "histogram", "values": [4.0, 8.0, 168.0, 778.0, 61.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-443.7018737792969, -311.68145751953125, -179.6610107421875, -47.640594482421875, 84.37985229492188, 216.40029907226562, 348.4206848144531, 480.4411315917969, 612.4615478515625, 744.4819946289062, 876.50244140625, 1008.5228271484375, 1140.543212890625, 1272.563720703125, 1404.5841064453125, 1536.6044921875, 1668.625, 1800.6453857421875, 1932.6658935546875, 2064.686279296875, 2196.706787109375, 2328.72705078125, 2460.74755859375, 2592.76806640625, 2724.78857421875, 2856.80908203125, 2988.829345703125, 3120.849853515625, 3252.870361328125, 3384.890625, 3516.9111328125, 3648.931640625, 3780.95166015625, 3912.97216796875, 4044.992431640625, 4177.0126953125, 4309.033203125, 4441.0537109375, 4573.07421875, 4705.0947265625, 4837.115234375, 4969.1357421875, 5101.15625, 5233.1767578125, 5365.19677734375, 5497.21728515625, 5629.23779296875, 5761.25830078125, 5893.2783203125, 6025.298828125, 6157.3193359375, 6289.33984375, 6421.35986328125, 6553.38037109375, 6685.40087890625, 6817.42138671875, 6949.44189453125, 7081.46240234375, 7213.48291015625, 7345.5029296875, 7477.5234375, 7609.5439453125, 7741.564453125, 7873.5849609375, 8005.60546875]}, "gradients/decoder.transformer.h.6.ln_1.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 0.0, 2.0, 1.0, 3.0, 0.0, 0.0, 4.0, 4.0, 7.0, 3.0, 1.0, 7.0, 11.0, 19.0, 14.0, 22.0, 26.0, 29.0, 29.0, 36.0, 36.0, 42.0, 42.0, 57.0, 50.0, 58.0, 42.0, 44.0, 40.0, 38.0, 43.0, 39.0, 40.0, 43.0, 27.0, 21.0, 21.0, 22.0, 23.0, 16.0, 17.0, 9.0, 6.0, 7.0, 6.0, 6.0, 2.0, 0.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-713.80419921875, -690.6886596679688, -667.5730590820312, -644.45751953125, -621.3419799804688, -598.2264404296875, -575.11083984375, -551.9953002929688, -528.8797607421875, -505.7641906738281, -482.6486511230469, -459.5330810546875, -436.41754150390625, -413.3019714355469, -390.1864013671875, -367.07086181640625, -343.95526123046875, -320.8396911621094, -297.7241516113281, -274.60858154296875, -251.49302673339844, -228.37747192382812, -205.26190185546875, -182.14634704589844, -159.03079223632812, -135.9152374267578, -112.79967498779297, -89.68411254882812, -66.56855773925781, -43.4530029296875, -20.337432861328125, 2.7781219482421875, 25.8936767578125, 49.00923538208008, 72.12479400634766, 95.2403564453125, 118.35591125488281, 141.47146606445312, 164.5870361328125, 187.7025909423828, 210.81814575195312, 233.93370056152344, 257.04925537109375, 280.1648254394531, 303.2803955078125, 326.39593505859375, 349.5115051269531, 372.6270751953125, 395.74261474609375, 418.8581848144531, 441.9737243652344, 465.08929443359375, 488.204833984375, 511.3204040527344, 534.4359741210938, 557.551513671875, 580.6671142578125, 603.7826538085938, 626.8982543945312, 650.0137939453125, 673.1293334960938, 696.244873046875, 719.3604736328125, 742.4760131835938, 765.591552734375]}, "gradients/decoder.transformer.h.5.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 2.0, 2.0, 2.0, 3.0, 5.0, 3.0, 9.0, 6.0, 6.0, 13.0, 11.0, 11.0, 14.0, 20.0, 25.0, 15.0, 31.0, 29.0, 31.0, 31.0, 47.0, 34.0, 32.0, 30.0, 48.0, 47.0, 37.0, 46.0, 37.0, 38.0, 38.0, 46.0, 29.0, 39.0, 32.0, 34.0, 16.0, 23.0, 8.0, 17.0, 13.0, 7.0, 9.0, 8.0, 5.0, 7.0, 11.0, 4.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0], "bins": [-91.125, -88.35546875, -85.5859375, -82.81640625, -80.046875, -77.27734375, -74.5078125, -71.73828125, -68.96875, -66.19921875, -63.4296875, -60.66015625, -57.890625, -55.12109375, -52.3515625, -49.58203125, -46.8125, -44.04296875, -41.2734375, -38.50390625, -35.734375, -32.96484375, -30.1953125, -27.42578125, -24.65625, -21.88671875, -19.1171875, -16.34765625, -13.578125, -10.80859375, -8.0390625, -5.26953125, -2.5, 0.26953125, 3.0390625, 5.80859375, 8.578125, 11.34765625, 14.1171875, 16.88671875, 19.65625, 22.42578125, 25.1953125, 27.96484375, 30.734375, 33.50390625, 36.2734375, 39.04296875, 41.8125, 44.58203125, 47.3515625, 50.12109375, 52.890625, 55.66015625, 58.4296875, 61.19921875, 63.96875, 66.73828125, 69.5078125, 72.27734375, 75.046875, 77.81640625, 80.5859375, 83.35546875, 86.125]}, "gradients/decoder.transformer.h.5.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 2.0, 4.0, 6.0, 6.0, 8.0, 4.0, 16.0, 22.0, 24.0, 47.0, 55.0, 101.0, 110.0, 197.0, 297.0, 448.0, 637.0, 954.0, 1445.0, 2159.0, 3442.0, 5409.0, 9331.0, 16546.0, 34528.0, 152721.0, 1854082.0, 1878758.0, 155919.0, 35112.0, 16881.0, 9432.0, 5436.0, 3466.0, 2259.0, 1464.0, 935.0, 638.0, 419.0, 315.0, 216.0, 139.0, 75.0, 64.0, 46.0, 29.0, 34.0, 19.0, 14.0, 6.0, 5.0, 5.0, 2.0, 4.0, 1.0, 1.0, 0.0, 1.0, 2.0, 2.0, 0.0, 1.0], "bins": [-145.75, -140.724609375, -135.69921875, -130.673828125, -125.6484375, -120.623046875, -115.59765625, -110.572265625, -105.546875, -100.521484375, -95.49609375, -90.470703125, -85.4453125, -80.419921875, -75.39453125, -70.369140625, -65.34375, -60.318359375, -55.29296875, -50.267578125, -45.2421875, -40.216796875, -35.19140625, -30.166015625, -25.140625, -20.115234375, -15.08984375, -10.064453125, -5.0390625, -0.013671875, 5.01171875, 10.037109375, 15.0625, 20.087890625, 25.11328125, 30.138671875, 35.1640625, 40.189453125, 45.21484375, 50.240234375, 55.265625, 60.291015625, 65.31640625, 70.341796875, 75.3671875, 80.392578125, 85.41796875, 90.443359375, 95.46875, 100.494140625, 105.51953125, 110.544921875, 115.5703125, 120.595703125, 125.62109375, 130.646484375, 135.671875, 140.697265625, 145.72265625, 150.748046875, 155.7734375, 160.798828125, 165.82421875, 170.849609375, 175.875]}, "gradients/decoder.transformer.h.5.mlp.c_fc.bias": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 3.0, 9.0, 10.0, 5.0, 14.0, 26.0, 28.0, 44.0, 63.0, 83.0, 185.0, 552.0, 1380.0, 945.0, 319.0, 159.0, 79.0, 56.0, 34.0, 26.0, 13.0, 10.0, 9.0, 9.0, 4.0, 7.0, 3.0, 3.0, 1.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-202.25, -193.73828125, -185.2265625, -176.71484375, -168.203125, -159.69140625, -151.1796875, -142.66796875, -134.15625, -125.64453125, -117.1328125, -108.62109375, -100.109375, -91.59765625, -83.0859375, -74.57421875, -66.0625, -57.55078125, -49.0390625, -40.52734375, -32.015625, -23.50390625, -14.9921875, -6.48046875, 2.03125, 10.54296875, 19.0546875, 27.56640625, 36.078125, 44.58984375, 53.1015625, 61.61328125, 70.125, 78.63671875, 87.1484375, 95.66015625, 104.171875, 112.68359375, 121.1953125, 129.70703125, 138.21875, 146.73046875, 155.2421875, 163.75390625, 172.265625, 180.77734375, 189.2890625, 197.80078125, 206.3125, 214.82421875, 223.3359375, 231.84765625, 240.359375, 248.87109375, 257.3828125, 265.89453125, 274.40625, 282.91796875, 291.4296875, 299.94140625, 308.453125, 316.96484375, 325.4765625, 333.98828125, 342.5]}, "gradients/decoder.transformer.h.5.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 2.0, 2.0, 3.0, 5.0, 4.0, 10.0, 11.0, 19.0, 24.0, 35.0, 55.0, 91.0, 218.0, 417.0, 1060.0, 3720.0, 19249.0, 219922.0, 3872120.0, 64439.0, 9298.0, 2192.0, 730.0, 331.0, 135.0, 69.0, 45.0, 26.0, 25.0, 13.0, 8.0, 4.0, 2.0, 4.0, 1.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 3.0], "bins": [-851.5, -829.828125, -808.15625, -786.484375, -764.8125, -743.140625, -721.46875, -699.796875, -678.125, -656.453125, -634.78125, -613.109375, -591.4375, -569.765625, -548.09375, -526.421875, -504.75, -483.078125, -461.40625, -439.734375, -418.0625, -396.390625, -374.71875, -353.046875, -331.375, -309.703125, -288.03125, -266.359375, -244.6875, -223.015625, -201.34375, -179.671875, -158.0, -136.328125, -114.65625, -92.984375, -71.3125, -49.640625, -27.96875, -6.296875, 15.375, 37.046875, 58.71875, 80.390625, 102.0625, 123.734375, 145.40625, 167.078125, 188.75, 210.421875, 232.09375, 253.765625, 275.4375, 297.109375, 318.78125, 340.453125, 362.125, 383.796875, 405.46875, 427.140625, 448.8125, 470.484375, 492.15625, 513.828125, 535.5]}, "gradients/decoder.transformer.h.5.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 5.0, 36.0, 191.0, 584.0, 153.0, 39.0, 7.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3717.37109375, -3615.325927734375, -3513.280517578125, -3411.2353515625, -3309.190185546875, -3207.14501953125, -3105.099609375, -3003.054443359375, -2901.00927734375, -2798.964111328125, -2696.918701171875, -2594.87353515625, -2492.828369140625, -2390.783203125, -2288.73779296875, -2186.692626953125, -2084.647216796875, -1982.6019287109375, -1880.5567626953125, -1778.511474609375, -1676.46630859375, -1574.4210205078125, -1472.375732421875, -1370.33056640625, -1268.2852783203125, -1166.239990234375, -1064.19482421875, -962.1495361328125, -860.1043090820312, -758.05908203125, -656.0137939453125, -553.9685668945312, -451.923095703125, -349.87786865234375, -247.83261108398438, -145.787353515625, -43.74212646484375, 58.3031005859375, 160.348388671875, 262.39361572265625, 364.4388427734375, 466.48406982421875, 568.529296875, 670.5745849609375, 772.6198120117188, 874.6650390625, 976.7103271484375, 1078.755615234375, 1180.80078125, 1282.8460693359375, 1384.8912353515625, 1486.9365234375, 1588.981689453125, 1691.0269775390625, 1793.072265625, 1895.117431640625, 1997.1627197265625, 2099.2080078125, 2201.253173828125, 2303.29833984375, 2405.34375, 2507.388916015625, 2609.43408203125, 2711.4794921875, 2813.524658203125]}, "gradients/decoder.transformer.h.5.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 2.0, 7.0, 5.0, 8.0, 8.0, 16.0, 21.0, 19.0, 24.0, 24.0, 32.0, 30.0, 45.0, 41.0, 38.0, 53.0, 55.0, 59.0, 52.0, 45.0, 50.0, 47.0, 44.0, 49.0, 45.0, 42.0, 26.0, 23.0, 25.0, 14.0, 12.0, 15.0, 10.0, 6.0, 5.0, 5.0, 2.0, 3.0, 2.0, 4.0, 0.0, 0.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-652.7791748046875, -631.3751220703125, -609.9710693359375, -588.5670166015625, -567.1629638671875, -545.7589721679688, -524.3549194335938, -502.95086669921875, -481.54681396484375, -460.14276123046875, -438.73870849609375, -417.3346862792969, -395.9306335449219, -374.5265808105469, -353.12255859375, -331.718505859375, -310.314453125, -288.910400390625, -267.50634765625, -246.10232543945312, -224.69827270507812, -203.29421997070312, -181.8901824951172, -160.48614501953125, -139.08209228515625, -117.67804718017578, -96.27400207519531, -74.86995697021484, -53.465911865234375, -32.061866760253906, -10.657821655273438, 10.7462158203125, 32.15032958984375, 53.55437469482422, 74.95841979980469, 96.36246490478516, 117.76651000976562, 139.17056274414062, 160.57460021972656, 181.9786376953125, 203.3826904296875, 224.7867431640625, 246.19078063964844, 267.5948181152344, 288.9988708496094, 310.4029235839844, 331.80694580078125, 353.21099853515625, 374.61505126953125, 396.01910400390625, 417.42315673828125, 438.8271789550781, 460.2312316894531, 481.6352844238281, 503.039306640625, 524.443359375, 545.847412109375, 567.25146484375, 588.655517578125, 610.0595703125, 631.463623046875, 652.8676147460938, 674.2716674804688, 695.6757202148438, 717.0797729492188]}, "gradients/decoder.transformer.h.5.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 0.0, 1.0, 5.0, 5.0, 1.0, 1.0, 4.0, 8.0, 5.0, 16.0, 11.0, 8.0, 13.0, 20.0, 23.0, 14.0, 23.0, 24.0, 39.0, 33.0, 21.0, 39.0, 37.0, 30.0, 33.0, 46.0, 41.0, 52.0, 44.0, 49.0, 36.0, 41.0, 45.0, 31.0, 30.0, 27.0, 18.0, 28.0, 23.0, 17.0, 14.0, 12.0, 12.0, 4.0, 6.0, 5.0, 6.0, 3.0, 2.0, 1.0, 0.0, 3.0, 1.0, 2.0, 1.0, 3.0], "bins": [-93.1875, -90.4892578125, -87.791015625, -85.0927734375, -82.39453125, -79.6962890625, -76.998046875, -74.2998046875, -71.6015625, -68.9033203125, -66.205078125, -63.5068359375, -60.80859375, -58.1103515625, -55.412109375, -52.7138671875, -50.015625, -47.3173828125, -44.619140625, -41.9208984375, -39.22265625, -36.5244140625, -33.826171875, -31.1279296875, -28.4296875, -25.7314453125, -23.033203125, -20.3349609375, -17.63671875, -14.9384765625, -12.240234375, -9.5419921875, -6.84375, -4.1455078125, -1.447265625, 1.2509765625, 3.94921875, 6.6474609375, 9.345703125, 12.0439453125, 14.7421875, 17.4404296875, 20.138671875, 22.8369140625, 25.53515625, 28.2333984375, 30.931640625, 33.6298828125, 36.328125, 39.0263671875, 41.724609375, 44.4228515625, 47.12109375, 49.8193359375, 52.517578125, 55.2158203125, 57.9140625, 60.6123046875, 63.310546875, 66.0087890625, 68.70703125, 71.4052734375, 74.103515625, 76.8017578125, 79.5]}, "gradients/decoder.transformer.h.5.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 4.0, 4.0, 3.0, 2.0, 9.0, 17.0, 30.0, 38.0, 56.0, 94.0, 154.0, 258.0, 409.0, 604.0, 957.0, 1535.0, 2694.0, 4260.0, 7159.0, 12854.0, 22292.0, 41544.0, 78460.0, 165355.0, 348010.0, 178051.0, 83767.0, 43584.0, 23767.0, 13340.0, 7654.0, 4519.0, 2767.0, 1594.0, 1030.0, 637.0, 351.0, 269.0, 159.0, 106.0, 66.0, 37.0, 24.0, 20.0, 10.0, 7.0, 5.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.8203125, -8.557373046875, -8.29443359375, -8.031494140625, -7.7685546875, -7.505615234375, -7.24267578125, -6.979736328125, -6.716796875, -6.453857421875, -6.19091796875, -5.927978515625, -5.6650390625, -5.402099609375, -5.13916015625, -4.876220703125, -4.61328125, -4.350341796875, -4.08740234375, -3.824462890625, -3.5615234375, -3.298583984375, -3.03564453125, -2.772705078125, -2.509765625, -2.246826171875, -1.98388671875, -1.720947265625, -1.4580078125, -1.195068359375, -0.93212890625, -0.669189453125, -0.40625, -0.143310546875, 0.11962890625, 0.382568359375, 0.6455078125, 0.908447265625, 1.17138671875, 1.434326171875, 1.697265625, 1.960205078125, 2.22314453125, 2.486083984375, 2.7490234375, 3.011962890625, 3.27490234375, 3.537841796875, 3.80078125, 4.063720703125, 4.32666015625, 4.589599609375, 4.8525390625, 5.115478515625, 5.37841796875, 5.641357421875, 5.904296875, 6.167236328125, 6.43017578125, 6.693115234375, 6.9560546875, 7.218994140625, 7.48193359375, 7.744873046875, 8.0078125]}, "gradients/decoder.transformer.h.5.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0, 5.0, 6.0, 1.0, 7.0, 5.0, 8.0, 6.0, 12.0, 14.0, 14.0, 15.0, 16.0, 24.0, 22.0, 20.0, 26.0, 28.0, 37.0, 51.0, 46.0, 51.0, 53.0, 1054.0, 41.0, 39.0, 40.0, 28.0, 35.0, 45.0, 36.0, 36.0, 28.0, 31.0, 27.0, 22.0, 11.0, 16.0, 12.0, 21.0, 8.0, 7.0, 10.0, 4.0, 5.0, 5.0, 2.0, 2.0, 2.0, 1.0, 1.0, 2.0, 0.0, 1.0], "bins": [-62.375, -60.53515625, -58.6953125, -56.85546875, -55.015625, -53.17578125, -51.3359375, -49.49609375, -47.65625, -45.81640625, -43.9765625, -42.13671875, -40.296875, -38.45703125, -36.6171875, -34.77734375, -32.9375, -31.09765625, -29.2578125, -27.41796875, -25.578125, -23.73828125, -21.8984375, -20.05859375, -18.21875, -16.37890625, -14.5390625, -12.69921875, -10.859375, -9.01953125, -7.1796875, -5.33984375, -3.5, -1.66015625, 0.1796875, 2.01953125, 3.859375, 5.69921875, 7.5390625, 9.37890625, 11.21875, 13.05859375, 14.8984375, 16.73828125, 18.578125, 20.41796875, 22.2578125, 24.09765625, 25.9375, 27.77734375, 29.6171875, 31.45703125, 33.296875, 35.13671875, 36.9765625, 38.81640625, 40.65625, 42.49609375, 44.3359375, 46.17578125, 48.015625, 49.85546875, 51.6953125, 53.53515625, 55.375]}, "gradients/decoder.transformer.h.5.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 1.0, 1.0, 3.0, 4.0, 3.0, 6.0, 12.0, 13.0, 29.0, 45.0, 59.0, 71.0, 99.0, 144.0, 262.0, 382.0, 548.0, 826.0, 1263.0, 1870.0, 2833.0, 4537.0, 7079.0, 11156.0, 18440.0, 30898.0, 53315.0, 95519.0, 185217.0, 1323244.0, 157420.0, 83833.0, 47029.0, 26980.0, 16132.0, 9913.0, 6304.0, 3997.0, 2582.0, 1666.0, 1102.0, 789.0, 480.0, 305.0, 215.0, 157.0, 130.0, 72.0, 53.0, 41.0, 20.0, 16.0, 12.0, 5.0, 4.0, 5.0, 2.0, 1.0, 1.0, 0.0, 3.0], "bins": [-4.4296875, -4.29290771484375, -4.1561279296875, -4.01934814453125, -3.882568359375, -3.74578857421875, -3.6090087890625, -3.47222900390625, -3.33544921875, -3.19866943359375, -3.0618896484375, -2.92510986328125, -2.788330078125, -2.65155029296875, -2.5147705078125, -2.37799072265625, -2.2412109375, -2.10443115234375, -1.9676513671875, -1.83087158203125, -1.694091796875, -1.55731201171875, -1.4205322265625, -1.28375244140625, -1.14697265625, -1.01019287109375, -0.8734130859375, -0.73663330078125, -0.599853515625, -0.46307373046875, -0.3262939453125, -0.18951416015625, -0.052734375, 0.08404541015625, 0.2208251953125, 0.35760498046875, 0.494384765625, 0.63116455078125, 0.7679443359375, 0.90472412109375, 1.04150390625, 1.17828369140625, 1.3150634765625, 1.45184326171875, 1.588623046875, 1.72540283203125, 1.8621826171875, 1.99896240234375, 2.1357421875, 2.27252197265625, 2.4093017578125, 2.54608154296875, 2.682861328125, 2.81964111328125, 2.9564208984375, 3.09320068359375, 3.22998046875, 3.36676025390625, 3.5035400390625, 3.64031982421875, 3.777099609375, 3.91387939453125, 4.0506591796875, 4.18743896484375, 4.32421875]}, "gradients/decoder.transformer.h.5.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 6.0, 3.0, 1.0, 3.0, 7.0, 10.0, 6.0, 17.0, 17.0, 16.0, 22.0, 41.0, 45.0, 62.0, 88.0, 121.0, 129.0, 89.0, 72.0, 54.0, 34.0, 25.0, 20.0, 24.0, 16.0, 14.0, 8.0, 10.0, 10.0, 14.0, 2.0, 7.0, 4.0, 0.0, 2.0, 0.0, 5.0, 3.0, 1.0, 1.0, 1.0, 2.0], "bins": [-0.272705078125, -0.2655525207519531, -0.25839996337890625, -0.2512474060058594, -0.2440948486328125, -0.23694229125976562, -0.22978973388671875, -0.22263717651367188, -0.215484619140625, -0.20833206176757812, -0.20117950439453125, -0.19402694702148438, -0.1868743896484375, -0.17972183227539062, -0.17256927490234375, -0.16541671752929688, -0.15826416015625, -0.15111160278320312, -0.14395904541015625, -0.13680648803710938, -0.1296539306640625, -0.12250137329101562, -0.11534881591796875, -0.10819625854492188, -0.101043701171875, -0.09389114379882812, -0.08673858642578125, -0.07958602905273438, -0.0724334716796875, -0.06528091430664062, -0.05812835693359375, -0.050975799560546875, -0.0438232421875, -0.036670684814453125, -0.02951812744140625, -0.022365570068359375, -0.0152130126953125, -0.008060455322265625, -0.00090789794921875, 0.006244659423828125, 0.013397216796875, 0.020549774169921875, 0.02770233154296875, 0.034854888916015625, 0.0420074462890625, 0.049160003662109375, 0.05631256103515625, 0.06346511840820312, 0.07061767578125, 0.07777023315429688, 0.08492279052734375, 0.09207534790039062, 0.0992279052734375, 0.10638046264648438, 0.11353302001953125, 0.12068557739257812, 0.127838134765625, 0.13499069213867188, 0.14214324951171875, 0.14929580688476562, 0.1564483642578125, 0.16360092163085938, 0.17075347900390625, 0.17790603637695312, 0.18505859375]}, "gradients/decoder.transformer.h.5.crossattention.q_attn.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 0.0, 3.0, 4.0, 2.0, 2.0, 0.0, 4.0, 7.0, 5.0, 11.0, 9.0, 9.0, 12.0, 9.0, 22.0, 21.0, 23.0, 33.0, 43.0, 96.0, 124.0, 238.0, 1046916.0, 535.0, 145.0, 82.0, 60.0, 39.0, 24.0, 12.0, 19.0, 19.0, 2.0, 10.0, 8.0, 3.0, 0.0, 5.0, 5.0, 2.0, 2.0, 1.0, 0.0, 4.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.6640625, -5.4439697265625, -5.223876953125, -5.0037841796875, -4.78369140625, -4.5635986328125, -4.343505859375, -4.1234130859375, -3.9033203125, -3.6832275390625, -3.463134765625, -3.2430419921875, -3.02294921875, -2.8028564453125, -2.582763671875, -2.3626708984375, -2.142578125, -1.9224853515625, -1.702392578125, -1.4822998046875, -1.26220703125, -1.0421142578125, -0.822021484375, -0.6019287109375, -0.3818359375, -0.1617431640625, 0.058349609375, 0.2784423828125, 0.49853515625, 0.7186279296875, 0.938720703125, 1.1588134765625, 1.37890625, 1.5989990234375, 1.819091796875, 2.0391845703125, 2.25927734375, 2.4793701171875, 2.699462890625, 2.9195556640625, 3.1396484375, 3.3597412109375, 3.579833984375, 3.7999267578125, 4.02001953125, 4.2401123046875, 4.460205078125, 4.6802978515625, 4.900390625, 5.1204833984375, 5.340576171875, 5.5606689453125, 5.78076171875, 6.0008544921875, 6.220947265625, 6.4410400390625, 6.6611328125, 6.8812255859375, 7.101318359375, 7.3214111328125, 7.54150390625, 7.7615966796875, 7.981689453125, 8.2017822265625, 8.421875]}, "gradients/decoder.transformer.h.5.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 77.0, 912.0, 32.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.4252603054046631, -0.40330490469932556, -0.38134950399398804, -0.3593940734863281, -0.337438702583313, -0.3154832720756531, -0.29352787137031555, -0.271572470664978, -0.2496170699596405, -0.22766166925430298, -0.20570626854896545, -0.18375085294246674, -0.1617954522371292, -0.1398400515317917, -0.11788463592529297, -0.09592923521995544, -0.07397383451461792, -0.0520184300839901, -0.030063025653362274, -0.008107617497444153, 0.013847783207893372, 0.035803183913230896, 0.057758599519729614, 0.07971400022506714, 0.10166940093040466, 0.12362480163574219, 0.1455802023410797, 0.16753561794757843, 0.18949101865291595, 0.21144641935825348, 0.2334018349647522, 0.2553572356700897, 0.27731257677078247, 0.29926797747612, 0.3212233781814575, 0.34317880868911743, 0.36513417959213257, 0.3870896100997925, 0.40904501080513, 0.43100041151046753, 0.45295581221580505, 0.4749112129211426, 0.4968666136264801, 0.5188220143318176, 0.5407774448394775, 0.5627328157424927, 0.5846882462501526, 0.6066436767578125, 0.6285990476608276, 0.6505544781684875, 0.6725098490715027, 0.6944652795791626, 0.7164206504821777, 0.7383760809898376, 0.7603315114974976, 0.7822868824005127, 0.8042422533035278, 0.8261976838111877, 0.8481530547142029, 0.8701084852218628, 0.8920638561248779, 0.9140192866325378, 0.9359747171401978, 0.9579300880432129, 0.9798855185508728]}, "gradients/decoder.transformer.h.5.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 2.0, 1.0, 2.0, 10.0, 8.0, 6.0, 7.0, 6.0, 6.0, 17.0, 23.0, 21.0, 27.0, 21.0, 25.0, 28.0, 37.0, 47.0, 33.0, 52.0, 49.0, 47.0, 50.0, 46.0, 42.0, 52.0, 48.0, 50.0, 27.0, 37.0, 31.0, 28.0, 19.0, 14.0, 22.0, 15.0, 14.0, 12.0, 7.0, 10.0, 3.0, 6.0, 1.0, 3.0, 2.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.17474377155303955, -0.16944855451583862, -0.1641533374786377, -0.15885813534259796, -0.15356291830539703, -0.1482677012681961, -0.14297249913215637, -0.13767728209495544, -0.13238206505775452, -0.1270868480205536, -0.12179163843393326, -0.11649642884731293, -0.111201211810112, -0.10590599477291107, -0.10061078518629074, -0.09531557559967041, -0.09002035856246948, -0.08472514152526855, -0.07942993193864822, -0.0741347223520279, -0.06883950531482697, -0.06354428827762604, -0.05824907869100571, -0.05295386537909508, -0.04765865206718445, -0.04236343875527382, -0.03706822544336319, -0.03177301213145256, -0.02647779881954193, -0.021182585507631302, -0.015887372195720673, -0.010592158883810043, -0.005296945571899414, -1.73225998878479e-06, 0.0052934810519218445, 0.010588694363832474, 0.015883907675743103, 0.021179120987653732, 0.02647433429956436, 0.03176954761147499, 0.03706476092338562, 0.04235997423529625, 0.04765518754720688, 0.05295040085911751, 0.05824561417102814, 0.06354083120822906, 0.0688360407948494, 0.07413125038146973, 0.07942646741867065, 0.08472168445587158, 0.09001689404249191, 0.09531210362911224, 0.10060732066631317, 0.1059025377035141, 0.11119774729013443, 0.11649295687675476, 0.12178817391395569, 0.12708339095115662, 0.13237860798835754, 0.13767381012439728, 0.1429690271615982, 0.14826424419879913, 0.15355944633483887, 0.1588546633720398, 0.16414988040924072]}, "gradients/decoder.transformer.h.5.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 0.0, 1.0, 5.0, 5.0, 1.0, 1.0, 4.0, 8.0, 5.0, 16.0, 11.0, 8.0, 13.0, 20.0, 23.0, 14.0, 23.0, 24.0, 39.0, 33.0, 21.0, 39.0, 37.0, 30.0, 33.0, 46.0, 41.0, 52.0, 44.0, 49.0, 36.0, 41.0, 45.0, 31.0, 30.0, 27.0, 18.0, 28.0, 23.0, 17.0, 14.0, 12.0, 12.0, 4.0, 6.0, 5.0, 6.0, 3.0, 2.0, 1.0, 0.0, 3.0, 1.0, 2.0, 1.0, 3.0], "bins": [-93.1875, -90.4892578125, -87.791015625, -85.0927734375, -82.39453125, -79.6962890625, -76.998046875, -74.2998046875, -71.6015625, -68.9033203125, -66.205078125, -63.5068359375, -60.80859375, -58.1103515625, -55.412109375, -52.7138671875, -50.015625, -47.3173828125, -44.619140625, -41.9208984375, -39.22265625, -36.5244140625, -33.826171875, -31.1279296875, -28.4296875, -25.7314453125, -23.033203125, -20.3349609375, -17.63671875, -14.9384765625, -12.240234375, -9.5419921875, -6.84375, -4.1455078125, -1.447265625, 1.2509765625, 3.94921875, 6.6474609375, 9.345703125, 12.0439453125, 14.7421875, 17.4404296875, 20.138671875, 22.8369140625, 25.53515625, 28.2333984375, 30.931640625, 33.6298828125, 36.328125, 39.0263671875, 41.724609375, 44.4228515625, 47.12109375, 49.8193359375, 52.517578125, 55.2158203125, 57.9140625, 60.6123046875, 63.310546875, 66.0087890625, 68.70703125, 71.4052734375, 74.103515625, 76.8017578125, 79.5]}, "gradients/decoder.transformer.h.5.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 3.0, 5.0, 4.0, 3.0, 3.0, 4.0, 5.0, 12.0, 21.0, 26.0, 30.0, 48.0, 78.0, 97.0, 170.0, 244.0, 353.0, 646.0, 876.0, 1414.0, 2242.0, 3693.0, 6348.0, 12036.0, 25753.0, 73105.0, 560481.0, 266587.0, 50310.0, 20151.0, 9832.0, 5404.0, 3096.0, 1951.0, 1284.0, 788.0, 514.0, 296.0, 203.0, 142.0, 116.0, 61.0, 33.0, 24.0, 20.0, 21.0, 10.0, 10.0, 4.0, 5.0, 3.0, 2.0, 1.0, 0.0, 3.0, 1.0, 0.0, 0.0, 1.0], "bins": [-155.375, -150.49609375, -145.6171875, -140.73828125, -135.859375, -130.98046875, -126.1015625, -121.22265625, -116.34375, -111.46484375, -106.5859375, -101.70703125, -96.828125, -91.94921875, -87.0703125, -82.19140625, -77.3125, -72.43359375, -67.5546875, -62.67578125, -57.796875, -52.91796875, -48.0390625, -43.16015625, -38.28125, -33.40234375, -28.5234375, -23.64453125, -18.765625, -13.88671875, -9.0078125, -4.12890625, 0.75, 5.62890625, 10.5078125, 15.38671875, 20.265625, 25.14453125, 30.0234375, 34.90234375, 39.78125, 44.66015625, 49.5390625, 54.41796875, 59.296875, 64.17578125, 69.0546875, 73.93359375, 78.8125, 83.69140625, 88.5703125, 93.44921875, 98.328125, 103.20703125, 108.0859375, 112.96484375, 117.84375, 122.72265625, 127.6015625, 132.48046875, 137.359375, 142.23828125, 147.1171875, 151.99609375, 156.875]}, "gradients/decoder.transformer.h.5.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 3.0, 1.0, 2.0, 2.0, 0.0, 4.0, 2.0, 8.0, 9.0, 8.0, 19.0, 17.0, 23.0, 21.0, 44.0, 23.0, 30.0, 47.0, 43.0, 59.0, 69.0, 99.0, 137.0, 1586.0, 254.0, 107.0, 69.0, 63.0, 55.0, 37.0, 29.0, 27.0, 27.0, 22.0, 29.0, 11.0, 14.0, 17.0, 5.0, 14.0, 7.0, 6.0, 5.0, 3.0, 0.0, 1.0, 3.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-261.5, -253.2734375, -245.046875, -236.8203125, -228.59375, -220.3671875, -212.140625, -203.9140625, -195.6875, -187.4609375, -179.234375, -171.0078125, -162.78125, -154.5546875, -146.328125, -138.1015625, -129.875, -121.6484375, -113.421875, -105.1953125, -96.96875, -88.7421875, -80.515625, -72.2890625, -64.0625, -55.8359375, -47.609375, -39.3828125, -31.15625, -22.9296875, -14.703125, -6.4765625, 1.75, 9.9765625, 18.203125, 26.4296875, 34.65625, 42.8828125, 51.109375, 59.3359375, 67.5625, 75.7890625, 84.015625, 92.2421875, 100.46875, 108.6953125, 116.921875, 125.1484375, 133.375, 141.6015625, 149.828125, 158.0546875, 166.28125, 174.5078125, 182.734375, 190.9609375, 199.1875, 207.4140625, 215.640625, 223.8671875, 232.09375, 240.3203125, 248.546875, 256.7734375, 265.0]}, "gradients/decoder.transformer.h.5.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 4.0, 5.0, 1.0, 4.0, 5.0, 9.0, 17.0, 26.0, 22.0, 52.0, 63.0, 103.0, 158.0, 309.0, 796.0, 2620.0, 12433.0, 138489.0, 2909835.0, 69081.0, 8333.0, 1914.0, 693.0, 305.0, 144.0, 100.0, 58.0, 40.0, 24.0, 11.0, 11.0, 12.0, 8.0, 4.0, 13.0, 4.0, 4.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-396.0, -382.96875, -369.9375, -356.90625, -343.875, -330.84375, -317.8125, -304.78125, -291.75, -278.71875, -265.6875, -252.65625, -239.625, -226.59375, -213.5625, -200.53125, -187.5, -174.46875, -161.4375, -148.40625, -135.375, -122.34375, -109.3125, -96.28125, -83.25, -70.21875, -57.1875, -44.15625, -31.125, -18.09375, -5.0625, 7.96875, 21.0, 34.03125, 47.0625, 60.09375, 73.125, 86.15625, 99.1875, 112.21875, 125.25, 138.28125, 151.3125, 164.34375, 177.375, 190.40625, 203.4375, 216.46875, 229.5, 242.53125, 255.5625, 268.59375, 281.625, 294.65625, 307.6875, 320.71875, 333.75, 346.78125, 359.8125, 372.84375, 385.875, 398.90625, 411.9375, 424.96875, 438.0]}, "gradients/decoder.transformer.h.5.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 4.0, 19.0, 61.0, 257.0, 455.0, 166.0, 50.0, 6.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-286.64801025390625, -247.61993408203125, -208.5918731689453, -169.56381225585938, -130.53573608398438, -91.50765991210938, -52.47959899902344, -13.4515380859375, 25.5765380859375, 64.60460662841797, 103.63267517089844, 142.66073608398438, 181.68881225585938, 220.71688842773438, 259.74493408203125, 298.77301025390625, 337.80108642578125, 376.82916259765625, 415.85723876953125, 454.8852844238281, 493.9133605957031, 532.94140625, 571.969482421875, 610.99755859375, 650.025634765625, 689.0537109375, 728.081787109375, 767.10986328125, 806.137939453125, 845.166015625, 884.1940307617188, 923.2221069335938, 962.250244140625, 1001.2783203125, 1040.306396484375, 1079.33447265625, 1118.362548828125, 1157.390625, 1196.418701171875, 1235.44677734375, 1274.474853515625, 1313.5029296875, 1352.531005859375, 1391.55908203125, 1430.587158203125, 1469.615234375, 1508.643310546875, 1547.67138671875, 1586.6993408203125, 1625.7274169921875, 1664.7554931640625, 1703.7835693359375, 1742.8116455078125, 1781.8397216796875, 1820.8677978515625, 1859.895751953125, 1898.923828125, 1937.951904296875, 1976.97998046875, 2016.008056640625, 2055.0361328125, 2094.064208984375, 2133.09228515625, 2172.120361328125, 2211.1484375]}, "gradients/decoder.transformer.h.5.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 5.0, 5.0, 11.0, 8.0, 7.0, 8.0, 14.0, 13.0, 18.0, 24.0, 22.0, 19.0, 23.0, 27.0, 27.0, 43.0, 38.0, 44.0, 38.0, 36.0, 50.0, 48.0, 42.0, 48.0, 53.0, 39.0, 45.0, 32.0, 35.0, 30.0, 36.0, 21.0, 21.0, 12.0, 11.0, 15.0, 11.0, 8.0, 4.0, 10.0, 2.0, 4.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0], "bins": [-546.4813842773438, -529.1879272460938, -511.8945007324219, -494.60107421875, -477.3076171875, -460.01416015625, -442.7207336425781, -425.42730712890625, -408.13385009765625, -390.84039306640625, -373.5469665527344, -356.2535400390625, -338.9600830078125, -321.6666259765625, -304.3731994628906, -287.07977294921875, -269.78631591796875, -252.4928741455078, -235.19943237304688, -217.90599060058594, -200.612548828125, -183.31910705566406, -166.02566528320312, -148.7322235107422, -131.43878173828125, -114.14533996582031, -96.85189819335938, -79.55845642089844, -62.2650146484375, -44.97157287597656, -27.678131103515625, -10.384689331054688, 6.90869140625, 24.202133178710938, 41.495574951171875, 58.78901672363281, 76.08245849609375, 93.37590026855469, 110.66934204101562, 127.96278381347656, 145.2562255859375, 162.54966735839844, 179.84310913085938, 197.1365509033203, 214.42999267578125, 231.7234344482422, 249.01687622070312, 266.310302734375, 283.603759765625, 300.897216796875, 318.1906433105469, 335.48406982421875, 352.77752685546875, 370.07098388671875, 387.3644104003906, 404.6578369140625, 421.9512939453125, 439.2447509765625, 456.5381774902344, 473.83160400390625, 491.12506103515625, 508.41851806640625, 525.7119140625, 543.00537109375, 560.298828125]}, "gradients/decoder.transformer.h.4.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 3.0, 0.0, 1.0, 0.0, 5.0, 1.0, 4.0, 5.0, 4.0, 2.0, 7.0, 13.0, 17.0, 10.0, 18.0, 9.0, 14.0, 17.0, 22.0, 26.0, 27.0, 26.0, 37.0, 36.0, 35.0, 32.0, 46.0, 52.0, 31.0, 54.0, 40.0, 42.0, 35.0, 45.0, 34.0, 28.0, 27.0, 35.0, 36.0, 17.0, 17.0, 21.0, 18.0, 14.0, 10.0, 7.0, 9.0, 6.0, 3.0, 5.0, 5.0, 4.0, 0.0, 1.0, 4.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0], "bins": [-87.9375, -85.1826171875, -82.427734375, -79.6728515625, -76.91796875, -74.1630859375, -71.408203125, -68.6533203125, -65.8984375, -63.1435546875, -60.388671875, -57.6337890625, -54.87890625, -52.1240234375, -49.369140625, -46.6142578125, -43.859375, -41.1044921875, -38.349609375, -35.5947265625, -32.83984375, -30.0849609375, -27.330078125, -24.5751953125, -21.8203125, -19.0654296875, -16.310546875, -13.5556640625, -10.80078125, -8.0458984375, -5.291015625, -2.5361328125, 0.21875, 2.9736328125, 5.728515625, 8.4833984375, 11.23828125, 13.9931640625, 16.748046875, 19.5029296875, 22.2578125, 25.0126953125, 27.767578125, 30.5224609375, 33.27734375, 36.0322265625, 38.787109375, 41.5419921875, 44.296875, 47.0517578125, 49.806640625, 52.5615234375, 55.31640625, 58.0712890625, 60.826171875, 63.5810546875, 66.3359375, 69.0908203125, 71.845703125, 74.6005859375, 77.35546875, 80.1103515625, 82.865234375, 85.6201171875, 88.375]}, "gradients/decoder.transformer.h.4.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 1.0, 2.0, 1.0, 1.0, 0.0, 3.0, 3.0, 3.0, 3.0, 7.0, 5.0, 9.0, 10.0, 18.0, 24.0, 41.0, 72.0, 102.0, 208.0, 433.0, 940.0, 2302.0, 6286.0, 20316.0, 244017.0, 3746782.0, 146295.0, 17162.0, 5508.0, 1981.0, 856.0, 368.0, 224.0, 120.0, 59.0, 33.0, 28.0, 16.0, 12.0, 10.0, 5.0, 2.0, 5.0, 5.0, 2.0, 3.0, 3.0, 3.0, 2.0, 0.0, 0.0, 0.0, 4.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-281.0, -271.453125, -261.90625, -252.359375, -242.8125, -233.265625, -223.71875, -214.171875, -204.625, -195.078125, -185.53125, -175.984375, -166.4375, -156.890625, -147.34375, -137.796875, -128.25, -118.703125, -109.15625, -99.609375, -90.0625, -80.515625, -70.96875, -61.421875, -51.875, -42.328125, -32.78125, -23.234375, -13.6875, -4.140625, 5.40625, 14.953125, 24.5, 34.046875, 43.59375, 53.140625, 62.6875, 72.234375, 81.78125, 91.328125, 100.875, 110.421875, 119.96875, 129.515625, 139.0625, 148.609375, 158.15625, 167.703125, 177.25, 186.796875, 196.34375, 205.890625, 215.4375, 224.984375, 234.53125, 244.078125, 253.625, 263.171875, 272.71875, 282.265625, 291.8125, 301.359375, 310.90625, 320.453125, 330.0]}, "gradients/decoder.transformer.h.4.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 3.0, 2.0, 5.0, 5.0, 2.0, 6.0, 5.0, 15.0, 9.0, 22.0, 14.0, 18.0, 32.0, 40.0, 56.0, 99.0, 192.0, 485.0, 1017.0, 1108.0, 467.0, 191.0, 80.0, 49.0, 38.0, 34.0, 19.0, 19.0, 9.0, 17.0, 5.0, 7.0, 4.0, 3.0, 0.0, 2.0, 1.0, 3.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-201.875, -196.271484375, -190.66796875, -185.064453125, -179.4609375, -173.857421875, -168.25390625, -162.650390625, -157.046875, -151.443359375, -145.83984375, -140.236328125, -134.6328125, -129.029296875, -123.42578125, -117.822265625, -112.21875, -106.615234375, -101.01171875, -95.408203125, -89.8046875, -84.201171875, -78.59765625, -72.994140625, -67.390625, -61.787109375, -56.18359375, -50.580078125, -44.9765625, -39.373046875, -33.76953125, -28.166015625, -22.5625, -16.958984375, -11.35546875, -5.751953125, -0.1484375, 5.455078125, 11.05859375, 16.662109375, 22.265625, 27.869140625, 33.47265625, 39.076171875, 44.6796875, 50.283203125, 55.88671875, 61.490234375, 67.09375, 72.697265625, 78.30078125, 83.904296875, 89.5078125, 95.111328125, 100.71484375, 106.318359375, 111.921875, 117.525390625, 123.12890625, 128.732421875, 134.3359375, 139.939453125, 145.54296875, 151.146484375, 156.75]}, "gradients/decoder.transformer.h.4.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 7.0, 9.0, 21.0, 13.0, 28.0, 25.0, 55.0, 94.0, 192.0, 270.0, 481.0, 907.0, 1943.0, 3933.0, 9122.0, 23196.0, 77011.0, 1295854.0, 2645138.0, 90899.0, 26558.0, 9963.0, 4244.0, 2060.0, 990.0, 528.0, 284.0, 159.0, 103.0, 65.0, 37.0, 27.0, 18.0, 11.0, 15.0, 11.0, 6.0, 4.0, 3.0, 1.0, 4.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0], "bins": [-273.5, -264.96875, -256.4375, -247.90625, -239.375, -230.84375, -222.3125, -213.78125, -205.25, -196.71875, -188.1875, -179.65625, -171.125, -162.59375, -154.0625, -145.53125, -137.0, -128.46875, -119.9375, -111.40625, -102.875, -94.34375, -85.8125, -77.28125, -68.75, -60.21875, -51.6875, -43.15625, -34.625, -26.09375, -17.5625, -9.03125, -0.5, 8.03125, 16.5625, 25.09375, 33.625, 42.15625, 50.6875, 59.21875, 67.75, 76.28125, 84.8125, 93.34375, 101.875, 110.40625, 118.9375, 127.46875, 136.0, 144.53125, 153.0625, 161.59375, 170.125, 178.65625, 187.1875, 195.71875, 204.25, 212.78125, 221.3125, 229.84375, 238.375, 246.90625, 255.4375, 263.96875, 272.5]}, "gradients/decoder.transformer.h.4.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 7.0, 13.0, 27.0, 57.0, 152.0, 364.0, 261.0, 90.0, 26.0, 11.0, 4.0, 1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2300.016845703125, -2253.71630859375, -2207.415771484375, -2161.114990234375, -2114.814453125, -2068.513916015625, -2022.21337890625, -1975.9127197265625, -1929.612060546875, -1883.3115234375, -1837.0108642578125, -1790.7103271484375, -1744.40966796875, -1698.109130859375, -1651.80859375, -1605.5079345703125, -1559.2073974609375, -1512.9068603515625, -1466.606201171875, -1420.3056640625, -1374.0050048828125, -1327.7044677734375, -1281.40380859375, -1235.103271484375, -1188.802734375, -1142.502197265625, -1096.2015380859375, -1049.9010009765625, -1003.600341796875, -957.2998046875, -910.9992065429688, -864.6986083984375, -818.3980712890625, -772.0974731445312, -725.796875, -679.496337890625, -633.1956787109375, -586.8951416015625, -540.5945434570312, -494.2939453125, -447.99334716796875, -401.6927490234375, -355.39215087890625, -309.0915832519531, -262.7909851074219, -216.49038696289062, -170.1898193359375, -123.88922119140625, -77.588623046875, -31.28803253173828, 15.012557983398438, 61.313140869140625, 107.61373901367188, 153.91433715820312, 200.21490478515625, 246.5155029296875, 292.81610107421875, 339.11669921875, 385.41729736328125, 431.7178649902344, 478.0184631347656, 524.319091796875, 570.61962890625, 616.9202270507812, 663.2208251953125]}, "gradients/decoder.transformer.h.4.ln_2.bias": {"_type": "histogram", "values": [2.0, 2.0, 4.0, 1.0, 1.0, 1.0, 6.0, 4.0, 4.0, 2.0, 8.0, 10.0, 11.0, 19.0, 15.0, 15.0, 23.0, 21.0, 21.0, 29.0, 37.0, 27.0, 35.0, 47.0, 40.0, 36.0, 27.0, 40.0, 53.0, 49.0, 34.0, 42.0, 37.0, 25.0, 34.0, 20.0, 27.0, 34.0, 19.0, 30.0, 21.0, 26.0, 12.0, 16.0, 13.0, 7.0, 4.0, 8.0, 6.0, 5.0, 4.0, 3.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-371.20050048828125, -358.5307922363281, -345.861083984375, -333.19140625, -320.5216979980469, -307.85198974609375, -295.1822814941406, -282.5125732421875, -269.8428955078125, -257.1731872558594, -244.5034942626953, -231.8337860107422, -219.16409301757812, -206.494384765625, -193.82467651367188, -181.1549835205078, -168.4852752685547, -155.81556701660156, -143.1458740234375, -130.47616577148438, -117.80647277832031, -105.13676452636719, -92.4670639038086, -79.79736328125, -67.1276626586914, -54.45796203613281, -41.78826141357422, -29.11855697631836, -16.448856353759766, -3.779155731201172, 8.890548706054688, 21.56024932861328, 34.229949951171875, 46.89965057373047, 59.56935119628906, 72.23905944824219, 84.90875244140625, 97.57846069335938, 110.24816131591797, 122.91786193847656, 135.58755493164062, 148.25726318359375, 160.9269561767578, 173.59666442871094, 186.266357421875, 198.93606567382812, 211.60577392578125, 224.2754669189453, 236.94517517089844, 249.61488342285156, 262.2845764160156, 274.95428466796875, 287.6239929199219, 300.2936706542969, 312.96337890625, 325.6330871582031, 338.30279541015625, 350.9725036621094, 363.6422119140625, 376.3118896484375, 388.9815979003906, 401.65130615234375, 414.3210144042969, 426.99072265625, 439.660400390625]}, "gradients/decoder.transformer.h.4.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 3.0, 3.0, 5.0, 3.0, 4.0, 8.0, 9.0, 7.0, 18.0, 17.0, 19.0, 26.0, 20.0, 31.0, 35.0, 27.0, 41.0, 40.0, 20.0, 39.0, 42.0, 52.0, 39.0, 40.0, 42.0, 44.0, 40.0, 42.0, 46.0, 41.0, 37.0, 19.0, 21.0, 23.0, 22.0, 17.0, 13.0, 13.0, 10.0, 9.0, 7.0, 4.0, 3.0, 2.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-90.4375, -87.58984375, -84.7421875, -81.89453125, -79.046875, -76.19921875, -73.3515625, -70.50390625, -67.65625, -64.80859375, -61.9609375, -59.11328125, -56.265625, -53.41796875, -50.5703125, -47.72265625, -44.875, -42.02734375, -39.1796875, -36.33203125, -33.484375, -30.63671875, -27.7890625, -24.94140625, -22.09375, -19.24609375, -16.3984375, -13.55078125, -10.703125, -7.85546875, -5.0078125, -2.16015625, 0.6875, 3.53515625, 6.3828125, 9.23046875, 12.078125, 14.92578125, 17.7734375, 20.62109375, 23.46875, 26.31640625, 29.1640625, 32.01171875, 34.859375, 37.70703125, 40.5546875, 43.40234375, 46.25, 49.09765625, 51.9453125, 54.79296875, 57.640625, 60.48828125, 63.3359375, 66.18359375, 69.03125, 71.87890625, 74.7265625, 77.57421875, 80.421875, 83.26953125, 86.1171875, 88.96484375, 91.8125]}, "gradients/decoder.transformer.h.4.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 3.0, 5.0, 8.0, 17.0, 23.0, 28.0, 34.0, 56.0, 94.0, 122.0, 214.0, 324.0, 521.0, 810.0, 1203.0, 1920.0, 3054.0, 4856.0, 7692.0, 12304.0, 20375.0, 34086.0, 61466.0, 114575.0, 237093.0, 262322.0, 125301.0, 65900.0, 36663.0, 21859.0, 13485.0, 7942.0, 5147.0, 3218.0, 2101.0, 1286.0, 896.0, 540.0, 379.0, 218.0, 130.0, 93.0, 69.0, 50.0, 22.0, 23.0, 11.0, 10.0, 8.0, 6.0, 4.0, 1.0, 0.0, 1.0, 1.0, 1.0], "bins": [-8.1171875, -7.8717041015625, -7.626220703125, -7.3807373046875, -7.13525390625, -6.8897705078125, -6.644287109375, -6.3988037109375, -6.1533203125, -5.9078369140625, -5.662353515625, -5.4168701171875, -5.17138671875, -4.9259033203125, -4.680419921875, -4.4349365234375, -4.189453125, -3.9439697265625, -3.698486328125, -3.4530029296875, -3.20751953125, -2.9620361328125, -2.716552734375, -2.4710693359375, -2.2255859375, -1.9801025390625, -1.734619140625, -1.4891357421875, -1.24365234375, -0.9981689453125, -0.752685546875, -0.5072021484375, -0.26171875, -0.0162353515625, 0.229248046875, 0.4747314453125, 0.72021484375, 0.9656982421875, 1.211181640625, 1.4566650390625, 1.7021484375, 1.9476318359375, 2.193115234375, 2.4385986328125, 2.68408203125, 2.9295654296875, 3.175048828125, 3.4205322265625, 3.666015625, 3.9114990234375, 4.156982421875, 4.4024658203125, 4.64794921875, 4.8934326171875, 5.138916015625, 5.3843994140625, 5.6298828125, 5.8753662109375, 6.120849609375, 6.3663330078125, 6.61181640625, 6.8572998046875, 7.102783203125, 7.3482666015625, 7.59375]}, "gradients/decoder.transformer.h.4.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 3.0, 1.0, 2.0, 6.0, 1.0, 6.0, 5.0, 12.0, 11.0, 8.0, 12.0, 20.0, 19.0, 19.0, 22.0, 25.0, 32.0, 22.0, 39.0, 38.0, 54.0, 44.0, 42.0, 39.0, 33.0, 1063.0, 45.0, 49.0, 45.0, 30.0, 35.0, 37.0, 33.0, 30.0, 19.0, 22.0, 19.0, 11.0, 10.0, 11.0, 13.0, 7.0, 14.0, 9.0, 5.0, 3.0, 5.0, 2.0, 1.0, 3.0, 1.0, 1.0, 1.0, 0.0, 2.0, 2.0], "bins": [-60.59375, -58.7763671875, -56.958984375, -55.1416015625, -53.32421875, -51.5068359375, -49.689453125, -47.8720703125, -46.0546875, -44.2373046875, -42.419921875, -40.6025390625, -38.78515625, -36.9677734375, -35.150390625, -33.3330078125, -31.515625, -29.6982421875, -27.880859375, -26.0634765625, -24.24609375, -22.4287109375, -20.611328125, -18.7939453125, -16.9765625, -15.1591796875, -13.341796875, -11.5244140625, -9.70703125, -7.8896484375, -6.072265625, -4.2548828125, -2.4375, -0.6201171875, 1.197265625, 3.0146484375, 4.83203125, 6.6494140625, 8.466796875, 10.2841796875, 12.1015625, 13.9189453125, 15.736328125, 17.5537109375, 19.37109375, 21.1884765625, 23.005859375, 24.8232421875, 26.640625, 28.4580078125, 30.275390625, 32.0927734375, 33.91015625, 35.7275390625, 37.544921875, 39.3623046875, 41.1796875, 42.9970703125, 44.814453125, 46.6318359375, 48.44921875, 50.2666015625, 52.083984375, 53.9013671875, 55.71875]}, "gradients/decoder.transformer.h.4.crossattention.c_attn.weight": {"_type": "histogram", "values": [4.0, 2.0, 0.0, 3.0, 7.0, 6.0, 6.0, 18.0, 17.0, 20.0, 36.0, 57.0, 81.0, 99.0, 174.0, 229.0, 311.0, 424.0, 613.0, 898.0, 1307.0, 1912.0, 2749.0, 4239.0, 6307.0, 9602.0, 14833.0, 24322.0, 39304.0, 64923.0, 114380.0, 267275.0, 1221058.0, 129491.0, 73452.0, 43275.0, 26725.0, 16532.0, 10800.0, 7172.0, 4597.0, 3148.0, 2102.0, 1453.0, 988.0, 661.0, 442.0, 321.0, 234.0, 157.0, 110.0, 78.0, 69.0, 38.0, 26.0, 14.0, 15.0, 11.0, 8.0, 8.0, 2.0, 2.0, 2.0, 3.0], "bins": [-4.1015625, -3.97381591796875, -3.8460693359375, -3.71832275390625, -3.590576171875, -3.46282958984375, -3.3350830078125, -3.20733642578125, -3.07958984375, -2.95184326171875, -2.8240966796875, -2.69635009765625, -2.568603515625, -2.44085693359375, -2.3131103515625, -2.18536376953125, -2.0576171875, -1.92987060546875, -1.8021240234375, -1.67437744140625, -1.546630859375, -1.41888427734375, -1.2911376953125, -1.16339111328125, -1.03564453125, -0.90789794921875, -0.7801513671875, -0.65240478515625, -0.524658203125, -0.39691162109375, -0.2691650390625, -0.14141845703125, -0.013671875, 0.11407470703125, 0.2418212890625, 0.36956787109375, 0.497314453125, 0.62506103515625, 0.7528076171875, 0.88055419921875, 1.00830078125, 1.13604736328125, 1.2637939453125, 1.39154052734375, 1.519287109375, 1.64703369140625, 1.7747802734375, 1.90252685546875, 2.0302734375, 2.15802001953125, 2.2857666015625, 2.41351318359375, 2.541259765625, 2.66900634765625, 2.7967529296875, 2.92449951171875, 3.05224609375, 3.17999267578125, 3.3077392578125, 3.43548583984375, 3.563232421875, 3.69097900390625, 3.8187255859375, 3.94647216796875, 4.07421875]}, "gradients/decoder.transformer.h.4.crossattention.q_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 4.0, 2.0, 2.0, 1.0, 3.0, 2.0, 6.0, 2.0, 6.0, 5.0, 8.0, 11.0, 8.0, 11.0, 12.0, 22.0, 12.0, 21.0, 29.0, 39.0, 66.0, 84.0, 109.0, 140.0, 85.0, 74.0, 55.0, 30.0, 33.0, 29.0, 15.0, 12.0, 13.0, 8.0, 8.0, 10.0, 6.0, 8.0, 3.0, 5.0, 1.0, 3.0, 2.0, 3.0, 0.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.1943359375, -0.18828773498535156, -0.18223953247070312, -0.1761913299560547, -0.17014312744140625, -0.1640949249267578, -0.15804672241210938, -0.15199851989746094, -0.1459503173828125, -0.13990211486816406, -0.13385391235351562, -0.1278057098388672, -0.12175750732421875, -0.11570930480957031, -0.10966110229492188, -0.10361289978027344, -0.097564697265625, -0.09151649475097656, -0.08546829223632812, -0.07942008972167969, -0.07337188720703125, -0.06732368469238281, -0.061275482177734375, -0.05522727966308594, -0.0491790771484375, -0.04313087463378906, -0.037082672119140625, -0.031034469604492188, -0.02498626708984375, -0.018938064575195312, -0.012889862060546875, -0.0068416595458984375, -0.00079345703125, 0.0052547454833984375, 0.011302947998046875, 0.017351150512695312, 0.02339935302734375, 0.029447555541992188, 0.035495758056640625, 0.04154396057128906, 0.0475921630859375, 0.05364036560058594, 0.059688568115234375, 0.06573677062988281, 0.07178497314453125, 0.07783317565917969, 0.08388137817382812, 0.08992958068847656, 0.095977783203125, 0.10202598571777344, 0.10807418823242188, 0.11412239074707031, 0.12017059326171875, 0.1262187957763672, 0.13226699829101562, 0.13831520080566406, 0.1443634033203125, 0.15041160583496094, 0.15645980834960938, 0.1625080108642578, 0.16855621337890625, 0.1746044158935547, 0.18065261840820312, 0.18670082092285156, 0.1927490234375]}, "gradients/decoder.transformer.h.4.crossattention.q_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 0.0, 3.0, 3.0, 3.0, 2.0, 4.0, 5.0, 8.0, 5.0, 6.0, 7.0, 14.0, 11.0, 13.0, 17.0, 28.0, 40.0, 39.0, 86.0, 127.0, 289.0, 1046346.0, 995.0, 178.0, 96.0, 58.0, 34.0, 24.0, 21.0, 18.0, 13.0, 8.0, 11.0, 10.0, 5.0, 6.0, 5.0, 2.0, 6.0, 2.0, 4.0, 0.0, 3.0, 1.0, 4.0, 1.0, 2.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 2.0], "bins": [-6.12890625, -5.9359130859375, -5.742919921875, -5.5499267578125, -5.35693359375, -5.1639404296875, -4.970947265625, -4.7779541015625, -4.5849609375, -4.3919677734375, -4.198974609375, -4.0059814453125, -3.81298828125, -3.6199951171875, -3.427001953125, -3.2340087890625, -3.041015625, -2.8480224609375, -2.655029296875, -2.4620361328125, -2.26904296875, -2.0760498046875, -1.883056640625, -1.6900634765625, -1.4970703125, -1.3040771484375, -1.111083984375, -0.9180908203125, -0.72509765625, -0.5321044921875, -0.339111328125, -0.1461181640625, 0.046875, 0.2398681640625, 0.432861328125, 0.6258544921875, 0.81884765625, 1.0118408203125, 1.204833984375, 1.3978271484375, 1.5908203125, 1.7838134765625, 1.976806640625, 2.1697998046875, 2.36279296875, 2.5557861328125, 2.748779296875, 2.9417724609375, 3.134765625, 3.3277587890625, 3.520751953125, 3.7137451171875, 3.90673828125, 4.0997314453125, 4.292724609375, 4.4857177734375, 4.6787109375, 4.8717041015625, 5.064697265625, 5.2576904296875, 5.45068359375, 5.6436767578125, 5.836669921875, 6.0296630859375, 6.22265625]}, "gradients/decoder.transformer.h.4.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 34.0, 987.0], "bins": [-4.7409892082214355, -4.66603946685791, -4.591089725494385, -4.516139984130859, -4.441190242767334, -4.366240501403809, -4.291290760040283, -4.216341018676758, -4.141390800476074, -4.066441059112549, -3.9914913177490234, -3.916541576385498, -3.8415918350219727, -3.766641855239868, -3.6916921138763428, -3.6167423725128174, -3.541792869567871, -3.4668431282043457, -3.3918933868408203, -3.316943645477295, -3.2419939041137695, -3.167043924331665, -3.0920941829681396, -3.0171444416046143, -2.942194700241089, -2.8672449588775635, -2.792295217514038, -2.7173454761505127, -2.642395496368408, -2.567445755004883, -2.4924960136413574, -2.417546272277832, -2.3425967693328857, -2.2676470279693604, -2.192697286605835, -2.1177475452423096, -2.042797565460205, -1.9678479433059692, -1.8928980827331543, -1.817948341369629, -1.7429986000061035, -1.6680488586425781, -1.5930991172790527, -1.5181492567062378, -1.4431995153427124, -1.368249773979187, -1.293299913406372, -1.2183501720428467, -1.1434004306793213, -1.068450689315796, -0.9935008883476257, -0.9185510873794556, -0.8436013460159302, -0.7686516046524048, -0.6937018036842346, -0.6187520027160645, -0.5438022017478943, -0.4688524305820465, -0.39390265941619873, -0.31895288825035095, -0.24400311708450317, -0.1690533459186554, -0.09410357475280762, -0.01915377378463745, 0.055795956403017044]}, "gradients/decoder.transformer.h.4.ln_cross_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 2.0, 2.0, 3.0, 1.0, 3.0, 2.0, 3.0, 5.0, 7.0, 5.0, 7.0, 12.0, 12.0, 5.0, 8.0, 13.0, 12.0, 17.0, 16.0, 28.0, 25.0, 23.0, 32.0, 37.0, 31.0, 39.0, 34.0, 41.0, 36.0, 49.0, 42.0, 29.0, 34.0, 42.0, 40.0, 40.0, 30.0, 23.0, 31.0, 25.0, 28.0, 27.0, 17.0, 21.0, 12.0, 11.0, 12.0, 9.0, 5.0, 7.0, 4.0, 11.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 2.0], "bins": [-0.284653902053833, -0.2759450376033783, -0.26723620295524597, -0.25852733850479126, -0.24981847405433655, -0.24110962450504303, -0.2324007749557495, -0.2236919105052948, -0.2149830460548401, -0.20627419650554657, -0.19756533205509186, -0.18885648250579834, -0.18014761805534363, -0.1714387685060501, -0.1627299189567566, -0.15402105450630188, -0.14531220495700836, -0.13660335540771484, -0.12789449095726013, -0.11918564140796661, -0.1104767769575119, -0.10176792740821838, -0.09305907040834427, -0.08435021340847015, -0.07564135640859604, -0.06693249940872192, -0.05822364240884781, -0.04951478913426399, -0.04080593213438988, -0.03209707513451576, -0.023388221859931946, -0.01467936486005783, -0.005970507860183716, 0.0027383482083678246, 0.011447204276919365, 0.02015605941414833, 0.028864916414022446, 0.03757377341389656, 0.04628262668848038, 0.05499148368835449, 0.06370034068822861, 0.07240919768810272, 0.08111805468797684, 0.08982691168785095, 0.09853576123714447, 0.10724462568759918, 0.1159534752368927, 0.12466233223676682, 0.13337118923664093, 0.14208003878593445, 0.15078890323638916, 0.15949775278568268, 0.1682066172361374, 0.1769154667854309, 0.18562433123588562, 0.19433318078517914, 0.20304203033447266, 0.21175087988376617, 0.2204597443342209, 0.2291685938835144, 0.23787745833396912, 0.24658630788326263, 0.25529515743255615, 0.26400402188301086, 0.2727128863334656]}, "gradients/decoder.transformer.h.4.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 3.0, 3.0, 5.0, 3.0, 4.0, 8.0, 9.0, 7.0, 18.0, 17.0, 19.0, 26.0, 20.0, 31.0, 35.0, 27.0, 41.0, 40.0, 20.0, 39.0, 42.0, 52.0, 39.0, 40.0, 42.0, 44.0, 40.0, 42.0, 46.0, 41.0, 37.0, 19.0, 21.0, 23.0, 22.0, 17.0, 13.0, 13.0, 10.0, 9.0, 7.0, 4.0, 3.0, 2.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-90.4375, -87.58984375, -84.7421875, -81.89453125, -79.046875, -76.19921875, -73.3515625, -70.50390625, -67.65625, -64.80859375, -61.9609375, -59.11328125, -56.265625, -53.41796875, -50.5703125, -47.72265625, -44.875, -42.02734375, -39.1796875, -36.33203125, -33.484375, -30.63671875, -27.7890625, -24.94140625, -22.09375, -19.24609375, -16.3984375, -13.55078125, -10.703125, -7.85546875, -5.0078125, -2.16015625, 0.6875, 3.53515625, 6.3828125, 9.23046875, 12.078125, 14.92578125, 17.7734375, 20.62109375, 23.46875, 26.31640625, 29.1640625, 32.01171875, 34.859375, 37.70703125, 40.5546875, 43.40234375, 46.25, 49.09765625, 51.9453125, 54.79296875, 57.640625, 60.48828125, 63.3359375, 66.18359375, 69.03125, 71.87890625, 74.7265625, 77.57421875, 80.421875, 83.26953125, 86.1171875, 88.96484375, 91.8125]}, "gradients/decoder.transformer.h.4.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 0.0, 2.0, 2.0, 1.0, 5.0, 2.0, 2.0, 5.0, 9.0, 4.0, 12.0, 12.0, 17.0, 52.0, 48.0, 55.0, 106.0, 151.0, 217.0, 374.0, 614.0, 953.0, 1819.0, 3428.0, 7156.0, 16571.0, 48190.0, 289335.0, 567470.0, 71461.0, 22025.0, 8979.0, 4228.0, 2191.0, 1174.0, 704.0, 404.0, 250.0, 164.0, 121.0, 74.0, 53.0, 26.0, 29.0, 15.0, 18.0, 11.0, 9.0, 6.0, 6.0, 2.0, 2.0, 0.0, 2.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-171.0, -165.509765625, -160.01953125, -154.529296875, -149.0390625, -143.548828125, -138.05859375, -132.568359375, -127.078125, -121.587890625, -116.09765625, -110.607421875, -105.1171875, -99.626953125, -94.13671875, -88.646484375, -83.15625, -77.666015625, -72.17578125, -66.685546875, -61.1953125, -55.705078125, -50.21484375, -44.724609375, -39.234375, -33.744140625, -28.25390625, -22.763671875, -17.2734375, -11.783203125, -6.29296875, -0.802734375, 4.6875, 10.177734375, 15.66796875, 21.158203125, 26.6484375, 32.138671875, 37.62890625, 43.119140625, 48.609375, 54.099609375, 59.58984375, 65.080078125, 70.5703125, 76.060546875, 81.55078125, 87.041015625, 92.53125, 98.021484375, 103.51171875, 109.001953125, 114.4921875, 119.982421875, 125.47265625, 130.962890625, 136.453125, 141.943359375, 147.43359375, 152.923828125, 158.4140625, 163.904296875, 169.39453125, 174.884765625, 180.375]}, "gradients/decoder.transformer.h.4.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 2.0, 2.0, 1.0, 2.0, 4.0, 8.0, 4.0, 4.0, 5.0, 12.0, 12.0, 16.0, 10.0, 12.0, 19.0, 32.0, 33.0, 33.0, 34.0, 23.0, 33.0, 44.0, 64.0, 77.0, 161.0, 1651.0, 271.0, 83.0, 52.0, 47.0, 45.0, 27.0, 31.0, 30.0, 26.0, 27.0, 25.0, 10.0, 15.0, 21.0, 13.0, 13.0, 11.0, 8.0, 3.0, 3.0, 0.0, 4.0, 3.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-185.875, -178.900390625, -171.92578125, -164.951171875, -157.9765625, -151.001953125, -144.02734375, -137.052734375, -130.078125, -123.103515625, -116.12890625, -109.154296875, -102.1796875, -95.205078125, -88.23046875, -81.255859375, -74.28125, -67.306640625, -60.33203125, -53.357421875, -46.3828125, -39.408203125, -32.43359375, -25.458984375, -18.484375, -11.509765625, -4.53515625, 2.439453125, 9.4140625, 16.388671875, 23.36328125, 30.337890625, 37.3125, 44.287109375, 51.26171875, 58.236328125, 65.2109375, 72.185546875, 79.16015625, 86.134765625, 93.109375, 100.083984375, 107.05859375, 114.033203125, 121.0078125, 127.982421875, 134.95703125, 141.931640625, 148.90625, 155.880859375, 162.85546875, 169.830078125, 176.8046875, 183.779296875, 190.75390625, 197.728515625, 204.703125, 211.677734375, 218.65234375, 225.626953125, 232.6015625, 239.576171875, 246.55078125, 253.525390625, 260.5]}, "gradients/decoder.transformer.h.4.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 2.0, 4.0, 5.0, 12.0, 7.0, 11.0, 21.0, 23.0, 31.0, 42.0, 62.0, 83.0, 102.0, 174.0, 282.0, 710.0, 2642.0, 14221.0, 228770.0, 2848540.0, 41891.0, 5616.0, 1270.0, 487.0, 231.0, 125.0, 84.0, 70.0, 49.0, 39.0, 23.0, 18.0, 11.0, 19.0, 7.0, 6.0, 11.0, 5.0, 2.0, 1.0, 0.0, 3.0, 1.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-372.25, -360.3359375, -348.421875, -336.5078125, -324.59375, -312.6796875, -300.765625, -288.8515625, -276.9375, -265.0234375, -253.109375, -241.1953125, -229.28125, -217.3671875, -205.453125, -193.5390625, -181.625, -169.7109375, -157.796875, -145.8828125, -133.96875, -122.0546875, -110.140625, -98.2265625, -86.3125, -74.3984375, -62.484375, -50.5703125, -38.65625, -26.7421875, -14.828125, -2.9140625, 9.0, 20.9140625, 32.828125, 44.7421875, 56.65625, 68.5703125, 80.484375, 92.3984375, 104.3125, 116.2265625, 128.140625, 140.0546875, 151.96875, 163.8828125, 175.796875, 187.7109375, 199.625, 211.5390625, 223.453125, 235.3671875, 247.28125, 259.1953125, 271.109375, 283.0234375, 294.9375, 306.8515625, 318.765625, 330.6796875, 342.59375, 354.5078125, 366.421875, 378.3359375, 390.25]}, "gradients/decoder.transformer.h.4.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 5.0, 6.0, 58.0, 154.0, 375.0, 310.0, 89.0, 16.0, 4.0, 2.0], "bins": [-1590.63232421875, -1563.561279296875, -1536.4903564453125, -1509.4193115234375, -1482.348388671875, -1455.27734375, -1428.2064208984375, -1401.1353759765625, -1374.064453125, -1346.993408203125, -1319.9224853515625, -1292.8514404296875, -1265.780517578125, -1238.70947265625, -1211.6385498046875, -1184.5675048828125, -1157.49658203125, -1130.425537109375, -1103.3546142578125, -1076.2835693359375, -1049.212646484375, -1022.1416625976562, -995.0706787109375, -967.9996337890625, -940.9285888671875, -913.8576049804688, -886.78662109375, -859.7156372070312, -832.6446533203125, -805.5736694335938, -778.502685546875, -751.431640625, -724.3607177734375, -697.2897338867188, -670.21875, -643.1477661132812, -616.0767822265625, -589.0057983398438, -561.934814453125, -534.86376953125, -507.7928161621094, -480.7218322753906, -453.6508483886719, -426.579833984375, -399.50885009765625, -372.4378662109375, -345.36688232421875, -318.2958984375, -291.22491455078125, -264.1539306640625, -237.08294677734375, -210.01194763183594, -182.9409637451172, -155.86997985839844, -128.79898071289062, -101.72799682617188, -74.65701293945312, -47.58602523803711, -20.515037536621094, 6.5559539794921875, 33.62693786621094, 60.69792175292969, 87.7689208984375, 114.83990478515625, 141.910888671875]}, "gradients/decoder.transformer.h.4.ln_1.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 1.0, 4.0, 9.0, 9.0, 8.0, 4.0, 21.0, 18.0, 25.0, 12.0, 26.0, 20.0, 23.0, 22.0, 29.0, 34.0, 28.0, 33.0, 32.0, 45.0, 41.0, 31.0, 33.0, 43.0, 39.0, 42.0, 44.0, 49.0, 33.0, 33.0, 26.0, 21.0, 25.0, 21.0, 25.0, 12.0, 9.0, 10.0, 10.0, 8.0, 15.0, 8.0, 8.0, 7.0, 2.0, 3.0, 6.0, 1.0, 1.0, 1.0, 2.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-394.71240234375, -381.53668212890625, -368.3609619140625, -355.18524169921875, -342.009521484375, -328.83380126953125, -315.6580810546875, -302.48236083984375, -289.306640625, -276.13092041015625, -262.9552001953125, -249.77947998046875, -236.603759765625, -223.42803955078125, -210.25233459472656, -197.0766143798828, -183.90090942382812, -170.72518920898438, -157.54946899414062, -144.37374877929688, -131.19802856445312, -118.0223159790039, -104.84660339355469, -91.67088317871094, -78.49516296386719, -65.31944274902344, -52.14372634887695, -38.96800994873047, -25.79228973388672, -12.616569519042969, 0.55914306640625, 13.73486328125, 26.910552978515625, 40.086273193359375, 53.26198959350586, 66.43770599365234, 79.6134262084961, 92.78914642333984, 105.96485900878906, 119.14057922363281, 132.31629943847656, 145.4920196533203, 158.66773986816406, 171.84344482421875, 185.0191650390625, 198.19488525390625, 211.37060546875, 224.54632568359375, 237.7220458984375, 250.89776611328125, 264.073486328125, 277.24920654296875, 290.4249267578125, 303.60064697265625, 316.7763671875, 329.95208740234375, 343.1278076171875, 356.30352783203125, 369.479248046875, 382.65496826171875, 395.8306884765625, 409.00640869140625, 422.18212890625, 435.35784912109375, 448.5335388183594]}, "gradients/decoder.transformer.h.3.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 3.0, 1.0, 1.0, 3.0, 4.0, 4.0, 1.0, 7.0, 7.0, 11.0, 20.0, 12.0, 16.0, 25.0, 27.0, 25.0, 32.0, 35.0, 29.0, 28.0, 44.0, 31.0, 55.0, 55.0, 31.0, 49.0, 44.0, 41.0, 51.0, 36.0, 43.0, 36.0, 28.0, 22.0, 31.0, 20.0, 21.0, 24.0, 14.0, 12.0, 6.0, 4.0, 8.0, 4.0, 4.0, 4.0, 1.0, 3.0, 3.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-93.625, -90.6845703125, -87.744140625, -84.8037109375, -81.86328125, -78.9228515625, -75.982421875, -73.0419921875, -70.1015625, -67.1611328125, -64.220703125, -61.2802734375, -58.33984375, -55.3994140625, -52.458984375, -49.5185546875, -46.578125, -43.6376953125, -40.697265625, -37.7568359375, -34.81640625, -31.8759765625, -28.935546875, -25.9951171875, -23.0546875, -20.1142578125, -17.173828125, -14.2333984375, -11.29296875, -8.3525390625, -5.412109375, -2.4716796875, 0.46875, 3.4091796875, 6.349609375, 9.2900390625, 12.23046875, 15.1708984375, 18.111328125, 21.0517578125, 23.9921875, 26.9326171875, 29.873046875, 32.8134765625, 35.75390625, 38.6943359375, 41.634765625, 44.5751953125, 47.515625, 50.4560546875, 53.396484375, 56.3369140625, 59.27734375, 62.2177734375, 65.158203125, 68.0986328125, 71.0390625, 73.9794921875, 76.919921875, 79.8603515625, 82.80078125, 85.7412109375, 88.681640625, 91.6220703125, 94.5625]}, "gradients/decoder.transformer.h.3.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 3.0, 2.0, 2.0, 0.0, 1.0, 0.0, 4.0, 3.0, 2.0, 6.0, 4.0, 8.0, 5.0, 4.0, 10.0, 9.0, 18.0, 29.0, 31.0, 50.0, 69.0, 112.0, 136.0, 274.0, 525.0, 4027.0, 4182422.0, 5312.0, 487.0, 213.0, 156.0, 87.0, 55.0, 67.0, 43.0, 25.0, 16.0, 13.0, 10.0, 10.0, 8.0, 5.0, 7.0, 4.0, 3.0, 3.0, 5.0, 2.0, 3.0, 2.0, 2.0, 0.0, 0.0, 3.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-2194.0, -2117.09375, -2040.1875, -1963.28125, -1886.375, -1809.46875, -1732.5625, -1655.65625, -1578.75, -1501.84375, -1424.9375, -1348.03125, -1271.125, -1194.21875, -1117.3125, -1040.40625, -963.5, -886.59375, -809.6875, -732.78125, -655.875, -578.96875, -502.0625, -425.15625, -348.25, -271.34375, -194.4375, -117.53125, -40.625, 36.28125, 113.1875, 190.09375, 267.0, 343.90625, 420.8125, 497.71875, 574.625, 651.53125, 728.4375, 805.34375, 882.25, 959.15625, 1036.0625, 1112.96875, 1189.875, 1266.78125, 1343.6875, 1420.59375, 1497.5, 1574.40625, 1651.3125, 1728.21875, 1805.125, 1882.03125, 1958.9375, 2035.84375, 2112.75, 2189.65625, 2266.5625, 2343.46875, 2420.375, 2497.28125, 2574.1875, 2651.09375, 2728.0]}, "gradients/decoder.transformer.h.3.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 0.0, 0.0, 3.0, 3.0, 0.0, 2.0, 4.0, 7.0, 7.0, 10.0, 10.0, 8.0, 17.0, 9.0, 16.0, 20.0, 30.0, 24.0, 42.0, 53.0, 75.0, 111.0, 231.0, 435.0, 942.0, 923.0, 433.0, 207.0, 140.0, 72.0, 53.0, 43.0, 32.0, 31.0, 20.0, 22.0, 8.0, 9.0, 10.0, 5.0, 3.0, 2.0, 3.0, 7.0, 0.0, 1.0, 4.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-124.6875, -120.2333984375, -115.779296875, -111.3251953125, -106.87109375, -102.4169921875, -97.962890625, -93.5087890625, -89.0546875, -84.6005859375, -80.146484375, -75.6923828125, -71.23828125, -66.7841796875, -62.330078125, -57.8759765625, -53.421875, -48.9677734375, -44.513671875, -40.0595703125, -35.60546875, -31.1513671875, -26.697265625, -22.2431640625, -17.7890625, -13.3349609375, -8.880859375, -4.4267578125, 0.02734375, 4.4814453125, 8.935546875, 13.3896484375, 17.84375, 22.2978515625, 26.751953125, 31.2060546875, 35.66015625, 40.1142578125, 44.568359375, 49.0224609375, 53.4765625, 57.9306640625, 62.384765625, 66.8388671875, 71.29296875, 75.7470703125, 80.201171875, 84.6552734375, 89.109375, 93.5634765625, 98.017578125, 102.4716796875, 106.92578125, 111.3798828125, 115.833984375, 120.2880859375, 124.7421875, 129.1962890625, 133.650390625, 138.1044921875, 142.55859375, 147.0126953125, 151.466796875, 155.9208984375, 160.375]}, "gradients/decoder.transformer.h.3.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 2.0, 3.0, 5.0, 5.0, 5.0, 9.0, 15.0, 9.0, 14.0, 15.0, 21.0, 21.0, 24.0, 29.0, 42.0, 43.0, 63.0, 126.0, 661.0, 6183.0, 149271.0, 4006633.0, 28338.0, 2024.0, 328.0, 120.0, 48.0, 36.0, 33.0, 35.0, 25.0, 19.0, 13.0, 8.0, 12.0, 10.0, 9.0, 5.0, 9.0, 5.0, 6.0, 5.0, 1.0, 4.0, 2.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-828.0, -802.34375, -776.6875, -751.03125, -725.375, -699.71875, -674.0625, -648.40625, -622.75, -597.09375, -571.4375, -545.78125, -520.125, -494.46875, -468.8125, -443.15625, -417.5, -391.84375, -366.1875, -340.53125, -314.875, -289.21875, -263.5625, -237.90625, -212.25, -186.59375, -160.9375, -135.28125, -109.625, -83.96875, -58.3125, -32.65625, -7.0, 18.65625, 44.3125, 69.96875, 95.625, 121.28125, 146.9375, 172.59375, 198.25, 223.90625, 249.5625, 275.21875, 300.875, 326.53125, 352.1875, 377.84375, 403.5, 429.15625, 454.8125, 480.46875, 506.125, 531.78125, 557.4375, 583.09375, 608.75, 634.40625, 660.0625, 685.71875, 711.375, 737.03125, 762.6875, 788.34375, 814.0]}, "gradients/decoder.transformer.h.3.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 5.0, 38.0, 133.0, 379.0, 321.0, 110.0, 19.0, 7.0, 1.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-642.8279418945312, -588.6417846679688, -534.4556884765625, -480.26953125, -426.0834045410156, -371.89727783203125, -317.71112060546875, -263.5249938964844, -209.3388671875, -155.15274047851562, -100.96659851074219, -46.78045654296875, 7.405670166015625, 61.591796875, 115.7779541015625, 169.96408081054688, 224.15020751953125, 278.3363342285156, 332.5224609375, 386.7086181640625, 440.8947448730469, 495.08087158203125, 549.2670288085938, 603.453125, 657.6392822265625, 711.825439453125, 766.0115356445312, 820.1976928710938, 874.3837890625, 928.5699462890625, 982.756103515625, 1036.9422607421875, 1091.12841796875, 1145.3145751953125, 1199.500732421875, 1253.686767578125, 1307.8729248046875, 1362.05908203125, 1416.2452392578125, 1470.431396484375, 1524.617431640625, 1578.8035888671875, 1632.98974609375, 1687.17578125, 1741.3619384765625, 1795.548095703125, 1849.7342529296875, 1903.92041015625, 1958.1065673828125, 2012.292724609375, 2066.478759765625, 2120.6650390625, 2174.85107421875, 2229.037109375, 2283.223388671875, 2337.409423828125, 2391.595703125, 2445.78173828125, 2499.968017578125, 2554.154052734375, 2608.34033203125, 2662.5263671875, 2716.71240234375, 2770.898681640625, 2825.084716796875]}, "gradients/decoder.transformer.h.3.ln_2.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 2.0, 2.0, 1.0, 4.0, 5.0, 2.0, 7.0, 2.0, 2.0, 8.0, 6.0, 19.0, 17.0, 17.0, 21.0, 26.0, 16.0, 25.0, 19.0, 26.0, 33.0, 32.0, 45.0, 38.0, 37.0, 45.0, 43.0, 38.0, 34.0, 31.0, 41.0, 33.0, 40.0, 30.0, 24.0, 32.0, 23.0, 28.0, 21.0, 19.0, 18.0, 22.0, 17.0, 14.0, 10.0, 11.0, 4.0, 6.0, 11.0, 3.0, 3.0, 0.0, 4.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-382.009033203125, -370.2220458984375, -358.4350891113281, -346.6481018066406, -334.86114501953125, -323.07415771484375, -311.28717041015625, -299.5002136230469, -287.7132568359375, -275.92626953125, -264.1393127441406, -252.35232543945312, -240.56536865234375, -228.77838134765625, -216.9914093017578, -205.20443725585938, -193.41744995117188, -181.63047790527344, -169.843505859375, -158.0565185546875, -146.26956176757812, -134.48257446289062, -122.69560241699219, -110.90863037109375, -99.12165832519531, -87.33468627929688, -75.54771423339844, -63.76073455810547, -51.97376251220703, -40.186790466308594, -28.399810791015625, -16.612838745117188, -4.82586669921875, 6.96110725402832, 18.74808120727539, 30.535057067871094, 42.32202911376953, 54.10900115966797, 65.89598083496094, 77.68295288085938, 89.46992492675781, 101.25689697265625, 113.04386901855469, 124.83084869384766, 136.61782836914062, 148.40478515625, 160.1917724609375, 171.97874450683594, 183.76571655273438, 195.5526885986328, 207.33966064453125, 219.12664794921875, 230.91360473632812, 242.70059204101562, 254.48756408691406, 266.2745361328125, 278.0615234375, 289.8485107421875, 301.6354675292969, 313.4224548339844, 325.20941162109375, 336.99639892578125, 348.78338623046875, 360.5703430175781, 372.3572998046875]}, "gradients/decoder.transformer.h.3.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 2.0, 3.0, 2.0, 3.0, 0.0, 2.0, 12.0, 9.0, 10.0, 20.0, 25.0, 19.0, 30.0, 19.0, 21.0, 31.0, 31.0, 38.0, 38.0, 33.0, 45.0, 41.0, 43.0, 47.0, 34.0, 44.0, 50.0, 50.0, 26.0, 33.0, 31.0, 40.0, 29.0, 32.0, 15.0, 25.0, 17.0, 11.0, 17.0, 6.0, 4.0, 11.0, 4.0, 4.0, 3.0, 0.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-95.25, -92.4873046875, -89.724609375, -86.9619140625, -84.19921875, -81.4365234375, -78.673828125, -75.9111328125, -73.1484375, -70.3857421875, -67.623046875, -64.8603515625, -62.09765625, -59.3349609375, -56.572265625, -53.8095703125, -51.046875, -48.2841796875, -45.521484375, -42.7587890625, -39.99609375, -37.2333984375, -34.470703125, -31.7080078125, -28.9453125, -26.1826171875, -23.419921875, -20.6572265625, -17.89453125, -15.1318359375, -12.369140625, -9.6064453125, -6.84375, -4.0810546875, -1.318359375, 1.4443359375, 4.20703125, 6.9697265625, 9.732421875, 12.4951171875, 15.2578125, 18.0205078125, 20.783203125, 23.5458984375, 26.30859375, 29.0712890625, 31.833984375, 34.5966796875, 37.359375, 40.1220703125, 42.884765625, 45.6474609375, 48.41015625, 51.1728515625, 53.935546875, 56.6982421875, 59.4609375, 62.2236328125, 64.986328125, 67.7490234375, 70.51171875, 73.2744140625, 76.037109375, 78.7998046875, 81.5625]}, "gradients/decoder.transformer.h.3.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 3.0, 0.0, 5.0, 10.0, 15.0, 16.0, 19.0, 21.0, 49.0, 54.0, 92.0, 103.0, 202.0, 265.0, 428.0, 648.0, 1047.0, 1738.0, 3312.0, 6485.0, 14010.0, 32193.0, 86415.0, 286110.0, 416961.0, 120368.0, 42906.0, 17552.0, 7996.0, 3993.0, 2116.0, 1217.0, 771.0, 489.0, 293.0, 217.0, 130.0, 105.0, 71.0, 47.0, 29.0, 14.0, 12.0, 14.0, 7.0, 12.0, 3.0, 3.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-16.5625, -16.01318359375, -15.4638671875, -14.91455078125, -14.365234375, -13.81591796875, -13.2666015625, -12.71728515625, -12.16796875, -11.61865234375, -11.0693359375, -10.52001953125, -9.970703125, -9.42138671875, -8.8720703125, -8.32275390625, -7.7734375, -7.22412109375, -6.6748046875, -6.12548828125, -5.576171875, -5.02685546875, -4.4775390625, -3.92822265625, -3.37890625, -2.82958984375, -2.2802734375, -1.73095703125, -1.181640625, -0.63232421875, -0.0830078125, 0.46630859375, 1.015625, 1.56494140625, 2.1142578125, 2.66357421875, 3.212890625, 3.76220703125, 4.3115234375, 4.86083984375, 5.41015625, 5.95947265625, 6.5087890625, 7.05810546875, 7.607421875, 8.15673828125, 8.7060546875, 9.25537109375, 9.8046875, 10.35400390625, 10.9033203125, 11.45263671875, 12.001953125, 12.55126953125, 13.1005859375, 13.64990234375, 14.19921875, 14.74853515625, 15.2978515625, 15.84716796875, 16.396484375, 16.94580078125, 17.4951171875, 18.04443359375, 18.59375]}, "gradients/decoder.transformer.h.3.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 3.0, 0.0, 6.0, 1.0, 5.0, 7.0, 10.0, 7.0, 9.0, 11.0, 22.0, 20.0, 20.0, 25.0, 25.0, 32.0, 39.0, 32.0, 27.0, 47.0, 38.0, 38.0, 44.0, 1079.0, 38.0, 42.0, 45.0, 30.0, 40.0, 38.0, 31.0, 27.0, 45.0, 28.0, 12.0, 21.0, 14.0, 18.0, 14.0, 4.0, 5.0, 11.0, 14.0, 8.0, 2.0, 0.0, 2.0, 3.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-58.21875, -56.4453125, -54.671875, -52.8984375, -51.125, -49.3515625, -47.578125, -45.8046875, -44.03125, -42.2578125, -40.484375, -38.7109375, -36.9375, -35.1640625, -33.390625, -31.6171875, -29.84375, -28.0703125, -26.296875, -24.5234375, -22.75, -20.9765625, -19.203125, -17.4296875, -15.65625, -13.8828125, -12.109375, -10.3359375, -8.5625, -6.7890625, -5.015625, -3.2421875, -1.46875, 0.3046875, 2.078125, 3.8515625, 5.625, 7.3984375, 9.171875, 10.9453125, 12.71875, 14.4921875, 16.265625, 18.0390625, 19.8125, 21.5859375, 23.359375, 25.1328125, 26.90625, 28.6796875, 30.453125, 32.2265625, 34.0, 35.7734375, 37.546875, 39.3203125, 41.09375, 42.8671875, 44.640625, 46.4140625, 48.1875, 49.9609375, 51.734375, 53.5078125, 55.28125]}, "gradients/decoder.transformer.h.3.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 4.0, 0.0, 3.0, 6.0, 9.0, 12.0, 18.0, 14.0, 22.0, 42.0, 39.0, 62.0, 82.0, 122.0, 160.0, 238.0, 354.0, 492.0, 743.0, 1131.0, 1896.0, 2801.0, 4409.0, 7450.0, 11833.0, 19614.0, 33589.0, 57523.0, 101384.0, 193150.0, 1299189.0, 153821.0, 85347.0, 48671.0, 28431.0, 16917.0, 10226.0, 6048.0, 3910.0, 2419.0, 1635.0, 1065.0, 718.0, 474.0, 323.0, 263.0, 152.0, 105.0, 56.0, 55.0, 29.0, 25.0, 13.0, 14.0, 10.0, 7.0, 9.0, 5.0, 5.0, 1.0, 2.0, 3.0], "bins": [-4.91015625, -4.75836181640625, -4.6065673828125, -4.45477294921875, -4.302978515625, -4.15118408203125, -3.9993896484375, -3.84759521484375, -3.69580078125, -3.54400634765625, -3.3922119140625, -3.24041748046875, -3.088623046875, -2.93682861328125, -2.7850341796875, -2.63323974609375, -2.4814453125, -2.32965087890625, -2.1778564453125, -2.02606201171875, -1.874267578125, -1.72247314453125, -1.5706787109375, -1.41888427734375, -1.26708984375, -1.11529541015625, -0.9635009765625, -0.81170654296875, -0.659912109375, -0.50811767578125, -0.3563232421875, -0.20452880859375, -0.052734375, 0.09906005859375, 0.2508544921875, 0.40264892578125, 0.554443359375, 0.70623779296875, 0.8580322265625, 1.00982666015625, 1.16162109375, 1.31341552734375, 1.4652099609375, 1.61700439453125, 1.768798828125, 1.92059326171875, 2.0723876953125, 2.22418212890625, 2.3759765625, 2.52777099609375, 2.6795654296875, 2.83135986328125, 2.983154296875, 3.13494873046875, 3.2867431640625, 3.43853759765625, 3.59033203125, 3.74212646484375, 3.8939208984375, 4.04571533203125, 4.197509765625, 4.34930419921875, 4.5010986328125, 4.65289306640625, 4.8046875]}, "gradients/decoder.transformer.h.3.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 5.0, 1.0, 3.0, 3.0, 8.0, 7.0, 8.0, 5.0, 8.0, 9.0, 15.0, 9.0, 19.0, 20.0, 21.0, 34.0, 53.0, 80.0, 97.0, 111.0, 136.0, 93.0, 50.0, 41.0, 27.0, 32.0, 21.0, 12.0, 13.0, 9.0, 7.0, 7.0, 6.0, 7.0, 2.0, 2.0, 8.0, 2.0, 4.0, 2.0, 4.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 3.0, 1.0, 0.0, 2.0, 0.0, 1.0], "bins": [-0.2410888671875, -0.23300743103027344, -0.22492599487304688, -0.2168445587158203, -0.20876312255859375, -0.2006816864013672, -0.19260025024414062, -0.18451881408691406, -0.1764373779296875, -0.16835594177246094, -0.16027450561523438, -0.1521930694580078, -0.14411163330078125, -0.1360301971435547, -0.12794876098632812, -0.11986732482910156, -0.111785888671875, -0.10370445251464844, -0.09562301635742188, -0.08754158020019531, -0.07946014404296875, -0.07137870788574219, -0.06329727172851562, -0.05521583557128906, -0.0471343994140625, -0.03905296325683594, -0.030971527099609375, -0.022890090942382812, -0.01480865478515625, -0.0067272186279296875, 0.001354217529296875, 0.009435653686523438, 0.01751708984375, 0.025598526000976562, 0.033679962158203125, 0.04176139831542969, 0.04984283447265625, 0.05792427062988281, 0.06600570678710938, 0.07408714294433594, 0.0821685791015625, 0.09025001525878906, 0.09833145141601562, 0.10641288757324219, 0.11449432373046875, 0.12257575988769531, 0.13065719604492188, 0.13873863220214844, 0.146820068359375, 0.15490150451660156, 0.16298294067382812, 0.1710643768310547, 0.17914581298828125, 0.1872272491455078, 0.19530868530273438, 0.20339012145996094, 0.2114715576171875, 0.21955299377441406, 0.22763442993164062, 0.2357158660888672, 0.24379730224609375, 0.2518787384033203, 0.2599601745605469, 0.26804161071777344, 0.276123046875]}, "gradients/decoder.transformer.h.3.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 4.0, 2.0, 2.0, 5.0, 2.0, 7.0, 5.0, 2.0, 5.0, 8.0, 13.0, 13.0, 15.0, 16.0, 28.0, 38.0, 52.0, 63.0, 118.0, 232.0, 718.0, 980269.0, 65759.0, 589.0, 212.0, 114.0, 57.0, 52.0, 39.0, 20.0, 15.0, 13.0, 16.0, 9.0, 9.0, 13.0, 8.0, 3.0, 3.0, 4.0, 2.0, 3.0, 1.0, 1.0, 0.0, 2.0, 1.0, 2.0, 0.0, 1.0, 1.0], "bins": [-9.7578125, -9.478515625, -9.19921875, -8.919921875, -8.640625, -8.361328125, -8.08203125, -7.802734375, -7.5234375, -7.244140625, -6.96484375, -6.685546875, -6.40625, -6.126953125, -5.84765625, -5.568359375, -5.2890625, -5.009765625, -4.73046875, -4.451171875, -4.171875, -3.892578125, -3.61328125, -3.333984375, -3.0546875, -2.775390625, -2.49609375, -2.216796875, -1.9375, -1.658203125, -1.37890625, -1.099609375, -0.8203125, -0.541015625, -0.26171875, 0.017578125, 0.296875, 0.576171875, 0.85546875, 1.134765625, 1.4140625, 1.693359375, 1.97265625, 2.251953125, 2.53125, 2.810546875, 3.08984375, 3.369140625, 3.6484375, 3.927734375, 4.20703125, 4.486328125, 4.765625, 5.044921875, 5.32421875, 5.603515625, 5.8828125, 6.162109375, 6.44140625, 6.720703125, 7.0, 7.279296875, 7.55859375, 7.837890625, 8.1171875]}, "gradients/decoder.transformer.h.3.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 942.0, 72.0, 3.0, 1.0], "bins": [-13.160978317260742, -12.94421672821045, -12.727455139160156, -12.51069450378418, -12.293932914733887, -12.077171325683594, -11.8604097366333, -11.643648147583008, -11.426886558532715, -11.210124969482422, -10.993363380432129, -10.776601791381836, -10.55984115600586, -10.343079566955566, -10.126317977905273, -9.90955638885498, -9.692795753479004, -9.476034164428711, -9.259272575378418, -9.042510986328125, -8.825750350952148, -8.608988761901855, -8.392227172851562, -8.17546558380127, -7.958703994750977, -7.741942405700684, -7.525181293487549, -7.308419704437256, -7.091658115386963, -6.874897003173828, -6.658135414123535, -6.441373825073242, -6.224612236022949, -6.007850646972656, -5.7910895347595215, -5.5743279457092285, -5.3575663566589355, -5.140805244445801, -4.924043655395508, -4.707282066345215, -4.49052095413208, -4.273759365081787, -4.056998252868652, -3.8402366638183594, -3.6234753131866455, -3.4067139625549316, -3.1899523735046387, -2.973191022872925, -2.756429672241211, -2.539668321609497, -2.322906732559204, -2.1061453819274902, -1.8893840312957764, -1.672622561454773, -1.4558610916137695, -1.2390997409820557, -1.0223382711410522, -0.8055768609046936, -0.588815450668335, -0.37205398082733154, -0.1552925705909729, 0.06146883964538574, 0.27823030948638916, 0.494991660118103, 0.7117531299591064]}, "gradients/decoder.transformer.h.3.ln_cross_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 2.0, 3.0, 5.0, 4.0, 1.0, 4.0, 4.0, 7.0, 8.0, 9.0, 11.0, 10.0, 9.0, 9.0, 12.0, 15.0, 22.0, 16.0, 24.0, 36.0, 31.0, 27.0, 25.0, 39.0, 45.0, 32.0, 31.0, 38.0, 38.0, 33.0, 31.0, 34.0, 49.0, 35.0, 29.0, 30.0, 29.0, 29.0, 27.0, 15.0, 15.0, 23.0, 23.0, 20.0, 11.0, 13.0, 6.0, 14.0, 6.0, 6.0, 6.0, 5.0, 3.0, 4.0, 2.0, 2.0, 2.0, 0.0, 1.0, 1.0], "bins": [-0.2085365653038025, -0.20219039916992188, -0.19584421813488007, -0.18949805200099945, -0.18315188586711884, -0.17680570483207703, -0.1704595386981964, -0.1641133725643158, -0.15776720643043518, -0.15142104029655457, -0.14507485926151276, -0.13872869312763214, -0.13238252699375153, -0.12603634595870972, -0.1196901798248291, -0.11334401369094849, -0.10699783265590668, -0.10065165907144547, -0.09430549293756485, -0.08795931935310364, -0.08161315321922302, -0.07526697963476181, -0.0689208060503006, -0.06257463991641998, -0.05622846633195877, -0.04988229647278786, -0.04353612661361694, -0.03718995302915573, -0.030843783169984818, -0.024497613310813904, -0.01815143972635269, -0.011805269867181778, -0.005459100008010864, 0.000887070782482624, 0.007233241572976112, 0.013579413294792175, 0.01992558315396309, 0.026271753013134003, 0.032617926597595215, 0.03896409645676613, 0.04531026631593704, 0.051656436175107956, 0.05800260603427887, 0.06434877961874008, 0.0706949532032013, 0.07704111933708191, 0.08338729292154312, 0.08973346650600433, 0.09607963263988495, 0.10242580622434616, 0.10877197235822678, 0.11511814594268799, 0.1214643120765686, 0.12781047821044922, 0.13415665924549103, 0.14050282537937164, 0.14684900641441345, 0.15319517254829407, 0.15954135358333588, 0.1658875197172165, 0.1722336858510971, 0.17857986688613892, 0.18492603302001953, 0.19127219915390015, 0.19761836528778076]}, "gradients/decoder.transformer.h.3.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 2.0, 3.0, 2.0, 3.0, 0.0, 2.0, 12.0, 9.0, 10.0, 20.0, 25.0, 19.0, 30.0, 19.0, 21.0, 31.0, 31.0, 38.0, 38.0, 33.0, 45.0, 40.0, 44.0, 47.0, 34.0, 44.0, 50.0, 50.0, 26.0, 33.0, 31.0, 40.0, 29.0, 32.0, 15.0, 25.0, 17.0, 10.0, 18.0, 6.0, 4.0, 11.0, 4.0, 4.0, 3.0, 0.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-95.25, -92.4873046875, -89.724609375, -86.9619140625, -84.19921875, -81.4365234375, -78.673828125, -75.9111328125, -73.1484375, -70.3857421875, -67.623046875, -64.8603515625, -62.09765625, -59.3349609375, -56.572265625, -53.8095703125, -51.046875, -48.2841796875, -45.521484375, -42.7587890625, -39.99609375, -37.2333984375, -34.470703125, -31.7080078125, -28.9453125, -26.1826171875, -23.419921875, -20.6572265625, -17.89453125, -15.1318359375, -12.369140625, -9.6064453125, -6.84375, -4.0810546875, -1.318359375, 1.4443359375, 4.20703125, 6.9697265625, 9.732421875, 12.4951171875, 15.2578125, 18.0205078125, 20.783203125, 23.5458984375, 26.30859375, 29.0712890625, 31.833984375, 34.5966796875, 37.359375, 40.1220703125, 42.884765625, 45.6474609375, 48.41015625, 51.1728515625, 53.935546875, 56.6982421875, 59.4609375, 62.2236328125, 64.986328125, 67.7490234375, 70.51171875, 73.2744140625, 76.037109375, 78.7998046875, 81.5625]}, "gradients/decoder.transformer.h.3.attn.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 2.0, 4.0, 1.0, 5.0, 3.0, 5.0, 14.0, 17.0, 16.0, 21.0, 42.0, 61.0, 75.0, 81.0, 144.0, 207.0, 273.0, 447.0, 609.0, 820.0, 1203.0, 1859.0, 2831.0, 4859.0, 8870.0, 18755.0, 45502.0, 136400.0, 461342.0, 239423.0, 69548.0, 26452.0, 12027.0, 6153.0, 3668.0, 2152.0, 1469.0, 925.0, 679.0, 454.0, 344.0, 243.0, 141.0, 128.0, 90.0, 58.0, 38.0, 34.0, 19.0, 17.0, 11.0, 8.0, 10.0, 6.0, 2.0, 0.0, 0.0, 1.0, 3.0, 2.0], "bins": [-131.125, -127.1162109375, -123.107421875, -119.0986328125, -115.08984375, -111.0810546875, -107.072265625, -103.0634765625, -99.0546875, -95.0458984375, -91.037109375, -87.0283203125, -83.01953125, -79.0107421875, -75.001953125, -70.9931640625, -66.984375, -62.9755859375, -58.966796875, -54.9580078125, -50.94921875, -46.9404296875, -42.931640625, -38.9228515625, -34.9140625, -30.9052734375, -26.896484375, -22.8876953125, -18.87890625, -14.8701171875, -10.861328125, -6.8525390625, -2.84375, 1.1650390625, 5.173828125, 9.1826171875, 13.19140625, 17.2001953125, 21.208984375, 25.2177734375, 29.2265625, 33.2353515625, 37.244140625, 41.2529296875, 45.26171875, 49.2705078125, 53.279296875, 57.2880859375, 61.296875, 65.3056640625, 69.314453125, 73.3232421875, 77.33203125, 81.3408203125, 85.349609375, 89.3583984375, 93.3671875, 97.3759765625, 101.384765625, 105.3935546875, 109.40234375, 113.4111328125, 117.419921875, 121.4287109375, 125.4375]}, "gradients/decoder.transformer.h.3.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 3.0, 1.0, 3.0, 8.0, 7.0, 6.0, 9.0, 10.0, 16.0, 19.0, 26.0, 34.0, 31.0, 37.0, 40.0, 66.0, 68.0, 102.0, 231.0, 1567.0, 252.0, 109.0, 65.0, 75.0, 49.0, 43.0, 43.0, 32.0, 25.0, 21.0, 17.0, 4.0, 9.0, 7.0, 9.0, 6.0, 3.0, 3.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-261.5, -253.234375, -244.96875, -236.703125, -228.4375, -220.171875, -211.90625, -203.640625, -195.375, -187.109375, -178.84375, -170.578125, -162.3125, -154.046875, -145.78125, -137.515625, -129.25, -120.984375, -112.71875, -104.453125, -96.1875, -87.921875, -79.65625, -71.390625, -63.125, -54.859375, -46.59375, -38.328125, -30.0625, -21.796875, -13.53125, -5.265625, 3.0, 11.265625, 19.53125, 27.796875, 36.0625, 44.328125, 52.59375, 60.859375, 69.125, 77.390625, 85.65625, 93.921875, 102.1875, 110.453125, 118.71875, 126.984375, 135.25, 143.515625, 151.78125, 160.046875, 168.3125, 176.578125, 184.84375, 193.109375, 201.375, 209.640625, 217.90625, 226.171875, 234.4375, 242.703125, 250.96875, 259.234375, 267.5]}, "gradients/decoder.transformer.h.3.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 0.0, 2.0, 4.0, 1.0, 2.0, 3.0, 9.0, 8.0, 12.0, 13.0, 17.0, 29.0, 25.0, 47.0, 75.0, 128.0, 208.0, 424.0, 1340.0, 75354.0, 3063374.0, 3221.0, 661.0, 298.0, 152.0, 106.0, 53.0, 40.0, 23.0, 22.0, 7.0, 19.0, 18.0, 5.0, 6.0, 2.0, 2.0, 4.0, 1.0, 1.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1222.0, -1188.25, -1154.5, -1120.75, -1087.0, -1053.25, -1019.5, -985.75, -952.0, -918.25, -884.5, -850.75, -817.0, -783.25, -749.5, -715.75, -682.0, -648.25, -614.5, -580.75, -547.0, -513.25, -479.5, -445.75, -412.0, -378.25, -344.5, -310.75, -277.0, -243.25, -209.5, -175.75, -142.0, -108.25, -74.5, -40.75, -7.0, 26.75, 60.5, 94.25, 128.0, 161.75, 195.5, 229.25, 263.0, 296.75, 330.5, 364.25, 398.0, 431.75, 465.5, 499.25, 533.0, 566.75, 600.5, 634.25, 668.0, 701.75, 735.5, 769.25, 803.0, 836.75, 870.5, 904.25, 938.0]}, "gradients/decoder.transformer.h.3.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 15.0, 506.0, 486.0, 9.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7437.69970703125, -7272.3701171875, -7107.0400390625, -6941.71044921875, -6776.380859375, -6611.05126953125, -6445.72119140625, -6280.3916015625, -6115.06201171875, -5949.732421875, -5784.40234375, -5619.07275390625, -5453.7431640625, -5288.41357421875, -5123.08349609375, -4957.75390625, -4792.423828125, -4627.09423828125, -4461.76416015625, -4296.4345703125, -4131.10498046875, -3965.775146484375, -3800.4453125, -3635.11572265625, -3469.7861328125, -3304.456298828125, -3139.126708984375, -2973.796875, -2808.46728515625, -2643.137451171875, -2477.8076171875, -2312.47802734375, -2147.1484375, -1981.8187255859375, -1816.489013671875, -1651.1591796875, -1485.82958984375, -1320.499755859375, -1155.1700439453125, -989.84033203125, -824.5106201171875, -659.180908203125, -493.8511657714844, -328.52142333984375, -163.19171142578125, 2.13800048828125, 167.4677734375, 332.7974853515625, 498.127197265625, 663.4569091796875, 828.78662109375, 994.1163940429688, 1159.446044921875, 1324.77587890625, 1490.1055908203125, 1655.435302734375, 1820.7650146484375, 1986.0947265625, 2151.424560546875, 2316.754150390625, 2482.083984375, 2647.41357421875, 2812.743408203125, 2978.0732421875, 3143.40283203125]}, "gradients/decoder.transformer.h.3.ln_1.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 2.0, 2.0, 0.0, 2.0, 2.0, 7.0, 5.0, 11.0, 6.0, 5.0, 11.0, 10.0, 13.0, 16.0, 21.0, 21.0, 24.0, 25.0, 20.0, 25.0, 32.0, 40.0, 35.0, 41.0, 29.0, 38.0, 44.0, 33.0, 50.0, 43.0, 33.0, 29.0, 33.0, 35.0, 36.0, 24.0, 29.0, 29.0, 19.0, 26.0, 17.0, 17.0, 8.0, 14.0, 15.0, 13.0, 5.0, 2.0, 5.0, 5.0, 1.0, 4.0, 2.0, 2.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0], "bins": [-535.8380737304688, -517.5827026367188, -499.3272705078125, -481.0718688964844, -462.81646728515625, -444.5610656738281, -426.3056640625, -408.05029296875, -389.79486083984375, -371.5394592285156, -353.2840576171875, -335.0286560058594, -316.77325439453125, -298.5178527832031, -280.262451171875, -262.007080078125, -243.75167846679688, -225.49627685546875, -207.24087524414062, -188.9854736328125, -170.73007202148438, -152.47467041015625, -134.2192840576172, -115.96388244628906, -97.70848083496094, -79.45307922363281, -61.19768142700195, -42.942283630371094, -24.68688201904297, -6.431480407714844, 11.82391357421875, 30.079315185546875, 48.334716796875, 66.59011840820312, 84.84552001953125, 103.10091400146484, 121.35631561279297, 139.61172485351562, 157.8671112060547, 176.1225128173828, 194.37791442871094, 212.63331604003906, 230.8887176513672, 249.14410400390625, 267.3995056152344, 285.6549072265625, 303.9103088378906, 322.16571044921875, 340.4211120605469, 358.676513671875, 376.9319152832031, 395.18731689453125, 413.4427185058594, 431.6981201171875, 449.9534912109375, 468.20892333984375, 486.46429443359375, 504.7196960449219, 522.97509765625, 541.23046875, 559.4859008789062, 577.7412719726562, 595.9967041015625, 614.2520751953125, 632.5075073242188]}, "gradients/decoder.transformer.h.2.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 7.0, 2.0, 2.0, 4.0, 6.0, 7.0, 7.0, 17.0, 11.0, 17.0, 16.0, 22.0, 19.0, 27.0, 30.0, 23.0, 33.0, 38.0, 46.0, 40.0, 42.0, 52.0, 41.0, 42.0, 42.0, 42.0, 39.0, 45.0, 38.0, 36.0, 33.0, 33.0, 29.0, 31.0, 20.0, 13.0, 10.0, 9.0, 2.0, 11.0, 9.0, 6.0, 4.0, 3.0, 4.0, 5.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-105.9375, -102.9794921875, -100.021484375, -97.0634765625, -94.10546875, -91.1474609375, -88.189453125, -85.2314453125, -82.2734375, -79.3154296875, -76.357421875, -73.3994140625, -70.44140625, -67.4833984375, -64.525390625, -61.5673828125, -58.609375, -55.6513671875, -52.693359375, -49.7353515625, -46.77734375, -43.8193359375, -40.861328125, -37.9033203125, -34.9453125, -31.9873046875, -29.029296875, -26.0712890625, -23.11328125, -20.1552734375, -17.197265625, -14.2392578125, -11.28125, -8.3232421875, -5.365234375, -2.4072265625, 0.55078125, 3.5087890625, 6.466796875, 9.4248046875, 12.3828125, 15.3408203125, 18.298828125, 21.2568359375, 24.21484375, 27.1728515625, 30.130859375, 33.0888671875, 36.046875, 39.0048828125, 41.962890625, 44.9208984375, 47.87890625, 50.8369140625, 53.794921875, 56.7529296875, 59.7109375, 62.6689453125, 65.626953125, 68.5849609375, 71.54296875, 74.5009765625, 77.458984375, 80.4169921875, 83.375]}, "gradients/decoder.transformer.h.2.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 3.0, 1.0, 3.0, 5.0, 8.0, 6.0, 5.0, 18.0, 19.0, 25.0, 33.0, 35.0, 49.0, 65.0, 83.0, 111.0, 142.0, 214.0, 295.0, 385.0, 570.0, 843.0, 1390.0, 2484.0, 4912.0, 11313.0, 32341.0, 253419.0, 3061812.0, 737988.0, 56047.0, 15364.0, 6402.0, 3013.0, 1652.0, 951.0, 653.0, 444.0, 292.0, 225.0, 156.0, 117.0, 100.0, 65.0, 51.0, 48.0, 38.0, 27.0, 14.0, 16.0, 9.0, 12.0, 6.0, 9.0, 3.0, 2.0, 4.0, 3.0, 0.0, 0.0, 0.0, 2.0], "bins": [-225.375, -218.02734375, -210.6796875, -203.33203125, -195.984375, -188.63671875, -181.2890625, -173.94140625, -166.59375, -159.24609375, -151.8984375, -144.55078125, -137.203125, -129.85546875, -122.5078125, -115.16015625, -107.8125, -100.46484375, -93.1171875, -85.76953125, -78.421875, -71.07421875, -63.7265625, -56.37890625, -49.03125, -41.68359375, -34.3359375, -26.98828125, -19.640625, -12.29296875, -4.9453125, 2.40234375, 9.75, 17.09765625, 24.4453125, 31.79296875, 39.140625, 46.48828125, 53.8359375, 61.18359375, 68.53125, 75.87890625, 83.2265625, 90.57421875, 97.921875, 105.26953125, 112.6171875, 119.96484375, 127.3125, 134.66015625, 142.0078125, 149.35546875, 156.703125, 164.05078125, 171.3984375, 178.74609375, 186.09375, 193.44140625, 200.7890625, 208.13671875, 215.484375, 222.83203125, 230.1796875, 237.52734375, 244.875]}, "gradients/decoder.transformer.h.2.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 1.0, 2.0, 1.0, 5.0, 9.0, 8.0, 8.0, 12.0, 8.0, 15.0, 29.0, 45.0, 32.0, 83.0, 75.0, 133.0, 200.0, 272.0, 513.0, 833.0, 719.0, 367.0, 209.0, 146.0, 87.0, 72.0, 52.0, 31.0, 18.0, 27.0, 18.0, 18.0, 12.0, 8.0, 5.0, 3.0, 2.0, 4.0, 2.0, 0.0, 2.0], "bins": [-248.125, -242.3203125, -236.515625, -230.7109375, -224.90625, -219.1015625, -213.296875, -207.4921875, -201.6875, -195.8828125, -190.078125, -184.2734375, -178.46875, -172.6640625, -166.859375, -161.0546875, -155.25, -149.4453125, -143.640625, -137.8359375, -132.03125, -126.2265625, -120.421875, -114.6171875, -108.8125, -103.0078125, -97.203125, -91.3984375, -85.59375, -79.7890625, -73.984375, -68.1796875, -62.375, -56.5703125, -50.765625, -44.9609375, -39.15625, -33.3515625, -27.546875, -21.7421875, -15.9375, -10.1328125, -4.328125, 1.4765625, 7.28125, 13.0859375, 18.890625, 24.6953125, 30.5, 36.3046875, 42.109375, 47.9140625, 53.71875, 59.5234375, 65.328125, 71.1328125, 76.9375, 82.7421875, 88.546875, 94.3515625, 100.15625, 105.9609375, 111.765625, 117.5703125, 123.375]}, "gradients/decoder.transformer.h.2.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 3.0, 6.0, 8.0, 11.0, 13.0, 31.0, 37.0, 41.0, 66.0, 95.0, 177.0, 259.0, 487.0, 1051.0, 2492.0, 7180.0, 26244.0, 147579.0, 3316645.0, 609627.0, 60918.0, 13768.0, 4241.0, 1632.0, 705.0, 355.0, 217.0, 141.0, 82.0, 71.0, 29.0, 18.0, 26.0, 14.0, 8.0, 7.0, 4.0, 3.0, 4.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-251.25, -241.10546875, -230.9609375, -220.81640625, -210.671875, -200.52734375, -190.3828125, -180.23828125, -170.09375, -159.94921875, -149.8046875, -139.66015625, -129.515625, -119.37109375, -109.2265625, -99.08203125, -88.9375, -78.79296875, -68.6484375, -58.50390625, -48.359375, -38.21484375, -28.0703125, -17.92578125, -7.78125, 2.36328125, 12.5078125, 22.65234375, 32.796875, 42.94140625, 53.0859375, 63.23046875, 73.375, 83.51953125, 93.6640625, 103.80859375, 113.953125, 124.09765625, 134.2421875, 144.38671875, 154.53125, 164.67578125, 174.8203125, 184.96484375, 195.109375, 205.25390625, 215.3984375, 225.54296875, 235.6875, 245.83203125, 255.9765625, 266.12109375, 276.265625, 286.41015625, 296.5546875, 306.69921875, 316.84375, 326.98828125, 337.1328125, 347.27734375, 357.421875, 367.56640625, 377.7109375, 387.85546875, 398.0]}, "gradients/decoder.transformer.h.2.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 23.0, 949.0, 40.0, 2.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-15112.060546875, -14787.337890625, -14462.6162109375, -14137.8935546875, -13813.1708984375, -13488.44921875, -13163.7265625, -12839.00390625, -12514.28125, -12189.55859375, -11864.8369140625, -11540.1142578125, -11215.3916015625, -10890.669921875, -10565.947265625, -10241.224609375, -9916.5029296875, -9591.7802734375, -9267.05859375, -8942.3359375, -8617.61328125, -8292.890625, -7968.1689453125, -7643.4462890625, -7318.72412109375, -6994.001953125, -6669.279296875, -6344.55712890625, -6019.8349609375, -5695.1123046875, -5370.39013671875, -5045.66796875, -4720.9462890625, -4396.22412109375, -4071.50146484375, -3746.779296875, -3422.056884765625, -3097.33447265625, -2772.6123046875, -2447.889892578125, -2123.16748046875, -1798.445068359375, -1473.7227783203125, -1149.00048828125, -824.278076171875, -499.5556640625, -174.8333740234375, 149.888916015625, 474.611328125, 799.3336791992188, 1124.0560302734375, 1448.7783203125, 1773.500732421875, 2098.22314453125, 2422.9453125, 2747.667724609375, 3072.39013671875, 3397.112548828125, 3721.8349609375, 4046.55712890625, 4371.279296875, 4696.001953125, 5020.72412109375, 5345.4462890625, 5670.1689453125]}, "gradients/decoder.transformer.h.2.ln_2.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 3.0, 8.0, 9.0, 15.0, 11.0, 16.0, 20.0, 19.0, 29.0, 34.0, 34.0, 42.0, 44.0, 48.0, 55.0, 51.0, 45.0, 47.0, 57.0, 52.0, 58.0, 46.0, 45.0, 41.0, 31.0, 21.0, 35.0, 22.0, 16.0, 12.0, 8.0, 6.0, 8.0, 10.0, 4.0, 0.0, 0.0, 2.0, 2.0, 3.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-706.4814453125, -682.6838989257812, -658.8864135742188, -635.0888671875, -611.2913208007812, -587.4937744140625, -563.6962890625, -539.8987426757812, -516.1011962890625, -492.3036804199219, -468.5061340332031, -444.7086181640625, -420.91107177734375, -397.1135559082031, -373.3160400390625, -349.51849365234375, -325.7209777832031, -301.9234619140625, -278.12591552734375, -254.32839965820312, -230.53085327148438, -206.73333740234375, -182.93580627441406, -159.13827514648438, -135.3407440185547, -111.543212890625, -87.74568176269531, -63.948158264160156, -40.15062713623047, -16.35309600830078, 7.444427490234375, 31.241958618164062, 55.03948974609375, 78.83702087402344, 102.63455200195312, 126.43207550048828, 150.2296142578125, 174.02713012695312, 197.8246612548828, 221.6221923828125, 245.4197235107422, 269.2172546386719, 293.0147705078125, 316.81231689453125, 340.6098327636719, 364.4073791503906, 388.20489501953125, 412.00244140625, 435.7999572753906, 459.59747314453125, 483.39501953125, 507.1925354003906, 530.9900512695312, 554.78759765625, 578.5851440429688, 602.3826904296875, 626.18017578125, 649.9777221679688, 673.7752075195312, 697.57275390625, 721.3703002929688, 745.1678466796875, 768.96533203125, 792.7628784179688, 816.5604248046875]}, "gradients/decoder.transformer.h.2.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 2.0, 3.0, 3.0, 8.0, 6.0, 5.0, 15.0, 9.0, 17.0, 14.0, 16.0, 24.0, 25.0, 25.0, 32.0, 32.0, 28.0, 30.0, 36.0, 30.0, 39.0, 39.0, 57.0, 51.0, 35.0, 38.0, 39.0, 31.0, 42.0, 36.0, 37.0, 38.0, 23.0, 26.0, 16.0, 23.0, 20.0, 14.0, 11.0, 15.0, 9.0, 4.0, 2.0, 4.0, 2.0, 1.0, 0.0, 2.0, 0.0, 3.0], "bins": [-82.625, -80.39697265625, -78.1689453125, -75.94091796875, -73.712890625, -71.48486328125, -69.2568359375, -67.02880859375, -64.80078125, -62.57275390625, -60.3447265625, -58.11669921875, -55.888671875, -53.66064453125, -51.4326171875, -49.20458984375, -46.9765625, -44.74853515625, -42.5205078125, -40.29248046875, -38.064453125, -35.83642578125, -33.6083984375, -31.38037109375, -29.15234375, -26.92431640625, -24.6962890625, -22.46826171875, -20.240234375, -18.01220703125, -15.7841796875, -13.55615234375, -11.328125, -9.10009765625, -6.8720703125, -4.64404296875, -2.416015625, -0.18798828125, 2.0400390625, 4.26806640625, 6.49609375, 8.72412109375, 10.9521484375, 13.18017578125, 15.408203125, 17.63623046875, 19.8642578125, 22.09228515625, 24.3203125, 26.54833984375, 28.7763671875, 31.00439453125, 33.232421875, 35.46044921875, 37.6884765625, 39.91650390625, 42.14453125, 44.37255859375, 46.6005859375, 48.82861328125, 51.056640625, 53.28466796875, 55.5126953125, 57.74072265625, 59.96875]}, "gradients/decoder.transformer.h.2.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 3.0, 7.0, 7.0, 9.0, 7.0, 11.0, 13.0, 26.0, 36.0, 63.0, 84.0, 152.0, 211.0, 338.0, 604.0, 1002.0, 1877.0, 3580.0, 7079.0, 14327.0, 31103.0, 70644.0, 171128.0, 402917.0, 197137.0, 79163.0, 34501.0, 15822.0, 7877.0, 3993.0, 2046.0, 1131.0, 653.0, 343.0, 243.0, 122.0, 105.0, 59.0, 45.0, 28.0, 25.0, 12.0, 13.0, 8.0, 4.0, 4.0, 0.0, 3.0, 1.0, 3.0, 1.0, 0.0, 0.0, 1.0], "bins": [-11.109375, -10.7784423828125, -10.447509765625, -10.1165771484375, -9.78564453125, -9.4547119140625, -9.123779296875, -8.7928466796875, -8.4619140625, -8.1309814453125, -7.800048828125, -7.4691162109375, -7.13818359375, -6.8072509765625, -6.476318359375, -6.1453857421875, -5.814453125, -5.4835205078125, -5.152587890625, -4.8216552734375, -4.49072265625, -4.1597900390625, -3.828857421875, -3.4979248046875, -3.1669921875, -2.8360595703125, -2.505126953125, -2.1741943359375, -1.84326171875, -1.5123291015625, -1.181396484375, -0.8504638671875, -0.51953125, -0.1885986328125, 0.142333984375, 0.4732666015625, 0.80419921875, 1.1351318359375, 1.466064453125, 1.7969970703125, 2.1279296875, 2.4588623046875, 2.789794921875, 3.1207275390625, 3.45166015625, 3.7825927734375, 4.113525390625, 4.4444580078125, 4.775390625, 5.1063232421875, 5.437255859375, 5.7681884765625, 6.09912109375, 6.4300537109375, 6.760986328125, 7.0919189453125, 7.4228515625, 7.7537841796875, 8.084716796875, 8.4156494140625, 8.74658203125, 9.0775146484375, 9.408447265625, 9.7393798828125, 10.0703125]}, "gradients/decoder.transformer.h.2.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 3.0, 6.0, 1.0, 3.0, 7.0, 3.0, 6.0, 11.0, 17.0, 11.0, 9.0, 21.0, 23.0, 28.0, 33.0, 33.0, 36.0, 28.0, 42.0, 35.0, 49.0, 45.0, 1075.0, 44.0, 39.0, 40.0, 37.0, 37.0, 42.0, 37.0, 41.0, 33.0, 26.0, 21.0, 24.0, 14.0, 26.0, 14.0, 12.0, 10.0, 5.0, 6.0, 2.0, 2.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0], "bins": [-56.4375, -54.90234375, -53.3671875, -51.83203125, -50.296875, -48.76171875, -47.2265625, -45.69140625, -44.15625, -42.62109375, -41.0859375, -39.55078125, -38.015625, -36.48046875, -34.9453125, -33.41015625, -31.875, -30.33984375, -28.8046875, -27.26953125, -25.734375, -24.19921875, -22.6640625, -21.12890625, -19.59375, -18.05859375, -16.5234375, -14.98828125, -13.453125, -11.91796875, -10.3828125, -8.84765625, -7.3125, -5.77734375, -4.2421875, -2.70703125, -1.171875, 0.36328125, 1.8984375, 3.43359375, 4.96875, 6.50390625, 8.0390625, 9.57421875, 11.109375, 12.64453125, 14.1796875, 15.71484375, 17.25, 18.78515625, 20.3203125, 21.85546875, 23.390625, 24.92578125, 26.4609375, 27.99609375, 29.53125, 31.06640625, 32.6015625, 34.13671875, 35.671875, 37.20703125, 38.7421875, 40.27734375, 41.8125]}, "gradients/decoder.transformer.h.2.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 1.0, 3.0, 5.0, 8.0, 4.0, 24.0, 24.0, 31.0, 46.0, 83.0, 122.0, 191.0, 266.0, 444.0, 710.0, 1137.0, 1928.0, 3276.0, 5739.0, 9898.0, 17266.0, 30489.0, 56623.0, 109590.0, 264809.0, 1299797.0, 138404.0, 70135.0, 37591.0, 20596.0, 11440.0, 6646.0, 3804.0, 2267.0, 1362.0, 900.0, 556.0, 317.0, 196.0, 141.0, 95.0, 54.0, 44.0, 24.0, 13.0, 15.0, 10.0, 5.0, 5.0, 6.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-4.22265625, -4.087158203125, -3.95166015625, -3.816162109375, -3.6806640625, -3.545166015625, -3.40966796875, -3.274169921875, -3.138671875, -3.003173828125, -2.86767578125, -2.732177734375, -2.5966796875, -2.461181640625, -2.32568359375, -2.190185546875, -2.0546875, -1.919189453125, -1.78369140625, -1.648193359375, -1.5126953125, -1.377197265625, -1.24169921875, -1.106201171875, -0.970703125, -0.835205078125, -0.69970703125, -0.564208984375, -0.4287109375, -0.293212890625, -0.15771484375, -0.022216796875, 0.11328125, 0.248779296875, 0.38427734375, 0.519775390625, 0.6552734375, 0.790771484375, 0.92626953125, 1.061767578125, 1.197265625, 1.332763671875, 1.46826171875, 1.603759765625, 1.7392578125, 1.874755859375, 2.01025390625, 2.145751953125, 2.28125, 2.416748046875, 2.55224609375, 2.687744140625, 2.8232421875, 2.958740234375, 3.09423828125, 3.229736328125, 3.365234375, 3.500732421875, 3.63623046875, 3.771728515625, 3.9072265625, 4.042724609375, 4.17822265625, 4.313720703125, 4.44921875]}, "gradients/decoder.transformer.h.2.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 1.0, 4.0, 3.0, 6.0, 9.0, 11.0, 17.0, 17.0, 14.0, 23.0, 27.0, 39.0, 64.0, 89.0, 115.0, 187.0, 100.0, 75.0, 49.0, 31.0, 35.0, 19.0, 11.0, 20.0, 7.0, 16.0, 8.0, 6.0, 5.0, 1.0, 0.0, 0.0, 0.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.3916015625, -0.37979888916015625, -0.3679962158203125, -0.35619354248046875, -0.344390869140625, -0.33258819580078125, -0.3207855224609375, -0.30898284912109375, -0.29718017578125, -0.28537750244140625, -0.2735748291015625, -0.26177215576171875, -0.249969482421875, -0.23816680908203125, -0.2263641357421875, -0.21456146240234375, -0.2027587890625, -0.19095611572265625, -0.1791534423828125, -0.16735076904296875, -0.155548095703125, -0.14374542236328125, -0.1319427490234375, -0.12014007568359375, -0.10833740234375, -0.09653472900390625, -0.0847320556640625, -0.07292938232421875, -0.061126708984375, -0.04932403564453125, -0.0375213623046875, -0.02571868896484375, -0.013916015625, -0.00211334228515625, 0.0096893310546875, 0.02149200439453125, 0.033294677734375, 0.04509735107421875, 0.0569000244140625, 0.06870269775390625, 0.08050537109375, 0.09230804443359375, 0.1041107177734375, 0.11591339111328125, 0.127716064453125, 0.13951873779296875, 0.1513214111328125, 0.16312408447265625, 0.1749267578125, 0.18672943115234375, 0.1985321044921875, 0.21033477783203125, 0.222137451171875, 0.23394012451171875, 0.2457427978515625, 0.25754547119140625, 0.26934814453125, 0.28115081787109375, 0.2929534912109375, 0.30475616455078125, 0.316558837890625, 0.32836151123046875, 0.3401641845703125, 0.35196685791015625, 0.36376953125]}, "gradients/decoder.transformer.h.2.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 2.0, 2.0, 0.0, 1.0, 7.0, 1.0, 10.0, 8.0, 11.0, 13.0, 18.0, 17.0, 39.0, 41.0, 47.0, 83.0, 128.0, 242.0, 691.0, 1043321.0, 2965.0, 392.0, 183.0, 100.0, 52.0, 46.0, 32.0, 29.0, 18.0, 17.0, 11.0, 8.0, 9.0, 5.0, 7.0, 2.0, 2.0, 4.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-7.95703125, -7.69854736328125, -7.4400634765625, -7.18157958984375, -6.923095703125, -6.66461181640625, -6.4061279296875, -6.14764404296875, -5.88916015625, -5.63067626953125, -5.3721923828125, -5.11370849609375, -4.855224609375, -4.59674072265625, -4.3382568359375, -4.07977294921875, -3.8212890625, -3.56280517578125, -3.3043212890625, -3.04583740234375, -2.787353515625, -2.52886962890625, -2.2703857421875, -2.01190185546875, -1.75341796875, -1.49493408203125, -1.2364501953125, -0.97796630859375, -0.719482421875, -0.46099853515625, -0.2025146484375, 0.05596923828125, 0.314453125, 0.57293701171875, 0.8314208984375, 1.08990478515625, 1.348388671875, 1.60687255859375, 1.8653564453125, 2.12384033203125, 2.38232421875, 2.64080810546875, 2.8992919921875, 3.15777587890625, 3.416259765625, 3.67474365234375, 3.9332275390625, 4.19171142578125, 4.4501953125, 4.70867919921875, 4.9671630859375, 5.22564697265625, 5.484130859375, 5.74261474609375, 6.0010986328125, 6.25958251953125, 6.51806640625, 6.77655029296875, 7.0350341796875, 7.29351806640625, 7.552001953125, 7.81048583984375, 8.0689697265625, 8.32745361328125, 8.5859375]}, "gradients/decoder.transformer.h.2.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 918.0, 96.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.7686578035354614, -0.6809415817260742, -0.5932254195213318, -0.5055092573165894, -0.41779303550720215, -0.33007684350013733, -0.2423606514930725, -0.15464448928833008, -0.06692826747894287, 0.020787924528121948, 0.10850411653518677, 0.1962203085422516, 0.2839365005493164, 0.3716526925563812, 0.45936888456344604, 0.5470850467681885, 0.6348012685775757, 0.7225174903869629, 0.8102336525917053, 0.8979498147964478, 0.985666036605835, 1.0733822584152222, 1.1610984802246094, 1.248814582824707, 1.3365308046340942, 1.4242470264434814, 1.511963129043579, 1.5996793508529663, 1.6873955726623535, 1.7751117944717407, 1.862828016281128, 1.9505441188812256, 2.0382604598999023, 2.1259765625, 2.2136929035186768, 2.3014090061187744, 2.389125347137451, 2.476841449737549, 2.5645575523376465, 2.652273654937744, 2.739989995956421, 2.8277060985565186, 2.9154224395751953, 3.003138542175293, 3.0908546447753906, 3.1785709857940674, 3.266287088394165, 3.354003429412842, 3.4417195320129395, 3.529435634613037, 3.617151975631714, 3.7048680782318115, 3.7925844192504883, 3.880300521850586, 3.9680166244506836, 4.055732727050781, 4.143448829650879, 4.231164932250977, 4.318881034851074, 4.40659761428833, 4.494313716888428, 4.582029819488525, 4.669745922088623, 4.757462024688721, 4.845178604125977]}, "gradients/decoder.transformer.h.2.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 3.0, 6.0, 2.0, 6.0, 4.0, 5.0, 5.0, 9.0, 8.0, 14.0, 13.0, 13.0, 9.0, 17.0, 19.0, 17.0, 24.0, 23.0, 29.0, 38.0, 33.0, 34.0, 38.0, 23.0, 39.0, 32.0, 47.0, 33.0, 40.0, 41.0, 39.0, 32.0, 30.0, 34.0, 37.0, 36.0, 17.0, 24.0, 23.0, 23.0, 22.0, 14.0, 16.0, 8.0, 10.0, 6.0, 8.0, 3.0, 3.0, 2.0, 0.0, 1.0, 4.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.2980431914329529, -0.2886809706687927, -0.2793187201023102, -0.26995649933815, -0.2605942487716675, -0.2512320280075073, -0.24186980724334717, -0.23250757157802582, -0.22314533591270447, -0.21378310024738312, -0.20442086458206177, -0.1950586438179016, -0.18569640815258026, -0.1763341724872589, -0.16697195172309875, -0.1576097160577774, -0.14824748039245605, -0.1388852447271347, -0.12952300906181335, -0.1201607882976532, -0.11079855263233185, -0.1014363169670105, -0.09207408875226974, -0.08271186053752899, -0.07334962487220764, -0.06398738920688629, -0.05462516099214554, -0.04526292905211449, -0.035900697112083435, -0.026538465172052383, -0.017176233232021332, -0.007814005017280579, 0.0015482306480407715, 0.010910462588071823, 0.020272694528102875, 0.029634926468133926, 0.03899715840816498, 0.04835939034819603, 0.05772162228822708, 0.06708385050296783, 0.07644608616828918, 0.08580832183361053, 0.09517055004835129, 0.10453277826309204, 0.11389501392841339, 0.12325724959373474, 0.1326194703578949, 0.14198170602321625, 0.1513439416885376, 0.16070617735385895, 0.1700684130191803, 0.17943063378334045, 0.1887928694486618, 0.19815510511398315, 0.2075173258781433, 0.21687956154346466, 0.226241797208786, 0.23560403287410736, 0.2449662685394287, 0.25432848930358887, 0.263690710067749, 0.27305296063423157, 0.2824151813983917, 0.29177743196487427, 0.3011396527290344]}, "gradients/decoder.transformer.h.2.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 2.0, 3.0, 3.0, 8.0, 6.0, 5.0, 14.0, 10.0, 17.0, 13.0, 17.0, 23.0, 25.0, 26.0, 32.0, 31.0, 28.0, 30.0, 37.0, 30.0, 38.0, 40.0, 56.0, 51.0, 36.0, 37.0, 39.0, 32.0, 41.0, 36.0, 38.0, 37.0, 24.0, 26.0, 16.0, 22.0, 21.0, 14.0, 11.0, 15.0, 9.0, 4.0, 2.0, 4.0, 2.0, 1.0, 0.0, 2.0, 0.0, 3.0], "bins": [-82.6875, -80.45849609375, -78.2294921875, -76.00048828125, -73.771484375, -71.54248046875, -69.3134765625, -67.08447265625, -64.85546875, -62.62646484375, -60.3974609375, -58.16845703125, -55.939453125, -53.71044921875, -51.4814453125, -49.25244140625, -47.0234375, -44.79443359375, -42.5654296875, -40.33642578125, -38.107421875, -35.87841796875, -33.6494140625, -31.42041015625, -29.19140625, -26.96240234375, -24.7333984375, -22.50439453125, -20.275390625, -18.04638671875, -15.8173828125, -13.58837890625, -11.359375, -9.13037109375, -6.9013671875, -4.67236328125, -2.443359375, -0.21435546875, 2.0146484375, 4.24365234375, 6.47265625, 8.70166015625, 10.9306640625, 13.15966796875, 15.388671875, 17.61767578125, 19.8466796875, 22.07568359375, 24.3046875, 26.53369140625, 28.7626953125, 30.99169921875, 33.220703125, 35.44970703125, 37.6787109375, 39.90771484375, 42.13671875, 44.36572265625, 46.5947265625, 48.82373046875, 51.052734375, 53.28173828125, 55.5107421875, 57.73974609375, 59.96875]}, "gradients/decoder.transformer.h.2.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 3.0, 1.0, 0.0, 8.0, 5.0, 8.0, 14.0, 14.0, 26.0, 40.0, 65.0, 73.0, 116.0, 181.0, 282.0, 443.0, 678.0, 1058.0, 1631.0, 2831.0, 5212.0, 10155.0, 25920.0, 103156.0, 636477.0, 193538.0, 38183.0, 13455.0, 6196.0, 3331.0, 2046.0, 1233.0, 743.0, 496.0, 320.0, 205.0, 145.0, 75.0, 72.0, 39.0, 26.0, 22.0, 13.0, 7.0, 7.0, 9.0, 4.0, 1.0, 3.0, 2.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-150.625, -145.546875, -140.46875, -135.390625, -130.3125, -125.234375, -120.15625, -115.078125, -110.0, -104.921875, -99.84375, -94.765625, -89.6875, -84.609375, -79.53125, -74.453125, -69.375, -64.296875, -59.21875, -54.140625, -49.0625, -43.984375, -38.90625, -33.828125, -28.75, -23.671875, -18.59375, -13.515625, -8.4375, -3.359375, 1.71875, 6.796875, 11.875, 16.953125, 22.03125, 27.109375, 32.1875, 37.265625, 42.34375, 47.421875, 52.5, 57.578125, 62.65625, 67.734375, 72.8125, 77.890625, 82.96875, 88.046875, 93.125, 98.203125, 103.28125, 108.359375, 113.4375, 118.515625, 123.59375, 128.671875, 133.75, 138.828125, 143.90625, 148.984375, 154.0625, 159.140625, 164.21875, 169.296875, 174.375]}, "gradients/decoder.transformer.h.2.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 4.0, 1.0, 5.0, 7.0, 4.0, 8.0, 5.0, 6.0, 15.0, 10.0, 16.0, 22.0, 26.0, 28.0, 37.0, 32.0, 36.0, 59.0, 55.0, 84.0, 126.0, 231.0, 1493.0, 230.0, 99.0, 64.0, 64.0, 43.0, 25.0, 32.0, 32.0, 35.0, 26.0, 17.0, 16.0, 9.0, 10.0, 11.0, 8.0, 6.0, 8.0, 1.0, 1.0, 3.0, 3.0, 0.0, 0.0, 2.0, 2.0, 4.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-151.875, -147.041015625, -142.20703125, -137.373046875, -132.5390625, -127.705078125, -122.87109375, -118.037109375, -113.203125, -108.369140625, -103.53515625, -98.701171875, -93.8671875, -89.033203125, -84.19921875, -79.365234375, -74.53125, -69.697265625, -64.86328125, -60.029296875, -55.1953125, -50.361328125, -45.52734375, -40.693359375, -35.859375, -31.025390625, -26.19140625, -21.357421875, -16.5234375, -11.689453125, -6.85546875, -2.021484375, 2.8125, 7.646484375, 12.48046875, 17.314453125, 22.1484375, 26.982421875, 31.81640625, 36.650390625, 41.484375, 46.318359375, 51.15234375, 55.986328125, 60.8203125, 65.654296875, 70.48828125, 75.322265625, 80.15625, 84.990234375, 89.82421875, 94.658203125, 99.4921875, 104.326171875, 109.16015625, 113.994140625, 118.828125, 123.662109375, 128.49609375, 133.330078125, 138.1640625, 142.998046875, 147.83203125, 152.666015625, 157.5]}, "gradients/decoder.transformer.h.2.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 6.0, 10.0, 6.0, 13.0, 12.0, 17.0, 12.0, 26.0, 27.0, 47.0, 89.0, 132.0, 207.0, 381.0, 819.0, 3999.0, 2874594.0, 261210.0, 2529.0, 662.0, 328.0, 193.0, 122.0, 80.0, 50.0, 31.0, 24.0, 23.0, 13.0, 13.0, 12.0, 6.0, 5.0, 2.0, 4.0, 2.0, 0.0, 2.0, 1.0, 3.0, 2.0, 2.0, 1.0, 0.0, 1.0], "bins": [-688.5, -669.3203125, -650.140625, -630.9609375, -611.78125, -592.6015625, -573.421875, -554.2421875, -535.0625, -515.8828125, -496.703125, -477.5234375, -458.34375, -439.1640625, -419.984375, -400.8046875, -381.625, -362.4453125, -343.265625, -324.0859375, -304.90625, -285.7265625, -266.546875, -247.3671875, -228.1875, -209.0078125, -189.828125, -170.6484375, -151.46875, -132.2890625, -113.109375, -93.9296875, -74.75, -55.5703125, -36.390625, -17.2109375, 1.96875, 21.1484375, 40.328125, 59.5078125, 78.6875, 97.8671875, 117.046875, 136.2265625, 155.40625, 174.5859375, 193.765625, 212.9453125, 232.125, 251.3046875, 270.484375, 289.6640625, 308.84375, 328.0234375, 347.203125, 366.3828125, 385.5625, 404.7421875, 423.921875, 443.1015625, 462.28125, 481.4609375, 500.640625, 519.8203125, 539.0]}, "gradients/decoder.transformer.h.2.ln_1.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 2.0, 4.0, 8.0, 13.0, 37.0, 64.0, 123.0, 189.0, 198.0, 163.0, 113.0, 55.0, 24.0, 13.0, 5.0, 4.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-305.8585510253906, -281.7736511230469, -257.6887512207031, -233.60385131835938, -209.51895141601562, -185.43405151367188, -161.34913635253906, -137.2642364501953, -113.17933654785156, -89.09443664550781, -65.00953674316406, -40.92462921142578, -16.83972930908203, 7.245170593261719, 31.330078125, 55.41497802734375, 79.4998779296875, 103.58477783203125, 127.669677734375, 151.75457763671875, 175.8394775390625, 199.92437744140625, 224.00929260253906, 248.0941925048828, 272.1790771484375, 296.26397705078125, 320.348876953125, 344.43377685546875, 368.5186767578125, 392.60357666015625, 416.6884765625, 440.77337646484375, 464.85833740234375, 488.9432373046875, 513.0281372070312, 537.113037109375, 561.1979370117188, 585.2828369140625, 609.3677368164062, 633.45263671875, 657.5375366210938, 681.6224365234375, 705.7073364257812, 729.792236328125, 753.8771362304688, 777.9620361328125, 802.0469360351562, 826.1318359375, 850.216796875, 874.3016967773438, 898.3865966796875, 922.4714965820312, 946.556396484375, 970.6412963867188, 994.7261962890625, 1018.8110961914062, 1042.89599609375, 1066.98095703125, 1091.0657958984375, 1115.1507568359375, 1139.235595703125, 1163.320556640625, 1187.4053955078125, 1211.4903564453125, 1235.5751953125]}, "gradients/decoder.transformer.h.2.ln_1.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 2.0, 3.0, 2.0, 2.0, 7.0, 8.0, 5.0, 8.0, 6.0, 11.0, 8.0, 14.0, 13.0, 12.0, 18.0, 24.0, 24.0, 25.0, 21.0, 29.0, 31.0, 27.0, 29.0, 26.0, 35.0, 35.0, 28.0, 33.0, 37.0, 37.0, 31.0, 40.0, 41.0, 33.0, 22.0, 30.0, 30.0, 33.0, 29.0, 24.0, 20.0, 15.0, 14.0, 10.0, 18.0, 5.0, 12.0, 11.0, 11.0, 4.0, 10.0, 6.0, 3.0, 2.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-398.1130065917969, -385.73126220703125, -373.3495178222656, -360.9677734375, -348.5860290527344, -336.20428466796875, -323.82257080078125, -311.4407958984375, -299.05908203125, -286.6773376464844, -274.29559326171875, -261.9138488769531, -249.5321044921875, -237.15036010742188, -224.7686309814453, -212.3868865966797, -200.005126953125, -187.62338256835938, -175.24163818359375, -162.85989379882812, -150.4781494140625, -138.09640502929688, -125.71467590332031, -113.33293151855469, -100.95118713378906, -88.56944274902344, -76.18769836425781, -63.80596160888672, -51.424217224121094, -39.04247283935547, -26.660736083984375, -14.27899169921875, -1.897216796875, 10.484525680541992, 22.866268157958984, 35.248008728027344, 47.62975311279297, 60.011497497558594, 72.39323425292969, 84.77497863769531, 97.15672302246094, 109.53846740722656, 121.92021179199219, 134.30194091796875, 146.68368530273438, 159.0654296875, 171.44717407226562, 183.82891845703125, 196.21066284179688, 208.5924072265625, 220.97415161132812, 233.35589599609375, 245.73764038085938, 258.119384765625, 270.5010986328125, 282.88287353515625, 295.26458740234375, 307.6463317871094, 320.028076171875, 332.4098205566406, 344.79156494140625, 357.1733093261719, 369.5550537109375, 381.936767578125, 394.31854248046875]}, "gradients/decoder.transformer.h.1.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 5.0, 4.0, 1.0, 4.0, 5.0, 11.0, 7.0, 10.0, 13.0, 13.0, 25.0, 17.0, 26.0, 28.0, 31.0, 30.0, 28.0, 32.0, 37.0, 36.0, 35.0, 36.0, 38.0, 48.0, 43.0, 50.0, 36.0, 48.0, 48.0, 40.0, 30.0, 24.0, 29.0, 20.0, 26.0, 11.0, 16.0, 11.0, 15.0, 15.0, 8.0, 4.0, 8.0, 3.0, 3.0, 3.0, 3.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-82.25, -79.916015625, -77.58203125, -75.248046875, -72.9140625, -70.580078125, -68.24609375, -65.912109375, -63.578125, -61.244140625, -58.91015625, -56.576171875, -54.2421875, -51.908203125, -49.57421875, -47.240234375, -44.90625, -42.572265625, -40.23828125, -37.904296875, -35.5703125, -33.236328125, -30.90234375, -28.568359375, -26.234375, -23.900390625, -21.56640625, -19.232421875, -16.8984375, -14.564453125, -12.23046875, -9.896484375, -7.5625, -5.228515625, -2.89453125, -0.560546875, 1.7734375, 4.107421875, 6.44140625, 8.775390625, 11.109375, 13.443359375, 15.77734375, 18.111328125, 20.4453125, 22.779296875, 25.11328125, 27.447265625, 29.78125, 32.115234375, 34.44921875, 36.783203125, 39.1171875, 41.451171875, 43.78515625, 46.119140625, 48.453125, 50.787109375, 53.12109375, 55.455078125, 57.7890625, 60.123046875, 62.45703125, 64.791015625, 67.125]}, "gradients/decoder.transformer.h.1.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 5.0, 0.0, 2.0, 3.0, 3.0, 7.0, 5.0, 6.0, 10.0, 14.0, 19.0, 29.0, 37.0, 44.0, 69.0, 95.0, 125.0, 156.0, 250.0, 328.0, 451.0, 601.0, 909.0, 1376.0, 2177.0, 4059.0, 9696.0, 42482.0, 1251510.0, 2754236.0, 99393.0, 13505.0, 5112.0, 2593.0, 1586.0, 1025.0, 675.0, 458.0, 319.0, 241.0, 180.0, 127.0, 108.0, 65.0, 52.0, 44.0, 26.0, 26.0, 17.0, 12.0, 7.0, 6.0, 3.0, 2.0, 4.0, 2.0, 3.0, 4.0, 2.0, 0.0, 1.0, 0.0, 1.0], "bins": [-220.25, -212.9453125, -205.640625, -198.3359375, -191.03125, -183.7265625, -176.421875, -169.1171875, -161.8125, -154.5078125, -147.203125, -139.8984375, -132.59375, -125.2890625, -117.984375, -110.6796875, -103.375, -96.0703125, -88.765625, -81.4609375, -74.15625, -66.8515625, -59.546875, -52.2421875, -44.9375, -37.6328125, -30.328125, -23.0234375, -15.71875, -8.4140625, -1.109375, 6.1953125, 13.5, 20.8046875, 28.109375, 35.4140625, 42.71875, 50.0234375, 57.328125, 64.6328125, 71.9375, 79.2421875, 86.546875, 93.8515625, 101.15625, 108.4609375, 115.765625, 123.0703125, 130.375, 137.6796875, 144.984375, 152.2890625, 159.59375, 166.8984375, 174.203125, 181.5078125, 188.8125, 196.1171875, 203.421875, 210.7265625, 218.03125, 225.3359375, 232.640625, 239.9453125, 247.25]}, "gradients/decoder.transformer.h.1.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 4.0, 4.0, 5.0, 5.0, 9.0, 24.0, 25.0, 27.0, 53.0, 80.0, 111.0, 202.0, 339.0, 561.0, 798.0, 687.0, 429.0, 245.0, 159.0, 98.0, 52.0, 41.0, 30.0, 28.0, 19.0, 14.0, 5.0, 11.0, 7.0, 4.0, 2.0, 1.0, 2.0, 1.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-186.625, -181.267578125, -175.91015625, -170.552734375, -165.1953125, -159.837890625, -154.48046875, -149.123046875, -143.765625, -138.408203125, -133.05078125, -127.693359375, -122.3359375, -116.978515625, -111.62109375, -106.263671875, -100.90625, -95.548828125, -90.19140625, -84.833984375, -79.4765625, -74.119140625, -68.76171875, -63.404296875, -58.046875, -52.689453125, -47.33203125, -41.974609375, -36.6171875, -31.259765625, -25.90234375, -20.544921875, -15.1875, -9.830078125, -4.47265625, 0.884765625, 6.2421875, 11.599609375, 16.95703125, 22.314453125, 27.671875, 33.029296875, 38.38671875, 43.744140625, 49.1015625, 54.458984375, 59.81640625, 65.173828125, 70.53125, 75.888671875, 81.24609375, 86.603515625, 91.9609375, 97.318359375, 102.67578125, 108.033203125, 113.390625, 118.748046875, 124.10546875, 129.462890625, 134.8203125, 140.177734375, 145.53515625, 150.892578125, 156.25]}, "gradients/decoder.transformer.h.1.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 3.0, 2.0, 0.0, 3.0, 7.0, 7.0, 2.0, 21.0, 24.0, 29.0, 37.0, 58.0, 96.0, 206.0, 435.0, 1120.0, 4830.0, 55006.0, 3883271.0, 235823.0, 10322.0, 1863.0, 585.0, 259.0, 116.0, 54.0, 46.0, 24.0, 16.0, 9.0, 6.0, 4.0, 3.0, 5.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-416.25, -402.68359375, -389.1171875, -375.55078125, -361.984375, -348.41796875, -334.8515625, -321.28515625, -307.71875, -294.15234375, -280.5859375, -267.01953125, -253.453125, -239.88671875, -226.3203125, -212.75390625, -199.1875, -185.62109375, -172.0546875, -158.48828125, -144.921875, -131.35546875, -117.7890625, -104.22265625, -90.65625, -77.08984375, -63.5234375, -49.95703125, -36.390625, -22.82421875, -9.2578125, 4.30859375, 17.875, 31.44140625, 45.0078125, 58.57421875, 72.140625, 85.70703125, 99.2734375, 112.83984375, 126.40625, 139.97265625, 153.5390625, 167.10546875, 180.671875, 194.23828125, 207.8046875, 221.37109375, 234.9375, 248.50390625, 262.0703125, 275.63671875, 289.203125, 302.76953125, 316.3359375, 329.90234375, 343.46875, 357.03515625, 370.6015625, 384.16796875, 397.734375, 411.30078125, 424.8671875, 438.43359375, 452.0]}, "gradients/decoder.transformer.h.1.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 89.0, 807.0, 112.0, 7.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7563.4990234375, -7419.3623046875, -7275.2255859375, -7131.0888671875, -6986.9521484375, -6842.8154296875, -6698.6787109375, -6554.54150390625, -6410.40478515625, -6266.26806640625, -6122.13134765625, -5977.99462890625, -5833.85791015625, -5689.720703125, -5545.583984375, -5401.447265625, -5257.310546875, -5113.173828125, -4969.037109375, -4824.900390625, -4680.763671875, -4536.626953125, -4392.490234375, -4248.35302734375, -4104.216796875, -3960.080078125, -3815.943359375, -3671.806640625, -3527.669677734375, -3383.532958984375, -3239.396240234375, -3095.259521484375, -2951.123046875, -2806.986328125, -2662.849609375, -2518.712890625, -2374.575927734375, -2230.439208984375, -2086.302490234375, -1942.165771484375, -1798.02880859375, -1653.89208984375, -1509.7552490234375, -1365.6185302734375, -1221.481689453125, -1077.344970703125, -933.208251953125, -789.0714721679688, -644.9346923828125, -500.79791259765625, -356.6611633300781, -212.5244140625, -68.38763427734375, 75.7491455078125, 219.8858642578125, 364.02264404296875, 508.159423828125, 652.2962036132812, 796.4329833984375, 940.5697021484375, 1084.70654296875, 1228.84326171875, 1372.97998046875, 1517.11669921875, 1661.2535400390625]}, "gradients/decoder.transformer.h.1.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 1.0, 4.0, 3.0, 6.0, 3.0, 9.0, 11.0, 12.0, 9.0, 11.0, 13.0, 21.0, 26.0, 22.0, 19.0, 38.0, 38.0, 34.0, 37.0, 35.0, 39.0, 31.0, 39.0, 52.0, 45.0, 42.0, 30.0, 41.0, 50.0, 41.0, 30.0, 34.0, 32.0, 27.0, 24.0, 21.0, 11.0, 16.0, 12.0, 11.0, 12.0, 4.0, 4.0, 5.0, 4.0, 1.0, 2.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-407.47088623046875, -393.225341796875, -378.97979736328125, -364.7342529296875, -350.48870849609375, -336.2431640625, -321.99761962890625, -307.7520751953125, -293.50653076171875, -279.260986328125, -265.01544189453125, -250.7698974609375, -236.52435302734375, -222.27880859375, -208.03326416015625, -193.7877197265625, -179.54217529296875, -165.296630859375, -151.05108642578125, -136.8055419921875, -122.55999755859375, -108.314453125, -94.06890869140625, -79.8233642578125, -65.57781982421875, -51.332275390625, -37.08673095703125, -22.8411865234375, -8.59564208984375, 5.64990234375, 19.89544677734375, 34.1409912109375, 48.38653564453125, 62.632080078125, 76.87762451171875, 91.1231689453125, 105.36871337890625, 119.6142578125, 133.85980224609375, 148.1053466796875, 162.35089111328125, 176.596435546875, 190.84197998046875, 205.0875244140625, 219.33306884765625, 233.57861328125, 247.82415771484375, 262.0697021484375, 276.31524658203125, 290.560791015625, 304.80633544921875, 319.0518798828125, 333.29742431640625, 347.54296875, 361.78851318359375, 376.0340576171875, 390.27960205078125, 404.525146484375, 418.77069091796875, 433.0162353515625, 447.26177978515625, 461.50732421875, 475.75286865234375, 489.9984130859375, 504.24395751953125]}, "gradients/decoder.transformer.h.1.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 6.0, 2.0, 7.0, 3.0, 11.0, 4.0, 8.0, 13.0, 20.0, 23.0, 17.0, 20.0, 32.0, 29.0, 30.0, 44.0, 35.0, 36.0, 38.0, 39.0, 45.0, 44.0, 51.0, 44.0, 43.0, 31.0, 35.0, 54.0, 32.0, 30.0, 23.0, 31.0, 20.0, 24.0, 17.0, 14.0, 9.0, 15.0, 10.0, 11.0, 4.0, 4.0, 3.0, 1.0, 3.0, 1.0, 1.0], "bins": [-78.1875, -76.1728515625, -74.158203125, -72.1435546875, -70.12890625, -68.1142578125, -66.099609375, -64.0849609375, -62.0703125, -60.0556640625, -58.041015625, -56.0263671875, -54.01171875, -51.9970703125, -49.982421875, -47.9677734375, -45.953125, -43.9384765625, -41.923828125, -39.9091796875, -37.89453125, -35.8798828125, -33.865234375, -31.8505859375, -29.8359375, -27.8212890625, -25.806640625, -23.7919921875, -21.77734375, -19.7626953125, -17.748046875, -15.7333984375, -13.71875, -11.7041015625, -9.689453125, -7.6748046875, -5.66015625, -3.6455078125, -1.630859375, 0.3837890625, 2.3984375, 4.4130859375, 6.427734375, 8.4423828125, 10.45703125, 12.4716796875, 14.486328125, 16.5009765625, 18.515625, 20.5302734375, 22.544921875, 24.5595703125, 26.57421875, 28.5888671875, 30.603515625, 32.6181640625, 34.6328125, 36.6474609375, 38.662109375, 40.6767578125, 42.69140625, 44.7060546875, 46.720703125, 48.7353515625, 50.75]}, "gradients/decoder.transformer.h.1.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 3.0, 7.0, 9.0, 7.0, 10.0, 28.0, 34.0, 45.0, 42.0, 77.0, 79.0, 90.0, 121.0, 116.0, 167.0, 238.0, 335.0, 540.0, 727.0, 1030.0, 1510.0, 2204.0, 3388.0, 5485.0, 8962.0, 15228.0, 28408.0, 55066.0, 115054.0, 246496.0, 284436.0, 135364.0, 64928.0, 32773.0, 17624.0, 10060.0, 6096.0, 3816.0, 2466.0, 1664.0, 1106.0, 776.0, 485.0, 359.0, 261.0, 177.0, 134.0, 121.0, 75.0, 72.0, 69.0, 46.0, 50.0, 40.0, 26.0, 15.0, 11.0, 8.0, 2.0, 3.0, 2.0, 3.0], "bins": [-9.0859375, -8.802978515625, -8.52001953125, -8.237060546875, -7.9541015625, -7.671142578125, -7.38818359375, -7.105224609375, -6.822265625, -6.539306640625, -6.25634765625, -5.973388671875, -5.6904296875, -5.407470703125, -5.12451171875, -4.841552734375, -4.55859375, -4.275634765625, -3.99267578125, -3.709716796875, -3.4267578125, -3.143798828125, -2.86083984375, -2.577880859375, -2.294921875, -2.011962890625, -1.72900390625, -1.446044921875, -1.1630859375, -0.880126953125, -0.59716796875, -0.314208984375, -0.03125, 0.251708984375, 0.53466796875, 0.817626953125, 1.1005859375, 1.383544921875, 1.66650390625, 1.949462890625, 2.232421875, 2.515380859375, 2.79833984375, 3.081298828125, 3.3642578125, 3.647216796875, 3.93017578125, 4.213134765625, 4.49609375, 4.779052734375, 5.06201171875, 5.344970703125, 5.6279296875, 5.910888671875, 6.19384765625, 6.476806640625, 6.759765625, 7.042724609375, 7.32568359375, 7.608642578125, 7.8916015625, 8.174560546875, 8.45751953125, 8.740478515625, 9.0234375]}, "gradients/decoder.transformer.h.1.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 5.0, 1.0, 2.0, 6.0, 6.0, 6.0, 5.0, 15.0, 11.0, 21.0, 18.0, 15.0, 14.0, 31.0, 31.0, 38.0, 34.0, 37.0, 41.0, 42.0, 56.0, 52.0, 1055.0, 28.0, 44.0, 56.0, 36.0, 50.0, 32.0, 31.0, 37.0, 36.0, 28.0, 29.0, 16.0, 8.0, 16.0, 11.0, 5.0, 9.0, 10.0, 6.0, 4.0, 2.0, 2.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-46.875, -45.51708984375, -44.1591796875, -42.80126953125, -41.443359375, -40.08544921875, -38.7275390625, -37.36962890625, -36.01171875, -34.65380859375, -33.2958984375, -31.93798828125, -30.580078125, -29.22216796875, -27.8642578125, -26.50634765625, -25.1484375, -23.79052734375, -22.4326171875, -21.07470703125, -19.716796875, -18.35888671875, -17.0009765625, -15.64306640625, -14.28515625, -12.92724609375, -11.5693359375, -10.21142578125, -8.853515625, -7.49560546875, -6.1376953125, -4.77978515625, -3.421875, -2.06396484375, -0.7060546875, 0.65185546875, 2.009765625, 3.36767578125, 4.7255859375, 6.08349609375, 7.44140625, 8.79931640625, 10.1572265625, 11.51513671875, 12.873046875, 14.23095703125, 15.5888671875, 16.94677734375, 18.3046875, 19.66259765625, 21.0205078125, 22.37841796875, 23.736328125, 25.09423828125, 26.4521484375, 27.81005859375, 29.16796875, 30.52587890625, 31.8837890625, 33.24169921875, 34.599609375, 35.95751953125, 37.3154296875, 38.67333984375, 40.03125]}, "gradients/decoder.transformer.h.1.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 1.0, 3.0, 5.0, 7.0, 12.0, 9.0, 8.0, 25.0, 31.0, 36.0, 61.0, 61.0, 110.0, 135.0, 244.0, 306.0, 454.0, 667.0, 1022.0, 1488.0, 2271.0, 3574.0, 5533.0, 8908.0, 14313.0, 23785.0, 40462.0, 70034.0, 128069.0, 1243730.0, 279828.0, 114518.0, 63064.0, 36465.0, 21717.0, 13101.0, 8179.0, 5048.0, 3310.0, 2151.0, 1344.0, 953.0, 637.0, 431.0, 311.0, 226.0, 161.0, 91.0, 72.0, 61.0, 36.0, 23.0, 15.0, 10.0, 9.0, 8.0, 7.0, 3.0, 0.0, 2.0, 3.0], "bins": [-4.44140625, -4.30645751953125, -4.1715087890625, -4.03656005859375, -3.901611328125, -3.76666259765625, -3.6317138671875, -3.49676513671875, -3.36181640625, -3.22686767578125, -3.0919189453125, -2.95697021484375, -2.822021484375, -2.68707275390625, -2.5521240234375, -2.41717529296875, -2.2822265625, -2.14727783203125, -2.0123291015625, -1.87738037109375, -1.742431640625, -1.60748291015625, -1.4725341796875, -1.33758544921875, -1.20263671875, -1.06768798828125, -0.9327392578125, -0.79779052734375, -0.662841796875, -0.52789306640625, -0.3929443359375, -0.25799560546875, -0.123046875, 0.01190185546875, 0.1468505859375, 0.28179931640625, 0.416748046875, 0.55169677734375, 0.6866455078125, 0.82159423828125, 0.95654296875, 1.09149169921875, 1.2264404296875, 1.36138916015625, 1.496337890625, 1.63128662109375, 1.7662353515625, 1.90118408203125, 2.0361328125, 2.17108154296875, 2.3060302734375, 2.44097900390625, 2.575927734375, 2.71087646484375, 2.8458251953125, 2.98077392578125, 3.11572265625, 3.25067138671875, 3.3856201171875, 3.52056884765625, 3.655517578125, 3.79046630859375, 3.9254150390625, 4.06036376953125, 4.1953125]}, "gradients/decoder.transformer.h.1.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 3.0, 2.0, 1.0, 0.0, 4.0, 4.0, 5.0, 8.0, 7.0, 5.0, 7.0, 8.0, 7.0, 11.0, 17.0, 30.0, 17.0, 24.0, 48.0, 89.0, 176.0, 203.0, 103.0, 59.0, 37.0, 24.0, 15.0, 22.0, 16.0, 10.0, 5.0, 11.0, 5.0, 9.0, 10.0, 1.0, 3.0, 1.0, 1.0, 1.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.2418212890625, -0.23378944396972656, -0.22575759887695312, -0.2177257537841797, -0.20969390869140625, -0.2016620635986328, -0.19363021850585938, -0.18559837341308594, -0.1775665283203125, -0.16953468322753906, -0.16150283813476562, -0.1534709930419922, -0.14543914794921875, -0.1374073028564453, -0.12937545776367188, -0.12134361267089844, -0.113311767578125, -0.10527992248535156, -0.09724807739257812, -0.08921623229980469, -0.08118438720703125, -0.07315254211425781, -0.06512069702148438, -0.05708885192871094, -0.0490570068359375, -0.04102516174316406, -0.032993316650390625, -0.024961471557617188, -0.01692962646484375, -0.008897781372070312, -0.000865936279296875, 0.0071659088134765625, 0.01519775390625, 0.023229598999023438, 0.031261444091796875, 0.03929328918457031, 0.04732513427734375, 0.05535697937011719, 0.06338882446289062, 0.07142066955566406, 0.0794525146484375, 0.08748435974121094, 0.09551620483398438, 0.10354804992675781, 0.11157989501953125, 0.11961174011230469, 0.12764358520507812, 0.13567543029785156, 0.143707275390625, 0.15173912048339844, 0.15977096557617188, 0.1678028106689453, 0.17583465576171875, 0.1838665008544922, 0.19189834594726562, 0.19993019104003906, 0.2079620361328125, 0.21599388122558594, 0.22402572631835938, 0.2320575714111328, 0.24008941650390625, 0.2481212615966797, 0.2561531066894531, 0.26418495178222656, 0.272216796875]}, "gradients/decoder.transformer.h.1.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 3.0, 1.0, 1.0, 1.0, 1.0, 2.0, 5.0, 4.0, 10.0, 15.0, 10.0, 14.0, 13.0, 29.0, 33.0, 41.0, 73.0, 101.0, 184.0, 440.0, 5432.0, 1040902.0, 601.0, 232.0, 125.0, 78.0, 56.0, 31.0, 30.0, 19.0, 14.0, 8.0, 13.0, 9.0, 9.0, 5.0, 7.0, 5.0, 2.0, 1.0, 1.0, 4.0, 1.0, 0.0, 1.0, 0.0, 0.0, 3.0, 0.0, 0.0, 2.0], "bins": [-6.79296875, -6.59417724609375, -6.3953857421875, -6.19659423828125, -5.997802734375, -5.79901123046875, -5.6002197265625, -5.40142822265625, -5.20263671875, -5.00384521484375, -4.8050537109375, -4.60626220703125, -4.407470703125, -4.20867919921875, -4.0098876953125, -3.81109619140625, -3.6123046875, -3.41351318359375, -3.2147216796875, -3.01593017578125, -2.817138671875, -2.61834716796875, -2.4195556640625, -2.22076416015625, -2.02197265625, -1.82318115234375, -1.6243896484375, -1.42559814453125, -1.226806640625, -1.02801513671875, -0.8292236328125, -0.63043212890625, -0.431640625, -0.23284912109375, -0.0340576171875, 0.16473388671875, 0.363525390625, 0.56231689453125, 0.7611083984375, 0.95989990234375, 1.15869140625, 1.35748291015625, 1.5562744140625, 1.75506591796875, 1.953857421875, 2.15264892578125, 2.3514404296875, 2.55023193359375, 2.7490234375, 2.94781494140625, 3.1466064453125, 3.34539794921875, 3.544189453125, 3.74298095703125, 3.9417724609375, 4.14056396484375, 4.33935546875, 4.53814697265625, 4.7369384765625, 4.93572998046875, 5.134521484375, 5.33331298828125, 5.5321044921875, 5.73089599609375, 5.9296875]}, "gradients/decoder.transformer.h.1.ln_cross_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1010.0, 6.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.1715028285980225, -0.9954726696014404, -0.8194425106048584, -0.6434123516082764, -0.46738219261169434, -0.2913520336151123, -0.11532187461853027, 0.06070828437805176, 0.2367384433746338, 0.4127686023712158, 0.5887987613677979, 0.7648289203643799, 0.9408590793609619, 1.116889238357544, 1.292919397354126, 1.468949556350708, 1.64497971534729, 1.821009874343872, 1.997040033340454, 2.173070192337036, 2.349100351333618, 2.5251305103302, 2.7011606693267822, 2.8771908283233643, 3.0532209873199463, 3.2292511463165283, 3.4052813053131104, 3.5813114643096924, 3.7573416233062744, 3.9333717823028564, 4.109401702880859, 4.285431861877441, 4.461462020874023, 4.6374921798706055, 4.8135223388671875, 4.9895524978637695, 5.165582656860352, 5.341612815856934, 5.517642974853516, 5.693673133850098, 5.86970329284668, 6.045733451843262, 6.221763610839844, 6.397793769836426, 6.573823928833008, 6.74985408782959, 6.925884246826172, 7.101914405822754, 7.277944564819336, 7.453974723815918, 7.6300048828125, 7.806035041809082, 7.982065200805664, 8.158095359802246, 8.334125518798828, 8.51015567779541, 8.686185836791992, 8.862215995788574, 9.038246154785156, 9.214276313781738, 9.39030647277832, 9.566336631774902, 9.742366790771484, 9.918396949768066, 10.094427108764648]}, "gradients/decoder.transformer.h.1.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 3.0, 1.0, 2.0, 4.0, 13.0, 11.0, 14.0, 11.0, 15.0, 25.0, 24.0, 32.0, 41.0, 35.0, 27.0, 51.0, 62.0, 52.0, 47.0, 71.0, 64.0, 55.0, 43.0, 45.0, 51.0, 39.0, 28.0, 20.0, 20.0, 22.0, 20.0, 9.0, 19.0, 13.0, 7.0, 8.0, 5.0, 3.0, 2.0, 2.0, 1.0, 1.0], "bins": [-0.6530327796936035, -0.6372945308685303, -0.6215563416481018, -0.6058180928230286, -0.5900799036026001, -0.5743416547775269, -0.5586034655570984, -0.5428652167320251, -0.5271270275115967, -0.5113887786865234, -0.49565058946609497, -0.4799123704433441, -0.46417415142059326, -0.44843590259552, -0.43269768357276917, -0.4169594645500183, -0.40122121572494507, -0.3854829967021942, -0.36974477767944336, -0.3540065586566925, -0.33826833963394165, -0.3225300908088684, -0.30679187178611755, -0.2910536527633667, -0.27531543374061584, -0.259577214717865, -0.24383899569511414, -0.2281007617712021, -0.21236254274845123, -0.19662432372570038, -0.18088608980178833, -0.16514787077903748, -0.14940959215164185, -0.133671373128891, -0.11793314665555954, -0.10219492018222809, -0.08645670115947723, -0.07071848213672638, -0.05498025566339493, -0.03924202919006348, -0.023503810167312622, -0.007765587419271469, 0.007972635328769684, 0.023710858076810837, 0.03944908082485199, 0.055187299847602844, 0.0709255263209343, 0.08666375279426575, 0.1024019718170166, 0.11814019083976746, 0.1338784098625183, 0.14961664378643036, 0.1653548628091812, 0.18109308183193207, 0.19683131575584412, 0.21256953477859497, 0.22830775380134583, 0.24404597282409668, 0.25978419184684753, 0.2755224108695984, 0.29126065969467163, 0.3069988489151001, 0.32273709774017334, 0.3384753167629242, 0.35421353578567505]}, "gradients/decoder.transformer.h.1.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 6.0, 2.0, 7.0, 3.0, 11.0, 4.0, 8.0, 13.0, 21.0, 21.0, 18.0, 21.0, 30.0, 30.0, 31.0, 43.0, 34.0, 37.0, 39.0, 39.0, 45.0, 43.0, 51.0, 44.0, 43.0, 31.0, 36.0, 53.0, 32.0, 30.0, 23.0, 31.0, 18.0, 26.0, 17.0, 14.0, 9.0, 15.0, 10.0, 11.0, 4.0, 4.0, 3.0, 1.0, 3.0, 1.0, 1.0], "bins": [-78.1875, -76.1728515625, -74.158203125, -72.1435546875, -70.12890625, -68.1142578125, -66.099609375, -64.0849609375, -62.0703125, -60.0556640625, -58.041015625, -56.0263671875, -54.01171875, -51.9970703125, -49.982421875, -47.9677734375, -45.953125, -43.9384765625, -41.923828125, -39.9091796875, -37.89453125, -35.8798828125, -33.865234375, -31.8505859375, -29.8359375, -27.8212890625, -25.806640625, -23.7919921875, -21.77734375, -19.7626953125, -17.748046875, -15.7333984375, -13.71875, -11.7041015625, -9.689453125, -7.6748046875, -5.66015625, -3.6455078125, -1.630859375, 0.3837890625, 2.3984375, 4.4130859375, 6.427734375, 8.4423828125, 10.45703125, 12.4716796875, 14.486328125, 16.5009765625, 18.515625, 20.5302734375, 22.544921875, 24.5595703125, 26.57421875, 28.5888671875, 30.603515625, 32.6181640625, 34.6328125, 36.6474609375, 38.662109375, 40.6767578125, 42.69140625, 44.7060546875, 46.720703125, 48.7353515625, 50.75]}, "gradients/decoder.transformer.h.1.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 3.0, 7.0, 6.0, 4.0, 7.0, 10.0, 14.0, 11.0, 30.0, 33.0, 58.0, 54.0, 90.0, 168.0, 266.0, 319.0, 695.0, 982.0, 1802.0, 3291.0, 7633.0, 21774.0, 87888.0, 518826.0, 318682.0, 57274.0, 15851.0, 6064.0, 2847.0, 1482.0, 848.0, 524.0, 347.0, 206.0, 132.0, 115.0, 59.0, 46.0, 33.0, 27.0, 17.0, 14.0, 12.0, 4.0, 4.0, 2.0, 3.0, 3.0, 2.0], "bins": [-126.375, -123.123046875, -119.87109375, -116.619140625, -113.3671875, -110.115234375, -106.86328125, -103.611328125, -100.359375, -97.107421875, -93.85546875, -90.603515625, -87.3515625, -84.099609375, -80.84765625, -77.595703125, -74.34375, -71.091796875, -67.83984375, -64.587890625, -61.3359375, -58.083984375, -54.83203125, -51.580078125, -48.328125, -45.076171875, -41.82421875, -38.572265625, -35.3203125, -32.068359375, -28.81640625, -25.564453125, -22.3125, -19.060546875, -15.80859375, -12.556640625, -9.3046875, -6.052734375, -2.80078125, 0.451171875, 3.703125, 6.955078125, 10.20703125, 13.458984375, 16.7109375, 19.962890625, 23.21484375, 26.466796875, 29.71875, 32.970703125, 36.22265625, 39.474609375, 42.7265625, 45.978515625, 49.23046875, 52.482421875, 55.734375, 58.986328125, 62.23828125, 65.490234375, 68.7421875, 71.994140625, 75.24609375, 78.498046875, 81.75]}, "gradients/decoder.transformer.h.1.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 2.0, 3.0, 1.0, 6.0, 5.0, 7.0, 10.0, 10.0, 16.0, 11.0, 14.0, 18.0, 21.0, 25.0, 28.0, 30.0, 27.0, 50.0, 51.0, 80.0, 92.0, 140.0, 251.0, 1466.0, 163.0, 99.0, 74.0, 52.0, 29.0, 49.0, 36.0, 34.0, 26.0, 25.0, 18.0, 17.0, 10.0, 9.0, 16.0, 7.0, 3.0, 3.0, 5.0, 2.0, 3.0, 5.0, 1.0, 5.0, 3.0, 2.0, 0.0, 2.0, 2.0], "bins": [-143.75, -139.5546875, -135.359375, -131.1640625, -126.96875, -122.7734375, -118.578125, -114.3828125, -110.1875, -105.9921875, -101.796875, -97.6015625, -93.40625, -89.2109375, -85.015625, -80.8203125, -76.625, -72.4296875, -68.234375, -64.0390625, -59.84375, -55.6484375, -51.453125, -47.2578125, -43.0625, -38.8671875, -34.671875, -30.4765625, -26.28125, -22.0859375, -17.890625, -13.6953125, -9.5, -5.3046875, -1.109375, 3.0859375, 7.28125, 11.4765625, 15.671875, 19.8671875, 24.0625, 28.2578125, 32.453125, 36.6484375, 40.84375, 45.0390625, 49.234375, 53.4296875, 57.625, 61.8203125, 66.015625, 70.2109375, 74.40625, 78.6015625, 82.796875, 86.9921875, 91.1875, 95.3828125, 99.578125, 103.7734375, 107.96875, 112.1640625, 116.359375, 120.5546875, 124.75]}, "gradients/decoder.transformer.h.1.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 3.0, 1.0, 2.0, 5.0, 3.0, 8.0, 3.0, 4.0, 4.0, 14.0, 9.0, 13.0, 21.0, 24.0, 25.0, 36.0, 50.0, 73.0, 98.0, 101.0, 196.0, 328.0, 615.0, 1424.0, 16065.0, 3061029.0, 61551.0, 2108.0, 700.0, 386.0, 236.0, 154.0, 110.0, 73.0, 48.0, 44.0, 41.0, 21.0, 17.0, 18.0, 12.0, 15.0, 12.0, 4.0, 5.0, 2.0, 2.0, 2.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 2.0, 1.0], "bins": [-306.75, -297.0546875, -287.359375, -277.6640625, -267.96875, -258.2734375, -248.578125, -238.8828125, -229.1875, -219.4921875, -209.796875, -200.1015625, -190.40625, -180.7109375, -171.015625, -161.3203125, -151.625, -141.9296875, -132.234375, -122.5390625, -112.84375, -103.1484375, -93.453125, -83.7578125, -74.0625, -64.3671875, -54.671875, -44.9765625, -35.28125, -25.5859375, -15.890625, -6.1953125, 3.5, 13.1953125, 22.890625, 32.5859375, 42.28125, 51.9765625, 61.671875, 71.3671875, 81.0625, 90.7578125, 100.453125, 110.1484375, 119.84375, 129.5390625, 139.234375, 148.9296875, 158.625, 168.3203125, 178.015625, 187.7109375, 197.40625, 207.1015625, 216.796875, 226.4921875, 236.1875, 245.8828125, 255.578125, 265.2734375, 274.96875, 284.6640625, 294.359375, 304.0546875, 313.75]}, "gradients/decoder.transformer.h.1.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 3.0, 22.0, 307.0, 622.0, 59.0, 4.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-505.9183349609375, -450.8454895019531, -395.77264404296875, -340.6998291015625, -285.6269836425781, -230.55413818359375, -175.4813232421875, -120.40847778320312, -65.33563232421875, -10.262794494628906, 44.81004333496094, 99.88287353515625, 154.95571899414062, 210.028564453125, 265.10137939453125, 320.1742248535156, 375.2470703125, 430.3199157714844, 485.39276123046875, 540.465576171875, 595.5384521484375, 650.6112670898438, 705.68408203125, 760.7569580078125, 815.8297729492188, 870.902587890625, 925.9754638671875, 981.0482788085938, 1036.12109375, 1091.1939697265625, 1146.266845703125, 1201.339599609375, 1256.412353515625, 1311.4852294921875, 1366.5579833984375, 1421.630859375, 1476.7037353515625, 1531.776611328125, 1586.849365234375, 1641.9222412109375, 1696.9951171875, 1752.0679931640625, 1807.1407470703125, 1862.213623046875, 1917.2864990234375, 1972.359375, 2027.43212890625, 2082.5048828125, 2137.57763671875, 2192.650390625, 2247.723388671875, 2302.796142578125, 2357.868896484375, 2412.94189453125, 2468.0146484375, 2523.08740234375, 2578.160400390625, 2633.233154296875, 2688.30615234375, 2743.37890625, 2798.45166015625, 2853.524658203125, 2908.597412109375, 2963.670166015625, 3018.7431640625]}, "gradients/decoder.transformer.h.1.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 4.0, 1.0, 4.0, 6.0, 6.0, 5.0, 5.0, 11.0, 14.0, 14.0, 12.0, 12.0, 14.0, 15.0, 14.0, 29.0, 29.0, 32.0, 39.0, 30.0, 40.0, 45.0, 35.0, 43.0, 35.0, 33.0, 42.0, 49.0, 37.0, 37.0, 39.0, 35.0, 24.0, 24.0, 26.0, 28.0, 14.0, 24.0, 17.0, 15.0, 11.0, 13.0, 10.0, 8.0, 7.0, 11.0, 7.0, 3.0, 2.0, 2.0, 2.0, 1.0, 3.0], "bins": [-378.24462890625, -367.5784912109375, -356.912353515625, -346.2462158203125, -335.580078125, -324.9139404296875, -314.247802734375, -303.5816955566406, -292.9155578613281, -282.2494201660156, -271.5832824707031, -260.9171447753906, -250.2510223388672, -239.5848846435547, -228.9187469482422, -218.25262451171875, -207.5864715576172, -196.9203338623047, -186.2541961669922, -175.58807373046875, -164.92193603515625, -154.25579833984375, -143.58966064453125, -132.92352294921875, -122.25739288330078, -111.59125518798828, -100.92512512207031, -90.25898742675781, -79.59284973144531, -68.92671966552734, -58.260581970214844, -47.594451904296875, -36.928314208984375, -26.26218032836914, -15.596044540405273, -4.929908752441406, 5.736225128173828, 16.402359008789062, 27.068496704101562, 37.73462677001953, 48.40076446533203, 59.066898345947266, 69.7330322265625, 80.399169921875, 91.0653076171875, 101.73143768310547, 112.39757537841797, 123.06370544433594, 133.72984313964844, 144.39598083496094, 155.06211853027344, 165.72824096679688, 176.39437866210938, 187.06051635742188, 197.72665405273438, 208.39279174804688, 219.05892944335938, 229.72506713867188, 240.39120483398438, 251.05734252929688, 261.7234802246094, 272.38958740234375, 283.05572509765625, 293.72186279296875, 304.38800048828125]}, "gradients/decoder.transformer.h.0.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0, 2.0, 2.0, 2.0, 4.0, 8.0, 5.0, 4.0, 12.0, 12.0, 15.0, 13.0, 12.0, 26.0, 17.0, 23.0, 29.0, 20.0, 40.0, 39.0, 38.0, 39.0, 36.0, 51.0, 35.0, 46.0, 45.0, 44.0, 32.0, 26.0, 38.0, 33.0, 31.0, 35.0, 35.0, 25.0, 28.0, 13.0, 19.0, 15.0, 12.0, 10.0, 7.0, 7.0, 8.0, 6.0, 4.0, 3.0, 4.0, 3.0, 5.0], "bins": [-79.625, -77.56005859375, -75.4951171875, -73.43017578125, -71.365234375, -69.30029296875, -67.2353515625, -65.17041015625, -63.10546875, -61.04052734375, -58.9755859375, -56.91064453125, -54.845703125, -52.78076171875, -50.7158203125, -48.65087890625, -46.5859375, -44.52099609375, -42.4560546875, -40.39111328125, -38.326171875, -36.26123046875, -34.1962890625, -32.13134765625, -30.06640625, -28.00146484375, -25.9365234375, -23.87158203125, -21.806640625, -19.74169921875, -17.6767578125, -15.61181640625, -13.546875, -11.48193359375, -9.4169921875, -7.35205078125, -5.287109375, -3.22216796875, -1.1572265625, 0.90771484375, 2.97265625, 5.03759765625, 7.1025390625, 9.16748046875, 11.232421875, 13.29736328125, 15.3623046875, 17.42724609375, 19.4921875, 21.55712890625, 23.6220703125, 25.68701171875, 27.751953125, 29.81689453125, 31.8818359375, 33.94677734375, 36.01171875, 38.07666015625, 40.1416015625, 42.20654296875, 44.271484375, 46.33642578125, 48.4013671875, 50.46630859375, 52.53125]}, "gradients/decoder.transformer.h.0.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 2.0, 3.0, 2.0, 5.0, 4.0, 4.0, 11.0, 12.0, 21.0, 30.0, 31.0, 42.0, 32.0, 68.0, 88.0, 117.0, 162.0, 229.0, 346.0, 495.0, 798.0, 1387.0, 2817.0, 6589.0, 19044.0, 130840.0, 3653767.0, 337737.0, 24388.0, 7717.0, 3165.0, 1659.0, 876.0, 519.0, 367.0, 248.0, 165.0, 126.0, 104.0, 69.0, 59.0, 39.0, 27.0, 26.0, 21.0, 15.0, 9.0, 5.0, 5.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-339.0, -328.6015625, -318.203125, -307.8046875, -297.40625, -287.0078125, -276.609375, -266.2109375, -255.8125, -245.4140625, -235.015625, -224.6171875, -214.21875, -203.8203125, -193.421875, -183.0234375, -172.625, -162.2265625, -151.828125, -141.4296875, -131.03125, -120.6328125, -110.234375, -99.8359375, -89.4375, -79.0390625, -68.640625, -58.2421875, -47.84375, -37.4453125, -27.046875, -16.6484375, -6.25, 4.1484375, 14.546875, 24.9453125, 35.34375, 45.7421875, 56.140625, 66.5390625, 76.9375, 87.3359375, 97.734375, 108.1328125, 118.53125, 128.9296875, 139.328125, 149.7265625, 160.125, 170.5234375, 180.921875, 191.3203125, 201.71875, 212.1171875, 222.515625, 232.9140625, 243.3125, 253.7109375, 264.109375, 274.5078125, 284.90625, 295.3046875, 305.703125, 316.1015625, 326.5]}, "gradients/decoder.transformer.h.0.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 2.0, 3.0, 2.0, 4.0, 11.0, 7.0, 12.0, 18.0, 14.0, 30.0, 38.0, 68.0, 88.0, 148.0, 280.0, 550.0, 947.0, 815.0, 414.0, 203.0, 122.0, 91.0, 60.0, 37.0, 22.0, 25.0, 14.0, 18.0, 12.0, 7.0, 6.0, 5.0, 5.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-160.625, -155.046875, -149.46875, -143.890625, -138.3125, -132.734375, -127.15625, -121.578125, -116.0, -110.421875, -104.84375, -99.265625, -93.6875, -88.109375, -82.53125, -76.953125, -71.375, -65.796875, -60.21875, -54.640625, -49.0625, -43.484375, -37.90625, -32.328125, -26.75, -21.171875, -15.59375, -10.015625, -4.4375, 1.140625, 6.71875, 12.296875, 17.875, 23.453125, 29.03125, 34.609375, 40.1875, 45.765625, 51.34375, 56.921875, 62.5, 68.078125, 73.65625, 79.234375, 84.8125, 90.390625, 95.96875, 101.546875, 107.125, 112.703125, 118.28125, 123.859375, 129.4375, 135.015625, 140.59375, 146.171875, 151.75, 157.328125, 162.90625, 168.484375, 174.0625, 179.640625, 185.21875, 190.796875, 196.375]}, "gradients/decoder.transformer.h.0.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 1.0, 2.0, 1.0, 2.0, 3.0, 4.0, 6.0, 9.0, 14.0, 10.0, 22.0, 27.0, 35.0, 45.0, 71.0, 119.0, 158.0, 244.0, 364.0, 572.0, 933.0, 1483.0, 2729.0, 4819.0, 8828.0, 17596.0, 37747.0, 94896.0, 351424.0, 2618321.0, 803208.0, 147825.0, 53350.0, 23246.0, 11553.0, 6049.0, 3479.0, 1867.0, 1182.0, 739.0, 404.0, 264.0, 211.0, 132.0, 84.0, 61.0, 46.0, 33.0, 19.0, 17.0, 8.0, 13.0, 7.0, 5.0, 4.0, 3.0, 0.0, 3.0, 2.0, 1.0], "bins": [-109.8125, -106.5537109375, -103.294921875, -100.0361328125, -96.77734375, -93.5185546875, -90.259765625, -87.0009765625, -83.7421875, -80.4833984375, -77.224609375, -73.9658203125, -70.70703125, -67.4482421875, -64.189453125, -60.9306640625, -57.671875, -54.4130859375, -51.154296875, -47.8955078125, -44.63671875, -41.3779296875, -38.119140625, -34.8603515625, -31.6015625, -28.3427734375, -25.083984375, -21.8251953125, -18.56640625, -15.3076171875, -12.048828125, -8.7900390625, -5.53125, -2.2724609375, 0.986328125, 4.2451171875, 7.50390625, 10.7626953125, 14.021484375, 17.2802734375, 20.5390625, 23.7978515625, 27.056640625, 30.3154296875, 33.57421875, 36.8330078125, 40.091796875, 43.3505859375, 46.609375, 49.8681640625, 53.126953125, 56.3857421875, 59.64453125, 62.9033203125, 66.162109375, 69.4208984375, 72.6796875, 75.9384765625, 79.197265625, 82.4560546875, 85.71484375, 88.9736328125, 92.232421875, 95.4912109375, 98.75]}, "gradients/decoder.transformer.h.0.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 3.0, 2.0, 4.0, 6.0, 3.0, 10.0, 5.0, 14.0, 13.0, 15.0, 20.0, 12.0, 21.0, 28.0, 46.0, 62.0, 61.0, 72.0, 65.0, 68.0, 71.0, 66.0, 66.0, 43.0, 43.0, 39.0, 33.0, 23.0, 11.0, 18.0, 17.0, 15.0, 6.0, 8.0, 6.0, 4.0, 6.0, 2.0, 1.0, 5.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-506.3139343261719, -486.72625732421875, -467.13861083984375, -447.55096435546875, -427.9632873535156, -408.3756103515625, -388.7879638671875, -369.2003173828125, -349.6126403808594, -330.02496337890625, -310.43731689453125, -290.84967041015625, -271.2619934082031, -251.67433166503906, -232.086669921875, -212.49900817871094, -192.91134643554688, -173.3236846923828, -153.73602294921875, -134.1483612060547, -114.56069946289062, -94.97303771972656, -75.3853759765625, -55.79771423339844, -36.210052490234375, -16.622390747070312, 2.96527099609375, 22.552932739257812, 42.140594482421875, 61.72825622558594, 81.31591796875, 100.90357971191406, 120.49127197265625, 140.0789337158203, 159.66659545898438, 179.25425720214844, 198.8419189453125, 218.42958068847656, 238.01724243164062, 257.60491943359375, 277.19256591796875, 296.78021240234375, 316.3678894042969, 335.95556640625, 355.543212890625, 375.130859375, 394.7185363769531, 414.30621337890625, 433.89385986328125, 453.48150634765625, 473.0691833496094, 492.6568603515625, 512.2445068359375, 531.8321533203125, 551.4197998046875, 571.0075073242188, 590.5951538085938, 610.1828002929688, 629.7705078125, 649.358154296875, 668.94580078125, 688.533447265625, 708.12109375, 727.7088012695312, 747.2964477539062]}, "gradients/decoder.transformer.h.0.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 1.0, 5.0, 3.0, 4.0, 4.0, 5.0, 9.0, 10.0, 5.0, 11.0, 14.0, 18.0, 19.0, 25.0, 27.0, 22.0, 21.0, 31.0, 25.0, 35.0, 28.0, 32.0, 38.0, 38.0, 45.0, 30.0, 45.0, 39.0, 36.0, 35.0, 39.0, 37.0, 19.0, 29.0, 31.0, 30.0, 25.0, 33.0, 13.0, 18.0, 11.0, 13.0, 8.0, 14.0, 14.0, 5.0, 3.0, 5.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 0.0, 2.0], "bins": [-495.6292724609375, -480.5400695800781, -465.45086669921875, -450.36163330078125, -435.2724304199219, -420.1832275390625, -405.0940246582031, -390.00482177734375, -374.91558837890625, -359.8263854980469, -344.7371826171875, -329.64794921875, -314.5587463378906, -299.46954345703125, -284.3803405761719, -269.2911376953125, -254.20193481445312, -239.11273193359375, -224.0235137939453, -208.93431091308594, -193.8450927734375, -178.75588989257812, -163.66668701171875, -148.57748413085938, -133.48826599121094, -118.39905548095703, -103.30984497070312, -88.22064208984375, -73.13143157958984, -58.04222106933594, -42.95301818847656, -27.863807678222656, -12.774566650390625, 2.3146419525146484, 17.403850555419922, 32.49305725097656, 47.58226776123047, 62.671478271484375, 77.76068115234375, 92.84989166259766, 107.93910217285156, 123.02831268310547, 138.11752319335938, 153.20672607421875, 168.29592895507812, 183.38514709472656, 198.47434997558594, 213.56356811523438, 228.65277099609375, 243.74197387695312, 258.8311767578125, 273.92041015625, 289.0096130371094, 304.09881591796875, 319.1880187988281, 334.2772216796875, 349.366455078125, 364.4556579589844, 379.54486083984375, 394.63409423828125, 409.7232971191406, 424.8125, 439.9017028808594, 454.99090576171875, 470.0801086425781]}, "gradients/decoder.transformer.h.0.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 4.0, 0.0, 2.0, 3.0, 3.0, 4.0, 4.0, 3.0, 5.0, 10.0, 10.0, 13.0, 10.0, 18.0, 16.0, 14.0, 24.0, 16.0, 22.0, 22.0, 22.0, 24.0, 27.0, 22.0, 47.0, 32.0, 34.0, 36.0, 50.0, 31.0, 47.0, 37.0, 37.0, 31.0, 40.0, 22.0, 33.0, 23.0, 28.0, 18.0, 25.0, 32.0, 17.0, 13.0, 13.0, 16.0, 9.0, 8.0, 13.0, 5.0, 8.0, 4.0, 1.0, 2.0, 4.0, 2.0, 0.0, 2.0, 1.0, 1.0, 0.0, 2.0, 1.0], "bins": [-827.0, -799.7890625, -772.578125, -745.3671875, -718.15625, -690.9453125, -663.734375, -636.5234375, -609.3125, -582.1015625, -554.890625, -527.6796875, -500.46875, -473.2578125, -446.046875, -418.8359375, -391.625, -364.4140625, -337.203125, -309.9921875, -282.78125, -255.5703125, -228.359375, -201.1484375, -173.9375, -146.7265625, -119.515625, -92.3046875, -65.09375, -37.8828125, -10.671875, 16.5390625, 43.75, 70.9609375, 98.171875, 125.3828125, 152.59375, 179.8046875, 207.015625, 234.2265625, 261.4375, 288.6484375, 315.859375, 343.0703125, 370.28125, 397.4921875, 424.703125, 451.9140625, 479.125, 506.3359375, 533.546875, 560.7578125, 587.96875, 615.1796875, 642.390625, 669.6015625, 696.8125, 724.0234375, 751.234375, 778.4453125, 805.65625, 832.8671875, 860.078125, 887.2890625, 914.5]}, "gradients/decoder.transformer.h.0.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 3.0, 1.0, 2.0, 3.0, 3.0, 6.0, 9.0, 17.0, 18.0, 41.0, 51.0, 75.0, 102.0, 163.0, 244.0, 344.0, 546.0, 770.0, 1170.0, 1887.0, 2698.0, 4181.0, 6230.0, 9762.0, 15209.0, 23765.0, 38479.0, 65254.0, 125474.0, 291456.0, 217977.0, 98117.0, 54195.0, 32724.0, 20376.0, 12871.0, 8351.0, 5418.0, 3523.0, 2382.0, 1561.0, 992.0, 710.0, 456.0, 312.0, 196.0, 147.0, 99.0, 71.0, 51.0, 32.0, 22.0, 8.0, 5.0, 2.0, 5.0, 2.0, 0.0, 2.0, 1.0, 1.0, 0.0, 3.0], "bins": [-65.6875, -63.556640625, -61.42578125, -59.294921875, -57.1640625, -55.033203125, -52.90234375, -50.771484375, -48.640625, -46.509765625, -44.37890625, -42.248046875, -40.1171875, -37.986328125, -35.85546875, -33.724609375, -31.59375, -29.462890625, -27.33203125, -25.201171875, -23.0703125, -20.939453125, -18.80859375, -16.677734375, -14.546875, -12.416015625, -10.28515625, -8.154296875, -6.0234375, -3.892578125, -1.76171875, 0.369140625, 2.5, 4.630859375, 6.76171875, 8.892578125, 11.0234375, 13.154296875, 15.28515625, 17.416015625, 19.546875, 21.677734375, 23.80859375, 25.939453125, 28.0703125, 30.201171875, 32.33203125, 34.462890625, 36.59375, 38.724609375, 40.85546875, 42.986328125, 45.1171875, 47.248046875, 49.37890625, 51.509765625, 53.640625, 55.771484375, 57.90234375, 60.033203125, 62.1640625, 64.294921875, 66.42578125, 68.556640625, 70.6875]}, "gradients/decoder.transformer.h.0.crossattention.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 1.0, 2.0, 3.0, 3.0, 2.0, 4.0, 5.0, 10.0, 15.0, 14.0, 16.0, 11.0, 20.0, 17.0, 22.0, 28.0, 22.0, 31.0, 27.0, 37.0, 28.0, 31.0, 28.0, 35.0, 34.0, 46.0, 1061.0, 40.0, 33.0, 27.0, 35.0, 28.0, 30.0, 40.0, 35.0, 33.0, 22.0, 23.0, 22.0, 17.0, 15.0, 15.0, 10.0, 9.0, 9.0, 9.0, 7.0, 6.0, 4.0, 6.0, 3.0, 1.0, 1.0, 3.0, 1.0, 2.0, 1.0], "bins": [-545.5, -529.08203125, -512.6640625, -496.24609375, -479.828125, -463.41015625, -446.9921875, -430.57421875, -414.15625, -397.73828125, -381.3203125, -364.90234375, -348.484375, -332.06640625, -315.6484375, -299.23046875, -282.8125, -266.39453125, -249.9765625, -233.55859375, -217.140625, -200.72265625, -184.3046875, -167.88671875, -151.46875, -135.05078125, -118.6328125, -102.21484375, -85.796875, -69.37890625, -52.9609375, -36.54296875, -20.125, -3.70703125, 12.7109375, 29.12890625, 45.546875, 61.96484375, 78.3828125, 94.80078125, 111.21875, 127.63671875, 144.0546875, 160.47265625, 176.890625, 193.30859375, 209.7265625, 226.14453125, 242.5625, 258.98046875, 275.3984375, 291.81640625, 308.234375, 324.65234375, 341.0703125, 357.48828125, 373.90625, 390.32421875, 406.7421875, 423.16015625, 439.578125, 455.99609375, 472.4140625, 488.83203125, 505.25]}, "gradients/decoder.transformer.h.0.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 2.0, 2.0, 3.0, 6.0, 8.0, 7.0, 13.0, 26.0, 25.0, 33.0, 60.0, 70.0, 117.0, 171.0, 234.0, 375.0, 513.0, 761.0, 1135.0, 1703.0, 2407.0, 3597.0, 5495.0, 8161.0, 12575.0, 19469.0, 30887.0, 50084.0, 84273.0, 167503.0, 1350585.0, 149558.0, 78165.0, 46749.0, 28718.0, 18246.0, 11985.0, 7712.0, 5021.0, 3510.0, 2327.0, 1546.0, 997.0, 697.0, 483.0, 379.0, 236.0, 160.0, 112.0, 76.0, 52.0, 34.0, 31.0, 16.0, 10.0, 11.0, 6.0, 5.0, 6.0, 1.0, 0.0, 1.0, 1.0], "bins": [-40.5625, -39.27197265625, -37.9814453125, -36.69091796875, -35.400390625, -34.10986328125, -32.8193359375, -31.52880859375, -30.23828125, -28.94775390625, -27.6572265625, -26.36669921875, -25.076171875, -23.78564453125, -22.4951171875, -21.20458984375, -19.9140625, -18.62353515625, -17.3330078125, -16.04248046875, -14.751953125, -13.46142578125, -12.1708984375, -10.88037109375, -9.58984375, -8.29931640625, -7.0087890625, -5.71826171875, -4.427734375, -3.13720703125, -1.8466796875, -0.55615234375, 0.734375, 2.02490234375, 3.3154296875, 4.60595703125, 5.896484375, 7.18701171875, 8.4775390625, 9.76806640625, 11.05859375, 12.34912109375, 13.6396484375, 14.93017578125, 16.220703125, 17.51123046875, 18.8017578125, 20.09228515625, 21.3828125, 22.67333984375, 23.9638671875, 25.25439453125, 26.544921875, 27.83544921875, 29.1259765625, 30.41650390625, 31.70703125, 32.99755859375, 34.2880859375, 35.57861328125, 36.869140625, 38.15966796875, 39.4501953125, 40.74072265625, 42.03125]}, "gradients/decoder.transformer.h.0.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 2.0, 3.0, 3.0, 2.0, 2.0, 1.0, 4.0, 7.0, 5.0, 9.0, 10.0, 12.0, 11.0, 26.0, 29.0, 36.0, 47.0, 91.0, 101.0, 121.0, 111.0, 99.0, 74.0, 48.0, 43.0, 16.0, 19.0, 13.0, 10.0, 9.0, 5.0, 9.0, 6.0, 1.0, 4.0, 0.0, 4.0, 1.0, 1.0, 3.0, 3.0, 2.0, 1.0, 1.0, 1.0, 0.0, 2.0, 3.0], "bins": [-0.5087890625, -0.4945869445800781, -0.48038482666015625, -0.4661827087402344, -0.4519805908203125, -0.4377784729003906, -0.42357635498046875, -0.4093742370605469, -0.395172119140625, -0.3809700012207031, -0.36676788330078125, -0.3525657653808594, -0.3383636474609375, -0.3241615295410156, -0.30995941162109375, -0.2957572937011719, -0.28155517578125, -0.2673530578613281, -0.25315093994140625, -0.23894882202148438, -0.2247467041015625, -0.21054458618164062, -0.19634246826171875, -0.18214035034179688, -0.167938232421875, -0.15373611450195312, -0.13953399658203125, -0.12533187866210938, -0.1111297607421875, -0.09692764282226562, -0.08272552490234375, -0.06852340698242188, -0.0543212890625, -0.040119171142578125, -0.02591705322265625, -0.011714935302734375, 0.0024871826171875, 0.016689300537109375, 0.03089141845703125, 0.045093536376953125, 0.059295654296875, 0.07349777221679688, 0.08769989013671875, 0.10190200805664062, 0.1161041259765625, 0.13030624389648438, 0.14450836181640625, 0.15871047973632812, 0.17291259765625, 0.18711471557617188, 0.20131683349609375, 0.21551895141601562, 0.2297210693359375, 0.24392318725585938, 0.25812530517578125, 0.2723274230957031, 0.286529541015625, 0.3007316589355469, 0.31493377685546875, 0.3291358947753906, 0.3433380126953125, 0.3575401306152344, 0.37174224853515625, 0.3859443664550781, 0.400146484375]}, "gradients/decoder.transformer.h.0.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 4.0, 3.0, 2.0, 3.0, 2.0, 7.0, 4.0, 12.0, 6.0, 8.0, 9.0, 9.0, 11.0, 16.0, 28.0, 26.0, 44.0, 47.0, 73.0, 102.0, 182.0, 540.0, 1796.0, 13625.0, 950091.0, 76527.0, 3769.0, 847.0, 296.0, 112.0, 78.0, 60.0, 51.0, 38.0, 24.0, 23.0, 14.0, 6.0, 11.0, 6.0, 10.0, 8.0, 3.0, 8.0, 5.0, 5.0, 4.0, 2.0, 1.0, 2.0, 4.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-6.68359375, -6.47930908203125, -6.2750244140625, -6.07073974609375, -5.866455078125, -5.66217041015625, -5.4578857421875, -5.25360107421875, -5.04931640625, -4.84503173828125, -4.6407470703125, -4.43646240234375, -4.232177734375, -4.02789306640625, -3.8236083984375, -3.61932373046875, -3.4150390625, -3.21075439453125, -3.0064697265625, -2.80218505859375, -2.597900390625, -2.39361572265625, -2.1893310546875, -1.98504638671875, -1.78076171875, -1.57647705078125, -1.3721923828125, -1.16790771484375, -0.963623046875, -0.75933837890625, -0.5550537109375, -0.35076904296875, -0.146484375, 0.05780029296875, 0.2620849609375, 0.46636962890625, 0.670654296875, 0.87493896484375, 1.0792236328125, 1.28350830078125, 1.48779296875, 1.69207763671875, 1.8963623046875, 2.10064697265625, 2.304931640625, 2.50921630859375, 2.7135009765625, 2.91778564453125, 3.1220703125, 3.32635498046875, 3.5306396484375, 3.73492431640625, 3.939208984375, 4.14349365234375, 4.3477783203125, 4.55206298828125, 4.75634765625, 4.96063232421875, 5.1649169921875, 5.36920166015625, 5.573486328125, 5.77777099609375, 5.9820556640625, 6.18634033203125, 6.390625]}, "gradients/decoder.transformer.h.0.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 2.0, 13.0, 73.0, 379.0, 403.0, 94.0, 22.0, 14.0, 4.0, 2.0, 3.0, 1.0, 0.0, 2.0, 0.0, 1.0], "bins": [-3.5265917778015137, -3.458897352218628, -3.391202926635742, -3.3235082626342773, -3.2558138370513916, -3.188119411468506, -3.120424747467041, -3.0527303218841553, -2.9850358963012695, -2.917341470718384, -2.849647045135498, -2.781952381134033, -2.7142579555511475, -2.6465635299682617, -2.578868865966797, -2.511174440383911, -2.4434800148010254, -2.3757855892181396, -2.308091163635254, -2.240396499633789, -2.1727020740509033, -2.1050076484680176, -2.0373129844665527, -1.969618558883667, -1.9019241333007812, -1.8342297077178955, -1.7665351629257202, -1.698840618133545, -1.6311461925506592, -1.5634517669677734, -1.4957572221755981, -1.4280626773834229, -1.360368013381958, -1.2926735877990723, -1.224979043006897, -1.1572844982147217, -1.089590072631836, -1.0218956470489502, -0.9542011022567749, -0.8865066170692444, -0.8188121318817139, -0.7511176466941833, -0.6834231615066528, -0.6157286763191223, -0.5480341911315918, -0.4803397059440613, -0.41264522075653076, -0.34495073556900024, -0.2772562503814697, -0.2095617651939392, -0.1418672800064087, -0.07417279481887817, -0.006478309631347656, 0.06121617555618286, 0.12891066074371338, 0.1966051459312439, 0.2642996311187744, 0.33199411630630493, 0.39968860149383545, 0.46738308668136597, 0.5350775718688965, 0.602772057056427, 0.6704665422439575, 0.738161027431488, 0.8058555126190186]}, "gradients/decoder.transformer.h.0.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 3.0, 1.0, 2.0, 2.0, 2.0, 3.0, 9.0, 1.0, 5.0, 9.0, 10.0, 7.0, 18.0, 7.0, 13.0, 18.0, 7.0, 20.0, 17.0, 19.0, 25.0, 28.0, 33.0, 30.0, 20.0, 48.0, 44.0, 22.0, 35.0, 41.0, 37.0, 29.0, 35.0, 39.0, 40.0, 27.0, 34.0, 31.0, 28.0, 21.0, 25.0, 23.0, 21.0, 15.0, 11.0, 20.0, 15.0, 15.0, 8.0, 10.0, 5.0, 6.0, 5.0, 4.0, 1.0, 4.0, 4.0, 5.0, 1.0, 3.0], "bins": [-0.38483625650405884, -0.3735042214393616, -0.3621721565723419, -0.35084012150764465, -0.3395080864429474, -0.32817602157592773, -0.31684398651123047, -0.3055119514465332, -0.29417988657951355, -0.2828478515148163, -0.27151578664779663, -0.26018375158309937, -0.2488517016172409, -0.23751965165138245, -0.22618761658668518, -0.21485556662082672, -0.20352353155612946, -0.192191481590271, -0.18085944652557373, -0.16952739655971527, -0.1581953465938568, -0.14686331152915955, -0.1355312615633011, -0.12419921159744263, -0.11286716908216476, -0.1015351265668869, -0.09020307660102844, -0.07887103408575058, -0.06753899157047272, -0.05620694160461426, -0.044874899089336395, -0.033542849123477936, -0.022210806608200073, -0.010878761298954487, 0.00045328401029109955, 0.011785328388214111, 0.023117374628782272, 0.03444942086935043, 0.045781463384628296, 0.057113513350486755, 0.06844555586576462, 0.07977759838104248, 0.09110964834690094, 0.1024416908621788, 0.11377373337745667, 0.12510578334331512, 0.13643783330917358, 0.14776986837387085, 0.1591019183397293, 0.17043396830558777, 0.18176600337028503, 0.1930980533361435, 0.20443010330200195, 0.21576213836669922, 0.22709418833255768, 0.23842623829841614, 0.2497582733631134, 0.26109030842781067, 0.2724223732948303, 0.2837544083595276, 0.29508644342422485, 0.3064185082912445, 0.3177505433559418, 0.3290826082229614, 0.3404146432876587]}, "gradients/decoder.transformer.h.0.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 4.0, 0.0, 2.0, 3.0, 3.0, 4.0, 4.0, 3.0, 5.0, 10.0, 10.0, 13.0, 10.0, 19.0, 14.0, 15.0, 24.0, 17.0, 20.0, 23.0, 22.0, 26.0, 25.0, 21.0, 45.0, 34.0, 35.0, 36.0, 49.0, 32.0, 48.0, 36.0, 37.0, 31.0, 40.0, 22.0, 33.0, 24.0, 27.0, 18.0, 25.0, 29.0, 20.0, 12.0, 14.0, 16.0, 9.0, 9.0, 12.0, 5.0, 8.0, 4.0, 1.0, 2.0, 4.0, 2.0, 0.0, 2.0, 1.0, 1.0, 0.0, 2.0, 1.0], "bins": [-827.0, -799.796875, -772.59375, -745.390625, -718.1875, -690.984375, -663.78125, -636.578125, -609.375, -582.171875, -554.96875, -527.765625, -500.5625, -473.359375, -446.15625, -418.953125, -391.75, -364.546875, -337.34375, -310.140625, -282.9375, -255.734375, -228.53125, -201.328125, -174.125, -146.921875, -119.71875, -92.515625, -65.3125, -38.109375, -10.90625, 16.296875, 43.5, 70.703125, 97.90625, 125.109375, 152.3125, 179.515625, 206.71875, 233.921875, 261.125, 288.328125, 315.53125, 342.734375, 369.9375, 397.140625, 424.34375, 451.546875, 478.75, 505.953125, 533.15625, 560.359375, 587.5625, 614.765625, 641.96875, 669.171875, 696.375, 723.578125, 750.78125, 777.984375, 805.1875, 832.390625, 859.59375, 886.796875, 914.0]}, "gradients/decoder.transformer.h.0.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 3.0, 0.0, 3.0, 3.0, 4.0, 8.0, 8.0, 10.0, 14.0, 17.0, 23.0, 28.0, 37.0, 48.0, 61.0, 80.0, 90.0, 130.0, 195.0, 289.0, 444.0, 869.0, 1633.0, 3270.0, 7182.0, 15779.0, 37871.0, 106993.0, 359695.0, 345849.0, 101978.0, 36570.0, 15307.0, 6900.0, 3321.0, 1596.0, 817.0, 448.0, 259.0, 195.0, 120.0, 93.0, 65.0, 55.0, 49.0, 33.0, 27.0, 24.0, 15.0, 15.0, 15.0, 8.0, 7.0, 5.0, 4.0, 1.0, 4.0, 0.0, 3.0, 2.0], "bins": [-125.125, -121.3310546875, -117.537109375, -113.7431640625, -109.94921875, -106.1552734375, -102.361328125, -98.5673828125, -94.7734375, -90.9794921875, -87.185546875, -83.3916015625, -79.59765625, -75.8037109375, -72.009765625, -68.2158203125, -64.421875, -60.6279296875, -56.833984375, -53.0400390625, -49.24609375, -45.4521484375, -41.658203125, -37.8642578125, -34.0703125, -30.2763671875, -26.482421875, -22.6884765625, -18.89453125, -15.1005859375, -11.306640625, -7.5126953125, -3.71875, 0.0751953125, 3.869140625, 7.6630859375, 11.45703125, 15.2509765625, 19.044921875, 22.8388671875, 26.6328125, 30.4267578125, 34.220703125, 38.0146484375, 41.80859375, 45.6025390625, 49.396484375, 53.1904296875, 56.984375, 60.7783203125, 64.572265625, 68.3662109375, 72.16015625, 75.9541015625, 79.748046875, 83.5419921875, 87.3359375, 91.1298828125, 94.923828125, 98.7177734375, 102.51171875, 106.3056640625, 110.099609375, 113.8935546875, 117.6875]}, "gradients/decoder.transformer.h.0.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 1.0, 2.0, 2.0, 1.0, 8.0, 5.0, 5.0, 11.0, 12.0, 19.0, 30.0, 46.0, 51.0, 64.0, 71.0, 77.0, 108.0, 2125.0, 81.0, 62.0, 72.0, 48.0, 46.0, 31.0, 28.0, 15.0, 12.0, 11.0, 6.0, 4.0, 2.0, 3.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2578.0, -2500.40625, -2422.8125, -2345.21875, -2267.625, -2190.03125, -2112.4375, -2034.84375, -1957.25, -1879.65625, -1802.0625, -1724.46875, -1646.875, -1569.28125, -1491.6875, -1414.09375, -1336.5, -1258.90625, -1181.3125, -1103.71875, -1026.125, -948.53125, -870.9375, -793.34375, -715.75, -638.15625, -560.5625, -482.96875, -405.375, -327.78125, -250.1875, -172.59375, -95.0, -17.40625, 60.1875, 137.78125, 215.375, 292.96875, 370.5625, 448.15625, 525.75, 603.34375, 680.9375, 758.53125, 836.125, 913.71875, 991.3125, 1068.90625, 1146.5, 1224.09375, 1301.6875, 1379.28125, 1456.875, 1534.46875, 1612.0625, 1689.65625, 1767.25, 1844.84375, 1922.4375, 2000.03125, 2077.625, 2155.21875, 2232.8125, 2310.40625, 2388.0]}, "gradients/decoder.transformer.h.0.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 1.0, 3.0, 4.0, 1.0, 1.0, 6.0, 7.0, 6.0, 8.0, 12.0, 20.0, 15.0, 37.0, 51.0, 85.0, 145.0, 270.0, 479.0, 1134.0, 3847.0, 30664.0, 2923263.0, 172644.0, 9391.0, 1962.0, 767.0, 367.0, 187.0, 122.0, 69.0, 38.0, 23.0, 21.0, 16.0, 11.0, 11.0, 10.0, 4.0, 4.0, 2.0, 2.0, 5.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-393.25, -380.46484375, -367.6796875, -354.89453125, -342.109375, -329.32421875, -316.5390625, -303.75390625, -290.96875, -278.18359375, -265.3984375, -252.61328125, -239.828125, -227.04296875, -214.2578125, -201.47265625, -188.6875, -175.90234375, -163.1171875, -150.33203125, -137.546875, -124.76171875, -111.9765625, -99.19140625, -86.40625, -73.62109375, -60.8359375, -48.05078125, -35.265625, -22.48046875, -9.6953125, 3.08984375, 15.875, 28.66015625, 41.4453125, 54.23046875, 67.015625, 79.80078125, 92.5859375, 105.37109375, 118.15625, 130.94140625, 143.7265625, 156.51171875, 169.296875, 182.08203125, 194.8671875, 207.65234375, 220.4375, 233.22265625, 246.0078125, 258.79296875, 271.578125, 284.36328125, 297.1484375, 309.93359375, 322.71875, 335.50390625, 348.2890625, 361.07421875, 373.859375, 386.64453125, 399.4296875, 412.21484375, 425.0]}, "gradients/decoder.transformer.h.0.ln_1.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 6.0, 5.0, 3.0, 8.0, 16.0, 24.0, 36.0, 76.0, 156.0, 232.0, 195.0, 96.0, 63.0, 21.0, 21.0, 11.0, 10.0, 5.0, 6.0, 6.0, 3.0, 6.0, 1.0, 1.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1138.414306640625, -1048.4727783203125, -958.5311279296875, -868.589599609375, -778.6480102539062, -688.7064208984375, -598.764892578125, -508.82330322265625, -418.8817138671875, -328.94012451171875, -238.99856567382812, -149.0570068359375, -59.11541748046875, 30.826171875, 120.7677001953125, 210.70928955078125, 300.65087890625, 390.59246826171875, 480.5340270996094, 570.4755859375, 660.4171752929688, 750.3587646484375, 840.30029296875, 930.2418823242188, 1020.1834716796875, 1110.125, 1200.066650390625, 1290.0081787109375, 1379.94970703125, 1469.891357421875, 1559.8328857421875, 1649.7744140625, 1739.7158203125, 1829.6573486328125, 1919.5989990234375, 2009.54052734375, 2099.482177734375, 2189.423828125, 2279.365234375, 2369.306884765625, 2459.24853515625, 2549.190185546875, 2639.131591796875, 2729.0732421875, 2819.014892578125, 2908.95654296875, 2998.89794921875, 3088.839599609375, 3178.781005859375, 3268.72265625, 3358.6640625, 3448.605712890625, 3538.54736328125, 3628.48876953125, 3718.430419921875, 3808.3720703125, 3898.3134765625, 3988.255126953125, 4078.196533203125, 4168.13818359375, 4258.07958984375, 4348.021484375, 4437.962890625, 4527.904296875, 4617.84619140625]}, "gradients/decoder.transformer.h.0.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 5.0, 2.0, 3.0, 5.0, 5.0, 7.0, 9.0, 7.0, 8.0, 16.0, 14.0, 11.0, 20.0, 14.0, 23.0, 33.0, 32.0, 29.0, 43.0, 36.0, 38.0, 40.0, 48.0, 55.0, 39.0, 47.0, 42.0, 35.0, 41.0, 34.0, 34.0, 26.0, 32.0, 21.0, 24.0, 25.0, 24.0, 13.0, 12.0, 15.0, 10.0, 10.0, 10.0, 5.0, 6.0, 1.0, 2.0, 4.0, 3.0, 0.0, 0.0, 1.0, 3.0], "bins": [-2362.6376953125, -2295.719482421875, -2228.801025390625, -2161.8828125, -2094.96435546875, -2028.046142578125, -1961.1278076171875, -1894.20947265625, -1827.291259765625, -1760.3729248046875, -1693.45458984375, -1626.536376953125, -1559.6180419921875, -1492.69970703125, -1425.7813720703125, -1358.863037109375, -1291.9447021484375, -1225.0263671875, -1158.1080322265625, -1091.189697265625, -1024.271484375, -957.3531494140625, -890.434814453125, -823.5164794921875, -756.5982055664062, -689.6798706054688, -622.7615966796875, -555.84326171875, -488.9249572753906, -422.00665283203125, -355.08831787109375, -288.1700134277344, -221.2518310546875, -154.33352661132812, -87.41520690917969, -20.49688720703125, 46.421417236328125, 113.3397216796875, 180.258056640625, 247.17636108398438, 314.09466552734375, 381.0129699707031, 447.9312744140625, 514.849609375, 581.7679443359375, 648.6862182617188, 715.6045532226562, 782.5228271484375, 849.441162109375, 916.3594970703125, 983.2777709960938, 1050.196044921875, 1117.1143798828125, 1184.03271484375, 1250.9510498046875, 1317.869384765625, 1384.78759765625, 1451.7059326171875, 1518.624267578125, 1585.54248046875, 1652.4608154296875, 1719.379150390625, 1786.2974853515625, 1853.2158203125, 1920.1341552734375]}, "gradients/decoder.transformer.wpe.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 2.0, 3.0, 4.0, 4.0, 1.0, 4.0, 0.0, 3.0, 6.0, 10.0, 4.0, 11.0, 12.0, 20.0, 20.0, 27.0, 37.0, 43.0, 57.0, 74.0, 105.0, 118.0, 173.0, 208.0, 241.0, 307.0, 388.0, 445.0, 706.0, 1042384.0, 852.0, 414.0, 361.0, 328.0, 249.0, 181.0, 168.0, 108.0, 121.0, 81.0, 60.0, 53.0, 43.0, 32.0, 21.0, 19.0, 21.0, 10.0, 10.0, 5.0, 4.0, 3.0, 1.0, 1.0, 2.0, 4.0, 3.0, 0.0, 0.0, 2.0], "bins": [-644.144287109375, -624.9339599609375, -605.7236328125, -586.5133056640625, -567.302978515625, -548.0926513671875, -528.88232421875, -509.6719970703125, -490.461669921875, -471.2513427734375, -452.041015625, -432.8306884765625, -413.620361328125, -394.4100341796875, -375.19970703125, -355.9893798828125, -336.779052734375, -317.5687255859375, -298.3583984375, -279.1480712890625, -259.937744140625, -240.7274169921875, -221.51708984375, -202.3067626953125, -183.096435546875, -163.8861083984375, -144.67578125, -125.4654541015625, -106.255126953125, -87.0447998046875, -67.83447265625, -48.6241455078125, -29.41387939453125, -10.20355224609375, 9.00677490234375, 28.21710205078125, 47.42742919921875, 66.63775634765625, 85.84808349609375, 105.05841064453125, 124.26873779296875, 143.47906494140625, 162.68939208984375, 181.89971923828125, 201.11004638671875, 220.32037353515625, 239.53070068359375, 258.74102783203125, 277.95135498046875, 297.16168212890625, 316.37200927734375, 335.58233642578125, 354.79266357421875, 374.00299072265625, 393.21331787109375, 412.42364501953125, 431.63397216796875, 450.84429931640625, 470.05462646484375, 489.26495361328125, 508.47528076171875, 527.6856079101562, 546.8959350585938, 566.1062622070312, 585.3165893554688]}, "gradients/decoder.transformer.wte.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 5.0, 7.0, 1.0, 3.0, 7.0, 7.0, 8.0, 21.0, 35.0, 139.0, 732.0, 51458740.0, 3073.0, 267.0, 45.0, 16.0, 11.0, 7.0, 8.0, 5.0, 7.0, 7.0, 4.0, 4.0, 2.0, 0.0, 2.0, 3.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6364.0, -6180.9150390625, -5997.82958984375, -5814.74462890625, -5631.6591796875, -5448.57421875, -5265.4892578125, -5082.404296875, -4899.31884765625, -4716.23388671875, -4533.1484375, -4350.0634765625, -4166.978515625, -3983.89306640625, -3800.80810546875, -3617.722900390625, -3434.6376953125, -3251.552490234375, -3068.46728515625, -2885.38232421875, -2702.297119140625, -2519.2119140625, -2336.126953125, -2153.041748046875, -1969.95654296875, -1786.871337890625, -1603.7862548828125, -1420.701171875, -1237.615966796875, -1054.53076171875, -871.4456787109375, -688.360595703125, -505.27490234375, -322.18975830078125, -139.1046142578125, 43.98052978515625, 227.065673828125, 410.15081787109375, 593.2359619140625, 776.321044921875, 959.40625, 1142.491455078125, 1325.5765380859375, 1508.66162109375, 1691.746826171875, 1874.83203125, 2057.9169921875, 2241.002197265625, 2424.08740234375, 2607.172607421875, 2790.2578125, 2973.3427734375, 3156.427978515625, 3339.51318359375, 3522.59814453125, 3705.683349609375, 3888.7685546875, 4071.853759765625, 4254.93896484375, 4438.02392578125, 4621.109375, 4804.1943359375, 4987.279296875, 5170.3642578125, 5353.44970703125]}, "gradients/encoder.adapter.layers.2.conv.weight": {"_type": "histogram", "values": [2.0, 2.0, 7.0, 14.0, 14.0, 23.0, 34.0, 76.0, 104.0, 144.0, 219.0, 313.0, 431.0, 673.0, 976.0, 1594.0, 2312.0, 3457.0, 4866.0, 7065.0, 10137.0, 14722.0, 21380.0, 31257.0, 47808.0, 71968.0, 109112.0, 164913.0, 258077.0, 434661.0, 3249519.0, 871175.0, 346338.0, 216355.0, 140969.0, 92797.0, 61147.0, 40785.0, 27524.0, 18464.0, 12398.0, 8620.0, 5959.0, 4147.0, 2892.0, 2040.0, 1316.0, 890.0, 587.0, 442.0, 245.0, 167.0, 100.0, 66.0, 59.0, 47.0, 15.0, 16.0, 5.0, 3.0, 4.0, 2.0, 1.0, 1.0], "bins": [-11.6640625, -11.285400390625, -10.90673828125, -10.528076171875, -10.1494140625, -9.770751953125, -9.39208984375, -9.013427734375, -8.634765625, -8.256103515625, -7.87744140625, -7.498779296875, -7.1201171875, -6.741455078125, -6.36279296875, -5.984130859375, -5.60546875, -5.226806640625, -4.84814453125, -4.469482421875, -4.0908203125, -3.712158203125, -3.33349609375, -2.954833984375, -2.576171875, -2.197509765625, -1.81884765625, -1.440185546875, -1.0615234375, -0.682861328125, -0.30419921875, 0.074462890625, 0.453125, 0.831787109375, 1.21044921875, 1.589111328125, 1.9677734375, 2.346435546875, 2.72509765625, 3.103759765625, 3.482421875, 3.861083984375, 4.23974609375, 4.618408203125, 4.9970703125, 5.375732421875, 5.75439453125, 6.133056640625, 6.51171875, 6.890380859375, 7.26904296875, 7.647705078125, 8.0263671875, 8.405029296875, 8.78369140625, 9.162353515625, 9.541015625, 9.919677734375, 10.29833984375, 10.677001953125, 11.0556640625, 11.434326171875, 11.81298828125, 12.191650390625, 12.5703125]}, "gradients/encoder.adapter.layers.2.conv.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 5.0, 3.0, 5.0, 6.0, 6.0, 8.0, 3.0, 7.0, 8.0, 13.0, 13.0, 19.0, 16.0, 17.0, 24.0, 34.0, 29.0, 30.0, 29.0, 38.0, 35.0, 18.0, 39.0, 35.0, 69.0, 1003.0, 78.0, 46.0, 33.0, 36.0, 32.0, 31.0, 30.0, 46.0, 25.0, 17.0, 32.0, 21.0, 18.0, 10.0, 10.0, 12.0, 9.0, 6.0, 5.0, 8.0, 6.0, 2.0, 2.0, 0.0, 3.0, 4.0, 3.0, 2.0, 0.0, 1.0], "bins": [-249.75, -242.29296875, -234.8359375, -227.37890625, -219.921875, -212.46484375, -205.0078125, -197.55078125, -190.09375, -182.63671875, -175.1796875, -167.72265625, -160.265625, -152.80859375, -145.3515625, -137.89453125, -130.4375, -122.98046875, -115.5234375, -108.06640625, -100.609375, -93.15234375, -85.6953125, -78.23828125, -70.78125, -63.32421875, -55.8671875, -48.41015625, -40.953125, -33.49609375, -26.0390625, -18.58203125, -11.125, -3.66796875, 3.7890625, 11.24609375, 18.703125, 26.16015625, 33.6171875, 41.07421875, 48.53125, 55.98828125, 63.4453125, 70.90234375, 78.359375, 85.81640625, 93.2734375, 100.73046875, 108.1875, 115.64453125, 123.1015625, 130.55859375, 138.015625, 145.47265625, 152.9296875, 160.38671875, 167.84375, 175.30078125, 182.7578125, 190.21484375, 197.671875, 205.12890625, 212.5859375, 220.04296875, 227.5]}, "gradients/encoder.adapter.layers.1.conv.weight": {"_type": "histogram", "values": [3.0, 3.0, 8.0, 3.0, 12.0, 19.0, 29.0, 45.0, 57.0, 81.0, 128.0, 156.0, 253.0, 375.0, 548.0, 852.0, 1306.0, 2009.0, 3047.0, 4554.0, 6982.0, 11212.0, 17797.0, 28485.0, 45943.0, 75840.0, 127430.0, 218662.0, 384392.0, 986497.0, 3284066.0, 457390.0, 255694.0, 147315.0, 88187.0, 53487.0, 32636.0, 20339.0, 12713.0, 8007.0, 5199.0, 3333.0, 2109.0, 1367.0, 957.0, 625.0, 425.0, 299.0, 187.0, 116.0, 88.0, 48.0, 40.0, 33.0, 18.0, 14.0, 12.0, 9.0, 5.0, 5.0, 1.0, 2.0, 1.0, 1.0], "bins": [-13.0078125, -12.5762939453125, -12.144775390625, -11.7132568359375, -11.28173828125, -10.8502197265625, -10.418701171875, -9.9871826171875, -9.5556640625, -9.1241455078125, -8.692626953125, -8.2611083984375, -7.82958984375, -7.3980712890625, -6.966552734375, -6.5350341796875, -6.103515625, -5.6719970703125, -5.240478515625, -4.8089599609375, -4.37744140625, -3.9459228515625, -3.514404296875, -3.0828857421875, -2.6513671875, -2.2198486328125, -1.788330078125, -1.3568115234375, -0.92529296875, -0.4937744140625, -0.062255859375, 0.3692626953125, 0.80078125, 1.2322998046875, 1.663818359375, 2.0953369140625, 2.52685546875, 2.9583740234375, 3.389892578125, 3.8214111328125, 4.2529296875, 4.6844482421875, 5.115966796875, 5.5474853515625, 5.97900390625, 6.4105224609375, 6.842041015625, 7.2735595703125, 7.705078125, 8.1365966796875, 8.568115234375, 8.9996337890625, 9.43115234375, 9.8626708984375, 10.294189453125, 10.7257080078125, 11.1572265625, 11.5887451171875, 12.020263671875, 12.4517822265625, 12.88330078125, 13.3148193359375, 13.746337890625, 14.1778564453125, 14.609375]}, "gradients/encoder.adapter.layers.1.conv.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 3.0, 2.0, 7.0, 2.0, 10.0, 7.0, 9.0, 9.0, 17.0, 13.0, 11.0, 22.0, 36.0, 28.0, 21.0, 32.0, 35.0, 39.0, 45.0, 29.0, 46.0, 50.0, 649.0, 454.0, 57.0, 44.0, 47.0, 36.0, 32.0, 38.0, 34.0, 27.0, 22.0, 27.0, 17.0, 15.0, 15.0, 13.0, 9.0, 7.0, 3.0, 5.0, 3.0, 5.0, 2.0, 3.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-236.5, -229.091796875, -221.68359375, -214.275390625, -206.8671875, -199.458984375, -192.05078125, -184.642578125, -177.234375, -169.826171875, -162.41796875, -155.009765625, -147.6015625, -140.193359375, -132.78515625, -125.376953125, -117.96875, -110.560546875, -103.15234375, -95.744140625, -88.3359375, -80.927734375, -73.51953125, -66.111328125, -58.703125, -51.294921875, -43.88671875, -36.478515625, -29.0703125, -21.662109375, -14.25390625, -6.845703125, 0.5625, 7.970703125, 15.37890625, 22.787109375, 30.1953125, 37.603515625, 45.01171875, 52.419921875, 59.828125, 67.236328125, 74.64453125, 82.052734375, 89.4609375, 96.869140625, 104.27734375, 111.685546875, 119.09375, 126.501953125, 133.91015625, 141.318359375, 148.7265625, 156.134765625, 163.54296875, 170.951171875, 178.359375, 185.767578125, 193.17578125, 200.583984375, 207.9921875, 215.400390625, 222.80859375, 230.216796875, 237.625]}, "gradients/encoder.adapter.layers.0.conv.weight": {"_type": "histogram", "values": [1.0, 3.0, 2.0, 4.0, 2.0, 2.0, 2.0, 4.0, 6.0, 12.0, 23.0, 21.0, 34.0, 35.0, 46.0, 63.0, 59.0, 67.0, 86.0, 161.0, 242.0, 331.0, 399.0, 609.0, 926.0, 1517.0, 2419.0, 4644.0, 9858.0, 26555.0, 82796.0, 5779369.0, 284438.0, 58881.0, 20009.0, 7806.0, 3799.0, 2000.0, 1256.0, 850.0, 563.0, 409.0, 314.0, 241.0, 136.0, 107.0, 74.0, 47.0, 61.0, 37.0, 36.0, 21.0, 25.0, 18.0, 9.0, 1.0, 2.0, 4.0, 5.0, 2.0, 3.0, 2.0, 0.0, 2.0], "bins": [-84.875, -82.2109375, -79.546875, -76.8828125, -74.21875, -71.5546875, -68.890625, -66.2265625, -63.5625, -60.8984375, -58.234375, -55.5703125, -52.90625, -50.2421875, -47.578125, -44.9140625, -42.25, -39.5859375, -36.921875, -34.2578125, -31.59375, -28.9296875, -26.265625, -23.6015625, -20.9375, -18.2734375, -15.609375, -12.9453125, -10.28125, -7.6171875, -4.953125, -2.2890625, 0.375, 3.0390625, 5.703125, 8.3671875, 11.03125, 13.6953125, 16.359375, 19.0234375, 21.6875, 24.3515625, 27.015625, 29.6796875, 32.34375, 35.0078125, 37.671875, 40.3359375, 43.0, 45.6640625, 48.328125, 50.9921875, 53.65625, 56.3203125, 58.984375, 61.6484375, 64.3125, 66.9765625, 69.640625, 72.3046875, 74.96875, 77.6328125, 80.296875, 82.9609375, 85.625]}, "gradients/encoder.adapter.layers.0.conv.bias": {"_type": "histogram", "values": [2.0, 0.0, 4.0, 2.0, 3.0, 4.0, 6.0, 6.0, 7.0, 5.0, 7.0, 8.0, 10.0, 13.0, 15.0, 16.0, 18.0, 18.0, 26.0, 24.0, 27.0, 31.0, 24.0, 27.0, 36.0, 39.0, 37.0, 31.0, 89.0, 984.0, 61.0, 33.0, 49.0, 39.0, 25.0, 44.0, 32.0, 27.0, 24.0, 27.0, 24.0, 22.0, 17.0, 15.0, 14.0, 15.0, 9.0, 9.0, 7.0, 6.0, 1.0, 8.0, 7.0, 4.0, 3.0, 3.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-160.5, -155.013671875, -149.52734375, -144.041015625, -138.5546875, -133.068359375, -127.58203125, -122.095703125, -116.609375, -111.123046875, -105.63671875, -100.150390625, -94.6640625, -89.177734375, -83.69140625, -78.205078125, -72.71875, -67.232421875, -61.74609375, -56.259765625, -50.7734375, -45.287109375, -39.80078125, -34.314453125, -28.828125, -23.341796875, -17.85546875, -12.369140625, -6.8828125, -1.396484375, 4.08984375, 9.576171875, 15.0625, 20.548828125, 26.03515625, 31.521484375, 37.0078125, 42.494140625, 47.98046875, 53.466796875, 58.953125, 64.439453125, 69.92578125, 75.412109375, 80.8984375, 86.384765625, 91.87109375, 97.357421875, 102.84375, 108.330078125, 113.81640625, 119.302734375, 124.7890625, 130.275390625, 135.76171875, 141.248046875, 146.734375, 152.220703125, 157.70703125, 163.193359375, 168.6796875, 174.166015625, 179.65234375, 185.138671875, 190.625]}, "gradients/encoder.encoder.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 3.0, 0.0, 6.0, 4.0, 18.0, 79.0, 654.0, 209.0, 22.0, 8.0, 8.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-761.0686645507812, -705.44970703125, -649.8306884765625, -594.2117309570312, -538.5927734375, -482.97381591796875, -427.3548278808594, -371.73583984375, -316.11688232421875, -260.4979248046875, -204.87893676757812, -149.2599639892578, -93.6409912109375, -38.02203369140625, 17.596954345703125, 73.2159423828125, 128.83489990234375, 184.45387268066406, 240.07284545898438, 295.69183349609375, 351.310791015625, 406.92974853515625, 462.5487365722656, 518.167724609375, 573.7866821289062, 629.4056396484375, 685.024658203125, 740.6436157226562, 796.2625732421875, 851.8815307617188, 907.50048828125, 963.1195068359375, 1018.738525390625, 1074.3575439453125, 1129.9764404296875, 1185.595458984375, 1241.21435546875, 1296.8333740234375, 1352.452392578125, 1408.0712890625, 1463.6903076171875, 1519.309326171875, 1574.92822265625, 1630.5472412109375, 1686.166259765625, 1741.78515625, 1797.4041748046875, 1853.023193359375, 1908.64208984375, 1964.2611083984375, 2019.8800048828125, 2075.4990234375, 2131.117919921875, 2186.73681640625, 2242.35595703125, 2297.974853515625, 2353.59375, 2409.212646484375, 2464.831787109375, 2520.45068359375, 2576.069580078125, 2631.6884765625, 2687.3076171875, 2742.926513671875, 2798.545654296875]}, "gradients/encoder.encoder.layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 3.0, 3.0, 2.0, 1.0, 3.0, 6.0, 4.0, 7.0, 5.0, 10.0, 12.0, 15.0, 11.0, 18.0, 21.0, 31.0, 29.0, 32.0, 28.0, 38.0, 34.0, 51.0, 45.0, 34.0, 55.0, 46.0, 56.0, 44.0, 36.0, 40.0, 36.0, 43.0, 38.0, 26.0, 28.0, 22.0, 20.0, 23.0, 11.0, 10.0, 8.0, 4.0, 7.0, 7.0, 3.0, 3.0, 2.0, 1.0, 3.0, 1.0, 1.0, 1.0, 1.0], "bins": [-505.085205078125, -490.9626770019531, -476.84014892578125, -462.7176208496094, -448.5950927734375, -434.4725646972656, -420.35003662109375, -406.2275390625, -392.10498046875, -377.9824523925781, -363.85992431640625, -349.7373962402344, -335.6148681640625, -321.4923400878906, -307.36981201171875, -293.247314453125, -279.1247863769531, -265.00225830078125, -250.87973022460938, -236.7572021484375, -222.63467407226562, -208.51214599609375, -194.38963317871094, -180.26710510253906, -166.1445770263672, -152.0220489501953, -137.89952087402344, -123.7770004272461, -109.65447235107422, -95.53194427490234, -81.409423828125, -67.28689575195312, -53.16436767578125, -39.041839599609375, -24.919315338134766, -10.796791076660156, 3.3257369995117188, 17.448265075683594, 31.570785522460938, 45.69331359863281, 59.81584167480469, 73.93836975097656, 88.06089782714844, 102.18341827392578, 116.30594635009766, 130.428466796875, 144.55099487304688, 158.67352294921875, 172.79605102539062, 186.9185791015625, 201.04110717773438, 215.16363525390625, 229.28616333007812, 243.40869140625, 257.53118896484375, 271.65374755859375, 285.7762451171875, 299.8987731933594, 314.02130126953125, 328.1438293457031, 342.266357421875, 356.3888854980469, 370.51141357421875, 384.6339111328125, 398.7564697265625]}, "gradients/encoder.encoder.layers.23.feed_forward.output_dense.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 1.0, 3.0, 1.0, 0.0, 2.0, 1.0, 1.0, 5.0, 1.0, 3.0, 2.0, 5.0, 5.0, 9.0, 20.0, 13.0, 16.0, 33.0, 31.0, 50.0, 66.0, 81.0, 140.0, 195.0, 287.0, 457.0, 729.0, 1161.0, 2353.0, 6876.0, 26396.0, 211629.0, 3868292.0, 57389.0, 10193.0, 3538.0, 1689.0, 840.0, 564.0, 353.0, 215.0, 187.0, 127.0, 79.0, 59.0, 51.0, 38.0, 26.0, 26.0, 17.0, 9.0, 3.0, 9.0, 5.0, 7.0, 2.0, 4.0, 4.0, 1.0, 2.0], "bins": [-1.4326171875, -1.39306640625, -1.353515625, -1.31396484375, -1.2744140625, -1.23486328125, -1.1953125, -1.15576171875, -1.1162109375, -1.07666015625, -1.037109375, -0.99755859375, -0.9580078125, -0.91845703125, -0.87890625, -0.83935546875, -0.7998046875, -0.76025390625, -0.720703125, -0.68115234375, -0.6416015625, -0.60205078125, -0.5625, -0.52294921875, -0.4833984375, -0.44384765625, -0.404296875, -0.36474609375, -0.3251953125, -0.28564453125, -0.24609375, -0.20654296875, -0.1669921875, -0.12744140625, -0.087890625, -0.04833984375, -0.0087890625, 0.03076171875, 0.0703125, 0.10986328125, 0.1494140625, 0.18896484375, 0.228515625, 0.26806640625, 0.3076171875, 0.34716796875, 0.38671875, 0.42626953125, 0.4658203125, 0.50537109375, 0.544921875, 0.58447265625, 0.6240234375, 0.66357421875, 0.703125, 0.74267578125, 0.7822265625, 0.82177734375, 0.861328125, 0.90087890625, 0.9404296875, 0.97998046875, 1.01953125, 1.05908203125, 1.0986328125]}, "gradients/encoder.encoder.layers.23.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 3.0, 1.0, 2.0, 5.0, 4.0, 5.0, 7.0, 3.0, 10.0, 7.0, 18.0, 16.0, 15.0, 14.0, 46.0, 753.0, 16.0, 12.0, 11.0, 15.0, 6.0, 11.0, 5.0, 5.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.0, 2.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0], "bins": [-1.7255859375, -1.6790618896484375, -1.632537841796875, -1.5860137939453125, -1.53948974609375, -1.4929656982421875, -1.446441650390625, -1.3999176025390625, -1.3533935546875, -1.3068695068359375, -1.260345458984375, -1.2138214111328125, -1.16729736328125, -1.1207733154296875, -1.074249267578125, -1.0277252197265625, -0.981201171875, -0.9346771240234375, -0.888153076171875, -0.8416290283203125, -0.79510498046875, -0.7485809326171875, -0.702056884765625, -0.6555328369140625, -0.6090087890625, -0.5624847412109375, -0.515960693359375, -0.4694366455078125, -0.42291259765625, -0.3763885498046875, -0.329864501953125, -0.2833404541015625, -0.23681640625, -0.1902923583984375, -0.143768310546875, -0.0972442626953125, -0.05072021484375, -0.0041961669921875, 0.042327880859375, 0.0888519287109375, 0.1353759765625, 0.1819000244140625, 0.228424072265625, 0.2749481201171875, 0.32147216796875, 0.3679962158203125, 0.414520263671875, 0.4610443115234375, 0.507568359375, 0.5540924072265625, 0.600616455078125, 0.6471405029296875, 0.69366455078125, 0.7401885986328125, 0.786712646484375, 0.8332366943359375, 0.8797607421875, 0.9262847900390625, 0.972808837890625, 1.0193328857421875, 1.06585693359375, 1.1123809814453125, 1.158905029296875, 1.2054290771484375, 1.251953125]}, "gradients/encoder.encoder.layers.23.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 1.0, 0.0, 1.0, 4.0, 6.0, 11.0, 4.0, 9.0, 4.0, 16.0, 23.0, 49.0, 75.0, 138.0, 268.0, 404.0, 783.0, 1890.0, 6363.0, 40366.0, 3853301.0, 267262.0, 16978.0, 3675.0, 1274.0, 582.0, 319.0, 182.0, 106.0, 71.0, 30.0, 27.0, 24.0, 16.0, 7.0, 3.0, 4.0, 6.0, 3.0, 1.0, 2.0, 0.0, 4.0, 0.0, 0.0, 1.0, 3.0, 1.0, 0.0, 1.0], "bins": [-2.970703125, -2.88525390625, -2.7998046875, -2.71435546875, -2.62890625, -2.54345703125, -2.4580078125, -2.37255859375, -2.287109375, -2.20166015625, -2.1162109375, -2.03076171875, -1.9453125, -1.85986328125, -1.7744140625, -1.68896484375, -1.603515625, -1.51806640625, -1.4326171875, -1.34716796875, -1.26171875, -1.17626953125, -1.0908203125, -1.00537109375, -0.919921875, -0.83447265625, -0.7490234375, -0.66357421875, -0.578125, -0.49267578125, -0.4072265625, -0.32177734375, -0.236328125, -0.15087890625, -0.0654296875, 0.02001953125, 0.10546875, 0.19091796875, 0.2763671875, 0.36181640625, 0.447265625, 0.53271484375, 0.6181640625, 0.70361328125, 0.7890625, 0.87451171875, 0.9599609375, 1.04541015625, 1.130859375, 1.21630859375, 1.3017578125, 1.38720703125, 1.47265625, 1.55810546875, 1.6435546875, 1.72900390625, 1.814453125, 1.89990234375, 1.9853515625, 2.07080078125, 2.15625, 2.24169921875, 2.3271484375, 2.41259765625, 2.498046875]}, "gradients/encoder.encoder.layers.23.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 3.0, 5.0, 6.0, 13.0, 11.0, 27.0, 27.0, 57.0, 171.0, 999.0, 2127.0, 358.0, 111.0, 59.0, 35.0, 33.0, 19.0, 6.0, 7.0, 3.0, 3.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.375, -1.3224945068359375, -1.269989013671875, -1.2174835205078125, -1.16497802734375, -1.1124725341796875, -1.059967041015625, -1.0074615478515625, -0.9549560546875, -0.9024505615234375, -0.849945068359375, -0.7974395751953125, -0.74493408203125, -0.6924285888671875, -0.639923095703125, -0.5874176025390625, -0.534912109375, -0.4824066162109375, -0.429901123046875, -0.3773956298828125, -0.32489013671875, -0.2723846435546875, -0.219879150390625, -0.1673736572265625, -0.1148681640625, -0.0623626708984375, -0.009857177734375, 0.0426483154296875, 0.09515380859375, 0.1476593017578125, 0.200164794921875, 0.2526702880859375, 0.30517578125, 0.3576812744140625, 0.410186767578125, 0.4626922607421875, 0.51519775390625, 0.5677032470703125, 0.620208740234375, 0.6727142333984375, 0.7252197265625, 0.7777252197265625, 0.830230712890625, 0.8827362060546875, 0.93524169921875, 0.9877471923828125, 1.040252685546875, 1.0927581787109375, 1.145263671875, 1.1977691650390625, 1.250274658203125, 1.3027801513671875, 1.35528564453125, 1.4077911376953125, 1.460296630859375, 1.5128021240234375, 1.5653076171875, 1.6178131103515625, 1.670318603515625, 1.7228240966796875, 1.77532958984375, 1.8278350830078125, 1.880340576171875, 1.9328460693359375, 1.9853515625]}, "gradients/encoder.encoder.layers.23.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 3.0, 6.0, 19.0, 99.0, 568.0, 225.0, 41.0, 18.0, 13.0, 5.0, 1.0, 2.0, 1.0, 2.0, 2.0, 2.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.887177467346191, -5.6007080078125, -5.314238548278809, -5.027769565582275, -4.741300106048584, -4.454830646514893, -4.168361663818359, -3.881892204284668, -3.5954227447509766, -3.308953285217285, -3.022484064102173, -2.7360148429870605, -2.449545383453369, -2.1630759239196777, -1.8766067028045654, -1.5901374816894531, -1.3036680221557617, -1.0171986818313599, -0.730729341506958, -0.44426000118255615, -0.1577906608581543, 0.12867867946624756, 0.4151480197906494, 0.7016172409057617, 0.9880867004394531, 1.274556040763855, 1.5610253810882568, 1.8474947214126587, 2.1339640617370605, 2.420433521270752, 2.7069027423858643, 2.9933719635009766, 3.2798423767089844, 3.566311836242676, 3.852781057357788, 4.1392502784729, 4.425719738006592, 4.712189197540283, 4.998658180236816, 5.285127639770508, 5.571597099304199, 5.858066558837891, 6.144536018371582, 6.431005001068115, 6.717474460601807, 7.003943920135498, 7.290412902832031, 7.576882362365723, 7.863351821899414, 8.149821281433105, 8.436290740966797, 8.722760200500488, 9.00922966003418, 9.295698165893555, 9.582167625427246, 9.868637084960938, 10.155106544494629, 10.44157600402832, 10.728045463562012, 11.014514923095703, 11.300983428955078, 11.58745288848877, 11.873922348022461, 12.160391807556152, 12.446861267089844]}, "gradients/encoder.encoder.layers.23.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 4.0, 2.0, 1.0, 6.0, 3.0, 5.0, 6.0, 10.0, 11.0, 16.0, 19.0, 28.0, 29.0, 29.0, 38.0, 40.0, 47.0, 60.0, 68.0, 74.0, 64.0, 65.0, 62.0, 48.0, 49.0, 45.0, 30.0, 32.0, 23.0, 26.0, 15.0, 10.0, 9.0, 5.0, 8.0, 10.0, 6.0, 1.0, 2.0, 2.0, 3.0, 0.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.8867177963256836, -3.758209466934204, -3.6297011375427246, -3.501192569732666, -3.3726842403411865, -3.244175910949707, -3.1156673431396484, -2.987159013748169, -2.8586506843566895, -2.73014235496521, -2.6016340255737305, -2.473125457763672, -2.3446171283721924, -2.216108798980713, -2.0876002311706543, -1.9590919017791748, -1.8305835723876953, -1.7020752429962158, -1.5735667943954468, -1.4450583457946777, -1.3165500164031982, -1.1880416870117188, -1.0595332384109497, -0.9310248494148254, -0.8025164604187012, -0.6740080714225769, -0.5454996824264526, -0.41699129343032837, -0.2884829044342041, -0.15997451543807983, -0.031466126441955566, 0.0970422625541687, 0.22555017471313477, 0.35405856370925903, 0.4825669527053833, 0.6110753417015076, 0.7395837306976318, 0.8680921196937561, 0.9966005086898804, 1.1251089572906494, 1.253617286682129, 1.3821256160736084, 1.5106340646743774, 1.6391425132751465, 1.767650842666626, 1.8961591720581055, 2.024667739868164, 2.1531760692596436, 2.281684398651123, 2.4101927280426025, 2.538701057434082, 2.6672096252441406, 2.79571795463562, 2.9242262840270996, 3.052734851837158, 3.1812431812286377, 3.309751510620117, 3.4382598400115967, 3.566768169403076, 3.6952767372131348, 3.8237850666046143, 3.9522933959960938, 4.080801963806152, 4.209310054779053, 4.337818622589111]}, "gradients/encoder.encoder.layers.23.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 2.0, 1.0, 4.0, 7.0, 10.0, 19.0, 22.0, 29.0, 37.0, 51.0, 73.0, 114.0, 151.0, 257.0, 331.0, 531.0, 945.0, 1522.0, 2851.0, 5412.0, 11555.0, 30966.0, 899650.0, 58858.0, 18227.0, 7822.0, 3773.0, 2066.0, 1180.0, 721.0, 459.0, 308.0, 194.0, 130.0, 97.0, 66.0, 36.0, 25.0, 17.0, 12.0, 11.0, 5.0, 6.0, 2.0, 4.0, 3.0, 2.0, 3.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.99609375, -6.76092529296875, -6.5257568359375, -6.29058837890625, -6.055419921875, -5.82025146484375, -5.5850830078125, -5.34991455078125, -5.11474609375, -4.87957763671875, -4.6444091796875, -4.40924072265625, -4.174072265625, -3.93890380859375, -3.7037353515625, -3.46856689453125, -3.2333984375, -2.99822998046875, -2.7630615234375, -2.52789306640625, -2.292724609375, -2.05755615234375, -1.8223876953125, -1.58721923828125, -1.35205078125, -1.11688232421875, -0.8817138671875, -0.64654541015625, -0.411376953125, -0.17620849609375, 0.0589599609375, 0.29412841796875, 0.529296875, 0.76446533203125, 0.9996337890625, 1.23480224609375, 1.469970703125, 1.70513916015625, 1.9403076171875, 2.17547607421875, 2.41064453125, 2.64581298828125, 2.8809814453125, 3.11614990234375, 3.351318359375, 3.58648681640625, 3.8216552734375, 4.05682373046875, 4.2919921875, 4.52716064453125, 4.7623291015625, 4.99749755859375, 5.232666015625, 5.46783447265625, 5.7030029296875, 5.93817138671875, 6.17333984375, 6.40850830078125, 6.6436767578125, 6.87884521484375, 7.114013671875, 7.34918212890625, 7.5843505859375, 7.81951904296875, 8.0546875]}, "gradients/encoder.encoder.layers.23.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 7.0, 4.0, 6.0, 6.0, 8.0, 8.0, 12.0, 19.0, 18.0, 11.0, 100.0, 697.0, 19.0, 12.0, 13.0, 12.0, 7.0, 11.0, 5.0, 6.0, 4.0, 4.0, 2.0, 4.0, 0.0, 1.0, 2.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 2.0], "bins": [-1.701171875, -1.6553802490234375, -1.609588623046875, -1.5637969970703125, -1.51800537109375, -1.4722137451171875, -1.426422119140625, -1.3806304931640625, -1.3348388671875, -1.2890472412109375, -1.243255615234375, -1.1974639892578125, -1.15167236328125, -1.1058807373046875, -1.060089111328125, -1.0142974853515625, -0.968505859375, -0.9227142333984375, -0.876922607421875, -0.8311309814453125, -0.78533935546875, -0.7395477294921875, -0.693756103515625, -0.6479644775390625, -0.6021728515625, -0.5563812255859375, -0.510589599609375, -0.4647979736328125, -0.41900634765625, -0.3732147216796875, -0.327423095703125, -0.2816314697265625, -0.23583984375, -0.1900482177734375, -0.144256591796875, -0.0984649658203125, -0.05267333984375, -0.0068817138671875, 0.038909912109375, 0.0847015380859375, 0.1304931640625, 0.1762847900390625, 0.222076416015625, 0.2678680419921875, 0.31365966796875, 0.3594512939453125, 0.405242919921875, 0.4510345458984375, 0.496826171875, 0.5426177978515625, 0.588409423828125, 0.6342010498046875, 0.67999267578125, 0.7257843017578125, 0.771575927734375, 0.8173675537109375, 0.8631591796875, 0.9089508056640625, 0.954742431640625, 1.0005340576171875, 1.04632568359375, 1.0921173095703125, 1.137908935546875, 1.1837005615234375, 1.2294921875]}, "gradients/encoder.encoder.layers.23.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 2.0, 3.0, 3.0, 2.0, 8.0, 7.0, 10.0, 18.0, 23.0, 29.0, 30.0, 49.0, 67.0, 83.0, 117.0, 165.0, 219.0, 342.0, 560.0, 1119.0, 2521.0, 6407.0, 19815.0, 70906.0, 411260.0, 429807.0, 72548.0, 20486.0, 6534.0, 2475.0, 1051.0, 612.0, 388.0, 237.0, 181.0, 143.0, 92.0, 68.0, 52.0, 32.0, 29.0, 21.0, 14.0, 11.0, 6.0, 8.0, 3.0, 3.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.86328125, -3.742584228515625, -3.62188720703125, -3.501190185546875, -3.3804931640625, -3.259796142578125, -3.13909912109375, -3.018402099609375, -2.897705078125, -2.777008056640625, -2.65631103515625, -2.535614013671875, -2.4149169921875, -2.294219970703125, -2.17352294921875, -2.052825927734375, -1.93212890625, -1.811431884765625, -1.69073486328125, -1.570037841796875, -1.4493408203125, -1.328643798828125, -1.20794677734375, -1.087249755859375, -0.966552734375, -0.845855712890625, -0.72515869140625, -0.604461669921875, -0.4837646484375, -0.363067626953125, -0.24237060546875, -0.121673583984375, -0.0009765625, 0.119720458984375, 0.24041748046875, 0.361114501953125, 0.4818115234375, 0.602508544921875, 0.72320556640625, 0.843902587890625, 0.964599609375, 1.085296630859375, 1.20599365234375, 1.326690673828125, 1.4473876953125, 1.568084716796875, 1.68878173828125, 1.809478759765625, 1.93017578125, 2.050872802734375, 2.17156982421875, 2.292266845703125, 2.4129638671875, 2.533660888671875, 2.65435791015625, 2.775054931640625, 2.895751953125, 3.016448974609375, 3.13714599609375, 3.257843017578125, 3.3785400390625, 3.499237060546875, 3.61993408203125, 3.740631103515625, 3.861328125]}, "gradients/encoder.encoder.layers.23.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 2.0, 3.0, 3.0, 3.0, 4.0, 9.0, 6.0, 8.0, 9.0, 12.0, 9.0, 15.0, 25.0, 32.0, 23.0, 31.0, 30.0, 27.0, 32.0, 42.0, 49.0, 33.0, 35.0, 57.0, 48.0, 43.0, 42.0, 36.0, 49.0, 29.0, 24.0, 33.0, 32.0, 27.0, 25.0, 27.0, 24.0, 17.0, 12.0, 9.0, 6.0, 8.0, 6.0, 7.0, 5.0, 3.0, 3.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-5.05078125, -4.909576416015625, -4.76837158203125, -4.627166748046875, -4.4859619140625, -4.344757080078125, -4.20355224609375, -4.062347412109375, -3.921142578125, -3.779937744140625, -3.63873291015625, -3.497528076171875, -3.3563232421875, -3.215118408203125, -3.07391357421875, -2.932708740234375, -2.79150390625, -2.650299072265625, -2.50909423828125, -2.367889404296875, -2.2266845703125, -2.085479736328125, -1.94427490234375, -1.803070068359375, -1.661865234375, -1.520660400390625, -1.37945556640625, -1.238250732421875, -1.0970458984375, -0.955841064453125, -0.81463623046875, -0.673431396484375, -0.5322265625, -0.391021728515625, -0.24981689453125, -0.108612060546875, 0.0325927734375, 0.173797607421875, 0.31500244140625, 0.456207275390625, 0.597412109375, 0.738616943359375, 0.87982177734375, 1.021026611328125, 1.1622314453125, 1.303436279296875, 1.44464111328125, 1.585845947265625, 1.72705078125, 1.868255615234375, 2.00946044921875, 2.150665283203125, 2.2918701171875, 2.433074951171875, 2.57427978515625, 2.715484619140625, 2.856689453125, 2.997894287109375, 3.13909912109375, 3.280303955078125, 3.4215087890625, 3.562713623046875, 3.70391845703125, 3.845123291015625, 3.986328125]}, "gradients/encoder.encoder.layers.23.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 5.0, 4.0, 6.0, 6.0, 16.0, 24.0, 32.0, 60.0, 109.0, 226.0, 588.0, 3062.0, 131591.0, 907127.0, 4392.0, 795.0, 230.0, 120.0, 67.0, 38.0, 16.0, 19.0, 10.0, 4.0, 2.0, 5.0, 2.0, 1.0, 4.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.30859375, -6.10565185546875, -5.9027099609375, -5.69976806640625, -5.496826171875, -5.29388427734375, -5.0909423828125, -4.88800048828125, -4.68505859375, -4.48211669921875, -4.2791748046875, -4.07623291015625, -3.873291015625, -3.67034912109375, -3.4674072265625, -3.26446533203125, -3.0615234375, -2.85858154296875, -2.6556396484375, -2.45269775390625, -2.249755859375, -2.04681396484375, -1.8438720703125, -1.64093017578125, -1.43798828125, -1.23504638671875, -1.0321044921875, -0.82916259765625, -0.626220703125, -0.42327880859375, -0.2203369140625, -0.01739501953125, 0.185546875, 0.38848876953125, 0.5914306640625, 0.79437255859375, 0.997314453125, 1.20025634765625, 1.4031982421875, 1.60614013671875, 1.80908203125, 2.01202392578125, 2.2149658203125, 2.41790771484375, 2.620849609375, 2.82379150390625, 3.0267333984375, 3.22967529296875, 3.4326171875, 3.63555908203125, 3.8385009765625, 4.04144287109375, 4.244384765625, 4.44732666015625, 4.6502685546875, 4.85321044921875, 5.05615234375, 5.25909423828125, 5.4620361328125, 5.66497802734375, 5.867919921875, 6.07086181640625, 6.2738037109375, 6.47674560546875, 6.6796875]}, "gradients/encoder.encoder.layers.23.attention.k_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 3.0, 0.0, 0.0, 1.0, 7.0, 2.0, 4.0, 7.0, 5.0, 7.0, 17.0, 25.0, 32.0, 78.0, 134.0, 228.0, 211.0, 111.0, 57.0, 32.0, 17.0, 8.0, 9.0, 4.0, 6.0, 3.0, 1.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00019419193267822266, -0.00018637441098690033, -0.000178556889295578, -0.00017073936760425568, -0.00016292184591293335, -0.00015510432422161102, -0.0001472868025302887, -0.00013946928083896637, -0.00013165175914764404, -0.00012383423745632172, -0.00011601671576499939, -0.00010819919407367706, -0.00010038167238235474, -9.256415069103241e-05, -8.474662899971008e-05, -7.692910730838776e-05, -6.911158561706543e-05, -6.12940639257431e-05, -5.3476542234420776e-05, -4.565902054309845e-05, -3.784149885177612e-05, -3.0023977160453796e-05, -2.220645546913147e-05, -1.4388933777809143e-05, -6.571412086486816e-06, 1.2461096048355103e-06, 9.063631296157837e-06, 1.6881152987480164e-05, 2.469867467880249e-05, 3.251619637012482e-05, 4.0333718061447144e-05, 4.815123975276947e-05, 5.59687614440918e-05, 6.378628313541412e-05, 7.160380482673645e-05, 7.942132651805878e-05, 8.72388482093811e-05, 9.505636990070343e-05, 0.00010287389159202576, 0.00011069141328334808, 0.00011850893497467041, 0.00012632645666599274, 0.00013414397835731506, 0.0001419615000486374, 0.00014977902173995972, 0.00015759654343128204, 0.00016541406512260437, 0.0001732315868139267, 0.00018104910850524902, 0.00018886663019657135, 0.00019668415188789368, 0.000204501673579216, 0.00021231919527053833, 0.00022013671696186066, 0.00022795423865318298, 0.0002357717603445053, 0.00024358928203582764, 0.00025140680372714996, 0.0002592243254184723, 0.0002670418471097946, 0.00027485936880111694, 0.00028267689049243927, 0.0002904944121837616, 0.0002983119338750839, 0.00030612945556640625]}, "gradients/encoder.encoder.layers.23.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 3.0, 6.0, 8.0, 12.0, 23.0, 49.0, 62.0, 140.0, 275.0, 618.0, 1748.0, 8358.0, 126964.0, 878715.0, 26208.0, 3461.0, 1064.0, 420.0, 191.0, 90.0, 47.0, 40.0, 17.0, 11.0, 7.0, 6.0, 10.0, 1.0, 4.0, 1.0, 2.0, 0.0, 0.0, 1.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.900390625, -2.807586669921875, -2.71478271484375, -2.621978759765625, -2.5291748046875, -2.436370849609375, -2.34356689453125, -2.250762939453125, -2.157958984375, -2.065155029296875, -1.97235107421875, -1.879547119140625, -1.7867431640625, -1.693939208984375, -1.60113525390625, -1.508331298828125, -1.41552734375, -1.322723388671875, -1.22991943359375, -1.137115478515625, -1.0443115234375, -0.951507568359375, -0.85870361328125, -0.765899658203125, -0.673095703125, -0.580291748046875, -0.48748779296875, -0.394683837890625, -0.3018798828125, -0.209075927734375, -0.11627197265625, -0.023468017578125, 0.0693359375, 0.162139892578125, 0.25494384765625, 0.347747802734375, 0.4405517578125, 0.533355712890625, 0.62615966796875, 0.718963623046875, 0.811767578125, 0.904571533203125, 0.99737548828125, 1.090179443359375, 1.1829833984375, 1.275787353515625, 1.36859130859375, 1.461395263671875, 1.55419921875, 1.647003173828125, 1.73980712890625, 1.832611083984375, 1.9254150390625, 2.018218994140625, 2.11102294921875, 2.203826904296875, 2.296630859375, 2.389434814453125, 2.48223876953125, 2.575042724609375, 2.6678466796875, 2.760650634765625, 2.85345458984375, 2.946258544921875, 3.0390625]}, "gradients/encoder.encoder.layers.23.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 3.0, 3.0, 6.0, 4.0, 5.0, 17.0, 23.0, 20.0, 35.0, 41.0, 76.0, 121.0, 159.0, 124.0, 124.0, 71.0, 50.0, 36.0, 33.0, 18.0, 17.0, 5.0, 10.0, 2.0, 4.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.7314453125, -1.6410980224609375, -1.550750732421875, -1.4604034423828125, -1.37005615234375, -1.2797088623046875, -1.189361572265625, -1.0990142822265625, -1.0086669921875, -0.9183197021484375, -0.827972412109375, -0.7376251220703125, -0.64727783203125, -0.5569305419921875, -0.466583251953125, -0.3762359619140625, -0.285888671875, -0.1955413818359375, -0.105194091796875, -0.0148468017578125, 0.07550048828125, 0.1658477783203125, 0.256195068359375, 0.3465423583984375, 0.4368896484375, 0.5272369384765625, 0.617584228515625, 0.7079315185546875, 0.79827880859375, 0.8886260986328125, 0.978973388671875, 1.0693206787109375, 1.15966796875, 1.2500152587890625, 1.340362548828125, 1.4307098388671875, 1.52105712890625, 1.6114044189453125, 1.701751708984375, 1.7920989990234375, 1.8824462890625, 1.9727935791015625, 2.063140869140625, 2.1534881591796875, 2.24383544921875, 2.3341827392578125, 2.424530029296875, 2.5148773193359375, 2.605224609375, 2.6955718994140625, 2.785919189453125, 2.8762664794921875, 2.96661376953125, 3.0569610595703125, 3.147308349609375, 3.2376556396484375, 3.3280029296875, 3.4183502197265625, 3.508697509765625, 3.5990447998046875, 3.68939208984375, 3.7797393798828125, 3.870086669921875, 3.9604339599609375, 4.05078125]}, "gradients/encoder.encoder.layers.23.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 4.0, 7.0, 3.0, 9.0, 19.0, 66.0, 228.0, 403.0, 146.0, 49.0, 32.0, 17.0, 12.0, 4.0, 4.0, 3.0, 3.0, 1.0, 3.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-50.96891784667969, -49.4395751953125, -47.91023254394531, -46.38089370727539, -44.8515510559082, -43.322208404541016, -41.79286575317383, -40.263526916503906, -38.73418426513672, -37.20484161376953, -35.675498962402344, -34.14616012573242, -32.616817474365234, -31.087474822998047, -29.55813217163086, -28.028791427612305, -26.499448776245117, -24.97010612487793, -23.440765380859375, -21.911422729492188, -20.382081985473633, -18.852739334106445, -17.32339859008789, -15.794055938720703, -14.264714241027832, -12.735372543334961, -11.20603084564209, -9.676689147949219, -8.147346496582031, -6.618005275726318, -5.088663101196289, -3.559321403503418, -2.029979705810547, -0.5006378889083862, 1.0287039279937744, 2.5580458641052246, 4.087387561798096, 5.616729259490967, 7.146071434020996, 8.675413131713867, 10.204754829406738, 11.73409652709961, 13.26343822479248, 14.792779922485352, 16.32212257385254, 17.851463317871094, 19.38080596923828, 20.91014862060547, 22.439489364624023, 23.96883201599121, 25.498172760009766, 27.027515411376953, 28.556856155395508, 30.086198806762695, 31.61553955078125, 33.14488220214844, 34.674224853515625, 36.20356750488281, 37.73291015625, 39.26224899291992, 40.79159164428711, 42.3209342956543, 43.850276947021484, 45.379615783691406, 46.908958435058594]}, "gradients/encoder.encoder.layers.23.layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 2.0, 0.0, 6.0, 1.0, 3.0, 2.0, 6.0, 7.0, 6.0, 9.0, 18.0, 22.0, 47.0, 92.0, 152.0, 201.0, 165.0, 89.0, 72.0, 40.0, 17.0, 14.0, 5.0, 1.0, 4.0, 10.0, 4.0, 5.0, 2.0, 4.0, 3.0, 5.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-86.03308868408203, -82.68578338623047, -79.3384780883789, -75.99117279052734, -72.64386749267578, -69.29656219482422, -65.94924926757812, -62.60194778442383, -59.254642486572266, -55.9073371887207, -52.56003189086914, -49.21272277832031, -45.86541748046875, -42.51811218261719, -39.170806884765625, -35.82350158691406, -32.4761962890625, -29.128890991210938, -25.781585693359375, -22.43427848815918, -19.086973190307617, -15.739667892456055, -12.39236068725586, -9.045055389404297, -5.697750091552734, -2.3504443168640137, 0.996861457824707, 4.344167709350586, 7.691473007202148, 11.038778305053711, 14.386085510253906, 17.73339080810547, 21.080703735351562, 24.428009033203125, 27.775314331054688, 31.122621536254883, 34.46992492675781, 37.817230224609375, 41.1645393371582, 44.511844635009766, 47.85914993286133, 51.20645523071289, 54.55376052856445, 57.90106964111328, 61.248374938964844, 64.5956802368164, 67.94298553466797, 71.29029083251953, 74.6375961303711, 77.98490142822266, 81.33220672607422, 84.67951202392578, 88.02681732177734, 91.3741226196289, 94.721435546875, 98.06874084472656, 101.41604614257812, 104.76335144042969, 108.11065673828125, 111.45796203613281, 114.80526733398438, 118.15257263183594, 121.4998779296875, 124.84718322753906, 128.19448852539062]}, "gradients/encoder.encoder.layers.22.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 1.0, 2.0, 0.0, 4.0, 2.0, 6.0, 8.0, 12.0, 24.0, 18.0, 42.0, 44.0, 73.0, 123.0, 212.0, 458.0, 1096.0, 3227.0, 13571.0, 220700.0, 3923682.0, 23733.0, 4650.0, 1421.0, 577.0, 252.0, 127.0, 76.0, 53.0, 21.0, 14.0, 10.0, 13.0, 6.0, 6.0, 12.0, 3.0, 4.0, 0.0, 4.0, 0.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-17.359375, -16.850830078125, -16.34228515625, -15.833740234375, -15.3251953125, -14.816650390625, -14.30810546875, -13.799560546875, -13.291015625, -12.782470703125, -12.27392578125, -11.765380859375, -11.2568359375, -10.748291015625, -10.23974609375, -9.731201171875, -9.22265625, -8.714111328125, -8.20556640625, -7.697021484375, -7.1884765625, -6.679931640625, -6.17138671875, -5.662841796875, -5.154296875, -4.645751953125, -4.13720703125, -3.628662109375, -3.1201171875, -2.611572265625, -2.10302734375, -1.594482421875, -1.0859375, -0.577392578125, -0.06884765625, 0.439697265625, 0.9482421875, 1.456787109375, 1.96533203125, 2.473876953125, 2.982421875, 3.490966796875, 3.99951171875, 4.508056640625, 5.0166015625, 5.525146484375, 6.03369140625, 6.542236328125, 7.05078125, 7.559326171875, 8.06787109375, 8.576416015625, 9.0849609375, 9.593505859375, 10.10205078125, 10.610595703125, 11.119140625, 11.627685546875, 12.13623046875, 12.644775390625, 13.1533203125, 13.661865234375, 14.17041015625, 14.678955078125, 15.1875]}, "gradients/encoder.encoder.layers.22.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 4.0, 0.0, 6.0, 3.0, 1.0, 7.0, 5.0, 5.0, 8.0, 3.0, 6.0, 8.0, 15.0, 9.0, 20.0, 44.0, 299.0, 409.0, 57.0, 17.0, 12.0, 13.0, 9.0, 5.0, 11.0, 6.0, 8.0, 4.0, 3.0, 7.0, 1.0, 1.0, 4.0, 0.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-1.1572265625, -1.1251678466796875, -1.093109130859375, -1.0610504150390625, -1.02899169921875, -0.9969329833984375, -0.964874267578125, -0.9328155517578125, -0.9007568359375, -0.8686981201171875, -0.836639404296875, -0.8045806884765625, -0.77252197265625, -0.7404632568359375, -0.708404541015625, -0.6763458251953125, -0.644287109375, -0.6122283935546875, -0.580169677734375, -0.5481109619140625, -0.51605224609375, -0.4839935302734375, -0.451934814453125, -0.4198760986328125, -0.3878173828125, -0.3557586669921875, -0.323699951171875, -0.2916412353515625, -0.25958251953125, -0.2275238037109375, -0.195465087890625, -0.1634063720703125, -0.13134765625, -0.0992889404296875, -0.067230224609375, -0.0351715087890625, -0.00311279296875, 0.0289459228515625, 0.061004638671875, 0.0930633544921875, 0.1251220703125, 0.1571807861328125, 0.189239501953125, 0.2212982177734375, 0.25335693359375, 0.2854156494140625, 0.317474365234375, 0.3495330810546875, 0.381591796875, 0.4136505126953125, 0.445709228515625, 0.4777679443359375, 0.50982666015625, 0.5418853759765625, 0.573944091796875, 0.6060028076171875, 0.6380615234375, 0.6701202392578125, 0.702178955078125, 0.7342376708984375, 0.76629638671875, 0.7983551025390625, 0.830413818359375, 0.8624725341796875, 0.89453125]}, "gradients/encoder.encoder.layers.22.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 3.0, 3.0, 10.0, 6.0, 2.0, 9.0, 11.0, 25.0, 22.0, 41.0, 52.0, 106.0, 138.0, 220.0, 348.0, 595.0, 1100.0, 2318.0, 6097.0, 22553.0, 164164.0, 3797586.0, 163408.0, 23757.0, 6480.0, 2514.0, 1091.0, 604.0, 367.0, 224.0, 117.0, 78.0, 67.0, 52.0, 35.0, 21.0, 21.0, 18.0, 9.0, 10.0, 2.0, 3.0, 2.0, 0.0, 2.0, 0.0, 3.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.484375, -5.32537841796875, -5.1663818359375, -5.00738525390625, -4.848388671875, -4.68939208984375, -4.5303955078125, -4.37139892578125, -4.21240234375, -4.05340576171875, -3.8944091796875, -3.73541259765625, -3.576416015625, -3.41741943359375, -3.2584228515625, -3.09942626953125, -2.9404296875, -2.78143310546875, -2.6224365234375, -2.46343994140625, -2.304443359375, -2.14544677734375, -1.9864501953125, -1.82745361328125, -1.66845703125, -1.50946044921875, -1.3504638671875, -1.19146728515625, -1.032470703125, -0.87347412109375, -0.7144775390625, -0.55548095703125, -0.396484375, -0.23748779296875, -0.0784912109375, 0.08050537109375, 0.239501953125, 0.39849853515625, 0.5574951171875, 0.71649169921875, 0.87548828125, 1.03448486328125, 1.1934814453125, 1.35247802734375, 1.511474609375, 1.67047119140625, 1.8294677734375, 1.98846435546875, 2.1474609375, 2.30645751953125, 2.4654541015625, 2.62445068359375, 2.783447265625, 2.94244384765625, 3.1014404296875, 3.26043701171875, 3.41943359375, 3.57843017578125, 3.7374267578125, 3.89642333984375, 4.055419921875, 4.21441650390625, 4.3734130859375, 4.53240966796875, 4.69140625]}, "gradients/encoder.encoder.layers.22.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0, 1.0, 3.0, 2.0, 8.0, 5.0, 5.0, 14.0, 11.0, 20.0, 15.0, 37.0, 43.0, 71.0, 126.0, 292.0, 847.0, 1640.0, 365.0, 176.0, 113.0, 64.0, 49.0, 33.0, 27.0, 22.0, 28.0, 15.0, 13.0, 7.0, 5.0, 5.0, 2.0, 5.0, 4.0, 3.0, 1.0, 4.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.130859375, -1.0957489013671875, -1.060638427734375, -1.0255279541015625, -0.99041748046875, -0.9553070068359375, -0.920196533203125, -0.8850860595703125, -0.8499755859375, -0.8148651123046875, -0.779754638671875, -0.7446441650390625, -0.70953369140625, -0.6744232177734375, -0.639312744140625, -0.6042022705078125, -0.569091796875, -0.5339813232421875, -0.498870849609375, -0.4637603759765625, -0.42864990234375, -0.3935394287109375, -0.358428955078125, -0.3233184814453125, -0.2882080078125, -0.2530975341796875, -0.217987060546875, -0.1828765869140625, -0.14776611328125, -0.1126556396484375, -0.077545166015625, -0.0424346923828125, -0.00732421875, 0.0277862548828125, 0.062896728515625, 0.0980072021484375, 0.13311767578125, 0.1682281494140625, 0.203338623046875, 0.2384490966796875, 0.2735595703125, 0.3086700439453125, 0.343780517578125, 0.3788909912109375, 0.41400146484375, 0.4491119384765625, 0.484222412109375, 0.5193328857421875, 0.554443359375, 0.5895538330078125, 0.624664306640625, 0.6597747802734375, 0.69488525390625, 0.7299957275390625, 0.765106201171875, 0.8002166748046875, 0.8353271484375, 0.8704376220703125, 0.905548095703125, 0.9406585693359375, 0.97576904296875, 1.0108795166015625, 1.045989990234375, 1.0811004638671875, 1.1162109375]}, "gradients/encoder.encoder.layers.22.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 0.0, 2.0, 12.0, 357.0, 594.0, 35.0, 3.0, 2.0, 3.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-41.282752990722656, -40.3668212890625, -39.450889587402344, -38.53495788574219, -37.61902618408203, -36.703094482421875, -35.78716278076172, -34.8712272644043, -33.95529556274414, -33.039363861083984, -32.12343215942383, -31.207500457763672, -30.291566848754883, -29.375635147094727, -28.45970344543457, -27.543771743774414, -26.627840042114258, -25.7119083404541, -24.795976638793945, -23.880043029785156, -22.964111328125, -22.048179626464844, -21.132247924804688, -20.21631622314453, -19.300384521484375, -18.38445281982422, -17.468521118164062, -16.552589416503906, -15.636655807495117, -14.720724105834961, -13.804792404174805, -12.888860702514648, -11.97292709350586, -11.056995391845703, -10.14106273651123, -9.225131034851074, -8.309198379516602, -7.393266677856445, -6.477334976196289, -5.561402797698975, -4.64547061920166, -3.7295384407043457, -2.8136065006256104, -1.897674560546875, -0.9817423820495605, -0.0658102035522461, 0.8501214981079102, 1.7660536766052246, 2.681985855102539, 3.5979180335998535, 4.513850212097168, 5.429781913757324, 6.345714092254639, 7.261646270751953, 8.17757797241211, 9.093509674072266, 10.009442329406738, 10.925374031066895, 11.841306686401367, 12.757238388061523, 13.67317008972168, 14.589102745056152, 15.505034446716309, 16.42096710205078, 17.336898803710938]}, "gradients/encoder.encoder.layers.22.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 2.0, 5.0, 3.0, 5.0, 5.0, 12.0, 6.0, 15.0, 22.0, 23.0, 47.0, 42.0, 56.0, 73.0, 86.0, 95.0, 79.0, 90.0, 77.0, 76.0, 47.0, 30.0, 34.0, 17.0, 11.0, 12.0, 11.0, 6.0, 5.0, 4.0, 4.0, 4.0, 2.0, 0.0, 0.0, 1.0, 3.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-7.129861831665039, -6.933475971221924, -6.737090587615967, -6.540704727172852, -6.3443193435668945, -6.147933483123779, -5.951547622680664, -5.755162239074707, -5.558776378631592, -5.362390518188477, -5.1660051345825195, -4.969619274139404, -4.773233413696289, -4.576848030090332, -4.380462169647217, -4.184076309204102, -3.9876909255981445, -3.7913053035736084, -3.5949196815490723, -3.398533821105957, -3.202148199081421, -3.0057625770568848, -2.8093767166137695, -2.6129910945892334, -2.4166054725646973, -2.220219850540161, -2.023834228515625, -1.8274483680725098, -1.6310627460479736, -1.4346771240234375, -1.2382913827896118, -1.0419056415557861, -0.84552001953125, -0.6491343379020691, -0.4527486562728882, -0.2563629746437073, -0.05997729301452637, 0.13640838861465454, 0.33279407024383545, 0.5291798114776611, 0.7255654335021973, 0.9219511151313782, 1.118336796760559, 1.3147225379943848, 1.511108160018921, 1.707493782043457, 1.9038795232772827, 2.1002652645111084, 2.2966508865356445, 2.4930365085601807, 2.689422130584717, 2.885807991027832, 3.082193613052368, 3.2785792350769043, 3.4749650955200195, 3.6713507175445557, 3.867736339569092, 4.064122200012207, 4.260507583618164, 4.456893444061279, 4.6532793045043945, 4.849664688110352, 5.046050548553467, 5.242436408996582, 5.438821792602539]}, "gradients/encoder.encoder.layers.22.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 5.0, 5.0, 0.0, 6.0, 6.0, 5.0, 14.0, 12.0, 33.0, 44.0, 63.0, 96.0, 164.0, 227.0, 400.0, 715.0, 1438.0, 3060.0, 6935.0, 19345.0, 146845.0, 823575.0, 28486.0, 9283.0, 3827.0, 1799.0, 912.0, 494.0, 245.0, 179.0, 104.0, 84.0, 38.0, 34.0, 31.0, 19.0, 10.0, 9.0, 4.0, 7.0, 3.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-8.4921875, -8.2105712890625, -7.928955078125, -7.6473388671875, -7.36572265625, -7.0841064453125, -6.802490234375, -6.5208740234375, -6.2392578125, -5.9576416015625, -5.676025390625, -5.3944091796875, -5.11279296875, -4.8311767578125, -4.549560546875, -4.2679443359375, -3.986328125, -3.7047119140625, -3.423095703125, -3.1414794921875, -2.85986328125, -2.5782470703125, -2.296630859375, -2.0150146484375, -1.7333984375, -1.4517822265625, -1.170166015625, -0.8885498046875, -0.60693359375, -0.3253173828125, -0.043701171875, 0.2379150390625, 0.51953125, 0.8011474609375, 1.082763671875, 1.3643798828125, 1.64599609375, 1.9276123046875, 2.209228515625, 2.4908447265625, 2.7724609375, 3.0540771484375, 3.335693359375, 3.6173095703125, 3.89892578125, 4.1805419921875, 4.462158203125, 4.7437744140625, 5.025390625, 5.3070068359375, 5.588623046875, 5.8702392578125, 6.15185546875, 6.4334716796875, 6.715087890625, 6.9967041015625, 7.2783203125, 7.5599365234375, 7.841552734375, 8.1231689453125, 8.40478515625, 8.6864013671875, 8.968017578125, 9.2496337890625, 9.53125]}, "gradients/encoder.encoder.layers.22.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 4.0, 2.0, 3.0, 5.0, 4.0, 4.0, 6.0, 3.0, 5.0, 11.0, 14.0, 12.0, 15.0, 46.0, 211.0, 394.0, 146.0, 39.0, 15.0, 12.0, 13.0, 11.0, 9.0, 5.0, 4.0, 3.0, 3.0, 6.0, 0.0, 4.0, 1.0, 1.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0], "bins": [-1.306640625, -1.2734909057617188, -1.2403411865234375, -1.2071914672851562, -1.174041748046875, -1.1408920288085938, -1.1077423095703125, -1.0745925903320312, -1.04144287109375, -1.0082931518554688, -0.9751434326171875, -0.9419937133789062, -0.908843994140625, -0.8756942749023438, -0.8425445556640625, -0.8093948364257812, -0.7762451171875, -0.7430953979492188, -0.7099456787109375, -0.6767959594726562, -0.643646240234375, -0.6104965209960938, -0.5773468017578125, -0.5441970825195312, -0.51104736328125, -0.47789764404296875, -0.4447479248046875, -0.41159820556640625, -0.378448486328125, -0.34529876708984375, -0.3121490478515625, -0.27899932861328125, -0.245849609375, -0.21269989013671875, -0.1795501708984375, -0.14640045166015625, -0.113250732421875, -0.08010101318359375, -0.0469512939453125, -0.01380157470703125, 0.01934814453125, 0.05249786376953125, 0.0856475830078125, 0.11879730224609375, 0.151947021484375, 0.18509674072265625, 0.2182464599609375, 0.25139617919921875, 0.2845458984375, 0.31769561767578125, 0.3508453369140625, 0.38399505615234375, 0.417144775390625, 0.45029449462890625, 0.4834442138671875, 0.5165939331054688, 0.54974365234375, 0.5828933715820312, 0.6160430908203125, 0.6491928100585938, 0.682342529296875, 0.7154922485351562, 0.7486419677734375, 0.7817916870117188, 0.81494140625]}, "gradients/encoder.encoder.layers.22.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 3.0, 0.0, 6.0, 2.0, 2.0, 6.0, 9.0, 16.0, 9.0, 10.0, 23.0, 31.0, 47.0, 48.0, 84.0, 114.0, 139.0, 247.0, 471.0, 924.0, 2076.0, 5933.0, 21718.0, 109853.0, 673711.0, 185699.0, 32791.0, 8998.0, 2931.0, 1162.0, 525.0, 305.0, 185.0, 141.0, 78.0, 71.0, 32.0, 37.0, 25.0, 31.0, 14.0, 13.0, 10.0, 8.0, 7.0, 3.0, 5.0, 5.0, 2.0, 3.0, 0.0, 1.0, 2.0, 1.0, 1.0], "bins": [-4.6796875, -4.54052734375, -4.4013671875, -4.26220703125, -4.123046875, -3.98388671875, -3.8447265625, -3.70556640625, -3.56640625, -3.42724609375, -3.2880859375, -3.14892578125, -3.009765625, -2.87060546875, -2.7314453125, -2.59228515625, -2.453125, -2.31396484375, -2.1748046875, -2.03564453125, -1.896484375, -1.75732421875, -1.6181640625, -1.47900390625, -1.33984375, -1.20068359375, -1.0615234375, -0.92236328125, -0.783203125, -0.64404296875, -0.5048828125, -0.36572265625, -0.2265625, -0.08740234375, 0.0517578125, 0.19091796875, 0.330078125, 0.46923828125, 0.6083984375, 0.74755859375, 0.88671875, 1.02587890625, 1.1650390625, 1.30419921875, 1.443359375, 1.58251953125, 1.7216796875, 1.86083984375, 2.0, 2.13916015625, 2.2783203125, 2.41748046875, 2.556640625, 2.69580078125, 2.8349609375, 2.97412109375, 3.11328125, 3.25244140625, 3.3916015625, 3.53076171875, 3.669921875, 3.80908203125, 3.9482421875, 4.08740234375, 4.2265625]}, "gradients/encoder.encoder.layers.22.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 1.0, 1.0, 1.0, 3.0, 5.0, 3.0, 2.0, 4.0, 9.0, 8.0, 6.0, 7.0, 17.0, 21.0, 16.0, 20.0, 33.0, 30.0, 39.0, 31.0, 37.0, 35.0, 42.0, 40.0, 40.0, 41.0, 59.0, 46.0, 29.0, 44.0, 46.0, 33.0, 30.0, 35.0, 35.0, 21.0, 20.0, 14.0, 16.0, 16.0, 16.0, 11.0, 7.0, 11.0, 11.0, 5.0, 5.0, 3.0, 5.0, 1.0, 2.0, 0.0, 3.0, 1.0, 0.0, 1.0], "bins": [-2.759765625, -2.678253173828125, -2.59674072265625, -2.515228271484375, -2.4337158203125, -2.352203369140625, -2.27069091796875, -2.189178466796875, -2.107666015625, -2.026153564453125, -1.94464111328125, -1.863128662109375, -1.7816162109375, -1.700103759765625, -1.61859130859375, -1.537078857421875, -1.45556640625, -1.374053955078125, -1.29254150390625, -1.211029052734375, -1.1295166015625, -1.048004150390625, -0.96649169921875, -0.884979248046875, -0.803466796875, -0.721954345703125, -0.64044189453125, -0.558929443359375, -0.4774169921875, -0.395904541015625, -0.31439208984375, -0.232879638671875, -0.1513671875, -0.069854736328125, 0.01165771484375, 0.093170166015625, 0.1746826171875, 0.256195068359375, 0.33770751953125, 0.419219970703125, 0.500732421875, 0.582244873046875, 0.66375732421875, 0.745269775390625, 0.8267822265625, 0.908294677734375, 0.98980712890625, 1.071319580078125, 1.15283203125, 1.234344482421875, 1.31585693359375, 1.397369384765625, 1.4788818359375, 1.560394287109375, 1.64190673828125, 1.723419189453125, 1.804931640625, 1.886444091796875, 1.96795654296875, 2.049468994140625, 2.1309814453125, 2.212493896484375, 2.29400634765625, 2.375518798828125, 2.45703125]}, "gradients/encoder.encoder.layers.22.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 4.0, 1.0, 3.0, 9.0, 10.0, 12.0, 13.0, 31.0, 78.0, 153.0, 398.0, 1670.0, 58078.0, 984153.0, 2934.0, 605.0, 207.0, 95.0, 49.0, 26.0, 10.0, 8.0, 5.0, 3.0, 3.0, 2.0, 3.0, 1.0, 1.0, 2.0, 0.0, 1.0], "bins": [-18.359375, -17.9527587890625, -17.546142578125, -17.1395263671875, -16.73291015625, -16.3262939453125, -15.919677734375, -15.5130615234375, -15.1064453125, -14.6998291015625, -14.293212890625, -13.8865966796875, -13.47998046875, -13.0733642578125, -12.666748046875, -12.2601318359375, -11.853515625, -11.4468994140625, -11.040283203125, -10.6336669921875, -10.22705078125, -9.8204345703125, -9.413818359375, -9.0072021484375, -8.6005859375, -8.1939697265625, -7.787353515625, -7.3807373046875, -6.97412109375, -6.5675048828125, -6.160888671875, -5.7542724609375, -5.34765625, -4.9410400390625, -4.534423828125, -4.1278076171875, -3.72119140625, -3.3145751953125, -2.907958984375, -2.5013427734375, -2.0947265625, -1.6881103515625, -1.281494140625, -0.8748779296875, -0.46826171875, -0.0616455078125, 0.344970703125, 0.7515869140625, 1.158203125, 1.5648193359375, 1.971435546875, 2.3780517578125, 2.78466796875, 3.1912841796875, 3.597900390625, 4.0045166015625, 4.4111328125, 4.8177490234375, 5.224365234375, 5.6309814453125, 6.03759765625, 6.4442138671875, 6.850830078125, 7.2574462890625, 7.6640625]}, "gradients/encoder.encoder.layers.22.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 5.0, 6.0, 17.0, 28.0, 54.0, 114.0, 326.0, 263.0, 104.0, 42.0, 18.0, 12.0, 6.0, 3.0, 3.0, 2.0, 3.0, 1.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00030040740966796875, -0.00028727203607559204, -0.00027413666248321533, -0.0002610012888908386, -0.0002478659152984619, -0.0002347305417060852, -0.0002215951681137085, -0.0002084597945213318, -0.00019532442092895508, -0.00018218904733657837, -0.00016905367374420166, -0.00015591830015182495, -0.00014278292655944824, -0.00012964755296707153, -0.00011651217937469482, -0.00010337680578231812, -9.02414321899414e-05, -7.71060585975647e-05, -6.397068500518799e-05, -5.083531141281128e-05, -3.769993782043457e-05, -2.456456422805786e-05, -1.1429190635681152e-05, 1.7061829566955566e-06, 1.4841556549072266e-05, 2.7976930141448975e-05, 4.1112303733825684e-05, 5.424767732620239e-05, 6.73830509185791e-05, 8.051842451095581e-05, 9.365379810333252e-05, 0.00010678917169570923, 0.00011992454528808594, 0.00013305991888046265, 0.00014619529247283936, 0.00015933066606521606, 0.00017246603965759277, 0.00018560141324996948, 0.0001987367868423462, 0.0002118721604347229, 0.0002250075340270996, 0.00023814290761947632, 0.00025127828121185303, 0.00026441365480422974, 0.00027754902839660645, 0.00029068440198898315, 0.00030381977558135986, 0.00031695514917373657, 0.0003300905227661133, 0.00034322589635849, 0.0003563612699508667, 0.0003694966435432434, 0.0003826320171356201, 0.0003957673907279968, 0.00040890276432037354, 0.00042203813791275024, 0.00043517351150512695, 0.00044830888509750366, 0.00046144425868988037, 0.0004745796322822571, 0.0004877150058746338, 0.0005008503794670105, 0.0005139857530593872, 0.0005271211266517639, 0.0005402565002441406]}, "gradients/encoder.encoder.layers.22.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 6.0, 5.0, 7.0, 8.0, 11.0, 22.0, 42.0, 79.0, 211.0, 520.0, 1615.0, 12039.0, 922308.0, 105681.0, 4464.0, 908.0, 290.0, 132.0, 92.0, 59.0, 24.0, 13.0, 9.0, 8.0, 3.0, 4.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.671875, -4.466796875, -4.26171875, -4.056640625, -3.8515625, -3.646484375, -3.44140625, -3.236328125, -3.03125, -2.826171875, -2.62109375, -2.416015625, -2.2109375, -2.005859375, -1.80078125, -1.595703125, -1.390625, -1.185546875, -0.98046875, -0.775390625, -0.5703125, -0.365234375, -0.16015625, 0.044921875, 0.25, 0.455078125, 0.66015625, 0.865234375, 1.0703125, 1.275390625, 1.48046875, 1.685546875, 1.890625, 2.095703125, 2.30078125, 2.505859375, 2.7109375, 2.916015625, 3.12109375, 3.326171875, 3.53125, 3.736328125, 3.94140625, 4.146484375, 4.3515625, 4.556640625, 4.76171875, 4.966796875, 5.171875, 5.376953125, 5.58203125, 5.787109375, 5.9921875, 6.197265625, 6.40234375, 6.607421875, 6.8125, 7.017578125, 7.22265625, 7.427734375, 7.6328125, 7.837890625, 8.04296875, 8.248046875, 8.453125]}, "gradients/encoder.encoder.layers.22.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 2.0, 0.0, 4.0, 7.0, 6.0, 12.0, 15.0, 30.0, 52.0, 85.0, 129.0, 214.0, 165.0, 119.0, 69.0, 31.0, 27.0, 15.0, 12.0, 5.0, 7.0, 1.0, 3.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.603515625, -2.488983154296875, -2.37445068359375, -2.259918212890625, -2.1453857421875, -2.030853271484375, -1.91632080078125, -1.801788330078125, -1.687255859375, -1.572723388671875, -1.45819091796875, -1.343658447265625, -1.2291259765625, -1.114593505859375, -1.00006103515625, -0.885528564453125, -0.77099609375, -0.656463623046875, -0.54193115234375, -0.427398681640625, -0.3128662109375, -0.198333740234375, -0.08380126953125, 0.030731201171875, 0.145263671875, 0.259796142578125, 0.37432861328125, 0.488861083984375, 0.6033935546875, 0.717926025390625, 0.83245849609375, 0.946990966796875, 1.0615234375, 1.176055908203125, 1.29058837890625, 1.405120849609375, 1.5196533203125, 1.634185791015625, 1.74871826171875, 1.863250732421875, 1.977783203125, 2.092315673828125, 2.20684814453125, 2.321380615234375, 2.4359130859375, 2.550445556640625, 2.66497802734375, 2.779510498046875, 2.89404296875, 3.008575439453125, 3.12310791015625, 3.237640380859375, 3.3521728515625, 3.466705322265625, 3.58123779296875, 3.695770263671875, 3.810302734375, 3.924835205078125, 4.03936767578125, 4.153900146484375, 4.2684326171875, 4.382965087890625, 4.49749755859375, 4.612030029296875, 4.7265625]}, "gradients/encoder.encoder.layers.22.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 3.0, 5.0, 11.0, 17.0, 61.0, 173.0, 412.0, 213.0, 47.0, 30.0, 15.0, 10.0, 7.0, 1.0, 1.0, 1.0, 3.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-38.90705871582031, -37.78193283081055, -36.65680694580078, -35.531681060791016, -34.40655517578125, -33.28142547607422, -32.15629959106445, -31.031173706054688, -29.906047821044922, -28.780921936035156, -27.65579605102539, -26.530668258666992, -25.405542373657227, -24.28041648864746, -23.155288696289062, -22.030162811279297, -20.90503692626953, -19.779911041259766, -18.65478515625, -17.5296573638916, -16.404531478881836, -15.27940559387207, -14.154278755187988, -13.029151916503906, -11.90402603149414, -10.778900146484375, -9.653773307800293, -8.528646469116211, -7.403520584106445, -6.2783942222595215, -5.153267860412598, -4.028141498565674, -2.9030113220214844, -1.7778849601745605, -0.6527585983276367, 0.4723677635192871, 1.597494125366211, 2.7226204872131348, 3.8477468490600586, 4.972873210906982, 6.097999572753906, 7.22312593460083, 8.348252296447754, 9.473379135131836, 10.598505020141602, 11.723630905151367, 12.84875774383545, 13.973884582519531, 15.099010467529297, 16.224136352539062, 17.349262237548828, 18.474390029907227, 19.599515914916992, 20.724641799926758, 21.849769592285156, 22.974895477294922, 24.100021362304688, 25.225147247314453, 26.35027313232422, 27.475400924682617, 28.600526809692383, 29.72565269470215, 30.850780487060547, 31.975906372070312, 33.10103225708008]}, "gradients/encoder.encoder.layers.22.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 3.0, 3.0, 0.0, 2.0, 1.0, 3.0, 1.0, 6.0, 2.0, 2.0, 1.0, 6.0, 1.0, 4.0, 5.0, 3.0, 21.0, 28.0, 28.0, 35.0, 56.0, 69.0, 68.0, 93.0, 104.0, 97.0, 94.0, 52.0, 58.0, 45.0, 33.0, 20.0, 17.0, 14.0, 7.0, 8.0, 3.0, 2.0, 2.0, 0.0, 3.0, 5.0, 2.0, 2.0, 3.0, 0.0, 2.0, 1.0, 0.0, 3.0], "bins": [-39.178199768066406, -38.16204833984375, -37.145896911621094, -36.12974548339844, -35.11359405517578, -34.09743881225586, -33.0812873840332, -32.06513595581055, -31.04898452758789, -30.032833099365234, -29.016681671142578, -28.00052833557129, -26.984376907348633, -25.968225479125977, -24.952072143554688, -23.93592071533203, -22.919769287109375, -21.90361785888672, -20.887466430664062, -19.871313095092773, -18.855161666870117, -17.83901023864746, -16.822856903076172, -15.806705474853516, -14.79055404663086, -13.774402618408203, -12.75825023651123, -11.742097854614258, -10.725946426391602, -9.709794998168945, -8.693642616271973, -7.677490711212158, -6.661338806152344, -5.645186901092529, -4.629034996032715, -3.6128830909729004, -2.596731185913086, -1.5805792808532715, -0.564427375793457, 0.4517245292663574, 1.4678764343261719, 2.4840283393859863, 3.500180244445801, 4.516332149505615, 5.53248405456543, 6.548635959625244, 7.564787864685059, 8.580940246582031, 9.597091674804688, 10.613243103027344, 11.629395484924316, 12.645547866821289, 13.661699295043945, 14.677850723266602, 15.694003105163574, 16.710155487060547, 17.726306915283203, 18.74245834350586, 19.758609771728516, 20.774763107299805, 21.79091453552246, 22.807065963745117, 23.823219299316406, 24.839370727539062, 25.85552215576172]}, "gradients/encoder.encoder.layers.21.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 5.0, 4.0, 5.0, 4.0, 3.0, 7.0, 5.0, 8.0, 22.0, 25.0, 26.0, 25.0, 43.0, 46.0, 90.0, 121.0, 207.0, 356.0, 701.0, 1394.0, 3434.0, 21099.0, 4143697.0, 16756.0, 3477.0, 1379.0, 625.0, 329.0, 149.0, 111.0, 47.0, 41.0, 22.0, 9.0, 7.0, 7.0, 4.0, 5.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-37.125, -36.159423828125, -35.19384765625, -34.228271484375, -33.2626953125, -32.297119140625, -31.33154296875, -30.365966796875, -29.400390625, -28.434814453125, -27.46923828125, -26.503662109375, -25.5380859375, -24.572509765625, -23.60693359375, -22.641357421875, -21.67578125, -20.710205078125, -19.74462890625, -18.779052734375, -17.8134765625, -16.847900390625, -15.88232421875, -14.916748046875, -13.951171875, -12.985595703125, -12.02001953125, -11.054443359375, -10.0888671875, -9.123291015625, -8.15771484375, -7.192138671875, -6.2265625, -5.260986328125, -4.29541015625, -3.329833984375, -2.3642578125, -1.398681640625, -0.43310546875, 0.532470703125, 1.498046875, 2.463623046875, 3.42919921875, 4.394775390625, 5.3603515625, 6.325927734375, 7.29150390625, 8.257080078125, 9.22265625, 10.188232421875, 11.15380859375, 12.119384765625, 13.0849609375, 14.050537109375, 15.01611328125, 15.981689453125, 16.947265625, 17.912841796875, 18.87841796875, 19.843994140625, 20.8095703125, 21.775146484375, 22.74072265625, 23.706298828125, 24.671875]}, "gradients/encoder.encoder.layers.21.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 3.0, 0.0, 4.0, 5.0, 4.0, 9.0, 7.0, 7.0, 10.0, 10.0, 22.0, 70.0, 213.0, 279.0, 194.0, 72.0, 25.0, 16.0, 13.0, 13.0, 10.0, 4.0, 9.0, 4.0, 2.0, 1.0, 4.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.291015625, -1.2574920654296875, -1.223968505859375, -1.1904449462890625, -1.15692138671875, -1.1233978271484375, -1.089874267578125, -1.0563507080078125, -1.0228271484375, -0.9893035888671875, -0.955780029296875, -0.9222564697265625, -0.88873291015625, -0.8552093505859375, -0.821685791015625, -0.7881622314453125, -0.754638671875, -0.7211151123046875, -0.687591552734375, -0.6540679931640625, -0.62054443359375, -0.5870208740234375, -0.553497314453125, -0.5199737548828125, -0.4864501953125, -0.4529266357421875, -0.419403076171875, -0.3858795166015625, -0.35235595703125, -0.3188323974609375, -0.285308837890625, -0.2517852783203125, -0.21826171875, -0.1847381591796875, -0.151214599609375, -0.1176910400390625, -0.08416748046875, -0.0506439208984375, -0.017120361328125, 0.0164031982421875, 0.0499267578125, 0.0834503173828125, 0.116973876953125, 0.1504974365234375, 0.18402099609375, 0.2175445556640625, 0.251068115234375, 0.2845916748046875, 0.318115234375, 0.3516387939453125, 0.385162353515625, 0.4186859130859375, 0.45220947265625, 0.4857330322265625, 0.519256591796875, 0.5527801513671875, 0.5863037109375, 0.6198272705078125, 0.653350830078125, 0.6868743896484375, 0.72039794921875, 0.7539215087890625, 0.787445068359375, 0.8209686279296875, 0.8544921875]}, "gradients/encoder.encoder.layers.21.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 3.0, 0.0, 1.0, 0.0, 2.0, 1.0, 2.0, 2.0, 3.0, 8.0, 5.0, 1.0, 5.0, 7.0, 11.0, 33.0, 16.0, 42.0, 92.0, 322.0, 2304.0, 68467.0, 4114475.0, 7421.0, 712.0, 178.0, 55.0, 39.0, 24.0, 13.0, 11.0, 14.0, 1.0, 6.0, 4.0, 5.0, 1.0, 1.0, 0.0, 2.0, 0.0, 4.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-20.671875, -19.964599609375, -19.25732421875, -18.550048828125, -17.8427734375, -17.135498046875, -16.42822265625, -15.720947265625, -15.013671875, -14.306396484375, -13.59912109375, -12.891845703125, -12.1845703125, -11.477294921875, -10.77001953125, -10.062744140625, -9.35546875, -8.648193359375, -7.94091796875, -7.233642578125, -6.5263671875, -5.819091796875, -5.11181640625, -4.404541015625, -3.697265625, -2.989990234375, -2.28271484375, -1.575439453125, -0.8681640625, -0.160888671875, 0.54638671875, 1.253662109375, 1.9609375, 2.668212890625, 3.37548828125, 4.082763671875, 4.7900390625, 5.497314453125, 6.20458984375, 6.911865234375, 7.619140625, 8.326416015625, 9.03369140625, 9.740966796875, 10.4482421875, 11.155517578125, 11.86279296875, 12.570068359375, 13.27734375, 13.984619140625, 14.69189453125, 15.399169921875, 16.1064453125, 16.813720703125, 17.52099609375, 18.228271484375, 18.935546875, 19.642822265625, 20.35009765625, 21.057373046875, 21.7646484375, 22.471923828125, 23.17919921875, 23.886474609375, 24.59375]}, "gradients/encoder.encoder.layers.21.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 2.0, 0.0, 1.0, 3.0, 9.0, 7.0, 3.0, 9.0, 13.0, 13.0, 14.0, 20.0, 33.0, 44.0, 107.0, 244.0, 2068.0, 1025.0, 203.0, 86.0, 40.0, 33.0, 20.0, 22.0, 17.0, 7.0, 4.0, 4.0, 2.0, 7.0, 2.0, 4.0, 3.0, 2.0, 4.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 5.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-1.3125, -1.2737579345703125, -1.235015869140625, -1.1962738037109375, -1.15753173828125, -1.1187896728515625, -1.080047607421875, -1.0413055419921875, -1.0025634765625, -0.9638214111328125, -0.925079345703125, -0.8863372802734375, -0.84759521484375, -0.8088531494140625, -0.770111083984375, -0.7313690185546875, -0.692626953125, -0.6538848876953125, -0.615142822265625, -0.5764007568359375, -0.53765869140625, -0.4989166259765625, -0.460174560546875, -0.4214324951171875, -0.3826904296875, -0.3439483642578125, -0.305206298828125, -0.2664642333984375, -0.22772216796875, -0.1889801025390625, -0.150238037109375, -0.1114959716796875, -0.07275390625, -0.0340118408203125, 0.004730224609375, 0.0434722900390625, 0.08221435546875, 0.1209564208984375, 0.159698486328125, 0.1984405517578125, 0.2371826171875, 0.2759246826171875, 0.314666748046875, 0.3534088134765625, 0.39215087890625, 0.4308929443359375, 0.469635009765625, 0.5083770751953125, 0.547119140625, 0.5858612060546875, 0.624603271484375, 0.6633453369140625, 0.70208740234375, 0.7408294677734375, 0.779571533203125, 0.8183135986328125, 0.8570556640625, 0.8957977294921875, 0.934539794921875, 0.9732818603515625, 1.01202392578125, 1.0507659912109375, 1.089508056640625, 1.1282501220703125, 1.1669921875]}, "gradients/encoder.encoder.layers.21.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 1.0, 5.0, 9.0, 51.0, 538.0, 361.0, 37.0, 9.0, 2.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.613154172897339, -3.001356601715088, -2.389558792114258, -1.7777612209320068, -1.1659636497497559, -0.5541660785675049, 0.057631731033325195, 0.6694290637969971, 1.2812268733978271, 1.8930244445800781, 2.504822254180908, 3.116619825363159, 3.72841739654541, 4.340214729309082, 4.95201301574707, 5.563810348510742, 6.175607681274414, 6.787405490875244, 7.399202823638916, 8.011000633239746, 8.622797966003418, 9.234596252441406, 9.846393585205078, 10.45819091796875, 11.069989204406738, 11.68178653717041, 12.293584823608398, 12.90538215637207, 13.517179489135742, 14.128976821899414, 14.740775108337402, 15.352572441101074, 15.96436882019043, 16.5761661529541, 17.187963485717773, 17.799762725830078, 18.41156005859375, 19.023357391357422, 19.635154724121094, 20.246952056884766, 20.858749389648438, 21.47054672241211, 22.08234405517578, 22.694141387939453, 23.305940628051758, 23.91773796081543, 24.5295352935791, 25.141332626342773, 25.753131866455078, 26.36492919921875, 26.976726531982422, 27.588523864746094, 28.2003231048584, 28.81212043762207, 29.423917770385742, 30.035715103149414, 30.647512435913086, 31.259309768676758, 31.87110710144043, 32.482906341552734, 33.094703674316406, 33.70650100708008, 34.31829833984375, 34.93009567260742, 35.541893005371094]}, "gradients/encoder.encoder.layers.21.final_layer_norm.bias": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 3.0, 4.0, 4.0, 3.0, 5.0, 6.0, 6.0, 6.0, 10.0, 12.0, 13.0, 24.0, 25.0, 32.0, 34.0, 41.0, 68.0, 58.0, 59.0, 64.0, 69.0, 72.0, 74.0, 46.0, 44.0, 46.0, 31.0, 29.0, 19.0, 28.0, 16.0, 12.0, 12.0, 4.0, 8.0, 2.0, 4.0, 2.0, 3.0, 3.0, 2.0, 1.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-4.197154998779297, -4.068259239196777, -3.9393632411956787, -3.81046724319458, -3.6815712451934814, -3.552675247192383, -3.4237794876098633, -3.2948834896087646, -3.165987491607666, -3.0370914936065674, -2.908195734024048, -2.779299736022949, -2.6504037380218506, -2.521507740020752, -2.3926119804382324, -2.263715982437134, -2.1348202228546143, -2.0059242248535156, -1.8770283460617065, -1.7481324672698975, -1.6192364692687988, -1.4903405904769897, -1.3614447116851807, -1.232548713684082, -1.103652834892273, -0.9747568964958191, -0.8458609580993652, -0.7169650793075562, -0.5880691409111023, -0.45917320251464844, -0.33027732372283936, -0.2013813853263855, -0.07248544692993164, 0.05641047656536102, 0.1853064000606537, 0.31420230865478516, 0.443098247051239, 0.5719941854476929, 0.700890064239502, 0.8297860026359558, 0.9586819410324097, 1.0875778198242188, 1.2164738178253174, 1.3453696966171265, 1.4742655754089355, 1.6031615734100342, 1.7320574522018433, 1.8609533309936523, 1.989849328994751, 2.1187453269958496, 2.247641086578369, 2.3765370845794678, 2.5054330825805664, 2.634328842163086, 2.7632248401641846, 2.892120838165283, 3.0210165977478027, 3.1499125957489014, 3.278808355331421, 3.4077043533325195, 3.536600351333618, 3.665496349334717, 3.7943921089172363, 3.923288106918335, 4.052184104919434]}, "gradients/encoder.encoder.layers.21.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 1.0, 1.0, 1.0, 8.0, 3.0, 11.0, 13.0, 15.0, 33.0, 35.0, 68.0, 89.0, 163.0, 286.0, 541.0, 1185.0, 3178.0, 12237.0, 286225.0, 721703.0, 16159.0, 3765.0, 1398.0, 601.0, 339.0, 185.0, 113.0, 67.0, 42.0, 33.0, 14.0, 14.0, 7.0, 6.0, 6.0, 4.0, 5.0, 4.0, 2.0, 0.0, 4.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-11.5078125, -11.159912109375, -10.81201171875, -10.464111328125, -10.1162109375, -9.768310546875, -9.42041015625, -9.072509765625, -8.724609375, -8.376708984375, -8.02880859375, -7.680908203125, -7.3330078125, -6.985107421875, -6.63720703125, -6.289306640625, -5.94140625, -5.593505859375, -5.24560546875, -4.897705078125, -4.5498046875, -4.201904296875, -3.85400390625, -3.506103515625, -3.158203125, -2.810302734375, -2.46240234375, -2.114501953125, -1.7666015625, -1.418701171875, -1.07080078125, -0.722900390625, -0.375, -0.027099609375, 0.32080078125, 0.668701171875, 1.0166015625, 1.364501953125, 1.71240234375, 2.060302734375, 2.408203125, 2.756103515625, 3.10400390625, 3.451904296875, 3.7998046875, 4.147705078125, 4.49560546875, 4.843505859375, 5.19140625, 5.539306640625, 5.88720703125, 6.235107421875, 6.5830078125, 6.930908203125, 7.27880859375, 7.626708984375, 7.974609375, 8.322509765625, 8.67041015625, 9.018310546875, 9.3662109375, 9.714111328125, 10.06201171875, 10.409912109375, 10.7578125]}, "gradients/encoder.encoder.layers.21.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 3.0, 1.0, 2.0, 1.0, 4.0, 1.0, 10.0, 9.0, 5.0, 5.0, 30.0, 83.0, 171.0, 294.0, 207.0, 95.0, 30.0, 16.0, 14.0, 8.0, 6.0, 6.0, 3.0, 3.0, 3.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.3037109375, -1.2699127197265625, -1.236114501953125, -1.2023162841796875, -1.16851806640625, -1.1347198486328125, -1.100921630859375, -1.0671234130859375, -1.0333251953125, -0.9995269775390625, -0.965728759765625, -0.9319305419921875, -0.89813232421875, -0.8643341064453125, -0.830535888671875, -0.7967376708984375, -0.762939453125, -0.7291412353515625, -0.695343017578125, -0.6615447998046875, -0.62774658203125, -0.5939483642578125, -0.560150146484375, -0.5263519287109375, -0.4925537109375, -0.4587554931640625, -0.424957275390625, -0.3911590576171875, -0.35736083984375, -0.3235626220703125, -0.289764404296875, -0.2559661865234375, -0.22216796875, -0.1883697509765625, -0.154571533203125, -0.1207733154296875, -0.08697509765625, -0.0531768798828125, -0.019378662109375, 0.0144195556640625, 0.0482177734375, 0.0820159912109375, 0.115814208984375, 0.1496124267578125, 0.18341064453125, 0.2172088623046875, 0.251007080078125, 0.2848052978515625, 0.318603515625, 0.3524017333984375, 0.386199951171875, 0.4199981689453125, 0.45379638671875, 0.4875946044921875, 0.521392822265625, 0.5551910400390625, 0.5889892578125, 0.6227874755859375, 0.656585693359375, 0.6903839111328125, 0.72418212890625, 0.7579803466796875, 0.791778564453125, 0.8255767822265625, 0.859375]}, "gradients/encoder.encoder.layers.21.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 4.0, 7.0, 7.0, 10.0, 11.0, 15.0, 30.0, 22.0, 36.0, 53.0, 82.0, 119.0, 154.0, 222.0, 364.0, 527.0, 966.0, 2078.0, 5582.0, 20175.0, 104869.0, 637844.0, 226023.0, 34504.0, 8536.0, 3053.0, 1307.0, 711.0, 410.0, 254.0, 161.0, 112.0, 86.0, 57.0, 32.0, 31.0, 35.0, 16.0, 13.0, 11.0, 12.0, 5.0, 7.0, 3.0, 3.0, 1.0, 1.0, 4.0, 2.0, 1.0], "bins": [-4.94921875, -4.810516357421875, -4.67181396484375, -4.533111572265625, -4.3944091796875, -4.255706787109375, -4.11700439453125, -3.978302001953125, -3.839599609375, -3.700897216796875, -3.56219482421875, -3.423492431640625, -3.2847900390625, -3.146087646484375, -3.00738525390625, -2.868682861328125, -2.72998046875, -2.591278076171875, -2.45257568359375, -2.313873291015625, -2.1751708984375, -2.036468505859375, -1.89776611328125, -1.759063720703125, -1.620361328125, -1.481658935546875, -1.34295654296875, -1.204254150390625, -1.0655517578125, -0.926849365234375, -0.78814697265625, -0.649444580078125, -0.5107421875, -0.372039794921875, -0.23333740234375, -0.094635009765625, 0.0440673828125, 0.182769775390625, 0.32147216796875, 0.460174560546875, 0.598876953125, 0.737579345703125, 0.87628173828125, 1.014984130859375, 1.1536865234375, 1.292388916015625, 1.43109130859375, 1.569793701171875, 1.70849609375, 1.847198486328125, 1.98590087890625, 2.124603271484375, 2.2633056640625, 2.402008056640625, 2.54071044921875, 2.679412841796875, 2.818115234375, 2.956817626953125, 3.09552001953125, 3.234222412109375, 3.3729248046875, 3.511627197265625, 3.65032958984375, 3.789031982421875, 3.927734375]}, "gradients/encoder.encoder.layers.21.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 2.0, 2.0, 5.0, 3.0, 11.0, 7.0, 6.0, 14.0, 13.0, 25.0, 19.0, 20.0, 27.0, 27.0, 26.0, 35.0, 28.0, 46.0, 46.0, 49.0, 51.0, 51.0, 56.0, 51.0, 42.0, 60.0, 50.0, 31.0, 35.0, 47.0, 22.0, 20.0, 15.0, 7.0, 18.0, 6.0, 9.0, 7.0, 3.0, 4.0, 2.0, 7.0, 3.0, 1.0, 4.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-2.884765625, -2.7984619140625, -2.712158203125, -2.6258544921875, -2.53955078125, -2.4532470703125, -2.366943359375, -2.2806396484375, -2.1943359375, -2.1080322265625, -2.021728515625, -1.9354248046875, -1.84912109375, -1.7628173828125, -1.676513671875, -1.5902099609375, -1.50390625, -1.4176025390625, -1.331298828125, -1.2449951171875, -1.15869140625, -1.0723876953125, -0.986083984375, -0.8997802734375, -0.8134765625, -0.7271728515625, -0.640869140625, -0.5545654296875, -0.46826171875, -0.3819580078125, -0.295654296875, -0.2093505859375, -0.123046875, -0.0367431640625, 0.049560546875, 0.1358642578125, 0.22216796875, 0.3084716796875, 0.394775390625, 0.4810791015625, 0.5673828125, 0.6536865234375, 0.739990234375, 0.8262939453125, 0.91259765625, 0.9989013671875, 1.085205078125, 1.1715087890625, 1.2578125, 1.3441162109375, 1.430419921875, 1.5167236328125, 1.60302734375, 1.6893310546875, 1.775634765625, 1.8619384765625, 1.9482421875, 2.0345458984375, 2.120849609375, 2.2071533203125, 2.29345703125, 2.3797607421875, 2.466064453125, 2.5523681640625, 2.638671875]}, "gradients/encoder.encoder.layers.21.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 3.0, 0.0, 0.0, 5.0, 5.0, 8.0, 10.0, 13.0, 22.0, 26.0, 48.0, 87.0, 124.0, 273.0, 653.0, 2242.0, 17740.0, 798127.0, 219262.0, 7368.0, 1461.0, 521.0, 248.0, 109.0, 69.0, 35.0, 33.0, 20.0, 20.0, 8.0, 4.0, 6.0, 6.0, 4.0, 2.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-4.22265625, -4.10479736328125, -3.9869384765625, -3.86907958984375, -3.751220703125, -3.63336181640625, -3.5155029296875, -3.39764404296875, -3.27978515625, -3.16192626953125, -3.0440673828125, -2.92620849609375, -2.808349609375, -2.69049072265625, -2.5726318359375, -2.45477294921875, -2.3369140625, -2.21905517578125, -2.1011962890625, -1.98333740234375, -1.865478515625, -1.74761962890625, -1.6297607421875, -1.51190185546875, -1.39404296875, -1.27618408203125, -1.1583251953125, -1.04046630859375, -0.922607421875, -0.80474853515625, -0.6868896484375, -0.56903076171875, -0.451171875, -0.33331298828125, -0.2154541015625, -0.09759521484375, 0.020263671875, 0.13812255859375, 0.2559814453125, 0.37384033203125, 0.49169921875, 0.60955810546875, 0.7274169921875, 0.84527587890625, 0.963134765625, 1.08099365234375, 1.1988525390625, 1.31671142578125, 1.4345703125, 1.55242919921875, 1.6702880859375, 1.78814697265625, 1.906005859375, 2.02386474609375, 2.1417236328125, 2.25958251953125, 2.37744140625, 2.49530029296875, 2.6131591796875, 2.73101806640625, 2.848876953125, 2.96673583984375, 3.0845947265625, 3.20245361328125, 3.3203125]}, "gradients/encoder.encoder.layers.21.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 1.0, 1.0, 4.0, 4.0, 5.0, 2.0, 9.0, 10.0, 10.0, 19.0, 28.0, 36.0, 40.0, 69.0, 106.0, 147.0, 117.0, 128.0, 73.0, 62.0, 47.0, 24.0, 15.0, 18.0, 8.0, 6.0, 9.0, 5.0, 2.0, 2.0, 6.0, 0.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0002161264419555664, -0.00020964257419109344, -0.00020315870642662048, -0.00019667483866214752, -0.00019019097089767456, -0.0001837071031332016, -0.00017722323536872864, -0.00017073936760425568, -0.00016425549983978271, -0.00015777163207530975, -0.0001512877643108368, -0.00014480389654636383, -0.00013832002878189087, -0.0001318361610174179, -0.00012535229325294495, -0.00011886842548847198, -0.00011238455772399902, -0.00010590068995952606, -9.94168221950531e-05, -9.293295443058014e-05, -8.644908666610718e-05, -7.996521890163422e-05, -7.348135113716125e-05, -6.69974833726883e-05, -6.051361560821533e-05, -5.402974784374237e-05, -4.754588007926941e-05, -4.106201231479645e-05, -3.4578144550323486e-05, -2.8094276785850525e-05, -2.1610409021377563e-05, -1.5126541256904602e-05, -8.64267349243164e-06, -2.158805727958679e-06, 4.325062036514282e-06, 1.0808929800987244e-05, 1.7292797565460205e-05, 2.3776665329933167e-05, 3.0260533094406128e-05, 3.674440085887909e-05, 4.322826862335205e-05, 4.971213638782501e-05, 5.6196004152297974e-05, 6.267987191677094e-05, 6.91637396812439e-05, 7.564760744571686e-05, 8.213147521018982e-05, 8.861534297466278e-05, 9.509921073913574e-05, 0.0001015830785036087, 0.00010806694626808167, 0.00011455081403255463, 0.00012103468179702759, 0.00012751854956150055, 0.0001340024173259735, 0.00014048628509044647, 0.00014697015285491943, 0.0001534540206193924, 0.00015993788838386536, 0.00016642175614833832, 0.00017290562391281128, 0.00017938949167728424, 0.0001858733594417572, 0.00019235722720623016, 0.00019884109497070312]}, "gradients/encoder.encoder.layers.21.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 5.0, 0.0, 1.0, 1.0, 6.0, 5.0, 5.0, 7.0, 9.0, 11.0, 9.0, 25.0, 21.0, 36.0, 75.0, 123.0, 219.0, 406.0, 828.0, 2083.0, 7853.0, 64374.0, 821535.0, 134288.0, 11833.0, 2749.0, 976.0, 458.0, 257.0, 142.0, 71.0, 62.0, 24.0, 27.0, 12.0, 7.0, 5.0, 5.0, 4.0, 4.0, 2.0, 2.0, 1.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-3.0625, -2.97406005859375, -2.8856201171875, -2.79718017578125, -2.708740234375, -2.62030029296875, -2.5318603515625, -2.44342041015625, -2.35498046875, -2.26654052734375, -2.1781005859375, -2.08966064453125, -2.001220703125, -1.91278076171875, -1.8243408203125, -1.73590087890625, -1.6474609375, -1.55902099609375, -1.4705810546875, -1.38214111328125, -1.293701171875, -1.20526123046875, -1.1168212890625, -1.02838134765625, -0.93994140625, -0.85150146484375, -0.7630615234375, -0.67462158203125, -0.586181640625, -0.49774169921875, -0.4093017578125, -0.32086181640625, -0.232421875, -0.14398193359375, -0.0555419921875, 0.03289794921875, 0.121337890625, 0.20977783203125, 0.2982177734375, 0.38665771484375, 0.47509765625, 0.56353759765625, 0.6519775390625, 0.74041748046875, 0.828857421875, 0.91729736328125, 1.0057373046875, 1.09417724609375, 1.1826171875, 1.27105712890625, 1.3594970703125, 1.44793701171875, 1.536376953125, 1.62481689453125, 1.7132568359375, 1.80169677734375, 1.89013671875, 1.97857666015625, 2.0670166015625, 2.15545654296875, 2.243896484375, 2.33233642578125, 2.4207763671875, 2.50921630859375, 2.59765625]}, "gradients/encoder.encoder.layers.21.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 3.0, 3.0, 5.0, 8.0, 14.0, 16.0, 20.0, 30.0, 46.0, 41.0, 76.0, 92.0, 126.0, 131.0, 128.0, 68.0, 67.0, 43.0, 25.0, 20.0, 16.0, 8.0, 7.0, 3.0, 9.0, 1.0, 3.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.7177734375, -1.6690521240234375, -1.620330810546875, -1.5716094970703125, -1.52288818359375, -1.4741668701171875, -1.425445556640625, -1.3767242431640625, -1.3280029296875, -1.2792816162109375, -1.230560302734375, -1.1818389892578125, -1.13311767578125, -1.0843963623046875, -1.035675048828125, -0.9869537353515625, -0.938232421875, -0.8895111083984375, -0.840789794921875, -0.7920684814453125, -0.74334716796875, -0.6946258544921875, -0.645904541015625, -0.5971832275390625, -0.5484619140625, -0.4997406005859375, -0.451019287109375, -0.4022979736328125, -0.35357666015625, -0.3048553466796875, -0.256134033203125, -0.2074127197265625, -0.15869140625, -0.1099700927734375, -0.061248779296875, -0.0125274658203125, 0.03619384765625, 0.0849151611328125, 0.133636474609375, 0.1823577880859375, 0.2310791015625, 0.2798004150390625, 0.328521728515625, 0.3772430419921875, 0.42596435546875, 0.4746856689453125, 0.523406982421875, 0.5721282958984375, 0.620849609375, 0.6695709228515625, 0.718292236328125, 0.7670135498046875, 0.81573486328125, 0.8644561767578125, 0.913177490234375, 0.9618988037109375, 1.0106201171875, 1.0593414306640625, 1.108062744140625, 1.1567840576171875, 1.20550537109375, 1.2542266845703125, 1.302947998046875, 1.3516693115234375, 1.400390625]}, "gradients/encoder.encoder.layers.21.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 6.0, 17.0, 80.0, 454.0, 372.0, 61.0, 18.0, 3.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-80.54603576660156, -78.10302734375, -75.66001892089844, -73.2170181274414, -70.77400970458984, -68.33100128173828, -65.88800048828125, -63.44499206542969, -61.001983642578125, -58.55897521972656, -56.115970611572266, -53.67296600341797, -51.229957580566406, -48.786949157714844, -46.34394454956055, -43.90093994140625, -41.45793151855469, -39.014923095703125, -36.57191848754883, -34.12891387939453, -31.68590545654297, -29.24289894104004, -26.79989242553711, -24.35688591003418, -21.91387939453125, -19.47087287902832, -17.02786636352539, -14.584859848022461, -12.141853332519531, -9.698846817016602, -7.255840301513672, -4.812833786010742, -2.3698272705078125, 0.07317924499511719, 2.516185760498047, 4.959192276000977, 7.402198791503906, 9.845205307006836, 12.288211822509766, 14.731218338012695, 17.174224853515625, 19.617231369018555, 22.060237884521484, 24.503244400024414, 26.946250915527344, 29.389257431030273, 31.832263946533203, 34.2752685546875, 36.71827697753906, 39.161285400390625, 41.60429000854492, 44.04729461669922, 46.49030303955078, 48.933311462402344, 51.37631607055664, 53.81932067871094, 56.2623291015625, 58.70533752441406, 61.14834213256836, 63.591346740722656, 66.03435516357422, 68.47736358642578, 70.92036437988281, 73.36337280273438, 75.80638122558594]}, "gradients/encoder.encoder.layers.21.layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 0.0, 2.0, 0.0, 3.0, 3.0, 4.0, 12.0, 9.0, 12.0, 12.0, 16.0, 32.0, 40.0, 30.0, 63.0, 54.0, 55.0, 61.0, 71.0, 55.0, 76.0, 62.0, 47.0, 63.0, 52.0, 39.0, 36.0, 20.0, 26.0, 14.0, 17.0, 4.0, 7.0, 0.0, 4.0, 1.0, 3.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-29.777952194213867, -28.92869758605957, -28.07944107055664, -27.230186462402344, -26.380931854248047, -25.53167724609375, -24.68242073059082, -23.833166122436523, -22.983909606933594, -22.134654998779297, -21.285398483276367, -20.43614387512207, -19.586889266967773, -18.737632751464844, -17.888378143310547, -17.03912353515625, -16.189868927001953, -15.34061336517334, -14.491358757019043, -13.64210319519043, -12.792848587036133, -11.94359302520752, -11.094337463378906, -10.24508285522461, -9.395827293395996, -8.546571731567383, -7.697317123413086, -6.848061561584473, -5.998806476593018, -5.1495513916015625, -4.300295829772949, -3.451040744781494, -2.601787567138672, -1.7525323629379272, -0.9032771587371826, -0.05402183532714844, 0.7952332496643066, 1.6444883346557617, 2.493743896484375, 3.34299898147583, 4.192254066467285, 5.04150915145874, 5.890764236450195, 6.740019798278809, 7.589274883270264, 8.438529968261719, 9.287785530090332, 10.137041091918945, 10.986295700073242, 11.835551261901855, 12.684805870056152, 13.534061431884766, 14.383316040039062, 15.232571601867676, 16.08182716369629, 16.931081771850586, 17.780338287353516, 18.629592895507812, 19.478849411010742, 20.32810401916504, 21.177358627319336, 22.026615142822266, 22.875869750976562, 23.72512435913086, 24.574378967285156]}, "gradients/encoder.encoder.layers.20.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 1.0, 1.0, 2.0, 3.0, 6.0, 3.0, 10.0, 19.0, 35.0, 46.0, 115.0, 296.0, 854.0, 8163.0, 4178177.0, 5850.0, 491.0, 110.0, 39.0, 29.0, 6.0, 9.0, 9.0, 9.0, 4.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-72.625, -71.02734375, -69.4296875, -67.83203125, -66.234375, -64.63671875, -63.0390625, -61.44140625, -59.84375, -58.24609375, -56.6484375, -55.05078125, -53.453125, -51.85546875, -50.2578125, -48.66015625, -47.0625, -45.46484375, -43.8671875, -42.26953125, -40.671875, -39.07421875, -37.4765625, -35.87890625, -34.28125, -32.68359375, -31.0859375, -29.48828125, -27.890625, -26.29296875, -24.6953125, -23.09765625, -21.5, -19.90234375, -18.3046875, -16.70703125, -15.109375, -13.51171875, -11.9140625, -10.31640625, -8.71875, -7.12109375, -5.5234375, -3.92578125, -2.328125, -0.73046875, 0.8671875, 2.46484375, 4.0625, 5.66015625, 7.2578125, 8.85546875, 10.453125, 12.05078125, 13.6484375, 15.24609375, 16.84375, 18.44140625, 20.0390625, 21.63671875, 23.234375, 24.83203125, 26.4296875, 28.02734375, 29.625]}, "gradients/encoder.encoder.layers.20.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 2.0, 1.0, 0.0, 1.0, 2.0, 3.0, 2.0, 1.0, 5.0, 10.0, 12.0, 30.0, 54.0, 110.0, 157.0, 204.0, 162.0, 101.0, 57.0, 33.0, 23.0, 8.0, 7.0, 6.0, 12.0, 6.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-1.708984375, -1.6714248657226562, -1.6338653564453125, -1.5963058471679688, -1.558746337890625, -1.5211868286132812, -1.4836273193359375, -1.4460678100585938, -1.40850830078125, -1.3709487915039062, -1.3333892822265625, -1.2958297729492188, -1.258270263671875, -1.2207107543945312, -1.1831512451171875, -1.1455917358398438, -1.1080322265625, -1.0704727172851562, -1.0329132080078125, -0.9953536987304688, -0.957794189453125, -0.9202346801757812, -0.8826751708984375, -0.8451156616210938, -0.80755615234375, -0.7699966430664062, -0.7324371337890625, -0.6948776245117188, -0.657318115234375, -0.6197586059570312, -0.5821990966796875, -0.5446395874023438, -0.507080078125, -0.46952056884765625, -0.4319610595703125, -0.39440155029296875, -0.356842041015625, -0.31928253173828125, -0.2817230224609375, -0.24416351318359375, -0.20660400390625, -0.16904449462890625, -0.1314849853515625, -0.09392547607421875, -0.056365966796875, -0.01880645751953125, 0.0187530517578125, 0.05631256103515625, 0.0938720703125, 0.13143157958984375, 0.1689910888671875, 0.20655059814453125, 0.244110107421875, 0.28166961669921875, 0.3192291259765625, 0.35678863525390625, 0.39434814453125, 0.43190765380859375, 0.4694671630859375, 0.5070266723632812, 0.544586181640625, 0.5821456909179688, 0.6197052001953125, 0.6572647094726562, 0.69482421875]}, "gradients/encoder.encoder.layers.20.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 0.0, 5.0, 10.0, 26.0, 56.0, 168.0, 450.0, 2030.0, 78451.0, 4104745.0, 7101.0, 782.0, 279.0, 108.0, 45.0, 26.0, 4.0, 2.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0], "bins": [-15.1015625, -14.4517822265625, -13.802001953125, -13.1522216796875, -12.50244140625, -11.8526611328125, -11.202880859375, -10.5531005859375, -9.9033203125, -9.2535400390625, -8.603759765625, -7.9539794921875, -7.30419921875, -6.6544189453125, -6.004638671875, -5.3548583984375, -4.705078125, -4.0552978515625, -3.405517578125, -2.7557373046875, -2.10595703125, -1.4561767578125, -0.806396484375, -0.1566162109375, 0.4931640625, 1.1429443359375, 1.792724609375, 2.4425048828125, 3.09228515625, 3.7420654296875, 4.391845703125, 5.0416259765625, 5.69140625, 6.3411865234375, 6.990966796875, 7.6407470703125, 8.29052734375, 8.9403076171875, 9.590087890625, 10.2398681640625, 10.8896484375, 11.5394287109375, 12.189208984375, 12.8389892578125, 13.48876953125, 14.1385498046875, 14.788330078125, 15.4381103515625, 16.087890625, 16.7376708984375, 17.387451171875, 18.0372314453125, 18.68701171875, 19.3367919921875, 19.986572265625, 20.6363525390625, 21.2861328125, 21.9359130859375, 22.585693359375, 23.2354736328125, 23.88525390625, 24.5350341796875, 25.184814453125, 25.8345947265625, 26.484375]}, "gradients/encoder.encoder.layers.20.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 4.0, 3.0, 4.0, 7.0, 8.0, 14.0, 16.0, 33.0, 50.0, 87.0, 312.0, 2527.0, 746.0, 151.0, 46.0, 29.0, 17.0, 10.0, 9.0, 6.0, 3.0, 2.0, 1.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-2.716796875, -2.642791748046875, -2.56878662109375, -2.494781494140625, -2.4207763671875, -2.346771240234375, -2.27276611328125, -2.198760986328125, -2.124755859375, -2.050750732421875, -1.97674560546875, -1.902740478515625, -1.8287353515625, -1.754730224609375, -1.68072509765625, -1.606719970703125, -1.53271484375, -1.458709716796875, -1.38470458984375, -1.310699462890625, -1.2366943359375, -1.162689208984375, -1.08868408203125, -1.014678955078125, -0.940673828125, -0.866668701171875, -0.79266357421875, -0.718658447265625, -0.6446533203125, -0.570648193359375, -0.49664306640625, -0.422637939453125, -0.3486328125, -0.274627685546875, -0.20062255859375, -0.126617431640625, -0.0526123046875, 0.021392822265625, 0.09539794921875, 0.169403076171875, 0.243408203125, 0.317413330078125, 0.39141845703125, 0.465423583984375, 0.5394287109375, 0.613433837890625, 0.68743896484375, 0.761444091796875, 0.83544921875, 0.909454345703125, 0.98345947265625, 1.057464599609375, 1.1314697265625, 1.205474853515625, 1.27947998046875, 1.353485107421875, 1.427490234375, 1.501495361328125, 1.57550048828125, 1.649505615234375, 1.7235107421875, 1.797515869140625, 1.87152099609375, 1.945526123046875, 2.01953125]}, "gradients/encoder.encoder.layers.20.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 6.0, 12.0, 55.0, 475.0, 405.0, 43.0, 7.0, 4.0, 3.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-19.124195098876953, -18.251140594482422, -17.37808609008789, -16.50503158569336, -15.631977081298828, -14.758922576904297, -13.88586711883545, -13.012812614440918, -12.139758110046387, -11.266703605651855, -10.393649101257324, -9.520593643188477, -8.647539138793945, -7.774485111236572, -6.901430130004883, -6.028375625610352, -5.15532112121582, -4.282266616821289, -3.4092118740081787, -2.5361571311950684, -1.663102626800537, -0.7900481224060059, 0.0830068588256836, 0.9560613632202148, 1.829115867614746, 2.7021703720092773, 3.5752251148223877, 4.448279857635498, 5.321334362030029, 6.1943888664245605, 7.06744384765625, 7.940498352050781, 8.813552856445312, 9.686607360839844, 10.559661865234375, 11.432716369628906, 12.305770874023438, 13.178825378417969, 14.051880836486816, 14.924935340881348, 15.797989845275879, 16.671045303344727, 17.544099807739258, 18.41715431213379, 19.29020881652832, 20.16326332092285, 21.036317825317383, 21.909372329711914, 22.782426834106445, 23.655481338500977, 24.528535842895508, 25.40159034729004, 26.27464485168457, 27.1476993560791, 28.020755767822266, 28.893810272216797, 29.766864776611328, 30.63991928100586, 31.51297378540039, 32.38602828979492, 33.25908279418945, 34.132137298583984, 35.005191802978516, 35.87824630737305, 36.75130081176758]}, "gradients/encoder.encoder.layers.20.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 4.0, 5.0, 8.0, 11.0, 17.0, 31.0, 40.0, 68.0, 74.0, 92.0, 111.0, 100.0, 104.0, 99.0, 62.0, 58.0, 46.0, 31.0, 14.0, 19.0, 2.0, 4.0, 4.0, 7.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-10.380634307861328, -10.069737434387207, -9.758840560913086, -9.447943687438965, -9.137046813964844, -8.826149940490723, -8.515253067016602, -8.204355239868164, -7.893458843231201, -7.58256196975708, -7.271665096282959, -6.960768222808838, -6.649870872497559, -6.3389739990234375, -6.028077125549316, -5.717180252075195, -5.406283378601074, -5.095386505126953, -4.784489631652832, -4.473592758178711, -4.16269588470459, -3.8517987728118896, -3.5409016609191895, -3.2300047874450684, -2.9191079139709473, -2.608211040496826, -2.297314167022705, -1.9864170551300049, -1.6755201816558838, -1.3646233081817627, -1.053726315498352, -0.7428293228149414, -0.4319314956665039, -0.12103456258773804, 0.18986237049102783, 0.5007593035697937, 0.8116562366485596, 1.1225531101226807, 1.4334501028060913, 1.744347095489502, 2.055243968963623, 2.366140842437744, 2.6770377159118652, 2.9879348278045654, 3.2988317012786865, 3.6097285747528076, 3.920625686645508, 4.231522560119629, 4.54241943359375, 4.853316307067871, 5.164213180541992, 5.475110054016113, 5.786006927490234, 6.0969038009643555, 6.407801151275635, 6.718698024749756, 7.029594898223877, 7.340491771697998, 7.651388645172119, 7.96228551864624, 8.27318286895752, 8.58407974243164, 8.894976615905762, 9.205873489379883, 9.516770362854004]}, "gradients/encoder.encoder.layers.20.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 2.0, 7.0, 5.0, 10.0, 21.0, 21.0, 40.0, 63.0, 114.0, 236.0, 414.0, 997.0, 2940.0, 13360.0, 486745.0, 524927.0, 13832.0, 2835.0, 993.0, 450.0, 195.0, 127.0, 90.0, 37.0, 31.0, 23.0, 8.0, 12.0, 8.0, 3.0, 3.0, 4.0, 3.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.1953125, -8.916748046875, -8.63818359375, -8.359619140625, -8.0810546875, -7.802490234375, -7.52392578125, -7.245361328125, -6.966796875, -6.688232421875, -6.40966796875, -6.131103515625, -5.8525390625, -5.573974609375, -5.29541015625, -5.016845703125, -4.73828125, -4.459716796875, -4.18115234375, -3.902587890625, -3.6240234375, -3.345458984375, -3.06689453125, -2.788330078125, -2.509765625, -2.231201171875, -1.95263671875, -1.674072265625, -1.3955078125, -1.116943359375, -0.83837890625, -0.559814453125, -0.28125, -0.002685546875, 0.27587890625, 0.554443359375, 0.8330078125, 1.111572265625, 1.39013671875, 1.668701171875, 1.947265625, 2.225830078125, 2.50439453125, 2.782958984375, 3.0615234375, 3.340087890625, 3.61865234375, 3.897216796875, 4.17578125, 4.454345703125, 4.73291015625, 5.011474609375, 5.2900390625, 5.568603515625, 5.84716796875, 6.125732421875, 6.404296875, 6.682861328125, 6.96142578125, 7.239990234375, 7.5185546875, 7.797119140625, 8.07568359375, 8.354248046875, 8.6328125]}, "gradients/encoder.encoder.layers.20.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 2.0, 3.0, 5.0, 2.0, 5.0, 6.0, 12.0, 33.0, 57.0, 107.0, 126.0, 167.0, 162.0, 125.0, 75.0, 50.0, 22.0, 16.0, 11.0, 5.0, 4.0, 7.0, 2.0, 1.0, 2.0, 1.0, 1.0, 0.0, 2.0, 1.0], "bins": [-1.58203125, -1.5474395751953125, -1.512847900390625, -1.4782562255859375, -1.44366455078125, -1.4090728759765625, -1.374481201171875, -1.3398895263671875, -1.3052978515625, -1.2707061767578125, -1.236114501953125, -1.2015228271484375, -1.16693115234375, -1.1323394775390625, -1.097747802734375, -1.0631561279296875, -1.028564453125, -0.9939727783203125, -0.959381103515625, -0.9247894287109375, -0.89019775390625, -0.8556060791015625, -0.821014404296875, -0.7864227294921875, -0.7518310546875, -0.7172393798828125, -0.682647705078125, -0.6480560302734375, -0.61346435546875, -0.5788726806640625, -0.544281005859375, -0.5096893310546875, -0.47509765625, -0.4405059814453125, -0.405914306640625, -0.3713226318359375, -0.33673095703125, -0.3021392822265625, -0.267547607421875, -0.2329559326171875, -0.1983642578125, -0.1637725830078125, -0.129180908203125, -0.0945892333984375, -0.05999755859375, -0.0254058837890625, 0.009185791015625, 0.0437774658203125, 0.078369140625, 0.1129608154296875, 0.147552490234375, 0.1821441650390625, 0.21673583984375, 0.2513275146484375, 0.285919189453125, 0.3205108642578125, 0.3551025390625, 0.3896942138671875, 0.424285888671875, 0.4588775634765625, 0.49346923828125, 0.5280609130859375, 0.562652587890625, 0.5972442626953125, 0.6318359375]}, "gradients/encoder.encoder.layers.20.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 4.0, 3.0, 0.0, 3.0, 6.0, 7.0, 6.0, 5.0, 12.0, 28.0, 36.0, 39.0, 65.0, 87.0, 141.0, 208.0, 314.0, 514.0, 1008.0, 2063.0, 5866.0, 24984.0, 197931.0, 674946.0, 115416.0, 16607.0, 4433.0, 1671.0, 844.0, 457.0, 276.0, 197.0, 106.0, 94.0, 60.0, 31.0, 30.0, 21.0, 14.0, 13.0, 4.0, 4.0, 3.0, 4.0, 2.0, 1.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-4.37890625, -4.2393798828125, -4.099853515625, -3.9603271484375, -3.82080078125, -3.6812744140625, -3.541748046875, -3.4022216796875, -3.2626953125, -3.1231689453125, -2.983642578125, -2.8441162109375, -2.70458984375, -2.5650634765625, -2.425537109375, -2.2860107421875, -2.146484375, -2.0069580078125, -1.867431640625, -1.7279052734375, -1.58837890625, -1.4488525390625, -1.309326171875, -1.1697998046875, -1.0302734375, -0.8907470703125, -0.751220703125, -0.6116943359375, -0.47216796875, -0.3326416015625, -0.193115234375, -0.0535888671875, 0.0859375, 0.2254638671875, 0.364990234375, 0.5045166015625, 0.64404296875, 0.7835693359375, 0.923095703125, 1.0626220703125, 1.2021484375, 1.3416748046875, 1.481201171875, 1.6207275390625, 1.76025390625, 1.8997802734375, 2.039306640625, 2.1788330078125, 2.318359375, 2.4578857421875, 2.597412109375, 2.7369384765625, 2.87646484375, 3.0159912109375, 3.155517578125, 3.2950439453125, 3.4345703125, 3.5740966796875, 3.713623046875, 3.8531494140625, 3.99267578125, 4.1322021484375, 4.271728515625, 4.4112548828125, 4.55078125]}, "gradients/encoder.encoder.layers.20.attention.v_proj.bias": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 4.0, 5.0, 4.0, 8.0, 5.0, 4.0, 17.0, 14.0, 11.0, 20.0, 33.0, 33.0, 42.0, 47.0, 41.0, 54.0, 46.0, 46.0, 52.0, 58.0, 61.0, 56.0, 59.0, 57.0, 49.0, 34.0, 30.0, 30.0, 24.0, 13.0, 9.0, 13.0, 6.0, 6.0, 4.0, 6.0, 3.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.0234375, -2.91876220703125, -2.8140869140625, -2.70941162109375, -2.604736328125, -2.50006103515625, -2.3953857421875, -2.29071044921875, -2.18603515625, -2.08135986328125, -1.9766845703125, -1.87200927734375, -1.767333984375, -1.66265869140625, -1.5579833984375, -1.45330810546875, -1.3486328125, -1.24395751953125, -1.1392822265625, -1.03460693359375, -0.929931640625, -0.82525634765625, -0.7205810546875, -0.61590576171875, -0.51123046875, -0.40655517578125, -0.3018798828125, -0.19720458984375, -0.092529296875, 0.01214599609375, 0.1168212890625, 0.22149658203125, 0.326171875, 0.43084716796875, 0.5355224609375, 0.64019775390625, 0.744873046875, 0.84954833984375, 0.9542236328125, 1.05889892578125, 1.16357421875, 1.26824951171875, 1.3729248046875, 1.47760009765625, 1.582275390625, 1.68695068359375, 1.7916259765625, 1.89630126953125, 2.0009765625, 2.10565185546875, 2.2103271484375, 2.31500244140625, 2.419677734375, 2.52435302734375, 2.6290283203125, 2.73370361328125, 2.83837890625, 2.94305419921875, 3.0477294921875, 3.15240478515625, 3.257080078125, 3.36175537109375, 3.4664306640625, 3.57110595703125, 3.67578125]}, "gradients/encoder.encoder.layers.20.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 3.0, 3.0, 1.0, 6.0, 17.0, 33.0, 54.0, 167.0, 519.0, 9876.0, 1034075.0, 3188.0, 369.0, 131.0, 66.0, 14.0, 15.0, 12.0, 6.0, 5.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-18.671875, -18.2093505859375, -17.746826171875, -17.2843017578125, -16.82177734375, -16.3592529296875, -15.896728515625, -15.4342041015625, -14.9716796875, -14.5091552734375, -14.046630859375, -13.5841064453125, -13.12158203125, -12.6590576171875, -12.196533203125, -11.7340087890625, -11.271484375, -10.8089599609375, -10.346435546875, -9.8839111328125, -9.42138671875, -8.9588623046875, -8.496337890625, -8.0338134765625, -7.5712890625, -7.1087646484375, -6.646240234375, -6.1837158203125, -5.72119140625, -5.2586669921875, -4.796142578125, -4.3336181640625, -3.87109375, -3.4085693359375, -2.946044921875, -2.4835205078125, -2.02099609375, -1.5584716796875, -1.095947265625, -0.6334228515625, -0.1708984375, 0.2916259765625, 0.754150390625, 1.2166748046875, 1.67919921875, 2.1417236328125, 2.604248046875, 3.0667724609375, 3.529296875, 3.9918212890625, 4.454345703125, 4.9168701171875, 5.37939453125, 5.8419189453125, 6.304443359375, 6.7669677734375, 7.2294921875, 7.6920166015625, 8.154541015625, 8.6170654296875, 9.07958984375, 9.5421142578125, 10.004638671875, 10.4671630859375, 10.9296875]}, "gradients/encoder.encoder.layers.20.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 5.0, 0.0, 4.0, 2.0, 8.0, 3.0, 16.0, 19.0, 34.0, 52.0, 89.0, 116.0, 181.0, 155.0, 121.0, 73.0, 49.0, 31.0, 17.0, 9.0, 9.0, 5.0, 3.0, 3.0, 3.0, 4.0, 3.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0003228187561035156, -0.00031328946352005005, -0.00030376017093658447, -0.0002942308783531189, -0.0002847015857696533, -0.00027517229318618774, -0.00026564300060272217, -0.0002561137080192566, -0.000246584415435791, -0.00023705512285232544, -0.00022752583026885986, -0.0002179965376853943, -0.0002084672451019287, -0.00019893795251846313, -0.00018940865993499756, -0.00017987936735153198, -0.0001703500747680664, -0.00016082078218460083, -0.00015129148960113525, -0.00014176219701766968, -0.0001322329044342041, -0.00012270361185073853, -0.00011317431926727295, -0.00010364502668380737, -9.41157341003418e-05, -8.458644151687622e-05, -7.505714893341064e-05, -6.552785634994507e-05, -5.599856376647949e-05, -4.6469271183013916e-05, -3.693997859954834e-05, -2.7410686016082764e-05, -1.7881393432617188e-05, -8.352100849151611e-06, 1.1771917343139648e-06, 1.0706484317779541e-05, 2.0235776901245117e-05, 2.9765069484710693e-05, 3.929436206817627e-05, 4.8823654651641846e-05, 5.835294723510742e-05, 6.7882239818573e-05, 7.741153240203857e-05, 8.694082498550415e-05, 9.647011756896973e-05, 0.0001059994101524353, 0.00011552870273590088, 0.00012505799531936646, 0.00013458728790283203, 0.0001441165804862976, 0.00015364587306976318, 0.00016317516565322876, 0.00017270445823669434, 0.0001822337508201599, 0.0001917630434036255, 0.00020129233598709106, 0.00021082162857055664, 0.00022035092115402222, 0.0002298802137374878, 0.00023940950632095337, 0.00024893879890441895, 0.0002584680914878845, 0.0002679973840713501, 0.0002775266766548157, 0.00028705596923828125]}, "gradients/encoder.encoder.layers.20.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 3.0, 1.0, 0.0, 5.0, 3.0, 4.0, 6.0, 4.0, 7.0, 8.0, 16.0, 11.0, 16.0, 28.0, 41.0, 57.0, 102.0, 223.0, 563.0, 1673.0, 6384.0, 76081.0, 931761.0, 25508.0, 3983.0, 1170.0, 433.0, 181.0, 90.0, 62.0, 36.0, 38.0, 10.0, 10.0, 12.0, 10.0, 7.0, 2.0, 3.0, 3.0, 4.0, 3.0, 3.0, 0.0, 2.0, 2.0, 1.0, 1.0, 0.0, 1.0], "bins": [-5.4375, -5.28790283203125, -5.1383056640625, -4.98870849609375, -4.839111328125, -4.68951416015625, -4.5399169921875, -4.39031982421875, -4.24072265625, -4.09112548828125, -3.9415283203125, -3.79193115234375, -3.642333984375, -3.49273681640625, -3.3431396484375, -3.19354248046875, -3.0439453125, -2.89434814453125, -2.7447509765625, -2.59515380859375, -2.445556640625, -2.29595947265625, -2.1463623046875, -1.99676513671875, -1.84716796875, -1.69757080078125, -1.5479736328125, -1.39837646484375, -1.248779296875, -1.09918212890625, -0.9495849609375, -0.79998779296875, -0.650390625, -0.50079345703125, -0.3511962890625, -0.20159912109375, -0.052001953125, 0.09759521484375, 0.2471923828125, 0.39678955078125, 0.54638671875, 0.69598388671875, 0.8455810546875, 0.99517822265625, 1.144775390625, 1.29437255859375, 1.4439697265625, 1.59356689453125, 1.7431640625, 1.89276123046875, 2.0423583984375, 2.19195556640625, 2.341552734375, 2.49114990234375, 2.6407470703125, 2.79034423828125, 2.93994140625, 3.08953857421875, 3.2391357421875, 3.38873291015625, 3.538330078125, 3.68792724609375, 3.8375244140625, 3.98712158203125, 4.13671875]}, "gradients/encoder.encoder.layers.20.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 2.0, 2.0, 4.0, 2.0, 6.0, 12.0, 29.0, 49.0, 87.0, 145.0, 217.0, 160.0, 123.0, 78.0, 40.0, 13.0, 6.0, 4.0, 3.0, 4.0, 2.0, 0.0, 3.0, 4.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.39453125, -2.308319091796875, -2.22210693359375, -2.135894775390625, -2.0496826171875, -1.963470458984375, -1.87725830078125, -1.791046142578125, -1.704833984375, -1.618621826171875, -1.53240966796875, -1.446197509765625, -1.3599853515625, -1.273773193359375, -1.18756103515625, -1.101348876953125, -1.01513671875, -0.928924560546875, -0.84271240234375, -0.756500244140625, -0.6702880859375, -0.584075927734375, -0.49786376953125, -0.411651611328125, -0.325439453125, -0.239227294921875, -0.15301513671875, -0.066802978515625, 0.0194091796875, 0.105621337890625, 0.19183349609375, 0.278045654296875, 0.3642578125, 0.450469970703125, 0.53668212890625, 0.622894287109375, 0.7091064453125, 0.795318603515625, 0.88153076171875, 0.967742919921875, 1.053955078125, 1.140167236328125, 1.22637939453125, 1.312591552734375, 1.3988037109375, 1.485015869140625, 1.57122802734375, 1.657440185546875, 1.74365234375, 1.829864501953125, 1.91607666015625, 2.002288818359375, 2.0885009765625, 2.174713134765625, 2.26092529296875, 2.347137451171875, 2.433349609375, 2.519561767578125, 2.60577392578125, 2.691986083984375, 2.7781982421875, 2.864410400390625, 2.95062255859375, 3.036834716796875, 3.123046875]}, "gradients/encoder.encoder.layers.20.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 5.0, 49.0, 827.0, 128.0, 9.0, 0.0, 0.0, 1.0, 1.0], "bins": [-280.0517578125, -275.194091796875, -270.3364562988281, -265.4787902832031, -260.6211242675781, -255.7634735107422, -250.9058074951172, -246.04815673828125, -241.19049072265625, -236.3328399658203, -231.4751739501953, -226.61752319335938, -221.75985717773438, -216.90220642089844, -212.04454040527344, -207.1868896484375, -202.32923889160156, -197.47158813476562, -192.61392211914062, -187.7562713623047, -182.8986053466797, -178.04095458984375, -173.18328857421875, -168.3256378173828, -163.46798706054688, -158.61033630371094, -153.75267028808594, -148.89501953125, -144.037353515625, -139.17970275878906, -134.32203674316406, -129.46438598632812, -124.60671997070312, -119.74906158447266, -114.89140319824219, -110.03374481201172, -105.17608642578125, -100.31843566894531, -95.46077728271484, -90.60311889648438, -85.7454605102539, -80.88780212402344, -76.03014373779297, -71.1724853515625, -66.31483459472656, -61.45717239379883, -56.599517822265625, -51.741859436035156, -46.88420104980469, -42.02654266357422, -37.16888427734375, -32.31122970581055, -27.453571319580078, -22.59591293334961, -17.738256454467773, -12.880599975585938, -8.022943496704102, -3.165286064147949, 1.6923713684082031, 6.5500288009643555, 11.407686233520508, 16.265344619750977, 21.123001098632812, 25.98065757751465, 30.838315963745117]}, "gradients/encoder.encoder.layers.20.layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 1.0, 1.0, 3.0, 2.0, 3.0, 6.0, 5.0, 10.0, 3.0, 11.0, 17.0, 12.0, 14.0, 20.0, 24.0, 25.0, 19.0, 32.0, 40.0, 32.0, 40.0, 42.0, 34.0, 38.0, 50.0, 54.0, 45.0, 46.0, 35.0, 51.0, 45.0, 32.0, 39.0, 32.0, 21.0, 21.0, 13.0, 19.0, 18.0, 13.0, 11.0, 10.0, 7.0, 4.0, 5.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 3.0, 1.0], "bins": [-19.11857795715332, -18.55038070678711, -17.982181549072266, -17.413982391357422, -16.84578514099121, -16.277587890625, -15.709388732910156, -15.141190528869629, -14.572992324829102, -14.004794120788574, -13.436595916748047, -12.86839771270752, -12.300199508666992, -11.732001304626465, -11.163803100585938, -10.59560489654541, -10.027406692504883, -9.459208488464355, -8.891010284423828, -8.3228120803833, -7.754613876342773, -7.186415672302246, -6.618217468261719, -6.050019264221191, -5.481821060180664, -4.913622856140137, -4.345424652099609, -3.777226448059082, -3.2090282440185547, -2.6408300399780273, -2.0726318359375, -1.5044336318969727, -0.9362373352050781, -0.3680391311645508, 0.20015907287597656, 0.7683572769165039, 1.3365554809570312, 1.9047536849975586, 2.472951889038086, 3.0411500930786133, 3.6093482971191406, 4.177546501159668, 4.745744705200195, 5.313942909240723, 5.88214111328125, 6.450339317321777, 7.018537521362305, 7.586735725402832, 8.15493392944336, 8.723132133483887, 9.291330337524414, 9.859528541564941, 10.427726745605469, 10.995924949645996, 11.564123153686523, 12.13232135772705, 12.700519561767578, 13.268717765808105, 13.836915969848633, 14.40511417388916, 14.973312377929688, 15.541510581970215, 16.109708786010742, 16.677906036376953, 17.246105194091797]}, "gradients/encoder.encoder.layers.19.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 4.0, 2.0, 2.0, 1.0, 3.0, 3.0, 3.0, 3.0, 5.0, 11.0, 23.0, 42.0, 51.0, 134.0, 279.0, 793.0, 4442.0, 3191120.0, 992884.0, 3565.0, 566.0, 191.0, 89.0, 30.0, 18.0, 13.0, 12.0, 2.0, 5.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0], "bins": [-23.25, -22.7091064453125, -22.168212890625, -21.6273193359375, -21.08642578125, -20.5455322265625, -20.004638671875, -19.4637451171875, -18.9228515625, -18.3819580078125, -17.841064453125, -17.3001708984375, -16.75927734375, -16.2183837890625, -15.677490234375, -15.1365966796875, -14.595703125, -14.0548095703125, -13.513916015625, -12.9730224609375, -12.43212890625, -11.8912353515625, -11.350341796875, -10.8094482421875, -10.2685546875, -9.7276611328125, -9.186767578125, -8.6458740234375, -8.10498046875, -7.5640869140625, -7.023193359375, -6.4822998046875, -5.94140625, -5.4005126953125, -4.859619140625, -4.3187255859375, -3.77783203125, -3.2369384765625, -2.696044921875, -2.1551513671875, -1.6142578125, -1.0733642578125, -0.532470703125, 0.0084228515625, 0.54931640625, 1.0902099609375, 1.631103515625, 2.1719970703125, 2.712890625, 3.2537841796875, 3.794677734375, 4.3355712890625, 4.87646484375, 5.4173583984375, 5.958251953125, 6.4991455078125, 7.0400390625, 7.5809326171875, 8.121826171875, 8.6627197265625, 9.20361328125, 9.7445068359375, 10.285400390625, 10.8262939453125, 11.3671875]}, "gradients/encoder.encoder.layers.19.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 2.0, 2.0, 0.0, 0.0, 2.0, 4.0, 3.0, 6.0, 3.0, 9.0, 18.0, 25.0, 48.0, 79.0, 108.0, 114.0, 137.0, 139.0, 89.0, 83.0, 59.0, 33.0, 20.0, 10.0, 9.0, 6.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.560546875, -1.5228805541992188, -1.4852142333984375, -1.4475479125976562, -1.409881591796875, -1.3722152709960938, -1.3345489501953125, -1.2968826293945312, -1.25921630859375, -1.2215499877929688, -1.1838836669921875, -1.1462173461914062, -1.108551025390625, -1.0708847045898438, -1.0332183837890625, -0.9955520629882812, -0.9578857421875, -0.9202194213867188, -0.8825531005859375, -0.8448867797851562, -0.807220458984375, -0.7695541381835938, -0.7318878173828125, -0.6942214965820312, -0.65655517578125, -0.6188888549804688, -0.5812225341796875, -0.5435562133789062, -0.505889892578125, -0.46822357177734375, -0.4305572509765625, -0.39289093017578125, -0.355224609375, -0.31755828857421875, -0.2798919677734375, -0.24222564697265625, -0.204559326171875, -0.16689300537109375, -0.1292266845703125, -0.09156036376953125, -0.05389404296875, -0.01622772216796875, 0.0214385986328125, 0.05910491943359375, 0.096771240234375, 0.13443756103515625, 0.1721038818359375, 0.20977020263671875, 0.2474365234375, 0.28510284423828125, 0.3227691650390625, 0.36043548583984375, 0.398101806640625, 0.43576812744140625, 0.4734344482421875, 0.5111007690429688, 0.54876708984375, 0.5864334106445312, 0.6240997314453125, 0.6617660522460938, 0.699432373046875, 0.7370986938476562, 0.7747650146484375, 0.8124313354492188, 0.85009765625]}, "gradients/encoder.encoder.layers.19.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 6.0, 9.0, 7.0, 28.0, 76.0, 213.0, 786.0, 9841.0, 4137372.0, 43585.0, 1797.0, 349.0, 120.0, 54.0, 25.0, 5.0, 8.0, 3.0, 1.0, 1.0, 2.0, 1.0, 1.0, 3.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-9.640625, -9.150390625, -8.66015625, -8.169921875, -7.6796875, -7.189453125, -6.69921875, -6.208984375, -5.71875, -5.228515625, -4.73828125, -4.248046875, -3.7578125, -3.267578125, -2.77734375, -2.287109375, -1.796875, -1.306640625, -0.81640625, -0.326171875, 0.1640625, 0.654296875, 1.14453125, 1.634765625, 2.125, 2.615234375, 3.10546875, 3.595703125, 4.0859375, 4.576171875, 5.06640625, 5.556640625, 6.046875, 6.537109375, 7.02734375, 7.517578125, 8.0078125, 8.498046875, 8.98828125, 9.478515625, 9.96875, 10.458984375, 10.94921875, 11.439453125, 11.9296875, 12.419921875, 12.91015625, 13.400390625, 13.890625, 14.380859375, 14.87109375, 15.361328125, 15.8515625, 16.341796875, 16.83203125, 17.322265625, 17.8125, 18.302734375, 18.79296875, 19.283203125, 19.7734375, 20.263671875, 20.75390625, 21.244140625, 21.734375]}, "gradients/encoder.encoder.layers.19.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 2.0, 2.0, 1.0, 2.0, 4.0, 1.0, 2.0, 5.0, 9.0, 9.0, 12.0, 32.0, 52.0, 84.0, 164.0, 543.0, 2192.0, 586.0, 179.0, 85.0, 33.0, 32.0, 14.0, 12.0, 10.0, 4.0, 6.0, 3.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.9501953125, -0.9015350341796875, -0.852874755859375, -0.8042144775390625, -0.75555419921875, -0.7068939208984375, -0.658233642578125, -0.6095733642578125, -0.5609130859375, -0.5122528076171875, -0.463592529296875, -0.4149322509765625, -0.36627197265625, -0.3176116943359375, -0.268951416015625, -0.2202911376953125, -0.171630859375, -0.1229705810546875, -0.074310302734375, -0.0256500244140625, 0.02301025390625, 0.0716705322265625, 0.120330810546875, 0.1689910888671875, 0.2176513671875, 0.2663116455078125, 0.314971923828125, 0.3636322021484375, 0.41229248046875, 0.4609527587890625, 0.509613037109375, 0.5582733154296875, 0.60693359375, 0.6555938720703125, 0.704254150390625, 0.7529144287109375, 0.80157470703125, 0.8502349853515625, 0.898895263671875, 0.9475555419921875, 0.9962158203125, 1.0448760986328125, 1.093536376953125, 1.1421966552734375, 1.19085693359375, 1.2395172119140625, 1.288177490234375, 1.3368377685546875, 1.385498046875, 1.4341583251953125, 1.482818603515625, 1.5314788818359375, 1.58013916015625, 1.6287994384765625, 1.677459716796875, 1.7261199951171875, 1.7747802734375, 1.8234405517578125, 1.872100830078125, 1.9207611083984375, 1.96942138671875, 2.0180816650390625, 2.066741943359375, 2.1154022216796875, 2.1640625]}, "gradients/encoder.encoder.layers.19.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 3.0, 8.0, 57.0, 230.0, 411.0, 214.0, 49.0, 17.0, 6.0, 8.0, 2.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.721809387207031, -5.294982433319092, -4.868155479431152, -4.441328525543213, -4.014501571655273, -3.587674379348755, -3.1608474254608154, -2.734020471572876, -2.3071935176849365, -1.880366563796997, -1.4535396099090576, -1.0267125368118286, -0.5998855829238892, -0.17305850982666016, 0.2537684440612793, 0.6805953979492188, 1.1074223518371582, 1.5342493057250977, 1.961076259613037, 2.3879032135009766, 2.814730167388916, 3.2415573596954346, 3.668384313583374, 4.095211029052734, 4.522038459777832, 4.9488654136657715, 5.375692367553711, 5.80251932144165, 6.22934627532959, 6.6561737060546875, 7.083000183105469, 7.509827613830566, 7.936654090881348, 8.363481521606445, 8.790307998657227, 9.217135429382324, 9.643961906433105, 10.070789337158203, 10.497615814208984, 10.924443244934082, 11.351269721984863, 11.778097152709961, 12.204923629760742, 12.63175106048584, 13.058577537536621, 13.485404968261719, 13.9122314453125, 14.339058876037598, 14.765886306762695, 15.192713737487793, 15.619540214538574, 16.046367645263672, 16.473194122314453, 16.900020599365234, 17.32684898376465, 17.75367546081543, 18.18050193786621, 18.607328414916992, 19.034156799316406, 19.460983276367188, 19.88780975341797, 20.31463623046875, 20.741464614868164, 21.168291091918945, 21.595117568969727]}, "gradients/encoder.encoder.layers.19.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 0.0, 0.0, 2.0, 3.0, 2.0, 4.0, 9.0, 9.0, 26.0, 23.0, 28.0, 58.0, 56.0, 63.0, 76.0, 67.0, 79.0, 87.0, 72.0, 65.0, 68.0, 44.0, 43.0, 40.0, 21.0, 17.0, 16.0, 13.0, 8.0, 5.0, 2.0, 2.0, 3.0, 1.0, 0.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.129608631134033, -6.918914794921875, -6.708220958709717, -6.497527122497559, -6.286832809448242, -6.076138973236084, -5.865445137023926, -5.654751300811768, -5.444057464599609, -5.233363628387451, -5.022669792175293, -4.811975479125977, -4.601281642913818, -4.39058780670166, -4.179893970489502, -3.9692001342773438, -3.7585058212280273, -3.547811985015869, -3.337117910385132, -3.1264240741729736, -2.9157299995422363, -2.705036163330078, -2.49434232711792, -2.2836484909057617, -2.0729544162750244, -1.8622604608535767, -1.651566505432129, -1.4408726692199707, -1.230178713798523, -1.0194847583770752, -0.808790922164917, -0.5980969667434692, -0.3874034881591797, -0.17670956254005432, 0.033984363079071045, 0.24467825889587402, 0.4553722143173218, 0.6660661697387695, 0.8767600059509277, 1.0874539613723755, 1.2981479167938232, 1.508841872215271, 1.7195358276367188, 1.930229663848877, 2.140923500061035, 2.3516175746917725, 2.5623114109039307, 2.773005485534668, 2.983699321746826, 3.1943931579589844, 3.4050872325897217, 3.61578106880188, 3.826475143432617, 4.037168979644775, 4.247862815856934, 4.458556652069092, 4.66925048828125, 4.879944324493408, 5.090638160705566, 5.301332473754883, 5.512026309967041, 5.722720146179199, 5.933413982391357, 6.144107818603516, 6.354802131652832]}, "gradients/encoder.encoder.layers.19.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 4.0, 1.0, 5.0, 2.0, 0.0, 5.0, 3.0, 5.0, 0.0, 10.0, 6.0, 25.0, 18.0, 31.0, 34.0, 35.0, 67.0, 57.0, 132.0, 172.0, 251.0, 358.0, 525.0, 883.0, 1428.0, 2868.0, 7044.0, 23049.0, 101842.0, 458075.0, 351457.0, 71396.0, 16973.0, 5665.0, 2431.0, 1329.0, 771.0, 506.0, 317.0, 191.0, 170.0, 102.0, 90.0, 58.0, 44.0, 29.0, 27.0, 23.0, 10.0, 21.0, 7.0, 3.0, 5.0, 3.0, 3.0, 2.0, 0.0, 3.0, 3.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.95703125, -2.861297607421875, -2.76556396484375, -2.669830322265625, -2.5740966796875, -2.478363037109375, -2.38262939453125, -2.286895751953125, -2.191162109375, -2.095428466796875, -1.99969482421875, -1.903961181640625, -1.8082275390625, -1.712493896484375, -1.61676025390625, -1.521026611328125, -1.42529296875, -1.329559326171875, -1.23382568359375, -1.138092041015625, -1.0423583984375, -0.946624755859375, -0.85089111328125, -0.755157470703125, -0.659423828125, -0.563690185546875, -0.46795654296875, -0.372222900390625, -0.2764892578125, -0.180755615234375, -0.08502197265625, 0.010711669921875, 0.1064453125, 0.202178955078125, 0.29791259765625, 0.393646240234375, 0.4893798828125, 0.585113525390625, 0.68084716796875, 0.776580810546875, 0.872314453125, 0.968048095703125, 1.06378173828125, 1.159515380859375, 1.2552490234375, 1.350982666015625, 1.44671630859375, 1.542449951171875, 1.63818359375, 1.733917236328125, 1.82965087890625, 1.925384521484375, 2.0211181640625, 2.116851806640625, 2.21258544921875, 2.308319091796875, 2.404052734375, 2.499786376953125, 2.59552001953125, 2.691253662109375, 2.7869873046875, 2.882720947265625, 2.97845458984375, 3.074188232421875, 3.169921875]}, "gradients/encoder.encoder.layers.19.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 3.0, 0.0, 0.0, 4.0, 1.0, 0.0, 1.0, 4.0, 6.0, 6.0, 9.0, 21.0, 42.0, 59.0, 80.0, 106.0, 123.0, 142.0, 126.0, 99.0, 58.0, 49.0, 23.0, 26.0, 10.0, 8.0, 3.0, 3.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.48046875, -1.442596435546875, -1.40472412109375, -1.366851806640625, -1.3289794921875, -1.291107177734375, -1.25323486328125, -1.215362548828125, -1.177490234375, -1.139617919921875, -1.10174560546875, -1.063873291015625, -1.0260009765625, -0.988128662109375, -0.95025634765625, -0.912384033203125, -0.87451171875, -0.836639404296875, -0.79876708984375, -0.760894775390625, -0.7230224609375, -0.685150146484375, -0.64727783203125, -0.609405517578125, -0.571533203125, -0.533660888671875, -0.49578857421875, -0.457916259765625, -0.4200439453125, -0.382171630859375, -0.34429931640625, -0.306427001953125, -0.2685546875, -0.230682373046875, -0.19281005859375, -0.154937744140625, -0.1170654296875, -0.079193115234375, -0.04132080078125, -0.003448486328125, 0.034423828125, 0.072296142578125, 0.11016845703125, 0.148040771484375, 0.1859130859375, 0.223785400390625, 0.26165771484375, 0.299530029296875, 0.33740234375, 0.375274658203125, 0.41314697265625, 0.451019287109375, 0.4888916015625, 0.526763916015625, 0.56463623046875, 0.602508544921875, 0.640380859375, 0.678253173828125, 0.71612548828125, 0.753997802734375, 0.7918701171875, 0.829742431640625, 0.86761474609375, 0.905487060546875, 0.943359375]}, "gradients/encoder.encoder.layers.19.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 1.0, 2.0, 2.0, 2.0, 3.0, 2.0, 14.0, 6.0, 16.0, 20.0, 24.0, 28.0, 46.0, 68.0, 93.0, 178.0, 245.0, 382.0, 645.0, 1237.0, 2931.0, 8274.0, 30197.0, 154854.0, 565094.0, 225083.0, 41525.0, 10662.0, 3520.0, 1484.0, 736.0, 413.0, 252.0, 164.0, 113.0, 69.0, 54.0, 38.0, 22.0, 17.0, 18.0, 8.0, 6.0, 7.0, 6.0, 2.0, 3.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.751953125, -3.646575927734375, -3.54119873046875, -3.435821533203125, -3.3304443359375, -3.225067138671875, -3.11968994140625, -3.014312744140625, -2.908935546875, -2.803558349609375, -2.69818115234375, -2.592803955078125, -2.4874267578125, -2.382049560546875, -2.27667236328125, -2.171295166015625, -2.06591796875, -1.960540771484375, -1.85516357421875, -1.749786376953125, -1.6444091796875, -1.539031982421875, -1.43365478515625, -1.328277587890625, -1.222900390625, -1.117523193359375, -1.01214599609375, -0.906768798828125, -0.8013916015625, -0.696014404296875, -0.59063720703125, -0.485260009765625, -0.3798828125, -0.274505615234375, -0.16912841796875, -0.063751220703125, 0.0416259765625, 0.147003173828125, 0.25238037109375, 0.357757568359375, 0.463134765625, 0.568511962890625, 0.67388916015625, 0.779266357421875, 0.8846435546875, 0.990020751953125, 1.09539794921875, 1.200775146484375, 1.30615234375, 1.411529541015625, 1.51690673828125, 1.622283935546875, 1.7276611328125, 1.833038330078125, 1.93841552734375, 2.043792724609375, 2.149169921875, 2.254547119140625, 2.35992431640625, 2.465301513671875, 2.5706787109375, 2.676055908203125, 2.78143310546875, 2.886810302734375, 2.9921875]}, "gradients/encoder.encoder.layers.19.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 2.0, 4.0, 4.0, 7.0, 5.0, 4.0, 9.0, 10.0, 9.0, 14.0, 15.0, 25.0, 26.0, 28.0, 35.0, 25.0, 35.0, 28.0, 50.0, 41.0, 49.0, 42.0, 62.0, 43.0, 43.0, 53.0, 45.0, 35.0, 44.0, 40.0, 22.0, 19.0, 24.0, 22.0, 21.0, 17.0, 8.0, 12.0, 6.0, 8.0, 9.0, 1.0, 8.0, 1.0, 1.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-2.9140625, -2.82550048828125, -2.7369384765625, -2.64837646484375, -2.559814453125, -2.47125244140625, -2.3826904296875, -2.29412841796875, -2.20556640625, -2.11700439453125, -2.0284423828125, -1.93988037109375, -1.851318359375, -1.76275634765625, -1.6741943359375, -1.58563232421875, -1.4970703125, -1.40850830078125, -1.3199462890625, -1.23138427734375, -1.142822265625, -1.05426025390625, -0.9656982421875, -0.87713623046875, -0.78857421875, -0.70001220703125, -0.6114501953125, -0.52288818359375, -0.434326171875, -0.34576416015625, -0.2572021484375, -0.16864013671875, -0.080078125, 0.00848388671875, 0.0970458984375, 0.18560791015625, 0.274169921875, 0.36273193359375, 0.4512939453125, 0.53985595703125, 0.62841796875, 0.71697998046875, 0.8055419921875, 0.89410400390625, 0.982666015625, 1.07122802734375, 1.1597900390625, 1.24835205078125, 1.3369140625, 1.42547607421875, 1.5140380859375, 1.60260009765625, 1.691162109375, 1.77972412109375, 1.8682861328125, 1.95684814453125, 2.04541015625, 2.13397216796875, 2.2225341796875, 2.31109619140625, 2.399658203125, 2.48822021484375, 2.5767822265625, 2.66534423828125, 2.75390625]}, "gradients/encoder.encoder.layers.19.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 4.0, 2.0, 2.0, 7.0, 2.0, 6.0, 4.0, 9.0, 10.0, 12.0, 22.0, 30.0, 25.0, 46.0, 53.0, 107.0, 147.0, 325.0, 581.0, 1227.0, 3315.0, 10866.0, 57432.0, 463366.0, 441005.0, 53723.0, 10544.0, 3150.0, 1226.0, 564.0, 255.0, 154.0, 93.0, 80.0, 43.0, 28.0, 21.0, 12.0, 9.0, 11.0, 14.0, 11.0, 7.0, 5.0, 1.0, 3.0, 2.0, 2.0, 0.0, 3.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-1.162109375, -1.1268768310546875, -1.091644287109375, -1.0564117431640625, -1.02117919921875, -0.9859466552734375, -0.950714111328125, -0.9154815673828125, -0.8802490234375, -0.8450164794921875, -0.809783935546875, -0.7745513916015625, -0.73931884765625, -0.7040863037109375, -0.668853759765625, -0.6336212158203125, -0.598388671875, -0.5631561279296875, -0.527923583984375, -0.4926910400390625, -0.45745849609375, -0.4222259521484375, -0.386993408203125, -0.3517608642578125, -0.3165283203125, -0.2812957763671875, -0.246063232421875, -0.2108306884765625, -0.17559814453125, -0.1403656005859375, -0.105133056640625, -0.0699005126953125, -0.03466796875, 0.0005645751953125, 0.035797119140625, 0.0710296630859375, 0.10626220703125, 0.1414947509765625, 0.176727294921875, 0.2119598388671875, 0.2471923828125, 0.2824249267578125, 0.317657470703125, 0.3528900146484375, 0.38812255859375, 0.4233551025390625, 0.458587646484375, 0.4938201904296875, 0.529052734375, 0.5642852783203125, 0.599517822265625, 0.6347503662109375, 0.66998291015625, 0.7052154541015625, 0.740447998046875, 0.7756805419921875, 0.8109130859375, 0.8461456298828125, 0.881378173828125, 0.9166107177734375, 0.95184326171875, 0.9870758056640625, 1.022308349609375, 1.0575408935546875, 1.0927734375]}, "gradients/encoder.encoder.layers.19.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 3.0, 3.0, 7.0, 9.0, 11.0, 26.0, 29.0, 41.0, 57.0, 82.0, 119.0, 117.0, 128.0, 100.0, 85.0, 50.0, 48.0, 38.0, 21.0, 15.0, 7.0, 6.0, 3.0, 2.0, 3.0, 4.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00015497207641601562, -0.00014901533722877502, -0.00014305859804153442, -0.00013710185885429382, -0.00013114511966705322, -0.00012518838047981262, -0.00011923164129257202, -0.00011327490210533142, -0.00010731816291809082, -0.00010136142373085022, -9.540468454360962e-05, -8.944794535636902e-05, -8.349120616912842e-05, -7.753446698188782e-05, -7.157772779464722e-05, -6.562098860740662e-05, -5.9664249420166016e-05, -5.3707510232925415e-05, -4.7750771045684814e-05, -4.1794031858444214e-05, -3.583729267120361e-05, -2.9880553483963013e-05, -2.3923814296722412e-05, -1.796707510948181e-05, -1.2010335922241211e-05, -6.05359673500061e-06, -9.685754776000977e-08, 5.859881639480591e-06, 1.1816620826721191e-05, 1.7773360013961792e-05, 2.3730099201202393e-05, 2.9686838388442993e-05, 3.5643577575683594e-05, 4.1600316762924194e-05, 4.7557055950164795e-05, 5.3513795137405396e-05, 5.9470534324645996e-05, 6.54272735118866e-05, 7.13840126991272e-05, 7.73407518863678e-05, 8.32974910736084e-05, 8.9254230260849e-05, 9.52109694480896e-05, 0.0001011677086353302, 0.0001071244478225708, 0.0001130811870098114, 0.000119037926197052, 0.0001249946653842926, 0.0001309514045715332, 0.0001369081437587738, 0.0001428648829460144, 0.000148821622133255, 0.0001547783613204956, 0.0001607351005077362, 0.0001666918396949768, 0.0001726485788822174, 0.000178605318069458, 0.0001845620572566986, 0.0001905187964439392, 0.0001964755356311798, 0.0002024322748184204, 0.000208389014005661, 0.0002143457531929016, 0.0002203024923801422, 0.0002262592315673828]}, "gradients/encoder.encoder.layers.19.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 3.0, 1.0, 2.0, 4.0, 4.0, 2.0, 6.0, 8.0, 13.0, 9.0, 19.0, 27.0, 38.0, 79.0, 108.0, 203.0, 385.0, 755.0, 1784.0, 5576.0, 27873.0, 318046.0, 623948.0, 56176.0, 8884.0, 2552.0, 982.0, 477.0, 225.0, 131.0, 78.0, 57.0, 28.0, 32.0, 10.0, 9.0, 3.0, 8.0, 6.0, 6.0, 0.0, 1.0, 4.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-1.66015625, -1.612945556640625, -1.56573486328125, -1.518524169921875, -1.4713134765625, -1.424102783203125, -1.37689208984375, -1.329681396484375, -1.282470703125, -1.235260009765625, -1.18804931640625, -1.140838623046875, -1.0936279296875, -1.046417236328125, -0.99920654296875, -0.951995849609375, -0.90478515625, -0.857574462890625, -0.81036376953125, -0.763153076171875, -0.7159423828125, -0.668731689453125, -0.62152099609375, -0.574310302734375, -0.527099609375, -0.479888916015625, -0.43267822265625, -0.385467529296875, -0.3382568359375, -0.291046142578125, -0.24383544921875, -0.196624755859375, -0.1494140625, -0.102203369140625, -0.05499267578125, -0.007781982421875, 0.0394287109375, 0.086639404296875, 0.13385009765625, 0.181060791015625, 0.228271484375, 0.275482177734375, 0.32269287109375, 0.369903564453125, 0.4171142578125, 0.464324951171875, 0.51153564453125, 0.558746337890625, 0.60595703125, 0.653167724609375, 0.70037841796875, 0.747589111328125, 0.7947998046875, 0.842010498046875, 0.88922119140625, 0.936431884765625, 0.983642578125, 1.030853271484375, 1.07806396484375, 1.125274658203125, 1.1724853515625, 1.219696044921875, 1.26690673828125, 1.314117431640625, 1.361328125]}, "gradients/encoder.encoder.layers.19.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 1.0, 0.0, 1.0, 2.0, 2.0, 2.0, 3.0, 3.0, 5.0, 11.0, 15.0, 12.0, 31.0, 35.0, 61.0, 75.0, 91.0, 112.0, 117.0, 90.0, 89.0, 65.0, 55.0, 44.0, 32.0, 23.0, 10.0, 5.0, 5.0, 8.0, 4.0, 3.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.4404296875, -1.4019317626953125, -1.363433837890625, -1.3249359130859375, -1.28643798828125, -1.2479400634765625, -1.209442138671875, -1.1709442138671875, -1.1324462890625, -1.0939483642578125, -1.055450439453125, -1.0169525146484375, -0.97845458984375, -0.9399566650390625, -0.901458740234375, -0.8629608154296875, -0.824462890625, -0.7859649658203125, -0.747467041015625, -0.7089691162109375, -0.67047119140625, -0.6319732666015625, -0.593475341796875, -0.5549774169921875, -0.5164794921875, -0.4779815673828125, -0.439483642578125, -0.4009857177734375, -0.36248779296875, -0.3239898681640625, -0.285491943359375, -0.2469940185546875, -0.20849609375, -0.1699981689453125, -0.131500244140625, -0.0930023193359375, -0.05450439453125, -0.0160064697265625, 0.022491455078125, 0.0609893798828125, 0.0994873046875, 0.1379852294921875, 0.176483154296875, 0.2149810791015625, 0.25347900390625, 0.2919769287109375, 0.330474853515625, 0.3689727783203125, 0.407470703125, 0.4459686279296875, 0.484466552734375, 0.5229644775390625, 0.56146240234375, 0.5999603271484375, 0.638458251953125, 0.6769561767578125, 0.7154541015625, 0.7539520263671875, 0.792449951171875, 0.8309478759765625, 0.86944580078125, 0.9079437255859375, 0.946441650390625, 0.9849395751953125, 1.0234375]}, "gradients/encoder.encoder.layers.19.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 1.0, 1.0, 3.0, 6.0, 18.0, 130.0, 506.0, 292.0, 44.0, 8.0, 4.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-51.57118225097656, -49.567386627197266, -47.56359100341797, -45.55979919433594, -43.55600357055664, -41.552207946777344, -39.54841613769531, -37.544620513916016, -35.54082489013672, -33.53702926635742, -31.533235549926758, -29.529441833496094, -27.525646209716797, -25.5218505859375, -23.518056869506836, -21.514263153076172, -19.510467529296875, -17.506671905517578, -15.502878189086914, -13.499083518981934, -11.495288848876953, -9.491494178771973, -7.487699508666992, -5.483904838562012, -3.4801101684570312, -1.4763154983520508, 0.5274791717529297, 2.53127384185791, 4.535068511962891, 6.538863182067871, 8.542657852172852, 10.546452522277832, 12.550247192382812, 14.554041862487793, 16.557836532592773, 18.561630249023438, 20.565425872802734, 22.56922149658203, 24.573015213012695, 26.57680892944336, 28.580604553222656, 30.584400177001953, 32.58819580078125, 34.59198760986328, 36.59578323364258, 38.599578857421875, 40.603370666503906, 42.6071662902832, 44.6109619140625, 46.6147575378418, 48.618553161621094, 50.622344970703125, 52.62614059448242, 54.62993621826172, 56.63372802734375, 58.63752365112305, 60.641319274902344, 62.64511489868164, 64.64891052246094, 66.65270233154297, 68.656494140625, 70.66029357910156, 72.6640853881836, 74.66788482666016, 76.67167663574219]}, "gradients/encoder.encoder.layers.19.layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 3.0, 4.0, 0.0, 2.0, 4.0, 1.0, 7.0, 7.0, 6.0, 13.0, 15.0, 11.0, 21.0, 23.0, 20.0, 31.0, 28.0, 35.0, 37.0, 34.0, 44.0, 43.0, 51.0, 44.0, 57.0, 45.0, 45.0, 45.0, 46.0, 43.0, 35.0, 31.0, 28.0, 28.0, 27.0, 22.0, 12.0, 11.0, 16.0, 11.0, 6.0, 6.0, 3.0, 5.0, 2.0, 3.0, 4.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-14.364089965820312, -13.83922004699707, -13.314350128173828, -12.789480209350586, -12.26461124420166, -11.739741325378418, -11.214871406555176, -10.690001487731934, -10.165132522583008, -9.640262603759766, -9.115392684936523, -8.590522766113281, -8.065653800964355, -7.540783882141113, -7.015913963317871, -6.491044044494629, -5.966174125671387, -5.4413042068481445, -4.9164347648620605, -4.391564846038818, -3.8666951656341553, -3.341825485229492, -2.81695556640625, -2.292085886001587, -1.7672162055969238, -1.2423465251922607, -0.7174767255783081, -0.19260692596435547, 0.3322627544403076, 0.8571324348449707, 1.382002353668213, 1.906872034072876, 2.431741714477539, 2.956611394882202, 3.4814810752868652, 4.006350994110107, 4.531220436096191, 5.056090354919434, 5.580960273742676, 6.105830192565918, 6.630699634552002, 7.155569553375244, 7.680438995361328, 8.20530891418457, 8.730178833007812, 9.255048751831055, 9.779918670654297, 10.304787635803223, 10.829657554626465, 11.354527473449707, 11.87939739227295, 12.404266357421875, 12.929136276245117, 13.45400619506836, 13.978876113891602, 14.503746032714844, 15.028615951538086, 15.553485870361328, 16.07835578918457, 16.603225708007812, 17.128095626831055, 17.652965545654297, 18.177833557128906, 18.70270347595215, 19.22757339477539]}, "gradients/encoder.encoder.layers.18.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 4.0, 0.0, 5.0, 6.0, 2.0, 9.0, 8.0, 16.0, 17.0, 25.0, 24.0, 36.0, 79.0, 137.0, 248.0, 532.0, 1229.0, 3773.0, 19922.0, 3547816.0, 600984.0, 14576.0, 3012.0, 1001.0, 410.0, 188.0, 89.0, 59.0, 25.0, 17.0, 13.0, 9.0, 9.0, 5.0, 6.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.7578125, -6.5748291015625, -6.391845703125, -6.2088623046875, -6.02587890625, -5.8428955078125, -5.659912109375, -5.4769287109375, -5.2939453125, -5.1109619140625, -4.927978515625, -4.7449951171875, -4.56201171875, -4.3790283203125, -4.196044921875, -4.0130615234375, -3.830078125, -3.6470947265625, -3.464111328125, -3.2811279296875, -3.09814453125, -2.9151611328125, -2.732177734375, -2.5491943359375, -2.3662109375, -2.1832275390625, -2.000244140625, -1.8172607421875, -1.63427734375, -1.4512939453125, -1.268310546875, -1.0853271484375, -0.90234375, -0.7193603515625, -0.536376953125, -0.3533935546875, -0.17041015625, 0.0125732421875, 0.195556640625, 0.3785400390625, 0.5615234375, 0.7445068359375, 0.927490234375, 1.1104736328125, 1.29345703125, 1.4764404296875, 1.659423828125, 1.8424072265625, 2.025390625, 2.2083740234375, 2.391357421875, 2.5743408203125, 2.75732421875, 2.9403076171875, 3.123291015625, 3.3062744140625, 3.4892578125, 3.6722412109375, 3.855224609375, 4.0382080078125, 4.22119140625, 4.4041748046875, 4.587158203125, 4.7701416015625, 4.953125]}, "gradients/encoder.encoder.layers.18.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 3.0, 0.0, 3.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 4.0, 9.0, 5.0, 9.0, 21.0, 19.0, 33.0, 44.0, 64.0, 83.0, 95.0, 91.0, 109.0, 104.0, 87.0, 67.0, 49.0, 31.0, 27.0, 21.0, 9.0, 7.0, 3.0, 5.0, 3.0, 2.0, 1.0, 3.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-1.419921875, -1.3860549926757812, -1.3521881103515625, -1.3183212280273438, -1.284454345703125, -1.2505874633789062, -1.2167205810546875, -1.1828536987304688, -1.14898681640625, -1.1151199340820312, -1.0812530517578125, -1.0473861694335938, -1.013519287109375, -0.9796524047851562, -0.9457855224609375, -0.9119186401367188, -0.8780517578125, -0.8441848754882812, -0.8103179931640625, -0.7764511108398438, -0.742584228515625, -0.7087173461914062, -0.6748504638671875, -0.6409835815429688, -0.60711669921875, -0.5732498168945312, -0.5393829345703125, -0.5055160522460938, -0.471649169921875, -0.43778228759765625, -0.4039154052734375, -0.37004852294921875, -0.336181640625, -0.30231475830078125, -0.2684478759765625, -0.23458099365234375, -0.200714111328125, -0.16684722900390625, -0.1329803466796875, -0.09911346435546875, -0.06524658203125, -0.03137969970703125, 0.0024871826171875, 0.03635406494140625, 0.070220947265625, 0.10408782958984375, 0.1379547119140625, 0.17182159423828125, 0.2056884765625, 0.23955535888671875, 0.2734222412109375, 0.30728912353515625, 0.341156005859375, 0.37502288818359375, 0.4088897705078125, 0.44275665283203125, 0.47662353515625, 0.5104904174804688, 0.5443572998046875, 0.5782241821289062, 0.612091064453125, 0.6459579467773438, 0.6798248291015625, 0.7136917114257812, 0.74755859375]}, "gradients/encoder.encoder.layers.18.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 4.0, 9.0, 17.0, 50.0, 106.0, 278.0, 1898.0, 3307880.0, 882084.0, 1558.0, 260.0, 93.0, 31.0, 14.0, 9.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-16.15625, -15.7308349609375, -15.305419921875, -14.8800048828125, -14.45458984375, -14.0291748046875, -13.603759765625, -13.1783447265625, -12.7529296875, -12.3275146484375, -11.902099609375, -11.4766845703125, -11.05126953125, -10.6258544921875, -10.200439453125, -9.7750244140625, -9.349609375, -8.9241943359375, -8.498779296875, -8.0733642578125, -7.64794921875, -7.2225341796875, -6.797119140625, -6.3717041015625, -5.9462890625, -5.5208740234375, -5.095458984375, -4.6700439453125, -4.24462890625, -3.8192138671875, -3.393798828125, -2.9683837890625, -2.54296875, -2.1175537109375, -1.692138671875, -1.2667236328125, -0.84130859375, -0.4158935546875, 0.009521484375, 0.4349365234375, 0.8603515625, 1.2857666015625, 1.711181640625, 2.1365966796875, 2.56201171875, 2.9874267578125, 3.412841796875, 3.8382568359375, 4.263671875, 4.6890869140625, 5.114501953125, 5.5399169921875, 5.96533203125, 6.3907470703125, 6.816162109375, 7.2415771484375, 7.6669921875, 8.0924072265625, 8.517822265625, 8.9432373046875, 9.36865234375, 9.7940673828125, 10.219482421875, 10.6448974609375, 11.0703125]}, "gradients/encoder.encoder.layers.18.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 0.0, 1.0, 2.0, 3.0, 13.0, 11.0, 10.0, 31.0, 74.0, 167.0, 643.0, 2587.0, 346.0, 96.0, 49.0, 23.0, 12.0, 10.0, 7.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.0546875, -2.0037384033203125, -1.952789306640625, -1.9018402099609375, -1.85089111328125, -1.7999420166015625, -1.748992919921875, -1.6980438232421875, -1.6470947265625, -1.5961456298828125, -1.545196533203125, -1.4942474365234375, -1.44329833984375, -1.3923492431640625, -1.341400146484375, -1.2904510498046875, -1.239501953125, -1.1885528564453125, -1.137603759765625, -1.0866546630859375, -1.03570556640625, -0.9847564697265625, -0.933807373046875, -0.8828582763671875, -0.8319091796875, -0.7809600830078125, -0.730010986328125, -0.6790618896484375, -0.62811279296875, -0.5771636962890625, -0.526214599609375, -0.4752655029296875, -0.42431640625, -0.3733673095703125, -0.322418212890625, -0.2714691162109375, -0.22052001953125, -0.1695709228515625, -0.118621826171875, -0.0676727294921875, -0.0167236328125, 0.0342254638671875, 0.085174560546875, 0.1361236572265625, 0.18707275390625, 0.2380218505859375, 0.288970947265625, 0.3399200439453125, 0.390869140625, 0.4418182373046875, 0.492767333984375, 0.5437164306640625, 0.59466552734375, 0.6456146240234375, 0.696563720703125, 0.7475128173828125, 0.7984619140625, 0.8494110107421875, 0.900360107421875, 0.9513092041015625, 1.00225830078125, 1.0532073974609375, 1.104156494140625, 1.1551055908203125, 1.2060546875]}, "gradients/encoder.encoder.layers.18.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 3.0, 1.0, 4.0, 12.0, 518.0, 460.0, 15.0, 5.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-10.212550163269043, -9.362979888916016, -8.513410568237305, -7.6638407707214355, -6.814270973205566, -5.964701175689697, -5.115131378173828, -4.265561580657959, -3.41599178314209, -2.5664219856262207, -1.7168521881103516, -0.8672823905944824, -0.01771259307861328, 0.8318572044372559, 1.681427001953125, 2.530996799468994, 3.3805665969848633, 4.230136394500732, 5.079706192016602, 5.929275989532471, 6.77884578704834, 7.628415584564209, 8.477985382080078, 9.327554702758789, 10.177124977111816, 11.026695251464844, 11.876264572143555, 12.725833892822266, 13.575404167175293, 14.42497444152832, 15.274543762207031, 16.124113082885742, 16.973682403564453, 17.823251724243164, 18.672821044921875, 19.52239227294922, 20.37196159362793, 21.22153091430664, 22.071102142333984, 22.920671463012695, 23.770240783691406, 24.619810104370117, 25.469379425048828, 26.318950653076172, 27.168519973754883, 28.018089294433594, 28.867660522460938, 29.71722984313965, 30.56679916381836, 31.41636848449707, 32.26593780517578, 33.115509033203125, 33.96508026123047, 34.81464767456055, 35.66421890258789, 36.51378631591797, 37.36335754394531, 38.212928771972656, 39.062496185302734, 39.91206741333008, 40.761634826660156, 41.6112060546875, 42.460777282714844, 43.31034469604492, 44.159915924072266]}, "gradients/encoder.encoder.layers.18.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 4.0, 10.0, 6.0, 4.0, 11.0, 11.0, 18.0, 22.0, 31.0, 31.0, 32.0, 37.0, 45.0, 63.0, 60.0, 66.0, 56.0, 57.0, 72.0, 50.0, 53.0, 38.0, 48.0, 41.0, 32.0, 33.0, 14.0, 20.0, 15.0, 11.0, 7.0, 4.0, 1.0, 3.0, 2.0, 0.0, 3.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.3960537910461426, -2.3008692264556885, -2.2056846618652344, -2.1105000972747803, -2.015315532684326, -1.920130968093872, -1.8249462842941284, -1.7297617197036743, -1.6345771551132202, -1.5393925905227661, -1.444208025932312, -1.3490233421325684, -1.2538387775421143, -1.1586542129516602, -1.063469648361206, -0.968285083770752, -0.8731005191802979, -0.7779159545898438, -0.6827313899993896, -0.5875467658042908, -0.49236220121383667, -0.39717763662338257, -0.3019930124282837, -0.2068084478378296, -0.11162388324737549, -0.016439303755760193, 0.0787452757358551, 0.1739298701286316, 0.2691144347190857, 0.3642989993095398, 0.45948362350463867, 0.5546681880950928, 0.6498527526855469, 0.745037317276001, 0.8402218818664551, 0.935406506061554, 1.0305910110473633, 1.1257755756378174, 1.220960259437561, 1.3161448240280151, 1.4113293886184692, 1.5065139532089233, 1.6016985177993774, 1.696883201599121, 1.7920677661895752, 1.8872523307800293, 1.9824368953704834, 2.0776214599609375, 2.1728060245513916, 2.2679905891418457, 2.3631751537323, 2.458359718322754, 2.553544282913208, 2.648728847503662, 2.7439136505126953, 2.8390979766845703, 2.9342827796936035, 3.0294673442840576, 3.1246519088745117, 3.219836473464966, 3.31502103805542, 3.410205602645874, 3.505390167236328, 3.6005749702453613, 3.6957592964172363]}, "gradients/encoder.encoder.layers.18.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 3.0, 2.0, 7.0, 6.0, 7.0, 15.0, 13.0, 26.0, 37.0, 50.0, 68.0, 134.0, 232.0, 405.0, 816.0, 1930.0, 4959.0, 19236.0, 144040.0, 688339.0, 158474.0, 20680.0, 5266.0, 1890.0, 890.0, 437.0, 227.0, 128.0, 101.0, 35.0, 36.0, 23.0, 18.0, 6.0, 9.0, 7.0, 6.0, 3.0, 1.0, 3.0, 1.0, 0.0, 2.0], "bins": [-5.62890625, -5.489990234375, -5.35107421875, -5.212158203125, -5.0732421875, -4.934326171875, -4.79541015625, -4.656494140625, -4.517578125, -4.378662109375, -4.23974609375, -4.100830078125, -3.9619140625, -3.822998046875, -3.68408203125, -3.545166015625, -3.40625, -3.267333984375, -3.12841796875, -2.989501953125, -2.8505859375, -2.711669921875, -2.57275390625, -2.433837890625, -2.294921875, -2.156005859375, -2.01708984375, -1.878173828125, -1.7392578125, -1.600341796875, -1.46142578125, -1.322509765625, -1.18359375, -1.044677734375, -0.90576171875, -0.766845703125, -0.6279296875, -0.489013671875, -0.35009765625, -0.211181640625, -0.072265625, 0.066650390625, 0.20556640625, 0.344482421875, 0.4833984375, 0.622314453125, 0.76123046875, 0.900146484375, 1.0390625, 1.177978515625, 1.31689453125, 1.455810546875, 1.5947265625, 1.733642578125, 1.87255859375, 2.011474609375, 2.150390625, 2.289306640625, 2.42822265625, 2.567138671875, 2.7060546875, 2.844970703125, 2.98388671875, 3.122802734375, 3.26171875]}, "gradients/encoder.encoder.layers.18.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 1.0, 2.0, 0.0, 4.0, 0.0, 1.0, 0.0, 0.0, 2.0, 7.0, 9.0, 12.0, 9.0, 21.0, 33.0, 39.0, 70.0, 79.0, 98.0, 96.0, 110.0, 98.0, 73.0, 81.0, 59.0, 29.0, 24.0, 20.0, 13.0, 4.0, 6.0, 7.0, 2.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.3818359375, -1.3481826782226562, -1.3145294189453125, -1.2808761596679688, -1.247222900390625, -1.2135696411132812, -1.1799163818359375, -1.1462631225585938, -1.11260986328125, -1.0789566040039062, -1.0453033447265625, -1.0116500854492188, -0.977996826171875, -0.9443435668945312, -0.9106903076171875, -0.8770370483398438, -0.8433837890625, -0.8097305297851562, -0.7760772705078125, -0.7424240112304688, -0.708770751953125, -0.6751174926757812, -0.6414642333984375, -0.6078109741210938, -0.57415771484375, -0.5405044555664062, -0.5068511962890625, -0.47319793701171875, -0.439544677734375, -0.40589141845703125, -0.3722381591796875, -0.33858489990234375, -0.304931640625, -0.27127838134765625, -0.2376251220703125, -0.20397186279296875, -0.170318603515625, -0.13666534423828125, -0.1030120849609375, -0.06935882568359375, -0.03570556640625, -0.00205230712890625, 0.0316009521484375, 0.06525421142578125, 0.098907470703125, 0.13256072998046875, 0.1662139892578125, 0.19986724853515625, 0.2335205078125, 0.26717376708984375, 0.3008270263671875, 0.33448028564453125, 0.368133544921875, 0.40178680419921875, 0.4354400634765625, 0.46909332275390625, 0.50274658203125, 0.5363998413085938, 0.5700531005859375, 0.6037063598632812, 0.637359619140625, 0.6710128784179688, 0.7046661376953125, 0.7383193969726562, 0.77197265625]}, "gradients/encoder.encoder.layers.18.attention.v_proj.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 1.0, 3.0, 0.0, 1.0, 1.0, 3.0, 0.0, 5.0, 2.0, 10.0, 16.0, 15.0, 29.0, 50.0, 72.0, 113.0, 181.0, 273.0, 547.0, 963.0, 2129.0, 6073.0, 24498.0, 143951.0, 616633.0, 207437.0, 32683.0, 7732.0, 2630.0, 1101.0, 552.0, 349.0, 172.0, 118.0, 67.0, 44.0, 38.0, 29.0, 15.0, 11.0, 7.0, 2.0, 7.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-3.10546875, -2.992919921875, -2.88037109375, -2.767822265625, -2.6552734375, -2.542724609375, -2.43017578125, -2.317626953125, -2.205078125, -2.092529296875, -1.97998046875, -1.867431640625, -1.7548828125, -1.642333984375, -1.52978515625, -1.417236328125, -1.3046875, -1.192138671875, -1.07958984375, -0.967041015625, -0.8544921875, -0.741943359375, -0.62939453125, -0.516845703125, -0.404296875, -0.291748046875, -0.17919921875, -0.066650390625, 0.0458984375, 0.158447265625, 0.27099609375, 0.383544921875, 0.49609375, 0.608642578125, 0.72119140625, 0.833740234375, 0.9462890625, 1.058837890625, 1.17138671875, 1.283935546875, 1.396484375, 1.509033203125, 1.62158203125, 1.734130859375, 1.8466796875, 1.959228515625, 2.07177734375, 2.184326171875, 2.296875, 2.409423828125, 2.52197265625, 2.634521484375, 2.7470703125, 2.859619140625, 2.97216796875, 3.084716796875, 3.197265625, 3.309814453125, 3.42236328125, 3.534912109375, 3.6474609375, 3.760009765625, 3.87255859375, 3.985107421875, 4.09765625]}, "gradients/encoder.encoder.layers.18.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 5.0, 1.0, 1.0, 6.0, 6.0, 7.0, 6.0, 10.0, 13.0, 13.0, 11.0, 20.0, 14.0, 20.0, 18.0, 29.0, 39.0, 29.0, 31.0, 45.0, 49.0, 50.0, 58.0, 47.0, 56.0, 51.0, 46.0, 38.0, 37.0, 35.0, 34.0, 32.0, 36.0, 21.0, 15.0, 14.0, 22.0, 8.0, 10.0, 6.0, 7.0, 2.0, 8.0, 4.0, 1.0, 2.0, 1.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-2.548828125, -2.461334228515625, -2.37384033203125, -2.286346435546875, -2.1988525390625, -2.111358642578125, -2.02386474609375, -1.936370849609375, -1.848876953125, -1.761383056640625, -1.67388916015625, -1.586395263671875, -1.4989013671875, -1.411407470703125, -1.32391357421875, -1.236419677734375, -1.14892578125, -1.061431884765625, -0.97393798828125, -0.886444091796875, -0.7989501953125, -0.711456298828125, -0.62396240234375, -0.536468505859375, -0.448974609375, -0.361480712890625, -0.27398681640625, -0.186492919921875, -0.0989990234375, -0.011505126953125, 0.07598876953125, 0.163482666015625, 0.2509765625, 0.338470458984375, 0.42596435546875, 0.513458251953125, 0.6009521484375, 0.688446044921875, 0.77593994140625, 0.863433837890625, 0.950927734375, 1.038421630859375, 1.12591552734375, 1.213409423828125, 1.3009033203125, 1.388397216796875, 1.47589111328125, 1.563385009765625, 1.65087890625, 1.738372802734375, 1.82586669921875, 1.913360595703125, 2.0008544921875, 2.088348388671875, 2.17584228515625, 2.263336181640625, 2.350830078125, 2.438323974609375, 2.52581787109375, 2.613311767578125, 2.7008056640625, 2.788299560546875, 2.87579345703125, 2.963287353515625, 3.05078125]}, "gradients/encoder.encoder.layers.18.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 0.0, 4.0, 2.0, 2.0, 4.0, 1.0, 4.0, 9.0, 13.0, 26.0, 37.0, 41.0, 96.0, 157.0, 366.0, 790.0, 2815.0, 16173.0, 354999.0, 640611.0, 26679.0, 3848.0, 1044.0, 386.0, 186.0, 117.0, 54.0, 30.0, 26.0, 13.0, 10.0, 5.0, 5.0, 3.0, 2.0, 4.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.9619140625, -1.904388427734375, -1.84686279296875, -1.789337158203125, -1.7318115234375, -1.674285888671875, -1.61676025390625, -1.559234619140625, -1.501708984375, -1.444183349609375, -1.38665771484375, -1.329132080078125, -1.2716064453125, -1.214080810546875, -1.15655517578125, -1.099029541015625, -1.04150390625, -0.983978271484375, -0.92645263671875, -0.868927001953125, -0.8114013671875, -0.753875732421875, -0.69635009765625, -0.638824462890625, -0.581298828125, -0.523773193359375, -0.46624755859375, -0.408721923828125, -0.3511962890625, -0.293670654296875, -0.23614501953125, -0.178619384765625, -0.12109375, -0.063568115234375, -0.00604248046875, 0.051483154296875, 0.1090087890625, 0.166534423828125, 0.22406005859375, 0.281585693359375, 0.339111328125, 0.396636962890625, 0.45416259765625, 0.511688232421875, 0.5692138671875, 0.626739501953125, 0.68426513671875, 0.741790771484375, 0.79931640625, 0.856842041015625, 0.91436767578125, 0.971893310546875, 1.0294189453125, 1.086944580078125, 1.14447021484375, 1.201995849609375, 1.259521484375, 1.317047119140625, 1.37457275390625, 1.432098388671875, 1.4896240234375, 1.547149658203125, 1.60467529296875, 1.662200927734375, 1.7197265625]}, "gradients/encoder.encoder.layers.18.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 4.0, 2.0, 1.0, 3.0, 2.0, 8.0, 7.0, 9.0, 8.0, 25.0, 34.0, 50.0, 67.0, 82.0, 111.0, 122.0, 129.0, 95.0, 67.0, 48.0, 32.0, 25.0, 19.0, 13.0, 10.0, 6.0, 9.0, 4.0, 1.0, 3.0, 2.0, 5.0, 3.0, 1.0, 1.0, 3.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00017189979553222656, -0.0001661144196987152, -0.00016032904386520386, -0.0001545436680316925, -0.00014875829219818115, -0.0001429729163646698, -0.00013718754053115845, -0.0001314021646976471, -0.00012561678886413574, -0.00011983141303062439, -0.00011404603719711304, -0.00010826066136360168, -0.00010247528553009033, -9.668990969657898e-05, -9.090453386306763e-05, -8.511915802955627e-05, -7.933378219604492e-05, -7.354840636253357e-05, -6.776303052902222e-05, -6.197765469551086e-05, -5.619227886199951e-05, -5.040690302848816e-05, -4.462152719497681e-05, -3.8836151361465454e-05, -3.30507755279541e-05, -2.726539969444275e-05, -2.1480023860931396e-05, -1.5694648027420044e-05, -9.909272193908691e-06, -4.123896360397339e-06, 1.6614794731140137e-06, 7.446855306625366e-06, 1.3232231140136719e-05, 1.901760697364807e-05, 2.4802982807159424e-05, 3.0588358640670776e-05, 3.637373447418213e-05, 4.215911030769348e-05, 4.7944486141204834e-05, 5.3729861974716187e-05, 5.951523780822754e-05, 6.530061364173889e-05, 7.108598947525024e-05, 7.68713653087616e-05, 8.265674114227295e-05, 8.84421169757843e-05, 9.422749280929565e-05, 0.00010001286864280701, 0.00010579824447631836, 0.00011158362030982971, 0.00011736899614334106, 0.00012315437197685242, 0.00012893974781036377, 0.00013472512364387512, 0.00014051049947738647, 0.00014629587531089783, 0.00015208125114440918, 0.00015786662697792053, 0.00016365200281143188, 0.00016943737864494324, 0.0001752227544784546, 0.00018100813031196594, 0.0001867935061454773, 0.00019257888197898865, 0.0001983642578125]}, "gradients/encoder.encoder.layers.18.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 5.0, 4.0, 2.0, 6.0, 17.0, 16.0, 21.0, 37.0, 74.0, 141.0, 292.0, 661.0, 1834.0, 7117.0, 56242.0, 774291.0, 188644.0, 14389.0, 2976.0, 975.0, 397.0, 172.0, 87.0, 54.0, 30.0, 23.0, 19.0, 12.0, 6.0, 3.0, 0.0, 6.0, 2.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.7822265625, -1.730865478515625, -1.67950439453125, -1.628143310546875, -1.5767822265625, -1.525421142578125, -1.47406005859375, -1.422698974609375, -1.371337890625, -1.319976806640625, -1.26861572265625, -1.217254638671875, -1.1658935546875, -1.114532470703125, -1.06317138671875, -1.011810302734375, -0.96044921875, -0.909088134765625, -0.85772705078125, -0.806365966796875, -0.7550048828125, -0.703643798828125, -0.65228271484375, -0.600921630859375, -0.549560546875, -0.498199462890625, -0.44683837890625, -0.395477294921875, -0.3441162109375, -0.292755126953125, -0.24139404296875, -0.190032958984375, -0.138671875, -0.087310791015625, -0.03594970703125, 0.015411376953125, 0.0667724609375, 0.118133544921875, 0.16949462890625, 0.220855712890625, 0.272216796875, 0.323577880859375, 0.37493896484375, 0.426300048828125, 0.4776611328125, 0.529022216796875, 0.58038330078125, 0.631744384765625, 0.68310546875, 0.734466552734375, 0.78582763671875, 0.837188720703125, 0.8885498046875, 0.939910888671875, 0.99127197265625, 1.042633056640625, 1.093994140625, 1.145355224609375, 1.19671630859375, 1.248077392578125, 1.2994384765625, 1.350799560546875, 1.40216064453125, 1.453521728515625, 1.5048828125]}, "gradients/encoder.encoder.layers.18.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 4.0, 2.0, 2.0, 7.0, 6.0, 8.0, 8.0, 13.0, 20.0, 28.0, 49.0, 54.0, 86.0, 74.0, 89.0, 106.0, 121.0, 93.0, 70.0, 46.0, 30.0, 23.0, 18.0, 11.0, 16.0, 5.0, 7.0, 9.0, 4.0, 0.0, 1.0, 0.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-1.2119140625, -1.1795578002929688, -1.1472015380859375, -1.1148452758789062, -1.082489013671875, -1.0501327514648438, -1.0177764892578125, -0.9854202270507812, -0.95306396484375, -0.9207077026367188, -0.8883514404296875, -0.8559951782226562, -0.823638916015625, -0.7912826538085938, -0.7589263916015625, -0.7265701293945312, -0.6942138671875, -0.6618576049804688, -0.6295013427734375, -0.5971450805664062, -0.564788818359375, -0.5324325561523438, -0.5000762939453125, -0.46772003173828125, -0.43536376953125, -0.40300750732421875, -0.3706512451171875, -0.33829498291015625, -0.305938720703125, -0.27358245849609375, -0.2412261962890625, -0.20886993408203125, -0.176513671875, -0.14415740966796875, -0.1118011474609375, -0.07944488525390625, -0.047088623046875, -0.01473236083984375, 0.0176239013671875, 0.04998016357421875, 0.08233642578125, 0.11469268798828125, 0.1470489501953125, 0.17940521240234375, 0.211761474609375, 0.24411773681640625, 0.2764739990234375, 0.30883026123046875, 0.3411865234375, 0.37354278564453125, 0.4058990478515625, 0.43825531005859375, 0.470611572265625, 0.5029678344726562, 0.5353240966796875, 0.5676803588867188, 0.60003662109375, 0.6323928833007812, 0.6647491455078125, 0.6971054077148438, 0.729461669921875, 0.7618179321289062, 0.7941741943359375, 0.8265304565429688, 0.85888671875]}, "gradients/encoder.encoder.layers.18.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 4.0, 1.0, 5.0, 4.0, 6.0, 13.0, 27.0, 31.0, 77.0, 140.0, 224.0, 213.0, 144.0, 66.0, 28.0, 13.0, 6.0, 6.0, 4.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-16.208221435546875, -15.436664581298828, -14.665107727050781, -13.893550872802734, -13.121994018554688, -12.35043716430664, -11.57888126373291, -10.807324409484863, -10.035767555236816, -9.26421070098877, -8.492653846740723, -7.721097469329834, -6.949540615081787, -6.17798376083374, -5.406427383422852, -4.634870529174805, -3.863313674926758, -3.091756820678711, -2.320200204849243, -1.5486435890197754, -0.7770867347717285, -0.005529880523681641, 0.766026496887207, 1.537583351135254, 2.309140205383301, 3.0806970596313477, 3.8522536754608154, 4.623810291290283, 5.39536714553833, 6.166923999786377, 6.938480377197266, 7.7100372314453125, 8.48159408569336, 9.253150939941406, 10.024707794189453, 10.7962646484375, 11.567821502685547, 12.339378356933594, 13.110934257507324, 13.882491111755371, 14.654047966003418, 15.425604820251465, 16.197160720825195, 16.968717575073242, 17.74027442932129, 18.511831283569336, 19.283388137817383, 20.05494499206543, 20.826501846313477, 21.598058700561523, 22.36961555480957, 23.141172409057617, 23.912729263305664, 24.68428611755371, 25.455841064453125, 26.227397918701172, 26.99895477294922, 27.770511627197266, 28.542068481445312, 29.31362533569336, 30.085182189941406, 30.856739044189453, 31.6282958984375, 32.39985275268555, 33.171409606933594]}, "gradients/encoder.encoder.layers.18.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 0.0, 6.0, 6.0, 4.0, 5.0, 6.0, 9.0, 9.0, 16.0, 15.0, 25.0, 26.0, 33.0, 36.0, 41.0, 45.0, 59.0, 63.0, 74.0, 57.0, 59.0, 75.0, 53.0, 47.0, 40.0, 32.0, 30.0, 31.0, 23.0, 15.0, 22.0, 13.0, 8.0, 7.0, 4.0, 4.0, 4.0, 2.0, 2.0, 4.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-21.58616065979004, -20.995849609375, -20.40553855895996, -19.815227508544922, -19.224918365478516, -18.634607315063477, -18.044296264648438, -17.4539852142334, -16.86367416381836, -16.27336311340332, -15.683052062988281, -15.092741966247559, -14.50243091583252, -13.91211986541748, -13.321809768676758, -12.731498718261719, -12.14118766784668, -11.55087661743164, -10.960565567016602, -10.370255470275879, -9.77994441986084, -9.1896333694458, -8.599323272705078, -8.009012222290039, -7.418701171875, -6.828390121459961, -6.23807954788208, -5.647768974304199, -5.05745792388916, -4.467146873474121, -3.8768362998962402, -3.2865257263183594, -2.6962127685546875, -2.1059019565582275, -1.5155911445617676, -0.9252803325653076, -0.33496952056884766, 0.2553412914276123, 0.8456521034240723, 1.4359626770019531, 2.026273727416992, 2.616584539413452, 3.206895351409912, 3.797206163406372, 4.387516975402832, 4.977828025817871, 5.568138599395752, 6.158449172973633, 6.748760223388672, 7.339071273803711, 7.929381847381592, 8.519692420959473, 9.110003471374512, 9.70031452178955, 10.290624618530273, 10.880935668945312, 11.471246719360352, 12.06155776977539, 12.65186882019043, 13.242178916931152, 13.832489967346191, 14.42280101776123, 15.013111114501953, 15.603422164916992, 16.19373321533203]}, "gradients/encoder.encoder.layers.17.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 2.0, 3.0, 6.0, 7.0, 10.0, 9.0, 16.0, 17.0, 18.0, 31.0, 29.0, 59.0, 101.0, 176.0, 306.0, 774.0, 2050.0, 7481.0, 90553.0, 4018341.0, 65049.0, 6191.0, 1724.0, 691.0, 309.0, 136.0, 74.0, 41.0, 28.0, 18.0, 9.0, 8.0, 9.0, 3.0, 4.0, 5.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-7.75390625, -7.54119873046875, -7.3284912109375, -7.11578369140625, -6.903076171875, -6.69036865234375, -6.4776611328125, -6.26495361328125, -6.05224609375, -5.83953857421875, -5.6268310546875, -5.41412353515625, -5.201416015625, -4.98870849609375, -4.7760009765625, -4.56329345703125, -4.3505859375, -4.13787841796875, -3.9251708984375, -3.71246337890625, -3.499755859375, -3.28704833984375, -3.0743408203125, -2.86163330078125, -2.64892578125, -2.43621826171875, -2.2235107421875, -2.01080322265625, -1.798095703125, -1.58538818359375, -1.3726806640625, -1.15997314453125, -0.947265625, -0.73455810546875, -0.5218505859375, -0.30914306640625, -0.096435546875, 0.11627197265625, 0.3289794921875, 0.54168701171875, 0.75439453125, 0.96710205078125, 1.1798095703125, 1.39251708984375, 1.605224609375, 1.81793212890625, 2.0306396484375, 2.24334716796875, 2.4560546875, 2.66876220703125, 2.8814697265625, 3.09417724609375, 3.306884765625, 3.51959228515625, 3.7322998046875, 3.94500732421875, 4.15771484375, 4.37042236328125, 4.5831298828125, 4.79583740234375, 5.008544921875, 5.22125244140625, 5.4339599609375, 5.64666748046875, 5.859375]}, "gradients/encoder.encoder.layers.17.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 2.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 5.0, 1.0, 6.0, 10.0, 17.0, 27.0, 27.0, 39.0, 61.0, 79.0, 121.0, 86.0, 97.0, 106.0, 75.0, 72.0, 47.0, 51.0, 23.0, 11.0, 14.0, 13.0, 6.0, 4.0, 1.0, 2.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.345703125, -1.3112640380859375, -1.276824951171875, -1.2423858642578125, -1.20794677734375, -1.1735076904296875, -1.139068603515625, -1.1046295166015625, -1.0701904296875, -1.0357513427734375, -1.001312255859375, -0.9668731689453125, -0.93243408203125, -0.8979949951171875, -0.863555908203125, -0.8291168212890625, -0.794677734375, -0.7602386474609375, -0.725799560546875, -0.6913604736328125, -0.65692138671875, -0.6224822998046875, -0.588043212890625, -0.5536041259765625, -0.5191650390625, -0.4847259521484375, -0.450286865234375, -0.4158477783203125, -0.38140869140625, -0.3469696044921875, -0.312530517578125, -0.2780914306640625, -0.24365234375, -0.2092132568359375, -0.174774169921875, -0.1403350830078125, -0.10589599609375, -0.0714569091796875, -0.037017822265625, -0.0025787353515625, 0.0318603515625, 0.0662994384765625, 0.100738525390625, 0.1351776123046875, 0.16961669921875, 0.2040557861328125, 0.238494873046875, 0.2729339599609375, 0.307373046875, 0.3418121337890625, 0.376251220703125, 0.4106903076171875, 0.44512939453125, 0.4795684814453125, 0.514007568359375, 0.5484466552734375, 0.5828857421875, 0.6173248291015625, 0.651763916015625, 0.6862030029296875, 0.72064208984375, 0.7550811767578125, 0.789520263671875, 0.8239593505859375, 0.8583984375]}, "gradients/encoder.encoder.layers.17.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 3.0, 3.0, 2.0, 6.0, 13.0, 22.0, 30.0, 30.0, 119.0, 372.0, 1700.0, 15994.0, 4003164.0, 166520.0, 4945.0, 865.0, 287.0, 101.0, 34.0, 27.0, 18.0, 12.0, 9.0, 2.0, 5.0, 5.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.78515625, -5.49249267578125, -5.1998291015625, -4.90716552734375, -4.614501953125, -4.32183837890625, -4.0291748046875, -3.73651123046875, -3.44384765625, -3.15118408203125, -2.8585205078125, -2.56585693359375, -2.273193359375, -1.98052978515625, -1.6878662109375, -1.39520263671875, -1.1025390625, -0.80987548828125, -0.5172119140625, -0.22454833984375, 0.068115234375, 0.36077880859375, 0.6534423828125, 0.94610595703125, 1.23876953125, 1.53143310546875, 1.8240966796875, 2.11676025390625, 2.409423828125, 2.70208740234375, 2.9947509765625, 3.28741455078125, 3.580078125, 3.87274169921875, 4.1654052734375, 4.45806884765625, 4.750732421875, 5.04339599609375, 5.3360595703125, 5.62872314453125, 5.92138671875, 6.21405029296875, 6.5067138671875, 6.79937744140625, 7.092041015625, 7.38470458984375, 7.6773681640625, 7.97003173828125, 8.2626953125, 8.55535888671875, 8.8480224609375, 9.14068603515625, 9.433349609375, 9.72601318359375, 10.0186767578125, 10.31134033203125, 10.60400390625, 10.89666748046875, 11.1893310546875, 11.48199462890625, 11.774658203125, 12.06732177734375, 12.3599853515625, 12.65264892578125, 12.9453125]}, "gradients/encoder.encoder.layers.17.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 4.0, 1.0, 4.0, 3.0, 8.0, 12.0, 18.0, 16.0, 35.0, 52.0, 65.0, 175.0, 371.0, 1626.0, 1045.0, 327.0, 136.0, 77.0, 34.0, 14.0, 16.0, 9.0, 11.0, 9.0, 6.0, 6.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.5595703125, -1.5036163330078125, -1.447662353515625, -1.3917083740234375, -1.33575439453125, -1.2798004150390625, -1.223846435546875, -1.1678924560546875, -1.1119384765625, -1.0559844970703125, -1.000030517578125, -0.9440765380859375, -0.88812255859375, -0.8321685791015625, -0.776214599609375, -0.7202606201171875, -0.664306640625, -0.6083526611328125, -0.552398681640625, -0.4964447021484375, -0.44049072265625, -0.3845367431640625, -0.328582763671875, -0.2726287841796875, -0.2166748046875, -0.1607208251953125, -0.104766845703125, -0.0488128662109375, 0.00714111328125, 0.0630950927734375, 0.119049072265625, 0.1750030517578125, 0.23095703125, 0.2869110107421875, 0.342864990234375, 0.3988189697265625, 0.45477294921875, 0.5107269287109375, 0.566680908203125, 0.6226348876953125, 0.6785888671875, 0.7345428466796875, 0.790496826171875, 0.8464508056640625, 0.90240478515625, 0.9583587646484375, 1.014312744140625, 1.0702667236328125, 1.126220703125, 1.1821746826171875, 1.238128662109375, 1.2940826416015625, 1.35003662109375, 1.4059906005859375, 1.461944580078125, 1.5178985595703125, 1.5738525390625, 1.6298065185546875, 1.685760498046875, 1.7417144775390625, 1.79766845703125, 1.8536224365234375, 1.909576416015625, 1.9655303955078125, 2.021484375]}, "gradients/encoder.encoder.layers.17.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 3.0, 5.0, 23.0, 96.0, 409.0, 352.0, 91.0, 14.0, 10.0, 6.0, 3.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-26.163129806518555, -25.474000930786133, -24.78487205505371, -24.095741271972656, -23.406612396240234, -22.717483520507812, -22.02835464477539, -21.33922576904297, -20.650096893310547, -19.960968017578125, -19.271839141845703, -18.58271026611328, -17.893579483032227, -17.204450607299805, -16.515321731567383, -15.826192855834961, -15.137062072753906, -14.447933197021484, -13.758803367614746, -13.069674491882324, -12.380544662475586, -11.691415786743164, -11.002286911010742, -10.31315803527832, -9.624028205871582, -8.93489933013916, -8.245769500732422, -7.556640625, -6.86751127243042, -6.17838191986084, -5.489253044128418, -4.800123691558838, -4.110992431640625, -3.421863079071045, -2.732733964920044, -2.043604850769043, -1.354475498199463, -0.6653461456298828, 0.023782730102539062, 0.7129120826721191, 1.4020414352416992, 2.0911707878112793, 2.7802999019622803, 3.4694290161132812, 4.158558368682861, 4.847687721252441, 5.536816596984863, 6.225945949554443, 6.915075302124023, 7.6042046546936035, 8.293334007263184, 8.982462882995605, 9.671592712402344, 10.360721588134766, 11.049850463867188, 11.73897933959961, 12.428109169006348, 13.11723804473877, 13.806367874145508, 14.49549674987793, 15.184625625610352, 15.87375545501709, 16.562885284423828, 17.25201416015625, 17.941143035888672]}, "gradients/encoder.encoder.layers.17.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 3.0, 2.0, 1.0, 4.0, 4.0, 9.0, 16.0, 18.0, 29.0, 30.0, 36.0, 52.0, 66.0, 79.0, 91.0, 81.0, 73.0, 76.0, 72.0, 66.0, 53.0, 43.0, 29.0, 23.0, 19.0, 12.0, 11.0, 5.0, 5.0, 2.0, 3.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.126326084136963, -5.882452964782715, -5.638579845428467, -5.394706726074219, -5.150833606719971, -4.906960487365723, -4.663087368011475, -4.419214248657227, -4.1753411293029785, -3.9314680099487305, -3.6875948905944824, -3.4437217712402344, -3.1998486518859863, -2.9559755325317383, -2.7121024131774902, -2.468229293823242, -2.224356174468994, -1.980483055114746, -1.736609935760498, -1.49273681640625, -1.248863697052002, -1.004990577697754, -0.7611174583435059, -0.5172443389892578, -0.27337121963500977, -0.02949810028076172, 0.21437501907348633, 0.4582481384277344, 0.7021212577819824, 0.9459943771362305, 1.1898674964904785, 1.4337406158447266, 1.6776142120361328, 1.9214873313903809, 2.165360450744629, 2.409233570098877, 2.653106689453125, 2.896979808807373, 3.140852928161621, 3.384726047515869, 3.628599166870117, 3.8724722862243652, 4.116345405578613, 4.360218524932861, 4.604091644287109, 4.847964763641357, 5.0918378829956055, 5.3357110023498535, 5.579584121704102, 5.82345724105835, 6.067330360412598, 6.311203479766846, 6.555076599121094, 6.798949718475342, 7.04282283782959, 7.286695957183838, 7.530569076538086, 7.774442195892334, 8.018315315246582, 8.262187957763672, 8.506061553955078, 8.749935150146484, 8.993807792663574, 9.237680435180664, 9.48155403137207]}, "gradients/encoder.encoder.layers.17.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 2.0, 5.0, 2.0, 1.0, 4.0, 2.0, 8.0, 14.0, 11.0, 19.0, 26.0, 40.0, 48.0, 73.0, 79.0, 104.0, 177.0, 265.0, 365.0, 544.0, 1065.0, 1701.0, 3519.0, 8426.0, 28388.0, 142199.0, 565145.0, 232258.0, 42645.0, 11630.0, 4495.0, 2157.0, 1125.0, 737.0, 354.0, 285.0, 187.0, 123.0, 89.0, 66.0, 52.0, 28.0, 29.0, 16.0, 21.0, 9.0, 6.0, 5.0, 3.0, 3.0, 3.0, 2.0, 0.0, 3.0, 5.0, 1.0, 2.0, 1.0], "bins": [-3.587890625, -3.47796630859375, -3.3680419921875, -3.25811767578125, -3.148193359375, -3.03826904296875, -2.9283447265625, -2.81842041015625, -2.70849609375, -2.59857177734375, -2.4886474609375, -2.37872314453125, -2.268798828125, -2.15887451171875, -2.0489501953125, -1.93902587890625, -1.8291015625, -1.71917724609375, -1.6092529296875, -1.49932861328125, -1.389404296875, -1.27947998046875, -1.1695556640625, -1.05963134765625, -0.94970703125, -0.83978271484375, -0.7298583984375, -0.61993408203125, -0.510009765625, -0.40008544921875, -0.2901611328125, -0.18023681640625, -0.0703125, 0.03961181640625, 0.1495361328125, 0.25946044921875, 0.369384765625, 0.47930908203125, 0.5892333984375, 0.69915771484375, 0.80908203125, 0.91900634765625, 1.0289306640625, 1.13885498046875, 1.248779296875, 1.35870361328125, 1.4686279296875, 1.57855224609375, 1.6884765625, 1.79840087890625, 1.9083251953125, 2.01824951171875, 2.128173828125, 2.23809814453125, 2.3480224609375, 2.45794677734375, 2.56787109375, 2.67779541015625, 2.7877197265625, 2.89764404296875, 3.007568359375, 3.11749267578125, 3.2274169921875, 3.33734130859375, 3.447265625]}, "gradients/encoder.encoder.layers.17.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 3.0, 1.0, 0.0, 1.0, 2.0, 1.0, 3.0, 4.0, 8.0, 14.0, 18.0, 19.0, 36.0, 34.0, 53.0, 77.0, 101.0, 104.0, 93.0, 93.0, 86.0, 69.0, 48.0, 37.0, 34.0, 20.0, 16.0, 5.0, 8.0, 4.0, 6.0, 1.0, 4.0, 3.0, 3.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.09375, -1.0618057250976562, -1.0298614501953125, -0.9979171752929688, -0.965972900390625, -0.9340286254882812, -0.9020843505859375, -0.8701400756835938, -0.83819580078125, -0.8062515258789062, -0.7743072509765625, -0.7423629760742188, -0.710418701171875, -0.6784744262695312, -0.6465301513671875, -0.6145858764648438, -0.5826416015625, -0.5506973266601562, -0.5187530517578125, -0.48680877685546875, -0.454864501953125, -0.42292022705078125, -0.3909759521484375, -0.35903167724609375, -0.32708740234375, -0.29514312744140625, -0.2631988525390625, -0.23125457763671875, -0.199310302734375, -0.16736602783203125, -0.1354217529296875, -0.10347747802734375, -0.071533203125, -0.03958892822265625, -0.0076446533203125, 0.02429962158203125, 0.056243896484375, 0.08818817138671875, 0.1201324462890625, 0.15207672119140625, 0.18402099609375, 0.21596527099609375, 0.2479095458984375, 0.27985382080078125, 0.311798095703125, 0.34374237060546875, 0.3756866455078125, 0.40763092041015625, 0.4395751953125, 0.47151947021484375, 0.5034637451171875, 0.5354080200195312, 0.567352294921875, 0.5992965698242188, 0.6312408447265625, 0.6631851196289062, 0.69512939453125, 0.7270736694335938, 0.7590179443359375, 0.7909622192382812, 0.822906494140625, 0.8548507690429688, 0.8867950439453125, 0.9187393188476562, 0.95068359375]}, "gradients/encoder.encoder.layers.17.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 3.0, 0.0, 1.0, 7.0, 3.0, 8.0, 8.0, 13.0, 18.0, 27.0, 39.0, 66.0, 88.0, 119.0, 228.0, 425.0, 809.0, 1545.0, 3461.0, 10395.0, 39503.0, 190236.0, 546540.0, 196814.0, 40625.0, 10655.0, 3600.0, 1560.0, 752.0, 399.0, 212.0, 143.0, 90.0, 56.0, 32.0, 25.0, 19.0, 12.0, 7.0, 14.0, 4.0, 3.0, 4.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.673828125, -2.576629638671875, -2.47943115234375, -2.382232666015625, -2.2850341796875, -2.187835693359375, -2.09063720703125, -1.993438720703125, -1.896240234375, -1.799041748046875, -1.70184326171875, -1.604644775390625, -1.5074462890625, -1.410247802734375, -1.31304931640625, -1.215850830078125, -1.11865234375, -1.021453857421875, -0.92425537109375, -0.827056884765625, -0.7298583984375, -0.632659912109375, -0.53546142578125, -0.438262939453125, -0.341064453125, -0.243865966796875, -0.14666748046875, -0.049468994140625, 0.0477294921875, 0.144927978515625, 0.24212646484375, 0.339324951171875, 0.4365234375, 0.533721923828125, 0.63092041015625, 0.728118896484375, 0.8253173828125, 0.922515869140625, 1.01971435546875, 1.116912841796875, 1.214111328125, 1.311309814453125, 1.40850830078125, 1.505706787109375, 1.6029052734375, 1.700103759765625, 1.79730224609375, 1.894500732421875, 1.99169921875, 2.088897705078125, 2.18609619140625, 2.283294677734375, 2.3804931640625, 2.477691650390625, 2.57489013671875, 2.672088623046875, 2.769287109375, 2.866485595703125, 2.96368408203125, 3.060882568359375, 3.1580810546875, 3.255279541015625, 3.35247802734375, 3.449676513671875, 3.546875]}, "gradients/encoder.encoder.layers.17.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 1.0, 2.0, 1.0, 1.0, 4.0, 3.0, 4.0, 7.0, 13.0, 8.0, 7.0, 16.0, 16.0, 18.0, 12.0, 22.0, 27.0, 34.0, 32.0, 36.0, 44.0, 41.0, 45.0, 42.0, 47.0, 40.0, 37.0, 38.0, 48.0, 34.0, 44.0, 34.0, 31.0, 31.0, 33.0, 28.0, 18.0, 23.0, 14.0, 15.0, 17.0, 9.0, 9.0, 7.0, 8.0, 3.0, 5.0, 3.0, 0.0, 3.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-2.51953125, -2.4354248046875, -2.351318359375, -2.2672119140625, -2.18310546875, -2.0989990234375, -2.014892578125, -1.9307861328125, -1.8466796875, -1.7625732421875, -1.678466796875, -1.5943603515625, -1.51025390625, -1.4261474609375, -1.342041015625, -1.2579345703125, -1.173828125, -1.0897216796875, -1.005615234375, -0.9215087890625, -0.83740234375, -0.7532958984375, -0.669189453125, -0.5850830078125, -0.5009765625, -0.4168701171875, -0.332763671875, -0.2486572265625, -0.16455078125, -0.0804443359375, 0.003662109375, 0.0877685546875, 0.171875, 0.2559814453125, 0.340087890625, 0.4241943359375, 0.50830078125, 0.5924072265625, 0.676513671875, 0.7606201171875, 0.8447265625, 0.9288330078125, 1.012939453125, 1.0970458984375, 1.18115234375, 1.2652587890625, 1.349365234375, 1.4334716796875, 1.517578125, 1.6016845703125, 1.685791015625, 1.7698974609375, 1.85400390625, 1.9381103515625, 2.022216796875, 2.1063232421875, 2.1904296875, 2.2745361328125, 2.358642578125, 2.4427490234375, 2.52685546875, 2.6109619140625, 2.695068359375, 2.7791748046875, 2.86328125]}, "gradients/encoder.encoder.layers.17.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0, 1.0, 1.0, 5.0, 7.0, 2.0, 6.0, 7.0, 12.0, 10.0, 21.0, 32.0, 45.0, 88.0, 145.0, 337.0, 769.0, 2519.0, 13318.0, 182368.0, 778226.0, 60888.0, 7067.0, 1647.0, 541.0, 220.0, 110.0, 54.0, 43.0, 25.0, 11.0, 11.0, 6.0, 9.0, 1.0, 2.0, 4.0, 3.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.7255859375, -1.6704864501953125, -1.615386962890625, -1.5602874755859375, -1.50518798828125, -1.4500885009765625, -1.394989013671875, -1.3398895263671875, -1.2847900390625, -1.2296905517578125, -1.174591064453125, -1.1194915771484375, -1.06439208984375, -1.0092926025390625, -0.954193115234375, -0.8990936279296875, -0.843994140625, -0.7888946533203125, -0.733795166015625, -0.6786956787109375, -0.62359619140625, -0.5684967041015625, -0.513397216796875, -0.4582977294921875, -0.4031982421875, -0.3480987548828125, -0.292999267578125, -0.2378997802734375, -0.18280029296875, -0.1277008056640625, -0.072601318359375, -0.0175018310546875, 0.03759765625, 0.0926971435546875, 0.147796630859375, 0.2028961181640625, 0.25799560546875, 0.3130950927734375, 0.368194580078125, 0.4232940673828125, 0.4783935546875, 0.5334930419921875, 0.588592529296875, 0.6436920166015625, 0.69879150390625, 0.7538909912109375, 0.808990478515625, 0.8640899658203125, 0.919189453125, 0.9742889404296875, 1.029388427734375, 1.0844879150390625, 1.13958740234375, 1.1946868896484375, 1.249786376953125, 1.3048858642578125, 1.3599853515625, 1.4150848388671875, 1.470184326171875, 1.5252838134765625, 1.58038330078125, 1.6354827880859375, 1.690582275390625, 1.7456817626953125, 1.80078125]}, "gradients/encoder.encoder.layers.17.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0, 4.0, 5.0, 4.0, 7.0, 8.0, 7.0, 9.0, 10.0, 22.0, 21.0, 34.0, 51.0, 48.0, 68.0, 95.0, 97.0, 110.0, 94.0, 66.0, 66.0, 38.0, 32.0, 25.0, 22.0, 19.0, 10.0, 7.0, 7.0, 8.0, 4.0, 5.0, 3.0, 0.0, 0.0, 1.0, 2.0, 3.0, 0.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00012421607971191406, -0.00011980906128883362, -0.00011540204286575317, -0.00011099502444267273, -0.00010658800601959229, -0.00010218098759651184, -9.77739691734314e-05, -9.336695075035095e-05, -8.895993232727051e-05, -8.455291390419006e-05, -8.014589548110962e-05, -7.573887705802917e-05, -7.133185863494873e-05, -6.692484021186829e-05, -6.251782178878784e-05, -5.81108033657074e-05, -5.370378494262695e-05, -4.929676651954651e-05, -4.4889748096466064e-05, -4.048272967338562e-05, -3.6075711250305176e-05, -3.166869282722473e-05, -2.7261674404144287e-05, -2.2854655981063843e-05, -1.84476375579834e-05, -1.4040619134902954e-05, -9.63360071182251e-06, -5.2265822887420654e-06, -8.195638656616211e-07, 3.5874545574188232e-06, 7.994472980499268e-06, 1.2401491403579712e-05, 1.6808509826660156e-05, 2.12155282497406e-05, 2.5622546672821045e-05, 3.002956509590149e-05, 3.4436583518981934e-05, 3.884360194206238e-05, 4.325062036514282e-05, 4.7657638788223267e-05, 5.206465721130371e-05, 5.6471675634384155e-05, 6.08786940574646e-05, 6.528571248054504e-05, 6.969273090362549e-05, 7.409974932670593e-05, 7.850676774978638e-05, 8.291378617286682e-05, 8.732080459594727e-05, 9.172782301902771e-05, 9.613484144210815e-05, 0.0001005418598651886, 0.00010494887828826904, 0.00010935589671134949, 0.00011376291513442993, 0.00011816993355751038, 0.00012257695198059082, 0.00012698397040367126, 0.0001313909888267517, 0.00013579800724983215, 0.0001402050256729126, 0.00014461204409599304, 0.00014901906251907349, 0.00015342608094215393, 0.00015783309936523438]}, "gradients/encoder.encoder.layers.17.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 6.0, 4.0, 5.0, 6.0, 5.0, 13.0, 9.0, 30.0, 51.0, 73.0, 135.0, 244.0, 540.0, 1432.0, 4979.0, 31034.0, 497536.0, 475465.0, 29827.0, 4676.0, 1398.0, 566.0, 236.0, 107.0, 72.0, 51.0, 28.0, 16.0, 5.0, 6.0, 4.0, 4.0, 5.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0], "bins": [-1.9794921875, -1.92999267578125, -1.8804931640625, -1.83099365234375, -1.781494140625, -1.73199462890625, -1.6824951171875, -1.63299560546875, -1.58349609375, -1.53399658203125, -1.4844970703125, -1.43499755859375, -1.385498046875, -1.33599853515625, -1.2864990234375, -1.23699951171875, -1.1875, -1.13800048828125, -1.0885009765625, -1.03900146484375, -0.989501953125, -0.94000244140625, -0.8905029296875, -0.84100341796875, -0.79150390625, -0.74200439453125, -0.6925048828125, -0.64300537109375, -0.593505859375, -0.54400634765625, -0.4945068359375, -0.44500732421875, -0.3955078125, -0.34600830078125, -0.2965087890625, -0.24700927734375, -0.197509765625, -0.14801025390625, -0.0985107421875, -0.04901123046875, 0.00048828125, 0.04998779296875, 0.0994873046875, 0.14898681640625, 0.198486328125, 0.24798583984375, 0.2974853515625, 0.34698486328125, 0.396484375, 0.44598388671875, 0.4954833984375, 0.54498291015625, 0.594482421875, 0.64398193359375, 0.6934814453125, 0.74298095703125, 0.79248046875, 0.84197998046875, 0.8914794921875, 0.94097900390625, 0.990478515625, 1.03997802734375, 1.0894775390625, 1.13897705078125, 1.1884765625]}, "gradients/encoder.encoder.layers.17.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 1.0, 5.0, 2.0, 8.0, 14.0, 12.0, 19.0, 35.0, 42.0, 69.0, 78.0, 104.0, 134.0, 112.0, 115.0, 77.0, 59.0, 39.0, 24.0, 15.0, 14.0, 12.0, 7.0, 6.0, 1.0, 3.0, 1.0, 4.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-1.32421875, -1.28515625, -1.24609375, -1.20703125, -1.16796875, -1.12890625, -1.08984375, -1.05078125, -1.01171875, -0.97265625, -0.93359375, -0.89453125, -0.85546875, -0.81640625, -0.77734375, -0.73828125, -0.69921875, -0.66015625, -0.62109375, -0.58203125, -0.54296875, -0.50390625, -0.46484375, -0.42578125, -0.38671875, -0.34765625, -0.30859375, -0.26953125, -0.23046875, -0.19140625, -0.15234375, -0.11328125, -0.07421875, -0.03515625, 0.00390625, 0.04296875, 0.08203125, 0.12109375, 0.16015625, 0.19921875, 0.23828125, 0.27734375, 0.31640625, 0.35546875, 0.39453125, 0.43359375, 0.47265625, 0.51171875, 0.55078125, 0.58984375, 0.62890625, 0.66796875, 0.70703125, 0.74609375, 0.78515625, 0.82421875, 0.86328125, 0.90234375, 0.94140625, 0.98046875, 1.01953125, 1.05859375, 1.09765625, 1.13671875, 1.17578125]}, "gradients/encoder.encoder.layers.17.layer_norm.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 0.0, 0.0, 3.0, 1.0, 0.0, 3.0, 2.0, 5.0, 12.0, 14.0, 31.0, 62.0, 119.0, 185.0, 229.0, 163.0, 83.0, 41.0, 23.0, 12.0, 12.0, 5.0, 2.0, 3.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-21.986242294311523, -21.236927032470703, -20.487613677978516, -19.738300323486328, -18.988985061645508, -18.239669799804688, -17.4903564453125, -16.741043090820312, -15.991727828979492, -15.242413520812988, -14.493099212646484, -13.74378490447998, -12.994470596313477, -12.245156288146973, -11.495841979980469, -10.746527671813965, -9.997213363647461, -9.247899055480957, -8.498584747314453, -7.749270439147949, -6.999956130981445, -6.250641822814941, -5.5013275146484375, -4.752013206481934, -4.00269889831543, -3.253384590148926, -2.504070281982422, -1.754755973815918, -1.005441665649414, -0.25612735748291016, 0.49318695068359375, 1.2425012588500977, 1.9918174743652344, 2.7411317825317383, 3.490446090698242, 4.239760398864746, 4.98907470703125, 5.738389015197754, 6.487703323364258, 7.237017631530762, 7.986331939697266, 8.73564624786377, 9.484960556030273, 10.234274864196777, 10.983589172363281, 11.732903480529785, 12.482217788696289, 13.231532096862793, 13.980846405029297, 14.7301607131958, 15.479475021362305, 16.228790283203125, 16.978103637695312, 17.7274169921875, 18.47673225402832, 19.22604751586914, 19.975360870361328, 20.724674224853516, 21.473989486694336, 22.223304748535156, 22.972618103027344, 23.72193145751953, 24.47124671936035, 25.220561981201172, 25.96987533569336]}, "gradients/encoder.encoder.layers.17.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 2.0, 4.0, 7.0, 6.0, 6.0, 7.0, 12.0, 16.0, 14.0, 13.0, 24.0, 30.0, 34.0, 29.0, 37.0, 47.0, 44.0, 50.0, 57.0, 61.0, 56.0, 48.0, 65.0, 55.0, 45.0, 41.0, 41.0, 32.0, 32.0, 27.0, 14.0, 12.0, 14.0, 8.0, 7.0, 3.0, 5.0, 2.0, 3.0, 1.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-18.992145538330078, -18.393766403198242, -17.795387268066406, -17.19700813293457, -16.598628997802734, -16.00025177001953, -15.401871681213379, -14.80349349975586, -14.205114364624023, -13.606735229492188, -13.008356094360352, -12.409976959228516, -11.811598777770996, -11.21321964263916, -10.614840507507324, -10.016462326049805, -9.418082237243652, -8.819703102111816, -8.22132396697998, -7.622945308685303, -7.024566650390625, -6.426187515258789, -5.827808380126953, -5.229429721832275, -4.6310505867004395, -4.0326714515686035, -3.434292793273926, -2.83591365814209, -2.237534761428833, -1.6391558647155762, -1.0407767295837402, -0.4423980712890625, 0.15598106384277344, 0.754360020160675, 1.3527389764785767, 1.951117992401123, 2.54949688911438, 3.1478757858276367, 3.7462549209594727, 4.34463357925415, 4.943012714385986, 5.541391849517822, 6.1397705078125, 6.738149642944336, 7.336528778076172, 7.93490743637085, 8.533287048339844, 9.131665229797363, 9.7300443649292, 10.328423500061035, 10.926802635192871, 11.52518081665039, 12.123559951782227, 12.721939086914062, 13.320318222045898, 13.918697357177734, 14.51707649230957, 15.115455627441406, 15.713834762573242, 16.312213897705078, 16.910593032836914, 17.50897216796875, 18.107349395751953, 18.70572853088379, 19.304107666015625]}, "gradients/encoder.encoder.layers.16.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 3.0, 3.0, 3.0, 2.0, 3.0, 5.0, 8.0, 14.0, 11.0, 11.0, 14.0, 24.0, 38.0, 66.0, 99.0, 132.0, 319.0, 592.0, 1338.0, 3634.0, 17272.0, 313272.0, 3808762.0, 38905.0, 6181.0, 1871.0, 801.0, 391.0, 204.0, 111.0, 68.0, 36.0, 37.0, 16.0, 18.0, 10.0, 8.0, 3.0, 2.0, 2.0, 1.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-4.62890625, -4.49737548828125, -4.3658447265625, -4.23431396484375, -4.102783203125, -3.97125244140625, -3.8397216796875, -3.70819091796875, -3.57666015625, -3.44512939453125, -3.3135986328125, -3.18206787109375, -3.050537109375, -2.91900634765625, -2.7874755859375, -2.65594482421875, -2.5244140625, -2.39288330078125, -2.2613525390625, -2.12982177734375, -1.998291015625, -1.86676025390625, -1.7352294921875, -1.60369873046875, -1.47216796875, -1.34063720703125, -1.2091064453125, -1.07757568359375, -0.946044921875, -0.81451416015625, -0.6829833984375, -0.55145263671875, -0.419921875, -0.28839111328125, -0.1568603515625, -0.02532958984375, 0.106201171875, 0.23773193359375, 0.3692626953125, 0.50079345703125, 0.63232421875, 0.76385498046875, 0.8953857421875, 1.02691650390625, 1.158447265625, 1.28997802734375, 1.4215087890625, 1.55303955078125, 1.6845703125, 1.81610107421875, 1.9476318359375, 2.07916259765625, 2.210693359375, 2.34222412109375, 2.4737548828125, 2.60528564453125, 2.73681640625, 2.86834716796875, 2.9998779296875, 3.13140869140625, 3.262939453125, 3.39447021484375, 3.5260009765625, 3.65753173828125, 3.7890625]}, "gradients/encoder.encoder.layers.16.feed_forward.output_dense.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 5.0, 1.0, 4.0, 6.0, 6.0, 12.0, 15.0, 23.0, 28.0, 39.0, 44.0, 67.0, 79.0, 73.0, 102.0, 77.0, 75.0, 74.0, 65.0, 52.0, 46.0, 20.0, 24.0, 23.0, 9.0, 8.0, 7.0, 6.0, 5.0, 2.0, 2.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.06640625, -1.034942626953125, -1.00347900390625, -0.972015380859375, -0.9405517578125, -0.909088134765625, -0.87762451171875, -0.846160888671875, -0.814697265625, -0.783233642578125, -0.75177001953125, -0.720306396484375, -0.6888427734375, -0.657379150390625, -0.62591552734375, -0.594451904296875, -0.56298828125, -0.531524658203125, -0.50006103515625, -0.468597412109375, -0.4371337890625, -0.405670166015625, -0.37420654296875, -0.342742919921875, -0.311279296875, -0.279815673828125, -0.24835205078125, -0.216888427734375, -0.1854248046875, -0.153961181640625, -0.12249755859375, -0.091033935546875, -0.0595703125, -0.028106689453125, 0.00335693359375, 0.034820556640625, 0.0662841796875, 0.097747802734375, 0.12921142578125, 0.160675048828125, 0.192138671875, 0.223602294921875, 0.25506591796875, 0.286529541015625, 0.3179931640625, 0.349456787109375, 0.38092041015625, 0.412384033203125, 0.44384765625, 0.475311279296875, 0.50677490234375, 0.538238525390625, 0.5697021484375, 0.601165771484375, 0.63262939453125, 0.664093017578125, 0.695556640625, 0.727020263671875, 0.75848388671875, 0.789947509765625, 0.8214111328125, 0.852874755859375, 0.88433837890625, 0.915802001953125, 0.947265625]}, "gradients/encoder.encoder.layers.16.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 2.0, 1.0, 0.0, 2.0, 4.0, 6.0, 3.0, 8.0, 6.0, 10.0, 19.0, 45.0, 68.0, 158.0, 471.0, 1493.0, 6184.0, 40551.0, 2933109.0, 1171860.0, 32812.0, 5310.0, 1386.0, 448.0, 175.0, 59.0, 30.0, 25.0, 17.0, 8.0, 6.0, 5.0, 3.0, 3.0, 1.0, 0.0, 0.0, 0.0, 4.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.19140625, -4.0748291015625, -3.958251953125, -3.8416748046875, -3.72509765625, -3.6085205078125, -3.491943359375, -3.3753662109375, -3.2587890625, -3.1422119140625, -3.025634765625, -2.9090576171875, -2.79248046875, -2.6759033203125, -2.559326171875, -2.4427490234375, -2.326171875, -2.2095947265625, -2.093017578125, -1.9764404296875, -1.85986328125, -1.7432861328125, -1.626708984375, -1.5101318359375, -1.3935546875, -1.2769775390625, -1.160400390625, -1.0438232421875, -0.92724609375, -0.8106689453125, -0.694091796875, -0.5775146484375, -0.4609375, -0.3443603515625, -0.227783203125, -0.1112060546875, 0.00537109375, 0.1219482421875, 0.238525390625, 0.3551025390625, 0.4716796875, 0.5882568359375, 0.704833984375, 0.8214111328125, 0.93798828125, 1.0545654296875, 1.171142578125, 1.2877197265625, 1.404296875, 1.5208740234375, 1.637451171875, 1.7540283203125, 1.87060546875, 1.9871826171875, 2.103759765625, 2.2203369140625, 2.3369140625, 2.4534912109375, 2.570068359375, 2.6866455078125, 2.80322265625, 2.9197998046875, 3.036376953125, 3.1529541015625, 3.26953125]}, "gradients/encoder.encoder.layers.16.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 3.0, 4.0, 4.0, 8.0, 8.0, 9.0, 16.0, 24.0, 24.0, 32.0, 69.0, 90.0, 137.0, 240.0, 627.0, 1625.0, 541.0, 226.0, 120.0, 91.0, 37.0, 26.0, 28.0, 32.0, 14.0, 5.0, 12.0, 3.0, 6.0, 6.0, 4.0, 1.0, 2.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.74658203125, -0.7212371826171875, -0.695892333984375, -0.6705474853515625, -0.64520263671875, -0.6198577880859375, -0.594512939453125, -0.5691680908203125, -0.5438232421875, -0.5184783935546875, -0.493133544921875, -0.4677886962890625, -0.44244384765625, -0.4170989990234375, -0.391754150390625, -0.3664093017578125, -0.341064453125, -0.3157196044921875, -0.290374755859375, -0.2650299072265625, -0.23968505859375, -0.2143402099609375, -0.188995361328125, -0.1636505126953125, -0.1383056640625, -0.1129608154296875, -0.087615966796875, -0.0622711181640625, -0.03692626953125, -0.0115814208984375, 0.013763427734375, 0.0391082763671875, 0.064453125, 0.0897979736328125, 0.115142822265625, 0.1404876708984375, 0.16583251953125, 0.1911773681640625, 0.216522216796875, 0.2418670654296875, 0.2672119140625, 0.2925567626953125, 0.317901611328125, 0.3432464599609375, 0.36859130859375, 0.3939361572265625, 0.419281005859375, 0.4446258544921875, 0.469970703125, 0.4953155517578125, 0.520660400390625, 0.5460052490234375, 0.57135009765625, 0.5966949462890625, 0.622039794921875, 0.6473846435546875, 0.6727294921875, 0.6980743408203125, 0.723419189453125, 0.7487640380859375, 0.77410888671875, 0.7994537353515625, 0.824798583984375, 0.8501434326171875, 0.87548828125]}, "gradients/encoder.encoder.layers.16.final_layer_norm.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 3.0, 6.0, 2.0, 12.0, 42.0, 138.0, 304.0, 295.0, 154.0, 43.0, 10.0, 4.0, 3.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.9529218673706055, -3.622502088546753, -3.2920823097229004, -2.9616622924804688, -2.631242513656616, -2.3008227348327637, -1.9704028367996216, -1.6399829387664795, -1.309563159942627, -0.9791433215141296, -0.6487234830856323, -0.318303644657135, 0.012116193771362305, 0.34253597259521484, 0.6729558706283569, 1.003375768661499, 1.3337955474853516, 1.664215326309204, 1.9946352243423462, 2.3250551223754883, 2.655474901199341, 2.9858946800231934, 3.316314697265625, 3.6467344760894775, 3.97715425491333, 4.307574272155762, 4.637993812561035, 4.968413829803467, 5.298833847045898, 5.629253387451172, 5.9596734046936035, 6.290093421936035, 6.620513916015625, 6.950933933258057, 7.28135347366333, 7.611773490905762, 7.942193031311035, 8.272613525390625, 8.603033065795898, 8.933452606201172, 9.263872146606445, 9.594291687011719, 9.924712181091309, 10.255131721496582, 10.585551261901855, 10.915971755981445, 11.246391296386719, 11.576810836791992, 11.907231330871582, 12.237650871276855, 12.568071365356445, 12.898490905761719, 13.228910446166992, 13.559329986572266, 13.889750480651855, 14.220170021057129, 14.550590515136719, 14.881010055541992, 15.211430549621582, 15.541850090026855, 15.872269630432129, 16.20269012451172, 16.533109664916992, 16.863529205322266, 17.19394874572754]}, "gradients/encoder.encoder.layers.16.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 5.0, 4.0, 1.0, 4.0, 2.0, 5.0, 7.0, 9.0, 13.0, 16.0, 18.0, 20.0, 15.0, 22.0, 36.0, 34.0, 33.0, 37.0, 47.0, 51.0, 35.0, 52.0, 44.0, 47.0, 51.0, 49.0, 41.0, 30.0, 41.0, 35.0, 37.0, 24.0, 25.0, 29.0, 12.0, 15.0, 16.0, 15.0, 7.0, 9.0, 1.0, 5.0, 4.0, 10.0, 3.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.7082347869873047, -2.6209793090820312, -2.533723831176758, -2.4464683532714844, -2.359212875366211, -2.2719573974609375, -2.184701919555664, -2.0974464416503906, -2.010190963745117, -1.9229354858398438, -1.8356800079345703, -1.7484245300292969, -1.6611690521240234, -1.57391357421875, -1.4866582155227661, -1.3994027376174927, -1.3121473789215088, -1.2248919010162354, -1.137636423110962, -1.0503809452056885, -0.9631255269050598, -0.8758700489997864, -0.7886146306991577, -0.7013591527938843, -0.6141036748886108, -0.5268481969833374, -0.43959274888038635, -0.3523373007774353, -0.26508182287216187, -0.17782634496688843, -0.09057092666625977, -0.003315448760986328, 0.08394002914428711, 0.17119549214839935, 0.2584509551525116, 0.34570640325546265, 0.4329618811607361, 0.5202173590660095, 0.6074727773666382, 0.6947282552719116, 0.7819837331771851, 0.8692392110824585, 0.9564946889877319, 1.0437500476837158, 1.1310055255889893, 1.2182610034942627, 1.3055164813995361, 1.3927719593048096, 1.480027437210083, 1.5672829151153564, 1.6545383930206299, 1.7417938709259033, 1.8290493488311768, 1.9163048267364502, 2.0035600662231445, 2.090815544128418, 2.1780710220336914, 2.265326499938965, 2.3525819778442383, 2.4398374557495117, 2.527092933654785, 2.6143484115600586, 2.701603889465332, 2.7888593673706055, 2.876114845275879]}, "gradients/encoder.encoder.layers.16.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 2.0, 1.0, 2.0, 0.0, 2.0, 1.0, 2.0, 6.0, 5.0, 8.0, 9.0, 10.0, 16.0, 30.0, 30.0, 50.0, 79.0, 104.0, 163.0, 233.0, 351.0, 588.0, 1061.0, 2087.0, 4869.0, 14599.0, 63445.0, 412615.0, 452243.0, 69951.0, 15544.0, 5306.0, 2188.0, 1161.0, 627.0, 395.0, 263.0, 153.0, 108.0, 84.0, 41.0, 35.0, 24.0, 25.0, 7.0, 14.0, 10.0, 5.0, 5.0, 1.0, 5.0, 2.0, 2.0, 3.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-4.1875, -4.05255126953125, -3.9176025390625, -3.78265380859375, -3.647705078125, -3.51275634765625, -3.3778076171875, -3.24285888671875, -3.10791015625, -2.97296142578125, -2.8380126953125, -2.70306396484375, -2.568115234375, -2.43316650390625, -2.2982177734375, -2.16326904296875, -2.0283203125, -1.89337158203125, -1.7584228515625, -1.62347412109375, -1.488525390625, -1.35357666015625, -1.2186279296875, -1.08367919921875, -0.94873046875, -0.81378173828125, -0.6788330078125, -0.54388427734375, -0.408935546875, -0.27398681640625, -0.1390380859375, -0.00408935546875, 0.130859375, 0.26580810546875, 0.4007568359375, 0.53570556640625, 0.670654296875, 0.80560302734375, 0.9405517578125, 1.07550048828125, 1.21044921875, 1.34539794921875, 1.4803466796875, 1.61529541015625, 1.750244140625, 1.88519287109375, 2.0201416015625, 2.15509033203125, 2.2900390625, 2.42498779296875, 2.5599365234375, 2.69488525390625, 2.829833984375, 2.96478271484375, 3.0997314453125, 3.23468017578125, 3.36962890625, 3.50457763671875, 3.6395263671875, 3.77447509765625, 3.909423828125, 4.04437255859375, 4.1793212890625, 4.31427001953125, 4.44921875]}, "gradients/encoder.encoder.layers.16.attention.out_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 2.0, 4.0, 5.0, 3.0, 10.0, 8.0, 15.0, 19.0, 26.0, 35.0, 43.0, 59.0, 76.0, 72.0, 100.0, 88.0, 71.0, 88.0, 63.0, 62.0, 42.0, 17.0, 31.0, 13.0, 15.0, 9.0, 9.0, 6.0, 4.0, 4.0, 2.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.0869140625, -1.0550689697265625, -1.023223876953125, -0.9913787841796875, -0.95953369140625, -0.9276885986328125, -0.895843505859375, -0.8639984130859375, -0.8321533203125, -0.8003082275390625, -0.768463134765625, -0.7366180419921875, -0.70477294921875, -0.6729278564453125, -0.641082763671875, -0.6092376708984375, -0.577392578125, -0.5455474853515625, -0.513702392578125, -0.4818572998046875, -0.45001220703125, -0.4181671142578125, -0.386322021484375, -0.3544769287109375, -0.3226318359375, -0.2907867431640625, -0.258941650390625, -0.2270965576171875, -0.19525146484375, -0.1634063720703125, -0.131561279296875, -0.0997161865234375, -0.06787109375, -0.0360260009765625, -0.004180908203125, 0.0276641845703125, 0.05950927734375, 0.0913543701171875, 0.123199462890625, 0.1550445556640625, 0.1868896484375, 0.2187347412109375, 0.250579833984375, 0.2824249267578125, 0.31427001953125, 0.3461151123046875, 0.377960205078125, 0.4098052978515625, 0.441650390625, 0.4734954833984375, 0.505340576171875, 0.5371856689453125, 0.56903076171875, 0.6008758544921875, 0.632720947265625, 0.6645660400390625, 0.6964111328125, 0.7282562255859375, 0.760101318359375, 0.7919464111328125, 0.82379150390625, 0.8556365966796875, 0.887481689453125, 0.9193267822265625, 0.951171875]}, "gradients/encoder.encoder.layers.16.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 3.0, 1.0, 4.0, 3.0, 9.0, 14.0, 26.0, 27.0, 44.0, 53.0, 66.0, 120.0, 160.0, 239.0, 335.0, 526.0, 859.0, 1509.0, 2835.0, 6117.0, 15182.0, 44225.0, 146206.0, 412249.0, 285795.0, 85430.0, 26980.0, 10021.0, 4231.0, 2114.0, 1123.0, 682.0, 393.0, 309.0, 194.0, 122.0, 104.0, 65.0, 55.0, 40.0, 21.0, 16.0, 14.0, 15.0, 10.0, 7.0, 4.0, 7.0, 1.0, 4.0, 0.0, 1.0], "bins": [-2.5, -2.4301300048828125, -2.360260009765625, -2.2903900146484375, -2.22052001953125, -2.1506500244140625, -2.080780029296875, -2.0109100341796875, -1.9410400390625, -1.8711700439453125, -1.801300048828125, -1.7314300537109375, -1.66156005859375, -1.5916900634765625, -1.521820068359375, -1.4519500732421875, -1.382080078125, -1.3122100830078125, -1.242340087890625, -1.1724700927734375, -1.10260009765625, -1.0327301025390625, -0.962860107421875, -0.8929901123046875, -0.8231201171875, -0.7532501220703125, -0.683380126953125, -0.6135101318359375, -0.54364013671875, -0.4737701416015625, -0.403900146484375, -0.3340301513671875, -0.26416015625, -0.1942901611328125, -0.124420166015625, -0.0545501708984375, 0.01531982421875, 0.0851898193359375, 0.155059814453125, 0.2249298095703125, 0.2947998046875, 0.3646697998046875, 0.434539794921875, 0.5044097900390625, 0.57427978515625, 0.6441497802734375, 0.714019775390625, 0.7838897705078125, 0.853759765625, 0.9236297607421875, 0.993499755859375, 1.0633697509765625, 1.13323974609375, 1.2031097412109375, 1.272979736328125, 1.3428497314453125, 1.4127197265625, 1.4825897216796875, 1.552459716796875, 1.6223297119140625, 1.69219970703125, 1.7620697021484375, 1.831939697265625, 1.9018096923828125, 1.9716796875]}, "gradients/encoder.encoder.layers.16.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 4.0, 3.0, 10.0, 4.0, 8.0, 11.0, 11.0, 8.0, 14.0, 15.0, 23.0, 25.0, 28.0, 27.0, 18.0, 39.0, 44.0, 38.0, 32.0, 40.0, 47.0, 57.0, 42.0, 40.0, 47.0, 44.0, 51.0, 45.0, 29.0, 28.0, 27.0, 22.0, 26.0, 18.0, 11.0, 11.0, 11.0, 11.0, 11.0, 8.0, 3.0, 2.0, 3.0, 4.0, 4.0, 0.0, 3.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-2.84375, -2.755615234375, -2.66748046875, -2.579345703125, -2.4912109375, -2.403076171875, -2.31494140625, -2.226806640625, -2.138671875, -2.050537109375, -1.96240234375, -1.874267578125, -1.7861328125, -1.697998046875, -1.60986328125, -1.521728515625, -1.43359375, -1.345458984375, -1.25732421875, -1.169189453125, -1.0810546875, -0.992919921875, -0.90478515625, -0.816650390625, -0.728515625, -0.640380859375, -0.55224609375, -0.464111328125, -0.3759765625, -0.287841796875, -0.19970703125, -0.111572265625, -0.0234375, 0.064697265625, 0.15283203125, 0.240966796875, 0.3291015625, 0.417236328125, 0.50537109375, 0.593505859375, 0.681640625, 0.769775390625, 0.85791015625, 0.946044921875, 1.0341796875, 1.122314453125, 1.21044921875, 1.298583984375, 1.38671875, 1.474853515625, 1.56298828125, 1.651123046875, 1.7392578125, 1.827392578125, 1.91552734375, 2.003662109375, 2.091796875, 2.179931640625, 2.26806640625, 2.356201171875, 2.4443359375, 2.532470703125, 2.62060546875, 2.708740234375, 2.796875]}, "gradients/encoder.encoder.layers.16.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 0.0, 4.0, 3.0, 5.0, 8.0, 5.0, 22.0, 27.0, 49.0, 103.0, 235.0, 533.0, 1638.0, 8812.0, 147972.0, 825632.0, 56752.0, 4929.0, 1109.0, 378.0, 158.0, 66.0, 45.0, 30.0, 18.0, 15.0, 5.0, 4.0, 3.0, 1.0, 0.0, 2.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.4072265625, -1.3442840576171875, -1.281341552734375, -1.2183990478515625, -1.15545654296875, -1.0925140380859375, -1.029571533203125, -0.9666290283203125, -0.9036865234375, -0.8407440185546875, -0.777801513671875, -0.7148590087890625, -0.65191650390625, -0.5889739990234375, -0.526031494140625, -0.4630889892578125, -0.400146484375, -0.3372039794921875, -0.274261474609375, -0.2113189697265625, -0.14837646484375, -0.0854339599609375, -0.022491455078125, 0.0404510498046875, 0.1033935546875, 0.1663360595703125, 0.229278564453125, 0.2922210693359375, 0.35516357421875, 0.4181060791015625, 0.481048583984375, 0.5439910888671875, 0.60693359375, 0.6698760986328125, 0.732818603515625, 0.7957611083984375, 0.85870361328125, 0.9216461181640625, 0.984588623046875, 1.0475311279296875, 1.1104736328125, 1.1734161376953125, 1.236358642578125, 1.2993011474609375, 1.36224365234375, 1.4251861572265625, 1.488128662109375, 1.5510711669921875, 1.614013671875, 1.6769561767578125, 1.739898681640625, 1.8028411865234375, 1.86578369140625, 1.9287261962890625, 1.991668701171875, 2.0546112060546875, 2.1175537109375, 2.1804962158203125, 2.243438720703125, 2.3063812255859375, 2.36932373046875, 2.4322662353515625, 2.495208740234375, 2.5581512451171875, 2.62109375]}, "gradients/encoder.encoder.layers.16.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 4.0, 0.0, 2.0, 0.0, 2.0, 5.0, 2.0, 5.0, 8.0, 11.0, 13.0, 12.0, 21.0, 14.0, 25.0, 42.0, 56.0, 65.0, 76.0, 83.0, 105.0, 102.0, 90.0, 63.0, 67.0, 49.0, 27.0, 15.0, 14.0, 8.0, 7.0, 5.0, 5.0, 4.0, 5.0, 2.0, 5.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00017523765563964844, -0.00017056986689567566, -0.00016590207815170288, -0.0001612342894077301, -0.00015656650066375732, -0.00015189871191978455, -0.00014723092317581177, -0.000142563134431839, -0.0001378953456878662, -0.00013322755694389343, -0.00012855976819992065, -0.00012389197945594788, -0.0001192241907119751, -0.00011455640196800232, -0.00010988861322402954, -0.00010522082448005676, -0.00010055303573608398, -9.58852469921112e-05, -9.121745824813843e-05, -8.654966950416565e-05, -8.188188076019287e-05, -7.721409201622009e-05, -7.254630327224731e-05, -6.787851452827454e-05, -6.321072578430176e-05, -5.854293704032898e-05, -5.38751482963562e-05, -4.920735955238342e-05, -4.4539570808410645e-05, -3.9871782064437866e-05, -3.520399332046509e-05, -3.053620457649231e-05, -2.586841583251953e-05, -2.1200627088546753e-05, -1.6532838344573975e-05, -1.1865049600601196e-05, -7.197260856628418e-06, -2.5294721126556396e-06, 2.1383166313171387e-06, 6.806105375289917e-06, 1.1473894119262695e-05, 1.6141682863235474e-05, 2.0809471607208252e-05, 2.547726035118103e-05, 3.014504909515381e-05, 3.481283783912659e-05, 3.9480626583099365e-05, 4.4148415327072144e-05, 4.881620407104492e-05, 5.34839928150177e-05, 5.815178155899048e-05, 6.281957030296326e-05, 6.748735904693604e-05, 7.215514779090881e-05, 7.682293653488159e-05, 8.149072527885437e-05, 8.615851402282715e-05, 9.082630276679993e-05, 9.54940915107727e-05, 0.00010016188025474548, 0.00010482966899871826, 0.00010949745774269104, 0.00011416524648666382, 0.0001188330352306366, 0.00012350082397460938]}, "gradients/encoder.encoder.layers.16.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 3.0, 3.0, 4.0, 6.0, 8.0, 11.0, 9.0, 25.0, 31.0, 54.0, 144.0, 261.0, 654.0, 2105.0, 11771.0, 226951.0, 760295.0, 40058.0, 4334.0, 1095.0, 386.0, 169.0, 82.0, 41.0, 22.0, 13.0, 8.0, 8.0, 5.0, 5.0, 4.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.677734375, -2.6143341064453125, -2.550933837890625, -2.4875335693359375, -2.42413330078125, -2.3607330322265625, -2.297332763671875, -2.2339324951171875, -2.1705322265625, -2.1071319580078125, -2.043731689453125, -1.9803314208984375, -1.91693115234375, -1.8535308837890625, -1.790130615234375, -1.7267303466796875, -1.663330078125, -1.5999298095703125, -1.536529541015625, -1.4731292724609375, -1.40972900390625, -1.3463287353515625, -1.282928466796875, -1.2195281982421875, -1.1561279296875, -1.0927276611328125, -1.029327392578125, -0.9659271240234375, -0.90252685546875, -0.8391265869140625, -0.775726318359375, -0.7123260498046875, -0.64892578125, -0.5855255126953125, -0.522125244140625, -0.4587249755859375, -0.39532470703125, -0.3319244384765625, -0.268524169921875, -0.2051239013671875, -0.1417236328125, -0.0783233642578125, -0.014923095703125, 0.0484771728515625, 0.11187744140625, 0.1752777099609375, 0.238677978515625, 0.3020782470703125, 0.365478515625, 0.4288787841796875, 0.492279052734375, 0.5556793212890625, 0.61907958984375, 0.6824798583984375, 0.745880126953125, 0.8092803955078125, 0.8726806640625, 0.9360809326171875, 0.999481201171875, 1.0628814697265625, 1.12628173828125, 1.1896820068359375, 1.253082275390625, 1.3164825439453125, 1.3798828125]}, "gradients/encoder.encoder.layers.16.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 4.0, 7.0, 9.0, 13.0, 17.0, 26.0, 44.0, 56.0, 59.0, 92.0, 108.0, 124.0, 119.0, 105.0, 73.0, 54.0, 34.0, 21.0, 15.0, 7.0, 9.0, 5.0, 6.0, 2.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-1.8310546875, -1.7810821533203125, -1.731109619140625, -1.6811370849609375, -1.63116455078125, -1.5811920166015625, -1.531219482421875, -1.4812469482421875, -1.4312744140625, -1.3813018798828125, -1.331329345703125, -1.2813568115234375, -1.23138427734375, -1.1814117431640625, -1.131439208984375, -1.0814666748046875, -1.031494140625, -0.9815216064453125, -0.931549072265625, -0.8815765380859375, -0.83160400390625, -0.7816314697265625, -0.731658935546875, -0.6816864013671875, -0.6317138671875, -0.5817413330078125, -0.531768798828125, -0.4817962646484375, -0.43182373046875, -0.3818511962890625, -0.331878662109375, -0.2819061279296875, -0.23193359375, -0.1819610595703125, -0.131988525390625, -0.0820159912109375, -0.03204345703125, 0.0179290771484375, 0.067901611328125, 0.1178741455078125, 0.1678466796875, 0.2178192138671875, 0.267791748046875, 0.3177642822265625, 0.36773681640625, 0.4177093505859375, 0.467681884765625, 0.5176544189453125, 0.567626953125, 0.6175994873046875, 0.667572021484375, 0.7175445556640625, 0.76751708984375, 0.8174896240234375, 0.867462158203125, 0.9174346923828125, 0.9674072265625, 1.0173797607421875, 1.067352294921875, 1.1173248291015625, 1.16729736328125, 1.2172698974609375, 1.267242431640625, 1.3172149658203125, 1.3671875]}, "gradients/encoder.encoder.layers.16.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 3.0, 2.0, 6.0, 14.0, 14.0, 17.0, 30.0, 83.0, 112.0, 183.0, 184.0, 181.0, 96.0, 40.0, 16.0, 13.0, 7.0, 3.0, 0.0, 0.0, 4.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-22.089115142822266, -21.336265563964844, -20.583415985107422, -19.83056640625, -19.077716827392578, -18.324867248535156, -17.572017669677734, -16.81916618347168, -16.066316604614258, -15.313467025756836, -14.560617446899414, -13.807767868041992, -13.054917335510254, -12.302067756652832, -11.54921817779541, -10.796367645263672, -10.043519020080566, -9.290669441223145, -8.537819862365723, -7.784969806671143, -7.0321197509765625, -6.279270172119141, -5.526420593261719, -4.773570537567139, -4.020720958709717, -3.267871141433716, -2.515021324157715, -1.762171745300293, -1.009321928024292, -0.256472110748291, 0.49637746810913086, 1.249227523803711, 2.002077102661133, 2.754926919937134, 3.5077767372131348, 4.260626316070557, 5.013476371765137, 5.766325950622559, 6.5191755294799805, 7.2720255851745605, 8.02487564086914, 8.777725219726562, 9.530574798583984, 10.283424377441406, 11.036274909973145, 11.789124488830566, 12.541974067687988, 13.294824600219727, 14.047673225402832, 14.800522804260254, 15.553372383117676, 16.306222915649414, 17.059072494506836, 17.811922073364258, 18.56477165222168, 19.3176212310791, 20.070470809936523, 20.823320388793945, 21.576169967651367, 22.32901954650879, 23.08186912536621, 23.834720611572266, 24.587570190429688, 25.34041976928711, 26.09326934814453]}, "gradients/encoder.encoder.layers.16.layer_norm.bias": {"_type": "histogram", "values": [3.0, 2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 4.0, 4.0, 9.0, 10.0, 5.0, 11.0, 16.0, 14.0, 25.0, 21.0, 35.0, 26.0, 31.0, 28.0, 35.0, 40.0, 34.0, 37.0, 44.0, 50.0, 54.0, 35.0, 40.0, 43.0, 39.0, 36.0, 43.0, 34.0, 27.0, 25.0, 16.0, 18.0, 16.0, 19.0, 17.0, 14.0, 10.0, 5.0, 10.0, 6.0, 4.0, 4.0, 3.0, 4.0, 4.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-13.71651554107666, -13.220603942871094, -12.724691390991211, -12.228779792785645, -11.732868194580078, -11.236956596374512, -10.741044998168945, -10.245132446289062, -9.749220848083496, -9.25330924987793, -8.757396697998047, -8.26148509979248, -7.765573501586914, -7.269661903381348, -6.773749828338623, -6.277837753295898, -5.781926155090332, -5.286014556884766, -4.790102481842041, -4.294190406799316, -3.79827880859375, -3.3023669719696045, -2.806455135345459, -2.3105432987213135, -1.814631462097168, -1.3187196254730225, -0.822807788848877, -0.32689595222473145, 0.16901588439941406, 0.6649277210235596, 1.160839557647705, 1.6567513942718506, 2.1526641845703125, 2.648576021194458, 3.1444878578186035, 3.640399694442749, 4.1363115310668945, 4.632223129272461, 5.1281352043151855, 5.62404727935791, 6.119958877563477, 6.615870475769043, 7.111782550811768, 7.607694625854492, 8.103606224060059, 8.599517822265625, 9.095430374145508, 9.591341972351074, 10.08725357055664, 10.583165168762207, 11.079076766967773, 11.574989318847656, 12.070900917053223, 12.566812515258789, 13.062725067138672, 13.558636665344238, 14.054548263549805, 14.550459861755371, 15.046371459960938, 15.54228401184082, 16.038196563720703, 16.534107208251953, 17.030019760131836, 17.525930404663086, 18.02184295654297]}, "gradients/encoder.encoder.layers.15.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 3.0, 0.0, 5.0, 1.0, 6.0, 9.0, 6.0, 9.0, 15.0, 12.0, 22.0, 31.0, 30.0, 36.0, 69.0, 83.0, 143.0, 244.0, 405.0, 763.0, 1562.0, 3470.0, 9715.0, 38949.0, 671496.0, 3387197.0, 59003.0, 12655.0, 4340.0, 1882.0, 883.0, 460.0, 276.0, 181.0, 102.0, 89.0, 38.0, 24.0, 15.0, 16.0, 10.0, 8.0, 7.0, 4.0, 9.0, 1.0, 5.0, 4.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-3.537109375, -3.436370849609375, -3.33563232421875, -3.234893798828125, -3.1341552734375, -3.033416748046875, -2.93267822265625, -2.831939697265625, -2.731201171875, -2.630462646484375, -2.52972412109375, -2.428985595703125, -2.3282470703125, -2.227508544921875, -2.12677001953125, -2.026031494140625, -1.92529296875, -1.824554443359375, -1.72381591796875, -1.623077392578125, -1.5223388671875, -1.421600341796875, -1.32086181640625, -1.220123291015625, -1.119384765625, -1.018646240234375, -0.91790771484375, -0.817169189453125, -0.7164306640625, -0.615692138671875, -0.51495361328125, -0.414215087890625, -0.3134765625, -0.212738037109375, -0.11199951171875, -0.011260986328125, 0.0894775390625, 0.190216064453125, 0.29095458984375, 0.391693115234375, 0.492431640625, 0.593170166015625, 0.69390869140625, 0.794647216796875, 0.8953857421875, 0.996124267578125, 1.09686279296875, 1.197601318359375, 1.29833984375, 1.399078369140625, 1.49981689453125, 1.600555419921875, 1.7012939453125, 1.802032470703125, 1.90277099609375, 2.003509521484375, 2.104248046875, 2.204986572265625, 2.30572509765625, 2.406463623046875, 2.5072021484375, 2.607940673828125, 2.70867919921875, 2.809417724609375, 2.91015625]}, "gradients/encoder.encoder.layers.15.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 5.0, 3.0, 3.0, 2.0, 9.0, 7.0, 18.0, 10.0, 26.0, 28.0, 43.0, 54.0, 58.0, 80.0, 67.0, 70.0, 82.0, 72.0, 75.0, 64.0, 48.0, 33.0, 44.0, 22.0, 24.0, 10.0, 13.0, 11.0, 7.0, 7.0, 6.0, 1.0, 1.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-1.21484375, -1.1820602416992188, -1.1492767333984375, -1.1164932250976562, -1.083709716796875, -1.0509262084960938, -1.0181427001953125, -0.9853591918945312, -0.95257568359375, -0.9197921752929688, -0.8870086669921875, -0.8542251586914062, -0.821441650390625, -0.7886581420898438, -0.7558746337890625, -0.7230911254882812, -0.6903076171875, -0.6575241088867188, -0.6247406005859375, -0.5919570922851562, -0.559173583984375, -0.5263900756835938, -0.4936065673828125, -0.46082305908203125, -0.42803955078125, -0.39525604248046875, -0.3624725341796875, -0.32968902587890625, -0.296905517578125, -0.26412200927734375, -0.2313385009765625, -0.19855499267578125, -0.165771484375, -0.13298797607421875, -0.1002044677734375, -0.06742095947265625, -0.034637451171875, -0.00185394287109375, 0.0309295654296875, 0.06371307373046875, 0.09649658203125, 0.12928009033203125, 0.1620635986328125, 0.19484710693359375, 0.227630615234375, 0.26041412353515625, 0.2931976318359375, 0.32598114013671875, 0.3587646484375, 0.39154815673828125, 0.4243316650390625, 0.45711517333984375, 0.489898681640625, 0.5226821899414062, 0.5554656982421875, 0.5882492065429688, 0.62103271484375, 0.6538162231445312, 0.6865997314453125, 0.7193832397460938, 0.752166748046875, 0.7849502563476562, 0.8177337646484375, 0.8505172729492188, 0.88330078125]}, "gradients/encoder.encoder.layers.15.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 4.0, 1.0, 7.0, 12.0, 6.0, 19.0, 27.0, 53.0, 114.0, 248.0, 672.0, 2218.0, 13409.0, 345962.0, 3788249.0, 37372.0, 4242.0, 1028.0, 356.0, 124.0, 80.0, 40.0, 21.0, 7.0, 7.0, 3.0, 2.0, 3.0, 3.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-4.8125, -4.6531982421875, -4.493896484375, -4.3345947265625, -4.17529296875, -4.0159912109375, -3.856689453125, -3.6973876953125, -3.5380859375, -3.3787841796875, -3.219482421875, -3.0601806640625, -2.90087890625, -2.7415771484375, -2.582275390625, -2.4229736328125, -2.263671875, -2.1043701171875, -1.945068359375, -1.7857666015625, -1.62646484375, -1.4671630859375, -1.307861328125, -1.1485595703125, -0.9892578125, -0.8299560546875, -0.670654296875, -0.5113525390625, -0.35205078125, -0.1927490234375, -0.033447265625, 0.1258544921875, 0.28515625, 0.4444580078125, 0.603759765625, 0.7630615234375, 0.92236328125, 1.0816650390625, 1.240966796875, 1.4002685546875, 1.5595703125, 1.7188720703125, 1.878173828125, 2.0374755859375, 2.19677734375, 2.3560791015625, 2.515380859375, 2.6746826171875, 2.833984375, 2.9932861328125, 3.152587890625, 3.3118896484375, 3.47119140625, 3.6304931640625, 3.789794921875, 3.9490966796875, 4.1083984375, 4.2677001953125, 4.427001953125, 4.5863037109375, 4.74560546875, 4.9049072265625, 5.064208984375, 5.2235107421875, 5.3828125]}, "gradients/encoder.encoder.layers.15.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 2.0, 0.0, 3.0, 3.0, 4.0, 4.0, 10.0, 6.0, 21.0, 26.0, 43.0, 86.0, 163.0, 517.0, 2017.0, 734.0, 222.0, 82.0, 64.0, 29.0, 21.0, 11.0, 6.0, 5.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.037109375, -1.98150634765625, -1.9259033203125, -1.87030029296875, -1.814697265625, -1.75909423828125, -1.7034912109375, -1.64788818359375, -1.59228515625, -1.53668212890625, -1.4810791015625, -1.42547607421875, -1.369873046875, -1.31427001953125, -1.2586669921875, -1.20306396484375, -1.1474609375, -1.09185791015625, -1.0362548828125, -0.98065185546875, -0.925048828125, -0.86944580078125, -0.8138427734375, -0.75823974609375, -0.70263671875, -0.64703369140625, -0.5914306640625, -0.53582763671875, -0.480224609375, -0.42462158203125, -0.3690185546875, -0.31341552734375, -0.2578125, -0.20220947265625, -0.1466064453125, -0.09100341796875, -0.035400390625, 0.02020263671875, 0.0758056640625, 0.13140869140625, 0.18701171875, 0.24261474609375, 0.2982177734375, 0.35382080078125, 0.409423828125, 0.46502685546875, 0.5206298828125, 0.57623291015625, 0.6318359375, 0.68743896484375, 0.7430419921875, 0.79864501953125, 0.854248046875, 0.90985107421875, 0.9654541015625, 1.02105712890625, 1.07666015625, 1.13226318359375, 1.1878662109375, 1.24346923828125, 1.299072265625, 1.35467529296875, 1.4102783203125, 1.46588134765625, 1.521484375]}, "gradients/encoder.encoder.layers.15.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 3.0, 18.0, 574.0, 407.0, 9.0, 5.0, 1.0, 0.0, 0.0, 1.0], "bins": [-85.823486328125, -84.31658172607422, -82.8096694946289, -81.30276489257812, -79.79585266113281, -78.28894805908203, -76.78203582763672, -75.27513122558594, -73.76821899414062, -72.26131439208984, -70.75440216064453, -69.24749755859375, -67.74058532714844, -66.23368072509766, -64.72676849365234, -63.21986389160156, -61.712955474853516, -60.20604705810547, -58.69913864135742, -57.192230224609375, -55.68532180786133, -54.17841339111328, -52.6715087890625, -51.16459655761719, -49.657691955566406, -48.15078353881836, -46.64387512207031, -45.136966705322266, -43.63005828857422, -42.12314987182617, -40.616241455078125, -39.109336853027344, -37.6024284362793, -36.09552001953125, -34.5886116027832, -33.081703186035156, -31.57479476928711, -30.067886352539062, -28.56097984313965, -27.0540714263916, -25.547163009643555, -24.040254592895508, -22.53334617614746, -21.026439666748047, -19.51953125, -18.012622833251953, -16.505714416503906, -14.99880599975586, -13.491897583007812, -11.984989166259766, -10.478080749511719, -8.971173286437988, -7.464264869689941, -5.9573564529418945, -4.450448989868164, -2.943540573120117, -1.4366321563720703, 0.07027602195739746, 1.5771842002868652, 3.084092140197754, 4.591000556945801, 6.097908973693848, 7.604816436767578, 9.111724853515625, 10.618633270263672]}, "gradients/encoder.encoder.layers.15.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 4.0, 1.0, 7.0, 6.0, 9.0, 7.0, 11.0, 14.0, 22.0, 28.0, 39.0, 33.0, 52.0, 60.0, 65.0, 57.0, 70.0, 65.0, 80.0, 63.0, 62.0, 36.0, 49.0, 26.0, 36.0, 24.0, 17.0, 24.0, 12.0, 9.0, 5.0, 5.0, 4.0, 4.0, 3.0, 5.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0], "bins": [-5.981032848358154, -5.809842586517334, -5.638652324676514, -5.467462062835693, -5.296271800994873, -5.125082015991211, -4.953891754150391, -4.78270149230957, -4.61151123046875, -4.44032096862793, -4.269130706787109, -4.097940444946289, -3.926750421524048, -3.7555601596832275, -3.5843698978424072, -3.413179874420166, -3.2419893741607666, -3.0707991123199463, -2.899608850479126, -2.7284188270568848, -2.5572285652160645, -2.386038303375244, -2.214848041534424, -2.0436577796936035, -1.8724676370620728, -1.7012773752212524, -1.5300872325897217, -1.3588969707489014, -1.187706708908081, -1.0165165662765503, -0.84532630443573, -0.6741361618041992, -0.5029458999633789, -0.33175569772720337, -0.16056546568870544, 0.01062476634979248, 0.18181496858596802, 0.35300517082214355, 0.5241954326629639, 0.6953855752944946, 0.8665758371353149, 1.0377660989761353, 1.208956241607666, 1.3801465034484863, 1.5513367652893066, 1.7225269079208374, 1.8937171697616577, 2.0649073123931885, 2.236097574234009, 2.407287836074829, 2.5784780979156494, 2.7496681213378906, 2.920858383178711, 3.0920486450195312, 3.2632389068603516, 3.434429168701172, 3.605619430541992, 3.7768096923828125, 3.947999954223633, 4.119190216064453, 4.290380477905273, 4.461570739746094, 4.632761001586914, 4.803950786590576, 4.9751410484313965]}, "gradients/encoder.encoder.layers.15.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 2.0, 1.0, 3.0, 6.0, 2.0, 2.0, 7.0, 7.0, 8.0, 11.0, 12.0, 19.0, 28.0, 29.0, 47.0, 72.0, 86.0, 129.0, 204.0, 277.0, 451.0, 858.0, 1653.0, 3603.0, 8975.0, 27559.0, 129690.0, 662432.0, 161923.0, 32332.0, 9860.0, 3929.0, 1850.0, 941.0, 542.0, 317.0, 226.0, 123.0, 104.0, 61.0, 60.0, 40.0, 22.0, 17.0, 15.0, 8.0, 7.0, 4.0, 4.0, 2.0, 3.0, 1.0, 1.0, 2.0, 1.0, 0.0, 2.0], "bins": [-4.05078125, -3.933563232421875, -3.81634521484375, -3.699127197265625, -3.5819091796875, -3.464691162109375, -3.34747314453125, -3.230255126953125, -3.113037109375, -2.995819091796875, -2.87860107421875, -2.761383056640625, -2.6441650390625, -2.526947021484375, -2.40972900390625, -2.292510986328125, -2.17529296875, -2.058074951171875, -1.94085693359375, -1.823638916015625, -1.7064208984375, -1.589202880859375, -1.47198486328125, -1.354766845703125, -1.237548828125, -1.120330810546875, -1.00311279296875, -0.885894775390625, -0.7686767578125, -0.651458740234375, -0.53424072265625, -0.417022705078125, -0.2998046875, -0.182586669921875, -0.06536865234375, 0.051849365234375, 0.1690673828125, 0.286285400390625, 0.40350341796875, 0.520721435546875, 0.637939453125, 0.755157470703125, 0.87237548828125, 0.989593505859375, 1.1068115234375, 1.224029541015625, 1.34124755859375, 1.458465576171875, 1.57568359375, 1.692901611328125, 1.81011962890625, 1.927337646484375, 2.0445556640625, 2.161773681640625, 2.27899169921875, 2.396209716796875, 2.513427734375, 2.630645751953125, 2.74786376953125, 2.865081787109375, 2.9822998046875, 3.099517822265625, 3.21673583984375, 3.333953857421875, 3.451171875]}, "gradients/encoder.encoder.layers.15.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 5.0, 4.0, 2.0, 3.0, 3.0, 5.0, 15.0, 14.0, 16.0, 14.0, 36.0, 44.0, 48.0, 62.0, 66.0, 73.0, 83.0, 71.0, 85.0, 81.0, 53.0, 50.0, 32.0, 36.0, 27.0, 19.0, 12.0, 16.0, 9.0, 6.0, 8.0, 6.0, 1.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-1.2236328125, -1.1905746459960938, -1.1575164794921875, -1.1244583129882812, -1.091400146484375, -1.0583419799804688, -1.0252838134765625, -0.9922256469726562, -0.95916748046875, -0.9261093139648438, -0.8930511474609375, -0.8599929809570312, -0.826934814453125, -0.7938766479492188, -0.7608184814453125, -0.7277603149414062, -0.6947021484375, -0.6616439819335938, -0.6285858154296875, -0.5955276489257812, -0.562469482421875, -0.5294113159179688, -0.4963531494140625, -0.46329498291015625, -0.43023681640625, -0.39717864990234375, -0.3641204833984375, -0.33106231689453125, -0.298004150390625, -0.26494598388671875, -0.2318878173828125, -0.19882965087890625, -0.165771484375, -0.13271331787109375, -0.0996551513671875, -0.06659698486328125, -0.033538818359375, -0.00048065185546875, 0.0325775146484375, 0.06563568115234375, 0.09869384765625, 0.13175201416015625, 0.1648101806640625, 0.19786834716796875, 0.230926513671875, 0.26398468017578125, 0.2970428466796875, 0.33010101318359375, 0.3631591796875, 0.39621734619140625, 0.4292755126953125, 0.46233367919921875, 0.495391845703125, 0.5284500122070312, 0.5615081787109375, 0.5945663452148438, 0.62762451171875, 0.6606826782226562, 0.6937408447265625, 0.7267990112304688, 0.759857177734375, 0.7929153442382812, 0.8259735107421875, 0.8590316772460938, 0.89208984375]}, "gradients/encoder.encoder.layers.15.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 0.0, 1.0, 2.0, 6.0, 0.0, 4.0, 6.0, 11.0, 11.0, 16.0, 14.0, 27.0, 51.0, 56.0, 73.0, 122.0, 164.0, 248.0, 384.0, 660.0, 1215.0, 2537.0, 5994.0, 17798.0, 64206.0, 349383.0, 481042.0, 87585.0, 22884.0, 7644.0, 2953.0, 1399.0, 724.0, 447.0, 271.0, 173.0, 117.0, 85.0, 55.0, 46.0, 31.0, 35.0, 19.0, 12.0, 14.0, 9.0, 7.0, 10.0, 3.0, 1.0, 1.0, 3.0, 5.0, 0.0, 2.0], "bins": [-3.005859375, -2.92022705078125, -2.8345947265625, -2.74896240234375, -2.663330078125, -2.57769775390625, -2.4920654296875, -2.40643310546875, -2.32080078125, -2.23516845703125, -2.1495361328125, -2.06390380859375, -1.978271484375, -1.89263916015625, -1.8070068359375, -1.72137451171875, -1.6357421875, -1.55010986328125, -1.4644775390625, -1.37884521484375, -1.293212890625, -1.20758056640625, -1.1219482421875, -1.03631591796875, -0.95068359375, -0.86505126953125, -0.7794189453125, -0.69378662109375, -0.608154296875, -0.52252197265625, -0.4368896484375, -0.35125732421875, -0.265625, -0.17999267578125, -0.0943603515625, -0.00872802734375, 0.076904296875, 0.16253662109375, 0.2481689453125, 0.33380126953125, 0.41943359375, 0.50506591796875, 0.5906982421875, 0.67633056640625, 0.761962890625, 0.84759521484375, 0.9332275390625, 1.01885986328125, 1.1044921875, 1.19012451171875, 1.2757568359375, 1.36138916015625, 1.447021484375, 1.53265380859375, 1.6182861328125, 1.70391845703125, 1.78955078125, 1.87518310546875, 1.9608154296875, 2.04644775390625, 2.132080078125, 2.21771240234375, 2.3033447265625, 2.38897705078125, 2.474609375]}, "gradients/encoder.encoder.layers.15.attention.v_proj.bias": {"_type": "histogram", "values": [3.0, 1.0, 0.0, 1.0, 2.0, 2.0, 1.0, 3.0, 3.0, 7.0, 4.0, 4.0, 3.0, 12.0, 11.0, 11.0, 20.0, 8.0, 18.0, 32.0, 27.0, 29.0, 31.0, 32.0, 34.0, 37.0, 37.0, 52.0, 37.0, 36.0, 40.0, 34.0, 38.0, 41.0, 32.0, 34.0, 37.0, 39.0, 30.0, 24.0, 18.0, 23.0, 23.0, 17.0, 16.0, 18.0, 6.0, 11.0, 10.0, 4.0, 6.0, 4.0, 3.0, 4.0, 5.0, 0.0, 3.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-2.541015625, -2.457489013671875, -2.37396240234375, -2.290435791015625, -2.2069091796875, -2.123382568359375, -2.03985595703125, -1.956329345703125, -1.872802734375, -1.789276123046875, -1.70574951171875, -1.622222900390625, -1.5386962890625, -1.455169677734375, -1.37164306640625, -1.288116455078125, -1.20458984375, -1.121063232421875, -1.03753662109375, -0.954010009765625, -0.8704833984375, -0.786956787109375, -0.70343017578125, -0.619903564453125, -0.536376953125, -0.452850341796875, -0.36932373046875, -0.285797119140625, -0.2022705078125, -0.118743896484375, -0.03521728515625, 0.048309326171875, 0.1318359375, 0.215362548828125, 0.29888916015625, 0.382415771484375, 0.4659423828125, 0.549468994140625, 0.63299560546875, 0.716522216796875, 0.800048828125, 0.883575439453125, 0.96710205078125, 1.050628662109375, 1.1341552734375, 1.217681884765625, 1.30120849609375, 1.384735107421875, 1.46826171875, 1.551788330078125, 1.63531494140625, 1.718841552734375, 1.8023681640625, 1.885894775390625, 1.96942138671875, 2.052947998046875, 2.136474609375, 2.220001220703125, 2.30352783203125, 2.387054443359375, 2.4705810546875, 2.554107666015625, 2.63763427734375, 2.721160888671875, 2.8046875]}, "gradients/encoder.encoder.layers.15.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 1.0, 4.0, 4.0, 7.0, 5.0, 18.0, 15.0, 20.0, 38.0, 72.0, 144.0, 275.0, 609.0, 1814.0, 8160.0, 75054.0, 851853.0, 97552.0, 9515.0, 2019.0, 680.0, 323.0, 148.0, 102.0, 40.0, 31.0, 19.0, 9.0, 8.0, 7.0, 5.0, 6.0, 2.0, 4.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.4677734375, -1.41448974609375, -1.3612060546875, -1.30792236328125, -1.254638671875, -1.20135498046875, -1.1480712890625, -1.09478759765625, -1.04150390625, -0.98822021484375, -0.9349365234375, -0.88165283203125, -0.828369140625, -0.77508544921875, -0.7218017578125, -0.66851806640625, -0.615234375, -0.56195068359375, -0.5086669921875, -0.45538330078125, -0.402099609375, -0.34881591796875, -0.2955322265625, -0.24224853515625, -0.18896484375, -0.13568115234375, -0.0823974609375, -0.02911376953125, 0.024169921875, 0.07745361328125, 0.1307373046875, 0.18402099609375, 0.2373046875, 0.29058837890625, 0.3438720703125, 0.39715576171875, 0.450439453125, 0.50372314453125, 0.5570068359375, 0.61029052734375, 0.66357421875, 0.71685791015625, 0.7701416015625, 0.82342529296875, 0.876708984375, 0.92999267578125, 0.9832763671875, 1.03656005859375, 1.08984375, 1.14312744140625, 1.1964111328125, 1.24969482421875, 1.302978515625, 1.35626220703125, 1.4095458984375, 1.46282958984375, 1.51611328125, 1.56939697265625, 1.6226806640625, 1.67596435546875, 1.729248046875, 1.78253173828125, 1.8358154296875, 1.88909912109375, 1.9423828125]}, "gradients/encoder.encoder.layers.15.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 4.0, 0.0, 2.0, 5.0, 5.0, 6.0, 2.0, 11.0, 8.0, 9.0, 16.0, 17.0, 34.0, 53.0, 57.0, 101.0, 144.0, 171.0, 121.0, 74.0, 43.0, 37.0, 18.0, 15.0, 11.0, 10.0, 6.0, 7.0, 6.0, 6.0, 2.0, 1.0, 3.0, 3.0, 4.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0], "bins": [-0.0002048015594482422, -0.00019959546625614166, -0.00019438937306404114, -0.0001891832798719406, -0.0001839771866798401, -0.00017877109348773956, -0.00017356500029563904, -0.0001683589071035385, -0.000163152813911438, -0.00015794672071933746, -0.00015274062752723694, -0.00014753453433513641, -0.0001423284411430359, -0.00013712234795093536, -0.00013191625475883484, -0.00012671016156673431, -0.00012150406837463379, -0.00011629797518253326, -0.00011109188199043274, -0.00010588578879833221, -0.00010067969560623169, -9.547360241413116e-05, -9.026750922203064e-05, -8.506141602993011e-05, -7.985532283782959e-05, -7.464922964572906e-05, -6.944313645362854e-05, -6.423704326152802e-05, -5.903095006942749e-05, -5.3824856877326965e-05, -4.861876368522644e-05, -4.3412670493125916e-05, -3.820657730102539e-05, -3.3000484108924866e-05, -2.779439091682434e-05, -2.2588297724723816e-05, -1.738220453262329e-05, -1.2176111340522766e-05, -6.970018148422241e-06, -1.7639249563217163e-06, 3.4421682357788086e-06, 8.648261427879333e-06, 1.3854354619979858e-05, 1.9060447812080383e-05, 2.4266541004180908e-05, 2.9472634196281433e-05, 3.467872738838196e-05, 3.988482058048248e-05, 4.509091377258301e-05, 5.029700696468353e-05, 5.550310015678406e-05, 6.070919334888458e-05, 6.591528654098511e-05, 7.112137973308563e-05, 7.632747292518616e-05, 8.153356611728668e-05, 8.673965930938721e-05, 9.194575250148773e-05, 9.715184569358826e-05, 0.00010235793888568878, 0.0001075640320777893, 0.00011277012526988983, 0.00011797621846199036, 0.00012318231165409088, 0.0001283884048461914]}, "gradients/encoder.encoder.layers.15.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 4.0, 2.0, 4.0, 9.0, 7.0, 11.0, 33.0, 52.0, 123.0, 244.0, 1035.0, 6589.0, 149965.0, 868278.0, 19436.0, 2031.0, 426.0, 169.0, 66.0, 28.0, 16.0, 12.0, 7.0, 2.0, 4.0, 1.0, 4.0, 1.0, 3.0, 0.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.15234375, -2.06695556640625, -1.9815673828125, -1.89617919921875, -1.810791015625, -1.72540283203125, -1.6400146484375, -1.55462646484375, -1.46923828125, -1.38385009765625, -1.2984619140625, -1.21307373046875, -1.127685546875, -1.04229736328125, -0.9569091796875, -0.87152099609375, -0.7861328125, -0.70074462890625, -0.6153564453125, -0.52996826171875, -0.444580078125, -0.35919189453125, -0.2738037109375, -0.18841552734375, -0.10302734375, -0.01763916015625, 0.0677490234375, 0.15313720703125, 0.238525390625, 0.32391357421875, 0.4093017578125, 0.49468994140625, 0.580078125, 0.66546630859375, 0.7508544921875, 0.83624267578125, 0.921630859375, 1.00701904296875, 1.0924072265625, 1.17779541015625, 1.26318359375, 1.34857177734375, 1.4339599609375, 1.51934814453125, 1.604736328125, 1.69012451171875, 1.7755126953125, 1.86090087890625, 1.9462890625, 2.03167724609375, 2.1170654296875, 2.20245361328125, 2.287841796875, 2.37322998046875, 2.4586181640625, 2.54400634765625, 2.62939453125, 2.71478271484375, 2.8001708984375, 2.88555908203125, 2.970947265625, 3.05633544921875, 3.1417236328125, 3.22711181640625, 3.3125]}, "gradients/encoder.encoder.layers.15.attention.q_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 1.0, 0.0, 3.0, 0.0, 1.0, 4.0, 5.0, 5.0, 3.0, 3.0, 4.0, 4.0, 7.0, 15.0, 8.0, 22.0, 17.0, 15.0, 19.0, 40.0, 72.0, 82.0, 117.0, 140.0, 104.0, 87.0, 46.0, 39.0, 33.0, 28.0, 15.0, 12.0, 10.0, 10.0, 8.0, 6.0, 2.0, 6.0, 4.0, 5.0, 5.0, 2.0, 0.0, 0.0, 1.0, 4.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.8515625, -0.819366455078125, -0.78717041015625, -0.754974365234375, -0.7227783203125, -0.690582275390625, -0.65838623046875, -0.626190185546875, -0.593994140625, -0.561798095703125, -0.52960205078125, -0.497406005859375, -0.4652099609375, -0.433013916015625, -0.40081787109375, -0.368621826171875, -0.33642578125, -0.304229736328125, -0.27203369140625, -0.239837646484375, -0.2076416015625, -0.175445556640625, -0.14324951171875, -0.111053466796875, -0.078857421875, -0.046661376953125, -0.01446533203125, 0.017730712890625, 0.0499267578125, 0.082122802734375, 0.11431884765625, 0.146514892578125, 0.1787109375, 0.210906982421875, 0.24310302734375, 0.275299072265625, 0.3074951171875, 0.339691162109375, 0.37188720703125, 0.404083251953125, 0.436279296875, 0.468475341796875, 0.50067138671875, 0.532867431640625, 0.5650634765625, 0.597259521484375, 0.62945556640625, 0.661651611328125, 0.69384765625, 0.726043701171875, 0.75823974609375, 0.790435791015625, 0.8226318359375, 0.854827880859375, 0.88702392578125, 0.919219970703125, 0.951416015625, 0.983612060546875, 1.01580810546875, 1.048004150390625, 1.0802001953125, 1.112396240234375, 1.14459228515625, 1.176788330078125, 1.208984375]}, "gradients/encoder.encoder.layers.15.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 4.0, 6.0, 105.0, 739.0, 157.0, 7.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-193.56809997558594, -190.08590698242188, -186.6037139892578, -183.1215362548828, -179.63934326171875, -176.1571502685547, -172.67495727539062, -169.19277954101562, -165.71058654785156, -162.2283935546875, -158.74620056152344, -155.26402282714844, -151.78182983398438, -148.2996368408203, -144.81744384765625, -141.33526611328125, -137.85305786132812, -134.37086486816406, -130.888671875, -127.40648651123047, -123.92430114746094, -120.44210815429688, -116.95991516113281, -113.47772979736328, -109.99554443359375, -106.51335144042969, -103.03116607666016, -99.5489730834961, -96.06678771972656, -92.5845947265625, -89.10240173339844, -85.6202163696289, -82.13803100585938, -78.65583801269531, -75.17365264892578, -71.69145965576172, -68.20927429199219, -64.72708129882812, -61.24489212036133, -57.76270294189453, -54.280513763427734, -50.79832458496094, -47.31613540649414, -43.833946228027344, -40.35175323486328, -36.86956787109375, -33.38737487792969, -29.90518569946289, -26.422996520996094, -22.940807342529297, -19.4586181640625, -15.97642707824707, -12.494237899780273, -9.012048721313477, -5.529857635498047, -2.04766845703125, 1.4345207214355469, 4.916710376739502, 8.398900032043457, 11.88109016418457, 15.363279342651367, 18.845468521118164, 22.327659606933594, 25.80984878540039, 29.292037963867188]}, "gradients/encoder.encoder.layers.15.layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 0.0, 5.0, 5.0, 5.0, 6.0, 4.0, 6.0, 9.0, 12.0, 9.0, 17.0, 15.0, 26.0, 31.0, 41.0, 33.0, 34.0, 46.0, 32.0, 33.0, 51.0, 51.0, 33.0, 36.0, 34.0, 45.0, 44.0, 41.0, 43.0, 42.0, 29.0, 43.0, 31.0, 24.0, 19.0, 13.0, 12.0, 10.0, 8.0, 9.0, 10.0, 1.0, 5.0, 3.0, 2.0, 1.0, 2.0, 1.0, 1.0, 2.0, 1.0, 0.0, 1.0], "bins": [-16.28242301940918, -15.79780101776123, -15.313179016113281, -14.828557014465332, -14.343935012817383, -13.859313011169434, -13.374691009521484, -12.890069007873535, -12.405447006225586, -11.920825004577637, -11.436203002929688, -10.951581001281738, -10.466958999633789, -9.98233699798584, -9.49771499633789, -9.013092994689941, -8.528470993041992, -8.043848991394043, -7.559226989746094, -7.0746049880981445, -6.589982986450195, -6.105360984802246, -5.620738983154297, -5.136116981506348, -4.651494979858398, -4.166872978210449, -3.6822509765625, -3.197628974914551, -2.7130069732666016, -2.2283849716186523, -1.7437629699707031, -1.259140968322754, -0.7745189666748047, -0.28989696502685547, 0.19472503662109375, 0.679347038269043, 1.1639690399169922, 1.6485910415649414, 2.1332130432128906, 2.61783504486084, 3.102457046508789, 3.5870790481567383, 4.0717010498046875, 4.556323051452637, 5.040945053100586, 5.525567054748535, 6.010189056396484, 6.494811058044434, 6.979433059692383, 7.464055061340332, 7.948677062988281, 8.43329906463623, 8.91792106628418, 9.402543067932129, 9.887165069580078, 10.371787071228027, 10.856409072875977, 11.341031074523926, 11.825653076171875, 12.310275077819824, 12.794897079467773, 13.279519081115723, 13.764141082763672, 14.248763084411621, 14.73338508605957]}, "gradients/encoder.encoder.layers.14.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 3.0, 0.0, 0.0, 2.0, 5.0, 6.0, 13.0, 8.0, 9.0, 15.0, 18.0, 19.0, 32.0, 52.0, 77.0, 95.0, 203.0, 305.0, 672.0, 1369.0, 3233.0, 9966.0, 47879.0, 3569763.0, 516230.0, 31449.0, 7596.0, 2767.0, 1184.0, 558.0, 305.0, 169.0, 80.0, 73.0, 41.0, 24.0, 19.0, 22.0, 12.0, 2.0, 6.0, 4.0, 2.0, 4.0, 0.0, 3.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.31640625, -3.21240234375, -3.1083984375, -3.00439453125, -2.900390625, -2.79638671875, -2.6923828125, -2.58837890625, -2.484375, -2.38037109375, -2.2763671875, -2.17236328125, -2.068359375, -1.96435546875, -1.8603515625, -1.75634765625, -1.65234375, -1.54833984375, -1.4443359375, -1.34033203125, -1.236328125, -1.13232421875, -1.0283203125, -0.92431640625, -0.8203125, -0.71630859375, -0.6123046875, -0.50830078125, -0.404296875, -0.30029296875, -0.1962890625, -0.09228515625, 0.01171875, 0.11572265625, 0.2197265625, 0.32373046875, 0.427734375, 0.53173828125, 0.6357421875, 0.73974609375, 0.84375, 0.94775390625, 1.0517578125, 1.15576171875, 1.259765625, 1.36376953125, 1.4677734375, 1.57177734375, 1.67578125, 1.77978515625, 1.8837890625, 1.98779296875, 2.091796875, 2.19580078125, 2.2998046875, 2.40380859375, 2.5078125, 2.61181640625, 2.7158203125, 2.81982421875, 2.923828125, 3.02783203125, 3.1318359375, 3.23583984375, 3.33984375]}, "gradients/encoder.encoder.layers.14.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 3.0, 0.0, 2.0, 3.0, 3.0, 4.0, 6.0, 8.0, 11.0, 11.0, 24.0, 23.0, 25.0, 52.0, 52.0, 54.0, 70.0, 68.0, 82.0, 81.0, 75.0, 64.0, 66.0, 42.0, 47.0, 33.0, 23.0, 28.0, 11.0, 11.0, 9.0, 8.0, 6.0, 3.0, 0.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-1.3603515625, -1.3234939575195312, -1.2866363525390625, -1.2497787475585938, -1.212921142578125, -1.1760635375976562, -1.1392059326171875, -1.1023483276367188, -1.06549072265625, -1.0286331176757812, -0.9917755126953125, -0.9549179077148438, -0.918060302734375, -0.8812026977539062, -0.8443450927734375, -0.8074874877929688, -0.7706298828125, -0.7337722778320312, -0.6969146728515625, -0.6600570678710938, -0.623199462890625, -0.5863418579101562, -0.5494842529296875, -0.5126266479492188, -0.47576904296875, -0.43891143798828125, -0.4020538330078125, -0.36519622802734375, -0.328338623046875, -0.29148101806640625, -0.2546234130859375, -0.21776580810546875, -0.180908203125, -0.14405059814453125, -0.1071929931640625, -0.07033538818359375, -0.033477783203125, 0.00337982177734375, 0.0402374267578125, 0.07709503173828125, 0.11395263671875, 0.15081024169921875, 0.1876678466796875, 0.22452545166015625, 0.261383056640625, 0.29824066162109375, 0.3350982666015625, 0.37195587158203125, 0.4088134765625, 0.44567108154296875, 0.4825286865234375, 0.5193862915039062, 0.556243896484375, 0.5931015014648438, 0.6299591064453125, 0.6668167114257812, 0.70367431640625, 0.7405319213867188, 0.7773895263671875, 0.8142471313476562, 0.851104736328125, 0.8879623413085938, 0.9248199462890625, 0.9616775512695312, 0.99853515625]}, "gradients/encoder.encoder.layers.14.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 0.0, 2.0, 1.0, 5.0, 2.0, 4.0, 6.0, 7.0, 16.0, 25.0, 48.0, 59.0, 106.0, 224.0, 581.0, 2359.0, 16270.0, 1309433.0, 2844499.0, 17278.0, 2365.0, 568.0, 207.0, 100.0, 54.0, 30.0, 18.0, 11.0, 7.0, 2.0, 1.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.15234375, -3.9637451171875, -3.775146484375, -3.5865478515625, -3.39794921875, -3.2093505859375, -3.020751953125, -2.8321533203125, -2.6435546875, -2.4549560546875, -2.266357421875, -2.0777587890625, -1.88916015625, -1.7005615234375, -1.511962890625, -1.3233642578125, -1.134765625, -0.9461669921875, -0.757568359375, -0.5689697265625, -0.38037109375, -0.1917724609375, -0.003173828125, 0.1854248046875, 0.3740234375, 0.5626220703125, 0.751220703125, 0.9398193359375, 1.12841796875, 1.3170166015625, 1.505615234375, 1.6942138671875, 1.8828125, 2.0714111328125, 2.260009765625, 2.4486083984375, 2.63720703125, 2.8258056640625, 3.014404296875, 3.2030029296875, 3.3916015625, 3.5802001953125, 3.768798828125, 3.9573974609375, 4.14599609375, 4.3345947265625, 4.523193359375, 4.7117919921875, 4.900390625, 5.0889892578125, 5.277587890625, 5.4661865234375, 5.65478515625, 5.8433837890625, 6.031982421875, 6.2205810546875, 6.4091796875, 6.5977783203125, 6.786376953125, 6.9749755859375, 7.16357421875, 7.3521728515625, 7.540771484375, 7.7293701171875, 7.91796875]}, "gradients/encoder.encoder.layers.14.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 2.0, 2.0, 3.0, 5.0, 2.0, 14.0, 13.0, 25.0, 34.0, 88.0, 219.0, 1616.0, 1696.0, 203.0, 74.0, 41.0, 21.0, 13.0, 5.0, 6.0, 0.0, 1.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.591796875, -2.527008056640625, -2.46221923828125, -2.397430419921875, -2.3326416015625, -2.267852783203125, -2.20306396484375, -2.138275146484375, -2.073486328125, -2.008697509765625, -1.94390869140625, -1.879119873046875, -1.8143310546875, -1.749542236328125, -1.68475341796875, -1.619964599609375, -1.55517578125, -1.490386962890625, -1.42559814453125, -1.360809326171875, -1.2960205078125, -1.231231689453125, -1.16644287109375, -1.101654052734375, -1.036865234375, -0.972076416015625, -0.90728759765625, -0.842498779296875, -0.7777099609375, -0.712921142578125, -0.64813232421875, -0.583343505859375, -0.5185546875, -0.453765869140625, -0.38897705078125, -0.324188232421875, -0.2593994140625, -0.194610595703125, -0.12982177734375, -0.065032958984375, -0.000244140625, 0.064544677734375, 0.12933349609375, 0.194122314453125, 0.2589111328125, 0.323699951171875, 0.38848876953125, 0.453277587890625, 0.51806640625, 0.582855224609375, 0.64764404296875, 0.712432861328125, 0.7772216796875, 0.842010498046875, 0.90679931640625, 0.971588134765625, 1.036376953125, 1.101165771484375, 1.16595458984375, 1.230743408203125, 1.2955322265625, 1.360321044921875, 1.42510986328125, 1.489898681640625, 1.5546875]}, "gradients/encoder.encoder.layers.14.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 5.0, 7.0, 13.0, 43.0, 111.0, 236.0, 305.0, 183.0, 73.0, 18.0, 10.0, 3.0, 2.0, 1.0, 3.0], "bins": [-21.966527938842773, -21.569799423217773, -21.173070907592773, -20.776342391967773, -20.379613876342773, -19.98288345336914, -19.58615493774414, -19.18942642211914, -18.79269790649414, -18.39596939086914, -17.99924087524414, -17.60251235961914, -17.20578384399414, -16.809053421020508, -16.412324905395508, -16.015596389770508, -15.618867874145508, -15.222139358520508, -14.825410842895508, -14.428681373596191, -14.031952857971191, -13.635224342346191, -13.238495826721191, -12.841766357421875, -12.445038795471191, -12.048310279846191, -11.651581764221191, -11.254852294921875, -10.858123779296875, -10.461395263671875, -10.064666748046875, -9.667938232421875, -9.271209716796875, -8.874481201171875, -8.477752685546875, -8.081023216247559, -7.684294700622559, -7.287566184997559, -6.890837669372559, -6.4941086769104, -6.097379684448242, -5.700651168823242, -5.303922176361084, -4.907193660736084, -4.510464668273926, -4.113736152648926, -3.7170073986053467, -3.3202786445617676, -2.9235496520996094, -2.5268208980560303, -2.130092144012451, -1.7333635091781616, -1.3366347551345825, -0.9399060010910034, -0.5431773662567139, -0.14644861221313477, 0.25028014183044434, 0.6470088958740234, 1.0437376499176025, 1.440466284751892, 1.8371950387954712, 2.23392391204834, 2.63065242767334, 3.027381181716919, 3.424109935760498]}, "gradients/encoder.encoder.layers.14.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 3.0, 1.0, 2.0, 2.0, 4.0, 4.0, 4.0, 11.0, 13.0, 6.0, 14.0, 15.0, 27.0, 24.0, 32.0, 32.0, 44.0, 41.0, 50.0, 36.0, 66.0, 46.0, 54.0, 40.0, 40.0, 59.0, 36.0, 45.0, 27.0, 30.0, 29.0, 28.0, 29.0, 17.0, 24.0, 19.0, 8.0, 9.0, 9.0, 7.0, 4.0, 7.0, 7.0, 3.0, 1.0, 4.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-4.06730842590332, -3.9501895904541016, -3.833070755004883, -3.715952157974243, -3.5988333225250244, -3.4817144870758057, -3.364595890045166, -3.2474770545959473, -3.1303582191467285, -3.0132393836975098, -2.896120548248291, -2.7790019512176514, -2.6618831157684326, -2.544764280319214, -2.427645683288574, -2.3105268478393555, -2.1934080123901367, -2.076289176940918, -1.9591704607009888, -1.8420517444610596, -1.7249329090118408, -1.607814073562622, -1.4906953573226929, -1.3735766410827637, -1.256457805633545, -1.1393389701843262, -1.022220253944397, -0.905101478099823, -0.787982702255249, -0.670863926410675, -0.5537451505661011, -0.4366263747215271, -0.3195078372955322, -0.20238906145095825, -0.08527028560638428, 0.0318484902381897, 0.14896726608276367, 0.26608604192733765, 0.3832048177719116, 0.5003235936164856, 0.6174423694610596, 0.7345611453056335, 0.8516799211502075, 0.9687986969947815, 1.0859174728393555, 1.2030363082885742, 1.3201550245285034, 1.4372737407684326, 1.5543925762176514, 1.6715114116668701, 1.7886301279067993, 1.9057488441467285, 2.0228676795959473, 2.139986515045166, 2.2571053504943848, 2.3742239475250244, 2.491342782974243, 2.608461618423462, 2.7255802154541016, 2.8426990509033203, 2.959817886352539, 3.076936721801758, 3.1940555572509766, 3.311174154281616, 3.428292989730835]}, "gradients/encoder.encoder.layers.14.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 3.0, 1.0, 1.0, 1.0, 1.0, 3.0, 5.0, 5.0, 8.0, 13.0, 17.0, 25.0, 32.0, 60.0, 72.0, 123.0, 159.0, 284.0, 484.0, 1003.0, 2137.0, 6409.0, 25381.0, 151599.0, 675845.0, 149219.0, 25035.0, 6271.0, 2128.0, 967.0, 510.0, 257.0, 175.0, 114.0, 94.0, 40.0, 25.0, 22.0, 14.0, 6.0, 8.0, 3.0, 4.0, 4.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-4.484375, -4.34637451171875, -4.2083740234375, -4.07037353515625, -3.932373046875, -3.79437255859375, -3.6563720703125, -3.51837158203125, -3.38037109375, -3.24237060546875, -3.1043701171875, -2.96636962890625, -2.828369140625, -2.69036865234375, -2.5523681640625, -2.41436767578125, -2.2763671875, -2.13836669921875, -2.0003662109375, -1.86236572265625, -1.724365234375, -1.58636474609375, -1.4483642578125, -1.31036376953125, -1.17236328125, -1.03436279296875, -0.8963623046875, -0.75836181640625, -0.620361328125, -0.48236083984375, -0.3443603515625, -0.20635986328125, -0.068359375, 0.06964111328125, 0.2076416015625, 0.34564208984375, 0.483642578125, 0.62164306640625, 0.7596435546875, 0.89764404296875, 1.03564453125, 1.17364501953125, 1.3116455078125, 1.44964599609375, 1.587646484375, 1.72564697265625, 1.8636474609375, 2.00164794921875, 2.1396484375, 2.27764892578125, 2.4156494140625, 2.55364990234375, 2.691650390625, 2.82965087890625, 2.9676513671875, 3.10565185546875, 3.24365234375, 3.38165283203125, 3.5196533203125, 3.65765380859375, 3.795654296875, 3.93365478515625, 4.0716552734375, 4.20965576171875, 4.34765625]}, "gradients/encoder.encoder.layers.14.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 3.0, 4.0, 2.0, 5.0, 9.0, 10.0, 16.0, 26.0, 25.0, 38.0, 38.0, 51.0, 60.0, 66.0, 64.0, 84.0, 97.0, 63.0, 59.0, 68.0, 57.0, 43.0, 25.0, 23.0, 15.0, 18.0, 11.0, 6.0, 7.0, 6.0, 5.0, 0.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.349609375, -1.311859130859375, -1.27410888671875, -1.236358642578125, -1.1986083984375, -1.160858154296875, -1.12310791015625, -1.085357666015625, -1.047607421875, -1.009857177734375, -0.97210693359375, -0.934356689453125, -0.8966064453125, -0.858856201171875, -0.82110595703125, -0.783355712890625, -0.74560546875, -0.707855224609375, -0.67010498046875, -0.632354736328125, -0.5946044921875, -0.556854248046875, -0.51910400390625, -0.481353759765625, -0.443603515625, -0.405853271484375, -0.36810302734375, -0.330352783203125, -0.2926025390625, -0.254852294921875, -0.21710205078125, -0.179351806640625, -0.1416015625, -0.103851318359375, -0.06610107421875, -0.028350830078125, 0.0093994140625, 0.047149658203125, 0.08489990234375, 0.122650146484375, 0.160400390625, 0.198150634765625, 0.23590087890625, 0.273651123046875, 0.3114013671875, 0.349151611328125, 0.38690185546875, 0.424652099609375, 0.46240234375, 0.500152587890625, 0.53790283203125, 0.575653076171875, 0.6134033203125, 0.651153564453125, 0.68890380859375, 0.726654052734375, 0.764404296875, 0.802154541015625, 0.83990478515625, 0.877655029296875, 0.9154052734375, 0.953155517578125, 0.99090576171875, 1.028656005859375, 1.06640625]}, "gradients/encoder.encoder.layers.14.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 4.0, 1.0, 3.0, 3.0, 7.0, 9.0, 12.0, 11.0, 21.0, 23.0, 39.0, 42.0, 64.0, 109.0, 162.0, 224.0, 440.0, 892.0, 1986.0, 5381.0, 17006.0, 70601.0, 478360.0, 389609.0, 60125.0, 14945.0, 4709.0, 1802.0, 811.0, 446.0, 264.0, 135.0, 92.0, 65.0, 39.0, 36.0, 26.0, 17.0, 6.0, 8.0, 12.0, 3.0, 5.0, 3.0, 2.0, 3.0, 1.0, 1.0, 1.0, 0.0, 3.0, 1.0, 1.0, 0.0, 1.0], "bins": [-3.5625, -3.45098876953125, -3.3394775390625, -3.22796630859375, -3.116455078125, -3.00494384765625, -2.8934326171875, -2.78192138671875, -2.67041015625, -2.55889892578125, -2.4473876953125, -2.33587646484375, -2.224365234375, -2.11285400390625, -2.0013427734375, -1.88983154296875, -1.7783203125, -1.66680908203125, -1.5552978515625, -1.44378662109375, -1.332275390625, -1.22076416015625, -1.1092529296875, -0.99774169921875, -0.88623046875, -0.77471923828125, -0.6632080078125, -0.55169677734375, -0.440185546875, -0.32867431640625, -0.2171630859375, -0.10565185546875, 0.005859375, 0.11737060546875, 0.2288818359375, 0.34039306640625, 0.451904296875, 0.56341552734375, 0.6749267578125, 0.78643798828125, 0.89794921875, 1.00946044921875, 1.1209716796875, 1.23248291015625, 1.343994140625, 1.45550537109375, 1.5670166015625, 1.67852783203125, 1.7900390625, 1.90155029296875, 2.0130615234375, 2.12457275390625, 2.236083984375, 2.34759521484375, 2.4591064453125, 2.57061767578125, 2.68212890625, 2.79364013671875, 2.9051513671875, 3.01666259765625, 3.128173828125, 3.23968505859375, 3.3511962890625, 3.46270751953125, 3.57421875]}, "gradients/encoder.encoder.layers.14.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 5.0, 2.0, 1.0, 3.0, 11.0, 9.0, 8.0, 10.0, 12.0, 16.0, 17.0, 21.0, 18.0, 13.0, 25.0, 29.0, 36.0, 51.0, 43.0, 54.0, 60.0, 42.0, 49.0, 36.0, 34.0, 48.0, 47.0, 39.0, 36.0, 47.0, 24.0, 21.0, 21.0, 14.0, 20.0, 13.0, 22.0, 11.0, 10.0, 8.0, 8.0, 6.0, 6.0, 2.0, 2.0, 1.0, 1.0, 0.0, 1.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-3.021484375, -2.9207763671875, -2.820068359375, -2.7193603515625, -2.61865234375, -2.5179443359375, -2.417236328125, -2.3165283203125, -2.2158203125, -2.1151123046875, -2.014404296875, -1.9136962890625, -1.81298828125, -1.7122802734375, -1.611572265625, -1.5108642578125, -1.41015625, -1.3094482421875, -1.208740234375, -1.1080322265625, -1.00732421875, -0.9066162109375, -0.805908203125, -0.7052001953125, -0.6044921875, -0.5037841796875, -0.403076171875, -0.3023681640625, -0.20166015625, -0.1009521484375, -0.000244140625, 0.1004638671875, 0.201171875, 0.3018798828125, 0.402587890625, 0.5032958984375, 0.60400390625, 0.7047119140625, 0.805419921875, 0.9061279296875, 1.0068359375, 1.1075439453125, 1.208251953125, 1.3089599609375, 1.40966796875, 1.5103759765625, 1.611083984375, 1.7117919921875, 1.8125, 1.9132080078125, 2.013916015625, 2.1146240234375, 2.21533203125, 2.3160400390625, 2.416748046875, 2.5174560546875, 2.6181640625, 2.7188720703125, 2.819580078125, 2.9202880859375, 3.02099609375, 3.1217041015625, 3.222412109375, 3.3231201171875, 3.423828125]}, "gradients/encoder.encoder.layers.14.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 2.0, 3.0, 6.0, 10.0, 12.0, 19.0, 30.0, 51.0, 122.0, 266.0, 772.0, 3127.0, 24526.0, 743335.0, 261891.0, 11425.0, 1994.0, 578.0, 200.0, 78.0, 54.0, 20.0, 14.0, 7.0, 7.0, 3.0, 3.0, 2.0, 3.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.7607421875, -1.7036590576171875, -1.646575927734375, -1.5894927978515625, -1.53240966796875, -1.4753265380859375, -1.418243408203125, -1.3611602783203125, -1.3040771484375, -1.2469940185546875, -1.189910888671875, -1.1328277587890625, -1.07574462890625, -1.0186614990234375, -0.961578369140625, -0.9044952392578125, -0.847412109375, -0.7903289794921875, -0.733245849609375, -0.6761627197265625, -0.61907958984375, -0.5619964599609375, -0.504913330078125, -0.4478302001953125, -0.3907470703125, -0.3336639404296875, -0.276580810546875, -0.2194976806640625, -0.16241455078125, -0.1053314208984375, -0.048248291015625, 0.0088348388671875, 0.06591796875, 0.1230010986328125, 0.180084228515625, 0.2371673583984375, 0.29425048828125, 0.3513336181640625, 0.408416748046875, 0.4654998779296875, 0.5225830078125, 0.5796661376953125, 0.636749267578125, 0.6938323974609375, 0.75091552734375, 0.8079986572265625, 0.865081787109375, 0.9221649169921875, 0.979248046875, 1.0363311767578125, 1.093414306640625, 1.1504974365234375, 1.20758056640625, 1.2646636962890625, 1.321746826171875, 1.3788299560546875, 1.4359130859375, 1.4929962158203125, 1.550079345703125, 1.6071624755859375, 1.66424560546875, 1.7213287353515625, 1.778411865234375, 1.8354949951171875, 1.892578125]}, "gradients/encoder.encoder.layers.14.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 2.0, 3.0, 2.0, 3.0, 2.0, 11.0, 9.0, 7.0, 10.0, 24.0, 36.0, 49.0, 59.0, 105.0, 149.0, 164.0, 124.0, 89.0, 42.0, 30.0, 34.0, 16.0, 5.0, 6.0, 7.0, 3.0, 4.0, 7.0, 2.0, 0.0, 0.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0], "bins": [-0.00022649765014648438, -0.00022042728960514069, -0.000214356929063797, -0.0002082865685224533, -0.00020221620798110962, -0.00019614584743976593, -0.00019007548689842224, -0.00018400512635707855, -0.00017793476581573486, -0.00017186440527439117, -0.00016579404473304749, -0.0001597236841917038, -0.0001536533236503601, -0.00014758296310901642, -0.00014151260256767273, -0.00013544224202632904, -0.00012937188148498535, -0.00012330152094364166, -0.00011723116040229797, -0.00011116079986095428, -0.0001050904393196106, -9.90200787782669e-05, -9.294971823692322e-05, -8.687935769557953e-05, -8.080899715423584e-05, -7.473863661289215e-05, -6.866827607154846e-05, -6.259791553020477e-05, -5.6527554988861084e-05, -5.0457194447517395e-05, -4.4386833906173706e-05, -3.831647336483002e-05, -3.224611282348633e-05, -2.617575228214264e-05, -2.010539174079895e-05, -1.4035031199455261e-05, -7.964670658111572e-06, -1.8943101167678833e-06, 4.176050424575806e-06, 1.0246410965919495e-05, 1.6316771507263184e-05, 2.2387132048606873e-05, 2.845749258995056e-05, 3.452785313129425e-05, 4.059821367263794e-05, 4.666857421398163e-05, 5.273893475532532e-05, 5.8809295296669006e-05, 6.48796558380127e-05, 7.095001637935638e-05, 7.702037692070007e-05, 8.309073746204376e-05, 8.916109800338745e-05, 9.523145854473114e-05, 0.00010130181908607483, 0.00010737217962741852, 0.00011344254016876221, 0.0001195129007101059, 0.00012558326125144958, 0.00013165362179279327, 0.00013772398233413696, 0.00014379434287548065, 0.00014986470341682434, 0.00015593506395816803, 0.00016200542449951172]}, "gradients/encoder.encoder.layers.14.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 0.0, 0.0, 1.0, 3.0, 5.0, 3.0, 4.0, 10.0, 14.0, 31.0, 66.0, 137.0, 357.0, 1119.0, 6299.0, 109992.0, 891617.0, 34344.0, 3338.0, 745.0, 271.0, 86.0, 52.0, 24.0, 9.0, 13.0, 6.0, 3.0, 6.0, 2.0, 3.0, 1.0, 0.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.0859375, -2.0233612060546875, -1.960784912109375, -1.8982086181640625, -1.83563232421875, -1.7730560302734375, -1.710479736328125, -1.6479034423828125, -1.5853271484375, -1.5227508544921875, -1.460174560546875, -1.3975982666015625, -1.33502197265625, -1.2724456787109375, -1.209869384765625, -1.1472930908203125, -1.084716796875, -1.0221405029296875, -0.959564208984375, -0.8969879150390625, -0.83441162109375, -0.7718353271484375, -0.709259033203125, -0.6466827392578125, -0.5841064453125, -0.5215301513671875, -0.458953857421875, -0.3963775634765625, -0.33380126953125, -0.2712249755859375, -0.208648681640625, -0.1460723876953125, -0.08349609375, -0.0209197998046875, 0.041656494140625, 0.1042327880859375, 0.16680908203125, 0.2293853759765625, 0.291961669921875, 0.3545379638671875, 0.4171142578125, 0.4796905517578125, 0.542266845703125, 0.6048431396484375, 0.66741943359375, 0.7299957275390625, 0.792572021484375, 0.8551483154296875, 0.917724609375, 0.9803009033203125, 1.042877197265625, 1.1054534912109375, 1.16802978515625, 1.2306060791015625, 1.293182373046875, 1.3557586669921875, 1.4183349609375, 1.4809112548828125, 1.543487548828125, 1.6060638427734375, 1.66864013671875, 1.7312164306640625, 1.793792724609375, 1.8563690185546875, 1.9189453125]}, "gradients/encoder.encoder.layers.14.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 3.0, 1.0, 2.0, 3.0, 2.0, 6.0, 4.0, 12.0, 8.0, 18.0, 24.0, 26.0, 36.0, 41.0, 70.0, 98.0, 138.0, 143.0, 111.0, 57.0, 47.0, 52.0, 32.0, 13.0, 22.0, 9.0, 9.0, 9.0, 4.0, 0.0, 2.0, 5.0, 0.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0], "bins": [-1.2607421875, -1.2282562255859375, -1.195770263671875, -1.1632843017578125, -1.13079833984375, -1.0983123779296875, -1.065826416015625, -1.0333404541015625, -1.0008544921875, -0.9683685302734375, -0.935882568359375, -0.9033966064453125, -0.87091064453125, -0.8384246826171875, -0.805938720703125, -0.7734527587890625, -0.740966796875, -0.7084808349609375, -0.675994873046875, -0.6435089111328125, -0.61102294921875, -0.5785369873046875, -0.546051025390625, -0.5135650634765625, -0.4810791015625, -0.4485931396484375, -0.416107177734375, -0.3836212158203125, -0.35113525390625, -0.3186492919921875, -0.286163330078125, -0.2536773681640625, -0.22119140625, -0.1887054443359375, -0.156219482421875, -0.1237335205078125, -0.09124755859375, -0.0587615966796875, -0.026275634765625, 0.0062103271484375, 0.0386962890625, 0.0711822509765625, 0.103668212890625, 0.1361541748046875, 0.16864013671875, 0.2011260986328125, 0.233612060546875, 0.2660980224609375, 0.298583984375, 0.3310699462890625, 0.363555908203125, 0.3960418701171875, 0.42852783203125, 0.4610137939453125, 0.493499755859375, 0.5259857177734375, 0.5584716796875, 0.5909576416015625, 0.623443603515625, 0.6559295654296875, 0.68841552734375, 0.7209014892578125, 0.753387451171875, 0.7858734130859375, 0.818359375]}, "gradients/encoder.encoder.layers.14.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 20.0, 309.0, 627.0, 55.0, 2.0, 0.0, 1.0, 2.0, 2.0], "bins": [-193.10693359375, -189.7327117919922, -186.35848999023438, -182.9842529296875, -179.6100311279297, -176.23580932617188, -172.86158752441406, -169.48736572265625, -166.11314392089844, -162.73892211914062, -159.3647003173828, -155.990478515625, -152.61624145507812, -149.2420196533203, -145.8677978515625, -142.4935760498047, -139.1193389892578, -135.7451171875, -132.3708953857422, -128.99667358398438, -125.62244415283203, -122.24821472167969, -118.87399291992188, -115.49977111816406, -112.12554931640625, -108.75132751464844, -105.3770980834961, -102.00287628173828, -98.62865447998047, -95.25442504882812, -91.88020324707031, -88.5059814453125, -85.13175964355469, -81.75753784179688, -78.38330841064453, -75.00908660888672, -71.6348648071289, -68.26063537597656, -64.88641357421875, -61.51218795776367, -58.137962341308594, -54.763736724853516, -51.3895149230957, -48.015289306640625, -44.64106369018555, -41.26683807373047, -37.892616271972656, -34.51839065551758, -31.144166946411133, -27.769943237304688, -24.39571762084961, -21.021493911743164, -17.64727020263672, -14.27304458618164, -10.898820877075195, -7.524595260620117, -4.150371551513672, -0.7761471271514893, 2.5980772972106934, 5.972301483154297, 9.346526145935059, 12.72075080871582, 16.094974517822266, 19.469200134277344, 22.84342384338379]}, "gradients/encoder.encoder.layers.14.layer_norm.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 0.0, 2.0, 4.0, 2.0, 3.0, 5.0, 3.0, 4.0, 7.0, 5.0, 7.0, 8.0, 14.0, 16.0, 15.0, 16.0, 22.0, 27.0, 24.0, 28.0, 26.0, 38.0, 41.0, 34.0, 38.0, 44.0, 63.0, 39.0, 54.0, 34.0, 44.0, 41.0, 48.0, 30.0, 38.0, 34.0, 16.0, 18.0, 16.0, 13.0, 25.0, 13.0, 12.0, 9.0, 8.0, 9.0, 4.0, 4.0, 2.0, 7.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-13.780782699584961, -13.335488319396973, -12.890193939208984, -12.444899559020996, -11.999605178833008, -11.554309844970703, -11.109015464782715, -10.663721084594727, -10.218426704406738, -9.77313232421875, -9.327837944030762, -8.882543563842773, -8.437248229980469, -7.991954326629639, -7.546659469604492, -7.101365089416504, -6.656070709228516, -6.210776329040527, -5.765481948852539, -5.320187091827393, -4.874892711639404, -4.429598331451416, -3.9843037128448486, -3.5390090942382812, -3.093714714050293, -2.6484203338623047, -2.2031257152557373, -1.7578312158584595, -1.3125367164611816, -0.8672423362731934, -0.421947717666626, 0.023346900939941406, 0.4686412811279297, 0.9139357805252075, 1.3592302799224854, 1.8045247793197632, 2.249819278717041, 2.6951136589050293, 3.1404082775115967, 3.585702896118164, 4.030997276306152, 4.476291656494141, 4.921586036682129, 5.366880893707275, 5.812175273895264, 6.257469654083252, 6.702764511108398, 7.148058891296387, 7.593353271484375, 8.038647651672363, 8.483942031860352, 8.92923641204834, 9.374530792236328, 9.819826126098633, 10.265120506286621, 10.71041488647461, 11.155709266662598, 11.601003646850586, 12.046298027038574, 12.491592407226562, 12.936887741088867, 13.382181167602539, 13.827476501464844, 14.272770881652832, 14.71806526184082]}, "gradients/encoder.encoder.layers.13.feed_forward.output_dense.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 4.0, 6.0, 3.0, 4.0, 2.0, 5.0, 10.0, 3.0, 8.0, 21.0, 18.0, 43.0, 34.0, 53.0, 64.0, 116.0, 218.0, 342.0, 592.0, 1174.0, 2443.0, 6495.0, 22224.0, 183341.0, 3889350.0, 66191.0, 13104.0, 4372.0, 1911.0, 888.0, 491.0, 278.0, 167.0, 109.0, 67.0, 42.0, 30.0, 20.0, 13.0, 7.0, 7.0, 1.0, 5.0, 4.0, 2.0, 4.0, 3.0, 1.0, 1.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-4.7109375, -4.56512451171875, -4.4193115234375, -4.27349853515625, -4.127685546875, -3.98187255859375, -3.8360595703125, -3.69024658203125, -3.54443359375, -3.39862060546875, -3.2528076171875, -3.10699462890625, -2.961181640625, -2.81536865234375, -2.6695556640625, -2.52374267578125, -2.3779296875, -2.23211669921875, -2.0863037109375, -1.94049072265625, -1.794677734375, -1.64886474609375, -1.5030517578125, -1.35723876953125, -1.21142578125, -1.06561279296875, -0.9197998046875, -0.77398681640625, -0.628173828125, -0.48236083984375, -0.3365478515625, -0.19073486328125, -0.044921875, 0.10089111328125, 0.2467041015625, 0.39251708984375, 0.538330078125, 0.68414306640625, 0.8299560546875, 0.97576904296875, 1.12158203125, 1.26739501953125, 1.4132080078125, 1.55902099609375, 1.704833984375, 1.85064697265625, 1.9964599609375, 2.14227294921875, 2.2880859375, 2.43389892578125, 2.5797119140625, 2.72552490234375, 2.871337890625, 3.01715087890625, 3.1629638671875, 3.30877685546875, 3.45458984375, 3.60040283203125, 3.7462158203125, 3.89202880859375, 4.037841796875, 4.18365478515625, 4.3294677734375, 4.47528076171875, 4.62109375]}, "gradients/encoder.encoder.layers.13.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 1.0, 3.0, 3.0, 7.0, 11.0, 11.0, 11.0, 16.0, 23.0, 26.0, 38.0, 36.0, 54.0, 57.0, 61.0, 78.0, 80.0, 68.0, 75.0, 60.0, 53.0, 53.0, 41.0, 29.0, 26.0, 22.0, 17.0, 13.0, 11.0, 5.0, 4.0, 8.0, 3.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.3662109375, -1.328033447265625, -1.28985595703125, -1.251678466796875, -1.2135009765625, -1.175323486328125, -1.13714599609375, -1.098968505859375, -1.060791015625, -1.022613525390625, -0.98443603515625, -0.946258544921875, -0.9080810546875, -0.869903564453125, -0.83172607421875, -0.793548583984375, -0.75537109375, -0.717193603515625, -0.67901611328125, -0.640838623046875, -0.6026611328125, -0.564483642578125, -0.52630615234375, -0.488128662109375, -0.449951171875, -0.411773681640625, -0.37359619140625, -0.335418701171875, -0.2972412109375, -0.259063720703125, -0.22088623046875, -0.182708740234375, -0.14453125, -0.106353759765625, -0.06817626953125, -0.029998779296875, 0.0081787109375, 0.046356201171875, 0.08453369140625, 0.122711181640625, 0.160888671875, 0.199066162109375, 0.23724365234375, 0.275421142578125, 0.3135986328125, 0.351776123046875, 0.38995361328125, 0.428131103515625, 0.46630859375, 0.504486083984375, 0.54266357421875, 0.580841064453125, 0.6190185546875, 0.657196044921875, 0.69537353515625, 0.733551025390625, 0.771728515625, 0.809906005859375, 0.84808349609375, 0.886260986328125, 0.9244384765625, 0.962615966796875, 1.00079345703125, 1.038970947265625, 1.0771484375]}, "gradients/encoder.encoder.layers.13.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 4.0, 4.0, 10.0, 25.0, 45.0, 98.0, 197.0, 547.0, 1419.0, 4592.0, 21127.0, 263116.0, 3837485.0, 53041.0, 8776.0, 2370.0, 807.0, 355.0, 138.0, 55.0, 35.0, 12.0, 10.0, 5.0, 6.0, 3.0, 4.0, 2.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-6.59765625, -6.4049072265625, -6.212158203125, -6.0194091796875, -5.82666015625, -5.6339111328125, -5.441162109375, -5.2484130859375, -5.0556640625, -4.8629150390625, -4.670166015625, -4.4774169921875, -4.28466796875, -4.0919189453125, -3.899169921875, -3.7064208984375, -3.513671875, -3.3209228515625, -3.128173828125, -2.9354248046875, -2.74267578125, -2.5499267578125, -2.357177734375, -2.1644287109375, -1.9716796875, -1.7789306640625, -1.586181640625, -1.3934326171875, -1.20068359375, -1.0079345703125, -0.815185546875, -0.6224365234375, -0.4296875, -0.2369384765625, -0.044189453125, 0.1485595703125, 0.34130859375, 0.5340576171875, 0.726806640625, 0.9195556640625, 1.1123046875, 1.3050537109375, 1.497802734375, 1.6905517578125, 1.88330078125, 2.0760498046875, 2.268798828125, 2.4615478515625, 2.654296875, 2.8470458984375, 3.039794921875, 3.2325439453125, 3.42529296875, 3.6180419921875, 3.810791015625, 4.0035400390625, 4.1962890625, 4.3890380859375, 4.581787109375, 4.7745361328125, 4.96728515625, 5.1600341796875, 5.352783203125, 5.5455322265625, 5.73828125]}, "gradients/encoder.encoder.layers.13.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 2.0, 4.0, 1.0, 5.0, 5.0, 7.0, 9.0, 20.0, 18.0, 21.0, 47.0, 110.0, 235.0, 2284.0, 836.0, 207.0, 97.0, 49.0, 34.0, 14.0, 21.0, 21.0, 7.0, 11.0, 4.0, 1.0, 3.0, 3.0, 1.0, 4.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-2.154296875, -2.097442626953125, -2.04058837890625, -1.983734130859375, -1.9268798828125, -1.870025634765625, -1.81317138671875, -1.756317138671875, -1.699462890625, -1.642608642578125, -1.58575439453125, -1.528900146484375, -1.4720458984375, -1.415191650390625, -1.35833740234375, -1.301483154296875, -1.24462890625, -1.187774658203125, -1.13092041015625, -1.074066162109375, -1.0172119140625, -0.960357666015625, -0.90350341796875, -0.846649169921875, -0.789794921875, -0.732940673828125, -0.67608642578125, -0.619232177734375, -0.5623779296875, -0.505523681640625, -0.44866943359375, -0.391815185546875, -0.3349609375, -0.278106689453125, -0.22125244140625, -0.164398193359375, -0.1075439453125, -0.050689697265625, 0.00616455078125, 0.063018798828125, 0.119873046875, 0.176727294921875, 0.23358154296875, 0.290435791015625, 0.3472900390625, 0.404144287109375, 0.46099853515625, 0.517852783203125, 0.57470703125, 0.631561279296875, 0.68841552734375, 0.745269775390625, 0.8021240234375, 0.858978271484375, 0.91583251953125, 0.972686767578125, 1.029541015625, 1.086395263671875, 1.14324951171875, 1.200103759765625, 1.2569580078125, 1.313812255859375, 1.37066650390625, 1.427520751953125, 1.484375]}, "gradients/encoder.encoder.layers.13.final_layer_norm.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 4.0, 3.0, 33.0, 325.0, 488.0, 125.0, 21.0, 11.0, 6.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-11.38621997833252, -10.344137191772461, -9.302054405212402, -8.259971618652344, -7.217888355255127, -6.175805568695068, -5.133722305297852, -4.091639518737793, -3.0495567321777344, -2.007473945617676, -0.9653909206390381, 0.07669210433959961, 1.1187748908996582, 2.160857677459717, 3.2029409408569336, 4.245023727416992, 5.287106513977051, 6.329189300537109, 7.371272087097168, 8.413354873657227, 9.455438613891602, 10.497520446777344, 11.539604187011719, 12.581686973571777, 13.623769760131836, 14.665852546691895, 15.707935333251953, 16.750019073486328, 17.79210090637207, 18.834184646606445, 19.876266479492188, 20.918350219726562, 21.960433959960938, 23.002517700195312, 24.044599533081055, 25.08668327331543, 26.128765106201172, 27.170848846435547, 28.212932586669922, 29.255014419555664, 30.297096252441406, 31.33917999267578, 32.381263732910156, 33.423343658447266, 34.46542739868164, 35.507511138916016, 36.54959487915039, 37.5916748046875, 38.633758544921875, 39.67584228515625, 40.717926025390625, 41.760005950927734, 42.80208969116211, 43.844173431396484, 44.88625717163086, 45.92833709716797, 46.97042465209961, 48.012508392333984, 49.05459213256836, 50.09667205810547, 51.138755798339844, 52.18083953857422, 53.222923278808594, 54.26500701904297, 55.30708694458008]}, "gradients/encoder.encoder.layers.13.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 3.0, 5.0, 8.0, 8.0, 10.0, 10.0, 14.0, 16.0, 15.0, 27.0, 29.0, 38.0, 37.0, 35.0, 43.0, 49.0, 48.0, 51.0, 32.0, 51.0, 55.0, 49.0, 46.0, 38.0, 39.0, 38.0, 40.0, 23.0, 36.0, 28.0, 21.0, 17.0, 18.0, 9.0, 7.0, 4.0, 3.0, 4.0, 4.0, 2.0, 2.0, 1.0, 2.0, 1.0, 2.0], "bins": [-7.671318054199219, -7.475038051605225, -7.278757572174072, -7.082477569580078, -6.886197090148926, -6.689917087554932, -6.4936370849609375, -6.297356605529785, -6.101076126098633, -5.904796123504639, -5.708515644073486, -5.512235641479492, -5.31595516204834, -5.119675159454346, -4.923395156860352, -4.727114677429199, -4.530834674835205, -4.334554672241211, -4.138274192810059, -3.9419941902160645, -3.745713710784912, -3.549433708190918, -3.3531534671783447, -3.1568732261657715, -2.9605929851531982, -2.764312744140625, -2.5680325031280518, -2.3717522621154785, -2.1754722595214844, -1.9791918992996216, -1.782911777496338, -1.5866315364837646, -1.3903512954711914, -1.1940710544586182, -0.9977908730506897, -0.8015106916427612, -0.605230450630188, -0.40895020961761475, -0.21267008781433105, -0.016389846801757812, 0.17989039421081543, 0.3761706054210663, 0.5724508166313171, 0.7687309980392456, 0.9650112390518188, 1.161291480064392, 1.3575716018676758, 1.553851842880249, 1.7501320838928223, 1.9464123249053955, 2.1426925659179688, 2.338972568511963, 2.5352530479431152, 2.7315330505371094, 2.9278132915496826, 3.124093532562256, 3.320373773574829, 3.5166540145874023, 3.7129342555999756, 3.909214496612549, 4.105494499206543, 4.301774978637695, 4.4980549812316895, 4.694334983825684, 4.890615463256836]}, "gradients/encoder.encoder.layers.13.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 2.0, 5.0, 3.0, 0.0, 3.0, 8.0, 10.0, 15.0, 18.0, 28.0, 38.0, 51.0, 74.0, 91.0, 106.0, 195.0, 281.0, 478.0, 854.0, 1678.0, 3838.0, 9716.0, 31442.0, 138010.0, 555264.0, 234226.0, 48535.0, 13776.0, 4966.0, 2137.0, 1037.0, 617.0, 339.0, 202.0, 149.0, 118.0, 65.0, 52.0, 29.0, 29.0, 22.0, 13.0, 16.0, 10.0, 4.0, 4.0, 2.0, 5.0, 2.0, 3.0, 2.0, 1.0, 0.0, 1.0, 1.0], "bins": [-5.3125, -5.1546630859375, -4.996826171875, -4.8389892578125, -4.68115234375, -4.5233154296875, -4.365478515625, -4.2076416015625, -4.0498046875, -3.8919677734375, -3.734130859375, -3.5762939453125, -3.41845703125, -3.2606201171875, -3.102783203125, -2.9449462890625, -2.787109375, -2.6292724609375, -2.471435546875, -2.3135986328125, -2.15576171875, -1.9979248046875, -1.840087890625, -1.6822509765625, -1.5244140625, -1.3665771484375, -1.208740234375, -1.0509033203125, -0.89306640625, -0.7352294921875, -0.577392578125, -0.4195556640625, -0.26171875, -0.1038818359375, 0.053955078125, 0.2117919921875, 0.36962890625, 0.5274658203125, 0.685302734375, 0.8431396484375, 1.0009765625, 1.1588134765625, 1.316650390625, 1.4744873046875, 1.63232421875, 1.7901611328125, 1.947998046875, 2.1058349609375, 2.263671875, 2.4215087890625, 2.579345703125, 2.7371826171875, 2.89501953125, 3.0528564453125, 3.210693359375, 3.3685302734375, 3.5263671875, 3.6842041015625, 3.842041015625, 3.9998779296875, 4.15771484375, 4.3155517578125, 4.473388671875, 4.6312255859375, 4.7890625]}, "gradients/encoder.encoder.layers.13.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 3.0, 0.0, 5.0, 3.0, 6.0, 8.0, 8.0, 16.0, 24.0, 26.0, 27.0, 37.0, 52.0, 51.0, 49.0, 74.0, 62.0, 69.0, 82.0, 75.0, 50.0, 50.0, 54.0, 36.0, 25.0, 19.0, 33.0, 13.0, 15.0, 13.0, 4.0, 8.0, 2.0, 4.0, 0.0, 5.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-1.390625, -1.3526153564453125, -1.314605712890625, -1.2765960693359375, -1.23858642578125, -1.2005767822265625, -1.162567138671875, -1.1245574951171875, -1.0865478515625, -1.0485382080078125, -1.010528564453125, -0.9725189208984375, -0.93450927734375, -0.8964996337890625, -0.858489990234375, -0.8204803466796875, -0.782470703125, -0.7444610595703125, -0.706451416015625, -0.6684417724609375, -0.63043212890625, -0.5924224853515625, -0.554412841796875, -0.5164031982421875, -0.4783935546875, -0.4403839111328125, -0.402374267578125, -0.3643646240234375, -0.32635498046875, -0.2883453369140625, -0.250335693359375, -0.2123260498046875, -0.17431640625, -0.1363067626953125, -0.098297119140625, -0.0602874755859375, -0.02227783203125, 0.0157318115234375, 0.053741455078125, 0.0917510986328125, 0.1297607421875, 0.1677703857421875, 0.205780029296875, 0.2437896728515625, 0.28179931640625, 0.3198089599609375, 0.357818603515625, 0.3958282470703125, 0.433837890625, 0.4718475341796875, 0.509857177734375, 0.5478668212890625, 0.58587646484375, 0.6238861083984375, 0.661895751953125, 0.6999053955078125, 0.7379150390625, 0.7759246826171875, 0.813934326171875, 0.8519439697265625, 0.88995361328125, 0.9279632568359375, 0.965972900390625, 1.0039825439453125, 1.0419921875]}, "gradients/encoder.encoder.layers.13.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 3.0, 1.0, 2.0, 3.0, 3.0, 4.0, 5.0, 8.0, 15.0, 13.0, 14.0, 17.0, 51.0, 57.0, 115.0, 179.0, 325.0, 628.0, 1364.0, 3142.0, 7750.0, 27045.0, 132343.0, 589267.0, 225869.0, 41784.0, 11039.0, 3979.0, 1676.0, 873.0, 404.0, 224.0, 122.0, 88.0, 42.0, 27.0, 27.0, 19.0, 10.0, 6.0, 6.0, 1.0, 3.0, 3.0, 5.0, 2.0, 1.0, 2.0, 1.0, 2.0, 0.0, 0.0, 1.0], "bins": [-4.98046875, -4.83673095703125, -4.6929931640625, -4.54925537109375, -4.405517578125, -4.26177978515625, -4.1180419921875, -3.97430419921875, -3.83056640625, -3.68682861328125, -3.5430908203125, -3.39935302734375, -3.255615234375, -3.11187744140625, -2.9681396484375, -2.82440185546875, -2.6806640625, -2.53692626953125, -2.3931884765625, -2.24945068359375, -2.105712890625, -1.96197509765625, -1.8182373046875, -1.67449951171875, -1.53076171875, -1.38702392578125, -1.2432861328125, -1.09954833984375, -0.955810546875, -0.81207275390625, -0.6683349609375, -0.52459716796875, -0.380859375, -0.23712158203125, -0.0933837890625, 0.05035400390625, 0.194091796875, 0.33782958984375, 0.4815673828125, 0.62530517578125, 0.76904296875, 0.91278076171875, 1.0565185546875, 1.20025634765625, 1.343994140625, 1.48773193359375, 1.6314697265625, 1.77520751953125, 1.9189453125, 2.06268310546875, 2.2064208984375, 2.35015869140625, 2.493896484375, 2.63763427734375, 2.7813720703125, 2.92510986328125, 3.06884765625, 3.21258544921875, 3.3563232421875, 3.50006103515625, 3.643798828125, 3.78753662109375, 3.9312744140625, 4.07501220703125, 4.21875]}, "gradients/encoder.encoder.layers.13.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 4.0, 0.0, 0.0, 2.0, 2.0, 1.0, 0.0, 7.0, 9.0, 8.0, 9.0, 12.0, 15.0, 24.0, 25.0, 25.0, 31.0, 47.0, 45.0, 42.0, 48.0, 55.0, 62.0, 53.0, 58.0, 64.0, 46.0, 49.0, 24.0, 40.0, 41.0, 34.0, 30.0, 32.0, 16.0, 14.0, 9.0, 5.0, 4.0, 3.0, 5.0, 3.0, 1.0, 2.0, 2.0, 4.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-4.6484375, -4.49285888671875, -4.3372802734375, -4.18170166015625, -4.026123046875, -3.87054443359375, -3.7149658203125, -3.55938720703125, -3.40380859375, -3.24822998046875, -3.0926513671875, -2.93707275390625, -2.781494140625, -2.62591552734375, -2.4703369140625, -2.31475830078125, -2.1591796875, -2.00360107421875, -1.8480224609375, -1.69244384765625, -1.536865234375, -1.38128662109375, -1.2257080078125, -1.07012939453125, -0.91455078125, -0.75897216796875, -0.6033935546875, -0.44781494140625, -0.292236328125, -0.13665771484375, 0.0189208984375, 0.17449951171875, 0.330078125, 0.48565673828125, 0.6412353515625, 0.79681396484375, 0.952392578125, 1.10797119140625, 1.2635498046875, 1.41912841796875, 1.57470703125, 1.73028564453125, 1.8858642578125, 2.04144287109375, 2.197021484375, 2.35260009765625, 2.5081787109375, 2.66375732421875, 2.8193359375, 2.97491455078125, 3.1304931640625, 3.28607177734375, 3.441650390625, 3.59722900390625, 3.7528076171875, 3.90838623046875, 4.06396484375, 4.21954345703125, 4.3751220703125, 4.53070068359375, 4.686279296875, 4.84185791015625, 4.9974365234375, 5.15301513671875, 5.30859375]}, "gradients/encoder.encoder.layers.13.attention.k_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 4.0, 4.0, 0.0, 3.0, 0.0, 2.0, 10.0, 3.0, 17.0, 18.0, 26.0, 38.0, 58.0, 90.0, 133.0, 238.0, 416.0, 731.0, 1503.0, 3637.0, 12516.0, 125881.0, 838340.0, 51387.0, 8156.0, 2681.0, 1158.0, 569.0, 376.0, 215.0, 96.0, 85.0, 57.0, 34.0, 26.0, 17.0, 12.0, 11.0, 4.0, 4.0, 3.0, 3.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-3.9375, -3.82916259765625, -3.7208251953125, -3.61248779296875, -3.504150390625, -3.39581298828125, -3.2874755859375, -3.17913818359375, -3.07080078125, -2.96246337890625, -2.8541259765625, -2.74578857421875, -2.637451171875, -2.52911376953125, -2.4207763671875, -2.31243896484375, -2.2041015625, -2.09576416015625, -1.9874267578125, -1.87908935546875, -1.770751953125, -1.66241455078125, -1.5540771484375, -1.44573974609375, -1.33740234375, -1.22906494140625, -1.1207275390625, -1.01239013671875, -0.904052734375, -0.79571533203125, -0.6873779296875, -0.57904052734375, -0.470703125, -0.36236572265625, -0.2540283203125, -0.14569091796875, -0.037353515625, 0.07098388671875, 0.1793212890625, 0.28765869140625, 0.39599609375, 0.50433349609375, 0.6126708984375, 0.72100830078125, 0.829345703125, 0.93768310546875, 1.0460205078125, 1.15435791015625, 1.2626953125, 1.37103271484375, 1.4793701171875, 1.58770751953125, 1.696044921875, 1.80438232421875, 1.9127197265625, 2.02105712890625, 2.12939453125, 2.23773193359375, 2.3460693359375, 2.45440673828125, 2.562744140625, 2.67108154296875, 2.7794189453125, 2.88775634765625, 2.99609375]}, "gradients/encoder.encoder.layers.13.attention.k_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 3.0, 4.0, 4.0, 6.0, 5.0, 6.0, 12.0, 17.0, 19.0, 30.0, 48.0, 71.0, 117.0, 162.0, 154.0, 115.0, 81.0, 53.0, 36.0, 25.0, 12.0, 7.0, 6.0, 7.0, 2.0, 4.0, 2.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0002970695495605469, -0.00028610602021217346, -0.00027514249086380005, -0.00026417896151542664, -0.0002532154321670532, -0.0002422519028186798, -0.0002312883734703064, -0.00022032484412193298, -0.00020936131477355957, -0.00019839778542518616, -0.00018743425607681274, -0.00017647072672843933, -0.00016550719738006592, -0.0001545436680316925, -0.0001435801386833191, -0.00013261660933494568, -0.00012165307998657227, -0.00011068955063819885, -9.972602128982544e-05, -8.876249194145203e-05, -7.779896259307861e-05, -6.68354332447052e-05, -5.587190389633179e-05, -4.4908374547958374e-05, -3.394484519958496e-05, -2.2981315851211548e-05, -1.2017786502838135e-05, -1.0542571544647217e-06, 9.909272193908691e-06, 2.0872801542282104e-05, 3.183633089065552e-05, 4.279986023902893e-05, 5.3763389587402344e-05, 6.472691893577576e-05, 7.569044828414917e-05, 8.665397763252258e-05, 9.7617506980896e-05, 0.00010858103632926941, 0.00011954456567764282, 0.00013050809502601624, 0.00014147162437438965, 0.00015243515372276306, 0.00016339868307113647, 0.0001743622124195099, 0.0001853257417678833, 0.00019628927111625671, 0.00020725280046463013, 0.00021821632981300354, 0.00022917985916137695, 0.00024014338850975037, 0.0002511069178581238, 0.0002620704472064972, 0.0002730339765548706, 0.000283997505903244, 0.00029496103525161743, 0.00030592456459999084, 0.00031688809394836426, 0.00032785162329673767, 0.0003388151526451111, 0.0003497786819934845, 0.0003607422113418579, 0.0003717057406902313, 0.00038266927003860474, 0.00039363279938697815, 0.00040459632873535156]}, "gradients/encoder.encoder.layers.13.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 4.0, 6.0, 11.0, 21.0, 19.0, 22.0, 41.0, 71.0, 96.0, 176.0, 414.0, 1186.0, 8046.0, 726467.0, 304752.0, 5440.0, 996.0, 324.0, 164.0, 91.0, 77.0, 38.0, 29.0, 18.0, 10.0, 13.0, 15.0, 1.0, 4.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.54296875, -6.35009765625, -6.1572265625, -5.96435546875, -5.771484375, -5.57861328125, -5.3857421875, -5.19287109375, -5.0, -4.80712890625, -4.6142578125, -4.42138671875, -4.228515625, -4.03564453125, -3.8427734375, -3.64990234375, -3.45703125, -3.26416015625, -3.0712890625, -2.87841796875, -2.685546875, -2.49267578125, -2.2998046875, -2.10693359375, -1.9140625, -1.72119140625, -1.5283203125, -1.33544921875, -1.142578125, -0.94970703125, -0.7568359375, -0.56396484375, -0.37109375, -0.17822265625, 0.0146484375, 0.20751953125, 0.400390625, 0.59326171875, 0.7861328125, 0.97900390625, 1.171875, 1.36474609375, 1.5576171875, 1.75048828125, 1.943359375, 2.13623046875, 2.3291015625, 2.52197265625, 2.71484375, 2.90771484375, 3.1005859375, 3.29345703125, 3.486328125, 3.67919921875, 3.8720703125, 4.06494140625, 4.2578125, 4.45068359375, 4.6435546875, 4.83642578125, 5.029296875, 5.22216796875, 5.4150390625, 5.60791015625, 5.80078125]}, "gradients/encoder.encoder.layers.13.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 1.0, 7.0, 7.0, 11.0, 21.0, 66.0, 151.0, 356.0, 234.0, 76.0, 48.0, 16.0, 9.0, 9.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.7734375, -6.5291748046875, -6.284912109375, -6.0406494140625, -5.79638671875, -5.5521240234375, -5.307861328125, -5.0635986328125, -4.8193359375, -4.5750732421875, -4.330810546875, -4.0865478515625, -3.84228515625, -3.5980224609375, -3.353759765625, -3.1094970703125, -2.865234375, -2.6209716796875, -2.376708984375, -2.1324462890625, -1.88818359375, -1.6439208984375, -1.399658203125, -1.1553955078125, -0.9111328125, -0.6668701171875, -0.422607421875, -0.1783447265625, 0.06591796875, 0.3101806640625, 0.554443359375, 0.7987060546875, 1.04296875, 1.2872314453125, 1.531494140625, 1.7757568359375, 2.02001953125, 2.2642822265625, 2.508544921875, 2.7528076171875, 2.9970703125, 3.2413330078125, 3.485595703125, 3.7298583984375, 3.97412109375, 4.2183837890625, 4.462646484375, 4.7069091796875, 4.951171875, 5.1954345703125, 5.439697265625, 5.6839599609375, 5.92822265625, 6.1724853515625, 6.416748046875, 6.6610107421875, 6.9052734375, 7.1495361328125, 7.393798828125, 7.6380615234375, 7.88232421875, 8.1265869140625, 8.370849609375, 8.6151123046875, 8.859375]}, "gradients/encoder.encoder.layers.13.layer_norm.weight": {"_type": "histogram", "values": [2.0, 3.0, 0.0, 1.0, 5.0, 8.0, 24.0, 17.0, 54.0, 130.0, 207.0, 229.0, 166.0, 78.0, 48.0, 25.0, 6.0, 5.0, 4.0, 2.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-18.90658950805664, -17.215009689331055, -15.523430824279785, -13.831851959228516, -12.14027214050293, -10.448692321777344, -8.757113456726074, -7.065534591674805, -5.373954772949219, -3.682375431060791, -1.9907960891723633, -0.29921674728393555, 1.3923625946044922, 3.083942413330078, 4.775521278381348, 6.467100143432617, 8.158679962158203, 9.850259780883789, 11.541838645935059, 13.233417510986328, 14.924997329711914, 16.6165771484375, 18.308155059814453, 19.99973487854004, 21.691314697265625, 23.38289451599121, 25.074474334716797, 26.76605224609375, 28.457632064819336, 30.149211883544922, 31.840789794921875, 33.532371520996094, 35.22395324707031, 36.915531158447266, 38.607112884521484, 40.29869079589844, 41.990272521972656, 43.68185043334961, 45.37342834472656, 47.06501007080078, 48.756587982177734, 50.44816589355469, 52.139747619628906, 53.83132553100586, 55.52290344238281, 57.21448516845703, 58.906063079833984, 60.59764099121094, 62.289222717285156, 63.98080062866211, 65.67237854003906, 67.36396026611328, 69.0555419921875, 70.74711608886719, 72.4386978149414, 74.13027954101562, 75.82185363769531, 77.51343536376953, 79.20500946044922, 80.89659118652344, 82.58817291259766, 84.27975463867188, 85.97132873535156, 87.66291046142578, 89.3544921875]}, "gradients/encoder.encoder.layers.13.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 3.0, 3.0, 3.0, 2.0, 2.0, 3.0, 4.0, 8.0, 6.0, 10.0, 14.0, 16.0, 20.0, 20.0, 17.0, 31.0, 32.0, 34.0, 46.0, 41.0, 41.0, 36.0, 52.0, 54.0, 41.0, 36.0, 57.0, 47.0, 50.0, 41.0, 31.0, 31.0, 34.0, 26.0, 23.0, 24.0, 18.0, 7.0, 10.0, 13.0, 5.0, 4.0, 5.0, 3.0, 3.0, 2.0, 1.0, 3.0, 0.0, 2.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0], "bins": [-24.955293655395508, -24.15846824645996, -23.361642837524414, -22.564817428588867, -21.767990112304688, -20.97116470336914, -20.174339294433594, -19.377513885498047, -18.5806884765625, -17.783863067626953, -16.987037658691406, -16.19021224975586, -15.393385887145996, -14.59656047821045, -13.799734115600586, -13.002908706665039, -12.206083297729492, -11.409257888793945, -10.612432479858398, -9.815606117248535, -9.018780708312988, -8.221955299377441, -7.425129413604736, -6.628303527832031, -5.831478118896484, -5.0346527099609375, -4.237826824188232, -3.4410011768341064, -2.6441755294799805, -1.8473498821258545, -1.0505242347717285, -0.25369834899902344, 0.5431251525878906, 1.3399507999420166, 2.1367764472961426, 2.9336020946502686, 3.7304277420043945, 4.527253150939941, 5.3240790367126465, 6.120904922485352, 6.917730331420898, 7.714555740356445, 8.511381149291992, 9.308207511901855, 10.105032920837402, 10.90185832977295, 11.698684692382812, 12.49551010131836, 13.292335510253906, 14.089160919189453, 14.885986328125, 15.682812690734863, 16.479637145996094, 17.276464462280273, 18.07328987121582, 18.870115280151367, 19.666940689086914, 20.46376609802246, 21.260591506958008, 22.057416915893555, 22.854244232177734, 23.65106964111328, 24.447895050048828, 25.244720458984375, 26.041545867919922]}, "gradients/encoder.encoder.layers.12.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 2.0, 1.0, 6.0, 8.0, 11.0, 18.0, 13.0, 24.0, 34.0, 57.0, 80.0, 199.0, 351.0, 861.0, 3134.0, 22462.0, 3982653.0, 172741.0, 8632.0, 1798.0, 570.0, 286.0, 132.0, 73.0, 35.0, 35.0, 26.0, 13.0, 10.0, 9.0, 5.0, 2.0, 5.0, 3.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-11.4765625, -11.1368408203125, -10.797119140625, -10.4573974609375, -10.11767578125, -9.7779541015625, -9.438232421875, -9.0985107421875, -8.7587890625, -8.4190673828125, -8.079345703125, -7.7396240234375, -7.39990234375, -7.0601806640625, -6.720458984375, -6.3807373046875, -6.041015625, -5.7012939453125, -5.361572265625, -5.0218505859375, -4.68212890625, -4.3424072265625, -4.002685546875, -3.6629638671875, -3.3232421875, -2.9835205078125, -2.643798828125, -2.3040771484375, -1.96435546875, -1.6246337890625, -1.284912109375, -0.9451904296875, -0.60546875, -0.2657470703125, 0.073974609375, 0.4136962890625, 0.75341796875, 1.0931396484375, 1.432861328125, 1.7725830078125, 2.1123046875, 2.4520263671875, 2.791748046875, 3.1314697265625, 3.47119140625, 3.8109130859375, 4.150634765625, 4.4903564453125, 4.830078125, 5.1697998046875, 5.509521484375, 5.8492431640625, 6.18896484375, 6.5286865234375, 6.868408203125, 7.2081298828125, 7.5478515625, 7.8875732421875, 8.227294921875, 8.5670166015625, 8.90673828125, 9.2464599609375, 9.586181640625, 9.9259033203125, 10.265625]}, "gradients/encoder.encoder.layers.12.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 4.0, 1.0, 6.0, 4.0, 11.0, 7.0, 15.0, 14.0, 14.0, 23.0, 29.0, 35.0, 42.0, 53.0, 48.0, 55.0, 56.0, 65.0, 65.0, 65.0, 63.0, 51.0, 42.0, 48.0, 26.0, 43.0, 30.0, 17.0, 19.0, 16.0, 12.0, 9.0, 6.0, 2.0, 4.0, 3.0, 2.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0], "bins": [-1.388671875, -1.34814453125, -1.3076171875, -1.26708984375, -1.2265625, -1.18603515625, -1.1455078125, -1.10498046875, -1.064453125, -1.02392578125, -0.9833984375, -0.94287109375, -0.90234375, -0.86181640625, -0.8212890625, -0.78076171875, -0.740234375, -0.69970703125, -0.6591796875, -0.61865234375, -0.578125, -0.53759765625, -0.4970703125, -0.45654296875, -0.416015625, -0.37548828125, -0.3349609375, -0.29443359375, -0.25390625, -0.21337890625, -0.1728515625, -0.13232421875, -0.091796875, -0.05126953125, -0.0107421875, 0.02978515625, 0.0703125, 0.11083984375, 0.1513671875, 0.19189453125, 0.232421875, 0.27294921875, 0.3134765625, 0.35400390625, 0.39453125, 0.43505859375, 0.4755859375, 0.51611328125, 0.556640625, 0.59716796875, 0.6376953125, 0.67822265625, 0.71875, 0.75927734375, 0.7998046875, 0.84033203125, 0.880859375, 0.92138671875, 0.9619140625, 1.00244140625, 1.04296875, 1.08349609375, 1.1240234375, 1.16455078125, 1.205078125]}, "gradients/encoder.encoder.layers.12.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 4.0, 2.0, 4.0, 10.0, 12.0, 8.0, 20.0, 29.0, 33.0, 42.0, 81.0, 124.0, 181.0, 352.0, 603.0, 1194.0, 2782.0, 8240.0, 40677.0, 3026179.0, 1066894.0, 34446.0, 7290.0, 2549.0, 1180.0, 612.0, 262.0, 151.0, 111.0, 61.0, 55.0, 26.0, 24.0, 23.0, 13.0, 4.0, 4.0, 3.0, 3.0, 2.0, 1.0, 0.0, 3.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.53125, -7.2879638671875, -7.044677734375, -6.8013916015625, -6.55810546875, -6.3148193359375, -6.071533203125, -5.8282470703125, -5.5849609375, -5.3416748046875, -5.098388671875, -4.8551025390625, -4.61181640625, -4.3685302734375, -4.125244140625, -3.8819580078125, -3.638671875, -3.3953857421875, -3.152099609375, -2.9088134765625, -2.66552734375, -2.4222412109375, -2.178955078125, -1.9356689453125, -1.6923828125, -1.4490966796875, -1.205810546875, -0.9625244140625, -0.71923828125, -0.4759521484375, -0.232666015625, 0.0106201171875, 0.25390625, 0.4971923828125, 0.740478515625, 0.9837646484375, 1.22705078125, 1.4703369140625, 1.713623046875, 1.9569091796875, 2.2001953125, 2.4434814453125, 2.686767578125, 2.9300537109375, 3.17333984375, 3.4166259765625, 3.659912109375, 3.9031982421875, 4.146484375, 4.3897705078125, 4.633056640625, 4.8763427734375, 5.11962890625, 5.3629150390625, 5.606201171875, 5.8494873046875, 6.0927734375, 6.3360595703125, 6.579345703125, 6.8226318359375, 7.06591796875, 7.3092041015625, 7.552490234375, 7.7957763671875, 8.0390625]}, "gradients/encoder.encoder.layers.12.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 1.0, 0.0, 0.0, 1.0, 1.0, 4.0, 3.0, 0.0, 2.0, 1.0, 7.0, 10.0, 15.0, 23.0, 35.0, 67.0, 161.0, 627.0, 2610.0, 319.0, 88.0, 62.0, 17.0, 10.0, 6.0, 5.0, 3.0, 1.0, 4.0, 0.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.2578125, -6.113311767578125, -5.96881103515625, -5.824310302734375, -5.6798095703125, -5.535308837890625, -5.39080810546875, -5.246307373046875, -5.101806640625, -4.957305908203125, -4.81280517578125, -4.668304443359375, -4.5238037109375, -4.379302978515625, -4.23480224609375, -4.090301513671875, -3.94580078125, -3.801300048828125, -3.65679931640625, -3.512298583984375, -3.3677978515625, -3.223297119140625, -3.07879638671875, -2.934295654296875, -2.789794921875, -2.645294189453125, -2.50079345703125, -2.356292724609375, -2.2117919921875, -2.067291259765625, -1.92279052734375, -1.778289794921875, -1.6337890625, -1.489288330078125, -1.34478759765625, -1.200286865234375, -1.0557861328125, -0.911285400390625, -0.76678466796875, -0.622283935546875, -0.477783203125, -0.333282470703125, -0.18878173828125, -0.044281005859375, 0.1002197265625, 0.244720458984375, 0.38922119140625, 0.533721923828125, 0.67822265625, 0.822723388671875, 0.96722412109375, 1.111724853515625, 1.2562255859375, 1.400726318359375, 1.54522705078125, 1.689727783203125, 1.834228515625, 1.978729248046875, 2.12322998046875, 2.267730712890625, 2.4122314453125, 2.556732177734375, 2.70123291015625, 2.845733642578125, 2.990234375]}, "gradients/encoder.encoder.layers.12.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 4.0, 8.0, 8.0, 36.0, 104.0, 335.0, 394.0, 96.0, 19.0, 8.0, 3.0, 2.0, 1.0], "bins": [-92.97163391113281, -91.34947967529297, -89.72732543945312, -88.10517120361328, -86.48301696777344, -84.8608627319336, -83.23870849609375, -81.6165542602539, -79.99440002441406, -78.37224578857422, -76.75009155273438, -75.12793731689453, -73.50578308105469, -71.88362884521484, -70.261474609375, -68.63932037353516, -67.01716613769531, -65.39501190185547, -63.772857666015625, -62.15070343017578, -60.52854919433594, -58.906394958496094, -57.28424072265625, -55.662086486816406, -54.03993225097656, -52.41777801513672, -50.795623779296875, -49.17346954345703, -47.55131530761719, -45.929161071777344, -44.3070068359375, -42.684852600097656, -41.06269836425781, -39.44054412841797, -37.818389892578125, -36.19623565673828, -34.57408142089844, -32.951927185058594, -31.32977294921875, -29.707618713378906, -28.085464477539062, -26.46331024169922, -24.841156005859375, -23.21900177001953, -21.596847534179688, -19.974693298339844, -18.3525390625, -16.730384826660156, -15.108230590820312, -13.486076354980469, -11.863922119140625, -10.241767883300781, -8.619613647460938, -6.997459411621094, -5.37530517578125, -3.7531509399414062, -2.1309967041015625, -0.5088424682617188, 1.113311767578125, 2.7354660034179688, 4.3576202392578125, 5.979774475097656, 7.6019287109375, 9.224082946777344, 10.846237182617188]}, "gradients/encoder.encoder.layers.12.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 2.0, 5.0, 5.0, 5.0, 7.0, 13.0, 10.0, 11.0, 11.0, 15.0, 14.0, 18.0, 27.0, 44.0, 43.0, 36.0, 49.0, 47.0, 59.0, 46.0, 37.0, 48.0, 32.0, 45.0, 46.0, 45.0, 39.0, 28.0, 30.0, 31.0, 30.0, 26.0, 22.0, 18.0, 21.0, 10.0, 12.0, 10.0, 3.0, 6.0, 3.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-12.471174240112305, -12.124832153320312, -11.77849006652832, -11.432147979736328, -11.085805892944336, -10.739463806152344, -10.393121719360352, -10.046778678894043, -9.70043659210205, -9.354094505310059, -9.007752418518066, -8.661410331726074, -8.315068244934082, -7.968725681304932, -7.6223835945129395, -7.276041030883789, -6.929699420928955, -6.583357334136963, -6.237015247344971, -5.89067268371582, -5.544330596923828, -5.197988510131836, -4.851646423339844, -4.505304336547852, -4.158962249755859, -3.812620162963867, -3.466277837753296, -3.1199357509613037, -2.7735934257507324, -2.4272513389587402, -2.080909252166748, -1.7345669269561768, -1.3882246017456055, -1.0418823957443237, -0.6955402493476868, -0.3491981029510498, -0.0028558969497680664, 0.34348630905151367, 0.6898283958435059, 1.0361707210540771, 1.3825128078460693, 1.728855013847351, 2.075197219848633, 2.421539306640625, 2.767881393432617, 3.1142237186431885, 3.4605658054351807, 3.806908130645752, 4.153250217437744, 4.499592304229736, 4.8459343910217285, 5.192276954650879, 5.538619041442871, 5.884961128234863, 6.2313032150268555, 6.577645301818848, 6.92398738861084, 7.270329475402832, 7.616671562194824, 7.963013648986816, 8.309355735778809, 8.655698776245117, 9.00204086303711, 9.348382949829102, 9.694725036621094]}, "gradients/encoder.encoder.layers.12.attention.out_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 0.0, 2.0, 6.0, 1.0, 7.0, 10.0, 11.0, 16.0, 19.0, 33.0, 36.0, 50.0, 70.0, 119.0, 166.0, 254.0, 376.0, 642.0, 1073.0, 1958.0, 3742.0, 7096.0, 15794.0, 38895.0, 110121.0, 313546.0, 346374.0, 128630.0, 44437.0, 17864.0, 7955.0, 3975.0, 2123.0, 1204.0, 683.0, 441.0, 276.0, 162.0, 121.0, 83.0, 54.0, 37.0, 24.0, 25.0, 14.0, 15.0, 12.0, 1.0, 2.0, 2.0, 0.0, 4.0, 1.0, 1.0, 2.0, 2.0, 2.0], "bins": [-4.52734375, -4.39044189453125, -4.2535400390625, -4.11663818359375, -3.979736328125, -3.84283447265625, -3.7059326171875, -3.56903076171875, -3.43212890625, -3.29522705078125, -3.1583251953125, -3.02142333984375, -2.884521484375, -2.74761962890625, -2.6107177734375, -2.47381591796875, -2.3369140625, -2.20001220703125, -2.0631103515625, -1.92620849609375, -1.789306640625, -1.65240478515625, -1.5155029296875, -1.37860107421875, -1.24169921875, -1.10479736328125, -0.9678955078125, -0.83099365234375, -0.694091796875, -0.55718994140625, -0.4202880859375, -0.28338623046875, -0.146484375, -0.00958251953125, 0.1273193359375, 0.26422119140625, 0.401123046875, 0.53802490234375, 0.6749267578125, 0.81182861328125, 0.94873046875, 1.08563232421875, 1.2225341796875, 1.35943603515625, 1.496337890625, 1.63323974609375, 1.7701416015625, 1.90704345703125, 2.0439453125, 2.18084716796875, 2.3177490234375, 2.45465087890625, 2.591552734375, 2.72845458984375, 2.8653564453125, 3.00225830078125, 3.13916015625, 3.27606201171875, 3.4129638671875, 3.54986572265625, 3.686767578125, 3.82366943359375, 3.9605712890625, 4.09747314453125, 4.234375]}, "gradients/encoder.encoder.layers.12.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 4.0, 0.0, 2.0, 4.0, 1.0, 5.0, 12.0, 6.0, 12.0, 17.0, 11.0, 14.0, 24.0, 29.0, 24.0, 35.0, 48.0, 52.0, 49.0, 68.0, 61.0, 54.0, 60.0, 60.0, 59.0, 44.0, 40.0, 40.0, 36.0, 22.0, 24.0, 22.0, 19.0, 10.0, 12.0, 5.0, 7.0, 4.0, 5.0, 5.0, 1.0, 3.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.4091796875, -1.3671722412109375, -1.325164794921875, -1.2831573486328125, -1.24114990234375, -1.1991424560546875, -1.157135009765625, -1.1151275634765625, -1.0731201171875, -1.0311126708984375, -0.989105224609375, -0.9470977783203125, -0.90509033203125, -0.8630828857421875, -0.821075439453125, -0.7790679931640625, -0.737060546875, -0.6950531005859375, -0.653045654296875, -0.6110382080078125, -0.56903076171875, -0.5270233154296875, -0.485015869140625, -0.4430084228515625, -0.4010009765625, -0.3589935302734375, -0.316986083984375, -0.2749786376953125, -0.23297119140625, -0.1909637451171875, -0.148956298828125, -0.1069488525390625, -0.06494140625, -0.0229339599609375, 0.019073486328125, 0.0610809326171875, 0.10308837890625, 0.1450958251953125, 0.187103271484375, 0.2291107177734375, 0.2711181640625, 0.3131256103515625, 0.355133056640625, 0.3971405029296875, 0.43914794921875, 0.4811553955078125, 0.523162841796875, 0.5651702880859375, 0.607177734375, 0.6491851806640625, 0.691192626953125, 0.7332000732421875, 0.77520751953125, 0.8172149658203125, 0.859222412109375, 0.9012298583984375, 0.9432373046875, 0.9852447509765625, 1.027252197265625, 1.0692596435546875, 1.11126708984375, 1.1532745361328125, 1.195281982421875, 1.2372894287109375, 1.279296875]}, "gradients/encoder.encoder.layers.12.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 2.0, 4.0, 6.0, 9.0, 12.0, 17.0, 29.0, 32.0, 54.0, 69.0, 149.0, 255.0, 438.0, 790.0, 1489.0, 2915.0, 5738.0, 11979.0, 27512.0, 69335.0, 217390.0, 427108.0, 179346.0, 58820.0, 23607.0, 10763.0, 5069.0, 2495.0, 1349.0, 744.0, 407.0, 248.0, 144.0, 90.0, 45.0, 25.0, 28.0, 14.0, 10.0, 7.0, 4.0, 6.0, 4.0, 4.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-5.25, -5.0928955078125, -4.935791015625, -4.7786865234375, -4.62158203125, -4.4644775390625, -4.307373046875, -4.1502685546875, -3.9931640625, -3.8360595703125, -3.678955078125, -3.5218505859375, -3.36474609375, -3.2076416015625, -3.050537109375, -2.8934326171875, -2.736328125, -2.5792236328125, -2.422119140625, -2.2650146484375, -2.10791015625, -1.9508056640625, -1.793701171875, -1.6365966796875, -1.4794921875, -1.3223876953125, -1.165283203125, -1.0081787109375, -0.85107421875, -0.6939697265625, -0.536865234375, -0.3797607421875, -0.22265625, -0.0655517578125, 0.091552734375, 0.2486572265625, 0.40576171875, 0.5628662109375, 0.719970703125, 0.8770751953125, 1.0341796875, 1.1912841796875, 1.348388671875, 1.5054931640625, 1.66259765625, 1.8197021484375, 1.976806640625, 2.1339111328125, 2.291015625, 2.4481201171875, 2.605224609375, 2.7623291015625, 2.91943359375, 3.0765380859375, 3.233642578125, 3.3907470703125, 3.5478515625, 3.7049560546875, 3.862060546875, 4.0191650390625, 4.17626953125, 4.3333740234375, 4.490478515625, 4.6475830078125, 4.8046875]}, "gradients/encoder.encoder.layers.12.attention.v_proj.bias": {"_type": "histogram", "values": [2.0, 4.0, 2.0, 3.0, 2.0, 6.0, 7.0, 3.0, 4.0, 1.0, 4.0, 9.0, 11.0, 12.0, 8.0, 15.0, 16.0, 15.0, 17.0, 22.0, 30.0, 28.0, 17.0, 26.0, 26.0, 23.0, 32.0, 33.0, 40.0, 28.0, 23.0, 33.0, 36.0, 39.0, 35.0, 39.0, 35.0, 21.0, 36.0, 34.0, 31.0, 22.0, 29.0, 22.0, 28.0, 12.0, 16.0, 13.0, 9.0, 6.0, 14.0, 9.0, 8.0, 5.0, 3.0, 4.0, 2.0, 3.0, 4.0, 2.0, 1.0, 0.0, 2.0, 2.0], "bins": [-5.48828125, -5.31353759765625, -5.1387939453125, -4.96405029296875, -4.789306640625, -4.61456298828125, -4.4398193359375, -4.26507568359375, -4.09033203125, -3.91558837890625, -3.7408447265625, -3.56610107421875, -3.391357421875, -3.21661376953125, -3.0418701171875, -2.86712646484375, -2.6923828125, -2.51763916015625, -2.3428955078125, -2.16815185546875, -1.993408203125, -1.81866455078125, -1.6439208984375, -1.46917724609375, -1.29443359375, -1.11968994140625, -0.9449462890625, -0.77020263671875, -0.595458984375, -0.42071533203125, -0.2459716796875, -0.07122802734375, 0.103515625, 0.27825927734375, 0.4530029296875, 0.62774658203125, 0.802490234375, 0.97723388671875, 1.1519775390625, 1.32672119140625, 1.50146484375, 1.67620849609375, 1.8509521484375, 2.02569580078125, 2.200439453125, 2.37518310546875, 2.5499267578125, 2.72467041015625, 2.8994140625, 3.07415771484375, 3.2489013671875, 3.42364501953125, 3.598388671875, 3.77313232421875, 3.9478759765625, 4.12261962890625, 4.29736328125, 4.47210693359375, 4.6468505859375, 4.82159423828125, 4.996337890625, 5.17108154296875, 5.3458251953125, 5.52056884765625, 5.6953125]}, "gradients/encoder.encoder.layers.12.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 2.0, 1.0, 5.0, 6.0, 13.0, 13.0, 13.0, 13.0, 24.0, 43.0, 62.0, 83.0, 130.0, 231.0, 415.0, 888.0, 2210.0, 5578.0, 20763.0, 331854.0, 641496.0, 32503.0, 7322.0, 2590.0, 1151.0, 474.0, 245.0, 128.0, 87.0, 66.0, 41.0, 22.0, 32.0, 13.0, 12.0, 15.0, 11.0, 2.0, 5.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.26171875, -5.10748291015625, -4.9532470703125, -4.79901123046875, -4.644775390625, -4.49053955078125, -4.3363037109375, -4.18206787109375, -4.02783203125, -3.87359619140625, -3.7193603515625, -3.56512451171875, -3.410888671875, -3.25665283203125, -3.1024169921875, -2.94818115234375, -2.7939453125, -2.63970947265625, -2.4854736328125, -2.33123779296875, -2.177001953125, -2.02276611328125, -1.8685302734375, -1.71429443359375, -1.56005859375, -1.40582275390625, -1.2515869140625, -1.09735107421875, -0.943115234375, -0.78887939453125, -0.6346435546875, -0.48040771484375, -0.326171875, -0.17193603515625, -0.0177001953125, 0.13653564453125, 0.290771484375, 0.44500732421875, 0.5992431640625, 0.75347900390625, 0.90771484375, 1.06195068359375, 1.2161865234375, 1.37042236328125, 1.524658203125, 1.67889404296875, 1.8331298828125, 1.98736572265625, 2.1416015625, 2.29583740234375, 2.4500732421875, 2.60430908203125, 2.758544921875, 2.91278076171875, 3.0670166015625, 3.22125244140625, 3.37548828125, 3.52972412109375, 3.6839599609375, 3.83819580078125, 3.992431640625, 4.14666748046875, 4.3009033203125, 4.45513916015625, 4.609375]}, "gradients/encoder.encoder.layers.12.attention.k_proj.bias": {"_type": "histogram", "values": [3.0, 0.0, 3.0, 2.0, 2.0, 5.0, 2.0, 4.0, 7.0, 8.0, 19.0, 25.0, 43.0, 62.0, 91.0, 129.0, 146.0, 141.0, 109.0, 64.0, 53.0, 34.0, 16.0, 16.0, 9.0, 6.0, 7.0, 2.0, 4.0, 2.0, 1.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0002887248992919922, -0.00027147307991981506, -0.00025422126054763794, -0.00023696944117546082, -0.0002197176218032837, -0.00020246580243110657, -0.00018521398305892944, -0.00016796216368675232, -0.0001507103443145752, -0.00013345852494239807, -0.00011620670557022095, -9.895488619804382e-05, -8.17030668258667e-05, -6.445124745368958e-05, -4.719942808151245e-05, -2.9947608709335327e-05, -1.2695789337158203e-05, 4.556030035018921e-06, 2.1807849407196045e-05, 3.905966877937317e-05, 5.631148815155029e-05, 7.356330752372742e-05, 9.081512689590454e-05, 0.00010806694626808167, 0.0001253187656402588, 0.0001425705850124359, 0.00015982240438461304, 0.00017707422375679016, 0.00019432604312896729, 0.0002115778625011444, 0.00022882968187332153, 0.00024608150124549866, 0.0002633333206176758, 0.0002805851399898529, 0.00029783695936203003, 0.00031508877873420715, 0.0003323405981063843, 0.0003495924174785614, 0.0003668442368507385, 0.00038409605622291565, 0.0004013478755950928, 0.0004185996949672699, 0.000435851514339447, 0.00045310333371162415, 0.00047035515308380127, 0.0004876069724559784, 0.0005048587918281555, 0.0005221106112003326, 0.0005393624305725098, 0.0005566142499446869, 0.000573866069316864, 0.0005911178886890411, 0.0006083697080612183, 0.0006256215274333954, 0.0006428733468055725, 0.0006601251661777496, 0.0006773769855499268, 0.0006946288049221039, 0.000711880624294281, 0.0007291324436664581, 0.0007463842630386353, 0.0007636360824108124, 0.0007808879017829895, 0.0007981397211551666, 0.0008153915405273438]}, "gradients/encoder.encoder.layers.12.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 5.0, 1.0, 2.0, 4.0, 3.0, 1.0, 12.0, 13.0, 26.0, 43.0, 69.0, 96.0, 163.0, 248.0, 514.0, 1047.0, 2647.0, 8925.0, 49735.0, 571826.0, 369546.0, 32806.0, 6723.0, 2091.0, 904.0, 459.0, 241.0, 152.0, 91.0, 58.0, 31.0, 36.0, 21.0, 10.0, 6.0, 6.0, 2.0, 1.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-4.5234375, -4.404083251953125, -4.28472900390625, -4.165374755859375, -4.0460205078125, -3.926666259765625, -3.80731201171875, -3.687957763671875, -3.568603515625, -3.449249267578125, -3.32989501953125, -3.210540771484375, -3.0911865234375, -2.971832275390625, -2.85247802734375, -2.733123779296875, -2.61376953125, -2.494415283203125, -2.37506103515625, -2.255706787109375, -2.1363525390625, -2.016998291015625, -1.89764404296875, -1.778289794921875, -1.658935546875, -1.539581298828125, -1.42022705078125, -1.300872802734375, -1.1815185546875, -1.062164306640625, -0.94281005859375, -0.823455810546875, -0.7041015625, -0.584747314453125, -0.46539306640625, -0.346038818359375, -0.2266845703125, -0.107330322265625, 0.01202392578125, 0.131378173828125, 0.250732421875, 0.370086669921875, 0.48944091796875, 0.608795166015625, 0.7281494140625, 0.847503662109375, 0.96685791015625, 1.086212158203125, 1.20556640625, 1.324920654296875, 1.44427490234375, 1.563629150390625, 1.6829833984375, 1.802337646484375, 1.92169189453125, 2.041046142578125, 2.160400390625, 2.279754638671875, 2.39910888671875, 2.518463134765625, 2.6378173828125, 2.757171630859375, 2.87652587890625, 2.995880126953125, 3.115234375]}, "gradients/encoder.encoder.layers.12.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 6.0, 4.0, 7.0, 3.0, 6.0, 9.0, 18.0, 22.0, 41.0, 59.0, 83.0, 125.0, 164.0, 138.0, 107.0, 74.0, 45.0, 40.0, 19.0, 19.0, 7.0, 6.0, 4.0, 2.0, 0.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.33984375, -4.18292236328125, -4.0260009765625, -3.86907958984375, -3.712158203125, -3.55523681640625, -3.3983154296875, -3.24139404296875, -3.08447265625, -2.92755126953125, -2.7706298828125, -2.61370849609375, -2.456787109375, -2.29986572265625, -2.1429443359375, -1.98602294921875, -1.8291015625, -1.67218017578125, -1.5152587890625, -1.35833740234375, -1.201416015625, -1.04449462890625, -0.8875732421875, -0.73065185546875, -0.57373046875, -0.41680908203125, -0.2598876953125, -0.10296630859375, 0.053955078125, 0.21087646484375, 0.3677978515625, 0.52471923828125, 0.681640625, 0.83856201171875, 0.9954833984375, 1.15240478515625, 1.309326171875, 1.46624755859375, 1.6231689453125, 1.78009033203125, 1.93701171875, 2.09393310546875, 2.2508544921875, 2.40777587890625, 2.564697265625, 2.72161865234375, 2.8785400390625, 3.03546142578125, 3.1923828125, 3.34930419921875, 3.5062255859375, 3.66314697265625, 3.820068359375, 3.97698974609375, 4.1339111328125, 4.29083251953125, 4.44775390625, 4.60467529296875, 4.7615966796875, 4.91851806640625, 5.075439453125, 5.23236083984375, 5.3892822265625, 5.54620361328125, 5.703125]}, "gradients/encoder.encoder.layers.12.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0, 2.0, 3.0, 5.0, 14.0, 18.0, 34.0, 51.0, 124.0, 186.0, 194.0, 163.0, 116.0, 48.0, 30.0, 15.0, 7.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-109.27366638183594, -106.86898040771484, -104.46428680419922, -102.05960083007812, -99.6549072265625, -97.2502212524414, -94.84553527832031, -92.44084167480469, -90.0361557006836, -87.6314697265625, -85.22677612304688, -82.82209014892578, -80.41740417480469, -78.01271057128906, -75.60802459716797, -73.20333862304688, -70.79864501953125, -68.39395904541016, -65.98926544189453, -63.58457946777344, -61.17988967895508, -58.77519989013672, -56.370513916015625, -53.965824127197266, -51.561134338378906, -49.15644454956055, -46.75175476074219, -44.347068786621094, -41.942378997802734, -39.537689208984375, -37.13300323486328, -34.72831344604492, -32.32361602783203, -29.918926239013672, -27.514238357543945, -25.10955047607422, -22.70486068725586, -20.3001708984375, -17.895483016967773, -15.490795135498047, -13.086105346679688, -10.681416511535645, -8.276727676391602, -5.872038841247559, -3.4673500061035156, -1.0626611709594727, 1.3420276641845703, 3.746715545654297, 6.151405334472656, 8.5560941696167, 10.960783004760742, 13.365471839904785, 15.770160675048828, 18.174850463867188, 20.579538345336914, 22.98422622680664, 25.388916015625, 27.79360580444336, 30.198293685913086, 32.60298156738281, 35.00767135620117, 37.41236114501953, 39.817047119140625, 42.221736907958984, 44.626426696777344]}, "gradients/encoder.encoder.layers.12.layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 3.0, 4.0, 3.0, 10.0, 6.0, 7.0, 6.0, 12.0, 15.0, 19.0, 18.0, 18.0, 21.0, 34.0, 32.0, 41.0, 42.0, 41.0, 43.0, 49.0, 51.0, 55.0, 35.0, 34.0, 50.0, 45.0, 42.0, 36.0, 23.0, 38.0, 27.0, 29.0, 24.0, 22.0, 21.0, 18.0, 8.0, 9.0, 5.0, 6.0, 5.0, 3.0, 3.0, 2.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-35.57080078125, -34.32867431640625, -33.0865478515625, -31.84442138671875, -30.602294921875, -29.36016845703125, -28.118040084838867, -26.875913619995117, -25.633787155151367, -24.391660690307617, -23.149534225463867, -21.907407760620117, -20.665279388427734, -19.423152923583984, -18.181026458740234, -16.938899993896484, -15.696773529052734, -14.454647064208984, -13.212520599365234, -11.970393180847168, -10.728266716003418, -9.486140251159668, -8.244012832641602, -7.001886367797852, -5.759759902954102, -4.517633438110352, -3.2755064964294434, -2.0333797931671143, -0.7912530899047852, 0.45087337493896484, 1.693000316619873, 2.9351272583007812, 4.177253723144531, 5.419380187988281, 6.6615071296691895, 7.903634071350098, 9.145760536193848, 10.387887001037598, 11.630014419555664, 12.872140884399414, 14.114267349243164, 15.356393814086914, 16.598520278930664, 17.840646743774414, 19.082775115966797, 20.324901580810547, 21.567028045654297, 22.809154510498047, 24.051280975341797, 25.293407440185547, 26.535533905029297, 27.777660369873047, 29.019786834716797, 30.261913299560547, 31.50404167175293, 32.74617004394531, 33.98829650878906, 35.23042297363281, 36.47254943847656, 37.71467590332031, 38.95680236816406, 40.19892883300781, 41.44105529785156, 42.68318176269531, 43.92530822753906]}, "gradients/encoder.encoder.layers.11.feed_forward.output_dense.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 0.0, 2.0, 1.0, 2.0, 2.0, 3.0, 5.0, 8.0, 10.0, 18.0, 28.0, 24.0, 51.0, 73.0, 119.0, 210.0, 428.0, 1135.0, 4110.0, 27267.0, 4121060.0, 32772.0, 4646.0, 1286.0, 458.0, 208.0, 115.0, 76.0, 49.0, 39.0, 17.0, 27.0, 14.0, 9.0, 9.0, 3.0, 3.0, 2.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-10.8671875, -10.4725341796875, -10.077880859375, -9.6832275390625, -9.28857421875, -8.8939208984375, -8.499267578125, -8.1046142578125, -7.7099609375, -7.3153076171875, -6.920654296875, -6.5260009765625, -6.13134765625, -5.7366943359375, -5.342041015625, -4.9473876953125, -4.552734375, -4.1580810546875, -3.763427734375, -3.3687744140625, -2.97412109375, -2.5794677734375, -2.184814453125, -1.7901611328125, -1.3955078125, -1.0008544921875, -0.606201171875, -0.2115478515625, 0.18310546875, 0.5777587890625, 0.972412109375, 1.3670654296875, 1.76171875, 2.1563720703125, 2.551025390625, 2.9456787109375, 3.34033203125, 3.7349853515625, 4.129638671875, 4.5242919921875, 4.9189453125, 5.3135986328125, 5.708251953125, 6.1029052734375, 6.49755859375, 6.8922119140625, 7.286865234375, 7.6815185546875, 8.076171875, 8.4708251953125, 8.865478515625, 9.2601318359375, 9.65478515625, 10.0494384765625, 10.444091796875, 10.8387451171875, 11.2333984375, 11.6280517578125, 12.022705078125, 12.4173583984375, 12.81201171875, 13.2066650390625, 13.601318359375, 13.9959716796875, 14.390625]}, "gradients/encoder.encoder.layers.11.feed_forward.output_dense.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 4.0, 3.0, 2.0, 3.0, 0.0, 8.0, 7.0, 11.0, 12.0, 27.0, 23.0, 25.0, 35.0, 42.0, 48.0, 55.0, 55.0, 71.0, 81.0, 82.0, 60.0, 72.0, 52.0, 44.0, 36.0, 35.0, 31.0, 27.0, 12.0, 12.0, 13.0, 11.0, 7.0, 2.0, 0.0, 6.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.65625, -1.5924072265625, -1.528564453125, -1.4647216796875, -1.40087890625, -1.3370361328125, -1.273193359375, -1.2093505859375, -1.1455078125, -1.0816650390625, -1.017822265625, -0.9539794921875, -0.89013671875, -0.8262939453125, -0.762451171875, -0.6986083984375, -0.634765625, -0.5709228515625, -0.507080078125, -0.4432373046875, -0.37939453125, -0.3155517578125, -0.251708984375, -0.1878662109375, -0.1240234375, -0.0601806640625, 0.003662109375, 0.0675048828125, 0.13134765625, 0.1951904296875, 0.259033203125, 0.3228759765625, 0.38671875, 0.4505615234375, 0.514404296875, 0.5782470703125, 0.64208984375, 0.7059326171875, 0.769775390625, 0.8336181640625, 0.8974609375, 0.9613037109375, 1.025146484375, 1.0889892578125, 1.15283203125, 1.2166748046875, 1.280517578125, 1.3443603515625, 1.408203125, 1.4720458984375, 1.535888671875, 1.5997314453125, 1.66357421875, 1.7274169921875, 1.791259765625, 1.8551025390625, 1.9189453125, 1.9827880859375, 2.046630859375, 2.1104736328125, 2.17431640625, 2.2381591796875, 2.302001953125, 2.3658447265625, 2.4296875]}, "gradients/encoder.encoder.layers.11.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 4.0, 0.0, 0.0, 0.0, 1.0, 5.0, 4.0, 3.0, 2.0, 12.0, 8.0, 7.0, 18.0, 12.0, 21.0, 26.0, 37.0, 47.0, 71.0, 83.0, 116.0, 189.0, 339.0, 514.0, 973.0, 1932.0, 4086.0, 9923.0, 36767.0, 4029092.0, 84321.0, 14509.0, 5525.0, 2557.0, 1253.0, 661.0, 411.0, 257.0, 153.0, 78.0, 67.0, 40.0, 42.0, 29.0, 32.0, 20.0, 9.0, 10.0, 7.0, 7.0, 2.0, 6.0, 1.0, 3.0, 4.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-9.3203125, -9.0164794921875, -8.712646484375, -8.4088134765625, -8.10498046875, -7.8011474609375, -7.497314453125, -7.1934814453125, -6.8896484375, -6.5858154296875, -6.281982421875, -5.9781494140625, -5.67431640625, -5.3704833984375, -5.066650390625, -4.7628173828125, -4.458984375, -4.1551513671875, -3.851318359375, -3.5474853515625, -3.24365234375, -2.9398193359375, -2.635986328125, -2.3321533203125, -2.0283203125, -1.7244873046875, -1.420654296875, -1.1168212890625, -0.81298828125, -0.5091552734375, -0.205322265625, 0.0985107421875, 0.40234375, 0.7061767578125, 1.010009765625, 1.3138427734375, 1.61767578125, 1.9215087890625, 2.225341796875, 2.5291748046875, 2.8330078125, 3.1368408203125, 3.440673828125, 3.7445068359375, 4.04833984375, 4.3521728515625, 4.656005859375, 4.9598388671875, 5.263671875, 5.5675048828125, 5.871337890625, 6.1751708984375, 6.47900390625, 6.7828369140625, 7.086669921875, 7.3905029296875, 7.6943359375, 7.9981689453125, 8.302001953125, 8.6058349609375, 8.90966796875, 9.2135009765625, 9.517333984375, 9.8211669921875, 10.125]}, "gradients/encoder.encoder.layers.11.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 1.0, 1.0, 4.0, 5.0, 7.0, 8.0, 11.0, 18.0, 55.0, 244.0, 3573.0, 79.0, 28.0, 11.0, 5.0, 9.0, 11.0, 4.0, 2.0, 2.0, 2.0, 2.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.109375, -3.0235595703125, -2.937744140625, -2.8519287109375, -2.76611328125, -2.6802978515625, -2.594482421875, -2.5086669921875, -2.4228515625, -2.3370361328125, -2.251220703125, -2.1654052734375, -2.07958984375, -1.9937744140625, -1.907958984375, -1.8221435546875, -1.736328125, -1.6505126953125, -1.564697265625, -1.4788818359375, -1.39306640625, -1.3072509765625, -1.221435546875, -1.1356201171875, -1.0498046875, -0.9639892578125, -0.878173828125, -0.7923583984375, -0.70654296875, -0.6207275390625, -0.534912109375, -0.4490966796875, -0.36328125, -0.2774658203125, -0.191650390625, -0.1058349609375, -0.02001953125, 0.0657958984375, 0.151611328125, 0.2374267578125, 0.3232421875, 0.4090576171875, 0.494873046875, 0.5806884765625, 0.66650390625, 0.7523193359375, 0.838134765625, 0.9239501953125, 1.009765625, 1.0955810546875, 1.181396484375, 1.2672119140625, 1.35302734375, 1.4388427734375, 1.524658203125, 1.6104736328125, 1.6962890625, 1.7821044921875, 1.867919921875, 1.9537353515625, 2.03955078125, 2.1253662109375, 2.211181640625, 2.2969970703125, 2.3828125]}, "gradients/encoder.encoder.layers.11.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 7.0, 16.0, 13.0, 25.0, 58.0, 93.0, 142.0, 178.0, 184.0, 105.0, 73.0, 48.0, 29.0, 17.0, 11.0, 6.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.679746627807617, -6.394729137420654, -6.109711647033691, -5.824694633483887, -5.539677143096924, -5.254659652709961, -4.969642639160156, -4.684625148773193, -4.3996076583862305, -4.114590167999268, -3.829572916030884, -3.5445556640625, -3.259538173675537, -2.974520683288574, -2.6895034313201904, -2.4044861793518066, -2.1194686889648438, -1.8344513177871704, -1.549433946609497, -1.2644165754318237, -0.9793992042541504, -0.694381833076477, -0.4093644618988037, -0.12434709072113037, 0.16067028045654297, 0.4456876516342163, 0.7307050228118896, 1.015722393989563, 1.3007397651672363, 1.5857571363449097, 1.870774507522583, 2.155791759490967, 2.4408092498779297, 2.7258267402648926, 3.0108439922332764, 3.29586124420166, 3.580878734588623, 3.865896224975586, 4.150913238525391, 4.4359307289123535, 4.720948219299316, 5.005965709686279, 5.290983200073242, 5.576000213623047, 5.86101770401001, 6.146035194396973, 6.431052207946777, 6.71606969833374, 7.001087188720703, 7.286104679107666, 7.571122169494629, 7.856139183044434, 8.141157150268555, 8.42617416381836, 8.711191177368164, 8.996209144592285, 9.28122615814209, 9.566243171691895, 9.851261138916016, 10.13627815246582, 10.421295166015625, 10.706313133239746, 10.99133014678955, 11.276348114013672, 11.561365127563477]}, "gradients/encoder.encoder.layers.11.final_layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 4.0, 4.0, 6.0, 7.0, 13.0, 14.0, 11.0, 18.0, 16.0, 14.0, 17.0, 22.0, 22.0, 35.0, 40.0, 39.0, 41.0, 42.0, 41.0, 48.0, 49.0, 44.0, 36.0, 55.0, 39.0, 34.0, 36.0, 39.0, 44.0, 28.0, 25.0, 33.0, 20.0, 12.0, 13.0, 14.0, 6.0, 6.0, 5.0, 4.0, 3.0, 1.0, 4.0, 0.0, 3.0, 6.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-3.9291679859161377, -3.815011978149414, -3.7008562088012695, -3.586700201034546, -3.4725444316864014, -3.3583884239196777, -3.244232654571533, -3.1300766468048096, -3.015920639038086, -2.9017646312713623, -2.7876088619232178, -2.673452854156494, -2.5592970848083496, -2.445141077041626, -2.3309850692749023, -2.216829299926758, -2.1026735305786133, -1.9885176420211792, -1.8743617534637451, -1.7602057456970215, -1.6460498571395874, -1.5318939685821533, -1.4177380800247192, -1.3035821914672852, -1.1894261837005615, -1.0752702951431274, -0.9611143469810486, -0.8469584584236145, -0.7328025102615356, -0.6186466217041016, -0.5044907331466675, -0.3903347849845886, -0.27617883682250977, -0.1620229184627533, -0.04786701500415802, 0.06628888845443726, 0.18044480681419373, 0.2946007251739502, 0.4087566137313843, 0.5229125618934631, 0.6370684504508972, 0.7512243390083313, 0.8653802871704102, 0.9795361757278442, 1.0936920642852783, 1.207848072052002, 1.3220038414001465, 1.4361598491668701, 1.5503157377243042, 1.6644716262817383, 1.7786275148391724, 1.8927834033966064, 2.00693941116333, 2.1210951805114746, 2.2352511882781982, 2.349407196044922, 2.4635629653930664, 2.57771897315979, 2.6918747425079346, 2.806030750274658, 2.9201865196228027, 3.0343425273895264, 3.14849853515625, 3.2626543045043945, 3.376810312271118]}, "gradients/encoder.encoder.layers.11.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 5.0, 1.0, 4.0, 10.0, 12.0, 14.0, 15.0, 17.0, 34.0, 34.0, 62.0, 96.0, 128.0, 233.0, 378.0, 650.0, 1186.0, 2300.0, 4967.0, 11641.0, 32929.0, 109572.0, 378928.0, 353937.0, 100261.0, 30325.0, 11107.0, 4716.0, 2224.0, 1102.0, 635.0, 393.0, 248.0, 142.0, 84.0, 47.0, 48.0, 23.0, 14.0, 15.0, 12.0, 3.0, 7.0, 2.0, 3.0, 3.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.84375, -6.62274169921875, -6.4017333984375, -6.18072509765625, -5.959716796875, -5.73870849609375, -5.5177001953125, -5.29669189453125, -5.07568359375, -4.85467529296875, -4.6336669921875, -4.41265869140625, -4.191650390625, -3.97064208984375, -3.7496337890625, -3.52862548828125, -3.3076171875, -3.08660888671875, -2.8656005859375, -2.64459228515625, -2.423583984375, -2.20257568359375, -1.9815673828125, -1.76055908203125, -1.53955078125, -1.31854248046875, -1.0975341796875, -0.87652587890625, -0.655517578125, -0.43450927734375, -0.2135009765625, 0.00750732421875, 0.228515625, 0.44952392578125, 0.6705322265625, 0.89154052734375, 1.112548828125, 1.33355712890625, 1.5545654296875, 1.77557373046875, 1.99658203125, 2.21759033203125, 2.4385986328125, 2.65960693359375, 2.880615234375, 3.10162353515625, 3.3226318359375, 3.54364013671875, 3.7646484375, 3.98565673828125, 4.2066650390625, 4.42767333984375, 4.648681640625, 4.86968994140625, 5.0906982421875, 5.31170654296875, 5.53271484375, 5.75372314453125, 5.9747314453125, 6.19573974609375, 6.416748046875, 6.63775634765625, 6.8587646484375, 7.07977294921875, 7.30078125]}, "gradients/encoder.encoder.layers.11.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 2.0, 2.0, 1.0, 2.0, 6.0, 8.0, 7.0, 11.0, 20.0, 16.0, 25.0, 28.0, 29.0, 35.0, 42.0, 55.0, 39.0, 69.0, 64.0, 62.0, 57.0, 62.0, 63.0, 52.0, 52.0, 33.0, 32.0, 28.0, 16.0, 26.0, 21.0, 15.0, 10.0, 3.0, 7.0, 6.0, 1.0, 3.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.6806640625, -1.6200408935546875, -1.559417724609375, -1.4987945556640625, -1.43817138671875, -1.3775482177734375, -1.316925048828125, -1.2563018798828125, -1.1956787109375, -1.1350555419921875, -1.074432373046875, -1.0138092041015625, -0.95318603515625, -0.8925628662109375, -0.831939697265625, -0.7713165283203125, -0.710693359375, -0.6500701904296875, -0.589447021484375, -0.5288238525390625, -0.46820068359375, -0.4075775146484375, -0.346954345703125, -0.2863311767578125, -0.2257080078125, -0.1650848388671875, -0.104461669921875, -0.0438385009765625, 0.01678466796875, 0.0774078369140625, 0.138031005859375, 0.1986541748046875, 0.25927734375, 0.3199005126953125, 0.380523681640625, 0.4411468505859375, 0.50177001953125, 0.5623931884765625, 0.623016357421875, 0.6836395263671875, 0.7442626953125, 0.8048858642578125, 0.865509033203125, 0.9261322021484375, 0.98675537109375, 1.0473785400390625, 1.108001708984375, 1.1686248779296875, 1.229248046875, 1.2898712158203125, 1.350494384765625, 1.4111175537109375, 1.47174072265625, 1.5323638916015625, 1.592987060546875, 1.6536102294921875, 1.7142333984375, 1.7748565673828125, 1.835479736328125, 1.8961029052734375, 1.95672607421875, 2.0173492431640625, 2.077972412109375, 2.1385955810546875, 2.19921875]}, "gradients/encoder.encoder.layers.11.attention.v_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 1.0, 1.0, 2.0, 3.0, 2.0, 6.0, 6.0, 4.0, 7.0, 10.0, 15.0, 20.0, 36.0, 32.0, 74.0, 122.0, 201.0, 335.0, 621.0, 1256.0, 2636.0, 6998.0, 23272.0, 113216.0, 567484.0, 265546.0, 46725.0, 12095.0, 4176.0, 1751.0, 819.0, 421.0, 241.0, 143.0, 89.0, 61.0, 46.0, 29.0, 19.0, 14.0, 6.0, 6.0, 8.0, 4.0, 4.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-7.66015625, -7.38397216796875, -7.1077880859375, -6.83160400390625, -6.555419921875, -6.27923583984375, -6.0030517578125, -5.72686767578125, -5.45068359375, -5.17449951171875, -4.8983154296875, -4.62213134765625, -4.345947265625, -4.06976318359375, -3.7935791015625, -3.51739501953125, -3.2412109375, -2.96502685546875, -2.6888427734375, -2.41265869140625, -2.136474609375, -1.86029052734375, -1.5841064453125, -1.30792236328125, -1.03173828125, -0.75555419921875, -0.4793701171875, -0.20318603515625, 0.072998046875, 0.34918212890625, 0.6253662109375, 0.90155029296875, 1.177734375, 1.45391845703125, 1.7301025390625, 2.00628662109375, 2.282470703125, 2.55865478515625, 2.8348388671875, 3.11102294921875, 3.38720703125, 3.66339111328125, 3.9395751953125, 4.21575927734375, 4.491943359375, 4.76812744140625, 5.0443115234375, 5.32049560546875, 5.5966796875, 5.87286376953125, 6.1490478515625, 6.42523193359375, 6.701416015625, 6.97760009765625, 7.2537841796875, 7.52996826171875, 7.80615234375, 8.08233642578125, 8.3585205078125, 8.63470458984375, 8.910888671875, 9.18707275390625, 9.4632568359375, 9.73944091796875, 10.015625]}, "gradients/encoder.encoder.layers.11.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 4.0, 2.0, 2.0, 2.0, 1.0, 6.0, 6.0, 8.0, 14.0, 8.0, 16.0, 17.0, 19.0, 15.0, 17.0, 26.0, 25.0, 47.0, 35.0, 42.0, 53.0, 42.0, 62.0, 43.0, 69.0, 38.0, 44.0, 44.0, 33.0, 37.0, 32.0, 35.0, 28.0, 33.0, 22.0, 19.0, 13.0, 10.0, 6.0, 8.0, 8.0, 8.0, 5.0, 6.0, 3.0, 3.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0, 1.0], "bins": [-10.7734375, -10.467041015625, -10.16064453125, -9.854248046875, -9.5478515625, -9.241455078125, -8.93505859375, -8.628662109375, -8.322265625, -8.015869140625, -7.70947265625, -7.403076171875, -7.0966796875, -6.790283203125, -6.48388671875, -6.177490234375, -5.87109375, -5.564697265625, -5.25830078125, -4.951904296875, -4.6455078125, -4.339111328125, -4.03271484375, -3.726318359375, -3.419921875, -3.113525390625, -2.80712890625, -2.500732421875, -2.1943359375, -1.887939453125, -1.58154296875, -1.275146484375, -0.96875, -0.662353515625, -0.35595703125, -0.049560546875, 0.2568359375, 0.563232421875, 0.86962890625, 1.176025390625, 1.482421875, 1.788818359375, 2.09521484375, 2.401611328125, 2.7080078125, 3.014404296875, 3.32080078125, 3.627197265625, 3.93359375, 4.239990234375, 4.54638671875, 4.852783203125, 5.1591796875, 5.465576171875, 5.77197265625, 6.078369140625, 6.384765625, 6.691162109375, 6.99755859375, 7.303955078125, 7.6103515625, 7.916748046875, 8.22314453125, 8.529541015625, 8.8359375]}, "gradients/encoder.encoder.layers.11.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 1.0, 3.0, 1.0, 4.0, 1.0, 1.0, 3.0, 2.0, 4.0, 13.0, 16.0, 17.0, 18.0, 26.0, 46.0, 68.0, 91.0, 141.0, 263.0, 411.0, 897.0, 2363.0, 8208.0, 68047.0, 908824.0, 48209.0, 7041.0, 1992.0, 803.0, 382.0, 232.0, 148.0, 76.0, 64.0, 31.0, 23.0, 27.0, 18.0, 12.0, 8.0, 6.0, 8.0, 5.0, 2.0, 4.0, 5.0, 1.0, 1.0, 0.0, 0.0, 0.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.1796875, -8.85693359375, -8.5341796875, -8.21142578125, -7.888671875, -7.56591796875, -7.2431640625, -6.92041015625, -6.59765625, -6.27490234375, -5.9521484375, -5.62939453125, -5.306640625, -4.98388671875, -4.6611328125, -4.33837890625, -4.015625, -3.69287109375, -3.3701171875, -3.04736328125, -2.724609375, -2.40185546875, -2.0791015625, -1.75634765625, -1.43359375, -1.11083984375, -0.7880859375, -0.46533203125, -0.142578125, 0.18017578125, 0.5029296875, 0.82568359375, 1.1484375, 1.47119140625, 1.7939453125, 2.11669921875, 2.439453125, 2.76220703125, 3.0849609375, 3.40771484375, 3.73046875, 4.05322265625, 4.3759765625, 4.69873046875, 5.021484375, 5.34423828125, 5.6669921875, 5.98974609375, 6.3125, 6.63525390625, 6.9580078125, 7.28076171875, 7.603515625, 7.92626953125, 8.2490234375, 8.57177734375, 8.89453125, 9.21728515625, 9.5400390625, 9.86279296875, 10.185546875, 10.50830078125, 10.8310546875, 11.15380859375, 11.4765625]}, "gradients/encoder.encoder.layers.11.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 2.0, 3.0, 2.0, 0.0, 3.0, 2.0, 6.0, 9.0, 19.0, 16.0, 19.0, 37.0, 54.0, 116.0, 166.0, 143.0, 153.0, 98.0, 49.0, 36.0, 30.0, 12.0, 9.0, 11.0, 3.0, 3.0, 4.0, 1.0, 1.0, 1.0, 2.0, 0.0, 2.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0008754730224609375, -0.0008492767810821533, -0.0008230805397033691, -0.000796884298324585, -0.0007706880569458008, -0.0007444918155670166, -0.0007182955741882324, -0.0006920993328094482, -0.0006659030914306641, -0.0006397068500518799, -0.0006135106086730957, -0.0005873143672943115, -0.0005611181259155273, -0.0005349218845367432, -0.000508725643157959, -0.0004825294017791748, -0.0004563331604003906, -0.00043013691902160645, -0.00040394067764282227, -0.0003777444362640381, -0.0003515481948852539, -0.0003253519535064697, -0.00029915571212768555, -0.00027295947074890137, -0.0002467632293701172, -0.000220566987991333, -0.00019437074661254883, -0.00016817450523376465, -0.00014197826385498047, -0.00011578202247619629, -8.958578109741211e-05, -6.338953971862793e-05, -3.719329833984375e-05, -1.099705696105957e-05, 1.519918441772461e-05, 4.139542579650879e-05, 6.759166717529297e-05, 9.378790855407715e-05, 0.00011998414993286133, 0.0001461803913116455, 0.0001723766326904297, 0.00019857287406921387, 0.00022476911544799805, 0.0002509653568267822, 0.0002771615982055664, 0.0003033578395843506, 0.00032955408096313477, 0.00035575032234191895, 0.0003819465637207031, 0.0004081428050994873, 0.0004343390464782715, 0.00046053528785705566, 0.00048673152923583984, 0.000512927770614624, 0.0005391240119934082, 0.0005653202533721924, 0.0005915164947509766, 0.0006177127361297607, 0.0006439089775085449, 0.0006701052188873291, 0.0006963014602661133, 0.0007224977016448975, 0.0007486939430236816, 0.0007748901844024658, 0.00080108642578125]}, "gradients/encoder.encoder.layers.11.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 0.0, 3.0, 0.0, 1.0, 3.0, 2.0, 4.0, 9.0, 16.0, 13.0, 27.0, 43.0, 74.0, 166.0, 380.0, 1150.0, 4852.0, 37598.0, 906897.0, 87126.0, 7718.0, 1580.0, 499.0, 171.0, 88.0, 47.0, 29.0, 14.0, 20.0, 11.0, 12.0, 3.0, 2.0, 4.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0], "bins": [-11.4375, -11.133544921875, -10.82958984375, -10.525634765625, -10.2216796875, -9.917724609375, -9.61376953125, -9.309814453125, -9.005859375, -8.701904296875, -8.39794921875, -8.093994140625, -7.7900390625, -7.486083984375, -7.18212890625, -6.878173828125, -6.57421875, -6.270263671875, -5.96630859375, -5.662353515625, -5.3583984375, -5.054443359375, -4.75048828125, -4.446533203125, -4.142578125, -3.838623046875, -3.53466796875, -3.230712890625, -2.9267578125, -2.622802734375, -2.31884765625, -2.014892578125, -1.7109375, -1.406982421875, -1.10302734375, -0.799072265625, -0.4951171875, -0.191162109375, 0.11279296875, 0.416748046875, 0.720703125, 1.024658203125, 1.32861328125, 1.632568359375, 1.9365234375, 2.240478515625, 2.54443359375, 2.848388671875, 3.15234375, 3.456298828125, 3.76025390625, 4.064208984375, 4.3681640625, 4.672119140625, 4.97607421875, 5.280029296875, 5.583984375, 5.887939453125, 6.19189453125, 6.495849609375, 6.7998046875, 7.103759765625, 7.40771484375, 7.711669921875, 8.015625]}, "gradients/encoder.encoder.layers.11.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 2.0, 2.0, 3.0, 0.0, 2.0, 7.0, 2.0, 9.0, 10.0, 10.0, 9.0, 16.0, 38.0, 57.0, 83.0, 128.0, 187.0, 167.0, 92.0, 61.0, 38.0, 22.0, 19.0, 12.0, 8.0, 5.0, 5.0, 2.0, 5.0, 2.0, 4.0, 2.0, 1.0, 0.0, 0.0, 2.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.734375, -7.4820556640625, -7.229736328125, -6.9774169921875, -6.72509765625, -6.4727783203125, -6.220458984375, -5.9681396484375, -5.7158203125, -5.4635009765625, -5.211181640625, -4.9588623046875, -4.70654296875, -4.4542236328125, -4.201904296875, -3.9495849609375, -3.697265625, -3.4449462890625, -3.192626953125, -2.9403076171875, -2.68798828125, -2.4356689453125, -2.183349609375, -1.9310302734375, -1.6787109375, -1.4263916015625, -1.174072265625, -0.9217529296875, -0.66943359375, -0.4171142578125, -0.164794921875, 0.0875244140625, 0.33984375, 0.5921630859375, 0.844482421875, 1.0968017578125, 1.34912109375, 1.6014404296875, 1.853759765625, 2.1060791015625, 2.3583984375, 2.6107177734375, 2.863037109375, 3.1153564453125, 3.36767578125, 3.6199951171875, 3.872314453125, 4.1246337890625, 4.376953125, 4.6292724609375, 4.881591796875, 5.1339111328125, 5.38623046875, 5.6385498046875, 5.890869140625, 6.1431884765625, 6.3955078125, 6.6478271484375, 6.900146484375, 7.1524658203125, 7.40478515625, 7.6571044921875, 7.909423828125, 8.1617431640625, 8.4140625]}, "gradients/encoder.encoder.layers.11.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 3.0, 5.0, 1.0, 7.0, 13.0, 22.0, 53.0, 77.0, 128.0, 191.0, 190.0, 149.0, 74.0, 54.0, 27.0, 9.0, 5.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 2.0], "bins": [-175.62242126464844, -171.98858642578125, -168.35476684570312, -164.720947265625, -161.0871124267578, -157.45327758789062, -153.8194580078125, -150.18563842773438, -146.5518035888672, -142.91796875, -139.28414916992188, -135.65032958984375, -132.01649475097656, -128.38265991210938, -124.74884033203125, -121.1150131225586, -117.48118591308594, -113.84735870361328, -110.21353149414062, -106.57970428466797, -102.94587707519531, -99.31204986572266, -95.67822265625, -92.04439544677734, -88.41056823730469, -84.77674102783203, -81.14291381835938, -77.50908660888672, -73.87525939941406, -70.2414321899414, -66.60760498046875, -62.973777770996094, -59.33995819091797, -55.70613098144531, -52.072303771972656, -48.4384765625, -44.804649353027344, -41.17082214355469, -37.53699493408203, -33.903167724609375, -30.26934051513672, -26.635513305664062, -23.001686096191406, -19.36785888671875, -15.734031677246094, -12.100204467773438, -8.466377258300781, -4.832550048828125, -1.1987228393554688, 2.4351043701171875, 6.068931579589844, 9.7027587890625, 13.336585998535156, 16.970413208007812, 20.60424041748047, 24.238067626953125, 27.87189483642578, 31.505722045898438, 35.139549255371094, 38.77337646484375, 42.407203674316406, 46.04103088378906, 49.67485809326172, 53.308685302734375, 56.94251251220703]}, "gradients/encoder.encoder.layers.11.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 1.0, 4.0, 3.0, 3.0, 8.0, 9.0, 17.0, 17.0, 9.0, 17.0, 20.0, 31.0, 29.0, 44.0, 50.0, 31.0, 39.0, 41.0, 37.0, 48.0, 54.0, 55.0, 49.0, 50.0, 50.0, 36.0, 47.0, 29.0, 26.0, 21.0, 19.0, 18.0, 15.0, 21.0, 14.0, 12.0, 8.0, 4.0, 9.0, 4.0, 2.0, 4.0, 2.0, 3.0, 1.0, 0.0, 2.0, 2.0, 1.0, 1.0], "bins": [-55.95832061767578, -54.347618103027344, -52.73691177368164, -51.1262092590332, -49.515506744384766, -47.90480041503906, -46.294097900390625, -44.68339538574219, -43.07269287109375, -41.46199035644531, -39.85128402709961, -38.24058151245117, -36.629878997802734, -35.01917266845703, -33.408470153808594, -31.797767639160156, -30.187061309814453, -28.576356887817383, -26.965654373168945, -25.354949951171875, -23.744247436523438, -22.133543014526367, -20.522838592529297, -18.91213607788086, -17.30143165588379, -15.690728187561035, -14.080024719238281, -12.469320297241211, -10.858616828918457, -9.247913360595703, -7.637208938598633, -6.026505470275879, -4.415802001953125, -2.805098295211792, -1.194394588470459, 0.4163093566894531, 2.027012825012207, 3.637716293334961, 5.248420715332031, 6.859124183654785, 8.469827651977539, 10.080531120300293, 11.691234588623047, 13.301939010620117, 14.912642478942871, 16.523345947265625, 18.134050369262695, 19.744754791259766, 21.355457305908203, 22.966161727905273, 24.57686424255371, 26.18756866455078, 27.79827117919922, 29.40897560119629, 31.01968002319336, 32.6303825378418, 34.2410888671875, 35.85179138183594, 37.46249771118164, 39.07320022583008, 40.683902740478516, 42.29460906982422, 43.905311584472656, 45.516014099121094, 47.12671661376953]}, "gradients/encoder.encoder.layers.10.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 4.0, 7.0, 5.0, 10.0, 12.0, 24.0, 37.0, 50.0, 65.0, 129.0, 273.0, 649.0, 1698.0, 7217.0, 106455.0, 4058249.0, 14860.0, 2726.0, 936.0, 386.0, 199.0, 114.0, 56.0, 43.0, 19.0, 24.0, 14.0, 7.0, 5.0, 3.0, 4.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 0.0, 0.0, 1.0], "bins": [-22.6875, -22.042724609375, -21.39794921875, -20.753173828125, -20.1083984375, -19.463623046875, -18.81884765625, -18.174072265625, -17.529296875, -16.884521484375, -16.23974609375, -15.594970703125, -14.9501953125, -14.305419921875, -13.66064453125, -13.015869140625, -12.37109375, -11.726318359375, -11.08154296875, -10.436767578125, -9.7919921875, -9.147216796875, -8.50244140625, -7.857666015625, -7.212890625, -6.568115234375, -5.92333984375, -5.278564453125, -4.6337890625, -3.989013671875, -3.34423828125, -2.699462890625, -2.0546875, -1.409912109375, -0.76513671875, -0.120361328125, 0.5244140625, 1.169189453125, 1.81396484375, 2.458740234375, 3.103515625, 3.748291015625, 4.39306640625, 5.037841796875, 5.6826171875, 6.327392578125, 6.97216796875, 7.616943359375, 8.26171875, 8.906494140625, 9.55126953125, 10.196044921875, 10.8408203125, 11.485595703125, 12.13037109375, 12.775146484375, 13.419921875, 14.064697265625, 14.70947265625, 15.354248046875, 15.9990234375, 16.643798828125, 17.28857421875, 17.933349609375, 18.578125]}, "gradients/encoder.encoder.layers.10.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 2.0, 3.0, 5.0, 7.0, 9.0, 5.0, 8.0, 26.0, 19.0, 39.0, 38.0, 57.0, 44.0, 55.0, 63.0, 77.0, 68.0, 69.0, 84.0, 73.0, 46.0, 44.0, 30.0, 37.0, 25.0, 21.0, 12.0, 13.0, 15.0, 8.0, 3.0, 2.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.302734375, -2.22491455078125, -2.1470947265625, -2.06927490234375, -1.991455078125, -1.91363525390625, -1.8358154296875, -1.75799560546875, -1.68017578125, -1.60235595703125, -1.5245361328125, -1.44671630859375, -1.368896484375, -1.29107666015625, -1.2132568359375, -1.13543701171875, -1.0576171875, -0.97979736328125, -0.9019775390625, -0.82415771484375, -0.746337890625, -0.66851806640625, -0.5906982421875, -0.51287841796875, -0.43505859375, -0.35723876953125, -0.2794189453125, -0.20159912109375, -0.123779296875, -0.04595947265625, 0.0318603515625, 0.10968017578125, 0.1875, 0.26531982421875, 0.3431396484375, 0.42095947265625, 0.498779296875, 0.57659912109375, 0.6544189453125, 0.73223876953125, 0.81005859375, 0.88787841796875, 0.9656982421875, 1.04351806640625, 1.121337890625, 1.19915771484375, 1.2769775390625, 1.35479736328125, 1.4326171875, 1.51043701171875, 1.5882568359375, 1.66607666015625, 1.743896484375, 1.82171630859375, 1.8995361328125, 1.97735595703125, 2.05517578125, 2.13299560546875, 2.2108154296875, 2.28863525390625, 2.366455078125, 2.44427490234375, 2.5220947265625, 2.59991455078125, 2.677734375]}, "gradients/encoder.encoder.layers.10.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 0.0, 6.0, 1.0, 3.0, 7.0, 8.0, 13.0, 13.0, 24.0, 28.0, 34.0, 67.0, 98.0, 139.0, 211.0, 326.0, 479.0, 760.0, 1184.0, 1891.0, 3239.0, 5968.0, 13242.0, 44685.0, 3766883.0, 300570.0, 30892.0, 10735.0, 5134.0, 2713.0, 1709.0, 1077.0, 721.0, 502.0, 309.0, 201.0, 138.0, 82.0, 56.0, 43.0, 26.0, 19.0, 20.0, 11.0, 4.0, 8.0, 4.0, 4.0, 4.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-16.9375, -16.4375, -15.9375, -15.4375, -14.9375, -14.4375, -13.9375, -13.4375, -12.9375, -12.4375, -11.9375, -11.4375, -10.9375, -10.4375, -9.9375, -9.4375, -8.9375, -8.4375, -7.9375, -7.4375, -6.9375, -6.4375, -5.9375, -5.4375, -4.9375, -4.4375, -3.9375, -3.4375, -2.9375, -2.4375, -1.9375, -1.4375, -0.9375, -0.4375, 0.0625, 0.5625, 1.0625, 1.5625, 2.0625, 2.5625, 3.0625, 3.5625, 4.0625, 4.5625, 5.0625, 5.5625, 6.0625, 6.5625, 7.0625, 7.5625, 8.0625, 8.5625, 9.0625, 9.5625, 10.0625, 10.5625, 11.0625, 11.5625, 12.0625, 12.5625, 13.0625, 13.5625, 14.0625, 14.5625, 15.0625]}, "gradients/encoder.encoder.layers.10.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 0.0, 3.0, 0.0, 0.0, 1.0, 0.0, 4.0, 4.0, 2.0, 5.0, 2.0, 3.0, 15.0, 7.0, 22.0, 25.0, 51.0, 102.0, 1674.0, 1973.0, 85.0, 39.0, 19.0, 21.0, 7.0, 5.0, 3.0, 3.0, 3.0, 5.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.11328125, -4.9306640625, -4.748046875, -4.5654296875, -4.3828125, -4.2001953125, -4.017578125, -3.8349609375, -3.65234375, -3.4697265625, -3.287109375, -3.1044921875, -2.921875, -2.7392578125, -2.556640625, -2.3740234375, -2.19140625, -2.0087890625, -1.826171875, -1.6435546875, -1.4609375, -1.2783203125, -1.095703125, -0.9130859375, -0.73046875, -0.5478515625, -0.365234375, -0.1826171875, 0.0, 0.1826171875, 0.365234375, 0.5478515625, 0.73046875, 0.9130859375, 1.095703125, 1.2783203125, 1.4609375, 1.6435546875, 1.826171875, 2.0087890625, 2.19140625, 2.3740234375, 2.556640625, 2.7392578125, 2.921875, 3.1044921875, 3.287109375, 3.4697265625, 3.65234375, 3.8349609375, 4.017578125, 4.2001953125, 4.3828125, 4.5654296875, 4.748046875, 4.9306640625, 5.11328125, 5.2958984375, 5.478515625, 5.6611328125, 5.84375, 6.0263671875, 6.208984375, 6.3916015625, 6.57421875]}, "gradients/encoder.encoder.layers.10.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 5.0, 6.0, 5.0, 6.0, 25.0, 53.0, 107.0, 206.0, 306.0, 173.0, 77.0, 20.0, 16.0, 6.0, 1.0, 2.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-74.01553344726562, -72.3567123413086, -70.69789123535156, -69.0390625, -67.38024139404297, -65.72142028808594, -64.0625991821289, -62.403778076171875, -60.74495315551758, -59.08613204956055, -57.42730712890625, -55.76848602294922, -54.10966491699219, -52.45083999633789, -50.79201889038086, -49.13319396972656, -47.47437286376953, -45.8155517578125, -44.1567268371582, -42.49790573120117, -40.83908462524414, -39.180259704589844, -37.52143859863281, -35.86261749267578, -34.20379638671875, -32.54497528076172, -30.886152267456055, -29.22732925415039, -27.568506240844727, -25.909683227539062, -24.25086212158203, -22.592039108276367, -20.933216094970703, -19.27439308166504, -17.615571975708008, -15.956748962402344, -14.29792594909668, -12.639103889465332, -10.980281829833984, -9.32145881652832, -7.662636756896973, -6.003814220428467, -4.344991683959961, -2.6861696243286133, -1.0273470878601074, 0.6314754486083984, 2.290297508239746, 3.94912052154541, 5.607942581176758, 7.266765117645264, 8.92558765411377, 10.584409713745117, 12.243232727050781, 13.902054786682129, 15.560876846313477, 17.21969985961914, 18.878520965576172, 20.537343978881836, 22.196165084838867, 23.85498809814453, 25.513811111450195, 27.17263412475586, 28.83145523071289, 30.490278244018555, 32.14910125732422]}, "gradients/encoder.encoder.layers.10.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 1.0, 2.0, 5.0, 3.0, 8.0, 7.0, 9.0, 14.0, 18.0, 21.0, 31.0, 32.0, 41.0, 52.0, 42.0, 64.0, 53.0, 36.0, 54.0, 50.0, 59.0, 52.0, 44.0, 52.0, 26.0, 40.0, 39.0, 37.0, 26.0, 25.0, 11.0, 19.0, 9.0, 7.0, 6.0, 8.0, 5.0, 4.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-17.444921493530273, -16.809663772583008, -16.174406051635742, -15.539148330688477, -14.903889656066895, -14.268631935119629, -13.633374214172363, -12.998115539550781, -12.362857818603516, -11.72760009765625, -11.092342376708984, -10.457084655761719, -9.821825981140137, -9.186568260192871, -8.551310539245605, -7.916052341461182, -7.280795097351074, -6.645537376403809, -6.010279178619385, -5.375021457672119, -4.739763259887695, -4.10450553894043, -3.469247817993164, -2.8339896202087402, -2.1987318992614746, -1.5634739398956299, -0.9282160997390747, -0.29295825958251953, 0.3422996997833252, 0.9775576591491699, 1.6128153800964355, 2.2480735778808594, 2.883331298828125, 3.5185892581939697, 4.1538472175598145, 4.78910493850708, 5.424363136291504, 6.0596208572387695, 6.694878578186035, 7.330136775970459, 7.965394496917725, 8.600652694702148, 9.235910415649414, 9.87116813659668, 10.506425857543945, 11.141683578491211, 11.776941299438477, 12.412199974060059, 13.047457695007324, 13.68271541595459, 14.317973136901855, 14.953231811523438, 15.588489532470703, 16.22374725341797, 16.859004974365234, 17.4942626953125, 18.129520416259766, 18.76477813720703, 19.400035858154297, 20.035293579101562, 20.670551300048828, 21.305809020996094, 21.94106674194336, 22.576326370239258, 23.211584091186523]}, "gradients/encoder.encoder.layers.10.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 4.0, 0.0, 1.0, 2.0, 2.0, 4.0, 6.0, 14.0, 10.0, 29.0, 26.0, 35.0, 56.0, 69.0, 95.0, 170.0, 249.0, 409.0, 565.0, 869.0, 1502.0, 2400.0, 4006.0, 7086.0, 12947.0, 24169.0, 49184.0, 107130.0, 232988.0, 298263.0, 159839.0, 71606.0, 34430.0, 17405.0, 9425.0, 5404.0, 3075.0, 1842.0, 1136.0, 707.0, 472.0, 290.0, 200.0, 135.0, 104.0, 69.0, 35.0, 29.0, 21.0, 15.0, 15.0, 8.0, 7.0, 4.0, 3.0, 2.0, 2.0, 0.0, 2.0, 0.0, 0.0, 2.0], "bins": [-5.02734375, -4.866455078125, -4.70556640625, -4.544677734375, -4.3837890625, -4.222900390625, -4.06201171875, -3.901123046875, -3.740234375, -3.579345703125, -3.41845703125, -3.257568359375, -3.0966796875, -2.935791015625, -2.77490234375, -2.614013671875, -2.453125, -2.292236328125, -2.13134765625, -1.970458984375, -1.8095703125, -1.648681640625, -1.48779296875, -1.326904296875, -1.166015625, -1.005126953125, -0.84423828125, -0.683349609375, -0.5224609375, -0.361572265625, -0.20068359375, -0.039794921875, 0.12109375, 0.281982421875, 0.44287109375, 0.603759765625, 0.7646484375, 0.925537109375, 1.08642578125, 1.247314453125, 1.408203125, 1.569091796875, 1.72998046875, 1.890869140625, 2.0517578125, 2.212646484375, 2.37353515625, 2.534423828125, 2.6953125, 2.856201171875, 3.01708984375, 3.177978515625, 3.3388671875, 3.499755859375, 3.66064453125, 3.821533203125, 3.982421875, 4.143310546875, 4.30419921875, 4.465087890625, 4.6259765625, 4.786865234375, 4.94775390625, 5.108642578125, 5.26953125]}, "gradients/encoder.encoder.layers.10.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 3.0, 4.0, 2.0, 5.0, 7.0, 7.0, 6.0, 9.0, 12.0, 10.0, 14.0, 19.0, 16.0, 26.0, 24.0, 22.0, 23.0, 32.0, 31.0, 39.0, 38.0, 44.0, 44.0, 51.0, 41.0, 54.0, 45.0, 36.0, 45.0, 34.0, 35.0, 33.0, 30.0, 29.0, 25.0, 22.0, 14.0, 16.0, 15.0, 12.0, 10.0, 7.0, 5.0, 0.0, 6.0, 2.0, 1.0, 1.0, 2.0, 4.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-2.291015625, -2.21319580078125, -2.1353759765625, -2.05755615234375, -1.979736328125, -1.90191650390625, -1.8240966796875, -1.74627685546875, -1.66845703125, -1.59063720703125, -1.5128173828125, -1.43499755859375, -1.357177734375, -1.27935791015625, -1.2015380859375, -1.12371826171875, -1.0458984375, -0.96807861328125, -0.8902587890625, -0.81243896484375, -0.734619140625, -0.65679931640625, -0.5789794921875, -0.50115966796875, -0.42333984375, -0.34552001953125, -0.2677001953125, -0.18988037109375, -0.112060546875, -0.03424072265625, 0.0435791015625, 0.12139892578125, 0.19921875, 0.27703857421875, 0.3548583984375, 0.43267822265625, 0.510498046875, 0.58831787109375, 0.6661376953125, 0.74395751953125, 0.82177734375, 0.89959716796875, 0.9774169921875, 1.05523681640625, 1.133056640625, 1.21087646484375, 1.2886962890625, 1.36651611328125, 1.4443359375, 1.52215576171875, 1.5999755859375, 1.67779541015625, 1.755615234375, 1.83343505859375, 1.9112548828125, 1.98907470703125, 2.06689453125, 2.14471435546875, 2.2225341796875, 2.30035400390625, 2.378173828125, 2.45599365234375, 2.5338134765625, 2.61163330078125, 2.689453125]}, "gradients/encoder.encoder.layers.10.attention.v_proj.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0, 3.0, 6.0, 7.0, 10.0, 19.0, 21.0, 40.0, 53.0, 106.0, 204.0, 388.0, 879.0, 2148.0, 7459.0, 46552.0, 699920.0, 262439.0, 20997.0, 4538.0, 1462.0, 636.0, 279.0, 177.0, 84.0, 44.0, 34.0, 19.0, 16.0, 7.0, 3.0, 3.0, 3.0, 2.0, 0.0, 1.0, 2.0, 2.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-18.125, -17.36376953125, -16.6025390625, -15.84130859375, -15.080078125, -14.31884765625, -13.5576171875, -12.79638671875, -12.03515625, -11.27392578125, -10.5126953125, -9.75146484375, -8.990234375, -8.22900390625, -7.4677734375, -6.70654296875, -5.9453125, -5.18408203125, -4.4228515625, -3.66162109375, -2.900390625, -2.13916015625, -1.3779296875, -0.61669921875, 0.14453125, 0.90576171875, 1.6669921875, 2.42822265625, 3.189453125, 3.95068359375, 4.7119140625, 5.47314453125, 6.234375, 6.99560546875, 7.7568359375, 8.51806640625, 9.279296875, 10.04052734375, 10.8017578125, 11.56298828125, 12.32421875, 13.08544921875, 13.8466796875, 14.60791015625, 15.369140625, 16.13037109375, 16.8916015625, 17.65283203125, 18.4140625, 19.17529296875, 19.9365234375, 20.69775390625, 21.458984375, 22.22021484375, 22.9814453125, 23.74267578125, 24.50390625, 25.26513671875, 26.0263671875, 26.78759765625, 27.548828125, 28.31005859375, 29.0712890625, 29.83251953125, 30.59375]}, "gradients/encoder.encoder.layers.10.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 2.0, 1.0, 0.0, 6.0, 6.0, 2.0, 4.0, 8.0, 5.0, 12.0, 14.0, 20.0, 28.0, 35.0, 32.0, 38.0, 66.0, 65.0, 74.0, 72.0, 69.0, 78.0, 59.0, 53.0, 51.0, 44.0, 33.0, 27.0, 23.0, 22.0, 19.0, 12.0, 11.0, 4.0, 1.0, 5.0, 3.0, 4.0, 0.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-21.6875, -20.850830078125, -20.01416015625, -19.177490234375, -18.3408203125, -17.504150390625, -16.66748046875, -15.830810546875, -14.994140625, -14.157470703125, -13.32080078125, -12.484130859375, -11.6474609375, -10.810791015625, -9.97412109375, -9.137451171875, -8.30078125, -7.464111328125, -6.62744140625, -5.790771484375, -4.9541015625, -4.117431640625, -3.28076171875, -2.444091796875, -1.607421875, -0.770751953125, 0.06591796875, 0.902587890625, 1.7392578125, 2.575927734375, 3.41259765625, 4.249267578125, 5.0859375, 5.922607421875, 6.75927734375, 7.595947265625, 8.4326171875, 9.269287109375, 10.10595703125, 10.942626953125, 11.779296875, 12.615966796875, 13.45263671875, 14.289306640625, 15.1259765625, 15.962646484375, 16.79931640625, 17.635986328125, 18.47265625, 19.309326171875, 20.14599609375, 20.982666015625, 21.8193359375, 22.656005859375, 23.49267578125, 24.329345703125, 25.166015625, 26.002685546875, 26.83935546875, 27.676025390625, 28.5126953125, 29.349365234375, 30.18603515625, 31.022705078125, 31.859375]}, "gradients/encoder.encoder.layers.10.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 0.0, 3.0, 3.0, 10.0, 8.0, 12.0, 8.0, 11.0, 21.0, 32.0, 37.0, 50.0, 72.0, 114.0, 288.0, 805.0, 3226.0, 27854.0, 991921.0, 20026.0, 2764.0, 695.0, 250.0, 100.0, 72.0, 40.0, 27.0, 29.0, 12.0, 11.0, 17.0, 11.0, 12.0, 5.0, 2.0, 7.0, 1.0, 4.0, 0.0, 3.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-30.671875, -29.7548828125, -28.837890625, -27.9208984375, -27.00390625, -26.0869140625, -25.169921875, -24.2529296875, -23.3359375, -22.4189453125, -21.501953125, -20.5849609375, -19.66796875, -18.7509765625, -17.833984375, -16.9169921875, -16.0, -15.0830078125, -14.166015625, -13.2490234375, -12.33203125, -11.4150390625, -10.498046875, -9.5810546875, -8.6640625, -7.7470703125, -6.830078125, -5.9130859375, -4.99609375, -4.0791015625, -3.162109375, -2.2451171875, -1.328125, -0.4111328125, 0.505859375, 1.4228515625, 2.33984375, 3.2568359375, 4.173828125, 5.0908203125, 6.0078125, 6.9248046875, 7.841796875, 8.7587890625, 9.67578125, 10.5927734375, 11.509765625, 12.4267578125, 13.34375, 14.2607421875, 15.177734375, 16.0947265625, 17.01171875, 17.9287109375, 18.845703125, 19.7626953125, 20.6796875, 21.5966796875, 22.513671875, 23.4306640625, 24.34765625, 25.2646484375, 26.181640625, 27.0986328125, 28.015625]}, "gradients/encoder.encoder.layers.10.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 2.0, 7.0, 5.0, 20.0, 46.0, 124.0, 255.0, 289.0, 150.0, 58.0, 31.0, 11.0, 8.0, 3.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0022640228271484375, -0.0021701157093048096, -0.0020762085914611816, -0.0019823014736175537, -0.0018883943557739258, -0.0017944872379302979, -0.00170058012008667, -0.001606673002243042, -0.001512765884399414, -0.0014188587665557861, -0.0013249516487121582, -0.0012310445308685303, -0.0011371374130249023, -0.0010432302951812744, -0.0009493231773376465, -0.0008554160594940186, -0.0007615089416503906, -0.0006676018238067627, -0.0005736947059631348, -0.00047978758811950684, -0.0003858804702758789, -0.000291973352432251, -0.00019806623458862305, -0.00010415911674499512, -1.0251998901367188e-05, 8.365511894226074e-05, 0.00017756223678588867, 0.0002714693546295166, 0.00036537647247314453, 0.00045928359031677246, 0.0005531907081604004, 0.0006470978260040283, 0.0007410049438476562, 0.0008349120616912842, 0.0009288191795349121, 0.00102272629737854, 0.001116633415222168, 0.001210540533065796, 0.0013044476509094238, 0.0013983547687530518, 0.0014922618865966797, 0.0015861690044403076, 0.0016800761222839355, 0.0017739832401275635, 0.0018678903579711914, 0.0019617974758148193, 0.0020557045936584473, 0.002149611711502075, 0.002243518829345703, 0.002337425947189331, 0.002431333065032959, 0.002525240182876587, 0.002619147300720215, 0.0027130544185638428, 0.0028069615364074707, 0.0029008686542510986, 0.0029947757720947266, 0.0030886828899383545, 0.0031825900077819824, 0.0032764971256256104, 0.0033704042434692383, 0.003464311361312866, 0.003558218479156494, 0.003652125597000122, 0.00374603271484375]}, "gradients/encoder.encoder.layers.10.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 2.0, 2.0, 7.0, 8.0, 9.0, 14.0, 18.0, 70.0, 135.0, 237.0, 539.0, 1279.0, 3402.0, 14659.0, 388488.0, 615455.0, 17829.0, 3965.0, 1396.0, 557.0, 258.0, 109.0, 44.0, 27.0, 17.0, 10.0, 5.0, 1.0, 4.0, 3.0, 5.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-14.9609375, -14.4459228515625, -13.930908203125, -13.4158935546875, -12.90087890625, -12.3858642578125, -11.870849609375, -11.3558349609375, -10.8408203125, -10.3258056640625, -9.810791015625, -9.2957763671875, -8.78076171875, -8.2657470703125, -7.750732421875, -7.2357177734375, -6.720703125, -6.2056884765625, -5.690673828125, -5.1756591796875, -4.66064453125, -4.1456298828125, -3.630615234375, -3.1156005859375, -2.6005859375, -2.0855712890625, -1.570556640625, -1.0555419921875, -0.54052734375, -0.0255126953125, 0.489501953125, 1.0045166015625, 1.51953125, 2.0345458984375, 2.549560546875, 3.0645751953125, 3.57958984375, 4.0946044921875, 4.609619140625, 5.1246337890625, 5.6396484375, 6.1546630859375, 6.669677734375, 7.1846923828125, 7.69970703125, 8.2147216796875, 8.729736328125, 9.2447509765625, 9.759765625, 10.2747802734375, 10.789794921875, 11.3048095703125, 11.81982421875, 12.3348388671875, 12.849853515625, 13.3648681640625, 13.8798828125, 14.3948974609375, 14.909912109375, 15.4249267578125, 15.93994140625, 16.4549560546875, 16.969970703125, 17.4849853515625, 18.0]}, "gradients/encoder.encoder.layers.10.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 5.0, 1.0, 5.0, 4.0, 2.0, 7.0, 11.0, 10.0, 24.0, 43.0, 53.0, 99.0, 139.0, 229.0, 155.0, 84.0, 48.0, 29.0, 18.0, 5.0, 11.0, 7.0, 3.0, 1.0, 3.0, 4.0, 1.0, 4.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-17.09375, -16.568359375, -16.04296875, -15.517578125, -14.9921875, -14.466796875, -13.94140625, -13.416015625, -12.890625, -12.365234375, -11.83984375, -11.314453125, -10.7890625, -10.263671875, -9.73828125, -9.212890625, -8.6875, -8.162109375, -7.63671875, -7.111328125, -6.5859375, -6.060546875, -5.53515625, -5.009765625, -4.484375, -3.958984375, -3.43359375, -2.908203125, -2.3828125, -1.857421875, -1.33203125, -0.806640625, -0.28125, 0.244140625, 0.76953125, 1.294921875, 1.8203125, 2.345703125, 2.87109375, 3.396484375, 3.921875, 4.447265625, 4.97265625, 5.498046875, 6.0234375, 6.548828125, 7.07421875, 7.599609375, 8.125, 8.650390625, 9.17578125, 9.701171875, 10.2265625, 10.751953125, 11.27734375, 11.802734375, 12.328125, 12.853515625, 13.37890625, 13.904296875, 14.4296875, 14.955078125, 15.48046875, 16.005859375, 16.53125]}, "gradients/encoder.encoder.layers.10.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 3.0, 4.0, 11.0, 74.0, 318.0, 442.0, 129.0, 29.0, 5.0, 1.0, 1.0], "bins": [-972.9915161132812, -956.272705078125, -939.553955078125, -922.8351440429688, -906.1163940429688, -889.3975830078125, -872.6788330078125, -855.9600219726562, -839.2412109375, -822.5223999023438, -805.8036499023438, -789.0848388671875, -772.3660888671875, -755.6472778320312, -738.928466796875, -722.209716796875, -705.490966796875, -688.7721557617188, -672.0534057617188, -655.3345947265625, -638.6158447265625, -621.8970336914062, -605.17822265625, -588.45947265625, -571.7406616210938, -555.0218505859375, -538.3031005859375, -521.5842895507812, -504.8655090332031, -488.146728515625, -471.4279479980469, -454.70916748046875, -437.99029541015625, -421.2715148925781, -404.552734375, -387.83392333984375, -371.1151428222656, -354.3963623046875, -337.6775817871094, -320.95880126953125, -304.2400207519531, -287.521240234375, -270.8024597167969, -254.0836639404297, -237.3648681640625, -220.64608764648438, -203.92730712890625, -187.20852661132812, -170.48971557617188, -153.77093505859375, -137.05213928222656, -120.33335876464844, -103.61457061767578, -86.89578247070312, -70.177001953125, -53.458213806152344, -36.73943328857422, -20.020647048950195, -3.301860809326172, 13.416923522949219, 30.135711669921875, 46.85449981689453, 63.573280334472656, 80.29206848144531, 97.01085662841797]}, "gradients/encoder.encoder.layers.10.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 4.0, 1.0, 2.0, 4.0, 6.0, 11.0, 9.0, 11.0, 12.0, 15.0, 16.0, 20.0, 29.0, 44.0, 38.0, 36.0, 56.0, 58.0, 60.0, 63.0, 72.0, 68.0, 52.0, 52.0, 39.0, 39.0, 39.0, 27.0, 27.0, 16.0, 15.0, 17.0, 10.0, 7.0, 11.0, 5.0, 4.0, 6.0, 3.0, 2.0, 4.0, 3.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-112.86322021484375, -109.10414123535156, -105.3450698852539, -101.58599090576172, -97.82691955566406, -94.06784057617188, -90.30876159667969, -86.54969024658203, -82.79061889648438, -79.03153991699219, -75.27246856689453, -71.51338958740234, -67.75431823730469, -63.9952392578125, -60.23616409301758, -56.477088928222656, -52.71800994873047, -48.95893478393555, -45.199859619140625, -41.44078063964844, -37.68170928955078, -33.922630310058594, -30.163555145263672, -26.40447998046875, -22.645404815673828, -18.886329650878906, -15.127253532409668, -11.36817741394043, -7.609102249145508, -3.850027084350586, -0.09095001220703125, 3.6681251525878906, 7.4272003173828125, 11.186275482177734, 14.945351600646973, 18.70442771911621, 22.463502883911133, 26.222578048706055, 29.98165512084961, 33.74073028564453, 37.49980545043945, 41.258880615234375, 45.0179557800293, 48.77703094482422, 52.536109924316406, 56.29518127441406, 60.05426025390625, 63.81333541870117, 67.5724105834961, 71.33148956298828, 75.09056091308594, 78.84963989257812, 82.60871124267578, 86.36779022216797, 90.12686157226562, 93.88594055175781, 97.64501953125, 101.40409851074219, 105.16316986083984, 108.92224884033203, 112.68132019042969, 116.44039916992188, 120.19947814941406, 123.95854949951172, 127.71762084960938]}, "gradients/encoder.encoder.layers.9.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 1.0, 2.0, 0.0, 6.0, 6.0, 5.0, 13.0, 16.0, 42.0, 74.0, 159.0, 607.0, 15412.0, 4176166.0, 1294.0, 277.0, 91.0, 52.0, 23.0, 16.0, 10.0, 4.0, 6.0, 4.0, 0.0, 4.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-111.5, -108.654296875, -105.80859375, -102.962890625, -100.1171875, -97.271484375, -94.42578125, -91.580078125, -88.734375, -85.888671875, -83.04296875, -80.197265625, -77.3515625, -74.505859375, -71.66015625, -68.814453125, -65.96875, -63.123046875, -60.27734375, -57.431640625, -54.5859375, -51.740234375, -48.89453125, -46.048828125, -43.203125, -40.357421875, -37.51171875, -34.666015625, -31.8203125, -28.974609375, -26.12890625, -23.283203125, -20.4375, -17.591796875, -14.74609375, -11.900390625, -9.0546875, -6.208984375, -3.36328125, -0.517578125, 2.328125, 5.173828125, 8.01953125, 10.865234375, 13.7109375, 16.556640625, 19.40234375, 22.248046875, 25.09375, 27.939453125, 30.78515625, 33.630859375, 36.4765625, 39.322265625, 42.16796875, 45.013671875, 47.859375, 50.705078125, 53.55078125, 56.396484375, 59.2421875, 62.087890625, 64.93359375, 67.779296875, 70.625]}, "gradients/encoder.encoder.layers.9.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 3.0, 1.0, 3.0, 3.0, 13.0, 8.0, 13.0, 22.0, 31.0, 37.0, 43.0, 60.0, 66.0, 86.0, 83.0, 103.0, 84.0, 92.0, 60.0, 45.0, 41.0, 31.0, 19.0, 16.0, 11.0, 8.0, 7.0, 7.0, 3.0, 1.0, 4.0, 3.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.78125, -5.6021728515625, -5.423095703125, -5.2440185546875, -5.06494140625, -4.8858642578125, -4.706787109375, -4.5277099609375, -4.3486328125, -4.1695556640625, -3.990478515625, -3.8114013671875, -3.63232421875, -3.4532470703125, -3.274169921875, -3.0950927734375, -2.916015625, -2.7369384765625, -2.557861328125, -2.3787841796875, -2.19970703125, -2.0206298828125, -1.841552734375, -1.6624755859375, -1.4833984375, -1.3043212890625, -1.125244140625, -0.9461669921875, -0.76708984375, -0.5880126953125, -0.408935546875, -0.2298583984375, -0.05078125, 0.1282958984375, 0.307373046875, 0.4864501953125, 0.66552734375, 0.8446044921875, 1.023681640625, 1.2027587890625, 1.3818359375, 1.5609130859375, 1.739990234375, 1.9190673828125, 2.09814453125, 2.2772216796875, 2.456298828125, 2.6353759765625, 2.814453125, 2.9935302734375, 3.172607421875, 3.3516845703125, 3.53076171875, 3.7098388671875, 3.888916015625, 4.0679931640625, 4.2470703125, 4.4261474609375, 4.605224609375, 4.7843017578125, 4.96337890625, 5.1424560546875, 5.321533203125, 5.5006103515625, 5.6796875]}, "gradients/encoder.encoder.layers.9.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 4.0, 4.0, 3.0, 6.0, 4.0, 2.0, 8.0, 12.0, 20.0, 18.0, 28.0, 30.0, 50.0, 54.0, 68.0, 94.0, 113.0, 169.0, 261.0, 354.0, 603.0, 879.0, 1742.0, 3710.0, 10956.0, 339792.0, 3815371.0, 11541.0, 3810.0, 1817.0, 883.0, 537.0, 383.0, 265.0, 180.0, 138.0, 99.0, 51.0, 62.0, 38.0, 43.0, 28.0, 21.0, 9.0, 5.0, 7.0, 4.0, 7.0, 4.0, 1.0, 4.0, 4.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-29.609375, -28.622802734375, -27.63623046875, -26.649658203125, -25.6630859375, -24.676513671875, -23.68994140625, -22.703369140625, -21.716796875, -20.730224609375, -19.74365234375, -18.757080078125, -17.7705078125, -16.783935546875, -15.79736328125, -14.810791015625, -13.82421875, -12.837646484375, -11.85107421875, -10.864501953125, -9.8779296875, -8.891357421875, -7.90478515625, -6.918212890625, -5.931640625, -4.945068359375, -3.95849609375, -2.971923828125, -1.9853515625, -0.998779296875, -0.01220703125, 0.974365234375, 1.9609375, 2.947509765625, 3.93408203125, 4.920654296875, 5.9072265625, 6.893798828125, 7.88037109375, 8.866943359375, 9.853515625, 10.840087890625, 11.82666015625, 12.813232421875, 13.7998046875, 14.786376953125, 15.77294921875, 16.759521484375, 17.74609375, 18.732666015625, 19.71923828125, 20.705810546875, 21.6923828125, 22.678955078125, 23.66552734375, 24.652099609375, 25.638671875, 26.625244140625, 27.61181640625, 28.598388671875, 29.5849609375, 30.571533203125, 31.55810546875, 32.544677734375, 33.53125]}, "gradients/encoder.encoder.layers.9.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 2.0, 0.0, 2.0, 4.0, 1.0, 3.0, 8.0, 5.0, 22.0, 32.0, 3711.0, 204.0, 44.0, 17.0, 5.0, 5.0, 8.0, 3.0, 3.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.0078125, -8.7418212890625, -8.475830078125, -8.2098388671875, -7.94384765625, -7.6778564453125, -7.411865234375, -7.1458740234375, -6.8798828125, -6.6138916015625, -6.347900390625, -6.0819091796875, -5.81591796875, -5.5499267578125, -5.283935546875, -5.0179443359375, -4.751953125, -4.4859619140625, -4.219970703125, -3.9539794921875, -3.68798828125, -3.4219970703125, -3.156005859375, -2.8900146484375, -2.6240234375, -2.3580322265625, -2.092041015625, -1.8260498046875, -1.56005859375, -1.2940673828125, -1.028076171875, -0.7620849609375, -0.49609375, -0.2301025390625, 0.035888671875, 0.3018798828125, 0.56787109375, 0.8338623046875, 1.099853515625, 1.3658447265625, 1.6318359375, 1.8978271484375, 2.163818359375, 2.4298095703125, 2.69580078125, 2.9617919921875, 3.227783203125, 3.4937744140625, 3.759765625, 4.0257568359375, 4.291748046875, 4.5577392578125, 4.82373046875, 5.0897216796875, 5.355712890625, 5.6217041015625, 5.8876953125, 6.1536865234375, 6.419677734375, 6.6856689453125, 6.95166015625, 7.2176513671875, 7.483642578125, 7.7496337890625, 8.015625]}, "gradients/encoder.encoder.layers.9.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 2.0, 2.0, 1.0, 1.0, 3.0, 4.0, 4.0, 9.0, 4.0, 15.0, 18.0, 35.0, 40.0, 67.0, 85.0, 112.0, 102.0, 103.0, 82.0, 104.0, 49.0, 49.0, 39.0, 21.0, 15.0, 13.0, 14.0, 6.0, 1.0, 7.0, 3.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-23.559917449951172, -22.930341720581055, -22.300765991210938, -21.671188354492188, -21.04161262512207, -20.412036895751953, -19.782459259033203, -19.152883529663086, -18.52330780029297, -17.89373207092285, -17.264156341552734, -16.634578704833984, -16.005002975463867, -15.37542724609375, -14.745850563049316, -14.116273880004883, -13.486698150634766, -12.857122421264648, -12.227545738220215, -11.597969055175781, -10.968393325805664, -10.338817596435547, -9.709240913391113, -9.07966423034668, -8.450088500976562, -7.820512294769287, -7.190936088562012, -6.561359882354736, -5.931783676147461, -5.3022074699401855, -4.67263126373291, -4.043055057525635, -3.4134788513183594, -2.783902645111084, -2.1543264389038086, -1.5247502326965332, -0.8951740264892578, -0.2655978202819824, 0.36397838592529297, 0.9935545921325684, 1.6231307983398438, 2.252707004547119, 2.8822832107543945, 3.51185941696167, 4.141435623168945, 4.771011829376221, 5.400588035583496, 6.0301642417907715, 6.659740447998047, 7.289316654205322, 7.918892860412598, 8.548469543457031, 9.178045272827148, 9.807621002197266, 10.4371976852417, 11.066774368286133, 11.69635009765625, 12.325925827026367, 12.9555025100708, 13.585079193115234, 14.214654922485352, 14.844230651855469, 15.473807334899902, 16.103384017944336, 16.732959747314453]}, "gradients/encoder.encoder.layers.9.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 3.0, 0.0, 4.0, 3.0, 7.0, 7.0, 6.0, 14.0, 5.0, 3.0, 7.0, 17.0, 13.0, 19.0, 22.0, 33.0, 24.0, 25.0, 24.0, 33.0, 41.0, 41.0, 40.0, 48.0, 37.0, 38.0, 39.0, 45.0, 48.0, 36.0, 30.0, 31.0, 27.0, 33.0, 28.0, 29.0, 19.0, 25.0, 15.0, 11.0, 13.0, 11.0, 12.0, 10.0, 12.0, 5.0, 8.0, 2.0, 4.0, 4.0, 1.0, 1.0, 3.0, 2.0, 1.0, 0.0, 1.0], "bins": [-11.374119758605957, -11.031984329223633, -10.689848899841309, -10.347713470458984, -10.005577087402344, -9.66344165802002, -9.321306228637695, -8.979170799255371, -8.637035369873047, -8.294899940490723, -7.952764511108398, -7.610628604888916, -7.268493175506592, -6.926357746124268, -6.584221839904785, -6.242086410522461, -5.899950981140137, -5.5578155517578125, -5.215680122375488, -4.873544216156006, -4.531408786773682, -4.189273357391357, -3.847137689590454, -3.505002021789551, -3.1628665924072266, -2.8207311630249023, -2.478595495223999, -2.1364598274230957, -1.7943243980407715, -1.4521888494491577, -1.110053300857544, -0.7679176330566406, -0.4257831573486328, -0.08364760875701904, 0.2584879398345947, 0.6006234884262085, 0.9427590370178223, 1.284894585609436, 1.6270301342010498, 1.9691658020019531, 2.3113012313842773, 2.6534366607666016, 2.995572328567505, 3.337707996368408, 3.6798434257507324, 4.021978855133057, 4.364114761352539, 4.706250190734863, 5.0483856201171875, 5.390521049499512, 5.732656478881836, 6.074792385101318, 6.416927814483643, 6.759063243865967, 7.101199150085449, 7.443334579467773, 7.785470008850098, 8.127605438232422, 8.469740867614746, 8.81187629699707, 9.154012680053711, 9.496148109436035, 9.83828353881836, 10.180418968200684, 10.522554397583008]}, "gradients/encoder.encoder.layers.9.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 5.0, 5.0, 7.0, 3.0, 14.0, 16.0, 14.0, 26.0, 34.0, 47.0, 96.0, 160.0, 251.0, 488.0, 1011.0, 2343.0, 6628.0, 26415.0, 183329.0, 675983.0, 122525.0, 19828.0, 5421.0, 1952.0, 874.0, 463.0, 225.0, 117.0, 87.0, 69.0, 37.0, 27.0, 11.0, 13.0, 11.0, 10.0, 7.0, 2.0, 3.0, 1.0, 6.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0], "bins": [-20.484375, -19.9217529296875, -19.359130859375, -18.7965087890625, -18.23388671875, -17.6712646484375, -17.108642578125, -16.5460205078125, -15.9833984375, -15.4207763671875, -14.858154296875, -14.2955322265625, -13.73291015625, -13.1702880859375, -12.607666015625, -12.0450439453125, -11.482421875, -10.9197998046875, -10.357177734375, -9.7945556640625, -9.23193359375, -8.6693115234375, -8.106689453125, -7.5440673828125, -6.9814453125, -6.4188232421875, -5.856201171875, -5.2935791015625, -4.73095703125, -4.1683349609375, -3.605712890625, -3.0430908203125, -2.48046875, -1.9178466796875, -1.355224609375, -0.7926025390625, -0.22998046875, 0.3326416015625, 0.895263671875, 1.4578857421875, 2.0205078125, 2.5831298828125, 3.145751953125, 3.7083740234375, 4.27099609375, 4.8336181640625, 5.396240234375, 5.9588623046875, 6.521484375, 7.0841064453125, 7.646728515625, 8.2093505859375, 8.77197265625, 9.3345947265625, 9.897216796875, 10.4598388671875, 11.0224609375, 11.5850830078125, 12.147705078125, 12.7103271484375, 13.27294921875, 13.8355712890625, 14.398193359375, 14.9608154296875, 15.5234375]}, "gradients/encoder.encoder.layers.9.attention.out_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 5.0, 5.0, 3.0, 10.0, 7.0, 13.0, 15.0, 20.0, 28.0, 28.0, 30.0, 46.0, 45.0, 57.0, 62.0, 67.0, 74.0, 79.0, 64.0, 71.0, 53.0, 48.0, 48.0, 25.0, 16.0, 26.0, 12.0, 11.0, 13.0, 9.0, 4.0, 2.0, 4.0, 2.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 1.0], "bins": [-5.9296875, -5.7677001953125, -5.605712890625, -5.4437255859375, -5.28173828125, -5.1197509765625, -4.957763671875, -4.7957763671875, -4.6337890625, -4.4718017578125, -4.309814453125, -4.1478271484375, -3.98583984375, -3.8238525390625, -3.661865234375, -3.4998779296875, -3.337890625, -3.1759033203125, -3.013916015625, -2.8519287109375, -2.68994140625, -2.5279541015625, -2.365966796875, -2.2039794921875, -2.0419921875, -1.8800048828125, -1.718017578125, -1.5560302734375, -1.39404296875, -1.2320556640625, -1.070068359375, -0.9080810546875, -0.74609375, -0.5841064453125, -0.422119140625, -0.2601318359375, -0.09814453125, 0.0638427734375, 0.225830078125, 0.3878173828125, 0.5498046875, 0.7117919921875, 0.873779296875, 1.0357666015625, 1.19775390625, 1.3597412109375, 1.521728515625, 1.6837158203125, 1.845703125, 2.0076904296875, 2.169677734375, 2.3316650390625, 2.49365234375, 2.6556396484375, 2.817626953125, 2.9796142578125, 3.1416015625, 3.3035888671875, 3.465576171875, 3.6275634765625, 3.78955078125, 3.9515380859375, 4.113525390625, 4.2755126953125, 4.4375]}, "gradients/encoder.encoder.layers.9.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 1.0, 5.0, 2.0, 3.0, 4.0, 10.0, 8.0, 12.0, 15.0, 37.0, 47.0, 63.0, 109.0, 186.0, 368.0, 678.0, 1540.0, 4364.0, 18529.0, 136945.0, 729288.0, 130862.0, 18225.0, 4334.0, 1441.0, 619.0, 356.0, 150.0, 112.0, 78.0, 49.0, 29.0, 31.0, 21.0, 12.0, 14.0, 6.0, 3.0, 2.0, 2.0, 2.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-20.875, -20.269775390625, -19.66455078125, -19.059326171875, -18.4541015625, -17.848876953125, -17.24365234375, -16.638427734375, -16.033203125, -15.427978515625, -14.82275390625, -14.217529296875, -13.6123046875, -13.007080078125, -12.40185546875, -11.796630859375, -11.19140625, -10.586181640625, -9.98095703125, -9.375732421875, -8.7705078125, -8.165283203125, -7.56005859375, -6.954833984375, -6.349609375, -5.744384765625, -5.13916015625, -4.533935546875, -3.9287109375, -3.323486328125, -2.71826171875, -2.113037109375, -1.5078125, -0.902587890625, -0.29736328125, 0.307861328125, 0.9130859375, 1.518310546875, 2.12353515625, 2.728759765625, 3.333984375, 3.939208984375, 4.54443359375, 5.149658203125, 5.7548828125, 6.360107421875, 6.96533203125, 7.570556640625, 8.17578125, 8.781005859375, 9.38623046875, 9.991455078125, 10.5966796875, 11.201904296875, 11.80712890625, 12.412353515625, 13.017578125, 13.622802734375, 14.22802734375, 14.833251953125, 15.4384765625, 16.043701171875, 16.64892578125, 17.254150390625, 17.859375]}, "gradients/encoder.encoder.layers.9.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 3.0, 1.0, 0.0, 4.0, 3.0, 6.0, 0.0, 2.0, 5.0, 11.0, 13.0, 10.0, 16.0, 15.0, 32.0, 34.0, 32.0, 33.0, 43.0, 53.0, 49.0, 49.0, 50.0, 44.0, 61.0, 52.0, 44.0, 38.0, 42.0, 41.0, 27.0, 26.0, 37.0, 17.0, 24.0, 23.0, 17.0, 8.0, 11.0, 9.0, 6.0, 11.0, 3.0, 1.0, 1.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 3.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-20.046875, -19.40625, -18.765625, -18.125, -17.484375, -16.84375, -16.203125, -15.5625, -14.921875, -14.28125, -13.640625, -13.0, -12.359375, -11.71875, -11.078125, -10.4375, -9.796875, -9.15625, -8.515625, -7.875, -7.234375, -6.59375, -5.953125, -5.3125, -4.671875, -4.03125, -3.390625, -2.75, -2.109375, -1.46875, -0.828125, -0.1875, 0.453125, 1.09375, 1.734375, 2.375, 3.015625, 3.65625, 4.296875, 4.9375, 5.578125, 6.21875, 6.859375, 7.5, 8.140625, 8.78125, 9.421875, 10.0625, 10.703125, 11.34375, 11.984375, 12.625, 13.265625, 13.90625, 14.546875, 15.1875, 15.828125, 16.46875, 17.109375, 17.75, 18.390625, 19.03125, 19.671875, 20.3125, 20.953125]}, "gradients/encoder.encoder.layers.9.attention.k_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 3.0, 8.0, 9.0, 4.0, 6.0, 16.0, 15.0, 11.0, 20.0, 36.0, 48.0, 106.0, 150.0, 417.0, 1923.0, 38642.0, 986248.0, 18875.0, 1335.0, 306.0, 139.0, 87.0, 45.0, 34.0, 24.0, 16.0, 15.0, 10.0, 3.0, 4.0, 3.0, 3.0, 3.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-22.8125, -21.8779296875, -20.943359375, -20.0087890625, -19.07421875, -18.1396484375, -17.205078125, -16.2705078125, -15.3359375, -14.4013671875, -13.466796875, -12.5322265625, -11.59765625, -10.6630859375, -9.728515625, -8.7939453125, -7.859375, -6.9248046875, -5.990234375, -5.0556640625, -4.12109375, -3.1865234375, -2.251953125, -1.3173828125, -0.3828125, 0.5517578125, 1.486328125, 2.4208984375, 3.35546875, 4.2900390625, 5.224609375, 6.1591796875, 7.09375, 8.0283203125, 8.962890625, 9.8974609375, 10.83203125, 11.7666015625, 12.701171875, 13.6357421875, 14.5703125, 15.5048828125, 16.439453125, 17.3740234375, 18.30859375, 19.2431640625, 20.177734375, 21.1123046875, 22.046875, 22.9814453125, 23.916015625, 24.8505859375, 25.78515625, 26.7197265625, 27.654296875, 28.5888671875, 29.5234375, 30.4580078125, 31.392578125, 32.3271484375, 33.26171875, 34.1962890625, 35.130859375, 36.0654296875, 37.0]}, "gradients/encoder.encoder.layers.9.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 3.0, 2.0, 5.0, 8.0, 9.0, 10.0, 20.0, 28.0, 44.0, 52.0, 78.0, 152.0, 168.0, 130.0, 100.0, 66.0, 33.0, 28.0, 24.0, 15.0, 11.0, 7.0, 6.0, 3.0, 4.0, 3.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0012950897216796875, -0.0012351572513580322, -0.001175224781036377, -0.0011152923107147217, -0.0010553598403930664, -0.0009954273700714111, -0.0009354948997497559, -0.0008755624294281006, -0.0008156299591064453, -0.00075569748878479, -0.0006957650184631348, -0.0006358325481414795, -0.0005759000778198242, -0.0005159676074981689, -0.00045603513717651367, -0.0003961026668548584, -0.0003361701965332031, -0.00027623772621154785, -0.00021630525588989258, -0.0001563727855682373, -9.644031524658203e-05, -3.650784492492676e-05, 2.3424625396728516e-05, 8.335709571838379e-05, 0.00014328956604003906, 0.00020322203636169434, 0.0002631545066833496, 0.0003230869770050049, 0.00038301944732666016, 0.00044295191764831543, 0.0005028843879699707, 0.000562816858291626, 0.0006227493286132812, 0.0006826817989349365, 0.0007426142692565918, 0.0008025467395782471, 0.0008624792098999023, 0.0009224116802215576, 0.0009823441505432129, 0.0010422766208648682, 0.0011022090911865234, 0.0011621415615081787, 0.001222074031829834, 0.0012820065021514893, 0.0013419389724731445, 0.0014018714427947998, 0.001461803913116455, 0.0015217363834381104, 0.0015816688537597656, 0.001641601324081421, 0.0017015337944030762, 0.0017614662647247314, 0.0018213987350463867, 0.001881331205368042, 0.0019412636756896973, 0.0020011961460113525, 0.002061128616333008, 0.002121061086654663, 0.0021809935569763184, 0.0022409260272979736, 0.002300858497619629, 0.002360790967941284, 0.0024207234382629395, 0.0024806559085845947, 0.00254058837890625]}, "gradients/encoder.encoder.layers.9.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0, 0.0, 3.0, 4.0, 5.0, 4.0, 4.0, 8.0, 18.0, 35.0, 41.0, 88.0, 146.0, 325.0, 708.0, 2155.0, 9913.0, 124047.0, 853798.0, 48930.0, 5697.0, 1493.0, 599.0, 250.0, 129.0, 58.0, 37.0, 22.0, 12.0, 15.0, 6.0, 3.0, 3.0, 4.0, 2.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-19.140625, -18.6541748046875, -18.167724609375, -17.6812744140625, -17.19482421875, -16.7083740234375, -16.221923828125, -15.7354736328125, -15.2490234375, -14.7625732421875, -14.276123046875, -13.7896728515625, -13.30322265625, -12.8167724609375, -12.330322265625, -11.8438720703125, -11.357421875, -10.8709716796875, -10.384521484375, -9.8980712890625, -9.41162109375, -8.9251708984375, -8.438720703125, -7.9522705078125, -7.4658203125, -6.9793701171875, -6.492919921875, -6.0064697265625, -5.52001953125, -5.0335693359375, -4.547119140625, -4.0606689453125, -3.57421875, -3.0877685546875, -2.601318359375, -2.1148681640625, -1.62841796875, -1.1419677734375, -0.655517578125, -0.1690673828125, 0.3173828125, 0.8038330078125, 1.290283203125, 1.7767333984375, 2.26318359375, 2.7496337890625, 3.236083984375, 3.7225341796875, 4.208984375, 4.6954345703125, 5.181884765625, 5.6683349609375, 6.15478515625, 6.6412353515625, 7.127685546875, 7.6141357421875, 8.1005859375, 8.5870361328125, 9.073486328125, 9.5599365234375, 10.04638671875, 10.5328369140625, 11.019287109375, 11.5057373046875, 11.9921875]}, "gradients/encoder.encoder.layers.9.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 4.0, 2.0, 2.0, 4.0, 2.0, 4.0, 5.0, 3.0, 9.0, 15.0, 19.0, 22.0, 37.0, 55.0, 52.0, 97.0, 158.0, 108.0, 103.0, 80.0, 66.0, 38.0, 40.0, 26.0, 26.0, 9.0, 8.0, 6.0, 3.0, 1.0, 2.0, 1.0, 2.0, 4.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.234375, -11.9078369140625, -11.581298828125, -11.2547607421875, -10.92822265625, -10.6016845703125, -10.275146484375, -9.9486083984375, -9.6220703125, -9.2955322265625, -8.968994140625, -8.6424560546875, -8.31591796875, -7.9893798828125, -7.662841796875, -7.3363037109375, -7.009765625, -6.6832275390625, -6.356689453125, -6.0301513671875, -5.70361328125, -5.3770751953125, -5.050537109375, -4.7239990234375, -4.3974609375, -4.0709228515625, -3.744384765625, -3.4178466796875, -3.09130859375, -2.7647705078125, -2.438232421875, -2.1116943359375, -1.78515625, -1.4586181640625, -1.132080078125, -0.8055419921875, -0.47900390625, -0.1524658203125, 0.174072265625, 0.5006103515625, 0.8271484375, 1.1536865234375, 1.480224609375, 1.8067626953125, 2.13330078125, 2.4598388671875, 2.786376953125, 3.1129150390625, 3.439453125, 3.7659912109375, 4.092529296875, 4.4190673828125, 4.74560546875, 5.0721435546875, 5.398681640625, 5.7252197265625, 6.0517578125, 6.3782958984375, 6.704833984375, 7.0313720703125, 7.35791015625, 7.6844482421875, 8.010986328125, 8.3375244140625, 8.6640625]}, "gradients/encoder.encoder.layers.9.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 6.0, 9.0, 30.0, 216.0, 453.0, 239.0, 44.0, 11.0, 1.0, 2.0, 3.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-874.15185546875, -857.1844482421875, -840.217041015625, -823.2496948242188, -806.2822875976562, -789.3148803710938, -772.3475341796875, -755.380126953125, -738.4127197265625, -721.4453125, -704.4779052734375, -687.5105590820312, -670.5431518554688, -653.5757446289062, -636.6083984375, -619.6409912109375, -602.673583984375, -585.7061767578125, -568.73876953125, -551.7714233398438, -534.8040161132812, -517.8366088867188, -500.8692321777344, -483.90185546875, -466.9344482421875, -449.967041015625, -432.9996643066406, -416.03228759765625, -399.06488037109375, -382.09747314453125, -365.1300964355469, -348.1627197265625, -331.1952819824219, -314.2279052734375, -297.260498046875, -280.2930908203125, -263.3257141113281, -246.3583221435547, -229.39093017578125, -212.4235382080078, -195.45614624023438, -178.48875427246094, -161.5213623046875, -144.55397033691406, -127.58657836914062, -110.61918640136719, -93.65179443359375, -76.68440246582031, -59.717010498046875, -42.74961853027344, -25.7822265625, -8.814834594726562, 8.152557373046875, 25.119949340820312, 42.08734130859375, 59.05473327636719, 76.02212524414062, 92.98951721191406, 109.9569091796875, 126.92430114746094, 143.89169311523438, 160.8590850830078, 177.82647705078125, 194.7938690185547, 211.76126098632812]}, "gradients/encoder.encoder.layers.9.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 4.0, 3.0, 0.0, 0.0, 1.0, 3.0, 2.0, 6.0, 7.0, 12.0, 13.0, 13.0, 20.0, 23.0, 24.0, 27.0, 25.0, 39.0, 41.0, 35.0, 42.0, 49.0, 51.0, 47.0, 55.0, 49.0, 50.0, 50.0, 50.0, 43.0, 33.0, 37.0, 35.0, 29.0, 25.0, 11.0, 5.0, 9.0, 11.0, 8.0, 8.0, 7.0, 4.0, 2.0, 2.0, 1.0, 3.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0], "bins": [-99.05447387695312, -96.16991424560547, -93.28535461425781, -90.40079498291016, -87.5162353515625, -84.63166809082031, -81.74711608886719, -78.862548828125, -75.97798919677734, -73.09342956542969, -70.20886993408203, -67.32431030273438, -64.43975067138672, -61.5551872253418, -58.67062759399414, -55.78606414794922, -52.90150833129883, -50.01694869995117, -47.132389068603516, -44.247825622558594, -41.36326599121094, -38.47870635986328, -35.594146728515625, -32.70958709716797, -29.82502555847168, -26.940465927124023, -24.055904388427734, -21.171344757080078, -18.286785125732422, -15.402223587036133, -12.517663955688477, -9.633102416992188, -6.748542785644531, -3.8639824390411377, -0.9794220924377441, 1.9051380157470703, 4.789698600769043, 7.674259185791016, 10.558818817138672, 13.443380355834961, 16.327939987182617, 19.212499618530273, 22.097061157226562, 24.98162078857422, 27.866180419921875, 30.750741958618164, 33.63529968261719, 36.51986312866211, 39.404422760009766, 42.28898239135742, 45.17354202270508, 48.05810546875, 50.942665100097656, 53.82722473144531, 56.71178436279297, 59.596343994140625, 62.48090362548828, 65.36546325683594, 68.2500228881836, 71.13458251953125, 74.0191421508789, 76.90370178222656, 79.78826904296875, 82.6728286743164, 85.55738830566406]}, "gradients/encoder.encoder.layers.8.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 0.0, 1.0, 3.0, 4.0, 1.0, 5.0, 3.0, 8.0, 18.0, 15.0, 25.0, 29.0, 43.0, 70.0, 129.0, 238.0, 466.0, 929.0, 2614.0, 11770.0, 4015564.0, 150716.0, 7939.0, 2048.0, 833.0, 375.0, 183.0, 99.0, 59.0, 32.0, 22.0, 19.0, 10.0, 8.0, 3.0, 1.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0], "bins": [-43.59375, -42.34423828125, -41.0947265625, -39.84521484375, -38.595703125, -37.34619140625, -36.0966796875, -34.84716796875, -33.59765625, -32.34814453125, -31.0986328125, -29.84912109375, -28.599609375, -27.35009765625, -26.1005859375, -24.85107421875, -23.6015625, -22.35205078125, -21.1025390625, -19.85302734375, -18.603515625, -17.35400390625, -16.1044921875, -14.85498046875, -13.60546875, -12.35595703125, -11.1064453125, -9.85693359375, -8.607421875, -7.35791015625, -6.1083984375, -4.85888671875, -3.609375, -2.35986328125, -1.1103515625, 0.13916015625, 1.388671875, 2.63818359375, 3.8876953125, 5.13720703125, 6.38671875, 7.63623046875, 8.8857421875, 10.13525390625, 11.384765625, 12.63427734375, 13.8837890625, 15.13330078125, 16.3828125, 17.63232421875, 18.8818359375, 20.13134765625, 21.380859375, 22.63037109375, 23.8798828125, 25.12939453125, 26.37890625, 27.62841796875, 28.8779296875, 30.12744140625, 31.376953125, 32.62646484375, 33.8759765625, 35.12548828125, 36.375]}, "gradients/encoder.encoder.layers.8.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 2.0, 0.0, 6.0, 11.0, 7.0, 5.0, 16.0, 34.0, 35.0, 44.0, 63.0, 76.0, 81.0, 105.0, 99.0, 109.0, 73.0, 69.0, 55.0, 44.0, 32.0, 10.0, 13.0, 8.0, 1.0, 5.0, 5.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-8.6171875, -8.401611328125, -8.18603515625, -7.970458984375, -7.7548828125, -7.539306640625, -7.32373046875, -7.108154296875, -6.892578125, -6.677001953125, -6.46142578125, -6.245849609375, -6.0302734375, -5.814697265625, -5.59912109375, -5.383544921875, -5.16796875, -4.952392578125, -4.73681640625, -4.521240234375, -4.3056640625, -4.090087890625, -3.87451171875, -3.658935546875, -3.443359375, -3.227783203125, -3.01220703125, -2.796630859375, -2.5810546875, -2.365478515625, -2.14990234375, -1.934326171875, -1.71875, -1.503173828125, -1.28759765625, -1.072021484375, -0.8564453125, -0.640869140625, -0.42529296875, -0.209716796875, 0.005859375, 0.221435546875, 0.43701171875, 0.652587890625, 0.8681640625, 1.083740234375, 1.29931640625, 1.514892578125, 1.73046875, 1.946044921875, 2.16162109375, 2.377197265625, 2.5927734375, 2.808349609375, 3.02392578125, 3.239501953125, 3.455078125, 3.670654296875, 3.88623046875, 4.101806640625, 4.3173828125, 4.532958984375, 4.74853515625, 4.964111328125, 5.1796875]}, "gradients/encoder.encoder.layers.8.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 4.0, 0.0, 0.0, 3.0, 7.0, 9.0, 12.0, 12.0, 19.0, 40.0, 43.0, 68.0, 102.0, 185.0, 320.0, 541.0, 935.0, 1852.0, 4274.0, 10786.0, 37301.0, 3698945.0, 392672.0, 29427.0, 9153.0, 3792.0, 1732.0, 824.0, 487.0, 283.0, 163.0, 91.0, 72.0, 42.0, 25.0, 28.0, 12.0, 10.0, 4.0, 6.0, 4.0, 6.0, 2.0, 3.0, 0.0, 0.0, 0.0, 3.0], "bins": [-34.875, -33.955322265625, -33.03564453125, -32.115966796875, -31.1962890625, -30.276611328125, -29.35693359375, -28.437255859375, -27.517578125, -26.597900390625, -25.67822265625, -24.758544921875, -23.8388671875, -22.919189453125, -21.99951171875, -21.079833984375, -20.16015625, -19.240478515625, -18.32080078125, -17.401123046875, -16.4814453125, -15.561767578125, -14.64208984375, -13.722412109375, -12.802734375, -11.883056640625, -10.96337890625, -10.043701171875, -9.1240234375, -8.204345703125, -7.28466796875, -6.364990234375, -5.4453125, -4.525634765625, -3.60595703125, -2.686279296875, -1.7666015625, -0.846923828125, 0.07275390625, 0.992431640625, 1.912109375, 2.831787109375, 3.75146484375, 4.671142578125, 5.5908203125, 6.510498046875, 7.43017578125, 8.349853515625, 9.26953125, 10.189208984375, 11.10888671875, 12.028564453125, 12.9482421875, 13.867919921875, 14.78759765625, 15.707275390625, 16.626953125, 17.546630859375, 18.46630859375, 19.385986328125, 20.3056640625, 21.225341796875, 22.14501953125, 23.064697265625, 23.984375]}, "gradients/encoder.encoder.layers.8.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 2.0, 3.0, 3.0, 4.0, 4.0, 9.0, 9.0, 4.0, 15.0, 20.0, 28.0, 37.0, 89.0, 220.0, 3214.0, 228.0, 73.0, 38.0, 19.0, 14.0, 12.0, 9.0, 5.0, 6.0, 2.0, 5.0, 3.0, 4.0, 3.0, 0.0, 0.0, 1.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-15.6484375, -15.252197265625, -14.85595703125, -14.459716796875, -14.0634765625, -13.667236328125, -13.27099609375, -12.874755859375, -12.478515625, -12.082275390625, -11.68603515625, -11.289794921875, -10.8935546875, -10.497314453125, -10.10107421875, -9.704833984375, -9.30859375, -8.912353515625, -8.51611328125, -8.119873046875, -7.7236328125, -7.327392578125, -6.93115234375, -6.534912109375, -6.138671875, -5.742431640625, -5.34619140625, -4.949951171875, -4.5537109375, -4.157470703125, -3.76123046875, -3.364990234375, -2.96875, -2.572509765625, -2.17626953125, -1.780029296875, -1.3837890625, -0.987548828125, -0.59130859375, -0.195068359375, 0.201171875, 0.597412109375, 0.99365234375, 1.389892578125, 1.7861328125, 2.182373046875, 2.57861328125, 2.974853515625, 3.37109375, 3.767333984375, 4.16357421875, 4.559814453125, 4.9560546875, 5.352294921875, 5.74853515625, 6.144775390625, 6.541015625, 6.937255859375, 7.33349609375, 7.729736328125, 8.1259765625, 8.522216796875, 8.91845703125, 9.314697265625, 9.7109375]}, "gradients/encoder.encoder.layers.8.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 1.0, 2.0, 1.0, 7.0, 7.0, 9.0, 12.0, 14.0, 23.0, 47.0, 66.0, 108.0, 106.0, 136.0, 105.0, 87.0, 84.0, 63.0, 38.0, 19.0, 19.0, 16.0, 7.0, 12.0, 8.0, 5.0, 2.0, 5.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-52.619911193847656, -51.10145568847656, -49.58300018310547, -48.064544677734375, -46.54608917236328, -45.02763366699219, -43.509178161621094, -41.99072265625, -40.472267150878906, -38.95381164550781, -37.43535614013672, -35.916900634765625, -34.39844512939453, -32.87998962402344, -31.36153221130371, -29.843076705932617, -28.32461929321289, -26.806163787841797, -25.287708282470703, -23.76925277709961, -22.250797271728516, -20.732341766357422, -19.213884353637695, -17.6954288482666, -16.176973342895508, -14.658517837524414, -13.14006233215332, -11.62160587310791, -10.103150367736816, -8.584694862365723, -7.0662384033203125, -5.547782897949219, -4.029331207275391, -2.5108754634857178, -0.9924197196960449, 0.526036262512207, 2.044491767883301, 3.5629472732543945, 5.081403732299805, 6.599859237670898, 8.118314743041992, 9.636770248413086, 11.15522575378418, 12.67368221282959, 14.192137718200684, 15.710593223571777, 17.229049682617188, 18.74750518798828, 20.265960693359375, 21.78441619873047, 23.302871704101562, 24.821327209472656, 26.33978271484375, 27.858238220214844, 29.37669563293457, 30.895151138305664, 32.413604736328125, 33.93206024169922, 35.45051574707031, 36.968971252441406, 38.4874267578125, 40.005882263183594, 41.52433776855469, 43.04279327392578, 44.56125259399414]}, "gradients/encoder.encoder.layers.8.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 1.0, 2.0, 3.0, 2.0, 3.0, 5.0, 3.0, 9.0, 5.0, 11.0, 7.0, 14.0, 8.0, 29.0, 28.0, 20.0, 26.0, 26.0, 23.0, 24.0, 41.0, 26.0, 45.0, 39.0, 39.0, 48.0, 44.0, 47.0, 50.0, 49.0, 40.0, 40.0, 37.0, 34.0, 22.0, 21.0, 26.0, 14.0, 15.0, 26.0, 8.0, 13.0, 12.0, 4.0, 10.0, 6.0, 6.0, 0.0, 2.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-26.727922439575195, -25.821088790893555, -24.914257049560547, -24.007423400878906, -23.100589752197266, -22.193756103515625, -21.286924362182617, -20.380090713500977, -19.47325897216797, -18.566425323486328, -17.65959358215332, -16.75275993347168, -15.845926284790039, -14.939093589782715, -14.03226089477539, -13.12542724609375, -12.21859359741211, -11.311760902404785, -10.404927253723145, -9.49809455871582, -8.59126091003418, -7.6844282150268555, -6.777595520019531, -5.870762348175049, -4.963929176330566, -4.057096004486084, -3.1502630710601807, -2.2434301376342773, -1.336596965789795, -0.4297637939453125, 0.4770689010620117, 1.3839020729064941, 2.2907333374023438, 3.197566509246826, 4.104399681091309, 5.011232376098633, 5.918065547943115, 6.824898719787598, 7.731731414794922, 8.638565063476562, 9.545397758483887, 10.452230453491211, 11.359064102172852, 12.265896797180176, 13.1727294921875, 14.07956314086914, 14.986395835876465, 15.893228530883789, 16.80006217956543, 17.70689582824707, 18.613727569580078, 19.52056121826172, 20.42739486694336, 21.334228515625, 22.241060256958008, 23.14789390563965, 24.054725646972656, 24.961559295654297, 25.868391036987305, 26.775224685668945, 27.682058334350586, 28.588890075683594, 29.495723724365234, 30.402557373046875, 31.309391021728516]}, "gradients/encoder.encoder.layers.8.attention.out_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 0.0, 1.0, 7.0, 3.0, 6.0, 7.0, 10.0, 28.0, 34.0, 63.0, 94.0, 171.0, 344.0, 759.0, 1933.0, 5661.0, 20512.0, 93076.0, 497123.0, 344902.0, 62294.0, 14539.0, 4223.0, 1533.0, 622.0, 287.0, 133.0, 76.0, 56.0, 25.0, 9.0, 8.0, 10.0, 7.0, 4.0, 2.0, 2.0, 1.0, 1.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.40625, -11.839111328125, -11.27197265625, -10.704833984375, -10.1376953125, -9.570556640625, -9.00341796875, -8.436279296875, -7.869140625, -7.302001953125, -6.73486328125, -6.167724609375, -5.6005859375, -5.033447265625, -4.46630859375, -3.899169921875, -3.33203125, -2.764892578125, -2.19775390625, -1.630615234375, -1.0634765625, -0.496337890625, 0.07080078125, 0.637939453125, 1.205078125, 1.772216796875, 2.33935546875, 2.906494140625, 3.4736328125, 4.040771484375, 4.60791015625, 5.175048828125, 5.7421875, 6.309326171875, 6.87646484375, 7.443603515625, 8.0107421875, 8.577880859375, 9.14501953125, 9.712158203125, 10.279296875, 10.846435546875, 11.41357421875, 11.980712890625, 12.5478515625, 13.114990234375, 13.68212890625, 14.249267578125, 14.81640625, 15.383544921875, 15.95068359375, 16.517822265625, 17.0849609375, 17.652099609375, 18.21923828125, 18.786376953125, 19.353515625, 19.920654296875, 20.48779296875, 21.054931640625, 21.6220703125, 22.189208984375, 22.75634765625, 23.323486328125, 23.890625]}, "gradients/encoder.encoder.layers.8.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 3.0, 0.0, 5.0, 1.0, 0.0, 8.0, 11.0, 12.0, 14.0, 20.0, 31.0, 31.0, 34.0, 53.0, 40.0, 61.0, 73.0, 74.0, 80.0, 87.0, 76.0, 63.0, 50.0, 47.0, 36.0, 29.0, 21.0, 17.0, 7.0, 9.0, 5.0, 5.0, 5.0, 2.0, 5.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.421875, -7.21685791015625, -7.0118408203125, -6.80682373046875, -6.601806640625, -6.39678955078125, -6.1917724609375, -5.98675537109375, -5.78173828125, -5.57672119140625, -5.3717041015625, -5.16668701171875, -4.961669921875, -4.75665283203125, -4.5516357421875, -4.34661865234375, -4.1416015625, -3.93658447265625, -3.7315673828125, -3.52655029296875, -3.321533203125, -3.11651611328125, -2.9114990234375, -2.70648193359375, -2.50146484375, -2.29644775390625, -2.0914306640625, -1.88641357421875, -1.681396484375, -1.47637939453125, -1.2713623046875, -1.06634521484375, -0.861328125, -0.65631103515625, -0.4512939453125, -0.24627685546875, -0.041259765625, 0.16375732421875, 0.3687744140625, 0.57379150390625, 0.77880859375, 0.98382568359375, 1.1888427734375, 1.39385986328125, 1.598876953125, 1.80389404296875, 2.0089111328125, 2.21392822265625, 2.4189453125, 2.62396240234375, 2.8289794921875, 3.03399658203125, 3.239013671875, 3.44403076171875, 3.6490478515625, 3.85406494140625, 4.05908203125, 4.26409912109375, 4.4691162109375, 4.67413330078125, 4.879150390625, 5.08416748046875, 5.2891845703125, 5.49420166015625, 5.69921875]}, "gradients/encoder.encoder.layers.8.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 3.0, 3.0, 4.0, 2.0, 6.0, 7.0, 9.0, 15.0, 18.0, 25.0, 24.0, 30.0, 39.0, 65.0, 78.0, 123.0, 199.0, 319.0, 504.0, 1029.0, 2354.0, 6711.0, 25343.0, 133265.0, 615474.0, 211270.0, 36613.0, 9106.0, 3080.0, 1201.0, 594.0, 361.0, 210.0, 143.0, 100.0, 78.0, 41.0, 28.0, 26.0, 15.0, 19.0, 10.0, 8.0, 6.0, 0.0, 6.0, 1.0, 3.0, 2.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-14.6796875, -14.1282958984375, -13.576904296875, -13.0255126953125, -12.47412109375, -11.9227294921875, -11.371337890625, -10.8199462890625, -10.2685546875, -9.7171630859375, -9.165771484375, -8.6143798828125, -8.06298828125, -7.5115966796875, -6.960205078125, -6.4088134765625, -5.857421875, -5.3060302734375, -4.754638671875, -4.2032470703125, -3.65185546875, -3.1004638671875, -2.549072265625, -1.9976806640625, -1.4462890625, -0.8948974609375, -0.343505859375, 0.2078857421875, 0.75927734375, 1.3106689453125, 1.862060546875, 2.4134521484375, 2.96484375, 3.5162353515625, 4.067626953125, 4.6190185546875, 5.17041015625, 5.7218017578125, 6.273193359375, 6.8245849609375, 7.3759765625, 7.9273681640625, 8.478759765625, 9.0301513671875, 9.58154296875, 10.1329345703125, 10.684326171875, 11.2357177734375, 11.787109375, 12.3385009765625, 12.889892578125, 13.4412841796875, 13.99267578125, 14.5440673828125, 15.095458984375, 15.6468505859375, 16.1982421875, 16.7496337890625, 17.301025390625, 17.8524169921875, 18.40380859375, 18.9552001953125, 19.506591796875, 20.0579833984375, 20.609375]}, "gradients/encoder.encoder.layers.8.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 3.0, 1.0, 0.0, 0.0, 1.0, 1.0, 4.0, 2.0, 5.0, 12.0, 4.0, 9.0, 6.0, 10.0, 22.0, 16.0, 23.0, 18.0, 26.0, 31.0, 32.0, 26.0, 44.0, 51.0, 42.0, 40.0, 44.0, 46.0, 43.0, 45.0, 45.0, 39.0, 57.0, 36.0, 31.0, 32.0, 36.0, 21.0, 24.0, 13.0, 16.0, 7.0, 13.0, 11.0, 3.0, 9.0, 8.0, 2.0, 2.0, 2.0, 2.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-20.09375, -19.442626953125, -18.79150390625, -18.140380859375, -17.4892578125, -16.838134765625, -16.18701171875, -15.535888671875, -14.884765625, -14.233642578125, -13.58251953125, -12.931396484375, -12.2802734375, -11.629150390625, -10.97802734375, -10.326904296875, -9.67578125, -9.024658203125, -8.37353515625, -7.722412109375, -7.0712890625, -6.420166015625, -5.76904296875, -5.117919921875, -4.466796875, -3.815673828125, -3.16455078125, -2.513427734375, -1.8623046875, -1.211181640625, -0.56005859375, 0.091064453125, 0.7421875, 1.393310546875, 2.04443359375, 2.695556640625, 3.3466796875, 3.997802734375, 4.64892578125, 5.300048828125, 5.951171875, 6.602294921875, 7.25341796875, 7.904541015625, 8.5556640625, 9.206787109375, 9.85791015625, 10.509033203125, 11.16015625, 11.811279296875, 12.46240234375, 13.113525390625, 13.7646484375, 14.415771484375, 15.06689453125, 15.718017578125, 16.369140625, 17.020263671875, 17.67138671875, 18.322509765625, 18.9736328125, 19.624755859375, 20.27587890625, 20.927001953125, 21.578125]}, "gradients/encoder.encoder.layers.8.attention.k_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0, 2.0, 2.0, 1.0, 5.0, 9.0, 8.0, 18.0, 30.0, 39.0, 57.0, 139.0, 255.0, 625.0, 1905.0, 10040.0, 186240.0, 805012.0, 38080.0, 4178.0, 1100.0, 411.0, 176.0, 97.0, 46.0, 29.0, 23.0, 11.0, 7.0, 7.0, 1.0, 2.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.4453125, -8.15673828125, -7.8681640625, -7.57958984375, -7.291015625, -7.00244140625, -6.7138671875, -6.42529296875, -6.13671875, -5.84814453125, -5.5595703125, -5.27099609375, -4.982421875, -4.69384765625, -4.4052734375, -4.11669921875, -3.828125, -3.53955078125, -3.2509765625, -2.96240234375, -2.673828125, -2.38525390625, -2.0966796875, -1.80810546875, -1.51953125, -1.23095703125, -0.9423828125, -0.65380859375, -0.365234375, -0.07666015625, 0.2119140625, 0.50048828125, 0.7890625, 1.07763671875, 1.3662109375, 1.65478515625, 1.943359375, 2.23193359375, 2.5205078125, 2.80908203125, 3.09765625, 3.38623046875, 3.6748046875, 3.96337890625, 4.251953125, 4.54052734375, 4.8291015625, 5.11767578125, 5.40625, 5.69482421875, 5.9833984375, 6.27197265625, 6.560546875, 6.84912109375, 7.1376953125, 7.42626953125, 7.71484375, 8.00341796875, 8.2919921875, 8.58056640625, 8.869140625, 9.15771484375, 9.4462890625, 9.73486328125, 10.0234375]}, "gradients/encoder.encoder.layers.8.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 3.0, 5.0, 8.0, 9.0, 7.0, 11.0, 22.0, 35.0, 38.0, 61.0, 90.0, 112.0, 133.0, 125.0, 114.0, 70.0, 54.0, 34.0, 18.0, 19.0, 14.0, 12.0, 6.0, 4.0, 2.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00115966796875, -0.0011193007230758667, -0.0010789334774017334, -0.0010385662317276, -0.0009981989860534668, -0.0009578317403793335, -0.0009174644947052002, -0.0008770972490310669, -0.0008367300033569336, -0.0007963627576828003, -0.000755995512008667, -0.0007156282663345337, -0.0006752610206604004, -0.0006348937749862671, -0.0005945265293121338, -0.0005541592836380005, -0.0005137920379638672, -0.0004734247922897339, -0.0004330575466156006, -0.0003926903009414673, -0.000352323055267334, -0.0003119558095932007, -0.0002715885639190674, -0.00023122131824493408, -0.00019085407257080078, -0.00015048682689666748, -0.00011011958122253418, -6.975233554840088e-05, -2.9385089874267578e-05, 1.0982155799865723e-05, 5.1349401473999023e-05, 9.171664714813232e-05, 0.00013208389282226562, 0.00017245113849639893, 0.00021281838417053223, 0.00025318562984466553, 0.00029355287551879883, 0.00033392012119293213, 0.00037428736686706543, 0.00041465461254119873, 0.00045502185821533203, 0.0004953891038894653, 0.0005357563495635986, 0.0005761235952377319, 0.0006164908409118652, 0.0006568580865859985, 0.0006972253322601318, 0.0007375925779342651, 0.0007779598236083984, 0.0008183270692825317, 0.000858694314956665, 0.0008990615606307983, 0.0009394288063049316, 0.000979796051979065, 0.0010201632976531982, 0.0010605305433273315, 0.0011008977890014648, 0.0011412650346755981, 0.0011816322803497314, 0.0012219995260238647, 0.001262366771697998, 0.0013027340173721313, 0.0013431012630462646, 0.001383468508720398, 0.0014238357543945312]}, "gradients/encoder.encoder.layers.8.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 5.0, 4.0, 3.0, 1.0, 9.0, 20.0, 51.0, 68.0, 127.0, 322.0, 846.0, 3593.0, 37488.0, 857882.0, 138915.0, 7000.0, 1358.0, 469.0, 212.0, 90.0, 38.0, 25.0, 13.0, 5.0, 10.0, 3.0, 2.0, 3.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-13.6171875, -13.282470703125, -12.94775390625, -12.613037109375, -12.2783203125, -11.943603515625, -11.60888671875, -11.274169921875, -10.939453125, -10.604736328125, -10.27001953125, -9.935302734375, -9.6005859375, -9.265869140625, -8.93115234375, -8.596435546875, -8.26171875, -7.927001953125, -7.59228515625, -7.257568359375, -6.9228515625, -6.588134765625, -6.25341796875, -5.918701171875, -5.583984375, -5.249267578125, -4.91455078125, -4.579833984375, -4.2451171875, -3.910400390625, -3.57568359375, -3.240966796875, -2.90625, -2.571533203125, -2.23681640625, -1.902099609375, -1.5673828125, -1.232666015625, -0.89794921875, -0.563232421875, -0.228515625, 0.106201171875, 0.44091796875, 0.775634765625, 1.1103515625, 1.445068359375, 1.77978515625, 2.114501953125, 2.44921875, 2.783935546875, 3.11865234375, 3.453369140625, 3.7880859375, 4.122802734375, 4.45751953125, 4.792236328125, 5.126953125, 5.461669921875, 5.79638671875, 6.131103515625, 6.4658203125, 6.800537109375, 7.13525390625, 7.469970703125, 7.8046875]}, "gradients/encoder.encoder.layers.8.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 1.0, 2.0, 2.0, 2.0, 1.0, 8.0, 11.0, 8.0, 12.0, 13.0, 19.0, 31.0, 46.0, 58.0, 69.0, 119.0, 96.0, 92.0, 91.0, 84.0, 57.0, 50.0, 42.0, 30.0, 19.0, 23.0, 10.0, 4.0, 7.0, 2.0, 2.0, 1.0, 2.0, 0.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-8.2734375, -8.038330078125, -7.80322265625, -7.568115234375, -7.3330078125, -7.097900390625, -6.86279296875, -6.627685546875, -6.392578125, -6.157470703125, -5.92236328125, -5.687255859375, -5.4521484375, -5.217041015625, -4.98193359375, -4.746826171875, -4.51171875, -4.276611328125, -4.04150390625, -3.806396484375, -3.5712890625, -3.336181640625, -3.10107421875, -2.865966796875, -2.630859375, -2.395751953125, -2.16064453125, -1.925537109375, -1.6904296875, -1.455322265625, -1.22021484375, -0.985107421875, -0.75, -0.514892578125, -0.27978515625, -0.044677734375, 0.1904296875, 0.425537109375, 0.66064453125, 0.895751953125, 1.130859375, 1.365966796875, 1.60107421875, 1.836181640625, 2.0712890625, 2.306396484375, 2.54150390625, 2.776611328125, 3.01171875, 3.246826171875, 3.48193359375, 3.717041015625, 3.9521484375, 4.187255859375, 4.42236328125, 4.657470703125, 4.892578125, 5.127685546875, 5.36279296875, 5.597900390625, 5.8330078125, 6.068115234375, 6.30322265625, 6.538330078125, 6.7734375]}, "gradients/encoder.encoder.layers.8.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 4.0, 4.0, 9.0, 20.0, 54.0, 106.0, 163.0, 226.0, 186.0, 117.0, 65.0, 26.0, 14.0, 9.0, 2.0, 1.0, 3.0, 3.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-304.8187255859375, -297.9580078125, -291.0973205566406, -284.2366027832031, -277.3758850097656, -270.51519775390625, -263.65447998046875, -256.79376220703125, -249.93307495117188, -243.07237243652344, -236.21165466308594, -229.3509521484375, -222.49024963378906, -215.62954711914062, -208.76882934570312, -201.9081268310547, -195.0474090576172, -188.18670654296875, -181.32598876953125, -174.4652862548828, -167.60458374023438, -160.74386596679688, -153.88316345214844, -147.0224609375, -140.1617431640625, -133.30104064941406, -126.4403305053711, -119.57962036132812, -112.71891784667969, -105.85820770263672, -98.99749755859375, -92.13679504394531, -85.27609252929688, -78.4153823852539, -71.55467987060547, -64.6939697265625, -57.8332633972168, -50.972557067871094, -44.111846923828125, -37.25114059448242, -30.39043426513672, -23.529727935791016, -16.66901969909668, -9.808311462402344, -2.9476051330566406, 3.9131011962890625, 10.773811340332031, 17.634517669677734, 24.495223999023438, 31.35593032836914, 38.216636657714844, 45.07734680175781, 51.938053131103516, 58.79875946044922, 65.65946960449219, 72.52017211914062, 79.3808822631836, 86.24159240722656, 93.102294921875, 99.96300506591797, 106.82371520996094, 113.68441772460938, 120.54512786865234, 127.40583801269531, 134.26654052734375]}, "gradients/encoder.encoder.layers.8.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 2.0, 3.0, 1.0, 7.0, 5.0, 5.0, 9.0, 13.0, 17.0, 16.0, 17.0, 24.0, 23.0, 33.0, 31.0, 43.0, 45.0, 49.0, 40.0, 54.0, 55.0, 70.0, 59.0, 42.0, 46.0, 29.0, 45.0, 37.0, 29.0, 34.0, 21.0, 20.0, 16.0, 17.0, 12.0, 10.0, 7.0, 9.0, 6.0, 2.0, 4.0, 4.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-106.0716552734375, -102.79878234863281, -99.52590942382812, -96.2530288696289, -92.98015594482422, -89.70728302001953, -86.43440246582031, -83.16152954101562, -79.88865661621094, -76.61578369140625, -73.34291076660156, -70.07003021240234, -66.79715728759766, -63.52428436279297, -60.251407623291016, -56.97853088378906, -53.705657958984375, -50.43278503417969, -47.159908294677734, -43.88703155517578, -40.614158630371094, -37.341285705566406, -34.06840896606445, -30.795534133911133, -27.522659301757812, -24.249784469604492, -20.976909637451172, -17.70403480529785, -14.431159973144531, -11.158285140991211, -7.885410308837891, -4.61253547668457, -1.33966064453125, 1.9332141876220703, 5.206089019775391, 8.478963851928711, 11.751838684082031, 15.024713516235352, 18.297588348388672, 21.570463180541992, 24.843338012695312, 28.116212844848633, 31.389087677001953, 34.661964416503906, 37.934837341308594, 41.20771026611328, 44.480587005615234, 47.75346374511719, 51.026336669921875, 54.29920959472656, 57.572086334228516, 60.84496307373047, 64.11783599853516, 67.39070892333984, 70.66358947753906, 73.93646240234375, 77.20933532714844, 80.48220825195312, 83.75508117675781, 87.02796173095703, 90.30083465576172, 93.5737075805664, 96.84658813476562, 100.11946105957031, 103.392333984375]}, "gradients/encoder.encoder.layers.7.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 5.0, 2.0, 3.0, 3.0, 6.0, 5.0, 11.0, 12.0, 25.0, 32.0, 51.0, 73.0, 92.0, 167.0, 314.0, 525.0, 1057.0, 2560.0, 6244.0, 18467.0, 84768.0, 2935680.0, 1060714.0, 59462.0, 15024.0, 5173.0, 2007.0, 918.0, 400.0, 225.0, 119.0, 65.0, 29.0, 24.0, 13.0, 4.0, 3.0, 3.0, 2.0, 4.0, 1.0, 0.0, 1.0, 1.0], "bins": [-17.15625, -16.7467041015625, -16.337158203125, -15.9276123046875, -15.51806640625, -15.1085205078125, -14.698974609375, -14.2894287109375, -13.8798828125, -13.4703369140625, -13.060791015625, -12.6512451171875, -12.24169921875, -11.8321533203125, -11.422607421875, -11.0130615234375, -10.603515625, -10.1939697265625, -9.784423828125, -9.3748779296875, -8.96533203125, -8.5557861328125, -8.146240234375, -7.7366943359375, -7.3271484375, -6.9176025390625, -6.508056640625, -6.0985107421875, -5.68896484375, -5.2794189453125, -4.869873046875, -4.4603271484375, -4.05078125, -3.6412353515625, -3.231689453125, -2.8221435546875, -2.41259765625, -2.0030517578125, -1.593505859375, -1.1839599609375, -0.7744140625, -0.3648681640625, 0.044677734375, 0.4542236328125, 0.86376953125, 1.2733154296875, 1.682861328125, 2.0924072265625, 2.501953125, 2.9114990234375, 3.321044921875, 3.7305908203125, 4.14013671875, 4.5496826171875, 4.959228515625, 5.3687744140625, 5.7783203125, 6.1878662109375, 6.597412109375, 7.0069580078125, 7.41650390625, 7.8260498046875, 8.235595703125, 8.6451416015625, 9.0546875]}, "gradients/encoder.encoder.layers.7.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 2.0, 9.0, 12.0, 12.0, 18.0, 16.0, 39.0, 42.0, 65.0, 73.0, 62.0, 80.0, 95.0, 100.0, 95.0, 79.0, 46.0, 48.0, 41.0, 32.0, 11.0, 17.0, 3.0, 7.0, 5.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.53125, -9.30157470703125, -9.0718994140625, -8.84222412109375, -8.612548828125, -8.38287353515625, -8.1531982421875, -7.92352294921875, -7.69384765625, -7.46417236328125, -7.2344970703125, -7.00482177734375, -6.775146484375, -6.54547119140625, -6.3157958984375, -6.08612060546875, -5.8564453125, -5.62677001953125, -5.3970947265625, -5.16741943359375, -4.937744140625, -4.70806884765625, -4.4783935546875, -4.24871826171875, -4.01904296875, -3.78936767578125, -3.5596923828125, -3.33001708984375, -3.100341796875, -2.87066650390625, -2.6409912109375, -2.41131591796875, -2.181640625, -1.95196533203125, -1.7222900390625, -1.49261474609375, -1.262939453125, -1.03326416015625, -0.8035888671875, -0.57391357421875, -0.34423828125, -0.11456298828125, 0.1151123046875, 0.34478759765625, 0.574462890625, 0.80413818359375, 1.0338134765625, 1.26348876953125, 1.4931640625, 1.72283935546875, 1.9525146484375, 2.18218994140625, 2.411865234375, 2.64154052734375, 2.8712158203125, 3.10089111328125, 3.33056640625, 3.56024169921875, 3.7899169921875, 4.01959228515625, 4.249267578125, 4.47894287109375, 4.7086181640625, 4.93829345703125, 5.16796875]}, "gradients/encoder.encoder.layers.7.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 3.0, 4.0, 2.0, 5.0, 7.0, 11.0, 7.0, 15.0, 14.0, 33.0, 38.0, 51.0, 120.0, 158.0, 202.0, 377.0, 721.0, 1308.0, 2497.0, 5076.0, 11579.0, 29422.0, 97706.0, 800214.0, 2979866.0, 186964.0, 46554.0, 16834.0, 7218.0, 3363.0, 1757.0, 870.0, 491.0, 289.0, 188.0, 107.0, 76.0, 37.0, 29.0, 18.0, 17.0, 5.0, 5.0, 15.0, 4.0, 1.0, 7.0, 1.0, 3.0, 2.0, 1.0, 1.0], "bins": [-12.84375, -12.489013671875, -12.13427734375, -11.779541015625, -11.4248046875, -11.070068359375, -10.71533203125, -10.360595703125, -10.005859375, -9.651123046875, -9.29638671875, -8.941650390625, -8.5869140625, -8.232177734375, -7.87744140625, -7.522705078125, -7.16796875, -6.813232421875, -6.45849609375, -6.103759765625, -5.7490234375, -5.394287109375, -5.03955078125, -4.684814453125, -4.330078125, -3.975341796875, -3.62060546875, -3.265869140625, -2.9111328125, -2.556396484375, -2.20166015625, -1.846923828125, -1.4921875, -1.137451171875, -0.78271484375, -0.427978515625, -0.0732421875, 0.281494140625, 0.63623046875, 0.990966796875, 1.345703125, 1.700439453125, 2.05517578125, 2.409912109375, 2.7646484375, 3.119384765625, 3.47412109375, 3.828857421875, 4.18359375, 4.538330078125, 4.89306640625, 5.247802734375, 5.6025390625, 5.957275390625, 6.31201171875, 6.666748046875, 7.021484375, 7.376220703125, 7.73095703125, 8.085693359375, 8.4404296875, 8.795166015625, 9.14990234375, 9.504638671875, 9.859375]}, "gradients/encoder.encoder.layers.7.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 4.0, 2.0, 5.0, 8.0, 3.0, 7.0, 4.0, 16.0, 19.0, 17.0, 25.0, 44.0, 45.0, 77.0, 94.0, 180.0, 321.0, 781.0, 1135.0, 561.0, 247.0, 148.0, 89.0, 62.0, 39.0, 36.0, 25.0, 22.0, 22.0, 7.0, 11.0, 6.0, 7.0, 4.0, 4.0, 0.0, 3.0, 0.0, 1.0, 1.0, 0.0, 3.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-17.125, -16.639404296875, -16.15380859375, -15.668212890625, -15.1826171875, -14.697021484375, -14.21142578125, -13.725830078125, -13.240234375, -12.754638671875, -12.26904296875, -11.783447265625, -11.2978515625, -10.812255859375, -10.32666015625, -9.841064453125, -9.35546875, -8.869873046875, -8.38427734375, -7.898681640625, -7.4130859375, -6.927490234375, -6.44189453125, -5.956298828125, -5.470703125, -4.985107421875, -4.49951171875, -4.013916015625, -3.5283203125, -3.042724609375, -2.55712890625, -2.071533203125, -1.5859375, -1.100341796875, -0.61474609375, -0.129150390625, 0.3564453125, 0.842041015625, 1.32763671875, 1.813232421875, 2.298828125, 2.784423828125, 3.27001953125, 3.755615234375, 4.2412109375, 4.726806640625, 5.21240234375, 5.697998046875, 6.18359375, 6.669189453125, 7.15478515625, 7.640380859375, 8.1259765625, 8.611572265625, 9.09716796875, 9.582763671875, 10.068359375, 10.553955078125, 11.03955078125, 11.525146484375, 12.0107421875, 12.496337890625, 12.98193359375, 13.467529296875, 13.953125]}, "gradients/encoder.encoder.layers.7.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 4.0, 6.0, 21.0, 63.0, 188.0, 375.0, 253.0, 61.0, 21.0, 6.0, 5.0, 5.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-163.5716094970703, -154.7948455810547, -146.01806640625, -137.24130249023438, -128.46453857421875, -119.6877670288086, -110.91099548339844, -102.13423156738281, -93.35746002197266, -84.5806884765625, -75.80392456054688, -67.02715301513672, -58.25038528442383, -49.47361755371094, -40.69684600830078, -31.92007827758789, -23.143310546875, -14.366541862487793, -5.589773178100586, 3.1869964599609375, 11.963764190673828, 20.74053192138672, 29.517303466796875, 38.294071197509766, 47.070838928222656, 55.84760665893555, 64.62437438964844, 73.4011459350586, 82.17791748046875, 90.95468139648438, 99.73145294189453, 108.50822448730469, 117.28500366210938, 126.06177520751953, 134.8385467529297, 143.6153106689453, 152.39207458496094, 161.16885375976562, 169.94561767578125, 178.72238159179688, 187.4991455078125, 196.27590942382812, 205.0526885986328, 213.82945251464844, 222.60621643066406, 231.38299560546875, 240.15975952148438, 248.9365234375, 257.71331787109375, 266.4900817871094, 275.266845703125, 284.04364013671875, 292.8204040527344, 301.59716796875, 310.3739318847656, 319.15069580078125, 327.9274597167969, 336.7042236328125, 345.4809875488281, 354.25775146484375, 363.0345458984375, 371.8113098144531, 380.58807373046875, 389.3648376464844, 398.1416015625]}, "gradients/encoder.encoder.layers.7.final_layer_norm.bias": {"_type": "histogram", "values": [3.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 5.0, 5.0, 2.0, 2.0, 10.0, 4.0, 4.0, 8.0, 4.0, 9.0, 13.0, 17.0, 17.0, 14.0, 15.0, 11.0, 25.0, 26.0, 30.0, 36.0, 33.0, 41.0, 45.0, 39.0, 57.0, 33.0, 46.0, 44.0, 37.0, 26.0, 44.0, 28.0, 38.0, 28.0, 31.0, 28.0, 21.0, 16.0, 16.0, 15.0, 22.0, 14.0, 10.0, 12.0, 10.0, 7.0, 5.0, 1.0, 3.0, 0.0, 0.0, 1.0, 1.0, 3.0, 0.0, 4.0], "bins": [-58.5560302734375, -56.81324768066406, -55.07046890258789, -53.32769012451172, -51.58490753173828, -49.842124938964844, -48.09934616088867, -46.3565673828125, -44.61378479003906, -42.871002197265625, -41.12822341918945, -39.38544464111328, -37.642662048339844, -35.899879455566406, -34.157100677490234, -32.41432189941406, -30.671539306640625, -28.92875862121582, -27.185977935791016, -25.44319725036621, -23.700416564941406, -21.9576358795166, -20.214855194091797, -18.472074508666992, -16.729293823242188, -14.986513137817383, -13.243732452392578, -11.500951766967773, -9.758171081542969, -8.015390396118164, -6.272609710693359, -4.529829025268555, -2.78704833984375, -1.0442676544189453, 0.6985130310058594, 2.441293716430664, 4.184074401855469, 5.926855087280273, 7.669635772705078, 9.412416458129883, 11.155197143554688, 12.897977828979492, 14.640758514404297, 16.3835391998291, 18.126319885253906, 19.86910057067871, 21.611881256103516, 23.35466194152832, 25.097442626953125, 26.84022331237793, 28.583003997802734, 30.32578468322754, 32.068565368652344, 33.81134796142578, 35.55412673950195, 37.296905517578125, 39.03968811035156, 40.782470703125, 42.52524948120117, 44.268028259277344, 46.01081085205078, 47.75359344482422, 49.49637222290039, 51.23915100097656, 52.98193359375]}, "gradients/encoder.encoder.layers.7.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 2.0, 4.0, 2.0, 2.0, 6.0, 4.0, 8.0, 6.0, 15.0, 23.0, 29.0, 35.0, 55.0, 87.0, 113.0, 165.0, 266.0, 370.0, 681.0, 1184.0, 1929.0, 3563.0, 6737.0, 13792.0, 30517.0, 72033.0, 179951.0, 362059.0, 218626.0, 86504.0, 36036.0, 16215.0, 7828.0, 4073.0, 2226.0, 1278.0, 783.0, 470.0, 305.0, 177.0, 125.0, 88.0, 66.0, 43.0, 30.0, 14.0, 16.0, 7.0, 7.0, 5.0, 1.0, 6.0, 2.0, 0.0, 0.0, 0.0, 2.0], "bins": [-9.34375, -9.07373046875, -8.8037109375, -8.53369140625, -8.263671875, -7.99365234375, -7.7236328125, -7.45361328125, -7.18359375, -6.91357421875, -6.6435546875, -6.37353515625, -6.103515625, -5.83349609375, -5.5634765625, -5.29345703125, -5.0234375, -4.75341796875, -4.4833984375, -4.21337890625, -3.943359375, -3.67333984375, -3.4033203125, -3.13330078125, -2.86328125, -2.59326171875, -2.3232421875, -2.05322265625, -1.783203125, -1.51318359375, -1.2431640625, -0.97314453125, -0.703125, -0.43310546875, -0.1630859375, 0.10693359375, 0.376953125, 0.64697265625, 0.9169921875, 1.18701171875, 1.45703125, 1.72705078125, 1.9970703125, 2.26708984375, 2.537109375, 2.80712890625, 3.0771484375, 3.34716796875, 3.6171875, 3.88720703125, 4.1572265625, 4.42724609375, 4.697265625, 4.96728515625, 5.2373046875, 5.50732421875, 5.77734375, 6.04736328125, 6.3173828125, 6.58740234375, 6.857421875, 7.12744140625, 7.3974609375, 7.66748046875, 7.9375]}, "gradients/encoder.encoder.layers.7.attention.out_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 3.0, 2.0, 1.0, 3.0, 8.0, 3.0, 10.0, 11.0, 17.0, 12.0, 21.0, 29.0, 34.0, 19.0, 50.0, 44.0, 43.0, 47.0, 52.0, 66.0, 58.0, 49.0, 66.0, 42.0, 57.0, 48.0, 41.0, 40.0, 17.0, 22.0, 26.0, 16.0, 11.0, 14.0, 6.0, 7.0, 3.0, 8.0, 5.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-5.2890625, -5.1397705078125, -4.990478515625, -4.8411865234375, -4.69189453125, -4.5426025390625, -4.393310546875, -4.2440185546875, -4.0947265625, -3.9454345703125, -3.796142578125, -3.6468505859375, -3.49755859375, -3.3482666015625, -3.198974609375, -3.0496826171875, -2.900390625, -2.7510986328125, -2.601806640625, -2.4525146484375, -2.30322265625, -2.1539306640625, -2.004638671875, -1.8553466796875, -1.7060546875, -1.5567626953125, -1.407470703125, -1.2581787109375, -1.10888671875, -0.9595947265625, -0.810302734375, -0.6610107421875, -0.51171875, -0.3624267578125, -0.213134765625, -0.0638427734375, 0.08544921875, 0.2347412109375, 0.384033203125, 0.5333251953125, 0.6826171875, 0.8319091796875, 0.981201171875, 1.1304931640625, 1.27978515625, 1.4290771484375, 1.578369140625, 1.7276611328125, 1.876953125, 2.0262451171875, 2.175537109375, 2.3248291015625, 2.47412109375, 2.6234130859375, 2.772705078125, 2.9219970703125, 3.0712890625, 3.2205810546875, 3.369873046875, 3.5191650390625, 3.66845703125, 3.8177490234375, 3.967041015625, 4.1163330078125, 4.265625]}, "gradients/encoder.encoder.layers.7.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 1.0, 1.0, 2.0, 6.0, 1.0, 0.0, 4.0, 4.0, 6.0, 13.0, 15.0, 21.0, 20.0, 49.0, 57.0, 106.0, 139.0, 211.0, 321.0, 546.0, 926.0, 1848.0, 4068.0, 11115.0, 44209.0, 289898.0, 587500.0, 79562.0, 17219.0, 5370.0, 2310.0, 1149.0, 697.0, 378.0, 253.0, 175.0, 108.0, 89.0, 52.0, 39.0, 20.0, 13.0, 10.0, 8.0, 14.0, 4.0, 4.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0], "bins": [-17.078125, -16.578369140625, -16.07861328125, -15.578857421875, -15.0791015625, -14.579345703125, -14.07958984375, -13.579833984375, -13.080078125, -12.580322265625, -12.08056640625, -11.580810546875, -11.0810546875, -10.581298828125, -10.08154296875, -9.581787109375, -9.08203125, -8.582275390625, -8.08251953125, -7.582763671875, -7.0830078125, -6.583251953125, -6.08349609375, -5.583740234375, -5.083984375, -4.584228515625, -4.08447265625, -3.584716796875, -3.0849609375, -2.585205078125, -2.08544921875, -1.585693359375, -1.0859375, -0.586181640625, -0.08642578125, 0.413330078125, 0.9130859375, 1.412841796875, 1.91259765625, 2.412353515625, 2.912109375, 3.411865234375, 3.91162109375, 4.411376953125, 4.9111328125, 5.410888671875, 5.91064453125, 6.410400390625, 6.91015625, 7.409912109375, 7.90966796875, 8.409423828125, 8.9091796875, 9.408935546875, 9.90869140625, 10.408447265625, 10.908203125, 11.407958984375, 11.90771484375, 12.407470703125, 12.9072265625, 13.406982421875, 13.90673828125, 14.406494140625, 14.90625]}, "gradients/encoder.encoder.layers.7.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 3.0, 1.0, 2.0, 4.0, 6.0, 6.0, 5.0, 14.0, 13.0, 16.0, 26.0, 23.0, 23.0, 22.0, 41.0, 49.0, 49.0, 56.0, 61.0, 50.0, 62.0, 60.0, 51.0, 44.0, 49.0, 39.0, 52.0, 32.0, 30.0, 28.0, 24.0, 23.0, 15.0, 11.0, 2.0, 1.0, 6.0, 2.0, 2.0, 3.0, 4.0, 2.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0], "bins": [-26.859375, -26.120361328125, -25.38134765625, -24.642333984375, -23.9033203125, -23.164306640625, -22.42529296875, -21.686279296875, -20.947265625, -20.208251953125, -19.46923828125, -18.730224609375, -17.9912109375, -17.252197265625, -16.51318359375, -15.774169921875, -15.03515625, -14.296142578125, -13.55712890625, -12.818115234375, -12.0791015625, -11.340087890625, -10.60107421875, -9.862060546875, -9.123046875, -8.384033203125, -7.64501953125, -6.906005859375, -6.1669921875, -5.427978515625, -4.68896484375, -3.949951171875, -3.2109375, -2.471923828125, -1.73291015625, -0.993896484375, -0.2548828125, 0.484130859375, 1.22314453125, 1.962158203125, 2.701171875, 3.440185546875, 4.17919921875, 4.918212890625, 5.6572265625, 6.396240234375, 7.13525390625, 7.874267578125, 8.61328125, 9.352294921875, 10.09130859375, 10.830322265625, 11.5693359375, 12.308349609375, 13.04736328125, 13.786376953125, 14.525390625, 15.264404296875, 16.00341796875, 16.742431640625, 17.4814453125, 18.220458984375, 18.95947265625, 19.698486328125, 20.4375]}, "gradients/encoder.encoder.layers.7.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 6.0, 6.0, 9.0, 10.0, 16.0, 28.0, 73.0, 156.0, 423.0, 1842.0, 24613.0, 968193.0, 49795.0, 2574.0, 507.0, 153.0, 86.0, 32.0, 20.0, 7.0, 6.0, 4.0, 4.0, 2.0, 1.0, 2.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-14.421875, -13.87939453125, -13.3369140625, -12.79443359375, -12.251953125, -11.70947265625, -11.1669921875, -10.62451171875, -10.08203125, -9.53955078125, -8.9970703125, -8.45458984375, -7.912109375, -7.36962890625, -6.8271484375, -6.28466796875, -5.7421875, -5.19970703125, -4.6572265625, -4.11474609375, -3.572265625, -3.02978515625, -2.4873046875, -1.94482421875, -1.40234375, -0.85986328125, -0.3173828125, 0.22509765625, 0.767578125, 1.31005859375, 1.8525390625, 2.39501953125, 2.9375, 3.47998046875, 4.0224609375, 4.56494140625, 5.107421875, 5.64990234375, 6.1923828125, 6.73486328125, 7.27734375, 7.81982421875, 8.3623046875, 8.90478515625, 9.447265625, 9.98974609375, 10.5322265625, 11.07470703125, 11.6171875, 12.15966796875, 12.7021484375, 13.24462890625, 13.787109375, 14.32958984375, 14.8720703125, 15.41455078125, 15.95703125, 16.49951171875, 17.0419921875, 17.58447265625, 18.126953125, 18.66943359375, 19.2119140625, 19.75439453125, 20.296875]}, "gradients/encoder.encoder.layers.7.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 1.0, 6.0, 7.0, 0.0, 3.0, 9.0, 9.0, 6.0, 10.0, 19.0, 16.0, 25.0, 33.0, 42.0, 50.0, 64.0, 79.0, 90.0, 102.0, 96.0, 66.0, 66.0, 51.0, 39.0, 19.0, 15.0, 16.0, 14.0, 8.0, 13.0, 5.0, 5.0, 5.0, 7.0, 5.0, 0.0, 2.0, 2.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0], "bins": [-0.0010395050048828125, -0.0010071098804473877, -0.0009747147560119629, -0.0009423196315765381, -0.0009099245071411133, -0.0008775293827056885, -0.0008451342582702637, -0.0008127391338348389, -0.0007803440093994141, -0.0007479488849639893, -0.0007155537605285645, -0.0006831586360931396, -0.0006507635116577148, -0.00061836838722229, -0.0005859732627868652, -0.0005535781383514404, -0.0005211830139160156, -0.0004887878894805908, -0.000456392765045166, -0.0004239976406097412, -0.0003916025161743164, -0.0003592073917388916, -0.0003268122673034668, -0.000294417142868042, -0.0002620220184326172, -0.00022962689399719238, -0.00019723176956176758, -0.00016483664512634277, -0.00013244152069091797, -0.00010004639625549316, -6.765127182006836e-05, -3.5256147384643555e-05, -2.86102294921875e-06, 2.9534101486206055e-05, 6.192922592163086e-05, 9.432435035705566e-05, 0.00012671947479248047, 0.00015911459922790527, 0.00019150972366333008, 0.00022390484809875488, 0.0002562999725341797, 0.0002886950969696045, 0.0003210902214050293, 0.0003534853458404541, 0.0003858804702758789, 0.0004182755947113037, 0.0004506707191467285, 0.0004830658435821533, 0.0005154609680175781, 0.0005478560924530029, 0.0005802512168884277, 0.0006126463413238525, 0.0006450414657592773, 0.0006774365901947021, 0.000709831714630127, 0.0007422268390655518, 0.0007746219635009766, 0.0008070170879364014, 0.0008394122123718262, 0.000871807336807251, 0.0009042024612426758, 0.0009365975856781006, 0.0009689927101135254, 0.0010013878345489502, 0.001033782958984375]}, "gradients/encoder.encoder.layers.7.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 10.0, 7.0, 8.0, 12.0, 22.0, 28.0, 43.0, 59.0, 91.0, 134.0, 207.0, 376.0, 778.0, 1519.0, 3549.0, 11225.0, 59539.0, 569727.0, 347261.0, 39644.0, 8548.0, 2985.0, 1253.0, 601.0, 351.0, 190.0, 134.0, 84.0, 50.0, 33.0, 25.0, 17.0, 10.0, 5.0, 6.0, 6.0, 5.0, 2.0, 3.0, 1.0, 1.0, 2.0, 3.0, 2.0, 1.0, 0.0, 1.0, 1.0], "bins": [-6.703125, -6.50537109375, -6.3076171875, -6.10986328125, -5.912109375, -5.71435546875, -5.5166015625, -5.31884765625, -5.12109375, -4.92333984375, -4.7255859375, -4.52783203125, -4.330078125, -4.13232421875, -3.9345703125, -3.73681640625, -3.5390625, -3.34130859375, -3.1435546875, -2.94580078125, -2.748046875, -2.55029296875, -2.3525390625, -2.15478515625, -1.95703125, -1.75927734375, -1.5615234375, -1.36376953125, -1.166015625, -0.96826171875, -0.7705078125, -0.57275390625, -0.375, -0.17724609375, 0.0205078125, 0.21826171875, 0.416015625, 0.61376953125, 0.8115234375, 1.00927734375, 1.20703125, 1.40478515625, 1.6025390625, 1.80029296875, 1.998046875, 2.19580078125, 2.3935546875, 2.59130859375, 2.7890625, 2.98681640625, 3.1845703125, 3.38232421875, 3.580078125, 3.77783203125, 3.9755859375, 4.17333984375, 4.37109375, 4.56884765625, 4.7666015625, 4.96435546875, 5.162109375, 5.35986328125, 5.5576171875, 5.75537109375, 5.953125]}, "gradients/encoder.encoder.layers.7.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 5.0, 2.0, 2.0, 3.0, 7.0, 4.0, 4.0, 6.0, 10.0, 10.0, 15.0, 28.0, 39.0, 37.0, 51.0, 55.0, 57.0, 74.0, 84.0, 89.0, 61.0, 77.0, 60.0, 56.0, 35.0, 41.0, 34.0, 17.0, 12.0, 9.0, 9.0, 9.0, 5.0, 2.0, 5.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.67578125, -4.46600341796875, -4.2562255859375, -4.04644775390625, -3.836669921875, -3.62689208984375, -3.4171142578125, -3.20733642578125, -2.99755859375, -2.78778076171875, -2.5780029296875, -2.36822509765625, -2.158447265625, -1.94866943359375, -1.7388916015625, -1.52911376953125, -1.3193359375, -1.10955810546875, -0.8997802734375, -0.69000244140625, -0.480224609375, -0.27044677734375, -0.0606689453125, 0.14910888671875, 0.35888671875, 0.56866455078125, 0.7784423828125, 0.98822021484375, 1.197998046875, 1.40777587890625, 1.6175537109375, 1.82733154296875, 2.037109375, 2.24688720703125, 2.4566650390625, 2.66644287109375, 2.876220703125, 3.08599853515625, 3.2957763671875, 3.50555419921875, 3.71533203125, 3.92510986328125, 4.1348876953125, 4.34466552734375, 4.554443359375, 4.76422119140625, 4.9739990234375, 5.18377685546875, 5.3935546875, 5.60333251953125, 5.8131103515625, 6.02288818359375, 6.232666015625, 6.44244384765625, 6.6522216796875, 6.86199951171875, 7.07177734375, 7.28155517578125, 7.4913330078125, 7.70111083984375, 7.910888671875, 8.12066650390625, 8.3304443359375, 8.54022216796875, 8.75]}, "gradients/encoder.encoder.layers.7.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 3.0, 1.0, 6.0, 14.0, 19.0, 64.0, 136.0, 240.0, 265.0, 168.0, 50.0, 25.0, 10.0, 5.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-215.15135192871094, -208.01022338867188, -200.86911010742188, -193.72799682617188, -186.5868682861328, -179.44573974609375, -172.30462646484375, -165.16351318359375, -158.0223846435547, -150.88125610351562, -143.74014282226562, -136.59902954101562, -129.45790100097656, -122.31678009033203, -115.1756591796875, -108.03453826904297, -100.89341735839844, -93.7522964477539, -86.61117553710938, -79.47005462646484, -72.32893371582031, -65.18781280517578, -58.04669189453125, -50.90557098388672, -43.76445007324219, -36.623329162597656, -29.482208251953125, -22.341087341308594, -15.199966430664062, -8.058845520019531, -0.917724609375, 6.223396301269531, 13.364501953125, 20.50562286376953, 27.646743774414062, 34.787864685058594, 41.928985595703125, 49.070106506347656, 56.21122741699219, 63.35234832763672, 70.49346923828125, 77.63459014892578, 84.77571105957031, 91.91683197021484, 99.05795288085938, 106.1990737915039, 113.34019470214844, 120.48131561279297, 127.6224365234375, 134.7635498046875, 141.90467834472656, 149.04580688476562, 156.18692016601562, 163.32803344726562, 170.4691619873047, 177.61029052734375, 184.75140380859375, 191.89251708984375, 199.0336456298828, 206.17477416992188, 213.31588745117188, 220.45700073242188, 227.59812927246094, 234.7392578125, 241.88037109375]}, "gradients/encoder.encoder.layers.7.layer_norm.bias": {"_type": "histogram", "values": [2.0, 1.0, 2.0, 3.0, 5.0, 7.0, 5.0, 3.0, 4.0, 8.0, 8.0, 6.0, 10.0, 8.0, 15.0, 23.0, 26.0, 30.0, 24.0, 36.0, 29.0, 48.0, 40.0, 46.0, 57.0, 59.0, 61.0, 61.0, 46.0, 44.0, 44.0, 28.0, 33.0, 30.0, 16.0, 17.0, 17.0, 13.0, 18.0, 11.0, 13.0, 14.0, 10.0, 6.0, 9.0, 5.0, 3.0, 3.0, 3.0, 2.0, 2.0, 0.0, 1.0, 0.0, 2.0, 2.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-62.922603607177734, -60.532447814941406, -58.14229202270508, -55.75213623046875, -53.361976623535156, -50.971824645996094, -48.5816650390625, -46.19150924682617, -43.801353454589844, -41.411197662353516, -39.02104187011719, -36.63088607788086, -34.24073028564453, -31.85057258605957, -29.46041488647461, -27.07025909423828, -24.680103302001953, -22.289947509765625, -19.899791717529297, -17.509634017944336, -15.119478225708008, -12.72932243347168, -10.339165687561035, -7.949008941650391, -5.5588531494140625, -3.168696880340576, -0.7785406112670898, 1.6116156578063965, 4.001771926879883, 6.391927719116211, 8.782084465026855, 11.1722412109375, 13.562400817871094, 15.952556610107422, 18.34271240234375, 20.73287010192871, 23.12302589416504, 25.513181686401367, 27.903339385986328, 30.293495178222656, 32.683650970458984, 35.07380676269531, 37.46396255493164, 39.85411834716797, 42.24427795410156, 44.634429931640625, 47.02458953857422, 49.41474533081055, 51.804901123046875, 54.1950569152832, 56.58521270751953, 58.97536849975586, 61.36552429199219, 63.75568389892578, 66.14583587646484, 68.53599548339844, 70.9261474609375, 73.3163070678711, 75.70645904541016, 78.09661865234375, 80.48677062988281, 82.8769302368164, 85.26708221435547, 87.65724182128906, 90.04740142822266]}, "gradients/encoder.encoder.layers.6.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 0.0, 0.0, 3.0, 1.0, 4.0, 8.0, 10.0, 8.0, 16.0, 21.0, 29.0, 54.0, 80.0, 166.0, 473.0, 2409.0, 39249.0, 4116515.0, 32155.0, 2212.0, 478.0, 174.0, 87.0, 48.0, 25.0, 19.0, 15.0, 12.0, 7.0, 5.0, 2.0, 2.0, 2.0, 6.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-30.28125, -29.17919921875, -28.0771484375, -26.97509765625, -25.873046875, -24.77099609375, -23.6689453125, -22.56689453125, -21.46484375, -20.36279296875, -19.2607421875, -18.15869140625, -17.056640625, -15.95458984375, -14.8525390625, -13.75048828125, -12.6484375, -11.54638671875, -10.4443359375, -9.34228515625, -8.240234375, -7.13818359375, -6.0361328125, -4.93408203125, -3.83203125, -2.72998046875, -1.6279296875, -0.52587890625, 0.576171875, 1.67822265625, 2.7802734375, 3.88232421875, 4.984375, 6.08642578125, 7.1884765625, 8.29052734375, 9.392578125, 10.49462890625, 11.5966796875, 12.69873046875, 13.80078125, 14.90283203125, 16.0048828125, 17.10693359375, 18.208984375, 19.31103515625, 20.4130859375, 21.51513671875, 22.6171875, 23.71923828125, 24.8212890625, 25.92333984375, 27.025390625, 28.12744140625, 29.2294921875, 30.33154296875, 31.43359375, 32.53564453125, 33.6376953125, 34.73974609375, 35.841796875, 36.94384765625, 38.0458984375, 39.14794921875, 40.25]}, "gradients/encoder.encoder.layers.6.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 3.0, 3.0, 2.0, 2.0, 4.0, 5.0, 9.0, 7.0, 4.0, 14.0, 12.0, 18.0, 19.0, 25.0, 27.0, 36.0, 39.0, 40.0, 63.0, 57.0, 51.0, 54.0, 56.0, 65.0, 64.0, 51.0, 60.0, 36.0, 37.0, 21.0, 25.0, 18.0, 19.0, 15.0, 8.0, 14.0, 8.0, 8.0, 5.0, 6.0, 5.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.59375, -3.4512939453125, -3.308837890625, -3.1663818359375, -3.02392578125, -2.8814697265625, -2.739013671875, -2.5965576171875, -2.4541015625, -2.3116455078125, -2.169189453125, -2.0267333984375, -1.88427734375, -1.7418212890625, -1.599365234375, -1.4569091796875, -1.314453125, -1.1719970703125, -1.029541015625, -0.8870849609375, -0.74462890625, -0.6021728515625, -0.459716796875, -0.3172607421875, -0.1748046875, -0.0323486328125, 0.110107421875, 0.2525634765625, 0.39501953125, 0.5374755859375, 0.679931640625, 0.8223876953125, 0.96484375, 1.1072998046875, 1.249755859375, 1.3922119140625, 1.53466796875, 1.6771240234375, 1.819580078125, 1.9620361328125, 2.1044921875, 2.2469482421875, 2.389404296875, 2.5318603515625, 2.67431640625, 2.8167724609375, 2.959228515625, 3.1016845703125, 3.244140625, 3.3865966796875, 3.529052734375, 3.6715087890625, 3.81396484375, 3.9564208984375, 4.098876953125, 4.2413330078125, 4.3837890625, 4.5262451171875, 4.668701171875, 4.8111572265625, 4.95361328125, 5.0960693359375, 5.238525390625, 5.3809814453125, 5.5234375]}, "gradients/encoder.encoder.layers.6.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 9.0, 7.0, 13.0, 17.0, 25.0, 30.0, 48.0, 90.0, 132.0, 277.0, 615.0, 1391.0, 3673.0, 11113.0, 41599.0, 286109.0, 3565961.0, 229711.0, 37667.0, 10000.0, 3215.0, 1255.0, 566.0, 301.0, 162.0, 81.0, 66.0, 56.0, 29.0, 24.0, 22.0, 10.0, 7.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-10.640625, -10.22265625, -9.8046875, -9.38671875, -8.96875, -8.55078125, -8.1328125, -7.71484375, -7.296875, -6.87890625, -6.4609375, -6.04296875, -5.625, -5.20703125, -4.7890625, -4.37109375, -3.953125, -3.53515625, -3.1171875, -2.69921875, -2.28125, -1.86328125, -1.4453125, -1.02734375, -0.609375, -0.19140625, 0.2265625, 0.64453125, 1.0625, 1.48046875, 1.8984375, 2.31640625, 2.734375, 3.15234375, 3.5703125, 3.98828125, 4.40625, 4.82421875, 5.2421875, 5.66015625, 6.078125, 6.49609375, 6.9140625, 7.33203125, 7.75, 8.16796875, 8.5859375, 9.00390625, 9.421875, 9.83984375, 10.2578125, 10.67578125, 11.09375, 11.51171875, 11.9296875, 12.34765625, 12.765625, 13.18359375, 13.6015625, 14.01953125, 14.4375, 14.85546875, 15.2734375, 15.69140625, 16.109375]}, "gradients/encoder.encoder.layers.6.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 4.0, 1.0, 6.0, 5.0, 5.0, 5.0, 5.0, 10.0, 16.0, 18.0, 23.0, 27.0, 44.0, 87.0, 111.0, 227.0, 462.0, 1123.0, 971.0, 392.0, 185.0, 103.0, 73.0, 46.0, 33.0, 25.0, 24.0, 17.0, 16.0, 7.0, 6.0, 1.0, 1.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-11.7265625, -11.3060302734375, -10.885498046875, -10.4649658203125, -10.04443359375, -9.6239013671875, -9.203369140625, -8.7828369140625, -8.3623046875, -7.9417724609375, -7.521240234375, -7.1007080078125, -6.68017578125, -6.2596435546875, -5.839111328125, -5.4185791015625, -4.998046875, -4.5775146484375, -4.156982421875, -3.7364501953125, -3.31591796875, -2.8953857421875, -2.474853515625, -2.0543212890625, -1.6337890625, -1.2132568359375, -0.792724609375, -0.3721923828125, 0.04833984375, 0.4688720703125, 0.889404296875, 1.3099365234375, 1.73046875, 2.1510009765625, 2.571533203125, 2.9920654296875, 3.41259765625, 3.8331298828125, 4.253662109375, 4.6741943359375, 5.0947265625, 5.5152587890625, 5.935791015625, 6.3563232421875, 6.77685546875, 7.1973876953125, 7.617919921875, 8.0384521484375, 8.458984375, 8.8795166015625, 9.300048828125, 9.7205810546875, 10.14111328125, 10.5616455078125, 10.982177734375, 11.4027099609375, 11.8232421875, 12.2437744140625, 12.664306640625, 13.0848388671875, 13.50537109375, 13.9259033203125, 14.346435546875, 14.7669677734375, 15.1875]}, "gradients/encoder.encoder.layers.6.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 3.0, 6.0, 1.0, 4.0, 6.0, 8.0, 7.0, 28.0, 55.0, 80.0, 169.0, 215.0, 188.0, 124.0, 52.0, 24.0, 16.0, 16.0, 0.0, 2.0, 0.0, 4.0, 2.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-161.1895294189453, -157.32620239257812, -153.46287536621094, -149.59954833984375, -145.73622131347656, -141.87289428710938, -138.0095672607422, -134.146240234375, -130.28289794921875, -126.41957092285156, -122.55624389648438, -118.69291687011719, -114.82958984375, -110.96626281738281, -107.1029281616211, -103.2396011352539, -99.37628173828125, -95.51295471191406, -91.64962768554688, -87.78630065917969, -83.9229736328125, -80.05964660644531, -76.1963119506836, -72.3329849243164, -68.46965789794922, -64.60633087158203, -60.743003845214844, -56.87967300415039, -53.0163459777832, -49.153018951416016, -45.28968811035156, -41.426361083984375, -37.56304168701172, -33.69971466064453, -29.83638572692871, -25.97305679321289, -22.109729766845703, -18.246402740478516, -14.383073806762695, -10.519744873046875, -6.6564178466796875, -2.7930898666381836, 1.0702381134033203, 4.933566093444824, 8.796894073486328, 12.660221099853516, 16.523550033569336, 20.386878967285156, 24.250205993652344, 28.11353302001953, 31.97686195373535, 35.84019088745117, 39.70351791381836, 43.56684494018555, 47.43017578125, 51.29350280761719, 55.156829833984375, 59.02015686035156, 62.88348388671875, 66.74681091308594, 70.61013793945312, 74.47346496582031, 78.33679962158203, 82.20012664794922, 86.0634536743164]}, "gradients/encoder.encoder.layers.6.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 5.0, 2.0, 4.0, 5.0, 7.0, 5.0, 6.0, 8.0, 10.0, 15.0, 12.0, 14.0, 20.0, 23.0, 26.0, 20.0, 31.0, 31.0, 34.0, 34.0, 40.0, 30.0, 52.0, 38.0, 40.0, 52.0, 35.0, 53.0, 48.0, 18.0, 35.0, 40.0, 29.0, 26.0, 29.0, 28.0, 24.0, 12.0, 13.0, 12.0, 7.0, 14.0, 3.0, 4.0, 6.0, 5.0, 4.0, 4.0, 3.0, 2.0, 1.0, 0.0, 1.0], "bins": [-54.968536376953125, -53.42902755737305, -51.88951873779297, -50.35000991821289, -48.81050109863281, -47.270992279052734, -45.731483459472656, -44.191978454589844, -42.6524658203125, -41.11295700073242, -39.573448181152344, -38.033939361572266, -36.49443054199219, -34.95492172241211, -33.41541290283203, -31.875905990600586, -30.33639907836914, -28.796890258789062, -27.257381439208984, -25.717872619628906, -24.178363800048828, -22.63885498046875, -21.099348068237305, -19.559839248657227, -18.02033042907715, -16.48082160949707, -14.941312789916992, -13.40180492401123, -11.862296104431152, -10.322787284851074, -8.783279418945312, -7.243770599365234, -5.704261779785156, -4.164752960205078, -2.625244617462158, -1.0857362747192383, 0.45377254486083984, 1.993281364440918, 3.5327892303466797, 5.072298049926758, 6.611806869506836, 8.151315689086914, 9.690824508666992, 11.230332374572754, 12.769841194152832, 14.30935001373291, 15.848857879638672, 17.38836669921875, 18.927875518798828, 20.467384338378906, 22.006893157958984, 23.546401977539062, 25.08591079711914, 26.62541961669922, 28.164926528930664, 29.704435348510742, 31.24394416809082, 32.783451080322266, 34.322959899902344, 35.86246871948242, 37.4019775390625, 38.94148635864258, 40.480995178222656, 42.020503997802734, 43.56001281738281]}, "gradients/encoder.encoder.layers.6.attention.out_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 1.0, 2.0, 4.0, 2.0, 5.0, 2.0, 12.0, 18.0, 19.0, 22.0, 45.0, 79.0, 106.0, 176.0, 269.0, 407.0, 669.0, 1082.0, 1958.0, 3473.0, 6815.0, 14474.0, 33909.0, 86733.0, 247678.0, 387397.0, 159798.0, 57908.0, 23428.0, 10417.0, 5034.0, 2685.0, 1503.0, 897.0, 572.0, 332.0, 233.0, 140.0, 91.0, 58.0, 30.0, 23.0, 18.0, 11.0, 11.0, 8.0, 9.0, 2.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-8.078125, -7.7926025390625, -7.507080078125, -7.2215576171875, -6.93603515625, -6.6505126953125, -6.364990234375, -6.0794677734375, -5.7939453125, -5.5084228515625, -5.222900390625, -4.9373779296875, -4.65185546875, -4.3663330078125, -4.080810546875, -3.7952880859375, -3.509765625, -3.2242431640625, -2.938720703125, -2.6531982421875, -2.36767578125, -2.0821533203125, -1.796630859375, -1.5111083984375, -1.2255859375, -0.9400634765625, -0.654541015625, -0.3690185546875, -0.08349609375, 0.2020263671875, 0.487548828125, 0.7730712890625, 1.05859375, 1.3441162109375, 1.629638671875, 1.9151611328125, 2.20068359375, 2.4862060546875, 2.771728515625, 3.0572509765625, 3.3427734375, 3.6282958984375, 3.913818359375, 4.1993408203125, 4.48486328125, 4.7703857421875, 5.055908203125, 5.3414306640625, 5.626953125, 5.9124755859375, 6.197998046875, 6.4835205078125, 6.76904296875, 7.0545654296875, 7.340087890625, 7.6256103515625, 7.9111328125, 8.1966552734375, 8.482177734375, 8.7677001953125, 9.05322265625, 9.3387451171875, 9.624267578125, 9.9097900390625, 10.1953125]}, "gradients/encoder.encoder.layers.6.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 1.0, 0.0, 5.0, 2.0, 5.0, 5.0, 6.0, 7.0, 7.0, 8.0, 12.0, 15.0, 21.0, 23.0, 34.0, 28.0, 23.0, 47.0, 36.0, 49.0, 45.0, 53.0, 61.0, 40.0, 53.0, 49.0, 67.0, 41.0, 44.0, 31.0, 24.0, 26.0, 21.0, 30.0, 18.0, 16.0, 11.0, 11.0, 11.0, 6.0, 6.0, 1.0, 5.0, 4.0, 4.0, 2.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.623046875, -3.486480712890625, -3.34991455078125, -3.213348388671875, -3.0767822265625, -2.940216064453125, -2.80364990234375, -2.667083740234375, -2.530517578125, -2.393951416015625, -2.25738525390625, -2.120819091796875, -1.9842529296875, -1.847686767578125, -1.71112060546875, -1.574554443359375, -1.43798828125, -1.301422119140625, -1.16485595703125, -1.028289794921875, -0.8917236328125, -0.755157470703125, -0.61859130859375, -0.482025146484375, -0.345458984375, -0.208892822265625, -0.07232666015625, 0.064239501953125, 0.2008056640625, 0.337371826171875, 0.47393798828125, 0.610504150390625, 0.7470703125, 0.883636474609375, 1.02020263671875, 1.156768798828125, 1.2933349609375, 1.429901123046875, 1.56646728515625, 1.703033447265625, 1.839599609375, 1.976165771484375, 2.11273193359375, 2.249298095703125, 2.3858642578125, 2.522430419921875, 2.65899658203125, 2.795562744140625, 2.93212890625, 3.068695068359375, 3.20526123046875, 3.341827392578125, 3.4783935546875, 3.614959716796875, 3.75152587890625, 3.888092041015625, 4.024658203125, 4.161224365234375, 4.29779052734375, 4.434356689453125, 4.5709228515625, 4.707489013671875, 4.84405517578125, 4.980621337890625, 5.1171875]}, "gradients/encoder.encoder.layers.6.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 3.0, 0.0, 2.0, 3.0, 4.0, 6.0, 12.0, 8.0, 16.0, 30.0, 47.0, 57.0, 91.0, 131.0, 213.0, 415.0, 755.0, 1691.0, 4272.0, 14540.0, 75309.0, 636933.0, 263413.0, 36768.0, 8362.0, 2807.0, 1244.0, 589.0, 323.0, 172.0, 115.0, 68.0, 48.0, 36.0, 25.0, 13.0, 17.0, 5.0, 8.0, 7.0, 4.0, 0.0, 0.0, 1.0, 2.0, 0.0, 3.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-14.3828125, -13.8253173828125, -13.267822265625, -12.7103271484375, -12.15283203125, -11.5953369140625, -11.037841796875, -10.4803466796875, -9.9228515625, -9.3653564453125, -8.807861328125, -8.2503662109375, -7.69287109375, -7.1353759765625, -6.577880859375, -6.0203857421875, -5.462890625, -4.9053955078125, -4.347900390625, -3.7904052734375, -3.23291015625, -2.6754150390625, -2.117919921875, -1.5604248046875, -1.0029296875, -0.4454345703125, 0.112060546875, 0.6695556640625, 1.22705078125, 1.7845458984375, 2.342041015625, 2.8995361328125, 3.45703125, 4.0145263671875, 4.572021484375, 5.1295166015625, 5.68701171875, 6.2445068359375, 6.802001953125, 7.3594970703125, 7.9169921875, 8.4744873046875, 9.031982421875, 9.5894775390625, 10.14697265625, 10.7044677734375, 11.261962890625, 11.8194580078125, 12.376953125, 12.9344482421875, 13.491943359375, 14.0494384765625, 14.60693359375, 15.1644287109375, 15.721923828125, 16.2794189453125, 16.8369140625, 17.3944091796875, 17.951904296875, 18.5093994140625, 19.06689453125, 19.6243896484375, 20.181884765625, 20.7393798828125, 21.296875]}, "gradients/encoder.encoder.layers.6.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 2.0, 1.0, 1.0, 4.0, 5.0, 2.0, 3.0, 6.0, 6.0, 7.0, 14.0, 14.0, 30.0, 25.0, 38.0, 47.0, 50.0, 87.0, 62.0, 70.0, 60.0, 77.0, 62.0, 71.0, 61.0, 38.0, 37.0, 36.0, 19.0, 22.0, 12.0, 15.0, 12.0, 4.0, 6.0, 3.0, 0.0, 6.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-23.546875, -22.750244140625, -21.95361328125, -21.156982421875, -20.3603515625, -19.563720703125, -18.76708984375, -17.970458984375, -17.173828125, -16.377197265625, -15.58056640625, -14.783935546875, -13.9873046875, -13.190673828125, -12.39404296875, -11.597412109375, -10.80078125, -10.004150390625, -9.20751953125, -8.410888671875, -7.6142578125, -6.817626953125, -6.02099609375, -5.224365234375, -4.427734375, -3.631103515625, -2.83447265625, -2.037841796875, -1.2412109375, -0.444580078125, 0.35205078125, 1.148681640625, 1.9453125, 2.741943359375, 3.53857421875, 4.335205078125, 5.1318359375, 5.928466796875, 6.72509765625, 7.521728515625, 8.318359375, 9.114990234375, 9.91162109375, 10.708251953125, 11.5048828125, 12.301513671875, 13.09814453125, 13.894775390625, 14.69140625, 15.488037109375, 16.28466796875, 17.081298828125, 17.8779296875, 18.674560546875, 19.47119140625, 20.267822265625, 21.064453125, 21.861083984375, 22.65771484375, 23.454345703125, 24.2509765625, 25.047607421875, 25.84423828125, 26.640869140625, 27.4375]}, "gradients/encoder.encoder.layers.6.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 2.0, 1.0, 4.0, 9.0, 6.0, 14.0, 13.0, 13.0, 15.0, 26.0, 71.0, 84.0, 215.0, 430.0, 1151.0, 4628.0, 44070.0, 908497.0, 80852.0, 5951.0, 1505.0, 505.0, 212.0, 115.0, 71.0, 32.0, 19.0, 14.0, 13.0, 6.0, 2.0, 7.0, 3.0, 4.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 3.0], "bins": [-12.75, -12.370361328125, -11.99072265625, -11.611083984375, -11.2314453125, -10.851806640625, -10.47216796875, -10.092529296875, -9.712890625, -9.333251953125, -8.95361328125, -8.573974609375, -8.1943359375, -7.814697265625, -7.43505859375, -7.055419921875, -6.67578125, -6.296142578125, -5.91650390625, -5.536865234375, -5.1572265625, -4.777587890625, -4.39794921875, -4.018310546875, -3.638671875, -3.259033203125, -2.87939453125, -2.499755859375, -2.1201171875, -1.740478515625, -1.36083984375, -0.981201171875, -0.6015625, -0.221923828125, 0.15771484375, 0.537353515625, 0.9169921875, 1.296630859375, 1.67626953125, 2.055908203125, 2.435546875, 2.815185546875, 3.19482421875, 3.574462890625, 3.9541015625, 4.333740234375, 4.71337890625, 5.093017578125, 5.47265625, 5.852294921875, 6.23193359375, 6.611572265625, 6.9912109375, 7.370849609375, 7.75048828125, 8.130126953125, 8.509765625, 8.889404296875, 9.26904296875, 9.648681640625, 10.0283203125, 10.407958984375, 10.78759765625, 11.167236328125, 11.546875]}, "gradients/encoder.encoder.layers.6.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 4.0, 2.0, 3.0, 2.0, 2.0, 4.0, 10.0, 9.0, 14.0, 17.0, 20.0, 21.0, 55.0, 58.0, 90.0, 92.0, 113.0, 96.0, 94.0, 64.0, 72.0, 48.0, 37.0, 16.0, 13.0, 9.0, 10.0, 7.0, 4.0, 5.0, 9.0, 6.0, 1.0, 2.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0012416839599609375, -0.0012016892433166504, -0.0011616945266723633, -0.0011216998100280762, -0.001081705093383789, -0.001041710376739502, -0.0010017156600952148, -0.0009617209434509277, -0.0009217262268066406, -0.0008817315101623535, -0.0008417367935180664, -0.0008017420768737793, -0.0007617473602294922, -0.0007217526435852051, -0.000681757926940918, -0.0006417632102966309, -0.0006017684936523438, -0.0005617737770080566, -0.0005217790603637695, -0.0004817843437194824, -0.0004417896270751953, -0.0004017949104309082, -0.0003618001937866211, -0.000321805477142334, -0.0002818107604980469, -0.00024181604385375977, -0.00020182132720947266, -0.00016182661056518555, -0.00012183189392089844, -8.183717727661133e-05, -4.184246063232422e-05, -1.8477439880371094e-06, 3.814697265625e-05, 7.814168930053711e-05, 0.00011813640594482422, 0.00015813112258911133, 0.00019812583923339844, 0.00023812055587768555, 0.00027811527252197266, 0.00031810998916625977, 0.0003581047058105469, 0.000398099422454834, 0.0004380941390991211, 0.0004780888557434082, 0.0005180835723876953, 0.0005580782890319824, 0.0005980730056762695, 0.0006380677223205566, 0.0006780624389648438, 0.0007180571556091309, 0.000758051872253418, 0.0007980465888977051, 0.0008380413055419922, 0.0008780360221862793, 0.0009180307388305664, 0.0009580254554748535, 0.0009980201721191406, 0.0010380148887634277, 0.0010780096054077148, 0.001118004322052002, 0.001157999038696289, 0.0011979937553405762, 0.0012379884719848633, 0.0012779831886291504, 0.0013179779052734375]}, "gradients/encoder.encoder.layers.6.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 0.0, 0.0, 1.0, 3.0, 0.0, 7.0, 3.0, 6.0, 6.0, 9.0, 13.0, 24.0, 37.0, 68.0, 141.0, 262.0, 554.0, 1847.0, 8759.0, 194142.0, 817727.0, 20282.0, 3019.0, 914.0, 358.0, 171.0, 76.0, 45.0, 26.0, 20.0, 14.0, 8.0, 11.0, 4.0, 1.0, 3.0, 3.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-14.375, -13.977783203125, -13.58056640625, -13.183349609375, -12.7861328125, -12.388916015625, -11.99169921875, -11.594482421875, -11.197265625, -10.800048828125, -10.40283203125, -10.005615234375, -9.6083984375, -9.211181640625, -8.81396484375, -8.416748046875, -8.01953125, -7.622314453125, -7.22509765625, -6.827880859375, -6.4306640625, -6.033447265625, -5.63623046875, -5.239013671875, -4.841796875, -4.444580078125, -4.04736328125, -3.650146484375, -3.2529296875, -2.855712890625, -2.45849609375, -2.061279296875, -1.6640625, -1.266845703125, -0.86962890625, -0.472412109375, -0.0751953125, 0.322021484375, 0.71923828125, 1.116455078125, 1.513671875, 1.910888671875, 2.30810546875, 2.705322265625, 3.1025390625, 3.499755859375, 3.89697265625, 4.294189453125, 4.69140625, 5.088623046875, 5.48583984375, 5.883056640625, 6.2802734375, 6.677490234375, 7.07470703125, 7.471923828125, 7.869140625, 8.266357421875, 8.66357421875, 9.060791015625, 9.4580078125, 9.855224609375, 10.25244140625, 10.649658203125, 11.046875]}, "gradients/encoder.encoder.layers.6.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 4.0, 1.0, 2.0, 4.0, 5.0, 6.0, 5.0, 9.0, 14.0, 14.0, 18.0, 37.0, 34.0, 59.0, 51.0, 70.0, 89.0, 77.0, 108.0, 98.0, 70.0, 63.0, 50.0, 28.0, 25.0, 27.0, 10.0, 9.0, 8.0, 6.0, 1.0, 2.0, 2.0, 3.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.4765625, -6.27899169921875, -6.0814208984375, -5.88385009765625, -5.686279296875, -5.48870849609375, -5.2911376953125, -5.09356689453125, -4.89599609375, -4.69842529296875, -4.5008544921875, -4.30328369140625, -4.105712890625, -3.90814208984375, -3.7105712890625, -3.51300048828125, -3.3154296875, -3.11785888671875, -2.9202880859375, -2.72271728515625, -2.525146484375, -2.32757568359375, -2.1300048828125, -1.93243408203125, -1.73486328125, -1.53729248046875, -1.3397216796875, -1.14215087890625, -0.944580078125, -0.74700927734375, -0.5494384765625, -0.35186767578125, -0.154296875, 0.04327392578125, 0.2408447265625, 0.43841552734375, 0.635986328125, 0.83355712890625, 1.0311279296875, 1.22869873046875, 1.42626953125, 1.62384033203125, 1.8214111328125, 2.01898193359375, 2.216552734375, 2.41412353515625, 2.6116943359375, 2.80926513671875, 3.0068359375, 3.20440673828125, 3.4019775390625, 3.59954833984375, 3.797119140625, 3.99468994140625, 4.1922607421875, 4.38983154296875, 4.58740234375, 4.78497314453125, 4.9825439453125, 5.18011474609375, 5.377685546875, 5.57525634765625, 5.7728271484375, 5.97039794921875, 6.16796875]}, "gradients/encoder.encoder.layers.6.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 4.0, 5.0, 9.0, 23.0, 69.0, 148.0, 302.0, 277.0, 117.0, 38.0, 13.0, 4.0, 0.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-232.25192260742188, -225.405517578125, -218.55911254882812, -211.71270751953125, -204.86630249023438, -198.0198974609375, -191.17349243164062, -184.32708740234375, -177.48068237304688, -170.63427734375, -163.78787231445312, -156.94146728515625, -150.09506225585938, -143.2486572265625, -136.40225219726562, -129.55584716796875, -122.70942687988281, -115.86302185058594, -109.01661682128906, -102.17021179199219, -95.32380676269531, -88.47740173339844, -81.63098907470703, -74.78458404541016, -67.93817901611328, -61.091773986816406, -54.24536895751953, -47.39896011352539, -40.552555084228516, -33.70615005493164, -26.8597412109375, -20.013336181640625, -13.16693115234375, -6.320525169372559, 0.5258808135986328, 7.372287750244141, 14.218692779541016, 21.06509780883789, 27.91150665283203, 34.757911682128906, 41.60431671142578, 48.450721740722656, 55.29712677001953, 62.14353561401367, 68.98994445800781, 75.83634948730469, 82.68275451660156, 89.52915954589844, 96.37556457519531, 103.22196960449219, 110.06837463378906, 116.91477966308594, 123.76118469238281, 130.6075897216797, 137.45401000976562, 144.3004150390625, 151.14682006835938, 157.99322509765625, 164.83963012695312, 171.68603515625, 178.53244018554688, 185.37884521484375, 192.22525024414062, 199.0716552734375, 205.91806030273438]}, "gradients/encoder.encoder.layers.6.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 3.0, 5.0, 5.0, 10.0, 10.0, 14.0, 17.0, 24.0, 20.0, 35.0, 45.0, 43.0, 58.0, 66.0, 69.0, 74.0, 83.0, 68.0, 73.0, 53.0, 43.0, 47.0, 28.0, 30.0, 23.0, 19.0, 11.0, 10.0, 3.0, 6.0, 9.0, 6.0, 1.0, 4.0, 3.0, 0.0, 0.0, 2.0], "bins": [-134.73545837402344, -131.5338592529297, -128.332275390625, -125.13067626953125, -121.9290771484375, -118.72747802734375, -115.52588653564453, -112.32429504394531, -109.12269592285156, -105.92109680175781, -102.7195053100586, -99.51791381835938, -96.31631469726562, -93.11471557617188, -89.91312408447266, -86.71153259277344, -83.50993347167969, -80.30833435058594, -77.10674285888672, -73.9051513671875, -70.70355224609375, -67.501953125, -64.30036163330078, -61.0987663269043, -57.89717102050781, -54.69557571411133, -51.493980407714844, -48.29238510131836, -45.090789794921875, -41.88919448852539, -38.687599182128906, -35.48600387573242, -32.28441619873047, -29.082820892333984, -25.8812255859375, -22.679630279541016, -19.47803497314453, -16.276439666748047, -13.074844360351562, -9.873249053955078, -6.671653747558594, -3.4700584411621094, -0.268463134765625, 2.9331321716308594, 6.134727478027344, 9.336322784423828, 12.537918090820312, 15.739513397216797, 18.94110870361328, 22.142704010009766, 25.34429931640625, 28.545894622802734, 31.74748992919922, 34.9490852355957, 38.15068054199219, 41.35227584838867, 44.553871154785156, 47.75546646118164, 50.957061767578125, 54.15865707397461, 57.360252380371094, 60.56184768676758, 63.76344299316406, 66.96504211425781, 70.16663360595703]}, "gradients/encoder.encoder.layers.5.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 2.0, 4.0, 4.0, 8.0, 8.0, 28.0, 34.0, 42.0, 79.0, 125.0, 241.0, 473.0, 898.0, 2119.0, 5005.0, 13577.0, 44691.0, 213656.0, 1983772.0, 1685998.0, 183156.0, 39602.0, 12278.0, 4707.0, 1958.0, 882.0, 444.0, 208.0, 114.0, 68.0, 32.0, 32.0, 15.0, 9.0, 10.0, 3.0, 2.0, 2.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-6.77734375, -6.57171630859375, -6.3660888671875, -6.16046142578125, -5.954833984375, -5.74920654296875, -5.5435791015625, -5.33795166015625, -5.13232421875, -4.92669677734375, -4.7210693359375, -4.51544189453125, -4.309814453125, -4.10418701171875, -3.8985595703125, -3.69293212890625, -3.4873046875, -3.28167724609375, -3.0760498046875, -2.87042236328125, -2.664794921875, -2.45916748046875, -2.2535400390625, -2.04791259765625, -1.84228515625, -1.63665771484375, -1.4310302734375, -1.22540283203125, -1.019775390625, -0.81414794921875, -0.6085205078125, -0.40289306640625, -0.197265625, 0.00836181640625, 0.2139892578125, 0.41961669921875, 0.625244140625, 0.83087158203125, 1.0364990234375, 1.24212646484375, 1.44775390625, 1.65338134765625, 1.8590087890625, 2.06463623046875, 2.270263671875, 2.47589111328125, 2.6815185546875, 2.88714599609375, 3.0927734375, 3.29840087890625, 3.5040283203125, 3.70965576171875, 3.915283203125, 4.12091064453125, 4.3265380859375, 4.53216552734375, 4.73779296875, 4.94342041015625, 5.1490478515625, 5.35467529296875, 5.560302734375, 5.76593017578125, 5.9715576171875, 6.17718505859375, 6.3828125]}, "gradients/encoder.encoder.layers.5.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 5.0, 4.0, 7.0, 8.0, 15.0, 29.0, 28.0, 37.0, 67.0, 65.0, 67.0, 60.0, 92.0, 91.0, 81.0, 81.0, 74.0, 47.0, 52.0, 33.0, 28.0, 12.0, 9.0, 13.0, 4.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-8.9609375, -8.7430419921875, -8.525146484375, -8.3072509765625, -8.08935546875, -7.8714599609375, -7.653564453125, -7.4356689453125, -7.2177734375, -6.9998779296875, -6.781982421875, -6.5640869140625, -6.34619140625, -6.1282958984375, -5.910400390625, -5.6925048828125, -5.474609375, -5.2567138671875, -5.038818359375, -4.8209228515625, -4.60302734375, -4.3851318359375, -4.167236328125, -3.9493408203125, -3.7314453125, -3.5135498046875, -3.295654296875, -3.0777587890625, -2.85986328125, -2.6419677734375, -2.424072265625, -2.2061767578125, -1.98828125, -1.7703857421875, -1.552490234375, -1.3345947265625, -1.11669921875, -0.8988037109375, -0.680908203125, -0.4630126953125, -0.2451171875, -0.0272216796875, 0.190673828125, 0.4085693359375, 0.62646484375, 0.8443603515625, 1.062255859375, 1.2801513671875, 1.498046875, 1.7159423828125, 1.933837890625, 2.1517333984375, 2.36962890625, 2.5875244140625, 2.805419921875, 3.0233154296875, 3.2412109375, 3.4591064453125, 3.677001953125, 3.8948974609375, 4.11279296875, 4.3306884765625, 4.548583984375, 4.7664794921875, 4.984375]}, "gradients/encoder.encoder.layers.5.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 4.0, 1.0, 10.0, 9.0, 9.0, 17.0, 24.0, 30.0, 40.0, 69.0, 115.0, 189.0, 357.0, 727.0, 1596.0, 4215.0, 12935.0, 53957.0, 392717.0, 3298953.0, 359055.0, 50514.0, 11892.0, 3720.0, 1487.0, 712.0, 362.0, 231.0, 114.0, 83.0, 51.0, 31.0, 16.0, 13.0, 9.0, 7.0, 3.0, 6.0, 1.0, 2.0, 4.0, 2.0, 3.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-10.90625, -10.559814453125, -10.21337890625, -9.866943359375, -9.5205078125, -9.174072265625, -8.82763671875, -8.481201171875, -8.134765625, -7.788330078125, -7.44189453125, -7.095458984375, -6.7490234375, -6.402587890625, -6.05615234375, -5.709716796875, -5.36328125, -5.016845703125, -4.67041015625, -4.323974609375, -3.9775390625, -3.631103515625, -3.28466796875, -2.938232421875, -2.591796875, -2.245361328125, -1.89892578125, -1.552490234375, -1.2060546875, -0.859619140625, -0.51318359375, -0.166748046875, 0.1796875, 0.526123046875, 0.87255859375, 1.218994140625, 1.5654296875, 1.911865234375, 2.25830078125, 2.604736328125, 2.951171875, 3.297607421875, 3.64404296875, 3.990478515625, 4.3369140625, 4.683349609375, 5.02978515625, 5.376220703125, 5.72265625, 6.069091796875, 6.41552734375, 6.761962890625, 7.1083984375, 7.454833984375, 7.80126953125, 8.147705078125, 8.494140625, 8.840576171875, 9.18701171875, 9.533447265625, 9.8798828125, 10.226318359375, 10.57275390625, 10.919189453125, 11.265625]}, "gradients/encoder.encoder.layers.5.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 1.0, 4.0, 1.0, 6.0, 5.0, 10.0, 9.0, 11.0, 18.0, 12.0, 33.0, 45.0, 60.0, 85.0, 121.0, 211.0, 324.0, 572.0, 814.0, 660.0, 406.0, 212.0, 130.0, 91.0, 63.0, 61.0, 26.0, 31.0, 22.0, 12.0, 8.0, 7.0, 5.0, 3.0, 2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-10.9140625, -10.504150390625, -10.09423828125, -9.684326171875, -9.2744140625, -8.864501953125, -8.45458984375, -8.044677734375, -7.634765625, -7.224853515625, -6.81494140625, -6.405029296875, -5.9951171875, -5.585205078125, -5.17529296875, -4.765380859375, -4.35546875, -3.945556640625, -3.53564453125, -3.125732421875, -2.7158203125, -2.305908203125, -1.89599609375, -1.486083984375, -1.076171875, -0.666259765625, -0.25634765625, 0.153564453125, 0.5634765625, 0.973388671875, 1.38330078125, 1.793212890625, 2.203125, 2.613037109375, 3.02294921875, 3.432861328125, 3.8427734375, 4.252685546875, 4.66259765625, 5.072509765625, 5.482421875, 5.892333984375, 6.30224609375, 6.712158203125, 7.1220703125, 7.531982421875, 7.94189453125, 8.351806640625, 8.76171875, 9.171630859375, 9.58154296875, 9.991455078125, 10.4013671875, 10.811279296875, 11.22119140625, 11.631103515625, 12.041015625, 12.450927734375, 12.86083984375, 13.270751953125, 13.6806640625, 14.090576171875, 14.50048828125, 14.910400390625, 15.3203125]}, "gradients/encoder.encoder.layers.5.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 3.0, 6.0, 10.0, 28.0, 62.0, 159.0, 321.0, 247.0, 101.0, 29.0, 17.0, 7.0, 6.0, 7.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-205.401123046875, -199.29710388183594, -193.19309997558594, -187.08908081054688, -180.98507690429688, -174.8810577392578, -168.77703857421875, -162.67303466796875, -156.56903076171875, -150.4650115966797, -144.3610076904297, -138.25698852539062, -132.15298461914062, -126.04896545410156, -119.94495391845703, -113.8409423828125, -107.73692321777344, -101.6329116821289, -95.52890014648438, -89.42488098144531, -83.32087707519531, -77.21685791015625, -71.11284637451172, -65.00883483886719, -58.904823303222656, -52.800811767578125, -46.696800231933594, -40.5927848815918, -34.488773345947266, -28.384761810302734, -22.280746459960938, -16.176734924316406, -10.072738647460938, -3.96872615814209, 2.135286331176758, 8.239299774169922, 14.343311309814453, 20.447322845458984, 26.55133819580078, 32.65534973144531, 38.759361267089844, 44.863372802734375, 50.967384338378906, 57.0713996887207, 63.175411224365234, 69.2794189453125, 75.38343811035156, 81.4874496459961, 87.59146118164062, 93.69547271728516, 99.79948425292969, 105.90350341796875, 112.00750732421875, 118.11152648925781, 124.21553802490234, 130.31954956054688, 136.42355346679688, 142.52757263183594, 148.63157653808594, 154.735595703125, 160.839599609375, 166.94361877441406, 173.04763793945312, 179.15164184570312, 185.2556610107422]}, "gradients/encoder.encoder.layers.5.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 4.0, 3.0, 2.0, 1.0, 10.0, 13.0, 16.0, 20.0, 24.0, 25.0, 25.0, 30.0, 27.0, 35.0, 47.0, 51.0, 37.0, 51.0, 46.0, 54.0, 66.0, 50.0, 64.0, 37.0, 44.0, 29.0, 26.0, 34.0, 24.0, 20.0, 21.0, 17.0, 14.0, 7.0, 8.0, 6.0, 8.0, 4.0, 3.0, 2.0, 4.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0], "bins": [-74.07620239257812, -72.036376953125, -69.99654388427734, -67.95671844482422, -65.91688537597656, -63.87705993652344, -61.83723068237305, -59.797401428222656, -57.75757598876953, -55.71774673461914, -53.67791748046875, -51.638092041015625, -49.598262786865234, -47.558433532714844, -45.51860427856445, -43.47877502441406, -41.43894577026367, -39.39911651611328, -37.35928726196289, -35.3194580078125, -33.279632568359375, -31.239803314208984, -29.199974060058594, -27.160144805908203, -25.120317459106445, -23.080488204956055, -21.040660858154297, -19.000831604003906, -16.961002349853516, -14.921175003051758, -12.881345748901367, -10.841517448425293, -8.801689147949219, -6.7618608474731445, -4.722032070159912, -2.6822032928466797, -0.6423749923706055, 1.3974533081054688, 3.4372825622558594, 5.477110862731934, 7.516939163208008, 9.556767463684082, 11.596595764160156, 13.636425018310547, 15.676253318786621, 17.716081619262695, 19.755910873413086, 21.795738220214844, 23.835567474365234, 25.875396728515625, 27.915224075317383, 29.955053329467773, 31.99488067626953, 34.03470993041992, 36.07453918457031, 38.1143684387207, 40.154197692871094, 42.194026947021484, 44.233856201171875, 46.273681640625, 48.31351089477539, 50.35334014892578, 52.39316940307617, 54.43299865722656, 56.47282409667969]}, "gradients/encoder.encoder.layers.5.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 5.0, 3.0, 6.0, 4.0, 6.0, 8.0, 21.0, 13.0, 25.0, 33.0, 40.0, 56.0, 103.0, 136.0, 215.0, 346.0, 542.0, 843.0, 1525.0, 2638.0, 5082.0, 9656.0, 19202.0, 38868.0, 79229.0, 162595.0, 292990.0, 220266.0, 107316.0, 52659.0, 26001.0, 13026.0, 6688.0, 3469.0, 1935.0, 1139.0, 689.0, 406.0, 246.0, 162.0, 106.0, 92.0, 35.0, 35.0, 32.0, 19.0, 15.0, 8.0, 9.0, 8.0, 5.0, 2.0, 5.0, 3.0, 4.0, 0.0, 1.0], "bins": [-6.68359375, -6.4853515625, -6.287109375, -6.0888671875, -5.890625, -5.6923828125, -5.494140625, -5.2958984375, -5.09765625, -4.8994140625, -4.701171875, -4.5029296875, -4.3046875, -4.1064453125, -3.908203125, -3.7099609375, -3.51171875, -3.3134765625, -3.115234375, -2.9169921875, -2.71875, -2.5205078125, -2.322265625, -2.1240234375, -1.92578125, -1.7275390625, -1.529296875, -1.3310546875, -1.1328125, -0.9345703125, -0.736328125, -0.5380859375, -0.33984375, -0.1416015625, 0.056640625, 0.2548828125, 0.453125, 0.6513671875, 0.849609375, 1.0478515625, 1.24609375, 1.4443359375, 1.642578125, 1.8408203125, 2.0390625, 2.2373046875, 2.435546875, 2.6337890625, 2.83203125, 3.0302734375, 3.228515625, 3.4267578125, 3.625, 3.8232421875, 4.021484375, 4.2197265625, 4.41796875, 4.6162109375, 4.814453125, 5.0126953125, 5.2109375, 5.4091796875, 5.607421875, 5.8056640625, 6.00390625]}, "gradients/encoder.encoder.layers.5.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 4.0, 6.0, 2.0, 4.0, 7.0, 18.0, 13.0, 16.0, 16.0, 30.0, 36.0, 42.0, 51.0, 40.0, 53.0, 71.0, 72.0, 69.0, 70.0, 67.0, 60.0, 56.0, 44.0, 41.0, 29.0, 29.0, 13.0, 12.0, 13.0, 13.0, 6.0, 7.0, 3.0, 4.0, 1.0, 0.0, 0.0, 1.0], "bins": [-7.34765625, -7.177642822265625, -7.00762939453125, -6.837615966796875, -6.6676025390625, -6.497589111328125, -6.32757568359375, -6.157562255859375, -5.987548828125, -5.817535400390625, -5.64752197265625, -5.477508544921875, -5.3074951171875, -5.137481689453125, -4.96746826171875, -4.797454833984375, -4.62744140625, -4.457427978515625, -4.28741455078125, -4.117401123046875, -3.9473876953125, -3.777374267578125, -3.60736083984375, -3.437347412109375, -3.267333984375, -3.097320556640625, -2.92730712890625, -2.757293701171875, -2.5872802734375, -2.417266845703125, -2.24725341796875, -2.077239990234375, -1.9072265625, -1.737213134765625, -1.56719970703125, -1.397186279296875, -1.2271728515625, -1.057159423828125, -0.88714599609375, -0.717132568359375, -0.547119140625, -0.377105712890625, -0.20709228515625, -0.037078857421875, 0.1329345703125, 0.302947998046875, 0.47296142578125, 0.642974853515625, 0.81298828125, 0.983001708984375, 1.15301513671875, 1.323028564453125, 1.4930419921875, 1.663055419921875, 1.83306884765625, 2.003082275390625, 2.173095703125, 2.343109130859375, 2.51312255859375, 2.683135986328125, 2.8531494140625, 3.023162841796875, 3.19317626953125, 3.363189697265625, 3.533203125]}, "gradients/encoder.encoder.layers.5.attention.v_proj.weight": {"_type": "histogram", "values": [3.0, 1.0, 1.0, 2.0, 4.0, 10.0, 7.0, 9.0, 23.0, 25.0, 31.0, 35.0, 54.0, 67.0, 104.0, 133.0, 186.0, 278.0, 364.0, 534.0, 779.0, 1314.0, 2213.0, 4562.0, 12163.0, 47834.0, 271652.0, 588167.0, 85530.0, 18777.0, 6266.0, 2796.0, 1646.0, 999.0, 591.0, 395.0, 270.0, 210.0, 158.0, 114.0, 74.0, 42.0, 33.0, 29.0, 23.0, 7.0, 11.0, 9.0, 6.0, 8.0, 3.0, 4.0, 1.0, 4.0, 3.0, 2.0, 1.0, 5.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0], "bins": [-11.421875, -11.0015869140625, -10.581298828125, -10.1610107421875, -9.74072265625, -9.3204345703125, -8.900146484375, -8.4798583984375, -8.0595703125, -7.6392822265625, -7.218994140625, -6.7987060546875, -6.37841796875, -5.9581298828125, -5.537841796875, -5.1175537109375, -4.697265625, -4.2769775390625, -3.856689453125, -3.4364013671875, -3.01611328125, -2.5958251953125, -2.175537109375, -1.7552490234375, -1.3349609375, -0.9146728515625, -0.494384765625, -0.0740966796875, 0.34619140625, 0.7664794921875, 1.186767578125, 1.6070556640625, 2.02734375, 2.4476318359375, 2.867919921875, 3.2882080078125, 3.70849609375, 4.1287841796875, 4.549072265625, 4.9693603515625, 5.3896484375, 5.8099365234375, 6.230224609375, 6.6505126953125, 7.07080078125, 7.4910888671875, 7.911376953125, 8.3316650390625, 8.751953125, 9.1722412109375, 9.592529296875, 10.0128173828125, 10.43310546875, 10.8533935546875, 11.273681640625, 11.6939697265625, 12.1142578125, 12.5345458984375, 12.954833984375, 13.3751220703125, 13.79541015625, 14.2156982421875, 14.635986328125, 15.0562744140625, 15.4765625]}, "gradients/encoder.encoder.layers.5.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 2.0, 4.0, 4.0, 3.0, 2.0, 5.0, 3.0, 13.0, 17.0, 24.0, 22.0, 15.0, 40.0, 37.0, 53.0, 63.0, 50.0, 77.0, 65.0, 65.0, 80.0, 56.0, 55.0, 57.0, 39.0, 43.0, 36.0, 18.0, 16.0, 13.0, 11.0, 9.0, 2.0, 3.0, 4.0, 2.0, 2.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-21.4375, -20.61376953125, -19.7900390625, -18.96630859375, -18.142578125, -17.31884765625, -16.4951171875, -15.67138671875, -14.84765625, -14.02392578125, -13.2001953125, -12.37646484375, -11.552734375, -10.72900390625, -9.9052734375, -9.08154296875, -8.2578125, -7.43408203125, -6.6103515625, -5.78662109375, -4.962890625, -4.13916015625, -3.3154296875, -2.49169921875, -1.66796875, -0.84423828125, -0.0205078125, 0.80322265625, 1.626953125, 2.45068359375, 3.2744140625, 4.09814453125, 4.921875, 5.74560546875, 6.5693359375, 7.39306640625, 8.216796875, 9.04052734375, 9.8642578125, 10.68798828125, 11.51171875, 12.33544921875, 13.1591796875, 13.98291015625, 14.806640625, 15.63037109375, 16.4541015625, 17.27783203125, 18.1015625, 18.92529296875, 19.7490234375, 20.57275390625, 21.396484375, 22.22021484375, 23.0439453125, 23.86767578125, 24.69140625, 25.51513671875, 26.3388671875, 27.16259765625, 27.986328125, 28.81005859375, 29.6337890625, 30.45751953125, 31.28125]}, "gradients/encoder.encoder.layers.5.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 2.0, 1.0, 0.0, 3.0, 3.0, 2.0, 5.0, 7.0, 16.0, 30.0, 67.0, 193.0, 881.0, 9316.0, 972112.0, 63269.0, 2079.0, 369.0, 117.0, 41.0, 11.0, 12.0, 6.0, 5.0, 3.0, 4.0, 1.0, 4.0, 1.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-15.7578125, -15.1451416015625, -14.532470703125, -13.9197998046875, -13.30712890625, -12.6944580078125, -12.081787109375, -11.4691162109375, -10.8564453125, -10.2437744140625, -9.631103515625, -9.0184326171875, -8.40576171875, -7.7930908203125, -7.180419921875, -6.5677490234375, -5.955078125, -5.3424072265625, -4.729736328125, -4.1170654296875, -3.50439453125, -2.8917236328125, -2.279052734375, -1.6663818359375, -1.0537109375, -0.4410400390625, 0.171630859375, 0.7843017578125, 1.39697265625, 2.0096435546875, 2.622314453125, 3.2349853515625, 3.84765625, 4.4603271484375, 5.072998046875, 5.6856689453125, 6.29833984375, 6.9110107421875, 7.523681640625, 8.1363525390625, 8.7490234375, 9.3616943359375, 9.974365234375, 10.5870361328125, 11.19970703125, 11.8123779296875, 12.425048828125, 13.0377197265625, 13.650390625, 14.2630615234375, 14.875732421875, 15.4884033203125, 16.10107421875, 16.7137451171875, 17.326416015625, 17.9390869140625, 18.5517578125, 19.1644287109375, 19.777099609375, 20.3897705078125, 21.00244140625, 21.6151123046875, 22.227783203125, 22.8404541015625, 23.453125]}, "gradients/encoder.encoder.layers.5.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 3.0, 4.0, 3.0, 2.0, 4.0, 4.0, 5.0, 11.0, 18.0, 17.0, 31.0, 35.0, 49.0, 46.0, 90.0, 104.0, 152.0, 109.0, 88.0, 55.0, 37.0, 41.0, 17.0, 28.0, 11.0, 5.0, 12.0, 4.0, 9.0, 3.0, 4.0, 5.0, 0.0, 2.0, 0.0, 0.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.001926422119140625, -0.001872330904006958, -0.001818239688873291, -0.001764148473739624, -0.001710057258605957, -0.00165596604347229, -0.001601874828338623, -0.001547783613204956, -0.001493692398071289, -0.001439601182937622, -0.001385509967803955, -0.001331418752670288, -0.001277327537536621, -0.001223236322402954, -0.0011691451072692871, -0.0011150538921356201, -0.0010609626770019531, -0.0010068714618682861, -0.0009527802467346191, -0.0008986890316009521, -0.0008445978164672852, -0.0007905066013336182, -0.0007364153861999512, -0.0006823241710662842, -0.0006282329559326172, -0.0005741417407989502, -0.0005200505256652832, -0.0004659593105316162, -0.0004118680953979492, -0.0003577768802642822, -0.00030368566513061523, -0.00024959444999694824, -0.00019550323486328125, -0.00014141201972961426, -8.732080459594727e-05, -3.3229589462280273e-05, 2.086162567138672e-05, 7.495284080505371e-05, 0.0001290440559387207, 0.0001831352710723877, 0.0002372264862060547, 0.0002913177013397217, 0.00034540891647338867, 0.00039950013160705566, 0.00045359134674072266, 0.0005076825618743896, 0.0005617737770080566, 0.0006158649921417236, 0.0006699562072753906, 0.0007240474224090576, 0.0007781386375427246, 0.0008322298526763916, 0.0008863210678100586, 0.0009404122829437256, 0.0009945034980773926, 0.0010485947132110596, 0.0011026859283447266, 0.0011567771434783936, 0.0012108683586120605, 0.0012649595737457275, 0.0013190507888793945, 0.0013731420040130615, 0.0014272332191467285, 0.0014813244342803955, 0.0015354156494140625]}, "gradients/encoder.encoder.layers.5.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 3.0, 11.0, 12.0, 8.0, 35.0, 57.0, 108.0, 278.0, 761.0, 3224.0, 28486.0, 950788.0, 58687.0, 4485.0, 1008.0, 350.0, 126.0, 62.0, 27.0, 15.0, 6.0, 5.0, 5.0, 8.0, 2.0, 0.0, 2.0, 2.0, 0.0, 2.0, 0.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-14.1328125, -13.7569580078125, -13.381103515625, -13.0052490234375, -12.62939453125, -12.2535400390625, -11.877685546875, -11.5018310546875, -11.1259765625, -10.7501220703125, -10.374267578125, -9.9984130859375, -9.62255859375, -9.2467041015625, -8.870849609375, -8.4949951171875, -8.119140625, -7.7432861328125, -7.367431640625, -6.9915771484375, -6.61572265625, -6.2398681640625, -5.864013671875, -5.4881591796875, -5.1123046875, -4.7364501953125, -4.360595703125, -3.9847412109375, -3.60888671875, -3.2330322265625, -2.857177734375, -2.4813232421875, -2.10546875, -1.7296142578125, -1.353759765625, -0.9779052734375, -0.60205078125, -0.2261962890625, 0.149658203125, 0.5255126953125, 0.9013671875, 1.2772216796875, 1.653076171875, 2.0289306640625, 2.40478515625, 2.7806396484375, 3.156494140625, 3.5323486328125, 3.908203125, 4.2840576171875, 4.659912109375, 5.0357666015625, 5.41162109375, 5.7874755859375, 6.163330078125, 6.5391845703125, 6.9150390625, 7.2908935546875, 7.666748046875, 8.0426025390625, 8.41845703125, 8.7943115234375, 9.170166015625, 9.5460205078125, 9.921875]}, "gradients/encoder.encoder.layers.5.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 6.0, 7.0, 15.0, 20.0, 32.0, 38.0, 60.0, 80.0, 123.0, 144.0, 138.0, 104.0, 63.0, 62.0, 36.0, 26.0, 17.0, 16.0, 5.0, 7.0, 6.0, 4.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.6875, -7.3922119140625, -7.096923828125, -6.8016357421875, -6.50634765625, -6.2110595703125, -5.915771484375, -5.6204833984375, -5.3251953125, -5.0299072265625, -4.734619140625, -4.4393310546875, -4.14404296875, -3.8487548828125, -3.553466796875, -3.2581787109375, -2.962890625, -2.6676025390625, -2.372314453125, -2.0770263671875, -1.78173828125, -1.4864501953125, -1.191162109375, -0.8958740234375, -0.6005859375, -0.3052978515625, -0.010009765625, 0.2852783203125, 0.58056640625, 0.8758544921875, 1.171142578125, 1.4664306640625, 1.76171875, 2.0570068359375, 2.352294921875, 2.6475830078125, 2.94287109375, 3.2381591796875, 3.533447265625, 3.8287353515625, 4.1240234375, 4.4193115234375, 4.714599609375, 5.0098876953125, 5.30517578125, 5.6004638671875, 5.895751953125, 6.1910400390625, 6.486328125, 6.7816162109375, 7.076904296875, 7.3721923828125, 7.66748046875, 7.9627685546875, 8.258056640625, 8.5533447265625, 8.8486328125, 9.1439208984375, 9.439208984375, 9.7344970703125, 10.02978515625, 10.3250732421875, 10.620361328125, 10.9156494140625, 11.2109375]}, "gradients/encoder.encoder.layers.5.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0, 1.0, 4.0, 6.0, 9.0, 12.0, 20.0, 36.0, 35.0, 76.0, 88.0, 169.0, 177.0, 118.0, 89.0, 56.0, 48.0, 18.0, 13.0, 9.0, 6.0, 3.0, 1.0, 3.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-92.28850555419922, -89.34164428710938, -86.39478302001953, -83.44792175292969, -80.50105285644531, -77.55419158935547, -74.60733032226562, -71.66046905517578, -68.71360778808594, -65.7667465209961, -62.819881439208984, -59.87302017211914, -56.92615509033203, -53.97929382324219, -51.032432556152344, -48.0855712890625, -45.138702392578125, -42.19184112548828, -39.24497604370117, -36.29811477661133, -33.35124969482422, -30.404388427734375, -27.45752716064453, -24.510663986206055, -21.563800811767578, -18.6169376373291, -15.670075416564941, -12.723213195800781, -9.776350021362305, -6.829486846923828, -3.8826255798339844, -0.9357624053955078, 2.0111007690429688, 4.957963466644287, 7.9048261642456055, 10.851688385009766, 13.798551559448242, 16.74541473388672, 19.692276000976562, 22.63913917541504, 25.586002349853516, 28.532865524291992, 31.47972869873047, 34.42658996582031, 37.373451232910156, 40.320316314697266, 43.26717758178711, 46.21404266357422, 49.16090393066406, 52.107765197753906, 55.054630279541016, 58.00149154663086, 60.94835662841797, 63.89521789550781, 66.84207916259766, 69.7889404296875, 72.73580932617188, 75.68267059326172, 78.62953186035156, 81.57640075683594, 84.52326202392578, 87.47012329101562, 90.41698455810547, 93.36384582519531, 96.31070709228516]}, "gradients/encoder.encoder.layers.5.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 0.0, 1.0, 2.0, 4.0, 2.0, 6.0, 3.0, 8.0, 6.0, 7.0, 15.0, 10.0, 8.0, 20.0, 21.0, 24.0, 31.0, 32.0, 44.0, 40.0, 40.0, 44.0, 52.0, 90.0, 70.0, 62.0, 54.0, 36.0, 42.0, 30.0, 40.0, 19.0, 37.0, 24.0, 20.0, 17.0, 14.0, 7.0, 12.0, 1.0, 1.0, 5.0, 3.0, 7.0, 1.0, 0.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-84.497314453125, -82.14225006103516, -79.78717803955078, -77.43211364746094, -75.0770492553711, -72.72198486328125, -70.36691284179688, -68.01184844970703, -65.65678405761719, -63.30171585083008, -60.946651458740234, -58.591583251953125, -56.23651885986328, -53.88145065307617, -51.52638244628906, -49.17131805419922, -46.816246032714844, -44.461177825927734, -42.10611343383789, -39.75104522705078, -37.39598083496094, -35.04091262817383, -32.68584442138672, -30.330778121948242, -27.975711822509766, -25.62064552307129, -23.265579223632812, -20.910511016845703, -18.555444717407227, -16.20037841796875, -13.845311164855957, -11.490243911743164, -9.135177612304688, -6.780110836029053, -4.425044059753418, -2.069977283477783, 0.28508949279785156, 2.640155792236328, 4.995223045349121, 7.350290298461914, 9.70535659790039, 12.060422897338867, 14.41549015045166, 16.770557403564453, 19.12562370300293, 21.480690002441406, 23.835758209228516, 26.190824508666992, 28.54589080810547, 30.900957107543945, 33.25602340698242, 35.61109161376953, 37.966156005859375, 40.321224212646484, 42.676292419433594, 45.03135681152344, 47.38642501831055, 49.741493225097656, 52.0965576171875, 54.45162582397461, 56.80669403076172, 59.16175842285156, 61.51682662963867, 63.87189483642578, 66.22695922851562]}, "gradients/encoder.encoder.layers.4.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 3.0, 2.0, 3.0, 1.0, 4.0, 1.0, 6.0, 7.0, 4.0, 9.0, 6.0, 16.0, 34.0, 40.0, 74.0, 133.0, 220.0, 451.0, 962.0, 2147.0, 5155.0, 13705.0, 42890.0, 190101.0, 1378925.0, 2160479.0, 307545.0, 60314.0, 18338.0, 6900.0, 3033.0, 1408.0, 696.0, 343.0, 156.0, 102.0, 40.0, 25.0, 9.0, 5.0, 2.0, 1.0, 1.0], "bins": [-7.62109375, -7.455902099609375, -7.29071044921875, -7.125518798828125, -6.9603271484375, -6.795135498046875, -6.62994384765625, -6.464752197265625, -6.299560546875, -6.134368896484375, -5.96917724609375, -5.803985595703125, -5.6387939453125, -5.473602294921875, -5.30841064453125, -5.143218994140625, -4.97802734375, -4.812835693359375, -4.64764404296875, -4.482452392578125, -4.3172607421875, -4.152069091796875, -3.98687744140625, -3.821685791015625, -3.656494140625, -3.491302490234375, -3.32611083984375, -3.160919189453125, -2.9957275390625, -2.830535888671875, -2.66534423828125, -2.500152587890625, -2.3349609375, -2.169769287109375, -2.00457763671875, -1.839385986328125, -1.6741943359375, -1.509002685546875, -1.34381103515625, -1.178619384765625, -1.013427734375, -0.848236083984375, -0.68304443359375, -0.517852783203125, -0.3526611328125, -0.187469482421875, -0.02227783203125, 0.142913818359375, 0.30810546875, 0.473297119140625, 0.63848876953125, 0.803680419921875, 0.9688720703125, 1.134063720703125, 1.29925537109375, 1.464447021484375, 1.629638671875, 1.794830322265625, 1.96002197265625, 2.125213623046875, 2.2904052734375, 2.455596923828125, 2.62078857421875, 2.785980224609375, 2.951171875]}, "gradients/encoder.encoder.layers.4.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 3.0, 6.0, 8.0, 14.0, 21.0, 28.0, 49.0, 56.0, 84.0, 87.0, 93.0, 98.0, 103.0, 104.0, 78.0, 53.0, 43.0, 24.0, 21.0, 16.0, 14.0, 6.0, 4.0, 1.0, 1.0, 1.0, 2.0], "bins": [-12.2734375, -12.020355224609375, -11.76727294921875, -11.514190673828125, -11.2611083984375, -11.008026123046875, -10.75494384765625, -10.501861572265625, -10.248779296875, -9.995697021484375, -9.74261474609375, -9.489532470703125, -9.2364501953125, -8.983367919921875, -8.73028564453125, -8.477203369140625, -8.22412109375, -7.971038818359375, -7.71795654296875, -7.464874267578125, -7.2117919921875, -6.958709716796875, -6.70562744140625, -6.452545166015625, -6.199462890625, -5.946380615234375, -5.69329833984375, -5.440216064453125, -5.1871337890625, -4.934051513671875, -4.68096923828125, -4.427886962890625, -4.1748046875, -3.921722412109375, -3.66864013671875, -3.415557861328125, -3.1624755859375, -2.909393310546875, -2.65631103515625, -2.403228759765625, -2.150146484375, -1.897064208984375, -1.64398193359375, -1.390899658203125, -1.1378173828125, -0.884735107421875, -0.63165283203125, -0.378570556640625, -0.12548828125, 0.127593994140625, 0.38067626953125, 0.633758544921875, 0.8868408203125, 1.139923095703125, 1.39300537109375, 1.646087646484375, 1.899169921875, 2.152252197265625, 2.40533447265625, 2.658416748046875, 2.9114990234375, 3.164581298828125, 3.41766357421875, 3.670745849609375, 3.923828125]}, "gradients/encoder.encoder.layers.4.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 4.0, 7.0, 4.0, 12.0, 16.0, 23.0, 39.0, 62.0, 89.0, 167.0, 368.0, 1073.0, 4627.0, 32564.0, 724281.0, 3339385.0, 79268.0, 9079.0, 1867.0, 602.0, 302.0, 156.0, 114.0, 78.0, 40.0, 20.0, 16.0, 15.0, 6.0, 3.0, 5.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-11.7890625, -11.2803955078125, -10.771728515625, -10.2630615234375, -9.75439453125, -9.2457275390625, -8.737060546875, -8.2283935546875, -7.7197265625, -7.2110595703125, -6.702392578125, -6.1937255859375, -5.68505859375, -5.1763916015625, -4.667724609375, -4.1590576171875, -3.650390625, -3.1417236328125, -2.633056640625, -2.1243896484375, -1.61572265625, -1.1070556640625, -0.598388671875, -0.0897216796875, 0.4189453125, 0.9276123046875, 1.436279296875, 1.9449462890625, 2.45361328125, 2.9622802734375, 3.470947265625, 3.9796142578125, 4.48828125, 4.9969482421875, 5.505615234375, 6.0142822265625, 6.52294921875, 7.0316162109375, 7.540283203125, 8.0489501953125, 8.5576171875, 9.0662841796875, 9.574951171875, 10.0836181640625, 10.59228515625, 11.1009521484375, 11.609619140625, 12.1182861328125, 12.626953125, 13.1356201171875, 13.644287109375, 14.1529541015625, 14.66162109375, 15.1702880859375, 15.678955078125, 16.1876220703125, 16.6962890625, 17.2049560546875, 17.713623046875, 18.2222900390625, 18.73095703125, 19.2396240234375, 19.748291015625, 20.2569580078125, 20.765625]}, "gradients/encoder.encoder.layers.4.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 3.0, 3.0, 3.0, 4.0, 7.0, 2.0, 14.0, 14.0, 18.0, 24.0, 38.0, 34.0, 53.0, 81.0, 105.0, 131.0, 235.0, 370.0, 632.0, 718.0, 570.0, 346.0, 185.0, 149.0, 92.0, 51.0, 55.0, 42.0, 23.0, 16.0, 11.0, 10.0, 11.0, 8.0, 4.0, 6.0, 7.0, 2.0, 2.0, 5.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.0625, -8.742431640625, -8.42236328125, -8.102294921875, -7.7822265625, -7.462158203125, -7.14208984375, -6.822021484375, -6.501953125, -6.181884765625, -5.86181640625, -5.541748046875, -5.2216796875, -4.901611328125, -4.58154296875, -4.261474609375, -3.94140625, -3.621337890625, -3.30126953125, -2.981201171875, -2.6611328125, -2.341064453125, -2.02099609375, -1.700927734375, -1.380859375, -1.060791015625, -0.74072265625, -0.420654296875, -0.1005859375, 0.219482421875, 0.53955078125, 0.859619140625, 1.1796875, 1.499755859375, 1.81982421875, 2.139892578125, 2.4599609375, 2.780029296875, 3.10009765625, 3.420166015625, 3.740234375, 4.060302734375, 4.38037109375, 4.700439453125, 5.0205078125, 5.340576171875, 5.66064453125, 5.980712890625, 6.30078125, 6.620849609375, 6.94091796875, 7.260986328125, 7.5810546875, 7.901123046875, 8.22119140625, 8.541259765625, 8.861328125, 9.181396484375, 9.50146484375, 9.821533203125, 10.1416015625, 10.461669921875, 10.78173828125, 11.101806640625, 11.421875]}, "gradients/encoder.encoder.layers.4.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 0.0, 1.0, 2.0, 1.0, 1.0, 0.0, 5.0, 3.0, 7.0, 20.0, 30.0, 72.0, 199.0, 292.0, 217.0, 105.0, 32.0, 9.0, 4.0, 2.0, 4.0, 4.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-173.59999084472656, -168.24278259277344, -162.8855743408203, -157.52838134765625, -152.17117309570312, -146.81396484375, -141.45675659179688, -136.09954833984375, -130.74234008789062, -125.3851318359375, -120.0279312133789, -114.67072296142578, -109.31352233886719, -103.95631408691406, -98.59910583496094, -93.24189758300781, -87.88470458984375, -82.52749633789062, -77.17029571533203, -71.8130874633789, -66.45588684082031, -61.09867858886719, -55.74147033691406, -50.3842658996582, -45.027061462402344, -39.669857025146484, -34.312652587890625, -28.9554443359375, -23.59823989868164, -18.24103546142578, -12.883827209472656, -7.526622772216797, -2.169403076171875, 3.187802314758301, 8.545007705688477, 13.902214050292969, 19.259418487548828, 24.616622924804688, 29.973831176757812, 35.33103561401367, 40.68824005126953, 46.04544448852539, 51.40264892578125, 56.759857177734375, 62.117061614990234, 67.4742660522461, 72.83147430419922, 78.18867492675781, 83.54588317871094, 88.90309143066406, 94.26029205322266, 99.61750030517578, 104.97470092773438, 110.3319091796875, 115.68911743164062, 121.04632568359375, 126.40352630615234, 131.76072692871094, 137.11793518066406, 142.4751434326172, 147.8323516845703, 153.18954467773438, 158.5467529296875, 163.90396118164062, 169.26116943359375]}, "gradients/encoder.encoder.layers.4.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 6.0, 2.0, 4.0, 12.0, 15.0, 15.0, 13.0, 23.0, 21.0, 28.0, 34.0, 42.0, 52.0, 50.0, 67.0, 65.0, 66.0, 62.0, 60.0, 50.0, 54.0, 57.0, 43.0, 30.0, 34.0, 29.0, 19.0, 21.0, 9.0, 9.0, 8.0, 8.0, 0.0, 6.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-79.02796936035156, -76.88227081298828, -74.736572265625, -72.59087371826172, -70.44517517089844, -68.29946899414062, -66.15377044677734, -64.00807189941406, -61.86237335205078, -59.7166748046875, -57.57097625732422, -55.42527389526367, -53.27957534790039, -51.13387680053711, -48.98817443847656, -46.84247589111328, -44.69677734375, -42.55107879638672, -40.40538024902344, -38.25967788696289, -36.11397933959961, -33.96828079223633, -31.822580337524414, -29.6768798828125, -27.53118133544922, -25.385482788085938, -23.239782333374023, -21.09408187866211, -18.948383331298828, -16.802684783935547, -14.656984329223633, -12.511284828186035, -10.365592956542969, -8.219893455505371, -6.074193954467773, -3.928494453430176, -1.7827949523925781, 0.36290454864501953, 2.508604049682617, 4.654303550720215, 6.8000030517578125, 8.94570255279541, 11.091402053833008, 13.237101554870605, 15.382801055908203, 17.528499603271484, 19.6742000579834, 21.819900512695312, 23.965599060058594, 26.111297607421875, 28.25699806213379, 30.402698516845703, 32.548397064208984, 34.694095611572266, 36.83979797363281, 38.985496520996094, 41.131195068359375, 43.276893615722656, 45.42259216308594, 47.568294525146484, 49.713993072509766, 51.85969161987305, 54.005393981933594, 56.151092529296875, 58.296791076660156]}, "gradients/encoder.encoder.layers.4.attention.out_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 2.0, 4.0, 2.0, 9.0, 9.0, 20.0, 34.0, 54.0, 67.0, 101.0, 163.0, 250.0, 397.0, 683.0, 1201.0, 2059.0, 3993.0, 8459.0, 18480.0, 43100.0, 105588.0, 265691.0, 340148.0, 149893.0, 59868.0, 25470.0, 11189.0, 5196.0, 2721.0, 1506.0, 835.0, 515.0, 314.0, 193.0, 123.0, 68.0, 56.0, 32.0, 23.0, 17.0, 6.0, 12.0, 4.0, 3.0, 4.0, 0.0, 1.0, 1.0, 3.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0], "bins": [-6.2265625, -5.9793701171875, -5.732177734375, -5.4849853515625, -5.23779296875, -4.9906005859375, -4.743408203125, -4.4962158203125, -4.2490234375, -4.0018310546875, -3.754638671875, -3.5074462890625, -3.26025390625, -3.0130615234375, -2.765869140625, -2.5186767578125, -2.271484375, -2.0242919921875, -1.777099609375, -1.5299072265625, -1.28271484375, -1.0355224609375, -0.788330078125, -0.5411376953125, -0.2939453125, -0.0467529296875, 0.200439453125, 0.4476318359375, 0.69482421875, 0.9420166015625, 1.189208984375, 1.4364013671875, 1.68359375, 1.9307861328125, 2.177978515625, 2.4251708984375, 2.67236328125, 2.9195556640625, 3.166748046875, 3.4139404296875, 3.6611328125, 3.9083251953125, 4.155517578125, 4.4027099609375, 4.64990234375, 4.8970947265625, 5.144287109375, 5.3914794921875, 5.638671875, 5.8858642578125, 6.133056640625, 6.3802490234375, 6.62744140625, 6.8746337890625, 7.121826171875, 7.3690185546875, 7.6162109375, 7.8634033203125, 8.110595703125, 8.3577880859375, 8.60498046875, 8.8521728515625, 9.099365234375, 9.3465576171875, 9.59375]}, "gradients/encoder.encoder.layers.4.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 2.0, 1.0, 2.0, 10.0, 13.0, 9.0, 19.0, 21.0, 29.0, 44.0, 55.0, 65.0, 64.0, 62.0, 69.0, 78.0, 68.0, 67.0, 72.0, 47.0, 60.0, 33.0, 21.0, 24.0, 23.0, 17.0, 9.0, 8.0, 9.0, 7.0, 3.0, 1.0, 2.0, 2.0, 0.0, 1.0], "bins": [-8.328125, -8.136993408203125, -7.94586181640625, -7.754730224609375, -7.5635986328125, -7.372467041015625, -7.18133544921875, -6.990203857421875, -6.799072265625, -6.607940673828125, -6.41680908203125, -6.225677490234375, -6.0345458984375, -5.843414306640625, -5.65228271484375, -5.461151123046875, -5.27001953125, -5.078887939453125, -4.88775634765625, -4.696624755859375, -4.5054931640625, -4.314361572265625, -4.12322998046875, -3.932098388671875, -3.740966796875, -3.549835205078125, -3.35870361328125, -3.167572021484375, -2.9764404296875, -2.785308837890625, -2.59417724609375, -2.403045654296875, -2.2119140625, -2.020782470703125, -1.82965087890625, -1.638519287109375, -1.4473876953125, -1.256256103515625, -1.06512451171875, -0.873992919921875, -0.682861328125, -0.491729736328125, -0.30059814453125, -0.109466552734375, 0.0816650390625, 0.272796630859375, 0.46392822265625, 0.655059814453125, 0.84619140625, 1.037322998046875, 1.22845458984375, 1.419586181640625, 1.6107177734375, 1.801849365234375, 1.99298095703125, 2.184112548828125, 2.375244140625, 2.566375732421875, 2.75750732421875, 2.948638916015625, 3.1397705078125, 3.330902099609375, 3.52203369140625, 3.713165283203125, 3.904296875]}, "gradients/encoder.encoder.layers.4.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 3.0, 3.0, 2.0, 6.0, 5.0, 9.0, 15.0, 18.0, 37.0, 34.0, 41.0, 61.0, 102.0, 152.0, 202.0, 383.0, 534.0, 1014.0, 2043.0, 5361.0, 21625.0, 154822.0, 743725.0, 94609.0, 15375.0, 4205.0, 1783.0, 897.0, 539.0, 330.0, 203.0, 126.0, 95.0, 62.0, 41.0, 23.0, 19.0, 17.0, 8.0, 16.0, 7.0, 2.0, 6.0, 1.0, 1.0, 4.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-17.265625, -16.697509765625, -16.12939453125, -15.561279296875, -14.9931640625, -14.425048828125, -13.85693359375, -13.288818359375, -12.720703125, -12.152587890625, -11.58447265625, -11.016357421875, -10.4482421875, -9.880126953125, -9.31201171875, -8.743896484375, -8.17578125, -7.607666015625, -7.03955078125, -6.471435546875, -5.9033203125, -5.335205078125, -4.76708984375, -4.198974609375, -3.630859375, -3.062744140625, -2.49462890625, -1.926513671875, -1.3583984375, -0.790283203125, -0.22216796875, 0.345947265625, 0.9140625, 1.482177734375, 2.05029296875, 2.618408203125, 3.1865234375, 3.754638671875, 4.32275390625, 4.890869140625, 5.458984375, 6.027099609375, 6.59521484375, 7.163330078125, 7.7314453125, 8.299560546875, 8.86767578125, 9.435791015625, 10.00390625, 10.572021484375, 11.14013671875, 11.708251953125, 12.2763671875, 12.844482421875, 13.41259765625, 13.980712890625, 14.548828125, 15.116943359375, 15.68505859375, 16.253173828125, 16.8212890625, 17.389404296875, 17.95751953125, 18.525634765625, 19.09375]}, "gradients/encoder.encoder.layers.4.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 4.0, 2.0, 4.0, 11.0, 4.0, 17.0, 10.0, 17.0, 22.0, 28.0, 27.0, 25.0, 34.0, 39.0, 42.0, 61.0, 52.0, 53.0, 59.0, 65.0, 66.0, 49.0, 53.0, 35.0, 36.0, 36.0, 27.0, 33.0, 16.0, 19.0, 14.0, 9.0, 12.0, 5.0, 8.0, 2.0, 3.0, 4.0, 1.0, 2.0, 2.0, 4.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-21.03125, -20.4228515625, -19.814453125, -19.2060546875, -18.59765625, -17.9892578125, -17.380859375, -16.7724609375, -16.1640625, -15.5556640625, -14.947265625, -14.3388671875, -13.73046875, -13.1220703125, -12.513671875, -11.9052734375, -11.296875, -10.6884765625, -10.080078125, -9.4716796875, -8.86328125, -8.2548828125, -7.646484375, -7.0380859375, -6.4296875, -5.8212890625, -5.212890625, -4.6044921875, -3.99609375, -3.3876953125, -2.779296875, -2.1708984375, -1.5625, -0.9541015625, -0.345703125, 0.2626953125, 0.87109375, 1.4794921875, 2.087890625, 2.6962890625, 3.3046875, 3.9130859375, 4.521484375, 5.1298828125, 5.73828125, 6.3466796875, 6.955078125, 7.5634765625, 8.171875, 8.7802734375, 9.388671875, 9.9970703125, 10.60546875, 11.2138671875, 11.822265625, 12.4306640625, 13.0390625, 13.6474609375, 14.255859375, 14.8642578125, 15.47265625, 16.0810546875, 16.689453125, 17.2978515625, 17.90625]}, "gradients/encoder.encoder.layers.4.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 3.0, 4.0, 0.0, 0.0, 3.0, 1.0, 4.0, 3.0, 5.0, 6.0, 15.0, 18.0, 32.0, 51.0, 82.0, 199.0, 465.0, 1177.0, 4266.0, 28874.0, 510261.0, 468008.0, 28816.0, 4180.0, 1179.0, 427.0, 211.0, 108.0, 61.0, 29.0, 20.0, 19.0, 8.0, 13.0, 8.0, 2.0, 6.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.19140625, -5.97027587890625, -5.7491455078125, -5.52801513671875, -5.306884765625, -5.08575439453125, -4.8646240234375, -4.64349365234375, -4.42236328125, -4.20123291015625, -3.9801025390625, -3.75897216796875, -3.537841796875, -3.31671142578125, -3.0955810546875, -2.87445068359375, -2.6533203125, -2.43218994140625, -2.2110595703125, -1.98992919921875, -1.768798828125, -1.54766845703125, -1.3265380859375, -1.10540771484375, -0.88427734375, -0.66314697265625, -0.4420166015625, -0.22088623046875, 0.000244140625, 0.22137451171875, 0.4425048828125, 0.66363525390625, 0.884765625, 1.10589599609375, 1.3270263671875, 1.54815673828125, 1.769287109375, 1.99041748046875, 2.2115478515625, 2.43267822265625, 2.65380859375, 2.87493896484375, 3.0960693359375, 3.31719970703125, 3.538330078125, 3.75946044921875, 3.9805908203125, 4.20172119140625, 4.4228515625, 4.64398193359375, 4.8651123046875, 5.08624267578125, 5.307373046875, 5.52850341796875, 5.7496337890625, 5.97076416015625, 6.19189453125, 6.41302490234375, 6.6341552734375, 6.85528564453125, 7.076416015625, 7.29754638671875, 7.5186767578125, 7.73980712890625, 7.9609375]}, "gradients/encoder.encoder.layers.4.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 3.0, 1.0, 3.0, 3.0, 4.0, 3.0, 4.0, 1.0, 4.0, 8.0, 7.0, 13.0, 9.0, 16.0, 18.0, 30.0, 24.0, 36.0, 50.0, 43.0, 69.0, 87.0, 96.0, 69.0, 78.0, 67.0, 48.0, 38.0, 35.0, 24.0, 29.0, 15.0, 14.0, 12.0, 12.0, 6.0, 4.0, 4.0, 11.0, 7.0, 2.0, 5.0, 1.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0], "bins": [-0.0012674331665039062, -0.0012329965829849243, -0.0011985599994659424, -0.0011641234159469604, -0.0011296868324279785, -0.0010952502489089966, -0.0010608136653900146, -0.0010263770818710327, -0.0009919404983520508, -0.0009575039148330688, -0.0009230673313140869, -0.000888630747795105, -0.000854194164276123, -0.0008197575807571411, -0.0007853209972381592, -0.0007508844137191772, -0.0007164478302001953, -0.0006820112466812134, -0.0006475746631622314, -0.0006131380796432495, -0.0005787014961242676, -0.0005442649126052856, -0.0005098283290863037, -0.0004753917455673218, -0.00044095516204833984, -0.0004065185785293579, -0.000372081995010376, -0.00033764541149139404, -0.0003032088279724121, -0.0002687722444534302, -0.00023433566093444824, -0.0001998990774154663, -0.00016546249389648438, -0.00013102591037750244, -9.658932685852051e-05, -6.215274333953857e-05, -2.771615982055664e-05, 6.720423698425293e-06, 4.1157007217407227e-05, 7.559359073638916e-05, 0.0001100301742553711, 0.00014446675777435303, 0.00017890334129333496, 0.0002133399248123169, 0.00024777650833129883, 0.00028221309185028076, 0.0003166496753692627, 0.00035108625888824463, 0.00038552284240722656, 0.0004199594259262085, 0.00045439600944519043, 0.0004888325929641724, 0.0005232691764831543, 0.0005577057600021362, 0.0005921423435211182, 0.0006265789270401001, 0.000661015510559082, 0.000695452094078064, 0.0007298886775970459, 0.0007643252611160278, 0.0007987618446350098, 0.0008331984281539917, 0.0008676350116729736, 0.0009020715951919556, 0.0009365081787109375]}, "gradients/encoder.encoder.layers.4.attention.q_proj.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 6.0, 4.0, 3.0, 6.0, 5.0, 9.0, 9.0, 10.0, 14.0, 25.0, 28.0, 50.0, 65.0, 120.0, 171.0, 266.0, 427.0, 784.0, 1554.0, 3347.0, 9176.0, 37268.0, 227487.0, 640746.0, 97598.0, 18834.0, 5646.0, 2280.0, 1107.0, 549.0, 341.0, 193.0, 131.0, 95.0, 55.0, 36.0, 30.0, 23.0, 17.0, 13.0, 11.0, 11.0, 6.0, 5.0, 0.0, 3.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.95703125, -3.83050537109375, -3.7039794921875, -3.57745361328125, -3.450927734375, -3.32440185546875, -3.1978759765625, -3.07135009765625, -2.94482421875, -2.81829833984375, -2.6917724609375, -2.56524658203125, -2.438720703125, -2.31219482421875, -2.1856689453125, -2.05914306640625, -1.9326171875, -1.80609130859375, -1.6795654296875, -1.55303955078125, -1.426513671875, -1.29998779296875, -1.1734619140625, -1.04693603515625, -0.92041015625, -0.79388427734375, -0.6673583984375, -0.54083251953125, -0.414306640625, -0.28778076171875, -0.1612548828125, -0.03472900390625, 0.091796875, 0.21832275390625, 0.3448486328125, 0.47137451171875, 0.597900390625, 0.72442626953125, 0.8509521484375, 0.97747802734375, 1.10400390625, 1.23052978515625, 1.3570556640625, 1.48358154296875, 1.610107421875, 1.73663330078125, 1.8631591796875, 1.98968505859375, 2.1162109375, 2.24273681640625, 2.3692626953125, 2.49578857421875, 2.622314453125, 2.74884033203125, 2.8753662109375, 3.00189208984375, 3.12841796875, 3.25494384765625, 3.3814697265625, 3.50799560546875, 3.634521484375, 3.76104736328125, 3.8875732421875, 4.01409912109375, 4.140625]}, "gradients/encoder.encoder.layers.4.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 4.0, 3.0, 7.0, 5.0, 3.0, 12.0, 19.0, 17.0, 21.0, 33.0, 35.0, 42.0, 65.0, 71.0, 86.0, 102.0, 78.0, 83.0, 79.0, 65.0, 49.0, 28.0, 30.0, 19.0, 16.0, 17.0, 6.0, 5.0, 6.0, 2.0, 0.0, 0.0, 2.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-5.0234375, -4.87493896484375, -4.7264404296875, -4.57794189453125, -4.429443359375, -4.28094482421875, -4.1324462890625, -3.98394775390625, -3.83544921875, -3.68695068359375, -3.5384521484375, -3.38995361328125, -3.241455078125, -3.09295654296875, -2.9444580078125, -2.79595947265625, -2.6474609375, -2.49896240234375, -2.3504638671875, -2.20196533203125, -2.053466796875, -1.90496826171875, -1.7564697265625, -1.60797119140625, -1.45947265625, -1.31097412109375, -1.1624755859375, -1.01397705078125, -0.865478515625, -0.71697998046875, -0.5684814453125, -0.41998291015625, -0.271484375, -0.12298583984375, 0.0255126953125, 0.17401123046875, 0.322509765625, 0.47100830078125, 0.6195068359375, 0.76800537109375, 0.91650390625, 1.06500244140625, 1.2135009765625, 1.36199951171875, 1.510498046875, 1.65899658203125, 1.8074951171875, 1.95599365234375, 2.1044921875, 2.25299072265625, 2.4014892578125, 2.54998779296875, 2.698486328125, 2.84698486328125, 2.9954833984375, 3.14398193359375, 3.29248046875, 3.44097900390625, 3.5894775390625, 3.73797607421875, 3.886474609375, 4.03497314453125, 4.1834716796875, 4.33197021484375, 4.48046875]}, "gradients/encoder.encoder.layers.4.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 3.0, 4.0, 7.0, 6.0, 13.0, 17.0, 27.0, 47.0, 68.0, 120.0, 162.0, 214.0, 105.0, 69.0, 50.0, 42.0, 16.0, 13.0, 10.0, 5.0, 1.0, 5.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-111.75297546386719, -108.66079711914062, -105.5686264038086, -102.47644805908203, -99.38427734375, -96.29209899902344, -93.19992065429688, -90.10774993896484, -87.01557922363281, -83.92340087890625, -80.83123016357422, -77.73905181884766, -74.64688110351562, -71.55470275878906, -68.4625244140625, -65.37035369873047, -62.278175354003906, -59.18600082397461, -56.09382629394531, -53.00164794921875, -49.90947723388672, -46.817298889160156, -43.72512435913086, -40.63294982910156, -37.540775299072266, -34.44860076904297, -31.356426239013672, -28.264249801635742, -25.172075271606445, -22.07990074157715, -18.98772430419922, -15.895549774169922, -12.803375244140625, -9.711200714111328, -6.619025230407715, -3.5268497467041016, -0.4346752166748047, 2.657499313354492, 5.749675750732422, 8.841850280761719, 11.934024810791016, 15.026199340820312, 18.11837387084961, 21.21055030822754, 24.302724838256836, 27.394899368286133, 30.487075805664062, 33.57925033569336, 36.671424865722656, 39.76359939575195, 42.85577392578125, 45.94795227050781, 49.040122985839844, 52.132301330566406, 55.2244758605957, 58.316650390625, 61.4088249206543, 64.5009994506836, 67.59317779541016, 70.68534851074219, 73.77752685546875, 76.86969757080078, 79.96187591552734, 83.05404663085938, 86.14622497558594]}, "gradients/encoder.encoder.layers.4.layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 3.0, 2.0, 1.0, 0.0, 7.0, 2.0, 6.0, 11.0, 9.0, 9.0, 10.0, 13.0, 26.0, 23.0, 28.0, 25.0, 33.0, 36.0, 33.0, 45.0, 69.0, 87.0, 101.0, 73.0, 60.0, 37.0, 38.0, 38.0, 25.0, 32.0, 32.0, 15.0, 17.0, 14.0, 17.0, 6.0, 5.0, 7.0, 10.0, 7.0, 2.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-69.12016296386719, -66.59635925292969, -64.07254791259766, -61.548744201660156, -59.02493667602539, -56.501129150390625, -53.977325439453125, -51.45351791381836, -48.929710388183594, -46.40590286254883, -43.88209915161133, -41.35829162597656, -38.8344841003418, -36.31067657470703, -33.78687286376953, -31.263065338134766, -28.739261627197266, -26.215456008911133, -23.691648483276367, -21.167842864990234, -18.64403533935547, -16.120229721069336, -13.596424102783203, -11.072616577148438, -8.548810958862305, -6.0250043869018555, -3.5011982917785645, -0.9773921966552734, 1.5464143753051758, 4.070220947265625, 6.594026565551758, 9.117834091186523, 11.641639709472656, 14.165446281433105, 16.689252853393555, 19.213058471679688, 21.736865997314453, 24.260671615600586, 26.78447723388672, 29.308284759521484, 31.832090377807617, 34.35589599609375, 36.879703521728516, 39.40351104736328, 41.92731475830078, 44.45112228393555, 46.97492980957031, 49.49873352050781, 52.02254104614258, 54.546348571777344, 57.070152282714844, 59.59395980834961, 62.117767333984375, 64.64157104492188, 67.16537475585938, 69.6891860961914, 72.2129898071289, 74.7367935180664, 77.26060485839844, 79.78440856933594, 82.30821228027344, 84.83202362060547, 87.35582733154297, 89.879638671875, 92.4034423828125]}, "gradients/encoder.encoder.layers.3.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 1.0, 4.0, 10.0, 10.0, 13.0, 16.0, 23.0, 38.0, 57.0, 70.0, 149.0, 213.0, 383.0, 668.0, 1251.0, 2560.0, 5511.0, 12973.0, 39082.0, 182394.0, 1331849.0, 2192989.0, 334742.0, 58277.0, 17495.0, 6911.0, 3175.0, 1475.0, 799.0, 441.0, 250.0, 151.0, 109.0, 62.0, 46.0, 27.0, 20.0, 11.0, 11.0, 9.0, 5.0, 5.0, 5.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.25, -5.0916748046875, -4.933349609375, -4.7750244140625, -4.61669921875, -4.4583740234375, -4.300048828125, -4.1417236328125, -3.9833984375, -3.8250732421875, -3.666748046875, -3.5084228515625, -3.35009765625, -3.1917724609375, -3.033447265625, -2.8751220703125, -2.716796875, -2.5584716796875, -2.400146484375, -2.2418212890625, -2.08349609375, -1.9251708984375, -1.766845703125, -1.6085205078125, -1.4501953125, -1.2918701171875, -1.133544921875, -0.9752197265625, -0.81689453125, -0.6585693359375, -0.500244140625, -0.3419189453125, -0.18359375, -0.0252685546875, 0.133056640625, 0.2913818359375, 0.44970703125, 0.6080322265625, 0.766357421875, 0.9246826171875, 1.0830078125, 1.2413330078125, 1.399658203125, 1.5579833984375, 1.71630859375, 1.8746337890625, 2.032958984375, 2.1912841796875, 2.349609375, 2.5079345703125, 2.666259765625, 2.8245849609375, 2.98291015625, 3.1412353515625, 3.299560546875, 3.4578857421875, 3.6162109375, 3.7745361328125, 3.932861328125, 4.0911865234375, 4.24951171875, 4.4078369140625, 4.566162109375, 4.7244873046875, 4.8828125]}, "gradients/encoder.encoder.layers.3.feed_forward.output_dense.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 3.0, 4.0, 2.0, 2.0, 5.0, 2.0, 8.0, 5.0, 8.0, 10.0, 8.0, 14.0, 10.0, 19.0, 22.0, 21.0, 28.0, 23.0, 24.0, 29.0, 38.0, 32.0, 46.0, 40.0, 43.0, 37.0, 47.0, 39.0, 35.0, 48.0, 33.0, 40.0, 38.0, 31.0, 32.0, 28.0, 26.0, 20.0, 15.0, 13.0, 12.0, 11.0, 9.0, 9.0, 8.0, 10.0, 6.0, 0.0, 8.0, 2.0, 7.0, 4.0, 2.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0], "bins": [-3.048828125, -2.9493408203125, -2.849853515625, -2.7503662109375, -2.65087890625, -2.5513916015625, -2.451904296875, -2.3524169921875, -2.2529296875, -2.1534423828125, -2.053955078125, -1.9544677734375, -1.85498046875, -1.7554931640625, -1.656005859375, -1.5565185546875, -1.45703125, -1.3575439453125, -1.258056640625, -1.1585693359375, -1.05908203125, -0.9595947265625, -0.860107421875, -0.7606201171875, -0.6611328125, -0.5616455078125, -0.462158203125, -0.3626708984375, -0.26318359375, -0.1636962890625, -0.064208984375, 0.0352783203125, 0.134765625, 0.2342529296875, 0.333740234375, 0.4332275390625, 0.53271484375, 0.6322021484375, 0.731689453125, 0.8311767578125, 0.9306640625, 1.0301513671875, 1.129638671875, 1.2291259765625, 1.32861328125, 1.4281005859375, 1.527587890625, 1.6270751953125, 1.7265625, 1.8260498046875, 1.925537109375, 2.0250244140625, 2.12451171875, 2.2239990234375, 2.323486328125, 2.4229736328125, 2.5224609375, 2.6219482421875, 2.721435546875, 2.8209228515625, 2.92041015625, 3.0198974609375, 3.119384765625, 3.2188720703125, 3.318359375]}, "gradients/encoder.encoder.layers.3.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 4.0, 5.0, 9.0, 16.0, 12.0, 25.0, 23.0, 35.0, 57.0, 99.0, 167.0, 301.0, 802.0, 3874.0, 63711.0, 3958137.0, 157495.0, 7073.0, 1224.0, 435.0, 236.0, 163.0, 102.0, 73.0, 57.0, 44.0, 40.0, 15.0, 16.0, 13.0, 10.0, 6.0, 6.0, 2.0, 2.0, 1.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-17.46875, -16.78662109375, -16.1044921875, -15.42236328125, -14.740234375, -14.05810546875, -13.3759765625, -12.69384765625, -12.01171875, -11.32958984375, -10.6474609375, -9.96533203125, -9.283203125, -8.60107421875, -7.9189453125, -7.23681640625, -6.5546875, -5.87255859375, -5.1904296875, -4.50830078125, -3.826171875, -3.14404296875, -2.4619140625, -1.77978515625, -1.09765625, -0.41552734375, 0.2666015625, 0.94873046875, 1.630859375, 2.31298828125, 2.9951171875, 3.67724609375, 4.359375, 5.04150390625, 5.7236328125, 6.40576171875, 7.087890625, 7.77001953125, 8.4521484375, 9.13427734375, 9.81640625, 10.49853515625, 11.1806640625, 11.86279296875, 12.544921875, 13.22705078125, 13.9091796875, 14.59130859375, 15.2734375, 15.95556640625, 16.6376953125, 17.31982421875, 18.001953125, 18.68408203125, 19.3662109375, 20.04833984375, 20.73046875, 21.41259765625, 22.0947265625, 22.77685546875, 23.458984375, 24.14111328125, 24.8232421875, 25.50537109375, 26.1875]}, "gradients/encoder.encoder.layers.3.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 2.0, 5.0, 5.0, 9.0, 13.0, 18.0, 49.0, 73.0, 100.0, 234.0, 549.0, 1131.0, 1007.0, 419.0, 217.0, 87.0, 62.0, 36.0, 25.0, 13.0, 11.0, 7.0, 4.0, 3.0, 3.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.0546875, -7.5479736328125, -7.041259765625, -6.5345458984375, -6.02783203125, -5.5211181640625, -5.014404296875, -4.5076904296875, -4.0009765625, -3.4942626953125, -2.987548828125, -2.4808349609375, -1.97412109375, -1.4674072265625, -0.960693359375, -0.4539794921875, 0.052734375, 0.5594482421875, 1.066162109375, 1.5728759765625, 2.07958984375, 2.5863037109375, 3.093017578125, 3.5997314453125, 4.1064453125, 4.6131591796875, 5.119873046875, 5.6265869140625, 6.13330078125, 6.6400146484375, 7.146728515625, 7.6534423828125, 8.16015625, 8.6668701171875, 9.173583984375, 9.6802978515625, 10.18701171875, 10.6937255859375, 11.200439453125, 11.7071533203125, 12.2138671875, 12.7205810546875, 13.227294921875, 13.7340087890625, 14.24072265625, 14.7474365234375, 15.254150390625, 15.7608642578125, 16.267578125, 16.7742919921875, 17.281005859375, 17.7877197265625, 18.29443359375, 18.8011474609375, 19.307861328125, 19.8145751953125, 20.3212890625, 20.8280029296875, 21.334716796875, 21.8414306640625, 22.34814453125, 22.8548583984375, 23.361572265625, 23.8682861328125, 24.375]}, "gradients/encoder.encoder.layers.3.final_layer_norm.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 2.0, 1.0, 0.0, 4.0, 1.0, 1.0, 1.0, 2.0, 8.0, 8.0, 14.0, 47.0, 149.0, 246.0, 257.0, 149.0, 61.0, 26.0, 11.0, 9.0, 6.0, 3.0, 3.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-84.5615463256836, -79.55257415771484, -74.54360961914062, -69.53463745117188, -64.52566528320312, -59.516693115234375, -54.50772476196289, -49.498756408691406, -44.489784240722656, -39.480812072753906, -34.47184371948242, -29.462873458862305, -24.453903198242188, -19.44493293762207, -14.435962677001953, -9.426994323730469, -4.418022155761719, 0.5909481048583984, 5.599918365478516, 10.608888626098633, 15.61785888671875, 20.626829147338867, 25.635799407958984, 30.64476776123047, 35.65373992919922, 40.66271209716797, 45.67168045043945, 50.68064880371094, 55.68962097167969, 60.69859313964844, 65.70756530761719, 70.7165298461914, 75.72549438476562, 80.73446655273438, 85.74343872070312, 90.75240325927734, 95.7613754272461, 100.77034759521484, 105.77931213378906, 110.78828430175781, 115.79725646972656, 120.80622863769531, 125.81520080566406, 130.8241729736328, 135.8331298828125, 140.84210205078125, 145.85107421875, 150.86004638671875, 155.8690185546875, 160.87799072265625, 165.886962890625, 170.89593505859375, 175.9049072265625, 180.9138641357422, 185.92283630371094, 190.9318084716797, 195.94078063964844, 200.9497528076172, 205.95872497558594, 210.9676971435547, 215.97665405273438, 220.98562622070312, 225.99459838867188, 231.00357055664062, 236.01254272460938]}, "gradients/encoder.encoder.layers.3.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 4.0, 4.0, 1.0, 9.0, 7.0, 8.0, 5.0, 22.0, 14.0, 16.0, 24.0, 24.0, 24.0, 35.0, 35.0, 42.0, 51.0, 66.0, 48.0, 58.0, 56.0, 61.0, 58.0, 42.0, 52.0, 40.0, 33.0, 23.0, 32.0, 24.0, 28.0, 13.0, 11.0, 11.0, 9.0, 8.0, 7.0, 2.0, 2.0, 3.0, 3.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0], "bins": [-71.04045867919922, -69.17880249023438, -67.31714630126953, -65.45549011230469, -63.59383773803711, -61.73218536376953, -59.87052917480469, -58.008872985839844, -56.147216796875, -54.285560607910156, -52.42390823364258, -50.562252044677734, -48.70059585571289, -46.83894348144531, -44.97728729248047, -43.115631103515625, -41.25397872924805, -39.3923225402832, -37.530670166015625, -35.66901397705078, -33.80735778808594, -31.945703506469727, -30.084049224853516, -28.222393035888672, -26.36073875427246, -24.49908447265625, -22.637428283691406, -20.775774002075195, -18.914119720458984, -17.05246353149414, -15.19080924987793, -13.329154014587402, -11.46750259399414, -9.605847358703613, -7.744192600250244, -5.882537841796875, -4.020882606506348, -2.1592273712158203, -0.2975730895996094, 1.564082145690918, 3.4257373809814453, 5.287392616271973, 7.149047374725342, 9.010702133178711, 10.872357368469238, 12.734012603759766, 14.595666885375977, 16.457321166992188, 18.31897735595703, 20.180631637573242, 22.042287826538086, 23.903942108154297, 25.76559829711914, 27.62725257873535, 29.488906860351562, 31.350563049316406, 33.21221923828125, 35.073875427246094, 36.93552780151367, 38.797183990478516, 40.65884017944336, 42.52049255371094, 44.38214874267578, 46.243804931640625, 48.1054573059082]}, "gradients/encoder.encoder.layers.3.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 0.0, 1.0, 1.0, 7.0, 6.0, 7.0, 8.0, 20.0, 21.0, 33.0, 62.0, 76.0, 125.0, 181.0, 209.0, 338.0, 510.0, 759.0, 1179.0, 2037.0, 3607.0, 6389.0, 12366.0, 25724.0, 56637.0, 126296.0, 255028.0, 279357.0, 148707.0, 66786.0, 30107.0, 14538.0, 7471.0, 3830.0, 2251.0, 1384.0, 832.0, 559.0, 374.0, 250.0, 149.0, 107.0, 68.0, 55.0, 45.0, 29.0, 16.0, 12.0, 4.0, 5.0, 5.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-5.765625, -5.5740966796875, -5.382568359375, -5.1910400390625, -4.99951171875, -4.8079833984375, -4.616455078125, -4.4249267578125, -4.2333984375, -4.0418701171875, -3.850341796875, -3.6588134765625, -3.46728515625, -3.2757568359375, -3.084228515625, -2.8927001953125, -2.701171875, -2.5096435546875, -2.318115234375, -2.1265869140625, -1.93505859375, -1.7435302734375, -1.552001953125, -1.3604736328125, -1.1689453125, -0.9774169921875, -0.785888671875, -0.5943603515625, -0.40283203125, -0.2113037109375, -0.019775390625, 0.1717529296875, 0.36328125, 0.5548095703125, 0.746337890625, 0.9378662109375, 1.12939453125, 1.3209228515625, 1.512451171875, 1.7039794921875, 1.8955078125, 2.0870361328125, 2.278564453125, 2.4700927734375, 2.66162109375, 2.8531494140625, 3.044677734375, 3.2362060546875, 3.427734375, 3.6192626953125, 3.810791015625, 4.0023193359375, 4.19384765625, 4.3853759765625, 4.576904296875, 4.7684326171875, 4.9599609375, 5.1514892578125, 5.343017578125, 5.5345458984375, 5.72607421875, 5.9176025390625, 6.109130859375, 6.3006591796875, 6.4921875]}, "gradients/encoder.encoder.layers.3.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 4.0, 1.0, 3.0, 4.0, 3.0, 3.0, 2.0, 6.0, 18.0, 12.0, 17.0, 15.0, 26.0, 16.0, 19.0, 39.0, 31.0, 36.0, 52.0, 47.0, 49.0, 52.0, 49.0, 52.0, 49.0, 56.0, 44.0, 36.0, 24.0, 32.0, 28.0, 34.0, 17.0, 29.0, 23.0, 14.0, 21.0, 5.0, 5.0, 14.0, 7.0, 5.0, 1.0, 3.0, 4.0, 3.0, 4.0, 2.0, 0.0, 1.0, 1.0], "bins": [-4.6640625, -4.535430908203125, -4.40679931640625, -4.278167724609375, -4.1495361328125, -4.020904541015625, -3.89227294921875, -3.763641357421875, -3.635009765625, -3.506378173828125, -3.37774658203125, -3.249114990234375, -3.1204833984375, -2.991851806640625, -2.86322021484375, -2.734588623046875, -2.60595703125, -2.477325439453125, -2.34869384765625, -2.220062255859375, -2.0914306640625, -1.962799072265625, -1.83416748046875, -1.705535888671875, -1.576904296875, -1.448272705078125, -1.31964111328125, -1.191009521484375, -1.0623779296875, -0.933746337890625, -0.80511474609375, -0.676483154296875, -0.5478515625, -0.419219970703125, -0.29058837890625, -0.161956787109375, -0.0333251953125, 0.095306396484375, 0.22393798828125, 0.352569580078125, 0.481201171875, 0.609832763671875, 0.73846435546875, 0.867095947265625, 0.9957275390625, 1.124359130859375, 1.25299072265625, 1.381622314453125, 1.51025390625, 1.638885498046875, 1.76751708984375, 1.896148681640625, 2.0247802734375, 2.153411865234375, 2.28204345703125, 2.410675048828125, 2.539306640625, 2.667938232421875, 2.79656982421875, 2.925201416015625, 3.0538330078125, 3.182464599609375, 3.31109619140625, 3.439727783203125, 3.568359375]}, "gradients/encoder.encoder.layers.3.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 3.0, 0.0, 4.0, 7.0, 7.0, 6.0, 6.0, 12.0, 20.0, 26.0, 34.0, 60.0, 71.0, 109.0, 195.0, 387.0, 792.0, 1980.0, 5957.0, 29010.0, 240136.0, 686808.0, 66433.0, 11182.0, 3024.0, 1092.0, 540.0, 280.0, 123.0, 83.0, 44.0, 40.0, 28.0, 18.0, 10.0, 6.0, 8.0, 9.0, 4.0, 3.0, 5.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-19.546875, -19.0216064453125, -18.496337890625, -17.9710693359375, -17.44580078125, -16.9205322265625, -16.395263671875, -15.8699951171875, -15.3447265625, -14.8194580078125, -14.294189453125, -13.7689208984375, -13.24365234375, -12.7183837890625, -12.193115234375, -11.6678466796875, -11.142578125, -10.6173095703125, -10.092041015625, -9.5667724609375, -9.04150390625, -8.5162353515625, -7.990966796875, -7.4656982421875, -6.9404296875, -6.4151611328125, -5.889892578125, -5.3646240234375, -4.83935546875, -4.3140869140625, -3.788818359375, -3.2635498046875, -2.73828125, -2.2130126953125, -1.687744140625, -1.1624755859375, -0.63720703125, -0.1119384765625, 0.413330078125, 0.9385986328125, 1.4638671875, 1.9891357421875, 2.514404296875, 3.0396728515625, 3.56494140625, 4.0902099609375, 4.615478515625, 5.1407470703125, 5.666015625, 6.1912841796875, 6.716552734375, 7.2418212890625, 7.76708984375, 8.2923583984375, 8.817626953125, 9.3428955078125, 9.8681640625, 10.3934326171875, 10.918701171875, 11.4439697265625, 11.96923828125, 12.4945068359375, 13.019775390625, 13.5450439453125, 14.0703125]}, "gradients/encoder.encoder.layers.3.attention.v_proj.bias": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 3.0, 1.0, 3.0, 2.0, 5.0, 2.0, 5.0, 9.0, 5.0, 8.0, 11.0, 14.0, 16.0, 29.0, 34.0, 24.0, 22.0, 38.0, 42.0, 42.0, 55.0, 55.0, 65.0, 60.0, 42.0, 56.0, 50.0, 36.0, 45.0, 43.0, 23.0, 25.0, 23.0, 19.0, 15.0, 17.0, 19.0, 18.0, 5.0, 13.0, 2.0, 5.0, 2.0, 1.0, 3.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-17.65625, -17.132568359375, -16.60888671875, -16.085205078125, -15.5615234375, -15.037841796875, -14.51416015625, -13.990478515625, -13.466796875, -12.943115234375, -12.41943359375, -11.895751953125, -11.3720703125, -10.848388671875, -10.32470703125, -9.801025390625, -9.27734375, -8.753662109375, -8.22998046875, -7.706298828125, -7.1826171875, -6.658935546875, -6.13525390625, -5.611572265625, -5.087890625, -4.564208984375, -4.04052734375, -3.516845703125, -2.9931640625, -2.469482421875, -1.94580078125, -1.422119140625, -0.8984375, -0.374755859375, 0.14892578125, 0.672607421875, 1.1962890625, 1.719970703125, 2.24365234375, 2.767333984375, 3.291015625, 3.814697265625, 4.33837890625, 4.862060546875, 5.3857421875, 5.909423828125, 6.43310546875, 6.956787109375, 7.48046875, 8.004150390625, 8.52783203125, 9.051513671875, 9.5751953125, 10.098876953125, 10.62255859375, 11.146240234375, 11.669921875, 12.193603515625, 12.71728515625, 13.240966796875, 13.7646484375, 14.288330078125, 14.81201171875, 15.335693359375, 15.859375]}, "gradients/encoder.encoder.layers.3.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 4.0, 2.0, 8.0, 6.0, 8.0, 7.0, 7.0, 11.0, 21.0, 24.0, 32.0, 57.0, 82.0, 140.0, 183.0, 297.0, 532.0, 1012.0, 1987.0, 3981.0, 9100.0, 24289.0, 80722.0, 492804.0, 319636.0, 74227.0, 22978.0, 8610.0, 3681.0, 1840.0, 941.0, 504.0, 253.0, 184.0, 133.0, 76.0, 47.0, 37.0, 21.0, 17.0, 13.0, 11.0, 11.0, 6.0, 10.0, 7.0, 3.0, 1.0, 0.0, 2.0, 2.0, 0.0, 1.0], "bins": [-3.818359375, -3.70916748046875, -3.5999755859375, -3.49078369140625, -3.381591796875, -3.27239990234375, -3.1632080078125, -3.05401611328125, -2.94482421875, -2.83563232421875, -2.7264404296875, -2.61724853515625, -2.508056640625, -2.39886474609375, -2.2896728515625, -2.18048095703125, -2.0712890625, -1.96209716796875, -1.8529052734375, -1.74371337890625, -1.634521484375, -1.52532958984375, -1.4161376953125, -1.30694580078125, -1.19775390625, -1.08856201171875, -0.9793701171875, -0.87017822265625, -0.760986328125, -0.65179443359375, -0.5426025390625, -0.43341064453125, -0.32421875, -0.21502685546875, -0.1058349609375, 0.00335693359375, 0.112548828125, 0.22174072265625, 0.3309326171875, 0.44012451171875, 0.54931640625, 0.65850830078125, 0.7677001953125, 0.87689208984375, 0.986083984375, 1.09527587890625, 1.2044677734375, 1.31365966796875, 1.4228515625, 1.53204345703125, 1.6412353515625, 1.75042724609375, 1.859619140625, 1.96881103515625, 2.0780029296875, 2.18719482421875, 2.29638671875, 2.40557861328125, 2.5147705078125, 2.62396240234375, 2.733154296875, 2.84234619140625, 2.9515380859375, 3.06072998046875, 3.169921875]}, "gradients/encoder.encoder.layers.3.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 2.0, 0.0, 1.0, 1.0, 2.0, 5.0, 4.0, 4.0, 3.0, 9.0, 11.0, 19.0, 16.0, 31.0, 31.0, 68.0, 103.0, 157.0, 150.0, 129.0, 94.0, 53.0, 40.0, 20.0, 21.0, 7.0, 7.0, 8.0, 4.0, 4.0, 5.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0017671585083007812, -0.0017026811838150024, -0.0016382038593292236, -0.0015737265348434448, -0.001509249210357666, -0.0014447718858718872, -0.0013802945613861084, -0.0013158172369003296, -0.0012513399124145508, -0.001186862587928772, -0.0011223852634429932, -0.0010579079389572144, -0.0009934306144714355, -0.0009289532899856567, -0.0008644759654998779, -0.0007999986410140991, -0.0007355213165283203, -0.0006710439920425415, -0.0006065666675567627, -0.0005420893430709839, -0.0004776120185852051, -0.00041313469409942627, -0.00034865736961364746, -0.00028418004512786865, -0.00021970272064208984, -0.00015522539615631104, -9.074807167053223e-05, -2.6270747184753418e-05, 3.820657730102539e-05, 0.0001026839017868042, 0.000167161226272583, 0.00023163855075836182, 0.0002961158752441406, 0.00036059319972991943, 0.00042507052421569824, 0.000489547848701477, 0.0005540251731872559, 0.0006185024976730347, 0.0006829798221588135, 0.0007474571466445923, 0.0008119344711303711, 0.0008764117956161499, 0.0009408891201019287, 0.0010053664445877075, 0.0010698437690734863, 0.0011343210935592651, 0.001198798418045044, 0.0012632757425308228, 0.0013277530670166016, 0.0013922303915023804, 0.0014567077159881592, 0.001521185040473938, 0.0015856623649597168, 0.0016501396894454956, 0.0017146170139312744, 0.0017790943384170532, 0.001843571662902832, 0.0019080489873886108, 0.0019725263118743896, 0.0020370036363601685, 0.0021014809608459473, 0.002165958285331726, 0.002230435609817505, 0.0022949129343032837, 0.0023593902587890625]}, "gradients/encoder.encoder.layers.3.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 5.0, 3.0, 6.0, 10.0, 25.0, 28.0, 57.0, 129.0, 335.0, 848.0, 2970.0, 14304.0, 138289.0, 807201.0, 71848.0, 9207.0, 2152.0, 670.0, 242.0, 102.0, 65.0, 28.0, 16.0, 8.0, 7.0, 3.0, 1.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0], "bins": [-9.1875, -8.9654541015625, -8.743408203125, -8.5213623046875, -8.29931640625, -8.0772705078125, -7.855224609375, -7.6331787109375, -7.4111328125, -7.1890869140625, -6.967041015625, -6.7449951171875, -6.52294921875, -6.3009033203125, -6.078857421875, -5.8568115234375, -5.634765625, -5.4127197265625, -5.190673828125, -4.9686279296875, -4.74658203125, -4.5245361328125, -4.302490234375, -4.0804443359375, -3.8583984375, -3.6363525390625, -3.414306640625, -3.1922607421875, -2.97021484375, -2.7481689453125, -2.526123046875, -2.3040771484375, -2.08203125, -1.8599853515625, -1.637939453125, -1.4158935546875, -1.19384765625, -0.9718017578125, -0.749755859375, -0.5277099609375, -0.3056640625, -0.0836181640625, 0.138427734375, 0.3604736328125, 0.58251953125, 0.8045654296875, 1.026611328125, 1.2486572265625, 1.470703125, 1.6927490234375, 1.914794921875, 2.1368408203125, 2.35888671875, 2.5809326171875, 2.802978515625, 3.0250244140625, 3.2470703125, 3.4691162109375, 3.691162109375, 3.9132080078125, 4.13525390625, 4.3572998046875, 4.579345703125, 4.8013916015625, 5.0234375]}, "gradients/encoder.encoder.layers.3.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 3.0, 2.0, 3.0, 2.0, 3.0, 5.0, 5.0, 6.0, 9.0, 10.0, 17.0, 15.0, 31.0, 27.0, 37.0, 60.0, 73.0, 78.0, 83.0, 91.0, 99.0, 87.0, 48.0, 45.0, 28.0, 38.0, 28.0, 17.0, 12.0, 13.0, 6.0, 8.0, 5.0, 5.0, 3.0, 4.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.89453125, -4.7388916015625, -4.583251953125, -4.4276123046875, -4.27197265625, -4.1163330078125, -3.960693359375, -3.8050537109375, -3.6494140625, -3.4937744140625, -3.338134765625, -3.1824951171875, -3.02685546875, -2.8712158203125, -2.715576171875, -2.5599365234375, -2.404296875, -2.2486572265625, -2.093017578125, -1.9373779296875, -1.78173828125, -1.6260986328125, -1.470458984375, -1.3148193359375, -1.1591796875, -1.0035400390625, -0.847900390625, -0.6922607421875, -0.53662109375, -0.3809814453125, -0.225341796875, -0.0697021484375, 0.0859375, 0.2415771484375, 0.397216796875, 0.5528564453125, 0.70849609375, 0.8641357421875, 1.019775390625, 1.1754150390625, 1.3310546875, 1.4866943359375, 1.642333984375, 1.7979736328125, 1.95361328125, 2.1092529296875, 2.264892578125, 2.4205322265625, 2.576171875, 2.7318115234375, 2.887451171875, 3.0430908203125, 3.19873046875, 3.3543701171875, 3.510009765625, 3.6656494140625, 3.8212890625, 3.9769287109375, 4.132568359375, 4.2882080078125, 4.44384765625, 4.5994873046875, 4.755126953125, 4.9107666015625, 5.06640625]}, "gradients/encoder.encoder.layers.3.layer_norm.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 9.0, 14.0, 25.0, 56.0, 104.0, 256.0, 263.0, 135.0, 55.0, 33.0, 19.0, 12.0, 7.0, 6.0, 6.0, 2.0, 1.0, 2.0, 1.0, 0.0, 4.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-64.4195327758789, -60.40513610839844, -56.39073944091797, -52.3763427734375, -48.36194610595703, -44.34754943847656, -40.333152770996094, -36.318756103515625, -32.304359436035156, -28.289962768554688, -24.27556610107422, -20.26116943359375, -16.24677276611328, -12.232376098632812, -8.217979431152344, -4.203582763671875, -0.18918609619140625, 3.8252105712890625, 7.839607238769531, 11.85400390625, 15.868400573730469, 19.882797241210938, 23.897193908691406, 27.911590576171875, 31.925987243652344, 35.94038391113281, 39.95478057861328, 43.96917724609375, 47.98357391357422, 51.99797058105469, 56.012367248535156, 60.026763916015625, 64.04116821289062, 68.0555648803711, 72.06996154785156, 76.08435821533203, 80.0987548828125, 84.11315155029297, 88.12754821777344, 92.1419448852539, 96.15634155273438, 100.17073822021484, 104.18513488769531, 108.19953155517578, 112.21392822265625, 116.22832489013672, 120.24272155761719, 124.25711822509766, 128.27151489257812, 132.28591918945312, 136.30030822753906, 140.314697265625, 144.3291015625, 148.343505859375, 152.35789489746094, 156.37228393554688, 160.38668823242188, 164.40109252929688, 168.4154815673828, 172.42987060546875, 176.44427490234375, 180.45867919921875, 184.4730682373047, 188.48745727539062, 192.50186157226562]}, "gradients/encoder.encoder.layers.3.layer_norm.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 2.0, 1.0, 5.0, 3.0, 5.0, 3.0, 8.0, 8.0, 4.0, 10.0, 16.0, 19.0, 16.0, 23.0, 17.0, 21.0, 32.0, 29.0, 27.0, 43.0, 52.0, 81.0, 86.0, 75.0, 65.0, 46.0, 40.0, 37.0, 39.0, 30.0, 29.0, 25.0, 21.0, 6.0, 17.0, 10.0, 11.0, 18.0, 11.0, 5.0, 6.0, 4.0, 5.0, 3.0, 1.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-58.536033630371094, -56.27876281738281, -54.021488189697266, -51.764217376708984, -49.50694274902344, -47.249671936035156, -44.992401123046875, -42.735130310058594, -40.47785568237305, -38.220584869384766, -35.96331024169922, -33.70603942871094, -31.448766708374023, -29.19149398803711, -26.934223175048828, -24.676950454711914, -22.419677734375, -20.162405014038086, -17.905132293701172, -15.64786148071289, -13.390588760375977, -11.133316040039062, -8.876044273376465, -6.618772506713867, -4.361499786376953, -2.1042275428771973, 0.1530447006225586, 2.4103169441223145, 4.66758918762207, 6.924861907958984, 9.182133674621582, 11.43940544128418, 13.696685791015625, 15.953958511352539, 18.211231231689453, 20.468502044677734, 22.72577476501465, 24.983047485351562, 27.240318298339844, 29.497591018676758, 31.754863739013672, 34.01213455200195, 36.2694091796875, 38.52667999267578, 40.78395080566406, 43.04122543334961, 45.29849624633789, 47.55577087402344, 49.81304168701172, 52.0703125, 54.32758712768555, 56.58485794067383, 58.842132568359375, 61.099403381347656, 63.35667419433594, 65.61394500732422, 67.8712158203125, 70.12848663330078, 72.38575744628906, 74.64303588867188, 76.90030670166016, 79.15757751464844, 81.41484832763672, 83.672119140625, 85.92939758300781]}, "gradients/encoder.encoder.layers.2.feed_forward.output_dense.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 3.0, 1.0, 0.0, 3.0, 3.0, 5.0, 6.0, 7.0, 14.0, 15.0, 13.0, 31.0, 47.0, 93.0, 117.0, 217.0, 430.0, 901.0, 1950.0, 4778.0, 13112.0, 51985.0, 340319.0, 2334943.0, 1250852.0, 151984.0, 28004.0, 8301.0, 3265.0, 1393.0, 685.0, 319.0, 198.0, 95.0, 64.0, 35.0, 25.0, 22.0, 16.0, 16.0, 3.0, 7.0, 1.0, 7.0, 3.0, 4.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0], "bins": [-5.74609375, -5.57611083984375, -5.4061279296875, -5.23614501953125, -5.066162109375, -4.89617919921875, -4.7261962890625, -4.55621337890625, -4.38623046875, -4.21624755859375, -4.0462646484375, -3.87628173828125, -3.706298828125, -3.53631591796875, -3.3663330078125, -3.19635009765625, -3.0263671875, -2.85638427734375, -2.6864013671875, -2.51641845703125, -2.346435546875, -2.17645263671875, -2.0064697265625, -1.83648681640625, -1.66650390625, -1.49652099609375, -1.3265380859375, -1.15655517578125, -0.986572265625, -0.81658935546875, -0.6466064453125, -0.47662353515625, -0.306640625, -0.13665771484375, 0.0333251953125, 0.20330810546875, 0.373291015625, 0.54327392578125, 0.7132568359375, 0.88323974609375, 1.05322265625, 1.22320556640625, 1.3931884765625, 1.56317138671875, 1.733154296875, 1.90313720703125, 2.0731201171875, 2.24310302734375, 2.4130859375, 2.58306884765625, 2.7530517578125, 2.92303466796875, 3.093017578125, 3.26300048828125, 3.4329833984375, 3.60296630859375, 3.77294921875, 3.94293212890625, 4.1129150390625, 4.28289794921875, 4.452880859375, 4.62286376953125, 4.7928466796875, 4.96282958984375, 5.1328125]}, "gradients/encoder.encoder.layers.2.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 4.0, 1.0, 3.0, 5.0, 2.0, 7.0, 3.0, 6.0, 7.0, 11.0, 11.0, 12.0, 12.0, 17.0, 16.0, 18.0, 19.0, 25.0, 37.0, 33.0, 37.0, 37.0, 47.0, 46.0, 31.0, 57.0, 55.0, 49.0, 37.0, 33.0, 29.0, 29.0, 37.0, 35.0, 20.0, 25.0, 21.0, 25.0, 19.0, 14.0, 12.0, 13.0, 8.0, 10.0, 7.0, 13.0, 2.0, 3.0, 4.0, 4.0, 1.0, 3.0, 2.0, 0.0, 2.0, 1.0, 1.0, 1.0], "bins": [-3.619140625, -3.50701904296875, -3.3948974609375, -3.28277587890625, -3.170654296875, -3.05853271484375, -2.9464111328125, -2.83428955078125, -2.72216796875, -2.61004638671875, -2.4979248046875, -2.38580322265625, -2.273681640625, -2.16156005859375, -2.0494384765625, -1.93731689453125, -1.8251953125, -1.71307373046875, -1.6009521484375, -1.48883056640625, -1.376708984375, -1.26458740234375, -1.1524658203125, -1.04034423828125, -0.92822265625, -0.81610107421875, -0.7039794921875, -0.59185791015625, -0.479736328125, -0.36761474609375, -0.2554931640625, -0.14337158203125, -0.03125, 0.08087158203125, 0.1929931640625, 0.30511474609375, 0.417236328125, 0.52935791015625, 0.6414794921875, 0.75360107421875, 0.86572265625, 0.97784423828125, 1.0899658203125, 1.20208740234375, 1.314208984375, 1.42633056640625, 1.5384521484375, 1.65057373046875, 1.7626953125, 1.87481689453125, 1.9869384765625, 2.09906005859375, 2.211181640625, 2.32330322265625, 2.4354248046875, 2.54754638671875, 2.65966796875, 2.77178955078125, 2.8839111328125, 2.99603271484375, 3.108154296875, 3.22027587890625, 3.3323974609375, 3.44451904296875, 3.556640625]}, "gradients/encoder.encoder.layers.2.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 9.0, 6.0, 8.0, 12.0, 18.0, 25.0, 32.0, 63.0, 95.0, 127.0, 269.0, 582.0, 1721.0, 9652.0, 145235.0, 3883283.0, 141019.0, 9212.0, 1669.0, 565.0, 276.0, 146.0, 79.0, 46.0, 36.0, 34.0, 13.0, 13.0, 10.0, 10.0, 4.0, 5.0, 1.0, 4.0, 5.0, 0.0, 4.0, 1.0, 2.0, 0.0, 3.0, 0.0, 0.0, 1.0], "bins": [-18.125, -17.6143798828125, -17.103759765625, -16.5931396484375, -16.08251953125, -15.5718994140625, -15.061279296875, -14.5506591796875, -14.0400390625, -13.5294189453125, -13.018798828125, -12.5081787109375, -11.99755859375, -11.4869384765625, -10.976318359375, -10.4656982421875, -9.955078125, -9.4444580078125, -8.933837890625, -8.4232177734375, -7.91259765625, -7.4019775390625, -6.891357421875, -6.3807373046875, -5.8701171875, -5.3594970703125, -4.848876953125, -4.3382568359375, -3.82763671875, -3.3170166015625, -2.806396484375, -2.2957763671875, -1.78515625, -1.2745361328125, -0.763916015625, -0.2532958984375, 0.25732421875, 0.7679443359375, 1.278564453125, 1.7891845703125, 2.2998046875, 2.8104248046875, 3.321044921875, 3.8316650390625, 4.34228515625, 4.8529052734375, 5.363525390625, 5.8741455078125, 6.384765625, 6.8953857421875, 7.406005859375, 7.9166259765625, 8.42724609375, 8.9378662109375, 9.448486328125, 9.9591064453125, 10.4697265625, 10.9803466796875, 11.490966796875, 12.0015869140625, 12.51220703125, 13.0228271484375, 13.533447265625, 14.0440673828125, 14.5546875]}, "gradients/encoder.encoder.layers.2.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 0.0, 2.0, 2.0, 7.0, 11.0, 11.0, 30.0, 43.0, 67.0, 93.0, 210.0, 493.0, 867.0, 1074.0, 591.0, 256.0, 144.0, 64.0, 35.0, 28.0, 19.0, 19.0, 7.0, 5.0, 2.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-15.046875, -14.6060791015625, -14.165283203125, -13.7244873046875, -13.28369140625, -12.8428955078125, -12.402099609375, -11.9613037109375, -11.5205078125, -11.0797119140625, -10.638916015625, -10.1981201171875, -9.75732421875, -9.3165283203125, -8.875732421875, -8.4349365234375, -7.994140625, -7.5533447265625, -7.112548828125, -6.6717529296875, -6.23095703125, -5.7901611328125, -5.349365234375, -4.9085693359375, -4.4677734375, -4.0269775390625, -3.586181640625, -3.1453857421875, -2.70458984375, -2.2637939453125, -1.822998046875, -1.3822021484375, -0.94140625, -0.5006103515625, -0.059814453125, 0.3809814453125, 0.82177734375, 1.2625732421875, 1.703369140625, 2.1441650390625, 2.5849609375, 3.0257568359375, 3.466552734375, 3.9073486328125, 4.34814453125, 4.7889404296875, 5.229736328125, 5.6705322265625, 6.111328125, 6.5521240234375, 6.992919921875, 7.4337158203125, 7.87451171875, 8.3153076171875, 8.756103515625, 9.1968994140625, 9.6376953125, 10.0784912109375, 10.519287109375, 10.9600830078125, 11.40087890625, 11.8416748046875, 12.282470703125, 12.7232666015625, 13.1640625]}, "gradients/encoder.encoder.layers.2.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 3.0, 3.0, 3.0, 10.0, 18.0, 61.0, 185.0, 351.0, 268.0, 74.0, 21.0, 8.0, 4.0, 4.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-114.16326141357422, -108.63911437988281, -103.11497497558594, -97.59082794189453, -92.06668853759766, -86.54254150390625, -81.01840209960938, -75.49425506591797, -69.97010803222656, -64.44596099853516, -58.92182159423828, -53.397674560546875, -47.87353515625, -42.349388122558594, -36.82524490356445, -31.301101684570312, -25.776962280273438, -20.252819061279297, -14.72867488861084, -9.204530715942383, -3.680387496948242, 1.8437557220458984, 7.367900848388672, 12.892044067382812, 18.416187286376953, 23.940330505371094, 29.464473724365234, 34.988616943359375, 40.51276397705078, 46.036903381347656, 51.56105041503906, 57.0851936340332, 62.609344482421875, 68.13349151611328, 73.65763092041016, 79.18177795410156, 84.70591735839844, 90.23006439208984, 95.75421142578125, 101.27835083007812, 106.802490234375, 112.3266372680664, 117.85077667236328, 123.37492370605469, 128.89906311035156, 134.42320251464844, 139.94735717773438, 145.47149658203125, 150.99563598632812, 156.519775390625, 162.04393005371094, 167.5680694580078, 173.0922088623047, 178.61634826660156, 184.1405029296875, 189.66464233398438, 195.1887969970703, 200.7129364013672, 206.23709106445312, 211.76123046875, 217.28536987304688, 222.80950927734375, 228.3336639404297, 233.85780334472656, 239.38194274902344]}, "gradients/encoder.encoder.layers.2.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 4.0, 1.0, 3.0, 2.0, 5.0, 5.0, 9.0, 9.0, 8.0, 20.0, 24.0, 26.0, 33.0, 35.0, 38.0, 45.0, 44.0, 49.0, 67.0, 72.0, 69.0, 61.0, 60.0, 40.0, 48.0, 42.0, 50.0, 32.0, 21.0, 20.0, 12.0, 16.0, 9.0, 8.0, 4.0, 7.0, 4.0, 4.0, 2.0, 3.0, 3.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0], "bins": [-61.8324089050293, -60.15514373779297, -58.47787857055664, -56.80061340332031, -55.123348236083984, -53.446083068847656, -51.76881408691406, -50.091552734375, -48.414283752441406, -46.73701858520508, -45.05975341796875, -43.38248825073242, -41.705223083496094, -40.027957916259766, -38.35069274902344, -36.673423767089844, -34.99616241455078, -33.31889724731445, -31.641632080078125, -29.964366912841797, -28.28710174560547, -26.60983657836914, -24.93256950378418, -23.25530433654785, -21.578039169311523, -19.900774002075195, -18.223508834838867, -16.546241760253906, -14.868977546691895, -13.191712379455566, -11.514446258544922, -9.837181091308594, -8.159915924072266, -6.4826507568359375, -4.805385112762451, -3.128119468688965, -1.4508543014526367, 0.2264108657836914, 1.903676986694336, 3.580942153930664, 5.258207321166992, 6.93547248840332, 8.612737655639648, 10.290003776550293, 11.967268943786621, 13.64453411102295, 15.321800231933594, 16.999065399169922, 18.67633056640625, 20.353595733642578, 22.030860900878906, 23.708126068115234, 25.385391235351562, 27.06265640258789, 28.73992347717285, 30.41718864440918, 32.094451904296875, 33.7717170715332, 35.44898223876953, 37.12624740600586, 38.80351257324219, 40.480777740478516, 42.158042907714844, 43.83531188964844, 45.512577056884766]}, "gradients/encoder.encoder.layers.2.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 5.0, 7.0, 6.0, 8.0, 7.0, 9.0, 14.0, 16.0, 20.0, 39.0, 51.0, 72.0, 111.0, 179.0, 269.0, 477.0, 840.0, 1564.0, 3037.0, 6077.0, 12965.0, 30549.0, 74865.0, 194597.0, 355707.0, 219456.0, 85099.0, 34138.0, 14578.0, 6674.0, 3191.0, 1663.0, 848.0, 543.0, 290.0, 194.0, 123.0, 73.0, 58.0, 40.0, 33.0, 19.0, 14.0, 8.0, 6.0, 5.0, 5.0, 7.0, 5.0, 3.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0], "bins": [-6.40625, -6.20953369140625, -6.0128173828125, -5.81610107421875, -5.619384765625, -5.42266845703125, -5.2259521484375, -5.02923583984375, -4.83251953125, -4.63580322265625, -4.4390869140625, -4.24237060546875, -4.045654296875, -3.84893798828125, -3.6522216796875, -3.45550537109375, -3.2587890625, -3.06207275390625, -2.8653564453125, -2.66864013671875, -2.471923828125, -2.27520751953125, -2.0784912109375, -1.88177490234375, -1.68505859375, -1.48834228515625, -1.2916259765625, -1.09490966796875, -0.898193359375, -0.70147705078125, -0.5047607421875, -0.30804443359375, -0.111328125, 0.08538818359375, 0.2821044921875, 0.47882080078125, 0.675537109375, 0.87225341796875, 1.0689697265625, 1.26568603515625, 1.46240234375, 1.65911865234375, 1.8558349609375, 2.05255126953125, 2.249267578125, 2.44598388671875, 2.6427001953125, 2.83941650390625, 3.0361328125, 3.23284912109375, 3.4295654296875, 3.62628173828125, 3.822998046875, 4.01971435546875, 4.2164306640625, 4.41314697265625, 4.60986328125, 4.80657958984375, 5.0032958984375, 5.20001220703125, 5.396728515625, 5.59344482421875, 5.7901611328125, 5.98687744140625, 6.18359375]}, "gradients/encoder.encoder.layers.2.attention.out_proj.bias": {"_type": "histogram", "values": [3.0, 1.0, 0.0, 0.0, 5.0, 7.0, 3.0, 3.0, 2.0, 5.0, 3.0, 6.0, 11.0, 9.0, 12.0, 6.0, 14.0, 14.0, 13.0, 25.0, 34.0, 26.0, 32.0, 37.0, 37.0, 40.0, 49.0, 51.0, 46.0, 51.0, 48.0, 32.0, 45.0, 38.0, 30.0, 31.0, 29.0, 28.0, 28.0, 26.0, 15.0, 25.0, 20.0, 5.0, 17.0, 9.0, 9.0, 9.0, 10.0, 4.0, 4.0, 6.0, 0.0, 1.0, 3.0, 1.0, 1.0, 0.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.376953125, -3.26318359375, -3.1494140625, -3.03564453125, -2.921875, -2.80810546875, -2.6943359375, -2.58056640625, -2.466796875, -2.35302734375, -2.2392578125, -2.12548828125, -2.01171875, -1.89794921875, -1.7841796875, -1.67041015625, -1.556640625, -1.44287109375, -1.3291015625, -1.21533203125, -1.1015625, -0.98779296875, -0.8740234375, -0.76025390625, -0.646484375, -0.53271484375, -0.4189453125, -0.30517578125, -0.19140625, -0.07763671875, 0.0361328125, 0.14990234375, 0.263671875, 0.37744140625, 0.4912109375, 0.60498046875, 0.71875, 0.83251953125, 0.9462890625, 1.06005859375, 1.173828125, 1.28759765625, 1.4013671875, 1.51513671875, 1.62890625, 1.74267578125, 1.8564453125, 1.97021484375, 2.083984375, 2.19775390625, 2.3115234375, 2.42529296875, 2.5390625, 2.65283203125, 2.7666015625, 2.88037109375, 2.994140625, 3.10791015625, 3.2216796875, 3.33544921875, 3.44921875, 3.56298828125, 3.6767578125, 3.79052734375, 3.904296875]}, "gradients/encoder.encoder.layers.2.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 1.0, 3.0, 2.0, 3.0, 8.0, 7.0, 8.0, 13.0, 19.0, 30.0, 51.0, 68.0, 116.0, 146.0, 265.0, 465.0, 923.0, 1978.0, 5253.0, 19303.0, 109912.0, 734054.0, 142329.0, 23070.0, 6025.0, 2225.0, 972.0, 509.0, 289.0, 162.0, 111.0, 72.0, 53.0, 29.0, 25.0, 21.0, 10.0, 11.0, 9.0, 4.0, 1.0, 4.0, 2.0, 0.0, 1.0, 4.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-12.8984375, -12.5023193359375, -12.106201171875, -11.7100830078125, -11.31396484375, -10.9178466796875, -10.521728515625, -10.1256103515625, -9.7294921875, -9.3333740234375, -8.937255859375, -8.5411376953125, -8.14501953125, -7.7489013671875, -7.352783203125, -6.9566650390625, -6.560546875, -6.1644287109375, -5.768310546875, -5.3721923828125, -4.97607421875, -4.5799560546875, -4.183837890625, -3.7877197265625, -3.3916015625, -2.9954833984375, -2.599365234375, -2.2032470703125, -1.80712890625, -1.4110107421875, -1.014892578125, -0.6187744140625, -0.22265625, 0.1734619140625, 0.569580078125, 0.9656982421875, 1.36181640625, 1.7579345703125, 2.154052734375, 2.5501708984375, 2.9462890625, 3.3424072265625, 3.738525390625, 4.1346435546875, 4.53076171875, 4.9268798828125, 5.322998046875, 5.7191162109375, 6.115234375, 6.5113525390625, 6.907470703125, 7.3035888671875, 7.69970703125, 8.0958251953125, 8.491943359375, 8.8880615234375, 9.2841796875, 9.6802978515625, 10.076416015625, 10.4725341796875, 10.86865234375, 11.2647705078125, 11.660888671875, 12.0570068359375, 12.453125]}, "gradients/encoder.encoder.layers.2.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 2.0, 0.0, 3.0, 0.0, 1.0, 1.0, 4.0, 6.0, 4.0, 6.0, 9.0, 9.0, 15.0, 23.0, 22.0, 32.0, 23.0, 37.0, 47.0, 67.0, 59.0, 75.0, 60.0, 77.0, 60.0, 55.0, 55.0, 49.0, 42.0, 36.0, 34.0, 19.0, 14.0, 19.0, 9.0, 10.0, 5.0, 6.0, 5.0, 5.0, 2.0, 0.0, 2.0, 2.0, 2.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-14.90625, -14.31201171875, -13.7177734375, -13.12353515625, -12.529296875, -11.93505859375, -11.3408203125, -10.74658203125, -10.15234375, -9.55810546875, -8.9638671875, -8.36962890625, -7.775390625, -7.18115234375, -6.5869140625, -5.99267578125, -5.3984375, -4.80419921875, -4.2099609375, -3.61572265625, -3.021484375, -2.42724609375, -1.8330078125, -1.23876953125, -0.64453125, -0.05029296875, 0.5439453125, 1.13818359375, 1.732421875, 2.32666015625, 2.9208984375, 3.51513671875, 4.109375, 4.70361328125, 5.2978515625, 5.89208984375, 6.486328125, 7.08056640625, 7.6748046875, 8.26904296875, 8.86328125, 9.45751953125, 10.0517578125, 10.64599609375, 11.240234375, 11.83447265625, 12.4287109375, 13.02294921875, 13.6171875, 14.21142578125, 14.8056640625, 15.39990234375, 15.994140625, 16.58837890625, 17.1826171875, 17.77685546875, 18.37109375, 18.96533203125, 19.5595703125, 20.15380859375, 20.748046875, 21.34228515625, 21.9365234375, 22.53076171875, 23.125]}, "gradients/encoder.encoder.layers.2.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 0.0, 1.0, 2.0, 6.0, 6.0, 8.0, 7.0, 12.0, 15.0, 29.0, 25.0, 56.0, 51.0, 102.0, 137.0, 206.0, 315.0, 547.0, 1016.0, 2062.0, 4618.0, 11028.0, 30268.0, 100715.0, 609571.0, 204253.0, 52778.0, 17702.0, 6729.0, 2931.0, 1376.0, 744.0, 473.0, 256.0, 182.0, 89.0, 82.0, 45.0, 31.0, 26.0, 14.0, 11.0, 11.0, 7.0, 4.0, 3.0, 2.0, 1.0, 7.0, 2.0, 0.0, 2.0, 3.0, 1.0, 1.0], "bins": [-2.876953125, -2.79168701171875, -2.7064208984375, -2.62115478515625, -2.535888671875, -2.45062255859375, -2.3653564453125, -2.28009033203125, -2.19482421875, -2.10955810546875, -2.0242919921875, -1.93902587890625, -1.853759765625, -1.76849365234375, -1.6832275390625, -1.59796142578125, -1.5126953125, -1.42742919921875, -1.3421630859375, -1.25689697265625, -1.171630859375, -1.08636474609375, -1.0010986328125, -0.91583251953125, -0.83056640625, -0.74530029296875, -0.6600341796875, -0.57476806640625, -0.489501953125, -0.40423583984375, -0.3189697265625, -0.23370361328125, -0.1484375, -0.06317138671875, 0.0220947265625, 0.10736083984375, 0.192626953125, 0.27789306640625, 0.3631591796875, 0.44842529296875, 0.53369140625, 0.61895751953125, 0.7042236328125, 0.78948974609375, 0.874755859375, 0.96002197265625, 1.0452880859375, 1.13055419921875, 1.2158203125, 1.30108642578125, 1.3863525390625, 1.47161865234375, 1.556884765625, 1.64215087890625, 1.7274169921875, 1.81268310546875, 1.89794921875, 1.98321533203125, 2.0684814453125, 2.15374755859375, 2.239013671875, 2.32427978515625, 2.4095458984375, 2.49481201171875, 2.580078125]}, "gradients/encoder.encoder.layers.2.attention.k_proj.bias": {"_type": "histogram", "values": [2.0, 2.0, 1.0, 1.0, 0.0, 2.0, 0.0, 2.0, 3.0, 2.0, 2.0, 1.0, 3.0, 5.0, 1.0, 9.0, 6.0, 4.0, 7.0, 8.0, 12.0, 17.0, 28.0, 24.0, 27.0, 30.0, 29.0, 45.0, 49.0, 87.0, 100.0, 112.0, 90.0, 52.0, 52.0, 34.0, 30.0, 21.0, 20.0, 16.0, 16.0, 10.0, 8.0, 4.0, 11.0, 3.0, 6.0, 5.0, 5.0, 1.0, 5.0, 3.0, 2.0, 2.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-0.00098419189453125, -0.0009525120258331299, -0.0009208321571350098, -0.0008891522884368896, -0.0008574724197387695, -0.0008257925510406494, -0.0007941126823425293, -0.0007624328136444092, -0.0007307529449462891, -0.0006990730762481689, -0.0006673932075500488, -0.0006357133388519287, -0.0006040334701538086, -0.0005723536014556885, -0.0005406737327575684, -0.0005089938640594482, -0.0004773139953613281, -0.000445634126663208, -0.0004139542579650879, -0.0003822743892669678, -0.00035059452056884766, -0.00031891465187072754, -0.0002872347831726074, -0.0002555549144744873, -0.0002238750457763672, -0.00019219517707824707, -0.00016051530838012695, -0.00012883543968200684, -9.715557098388672e-05, -6.54757022857666e-05, -3.3795833587646484e-05, -2.115964889526367e-06, 2.956390380859375e-05, 6.124377250671387e-05, 9.292364120483398e-05, 0.0001246035099029541, 0.00015628337860107422, 0.00018796324729919434, 0.00021964311599731445, 0.00025132298469543457, 0.0002830028533935547, 0.0003146827220916748, 0.0003463625907897949, 0.00037804245948791504, 0.00040972232818603516, 0.0004414021968841553, 0.0004730820655822754, 0.0005047619342803955, 0.0005364418029785156, 0.0005681216716766357, 0.0005998015403747559, 0.000631481409072876, 0.0006631612777709961, 0.0006948411464691162, 0.0007265210151672363, 0.0007582008838653564, 0.0007898807525634766, 0.0008215606212615967, 0.0008532404899597168, 0.0008849203586578369, 0.000916600227355957, 0.0009482800960540771, 0.0009799599647521973, 0.0010116398334503174, 0.0010433197021484375]}, "gradients/encoder.encoder.layers.2.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 2.0, 3.0, 3.0, 11.0, 10.0, 20.0, 28.0, 36.0, 44.0, 76.0, 125.0, 169.0, 267.0, 505.0, 866.0, 1683.0, 3870.0, 9939.0, 31401.0, 127411.0, 664482.0, 151795.0, 35965.0, 11319.0, 4283.0, 1940.0, 944.0, 544.0, 282.0, 219.0, 101.0, 88.0, 48.0, 28.0, 18.0, 15.0, 8.0, 2.0, 4.0, 4.0, 3.0, 2.0, 2.0, 3.0, 0.0, 0.0, 2.0], "bins": [-3.47265625, -3.38018798828125, -3.2877197265625, -3.19525146484375, -3.102783203125, -3.01031494140625, -2.9178466796875, -2.82537841796875, -2.73291015625, -2.64044189453125, -2.5479736328125, -2.45550537109375, -2.363037109375, -2.27056884765625, -2.1781005859375, -2.08563232421875, -1.9931640625, -1.90069580078125, -1.8082275390625, -1.71575927734375, -1.623291015625, -1.53082275390625, -1.4383544921875, -1.34588623046875, -1.25341796875, -1.16094970703125, -1.0684814453125, -0.97601318359375, -0.883544921875, -0.79107666015625, -0.6986083984375, -0.60614013671875, -0.513671875, -0.42120361328125, -0.3287353515625, -0.23626708984375, -0.143798828125, -0.05133056640625, 0.0411376953125, 0.13360595703125, 0.22607421875, 0.31854248046875, 0.4110107421875, 0.50347900390625, 0.595947265625, 0.68841552734375, 0.7808837890625, 0.87335205078125, 0.9658203125, 1.05828857421875, 1.1507568359375, 1.24322509765625, 1.335693359375, 1.42816162109375, 1.5206298828125, 1.61309814453125, 1.70556640625, 1.79803466796875, 1.8905029296875, 1.98297119140625, 2.075439453125, 2.16790771484375, 2.2603759765625, 2.35284423828125, 2.4453125]}, "gradients/encoder.encoder.layers.2.attention.q_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 2.0, 1.0, 2.0, 3.0, 4.0, 2.0, 2.0, 3.0, 2.0, 3.0, 9.0, 8.0, 9.0, 11.0, 14.0, 24.0, 20.0, 37.0, 40.0, 48.0, 63.0, 67.0, 91.0, 92.0, 87.0, 55.0, 82.0, 40.0, 32.0, 29.0, 23.0, 17.0, 19.0, 15.0, 14.0, 7.0, 8.0, 9.0, 4.0, 5.0, 5.0, 2.0, 1.0, 2.0, 3.0, 0.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.6953125, -2.593475341796875, -2.49163818359375, -2.389801025390625, -2.2879638671875, -2.186126708984375, -2.08428955078125, -1.982452392578125, -1.880615234375, -1.778778076171875, -1.67694091796875, -1.575103759765625, -1.4732666015625, -1.371429443359375, -1.26959228515625, -1.167755126953125, -1.06591796875, -0.964080810546875, -0.86224365234375, -0.760406494140625, -0.6585693359375, -0.556732177734375, -0.45489501953125, -0.353057861328125, -0.251220703125, -0.149383544921875, -0.04754638671875, 0.054290771484375, 0.1561279296875, 0.257965087890625, 0.35980224609375, 0.461639404296875, 0.5634765625, 0.665313720703125, 0.76715087890625, 0.868988037109375, 0.9708251953125, 1.072662353515625, 1.17449951171875, 1.276336669921875, 1.378173828125, 1.480010986328125, 1.58184814453125, 1.683685302734375, 1.7855224609375, 1.887359619140625, 1.98919677734375, 2.091033935546875, 2.19287109375, 2.294708251953125, 2.39654541015625, 2.498382568359375, 2.6002197265625, 2.702056884765625, 2.80389404296875, 2.905731201171875, 3.007568359375, 3.109405517578125, 3.21124267578125, 3.313079833984375, 3.4149169921875, 3.516754150390625, 3.61859130859375, 3.720428466796875, 3.822265625]}, "gradients/encoder.encoder.layers.2.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 4.0, 2.0, 3.0, 6.0, 28.0, 58.0, 97.0, 257.0, 333.0, 126.0, 48.0, 24.0, 11.0, 7.0, 2.0, 3.0, 1.0, 2.0, 1.0, 0.0, 1.0, 2.0, 1.0], "bins": [-198.7899169921875, -194.7440643310547, -190.69821166992188, -186.65234375, -182.6064910888672, -178.56063842773438, -174.51478576660156, -170.46893310546875, -166.42306518554688, -162.37721252441406, -158.33135986328125, -154.28549194335938, -150.23963928222656, -146.19378662109375, -142.14793395996094, -138.10208129882812, -134.0562286376953, -130.0103759765625, -125.96451568603516, -121.91866302490234, -117.872802734375, -113.82695007324219, -109.78109741210938, -105.73524475097656, -101.68938446044922, -97.6435317993164, -93.59767150878906, -89.55181884765625, -85.50596618652344, -81.4601058959961, -77.41425323486328, -73.36839294433594, -69.3225326538086, -65.27667999267578, -61.23081970214844, -57.184967041015625, -53.13911056518555, -49.09325408935547, -45.047401428222656, -41.00154495239258, -36.9556884765625, -32.90983200073242, -28.863977432250977, -24.81812286376953, -20.772266387939453, -16.726409912109375, -12.68055534362793, -8.634700775146484, -4.588844299316406, -0.5429887771606445, 3.502866744995117, 7.548722267150879, 11.59457778930664, 15.640434265136719, 19.686288833618164, 23.73214340209961, 27.777999877929688, 31.823856353759766, 35.869712829589844, 39.915565490722656, 43.961421966552734, 48.00727844238281, 52.053131103515625, 56.0989875793457, 60.14484405517578]}, "gradients/encoder.encoder.layers.2.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 2.0, 3.0, 6.0, 3.0, 4.0, 8.0, 9.0, 11.0, 11.0, 13.0, 11.0, 9.0, 13.0, 31.0, 17.0, 25.0, 23.0, 27.0, 40.0, 51.0, 57.0, 98.0, 99.0, 74.0, 56.0, 28.0, 33.0, 35.0, 32.0, 25.0, 27.0, 14.0, 19.0, 12.0, 13.0, 9.0, 16.0, 10.0, 7.0, 8.0, 7.0, 5.0, 2.0, 1.0, 4.0, 2.0, 1.0, 2.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-50.53459548950195, -48.74828338623047, -46.96196746826172, -45.175655364990234, -43.38934326171875, -41.60302734375, -39.816715240478516, -38.03040313720703, -36.24408721923828, -34.4577751159668, -32.67145919799805, -30.885147094726562, -29.098833084106445, -27.312519073486328, -25.526206970214844, -23.739892959594727, -21.953580856323242, -20.167266845703125, -18.38095474243164, -16.594640731811523, -14.808326721191406, -13.022013664245605, -11.235700607299805, -9.449386596679688, -7.663073539733887, -5.876760005950928, -4.090446472167969, -2.304133415222168, -0.517819881439209, 1.26849365234375, 3.054806709289551, 4.841120719909668, 6.627433776855469, 8.41374683380127, 10.200060844421387, 11.986373901367188, 13.772687911987305, 15.559000968933105, 17.345314025878906, 19.131628036499023, 20.91794204711914, 22.704256057739258, 24.490568161010742, 26.27688217163086, 28.063196182250977, 29.849510192871094, 31.635822296142578, 33.42213439941406, 35.20845031738281, 36.9947624206543, 38.78107833862305, 40.56739044189453, 42.353702545166016, 44.140018463134766, 45.92633056640625, 47.712646484375, 49.49895477294922, 51.2852668762207, 53.07158279418945, 54.85789489746094, 56.64420700073242, 58.43052291870117, 60.216835021972656, 62.003150939941406, 63.78946304321289]}, "gradients/encoder.encoder.layers.1.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 1.0, 1.0, 5.0, 4.0, 6.0, 12.0, 13.0, 15.0, 35.0, 49.0, 81.0, 121.0, 162.0, 240.0, 349.0, 530.0, 851.0, 1522.0, 2719.0, 5261.0, 10573.0, 27429.0, 91223.0, 398150.0, 1616941.0, 1538567.0, 365669.0, 85613.0, 26222.0, 10494.0, 4956.0, 2630.0, 1473.0, 830.0, 566.0, 313.0, 222.0, 146.0, 103.0, 69.0, 44.0, 34.0, 15.0, 12.0, 7.0, 5.0, 6.0, 2.0, 2.0, 0.0, 1.0, 3.0, 1.0], "bins": [-4.87890625, -4.7432861328125, -4.607666015625, -4.4720458984375, -4.33642578125, -4.2008056640625, -4.065185546875, -3.9295654296875, -3.7939453125, -3.6583251953125, -3.522705078125, -3.3870849609375, -3.25146484375, -3.1158447265625, -2.980224609375, -2.8446044921875, -2.708984375, -2.5733642578125, -2.437744140625, -2.3021240234375, -2.16650390625, -2.0308837890625, -1.895263671875, -1.7596435546875, -1.6240234375, -1.4884033203125, -1.352783203125, -1.2171630859375, -1.08154296875, -0.9459228515625, -0.810302734375, -0.6746826171875, -0.5390625, -0.4034423828125, -0.267822265625, -0.1322021484375, 0.00341796875, 0.1390380859375, 0.274658203125, 0.4102783203125, 0.5458984375, 0.6815185546875, 0.817138671875, 0.9527587890625, 1.08837890625, 1.2239990234375, 1.359619140625, 1.4952392578125, 1.630859375, 1.7664794921875, 1.902099609375, 2.0377197265625, 2.17333984375, 2.3089599609375, 2.444580078125, 2.5802001953125, 2.7158203125, 2.8514404296875, 2.987060546875, 3.1226806640625, 3.25830078125, 3.3939208984375, 3.529541015625, 3.6651611328125, 3.80078125]}, "gradients/encoder.encoder.layers.1.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 4.0, 2.0, 6.0, 6.0, 2.0, 6.0, 5.0, 11.0, 9.0, 12.0, 13.0, 10.0, 15.0, 18.0, 27.0, 26.0, 31.0, 28.0, 36.0, 52.0, 58.0, 54.0, 56.0, 47.0, 49.0, 40.0, 45.0, 27.0, 39.0, 28.0, 28.0, 35.0, 37.0, 19.0, 26.0, 26.0, 15.0, 12.0, 12.0, 12.0, 8.0, 8.0, 2.0, 2.0, 5.0, 2.0, 2.0, 2.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-3.875, -3.7535400390625, -3.632080078125, -3.5106201171875, -3.38916015625, -3.2677001953125, -3.146240234375, -3.0247802734375, -2.9033203125, -2.7818603515625, -2.660400390625, -2.5389404296875, -2.41748046875, -2.2960205078125, -2.174560546875, -2.0531005859375, -1.931640625, -1.8101806640625, -1.688720703125, -1.5672607421875, -1.44580078125, -1.3243408203125, -1.202880859375, -1.0814208984375, -0.9599609375, -0.8385009765625, -0.717041015625, -0.5955810546875, -0.47412109375, -0.3526611328125, -0.231201171875, -0.1097412109375, 0.01171875, 0.1331787109375, 0.254638671875, 0.3760986328125, 0.49755859375, 0.6190185546875, 0.740478515625, 0.8619384765625, 0.9833984375, 1.1048583984375, 1.226318359375, 1.3477783203125, 1.46923828125, 1.5906982421875, 1.712158203125, 1.8336181640625, 1.955078125, 2.0765380859375, 2.197998046875, 2.3194580078125, 2.44091796875, 2.5623779296875, 2.683837890625, 2.8052978515625, 2.9267578125, 3.0482177734375, 3.169677734375, 3.2911376953125, 3.41259765625, 3.5340576171875, 3.655517578125, 3.7769775390625, 3.8984375]}, "gradients/encoder.encoder.layers.1.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 3.0, 4.0, 6.0, 7.0, 8.0, 6.0, 11.0, 13.0, 19.0, 23.0, 36.0, 62.0, 75.0, 115.0, 145.0, 283.0, 593.0, 1507.0, 5542.0, 33799.0, 667842.0, 3376126.0, 92427.0, 11378.0, 2467.0, 881.0, 336.0, 188.0, 107.0, 76.0, 51.0, 40.0, 32.0, 15.0, 18.0, 9.0, 6.0, 12.0, 6.0, 4.0, 2.0, 2.0, 4.0, 3.0, 3.0, 2.0, 1.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-11.328125, -10.87890625, -10.4296875, -9.98046875, -9.53125, -9.08203125, -8.6328125, -8.18359375, -7.734375, -7.28515625, -6.8359375, -6.38671875, -5.9375, -5.48828125, -5.0390625, -4.58984375, -4.140625, -3.69140625, -3.2421875, -2.79296875, -2.34375, -1.89453125, -1.4453125, -0.99609375, -0.546875, -0.09765625, 0.3515625, 0.80078125, 1.25, 1.69921875, 2.1484375, 2.59765625, 3.046875, 3.49609375, 3.9453125, 4.39453125, 4.84375, 5.29296875, 5.7421875, 6.19140625, 6.640625, 7.08984375, 7.5390625, 7.98828125, 8.4375, 8.88671875, 9.3359375, 9.78515625, 10.234375, 10.68359375, 11.1328125, 11.58203125, 12.03125, 12.48046875, 12.9296875, 13.37890625, 13.828125, 14.27734375, 14.7265625, 15.17578125, 15.625, 16.07421875, 16.5234375, 16.97265625, 17.421875]}, "gradients/encoder.encoder.layers.1.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 0.0, 6.0, 5.0, 2.0, 7.0, 8.0, 10.0, 17.0, 18.0, 30.0, 46.0, 47.0, 70.0, 95.0, 152.0, 276.0, 420.0, 611.0, 765.0, 508.0, 327.0, 226.0, 141.0, 86.0, 63.0, 33.0, 29.0, 13.0, 12.0, 8.0, 15.0, 11.0, 4.0, 5.0, 6.0, 3.0, 2.0, 3.0, 4.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-11.8828125, -11.54638671875, -11.2099609375, -10.87353515625, -10.537109375, -10.20068359375, -9.8642578125, -9.52783203125, -9.19140625, -8.85498046875, -8.5185546875, -8.18212890625, -7.845703125, -7.50927734375, -7.1728515625, -6.83642578125, -6.5, -6.16357421875, -5.8271484375, -5.49072265625, -5.154296875, -4.81787109375, -4.4814453125, -4.14501953125, -3.80859375, -3.47216796875, -3.1357421875, -2.79931640625, -2.462890625, -2.12646484375, -1.7900390625, -1.45361328125, -1.1171875, -0.78076171875, -0.4443359375, -0.10791015625, 0.228515625, 0.56494140625, 0.9013671875, 1.23779296875, 1.57421875, 1.91064453125, 2.2470703125, 2.58349609375, 2.919921875, 3.25634765625, 3.5927734375, 3.92919921875, 4.265625, 4.60205078125, 4.9384765625, 5.27490234375, 5.611328125, 5.94775390625, 6.2841796875, 6.62060546875, 6.95703125, 7.29345703125, 7.6298828125, 7.96630859375, 8.302734375, 8.63916015625, 8.9755859375, 9.31201171875, 9.6484375]}, "gradients/encoder.encoder.layers.1.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 2.0, 6.0, 4.0, 17.0, 31.0, 60.0, 187.0, 315.0, 238.0, 97.0, 26.0, 11.0, 7.0, 4.0, 3.0, 3.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-174.51271057128906, -169.1851043701172, -163.85748291015625, -158.52987670898438, -153.2022705078125, -147.87466430664062, -142.54705810546875, -137.2194366455078, -131.89183044433594, -126.56422424316406, -121.23661041259766, -115.90899658203125, -110.58139038085938, -105.2537841796875, -99.9261703491211, -94.59855651855469, -89.27095031738281, -83.94334411621094, -78.61573028564453, -73.28811645507812, -67.96051025390625, -62.63290023803711, -57.30529022216797, -51.97768020629883, -46.65007019042969, -41.32246017456055, -35.994850158691406, -30.667240142822266, -25.339630126953125, -20.012020111083984, -14.684410095214844, -9.356800079345703, -4.0291748046875, 1.2984352111816406, 6.626045227050781, 11.953655242919922, 17.281265258789062, 22.608875274658203, 27.936485290527344, 33.264095306396484, 38.591705322265625, 43.919315338134766, 49.246925354003906, 54.57453536987305, 59.90214538574219, 65.22975158691406, 70.55736541748047, 75.88497924804688, 81.21258544921875, 86.54019165039062, 91.86780548095703, 97.19541931152344, 102.52302551269531, 107.85063171386719, 113.1782455444336, 118.505859375, 123.83346557617188, 129.16107177734375, 134.48867797851562, 139.81629943847656, 145.14390563964844, 150.4715118408203, 155.79913330078125, 161.12673950195312, 166.454345703125]}, "gradients/encoder.encoder.layers.1.final_layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 1.0, 4.0, 5.0, 2.0, 3.0, 4.0, 9.0, 10.0, 20.0, 4.0, 21.0, 18.0, 25.0, 18.0, 25.0, 19.0, 23.0, 31.0, 45.0, 44.0, 55.0, 51.0, 41.0, 46.0, 44.0, 42.0, 24.0, 42.0, 40.0, 39.0, 35.0, 33.0, 23.0, 17.0, 20.0, 14.0, 13.0, 13.0, 18.0, 18.0, 13.0, 8.0, 8.0, 1.0, 1.0, 10.0, 7.0, 0.0, 1.0, 3.0, 0.0, 2.0, 0.0, 2.0, 0.0, 1.0, 1.0], "bins": [-43.146690368652344, -41.7572135925293, -40.36773681640625, -38.9782600402832, -37.588783264160156, -36.19930648803711, -34.80982971191406, -33.42034912109375, -32.03087615966797, -30.641399383544922, -29.251922607421875, -27.862445831298828, -26.47296905517578, -25.083492279052734, -23.694013595581055, -22.304536819458008, -20.915058135986328, -19.52558135986328, -18.136104583740234, -16.746627807617188, -15.357150077819824, -13.967673301696777, -12.578195571899414, -11.188718795776367, -9.79924201965332, -8.409765243530273, -7.020287990570068, -5.630810737609863, -4.241333961486816, -2.8518571853637695, -1.4623794555664062, -0.07290267944335938, 1.3165779113769531, 2.706054925918579, 4.095531940460205, 5.48500919342041, 6.874485969543457, 8.263962745666504, 9.653440475463867, 11.042917251586914, 12.432394027709961, 13.821870803833008, 15.211347579956055, 16.600826263427734, 17.99030303955078, 19.379779815673828, 20.769256591796875, 22.158733367919922, 23.54821014404297, 24.937686920166016, 26.327163696289062, 27.71664047241211, 29.106117248535156, 30.495594024658203, 31.885072708129883, 33.27455139160156, 34.664024353027344, 36.05350112915039, 37.44297790527344, 38.832454681396484, 40.22193145751953, 41.61140823364258, 43.000885009765625, 44.39036560058594, 45.779842376708984]}, "gradients/encoder.encoder.layers.1.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 0.0, 3.0, 0.0, 1.0, 8.0, 5.0, 9.0, 10.0, 18.0, 12.0, 29.0, 52.0, 88.0, 161.0, 236.0, 500.0, 928.0, 1872.0, 3891.0, 8327.0, 19187.0, 48957.0, 139289.0, 380976.0, 287464.0, 95005.0, 34774.0, 14222.0, 6472.0, 2870.0, 1429.0, 791.0, 402.0, 221.0, 134.0, 79.0, 48.0, 37.0, 20.0, 13.0, 10.0, 3.0, 5.0, 6.0, 3.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.796875, -6.5526123046875, -6.308349609375, -6.0640869140625, -5.81982421875, -5.5755615234375, -5.331298828125, -5.0870361328125, -4.8427734375, -4.5985107421875, -4.354248046875, -4.1099853515625, -3.86572265625, -3.6214599609375, -3.377197265625, -3.1329345703125, -2.888671875, -2.6444091796875, -2.400146484375, -2.1558837890625, -1.91162109375, -1.6673583984375, -1.423095703125, -1.1788330078125, -0.9345703125, -0.6903076171875, -0.446044921875, -0.2017822265625, 0.04248046875, 0.2867431640625, 0.531005859375, 0.7752685546875, 1.01953125, 1.2637939453125, 1.508056640625, 1.7523193359375, 1.99658203125, 2.2408447265625, 2.485107421875, 2.7293701171875, 2.9736328125, 3.2178955078125, 3.462158203125, 3.7064208984375, 3.95068359375, 4.1949462890625, 4.439208984375, 4.6834716796875, 4.927734375, 5.1719970703125, 5.416259765625, 5.6605224609375, 5.90478515625, 6.1490478515625, 6.393310546875, 6.6375732421875, 6.8818359375, 7.1260986328125, 7.370361328125, 7.6146240234375, 7.85888671875, 8.1031494140625, 8.347412109375, 8.5916748046875, 8.8359375]}, "gradients/encoder.encoder.layers.1.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 2.0, 1.0, 5.0, 2.0, 6.0, 9.0, 6.0, 10.0, 12.0, 18.0, 25.0, 23.0, 27.0, 29.0, 40.0, 39.0, 40.0, 43.0, 46.0, 67.0, 66.0, 56.0, 44.0, 47.0, 48.0, 51.0, 35.0, 32.0, 31.0, 23.0, 31.0, 19.0, 17.0, 13.0, 13.0, 10.0, 3.0, 5.0, 6.0, 6.0, 4.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.34375, -4.1944580078125, -4.045166015625, -3.8958740234375, -3.74658203125, -3.5972900390625, -3.447998046875, -3.2987060546875, -3.1494140625, -3.0001220703125, -2.850830078125, -2.7015380859375, -2.55224609375, -2.4029541015625, -2.253662109375, -2.1043701171875, -1.955078125, -1.8057861328125, -1.656494140625, -1.5072021484375, -1.35791015625, -1.2086181640625, -1.059326171875, -0.9100341796875, -0.7607421875, -0.6114501953125, -0.462158203125, -0.3128662109375, -0.16357421875, -0.0142822265625, 0.135009765625, 0.2843017578125, 0.43359375, 0.5828857421875, 0.732177734375, 0.8814697265625, 1.03076171875, 1.1800537109375, 1.329345703125, 1.4786376953125, 1.6279296875, 1.7772216796875, 1.926513671875, 2.0758056640625, 2.22509765625, 2.3743896484375, 2.523681640625, 2.6729736328125, 2.822265625, 2.9715576171875, 3.120849609375, 3.2701416015625, 3.41943359375, 3.5687255859375, 3.718017578125, 3.8673095703125, 4.0166015625, 4.1658935546875, 4.315185546875, 4.4644775390625, 4.61376953125, 4.7630615234375, 4.912353515625, 5.0616455078125, 5.2109375]}, "gradients/encoder.encoder.layers.1.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0, 3.0, 4.0, 4.0, 3.0, 7.0, 3.0, 12.0, 15.0, 25.0, 33.0, 54.0, 66.0, 97.0, 145.0, 205.0, 353.0, 559.0, 984.0, 2008.0, 5077.0, 18508.0, 124400.0, 785937.0, 87182.0, 14479.0, 4295.0, 1828.0, 873.0, 486.0, 296.0, 191.0, 128.0, 95.0, 62.0, 38.0, 32.0, 28.0, 15.0, 11.0, 5.0, 7.0, 5.0, 4.0, 0.0, 1.0, 1.0, 4.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-14.5390625, -14.090576171875, -13.64208984375, -13.193603515625, -12.7451171875, -12.296630859375, -11.84814453125, -11.399658203125, -10.951171875, -10.502685546875, -10.05419921875, -9.605712890625, -9.1572265625, -8.708740234375, -8.26025390625, -7.811767578125, -7.36328125, -6.914794921875, -6.46630859375, -6.017822265625, -5.5693359375, -5.120849609375, -4.67236328125, -4.223876953125, -3.775390625, -3.326904296875, -2.87841796875, -2.429931640625, -1.9814453125, -1.532958984375, -1.08447265625, -0.635986328125, -0.1875, 0.260986328125, 0.70947265625, 1.157958984375, 1.6064453125, 2.054931640625, 2.50341796875, 2.951904296875, 3.400390625, 3.848876953125, 4.29736328125, 4.745849609375, 5.1943359375, 5.642822265625, 6.09130859375, 6.539794921875, 6.98828125, 7.436767578125, 7.88525390625, 8.333740234375, 8.7822265625, 9.230712890625, 9.67919921875, 10.127685546875, 10.576171875, 11.024658203125, 11.47314453125, 11.921630859375, 12.3701171875, 12.818603515625, 13.26708984375, 13.715576171875, 14.1640625]}, "gradients/encoder.encoder.layers.1.attention.v_proj.bias": {"_type": "histogram", "values": [2.0, 2.0, 2.0, 3.0, 3.0, 2.0, 1.0, 4.0, 4.0, 6.0, 6.0, 6.0, 12.0, 19.0, 8.0, 19.0, 18.0, 20.0, 31.0, 33.0, 40.0, 38.0, 48.0, 48.0, 60.0, 54.0, 63.0, 79.0, 51.0, 52.0, 49.0, 42.0, 22.0, 33.0, 24.0, 25.0, 17.0, 13.0, 9.0, 11.0, 8.0, 10.0, 3.0, 5.0, 6.0, 2.0, 0.0, 3.0, 3.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-15.15625, -14.5703125, -13.984375, -13.3984375, -12.8125, -12.2265625, -11.640625, -11.0546875, -10.46875, -9.8828125, -9.296875, -8.7109375, -8.125, -7.5390625, -6.953125, -6.3671875, -5.78125, -5.1953125, -4.609375, -4.0234375, -3.4375, -2.8515625, -2.265625, -1.6796875, -1.09375, -0.5078125, 0.078125, 0.6640625, 1.25, 1.8359375, 2.421875, 3.0078125, 3.59375, 4.1796875, 4.765625, 5.3515625, 5.9375, 6.5234375, 7.109375, 7.6953125, 8.28125, 8.8671875, 9.453125, 10.0390625, 10.625, 11.2109375, 11.796875, 12.3828125, 12.96875, 13.5546875, 14.140625, 14.7265625, 15.3125, 15.8984375, 16.484375, 17.0703125, 17.65625, 18.2421875, 18.828125, 19.4140625, 20.0, 20.5859375, 21.171875, 21.7578125, 22.34375]}, "gradients/encoder.encoder.layers.1.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 4.0, 0.0, 4.0, 6.0, 2.0, 13.0, 8.0, 20.0, 15.0, 34.0, 28.0, 73.0, 104.0, 218.0, 497.0, 1129.0, 3582.0, 15771.0, 113910.0, 807367.0, 87716.0, 12971.0, 3169.0, 1016.0, 420.0, 183.0, 88.0, 67.0, 52.0, 29.0, 20.0, 15.0, 19.0, 9.0, 2.0, 4.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0], "bins": [-4.7421875, -4.612060546875, -4.48193359375, -4.351806640625, -4.2216796875, -4.091552734375, -3.96142578125, -3.831298828125, -3.701171875, -3.571044921875, -3.44091796875, -3.310791015625, -3.1806640625, -3.050537109375, -2.92041015625, -2.790283203125, -2.66015625, -2.530029296875, -2.39990234375, -2.269775390625, -2.1396484375, -2.009521484375, -1.87939453125, -1.749267578125, -1.619140625, -1.489013671875, -1.35888671875, -1.228759765625, -1.0986328125, -0.968505859375, -0.83837890625, -0.708251953125, -0.578125, -0.447998046875, -0.31787109375, -0.187744140625, -0.0576171875, 0.072509765625, 0.20263671875, 0.332763671875, 0.462890625, 0.593017578125, 0.72314453125, 0.853271484375, 0.9833984375, 1.113525390625, 1.24365234375, 1.373779296875, 1.50390625, 1.634033203125, 1.76416015625, 1.894287109375, 2.0244140625, 2.154541015625, 2.28466796875, 2.414794921875, 2.544921875, 2.675048828125, 2.80517578125, 2.935302734375, 3.0654296875, 3.195556640625, 3.32568359375, 3.455810546875, 3.5859375]}, "gradients/encoder.encoder.layers.1.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 4.0, 3.0, 2.0, 2.0, 3.0, 3.0, 6.0, 7.0, 6.0, 10.0, 9.0, 12.0, 10.0, 21.0, 17.0, 25.0, 28.0, 47.0, 43.0, 71.0, 75.0, 94.0, 111.0, 69.0, 59.0, 57.0, 39.0, 35.0, 30.0, 28.0, 16.0, 10.0, 13.0, 12.0, 11.0, 4.0, 6.0, 5.0, 3.0, 2.0, 3.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.0010356903076171875, -0.0010052770376205444, -0.0009748637676239014, -0.0009444504976272583, -0.0009140372276306152, -0.0008836239576339722, -0.0008532106876373291, -0.000822797417640686, -0.000792384147644043, -0.0007619708776473999, -0.0007315576076507568, -0.0007011443376541138, -0.0006707310676574707, -0.0006403177976608276, -0.0006099045276641846, -0.0005794912576675415, -0.0005490779876708984, -0.0005186647176742554, -0.0004882514476776123, -0.00045783817768096924, -0.00042742490768432617, -0.0003970116376876831, -0.00036659836769104004, -0.00033618509769439697, -0.0003057718276977539, -0.00027535855770111084, -0.0002449452877044678, -0.0002145320177078247, -0.00018411874771118164, -0.00015370547771453857, -0.0001232922077178955, -9.287893772125244e-05, -6.246566772460938e-05, -3.205239772796631e-05, -1.6391277313232422e-06, 2.8774142265319824e-05, 5.918741226196289e-05, 8.960068225860596e-05, 0.00012001395225524902, 0.0001504272222518921, 0.00018084049224853516, 0.00021125376224517822, 0.0002416670322418213, 0.00027208030223846436, 0.0003024935722351074, 0.0003329068422317505, 0.00036332011222839355, 0.0003937333822250366, 0.0004241466522216797, 0.00045455992221832275, 0.0004849731922149658, 0.0005153864622116089, 0.000545799732208252, 0.000576213002204895, 0.0006066262722015381, 0.0006370395421981812, 0.0006674528121948242, 0.0006978660821914673, 0.0007282793521881104, 0.0007586926221847534, 0.0007891058921813965, 0.0008195191621780396, 0.0008499324321746826, 0.0008803457021713257, 0.0009107589721679688]}, "gradients/encoder.encoder.layers.1.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 1.0, 7.0, 6.0, 11.0, 18.0, 30.0, 41.0, 58.0, 92.0, 133.0, 222.0, 415.0, 741.0, 1455.0, 3340.0, 9011.0, 31772.0, 145853.0, 695635.0, 119325.0, 26576.0, 7804.0, 3023.0, 1343.0, 644.0, 378.0, 233.0, 128.0, 92.0, 46.0, 40.0, 26.0, 21.0, 11.0, 11.0, 5.0, 4.0, 5.0, 2.0, 5.0, 2.0, 0.0, 0.0, 2.0], "bins": [-3.142578125, -3.060821533203125, -2.97906494140625, -2.897308349609375, -2.8155517578125, -2.733795166015625, -2.65203857421875, -2.570281982421875, -2.488525390625, -2.406768798828125, -2.32501220703125, -2.243255615234375, -2.1614990234375, -2.079742431640625, -1.99798583984375, -1.916229248046875, -1.83447265625, -1.752716064453125, -1.67095947265625, -1.589202880859375, -1.5074462890625, -1.425689697265625, -1.34393310546875, -1.262176513671875, -1.180419921875, -1.098663330078125, -1.01690673828125, -0.935150146484375, -0.8533935546875, -0.771636962890625, -0.68988037109375, -0.608123779296875, -0.5263671875, -0.444610595703125, -0.36285400390625, -0.281097412109375, -0.1993408203125, -0.117584228515625, -0.03582763671875, 0.045928955078125, 0.127685546875, 0.209442138671875, 0.29119873046875, 0.372955322265625, 0.4547119140625, 0.536468505859375, 0.61822509765625, 0.699981689453125, 0.78173828125, 0.863494873046875, 0.94525146484375, 1.027008056640625, 1.1087646484375, 1.190521240234375, 1.27227783203125, 1.354034423828125, 1.435791015625, 1.517547607421875, 1.59930419921875, 1.681060791015625, 1.7628173828125, 1.844573974609375, 1.92633056640625, 2.008087158203125, 2.08984375]}, "gradients/encoder.encoder.layers.1.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 2.0, 2.0, 1.0, 4.0, 2.0, 2.0, 4.0, 6.0, 9.0, 11.0, 6.0, 8.0, 17.0, 22.0, 32.0, 20.0, 29.0, 29.0, 48.0, 45.0, 52.0, 44.0, 65.0, 57.0, 55.0, 58.0, 55.0, 54.0, 43.0, 30.0, 44.0, 35.0, 27.0, 20.0, 14.0, 6.0, 10.0, 13.0, 7.0, 5.0, 4.0, 3.0, 2.0, 4.0, 6.0, 1.0, 0.0, 2.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.900390625, -1.8267822265625, -1.753173828125, -1.6795654296875, -1.60595703125, -1.5323486328125, -1.458740234375, -1.3851318359375, -1.3115234375, -1.2379150390625, -1.164306640625, -1.0906982421875, -1.01708984375, -0.9434814453125, -0.869873046875, -0.7962646484375, -0.72265625, -0.6490478515625, -0.575439453125, -0.5018310546875, -0.42822265625, -0.3546142578125, -0.281005859375, -0.2073974609375, -0.1337890625, -0.0601806640625, 0.013427734375, 0.0870361328125, 0.16064453125, 0.2342529296875, 0.307861328125, 0.3814697265625, 0.455078125, 0.5286865234375, 0.602294921875, 0.6759033203125, 0.74951171875, 0.8231201171875, 0.896728515625, 0.9703369140625, 1.0439453125, 1.1175537109375, 1.191162109375, 1.2647705078125, 1.33837890625, 1.4119873046875, 1.485595703125, 1.5592041015625, 1.6328125, 1.7064208984375, 1.780029296875, 1.8536376953125, 1.92724609375, 2.0008544921875, 2.074462890625, 2.1480712890625, 2.2216796875, 2.2952880859375, 2.368896484375, 2.4425048828125, 2.51611328125, 2.5897216796875, 2.663330078125, 2.7369384765625, 2.810546875]}, "gradients/encoder.encoder.layers.1.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 2.0, 2.0, 4.0, 2.0, 9.0, 12.0, 27.0, 81.0, 167.0, 429.0, 141.0, 69.0, 30.0, 21.0, 6.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-166.93116760253906, -162.80755615234375, -158.68392944335938, -154.56031799316406, -150.43670654296875, -146.31309509277344, -142.18948364257812, -138.06585693359375, -133.94224548339844, -129.81863403320312, -125.69501495361328, -121.57139587402344, -117.44778442382812, -113.32417297363281, -109.20055389404297, -105.07693481445312, -100.95332336425781, -96.8297119140625, -92.70609283447266, -88.58247375488281, -84.4588623046875, -80.33525085449219, -76.21163177490234, -72.0880126953125, -67.96440124511719, -63.84078598022461, -59.71717071533203, -55.59355545043945, -51.469940185546875, -47.3463249206543, -43.22270965576172, -39.09909439086914, -34.97547912597656, -30.851863861083984, -26.728248596191406, -22.604633331298828, -18.48101806640625, -14.357402801513672, -10.233787536621094, -6.110172271728516, -1.9865570068359375, 2.1370582580566406, 6.260673522949219, 10.384288787841797, 14.507904052734375, 18.631519317626953, 22.75513458251953, 26.87874984741211, 31.002365112304688, 35.125980377197266, 39.249595642089844, 43.37321090698242, 47.496826171875, 51.62044143676758, 55.744056701660156, 59.867671966552734, 63.99128723144531, 68.11489868164062, 72.23851776123047, 76.36213684082031, 80.48574829101562, 84.60935974121094, 88.73297882080078, 92.85659790039062, 96.98020935058594]}, "gradients/encoder.encoder.layers.1.layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 3.0, 1.0, 1.0, 4.0, 3.0, 5.0, 9.0, 12.0, 5.0, 9.0, 17.0, 13.0, 15.0, 24.0, 23.0, 14.0, 33.0, 41.0, 42.0, 46.0, 75.0, 148.0, 108.0, 72.0, 46.0, 34.0, 18.0, 30.0, 23.0, 21.0, 28.0, 15.0, 21.0, 14.0, 6.0, 11.0, 5.0, 3.0, 2.0, 3.0, 2.0, 3.0, 1.0, 4.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-67.50314331054688, -65.31613159179688, -63.129127502441406, -60.94211959838867, -58.75511169433594, -56.56809997558594, -54.3810920715332, -52.19408416748047, -50.007076263427734, -47.820068359375, -45.633060455322266, -43.44605255126953, -41.25904083251953, -39.07203674316406, -36.88502502441406, -34.69801712036133, -32.511009216308594, -30.32400131225586, -28.136993408203125, -25.949983596801758, -23.762975692749023, -21.57596778869629, -19.388957977294922, -17.201950073242188, -15.014942169189453, -12.827934265136719, -10.640925407409668, -8.453916549682617, -6.266908645629883, -4.079900741577148, -1.8928918838500977, 0.2941169738769531, 2.4811325073242188, 4.668140888214111, 6.855149269104004, 9.042158126831055, 11.229166030883789, 13.416173934936523, 15.603182792663574, 17.790191650390625, 19.97719955444336, 22.164207458496094, 24.351215362548828, 26.538225173950195, 28.72523307800293, 30.912240982055664, 33.09925079345703, 35.286258697509766, 37.4732666015625, 39.660274505615234, 41.84728240966797, 44.0342903137207, 46.22129821777344, 48.40830993652344, 50.59531784057617, 52.782325744628906, 54.96933364868164, 57.156341552734375, 59.34334945678711, 61.530357360839844, 63.717369079589844, 65.90437316894531, 68.09138488769531, 70.27839660644531, 72.46540069580078]}, "gradients/encoder.encoder.layers.0.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 4.0, 5.0, 12.0, 11.0, 17.0, 31.0, 57.0, 69.0, 88.0, 166.0, 253.0, 356.0, 655.0, 1221.0, 2088.0, 3690.0, 7954.0, 17520.0, 45971.0, 149781.0, 589963.0, 1722368.0, 1189422.0, 322196.0, 86302.0, 29615.0, 12352.0, 5611.0, 2865.0, 1556.0, 817.0, 488.0, 293.0, 177.0, 129.0, 71.0, 46.0, 32.0, 16.0, 10.0, 7.0, 2.0, 3.0, 2.0, 4.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.6640625, -5.473876953125, -5.28369140625, -5.093505859375, -4.9033203125, -4.713134765625, -4.52294921875, -4.332763671875, -4.142578125, -3.952392578125, -3.76220703125, -3.572021484375, -3.3818359375, -3.191650390625, -3.00146484375, -2.811279296875, -2.62109375, -2.430908203125, -2.24072265625, -2.050537109375, -1.8603515625, -1.670166015625, -1.47998046875, -1.289794921875, -1.099609375, -0.909423828125, -0.71923828125, -0.529052734375, -0.3388671875, -0.148681640625, 0.04150390625, 0.231689453125, 0.421875, 0.612060546875, 0.80224609375, 0.992431640625, 1.1826171875, 1.372802734375, 1.56298828125, 1.753173828125, 1.943359375, 2.133544921875, 2.32373046875, 2.513916015625, 2.7041015625, 2.894287109375, 3.08447265625, 3.274658203125, 3.46484375, 3.655029296875, 3.84521484375, 4.035400390625, 4.2255859375, 4.415771484375, 4.60595703125, 4.796142578125, 4.986328125, 5.176513671875, 5.36669921875, 5.556884765625, 5.7470703125, 5.937255859375, 6.12744140625, 6.317626953125, 6.5078125]}, "gradients/encoder.encoder.layers.0.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 4.0, 9.0, 2.0, 2.0, 1.0, 5.0, 7.0, 9.0, 14.0, 13.0, 13.0, 20.0, 23.0, 28.0, 39.0, 45.0, 36.0, 50.0, 51.0, 57.0, 55.0, 60.0, 48.0, 53.0, 45.0, 54.0, 46.0, 27.0, 25.0, 23.0, 31.0, 23.0, 25.0, 15.0, 14.0, 9.0, 9.0, 5.0, 7.0, 4.0, 3.0, 4.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.466796875, -3.331634521484375, -3.19647216796875, -3.061309814453125, -2.9261474609375, -2.790985107421875, -2.65582275390625, -2.520660400390625, -2.385498046875, -2.250335693359375, -2.11517333984375, -1.980010986328125, -1.8448486328125, -1.709686279296875, -1.57452392578125, -1.439361572265625, -1.30419921875, -1.169036865234375, -1.03387451171875, -0.898712158203125, -0.7635498046875, -0.628387451171875, -0.49322509765625, -0.358062744140625, -0.222900390625, -0.087738037109375, 0.04742431640625, 0.182586669921875, 0.3177490234375, 0.452911376953125, 0.58807373046875, 0.723236083984375, 0.8583984375, 0.993560791015625, 1.12872314453125, 1.263885498046875, 1.3990478515625, 1.534210205078125, 1.66937255859375, 1.804534912109375, 1.939697265625, 2.074859619140625, 2.21002197265625, 2.345184326171875, 2.4803466796875, 2.615509033203125, 2.75067138671875, 2.885833740234375, 3.02099609375, 3.156158447265625, 3.29132080078125, 3.426483154296875, 3.5616455078125, 3.696807861328125, 3.83197021484375, 3.967132568359375, 4.102294921875, 4.237457275390625, 4.37261962890625, 4.507781982421875, 4.6429443359375, 4.778106689453125, 4.91326904296875, 5.048431396484375, 5.18359375]}, "gradients/encoder.encoder.layers.0.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0, 6.0, 4.0, 8.0, 7.0, 10.0, 17.0, 22.0, 34.0, 61.0, 78.0, 124.0, 227.0, 393.0, 753.0, 1996.0, 8169.0, 147829.0, 3945303.0, 79641.0, 6319.0, 1692.0, 701.0, 337.0, 204.0, 129.0, 76.0, 41.0, 37.0, 24.0, 17.0, 12.0, 6.0, 1.0, 4.0, 3.0, 2.0, 4.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-24.90625, -24.030029296875, -23.15380859375, -22.277587890625, -21.4013671875, -20.525146484375, -19.64892578125, -18.772705078125, -17.896484375, -17.020263671875, -16.14404296875, -15.267822265625, -14.3916015625, -13.515380859375, -12.63916015625, -11.762939453125, -10.88671875, -10.010498046875, -9.13427734375, -8.258056640625, -7.3818359375, -6.505615234375, -5.62939453125, -4.753173828125, -3.876953125, -3.000732421875, -2.12451171875, -1.248291015625, -0.3720703125, 0.504150390625, 1.38037109375, 2.256591796875, 3.1328125, 4.009033203125, 4.88525390625, 5.761474609375, 6.6376953125, 7.513916015625, 8.39013671875, 9.266357421875, 10.142578125, 11.018798828125, 11.89501953125, 12.771240234375, 13.6474609375, 14.523681640625, 15.39990234375, 16.276123046875, 17.15234375, 18.028564453125, 18.90478515625, 19.781005859375, 20.6572265625, 21.533447265625, 22.40966796875, 23.285888671875, 24.162109375, 25.038330078125, 25.91455078125, 26.790771484375, 27.6669921875, 28.543212890625, 29.41943359375, 30.295654296875, 31.171875]}, "gradients/encoder.encoder.layers.0.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 3.0, 2.0, 6.0, 7.0, 21.0, 15.0, 36.0, 41.0, 56.0, 88.0, 152.0, 187.0, 261.0, 460.0, 604.0, 687.0, 450.0, 339.0, 229.0, 143.0, 96.0, 53.0, 43.0, 28.0, 18.0, 17.0, 15.0, 8.0, 7.0, 7.0, 2.0, 3.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-17.1875, -16.651611328125, -16.11572265625, -15.579833984375, -15.0439453125, -14.508056640625, -13.97216796875, -13.436279296875, -12.900390625, -12.364501953125, -11.82861328125, -11.292724609375, -10.7568359375, -10.220947265625, -9.68505859375, -9.149169921875, -8.61328125, -8.077392578125, -7.54150390625, -7.005615234375, -6.4697265625, -5.933837890625, -5.39794921875, -4.862060546875, -4.326171875, -3.790283203125, -3.25439453125, -2.718505859375, -2.1826171875, -1.646728515625, -1.11083984375, -0.574951171875, -0.0390625, 0.496826171875, 1.03271484375, 1.568603515625, 2.1044921875, 2.640380859375, 3.17626953125, 3.712158203125, 4.248046875, 4.783935546875, 5.31982421875, 5.855712890625, 6.3916015625, 6.927490234375, 7.46337890625, 7.999267578125, 8.53515625, 9.071044921875, 9.60693359375, 10.142822265625, 10.6787109375, 11.214599609375, 11.75048828125, 12.286376953125, 12.822265625, 13.358154296875, 13.89404296875, 14.429931640625, 14.9658203125, 15.501708984375, 16.03759765625, 16.573486328125, 17.109375]}, "gradients/encoder.encoder.layers.0.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 3.0, 4.0, 5.0, 12.0, 12.0, 25.0, 42.0, 83.0, 146.0, 197.0, 161.0, 119.0, 77.0, 49.0, 27.0, 20.0, 4.0, 6.0, 4.0, 3.0, 3.0, 1.0, 1.0, 1.0, 2.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-120.17025756835938, -114.39874267578125, -108.62723541259766, -102.85572814941406, -97.08421325683594, -91.31269836425781, -85.54119110107422, -79.76968383789062, -73.9981689453125, -68.22665405273438, -62.45514678955078, -56.68363571166992, -50.91212463378906, -45.1406135559082, -39.369102478027344, -33.597591400146484, -27.826080322265625, -22.054569244384766, -16.283058166503906, -10.511547088623047, -4.7400360107421875, 1.0314750671386719, 6.802986145019531, 12.57449722290039, 18.34600830078125, 24.11751937866211, 29.88903045654297, 35.66054153442383, 41.43205261230469, 47.20356369018555, 52.975074768066406, 58.746585845947266, 64.51809692382812, 70.28961181640625, 76.06111907958984, 81.83262634277344, 87.60414123535156, 93.37565612792969, 99.14716339111328, 104.91867065429688, 110.690185546875, 116.46170043945312, 122.23320770263672, 128.0047149658203, 133.77622985839844, 139.54774475097656, 145.31924438476562, 151.09075927734375, 156.86227416992188, 162.6337890625, 168.40530395507812, 174.1768035888672, 179.9483184814453, 185.71983337402344, 191.4913330078125, 197.26284790039062, 203.03436279296875, 208.80587768554688, 214.577392578125, 220.34889221191406, 226.1204071044922, 231.8919219970703, 237.66342163085938, 243.4349365234375, 249.20645141601562]}, "gradients/encoder.encoder.layers.0.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 1.0, 4.0, 3.0, 7.0, 9.0, 11.0, 10.0, 9.0, 18.0, 12.0, 13.0, 21.0, 25.0, 24.0, 26.0, 38.0, 42.0, 35.0, 40.0, 47.0, 40.0, 58.0, 62.0, 54.0, 45.0, 42.0, 39.0, 32.0, 32.0, 33.0, 27.0, 19.0, 22.0, 21.0, 20.0, 13.0, 12.0, 5.0, 6.0, 7.0, 8.0, 6.0, 4.0, 3.0, 3.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-74.97482299804688, -72.59039306640625, -70.20597076416016, -67.82154083251953, -65.43711853027344, -63.05268859863281, -60.66826248168945, -58.283836364746094, -55.89940643310547, -53.51498031616211, -51.13055419921875, -48.746124267578125, -46.361698150634766, -43.977272033691406, -41.59284591674805, -39.20841979980469, -36.82399368286133, -34.43956756591797, -32.05514144897461, -29.670713424682617, -27.286285400390625, -24.901859283447266, -22.517433166503906, -20.133005142211914, -17.748579025268555, -15.364151954650879, -12.979724884033203, -10.595298767089844, -8.210871696472168, -5.826444625854492, -3.442018508911133, -1.0575904846191406, 1.3268356323242188, 3.7112624645233154, 6.095689296722412, 8.48011589050293, 10.864542961120605, 13.248970031738281, 15.63339614868164, 18.017824172973633, 20.402250289916992, 22.78667640686035, 25.171104431152344, 27.555530548095703, 29.939956665039062, 32.32438659667969, 34.70880889892578, 37.093238830566406, 39.477664947509766, 41.862091064453125, 44.246517181396484, 46.630943298339844, 49.01537322998047, 51.39979934692383, 53.78422546386719, 56.16865539550781, 58.553077697753906, 60.937503814697266, 63.321929931640625, 65.70635986328125, 68.09078216552734, 70.47521209716797, 72.85963439941406, 75.24406433105469, 77.62849426269531]}, "gradients/encoder.encoder.layers.0.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 0.0, 1.0, 1.0, 3.0, 1.0, 6.0, 2.0, 1.0, 9.0, 13.0, 7.0, 24.0, 33.0, 56.0, 84.0, 138.0, 219.0, 381.0, 686.0, 1407.0, 2983.0, 7220.0, 21034.0, 75565.0, 458989.0, 380577.0, 67547.0, 19115.0, 6800.0, 2762.0, 1278.0, 714.0, 375.0, 196.0, 121.0, 66.0, 53.0, 25.0, 29.0, 13.0, 7.0, 10.0, 3.0, 1.0, 4.0, 1.0, 1.0, 1.0, 0.0, 4.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0], "bins": [-9.25, -8.9605712890625, -8.671142578125, -8.3817138671875, -8.09228515625, -7.8028564453125, -7.513427734375, -7.2239990234375, -6.9345703125, -6.6451416015625, -6.355712890625, -6.0662841796875, -5.77685546875, -5.4874267578125, -5.197998046875, -4.9085693359375, -4.619140625, -4.3297119140625, -4.040283203125, -3.7508544921875, -3.46142578125, -3.1719970703125, -2.882568359375, -2.5931396484375, -2.3037109375, -2.0142822265625, -1.724853515625, -1.4354248046875, -1.14599609375, -0.8565673828125, -0.567138671875, -0.2777099609375, 0.01171875, 0.3011474609375, 0.590576171875, 0.8800048828125, 1.16943359375, 1.4588623046875, 1.748291015625, 2.0377197265625, 2.3271484375, 2.6165771484375, 2.906005859375, 3.1954345703125, 3.48486328125, 3.7742919921875, 4.063720703125, 4.3531494140625, 4.642578125, 4.9320068359375, 5.221435546875, 5.5108642578125, 5.80029296875, 6.0897216796875, 6.379150390625, 6.6685791015625, 6.9580078125, 7.2474365234375, 7.536865234375, 7.8262939453125, 8.11572265625, 8.4051513671875, 8.694580078125, 8.9840087890625, 9.2734375]}, "gradients/encoder.encoder.layers.0.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 5.0, 2.0, 2.0, 4.0, 5.0, 6.0, 8.0, 14.0, 16.0, 20.0, 23.0, 31.0, 32.0, 42.0, 45.0, 49.0, 81.0, 65.0, 65.0, 66.0, 57.0, 56.0, 58.0, 44.0, 46.0, 38.0, 26.0, 23.0, 22.0, 13.0, 9.0, 10.0, 6.0, 8.0, 5.0, 8.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.48828125, -5.2928466796875, -5.097412109375, -4.9019775390625, -4.70654296875, -4.5111083984375, -4.315673828125, -4.1202392578125, -3.9248046875, -3.7293701171875, -3.533935546875, -3.3385009765625, -3.14306640625, -2.9476318359375, -2.752197265625, -2.5567626953125, -2.361328125, -2.1658935546875, -1.970458984375, -1.7750244140625, -1.57958984375, -1.3841552734375, -1.188720703125, -0.9932861328125, -0.7978515625, -0.6024169921875, -0.406982421875, -0.2115478515625, -0.01611328125, 0.1793212890625, 0.374755859375, 0.5701904296875, 0.765625, 0.9610595703125, 1.156494140625, 1.3519287109375, 1.54736328125, 1.7427978515625, 1.938232421875, 2.1336669921875, 2.3291015625, 2.5245361328125, 2.719970703125, 2.9154052734375, 3.11083984375, 3.3062744140625, 3.501708984375, 3.6971435546875, 3.892578125, 4.0880126953125, 4.283447265625, 4.4788818359375, 4.67431640625, 4.8697509765625, 5.065185546875, 5.2606201171875, 5.4560546875, 5.6514892578125, 5.846923828125, 6.0423583984375, 6.23779296875, 6.4332275390625, 6.628662109375, 6.8240966796875, 7.01953125]}, "gradients/encoder.encoder.layers.0.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 1.0, 0.0, 2.0, 1.0, 2.0, 1.0, 5.0, 4.0, 14.0, 6.0, 8.0, 10.0, 14.0, 25.0, 31.0, 46.0, 92.0, 100.0, 169.0, 268.0, 497.0, 999.0, 2582.0, 10755.0, 80605.0, 894279.0, 46738.0, 7340.0, 2042.0, 823.0, 391.0, 246.0, 135.0, 99.0, 56.0, 31.0, 39.0, 20.0, 23.0, 11.0, 21.0, 7.0, 8.0, 3.0, 1.0, 1.0, 3.0, 1.0, 4.0, 1.0, 2.0, 3.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0], "bins": [-13.4296875, -12.97265625, -12.515625, -12.05859375, -11.6015625, -11.14453125, -10.6875, -10.23046875, -9.7734375, -9.31640625, -8.859375, -8.40234375, -7.9453125, -7.48828125, -7.03125, -6.57421875, -6.1171875, -5.66015625, -5.203125, -4.74609375, -4.2890625, -3.83203125, -3.375, -2.91796875, -2.4609375, -2.00390625, -1.546875, -1.08984375, -0.6328125, -0.17578125, 0.28125, 0.73828125, 1.1953125, 1.65234375, 2.109375, 2.56640625, 3.0234375, 3.48046875, 3.9375, 4.39453125, 4.8515625, 5.30859375, 5.765625, 6.22265625, 6.6796875, 7.13671875, 7.59375, 8.05078125, 8.5078125, 8.96484375, 9.421875, 9.87890625, 10.3359375, 10.79296875, 11.25, 11.70703125, 12.1640625, 12.62109375, 13.078125, 13.53515625, 13.9921875, 14.44921875, 14.90625, 15.36328125, 15.8203125]}, "gradients/encoder.encoder.layers.0.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 4.0, 2.0, 0.0, 4.0, 5.0, 1.0, 7.0, 12.0, 5.0, 11.0, 19.0, 24.0, 12.0, 19.0, 34.0, 29.0, 35.0, 35.0, 49.0, 64.0, 94.0, 77.0, 87.0, 69.0, 58.0, 52.0, 35.0, 31.0, 25.0, 18.0, 13.0, 13.0, 10.0, 9.0, 10.0, 9.0, 8.0, 5.0, 3.0, 6.0, 4.0, 6.0, 0.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-19.390625, -18.605224609375, -17.81982421875, -17.034423828125, -16.2490234375, -15.463623046875, -14.67822265625, -13.892822265625, -13.107421875, -12.322021484375, -11.53662109375, -10.751220703125, -9.9658203125, -9.180419921875, -8.39501953125, -7.609619140625, -6.82421875, -6.038818359375, -5.25341796875, -4.468017578125, -3.6826171875, -2.897216796875, -2.11181640625, -1.326416015625, -0.541015625, 0.244384765625, 1.02978515625, 1.815185546875, 2.6005859375, 3.385986328125, 4.17138671875, 4.956787109375, 5.7421875, 6.527587890625, 7.31298828125, 8.098388671875, 8.8837890625, 9.669189453125, 10.45458984375, 11.239990234375, 12.025390625, 12.810791015625, 13.59619140625, 14.381591796875, 15.1669921875, 15.952392578125, 16.73779296875, 17.523193359375, 18.30859375, 19.093994140625, 19.87939453125, 20.664794921875, 21.4501953125, 22.235595703125, 23.02099609375, 23.806396484375, 24.591796875, 25.377197265625, 26.16259765625, 26.947998046875, 27.7333984375, 28.518798828125, 29.30419921875, 30.089599609375, 30.875]}, "gradients/encoder.encoder.layers.0.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 1.0, 2.0, 1.0, 4.0, 6.0, 3.0, 4.0, 8.0, 15.0, 8.0, 14.0, 20.0, 29.0, 49.0, 73.0, 116.0, 231.0, 533.0, 1491.0, 5816.0, 41773.0, 939004.0, 49928.0, 6628.0, 1613.0, 558.0, 268.0, 130.0, 77.0, 31.0, 27.0, 21.0, 22.0, 15.0, 12.0, 7.0, 5.0, 5.0, 3.0, 5.0, 1.0, 3.0, 0.0, 4.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-4.640625, -4.49798583984375, -4.3553466796875, -4.21270751953125, -4.070068359375, -3.92742919921875, -3.7847900390625, -3.64215087890625, -3.49951171875, -3.35687255859375, -3.2142333984375, -3.07159423828125, -2.928955078125, -2.78631591796875, -2.6436767578125, -2.50103759765625, -2.3583984375, -2.21575927734375, -2.0731201171875, -1.93048095703125, -1.787841796875, -1.64520263671875, -1.5025634765625, -1.35992431640625, -1.21728515625, -1.07464599609375, -0.9320068359375, -0.78936767578125, -0.646728515625, -0.50408935546875, -0.3614501953125, -0.21881103515625, -0.076171875, 0.06646728515625, 0.2091064453125, 0.35174560546875, 0.494384765625, 0.63702392578125, 0.7796630859375, 0.92230224609375, 1.06494140625, 1.20758056640625, 1.3502197265625, 1.49285888671875, 1.635498046875, 1.77813720703125, 1.9207763671875, 2.06341552734375, 2.2060546875, 2.34869384765625, 2.4913330078125, 2.63397216796875, 2.776611328125, 2.91925048828125, 3.0618896484375, 3.20452880859375, 3.34716796875, 3.48980712890625, 3.6324462890625, 3.77508544921875, 3.917724609375, 4.06036376953125, 4.2030029296875, 4.34564208984375, 4.48828125]}, "gradients/encoder.encoder.layers.0.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 2.0, 2.0, 3.0, 0.0, 1.0, 1.0, 2.0, 7.0, 1.0, 2.0, 3.0, 8.0, 10.0, 14.0, 22.0, 20.0, 38.0, 77.0, 184.0, 236.0, 171.0, 67.0, 39.0, 28.0, 11.0, 9.0, 7.0, 6.0, 10.0, 6.0, 2.0, 4.0, 3.0, 4.0, 1.0, 3.0, 3.0, 1.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.001789093017578125, -0.001733824610710144, -0.001678556203842163, -0.0016232877969741821, -0.0015680193901062012, -0.0015127509832382202, -0.0014574825763702393, -0.0014022141695022583, -0.0013469457626342773, -0.0012916773557662964, -0.0012364089488983154, -0.0011811405420303345, -0.0011258721351623535, -0.0010706037282943726, -0.0010153353214263916, -0.0009600669145584106, -0.0009047985076904297, -0.0008495301008224487, -0.0007942616939544678, -0.0007389932870864868, -0.0006837248802185059, -0.0006284564733505249, -0.0005731880664825439, -0.000517919659614563, -0.00046265125274658203, -0.0004073828458786011, -0.0003521144390106201, -0.00029684603214263916, -0.0002415776252746582, -0.00018630921840667725, -0.0001310408115386963, -7.577240467071533e-05, -2.0503997802734375e-05, 3.476440906524658e-05, 9.003281593322754e-05, 0.0001453012228012085, 0.00020056962966918945, 0.0002558380365371704, 0.00031110644340515137, 0.0003663748502731323, 0.0004216432571411133, 0.00047691166400909424, 0.0005321800708770752, 0.0005874484777450562, 0.0006427168846130371, 0.0006979852914810181, 0.000753253698348999, 0.00080852210521698, 0.0008637905120849609, 0.0009190589189529419, 0.0009743273258209229, 0.0010295957326889038, 0.0010848641395568848, 0.0011401325464248657, 0.0011954009532928467, 0.0012506693601608276, 0.0013059377670288086, 0.0013612061738967896, 0.0014164745807647705, 0.0014717429876327515, 0.0015270113945007324, 0.0015822798013687134, 0.0016375482082366943, 0.0016928166151046753, 0.0017480850219726562]}, "gradients/encoder.encoder.layers.0.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 2.0, 1.0, 3.0, 7.0, 4.0, 5.0, 12.0, 17.0, 29.0, 51.0, 83.0, 141.0, 279.0, 650.0, 1804.0, 5975.0, 29717.0, 881540.0, 110411.0, 12602.0, 3244.0, 1092.0, 453.0, 203.0, 96.0, 49.0, 28.0, 14.0, 15.0, 5.0, 10.0, 6.0, 6.0, 2.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.486328125, -3.376556396484375, -3.26678466796875, -3.157012939453125, -3.0472412109375, -2.937469482421875, -2.82769775390625, -2.717926025390625, -2.608154296875, -2.498382568359375, -2.38861083984375, -2.278839111328125, -2.1690673828125, -2.059295654296875, -1.94952392578125, -1.839752197265625, -1.72998046875, -1.620208740234375, -1.51043701171875, -1.400665283203125, -1.2908935546875, -1.181121826171875, -1.07135009765625, -0.961578369140625, -0.851806640625, -0.742034912109375, -0.63226318359375, -0.522491455078125, -0.4127197265625, -0.302947998046875, -0.19317626953125, -0.083404541015625, 0.0263671875, 0.136138916015625, 0.24591064453125, 0.355682373046875, 0.4654541015625, 0.575225830078125, 0.68499755859375, 0.794769287109375, 0.904541015625, 1.014312744140625, 1.12408447265625, 1.233856201171875, 1.3436279296875, 1.453399658203125, 1.56317138671875, 1.672943115234375, 1.78271484375, 1.892486572265625, 2.00225830078125, 2.112030029296875, 2.2218017578125, 2.331573486328125, 2.44134521484375, 2.551116943359375, 2.660888671875, 2.770660400390625, 2.88043212890625, 2.990203857421875, 3.0999755859375, 3.209747314453125, 3.31951904296875, 3.429290771484375, 3.5390625]}, "gradients/encoder.encoder.layers.0.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 2.0, 4.0, 1.0, 3.0, 2.0, 3.0, 5.0, 2.0, 12.0, 8.0, 11.0, 9.0, 13.0, 23.0, 16.0, 32.0, 43.0, 75.0, 101.0, 115.0, 116.0, 111.0, 77.0, 75.0, 39.0, 24.0, 19.0, 19.0, 11.0, 8.0, 5.0, 7.0, 6.0, 3.0, 4.0, 1.0, 3.0, 0.0, 4.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.57421875, -2.488616943359375, -2.40301513671875, -2.317413330078125, -2.2318115234375, -2.146209716796875, -2.06060791015625, -1.975006103515625, -1.889404296875, -1.803802490234375, -1.71820068359375, -1.632598876953125, -1.5469970703125, -1.461395263671875, -1.37579345703125, -1.290191650390625, -1.20458984375, -1.118988037109375, -1.03338623046875, -0.947784423828125, -0.8621826171875, -0.776580810546875, -0.69097900390625, -0.605377197265625, -0.519775390625, -0.434173583984375, -0.34857177734375, -0.262969970703125, -0.1773681640625, -0.091766357421875, -0.00616455078125, 0.079437255859375, 0.1650390625, 0.250640869140625, 0.33624267578125, 0.421844482421875, 0.5074462890625, 0.593048095703125, 0.67864990234375, 0.764251708984375, 0.849853515625, 0.935455322265625, 1.02105712890625, 1.106658935546875, 1.1922607421875, 1.277862548828125, 1.36346435546875, 1.449066162109375, 1.53466796875, 1.620269775390625, 1.70587158203125, 1.791473388671875, 1.8770751953125, 1.962677001953125, 2.04827880859375, 2.133880615234375, 2.219482421875, 2.305084228515625, 2.39068603515625, 2.476287841796875, 2.5618896484375, 2.647491455078125, 2.73309326171875, 2.818695068359375, 2.904296875]}, "gradients/encoder.encoder.layers.0.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 5.0, 2.0, 2.0, 10.0, 13.0, 23.0, 35.0, 69.0, 190.0, 460.0, 93.0, 43.0, 26.0, 15.0, 10.0, 5.0, 5.0, 0.0, 4.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-88.88695526123047, -85.36268615722656, -81.83840942382812, -78.31414031982422, -74.78987121582031, -71.2656021118164, -67.7413330078125, -64.21705627441406, -60.692787170410156, -57.16851806640625, -53.64424514770508, -50.119972229003906, -46.595703125, -43.071434020996094, -39.54716110229492, -36.02288818359375, -32.498619079589844, -28.974348068237305, -25.450077056884766, -21.925806045532227, -18.401535034179688, -14.877264022827148, -11.35299301147461, -7.82872200012207, -4.304450988769531, -0.7801799774169922, 2.744091033935547, 6.268362045288086, 9.792633056640625, 13.316904067993164, 16.841175079345703, 20.365446090698242, 23.889724731445312, 27.41399574279785, 30.93826675415039, 34.46253967285156, 37.98680877685547, 41.511077880859375, 45.03535079956055, 48.55962371826172, 52.083892822265625, 55.60816192626953, 59.1324348449707, 62.656707763671875, 66.18097686767578, 69.70524597167969, 73.22952270507812, 76.75379180908203, 80.27806091308594, 83.80233001708984, 87.32659912109375, 90.85087585449219, 94.3751449584961, 97.8994140625, 101.42369079589844, 104.94795989990234, 108.47222900390625, 111.99649810791016, 115.52076721191406, 119.0450439453125, 122.5693130493164, 126.09358215332031, 129.61785888671875, 133.14212036132812, 136.66639709472656]}, "gradients/encoder.encoder.layers.0.layer_norm.bias": {"_type": "histogram", "values": [1.0, 3.0, 0.0, 0.0, 2.0, 3.0, 2.0, 1.0, 1.0, 4.0, 3.0, 5.0, 6.0, 8.0, 5.0, 9.0, 11.0, 7.0, 22.0, 15.0, 26.0, 18.0, 19.0, 17.0, 19.0, 30.0, 77.0, 193.0, 208.0, 69.0, 24.0, 32.0, 27.0, 23.0, 21.0, 10.0, 18.0, 11.0, 13.0, 12.0, 12.0, 4.0, 3.0, 6.0, 3.0, 4.0, 2.0, 0.0, 4.0, 1.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-53.78363800048828, -51.8643684387207, -49.945098876953125, -48.02582550048828, -46.1065559387207, -44.187286376953125, -42.26801681518555, -40.34874725341797, -38.429473876953125, -36.51020431518555, -34.59093475341797, -32.671661376953125, -30.752391815185547, -28.83312225341797, -26.91385269165039, -24.994583129882812, -23.075313568115234, -21.156044006347656, -19.236772537231445, -17.317502975463867, -15.398232460021973, -13.478961944580078, -11.5596923828125, -9.640421867370605, -7.721151351928711, -5.801880836486816, -3.88261079788208, -1.9633407592773438, -0.04407024383544922, 1.8752002716064453, 3.7944698333740234, 5.713740348815918, 7.6330108642578125, 9.552281379699707, 11.471551895141602, 13.39082145690918, 15.310091972351074, 17.22936248779297, 19.148632049560547, 21.067901611328125, 22.987173080444336, 24.906442642211914, 26.825714111328125, 28.744983673095703, 30.66425323486328, 32.583526611328125, 34.50279235839844, 36.42206573486328, 38.34133529663086, 40.26060485839844, 42.179874420166016, 44.099143981933594, 46.01841735839844, 47.937686920166016, 49.856956481933594, 51.77622604370117, 53.69549560546875, 55.61476516723633, 57.534034729003906, 59.45330810546875, 61.37257766723633, 63.291847229003906, 65.21112060546875, 67.13038635253906, 69.0496597290039]}, "gradients/encoder.encoder.pos_conv_embed.conv.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 4.0, 2.0, 2.0, 4.0, 8.0, 5.0, 9.0, 17.0, 13.0, 21.0, 15.0, 19.0, 30.0, 37.0, 46.0, 49.0, 91.0, 295.0, 89.0, 34.0, 28.0, 43.0, 37.0, 21.0, 18.0, 16.0, 14.0, 12.0, 7.0, 7.0, 7.0, 5.0, 7.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.5234375, -7.322509765625, -7.12158203125, -6.920654296875, -6.7197265625, -6.518798828125, -6.31787109375, -6.116943359375, -5.916015625, -5.715087890625, -5.51416015625, -5.313232421875, -5.1123046875, -4.911376953125, -4.71044921875, -4.509521484375, -4.30859375, -4.107666015625, -3.90673828125, -3.705810546875, -3.5048828125, -3.303955078125, -3.10302734375, -2.902099609375, -2.701171875, -2.500244140625, -2.29931640625, -2.098388671875, -1.8974609375, -1.696533203125, -1.49560546875, -1.294677734375, -1.09375, -0.892822265625, -0.69189453125, -0.490966796875, -0.2900390625, -0.089111328125, 0.11181640625, 0.312744140625, 0.513671875, 0.714599609375, 0.91552734375, 1.116455078125, 1.3173828125, 1.518310546875, 1.71923828125, 1.920166015625, 2.12109375, 2.322021484375, 2.52294921875, 2.723876953125, 2.9248046875, 3.125732421875, 3.32666015625, 3.527587890625, 3.728515625, 3.929443359375, 4.13037109375, 4.331298828125, 4.5322265625, 4.733154296875, 4.93408203125, 5.135009765625, 5.3359375]}, "gradients/encoder.encoder.pos_conv_embed.conv.weight_v": {"_type": "histogram", "values": [4.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 4.0, 5.0, 3.0, 6.0, 6.0, 11.0, 10.0, 31.0, 38.0, 38.0, 61.0, 114.0, 198.0, 382.0, 887.0, 2214.0, 7309.0, 156293.0, 8206714.0, 9692.0, 2615.0, 995.0, 419.0, 208.0, 114.0, 73.0, 40.0, 29.0, 24.0, 14.0, 19.0, 7.0, 2.0, 5.0, 2.0, 2.0, 4.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-45.53217697143555, -44.1966552734375, -42.86112976074219, -41.52560806274414, -40.19008255004883, -38.85456085205078, -37.51903533935547, -36.18351364135742, -34.847991943359375, -33.51247024536133, -32.176944732666016, -30.84142303466797, -29.505897521972656, -28.17037582397461, -26.83485221862793, -25.49932861328125, -24.163803100585938, -22.828279495239258, -21.492755889892578, -20.15723419189453, -18.82170867919922, -17.486186981201172, -16.150663375854492, -14.815139770507812, -13.479616165161133, -12.144092559814453, -10.808568954467773, -9.47304630279541, -8.13752269744873, -6.801999092102051, -5.4664764404296875, -4.130952835083008, -2.795429229736328, -1.4599058628082275, -0.12438249588012695, 1.2111406326293945, 2.546664237976074, 3.882187843322754, 5.217710494995117, 6.553234100341797, 7.888757705688477, 9.224281311035156, 10.559804916381836, 11.8953275680542, 13.230851173400879, 14.566374778747559, 15.901897430419922, 17.2374210357666, 18.57294464111328, 19.90846824645996, 21.24399185180664, 22.579513549804688, 23.9150390625, 25.250560760498047, 26.586084365844727, 27.921607971191406, 29.257131576538086, 30.592655181884766, 31.928178787231445, 33.263702392578125, 34.59922409057617, 35.934749603271484, 37.27027130126953, 38.605796813964844, 39.94131851196289]}, "gradients/encoder.encoder.pos_conv_embed.conv.weight_g": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 4.0, 0.0, 1.0, 1.0, 3.0, 0.0, 0.0, 4.0, 1.0, 2.0, 0.0, 4.0, 7.0, 3.0, 4.0, 3.0, 1.0, 9.0, 3.0, 1.0, 3.0, 10.0, 4.0, 6.0, 5.0, 0.0, 3.0, 3.0, 5.0, 7.0, 3.0, 2.0, 1.0, 3.0, 1.0, 2.0, 3.0, 2.0, 1.0, 3.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-39.83613204956055, -38.578125, -37.32012176513672, -36.06211853027344, -34.80411148071289, -33.546104431152344, -32.28810119628906, -31.03009605407715, -29.772090911865234, -28.51408576965332, -27.256080627441406, -25.998075485229492, -24.740070343017578, -23.482065200805664, -22.22406005859375, -20.966054916381836, -19.708049774169922, -18.450044631958008, -17.192039489746094, -15.93403434753418, -14.676029205322266, -13.418024063110352, -12.160018920898438, -10.902013778686523, -9.64400863647461, -8.386003494262695, -7.127998352050781, -5.869993209838867, -4.611988067626953, -3.353982925415039, -2.095977783203125, -0.8379726409912109, 0.4200286865234375, 1.6780338287353516, 2.9360389709472656, 4.19404411315918, 5.452049255371094, 6.710054397583008, 7.968059539794922, 9.226064682006836, 10.48406982421875, 11.742074966430664, 13.000080108642578, 14.258085250854492, 15.516090393066406, 16.77409553527832, 18.032100677490234, 19.29010581970215, 20.548110961914062, 21.806116104125977, 23.06412124633789, 24.322126388549805, 25.58013153076172, 26.838136672973633, 28.096141815185547, 29.35414695739746, 30.612152099609375, 31.87015724182129, 33.1281623840332, 34.38616943359375, 35.64417266845703, 36.90217590332031, 38.16018295288086, 39.418190002441406, 40.67619323730469]}, "gradients/encoder.feature_projection.projection.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 1.0, 4.0, 10.0, 5.0, 12.0, 23.0, 31.0, 48.0, 103.0, 168.0, 292.0, 689.0, 1664.0, 5980.0, 28577.0, 168325.0, 257577.0, 47278.0, 9516.0, 2355.0, 795.0, 363.0, 190.0, 125.0, 54.0, 39.0, 21.0, 12.0, 7.0, 3.0, 1.0, 3.0, 1.0, 3.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-83.75, -81.49560546875, -79.2412109375, -76.98681640625, -74.732421875, -72.47802734375, -70.2236328125, -67.96923828125, -65.71484375, -63.46044921875, -61.2060546875, -58.95166015625, -56.697265625, -54.44287109375, -52.1884765625, -49.93408203125, -47.6796875, -45.42529296875, -43.1708984375, -40.91650390625, -38.662109375, -36.40771484375, -34.1533203125, -31.89892578125, -29.64453125, -27.39013671875, -25.1357421875, -22.88134765625, -20.626953125, -18.37255859375, -16.1181640625, -13.86376953125, -11.609375, -9.35498046875, -7.1005859375, -4.84619140625, -2.591796875, -0.33740234375, 1.9169921875, 4.17138671875, 6.42578125, 8.68017578125, 10.9345703125, 13.18896484375, 15.443359375, 17.69775390625, 19.9521484375, 22.20654296875, 24.4609375, 26.71533203125, 28.9697265625, 31.22412109375, 33.478515625, 35.73291015625, 37.9873046875, 40.24169921875, 42.49609375, 44.75048828125, 47.0048828125, 49.25927734375, 51.513671875, 53.76806640625, 56.0224609375, 58.27685546875, 60.53125]}, "gradients/encoder.feature_projection.projection.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 6.0, 4.0, 6.0, 8.0, 8.0, 11.0, 9.0, 22.0, 19.0, 19.0, 29.0, 33.0, 40.0, 52.0, 71.0, 52.0, 62.0, 74.0, 55.0, 78.0, 63.0, 52.0, 47.0, 35.0, 30.0, 32.0, 18.0, 27.0, 7.0, 11.0, 6.0, 8.0, 3.0, 2.0, 4.0, 1.0, 2.0, 2.0, 1.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-6.5234375, -6.3355712890625, -6.147705078125, -5.9598388671875, -5.77197265625, -5.5841064453125, -5.396240234375, -5.2083740234375, -5.0205078125, -4.8326416015625, -4.644775390625, -4.4569091796875, -4.26904296875, -4.0811767578125, -3.893310546875, -3.7054443359375, -3.517578125, -3.3297119140625, -3.141845703125, -2.9539794921875, -2.76611328125, -2.5782470703125, -2.390380859375, -2.2025146484375, -2.0146484375, -1.8267822265625, -1.638916015625, -1.4510498046875, -1.26318359375, -1.0753173828125, -0.887451171875, -0.6995849609375, -0.51171875, -0.3238525390625, -0.135986328125, 0.0518798828125, 0.23974609375, 0.4276123046875, 0.615478515625, 0.8033447265625, 0.9912109375, 1.1790771484375, 1.366943359375, 1.5548095703125, 1.74267578125, 1.9305419921875, 2.118408203125, 2.3062744140625, 2.494140625, 2.6820068359375, 2.869873046875, 3.0577392578125, 3.24560546875, 3.4334716796875, 3.621337890625, 3.8092041015625, 3.9970703125, 4.1849365234375, 4.372802734375, 4.5606689453125, 4.74853515625, 4.9364013671875, 5.124267578125, 5.3121337890625, 5.5]}, "gradients/encoder.feature_projection.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 4.0, 0.0, 1.0, 3.0, 8.0, 8.0, 18.0, 31.0, 88.0, 146.0, 108.0, 36.0, 19.0, 13.0, 8.0, 5.0, 3.0, 2.0, 0.0, 2.0, 0.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-124.5690689086914, -121.64833068847656, -118.72760009765625, -115.80686950683594, -112.8861312866211, -109.96539306640625, -107.04466247558594, -104.12393188476562, -101.20319366455078, -98.28245544433594, -95.36172485351562, -92.44099426269531, -89.52025604248047, -86.59951782226562, -83.67878723144531, -80.758056640625, -77.83731842041016, -74.91658020019531, -71.995849609375, -69.07511901855469, -66.15438079833984, -63.233646392822266, -60.31291198730469, -57.39217758178711, -54.47144317626953, -51.55070877075195, -48.629974365234375, -45.7092399597168, -42.78850555419922, -39.86777114868164, -36.94703674316406, -34.026302337646484, -31.105560302734375, -28.184825897216797, -25.26409149169922, -22.34335708618164, -19.422622680664062, -16.501888275146484, -13.581153869628906, -10.660419464111328, -7.73968505859375, -4.818950653076172, -1.8982162475585938, 1.0225181579589844, 3.9432525634765625, 6.863986968994141, 9.784721374511719, 12.705455780029297, 15.626190185546875, 18.546924591064453, 21.46765899658203, 24.38839340209961, 27.309127807617188, 30.229862213134766, 33.150596618652344, 36.07133102416992, 38.9920654296875, 41.91279983520508, 44.833534240722656, 47.754268646240234, 50.67500305175781, 53.59573745727539, 56.51647186279297, 59.43720626831055, 62.357940673828125]}, "gradients/encoder.feature_projection.layer_norm.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 2.0, 3.0, 4.0, 2.0, 1.0, 2.0, 2.0, 3.0, 5.0, 3.0, 4.0, 11.0, 6.0, 10.0, 34.0, 48.0, 70.0, 78.0, 64.0, 51.0, 24.0, 14.0, 11.0, 7.0, 9.0, 3.0, 6.0, 3.0, 4.0, 3.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 3.0, 1.0, 4.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-28.507526397705078, -27.567724227905273, -26.62792205810547, -25.688119888305664, -24.74831771850586, -23.808517456054688, -22.86871337890625, -21.928913116455078, -20.989110946655273, -20.04930877685547, -19.109506607055664, -18.16970443725586, -17.229902267456055, -16.29010009765625, -15.350298881530762, -14.410497665405273, -13.470694541931152, -12.530892372131348, -11.591090202331543, -10.651288986206055, -9.71148681640625, -8.771684646606445, -7.831882476806641, -6.892080783843994, -5.9522786140441895, -5.012476444244385, -4.072674751281738, -3.1328725814819336, -2.193070650100708, -1.2532687187194824, -0.31346654891967773, 0.6263351440429688, 1.5661373138427734, 2.505939245223999, 3.4457411766052246, 4.385543346405029, 5.325345039367676, 6.2651472091674805, 7.204949378967285, 8.144750595092773, 9.084552764892578, 10.024354934692383, 10.964157104492188, 11.903959274291992, 12.84376049041748, 13.783562660217285, 14.72336483001709, 15.663166046142578, 16.602970123291016, 17.54277229309082, 18.482574462890625, 19.42237663269043, 20.362178802490234, 21.301979064941406, 22.241783142089844, 23.181583404541016, 24.12138557434082, 25.061187744140625, 26.00098991394043, 26.940792083740234, 27.88059425354004, 28.820396423339844, 29.760196685791016, 30.69999885559082, 31.639801025390625]}, "_wandb": {"runtime": 4156}} \ No newline at end of file diff --git a/wandb/run-20220302_222605-10glutwr/logs/debug-internal.log b/wandb/run-20220302_222605-10glutwr/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..a2fa412338890de1c27f3e739135c9aaca15b148 --- /dev/null +++ b/wandb/run-20220302_222605-10glutwr/logs/debug-internal.log @@ -0,0 +1,7217 @@ +2022-03-02 22:26:05,990 INFO MainThread:266499 [internal.py:wandb_internal():89] W&B internal server running at pid: 266499, started at: 2022-03-02 22:26:05.990515 +2022-03-02 22:26:05,992 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: check_version +2022-03-02 22:26:05,992 INFO WriterThread:266499 [datastore.py:open_for_write():77] open: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/run-10glutwr.wandb +2022-03-02 22:26:05,994 DEBUG SenderThread:266499 [sender.py:send():235] send: header +2022-03-02 22:26:05,994 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: check_version +2022-03-02 22:26:06,061 DEBUG SenderThread:266499 [sender.py:send():235] send: run +2022-03-02 22:26:06,157 INFO SenderThread:266499 [dir_watcher.py:__init__():169] watching files in: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files +2022-03-02 22:26:06,157 INFO SenderThread:266499 [sender.py:_start_run_threads():809] run started: 10glutwr with start time 1646259965 +2022-03-02 22:26:06,157 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:26:06,158 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:26:06,158 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: run_start +2022-03-02 22:26:06,164 DEBUG HandlerThread:266499 [meta.py:__init__():36] meta init +2022-03-02 22:26:06,164 DEBUG HandlerThread:266499 [meta.py:__init__():50] meta init done +2022-03-02 22:26:06,164 DEBUG HandlerThread:266499 [meta.py:probe():210] probe +2022-03-02 22:26:06,170 DEBUG HandlerThread:266499 [meta.py:_setup_git():200] setup git +2022-03-02 22:26:06,185 DEBUG HandlerThread:266499 [meta.py:_setup_git():207] setup git done +2022-03-02 22:26:06,185 DEBUG HandlerThread:266499 [meta.py:_save_pip():54] save pip +2022-03-02 22:26:06,186 DEBUG HandlerThread:266499 [meta.py:_save_pip():68] save pip done +2022-03-02 22:26:06,186 DEBUG HandlerThread:266499 [meta.py:probe():248] probe done +2022-03-02 22:26:06,269 DEBUG SenderThread:266499 [sender.py:send():235] send: files +2022-03-02 22:26:06,269 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-metadata.json with policy now +2022-03-02 22:26:06,274 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:26:06,274 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:26:06,330 DEBUG SenderThread:266499 [sender.py:send():235] send: config +2022-03-02 22:26:06,331 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:26:06,331 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:26:06,331 WARNING SenderThread:266499 [sender.py:send_metric():902] Seen metric with glob (shouldnt happen) +2022-03-02 22:26:06,583 INFO Thread-11 :266499 [upload_job.py:push():137] Uploaded file /tmp/tmpyl20fv8swandb/1lkxtcz7-wandb-metadata.json +2022-03-02 22:26:07,159 INFO Thread-8 :266499 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:26:07,160 INFO Thread-8 :266499 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:26:07,160 INFO Thread-8 :266499 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/requirements.txt +2022-03-02 22:26:07,160 INFO Thread-8 :266499 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-metadata.json +2022-03-02 22:26:09,158 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:26:13,160 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:26:15,161 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:26:19,159 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:26:19,159 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:26:19,159 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:26:19,159 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:26:19,159 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:26:19,160 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:26:19,162 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:26:19,163 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:26:21,163 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:26:21,525 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:26:21,526 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:26:25,164 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:26:27,165 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:26:29,166 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:26:31,380 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:26:31,380 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:26:31,380 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:26:32,167 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:26:33,167 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:26:34,658 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:26:35,168 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:26:36,568 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:26:36,568 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:26:37,169 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/config.yaml +2022-03-02 22:26:39,169 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:26:41,170 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:26:43,335 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:26:43,336 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:26:43,338 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:26:44,172 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:26:45,172 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:26:47,173 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:26:51,627 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:26:51,627 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:26:52,174 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:26:54,175 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:26:55,155 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:26:55,156 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:26:55,156 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:26:55,176 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:26:57,176 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:26:58,177 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:27:00,177 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:27:04,179 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:27:05,066 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:27:06,179 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:27:06,672 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:27:06,673 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:27:06,834 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:27:06,834 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:27:06,835 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:27:07,180 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:27:08,180 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:27:12,182 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:27:14,182 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:27:18,184 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:27:18,536 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:27:18,536 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:27:18,537 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:27:19,184 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:27:20,184 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:27:21,829 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:27:21,829 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:27:22,185 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:27:26,186 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:27:28,187 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:27:30,022 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:27:30,023 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:27:30,023 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:27:30,188 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:27:32,188 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:27:34,189 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:27:35,476 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:27:37,045 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:27:37,045 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:27:37,190 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:27:41,192 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:27:41,447 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:27:41,447 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:27:41,448 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:27:42,192 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:27:43,193 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:27:44,193 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:27:45,193 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:27:49,195 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:27:51,196 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:27:52,132 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:27:52,133 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:27:52,900 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:27:52,901 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:27:52,903 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:27:53,196 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:27:54,197 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:27:55,197 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:27:57,198 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:27:59,199 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:28:04,200 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:28:04,315 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:28:04,315 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:28:04,316 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:28:05,201 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:28:05,879 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:28:06,201 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:28:07,223 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:28:07,223 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:28:08,202 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:28:12,204 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:28:14,204 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:28:15,511 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:28:15,512 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:28:15,512 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:28:16,205 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:28:16,205 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:28:20,206 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:28:22,207 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:28:22,468 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:28:22,468 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:28:24,208 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:28:26,635 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:28:26,636 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:28:26,636 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:28:27,209 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:28:28,209 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:28:29,210 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:28:31,210 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:28:33,211 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:28:36,290 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:28:37,213 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:28:37,517 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:28:37,517 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:28:37,629 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:28:37,629 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:28:37,630 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:28:38,213 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:28:39,213 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:28:41,214 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:28:45,215 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:28:47,216 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:28:48,608 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:28:48,608 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:28:48,609 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:28:49,217 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:28:49,217 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:28:52,576 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:28:52,576 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:28:53,218 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:28:55,219 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:28:58,220 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:28:59,532 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:28:59,532 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:28:59,533 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:29:00,221 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:29:01,221 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:29:02,222 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:29:04,222 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:29:06,223 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:29:06,704 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:29:07,896 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:29:07,896 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:29:10,213 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:29:10,213 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:29:10,213 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:29:10,225 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:29:10,225 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:29:11,225 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:29:12,225 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:29:14,226 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:29:16,227 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:29:20,228 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:29:20,927 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:29:20,927 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:29:20,928 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:29:21,228 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:29:22,229 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:29:23,215 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:29:23,216 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:29:24,230 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:29:29,231 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:29:31,232 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:29:31,594 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:29:31,594 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:29:31,595 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:29:32,232 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:29:33,233 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:29:34,233 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:29:35,234 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:29:37,101 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:29:38,262 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:29:38,262 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:29:39,235 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:29:41,236 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:29:42,169 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:29:42,169 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:29:42,169 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:29:42,236 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:29:43,236 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:29:44,237 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:29:45,237 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:29:47,238 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:29:49,239 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:29:51,239 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:29:52,643 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:29:52,643 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:29:52,644 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:29:53,240 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:29:53,240 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:29:53,368 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:29:53,368 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:29:54,241 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:29:57,242 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:29:59,242 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:30:01,243 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:30:03,160 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:30:03,160 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:30:03,160 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:30:03,244 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:30:04,244 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:30:06,245 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:30:07,492 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:30:08,246 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:30:08,556 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:30:08,557 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:30:10,246 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:30:12,247 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:30:13,485 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:30:13,486 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:30:13,486 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:30:14,248 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:30:14,248 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:30:16,249 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:30:18,250 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:30:20,250 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:30:22,251 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:30:23,702 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:30:23,703 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:30:23,703 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:30:23,949 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:30:23,950 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:30:24,252 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:30:25,252 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:30:26,253 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:30:28,253 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:30:30,254 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:30:32,255 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:30:34,067 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:30:34,067 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:30:34,069 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:30:34,256 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:30:35,256 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:30:36,257 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:30:37,877 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:30:38,257 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:30:39,154 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:30:39,154 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:30:40,258 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:30:42,258 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:30:44,795 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,801 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,801 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,801 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,801 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,801 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,801 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,807 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,807 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,807 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,812 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,817 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,823 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,828 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,834 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,839 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,839 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,845 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,853 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,853 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,858 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,858 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,859 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,859 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,859 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,859 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,859 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,859 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,859 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,859 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,864 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,864 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,865 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,870 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,870 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,870 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,875 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,881 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,881 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,881 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,886 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,886 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,892 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,892 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,892 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,892 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,892 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,892 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,892 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,892 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,892 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,892 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,893 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,893 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,893 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,893 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,893 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,893 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,893 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,898 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,898 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,898 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,898 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,899 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,904 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,904 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,904 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,904 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,904 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,904 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,904 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,904 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,905 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,905 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,905 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,905 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,905 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,910 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,915 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,916 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,916 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,916 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,916 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,916 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,916 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,916 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,916 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,916 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,916 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,916 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,916 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,922 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,922 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,922 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,922 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,922 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,922 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,922 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,922 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,922 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,922 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,928 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,928 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,928 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,938 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,944 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,944 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,944 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,947 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,947 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,948 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,948 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,948 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,948 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,948 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,948 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,948 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,948 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,948 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,948 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,948 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,949 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,949 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,949 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,949 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,949 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,949 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,949 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,949 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,949 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,949 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,949 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,949 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,950 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,950 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,950 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,950 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,950 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,950 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,950 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,950 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,950 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,951 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,951 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,951 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,951 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,951 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,951 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,951 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,951 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,951 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,951 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,951 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,951 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,951 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,952 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,952 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,952 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,952 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,952 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,952 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,952 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,952 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,952 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,952 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,952 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,953 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,953 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,953 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,953 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,953 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,953 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,953 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,953 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,953 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,953 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,954 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,954 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,954 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,954 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,954 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,954 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,954 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,954 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,954 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,954 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,954 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,954 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,955 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,955 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,955 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,955 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,955 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,955 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,955 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,955 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,955 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,955 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,955 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,956 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,956 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,956 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,956 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,956 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,956 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,956 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,956 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,956 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,956 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,956 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,956 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,957 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,957 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,957 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,957 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,957 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,957 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,957 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,957 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,957 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,957 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,957 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,957 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,957 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,958 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,958 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,958 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,958 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,958 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,958 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,958 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,958 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,958 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,958 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,958 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,959 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,959 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,959 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,959 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,959 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,959 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,959 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,959 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,959 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,959 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,959 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,959 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,960 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,960 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,960 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,960 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,960 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,960 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,960 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,960 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,960 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,960 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,961 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,961 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,961 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,961 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,961 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,961 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,961 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,961 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,961 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,961 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,961 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,961 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,961 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,961 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,961 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,961 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,962 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,962 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,962 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,962 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,962 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,962 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,962 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,962 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,962 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,962 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,962 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,962 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,962 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,962 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,962 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,962 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,963 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,963 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,963 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,963 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,963 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,963 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,963 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,963 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,963 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,963 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,963 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,963 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,963 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,963 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,963 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,963 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,964 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,964 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,964 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,964 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,964 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,964 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,964 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,964 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,964 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,964 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,964 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,964 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,964 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,964 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,964 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,964 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,964 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,965 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,965 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,965 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,965 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,965 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,965 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,965 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,965 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,965 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,965 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,965 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,965 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,965 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,965 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,965 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,965 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,966 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,966 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,966 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,966 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,966 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,966 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,966 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,966 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,966 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,966 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,966 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,966 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,966 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,966 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,966 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,967 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,967 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,967 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,967 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,967 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,967 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,967 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,967 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,967 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,967 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,967 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,967 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,967 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,967 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,967 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,967 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,968 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,968 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,968 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,968 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,968 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,968 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,968 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,968 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,968 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,968 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,968 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,968 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,968 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,968 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,968 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,969 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,969 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,969 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,969 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,969 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,969 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,969 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,969 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,969 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,969 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,969 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,969 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,969 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,969 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,969 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,969 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,970 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,970 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,970 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,970 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,970 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,970 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,970 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,970 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,970 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,970 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,970 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,970 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,970 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,970 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,971 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,971 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,971 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,971 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,971 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,971 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,971 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,971 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,971 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,971 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,971 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,971 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,971 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,971 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,971 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,971 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,972 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,972 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,972 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,972 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,972 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,972 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,972 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,972 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,972 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,972 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,972 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,972 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,972 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,972 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,972 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,972 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,973 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,973 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,973 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,973 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,973 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,973 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,973 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,973 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,973 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,973 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,973 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,973 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,973 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,973 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,973 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,973 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,974 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,974 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,974 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,974 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,974 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,974 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,974 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,974 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,974 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,974 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,974 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,974 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,974 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,974 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,974 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,974 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,974 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,975 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,975 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,975 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,975 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,975 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,975 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,975 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,975 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,975 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,975 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,975 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,975 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,975 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,975 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,975 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,975 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,976 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,976 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,976 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,976 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,976 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,976 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,976 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,976 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,976 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,976 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,976 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,976 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,976 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,976 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,976 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,976 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,977 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,977 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,977 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,977 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,977 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,977 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,977 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,977 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,977 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,977 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,977 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,977 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,977 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,977 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,977 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,977 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,978 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,978 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,978 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,978 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,978 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,978 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,978 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,978 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,978 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,978 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,978 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,978 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,978 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,978 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,978 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,978 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,979 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,979 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,979 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,979 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,979 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,979 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,979 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,979 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,979 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,979 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,979 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,979 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,979 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,979 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,979 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,979 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,980 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,980 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,980 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,980 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,980 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,980 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,980 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,980 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,980 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,980 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,980 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,980 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,980 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,980 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,980 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,980 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,980 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,981 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,981 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,981 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,981 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,981 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,981 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,981 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,981 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,981 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,981 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,981 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,981 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,981 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,981 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,981 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,982 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,982 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,982 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,982 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,982 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,982 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,982 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,982 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,982 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,982 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,982 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,982 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,982 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,982 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,982 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,982 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,983 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,983 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,983 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,983 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,983 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,983 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,983 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,983 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,983 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,983 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,983 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,983 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,983 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,983 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,983 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,983 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,984 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,984 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,984 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,984 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,984 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,984 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,984 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,984 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,984 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,984 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,984 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,984 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,984 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,984 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,984 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,985 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,985 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,985 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,985 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,985 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,985 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,985 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,985 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,985 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,985 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,985 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,985 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,985 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,985 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,985 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,985 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,986 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,986 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,986 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,986 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,986 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,986 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,986 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,986 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,986 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,986 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,986 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,986 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,986 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,986 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,986 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,986 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,987 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,987 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,987 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,987 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,987 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,987 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,987 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,987 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,987 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,987 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,987 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,987 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,987 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,987 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,987 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,987 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,988 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,988 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,988 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,988 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,988 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,988 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,988 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,988 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,988 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,988 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,988 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,988 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,988 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,988 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,988 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,988 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,989 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,989 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,989 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,989 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,989 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,989 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,989 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,989 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,989 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,989 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,989 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,989 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,989 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,989 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,989 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,990 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,990 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,990 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,990 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,990 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,990 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,990 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,990 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,990 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,990 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,990 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,990 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,990 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,990 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,990 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,991 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,991 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,991 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,991 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,991 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,991 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,991 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,991 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,991 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,991 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,991 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,991 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,991 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,991 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,991 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,991 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,991 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,992 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,992 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,992 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,992 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,992 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,992 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,992 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,992 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,992 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,992 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,992 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,992 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,992 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,992 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,992 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,993 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,993 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,993 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,993 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,993 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,993 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,993 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,993 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,993 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,993 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,993 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,993 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,993 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,993 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,993 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,993 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,994 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,994 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,994 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,994 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,994 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,994 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,994 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,994 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,994 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,994 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,994 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,994 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,994 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,994 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,994 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,995 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,995 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,995 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,995 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,995 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,995 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,995 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,995 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,995 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,995 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,995 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,995 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,995 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,995 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,995 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,995 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,996 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,996 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,996 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,996 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,996 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,996 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,996 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,996 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,996 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,996 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,996 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,996 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,996 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,996 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,996 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,996 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,997 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,997 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,997 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,997 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,997 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,997 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,997 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,997 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,997 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,997 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,997 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,997 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,997 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,997 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,997 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,997 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,997 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,998 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,998 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,998 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,998 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,998 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,998 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,998 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,998 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,998 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,998 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,998 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,998 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,998 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,998 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,998 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,998 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,998 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,999 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,999 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,999 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,999 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,999 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,999 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,999 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,999 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,999 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,999 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,999 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,999 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,999 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,999 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,999 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:44,999 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,000 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,000 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,000 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,000 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,000 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,000 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,000 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,000 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,000 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,000 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,000 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,000 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,000 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,000 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,000 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,000 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,000 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,001 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,001 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,001 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,001 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,001 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,001 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,001 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,001 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,001 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,001 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,001 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,001 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,001 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,001 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,001 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,001 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,001 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,002 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,002 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,002 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,002 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,002 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,002 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,002 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,002 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,002 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,002 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,002 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,002 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,002 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,002 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,002 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,002 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,003 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,003 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,003 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,003 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,003 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,003 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,003 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,003 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,003 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,003 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,003 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,003 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,003 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,003 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,003 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,003 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,004 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,004 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,004 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,004 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,004 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,004 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,004 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,004 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,004 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,004 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,004 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,004 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,004 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,004 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,004 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,004 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,004 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,005 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,005 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,005 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,005 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,005 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,005 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,005 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,005 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,005 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,005 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,005 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,005 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,005 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,005 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,005 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,005 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,006 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,006 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,006 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,006 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,006 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,006 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,006 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,006 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,006 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,006 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,006 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,006 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,006 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,006 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,006 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,007 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,007 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,007 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,007 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,007 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,007 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,007 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,007 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,007 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,007 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,007 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,007 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,007 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,007 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,007 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,007 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,007 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,008 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,008 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,008 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,008 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,008 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,008 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,008 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,008 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,008 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,008 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,008 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,008 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,008 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,008 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,008 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,008 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,008 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,009 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,009 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,009 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,009 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,009 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,009 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,009 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,009 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,009 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,009 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,009 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,009 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,009 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,009 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,009 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,010 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,010 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,010 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,010 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,010 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,010 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,010 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,010 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,010 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,010 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,010 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,010 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,010 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,010 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,011 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,011 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,011 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,011 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,011 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,011 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,011 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,011 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,011 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,011 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,011 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,011 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,011 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,011 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,011 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,011 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,012 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,012 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,012 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,012 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,012 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,012 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,012 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,012 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,012 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,012 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,012 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,012 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,012 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,012 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,012 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,012 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,013 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,013 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,013 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,013 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,013 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,013 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,013 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,013 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,013 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,013 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,013 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,013 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,013 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,013 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,014 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,014 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,014 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,014 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,014 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,014 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,014 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,014 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,014 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,014 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,014 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,014 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,014 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,014 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,014 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,014 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,015 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,015 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,015 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,015 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,015 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,015 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,015 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,015 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,015 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,015 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,015 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,015 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,015 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,015 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,015 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,015 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,016 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,016 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,016 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,016 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,016 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,016 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,016 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,016 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,016 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,016 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,016 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,016 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,016 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,016 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,016 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,017 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,017 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,017 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,017 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,017 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,017 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,017 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,017 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,017 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,017 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,017 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,017 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,017 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,017 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,017 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,017 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,017 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,018 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,018 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,018 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,018 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,018 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,018 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,018 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,018 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,018 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,018 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,018 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,018 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,018 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,018 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,018 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,018 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,019 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,019 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,019 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,019 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,019 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,019 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,019 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,019 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,019 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,019 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,019 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,019 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,019 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,019 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,019 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,019 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,019 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,020 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,020 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,020 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,020 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,020 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,020 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,020 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,020 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,020 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,020 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,020 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,020 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,020 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,020 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,020 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,020 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,020 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,021 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,021 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,021 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,021 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,021 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,021 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,021 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,021 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,021 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,021 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,021 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,021 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,021 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,021 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,021 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,021 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,022 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,022 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,022 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,022 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,022 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,022 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,022 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,022 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,022 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,022 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,022 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,022 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,022 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,022 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,022 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,023 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,023 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,023 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,023 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,023 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,023 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,023 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,023 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,023 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,023 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,023 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,023 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,023 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,023 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,023 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,023 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,024 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,024 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,024 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,024 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,024 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,024 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,024 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,024 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,024 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,024 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,024 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,024 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,024 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,024 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,024 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,024 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,025 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,025 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,025 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,025 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,025 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,025 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,025 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,025 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,025 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,025 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,025 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,025 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,025 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,025 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,025 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,025 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,026 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,026 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,026 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,026 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,026 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,026 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,026 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,026 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,026 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,026 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,026 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,026 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,026 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,026 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,026 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,027 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,027 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,027 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,027 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,027 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,027 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,027 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,027 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,027 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,027 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,027 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,027 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,027 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,027 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,027 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,028 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,028 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,028 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,028 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,028 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,028 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,028 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,028 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,028 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,028 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,028 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,028 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,028 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,028 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,028 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,028 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,028 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,029 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,029 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,029 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,029 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,029 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,029 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,029 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,029 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,029 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,029 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,029 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,029 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,029 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,029 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,029 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,029 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,029 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,030 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,030 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,030 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,030 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,030 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,030 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,030 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,030 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,030 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,030 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,030 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,030 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,030 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,030 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,030 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,031 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,031 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,031 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,031 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,031 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,031 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,031 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,031 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,031 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,031 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,031 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,031 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,031 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,031 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,031 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,031 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,031 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,032 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,032 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,032 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,032 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,032 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,032 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,032 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,032 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,032 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,032 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,032 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,032 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,032 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,032 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,032 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,032 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,033 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,033 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,033 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,033 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,033 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,033 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,033 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,033 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,033 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,033 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,033 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,033 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,033 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,033 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,033 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,033 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,033 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,034 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,034 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,034 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,034 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,034 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,034 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,034 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,034 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,034 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,034 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,034 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,034 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,034 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,034 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,034 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,034 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,035 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,035 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,035 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,035 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,035 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,035 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,035 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,035 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,035 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,035 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,035 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,035 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,035 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,035 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,036 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,036 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,036 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,036 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,036 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,036 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,036 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,036 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,036 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,036 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,036 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,036 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,036 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,036 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,036 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,036 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,037 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,037 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,037 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,037 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,037 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,037 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,037 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,037 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,037 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,037 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,037 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,037 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,037 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,037 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,037 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,037 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,037 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,038 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,038 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,038 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,038 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,038 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,038 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,038 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,038 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,038 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,038 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,038 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,038 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,038 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,038 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,038 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,039 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,039 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,039 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,039 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,039 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,039 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,039 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,039 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,039 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,039 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,039 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,039 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,039 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,039 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,039 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,040 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,040 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,040 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,040 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,040 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,040 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,040 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,040 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,040 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,040 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,040 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,040 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,040 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,040 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,040 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,040 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,041 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,041 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,041 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,041 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,041 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,041 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,041 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,041 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,041 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,041 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,041 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,041 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,041 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,041 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,041 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,041 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,041 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,042 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,042 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,042 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,042 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,042 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,042 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,042 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,042 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,042 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,042 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,042 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,042 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,042 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,042 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,042 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,043 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,043 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,043 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,043 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,043 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,043 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,043 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,043 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,043 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,043 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,043 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,043 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,043 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,043 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,043 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,043 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,043 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,044 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,044 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,044 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,044 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,044 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,044 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,044 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,044 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,044 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,044 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,044 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,044 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,044 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,044 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,044 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,044 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,045 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,045 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,045 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,045 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,045 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,045 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,045 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,045 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,045 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,045 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,045 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,045 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,045 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,045 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,045 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,045 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,046 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,046 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,046 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,046 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,046 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,046 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,046 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,046 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,046 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,046 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,046 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,046 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,046 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,046 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,046 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,047 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,047 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,047 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,047 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,047 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,047 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,047 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,047 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,047 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,047 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,047 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,047 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,047 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,047 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,047 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,047 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,047 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,048 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,048 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,048 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,048 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,048 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,048 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,048 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,048 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,048 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,048 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,048 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,048 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,048 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,048 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,048 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,048 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,049 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,049 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,049 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,049 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,049 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,049 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,049 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,049 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,049 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,049 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,049 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,049 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,049 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,049 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,049 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,050 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,050 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,050 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,050 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,050 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,050 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,050 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,050 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,050 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,050 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,050 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,050 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,050 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,050 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,050 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,050 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,051 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,051 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,051 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,051 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,051 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,051 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,051 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,051 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,051 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,051 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,051 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,051 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,051 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,051 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,051 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,051 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,052 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,052 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,052 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,052 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,052 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,052 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,052 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,052 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,052 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,052 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,052 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,052 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,052 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,052 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,052 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,052 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,052 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,053 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,053 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,053 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,053 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,053 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,053 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,053 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,053 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,053 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,053 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,053 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,053 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,053 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,053 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,053 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,053 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,054 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,054 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,054 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,054 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,054 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,054 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,054 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,054 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,054 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,054 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,054 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,054 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,054 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,054 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,054 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,055 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,055 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,055 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,055 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,055 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,055 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,055 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,055 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,055 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,055 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,055 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,055 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,055 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,055 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,055 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,055 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,056 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,056 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,056 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,056 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,056 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,056 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,056 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,056 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,056 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,056 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,056 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,056 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,056 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,056 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,057 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,057 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,057 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,057 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,057 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,057 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,057 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,057 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,057 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,057 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,057 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,057 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,057 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,057 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,057 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,057 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,058 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,058 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,058 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,058 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,058 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,058 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,058 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,058 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,058 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,058 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,058 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,058 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,058 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,058 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,058 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,058 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,059 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,059 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,059 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,059 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,059 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,059 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,059 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,059 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,059 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,059 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,059 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,059 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,059 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,059 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,059 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,060 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,060 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,060 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,060 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,060 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,060 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,060 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,060 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,060 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,060 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,060 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,060 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,060 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,060 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,060 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,060 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,060 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,061 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,061 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,061 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,061 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,061 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,061 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,061 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,061 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,061 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,061 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,061 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,061 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,061 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,061 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,061 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,062 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,062 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,062 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,062 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,062 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,062 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,062 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,062 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,062 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,062 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,062 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,062 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,062 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,062 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,063 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,063 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,063 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,063 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,063 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,063 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,063 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,063 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,063 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,063 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,063 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,063 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,063 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,063 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,064 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,064 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,064 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,064 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,064 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,064 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,064 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,064 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,064 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,064 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,064 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,064 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,064 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,064 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,064 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,064 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,065 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,065 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,065 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,065 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,065 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,065 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,065 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,065 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,065 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,065 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,065 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,065 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,065 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,065 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,065 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,065 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,066 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,066 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,066 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,066 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,066 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,066 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,066 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,066 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,066 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,066 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,066 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,066 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,066 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,066 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,066 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,066 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,067 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,067 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,067 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,067 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,067 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,067 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,067 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,067 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,067 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,067 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,067 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,067 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,067 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,067 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,067 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,067 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,068 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,068 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,068 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,068 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,068 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,068 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,068 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,068 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,068 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,068 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,068 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,068 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,068 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,068 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,068 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,068 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,068 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,069 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,069 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,069 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,069 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,069 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,069 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,069 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,069 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,069 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,069 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,069 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,069 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,069 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,069 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,069 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,069 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,069 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,070 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,070 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,070 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,070 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,070 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,070 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,070 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,070 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,070 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,070 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,070 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,070 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,070 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,070 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,070 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,070 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,070 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,071 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,071 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,071 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,071 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,071 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,071 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,071 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,071 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,071 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,071 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,071 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,071 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,071 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,071 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,071 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,071 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,072 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,072 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,072 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,072 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,072 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,072 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,072 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,072 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,072 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,072 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,072 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,072 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,072 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,072 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,072 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,072 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,072 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,073 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,073 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,073 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,073 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,073 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,073 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,073 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,073 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,073 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,073 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,073 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,073 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,073 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,073 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,073 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,073 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,073 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,073 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,074 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,074 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,074 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,074 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,074 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,074 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,074 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,074 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,074 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,074 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,074 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,074 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,074 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,074 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,074 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,074 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,075 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,075 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,075 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,075 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,075 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,075 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,075 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,075 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,075 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,075 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,075 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,075 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,075 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,075 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,075 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,075 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,075 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,076 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,076 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,076 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,076 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,076 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,076 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,076 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,076 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,076 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,076 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,076 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,076 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,076 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,076 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,076 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,076 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,077 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,077 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,077 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,077 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,077 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,077 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,077 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,077 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,077 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,077 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,077 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,077 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,077 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,077 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,077 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,077 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,078 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,078 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,078 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,078 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,078 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,078 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,078 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,078 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,078 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,078 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,078 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,078 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,078 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,078 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,078 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,079 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,079 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,079 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,079 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,079 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,079 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,079 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,079 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,079 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,079 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,079 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,079 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,079 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,079 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,079 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,079 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,079 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,080 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,080 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,080 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,080 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,080 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,080 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,080 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,080 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,080 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,080 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,080 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,080 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,080 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,080 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,080 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,080 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,081 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,081 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,081 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,081 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,081 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,081 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,081 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,081 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,081 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,081 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,081 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,081 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,081 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,081 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,081 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,081 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,081 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,082 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,082 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,082 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,082 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,082 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,082 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,082 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,082 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,082 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,082 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,082 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,082 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,082 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,082 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,082 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,083 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,083 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,083 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,083 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,083 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,083 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,083 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,083 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,083 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,083 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,083 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,083 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,083 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,083 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,083 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,083 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,083 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,084 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,084 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,084 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,084 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,084 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,084 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,084 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,084 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,084 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,084 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,084 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,084 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,084 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,084 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,084 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,084 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,084 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,085 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,085 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,085 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,085 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,085 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,085 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,085 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,085 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,085 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,085 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,085 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,085 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,085 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,085 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,085 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,085 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,086 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,086 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,086 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,086 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,086 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,086 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,086 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,086 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,086 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,086 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,086 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,086 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,086 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,086 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,086 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,086 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,087 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,087 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,087 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,087 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,087 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,087 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,087 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,087 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,087 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,087 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,087 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,087 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,087 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,087 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,087 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,087 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,087 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,088 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,088 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,088 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,088 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,088 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,088 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,088 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,088 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,088 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,088 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,088 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,088 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,088 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,088 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,088 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,088 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,089 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,089 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,089 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,089 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,089 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,089 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,089 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,089 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,089 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,089 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,089 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,089 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,089 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,089 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,089 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,089 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,089 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,090 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,090 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,090 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,090 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,090 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,090 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,090 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,090 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,090 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,090 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,090 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,090 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,090 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,090 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,090 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,090 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,090 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,091 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,091 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,091 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,091 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,091 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,091 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,091 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,091 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,091 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,091 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,091 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,091 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,091 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,091 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,091 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,091 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,091 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,091 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,092 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,092 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,092 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,092 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,092 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,092 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,092 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,092 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,092 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,092 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,092 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,092 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,092 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,092 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,092 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,092 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,092 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,093 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,093 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,093 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,093 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,093 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,093 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,093 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,093 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,093 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,093 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,093 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,093 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,093 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,093 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,093 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,093 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,093 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,093 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,094 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,094 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,094 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,094 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,094 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,094 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,094 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,094 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,094 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,094 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,094 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,094 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,094 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,094 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,094 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,094 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,094 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,095 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,095 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,095 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,095 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,095 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,095 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,095 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,095 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,095 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,095 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,095 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,095 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,095 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,095 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,095 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,095 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,096 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,096 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,096 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,096 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,096 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,096 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,096 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,096 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,096 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,096 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,096 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,096 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,096 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,096 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,096 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,096 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,097 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,097 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,097 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,097 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,097 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,097 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,097 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,097 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,097 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,097 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,097 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,097 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,097 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,097 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,097 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,097 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,097 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,097 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,097 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,098 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,098 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,098 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,098 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,098 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,098 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,098 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,098 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,098 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,098 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,098 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,098 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,098 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,098 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,098 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,098 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,098 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,099 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,099 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,099 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,099 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,099 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,099 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,099 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,099 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,099 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,099 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,099 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,099 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,099 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,099 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,099 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,099 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,100 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,100 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,100 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,100 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,100 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,100 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,100 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,100 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,100 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,100 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,100 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,100 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,100 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,100 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,100 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,100 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,100 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,100 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,101 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,101 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,101 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,101 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,101 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,101 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,101 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,101 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,101 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,101 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,101 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,101 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,101 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,101 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,101 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,102 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,102 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,102 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,102 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,102 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,102 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,102 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,102 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,102 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,102 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,102 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,102 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,102 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,102 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,102 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,102 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,103 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,103 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,103 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,103 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,103 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,103 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,103 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,103 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,103 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,103 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,103 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,103 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,103 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,103 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,103 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,103 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,103 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,104 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,104 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,104 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,104 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,104 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,104 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,104 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,104 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,104 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,104 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,104 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,104 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,104 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,104 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,104 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,104 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,104 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,105 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,105 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,105 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,105 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,105 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,105 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,105 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,105 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,105 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,105 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,105 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,105 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,105 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,105 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,105 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,105 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,106 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,106 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,106 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,106 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,106 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,106 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,106 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,106 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,106 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,106 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,106 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,106 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,106 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,106 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,106 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,106 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,107 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,107 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,107 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,107 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,107 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,107 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,107 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,107 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,107 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,107 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,107 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,107 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,107 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,107 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,107 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,107 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,108 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,108 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,108 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,108 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,108 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,108 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,108 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,108 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,108 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,108 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,108 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,108 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,108 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,108 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,108 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,108 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,108 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,109 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,109 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,109 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,109 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,109 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,109 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,109 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,109 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,109 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,109 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,109 DEBUG SenderThread:266499 [sender.py:send():235] send: metric +2022-03-02 22:30:45,109 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:30:45,199 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:30:45,286 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:30:45,287 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:30:47,287 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:30:49,288 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:30:51,289 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:30:53,290 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:30:54,446 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:30:54,447 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:30:54,819 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:30:54,865 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:30:54,958 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:30:55,291 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:30:55,291 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:30:57,291 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:30:59,292 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:31:01,293 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:31:03,293 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:31:04,757 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:31:04,812 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:31:04,904 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:31:05,294 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:31:07,295 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:31:08,365 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:31:09,295 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:31:09,503 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:31:09,503 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:31:11,347 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:31:11,348 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/config.yaml +2022-03-02 22:31:13,348 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:31:14,619 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:31:14,693 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:31:14,785 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:31:15,349 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:31:15,349 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:31:16,349 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:31:20,351 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:31:22,351 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:31:24,352 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:31:24,430 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:31:24,482 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:31:24,568 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:31:24,718 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:31:24,719 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:31:25,352 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:31:26,353 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:31:28,354 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:31:30,354 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:31:34,217 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:31:34,276 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:31:34,367 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:31:34,369 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:31:35,367 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:31:36,368 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:31:38,368 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:31:38,886 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:31:39,892 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:31:39,892 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:31:40,369 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:31:42,370 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:31:43,780 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:31:43,853 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:31:43,938 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:31:44,371 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:31:44,371 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:31:46,371 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:31:48,372 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:31:50,373 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:31:52,373 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:31:53,278 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:31:53,331 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:31:53,415 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:31:54,413 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:31:54,414 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:31:54,967 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:31:54,968 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:31:56,414 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:31:59,415 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:32:02,645 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:32:02,698 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:32:02,782 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:32:03,417 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:32:03,417 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:32:04,417 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:32:05,417 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:32:07,418 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:32:09,329 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:32:09,419 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:32:10,110 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:32:10,112 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:32:11,419 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:32:11,873 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:32:11,927 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:32:12,018 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:32:12,420 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:32:12,420 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:32:13,420 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:32:15,421 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:32:17,422 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:32:19,422 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:32:20,868 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:32:20,922 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:32:21,010 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:32:21,423 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:32:21,423 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:32:22,423 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:32:23,424 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:32:25,439 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:32:25,440 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:32:26,425 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:32:28,426 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:32:29,672 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:32:29,726 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:32:29,819 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:32:30,426 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:32:30,427 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:32:32,427 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:32:34,428 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:32:36,428 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:32:38,241 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:32:38,295 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:32:38,385 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:32:38,429 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:32:38,429 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:32:39,836 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:32:40,430 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:32:40,657 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:32:40,657 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:32:41,430 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:32:42,431 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:32:44,431 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:32:46,432 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:32:46,748 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:32:46,800 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:32:46,890 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:32:47,432 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:32:48,433 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:32:49,433 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:32:50,433 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:32:52,434 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:32:54,435 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:32:54,871 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:32:54,927 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:32:55,015 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:32:55,435 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:32:55,826 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:32:55,828 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:32:56,436 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:32:57,436 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:32:58,437 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:33:00,437 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:33:02,438 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:33:02,617 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:33:02,670 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:33:02,755 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:33:03,438 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:33:03,439 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:33:04,439 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:33:06,440 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:33:08,440 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:33:09,946 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:33:09,998 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:33:10,088 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:33:10,329 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:33:10,441 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:33:10,442 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:33:10,888 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:33:10,889 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:33:11,442 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:33:12,442 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:33:14,443 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:33:16,443 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:33:16,924 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:33:16,978 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:33:17,070 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:33:17,444 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:33:18,444 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:33:20,445 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:33:22,446 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:33:23,442 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:33:23,495 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:33:23,579 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:33:24,496 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:33:24,496 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:33:25,933 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:33:25,934 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:33:26,496 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:33:28,497 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:33:29,302 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:33:29,355 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:33:29,440 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:33:29,497 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:33:30,498 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:33:32,498 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:33:34,499 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:33:34,603 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:33:34,659 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:33:34,775 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:33:35,499 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:33:36,500 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:33:38,500 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:33:39,363 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:33:39,418 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:33:39,536 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:33:40,534 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:33:40,534 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:33:40,897 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:33:41,006 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:33:41,007 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:33:42,534 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:33:43,541 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:33:43,591 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:33:43,683 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:33:44,573 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:33:44,574 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:33:46,574 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:33:47,209 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:33:47,291 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:33:47,379 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:33:47,574 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:33:48,575 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:33:50,419 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:33:50,472 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:33:50,577 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:33:50,579 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:33:51,578 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:33:52,578 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:33:53,654 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:33:53,828 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:33:53,910 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:33:54,579 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:33:55,579 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:33:56,120 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:33:56,121 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:33:56,580 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:33:59,581 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:34:01,582 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:34:05,583 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:34:05,938 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:34:06,016 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:34:06,099 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:34:06,583 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:34:06,584 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:34:07,584 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:34:11,166 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:34:11,167 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:34:11,494 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:34:11,585 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:34:13,586 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:34:15,587 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:34:17,815 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:34:17,868 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:34:17,957 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:34:18,588 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:34:18,588 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:34:19,589 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:34:21,589 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:34:25,591 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:34:26,220 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:34:26,221 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:34:28,592 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:34:29,639 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:34:29,690 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:34:29,778 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:34:30,593 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:34:30,593 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:34:32,594 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:34:34,594 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:34:36,595 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:34:40,597 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:34:41,286 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:34:41,287 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:34:41,378 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:34:41,430 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:34:41,512 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:34:41,597 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:34:41,961 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:34:42,597 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:34:43,598 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:34:46,599 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:34:48,600 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:34:50,600 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:34:52,808 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:34:52,860 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:34:52,940 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:34:53,601 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:34:55,602 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:34:56,445 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:34:56,446 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:34:57,603 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:34:59,603 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:35:03,605 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:35:04,136 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:35:04,186 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:35:04,272 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:35:04,605 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:35:05,605 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:35:09,607 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:35:11,607 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:35:11,619 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:35:11,620 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:35:12,421 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:35:13,608 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:35:15,366 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:35:15,418 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:35:15,501 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:35:15,609 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:35:17,609 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:35:19,610 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:35:21,611 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:35:26,612 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:35:26,661 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:35:26,662 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:35:26,763 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:35:26,763 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:35:26,854 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:35:27,613 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:35:27,613 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:35:28,613 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:35:30,614 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:35:34,615 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:35:36,616 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:35:37,814 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:35:37,891 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:35:37,983 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:35:38,617 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:35:40,617 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:35:42,096 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:35:42,097 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:35:42,618 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:35:42,864 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:35:44,619 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:35:48,620 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:35:48,905 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:35:48,957 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:35:49,077 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:35:49,621 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:35:50,621 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:35:52,622 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:35:54,623 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:35:57,267 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:35:57,269 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:35:58,624 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:35:59,805 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:35:59,856 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:35:59,947 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:36:00,625 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:36:01,625 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:36:02,625 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:36:03,626 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:36:07,627 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:36:09,628 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:36:10,564 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:36:10,616 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:36:10,698 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:36:11,697 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:36:11,697 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:36:12,430 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:36:12,431 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:36:12,697 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:36:13,456 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:36:13,697 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:36:15,698 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:36:17,699 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:36:19,699 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:36:21,398 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:36:21,450 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:36:21,530 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:36:21,700 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:36:22,700 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:36:23,701 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:36:25,701 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:36:27,667 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:36:27,669 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:36:27,702 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:36:31,703 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:36:32,100 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:36:32,151 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:36:32,253 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:36:32,704 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:36:32,704 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:36:33,704 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:36:35,705 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:36:39,706 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:36:41,707 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:36:42,763 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:36:42,831 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:36:42,949 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:36:43,005 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:36:43,006 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:36:43,707 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:36:43,886 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:36:44,708 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:36:46,709 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:36:50,710 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:36:52,711 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:36:53,254 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:36:53,306 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:36:53,429 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:36:53,711 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:36:54,711 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:36:55,712 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:36:58,244 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:36:58,245 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:36:58,713 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:37:00,713 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:37:02,714 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:37:03,871 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:37:03,923 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:37:04,005 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:37:04,715 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:37:04,715 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:37:05,715 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:37:06,715 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:37:08,716 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:37:10,717 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:37:12,718 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:37:13,289 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:37:13,290 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:37:14,303 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:37:14,360 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:37:14,447 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:37:14,484 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:37:14,718 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:37:15,719 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:37:16,719 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:37:18,719 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:37:20,720 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:37:22,721 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:37:24,881 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:37:24,934 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:37:25,021 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:37:25,722 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:37:26,722 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:37:27,723 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:37:28,493 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:37:28,494 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:37:28,723 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:37:31,724 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:37:35,359 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:37:35,438 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:37:35,520 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:37:35,726 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:37:35,726 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:37:37,726 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:37:39,727 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:37:41,728 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:37:43,657 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:37:43,658 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:37:44,892 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:37:45,729 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:37:45,842 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:37:45,895 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:37:45,985 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:37:46,730 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:37:47,730 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:37:48,731 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:37:49,731 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:37:51,732 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:37:55,733 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:37:56,233 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:37:56,286 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:37:56,373 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:37:56,734 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:37:57,734 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:37:58,734 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:37:59,146 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:37:59,147 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:37:59,735 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:38:02,736 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:38:06,536 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:38:06,590 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:38:06,677 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:38:06,737 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:38:06,738 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:38:08,738 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:38:10,739 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:38:12,740 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:38:14,593 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:38:14,595 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:38:15,318 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:38:16,741 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:38:16,862 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:38:16,915 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:38:17,000 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:38:17,742 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:38:18,742 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:38:20,743 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:38:22,743 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:38:26,745 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:38:27,507 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:38:27,696 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:38:27,787 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:38:27,793 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:38:28,787 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:38:28,787 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:38:29,816 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:38:29,817 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:38:30,788 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:38:33,789 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:38:37,529 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:38:37,586 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:38:37,691 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:38:37,790 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:38:37,791 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:38:38,791 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:38:39,791 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:38:41,792 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:38:43,793 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:38:44,872 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:38:44,873 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:38:45,793 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:38:46,050 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:38:47,416 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:38:47,471 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:38:47,567 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:38:47,794 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:38:48,794 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:38:49,795 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:38:51,795 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:38:53,796 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:38:55,796 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:38:57,314 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:38:57,369 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:38:57,460 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:38:57,797 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:38:57,797 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:38:59,798 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:39:00,154 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:39:00,156 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:39:01,799 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:39:04,800 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:39:06,801 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:39:06,988 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:39:07,062 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:39:07,146 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:39:07,801 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:39:08,802 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:39:09,802 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:39:10,802 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:39:12,803 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:39:15,371 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:39:15,373 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:39:16,478 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:39:16,533 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:39:16,627 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:39:16,630 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:39:16,805 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:39:16,805 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:39:17,805 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:39:18,805 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:39:20,806 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:39:22,807 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:39:24,808 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:39:26,054 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:39:26,108 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:39:26,191 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:39:26,808 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:39:26,809 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:39:27,809 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:39:28,809 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:39:30,417 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:39:30,419 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:39:32,811 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:39:34,811 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:39:35,347 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:39:35,400 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:39:35,492 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:39:35,812 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:39:36,812 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:39:37,812 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:39:38,813 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:39:41,814 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:39:43,815 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:39:44,635 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:39:44,687 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:39:44,777 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:39:44,815 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:39:45,577 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:39:45,578 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:39:45,815 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:39:47,053 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:39:47,816 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:39:49,817 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:39:51,817 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:39:53,783 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:39:53,833 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:39:53,926 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:39:54,860 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:39:55,860 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:39:57,861 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:39:59,862 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:40:00,741 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:40:00,743 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:40:01,862 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:40:02,669 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:40:02,724 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:40:02,810 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:40:02,863 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:40:03,863 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:40:05,864 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:40:08,865 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:40:10,866 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:40:11,565 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:40:11,617 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:40:11,708 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:40:11,866 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:40:12,867 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:40:15,805 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:40:15,806 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:40:16,868 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:40:17,507 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:40:18,869 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:40:20,257 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:40:20,313 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:40:20,408 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:40:20,870 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:40:22,871 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:40:24,872 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:40:26,873 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:40:28,704 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:40:28,761 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:40:28,849 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:40:28,874 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:40:28,874 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:40:30,874 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:40:30,880 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:40:30,881 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:40:32,875 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:40:34,876 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:40:36,877 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:40:37,053 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:40:37,115 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:40:37,206 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:40:37,877 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:40:38,877 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:40:40,878 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:40:42,879 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:40:44,765 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:40:44,821 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:40:44,908 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:40:44,910 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:40:45,908 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:40:46,005 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:40:46,006 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:40:46,909 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:40:47,975 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:40:48,909 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:40:50,910 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:40:52,017 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:40:52,088 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:40:52,176 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:40:52,911 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:40:52,911 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:40:54,912 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:40:56,912 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:40:58,746 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:40:58,807 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:40:58,897 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:40:58,913 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:40:58,913 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:41:00,914 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:41:01,064 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:41:01,065 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:41:02,915 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:41:04,915 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:41:04,958 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:41:05,011 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:41:05,096 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:41:05,916 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:41:06,916 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:41:08,917 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:41:10,625 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:41:10,679 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:41:10,771 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:41:10,917 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:41:10,918 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:41:12,918 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:41:14,919 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:41:15,708 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:41:15,761 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:41:15,871 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:41:15,919 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:41:16,170 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:41:16,171 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:41:16,919 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:41:17,920 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:41:18,630 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:41:19,920 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:41:20,485 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:41:20,541 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:41:20,632 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:41:20,921 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:41:21,921 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:41:22,922 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:41:23,922 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:41:24,713 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:41:24,768 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:41:24,858 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:41:24,923 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:41:25,923 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:41:26,923 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:41:27,924 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:41:28,440 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:41:28,498 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:41:28,591 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:41:28,924 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:41:29,924 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:41:30,925 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:41:31,408 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:41:31,409 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:41:31,676 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:41:31,730 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:41:31,822 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:41:31,925 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:41:31,926 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:41:32,926 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:41:33,926 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:41:34,928 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:41:35,097 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:41:35,183 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:41:35,962 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:41:35,962 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:41:36,962 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:41:39,964 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:41:41,964 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:41:45,966 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:41:46,477 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:41:46,478 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:41:46,946 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:41:46,999 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:41:47,082 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:41:48,000 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:41:48,000 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:41:49,000 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:41:49,342 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:41:52,001 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:41:54,002 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:41:58,003 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:41:58,677 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:41:58,729 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:41:58,816 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:41:59,004 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:42:00,004 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:42:01,005 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:42:01,528 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:42:01,529 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:42:02,005 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:42:06,007 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:42:08,008 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:42:10,372 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:42:10,432 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:42:10,543 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:42:11,009 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:42:11,009 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:42:13,010 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:42:15,010 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:42:16,930 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:42:16,931 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:42:19,012 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:42:19,779 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:42:21,012 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:42:21,904 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:42:21,955 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:42:22,060 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:42:23,059 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:42:23,059 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:42:24,060 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:42:27,061 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:42:29,062 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:42:32,328 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:42:32,328 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:42:33,063 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:42:33,172 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:42:33,223 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:42:33,348 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:42:34,063 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:42:35,064 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:42:36,064 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:42:37,064 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:42:41,066 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:42:43,067 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:42:44,697 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:42:44,749 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:42:44,836 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:42:45,067 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:42:45,067 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:42:46,068 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:42:47,452 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:42:47,453 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:42:49,069 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:42:50,232 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:42:51,069 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:42:53,070 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:42:55,781 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:42:55,833 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:42:55,916 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:42:56,071 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:42:58,072 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:43:00,073 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:43:02,697 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:43:02,699 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:43:04,074 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:43:06,075 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:43:06,989 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:43:07,042 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:43:07,127 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:43:08,126 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:43:08,126 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:43:12,128 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:43:14,129 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:43:16,129 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:43:17,749 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:43:17,750 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:43:18,004 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:43:18,083 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:43:18,168 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:43:19,167 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:43:20,167 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:43:20,626 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:43:22,168 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:43:24,168 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:43:28,170 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:43:29,121 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:43:29,177 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:43:29,261 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:43:30,204 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:43:30,204 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:43:31,204 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:43:32,926 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:43:32,928 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:43:33,205 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:43:37,207 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:43:39,208 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:43:40,052 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:43:40,106 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:43:40,194 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:43:40,208 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:43:43,209 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:43:45,210 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:43:47,211 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:43:48,054 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:43:48,056 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:43:49,211 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:43:50,968 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:43:51,033 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:43:51,119 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:43:51,155 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:43:51,212 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:43:53,213 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:43:55,213 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:43:57,214 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:44:01,215 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:44:01,845 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:44:01,895 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:44:01,979 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:44:02,215 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:44:03,140 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:44:03,142 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:44:03,216 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:44:06,217 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:44:10,218 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:44:12,219 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:44:12,577 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:44:12,653 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:44:12,736 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:44:13,219 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:44:14,220 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:44:15,220 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:44:16,220 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:44:18,428 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:44:18,429 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:44:20,222 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:44:21,573 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:44:22,222 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:44:23,318 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:44:23,368 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:44:23,452 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:44:24,223 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:44:24,223 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:44:25,223 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:44:26,224 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:44:28,224 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:44:30,225 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:44:32,226 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:44:33,661 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:44:33,662 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:44:34,156 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:44:34,211 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:44:34,302 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:44:35,296 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:44:35,297 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:44:36,297 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:44:38,297 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:44:40,298 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:44:44,672 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:44:44,726 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:44:44,817 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:44:45,300 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:44:45,300 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:44:47,300 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:44:48,727 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:44:48,728 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:44:49,301 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:44:51,302 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:44:51,989 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:44:55,270 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:44:55,321 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:44:55,332 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:44:55,408 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:44:56,322 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:44:57,322 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:44:58,323 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:44:59,323 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:45:01,324 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:45:03,846 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:45:03,847 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:45:05,325 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:45:05,812 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:45:05,865 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:45:05,947 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:45:06,325 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:45:07,326 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:45:08,326 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:45:09,326 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:45:12,328 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:45:16,329 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:45:16,415 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:45:16,470 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:45:16,553 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:45:17,329 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:45:18,330 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:45:19,380 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:45:19,382 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:45:20,330 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:45:22,331 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:45:22,394 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:45:26,332 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:45:26,699 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:45:26,753 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:45:26,843 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:45:27,333 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:45:28,333 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:45:30,334 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:45:32,335 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:45:34,564 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:45:34,565 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:45:36,336 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:45:37,044 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:45:37,128 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:45:37,212 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:45:37,336 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:45:38,337 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:45:40,338 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:45:45,339 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:45:47,347 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:45:47,388 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:45:47,402 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:45:47,490 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:45:48,379 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:45:48,379 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:45:49,379 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:45:49,716 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:45:49,717 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:45:51,380 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:45:52,819 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:45:55,381 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:45:57,382 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:45:57,396 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:45:57,470 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:45:57,553 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:45:58,383 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:45:58,383 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:45:59,383 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:46:01,384 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:46:04,768 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:46:04,769 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:46:05,385 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:46:07,386 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:46:07,965 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:46:08,137 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:46:08,217 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:46:08,387 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:46:09,387 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:46:11,388 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:46:13,389 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:46:17,390 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:46:17,898 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:46:17,949 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:46:18,037 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:46:18,390 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:46:19,391 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:46:19,876 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:46:19,877 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:46:21,392 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:46:23,230 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:46:23,392 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:46:27,755 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:46:27,809 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:46:27,928 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:46:28,394 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:46:28,394 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:46:29,395 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:46:30,395 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:46:32,396 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:46:34,396 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:46:34,950 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:46:34,951 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:46:36,397 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:46:37,637 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:46:37,691 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:46:37,776 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:46:38,398 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:46:38,398 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:46:39,398 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:46:40,399 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:46:42,399 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:46:44,400 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:46:46,401 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:46:47,307 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:46:47,360 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:46:47,444 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:46:48,443 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:46:48,443 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:46:49,443 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:46:50,265 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:46:50,266 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:46:50,444 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:46:53,711 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:46:54,445 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:46:56,446 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:46:56,849 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:46:56,906 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:46:56,991 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:46:57,446 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:46:58,447 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:46:59,447 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:47:00,448 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:47:02,448 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:47:04,449 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:47:05,402 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:47:05,403 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:47:06,350 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:47:06,406 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:47:06,493 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:47:07,492 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:47:07,492 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:47:09,492 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:47:11,493 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:47:13,494 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:47:15,495 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:47:15,680 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:47:15,761 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:47:15,844 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:47:16,495 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:47:17,495 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:47:19,496 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:47:20,595 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:47:20,596 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:47:21,497 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:47:23,497 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:47:24,183 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:47:24,843 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:47:24,896 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:47:24,982 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:47:25,498 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:47:25,498 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:47:26,499 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:47:30,500 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:47:32,501 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:47:33,836 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:47:33,888 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:47:33,991 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:47:34,502 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:47:34,502 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:47:35,664 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:47:35,666 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:47:36,502 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:47:38,503 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:47:40,504 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:47:42,505 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:47:42,681 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:47:42,735 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:47:42,851 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:47:43,505 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:47:44,505 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:47:46,506 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:47:48,507 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:47:50,508 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:47:50,710 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:47:50,712 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:47:51,532 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:47:51,600 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:47:51,685 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:47:52,508 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:47:52,509 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:47:54,509 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:47:54,651 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:47:56,510 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:47:59,511 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:47:59,983 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:48:00,038 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:48:00,122 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:48:00,511 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:48:01,512 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:48:02,512 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:48:03,513 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:48:05,513 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:48:05,759 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:48:05,760 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:48:07,514 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:48:08,241 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:48:08,295 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:48:08,385 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:48:08,514 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:48:09,515 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:48:10,515 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:48:11,515 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:48:13,516 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:48:15,517 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:48:16,206 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:48:16,256 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:48:16,341 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:48:16,517 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:48:17,518 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:48:19,518 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:48:20,874 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:48:20,875 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:48:21,519 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:48:23,520 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:48:23,759 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:48:23,833 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:48:23,919 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:48:24,520 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:48:25,095 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:48:25,521 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:48:27,521 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:48:29,522 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:48:30,923 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:48:30,975 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:48:31,061 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:48:31,523 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:48:31,523 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:48:33,523 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:48:35,524 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:48:36,008 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:48:36,009 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:48:37,525 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:48:37,617 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:48:37,671 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:48:37,761 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:48:38,525 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:48:39,526 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:48:41,526 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:48:43,527 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:48:43,809 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:48:43,864 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:48:43,955 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:48:44,527 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:48:45,528 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:48:46,528 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:48:48,529 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:48:49,472 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:48:49,527 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:48:49,613 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:48:50,611 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:48:50,612 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:48:51,293 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:48:51,294 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:48:51,612 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:48:52,612 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:48:54,613 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:48:54,634 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:48:54,712 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:48:54,801 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:48:55,531 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:48:55,613 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:48:55,613 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:48:56,614 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:48:58,614 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:48:59,268 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:48:59,320 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:48:59,409 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:48:59,615 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:49:00,615 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:49:01,615 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:49:02,616 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:49:03,269 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:49:03,322 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:49:03,450 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:49:03,616 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:49:04,617 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:49:05,617 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:49:06,360 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:49:06,361 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:49:06,617 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:49:06,751 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:49:06,804 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:49:06,887 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:49:07,618 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:49:07,618 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:49:08,618 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:49:09,796 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:49:09,851 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:49:09,937 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:49:10,619 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:49:10,619 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:49:11,620 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:49:12,620 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:49:13,064 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:49:13,233 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:49:13,312 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:49:13,620 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:49:13,621 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:49:14,621 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:49:18,622 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:49:20,623 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:49:21,421 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:49:21,422 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:49:24,624 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:49:25,255 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:49:25,322 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:49:25,402 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:49:25,625 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:49:26,041 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:49:26,625 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:49:27,625 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:49:30,627 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:49:32,627 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:49:34,628 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:49:36,481 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:49:36,483 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:49:36,965 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:49:37,016 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:49:37,095 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:49:37,629 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:49:38,630 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:49:40,631 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:49:45,632 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:49:47,633 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:49:48,589 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:49:48,639 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:49:48,728 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:49:49,651 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:49:49,651 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:49:50,651 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:49:51,530 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:49:51,531 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:49:51,651 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:49:53,652 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:49:55,653 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:49:56,472 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:49:59,655 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:50:00,099 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:50:00,149 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:50:00,228 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:50:00,655 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:50:01,655 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:50:02,656 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:50:03,656 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:50:06,865 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:50:06,867 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:50:07,657 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:50:09,658 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:50:11,600 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:50:11,651 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:50:11,734 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:50:12,733 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:50:12,733 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:50:13,734 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:50:15,734 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:50:18,735 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:50:22,005 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:50:22,006 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:50:22,737 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:50:22,979 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:50:23,057 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:50:23,156 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:50:23,737 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:50:24,738 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:50:26,919 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:50:28,739 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:50:30,740 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:50:32,740 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:50:34,185 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:50:34,235 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:50:34,317 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:50:34,741 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:50:34,741 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:50:36,742 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:50:37,155 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:50:37,156 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:50:38,743 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:50:40,743 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:50:44,745 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:50:45,361 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:50:45,412 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:50:45,493 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:50:45,745 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:50:46,745 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:50:47,746 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:50:50,747 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:50:52,350 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:50:52,352 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:50:52,748 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:50:54,748 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:50:56,618 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:50:56,670 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:50:56,751 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:50:56,757 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:50:57,391 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:50:57,751 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:50:57,751 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:51:01,753 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:51:03,753 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:51:07,396 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:51:07,397 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:51:07,755 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:51:07,834 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:51:07,886 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:51:07,973 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:51:08,755 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:51:09,756 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:51:11,756 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:51:15,758 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:51:17,758 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:51:18,882 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:51:18,931 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:51:19,014 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:51:19,759 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:51:19,760 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:51:22,605 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:51:22,606 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:51:23,761 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:51:25,761 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:51:27,787 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:51:28,762 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:51:29,746 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:51:29,799 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:51:29,880 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:51:30,805 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:51:30,805 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:51:31,805 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:51:34,806 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:51:36,807 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:51:37,703 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:51:37,704 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:51:40,679 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:51:40,756 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:51:40,835 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:51:40,841 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:51:41,835 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:51:41,836 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:51:42,836 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:51:44,836 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:51:48,838 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:51:50,839 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:51:51,509 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:51:51,560 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:51:51,640 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:51:51,839 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:51:52,786 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:51:52,787 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:51:52,840 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:51:54,841 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:51:58,272 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:51:58,842 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:52:00,843 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:52:02,342 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:52:02,420 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:52:02,505 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:52:02,844 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:52:02,844 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:52:04,845 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:52:06,845 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:52:07,971 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:52:07,973 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:52:09,847 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:52:11,847 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:52:13,132 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:52:13,211 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:52:13,290 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:52:13,848 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:52:14,848 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:52:15,849 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:52:17,849 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:52:19,850 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:52:23,208 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:52:23,209 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:52:23,826 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:52:23,877 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:52:23,883 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:52:23,960 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:52:24,878 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:52:24,878 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:52:25,878 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:52:27,879 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:52:28,765 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:52:29,880 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:52:33,881 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:52:34,385 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:52:34,452 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:52:34,543 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:52:34,882 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:52:34,882 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:52:35,882 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:52:37,883 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:52:38,311 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:52:38,312 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:52:41,884 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:52:43,885 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:52:44,937 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:52:44,988 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:52:45,070 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:52:45,885 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:52:45,886 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:52:47,886 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:52:48,887 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:52:52,888 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:52:53,487 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:52:53,489 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:52:54,889 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:52:55,256 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:52:55,309 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:52:55,392 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:52:55,889 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:52:56,890 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:52:57,890 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:52:58,890 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:52:59,276 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:53:02,892 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:53:04,893 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:53:05,541 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:53:05,592 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:53:05,674 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:53:05,893 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:53:06,894 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:53:07,894 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:53:08,675 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:53:08,676 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:53:08,894 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:53:12,896 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:53:14,896 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:53:15,717 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:53:15,768 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:53:15,846 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:53:15,897 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:53:16,897 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:53:17,898 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:53:19,898 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:53:23,900 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:53:23,946 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:53:23,947 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:53:25,869 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:53:25,918 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:53:25,924 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:53:26,001 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:53:26,919 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:53:27,919 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:53:29,761 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:53:29,920 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:53:31,921 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:53:35,922 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:53:35,942 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:53:36,013 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:53:36,091 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:53:36,923 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:53:37,923 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:53:39,321 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:53:39,322 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:53:39,924 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:53:41,924 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:53:43,925 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:53:46,476 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:53:46,645 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:53:46,731 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:53:46,926 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:53:47,927 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:53:48,927 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:53:50,928 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:53:52,929 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:53:54,522 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:53:54,522 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:53:54,929 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:53:56,207 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:53:56,260 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:53:56,349 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:53:56,930 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:53:57,930 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:53:58,931 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:54:00,234 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:54:00,932 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:54:02,932 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:54:04,933 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:54:05,885 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:54:05,939 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:54:06,029 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:54:06,972 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:54:06,972 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:54:07,972 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:54:08,972 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:54:09,666 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:54:09,667 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:54:12,974 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:54:14,975 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:54:15,593 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:54:15,648 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:54:15,732 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:54:15,975 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:54:16,976 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:54:17,976 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:54:18,977 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:54:20,977 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:54:22,978 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:54:24,738 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:54:24,739 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:54:25,175 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:54:25,232 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:54:25,323 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:54:25,979 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:54:25,979 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:54:27,980 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:54:29,980 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:54:30,713 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:54:31,981 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:54:33,982 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:54:34,530 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:54:34,607 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:54:34,693 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:54:34,982 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:54:35,982 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:54:36,983 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:54:37,983 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:54:39,821 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:54:39,823 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:54:41,985 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:54:43,985 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:54:44,012 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:54:44,067 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:54:44,152 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:54:44,986 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:54:45,986 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:54:46,987 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:54:47,987 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:54:49,988 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:54:51,989 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:54:53,331 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:54:53,383 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:54:53,466 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:54:53,989 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:54:53,989 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:54:54,964 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:54:54,965 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:54:54,989 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:54:58,991 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:55:01,213 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:55:02,367 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:55:02,422 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:55:02,515 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:55:02,992 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:55:02,993 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:55:04,993 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:55:06,994 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:55:08,995 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:55:10,031 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:55:10,034 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:55:10,995 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:55:11,266 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:55:11,340 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:55:11,428 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:55:11,996 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:55:12,996 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:55:14,997 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:55:16,998 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:55:18,998 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:55:20,074 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:55:20,126 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:55:20,211 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:55:20,999 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:55:20,999 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:55:23,000 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:55:25,001 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:55:25,186 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:55:25,188 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:55:27,002 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:55:28,801 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:55:28,854 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:55:28,937 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:55:29,002 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:55:29,003 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:55:31,003 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:55:31,710 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:55:32,003 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:55:34,004 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:55:37,149 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:55:37,203 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:55:37,346 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:55:38,006 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:55:38,006 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:55:39,006 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:55:40,006 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:55:40,314 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:55:40,315 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:55:42,007 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:55:44,008 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:55:45,309 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:55:45,361 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:55:45,467 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:55:46,008 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:55:46,009 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:55:47,009 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:55:48,009 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:55:51,010 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:55:53,011 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:55:53,229 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:55:53,288 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:55:53,377 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:55:54,011 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:55:55,012 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:55:55,399 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:55:55,401 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:55:57,013 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:55:59,013 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:56:00,762 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:56:00,816 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:56:00,905 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:56:01,014 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:56:01,014 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:56:02,163 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:56:03,015 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:56:05,016 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:56:07,017 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:56:07,778 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:56:07,832 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:56:07,922 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:56:08,017 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:56:09,017 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:56:10,513 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:56:10,514 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:56:11,018 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:56:13,019 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:56:14,406 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:56:14,458 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:56:14,555 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:56:15,019 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:56:15,020 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:56:16,020 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:56:17,020 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:56:19,021 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:56:20,521 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:56:20,574 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:56:20,661 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:56:21,022 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:56:21,022 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:56:22,022 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:56:23,022 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:56:25,023 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:56:25,566 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:56:25,567 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:56:26,191 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:56:26,246 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:56:26,361 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:56:27,024 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:56:27,024 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:56:28,024 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:56:29,024 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:56:31,025 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:56:31,392 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:56:31,447 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:56:31,535 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:56:32,026 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:56:32,681 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:56:33,026 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:56:34,026 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:56:35,027 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:56:35,941 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:56:35,993 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:56:36,078 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:56:37,077 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:56:37,078 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:56:38,078 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:56:39,078 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:56:39,965 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:56:40,018 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:56:40,104 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:56:40,612 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:56:40,613 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:56:41,103 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:56:41,103 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:56:42,103 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:56:43,103 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:56:43,560 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:56:43,623 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:56:43,710 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:56:44,104 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:56:45,104 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:56:46,104 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:56:46,615 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:56:46,668 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:56:46,753 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:56:47,105 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:56:47,105 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:56:48,105 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:56:49,106 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:56:49,780 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:56:49,943 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:56:50,023 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:56:50,106 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:56:51,107 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:56:52,107 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:56:55,108 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:56:55,663 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:56:55,664 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:56:57,109 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:57:01,972 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:57:02,026 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:57:02,132 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:57:02,133 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:57:03,132 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:57:03,183 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:57:04,132 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:57:08,134 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:57:10,134 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:57:10,871 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:57:10,871 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:57:12,135 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:57:13,727 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:57:13,779 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:57:13,860 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:57:14,136 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:57:16,137 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:57:18,137 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:57:22,139 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:57:24,139 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:57:25,274 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:57:25,323 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:57:25,403 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:57:26,140 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:57:26,140 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:57:26,201 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:57:26,202 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:57:30,142 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:57:32,142 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:57:33,648 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:57:36,144 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:57:36,700 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:57:36,751 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:57:36,827 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:57:37,144 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:57:38,145 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:57:40,145 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:57:41,374 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:57:41,376 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:57:45,147 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:57:47,148 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:57:48,084 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:57:48,136 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:57:48,242 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:57:49,175 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:57:51,175 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:57:53,176 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:57:55,177 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:57:56,420 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:57:56,422 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:57:59,178 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:57:59,335 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:57:59,388 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:57:59,467 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:58:00,178 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:58:01,179 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:58:04,085 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:58:05,180 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:58:07,181 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:58:09,182 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:58:10,499 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:58:10,549 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:58:10,632 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:58:11,183 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:58:11,183 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:58:11,657 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:58:11,658 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:58:13,183 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:58:15,184 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:58:17,185 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:58:19,186 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:58:21,601 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:58:21,653 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:58:21,737 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:58:22,187 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:58:23,187 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:58:24,187 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:58:26,188 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:58:26,757 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:58:26,759 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:58:28,189 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:58:32,190 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:58:32,657 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:58:32,709 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:58:32,793 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:58:33,191 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:58:34,191 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:58:34,528 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:58:35,191 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:58:36,192 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:58:40,193 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:58:41,967 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:58:41,968 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:58:42,194 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:58:43,678 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:58:43,752 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:58:43,832 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:58:44,195 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:58:44,195 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:58:45,195 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:58:47,196 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:58:49,196 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:58:51,197 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:58:53,198 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:58:54,576 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:58:54,628 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:58:54,714 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:58:55,199 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:58:56,199 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:58:57,083 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:58:57,085 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:58:57,199 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:58:59,200 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:59:01,201 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:59:04,925 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:59:05,202 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:59:05,353 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:59:05,408 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:59:05,490 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:59:06,203 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:59:07,203 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:59:08,204 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:59:09,204 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:59:11,205 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:59:12,196 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:59:12,197 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:59:15,206 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:59:16,117 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:59:16,166 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:59:16,254 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:59:17,248 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:59:17,249 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:59:18,249 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:59:22,250 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:59:24,251 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:59:26,252 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:59:26,841 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:59:26,916 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:59:27,005 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:59:27,252 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:59:27,406 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:59:27,407 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:59:28,252 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:59:32,254 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:59:34,254 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:59:35,341 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 22:59:36,255 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:59:37,450 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:59:37,505 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:59:37,586 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:59:38,256 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:59:38,256 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:59:40,257 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:59:42,257 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:59:42,565 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:59:42,566 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:59:44,258 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:59:46,259 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:59:48,020 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:59:48,072 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:59:48,151 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:59:48,260 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 22:59:50,260 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:59:52,261 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:59:54,262 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:59:57,263 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 22:59:57,620 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 22:59:57,621 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 22:59:58,509 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 22:59:58,561 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 22:59:58,641 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 22:59:59,264 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:00:00,264 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:00:01,264 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:00:03,265 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:00:05,266 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:00:05,799 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:00:07,267 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:00:08,900 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:00:08,951 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:00:09,029 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:00:09,267 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:00:10,268 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:00:11,268 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:00:12,701 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:00:12,703 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:00:13,269 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:00:15,270 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:00:17,271 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:00:19,245 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:00:19,297 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:00:19,376 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:00:20,308 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:00:20,308 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:00:21,308 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:00:23,309 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:00:25,309 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:00:27,310 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:00:27,829 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:00:27,830 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:00:29,590 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:00:29,641 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:00:29,721 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:00:30,311 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:00:31,311 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:00:32,312 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:00:34,312 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:00:36,232 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:00:36,313 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:00:38,314 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:00:39,752 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:00:39,802 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:00:39,883 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:00:40,315 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:00:41,315 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:00:42,315 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:00:43,074 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:00:43,075 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:00:44,316 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:00:46,317 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:00:48,317 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:00:49,844 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:00:49,896 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:00:49,979 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:00:50,318 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:00:51,318 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:00:52,319 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:00:54,320 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:00:56,320 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:00:58,321 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:00:58,353 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:00:58,354 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:00:59,974 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:01:00,025 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:01:00,105 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:01:00,322 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:01:01,322 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:01:02,323 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:01:04,323 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:01:06,324 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:01:06,647 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:01:09,325 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:01:09,990 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:01:10,042 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:01:10,127 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:01:10,325 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:01:11,326 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:01:13,553 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:01:13,555 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:01:15,327 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:01:17,328 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:01:19,329 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:01:20,847 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:01:21,024 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:01:21,108 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:01:21,330 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:01:21,330 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:01:22,330 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:01:23,331 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:01:25,331 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:01:27,332 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:01:28,673 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:01:28,673 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:01:29,333 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:01:30,511 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:01:30,566 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:01:30,650 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:01:31,333 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:01:32,334 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:01:33,334 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:01:35,335 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:01:37,335 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:01:37,365 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:01:39,336 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:01:40,289 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:01:40,342 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:01:40,488 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:01:41,420 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:01:41,420 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:01:42,421 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:01:43,767 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:01:43,768 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:01:45,422 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:01:47,423 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:01:49,423 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:01:50,025 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:01:50,079 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:01:50,168 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:01:50,424 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:01:51,424 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:01:52,425 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:01:53,425 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:01:55,426 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:01:57,427 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:01:58,868 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:01:58,869 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:01:59,652 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:01:59,708 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:01:59,796 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:02:00,428 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:02:02,429 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:02:04,429 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:02:06,430 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:02:07,928 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:02:08,431 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:02:09,222 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:02:09,277 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:02:09,367 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:02:09,431 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:02:10,432 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:02:12,432 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:02:14,005 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:02:14,006 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:02:16,434 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:02:18,435 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:02:18,633 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:02:18,686 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:02:18,774 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:02:19,435 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:02:20,436 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:02:22,436 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:02:24,437 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:02:26,438 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:02:27,934 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:02:28,001 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:02:28,087 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:02:28,439 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:02:28,439 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:02:29,222 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:02:29,223 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:02:29,439 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:02:31,440 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:02:35,441 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:02:37,284 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:02:37,338 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:02:37,429 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:02:37,442 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:02:37,443 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:02:38,374 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:02:38,443 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:02:39,443 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:02:41,444 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:02:43,444 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:02:44,401 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:02:44,403 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:02:45,445 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:02:46,385 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:02:46,440 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:02:46,528 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:02:47,527 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:02:47,527 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:02:49,527 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:02:51,528 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:02:53,529 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:02:55,398 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:02:55,454 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:02:55,539 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:02:56,538 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:02:57,538 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:02:58,539 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:02:59,670 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:02:59,671 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:03:00,539 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:03:02,540 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:03:04,276 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:03:04,329 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:03:04,413 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:03:04,541 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:03:04,541 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:03:05,541 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:03:06,541 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:03:08,542 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:03:08,859 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:03:10,543 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:03:12,544 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:03:12,899 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:03:12,981 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:03:13,068 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:03:13,544 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:03:14,545 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:03:14,722 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:03:14,723 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:03:15,545 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:03:16,545 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:03:18,546 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:03:20,547 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:03:21,233 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:03:21,288 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:03:21,380 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:03:21,547 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:03:22,547 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:03:23,548 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:03:24,548 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:03:26,549 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:03:28,550 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:03:29,344 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:03:29,400 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:03:29,490 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:03:29,550 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:03:29,869 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:03:29,871 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:03:30,551 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:03:31,551 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:03:32,551 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:03:34,552 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:03:36,553 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:03:37,092 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:03:37,147 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:03:37,233 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:03:37,553 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:03:38,553 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:03:39,452 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:03:39,554 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:03:40,554 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:03:42,555 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:03:44,452 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:03:44,505 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:03:44,589 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:03:44,590 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:03:45,125 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:03:45,127 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:03:45,589 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:03:45,589 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:03:46,589 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:03:48,590 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:03:50,591 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:03:51,406 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:03:51,467 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:03:51,554 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:03:51,591 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:03:52,591 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:03:54,592 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:03:56,593 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:03:57,869 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:03:57,922 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:03:58,011 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:03:58,593 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:03:58,594 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:04:00,207 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:04:00,209 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:04:00,594 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:04:02,595 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:04:03,936 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:04:03,990 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:04:04,075 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:04:04,596 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:04:05,596 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:04:06,596 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:04:07,597 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:04:09,448 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:04:09,516 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:04:09,599 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:04:09,601 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:04:09,879 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:04:10,600 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:04:10,600 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:04:11,600 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:04:13,601 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:04:14,374 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:04:14,428 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:04:14,521 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:04:14,601 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:04:15,261 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:04:15,262 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:04:15,602 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:04:16,602 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:04:17,603 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:04:18,752 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:04:18,839 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:04:18,926 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:04:19,603 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:04:19,604 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:04:20,604 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:04:21,604 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:04:22,564 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:04:22,617 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:04:22,708 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:04:23,633 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:04:23,634 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:04:24,634 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:04:25,634 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:04:25,771 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:04:25,830 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:04:25,922 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:04:26,634 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:04:26,635 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:04:27,635 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:04:29,207 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:04:29,375 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:04:29,465 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:04:29,636 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:04:29,636 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:04:30,364 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:04:30,365 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:04:30,636 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:04:31,636 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:04:33,637 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:04:37,638 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:04:39,639 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:04:40,410 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:04:41,177 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:04:41,229 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:04:41,327 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:04:41,640 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:04:42,640 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:04:43,641 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:04:45,519 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:04:45,520 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:04:45,641 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:04:47,642 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:04:51,643 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:04:53,175 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:04:53,225 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:04:53,313 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:04:53,644 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:04:54,644 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:04:58,646 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:05:00,605 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:05:00,606 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:05:00,646 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:05:02,647 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:05:04,650 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:05:04,707 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:05:04,795 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:05:05,662 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:05:06,662 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:05:07,662 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:05:08,663 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:05:10,902 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:05:12,664 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:05:14,665 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:05:15,652 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:05:15,653 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:05:16,364 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:05:16,417 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:05:16,505 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:05:16,666 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:05:17,666 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:05:18,667 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:05:20,667 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:05:22,668 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:05:27,670 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:05:27,852 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:05:27,904 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:05:27,990 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:05:28,670 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:05:29,671 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:05:30,733 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:05:30,734 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:05:31,672 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:05:35,673 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:05:37,674 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:05:39,284 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:05:39,334 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:05:39,421 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:05:39,675 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:05:41,480 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:05:41,675 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:05:43,676 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:05:45,677 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:05:45,954 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:05:45,955 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:05:50,651 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:05:50,702 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:05:50,728 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:05:50,781 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:05:51,719 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:05:51,719 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:05:52,719 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:05:54,720 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:05:58,721 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:06:00,722 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:06:01,039 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:06:01,040 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:06:01,926 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:06:01,980 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:06:02,065 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:06:02,723 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:06:02,723 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:06:03,723 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:06:04,724 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:06:06,724 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:06:08,725 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:06:10,726 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:06:11,892 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:06:13,068 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:06:13,119 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:06:13,200 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:06:13,727 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:06:13,728 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:06:14,728 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:06:16,246 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:06:16,247 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:06:16,729 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:06:20,730 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:06:22,731 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:06:24,228 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:06:24,281 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:06:24,363 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:06:24,732 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:06:25,732 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:06:26,732 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:06:29,733 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:06:31,492 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:06:31,493 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:06:31,734 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:06:33,735 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:06:35,139 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:06:35,191 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:06:35,273 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:06:35,735 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:06:36,736 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:06:37,736 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:06:39,737 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:06:41,738 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:06:42,302 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:06:45,739 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:06:46,159 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:06:46,221 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:06:46,303 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:06:46,657 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:06:46,658 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:06:46,739 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:06:47,740 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:06:48,740 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:06:49,741 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:06:53,742 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:06:55,743 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:06:57,146 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:06:57,199 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:06:57,280 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:06:57,744 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:06:57,744 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:06:58,744 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:07:00,745 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:07:01,906 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:07:01,907 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:07:02,746 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:07:04,747 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:07:06,747 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:07:08,045 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:07:08,096 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:07:08,177 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:07:08,748 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:07:08,748 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:07:10,749 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:07:12,717 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:07:12,751 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:07:14,751 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:07:16,752 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:07:17,120 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:07:17,121 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:07:18,816 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:07:18,870 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:07:18,982 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:07:19,753 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:07:20,754 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:07:22,754 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:07:24,755 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:07:29,654 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:07:29,707 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:07:29,784 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:07:29,789 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:07:30,784 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:07:30,785 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:07:31,785 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:07:32,242 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:07:32,243 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:07:35,786 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:07:37,787 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:07:39,788 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:07:40,336 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:07:40,389 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:07:40,469 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:07:40,788 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:07:41,789 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:07:43,169 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:07:45,790 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:07:47,289 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:07:47,290 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:07:47,791 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:07:49,792 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:07:51,018 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:07:51,071 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:07:51,158 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:07:51,792 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:07:51,793 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:07:53,793 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:07:55,794 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:07:57,795 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:07:59,796 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:08:01,611 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:08:01,660 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:08:01,783 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:08:01,796 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:08:02,387 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:08:02,388 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:08:03,797 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:08:05,798 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:08:07,799 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:08:09,799 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:08:12,065 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:08:12,118 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:08:12,202 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:08:12,800 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:08:13,589 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:08:13,801 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:08:14,801 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:08:16,802 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:08:17,473 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:08:17,475 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:08:18,803 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:08:22,390 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:08:22,463 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:08:22,546 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:08:22,804 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:08:22,804 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:08:23,804 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:08:24,805 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:08:26,806 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:08:28,807 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:08:32,629 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:08:32,630 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:08:32,696 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:08:32,750 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:08:32,831 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:08:32,831 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:08:32,838 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:08:33,832 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:08:34,832 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:08:36,833 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:08:38,833 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:08:40,834 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:08:42,930 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:08:42,982 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:08:43,067 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:08:43,835 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:08:44,010 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:08:44,835 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:08:46,836 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:08:47,677 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:08:47,678 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:08:48,837 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:08:51,838 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:08:53,020 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:08:53,073 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:08:53,151 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:08:53,838 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:08:54,839 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:08:55,839 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:08:57,840 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:08:59,840 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:09:01,841 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:09:02,729 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:09:02,730 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:09:03,792 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:09:03,969 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:09:04,053 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:09:04,970 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:09:05,970 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:09:06,970 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:09:07,971 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:09:09,971 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:09:11,972 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:09:13,730 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:09:13,787 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:09:13,877 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:09:13,973 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:09:14,521 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:09:14,973 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:09:15,974 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:09:17,782 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:09:17,783 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:09:17,974 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:09:19,975 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:09:21,976 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:09:23,719 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:09:23,774 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:09:23,890 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:09:23,977 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:09:24,977 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:09:26,978 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:09:28,978 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:09:30,979 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:09:32,830 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:09:32,832 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:09:32,980 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:09:33,892 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:09:33,951 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:09:34,043 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:09:35,037 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:09:35,037 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:09:37,038 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:09:41,039 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:09:43,040 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:09:43,510 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:09:43,572 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:09:43,654 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:09:44,040 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:09:44,920 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:09:45,041 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:09:47,042 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:09:47,886 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:09:47,888 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:09:51,043 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:09:53,044 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:09:53,105 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:09:53,160 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:09:53,249 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:09:54,044 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:09:55,045 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:09:57,045 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:09:59,046 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:10:02,047 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:10:02,729 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:10:02,782 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:10:02,875 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:10:03,047 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:10:03,067 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:10:03,069 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:10:04,048 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:10:05,048 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:10:06,049 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:10:10,050 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:10:12,051 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:10:12,090 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:10:12,145 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:10:12,234 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:10:13,051 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:10:13,051 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:10:14,051 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:10:15,400 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:10:16,052 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:10:18,053 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:10:18,279 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:10:18,280 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:10:20,053 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:10:21,315 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:10:21,369 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:10:21,463 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:10:22,054 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:10:22,054 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:10:24,055 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:10:27,056 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:10:29,057 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:10:30,430 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:10:30,483 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:10:30,569 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:10:31,058 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:10:32,058 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:10:33,058 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:10:33,353 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:10:33,355 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:10:35,059 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:10:37,060 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:10:39,061 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:10:39,434 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:10:39,489 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:10:39,581 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:10:40,061 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:10:41,061 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:10:42,062 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:10:43,062 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:10:45,063 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:10:45,807 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:10:47,064 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:10:48,256 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:10:48,308 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:10:48,397 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:10:48,510 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:10:48,512 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:10:49,064 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:10:49,064 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:10:50,065 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:10:51,065 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:10:54,066 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:10:56,067 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:10:56,889 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:10:56,965 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:10:57,056 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:10:57,067 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:10:58,067 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:11:00,068 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:11:02,069 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:11:03,883 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:11:03,884 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:11:04,069 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:11:05,234 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:11:05,288 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:11:05,373 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:11:06,070 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:11:06,070 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:11:08,071 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:11:10,071 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:11:12,072 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:11:13,183 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:11:13,236 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:11:13,321 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:11:14,073 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:11:14,073 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:11:16,284 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:11:17,074 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:11:19,067 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:11:19,068 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:11:19,075 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:11:20,622 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:11:20,677 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:11:20,767 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:11:21,075 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:11:21,076 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:11:22,076 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:11:23,076 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:11:25,077 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:11:27,077 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:11:27,654 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:11:27,708 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:11:27,808 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:11:28,078 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:11:29,078 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:11:31,079 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:11:33,080 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:11:34,303 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:11:34,358 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:11:34,363 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:11:34,446 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:11:34,447 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:11:35,081 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:11:35,081 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:11:37,081 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:11:39,082 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:11:40,380 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:11:40,454 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:11:40,549 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:11:41,083 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:11:41,084 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:11:43,084 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:11:45,084 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:11:45,934 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:11:45,988 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:11:46,081 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:11:46,085 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:11:46,772 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:11:47,085 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:11:49,086 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:11:49,536 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:11:49,537 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:11:50,793 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:11:50,848 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:11:50,941 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:11:51,087 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:11:51,087 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:11:53,087 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:11:55,088 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:11:55,146 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:11:55,198 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:11:55,282 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:11:56,088 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:11:57,089 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:11:59,034 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:11:59,088 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:11:59,175 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:11:59,182 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:12:00,175 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:12:01,176 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:12:02,500 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:12:02,553 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:12:02,642 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:12:03,177 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:12:03,177 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:12:04,640 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:12:04,641 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:12:05,177 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:12:05,472 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:12:05,546 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:12:05,656 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:12:06,178 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:12:07,178 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:12:08,727 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:12:08,895 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:12:08,978 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:12:09,179 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:12:09,179 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:12:11,179 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:12:13,180 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:12:17,517 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:12:18,182 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:12:19,723 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:12:19,724 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:12:20,183 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:12:20,754 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:12:20,805 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:12:20,883 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:12:21,183 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:12:22,184 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:12:23,184 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:12:26,185 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:12:28,186 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:12:32,188 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:12:32,332 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:12:32,383 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:12:32,465 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:12:33,188 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:12:33,188 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:12:34,188 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:12:34,773 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:12:34,774 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:12:36,189 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:12:40,190 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:12:42,191 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:12:43,877 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:12:43,930 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:12:44,017 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:12:44,192 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:12:44,192 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:12:45,192 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:12:47,193 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:12:48,115 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:12:49,193 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:12:49,868 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:12:49,870 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:12:51,194 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:12:53,195 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:12:55,262 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:12:55,313 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:12:55,427 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:12:56,196 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:12:57,196 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:12:58,197 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:12:59,197 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:13:03,199 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:13:05,150 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:13:05,151 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:13:05,201 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:13:06,701 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:13:06,774 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:13:06,856 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:13:07,202 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:13:07,202 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:13:10,203 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:13:12,203 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:13:14,204 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:13:16,205 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:13:17,978 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:13:18,030 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:13:18,168 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:13:18,206 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:13:18,610 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:13:20,206 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:13:20,210 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:13:20,211 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:13:22,207 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:13:24,208 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:13:28,209 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:13:29,291 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:13:29,343 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:13:29,432 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:13:30,210 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:13:30,210 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:13:33,211 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:13:35,314 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:13:35,316 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:13:37,212 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:13:39,213 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:13:40,400 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:13:40,451 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:13:40,546 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:13:41,214 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:13:42,214 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:13:43,215 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:13:45,215 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:13:47,216 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:13:48,985 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:13:50,474 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:13:50,476 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:13:51,217 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:13:51,560 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:13:51,609 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:13:51,727 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:13:52,218 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:13:53,218 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:13:54,218 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:13:55,219 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:13:59,220 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:14:01,221 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:14:02,635 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:14:02,713 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:14:02,795 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:14:03,222 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:14:03,222 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:14:04,222 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:14:05,694 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:14:05,696 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:14:08,224 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:14:10,224 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:14:12,225 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:14:13,562 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:14:13,633 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:14:13,720 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:14:14,226 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:14:14,226 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:14:15,226 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:14:16,227 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:14:18,227 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:14:19,420 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:14:20,228 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:14:20,855 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:14:20,856 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:14:22,229 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:14:24,390 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:14:24,442 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:14:24,568 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:14:25,230 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:14:26,231 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:14:27,231 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:14:28,232 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:14:30,232 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:14:35,234 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:14:35,283 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:14:35,334 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:14:35,423 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:14:36,006 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:14:36,007 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:14:36,234 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:14:37,235 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:14:41,236 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:14:43,237 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:14:45,238 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:14:46,075 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:14:46,126 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:14:46,206 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:14:46,238 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:14:47,239 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:14:49,871 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:14:51,167 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:14:51,168 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:14:51,240 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:14:53,241 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:14:55,242 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:14:56,810 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:14:56,863 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:14:56,946 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:14:57,242 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:14:57,243 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:14:59,243 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:15:01,244 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:15:03,244 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:15:05,245 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:15:06,249 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:15:06,250 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:15:07,354 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:15:07,406 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:15:07,488 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:15:08,246 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:15:09,246 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:15:10,247 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:15:12,248 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:15:14,248 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:15:17,983 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:15:18,035 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:15:18,116 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:15:18,250 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:15:18,250 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:15:19,250 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:15:20,251 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:15:20,286 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:15:21,547 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:15:21,549 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:15:22,251 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:15:24,252 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:15:28,253 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:15:28,543 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:15:28,621 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:15:28,711 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:15:29,254 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:15:30,254 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:15:32,255 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:15:34,256 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:15:36,931 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:15:36,932 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:15:38,257 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:15:39,015 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:15:39,069 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:15:39,158 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:15:39,257 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:15:40,258 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:15:45,260 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:15:47,260 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:15:49,261 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:15:49,413 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:15:49,465 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:15:49,548 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:15:50,261 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:15:50,716 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:15:51,262 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:15:52,115 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:15:52,117 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:15:52,262 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:15:55,263 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:15:57,264 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:15:59,265 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:15:59,679 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:15:59,732 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:15:59,821 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:16:00,265 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:16:00,266 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:16:01,266 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:16:05,267 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:16:07,268 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:16:07,416 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:16:07,418 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:16:09,268 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:16:09,925 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:16:09,978 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:16:10,082 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:16:10,269 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:16:11,269 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:16:13,270 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:16:17,271 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:16:19,272 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:16:20,205 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:16:20,258 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:16:20,346 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:16:21,180 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:16:21,341 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:16:21,341 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:16:22,571 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:16:22,572 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:16:23,341 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:16:28,343 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:16:30,330 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:16:30,389 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:16:30,389 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:16:30,476 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:16:31,389 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:16:32,389 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:16:33,390 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:16:36,391 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:16:37,623 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:16:37,624 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:16:38,392 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:16:40,392 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:16:41,062 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:16:41,274 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:16:41,368 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:16:41,393 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:16:42,393 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:16:43,394 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:16:46,395 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:16:48,396 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:16:50,397 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:16:51,039 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:16:51,150 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:16:51,243 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:16:51,397 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:16:51,688 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:16:52,397 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:16:52,674 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:16:52,675 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:16:53,398 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:16:54,398 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:16:58,400 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:17:00,400 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:17:00,886 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:17:00,935 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:17:01,043 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:17:01,401 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:17:01,401 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:17:02,401 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:17:04,402 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:17:06,403 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:17:07,728 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:17:07,730 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:17:08,404 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:17:10,528 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:17:10,583 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:17:10,675 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:17:11,405 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:17:12,405 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:17:15,406 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:17:17,407 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:17:20,335 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:17:20,391 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:17:20,481 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:17:21,476 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:17:21,476 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:17:22,069 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:17:22,476 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:17:22,858 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:17:22,859 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:17:23,477 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:17:25,477 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:17:27,478 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:17:29,479 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:17:29,934 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:17:29,988 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:17:30,077 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:17:30,480 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:17:31,480 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:17:32,480 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:17:33,481 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:17:35,481 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:17:37,482 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:17:38,038 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:17:38,040 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:17:39,407 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:17:39,462 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:17:39,558 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:17:40,551 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:17:40,551 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:17:41,551 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:17:43,552 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:17:45,552 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:17:47,553 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:17:48,820 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:17:48,874 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:17:48,985 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:17:49,554 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:17:49,554 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:17:50,554 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:17:51,555 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:17:52,551 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:17:53,089 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:17:53,090 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:17:55,560 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:17:57,560 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:17:58,041 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:17:58,095 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:17:58,189 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:17:58,561 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:17:59,561 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:18:00,561 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:18:01,562 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:18:04,563 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:18:06,564 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:18:07,094 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:18:07,144 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:18:07,256 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:18:07,564 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:18:08,159 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:18:08,160 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:18:08,565 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:18:10,565 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:18:12,566 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:18:14,567 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:18:16,025 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:18:16,080 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:18:16,165 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:18:16,568 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:18:16,568 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:18:20,569 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:18:22,570 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:18:22,971 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:18:23,345 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:18:23,346 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:18:24,571 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:18:24,923 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:18:24,983 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:18:25,074 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:18:25,571 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:18:26,572 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:18:28,572 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:18:30,573 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:18:32,574 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:18:33,602 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:18:33,657 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:18:33,746 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:18:34,574 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:18:34,575 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:18:37,575 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:18:38,566 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:18:38,568 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:18:39,576 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:18:41,577 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:18:42,136 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:18:42,192 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:18:42,284 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:18:42,577 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:18:43,577 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:18:45,578 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:18:47,579 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:18:49,580 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:18:50,285 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:18:50,339 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:18:50,430 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:18:50,580 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:18:51,581 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:18:53,582 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:18:53,587 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:18:53,702 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:18:53,704 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:18:55,582 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:18:57,583 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:18:58,182 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:18:58,236 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:18:58,321 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:18:58,583 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:18:59,584 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:19:01,585 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:19:03,585 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:19:05,542 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:19:05,593 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:19:05,609 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:19:05,683 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:19:06,609 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:19:07,610 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:19:08,779 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:19:08,781 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:19:09,610 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:19:11,611 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:19:12,432 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:19:12,487 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:19:12,574 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:19:12,611 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:19:13,612 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:19:15,613 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:19:17,613 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:19:18,721 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:19:18,801 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:19:18,891 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:19:19,614 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:19:19,614 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:19:21,615 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:19:23,615 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:19:23,837 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:19:23,838 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:19:24,099 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:19:24,474 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:19:24,530 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:19:24,645 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:19:25,639 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:19:25,639 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:19:27,639 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:19:29,640 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:19:29,670 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:19:29,724 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:19:29,805 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:19:30,640 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:19:31,641 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:19:33,641 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:19:34,344 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:19:34,399 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:19:34,491 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:19:34,642 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:19:35,642 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:19:36,642 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:19:38,458 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:19:38,512 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:19:38,601 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:19:38,643 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:19:38,643 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:19:38,893 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:19:38,895 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:19:39,643 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:19:40,644 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:19:42,073 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:19:42,125 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:19:42,216 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:19:42,645 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:19:42,645 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:19:43,645 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:19:44,645 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:19:45,180 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:19:45,235 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:19:45,323 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:19:45,646 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:19:45,646 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:19:46,646 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:19:48,369 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:19:48,574 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:19:48,649 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:19:48,650 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:19:48,655 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:19:49,650 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:19:50,650 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:19:53,946 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:19:53,948 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:19:54,652 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:19:54,787 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:19:56,652 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:19:58,653 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:20:00,778 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:20:00,828 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:20:00,930 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:20:01,654 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:20:02,655 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:20:04,655 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:20:08,657 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:20:08,993 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:20:08,994 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:20:10,657 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:20:12,526 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:20:12,577 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:20:12,661 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:20:12,662 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:20:14,662 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:20:16,663 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:20:19,664 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:20:23,666 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:20:24,012 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:20:24,058 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:20:24,138 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:20:24,165 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:20:24,166 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:20:24,666 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:20:24,666 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:20:25,281 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:20:25,666 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:20:27,667 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:20:31,668 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:20:33,669 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:20:35,394 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:20:35,445 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:20:35,521 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:20:35,670 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:20:36,670 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:20:37,671 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:20:39,376 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:20:39,378 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:20:39,671 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:20:43,673 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:20:45,673 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:20:46,933 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:20:46,984 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:20:47,066 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:20:47,675 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:20:47,675 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:20:48,675 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:20:51,676 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:20:53,677 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:20:54,464 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:20:54,465 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:20:55,805 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:20:57,679 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:20:58,232 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:20:58,284 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:20:58,369 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:20:58,679 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:20:59,679 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:21:00,680 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:21:02,680 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:21:06,682 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:21:08,683 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:21:09,517 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:21:09,519 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:21:09,603 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:21:09,603 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:21:09,683 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:21:09,684 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:21:10,683 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:21:11,684 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:21:12,684 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:21:14,685 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:21:16,686 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:21:18,687 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:21:20,804 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:21:20,854 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:21:20,936 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:21:21,688 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:21:22,688 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:21:23,688 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:21:24,689 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:21:24,917 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:21:24,918 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:21:26,245 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:21:26,690 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:21:30,691 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:21:31,838 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:21:31,888 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:21:31,962 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:21:32,692 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:21:33,692 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:21:35,693 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:21:39,694 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:21:40,007 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:21:40,008 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:21:41,695 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:21:43,073 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:21:43,131 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:21:43,218 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:21:43,696 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:21:45,697 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:21:47,698 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:21:49,698 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:21:53,700 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:21:54,173 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:21:54,241 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:21:54,326 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:21:54,700 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:21:55,157 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:21:55,159 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:21:55,701 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:21:56,696 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:21:57,704 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:22:01,705 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:22:03,706 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:22:05,103 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:22:05,152 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:22:05,229 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:22:05,707 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:22:05,707 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:22:06,707 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:22:09,708 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:22:10,333 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:22:10,335 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:22:11,709 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:22:16,007 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:22:16,058 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:22:16,140 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:22:16,711 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:22:16,711 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:22:18,711 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:22:20,712 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:22:22,713 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:22:25,390 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:22:25,391 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:22:26,714 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:22:26,810 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:22:26,857 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:22:26,942 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:22:27,173 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:22:27,715 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:22:28,715 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:22:30,716 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:22:34,717 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:22:36,718 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:22:37,587 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:22:37,638 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:22:37,718 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:22:37,720 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:22:38,719 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:22:40,637 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:22:40,638 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:22:42,720 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:22:44,721 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:22:46,721 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:22:48,336 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:22:48,386 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:22:48,462 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:22:48,722 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:22:48,723 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:22:50,723 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:22:52,723 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:22:54,724 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:22:55,801 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:22:55,802 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:22:57,594 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:22:57,726 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:22:59,024 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:22:59,073 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:22:59,158 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:22:59,726 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:23:00,727 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:23:01,727 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:23:03,728 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:23:05,729 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:23:07,730 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:23:09,580 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:23:09,632 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:23:09,717 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:23:09,730 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:23:10,912 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:23:10,914 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:23:11,731 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:23:13,732 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:23:17,733 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:23:19,734 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:23:20,100 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:23:20,150 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:23:20,234 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:23:20,734 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:23:21,735 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:23:23,736 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:23:26,021 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:23:26,023 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:23:26,737 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:23:28,001 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:23:28,738 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:23:30,542 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:23:30,590 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:23:30,672 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:23:30,738 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:23:31,739 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:23:32,739 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:23:34,740 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:23:36,741 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:23:40,743 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:23:40,994 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:23:41,045 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:23:41,118 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:23:41,292 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:23:41,294 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:23:41,743 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:23:42,743 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:23:43,744 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:23:44,744 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:23:46,745 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:23:50,746 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:23:51,255 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:23:51,305 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:23:51,392 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:23:51,747 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:23:52,747 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:23:53,747 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:23:54,748 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:23:56,552 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:23:56,554 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:23:56,748 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:23:58,493 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:24:01,566 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:24:01,615 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:24:01,688 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:24:01,750 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:24:01,750 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:24:03,751 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:24:05,752 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:24:07,752 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:24:11,581 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:24:11,644 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:24:11,723 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:24:11,724 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:24:11,725 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:24:11,754 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:24:11,754 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:24:13,755 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:24:15,755 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:24:17,756 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:24:21,757 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:24:22,328 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:24:22,516 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:24:22,602 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:24:22,758 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:24:23,758 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:24:25,759 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:24:26,879 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:24:26,881 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:24:28,915 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:24:29,760 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:24:31,761 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:24:32,243 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:24:32,296 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:24:32,380 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:24:32,761 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:24:33,762 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:24:34,762 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:24:35,762 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:24:37,763 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:24:41,940 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:24:41,942 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:24:42,171 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:24:42,251 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:24:42,339 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:24:42,765 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:24:42,765 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:24:44,766 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:24:46,767 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:24:48,767 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:24:52,693 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:24:52,745 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:24:52,830 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:24:52,837 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:24:53,830 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:24:54,831 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:24:56,831 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:24:56,985 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:24:56,987 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:24:58,832 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:24:59,394 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:25:00,833 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:25:02,385 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:25:02,441 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:25:02,535 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:25:02,833 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:25:04,834 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:25:06,835 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:25:08,835 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:25:10,836 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:25:12,038 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:25:12,039 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:25:12,101 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:25:12,153 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:25:12,243 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:25:12,837 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:25:12,837 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:25:14,838 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:25:16,839 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:25:18,839 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:25:20,840 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:25:21,649 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:25:21,704 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:25:21,789 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:25:21,840 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:25:22,841 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:25:24,841 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:25:27,232 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:25:27,234 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:25:27,843 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:25:29,923 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:25:31,219 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:25:31,273 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:25:31,364 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:25:31,844 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:25:31,845 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:25:32,845 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:25:33,845 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:25:35,846 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:25:37,847 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:25:39,848 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:25:40,624 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:25:40,681 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:25:40,767 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:25:40,848 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:25:41,848 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:25:42,369 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:25:42,370 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:25:42,849 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:25:45,850 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:25:47,851 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:25:49,851 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:25:49,980 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:25:50,036 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:25:50,125 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:25:50,852 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:25:50,852 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:25:51,852 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:25:53,853 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:25:55,854 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:25:57,591 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:25:57,592 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:25:57,855 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:25:59,271 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:25:59,324 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:25:59,410 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:25:59,855 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:25:59,856 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:26:00,381 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:26:01,856 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:26:02,857 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:26:06,858 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:26:08,201 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:26:08,246 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:26:08,340 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:26:08,859 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:26:08,859 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:26:09,860 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:26:10,860 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:26:12,861 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:26:12,890 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:26:12,891 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:26:14,861 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:26:16,862 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:26:17,023 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:26:17,076 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:26:17,168 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:26:17,862 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:26:18,863 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:26:19,863 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:26:20,864 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:26:22,865 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:26:24,866 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:26:25,623 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:26:25,677 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:26:25,767 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:26:25,866 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:26:26,866 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:26:27,867 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:26:28,086 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:26:28,087 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:26:28,867 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:26:30,776 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:26:30,868 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:26:32,869 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:26:33,992 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:26:34,044 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:26:34,133 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:26:34,869 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:26:34,870 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:26:35,870 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:26:37,871 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:26:39,871 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:26:41,872 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:26:42,013 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:26:42,069 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:26:42,159 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:26:42,872 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:26:43,276 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:26:43,277 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:26:43,873 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:26:45,874 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:26:47,874 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:26:49,683 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:26:49,737 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:26:49,868 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:26:49,875 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:26:49,875 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:26:51,876 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:26:53,876 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:26:55,877 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:26:56,893 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:26:56,936 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:26:57,028 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:26:57,909 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:26:57,909 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:26:58,320 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:26:58,322 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:26:59,910 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:27:01,330 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:27:01,911 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:27:03,580 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:27:03,636 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:27:03,728 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:27:03,911 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:27:03,912 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:27:05,912 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:27:07,913 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:27:09,692 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:27:09,749 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:27:09,842 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:27:09,913 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:27:09,914 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:27:11,914 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:27:13,383 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:27:13,385 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:27:13,915 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:27:15,201 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:27:15,268 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:27:15,362 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:27:15,916 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:27:15,916 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:27:17,916 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:27:19,917 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:27:20,060 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:27:20,114 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:27:20,222 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:27:20,917 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:27:21,918 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:27:23,919 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:27:24,482 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:27:24,537 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:27:24,629 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:27:24,919 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:27:25,919 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:27:27,920 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:27:28,348 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:27:28,400 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:27:28,510 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:27:28,513 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:27:28,514 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:27:28,920 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:27:29,921 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:27:31,544 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:27:31,607 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:27:31,697 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:27:31,921 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:27:31,922 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:27:31,941 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:27:33,922 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:27:34,838 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:27:35,007 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:27:35,093 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:27:36,000 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:27:36,001 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:27:40,002 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:27:42,003 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:27:43,606 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:27:43,606 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:27:46,004 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:27:47,186 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:27:47,239 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:27:47,326 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:27:48,005 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:27:48,005 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:27:49,005 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:27:53,007 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:27:55,008 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:27:58,655 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:27:58,657 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:27:58,958 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:27:59,010 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:27:59,114 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:27:59,116 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:28:00,058 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:28:01,059 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:28:02,376 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:28:03,059 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:28:07,061 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:28:09,061 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:28:10,603 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:28:10,656 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:28:10,740 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:28:11,062 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:28:11,062 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:28:13,063 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:28:13,763 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:28:13,764 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:28:15,064 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:28:17,064 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:28:21,066 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:28:22,232 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:28:22,283 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:28:22,373 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:28:23,066 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:28:23,067 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:28:24,067 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:28:28,068 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:28:29,112 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:28:29,113 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:28:30,069 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:28:32,763 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:28:33,827 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:28:33,880 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:28:34,002 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:28:34,070 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:28:34,071 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:28:35,071 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:28:36,071 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:28:38,072 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:28:42,073 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:28:44,074 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:28:44,213 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:28:44,215 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:28:45,200 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:28:45,252 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:28:45,333 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:28:46,075 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:28:46,075 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:28:47,075 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:28:48,076 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:28:50,077 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:28:52,077 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:28:56,079 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:28:56,507 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:28:56,560 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:28:56,649 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:28:57,079 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:28:58,080 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:28:59,080 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:28:59,268 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:28:59,269 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:29:00,081 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:29:03,199 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:29:05,082 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:29:07,083 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:29:07,738 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:29:07,790 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:29:07,877 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:29:08,084 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:29:09,084 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:29:13,085 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:29:14,475 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:29:14,476 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:29:15,086 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:29:17,087 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:29:18,932 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:29:18,984 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:29:19,068 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:29:19,087 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:29:21,088 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:29:22,088 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:29:23,089 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:29:27,090 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:29:29,091 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:29:29,626 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:29:29,627 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:29:30,038 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:29:30,088 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:29:30,171 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:29:31,166 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:29:31,166 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:29:32,166 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:29:33,167 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:29:33,639 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:29:38,168 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:29:40,169 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:29:40,919 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:29:40,971 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:29:41,059 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:29:41,170 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:29:42,170 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:29:44,774 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:29:44,775 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:29:46,171 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:29:48,172 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:29:50,173 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:29:51,810 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:29:51,877 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:29:51,980 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:29:52,174 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:29:54,175 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:29:56,176 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:29:58,176 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:29:59,848 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:29:59,849 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:30:02,178 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:30:02,618 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:30:02,672 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:30:02,773 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:30:03,178 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:30:04,178 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:30:04,213 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:30:08,180 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:30:10,181 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:30:12,181 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:30:13,336 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:30:13,388 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:30:13,469 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:30:14,182 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:30:14,182 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:30:15,016 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:30:15,017 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:30:16,183 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:30:18,183 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:30:20,184 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:30:22,185 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:30:24,036 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:30:24,089 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:30:24,169 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:30:24,185 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:30:26,186 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:30:28,187 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:30:30,188 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:30:30,316 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:30:30,317 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:30:34,642 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:30:34,738 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:30:34,792 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:30:34,877 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:30:35,189 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:30:35,189 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:30:36,190 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:30:37,190 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:30:39,191 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:30:41,191 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:30:45,193 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:30:45,373 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:30:45,424 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:30:45,510 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:30:45,523 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:30:45,525 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:30:46,193 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:30:46,193 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:30:47,194 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:30:49,194 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:30:53,196 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:30:55,196 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:30:55,951 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:30:56,002 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:30:56,088 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:30:56,197 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:30:57,197 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:30:58,198 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:30:59,198 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:31:00,677 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:31:00,679 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:31:03,200 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:31:05,083 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:31:05,200 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:31:06,455 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:31:06,507 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:31:06,594 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:31:07,201 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:31:07,201 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:31:08,202 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:31:10,202 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:31:14,204 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:31:15,727 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:31:15,728 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:31:16,204 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:31:16,921 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:31:16,973 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:31:17,059 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:31:17,205 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:31:18,205 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:31:19,205 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:31:22,206 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:31:24,207 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:31:26,208 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:31:27,329 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:31:27,380 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:31:27,463 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:31:28,209 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:31:28,209 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:31:29,209 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:31:30,209 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:31:30,827 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:31:30,828 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:31:32,210 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:31:34,211 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:31:35,610 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:31:36,211 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:31:37,643 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:31:37,697 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:31:37,789 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:31:38,212 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:31:38,212 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:31:39,212 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:31:43,214 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:31:45,214 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:31:46,094 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:31:46,096 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:31:47,215 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:31:47,911 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:31:47,964 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:31:48,048 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:31:48,216 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:31:49,216 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:31:53,217 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:31:55,218 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:31:57,219 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:31:57,967 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:31:58,019 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:31:58,104 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:31:58,219 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:31:59,220 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:32:01,221 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:32:01,443 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:32:01,444 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:32:05,222 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:32:06,222 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:32:07,230 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:32:08,873 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:32:09,054 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:32:09,135 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:32:09,231 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:32:09,232 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:32:11,232 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:32:13,233 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:32:15,234 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:32:16,656 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:32:16,656 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:32:17,234 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:32:18,950 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:32:19,003 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:32:19,087 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:32:19,235 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:32:19,236 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:32:20,236 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:32:24,237 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:32:26,238 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:32:28,239 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:32:28,829 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:32:28,882 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:32:28,967 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:32:29,239 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:32:30,239 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:32:31,852 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:32:31,854 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:32:34,241 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:32:36,241 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:32:36,714 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:32:38,242 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:32:38,562 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:32:38,615 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:32:38,703 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:32:39,243 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:32:40,243 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:32:42,244 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:32:44,244 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:32:46,245 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:32:47,000 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:32:47,001 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:32:48,157 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:32:48,209 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:32:48,289 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:32:48,291 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:32:49,290 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:32:50,290 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:32:51,290 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:32:53,291 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:32:55,292 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:32:57,292 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:32:57,874 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:32:57,927 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:32:58,041 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:32:58,293 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:32:59,293 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:33:00,294 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:33:02,067 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:33:02,068 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:33:03,295 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:33:05,296 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:33:07,185 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:33:07,296 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:33:07,451 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:33:07,502 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:33:07,626 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:33:08,297 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:33:08,297 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:33:09,297 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:33:11,298 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:33:13,298 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:33:15,299 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:33:16,989 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:33:17,039 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:33:17,125 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:33:17,192 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:33:17,193 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:33:17,300 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:33:18,300 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:33:19,301 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:33:21,301 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:33:23,302 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:33:25,303 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:33:26,350 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:33:26,402 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:33:26,485 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:33:27,304 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:33:27,304 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:33:28,304 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:33:30,305 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:33:32,306 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:33:32,398 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:33:32,399 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:33:34,306 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:33:35,445 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:33:35,499 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:33:35,603 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:33:36,307 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:33:36,308 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:33:37,674 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:33:40,309 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:33:42,310 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:33:44,446 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:33:44,500 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:33:44,588 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:33:45,311 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:33:45,311 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:33:47,312 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:33:47,578 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:33:47,580 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:33:49,312 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:33:51,313 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:33:53,244 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:33:53,297 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:33:53,381 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:33:53,382 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:33:54,381 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:33:54,382 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:33:55,382 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:33:56,382 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:33:59,383 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:34:01,384 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:34:01,839 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:34:01,890 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:34:01,972 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:34:02,384 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:34:02,661 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:34:02,663 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:34:03,385 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:34:05,386 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:34:07,386 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:34:08,100 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:34:09,387 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:34:10,216 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:34:10,270 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:34:10,354 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:34:10,387 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:34:11,388 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:34:13,389 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:34:15,389 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:34:17,390 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:34:17,737 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:34:17,738 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:34:18,375 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:34:18,427 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:34:18,516 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:34:19,411 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:34:19,412 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:34:21,412 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:34:23,413 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:34:25,414 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:34:26,187 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:34:26,240 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:34:26,321 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:34:26,414 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:34:27,415 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:34:29,416 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:34:31,416 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:34:32,783 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:34:32,785 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:34:33,417 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:34:33,649 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:34:33,701 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:34:33,786 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:34:34,417 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:34:35,418 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:34:37,418 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:34:38,597 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:34:40,419 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:34:40,729 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:34:40,781 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:34:40,868 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:34:41,420 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:34:41,420 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:34:42,420 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:34:44,421 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:34:46,422 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:34:47,517 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:34:47,570 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:34:47,653 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:34:47,989 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:34:47,990 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:34:48,422 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:34:48,423 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:34:49,423 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:34:50,423 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:34:52,424 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:34:53,755 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:34:53,808 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:34:53,895 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:34:54,425 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:34:54,425 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:34:55,425 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:34:56,426 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:34:58,426 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:34:59,311 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:34:59,363 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:34:59,444 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:35:00,443 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:35:00,443 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:35:02,444 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:35:03,201 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:35:03,203 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:35:04,261 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:35:04,310 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:35:04,394 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:35:04,445 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:35:04,445 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:35:06,445 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:35:08,446 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:35:08,538 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:35:08,590 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:35:08,679 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:35:09,115 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:35:09,446 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:35:10,447 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:35:12,320 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:35:12,376 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:35:12,459 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:35:12,467 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:35:13,459 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:35:14,460 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:35:15,600 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:35:15,651 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:35:15,733 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:35:16,460 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:35:16,461 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:35:18,298 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:35:18,300 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:35:18,461 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:35:18,994 DEBUG SenderThread:266499 [sender.py:send():235] send: history +2022-03-02 23:35:19,159 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:35:19,238 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:35:19,461 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:35:20,462 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:35:22,463 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:35:22,990 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 23:35:22,991 DEBUG SenderThread:266499 [sender.py:send():235] send: telemetry +2022-03-02 23:35:22,991 DEBUG SenderThread:266499 [sender.py:send():235] send: exit +2022-03-02 23:35:22,991 INFO SenderThread:266499 [sender.py:send_exit():371] handling exit code: 1 +2022-03-02 23:35:22,991 INFO SenderThread:266499 [sender.py:send_exit():373] handling runtime: 4156 +2022-03-02 23:35:23,044 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:35:23,044 INFO SenderThread:266499 [sender.py:send_exit():379] send defer +2022-03-02 23:35:23,044 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 23:35:23,045 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: defer +2022-03-02 23:35:23,045 INFO HandlerThread:266499 [handler.py:handle_request_defer():154] handle defer: 0 +2022-03-02 23:35:23,045 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: defer +2022-03-02 23:35:23,045 INFO SenderThread:266499 [sender.py:send_request_defer():388] handle sender defer: 0 +2022-03-02 23:35:23,045 INFO SenderThread:266499 [sender.py:transition_state():392] send defer: 1 +2022-03-02 23:35:23,046 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: defer +2022-03-02 23:35:23,046 INFO HandlerThread:266499 [handler.py:handle_request_defer():154] handle defer: 1 +2022-03-02 23:35:23,103 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: defer +2022-03-02 23:35:23,103 INFO SenderThread:266499 [sender.py:send_request_defer():388] handle sender defer: 1 +2022-03-02 23:35:23,103 INFO SenderThread:266499 [sender.py:transition_state():392] send defer: 2 +2022-03-02 23:35:23,103 DEBUG SenderThread:266499 [sender.py:send():235] send: stats +2022-03-02 23:35:23,104 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: defer +2022-03-02 23:35:23,104 INFO HandlerThread:266499 [handler.py:handle_request_defer():154] handle defer: 2 +2022-03-02 23:35:23,104 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: defer +2022-03-02 23:35:23,104 INFO SenderThread:266499 [sender.py:send_request_defer():388] handle sender defer: 2 +2022-03-02 23:35:23,104 INFO SenderThread:266499 [sender.py:transition_state():392] send defer: 3 +2022-03-02 23:35:23,104 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: defer +2022-03-02 23:35:23,104 INFO HandlerThread:266499 [handler.py:handle_request_defer():154] handle defer: 3 +2022-03-02 23:35:23,160 DEBUG SenderThread:266499 [sender.py:send():235] send: summary +2022-03-02 23:35:23,165 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 23:35:23,242 INFO SenderThread:266499 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:35:23,243 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: defer +2022-03-02 23:35:23,243 INFO SenderThread:266499 [sender.py:send_request_defer():388] handle sender defer: 3 +2022-03-02 23:35:23,243 INFO SenderThread:266499 [sender.py:transition_state():392] send defer: 4 +2022-03-02 23:35:23,243 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 23:35:23,243 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: defer +2022-03-02 23:35:23,244 INFO HandlerThread:266499 [handler.py:handle_request_defer():154] handle defer: 4 +2022-03-02 23:35:23,244 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: defer +2022-03-02 23:35:23,244 INFO SenderThread:266499 [sender.py:send_request_defer():388] handle sender defer: 4 +2022-03-02 23:35:23,345 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 23:35:23,463 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:35:23,463 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:35:24,168 INFO SenderThread:266499 [sender.py:transition_state():392] send defer: 5 +2022-03-02 23:35:24,168 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 23:35:24,169 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: defer +2022-03-02 23:35:24,169 INFO HandlerThread:266499 [handler.py:handle_request_defer():154] handle defer: 5 +2022-03-02 23:35:24,169 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: defer +2022-03-02 23:35:24,169 INFO SenderThread:266499 [sender.py:send_request_defer():388] handle sender defer: 5 +2022-03-02 23:35:24,169 INFO SenderThread:266499 [dir_watcher.py:finish():283] shutting down directory watcher +2022-03-02 23:35:24,270 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 23:35:24,463 INFO Thread-8 :266499 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/config.yaml +2022-03-02 23:35:24,464 INFO SenderThread:266499 [dir_watcher.py:finish():313] scan: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files +2022-03-02 23:35:24,464 INFO SenderThread:266499 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-metadata.json wandb-metadata.json +2022-03-02 23:35:24,464 INFO SenderThread:266499 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log output.log +2022-03-02 23:35:24,464 INFO SenderThread:266499 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json wandb-summary.json +2022-03-02 23:35:24,465 INFO SenderThread:266499 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/requirements.txt requirements.txt +2022-03-02 23:35:24,467 INFO SenderThread:266499 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/config.yaml config.yaml +2022-03-02 23:35:24,468 INFO SenderThread:266499 [sender.py:transition_state():392] send defer: 6 +2022-03-02 23:35:24,468 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 23:35:24,471 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: defer +2022-03-02 23:35:24,472 INFO HandlerThread:266499 [handler.py:handle_request_defer():154] handle defer: 6 +2022-03-02 23:35:24,477 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: defer +2022-03-02 23:35:24,477 INFO SenderThread:266499 [sender.py:send_request_defer():388] handle sender defer: 6 +2022-03-02 23:35:24,477 INFO SenderThread:266499 [file_pusher.py:finish():177] shutting down file pusher +2022-03-02 23:35:24,572 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 23:35:24,573 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 23:35:24,675 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 23:35:24,675 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 23:35:24,774 INFO Thread-12 :266499 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/output.log +2022-03-02 23:35:24,776 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 23:35:24,777 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 23:35:24,788 INFO Thread-14 :266499 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/requirements.txt +2022-03-02 23:35:24,790 INFO Thread-13 :266499 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/wandb-summary.json +2022-03-02 23:35:24,795 INFO Thread-15 :266499 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/files/config.yaml +2022-03-02 23:35:24,878 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 23:35:24,879 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 23:35:24,980 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 23:35:24,980 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 23:35:24,995 INFO Thread-7 :266499 [sender.py:transition_state():392] send defer: 7 +2022-03-02 23:35:24,996 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: defer +2022-03-02 23:35:24,996 INFO HandlerThread:266499 [handler.py:handle_request_defer():154] handle defer: 7 +2022-03-02 23:35:24,996 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: defer +2022-03-02 23:35:24,996 INFO SenderThread:266499 [sender.py:send_request_defer():388] handle sender defer: 7 +2022-03-02 23:35:25,082 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 23:35:26,629 INFO SenderThread:266499 [sender.py:transition_state():392] send defer: 8 +2022-03-02 23:35:26,629 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 23:35:26,630 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: defer +2022-03-02 23:35:26,630 INFO HandlerThread:266499 [handler.py:handle_request_defer():154] handle defer: 8 +2022-03-02 23:35:26,630 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: defer +2022-03-02 23:35:26,630 INFO SenderThread:266499 [sender.py:send_request_defer():388] handle sender defer: 8 +2022-03-02 23:35:26,631 INFO SenderThread:266499 [sender.py:transition_state():392] send defer: 9 +2022-03-02 23:35:26,631 DEBUG SenderThread:266499 [sender.py:send():235] send: final +2022-03-02 23:35:26,632 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: defer +2022-03-02 23:35:26,633 INFO HandlerThread:266499 [handler.py:handle_request_defer():154] handle defer: 9 +2022-03-02 23:35:26,633 DEBUG SenderThread:266499 [sender.py:send():235] send: footer +2022-03-02 23:35:26,633 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: defer +2022-03-02 23:35:26,633 INFO SenderThread:266499 [sender.py:send_request_defer():388] handle sender defer: 9 +2022-03-02 23:35:26,731 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 23:35:26,731 DEBUG SenderThread:266499 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 23:35:26,731 INFO SenderThread:266499 [file_pusher.py:join():182] waiting for file pusher +2022-03-02 23:35:26,787 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: get_summary +2022-03-02 23:35:26,884 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: sampled_history +2022-03-02 23:35:26,887 DEBUG HandlerThread:266499 [handler.py:handle_request():131] handle_request: shutdown +2022-03-02 23:35:26,888 INFO HandlerThread:266499 [handler.py:finish():739] shutting down handler +2022-03-02 23:35:27,632 INFO WriterThread:266499 [datastore.py:close():281] close: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/run-10glutwr.wandb +2022-03-02 23:35:27,786 INFO SenderThread:266499 [sender.py:finish():1075] shutting down sender +2022-03-02 23:35:27,787 INFO SenderThread:266499 [file_pusher.py:finish():177] shutting down file pusher +2022-03-02 23:35:27,787 INFO SenderThread:266499 [file_pusher.py:join():182] waiting for file pusher +2022-03-02 23:35:27,793 INFO MainThread:266499 [internal.py:handle_exit():79] Internal process exited diff --git a/wandb/run-20220302_222605-10glutwr/logs/debug.log b/wandb/run-20220302_222605-10glutwr/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..e91f85444797dac07b0e9761eb8d2887864dadde --- /dev/null +++ b/wandb/run-20220302_222605-10glutwr/logs/debug.log @@ -0,0 +1,125 @@ +2022-03-02 22:26:05,010 INFO MainThread:266400 [wandb_setup.py:_flush():75] Loading settings from /home/sanchit_huggingface_co/.config/wandb/settings +2022-03-02 22:26:05,010 INFO MainThread:266400 [wandb_setup.py:_flush():75] Loading settings from /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/settings +2022-03-02 22:26:05,010 INFO MainThread:266400 [wandb_setup.py:_flush():75] Loading settings from environment variables: {} +2022-03-02 22:26:05,010 INFO MainThread:266400 [wandb_setup.py:_flush():75] Inferring run settings from compute environment: {'program_relpath': 'run_speech_recognition_seq2seq.py', 'program': '/home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/run_speech_recognition_seq2seq.py'} +2022-03-02 22:26:05,010 INFO MainThread:266400 [wandb_init.py:_log_setup():386] Logging user logs to /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/logs/debug.log +2022-03-02 22:26:05,010 INFO MainThread:266400 [wandb_init.py:_log_setup():387] Logging internal logs to /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_222605-10glutwr/logs/debug-internal.log +2022-03-02 22:26:05,010 INFO MainThread:266400 [wandb_init.py:init():420] calling init triggers +2022-03-02 22:26:05,010 INFO MainThread:266400 [wandb_init.py:init():425] wandb.init called with sweep_config: {} +config: {} +2022-03-02 22:26:05,010 INFO MainThread:266400 [wandb_init.py:init():471] starting backend +2022-03-02 22:26:05,010 INFO MainThread:266400 [backend.py:_multiprocessing_setup():99] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2022-03-02 22:26:05,078 INFO MainThread:266400 [backend.py:ensure_launched():219] starting backend process... +2022-03-02 22:26:05,143 INFO MainThread:266400 [backend.py:ensure_launched():224] started backend process with pid: 266499 +2022-03-02 22:26:05,145 INFO MainThread:266400 [wandb_init.py:init():480] backend started and connected +2022-03-02 22:26:05,156 INFO MainThread:266400 [wandb_init.py:init():550] updated telemetry +2022-03-02 22:26:05,315 INFO MainThread:266400 [wandb_init.py:init():581] communicating current version +2022-03-02 22:26:06,059 INFO MainThread:266400 [wandb_init.py:init():586] got version response upgrade_message: "wandb version 0.12.11 is available! To upgrade, please run:\n $ pip install wandb --upgrade" + +2022-03-02 22:26:06,060 INFO MainThread:266400 [wandb_init.py:init():596] communicating run to backend with 30 second timeout +2022-03-02 22:26:06,158 INFO MainThread:266400 [wandb_init.py:init():624] starting run threads in backend +2022-03-02 22:26:06,273 INFO MainThread:266400 [wandb_run.py:_console_start():1827] atexit reg +2022-03-02 22:26:06,274 INFO MainThread:266400 [wandb_run.py:_redirect():1701] redirect: SettingsConsole.REDIRECT +2022-03-02 22:26:06,274 INFO MainThread:266400 [wandb_run.py:_redirect():1706] Redirecting console. +2022-03-02 22:26:06,276 INFO MainThread:266400 [wandb_run.py:_redirect():1762] Redirects installed. +2022-03-02 22:26:06,276 INFO MainThread:266400 [wandb_init.py:init():651] run started, returning control to user process +2022-03-02 22:26:06,280 INFO MainThread:266400 [wandb_run.py:_config_callback():966] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'torch.float32', 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 50, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'architectures': ['SpeechEncoderDecoderModel'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 50256, 'eos_token_id': 50256, 'sep_token_id': None, 'decoder_start_token_id': 50256, 'task_specific_params': None, 'problem_type': None, '_name_or_path': './', 'transformers_version': None, 'decoder': {'vocab_size': 50257, 'n_positions': 1024, 'n_embd': 1024, 'n_layer': 24, 'n_head': 16, 'n_inner': None, 'activation_function': 'gelu_new', 'resid_pdrop': 0.0, 'embd_pdrop': 0.0, 'attn_pdrop': 0.0, 'layer_norm_epsilon': 1e-05, 'initializer_range': 0.02, 'summary_type': 'cls_index', 'summary_use_proj': True, 'summary_activation': None, 'summary_first_dropout': 0.0, 'summary_proj_to_labels': True, 'scale_attn_weights': True, 'use_cache': False, 'scale_attn_by_inverse_layer_idx': False, 'reorder_and_upcast_attn': False, 'bos_token_id': 50256, 'eos_token_id': 50256, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': True, 'cross_attention_hidden_size': None, 'add_cross_attention': True, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'architectures': ['GPT2LMHeadModel'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'pad_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': {'text-generation': {'do_sample': True, 'max_length': 50}}, 'problem_type': None, '_name_or_path': 'gpt2-medium', 'transformers_version': '4.17.0.dev0', 'n_ctx': 1024, 'n_special': 0, 'predict_special_tokens': True, 'model_type': 'gpt2'}, 'encoder': {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'architectures': ['Wav2Vec2ForPreTraining'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 1, 'pad_token_id': 0, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'facebook/wav2vec2-large-lv60', 'transformers_version': '4.17.0.dev0', 'feat_extract_dropout': 0.0, 'gradient_checkpointing': False, 'hidden_dropout_prob': 0.0, 'num_feat_extract_layers': 7, 'hidden_size': 1024, 'feat_extract_norm': 'layer', 'feat_extract_activation': 'gelu', 'conv_dim': [512, 512, 512, 512, 512, 512, 512], 'conv_stride': [5, 2, 2, 2, 2, 2, 2], 'conv_kernel': [10, 3, 3, 3, 3, 2, 2], 'conv_bias': True, 'num_conv_pos_embeddings': 128, 'num_conv_pos_embedding_groups': 16, 'num_hidden_layers': 24, 'intermediate_size': 4096, 'hidden_act': 'gelu', 'num_attention_heads': 16, 'hidden_dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'feat_proj_dropout': 0.0, 'final_dropout': 0.0, 'layerdrop': 0.0, 'layer_norm_eps': 1e-05, 'initializer_range': 0.02, 'vocab_size': 32, 'do_stable_layer_norm': True, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.0, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'num_codevectors_per_group': 320, 'num_codevector_groups': 2, 'contrastive_logits_temperature': 0.1, 'feat_quantizer_dropout': 0.0, 'num_negatives': 100, 'codevector_dim': 768, 'proj_codevector_dim': 768, 'diversity_loss_weight': 0.1, 'ctc_loss_reduction': 'sum', 'ctc_zero_infinity': False, 'add_adapter': True, 'adapter_kernel_size': 3, 'adapter_stride': 2, 'num_adapter_layers': 3, 'output_hidden_size': 1024, 'classifier_proj_size': 256, 'tdnn_dim': [512, 512, 512, 512, 1500], 'tdnn_kernel': [5, 3, 3, 1, 1], 'tdnn_dilation': [1, 2, 3, 1, 1], 'xvector_output_dim': 512, 'model_type': 'wav2vec2'}, 'model_type': 'speech-encoder-decoder', 'processor_class': 'Wav2Vec2Processor', 'use_cache': False, 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 14, 'per_device_eval_batch_size': 14, 'per_gpu_train_batch_size': 'None', 'per_gpu_eval_batch_size': 'None', 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': 'None', 'learning_rate': 0.0003, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1.0, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Mar02_22-25-22_sanchit--v100', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 1, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 500, 'save_total_limit': 1, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'amp', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': 'None', 'local_rank': -1, 'xpu_backend': 'None', 'tpu_num_cores': 'None', 'tpu_metrics_debug': False, 'debug': '[]', 'dataloader_drop_last': False, 'eval_steps': 500, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': 'None', 'load_best_model_at_end': False, 'metric_for_best_model': 'None', 'greater_is_better': 'None', 'ignore_data_skip': False, 'sharded_ddp': '[]', 'deepspeed': 'None', 'label_smoothing_factor': 0.0, 'optim': 'adamw_hf', 'adafactor': False, 'group_by_length': True, 'length_column_name': 'input_length', 'report_to': "['wandb']", 'ddp_find_unused_parameters': 'None', 'ddp_bucket_cap_mb': 'None', 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': 'None', 'hub_model_id': 'None', 'hub_strategy': 'every_save', 'hub_token': '', 'gradient_checkpointing': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': 'None', 'push_to_hub_organization': 'None', 'push_to_hub_token': '', '_n_gpu': 1, 'mp_parameters': '', 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 40, 'generation_num_beams': 1, 'train_batch_size': 14, 'eval_batch_size': 14} +2022-03-02 22:26:06,285 INFO MainThread:266400 [wandb_watch.py:watch():43] Watching +2022-03-02 23:35:20,661 INFO MainThread:266400 [wandb_run.py:_atexit_cleanup():1797] got exitcode: 1 +2022-03-02 23:35:20,662 INFO MainThread:266400 [wandb_run.py:_restore():1769] restore +2022-03-02 23:35:23,045 INFO MainThread:266400 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 2095 + total_bytes: 2095 +} + +2022-03-02 23:35:23,244 INFO MainThread:266400 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 2095 + total_bytes: 2095 +} + +2022-03-02 23:35:24,169 INFO MainThread:266400 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 2095 + total_bytes: 2095 +} + +2022-03-02 23:35:24,471 INFO MainThread:266400 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 3 +} +pusher_stats { + uploaded_bytes: 2095 + total_bytes: 2051200 +} + +2022-03-02 23:35:24,574 INFO MainThread:266400 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 174127 + total_bytes: 2352011 +} + +2022-03-02 23:35:24,676 INFO MainThread:266400 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 2352011 + total_bytes: 2352011 +} + +2022-03-02 23:35:24,777 INFO MainThread:266400 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 2352011 + total_bytes: 2352011 +} + +2022-03-02 23:35:24,879 INFO MainThread:266400 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 2352011 + total_bytes: 2352011 +} + +2022-03-02 23:35:24,981 INFO MainThread:266400 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 2352011 + total_bytes: 2352011 +} + +2022-03-02 23:35:26,630 INFO MainThread:266400 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 2352011 + total_bytes: 2352011 +} + +2022-03-02 23:35:26,787 INFO MainThread:266400 [wandb_run.py:_wait_for_finish():1929] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 2352011 + total_bytes: 2352011 +} +local_info { +} + +2022-03-02 23:35:27,944 INFO MainThread:266400 [wandb_run.py:_append_history():2144] rendering history +2022-03-02 23:35:27,945 INFO MainThread:266400 [wandb_run.py:_append_summary():2102] rendering summary +2022-03-02 23:35:27,946 INFO MainThread:266400 [wandb_run.py:_append_files():2194] logging synced files diff --git a/wandb/run-20220302_222605-10glutwr/run-10glutwr.wandb b/wandb/run-20220302_222605-10glutwr/run-10glutwr.wandb new file mode 100644 index 0000000000000000000000000000000000000000..99092e8bc375d16d1ed6222dc28012f50e889061 --- /dev/null +++ b/wandb/run-20220302_222605-10glutwr/run-10glutwr.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac6d6670d416568eed6d71f6125deb820b6762806b784e9aa0f2962ef5170dc2 +size 29171596 diff --git a/wandb/run-20220302_233655-33dtvgaa/files/config.yaml b/wandb/run-20220302_233655-33dtvgaa/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..004eb0ed23d3b79323982e119e947f5906fda9ff --- /dev/null +++ b/wandb/run-20220302_233655-33dtvgaa/files/config.yaml @@ -0,0 +1,11321 @@ +wandb_version: 1 + +_n_gpu: + desc: null + value: 1 +_name_or_path: + desc: null + value: ./ +_wandb: + desc: null + value: + cli_version: 0.12.10 + framework: huggingface + huggingface_version: 4.17.0.dev0 + is_jupyter_run: false + is_kaggle_kernel: false + m: + - 1: train/global_step + 6: + - 3 + - 1: train/loss + 5: 1 + 6: + - 1 + - 1: train/learning_rate + 5: 1 + 6: + - 1 + - 1: train/epoch + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.ln_f\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.ln_f\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.ln_f\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.ln_f\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.ln_f\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.ln_f\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.wpe\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.wpe\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.wpe\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.wte\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.wte\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.wte\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.2\.conv\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.2\.conv\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.2\.conv\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.2\.conv\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.2\.conv\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.2\.conv\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.1\.conv\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.1\.conv\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.1\.conv\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.1\.conv\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.1\.conv\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.1\.conv\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.0\.conv\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.0\.conv\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.0\.conv\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.0\.conv\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.0\.conv\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.0\.conv\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.weight_v._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.weight_v.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.weight_v.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.weight_g._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.weight_g.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.weight_g.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.projection\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.projection\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.projection\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.projection\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.projection\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.projection\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + python_version: 3.9.5 + start_time: 1646264215 + t: + 1: + - 1 + - 5 + - 11 + 2: + - 1 + - 5 + - 11 + 3: + - 1 + - 7 + - 13 + 4: 3.9.5 + 5: 0.12.10 + 6: 4.17.0.dev0 + 8: + - 5 +adafactor: + desc: null + value: false +adam_beta1: + desc: null + value: 0.9 +adam_beta2: + desc: null + value: 0.999 +adam_epsilon: + desc: null + value: 1.0e-08 +add_cross_attention: + desc: null + value: false +architectures: + desc: null + value: + - SpeechEncoderDecoderModel +bad_words_ids: + desc: null + value: null +bf16: + desc: null + value: false +bf16_full_eval: + desc: null + value: false +bos_token_id: + desc: null + value: null +chunk_size_feed_forward: + desc: null + value: 0 +cross_attention_hidden_size: + desc: null + value: null +dataloader_drop_last: + desc: null + value: false +dataloader_num_workers: + desc: null + value: 0 +dataloader_pin_memory: + desc: null + value: true +ddp_bucket_cap_mb: + desc: null + value: None +ddp_find_unused_parameters: + desc: null + value: None +debug: + desc: null + value: '[]' +decoder: + desc: null + value: + _name_or_path: gpt2-medium + activation_function: gelu_new + add_cross_attention: true + architectures: + - GPT2LMHeadModel + attn_pdrop: 0.0 + bad_words_ids: null + bos_token_id: 50256 + chunk_size_feed_forward: 0 + cross_attention_hidden_size: null + decoder_start_token_id: null + diversity_penalty: 0.0 + do_sample: false + early_stopping: false + embd_pdrop: 0.0 + encoder_no_repeat_ngram_size: 0 + eos_token_id: 50256 + finetuning_task: null + forced_bos_token_id: null + forced_eos_token_id: null + id2label: + '0': LABEL_0 + '1': LABEL_1 + initializer_range: 0.02 + is_decoder: true + is_encoder_decoder: false + label2id: + LABEL_0: 0 + LABEL_1: 1 + layer_norm_epsilon: 1.0e-05 + length_penalty: 1.0 + max_length: 20 + min_length: 0 + model_type: gpt2 + n_ctx: 1024 + n_embd: 1024 + n_head: 16 + n_inner: null + n_layer: 24 + n_positions: 1024 + n_special: 0 + no_repeat_ngram_size: 0 + num_beam_groups: 1 + num_beams: 1 + num_return_sequences: 1 + output_attentions: false + output_hidden_states: false + output_scores: false + pad_token_id: null + predict_special_tokens: true + prefix: null + problem_type: null + pruned_heads: {} + remove_invalid_values: false + reorder_and_upcast_attn: false + repetition_penalty: 1.0 + resid_pdrop: 0.0 + return_dict: true + return_dict_in_generate: false + scale_attn_by_inverse_layer_idx: false + scale_attn_weights: true + sep_token_id: null + summary_activation: null + summary_first_dropout: 0.0 + summary_proj_to_labels: true + summary_type: cls_index + summary_use_proj: true + task_specific_params: + text-generation: + do_sample: true + max_length: 50 + temperature: 1.0 + tie_encoder_decoder: false + tie_word_embeddings: true + tokenizer_class: null + top_k: 50 + top_p: 1.0 + torch_dtype: null + torchscript: false + transformers_version: 4.17.0.dev0 + use_bfloat16: false + use_cache: false + vocab_size: 50257 +decoder_start_token_id: + desc: null + value: 50256 +deepspeed: + desc: null + value: None +disable_tqdm: + desc: null + value: false +diversity_penalty: + desc: null + value: 0.0 +do_eval: + desc: null + value: true +do_predict: + desc: null + value: false +do_sample: + desc: null + value: false +do_train: + desc: null + value: true +early_stopping: + desc: null + value: false +encoder: + desc: null + value: + _name_or_path: facebook/wav2vec2-large-lv60 + activation_dropout: 0.0 + adapter_kernel_size: 3 + adapter_stride: 2 + add_adapter: true + add_cross_attention: false + apply_spec_augment: false + architectures: + - Wav2Vec2ForPreTraining + attention_dropout: 0.0 + bad_words_ids: null + bos_token_id: 1 + chunk_size_feed_forward: 0 + classifier_proj_size: 256 + codevector_dim: 768 + contrastive_logits_temperature: 0.1 + conv_bias: true + conv_dim: + - 512 + - 512 + - 512 + - 512 + - 512 + - 512 + - 512 + conv_kernel: + - 10 + - 3 + - 3 + - 3 + - 3 + - 2 + - 2 + conv_stride: + - 5 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + cross_attention_hidden_size: null + ctc_loss_reduction: sum + ctc_zero_infinity: false + decoder_start_token_id: null + diversity_loss_weight: 0.1 + diversity_penalty: 0.0 + do_sample: false + do_stable_layer_norm: true + early_stopping: false + encoder_no_repeat_ngram_size: 0 + eos_token_id: 2 + feat_extract_activation: gelu + feat_extract_dropout: 0.0 + feat_extract_norm: layer + feat_proj_dropout: 0.0 + feat_quantizer_dropout: 0.0 + final_dropout: 0.0 + finetuning_task: null + forced_bos_token_id: null + forced_eos_token_id: null + gradient_checkpointing: false + hidden_act: gelu + hidden_dropout: 0.0 + hidden_dropout_prob: 0.0 + hidden_size: 1024 + id2label: + '0': LABEL_0 + '1': LABEL_1 + initializer_range: 0.02 + intermediate_size: 4096 + is_decoder: false + is_encoder_decoder: false + label2id: + LABEL_0: 0 + LABEL_1: 1 + layer_norm_eps: 1.0e-05 + layerdrop: 0.0 + length_penalty: 1.0 + mask_feature_length: 10 + mask_feature_min_masks: 0 + mask_feature_prob: 0.0 + mask_time_length: 10 + mask_time_min_masks: 2 + mask_time_prob: 0.0 + max_length: 20 + min_length: 0 + model_type: wav2vec2 + no_repeat_ngram_size: 0 + num_adapter_layers: 3 + num_attention_heads: 16 + num_beam_groups: 1 + num_beams: 1 + num_codevector_groups: 2 + num_codevectors_per_group: 320 + num_conv_pos_embedding_groups: 16 + num_conv_pos_embeddings: 128 + num_feat_extract_layers: 7 + num_hidden_layers: 24 + num_negatives: 100 + num_return_sequences: 1 + output_attentions: false + output_hidden_size: 1024 + output_hidden_states: false + output_scores: false + pad_token_id: 0 + prefix: null + problem_type: null + proj_codevector_dim: 768 + pruned_heads: {} + remove_invalid_values: false + repetition_penalty: 1.0 + return_dict: true + return_dict_in_generate: false + sep_token_id: null + task_specific_params: null + tdnn_dilation: + - 1 + - 2 + - 3 + - 1 + - 1 + tdnn_dim: + - 512 + - 512 + - 512 + - 512 + - 1500 + tdnn_kernel: + - 5 + - 3 + - 3 + - 1 + - 1 + temperature: 1.0 + tie_encoder_decoder: false + tie_word_embeddings: true + tokenizer_class: null + top_k: 50 + top_p: 1.0 + torch_dtype: null + torchscript: false + transformers_version: 4.17.0.dev0 + use_bfloat16: false + use_weighted_layer_sum: false + vocab_size: 32 + xvector_output_dim: 512 +encoder_no_repeat_ngram_size: + desc: null + value: 0 +eos_token_id: + desc: null + value: 50256 +eval_accumulation_steps: + desc: null + value: None +eval_batch_size: + desc: null + value: 14 +eval_steps: + desc: null + value: 500 +evaluation_strategy: + desc: null + value: steps +finetuning_task: + desc: null + value: null +forced_bos_token_id: + desc: null + value: null +forced_eos_token_id: + desc: null + value: null +fp16: + desc: null + value: true +fp16_backend: + desc: null + value: auto +fp16_full_eval: + desc: null + value: false +fp16_opt_level: + desc: null + value: O1 +generation_max_length: + desc: null + value: 40 +generation_num_beams: + desc: null + value: 1 +gradient_accumulation_steps: + desc: null + value: 8 +gradient_checkpointing: + desc: null + value: true +greater_is_better: + desc: null + value: None +group_by_length: + desc: null + value: true +half_precision_backend: + desc: null + value: amp +hub_model_id: + desc: null + value: None +hub_strategy: + desc: null + value: every_save +hub_token: + desc: null + value: +id2label: + desc: null + value: + '0': LABEL_0 + '1': LABEL_1 +ignore_data_skip: + desc: null + value: false +is_decoder: + desc: null + value: false +is_encoder_decoder: + desc: null + value: true +label2id: + desc: null + value: + LABEL_0: 0 + LABEL_1: 1 +label_names: + desc: null + value: None +label_smoothing_factor: + desc: null + value: 0.0 +learning_rate: + desc: null + value: 0.0003 +length_column_name: + desc: null + value: input_length +length_penalty: + desc: null + value: 1.0 +load_best_model_at_end: + desc: null + value: false +local_rank: + desc: null + value: -1 +log_level: + desc: null + value: -1 +log_level_replica: + desc: null + value: -1 +log_on_each_node: + desc: null + value: true +logging_dir: + desc: null + value: ./runs/Mar02_23-36-13_sanchit--v100 +logging_first_step: + desc: null + value: false +logging_nan_inf_filter: + desc: null + value: true +logging_steps: + desc: null + value: 1 +logging_strategy: + desc: null + value: steps +lr_scheduler_type: + desc: null + value: linear +max_grad_norm: + desc: null + value: 1.0 +max_length: + desc: null + value: 50 +max_steps: + desc: null + value: -1 +metric_for_best_model: + desc: null + value: None +min_length: + desc: null + value: 0 +model_type: + desc: null + value: speech-encoder-decoder +mp_parameters: + desc: null + value: '' +no_cuda: + desc: null + value: false +no_repeat_ngram_size: + desc: null + value: 0 +num_beam_groups: + desc: null + value: 1 +num_beams: + desc: null + value: 1 +num_return_sequences: + desc: null + value: 1 +num_train_epochs: + desc: null + value: 1.0 +optim: + desc: null + value: adamw_hf +output_attentions: + desc: null + value: false +output_dir: + desc: null + value: ./ +output_hidden_states: + desc: null + value: false +output_scores: + desc: null + value: false +overwrite_output_dir: + desc: null + value: true +pad_token_id: + desc: null + value: 50256 +past_index: + desc: null + value: -1 +per_device_eval_batch_size: + desc: null + value: 14 +per_device_train_batch_size: + desc: null + value: 14 +per_gpu_eval_batch_size: + desc: null + value: None +per_gpu_train_batch_size: + desc: null + value: None +predict_with_generate: + desc: null + value: true +prediction_loss_only: + desc: null + value: false +prefix: + desc: null + value: null +problem_type: + desc: null + value: null +processor_class: + desc: null + value: Wav2Vec2Processor +pruned_heads: + desc: null + value: {} +push_to_hub: + desc: null + value: true +push_to_hub_model_id: + desc: null + value: None +push_to_hub_organization: + desc: null + value: None +push_to_hub_token: + desc: null + value: +remove_invalid_values: + desc: null + value: false +remove_unused_columns: + desc: null + value: true +repetition_penalty: + desc: null + value: 1.0 +report_to: + desc: null + value: '[''wandb'']' +resume_from_checkpoint: + desc: null + value: None +return_dict: + desc: null + value: true +return_dict_in_generate: + desc: null + value: false +run_name: + desc: null + value: ./ +save_on_each_node: + desc: null + value: false +save_steps: + desc: null + value: 500 +save_strategy: + desc: null + value: steps +save_total_limit: + desc: null + value: 1 +seed: + desc: null + value: 42 +sep_token_id: + desc: null + value: null +sharded_ddp: + desc: null + value: '[]' +skip_memory_metrics: + desc: null + value: true +sortish_sampler: + desc: null + value: false +task_specific_params: + desc: null + value: null +temperature: + desc: null + value: 1.0 +tf32: + desc: null + value: None +tie_encoder_decoder: + desc: null + value: false +tie_word_embeddings: + desc: null + value: false +tokenizer_class: + desc: null + value: null +top_k: + desc: null + value: 50 +top_p: + desc: null + value: 1.0 +torch_dtype: + desc: null + value: torch.float32 +torchscript: + desc: null + value: false +tpu_metrics_debug: + desc: null + value: false +tpu_num_cores: + desc: null + value: None +train_batch_size: + desc: null + value: 14 +transformers_version: + desc: null + value: null +use_bfloat16: + desc: null + value: false +use_cache: + desc: null + value: false +use_legacy_prediction_loop: + desc: null + value: false +warmup_ratio: + desc: null + value: 0.0 +warmup_steps: + desc: null + value: 500 +weight_decay: + desc: null + value: 0.0 +xpu_backend: + desc: null + value: None diff --git a/wandb/run-20220302_233655-33dtvgaa/files/output.log b/wandb/run-20220302_233655-33dtvgaa/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..5d38e993a616052bc4defb185c349f9de415e6bb --- /dev/null +++ b/wandb/run-20220302_233655-33dtvgaa/files/output.log @@ -0,0 +1,1706 @@ + + + 0%| | 0/254 [00:00> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:37:03,447 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:37:06,555 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:37:09,679 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:37:12,783 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:37:15,848 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:37:19,244 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7722, 'learning_rate': 6e-07, 'epoch': 0.0} +[WARNING|modeling_utils.py:388] 2022-03-02 23:37:22,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 0%|▎ | 1/254 [00:25<1:49:37, 26.00s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:37:25,651 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:37:28,697 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:37:31,725 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:37:34,725 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:37:37,656 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:37:40,613 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:37:43,568 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7898, 'learning_rate': 1.2e-06, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-02 23:37:46,590 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 1%|▋ | 2/254 [00:50<1:44:31, 24.89s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:37:49,678 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:37:52,622 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:37:55,568 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:37:58,514 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:38:01,521 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:38:04,439 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:38:07,391 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:38:10,272 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9156, 'learning_rate': 1.8e-06, 'epoch': 0.01} + + 1%|▉ | 3/254 [01:13<1:41:48, 24.34s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:38:13,318 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:38:16,164 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:38:19,110 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:38:22,004 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:38:24,851 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:38:27,731 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:38:30,637 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8726, 'learning_rate': 2.4e-06, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-02 23:38:33,497 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 2%|█▎ | 4/254 [01:37<1:39:34, 23.90s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:38:36,500 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:38:39,381 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:38:42,222 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:38:45,184 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:38:48,011 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:38:50,863 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:38:53,705 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:38:56,600 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▌ | 5/254 [02:00<1:37:54, 23.59s/it] + + 2%|█▌ | 5/254 [02:00<1:37:54, 23.59s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:38:59,504 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:39:02,339 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:39:05,189 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:39:08,037 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:39:10,829 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:39:13,690 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:39:16,572 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:39:19,418 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▉ | 6/254 [02:22<1:36:30, 23.35s/it] + + 2%|█▉ | 6/254 [02:22<1:36:30, 23.35s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:39:22,403 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:39:25,290 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:39:28,166 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:39:30,912 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:39:33,778 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:39:36,623 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:39:39,448 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7939, 'learning_rate': 3.6e-06, 'epoch': 0.03} +[WARNING|modeling_utils.py:388] 2022-03-02 23:39:42,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 3%|██▏ | 7/254 [02:45<1:35:25, 23.18s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:39:45,249 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:39:48,078 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:39:50,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:39:53,790 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:39:56,582 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:39:59,427 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:40:02,215 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8651, 'learning_rate': 4.2e-06, 'epoch': 0.03} +[WARNING|modeling_utils.py:388] 2022-03-02 23:40:04,985 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 3%|██▌ | 8/254 [03:08<1:34:27, 23.04s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:40:07,891 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:40:10,759 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:40:13,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:40:16,300 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:40:19,224 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:40:22,051 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:40:24,863 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:40:27,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|██▊ | 9/254 [03:31<1:33:36, 22.93s/it] + + 4%|██▊ | 9/254 [03:31<1:33:36, 22.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:40:30,629 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:40:33,484 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:40:36,248 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:40:39,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:40:41,834 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:40:44,644 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:40:47,394 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:40:50,185 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▏ | 10/254 [03:53<1:32:43, 22.80s/it] + + 4%|███▏ | 10/254 [03:53<1:32:43, 22.80s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:40:53,095 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:40:55,847 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:40:58,574 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:41:01,366 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:41:04,089 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:41:06,818 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:41:09,547 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:41:12,311 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▍ | 11/254 [04:15<1:31:30, 22.60s/it] + + 4%|███▍ | 11/254 [04:15<1:31:30, 22.60s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:41:15,140 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:41:17,818 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:41:20,583 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:41:23,299 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:41:25,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:41:28,645 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:41:31,394 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:41:34,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6487, 'learning_rate': 6.599999999999999e-06, 'epoch': 0.05} + + 5%|███▊ | 12/254 [04:37<1:30:12, 22.36s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:41:36,930 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:41:39,680 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:41:42,462 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:41:45,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:41:48,449 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:41:51,159 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:41:53,892 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:41:56,660 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 13/254 [05:00<1:30:00, 22.41s/it] + 5%|████ | 13/254 [05:00<1:30:00, 22.41s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:41:59,466 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 13/254 [05:00<1:30:00, 22.41s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:41:59,466 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:42:04,951 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:41:59,466 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:42:04,951 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:41:59,466 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:42:10,325 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:41:59,466 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:42:10,325 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:41:59,466 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:42:15,741 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:41:59,466 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:42:15,741 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:41:59,466 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▍ | 14/254 [05:21<1:28:47, 22.20s/it]g-point operations will not be computed-02 23:41:59,466 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▍ | 14/254 [05:21<1:28:47, 22.20s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:42:21,089 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▍ | 14/254 [05:21<1:28:47, 22.20s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:42:21,089 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:42:26,426 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:42:21,089 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:42:26,426 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:42:21,089 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:42:31,791 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:42:21,089 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:42:31,791 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:42:21,089 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:42:37,150 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:42:21,089 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 15/254 [05:43<1:27:30, 21.97s/it]g-point operations will not be computed-02 23:42:21,089 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 15/254 [05:43<1:27:30, 21.97s/it]g-point operations will not be computed-02 23:42:21,089 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 15/254 [05:43<1:27:30, 21.97s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:42:42,653 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 15/254 [05:43<1:27:30, 21.97s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:42:42,653 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:42:47,969 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:42:42,653 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:42:47,969 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:42:42,653 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:42:53,339 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:42:42,653 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:42:53,339 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:42:42,653 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:42:58,626 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:42:42,653 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 16/254 [06:04<1:26:35, 21.83s/it]g-point operations will not be computed-02 23:42:42,653 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 16/254 [06:04<1:26:35, 21.83s/it]g-point operations will not be computed-02 23:42:42,653 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 16/254 [06:04<1:26:35, 21.83s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:43:04,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 16/254 [06:04<1:26:35, 21.83s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:43:04,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:43:09,394 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:43:04,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:43:09,394 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:43:04,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:43:14,709 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:43:04,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:43:14,709 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:43:04,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:43:20,038 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:43:04,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▎ | 17/254 [06:26<1:25:39, 21.69s/it]g-point operations will not be computed-02 23:43:04,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▎ | 17/254 [06:26<1:25:39, 21.69s/it]g-point operations will not be computed-02 23:43:04,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▎ | 17/254 [06:26<1:25:39, 21.69s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:43:25,388 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▎ | 17/254 [06:26<1:25:39, 21.69s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:43:25,388 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:43:30,624 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:43:25,388 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:43:30,624 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:43:25,388 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:43:35,737 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:43:25,388 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:43:35,737 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:43:25,388 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:43:41,083 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:43:25,388 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:43:41,083 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:43:25,388 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 18/254 [06:47<1:24:31, 21.49s/it]g-point operations will not be computed-02 23:43:25,388 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 18/254 [06:47<1:24:31, 21.49s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:43:46,482 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 18/254 [06:47<1:24:31, 21.49s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:43:46,482 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:43:51,790 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:43:46,482 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:43:51,790 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:43:46,482 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:43:57,003 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:43:46,482 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:43:57,003 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:43:46,482 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:44:02,227 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:43:46,482 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:44:02,227 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:43:46,482 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▉ | 19/254 [07:08<1:23:41, 21.37s/it]g-point operations will not be computed-02 23:43:46,482 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▉ | 19/254 [07:08<1:23:41, 21.37s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:44:07,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▉ | 19/254 [07:08<1:23:41, 21.37s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:44:07,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:44:12,640 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:44:07,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:44:12,640 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:44:07,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:44:17,787 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:44:07,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:44:17,787 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:44:07,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:44:22,959 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:44:07,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:44:22,959 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:44:07,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:44:22,959 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:44:07,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▎ | 20/254 [07:29<1:22:39, 21.20s/it]g-point operations will not be computed-02 23:44:07,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▎ | 20/254 [07:29<1:22:39, 21.20s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:44:28,209 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▎ | 20/254 [07:29<1:22:39, 21.20s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:44:28,209 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:44:33,311 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:44:28,209 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:44:33,311 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:44:28,209 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:44:38,501 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:44:28,209 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:44:38,501 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:44:28,209 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:44:43,633 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:44:28,209 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▌ | 21/254 [07:49<1:21:35, 21.01s/it]g-point operations will not be computed-02 23:44:28,209 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▌ | 21/254 [07:49<1:21:35, 21.01s/it]g-point operations will not be computed-02 23:44:28,209 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▌ | 21/254 [07:49<1:21:35, 21.01s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:44:48,852 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▌ | 21/254 [07:49<1:21:35, 21.01s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:44:48,852 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:44:53,911 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:44:48,852 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:44:53,911 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:44:48,852 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:44:59,025 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:44:48,852 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:44:59,025 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:44:48,852 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:45:04,053 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:44:48,852 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:45:04,053 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:44:48,852 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:45:04,053 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:44:48,852 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▉ | 22/254 [08:10<1:20:35, 20.84s/it]g-point operations will not be computed-02 23:44:48,852 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▉ | 22/254 [08:10<1:20:35, 20.84s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:45:09,241 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▉ | 22/254 [08:10<1:20:35, 20.84s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:45:09,241 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:45:14,395 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:09,241 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:45:14,395 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:09,241 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:45:19,431 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:09,241 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:45:19,431 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:09,241 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:45:24,536 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:09,241 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:45:24,536 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:09,241 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▏ | 23/254 [08:30<1:19:48, 20.73s/it]g-point operations will not be computed-02 23:45:09,241 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▏ | 23/254 [08:30<1:19:48, 20.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:45:29,748 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▏ | 23/254 [08:30<1:19:48, 20.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:45:29,748 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:45:34,784 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:29,748 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:45:34,784 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:29,748 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:45:39,847 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:29,748 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:45:39,847 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:29,748 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:45:44,951 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:29,748 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:45:44,951 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:29,748 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▌ | 24/254 [08:51<1:19:06, 20.64s/it]g-point operations will not be computed-02 23:45:29,748 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▌ | 24/254 [08:51<1:19:06, 20.64s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▌ | 24/254 [08:51<1:19:06, 20.64s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:45:55,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:45:55,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:46:00,173 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:46:00,173 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:46:05,194 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 25/254 [09:11<1:18:49, 20.65s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 25/254 [09:11<1:18:49, 20.65s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3355, 'learning_rate': 1.44e-05, 'epoch': 0.1} + 10%|███████▊ | 25/254 [09:11<1:18:49, 20.65s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 25/254 [09:11<1:18:49, 20.65s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 25/254 [09:11<1:18:49, 20.65s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 25/254 [09:11<1:18:49, 20.65s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 25/254 [09:11<1:18:49, 20.65s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 25/254 [09:11<1:18:49, 20.65s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▏ | 26/254 [09:31<1:17:44, 20.46s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▏ | 26/254 [09:31<1:17:44, 20.46s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4344, 'learning_rate': 1.4999999999999999e-05, 'epoch': 0.1} + 10%|████████▏ | 26/254 [09:31<1:17:44, 20.46s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▏ | 26/254 [09:31<1:17:44, 20.46s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▏ | 26/254 [09:31<1:17:44, 20.46s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▏ | 26/254 [09:31<1:17:44, 20.46s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▏ | 26/254 [09:31<1:17:44, 20.46s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▏ | 26/254 [09:31<1:17:44, 20.46s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 27/254 [09:51<1:16:36, 20.25s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 27/254 [09:51<1:16:36, 20.25s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2896, 'learning_rate': 1.5599999999999996e-05, 'epoch': 0.11} + 11%|████████▌ | 27/254 [09:51<1:16:36, 20.25s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 27/254 [09:51<1:16:36, 20.25s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 27/254 [09:51<1:16:36, 20.25s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 27/254 [09:51<1:16:36, 20.25s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 27/254 [09:51<1:16:36, 20.25s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 27/254 [09:51<1:16:36, 20.25s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 27/254 [09:51<1:16:36, 20.25s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 27/254 [09:51<1:16:36, 20.25s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3127, 'learning_rate': 1.6199999999999997e-05, 'epoch': 0.11} + 11%|████████▌ | 27/254 [09:51<1:16:36, 20.25s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 27/254 [09:51<1:16:36, 20.25s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 27/254 [09:51<1:16:36, 20.25s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 27/254 [09:51<1:16:36, 20.25s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 27/254 [09:51<1:16:36, 20.25s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 27/254 [09:51<1:16:36, 20.25s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 27/254 [09:51<1:16:36, 20.25s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 29/254 [10:30<1:14:42, 19.92s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 29/254 [10:30<1:14:42, 19.92s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2021, 'learning_rate': 1.68e-05, 'epoch': 0.11} + 11%|█████████▏ | 29/254 [10:30<1:14:42, 19.92s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 29/254 [10:30<1:14:42, 19.92s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 29/254 [10:30<1:14:42, 19.92s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 29/254 [10:30<1:14:42, 19.92s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 29/254 [10:30<1:14:42, 19.92s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 29/254 [10:30<1:14:42, 19.92s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 30/254 [10:50<1:13:46, 19.76s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 30/254 [10:50<1:13:46, 19.76s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.386, 'learning_rate': 1.74e-05, 'epoch': 0.12} + 12%|█████████▍ | 30/254 [10:50<1:13:46, 19.76s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 30/254 [10:50<1:13:46, 19.76s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 30/254 [10:50<1:13:46, 19.76s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 30/254 [10:50<1:13:46, 19.76s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 30/254 [10:50<1:13:46, 19.76s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 30/254 [10:50<1:13:46, 19.76s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 30/254 [10:50<1:13:46, 19.76s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▊ | 31/254 [11:09<1:12:52, 19.61s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▊ | 31/254 [11:09<1:12:52, 19.61s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▊ | 31/254 [11:09<1:12:52, 19.61s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▊ | 31/254 [11:09<1:12:52, 19.61s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▊ | 31/254 [11:09<1:12:52, 19.61s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▊ | 31/254 [11:09<1:12:52, 19.61s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▊ | 31/254 [11:09<1:12:52, 19.61s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▊ | 31/254 [11:09<1:12:52, 19.61s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▊ | 31/254 [11:09<1:12:52, 19.61s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 32/254 [11:28<1:11:54, 19.43s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 32/254 [11:28<1:11:54, 19.43s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 32/254 [11:28<1:11:54, 19.43s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 32/254 [11:28<1:11:54, 19.43s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 32/254 [11:28<1:11:54, 19.43s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 32/254 [11:28<1:11:54, 19.43s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 32/254 [11:28<1:11:54, 19.43s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 32/254 [11:28<1:11:54, 19.43s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▍ | 33/254 [11:47<1:10:52, 19.24s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▍ | 33/254 [11:47<1:10:52, 19.24s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3241, 'learning_rate': 1.92e-05, 'epoch': 0.13} + 13%|██████████▍ | 33/254 [11:47<1:10:52, 19.24s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▍ | 33/254 [11:47<1:10:52, 19.24s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▍ | 33/254 [11:47<1:10:52, 19.24s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▍ | 33/254 [11:47<1:10:52, 19.24s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▍ | 33/254 [11:47<1:10:52, 19.24s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▍ | 33/254 [11:47<1:10:52, 19.24s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▋ | 34/254 [12:05<1:09:31, 18.96s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▋ | 34/254 [12:05<1:09:31, 18.96s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2998, 'learning_rate': 1.98e-05, 'epoch': 0.13} + 13%|██████████▋ | 34/254 [12:05<1:09:31, 18.96s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▋ | 34/254 [12:05<1:09:31, 18.96s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▋ | 34/254 [12:05<1:09:31, 18.96s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▋ | 34/254 [12:05<1:09:31, 18.96s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▋ | 34/254 [12:05<1:09:31, 18.96s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▋ | 34/254 [12:05<1:09:31, 18.96s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▋ | 34/254 [12:05<1:09:31, 18.96s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████ | 35/254 [12:23<1:08:15, 18.70s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████ | 35/254 [12:23<1:08:15, 18.70s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████ | 35/254 [12:23<1:08:15, 18.70s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████ | 35/254 [12:23<1:08:15, 18.70s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████ | 35/254 [12:23<1:08:15, 18.70s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████ | 35/254 [12:23<1:08:15, 18.70s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████ | 35/254 [12:23<1:08:15, 18.70s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:49:37,730 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:49:37,730 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.308, 'learning_rate': 2.1e-05, 'epoch': 0.14} +[WARNING|modeling_utils.py:388] 2022-03-02 23:49:37,730 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:49:37,730 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:49:37,730 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:49:37,730 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:49:37,730 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:49:37,730 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▋ | 37/254 [12:58<1:05:30, 18.11s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▋ | 37/254 [12:58<1:05:30, 18.11s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3661, 'learning_rate': 2.1599999999999996e-05, 'epoch': 0.15} + 15%|███████████▋ | 37/254 [12:58<1:05:30, 18.11s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▋ | 37/254 [12:58<1:05:30, 18.11s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:50:06,449 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:50:06,449 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:50:06,449 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:50:06,449 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▉ | 38/254 [13:16<1:04:31, 17.92s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▉ | 38/254 [13:16<1:04:31, 17.92s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▉ | 38/254 [13:16<1:04:31, 17.92s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:50:21,058 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:50:21,058 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:50:21,058 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:50:21,058 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████▎ | 39/254 [13:32<1:02:33, 17.46s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████▎ | 39/254 [13:32<1:02:33, 17.46s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3651, 'learning_rate': 2.28e-05, 'epoch': 0.15} + 15%|████████████▎ | 39/254 [13:32<1:02:33, 17.46s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████▎ | 39/254 [13:32<1:02:33, 17.46s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████▎ | 39/254 [13:32<1:02:33, 17.46s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████▎ | 39/254 [13:32<1:02:33, 17.46s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████▎ | 39/254 [13:32<1:02:33, 17.46s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████▎ | 39/254 [13:32<1:02:33, 17.46s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████▎ | 39/254 [13:32<1:02:33, 17.46s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▌ | 40/254 [13:48<1:00:11, 16.88s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▌ | 40/254 [13:48<1:00:11, 16.88s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▌ | 40/254 [13:48<1:00:11, 16.88s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▌ | 40/254 [13:48<1:00:11, 16.88s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▌ | 40/254 [13:48<1:00:11, 16.88s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:50:55,681 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:50:55,681 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:50:55,681 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|█████████████▏ | 41/254 [14:02<57:32, 16.21s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|█████████████▏ | 41/254 [14:02<57:32, 16.21s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|█████████████▏ | 41/254 [14:02<57:32, 16.21s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:51:06,194 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:51:06,194 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:51:06,194 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:51:06,194 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:51:06,194 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▌ | 42/254 [14:16<54:38, 15.46s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:51:16,265 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:51:16,265 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:51:16,265 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:51:16,265 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:51:24,161 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:51:24,161 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▉ | 43/254 [14:29<51:31, 14.65s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▉ | 43/254 [14:29<51:31, 14.65s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:51:30,331 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:51:30,331 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:51:30,331 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:51:35,995 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:51:35,995 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|██████████████▏ | 44/254 [14:40<48:09, 13.76s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:51:40,159 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:51:40,159 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:51:44,028 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:51:46,525 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:51:46,525 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▌ | 45/254 [14:51<44:23, 12.75s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:51:50,306 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:51:52,627 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:51:54,925 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:51:54,925 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:51:54,925 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▊ | 46/254 [15:00<40:40, 11.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:51:58,318 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:52:00,464 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:51:58,318 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:52:02,544 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:51:58,318 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:52:04,535 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:51:58,318 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:52:04,535 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:51:58,318 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▏ | 47/254 [15:09<36:59, 10.72s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:52:06,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:52:08,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:06,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:52:10,179 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:06,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▍ | 48/254 [15:16<33:14, 9.68s/it]g-point operations will not be computed-02 23:52:06,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▍ | 48/254 [15:16<33:14, 9.68s/it]g-point operations will not be computed-02 23:52:06,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▍ | 48/254 [15:16<33:14, 9.68s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:52:13,701 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:52:16,835 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:13,701 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:52:18,386 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:13,701 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:52:18,386 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:13,701 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▊ | 49/254 [15:22<29:38, 8.68s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:52:19,892 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:52:22,438 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:19,892 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▏ | 50/254 [15:28<26:22, 7.76s/it]g-point operations will not be computed-02 23:52:19,892 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▏ | 50/254 [15:28<26:22, 7.76s/it]g-point operations will not be computed-02 23:52:19,892 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▏ | 50/254 [15:28<26:22, 7.76s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▏ | 50/254 [15:28<26:22, 7.76s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:52:34,170 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:52:34,170 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:52:40,333 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:52:40,333 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:52:40,333 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▍ | 51/254 [15:52<43:30, 12.86s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▍ | 51/254 [15:52<43:30, 12.86s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.243, 'learning_rate': 2.9999999999999997e-05, 'epoch': 0.2} + 20%|████████████████▍ | 51/254 [15:52<43:30, 12.86s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▍ | 51/254 [15:52<43:30, 12.86s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▍ | 51/254 [15:52<43:30, 12.86s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▍ | 51/254 [15:52<43:30, 12.86s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▍ | 51/254 [15:52<43:30, 12.86s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▍ | 51/254 [15:52<43:30, 12.86s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▊ | 52/254 [16:17<54:35, 16.22s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▊ | 52/254 [16:17<54:35, 16.22s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.181, 'learning_rate': 3.06e-05, 'epoch': 0.2} + 20%|████████████████▊ | 52/254 [16:17<54:35, 16.22s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▊ | 52/254 [16:17<54:35, 16.22s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▊ | 52/254 [16:17<54:35, 16.22s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▊ | 52/254 [16:17<54:35, 16.22s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▊ | 52/254 [16:17<54:35, 16.22s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▊ | 52/254 [16:17<54:35, 16.22s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▊ | 52/254 [16:17<54:35, 16.22s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 53/254 [16:40<1:01:42, 18.42s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 53/254 [16:40<1:01:42, 18.42s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 53/254 [16:40<1:01:42, 18.42s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 53/254 [16:40<1:01:42, 18.42s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 53/254 [16:40<1:01:42, 18.42s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 53/254 [16:40<1:01:42, 18.42s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 53/254 [16:40<1:01:42, 18.42s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 53/254 [16:40<1:01:42, 18.42s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 53/254 [16:40<1:01:42, 18.42s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|█████████████████ | 54/254 [17:03<1:06:13, 19.87s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|█████████████████ | 54/254 [17:03<1:06:13, 19.87s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|█████████████████ | 54/254 [17:03<1:06:13, 19.87s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|█████████████████ | 54/254 [17:03<1:06:13, 19.87s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|█████████████████ | 54/254 [17:03<1:06:13, 19.87s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|█████████████████ | 54/254 [17:03<1:06:13, 19.87s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|█████████████████ | 54/254 [17:03<1:06:13, 19.87s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|█████████████████ | 54/254 [17:03<1:06:13, 19.87s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|█████████████████ | 54/254 [17:03<1:06:13, 19.87s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▎ | 55/254 [17:27<1:09:15, 20.88s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▎ | 55/254 [17:27<1:09:15, 20.88s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▎ | 55/254 [17:27<1:09:15, 20.88s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▎ | 55/254 [17:27<1:09:15, 20.88s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▎ | 55/254 [17:27<1:09:15, 20.88s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▎ | 55/254 [17:27<1:09:15, 20.88s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▎ | 55/254 [17:27<1:09:15, 20.88s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▎ | 55/254 [17:27<1:09:15, 20.88s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▋ | 56/254 [17:50<1:11:01, 21.52s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▋ | 56/254 [17:50<1:11:01, 21.52s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1373, 'learning_rate': 3.2999999999999996e-05, 'epoch': 0.22} + 22%|█████████████████▋ | 56/254 [17:50<1:11:01, 21.52s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▋ | 56/254 [17:50<1:11:01, 21.52s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▋ | 56/254 [17:50<1:11:01, 21.52s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▋ | 56/254 [17:50<1:11:01, 21.52s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▋ | 56/254 [17:50<1:11:01, 21.52s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▋ | 56/254 [17:50<1:11:01, 21.52s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▉ | 57/254 [18:12<1:11:51, 21.89s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▉ | 57/254 [18:12<1:11:51, 21.89s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1799, 'learning_rate': 3.36e-05, 'epoch': 0.22} + 22%|█████████████████▉ | 57/254 [18:12<1:11:51, 21.89s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▉ | 57/254 [18:12<1:11:51, 21.89s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▉ | 57/254 [18:12<1:11:51, 21.89s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▉ | 57/254 [18:12<1:11:51, 21.89s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▉ | 57/254 [18:12<1:11:51, 21.89s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▉ | 57/254 [18:12<1:11:51, 21.89s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1919, 'learning_rate': 3.42e-05, 'epoch': 0.23} + g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 59/254 [18:58<1:12:38, 22.35s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 59/254 [18:58<1:12:38, 22.35s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 59/254 [18:58<1:12:38, 22.35s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 59/254 [18:58<1:12:38, 22.35s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 59/254 [18:58<1:12:38, 22.35s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 59/254 [18:58<1:12:38, 22.35s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 59/254 [18:58<1:12:38, 22.35s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 59/254 [18:58<1:12:38, 22.35s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▉ | 60/254 [19:21<1:12:31, 22.43s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▉ | 60/254 [19:21<1:12:31, 22.43s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2191, 'learning_rate': 3.539999999999999e-05, 'epoch': 0.24} + 24%|██████████████████▉ | 60/254 [19:21<1:12:31, 22.43s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▉ | 60/254 [19:21<1:12:31, 22.43s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▉ | 60/254 [19:21<1:12:31, 22.43s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▉ | 60/254 [19:21<1:12:31, 22.43s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▉ | 60/254 [19:21<1:12:31, 22.43s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▉ | 60/254 [19:21<1:12:31, 22.43s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▏ | 61/254 [19:43<1:12:09, 22.43s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▏ | 61/254 [19:43<1:12:09, 22.43s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2233, 'learning_rate': 3.5999999999999994e-05, 'epoch': 0.24} + 24%|███████████████████▏ | 61/254 [19:43<1:12:09, 22.43s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▏ | 61/254 [19:43<1:12:09, 22.43s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▏ | 61/254 [19:43<1:12:09, 22.43s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▏ | 61/254 [19:43<1:12:09, 22.43s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▏ | 61/254 [19:43<1:12:09, 22.43s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▏ | 61/254 [19:43<1:12:09, 22.43s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▌ | 62/254 [20:05<1:11:41, 22.41s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▌ | 62/254 [20:05<1:11:41, 22.41s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1567, 'learning_rate': 3.6599999999999995e-05, 'epoch': 0.24} + 24%|███████████████████▌ | 62/254 [20:05<1:11:41, 22.41s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▌ | 62/254 [20:05<1:11:41, 22.41s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▌ | 62/254 [20:05<1:11:41, 22.41s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▌ | 62/254 [20:05<1:11:41, 22.41s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▌ | 62/254 [20:05<1:11:41, 22.41s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▌ | 62/254 [20:05<1:11:41, 22.41s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▊ | 63/254 [20:28<1:11:35, 22.49s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▊ | 63/254 [20:28<1:11:35, 22.49s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2383, 'learning_rate': 3.7199999999999996e-05, 'epoch': 0.25} + 25%|███████████████████▊ | 63/254 [20:28<1:11:35, 22.49s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▊ | 63/254 [20:28<1:11:35, 22.49s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▊ | 63/254 [20:28<1:11:35, 22.49s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▊ | 63/254 [20:28<1:11:35, 22.49s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▊ | 63/254 [20:28<1:11:35, 22.49s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▊ | 63/254 [20:28<1:11:35, 22.49s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▏ | 64/254 [20:50<1:10:51, 22.38s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▏ | 64/254 [20:50<1:10:51, 22.38s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2146, 'learning_rate': 3.78e-05, 'epoch': 0.25} + 25%|████████████████████▏ | 64/254 [20:50<1:10:51, 22.38s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▏ | 64/254 [20:50<1:10:51, 22.38s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▏ | 64/254 [20:50<1:10:51, 22.38s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▏ | 64/254 [20:50<1:10:51, 22.38s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▏ | 64/254 [20:50<1:10:51, 22.38s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▏ | 64/254 [20:50<1:10:51, 22.38s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 65/254 [21:12<1:10:01, 22.23s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 65/254 [21:12<1:10:01, 22.23s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1731, 'learning_rate': 3.84e-05, 'epoch': 0.26} + 26%|████████████████████▍ | 65/254 [21:12<1:10:01, 22.23s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 65/254 [21:12<1:10:01, 22.23s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 65/254 [21:12<1:10:01, 22.23s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 65/254 [21:12<1:10:01, 22.23s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 65/254 [21:12<1:10:01, 22.23s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 65/254 [21:12<1:10:01, 22.23s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 65/254 [21:12<1:10:01, 22.23s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▊ | 66/254 [21:34<1:09:04, 22.04s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▊ | 66/254 [21:34<1:09:04, 22.04s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▊ | 66/254 [21:34<1:09:04, 22.04s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▊ | 66/254 [21:34<1:09:04, 22.04s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▊ | 66/254 [21:34<1:09:04, 22.04s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▊ | 66/254 [21:34<1:09:04, 22.04s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▊ | 66/254 [21:34<1:09:04, 22.04s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▊ | 66/254 [21:34<1:09:04, 22.04s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▊ | 66/254 [21:34<1:09:04, 22.04s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▊ | 66/254 [21:34<1:09:04, 22.04s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1804, 'learning_rate': 3.96e-05, 'epoch': 0.26} + 26%|████████████████████▊ | 66/254 [21:34<1:09:04, 22.04s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▊ | 66/254 [21:34<1:09:04, 22.04s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▊ | 66/254 [21:34<1:09:04, 22.04s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▊ | 66/254 [21:34<1:09:04, 22.04s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▊ | 66/254 [21:34<1:09:04, 22.04s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▊ | 66/254 [21:34<1:09:04, 22.04s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▊ | 66/254 [21:34<1:09:04, 22.04s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▊ | 66/254 [21:34<1:09:04, 22.04s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▍ | 68/254 [22:16<1:07:17, 21.71s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▍ | 68/254 [22:16<1:07:17, 21.71s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▍ | 68/254 [22:16<1:07:17, 21.71s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▍ | 68/254 [22:16<1:07:17, 21.71s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▍ | 68/254 [22:16<1:07:17, 21.71s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▍ | 68/254 [22:16<1:07:17, 21.71s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▍ | 68/254 [22:16<1:07:17, 21.71s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▍ | 68/254 [22:16<1:07:17, 21.71s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▋ | 69/254 [22:38<1:06:40, 21.63s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▋ | 69/254 [22:38<1:06:40, 21.63s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2335, 'learning_rate': 4.08e-05, 'epoch': 0.27} + 27%|█████████████████████▋ | 69/254 [22:38<1:06:40, 21.63s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▋ | 69/254 [22:38<1:06:40, 21.63s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▋ | 69/254 [22:38<1:06:40, 21.63s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▋ | 69/254 [22:38<1:06:40, 21.63s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▋ | 69/254 [22:38<1:06:40, 21.63s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▋ | 69/254 [22:38<1:06:40, 21.63s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▋ | 69/254 [22:38<1:06:40, 21.63s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 70/254 [22:59<1:05:48, 21.46s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 70/254 [22:59<1:05:48, 21.46s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 70/254 [22:59<1:05:48, 21.46s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 70/254 [22:59<1:05:48, 21.46s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 70/254 [22:59<1:05:48, 21.46s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 70/254 [22:59<1:05:48, 21.46s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 70/254 [22:59<1:05:48, 21.46s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 70/254 [22:59<1:05:48, 21.46s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▎ | 71/254 [23:20<1:04:56, 21.29s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▎ | 71/254 [23:20<1:04:56, 21.29s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1473, 'learning_rate': 4.2e-05, 'epoch': 0.28} + 28%|██████████████████████▎ | 71/254 [23:20<1:04:56, 21.29s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▎ | 71/254 [23:20<1:04:56, 21.29s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▎ | 71/254 [23:20<1:04:56, 21.29s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▎ | 71/254 [23:20<1:04:56, 21.29s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▎ | 71/254 [23:20<1:04:56, 21.29s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▎ | 71/254 [23:20<1:04:56, 21.29s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▎ | 71/254 [23:20<1:04:56, 21.29s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▋ | 72/254 [23:41<1:04:10, 21.16s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▋ | 72/254 [23:41<1:04:10, 21.16s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▋ | 72/254 [23:41<1:04:10, 21.16s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▋ | 72/254 [23:41<1:04:10, 21.16s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▋ | 72/254 [23:41<1:04:10, 21.16s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▋ | 72/254 [23:41<1:04:10, 21.16s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▋ | 72/254 [23:41<1:04:10, 21.16s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▋ | 72/254 [23:41<1:04:10, 21.16s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▋ | 72/254 [23:41<1:04:10, 21.16s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|██████████████████████▉ | 73/254 [24:01<1:03:17, 20.98s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|██████████████████████▉ | 73/254 [24:01<1:03:17, 20.98s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|██████████████████████▉ | 73/254 [24:01<1:03:17, 20.98s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|██████████████████████▉ | 73/254 [24:01<1:03:17, 20.98s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|██████████████████████▉ | 73/254 [24:01<1:03:17, 20.98s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|██████████████████████▉ | 73/254 [24:01<1:03:17, 20.98s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|██████████████████████▉ | 73/254 [24:01<1:03:17, 20.98s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|██████████████████████▉ | 73/254 [24:01<1:03:17, 20.98s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|██████████████████████▉ | 73/254 [24:01<1:03:17, 20.98s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▎ | 74/254 [24:22<1:02:40, 20.89s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▎ | 74/254 [24:22<1:02:40, 20.89s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▎ | 74/254 [24:22<1:02:40, 20.89s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▎ | 74/254 [24:22<1:02:40, 20.89s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▎ | 74/254 [24:22<1:02:40, 20.89s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▎ | 74/254 [24:22<1:02:40, 20.89s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▎ | 74/254 [24:22<1:02:40, 20.89s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▎ | 74/254 [24:22<1:02:40, 20.89s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▌ | 75/254 [24:43<1:02:17, 20.88s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▌ | 75/254 [24:43<1:02:17, 20.88s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2113, 'learning_rate': 4.4399999999999995e-05, 'epoch': 0.29} + 30%|███████████████████████▌ | 75/254 [24:43<1:02:17, 20.88s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▌ | 75/254 [24:43<1:02:17, 20.88s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▌ | 75/254 [24:43<1:02:17, 20.88s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▌ | 75/254 [24:43<1:02:17, 20.88s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▌ | 75/254 [24:43<1:02:17, 20.88s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▌ | 75/254 [24:43<1:02:17, 20.88s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▉ | 76/254 [25:03<1:01:27, 20.72s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▉ | 76/254 [25:03<1:01:27, 20.72s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2269, 'learning_rate': 4.4999999999999996e-05, 'epoch': 0.3} + 30%|███████████████████████▉ | 76/254 [25:03<1:01:27, 20.72s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▉ | 76/254 [25:03<1:01:27, 20.72s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▉ | 76/254 [25:03<1:01:27, 20.72s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▉ | 76/254 [25:03<1:01:27, 20.72s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▉ | 76/254 [25:03<1:01:27, 20.72s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▉ | 76/254 [25:03<1:01:27, 20.72s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▎ | 77/254 [25:23<1:00:28, 20.50s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▎ | 77/254 [25:23<1:00:28, 20.50s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1723, 'learning_rate': 4.56e-05, 'epoch': 0.3} + 30%|████████████████████████▎ | 77/254 [25:23<1:00:28, 20.50s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▎ | 77/254 [25:23<1:00:28, 20.50s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▎ | 77/254 [25:23<1:00:28, 20.50s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▎ | 77/254 [25:23<1:00:28, 20.50s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▎ | 77/254 [25:23<1:00:28, 20.50s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▎ | 77/254 [25:23<1:00:28, 20.50s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▎ | 77/254 [25:23<1:00:28, 20.50s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▎ | 77/254 [25:23<1:00:28, 20.50s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1948, 'learning_rate': 4.62e-05, 'epoch': 0.31} + 30%|████████████████████████▎ | 77/254 [25:23<1:00:28, 20.50s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▎ | 77/254 [25:23<1:00:28, 20.50s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▎ | 77/254 [25:23<1:00:28, 20.50s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▎ | 77/254 [25:23<1:00:28, 20.50s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▎ | 77/254 [25:23<1:00:28, 20.50s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▎ | 77/254 [25:23<1:00:28, 20.50s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▎ | 77/254 [25:23<1:00:28, 20.50s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▌ | 79/254 [26:02<58:32, 20.07s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▌ | 79/254 [26:02<58:32, 20.07s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0934, 'learning_rate': 4.68e-05, 'epoch': 0.31} + 31%|█████████████████████████▌ | 79/254 [26:02<58:32, 20.07s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▌ | 79/254 [26:02<58:32, 20.07s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▌ | 79/254 [26:02<58:32, 20.07s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▌ | 79/254 [26:02<58:32, 20.07s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▌ | 79/254 [26:02<58:32, 20.07s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▌ | 79/254 [26:02<58:32, 20.07s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▊ | 80/254 [26:22<57:31, 19.84s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▊ | 80/254 [26:22<57:31, 19.84s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2371, 'learning_rate': 4.7399999999999993e-05, 'epoch': 0.31} + 31%|█████████████████████████▊ | 80/254 [26:22<57:31, 19.84s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▊ | 80/254 [26:22<57:31, 19.84s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▊ | 80/254 [26:22<57:31, 19.84s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▊ | 80/254 [26:22<57:31, 19.84s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▊ | 80/254 [26:22<57:31, 19.84s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▊ | 80/254 [26:22<57:31, 19.84s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▏ | 81/254 [26:41<56:39, 19.65s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▏ | 81/254 [26:41<56:39, 19.65s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1495, 'learning_rate': 4.7999999999999994e-05, 'epoch': 0.32} + 32%|██████████████████████████▏ | 81/254 [26:41<56:39, 19.65s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▏ | 81/254 [26:41<56:39, 19.65s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▏ | 81/254 [26:41<56:39, 19.65s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▏ | 81/254 [26:41<56:39, 19.65s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▏ | 81/254 [26:41<56:39, 19.65s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▏ | 81/254 [26:41<56:39, 19.65s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▍ | 82/254 [27:00<55:34, 19.39s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▍ | 82/254 [27:00<55:34, 19.39s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1826, 'learning_rate': 4.8599999999999995e-05, 'epoch': 0.32} + 32%|██████████████████████████▍ | 82/254 [27:00<55:34, 19.39s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▍ | 82/254 [27:00<55:34, 19.39s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▍ | 82/254 [27:00<55:34, 19.39s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▍ | 82/254 [27:00<55:34, 19.39s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▍ | 82/254 [27:00<55:34, 19.39s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▍ | 82/254 [27:00<55:34, 19.39s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▍ | 82/254 [27:00<55:34, 19.39s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▊ | 83/254 [27:19<54:46, 19.22s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▊ | 83/254 [27:19<54:46, 19.22s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▊ | 83/254 [27:19<54:46, 19.22s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▊ | 83/254 [27:19<54:46, 19.22s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▊ | 83/254 [27:19<54:46, 19.22s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▊ | 83/254 [27:19<54:46, 19.22s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▊ | 83/254 [27:19<54:46, 19.22s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▊ | 83/254 [27:19<54:46, 19.22s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████ | 84/254 [27:37<53:36, 18.92s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████ | 84/254 [27:37<53:36, 18.92s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1952, 'learning_rate': 4.98e-05, 'epoch': 0.33} + 33%|███████████████████████████ | 84/254 [27:37<53:36, 18.92s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████ | 84/254 [27:37<53:36, 18.92s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:04:44,867 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:04:44,867 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:04:44,867 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:04:44,867 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████▍ | 85/254 [27:54<52:14, 18.55s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████▍ | 85/254 [27:54<52:14, 18.55s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████▍ | 85/254 [27:54<52:14, 18.55s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████▍ | 85/254 [27:54<52:14, 18.55s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████▍ | 85/254 [27:54<52:14, 18.55s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████▍ | 85/254 [27:54<52:14, 18.55s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████▍ | 85/254 [27:54<52:14, 18.55s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████▍ | 85/254 [27:54<52:14, 18.55s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▊ | 86/254 [28:12<51:03, 18.23s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▊ | 86/254 [28:12<51:03, 18.23s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.149, 'learning_rate': 5.1e-05, 'epoch': 0.34} + 34%|███████████████████████████▊ | 86/254 [28:12<51:03, 18.23s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▊ | 86/254 [28:12<51:03, 18.23s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▊ | 86/254 [28:12<51:03, 18.23s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▊ | 86/254 [28:12<51:03, 18.23s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▊ | 86/254 [28:12<51:03, 18.23s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▊ | 86/254 [28:12<51:03, 18.23s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▊ | 86/254 [28:12<51:03, 18.23s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|████████████████████████████ | 87/254 [28:29<49:45, 17.88s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|████████████████████████████ | 87/254 [28:29<49:45, 17.88s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|████████████████████████████ | 87/254 [28:29<49:45, 17.88s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|████████████████████████████ | 87/254 [28:29<49:45, 17.88s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|████████████████████████████ | 87/254 [28:29<49:45, 17.88s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|████████████████████████████ | 87/254 [28:29<49:45, 17.88s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|████████████████████████████ | 87/254 [28:29<49:45, 17.88s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|████████████████████████████ | 87/254 [28:29<49:45, 17.88s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|████████████████████████████ | 87/254 [28:29<49:45, 17.88s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▍ | 88/254 [28:46<48:48, 17.64s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▍ | 88/254 [28:46<48:48, 17.64s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▍ | 88/254 [28:46<48:48, 17.64s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▍ | 88/254 [28:46<48:48, 17.64s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▍ | 88/254 [28:46<48:48, 17.64s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▍ | 88/254 [28:46<48:48, 17.64s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▍ | 88/254 [28:46<48:48, 17.64s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▍ | 88/254 [28:46<48:48, 17.64s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▍ | 88/254 [28:46<48:48, 17.64s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 89/254 [29:02<47:14, 17.18s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 89/254 [29:02<47:14, 17.18s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 89/254 [29:02<47:14, 17.18s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 89/254 [29:02<47:14, 17.18s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 89/254 [29:02<47:14, 17.18s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 89/254 [29:02<47:14, 17.18s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:06:12,585 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|█████████████████████████████ | 90/254 [29:17<45:24, 16.61s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|█████████████████████████████ | 90/254 [29:17<45:24, 16.61s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2526, 'learning_rate': 5.339999999999999e-05, 'epoch': 0.35} + 35%|█████████████████████████████ | 90/254 [29:17<45:24, 16.61s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|█████████████████████████████ | 90/254 [29:17<45:24, 16.61s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|█████████████████████████████ | 90/254 [29:17<45:24, 16.61s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|█████████████████████████████ | 90/254 [29:17<45:24, 16.61s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:06:27,113 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▍ | 91/254 [29:32<43:18, 15.94s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▍ | 91/254 [29:32<43:18, 15.94s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2178, 'learning_rate': 5.399999999999999e-05, 'epoch': 0.36} + 36%|█████████████████████████████▍ | 91/254 [29:32<43:18, 15.94s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▍ | 91/254 [29:32<43:18, 15.94s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:06:37,362 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:06:37,362 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:06:37,362 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:06:37,362 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▋ | 92/254 [29:45<40:57, 15.17s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:06:45,490 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:06:45,490 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:06:45,490 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:06:51,694 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:06:51,694 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:06:51,694 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|██████████████████████████████ | 93/254 [29:58<38:33, 14.37s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:06:57,724 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:06:57,724 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:01,987 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:01,987 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:06,073 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:06,073 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2793, 'learning_rate': 5.5799999999999994e-05, 'epoch': 0.37} +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:10,077 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:10,077 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:13,956 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:13,956 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:13,956 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|██████████████████████████████▋ | 95/254 [30:19<33:12, 12.53s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:20,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:20,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:23,521 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:25,707 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:25,707 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:27,889 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:29,872 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:31,814 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:33,715 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:33,715 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:35,649 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:37,444 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:39,149 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:39,149 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:40,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:43,984 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:45,458 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:45,458 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:48,286 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:49,579 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:50,777 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:50,777 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:52,454 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:52,454 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:58,934 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:58,934 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:08:05,074 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:08:05,074 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:08:05,074 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:08:05,074 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 101/254 [31:20<32:20, 12.68s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 101/254 [31:20<32:20, 12.68s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2692, 'learning_rate': 5.9999999999999995e-05, 'epoch': 0.4} + 40%|████████████████████████████████▏ | 101/254 [31:20<32:20, 12.68s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 101/254 [31:20<32:20, 12.68s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 101/254 [31:20<32:20, 12.68s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 101/254 [31:20<32:20, 12.68s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 101/254 [31:20<32:20, 12.68s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 101/254 [31:20<32:20, 12.68s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 101/254 [31:20<32:20, 12.68s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▌ | 102/254 [31:44<40:36, 16.03s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▌ | 102/254 [31:44<40:36, 16.03s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▌ | 102/254 [31:44<40:36, 16.03s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▌ | 102/254 [31:44<40:36, 16.03s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▌ | 102/254 [31:44<40:36, 16.03s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▌ | 102/254 [31:44<40:36, 16.03s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▌ | 102/254 [31:44<40:36, 16.03s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▌ | 102/254 [31:44<40:36, 16.03s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▌ | 102/254 [31:44<40:36, 16.03s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▊ | 103/254 [32:07<45:44, 18.17s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▊ | 103/254 [32:07<45:44, 18.17s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▊ | 103/254 [32:07<45:44, 18.17s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▊ | 103/254 [32:07<45:44, 18.17s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▊ | 103/254 [32:07<45:44, 18.17s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▊ | 103/254 [32:07<45:44, 18.17s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▊ | 103/254 [32:07<45:44, 18.17s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▊ | 103/254 [32:07<45:44, 18.17s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▏ | 104/254 [32:30<49:06, 19.64s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▏ | 104/254 [32:30<49:06, 19.64s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1882, 'learning_rate': 6.18e-05, 'epoch': 0.41} + 41%|█████████████████████████████████▏ | 104/254 [32:30<49:06, 19.64s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▏ | 104/254 [32:30<49:06, 19.64s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▏ | 104/254 [32:30<49:06, 19.64s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▏ | 104/254 [32:30<49:06, 19.64s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▏ | 104/254 [32:30<49:06, 19.64s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▏ | 104/254 [32:30<49:06, 19.64s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▍ | 105/254 [32:53<51:14, 20.63s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▍ | 105/254 [32:53<51:14, 20.63s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2571, 'learning_rate': 6.239999999999999e-05, 'epoch': 0.41} + 41%|█████████████████████████████████▍ | 105/254 [32:53<51:14, 20.63s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▍ | 105/254 [32:53<51:14, 20.63s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▍ | 105/254 [32:53<51:14, 20.63s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▍ | 105/254 [32:53<51:14, 20.63s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▍ | 105/254 [32:53<51:14, 20.63s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▍ | 105/254 [32:53<51:14, 20.63s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▍ | 105/254 [32:53<51:14, 20.63s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 106/254 [33:16<52:27, 21.27s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 106/254 [33:16<52:27, 21.27s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 106/254 [33:16<52:27, 21.27s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 106/254 [33:16<52:27, 21.27s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 106/254 [33:16<52:27, 21.27s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 106/254 [33:16<52:27, 21.27s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 106/254 [33:16<52:27, 21.27s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 106/254 [33:16<52:27, 21.27s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0725, 'learning_rate': 6.359999999999999e-05, 'epoch': 0.42} + g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▍ | 108/254 [34:01<53:31, 22.00s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▍ | 108/254 [34:01<53:31, 22.00s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▍ | 108/254 [34:01<53:31, 22.00s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▍ | 108/254 [34:01<53:31, 22.00s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▍ | 108/254 [34:01<53:31, 22.00s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▍ | 108/254 [34:01<53:31, 22.00s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▍ | 108/254 [34:01<53:31, 22.00s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▍ | 108/254 [34:01<53:31, 22.00s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2084, 'learning_rate': 6.479999999999999e-05, 'epoch': 0.43} + g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████ | 110/254 [34:46<53:08, 22.14s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████ | 110/254 [34:46<53:08, 22.14s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████ | 110/254 [34:46<53:08, 22.14s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████ | 110/254 [34:46<53:08, 22.14s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████ | 110/254 [34:46<53:08, 22.14s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████ | 110/254 [34:46<53:08, 22.14s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████ | 110/254 [34:46<53:08, 22.14s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████ | 110/254 [34:46<53:08, 22.14s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████ | 110/254 [34:46<53:08, 22.14s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 111/254 [35:08<52:30, 22.03s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 111/254 [35:08<52:30, 22.03s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 111/254 [35:08<52:30, 22.03s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 111/254 [35:08<52:30, 22.03s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 111/254 [35:08<52:30, 22.03s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 111/254 [35:08<52:30, 22.03s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 111/254 [35:08<52:30, 22.03s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 111/254 [35:08<52:30, 22.03s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 111/254 [35:08<52:30, 22.03s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▋ | 112/254 [35:30<52:06, 22.02s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▋ | 112/254 [35:30<52:06, 22.02s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▋ | 112/254 [35:30<52:06, 22.02s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▋ | 112/254 [35:30<52:06, 22.02s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▋ | 112/254 [35:30<52:06, 22.02s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▋ | 112/254 [35:30<52:06, 22.02s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▋ | 112/254 [35:30<52:06, 22.02s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▋ | 112/254 [35:30<52:06, 22.02s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0844, 'learning_rate': 6.72e-05, 'epoch': 0.44} + g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 114/254 [36:14<51:19, 22.00s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 114/254 [36:14<51:19, 22.00s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.172, 'learning_rate': 6.78e-05, 'epoch': 0.45} + 45%|████████████████████████████████████▎ | 114/254 [36:14<51:19, 22.00s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 114/254 [36:14<51:19, 22.00s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 114/254 [36:14<51:19, 22.00s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 114/254 [36:14<51:19, 22.00s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 114/254 [36:14<51:19, 22.00s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 114/254 [36:14<51:19, 22.00s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▋ | 115/254 [36:35<50:42, 21.89s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▋ | 115/254 [36:35<50:42, 21.89s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.188, 'learning_rate': 6.84e-05, 'epoch': 0.45} + 45%|████████████████████████████████████▋ | 115/254 [36:35<50:42, 21.89s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▋ | 115/254 [36:35<50:42, 21.89s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▋ | 115/254 [36:35<50:42, 21.89s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▋ | 115/254 [36:35<50:42, 21.89s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▋ | 115/254 [36:35<50:42, 21.89s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▋ | 115/254 [36:35<50:42, 21.89s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▋ | 115/254 [36:35<50:42, 21.89s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▉ | 116/254 [36:57<49:56, 21.72s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▉ | 116/254 [36:57<49:56, 21.72s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▉ | 116/254 [36:57<49:56, 21.72s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▉ | 116/254 [36:57<49:56, 21.72s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▉ | 116/254 [36:57<49:56, 21.72s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▉ | 116/254 [36:57<49:56, 21.72s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▉ | 116/254 [36:57<49:56, 21.72s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▉ | 116/254 [36:57<49:56, 21.72s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▉ | 116/254 [36:57<49:56, 21.72s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▎ | 117/254 [37:18<49:19, 21.60s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▎ | 117/254 [37:18<49:19, 21.60s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▎ | 117/254 [37:18<49:19, 21.60s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▎ | 117/254 [37:18<49:19, 21.60s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▎ | 117/254 [37:18<49:19, 21.60s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▎ | 117/254 [37:18<49:19, 21.60s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▎ | 117/254 [37:18<49:19, 21.60s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▎ | 117/254 [37:18<49:19, 21.60s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▋ | 118/254 [37:39<48:33, 21.43s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▋ | 118/254 [37:39<48:33, 21.43s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1547, 'learning_rate': 7.02e-05, 'epoch': 0.46} + 46%|█████████████████████████████████████▋ | 118/254 [37:39<48:33, 21.43s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▋ | 118/254 [37:39<48:33, 21.43s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▋ | 118/254 [37:39<48:33, 21.43s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▋ | 118/254 [37:39<48:33, 21.43s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▋ | 118/254 [37:39<48:33, 21.43s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▋ | 118/254 [37:39<48:33, 21.43s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▋ | 118/254 [37:39<48:33, 21.43s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▋ | 118/254 [37:39<48:33, 21.43s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▉ | 119/254 [38:00<47:56, 21.31s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▉ | 119/254 [38:00<47:56, 21.31s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▉ | 119/254 [38:00<47:56, 21.31s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▉ | 119/254 [38:00<47:56, 21.31s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▉ | 119/254 [38:00<47:56, 21.31s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▉ | 119/254 [38:00<47:56, 21.31s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▉ | 119/254 [38:00<47:56, 21.31s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▉ | 119/254 [38:00<47:56, 21.31s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▉ | 119/254 [38:00<47:56, 21.31s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|██████████████████████████████████████▎ | 120/254 [38:21<47:17, 21.17s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|██████████████████████████████████████▎ | 120/254 [38:21<47:17, 21.17s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|██████████████████████████████████████▎ | 120/254 [38:21<47:17, 21.17s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|██████████████████████████████████████▎ | 120/254 [38:21<47:17, 21.17s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|██████████████████████████████████████▎ | 120/254 [38:21<47:17, 21.17s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|██████████████████████████████████████▎ | 120/254 [38:21<47:17, 21.17s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|██████████████████████████████████████▎ | 120/254 [38:21<47:17, 21.17s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|██████████████████████████████████████▎ | 120/254 [38:21<47:17, 21.17s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▌ | 121/254 [38:41<46:29, 20.97s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▌ | 121/254 [38:41<46:29, 20.97s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1361, 'learning_rate': 7.199999999999999e-05, 'epoch': 0.47} + 48%|██████████████████████████████████████▌ | 121/254 [38:41<46:29, 20.97s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▌ | 121/254 [38:41<46:29, 20.97s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▌ | 121/254 [38:41<46:29, 20.97s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▌ | 121/254 [38:41<46:29, 20.97s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▌ | 121/254 [38:41<46:29, 20.97s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▌ | 121/254 [38:41<46:29, 20.97s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▉ | 122/254 [39:02<45:44, 20.79s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▉ | 122/254 [39:02<45:44, 20.79s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2158, 'learning_rate': 7.259999999999999e-05, 'epoch': 0.48} + 48%|██████████████████████████████████████▉ | 122/254 [39:02<45:44, 20.79s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▉ | 122/254 [39:02<45:44, 20.79s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▉ | 122/254 [39:02<45:44, 20.79s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▉ | 122/254 [39:02<45:44, 20.79s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▉ | 122/254 [39:02<45:44, 20.79s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▉ | 122/254 [39:02<45:44, 20.79s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▉ | 122/254 [39:02<45:44, 20.79s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|███████████████████████████████████████▏ | 123/254 [39:22<45:03, 20.64s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|███████████████████████████████████████▏ | 123/254 [39:22<45:03, 20.64s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|███████████████████████████████████████▏ | 123/254 [39:22<45:03, 20.64s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|███████████████████████████████████████▏ | 123/254 [39:22<45:03, 20.64s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|███████████████████████████████████████▏ | 123/254 [39:22<45:03, 20.64s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|███████████████████████████████████████▏ | 123/254 [39:22<45:03, 20.64s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|███████████████████████████████████████▏ | 123/254 [39:22<45:03, 20.64s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|███████████████████████████████████████▏ | 123/254 [39:22<45:03, 20.64s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|███████████████████████████████████████▏ | 123/254 [39:22<45:03, 20.64s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▌ | 124/254 [39:42<44:19, 20.46s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▌ | 124/254 [39:42<44:19, 20.46s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▌ | 124/254 [39:42<44:19, 20.46s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▌ | 124/254 [39:42<44:19, 20.46s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▌ | 124/254 [39:42<44:19, 20.46s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▌ | 124/254 [39:42<44:19, 20.46s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▌ | 124/254 [39:42<44:19, 20.46s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▌ | 124/254 [39:42<44:19, 20.46s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▊ | 125/254 [40:03<44:04, 20.50s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▊ | 125/254 [40:03<44:04, 20.50s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2207, 'learning_rate': 7.439999999999999e-05, 'epoch': 0.49} + 49%|███████████████████████████████████████▊ | 125/254 [40:03<44:04, 20.50s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▊ | 125/254 [40:03<44:04, 20.50s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▊ | 125/254 [40:03<44:04, 20.50s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▊ | 125/254 [40:03<44:04, 20.50s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▊ | 125/254 [40:03<44:04, 20.50s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▊ | 125/254 [40:03<44:04, 20.50s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▊ | 125/254 [40:03<44:04, 20.50s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▊ | 125/254 [40:03<44:04, 20.50s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▏ | 126/254 [40:22<43:18, 20.30s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▏ | 126/254 [40:22<43:18, 20.30s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▏ | 126/254 [40:22<43:18, 20.30s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▏ | 126/254 [40:22<43:18, 20.30s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▏ | 126/254 [40:22<43:18, 20.30s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▏ | 126/254 [40:22<43:18, 20.30s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▏ | 126/254 [40:22<43:18, 20.30s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▏ | 126/254 [40:22<43:18, 20.30s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▏ | 126/254 [40:22<43:18, 20.30s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▏ | 126/254 [40:22<43:18, 20.30s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2011, 'learning_rate': 7.56e-05, 'epoch': 0.5} + 50%|████████████████████████████████████████▏ | 126/254 [40:22<43:18, 20.30s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▏ | 126/254 [40:22<43:18, 20.30s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▏ | 126/254 [40:22<43:18, 20.30s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▏ | 126/254 [40:22<43:18, 20.30s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▏ | 126/254 [40:22<43:18, 20.30s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▏ | 126/254 [40:22<43:18, 20.30s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▏ | 126/254 [40:22<43:18, 20.30s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▊ | 128/254 [41:02<41:57, 19.98s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▊ | 128/254 [41:02<41:57, 19.98s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.16, 'learning_rate': 7.62e-05, 'epoch': 0.5} + 50%|████████████████████████████████████████▊ | 128/254 [41:02<41:57, 19.98s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▊ | 128/254 [41:02<41:57, 19.98s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▊ | 128/254 [41:02<41:57, 19.98s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▊ | 128/254 [41:02<41:57, 19.98s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▊ | 128/254 [41:02<41:57, 19.98s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▊ | 128/254 [41:02<41:57, 19.98s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▊ | 128/254 [41:02<41:57, 19.98s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▏ | 129/254 [41:21<41:18, 19.82s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▏ | 129/254 [41:21<41:18, 19.82s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▏ | 129/254 [41:21<41:18, 19.82s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▏ | 129/254 [41:21<41:18, 19.82s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▏ | 129/254 [41:21<41:18, 19.82s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▏ | 129/254 [41:21<41:18, 19.82s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▏ | 129/254 [41:21<41:18, 19.82s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▏ | 129/254 [41:21<41:18, 19.82s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▍ | 130/254 [41:41<40:38, 19.66s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▍ | 130/254 [41:41<40:38, 19.66s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2454, 'learning_rate': 7.74e-05, 'epoch': 0.51} + 51%|█████████████████████████████████████████▍ | 130/254 [41:41<40:38, 19.66s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▍ | 130/254 [41:41<40:38, 19.66s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▍ | 130/254 [41:41<40:38, 19.66s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▍ | 130/254 [41:41<40:38, 19.66s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▍ | 130/254 [41:41<40:38, 19.66s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▍ | 130/254 [41:41<40:38, 19.66s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▍ | 130/254 [41:41<40:38, 19.66s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▊ | 131/254 [41:59<39:50, 19.43s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▊ | 131/254 [41:59<39:50, 19.43s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▊ | 131/254 [41:59<39:50, 19.43s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▊ | 131/254 [41:59<39:50, 19.43s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▊ | 131/254 [41:59<39:50, 19.43s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▊ | 131/254 [41:59<39:50, 19.43s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▊ | 131/254 [41:59<39:50, 19.43s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▊ | 131/254 [41:59<39:50, 19.43s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████ | 132/254 [42:18<39:10, 19.27s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████ | 132/254 [42:18<39:10, 19.27s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1064, 'learning_rate': 7.86e-05, 'epoch': 0.52} + 52%|██████████████████████████████████████████ | 132/254 [42:18<39:10, 19.27s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████ | 132/254 [42:18<39:10, 19.27s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████ | 132/254 [42:18<39:10, 19.27s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████ | 132/254 [42:18<39:10, 19.27s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████ | 132/254 [42:18<39:10, 19.27s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████ | 132/254 [42:18<39:10, 19.27s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▍ | 133/254 [42:37<38:31, 19.10s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▍ | 133/254 [42:37<38:31, 19.10s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1765, 'learning_rate': 7.92e-05, 'epoch': 0.52} + 52%|██████████████████████████████████████████▍ | 133/254 [42:37<38:31, 19.10s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▍ | 133/254 [42:37<38:31, 19.10s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▍ | 133/254 [42:37<38:31, 19.10s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▍ | 133/254 [42:37<38:31, 19.10s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▍ | 133/254 [42:37<38:31, 19.10s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▍ | 133/254 [42:37<38:31, 19.10s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▋ | 134/254 [42:55<37:42, 18.85s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▋ | 134/254 [42:55<37:42, 18.85s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.073, 'learning_rate': 7.98e-05, 'epoch': 0.53} + 53%|██████████████████████████████████████████▋ | 134/254 [42:55<37:42, 18.85s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▋ | 134/254 [42:55<37:42, 18.85s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▋ | 134/254 [42:55<37:42, 18.85s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▋ | 134/254 [42:55<37:42, 18.85s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▋ | 134/254 [42:55<37:42, 18.85s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▋ | 134/254 [42:55<37:42, 18.85s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|███████████████████████████████████████████ | 135/254 [43:13<36:59, 18.65s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|███████████████████████████████████████████ | 135/254 [43:13<36:59, 18.65s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2656, 'learning_rate': 8.04e-05, 'epoch': 0.53} + 53%|███████████████████████████████████████████ | 135/254 [43:13<36:59, 18.65s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|███████████████████████████████████████████ | 135/254 [43:13<36:59, 18.65s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|███████████████████████████████████████████ | 135/254 [43:13<36:59, 18.65s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|███████████████████████████████████████████ | 135/254 [43:13<36:59, 18.65s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|███████████████████████████████████████████ | 135/254 [43:13<36:59, 18.65s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|███████████████████████████████████████████ | 135/254 [43:13<36:59, 18.65s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 136/254 [43:31<36:02, 18.33s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 136/254 [43:31<36:02, 18.33s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.214, 'learning_rate': 8.1e-05, 'epoch': 0.53} + 54%|███████████████████████████████████████████▎ | 136/254 [43:31<36:02, 18.33s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 136/254 [43:31<36:02, 18.33s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 136/254 [43:31<36:02, 18.33s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 136/254 [43:31<36:02, 18.33s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 136/254 [43:31<36:02, 18.33s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 136/254 [43:31<36:02, 18.33s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▋ | 137/254 [43:48<35:03, 17.98s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▋ | 137/254 [43:48<35:03, 17.98s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1251, 'learning_rate': 8.16e-05, 'epoch': 0.54} + 54%|███████████████████████████████████████████▋ | 137/254 [43:48<35:03, 17.98s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▋ | 137/254 [43:48<35:03, 17.98s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▋ | 137/254 [43:48<35:03, 17.98s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▋ | 137/254 [43:48<35:03, 17.98s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▋ | 137/254 [43:48<35:03, 17.98s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▋ | 137/254 [43:48<35:03, 17.98s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|████████████████████████████████████████████ | 138/254 [44:06<34:20, 17.77s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|████████████████████████████████████████████ | 138/254 [44:06<34:20, 17.77s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0177, 'learning_rate': 8.22e-05, 'epoch': 0.54} + 54%|████████████████████████████████████████████ | 138/254 [44:06<34:20, 17.77s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|████████████████████████████████████████████ | 138/254 [44:06<34:20, 17.77s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|████████████████████████████████████████████ | 138/254 [44:06<34:20, 17.77s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|████████████████████████████████████████████ | 138/254 [44:06<34:20, 17.77s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|████████████████████████████████████████████ | 138/254 [44:06<34:20, 17.77s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|████████████████████████████████████████████ | 138/254 [44:06<34:20, 17.77s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▎ | 139/254 [44:22<33:13, 17.33s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▎ | 139/254 [44:22<33:13, 17.33s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1159, 'learning_rate': 8.28e-05, 'epoch': 0.55} + 55%|████████████████████████████████████████████▎ | 139/254 [44:22<33:13, 17.33s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▎ | 139/254 [44:22<33:13, 17.33s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▎ | 139/254 [44:22<33:13, 17.33s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▎ | 139/254 [44:22<33:13, 17.33s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▎ | 139/254 [44:22<33:13, 17.33s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▎ | 139/254 [44:22<33:13, 17.33s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▎ | 139/254 [44:22<33:13, 17.33s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▋ | 140/254 [44:37<31:47, 16.73s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▋ | 140/254 [44:37<31:47, 16.73s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▋ | 140/254 [44:37<31:47, 16.73s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:21:41,502 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:21:41,502 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:21:41,502 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:21:41,502 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:21:41,502 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|████████████████████████████████████████████▉ | 141/254 [44:52<30:16, 16.08s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|████████████████████████████████████████████▉ | 141/254 [44:52<30:16, 16.08s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|████████████████████████████████████████████▉ | 141/254 [44:52<30:16, 16.08s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:21:55,588 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:21:55,588 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:21:55,588 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:21:55,588 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:21:55,588 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▎ | 142/254 [45:05<28:36, 15.32s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:22:05,533 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:22:05,533 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:22:05,533 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:22:11,816 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:22:11,816 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:22:11,816 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▌ | 143/254 [45:18<26:53, 14.54s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:22:18,005 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:22:18,005 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:22:18,005 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:22:23,767 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:22:23,767 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:22:23,767 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|█████████████████████████████████████████████▉ | 144/254 [45:30<25:02, 13.66s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:22:27,928 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|█████████████████████████████████████████████▉ | 144/254 [45:30<25:02, 13.66s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:22:27,928 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:22:31,778 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:27,928 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:22:34,298 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:27,928 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:22:34,298 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:27,928 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:22:34,298 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:27,928 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|██████████████████████████████████████████████▏ | 145/254 [45:40<22:55, 12.62s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:22:40,282 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:22:42,527 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:22:42,527 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:22:42,527 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:22:45,807 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:22:47,994 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:22:50,016 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:22:51,976 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:22:53,883 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:22:53,883 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:22:55,834 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:22:57,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:22:59,321 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:22:59,321 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:23:02,678 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:23:04,225 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:23:05,657 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:23:05,657 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:23:08,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:23:10,908 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:23:12,592 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:23:12,592 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5587, 'learning_rate': 8.939999999999999e-05, 'epoch': 0.59} +[WARNING|modeling_utils.py:388] 2022-03-03 00:23:19,112 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:23:19,112 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:23:25,253 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:23:25,253 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:23:31,387 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:23:31,387 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████▏ | 151/254 [46:40<21:51, 12.73s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████▏ | 151/254 [46:40<21:51, 12.73s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2288, 'learning_rate': 8.999999999999999e-05, 'epoch': 0.59} + 59%|████████████████████████████████████████████████▏ | 151/254 [46:40<21:51, 12.73s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████▏ | 151/254 [46:40<21:51, 12.73s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████▏ | 151/254 [46:40<21:51, 12.73s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████▏ | 151/254 [46:40<21:51, 12.73s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████▏ | 151/254 [46:40<21:51, 12.73s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████▏ | 151/254 [46:40<21:51, 12.73s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▍ | 152/254 [47:04<27:16, 16.05s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▍ | 152/254 [47:04<27:16, 16.05s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.228, 'learning_rate': 9.059999999999999e-05, 'epoch': 0.6} + 60%|████████████████████████████████████████████████▍ | 152/254 [47:04<27:16, 16.05s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▍ | 152/254 [47:04<27:16, 16.05s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▍ | 152/254 [47:04<27:16, 16.05s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▍ | 152/254 [47:04<27:16, 16.05s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▍ | 152/254 [47:04<27:16, 16.05s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▍ | 152/254 [47:04<27:16, 16.05s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▊ | 153/254 [47:28<30:45, 18.27s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▊ | 153/254 [47:28<30:45, 18.27s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2054, 'learning_rate': 9.12e-05, 'epoch': 0.6} + 60%|████████████████████████████████████████████████▊ | 153/254 [47:28<30:45, 18.27s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▊ | 153/254 [47:28<30:45, 18.27s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▊ | 153/254 [47:28<30:45, 18.27s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▊ | 153/254 [47:28<30:45, 18.27s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▊ | 153/254 [47:28<30:45, 18.27s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▊ | 153/254 [47:28<30:45, 18.27s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▊ | 153/254 [47:28<30:45, 18.27s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 154/254 [47:51<32:51, 19.72s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 154/254 [47:51<32:51, 19.72s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 154/254 [47:51<32:51, 19.72s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 154/254 [47:51<32:51, 19.72s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 154/254 [47:51<32:51, 19.72s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 154/254 [47:51<32:51, 19.72s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 154/254 [47:51<32:51, 19.72s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 154/254 [47:51<32:51, 19.72s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 154/254 [47:51<32:51, 19.72s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▍ | 155/254 [48:14<34:05, 20.66s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▍ | 155/254 [48:14<34:05, 20.66s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▍ | 155/254 [48:14<34:05, 20.66s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▍ | 155/254 [48:14<34:05, 20.66s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▍ | 155/254 [48:14<34:05, 20.66s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▍ | 155/254 [48:14<34:05, 20.66s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▍ | 155/254 [48:14<34:05, 20.66s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▍ | 155/254 [48:14<34:05, 20.66s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▍ | 155/254 [48:14<34:05, 20.66s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 156/254 [48:36<34:44, 21.27s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 156/254 [48:36<34:44, 21.27s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 156/254 [48:36<34:44, 21.27s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 156/254 [48:36<34:44, 21.27s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 156/254 [48:36<34:44, 21.27s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 156/254 [48:36<34:44, 21.27s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 156/254 [48:36<34:44, 21.27s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 156/254 [48:36<34:44, 21.27s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 156/254 [48:36<34:44, 21.27s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████ | 157/254 [48:59<35:02, 21.68s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████ | 157/254 [48:59<35:02, 21.68s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████ | 157/254 [48:59<35:02, 21.68s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████ | 157/254 [48:59<35:02, 21.68s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████ | 157/254 [48:59<35:02, 21.68s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████ | 157/254 [48:59<35:02, 21.68s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████ | 157/254 [48:59<35:02, 21.68s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████ | 157/254 [48:59<35:02, 21.68s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████ | 157/254 [48:59<35:02, 21.68s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 158/254 [49:22<35:10, 21.99s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 158/254 [49:22<35:10, 21.99s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 158/254 [49:22<35:10, 21.99s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 158/254 [49:22<35:10, 21.99s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 158/254 [49:22<35:10, 21.99s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 158/254 [49:22<35:10, 21.99s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 158/254 [49:22<35:10, 21.99s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 158/254 [49:22<35:10, 21.99s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▋ | 159/254 [49:44<35:01, 22.12s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▋ | 159/254 [49:44<35:01, 22.12s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1494, 'learning_rate': 9.479999999999999e-05, 'epoch': 0.62} + 63%|██████████████████████████████████████████████████▋ | 159/254 [49:44<35:01, 22.12s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▋ | 159/254 [49:44<35:01, 22.12s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▋ | 159/254 [49:44<35:01, 22.12s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▋ | 159/254 [49:44<35:01, 22.12s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▋ | 159/254 [49:44<35:01, 22.12s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▋ | 159/254 [49:44<35:01, 22.12s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▋ | 159/254 [49:44<35:01, 22.12s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1828, 'learning_rate': 9.539999999999999e-05, 'epoch': 0.63} + 63%|██████████████████████████████████████████████████▋ | 159/254 [49:44<35:01, 22.12s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▋ | 159/254 [49:44<35:01, 22.12s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▋ | 159/254 [49:44<35:01, 22.12s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▋ | 159/254 [49:44<35:01, 22.12s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▋ | 159/254 [49:44<35:01, 22.12s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▋ | 159/254 [49:44<35:01, 22.12s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▋ | 159/254 [49:44<35:01, 22.12s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▋ | 159/254 [49:44<35:01, 22.12s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▋ | 159/254 [49:44<35:01, 22.12s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▎ | 161/254 [50:28<34:18, 22.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▎ | 161/254 [50:28<34:18, 22.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▎ | 161/254 [50:28<34:18, 22.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▎ | 161/254 [50:28<34:18, 22.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▎ | 161/254 [50:28<34:18, 22.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▎ | 161/254 [50:28<34:18, 22.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▎ | 161/254 [50:28<34:18, 22.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▎ | 161/254 [50:28<34:18, 22.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▎ | 161/254 [50:28<34:18, 22.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▋ | 162/254 [50:51<33:57, 22.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▋ | 162/254 [50:51<33:57, 22.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▋ | 162/254 [50:51<33:57, 22.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▋ | 162/254 [50:51<33:57, 22.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▋ | 162/254 [50:51<33:57, 22.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▋ | 162/254 [50:51<33:57, 22.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▋ | 162/254 [50:51<33:57, 22.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▋ | 162/254 [50:51<33:57, 22.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▉ | 163/254 [51:13<33:49, 22.30s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▉ | 163/254 [51:13<33:49, 22.30s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2278, 'learning_rate': 9.719999999999999e-05, 'epoch': 0.64} + 64%|███████████████████████████████████████████████████▉ | 163/254 [51:13<33:49, 22.30s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▉ | 163/254 [51:13<33:49, 22.30s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▉ | 163/254 [51:13<33:49, 22.30s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▉ | 163/254 [51:13<33:49, 22.30s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▉ | 163/254 [51:13<33:49, 22.30s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▉ | 163/254 [51:13<33:49, 22.30s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▉ | 163/254 [51:13<33:49, 22.30s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▎ | 164/254 [51:35<33:12, 22.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▎ | 164/254 [51:35<33:12, 22.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▎ | 164/254 [51:35<33:12, 22.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▎ | 164/254 [51:35<33:12, 22.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▎ | 164/254 [51:35<33:12, 22.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▎ | 164/254 [51:35<33:12, 22.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▎ | 164/254 [51:35<33:12, 22.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▎ | 164/254 [51:35<33:12, 22.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▌ | 165/254 [51:57<32:37, 21.99s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▌ | 165/254 [51:57<32:37, 21.99s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1873, 'learning_rate': 9.839999999999999e-05, 'epoch': 0.65} + 65%|████████████████████████████████████████████████████▌ | 165/254 [51:57<32:37, 21.99s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▌ | 165/254 [51:57<32:37, 21.99s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▌ | 165/254 [51:57<32:37, 21.99s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▌ | 165/254 [51:57<32:37, 21.99s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▌ | 165/254 [51:57<32:37, 21.99s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▌ | 165/254 [51:57<32:37, 21.99s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▉ | 166/254 [52:18<32:03, 21.86s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▉ | 166/254 [52:18<32:03, 21.86s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1381, 'learning_rate': 9.9e-05, 'epoch': 0.65} + 65%|████████████████████████████████████████████████████▉ | 166/254 [52:18<32:03, 21.86s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▉ | 166/254 [52:18<32:03, 21.86s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▉ | 166/254 [52:18<32:03, 21.86s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▉ | 166/254 [52:18<32:03, 21.86s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▉ | 166/254 [52:18<32:03, 21.86s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▉ | 166/254 [52:18<32:03, 21.86s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▉ | 166/254 [52:18<32:03, 21.86s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▎ | 167/254 [52:40<31:29, 21.72s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▎ | 167/254 [52:40<31:29, 21.72s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▎ | 167/254 [52:40<31:29, 21.72s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▎ | 167/254 [52:40<31:29, 21.72s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▎ | 167/254 [52:40<31:29, 21.72s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▎ | 167/254 [52:40<31:29, 21.72s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▎ | 167/254 [52:40<31:29, 21.72s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▎ | 167/254 [52:40<31:29, 21.72s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▌ | 168/254 [53:01<30:54, 21.56s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▌ | 168/254 [53:01<30:54, 21.56s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1664, 'learning_rate': 0.0001002, 'epoch': 0.66} + 66%|█████████████████████████████████████████████████████▌ | 168/254 [53:01<30:54, 21.56s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▌ | 168/254 [53:01<30:54, 21.56s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▌ | 168/254 [53:01<30:54, 21.56s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▌ | 168/254 [53:01<30:54, 21.56s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▌ | 168/254 [53:01<30:54, 21.56s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▌ | 168/254 [53:01<30:54, 21.56s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1571, 'learning_rate': 0.0001008, 'epoch': 0.66} + g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▏ | 170/254 [53:43<29:43, 21.23s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▏ | 170/254 [53:43<29:43, 21.23s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▏ | 170/254 [53:43<29:43, 21.23s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▏ | 170/254 [53:43<29:43, 21.23s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▏ | 170/254 [53:43<29:43, 21.23s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▏ | 170/254 [53:43<29:43, 21.23s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▏ | 170/254 [53:43<29:43, 21.23s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▏ | 170/254 [53:43<29:43, 21.23s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▌ | 171/254 [54:03<29:10, 21.09s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▌ | 171/254 [54:03<29:10, 21.09s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1581, 'learning_rate': 0.000102, 'epoch': 0.67} + 67%|██████████████████████████████████████████████████████▌ | 171/254 [54:03<29:10, 21.09s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▌ | 171/254 [54:03<29:10, 21.09s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▌ | 171/254 [54:03<29:10, 21.09s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▌ | 171/254 [54:03<29:10, 21.09s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▌ | 171/254 [54:03<29:10, 21.09s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▌ | 171/254 [54:03<29:10, 21.09s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 172/254 [54:24<28:37, 20.95s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 172/254 [54:24<28:37, 20.95s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0611, 'learning_rate': 0.0001026, 'epoch': 0.67} + 68%|██████████████████████████████████████████████████████▊ | 172/254 [54:24<28:37, 20.95s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 172/254 [54:24<28:37, 20.95s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 172/254 [54:24<28:37, 20.95s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 172/254 [54:24<28:37, 20.95s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 172/254 [54:24<28:37, 20.95s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 172/254 [54:24<28:37, 20.95s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 172/254 [54:24<28:37, 20.95s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▏ | 173/254 [54:44<28:05, 20.80s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▏ | 173/254 [54:44<28:05, 20.80s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▏ | 173/254 [54:44<28:05, 20.80s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▏ | 173/254 [54:44<28:05, 20.80s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▏ | 173/254 [54:44<28:05, 20.80s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▏ | 173/254 [54:44<28:05, 20.80s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▏ | 173/254 [54:44<28:05, 20.80s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▏ | 173/254 [54:44<28:05, 20.80s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▏ | 173/254 [54:44<28:05, 20.80s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▍ | 174/254 [55:05<27:34, 20.68s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▍ | 174/254 [55:05<27:34, 20.68s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▍ | 174/254 [55:05<27:34, 20.68s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▍ | 174/254 [55:05<27:34, 20.68s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▍ | 174/254 [55:05<27:34, 20.68s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▍ | 174/254 [55:05<27:34, 20.68s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▍ | 174/254 [55:05<27:34, 20.68s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▍ | 174/254 [55:05<27:34, 20.68s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▍ | 174/254 [55:05<27:34, 20.68s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▊ | 175/254 [55:26<27:16, 20.72s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▊ | 175/254 [55:26<27:16, 20.72s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▊ | 175/254 [55:26<27:16, 20.72s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▊ | 175/254 [55:26<27:16, 20.72s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▊ | 175/254 [55:26<27:16, 20.72s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▊ | 175/254 [55:26<27:16, 20.72s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▊ | 175/254 [55:26<27:16, 20.72s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▊ | 175/254 [55:26<27:16, 20.72s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 176/254 [55:46<26:40, 20.52s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 176/254 [55:46<26:40, 20.52s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1462, 'learning_rate': 0.00010499999999999999, 'epoch': 0.69} + 69%|████████████████████████████████████████████████████████▏ | 176/254 [55:46<26:40, 20.52s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 176/254 [55:46<26:40, 20.52s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 176/254 [55:46<26:40, 20.52s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 176/254 [55:46<26:40, 20.52s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 176/254 [55:46<26:40, 20.52s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 176/254 [55:46<26:40, 20.52s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▍ | 177/254 [56:06<26:07, 20.35s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▍ | 177/254 [56:06<26:07, 20.35s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1756, 'learning_rate': 0.00010559999999999998, 'epoch': 0.69} + 70%|████████████████████████████████████████████████████████▍ | 177/254 [56:06<26:07, 20.35s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▍ | 177/254 [56:06<26:07, 20.35s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▍ | 177/254 [56:06<26:07, 20.35s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▍ | 177/254 [56:06<26:07, 20.35s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▍ | 177/254 [56:06<26:07, 20.35s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▍ | 177/254 [56:06<26:07, 20.35s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▊ | 178/254 [56:25<25:30, 20.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▊ | 178/254 [56:25<25:30, 20.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1378, 'learning_rate': 0.00010619999999999998, 'epoch': 0.7} + 70%|████████████████████████████████████████████████████████▊ | 178/254 [56:25<25:30, 20.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▊ | 178/254 [56:25<25:30, 20.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▊ | 178/254 [56:25<25:30, 20.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▊ | 178/254 [56:25<25:30, 20.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▊ | 178/254 [56:25<25:30, 20.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▊ | 178/254 [56:25<25:30, 20.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|█████████████████████████████████████████████████████████ | 179/254 [56:45<24:57, 19.96s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|█████████████████████████████████████████████████████████ | 179/254 [56:45<24:57, 19.96s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0316, 'learning_rate': 0.00010679999999999998, 'epoch': 0.7} + 70%|█████████████████████████████████████████████████████████ | 179/254 [56:45<24:57, 19.96s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|█████████████████████████████████████████████████████████ | 179/254 [56:45<24:57, 19.96s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|█████████████████████████████████████████████████████████ | 179/254 [56:45<24:57, 19.96s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|█████████████████████████████████████████████████████████ | 179/254 [56:45<24:57, 19.96s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|█████████████████████████████████████████████████████████ | 179/254 [56:45<24:57, 19.96s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|█████████████████████████████████████████████████████████ | 179/254 [56:45<24:57, 19.96s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▍ | 180/254 [57:04<24:22, 19.76s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▍ | 180/254 [57:04<24:22, 19.76s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2297, 'learning_rate': 0.00010739999999999998, 'epoch': 0.71} + 71%|█████████████████████████████████████████████████████████▍ | 180/254 [57:04<24:22, 19.76s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▍ | 180/254 [57:04<24:22, 19.76s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▍ | 180/254 [57:04<24:22, 19.76s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▍ | 180/254 [57:04<24:22, 19.76s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▍ | 180/254 [57:04<24:22, 19.76s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▍ | 180/254 [57:04<24:22, 19.76s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▋ | 181/254 [57:24<24:06, 19.81s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▋ | 181/254 [57:24<24:06, 19.81s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2747, 'learning_rate': 0.00010799999999999998, 'epoch': 0.71} + 71%|█████████████████████████████████████████████████████████▋ | 181/254 [57:24<24:06, 19.81s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▋ | 181/254 [57:24<24:06, 19.81s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▋ | 181/254 [57:24<24:06, 19.81s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▋ | 181/254 [57:24<24:06, 19.81s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▋ | 181/254 [57:24<24:06, 19.81s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▋ | 181/254 [57:24<24:06, 19.81s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▋ | 181/254 [57:24<24:06, 19.81s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 182/254 [57:43<23:25, 19.52s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 182/254 [57:43<23:25, 19.52s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 182/254 [57:43<23:25, 19.52s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 182/254 [57:43<23:25, 19.52s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 182/254 [57:43<23:25, 19.52s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 182/254 [57:43<23:25, 19.52s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 182/254 [57:43<23:25, 19.52s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 182/254 [57:43<23:25, 19.52s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 182/254 [57:43<23:25, 19.52s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 183/254 [58:02<22:47, 19.27s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 183/254 [58:02<22:47, 19.27s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 183/254 [58:02<22:47, 19.27s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 183/254 [58:02<22:47, 19.27s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 183/254 [58:02<22:47, 19.27s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 183/254 [58:02<22:47, 19.27s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 183/254 [58:02<22:47, 19.27s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 183/254 [58:02<22:47, 19.27s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 183/254 [58:02<22:47, 19.27s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▋ | 184/254 [58:20<22:08, 18.98s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▋ | 184/254 [58:20<22:08, 18.98s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▋ | 184/254 [58:20<22:08, 18.98s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▋ | 184/254 [58:20<22:08, 18.98s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▋ | 184/254 [58:20<22:08, 18.98s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▋ | 184/254 [58:20<22:08, 18.98s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▋ | 184/254 [58:20<22:08, 18.98s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▋ | 184/254 [58:20<22:08, 18.98s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|██████████████████████████████████████████████████████████▉ | 185/254 [58:38<21:33, 18.74s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|██████████████████████████████████████████████████████████▉ | 185/254 [58:38<21:33, 18.74s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2607, 'learning_rate': 0.00011039999999999999, 'epoch': 0.73} + 73%|██████████████████████████████████████████████████████████▉ | 185/254 [58:38<21:33, 18.74s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|██████████████████████████████████████████████████████████▉ | 185/254 [58:38<21:33, 18.74s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|██████████████████████████████████████████████████████████▉ | 185/254 [58:38<21:33, 18.74s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|██████████████████████████████████████████████████████████▉ | 185/254 [58:38<21:33, 18.74s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|██████████████████████████████████████████████████████████▉ | 185/254 [58:38<21:33, 18.74s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|██████████████████████████████████████████████████████████▉ | 185/254 [58:38<21:33, 18.74s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▎ | 186/254 [58:56<20:57, 18.49s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▎ | 186/254 [58:56<20:57, 18.49s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1508, 'learning_rate': 0.00011099999999999999, 'epoch': 0.73} + 73%|███████████████████████████████████████████████████████████▎ | 186/254 [58:56<20:57, 18.49s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▎ | 186/254 [58:56<20:57, 18.49s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▎ | 186/254 [58:56<20:57, 18.49s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▎ | 186/254 [58:56<20:57, 18.49s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▎ | 186/254 [58:56<20:57, 18.49s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▎ | 186/254 [58:56<20:57, 18.49s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▋ | 187/254 [59:14<20:32, 18.39s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▋ | 187/254 [59:14<20:32, 18.39s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1362, 'learning_rate': 0.00011159999999999999, 'epoch': 0.73} + 74%|███████████████████████████████████████████████████████████▋ | 187/254 [59:14<20:32, 18.39s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▋ | 187/254 [59:14<20:32, 18.39s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▋ | 187/254 [59:14<20:32, 18.39s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▋ | 187/254 [59:14<20:32, 18.39s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▋ | 187/254 [59:14<20:32, 18.39s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▋ | 187/254 [59:14<20:32, 18.39s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▋ | 187/254 [59:14<20:32, 18.39s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▉ | 188/254 [59:33<20:29, 18.63s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▉ | 188/254 [59:33<20:29, 18.63s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▉ | 188/254 [59:33<20:29, 18.63s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▉ | 188/254 [59:33<20:29, 18.63s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▉ | 188/254 [59:33<20:29, 18.63s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▉ | 188/254 [59:33<20:29, 18.63s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▉ | 188/254 [59:33<20:29, 18.63s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▉ | 188/254 [59:33<20:29, 18.63s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▉ | 188/254 [59:33<20:29, 18.63s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████▎ | 189/254 [59:51<19:59, 18.45s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████▎ | 189/254 [59:51<19:59, 18.45s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████▎ | 189/254 [59:51<19:59, 18.45s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████▎ | 189/254 [59:51<19:59, 18.45s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████▎ | 189/254 [59:51<19:59, 18.45s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████▎ | 189/254 [59:51<19:59, 18.45s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████▎ | 189/254 [59:51<19:59, 18.45s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████▎ | 189/254 [59:51<19:59, 18.45s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████▎ | 189/254 [59:51<19:59, 18.45s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████ | 190/254 [1:00:08<19:07, 17.93s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████ | 190/254 [1:00:08<19:07, 17.93s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████ | 190/254 [1:00:08<19:07, 17.93s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████ | 190/254 [1:00:08<19:07, 17.93s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████ | 190/254 [1:00:08<19:07, 17.93s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████ | 190/254 [1:00:08<19:07, 17.93s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████ | 190/254 [1:00:08<19:07, 17.93s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████ | 190/254 [1:00:08<19:07, 17.93s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████ | 190/254 [1:00:08<19:07, 17.93s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████▍ | 191/254 [1:00:24<18:13, 17.36s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████▍ | 191/254 [1:00:24<18:13, 17.36s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████▍ | 191/254 [1:00:24<18:13, 17.36s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████▍ | 191/254 [1:00:24<18:13, 17.36s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████▍ | 191/254 [1:00:24<18:13, 17.36s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:37:32,242 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:37:32,242 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|███████████████████████████████████████████████████████████▋ | 192/254 [1:00:39<17:06, 16.56s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|███████████████████████████████████████████████████████████▋ | 192/254 [1:00:39<17:06, 16.56s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2683, 'learning_rate': 0.0001146, 'epoch': 0.75} + 76%|███████████████████████████████████████████████████████████▋ | 192/254 [1:00:39<17:06, 16.56s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|███████████████████████████████████████████████████████████▋ | 192/254 [1:00:39<17:06, 16.56s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|███████████████████████████████████████████████████████████▋ | 192/254 [1:00:39<17:06, 16.56s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:37:46,365 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:37:46,365 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:37:46,365 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|████████████████████████████████████████████████████████████ | 193/254 [1:00:53<16:00, 15.74s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|████████████████████████████████████████████████████████████ | 193/254 [1:00:53<16:00, 15.74s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:37:54,497 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:37:54,497 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:37:54,497 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:37:54,497 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:38:01,933 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:38:01,933 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3034, 'learning_rate': 0.0001158, 'epoch': 0.76} +[WARNING|modeling_utils.py:388] 2022-03-03 00:38:06,394 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:38:06,394 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:38:10,566 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:38:10,566 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:38:10,566 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▋ | 195/254 [1:01:16<13:28, 13.71s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:38:14,716 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▋ | 195/254 [1:01:16<13:28, 13.71s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:38:14,716 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:38:18,537 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:38:14,716 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:38:18,537 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:38:14,716 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:38:22,380 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:38:14,716 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:38:22,380 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:38:14,716 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▉ | 196/254 [1:01:27<12:15, 12.68s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:38:24,881 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▉ | 196/254 [1:01:27<12:15, 12.68s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:38:24,881 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:38:28,310 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:38:24,881 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:38:30,531 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:38:24,881 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:38:32,569 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:38:24,881 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:38:32,569 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:38:24,881 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:38:34,703 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:38:24,881 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:38:36,698 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:38:24,881 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:38:38,594 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:38:24,881 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:38:40,458 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:38:24,881 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:38:40,458 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:38:24,881 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:38:42,315 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:38:24,881 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:38:43,987 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:38:24,881 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:38:47,136 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:38:24,881 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:38:47,136 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:38:24,881 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:38:48,728 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:38:24,881 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:38:50,159 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:38:24,881 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:38:51,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:38:24,881 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:38:51,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:38:24,881 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:38:53,398 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:38:24,881 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +RuntimeError: CUDA out of memory. Tried to allocate 1.65 GiB (GPU 0; 15.78 GiB total capacity; 11.62 GiB already allocated; 1.65 GiB free; 12.44 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF +RuntimeError: CUDA out of memory. Tried to allocate 1.65 GiB (GPU 0; 15.78 GiB total capacity; 11.62 GiB already allocated; 1.65 GiB free; 12.44 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF +RuntimeError: CUDA out of memory. Tried to allocate 1.65 GiB (GPU 0; 15.78 GiB total capacity; 11.62 GiB already allocated; 1.65 GiB free; 12.44 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF \ No newline at end of file diff --git a/wandb/run-20220302_233655-33dtvgaa/files/requirements.txt b/wandb/run-20220302_233655-33dtvgaa/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..3974f97a24952deb24d97850f53367da9e7c347d --- /dev/null +++ b/wandb/run-20220302_233655-33dtvgaa/files/requirements.txt @@ -0,0 +1,184 @@ +absl-py==1.0.0 +aiohttp==3.8.1 +aiosignal==1.2.0 +anyio==3.5.0 +appdirs==1.4.4 +argon2-cffi-bindings==21.2.0 +argon2-cffi==21.3.0 +asttokens==2.0.5 +async-timeout==4.0.2 +attrs==21.4.0 +audioread==2.1.9 +babel==2.9.1 +backcall==0.2.0 +bitsandbytes-cuda113==0.26.0 +black==22.1.0 +bleach==4.1.0 +cachetools==5.0.0 +certifi==2021.10.8 +cffi==1.15.0 +charset-normalizer==2.0.11 +chex==0.1.0 +click==8.0.3 +clldutils==3.10.1 +colorlog==6.6.0 +csvw==1.11.0 +cycler==0.11.0 +datasets==1.18.3 +debugpy==1.5.1 +decorator==5.1.1 +defusedxml==0.7.1 +dill==0.3.4 +dlinfo==1.2.1 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +entrypoints==0.4 +executing==0.8.2 +filelock==3.4.2 +flatbuffers==2.0 +flax==0.4.0 +fonttools==4.29.1 +frozenlist==1.3.0 +fsspec==2022.1.0 +gitdb==4.0.9 +gitpython==3.1.27 +google-auth-oauthlib==0.4.6 +google-auth==2.6.0 +grpcio==1.43.0 +huggingface-hub==0.4.0 +hypothesis==6.36.1 +idna==3.3 +importlib-metadata==4.10.1 +ipykernel==6.8.0 +ipython-genutils==0.2.0 +ipython==8.0.1 +ipywidgets==7.6.5 +isodate==0.6.1 +jax==0.2.28 +jaxlib==0.1.76+cuda11.cudnn82 +jedi==0.18.1 +jinja2==3.0.3 +jiwer==2.3.0 +joblib==1.1.0 +json5==0.9.6 +jsonschema==4.4.0 +jupyter-client==7.1.2 +jupyter-console==6.4.0 +jupyter-core==4.9.1 +jupyter-server==1.13.5 +jupyter==1.0.0 +jupyterlab-pygments==0.1.2 +jupyterlab-server==2.10.3 +jupyterlab-widgets==1.0.2 +jupyterlab==3.2.9 +kiwisolver==1.3.2 +librosa==0.8.1 +llvmlite==0.38.0 +markdown==3.3.6 +markupsafe==2.0.1 +matplotlib-inline==0.1.3 +matplotlib==3.5.1 +mistune==0.8.4 +msgpack==1.0.3 +multidict==6.0.2 +multiprocess==0.70.12.2 +mypy-extensions==0.4.3 +nbclassic==0.3.5 +nbclient==0.5.10 +nbconvert==6.4.1 +nbformat==5.1.3 +nest-asyncio==1.5.4 +notebook==6.4.8 +numba==0.55.1 +numpy==1.21.5 +oauthlib==3.2.0 +opt-einsum==3.3.0 +optax==0.1.0 +packaging==21.3 +pandas==1.4.0 +pandocfilters==1.5.0 +parso==0.8.3 +pathspec==0.9.0 +pathtools==0.1.2 +pexpect==4.8.0 +phonemizer==3.0.1 +pickleshare==0.7.5 +pillow==9.0.0 +pip==22.0.2 +pkg-resources==0.0.0 +platformdirs==2.4.1 +pooch==1.6.0 +prometheus-client==0.13.1 +promise==2.3 +prompt-toolkit==3.0.26 +protobuf==3.19.4 +psutil==5.9.0 +ptyprocess==0.7.0 +pure-eval==0.2.2 +pyarrow==6.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pycparser==2.21 +pyctcdecode==0.3.0 +pygments==2.11.2 +pygtrie==2.4.2 +pyparsing==3.0.7 +pyrsistent==0.18.1 +python-dateutil==2.8.2 +python-levenshtein==0.12.2 +pytz==2021.3 +pyyaml==6.0 +pyzmq==22.3.0 +qtconsole==5.2.2 +qtpy==2.0.1 +regex==2022.1.18 +requests-oauthlib==1.3.1 +requests==2.27.1 +resampy==0.2.2 +rfc3986==2.0.0 +rsa==4.8 +sacremoses==0.0.47 +scikit-learn==1.0.2 +scipy==1.7.3 +segments==2.2.0 +send2trash==1.8.0 +sentry-sdk==1.5.6 +setuptools==44.1.1 +shortuuid==1.0.8 +six==1.16.0 +smmap==5.0.0 +sniffio==1.2.0 +sortedcontainers==2.4.0 +soundfile==0.10.3.post1 +stack-data==0.1.4 +tabulate==0.8.9 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.1 +tensorboard==2.8.0 +termcolor==1.1.0 +terminado==0.13.1 +testpath==0.5.0 +threadpoolctl==3.1.0 +tokenizers==0.11.4 +tomli==2.0.0 +toolz==0.11.2 +torch==1.10.2+cu113 +torchaudio==0.10.2+cu113 +tornado==6.1 +tqdm==4.62.3 +traitlets==5.1.1 +transformers==4.17.0.dev0 +typing-extensions==3.10.0.2 +uritemplate==4.1.1 +urllib3==1.26.8 +wandb==0.12.10 +wcwidth==0.2.5 +webencodings==0.5.1 +websocket-client==1.2.3 +werkzeug==2.0.2 +wheel==0.37.1 +widgetsnbextension==3.5.2 +xxhash==2.0.2 +yarl==1.7.2 +yaspin==2.1.0 +zipp==3.7.0 \ No newline at end of file diff --git a/wandb/run-20220302_233655-33dtvgaa/files/wandb-metadata.json b/wandb/run-20220302_233655-33dtvgaa/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..6bdd63a11a8a3b33ac8c5481f6a14b8b61b6d7f6 --- /dev/null +++ b/wandb/run-20220302_233655-33dtvgaa/files/wandb-metadata.json @@ -0,0 +1,60 @@ +{ + "os": "Linux-5.11.0-1028-gcp-x86_64-with-glibc2.33", + "python": "3.9.5", + "heartbeatAt": "2022-03-02T23:36:56.434217", + "startedAt": "2022-03-02T23:36:55.283203", + "docker": null, + "gpu": "Tesla V100-SXM2-16GB", + "gpu_count": 2, + "cpu_count": 16, + "cuda": null, + "args": [ + "--dataset_name=librispeech_asr", + "--model_name_or_path=./", + "--tokenizer_name=./", + "--dataset_config_name=clean", + "--train_split_name=train.100", + "--eval_split_name=validation", + "--output_dir=./", + "--preprocessing_num_workers=1", + "--length_column_name=input_length", + "--overwrite_output_dir", + "--num_train_epochs=1", + "--per_device_train_batch_size=14", + "--per_device_eval_batch_size=14", + "--gradient_accumulation_steps=8", + "--generation_max_length=40", + "--generation_num_beams=1", + "--learning_rate=3e-4", + "--warmup_steps=500", + "--evaluation_strategy=steps", + "--text_column_name=text", + "--save_steps=500", + "--eval_steps=500", + "--logging_steps=1", + "--save_total_limit=1", + "--freeze_feature_encoder", + "--gradient_checkpointing", + "--fp16", + "--group_by_length", + "--predict_with_generate", + "--do_lower_case", + "--do_train", + "--do_eval", + "--report_to=wandb", + "--push_to_hub", + "--use_auth_token" + ], + "state": "running", + "program": "/home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/run_speech_recognition_seq2seq.py", + "codePath": "run_speech_recognition_seq2seq.py", + "git": { + "remote": "https://huggingface.co/sanchit-gandhi/wav2vec2-gpt2-wandb-grid-search", + "commit": "8c7181143c175387040dc1a6ac2ddbc9179b550c" + }, + "email": "sanchit@huggingface.co", + "root": "/home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search", + "host": "sanchit--v100", + "username": "sanchit_huggingface_co", + "executable": "/home/sanchit_huggingface_co/gcp/bin/python" +} diff --git a/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json b/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..ce38b1a2f10bf3072d74da5f8a247a6b6df32ed2 --- /dev/null +++ b/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json @@ -0,0 +1 @@ +{"train/loss": 4.3323, "train/learning_rate": 0.0001194, "train/epoch": 0.78, "train/global_step": 200, "_runtime": 3718, "_timestamp": 1646267933, "_step": 199, "gradients/decoder.transformer.ln_f.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 10.0, 904.0, 101.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-176.83297729492188, -145.7891387939453, -114.74528503417969, -83.70144653320312, -52.65760040283203, -21.613754272460938, 9.430084228515625, 40.47393798828125, 71.51777648925781, 102.5616226196289, 133.60546875, 164.64930725097656, 195.69314575195312, 226.73699951171875, 257.78082275390625, 288.82470703125, 319.8685302734375, 350.9123840332031, 381.9562072753906, 413.00006103515625, 444.0439147949219, 475.0877685546875, 506.131591796875, 537.1754150390625, 568.2192993164062, 599.2631225585938, 630.3070068359375, 661.350830078125, 692.3946533203125, 723.4385375976562, 754.4823608398438, 785.5262451171875, 816.5700073242188, 847.6138305664062, 878.65771484375, 909.7015380859375, 940.745361328125, 971.7892456054688, 1002.8330688476562, 1033.876953125, 1064.9207763671875, 1095.964599609375, 1127.0084228515625, 1158.05224609375, 1189.09619140625, 1220.1400146484375, 1251.183837890625, 1282.2276611328125, 1313.271484375, 1344.3153076171875, 1375.359130859375, 1406.403076171875, 1437.4468994140625, 1468.49072265625, 1499.5345458984375, 1530.578369140625, 1561.622314453125, 1592.6661376953125, 1623.7099609375, 1654.75390625, 1685.7977294921875, 1716.841552734375, 1747.8853759765625, 1778.92919921875, 1809.9730224609375]}, "gradients/decoder.transformer.ln_f.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 2.0, 0.0, 3.0, 5.0, 6.0, 2.0, 10.0, 8.0, 10.0, 12.0, 14.0, 14.0, 20.0, 19.0, 33.0, 40.0, 40.0, 46.0, 46.0, 49.0, 48.0, 54.0, 58.0, 53.0, 58.0, 49.0, 47.0, 50.0, 38.0, 39.0, 26.0, 20.0, 24.0, 17.0, 9.0, 11.0, 6.0, 10.0, 6.0, 5.0, 3.0, 3.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-114.36355590820312, -110.03234100341797, -105.70112609863281, -101.36991119384766, -97.0386962890625, -92.70748901367188, -88.37627410888672, -84.04505920410156, -79.7138442993164, -75.38262939453125, -71.0514144897461, -66.72019958496094, -62.38898849487305, -58.05777359008789, -53.7265625, -49.395347595214844, -45.06413269042969, -40.73291778564453, -36.401702880859375, -32.070491790771484, -27.739276885986328, -23.408061981201172, -19.07684898376465, -14.745635986328125, -10.414421081542969, -6.083207130432129, -1.751993179321289, 2.579220771789551, 6.910434722900391, 11.241649627685547, 15.57286262512207, 19.904075622558594, 24.23529052734375, 28.566505432128906, 32.89772033691406, 37.22893142700195, 41.56014633178711, 45.891361236572266, 50.222572326660156, 54.55378723144531, 58.88500213623047, 63.216217041015625, 67.54743194580078, 71.87864685058594, 76.20985412597656, 80.54107666015625, 84.87228393554688, 89.20349884033203, 93.53471374511719, 97.86592864990234, 102.1971435546875, 106.52835845947266, 110.85957336425781, 115.19078063964844, 119.5219955444336, 123.85321044921875, 128.18441772460938, 132.515625, 136.8468475341797, 141.1780548095703, 145.50927734375, 149.84048461914062, 154.1717071533203, 158.50291442871094, 162.83413696289062]}, "gradients/decoder.transformer.h.23.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 3.0, 0.0, 2.0, 4.0, 5.0, 7.0, 7.0, 9.0, 10.0, 18.0, 7.0, 21.0, 33.0, 21.0, 35.0, 41.0, 51.0, 52.0, 46.0, 46.0, 49.0, 48.0, 64.0, 59.0, 51.0, 65.0, 47.0, 32.0, 26.0, 33.0, 23.0, 22.0, 20.0, 17.0, 13.0, 7.0, 5.0, 7.0, 4.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-7.36328125, -7.15606689453125, -6.9488525390625, -6.74163818359375, -6.534423828125, -6.32720947265625, -6.1199951171875, -5.91278076171875, -5.70556640625, -5.49835205078125, -5.2911376953125, -5.08392333984375, -4.876708984375, -4.66949462890625, -4.4622802734375, -4.25506591796875, -4.0478515625, -3.84063720703125, -3.6334228515625, -3.42620849609375, -3.218994140625, -3.01177978515625, -2.8045654296875, -2.59735107421875, -2.39013671875, -2.18292236328125, -1.9757080078125, -1.76849365234375, -1.561279296875, -1.35406494140625, -1.1468505859375, -0.93963623046875, -0.732421875, -0.52520751953125, -0.3179931640625, -0.11077880859375, 0.096435546875, 0.30364990234375, 0.5108642578125, 0.71807861328125, 0.92529296875, 1.13250732421875, 1.3397216796875, 1.54693603515625, 1.754150390625, 1.96136474609375, 2.1685791015625, 2.37579345703125, 2.5830078125, 2.79022216796875, 2.9974365234375, 3.20465087890625, 3.411865234375, 3.61907958984375, 3.8262939453125, 4.03350830078125, 4.24072265625, 4.44793701171875, 4.6551513671875, 4.86236572265625, 5.069580078125, 5.27679443359375, 5.4840087890625, 5.69122314453125, 5.8984375]}, "gradients/decoder.transformer.h.23.mlp.c_proj.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 2.0, 2.0, 3.0, 5.0, 1.0, 5.0, 16.0, 11.0, 19.0, 23.0, 37.0, 51.0, 84.0, 121.0, 199.0, 322.0, 583.0, 1102.0, 2449.0, 6142.0, 21555.0, 135995.0, 2666167.0, 1260344.0, 75531.0, 15057.0, 4574.0, 1794.0, 887.0, 464.0, 255.0, 165.0, 106.0, 77.0, 49.0, 27.0, 19.0, 14.0, 8.0, 9.0, 5.0, 5.0, 6.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-28.796875, -27.89306640625, -26.9892578125, -26.08544921875, -25.181640625, -24.27783203125, -23.3740234375, -22.47021484375, -21.56640625, -20.66259765625, -19.7587890625, -18.85498046875, -17.951171875, -17.04736328125, -16.1435546875, -15.23974609375, -14.3359375, -13.43212890625, -12.5283203125, -11.62451171875, -10.720703125, -9.81689453125, -8.9130859375, -8.00927734375, -7.10546875, -6.20166015625, -5.2978515625, -4.39404296875, -3.490234375, -2.58642578125, -1.6826171875, -0.77880859375, 0.125, 1.02880859375, 1.9326171875, 2.83642578125, 3.740234375, 4.64404296875, 5.5478515625, 6.45166015625, 7.35546875, 8.25927734375, 9.1630859375, 10.06689453125, 10.970703125, 11.87451171875, 12.7783203125, 13.68212890625, 14.5859375, 15.48974609375, 16.3935546875, 17.29736328125, 18.201171875, 19.10498046875, 20.0087890625, 20.91259765625, 21.81640625, 22.72021484375, 23.6240234375, 24.52783203125, 25.431640625, 26.33544921875, 27.2392578125, 28.14306640625, 29.046875]}, "gradients/decoder.transformer.h.23.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 4.0, 6.0, 5.0, 15.0, 29.0, 45.0, 77.0, 115.0, 211.0, 456.0, 807.0, 860.0, 645.0, 356.0, 181.0, 110.0, 78.0, 31.0, 19.0, 18.0, 6.0, 3.0, 4.0, 2.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-32.625, -31.42529296875, -30.2255859375, -29.02587890625, -27.826171875, -26.62646484375, -25.4267578125, -24.22705078125, -23.02734375, -21.82763671875, -20.6279296875, -19.42822265625, -18.228515625, -17.02880859375, -15.8291015625, -14.62939453125, -13.4296875, -12.22998046875, -11.0302734375, -9.83056640625, -8.630859375, -7.43115234375, -6.2314453125, -5.03173828125, -3.83203125, -2.63232421875, -1.4326171875, -0.23291015625, 0.966796875, 2.16650390625, 3.3662109375, 4.56591796875, 5.765625, 6.96533203125, 8.1650390625, 9.36474609375, 10.564453125, 11.76416015625, 12.9638671875, 14.16357421875, 15.36328125, 16.56298828125, 17.7626953125, 18.96240234375, 20.162109375, 21.36181640625, 22.5615234375, 23.76123046875, 24.9609375, 26.16064453125, 27.3603515625, 28.56005859375, 29.759765625, 30.95947265625, 32.1591796875, 33.35888671875, 34.55859375, 35.75830078125, 36.9580078125, 38.15771484375, 39.357421875, 40.55712890625, 41.7568359375, 42.95654296875, 44.15625]}, "gradients/decoder.transformer.h.23.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 3.0, 0.0, 0.0, 1.0, 1.0, 3.0, 5.0, 3.0, 6.0, 6.0, 12.0, 19.0, 24.0, 62.0, 115.0, 218.0, 500.0, 1092.0, 4233.0, 439348.0, 3735414.0, 10128.0, 1794.0, 634.0, 325.0, 156.0, 80.0, 49.0, 29.0, 11.0, 9.0, 4.0, 3.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-116.1875, -112.2041015625, -108.220703125, -104.2373046875, -100.25390625, -96.2705078125, -92.287109375, -88.3037109375, -84.3203125, -80.3369140625, -76.353515625, -72.3701171875, -68.38671875, -64.4033203125, -60.419921875, -56.4365234375, -52.453125, -48.4697265625, -44.486328125, -40.5029296875, -36.51953125, -32.5361328125, -28.552734375, -24.5693359375, -20.5859375, -16.6025390625, -12.619140625, -8.6357421875, -4.65234375, -0.6689453125, 3.314453125, 7.2978515625, 11.28125, 15.2646484375, 19.248046875, 23.2314453125, 27.21484375, 31.1982421875, 35.181640625, 39.1650390625, 43.1484375, 47.1318359375, 51.115234375, 55.0986328125, 59.08203125, 63.0654296875, 67.048828125, 71.0322265625, 75.015625, 78.9990234375, 82.982421875, 86.9658203125, 90.94921875, 94.9326171875, 98.916015625, 102.8994140625, 106.8828125, 110.8662109375, 114.849609375, 118.8330078125, 122.81640625, 126.7998046875, 130.783203125, 134.7666015625, 138.75]}, "gradients/decoder.transformer.h.23.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 103.0, 785.0, 118.0, 5.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-905.34814453125, -888.7216186523438, -872.0950927734375, -855.468505859375, -838.8419799804688, -822.2154541015625, -805.5889282226562, -788.96240234375, -772.3358154296875, -755.7092895507812, -739.082763671875, -722.4561767578125, -705.8296508789062, -689.203125, -672.5765991210938, -655.9500732421875, -639.3235473632812, -622.697021484375, -606.0704956054688, -589.4439086914062, -572.8173828125, -556.1908569335938, -539.5643310546875, -522.9378051757812, -506.3112487792969, -489.6847229003906, -473.05816650390625, -456.431640625, -439.80511474609375, -423.1785583496094, -406.5520324707031, -389.92547607421875, -373.2989501953125, -356.67242431640625, -340.0458679199219, -323.4193420410156, -306.79278564453125, -290.166259765625, -273.53973388671875, -256.9132080078125, -240.28665161132812, -223.6601104736328, -207.0335693359375, -190.40704345703125, -173.78050231933594, -157.15396118164062, -140.52743530273438, -123.90089416503906, -107.27435302734375, -90.64781188964844, -74.02127838134766, -57.39474105834961, -40.76820373535156, -24.14166259765625, -7.515129089355469, 9.111404418945312, 25.737945556640625, 42.36448287963867, 58.99102020263672, 75.6175537109375, 92.24409484863281, 108.87063598632812, 125.4971694946289, 142.1237030029297, 158.750244140625]}, "gradients/decoder.transformer.h.23.ln_2.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 4.0, 0.0, 3.0, 0.0, 3.0, 3.0, 3.0, 8.0, 8.0, 11.0, 13.0, 11.0, 29.0, 29.0, 20.0, 25.0, 30.0, 28.0, 36.0, 32.0, 57.0, 49.0, 50.0, 42.0, 36.0, 43.0, 49.0, 40.0, 54.0, 44.0, 39.0, 39.0, 32.0, 31.0, 24.0, 21.0, 14.0, 8.0, 13.0, 9.0, 4.0, 7.0, 6.0, 5.0, 1.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-96.62782287597656, -93.78887939453125, -90.94994354248047, -88.11100769042969, -85.27206420898438, -82.43312072753906, -79.59418487548828, -76.7552490234375, -73.91630554199219, -71.07736206054688, -68.2384262084961, -65.39949035644531, -62.560546875, -59.72160720825195, -56.882667541503906, -54.04372787475586, -51.20478820800781, -48.365848541259766, -45.52690887451172, -42.68796920776367, -39.849029541015625, -37.01008987426758, -34.17115020751953, -31.332210540771484, -28.493270874023438, -25.65433120727539, -22.815391540527344, -19.976451873779297, -17.13751220703125, -14.298572540283203, -11.459632873535156, -8.62069320678711, -5.781761169433594, -2.942821502685547, -0.1038818359375, 2.735057830810547, 5.573997497558594, 8.41293716430664, 11.251876831054688, 14.090816497802734, 16.92975616455078, 19.768695831298828, 22.607635498046875, 25.446575164794922, 28.28551483154297, 31.124454498291016, 33.96339416503906, 36.80233383178711, 39.641273498535156, 42.4802131652832, 45.31915283203125, 48.1580924987793, 50.997032165527344, 53.83597183227539, 56.67491149902344, 59.513851165771484, 62.35279083251953, 65.19172668457031, 68.03067016601562, 70.86961364746094, 73.70854949951172, 76.5474853515625, 79.38642883300781, 82.22537231445312, 85.0643081665039]}, "gradients/decoder.transformer.h.23.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 4.0, 2.0, 3.0, 5.0, 4.0, 5.0, 3.0, 18.0, 15.0, 12.0, 16.0, 24.0, 15.0, 24.0, 24.0, 32.0, 42.0, 31.0, 42.0, 38.0, 42.0, 48.0, 46.0, 47.0, 47.0, 52.0, 47.0, 47.0, 30.0, 42.0, 36.0, 29.0, 31.0, 14.0, 21.0, 11.0, 19.0, 9.0, 13.0, 8.0, 3.0, 2.0, 5.0, 3.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-6.01171875, -5.8240966796875, -5.636474609375, -5.4488525390625, -5.26123046875, -5.0736083984375, -4.885986328125, -4.6983642578125, -4.5107421875, -4.3231201171875, -4.135498046875, -3.9478759765625, -3.76025390625, -3.5726318359375, -3.385009765625, -3.1973876953125, -3.009765625, -2.8221435546875, -2.634521484375, -2.4468994140625, -2.25927734375, -2.0716552734375, -1.884033203125, -1.6964111328125, -1.5087890625, -1.3211669921875, -1.133544921875, -0.9459228515625, -0.75830078125, -0.5706787109375, -0.383056640625, -0.1954345703125, -0.0078125, 0.1798095703125, 0.367431640625, 0.5550537109375, 0.74267578125, 0.9302978515625, 1.117919921875, 1.3055419921875, 1.4931640625, 1.6807861328125, 1.868408203125, 2.0560302734375, 2.24365234375, 2.4312744140625, 2.618896484375, 2.8065185546875, 2.994140625, 3.1817626953125, 3.369384765625, 3.5570068359375, 3.74462890625, 3.9322509765625, 4.119873046875, 4.3074951171875, 4.4951171875, 4.6827392578125, 4.870361328125, 5.0579833984375, 5.24560546875, 5.4332275390625, 5.620849609375, 5.8084716796875, 5.99609375]}, "gradients/decoder.transformer.h.23.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 5.0, 7.0, 6.0, 15.0, 20.0, 22.0, 51.0, 50.0, 80.0, 109.0, 162.0, 250.0, 380.0, 546.0, 924.0, 1376.0, 2036.0, 3259.0, 5287.0, 8127.0, 13410.0, 21760.0, 36516.0, 62365.0, 108913.0, 182356.0, 224664.0, 154715.0, 90031.0, 51405.0, 30492.0, 18257.0, 11346.0, 7138.0, 4469.0, 2844.0, 1772.0, 1137.0, 733.0, 524.0, 333.0, 227.0, 154.0, 109.0, 55.0, 33.0, 29.0, 19.0, 19.0, 9.0, 7.0, 8.0, 4.0, 2.0, 1.0, 3.0, 2.0], "bins": [-0.6865234375, -0.665924072265625, -0.64532470703125, -0.624725341796875, -0.6041259765625, -0.583526611328125, -0.56292724609375, -0.542327880859375, -0.521728515625, -0.501129150390625, -0.48052978515625, -0.459930419921875, -0.4393310546875, -0.418731689453125, -0.39813232421875, -0.377532958984375, -0.35693359375, -0.336334228515625, -0.31573486328125, -0.295135498046875, -0.2745361328125, -0.253936767578125, -0.23333740234375, -0.212738037109375, -0.192138671875, -0.171539306640625, -0.15093994140625, -0.130340576171875, -0.1097412109375, -0.089141845703125, -0.06854248046875, -0.047943115234375, -0.02734375, -0.006744384765625, 0.01385498046875, 0.034454345703125, 0.0550537109375, 0.075653076171875, 0.09625244140625, 0.116851806640625, 0.137451171875, 0.158050537109375, 0.17864990234375, 0.199249267578125, 0.2198486328125, 0.240447998046875, 0.26104736328125, 0.281646728515625, 0.30224609375, 0.322845458984375, 0.34344482421875, 0.364044189453125, 0.3846435546875, 0.405242919921875, 0.42584228515625, 0.446441650390625, 0.467041015625, 0.487640380859375, 0.50823974609375, 0.528839111328125, 0.5494384765625, 0.570037841796875, 0.59063720703125, 0.611236572265625, 0.6318359375]}, "gradients/decoder.transformer.h.23.crossattention.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 2.0, 1.0, 3.0, 1.0, 1.0, 4.0, 3.0, 6.0, 4.0, 3.0, 6.0, 7.0, 11.0, 8.0, 14.0, 17.0, 15.0, 22.0, 21.0, 19.0, 32.0, 29.0, 33.0, 45.0, 40.0, 36.0, 33.0, 34.0, 45.0, 1066.0, 32.0, 33.0, 32.0, 36.0, 36.0, 40.0, 26.0, 31.0, 36.0, 22.0, 21.0, 20.0, 16.0, 16.0, 16.0, 14.0, 9.0, 11.0, 6.0, 4.0, 1.0, 9.0, 2.0, 5.0, 4.0, 1.0, 1.0, 3.0, 0.0, 0.0, 2.0], "bins": [-3.1953125, -3.095947265625, -2.99658203125, -2.897216796875, -2.7978515625, -2.698486328125, -2.59912109375, -2.499755859375, -2.400390625, -2.301025390625, -2.20166015625, -2.102294921875, -2.0029296875, -1.903564453125, -1.80419921875, -1.704833984375, -1.60546875, -1.506103515625, -1.40673828125, -1.307373046875, -1.2080078125, -1.108642578125, -1.00927734375, -0.909912109375, -0.810546875, -0.711181640625, -0.61181640625, -0.512451171875, -0.4130859375, -0.313720703125, -0.21435546875, -0.114990234375, -0.015625, 0.083740234375, 0.18310546875, 0.282470703125, 0.3818359375, 0.481201171875, 0.58056640625, 0.679931640625, 0.779296875, 0.878662109375, 0.97802734375, 1.077392578125, 1.1767578125, 1.276123046875, 1.37548828125, 1.474853515625, 1.57421875, 1.673583984375, 1.77294921875, 1.872314453125, 1.9716796875, 2.071044921875, 2.17041015625, 2.269775390625, 2.369140625, 2.468505859375, 2.56787109375, 2.667236328125, 2.7666015625, 2.865966796875, 2.96533203125, 3.064697265625, 3.1640625]}, "gradients/decoder.transformer.h.23.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 4.0, 0.0, 5.0, 4.0, 4.0, 14.0, 15.0, 29.0, 33.0, 59.0, 76.0, 125.0, 179.0, 269.0, 409.0, 585.0, 809.0, 1242.0, 1861.0, 2744.0, 4067.0, 6240.0, 9404.0, 14470.0, 22590.0, 35200.0, 55854.0, 86912.0, 130051.0, 1151383.0, 224499.0, 121943.0, 80864.0, 51733.0, 32740.0, 20881.0, 13555.0, 8735.0, 5825.0, 3841.0, 2577.0, 1708.0, 1107.0, 797.0, 559.0, 333.0, 256.0, 169.0, 124.0, 79.0, 56.0, 45.0, 28.0, 18.0, 15.0, 8.0, 5.0, 5.0, 4.0, 1.0, 2.0], "bins": [-0.413818359375, -0.4012451171875, -0.388671875, -0.3760986328125, -0.363525390625, -0.3509521484375, -0.33837890625, -0.3258056640625, -0.313232421875, -0.3006591796875, -0.2880859375, -0.2755126953125, -0.262939453125, -0.2503662109375, -0.23779296875, -0.2252197265625, -0.212646484375, -0.2000732421875, -0.1875, -0.1749267578125, -0.162353515625, -0.1497802734375, -0.13720703125, -0.1246337890625, -0.112060546875, -0.0994873046875, -0.0869140625, -0.0743408203125, -0.061767578125, -0.0491943359375, -0.03662109375, -0.0240478515625, -0.011474609375, 0.0010986328125, 0.013671875, 0.0262451171875, 0.038818359375, 0.0513916015625, 0.06396484375, 0.0765380859375, 0.089111328125, 0.1016845703125, 0.1142578125, 0.1268310546875, 0.139404296875, 0.1519775390625, 0.16455078125, 0.1771240234375, 0.189697265625, 0.2022705078125, 0.21484375, 0.2274169921875, 0.239990234375, 0.2525634765625, 0.26513671875, 0.2777099609375, 0.290283203125, 0.3028564453125, 0.3154296875, 0.3280029296875, 0.340576171875, 0.3531494140625, 0.36572265625, 0.3782958984375, 0.390869140625]}, "gradients/decoder.transformer.h.23.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 3.0, 0.0, 4.0, 1.0, 1.0, 2.0, 1.0, 0.0, 7.0, 5.0, 6.0, 10.0, 5.0, 17.0, 17.0, 16.0, 21.0, 23.0, 28.0, 30.0, 32.0, 43.0, 38.0, 53.0, 57.0, 60.0, 54.0, 57.0, 58.0, 45.0, 52.0, 34.0, 41.0, 28.0, 35.0, 19.0, 19.0, 15.0, 14.0, 4.0, 14.0, 4.0, 10.0, 6.0, 5.0, 5.0, 6.0, 3.0, 1.0, 1.0, 1.0, 2.0, 4.0, 2.0], "bins": [-0.0038471221923828125, -0.003740847110748291, -0.0036345720291137695, -0.003528296947479248, -0.0034220218658447266, -0.003315746784210205, -0.0032094717025756836, -0.003103196620941162, -0.0029969215393066406, -0.002890646457672119, -0.0027843713760375977, -0.002678096294403076, -0.0025718212127685547, -0.002465546131134033, -0.0023592710494995117, -0.0022529959678649902, -0.0021467208862304688, -0.0020404458045959473, -0.0019341707229614258, -0.0018278956413269043, -0.0017216205596923828, -0.0016153454780578613, -0.0015090703964233398, -0.0014027953147888184, -0.0012965202331542969, -0.0011902451515197754, -0.001083970069885254, -0.0009776949882507324, -0.0008714199066162109, -0.0007651448249816895, -0.000658869743347168, -0.0005525946617126465, -0.000446319580078125, -0.0003400444984436035, -0.00023376941680908203, -0.00012749433517456055, -2.1219253540039062e-05, 8.505582809448242e-05, 0.0001913309097290039, 0.0002976059913635254, 0.0004038810729980469, 0.0005101561546325684, 0.0006164312362670898, 0.0007227063179016113, 0.0008289813995361328, 0.0009352564811706543, 0.0010415315628051758, 0.0011478066444396973, 0.0012540817260742188, 0.0013603568077087402, 0.0014666318893432617, 0.0015729069709777832, 0.0016791820526123047, 0.0017854571342468262, 0.0018917322158813477, 0.001998007297515869, 0.0021042823791503906, 0.002210557460784912, 0.0023168325424194336, 0.002423107624053955, 0.0025293827056884766, 0.002635657787322998, 0.0027419328689575195, 0.002848207950592041, 0.0029544830322265625]}, "gradients/decoder.transformer.h.23.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 0.0, 3.0, 7.0, 2.0, 4.0, 4.0, 5.0, 4.0, 11.0, 13.0, 10.0, 13.0, 17.0, 17.0, 26.0, 40.0, 40.0, 70.0, 77.0, 107.0, 124.0, 170.0, 242.0, 324.0, 600.0, 19736.0, 1023575.0, 1687.0, 425.0, 283.0, 209.0, 154.0, 136.0, 95.0, 72.0, 62.0, 40.0, 25.0, 37.0, 29.0, 12.0, 19.0, 12.0, 10.0, 4.0, 5.0, 4.0, 0.0, 4.0, 2.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.06866455078125, -0.06639766693115234, -0.06413078308105469, -0.06186389923095703, -0.059597015380859375, -0.05733013153076172, -0.05506324768066406, -0.052796363830566406, -0.05052947998046875, -0.048262596130371094, -0.04599571228027344, -0.04372882843017578, -0.041461944580078125, -0.03919506072998047, -0.03692817687988281, -0.034661293029785156, -0.0323944091796875, -0.030127525329589844, -0.027860641479492188, -0.02559375762939453, -0.023326873779296875, -0.02105998992919922, -0.018793106079101562, -0.016526222229003906, -0.01425933837890625, -0.011992454528808594, -0.009725570678710938, -0.007458686828613281, -0.005191802978515625, -0.0029249191284179688, -0.0006580352783203125, 0.0016088485717773438, 0.003875732421875, 0.006142616271972656, 0.008409500122070312, 0.010676383972167969, 0.012943267822265625, 0.015210151672363281, 0.017477035522460938, 0.019743919372558594, 0.02201080322265625, 0.024277687072753906, 0.026544570922851562, 0.02881145477294922, 0.031078338623046875, 0.03334522247314453, 0.03561210632324219, 0.037878990173339844, 0.0401458740234375, 0.042412757873535156, 0.04467964172363281, 0.04694652557373047, 0.049213409423828125, 0.05148029327392578, 0.05374717712402344, 0.056014060974121094, 0.05828094482421875, 0.060547828674316406, 0.06281471252441406, 0.06508159637451172, 0.06734848022460938, 0.06961536407470703, 0.07188224792480469, 0.07414913177490234, 0.076416015625]}, "gradients/decoder.transformer.h.23.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 32.0, 734.0, 243.0, 7.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.008459459990262985, -0.008007566444575787, -0.007555673830211163, -0.007103780750185251, -0.00665188767015934, -0.006199994124472141, -0.005748101510107517, -0.005296207964420319, -0.004844315350055695, -0.004392422270029783, -0.003940529190003872, -0.0034886361099779606, -0.0030367430299520493, -0.0025848497170954943, -0.002132956637069583, -0.0016810635570436716, -0.0012291702441871166, -0.0007772771641612053, -0.00032538402592763305, 0.0001265091123059392, 0.0005784021923318505, 0.0010302953887730837, 0.001482188468798995, 0.0019340815488249063, 0.0023859746288508177, 0.002837867708876729, 0.0032897607889026403, 0.0037416541017591953, 0.004193547181785107, 0.004645440261811018, 0.005097333341836929, 0.005549226421862841, 0.006001119501888752, 0.006453012581914663, 0.006904905661940575, 0.007356798741966486, 0.007808691821992397, 0.008260585367679596, 0.00871247798204422, 0.009164371527731419, 0.009616264142096043, 0.010068157687783241, 0.010520050302147865, 0.010971943847835064, 0.011423836462199688, 0.011875730007886887, 0.01232762262225151, 0.01277951616793871, 0.013231409713625908, 0.013683303259313107, 0.01413519587367773, 0.01458708941936493, 0.015038982033729553, 0.015490875579416752, 0.01594276912510395, 0.016394661739468575, 0.0168465543538332, 0.017298446968197823, 0.017750341445207596, 0.01820223405957222, 0.018654126673936844, 0.019106019288301468, 0.01955791376531124, 0.020009806379675865, 0.02046169899404049]}, "gradients/decoder.transformer.h.23.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 5.0, 3.0, 7.0, 6.0, 7.0, 10.0, 11.0, 16.0, 19.0, 38.0, 44.0, 38.0, 46.0, 44.0, 48.0, 69.0, 43.0, 37.0, 69.0, 62.0, 60.0, 51.0, 48.0, 34.0, 37.0, 32.0, 19.0, 17.0, 15.0, 19.0, 16.0, 8.0, 12.0, 8.0, 6.0, 6.0, 1.0, 5.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0022068023681640625, -0.0021236399188637733, -0.002040477469563484, -0.001957315020263195, -0.0018741525709629059, -0.0017909901216626167, -0.0017078276723623276, -0.0016246652230620384, -0.0015415027737617493, -0.0014583403244614601, -0.001375177875161171, -0.0012920154258608818, -0.0012088529765605927, -0.0011256905272603035, -0.0010425280779600143, -0.0009593656286597252, -0.000876203179359436, -0.0007930407300591469, -0.0007098782807588577, -0.0006267158314585686, -0.0005435533821582794, -0.00046039093285799026, -0.0003772284835577011, -0.00029406603425741196, -0.0002109035849571228, -0.00012774113565683365, -4.4578686356544495e-05, 3.858376294374466e-05, 0.00012174621224403381, 0.00020490866154432297, 0.0002880711108446121, 0.0003712335601449013, 0.00045439600944519043, 0.0005375584587454796, 0.0006207209080457687, 0.0007038833573460579, 0.000787045806646347, 0.0008702082559466362, 0.0009533707052469254, 0.0010365331545472145, 0.0011196956038475037, 0.0012028580531477928, 0.001286020502448082, 0.0013691829517483711, 0.0014523454010486603, 0.0015355078503489494, 0.0016186702996492386, 0.0017018327489495277, 0.001784995198249817, 0.001868157647550106, 0.0019513200968503952, 0.0020344825461506844, 0.0021176449954509735, 0.0022008074447512627, 0.002283969894051552, 0.002367132343351841, 0.00245029479265213, 0.0025334572419524193, 0.0026166196912527084, 0.0026997821405529976, 0.0027829445898532867, 0.002866107039153576, 0.002949269488453865, 0.003032431937754154, 0.0031155943870544434]}, "gradients/decoder.transformer.h.23.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 4.0, 2.0, 3.0, 5.0, 4.0, 5.0, 3.0, 18.0, 15.0, 12.0, 16.0, 24.0, 15.0, 24.0, 24.0, 32.0, 42.0, 31.0, 42.0, 38.0, 43.0, 47.0, 46.0, 47.0, 47.0, 52.0, 47.0, 47.0, 30.0, 42.0, 36.0, 29.0, 31.0, 14.0, 21.0, 11.0, 19.0, 9.0, 13.0, 9.0, 2.0, 2.0, 5.0, 3.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-6.01171875, -5.8240966796875, -5.636474609375, -5.4488525390625, -5.26123046875, -5.0736083984375, -4.885986328125, -4.6983642578125, -4.5107421875, -4.3231201171875, -4.135498046875, -3.9478759765625, -3.76025390625, -3.5726318359375, -3.385009765625, -3.1973876953125, -3.009765625, -2.8221435546875, -2.634521484375, -2.4468994140625, -2.25927734375, -2.0716552734375, -1.884033203125, -1.6964111328125, -1.5087890625, -1.3211669921875, -1.133544921875, -0.9459228515625, -0.75830078125, -0.5706787109375, -0.383056640625, -0.1954345703125, -0.0078125, 0.1798095703125, 0.367431640625, 0.5550537109375, 0.74267578125, 0.9302978515625, 1.117919921875, 1.3055419921875, 1.4931640625, 1.6807861328125, 1.868408203125, 2.0560302734375, 2.24365234375, 2.4312744140625, 2.618896484375, 2.8065185546875, 2.994140625, 3.1817626953125, 3.369384765625, 3.5570068359375, 3.74462890625, 3.9322509765625, 4.119873046875, 4.3074951171875, 4.4951171875, 4.6827392578125, 4.870361328125, 5.0579833984375, 5.24560546875, 5.4332275390625, 5.620849609375, 5.8084716796875, 5.99609375]}, "gradients/decoder.transformer.h.23.attn.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 4.0, 8.0, 3.0, 9.0, 3.0, 18.0, 27.0, 25.0, 43.0, 56.0, 82.0, 116.0, 123.0, 254.0, 315.0, 468.0, 659.0, 1012.0, 1552.0, 2386.0, 4162.0, 7265.0, 13891.0, 39581.0, 411770.0, 487690.0, 43170.0, 14710.0, 7457.0, 4322.0, 2549.0, 1562.0, 975.0, 712.0, 480.0, 306.0, 238.0, 166.0, 111.0, 72.0, 65.0, 47.0, 22.0, 27.0, 21.0, 13.0, 3.0, 9.0, 2.0, 3.0, 1.0, 2.0, 2.0, 0.0, 1.0, 1.0], "bins": [-29.828125, -28.925048828125, -28.02197265625, -27.118896484375, -26.2158203125, -25.312744140625, -24.40966796875, -23.506591796875, -22.603515625, -21.700439453125, -20.79736328125, -19.894287109375, -18.9912109375, -18.088134765625, -17.18505859375, -16.281982421875, -15.37890625, -14.475830078125, -13.57275390625, -12.669677734375, -11.7666015625, -10.863525390625, -9.96044921875, -9.057373046875, -8.154296875, -7.251220703125, -6.34814453125, -5.445068359375, -4.5419921875, -3.638916015625, -2.73583984375, -1.832763671875, -0.9296875, -0.026611328125, 0.87646484375, 1.779541015625, 2.6826171875, 3.585693359375, 4.48876953125, 5.391845703125, 6.294921875, 7.197998046875, 8.10107421875, 9.004150390625, 9.9072265625, 10.810302734375, 11.71337890625, 12.616455078125, 13.51953125, 14.422607421875, 15.32568359375, 16.228759765625, 17.1318359375, 18.034912109375, 18.93798828125, 19.841064453125, 20.744140625, 21.647216796875, 22.55029296875, 23.453369140625, 24.3564453125, 25.259521484375, 26.16259765625, 27.065673828125, 27.96875]}, "gradients/decoder.transformer.h.23.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 3.0, 1.0, 2.0, 1.0, 3.0, 3.0, 2.0, 2.0, 7.0, 3.0, 7.0, 10.0, 8.0, 15.0, 12.0, 14.0, 21.0, 17.0, 19.0, 34.0, 38.0, 31.0, 58.0, 53.0, 62.0, 67.0, 124.0, 263.0, 1451.0, 200.0, 95.0, 62.0, 50.0, 52.0, 24.0, 32.0, 30.0, 30.0, 22.0, 20.0, 21.0, 22.0, 14.0, 10.0, 14.0, 9.0, 9.0, 7.0, 3.0, 1.0, 3.0, 2.0, 3.0, 1.0, 1.0], "bins": [-19.875, -19.3426513671875, -18.810302734375, -18.2779541015625, -17.74560546875, -17.2132568359375, -16.680908203125, -16.1485595703125, -15.6162109375, -15.0838623046875, -14.551513671875, -14.0191650390625, -13.48681640625, -12.9544677734375, -12.422119140625, -11.8897705078125, -11.357421875, -10.8250732421875, -10.292724609375, -9.7603759765625, -9.22802734375, -8.6956787109375, -8.163330078125, -7.6309814453125, -7.0986328125, -6.5662841796875, -6.033935546875, -5.5015869140625, -4.96923828125, -4.4368896484375, -3.904541015625, -3.3721923828125, -2.83984375, -2.3074951171875, -1.775146484375, -1.2427978515625, -0.71044921875, -0.1781005859375, 0.354248046875, 0.8865966796875, 1.4189453125, 1.9512939453125, 2.483642578125, 3.0159912109375, 3.54833984375, 4.0806884765625, 4.613037109375, 5.1453857421875, 5.677734375, 6.2100830078125, 6.742431640625, 7.2747802734375, 7.80712890625, 8.3394775390625, 8.871826171875, 9.4041748046875, 9.9365234375, 10.4688720703125, 11.001220703125, 11.5335693359375, 12.06591796875, 12.5982666015625, 13.130615234375, 13.6629638671875, 14.1953125]}, "gradients/decoder.transformer.h.23.attn.c_attn.weight": {"_type": "histogram", "values": [2.0, 1.0, 3.0, 2.0, 3.0, 6.0, 8.0, 8.0, 8.0, 11.0, 10.0, 14.0, 17.0, 11.0, 22.0, 23.0, 36.0, 25.0, 46.0, 46.0, 73.0, 127.0, 197.0, 734.0, 5952.0, 3027636.0, 108154.0, 1522.0, 417.0, 163.0, 117.0, 65.0, 45.0, 37.0, 34.0, 15.0, 20.0, 18.0, 15.0, 9.0, 10.0, 10.0, 10.0, 11.0, 9.0, 4.0, 3.0, 1.0, 2.0, 3.0, 2.0, 1.0, 2.0, 1.0, 1.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-58.875, -56.5888671875, -54.302734375, -52.0166015625, -49.73046875, -47.4443359375, -45.158203125, -42.8720703125, -40.5859375, -38.2998046875, -36.013671875, -33.7275390625, -31.44140625, -29.1552734375, -26.869140625, -24.5830078125, -22.296875, -20.0107421875, -17.724609375, -15.4384765625, -13.15234375, -10.8662109375, -8.580078125, -6.2939453125, -4.0078125, -1.7216796875, 0.564453125, 2.8505859375, 5.13671875, 7.4228515625, 9.708984375, 11.9951171875, 14.28125, 16.5673828125, 18.853515625, 21.1396484375, 23.42578125, 25.7119140625, 27.998046875, 30.2841796875, 32.5703125, 34.8564453125, 37.142578125, 39.4287109375, 41.71484375, 44.0009765625, 46.287109375, 48.5732421875, 50.859375, 53.1455078125, 55.431640625, 57.7177734375, 60.00390625, 62.2900390625, 64.576171875, 66.8623046875, 69.1484375, 71.4345703125, 73.720703125, 76.0068359375, 78.29296875, 80.5791015625, 82.865234375, 85.1513671875, 87.4375]}, "gradients/decoder.transformer.h.23.ln_1.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 117.0, 804.0, 88.0, 6.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-164.1995391845703, -158.19998168945312, -152.20042419433594, -146.20086669921875, -140.20130920410156, -134.20175170898438, -128.20220947265625, -122.20264434814453, -116.20308685302734, -110.20352935791016, -104.20397186279297, -98.20442199707031, -92.20486450195312, -86.20530700683594, -80.20574951171875, -74.20619201660156, -68.20663452148438, -62.20707702636719, -56.20751953125, -50.20796585083008, -44.20840835571289, -38.2088508605957, -32.20929718017578, -26.209739685058594, -20.210182189941406, -14.210625648498535, -8.211069107055664, -2.2115135192871094, 3.788043975830078, 9.787601470947266, 15.787155151367188, 21.786712646484375, 27.7862548828125, 33.78581237792969, 39.785369873046875, 45.7849235534668, 51.784481048583984, 57.78403854370117, 63.783592224121094, 69.78314971923828, 75.78270721435547, 81.78226470947266, 87.78182220458984, 93.7813720703125, 99.78092956542969, 105.78048706054688, 111.78004455566406, 117.77960205078125, 123.77915954589844, 129.77871704101562, 135.7782745361328, 141.77783203125, 147.7773895263672, 153.77694702148438, 159.7764892578125, 165.77606201171875, 171.77560424804688, 177.77516174316406, 183.77471923828125, 189.77427673339844, 195.77383422851562, 201.7733917236328, 207.77294921875, 213.77249145507812, 219.77206420898438]}, "gradients/decoder.transformer.h.23.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0, 2.0, 3.0, 2.0, 6.0, 5.0, 7.0, 8.0, 11.0, 15.0, 14.0, 14.0, 27.0, 25.0, 24.0, 20.0, 30.0, 32.0, 30.0, 38.0, 24.0, 44.0, 44.0, 47.0, 50.0, 57.0, 52.0, 42.0, 48.0, 33.0, 28.0, 30.0, 30.0, 30.0, 28.0, 25.0, 24.0, 17.0, 10.0, 12.0, 7.0, 6.0, 5.0, 1.0, 5.0, 2.0, 1.0, 2.0, 2.0], "bins": [-68.90167999267578, -67.07246398925781, -65.24324798583984, -63.41402816772461, -61.584808349609375, -59.755592346191406, -57.92637634277344, -56.09716033935547, -54.267940521240234, -52.438724517822266, -50.60950469970703, -48.78028869628906, -46.951072692871094, -45.12185287475586, -43.29263687133789, -41.463417053222656, -39.63420104980469, -37.80498504638672, -35.975765228271484, -34.146549224853516, -32.31732940673828, -30.488113403320312, -28.658897399902344, -26.829679489135742, -25.00046157836914, -23.17124366760254, -21.342025756835938, -19.51280975341797, -17.683591842651367, -15.854373931884766, -14.02515697479248, -12.195940017700195, -10.366722106933594, -8.537504196166992, -6.708287239074707, -4.879069805145264, -3.0498523712158203, -1.2206344604492188, 0.6085824966430664, 2.4377994537353516, 4.267017364501953, 6.0962347984313965, 7.92545223236084, 9.754669189453125, 11.583887100219727, 13.413105010986328, 15.242321968078613, 17.0715389251709, 18.9007568359375, 20.7299747467041, 22.559192657470703, 24.388408660888672, 26.217626571655273, 28.046844482421875, 29.876060485839844, 31.705278396606445, 33.53449630737305, 35.363712310791016, 37.19293212890625, 39.02214813232422, 40.85136413574219, 42.68058395385742, 44.50979995727539, 46.339019775390625, 48.168235778808594]}, "gradients/decoder.transformer.h.22.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 4.0, 2.0, 2.0, 6.0, 7.0, 8.0, 14.0, 9.0, 18.0, 19.0, 15.0, 22.0, 12.0, 17.0, 27.0, 39.0, 30.0, 25.0, 37.0, 39.0, 47.0, 49.0, 46.0, 43.0, 39.0, 46.0, 48.0, 45.0, 33.0, 41.0, 27.0, 37.0, 26.0, 28.0, 14.0, 16.0, 13.0, 11.0, 12.0, 13.0, 5.0, 7.0, 4.0, 3.0, 4.0, 3.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-5.8359375, -5.64849853515625, -5.4610595703125, -5.27362060546875, -5.086181640625, -4.89874267578125, -4.7113037109375, -4.52386474609375, -4.33642578125, -4.14898681640625, -3.9615478515625, -3.77410888671875, -3.586669921875, -3.39923095703125, -3.2117919921875, -3.02435302734375, -2.8369140625, -2.64947509765625, -2.4620361328125, -2.27459716796875, -2.087158203125, -1.89971923828125, -1.7122802734375, -1.52484130859375, -1.33740234375, -1.14996337890625, -0.9625244140625, -0.77508544921875, -0.587646484375, -0.40020751953125, -0.2127685546875, -0.02532958984375, 0.162109375, 0.34954833984375, 0.5369873046875, 0.72442626953125, 0.911865234375, 1.09930419921875, 1.2867431640625, 1.47418212890625, 1.66162109375, 1.84906005859375, 2.0364990234375, 2.22393798828125, 2.411376953125, 2.59881591796875, 2.7862548828125, 2.97369384765625, 3.1611328125, 3.34857177734375, 3.5360107421875, 3.72344970703125, 3.910888671875, 4.09832763671875, 4.2857666015625, 4.47320556640625, 4.66064453125, 4.84808349609375, 5.0355224609375, 5.22296142578125, 5.410400390625, 5.59783935546875, 5.7852783203125, 5.97271728515625, 6.16015625]}, "gradients/decoder.transformer.h.22.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 3.0, 0.0, 0.0, 5.0, 1.0, 7.0, 11.0, 17.0, 22.0, 28.0, 18.0, 51.0, 61.0, 71.0, 153.0, 202.0, 311.0, 511.0, 983.0, 2141.0, 5165.0, 16270.0, 80013.0, 1317085.0, 2604971.0, 132096.0, 22108.0, 6581.0, 2589.0, 1198.0, 608.0, 335.0, 202.0, 142.0, 95.0, 62.0, 53.0, 32.0, 18.0, 20.0, 18.0, 10.0, 7.0, 7.0, 5.0, 5.0, 3.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-30.03125, -29.0, -27.96875, -26.9375, -25.90625, -24.875, -23.84375, -22.8125, -21.78125, -20.75, -19.71875, -18.6875, -17.65625, -16.625, -15.59375, -14.5625, -13.53125, -12.5, -11.46875, -10.4375, -9.40625, -8.375, -7.34375, -6.3125, -5.28125, -4.25, -3.21875, -2.1875, -1.15625, -0.125, 0.90625, 1.9375, 2.96875, 4.0, 5.03125, 6.0625, 7.09375, 8.125, 9.15625, 10.1875, 11.21875, 12.25, 13.28125, 14.3125, 15.34375, 16.375, 17.40625, 18.4375, 19.46875, 20.5, 21.53125, 22.5625, 23.59375, 24.625, 25.65625, 26.6875, 27.71875, 28.75, 29.78125, 30.8125, 31.84375, 32.875, 33.90625, 34.9375, 35.96875]}, "gradients/decoder.transformer.h.22.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 2.0, 3.0, 5.0, 5.0, 7.0, 16.0, 17.0, 20.0, 41.0, 41.0, 66.0, 116.0, 156.0, 234.0, 361.0, 473.0, 688.0, 579.0, 353.0, 291.0, 181.0, 113.0, 113.0, 61.0, 44.0, 31.0, 21.0, 15.0, 15.0, 11.0, 2.0, 3.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-35.21875, -34.283203125, -33.34765625, -32.412109375, -31.4765625, -30.541015625, -29.60546875, -28.669921875, -27.734375, -26.798828125, -25.86328125, -24.927734375, -23.9921875, -23.056640625, -22.12109375, -21.185546875, -20.25, -19.314453125, -18.37890625, -17.443359375, -16.5078125, -15.572265625, -14.63671875, -13.701171875, -12.765625, -11.830078125, -10.89453125, -9.958984375, -9.0234375, -8.087890625, -7.15234375, -6.216796875, -5.28125, -4.345703125, -3.41015625, -2.474609375, -1.5390625, -0.603515625, 0.33203125, 1.267578125, 2.203125, 3.138671875, 4.07421875, 5.009765625, 5.9453125, 6.880859375, 7.81640625, 8.751953125, 9.6875, 10.623046875, 11.55859375, 12.494140625, 13.4296875, 14.365234375, 15.30078125, 16.236328125, 17.171875, 18.107421875, 19.04296875, 19.978515625, 20.9140625, 21.849609375, 22.78515625, 23.720703125, 24.65625]}, "gradients/decoder.transformer.h.22.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 4.0, 9.0, 8.0, 11.0, 21.0, 26.0, 27.0, 57.0, 95.0, 122.0, 221.0, 578.0, 3514.0, 1549447.0, 2634858.0, 4157.0, 596.0, 189.0, 126.0, 74.0, 42.0, 37.0, 23.0, 14.0, 12.0, 9.0, 3.0, 1.0, 4.0, 3.0, 1.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-96.4375, -92.0595703125, -87.681640625, -83.3037109375, -78.92578125, -74.5478515625, -70.169921875, -65.7919921875, -61.4140625, -57.0361328125, -52.658203125, -48.2802734375, -43.90234375, -39.5244140625, -35.146484375, -30.7685546875, -26.390625, -22.0126953125, -17.634765625, -13.2568359375, -8.87890625, -4.5009765625, -0.123046875, 4.2548828125, 8.6328125, 13.0107421875, 17.388671875, 21.7666015625, 26.14453125, 30.5224609375, 34.900390625, 39.2783203125, 43.65625, 48.0341796875, 52.412109375, 56.7900390625, 61.16796875, 65.5458984375, 69.923828125, 74.3017578125, 78.6796875, 83.0576171875, 87.435546875, 91.8134765625, 96.19140625, 100.5693359375, 104.947265625, 109.3251953125, 113.703125, 118.0810546875, 122.458984375, 126.8369140625, 131.21484375, 135.5927734375, 139.970703125, 144.3486328125, 148.7265625, 153.1044921875, 157.482421875, 161.8603515625, 166.23828125, 170.6162109375, 174.994140625, 179.3720703125, 183.75]}, "gradients/decoder.transformer.h.22.ln_2.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 21.0, 404.0, 560.0, 31.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-138.68788146972656, -123.40946197509766, -108.13104248046875, -92.85263061523438, -77.57421112060547, -62.29579162597656, -47.01737976074219, -31.73896026611328, -16.460540771484375, -1.1821231842041016, 14.096294403076172, 29.374710083007812, 44.65312957763672, 59.931549072265625, 75.2099609375, 90.4883804321289, 105.76679992675781, 121.04521942138672, 136.32363891601562, 151.60205078125, 166.88046264648438, 182.1588897705078, 197.4373016357422, 212.71572875976562, 227.994140625, 243.27255249023438, 258.55096435546875, 273.82940673828125, 289.1078186035156, 304.38623046875, 319.6646423339844, 334.94305419921875, 350.22149658203125, 365.4999084472656, 380.7783203125, 396.0567626953125, 411.3351745605469, 426.61358642578125, 441.8919982910156, 457.17041015625, 472.4488525390625, 487.7272644042969, 503.00567626953125, 518.2841186523438, 533.5625, 548.8409423828125, 564.119384765625, 579.3977661132812, 594.6761474609375, 609.95458984375, 625.2329711914062, 640.5114135742188, 655.789794921875, 671.0682373046875, 686.3466796875, 701.6250610351562, 716.9035034179688, 732.1819458007812, 747.4603271484375, 762.73876953125, 778.0171508789062, 793.2955932617188, 808.573974609375, 823.8524169921875, 839.130859375]}, "gradients/decoder.transformer.h.22.ln_2.bias": {"_type": "histogram", "values": [2.0, 1.0, 3.0, 5.0, 4.0, 6.0, 4.0, 7.0, 6.0, 13.0, 12.0, 13.0, 11.0, 7.0, 24.0, 26.0, 20.0, 28.0, 25.0, 27.0, 27.0, 45.0, 46.0, 43.0, 38.0, 41.0, 43.0, 44.0, 36.0, 42.0, 33.0, 32.0, 35.0, 32.0, 34.0, 20.0, 32.0, 29.0, 17.0, 12.0, 23.0, 15.0, 8.0, 9.0, 15.0, 6.0, 6.0, 4.0, 3.0, 1.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-63.233245849609375, -60.78960418701172, -58.3459587097168, -55.902313232421875, -53.45867156982422, -51.01502990722656, -48.57138442993164, -46.12773895263672, -43.68409729003906, -41.240455627441406, -38.796810150146484, -36.35316467285156, -33.909523010253906, -31.465879440307617, -29.022235870361328, -26.57859230041504, -24.13494873046875, -21.69130516052246, -19.247661590576172, -16.804018020629883, -14.360374450683594, -11.916730880737305, -9.473087310791016, -7.029443740844727, -4.5858001708984375, -2.1421566009521484, 0.3014869689941406, 2.7451305389404297, 5.188774108886719, 7.632417678833008, 10.076061248779297, 12.519704818725586, 14.963340759277344, 17.406984329223633, 19.850627899169922, 22.29427146911621, 24.7379150390625, 27.18155860900879, 29.625202178955078, 32.06884765625, 34.512489318847656, 36.95613098144531, 39.399776458740234, 41.843421936035156, 44.28706359863281, 46.73070526123047, 49.17435073852539, 51.61799621582031, 54.06163787841797, 56.505279541015625, 58.94892501831055, 61.39257049560547, 63.836212158203125, 66.27985382080078, 68.72349548339844, 71.16714477539062, 73.61078643798828, 76.05442810058594, 78.49807739257812, 80.94171905517578, 83.38536071777344, 85.8290023803711, 88.27264404296875, 90.71629333496094, 93.1599349975586]}, "gradients/decoder.transformer.h.22.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 2.0, 2.0, 2.0, 2.0, 5.0, 3.0, 8.0, 7.0, 6.0, 11.0, 14.0, 18.0, 12.0, 21.0, 18.0, 20.0, 21.0, 19.0, 39.0, 37.0, 33.0, 43.0, 37.0, 34.0, 50.0, 38.0, 47.0, 32.0, 49.0, 50.0, 43.0, 47.0, 26.0, 19.0, 33.0, 23.0, 26.0, 19.0, 19.0, 19.0, 10.0, 15.0, 15.0, 4.0, 7.0, 4.0, 2.0, 2.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-5.38671875, -5.20037841796875, -5.0140380859375, -4.82769775390625, -4.641357421875, -4.45501708984375, -4.2686767578125, -4.08233642578125, -3.89599609375, -3.70965576171875, -3.5233154296875, -3.33697509765625, -3.150634765625, -2.96429443359375, -2.7779541015625, -2.59161376953125, -2.4052734375, -2.21893310546875, -2.0325927734375, -1.84625244140625, -1.659912109375, -1.47357177734375, -1.2872314453125, -1.10089111328125, -0.91455078125, -0.72821044921875, -0.5418701171875, -0.35552978515625, -0.169189453125, 0.01715087890625, 0.2034912109375, 0.38983154296875, 0.576171875, 0.76251220703125, 0.9488525390625, 1.13519287109375, 1.321533203125, 1.50787353515625, 1.6942138671875, 1.88055419921875, 2.06689453125, 2.25323486328125, 2.4395751953125, 2.62591552734375, 2.812255859375, 2.99859619140625, 3.1849365234375, 3.37127685546875, 3.5576171875, 3.74395751953125, 3.9302978515625, 4.11663818359375, 4.302978515625, 4.48931884765625, 4.6756591796875, 4.86199951171875, 5.04833984375, 5.23468017578125, 5.4210205078125, 5.60736083984375, 5.793701171875, 5.98004150390625, 6.1663818359375, 6.35272216796875, 6.5390625]}, "gradients/decoder.transformer.h.22.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 3.0, 0.0, 1.0, 3.0, 3.0, 6.0, 7.0, 14.0, 18.0, 22.0, 49.0, 58.0, 106.0, 144.0, 250.0, 429.0, 702.0, 1194.0, 2019.0, 3436.0, 6085.0, 10724.0, 19196.0, 35926.0, 69395.0, 140188.0, 266508.0, 239260.0, 121918.0, 59653.0, 31569.0, 17081.0, 9604.0, 5370.0, 3041.0, 1878.0, 1086.0, 631.0, 359.0, 213.0, 148.0, 92.0, 67.0, 44.0, 27.0, 14.0, 11.0, 5.0, 8.0, 2.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.83935546875, -0.8103103637695312, -0.7812652587890625, -0.7522201538085938, -0.723175048828125, -0.6941299438476562, -0.6650848388671875, -0.6360397338867188, -0.60699462890625, -0.5779495239257812, -0.5489044189453125, -0.5198593139648438, -0.490814208984375, -0.46176910400390625, -0.4327239990234375, -0.40367889404296875, -0.3746337890625, -0.34558868408203125, -0.3165435791015625, -0.28749847412109375, -0.258453369140625, -0.22940826416015625, -0.2003631591796875, -0.17131805419921875, -0.14227294921875, -0.11322784423828125, -0.0841827392578125, -0.05513763427734375, -0.026092529296875, 0.00295257568359375, 0.0319976806640625, 0.06104278564453125, 0.090087890625, 0.11913299560546875, 0.1481781005859375, 0.17722320556640625, 0.206268310546875, 0.23531341552734375, 0.2643585205078125, 0.29340362548828125, 0.32244873046875, 0.35149383544921875, 0.3805389404296875, 0.40958404541015625, 0.438629150390625, 0.46767425537109375, 0.4967193603515625, 0.5257644653320312, 0.5548095703125, 0.5838546752929688, 0.6128997802734375, 0.6419448852539062, 0.670989990234375, 0.7000350952148438, 0.7290802001953125, 0.7581253051757812, 0.78717041015625, 0.8162155151367188, 0.8452606201171875, 0.8743057250976562, 0.903350830078125, 0.9323959350585938, 0.9614410400390625, 0.9904861450195312, 1.01953125]}, "gradients/decoder.transformer.h.22.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 2.0, 9.0, 9.0, 6.0, 6.0, 10.0, 11.0, 11.0, 15.0, 16.0, 22.0, 19.0, 24.0, 37.0, 30.0, 34.0, 35.0, 36.0, 41.0, 47.0, 45.0, 40.0, 1067.0, 35.0, 44.0, 32.0, 33.0, 30.0, 37.0, 34.0, 29.0, 25.0, 29.0, 13.0, 20.0, 24.0, 12.0, 13.0, 12.0, 10.0, 4.0, 7.0, 6.0, 3.0, 2.0, 6.0, 1.0, 3.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-3.712890625, -3.59832763671875, -3.4837646484375, -3.36920166015625, -3.254638671875, -3.14007568359375, -3.0255126953125, -2.91094970703125, -2.79638671875, -2.68182373046875, -2.5672607421875, -2.45269775390625, -2.338134765625, -2.22357177734375, -2.1090087890625, -1.99444580078125, -1.8798828125, -1.76531982421875, -1.6507568359375, -1.53619384765625, -1.421630859375, -1.30706787109375, -1.1925048828125, -1.07794189453125, -0.96337890625, -0.84881591796875, -0.7342529296875, -0.61968994140625, -0.505126953125, -0.39056396484375, -0.2760009765625, -0.16143798828125, -0.046875, 0.06768798828125, 0.1822509765625, 0.29681396484375, 0.411376953125, 0.52593994140625, 0.6405029296875, 0.75506591796875, 0.86962890625, 0.98419189453125, 1.0987548828125, 1.21331787109375, 1.327880859375, 1.44244384765625, 1.5570068359375, 1.67156982421875, 1.7861328125, 1.90069580078125, 2.0152587890625, 2.12982177734375, 2.244384765625, 2.35894775390625, 2.4735107421875, 2.58807373046875, 2.70263671875, 2.81719970703125, 2.9317626953125, 3.04632568359375, 3.160888671875, 3.27545166015625, 3.3900146484375, 3.50457763671875, 3.619140625]}, "gradients/decoder.transformer.h.22.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 3.0, 7.0, 13.0, 13.0, 24.0, 34.0, 50.0, 84.0, 126.0, 176.0, 247.0, 314.0, 463.0, 687.0, 1034.0, 1365.0, 2007.0, 3059.0, 4324.0, 6586.0, 9809.0, 14620.0, 21944.0, 33136.0, 50023.0, 75197.0, 110131.0, 149738.0, 1201783.0, 131336.0, 92319.0, 61921.0, 41293.0, 27412.0, 18146.0, 12210.0, 8093.0, 5433.0, 3718.0, 2583.0, 1777.0, 1202.0, 855.0, 583.0, 400.0, 272.0, 177.0, 130.0, 100.0, 65.0, 41.0, 29.0, 19.0, 10.0, 17.0, 4.0, 4.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.39599609375, -0.383331298828125, -0.37066650390625, -0.358001708984375, -0.3453369140625, -0.332672119140625, -0.32000732421875, -0.307342529296875, -0.294677734375, -0.282012939453125, -0.26934814453125, -0.256683349609375, -0.2440185546875, -0.231353759765625, -0.21868896484375, -0.206024169921875, -0.193359375, -0.180694580078125, -0.16802978515625, -0.155364990234375, -0.1427001953125, -0.130035400390625, -0.11737060546875, -0.104705810546875, -0.092041015625, -0.079376220703125, -0.06671142578125, -0.054046630859375, -0.0413818359375, -0.028717041015625, -0.01605224609375, -0.003387451171875, 0.00927734375, 0.021942138671875, 0.03460693359375, 0.047271728515625, 0.0599365234375, 0.072601318359375, 0.08526611328125, 0.097930908203125, 0.110595703125, 0.123260498046875, 0.13592529296875, 0.148590087890625, 0.1612548828125, 0.173919677734375, 0.18658447265625, 0.199249267578125, 0.2119140625, 0.224578857421875, 0.23724365234375, 0.249908447265625, 0.2625732421875, 0.275238037109375, 0.28790283203125, 0.300567626953125, 0.313232421875, 0.325897216796875, 0.33856201171875, 0.351226806640625, 0.3638916015625, 0.376556396484375, 0.38922119140625, 0.401885986328125, 0.41455078125]}, "gradients/decoder.transformer.h.22.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 4.0, 2.0, 5.0, 4.0, 9.0, 10.0, 11.0, 18.0, 15.0, 16.0, 22.0, 29.0, 26.0, 36.0, 50.0, 62.0, 55.0, 78.0, 78.0, 89.0, 70.0, 69.0, 42.0, 52.0, 26.0, 27.0, 18.0, 20.0, 20.0, 12.0, 8.0, 6.0, 10.0, 3.0, 5.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.004657745361328125, -0.004519641399383545, -0.004381537437438965, -0.004243433475494385, -0.004105329513549805, -0.003967225551605225, -0.0038291215896606445, -0.0036910176277160645, -0.0035529136657714844, -0.0034148097038269043, -0.0032767057418823242, -0.003138601779937744, -0.003000497817993164, -0.002862393856048584, -0.002724289894104004, -0.002586185932159424, -0.0024480819702148438, -0.0023099780082702637, -0.0021718740463256836, -0.0020337700843811035, -0.0018956661224365234, -0.0017575621604919434, -0.0016194581985473633, -0.0014813542366027832, -0.0013432502746582031, -0.001205146312713623, -0.001067042350769043, -0.0009289383888244629, -0.0007908344268798828, -0.0006527304649353027, -0.0005146265029907227, -0.0003765225410461426, -0.0002384185791015625, -0.00010031461715698242, 3.7789344787597656e-05, 0.00017589330673217773, 0.0003139972686767578, 0.0004521012306213379, 0.000590205192565918, 0.000728309154510498, 0.0008664131164550781, 0.0010045170783996582, 0.0011426210403442383, 0.0012807250022888184, 0.0014188289642333984, 0.0015569329261779785, 0.0016950368881225586, 0.0018331408500671387, 0.0019712448120117188, 0.002109348773956299, 0.002247452735900879, 0.002385556697845459, 0.002523660659790039, 0.002661764621734619, 0.0027998685836791992, 0.0029379725456237793, 0.0030760765075683594, 0.0032141804695129395, 0.0033522844314575195, 0.0034903883934020996, 0.0036284923553466797, 0.0037665963172912598, 0.00390470027923584, 0.00404280424118042, 0.004180908203125]}, "gradients/decoder.transformer.h.22.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 3.0, 4.0, 1.0, 4.0, 5.0, 9.0, 9.0, 8.0, 11.0, 13.0, 28.0, 23.0, 41.0, 47.0, 62.0, 73.0, 124.0, 215.0, 387.0, 1179.0, 997044.0, 47758.0, 645.0, 286.0, 153.0, 105.0, 85.0, 46.0, 42.0, 27.0, 23.0, 25.0, 12.0, 14.0, 10.0, 8.0, 8.0, 13.0, 2.0, 5.0, 4.0, 3.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.0906982421875, -0.08765506744384766, -0.08461189270019531, -0.08156871795654297, -0.07852554321289062, -0.07548236846923828, -0.07243919372558594, -0.0693960189819336, -0.06635284423828125, -0.0633096694946289, -0.06026649475097656, -0.05722332000732422, -0.054180145263671875, -0.05113697052001953, -0.04809379577636719, -0.045050621032714844, -0.0420074462890625, -0.038964271545410156, -0.03592109680175781, -0.03287792205810547, -0.029834747314453125, -0.02679157257080078, -0.023748397827148438, -0.020705223083496094, -0.01766204833984375, -0.014618873596191406, -0.011575698852539062, -0.008532524108886719, -0.005489349365234375, -0.0024461746215820312, 0.0005970001220703125, 0.0036401748657226562, 0.006683349609375, 0.009726524353027344, 0.012769699096679688, 0.01581287384033203, 0.018856048583984375, 0.02189922332763672, 0.024942398071289062, 0.027985572814941406, 0.03102874755859375, 0.034071922302246094, 0.03711509704589844, 0.04015827178955078, 0.043201446533203125, 0.04624462127685547, 0.04928779602050781, 0.052330970764160156, 0.0553741455078125, 0.058417320251464844, 0.06146049499511719, 0.06450366973876953, 0.06754684448242188, 0.07059001922607422, 0.07363319396972656, 0.0766763687133789, 0.07971954345703125, 0.0827627182006836, 0.08580589294433594, 0.08884906768798828, 0.09189224243164062, 0.09493541717529297, 0.09797859191894531, 0.10102176666259766, 0.10406494140625]}, "gradients/decoder.transformer.h.22.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 342.0, 675.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0869641825556755, -0.08535484969615936, -0.08374552428722382, -0.08213619142770767, -0.08052685856819153, -0.07891753315925598, -0.07730820029973984, -0.0756988674402237, -0.07408954203128815, -0.072480209171772, -0.07087088376283646, -0.06926155090332031, -0.06765221804380417, -0.06604289263486862, -0.06443355977535248, -0.06282422691583633, -0.06121489778161049, -0.059605568647384644, -0.0579962357878685, -0.056386906653642654, -0.05477757751941681, -0.053168244659900665, -0.05155891552567482, -0.049949586391448975, -0.04834025353193283, -0.046730924397706985, -0.04512159153819084, -0.043512262403964996, -0.04190293326973915, -0.040293604135513306, -0.03868427127599716, -0.037074942141771317, -0.03546561300754547, -0.033856283873319626, -0.03224695101380348, -0.030637621879577637, -0.02902829274535179, -0.027418961748480797, -0.025809630751609802, -0.024200301617383957, -0.022590970620512962, -0.020981639623641968, -0.019372310489416122, -0.017762979492545128, -0.016153648495674133, -0.014544319361448288, -0.012934988364577293, -0.011325658299028873, -0.009716328233480453, -0.008106998167932034, -0.006497667636722326, -0.004888337105512619, -0.003279007039964199, -0.0016696769744157791, -6.0345977544784546e-05, 0.0015489840880036354, 0.0031583141535520554, 0.004767644219100475, 0.006376974750310183, 0.00798630528151989, 0.00959563534706831, 0.01120496541261673, 0.012814296409487724, 0.014423626475036144, 0.016032956540584564]}, "gradients/decoder.transformer.h.22.ln_cross_attn.bias": {"_type": "histogram", "values": [2.0, 3.0, 1.0, 2.0, 4.0, 2.0, 3.0, 4.0, 7.0, 8.0, 14.0, 19.0, 15.0, 23.0, 27.0, 32.0, 25.0, 43.0, 26.0, 59.0, 38.0, 61.0, 60.0, 46.0, 44.0, 49.0, 51.0, 57.0, 43.0, 34.0, 37.0, 42.0, 24.0, 24.0, 25.0, 17.0, 12.0, 8.0, 10.0, 5.0, 3.0, 5.0, 4.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0019191503524780273, -0.0018393006175756454, -0.0017594508826732635, -0.0016796011477708817, -0.0015997514128684998, -0.0015199016779661179, -0.001440051943063736, -0.001360202208161354, -0.0012803524732589722, -0.0012005027383565903, -0.0011206530034542084, -0.0010408032685518265, -0.0009609535336494446, -0.0008811037987470627, -0.0008012540638446808, -0.0007214043289422989, -0.000641554594039917, -0.0005617048591375351, -0.0004818551242351532, -0.0004020053893327713, -0.0003221556544303894, -0.0002423059195280075, -0.0001624561846256256, -8.260644972324371e-05, -2.7567148208618164e-06, 7.709302008152008e-05, 0.00015694275498390198, 0.00023679248988628387, 0.00031664222478866577, 0.00039649195969104767, 0.00047634169459342957, 0.0005561914294958115, 0.0006360411643981934, 0.0007158908993005753, 0.0007957406342029572, 0.000875590369105339, 0.000955440104007721, 0.0010352898389101028, 0.0011151395738124847, 0.0011949893087148666, 0.0012748390436172485, 0.0013546887785196304, 0.0014345385134220123, 0.0015143882483243942, 0.0015942379832267761, 0.001674087718129158, 0.00175393745303154, 0.0018337871879339218, 0.0019136369228363037, 0.0019934866577386856, 0.0020733363926410675, 0.0021531861275434494, 0.0022330358624458313, 0.002312885597348213, 0.002392735332250595, 0.002472585067152977, 0.002552434802055359, 0.002632284536957741, 0.0027121342718601227, 0.0027919840067625046, 0.0028718337416648865, 0.0029516834765672684, 0.0030315332114696503, 0.003111382946372032, 0.003191232681274414]}, "gradients/decoder.transformer.h.22.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 2.0, 2.0, 2.0, 2.0, 5.0, 3.0, 8.0, 7.0, 6.0, 11.0, 14.0, 18.0, 12.0, 21.0, 18.0, 20.0, 21.0, 19.0, 39.0, 38.0, 32.0, 43.0, 37.0, 34.0, 50.0, 38.0, 47.0, 32.0, 49.0, 50.0, 43.0, 47.0, 26.0, 19.0, 33.0, 23.0, 26.0, 19.0, 19.0, 19.0, 10.0, 15.0, 15.0, 4.0, 7.0, 4.0, 2.0, 1.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-5.38671875, -5.20037841796875, -5.0140380859375, -4.82769775390625, -4.641357421875, -4.45501708984375, -4.2686767578125, -4.08233642578125, -3.89599609375, -3.70965576171875, -3.5233154296875, -3.33697509765625, -3.150634765625, -2.96429443359375, -2.7779541015625, -2.59161376953125, -2.4052734375, -2.21893310546875, -2.0325927734375, -1.84625244140625, -1.659912109375, -1.47357177734375, -1.2872314453125, -1.10089111328125, -0.91455078125, -0.72821044921875, -0.5418701171875, -0.35552978515625, -0.169189453125, 0.01715087890625, 0.2034912109375, 0.38983154296875, 0.576171875, 0.76251220703125, 0.9488525390625, 1.13519287109375, 1.321533203125, 1.50787353515625, 1.6942138671875, 1.88055419921875, 2.06689453125, 2.25323486328125, 2.4395751953125, 2.62591552734375, 2.812255859375, 2.99859619140625, 3.1849365234375, 3.37127685546875, 3.5576171875, 3.74395751953125, 3.9302978515625, 4.11663818359375, 4.302978515625, 4.48931884765625, 4.6756591796875, 4.86199951171875, 5.04833984375, 5.23468017578125, 5.4210205078125, 5.60736083984375, 5.793701171875, 5.98004150390625, 6.1663818359375, 6.35272216796875, 6.5390625]}, "gradients/decoder.transformer.h.22.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 4.0, 1.0, 4.0, 3.0, 9.0, 10.0, 16.0, 13.0, 28.0, 35.0, 53.0, 73.0, 75.0, 130.0, 175.0, 234.0, 387.0, 608.0, 1024.0, 2140.0, 4943.0, 12995.0, 41319.0, 154431.0, 498681.0, 238970.0, 60972.0, 18542.0, 6543.0, 2739.0, 1309.0, 733.0, 420.0, 283.0, 191.0, 152.0, 78.0, 81.0, 38.0, 35.0, 28.0, 18.0, 17.0, 6.0, 1.0, 10.0, 2.0, 6.0, 1.0, 2.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.72265625, -5.52294921875, -5.3232421875, -5.12353515625, -4.923828125, -4.72412109375, -4.5244140625, -4.32470703125, -4.125, -3.92529296875, -3.7255859375, -3.52587890625, -3.326171875, -3.12646484375, -2.9267578125, -2.72705078125, -2.52734375, -2.32763671875, -2.1279296875, -1.92822265625, -1.728515625, -1.52880859375, -1.3291015625, -1.12939453125, -0.9296875, -0.72998046875, -0.5302734375, -0.33056640625, -0.130859375, 0.06884765625, 0.2685546875, 0.46826171875, 0.66796875, 0.86767578125, 1.0673828125, 1.26708984375, 1.466796875, 1.66650390625, 1.8662109375, 2.06591796875, 2.265625, 2.46533203125, 2.6650390625, 2.86474609375, 3.064453125, 3.26416015625, 3.4638671875, 3.66357421875, 3.86328125, 4.06298828125, 4.2626953125, 4.46240234375, 4.662109375, 4.86181640625, 5.0615234375, 5.26123046875, 5.4609375, 5.66064453125, 5.8603515625, 6.06005859375, 6.259765625, 6.45947265625, 6.6591796875, 6.85888671875, 7.05859375]}, "gradients/decoder.transformer.h.22.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 2.0, 4.0, 0.0, 8.0, 7.0, 5.0, 13.0, 10.0, 13.0, 14.0, 15.0, 16.0, 26.0, 31.0, 38.0, 37.0, 37.0, 35.0, 39.0, 58.0, 58.0, 107.0, 1599.0, 361.0, 112.0, 57.0, 49.0, 31.0, 45.0, 34.0, 37.0, 20.0, 19.0, 22.0, 17.0, 18.0, 16.0, 6.0, 12.0, 13.0, 5.0, 5.0, 5.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-22.96875, -22.269775390625, -21.57080078125, -20.871826171875, -20.1728515625, -19.473876953125, -18.77490234375, -18.075927734375, -17.376953125, -16.677978515625, -15.97900390625, -15.280029296875, -14.5810546875, -13.882080078125, -13.18310546875, -12.484130859375, -11.78515625, -11.086181640625, -10.38720703125, -9.688232421875, -8.9892578125, -8.290283203125, -7.59130859375, -6.892333984375, -6.193359375, -5.494384765625, -4.79541015625, -4.096435546875, -3.3974609375, -2.698486328125, -1.99951171875, -1.300537109375, -0.6015625, 0.097412109375, 0.79638671875, 1.495361328125, 2.1943359375, 2.893310546875, 3.59228515625, 4.291259765625, 4.990234375, 5.689208984375, 6.38818359375, 7.087158203125, 7.7861328125, 8.485107421875, 9.18408203125, 9.883056640625, 10.58203125, 11.281005859375, 11.97998046875, 12.678955078125, 13.3779296875, 14.076904296875, 14.77587890625, 15.474853515625, 16.173828125, 16.872802734375, 17.57177734375, 18.270751953125, 18.9697265625, 19.668701171875, 20.36767578125, 21.066650390625, 21.765625]}, "gradients/decoder.transformer.h.22.attn.c_attn.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 4.0, 5.0, 6.0, 8.0, 11.0, 8.0, 10.0, 17.0, 23.0, 20.0, 21.0, 17.0, 48.0, 43.0, 57.0, 80.0, 130.0, 255.0, 673.0, 3172.0, 2897352.0, 240679.0, 1871.0, 467.0, 224.0, 103.0, 86.0, 69.0, 46.0, 39.0, 40.0, 18.0, 21.0, 17.0, 12.0, 11.0, 13.0, 10.0, 8.0, 5.0, 5.0, 2.0, 2.0, 4.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-47.375, -45.7890625, -44.203125, -42.6171875, -41.03125, -39.4453125, -37.859375, -36.2734375, -34.6875, -33.1015625, -31.515625, -29.9296875, -28.34375, -26.7578125, -25.171875, -23.5859375, -22.0, -20.4140625, -18.828125, -17.2421875, -15.65625, -14.0703125, -12.484375, -10.8984375, -9.3125, -7.7265625, -6.140625, -4.5546875, -2.96875, -1.3828125, 0.203125, 1.7890625, 3.375, 4.9609375, 6.546875, 8.1328125, 9.71875, 11.3046875, 12.890625, 14.4765625, 16.0625, 17.6484375, 19.234375, 20.8203125, 22.40625, 23.9921875, 25.578125, 27.1640625, 28.75, 30.3359375, 31.921875, 33.5078125, 35.09375, 36.6796875, 38.265625, 39.8515625, 41.4375, 43.0234375, 44.609375, 46.1953125, 47.78125, 49.3671875, 50.953125, 52.5390625, 54.125]}, "gradients/decoder.transformer.h.22.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 7.0, 141.0, 827.0, 39.0, 3.0], "bins": [-274.7508850097656, -270.2724609375, -265.7940673828125, -261.3156433105469, -256.83721923828125, -252.35882568359375, -247.88040161132812, -243.40199279785156, -238.923583984375, -234.44517517089844, -229.96676635742188, -225.48834228515625, -221.0099334716797, -216.53152465820312, -212.0531005859375, -207.57469177246094, -203.09628295898438, -198.6178741455078, -194.13946533203125, -189.66104125976562, -185.18263244628906, -180.7042236328125, -176.22579956054688, -171.7473907470703, -167.26898193359375, -162.7905731201172, -158.31216430664062, -153.833740234375, -149.35533142089844, -144.87692260742188, -140.39849853515625, -135.9200897216797, -131.44168090820312, -126.96327209472656, -122.48485565185547, -118.00643920898438, -113.52803039550781, -109.04962158203125, -104.57120513916016, -100.09278869628906, -95.61438751220703, -91.13597106933594, -86.65756225585938, -82.17915344238281, -77.70073699951172, -73.22232055664062, -68.74391174316406, -64.2655029296875, -59.787086486816406, -55.30867385864258, -50.83026123046875, -46.35184860229492, -41.873435974121094, -37.395023345947266, -32.91661071777344, -28.43819808959961, -23.95978546142578, -19.481372833251953, -15.002960205078125, -10.524547576904297, -6.046134948730469, -1.5677223205566406, 2.9106903076171875, 7.389102935791016, 11.86751651763916]}, "gradients/decoder.transformer.h.22.ln_1.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 3.0, 2.0, 1.0, 0.0, 1.0, 2.0, 3.0, 7.0, 3.0, 6.0, 5.0, 10.0, 7.0, 12.0, 25.0, 20.0, 31.0, 24.0, 26.0, 28.0, 29.0, 31.0, 27.0, 36.0, 34.0, 51.0, 32.0, 48.0, 32.0, 40.0, 46.0, 45.0, 33.0, 40.0, 35.0, 33.0, 27.0, 29.0, 26.0, 22.0, 13.0, 16.0, 12.0, 13.0, 5.0, 4.0, 10.0, 5.0, 8.0, 2.0, 4.0, 5.0, 1.0, 2.0, 4.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0], "bins": [-63.05537033081055, -61.02915954589844, -59.002952575683594, -56.976741790771484, -54.95053482055664, -52.92432403564453, -50.89811706542969, -48.87190628051758, -46.84569549560547, -44.81948471069336, -42.793277740478516, -40.767066955566406, -38.74085998535156, -36.71464920043945, -34.688438415527344, -32.6622314453125, -30.636024475097656, -28.60981559753418, -26.583606719970703, -24.557395935058594, -22.53118896484375, -20.50497817993164, -18.478769302368164, -16.452560424804688, -14.426351547241211, -12.400142669677734, -10.373933792114258, -8.347723960876465, -6.321515083312988, -4.295306205749512, -2.2690963745117188, -0.2428874969482422, 1.7833251953125, 3.8095343112945557, 5.835743427276611, 7.861952781677246, 9.888161659240723, 11.9143705368042, 13.940580368041992, 15.966789245605469, 17.992998123168945, 20.019207000732422, 22.0454158782959, 24.071624755859375, 26.097835540771484, 28.124042510986328, 30.150253295898438, 32.17646026611328, 34.20267105102539, 36.2288818359375, 38.255088806152344, 40.28129959106445, 42.3075065612793, 44.333717346191406, 46.35992431640625, 48.38613510131836, 50.41234588623047, 52.43855667114258, 54.46476364135742, 56.49097442626953, 58.517181396484375, 60.543392181396484, 62.569602966308594, 64.59580993652344, 66.62201690673828]}, "gradients/decoder.transformer.h.21.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 3.0, 4.0, 1.0, 6.0, 3.0, 6.0, 7.0, 7.0, 13.0, 15.0, 10.0, 16.0, 20.0, 17.0, 23.0, 18.0, 23.0, 31.0, 40.0, 40.0, 32.0, 40.0, 35.0, 45.0, 41.0, 44.0, 44.0, 52.0, 45.0, 40.0, 40.0, 41.0, 25.0, 24.0, 29.0, 18.0, 21.0, 17.0, 20.0, 14.0, 14.0, 10.0, 10.0, 5.0, 1.0, 0.0, 3.0, 0.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-5.51953125, -5.328857421875, -5.13818359375, -4.947509765625, -4.7568359375, -4.566162109375, -4.37548828125, -4.184814453125, -3.994140625, -3.803466796875, -3.61279296875, -3.422119140625, -3.2314453125, -3.040771484375, -2.85009765625, -2.659423828125, -2.46875, -2.278076171875, -2.08740234375, -1.896728515625, -1.7060546875, -1.515380859375, -1.32470703125, -1.134033203125, -0.943359375, -0.752685546875, -0.56201171875, -0.371337890625, -0.1806640625, 0.010009765625, 0.20068359375, 0.391357421875, 0.58203125, 0.772705078125, 0.96337890625, 1.154052734375, 1.3447265625, 1.535400390625, 1.72607421875, 1.916748046875, 2.107421875, 2.298095703125, 2.48876953125, 2.679443359375, 2.8701171875, 3.060791015625, 3.25146484375, 3.442138671875, 3.6328125, 3.823486328125, 4.01416015625, 4.204833984375, 4.3955078125, 4.586181640625, 4.77685546875, 4.967529296875, 5.158203125, 5.348876953125, 5.53955078125, 5.730224609375, 5.9208984375, 6.111572265625, 6.30224609375, 6.492919921875, 6.68359375]}, "gradients/decoder.transformer.h.21.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 0.0, 2.0, 1.0, 2.0, 4.0, 6.0, 8.0, 8.0, 14.0, 18.0, 33.0, 22.0, 50.0, 52.0, 61.0, 95.0, 125.0, 275.0, 521.0, 1484.0, 5504.0, 32809.0, 371509.0, 3270178.0, 464894.0, 37625.0, 6182.0, 1520.0, 523.0, 233.0, 154.0, 93.0, 71.0, 54.0, 45.0, 34.0, 20.0, 11.0, 16.0, 12.0, 8.0, 2.0, 4.0, 8.0, 3.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-25.359375, -24.55517578125, -23.7509765625, -22.94677734375, -22.142578125, -21.33837890625, -20.5341796875, -19.72998046875, -18.92578125, -18.12158203125, -17.3173828125, -16.51318359375, -15.708984375, -14.90478515625, -14.1005859375, -13.29638671875, -12.4921875, -11.68798828125, -10.8837890625, -10.07958984375, -9.275390625, -8.47119140625, -7.6669921875, -6.86279296875, -6.05859375, -5.25439453125, -4.4501953125, -3.64599609375, -2.841796875, -2.03759765625, -1.2333984375, -0.42919921875, 0.375, 1.17919921875, 1.9833984375, 2.78759765625, 3.591796875, 4.39599609375, 5.2001953125, 6.00439453125, 6.80859375, 7.61279296875, 8.4169921875, 9.22119140625, 10.025390625, 10.82958984375, 11.6337890625, 12.43798828125, 13.2421875, 14.04638671875, 14.8505859375, 15.65478515625, 16.458984375, 17.26318359375, 18.0673828125, 18.87158203125, 19.67578125, 20.47998046875, 21.2841796875, 22.08837890625, 22.892578125, 23.69677734375, 24.5009765625, 25.30517578125, 26.109375]}, "gradients/decoder.transformer.h.21.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 1.0, 2.0, 4.0, 12.0, 21.0, 34.0, 46.0, 76.0, 135.0, 235.0, 331.0, 531.0, 719.0, 707.0, 451.0, 310.0, 200.0, 112.0, 56.0, 42.0, 25.0, 17.0, 11.0, 6.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-45.1875, -44.131103515625, -43.07470703125, -42.018310546875, -40.9619140625, -39.905517578125, -38.84912109375, -37.792724609375, -36.736328125, -35.679931640625, -34.62353515625, -33.567138671875, -32.5107421875, -31.454345703125, -30.39794921875, -29.341552734375, -28.28515625, -27.228759765625, -26.17236328125, -25.115966796875, -24.0595703125, -23.003173828125, -21.94677734375, -20.890380859375, -19.833984375, -18.777587890625, -17.72119140625, -16.664794921875, -15.6083984375, -14.552001953125, -13.49560546875, -12.439208984375, -11.3828125, -10.326416015625, -9.27001953125, -8.213623046875, -7.1572265625, -6.100830078125, -5.04443359375, -3.988037109375, -2.931640625, -1.875244140625, -0.81884765625, 0.237548828125, 1.2939453125, 2.350341796875, 3.40673828125, 4.463134765625, 5.51953125, 6.575927734375, 7.63232421875, 8.688720703125, 9.7451171875, 10.801513671875, 11.85791015625, 12.914306640625, 13.970703125, 15.027099609375, 16.08349609375, 17.139892578125, 18.1962890625, 19.252685546875, 20.30908203125, 21.365478515625, 22.421875]}, "gradients/decoder.transformer.h.21.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 5.0, 4.0, 9.0, 7.0, 14.0, 36.0, 62.0, 83.0, 170.0, 317.0, 917.0, 7901.0, 805732.0, 3356956.0, 19961.0, 1342.0, 353.0, 173.0, 101.0, 47.0, 36.0, 25.0, 14.0, 5.0, 7.0, 6.0, 1.0, 4.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-71.375, -68.53515625, -65.6953125, -62.85546875, -60.015625, -57.17578125, -54.3359375, -51.49609375, -48.65625, -45.81640625, -42.9765625, -40.13671875, -37.296875, -34.45703125, -31.6171875, -28.77734375, -25.9375, -23.09765625, -20.2578125, -17.41796875, -14.578125, -11.73828125, -8.8984375, -6.05859375, -3.21875, -0.37890625, 2.4609375, 5.30078125, 8.140625, 10.98046875, 13.8203125, 16.66015625, 19.5, 22.33984375, 25.1796875, 28.01953125, 30.859375, 33.69921875, 36.5390625, 39.37890625, 42.21875, 45.05859375, 47.8984375, 50.73828125, 53.578125, 56.41796875, 59.2578125, 62.09765625, 64.9375, 67.77734375, 70.6171875, 73.45703125, 76.296875, 79.13671875, 81.9765625, 84.81640625, 87.65625, 90.49609375, 93.3359375, 96.17578125, 99.015625, 101.85546875, 104.6953125, 107.53515625, 110.375]}, "gradients/decoder.transformer.h.21.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 4.0, 18.0, 73.0, 235.0, 365.0, 234.0, 72.0, 13.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-317.4837951660156, -309.6406555175781, -301.79754638671875, -293.95440673828125, -286.11126708984375, -278.2681579589844, -270.4250183105469, -262.5818786621094, -254.73875427246094, -246.8956298828125, -239.052490234375, -231.20936584472656, -223.36624145507812, -215.52310180664062, -207.6799774169922, -199.83685302734375, -191.99371337890625, -184.1505889892578, -176.3074493408203, -168.46432495117188, -160.62120056152344, -152.77806091308594, -144.9349365234375, -137.091796875, -129.24868774414062, -121.40555572509766, -113.56243133544922, -105.71929931640625, -97.87616729736328, -90.03303527832031, -82.18991088867188, -74.3467788696289, -66.50364685058594, -58.660518646240234, -50.817386627197266, -42.97425842285156, -35.131126403808594, -27.28799819946289, -19.444869995117188, -11.601737976074219, -3.7586097717285156, 4.084519863128662, 11.92764949798584, 19.77077865600586, 27.613908767700195, 35.45703887939453, 43.300167083740234, 51.1432991027832, 58.986427307128906, 66.82955932617188, 74.67268371582031, 82.51581573486328, 90.35894775390625, 98.20207214355469, 106.04520416259766, 113.88833618164062, 121.73146057128906, 129.5745849609375, 137.417724609375, 145.26084899902344, 153.10397338867188, 160.94711303710938, 168.7902374267578, 176.63336181640625, 184.47650146484375]}, "gradients/decoder.transformer.h.21.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 0.0, 3.0, 3.0, 1.0, 5.0, 1.0, 6.0, 7.0, 6.0, 9.0, 16.0, 13.0, 22.0, 19.0, 16.0, 25.0, 29.0, 28.0, 30.0, 30.0, 28.0, 46.0, 39.0, 40.0, 48.0, 38.0, 35.0, 31.0, 42.0, 44.0, 46.0, 33.0, 34.0, 25.0, 31.0, 22.0, 28.0, 22.0, 27.0, 21.0, 19.0, 15.0, 11.0, 4.0, 7.0, 3.0, 3.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-68.70602416992188, -66.41228485107422, -64.11854553222656, -61.82481002807617, -59.531070709228516, -57.237335205078125, -54.94359588623047, -52.64985656738281, -50.356117248535156, -48.0623779296875, -45.76864242553711, -43.47490310668945, -41.1811637878418, -38.887428283691406, -36.59368896484375, -34.299949645996094, -32.0062141418457, -29.71247673034668, -27.418737411499023, -25.125, -22.831260681152344, -20.53752326965332, -18.243785858154297, -15.95004653930664, -13.656309127807617, -11.362570762634277, -9.068832397460938, -6.775094985961914, -4.481356620788574, -2.1876182556152344, 0.10611915588378906, 2.3998584747314453, 4.693595886230469, 6.987334251403809, 9.281072616577148, 11.574810028076172, 13.868548393249512, 16.16228675842285, 18.456024169921875, 20.74976348876953, 23.043500900268555, 25.337238311767578, 27.630977630615234, 29.924715042114258, 32.21845245361328, 34.51219177246094, 36.805931091308594, 39.09967041015625, 41.39340591430664, 43.6871452331543, 45.98088073730469, 48.274620056152344, 50.568359375, 52.862098693847656, 55.15583419799805, 57.4495735168457, 59.743309020996094, 62.03704833984375, 64.3307876586914, 66.62452697753906, 68.91825866699219, 71.21199798583984, 73.5057373046875, 75.79947662353516, 78.09321594238281]}, "gradients/decoder.transformer.h.21.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 3.0, 1.0, 5.0, 0.0, 6.0, 6.0, 12.0, 14.0, 16.0, 10.0, 15.0, 15.0, 14.0, 12.0, 23.0, 35.0, 25.0, 32.0, 30.0, 42.0, 42.0, 46.0, 52.0, 41.0, 46.0, 41.0, 38.0, 52.0, 44.0, 46.0, 38.0, 28.0, 30.0, 22.0, 26.0, 20.0, 18.0, 15.0, 14.0, 9.0, 12.0, 7.0, 3.0, 4.0, 2.0, 0.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-5.609375, -5.40899658203125, -5.2086181640625, -5.00823974609375, -4.807861328125, -4.60748291015625, -4.4071044921875, -4.20672607421875, -4.00634765625, -3.80596923828125, -3.6055908203125, -3.40521240234375, -3.204833984375, -3.00445556640625, -2.8040771484375, -2.60369873046875, -2.4033203125, -2.20294189453125, -2.0025634765625, -1.80218505859375, -1.601806640625, -1.40142822265625, -1.2010498046875, -1.00067138671875, -0.80029296875, -0.59991455078125, -0.3995361328125, -0.19915771484375, 0.001220703125, 0.20159912109375, 0.4019775390625, 0.60235595703125, 0.802734375, 1.00311279296875, 1.2034912109375, 1.40386962890625, 1.604248046875, 1.80462646484375, 2.0050048828125, 2.20538330078125, 2.40576171875, 2.60614013671875, 2.8065185546875, 3.00689697265625, 3.207275390625, 3.40765380859375, 3.6080322265625, 3.80841064453125, 4.0087890625, 4.20916748046875, 4.4095458984375, 4.60992431640625, 4.810302734375, 5.01068115234375, 5.2110595703125, 5.41143798828125, 5.61181640625, 5.81219482421875, 6.0125732421875, 6.21295166015625, 6.413330078125, 6.61370849609375, 6.8140869140625, 7.01446533203125, 7.21484375]}, "gradients/decoder.transformer.h.21.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 3.0, 2.0, 3.0, 6.0, 4.0, 20.0, 10.0, 29.0, 25.0, 46.0, 71.0, 91.0, 145.0, 221.0, 318.0, 499.0, 807.0, 1091.0, 1829.0, 2724.0, 4343.0, 6831.0, 10671.0, 17200.0, 28875.0, 49129.0, 86825.0, 152615.0, 227966.0, 189024.0, 111035.0, 62339.0, 35959.0, 21624.0, 13127.0, 8323.0, 5243.0, 3288.0, 2137.0, 1413.0, 873.0, 565.0, 423.0, 256.0, 187.0, 114.0, 86.0, 54.0, 29.0, 25.0, 21.0, 11.0, 9.0, 1.0, 4.0, 2.0, 1.0, 1.0, 0.0, 2.0], "bins": [-0.76025390625, -0.7370147705078125, -0.713775634765625, -0.6905364990234375, -0.66729736328125, -0.6440582275390625, -0.620819091796875, -0.5975799560546875, -0.5743408203125, -0.5511016845703125, -0.527862548828125, -0.5046234130859375, -0.48138427734375, -0.4581451416015625, -0.434906005859375, -0.4116668701171875, -0.388427734375, -0.3651885986328125, -0.341949462890625, -0.3187103271484375, -0.29547119140625, -0.2722320556640625, -0.248992919921875, -0.2257537841796875, -0.2025146484375, -0.1792755126953125, -0.156036376953125, -0.1327972412109375, -0.10955810546875, -0.0863189697265625, -0.063079833984375, -0.0398406982421875, -0.0166015625, 0.0066375732421875, 0.029876708984375, 0.0531158447265625, 0.07635498046875, 0.0995941162109375, 0.122833251953125, 0.1460723876953125, 0.1693115234375, 0.1925506591796875, 0.215789794921875, 0.2390289306640625, 0.26226806640625, 0.2855072021484375, 0.308746337890625, 0.3319854736328125, 0.355224609375, 0.3784637451171875, 0.401702880859375, 0.4249420166015625, 0.44818115234375, 0.4714202880859375, 0.494659423828125, 0.5178985595703125, 0.5411376953125, 0.5643768310546875, 0.587615966796875, 0.6108551025390625, 0.63409423828125, 0.6573333740234375, 0.680572509765625, 0.7038116455078125, 0.72705078125]}, "gradients/decoder.transformer.h.21.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 1.0, 3.0, 2.0, 1.0, 1.0, 4.0, 8.0, 5.0, 12.0, 9.0, 7.0, 22.0, 16.0, 25.0, 19.0, 20.0, 29.0, 31.0, 24.0, 31.0, 43.0, 41.0, 55.0, 41.0, 1073.0, 41.0, 46.0, 35.0, 37.0, 57.0, 45.0, 35.0, 36.0, 23.0, 32.0, 28.0, 24.0, 14.0, 10.0, 15.0, 7.0, 9.0, 5.0, 0.0, 3.0, 6.0, 5.0, 2.0, 2.0, 0.0, 0.0, 0.0, 2.0], "bins": [-4.84765625, -4.71240234375, -4.5771484375, -4.44189453125, -4.306640625, -4.17138671875, -4.0361328125, -3.90087890625, -3.765625, -3.63037109375, -3.4951171875, -3.35986328125, -3.224609375, -3.08935546875, -2.9541015625, -2.81884765625, -2.68359375, -2.54833984375, -2.4130859375, -2.27783203125, -2.142578125, -2.00732421875, -1.8720703125, -1.73681640625, -1.6015625, -1.46630859375, -1.3310546875, -1.19580078125, -1.060546875, -0.92529296875, -0.7900390625, -0.65478515625, -0.51953125, -0.38427734375, -0.2490234375, -0.11376953125, 0.021484375, 0.15673828125, 0.2919921875, 0.42724609375, 0.5625, 0.69775390625, 0.8330078125, 0.96826171875, 1.103515625, 1.23876953125, 1.3740234375, 1.50927734375, 1.64453125, 1.77978515625, 1.9150390625, 2.05029296875, 2.185546875, 2.32080078125, 2.4560546875, 2.59130859375, 2.7265625, 2.86181640625, 2.9970703125, 3.13232421875, 3.267578125, 3.40283203125, 3.5380859375, 3.67333984375, 3.80859375]}, "gradients/decoder.transformer.h.21.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 1.0, 4.0, 1.0, 5.0, 6.0, 8.0, 11.0, 20.0, 17.0, 30.0, 47.0, 64.0, 111.0, 156.0, 191.0, 344.0, 490.0, 667.0, 1004.0, 1349.0, 2116.0, 3130.0, 5018.0, 7467.0, 11443.0, 17777.0, 27569.0, 42806.0, 67801.0, 105005.0, 149725.0, 1218863.0, 147335.0, 102012.0, 65616.0, 42005.0, 27168.0, 17164.0, 11030.0, 7179.0, 4705.0, 3137.0, 2084.0, 1460.0, 949.0, 700.0, 472.0, 289.0, 190.0, 120.0, 82.0, 62.0, 46.0, 29.0, 23.0, 17.0, 12.0, 7.0, 4.0, 3.0, 1.0, 0.0, 3.0], "bins": [-0.466064453125, -0.4517097473144531, -0.43735504150390625, -0.4230003356933594, -0.4086456298828125, -0.3942909240722656, -0.37993621826171875, -0.3655815124511719, -0.351226806640625, -0.3368721008300781, -0.32251739501953125, -0.3081626892089844, -0.2938079833984375, -0.2794532775878906, -0.26509857177734375, -0.2507438659667969, -0.23638916015625, -0.22203445434570312, -0.20767974853515625, -0.19332504272460938, -0.1789703369140625, -0.16461563110351562, -0.15026092529296875, -0.13590621948242188, -0.121551513671875, -0.10719680786132812, -0.09284210205078125, -0.07848739624023438, -0.0641326904296875, -0.049777984619140625, -0.03542327880859375, -0.021068572998046875, -0.0067138671875, 0.007640838623046875, 0.02199554443359375, 0.036350250244140625, 0.0507049560546875, 0.06505966186523438, 0.07941436767578125, 0.09376907348632812, 0.108123779296875, 0.12247848510742188, 0.13683319091796875, 0.15118789672851562, 0.1655426025390625, 0.17989730834960938, 0.19425201416015625, 0.20860671997070312, 0.22296142578125, 0.23731613159179688, 0.25167083740234375, 0.2660255432128906, 0.2803802490234375, 0.2947349548339844, 0.30908966064453125, 0.3234443664550781, 0.337799072265625, 0.3521537780761719, 0.36650848388671875, 0.3808631896972656, 0.3952178955078125, 0.4095726013183594, 0.42392730712890625, 0.4382820129394531, 0.45263671875]}, "gradients/decoder.transformer.h.21.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 2.0, 2.0, 3.0, 5.0, 5.0, 6.0, 4.0, 5.0, 12.0, 11.0, 13.0, 14.0, 12.0, 24.0, 37.0, 47.0, 58.0, 64.0, 91.0, 96.0, 83.0, 81.0, 77.0, 59.0, 35.0, 33.0, 27.0, 26.0, 16.0, 10.0, 9.0, 8.0, 3.0, 3.0, 6.0, 4.0, 4.0, 3.0, 1.0, 1.0, 5.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.007099151611328125, -0.00690990686416626, -0.0067206621170043945, -0.006531417369842529, -0.006342172622680664, -0.006152927875518799, -0.005963683128356934, -0.005774438381195068, -0.005585193634033203, -0.005395948886871338, -0.005206704139709473, -0.005017459392547607, -0.004828214645385742, -0.004638969898223877, -0.004449725151062012, -0.0042604804039001465, -0.004071235656738281, -0.003881990909576416, -0.0036927461624145508, -0.0035035014152526855, -0.0033142566680908203, -0.003125011920928955, -0.00293576717376709, -0.0027465224266052246, -0.0025572776794433594, -0.002368032932281494, -0.002178788185119629, -0.0019895434379577637, -0.0018002986907958984, -0.0016110539436340332, -0.001421809196472168, -0.0012325644493103027, -0.0010433197021484375, -0.0008540749549865723, -0.000664830207824707, -0.0004755854606628418, -0.00028634071350097656, -9.709596633911133e-05, 9.21487808227539e-05, 0.00028139352798461914, 0.0004706382751464844, 0.0006598830223083496, 0.0008491277694702148, 0.00103837251663208, 0.0012276172637939453, 0.0014168620109558105, 0.0016061067581176758, 0.001795351505279541, 0.0019845962524414062, 0.0021738409996032715, 0.0023630857467651367, 0.002552330493927002, 0.002741575241088867, 0.0029308199882507324, 0.0031200647354125977, 0.003309309482574463, 0.003498554229736328, 0.0036877989768981934, 0.0038770437240600586, 0.004066288471221924, 0.004255533218383789, 0.004444777965545654, 0.0046340227127075195, 0.004823267459869385, 0.00501251220703125]}, "gradients/decoder.transformer.h.21.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 5.0, 3.0, 1.0, 2.0, 5.0, 9.0, 3.0, 5.0, 5.0, 11.0, 25.0, 19.0, 33.0, 41.0, 45.0, 67.0, 123.0, 197.0, 299.0, 805.0, 744363.0, 300888.0, 731.0, 318.0, 171.0, 129.0, 65.0, 43.0, 31.0, 27.0, 18.0, 16.0, 11.0, 12.0, 3.0, 5.0, 6.0, 7.0, 3.0, 4.0, 4.0, 2.0, 2.0, 0.0, 3.0, 2.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.10943603515625, -0.1053762435913086, -0.10131645202636719, -0.09725666046142578, -0.09319686889648438, -0.08913707733154297, -0.08507728576660156, -0.08101749420166016, -0.07695770263671875, -0.07289791107177734, -0.06883811950683594, -0.06477832794189453, -0.060718536376953125, -0.05665874481201172, -0.05259895324707031, -0.048539161682128906, -0.0444793701171875, -0.040419578552246094, -0.03635978698730469, -0.03229999542236328, -0.028240203857421875, -0.02418041229248047, -0.020120620727539062, -0.016060829162597656, -0.01200103759765625, -0.007941246032714844, -0.0038814544677734375, 0.00017833709716796875, 0.004238128662109375, 0.008297920227050781, 0.012357711791992188, 0.016417503356933594, 0.020477294921875, 0.024537086486816406, 0.028596878051757812, 0.03265666961669922, 0.036716461181640625, 0.04077625274658203, 0.04483604431152344, 0.048895835876464844, 0.05295562744140625, 0.057015419006347656, 0.06107521057128906, 0.06513500213623047, 0.06919479370117188, 0.07325458526611328, 0.07731437683105469, 0.0813741683959961, 0.0854339599609375, 0.0894937515258789, 0.09355354309082031, 0.09761333465576172, 0.10167312622070312, 0.10573291778564453, 0.10979270935058594, 0.11385250091552734, 0.11791229248046875, 0.12197208404541016, 0.12603187561035156, 0.13009166717529297, 0.13415145874023438, 0.13821125030517578, 0.1422710418701172, 0.1463308334350586, 0.150390625]}, "gradients/decoder.transformer.h.21.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 518.0, 498.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.03060906007885933, -0.029217127710580826, -0.027825195342302322, -0.02643326297402382, -0.025041330605745316, -0.023649398237466812, -0.022257467731833458, -0.020865535363554955, -0.01947360299527645, -0.018081670626997948, -0.016689738258719444, -0.015297806821763515, -0.013905874453485012, -0.012513942085206509, -0.01112201064825058, -0.009730078279972076, -0.008338145911693573, -0.00694621354341507, -0.0055542816407978535, -0.004162349738180637, -0.002770417369902134, -0.0013784850016236305, 1.3446435332298279e-05, 0.0014053788036108017, 0.002797311171889305, 0.0041892435401678085, 0.005581175442785025, 0.006973107345402241, 0.008365039713680744, 0.009756972081959248, 0.011148903518915176, 0.01254083588719368, 0.013932771980762482, 0.015324704349040985, 0.01671663671731949, 0.018108569085597992, 0.019500501453876495, 0.020892433822155, 0.022284364327788353, 0.023676296696066856, 0.02506822906434536, 0.026460161432623863, 0.027852093800902367, 0.02924402430653572, 0.030635956674814224, 0.03202788904309273, 0.03341982141137123, 0.034811753779649734, 0.03620368614792824, 0.03759561851620674, 0.038987550884485245, 0.04037948325276375, 0.04177141562104225, 0.043163347989320755, 0.04455527663230896, 0.04594720900058746, 0.04733914136886597, 0.04873107373714447, 0.050123006105422974, 0.05151493847370148, 0.05290687084197998, 0.054298803210258484, 0.05569073557853699, 0.05708266794681549, 0.058474600315093994]}, "gradients/decoder.transformer.h.21.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 4.0, 1.0, 4.0, 10.0, 19.0, 21.0, 22.0, 25.0, 36.0, 52.0, 53.0, 46.0, 71.0, 75.0, 74.0, 70.0, 79.0, 75.0, 63.0, 44.0, 38.0, 31.0, 27.0, 25.0, 11.0, 13.0, 10.0, 5.0, 7.0, 4.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.003817319869995117, -0.003658144734799862, -0.0034989695996046066, -0.0033397944644093513, -0.003180619329214096, -0.003021444194018841, -0.0028622690588235855, -0.0027030939236283302, -0.002543918788433075, -0.0023847436532378197, -0.0022255685180425644, -0.002066393382847309, -0.0019072182476520538, -0.0017480431124567986, -0.0015888679772615433, -0.001429692842066288, -0.0012705177068710327, -0.0011113425716757774, -0.0009521674364805222, -0.0007929923012852669, -0.0006338171660900116, -0.0004746420308947563, -0.00031546689569950104, -0.00015629176050424576, 2.8833746910095215e-06, 0.0001620585098862648, 0.0003212336450815201, 0.00048040878027677536, 0.0006395839154720306, 0.0007987590506672859, 0.0009579341858625412, 0.0011171093210577965, 0.0012762844562530518, 0.001435459591448307, 0.0015946347266435623, 0.0017538098618388176, 0.0019129849970340729, 0.002072160132229328, 0.0022313352674245834, 0.0023905104026198387, 0.002549685537815094, 0.0027088606730103493, 0.0028680358082056046, 0.00302721094340086, 0.003186386078596115, 0.0033455612137913704, 0.0035047363489866257, 0.003663911484181881, 0.0038230866193771362, 0.0039822617545723915, 0.004141436889767647, 0.004300612024962902, 0.004459787160158157, 0.004618962295353413, 0.004778137430548668, 0.004937312565743923, 0.0050964877009391785, 0.005255662836134434, 0.005414837971329689, 0.005574013106524944, 0.0057331882417202, 0.005892363376915455, 0.00605153851211071, 0.006210713647305965, 0.006369888782501221]}, "gradients/decoder.transformer.h.21.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 3.0, 1.0, 5.0, 0.0, 6.0, 6.0, 12.0, 14.0, 16.0, 10.0, 15.0, 15.0, 14.0, 12.0, 23.0, 35.0, 25.0, 32.0, 30.0, 42.0, 42.0, 46.0, 52.0, 41.0, 46.0, 41.0, 38.0, 52.0, 44.0, 46.0, 38.0, 28.0, 30.0, 22.0, 26.0, 20.0, 18.0, 15.0, 14.0, 9.0, 12.0, 7.0, 3.0, 4.0, 2.0, 0.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-5.609375, -5.40899658203125, -5.2086181640625, -5.00823974609375, -4.807861328125, -4.60748291015625, -4.4071044921875, -4.20672607421875, -4.00634765625, -3.80596923828125, -3.6055908203125, -3.40521240234375, -3.204833984375, -3.00445556640625, -2.8040771484375, -2.60369873046875, -2.4033203125, -2.20294189453125, -2.0025634765625, -1.80218505859375, -1.601806640625, -1.40142822265625, -1.2010498046875, -1.00067138671875, -0.80029296875, -0.59991455078125, -0.3995361328125, -0.19915771484375, 0.001220703125, 0.20159912109375, 0.4019775390625, 0.60235595703125, 0.802734375, 1.00311279296875, 1.2034912109375, 1.40386962890625, 1.604248046875, 1.80462646484375, 2.0050048828125, 2.20538330078125, 2.40576171875, 2.60614013671875, 2.8065185546875, 3.00689697265625, 3.207275390625, 3.40765380859375, 3.6080322265625, 3.80841064453125, 4.0087890625, 4.20916748046875, 4.4095458984375, 4.60992431640625, 4.810302734375, 5.01068115234375, 5.2110595703125, 5.41143798828125, 5.61181640625, 5.81219482421875, 6.0125732421875, 6.21295166015625, 6.413330078125, 6.61370849609375, 6.8140869140625, 7.01446533203125, 7.21484375]}, "gradients/decoder.transformer.h.21.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 2.0, 2.0, 3.0, 1.0, 3.0, 11.0, 3.0, 18.0, 20.0, 27.0, 30.0, 64.0, 61.0, 110.0, 163.0, 209.0, 322.0, 481.0, 652.0, 1016.0, 1472.0, 2172.0, 3611.0, 5713.0, 10487.0, 21862.0, 53988.0, 174399.0, 475187.0, 188554.0, 57360.0, 23016.0, 10785.0, 6053.0, 3733.0, 2279.0, 1475.0, 1027.0, 680.0, 472.0, 317.0, 215.0, 148.0, 111.0, 68.0, 56.0, 38.0, 29.0, 21.0, 13.0, 4.0, 9.0, 3.0, 9.0, 1.0, 3.0, 2.0, 2.0, 0.0, 1.0], "bins": [-5.5234375, -5.3536376953125, -5.183837890625, -5.0140380859375, -4.84423828125, -4.6744384765625, -4.504638671875, -4.3348388671875, -4.1650390625, -3.9952392578125, -3.825439453125, -3.6556396484375, -3.48583984375, -3.3160400390625, -3.146240234375, -2.9764404296875, -2.806640625, -2.6368408203125, -2.467041015625, -2.2972412109375, -2.12744140625, -1.9576416015625, -1.787841796875, -1.6180419921875, -1.4482421875, -1.2784423828125, -1.108642578125, -0.9388427734375, -0.76904296875, -0.5992431640625, -0.429443359375, -0.2596435546875, -0.08984375, 0.0799560546875, 0.249755859375, 0.4195556640625, 0.58935546875, 0.7591552734375, 0.928955078125, 1.0987548828125, 1.2685546875, 1.4383544921875, 1.608154296875, 1.7779541015625, 1.94775390625, 2.1175537109375, 2.287353515625, 2.4571533203125, 2.626953125, 2.7967529296875, 2.966552734375, 3.1363525390625, 3.30615234375, 3.4759521484375, 3.645751953125, 3.8155517578125, 3.9853515625, 4.1551513671875, 4.324951171875, 4.4947509765625, 4.66455078125, 4.8343505859375, 5.004150390625, 5.1739501953125, 5.34375]}, "gradients/decoder.transformer.h.21.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 1.0, 2.0, 5.0, 3.0, 8.0, 7.0, 5.0, 4.0, 13.0, 9.0, 15.0, 14.0, 12.0, 23.0, 16.0, 28.0, 28.0, 25.0, 38.0, 28.0, 31.0, 39.0, 49.0, 44.0, 58.0, 107.0, 1634.0, 272.0, 76.0, 42.0, 51.0, 38.0, 33.0, 36.0, 43.0, 34.0, 28.0, 26.0, 15.0, 20.0, 17.0, 16.0, 10.0, 9.0, 9.0, 2.0, 7.0, 8.0, 8.0, 6.0, 6.0, 3.0, 2.0, 2.0, 1.0], "bins": [-22.328125, -21.7041015625, -21.080078125, -20.4560546875, -19.83203125, -19.2080078125, -18.583984375, -17.9599609375, -17.3359375, -16.7119140625, -16.087890625, -15.4638671875, -14.83984375, -14.2158203125, -13.591796875, -12.9677734375, -12.34375, -11.7197265625, -11.095703125, -10.4716796875, -9.84765625, -9.2236328125, -8.599609375, -7.9755859375, -7.3515625, -6.7275390625, -6.103515625, -5.4794921875, -4.85546875, -4.2314453125, -3.607421875, -2.9833984375, -2.359375, -1.7353515625, -1.111328125, -0.4873046875, 0.13671875, 0.7607421875, 1.384765625, 2.0087890625, 2.6328125, 3.2568359375, 3.880859375, 4.5048828125, 5.12890625, 5.7529296875, 6.376953125, 7.0009765625, 7.625, 8.2490234375, 8.873046875, 9.4970703125, 10.12109375, 10.7451171875, 11.369140625, 11.9931640625, 12.6171875, 13.2412109375, 13.865234375, 14.4892578125, 15.11328125, 15.7373046875, 16.361328125, 16.9853515625, 17.609375]}, "gradients/decoder.transformer.h.21.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 3.0, 10.0, 5.0, 9.0, 13.0, 5.0, 5.0, 12.0, 12.0, 21.0, 20.0, 26.0, 36.0, 45.0, 68.0, 112.0, 147.0, 232.0, 515.0, 1280.0, 5140.0, 86813.0, 3008965.0, 36580.0, 3505.0, 1018.0, 389.0, 218.0, 130.0, 87.0, 49.0, 50.0, 45.0, 27.0, 20.0, 19.0, 23.0, 15.0, 14.0, 5.0, 5.0, 9.0, 7.0, 4.0, 5.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-39.1875, -37.9775390625, -36.767578125, -35.5576171875, -34.34765625, -33.1376953125, -31.927734375, -30.7177734375, -29.5078125, -28.2978515625, -27.087890625, -25.8779296875, -24.66796875, -23.4580078125, -22.248046875, -21.0380859375, -19.828125, -18.6181640625, -17.408203125, -16.1982421875, -14.98828125, -13.7783203125, -12.568359375, -11.3583984375, -10.1484375, -8.9384765625, -7.728515625, -6.5185546875, -5.30859375, -4.0986328125, -2.888671875, -1.6787109375, -0.46875, 0.7412109375, 1.951171875, 3.1611328125, 4.37109375, 5.5810546875, 6.791015625, 8.0009765625, 9.2109375, 10.4208984375, 11.630859375, 12.8408203125, 14.05078125, 15.2607421875, 16.470703125, 17.6806640625, 18.890625, 20.1005859375, 21.310546875, 22.5205078125, 23.73046875, 24.9404296875, 26.150390625, 27.3603515625, 28.5703125, 29.7802734375, 30.990234375, 32.2001953125, 33.41015625, 34.6201171875, 35.830078125, 37.0400390625, 38.25]}, "gradients/decoder.transformer.h.21.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 4.0, 5.0, 30.0, 209.0, 393.0, 294.0, 64.0, 15.0, 2.0, 1.0, 0.0, 2.0], "bins": [-164.3690948486328, -161.46563720703125, -158.56216430664062, -155.65870666503906, -152.7552490234375, -149.85177612304688, -146.9483184814453, -144.04486083984375, -141.1414031982422, -138.23794555664062, -135.33447265625, -132.43101501464844, -129.52755737304688, -126.62409210205078, -123.72062683105469, -120.81716918945312, -117.9136962890625, -115.0102310180664, -112.10677337646484, -109.20330810546875, -106.29985046386719, -103.3963851928711, -100.492919921875, -97.58946228027344, -94.68600463867188, -91.78253936767578, -88.87908172607422, -85.97561645507812, -83.07215881347656, -80.16869354248047, -77.26522827148438, -74.36177062988281, -71.45829772949219, -68.5548324584961, -65.65137481689453, -62.74790954589844, -59.84444808959961, -56.94098663330078, -54.03752517700195, -51.134063720703125, -48.2306022644043, -45.32714080810547, -42.42367935180664, -39.52021789550781, -36.61675262451172, -33.71329116821289, -30.809829711914062, -27.9063663482666, -25.002904891967773, -22.099443435668945, -19.195980072021484, -16.292518615722656, -13.389056205749512, -10.485593795776367, -7.582132339477539, -4.678668975830078, -1.77520751953125, 1.1282546520233154, 4.031716823577881, 6.935178756713867, 9.838641166687012, 12.742103576660156, 15.645565032958984, 18.549028396606445, 21.452489852905273]}, "gradients/decoder.transformer.h.21.ln_1.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 3.0, 2.0, 2.0, 3.0, 6.0, 7.0, 9.0, 7.0, 7.0, 11.0, 12.0, 13.0, 20.0, 20.0, 32.0, 28.0, 18.0, 32.0, 38.0, 34.0, 33.0, 30.0, 52.0, 44.0, 54.0, 39.0, 45.0, 52.0, 35.0, 34.0, 39.0, 38.0, 32.0, 27.0, 26.0, 20.0, 27.0, 16.0, 9.0, 15.0, 12.0, 6.0, 7.0, 5.0, 3.0, 3.0, 2.0, 2.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-72.67668914794922, -70.34230041503906, -68.0079116821289, -65.67352294921875, -63.339134216308594, -61.00474548339844, -58.67036056518555, -56.33597183227539, -54.001583099365234, -51.66719436645508, -49.33280563354492, -46.998416900634766, -44.664031982421875, -42.32964324951172, -39.99525451660156, -37.660865783691406, -35.32647705078125, -32.992088317871094, -30.657699584960938, -28.323312759399414, -25.988924026489258, -23.6545352935791, -21.320148468017578, -18.985759735107422, -16.651371002197266, -14.31698226928711, -11.98259449005127, -9.64820671081543, -7.313817977905273, -4.979429244995117, -2.6450414657592773, -0.3106536865234375, 2.0237350463867188, 4.358123302459717, 6.692511558532715, 9.026899337768555, 11.361288070678711, 13.695676803588867, 16.03006362915039, 18.364452362060547, 20.698841094970703, 23.03322982788086, 25.367618560791016, 27.70200538635254, 30.036394119262695, 32.37078094482422, 34.705169677734375, 37.03955841064453, 39.37394714355469, 41.708335876464844, 44.042724609375, 46.377113342285156, 48.71150207519531, 51.04589080810547, 53.38027572631836, 55.714664459228516, 58.04905319213867, 60.38344192504883, 62.717830657958984, 65.05221557617188, 67.38660430908203, 69.72099304199219, 72.05538177490234, 74.3897705078125, 76.72415924072266]}, "gradients/decoder.transformer.h.20.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 4.0, 1.0, 1.0, 5.0, 8.0, 6.0, 12.0, 11.0, 11.0, 9.0, 15.0, 14.0, 19.0, 19.0, 19.0, 27.0, 27.0, 32.0, 29.0, 41.0, 43.0, 45.0, 45.0, 45.0, 38.0, 54.0, 37.0, 38.0, 48.0, 48.0, 43.0, 37.0, 27.0, 12.0, 35.0, 14.0, 26.0, 13.0, 14.0, 11.0, 6.0, 7.0, 10.0, 3.0, 2.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-5.77734375, -5.5726318359375, -5.367919921875, -5.1632080078125, -4.95849609375, -4.7537841796875, -4.549072265625, -4.3443603515625, -4.1396484375, -3.9349365234375, -3.730224609375, -3.5255126953125, -3.32080078125, -3.1160888671875, -2.911376953125, -2.7066650390625, -2.501953125, -2.2972412109375, -2.092529296875, -1.8878173828125, -1.68310546875, -1.4783935546875, -1.273681640625, -1.0689697265625, -0.8642578125, -0.6595458984375, -0.454833984375, -0.2501220703125, -0.04541015625, 0.1593017578125, 0.364013671875, 0.5687255859375, 0.7734375, 0.9781494140625, 1.182861328125, 1.3875732421875, 1.59228515625, 1.7969970703125, 2.001708984375, 2.2064208984375, 2.4111328125, 2.6158447265625, 2.820556640625, 3.0252685546875, 3.22998046875, 3.4346923828125, 3.639404296875, 3.8441162109375, 4.048828125, 4.2535400390625, 4.458251953125, 4.6629638671875, 4.86767578125, 5.0723876953125, 5.277099609375, 5.4818115234375, 5.6865234375, 5.8912353515625, 6.095947265625, 6.3006591796875, 6.50537109375, 6.7100830078125, 6.914794921875, 7.1195068359375, 7.32421875]}, "gradients/decoder.transformer.h.20.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 4.0, 1.0, 5.0, 2.0, 8.0, 5.0, 8.0, 8.0, 8.0, 19.0, 10.0, 13.0, 12.0, 21.0, 18.0, 33.0, 23.0, 55.0, 63.0, 102.0, 140.0, 360.0, 1224.0, 6391.0, 56401.0, 1578791.0, 2453833.0, 85374.0, 8827.0, 1513.0, 430.0, 183.0, 98.0, 65.0, 49.0, 31.0, 27.0, 26.0, 18.0, 17.0, 16.0, 13.0, 10.0, 7.0, 11.0, 6.0, 5.0, 2.0, 3.0, 5.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-28.875, -27.9150390625, -26.955078125, -25.9951171875, -25.03515625, -24.0751953125, -23.115234375, -22.1552734375, -21.1953125, -20.2353515625, -19.275390625, -18.3154296875, -17.35546875, -16.3955078125, -15.435546875, -14.4755859375, -13.515625, -12.5556640625, -11.595703125, -10.6357421875, -9.67578125, -8.7158203125, -7.755859375, -6.7958984375, -5.8359375, -4.8759765625, -3.916015625, -2.9560546875, -1.99609375, -1.0361328125, -0.076171875, 0.8837890625, 1.84375, 2.8037109375, 3.763671875, 4.7236328125, 5.68359375, 6.6435546875, 7.603515625, 8.5634765625, 9.5234375, 10.4833984375, 11.443359375, 12.4033203125, 13.36328125, 14.3232421875, 15.283203125, 16.2431640625, 17.203125, 18.1630859375, 19.123046875, 20.0830078125, 21.04296875, 22.0029296875, 22.962890625, 23.9228515625, 24.8828125, 25.8427734375, 26.802734375, 27.7626953125, 28.72265625, 29.6826171875, 30.642578125, 31.6025390625, 32.5625]}, "gradients/decoder.transformer.h.20.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 3.0, 6.0, 2.0, 10.0, 8.0, 15.0, 38.0, 35.0, 64.0, 78.0, 128.0, 157.0, 248.0, 316.0, 466.0, 581.0, 556.0, 398.0, 294.0, 214.0, 147.0, 112.0, 64.0, 54.0, 29.0, 22.0, 9.0, 8.0, 13.0, 8.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-23.328125, -22.56884765625, -21.8095703125, -21.05029296875, -20.291015625, -19.53173828125, -18.7724609375, -18.01318359375, -17.25390625, -16.49462890625, -15.7353515625, -14.97607421875, -14.216796875, -13.45751953125, -12.6982421875, -11.93896484375, -11.1796875, -10.42041015625, -9.6611328125, -8.90185546875, -8.142578125, -7.38330078125, -6.6240234375, -5.86474609375, -5.10546875, -4.34619140625, -3.5869140625, -2.82763671875, -2.068359375, -1.30908203125, -0.5498046875, 0.20947265625, 0.96875, 1.72802734375, 2.4873046875, 3.24658203125, 4.005859375, 4.76513671875, 5.5244140625, 6.28369140625, 7.04296875, 7.80224609375, 8.5615234375, 9.32080078125, 10.080078125, 10.83935546875, 11.5986328125, 12.35791015625, 13.1171875, 13.87646484375, 14.6357421875, 15.39501953125, 16.154296875, 16.91357421875, 17.6728515625, 18.43212890625, 19.19140625, 19.95068359375, 20.7099609375, 21.46923828125, 22.228515625, 22.98779296875, 23.7470703125, 24.50634765625, 25.265625]}, "gradients/decoder.transformer.h.20.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 1.0, 5.0, 12.0, 12.0, 16.0, 29.0, 55.0, 84.0, 126.0, 253.0, 551.0, 3183.0, 325979.0, 3843746.0, 18476.0, 973.0, 372.0, 183.0, 97.0, 63.0, 34.0, 16.0, 9.0, 6.0, 5.0, 6.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-112.625, -109.3359375, -106.046875, -102.7578125, -99.46875, -96.1796875, -92.890625, -89.6015625, -86.3125, -83.0234375, -79.734375, -76.4453125, -73.15625, -69.8671875, -66.578125, -63.2890625, -60.0, -56.7109375, -53.421875, -50.1328125, -46.84375, -43.5546875, -40.265625, -36.9765625, -33.6875, -30.3984375, -27.109375, -23.8203125, -20.53125, -17.2421875, -13.953125, -10.6640625, -7.375, -4.0859375, -0.796875, 2.4921875, 5.78125, 9.0703125, 12.359375, 15.6484375, 18.9375, 22.2265625, 25.515625, 28.8046875, 32.09375, 35.3828125, 38.671875, 41.9609375, 45.25, 48.5390625, 51.828125, 55.1171875, 58.40625, 61.6953125, 64.984375, 68.2734375, 71.5625, 74.8515625, 78.140625, 81.4296875, 84.71875, 88.0078125, 91.296875, 94.5859375, 97.875]}, "gradients/decoder.transformer.h.20.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 5.0, 5.0, 14.0, 23.0, 26.0, 40.0, 52.0, 56.0, 71.0, 89.0, 99.0, 110.0, 85.0, 83.0, 70.0, 61.0, 35.0, 31.0, 15.0, 10.0, 12.0, 7.0, 6.0, 1.0, 1.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-84.47265625, -82.25282287597656, -80.03298950195312, -77.81315612792969, -75.59332275390625, -73.37348937988281, -71.15365600585938, -68.93382263183594, -66.7139892578125, -64.49415588378906, -62.274322509765625, -60.05448913574219, -57.83465576171875, -55.61482238769531, -53.394989013671875, -51.17515563964844, -48.955322265625, -46.73548889160156, -44.515655517578125, -42.29582214355469, -40.07598876953125, -37.85615539550781, -35.636322021484375, -33.41648864746094, -31.1966552734375, -28.976821899414062, -26.756988525390625, -24.537155151367188, -22.31732177734375, -20.097488403320312, -17.877655029296875, -15.657821655273438, -13.43798828125, -11.218154907226562, -8.998321533203125, -6.7784881591796875, -4.55865478515625, -2.3388214111328125, -0.118988037109375, 2.1008453369140625, 4.3206787109375, 6.5405120849609375, 8.760345458984375, 10.980178833007812, 13.20001220703125, 15.419845581054688, 17.639678955078125, 19.859512329101562, 22.079345703125, 24.299179077148438, 26.519012451171875, 28.738845825195312, 30.95867919921875, 33.17851257324219, 35.398345947265625, 37.61817932128906, 39.8380126953125, 42.05784606933594, 44.277679443359375, 46.49751281738281, 48.71734619140625, 50.93717956542969, 53.157012939453125, 55.37684631347656, 57.5966796875]}, "gradients/decoder.transformer.h.20.ln_2.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 3.0, 4.0, 3.0, 5.0, 4.0, 6.0, 11.0, 11.0, 11.0, 9.0, 18.0, 18.0, 30.0, 19.0, 41.0, 32.0, 33.0, 30.0, 37.0, 48.0, 45.0, 36.0, 50.0, 44.0, 38.0, 28.0, 36.0, 44.0, 35.0, 39.0, 25.0, 36.0, 25.0, 24.0, 13.0, 28.0, 21.0, 17.0, 17.0, 6.0, 12.0, 6.0, 5.0, 6.0, 5.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-58.01872253417969, -55.890869140625, -53.76301193237305, -51.63515853881836, -49.507301330566406, -47.37944793701172, -45.25159454345703, -43.123741149902344, -40.99588394165039, -38.8680305480957, -36.74017333984375, -34.61231994628906, -32.484466552734375, -30.356609344482422, -28.228755950927734, -26.100900650024414, -23.973045349121094, -21.845190048217773, -19.717334747314453, -17.589481353759766, -15.461626052856445, -13.333770751953125, -11.205916404724121, -9.078062057495117, -6.950206756591797, -4.822351932525635, -2.6944971084594727, -0.5666422843933105, 1.5612125396728516, 3.689067840576172, 5.816922187805176, 7.94477653503418, 10.0726318359375, 12.20048713684082, 14.328341484069824, 16.456195831298828, 18.58405113220215, 20.71190643310547, 22.839759826660156, 24.967615127563477, 27.095470428466797, 29.223325729370117, 31.351181030273438, 33.479034423828125, 35.60688781738281, 37.734745025634766, 39.86259841918945, 41.990455627441406, 44.118309020996094, 46.24616241455078, 48.374019622802734, 50.50187301635742, 52.629730224609375, 54.75758361816406, 56.88543701171875, 59.01329040527344, 61.14114761352539, 63.26900100708008, 65.39685821533203, 67.52471160888672, 69.6525650024414, 71.78042602539062, 73.90827941894531, 76.0361328125, 78.16398620605469]}, "gradients/decoder.transformer.h.20.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 1.0, 3.0, 2.0, 2.0, 1.0, 4.0, 7.0, 11.0, 5.0, 5.0, 13.0, 14.0, 21.0, 12.0, 24.0, 31.0, 23.0, 32.0, 35.0, 25.0, 42.0, 41.0, 43.0, 56.0, 31.0, 46.0, 43.0, 44.0, 49.0, 52.0, 41.0, 30.0, 36.0, 23.0, 24.0, 28.0, 17.0, 21.0, 12.0, 18.0, 9.0, 13.0, 10.0, 5.0, 4.0, 4.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.25390625, -6.04058837890625, -5.8272705078125, -5.61395263671875, -5.400634765625, -5.18731689453125, -4.9739990234375, -4.76068115234375, -4.54736328125, -4.33404541015625, -4.1207275390625, -3.90740966796875, -3.694091796875, -3.48077392578125, -3.2674560546875, -3.05413818359375, -2.8408203125, -2.62750244140625, -2.4141845703125, -2.20086669921875, -1.987548828125, -1.77423095703125, -1.5609130859375, -1.34759521484375, -1.13427734375, -0.92095947265625, -0.7076416015625, -0.49432373046875, -0.281005859375, -0.06768798828125, 0.1456298828125, 0.35894775390625, 0.572265625, 0.78558349609375, 0.9989013671875, 1.21221923828125, 1.425537109375, 1.63885498046875, 1.8521728515625, 2.06549072265625, 2.27880859375, 2.49212646484375, 2.7054443359375, 2.91876220703125, 3.132080078125, 3.34539794921875, 3.5587158203125, 3.77203369140625, 3.9853515625, 4.19866943359375, 4.4119873046875, 4.62530517578125, 4.838623046875, 5.05194091796875, 5.2652587890625, 5.47857666015625, 5.69189453125, 5.90521240234375, 6.1185302734375, 6.33184814453125, 6.545166015625, 6.75848388671875, 6.9718017578125, 7.18511962890625, 7.3984375]}, "gradients/decoder.transformer.h.20.crossattention.c_proj.weight": {"_type": "histogram", "values": [3.0, 2.0, 4.0, 7.0, 7.0, 9.0, 24.0, 24.0, 52.0, 66.0, 93.0, 105.0, 175.0, 219.0, 327.0, 485.0, 704.0, 995.0, 1535.0, 2092.0, 3117.0, 4390.0, 6494.0, 9510.0, 14421.0, 21593.0, 32194.0, 50279.0, 79541.0, 122758.0, 171893.0, 172444.0, 123024.0, 79819.0, 50904.0, 32789.0, 21496.0, 14147.0, 9693.0, 6615.0, 4453.0, 3114.0, 2119.0, 1441.0, 1043.0, 712.0, 522.0, 356.0, 250.0, 146.0, 112.0, 79.0, 60.0, 39.0, 21.0, 20.0, 17.0, 9.0, 6.0, 2.0, 4.0, 0.0, 0.0, 2.0], "bins": [-0.62451171875, -0.6043701171875, -0.584228515625, -0.5640869140625, -0.5439453125, -0.5238037109375, -0.503662109375, -0.4835205078125, -0.46337890625, -0.4432373046875, -0.423095703125, -0.4029541015625, -0.3828125, -0.3626708984375, -0.342529296875, -0.3223876953125, -0.30224609375, -0.2821044921875, -0.261962890625, -0.2418212890625, -0.2216796875, -0.2015380859375, -0.181396484375, -0.1612548828125, -0.14111328125, -0.1209716796875, -0.100830078125, -0.0806884765625, -0.060546875, -0.0404052734375, -0.020263671875, -0.0001220703125, 0.02001953125, 0.0401611328125, 0.060302734375, 0.0804443359375, 0.1005859375, 0.1207275390625, 0.140869140625, 0.1610107421875, 0.18115234375, 0.2012939453125, 0.221435546875, 0.2415771484375, 0.26171875, 0.2818603515625, 0.302001953125, 0.3221435546875, 0.34228515625, 0.3624267578125, 0.382568359375, 0.4027099609375, 0.4228515625, 0.4429931640625, 0.463134765625, 0.4832763671875, 0.50341796875, 0.5235595703125, 0.543701171875, 0.5638427734375, 0.583984375, 0.6041259765625, 0.624267578125, 0.6444091796875, 0.66455078125]}, "gradients/decoder.transformer.h.20.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 3.0, 2.0, 2.0, 2.0, 2.0, 10.0, 10.0, 9.0, 13.0, 14.0, 12.0, 16.0, 18.0, 23.0, 33.0, 40.0, 24.0, 39.0, 46.0, 39.0, 49.0, 40.0, 55.0, 1069.0, 48.0, 43.0, 42.0, 30.0, 42.0, 32.0, 31.0, 35.0, 30.0, 20.0, 24.0, 22.0, 11.0, 11.0, 11.0, 6.0, 9.0, 3.0, 7.0, 4.0, 4.0, 0.0, 1.0, 0.0, 3.0, 1.0, 0.0, 0.0, 1.0], "bins": [-4.82421875, -4.68267822265625, -4.5411376953125, -4.39959716796875, -4.258056640625, -4.11651611328125, -3.9749755859375, -3.83343505859375, -3.69189453125, -3.55035400390625, -3.4088134765625, -3.26727294921875, -3.125732421875, -2.98419189453125, -2.8426513671875, -2.70111083984375, -2.5595703125, -2.41802978515625, -2.2764892578125, -2.13494873046875, -1.993408203125, -1.85186767578125, -1.7103271484375, -1.56878662109375, -1.42724609375, -1.28570556640625, -1.1441650390625, -1.00262451171875, -0.861083984375, -0.71954345703125, -0.5780029296875, -0.43646240234375, -0.294921875, -0.15338134765625, -0.0118408203125, 0.12969970703125, 0.271240234375, 0.41278076171875, 0.5543212890625, 0.69586181640625, 0.83740234375, 0.97894287109375, 1.1204833984375, 1.26202392578125, 1.403564453125, 1.54510498046875, 1.6866455078125, 1.82818603515625, 1.9697265625, 2.11126708984375, 2.2528076171875, 2.39434814453125, 2.535888671875, 2.67742919921875, 2.8189697265625, 2.96051025390625, 3.10205078125, 3.24359130859375, 3.3851318359375, 3.52667236328125, 3.668212890625, 3.80975341796875, 3.9512939453125, 4.09283447265625, 4.234375]}, "gradients/decoder.transformer.h.20.crossattention.c_attn.weight": {"_type": "histogram", "values": [3.0, 4.0, 3.0, 1.0, 9.0, 10.0, 19.0, 31.0, 28.0, 70.0, 65.0, 129.0, 189.0, 238.0, 362.0, 518.0, 737.0, 1037.0, 1496.0, 2090.0, 3090.0, 4504.0, 6483.0, 9598.0, 14245.0, 20959.0, 31325.0, 45951.0, 69671.0, 99227.0, 134643.0, 1200392.0, 134786.0, 100637.0, 69514.0, 46725.0, 31715.0, 21165.0, 14273.0, 9753.0, 6609.0, 4528.0, 3082.0, 2217.0, 1509.0, 1092.0, 752.0, 528.0, 338.0, 256.0, 177.0, 128.0, 86.0, 54.0, 35.0, 20.0, 15.0, 13.0, 8.0, 1.0, 3.0, 3.0, 0.0, 3.0], "bins": [-0.426513671875, -0.4129791259765625, -0.399444580078125, -0.3859100341796875, -0.37237548828125, -0.3588409423828125, -0.345306396484375, -0.3317718505859375, -0.3182373046875, -0.3047027587890625, -0.291168212890625, -0.2776336669921875, -0.26409912109375, -0.2505645751953125, -0.237030029296875, -0.2234954833984375, -0.2099609375, -0.1964263916015625, -0.182891845703125, -0.1693572998046875, -0.15582275390625, -0.1422882080078125, -0.128753662109375, -0.1152191162109375, -0.1016845703125, -0.0881500244140625, -0.074615478515625, -0.0610809326171875, -0.04754638671875, -0.0340118408203125, -0.020477294921875, -0.0069427490234375, 0.006591796875, 0.0201263427734375, 0.033660888671875, 0.0471954345703125, 0.06072998046875, 0.0742645263671875, 0.087799072265625, 0.1013336181640625, 0.1148681640625, 0.1284027099609375, 0.141937255859375, 0.1554718017578125, 0.16900634765625, 0.1825408935546875, 0.196075439453125, 0.2096099853515625, 0.22314453125, 0.2366790771484375, 0.250213623046875, 0.2637481689453125, 0.27728271484375, 0.2908172607421875, 0.304351806640625, 0.3178863525390625, 0.3314208984375, 0.3449554443359375, 0.358489990234375, 0.3720245361328125, 0.38555908203125, 0.3990936279296875, 0.412628173828125, 0.4261627197265625, 0.439697265625]}, "gradients/decoder.transformer.h.20.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 3.0, 5.0, 3.0, 3.0, 1.0, 6.0, 8.0, 4.0, 7.0, 4.0, 7.0, 11.0, 18.0, 23.0, 21.0, 16.0, 33.0, 44.0, 42.0, 62.0, 53.0, 64.0, 53.0, 59.0, 52.0, 51.0, 51.0, 52.0, 39.0, 37.0, 33.0, 23.0, 16.0, 23.0, 18.0, 12.0, 8.0, 11.0, 10.0, 3.0, 10.0, 3.0, 4.0, 7.0, 0.0, 3.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.0032291412353515625, -0.0031062662601470947, -0.002983391284942627, -0.002860516309738159, -0.0027376413345336914, -0.0026147663593292236, -0.002491891384124756, -0.002369016408920288, -0.0022461414337158203, -0.0021232664585113525, -0.0020003914833068848, -0.001877516508102417, -0.0017546415328979492, -0.0016317665576934814, -0.0015088915824890137, -0.001386016607284546, -0.0012631416320800781, -0.0011402666568756104, -0.0010173916816711426, -0.0008945167064666748, -0.000771641731262207, -0.0006487667560577393, -0.0005258917808532715, -0.0004030168056488037, -0.00028014183044433594, -0.00015726685523986816, -3.439188003540039e-05, 8.848309516906738e-05, 0.00021135807037353516, 0.00033423304557800293, 0.0004571080207824707, 0.0005799829959869385, 0.0007028579711914062, 0.000825732946395874, 0.0009486079216003418, 0.0010714828968048096, 0.0011943578720092773, 0.0013172328472137451, 0.0014401078224182129, 0.0015629827976226807, 0.0016858577728271484, 0.0018087327480316162, 0.001931607723236084, 0.0020544826984405518, 0.0021773576736450195, 0.0023002326488494873, 0.002423107624053955, 0.002545982599258423, 0.0026688575744628906, 0.0027917325496673584, 0.002914607524871826, 0.003037482500076294, 0.0031603574752807617, 0.0032832324504852295, 0.0034061074256896973, 0.003528982400894165, 0.003651857376098633, 0.0037747323513031006, 0.0038976073265075684, 0.004020482301712036, 0.004143357276916504, 0.004266232252120972, 0.0043891072273254395, 0.004511982202529907, 0.004634857177734375]}, "gradients/decoder.transformer.h.20.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 6.0, 4.0, 3.0, 5.0, 13.0, 4.0, 10.0, 10.0, 11.0, 12.0, 21.0, 23.0, 34.0, 47.0, 61.0, 75.0, 112.0, 203.0, 387.0, 897.0, 101873.0, 940781.0, 2701.0, 464.0, 230.0, 163.0, 100.0, 103.0, 45.0, 29.0, 31.0, 28.0, 11.0, 13.0, 8.0, 7.0, 5.0, 6.0, 6.0, 3.0, 2.0, 5.0, 4.0, 2.0, 3.0, 3.0, 1.0, 1.0, 1.0], "bins": [-0.1004638671875, -0.09768867492675781, -0.09491348266601562, -0.09213829040527344, -0.08936309814453125, -0.08658790588378906, -0.08381271362304688, -0.08103752136230469, -0.0782623291015625, -0.07548713684082031, -0.07271194458007812, -0.06993675231933594, -0.06716156005859375, -0.06438636779785156, -0.061611175537109375, -0.05883598327636719, -0.056060791015625, -0.05328559875488281, -0.050510406494140625, -0.04773521423339844, -0.04496002197265625, -0.04218482971191406, -0.039409637451171875, -0.03663444519042969, -0.0338592529296875, -0.031084060668945312, -0.028308868408203125, -0.025533676147460938, -0.02275848388671875, -0.019983291625976562, -0.017208099365234375, -0.014432907104492188, -0.01165771484375, -0.008882522583007812, -0.006107330322265625, -0.0033321380615234375, -0.00055694580078125, 0.0022182464599609375, 0.004993438720703125, 0.0077686309814453125, 0.0105438232421875, 0.013319015502929688, 0.016094207763671875, 0.018869400024414062, 0.02164459228515625, 0.024419784545898438, 0.027194976806640625, 0.029970169067382812, 0.032745361328125, 0.03552055358886719, 0.038295745849609375, 0.04107093811035156, 0.04384613037109375, 0.04662132263183594, 0.049396514892578125, 0.05217170715332031, 0.0549468994140625, 0.05772209167480469, 0.060497283935546875, 0.06327247619628906, 0.06604766845703125, 0.06882286071777344, 0.07159805297851562, 0.07437324523925781, 0.0771484375]}, "gradients/decoder.transformer.h.20.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 897.0, 122.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.10068836063146591, -0.09885051101446152, -0.09701266884803772, -0.09517481923103333, -0.09333697706460953, -0.09149912744760513, -0.08966128528118134, -0.08782343566417694, -0.08598558604717255, -0.08414773643016815, -0.08230989426374435, -0.08047204464673996, -0.07863420248031616, -0.07679635286331177, -0.07495850324630737, -0.07312066107988358, -0.07128281891345978, -0.06944496929645538, -0.06760712713003159, -0.06576927751302719, -0.0639314353466034, -0.062093585729599, -0.0602557398378849, -0.05841789394617081, -0.05658004432916641, -0.054742198437452316, -0.05290435254573822, -0.051066502928733826, -0.04922865703701973, -0.047390811145305634, -0.04555296525359154, -0.04371511936187744, -0.04187726974487305, -0.04003942385315895, -0.038201577961444855, -0.03636372834444046, -0.034525882452726364, -0.03268803656101227, -0.030850190669298172, -0.029012344777584076, -0.02717449888586998, -0.025336652994155884, -0.02349880523979664, -0.021660959348082542, -0.019823113456368446, -0.0179852657020092, -0.016147419810295105, -0.014309573918581009, -0.012471728026866913, -0.010633881203830242, -0.008796035312116146, -0.006958188489079475, -0.005120342131704092, -0.0032824957743287086, -0.001444648951292038, 0.0003931969404220581, 0.002231043763458729, 0.004068890120834112, 0.0059067364782094955, 0.007744583301246166, 0.009582430124282837, 0.011420276015996933, 0.013258122839033604, 0.0150959687307477, 0.01693381555378437]}, "gradients/decoder.transformer.h.20.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 5.0, 6.0, 8.0, 11.0, 19.0, 15.0, 20.0, 23.0, 39.0, 35.0, 41.0, 43.0, 39.0, 56.0, 59.0, 57.0, 55.0, 58.0, 49.0, 54.0, 57.0, 55.0, 33.0, 30.0, 39.0, 32.0, 12.0, 18.0, 12.0, 9.0, 12.0, 5.0, 4.0, 1.0, 1.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0021790266036987305, -0.002082289196550846, -0.0019855517894029617, -0.0018888143822550774, -0.001792076975107193, -0.0016953395679593086, -0.0015986021608114243, -0.0015018647536635399, -0.0014051273465156555, -0.0013083899393677711, -0.0012116525322198868, -0.0011149151250720024, -0.001018177717924118, -0.0009214403107762337, -0.0008247029036283493, -0.0007279654964804649, -0.0006312280893325806, -0.0005344906821846962, -0.00043775327503681183, -0.00034101586788892746, -0.0002442784607410431, -0.00014754105359315872, -5.080364644527435e-05, 4.5933760702610016e-05, 0.00014267116785049438, 0.00023940857499837875, 0.0003361459821462631, 0.0004328833892941475, 0.0005296207964420319, 0.0006263582035899162, 0.0007230956107378006, 0.000819833017885685, 0.0009165704250335693, 0.0010133078321814537, 0.001110045239329338, 0.0012067826464772224, 0.0013035200536251068, 0.0014002574607729912, 0.0014969948679208755, 0.00159373227506876, 0.0016904696822166443, 0.0017872070893645287, 0.001883944496512413, 0.0019806819036602974, 0.0020774193108081818, 0.002174156717956066, 0.0022708941251039505, 0.002367631532251835, 0.0024643689393997192, 0.0025611063465476036, 0.002657843753695488, 0.0027545811608433723, 0.0028513185679912567, 0.002948055975139141, 0.0030447933822870255, 0.00314153078943491, 0.003238268196582794, 0.0033350056037306786, 0.003431743010878563, 0.0035284804180264473, 0.0036252178251743317, 0.003721955232322216, 0.0038186926394701004, 0.003915430046617985, 0.004012167453765869]}, "gradients/decoder.transformer.h.20.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 1.0, 3.0, 2.0, 2.0, 1.0, 4.0, 7.0, 11.0, 5.0, 5.0, 13.0, 14.0, 21.0, 12.0, 24.0, 31.0, 23.0, 32.0, 35.0, 25.0, 42.0, 41.0, 43.0, 56.0, 31.0, 46.0, 43.0, 44.0, 50.0, 51.0, 41.0, 30.0, 36.0, 23.0, 23.0, 29.0, 17.0, 21.0, 12.0, 18.0, 9.0, 13.0, 10.0, 5.0, 4.0, 4.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.25390625, -6.04058837890625, -5.8272705078125, -5.61395263671875, -5.400634765625, -5.18731689453125, -4.9739990234375, -4.76068115234375, -4.54736328125, -4.33404541015625, -4.1207275390625, -3.90740966796875, -3.694091796875, -3.48077392578125, -3.2674560546875, -3.05413818359375, -2.8408203125, -2.62750244140625, -2.4141845703125, -2.20086669921875, -1.987548828125, -1.77423095703125, -1.5609130859375, -1.34759521484375, -1.13427734375, -0.92095947265625, -0.7076416015625, -0.49432373046875, -0.281005859375, -0.06768798828125, 0.1456298828125, 0.35894775390625, 0.572265625, 0.78558349609375, 0.9989013671875, 1.21221923828125, 1.425537109375, 1.63885498046875, 1.8521728515625, 2.06549072265625, 2.27880859375, 2.49212646484375, 2.7054443359375, 2.91876220703125, 3.132080078125, 3.34539794921875, 3.5587158203125, 3.77203369140625, 3.9853515625, 4.19866943359375, 4.4119873046875, 4.62530517578125, 4.838623046875, 5.05194091796875, 5.2652587890625, 5.47857666015625, 5.69189453125, 5.90521240234375, 6.1185302734375, 6.33184814453125, 6.545166015625, 6.75848388671875, 6.9718017578125, 7.18511962890625, 7.3984375]}, "gradients/decoder.transformer.h.20.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 3.0, 4.0, 0.0, 3.0, 1.0, 7.0, 7.0, 18.0, 15.0, 30.0, 43.0, 74.0, 107.0, 165.0, 257.0, 445.0, 843.0, 1608.0, 3523.0, 9369.0, 32464.0, 183876.0, 632891.0, 141505.0, 26655.0, 8009.0, 3202.0, 1578.0, 768.0, 440.0, 229.0, 138.0, 100.0, 73.0, 38.0, 30.0, 18.0, 9.0, 8.0, 4.0, 4.0, 4.0, 1.0, 3.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.1015625, -5.8707275390625, -5.639892578125, -5.4090576171875, -5.17822265625, -4.9473876953125, -4.716552734375, -4.4857177734375, -4.2548828125, -4.0240478515625, -3.793212890625, -3.5623779296875, -3.33154296875, -3.1007080078125, -2.869873046875, -2.6390380859375, -2.408203125, -2.1773681640625, -1.946533203125, -1.7156982421875, -1.48486328125, -1.2540283203125, -1.023193359375, -0.7923583984375, -0.5615234375, -0.3306884765625, -0.099853515625, 0.1309814453125, 0.36181640625, 0.5926513671875, 0.823486328125, 1.0543212890625, 1.28515625, 1.5159912109375, 1.746826171875, 1.9776611328125, 2.20849609375, 2.4393310546875, 2.670166015625, 2.9010009765625, 3.1318359375, 3.3626708984375, 3.593505859375, 3.8243408203125, 4.05517578125, 4.2860107421875, 4.516845703125, 4.7476806640625, 4.978515625, 5.2093505859375, 5.440185546875, 5.6710205078125, 5.90185546875, 6.1326904296875, 6.363525390625, 6.5943603515625, 6.8251953125, 7.0560302734375, 7.286865234375, 7.5177001953125, 7.74853515625, 7.9793701171875, 8.210205078125, 8.4410400390625, 8.671875]}, "gradients/decoder.transformer.h.20.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 7.0, 5.0, 5.0, 4.0, 5.0, 3.0, 5.0, 5.0, 7.0, 7.0, 11.0, 14.0, 13.0, 22.0, 18.0, 18.0, 26.0, 27.0, 32.0, 50.0, 40.0, 35.0, 48.0, 58.0, 100.0, 1491.0, 462.0, 99.0, 53.0, 52.0, 44.0, 35.0, 32.0, 42.0, 23.0, 28.0, 22.0, 20.0, 18.0, 6.0, 9.0, 9.0, 10.0, 11.0, 3.0, 6.0, 5.0, 5.0, 4.0, 3.0, 3.0, 3.0, 0.0, 2.0, 1.0, 1.0], "bins": [-23.828125, -23.127197265625, -22.42626953125, -21.725341796875, -21.0244140625, -20.323486328125, -19.62255859375, -18.921630859375, -18.220703125, -17.519775390625, -16.81884765625, -16.117919921875, -15.4169921875, -14.716064453125, -14.01513671875, -13.314208984375, -12.61328125, -11.912353515625, -11.21142578125, -10.510498046875, -9.8095703125, -9.108642578125, -8.40771484375, -7.706787109375, -7.005859375, -6.304931640625, -5.60400390625, -4.903076171875, -4.2021484375, -3.501220703125, -2.80029296875, -2.099365234375, -1.3984375, -0.697509765625, 0.00341796875, 0.704345703125, 1.4052734375, 2.106201171875, 2.80712890625, 3.508056640625, 4.208984375, 4.909912109375, 5.61083984375, 6.311767578125, 7.0126953125, 7.713623046875, 8.41455078125, 9.115478515625, 9.81640625, 10.517333984375, 11.21826171875, 11.919189453125, 12.6201171875, 13.321044921875, 14.02197265625, 14.722900390625, 15.423828125, 16.124755859375, 16.82568359375, 17.526611328125, 18.2275390625, 18.928466796875, 19.62939453125, 20.330322265625, 21.03125]}, "gradients/decoder.transformer.h.20.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 1.0, 2.0, 2.0, 5.0, 3.0, 4.0, 7.0, 5.0, 11.0, 8.0, 9.0, 13.0, 7.0, 8.0, 23.0, 24.0, 32.0, 42.0, 45.0, 80.0, 95.0, 156.0, 260.0, 550.0, 1794.0, 12871.0, 2804063.0, 317413.0, 5810.0, 1160.0, 429.0, 214.0, 149.0, 92.0, 77.0, 54.0, 39.0, 24.0, 23.0, 21.0, 15.0, 9.0, 17.0, 7.0, 6.0, 6.0, 6.0, 2.0, 7.0, 5.0, 3.0, 7.0, 3.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-33.8125, -32.6796875, -31.546875, -30.4140625, -29.28125, -28.1484375, -27.015625, -25.8828125, -24.75, -23.6171875, -22.484375, -21.3515625, -20.21875, -19.0859375, -17.953125, -16.8203125, -15.6875, -14.5546875, -13.421875, -12.2890625, -11.15625, -10.0234375, -8.890625, -7.7578125, -6.625, -5.4921875, -4.359375, -3.2265625, -2.09375, -0.9609375, 0.171875, 1.3046875, 2.4375, 3.5703125, 4.703125, 5.8359375, 6.96875, 8.1015625, 9.234375, 10.3671875, 11.5, 12.6328125, 13.765625, 14.8984375, 16.03125, 17.1640625, 18.296875, 19.4296875, 20.5625, 21.6953125, 22.828125, 23.9609375, 25.09375, 26.2265625, 27.359375, 28.4921875, 29.625, 30.7578125, 31.890625, 33.0234375, 34.15625, 35.2890625, 36.421875, 37.5546875, 38.6875]}, "gradients/decoder.transformer.h.20.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 0.0, 2.0, 314.0, 685.0, 17.0], "bins": [-401.2157287597656, -394.7601623535156, -388.3045654296875, -381.8489990234375, -375.3934326171875, -368.9378356933594, -362.4822692871094, -356.0267028808594, -349.57110595703125, -343.11553955078125, -336.6599426269531, -330.2043762207031, -323.7488098144531, -317.293212890625, -310.837646484375, -304.382080078125, -297.926513671875, -291.470947265625, -285.0153503417969, -278.5597839355469, -272.1042175292969, -265.64862060546875, -259.19305419921875, -252.73748779296875, -246.28189086914062, -239.82630920410156, -233.37074279785156, -226.9151611328125, -220.45957946777344, -214.00401306152344, -207.54843139648438, -201.09286499023438, -194.63729858398438, -188.1817169189453, -181.7261505126953, -175.27056884765625, -168.8149871826172, -162.3594207763672, -155.90383911132812, -149.44827270507812, -142.99267578125, -136.53709411621094, -130.08152770996094, -123.62594604492188, -117.17037200927734, -110.71479797363281, -104.25921630859375, -97.80364227294922, -91.34806823730469, -84.89249420166016, -78.4369125366211, -71.98133850097656, -65.52576446533203, -59.070186614990234, -52.61460876464844, -46.159034729003906, -39.703460693359375, -33.24788284301758, -26.792308807373047, -20.33673095703125, -13.881155014038086, -7.425579071044922, -0.970001220703125, 5.485572814941406, 11.94115161895752]}, "gradients/decoder.transformer.h.20.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 3.0, 2.0, 5.0, 2.0, 6.0, 5.0, 6.0, 7.0, 10.0, 9.0, 13.0, 11.0, 19.0, 15.0, 18.0, 25.0, 27.0, 23.0, 29.0, 28.0, 38.0, 29.0, 32.0, 24.0, 36.0, 39.0, 46.0, 52.0, 44.0, 44.0, 29.0, 29.0, 38.0, 32.0, 24.0, 20.0, 24.0, 17.0, 14.0, 21.0, 19.0, 17.0, 16.0, 6.0, 15.0, 8.0, 6.0, 10.0, 7.0, 3.0, 3.0, 3.0, 3.0, 3.0, 0.0, 1.0, 2.0], "bins": [-71.29705810546875, -69.14692687988281, -66.9968032836914, -64.84667205810547, -62.6965446472168, -60.546417236328125, -58.39628601074219, -56.246158599853516, -54.096031188964844, -51.94590377807617, -49.795772552490234, -47.64564514160156, -45.49551773071289, -43.34539031982422, -41.19525909423828, -39.04513168334961, -36.89500045776367, -34.744873046875, -32.59474182128906, -30.44461441040039, -28.29448699951172, -26.144357681274414, -23.99422836303711, -21.844100952148438, -19.693971633911133, -17.543842315673828, -15.393714904785156, -13.243585586547852, -11.093457221984863, -8.943328857421875, -6.79319953918457, -4.643071174621582, -2.4929428100585938, -0.34281420707702637, 1.807314395904541, 3.9574432373046875, 6.107571601867676, 8.257699966430664, 10.407829284667969, 12.557957649230957, 14.708086013793945, 16.85821533203125, 19.008342742919922, 21.158472061157227, 23.30860137939453, 25.458728790283203, 27.608858108520508, 29.758987426757812, 31.909114837646484, 34.059242248535156, 36.209373474121094, 38.359500885009766, 40.50962829589844, 42.659759521484375, 44.80988693237305, 46.96001434326172, 49.110145568847656, 51.26027297973633, 53.410404205322266, 55.56053161621094, 57.71065902709961, 59.86078643798828, 62.01091766357422, 64.16104888916016, 66.31117248535156]}, "gradients/decoder.transformer.h.19.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 2.0, 3.0, 3.0, 1.0, 2.0, 8.0, 10.0, 4.0, 7.0, 7.0, 8.0, 16.0, 26.0, 13.0, 27.0, 23.0, 30.0, 29.0, 39.0, 31.0, 33.0, 49.0, 47.0, 35.0, 34.0, 47.0, 46.0, 41.0, 53.0, 36.0, 39.0, 40.0, 35.0, 23.0, 21.0, 30.0, 21.0, 22.0, 13.0, 11.0, 13.0, 10.0, 13.0, 5.0, 4.0, 2.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.2109375, -5.99774169921875, -5.7845458984375, -5.57135009765625, -5.358154296875, -5.14495849609375, -4.9317626953125, -4.71856689453125, -4.50537109375, -4.29217529296875, -4.0789794921875, -3.86578369140625, -3.652587890625, -3.43939208984375, -3.2261962890625, -3.01300048828125, -2.7998046875, -2.58660888671875, -2.3734130859375, -2.16021728515625, -1.947021484375, -1.73382568359375, -1.5206298828125, -1.30743408203125, -1.09423828125, -0.88104248046875, -0.6678466796875, -0.45465087890625, -0.241455078125, -0.02825927734375, 0.1849365234375, 0.39813232421875, 0.611328125, 0.82452392578125, 1.0377197265625, 1.25091552734375, 1.464111328125, 1.67730712890625, 1.8905029296875, 2.10369873046875, 2.31689453125, 2.53009033203125, 2.7432861328125, 2.95648193359375, 3.169677734375, 3.38287353515625, 3.5960693359375, 3.80926513671875, 4.0224609375, 4.23565673828125, 4.4488525390625, 4.66204833984375, 4.875244140625, 5.08843994140625, 5.3016357421875, 5.51483154296875, 5.72802734375, 5.94122314453125, 6.1544189453125, 6.36761474609375, 6.580810546875, 6.79400634765625, 7.0072021484375, 7.22039794921875, 7.43359375]}, "gradients/decoder.transformer.h.19.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 3.0, 1.0, 1.0, 1.0, 1.0, 6.0, 5.0, 6.0, 7.0, 11.0, 17.0, 22.0, 25.0, 33.0, 37.0, 81.0, 110.0, 183.0, 431.0, 924.0, 2655.0, 9065.0, 36056.0, 190296.0, 1730874.0, 1943737.0, 224659.0, 40120.0, 10049.0, 2912.0, 973.0, 416.0, 210.0, 117.0, 61.0, 45.0, 40.0, 27.0, 20.0, 15.0, 14.0, 13.0, 6.0, 7.0, 4.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-15.1328125, -14.5731201171875, -14.013427734375, -13.4537353515625, -12.89404296875, -12.3343505859375, -11.774658203125, -11.2149658203125, -10.6552734375, -10.0955810546875, -9.535888671875, -8.9761962890625, -8.41650390625, -7.8568115234375, -7.297119140625, -6.7374267578125, -6.177734375, -5.6180419921875, -5.058349609375, -4.4986572265625, -3.93896484375, -3.3792724609375, -2.819580078125, -2.2598876953125, -1.7001953125, -1.1405029296875, -0.580810546875, -0.0211181640625, 0.53857421875, 1.0982666015625, 1.657958984375, 2.2176513671875, 2.77734375, 3.3370361328125, 3.896728515625, 4.4564208984375, 5.01611328125, 5.5758056640625, 6.135498046875, 6.6951904296875, 7.2548828125, 7.8145751953125, 8.374267578125, 8.9339599609375, 9.49365234375, 10.0533447265625, 10.613037109375, 11.1727294921875, 11.732421875, 12.2921142578125, 12.851806640625, 13.4114990234375, 13.97119140625, 14.5308837890625, 15.090576171875, 15.6502685546875, 16.2099609375, 16.7696533203125, 17.329345703125, 17.8890380859375, 18.44873046875, 19.0084228515625, 19.568115234375, 20.1278076171875, 20.6875]}, "gradients/decoder.transformer.h.19.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 4.0, 3.0, 10.0, 10.0, 14.0, 28.0, 30.0, 52.0, 64.0, 84.0, 121.0, 150.0, 227.0, 284.0, 406.0, 477.0, 504.0, 440.0, 314.0, 245.0, 172.0, 129.0, 92.0, 72.0, 41.0, 40.0, 22.0, 8.0, 13.0, 7.0, 3.0, 6.0, 4.0, 6.0, 2.0, 2.0, 1.0], "bins": [-28.515625, -27.854248046875, -27.19287109375, -26.531494140625, -25.8701171875, -25.208740234375, -24.54736328125, -23.885986328125, -23.224609375, -22.563232421875, -21.90185546875, -21.240478515625, -20.5791015625, -19.917724609375, -19.25634765625, -18.594970703125, -17.93359375, -17.272216796875, -16.61083984375, -15.949462890625, -15.2880859375, -14.626708984375, -13.96533203125, -13.303955078125, -12.642578125, -11.981201171875, -11.31982421875, -10.658447265625, -9.9970703125, -9.335693359375, -8.67431640625, -8.012939453125, -7.3515625, -6.690185546875, -6.02880859375, -5.367431640625, -4.7060546875, -4.044677734375, -3.38330078125, -2.721923828125, -2.060546875, -1.399169921875, -0.73779296875, -0.076416015625, 0.5849609375, 1.246337890625, 1.90771484375, 2.569091796875, 3.23046875, 3.891845703125, 4.55322265625, 5.214599609375, 5.8759765625, 6.537353515625, 7.19873046875, 7.860107421875, 8.521484375, 9.182861328125, 9.84423828125, 10.505615234375, 11.1669921875, 11.828369140625, 12.48974609375, 13.151123046875, 13.8125]}, "gradients/decoder.transformer.h.19.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 5.0, 1.0, 8.0, 9.0, 14.0, 16.0, 20.0, 32.0, 56.0, 77.0, 128.0, 220.0, 461.0, 1963.0, 42221.0, 3772571.0, 369068.0, 5884.0, 800.0, 300.0, 154.0, 92.0, 71.0, 37.0, 33.0, 19.0, 13.0, 10.0, 4.0, 3.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-71.3125, -68.740234375, -66.16796875, -63.595703125, -61.0234375, -58.451171875, -55.87890625, -53.306640625, -50.734375, -48.162109375, -45.58984375, -43.017578125, -40.4453125, -37.873046875, -35.30078125, -32.728515625, -30.15625, -27.583984375, -25.01171875, -22.439453125, -19.8671875, -17.294921875, -14.72265625, -12.150390625, -9.578125, -7.005859375, -4.43359375, -1.861328125, 0.7109375, 3.283203125, 5.85546875, 8.427734375, 11.0, 13.572265625, 16.14453125, 18.716796875, 21.2890625, 23.861328125, 26.43359375, 29.005859375, 31.578125, 34.150390625, 36.72265625, 39.294921875, 41.8671875, 44.439453125, 47.01171875, 49.583984375, 52.15625, 54.728515625, 57.30078125, 59.873046875, 62.4453125, 65.017578125, 67.58984375, 70.162109375, 72.734375, 75.306640625, 77.87890625, 80.451171875, 83.0234375, 85.595703125, 88.16796875, 90.740234375, 93.3125]}, "gradients/decoder.transformer.h.19.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 5.0, 19.0, 36.0, 89.0, 207.0, 202.0, 205.0, 147.0, 66.0, 23.0, 12.0, 2.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-103.1478500366211, -97.66360473632812, -92.17935943603516, -86.69511413574219, -81.21086883544922, -75.72662353515625, -70.24238586425781, -64.75813293457031, -59.27389144897461, -53.78964614868164, -48.30540084838867, -42.82115936279297, -37.3369140625, -31.8526668548584, -26.368423461914062, -20.884178161621094, -15.399932861328125, -9.915687561035156, -4.431443214416504, 1.0528011322021484, 6.537046432495117, 12.021291732788086, 17.505535125732422, 22.98978042602539, 28.47402572631836, 33.95827102661133, 39.4425163269043, 44.9267578125, 50.41100311279297, 55.89524841308594, 61.379493713378906, 66.86373901367188, 72.34797668457031, 77.83222198486328, 83.31646728515625, 88.80071258544922, 94.28495788574219, 99.76920318603516, 105.25344848632812, 110.73768615722656, 116.22193908691406, 121.70618438720703, 127.1904296875, 132.67466735839844, 138.15892028808594, 143.64315795898438, 149.12741088867188, 154.6116485595703, 160.09588623046875, 165.5801239013672, 171.0643768310547, 176.54861450195312, 182.03286743164062, 187.51710510253906, 193.00135803222656, 198.485595703125, 203.9698486328125, 209.45408630371094, 214.93833923339844, 220.42257690429688, 225.90682983398438, 231.3910675048828, 236.8753204345703, 242.35955810546875, 247.84381103515625]}, "gradients/decoder.transformer.h.19.ln_2.bias": {"_type": "histogram", "values": [2.0, 2.0, 2.0, 2.0, 1.0, 5.0, 4.0, 10.0, 5.0, 6.0, 9.0, 11.0, 16.0, 5.0, 23.0, 15.0, 19.0, 18.0, 19.0, 29.0, 31.0, 29.0, 38.0, 33.0, 27.0, 32.0, 35.0, 39.0, 30.0, 37.0, 50.0, 36.0, 36.0, 34.0, 41.0, 33.0, 26.0, 32.0, 24.0, 21.0, 19.0, 19.0, 16.0, 13.0, 15.0, 10.0, 14.0, 4.0, 9.0, 9.0, 5.0, 5.0, 5.0, 3.0, 2.0, 3.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-52.90128707885742, -51.10117721557617, -49.30106735229492, -47.50095748901367, -45.70085144042969, -43.90074157714844, -42.10063171386719, -40.30052185058594, -38.50041198730469, -36.70030212402344, -34.90019226074219, -33.10008239746094, -31.29997444152832, -29.49986457824707, -27.699756622314453, -25.899646759033203, -24.099536895751953, -22.299427032470703, -20.499317169189453, -18.699209213256836, -16.899099349975586, -15.098989486694336, -13.298880577087402, -11.498771667480469, -9.698661804199219, -7.898552417755127, -6.098443031311035, -4.298333644866943, -2.4982242584228516, -0.6981143951416016, 1.101994514465332, 2.9021034240722656, 4.70220947265625, 6.502318859100342, 8.302428245544434, 10.102537155151367, 11.902647018432617, 13.702756881713867, 15.5028657913208, 17.302974700927734, 19.103084564208984, 20.903194427490234, 22.703304290771484, 24.5034122467041, 26.30352210998535, 28.1036319732666, 29.90373992919922, 31.70384979248047, 33.50395965576172, 35.30406951904297, 37.10417938232422, 38.90428924560547, 40.70439910888672, 42.50450897216797, 44.30461502075195, 46.1047248840332, 47.90483474731445, 49.7049446105957, 51.50505447387695, 53.3051643371582, 55.10527038574219, 56.90538024902344, 58.70549011230469, 60.50559997558594, 62.30570983886719]}, "gradients/decoder.transformer.h.19.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 5.0, 1.0, 5.0, 1.0, 1.0, 4.0, 2.0, 4.0, 9.0, 10.0, 17.0, 11.0, 21.0, 17.0, 25.0, 26.0, 25.0, 26.0, 33.0, 37.0, 28.0, 42.0, 47.0, 41.0, 54.0, 41.0, 44.0, 49.0, 39.0, 34.0, 45.0, 38.0, 31.0, 29.0, 22.0, 31.0, 19.0, 21.0, 16.0, 19.0, 15.0, 12.0, 8.0, 5.0, 2.0, 3.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.12890625, -6.90179443359375, -6.6746826171875, -6.44757080078125, -6.220458984375, -5.99334716796875, -5.7662353515625, -5.53912353515625, -5.31201171875, -5.08489990234375, -4.8577880859375, -4.63067626953125, -4.403564453125, -4.17645263671875, -3.9493408203125, -3.72222900390625, -3.4951171875, -3.26800537109375, -3.0408935546875, -2.81378173828125, -2.586669921875, -2.35955810546875, -2.1324462890625, -1.90533447265625, -1.67822265625, -1.45111083984375, -1.2239990234375, -0.99688720703125, -0.769775390625, -0.54266357421875, -0.3155517578125, -0.08843994140625, 0.138671875, 0.36578369140625, 0.5928955078125, 0.82000732421875, 1.047119140625, 1.27423095703125, 1.5013427734375, 1.72845458984375, 1.95556640625, 2.18267822265625, 2.4097900390625, 2.63690185546875, 2.864013671875, 3.09112548828125, 3.3182373046875, 3.54534912109375, 3.7724609375, 3.99957275390625, 4.2266845703125, 4.45379638671875, 4.680908203125, 4.90802001953125, 5.1351318359375, 5.36224365234375, 5.58935546875, 5.81646728515625, 6.0435791015625, 6.27069091796875, 6.497802734375, 6.72491455078125, 6.9520263671875, 7.17913818359375, 7.40625]}, "gradients/decoder.transformer.h.19.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 4.0, 5.0, 4.0, 15.0, 14.0, 28.0, 29.0, 43.0, 64.0, 85.0, 147.0, 194.0, 252.0, 370.0, 570.0, 789.0, 1136.0, 1626.0, 2366.0, 3314.0, 5020.0, 7114.0, 10343.0, 15360.0, 22925.0, 34565.0, 52553.0, 82022.0, 124878.0, 174092.0, 166674.0, 117031.0, 76451.0, 49317.0, 32087.0, 21303.0, 14448.0, 9514.0, 6642.0, 4600.0, 3193.0, 2248.0, 1556.0, 1081.0, 753.0, 488.0, 374.0, 262.0, 200.0, 138.0, 85.0, 72.0, 49.0, 26.0, 15.0, 12.0, 12.0, 6.0, 5.0, 3.0, 1.0, 1.0], "bins": [-0.662109375, -0.6413650512695312, -0.6206207275390625, -0.5998764038085938, -0.579132080078125, -0.5583877563476562, -0.5376434326171875, -0.5168991088867188, -0.49615478515625, -0.47541046142578125, -0.4546661376953125, -0.43392181396484375, -0.413177490234375, -0.39243316650390625, -0.3716888427734375, -0.35094451904296875, -0.3302001953125, -0.30945587158203125, -0.2887115478515625, -0.26796722412109375, -0.247222900390625, -0.22647857666015625, -0.2057342529296875, -0.18498992919921875, -0.16424560546875, -0.14350128173828125, -0.1227569580078125, -0.10201263427734375, -0.081268310546875, -0.06052398681640625, -0.0397796630859375, -0.01903533935546875, 0.001708984375, 0.02245330810546875, 0.0431976318359375, 0.06394195556640625, 0.084686279296875, 0.10543060302734375, 0.1261749267578125, 0.14691925048828125, 0.16766357421875, 0.18840789794921875, 0.2091522216796875, 0.22989654541015625, 0.250640869140625, 0.27138519287109375, 0.2921295166015625, 0.31287384033203125, 0.3336181640625, 0.35436248779296875, 0.3751068115234375, 0.39585113525390625, 0.416595458984375, 0.43733978271484375, 0.4580841064453125, 0.47882843017578125, 0.49957275390625, 0.5203170776367188, 0.5410614013671875, 0.5618057250976562, 0.582550048828125, 0.6032943725585938, 0.6240386962890625, 0.6447830200195312, 0.66552734375]}, "gradients/decoder.transformer.h.19.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 3.0, 4.0, 1.0, 6.0, 1.0, 3.0, 8.0, 9.0, 7.0, 13.0, 23.0, 10.0, 15.0, 22.0, 17.0, 34.0, 25.0, 33.0, 30.0, 41.0, 34.0, 37.0, 45.0, 39.0, 35.0, 1061.0, 40.0, 51.0, 45.0, 29.0, 33.0, 34.0, 27.0, 28.0, 25.0, 28.0, 21.0, 21.0, 18.0, 16.0, 16.0, 18.0, 3.0, 2.0, 3.0, 10.0, 6.0, 5.0, 3.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-4.0859375, -3.95458984375, -3.8232421875, -3.69189453125, -3.560546875, -3.42919921875, -3.2978515625, -3.16650390625, -3.03515625, -2.90380859375, -2.7724609375, -2.64111328125, -2.509765625, -2.37841796875, -2.2470703125, -2.11572265625, -1.984375, -1.85302734375, -1.7216796875, -1.59033203125, -1.458984375, -1.32763671875, -1.1962890625, -1.06494140625, -0.93359375, -0.80224609375, -0.6708984375, -0.53955078125, -0.408203125, -0.27685546875, -0.1455078125, -0.01416015625, 0.1171875, 0.24853515625, 0.3798828125, 0.51123046875, 0.642578125, 0.77392578125, 0.9052734375, 1.03662109375, 1.16796875, 1.29931640625, 1.4306640625, 1.56201171875, 1.693359375, 1.82470703125, 1.9560546875, 2.08740234375, 2.21875, 2.35009765625, 2.4814453125, 2.61279296875, 2.744140625, 2.87548828125, 3.0068359375, 3.13818359375, 3.26953125, 3.40087890625, 3.5322265625, 3.66357421875, 3.794921875, 3.92626953125, 4.0576171875, 4.18896484375, 4.3203125]}, "gradients/decoder.transformer.h.19.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 7.0, 6.0, 11.0, 20.0, 40.0, 42.0, 57.0, 70.0, 127.0, 153.0, 224.0, 307.0, 401.0, 607.0, 843.0, 1222.0, 1760.0, 2506.0, 3647.0, 5086.0, 7499.0, 11108.0, 16284.0, 23783.0, 35481.0, 51968.0, 76150.0, 107098.0, 143895.0, 1187481.0, 125030.0, 92278.0, 64516.0, 43837.0, 29578.0, 20075.0, 13475.0, 9375.0, 6472.0, 4434.0, 3116.0, 2105.0, 1418.0, 1038.0, 717.0, 528.0, 369.0, 248.0, 187.0, 145.0, 89.0, 58.0, 53.0, 41.0, 18.0, 23.0, 13.0, 10.0, 11.0, 2.0, 5.0, 3.0], "bins": [-0.41552734375, -0.4022407531738281, -0.38895416259765625, -0.3756675720214844, -0.3623809814453125, -0.3490943908691406, -0.33580780029296875, -0.3225212097167969, -0.309234619140625, -0.2959480285644531, -0.28266143798828125, -0.2693748474121094, -0.2560882568359375, -0.24280166625976562, -0.22951507568359375, -0.21622848510742188, -0.20294189453125, -0.18965530395507812, -0.17636871337890625, -0.16308212280273438, -0.1497955322265625, -0.13650894165039062, -0.12322235107421875, -0.10993576049804688, -0.096649169921875, -0.08336257934570312, -0.07007598876953125, -0.056789398193359375, -0.0435028076171875, -0.030216217041015625, -0.01692962646484375, -0.003643035888671875, 0.0096435546875, 0.022930145263671875, 0.03621673583984375, 0.049503326416015625, 0.0627899169921875, 0.07607650756835938, 0.08936309814453125, 0.10264968872070312, 0.115936279296875, 0.12922286987304688, 0.14250946044921875, 0.15579605102539062, 0.1690826416015625, 0.18236923217773438, 0.19565582275390625, 0.20894241333007812, 0.22222900390625, 0.23551559448242188, 0.24880218505859375, 0.2620887756347656, 0.2753753662109375, 0.2886619567871094, 0.30194854736328125, 0.3152351379394531, 0.328521728515625, 0.3418083190917969, 0.35509490966796875, 0.3683815002441406, 0.3816680908203125, 0.3949546813964844, 0.40824127197265625, 0.4215278625488281, 0.434814453125]}, "gradients/decoder.transformer.h.19.crossattention.q_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 2.0, 2.0, 3.0, 0.0, 1.0, 5.0, 8.0, 6.0, 9.0, 6.0, 12.0, 9.0, 21.0, 24.0, 26.0, 34.0, 34.0, 33.0, 37.0, 40.0, 46.0, 57.0, 56.0, 61.0, 55.0, 54.0, 50.0, 46.0, 42.0, 30.0, 41.0, 35.0, 20.0, 19.0, 11.0, 15.0, 15.0, 9.0, 7.0, 5.0, 4.0, 9.0, 4.0, 1.0, 4.0, 1.0, 2.0, 3.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00415802001953125, -0.0040076375007629395, -0.003857254981994629, -0.0037068724632263184, -0.003556489944458008, -0.0034061074256896973, -0.0032557249069213867, -0.003105342388153076, -0.0029549598693847656, -0.002804577350616455, -0.0026541948318481445, -0.002503812313079834, -0.0023534297943115234, -0.002203047275543213, -0.0020526647567749023, -0.0019022822380065918, -0.0017518997192382812, -0.0016015172004699707, -0.0014511346817016602, -0.0013007521629333496, -0.001150369644165039, -0.0009999871253967285, -0.000849604606628418, -0.0006992220878601074, -0.0005488395690917969, -0.00039845705032348633, -0.0002480745315551758, -9.769201278686523e-05, 5.269050598144531e-05, 0.00020307302474975586, 0.0003534555435180664, 0.000503838062286377, 0.0006542205810546875, 0.000804603099822998, 0.0009549856185913086, 0.0011053681373596191, 0.0012557506561279297, 0.0014061331748962402, 0.0015565156936645508, 0.0017068982124328613, 0.0018572807312011719, 0.0020076632499694824, 0.002158045768737793, 0.0023084282875061035, 0.002458810806274414, 0.0026091933250427246, 0.002759575843811035, 0.0029099583625793457, 0.0030603408813476562, 0.003210723400115967, 0.0033611059188842773, 0.003511488437652588, 0.0036618709564208984, 0.003812253475189209, 0.0039626359939575195, 0.00411301851272583, 0.004263401031494141, 0.004413783550262451, 0.004564166069030762, 0.004714548587799072, 0.004864931106567383, 0.005015313625335693, 0.005165696144104004, 0.0053160786628723145, 0.005466461181640625]}, "gradients/decoder.transformer.h.19.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 1.0, 0.0, 2.0, 2.0, 1.0, 2.0, 2.0, 7.0, 5.0, 5.0, 7.0, 9.0, 15.0, 13.0, 22.0, 22.0, 19.0, 41.0, 42.0, 81.0, 102.0, 163.0, 270.0, 407.0, 1215.0, 189964.0, 852609.0, 2158.0, 480.0, 270.0, 165.0, 129.0, 82.0, 65.0, 42.0, 28.0, 29.0, 14.0, 17.0, 15.0, 8.0, 13.0, 7.0, 4.0, 6.0, 1.0, 3.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-0.119873046875, -0.11655521392822266, -0.11323738098144531, -0.10991954803466797, -0.10660171508789062, -0.10328388214111328, -0.09996604919433594, -0.0966482162475586, -0.09333038330078125, -0.0900125503540039, -0.08669471740722656, -0.08337688446044922, -0.08005905151367188, -0.07674121856689453, -0.07342338562011719, -0.07010555267333984, -0.0667877197265625, -0.06346988677978516, -0.06015205383300781, -0.05683422088623047, -0.053516387939453125, -0.05019855499267578, -0.04688072204589844, -0.043562889099121094, -0.04024505615234375, -0.036927223205566406, -0.03360939025878906, -0.03029155731201172, -0.026973724365234375, -0.02365589141845703, -0.020338058471679688, -0.017020225524902344, -0.013702392578125, -0.010384559631347656, -0.0070667266845703125, -0.0037488937377929688, -0.000431060791015625, 0.0028867721557617188, 0.0062046051025390625, 0.009522438049316406, 0.01284027099609375, 0.016158103942871094, 0.019475936889648438, 0.02279376983642578, 0.026111602783203125, 0.02942943572998047, 0.03274726867675781, 0.036065101623535156, 0.0393829345703125, 0.042700767517089844, 0.04601860046386719, 0.04933643341064453, 0.052654266357421875, 0.05597209930419922, 0.05928993225097656, 0.0626077651977539, 0.06592559814453125, 0.0692434310913086, 0.07256126403808594, 0.07587909698486328, 0.07919692993164062, 0.08251476287841797, 0.08583259582519531, 0.08915042877197266, 0.09246826171875]}, "gradients/decoder.transformer.h.19.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 6.0, 326.0, 655.0, 26.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.03520818427205086, -0.03423449769616127, -0.03326081112027168, -0.032287124544382095, -0.03131343796849251, -0.03033974952995777, -0.029366062954068184, -0.028392376378178596, -0.02741868793964386, -0.026445001363754272, -0.025471314787864685, -0.024497628211975098, -0.02352393977344036, -0.022550253197550774, -0.021576566621661186, -0.0206028800457716, -0.01962919346988201, -0.018655506893992424, -0.017681820318102837, -0.0167081318795681, -0.015734445303678513, -0.014760758727788925, -0.013787072151899338, -0.01281338557600975, -0.011839698068797588, -0.010866011492908001, -0.009892323985695839, -0.008918637409806252, -0.007944950833916664, -0.006971263326704502, -0.005997576750814915, -0.00502388970926404, -0.0040502045303583145, -0.00307651748880744, -0.0021028306800872087, -0.0011291438713669777, -0.00015545682981610298, 0.0008182302117347717, 0.0017919167876243591, 0.002765603829175234, 0.0037392908707261086, 0.004712977912276983, 0.005686664953827858, 0.006660351529717445, 0.00763403857126832, 0.008607725612819195, 0.009581412188708782, 0.010555099695920944, 0.011528786271810532, 0.012502472847700119, 0.013476160354912281, 0.014449846930801868, 0.01542353443801403, 0.016397221013903618, 0.017370907589793205, 0.018344594165682793, 0.01931828260421753, 0.020291969180107117, 0.021265655755996704, 0.02223934233188629, 0.023213030770421028, 0.024186717346310616, 0.025160403922200203, 0.02613409049808979, 0.027107777073979378]}, "gradients/decoder.transformer.h.19.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 1.0, 2.0, 4.0, 5.0, 2.0, 6.0, 15.0, 13.0, 15.0, 14.0, 11.0, 17.0, 21.0, 28.0, 19.0, 31.0, 27.0, 36.0, 36.0, 38.0, 39.0, 39.0, 47.0, 48.0, 46.0, 41.0, 33.0, 47.0, 39.0, 33.0, 42.0, 26.0, 21.0, 31.0, 13.0, 14.0, 19.0, 23.0, 14.0, 13.0, 11.0, 10.0, 3.0, 7.0, 3.0, 2.0, 2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 3.0, 1.0], "bins": [-0.0027774572372436523, -0.002688334323465824, -0.002599211409687996, -0.0025100884959101677, -0.0024209655821323395, -0.0023318426683545113, -0.002242719754576683, -0.002153596840798855, -0.0020644739270210266, -0.0019753510132431984, -0.0018862280994653702, -0.001797105185687542, -0.0017079822719097137, -0.0016188593581318855, -0.0015297364443540573, -0.001440613530576229, -0.0013514906167984009, -0.0012623677030205727, -0.0011732447892427444, -0.0010841218754649162, -0.000994998961687088, -0.0009058760479092598, -0.0008167531341314316, -0.0007276302203536034, -0.0006385073065757751, -0.0005493843927979469, -0.0004602614790201187, -0.0003711385652422905, -0.0002820156514644623, -0.00019289273768663406, -0.00010376982390880585, -1.464691013097763e-05, 7.447600364685059e-05, 0.0001635989174246788, 0.000252721831202507, 0.00034184474498033524, 0.00043096765875816345, 0.0005200905725359917, 0.0006092134863138199, 0.0006983364000916481, 0.0007874593138694763, 0.0008765822276473045, 0.0009657051414251328, 0.001054828055202961, 0.0011439509689807892, 0.0012330738827586174, 0.0013221967965364456, 0.0014113197103142738, 0.001500442624092102, 0.0015895655378699303, 0.0016786884516477585, 0.0017678113654255867, 0.001856934279203415, 0.0019460571929812431, 0.0020351801067590714, 0.0021243030205368996, 0.002213425934314728, 0.002302548848092556, 0.002391671761870384, 0.0024807946756482124, 0.0025699175894260406, 0.002659040503203869, 0.002748163416981697, 0.0028372863307595253, 0.0029264092445373535]}, "gradients/decoder.transformer.h.19.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 5.0, 1.0, 5.0, 1.0, 1.0, 4.0, 2.0, 4.0, 9.0, 10.0, 17.0, 11.0, 21.0, 17.0, 25.0, 26.0, 25.0, 26.0, 33.0, 37.0, 28.0, 42.0, 47.0, 41.0, 54.0, 41.0, 44.0, 49.0, 39.0, 34.0, 45.0, 38.0, 31.0, 29.0, 22.0, 31.0, 19.0, 21.0, 16.0, 19.0, 15.0, 12.0, 8.0, 5.0, 2.0, 3.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.12890625, -6.90179443359375, -6.6746826171875, -6.44757080078125, -6.220458984375, -5.99334716796875, -5.7662353515625, -5.53912353515625, -5.31201171875, -5.08489990234375, -4.8577880859375, -4.63067626953125, -4.403564453125, -4.17645263671875, -3.9493408203125, -3.72222900390625, -3.4951171875, -3.26800537109375, -3.0408935546875, -2.81378173828125, -2.586669921875, -2.35955810546875, -2.1324462890625, -1.90533447265625, -1.67822265625, -1.45111083984375, -1.2239990234375, -0.99688720703125, -0.769775390625, -0.54266357421875, -0.3155517578125, -0.08843994140625, 0.138671875, 0.36578369140625, 0.5928955078125, 0.82000732421875, 1.047119140625, 1.27423095703125, 1.5013427734375, 1.72845458984375, 1.95556640625, 2.18267822265625, 2.4097900390625, 2.63690185546875, 2.864013671875, 3.09112548828125, 3.3182373046875, 3.54534912109375, 3.7724609375, 3.99957275390625, 4.2266845703125, 4.45379638671875, 4.680908203125, 4.90802001953125, 5.1351318359375, 5.36224365234375, 5.58935546875, 5.81646728515625, 6.0435791015625, 6.27069091796875, 6.497802734375, 6.72491455078125, 6.9520263671875, 7.17913818359375, 7.40625]}, "gradients/decoder.transformer.h.19.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 4.0, 11.0, 16.0, 15.0, 17.0, 22.0, 41.0, 47.0, 71.0, 103.0, 119.0, 185.0, 216.0, 312.0, 383.0, 537.0, 654.0, 919.0, 1300.0, 1907.0, 3176.0, 5488.0, 10844.0, 24223.0, 65177.0, 215318.0, 433600.0, 181612.0, 56227.0, 21580.0, 9909.0, 5071.0, 2916.0, 1788.0, 1233.0, 911.0, 601.0, 523.0, 387.0, 262.0, 226.0, 159.0, 126.0, 97.0, 77.0, 48.0, 30.0, 29.0, 16.0, 15.0, 5.0, 10.0, 3.0, 3.0, 3.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.86328125, -4.70855712890625, -4.5538330078125, -4.39910888671875, -4.244384765625, -4.08966064453125, -3.9349365234375, -3.78021240234375, -3.62548828125, -3.47076416015625, -3.3160400390625, -3.16131591796875, -3.006591796875, -2.85186767578125, -2.6971435546875, -2.54241943359375, -2.3876953125, -2.23297119140625, -2.0782470703125, -1.92352294921875, -1.768798828125, -1.61407470703125, -1.4593505859375, -1.30462646484375, -1.14990234375, -0.99517822265625, -0.8404541015625, -0.68572998046875, -0.531005859375, -0.37628173828125, -0.2215576171875, -0.06683349609375, 0.087890625, 0.24261474609375, 0.3973388671875, 0.55206298828125, 0.706787109375, 0.86151123046875, 1.0162353515625, 1.17095947265625, 1.32568359375, 1.48040771484375, 1.6351318359375, 1.78985595703125, 1.944580078125, 2.09930419921875, 2.2540283203125, 2.40875244140625, 2.5634765625, 2.71820068359375, 2.8729248046875, 3.02764892578125, 3.182373046875, 3.33709716796875, 3.4918212890625, 3.64654541015625, 3.80126953125, 3.95599365234375, 4.1107177734375, 4.26544189453125, 4.420166015625, 4.57489013671875, 4.7296142578125, 4.88433837890625, 5.0390625]}, "gradients/decoder.transformer.h.19.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 3.0, 1.0, 4.0, 4.0, 4.0, 3.0, 8.0, 9.0, 12.0, 14.0, 11.0, 21.0, 16.0, 16.0, 17.0, 30.0, 19.0, 30.0, 32.0, 29.0, 47.0, 40.0, 41.0, 63.0, 117.0, 1590.0, 306.0, 122.0, 59.0, 54.0, 36.0, 47.0, 45.0, 25.0, 24.0, 26.0, 20.0, 14.0, 17.0, 7.0, 10.0, 14.0, 9.0, 8.0, 6.0, 6.0, 2.0, 10.0, 5.0, 1.0, 3.0, 2.0, 1.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 2.0], "bins": [-21.640625, -20.916015625, -20.19140625, -19.466796875, -18.7421875, -18.017578125, -17.29296875, -16.568359375, -15.84375, -15.119140625, -14.39453125, -13.669921875, -12.9453125, -12.220703125, -11.49609375, -10.771484375, -10.046875, -9.322265625, -8.59765625, -7.873046875, -7.1484375, -6.423828125, -5.69921875, -4.974609375, -4.25, -3.525390625, -2.80078125, -2.076171875, -1.3515625, -0.626953125, 0.09765625, 0.822265625, 1.546875, 2.271484375, 2.99609375, 3.720703125, 4.4453125, 5.169921875, 5.89453125, 6.619140625, 7.34375, 8.068359375, 8.79296875, 9.517578125, 10.2421875, 10.966796875, 11.69140625, 12.416015625, 13.140625, 13.865234375, 14.58984375, 15.314453125, 16.0390625, 16.763671875, 17.48828125, 18.212890625, 18.9375, 19.662109375, 20.38671875, 21.111328125, 21.8359375, 22.560546875, 23.28515625, 24.009765625, 24.734375]}, "gradients/decoder.transformer.h.19.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 3.0, 1.0, 2.0, 2.0, 2.0, 2.0, 6.0, 8.0, 2.0, 5.0, 5.0, 10.0, 7.0, 13.0, 15.0, 8.0, 13.0, 24.0, 25.0, 39.0, 58.0, 64.0, 85.0, 127.0, 169.0, 316.0, 729.0, 3181.0, 52417.0, 3039508.0, 44375.0, 2819.0, 672.0, 310.0, 181.0, 112.0, 88.0, 71.0, 43.0, 40.0, 26.0, 21.0, 15.0, 20.0, 18.0, 11.0, 18.0, 8.0, 7.0, 4.0, 4.0, 4.0, 4.0, 1.0, 4.0, 1.0, 0.0, 1.0, 1.0], "bins": [-41.5, -40.29638671875, -39.0927734375, -37.88916015625, -36.685546875, -35.48193359375, -34.2783203125, -33.07470703125, -31.87109375, -30.66748046875, -29.4638671875, -28.26025390625, -27.056640625, -25.85302734375, -24.6494140625, -23.44580078125, -22.2421875, -21.03857421875, -19.8349609375, -18.63134765625, -17.427734375, -16.22412109375, -15.0205078125, -13.81689453125, -12.61328125, -11.40966796875, -10.2060546875, -9.00244140625, -7.798828125, -6.59521484375, -5.3916015625, -4.18798828125, -2.984375, -1.78076171875, -0.5771484375, 0.62646484375, 1.830078125, 3.03369140625, 4.2373046875, 5.44091796875, 6.64453125, 7.84814453125, 9.0517578125, 10.25537109375, 11.458984375, 12.66259765625, 13.8662109375, 15.06982421875, 16.2734375, 17.47705078125, 18.6806640625, 19.88427734375, 21.087890625, 22.29150390625, 23.4951171875, 24.69873046875, 25.90234375, 27.10595703125, 28.3095703125, 29.51318359375, 30.716796875, 31.92041015625, 33.1240234375, 34.32763671875, 35.53125]}, "gradients/decoder.transformer.h.19.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 9.0, 53.0, 205.0, 418.0, 262.0, 62.0, 4.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-64.32811737060547, -61.589595794677734, -58.851070404052734, -56.112548828125, -53.3740234375, -50.635501861572266, -47.89698028564453, -45.15845489501953, -42.4199333190918, -39.68141174316406, -36.94288635253906, -34.20436477661133, -31.46584129333496, -28.727317810058594, -25.98879623413086, -23.250272750854492, -20.511749267578125, -17.773225784301758, -15.034703254699707, -12.296180725097656, -9.557657241821289, -6.819133758544922, -4.0806121826171875, -1.3420886993408203, 1.3964347839355469, 4.134957790374756, 6.873480796813965, 9.612003326416016, 12.350526809692383, 15.08905029296875, 17.827571868896484, 20.56609535217285, 23.30461883544922, 26.043142318725586, 28.781665802001953, 31.520187377929688, 34.25871276855469, 36.99723434448242, 39.735755920410156, 42.474281311035156, 45.21280288696289, 47.951324462890625, 50.689849853515625, 53.42837142944336, 56.166893005371094, 58.905418395996094, 61.64393997192383, 64.38246154785156, 67.12098693847656, 69.85951232910156, 72.59803009033203, 75.33655548095703, 78.07508087158203, 80.8135986328125, 83.5521240234375, 86.2906494140625, 89.0291748046875, 91.7677001953125, 94.50621795654297, 97.24474334716797, 99.98326873779297, 102.72178649902344, 105.46031188964844, 108.19883728027344, 110.9373550415039]}, "gradients/decoder.transformer.h.19.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 0.0, 2.0, 2.0, 2.0, 4.0, 1.0, 4.0, 3.0, 6.0, 7.0, 7.0, 17.0, 11.0, 14.0, 21.0, 21.0, 21.0, 29.0, 26.0, 29.0, 40.0, 35.0, 43.0, 42.0, 38.0, 41.0, 43.0, 37.0, 52.0, 38.0, 47.0, 37.0, 33.0, 26.0, 29.0, 35.0, 23.0, 24.0, 15.0, 22.0, 16.0, 12.0, 11.0, 12.0, 9.0, 4.0, 7.0, 8.0, 2.0, 2.0, 4.0, 2.0, 2.0, 1.0], "bins": [-92.89500427246094, -90.39706420898438, -87.89911651611328, -85.40117645263672, -82.90323638916016, -80.40528869628906, -77.9073486328125, -75.40940856933594, -72.91146850585938, -70.41352844238281, -67.91558074951172, -65.41764068603516, -62.919700622558594, -60.421756744384766, -57.92381286621094, -55.425872802734375, -52.92792510986328, -50.42998123168945, -47.93204116821289, -45.43409729003906, -42.9361572265625, -40.43821334838867, -37.940269470214844, -35.44232940673828, -32.94438552856445, -30.446443557739258, -27.948501586914062, -25.450557708740234, -22.95261573791504, -20.454673767089844, -17.956729888916016, -15.45878791809082, -12.960853576660156, -10.462911605834961, -7.964968681335449, -5.467026233673096, -2.969083786010742, -0.4711418151855469, 2.026801109313965, 4.524744033813477, 7.022686004638672, 9.520627975463867, 12.018570899963379, 14.51651382446289, 17.014455795288086, 19.51239776611328, 22.01034164428711, 24.508283615112305, 27.0062255859375, 29.504167556762695, 32.00210952758789, 34.50005340576172, 36.99799346923828, 39.49593734741211, 41.99388122558594, 44.4918212890625, 46.98976516723633, 49.487709045410156, 51.98564910888672, 54.48359298706055, 56.981536865234375, 59.47947692871094, 61.977420806884766, 64.4753646850586, 66.97330474853516]}, "gradients/decoder.transformer.h.18.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 4.0, 1.0, 2.0, 2.0, 2.0, 3.0, 3.0, 8.0, 12.0, 12.0, 10.0, 13.0, 27.0, 22.0, 26.0, 26.0, 30.0, 28.0, 24.0, 36.0, 42.0, 43.0, 46.0, 49.0, 46.0, 48.0, 47.0, 38.0, 36.0, 39.0, 34.0, 45.0, 28.0, 26.0, 27.0, 21.0, 20.0, 16.0, 19.0, 16.0, 15.0, 8.0, 4.0, 5.0, 0.0, 3.0, 3.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.6171875, -7.384521484375, -7.15185546875, -6.919189453125, -6.6865234375, -6.453857421875, -6.22119140625, -5.988525390625, -5.755859375, -5.523193359375, -5.29052734375, -5.057861328125, -4.8251953125, -4.592529296875, -4.35986328125, -4.127197265625, -3.89453125, -3.661865234375, -3.42919921875, -3.196533203125, -2.9638671875, -2.731201171875, -2.49853515625, -2.265869140625, -2.033203125, -1.800537109375, -1.56787109375, -1.335205078125, -1.1025390625, -0.869873046875, -0.63720703125, -0.404541015625, -0.171875, 0.060791015625, 0.29345703125, 0.526123046875, 0.7587890625, 0.991455078125, 1.22412109375, 1.456787109375, 1.689453125, 1.922119140625, 2.15478515625, 2.387451171875, 2.6201171875, 2.852783203125, 3.08544921875, 3.318115234375, 3.55078125, 3.783447265625, 4.01611328125, 4.248779296875, 4.4814453125, 4.714111328125, 4.94677734375, 5.179443359375, 5.412109375, 5.644775390625, 5.87744140625, 6.110107421875, 6.3427734375, 6.575439453125, 6.80810546875, 7.040771484375, 7.2734375]}, "gradients/decoder.transformer.h.18.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 3.0, 3.0, 4.0, 5.0, 5.0, 4.0, 9.0, 15.0, 21.0, 18.0, 28.0, 19.0, 37.0, 60.0, 82.0, 168.0, 345.0, 880.0, 3064.0, 13595.0, 82454.0, 1033398.0, 2737716.0, 279121.0, 33819.0, 6527.0, 1715.0, 542.0, 252.0, 92.0, 81.0, 45.0, 40.0, 34.0, 14.0, 22.0, 16.0, 11.0, 6.0, 8.0, 9.0, 2.0, 3.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-22.953125, -22.24169921875, -21.5302734375, -20.81884765625, -20.107421875, -19.39599609375, -18.6845703125, -17.97314453125, -17.26171875, -16.55029296875, -15.8388671875, -15.12744140625, -14.416015625, -13.70458984375, -12.9931640625, -12.28173828125, -11.5703125, -10.85888671875, -10.1474609375, -9.43603515625, -8.724609375, -8.01318359375, -7.3017578125, -6.59033203125, -5.87890625, -5.16748046875, -4.4560546875, -3.74462890625, -3.033203125, -2.32177734375, -1.6103515625, -0.89892578125, -0.1875, 0.52392578125, 1.2353515625, 1.94677734375, 2.658203125, 3.36962890625, 4.0810546875, 4.79248046875, 5.50390625, 6.21533203125, 6.9267578125, 7.63818359375, 8.349609375, 9.06103515625, 9.7724609375, 10.48388671875, 11.1953125, 11.90673828125, 12.6181640625, 13.32958984375, 14.041015625, 14.75244140625, 15.4638671875, 16.17529296875, 16.88671875, 17.59814453125, 18.3095703125, 19.02099609375, 19.732421875, 20.44384765625, 21.1552734375, 21.86669921875, 22.578125]}, "gradients/decoder.transformer.h.18.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 0.0, 9.0, 6.0, 4.0, 7.0, 6.0, 16.0, 43.0, 32.0, 68.0, 101.0, 144.0, 233.0, 311.0, 478.0, 609.0, 602.0, 453.0, 278.0, 224.0, 172.0, 99.0, 79.0, 36.0, 23.0, 21.0, 13.0, 4.0, 5.0, 4.0, 4.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-33.40625, -32.568603515625, -31.73095703125, -30.893310546875, -30.0556640625, -29.218017578125, -28.38037109375, -27.542724609375, -26.705078125, -25.867431640625, -25.02978515625, -24.192138671875, -23.3544921875, -22.516845703125, -21.67919921875, -20.841552734375, -20.00390625, -19.166259765625, -18.32861328125, -17.490966796875, -16.6533203125, -15.815673828125, -14.97802734375, -14.140380859375, -13.302734375, -12.465087890625, -11.62744140625, -10.789794921875, -9.9521484375, -9.114501953125, -8.27685546875, -7.439208984375, -6.6015625, -5.763916015625, -4.92626953125, -4.088623046875, -3.2509765625, -2.413330078125, -1.57568359375, -0.738037109375, 0.099609375, 0.937255859375, 1.77490234375, 2.612548828125, 3.4501953125, 4.287841796875, 5.12548828125, 5.963134765625, 6.80078125, 7.638427734375, 8.47607421875, 9.313720703125, 10.1513671875, 10.989013671875, 11.82666015625, 12.664306640625, 13.501953125, 14.339599609375, 15.17724609375, 16.014892578125, 16.8525390625, 17.690185546875, 18.52783203125, 19.365478515625, 20.203125]}, "gradients/decoder.transformer.h.18.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 1.0, 5.0, 1.0, 3.0, 3.0, 4.0, 8.0, 18.0, 17.0, 23.0, 37.0, 44.0, 64.0, 92.0, 159.0, 235.0, 427.0, 847.0, 3156.0, 33052.0, 1442390.0, 2654526.0, 52888.0, 4088.0, 1024.0, 452.0, 236.0, 154.0, 96.0, 68.0, 56.0, 36.0, 22.0, 17.0, 7.0, 12.0, 9.0, 3.0, 2.0, 4.0, 1.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-52.65625, -50.78076171875, -48.9052734375, -47.02978515625, -45.154296875, -43.27880859375, -41.4033203125, -39.52783203125, -37.65234375, -35.77685546875, -33.9013671875, -32.02587890625, -30.150390625, -28.27490234375, -26.3994140625, -24.52392578125, -22.6484375, -20.77294921875, -18.8974609375, -17.02197265625, -15.146484375, -13.27099609375, -11.3955078125, -9.52001953125, -7.64453125, -5.76904296875, -3.8935546875, -2.01806640625, -0.142578125, 1.73291015625, 3.6083984375, 5.48388671875, 7.359375, 9.23486328125, 11.1103515625, 12.98583984375, 14.861328125, 16.73681640625, 18.6123046875, 20.48779296875, 22.36328125, 24.23876953125, 26.1142578125, 27.98974609375, 29.865234375, 31.74072265625, 33.6162109375, 35.49169921875, 37.3671875, 39.24267578125, 41.1181640625, 42.99365234375, 44.869140625, 46.74462890625, 48.6201171875, 50.49560546875, 52.37109375, 54.24658203125, 56.1220703125, 57.99755859375, 59.873046875, 61.74853515625, 63.6240234375, 65.49951171875, 67.375]}, "gradients/decoder.transformer.h.18.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 2.0, 2.0, 9.0, 12.0, 24.0, 81.0, 123.0, 200.0, 211.0, 164.0, 102.0, 52.0, 19.0, 11.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-77.49533081054688, -72.02521514892578, -66.55509185791016, -61.08497619628906, -55.61486053466797, -50.14474105834961, -44.67462158203125, -39.204505920410156, -33.7343864440918, -28.26426887512207, -22.794151306152344, -17.324031829833984, -11.853914260864258, -6.383796691894531, -0.9136772155761719, 4.556438446044922, 10.026557922363281, 15.496675491333008, 20.966793060302734, 26.436912536621094, 31.90703010559082, 37.37714767456055, 42.847267150878906, 48.3173828125, 53.78750228881836, 59.25762176513672, 64.72773742675781, 70.19786071777344, 75.66797637939453, 81.13809204101562, 86.60821533203125, 92.07832336425781, 97.54844665527344, 103.01856231689453, 108.48868560791016, 113.95880126953125, 119.42891693115234, 124.89903259277344, 130.36915588378906, 135.83926391601562, 141.30938720703125, 146.77951049804688, 152.24961853027344, 157.71974182128906, 163.1898651123047, 168.65997314453125, 174.13009643554688, 179.6002197265625, 185.07034301757812, 190.54046630859375, 196.0105743408203, 201.48069763183594, 206.95082092285156, 212.42092895507812, 217.89105224609375, 223.36117553710938, 228.83128356933594, 234.30140686035156, 239.77151489257812, 245.24163818359375, 250.71176147460938, 256.181884765625, 261.6519775390625, 267.1221008300781, 272.59222412109375]}, "gradients/decoder.transformer.h.18.ln_2.bias": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 4.0, 0.0, 1.0, 2.0, 2.0, 5.0, 3.0, 2.0, 2.0, 8.0, 2.0, 5.0, 10.0, 13.0, 12.0, 16.0, 14.0, 25.0, 22.0, 27.0, 29.0, 22.0, 42.0, 32.0, 24.0, 34.0, 43.0, 35.0, 36.0, 40.0, 30.0, 39.0, 43.0, 41.0, 46.0, 37.0, 41.0, 37.0, 28.0, 22.0, 17.0, 17.0, 19.0, 10.0, 14.0, 7.0, 7.0, 11.0, 5.0, 4.0, 7.0, 3.0, 7.0, 5.0, 2.0, 2.0, 4.0, 0.0, 2.0, 0.0, 1.0], "bins": [-66.65666961669922, -64.62848663330078, -62.600303649902344, -60.57212448120117, -58.543941497802734, -56.5157585144043, -54.487579345703125, -52.45939636230469, -50.43121337890625, -48.40303039550781, -46.374847412109375, -44.3466682434082, -42.318485260009766, -40.29030227661133, -38.262123107910156, -36.23394012451172, -34.20575714111328, -32.177574157714844, -30.14939308166504, -28.121212005615234, -26.093029022216797, -24.06484603881836, -22.036664962768555, -20.00848388671875, -17.980300903320312, -15.952118873596191, -13.92393684387207, -11.89575481414795, -9.867572784423828, -7.839390754699707, -5.811208724975586, -3.783026695251465, -1.7548370361328125, 0.2733449935913086, 2.3015270233154297, 4.329709053039551, 6.357891082763672, 8.386073112487793, 10.414255142211914, 12.442437171936035, 14.470619201660156, 16.498802185058594, 18.5269832611084, 20.555164337158203, 22.58334732055664, 24.611530303955078, 26.639711380004883, 28.667892456054688, 30.696075439453125, 32.72425842285156, 34.75244140625, 36.78062057495117, 38.80880355834961, 40.83698654174805, 42.86516571044922, 44.893348693847656, 46.921531677246094, 48.94971466064453, 50.97789764404297, 53.00607681274414, 55.03425979614258, 57.062442779541016, 59.09062194824219, 61.118804931640625, 63.14698791503906]}, "gradients/decoder.transformer.h.18.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 2.0, 4.0, 5.0, 4.0, 5.0, 8.0, 6.0, 14.0, 13.0, 17.0, 13.0, 18.0, 18.0, 28.0, 37.0, 34.0, 33.0, 53.0, 39.0, 35.0, 41.0, 42.0, 47.0, 35.0, 38.0, 51.0, 40.0, 40.0, 35.0, 41.0, 31.0, 21.0, 31.0, 26.0, 22.0, 20.0, 18.0, 14.0, 7.0, 11.0, 4.0, 3.0, 2.0, 2.0, 2.0, 2.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-8.21875, -7.9754638671875, -7.732177734375, -7.4888916015625, -7.24560546875, -7.0023193359375, -6.759033203125, -6.5157470703125, -6.2724609375, -6.0291748046875, -5.785888671875, -5.5426025390625, -5.29931640625, -5.0560302734375, -4.812744140625, -4.5694580078125, -4.326171875, -4.0828857421875, -3.839599609375, -3.5963134765625, -3.35302734375, -3.1097412109375, -2.866455078125, -2.6231689453125, -2.3798828125, -2.1365966796875, -1.893310546875, -1.6500244140625, -1.40673828125, -1.1634521484375, -0.920166015625, -0.6768798828125, -0.43359375, -0.1903076171875, 0.052978515625, 0.2962646484375, 0.53955078125, 0.7828369140625, 1.026123046875, 1.2694091796875, 1.5126953125, 1.7559814453125, 1.999267578125, 2.2425537109375, 2.48583984375, 2.7291259765625, 2.972412109375, 3.2156982421875, 3.458984375, 3.7022705078125, 3.945556640625, 4.1888427734375, 4.43212890625, 4.6754150390625, 4.918701171875, 5.1619873046875, 5.4052734375, 5.6485595703125, 5.891845703125, 6.1351318359375, 6.37841796875, 6.6217041015625, 6.864990234375, 7.1082763671875, 7.3515625]}, "gradients/decoder.transformer.h.18.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 3.0, 5.0, 3.0, 5.0, 7.0, 13.0, 22.0, 29.0, 50.0, 73.0, 98.0, 138.0, 247.0, 293.0, 490.0, 717.0, 1041.0, 1540.0, 2235.0, 3302.0, 4867.0, 7267.0, 10847.0, 16722.0, 25282.0, 39954.0, 63906.0, 101821.0, 157668.0, 191560.0, 150813.0, 97050.0, 60457.0, 37655.0, 24601.0, 15854.0, 10466.0, 6833.0, 4804.0, 3116.0, 2137.0, 1439.0, 1011.0, 671.0, 447.0, 308.0, 259.0, 140.0, 105.0, 65.0, 50.0, 27.0, 22.0, 17.0, 14.0, 5.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.72021484375, -0.6973114013671875, -0.674407958984375, -0.6515045166015625, -0.62860107421875, -0.6056976318359375, -0.582794189453125, -0.5598907470703125, -0.5369873046875, -0.5140838623046875, -0.491180419921875, -0.4682769775390625, -0.44537353515625, -0.4224700927734375, -0.399566650390625, -0.3766632080078125, -0.353759765625, -0.3308563232421875, -0.307952880859375, -0.2850494384765625, -0.26214599609375, -0.2392425537109375, -0.216339111328125, -0.1934356689453125, -0.1705322265625, -0.1476287841796875, -0.124725341796875, -0.1018218994140625, -0.07891845703125, -0.0560150146484375, -0.033111572265625, -0.0102081298828125, 0.0126953125, 0.0355987548828125, 0.058502197265625, 0.0814056396484375, 0.10430908203125, 0.1272125244140625, 0.150115966796875, 0.1730194091796875, 0.1959228515625, 0.2188262939453125, 0.241729736328125, 0.2646331787109375, 0.28753662109375, 0.3104400634765625, 0.333343505859375, 0.3562469482421875, 0.379150390625, 0.4020538330078125, 0.424957275390625, 0.4478607177734375, 0.47076416015625, 0.4936676025390625, 0.516571044921875, 0.5394744873046875, 0.5623779296875, 0.5852813720703125, 0.608184814453125, 0.6310882568359375, 0.65399169921875, 0.6768951416015625, 0.699798583984375, 0.7227020263671875, 0.74560546875]}, "gradients/decoder.transformer.h.18.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 0.0, 2.0, 2.0, 3.0, 5.0, 2.0, 8.0, 5.0, 3.0, 12.0, 17.0, 13.0, 5.0, 18.0, 18.0, 13.0, 22.0, 20.0, 35.0, 29.0, 20.0, 24.0, 30.0, 38.0, 40.0, 24.0, 32.0, 36.0, 1061.0, 35.0, 38.0, 36.0, 32.0, 34.0, 32.0, 32.0, 25.0, 26.0, 24.0, 25.0, 16.0, 27.0, 21.0, 16.0, 12.0, 11.0, 9.0, 10.0, 6.0, 8.0, 7.0, 4.0, 3.0, 6.0, 0.0, 4.0, 2.0, 3.0, 0.0, 3.0], "bins": [-3.87890625, -3.7586669921875, -3.638427734375, -3.5181884765625, -3.39794921875, -3.2777099609375, -3.157470703125, -3.0372314453125, -2.9169921875, -2.7967529296875, -2.676513671875, -2.5562744140625, -2.43603515625, -2.3157958984375, -2.195556640625, -2.0753173828125, -1.955078125, -1.8348388671875, -1.714599609375, -1.5943603515625, -1.47412109375, -1.3538818359375, -1.233642578125, -1.1134033203125, -0.9931640625, -0.8729248046875, -0.752685546875, -0.6324462890625, -0.51220703125, -0.3919677734375, -0.271728515625, -0.1514892578125, -0.03125, 0.0889892578125, 0.209228515625, 0.3294677734375, 0.44970703125, 0.5699462890625, 0.690185546875, 0.8104248046875, 0.9306640625, 1.0509033203125, 1.171142578125, 1.2913818359375, 1.41162109375, 1.5318603515625, 1.652099609375, 1.7723388671875, 1.892578125, 2.0128173828125, 2.133056640625, 2.2532958984375, 2.37353515625, 2.4937744140625, 2.614013671875, 2.7342529296875, 2.8544921875, 2.9747314453125, 3.094970703125, 3.2152099609375, 3.33544921875, 3.4556884765625, 3.575927734375, 3.6961669921875, 3.81640625]}, "gradients/decoder.transformer.h.18.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 3.0, 2.0, 3.0, 8.0, 6.0, 8.0, 20.0, 32.0, 35.0, 54.0, 92.0, 123.0, 191.0, 267.0, 424.0, 612.0, 833.0, 1288.0, 2057.0, 2995.0, 4609.0, 7050.0, 11078.0, 16974.0, 26217.0, 40987.0, 63878.0, 98141.0, 139641.0, 1211834.0, 153716.0, 109753.0, 72951.0, 46870.0, 29834.0, 19130.0, 11984.0, 8017.0, 5165.0, 3343.0, 2243.0, 1490.0, 1006.0, 681.0, 472.0, 339.0, 220.0, 127.0, 121.0, 72.0, 43.0, 32.0, 24.0, 22.0, 9.0, 10.0, 8.0, 4.0, 1.0, 0.0, 2.0], "bins": [-0.515625, -0.4998283386230469, -0.48403167724609375, -0.4682350158691406, -0.4524383544921875, -0.4366416931152344, -0.42084503173828125, -0.4050483703613281, -0.389251708984375, -0.3734550476074219, -0.35765838623046875, -0.3418617248535156, -0.3260650634765625, -0.3102684020996094, -0.29447174072265625, -0.2786750793457031, -0.26287841796875, -0.24708175659179688, -0.23128509521484375, -0.21548843383789062, -0.1996917724609375, -0.18389511108398438, -0.16809844970703125, -0.15230178833007812, -0.136505126953125, -0.12070846557617188, -0.10491180419921875, -0.08911514282226562, -0.0733184814453125, -0.057521820068359375, -0.04172515869140625, -0.025928497314453125, -0.0101318359375, 0.005664825439453125, 0.02146148681640625, 0.037258148193359375, 0.0530548095703125, 0.06885147094726562, 0.08464813232421875, 0.10044479370117188, 0.116241455078125, 0.13203811645507812, 0.14783477783203125, 0.16363143920898438, 0.1794281005859375, 0.19522476196289062, 0.21102142333984375, 0.22681808471679688, 0.24261474609375, 0.2584114074707031, 0.27420806884765625, 0.2900047302246094, 0.3058013916015625, 0.3215980529785156, 0.33739471435546875, 0.3531913757324219, 0.368988037109375, 0.3847846984863281, 0.40058135986328125, 0.4163780212402344, 0.4321746826171875, 0.4479713439941406, 0.46376800537109375, 0.4795646667480469, 0.495361328125]}, "gradients/decoder.transformer.h.18.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 0.0, 2.0, 2.0, 2.0, 4.0, 4.0, 2.0, 11.0, 5.0, 8.0, 12.0, 32.0, 39.0, 65.0, 89.0, 92.0, 126.0, 136.0, 101.0, 77.0, 57.0, 30.0, 23.0, 22.0, 17.0, 11.0, 7.0, 2.0, 5.0, 1.0, 6.0, 4.0, 6.0, 0.0, 4.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.010711669921875, -0.010397553443908691, -0.010083436965942383, -0.009769320487976074, -0.009455204010009766, -0.009141087532043457, -0.008826971054077148, -0.00851285457611084, -0.008198738098144531, -0.007884621620178223, -0.007570505142211914, -0.0072563886642456055, -0.006942272186279297, -0.006628155708312988, -0.00631403923034668, -0.005999922752380371, -0.0056858062744140625, -0.005371689796447754, -0.005057573318481445, -0.004743456840515137, -0.004429340362548828, -0.0041152238845825195, -0.003801107406616211, -0.0034869909286499023, -0.0031728744506835938, -0.002858757972717285, -0.0025446414947509766, -0.002230525016784668, -0.0019164085388183594, -0.0016022920608520508, -0.0012881755828857422, -0.0009740591049194336, -0.000659942626953125, -0.0003458261489868164, -3.170967102050781e-05, 0.0002824068069458008, 0.0005965232849121094, 0.000910639762878418, 0.0012247562408447266, 0.0015388727188110352, 0.0018529891967773438, 0.0021671056747436523, 0.002481222152709961, 0.0027953386306762695, 0.003109455108642578, 0.0034235715866088867, 0.0037376880645751953, 0.004051804542541504, 0.0043659210205078125, 0.004680037498474121, 0.00499415397644043, 0.005308270454406738, 0.005622386932373047, 0.0059365034103393555, 0.006250619888305664, 0.006564736366271973, 0.006878852844238281, 0.00719296932220459, 0.0075070858001708984, 0.007821202278137207, 0.008135318756103516, 0.008449435234069824, 0.008763551712036133, 0.009077668190002441, 0.00939178466796875]}, "gradients/decoder.transformer.h.18.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 3.0, 2.0, 1.0, 7.0, 5.0, 2.0, 6.0, 6.0, 18.0, 13.0, 22.0, 22.0, 56.0, 98.0, 162.0, 473.0, 4612.0, 1040739.0, 1545.0, 356.0, 167.0, 90.0, 68.0, 31.0, 19.0, 10.0, 5.0, 5.0, 3.0, 2.0, 2.0, 1.0, 4.0, 2.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.219482421875, -0.21226119995117188, -0.20503997802734375, -0.19781875610351562, -0.1905975341796875, -0.18337631225585938, -0.17615509033203125, -0.16893386840820312, -0.161712646484375, -0.15449142456054688, -0.14727020263671875, -0.14004898071289062, -0.1328277587890625, -0.12560653686523438, -0.11838531494140625, -0.11116409301757812, -0.10394287109375, -0.09672164916992188, -0.08950042724609375, -0.08227920532226562, -0.0750579833984375, -0.06783676147460938, -0.06061553955078125, -0.053394317626953125, -0.046173095703125, -0.038951873779296875, -0.03173065185546875, -0.024509429931640625, -0.0172882080078125, -0.010066986083984375, -0.00284576416015625, 0.004375457763671875, 0.0115966796875, 0.018817901611328125, 0.02603912353515625, 0.033260345458984375, 0.0404815673828125, 0.047702789306640625, 0.05492401123046875, 0.062145233154296875, 0.069366455078125, 0.07658767700195312, 0.08380889892578125, 0.09103012084960938, 0.0982513427734375, 0.10547256469726562, 0.11269378662109375, 0.11991500854492188, 0.12713623046875, 0.13435745239257812, 0.14157867431640625, 0.14879989624023438, 0.1560211181640625, 0.16324234008789062, 0.17046356201171875, 0.17768478393554688, 0.184906005859375, 0.19212722778320312, 0.19934844970703125, 0.20656967163085938, 0.2137908935546875, 0.22101211547851562, 0.22823333740234375, 0.23545455932617188, 0.24267578125]}, "gradients/decoder.transformer.h.18.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 4.0, 14.0, 65.0, 202.0, 339.0, 255.0, 105.0, 19.0, 6.0, 3.0, 1.0, 0.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0027733028400689363, -0.0024084763135761023, -0.002043650019913912, -0.0016788234934210777, -0.0013139970833435655, -0.0009491706732660532, -0.0005843441467732191, -0.00021951785311102867, 0.00014530867338180542, 0.0005101350834593177, 0.0008749615517444909, 0.001239788020029664, 0.0016046144301071763, 0.0019694408401846886, 0.0023342673666775227, 0.002699093660339713, 0.003063920186832547, 0.0034287467133253813, 0.0037935730069875717, 0.004158399533480406, 0.00452322605997324, 0.004888052120804787, 0.005252879112958908, 0.005617705173790455, 0.005982531700283289, 0.006347358226776123, 0.006712184753268957, 0.007077011279761791, 0.007441837340593338, 0.007806663867086172, 0.008171490393579006, 0.008536316454410553, 0.0089011425152421, 0.009265968576073647, 0.009630795568227768, 0.009995621629059315, 0.010360448621213436, 0.010725274682044983, 0.011090101674199104, 0.011454927735030651, 0.011819753795862198, 0.012184579856693745, 0.012549406848847866, 0.012914232909679413, 0.013279059901833534, 0.013643885962665081, 0.014008712023496628, 0.01437353901565075, 0.01473836600780487, 0.015103192068636417, 0.015468019060790539, 0.015832845121622086, 0.016197672113776207, 0.01656249910593033, 0.0169273242354393, 0.017292151227593422, 0.017656976357102394, 0.018021803349256516, 0.018386628478765488, 0.01875145547091961, 0.01911628246307373, 0.019481107592582703, 0.019845934584736824, 0.020210761576890945, 0.020575588569045067]}, "gradients/decoder.transformer.h.18.ln_cross_attn.bias": {"_type": "histogram", "values": [7.0, 2.0, 0.0, 2.0, 1.0, 0.0, 2.0, 3.0, 0.0, 4.0, 6.0, 4.0, 11.0, 8.0, 9.0, 8.0, 14.0, 10.0, 18.0, 20.0, 20.0, 13.0, 30.0, 30.0, 31.0, 45.0, 33.0, 41.0, 32.0, 47.0, 27.0, 39.0, 47.0, 33.0, 34.0, 29.0, 38.0, 38.0, 31.0, 42.0, 21.0, 19.0, 25.0, 17.0, 21.0, 17.0, 19.0, 12.0, 14.0, 13.0, 6.0, 6.0, 3.0, 4.0, 5.0, 2.0, 3.0, 2.0, 1.0, 1.0, 2.0, 1.0, 0.0, 1.0], "bins": [-0.0037806034088134766, -0.0036630742251873016, -0.0035455450415611267, -0.0034280158579349518, -0.003310486674308777, -0.003192957490682602, -0.003075428307056427, -0.002957899123430252, -0.002840369939804077, -0.0027228407561779022, -0.0026053115725517273, -0.0024877823889255524, -0.0023702532052993774, -0.0022527240216732025, -0.0021351948380470276, -0.0020176656544208527, -0.0019001364707946777, -0.0017826072871685028, -0.0016650781035423279, -0.001547548919916153, -0.001430019736289978, -0.001312490552663803, -0.0011949613690376282, -0.0010774321854114532, -0.0009599030017852783, -0.0008423738181591034, -0.0007248446345329285, -0.0006073154509067535, -0.0004897862672805786, -0.0003722570836544037, -0.00025472790002822876, -0.00013719871640205383, -1.9669532775878906e-05, 9.785965085029602e-05, 0.00021538883447647095, 0.0003329180181026459, 0.0004504472017288208, 0.0005679763853549957, 0.0006855055689811707, 0.0008030347526073456, 0.0009205639362335205, 0.0010380931198596954, 0.0011556223034858704, 0.0012731514871120453, 0.0013906806707382202, 0.0015082098543643951, 0.00162573903799057, 0.001743268221616745, 0.00186079740524292, 0.001978326588869095, 0.0020958557724952698, 0.0022133849561214447, 0.0023309141397476196, 0.0024484433233737946, 0.0025659725069999695, 0.0026835016906261444, 0.0028010308742523193, 0.0029185600578784943, 0.003036089241504669, 0.003153618425130844, 0.003271147608757019, 0.003388676792383194, 0.003506205976009369, 0.003623735159635544, 0.0037412643432617188]}, "gradients/decoder.transformer.h.18.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 2.0, 4.0, 5.0, 4.0, 5.0, 8.0, 6.0, 14.0, 13.0, 17.0, 13.0, 18.0, 18.0, 28.0, 37.0, 34.0, 33.0, 53.0, 39.0, 35.0, 41.0, 42.0, 47.0, 35.0, 38.0, 51.0, 40.0, 40.0, 35.0, 41.0, 31.0, 21.0, 31.0, 26.0, 22.0, 20.0, 18.0, 14.0, 7.0, 11.0, 4.0, 3.0, 2.0, 2.0, 2.0, 2.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-8.21875, -7.9754638671875, -7.732177734375, -7.4888916015625, -7.24560546875, -7.0023193359375, -6.759033203125, -6.5157470703125, -6.2724609375, -6.0291748046875, -5.785888671875, -5.5426025390625, -5.29931640625, -5.0560302734375, -4.812744140625, -4.5694580078125, -4.326171875, -4.0828857421875, -3.839599609375, -3.5963134765625, -3.35302734375, -3.1097412109375, -2.866455078125, -2.6231689453125, -2.3798828125, -2.1365966796875, -1.893310546875, -1.6500244140625, -1.40673828125, -1.1634521484375, -0.920166015625, -0.6768798828125, -0.43359375, -0.1903076171875, 0.052978515625, 0.2962646484375, 0.53955078125, 0.7828369140625, 1.026123046875, 1.2694091796875, 1.5126953125, 1.7559814453125, 1.999267578125, 2.2425537109375, 2.48583984375, 2.7291259765625, 2.972412109375, 3.2156982421875, 3.458984375, 3.7022705078125, 3.945556640625, 4.1888427734375, 4.43212890625, 4.6754150390625, 4.918701171875, 5.1619873046875, 5.4052734375, 5.6485595703125, 5.891845703125, 6.1351318359375, 6.37841796875, 6.6217041015625, 6.864990234375, 7.1082763671875, 7.3515625]}, "gradients/decoder.transformer.h.18.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 0.0, 2.0, 3.0, 2.0, 3.0, 4.0, 1.0, 2.0, 10.0, 4.0, 10.0, 14.0, 13.0, 36.0, 35.0, 53.0, 67.0, 88.0, 135.0, 219.0, 348.0, 609.0, 949.0, 1506.0, 2740.0, 4801.0, 8982.0, 18507.0, 50851.0, 228677.0, 563860.0, 106434.0, 30748.0, 12989.0, 6798.0, 3711.0, 2071.0, 1223.0, 711.0, 435.0, 291.0, 186.0, 123.0, 87.0, 71.0, 48.0, 29.0, 16.0, 16.0, 13.0, 12.0, 7.0, 3.0, 5.0, 7.0, 1.0, 2.0, 0.0, 2.0, 2.0], "bins": [-9.71875, -9.4351806640625, -9.151611328125, -8.8680419921875, -8.58447265625, -8.3009033203125, -8.017333984375, -7.7337646484375, -7.4501953125, -7.1666259765625, -6.883056640625, -6.5994873046875, -6.31591796875, -6.0323486328125, -5.748779296875, -5.4652099609375, -5.181640625, -4.8980712890625, -4.614501953125, -4.3309326171875, -4.04736328125, -3.7637939453125, -3.480224609375, -3.1966552734375, -2.9130859375, -2.6295166015625, -2.345947265625, -2.0623779296875, -1.77880859375, -1.4952392578125, -1.211669921875, -0.9281005859375, -0.64453125, -0.3609619140625, -0.077392578125, 0.2061767578125, 0.48974609375, 0.7733154296875, 1.056884765625, 1.3404541015625, 1.6240234375, 1.9075927734375, 2.191162109375, 2.4747314453125, 2.75830078125, 3.0418701171875, 3.325439453125, 3.6090087890625, 3.892578125, 4.1761474609375, 4.459716796875, 4.7432861328125, 5.02685546875, 5.3104248046875, 5.593994140625, 5.8775634765625, 6.1611328125, 6.4447021484375, 6.728271484375, 7.0118408203125, 7.29541015625, 7.5789794921875, 7.862548828125, 8.1461181640625, 8.4296875]}, "gradients/decoder.transformer.h.18.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 5.0, 9.0, 7.0, 9.0, 5.0, 8.0, 14.0, 11.0, 14.0, 12.0, 17.0, 16.0, 32.0, 28.0, 21.0, 30.0, 32.0, 38.0, 41.0, 41.0, 69.0, 119.0, 1513.0, 404.0, 106.0, 69.0, 47.0, 35.0, 50.0, 20.0, 31.0, 25.0, 20.0, 17.0, 20.0, 21.0, 25.0, 18.0, 14.0, 7.0, 9.0, 6.0, 2.0, 4.0, 4.0, 4.0, 2.0, 1.0, 0.0, 5.0, 1.0, 0.0, 1.0], "bins": [-25.96875, -25.203857421875, -24.43896484375, -23.674072265625, -22.9091796875, -22.144287109375, -21.37939453125, -20.614501953125, -19.849609375, -19.084716796875, -18.31982421875, -17.554931640625, -16.7900390625, -16.025146484375, -15.26025390625, -14.495361328125, -13.73046875, -12.965576171875, -12.20068359375, -11.435791015625, -10.6708984375, -9.906005859375, -9.14111328125, -8.376220703125, -7.611328125, -6.846435546875, -6.08154296875, -5.316650390625, -4.5517578125, -3.786865234375, -3.02197265625, -2.257080078125, -1.4921875, -0.727294921875, 0.03759765625, 0.802490234375, 1.5673828125, 2.332275390625, 3.09716796875, 3.862060546875, 4.626953125, 5.391845703125, 6.15673828125, 6.921630859375, 7.6865234375, 8.451416015625, 9.21630859375, 9.981201171875, 10.74609375, 11.510986328125, 12.27587890625, 13.040771484375, 13.8056640625, 14.570556640625, 15.33544921875, 16.100341796875, 16.865234375, 17.630126953125, 18.39501953125, 19.159912109375, 19.9248046875, 20.689697265625, 21.45458984375, 22.219482421875, 22.984375]}, "gradients/decoder.transformer.h.18.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 4.0, 9.0, 8.0, 8.0, 3.0, 15.0, 17.0, 30.0, 31.0, 39.0, 42.0, 90.0, 116.0, 219.0, 380.0, 1055.0, 12153.0, 2995597.0, 131145.0, 3330.0, 580.0, 274.0, 163.0, 105.0, 84.0, 54.0, 38.0, 29.0, 19.0, 18.0, 10.0, 9.0, 12.0, 11.0, 2.0, 2.0, 3.0, 5.0, 1.0, 2.0, 0.0, 2.0, 0.0, 2.0, 1.0], "bins": [-71.0625, -69.1298828125, -67.197265625, -65.2646484375, -63.33203125, -61.3994140625, -59.466796875, -57.5341796875, -55.6015625, -53.6689453125, -51.736328125, -49.8037109375, -47.87109375, -45.9384765625, -44.005859375, -42.0732421875, -40.140625, -38.2080078125, -36.275390625, -34.3427734375, -32.41015625, -30.4775390625, -28.544921875, -26.6123046875, -24.6796875, -22.7470703125, -20.814453125, -18.8818359375, -16.94921875, -15.0166015625, -13.083984375, -11.1513671875, -9.21875, -7.2861328125, -5.353515625, -3.4208984375, -1.48828125, 0.4443359375, 2.376953125, 4.3095703125, 6.2421875, 8.1748046875, 10.107421875, 12.0400390625, 13.97265625, 15.9052734375, 17.837890625, 19.7705078125, 21.703125, 23.6357421875, 25.568359375, 27.5009765625, 29.43359375, 31.3662109375, 33.298828125, 35.2314453125, 37.1640625, 39.0966796875, 41.029296875, 42.9619140625, 44.89453125, 46.8271484375, 48.759765625, 50.6923828125, 52.625]}, "gradients/decoder.transformer.h.18.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 9.0, 34.0, 117.0, 264.0, 316.0, 193.0, 58.0, 17.0, 5.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-74.61109924316406, -72.0590591430664, -69.50702667236328, -66.95498657226562, -64.4029541015625, -61.850914001464844, -59.29887771606445, -56.74684143066406, -54.194801330566406, -51.642765045166016, -49.090728759765625, -46.53868865966797, -43.98665237426758, -41.43461608886719, -38.8825798034668, -36.330543518066406, -33.778507232666016, -31.226470947265625, -28.6744327545166, -26.12239646911621, -23.570358276367188, -21.018321990966797, -18.466285705566406, -15.914247512817383, -13.362211227416992, -10.810173988342285, -8.258136749267578, -5.7061004638671875, -3.1540632247924805, -0.6020259857177734, 1.9500102996826172, 4.502048492431641, 7.054084777832031, 9.606122016906738, 12.158159255981445, 14.710195541381836, 17.26223373413086, 19.81427001953125, 22.36630630493164, 24.918344497680664, 27.470380783081055, 30.022417068481445, 32.57445526123047, 35.12649154663086, 37.67852783203125, 40.230567932128906, 42.78260040283203, 45.33464050292969, 47.88667678833008, 50.43871307373047, 52.99074935913086, 55.54278564453125, 58.094825744628906, 60.6468620300293, 63.19889831542969, 65.75093841552734, 68.30297088623047, 70.85501098632812, 73.40704345703125, 75.9590835571289, 78.51111602783203, 81.06315612792969, 83.61518859863281, 86.16722869873047, 88.71926879882812]}, "gradients/decoder.transformer.h.18.ln_1.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 1.0, 0.0, 2.0, 1.0, 2.0, 4.0, 5.0, 5.0, 6.0, 4.0, 7.0, 9.0, 10.0, 8.0, 15.0, 14.0, 13.0, 19.0, 22.0, 25.0, 23.0, 34.0, 36.0, 38.0, 33.0, 40.0, 39.0, 33.0, 34.0, 41.0, 50.0, 44.0, 36.0, 36.0, 38.0, 35.0, 27.0, 43.0, 27.0, 29.0, 17.0, 15.0, 18.0, 15.0, 5.0, 14.0, 8.0, 4.0, 4.0, 5.0, 7.0, 7.0, 4.0, 1.0, 2.0, 2.0, 1.0, 2.0, 0.0, 0.0, 2.0], "bins": [-75.21470642089844, -72.8707275390625, -70.52674865722656, -68.1827621459961, -65.83878326416016, -63.49480438232422, -61.15082550048828, -58.806846618652344, -56.46286392211914, -54.1188850402832, -51.77490234375, -49.43092346191406, -47.086944580078125, -44.74296188354492, -42.398983001708984, -40.05500030517578, -37.711021423339844, -35.367042541503906, -33.0230598449707, -30.679080963134766, -28.335100173950195, -25.991119384765625, -23.647140502929688, -21.303159713745117, -18.959178924560547, -16.615198135375977, -14.271218299865723, -11.927238464355469, -9.583257675170898, -7.239276885986328, -4.895297050476074, -2.5513172149658203, -0.20733642578125, 2.136643886566162, 4.480624198913574, 6.824604511260986, 9.168584823608398, 11.512565612792969, 13.856545448303223, 16.200525283813477, 18.544506072998047, 20.888486862182617, 23.232467651367188, 25.576446533203125, 27.920427322387695, 30.264408111572266, 32.6083869934082, 34.952369689941406, 37.296348571777344, 39.64032745361328, 41.984310150146484, 44.32828903198242, 46.672271728515625, 49.01625061035156, 51.3602294921875, 53.70420837402344, 56.04819107055664, 58.39216995239258, 60.73615264892578, 63.08013153076172, 65.42411041259766, 67.76809692382812, 70.11207580566406, 72.4560546875, 74.80003356933594]}, "gradients/decoder.transformer.h.17.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 3.0, 6.0, 6.0, 6.0, 3.0, 6.0, 12.0, 13.0, 11.0, 22.0, 18.0, 21.0, 26.0, 27.0, 34.0, 42.0, 38.0, 47.0, 38.0, 44.0, 41.0, 43.0, 42.0, 40.0, 45.0, 37.0, 58.0, 36.0, 42.0, 21.0, 21.0, 31.0, 22.0, 25.0, 23.0, 19.0, 8.0, 10.0, 8.0, 5.0, 4.0, 2.0, 2.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0], "bins": [-8.5859375, -8.33270263671875, -8.0794677734375, -7.82623291015625, -7.572998046875, -7.31976318359375, -7.0665283203125, -6.81329345703125, -6.56005859375, -6.30682373046875, -6.0535888671875, -5.80035400390625, -5.547119140625, -5.29388427734375, -5.0406494140625, -4.78741455078125, -4.5341796875, -4.28094482421875, -4.0277099609375, -3.77447509765625, -3.521240234375, -3.26800537109375, -3.0147705078125, -2.76153564453125, -2.50830078125, -2.25506591796875, -2.0018310546875, -1.74859619140625, -1.495361328125, -1.24212646484375, -0.9888916015625, -0.73565673828125, -0.482421875, -0.22918701171875, 0.0240478515625, 0.27728271484375, 0.530517578125, 0.78375244140625, 1.0369873046875, 1.29022216796875, 1.54345703125, 1.79669189453125, 2.0499267578125, 2.30316162109375, 2.556396484375, 2.80963134765625, 3.0628662109375, 3.31610107421875, 3.5693359375, 3.82257080078125, 4.0758056640625, 4.32904052734375, 4.582275390625, 4.83551025390625, 5.0887451171875, 5.34197998046875, 5.59521484375, 5.84844970703125, 6.1016845703125, 6.35491943359375, 6.608154296875, 6.86138916015625, 7.1146240234375, 7.36785888671875, 7.62109375]}, "gradients/decoder.transformer.h.17.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 6.0, 4.0, 3.0, 9.0, 13.0, 33.0, 34.0, 33.0, 60.0, 91.0, 103.0, 179.0, 290.0, 472.0, 768.0, 1304.0, 2362.0, 4516.0, 9140.0, 19413.0, 42492.0, 106678.0, 321233.0, 969719.0, 1489848.0, 805481.0, 259347.0, 89560.0, 36932.0, 16807.0, 7969.0, 4059.0, 2161.0, 1192.0, 679.0, 446.0, 278.0, 168.0, 123.0, 75.0, 67.0, 43.0, 30.0, 20.0, 15.0, 16.0, 6.0, 6.0, 7.0, 1.0, 1.0, 1.0, 2.0, 2.0, 0.0, 1.0, 1.0], "bins": [-9.1015625, -8.811767578125, -8.52197265625, -8.232177734375, -7.9423828125, -7.652587890625, -7.36279296875, -7.072998046875, -6.783203125, -6.493408203125, -6.20361328125, -5.913818359375, -5.6240234375, -5.334228515625, -5.04443359375, -4.754638671875, -4.46484375, -4.175048828125, -3.88525390625, -3.595458984375, -3.3056640625, -3.015869140625, -2.72607421875, -2.436279296875, -2.146484375, -1.856689453125, -1.56689453125, -1.277099609375, -0.9873046875, -0.697509765625, -0.40771484375, -0.117919921875, 0.171875, 0.461669921875, 0.75146484375, 1.041259765625, 1.3310546875, 1.620849609375, 1.91064453125, 2.200439453125, 2.490234375, 2.780029296875, 3.06982421875, 3.359619140625, 3.6494140625, 3.939208984375, 4.22900390625, 4.518798828125, 4.80859375, 5.098388671875, 5.38818359375, 5.677978515625, 5.9677734375, 6.257568359375, 6.54736328125, 6.837158203125, 7.126953125, 7.416748046875, 7.70654296875, 7.996337890625, 8.2861328125, 8.575927734375, 8.86572265625, 9.155517578125, 9.4453125]}, "gradients/decoder.transformer.h.17.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 5.0, 3.0, 3.0, 4.0, 11.0, 15.0, 16.0, 23.0, 14.0, 45.0, 51.0, 82.0, 98.0, 148.0, 187.0, 276.0, 340.0, 416.0, 464.0, 459.0, 414.0, 297.0, 206.0, 138.0, 108.0, 73.0, 54.0, 35.0, 30.0, 29.0, 11.0, 7.0, 9.0, 9.0, 5.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-26.03125, -25.35693359375, -24.6826171875, -24.00830078125, -23.333984375, -22.65966796875, -21.9853515625, -21.31103515625, -20.63671875, -19.96240234375, -19.2880859375, -18.61376953125, -17.939453125, -17.26513671875, -16.5908203125, -15.91650390625, -15.2421875, -14.56787109375, -13.8935546875, -13.21923828125, -12.544921875, -11.87060546875, -11.1962890625, -10.52197265625, -9.84765625, -9.17333984375, -8.4990234375, -7.82470703125, -7.150390625, -6.47607421875, -5.8017578125, -5.12744140625, -4.453125, -3.77880859375, -3.1044921875, -2.43017578125, -1.755859375, -1.08154296875, -0.4072265625, 0.26708984375, 0.94140625, 1.61572265625, 2.2900390625, 2.96435546875, 3.638671875, 4.31298828125, 4.9873046875, 5.66162109375, 6.3359375, 7.01025390625, 7.6845703125, 8.35888671875, 9.033203125, 9.70751953125, 10.3818359375, 11.05615234375, 11.73046875, 12.40478515625, 13.0791015625, 13.75341796875, 14.427734375, 15.10205078125, 15.7763671875, 16.45068359375, 17.125]}, "gradients/decoder.transformer.h.17.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 3.0, 0.0, 0.0, 2.0, 1.0, 5.0, 6.0, 8.0, 4.0, 15.0, 11.0, 19.0, 27.0, 38.0, 59.0, 68.0, 109.0, 160.0, 297.0, 582.0, 1896.0, 15669.0, 467614.0, 3579306.0, 119661.0, 6431.0, 1153.0, 440.0, 240.0, 134.0, 82.0, 61.0, 42.0, 41.0, 29.0, 21.0, 19.0, 12.0, 7.0, 11.0, 2.0, 3.0, 4.0, 2.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-49.78125, -47.89013671875, -45.9990234375, -44.10791015625, -42.216796875, -40.32568359375, -38.4345703125, -36.54345703125, -34.65234375, -32.76123046875, -30.8701171875, -28.97900390625, -27.087890625, -25.19677734375, -23.3056640625, -21.41455078125, -19.5234375, -17.63232421875, -15.7412109375, -13.85009765625, -11.958984375, -10.06787109375, -8.1767578125, -6.28564453125, -4.39453125, -2.50341796875, -0.6123046875, 1.27880859375, 3.169921875, 5.06103515625, 6.9521484375, 8.84326171875, 10.734375, 12.62548828125, 14.5166015625, 16.40771484375, 18.298828125, 20.18994140625, 22.0810546875, 23.97216796875, 25.86328125, 27.75439453125, 29.6455078125, 31.53662109375, 33.427734375, 35.31884765625, 37.2099609375, 39.10107421875, 40.9921875, 42.88330078125, 44.7744140625, 46.66552734375, 48.556640625, 50.44775390625, 52.3388671875, 54.22998046875, 56.12109375, 58.01220703125, 59.9033203125, 61.79443359375, 63.685546875, 65.57666015625, 67.4677734375, 69.35888671875, 71.25]}, "gradients/decoder.transformer.h.17.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 3.0, 1.0, 10.0, 33.0, 71.0, 127.0, 204.0, 226.0, 178.0, 87.0, 50.0, 19.0, 6.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-164.1261444091797, -158.10586547851562, -152.0855712890625, -146.06529235839844, -140.04501342773438, -134.02471923828125, -128.0044403076172, -121.9841537475586, -115.9638671875, -109.9435806274414, -103.92329406738281, -97.90301513671875, -91.88272857666016, -85.86244201660156, -79.8421630859375, -73.8218765258789, -67.80158996582031, -61.78130340576172, -55.76102066040039, -49.74073791503906, -43.72045135498047, -37.700164794921875, -31.679882049560547, -25.65959930419922, -19.639312744140625, -13.619028091430664, -7.598743438720703, -1.5784587860107422, 4.441825866699219, 10.46211051940918, 16.48239517211914, 22.50267791748047, 28.52294921875, 34.543235778808594, 40.56351852416992, 46.58380126953125, 52.604087829589844, 58.62437438964844, 64.6446533203125, 70.6649398803711, 76.68522644042969, 82.70551300048828, 88.72579956054688, 94.74607849121094, 100.76636505126953, 106.78665161132812, 112.80693054199219, 118.82721710205078, 124.84750366210938, 130.86778259277344, 136.88807678222656, 142.90835571289062, 148.92864990234375, 154.9489288330078, 160.96920776367188, 166.989501953125, 173.00978088378906, 179.03005981445312, 185.05035400390625, 191.0706329345703, 197.09091186523438, 203.1112060546875, 209.13148498535156, 215.15176391601562, 221.17205810546875]}, "gradients/decoder.transformer.h.17.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 4.0, 1.0, 1.0, 3.0, 4.0, 4.0, 6.0, 10.0, 7.0, 12.0, 12.0, 13.0, 25.0, 17.0, 28.0, 24.0, 31.0, 33.0, 42.0, 37.0, 43.0, 43.0, 54.0, 46.0, 39.0, 30.0, 50.0, 44.0, 42.0, 36.0, 27.0, 25.0, 40.0, 25.0, 31.0, 22.0, 18.0, 17.0, 15.0, 16.0, 12.0, 5.0, 6.0, 5.0, 3.0, 3.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-69.18965911865234, -66.94194030761719, -64.69422149658203, -62.44649887084961, -60.19877624511719, -57.95105743408203, -55.703338623046875, -53.45561981201172, -51.2078971862793, -48.96017837524414, -46.71245574951172, -44.46473693847656, -42.217018127441406, -39.969295501708984, -37.72157669067383, -35.473854064941406, -33.22613525390625, -30.97841453552246, -28.730693817138672, -26.482975006103516, -24.235254287719727, -21.987533569335938, -19.73981475830078, -17.492094039916992, -15.244373321533203, -12.996652603149414, -10.748932838439941, -8.501213073730469, -6.25349235534668, -4.005771636962891, -1.758051872253418, 0.4896678924560547, 2.7373809814453125, 4.985101222991943, 7.232821464538574, 9.480541229248047, 11.728261947631836, 13.975982666015625, 16.22370147705078, 18.47142219543457, 20.71914291381836, 22.96686363220215, 25.214584350585938, 27.462303161621094, 29.710023880004883, 31.957744598388672, 34.20546340942383, 36.45318603515625, 38.700904846191406, 40.94862365722656, 43.196346282958984, 45.44406509399414, 47.69178771972656, 49.93950653076172, 52.187225341796875, 54.43494415283203, 56.68266677856445, 58.93038558959961, 61.17810821533203, 63.42582702636719, 65.67354583740234, 67.9212646484375, 70.16899108886719, 72.41670989990234, 74.6644287109375]}, "gradients/decoder.transformer.h.17.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 0.0, 1.0, 2.0, 6.0, 4.0, 4.0, 4.0, 10.0, 12.0, 16.0, 11.0, 19.0, 23.0, 26.0, 19.0, 32.0, 43.0, 33.0, 37.0, 49.0, 43.0, 53.0, 47.0, 38.0, 36.0, 51.0, 42.0, 52.0, 35.0, 43.0, 35.0, 31.0, 28.0, 26.0, 22.0, 18.0, 13.0, 13.0, 5.0, 12.0, 7.0, 5.0, 3.0, 2.0, 4.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-9.234375, -8.9580078125, -8.681640625, -8.4052734375, -8.12890625, -7.8525390625, -7.576171875, -7.2998046875, -7.0234375, -6.7470703125, -6.470703125, -6.1943359375, -5.91796875, -5.6416015625, -5.365234375, -5.0888671875, -4.8125, -4.5361328125, -4.259765625, -3.9833984375, -3.70703125, -3.4306640625, -3.154296875, -2.8779296875, -2.6015625, -2.3251953125, -2.048828125, -1.7724609375, -1.49609375, -1.2197265625, -0.943359375, -0.6669921875, -0.390625, -0.1142578125, 0.162109375, 0.4384765625, 0.71484375, 0.9912109375, 1.267578125, 1.5439453125, 1.8203125, 2.0966796875, 2.373046875, 2.6494140625, 2.92578125, 3.2021484375, 3.478515625, 3.7548828125, 4.03125, 4.3076171875, 4.583984375, 4.8603515625, 5.13671875, 5.4130859375, 5.689453125, 5.9658203125, 6.2421875, 6.5185546875, 6.794921875, 7.0712890625, 7.34765625, 7.6240234375, 7.900390625, 8.1767578125, 8.453125]}, "gradients/decoder.transformer.h.17.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 9.0, 9.0, 14.0, 15.0, 24.0, 36.0, 57.0, 88.0, 141.0, 205.0, 307.0, 432.0, 621.0, 973.0, 1403.0, 2082.0, 3234.0, 5036.0, 7650.0, 11759.0, 18017.0, 28369.0, 45294.0, 72635.0, 115618.0, 170101.0, 186474.0, 137736.0, 88202.0, 55403.0, 34447.0, 21636.0, 13951.0, 9221.0, 5854.0, 3792.0, 2588.0, 1733.0, 1099.0, 738.0, 501.0, 331.0, 250.0, 148.0, 122.0, 74.0, 46.0, 35.0, 20.0, 8.0, 11.0, 8.0, 6.0, 4.0, 2.0, 1.0], "bins": [-0.81005859375, -0.786376953125, -0.7626953125, -0.739013671875, -0.71533203125, -0.691650390625, -0.66796875, -0.644287109375, -0.62060546875, -0.596923828125, -0.5732421875, -0.549560546875, -0.52587890625, -0.502197265625, -0.478515625, -0.454833984375, -0.43115234375, -0.407470703125, -0.3837890625, -0.360107421875, -0.33642578125, -0.312744140625, -0.2890625, -0.265380859375, -0.24169921875, -0.218017578125, -0.1943359375, -0.170654296875, -0.14697265625, -0.123291015625, -0.099609375, -0.075927734375, -0.05224609375, -0.028564453125, -0.0048828125, 0.018798828125, 0.04248046875, 0.066162109375, 0.08984375, 0.113525390625, 0.13720703125, 0.160888671875, 0.1845703125, 0.208251953125, 0.23193359375, 0.255615234375, 0.279296875, 0.302978515625, 0.32666015625, 0.350341796875, 0.3740234375, 0.397705078125, 0.42138671875, 0.445068359375, 0.46875, 0.492431640625, 0.51611328125, 0.539794921875, 0.5634765625, 0.587158203125, 0.61083984375, 0.634521484375, 0.658203125, 0.681884765625, 0.70556640625]}, "gradients/decoder.transformer.h.17.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 2.0, 3.0, 2.0, 2.0, 1.0, 0.0, 2.0, 7.0, 11.0, 12.0, 7.0, 6.0, 26.0, 24.0, 22.0, 26.0, 31.0, 27.0, 31.0, 36.0, 39.0, 37.0, 35.0, 46.0, 57.0, 1072.0, 38.0, 38.0, 35.0, 43.0, 33.0, 41.0, 37.0, 29.0, 27.0, 29.0, 18.0, 23.0, 14.0, 21.0, 16.0, 9.0, 6.0, 5.0, 3.0, 2.0, 2.0, 5.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-4.859375, -4.69403076171875, -4.5286865234375, -4.36334228515625, -4.197998046875, -4.03265380859375, -3.8673095703125, -3.70196533203125, -3.53662109375, -3.37127685546875, -3.2059326171875, -3.04058837890625, -2.875244140625, -2.70989990234375, -2.5445556640625, -2.37921142578125, -2.2138671875, -2.04852294921875, -1.8831787109375, -1.71783447265625, -1.552490234375, -1.38714599609375, -1.2218017578125, -1.05645751953125, -0.89111328125, -0.72576904296875, -0.5604248046875, -0.39508056640625, -0.229736328125, -0.06439208984375, 0.1009521484375, 0.26629638671875, 0.431640625, 0.59698486328125, 0.7623291015625, 0.92767333984375, 1.093017578125, 1.25836181640625, 1.4237060546875, 1.58905029296875, 1.75439453125, 1.91973876953125, 2.0850830078125, 2.25042724609375, 2.415771484375, 2.58111572265625, 2.7464599609375, 2.91180419921875, 3.0771484375, 3.24249267578125, 3.4078369140625, 3.57318115234375, 3.738525390625, 3.90386962890625, 4.0692138671875, 4.23455810546875, 4.39990234375, 4.56524658203125, 4.7305908203125, 4.89593505859375, 5.061279296875, 5.22662353515625, 5.3919677734375, 5.55731201171875, 5.72265625]}, "gradients/decoder.transformer.h.17.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 2.0, 2.0, 0.0, 3.0, 4.0, 5.0, 17.0, 15.0, 17.0, 30.0, 45.0, 68.0, 98.0, 128.0, 207.0, 331.0, 439.0, 701.0, 1047.0, 1662.0, 2572.0, 3944.0, 6391.0, 10044.0, 16119.0, 26182.0, 42490.0, 68222.0, 107141.0, 154744.0, 1225516.0, 151094.0, 103377.0, 65780.0, 40907.0, 25236.0, 15517.0, 9988.0, 6031.0, 3973.0, 2492.0, 1645.0, 986.0, 671.0, 458.0, 251.0, 177.0, 113.0, 97.0, 54.0, 33.0, 26.0, 13.0, 11.0, 10.0, 8.0, 4.0, 5.0, 1.0, 1.0, 3.0], "bins": [-0.609375, -0.5911636352539062, -0.5729522705078125, -0.5547409057617188, -0.536529541015625, -0.5183181762695312, -0.5001068115234375, -0.48189544677734375, -0.46368408203125, -0.44547271728515625, -0.4272613525390625, -0.40904998779296875, -0.390838623046875, -0.37262725830078125, -0.3544158935546875, -0.33620452880859375, -0.3179931640625, -0.29978179931640625, -0.2815704345703125, -0.26335906982421875, -0.245147705078125, -0.22693634033203125, -0.2087249755859375, -0.19051361083984375, -0.17230224609375, -0.15409088134765625, -0.1358795166015625, -0.11766815185546875, -0.099456787109375, -0.08124542236328125, -0.0630340576171875, -0.04482269287109375, -0.026611328125, -0.00839996337890625, 0.0098114013671875, 0.02802276611328125, 0.046234130859375, 0.06444549560546875, 0.0826568603515625, 0.10086822509765625, 0.11907958984375, 0.13729095458984375, 0.1555023193359375, 0.17371368408203125, 0.191925048828125, 0.21013641357421875, 0.2283477783203125, 0.24655914306640625, 0.2647705078125, 0.28298187255859375, 0.3011932373046875, 0.31940460205078125, 0.337615966796875, 0.35582733154296875, 0.3740386962890625, 0.39225006103515625, 0.41046142578125, 0.42867279052734375, 0.4468841552734375, 0.46509552001953125, 0.483306884765625, 0.5015182495117188, 0.5197296142578125, 0.5379409790039062, 0.55615234375]}, "gradients/decoder.transformer.h.17.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 2.0, 4.0, 2.0, 2.0, 3.0, 8.0, 14.0, 12.0, 15.0, 13.0, 13.0, 27.0, 29.0, 26.0, 31.0, 45.0, 42.0, 62.0, 79.0, 74.0, 85.0, 59.0, 75.0, 42.0, 52.0, 33.0, 24.0, 22.0, 29.0, 16.0, 15.0, 9.0, 10.0, 12.0, 7.0, 7.0, 1.0, 3.0, 0.0, 0.0, 1.0, 2.0, 1.0, 4.0, 1.0, 2.0, 0.0, 1.0], "bins": [-0.008758544921875, -0.008516490459442139, -0.008274435997009277, -0.008032381534576416, -0.007790327072143555, -0.007548272609710693, -0.007306218147277832, -0.007064163684844971, -0.006822109222412109, -0.006580054759979248, -0.006338000297546387, -0.006095945835113525, -0.005853891372680664, -0.005611836910247803, -0.005369782447814941, -0.00512772798538208, -0.004885673522949219, -0.004643619060516357, -0.004401564598083496, -0.004159510135650635, -0.0039174556732177734, -0.003675401210784912, -0.0034333467483520508, -0.0031912922859191895, -0.002949237823486328, -0.002707183361053467, -0.0024651288986206055, -0.002223074436187744, -0.001981019973754883, -0.0017389655113220215, -0.0014969110488891602, -0.0012548565864562988, -0.0010128021240234375, -0.0007707476615905762, -0.0005286931991577148, -0.0002866387367248535, -4.458427429199219e-05, 0.00019747018814086914, 0.00043952465057373047, 0.0006815791130065918, 0.0009236335754394531, 0.0011656880378723145, 0.0014077425003051758, 0.0016497969627380371, 0.0018918514251708984, 0.0021339058876037598, 0.002375960350036621, 0.0026180148124694824, 0.0028600692749023438, 0.003102123737335205, 0.0033441781997680664, 0.0035862326622009277, 0.003828287124633789, 0.00407034158706665, 0.004312396049499512, 0.004554450511932373, 0.004796504974365234, 0.005038559436798096, 0.005280613899230957, 0.005522668361663818, 0.00576472282409668, 0.006006777286529541, 0.006248831748962402, 0.006490886211395264, 0.006732940673828125]}, "gradients/decoder.transformer.h.17.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 0.0, 2.0, 2.0, 2.0, 2.0, 1.0, 1.0, 3.0, 1.0, 9.0, 12.0, 9.0, 15.0, 15.0, 26.0, 27.0, 48.0, 65.0, 104.0, 170.0, 261.0, 456.0, 1501.0, 554642.0, 488418.0, 1533.0, 462.0, 269.0, 145.0, 101.0, 72.0, 49.0, 43.0, 16.0, 22.0, 9.0, 14.0, 12.0, 7.0, 9.0, 0.0, 4.0, 1.0, 2.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.132568359375, -0.1276569366455078, -0.12274551391601562, -0.11783409118652344, -0.11292266845703125, -0.10801124572753906, -0.10309982299804688, -0.09818840026855469, -0.0932769775390625, -0.08836555480957031, -0.08345413208007812, -0.07854270935058594, -0.07363128662109375, -0.06871986389160156, -0.06380844116210938, -0.05889701843261719, -0.053985595703125, -0.04907417297363281, -0.044162750244140625, -0.03925132751464844, -0.03433990478515625, -0.029428482055664062, -0.024517059326171875, -0.019605636596679688, -0.0146942138671875, -0.009782791137695312, -0.004871368408203125, 4.00543212890625e-05, 0.00495147705078125, 0.009862899780273438, 0.014774322509765625, 0.019685745239257812, 0.02459716796875, 0.029508590698242188, 0.034420013427734375, 0.03933143615722656, 0.04424285888671875, 0.04915428161621094, 0.054065704345703125, 0.05897712707519531, 0.0638885498046875, 0.06879997253417969, 0.07371139526367188, 0.07862281799316406, 0.08353424072265625, 0.08844566345214844, 0.09335708618164062, 0.09826850891113281, 0.103179931640625, 0.10809135437011719, 0.11300277709960938, 0.11791419982910156, 0.12282562255859375, 0.12773704528808594, 0.13264846801757812, 0.1375598907470703, 0.1424713134765625, 0.1473827362060547, 0.15229415893554688, 0.15720558166503906, 0.16211700439453125, 0.16702842712402344, 0.17193984985351562, 0.1768512725830078, 0.1817626953125]}, "gradients/decoder.transformer.h.17.ln_cross_attn.weight": {"_type": "histogram", "values": [3.0, 16.0, 144.0, 530.0, 273.0, 40.0, 9.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0026666298508644104, -0.00194618571549654, -0.0012257416965439916, -0.0005052976775914431, 0.00021514645777642727, 0.0009355905931442976, 0.0016560344956815243, 0.0023764786310493946, 0.003096922766417265, 0.0038173669017851353, 0.004537811037153006, 0.005258254706859589, 0.005978698842227459, 0.006699142977595329, 0.0074195871129632, 0.00814003124833107, 0.00886047538369894, 0.00958091951906681, 0.010301363654434681, 0.011021807789802551, 0.011742251925170422, 0.012462696060538292, 0.013183139264583588, 0.013903584331274033, 0.014624027535319328, 0.015344471670687199, 0.01606491580605507, 0.016785359010100365, 0.01750580407679081, 0.018226247280836105, 0.01894669234752655, 0.019667135551571846, 0.02038758061826229, 0.021108023822307587, 0.02182846888899803, 0.022548912093043327, 0.023269357159733772, 0.023989800363779068, 0.024710245430469513, 0.02543068863451481, 0.026151133701205254, 0.02687157690525055, 0.027592021971940994, 0.02831246517598629, 0.029032910242676735, 0.02975335344672203, 0.030473798513412476, 0.03119424171745777, 0.03191468492150307, 0.03263512998819351, 0.03335557132959366, 0.0340760163962841, 0.03479646146297455, 0.03551690652966499, 0.03623734787106514, 0.036957792937755585, 0.03767823800444603, 0.038398683071136475, 0.03911912441253662, 0.039839569479227066, 0.04056001454591751, 0.041280459612607956, 0.0420009009540081, 0.04272134602069855, 0.04344179108738899]}, "gradients/decoder.transformer.h.17.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 3.0, 0.0, 8.0, 7.0, 10.0, 10.0, 8.0, 25.0, 12.0, 23.0, 19.0, 18.0, 37.0, 34.0, 34.0, 43.0, 48.0, 36.0, 49.0, 48.0, 54.0, 51.0, 46.0, 31.0, 43.0, 32.0, 38.0, 27.0, 35.0, 26.0, 25.0, 24.0, 19.0, 16.0, 18.0, 13.0, 10.0, 8.0, 7.0, 6.0, 6.0, 6.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.004884064197540283, -0.004735061898827553, -0.004586059600114822, -0.004437057301402092, -0.004288055002689362, -0.004139052703976631, -0.003990050405263901, -0.0038410481065511703, -0.00369204580783844, -0.0035430435091257095, -0.003394041210412979, -0.0032450389117002487, -0.0030960366129875183, -0.002947034314274788, -0.0027980320155620575, -0.002649029716849327, -0.0025000274181365967, -0.0023510251194238663, -0.002202022820711136, -0.0020530205219984055, -0.001904018223285675, -0.0017550159245729446, -0.0016060136258602142, -0.0014570113271474838, -0.0013080090284347534, -0.001159006729722023, -0.0010100044310092926, -0.0008610021322965622, -0.0007119998335838318, -0.0005629975348711014, -0.00041399523615837097, -0.00026499293744564056, -0.00011599063873291016, 3.301165997982025e-05, 0.00018201395869255066, 0.00033101625740528107, 0.0004800185561180115, 0.0006290208548307419, 0.0007780231535434723, 0.0009270254522562027, 0.001076027750968933, 0.0012250300496816635, 0.001374032348394394, 0.0015230346471071243, 0.0016720369458198547, 0.0018210392445325851, 0.0019700415432453156, 0.002119043841958046, 0.0022680461406707764, 0.0024170484393835068, 0.002566050738096237, 0.0027150530368089676, 0.002864055335521698, 0.0030130576342344284, 0.003162059932947159, 0.0033110622316598892, 0.0034600645303726196, 0.00360906682908535, 0.0037580691277980804, 0.003907071426510811, 0.004056073725223541, 0.004205076023936272, 0.004354078322649002, 0.0045030806213617325, 0.004652082920074463]}, "gradients/decoder.transformer.h.17.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 0.0, 1.0, 2.0, 6.0, 4.0, 4.0, 4.0, 10.0, 12.0, 16.0, 11.0, 19.0, 23.0, 26.0, 19.0, 32.0, 43.0, 33.0, 37.0, 49.0, 43.0, 53.0, 47.0, 38.0, 36.0, 51.0, 42.0, 52.0, 35.0, 43.0, 35.0, 31.0, 28.0, 26.0, 21.0, 19.0, 13.0, 13.0, 5.0, 12.0, 7.0, 5.0, 3.0, 2.0, 4.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-9.234375, -8.9580078125, -8.681640625, -8.4052734375, -8.12890625, -7.8525390625, -7.576171875, -7.2998046875, -7.0234375, -6.7470703125, -6.470703125, -6.1943359375, -5.91796875, -5.6416015625, -5.365234375, -5.0888671875, -4.8125, -4.5361328125, -4.259765625, -3.9833984375, -3.70703125, -3.4306640625, -3.154296875, -2.8779296875, -2.6015625, -2.3251953125, -2.048828125, -1.7724609375, -1.49609375, -1.2197265625, -0.943359375, -0.6669921875, -0.390625, -0.1142578125, 0.162109375, 0.4384765625, 0.71484375, 0.9912109375, 1.267578125, 1.5439453125, 1.8203125, 2.0966796875, 2.373046875, 2.6494140625, 2.92578125, 3.2021484375, 3.478515625, 3.7548828125, 4.03125, 4.3076171875, 4.583984375, 4.8603515625, 5.13671875, 5.4130859375, 5.689453125, 5.9658203125, 6.2421875, 6.5185546875, 6.794921875, 7.0712890625, 7.34765625, 7.6240234375, 7.900390625, 8.1767578125, 8.453125]}, "gradients/decoder.transformer.h.17.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 3.0, 1.0, 5.0, 5.0, 7.0, 11.0, 19.0, 31.0, 24.0, 65.0, 93.0, 150.0, 237.0, 404.0, 615.0, 1076.0, 1864.0, 3283.0, 5965.0, 11154.0, 24065.0, 66724.0, 259832.0, 471205.0, 127770.0, 38976.0, 16466.0, 8180.0, 4449.0, 2410.0, 1384.0, 787.0, 496.0, 299.0, 169.0, 115.0, 60.0, 47.0, 43.0, 31.0, 15.0, 11.0, 5.0, 6.0, 4.0, 2.0, 2.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.96875, -7.7291259765625, -7.489501953125, -7.2498779296875, -7.01025390625, -6.7706298828125, -6.531005859375, -6.2913818359375, -6.0517578125, -5.8121337890625, -5.572509765625, -5.3328857421875, -5.09326171875, -4.8536376953125, -4.614013671875, -4.3743896484375, -4.134765625, -3.8951416015625, -3.655517578125, -3.4158935546875, -3.17626953125, -2.9366455078125, -2.697021484375, -2.4573974609375, -2.2177734375, -1.9781494140625, -1.738525390625, -1.4989013671875, -1.25927734375, -1.0196533203125, -0.780029296875, -0.5404052734375, -0.30078125, -0.0611572265625, 0.178466796875, 0.4180908203125, 0.65771484375, 0.8973388671875, 1.136962890625, 1.3765869140625, 1.6162109375, 1.8558349609375, 2.095458984375, 2.3350830078125, 2.57470703125, 2.8143310546875, 3.053955078125, 3.2935791015625, 3.533203125, 3.7728271484375, 4.012451171875, 4.2520751953125, 4.49169921875, 4.7313232421875, 4.970947265625, 5.2105712890625, 5.4501953125, 5.6898193359375, 5.929443359375, 6.1690673828125, 6.40869140625, 6.6483154296875, 6.887939453125, 7.1275634765625, 7.3671875]}, "gradients/decoder.transformer.h.17.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 4.0, 1.0, 3.0, 5.0, 6.0, 9.0, 4.0, 9.0, 20.0, 16.0, 15.0, 25.0, 30.0, 42.0, 36.0, 34.0, 52.0, 55.0, 68.0, 76.0, 189.0, 1582.0, 222.0, 94.0, 76.0, 53.0, 56.0, 44.0, 38.0, 29.0, 34.0, 24.0, 26.0, 10.0, 15.0, 16.0, 13.0, 10.0, 8.0, 5.0, 2.0, 2.0, 0.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-35.96875, -34.958740234375, -33.94873046875, -32.938720703125, -31.9287109375, -30.918701171875, -29.90869140625, -28.898681640625, -27.888671875, -26.878662109375, -25.86865234375, -24.858642578125, -23.8486328125, -22.838623046875, -21.82861328125, -20.818603515625, -19.80859375, -18.798583984375, -17.78857421875, -16.778564453125, -15.7685546875, -14.758544921875, -13.74853515625, -12.738525390625, -11.728515625, -10.718505859375, -9.70849609375, -8.698486328125, -7.6884765625, -6.678466796875, -5.66845703125, -4.658447265625, -3.6484375, -2.638427734375, -1.62841796875, -0.618408203125, 0.3916015625, 1.401611328125, 2.41162109375, 3.421630859375, 4.431640625, 5.441650390625, 6.45166015625, 7.461669921875, 8.4716796875, 9.481689453125, 10.49169921875, 11.501708984375, 12.51171875, 13.521728515625, 14.53173828125, 15.541748046875, 16.5517578125, 17.561767578125, 18.57177734375, 19.581787109375, 20.591796875, 21.601806640625, 22.61181640625, 23.621826171875, 24.6318359375, 25.641845703125, 26.65185546875, 27.661865234375, 28.671875]}, "gradients/decoder.transformer.h.17.attn.c_attn.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 5.0, 0.0, 2.0, 2.0, 14.0, 15.0, 12.0, 30.0, 18.0, 44.0, 49.0, 56.0, 104.0, 137.0, 222.0, 325.0, 884.0, 5865.0, 2809686.0, 323370.0, 3280.0, 682.0, 297.0, 176.0, 113.0, 94.0, 61.0, 45.0, 29.0, 26.0, 21.0, 14.0, 11.0, 4.0, 8.0, 6.0, 3.0, 3.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-73.6875, -71.2216796875, -68.755859375, -66.2900390625, -63.82421875, -61.3583984375, -58.892578125, -56.4267578125, -53.9609375, -51.4951171875, -49.029296875, -46.5634765625, -44.09765625, -41.6318359375, -39.166015625, -36.7001953125, -34.234375, -31.7685546875, -29.302734375, -26.8369140625, -24.37109375, -21.9052734375, -19.439453125, -16.9736328125, -14.5078125, -12.0419921875, -9.576171875, -7.1103515625, -4.64453125, -2.1787109375, 0.287109375, 2.7529296875, 5.21875, 7.6845703125, 10.150390625, 12.6162109375, 15.08203125, 17.5478515625, 20.013671875, 22.4794921875, 24.9453125, 27.4111328125, 29.876953125, 32.3427734375, 34.80859375, 37.2744140625, 39.740234375, 42.2060546875, 44.671875, 47.1376953125, 49.603515625, 52.0693359375, 54.53515625, 57.0009765625, 59.466796875, 61.9326171875, 64.3984375, 66.8642578125, 69.330078125, 71.7958984375, 74.26171875, 76.7275390625, 79.193359375, 81.6591796875, 84.125]}, "gradients/decoder.transformer.h.17.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 9.0, 87.0, 459.0, 376.0, 79.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-220.84524536132812, -215.80874633789062, -210.77224731445312, -205.73574829101562, -200.69923400878906, -195.66273498535156, -190.62623596191406, -185.58973693847656, -180.55323791503906, -175.51673889160156, -170.48023986816406, -165.4437255859375, -160.4072265625, -155.3707275390625, -150.334228515625, -145.2977294921875, -140.26123046875, -135.2247314453125, -130.188232421875, -125.15172576904297, -120.11522674560547, -115.07872009277344, -110.04222106933594, -105.00572204589844, -99.96920776367188, -94.93270874023438, -89.89620208740234, -84.85970306396484, -79.82320404052734, -74.78669738769531, -69.75019836425781, -64.71369934082031, -59.67720031738281, -54.64069747924805, -49.60419845581055, -44.56769561767578, -39.53119659423828, -34.494693756103516, -29.45819091796875, -24.42169189453125, -19.385189056396484, -14.348688125610352, -9.312186241149902, -4.275684356689453, 0.7608165740966797, 5.7973175048828125, 10.833820343017578, 15.870319366455078, 20.906822204589844, 25.943323135375977, 30.97982406616211, 36.016326904296875, 41.052825927734375, 46.08932876586914, 51.125831604003906, 56.162330627441406, 61.19883346557617, 66.23533630371094, 71.27183532714844, 76.30833435058594, 81.34484100341797, 86.38134002685547, 91.4178466796875, 96.454345703125, 101.4908447265625]}, "gradients/decoder.transformer.h.17.ln_1.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 1.0, 0.0, 2.0, 3.0, 4.0, 2.0, 5.0, 5.0, 6.0, 8.0, 11.0, 11.0, 11.0, 16.0, 17.0, 24.0, 23.0, 28.0, 33.0, 31.0, 29.0, 30.0, 41.0, 43.0, 39.0, 39.0, 37.0, 45.0, 28.0, 44.0, 51.0, 22.0, 30.0, 37.0, 34.0, 29.0, 29.0, 24.0, 24.0, 19.0, 10.0, 12.0, 18.0, 12.0, 11.0, 9.0, 10.0, 8.0, 3.0, 1.0, 2.0, 2.0, 1.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0], "bins": [-66.06924438476562, -63.86018753051758, -61.6511344909668, -59.44207763671875, -57.23302459716797, -55.02396774291992, -52.814910888671875, -50.605857849121094, -48.39680099487305, -46.187744140625, -43.97869110107422, -41.76963424682617, -39.560577392578125, -37.351524353027344, -35.1424674987793, -32.93341064453125, -30.72435760498047, -28.515302658081055, -26.30624771118164, -24.097190856933594, -21.88813591003418, -19.679080963134766, -17.47002410888672, -15.260969161987305, -13.05191421508789, -10.842859268188477, -8.633803367614746, -6.424747943878174, -4.215692520141602, -2.0066375732421875, 0.20241832733154297, 2.4114742279052734, 4.6205291748046875, 6.82958459854126, 9.038640022277832, 11.247695922851562, 13.456750869750977, 15.66580581665039, 17.874862670898438, 20.08391761779785, 22.292972564697266, 24.50202751159668, 26.711082458496094, 28.92013931274414, 31.129194259643555, 33.33824920654297, 35.547306060791016, 37.75636291503906, 39.965415954589844, 42.17447280883789, 44.38352584838867, 46.59258270263672, 48.8016357421875, 51.01069259643555, 53.219749450683594, 55.428802490234375, 57.63785934448242, 59.84691619873047, 62.05596923828125, 64.26502227783203, 66.47408294677734, 68.68313598632812, 70.8921890258789, 73.10124969482422, 75.310302734375]}, "gradients/decoder.transformer.h.16.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 4.0, 4.0, 4.0, 8.0, 5.0, 11.0, 12.0, 20.0, 17.0, 27.0, 17.0, 27.0, 33.0, 41.0, 39.0, 35.0, 46.0, 38.0, 51.0, 44.0, 52.0, 41.0, 36.0, 50.0, 39.0, 39.0, 45.0, 41.0, 33.0, 24.0, 25.0, 17.0, 23.0, 18.0, 10.0, 8.0, 9.0, 5.0, 4.0, 4.0, 4.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-9.5546875, -9.26904296875, -8.9833984375, -8.69775390625, -8.412109375, -8.12646484375, -7.8408203125, -7.55517578125, -7.26953125, -6.98388671875, -6.6982421875, -6.41259765625, -6.126953125, -5.84130859375, -5.5556640625, -5.27001953125, -4.984375, -4.69873046875, -4.4130859375, -4.12744140625, -3.841796875, -3.55615234375, -3.2705078125, -2.98486328125, -2.69921875, -2.41357421875, -2.1279296875, -1.84228515625, -1.556640625, -1.27099609375, -0.9853515625, -0.69970703125, -0.4140625, -0.12841796875, 0.1572265625, 0.44287109375, 0.728515625, 1.01416015625, 1.2998046875, 1.58544921875, 1.87109375, 2.15673828125, 2.4423828125, 2.72802734375, 3.013671875, 3.29931640625, 3.5849609375, 3.87060546875, 4.15625, 4.44189453125, 4.7275390625, 5.01318359375, 5.298828125, 5.58447265625, 5.8701171875, 6.15576171875, 6.44140625, 6.72705078125, 7.0126953125, 7.29833984375, 7.583984375, 7.86962890625, 8.1552734375, 8.44091796875, 8.7265625]}, "gradients/decoder.transformer.h.16.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 3.0, 3.0, 1.0, 7.0, 9.0, 8.0, 9.0, 6.0, 16.0, 20.0, 19.0, 31.0, 32.0, 46.0, 46.0, 61.0, 68.0, 87.0, 173.0, 452.0, 2323.0, 23269.0, 772379.0, 3275575.0, 110864.0, 6989.0, 993.0, 256.0, 121.0, 77.0, 59.0, 46.0, 51.0, 48.0, 32.0, 22.0, 19.0, 14.0, 13.0, 11.0, 7.0, 7.0, 7.0, 8.0, 0.0, 1.0, 2.0, 1.0, 3.0, 0.0, 0.0, 2.0], "bins": [-42.625, -41.416015625, -40.20703125, -38.998046875, -37.7890625, -36.580078125, -35.37109375, -34.162109375, -32.953125, -31.744140625, -30.53515625, -29.326171875, -28.1171875, -26.908203125, -25.69921875, -24.490234375, -23.28125, -22.072265625, -20.86328125, -19.654296875, -18.4453125, -17.236328125, -16.02734375, -14.818359375, -13.609375, -12.400390625, -11.19140625, -9.982421875, -8.7734375, -7.564453125, -6.35546875, -5.146484375, -3.9375, -2.728515625, -1.51953125, -0.310546875, 0.8984375, 2.107421875, 3.31640625, 4.525390625, 5.734375, 6.943359375, 8.15234375, 9.361328125, 10.5703125, 11.779296875, 12.98828125, 14.197265625, 15.40625, 16.615234375, 17.82421875, 19.033203125, 20.2421875, 21.451171875, 22.66015625, 23.869140625, 25.078125, 26.287109375, 27.49609375, 28.705078125, 29.9140625, 31.123046875, 32.33203125, 33.541015625, 34.75]}, "gradients/decoder.transformer.h.16.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 4.0, 2.0, 4.0, 9.0, 8.0, 13.0, 25.0, 33.0, 43.0, 69.0, 88.0, 121.0, 185.0, 280.0, 321.0, 500.0, 558.0, 512.0, 379.0, 253.0, 207.0, 128.0, 115.0, 58.0, 56.0, 39.0, 20.0, 19.0, 9.0, 9.0, 5.0, 3.0, 2.0, 3.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-26.3125, -25.552734375, -24.79296875, -24.033203125, -23.2734375, -22.513671875, -21.75390625, -20.994140625, -20.234375, -19.474609375, -18.71484375, -17.955078125, -17.1953125, -16.435546875, -15.67578125, -14.916015625, -14.15625, -13.396484375, -12.63671875, -11.876953125, -11.1171875, -10.357421875, -9.59765625, -8.837890625, -8.078125, -7.318359375, -6.55859375, -5.798828125, -5.0390625, -4.279296875, -3.51953125, -2.759765625, -2.0, -1.240234375, -0.48046875, 0.279296875, 1.0390625, 1.798828125, 2.55859375, 3.318359375, 4.078125, 4.837890625, 5.59765625, 6.357421875, 7.1171875, 7.876953125, 8.63671875, 9.396484375, 10.15625, 10.916015625, 11.67578125, 12.435546875, 13.1953125, 13.955078125, 14.71484375, 15.474609375, 16.234375, 16.994140625, 17.75390625, 18.513671875, 19.2734375, 20.033203125, 20.79296875, 21.552734375, 22.3125]}, "gradients/decoder.transformer.h.16.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 4.0, 2.0, 3.0, 8.0, 5.0, 10.0, 20.0, 22.0, 29.0, 34.0, 39.0, 74.0, 90.0, 104.0, 150.0, 259.0, 457.0, 1075.0, 4572.0, 96603.0, 3668524.0, 408865.0, 10260.0, 1531.0, 535.0, 294.0, 197.0, 139.0, 93.0, 82.0, 72.0, 42.0, 20.0, 20.0, 11.0, 16.0, 7.0, 7.0, 4.0, 5.0, 3.0, 1.0, 3.0, 2.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-62.65625, -60.54443359375, -58.4326171875, -56.32080078125, -54.208984375, -52.09716796875, -49.9853515625, -47.87353515625, -45.76171875, -43.64990234375, -41.5380859375, -39.42626953125, -37.314453125, -35.20263671875, -33.0908203125, -30.97900390625, -28.8671875, -26.75537109375, -24.6435546875, -22.53173828125, -20.419921875, -18.30810546875, -16.1962890625, -14.08447265625, -11.97265625, -9.86083984375, -7.7490234375, -5.63720703125, -3.525390625, -1.41357421875, 0.6982421875, 2.81005859375, 4.921875, 7.03369140625, 9.1455078125, 11.25732421875, 13.369140625, 15.48095703125, 17.5927734375, 19.70458984375, 21.81640625, 23.92822265625, 26.0400390625, 28.15185546875, 30.263671875, 32.37548828125, 34.4873046875, 36.59912109375, 38.7109375, 40.82275390625, 42.9345703125, 45.04638671875, 47.158203125, 49.27001953125, 51.3818359375, 53.49365234375, 55.60546875, 57.71728515625, 59.8291015625, 61.94091796875, 64.052734375, 66.16455078125, 68.2763671875, 70.38818359375, 72.5]}, "gradients/decoder.transformer.h.16.ln_2.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 4.0, 8.0, 25.0, 70.0, 183.0, 291.0, 232.0, 126.0, 55.0, 16.0, 3.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-66.56511688232422, -59.006038665771484, -51.44696044921875, -43.88787841796875, -36.32880401611328, -28.76972198486328, -21.210643768310547, -13.651565551757812, -6.092487335205078, 1.4665913581848145, 9.025670051574707, 16.584749221801758, 24.143827438354492, 31.70290756225586, 39.261985778808594, 46.82106399536133, 54.38014221191406, 61.9392204284668, 69.49829864501953, 77.05738067626953, 84.616455078125, 92.175537109375, 99.734619140625, 107.29369354248047, 114.85276794433594, 122.41184997558594, 129.97093200683594, 137.52999877929688, 145.08908081054688, 152.64816284179688, 160.20724487304688, 167.76632690429688, 175.3253936767578, 182.8844757080078, 190.4435577392578, 198.00262451171875, 205.56170654296875, 213.12078857421875, 220.67987060546875, 228.23895263671875, 235.7980194091797, 243.3571014404297, 250.9161834716797, 258.4752502441406, 266.0343322753906, 273.5934143066406, 281.1524963378906, 288.7115783691406, 296.2706604003906, 303.8297424316406, 311.3888244628906, 318.9479064941406, 326.5069885253906, 334.0660400390625, 341.6251220703125, 349.1842041015625, 356.7432861328125, 364.3023681640625, 371.8614501953125, 379.4205322265625, 386.9796142578125, 394.5386962890625, 402.0977478027344, 409.6568298339844, 417.2159118652344]}, "gradients/decoder.transformer.h.16.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 3.0, 2.0, 5.0, 3.0, 6.0, 2.0, 8.0, 7.0, 9.0, 12.0, 20.0, 16.0, 22.0, 20.0, 19.0, 26.0, 31.0, 37.0, 33.0, 32.0, 31.0, 31.0, 34.0, 37.0, 34.0, 29.0, 39.0, 32.0, 31.0, 35.0, 31.0, 29.0, 39.0, 44.0, 20.0, 31.0, 20.0, 24.0, 16.0, 18.0, 14.0, 14.0, 11.0, 14.0, 9.0, 9.0, 8.0, 6.0, 3.0, 1.0, 1.0, 0.0, 3.0, 1.0, 4.0, 2.0, 0.0, 1.0], "bins": [-63.25408935546875, -61.28107452392578, -59.30805587768555, -57.33504104614258, -55.36202621459961, -53.389007568359375, -51.415992736816406, -49.44297790527344, -47.4699592590332, -45.496944427490234, -43.52392578125, -41.55091094970703, -39.57789611816406, -37.60487747192383, -35.63186264038086, -33.658843994140625, -31.68583106994629, -29.712814331054688, -27.73979949951172, -25.766782760620117, -23.793766021728516, -21.820751190185547, -19.847734451293945, -17.874717712402344, -15.901701927185059, -13.928686141967773, -11.955669403076172, -9.982653617858887, -8.009637832641602, -6.03662109375, -4.063605308532715, -2.0905885696411133, -0.11757278442382812, 1.8554433584213257, 3.8284595012664795, 5.801475524902344, 7.774491786956787, 9.74750804901123, 11.720523834228516, 13.693540573120117, 15.666556358337402, 17.639572143554688, 19.61258888244629, 21.58560562133789, 23.55862045288086, 25.53163719177246, 27.504653930664062, 29.47766876220703, 31.450685501098633, 33.423702239990234, 35.3967170715332, 37.36973571777344, 39.342750549316406, 41.315765380859375, 43.288780212402344, 45.26179885864258, 47.23481369018555, 49.207828521728516, 51.18084716796875, 53.15386199951172, 55.12687683105469, 57.09989547729492, 59.07291030883789, 61.045928955078125, 63.018943786621094]}, "gradients/decoder.transformer.h.16.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 4.0, 2.0, 3.0, 6.0, 4.0, 8.0, 12.0, 11.0, 17.0, 13.0, 18.0, 20.0, 28.0, 33.0, 29.0, 37.0, 36.0, 39.0, 47.0, 38.0, 56.0, 44.0, 43.0, 44.0, 44.0, 41.0, 46.0, 38.0, 34.0, 29.0, 33.0, 38.0, 16.0, 26.0, 13.0, 13.0, 12.0, 15.0, 1.0, 12.0, 1.0, 4.0, 1.0, 2.0, 2.0, 0.0, 2.0, 1.0, 2.0], "bins": [-10.578125, -10.29266357421875, -10.0072021484375, -9.72174072265625, -9.436279296875, -9.15081787109375, -8.8653564453125, -8.57989501953125, -8.29443359375, -8.00897216796875, -7.7235107421875, -7.43804931640625, -7.152587890625, -6.86712646484375, -6.5816650390625, -6.29620361328125, -6.0107421875, -5.72528076171875, -5.4398193359375, -5.15435791015625, -4.868896484375, -4.58343505859375, -4.2979736328125, -4.01251220703125, -3.72705078125, -3.44158935546875, -3.1561279296875, -2.87066650390625, -2.585205078125, -2.29974365234375, -2.0142822265625, -1.72882080078125, -1.443359375, -1.15789794921875, -0.8724365234375, -0.58697509765625, -0.301513671875, -0.01605224609375, 0.2694091796875, 0.55487060546875, 0.84033203125, 1.12579345703125, 1.4112548828125, 1.69671630859375, 1.982177734375, 2.26763916015625, 2.5531005859375, 2.83856201171875, 3.1240234375, 3.40948486328125, 3.6949462890625, 3.98040771484375, 4.265869140625, 4.55133056640625, 4.8367919921875, 5.12225341796875, 5.40771484375, 5.69317626953125, 5.9786376953125, 6.26409912109375, 6.549560546875, 6.83502197265625, 7.1204833984375, 7.40594482421875, 7.69140625]}, "gradients/decoder.transformer.h.16.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 6.0, 4.0, 6.0, 19.0, 18.0, 29.0, 45.0, 68.0, 84.0, 119.0, 162.0, 249.0, 379.0, 541.0, 773.0, 1007.0, 1407.0, 2083.0, 2961.0, 4174.0, 5983.0, 8947.0, 12906.0, 19205.0, 28821.0, 43712.0, 67340.0, 100891.0, 142180.0, 165626.0, 141544.0, 99622.0, 65642.0, 43179.0, 28329.0, 18868.0, 12960.0, 8741.0, 5959.0, 4125.0, 2930.0, 2071.0, 1428.0, 988.0, 703.0, 523.0, 378.0, 272.0, 179.0, 127.0, 88.0, 48.0, 37.0, 36.0, 17.0, 23.0, 8.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.72314453125, -0.7008819580078125, -0.678619384765625, -0.6563568115234375, -0.63409423828125, -0.6118316650390625, -0.589569091796875, -0.5673065185546875, -0.5450439453125, -0.5227813720703125, -0.500518798828125, -0.4782562255859375, -0.45599365234375, -0.4337310791015625, -0.411468505859375, -0.3892059326171875, -0.366943359375, -0.3446807861328125, -0.322418212890625, -0.3001556396484375, -0.27789306640625, -0.2556304931640625, -0.233367919921875, -0.2111053466796875, -0.1888427734375, -0.1665802001953125, -0.144317626953125, -0.1220550537109375, -0.09979248046875, -0.0775299072265625, -0.055267333984375, -0.0330047607421875, -0.0107421875, 0.0115203857421875, 0.033782958984375, 0.0560455322265625, 0.07830810546875, 0.1005706787109375, 0.122833251953125, 0.1450958251953125, 0.1673583984375, 0.1896209716796875, 0.211883544921875, 0.2341461181640625, 0.25640869140625, 0.2786712646484375, 0.300933837890625, 0.3231964111328125, 0.345458984375, 0.3677215576171875, 0.389984130859375, 0.4122467041015625, 0.43450927734375, 0.4567718505859375, 0.479034423828125, 0.5012969970703125, 0.5235595703125, 0.5458221435546875, 0.568084716796875, 0.5903472900390625, 0.61260986328125, 0.6348724365234375, 0.657135009765625, 0.6793975830078125, 0.70166015625]}, "gradients/decoder.transformer.h.16.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 4.0, 5.0, 4.0, 4.0, 7.0, 9.0, 6.0, 4.0, 3.0, 14.0, 14.0, 16.0, 10.0, 15.0, 23.0, 26.0, 28.0, 42.0, 38.0, 37.0, 46.0, 37.0, 25.0, 32.0, 45.0, 1059.0, 43.0, 38.0, 49.0, 28.0, 40.0, 38.0, 24.0, 24.0, 23.0, 42.0, 26.0, 17.0, 18.0, 14.0, 10.0, 14.0, 13.0, 11.0, 8.0, 3.0, 1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-5.19140625, -5.02154541015625, -4.8516845703125, -4.68182373046875, -4.511962890625, -4.34210205078125, -4.1722412109375, -4.00238037109375, -3.83251953125, -3.66265869140625, -3.4927978515625, -3.32293701171875, -3.153076171875, -2.98321533203125, -2.8133544921875, -2.64349365234375, -2.4736328125, -2.30377197265625, -2.1339111328125, -1.96405029296875, -1.794189453125, -1.62432861328125, -1.4544677734375, -1.28460693359375, -1.11474609375, -0.94488525390625, -0.7750244140625, -0.60516357421875, -0.435302734375, -0.26544189453125, -0.0955810546875, 0.07427978515625, 0.244140625, 0.41400146484375, 0.5838623046875, 0.75372314453125, 0.923583984375, 1.09344482421875, 1.2633056640625, 1.43316650390625, 1.60302734375, 1.77288818359375, 1.9427490234375, 2.11260986328125, 2.282470703125, 2.45233154296875, 2.6221923828125, 2.79205322265625, 2.9619140625, 3.13177490234375, 3.3016357421875, 3.47149658203125, 3.641357421875, 3.81121826171875, 3.9810791015625, 4.15093994140625, 4.32080078125, 4.49066162109375, 4.6605224609375, 4.83038330078125, 5.000244140625, 5.17010498046875, 5.3399658203125, 5.50982666015625, 5.6796875]}, "gradients/decoder.transformer.h.16.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 2.0, 7.0, 4.0, 4.0, 13.0, 21.0, 19.0, 42.0, 73.0, 86.0, 139.0, 195.0, 293.0, 422.0, 629.0, 930.0, 1396.0, 2038.0, 3047.0, 4692.0, 7108.0, 10791.0, 17192.0, 25741.0, 39999.0, 62246.0, 94193.0, 134769.0, 1205681.0, 156573.0, 113055.0, 75194.0, 49072.0, 31893.0, 20788.0, 13376.0, 8588.0, 5629.0, 3703.0, 2504.0, 1655.0, 1124.0, 748.0, 469.0, 327.0, 201.0, 144.0, 105.0, 75.0, 46.0, 32.0, 24.0, 20.0, 9.0, 7.0, 6.0, 6.0, 1.0, 0.0, 2.0], "bins": [-0.57568359375, -0.5580978393554688, -0.5405120849609375, -0.5229263305664062, -0.505340576171875, -0.48775482177734375, -0.4701690673828125, -0.45258331298828125, -0.43499755859375, -0.41741180419921875, -0.3998260498046875, -0.38224029541015625, -0.364654541015625, -0.34706878662109375, -0.3294830322265625, -0.31189727783203125, -0.2943115234375, -0.27672576904296875, -0.2591400146484375, -0.24155426025390625, -0.223968505859375, -0.20638275146484375, -0.1887969970703125, -0.17121124267578125, -0.15362548828125, -0.13603973388671875, -0.1184539794921875, -0.10086822509765625, -0.083282470703125, -0.06569671630859375, -0.0481109619140625, -0.03052520751953125, -0.012939453125, 0.00464630126953125, 0.0222320556640625, 0.03981781005859375, 0.057403564453125, 0.07498931884765625, 0.0925750732421875, 0.11016082763671875, 0.12774658203125, 0.14533233642578125, 0.1629180908203125, 0.18050384521484375, 0.198089599609375, 0.21567535400390625, 0.2332611083984375, 0.25084686279296875, 0.2684326171875, 0.28601837158203125, 0.3036041259765625, 0.32118988037109375, 0.338775634765625, 0.35636138916015625, 0.3739471435546875, 0.39153289794921875, 0.40911865234375, 0.42670440673828125, 0.4442901611328125, 0.46187591552734375, 0.479461669921875, 0.49704742431640625, 0.5146331787109375, 0.5322189331054688, 0.5498046875]}, "gradients/decoder.transformer.h.16.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 2.0, 1.0, 0.0, 4.0, 5.0, 8.0, 3.0, 3.0, 4.0, 4.0, 10.0, 13.0, 27.0, 30.0, 32.0, 45.0, 35.0, 53.0, 52.0, 70.0, 79.0, 67.0, 82.0, 76.0, 57.0, 46.0, 45.0, 28.0, 22.0, 24.0, 14.0, 12.0, 12.0, 10.0, 9.0, 1.0, 4.0, 6.0, 3.0, 3.0, 4.0, 0.0, 3.0, 2.0, 0.0, 0.0, 1.0, 3.0, 1.0, 0.0, 1.0], "bins": [-0.0088958740234375, -0.008635938167572021, -0.008376002311706543, -0.008116066455841064, -0.007856130599975586, -0.007596194744110107, -0.007336258888244629, -0.00707632303237915, -0.006816387176513672, -0.006556451320648193, -0.006296515464782715, -0.006036579608917236, -0.005776643753051758, -0.005516707897186279, -0.005256772041320801, -0.004996836185455322, -0.004736900329589844, -0.004476964473724365, -0.004217028617858887, -0.003957092761993408, -0.0036971569061279297, -0.003437221050262451, -0.0031772851943969727, -0.002917349338531494, -0.0026574134826660156, -0.002397477626800537, -0.0021375417709350586, -0.00187760591506958, -0.0016176700592041016, -0.001357734203338623, -0.0010977983474731445, -0.000837862491607666, -0.0005779266357421875, -0.000317990779876709, -5.805492401123047e-05, 0.00020188093185424805, 0.00046181678771972656, 0.0007217526435852051, 0.0009816884994506836, 0.0012416243553161621, 0.0015015602111816406, 0.0017614960670471191, 0.0020214319229125977, 0.002281367778778076, 0.0025413036346435547, 0.002801239490509033, 0.0030611753463745117, 0.0033211112022399902, 0.0035810470581054688, 0.0038409829139709473, 0.004100918769836426, 0.004360854625701904, 0.004620790481567383, 0.004880726337432861, 0.00514066219329834, 0.005400598049163818, 0.005660533905029297, 0.005920469760894775, 0.006180405616760254, 0.006440341472625732, 0.006700277328491211, 0.0069602131843566895, 0.007220149040222168, 0.0074800848960876465, 0.007740020751953125]}, "gradients/decoder.transformer.h.16.crossattention.q_attn.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 4.0, 2.0, 2.0, 1.0, 10.0, 5.0, 2.0, 7.0, 13.0, 14.0, 20.0, 27.0, 18.0, 47.0, 62.0, 101.0, 182.0, 335.0, 1319.0, 717410.0, 327098.0, 1054.0, 344.0, 153.0, 104.0, 51.0, 46.0, 38.0, 25.0, 18.0, 11.0, 6.0, 6.0, 1.0, 3.0, 7.0, 5.0, 1.0, 5.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.17333984375, -0.16735267639160156, -0.16136550903320312, -0.1553783416748047, -0.14939117431640625, -0.1434040069580078, -0.13741683959960938, -0.13142967224121094, -0.1254425048828125, -0.11945533752441406, -0.11346817016601562, -0.10748100280761719, -0.10149383544921875, -0.09550666809082031, -0.08951950073242188, -0.08353233337402344, -0.077545166015625, -0.07155799865722656, -0.06557083129882812, -0.05958366394042969, -0.05359649658203125, -0.04760932922363281, -0.041622161865234375, -0.03563499450683594, -0.0296478271484375, -0.023660659790039062, -0.017673492431640625, -0.011686325073242188, -0.00569915771484375, 0.0002880096435546875, 0.006275177001953125, 0.012262344360351562, 0.01824951171875, 0.024236679077148438, 0.030223846435546875, 0.03621101379394531, 0.04219818115234375, 0.04818534851074219, 0.054172515869140625, 0.06015968322753906, 0.0661468505859375, 0.07213401794433594, 0.07812118530273438, 0.08410835266113281, 0.09009552001953125, 0.09608268737792969, 0.10206985473632812, 0.10805702209472656, 0.114044189453125, 0.12003135681152344, 0.12601852416992188, 0.1320056915283203, 0.13799285888671875, 0.1439800262451172, 0.14996719360351562, 0.15595436096191406, 0.1619415283203125, 0.16792869567871094, 0.17391586303710938, 0.1799030303955078, 0.18589019775390625, 0.1918773651123047, 0.19786453247070312, 0.20385169982910156, 0.2098388671875]}, "gradients/decoder.transformer.h.16.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 5.0, 176.0, 801.0, 32.0, 4.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.004977295640856028, -0.0034739633556455374, -0.001970631070435047, -0.0004672990180552006, 0.0010360334999859333, 0.002539366018027067, 0.0040426976047456264, 0.00554603012278676, 0.007049362640827894, 0.00855269469320774, 0.010056027211248875, 0.011559359729290009, 0.013062691316008568, 0.014566024765372276, 0.016069356352090836, 0.017572687938809395, 0.019076021388173103, 0.020579352974891663, 0.02208268642425537, 0.02358601801097393, 0.02508934959769249, 0.026592683047056198, 0.028096014633774757, 0.029599346220493317, 0.031102679669857025, 0.032606013119220734, 0.034109342843294144, 0.03561267629265785, 0.03711600974202156, 0.03861933946609497, 0.04012267291545868, 0.04162600636482239, 0.043129339814186096, 0.044632673263549805, 0.046136002987623215, 0.04763933643698692, 0.04914266988635063, 0.05064599961042404, 0.05214933305978775, 0.05365266650915146, 0.05515599995851517, 0.056659333407878876, 0.058162663131952286, 0.059665996581315994, 0.0611693300306797, 0.06267265975475311, 0.06417599320411682, 0.06567932665348053, 0.06718266010284424, 0.06868599355220795, 0.07018932700157166, 0.07169266045093536, 0.07319598644971848, 0.07469931989908218, 0.07620265334844589, 0.0777059867978096, 0.07920931279659271, 0.08071264624595642, 0.08221597969532013, 0.08371931314468384, 0.08522263914346695, 0.08672597259283066, 0.08822930604219437, 0.08973263949155807, 0.09123597294092178]}, "gradients/decoder.transformer.h.16.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 1.0, 7.0, 3.0, 6.0, 8.0, 12.0, 10.0, 14.0, 12.0, 16.0, 28.0, 29.0, 22.0, 26.0, 32.0, 24.0, 45.0, 40.0, 24.0, 42.0, 49.0, 60.0, 34.0, 39.0, 41.0, 36.0, 49.0, 41.0, 43.0, 35.0, 34.0, 25.0, 25.0, 20.0, 13.0, 17.0, 13.0, 8.0, 9.0, 3.0, 6.0, 3.0, 3.0, 1.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.004888296127319336, -0.004735466092824936, -0.004582636058330536, -0.004429806023836136, -0.004276975989341736, -0.004124145954847336, -0.003971315920352936, -0.0038184858858585358, -0.0036656558513641357, -0.0035128258168697357, -0.0033599957823753357, -0.0032071657478809357, -0.0030543357133865356, -0.0029015056788921356, -0.0027486756443977356, -0.0025958456099033356, -0.0024430155754089355, -0.0022901855409145355, -0.0021373555064201355, -0.0019845254719257355, -0.0018316954374313354, -0.0016788654029369354, -0.0015260353684425354, -0.0013732053339481354, -0.0012203752994537354, -0.0010675452649593353, -0.0009147152304649353, -0.0007618851959705353, -0.0006090551614761353, -0.00045622512698173523, -0.0003033950924873352, -0.00015056505799293518, 2.2649765014648438e-06, 0.00015509501099586487, 0.0003079250454902649, 0.0004607550799846649, 0.0006135851144790649, 0.000766415148973465, 0.000919245183467865, 0.001072075217962265, 0.001224905252456665, 0.001377735286951065, 0.001530565321445465, 0.0016833953559398651, 0.0018362253904342651, 0.001989055424928665, 0.002141885459423065, 0.002294715493917465, 0.0024475455284118652, 0.0026003755629062653, 0.0027532055974006653, 0.0029060356318950653, 0.0030588656663894653, 0.0032116957008838654, 0.0033645257353782654, 0.0035173557698726654, 0.0036701858043670654, 0.0038230158388614655, 0.0039758458733558655, 0.0041286759078502655, 0.0042815059423446655, 0.0044343359768390656, 0.004587166011333466, 0.004739996045827866, 0.004892826080322266]}, "gradients/decoder.transformer.h.16.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 4.0, 2.0, 3.0, 6.0, 4.0, 8.0, 12.0, 11.0, 17.0, 13.0, 18.0, 20.0, 28.0, 33.0, 29.0, 37.0, 36.0, 39.0, 47.0, 39.0, 55.0, 44.0, 43.0, 44.0, 44.0, 41.0, 46.0, 39.0, 33.0, 29.0, 33.0, 38.0, 17.0, 25.0, 13.0, 13.0, 12.0, 15.0, 1.0, 12.0, 1.0, 4.0, 1.0, 2.0, 2.0, 0.0, 2.0, 1.0, 2.0], "bins": [-10.578125, -10.2926025390625, -10.007080078125, -9.7215576171875, -9.43603515625, -9.1505126953125, -8.864990234375, -8.5794677734375, -8.2939453125, -8.0084228515625, -7.722900390625, -7.4373779296875, -7.15185546875, -6.8663330078125, -6.580810546875, -6.2952880859375, -6.009765625, -5.7242431640625, -5.438720703125, -5.1531982421875, -4.86767578125, -4.5821533203125, -4.296630859375, -4.0111083984375, -3.7255859375, -3.4400634765625, -3.154541015625, -2.8690185546875, -2.58349609375, -2.2979736328125, -2.012451171875, -1.7269287109375, -1.44140625, -1.1558837890625, -0.870361328125, -0.5848388671875, -0.29931640625, -0.0137939453125, 0.271728515625, 0.5572509765625, 0.8427734375, 1.1282958984375, 1.413818359375, 1.6993408203125, 1.98486328125, 2.2703857421875, 2.555908203125, 2.8414306640625, 3.126953125, 3.4124755859375, 3.697998046875, 3.9835205078125, 4.26904296875, 4.5545654296875, 4.840087890625, 5.1256103515625, 5.4111328125, 5.6966552734375, 5.982177734375, 6.2677001953125, 6.55322265625, 6.8387451171875, 7.124267578125, 7.4097900390625, 7.6953125]}, "gradients/decoder.transformer.h.16.attn.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 3.0, 1.0, 1.0, 6.0, 4.0, 6.0, 14.0, 19.0, 36.0, 46.0, 65.0, 89.0, 152.0, 218.0, 319.0, 455.0, 756.0, 1077.0, 1767.0, 2769.0, 4494.0, 7299.0, 12175.0, 21927.0, 42083.0, 95232.0, 317072.0, 340171.0, 101357.0, 43605.0, 22724.0, 12669.0, 7383.0, 4590.0, 2877.0, 1750.0, 1138.0, 735.0, 505.0, 292.0, 236.0, 174.0, 91.0, 67.0, 42.0, 31.0, 22.0, 15.0, 9.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.20703125, -5.02764892578125, -4.8482666015625, -4.66888427734375, -4.489501953125, -4.31011962890625, -4.1307373046875, -3.95135498046875, -3.77197265625, -3.59259033203125, -3.4132080078125, -3.23382568359375, -3.054443359375, -2.87506103515625, -2.6956787109375, -2.51629638671875, -2.3369140625, -2.15753173828125, -1.9781494140625, -1.79876708984375, -1.619384765625, -1.44000244140625, -1.2606201171875, -1.08123779296875, -0.90185546875, -0.72247314453125, -0.5430908203125, -0.36370849609375, -0.184326171875, -0.00494384765625, 0.1744384765625, 0.35382080078125, 0.533203125, 0.71258544921875, 0.8919677734375, 1.07135009765625, 1.250732421875, 1.43011474609375, 1.6094970703125, 1.78887939453125, 1.96826171875, 2.14764404296875, 2.3270263671875, 2.50640869140625, 2.685791015625, 2.86517333984375, 3.0445556640625, 3.22393798828125, 3.4033203125, 3.58270263671875, 3.7620849609375, 3.94146728515625, 4.120849609375, 4.30023193359375, 4.4796142578125, 4.65899658203125, 4.83837890625, 5.01776123046875, 5.1971435546875, 5.37652587890625, 5.555908203125, 5.73529052734375, 5.9146728515625, 6.09405517578125, 6.2734375]}, "gradients/decoder.transformer.h.16.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 3.0, 5.0, 5.0, 7.0, 6.0, 7.0, 10.0, 16.0, 10.0, 27.0, 31.0, 32.0, 37.0, 31.0, 46.0, 48.0, 58.0, 61.0, 83.0, 344.0, 1612.0, 111.0, 56.0, 70.0, 43.0, 39.0, 43.0, 34.0, 31.0, 21.0, 27.0, 20.0, 11.0, 17.0, 9.0, 13.0, 7.0, 6.0, 5.0, 4.0, 1.0, 4.0, 1.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-29.546875, -28.455810546875, -27.36474609375, -26.273681640625, -25.1826171875, -24.091552734375, -23.00048828125, -21.909423828125, -20.818359375, -19.727294921875, -18.63623046875, -17.545166015625, -16.4541015625, -15.363037109375, -14.27197265625, -13.180908203125, -12.08984375, -10.998779296875, -9.90771484375, -8.816650390625, -7.7255859375, -6.634521484375, -5.54345703125, -4.452392578125, -3.361328125, -2.270263671875, -1.17919921875, -0.088134765625, 1.0029296875, 2.093994140625, 3.18505859375, 4.276123046875, 5.3671875, 6.458251953125, 7.54931640625, 8.640380859375, 9.7314453125, 10.822509765625, 11.91357421875, 13.004638671875, 14.095703125, 15.186767578125, 16.27783203125, 17.368896484375, 18.4599609375, 19.551025390625, 20.64208984375, 21.733154296875, 22.82421875, 23.915283203125, 25.00634765625, 26.097412109375, 27.1884765625, 28.279541015625, 29.37060546875, 30.461669921875, 31.552734375, 32.643798828125, 33.73486328125, 34.825927734375, 35.9169921875, 37.008056640625, 38.09912109375, 39.190185546875, 40.28125]}, "gradients/decoder.transformer.h.16.attn.c_attn.weight": {"_type": "histogram", "values": [2.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 2.0, 3.0, 6.0, 5.0, 6.0, 18.0, 18.0, 33.0, 36.0, 60.0, 78.0, 139.0, 233.0, 482.0, 1903.0, 1044489.0, 2095138.0, 1911.0, 492.0, 228.0, 156.0, 91.0, 47.0, 45.0, 29.0, 22.0, 15.0, 6.0, 6.0, 1.0, 4.0, 4.0, 1.0, 1.0, 1.0, 1.0, 1.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-94.125, -90.640625, -87.15625, -83.671875, -80.1875, -76.703125, -73.21875, -69.734375, -66.25, -62.765625, -59.28125, -55.796875, -52.3125, -48.828125, -45.34375, -41.859375, -38.375, -34.890625, -31.40625, -27.921875, -24.4375, -20.953125, -17.46875, -13.984375, -10.5, -7.015625, -3.53125, -0.046875, 3.4375, 6.921875, 10.40625, 13.890625, 17.375, 20.859375, 24.34375, 27.828125, 31.3125, 34.796875, 38.28125, 41.765625, 45.25, 48.734375, 52.21875, 55.703125, 59.1875, 62.671875, 66.15625, 69.640625, 73.125, 76.609375, 80.09375, 83.578125, 87.0625, 90.546875, 94.03125, 97.515625, 101.0, 104.484375, 107.96875, 111.453125, 114.9375, 118.421875, 121.90625, 125.390625, 128.875]}, "gradients/decoder.transformer.h.16.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 6.0, 966.0, 47.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-61.23606491088867, -44.301368713378906, -27.366668701171875, -10.43197250366211, 6.502727508544922, 23.437427520751953, 40.37211990356445, 57.306819915771484, 74.24151611328125, 91.17621612548828, 108.11091613769531, 125.04560852050781, 141.98031616210938, 158.91500854492188, 175.84970092773438, 192.78439331054688, 209.71910095214844, 226.65379333496094, 243.5885009765625, 260.523193359375, 277.4578857421875, 294.392578125, 311.3272705078125, 328.2619934082031, 345.1966857910156, 362.1313781738281, 379.0660705566406, 396.00079345703125, 412.93548583984375, 429.87017822265625, 446.80487060546875, 463.73956298828125, 480.67431640625, 497.6090087890625, 514.543701171875, 531.4783935546875, 548.4130859375, 565.3477783203125, 582.282470703125, 599.2172241210938, 616.1519165039062, 633.0866088867188, 650.0213012695312, 666.9559936523438, 683.8906860351562, 700.825439453125, 717.7601318359375, 734.69482421875, 751.6295166015625, 768.564208984375, 785.4989013671875, 802.43359375, 819.3682861328125, 836.302978515625, 853.2376708984375, 870.1724243164062, 887.1070556640625, 904.041748046875, 920.9764404296875, 937.9111328125, 954.8458251953125, 971.780517578125, 988.7152099609375, 1005.6499633789062, 1022.5846557617188]}, "gradients/decoder.transformer.h.16.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 0.0, 4.0, 2.0, 2.0, 4.0, 5.0, 8.0, 8.0, 7.0, 7.0, 13.0, 13.0, 15.0, 22.0, 26.0, 29.0, 31.0, 28.0, 33.0, 19.0, 43.0, 45.0, 39.0, 43.0, 44.0, 39.0, 44.0, 39.0, 51.0, 40.0, 40.0, 37.0, 20.0, 26.0, 32.0, 21.0, 23.0, 15.0, 16.0, 21.0, 14.0, 8.0, 9.0, 2.0, 9.0, 5.0, 5.0, 4.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0], "bins": [-96.8150863647461, -93.96878051757812, -91.12248229980469, -88.27617645263672, -85.42987823486328, -82.58357238769531, -79.73727416992188, -76.8909683227539, -74.04466247558594, -71.19835662841797, -68.35205841064453, -65.50575256347656, -62.659454345703125, -59.813148498535156, -56.96684646606445, -54.12054443359375, -51.27424621582031, -48.42794418334961, -45.581642150878906, -42.73533630371094, -39.8890380859375, -37.04273223876953, -34.19643020629883, -31.350128173828125, -28.503826141357422, -25.65752410888672, -22.811222076416016, -19.96491813659668, -17.118616104125977, -14.272314071655273, -11.426010131835938, -8.579708099365234, -5.7333984375, -2.8870959281921387, -0.040793418884277344, 2.805509567260742, 5.651811599731445, 8.498113632202148, 11.344417572021484, 14.190719604492188, 17.03702163696289, 19.883323669433594, 22.729625701904297, 25.575929641723633, 28.422231674194336, 31.26853370666504, 34.114837646484375, 36.96113967895508, 39.80744171142578, 42.653743743896484, 45.50004577636719, 48.346351623535156, 51.192649841308594, 54.03895568847656, 56.885257720947266, 59.73155975341797, 62.57786178588867, 65.42416381835938, 68.27046966552734, 71.11676788330078, 73.96307373046875, 76.80937194824219, 79.65567779541016, 82.50198364257812, 85.34828186035156]}, "gradients/decoder.transformer.h.15.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 3.0, 1.0, 1.0, 3.0, 8.0, 6.0, 7.0, 11.0, 11.0, 18.0, 14.0, 22.0, 17.0, 27.0, 38.0, 31.0, 34.0, 32.0, 42.0, 47.0, 45.0, 50.0, 36.0, 44.0, 51.0, 48.0, 40.0, 47.0, 30.0, 36.0, 31.0, 32.0, 27.0, 23.0, 23.0, 15.0, 12.0, 11.0, 13.0, 3.0, 12.0, 4.0, 1.0, 2.0, 2.0, 0.0, 3.0, 2.0, 1.0, 1.0], "bins": [-11.0, -10.7020263671875, -10.404052734375, -10.1060791015625, -9.80810546875, -9.5101318359375, -9.212158203125, -8.9141845703125, -8.6162109375, -8.3182373046875, -8.020263671875, -7.7222900390625, -7.42431640625, -7.1263427734375, -6.828369140625, -6.5303955078125, -6.232421875, -5.9344482421875, -5.636474609375, -5.3385009765625, -5.04052734375, -4.7425537109375, -4.444580078125, -4.1466064453125, -3.8486328125, -3.5506591796875, -3.252685546875, -2.9547119140625, -2.65673828125, -2.3587646484375, -2.060791015625, -1.7628173828125, -1.46484375, -1.1668701171875, -0.868896484375, -0.5709228515625, -0.27294921875, 0.0250244140625, 0.322998046875, 0.6209716796875, 0.9189453125, 1.2169189453125, 1.514892578125, 1.8128662109375, 2.11083984375, 2.4088134765625, 2.706787109375, 3.0047607421875, 3.302734375, 3.6007080078125, 3.898681640625, 4.1966552734375, 4.49462890625, 4.7926025390625, 5.090576171875, 5.3885498046875, 5.6865234375, 5.9844970703125, 6.282470703125, 6.5804443359375, 6.87841796875, 7.1763916015625, 7.474365234375, 7.7723388671875, 8.0703125]}, "gradients/decoder.transformer.h.15.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 3.0, 7.0, 4.0, 8.0, 6.0, 14.0, 16.0, 17.0, 21.0, 22.0, 23.0, 35.0, 47.0, 73.0, 93.0, 164.0, 241.0, 449.0, 993.0, 2211.0, 6010.0, 18933.0, 73045.0, 393824.0, 1759644.0, 1537479.0, 313770.0, 61722.0, 16271.0, 5243.0, 1922.0, 838.0, 409.0, 231.0, 145.0, 84.0, 63.0, 62.0, 32.0, 23.0, 24.0, 16.0, 18.0, 8.0, 10.0, 2.0, 6.0, 4.0, 4.0, 3.0, 3.0, 2.0, 2.0, 0.0, 0.0, 1.0], "bins": [-16.359375, -15.8626708984375, -15.365966796875, -14.8692626953125, -14.37255859375, -13.8758544921875, -13.379150390625, -12.8824462890625, -12.3857421875, -11.8890380859375, -11.392333984375, -10.8956298828125, -10.39892578125, -9.9022216796875, -9.405517578125, -8.9088134765625, -8.412109375, -7.9154052734375, -7.418701171875, -6.9219970703125, -6.42529296875, -5.9285888671875, -5.431884765625, -4.9351806640625, -4.4384765625, -3.9417724609375, -3.445068359375, -2.9483642578125, -2.45166015625, -1.9549560546875, -1.458251953125, -0.9615478515625, -0.46484375, 0.0318603515625, 0.528564453125, 1.0252685546875, 1.52197265625, 2.0186767578125, 2.515380859375, 3.0120849609375, 3.5087890625, 4.0054931640625, 4.502197265625, 4.9989013671875, 5.49560546875, 5.9923095703125, 6.489013671875, 6.9857177734375, 7.482421875, 7.9791259765625, 8.475830078125, 8.9725341796875, 9.46923828125, 9.9659423828125, 10.462646484375, 10.9593505859375, 11.4560546875, 11.9527587890625, 12.449462890625, 12.9461669921875, 13.44287109375, 13.9395751953125, 14.436279296875, 14.9329833984375, 15.4296875]}, "gradients/decoder.transformer.h.15.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 4.0, 3.0, 11.0, 26.0, 30.0, 58.0, 93.0, 149.0, 252.0, 445.0, 696.0, 788.0, 608.0, 403.0, 228.0, 120.0, 77.0, 43.0, 28.0, 9.0, 7.0, 4.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-40.75, -39.628173828125, -38.50634765625, -37.384521484375, -36.2626953125, -35.140869140625, -34.01904296875, -32.897216796875, -31.775390625, -30.653564453125, -29.53173828125, -28.409912109375, -27.2880859375, -26.166259765625, -25.04443359375, -23.922607421875, -22.80078125, -21.678955078125, -20.55712890625, -19.435302734375, -18.3134765625, -17.191650390625, -16.06982421875, -14.947998046875, -13.826171875, -12.704345703125, -11.58251953125, -10.460693359375, -9.3388671875, -8.217041015625, -7.09521484375, -5.973388671875, -4.8515625, -3.729736328125, -2.60791015625, -1.486083984375, -0.3642578125, 0.757568359375, 1.87939453125, 3.001220703125, 4.123046875, 5.244873046875, 6.36669921875, 7.488525390625, 8.6103515625, 9.732177734375, 10.85400390625, 11.975830078125, 13.09765625, 14.219482421875, 15.34130859375, 16.463134765625, 17.5849609375, 18.706787109375, 19.82861328125, 20.950439453125, 22.072265625, 23.194091796875, 24.31591796875, 25.437744140625, 26.5595703125, 27.681396484375, 28.80322265625, 29.925048828125, 31.046875]}, "gradients/decoder.transformer.h.15.mlp.c_fc.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 4.0, 4.0, 13.0, 11.0, 13.0, 19.0, 20.0, 37.0, 42.0, 58.0, 111.0, 182.0, 314.0, 616.0, 2099.0, 44834.0, 3535166.0, 600384.0, 8171.0, 1054.0, 457.0, 238.0, 134.0, 94.0, 64.0, 43.0, 37.0, 29.0, 17.0, 4.0, 7.0, 4.0, 6.0, 3.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-70.5625, -68.26953125, -65.9765625, -63.68359375, -61.390625, -59.09765625, -56.8046875, -54.51171875, -52.21875, -49.92578125, -47.6328125, -45.33984375, -43.046875, -40.75390625, -38.4609375, -36.16796875, -33.875, -31.58203125, -29.2890625, -26.99609375, -24.703125, -22.41015625, -20.1171875, -17.82421875, -15.53125, -13.23828125, -10.9453125, -8.65234375, -6.359375, -4.06640625, -1.7734375, 0.51953125, 2.8125, 5.10546875, 7.3984375, 9.69140625, 11.984375, 14.27734375, 16.5703125, 18.86328125, 21.15625, 23.44921875, 25.7421875, 28.03515625, 30.328125, 32.62109375, 34.9140625, 37.20703125, 39.5, 41.79296875, 44.0859375, 46.37890625, 48.671875, 50.96484375, 53.2578125, 55.55078125, 57.84375, 60.13671875, 62.4296875, 64.72265625, 67.015625, 69.30859375, 71.6015625, 73.89453125, 76.1875]}, "gradients/decoder.transformer.h.15.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 6.0, 18.0, 41.0, 64.0, 105.0, 143.0, 166.0, 155.0, 137.0, 85.0, 48.0, 17.0, 17.0, 4.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-132.06883239746094, -127.63428497314453, -123.19974517822266, -118.76519775390625, -114.33065795898438, -109.89611053466797, -105.46156311035156, -101.02702331542969, -96.59247589111328, -92.15792846679688, -87.723388671875, -83.2888412475586, -78.85429382324219, -74.41975402832031, -69.9852066040039, -65.5506591796875, -61.116119384765625, -56.681575775146484, -52.247032165527344, -47.81248474121094, -43.3779411315918, -38.943397521972656, -34.50885009765625, -30.07430648803711, -25.63976287841797, -21.205219268798828, -16.770673751831055, -12.336129188537598, -7.901584625244141, -3.467041015625, 0.9675045013427734, 5.402050018310547, 9.836578369140625, 14.271122932434082, 18.70566749572754, 23.140213012695312, 27.574756622314453, 32.009300231933594, 36.44384765625, 40.87839126586914, 45.31293487548828, 49.74747848510742, 54.18202209472656, 58.61656951904297, 63.05111312866211, 67.48565673828125, 71.92020416259766, 76.35475158691406, 80.78929138183594, 85.22383880615234, 89.65837860107422, 94.09292602539062, 98.5274658203125, 102.9620132446289, 107.39656066894531, 111.83110046386719, 116.2656478881836, 120.7001953125, 125.13473510742188, 129.56927490234375, 134.0038299560547, 138.43836975097656, 142.87290954589844, 147.30746459960938, 151.74200439453125]}, "gradients/decoder.transformer.h.15.ln_2.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 2.0, 2.0, 1.0, 3.0, 2.0, 4.0, 2.0, 3.0, 8.0, 14.0, 12.0, 14.0, 16.0, 12.0, 12.0, 21.0, 28.0, 26.0, 25.0, 21.0, 37.0, 33.0, 35.0, 50.0, 43.0, 31.0, 47.0, 36.0, 45.0, 46.0, 41.0, 39.0, 27.0, 33.0, 32.0, 29.0, 29.0, 19.0, 20.0, 24.0, 16.0, 11.0, 16.0, 12.0, 6.0, 7.0, 8.0, 6.0, 4.0, 5.0, 1.0, 0.0, 4.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-63.7955322265625, -61.680599212646484, -59.56566619873047, -57.45073318481445, -55.33580017089844, -53.22086715698242, -51.105934143066406, -48.990997314453125, -46.876068115234375, -44.76113510131836, -42.646202087402344, -40.53126907348633, -38.41633605957031, -36.3014030456543, -34.18647003173828, -32.071533203125, -29.956600189208984, -27.84166717529297, -25.726734161376953, -23.611801147460938, -21.496868133544922, -19.381935119628906, -17.267000198364258, -15.152067184448242, -13.037134170532227, -10.922201156616211, -8.807268142700195, -6.692334175109863, -4.577401161193848, -2.462468147277832, -0.3475341796875, 1.7673988342285156, 3.8823318481445312, 5.997264862060547, 8.112197875976562, 10.227131843566895, 12.34206485748291, 14.456997871398926, 16.571931838989258, 18.686864852905273, 20.80179786682129, 22.916730880737305, 25.03166389465332, 27.14659881591797, 29.261531829833984, 31.37646484375, 33.491397857666016, 35.60633087158203, 37.72126388549805, 39.83619689941406, 41.95112991333008, 44.066062927246094, 46.18099594116211, 48.295928955078125, 50.410865783691406, 52.525794982910156, 54.64073181152344, 56.75566482543945, 58.87059783935547, 60.985530853271484, 63.1004638671875, 65.21540069580078, 67.33032989501953, 69.44526672363281, 71.56019592285156]}, "gradients/decoder.transformer.h.15.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 5.0, 2.0, 3.0, 2.0, 3.0, 10.0, 2.0, 7.0, 11.0, 12.0, 15.0, 14.0, 17.0, 28.0, 29.0, 30.0, 33.0, 24.0, 25.0, 32.0, 36.0, 43.0, 37.0, 46.0, 43.0, 38.0, 48.0, 52.0, 42.0, 41.0, 27.0, 31.0, 30.0, 29.0, 22.0, 22.0, 20.0, 18.0, 17.0, 11.0, 16.0, 7.0, 5.0, 7.0, 6.0, 7.0, 6.0, 1.0, 1.0, 3.0, 1.0, 2.0, 0.0, 2.0], "bins": [-9.6640625, -9.38720703125, -9.1103515625, -8.83349609375, -8.556640625, -8.27978515625, -8.0029296875, -7.72607421875, -7.44921875, -7.17236328125, -6.8955078125, -6.61865234375, -6.341796875, -6.06494140625, -5.7880859375, -5.51123046875, -5.234375, -4.95751953125, -4.6806640625, -4.40380859375, -4.126953125, -3.85009765625, -3.5732421875, -3.29638671875, -3.01953125, -2.74267578125, -2.4658203125, -2.18896484375, -1.912109375, -1.63525390625, -1.3583984375, -1.08154296875, -0.8046875, -0.52783203125, -0.2509765625, 0.02587890625, 0.302734375, 0.57958984375, 0.8564453125, 1.13330078125, 1.41015625, 1.68701171875, 1.9638671875, 2.24072265625, 2.517578125, 2.79443359375, 3.0712890625, 3.34814453125, 3.625, 3.90185546875, 4.1787109375, 4.45556640625, 4.732421875, 5.00927734375, 5.2861328125, 5.56298828125, 5.83984375, 6.11669921875, 6.3935546875, 6.67041015625, 6.947265625, 7.22412109375, 7.5009765625, 7.77783203125, 8.0546875]}, "gradients/decoder.transformer.h.15.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 3.0, 3.0, 6.0, 7.0, 21.0, 23.0, 33.0, 35.0, 84.0, 109.0, 142.0, 215.0, 296.0, 478.0, 669.0, 922.0, 1316.0, 1906.0, 2713.0, 3899.0, 5557.0, 8137.0, 11909.0, 17467.0, 25740.0, 38232.0, 55682.0, 82217.0, 120125.0, 159956.0, 155250.0, 112833.0, 77377.0, 52881.0, 35654.0, 23955.0, 16280.0, 11339.0, 7611.0, 5377.0, 3611.0, 2581.0, 1744.0, 1305.0, 820.0, 649.0, 432.0, 321.0, 195.0, 154.0, 106.0, 63.0, 48.0, 30.0, 13.0, 19.0, 8.0, 7.0, 3.0, 1.0, 4.0], "bins": [-0.76904296875, -0.7456817626953125, -0.722320556640625, -0.6989593505859375, -0.67559814453125, -0.6522369384765625, -0.628875732421875, -0.6055145263671875, -0.5821533203125, -0.5587921142578125, -0.535430908203125, -0.5120697021484375, -0.48870849609375, -0.4653472900390625, -0.441986083984375, -0.4186248779296875, -0.395263671875, -0.3719024658203125, -0.348541259765625, -0.3251800537109375, -0.30181884765625, -0.2784576416015625, -0.255096435546875, -0.2317352294921875, -0.2083740234375, -0.1850128173828125, -0.161651611328125, -0.1382904052734375, -0.11492919921875, -0.0915679931640625, -0.068206787109375, -0.0448455810546875, -0.021484375, 0.0018768310546875, 0.025238037109375, 0.0485992431640625, 0.07196044921875, 0.0953216552734375, 0.118682861328125, 0.1420440673828125, 0.1654052734375, 0.1887664794921875, 0.212127685546875, 0.2354888916015625, 0.25885009765625, 0.2822113037109375, 0.305572509765625, 0.3289337158203125, 0.352294921875, 0.3756561279296875, 0.399017333984375, 0.4223785400390625, 0.44573974609375, 0.4691009521484375, 0.492462158203125, 0.5158233642578125, 0.5391845703125, 0.5625457763671875, 0.585906982421875, 0.6092681884765625, 0.63262939453125, 0.6559906005859375, 0.679351806640625, 0.7027130126953125, 0.72607421875]}, "gradients/decoder.transformer.h.15.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 5.0, 3.0, 2.0, 3.0, 4.0, 8.0, 5.0, 6.0, 10.0, 11.0, 10.0, 13.0, 20.0, 22.0, 20.0, 19.0, 30.0, 24.0, 32.0, 39.0, 41.0, 34.0, 18.0, 39.0, 38.0, 1062.0, 50.0, 46.0, 33.0, 48.0, 45.0, 40.0, 24.0, 26.0, 23.0, 23.0, 22.0, 23.0, 21.0, 15.0, 11.0, 12.0, 14.0, 8.0, 8.0, 5.0, 5.0, 2.0, 4.0, 9.0, 4.0, 2.0, 2.0], "bins": [-6.09765625, -5.931640625, -5.765625, -5.599609375, -5.43359375, -5.267578125, -5.1015625, -4.935546875, -4.76953125, -4.603515625, -4.4375, -4.271484375, -4.10546875, -3.939453125, -3.7734375, -3.607421875, -3.44140625, -3.275390625, -3.109375, -2.943359375, -2.77734375, -2.611328125, -2.4453125, -2.279296875, -2.11328125, -1.947265625, -1.78125, -1.615234375, -1.44921875, -1.283203125, -1.1171875, -0.951171875, -0.78515625, -0.619140625, -0.453125, -0.287109375, -0.12109375, 0.044921875, 0.2109375, 0.376953125, 0.54296875, 0.708984375, 0.875, 1.041015625, 1.20703125, 1.373046875, 1.5390625, 1.705078125, 1.87109375, 2.037109375, 2.203125, 2.369140625, 2.53515625, 2.701171875, 2.8671875, 3.033203125, 3.19921875, 3.365234375, 3.53125, 3.697265625, 3.86328125, 4.029296875, 4.1953125, 4.361328125, 4.52734375]}, "gradients/decoder.transformer.h.15.crossattention.c_attn.weight": {"_type": "histogram", "values": [3.0, 1.0, 2.0, 5.0, 4.0, 12.0, 11.0, 16.0, 24.0, 46.0, 61.0, 113.0, 122.0, 200.0, 298.0, 435.0, 639.0, 972.0, 1463.0, 2106.0, 3088.0, 4844.0, 7312.0, 10596.0, 15910.0, 23907.0, 36166.0, 53153.0, 78028.0, 111366.0, 156806.0, 1184877.0, 124881.0, 90663.0, 62223.0, 41838.0, 27977.0, 18850.0, 12538.0, 8466.0, 5636.0, 3816.0, 2594.0, 1671.0, 1104.0, 777.0, 517.0, 322.0, 235.0, 155.0, 103.0, 69.0, 38.0, 26.0, 26.0, 19.0, 6.0, 5.0, 1.0, 3.0, 2.0, 0.0, 3.0, 2.0], "bins": [-0.54150390625, -0.5241470336914062, -0.5067901611328125, -0.48943328857421875, -0.472076416015625, -0.45471954345703125, -0.4373626708984375, -0.42000579833984375, -0.40264892578125, -0.38529205322265625, -0.3679351806640625, -0.35057830810546875, -0.333221435546875, -0.31586456298828125, -0.2985076904296875, -0.28115081787109375, -0.2637939453125, -0.24643707275390625, -0.2290802001953125, -0.21172332763671875, -0.194366455078125, -0.17700958251953125, -0.1596527099609375, -0.14229583740234375, -0.12493896484375, -0.10758209228515625, -0.0902252197265625, -0.07286834716796875, -0.055511474609375, -0.03815460205078125, -0.0207977294921875, -0.00344085693359375, 0.013916015625, 0.03127288818359375, 0.0486297607421875, 0.06598663330078125, 0.083343505859375, 0.10070037841796875, 0.1180572509765625, 0.13541412353515625, 0.15277099609375, 0.17012786865234375, 0.1874847412109375, 0.20484161376953125, 0.222198486328125, 0.23955535888671875, 0.2569122314453125, 0.27426910400390625, 0.2916259765625, 0.30898284912109375, 0.3263397216796875, 0.34369659423828125, 0.361053466796875, 0.37841033935546875, 0.3957672119140625, 0.41312408447265625, 0.43048095703125, 0.44783782958984375, 0.4651947021484375, 0.48255157470703125, 0.499908447265625, 0.5172653198242188, 0.5346221923828125, 0.5519790649414062, 0.5693359375]}, "gradients/decoder.transformer.h.15.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0, 3.0, 2.0, 3.0, 2.0, 4.0, 3.0, 7.0, 5.0, 10.0, 12.0, 19.0, 13.0, 31.0, 49.0, 57.0, 59.0, 93.0, 84.0, 89.0, 79.0, 72.0, 73.0, 73.0, 42.0, 30.0, 31.0, 14.0, 12.0, 8.0, 6.0, 6.0, 1.0, 2.0, 4.0, 0.0, 2.0, 3.0, 3.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.00937652587890625, -0.00907886028289795, -0.008781194686889648, -0.008483529090881348, -0.008185863494873047, -0.007888197898864746, -0.007590532302856445, -0.0072928667068481445, -0.006995201110839844, -0.006697535514831543, -0.006399869918823242, -0.006102204322814941, -0.005804538726806641, -0.00550687313079834, -0.005209207534790039, -0.004911541938781738, -0.0046138763427734375, -0.004316210746765137, -0.004018545150756836, -0.003720879554748535, -0.0034232139587402344, -0.0031255483627319336, -0.002827882766723633, -0.002530217170715332, -0.0022325515747070312, -0.0019348859786987305, -0.0016372203826904297, -0.001339554786682129, -0.0010418891906738281, -0.0007442235946655273, -0.00044655799865722656, -0.00014889240264892578, 0.000148773193359375, 0.0004464387893676758, 0.0007441043853759766, 0.0010417699813842773, 0.0013394355773925781, 0.001637101173400879, 0.0019347667694091797, 0.0022324323654174805, 0.0025300979614257812, 0.002827763557434082, 0.003125429153442383, 0.0034230947494506836, 0.0037207603454589844, 0.004018425941467285, 0.004316091537475586, 0.004613757133483887, 0.0049114227294921875, 0.005209088325500488, 0.005506753921508789, 0.00580441951751709, 0.006102085113525391, 0.006399750709533691, 0.006697416305541992, 0.006995081901550293, 0.007292747497558594, 0.0075904130935668945, 0.007888078689575195, 0.008185744285583496, 0.008483409881591797, 0.008781075477600098, 0.009078741073608398, 0.0093764066696167, 0.009674072265625]}, "gradients/decoder.transformer.h.15.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 3.0, 2.0, 4.0, 2.0, 5.0, 5.0, 5.0, 8.0, 10.0, 19.0, 32.0, 35.0, 45.0, 86.0, 141.0, 237.0, 623.0, 24439.0, 1019114.0, 2825.0, 414.0, 175.0, 103.0, 66.0, 48.0, 26.0, 27.0, 15.0, 11.0, 7.0, 7.0, 6.0, 2.0, 2.0, 4.0, 2.0, 2.0, 1.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.2076416015625, -0.2012176513671875, -0.194793701171875, -0.1883697509765625, -0.18194580078125, -0.1755218505859375, -0.169097900390625, -0.1626739501953125, -0.15625, -0.1498260498046875, -0.143402099609375, -0.1369781494140625, -0.13055419921875, -0.1241302490234375, -0.117706298828125, -0.1112823486328125, -0.1048583984375, -0.0984344482421875, -0.092010498046875, -0.0855865478515625, -0.07916259765625, -0.0727386474609375, -0.066314697265625, -0.0598907470703125, -0.053466796875, -0.0470428466796875, -0.040618896484375, -0.0341949462890625, -0.02777099609375, -0.0213470458984375, -0.014923095703125, -0.0084991455078125, -0.0020751953125, 0.0043487548828125, 0.010772705078125, 0.0171966552734375, 0.02362060546875, 0.0300445556640625, 0.036468505859375, 0.0428924560546875, 0.04931640625, 0.0557403564453125, 0.062164306640625, 0.0685882568359375, 0.07501220703125, 0.0814361572265625, 0.087860107421875, 0.0942840576171875, 0.1007080078125, 0.1071319580078125, 0.113555908203125, 0.1199798583984375, 0.12640380859375, 0.1328277587890625, 0.139251708984375, 0.1456756591796875, 0.152099609375, 0.1585235595703125, 0.164947509765625, 0.1713714599609375, 0.17779541015625, 0.1842193603515625, 0.190643310546875, 0.1970672607421875, 0.2034912109375]}, "gradients/decoder.transformer.h.15.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 14.0, 57.0, 252.0, 494.0, 162.0, 29.0, 5.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.028306107968091965, -0.02752826176583767, -0.026750413700938225, -0.02597256749868393, -0.025194719433784485, -0.02441687323153019, -0.023639027029275894, -0.02286117896437645, -0.022083330899477005, -0.02130548469722271, -0.020527636632323265, -0.01974979043006897, -0.018971942365169525, -0.01819409616291523, -0.017416249960660934, -0.01663840189576149, -0.015860555693507195, -0.015082708559930325, -0.014304861426353455, -0.01352701522409916, -0.012749167159199715, -0.01197132095694542, -0.01119347382336855, -0.01041562668979168, -0.00963777955621481, -0.00885993242263794, -0.00808208528906107, -0.007304238621145487, -0.006526391487568617, -0.005748544353991747, -0.004970697686076164, -0.004192850552499294, -0.0034150034189224243, -0.0026371562853455544, -0.001859309384599328, -0.0010814624838531017, -0.00030361535027623177, 0.0004742317833006382, 0.0012520784512162209, 0.002029925584793091, 0.002807772718369961, 0.0035856198519468307, 0.004363466985523701, 0.005141313653439283, 0.005919160787016153, 0.006697007920593023, 0.007474854588508606, 0.008252701722085476, 0.009030548855662346, 0.009808395989239216, 0.010586243122816086, 0.011364089325070381, 0.012141937389969826, 0.012919783592224121, 0.013697630725800991, 0.014475477859377861, 0.015253324992954731, 0.0160311721265316, 0.016809018328785896, 0.01758686639368534, 0.018364712595939636, 0.01914256066083908, 0.019920406863093376, 0.02069825306534767, 0.021476101130247116]}, "gradients/decoder.transformer.h.15.ln_cross_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 2.0, 2.0, 6.0, 3.0, 8.0, 7.0, 9.0, 10.0, 3.0, 7.0, 14.0, 18.0, 15.0, 13.0, 37.0, 27.0, 24.0, 33.0, 30.0, 43.0, 35.0, 42.0, 41.0, 40.0, 38.0, 39.0, 47.0, 38.0, 45.0, 47.0, 35.0, 34.0, 32.0, 28.0, 22.0, 31.0, 14.0, 17.0, 13.0, 15.0, 15.0, 6.0, 8.0, 7.0, 4.0, 6.0, 4.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.004151046276092529, -0.003997752442955971, -0.0038444586098194122, -0.0036911647766828537, -0.003537870943546295, -0.0033845771104097366, -0.003231283277273178, -0.0030779894441366196, -0.002924695611000061, -0.0027714017778635025, -0.002618107944726944, -0.0024648141115903854, -0.002311520278453827, -0.0021582264453172684, -0.00200493261218071, -0.0018516387790441513, -0.0016983449459075928, -0.0015450511127710342, -0.0013917572796344757, -0.0012384634464979172, -0.0010851696133613586, -0.0009318757802248001, -0.0007785819470882416, -0.000625288113951683, -0.0004719942808151245, -0.000318700447678566, -0.00016540661454200745, -1.2112781405448914e-05, 0.00014118105173110962, 0.00029447488486766815, 0.0004477687180042267, 0.0006010625511407852, 0.0007543563842773438, 0.0009076502174139023, 0.0010609440505504608, 0.0012142378836870193, 0.0013675317168235779, 0.0015208255499601364, 0.001674119383096695, 0.0018274132162332535, 0.001980707049369812, 0.0021340008825063705, 0.002287294715642929, 0.0024405885487794876, 0.002593882381916046, 0.0027471762150526047, 0.002900470048189163, 0.0030537638813257217, 0.0032070577144622803, 0.003360351547598839, 0.0035136453807353973, 0.003666939213871956, 0.0038202330470085144, 0.003973526880145073, 0.0041268207132816315, 0.00428011454641819, 0.0044334083795547485, 0.004586702212691307, 0.004739996045827866, 0.004893289878964424, 0.005046583712100983, 0.005199877545237541, 0.0053531713783741, 0.005506465211510658, 0.005659759044647217]}, "gradients/decoder.transformer.h.15.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 5.0, 2.0, 3.0, 2.0, 3.0, 10.0, 2.0, 7.0, 11.0, 12.0, 15.0, 14.0, 17.0, 28.0, 29.0, 30.0, 33.0, 24.0, 25.0, 32.0, 36.0, 43.0, 37.0, 46.0, 43.0, 38.0, 48.0, 51.0, 43.0, 41.0, 27.0, 31.0, 30.0, 29.0, 22.0, 22.0, 20.0, 18.0, 17.0, 12.0, 15.0, 7.0, 5.0, 7.0, 6.0, 7.0, 6.0, 1.0, 1.0, 3.0, 1.0, 2.0, 0.0, 2.0], "bins": [-9.6640625, -9.38720703125, -9.1103515625, -8.83349609375, -8.556640625, -8.27978515625, -8.0029296875, -7.72607421875, -7.44921875, -7.17236328125, -6.8955078125, -6.61865234375, -6.341796875, -6.06494140625, -5.7880859375, -5.51123046875, -5.234375, -4.95751953125, -4.6806640625, -4.40380859375, -4.126953125, -3.85009765625, -3.5732421875, -3.29638671875, -3.01953125, -2.74267578125, -2.4658203125, -2.18896484375, -1.912109375, -1.63525390625, -1.3583984375, -1.08154296875, -0.8046875, -0.52783203125, -0.2509765625, 0.02587890625, 0.302734375, 0.57958984375, 0.8564453125, 1.13330078125, 1.41015625, 1.68701171875, 1.9638671875, 2.24072265625, 2.517578125, 2.79443359375, 3.0712890625, 3.34814453125, 3.625, 3.90185546875, 4.1787109375, 4.45556640625, 4.732421875, 5.00927734375, 5.2861328125, 5.56298828125, 5.83984375, 6.11669921875, 6.3935546875, 6.67041015625, 6.947265625, 7.22412109375, 7.5009765625, 7.77783203125, 8.0546875]}, "gradients/decoder.transformer.h.15.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 3.0, 2.0, 2.0, 2.0, 2.0, 1.0, 4.0, 6.0, 8.0, 13.0, 22.0, 20.0, 29.0, 41.0, 57.0, 84.0, 142.0, 197.0, 272.0, 442.0, 689.0, 1164.0, 1857.0, 3343.0, 5874.0, 10440.0, 19868.0, 38904.0, 80115.0, 169600.0, 301658.0, 211169.0, 100549.0, 48128.0, 24287.0, 12716.0, 6862.0, 3972.0, 2303.0, 1383.0, 845.0, 514.0, 311.0, 205.0, 131.0, 106.0, 75.0, 37.0, 38.0, 19.0, 17.0, 10.0, 8.0, 10.0, 2.0, 2.0, 1.0, 4.0, 2.0, 1.0, 2.0, 3.0], "bins": [-5.76953125, -5.59283447265625, -5.4161376953125, -5.23944091796875, -5.062744140625, -4.88604736328125, -4.7093505859375, -4.53265380859375, -4.35595703125, -4.17926025390625, -4.0025634765625, -3.82586669921875, -3.649169921875, -3.47247314453125, -3.2957763671875, -3.11907958984375, -2.9423828125, -2.76568603515625, -2.5889892578125, -2.41229248046875, -2.235595703125, -2.05889892578125, -1.8822021484375, -1.70550537109375, -1.52880859375, -1.35211181640625, -1.1754150390625, -0.99871826171875, -0.822021484375, -0.64532470703125, -0.4686279296875, -0.29193115234375, -0.115234375, 0.06146240234375, 0.2381591796875, 0.41485595703125, 0.591552734375, 0.76824951171875, 0.9449462890625, 1.12164306640625, 1.29833984375, 1.47503662109375, 1.6517333984375, 1.82843017578125, 2.005126953125, 2.18182373046875, 2.3585205078125, 2.53521728515625, 2.7119140625, 2.88861083984375, 3.0653076171875, 3.24200439453125, 3.418701171875, 3.59539794921875, 3.7720947265625, 3.94879150390625, 4.12548828125, 4.30218505859375, 4.4788818359375, 4.65557861328125, 4.832275390625, 5.00897216796875, 5.1856689453125, 5.36236572265625, 5.5390625]}, "gradients/decoder.transformer.h.15.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 2.0, 5.0, 1.0, 3.0, 3.0, 2.0, 5.0, 4.0, 7.0, 8.0, 10.0, 18.0, 19.0, 21.0, 28.0, 39.0, 31.0, 40.0, 46.0, 58.0, 65.0, 163.0, 1702.0, 278.0, 83.0, 63.0, 59.0, 55.0, 55.0, 36.0, 47.0, 16.0, 22.0, 15.0, 10.0, 11.0, 9.0, 6.0, 4.0, 4.0, 2.0, 1.0, 4.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-46.0, -44.67236328125, -43.3447265625, -42.01708984375, -40.689453125, -39.36181640625, -38.0341796875, -36.70654296875, -35.37890625, -34.05126953125, -32.7236328125, -31.39599609375, -30.068359375, -28.74072265625, -27.4130859375, -26.08544921875, -24.7578125, -23.43017578125, -22.1025390625, -20.77490234375, -19.447265625, -18.11962890625, -16.7919921875, -15.46435546875, -14.13671875, -12.80908203125, -11.4814453125, -10.15380859375, -8.826171875, -7.49853515625, -6.1708984375, -4.84326171875, -3.515625, -2.18798828125, -0.8603515625, 0.46728515625, 1.794921875, 3.12255859375, 4.4501953125, 5.77783203125, 7.10546875, 8.43310546875, 9.7607421875, 11.08837890625, 12.416015625, 13.74365234375, 15.0712890625, 16.39892578125, 17.7265625, 19.05419921875, 20.3818359375, 21.70947265625, 23.037109375, 24.36474609375, 25.6923828125, 27.02001953125, 28.34765625, 29.67529296875, 31.0029296875, 32.33056640625, 33.658203125, 34.98583984375, 36.3134765625, 37.64111328125, 38.96875]}, "gradients/decoder.transformer.h.15.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 5.0, 2.0, 3.0, 9.0, 6.0, 15.0, 11.0, 17.0, 22.0, 29.0, 37.0, 71.0, 88.0, 130.0, 154.0, 262.0, 447.0, 1214.0, 48586.0, 3087238.0, 5700.0, 654.0, 333.0, 190.0, 141.0, 87.0, 61.0, 55.0, 43.0, 32.0, 23.0, 13.0, 7.0, 5.0, 6.0, 4.0, 3.0, 4.0, 3.0, 3.0, 4.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-87.1875, -84.2119140625, -81.236328125, -78.2607421875, -75.28515625, -72.3095703125, -69.333984375, -66.3583984375, -63.3828125, -60.4072265625, -57.431640625, -54.4560546875, -51.48046875, -48.5048828125, -45.529296875, -42.5537109375, -39.578125, -36.6025390625, -33.626953125, -30.6513671875, -27.67578125, -24.7001953125, -21.724609375, -18.7490234375, -15.7734375, -12.7978515625, -9.822265625, -6.8466796875, -3.87109375, -0.8955078125, 2.080078125, 5.0556640625, 8.03125, 11.0068359375, 13.982421875, 16.9580078125, 19.93359375, 22.9091796875, 25.884765625, 28.8603515625, 31.8359375, 34.8115234375, 37.787109375, 40.7626953125, 43.73828125, 46.7138671875, 49.689453125, 52.6650390625, 55.640625, 58.6162109375, 61.591796875, 64.5673828125, 67.54296875, 70.5185546875, 73.494140625, 76.4697265625, 79.4453125, 82.4208984375, 85.396484375, 88.3720703125, 91.34765625, 94.3232421875, 97.298828125, 100.2744140625, 103.25]}, "gradients/decoder.transformer.h.15.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 4.0, 26.0, 132.0, 345.0, 346.0, 132.0, 24.0, 5.0, 2.0, 0.0, 1.0], "bins": [-210.7089080810547, -207.01072692871094, -203.3125457763672, -199.6143798828125, -195.91619873046875, -192.218017578125, -188.51983642578125, -184.8216552734375, -181.1234893798828, -177.42530822753906, -173.7271270751953, -170.02896118164062, -166.33078002929688, -162.63259887695312, -158.93441772460938, -155.23623657226562, -151.53805541992188, -147.83987426757812, -144.14169311523438, -140.4435272216797, -136.74534606933594, -133.0471649169922, -129.34898376464844, -125.65081024169922, -121.95263671875, -118.25445556640625, -114.55628204345703, -110.85810089111328, -107.15992736816406, -103.46174621582031, -99.76356506347656, -96.06539154052734, -92.36721801757812, -88.66903686523438, -84.97086334228516, -81.2726821899414, -77.57450866699219, -73.87632751464844, -70.17814636230469, -66.47997283935547, -62.78179931640625, -59.083621978759766, -55.38544464111328, -51.68726348876953, -47.98908996582031, -44.29090881347656, -40.59273147583008, -36.894554138183594, -33.19637680053711, -29.498199462890625, -25.80002212524414, -22.101842880249023, -18.40366554260254, -14.705488204956055, -11.007308959960938, -7.309131622314453, -3.6109542846679688, 0.08722352981567383, 3.7854013442993164, 7.483579635620117, 11.181756973266602, 14.879934310913086, 18.578113555908203, 22.276290893554688, 25.974468231201172]}, "gradients/decoder.transformer.h.15.ln_1.bias": {"_type": "histogram", "values": [3.0, 1.0, 1.0, 0.0, 4.0, 1.0, 4.0, 2.0, 3.0, 7.0, 7.0, 7.0, 7.0, 9.0, 19.0, 16.0, 11.0, 14.0, 22.0, 35.0, 28.0, 22.0, 24.0, 33.0, 37.0, 26.0, 35.0, 31.0, 44.0, 45.0, 45.0, 33.0, 47.0, 26.0, 38.0, 40.0, 38.0, 24.0, 34.0, 31.0, 19.0, 15.0, 18.0, 21.0, 17.0, 20.0, 8.0, 15.0, 8.0, 7.0, 5.0, 2.0, 2.0, 2.0, 1.0, 4.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0], "bins": [-76.93017578125, -74.37652587890625, -71.82288360595703, -69.26923370361328, -66.71559143066406, -64.16194152832031, -61.60829544067383, -59.054649353027344, -56.50100326538086, -53.947357177734375, -51.39371109008789, -48.840065002441406, -46.286415100097656, -43.73277282714844, -41.17912292480469, -38.6254768371582, -36.07183074951172, -33.518184661865234, -30.96453857421875, -28.410890579223633, -25.85724449157715, -23.303598403930664, -20.749950408935547, -18.196304321289062, -15.642658233642578, -13.089012145996094, -10.535365104675293, -7.98171854019165, -5.428071975708008, -2.8744258880615234, -0.32077884674072266, 2.232868194580078, 4.7865142822265625, 7.340160846710205, 9.893807411193848, 12.447454452514648, 15.001100540161133, 17.554746627807617, 20.108394622802734, 22.66204071044922, 25.215686798095703, 27.769332885742188, 30.322978973388672, 32.876625061035156, 35.430274963378906, 37.983917236328125, 40.537567138671875, 43.09121322631836, 45.644859313964844, 48.19850540161133, 50.75215148925781, 53.3057975769043, 55.85944366455078, 58.41309356689453, 60.966739654541016, 63.5203857421875, 66.07403564453125, 68.627685546875, 71.18132781982422, 73.73497772216797, 76.28861999511719, 78.84226989746094, 81.39591217041016, 83.9495620727539, 86.50320434570312]}, "gradients/decoder.transformer.h.14.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 4.0, 2.0, 2.0, 3.0, 5.0, 5.0, 11.0, 8.0, 8.0, 20.0, 11.0, 14.0, 27.0, 25.0, 37.0, 21.0, 23.0, 36.0, 28.0, 34.0, 30.0, 47.0, 39.0, 42.0, 43.0, 38.0, 46.0, 45.0, 38.0, 37.0, 34.0, 27.0, 32.0, 24.0, 28.0, 24.0, 16.0, 16.0, 16.0, 11.0, 13.0, 9.0, 5.0, 6.0, 7.0, 7.0, 3.0, 3.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0], "bins": [-9.6484375, -9.3685302734375, -9.088623046875, -8.8087158203125, -8.52880859375, -8.2489013671875, -7.968994140625, -7.6890869140625, -7.4091796875, -7.1292724609375, -6.849365234375, -6.5694580078125, -6.28955078125, -6.0096435546875, -5.729736328125, -5.4498291015625, -5.169921875, -4.8900146484375, -4.610107421875, -4.3302001953125, -4.05029296875, -3.7703857421875, -3.490478515625, -3.2105712890625, -2.9306640625, -2.6507568359375, -2.370849609375, -2.0909423828125, -1.81103515625, -1.5311279296875, -1.251220703125, -0.9713134765625, -0.69140625, -0.4114990234375, -0.131591796875, 0.1483154296875, 0.42822265625, 0.7081298828125, 0.988037109375, 1.2679443359375, 1.5478515625, 1.8277587890625, 2.107666015625, 2.3875732421875, 2.66748046875, 2.9473876953125, 3.227294921875, 3.5072021484375, 3.787109375, 4.0670166015625, 4.346923828125, 4.6268310546875, 4.90673828125, 5.1866455078125, 5.466552734375, 5.7464599609375, 6.0263671875, 6.3062744140625, 6.586181640625, 6.8660888671875, 7.14599609375, 7.4259033203125, 7.705810546875, 7.9857177734375, 8.265625]}, "gradients/decoder.transformer.h.14.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 3.0, 8.0, 7.0, 4.0, 4.0, 4.0, 7.0, 6.0, 10.0, 13.0, 18.0, 23.0, 21.0, 32.0, 29.0, 24.0, 28.0, 38.0, 95.0, 217.0, 827.0, 4725.0, 49027.0, 1382790.0, 2628957.0, 117226.0, 8170.0, 1288.0, 304.0, 100.0, 50.0, 28.0, 28.0, 27.0, 22.0, 19.0, 21.0, 19.0, 14.0, 11.0, 14.0, 6.0, 9.0, 6.0, 9.0, 0.0, 5.0, 0.0, 2.0, 3.0, 0.0, 1.0, 1.0], "bins": [-37.65625, -36.584716796875, -35.51318359375, -34.441650390625, -33.3701171875, -32.298583984375, -31.22705078125, -30.155517578125, -29.083984375, -28.012451171875, -26.94091796875, -25.869384765625, -24.7978515625, -23.726318359375, -22.65478515625, -21.583251953125, -20.51171875, -19.440185546875, -18.36865234375, -17.297119140625, -16.2255859375, -15.154052734375, -14.08251953125, -13.010986328125, -11.939453125, -10.867919921875, -9.79638671875, -8.724853515625, -7.6533203125, -6.581787109375, -5.51025390625, -4.438720703125, -3.3671875, -2.295654296875, -1.22412109375, -0.152587890625, 0.9189453125, 1.990478515625, 3.06201171875, 4.133544921875, 5.205078125, 6.276611328125, 7.34814453125, 8.419677734375, 9.4912109375, 10.562744140625, 11.63427734375, 12.705810546875, 13.77734375, 14.848876953125, 15.92041015625, 16.991943359375, 18.0634765625, 19.135009765625, 20.20654296875, 21.278076171875, 22.349609375, 23.421142578125, 24.49267578125, 25.564208984375, 26.6357421875, 27.707275390625, 28.77880859375, 29.850341796875, 30.921875]}, "gradients/decoder.transformer.h.14.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 0.0, 3.0, 2.0, 8.0, 11.0, 16.0, 24.0, 48.0, 60.0, 115.0, 160.0, 241.0, 362.0, 513.0, 601.0, 601.0, 467.0, 276.0, 209.0, 118.0, 81.0, 45.0, 31.0, 26.0, 25.0, 16.0, 12.0, 5.0, 2.0, 1.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0], "bins": [-34.625, -33.731689453125, -32.83837890625, -31.945068359375, -31.0517578125, -30.158447265625, -29.26513671875, -28.371826171875, -27.478515625, -26.585205078125, -25.69189453125, -24.798583984375, -23.9052734375, -23.011962890625, -22.11865234375, -21.225341796875, -20.33203125, -19.438720703125, -18.54541015625, -17.652099609375, -16.7587890625, -15.865478515625, -14.97216796875, -14.078857421875, -13.185546875, -12.292236328125, -11.39892578125, -10.505615234375, -9.6123046875, -8.718994140625, -7.82568359375, -6.932373046875, -6.0390625, -5.145751953125, -4.25244140625, -3.359130859375, -2.4658203125, -1.572509765625, -0.67919921875, 0.214111328125, 1.107421875, 2.000732421875, 2.89404296875, 3.787353515625, 4.6806640625, 5.573974609375, 6.46728515625, 7.360595703125, 8.25390625, 9.147216796875, 10.04052734375, 10.933837890625, 11.8271484375, 12.720458984375, 13.61376953125, 14.507080078125, 15.400390625, 16.293701171875, 17.18701171875, 18.080322265625, 18.9736328125, 19.866943359375, 20.76025390625, 21.653564453125, 22.546875]}, "gradients/decoder.transformer.h.14.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 1.0, 1.0, 3.0, 5.0, 11.0, 11.0, 17.0, 34.0, 43.0, 50.0, 70.0, 112.0, 145.0, 278.0, 599.0, 2081.0, 65468.0, 3963047.0, 157925.0, 2885.0, 690.0, 283.0, 187.0, 105.0, 74.0, 49.0, 36.0, 27.0, 18.0, 12.0, 7.0, 8.0, 3.0, 4.0, 1.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-90.3125, -87.623046875, -84.93359375, -82.244140625, -79.5546875, -76.865234375, -74.17578125, -71.486328125, -68.796875, -66.107421875, -63.41796875, -60.728515625, -58.0390625, -55.349609375, -52.66015625, -49.970703125, -47.28125, -44.591796875, -41.90234375, -39.212890625, -36.5234375, -33.833984375, -31.14453125, -28.455078125, -25.765625, -23.076171875, -20.38671875, -17.697265625, -15.0078125, -12.318359375, -9.62890625, -6.939453125, -4.25, -1.560546875, 1.12890625, 3.818359375, 6.5078125, 9.197265625, 11.88671875, 14.576171875, 17.265625, 19.955078125, 22.64453125, 25.333984375, 28.0234375, 30.712890625, 33.40234375, 36.091796875, 38.78125, 41.470703125, 44.16015625, 46.849609375, 49.5390625, 52.228515625, 54.91796875, 57.607421875, 60.296875, 62.986328125, 65.67578125, 68.365234375, 71.0546875, 73.744140625, 76.43359375, 79.123046875, 81.8125]}, "gradients/decoder.transformer.h.14.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 14.0, 45.0, 88.0, 156.0, 194.0, 244.0, 135.0, 74.0, 47.0, 11.0, 3.0, 3.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-171.202392578125, -165.27557373046875, -159.3487548828125, -153.4219512939453, -147.49513244628906, -141.5683135986328, -135.64149475097656, -129.71469116210938, -123.78787231445312, -117.86105346679688, -111.93424224853516, -106.0074234008789, -100.08061218261719, -94.15379333496094, -88.22697448730469, -82.30016326904297, -76.37334442138672, -70.44652557373047, -64.51971435546875, -58.5928955078125, -52.66608428955078, -46.73926544189453, -40.81245040893555, -34.88563537597656, -28.958820343017578, -23.032005310058594, -17.10519027709961, -11.178373336791992, -5.251558303833008, 0.6752567291259766, 6.602073669433594, 12.528888702392578, 18.455703735351562, 24.382518768310547, 30.30933380126953, 36.23615264892578, 42.1629638671875, 48.08978271484375, 54.016597747802734, 59.94341278076172, 65.87022399902344, 71.79704284667969, 77.7238540649414, 83.65067291259766, 89.57748413085938, 95.50430297851562, 101.43112182617188, 107.3579330444336, 113.28475189208984, 119.2115707397461, 125.13838195800781, 131.06520080566406, 136.9920196533203, 142.9188232421875, 148.84564208984375, 154.7724609375, 160.69927978515625, 166.6260986328125, 172.55291748046875, 178.47972106933594, 184.4065399169922, 190.33335876464844, 196.2601776123047, 202.18698120117188, 208.11380004882812]}, "gradients/decoder.transformer.h.14.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 7.0, 1.0, 7.0, 3.0, 6.0, 13.0, 12.0, 6.0, 16.0, 11.0, 12.0, 14.0, 22.0, 19.0, 21.0, 29.0, 26.0, 31.0, 44.0, 19.0, 43.0, 29.0, 38.0, 43.0, 34.0, 27.0, 43.0, 40.0, 38.0, 48.0, 37.0, 38.0, 30.0, 23.0, 23.0, 20.0, 18.0, 18.0, 19.0, 15.0, 18.0, 13.0, 7.0, 11.0, 7.0, 3.0, 3.0, 1.0, 5.0, 7.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-56.4300537109375, -54.341880798339844, -52.25370788574219, -50.16553497314453, -48.077362060546875, -45.98918914794922, -43.90101623535156, -41.812843322753906, -39.72467041015625, -37.636497497558594, -35.54832458496094, -33.46015167236328, -31.371978759765625, -29.28380584716797, -27.195632934570312, -25.107460021972656, -23.019287109375, -20.931114196777344, -18.842941284179688, -16.75476837158203, -14.666595458984375, -12.578422546386719, -10.490249633789062, -8.402076721191406, -6.31390380859375, -4.225730895996094, -2.1375579833984375, -0.04938507080078125, 2.038787841796875, 4.126960754394531, 6.2151336669921875, 8.303306579589844, 10.3914794921875, 12.479652404785156, 14.567825317382812, 16.65599822998047, 18.744171142578125, 20.83234405517578, 22.920516967773438, 25.008689880371094, 27.09686279296875, 29.185035705566406, 31.273208618164062, 33.36138153076172, 35.449554443359375, 37.53772735595703, 39.62590026855469, 41.714073181152344, 43.80224609375, 45.890419006347656, 47.97859191894531, 50.06676483154297, 52.154937744140625, 54.24311065673828, 56.33128356933594, 58.419456481933594, 60.50762939453125, 62.595802307128906, 64.68397521972656, 66.77214813232422, 68.86032104492188, 70.94849395751953, 73.03666687011719, 75.12483978271484, 77.2130126953125]}, "gradients/decoder.transformer.h.14.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 2.0, 3.0, 2.0, 2.0, 2.0, 3.0, 7.0, 7.0, 9.0, 17.0, 13.0, 16.0, 18.0, 20.0, 16.0, 27.0, 39.0, 25.0, 29.0, 34.0, 32.0, 37.0, 34.0, 37.0, 43.0, 36.0, 39.0, 38.0, 48.0, 36.0, 40.0, 39.0, 25.0, 31.0, 26.0, 27.0, 17.0, 25.0, 20.0, 17.0, 17.0, 11.0, 9.0, 7.0, 10.0, 8.0, 1.0, 6.0, 3.0, 2.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-8.6875, -8.4144287109375, -8.141357421875, -7.8682861328125, -7.59521484375, -7.3221435546875, -7.049072265625, -6.7760009765625, -6.5029296875, -6.2298583984375, -5.956787109375, -5.6837158203125, -5.41064453125, -5.1375732421875, -4.864501953125, -4.5914306640625, -4.318359375, -4.0452880859375, -3.772216796875, -3.4991455078125, -3.22607421875, -2.9530029296875, -2.679931640625, -2.4068603515625, -2.1337890625, -1.8607177734375, -1.587646484375, -1.3145751953125, -1.04150390625, -0.7684326171875, -0.495361328125, -0.2222900390625, 0.05078125, 0.3238525390625, 0.596923828125, 0.8699951171875, 1.14306640625, 1.4161376953125, 1.689208984375, 1.9622802734375, 2.2353515625, 2.5084228515625, 2.781494140625, 3.0545654296875, 3.32763671875, 3.6007080078125, 3.873779296875, 4.1468505859375, 4.419921875, 4.6929931640625, 4.966064453125, 5.2391357421875, 5.51220703125, 5.7852783203125, 6.058349609375, 6.3314208984375, 6.6044921875, 6.8775634765625, 7.150634765625, 7.4237060546875, 7.69677734375, 7.9698486328125, 8.242919921875, 8.5159912109375, 8.7890625]}, "gradients/decoder.transformer.h.14.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 3.0, 2.0, 2.0, 9.0, 7.0, 11.0, 18.0, 34.0, 38.0, 66.0, 97.0, 128.0, 186.0, 303.0, 405.0, 569.0, 846.0, 1233.0, 1806.0, 2724.0, 3994.0, 6004.0, 9300.0, 14247.0, 22228.0, 34446.0, 54242.0, 84153.0, 128026.0, 172214.0, 166816.0, 121291.0, 79023.0, 51003.0, 32600.0, 20929.0, 13491.0, 8797.0, 5585.0, 3755.0, 2577.0, 1730.0, 1107.0, 823.0, 503.0, 378.0, 259.0, 171.0, 135.0, 82.0, 59.0, 29.0, 30.0, 21.0, 12.0, 8.0, 9.0, 4.0, 2.0, 2.0, 0.0, 1.0], "bins": [-0.87451171875, -0.8471221923828125, -0.819732666015625, -0.7923431396484375, -0.76495361328125, -0.7375640869140625, -0.710174560546875, -0.6827850341796875, -0.6553955078125, -0.6280059814453125, -0.600616455078125, -0.5732269287109375, -0.54583740234375, -0.5184478759765625, -0.491058349609375, -0.4636688232421875, -0.436279296875, -0.4088897705078125, -0.381500244140625, -0.3541107177734375, -0.32672119140625, -0.2993316650390625, -0.271942138671875, -0.2445526123046875, -0.2171630859375, -0.1897735595703125, -0.162384033203125, -0.1349945068359375, -0.10760498046875, -0.0802154541015625, -0.052825927734375, -0.0254364013671875, 0.001953125, 0.0293426513671875, 0.056732177734375, 0.0841217041015625, 0.11151123046875, 0.1389007568359375, 0.166290283203125, 0.1936798095703125, 0.2210693359375, 0.2484588623046875, 0.275848388671875, 0.3032379150390625, 0.33062744140625, 0.3580169677734375, 0.385406494140625, 0.4127960205078125, 0.440185546875, 0.4675750732421875, 0.494964599609375, 0.5223541259765625, 0.54974365234375, 0.5771331787109375, 0.604522705078125, 0.6319122314453125, 0.6593017578125, 0.6866912841796875, 0.714080810546875, 0.7414703369140625, 0.76885986328125, 0.7962493896484375, 0.823638916015625, 0.8510284423828125, 0.87841796875]}, "gradients/decoder.transformer.h.14.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 4.0, 4.0, 4.0, 9.0, 9.0, 5.0, 8.0, 7.0, 15.0, 10.0, 15.0, 22.0, 25.0, 31.0, 34.0, 38.0, 32.0, 45.0, 49.0, 44.0, 46.0, 40.0, 1060.0, 30.0, 54.0, 44.0, 49.0, 35.0, 37.0, 27.0, 30.0, 36.0, 29.0, 23.0, 22.0, 12.0, 17.0, 5.0, 6.0, 9.0, 2.0, 3.0, 5.0, 1.0, 2.0, 3.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.46484375, -6.2701416015625, -6.075439453125, -5.8807373046875, -5.68603515625, -5.4913330078125, -5.296630859375, -5.1019287109375, -4.9072265625, -4.7125244140625, -4.517822265625, -4.3231201171875, -4.12841796875, -3.9337158203125, -3.739013671875, -3.5443115234375, -3.349609375, -3.1549072265625, -2.960205078125, -2.7655029296875, -2.57080078125, -2.3760986328125, -2.181396484375, -1.9866943359375, -1.7919921875, -1.5972900390625, -1.402587890625, -1.2078857421875, -1.01318359375, -0.8184814453125, -0.623779296875, -0.4290771484375, -0.234375, -0.0396728515625, 0.155029296875, 0.3497314453125, 0.54443359375, 0.7391357421875, 0.933837890625, 1.1285400390625, 1.3232421875, 1.5179443359375, 1.712646484375, 1.9073486328125, 2.10205078125, 2.2967529296875, 2.491455078125, 2.6861572265625, 2.880859375, 3.0755615234375, 3.270263671875, 3.4649658203125, 3.65966796875, 3.8543701171875, 4.049072265625, 4.2437744140625, 4.4384765625, 4.6331787109375, 4.827880859375, 5.0225830078125, 5.21728515625, 5.4119873046875, 5.606689453125, 5.8013916015625, 5.99609375]}, "gradients/decoder.transformer.h.14.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 2.0, 2.0, 4.0, 2.0, 7.0, 10.0, 16.0, 21.0, 28.0, 62.0, 72.0, 110.0, 151.0, 196.0, 324.0, 473.0, 710.0, 1067.0, 1591.0, 2248.0, 3343.0, 5333.0, 7968.0, 12231.0, 19250.0, 29620.0, 46246.0, 72228.0, 108004.0, 147190.0, 1208993.0, 139916.0, 101452.0, 67031.0, 43032.0, 27269.0, 17653.0, 11319.0, 7437.0, 4908.0, 3149.0, 2063.0, 1336.0, 902.0, 704.0, 442.0, 316.0, 215.0, 166.0, 112.0, 56.0, 48.0, 38.0, 27.0, 12.0, 14.0, 10.0, 7.0, 3.0, 4.0, 4.0, 1.0, 2.0], "bins": [-0.59814453125, -0.5791015625, -0.56005859375, -0.541015625, -0.52197265625, -0.5029296875, -0.48388671875, -0.46484375, -0.44580078125, -0.4267578125, -0.40771484375, -0.388671875, -0.36962890625, -0.3505859375, -0.33154296875, -0.3125, -0.29345703125, -0.2744140625, -0.25537109375, -0.236328125, -0.21728515625, -0.1982421875, -0.17919921875, -0.16015625, -0.14111328125, -0.1220703125, -0.10302734375, -0.083984375, -0.06494140625, -0.0458984375, -0.02685546875, -0.0078125, 0.01123046875, 0.0302734375, 0.04931640625, 0.068359375, 0.08740234375, 0.1064453125, 0.12548828125, 0.14453125, 0.16357421875, 0.1826171875, 0.20166015625, 0.220703125, 0.23974609375, 0.2587890625, 0.27783203125, 0.296875, 0.31591796875, 0.3349609375, 0.35400390625, 0.373046875, 0.39208984375, 0.4111328125, 0.43017578125, 0.44921875, 0.46826171875, 0.4873046875, 0.50634765625, 0.525390625, 0.54443359375, 0.5634765625, 0.58251953125, 0.6015625, 0.62060546875]}, "gradients/decoder.transformer.h.14.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 4.0, 2.0, 5.0, 5.0, 4.0, 4.0, 4.0, 9.0, 11.0, 11.0, 15.0, 24.0, 39.0, 42.0, 49.0, 70.0, 66.0, 95.0, 85.0, 77.0, 84.0, 68.0, 62.0, 55.0, 33.0, 20.0, 20.0, 8.0, 11.0, 6.0, 4.0, 4.0, 4.0, 2.0, 3.0, 3.0, 0.0, 2.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.011077880859375, -0.010755538940429688, -0.010433197021484375, -0.010110855102539062, -0.00978851318359375, -0.009466171264648438, -0.009143829345703125, -0.008821487426757812, -0.0084991455078125, -0.008176803588867188, -0.007854461669921875, -0.0075321197509765625, -0.00720977783203125, -0.0068874359130859375, -0.006565093994140625, -0.0062427520751953125, -0.00592041015625, -0.0055980682373046875, -0.005275726318359375, -0.0049533843994140625, -0.00463104248046875, -0.0043087005615234375, -0.003986358642578125, -0.0036640167236328125, -0.0033416748046875, -0.0030193328857421875, -0.002696990966796875, -0.0023746490478515625, -0.00205230712890625, -0.0017299652099609375, -0.001407623291015625, -0.0010852813720703125, -0.000762939453125, -0.0004405975341796875, -0.000118255615234375, 0.0002040863037109375, 0.00052642822265625, 0.0008487701416015625, 0.001171112060546875, 0.0014934539794921875, 0.0018157958984375, 0.0021381378173828125, 0.002460479736328125, 0.0027828216552734375, 0.00310516357421875, 0.0034275054931640625, 0.003749847412109375, 0.0040721893310546875, 0.00439453125, 0.0047168731689453125, 0.005039215087890625, 0.0053615570068359375, 0.00568389892578125, 0.0060062408447265625, 0.006328582763671875, 0.0066509246826171875, 0.0069732666015625, 0.0072956085205078125, 0.007617950439453125, 0.007940292358398438, 0.00826263427734375, 0.008584976196289062, 0.008907318115234375, 0.009229660034179688, 0.009552001953125]}, "gradients/decoder.transformer.h.14.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 3.0, 0.0, 0.0, 4.0, 5.0, 1.0, 6.0, 3.0, 9.0, 4.0, 14.0, 12.0, 35.0, 29.0, 60.0, 83.0, 121.0, 239.0, 836.0, 79156.0, 965590.0, 1596.0, 331.0, 152.0, 65.0, 60.0, 34.0, 23.0, 28.0, 14.0, 15.0, 5.0, 7.0, 4.0, 7.0, 2.0, 5.0, 2.0, 4.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.21533203125, -0.20819854736328125, -0.2010650634765625, -0.19393157958984375, -0.186798095703125, -0.17966461181640625, -0.1725311279296875, -0.16539764404296875, -0.15826416015625, -0.15113067626953125, -0.1439971923828125, -0.13686370849609375, -0.129730224609375, -0.12259674072265625, -0.1154632568359375, -0.10832977294921875, -0.1011962890625, -0.09406280517578125, -0.0869293212890625, -0.07979583740234375, -0.072662353515625, -0.06552886962890625, -0.0583953857421875, -0.05126190185546875, -0.04412841796875, -0.03699493408203125, -0.0298614501953125, -0.02272796630859375, -0.015594482421875, -0.00846099853515625, -0.0013275146484375, 0.00580596923828125, 0.012939453125, 0.02007293701171875, 0.0272064208984375, 0.03433990478515625, 0.041473388671875, 0.04860687255859375, 0.0557403564453125, 0.06287384033203125, 0.07000732421875, 0.07714080810546875, 0.0842742919921875, 0.09140777587890625, 0.098541259765625, 0.10567474365234375, 0.1128082275390625, 0.11994171142578125, 0.1270751953125, 0.13420867919921875, 0.1413421630859375, 0.14847564697265625, 0.155609130859375, 0.16274261474609375, 0.1698760986328125, 0.17700958251953125, 0.18414306640625, 0.19127655029296875, 0.1984100341796875, 0.20554351806640625, 0.212677001953125, 0.21981048583984375, 0.2269439697265625, 0.23407745361328125, 0.2412109375]}, "gradients/decoder.transformer.h.14.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 4.0, 8.0, 18.0, 56.0, 132.0, 275.0, 266.0, 151.0, 62.0, 24.0, 12.0, 2.0, 3.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.006108611822128296, -0.005644149612635374, -0.005179687403142452, -0.004715224727988243, -0.004250762518495321, -0.0037863003090023994, -0.003321837866678834, -0.0028573754243552685, -0.0023929132148623466, -0.001928450888954103, -0.0014639885630458593, -0.0009995262371376157, -0.000535063911229372, -7.06017017364502e-05, 0.0003938607405871153, 0.0008583231829106808, 0.0013227853924036026, 0.0017872477183118463, 0.00225171004422009, 0.0027161724865436554, 0.0031806346960365772, 0.003645096905529499, 0.004109559580683708, 0.00457402179017663, 0.005038483999669552, 0.005502946209162474, 0.0059674084186553955, 0.006431871093809605, 0.0068963333033025265, 0.007360795512795448, 0.007825258187949657, 0.008289720863103867, 0.008754182606935501, 0.00921864528208971, 0.009683107025921345, 0.010147569701075554, 0.010612031444907188, 0.011076494120061398, 0.011540956795215607, 0.012005418539047241, 0.01246988121420145, 0.01293434388935566, 0.013398805633187294, 0.013863268308341503, 0.014327730983495712, 0.014792192727327347, 0.015256655402481556, 0.015721118077635765, 0.0161855798214674, 0.016650041565299034, 0.017114505171775818, 0.017578966915607452, 0.018043428659439087, 0.01850789040327072, 0.018972354009747505, 0.01943681575357914, 0.019901279360055923, 0.020365741103887558, 0.020830204710364342, 0.021294666454195976, 0.02175912819802761, 0.022223591804504395, 0.02268805354833603, 0.023152515292167664, 0.023616977035999298]}, "gradients/decoder.transformer.h.14.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 3.0, 3.0, 2.0, 4.0, 7.0, 5.0, 7.0, 11.0, 18.0, 13.0, 29.0, 25.0, 31.0, 33.0, 38.0, 31.0, 40.0, 54.0, 41.0, 49.0, 38.0, 49.0, 52.0, 45.0, 45.0, 46.0, 38.0, 37.0, 33.0, 33.0, 34.0, 25.0, 20.0, 25.0, 11.0, 9.0, 10.0, 5.0, 5.0, 2.0, 3.0, 2.0, 1.0, 2.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.005811810493469238, -0.005631326697766781, -0.005450842902064323, -0.005270359106361866, -0.005089875310659409, -0.004909391514956951, -0.004728907719254494, -0.004548423923552036, -0.004367940127849579, -0.004187456332147121, -0.004006972536444664, -0.0038264887407422066, -0.003646004945039749, -0.0034655211493372917, -0.0032850373536348343, -0.003104553557932377, -0.0029240697622299194, -0.002743585966527462, -0.0025631021708250046, -0.002382618375122547, -0.0022021345794200897, -0.0020216507837176323, -0.0018411669880151749, -0.0016606831923127174, -0.00148019939661026, -0.0012997156009078026, -0.0011192318052053452, -0.0009387480095028877, -0.0007582642138004303, -0.0005777804180979729, -0.00039729662239551544, -0.00021681282669305801, -3.6329030990600586e-05, 0.00014415476471185684, 0.00032463856041431427, 0.0005051223561167717, 0.0006856061518192291, 0.0008660899475216866, 0.001046573743224144, 0.0012270575389266014, 0.0014075413346290588, 0.0015880251303315163, 0.0017685089260339737, 0.0019489927217364311, 0.0021294765174388885, 0.002309960313141346, 0.0024904441088438034, 0.002670927904546261, 0.0028514117002487183, 0.0030318954959511757, 0.003212379291653633, 0.0033928630873560905, 0.003573346883058548, 0.0037538306787610054, 0.003934314474463463, 0.00411479827016592, 0.004295282065868378, 0.004475765861570835, 0.0046562496572732925, 0.00483673345297575, 0.005017217248678207, 0.005197701044380665, 0.005378184840083122, 0.00555866863578558, 0.005739152431488037]}, "gradients/decoder.transformer.h.14.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 2.0, 3.0, 2.0, 2.0, 2.0, 3.0, 7.0, 7.0, 9.0, 17.0, 13.0, 16.0, 18.0, 20.0, 16.0, 27.0, 39.0, 25.0, 29.0, 34.0, 32.0, 37.0, 34.0, 37.0, 43.0, 36.0, 39.0, 38.0, 48.0, 36.0, 40.0, 39.0, 25.0, 31.0, 26.0, 27.0, 17.0, 25.0, 20.0, 17.0, 16.0, 12.0, 9.0, 7.0, 10.0, 8.0, 1.0, 6.0, 3.0, 2.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-8.6875, -8.4144287109375, -8.141357421875, -7.8682861328125, -7.59521484375, -7.3221435546875, -7.049072265625, -6.7760009765625, -6.5029296875, -6.2298583984375, -5.956787109375, -5.6837158203125, -5.41064453125, -5.1375732421875, -4.864501953125, -4.5914306640625, -4.318359375, -4.0452880859375, -3.772216796875, -3.4991455078125, -3.22607421875, -2.9530029296875, -2.679931640625, -2.4068603515625, -2.1337890625, -1.8607177734375, -1.587646484375, -1.3145751953125, -1.04150390625, -0.7684326171875, -0.495361328125, -0.2222900390625, 0.05078125, 0.3238525390625, 0.596923828125, 0.8699951171875, 1.14306640625, 1.4161376953125, 1.689208984375, 1.9622802734375, 2.2353515625, 2.5084228515625, 2.781494140625, 3.0545654296875, 3.32763671875, 3.6007080078125, 3.873779296875, 4.1468505859375, 4.419921875, 4.6929931640625, 4.966064453125, 5.2391357421875, 5.51220703125, 5.7852783203125, 6.058349609375, 6.3314208984375, 6.6044921875, 6.8775634765625, 7.150634765625, 7.4237060546875, 7.69677734375, 7.9698486328125, 8.242919921875, 8.5159912109375, 8.7890625]}, "gradients/decoder.transformer.h.14.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0, 2.0, 2.0, 0.0, 5.0, 3.0, 7.0, 10.0, 21.0, 32.0, 36.0, 57.0, 77.0, 130.0, 186.0, 283.0, 442.0, 683.0, 1041.0, 1702.0, 2937.0, 4825.0, 8338.0, 14985.0, 28162.0, 55509.0, 117268.0, 337990.0, 266436.0, 100505.0, 49144.0, 25338.0, 13534.0, 7575.0, 4293.0, 2715.0, 1586.0, 946.0, 562.0, 387.0, 253.0, 181.0, 112.0, 90.0, 59.0, 37.0, 25.0, 18.0, 13.0, 8.0, 8.0, 3.0, 6.0, 1.0, 2.0], "bins": [-7.4453125, -7.24359130859375, -7.0418701171875, -6.84014892578125, -6.638427734375, -6.43670654296875, -6.2349853515625, -6.03326416015625, -5.83154296875, -5.62982177734375, -5.4281005859375, -5.22637939453125, -5.024658203125, -4.82293701171875, -4.6212158203125, -4.41949462890625, -4.2177734375, -4.01605224609375, -3.8143310546875, -3.61260986328125, -3.410888671875, -3.20916748046875, -3.0074462890625, -2.80572509765625, -2.60400390625, -2.40228271484375, -2.2005615234375, -1.99884033203125, -1.797119140625, -1.59539794921875, -1.3936767578125, -1.19195556640625, -0.990234375, -0.78851318359375, -0.5867919921875, -0.38507080078125, -0.183349609375, 0.01837158203125, 0.2200927734375, 0.42181396484375, 0.62353515625, 0.82525634765625, 1.0269775390625, 1.22869873046875, 1.430419921875, 1.63214111328125, 1.8338623046875, 2.03558349609375, 2.2373046875, 2.43902587890625, 2.6407470703125, 2.84246826171875, 3.044189453125, 3.24591064453125, 3.4476318359375, 3.64935302734375, 3.85107421875, 4.05279541015625, 4.2545166015625, 4.45623779296875, 4.657958984375, 4.85968017578125, 5.0614013671875, 5.26312255859375, 5.46484375]}, "gradients/decoder.transformer.h.14.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 1.0, 4.0, 4.0, 3.0, 2.0, 6.0, 4.0, 8.0, 11.0, 17.0, 17.0, 12.0, 16.0, 25.0, 26.0, 25.0, 27.0, 29.0, 51.0, 35.0, 49.0, 54.0, 90.0, 197.0, 1623.0, 212.0, 92.0, 48.0, 38.0, 38.0, 48.0, 32.0, 40.0, 22.0, 33.0, 20.0, 17.0, 10.0, 14.0, 9.0, 10.0, 11.0, 2.0, 4.0, 7.0, 5.0, 7.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 2.0, 2.0], "bins": [-31.640625, -30.695068359375, -29.74951171875, -28.803955078125, -27.8583984375, -26.912841796875, -25.96728515625, -25.021728515625, -24.076171875, -23.130615234375, -22.18505859375, -21.239501953125, -20.2939453125, -19.348388671875, -18.40283203125, -17.457275390625, -16.51171875, -15.566162109375, -14.62060546875, -13.675048828125, -12.7294921875, -11.783935546875, -10.83837890625, -9.892822265625, -8.947265625, -8.001708984375, -7.05615234375, -6.110595703125, -5.1650390625, -4.219482421875, -3.27392578125, -2.328369140625, -1.3828125, -0.437255859375, 0.50830078125, 1.453857421875, 2.3994140625, 3.344970703125, 4.29052734375, 5.236083984375, 6.181640625, 7.127197265625, 8.07275390625, 9.018310546875, 9.9638671875, 10.909423828125, 11.85498046875, 12.800537109375, 13.74609375, 14.691650390625, 15.63720703125, 16.582763671875, 17.5283203125, 18.473876953125, 19.41943359375, 20.364990234375, 21.310546875, 22.256103515625, 23.20166015625, 24.147216796875, 25.0927734375, 26.038330078125, 26.98388671875, 27.929443359375, 28.875]}, "gradients/decoder.transformer.h.14.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 3.0, 2.0, 2.0, 8.0, 12.0, 7.0, 8.0, 10.0, 19.0, 28.0, 21.0, 48.0, 71.0, 85.0, 154.0, 199.0, 281.0, 452.0, 1984.0, 214476.0, 2921967.0, 4186.0, 655.0, 321.0, 223.0, 140.0, 92.0, 71.0, 57.0, 31.0, 35.0, 20.0, 14.0, 12.0, 3.0, 6.0, 6.0, 4.0, 4.0, 2.0, 2.0, 0.0, 0.0, 2.0, 0.0, 1.0], "bins": [-108.75, -105.8994140625, -103.048828125, -100.1982421875, -97.34765625, -94.4970703125, -91.646484375, -88.7958984375, -85.9453125, -83.0947265625, -80.244140625, -77.3935546875, -74.54296875, -71.6923828125, -68.841796875, -65.9912109375, -63.140625, -60.2900390625, -57.439453125, -54.5888671875, -51.73828125, -48.8876953125, -46.037109375, -43.1865234375, -40.3359375, -37.4853515625, -34.634765625, -31.7841796875, -28.93359375, -26.0830078125, -23.232421875, -20.3818359375, -17.53125, -14.6806640625, -11.830078125, -8.9794921875, -6.12890625, -3.2783203125, -0.427734375, 2.4228515625, 5.2734375, 8.1240234375, 10.974609375, 13.8251953125, 16.67578125, 19.5263671875, 22.376953125, 25.2275390625, 28.078125, 30.9287109375, 33.779296875, 36.6298828125, 39.48046875, 42.3310546875, 45.181640625, 48.0322265625, 50.8828125, 53.7333984375, 56.583984375, 59.4345703125, 62.28515625, 65.1357421875, 67.986328125, 70.8369140625, 73.6875]}, "gradients/decoder.transformer.h.14.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 76.0, 887.0, 56.0], "bins": [-794.597900390625, -781.8685913085938, -769.1393432617188, -756.4100341796875, -743.6807250976562, -730.951416015625, -718.22216796875, -705.4928588867188, -692.7635498046875, -680.0342407226562, -667.3049926757812, -654.57568359375, -641.8463745117188, -629.1170654296875, -616.3878173828125, -603.6585083007812, -590.92919921875, -578.1998901367188, -565.4706420898438, -552.7413330078125, -540.0120239257812, -527.28271484375, -514.553466796875, -501.82415771484375, -489.09490966796875, -476.3656311035156, -463.6363220214844, -450.90704345703125, -438.177734375, -425.4484558105469, -412.71917724609375, -399.9898681640625, -387.26055908203125, -374.5312805175781, -361.8019714355469, -349.07269287109375, -336.3433837890625, -323.6141052246094, -310.88482666015625, -298.155517578125, -285.4262390136719, -272.69696044921875, -259.9676513671875, -247.23837280273438, -234.50906372070312, -221.77978515625, -209.0504913330078, -196.32119750976562, -183.59190368652344, -170.86260986328125, -158.13331604003906, -145.40402221679688, -132.67474365234375, -119.94544219970703, -107.21615600585938, -94.48686218261719, -81.757568359375, -69.02827453613281, -56.29898452758789, -43.56969451904297, -30.84040069580078, -18.111106872558594, -5.3818206787109375, 7.34747314453125, 20.07676887512207]}, "gradients/decoder.transformer.h.14.ln_1.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 2.0, 3.0, 0.0, 2.0, 6.0, 3.0, 4.0, 5.0, 8.0, 6.0, 11.0, 7.0, 16.0, 22.0, 22.0, 19.0, 29.0, 20.0, 29.0, 31.0, 42.0, 39.0, 36.0, 48.0, 47.0, 51.0, 36.0, 40.0, 39.0, 48.0, 36.0, 42.0, 35.0, 31.0, 26.0, 28.0, 26.0, 21.0, 27.0, 12.0, 8.0, 10.0, 12.0, 6.0, 8.0, 4.0, 3.0, 6.0, 2.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-85.42178344726562, -82.57671356201172, -79.73163604736328, -76.88656616210938, -74.04149627685547, -71.19642639160156, -68.35134887695312, -65.50627899169922, -62.66120910644531, -59.81613540649414, -56.971065521240234, -54.12599182128906, -51.280921936035156, -48.435848236083984, -45.59077453613281, -42.745704650878906, -39.900630950927734, -37.05555725097656, -34.210487365722656, -31.365413665771484, -28.520343780517578, -25.675270080566406, -22.830198287963867, -19.985126495361328, -17.14005470275879, -14.29498291015625, -11.449911117553711, -8.604838371276855, -5.759766578674316, -2.9146947860717773, -0.06962203979492188, 2.775449752807617, 5.620521545410156, 8.465593338012695, 11.310665130615234, 14.15573787689209, 17.000808715820312, 19.845882415771484, 22.690954208374023, 25.536026000976562, 28.3810977935791, 31.22616958618164, 34.07124328613281, 36.91631317138672, 39.76138687133789, 42.6064567565918, 45.45153045654297, 48.296600341796875, 51.14167404174805, 53.98674774169922, 56.831817626953125, 59.6768913269043, 62.5219612121582, 65.36703491210938, 68.21210479736328, 71.05717468261719, 73.90225219726562, 76.74732208251953, 79.59239959716797, 82.43746948242188, 85.28253936767578, 88.12760925292969, 90.97268676757812, 93.81775665283203, 96.66282653808594]}, "gradients/decoder.transformer.h.13.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 4.0, 0.0, 0.0, 1.0, 2.0, 5.0, 3.0, 5.0, 5.0, 4.0, 10.0, 16.0, 15.0, 20.0, 11.0, 19.0, 24.0, 19.0, 24.0, 29.0, 29.0, 35.0, 44.0, 21.0, 35.0, 44.0, 42.0, 34.0, 41.0, 42.0, 40.0, 36.0, 43.0, 39.0, 32.0, 25.0, 31.0, 24.0, 17.0, 25.0, 21.0, 17.0, 9.0, 12.0, 15.0, 9.0, 9.0, 6.0, 6.0, 7.0, 4.0, 3.0, 2.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.09375, -8.810302734375, -8.52685546875, -8.243408203125, -7.9599609375, -7.676513671875, -7.39306640625, -7.109619140625, -6.826171875, -6.542724609375, -6.25927734375, -5.975830078125, -5.6923828125, -5.408935546875, -5.12548828125, -4.842041015625, -4.55859375, -4.275146484375, -3.99169921875, -3.708251953125, -3.4248046875, -3.141357421875, -2.85791015625, -2.574462890625, -2.291015625, -2.007568359375, -1.72412109375, -1.440673828125, -1.1572265625, -0.873779296875, -0.59033203125, -0.306884765625, -0.0234375, 0.260009765625, 0.54345703125, 0.826904296875, 1.1103515625, 1.393798828125, 1.67724609375, 1.960693359375, 2.244140625, 2.527587890625, 2.81103515625, 3.094482421875, 3.3779296875, 3.661376953125, 3.94482421875, 4.228271484375, 4.51171875, 4.795166015625, 5.07861328125, 5.362060546875, 5.6455078125, 5.928955078125, 6.21240234375, 6.495849609375, 6.779296875, 7.062744140625, 7.34619140625, 7.629638671875, 7.9130859375, 8.196533203125, 8.47998046875, 8.763427734375, 9.046875]}, "gradients/decoder.transformer.h.13.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 5.0, 2.0, 2.0, 8.0, 6.0, 6.0, 13.0, 7.0, 7.0, 10.0, 19.0, 14.0, 16.0, 20.0, 25.0, 24.0, 29.0, 35.0, 66.0, 145.0, 275.0, 781.0, 3489.0, 21147.0, 264430.0, 2790658.0, 1045280.0, 58333.0, 7078.0, 1459.0, 400.0, 167.0, 76.0, 41.0, 32.0, 22.0, 14.0, 24.0, 19.0, 18.0, 11.0, 17.0, 15.0, 10.0, 7.0, 2.0, 8.0, 6.0, 8.0, 5.0, 4.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-29.03125, -28.1162109375, -27.201171875, -26.2861328125, -25.37109375, -24.4560546875, -23.541015625, -22.6259765625, -21.7109375, -20.7958984375, -19.880859375, -18.9658203125, -18.05078125, -17.1357421875, -16.220703125, -15.3056640625, -14.390625, -13.4755859375, -12.560546875, -11.6455078125, -10.73046875, -9.8154296875, -8.900390625, -7.9853515625, -7.0703125, -6.1552734375, -5.240234375, -4.3251953125, -3.41015625, -2.4951171875, -1.580078125, -0.6650390625, 0.25, 1.1650390625, 2.080078125, 2.9951171875, 3.91015625, 4.8251953125, 5.740234375, 6.6552734375, 7.5703125, 8.4853515625, 9.400390625, 10.3154296875, 11.23046875, 12.1455078125, 13.060546875, 13.9755859375, 14.890625, 15.8056640625, 16.720703125, 17.6357421875, 18.55078125, 19.4658203125, 20.380859375, 21.2958984375, 22.2109375, 23.1259765625, 24.041015625, 24.9560546875, 25.87109375, 26.7861328125, 27.701171875, 28.6162109375, 29.53125]}, "gradients/decoder.transformer.h.13.mlp.c_fc.bias": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 3.0, 4.0, 2.0, 2.0, 4.0, 9.0, 7.0, 13.0, 13.0, 23.0, 33.0, 48.0, 58.0, 59.0, 68.0, 113.0, 137.0, 187.0, 235.0, 334.0, 374.0, 419.0, 389.0, 377.0, 276.0, 230.0, 174.0, 128.0, 86.0, 61.0, 51.0, 42.0, 32.0, 25.0, 18.0, 13.0, 10.0, 7.0, 3.0, 5.0, 6.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-19.3125, -18.723388671875, -18.13427734375, -17.545166015625, -16.9560546875, -16.366943359375, -15.77783203125, -15.188720703125, -14.599609375, -14.010498046875, -13.42138671875, -12.832275390625, -12.2431640625, -11.654052734375, -11.06494140625, -10.475830078125, -9.88671875, -9.297607421875, -8.70849609375, -8.119384765625, -7.5302734375, -6.941162109375, -6.35205078125, -5.762939453125, -5.173828125, -4.584716796875, -3.99560546875, -3.406494140625, -2.8173828125, -2.228271484375, -1.63916015625, -1.050048828125, -0.4609375, 0.128173828125, 0.71728515625, 1.306396484375, 1.8955078125, 2.484619140625, 3.07373046875, 3.662841796875, 4.251953125, 4.841064453125, 5.43017578125, 6.019287109375, 6.6083984375, 7.197509765625, 7.78662109375, 8.375732421875, 8.96484375, 9.553955078125, 10.14306640625, 10.732177734375, 11.3212890625, 11.910400390625, 12.49951171875, 13.088623046875, 13.677734375, 14.266845703125, 14.85595703125, 15.445068359375, 16.0341796875, 16.623291015625, 17.21240234375, 17.801513671875, 18.390625]}, "gradients/decoder.transformer.h.13.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 1.0, 2.0, 2.0, 3.0, 7.0, 7.0, 11.0, 16.0, 17.0, 18.0, 23.0, 30.0, 48.0, 58.0, 68.0, 83.0, 128.0, 158.0, 215.0, 338.0, 588.0, 2523.0, 50452.0, 3441446.0, 684614.0, 10741.0, 1131.0, 421.0, 250.0, 194.0, 142.0, 109.0, 104.0, 93.0, 57.0, 54.0, 31.0, 19.0, 29.0, 23.0, 4.0, 9.0, 4.0, 9.0, 5.0, 3.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0], "bins": [-64.9375, -62.828125, -60.71875, -58.609375, -56.5, -54.390625, -52.28125, -50.171875, -48.0625, -45.953125, -43.84375, -41.734375, -39.625, -37.515625, -35.40625, -33.296875, -31.1875, -29.078125, -26.96875, -24.859375, -22.75, -20.640625, -18.53125, -16.421875, -14.3125, -12.203125, -10.09375, -7.984375, -5.875, -3.765625, -1.65625, 0.453125, 2.5625, 4.671875, 6.78125, 8.890625, 11.0, 13.109375, 15.21875, 17.328125, 19.4375, 21.546875, 23.65625, 25.765625, 27.875, 29.984375, 32.09375, 34.203125, 36.3125, 38.421875, 40.53125, 42.640625, 44.75, 46.859375, 48.96875, 51.078125, 53.1875, 55.296875, 57.40625, 59.515625, 61.625, 63.734375, 65.84375, 67.953125, 70.0625]}, "gradients/decoder.transformer.h.13.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 4.0, 15.0, 31.0, 64.0, 116.0, 185.0, 207.0, 180.0, 104.0, 63.0, 30.0, 10.0, 3.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-193.2103271484375, -187.75582885742188, -182.30133056640625, -176.84681701660156, -171.39231872558594, -165.9378204345703, -160.48330688476562, -155.02880859375, -149.57431030273438, -144.11981201171875, -138.66531372070312, -133.21080017089844, -127.75630187988281, -122.30180358886719, -116.84729766845703, -111.39279174804688, -105.93829345703125, -100.48379516601562, -95.02928924560547, -89.57478332519531, -84.12028503417969, -78.66578674316406, -73.2112808227539, -67.75677490234375, -62.302276611328125, -56.847774505615234, -51.393272399902344, -45.93877029418945, -40.48426818847656, -35.02976608276367, -29.57526397705078, -24.12076187133789, -18.666275024414062, -13.211772918701172, -7.757270812988281, -2.3027687072753906, 3.1517333984375, 8.60623550415039, 14.060737609863281, 19.515239715576172, 24.969741821289062, 30.424243927001953, 35.878746032714844, 41.333248138427734, 46.787750244140625, 52.242252349853516, 57.696754455566406, 63.1512565612793, 68.60575866699219, 74.06025695800781, 79.51476287841797, 84.96926879882812, 90.42376708984375, 95.87826538085938, 101.33277130126953, 106.78727722167969, 112.24177551269531, 117.69627380371094, 123.1507797241211, 128.60528564453125, 134.05978393554688, 139.5142822265625, 144.96878051757812, 150.4232940673828, 155.87779235839844]}, "gradients/decoder.transformer.h.13.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 2.0, 3.0, 2.0, 4.0, 5.0, 6.0, 7.0, 12.0, 10.0, 3.0, 14.0, 12.0, 20.0, 14.0, 26.0, 20.0, 34.0, 26.0, 28.0, 36.0, 42.0, 44.0, 47.0, 33.0, 35.0, 35.0, 41.0, 49.0, 36.0, 30.0, 33.0, 33.0, 27.0, 37.0, 37.0, 29.0, 17.0, 20.0, 16.0, 16.0, 18.0, 13.0, 11.0, 11.0, 6.0, 0.0, 4.0, 1.0, 6.0, 2.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-71.32232666015625, -69.15310668945312, -66.98388671875, -64.81466674804688, -62.64544677734375, -60.476226806640625, -58.307003021240234, -56.13778305053711, -53.968563079833984, -51.79934310913086, -49.630123138427734, -47.46090316772461, -45.29167938232422, -43.122459411621094, -40.95323944091797, -38.784019470214844, -36.61479949951172, -34.445579528808594, -32.27635955810547, -30.10713768005371, -27.937917709350586, -25.76869773864746, -23.599475860595703, -21.430255889892578, -19.261035919189453, -17.091815948486328, -14.922595024108887, -12.753374099731445, -10.58415412902832, -8.414934158325195, -6.245713233947754, -4.0764923095703125, -1.9072723388671875, 0.2619481086730957, 2.431168556213379, 4.600389003753662, 6.769609451293945, 8.93882942199707, 11.108050346374512, 13.277271270751953, 15.446491241455078, 17.615711212158203, 19.784931182861328, 21.954153060913086, 24.12337303161621, 26.292593002319336, 28.461814880371094, 30.63103485107422, 32.800254821777344, 34.96947479248047, 37.138694763183594, 39.30791473388672, 41.477134704589844, 43.64635467529297, 45.81557846069336, 47.984798431396484, 50.15401840209961, 52.323238372802734, 54.49245834350586, 56.661678314208984, 58.830902099609375, 61.0001220703125, 63.169342041015625, 65.33856201171875, 67.50778198242188]}, "gradients/decoder.transformer.h.13.crossattention.c_proj.bias": {"_type": "histogram", "values": [3.0, 0.0, 2.0, 0.0, 3.0, 1.0, 3.0, 2.0, 5.0, 7.0, 7.0, 10.0, 12.0, 10.0, 18.0, 20.0, 14.0, 23.0, 26.0, 29.0, 33.0, 34.0, 44.0, 53.0, 35.0, 37.0, 37.0, 35.0, 30.0, 39.0, 44.0, 39.0, 49.0, 36.0, 26.0, 31.0, 27.0, 32.0, 30.0, 18.0, 13.0, 16.0, 21.0, 17.0, 10.0, 5.0, 9.0, 7.0, 8.0, 1.0, 4.0, 4.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-8.4453125, -8.1480712890625, -7.850830078125, -7.5535888671875, -7.25634765625, -6.9591064453125, -6.661865234375, -6.3646240234375, -6.0673828125, -5.7701416015625, -5.472900390625, -5.1756591796875, -4.87841796875, -4.5811767578125, -4.283935546875, -3.9866943359375, -3.689453125, -3.3922119140625, -3.094970703125, -2.7977294921875, -2.50048828125, -2.2032470703125, -1.906005859375, -1.6087646484375, -1.3115234375, -1.0142822265625, -0.717041015625, -0.4197998046875, -0.12255859375, 0.1746826171875, 0.471923828125, 0.7691650390625, 1.06640625, 1.3636474609375, 1.660888671875, 1.9581298828125, 2.25537109375, 2.5526123046875, 2.849853515625, 3.1470947265625, 3.4443359375, 3.7415771484375, 4.038818359375, 4.3360595703125, 4.63330078125, 4.9305419921875, 5.227783203125, 5.5250244140625, 5.822265625, 6.1195068359375, 6.416748046875, 6.7139892578125, 7.01123046875, 7.3084716796875, 7.605712890625, 7.9029541015625, 8.2001953125, 8.4974365234375, 8.794677734375, 9.0919189453125, 9.38916015625, 9.6864013671875, 9.983642578125, 10.2808837890625, 10.578125]}, "gradients/decoder.transformer.h.13.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 5.0, 5.0, 8.0, 4.0, 11.0, 16.0, 18.0, 35.0, 64.0, 96.0, 142.0, 253.0, 420.0, 652.0, 1136.0, 1964.0, 3391.0, 5929.0, 10503.0, 18932.0, 34967.0, 63022.0, 113498.0, 189250.0, 229557.0, 164039.0, 93874.0, 51790.0, 28792.0, 15648.0, 8591.0, 5027.0, 2752.0, 1657.0, 971.0, 624.0, 356.0, 211.0, 140.0, 82.0, 43.0, 35.0, 22.0, 14.0, 7.0, 5.0, 5.0, 5.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-1.16796875, -1.1329345703125, -1.097900390625, -1.0628662109375, -1.02783203125, -0.9927978515625, -0.957763671875, -0.9227294921875, -0.8876953125, -0.8526611328125, -0.817626953125, -0.7825927734375, -0.74755859375, -0.7125244140625, -0.677490234375, -0.6424560546875, -0.607421875, -0.5723876953125, -0.537353515625, -0.5023193359375, -0.46728515625, -0.4322509765625, -0.397216796875, -0.3621826171875, -0.3271484375, -0.2921142578125, -0.257080078125, -0.2220458984375, -0.18701171875, -0.1519775390625, -0.116943359375, -0.0819091796875, -0.046875, -0.0118408203125, 0.023193359375, 0.0582275390625, 0.09326171875, 0.1282958984375, 0.163330078125, 0.1983642578125, 0.2333984375, 0.2684326171875, 0.303466796875, 0.3385009765625, 0.37353515625, 0.4085693359375, 0.443603515625, 0.4786376953125, 0.513671875, 0.5487060546875, 0.583740234375, 0.6187744140625, 0.65380859375, 0.6888427734375, 0.723876953125, 0.7589111328125, 0.7939453125, 0.8289794921875, 0.864013671875, 0.8990478515625, 0.93408203125, 0.9691162109375, 1.004150390625, 1.0391845703125, 1.07421875]}, "gradients/decoder.transformer.h.13.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 6.0, 0.0, 4.0, 1.0, 5.0, 5.0, 11.0, 8.0, 9.0, 15.0, 14.0, 18.0, 23.0, 19.0, 26.0, 25.0, 23.0, 36.0, 31.0, 45.0, 25.0, 42.0, 42.0, 43.0, 1070.0, 42.0, 29.0, 33.0, 53.0, 43.0, 34.0, 27.0, 25.0, 26.0, 32.0, 21.0, 12.0, 16.0, 13.0, 16.0, 17.0, 9.0, 3.0, 6.0, 10.0, 8.0, 5.0, 4.0, 4.0, 2.0, 2.0, 1.0, 1.0, 3.0, 1.0, 0.0, 0.0, 1.0], "bins": [-5.734375, -5.54803466796875, -5.3616943359375, -5.17535400390625, -4.989013671875, -4.80267333984375, -4.6163330078125, -4.42999267578125, -4.24365234375, -4.05731201171875, -3.8709716796875, -3.68463134765625, -3.498291015625, -3.31195068359375, -3.1256103515625, -2.93927001953125, -2.7529296875, -2.56658935546875, -2.3802490234375, -2.19390869140625, -2.007568359375, -1.82122802734375, -1.6348876953125, -1.44854736328125, -1.26220703125, -1.07586669921875, -0.8895263671875, -0.70318603515625, -0.516845703125, -0.33050537109375, -0.1441650390625, 0.04217529296875, 0.228515625, 0.41485595703125, 0.6011962890625, 0.78753662109375, 0.973876953125, 1.16021728515625, 1.3465576171875, 1.53289794921875, 1.71923828125, 1.90557861328125, 2.0919189453125, 2.27825927734375, 2.464599609375, 2.65093994140625, 2.8372802734375, 3.02362060546875, 3.2099609375, 3.39630126953125, 3.5826416015625, 3.76898193359375, 3.955322265625, 4.14166259765625, 4.3280029296875, 4.51434326171875, 4.70068359375, 4.88702392578125, 5.0733642578125, 5.25970458984375, 5.446044921875, 5.63238525390625, 5.8187255859375, 6.00506591796875, 6.19140625]}, "gradients/decoder.transformer.h.13.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 3.0, 3.0, 8.0, 13.0, 21.0, 29.0, 45.0, 55.0, 88.0, 138.0, 178.0, 297.0, 475.0, 674.0, 1118.0, 1674.0, 2638.0, 4137.0, 6425.0, 10188.0, 16468.0, 25674.0, 41313.0, 64570.0, 99300.0, 142540.0, 1218778.0, 152835.0, 109563.0, 72208.0, 46393.0, 29048.0, 18422.0, 11516.0, 7384.0, 4593.0, 2950.0, 1881.0, 1182.0, 823.0, 482.0, 336.0, 231.0, 171.0, 115.0, 58.0, 39.0, 16.0, 19.0, 13.0, 6.0, 2.0, 2.0, 5.0, 3.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.630859375, -0.610260009765625, -0.58966064453125, -0.569061279296875, -0.5484619140625, -0.527862548828125, -0.50726318359375, -0.486663818359375, -0.466064453125, -0.445465087890625, -0.42486572265625, -0.404266357421875, -0.3836669921875, -0.363067626953125, -0.34246826171875, -0.321868896484375, -0.30126953125, -0.280670166015625, -0.26007080078125, -0.239471435546875, -0.2188720703125, -0.198272705078125, -0.17767333984375, -0.157073974609375, -0.136474609375, -0.115875244140625, -0.09527587890625, -0.074676513671875, -0.0540771484375, -0.033477783203125, -0.01287841796875, 0.007720947265625, 0.0283203125, 0.048919677734375, 0.06951904296875, 0.090118408203125, 0.1107177734375, 0.131317138671875, 0.15191650390625, 0.172515869140625, 0.193115234375, 0.213714599609375, 0.23431396484375, 0.254913330078125, 0.2755126953125, 0.296112060546875, 0.31671142578125, 0.337310791015625, 0.35791015625, 0.378509521484375, 0.39910888671875, 0.419708251953125, 0.4403076171875, 0.460906982421875, 0.48150634765625, 0.502105712890625, 0.522705078125, 0.543304443359375, 0.56390380859375, 0.584503173828125, 0.6051025390625, 0.625701904296875, 0.64630126953125, 0.666900634765625, 0.6875]}, "gradients/decoder.transformer.h.13.crossattention.q_attn.bias": {"_type": "histogram", "values": [3.0, 1.0, 1.0, 3.0, 0.0, 1.0, 7.0, 0.0, 3.0, 6.0, 7.0, 6.0, 11.0, 12.0, 12.0, 18.0, 15.0, 20.0, 22.0, 29.0, 29.0, 39.0, 39.0, 51.0, 52.0, 54.0, 59.0, 49.0, 57.0, 44.0, 44.0, 40.0, 42.0, 34.0, 38.0, 39.0, 26.0, 12.0, 13.0, 20.0, 17.0, 10.0, 4.0, 5.0, 6.0, 4.0, 5.0, 3.0, 1.0, 2.0, 1.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.005443572998046875, -0.005242288112640381, -0.005041003227233887, -0.004839718341827393, -0.0046384334564208984, -0.004437148571014404, -0.00423586368560791, -0.004034578800201416, -0.003833293914794922, -0.0036320090293884277, -0.0034307241439819336, -0.0032294392585754395, -0.0030281543731689453, -0.002826869487762451, -0.002625584602355957, -0.002424299716949463, -0.0022230148315429688, -0.0020217299461364746, -0.0018204450607299805, -0.0016191601753234863, -0.0014178752899169922, -0.001216590404510498, -0.001015305519104004, -0.0008140206336975098, -0.0006127357482910156, -0.0004114508628845215, -0.00021016597747802734, -8.881092071533203e-06, 0.00019240379333496094, 0.0003936886787414551, 0.0005949735641479492, 0.0007962584495544434, 0.0009975433349609375, 0.0011988282203674316, 0.0014001131057739258, 0.00160139799118042, 0.001802682876586914, 0.002003967761993408, 0.0022052526473999023, 0.0024065375328063965, 0.0026078224182128906, 0.0028091073036193848, 0.003010392189025879, 0.003211677074432373, 0.003412961959838867, 0.0036142468452453613, 0.0038155317306518555, 0.00401681661605835, 0.004218101501464844, 0.004419386386871338, 0.004620671272277832, 0.004821956157684326, 0.00502324104309082, 0.0052245259284973145, 0.005425810813903809, 0.005627095699310303, 0.005828380584716797, 0.006029665470123291, 0.006230950355529785, 0.006432235240936279, 0.0066335201263427734, 0.006834805011749268, 0.007036089897155762, 0.007237374782562256, 0.00743865966796875]}, "gradients/decoder.transformer.h.13.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 3.0, 1.0, 0.0, 1.0, 4.0, 0.0, 2.0, 5.0, 5.0, 2.0, 4.0, 9.0, 13.0, 19.0, 22.0, 21.0, 32.0, 30.0, 49.0, 55.0, 74.0, 97.0, 209.0, 367.0, 1504.0, 294693.0, 747935.0, 2343.0, 432.0, 202.0, 116.0, 71.0, 50.0, 41.0, 31.0, 26.0, 19.0, 18.0, 12.0, 10.0, 10.0, 5.0, 4.0, 5.0, 3.0, 3.0, 4.0, 4.0, 1.0, 1.0, 3.0, 1.0], "bins": [-0.1866455078125, -0.18174362182617188, -0.17684173583984375, -0.17193984985351562, -0.1670379638671875, -0.16213607788085938, -0.15723419189453125, -0.15233230590820312, -0.147430419921875, -0.14252853393554688, -0.13762664794921875, -0.13272476196289062, -0.1278228759765625, -0.12292098999023438, -0.11801910400390625, -0.11311721801757812, -0.10821533203125, -0.10331344604492188, -0.09841156005859375, -0.09350967407226562, -0.0886077880859375, -0.08370590209960938, -0.07880401611328125, -0.07390213012695312, -0.069000244140625, -0.06409835815429688, -0.05919647216796875, -0.054294586181640625, -0.0493927001953125, -0.044490814208984375, -0.03958892822265625, -0.034687042236328125, -0.02978515625, -0.024883270263671875, -0.01998138427734375, -0.015079498291015625, -0.0101776123046875, -0.005275726318359375, -0.00037384033203125, 0.004528045654296875, 0.009429931640625, 0.014331817626953125, 0.01923370361328125, 0.024135589599609375, 0.0290374755859375, 0.033939361572265625, 0.03884124755859375, 0.043743133544921875, 0.04864501953125, 0.053546905517578125, 0.05844879150390625, 0.06335067749023438, 0.0682525634765625, 0.07315444946289062, 0.07805633544921875, 0.08295822143554688, 0.087860107421875, 0.09276199340820312, 0.09766387939453125, 0.10256576538085938, 0.1074676513671875, 0.11236953735351562, 0.11727142333984375, 0.12217330932617188, 0.1270751953125]}, "gradients/decoder.transformer.h.13.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 3.0, 4.0, 4.0, 9.0, 8.0, 19.0, 13.0, 23.0, 32.0, 58.0, 76.0, 87.0, 113.0, 102.0, 94.0, 102.0, 60.0, 62.0, 46.0, 28.0, 23.0, 10.0, 12.0, 7.0, 5.0, 4.0, 2.0, 3.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.006243600510060787, -0.006086657755076885, -0.005929714534431696, -0.005772771779447794, -0.005615829024463892, -0.00545888626947999, -0.005301943048834801, -0.005145000293850899, -0.004988057538866997, -0.004831114783883095, -0.0046741715632379055, -0.0045172288082540035, -0.0043602860532701015, -0.0042033432982862, -0.00404640007764101, -0.0038894573226571083, -0.0037325143348425627, -0.003575571347028017, -0.003418628592044115, -0.0032616856042295694, -0.0031047428492456675, -0.002947799861431122, -0.00279085710644722, -0.0026339141186326742, -0.0024769711308181286, -0.002320028143003583, -0.002163085388019681, -0.0020061424002051353, -0.0018491996452212334, -0.0016922566574066877, -0.001535313786007464, -0.0013783709146082401, -0.0012214283924549818, -0.001064485521055758, -0.0009075426496565342, -0.0007505997200496495, -0.0005936568486504257, -0.00043671397725120187, -0.00027977104764431715, -0.00012282817624509335, 3.411469515413046e-05, 0.0001910575811052695, 0.0003480004670564085, 0.0005049433675594628, 0.0006618862389586866, 0.0008188291103579104, 0.0009757720399647951, 0.001132714911364019, 0.0012896577827632427, 0.0014466006541624665, 0.0016035435255616903, 0.001760486513376236, 0.001917429268360138, 0.0020743722561746836, 0.002231315243989229, 0.002388257998973131, 0.002545200753957033, 0.002702143741771579, 0.0028590864967554808, 0.0030160294845700264, 0.0031729722395539284, 0.003329915227368474, 0.0034868582151830196, 0.0036438009701669216, 0.0038007439579814672]}, "gradients/decoder.transformer.h.13.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 3.0, 1.0, 5.0, 7.0, 6.0, 6.0, 6.0, 14.0, 6.0, 12.0, 21.0, 20.0, 18.0, 31.0, 29.0, 30.0, 33.0, 36.0, 27.0, 40.0, 34.0, 45.0, 39.0, 44.0, 35.0, 43.0, 50.0, 31.0, 41.0, 42.0, 36.0, 25.0, 32.0, 27.0, 25.0, 23.0, 18.0, 5.0, 15.0, 8.0, 9.0, 10.0, 5.0, 5.0, 5.0, 4.0, 1.0, 0.0, 3.0, 2.0, 0.0, 2.0, 3.0], "bins": [-0.004159212112426758, -0.00403998326510191, -0.0039207544177770615, -0.0038015255704522133, -0.003682296723127365, -0.003563067875802517, -0.0034438390284776688, -0.0033246101811528206, -0.0032053813338279724, -0.0030861524865031242, -0.002966923639178276, -0.002847694791853428, -0.0027284659445285797, -0.0026092370972037315, -0.0024900082498788834, -0.002370779402554035, -0.002251550555229187, -0.002132321707904339, -0.0020130928605794907, -0.0018938640132546425, -0.0017746351659297943, -0.0016554063186049461, -0.001536177471280098, -0.0014169486239552498, -0.0012977197766304016, -0.0011784909293055534, -0.0010592620819807053, -0.0009400332346558571, -0.0008208043873310089, -0.0007015755400061607, -0.0005823466926813126, -0.0004631178453564644, -0.0003438889980316162, -0.00022466015070676804, -0.00010543130338191986, 1.3797543942928314e-05, 0.0001330263912677765, 0.00025225523859262466, 0.00037148408591747284, 0.000490712933242321, 0.0006099417805671692, 0.0007291706278920174, 0.0008483994752168655, 0.0009676283225417137, 0.0010868571698665619, 0.00120608601719141, 0.0013253148645162582, 0.0014445437118411064, 0.0015637725591659546, 0.0016830014064908028, 0.001802230253815651, 0.0019214591011404991, 0.0020406879484653473, 0.0021599167957901955, 0.0022791456431150436, 0.002398374490439892, 0.00251760333776474, 0.002636832185089588, 0.0027560610324144363, 0.0028752898797392845, 0.0029945187270641327, 0.003113747574388981, 0.003232976421713829, 0.003352205269038677, 0.0034714341163635254]}, "gradients/decoder.transformer.h.13.attn.c_proj.bias": {"_type": "histogram", "values": [3.0, 0.0, 2.0, 0.0, 3.0, 1.0, 3.0, 2.0, 5.0, 7.0, 7.0, 10.0, 12.0, 10.0, 18.0, 20.0, 14.0, 24.0, 25.0, 29.0, 33.0, 34.0, 44.0, 53.0, 35.0, 37.0, 37.0, 35.0, 30.0, 39.0, 44.0, 39.0, 49.0, 36.0, 26.0, 31.0, 27.0, 32.0, 30.0, 18.0, 13.0, 16.0, 21.0, 17.0, 10.0, 5.0, 9.0, 7.0, 8.0, 1.0, 4.0, 4.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-8.4453125, -8.1480712890625, -7.850830078125, -7.5535888671875, -7.25634765625, -6.9591064453125, -6.661865234375, -6.3646240234375, -6.0673828125, -5.7701416015625, -5.472900390625, -5.1756591796875, -4.87841796875, -4.5811767578125, -4.283935546875, -3.9866943359375, -3.689453125, -3.3922119140625, -3.094970703125, -2.7977294921875, -2.50048828125, -2.2032470703125, -1.906005859375, -1.6087646484375, -1.3115234375, -1.0142822265625, -0.717041015625, -0.4197998046875, -0.12255859375, 0.1746826171875, 0.471923828125, 0.7691650390625, 1.06640625, 1.3636474609375, 1.660888671875, 1.9581298828125, 2.25537109375, 2.5526123046875, 2.849853515625, 3.1470947265625, 3.4443359375, 3.7415771484375, 4.038818359375, 4.3360595703125, 4.63330078125, 4.9305419921875, 5.227783203125, 5.5250244140625, 5.822265625, 6.1195068359375, 6.416748046875, 6.7139892578125, 7.01123046875, 7.3084716796875, 7.605712890625, 7.9029541015625, 8.2001953125, 8.4974365234375, 8.794677734375, 9.0919189453125, 9.38916015625, 9.6864013671875, 9.983642578125, 10.2808837890625, 10.578125]}, "gradients/decoder.transformer.h.13.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 0.0, 4.0, 1.0, 2.0, 7.0, 9.0, 9.0, 15.0, 18.0, 37.0, 43.0, 66.0, 82.0, 135.0, 186.0, 325.0, 506.0, 710.0, 1117.0, 1903.0, 3032.0, 5013.0, 8690.0, 16196.0, 32838.0, 77556.0, 210283.0, 384809.0, 175671.0, 65811.0, 28816.0, 14570.0, 7926.0, 4599.0, 2783.0, 1725.0, 1020.0, 703.0, 452.0, 306.0, 208.0, 106.0, 87.0, 58.0, 37.0, 25.0, 22.0, 15.0, 9.0, 5.0, 5.0, 6.0, 3.0, 3.0, 1.0, 4.0, 1.0, 1.0, 0.0, 0.0, 2.0], "bins": [-5.953125, -5.75738525390625, -5.5616455078125, -5.36590576171875, -5.170166015625, -4.97442626953125, -4.7786865234375, -4.58294677734375, -4.38720703125, -4.19146728515625, -3.9957275390625, -3.79998779296875, -3.604248046875, -3.40850830078125, -3.2127685546875, -3.01702880859375, -2.8212890625, -2.62554931640625, -2.4298095703125, -2.23406982421875, -2.038330078125, -1.84259033203125, -1.6468505859375, -1.45111083984375, -1.25537109375, -1.05963134765625, -0.8638916015625, -0.66815185546875, -0.472412109375, -0.27667236328125, -0.0809326171875, 0.11480712890625, 0.310546875, 0.50628662109375, 0.7020263671875, 0.89776611328125, 1.093505859375, 1.28924560546875, 1.4849853515625, 1.68072509765625, 1.87646484375, 2.07220458984375, 2.2679443359375, 2.46368408203125, 2.659423828125, 2.85516357421875, 3.0509033203125, 3.24664306640625, 3.4423828125, 3.63812255859375, 3.8338623046875, 4.02960205078125, 4.225341796875, 4.42108154296875, 4.6168212890625, 4.81256103515625, 5.00830078125, 5.20404052734375, 5.3997802734375, 5.59552001953125, 5.791259765625, 5.98699951171875, 6.1827392578125, 6.37847900390625, 6.57421875]}, "gradients/decoder.transformer.h.13.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0, 2.0, 4.0, 3.0, 3.0, 4.0, 6.0, 6.0, 11.0, 11.0, 13.0, 13.0, 14.0, 18.0, 20.0, 26.0, 28.0, 43.0, 37.0, 31.0, 38.0, 47.0, 68.0, 91.0, 252.0, 1627.0, 138.0, 71.0, 64.0, 43.0, 50.0, 34.0, 42.0, 40.0, 25.0, 26.0, 16.0, 15.0, 20.0, 15.0, 9.0, 4.0, 5.0, 6.0, 6.0, 4.0, 5.0, 0.0, 2.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0], "bins": [-32.96875, -31.94677734375, -30.9248046875, -29.90283203125, -28.880859375, -27.85888671875, -26.8369140625, -25.81494140625, -24.79296875, -23.77099609375, -22.7490234375, -21.72705078125, -20.705078125, -19.68310546875, -18.6611328125, -17.63916015625, -16.6171875, -15.59521484375, -14.5732421875, -13.55126953125, -12.529296875, -11.50732421875, -10.4853515625, -9.46337890625, -8.44140625, -7.41943359375, -6.3974609375, -5.37548828125, -4.353515625, -3.33154296875, -2.3095703125, -1.28759765625, -0.265625, 0.75634765625, 1.7783203125, 2.80029296875, 3.822265625, 4.84423828125, 5.8662109375, 6.88818359375, 7.91015625, 8.93212890625, 9.9541015625, 10.97607421875, 11.998046875, 13.02001953125, 14.0419921875, 15.06396484375, 16.0859375, 17.10791015625, 18.1298828125, 19.15185546875, 20.173828125, 21.19580078125, 22.2177734375, 23.23974609375, 24.26171875, 25.28369140625, 26.3056640625, 27.32763671875, 28.349609375, 29.37158203125, 30.3935546875, 31.41552734375, 32.4375]}, "gradients/decoder.transformer.h.13.attn.c_attn.weight": {"_type": "histogram", "values": [3.0, 2.0, 2.0, 3.0, 2.0, 3.0, 5.0, 4.0, 10.0, 16.0, 20.0, 19.0, 32.0, 49.0, 60.0, 98.0, 131.0, 184.0, 282.0, 427.0, 1203.0, 76820.0, 3060961.0, 3770.0, 543.0, 299.0, 223.0, 166.0, 104.0, 75.0, 50.0, 44.0, 32.0, 18.0, 19.0, 11.0, 9.0, 3.0, 6.0, 1.0, 2.0, 1.0, 6.0, 0.0, 1.0, 2.0, 0.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-70.125, -66.96875, -63.8125, -60.65625, -57.5, -54.34375, -51.1875, -48.03125, -44.875, -41.71875, -38.5625, -35.40625, -32.25, -29.09375, -25.9375, -22.78125, -19.625, -16.46875, -13.3125, -10.15625, -7.0, -3.84375, -0.6875, 2.46875, 5.625, 8.78125, 11.9375, 15.09375, 18.25, 21.40625, 24.5625, 27.71875, 30.875, 34.03125, 37.1875, 40.34375, 43.5, 46.65625, 49.8125, 52.96875, 56.125, 59.28125, 62.4375, 65.59375, 68.75, 71.90625, 75.0625, 78.21875, 81.375, 84.53125, 87.6875, 90.84375, 94.0, 97.15625, 100.3125, 103.46875, 106.625, 109.78125, 112.9375, 116.09375, 119.25, 122.40625, 125.5625, 128.71875, 131.875]}, "gradients/decoder.transformer.h.13.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 40.0, 554.0, 411.0, 15.0, 1.0, 1.0], "bins": [-429.1835021972656, -422.0173034667969, -414.85107421875, -407.68487548828125, -400.5186767578125, -393.3524475097656, -386.1862487792969, -379.0200500488281, -371.85382080078125, -364.6876220703125, -357.5213928222656, -350.3551940917969, -343.1889953613281, -336.02276611328125, -328.8565673828125, -321.69036865234375, -314.524169921875, -307.35797119140625, -300.1917419433594, -293.0255432128906, -285.8593444824219, -278.693115234375, -271.52691650390625, -264.3607177734375, -257.1944885253906, -250.0282745361328, -242.86207580566406, -235.69586181640625, -228.52964782714844, -221.3634490966797, -214.19723510742188, -207.03103637695312, -199.86483764648438, -192.69862365722656, -185.5324249267578, -178.3662109375, -171.1999969482422, -164.03379821777344, -156.86758422851562, -149.70138549804688, -142.53515625, -135.3689422607422, -128.20274353027344, -121.03652954101562, -113.87032318115234, -106.70411682128906, -99.53790283203125, -92.37169647216797, -85.20549011230469, -78.0392837524414, -70.8730697631836, -63.70686340332031, -56.54065704345703, -49.374446868896484, -42.20823669433594, -35.042030334472656, -27.875822067260742, -20.709613800048828, -13.543404579162598, -6.377195358276367, 0.7890129089355469, 7.955221176147461, 15.121431350708008, 22.28763771057129, 29.453847885131836]}, "gradients/decoder.transformer.h.13.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 2.0, 4.0, 2.0, 9.0, 7.0, 9.0, 6.0, 11.0, 11.0, 11.0, 15.0, 13.0, 18.0, 18.0, 27.0, 22.0, 25.0, 30.0, 31.0, 45.0, 30.0, 39.0, 35.0, 40.0, 40.0, 50.0, 33.0, 39.0, 32.0, 36.0, 47.0, 34.0, 28.0, 22.0, 19.0, 20.0, 17.0, 19.0, 19.0, 13.0, 15.0, 13.0, 5.0, 9.0, 8.0, 11.0, 5.0, 4.0, 3.0, 3.0, 7.0, 2.0, 3.0, 0.0, 0.0, 1.0], "bins": [-80.92066192626953, -78.45547485351562, -75.99028778076172, -73.52510070800781, -71.0599136352539, -68.5947265625, -66.1295394897461, -63.66435623168945, -61.19916915893555, -58.73398208618164, -56.268795013427734, -53.80360794067383, -51.33842468261719, -48.87323760986328, -46.408050537109375, -43.94286346435547, -41.47767639160156, -39.012489318847656, -36.54730224609375, -34.082115173339844, -31.61693000793457, -29.151742935180664, -26.68655776977539, -24.221370697021484, -21.756183624267578, -19.290996551513672, -16.825809478759766, -14.360624313354492, -11.895437240600586, -9.43025016784668, -6.96506404876709, -4.4998779296875, -2.0346908569335938, 0.4304957389831543, 2.8956823348999023, 5.36086893081665, 7.826055526733398, 10.291242599487305, 12.756428718566895, 15.221614837646484, 17.68680191040039, 20.151988983154297, 22.617176055908203, 25.082361221313477, 27.547548294067383, 30.01273536682129, 32.47792053222656, 34.94310760498047, 37.408294677734375, 39.87348175048828, 42.33866882324219, 44.803855895996094, 47.26904296875, 49.734230041503906, 52.19941329956055, 54.66460037231445, 57.12978744506836, 59.594974517822266, 62.06016159057617, 64.52534484863281, 66.99053192138672, 69.45571899414062, 71.92090606689453, 74.38609313964844, 76.85128021240234]}, "gradients/decoder.transformer.h.12.mlp.c_proj.bias": {"_type": "histogram", "values": [3.0, 0.0, 1.0, 0.0, 4.0, 1.0, 0.0, 4.0, 4.0, 8.0, 7.0, 6.0, 10.0, 13.0, 19.0, 11.0, 18.0, 23.0, 25.0, 28.0, 28.0, 32.0, 34.0, 47.0, 39.0, 42.0, 48.0, 36.0, 36.0, 38.0, 42.0, 49.0, 33.0, 37.0, 28.0, 36.0, 33.0, 27.0, 29.0, 22.0, 16.0, 19.0, 19.0, 13.0, 9.0, 9.0, 9.0, 7.0, 6.0, 2.0, 5.0, 3.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-8.9765625, -8.664306640625, -8.35205078125, -8.039794921875, -7.7275390625, -7.415283203125, -7.10302734375, -6.790771484375, -6.478515625, -6.166259765625, -5.85400390625, -5.541748046875, -5.2294921875, -4.917236328125, -4.60498046875, -4.292724609375, -3.98046875, -3.668212890625, -3.35595703125, -3.043701171875, -2.7314453125, -2.419189453125, -2.10693359375, -1.794677734375, -1.482421875, -1.170166015625, -0.85791015625, -0.545654296875, -0.2333984375, 0.078857421875, 0.39111328125, 0.703369140625, 1.015625, 1.327880859375, 1.64013671875, 1.952392578125, 2.2646484375, 2.576904296875, 2.88916015625, 3.201416015625, 3.513671875, 3.825927734375, 4.13818359375, 4.450439453125, 4.7626953125, 5.074951171875, 5.38720703125, 5.699462890625, 6.01171875, 6.323974609375, 6.63623046875, 6.948486328125, 7.2607421875, 7.572998046875, 7.88525390625, 8.197509765625, 8.509765625, 8.822021484375, 9.13427734375, 9.446533203125, 9.7587890625, 10.071044921875, 10.38330078125, 10.695556640625, 11.0078125]}, "gradients/decoder.transformer.h.12.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 4.0, 4.0, 6.0, 4.0, 7.0, 9.0, 7.0, 19.0, 8.0, 21.0, 23.0, 23.0, 48.0, 48.0, 81.0, 126.0, 277.0, 648.0, 2085.0, 9355.0, 68109.0, 840155.0, 2696558.0, 522397.0, 44947.0, 6701.0, 1485.0, 511.0, 207.0, 119.0, 79.0, 54.0, 46.0, 14.0, 22.0, 14.0, 13.0, 11.0, 13.0, 8.0, 8.0, 6.0, 4.0, 4.0, 2.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-22.40625, -21.644287109375, -20.88232421875, -20.120361328125, -19.3583984375, -18.596435546875, -17.83447265625, -17.072509765625, -16.310546875, -15.548583984375, -14.78662109375, -14.024658203125, -13.2626953125, -12.500732421875, -11.73876953125, -10.976806640625, -10.21484375, -9.452880859375, -8.69091796875, -7.928955078125, -7.1669921875, -6.405029296875, -5.64306640625, -4.881103515625, -4.119140625, -3.357177734375, -2.59521484375, -1.833251953125, -1.0712890625, -0.309326171875, 0.45263671875, 1.214599609375, 1.9765625, 2.738525390625, 3.50048828125, 4.262451171875, 5.0244140625, 5.786376953125, 6.54833984375, 7.310302734375, 8.072265625, 8.834228515625, 9.59619140625, 10.358154296875, 11.1201171875, 11.882080078125, 12.64404296875, 13.406005859375, 14.16796875, 14.929931640625, 15.69189453125, 16.453857421875, 17.2158203125, 17.977783203125, 18.73974609375, 19.501708984375, 20.263671875, 21.025634765625, 21.78759765625, 22.549560546875, 23.3115234375, 24.073486328125, 24.83544921875, 25.597412109375, 26.359375]}, "gradients/decoder.transformer.h.12.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 3.0, 4.0, 1.0, 4.0, 10.0, 9.0, 8.0, 15.0, 17.0, 29.0, 31.0, 47.0, 56.0, 79.0, 96.0, 128.0, 167.0, 266.0, 349.0, 443.0, 512.0, 442.0, 356.0, 274.0, 206.0, 135.0, 106.0, 80.0, 51.0, 40.0, 29.0, 27.0, 18.0, 10.0, 11.0, 7.0, 4.0, 4.0, 4.0, 1.0, 4.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-22.65625, -22.0205078125, -21.384765625, -20.7490234375, -20.11328125, -19.4775390625, -18.841796875, -18.2060546875, -17.5703125, -16.9345703125, -16.298828125, -15.6630859375, -15.02734375, -14.3916015625, -13.755859375, -13.1201171875, -12.484375, -11.8486328125, -11.212890625, -10.5771484375, -9.94140625, -9.3056640625, -8.669921875, -8.0341796875, -7.3984375, -6.7626953125, -6.126953125, -5.4912109375, -4.85546875, -4.2197265625, -3.583984375, -2.9482421875, -2.3125, -1.6767578125, -1.041015625, -0.4052734375, 0.23046875, 0.8662109375, 1.501953125, 2.1376953125, 2.7734375, 3.4091796875, 4.044921875, 4.6806640625, 5.31640625, 5.9521484375, 6.587890625, 7.2236328125, 7.859375, 8.4951171875, 9.130859375, 9.7666015625, 10.40234375, 11.0380859375, 11.673828125, 12.3095703125, 12.9453125, 13.5810546875, 14.216796875, 14.8525390625, 15.48828125, 16.1240234375, 16.759765625, 17.3955078125, 18.03125]}, "gradients/decoder.transformer.h.12.mlp.c_fc.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 1.0, 5.0, 4.0, 9.0, 6.0, 6.0, 13.0, 11.0, 19.0, 25.0, 20.0, 47.0, 56.0, 66.0, 77.0, 119.0, 166.0, 190.0, 303.0, 558.0, 2521.0, 146843.0, 3946246.0, 93165.0, 2105.0, 497.0, 294.0, 208.0, 175.0, 118.0, 106.0, 80.0, 59.0, 35.0, 39.0, 23.0, 16.0, 15.0, 17.0, 9.0, 6.0, 6.0, 1.0, 3.0, 1.0, 3.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-60.84375, -58.45361328125, -56.0634765625, -53.67333984375, -51.283203125, -48.89306640625, -46.5029296875, -44.11279296875, -41.72265625, -39.33251953125, -36.9423828125, -34.55224609375, -32.162109375, -29.77197265625, -27.3818359375, -24.99169921875, -22.6015625, -20.21142578125, -17.8212890625, -15.43115234375, -13.041015625, -10.65087890625, -8.2607421875, -5.87060546875, -3.48046875, -1.09033203125, 1.2998046875, 3.68994140625, 6.080078125, 8.47021484375, 10.8603515625, 13.25048828125, 15.640625, 18.03076171875, 20.4208984375, 22.81103515625, 25.201171875, 27.59130859375, 29.9814453125, 32.37158203125, 34.76171875, 37.15185546875, 39.5419921875, 41.93212890625, 44.322265625, 46.71240234375, 49.1025390625, 51.49267578125, 53.8828125, 56.27294921875, 58.6630859375, 61.05322265625, 63.443359375, 65.83349609375, 68.2236328125, 70.61376953125, 73.00390625, 75.39404296875, 77.7841796875, 80.17431640625, 82.564453125, 84.95458984375, 87.3447265625, 89.73486328125, 92.125]}, "gradients/decoder.transformer.h.12.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 3.0, 11.0, 16.0, 32.0, 66.0, 97.0, 160.0, 180.0, 166.0, 134.0, 78.0, 39.0, 20.0, 2.0, 3.0, 5.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-144.386962890625, -140.22299194335938, -136.05902099609375, -131.89505004882812, -127.73107147216797, -123.56710052490234, -119.40312194824219, -115.23915100097656, -111.07518005371094, -106.91120910644531, -102.74723815917969, -98.58325958251953, -94.4192886352539, -90.25531768798828, -86.09133911132812, -81.9273681640625, -77.76339721679688, -73.59942626953125, -69.43545532226562, -65.27147674560547, -61.107505798339844, -56.94353485107422, -52.77956008911133, -48.61558532714844, -44.45161437988281, -40.28764343261719, -36.1236686706543, -31.95969581604004, -27.79572296142578, -23.631750106811523, -19.467777252197266, -15.303804397583008, -11.139816284179688, -6.97584342956543, -2.811870574951172, 1.352102279663086, 5.516075134277344, 9.680047988891602, 13.84402084350586, 18.007993698120117, 22.171966552734375, 26.335939407348633, 30.49991226196289, 34.66388702392578, 38.827857971191406, 42.99182891845703, 47.15580368041992, 51.31977844238281, 55.48374938964844, 59.64772033691406, 63.81169509887695, 67.97566986083984, 72.13964080810547, 76.3036117553711, 80.46759033203125, 84.63156127929688, 88.7955322265625, 92.95950317382812, 97.12347412109375, 101.2874526977539, 105.45142364501953, 109.61539459228516, 113.77937316894531, 117.94334411621094, 122.10731506347656]}, "gradients/decoder.transformer.h.12.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 3.0, 0.0, 0.0, 0.0, 2.0, 2.0, 4.0, 1.0, 7.0, 8.0, 10.0, 12.0, 8.0, 14.0, 22.0, 27.0, 14.0, 32.0, 31.0, 30.0, 44.0, 38.0, 39.0, 39.0, 44.0, 32.0, 41.0, 45.0, 48.0, 34.0, 40.0, 33.0, 36.0, 32.0, 36.0, 25.0, 34.0, 21.0, 26.0, 17.0, 10.0, 14.0, 12.0, 13.0, 10.0, 9.0, 4.0, 6.0, 2.0, 0.0, 3.0, 0.0, 3.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-64.89410400390625, -62.799903869628906, -60.70570373535156, -58.61150360107422, -56.517303466796875, -54.42310333251953, -52.32890701293945, -50.23470687866211, -48.140506744384766, -46.04630661010742, -43.95210647583008, -41.857906341552734, -39.763710021972656, -37.66950988769531, -35.57530975341797, -33.481109619140625, -31.38690948486328, -29.292709350585938, -27.198509216308594, -25.104310989379883, -23.01011085510254, -20.915910720825195, -18.821712493896484, -16.72751235961914, -14.633312225341797, -12.539112091064453, -10.444912910461426, -8.350713729858398, -6.256513595581055, -4.162313461303711, -2.0681142807006836, 0.02608489990234375, 2.1202774047851562, 4.214477062225342, 6.308676719665527, 8.402875900268555, 10.497076034545898, 12.591276168823242, 14.68547534942627, 16.779674530029297, 18.87387466430664, 20.968074798583984, 23.062274932861328, 25.15647315979004, 27.250673294067383, 29.344873428344727, 31.439071655273438, 33.53327178955078, 35.627471923828125, 37.72167205810547, 39.81587219238281, 41.910072326660156, 44.0042724609375, 46.098472595214844, 48.19266891479492, 50.286869049072266, 52.38106918334961, 54.47526931762695, 56.5694694519043, 58.66366958618164, 60.75786590576172, 62.85206604003906, 64.9462661743164, 67.04046630859375, 69.1346664428711]}, "gradients/decoder.transformer.h.12.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 3.0, 1.0, 2.0, 1.0, 6.0, 9.0, 7.0, 12.0, 13.0, 11.0, 16.0, 20.0, 23.0, 22.0, 26.0, 37.0, 41.0, 35.0, 35.0, 37.0, 45.0, 50.0, 43.0, 35.0, 43.0, 36.0, 38.0, 29.0, 37.0, 39.0, 34.0, 28.0, 24.0, 23.0, 20.0, 31.0, 18.0, 15.0, 14.0, 15.0, 8.0, 3.0, 8.0, 7.0, 5.0, 4.0, 1.0, 1.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.5546875, -9.2528076171875, -8.950927734375, -8.6490478515625, -8.34716796875, -8.0452880859375, -7.743408203125, -7.4415283203125, -7.1396484375, -6.8377685546875, -6.535888671875, -6.2340087890625, -5.93212890625, -5.6302490234375, -5.328369140625, -5.0264892578125, -4.724609375, -4.4227294921875, -4.120849609375, -3.8189697265625, -3.51708984375, -3.2152099609375, -2.913330078125, -2.6114501953125, -2.3095703125, -2.0076904296875, -1.705810546875, -1.4039306640625, -1.10205078125, -0.8001708984375, -0.498291015625, -0.1964111328125, 0.10546875, 0.4073486328125, 0.709228515625, 1.0111083984375, 1.31298828125, 1.6148681640625, 1.916748046875, 2.2186279296875, 2.5205078125, 2.8223876953125, 3.124267578125, 3.4261474609375, 3.72802734375, 4.0299072265625, 4.331787109375, 4.6336669921875, 4.935546875, 5.2374267578125, 5.539306640625, 5.8411865234375, 6.14306640625, 6.4449462890625, 6.746826171875, 7.0487060546875, 7.3505859375, 7.6524658203125, 7.954345703125, 8.2562255859375, 8.55810546875, 8.8599853515625, 9.161865234375, 9.4637451171875, 9.765625]}, "gradients/decoder.transformer.h.12.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 2.0, 4.0, 0.0, 3.0, 3.0, 4.0, 8.0, 12.0, 14.0, 23.0, 27.0, 53.0, 88.0, 112.0, 172.0, 251.0, 395.0, 570.0, 942.0, 1540.0, 2333.0, 3672.0, 6208.0, 10079.0, 17286.0, 29915.0, 51632.0, 88926.0, 147179.0, 206991.0, 187122.0, 120599.0, 71579.0, 41194.0, 23848.0, 13961.0, 8287.0, 5059.0, 3092.0, 1918.0, 1234.0, 753.0, 498.0, 355.0, 211.0, 129.0, 90.0, 60.0, 42.0, 17.0, 21.0, 18.0, 11.0, 8.0, 4.0, 6.0, 5.0, 2.0, 1.0, 2.0, 2.0], "bins": [-1.0654296875, -1.0329437255859375, -1.000457763671875, -0.9679718017578125, -0.93548583984375, -0.9029998779296875, -0.870513916015625, -0.8380279541015625, -0.8055419921875, -0.7730560302734375, -0.740570068359375, -0.7080841064453125, -0.67559814453125, -0.6431121826171875, -0.610626220703125, -0.5781402587890625, -0.545654296875, -0.5131683349609375, -0.480682373046875, -0.4481964111328125, -0.41571044921875, -0.3832244873046875, -0.350738525390625, -0.3182525634765625, -0.2857666015625, -0.2532806396484375, -0.220794677734375, -0.1883087158203125, -0.15582275390625, -0.1233367919921875, -0.090850830078125, -0.0583648681640625, -0.02587890625, 0.0066070556640625, 0.039093017578125, 0.0715789794921875, 0.10406494140625, 0.1365509033203125, 0.169036865234375, 0.2015228271484375, 0.2340087890625, 0.2664947509765625, 0.298980712890625, 0.3314666748046875, 0.36395263671875, 0.3964385986328125, 0.428924560546875, 0.4614105224609375, 0.493896484375, 0.5263824462890625, 0.558868408203125, 0.5913543701171875, 0.62384033203125, 0.6563262939453125, 0.688812255859375, 0.7212982177734375, 0.7537841796875, 0.7862701416015625, 0.818756103515625, 0.8512420654296875, 0.88372802734375, 0.9162139892578125, 0.948699951171875, 0.9811859130859375, 1.013671875]}, "gradients/decoder.transformer.h.12.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 1.0, 1.0, 2.0, 6.0, 9.0, 4.0, 10.0, 4.0, 10.0, 12.0, 20.0, 14.0, 20.0, 22.0, 15.0, 23.0, 18.0, 38.0, 32.0, 40.0, 41.0, 36.0, 42.0, 43.0, 1071.0, 39.0, 34.0, 47.0, 42.0, 34.0, 44.0, 30.0, 31.0, 20.0, 25.0, 21.0, 21.0, 21.0, 14.0, 11.0, 16.0, 13.0, 4.0, 2.0, 10.0, 3.0, 6.0, 3.0, 6.0, 0.0, 5.0, 1.0, 2.0, 0.0, 2.0], "bins": [-6.37890625, -6.19122314453125, -6.0035400390625, -5.81585693359375, -5.628173828125, -5.44049072265625, -5.2528076171875, -5.06512451171875, -4.87744140625, -4.68975830078125, -4.5020751953125, -4.31439208984375, -4.126708984375, -3.93902587890625, -3.7513427734375, -3.56365966796875, -3.3759765625, -3.18829345703125, -3.0006103515625, -2.81292724609375, -2.625244140625, -2.43756103515625, -2.2498779296875, -2.06219482421875, -1.87451171875, -1.68682861328125, -1.4991455078125, -1.31146240234375, -1.123779296875, -0.93609619140625, -0.7484130859375, -0.56072998046875, -0.373046875, -0.18536376953125, 0.0023193359375, 0.19000244140625, 0.377685546875, 0.56536865234375, 0.7530517578125, 0.94073486328125, 1.12841796875, 1.31610107421875, 1.5037841796875, 1.69146728515625, 1.879150390625, 2.06683349609375, 2.2545166015625, 2.44219970703125, 2.6298828125, 2.81756591796875, 3.0052490234375, 3.19293212890625, 3.380615234375, 3.56829833984375, 3.7559814453125, 3.94366455078125, 4.13134765625, 4.31903076171875, 4.5067138671875, 4.69439697265625, 4.882080078125, 5.06976318359375, 5.2574462890625, 5.44512939453125, 5.6328125]}, "gradients/decoder.transformer.h.12.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 3.0, 3.0, 11.0, 6.0, 15.0, 28.0, 41.0, 74.0, 108.0, 187.0, 278.0, 415.0, 661.0, 1070.0, 1634.0, 2623.0, 4078.0, 6631.0, 10832.0, 17757.0, 28481.0, 45684.0, 74160.0, 113751.0, 159769.0, 1223382.0, 143651.0, 98124.0, 62237.0, 38881.0, 23584.0, 14698.0, 9054.0, 5568.0, 3580.0, 2194.0, 1422.0, 932.0, 549.0, 368.0, 216.0, 148.0, 92.0, 63.0, 37.0, 22.0, 17.0, 10.0, 7.0, 6.0, 2.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.677734375, -0.6553802490234375, -0.633026123046875, -0.6106719970703125, -0.58831787109375, -0.5659637451171875, -0.543609619140625, -0.5212554931640625, -0.4989013671875, -0.4765472412109375, -0.454193115234375, -0.4318389892578125, -0.40948486328125, -0.3871307373046875, -0.364776611328125, -0.3424224853515625, -0.320068359375, -0.2977142333984375, -0.275360107421875, -0.2530059814453125, -0.23065185546875, -0.2082977294921875, -0.185943603515625, -0.1635894775390625, -0.1412353515625, -0.1188812255859375, -0.096527099609375, -0.0741729736328125, -0.05181884765625, -0.0294647216796875, -0.007110595703125, 0.0152435302734375, 0.03759765625, 0.0599517822265625, 0.082305908203125, 0.1046600341796875, 0.12701416015625, 0.1493682861328125, 0.171722412109375, 0.1940765380859375, 0.2164306640625, 0.2387847900390625, 0.261138916015625, 0.2834930419921875, 0.30584716796875, 0.3282012939453125, 0.350555419921875, 0.3729095458984375, 0.395263671875, 0.4176177978515625, 0.439971923828125, 0.4623260498046875, 0.48468017578125, 0.5070343017578125, 0.529388427734375, 0.5517425537109375, 0.5740966796875, 0.5964508056640625, 0.618804931640625, 0.6411590576171875, 0.66351318359375, 0.6858673095703125, 0.708221435546875, 0.7305755615234375, 0.7529296875]}, "gradients/decoder.transformer.h.12.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 1.0, 4.0, 8.0, 5.0, 10.0, 12.0, 14.0, 17.0, 28.0, 25.0, 31.0, 40.0, 54.0, 55.0, 58.0, 77.0, 75.0, 66.0, 66.0, 67.0, 65.0, 54.0, 34.0, 36.0, 18.0, 19.0, 13.0, 5.0, 7.0, 6.0, 6.0, 9.0, 6.0, 0.0, 0.0, 4.0, 2.0, 2.0, 0.0, 2.0, 1.0, 1.0, 1.0, 3.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-0.006961822509765625, -0.006736636161804199, -0.0065114498138427734, -0.006286263465881348, -0.006061077117919922, -0.005835890769958496, -0.00561070442199707, -0.0053855180740356445, -0.005160331726074219, -0.004935145378112793, -0.004709959030151367, -0.004484772682189941, -0.004259586334228516, -0.00403439998626709, -0.003809213638305664, -0.0035840272903442383, -0.0033588409423828125, -0.0031336545944213867, -0.002908468246459961, -0.002683281898498535, -0.0024580955505371094, -0.0022329092025756836, -0.002007722854614258, -0.001782536506652832, -0.0015573501586914062, -0.0013321638107299805, -0.0011069774627685547, -0.0008817911148071289, -0.0006566047668457031, -0.00043141841888427734, -0.00020623207092285156, 1.895427703857422e-05, 0.000244140625, 0.0004693269729614258, 0.0006945133209228516, 0.0009196996688842773, 0.0011448860168457031, 0.001370072364807129, 0.0015952587127685547, 0.0018204450607299805, 0.0020456314086914062, 0.002270817756652832, 0.002496004104614258, 0.0027211904525756836, 0.0029463768005371094, 0.003171563148498535, 0.003396749496459961, 0.0036219358444213867, 0.0038471221923828125, 0.004072308540344238, 0.004297494888305664, 0.00452268123626709, 0.004747867584228516, 0.004973053932189941, 0.005198240280151367, 0.005423426628112793, 0.005648612976074219, 0.0058737993240356445, 0.00609898567199707, 0.006324172019958496, 0.006549358367919922, 0.006774544715881348, 0.0069997310638427734, 0.007224917411804199, 0.007450103759765625]}, "gradients/decoder.transformer.h.12.crossattention.q_attn.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 0.0, 1.0, 1.0, 4.0, 2.0, 2.0, 3.0, 4.0, 10.0, 7.0, 6.0, 7.0, 14.0, 11.0, 13.0, 16.0, 27.0, 31.0, 55.0, 87.0, 117.0, 193.0, 317.0, 1224.0, 57901.0, 977767.0, 9280.0, 634.0, 265.0, 165.0, 128.0, 67.0, 41.0, 40.0, 26.0, 24.0, 13.0, 11.0, 10.0, 13.0, 4.0, 7.0, 5.0, 4.0, 3.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.14892578125, -0.144317626953125, -0.13970947265625, -0.135101318359375, -0.1304931640625, -0.125885009765625, -0.12127685546875, -0.116668701171875, -0.112060546875, -0.107452392578125, -0.10284423828125, -0.098236083984375, -0.0936279296875, -0.089019775390625, -0.08441162109375, -0.079803466796875, -0.0751953125, -0.070587158203125, -0.06597900390625, -0.061370849609375, -0.0567626953125, -0.052154541015625, -0.04754638671875, -0.042938232421875, -0.038330078125, -0.033721923828125, -0.02911376953125, -0.024505615234375, -0.0198974609375, -0.015289306640625, -0.01068115234375, -0.006072998046875, -0.00146484375, 0.003143310546875, 0.00775146484375, 0.012359619140625, 0.0169677734375, 0.021575927734375, 0.02618408203125, 0.030792236328125, 0.035400390625, 0.040008544921875, 0.04461669921875, 0.049224853515625, 0.0538330078125, 0.058441162109375, 0.06304931640625, 0.067657470703125, 0.072265625, 0.076873779296875, 0.08148193359375, 0.086090087890625, 0.0906982421875, 0.095306396484375, 0.09991455078125, 0.104522705078125, 0.109130859375, 0.113739013671875, 0.11834716796875, 0.122955322265625, 0.1275634765625, 0.132171630859375, 0.13677978515625, 0.141387939453125, 0.14599609375]}, "gradients/decoder.transformer.h.12.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 0.0, 0.0, 11.0, 163.0, 705.0, 131.0, 6.0], "bins": [-0.07777281850576401, -0.07650747150182724, -0.07524211704730988, -0.07397677004337311, -0.07271141558885574, -0.07144606858491898, -0.07018071413040161, -0.06891536712646484, -0.06765001267194748, -0.06638466566801071, -0.06511931121349335, -0.06385396420955658, -0.06258860975503922, -0.06132325902581215, -0.06005790829658508, -0.058792561292648315, -0.05752721056342125, -0.05626185983419418, -0.05499650910496712, -0.05373115837574005, -0.052465807646512985, -0.05120045691728592, -0.04993510618805885, -0.04866975545883179, -0.04740440845489502, -0.046139057725667953, -0.04487370699644089, -0.04360835626721382, -0.042343005537986755, -0.04107765480875969, -0.03981230407953262, -0.038546957075595856, -0.03728159889578819, -0.03601624816656113, -0.03475089743733406, -0.033485546708106995, -0.03222019597887993, -0.030954845249652863, -0.029689496383070946, -0.02842414565384388, -0.027158796787261963, -0.025893446058034897, -0.02462809532880783, -0.023362744599580765, -0.0220973938703537, -0.020832043141126633, -0.019566694274544716, -0.01830134354531765, -0.017035992816090584, -0.015770642086863518, -0.014505291357636452, -0.01323994155973196, -0.011974590830504894, -0.010709240101277828, -0.009443890303373337, -0.00817853957414627, -0.006913188379257917, -0.005647838115692139, -0.004382487386465073, -0.0031171368900686502, -0.0018517863936722279, -0.0005864356644451618, 0.0006789145991206169, 0.0019442648626863956, 0.0032096155919134617]}, "gradients/decoder.transformer.h.12.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 4.0, 2.0, 4.0, 6.0, 5.0, 8.0, 3.0, 6.0, 11.0, 13.0, 15.0, 21.0, 13.0, 19.0, 20.0, 31.0, 29.0, 33.0, 29.0, 51.0, 34.0, 49.0, 44.0, 46.0, 49.0, 62.0, 43.0, 46.0, 31.0, 34.0, 38.0, 29.0, 25.0, 26.0, 21.0, 23.0, 15.0, 13.0, 17.0, 14.0, 7.0, 7.0, 6.0, 4.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.0040964484214782715, -0.003973698243498802, -0.003850948065519333, -0.0037281978875398636, -0.0036054477095603943, -0.003482697531580925, -0.0033599473536014557, -0.0032371971756219864, -0.003114446997642517, -0.002991696819663048, -0.0028689466416835785, -0.002746196463704109, -0.00262344628572464, -0.0025006961077451706, -0.0023779459297657013, -0.002255195751786232, -0.0021324455738067627, -0.0020096953958272934, -0.001886945217847824, -0.0017641950398683548, -0.0016414448618888855, -0.0015186946839094162, -0.001395944505929947, -0.0012731943279504776, -0.0011504441499710083, -0.001027693971991539, -0.0009049437940120697, -0.0007821936160326004, -0.0006594434380531311, -0.0005366932600736618, -0.0004139430820941925, -0.0002911929041147232, -0.0001684427261352539, -4.569254815578461e-05, 7.705762982368469e-05, 0.000199807807803154, 0.0003225579857826233, 0.0004453081637620926, 0.0005680583417415619, 0.0006908085197210312, 0.0008135586977005005, 0.0009363088756799698, 0.001059059053659439, 0.0011818092316389084, 0.0013045594096183777, 0.001427309587597847, 0.0015500597655773163, 0.0016728099435567856, 0.0017955601215362549, 0.0019183102995157242, 0.0020410604774951935, 0.0021638106554746628, 0.002286560833454132, 0.0024093110114336014, 0.0025320611894130707, 0.00265481136739254, 0.0027775615453720093, 0.0029003117233514786, 0.003023061901330948, 0.003145812079310417, 0.0032685622572898865, 0.0033913124352693558, 0.003514062613248825, 0.0036368127912282944, 0.0037595629692077637]}, "gradients/decoder.transformer.h.12.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 3.0, 1.0, 2.0, 1.0, 6.0, 9.0, 7.0, 12.0, 13.0, 11.0, 16.0, 20.0, 23.0, 22.0, 28.0, 36.0, 40.0, 35.0, 35.0, 38.0, 45.0, 50.0, 42.0, 36.0, 43.0, 35.0, 38.0, 29.0, 39.0, 37.0, 37.0, 25.0, 24.0, 23.0, 20.0, 31.0, 18.0, 16.0, 14.0, 14.0, 8.0, 3.0, 8.0, 7.0, 5.0, 4.0, 1.0, 1.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.5546875, -9.252685546875, -8.95068359375, -8.648681640625, -8.3466796875, -8.044677734375, -7.74267578125, -7.440673828125, -7.138671875, -6.836669921875, -6.53466796875, -6.232666015625, -5.9306640625, -5.628662109375, -5.32666015625, -5.024658203125, -4.72265625, -4.420654296875, -4.11865234375, -3.816650390625, -3.5146484375, -3.212646484375, -2.91064453125, -2.608642578125, -2.306640625, -2.004638671875, -1.70263671875, -1.400634765625, -1.0986328125, -0.796630859375, -0.49462890625, -0.192626953125, 0.109375, 0.411376953125, 0.71337890625, 1.015380859375, 1.3173828125, 1.619384765625, 1.92138671875, 2.223388671875, 2.525390625, 2.827392578125, 3.12939453125, 3.431396484375, 3.7333984375, 4.035400390625, 4.33740234375, 4.639404296875, 4.94140625, 5.243408203125, 5.54541015625, 5.847412109375, 6.1494140625, 6.451416015625, 6.75341796875, 7.055419921875, 7.357421875, 7.659423828125, 7.96142578125, 8.263427734375, 8.5654296875, 8.867431640625, 9.16943359375, 9.471435546875, 9.7734375]}, "gradients/decoder.transformer.h.12.attn.c_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 3.0, 1.0, 2.0, 2.0, 7.0, 11.0, 11.0, 14.0, 18.0, 20.0, 36.0, 52.0, 79.0, 117.0, 194.0, 331.0, 603.0, 1223.0, 2215.0, 4833.0, 11499.0, 35272.0, 140720.0, 547711.0, 223840.0, 51831.0, 15679.0, 6285.0, 2810.0, 1301.0, 758.0, 410.0, 230.0, 141.0, 75.0, 72.0, 46.0, 27.0, 17.0, 23.0, 11.0, 7.0, 8.0, 7.0, 5.0, 4.0, 1.0, 1.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.171875, -8.882080078125, -8.59228515625, -8.302490234375, -8.0126953125, -7.722900390625, -7.43310546875, -7.143310546875, -6.853515625, -6.563720703125, -6.27392578125, -5.984130859375, -5.6943359375, -5.404541015625, -5.11474609375, -4.824951171875, -4.53515625, -4.245361328125, -3.95556640625, -3.665771484375, -3.3759765625, -3.086181640625, -2.79638671875, -2.506591796875, -2.216796875, -1.927001953125, -1.63720703125, -1.347412109375, -1.0576171875, -0.767822265625, -0.47802734375, -0.188232421875, 0.1015625, 0.391357421875, 0.68115234375, 0.970947265625, 1.2607421875, 1.550537109375, 1.84033203125, 2.130126953125, 2.419921875, 2.709716796875, 2.99951171875, 3.289306640625, 3.5791015625, 3.868896484375, 4.15869140625, 4.448486328125, 4.73828125, 5.028076171875, 5.31787109375, 5.607666015625, 5.8974609375, 6.187255859375, 6.47705078125, 6.766845703125, 7.056640625, 7.346435546875, 7.63623046875, 7.926025390625, 8.2158203125, 8.505615234375, 8.79541015625, 9.085205078125, 9.375]}, "gradients/decoder.transformer.h.12.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 3.0, 2.0, 6.0, 2.0, 8.0, 8.0, 2.0, 9.0, 4.0, 16.0, 15.0, 25.0, 30.0, 26.0, 28.0, 53.0, 36.0, 51.0, 47.0, 53.0, 112.0, 1811.0, 233.0, 53.0, 60.0, 53.0, 53.0, 37.0, 40.0, 33.0, 26.0, 24.0, 20.0, 22.0, 13.0, 14.0, 6.0, 4.0, 3.0, 5.0, 6.0, 2.0, 7.0, 1.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-35.34375, -34.1923828125, -33.041015625, -31.8896484375, -30.73828125, -29.5869140625, -28.435546875, -27.2841796875, -26.1328125, -24.9814453125, -23.830078125, -22.6787109375, -21.52734375, -20.3759765625, -19.224609375, -18.0732421875, -16.921875, -15.7705078125, -14.619140625, -13.4677734375, -12.31640625, -11.1650390625, -10.013671875, -8.8623046875, -7.7109375, -6.5595703125, -5.408203125, -4.2568359375, -3.10546875, -1.9541015625, -0.802734375, 0.3486328125, 1.5, 2.6513671875, 3.802734375, 4.9541015625, 6.10546875, 7.2568359375, 8.408203125, 9.5595703125, 10.7109375, 11.8623046875, 13.013671875, 14.1650390625, 15.31640625, 16.4677734375, 17.619140625, 18.7705078125, 19.921875, 21.0732421875, 22.224609375, 23.3759765625, 24.52734375, 25.6787109375, 26.830078125, 27.9814453125, 29.1328125, 30.2841796875, 31.435546875, 32.5869140625, 33.73828125, 34.8896484375, 36.041015625, 37.1923828125, 38.34375]}, "gradients/decoder.transformer.h.12.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 2.0, 2.0, 2.0, 2.0, 3.0, 10.0, 8.0, 9.0, 6.0, 10.0, 21.0, 30.0, 43.0, 57.0, 69.0, 115.0, 176.0, 282.0, 495.0, 2258.0, 2961852.0, 177831.0, 1270.0, 413.0, 259.0, 151.0, 104.0, 67.0, 55.0, 27.0, 17.0, 13.0, 11.0, 15.0, 4.0, 10.0, 4.0, 3.0, 4.0, 0.0, 2.0, 2.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-113.6875, -110.4296875, -107.171875, -103.9140625, -100.65625, -97.3984375, -94.140625, -90.8828125, -87.625, -84.3671875, -81.109375, -77.8515625, -74.59375, -71.3359375, -68.078125, -64.8203125, -61.5625, -58.3046875, -55.046875, -51.7890625, -48.53125, -45.2734375, -42.015625, -38.7578125, -35.5, -32.2421875, -28.984375, -25.7265625, -22.46875, -19.2109375, -15.953125, -12.6953125, -9.4375, -6.1796875, -2.921875, 0.3359375, 3.59375, 6.8515625, 10.109375, 13.3671875, 16.625, 19.8828125, 23.140625, 26.3984375, 29.65625, 32.9140625, 36.171875, 39.4296875, 42.6875, 45.9453125, 49.203125, 52.4609375, 55.71875, 58.9765625, 62.234375, 65.4921875, 68.75, 72.0078125, 75.265625, 78.5234375, 81.78125, 85.0390625, 88.296875, 91.5546875, 94.8125]}, "gradients/decoder.transformer.h.12.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 332.0, 681.0, 5.0, 0.0, 0.0, 1.0], "bins": [-538.5452880859375, -529.4317016601562, -520.3180541992188, -511.2044677734375, -502.09088134765625, -492.9772644042969, -483.8636779785156, -474.75006103515625, -465.636474609375, -456.5228576660156, -447.4092712402344, -438.295654296875, -429.18206787109375, -420.0684509277344, -410.9548645019531, -401.84124755859375, -392.7276306152344, -383.614013671875, -374.50042724609375, -365.3868103027344, -356.2732238769531, -347.15960693359375, -338.0460205078125, -328.9324035644531, -319.81878662109375, -310.7051696777344, -301.5915832519531, -292.47796630859375, -283.3643798828125, -274.2507629394531, -265.1371765136719, -256.0235595703125, -246.90994262695312, -237.7963409423828, -228.6827392578125, -219.5691375732422, -210.45553588867188, -201.3419189453125, -192.2283172607422, -183.11471557617188, -174.00111389160156, -164.88751220703125, -155.77391052246094, -146.66030883789062, -137.54669189453125, -128.43310546875, -119.31948852539062, -110.20588684082031, -101.09228515625, -91.97868347167969, -82.86508178710938, -73.75147247314453, -64.63787078857422, -55.524269104003906, -46.41066360473633, -37.29705810546875, -28.183460235595703, -19.069856643676758, -9.956253051757812, -0.8426494598388672, 8.270954132080078, 17.38455581665039, 26.49816131591797, 35.61176681518555, 44.72536849975586]}, "gradients/decoder.transformer.h.12.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 3.0, 3.0, 5.0, 6.0, 5.0, 6.0, 9.0, 12.0, 15.0, 17.0, 18.0, 22.0, 29.0, 24.0, 30.0, 36.0, 33.0, 43.0, 39.0, 50.0, 39.0, 43.0, 50.0, 49.0, 46.0, 49.0, 40.0, 35.0, 29.0, 36.0, 26.0, 23.0, 27.0, 21.0, 14.0, 13.0, 14.0, 9.0, 13.0, 10.0, 9.0, 5.0, 2.0, 4.0, 1.0, 2.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-85.31979370117188, -82.54969024658203, -79.77959442138672, -77.00949096679688, -74.23938751220703, -71.46929168701172, -68.69918823242188, -65.92909240722656, -63.15898895263672, -60.38888931274414, -57.6187858581543, -54.84868621826172, -52.07858657836914, -49.30848693847656, -46.53838348388672, -43.76828384399414, -40.9981803894043, -38.22808074951172, -35.457977294921875, -32.6878776550293, -29.91777801513672, -27.147676467895508, -24.377574920654297, -21.60747528076172, -18.837373733520508, -16.067272186279297, -13.297172546386719, -10.527070999145508, -7.756970405578613, -4.986869812011719, -2.216768264770508, 0.5533313751220703, 3.3234329223632812, 6.093533515930176, 8.86363410949707, 11.633735656738281, 14.403836250305176, 17.17393684387207, 19.94403839111328, 22.71413803100586, 25.48423957824707, 28.25434112548828, 31.02444076538086, 33.79454040527344, 36.56464385986328, 39.33474349975586, 42.10484313964844, 44.87494659423828, 47.64504623413086, 50.41514587402344, 53.18524932861328, 55.95534896850586, 58.72544860839844, 61.49555206298828, 64.26565551757812, 67.03575134277344, 69.80585479736328, 72.57595825195312, 75.34605407714844, 78.11615753173828, 80.88626098632812, 83.65635681152344, 86.42646026611328, 89.19656372070312, 91.96665954589844]}, "gradients/decoder.transformer.h.11.mlp.c_proj.bias": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 2.0, 1.0, 6.0, 2.0, 6.0, 8.0, 8.0, 7.0, 13.0, 16.0, 14.0, 16.0, 25.0, 22.0, 38.0, 33.0, 35.0, 38.0, 37.0, 34.0, 36.0, 43.0, 47.0, 59.0, 37.0, 33.0, 31.0, 33.0, 39.0, 41.0, 28.0, 31.0, 20.0, 24.0, 20.0, 22.0, 23.0, 8.0, 20.0, 10.0, 8.0, 10.0, 7.0, 5.0, 3.0, 8.0, 3.0, 0.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.765625, -9.4566650390625, -9.147705078125, -8.8387451171875, -8.52978515625, -8.2208251953125, -7.911865234375, -7.6029052734375, -7.2939453125, -6.9849853515625, -6.676025390625, -6.3670654296875, -6.05810546875, -5.7491455078125, -5.440185546875, -5.1312255859375, -4.822265625, -4.5133056640625, -4.204345703125, -3.8953857421875, -3.58642578125, -3.2774658203125, -2.968505859375, -2.6595458984375, -2.3505859375, -2.0416259765625, -1.732666015625, -1.4237060546875, -1.11474609375, -0.8057861328125, -0.496826171875, -0.1878662109375, 0.12109375, 0.4300537109375, 0.739013671875, 1.0479736328125, 1.35693359375, 1.6658935546875, 1.974853515625, 2.2838134765625, 2.5927734375, 2.9017333984375, 3.210693359375, 3.5196533203125, 3.82861328125, 4.1375732421875, 4.446533203125, 4.7554931640625, 5.064453125, 5.3734130859375, 5.682373046875, 5.9913330078125, 6.30029296875, 6.6092529296875, 6.918212890625, 7.2271728515625, 7.5361328125, 7.8450927734375, 8.154052734375, 8.4630126953125, 8.77197265625, 9.0809326171875, 9.389892578125, 9.6988525390625, 10.0078125]}, "gradients/decoder.transformer.h.11.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 3.0, 1.0, 0.0, 0.0, 0.0, 5.0, 7.0, 4.0, 8.0, 10.0, 8.0, 12.0, 12.0, 14.0, 20.0, 22.0, 28.0, 34.0, 35.0, 61.0, 62.0, 103.0, 144.0, 253.0, 413.0, 882.0, 2376.0, 8546.0, 44151.0, 415328.0, 2309488.0, 1253330.0, 133082.0, 18710.0, 4170.0, 1421.0, 560.0, 285.0, 186.0, 103.0, 85.0, 73.0, 42.0, 39.0, 40.0, 18.0, 26.0, 17.0, 20.0, 8.0, 14.0, 12.0, 6.0, 9.0, 8.0, 2.0, 1.0, 2.0, 1.0, 1.0, 1.0], "bins": [-22.65625, -21.985107421875, -21.31396484375, -20.642822265625, -19.9716796875, -19.300537109375, -18.62939453125, -17.958251953125, -17.287109375, -16.615966796875, -15.94482421875, -15.273681640625, -14.6025390625, -13.931396484375, -13.26025390625, -12.589111328125, -11.91796875, -11.246826171875, -10.57568359375, -9.904541015625, -9.2333984375, -8.562255859375, -7.89111328125, -7.219970703125, -6.548828125, -5.877685546875, -5.20654296875, -4.535400390625, -3.8642578125, -3.193115234375, -2.52197265625, -1.850830078125, -1.1796875, -0.508544921875, 0.16259765625, 0.833740234375, 1.5048828125, 2.176025390625, 2.84716796875, 3.518310546875, 4.189453125, 4.860595703125, 5.53173828125, 6.202880859375, 6.8740234375, 7.545166015625, 8.21630859375, 8.887451171875, 9.55859375, 10.229736328125, 10.90087890625, 11.572021484375, 12.2431640625, 12.914306640625, 13.58544921875, 14.256591796875, 14.927734375, 15.598876953125, 16.27001953125, 16.941162109375, 17.6123046875, 18.283447265625, 18.95458984375, 19.625732421875, 20.296875]}, "gradients/decoder.transformer.h.11.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 2.0, 1.0, 3.0, 6.0, 5.0, 2.0, 9.0, 7.0, 18.0, 24.0, 32.0, 44.0, 68.0, 109.0, 159.0, 204.0, 269.0, 393.0, 511.0, 546.0, 477.0, 342.0, 260.0, 168.0, 132.0, 84.0, 55.0, 50.0, 41.0, 21.0, 11.0, 11.0, 8.0, 2.0, 5.0, 3.0, 1.0, 3.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-24.84375, -24.1416015625, -23.439453125, -22.7373046875, -22.03515625, -21.3330078125, -20.630859375, -19.9287109375, -19.2265625, -18.5244140625, -17.822265625, -17.1201171875, -16.41796875, -15.7158203125, -15.013671875, -14.3115234375, -13.609375, -12.9072265625, -12.205078125, -11.5029296875, -10.80078125, -10.0986328125, -9.396484375, -8.6943359375, -7.9921875, -7.2900390625, -6.587890625, -5.8857421875, -5.18359375, -4.4814453125, -3.779296875, -3.0771484375, -2.375, -1.6728515625, -0.970703125, -0.2685546875, 0.43359375, 1.1357421875, 1.837890625, 2.5400390625, 3.2421875, 3.9443359375, 4.646484375, 5.3486328125, 6.05078125, 6.7529296875, 7.455078125, 8.1572265625, 8.859375, 9.5615234375, 10.263671875, 10.9658203125, 11.66796875, 12.3701171875, 13.072265625, 13.7744140625, 14.4765625, 15.1787109375, 15.880859375, 16.5830078125, 17.28515625, 17.9873046875, 18.689453125, 19.3916015625, 20.09375]}, "gradients/decoder.transformer.h.11.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 5.0, 0.0, 4.0, 7.0, 7.0, 13.0, 19.0, 34.0, 44.0, 71.0, 92.0, 115.0, 175.0, 270.0, 378.0, 749.0, 10190.0, 3988672.0, 190556.0, 1358.0, 489.0, 315.0, 207.0, 159.0, 119.0, 75.0, 49.0, 46.0, 21.0, 14.0, 7.0, 10.0, 6.0, 4.0, 6.0, 4.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-109.625, -106.2744140625, -102.923828125, -99.5732421875, -96.22265625, -92.8720703125, -89.521484375, -86.1708984375, -82.8203125, -79.4697265625, -76.119140625, -72.7685546875, -69.41796875, -66.0673828125, -62.716796875, -59.3662109375, -56.015625, -52.6650390625, -49.314453125, -45.9638671875, -42.61328125, -39.2626953125, -35.912109375, -32.5615234375, -29.2109375, -25.8603515625, -22.509765625, -19.1591796875, -15.80859375, -12.4580078125, -9.107421875, -5.7568359375, -2.40625, 0.9443359375, 4.294921875, 7.6455078125, 10.99609375, 14.3466796875, 17.697265625, 21.0478515625, 24.3984375, 27.7490234375, 31.099609375, 34.4501953125, 37.80078125, 41.1513671875, 44.501953125, 47.8525390625, 51.203125, 54.5537109375, 57.904296875, 61.2548828125, 64.60546875, 67.9560546875, 71.306640625, 74.6572265625, 78.0078125, 81.3583984375, 84.708984375, 88.0595703125, 91.41015625, 94.7607421875, 98.111328125, 101.4619140625, 104.8125]}, "gradients/decoder.transformer.h.11.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 39.0, 278.0, 518.0, 164.0, 19.0, 1.0, 1.0], "bins": [-743.01708984375, -730.5066528320312, -717.9962158203125, -705.4857788085938, -692.975341796875, -680.4649047851562, -667.9544677734375, -655.4440307617188, -642.93359375, -630.4231567382812, -617.9127197265625, -605.4022827148438, -592.891845703125, -580.3814086914062, -567.8709716796875, -555.3605346679688, -542.8501586914062, -530.3397216796875, -517.8292846679688, -505.31884765625, -492.80841064453125, -480.2979736328125, -467.78753662109375, -455.2771301269531, -442.76666259765625, -430.2562255859375, -417.74578857421875, -405.2353515625, -392.72491455078125, -380.2144775390625, -367.70404052734375, -355.1936340332031, -342.6832275390625, -330.17279052734375, -317.662353515625, -305.15191650390625, -292.6414794921875, -280.13104248046875, -267.62060546875, -255.11019897460938, -242.59974670410156, -230.0893096923828, -217.57887268066406, -205.06845092773438, -192.55801391601562, -180.04757690429688, -167.53713989257812, -155.02670288085938, -142.51626586914062, -130.00582885742188, -117.49539947509766, -104.9849624633789, -92.47453308105469, -79.96409606933594, -67.45365905761719, -54.94322967529297, -42.43280029296875, -29.922367095947266, -17.41193199157715, -4.901496887207031, 7.608936309814453, 20.119369506835938, 32.62980651855469, 45.140235900878906, 57.650672912597656]}, "gradients/decoder.transformer.h.11.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 5.0, 2.0, 4.0, 5.0, 12.0, 4.0, 6.0, 18.0, 7.0, 19.0, 20.0, 16.0, 30.0, 29.0, 31.0, 30.0, 27.0, 37.0, 34.0, 51.0, 41.0, 40.0, 59.0, 52.0, 40.0, 47.0, 39.0, 33.0, 35.0, 30.0, 35.0, 26.0, 17.0, 21.0, 20.0, 21.0, 15.0, 14.0, 8.0, 8.0, 8.0, 9.0, 2.0, 2.0, 1.0, 2.0, 0.0, 3.0, 0.0, 1.0, 2.0], "bins": [-72.4144287109375, -70.39883422851562, -68.38323211669922, -66.36763763427734, -64.35203552246094, -62.33644104003906, -60.32084274291992, -58.30524444580078, -56.28964614868164, -54.2740478515625, -52.25844955444336, -50.24285125732422, -48.227256774902344, -46.21165466308594, -44.19606018066406, -42.18046188354492, -40.16486358642578, -38.14926528930664, -36.1336669921875, -34.11806869506836, -32.10247039794922, -30.08687400817871, -28.071277618408203, -26.055679321289062, -24.040081024169922, -22.02448272705078, -20.00888442993164, -17.993288040161133, -15.977689743041992, -13.962091445922852, -11.946494102478027, -9.930896759033203, -7.9152984619140625, -5.89970064163208, -3.8841028213500977, -1.8685050010681152, 0.1470928192138672, 2.162691116333008, 4.178288459777832, 6.193885803222656, 8.209484100341797, 10.225082397460938, 12.240679740905762, 14.256277084350586, 16.271875381469727, 18.287473678588867, 20.303070068359375, 22.318668365478516, 24.334266662597656, 26.349864959716797, 28.365463256835938, 30.381059646606445, 32.39665985107422, 34.412254333496094, 36.427852630615234, 38.443450927734375, 40.459049224853516, 42.474647521972656, 44.4902458190918, 46.50584411621094, 48.52143859863281, 50.53704071044922, 52.552635192871094, 54.568233489990234, 56.583831787109375]}, "gradients/decoder.transformer.h.11.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 4.0, 1.0, 3.0, 1.0, 0.0, 9.0, 9.0, 5.0, 16.0, 13.0, 13.0, 18.0, 22.0, 23.0, 24.0, 27.0, 30.0, 46.0, 37.0, 35.0, 42.0, 45.0, 44.0, 36.0, 52.0, 38.0, 34.0, 45.0, 42.0, 31.0, 33.0, 20.0, 35.0, 23.0, 28.0, 15.0, 19.0, 12.0, 20.0, 13.0, 12.0, 8.0, 9.0, 5.0, 5.0, 5.0, 1.0, 5.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-9.875, -9.5689697265625, -9.262939453125, -8.9569091796875, -8.65087890625, -8.3448486328125, -8.038818359375, -7.7327880859375, -7.4267578125, -7.1207275390625, -6.814697265625, -6.5086669921875, -6.20263671875, -5.8966064453125, -5.590576171875, -5.2845458984375, -4.978515625, -4.6724853515625, -4.366455078125, -4.0604248046875, -3.75439453125, -3.4483642578125, -3.142333984375, -2.8363037109375, -2.5302734375, -2.2242431640625, -1.918212890625, -1.6121826171875, -1.30615234375, -1.0001220703125, -0.694091796875, -0.3880615234375, -0.08203125, 0.2239990234375, 0.530029296875, 0.8360595703125, 1.14208984375, 1.4481201171875, 1.754150390625, 2.0601806640625, 2.3662109375, 2.6722412109375, 2.978271484375, 3.2843017578125, 3.59033203125, 3.8963623046875, 4.202392578125, 4.5084228515625, 4.814453125, 5.1204833984375, 5.426513671875, 5.7325439453125, 6.03857421875, 6.3446044921875, 6.650634765625, 6.9566650390625, 7.2626953125, 7.5687255859375, 7.874755859375, 8.1807861328125, 8.48681640625, 8.7928466796875, 9.098876953125, 9.4049072265625, 9.7109375]}, "gradients/decoder.transformer.h.11.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 1.0, 3.0, 1.0, 1.0, 2.0, 6.0, 8.0, 12.0, 16.0, 24.0, 19.0, 41.0, 54.0, 126.0, 174.0, 282.0, 444.0, 689.0, 1079.0, 1690.0, 2761.0, 4658.0, 7942.0, 13559.0, 23548.0, 41906.0, 73488.0, 127080.0, 201233.0, 212615.0, 142105.0, 82460.0, 46757.0, 26382.0, 15017.0, 8782.0, 5188.0, 3233.0, 1951.0, 1228.0, 731.0, 436.0, 289.0, 207.0, 117.0, 70.0, 42.0, 35.0, 21.0, 17.0, 11.0, 12.0, 7.0, 3.0, 1.0, 1.0, 2.0, 4.0, 0.0, 1.0, 1.0], "bins": [-1.072265625, -1.0388641357421875, -1.005462646484375, -0.9720611572265625, -0.93865966796875, -0.9052581787109375, -0.871856689453125, -0.8384552001953125, -0.8050537109375, -0.7716522216796875, -0.738250732421875, -0.7048492431640625, -0.67144775390625, -0.6380462646484375, -0.604644775390625, -0.5712432861328125, -0.537841796875, -0.5044403076171875, -0.471038818359375, -0.4376373291015625, -0.40423583984375, -0.3708343505859375, -0.337432861328125, -0.3040313720703125, -0.2706298828125, -0.2372283935546875, -0.203826904296875, -0.1704254150390625, -0.13702392578125, -0.1036224365234375, -0.070220947265625, -0.0368194580078125, -0.00341796875, 0.0299835205078125, 0.063385009765625, 0.0967864990234375, 0.13018798828125, 0.1635894775390625, 0.196990966796875, 0.2303924560546875, 0.2637939453125, 0.2971954345703125, 0.330596923828125, 0.3639984130859375, 0.39739990234375, 0.4308013916015625, 0.464202880859375, 0.4976043701171875, 0.531005859375, 0.5644073486328125, 0.597808837890625, 0.6312103271484375, 0.66461181640625, 0.6980133056640625, 0.731414794921875, 0.7648162841796875, 0.7982177734375, 0.8316192626953125, 0.865020751953125, 0.8984222412109375, 0.93182373046875, 0.9652252197265625, 0.998626708984375, 1.0320281982421875, 1.0654296875]}, "gradients/decoder.transformer.h.11.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 3.0, 2.0, 1.0, 8.0, 7.0, 6.0, 13.0, 8.0, 16.0, 15.0, 12.0, 21.0, 12.0, 22.0, 35.0, 29.0, 31.0, 26.0, 33.0, 45.0, 34.0, 38.0, 25.0, 33.0, 1060.0, 51.0, 35.0, 22.0, 38.0, 37.0, 33.0, 28.0, 33.0, 26.0, 27.0, 26.0, 27.0, 20.0, 19.0, 15.0, 11.0, 9.0, 11.0, 6.0, 9.0, 2.0, 8.0, 3.0, 2.0, 1.0, 1.0, 2.0, 2.0, 0.0, 1.0, 1.0], "bins": [-5.91796875, -5.73358154296875, -5.5491943359375, -5.36480712890625, -5.180419921875, -4.99603271484375, -4.8116455078125, -4.62725830078125, -4.44287109375, -4.25848388671875, -4.0740966796875, -3.88970947265625, -3.705322265625, -3.52093505859375, -3.3365478515625, -3.15216064453125, -2.9677734375, -2.78338623046875, -2.5989990234375, -2.41461181640625, -2.230224609375, -2.04583740234375, -1.8614501953125, -1.67706298828125, -1.49267578125, -1.30828857421875, -1.1239013671875, -0.93951416015625, -0.755126953125, -0.57073974609375, -0.3863525390625, -0.20196533203125, -0.017578125, 0.16680908203125, 0.3511962890625, 0.53558349609375, 0.719970703125, 0.90435791015625, 1.0887451171875, 1.27313232421875, 1.45751953125, 1.64190673828125, 1.8262939453125, 2.01068115234375, 2.195068359375, 2.37945556640625, 2.5638427734375, 2.74822998046875, 2.9326171875, 3.11700439453125, 3.3013916015625, 3.48577880859375, 3.670166015625, 3.85455322265625, 4.0389404296875, 4.22332763671875, 4.40771484375, 4.59210205078125, 4.7764892578125, 4.96087646484375, 5.145263671875, 5.32965087890625, 5.5140380859375, 5.69842529296875, 5.8828125]}, "gradients/decoder.transformer.h.11.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 3.0, 1.0, 5.0, 9.0, 12.0, 13.0, 22.0, 37.0, 56.0, 78.0, 133.0, 166.0, 274.0, 442.0, 757.0, 1196.0, 1880.0, 3091.0, 5088.0, 8118.0, 12685.0, 20401.0, 32354.0, 50949.0, 79351.0, 118048.0, 161481.0, 1207399.0, 133833.0, 93194.0, 61506.0, 39002.0, 24408.0, 15393.0, 9727.0, 6051.0, 3796.0, 2284.0, 1446.0, 933.0, 562.0, 365.0, 204.0, 125.0, 97.0, 51.0, 41.0, 27.0, 14.0, 12.0, 6.0, 11.0, 3.0, 3.0, 2.0, 1.0, 1.0], "bins": [-0.71044921875, -0.6896743774414062, -0.6688995361328125, -0.6481246948242188, -0.627349853515625, -0.6065750122070312, -0.5858001708984375, -0.5650253295898438, -0.54425048828125, -0.5234756469726562, -0.5027008056640625, -0.48192596435546875, -0.461151123046875, -0.44037628173828125, -0.4196014404296875, -0.39882659912109375, -0.3780517578125, -0.35727691650390625, -0.3365020751953125, -0.31572723388671875, -0.294952392578125, -0.27417755126953125, -0.2534027099609375, -0.23262786865234375, -0.21185302734375, -0.19107818603515625, -0.1703033447265625, -0.14952850341796875, -0.128753662109375, -0.10797882080078125, -0.0872039794921875, -0.06642913818359375, -0.045654296875, -0.02487945556640625, -0.0041046142578125, 0.01667022705078125, 0.037445068359375, 0.05821990966796875, 0.0789947509765625, 0.09976959228515625, 0.12054443359375, 0.14131927490234375, 0.1620941162109375, 0.18286895751953125, 0.203643798828125, 0.22441864013671875, 0.2451934814453125, 0.26596832275390625, 0.2867431640625, 0.30751800537109375, 0.3282928466796875, 0.34906768798828125, 0.369842529296875, 0.39061737060546875, 0.4113922119140625, 0.43216705322265625, 0.45294189453125, 0.47371673583984375, 0.4944915771484375, 0.5152664184570312, 0.536041259765625, 0.5568161010742188, 0.5775909423828125, 0.5983657836914062, 0.619140625]}, "gradients/decoder.transformer.h.11.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 3.0, 2.0, 1.0, 2.0, 1.0, 4.0, 2.0, 4.0, 4.0, 6.0, 10.0, 14.0, 8.0, 9.0, 13.0, 22.0, 20.0, 25.0, 27.0, 24.0, 34.0, 38.0, 39.0, 39.0, 47.0, 47.0, 46.0, 41.0, 49.0, 46.0, 50.0, 33.0, 41.0, 35.0, 28.0, 31.0, 25.0, 26.0, 24.0, 13.0, 13.0, 11.0, 5.0, 9.0, 11.0, 8.0, 8.0, 5.0, 3.0, 2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 2.0, 2.0], "bins": [-0.0055389404296875, -0.0053809285163879395, -0.005222916603088379, -0.005064904689788818, -0.004906892776489258, -0.004748880863189697, -0.004590868949890137, -0.004432857036590576, -0.004274845123291016, -0.004116833209991455, -0.0039588212966918945, -0.003800809383392334, -0.0036427974700927734, -0.003484785556793213, -0.0033267736434936523, -0.003168761730194092, -0.0030107498168945312, -0.0028527379035949707, -0.00269472599029541, -0.0025367140769958496, -0.002378702163696289, -0.0022206902503967285, -0.002062678337097168, -0.0019046664237976074, -0.0017466545104980469, -0.0015886425971984863, -0.0014306306838989258, -0.0012726187705993652, -0.0011146068572998047, -0.0009565949440002441, -0.0007985830307006836, -0.000640571117401123, -0.0004825592041015625, -0.00032454729080200195, -0.0001665353775024414, -8.52346420288086e-06, 0.0001494884490966797, 0.00030750036239624023, 0.0004655122756958008, 0.0006235241889953613, 0.0007815361022949219, 0.0009395480155944824, 0.001097559928894043, 0.0012555718421936035, 0.001413583755493164, 0.0015715956687927246, 0.0017296075820922852, 0.0018876194953918457, 0.0020456314086914062, 0.002203643321990967, 0.0023616552352905273, 0.002519667148590088, 0.0026776790618896484, 0.002835690975189209, 0.0029937028884887695, 0.00315171480178833, 0.0033097267150878906, 0.003467738628387451, 0.0036257505416870117, 0.0037837624549865723, 0.003941774368286133, 0.004099786281585693, 0.004257798194885254, 0.0044158101081848145, 0.004573822021484375]}, "gradients/decoder.transformer.h.11.crossattention.q_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 3.0, 4.0, 5.0, 6.0, 7.0, 10.0, 6.0, 6.0, 13.0, 16.0, 13.0, 20.0, 28.0, 21.0, 39.0, 44.0, 54.0, 102.0, 116.0, 219.0, 624.0, 13418.0, 974122.0, 57452.0, 1287.0, 290.0, 138.0, 104.0, 74.0, 76.0, 43.0, 32.0, 36.0, 13.0, 28.0, 16.0, 11.0, 20.0, 3.0, 5.0, 12.0, 3.0, 7.0, 4.0, 3.0, 1.0, 4.0, 1.0, 0.0, 3.0, 3.0, 2.0, 0.0, 1.0, 1.0, 1.0], "bins": [-0.116455078125, -0.1126556396484375, -0.108856201171875, -0.1050567626953125, -0.10125732421875, -0.0974578857421875, -0.093658447265625, -0.0898590087890625, -0.0860595703125, -0.0822601318359375, -0.078460693359375, -0.0746612548828125, -0.07086181640625, -0.0670623779296875, -0.063262939453125, -0.0594635009765625, -0.0556640625, -0.0518646240234375, -0.048065185546875, -0.0442657470703125, -0.04046630859375, -0.0366668701171875, -0.032867431640625, -0.0290679931640625, -0.0252685546875, -0.0214691162109375, -0.017669677734375, -0.0138702392578125, -0.01007080078125, -0.0062713623046875, -0.002471923828125, 0.0013275146484375, 0.005126953125, 0.0089263916015625, 0.012725830078125, 0.0165252685546875, 0.02032470703125, 0.0241241455078125, 0.027923583984375, 0.0317230224609375, 0.0355224609375, 0.0393218994140625, 0.043121337890625, 0.0469207763671875, 0.05072021484375, 0.0545196533203125, 0.058319091796875, 0.0621185302734375, 0.06591796875, 0.0697174072265625, 0.073516845703125, 0.0773162841796875, 0.08111572265625, 0.0849151611328125, 0.088714599609375, 0.0925140380859375, 0.0963134765625, 0.1001129150390625, 0.103912353515625, 0.1077117919921875, 0.11151123046875, 0.1153106689453125, 0.119110107421875, 0.1229095458984375, 0.126708984375]}, "gradients/decoder.transformer.h.11.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 7.0, 17.0, 33.0, 83.0, 186.0, 279.0, 228.0, 120.0, 32.0, 18.0, 7.0, 1.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.0032854918390512466, -0.0029052465688437223, -0.0025250010658055544, -0.00214475579559803, -0.001764510408975184, -0.0013842650223523378, -0.0010040197521448135, -0.0006237742491066456, -0.00024352897889912128, 0.00013671637861989439, 0.0005169617361389101, 0.0008972070645540953, 0.0012774524511769414, 0.0016576978377997875, 0.002037943108007312, 0.0024181886110454798, 0.002798433881253004, 0.0031786791514605284, 0.0035589246544986963, 0.003939169924706221, 0.004319415427744389, 0.004699660465121269, 0.005079905968159437, 0.005460151471197605, 0.005840396508574486, 0.006220642011612654, 0.006600887048989534, 0.006981132552027702, 0.00736137805506587, 0.007741623558104038, 0.008121868595480919, 0.008502114564180374, 0.008882359601557255, 0.009262604638934135, 0.00964285060763359, 0.010023095645010471, 0.010403340682387352, 0.010783586651086807, 0.011163831688463688, 0.011544076725840569, 0.011924322694540024, 0.012304567731916904, 0.01268481370061636, 0.01306505873799324, 0.013445303775370121, 0.013825549744069576, 0.014205794781446457, 0.014586040750145912, 0.014966284856200218, 0.015346529893577099, 0.01572677493095398, 0.016107020899653435, 0.01648726686835289, 0.016867510974407196, 0.01724775694310665, 0.017628002911806107, 0.018008248880505562, 0.018388494849205017, 0.018768738955259323, 0.01914898492395878, 0.019529230892658234, 0.01990947499871254, 0.020289720967411995, 0.02066996693611145, 0.021050211042165756]}, "gradients/decoder.transformer.h.11.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 0.0, 3.0, 4.0, 4.0, 5.0, 5.0, 7.0, 18.0, 11.0, 18.0, 24.0, 14.0, 25.0, 27.0, 24.0, 29.0, 43.0, 32.0, 44.0, 48.0, 44.0, 53.0, 45.0, 32.0, 49.0, 39.0, 48.0, 35.0, 42.0, 24.0, 32.0, 30.0, 28.0, 16.0, 24.0, 16.0, 15.0, 7.0, 11.0, 11.0, 7.0, 6.0, 3.0, 4.0, 2.0, 2.0, 0.0, 3.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.003509700298309326, -0.0033970950171351433, -0.0032844897359609604, -0.0031718844547867775, -0.0030592791736125946, -0.0029466738924384117, -0.002834068611264229, -0.002721463330090046, -0.002608858048915863, -0.00249625276774168, -0.0023836474865674973, -0.0022710422053933144, -0.0021584369242191315, -0.0020458316430449486, -0.0019332263618707657, -0.0018206210806965828, -0.0017080157995224, -0.001595410518348217, -0.0014828052371740341, -0.0013701999559998512, -0.0012575946748256683, -0.0011449893936514854, -0.0010323841124773026, -0.0009197788313031197, -0.0008071735501289368, -0.0006945682689547539, -0.000581962987780571, -0.0004693577066063881, -0.0003567524254322052, -0.0002441471442580223, -0.00013154186308383942, -1.8936581909656525e-05, 9.366869926452637e-05, 0.00020627398043870926, 0.00031887926161289215, 0.00043148454278707504, 0.0005440898239612579, 0.0006566951051354408, 0.0007693003863096237, 0.0008819056674838066, 0.0009945109486579895, 0.0011071162298321724, 0.0012197215110063553, 0.0013323267921805382, 0.001444932073354721, 0.001557537354528904, 0.0016701426357030869, 0.0017827479168772697, 0.0018953531980514526, 0.0020079584792256355, 0.0021205637603998184, 0.0022331690415740013, 0.002345774322748184, 0.002458379603922367, 0.00257098488509655, 0.002683590166270733, 0.0027961954474449158, 0.0029088007286190987, 0.0030214060097932816, 0.0031340112909674644, 0.0032466165721416473, 0.0033592218533158302, 0.003471827134490013, 0.003584432415664196, 0.003697037696838379]}, "gradients/decoder.transformer.h.11.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 4.0, 1.0, 3.0, 1.0, 0.0, 9.0, 9.0, 5.0, 16.0, 13.0, 13.0, 18.0, 22.0, 23.0, 24.0, 27.0, 30.0, 46.0, 37.0, 35.0, 42.0, 45.0, 44.0, 36.0, 52.0, 38.0, 34.0, 45.0, 41.0, 32.0, 33.0, 20.0, 35.0, 23.0, 28.0, 15.0, 19.0, 12.0, 20.0, 13.0, 12.0, 8.0, 9.0, 5.0, 5.0, 5.0, 1.0, 5.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-9.875, -9.5689697265625, -9.262939453125, -8.9569091796875, -8.65087890625, -8.3448486328125, -8.038818359375, -7.7327880859375, -7.4267578125, -7.1207275390625, -6.814697265625, -6.5086669921875, -6.20263671875, -5.8966064453125, -5.590576171875, -5.2845458984375, -4.978515625, -4.6724853515625, -4.366455078125, -4.0604248046875, -3.75439453125, -3.4483642578125, -3.142333984375, -2.8363037109375, -2.5302734375, -2.2242431640625, -1.918212890625, -1.6121826171875, -1.30615234375, -1.0001220703125, -0.694091796875, -0.3880615234375, -0.08203125, 0.2239990234375, 0.530029296875, 0.8360595703125, 1.14208984375, 1.4481201171875, 1.754150390625, 2.0601806640625, 2.3662109375, 2.6722412109375, 2.978271484375, 3.2843017578125, 3.59033203125, 3.8963623046875, 4.202392578125, 4.5084228515625, 4.814453125, 5.1204833984375, 5.426513671875, 5.7325439453125, 6.03857421875, 6.3446044921875, 6.650634765625, 6.9566650390625, 7.2626953125, 7.5687255859375, 7.874755859375, 8.1807861328125, 8.48681640625, 8.7928466796875, 9.098876953125, 9.4049072265625, 9.7109375]}, "gradients/decoder.transformer.h.11.attn.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 2.0, 2.0, 6.0, 7.0, 8.0, 17.0, 15.0, 19.0, 28.0, 38.0, 71.0, 94.0, 171.0, 358.0, 611.0, 1134.0, 2244.0, 4577.0, 8902.0, 18126.0, 38922.0, 97050.0, 305107.0, 372709.0, 113715.0, 44262.0, 20170.0, 9789.0, 5025.0, 2558.0, 1215.0, 651.0, 362.0, 214.0, 126.0, 72.0, 54.0, 32.0, 22.0, 15.0, 20.0, 15.0, 5.0, 9.0, 6.0, 4.0, 0.0, 3.0, 0.0, 0.0, 2.0, 2.0, 1.0, 0.0, 0.0, 2.0], "bins": [-6.50390625, -6.29461669921875, -6.0853271484375, -5.87603759765625, -5.666748046875, -5.45745849609375, -5.2481689453125, -5.03887939453125, -4.82958984375, -4.62030029296875, -4.4110107421875, -4.20172119140625, -3.992431640625, -3.78314208984375, -3.5738525390625, -3.36456298828125, -3.1552734375, -2.94598388671875, -2.7366943359375, -2.52740478515625, -2.318115234375, -2.10882568359375, -1.8995361328125, -1.69024658203125, -1.48095703125, -1.27166748046875, -1.0623779296875, -0.85308837890625, -0.643798828125, -0.43450927734375, -0.2252197265625, -0.01593017578125, 0.193359375, 0.40264892578125, 0.6119384765625, 0.82122802734375, 1.030517578125, 1.23980712890625, 1.4490966796875, 1.65838623046875, 1.86767578125, 2.07696533203125, 2.2862548828125, 2.49554443359375, 2.704833984375, 2.91412353515625, 3.1234130859375, 3.33270263671875, 3.5419921875, 3.75128173828125, 3.9605712890625, 4.16986083984375, 4.379150390625, 4.58843994140625, 4.7977294921875, 5.00701904296875, 5.21630859375, 5.42559814453125, 5.6348876953125, 5.84417724609375, 6.053466796875, 6.26275634765625, 6.4720458984375, 6.68133544921875, 6.890625]}, "gradients/decoder.transformer.h.11.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 3.0, 0.0, 3.0, 3.0, 8.0, 5.0, 9.0, 11.0, 12.0, 9.0, 14.0, 22.0, 28.0, 23.0, 28.0, 35.0, 39.0, 36.0, 52.0, 54.0, 72.0, 265.0, 1669.0, 166.0, 80.0, 66.0, 46.0, 46.0, 46.0, 41.0, 33.0, 27.0, 21.0, 14.0, 12.0, 12.0, 7.0, 12.0, 5.0, 6.0, 8.0, 2.0, 0.0, 3.0, 2.0, 2.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-38.8125, -37.61083984375, -36.4091796875, -35.20751953125, -34.005859375, -32.80419921875, -31.6025390625, -30.40087890625, -29.19921875, -27.99755859375, -26.7958984375, -25.59423828125, -24.392578125, -23.19091796875, -21.9892578125, -20.78759765625, -19.5859375, -18.38427734375, -17.1826171875, -15.98095703125, -14.779296875, -13.57763671875, -12.3759765625, -11.17431640625, -9.97265625, -8.77099609375, -7.5693359375, -6.36767578125, -5.166015625, -3.96435546875, -2.7626953125, -1.56103515625, -0.359375, 0.84228515625, 2.0439453125, 3.24560546875, 4.447265625, 5.64892578125, 6.8505859375, 8.05224609375, 9.25390625, 10.45556640625, 11.6572265625, 12.85888671875, 14.060546875, 15.26220703125, 16.4638671875, 17.66552734375, 18.8671875, 20.06884765625, 21.2705078125, 22.47216796875, 23.673828125, 24.87548828125, 26.0771484375, 27.27880859375, 28.48046875, 29.68212890625, 30.8837890625, 32.08544921875, 33.287109375, 34.48876953125, 35.6904296875, 36.89208984375, 38.09375]}, "gradients/decoder.transformer.h.11.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 3.0, 2.0, 2.0, 2.0, 1.0, 2.0, 6.0, 5.0, 5.0, 16.0, 9.0, 18.0, 18.0, 24.0, 29.0, 50.0, 60.0, 84.0, 119.0, 128.0, 164.0, 280.0, 480.0, 1462.0, 140676.0, 2997112.0, 3323.0, 555.0, 293.0, 199.0, 143.0, 115.0, 70.0, 56.0, 35.0, 45.0, 22.0, 22.0, 17.0, 14.0, 17.0, 8.0, 7.0, 7.0, 4.0, 1.0, 2.0, 1.0, 3.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-88.375, -85.5390625, -82.703125, -79.8671875, -77.03125, -74.1953125, -71.359375, -68.5234375, -65.6875, -62.8515625, -60.015625, -57.1796875, -54.34375, -51.5078125, -48.671875, -45.8359375, -43.0, -40.1640625, -37.328125, -34.4921875, -31.65625, -28.8203125, -25.984375, -23.1484375, -20.3125, -17.4765625, -14.640625, -11.8046875, -8.96875, -6.1328125, -3.296875, -0.4609375, 2.375, 5.2109375, 8.046875, 10.8828125, 13.71875, 16.5546875, 19.390625, 22.2265625, 25.0625, 27.8984375, 30.734375, 33.5703125, 36.40625, 39.2421875, 42.078125, 44.9140625, 47.75, 50.5859375, 53.421875, 56.2578125, 59.09375, 61.9296875, 64.765625, 67.6015625, 70.4375, 73.2734375, 76.109375, 78.9453125, 81.78125, 84.6171875, 87.453125, 90.2890625, 93.125]}, "gradients/decoder.transformer.h.11.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 3.0, 7.0, 18.0, 38.0, 58.0, 88.0, 150.0, 167.0, 176.0, 124.0, 82.0, 59.0, 22.0, 10.0, 4.0, 4.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-38.597721099853516, -37.487117767333984, -36.37651443481445, -35.26591110229492, -34.15530776977539, -33.044700622558594, -31.934099197387695, -30.823495864868164, -29.712890625, -28.60228729248047, -27.491683959960938, -26.381080627441406, -25.270475387573242, -24.15987205505371, -23.04926872253418, -21.93866539001465, -20.828062057495117, -19.717458724975586, -18.606855392456055, -17.49625015258789, -16.38564682006836, -15.275043487548828, -14.164440155029297, -13.053836822509766, -11.943232536315918, -10.832629203796387, -9.722024917602539, -8.611421585083008, -7.500817775726318, -6.390213966369629, -5.279610633850098, -4.169006824493408, -3.0584030151367188, -1.9477993249893188, -0.837195634841919, 0.2734079360961914, 1.3840117454528809, 2.4946155548095703, 3.6052188873291016, 4.715822696685791, 5.8264265060424805, 6.93703031539917, 8.04763412475586, 9.15823745727539, 10.268840789794922, 11.37944507598877, 12.4900484085083, 13.600652694702148, 14.71125602722168, 15.821859359741211, 16.932462692260742, 18.043067932128906, 19.153671264648438, 20.26427459716797, 21.3748779296875, 22.48548126220703, 23.596084594726562, 24.706687927246094, 25.817291259765625, 26.927894592285156, 28.03849983215332, 29.14910316467285, 30.259706497192383, 31.370309829711914, 32.48091506958008]}, "gradients/decoder.transformer.h.11.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 4.0, 2.0, 5.0, 2.0, 4.0, 5.0, 6.0, 6.0, 23.0, 18.0, 11.0, 22.0, 18.0, 20.0, 32.0, 29.0, 31.0, 39.0, 35.0, 26.0, 38.0, 44.0, 46.0, 35.0, 44.0, 33.0, 42.0, 49.0, 38.0, 37.0, 32.0, 33.0, 25.0, 27.0, 37.0, 24.0, 14.0, 16.0, 10.0, 9.0, 10.0, 6.0, 7.0, 4.0, 2.0, 1.0, 3.0, 4.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 1.0, 2.0], "bins": [-83.06470489501953, -80.42549133300781, -77.7862777709961, -75.14705657958984, -72.50784301757812, -69.8686294555664, -67.22941589355469, -64.59020233154297, -61.950984954833984, -59.311771392822266, -56.67255401611328, -54.03334045410156, -51.394126892089844, -48.75490951538086, -46.11569595336914, -43.476478576660156, -40.83726501464844, -38.19805145263672, -35.558834075927734, -32.919620513916016, -30.280405044555664, -27.641189575195312, -25.001976013183594, -22.362760543823242, -19.72354507446289, -17.08432960510254, -14.445115089416504, -11.805900573730469, -9.166685104370117, -6.527469635009766, -3.8882551193237305, -1.2490406036376953, 1.3901824951171875, 4.029397487640381, 6.668612480163574, 9.30782699584961, 11.947042465209961, 14.586257934570312, 17.22547149658203, 19.864686965942383, 22.503902435302734, 25.143117904663086, 27.782333374023438, 30.421546936035156, 33.060760498046875, 35.69997787475586, 38.33919143676758, 40.97840881347656, 43.61762237548828, 46.2568359375, 48.896053314208984, 51.5352668762207, 54.17448425292969, 56.813697814941406, 59.452911376953125, 62.092124938964844, 64.73133850097656, 67.37055206298828, 70.009765625, 72.64898681640625, 75.28820037841797, 77.92741394042969, 80.5666275024414, 83.20584106445312, 85.84506225585938]}, "gradients/decoder.transformer.h.10.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 3.0, 1.0, 1.0, 0.0, 3.0, 0.0, 4.0, 2.0, 7.0, 3.0, 12.0, 8.0, 11.0, 13.0, 20.0, 21.0, 21.0, 14.0, 26.0, 25.0, 44.0, 35.0, 35.0, 40.0, 48.0, 43.0, 39.0, 43.0, 39.0, 31.0, 38.0, 40.0, 44.0, 34.0, 28.0, 30.0, 32.0, 26.0, 20.0, 18.0, 15.0, 12.0, 18.0, 20.0, 11.0, 6.0, 9.0, 10.0, 2.0, 4.0, 1.0, 3.0, 1.0, 4.0, 2.0, 0.0, 0.0, 2.0, 0.0, 1.0], "bins": [-9.9921875, -9.6815185546875, -9.370849609375, -9.0601806640625, -8.74951171875, -8.4388427734375, -8.128173828125, -7.8175048828125, -7.5068359375, -7.1961669921875, -6.885498046875, -6.5748291015625, -6.26416015625, -5.9534912109375, -5.642822265625, -5.3321533203125, -5.021484375, -4.7108154296875, -4.400146484375, -4.0894775390625, -3.77880859375, -3.4681396484375, -3.157470703125, -2.8468017578125, -2.5361328125, -2.2254638671875, -1.914794921875, -1.6041259765625, -1.29345703125, -0.9827880859375, -0.672119140625, -0.3614501953125, -0.05078125, 0.2598876953125, 0.570556640625, 0.8812255859375, 1.19189453125, 1.5025634765625, 1.813232421875, 2.1239013671875, 2.4345703125, 2.7452392578125, 3.055908203125, 3.3665771484375, 3.67724609375, 3.9879150390625, 4.298583984375, 4.6092529296875, 4.919921875, 5.2305908203125, 5.541259765625, 5.8519287109375, 6.16259765625, 6.4732666015625, 6.783935546875, 7.0946044921875, 7.4052734375, 7.7159423828125, 8.026611328125, 8.3372802734375, 8.64794921875, 8.9586181640625, 9.269287109375, 9.5799560546875, 9.890625]}, "gradients/decoder.transformer.h.10.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 4.0, 3.0, 2.0, 6.0, 3.0, 4.0, 14.0, 12.0, 9.0, 18.0, 23.0, 17.0, 16.0, 33.0, 37.0, 45.0, 63.0, 179.0, 350.0, 1477.0, 8524.0, 85086.0, 1813026.0, 2167839.0, 105554.0, 9493.0, 1554.0, 403.0, 166.0, 65.0, 39.0, 46.0, 35.0, 23.0, 23.0, 19.0, 16.0, 20.0, 16.0, 9.0, 3.0, 3.0, 5.0, 7.0, 2.0, 1.0, 3.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-31.484375, -30.501708984375, -29.51904296875, -28.536376953125, -27.5537109375, -26.571044921875, -25.58837890625, -24.605712890625, -23.623046875, -22.640380859375, -21.65771484375, -20.675048828125, -19.6923828125, -18.709716796875, -17.72705078125, -16.744384765625, -15.76171875, -14.779052734375, -13.79638671875, -12.813720703125, -11.8310546875, -10.848388671875, -9.86572265625, -8.883056640625, -7.900390625, -6.917724609375, -5.93505859375, -4.952392578125, -3.9697265625, -2.987060546875, -2.00439453125, -1.021728515625, -0.0390625, 0.943603515625, 1.92626953125, 2.908935546875, 3.8916015625, 4.874267578125, 5.85693359375, 6.839599609375, 7.822265625, 8.804931640625, 9.78759765625, 10.770263671875, 11.7529296875, 12.735595703125, 13.71826171875, 14.700927734375, 15.68359375, 16.666259765625, 17.64892578125, 18.631591796875, 19.6142578125, 20.596923828125, 21.57958984375, 22.562255859375, 23.544921875, 24.527587890625, 25.51025390625, 26.492919921875, 27.4755859375, 28.458251953125, 29.44091796875, 30.423583984375, 31.40625]}, "gradients/decoder.transformer.h.10.mlp.c_fc.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 3.0, 4.0, 2.0, 3.0, 4.0, 7.0, 2.0, 5.0, 9.0, 10.0, 11.0, 29.0, 32.0, 41.0, 49.0, 82.0, 82.0, 131.0, 127.0, 222.0, 264.0, 316.0, 408.0, 415.0, 412.0, 308.0, 262.0, 205.0, 153.0, 96.0, 92.0, 57.0, 54.0, 50.0, 30.0, 15.0, 21.0, 16.0, 11.0, 11.0, 10.0, 4.0, 4.0, 8.0, 3.0, 1.0, 3.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-15.3203125, -14.8013916015625, -14.282470703125, -13.7635498046875, -13.24462890625, -12.7257080078125, -12.206787109375, -11.6878662109375, -11.1689453125, -10.6500244140625, -10.131103515625, -9.6121826171875, -9.09326171875, -8.5743408203125, -8.055419921875, -7.5364990234375, -7.017578125, -6.4986572265625, -5.979736328125, -5.4608154296875, -4.94189453125, -4.4229736328125, -3.904052734375, -3.3851318359375, -2.8662109375, -2.3472900390625, -1.828369140625, -1.3094482421875, -0.79052734375, -0.2716064453125, 0.247314453125, 0.7662353515625, 1.28515625, 1.8040771484375, 2.322998046875, 2.8419189453125, 3.36083984375, 3.8797607421875, 4.398681640625, 4.9176025390625, 5.4365234375, 5.9554443359375, 6.474365234375, 6.9932861328125, 7.51220703125, 8.0311279296875, 8.550048828125, 9.0689697265625, 9.587890625, 10.1068115234375, 10.625732421875, 11.1446533203125, 11.66357421875, 12.1824951171875, 12.701416015625, 13.2203369140625, 13.7392578125, 14.2581787109375, 14.777099609375, 15.2960205078125, 15.81494140625, 16.3338623046875, 16.852783203125, 17.3717041015625, 17.890625]}, "gradients/decoder.transformer.h.10.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 2.0, 1.0, 2.0, 1.0, 8.0, 5.0, 10.0, 9.0, 17.0, 17.0, 20.0, 28.0, 36.0, 48.0, 65.0, 87.0, 103.0, 120.0, 174.0, 212.0, 298.0, 509.0, 2326.0, 221669.0, 3934266.0, 31538.0, 1105.0, 433.0, 291.0, 191.0, 173.0, 134.0, 103.0, 67.0, 61.0, 46.0, 30.0, 22.0, 23.0, 12.0, 6.0, 7.0, 5.0, 3.0, 6.0, 1.0, 2.0, 3.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-90.3125, -87.681640625, -85.05078125, -82.419921875, -79.7890625, -77.158203125, -74.52734375, -71.896484375, -69.265625, -66.634765625, -64.00390625, -61.373046875, -58.7421875, -56.111328125, -53.48046875, -50.849609375, -48.21875, -45.587890625, -42.95703125, -40.326171875, -37.6953125, -35.064453125, -32.43359375, -29.802734375, -27.171875, -24.541015625, -21.91015625, -19.279296875, -16.6484375, -14.017578125, -11.38671875, -8.755859375, -6.125, -3.494140625, -0.86328125, 1.767578125, 4.3984375, 7.029296875, 9.66015625, 12.291015625, 14.921875, 17.552734375, 20.18359375, 22.814453125, 25.4453125, 28.076171875, 30.70703125, 33.337890625, 35.96875, 38.599609375, 41.23046875, 43.861328125, 46.4921875, 49.123046875, 51.75390625, 54.384765625, 57.015625, 59.646484375, 62.27734375, 64.908203125, 67.5390625, 70.169921875, 72.80078125, 75.431640625, 78.0625]}, "gradients/decoder.transformer.h.10.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 19.0, 61.0, 169.0, 291.0, 258.0, 152.0, 54.0, 11.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-269.2635803222656, -262.8293762207031, -256.3951721191406, -249.9609832763672, -243.52679443359375, -237.09259033203125, -230.65838623046875, -224.2241973876953, -217.79000854492188, -211.35580444335938, -204.92161560058594, -198.48741149902344, -192.05322265625, -185.6190185546875, -179.184814453125, -172.75062561035156, -166.31642150878906, -159.88221740722656, -153.44802856445312, -147.01382446289062, -140.5796356201172, -134.1454315185547, -127.71123504638672, -121.27703857421875, -114.84284210205078, -108.40864562988281, -101.97444915771484, -95.54025268554688, -89.10604858398438, -82.67185974121094, -76.23765563964844, -69.80345916748047, -63.3692626953125, -56.93506622314453, -50.50086975097656, -44.06666946411133, -37.63247299194336, -31.19827651977539, -24.764076232910156, -18.329879760742188, -11.895683288574219, -5.461485862731934, 0.9727115631103516, 7.406909942626953, 13.841106414794922, 20.27530288696289, 26.709503173828125, 33.143699645996094, 39.57789611816406, 46.01209259033203, 52.4462890625, 58.880489349365234, 65.31468200683594, 71.74888610839844, 78.1830825805664, 84.61727905273438, 91.05147552490234, 97.48567199707031, 103.91986846923828, 110.35406494140625, 116.78826904296875, 123.22245788574219, 129.6566619873047, 136.09085083007812, 142.52505493164062]}, "gradients/decoder.transformer.h.10.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 7.0, 2.0, 1.0, 7.0, 4.0, 7.0, 11.0, 10.0, 8.0, 12.0, 17.0, 17.0, 14.0, 19.0, 25.0, 26.0, 24.0, 35.0, 36.0, 21.0, 42.0, 36.0, 50.0, 42.0, 31.0, 43.0, 35.0, 38.0, 48.0, 36.0, 28.0, 40.0, 39.0, 28.0, 20.0, 21.0, 21.0, 17.0, 24.0, 8.0, 14.0, 11.0, 8.0, 9.0, 4.0, 6.0, 4.0, 1.0, 2.0, 2.0, 3.0, 1.0, 2.0], "bins": [-65.9259033203125, -64.07484436035156, -62.22378158569336, -60.372718811035156, -58.52165985107422, -56.670597076416016, -54.81953430175781, -52.968475341796875, -51.11741256713867, -49.26634979248047, -47.41529083251953, -45.56422805786133, -43.713165283203125, -41.86210632324219, -40.011043548583984, -38.15998077392578, -36.308921813964844, -34.45785903930664, -32.6068000793457, -30.7557373046875, -28.90467643737793, -27.05361557006836, -25.202552795410156, -23.351491928100586, -21.500431060791016, -19.649370193481445, -17.798309326171875, -15.947246551513672, -14.096185684204102, -12.245124816894531, -10.394062995910645, -8.543001174926758, -6.6919403076171875, -4.840878963470459, -2.9898176193237305, -1.138756275177002, 0.7123050689697266, 2.563365936279297, 4.414427757263184, 6.26548957824707, 8.11655044555664, 9.967611312866211, 11.818673133850098, 13.669734954833984, 15.520795822143555, 17.371856689453125, 19.222919464111328, 21.0739803314209, 22.92504119873047, 24.77610206604004, 26.62716293334961, 28.478225708007812, 30.329286575317383, 32.18034744262695, 34.031410217285156, 35.882469177246094, 37.7335319519043, 39.5845947265625, 41.43565368652344, 43.28671646118164, 45.137779235839844, 46.98883819580078, 48.839900970458984, 50.69096374511719, 52.542022705078125]}, "gradients/decoder.transformer.h.10.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 3.0, 3.0, 7.0, 1.0, 2.0, 5.0, 2.0, 12.0, 20.0, 11.0, 11.0, 23.0, 19.0, 24.0, 26.0, 23.0, 35.0, 29.0, 33.0, 41.0, 40.0, 39.0, 47.0, 51.0, 47.0, 43.0, 30.0, 38.0, 39.0, 36.0, 29.0, 32.0, 28.0, 28.0, 36.0, 13.0, 20.0, 15.0, 8.0, 12.0, 11.0, 13.0, 7.0, 6.0, 5.0, 2.0, 4.0, 2.0, 3.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-9.59375, -9.287353515625, -8.98095703125, -8.674560546875, -8.3681640625, -8.061767578125, -7.75537109375, -7.448974609375, -7.142578125, -6.836181640625, -6.52978515625, -6.223388671875, -5.9169921875, -5.610595703125, -5.30419921875, -4.997802734375, -4.69140625, -4.385009765625, -4.07861328125, -3.772216796875, -3.4658203125, -3.159423828125, -2.85302734375, -2.546630859375, -2.240234375, -1.933837890625, -1.62744140625, -1.321044921875, -1.0146484375, -0.708251953125, -0.40185546875, -0.095458984375, 0.2109375, 0.517333984375, 0.82373046875, 1.130126953125, 1.4365234375, 1.742919921875, 2.04931640625, 2.355712890625, 2.662109375, 2.968505859375, 3.27490234375, 3.581298828125, 3.8876953125, 4.194091796875, 4.50048828125, 4.806884765625, 5.11328125, 5.419677734375, 5.72607421875, 6.032470703125, 6.3388671875, 6.645263671875, 6.95166015625, 7.258056640625, 7.564453125, 7.870849609375, 8.17724609375, 8.483642578125, 8.7900390625, 9.096435546875, 9.40283203125, 9.709228515625, 10.015625]}, "gradients/decoder.transformer.h.10.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 4.0, 1.0, 9.0, 5.0, 4.0, 11.0, 22.0, 27.0, 26.0, 37.0, 63.0, 92.0, 173.0, 289.0, 444.0, 610.0, 915.0, 1467.0, 2351.0, 3712.0, 5765.0, 9369.0, 14953.0, 24527.0, 40062.0, 65818.0, 107544.0, 164162.0, 195680.0, 153577.0, 98895.0, 61154.0, 37009.0, 22448.0, 13802.0, 8753.0, 5473.0, 3283.0, 2106.0, 1402.0, 899.0, 545.0, 371.0, 239.0, 163.0, 97.0, 74.0, 40.0, 39.0, 12.0, 14.0, 9.0, 9.0, 8.0, 3.0, 1.0, 4.0, 3.0], "bins": [-0.98095703125, -0.95245361328125, -0.9239501953125, -0.89544677734375, -0.866943359375, -0.83843994140625, -0.8099365234375, -0.78143310546875, -0.7529296875, -0.72442626953125, -0.6959228515625, -0.66741943359375, -0.638916015625, -0.61041259765625, -0.5819091796875, -0.55340576171875, -0.52490234375, -0.49639892578125, -0.4678955078125, -0.43939208984375, -0.410888671875, -0.38238525390625, -0.3538818359375, -0.32537841796875, -0.296875, -0.26837158203125, -0.2398681640625, -0.21136474609375, -0.182861328125, -0.15435791015625, -0.1258544921875, -0.09735107421875, -0.06884765625, -0.04034423828125, -0.0118408203125, 0.01666259765625, 0.045166015625, 0.07366943359375, 0.1021728515625, 0.13067626953125, 0.1591796875, 0.18768310546875, 0.2161865234375, 0.24468994140625, 0.273193359375, 0.30169677734375, 0.3302001953125, 0.35870361328125, 0.38720703125, 0.41571044921875, 0.4442138671875, 0.47271728515625, 0.501220703125, 0.52972412109375, 0.5582275390625, 0.58673095703125, 0.615234375, 0.64373779296875, 0.6722412109375, 0.70074462890625, 0.729248046875, 0.75775146484375, 0.7862548828125, 0.81475830078125, 0.84326171875]}, "gradients/decoder.transformer.h.10.crossattention.c_attn.bias": {"_type": "histogram", "values": [2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 2.0, 0.0, 5.0, 10.0, 4.0, 10.0, 14.0, 15.0, 8.0, 11.0, 13.0, 17.0, 12.0, 19.0, 31.0, 34.0, 28.0, 45.0, 40.0, 38.0, 42.0, 39.0, 34.0, 35.0, 1059.0, 43.0, 33.0, 38.0, 34.0, 34.0, 34.0, 37.0, 30.0, 23.0, 20.0, 19.0, 20.0, 15.0, 13.0, 16.0, 12.0, 10.0, 12.0, 8.0, 4.0, 4.0, 3.0, 3.0, 1.0, 3.0, 1.0, 2.0, 1.0, 1.0, 1.0], "bins": [-6.0703125, -5.88861083984375, -5.7069091796875, -5.52520751953125, -5.343505859375, -5.16180419921875, -4.9801025390625, -4.79840087890625, -4.61669921875, -4.43499755859375, -4.2532958984375, -4.07159423828125, -3.889892578125, -3.70819091796875, -3.5264892578125, -3.34478759765625, -3.1630859375, -2.98138427734375, -2.7996826171875, -2.61798095703125, -2.436279296875, -2.25457763671875, -2.0728759765625, -1.89117431640625, -1.70947265625, -1.52777099609375, -1.3460693359375, -1.16436767578125, -0.982666015625, -0.80096435546875, -0.6192626953125, -0.43756103515625, -0.255859375, -0.07415771484375, 0.1075439453125, 0.28924560546875, 0.470947265625, 0.65264892578125, 0.8343505859375, 1.01605224609375, 1.19775390625, 1.37945556640625, 1.5611572265625, 1.74285888671875, 1.924560546875, 2.10626220703125, 2.2879638671875, 2.46966552734375, 2.6513671875, 2.83306884765625, 3.0147705078125, 3.19647216796875, 3.378173828125, 3.55987548828125, 3.7415771484375, 3.92327880859375, 4.10498046875, 4.28668212890625, 4.4683837890625, 4.65008544921875, 4.831787109375, 5.01348876953125, 5.1951904296875, 5.37689208984375, 5.55859375]}, "gradients/decoder.transformer.h.10.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 4.0, 1.0, 4.0, 5.0, 2.0, 17.0, 16.0, 20.0, 34.0, 45.0, 73.0, 98.0, 140.0, 243.0, 331.0, 500.0, 767.0, 1175.0, 1846.0, 2632.0, 4227.0, 6625.0, 10201.0, 15843.0, 25089.0, 39552.0, 62081.0, 94810.0, 135367.0, 1206662.0, 159363.0, 114195.0, 77685.0, 49630.0, 31489.0, 19927.0, 12939.0, 8187.0, 5334.0, 3429.0, 2257.0, 1511.0, 977.0, 622.0, 395.0, 239.0, 203.0, 119.0, 83.0, 55.0, 22.0, 31.0, 17.0, 16.0, 3.0, 4.0, 3.0, 3.0, 1.0, 2.0], "bins": [-0.685546875, -0.6652374267578125, -0.644927978515625, -0.6246185302734375, -0.60430908203125, -0.5839996337890625, -0.563690185546875, -0.5433807373046875, -0.5230712890625, -0.5027618408203125, -0.482452392578125, -0.4621429443359375, -0.44183349609375, -0.4215240478515625, -0.401214599609375, -0.3809051513671875, -0.360595703125, -0.3402862548828125, -0.319976806640625, -0.2996673583984375, -0.27935791015625, -0.2590484619140625, -0.238739013671875, -0.2184295654296875, -0.1981201171875, -0.1778106689453125, -0.157501220703125, -0.1371917724609375, -0.11688232421875, -0.0965728759765625, -0.076263427734375, -0.0559539794921875, -0.03564453125, -0.0153350830078125, 0.004974365234375, 0.0252838134765625, 0.04559326171875, 0.0659027099609375, 0.086212158203125, 0.1065216064453125, 0.1268310546875, 0.1471405029296875, 0.167449951171875, 0.1877593994140625, 0.20806884765625, 0.2283782958984375, 0.248687744140625, 0.2689971923828125, 0.289306640625, 0.3096160888671875, 0.329925537109375, 0.3502349853515625, 0.37054443359375, 0.3908538818359375, 0.411163330078125, 0.4314727783203125, 0.4517822265625, 0.4720916748046875, 0.492401123046875, 0.5127105712890625, 0.53302001953125, 0.5533294677734375, 0.573638916015625, 0.5939483642578125, 0.6142578125]}, "gradients/decoder.transformer.h.10.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 2.0, 0.0, 0.0, 1.0, 1.0, 4.0, 2.0, 2.0, 5.0, 8.0, 3.0, 7.0, 11.0, 15.0, 17.0, 16.0, 25.0, 28.0, 43.0, 38.0, 48.0, 75.0, 72.0, 94.0, 88.0, 69.0, 78.0, 59.0, 50.0, 30.0, 23.0, 23.0, 16.0, 13.0, 10.0, 4.0, 6.0, 6.0, 6.0, 3.0, 4.0, 2.0, 2.0, 2.0, 0.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00814056396484375, -0.00788867473602295, -0.0076367855072021484, -0.007384896278381348, -0.007133007049560547, -0.006881117820739746, -0.006629228591918945, -0.0063773393630981445, -0.006125450134277344, -0.005873560905456543, -0.005621671676635742, -0.005369782447814941, -0.005117893218994141, -0.00486600399017334, -0.004614114761352539, -0.004362225532531738, -0.0041103363037109375, -0.0038584470748901367, -0.003606557846069336, -0.003354668617248535, -0.0031027793884277344, -0.0028508901596069336, -0.002599000930786133, -0.002347111701965332, -0.0020952224731445312, -0.0018433332443237305, -0.0015914440155029297, -0.001339554786682129, -0.0010876655578613281, -0.0008357763290405273, -0.0005838871002197266, -0.0003319978713989258, -8.0108642578125e-05, 0.00017178058624267578, 0.00042366981506347656, 0.0006755590438842773, 0.0009274482727050781, 0.001179337501525879, 0.0014312267303466797, 0.0016831159591674805, 0.0019350051879882812, 0.002186894416809082, 0.002438783645629883, 0.0026906728744506836, 0.0029425621032714844, 0.003194451332092285, 0.003446340560913086, 0.0036982297897338867, 0.0039501190185546875, 0.004202008247375488, 0.004453897476196289, 0.00470578670501709, 0.004957675933837891, 0.005209565162658691, 0.005461454391479492, 0.005713343620300293, 0.005965232849121094, 0.0062171220779418945, 0.006469011306762695, 0.006720900535583496, 0.006972789764404297, 0.007224678993225098, 0.0074765682220458984, 0.007728457450866699, 0.0079803466796875]}, "gradients/decoder.transformer.h.10.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 4.0, 3.0, 5.0, 5.0, 6.0, 4.0, 8.0, 9.0, 16.0, 25.0, 27.0, 25.0, 40.0, 64.0, 100.0, 164.0, 369.0, 4519.0, 1029888.0, 12130.0, 500.0, 188.0, 116.0, 78.0, 61.0, 48.0, 27.0, 23.0, 22.0, 19.0, 11.0, 14.0, 9.0, 7.0, 4.0, 2.0, 7.0, 2.0, 3.0, 0.0, 3.0, 0.0, 2.0, 3.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.17041015625, -0.16501426696777344, -0.15961837768554688, -0.1542224884033203, -0.14882659912109375, -0.1434307098388672, -0.13803482055664062, -0.13263893127441406, -0.1272430419921875, -0.12184715270996094, -0.11645126342773438, -0.11105537414550781, -0.10565948486328125, -0.10026359558105469, -0.09486770629882812, -0.08947181701660156, -0.084075927734375, -0.07868003845214844, -0.07328414916992188, -0.06788825988769531, -0.06249237060546875, -0.05709648132324219, -0.051700592041015625, -0.04630470275878906, -0.0409088134765625, -0.03551292419433594, -0.030117034912109375, -0.024721145629882812, -0.01932525634765625, -0.013929367065429688, -0.008533477783203125, -0.0031375885009765625, 0.00225830078125, 0.0076541900634765625, 0.013050079345703125, 0.018445968627929688, 0.02384185791015625, 0.029237747192382812, 0.034633636474609375, 0.04002952575683594, 0.0454254150390625, 0.05082130432128906, 0.056217193603515625, 0.06161308288574219, 0.06700897216796875, 0.07240486145019531, 0.07780075073242188, 0.08319664001464844, 0.088592529296875, 0.09398841857910156, 0.09938430786132812, 0.10478019714355469, 0.11017608642578125, 0.11557197570800781, 0.12096786499023438, 0.12636375427246094, 0.1317596435546875, 0.13715553283691406, 0.14255142211914062, 0.1479473114013672, 0.15334320068359375, 0.1587390899658203, 0.16413497924804688, 0.16953086853027344, 0.1749267578125]}, "gradients/decoder.transformer.h.10.ln_cross_attn.weight": {"_type": "histogram", "values": [7.0, 391.0, 608.0, 12.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0034315327648073435, -0.001782377134077251, -0.00013322150334715843, 0.0015159340109676123, 0.0031650897581130266, 0.004814245738089085, 0.006463401019573212, 0.008112557232379913, 0.00976171251386404, 0.011410867795348167, 0.013060024008154869, 0.014709179289638996, 0.016358334571123123, 0.018007490783929825, 0.019656646996736526, 0.021305803209543228, 0.02295495755970478, 0.024604113772511482, 0.026253268122673035, 0.027902424335479736, 0.029551580548286438, 0.03120073676109314, 0.03284989297389984, 0.03449904918670654, 0.036148201674222946, 0.03779735788702965, 0.03944651409983635, 0.04109566658735275, 0.042744822800159454, 0.044393979012966156, 0.04604313522577286, 0.04769229143857956, 0.04934144392609596, 0.050990600138902664, 0.052639756351709366, 0.05428890883922577, 0.05593806505203247, 0.05758722126483917, 0.059236377477645874, 0.060885533690452576, 0.06253468990325928, 0.06418384611606598, 0.06583300232887268, 0.06748215854167938, 0.06913131475448608, 0.07078047096729279, 0.07242962718009949, 0.07407878339290619, 0.0757279321551323, 0.077377088367939, 0.0790262445807457, 0.0806754007935524, 0.0823245570063591, 0.0839737132191658, 0.0856228619813919, 0.08727201819419861, 0.08892118185758591, 0.09057033807039261, 0.09221949428319931, 0.09386865049600601, 0.09551780670881271, 0.09716696292161942, 0.09881611168384552, 0.10046526789665222, 0.10211442410945892]}, "gradients/decoder.transformer.h.10.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 4.0, 8.0, 9.0, 6.0, 15.0, 11.0, 15.0, 21.0, 22.0, 19.0, 24.0, 21.0, 26.0, 32.0, 40.0, 47.0, 36.0, 41.0, 40.0, 39.0, 46.0, 49.0, 40.0, 45.0, 45.0, 31.0, 33.0, 21.0, 33.0, 32.0, 20.0, 22.0, 22.0, 20.0, 17.0, 11.0, 13.0, 7.0, 4.0, 9.0, 9.0, 5.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0], "bins": [-0.004518568515777588, -0.004389395006000996, -0.004260221496224403, -0.004131047986447811, -0.004001874476671219, -0.0038727009668946266, -0.0037435274571180344, -0.003614353947341442, -0.00348518043756485, -0.0033560069277882576, -0.0032268334180116653, -0.003097659908235073, -0.002968486398458481, -0.0028393128886818886, -0.0027101393789052963, -0.002580965869128704, -0.002451792359352112, -0.0023226188495755196, -0.0021934453397989273, -0.002064271830022335, -0.0019350983202457428, -0.0018059248104691505, -0.0016767513006925583, -0.001547577790915966, -0.0014184042811393738, -0.0012892307713627815, -0.0011600572615861893, -0.001030883751809597, -0.0009017102420330048, -0.0007725367322564125, -0.0006433632224798203, -0.000514189712703228, -0.00038501620292663574, -0.0002558426931500435, -0.00012666918337345123, 2.5043264031410217e-06, 0.00013167783617973328, 0.00026085134595632553, 0.0003900248557329178, 0.00051919836550951, 0.0006483718752861023, 0.0007775453850626945, 0.0009067188948392868, 0.001035892404615879, 0.0011650659143924713, 0.0012942394241690636, 0.0014234129339456558, 0.001552586443722248, 0.0016817599534988403, 0.0018109334632754326, 0.0019401069730520248, 0.002069280482828617, 0.0021984539926052094, 0.0023276275023818016, 0.002456801012158394, 0.002585974521934986, 0.0027151480317115784, 0.0028443215414881706, 0.002973495051264763, 0.003102668561041355, 0.0032318420708179474, 0.0033610155805945396, 0.003490189090371132, 0.003619362600147724, 0.0037485361099243164]}, "gradients/decoder.transformer.h.10.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 3.0, 3.0, 7.0, 1.0, 2.0, 5.0, 2.0, 12.0, 20.0, 11.0, 11.0, 23.0, 19.0, 24.0, 26.0, 23.0, 35.0, 29.0, 33.0, 41.0, 40.0, 39.0, 47.0, 51.0, 47.0, 42.0, 31.0, 38.0, 39.0, 36.0, 29.0, 32.0, 28.0, 28.0, 36.0, 13.0, 20.0, 15.0, 8.0, 12.0, 11.0, 13.0, 7.0, 6.0, 5.0, 2.0, 4.0, 2.0, 3.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-9.59375, -9.287353515625, -8.98095703125, -8.674560546875, -8.3681640625, -8.061767578125, -7.75537109375, -7.448974609375, -7.142578125, -6.836181640625, -6.52978515625, -6.223388671875, -5.9169921875, -5.610595703125, -5.30419921875, -4.997802734375, -4.69140625, -4.385009765625, -4.07861328125, -3.772216796875, -3.4658203125, -3.159423828125, -2.85302734375, -2.546630859375, -2.240234375, -1.933837890625, -1.62744140625, -1.321044921875, -1.0146484375, -0.708251953125, -0.40185546875, -0.095458984375, 0.2109375, 0.517333984375, 0.82373046875, 1.130126953125, 1.4365234375, 1.742919921875, 2.04931640625, 2.355712890625, 2.662109375, 2.968505859375, 3.27490234375, 3.581298828125, 3.8876953125, 4.194091796875, 4.50048828125, 4.806884765625, 5.11328125, 5.419677734375, 5.72607421875, 6.032470703125, 6.3388671875, 6.645263671875, 6.95166015625, 7.258056640625, 7.564453125, 7.870849609375, 8.17724609375, 8.483642578125, 8.7900390625, 9.096435546875, 9.40283203125, 9.709228515625, 10.015625]}, "gradients/decoder.transformer.h.10.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 6.0, 5.0, 5.0, 4.0, 7.0, 8.0, 13.0, 15.0, 25.0, 32.0, 46.0, 51.0, 73.0, 136.0, 150.0, 240.0, 375.0, 670.0, 1417.0, 3204.0, 8004.0, 20953.0, 58814.0, 192869.0, 507883.0, 168509.0, 52836.0, 18757.0, 7298.0, 2982.0, 1361.0, 676.0, 387.0, 233.0, 132.0, 101.0, 65.0, 61.0, 39.0, 33.0, 19.0, 22.0, 16.0, 7.0, 5.0, 8.0, 5.0, 3.0, 2.0, 3.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-10.265625, -9.939208984375, -9.61279296875, -9.286376953125, -8.9599609375, -8.633544921875, -8.30712890625, -7.980712890625, -7.654296875, -7.327880859375, -7.00146484375, -6.675048828125, -6.3486328125, -6.022216796875, -5.69580078125, -5.369384765625, -5.04296875, -4.716552734375, -4.39013671875, -4.063720703125, -3.7373046875, -3.410888671875, -3.08447265625, -2.758056640625, -2.431640625, -2.105224609375, -1.77880859375, -1.452392578125, -1.1259765625, -0.799560546875, -0.47314453125, -0.146728515625, 0.1796875, 0.506103515625, 0.83251953125, 1.158935546875, 1.4853515625, 1.811767578125, 2.13818359375, 2.464599609375, 2.791015625, 3.117431640625, 3.44384765625, 3.770263671875, 4.0966796875, 4.423095703125, 4.74951171875, 5.075927734375, 5.40234375, 5.728759765625, 6.05517578125, 6.381591796875, 6.7080078125, 7.034423828125, 7.36083984375, 7.687255859375, 8.013671875, 8.340087890625, 8.66650390625, 8.992919921875, 9.3193359375, 9.645751953125, 9.97216796875, 10.298583984375, 10.625]}, "gradients/decoder.transformer.h.10.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 6.0, 1.0, 2.0, 3.0, 8.0, 4.0, 2.0, 7.0, 8.0, 8.0, 11.0, 9.0, 14.0, 16.0, 16.0, 32.0, 25.0, 28.0, 50.0, 39.0, 56.0, 50.0, 54.0, 90.0, 308.0, 1585.0, 162.0, 58.0, 57.0, 46.0, 43.0, 41.0, 31.0, 25.0, 21.0, 23.0, 24.0, 25.0, 16.0, 11.0, 10.0, 7.0, 6.0, 4.0, 8.0, 4.0, 3.0, 4.0, 3.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-27.703125, -26.754150390625, -25.80517578125, -24.856201171875, -23.9072265625, -22.958251953125, -22.00927734375, -21.060302734375, -20.111328125, -19.162353515625, -18.21337890625, -17.264404296875, -16.3154296875, -15.366455078125, -14.41748046875, -13.468505859375, -12.51953125, -11.570556640625, -10.62158203125, -9.672607421875, -8.7236328125, -7.774658203125, -6.82568359375, -5.876708984375, -4.927734375, -3.978759765625, -3.02978515625, -2.080810546875, -1.1318359375, -0.182861328125, 0.76611328125, 1.715087890625, 2.6640625, 3.613037109375, 4.56201171875, 5.510986328125, 6.4599609375, 7.408935546875, 8.35791015625, 9.306884765625, 10.255859375, 11.204833984375, 12.15380859375, 13.102783203125, 14.0517578125, 15.000732421875, 15.94970703125, 16.898681640625, 17.84765625, 18.796630859375, 19.74560546875, 20.694580078125, 21.6435546875, 22.592529296875, 23.54150390625, 24.490478515625, 25.439453125, 26.388427734375, 27.33740234375, 28.286376953125, 29.2353515625, 30.184326171875, 31.13330078125, 32.082275390625, 33.03125]}, "gradients/decoder.transformer.h.10.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 1.0, 1.0, 0.0, 2.0, 3.0, 3.0, 6.0, 5.0, 16.0, 15.0, 14.0, 20.0, 35.0, 46.0, 63.0, 87.0, 104.0, 174.0, 261.0, 504.0, 1367.0, 40567.0, 3092917.0, 7553.0, 828.0, 382.0, 232.0, 152.0, 112.0, 63.0, 52.0, 30.0, 29.0, 22.0, 9.0, 15.0, 8.0, 4.0, 4.0, 4.0, 3.0, 3.0, 0.0, 3.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-105.75, -102.8388671875, -99.927734375, -97.0166015625, -94.10546875, -91.1943359375, -88.283203125, -85.3720703125, -82.4609375, -79.5498046875, -76.638671875, -73.7275390625, -70.81640625, -67.9052734375, -64.994140625, -62.0830078125, -59.171875, -56.2607421875, -53.349609375, -50.4384765625, -47.52734375, -44.6162109375, -41.705078125, -38.7939453125, -35.8828125, -32.9716796875, -30.060546875, -27.1494140625, -24.23828125, -21.3271484375, -18.416015625, -15.5048828125, -12.59375, -9.6826171875, -6.771484375, -3.8603515625, -0.94921875, 1.9619140625, 4.873046875, 7.7841796875, 10.6953125, 13.6064453125, 16.517578125, 19.4287109375, 22.33984375, 25.2509765625, 28.162109375, 31.0732421875, 33.984375, 36.8955078125, 39.806640625, 42.7177734375, 45.62890625, 48.5400390625, 51.451171875, 54.3623046875, 57.2734375, 60.1845703125, 63.095703125, 66.0068359375, 68.91796875, 71.8291015625, 74.740234375, 77.6513671875, 80.5625]}, "gradients/decoder.transformer.h.10.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 19.0, 115.0, 299.0, 356.0, 182.0, 35.0, 9.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-136.70484924316406, -133.67849731445312, -130.6521453857422, -127.62578582763672, -124.59943389892578, -121.57308197021484, -118.5467300415039, -115.52037048339844, -112.4940185546875, -109.46766662597656, -106.44131469726562, -103.41495513916016, -100.38860321044922, -97.36225128173828, -94.33589935302734, -91.30953979492188, -88.28318786621094, -85.2568359375, -82.23048400878906, -79.2041244506836, -76.17777252197266, -73.15142059326172, -70.12506866455078, -67.09870910644531, -64.0723648071289, -61.04601287841797, -58.019657135009766, -54.99330520629883, -51.966949462890625, -48.94059753417969, -45.91424560546875, -42.88788986206055, -39.86153030395508, -36.83517837524414, -33.80882263183594, -30.782470703125, -27.756114959716797, -24.72976303100586, -21.70340919494629, -18.67705535888672, -15.650701522827148, -12.624347686767578, -9.597993850708008, -6.571640968322754, -3.5452871322631836, -0.5189332962036133, 2.5074195861816406, 5.533773422241211, 8.560127258300781, 11.586481094360352, 14.612834930419922, 17.63918685913086, 20.665542602539062, 23.69189453125, 26.71824836730957, 29.74460220336914, 32.770957946777344, 35.79730987548828, 38.823665618896484, 41.85001754760742, 44.876373291015625, 47.90272521972656, 50.9290771484375, 53.9554328918457, 56.98178482055664]}, "gradients/decoder.transformer.h.10.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 2.0, 2.0, 2.0, 2.0, 3.0, 4.0, 9.0, 9.0, 9.0, 14.0, 5.0, 9.0, 17.0, 27.0, 13.0, 24.0, 36.0, 46.0, 29.0, 24.0, 35.0, 30.0, 35.0, 46.0, 50.0, 46.0, 36.0, 40.0, 36.0, 42.0, 37.0, 29.0, 32.0, 37.0, 30.0, 26.0, 21.0, 26.0, 18.0, 8.0, 9.0, 16.0, 7.0, 5.0, 7.0, 3.0, 5.0, 4.0, 3.0, 1.0, 4.0, 0.0, 1.0, 4.0, 3.0], "bins": [-80.87769317626953, -78.50401306152344, -76.13034057617188, -73.75666809082031, -71.38298797607422, -69.00930786132812, -66.63563537597656, -64.261962890625, -61.888282775878906, -59.51460647583008, -57.14093017578125, -54.76725387573242, -52.393577575683594, -50.019901275634766, -47.64622497558594, -45.27254867553711, -42.89887237548828, -40.52519607543945, -38.151519775390625, -35.7778434753418, -33.40416717529297, -31.03049087524414, -28.656814575195312, -26.283138275146484, -23.909461975097656, -21.535785675048828, -19.162109375, -16.788433074951172, -14.414756774902344, -12.041080474853516, -9.667404174804688, -7.293727874755859, -4.9200592041015625, -2.5463829040527344, -0.17270660400390625, 2.200969696044922, 4.57464599609375, 6.948322296142578, 9.321998596191406, 11.695674896240234, 14.069351196289062, 16.44302749633789, 18.81670379638672, 21.190380096435547, 23.564056396484375, 25.937732696533203, 28.31140899658203, 30.68508529663086, 33.05876159667969, 35.432437896728516, 37.806114196777344, 40.17979049682617, 42.553466796875, 44.92714309692383, 47.300819396972656, 49.674495697021484, 52.04817199707031, 54.42184829711914, 56.79552459716797, 59.1692008972168, 61.542877197265625, 63.91655349731445, 66.29022979736328, 68.66390991210938, 71.03758239746094]}, "gradients/decoder.transformer.h.9.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 5.0, 6.0, 3.0, 2.0, 1.0, 11.0, 13.0, 13.0, 13.0, 19.0, 17.0, 29.0, 18.0, 26.0, 28.0, 35.0, 32.0, 39.0, 40.0, 34.0, 43.0, 55.0, 51.0, 39.0, 38.0, 39.0, 39.0, 33.0, 35.0, 32.0, 21.0, 34.0, 33.0, 25.0, 18.0, 14.0, 17.0, 7.0, 11.0, 9.0, 11.0, 4.0, 6.0, 5.0, 3.0, 3.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-10.3046875, -9.9801025390625, -9.655517578125, -9.3309326171875, -9.00634765625, -8.6817626953125, -8.357177734375, -8.0325927734375, -7.7080078125, -7.3834228515625, -7.058837890625, -6.7342529296875, -6.40966796875, -6.0850830078125, -5.760498046875, -5.4359130859375, -5.111328125, -4.7867431640625, -4.462158203125, -4.1375732421875, -3.81298828125, -3.4884033203125, -3.163818359375, -2.8392333984375, -2.5146484375, -2.1900634765625, -1.865478515625, -1.5408935546875, -1.21630859375, -0.8917236328125, -0.567138671875, -0.2425537109375, 0.08203125, 0.4066162109375, 0.731201171875, 1.0557861328125, 1.38037109375, 1.7049560546875, 2.029541015625, 2.3541259765625, 2.6787109375, 3.0032958984375, 3.327880859375, 3.6524658203125, 3.97705078125, 4.3016357421875, 4.626220703125, 4.9508056640625, 5.275390625, 5.5999755859375, 5.924560546875, 6.2491455078125, 6.57373046875, 6.8983154296875, 7.222900390625, 7.5474853515625, 7.8720703125, 8.1966552734375, 8.521240234375, 8.8458251953125, 9.17041015625, 9.4949951171875, 9.819580078125, 10.1441650390625, 10.46875]}, "gradients/decoder.transformer.h.9.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 2.0, 3.0, 3.0, 5.0, 9.0, 12.0, 9.0, 11.0, 19.0, 19.0, 26.0, 28.0, 23.0, 47.0, 63.0, 99.0, 196.0, 367.0, 970.0, 4355.0, 32710.0, 662498.0, 3198662.0, 272339.0, 17579.0, 2633.0, 769.0, 305.0, 165.0, 93.0, 69.0, 43.0, 37.0, 22.0, 17.0, 18.0, 18.0, 8.0, 9.0, 6.0, 6.0, 8.0, 4.0, 4.0, 3.0, 2.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-32.59375, -31.5869140625, -30.580078125, -29.5732421875, -28.56640625, -27.5595703125, -26.552734375, -25.5458984375, -24.5390625, -23.5322265625, -22.525390625, -21.5185546875, -20.51171875, -19.5048828125, -18.498046875, -17.4912109375, -16.484375, -15.4775390625, -14.470703125, -13.4638671875, -12.45703125, -11.4501953125, -10.443359375, -9.4365234375, -8.4296875, -7.4228515625, -6.416015625, -5.4091796875, -4.40234375, -3.3955078125, -2.388671875, -1.3818359375, -0.375, 0.6318359375, 1.638671875, 2.6455078125, 3.65234375, 4.6591796875, 5.666015625, 6.6728515625, 7.6796875, 8.6865234375, 9.693359375, 10.7001953125, 11.70703125, 12.7138671875, 13.720703125, 14.7275390625, 15.734375, 16.7412109375, 17.748046875, 18.7548828125, 19.76171875, 20.7685546875, 21.775390625, 22.7822265625, 23.7890625, 24.7958984375, 25.802734375, 26.8095703125, 27.81640625, 28.8232421875, 29.830078125, 30.8369140625, 31.84375]}, "gradients/decoder.transformer.h.9.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 4.0, 1.0, 4.0, 5.0, 6.0, 7.0, 19.0, 19.0, 37.0, 44.0, 79.0, 120.0, 145.0, 238.0, 430.0, 569.0, 668.0, 594.0, 413.0, 239.0, 141.0, 97.0, 64.0, 48.0, 34.0, 20.0, 7.0, 11.0, 7.0, 6.0, 3.0, 2.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-21.640625, -20.853271484375, -20.06591796875, -19.278564453125, -18.4912109375, -17.703857421875, -16.91650390625, -16.129150390625, -15.341796875, -14.554443359375, -13.76708984375, -12.979736328125, -12.1923828125, -11.405029296875, -10.61767578125, -9.830322265625, -9.04296875, -8.255615234375, -7.46826171875, -6.680908203125, -5.8935546875, -5.106201171875, -4.31884765625, -3.531494140625, -2.744140625, -1.956787109375, -1.16943359375, -0.382080078125, 0.4052734375, 1.192626953125, 1.97998046875, 2.767333984375, 3.5546875, 4.342041015625, 5.12939453125, 5.916748046875, 6.7041015625, 7.491455078125, 8.27880859375, 9.066162109375, 9.853515625, 10.640869140625, 11.42822265625, 12.215576171875, 13.0029296875, 13.790283203125, 14.57763671875, 15.364990234375, 16.15234375, 16.939697265625, 17.72705078125, 18.514404296875, 19.3017578125, 20.089111328125, 20.87646484375, 21.663818359375, 22.451171875, 23.238525390625, 24.02587890625, 24.813232421875, 25.6005859375, 26.387939453125, 27.17529296875, 27.962646484375, 28.75]}, "gradients/decoder.transformer.h.9.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 4.0, 6.0, 8.0, 9.0, 14.0, 26.0, 29.0, 43.0, 93.0, 121.0, 174.0, 307.0, 444.0, 1433.0, 795435.0, 3392565.0, 2108.0, 557.0, 269.0, 235.0, 154.0, 86.0, 58.0, 48.0, 30.0, 12.0, 7.0, 8.0, 1.0, 3.0, 3.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-163.75, -159.2109375, -154.671875, -150.1328125, -145.59375, -141.0546875, -136.515625, -131.9765625, -127.4375, -122.8984375, -118.359375, -113.8203125, -109.28125, -104.7421875, -100.203125, -95.6640625, -91.125, -86.5859375, -82.046875, -77.5078125, -72.96875, -68.4296875, -63.890625, -59.3515625, -54.8125, -50.2734375, -45.734375, -41.1953125, -36.65625, -32.1171875, -27.578125, -23.0390625, -18.5, -13.9609375, -9.421875, -4.8828125, -0.34375, 4.1953125, 8.734375, 13.2734375, 17.8125, 22.3515625, 26.890625, 31.4296875, 35.96875, 40.5078125, 45.046875, 49.5859375, 54.125, 58.6640625, 63.203125, 67.7421875, 72.28125, 76.8203125, 81.359375, 85.8984375, 90.4375, 94.9765625, 99.515625, 104.0546875, 108.59375, 113.1328125, 117.671875, 122.2109375, 126.75]}, "gradients/decoder.transformer.h.9.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 4.0, 12.0, 27.0, 44.0, 85.0, 133.0, 182.0, 175.0, 143.0, 99.0, 57.0, 29.0, 17.0, 7.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-95.95034790039062, -92.30764770507812, -88.66495513916016, -85.02226257324219, -81.37956237792969, -77.73686218261719, -74.09416961669922, -70.45147705078125, -66.80877685546875, -63.166080474853516, -59.52338409423828, -55.88068771362305, -52.23799133300781, -48.59529495239258, -44.952598571777344, -41.30990219116211, -37.667205810546875, -34.02450942993164, -30.381813049316406, -26.739116668701172, -23.096420288085938, -19.453723907470703, -15.811027526855469, -12.168331146240234, -8.525634765625, -4.882938385009766, -1.2402420043945312, 2.402454376220703, 6.0451507568359375, 9.687847137451172, 13.330543518066406, 16.97323989868164, 20.615936279296875, 24.25863265991211, 27.901329040527344, 31.544025421142578, 35.18672180175781, 38.82941818237305, 42.47211456298828, 46.114810943603516, 49.75750732421875, 53.400203704833984, 57.04290008544922, 60.68559646606445, 64.32829284667969, 67.97099304199219, 71.61368560791016, 75.25637817382812, 78.89907836914062, 82.54177856445312, 86.1844711303711, 89.82716369628906, 93.46986389160156, 97.11256408691406, 100.75525665283203, 104.39794921875, 108.0406494140625, 111.683349609375, 115.32604217529297, 118.96873474121094, 122.61143493652344, 126.25413513183594, 129.89682006835938, 133.53952026367188, 137.18222045898438]}, "gradients/decoder.transformer.h.9.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 2.0, 2.0, 2.0, 5.0, 6.0, 14.0, 8.0, 19.0, 16.0, 10.0, 13.0, 23.0, 21.0, 23.0, 38.0, 27.0, 43.0, 46.0, 36.0, 40.0, 43.0, 33.0, 51.0, 29.0, 45.0, 39.0, 47.0, 32.0, 35.0, 37.0, 36.0, 27.0, 33.0, 30.0, 18.0, 14.0, 13.0, 9.0, 9.0, 7.0, 11.0, 8.0, 2.0, 3.0, 3.0, 2.0, 2.0, 1.0, 2.0, 2.0, 0.0, 0.0, 1.0], "bins": [-61.39698791503906, -59.54039001464844, -57.68379211425781, -55.82719802856445, -53.97060012817383, -52.1140022277832, -50.257408142089844, -48.40081024169922, -46.544212341308594, -44.68761444091797, -42.831016540527344, -40.974422454833984, -39.11782455444336, -37.261226654052734, -35.404632568359375, -33.54803466796875, -31.691436767578125, -29.8348388671875, -27.978242874145508, -26.121646881103516, -24.26504898071289, -22.408451080322266, -20.551855087280273, -18.69525909423828, -16.838661193847656, -14.982064247131348, -13.125467300415039, -11.26887035369873, -9.412273406982422, -7.555676460266113, -5.699079513549805, -3.842482566833496, -1.9858856201171875, -0.1292886734008789, 1.7273082733154297, 3.5839052200317383, 5.440502166748047, 7.2970991134643555, 9.153696060180664, 11.010293006896973, 12.866889953613281, 14.72348690032959, 16.5800838470459, 18.43667984008789, 20.293277740478516, 22.14987564086914, 24.006471633911133, 25.863067626953125, 27.71966552734375, 29.576263427734375, 31.432859420776367, 33.28945541381836, 35.146053314208984, 37.00265121459961, 38.85924530029297, 40.715843200683594, 42.57244110107422, 44.429039001464844, 46.28563690185547, 48.14223098754883, 49.99882888793945, 51.85542678833008, 53.71202087402344, 55.56861877441406, 57.42521667480469]}, "gradients/decoder.transformer.h.9.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 3.0, 6.0, 3.0, 7.0, 8.0, 9.0, 10.0, 18.0, 18.0, 22.0, 36.0, 30.0, 30.0, 33.0, 35.0, 30.0, 48.0, 55.0, 44.0, 53.0, 48.0, 37.0, 41.0, 47.0, 44.0, 32.0, 28.0, 42.0, 23.0, 27.0, 28.0, 16.0, 18.0, 16.0, 13.0, 12.0, 10.0, 6.0, 9.0, 3.0, 4.0, 8.0, 4.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-10.1953125, -9.8502197265625, -9.505126953125, -9.1600341796875, -8.81494140625, -8.4698486328125, -8.124755859375, -7.7796630859375, -7.4345703125, -7.0894775390625, -6.744384765625, -6.3992919921875, -6.05419921875, -5.7091064453125, -5.364013671875, -5.0189208984375, -4.673828125, -4.3287353515625, -3.983642578125, -3.6385498046875, -3.29345703125, -2.9483642578125, -2.603271484375, -2.2581787109375, -1.9130859375, -1.5679931640625, -1.222900390625, -0.8778076171875, -0.53271484375, -0.1876220703125, 0.157470703125, 0.5025634765625, 0.84765625, 1.1927490234375, 1.537841796875, 1.8829345703125, 2.22802734375, 2.5731201171875, 2.918212890625, 3.2633056640625, 3.6083984375, 3.9534912109375, 4.298583984375, 4.6436767578125, 4.98876953125, 5.3338623046875, 5.678955078125, 6.0240478515625, 6.369140625, 6.7142333984375, 7.059326171875, 7.4044189453125, 7.74951171875, 8.0946044921875, 8.439697265625, 8.7847900390625, 9.1298828125, 9.4749755859375, 9.820068359375, 10.1651611328125, 10.51025390625, 10.8553466796875, 11.200439453125, 11.5455322265625, 11.890625]}, "gradients/decoder.transformer.h.9.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 3.0, 2.0, 1.0, 4.0, 5.0, 11.0, 4.0, 13.0, 22.0, 29.0, 62.0, 68.0, 101.0, 159.0, 253.0, 428.0, 567.0, 935.0, 1365.0, 2185.0, 3394.0, 5331.0, 8496.0, 14018.0, 22837.0, 37391.0, 62160.0, 102072.0, 162014.0, 203365.0, 161362.0, 101644.0, 61097.0, 37276.0, 22773.0, 13786.0, 8455.0, 5430.0, 3358.0, 2132.0, 1375.0, 873.0, 573.0, 388.0, 272.0, 165.0, 120.0, 72.0, 49.0, 21.0, 22.0, 5.0, 11.0, 6.0, 5.0, 1.0, 4.0, 1.0, 1.0, 0.0, 2.0], "bins": [-1.033203125, -1.00140380859375, -0.9696044921875, -0.93780517578125, -0.906005859375, -0.87420654296875, -0.8424072265625, -0.81060791015625, -0.77880859375, -0.74700927734375, -0.7152099609375, -0.68341064453125, -0.651611328125, -0.61981201171875, -0.5880126953125, -0.55621337890625, -0.5244140625, -0.49261474609375, -0.4608154296875, -0.42901611328125, -0.397216796875, -0.36541748046875, -0.3336181640625, -0.30181884765625, -0.27001953125, -0.23822021484375, -0.2064208984375, -0.17462158203125, -0.142822265625, -0.11102294921875, -0.0792236328125, -0.04742431640625, -0.015625, 0.01617431640625, 0.0479736328125, 0.07977294921875, 0.111572265625, 0.14337158203125, 0.1751708984375, 0.20697021484375, 0.23876953125, 0.27056884765625, 0.3023681640625, 0.33416748046875, 0.365966796875, 0.39776611328125, 0.4295654296875, 0.46136474609375, 0.4931640625, 0.52496337890625, 0.5567626953125, 0.58856201171875, 0.620361328125, 0.65216064453125, 0.6839599609375, 0.71575927734375, 0.74755859375, 0.77935791015625, 0.8111572265625, 0.84295654296875, 0.874755859375, 0.90655517578125, 0.9383544921875, 0.97015380859375, 1.001953125]}, "gradients/decoder.transformer.h.9.crossattention.c_attn.bias": {"_type": "histogram", "values": [3.0, 0.0, 2.0, 1.0, 4.0, 2.0, 5.0, 4.0, 4.0, 11.0, 5.0, 12.0, 19.0, 12.0, 16.0, 19.0, 24.0, 22.0, 37.0, 25.0, 29.0, 30.0, 26.0, 47.0, 45.0, 53.0, 39.0, 1062.0, 42.0, 35.0, 42.0, 37.0, 42.0, 41.0, 35.0, 27.0, 24.0, 29.0, 22.0, 15.0, 13.0, 12.0, 14.0, 6.0, 15.0, 7.0, 10.0, 3.0, 3.0, 2.0, 4.0, 3.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-5.76171875, -5.55523681640625, -5.3487548828125, -5.14227294921875, -4.935791015625, -4.72930908203125, -4.5228271484375, -4.31634521484375, -4.10986328125, -3.90338134765625, -3.6968994140625, -3.49041748046875, -3.283935546875, -3.07745361328125, -2.8709716796875, -2.66448974609375, -2.4580078125, -2.25152587890625, -2.0450439453125, -1.83856201171875, -1.632080078125, -1.42559814453125, -1.2191162109375, -1.01263427734375, -0.80615234375, -0.59967041015625, -0.3931884765625, -0.18670654296875, 0.019775390625, 0.22625732421875, 0.4327392578125, 0.63922119140625, 0.845703125, 1.05218505859375, 1.2586669921875, 1.46514892578125, 1.671630859375, 1.87811279296875, 2.0845947265625, 2.29107666015625, 2.49755859375, 2.70404052734375, 2.9105224609375, 3.11700439453125, 3.323486328125, 3.52996826171875, 3.7364501953125, 3.94293212890625, 4.1494140625, 4.35589599609375, 4.5623779296875, 4.76885986328125, 4.975341796875, 5.18182373046875, 5.3883056640625, 5.59478759765625, 5.80126953125, 6.00775146484375, 6.2142333984375, 6.42071533203125, 6.627197265625, 6.83367919921875, 7.0401611328125, 7.24664306640625, 7.453125]}, "gradients/decoder.transformer.h.9.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 3.0, 3.0, 3.0, 5.0, 6.0, 20.0, 14.0, 32.0, 31.0, 80.0, 95.0, 143.0, 225.0, 343.0, 512.0, 794.0, 1158.0, 1690.0, 2622.0, 4117.0, 6346.0, 9638.0, 15229.0, 23375.0, 35632.0, 55180.0, 83900.0, 121278.0, 340511.0, 1020260.0, 124786.0, 86563.0, 57284.0, 36765.0, 23924.0, 15565.0, 10084.0, 6352.0, 4274.0, 2846.0, 1819.0, 1228.0, 824.0, 558.0, 316.0, 240.0, 160.0, 105.0, 74.0, 39.0, 30.0, 24.0, 16.0, 9.0, 5.0, 5.0, 3.0, 4.0, 0.0, 1.0], "bins": [-0.6884765625, -0.6676406860351562, -0.6468048095703125, -0.6259689331054688, -0.605133056640625, -0.5842971801757812, -0.5634613037109375, -0.5426254272460938, -0.52178955078125, -0.5009536743164062, -0.4801177978515625, -0.45928192138671875, -0.438446044921875, -0.41761016845703125, -0.3967742919921875, -0.37593841552734375, -0.3551025390625, -0.33426666259765625, -0.3134307861328125, -0.29259490966796875, -0.271759033203125, -0.25092315673828125, -0.2300872802734375, -0.20925140380859375, -0.18841552734375, -0.16757965087890625, -0.1467437744140625, -0.12590789794921875, -0.105072021484375, -0.08423614501953125, -0.0634002685546875, -0.04256439208984375, -0.021728515625, -0.00089263916015625, 0.0199432373046875, 0.04077911376953125, 0.061614990234375, 0.08245086669921875, 0.1032867431640625, 0.12412261962890625, 0.14495849609375, 0.16579437255859375, 0.1866302490234375, 0.20746612548828125, 0.228302001953125, 0.24913787841796875, 0.2699737548828125, 0.29080963134765625, 0.3116455078125, 0.33248138427734375, 0.3533172607421875, 0.37415313720703125, 0.394989013671875, 0.41582489013671875, 0.4366607666015625, 0.45749664306640625, 0.47833251953125, 0.49916839599609375, 0.5200042724609375, 0.5408401489257812, 0.561676025390625, 0.5825119018554688, 0.6033477783203125, 0.6241836547851562, 0.64501953125]}, "gradients/decoder.transformer.h.9.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 5.0, 2.0, 2.0, 2.0, 4.0, 5.0, 6.0, 4.0, 5.0, 6.0, 8.0, 11.0, 18.0, 9.0, 15.0, 26.0, 16.0, 22.0, 44.0, 64.0, 64.0, 68.0, 68.0, 81.0, 73.0, 64.0, 56.0, 51.0, 37.0, 40.0, 19.0, 18.0, 16.0, 16.0, 7.0, 10.0, 8.0, 6.0, 13.0, 8.0, 4.0, 1.0, 6.0, 1.0, 3.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.006893157958984375, -0.0066803693771362305, -0.006467580795288086, -0.006254792213439941, -0.006042003631591797, -0.005829215049743652, -0.005616426467895508, -0.005403637886047363, -0.005190849304199219, -0.004978060722351074, -0.00476527214050293, -0.004552483558654785, -0.004339694976806641, -0.004126906394958496, -0.0039141178131103516, -0.003701329231262207, -0.0034885406494140625, -0.003275752067565918, -0.0030629634857177734, -0.002850174903869629, -0.0026373863220214844, -0.00242459774017334, -0.0022118091583251953, -0.0019990205764770508, -0.0017862319946289062, -0.0015734434127807617, -0.0013606548309326172, -0.0011478662490844727, -0.0009350776672363281, -0.0007222890853881836, -0.0005095005035400391, -0.00029671192169189453, -8.392333984375e-05, 0.00012886524200439453, 0.00034165382385253906, 0.0005544424057006836, 0.0007672309875488281, 0.0009800195693969727, 0.0011928081512451172, 0.0014055967330932617, 0.0016183853149414062, 0.0018311738967895508, 0.0020439624786376953, 0.00225675106048584, 0.0024695396423339844, 0.002682328224182129, 0.0028951168060302734, 0.003107905387878418, 0.0033206939697265625, 0.003533482551574707, 0.0037462711334228516, 0.003959059715270996, 0.004171848297119141, 0.004384636878967285, 0.00459742546081543, 0.004810214042663574, 0.005023002624511719, 0.005235791206359863, 0.005448579788208008, 0.005661368370056152, 0.005874156951904297, 0.006086945533752441, 0.006299734115600586, 0.0065125226974487305, 0.006725311279296875]}, "gradients/decoder.transformer.h.9.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 3.0, 3.0, 1.0, 2.0, 1.0, 7.0, 8.0, 9.0, 15.0, 14.0, 25.0, 20.0, 28.0, 46.0, 39.0, 62.0, 81.0, 160.0, 327.0, 951.0, 66375.0, 972372.0, 6838.0, 499.0, 229.0, 129.0, 86.0, 53.0, 35.0, 23.0, 20.0, 20.0, 21.0, 19.0, 10.0, 9.0, 5.0, 7.0, 4.0, 3.0, 4.0, 1.0, 0.0, 3.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.1715087890625, -0.16635704040527344, -0.16120529174804688, -0.1560535430908203, -0.15090179443359375, -0.1457500457763672, -0.14059829711914062, -0.13544654846191406, -0.1302947998046875, -0.12514305114746094, -0.11999130249023438, -0.11483955383300781, -0.10968780517578125, -0.10453605651855469, -0.09938430786132812, -0.09423255920410156, -0.089080810546875, -0.08392906188964844, -0.07877731323242188, -0.07362556457519531, -0.06847381591796875, -0.06332206726074219, -0.058170318603515625, -0.05301856994628906, -0.0478668212890625, -0.04271507263183594, -0.037563323974609375, -0.03241157531738281, -0.02725982666015625, -0.022108078002929688, -0.016956329345703125, -0.011804580688476562, -0.00665283203125, -0.0015010833740234375, 0.003650665283203125, 0.008802413940429688, 0.01395416259765625, 0.019105911254882812, 0.024257659912109375, 0.029409408569335938, 0.0345611572265625, 0.03971290588378906, 0.044864654541015625, 0.05001640319824219, 0.05516815185546875, 0.06031990051269531, 0.06547164916992188, 0.07062339782714844, 0.075775146484375, 0.08092689514160156, 0.08607864379882812, 0.09123039245605469, 0.09638214111328125, 0.10153388977050781, 0.10668563842773438, 0.11183738708496094, 0.1169891357421875, 0.12214088439941406, 0.12729263305664062, 0.1324443817138672, 0.13759613037109375, 0.1427478790283203, 0.14789962768554688, 0.15305137634277344, 0.158203125]}, "gradients/decoder.transformer.h.9.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 2.0, 25.0, 57.0, 177.0, 388.0, 240.0, 90.0, 24.0, 9.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.03152065351605415, -0.030872805044054985, -0.030224956572055817, -0.029577108100056648, -0.02892925962805748, -0.02828141115605831, -0.027633562684059143, -0.026985714212059975, -0.026337865740060806, -0.025690017268061638, -0.02504216879606247, -0.0243943203240633, -0.023746471852064133, -0.023098623380064964, -0.022450774908065796, -0.021802926436066628, -0.02115507796406746, -0.02050722949206829, -0.019859381020069122, -0.019211532548069954, -0.018563684076070786, -0.017915835604071617, -0.01726798713207245, -0.01662013866007328, -0.015972288325428963, -0.015324439853429794, -0.014676591381430626, -0.014028742909431458, -0.013380894437432289, -0.01273304596543312, -0.012085197493433952, -0.011437349021434784, -0.010789499618113041, -0.010141651146113873, -0.009493802674114704, -0.008845954202115536, -0.008198105730116367, -0.007550257258117199, -0.006902408320456743, -0.006254559848457575, -0.0056067113764584064, -0.004958862904459238, -0.00431101443246007, -0.0036631657276302576, -0.003015317255631089, -0.002367468783631921, -0.0017196200788021088, -0.0010717716068029404, -0.00042392313480377197, 0.00022392539540305734, 0.0008717739256098866, 0.0015196225140243769, 0.0021674709860235453, 0.0028153194580227137, 0.0034631681628525257, 0.004111016634851694, 0.0047588651068508625, 0.005406713578850031, 0.006054562050849199, 0.006702410988509655, 0.007350259460508823, 0.007998107932507992, 0.00864595640450716, 0.009293804876506329, 0.009941653348505497]}, "gradients/decoder.transformer.h.9.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 1.0, 0.0, 3.0, 6.0, 6.0, 8.0, 7.0, 6.0, 5.0, 8.0, 9.0, 11.0, 17.0, 23.0, 21.0, 24.0, 21.0, 29.0, 32.0, 36.0, 39.0, 48.0, 32.0, 45.0, 29.0, 50.0, 45.0, 42.0, 41.0, 32.0, 37.0, 27.0, 43.0, 35.0, 35.0, 21.0, 22.0, 19.0, 16.0, 17.0, 24.0, 13.0, 3.0, 1.0, 5.0, 7.0, 5.0, 3.0, 3.0, 1.0, 4.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.003565967082977295, -0.0034563280642032623, -0.0033466890454292297, -0.003237050026655197, -0.0031274110078811646, -0.003017771989107132, -0.0029081329703330994, -0.0027984939515590668, -0.002688854932785034, -0.0025792159140110016, -0.002469576895236969, -0.0023599378764629364, -0.002250298857688904, -0.002140659838914871, -0.0020310208201408386, -0.001921381801366806, -0.0018117427825927734, -0.0017021037638187408, -0.0015924647450447083, -0.0014828257262706757, -0.001373186707496643, -0.0012635476887226105, -0.0011539086699485779, -0.0010442696511745453, -0.0009346306324005127, -0.0008249916136264801, -0.0007153525948524475, -0.0006057135760784149, -0.0004960745573043823, -0.00038643553853034973, -0.00027679651975631714, -0.00016715750098228455, -5.751848220825195e-05, 5.212053656578064e-05, 0.00016175955533981323, 0.0002713985741138458, 0.0003810375928878784, 0.000490676611661911, 0.0006003156304359436, 0.0007099546492099762, 0.0008195936679840088, 0.0009292326867580414, 0.001038871705532074, 0.0011485107243061066, 0.0012581497430801392, 0.0013677887618541718, 0.0014774277806282043, 0.001587066799402237, 0.0016967058181762695, 0.0018063448369503021, 0.0019159838557243347, 0.0020256228744983673, 0.0021352618932724, 0.0022449009120464325, 0.002354539930820465, 0.0024641789495944977, 0.0025738179683685303, 0.002683456987142563, 0.0027930960059165955, 0.002902735024690628, 0.0030123740434646606, 0.0031220130622386932, 0.003231652081012726, 0.0033412910997867584, 0.003450930118560791]}, "gradients/decoder.transformer.h.9.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 3.0, 6.0, 3.0, 7.0, 8.0, 9.0, 10.0, 18.0, 18.0, 22.0, 36.0, 30.0, 30.0, 33.0, 35.0, 30.0, 48.0, 55.0, 44.0, 53.0, 48.0, 37.0, 41.0, 47.0, 44.0, 32.0, 28.0, 42.0, 23.0, 28.0, 27.0, 16.0, 18.0, 16.0, 13.0, 12.0, 10.0, 6.0, 9.0, 3.0, 4.0, 8.0, 4.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-10.1953125, -9.8502197265625, -9.505126953125, -9.1600341796875, -8.81494140625, -8.4698486328125, -8.124755859375, -7.7796630859375, -7.4345703125, -7.0894775390625, -6.744384765625, -6.3992919921875, -6.05419921875, -5.7091064453125, -5.364013671875, -5.0189208984375, -4.673828125, -4.3287353515625, -3.983642578125, -3.6385498046875, -3.29345703125, -2.9483642578125, -2.603271484375, -2.2581787109375, -1.9130859375, -1.5679931640625, -1.222900390625, -0.8778076171875, -0.53271484375, -0.1876220703125, 0.157470703125, 0.5025634765625, 0.84765625, 1.1927490234375, 1.537841796875, 1.8829345703125, 2.22802734375, 2.5731201171875, 2.918212890625, 3.2633056640625, 3.6083984375, 3.9534912109375, 4.298583984375, 4.6436767578125, 4.98876953125, 5.3338623046875, 5.678955078125, 6.0240478515625, 6.369140625, 6.7142333984375, 7.059326171875, 7.4044189453125, 7.74951171875, 8.0946044921875, 8.439697265625, 8.7847900390625, 9.1298828125, 9.4749755859375, 9.820068359375, 10.1651611328125, 10.51025390625, 10.8553466796875, 11.200439453125, 11.5455322265625, 11.890625]}, "gradients/decoder.transformer.h.9.attn.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 5.0, 5.0, 7.0, 8.0, 7.0, 12.0, 17.0, 32.0, 36.0, 42.0, 69.0, 98.0, 118.0, 191.0, 263.0, 460.0, 929.0, 2792.0, 12846.0, 88210.0, 777322.0, 140417.0, 18357.0, 3696.0, 1159.0, 538.0, 289.0, 190.0, 105.0, 86.0, 58.0, 44.0, 39.0, 28.0, 21.0, 21.0, 10.0, 8.0, 10.0, 5.0, 3.0, 6.0, 5.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-18.59375, -17.965576171875, -17.33740234375, -16.709228515625, -16.0810546875, -15.452880859375, -14.82470703125, -14.196533203125, -13.568359375, -12.940185546875, -12.31201171875, -11.683837890625, -11.0556640625, -10.427490234375, -9.79931640625, -9.171142578125, -8.54296875, -7.914794921875, -7.28662109375, -6.658447265625, -6.0302734375, -5.402099609375, -4.77392578125, -4.145751953125, -3.517578125, -2.889404296875, -2.26123046875, -1.633056640625, -1.0048828125, -0.376708984375, 0.25146484375, 0.879638671875, 1.5078125, 2.135986328125, 2.76416015625, 3.392333984375, 4.0205078125, 4.648681640625, 5.27685546875, 5.905029296875, 6.533203125, 7.161376953125, 7.78955078125, 8.417724609375, 9.0458984375, 9.674072265625, 10.30224609375, 10.930419921875, 11.55859375, 12.186767578125, 12.81494140625, 13.443115234375, 14.0712890625, 14.699462890625, 15.32763671875, 15.955810546875, 16.583984375, 17.212158203125, 17.84033203125, 18.468505859375, 19.0966796875, 19.724853515625, 20.35302734375, 20.981201171875, 21.609375]}, "gradients/decoder.transformer.h.9.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 5.0, 2.0, 1.0, 3.0, 1.0, 5.0, 0.0, 3.0, 6.0, 3.0, 5.0, 11.0, 9.0, 6.0, 25.0, 17.0, 27.0, 26.0, 38.0, 28.0, 38.0, 47.0, 49.0, 51.0, 93.0, 214.0, 1764.0, 138.0, 76.0, 53.0, 44.0, 40.0, 41.0, 36.0, 30.0, 26.0, 15.0, 18.0, 8.0, 12.0, 15.0, 8.0, 13.0, 6.0, 0.0, 0.0, 4.0, 2.0, 1.0, 4.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-38.65625, -37.5322265625, -36.408203125, -35.2841796875, -34.16015625, -33.0361328125, -31.912109375, -30.7880859375, -29.6640625, -28.5400390625, -27.416015625, -26.2919921875, -25.16796875, -24.0439453125, -22.919921875, -21.7958984375, -20.671875, -19.5478515625, -18.423828125, -17.2998046875, -16.17578125, -15.0517578125, -13.927734375, -12.8037109375, -11.6796875, -10.5556640625, -9.431640625, -8.3076171875, -7.18359375, -6.0595703125, -4.935546875, -3.8115234375, -2.6875, -1.5634765625, -0.439453125, 0.6845703125, 1.80859375, 2.9326171875, 4.056640625, 5.1806640625, 6.3046875, 7.4287109375, 8.552734375, 9.6767578125, 10.80078125, 11.9248046875, 13.048828125, 14.1728515625, 15.296875, 16.4208984375, 17.544921875, 18.6689453125, 19.79296875, 20.9169921875, 22.041015625, 23.1650390625, 24.2890625, 25.4130859375, 26.537109375, 27.6611328125, 28.78515625, 29.9091796875, 31.033203125, 32.1572265625, 33.28125]}, "gradients/decoder.transformer.h.9.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 5.0, 1.0, 1.0, 3.0, 2.0, 0.0, 5.0, 11.0, 9.0, 13.0, 11.0, 14.0, 14.0, 21.0, 28.0, 41.0, 53.0, 53.0, 99.0, 129.0, 244.0, 545.0, 4878.0, 3124970.0, 12981.0, 704.0, 297.0, 170.0, 97.0, 67.0, 53.0, 55.0, 20.0, 29.0, 25.0, 18.0, 5.0, 12.0, 9.0, 4.0, 4.0, 5.0, 3.0, 1.0, 3.0, 2.0, 2.0, 2.0, 5.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-99.625, -96.2587890625, -92.892578125, -89.5263671875, -86.16015625, -82.7939453125, -79.427734375, -76.0615234375, -72.6953125, -69.3291015625, -65.962890625, -62.5966796875, -59.23046875, -55.8642578125, -52.498046875, -49.1318359375, -45.765625, -42.3994140625, -39.033203125, -35.6669921875, -32.30078125, -28.9345703125, -25.568359375, -22.2021484375, -18.8359375, -15.4697265625, -12.103515625, -8.7373046875, -5.37109375, -2.0048828125, 1.361328125, 4.7275390625, 8.09375, 11.4599609375, 14.826171875, 18.1923828125, 21.55859375, 24.9248046875, 28.291015625, 31.6572265625, 35.0234375, 38.3896484375, 41.755859375, 45.1220703125, 48.48828125, 51.8544921875, 55.220703125, 58.5869140625, 61.953125, 65.3193359375, 68.685546875, 72.0517578125, 75.41796875, 78.7841796875, 82.150390625, 85.5166015625, 88.8828125, 92.2490234375, 95.615234375, 98.9814453125, 102.34765625, 105.7138671875, 109.080078125, 112.4462890625, 115.8125]}, "gradients/decoder.transformer.h.9.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 4.0, 7.0, 6.0, 12.0, 26.0, 41.0, 56.0, 88.0, 109.0, 126.0, 126.0, 89.0, 115.0, 85.0, 52.0, 34.0, 20.0, 13.0, 4.0, 3.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-34.07749938964844, -33.27366256713867, -32.46982192993164, -31.665983200073242, -30.862144470214844, -30.058307647705078, -29.25446891784668, -28.45063018798828, -27.646791458129883, -26.842952728271484, -26.039113998413086, -25.235275268554688, -24.431438446044922, -23.62759780883789, -22.823760986328125, -22.019922256469727, -21.216083526611328, -20.41224479675293, -19.60840606689453, -18.804567337036133, -18.000728607177734, -17.19689178466797, -16.39305305480957, -15.589214324951172, -14.785375595092773, -13.981536865234375, -13.177698135375977, -12.373860359191895, -11.570021629333496, -10.766182899475098, -9.962345123291016, -9.158506393432617, -8.354665756225586, -7.5508270263671875, -6.746988773345947, -5.943150520324707, -5.139311790466309, -4.33547306060791, -3.53163480758667, -2.7277965545654297, -1.9239578247070312, -1.120119333267212, -0.3162808418273926, 0.48755764961242676, 1.291396141052246, 2.0952348709106445, 2.8990731239318848, 3.702911376953125, 4.506750106811523, 5.310588836669922, 6.114427089691162, 6.918265342712402, 7.722104072570801, 8.5259428024292, 9.329780578613281, 10.13361930847168, 10.937458038330078, 11.741296768188477, 12.545135498046875, 13.348973274230957, 14.152812004089355, 14.956650733947754, 15.760488510131836, 16.564327239990234, 17.368165969848633]}, "gradients/decoder.transformer.h.9.ln_1.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 4.0, 3.0, 5.0, 9.0, 9.0, 12.0, 6.0, 13.0, 11.0, 12.0, 18.0, 15.0, 12.0, 20.0, 25.0, 29.0, 29.0, 26.0, 30.0, 30.0, 34.0, 38.0, 42.0, 37.0, 33.0, 36.0, 41.0, 47.0, 39.0, 29.0, 26.0, 35.0, 23.0, 31.0, 18.0, 16.0, 24.0, 18.0, 23.0, 15.0, 18.0, 15.0, 11.0, 10.0, 11.0, 8.0, 5.0, 8.0, 1.0, 0.0, 2.0, 1.0, 2.0, 0.0, 1.0, 2.0, 2.0], "bins": [-82.13314819335938, -79.55266571044922, -76.97217559814453, -74.39169311523438, -71.81120300292969, -69.23072052001953, -66.65023803710938, -64.06974792480469, -61.48926544189453, -58.90877914428711, -56.32829284667969, -53.74781036376953, -51.16732406616211, -48.58683776855469, -46.00635528564453, -43.42586898803711, -40.84538269042969, -38.264896392822266, -35.684410095214844, -33.10392761230469, -30.523441314697266, -27.942955017089844, -25.362470626831055, -22.781986236572266, -20.201499938964844, -17.621013641357422, -15.040529251098633, -12.460043907165527, -9.879558563232422, -7.299073219299316, -4.718587875366211, -2.138103485107422, 0.44237518310546875, 3.022860527038574, 5.60334587097168, 8.183831214904785, 10.76431655883789, 13.344801902770996, 15.925287246704102, 18.50577163696289, 21.086257934570312, 23.666744232177734, 26.247228622436523, 28.827713012695312, 31.408199310302734, 33.988685607910156, 36.56916809082031, 39.149654388427734, 41.730140686035156, 44.31062698364258, 46.89111328125, 49.471595764160156, 52.05208206176758, 54.632568359375, 57.213050842285156, 59.79353713989258, 62.3740234375, 64.95450592041016, 67.53499603271484, 70.115478515625, 72.69596862792969, 75.27645111083984, 77.85693359375, 80.43742370605469, 83.01790618896484]}, "gradients/decoder.transformer.h.8.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 2.0, 5.0, 4.0, 5.0, 8.0, 10.0, 6.0, 14.0, 17.0, 21.0, 22.0, 32.0, 37.0, 28.0, 39.0, 32.0, 39.0, 49.0, 44.0, 43.0, 53.0, 46.0, 47.0, 47.0, 35.0, 37.0, 42.0, 29.0, 20.0, 37.0, 26.0, 16.0, 18.0, 22.0, 17.0, 13.0, 4.0, 11.0, 10.0, 10.0, 6.0, 3.0, 2.0, 6.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-10.7890625, -10.432373046875, -10.07568359375, -9.718994140625, -9.3623046875, -9.005615234375, -8.64892578125, -8.292236328125, -7.935546875, -7.578857421875, -7.22216796875, -6.865478515625, -6.5087890625, -6.152099609375, -5.79541015625, -5.438720703125, -5.08203125, -4.725341796875, -4.36865234375, -4.011962890625, -3.6552734375, -3.298583984375, -2.94189453125, -2.585205078125, -2.228515625, -1.871826171875, -1.51513671875, -1.158447265625, -0.8017578125, -0.445068359375, -0.08837890625, 0.268310546875, 0.625, 0.981689453125, 1.33837890625, 1.695068359375, 2.0517578125, 2.408447265625, 2.76513671875, 3.121826171875, 3.478515625, 3.835205078125, 4.19189453125, 4.548583984375, 4.9052734375, 5.261962890625, 5.61865234375, 5.975341796875, 6.33203125, 6.688720703125, 7.04541015625, 7.402099609375, 7.7587890625, 8.115478515625, 8.47216796875, 8.828857421875, 9.185546875, 9.542236328125, 9.89892578125, 10.255615234375, 10.6123046875, 10.968994140625, 11.32568359375, 11.682373046875, 12.0390625]}, "gradients/decoder.transformer.h.8.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 3.0, 0.0, 0.0, 2.0, 0.0, 2.0, 2.0, 1.0, 4.0, 7.0, 10.0, 10.0, 16.0, 41.0, 34.0, 56.0, 83.0, 126.0, 180.0, 263.0, 454.0, 735.0, 1293.0, 2320.0, 4349.0, 9096.0, 20396.0, 50759.0, 144051.0, 420610.0, 971971.0, 1310910.0, 796083.0, 294379.0, 99122.0, 37028.0, 15059.0, 6960.0, 3395.0, 1745.0, 1007.0, 579.0, 371.0, 260.0, 160.0, 111.0, 78.0, 37.0, 49.0, 24.0, 20.0, 7.0, 14.0, 5.0, 5.0, 5.0, 4.0, 3.0, 1.0, 4.0, 2.0], "bins": [-11.4140625, -11.0816650390625, -10.749267578125, -10.4168701171875, -10.08447265625, -9.7520751953125, -9.419677734375, -9.0872802734375, -8.7548828125, -8.4224853515625, -8.090087890625, -7.7576904296875, -7.42529296875, -7.0928955078125, -6.760498046875, -6.4281005859375, -6.095703125, -5.7633056640625, -5.430908203125, -5.0985107421875, -4.76611328125, -4.4337158203125, -4.101318359375, -3.7689208984375, -3.4365234375, -3.1041259765625, -2.771728515625, -2.4393310546875, -2.10693359375, -1.7745361328125, -1.442138671875, -1.1097412109375, -0.77734375, -0.4449462890625, -0.112548828125, 0.2198486328125, 0.55224609375, 0.8846435546875, 1.217041015625, 1.5494384765625, 1.8818359375, 2.2142333984375, 2.546630859375, 2.8790283203125, 3.21142578125, 3.5438232421875, 3.876220703125, 4.2086181640625, 4.541015625, 4.8734130859375, 5.205810546875, 5.5382080078125, 5.87060546875, 6.2030029296875, 6.535400390625, 6.8677978515625, 7.2001953125, 7.5325927734375, 7.864990234375, 8.1973876953125, 8.52978515625, 8.8621826171875, 9.194580078125, 9.5269775390625, 9.859375]}, "gradients/decoder.transformer.h.8.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 3.0, 3.0, 3.0, 4.0, 1.0, 3.0, 6.0, 17.0, 11.0, 23.0, 40.0, 54.0, 69.0, 107.0, 123.0, 191.0, 244.0, 358.0, 440.0, 520.0, 448.0, 386.0, 275.0, 202.0, 150.0, 135.0, 84.0, 54.0, 44.0, 29.0, 24.0, 16.0, 4.0, 5.0, 5.0, 5.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-25.5625, -24.88525390625, -24.2080078125, -23.53076171875, -22.853515625, -22.17626953125, -21.4990234375, -20.82177734375, -20.14453125, -19.46728515625, -18.7900390625, -18.11279296875, -17.435546875, -16.75830078125, -16.0810546875, -15.40380859375, -14.7265625, -14.04931640625, -13.3720703125, -12.69482421875, -12.017578125, -11.34033203125, -10.6630859375, -9.98583984375, -9.30859375, -8.63134765625, -7.9541015625, -7.27685546875, -6.599609375, -5.92236328125, -5.2451171875, -4.56787109375, -3.890625, -3.21337890625, -2.5361328125, -1.85888671875, -1.181640625, -0.50439453125, 0.1728515625, 0.85009765625, 1.52734375, 2.20458984375, 2.8818359375, 3.55908203125, 4.236328125, 4.91357421875, 5.5908203125, 6.26806640625, 6.9453125, 7.62255859375, 8.2998046875, 8.97705078125, 9.654296875, 10.33154296875, 11.0087890625, 11.68603515625, 12.36328125, 13.04052734375, 13.7177734375, 14.39501953125, 15.072265625, 15.74951171875, 16.4267578125, 17.10400390625, 17.78125]}, "gradients/decoder.transformer.h.8.mlp.c_fc.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 5.0, 3.0, 6.0, 8.0, 14.0, 21.0, 16.0, 38.0, 68.0, 80.0, 127.0, 138.0, 187.0, 248.0, 325.0, 706.0, 2594.0, 170583.0, 3981655.0, 34053.0, 1577.0, 556.0, 374.0, 221.0, 180.0, 147.0, 111.0, 77.0, 40.0, 36.0, 37.0, 14.0, 16.0, 12.0, 2.0, 8.0, 6.0, 2.0, 3.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-81.5, -78.8154296875, -76.130859375, -73.4462890625, -70.76171875, -68.0771484375, -65.392578125, -62.7080078125, -60.0234375, -57.3388671875, -54.654296875, -51.9697265625, -49.28515625, -46.6005859375, -43.916015625, -41.2314453125, -38.546875, -35.8623046875, -33.177734375, -30.4931640625, -27.80859375, -25.1240234375, -22.439453125, -19.7548828125, -17.0703125, -14.3857421875, -11.701171875, -9.0166015625, -6.33203125, -3.6474609375, -0.962890625, 1.7216796875, 4.40625, 7.0908203125, 9.775390625, 12.4599609375, 15.14453125, 17.8291015625, 20.513671875, 23.1982421875, 25.8828125, 28.5673828125, 31.251953125, 33.9365234375, 36.62109375, 39.3056640625, 41.990234375, 44.6748046875, 47.359375, 50.0439453125, 52.728515625, 55.4130859375, 58.09765625, 60.7822265625, 63.466796875, 66.1513671875, 68.8359375, 71.5205078125, 74.205078125, 76.8896484375, 79.57421875, 82.2587890625, 84.943359375, 87.6279296875, 90.3125]}, "gradients/decoder.transformer.h.8.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 29.0, 241.0, 529.0, 199.0, 19.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-596.387451171875, -585.2305908203125, -574.07373046875, -562.9168701171875, -551.7600708007812, -540.6032104492188, -529.4463500976562, -518.2894897460938, -507.13262939453125, -495.97576904296875, -484.8189392089844, -473.6620788574219, -462.5052185058594, -451.348388671875, -440.1915283203125, -429.03466796875, -417.8778381347656, -406.7209777832031, -395.56414794921875, -384.40728759765625, -373.25042724609375, -362.09356689453125, -350.9367370605469, -339.7798767089844, -328.623046875, -317.4661865234375, -306.3093566894531, -295.1524963378906, -283.9956359863281, -272.83880615234375, -261.68194580078125, -250.52508544921875, -239.36819458007812, -228.2113494873047, -217.0544891357422, -205.89764404296875, -194.74078369140625, -183.5839385986328, -172.42709350585938, -161.27023315429688, -150.1134033203125, -138.95655822753906, -127.79969787597656, -116.64285278320312, -105.48600006103516, -94.32914733886719, -83.17230224609375, -72.01544952392578, -60.85858917236328, -49.70173645019531, -38.54488754272461, -27.388038635253906, -16.231185913085938, -5.074333190917969, 6.082511901855469, 17.239364624023438, 28.396217346191406, 39.553070068359375, 50.70991897583008, 61.86676788330078, 73.02362060546875, 84.18047332763672, 95.33731842041016, 106.49417114257812, 117.6510238647461]}, "gradients/decoder.transformer.h.8.ln_2.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 6.0, 5.0, 5.0, 13.0, 13.0, 12.0, 18.0, 19.0, 18.0, 29.0, 12.0, 22.0, 28.0, 40.0, 33.0, 34.0, 32.0, 44.0, 37.0, 38.0, 38.0, 48.0, 49.0, 31.0, 38.0, 37.0, 34.0, 39.0, 30.0, 33.0, 36.0, 26.0, 17.0, 10.0, 19.0, 7.0, 17.0, 11.0, 7.0, 8.0, 7.0, 2.0, 1.0, 0.0, 1.0, 0.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-70.13406372070312, -68.04097747802734, -65.94789123535156, -63.854801177978516, -61.761714935302734, -59.66862487792969, -57.575538635253906, -55.482452392578125, -53.389366149902344, -51.29627990722656, -49.203189849853516, -47.110103607177734, -45.01701736450195, -42.923927307128906, -40.830841064453125, -38.737754821777344, -36.6446647644043, -34.551578521728516, -32.45848846435547, -30.365402221679688, -28.272315979003906, -26.179227828979492, -24.086139678955078, -21.993053436279297, -19.899965286254883, -17.80687713623047, -15.713790893554688, -13.620702743530273, -11.527615547180176, -9.434528350830078, -7.341440200805664, -5.248353004455566, -3.1552658081054688, -1.062178373336792, 1.0309090614318848, 3.1239967346191406, 5.217083930969238, 7.310171127319336, 9.40325927734375, 11.496346473693848, 13.589433670043945, 15.682520866394043, 17.77560806274414, 19.868696212768555, 21.96178436279297, 24.05487060546875, 26.147958755493164, 28.241046905517578, 30.33413314819336, 32.42721939086914, 34.52030944824219, 36.61339569091797, 38.70648193359375, 40.79956817626953, 42.89265823364258, 44.98574447631836, 47.078834533691406, 49.17192077636719, 51.265010833740234, 53.358097076416016, 55.4511833190918, 57.544273376464844, 59.637359619140625, 61.730445861816406, 63.82353210449219]}, "gradients/decoder.transformer.h.8.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 2.0, 2.0, 1.0, 3.0, 6.0, 9.0, 7.0, 12.0, 16.0, 22.0, 15.0, 26.0, 22.0, 36.0, 24.0, 32.0, 39.0, 33.0, 47.0, 44.0, 51.0, 42.0, 45.0, 51.0, 41.0, 56.0, 34.0, 42.0, 38.0, 33.0, 31.0, 20.0, 21.0, 14.0, 17.0, 7.0, 24.0, 12.0, 11.0, 6.0, 9.0, 6.0, 1.0, 4.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-10.3828125, -10.028564453125, -9.67431640625, -9.320068359375, -8.9658203125, -8.611572265625, -8.25732421875, -7.903076171875, -7.548828125, -7.194580078125, -6.84033203125, -6.486083984375, -6.1318359375, -5.777587890625, -5.42333984375, -5.069091796875, -4.71484375, -4.360595703125, -4.00634765625, -3.652099609375, -3.2978515625, -2.943603515625, -2.58935546875, -2.235107421875, -1.880859375, -1.526611328125, -1.17236328125, -0.818115234375, -0.4638671875, -0.109619140625, 0.24462890625, 0.598876953125, 0.953125, 1.307373046875, 1.66162109375, 2.015869140625, 2.3701171875, 2.724365234375, 3.07861328125, 3.432861328125, 3.787109375, 4.141357421875, 4.49560546875, 4.849853515625, 5.2041015625, 5.558349609375, 5.91259765625, 6.266845703125, 6.62109375, 6.975341796875, 7.32958984375, 7.683837890625, 8.0380859375, 8.392333984375, 8.74658203125, 9.100830078125, 9.455078125, 9.809326171875, 10.16357421875, 10.517822265625, 10.8720703125, 11.226318359375, 11.58056640625, 11.934814453125, 12.2890625]}, "gradients/decoder.transformer.h.8.crossattention.c_proj.weight": {"_type": "histogram", "values": [3.0, 1.0, 2.0, 4.0, 4.0, 6.0, 10.0, 11.0, 28.0, 35.0, 41.0, 60.0, 113.0, 136.0, 232.0, 323.0, 483.0, 736.0, 1160.0, 1719.0, 2601.0, 3908.0, 6001.0, 8915.0, 14006.0, 21739.0, 34029.0, 53211.0, 83041.0, 127094.0, 172639.0, 169631.0, 122752.0, 80216.0, 51193.0, 32910.0, 20671.0, 13412.0, 8743.0, 5702.0, 3714.0, 2388.0, 1605.0, 1101.0, 718.0, 535.0, 327.0, 201.0, 176.0, 93.0, 70.0, 40.0, 28.0, 20.0, 11.0, 8.0, 4.0, 6.0, 3.0, 4.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.853515625, -0.8259429931640625, -0.798370361328125, -0.7707977294921875, -0.74322509765625, -0.7156524658203125, -0.688079833984375, -0.6605072021484375, -0.6329345703125, -0.6053619384765625, -0.577789306640625, -0.5502166748046875, -0.52264404296875, -0.4950714111328125, -0.467498779296875, -0.4399261474609375, -0.412353515625, -0.3847808837890625, -0.357208251953125, -0.3296356201171875, -0.30206298828125, -0.2744903564453125, -0.246917724609375, -0.2193450927734375, -0.1917724609375, -0.1641998291015625, -0.136627197265625, -0.1090545654296875, -0.08148193359375, -0.0539093017578125, -0.026336669921875, 0.0012359619140625, 0.02880859375, 0.0563812255859375, 0.083953857421875, 0.1115264892578125, 0.13909912109375, 0.1666717529296875, 0.194244384765625, 0.2218170166015625, 0.2493896484375, 0.2769622802734375, 0.304534912109375, 0.3321075439453125, 0.35968017578125, 0.3872528076171875, 0.414825439453125, 0.4423980712890625, 0.469970703125, 0.4975433349609375, 0.525115966796875, 0.5526885986328125, 0.58026123046875, 0.6078338623046875, 0.635406494140625, 0.6629791259765625, 0.6905517578125, 0.7181243896484375, 0.745697021484375, 0.7732696533203125, 0.80084228515625, 0.8284149169921875, 0.855987548828125, 0.8835601806640625, 0.9111328125]}, "gradients/decoder.transformer.h.8.crossattention.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 7.0, 9.0, 4.0, 4.0, 13.0, 15.0, 17.0, 14.0, 19.0, 14.0, 24.0, 26.0, 35.0, 21.0, 26.0, 44.0, 34.0, 36.0, 43.0, 31.0, 37.0, 42.0, 1063.0, 39.0, 45.0, 29.0, 34.0, 29.0, 26.0, 34.0, 25.0, 28.0, 30.0, 32.0, 15.0, 23.0, 11.0, 13.0, 12.0, 3.0, 8.0, 3.0, 4.0, 4.0, 5.0, 3.0, 4.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.19140625, -5.98565673828125, -5.7799072265625, -5.57415771484375, -5.368408203125, -5.16265869140625, -4.9569091796875, -4.75115966796875, -4.54541015625, -4.33966064453125, -4.1339111328125, -3.92816162109375, -3.722412109375, -3.51666259765625, -3.3109130859375, -3.10516357421875, -2.8994140625, -2.69366455078125, -2.4879150390625, -2.28216552734375, -2.076416015625, -1.87066650390625, -1.6649169921875, -1.45916748046875, -1.25341796875, -1.04766845703125, -0.8419189453125, -0.63616943359375, -0.430419921875, -0.22467041015625, -0.0189208984375, 0.18682861328125, 0.392578125, 0.59832763671875, 0.8040771484375, 1.00982666015625, 1.215576171875, 1.42132568359375, 1.6270751953125, 1.83282470703125, 2.03857421875, 2.24432373046875, 2.4500732421875, 2.65582275390625, 2.861572265625, 3.06732177734375, 3.2730712890625, 3.47882080078125, 3.6845703125, 3.89031982421875, 4.0960693359375, 4.30181884765625, 4.507568359375, 4.71331787109375, 4.9190673828125, 5.12481689453125, 5.33056640625, 5.53631591796875, 5.7420654296875, 5.94781494140625, 6.153564453125, 6.35931396484375, 6.5650634765625, 6.77081298828125, 6.9765625]}, "gradients/decoder.transformer.h.8.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 3.0, 6.0, 5.0, 6.0, 18.0, 26.0, 36.0, 41.0, 57.0, 102.0, 130.0, 208.0, 315.0, 435.0, 605.0, 911.0, 1413.0, 2055.0, 3040.0, 4672.0, 6751.0, 10236.0, 15731.0, 23237.0, 34790.0, 51641.0, 75302.0, 105388.0, 136438.0, 1194006.0, 127662.0, 95447.0, 66967.0, 45876.0, 31172.0, 20810.0, 13822.0, 9325.0, 6070.0, 4073.0, 2637.0, 1893.0, 1231.0, 856.0, 550.0, 401.0, 255.0, 157.0, 100.0, 83.0, 59.0, 27.0, 23.0, 21.0, 8.0, 5.0, 6.0, 2.0, 3.0, 1.0, 3.0], "bins": [-0.6279296875, -0.6085205078125, -0.589111328125, -0.5697021484375, -0.55029296875, -0.5308837890625, -0.511474609375, -0.4920654296875, -0.47265625, -0.4532470703125, -0.433837890625, -0.4144287109375, -0.39501953125, -0.3756103515625, -0.356201171875, -0.3367919921875, -0.3173828125, -0.2979736328125, -0.278564453125, -0.2591552734375, -0.23974609375, -0.2203369140625, -0.200927734375, -0.1815185546875, -0.162109375, -0.1427001953125, -0.123291015625, -0.1038818359375, -0.08447265625, -0.0650634765625, -0.045654296875, -0.0262451171875, -0.0068359375, 0.0125732421875, 0.031982421875, 0.0513916015625, 0.07080078125, 0.0902099609375, 0.109619140625, 0.1290283203125, 0.1484375, 0.1678466796875, 0.187255859375, 0.2066650390625, 0.22607421875, 0.2454833984375, 0.264892578125, 0.2843017578125, 0.3037109375, 0.3231201171875, 0.342529296875, 0.3619384765625, 0.38134765625, 0.4007568359375, 0.420166015625, 0.4395751953125, 0.458984375, 0.4783935546875, 0.497802734375, 0.5172119140625, 0.53662109375, 0.5560302734375, 0.575439453125, 0.5948486328125, 0.6142578125]}, "gradients/decoder.transformer.h.8.crossattention.q_attn.bias": {"_type": "histogram", "values": [2.0, 2.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 3.0, 1.0, 1.0, 4.0, 6.0, 11.0, 6.0, 12.0, 14.0, 7.0, 15.0, 15.0, 25.0, 31.0, 29.0, 45.0, 47.0, 41.0, 63.0, 57.0, 52.0, 59.0, 50.0, 63.0, 55.0, 46.0, 38.0, 38.0, 44.0, 31.0, 18.0, 18.0, 12.0, 12.0, 7.0, 8.0, 5.0, 4.0, 4.0, 4.0, 2.0, 2.0, 1.0, 1.0, 1.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.005855560302734375, -0.005664348602294922, -0.005473136901855469, -0.005281925201416016, -0.0050907135009765625, -0.004899501800537109, -0.004708290100097656, -0.004517078399658203, -0.00432586669921875, -0.004134654998779297, -0.003943443298339844, -0.0037522315979003906, -0.0035610198974609375, -0.0033698081970214844, -0.0031785964965820312, -0.002987384796142578, -0.002796173095703125, -0.002604961395263672, -0.0024137496948242188, -0.0022225379943847656, -0.0020313262939453125, -0.0018401145935058594, -0.0016489028930664062, -0.0014576911926269531, -0.0012664794921875, -0.0010752677917480469, -0.0008840560913085938, -0.0006928443908691406, -0.0005016326904296875, -0.0003104209899902344, -0.00011920928955078125, 7.200241088867188e-05, 0.000263214111328125, 0.0004544258117675781, 0.0006456375122070312, 0.0008368492126464844, 0.0010280609130859375, 0.0012192726135253906, 0.0014104843139648438, 0.0016016960144042969, 0.00179290771484375, 0.001984119415283203, 0.0021753311157226562, 0.0023665428161621094, 0.0025577545166015625, 0.0027489662170410156, 0.0029401779174804688, 0.003131389617919922, 0.003322601318359375, 0.003513813018798828, 0.0037050247192382812, 0.0038962364196777344, 0.0040874481201171875, 0.004278659820556641, 0.004469871520996094, 0.004661083221435547, 0.004852294921875, 0.005043506622314453, 0.005234718322753906, 0.005425930023193359, 0.0056171417236328125, 0.005808353424072266, 0.005999565124511719, 0.006190776824951172, 0.006381988525390625]}, "gradients/decoder.transformer.h.8.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 2.0, 0.0, 1.0, 1.0, 2.0, 4.0, 9.0, 3.0, 4.0, 10.0, 7.0, 14.0, 22.0, 17.0, 21.0, 36.0, 41.0, 75.0, 82.0, 101.0, 164.0, 285.0, 740.0, 19583.0, 1015007.0, 10688.0, 730.0, 280.0, 152.0, 83.0, 105.0, 67.0, 49.0, 38.0, 25.0, 28.0, 10.0, 17.0, 15.0, 8.0, 9.0, 10.0, 3.0, 6.0, 2.0, 1.0, 4.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0], "bins": [-0.132080078125, -0.12788009643554688, -0.12368011474609375, -0.11948013305664062, -0.1152801513671875, -0.11108016967773438, -0.10688018798828125, -0.10268020629882812, -0.098480224609375, -0.09428024291992188, -0.09008026123046875, -0.08588027954101562, -0.0816802978515625, -0.07748031616210938, -0.07328033447265625, -0.06908035278320312, -0.06488037109375, -0.060680389404296875, -0.05648040771484375, -0.052280426025390625, -0.0480804443359375, -0.043880462646484375, -0.03968048095703125, -0.035480499267578125, -0.031280517578125, -0.027080535888671875, -0.02288055419921875, -0.018680572509765625, -0.0144805908203125, -0.010280609130859375, -0.00608062744140625, -0.001880645751953125, 0.0023193359375, 0.006519317626953125, 0.01071929931640625, 0.014919281005859375, 0.0191192626953125, 0.023319244384765625, 0.02751922607421875, 0.031719207763671875, 0.035919189453125, 0.040119171142578125, 0.04431915283203125, 0.048519134521484375, 0.0527191162109375, 0.056919097900390625, 0.06111907958984375, 0.06531906127929688, 0.06951904296875, 0.07371902465820312, 0.07791900634765625, 0.08211898803710938, 0.0863189697265625, 0.09051895141601562, 0.09471893310546875, 0.09891891479492188, 0.103118896484375, 0.10731887817382812, 0.11151885986328125, 0.11571884155273438, 0.1199188232421875, 0.12411880493164062, 0.12831878662109375, 0.13251876831054688, 0.13671875]}, "gradients/decoder.transformer.h.8.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 10.0, 417.0, 563.0, 24.0, 1.0, 1.0], "bins": [-0.07816892117261887, -0.0768674984574318, -0.07556608319282532, -0.07426466047763824, -0.07296324521303177, -0.0716618224978447, -0.07036040723323822, -0.06905898451805115, -0.06775756925344467, -0.0664561465382576, -0.06515473127365112, -0.06385330855846405, -0.06255189329385757, -0.0612504743039608, -0.059949055314064026, -0.05864763259887695, -0.05734621360898018, -0.056044794619083405, -0.05474337562918663, -0.053441956639289856, -0.05214053764939308, -0.05083911865949631, -0.04953769966959953, -0.04823628067970276, -0.046934857964515686, -0.04563343897461891, -0.04433201998472214, -0.04303060099482536, -0.04172918200492859, -0.040427763015031815, -0.03912634402513504, -0.03782492130994797, -0.03652350604534149, -0.03522208705544472, -0.03392066806554794, -0.03261924907565117, -0.031317830085754395, -0.03001641109585762, -0.028714990243315697, -0.027413571253418922, -0.026112154126167297, -0.024810735136270523, -0.02350931614637375, -0.022207897156476974, -0.0209064781665802, -0.019605059176683426, -0.018303638324141502, -0.017002219334244728, -0.015700798481702805, -0.01439937949180603, -0.013097960501909256, -0.011796540580689907, -0.010495121590793133, -0.009193702600896358, -0.00789228267967701, -0.006590863689780235, -0.005289445631206036, -0.003988026641309261, -0.0026866071857511997, -0.0013851879630237818, -8.376874029636383e-05, 0.0012176502496004105, 0.002519069705158472, 0.0038204891607165337, 0.005121908150613308]}, "gradients/decoder.transformer.h.8.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 3.0, 2.0, 2.0, 4.0, 10.0, 7.0, 13.0, 9.0, 8.0, 10.0, 16.0, 17.0, 18.0, 30.0, 18.0, 26.0, 35.0, 41.0, 37.0, 45.0, 48.0, 49.0, 40.0, 54.0, 45.0, 52.0, 48.0, 40.0, 41.0, 38.0, 36.0, 30.0, 30.0, 28.0, 12.0, 7.0, 16.0, 16.0, 4.0, 10.0, 6.0, 5.0, 5.0, 0.0, 2.0, 2.0, 2.0, 2.0], "bins": [-0.004800736904144287, -0.004676404409110546, -0.004552071914076805, -0.004427739419043064, -0.004303406924009323, -0.004179074428975582, -0.004054741933941841, -0.0039304094389081, -0.003806076943874359, -0.003681744448840618, -0.003557411953806877, -0.003433079458773136, -0.003308746963739395, -0.003184414468705654, -0.003060081973671913, -0.002935749478638172, -0.002811416983604431, -0.00268708448857069, -0.002562751993536949, -0.002438419498503208, -0.002314087003469467, -0.002189754508435726, -0.002065422013401985, -0.0019410895183682442, -0.0018167570233345032, -0.0016924245283007622, -0.0015680920332670212, -0.0014437595382332802, -0.0013194270431995392, -0.0011950945481657982, -0.0010707620531320572, -0.0009464295580983162, -0.0008220970630645752, -0.0006977645680308342, -0.0005734320729970932, -0.0004490995779633522, -0.0003247670829296112, -0.0002004345878958702, -7.610209286212921e-05, 4.8230402171611786e-05, 0.00017256289720535278, 0.0002968953922390938, 0.0004212278872728348, 0.0005455603823065758, 0.0006698928773403168, 0.0007942253723740578, 0.0009185578674077988, 0.0010428903624415398, 0.0011672228574752808, 0.0012915553525090218, 0.0014158878475427628, 0.0015402203425765038, 0.0016645528376102448, 0.0017888853326439857, 0.0019132178276777267, 0.0020375503227114677, 0.0021618828177452087, 0.0022862153127789497, 0.0024105478078126907, 0.0025348803028464317, 0.0026592127978801727, 0.0027835452929139137, 0.0029078777879476547, 0.0030322102829813957, 0.0031565427780151367]}, "gradients/decoder.transformer.h.8.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 2.0, 2.0, 1.0, 3.0, 6.0, 9.0, 7.0, 12.0, 16.0, 22.0, 15.0, 26.0, 22.0, 36.0, 24.0, 32.0, 39.0, 33.0, 47.0, 44.0, 51.0, 42.0, 46.0, 50.0, 41.0, 56.0, 34.0, 42.0, 38.0, 33.0, 31.0, 20.0, 21.0, 14.0, 17.0, 7.0, 24.0, 12.0, 11.0, 6.0, 9.0, 6.0, 1.0, 4.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-10.3828125, -10.028564453125, -9.67431640625, -9.320068359375, -8.9658203125, -8.611572265625, -8.25732421875, -7.903076171875, -7.548828125, -7.194580078125, -6.84033203125, -6.486083984375, -6.1318359375, -5.777587890625, -5.42333984375, -5.069091796875, -4.71484375, -4.360595703125, -4.00634765625, -3.652099609375, -3.2978515625, -2.943603515625, -2.58935546875, -2.235107421875, -1.880859375, -1.526611328125, -1.17236328125, -0.818115234375, -0.4638671875, -0.109619140625, 0.24462890625, 0.598876953125, 0.953125, 1.307373046875, 1.66162109375, 2.015869140625, 2.3701171875, 2.724365234375, 3.07861328125, 3.432861328125, 3.787109375, 4.141357421875, 4.49560546875, 4.849853515625, 5.2041015625, 5.558349609375, 5.91259765625, 6.266845703125, 6.62109375, 6.975341796875, 7.32958984375, 7.683837890625, 8.0380859375, 8.392333984375, 8.74658203125, 9.100830078125, 9.455078125, 9.809326171875, 10.16357421875, 10.517822265625, 10.8720703125, 11.226318359375, 11.58056640625, 11.934814453125, 12.2890625]}, "gradients/decoder.transformer.h.8.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 3.0, 0.0, 1.0, 1.0, 6.0, 8.0, 15.0, 15.0, 14.0, 31.0, 40.0, 60.0, 106.0, 175.0, 367.0, 758.0, 1691.0, 4033.0, 9280.0, 21973.0, 55841.0, 167651.0, 454943.0, 217418.0, 68221.0, 26479.0, 10820.0, 4715.0, 2077.0, 875.0, 428.0, 190.0, 109.0, 66.0, 38.0, 34.0, 30.0, 13.0, 7.0, 11.0, 9.0, 8.0, 1.0, 4.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-10.3125, -9.964111328125, -9.61572265625, -9.267333984375, -8.9189453125, -8.570556640625, -8.22216796875, -7.873779296875, -7.525390625, -7.177001953125, -6.82861328125, -6.480224609375, -6.1318359375, -5.783447265625, -5.43505859375, -5.086669921875, -4.73828125, -4.389892578125, -4.04150390625, -3.693115234375, -3.3447265625, -2.996337890625, -2.64794921875, -2.299560546875, -1.951171875, -1.602783203125, -1.25439453125, -0.906005859375, -0.5576171875, -0.209228515625, 0.13916015625, 0.487548828125, 0.8359375, 1.184326171875, 1.53271484375, 1.881103515625, 2.2294921875, 2.577880859375, 2.92626953125, 3.274658203125, 3.623046875, 3.971435546875, 4.31982421875, 4.668212890625, 5.0166015625, 5.364990234375, 5.71337890625, 6.061767578125, 6.41015625, 6.758544921875, 7.10693359375, 7.455322265625, 7.8037109375, 8.152099609375, 8.50048828125, 8.848876953125, 9.197265625, 9.545654296875, 9.89404296875, 10.242431640625, 10.5908203125, 10.939208984375, 11.28759765625, 11.635986328125, 11.984375]}, "gradients/decoder.transformer.h.8.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 2.0, 3.0, 2.0, 5.0, 1.0, 7.0, 10.0, 10.0, 7.0, 10.0, 12.0, 23.0, 23.0, 22.0, 18.0, 21.0, 30.0, 45.0, 46.0, 49.0, 53.0, 67.0, 153.0, 1583.0, 318.0, 110.0, 60.0, 39.0, 34.0, 54.0, 31.0, 37.0, 24.0, 30.0, 20.0, 22.0, 9.0, 13.0, 14.0, 10.0, 13.0, 6.0, 3.0, 3.0, 4.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 2.0, 2.0], "bins": [-35.8125, -34.783203125, -33.75390625, -32.724609375, -31.6953125, -30.666015625, -29.63671875, -28.607421875, -27.578125, -26.548828125, -25.51953125, -24.490234375, -23.4609375, -22.431640625, -21.40234375, -20.373046875, -19.34375, -18.314453125, -17.28515625, -16.255859375, -15.2265625, -14.197265625, -13.16796875, -12.138671875, -11.109375, -10.080078125, -9.05078125, -8.021484375, -6.9921875, -5.962890625, -4.93359375, -3.904296875, -2.875, -1.845703125, -0.81640625, 0.212890625, 1.2421875, 2.271484375, 3.30078125, 4.330078125, 5.359375, 6.388671875, 7.41796875, 8.447265625, 9.4765625, 10.505859375, 11.53515625, 12.564453125, 13.59375, 14.623046875, 15.65234375, 16.681640625, 17.7109375, 18.740234375, 19.76953125, 20.798828125, 21.828125, 22.857421875, 23.88671875, 24.916015625, 25.9453125, 26.974609375, 28.00390625, 29.033203125, 30.0625]}, "gradients/decoder.transformer.h.8.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 4.0, 0.0, 0.0, 2.0, 3.0, 7.0, 3.0, 8.0, 13.0, 15.0, 12.0, 19.0, 20.0, 26.0, 31.0, 51.0, 82.0, 101.0, 108.0, 164.0, 218.0, 322.0, 656.0, 4472.0, 313013.0, 2809294.0, 14724.0, 1013.0, 385.0, 262.0, 162.0, 119.0, 81.0, 73.0, 55.0, 50.0, 35.0, 18.0, 18.0, 15.0, 13.0, 16.0, 6.0, 6.0, 6.0, 6.0, 3.0, 4.0, 4.0, 0.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-54.65625, -52.90283203125, -51.1494140625, -49.39599609375, -47.642578125, -45.88916015625, -44.1357421875, -42.38232421875, -40.62890625, -38.87548828125, -37.1220703125, -35.36865234375, -33.615234375, -31.86181640625, -30.1083984375, -28.35498046875, -26.6015625, -24.84814453125, -23.0947265625, -21.34130859375, -19.587890625, -17.83447265625, -16.0810546875, -14.32763671875, -12.57421875, -10.82080078125, -9.0673828125, -7.31396484375, -5.560546875, -3.80712890625, -2.0537109375, -0.30029296875, 1.453125, 3.20654296875, 4.9599609375, 6.71337890625, 8.466796875, 10.22021484375, 11.9736328125, 13.72705078125, 15.48046875, 17.23388671875, 18.9873046875, 20.74072265625, 22.494140625, 24.24755859375, 26.0009765625, 27.75439453125, 29.5078125, 31.26123046875, 33.0146484375, 34.76806640625, 36.521484375, 38.27490234375, 40.0283203125, 41.78173828125, 43.53515625, 45.28857421875, 47.0419921875, 48.79541015625, 50.548828125, 52.30224609375, 54.0556640625, 55.80908203125, 57.5625]}, "gradients/decoder.transformer.h.8.ln_1.weight": {"_type": "histogram", "values": [11.0, 207.0, 612.0, 180.0, 9.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-14.5772066116333, -8.724058151245117, -2.8709096908569336, 2.9822378158569336, 8.835387229919434, 14.688536643981934, 20.541683197021484, 26.394832611083984, 32.247982025146484, 38.101131439208984, 43.95427703857422, 49.80742645263672, 55.66057586669922, 61.51372528076172, 67.36686706542969, 73.22001647949219, 79.07316589355469, 84.92631530761719, 90.77946472167969, 96.63261413574219, 102.48576354980469, 108.33891296386719, 114.19205474853516, 120.04520416259766, 125.89835357666016, 131.75149536132812, 137.60464477539062, 143.45779418945312, 149.31094360351562, 155.16409301757812, 161.01724243164062, 166.87039184570312, 172.72354125976562, 178.57669067382812, 184.42984008789062, 190.28298950195312, 196.13613891601562, 201.98928833007812, 207.84243774414062, 213.69558715820312, 219.54873657226562, 225.40188598632812, 231.25503540039062, 237.10818481445312, 242.96133422851562, 248.81448364257812, 254.66763305664062, 260.5207824707031, 266.3739013671875, 272.22705078125, 278.0802001953125, 283.933349609375, 289.7864990234375, 295.6396484375, 301.4927978515625, 307.345947265625, 313.1990966796875, 319.05224609375, 324.9053955078125, 330.758544921875, 336.6116943359375, 342.46484375, 348.3179931640625, 354.171142578125, 360.0242919921875]}, "gradients/decoder.transformer.h.8.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 4.0, 0.0, 6.0, 4.0, 7.0, 10.0, 7.0, 12.0, 16.0, 15.0, 24.0, 14.0, 20.0, 24.0, 33.0, 33.0, 30.0, 36.0, 40.0, 34.0, 45.0, 37.0, 46.0, 45.0, 38.0, 56.0, 53.0, 23.0, 35.0, 29.0, 26.0, 23.0, 27.0, 26.0, 18.0, 26.0, 13.0, 12.0, 11.0, 9.0, 12.0, 12.0, 1.0, 8.0, 3.0, 5.0, 3.0, 4.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0], "bins": [-80.3714599609375, -77.92533874511719, -75.4792251586914, -73.0331039428711, -70.58698272705078, -68.140869140625, -65.69474792480469, -63.248626708984375, -60.80250930786133, -58.35639190673828, -55.91027069091797, -53.46415328979492, -51.018035888671875, -48.57191467285156, -46.125797271728516, -43.67967987060547, -41.233558654785156, -38.78744125366211, -36.3413200378418, -33.89520263671875, -31.44908332824707, -29.00296401977539, -26.556846618652344, -24.110727310180664, -21.664608001708984, -19.218488693237305, -16.772369384765625, -14.326251983642578, -11.880132675170898, -9.434013366699219, -6.9878950119018555, -4.541776657104492, -2.0956497192382812, 0.35046911239624023, 2.7965879440307617, 5.242706775665283, 7.688825607299805, 10.134944915771484, 12.581063270568848, 15.027181625366211, 17.47330093383789, 19.91942024230957, 22.36553955078125, 24.811656951904297, 27.257776260375977, 29.703895568847656, 32.1500129699707, 34.59613037109375, 37.04225158691406, 39.48836898803711, 41.93449020385742, 44.38060760498047, 46.82672882080078, 49.27284622192383, 51.718963623046875, 54.16508483886719, 56.611202239990234, 59.05731964111328, 61.503440856933594, 63.94955825805664, 66.39567565917969, 68.841796875, 71.28791809082031, 73.7340316772461, 76.1801528930664]}, "gradients/decoder.transformer.h.7.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 2.0, 3.0, 2.0, 5.0, 5.0, 2.0, 5.0, 13.0, 15.0, 20.0, 19.0, 21.0, 31.0, 24.0, 33.0, 25.0, 34.0, 41.0, 36.0, 51.0, 38.0, 49.0, 51.0, 41.0, 50.0, 50.0, 45.0, 37.0, 39.0, 37.0, 29.0, 19.0, 24.0, 20.0, 14.0, 16.0, 17.0, 11.0, 11.0, 7.0, 8.0, 8.0, 4.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-10.515625, -10.1484375, -9.78125, -9.4140625, -9.046875, -8.6796875, -8.3125, -7.9453125, -7.578125, -7.2109375, -6.84375, -6.4765625, -6.109375, -5.7421875, -5.375, -5.0078125, -4.640625, -4.2734375, -3.90625, -3.5390625, -3.171875, -2.8046875, -2.4375, -2.0703125, -1.703125, -1.3359375, -0.96875, -0.6015625, -0.234375, 0.1328125, 0.5, 0.8671875, 1.234375, 1.6015625, 1.96875, 2.3359375, 2.703125, 3.0703125, 3.4375, 3.8046875, 4.171875, 4.5390625, 4.90625, 5.2734375, 5.640625, 6.0078125, 6.375, 6.7421875, 7.109375, 7.4765625, 7.84375, 8.2109375, 8.578125, 8.9453125, 9.3125, 9.6796875, 10.046875, 10.4140625, 10.78125, 11.1484375, 11.515625, 11.8828125, 12.25, 12.6171875, 12.984375]}, "gradients/decoder.transformer.h.7.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 3.0, 0.0, 2.0, 3.0, 4.0, 4.0, 6.0, 5.0, 9.0, 15.0, 15.0, 19.0, 24.0, 22.0, 25.0, 37.0, 38.0, 55.0, 73.0, 89.0, 161.0, 406.0, 1489.0, 9157.0, 86087.0, 1482364.0, 2428205.0, 167567.0, 14926.0, 2284.0, 521.0, 230.0, 96.0, 52.0, 55.0, 50.0, 38.0, 37.0, 19.0, 18.0, 12.0, 13.0, 11.0, 16.0, 8.0, 7.0, 7.0, 4.0, 5.0, 2.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-25.875, -24.986572265625, -24.09814453125, -23.209716796875, -22.3212890625, -21.432861328125, -20.54443359375, -19.656005859375, -18.767578125, -17.879150390625, -16.99072265625, -16.102294921875, -15.2138671875, -14.325439453125, -13.43701171875, -12.548583984375, -11.66015625, -10.771728515625, -9.88330078125, -8.994873046875, -8.1064453125, -7.218017578125, -6.32958984375, -5.441162109375, -4.552734375, -3.664306640625, -2.77587890625, -1.887451171875, -0.9990234375, -0.110595703125, 0.77783203125, 1.666259765625, 2.5546875, 3.443115234375, 4.33154296875, 5.219970703125, 6.1083984375, 6.996826171875, 7.88525390625, 8.773681640625, 9.662109375, 10.550537109375, 11.43896484375, 12.327392578125, 13.2158203125, 14.104248046875, 14.99267578125, 15.881103515625, 16.76953125, 17.657958984375, 18.54638671875, 19.434814453125, 20.3232421875, 21.211669921875, 22.10009765625, 22.988525390625, 23.876953125, 24.765380859375, 25.65380859375, 26.542236328125, 27.4306640625, 28.319091796875, 29.20751953125, 30.095947265625, 30.984375]}, "gradients/decoder.transformer.h.7.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 2.0, 2.0, 0.0, 3.0, 8.0, 2.0, 9.0, 11.0, 15.0, 8.0, 27.0, 27.0, 50.0, 59.0, 71.0, 96.0, 148.0, 158.0, 234.0, 335.0, 443.0, 440.0, 431.0, 392.0, 280.0, 219.0, 146.0, 118.0, 103.0, 60.0, 38.0, 30.0, 29.0, 23.0, 15.0, 14.0, 7.0, 4.0, 10.0, 7.0, 2.0, 5.0, 2.0, 1.0, 1.0, 0.0, 4.0], "bins": [-21.8125, -21.2506103515625, -20.688720703125, -20.1268310546875, -19.56494140625, -19.0030517578125, -18.441162109375, -17.8792724609375, -17.3173828125, -16.7554931640625, -16.193603515625, -15.6317138671875, -15.06982421875, -14.5079345703125, -13.946044921875, -13.3841552734375, -12.822265625, -12.2603759765625, -11.698486328125, -11.1365966796875, -10.57470703125, -10.0128173828125, -9.450927734375, -8.8890380859375, -8.3271484375, -7.7652587890625, -7.203369140625, -6.6414794921875, -6.07958984375, -5.5177001953125, -4.955810546875, -4.3939208984375, -3.83203125, -3.2701416015625, -2.708251953125, -2.1463623046875, -1.58447265625, -1.0225830078125, -0.460693359375, 0.1011962890625, 0.6630859375, 1.2249755859375, 1.786865234375, 2.3487548828125, 2.91064453125, 3.4725341796875, 4.034423828125, 4.5963134765625, 5.158203125, 5.7200927734375, 6.281982421875, 6.8438720703125, 7.40576171875, 7.9676513671875, 8.529541015625, 9.0914306640625, 9.6533203125, 10.2152099609375, 10.777099609375, 11.3389892578125, 11.90087890625, 12.4627685546875, 13.024658203125, 13.5865478515625, 14.1484375]}, "gradients/decoder.transformer.h.7.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 4.0, 1.0, 1.0, 5.0, 3.0, 4.0, 10.0, 11.0, 21.0, 22.0, 14.0, 33.0, 38.0, 62.0, 78.0, 114.0, 167.0, 203.0, 303.0, 626.0, 2854.0, 235784.0, 3931361.0, 20020.0, 1137.0, 414.0, 247.0, 191.0, 136.0, 121.0, 88.0, 60.0, 48.0, 23.0, 22.0, 17.0, 13.0, 9.0, 8.0, 7.0, 3.0, 2.0, 4.0, 3.0, 3.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-91.375, -88.4541015625, -85.533203125, -82.6123046875, -79.69140625, -76.7705078125, -73.849609375, -70.9287109375, -68.0078125, -65.0869140625, -62.166015625, -59.2451171875, -56.32421875, -53.4033203125, -50.482421875, -47.5615234375, -44.640625, -41.7197265625, -38.798828125, -35.8779296875, -32.95703125, -30.0361328125, -27.115234375, -24.1943359375, -21.2734375, -18.3525390625, -15.431640625, -12.5107421875, -9.58984375, -6.6689453125, -3.748046875, -0.8271484375, 2.09375, 5.0146484375, 7.935546875, 10.8564453125, 13.77734375, 16.6982421875, 19.619140625, 22.5400390625, 25.4609375, 28.3818359375, 31.302734375, 34.2236328125, 37.14453125, 40.0654296875, 42.986328125, 45.9072265625, 48.828125, 51.7490234375, 54.669921875, 57.5908203125, 60.51171875, 63.4326171875, 66.353515625, 69.2744140625, 72.1953125, 75.1162109375, 78.037109375, 80.9580078125, 83.87890625, 86.7998046875, 89.720703125, 92.6416015625, 95.5625]}, "gradients/decoder.transformer.h.7.ln_2.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 6.0, 23.0, 80.0, 142.0, 225.0, 215.0, 173.0, 97.0, 36.0, 12.0, 2.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-192.38648986816406, -188.0321044921875, -183.67770385742188, -179.32330322265625, -174.9689178466797, -170.61453247070312, -166.2601318359375, -161.90573120117188, -157.5513458251953, -153.19696044921875, -148.84255981445312, -144.4881591796875, -140.13377380371094, -135.77938842773438, -131.42498779296875, -127.07059478759766, -122.71620178222656, -118.36180877685547, -114.00741577148438, -109.65302276611328, -105.29862976074219, -100.9442367553711, -96.58984375, -92.2354507446289, -87.88105773925781, -83.52666473388672, -79.17227172851562, -74.81787872314453, -70.46348571777344, -66.10909271240234, -61.75469970703125, -57.400306701660156, -53.045921325683594, -48.6915283203125, -44.337135314941406, -39.98274230957031, -35.62834930419922, -31.273956298828125, -26.91956329345703, -22.565170288085938, -18.210777282714844, -13.85638427734375, -9.501991271972656, -5.1475982666015625, -0.7932052612304688, 3.561187744140625, 7.915580749511719, 12.269973754882812, 16.624366760253906, 20.978759765625, 25.333152770996094, 29.687545776367188, 34.04193878173828, 38.396331787109375, 42.75072479248047, 47.10511779785156, 51.459510803222656, 55.81390380859375, 60.168296813964844, 64.52268981933594, 68.87708282470703, 73.23147583007812, 77.58586883544922, 81.94026184082031, 86.2946548461914]}, "gradients/decoder.transformer.h.7.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 2.0, 2.0, 3.0, 5.0, 4.0, 6.0, 9.0, 10.0, 13.0, 26.0, 16.0, 20.0, 25.0, 27.0, 25.0, 37.0, 51.0, 37.0, 30.0, 53.0, 43.0, 46.0, 54.0, 46.0, 44.0, 40.0, 47.0, 48.0, 26.0, 31.0, 34.0, 27.0, 22.0, 30.0, 11.0, 20.0, 11.0, 6.0, 7.0, 6.0, 3.0, 3.0, 2.0, 5.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-65.0369873046875, -62.9004020690918, -60.763816833496094, -58.62723159790039, -56.49064636230469, -54.35406494140625, -52.21747970581055, -50.080894470214844, -47.94430923461914, -45.80772399902344, -43.671138763427734, -41.53455352783203, -39.397972106933594, -37.261383056640625, -35.12480163574219, -32.988216400146484, -30.85163116455078, -28.715045928955078, -26.578460693359375, -24.441877365112305, -22.3052921295166, -20.1687068939209, -18.032123565673828, -15.895538330078125, -13.758953094482422, -11.622367858886719, -9.485783576965332, -7.349198818206787, -5.212614059448242, -3.076028823852539, -0.9394445419311523, 1.1971397399902344, 3.3337249755859375, 5.470309734344482, 7.606894493103027, 9.743478775024414, 11.880064010620117, 14.01664924621582, 16.15323257446289, 18.289817810058594, 20.426403045654297, 22.56298828125, 24.699573516845703, 26.836156845092773, 28.972742080688477, 31.10932731628418, 33.24591064453125, 35.38249588012695, 37.519081115722656, 39.65566635131836, 41.79225158691406, 43.928836822509766, 46.06542205810547, 48.202003479003906, 50.33858871459961, 52.47517395019531, 54.611759185791016, 56.74834442138672, 58.88492965698242, 61.021514892578125, 63.15809631347656, 65.29468536376953, 67.43126678466797, 69.56785583496094, 71.70443725585938]}, "gradients/decoder.transformer.h.7.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 1.0, 2.0, 5.0, 5.0, 3.0, 10.0, 10.0, 10.0, 15.0, 17.0, 21.0, 14.0, 28.0, 17.0, 20.0, 34.0, 34.0, 31.0, 40.0, 45.0, 44.0, 41.0, 44.0, 50.0, 54.0, 49.0, 37.0, 40.0, 37.0, 30.0, 29.0, 25.0, 19.0, 26.0, 20.0, 16.0, 19.0, 12.0, 11.0, 12.0, 16.0, 6.0, 7.0, 3.0, 2.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-10.796875, -10.450927734375, -10.10498046875, -9.759033203125, -9.4130859375, -9.067138671875, -8.72119140625, -8.375244140625, -8.029296875, -7.683349609375, -7.33740234375, -6.991455078125, -6.6455078125, -6.299560546875, -5.95361328125, -5.607666015625, -5.26171875, -4.915771484375, -4.56982421875, -4.223876953125, -3.8779296875, -3.531982421875, -3.18603515625, -2.840087890625, -2.494140625, -2.148193359375, -1.80224609375, -1.456298828125, -1.1103515625, -0.764404296875, -0.41845703125, -0.072509765625, 0.2734375, 0.619384765625, 0.96533203125, 1.311279296875, 1.6572265625, 2.003173828125, 2.34912109375, 2.695068359375, 3.041015625, 3.386962890625, 3.73291015625, 4.078857421875, 4.4248046875, 4.770751953125, 5.11669921875, 5.462646484375, 5.80859375, 6.154541015625, 6.50048828125, 6.846435546875, 7.1923828125, 7.538330078125, 7.88427734375, 8.230224609375, 8.576171875, 8.922119140625, 9.26806640625, 9.614013671875, 9.9599609375, 10.305908203125, 10.65185546875, 10.997802734375, 11.34375]}, "gradients/decoder.transformer.h.7.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 5.0, 7.0, 10.0, 8.0, 11.0, 22.0, 30.0, 47.0, 79.0, 112.0, 186.0, 283.0, 462.0, 794.0, 1344.0, 2089.0, 3665.0, 6129.0, 10575.0, 18362.0, 31950.0, 56574.0, 98994.0, 169599.0, 226509.0, 176898.0, 104859.0, 59091.0, 33882.0, 18948.0, 11087.0, 6489.0, 3813.0, 2197.0, 1357.0, 767.0, 455.0, 309.0, 223.0, 118.0, 94.0, 54.0, 28.0, 22.0, 12.0, 8.0, 3.0, 3.0, 2.0, 2.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.11328125, -1.0779876708984375, -1.042694091796875, -1.0074005126953125, -0.97210693359375, -0.9368133544921875, -0.901519775390625, -0.8662261962890625, -0.8309326171875, -0.7956390380859375, -0.760345458984375, -0.7250518798828125, -0.68975830078125, -0.6544647216796875, -0.619171142578125, -0.5838775634765625, -0.548583984375, -0.5132904052734375, -0.477996826171875, -0.4427032470703125, -0.40740966796875, -0.3721160888671875, -0.336822509765625, -0.3015289306640625, -0.2662353515625, -0.2309417724609375, -0.195648193359375, -0.1603546142578125, -0.12506103515625, -0.0897674560546875, -0.054473876953125, -0.0191802978515625, 0.01611328125, 0.0514068603515625, 0.086700439453125, 0.1219940185546875, 0.15728759765625, 0.1925811767578125, 0.227874755859375, 0.2631683349609375, 0.2984619140625, 0.3337554931640625, 0.369049072265625, 0.4043426513671875, 0.43963623046875, 0.4749298095703125, 0.510223388671875, 0.5455169677734375, 0.580810546875, 0.6161041259765625, 0.651397705078125, 0.6866912841796875, 0.72198486328125, 0.7572784423828125, 0.792572021484375, 0.8278656005859375, 0.8631591796875, 0.8984527587890625, 0.933746337890625, 0.9690399169921875, 1.00433349609375, 1.0396270751953125, 1.074920654296875, 1.1102142333984375, 1.1455078125]}, "gradients/decoder.transformer.h.7.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 3.0, 1.0, 4.0, 5.0, 3.0, 7.0, 9.0, 4.0, 7.0, 19.0, 10.0, 20.0, 21.0, 28.0, 22.0, 20.0, 33.0, 27.0, 37.0, 25.0, 41.0, 34.0, 31.0, 48.0, 1070.0, 43.0, 35.0, 50.0, 41.0, 38.0, 34.0, 30.0, 29.0, 27.0, 18.0, 23.0, 25.0, 22.0, 21.0, 14.0, 12.0, 12.0, 7.0, 6.0, 8.0, 4.0, 4.0, 1.0, 1.0, 0.0, 1.0, 3.0, 4.0, 1.0], "bins": [-7.14453125, -6.938720703125, -6.73291015625, -6.527099609375, -6.3212890625, -6.115478515625, -5.90966796875, -5.703857421875, -5.498046875, -5.292236328125, -5.08642578125, -4.880615234375, -4.6748046875, -4.468994140625, -4.26318359375, -4.057373046875, -3.8515625, -3.645751953125, -3.43994140625, -3.234130859375, -3.0283203125, -2.822509765625, -2.61669921875, -2.410888671875, -2.205078125, -1.999267578125, -1.79345703125, -1.587646484375, -1.3818359375, -1.176025390625, -0.97021484375, -0.764404296875, -0.55859375, -0.352783203125, -0.14697265625, 0.058837890625, 0.2646484375, 0.470458984375, 0.67626953125, 0.882080078125, 1.087890625, 1.293701171875, 1.49951171875, 1.705322265625, 1.9111328125, 2.116943359375, 2.32275390625, 2.528564453125, 2.734375, 2.940185546875, 3.14599609375, 3.351806640625, 3.5576171875, 3.763427734375, 3.96923828125, 4.175048828125, 4.380859375, 4.586669921875, 4.79248046875, 4.998291015625, 5.2041015625, 5.409912109375, 5.61572265625, 5.821533203125, 6.02734375]}, "gradients/decoder.transformer.h.7.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 2.0, 1.0, 16.0, 6.0, 19.0, 13.0, 34.0, 57.0, 70.0, 128.0, 158.0, 233.0, 346.0, 501.0, 757.0, 1063.0, 1543.0, 2353.0, 3249.0, 5076.0, 7676.0, 10895.0, 16509.0, 24298.0, 36315.0, 53210.0, 77534.0, 107146.0, 138627.0, 1188907.0, 124182.0, 93256.0, 64958.0, 44599.0, 30450.0, 20434.0, 13821.0, 9322.0, 6309.0, 4279.0, 2820.0, 1943.0, 1264.0, 888.0, 590.0, 428.0, 287.0, 166.0, 137.0, 78.0, 60.0, 39.0, 31.0, 30.0, 14.0, 5.0, 6.0, 3.0, 1.0, 3.0, 3.0], "bins": [-0.60986328125, -0.5909652709960938, -0.5720672607421875, -0.5531692504882812, -0.534271240234375, -0.5153732299804688, -0.4964752197265625, -0.47757720947265625, -0.45867919921875, -0.43978118896484375, -0.4208831787109375, -0.40198516845703125, -0.383087158203125, -0.36418914794921875, -0.3452911376953125, -0.32639312744140625, -0.3074951171875, -0.28859710693359375, -0.2696990966796875, -0.25080108642578125, -0.231903076171875, -0.21300506591796875, -0.1941070556640625, -0.17520904541015625, -0.15631103515625, -0.13741302490234375, -0.1185150146484375, -0.09961700439453125, -0.080718994140625, -0.06182098388671875, -0.0429229736328125, -0.02402496337890625, -0.005126953125, 0.01377105712890625, 0.0326690673828125, 0.05156707763671875, 0.070465087890625, 0.08936309814453125, 0.1082611083984375, 0.12715911865234375, 0.14605712890625, 0.16495513916015625, 0.1838531494140625, 0.20275115966796875, 0.221649169921875, 0.24054718017578125, 0.2594451904296875, 0.27834320068359375, 0.2972412109375, 0.31613922119140625, 0.3350372314453125, 0.35393524169921875, 0.372833251953125, 0.39173126220703125, 0.4106292724609375, 0.42952728271484375, 0.44842529296875, 0.46732330322265625, 0.4862213134765625, 0.5051193237304688, 0.524017333984375, 0.5429153442382812, 0.5618133544921875, 0.5807113647460938, 0.599609375]}, "gradients/decoder.transformer.h.7.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0, 0.0, 2.0, 2.0, 2.0, 0.0, 6.0, 1.0, 2.0, 8.0, 12.0, 10.0, 10.0, 11.0, 22.0, 14.0, 22.0, 30.0, 50.0, 45.0, 55.0, 58.0, 72.0, 75.0, 62.0, 71.0, 66.0, 49.0, 44.0, 44.0, 32.0, 22.0, 20.0, 16.0, 18.0, 18.0, 12.0, 6.0, 5.0, 2.0, 9.0, 4.0, 4.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00661468505859375, -0.006408393383026123, -0.006202101707458496, -0.005995810031890869, -0.005789518356323242, -0.005583226680755615, -0.005376935005187988, -0.005170643329620361, -0.004964351654052734, -0.004758059978485107, -0.0045517683029174805, -0.0043454766273498535, -0.0041391849517822266, -0.0039328932762146, -0.0037266016006469727, -0.0035203099250793457, -0.0033140182495117188, -0.003107726573944092, -0.002901434898376465, -0.002695143222808838, -0.002488851547241211, -0.002282559871673584, -0.002076268196105957, -0.00186997652053833, -0.0016636848449707031, -0.0014573931694030762, -0.0012511014938354492, -0.0010448098182678223, -0.0008385181427001953, -0.0006322264671325684, -0.0004259347915649414, -0.00021964311599731445, -1.33514404296875e-05, 0.00019294023513793945, 0.0003992319107055664, 0.0006055235862731934, 0.0008118152618408203, 0.0010181069374084473, 0.0012243986129760742, 0.0014306902885437012, 0.0016369819641113281, 0.001843273639678955, 0.002049565315246582, 0.002255856990814209, 0.002462148666381836, 0.002668440341949463, 0.00287473201751709, 0.003081023693084717, 0.0032873153686523438, 0.0034936070442199707, 0.0036998987197875977, 0.0039061903953552246, 0.0041124820709228516, 0.0043187737464904785, 0.0045250654220581055, 0.004731357097625732, 0.004937648773193359, 0.005143940448760986, 0.005350232124328613, 0.00555652379989624, 0.005762815475463867, 0.005969107151031494, 0.006175398826599121, 0.006381690502166748, 0.006587982177734375]}, "gradients/decoder.transformer.h.7.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 3.0, 3.0, 4.0, 10.0, 5.0, 5.0, 4.0, 10.0, 9.0, 11.0, 17.0, 21.0, 22.0, 25.0, 38.0, 42.0, 55.0, 77.0, 113.0, 165.0, 325.0, 881.0, 53041.0, 987222.0, 5113.0, 527.0, 249.0, 147.0, 105.0, 58.0, 58.0, 36.0, 26.0, 28.0, 27.0, 17.0, 13.0, 9.0, 7.0, 3.0, 10.0, 8.0, 4.0, 1.0, 2.0, 3.0, 3.0, 3.0, 3.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.1240234375, -0.11993026733398438, -0.11583709716796875, -0.11174392700195312, -0.1076507568359375, -0.10355758666992188, -0.09946441650390625, -0.09537124633789062, -0.091278076171875, -0.08718490600585938, -0.08309173583984375, -0.07899856567382812, -0.0749053955078125, -0.07081222534179688, -0.06671905517578125, -0.06262588500976562, -0.05853271484375, -0.054439544677734375, -0.05034637451171875, -0.046253204345703125, -0.0421600341796875, -0.038066864013671875, -0.03397369384765625, -0.029880523681640625, -0.025787353515625, -0.021694183349609375, -0.01760101318359375, -0.013507843017578125, -0.0094146728515625, -0.005321502685546875, -0.00122833251953125, 0.002864837646484375, 0.0069580078125, 0.011051177978515625, 0.01514434814453125, 0.019237518310546875, 0.0233306884765625, 0.027423858642578125, 0.03151702880859375, 0.035610198974609375, 0.039703369140625, 0.043796539306640625, 0.04788970947265625, 0.051982879638671875, 0.0560760498046875, 0.060169219970703125, 0.06426239013671875, 0.06835556030273438, 0.07244873046875, 0.07654190063476562, 0.08063507080078125, 0.08472824096679688, 0.0888214111328125, 0.09291458129882812, 0.09700775146484375, 0.10110092163085938, 0.105194091796875, 0.10928726196289062, 0.11338043212890625, 0.11747360229492188, 0.1215667724609375, 0.12565994262695312, 0.12975311279296875, 0.13384628295898438, 0.137939453125]}, "gradients/decoder.transformer.h.7.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 4.0, 88.0, 680.0, 233.0, 9.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.035988617688417435, -0.03498046472668648, -0.03397230803966522, -0.032964155077934265, -0.03195600211620331, -0.030947847291827202, -0.029939692467451096, -0.02893153950572014, -0.027923384681344032, -0.026915229856967926, -0.02590707689523697, -0.024898922070860863, -0.023890767246484756, -0.0228826142847538, -0.021874459460377693, -0.020866304636001587, -0.01985815167427063, -0.018849996849894524, -0.017841843888163567, -0.01683368906378746, -0.015825536102056503, -0.014817381277680397, -0.01380922645330429, -0.012801072560250759, -0.011792918667197227, -0.010784764774143696, -0.009776610881090164, -0.008768456056714058, -0.007760302163660526, -0.006752148270606995, -0.005743993911892176, -0.004735839553177357, -0.003727683797478676, -0.0027195296715945005, -0.0017113755457103252, -0.0007032214198261499, 0.00030493270605802536, 0.001313086599111557, 0.002321240957826376, 0.003329395316541195, 0.0043375492095947266, 0.005345703102648258, 0.006353857461363077, 0.007362011820077896, 0.008370165713131428, 0.00937831960618496, 0.010386474430561066, 0.011394628323614597, 0.012402782216668129, 0.01341093610972166, 0.014419090002775192, 0.015427244827151299, 0.016435399651527405, 0.017443552613258362, 0.018451707437634468, 0.019459862262010574, 0.02046801522374153, 0.021476170048117638, 0.022484323009848595, 0.0234924778342247, 0.024500630795955658, 0.025508785620331764, 0.02651694044470787, 0.027525093406438828, 0.028533248230814934]}, "gradients/decoder.transformer.h.7.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 3.0, 2.0, 3.0, 0.0, 4.0, 4.0, 2.0, 6.0, 7.0, 10.0, 6.0, 11.0, 12.0, 11.0, 23.0, 13.0, 25.0, 20.0, 26.0, 18.0, 27.0, 29.0, 26.0, 39.0, 37.0, 23.0, 38.0, 36.0, 47.0, 35.0, 29.0, 44.0, 27.0, 27.0, 22.0, 47.0, 32.0, 25.0, 22.0, 25.0, 15.0, 18.0, 22.0, 22.0, 28.0, 14.0, 10.0, 6.0, 6.0, 8.0, 8.0, 6.0, 4.0, 2.0, 2.0, 5.0, 1.0, 1.0, 1.0], "bins": [-0.0030020475387573242, -0.0029143188148736954, -0.0028265900909900665, -0.0027388613671064377, -0.002651132643222809, -0.00256340391933918, -0.002475675195455551, -0.0023879464715719223, -0.0023002177476882935, -0.0022124890238046646, -0.0021247602999210358, -0.002037031576037407, -0.001949302852153778, -0.0018615741282701492, -0.0017738454043865204, -0.0016861166805028915, -0.0015983879566192627, -0.0015106592327356339, -0.001422930508852005, -0.0013352017849683762, -0.0012474730610847473, -0.0011597443372011185, -0.0010720156133174896, -0.0009842868894338608, -0.0008965581655502319, -0.0008088294416666031, -0.0007211007177829742, -0.0006333719938993454, -0.0005456432700157166, -0.0004579145461320877, -0.00037018582224845886, -0.00028245709836483, -0.00019472837448120117, -0.00010699965059757233, -1.927092671394348e-05, 6.845779716968536e-05, 0.0001561865210533142, 0.00024391524493694305, 0.0003316439688205719, 0.00041937269270420074, 0.0005071014165878296, 0.0005948301404714584, 0.0006825588643550873, 0.0007702875882387161, 0.000858016312122345, 0.0009457450360059738, 0.0010334737598896027, 0.0011212024837732315, 0.0012089312076568604, 0.0012966599315404892, 0.001384388655424118, 0.0014721173793077469, 0.0015598461031913757, 0.0016475748270750046, 0.0017353035509586334, 0.0018230322748422623, 0.0019107609987258911, 0.00199848972260952, 0.002086218446493149, 0.0021739471703767776, 0.0022616758942604065, 0.0023494046181440353, 0.002437133342027664, 0.002524862065911293, 0.002612590789794922]}, "gradients/decoder.transformer.h.7.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 1.0, 2.0, 5.0, 5.0, 3.0, 10.0, 10.0, 10.0, 15.0, 17.0, 21.0, 14.0, 28.0, 17.0, 21.0, 33.0, 34.0, 31.0, 40.0, 45.0, 44.0, 41.0, 44.0, 50.0, 54.0, 49.0, 37.0, 40.0, 37.0, 30.0, 29.0, 25.0, 19.0, 26.0, 20.0, 16.0, 19.0, 12.0, 11.0, 12.0, 16.0, 6.0, 7.0, 3.0, 2.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-10.796875, -10.450927734375, -10.10498046875, -9.759033203125, -9.4130859375, -9.067138671875, -8.72119140625, -8.375244140625, -8.029296875, -7.683349609375, -7.33740234375, -6.991455078125, -6.6455078125, -6.299560546875, -5.95361328125, -5.607666015625, -5.26171875, -4.915771484375, -4.56982421875, -4.223876953125, -3.8779296875, -3.531982421875, -3.18603515625, -2.840087890625, -2.494140625, -2.148193359375, -1.80224609375, -1.456298828125, -1.1103515625, -0.764404296875, -0.41845703125, -0.072509765625, 0.2734375, 0.619384765625, 0.96533203125, 1.311279296875, 1.6572265625, 2.003173828125, 2.34912109375, 2.695068359375, 3.041015625, 3.386962890625, 3.73291015625, 4.078857421875, 4.4248046875, 4.770751953125, 5.11669921875, 5.462646484375, 5.80859375, 6.154541015625, 6.50048828125, 6.846435546875, 7.1923828125, 7.538330078125, 7.88427734375, 8.230224609375, 8.576171875, 8.922119140625, 9.26806640625, 9.614013671875, 9.9599609375, 10.305908203125, 10.65185546875, 10.997802734375, 11.34375]}, "gradients/decoder.transformer.h.7.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 1.0, 3.0, 6.0, 5.0, 6.0, 7.0, 12.0, 14.0, 17.0, 25.0, 33.0, 39.0, 51.0, 53.0, 74.0, 120.0, 204.0, 268.0, 535.0, 1431.0, 5978.0, 31393.0, 274150.0, 655872.0, 63441.0, 10709.0, 2332.0, 683.0, 367.0, 205.0, 145.0, 90.0, 61.0, 55.0, 38.0, 28.0, 28.0, 18.0, 13.0, 18.0, 15.0, 7.0, 7.0, 4.0, 2.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-21.546875, -20.85595703125, -20.1650390625, -19.47412109375, -18.783203125, -18.09228515625, -17.4013671875, -16.71044921875, -16.01953125, -15.32861328125, -14.6376953125, -13.94677734375, -13.255859375, -12.56494140625, -11.8740234375, -11.18310546875, -10.4921875, -9.80126953125, -9.1103515625, -8.41943359375, -7.728515625, -7.03759765625, -6.3466796875, -5.65576171875, -4.96484375, -4.27392578125, -3.5830078125, -2.89208984375, -2.201171875, -1.51025390625, -0.8193359375, -0.12841796875, 0.5625, 1.25341796875, 1.9443359375, 2.63525390625, 3.326171875, 4.01708984375, 4.7080078125, 5.39892578125, 6.08984375, 6.78076171875, 7.4716796875, 8.16259765625, 8.853515625, 9.54443359375, 10.2353515625, 10.92626953125, 11.6171875, 12.30810546875, 12.9990234375, 13.68994140625, 14.380859375, 15.07177734375, 15.7626953125, 16.45361328125, 17.14453125, 17.83544921875, 18.5263671875, 19.21728515625, 19.908203125, 20.59912109375, 21.2900390625, 21.98095703125, 22.671875]}, "gradients/decoder.transformer.h.7.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 0.0, 3.0, 2.0, 6.0, 5.0, 5.0, 5.0, 7.0, 7.0, 10.0, 11.0, 12.0, 13.0, 16.0, 25.0, 29.0, 30.0, 39.0, 26.0, 43.0, 49.0, 65.0, 70.0, 137.0, 1608.0, 315.0, 100.0, 50.0, 52.0, 40.0, 36.0, 37.0, 31.0, 32.0, 19.0, 21.0, 15.0, 18.0, 20.0, 8.0, 5.0, 11.0, 11.0, 3.0, 2.0, 0.0, 0.0, 5.0, 3.0, 1.0, 0.0, 1.0, 3.0, 2.0], "bins": [-36.78125, -35.723388671875, -34.66552734375, -33.607666015625, -32.5498046875, -31.491943359375, -30.43408203125, -29.376220703125, -28.318359375, -27.260498046875, -26.20263671875, -25.144775390625, -24.0869140625, -23.029052734375, -21.97119140625, -20.913330078125, -19.85546875, -18.797607421875, -17.73974609375, -16.681884765625, -15.6240234375, -14.566162109375, -13.50830078125, -12.450439453125, -11.392578125, -10.334716796875, -9.27685546875, -8.218994140625, -7.1611328125, -6.103271484375, -5.04541015625, -3.987548828125, -2.9296875, -1.871826171875, -0.81396484375, 0.243896484375, 1.3017578125, 2.359619140625, 3.41748046875, 4.475341796875, 5.533203125, 6.591064453125, 7.64892578125, 8.706787109375, 9.7646484375, 10.822509765625, 11.88037109375, 12.938232421875, 13.99609375, 15.053955078125, 16.11181640625, 17.169677734375, 18.2275390625, 19.285400390625, 20.34326171875, 21.401123046875, 22.458984375, 23.516845703125, 24.57470703125, 25.632568359375, 26.6904296875, 27.748291015625, 28.80615234375, 29.864013671875, 30.921875]}, "gradients/decoder.transformer.h.7.attn.c_attn.weight": {"_type": "histogram", "values": [2.0, 1.0, 4.0, 0.0, 2.0, 5.0, 4.0, 1.0, 0.0, 1.0, 3.0, 11.0, 9.0, 12.0, 5.0, 16.0, 23.0, 18.0, 25.0, 33.0, 49.0, 49.0, 83.0, 113.0, 162.0, 268.0, 653.0, 3575.0, 1280845.0, 1854245.0, 3944.0, 661.0, 304.0, 148.0, 93.0, 75.0, 70.0, 40.0, 28.0, 31.0, 19.0, 16.0, 12.0, 10.0, 12.0, 11.0, 4.0, 7.0, 7.0, 3.0, 5.0, 1.0, 2.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-71.0, -68.552734375, -66.10546875, -63.658203125, -61.2109375, -58.763671875, -56.31640625, -53.869140625, -51.421875, -48.974609375, -46.52734375, -44.080078125, -41.6328125, -39.185546875, -36.73828125, -34.291015625, -31.84375, -29.396484375, -26.94921875, -24.501953125, -22.0546875, -19.607421875, -17.16015625, -14.712890625, -12.265625, -9.818359375, -7.37109375, -4.923828125, -2.4765625, -0.029296875, 2.41796875, 4.865234375, 7.3125, 9.759765625, 12.20703125, 14.654296875, 17.1015625, 19.548828125, 21.99609375, 24.443359375, 26.890625, 29.337890625, 31.78515625, 34.232421875, 36.6796875, 39.126953125, 41.57421875, 44.021484375, 46.46875, 48.916015625, 51.36328125, 53.810546875, 56.2578125, 58.705078125, 61.15234375, 63.599609375, 66.046875, 68.494140625, 70.94140625, 73.388671875, 75.8359375, 78.283203125, 80.73046875, 83.177734375, 85.625]}, "gradients/decoder.transformer.h.7.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 4.0, 4.0, 45.0, 299.0, 465.0, 178.0, 23.0], "bins": [-242.27796936035156, -238.32241821289062, -234.36688232421875, -230.4113311767578, -226.45578002929688, -222.50022888183594, -218.54469299316406, -214.58914184570312, -210.6335906982422, -206.67803955078125, -202.72250366210938, -198.76695251464844, -194.8114013671875, -190.85585021972656, -186.9003143310547, -182.94476318359375, -178.98922729492188, -175.03367614746094, -171.07814025878906, -167.12258911132812, -163.1670379638672, -159.21148681640625, -155.25595092773438, -151.30039978027344, -147.3448486328125, -143.38929748535156, -139.4337615966797, -135.47821044921875, -131.5226593017578, -127.5671157836914, -123.611572265625, -119.65602111816406, -115.70047760009766, -111.74493408203125, -107.78938293457031, -103.8338394165039, -99.87828826904297, -95.92274475097656, -91.96719360351562, -88.01165008544922, -84.05610656738281, -80.1005630493164, -76.14501190185547, -72.18946838378906, -68.23391723632812, -64.27837371826172, -60.32282638549805, -56.367279052734375, -52.41172790527344, -48.456180572509766, -44.500633239746094, -40.54508972167969, -36.58953857421875, -32.633995056152344, -28.678447723388672, -24.722900390625, -20.767353057861328, -16.811805725097656, -12.8562593460083, -8.900712966918945, -4.945165634155273, -0.9896183013916016, 2.9659271240234375, 6.921474456787109, 10.877021789550781]}, "gradients/decoder.transformer.h.7.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 3.0, 5.0, 3.0, 6.0, 5.0, 5.0, 10.0, 12.0, 12.0, 8.0, 15.0, 18.0, 23.0, 23.0, 21.0, 29.0, 28.0, 29.0, 32.0, 35.0, 32.0, 35.0, 38.0, 39.0, 46.0, 47.0, 37.0, 37.0, 42.0, 44.0, 30.0, 36.0, 29.0, 24.0, 28.0, 17.0, 22.0, 21.0, 17.0, 19.0, 11.0, 7.0, 2.0, 6.0, 7.0, 5.0, 8.0, 4.0, 3.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-75.22042083740234, -72.49496459960938, -69.76951599121094, -67.04405975341797, -64.318603515625, -61.5931510925293, -58.867698669433594, -56.142242431640625, -53.41679000854492, -50.69133758544922, -47.96588134765625, -45.24042892456055, -42.514976501464844, -39.789520263671875, -37.06406784057617, -34.33861541748047, -31.6131591796875, -28.887704849243164, -26.162250518798828, -23.436798095703125, -20.71134376525879, -17.985889434814453, -15.26043701171875, -12.534982681274414, -9.809528350830078, -7.0840744972229, -4.358620643615723, -1.6331672668457031, 1.0922870635986328, 3.8177413940429688, 6.543193817138672, 9.268648147583008, 11.994094848632812, 14.719549179077148, 17.445003509521484, 20.170455932617188, 22.895910263061523, 25.62136459350586, 28.346817016601562, 31.0722713470459, 33.797725677490234, 36.52317810058594, 39.248634338378906, 41.97408676147461, 44.69953918457031, 47.42499542236328, 50.150447845458984, 52.87590026855469, 55.601356506347656, 58.32680892944336, 61.05226516723633, 63.77771759033203, 66.503173828125, 69.22862243652344, 71.9540786743164, 74.67953491210938, 77.40498352050781, 80.13043975830078, 82.85588836669922, 85.58134460449219, 88.30680084228516, 91.03225708007812, 93.75770568847656, 96.48316192626953, 99.2086181640625]}, "gradients/decoder.transformer.h.6.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 1.0, 2.0, 7.0, 3.0, 4.0, 11.0, 6.0, 7.0, 11.0, 16.0, 22.0, 10.0, 21.0, 21.0, 19.0, 31.0, 31.0, 23.0, 27.0, 37.0, 32.0, 53.0, 39.0, 50.0, 39.0, 50.0, 40.0, 41.0, 41.0, 40.0, 30.0, 27.0, 25.0, 23.0, 28.0, 16.0, 18.0, 16.0, 14.0, 18.0, 15.0, 9.0, 15.0, 8.0, 5.0, 3.0, 3.0, 3.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-10.84375, -10.5103759765625, -10.177001953125, -9.8436279296875, -9.51025390625, -9.1768798828125, -8.843505859375, -8.5101318359375, -8.1767578125, -7.8433837890625, -7.510009765625, -7.1766357421875, -6.84326171875, -6.5098876953125, -6.176513671875, -5.8431396484375, -5.509765625, -5.1763916015625, -4.843017578125, -4.5096435546875, -4.17626953125, -3.8428955078125, -3.509521484375, -3.1761474609375, -2.8427734375, -2.5093994140625, -2.176025390625, -1.8426513671875, -1.50927734375, -1.1759033203125, -0.842529296875, -0.5091552734375, -0.17578125, 0.1575927734375, 0.490966796875, 0.8243408203125, 1.15771484375, 1.4910888671875, 1.824462890625, 2.1578369140625, 2.4912109375, 2.8245849609375, 3.157958984375, 3.4913330078125, 3.82470703125, 4.1580810546875, 4.491455078125, 4.8248291015625, 5.158203125, 5.4915771484375, 5.824951171875, 6.1583251953125, 6.49169921875, 6.8250732421875, 7.158447265625, 7.4918212890625, 7.8251953125, 8.1585693359375, 8.491943359375, 8.8253173828125, 9.15869140625, 9.4920654296875, 9.825439453125, 10.1588134765625, 10.4921875]}, "gradients/decoder.transformer.h.6.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 4.0, 2.0, 2.0, 3.0, 6.0, 3.0, 9.0, 11.0, 8.0, 11.0, 10.0, 10.0, 15.0, 12.0, 12.0, 31.0, 31.0, 36.0, 46.0, 57.0, 96.0, 171.0, 362.0, 830.0, 2822.0, 12230.0, 80965.0, 965423.0, 2655609.0, 424389.0, 40802.0, 7178.0, 1744.0, 578.0, 258.0, 161.0, 78.0, 61.0, 46.0, 27.0, 8.0, 17.0, 15.0, 18.0, 14.0, 13.0, 10.0, 11.0, 11.0, 10.0, 4.0, 7.0, 5.0, 4.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-22.359375, -21.59716796875, -20.8349609375, -20.07275390625, -19.310546875, -18.54833984375, -17.7861328125, -17.02392578125, -16.26171875, -15.49951171875, -14.7373046875, -13.97509765625, -13.212890625, -12.45068359375, -11.6884765625, -10.92626953125, -10.1640625, -9.40185546875, -8.6396484375, -7.87744140625, -7.115234375, -6.35302734375, -5.5908203125, -4.82861328125, -4.06640625, -3.30419921875, -2.5419921875, -1.77978515625, -1.017578125, -0.25537109375, 0.5068359375, 1.26904296875, 2.03125, 2.79345703125, 3.5556640625, 4.31787109375, 5.080078125, 5.84228515625, 6.6044921875, 7.36669921875, 8.12890625, 8.89111328125, 9.6533203125, 10.41552734375, 11.177734375, 11.93994140625, 12.7021484375, 13.46435546875, 14.2265625, 14.98876953125, 15.7509765625, 16.51318359375, 17.275390625, 18.03759765625, 18.7998046875, 19.56201171875, 20.32421875, 21.08642578125, 21.8486328125, 22.61083984375, 23.373046875, 24.13525390625, 24.8974609375, 25.65966796875, 26.421875]}, "gradients/decoder.transformer.h.6.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 0.0, 2.0, 1.0, 5.0, 8.0, 6.0, 8.0, 10.0, 10.0, 14.0, 24.0, 24.0, 30.0, 73.0, 59.0, 80.0, 98.0, 136.0, 140.0, 209.0, 277.0, 376.0, 386.0, 421.0, 379.0, 316.0, 250.0, 183.0, 145.0, 106.0, 64.0, 68.0, 37.0, 33.0, 29.0, 16.0, 14.0, 8.0, 15.0, 9.0, 7.0, 2.0, 3.0, 3.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-17.984375, -17.486328125, -16.98828125, -16.490234375, -15.9921875, -15.494140625, -14.99609375, -14.498046875, -14.0, -13.501953125, -13.00390625, -12.505859375, -12.0078125, -11.509765625, -11.01171875, -10.513671875, -10.015625, -9.517578125, -9.01953125, -8.521484375, -8.0234375, -7.525390625, -7.02734375, -6.529296875, -6.03125, -5.533203125, -5.03515625, -4.537109375, -4.0390625, -3.541015625, -3.04296875, -2.544921875, -2.046875, -1.548828125, -1.05078125, -0.552734375, -0.0546875, 0.443359375, 0.94140625, 1.439453125, 1.9375, 2.435546875, 2.93359375, 3.431640625, 3.9296875, 4.427734375, 4.92578125, 5.423828125, 5.921875, 6.419921875, 6.91796875, 7.416015625, 7.9140625, 8.412109375, 8.91015625, 9.408203125, 9.90625, 10.404296875, 10.90234375, 11.400390625, 11.8984375, 12.396484375, 12.89453125, 13.392578125, 13.890625]}, "gradients/decoder.transformer.h.6.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 4.0, 5.0, 7.0, 8.0, 12.0, 15.0, 24.0, 28.0, 44.0, 51.0, 56.0, 84.0, 136.0, 140.0, 261.0, 465.0, 1225.0, 11345.0, 1659784.0, 2503227.0, 14684.0, 1240.0, 462.0, 259.0, 187.0, 136.0, 97.0, 68.0, 54.0, 44.0, 26.0, 29.0, 22.0, 12.0, 15.0, 9.0, 5.0, 4.0, 5.0, 7.0, 3.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-70.4375, -68.16796875, -65.8984375, -63.62890625, -61.359375, -59.08984375, -56.8203125, -54.55078125, -52.28125, -50.01171875, -47.7421875, -45.47265625, -43.203125, -40.93359375, -38.6640625, -36.39453125, -34.125, -31.85546875, -29.5859375, -27.31640625, -25.046875, -22.77734375, -20.5078125, -18.23828125, -15.96875, -13.69921875, -11.4296875, -9.16015625, -6.890625, -4.62109375, -2.3515625, -0.08203125, 2.1875, 4.45703125, 6.7265625, 8.99609375, 11.265625, 13.53515625, 15.8046875, 18.07421875, 20.34375, 22.61328125, 24.8828125, 27.15234375, 29.421875, 31.69140625, 33.9609375, 36.23046875, 38.5, 40.76953125, 43.0390625, 45.30859375, 47.578125, 49.84765625, 52.1171875, 54.38671875, 56.65625, 58.92578125, 61.1953125, 63.46484375, 65.734375, 68.00390625, 70.2734375, 72.54296875, 74.8125]}, "gradients/decoder.transformer.h.6.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 56.0, 414.0, 454.0, 87.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-488.49615478515625, -477.9068603515625, -467.31756591796875, -456.728271484375, -446.13897705078125, -435.5496826171875, -424.96038818359375, -414.3711242675781, -403.7818298339844, -393.1925354003906, -382.6032409667969, -372.0139465332031, -361.4246520996094, -350.83538818359375, -340.24609375, -329.65679931640625, -319.0675048828125, -308.47821044921875, -297.888916015625, -287.29962158203125, -276.7103271484375, -266.12103271484375, -255.53175354003906, -244.94247436523438, -234.3531494140625, -223.76385498046875, -213.174560546875, -202.58526611328125, -191.99598693847656, -181.4066925048828, -170.81739807128906, -160.22811889648438, -149.63880920410156, -139.0495147705078, -128.46022033691406, -117.87093353271484, -107.28164672851562, -96.69235229492188, -86.10305786132812, -75.5137710571289, -64.92447662353516, -54.33518600463867, -43.74589538574219, -33.15660095214844, -22.567310333251953, -11.978019714355469, -1.3887252807617188, 9.2005615234375, 19.78985595703125, 30.379146575927734, 40.96843719482422, 51.55773162841797, 62.14702224731445, 72.73631286621094, 83.32560729980469, 93.9148941040039, 104.50418853759766, 115.0934829711914, 125.68276977539062, 136.27206420898438, 146.86135864257812, 157.45065307617188, 168.03994750976562, 178.6292266845703, 189.21852111816406]}, "gradients/decoder.transformer.h.6.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 3.0, 2.0, 4.0, 2.0, 1.0, 5.0, 5.0, 5.0, 5.0, 6.0, 9.0, 12.0, 9.0, 18.0, 11.0, 14.0, 20.0, 23.0, 24.0, 26.0, 26.0, 27.0, 31.0, 27.0, 39.0, 42.0, 35.0, 37.0, 43.0, 36.0, 32.0, 30.0, 32.0, 36.0, 43.0, 29.0, 31.0, 27.0, 25.0, 23.0, 24.0, 22.0, 22.0, 14.0, 13.0, 19.0, 12.0, 9.0, 9.0, 5.0, 5.0, 3.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 2.0, 1.0], "bins": [-56.32893753051758, -54.6518440246582, -52.97475051879883, -51.29765319824219, -49.62055969238281, -47.94346618652344, -46.26637268066406, -44.58927917480469, -42.91218566894531, -41.23509216308594, -39.55799865722656, -37.88090515136719, -36.20380783081055, -34.52671432495117, -32.8496208190918, -31.172527313232422, -29.49542999267578, -27.818336486816406, -26.1412410736084, -24.464147567749023, -22.787052154541016, -21.10995864868164, -19.432865142822266, -17.75577163696289, -16.078676223754883, -14.401581764221191, -12.7244873046875, -11.047393798828125, -9.370299339294434, -7.693204879760742, -6.016111373901367, -4.339016914367676, -2.6619186401367188, -0.9848244190216064, 0.6922698020935059, 2.369363784790039, 4.0464582443237305, 5.723552703857422, 7.400646209716797, 9.077740669250488, 10.75483512878418, 12.431929588317871, 14.109024047851562, 15.786117553710938, 17.463211059570312, 19.14030647277832, 20.817399978637695, 22.494495391845703, 24.171588897705078, 25.848682403564453, 27.52577781677246, 29.202871322631836, 30.879966735839844, 32.55706024169922, 34.234153747558594, 35.91124725341797, 37.588340759277344, 39.26543426513672, 40.942527770996094, 42.61962127685547, 44.29671859741211, 45.973812103271484, 47.65090560913086, 49.327999114990234, 51.005096435546875]}, "gradients/decoder.transformer.h.6.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 3.0, 4.0, 8.0, 5.0, 3.0, 5.0, 8.0, 16.0, 6.0, 20.0, 17.0, 31.0, 14.0, 28.0, 40.0, 28.0, 31.0, 41.0, 33.0, 54.0, 50.0, 55.0, 54.0, 46.0, 35.0, 41.0, 58.0, 29.0, 40.0, 30.0, 21.0, 26.0, 20.0, 18.0, 28.0, 10.0, 15.0, 10.0, 8.0, 8.0, 6.0, 3.0, 3.0, 4.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.2734375, -11.892333984375, -11.51123046875, -11.130126953125, -10.7490234375, -10.367919921875, -9.98681640625, -9.605712890625, -9.224609375, -8.843505859375, -8.46240234375, -8.081298828125, -7.7001953125, -7.319091796875, -6.93798828125, -6.556884765625, -6.17578125, -5.794677734375, -5.41357421875, -5.032470703125, -4.6513671875, -4.270263671875, -3.88916015625, -3.508056640625, -3.126953125, -2.745849609375, -2.36474609375, -1.983642578125, -1.6025390625, -1.221435546875, -0.84033203125, -0.459228515625, -0.078125, 0.302978515625, 0.68408203125, 1.065185546875, 1.4462890625, 1.827392578125, 2.20849609375, 2.589599609375, 2.970703125, 3.351806640625, 3.73291015625, 4.114013671875, 4.4951171875, 4.876220703125, 5.25732421875, 5.638427734375, 6.01953125, 6.400634765625, 6.78173828125, 7.162841796875, 7.5439453125, 7.925048828125, 8.30615234375, 8.687255859375, 9.068359375, 9.449462890625, 9.83056640625, 10.211669921875, 10.5927734375, 10.973876953125, 11.35498046875, 11.736083984375, 12.1171875]}, "gradients/decoder.transformer.h.6.crossattention.c_proj.weight": {"_type": "histogram", "values": [3.0, 3.0, 1.0, 4.0, 4.0, 2.0, 5.0, 8.0, 21.0, 8.0, 33.0, 27.0, 53.0, 85.0, 141.0, 167.0, 306.0, 437.0, 617.0, 1029.0, 1480.0, 2315.0, 3522.0, 5573.0, 8629.0, 13435.0, 21766.0, 33993.0, 54627.0, 86496.0, 135225.0, 180931.0, 171300.0, 119684.0, 75993.0, 48224.0, 29891.0, 18909.0, 11861.0, 7726.0, 4960.0, 3164.0, 2039.0, 1227.0, 879.0, 592.0, 386.0, 249.0, 194.0, 116.0, 70.0, 46.0, 42.0, 21.0, 15.0, 14.0, 8.0, 6.0, 7.0, 0.0, 4.0, 1.0, 0.0, 2.0], "bins": [-0.947265625, -0.9175338745117188, -0.8878021240234375, -0.8580703735351562, -0.828338623046875, -0.7986068725585938, -0.7688751220703125, -0.7391433715820312, -0.70941162109375, -0.6796798706054688, -0.6499481201171875, -0.6202163696289062, -0.590484619140625, -0.5607528686523438, -0.5310211181640625, -0.5012893676757812, -0.4715576171875, -0.44182586669921875, -0.4120941162109375, -0.38236236572265625, -0.352630615234375, -0.32289886474609375, -0.2931671142578125, -0.26343536376953125, -0.23370361328125, -0.20397186279296875, -0.1742401123046875, -0.14450836181640625, -0.114776611328125, -0.08504486083984375, -0.0553131103515625, -0.02558135986328125, 0.004150390625, 0.03388214111328125, 0.0636138916015625, 0.09334564208984375, 0.123077392578125, 0.15280914306640625, 0.1825408935546875, 0.21227264404296875, 0.24200439453125, 0.27173614501953125, 0.3014678955078125, 0.33119964599609375, 0.360931396484375, 0.39066314697265625, 0.4203948974609375, 0.45012664794921875, 0.4798583984375, 0.5095901489257812, 0.5393218994140625, 0.5690536499023438, 0.598785400390625, 0.6285171508789062, 0.6582489013671875, 0.6879806518554688, 0.71771240234375, 0.7474441528320312, 0.7771759033203125, 0.8069076538085938, 0.836639404296875, 0.8663711547851562, 0.8961029052734375, 0.9258346557617188, 0.95556640625]}, "gradients/decoder.transformer.h.6.crossattention.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 3.0, 0.0, 2.0, 7.0, 3.0, 5.0, 6.0, 7.0, 10.0, 13.0, 13.0, 18.0, 13.0, 25.0, 23.0, 32.0, 29.0, 37.0, 38.0, 34.0, 39.0, 32.0, 43.0, 48.0, 32.0, 1073.0, 36.0, 43.0, 42.0, 38.0, 27.0, 37.0, 36.0, 23.0, 35.0, 19.0, 17.0, 25.0, 12.0, 8.0, 9.0, 7.0, 6.0, 13.0, 2.0, 6.0, 7.0, 2.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-7.44921875, -7.23040771484375, -7.0115966796875, -6.79278564453125, -6.573974609375, -6.35516357421875, -6.1363525390625, -5.91754150390625, -5.69873046875, -5.47991943359375, -5.2611083984375, -5.04229736328125, -4.823486328125, -4.60467529296875, -4.3858642578125, -4.16705322265625, -3.9482421875, -3.72943115234375, -3.5106201171875, -3.29180908203125, -3.072998046875, -2.85418701171875, -2.6353759765625, -2.41656494140625, -2.19775390625, -1.97894287109375, -1.7601318359375, -1.54132080078125, -1.322509765625, -1.10369873046875, -0.8848876953125, -0.66607666015625, -0.447265625, -0.22845458984375, -0.0096435546875, 0.20916748046875, 0.427978515625, 0.64678955078125, 0.8656005859375, 1.08441162109375, 1.30322265625, 1.52203369140625, 1.7408447265625, 1.95965576171875, 2.178466796875, 2.39727783203125, 2.6160888671875, 2.83489990234375, 3.0537109375, 3.27252197265625, 3.4913330078125, 3.71014404296875, 3.928955078125, 4.14776611328125, 4.3665771484375, 4.58538818359375, 4.80419921875, 5.02301025390625, 5.2418212890625, 5.46063232421875, 5.679443359375, 5.89825439453125, 6.1170654296875, 6.33587646484375, 6.5546875]}, "gradients/decoder.transformer.h.6.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 4.0, 1.0, 6.0, 10.0, 21.0, 20.0, 22.0, 44.0, 80.0, 93.0, 138.0, 197.0, 283.0, 398.0, 581.0, 780.0, 1123.0, 1558.0, 2283.0, 3335.0, 4802.0, 7112.0, 10095.0, 14793.0, 20892.0, 30620.0, 44083.0, 63104.0, 86949.0, 114467.0, 1123347.0, 188005.0, 107773.0, 81074.0, 57693.0, 40185.0, 27968.0, 19606.0, 13320.0, 9396.0, 6321.0, 4409.0, 3016.0, 2120.0, 1477.0, 1113.0, 743.0, 490.0, 362.0, 255.0, 170.0, 139.0, 89.0, 60.0, 36.0, 30.0, 20.0, 12.0, 7.0, 11.0, 3.0, 3.0, 3.0], "bins": [-0.58203125, -0.5637741088867188, -0.5455169677734375, -0.5272598266601562, -0.509002685546875, -0.49074554443359375, -0.4724884033203125, -0.45423126220703125, -0.43597412109375, -0.41771697998046875, -0.3994598388671875, -0.38120269775390625, -0.362945556640625, -0.34468841552734375, -0.3264312744140625, -0.30817413330078125, -0.2899169921875, -0.27165985107421875, -0.2534027099609375, -0.23514556884765625, -0.216888427734375, -0.19863128662109375, -0.1803741455078125, -0.16211700439453125, -0.14385986328125, -0.12560272216796875, -0.1073455810546875, -0.08908843994140625, -0.070831298828125, -0.05257415771484375, -0.0343170166015625, -0.01605987548828125, 0.002197265625, 0.02045440673828125, 0.0387115478515625, 0.05696868896484375, 0.075225830078125, 0.09348297119140625, 0.1117401123046875, 0.12999725341796875, 0.14825439453125, 0.16651153564453125, 0.1847686767578125, 0.20302581787109375, 0.221282958984375, 0.23954010009765625, 0.2577972412109375, 0.27605438232421875, 0.2943115234375, 0.31256866455078125, 0.3308258056640625, 0.34908294677734375, 0.367340087890625, 0.38559722900390625, 0.4038543701171875, 0.42211151123046875, 0.44036865234375, 0.45862579345703125, 0.4768829345703125, 0.49514007568359375, 0.513397216796875, 0.5316543579101562, 0.5499114990234375, 0.5681686401367188, 0.58642578125]}, "gradients/decoder.transformer.h.6.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 1.0, 2.0, 1.0, 1.0, 2.0, 4.0, 5.0, 5.0, 10.0, 5.0, 5.0, 6.0, 11.0, 13.0, 4.0, 8.0, 17.0, 20.0, 17.0, 26.0, 28.0, 34.0, 50.0, 45.0, 56.0, 82.0, 83.0, 73.0, 64.0, 72.0, 43.0, 28.0, 36.0, 20.0, 25.0, 20.0, 13.0, 16.0, 17.0, 9.0, 7.0, 9.0, 1.0, 3.0, 4.0, 2.0, 5.0, 3.0, 1.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.007778167724609375, -0.007513225078582764, -0.007248282432556152, -0.006983339786529541, -0.00671839714050293, -0.006453454494476318, -0.006188511848449707, -0.005923569202423096, -0.005658626556396484, -0.005393683910369873, -0.005128741264343262, -0.00486379861831665, -0.004598855972290039, -0.004333913326263428, -0.004068970680236816, -0.003804028034210205, -0.0035390853881835938, -0.0032741427421569824, -0.003009200096130371, -0.0027442574501037598, -0.0024793148040771484, -0.002214372158050537, -0.0019494295120239258, -0.0016844868659973145, -0.0014195442199707031, -0.0011546015739440918, -0.0008896589279174805, -0.0006247162818908691, -0.0003597736358642578, -9.483098983764648e-05, 0.00017011165618896484, 0.00043505430221557617, 0.0006999969482421875, 0.0009649395942687988, 0.0012298822402954102, 0.0014948248863220215, 0.0017597675323486328, 0.002024710178375244, 0.0022896528244018555, 0.002554595470428467, 0.002819538116455078, 0.0030844807624816895, 0.0033494234085083008, 0.003614366054534912, 0.0038793087005615234, 0.004144251346588135, 0.004409193992614746, 0.004674136638641357, 0.004939079284667969, 0.00520402193069458, 0.005468964576721191, 0.005733907222747803, 0.005998849868774414, 0.006263792514801025, 0.006528735160827637, 0.006793677806854248, 0.007058620452880859, 0.007323563098907471, 0.007588505744934082, 0.007853448390960693, 0.008118391036987305, 0.008383333683013916, 0.008648276329040527, 0.008913218975067139, 0.00917816162109375]}, "gradients/decoder.transformer.h.6.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 3.0, 0.0, 3.0, 3.0, 2.0, 4.0, 3.0, 7.0, 7.0, 8.0, 9.0, 13.0, 24.0, 28.0, 27.0, 42.0, 44.0, 61.0, 98.0, 131.0, 247.0, 551.0, 15081.0, 1025457.0, 5515.0, 500.0, 240.0, 106.0, 69.0, 50.0, 49.0, 40.0, 29.0, 18.0, 22.0, 8.0, 11.0, 4.0, 14.0, 9.0, 6.0, 8.0, 3.0, 1.0, 2.0, 5.0, 3.0, 2.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.229248046875, -0.2227764129638672, -0.21630477905273438, -0.20983314514160156, -0.20336151123046875, -0.19688987731933594, -0.19041824340820312, -0.1839466094970703, -0.1774749755859375, -0.1710033416748047, -0.16453170776367188, -0.15806007385253906, -0.15158843994140625, -0.14511680603027344, -0.13864517211914062, -0.1321735382080078, -0.125701904296875, -0.11923027038574219, -0.11275863647460938, -0.10628700256347656, -0.09981536865234375, -0.09334373474121094, -0.08687210083007812, -0.08040046691894531, -0.0739288330078125, -0.06745719909667969, -0.060985565185546875, -0.05451393127441406, -0.04804229736328125, -0.04157066345214844, -0.035099029541015625, -0.028627395629882812, -0.02215576171875, -0.015684127807617188, -0.009212493896484375, -0.0027408599853515625, 0.00373077392578125, 0.010202407836914062, 0.016674041748046875, 0.023145675659179688, 0.0296173095703125, 0.03608894348144531, 0.042560577392578125, 0.04903221130371094, 0.05550384521484375, 0.06197547912597656, 0.06844711303710938, 0.07491874694824219, 0.081390380859375, 0.08786201477050781, 0.09433364868164062, 0.10080528259277344, 0.10727691650390625, 0.11374855041503906, 0.12022018432617188, 0.1266918182373047, 0.1331634521484375, 0.1396350860595703, 0.14610671997070312, 0.15257835388183594, 0.15904998779296875, 0.16552162170410156, 0.17199325561523438, 0.1784648895263672, 0.1849365234375]}, "gradients/decoder.transformer.h.6.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 24.0, 929.0, 63.0, 2.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0065441676415503025, -0.00398502591997385, -0.001425884198397398, 0.0011332575231790543, 0.0036923992447555065, 0.006251541431993246, 0.008810682222247124, 0.011369824409484863, 0.013928966596722603, 0.016488108783960342, 0.019047249108552933, 0.021606391295790672, 0.024165533483028412, 0.02672467567026615, 0.02928381785750389, 0.03184295818209648, 0.03440209850668907, 0.03696123883128166, 0.03952038288116455, 0.04207952320575714, 0.04463866353034973, 0.04719780758023262, 0.04975694790482521, 0.0523160919547081, 0.05487523227930069, 0.05743437260389328, 0.05999351665377617, 0.06255266070365906, 0.06511180102825165, 0.06767094135284424, 0.07023008167743683, 0.07278922200202942, 0.07534836232662201, 0.0779075026512146, 0.08046664297580719, 0.08302579075098038, 0.08558493107557297, 0.08814407140016556, 0.09070321172475815, 0.09326235204935074, 0.09582149982452393, 0.09838064014911652, 0.1009397804737091, 0.1034989282488823, 0.10605806857347488, 0.10861720889806747, 0.11117634922266006, 0.11373548954725266, 0.11629462987184525, 0.11885377019643784, 0.12141291052103043, 0.12397205829620361, 0.1265311986207962, 0.1290903389453888, 0.13164947926998138, 0.13420861959457397, 0.13676775991916656, 0.13932690024375916, 0.14188604056835175, 0.14444518089294434, 0.14700432121753693, 0.14956346154212952, 0.1521226167678833, 0.1546817570924759, 0.15724089741706848]}, "gradients/decoder.transformer.h.6.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 6.0, 1.0, 0.0, 4.0, 5.0, 4.0, 5.0, 6.0, 9.0, 9.0, 14.0, 22.0, 22.0, 26.0, 34.0, 36.0, 40.0, 45.0, 43.0, 30.0, 46.0, 51.0, 46.0, 47.0, 58.0, 47.0, 45.0, 45.0, 40.0, 39.0, 28.0, 31.0, 26.0, 17.0, 17.0, 9.0, 15.0, 11.0, 9.0, 11.0, 3.0, 5.0, 0.0, 3.0, 3.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.006034553050994873, -0.005863676778972149, -0.005692800506949425, -0.005521924234926701, -0.0053510479629039764, -0.005180171690881252, -0.005009295418858528, -0.004838419146835804, -0.00466754287481308, -0.004496666602790356, -0.0043257903307676315, -0.004154914058744907, -0.003984037786722183, -0.003813161514699459, -0.003642285242676735, -0.0034714089706540108, -0.0033005326986312866, -0.0031296564266085625, -0.0029587801545858383, -0.002787903882563114, -0.00261702761054039, -0.002446151338517666, -0.0022752750664949417, -0.0021043987944722176, -0.0019335225224494934, -0.0017626462504267693, -0.001591769978404045, -0.001420893706381321, -0.0012500174343585968, -0.0010791411623358727, -0.0009082648903131485, -0.0007373886182904243, -0.0005665123462677002, -0.00039563607424497604, -0.0002247598022222519, -5.388353019952774e-05, 0.00011699274182319641, 0.00028786901384592056, 0.0004587452858686447, 0.0006296215578913689, 0.000800497829914093, 0.0009713741019368172, 0.0011422503739595413, 0.0013131266459822655, 0.0014840029180049896, 0.0016548791900277138, 0.001825755462050438, 0.001996631734073162, 0.0021675080060958862, 0.0023383842781186104, 0.0025092605501413345, 0.0026801368221640587, 0.002851013094186783, 0.003021889366209507, 0.003192765638232231, 0.0033636419102549553, 0.0035345181822776794, 0.0037053944543004036, 0.0038762707263231277, 0.004047146998345852, 0.004218023270368576, 0.0043888995423913, 0.004559775814414024, 0.0047306520864367485, 0.004901528358459473]}, "gradients/decoder.transformer.h.6.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 3.0, 4.0, 8.0, 5.0, 3.0, 5.0, 8.0, 16.0, 7.0, 19.0, 17.0, 31.0, 14.0, 28.0, 42.0, 26.0, 32.0, 40.0, 33.0, 54.0, 52.0, 53.0, 55.0, 46.0, 34.0, 42.0, 57.0, 31.0, 38.0, 30.0, 21.0, 26.0, 20.0, 18.0, 28.0, 10.0, 15.0, 10.0, 8.0, 8.0, 6.0, 3.0, 3.0, 4.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.265625, -11.8846435546875, -11.503662109375, -11.1226806640625, -10.74169921875, -10.3607177734375, -9.979736328125, -9.5987548828125, -9.2177734375, -8.8367919921875, -8.455810546875, -8.0748291015625, -7.69384765625, -7.3128662109375, -6.931884765625, -6.5509033203125, -6.169921875, -5.7889404296875, -5.407958984375, -5.0269775390625, -4.64599609375, -4.2650146484375, -3.884033203125, -3.5030517578125, -3.1220703125, -2.7410888671875, -2.360107421875, -1.9791259765625, -1.59814453125, -1.2171630859375, -0.836181640625, -0.4552001953125, -0.07421875, 0.3067626953125, 0.687744140625, 1.0687255859375, 1.44970703125, 1.8306884765625, 2.211669921875, 2.5926513671875, 2.9736328125, 3.3546142578125, 3.735595703125, 4.1165771484375, 4.49755859375, 4.8785400390625, 5.259521484375, 5.6405029296875, 6.021484375, 6.4024658203125, 6.783447265625, 7.1644287109375, 7.54541015625, 7.9263916015625, 8.307373046875, 8.6883544921875, 9.0693359375, 9.4503173828125, 9.831298828125, 10.2122802734375, 10.59326171875, 10.9742431640625, 11.355224609375, 11.7362060546875, 12.1171875]}, "gradients/decoder.transformer.h.6.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 4.0, 4.0, 8.0, 10.0, 8.0, 10.0, 13.0, 18.0, 29.0, 26.0, 41.0, 54.0, 91.0, 109.0, 154.0, 222.0, 275.0, 433.0, 635.0, 1251.0, 4265.0, 20442.0, 217911.0, 749670.0, 41191.0, 7237.0, 1890.0, 849.0, 528.0, 321.0, 246.0, 143.0, 127.0, 91.0, 59.0, 54.0, 30.0, 37.0, 21.0, 18.0, 12.0, 8.0, 7.0, 7.0, 4.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-30.90625, -29.946044921875, -28.98583984375, -28.025634765625, -27.0654296875, -26.105224609375, -25.14501953125, -24.184814453125, -23.224609375, -22.264404296875, -21.30419921875, -20.343994140625, -19.3837890625, -18.423583984375, -17.46337890625, -16.503173828125, -15.54296875, -14.582763671875, -13.62255859375, -12.662353515625, -11.7021484375, -10.741943359375, -9.78173828125, -8.821533203125, -7.861328125, -6.901123046875, -5.94091796875, -4.980712890625, -4.0205078125, -3.060302734375, -2.10009765625, -1.139892578125, -0.1796875, 0.780517578125, 1.74072265625, 2.700927734375, 3.6611328125, 4.621337890625, 5.58154296875, 6.541748046875, 7.501953125, 8.462158203125, 9.42236328125, 10.382568359375, 11.3427734375, 12.302978515625, 13.26318359375, 14.223388671875, 15.18359375, 16.143798828125, 17.10400390625, 18.064208984375, 19.0244140625, 19.984619140625, 20.94482421875, 21.905029296875, 22.865234375, 23.825439453125, 24.78564453125, 25.745849609375, 26.7060546875, 27.666259765625, 28.62646484375, 29.586669921875, 30.546875]}, "gradients/decoder.transformer.h.6.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 5.0, 6.0, 3.0, 6.0, 7.0, 12.0, 12.0, 9.0, 11.0, 25.0, 20.0, 18.0, 19.0, 22.0, 23.0, 40.0, 31.0, 34.0, 37.0, 28.0, 62.0, 82.0, 223.0, 1682.0, 157.0, 64.0, 47.0, 45.0, 37.0, 41.0, 30.0, 46.0, 32.0, 18.0, 13.0, 10.0, 14.0, 15.0, 16.0, 15.0, 9.0, 9.0, 9.0, 3.0, 1.0, 5.0, 0.0, 3.0, 1.0, 2.0, 2.0, 0.0, 1.0], "bins": [-30.390625, -29.50634765625, -28.6220703125, -27.73779296875, -26.853515625, -25.96923828125, -25.0849609375, -24.20068359375, -23.31640625, -22.43212890625, -21.5478515625, -20.66357421875, -19.779296875, -18.89501953125, -18.0107421875, -17.12646484375, -16.2421875, -15.35791015625, -14.4736328125, -13.58935546875, -12.705078125, -11.82080078125, -10.9365234375, -10.05224609375, -9.16796875, -8.28369140625, -7.3994140625, -6.51513671875, -5.630859375, -4.74658203125, -3.8623046875, -2.97802734375, -2.09375, -1.20947265625, -0.3251953125, 0.55908203125, 1.443359375, 2.32763671875, 3.2119140625, 4.09619140625, 4.98046875, 5.86474609375, 6.7490234375, 7.63330078125, 8.517578125, 9.40185546875, 10.2861328125, 11.17041015625, 12.0546875, 12.93896484375, 13.8232421875, 14.70751953125, 15.591796875, 16.47607421875, 17.3603515625, 18.24462890625, 19.12890625, 20.01318359375, 20.8974609375, 21.78173828125, 22.666015625, 23.55029296875, 24.4345703125, 25.31884765625, 26.203125]}, "gradients/decoder.transformer.h.6.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 3.0, 3.0, 1.0, 3.0, 2.0, 1.0, 9.0, 9.0, 12.0, 12.0, 20.0, 11.0, 17.0, 15.0, 18.0, 18.0, 28.0, 59.0, 38.0, 65.0, 109.0, 163.0, 285.0, 755.0, 8693.0, 3033000.0, 99291.0, 1853.0, 467.0, 201.0, 129.0, 83.0, 56.0, 49.0, 41.0, 26.0, 26.0, 27.0, 23.0, 18.0, 17.0, 10.0, 9.0, 10.0, 7.0, 7.0, 4.0, 7.0, 5.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 2.0], "bins": [-73.125, -70.66796875, -68.2109375, -65.75390625, -63.296875, -60.83984375, -58.3828125, -55.92578125, -53.46875, -51.01171875, -48.5546875, -46.09765625, -43.640625, -41.18359375, -38.7265625, -36.26953125, -33.8125, -31.35546875, -28.8984375, -26.44140625, -23.984375, -21.52734375, -19.0703125, -16.61328125, -14.15625, -11.69921875, -9.2421875, -6.78515625, -4.328125, -1.87109375, 0.5859375, 3.04296875, 5.5, 7.95703125, 10.4140625, 12.87109375, 15.328125, 17.78515625, 20.2421875, 22.69921875, 25.15625, 27.61328125, 30.0703125, 32.52734375, 34.984375, 37.44140625, 39.8984375, 42.35546875, 44.8125, 47.26953125, 49.7265625, 52.18359375, 54.640625, 57.09765625, 59.5546875, 62.01171875, 64.46875, 66.92578125, 69.3828125, 71.83984375, 74.296875, 76.75390625, 79.2109375, 81.66796875, 84.125]}, "gradients/decoder.transformer.h.6.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 4.0, 11.0, 40.0, 136.0, 247.0, 310.0, 176.0, 68.0, 18.0, 4.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-42.41730499267578, -40.21316146850586, -38.0090217590332, -35.80487823486328, -33.600738525390625, -31.396595001220703, -29.19245147705078, -26.988309860229492, -24.784168243408203, -22.580026626586914, -20.375885009765625, -18.171741485595703, -15.967599868774414, -13.763458251953125, -11.55931568145752, -9.355173110961914, -7.151031494140625, -4.946889400482178, -2.7427473068237305, -0.5386052131652832, 1.665536880493164, 3.869678497314453, 6.073821067810059, 8.277963638305664, 10.482105255126953, 12.686246871948242, 14.890389442443848, 17.094532012939453, 19.298673629760742, 21.50281524658203, 23.706958770751953, 25.911100387573242, 28.11524200439453, 30.31938362121582, 32.52352523803711, 34.72766876220703, 36.93180847167969, 39.13595199584961, 41.34009552001953, 43.54423522949219, 45.74837875366211, 47.95252227783203, 50.15666198730469, 52.36080551147461, 54.56494903564453, 56.76908874511719, 58.97323226928711, 61.17737579345703, 63.38151550292969, 65.58565521240234, 67.78980255126953, 69.99394226074219, 72.19808197021484, 74.4022216796875, 76.60636901855469, 78.81050872802734, 81.0146484375, 83.21878814697266, 85.42293548583984, 87.6270751953125, 89.83121490478516, 92.03535461425781, 94.239501953125, 96.44364166259766, 98.64778900146484]}, "gradients/decoder.transformer.h.6.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 3.0, 6.0, 12.0, 10.0, 15.0, 16.0, 16.0, 25.0, 18.0, 13.0, 28.0, 29.0, 34.0, 27.0, 31.0, 31.0, 37.0, 43.0, 29.0, 35.0, 40.0, 42.0, 33.0, 34.0, 30.0, 29.0, 44.0, 34.0, 33.0, 29.0, 24.0, 24.0, 18.0, 15.0, 14.0, 14.0, 17.0, 13.0, 12.0, 8.0, 7.0, 6.0, 5.0, 7.0, 3.0, 4.0, 3.0, 3.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-67.84556579589844, -65.63578796386719, -63.426002502441406, -61.21622085571289, -59.006439208984375, -56.796661376953125, -54.58687973022461, -52.377098083496094, -50.16731643676758, -47.95753479003906, -45.74775314331055, -43.53797149658203, -41.32819366455078, -39.118408203125, -36.90863037109375, -34.698848724365234, -32.48906707763672, -30.279285430908203, -28.069503784179688, -25.859724044799805, -23.64994239807129, -21.440160751342773, -19.23038101196289, -17.020599365234375, -14.81081771850586, -12.601036071777344, -10.391255378723145, -8.181474685668945, -5.97169303894043, -3.761911392211914, -1.5521306991577148, 0.6576499938964844, 2.867431640625, 5.077212810516357, 7.286993980407715, 9.496774673461914, 11.70655632019043, 13.916337966918945, 16.126117706298828, 18.335899353027344, 20.54568099975586, 22.755462646484375, 24.96524429321289, 27.175024032592773, 29.38480567932129, 31.594587326049805, 33.80436706542969, 36.0141487121582, 38.22393035888672, 40.433712005615234, 42.64349365234375, 44.853275299072266, 47.06305694580078, 49.27283477783203, 51.48261642456055, 53.69239807128906, 55.90217971801758, 58.111961364746094, 60.32174301147461, 62.531524658203125, 64.74130249023438, 66.95108795166016, 69.1608657836914, 71.37065124511719, 73.58042907714844]}, "gradients/decoder.transformer.h.5.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 7.0, 3.0, 5.0, 4.0, 6.0, 5.0, 9.0, 17.0, 19.0, 22.0, 15.0, 15.0, 24.0, 33.0, 22.0, 32.0, 26.0, 35.0, 39.0, 42.0, 50.0, 62.0, 48.0, 51.0, 36.0, 51.0, 35.0, 35.0, 37.0, 33.0, 23.0, 22.0, 17.0, 26.0, 17.0, 20.0, 16.0, 10.0, 6.0, 13.0, 10.0, 4.0, 6.0, 3.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-12.3359375, -11.9666748046875, -11.597412109375, -11.2281494140625, -10.85888671875, -10.4896240234375, -10.120361328125, -9.7510986328125, -9.3818359375, -9.0125732421875, -8.643310546875, -8.2740478515625, -7.90478515625, -7.5355224609375, -7.166259765625, -6.7969970703125, -6.427734375, -6.0584716796875, -5.689208984375, -5.3199462890625, -4.95068359375, -4.5814208984375, -4.212158203125, -3.8428955078125, -3.4736328125, -3.1043701171875, -2.735107421875, -2.3658447265625, -1.99658203125, -1.6273193359375, -1.258056640625, -0.8887939453125, -0.51953125, -0.1502685546875, 0.218994140625, 0.5882568359375, 0.95751953125, 1.3267822265625, 1.696044921875, 2.0653076171875, 2.4345703125, 2.8038330078125, 3.173095703125, 3.5423583984375, 3.91162109375, 4.2808837890625, 4.650146484375, 5.0194091796875, 5.388671875, 5.7579345703125, 6.127197265625, 6.4964599609375, 6.86572265625, 7.2349853515625, 7.604248046875, 7.9735107421875, 8.3427734375, 8.7120361328125, 9.081298828125, 9.4505615234375, 9.81982421875, 10.1890869140625, 10.558349609375, 10.9276123046875, 11.296875]}, "gradients/decoder.transformer.h.5.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 1.0, 3.0, 4.0, 2.0, 5.0, 10.0, 11.0, 9.0, 14.0, 14.0, 23.0, 28.0, 27.0, 38.0, 53.0, 55.0, 67.0, 89.0, 121.0, 195.0, 246.0, 367.0, 702.0, 1317.0, 3302.0, 10351.0, 42068.0, 272196.0, 1619817.0, 1836395.0, 336930.0, 50749.0, 11710.0, 3725.0, 1448.0, 742.0, 425.0, 258.0, 165.0, 114.0, 104.0, 87.0, 63.0, 50.0, 29.0, 29.0, 35.0, 25.0, 13.0, 14.0, 9.0, 8.0, 9.0, 6.0, 7.0, 6.0, 5.0, 0.0, 1.0, 1.0, 3.0], "bins": [-18.625, -18.0439453125, -17.462890625, -16.8818359375, -16.30078125, -15.7197265625, -15.138671875, -14.5576171875, -13.9765625, -13.3955078125, -12.814453125, -12.2333984375, -11.65234375, -11.0712890625, -10.490234375, -9.9091796875, -9.328125, -8.7470703125, -8.166015625, -7.5849609375, -7.00390625, -6.4228515625, -5.841796875, -5.2607421875, -4.6796875, -4.0986328125, -3.517578125, -2.9365234375, -2.35546875, -1.7744140625, -1.193359375, -0.6123046875, -0.03125, 0.5498046875, 1.130859375, 1.7119140625, 2.29296875, 2.8740234375, 3.455078125, 4.0361328125, 4.6171875, 5.1982421875, 5.779296875, 6.3603515625, 6.94140625, 7.5224609375, 8.103515625, 8.6845703125, 9.265625, 9.8466796875, 10.427734375, 11.0087890625, 11.58984375, 12.1708984375, 12.751953125, 13.3330078125, 13.9140625, 14.4951171875, 15.076171875, 15.6572265625, 16.23828125, 16.8193359375, 17.400390625, 17.9814453125, 18.5625]}, "gradients/decoder.transformer.h.5.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 4.0, 8.0, 7.0, 7.0, 27.0, 35.0, 63.0, 94.0, 157.0, 262.0, 414.0, 706.0, 782.0, 606.0, 377.0, 220.0, 126.0, 79.0, 40.0, 36.0, 14.0, 5.0, 6.0, 6.0, 1.0, 3.0, 2.0, 1.0, 1.0], "bins": [-43.5625, -42.6402587890625, -41.718017578125, -40.7957763671875, -39.87353515625, -38.9512939453125, -38.029052734375, -37.1068115234375, -36.1845703125, -35.2623291015625, -34.340087890625, -33.4178466796875, -32.49560546875, -31.5733642578125, -30.651123046875, -29.7288818359375, -28.806640625, -27.8843994140625, -26.962158203125, -26.0399169921875, -25.11767578125, -24.1954345703125, -23.273193359375, -22.3509521484375, -21.4287109375, -20.5064697265625, -19.584228515625, -18.6619873046875, -17.73974609375, -16.8175048828125, -15.895263671875, -14.9730224609375, -14.05078125, -13.1285400390625, -12.206298828125, -11.2840576171875, -10.36181640625, -9.4395751953125, -8.517333984375, -7.5950927734375, -6.6728515625, -5.7506103515625, -4.828369140625, -3.9061279296875, -2.98388671875, -2.0616455078125, -1.139404296875, -0.2171630859375, 0.705078125, 1.6273193359375, 2.549560546875, 3.4718017578125, 4.39404296875, 5.3162841796875, 6.238525390625, 7.1607666015625, 8.0830078125, 9.0052490234375, 9.927490234375, 10.8497314453125, 11.77197265625, 12.6942138671875, 13.616455078125, 14.5386962890625, 15.4609375]}, "gradients/decoder.transformer.h.5.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 7.0, 7.0, 2.0, 10.0, 13.0, 27.0, 28.0, 43.0, 85.0, 123.0, 238.0, 450.0, 1267.0, 10367.0, 3799627.0, 377348.0, 3083.0, 731.0, 326.0, 183.0, 107.0, 78.0, 45.0, 29.0, 24.0, 12.0, 10.0, 8.0, 6.0, 1.0, 4.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-88.1875, -85.1259765625, -82.064453125, -79.0029296875, -75.94140625, -72.8798828125, -69.818359375, -66.7568359375, -63.6953125, -60.6337890625, -57.572265625, -54.5107421875, -51.44921875, -48.3876953125, -45.326171875, -42.2646484375, -39.203125, -36.1416015625, -33.080078125, -30.0185546875, -26.95703125, -23.8955078125, -20.833984375, -17.7724609375, -14.7109375, -11.6494140625, -8.587890625, -5.5263671875, -2.46484375, 0.5966796875, 3.658203125, 6.7197265625, 9.78125, 12.8427734375, 15.904296875, 18.9658203125, 22.02734375, 25.0888671875, 28.150390625, 31.2119140625, 34.2734375, 37.3349609375, 40.396484375, 43.4580078125, 46.51953125, 49.5810546875, 52.642578125, 55.7041015625, 58.765625, 61.8271484375, 64.888671875, 67.9501953125, 71.01171875, 74.0732421875, 77.134765625, 80.1962890625, 83.2578125, 86.3193359375, 89.380859375, 92.4423828125, 95.50390625, 98.5654296875, 101.626953125, 104.6884765625, 107.75]}, "gradients/decoder.transformer.h.5.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 32.0, 642.0, 333.0, 8.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-897.5950927734375, -881.2247314453125, -864.8543701171875, -848.4840698242188, -832.1137084960938, -815.7433471679688, -799.3729858398438, -783.0026245117188, -766.63232421875, -750.261962890625, -733.8916015625, -717.5213012695312, -701.1509399414062, -684.7805786132812, -668.4102172851562, -652.0398559570312, -635.6694946289062, -619.2991333007812, -602.9287719726562, -586.5584716796875, -570.1881103515625, -553.8177490234375, -537.4473876953125, -521.0770263671875, -504.7066955566406, -488.3363342285156, -471.96600341796875, -455.59564208984375, -439.22528076171875, -422.8549499511719, -406.4845886230469, -390.1142578125, -373.74395751953125, -357.37359619140625, -341.0032653808594, -324.6329040527344, -308.2625732421875, -291.8922119140625, -275.5218505859375, -259.1514892578125, -242.78115844726562, -226.4108123779297, -210.04046630859375, -193.67010498046875, -177.2997589111328, -160.92941284179688, -144.55905151367188, -128.18870544433594, -111.818359375, -95.44801330566406, -79.0776596069336, -62.70730972290039, -46.33695983886719, -29.96661376953125, -13.596260070800781, 2.7740936279296875, 19.144439697265625, 35.51478958129883, 51.88513946533203, 68.2554931640625, 84.62583923339844, 100.99618530273438, 117.36653900146484, 133.7368927001953, 150.10723876953125]}, "gradients/decoder.transformer.h.5.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 4.0, 1.0, 0.0, 4.0, 3.0, 4.0, 5.0, 4.0, 7.0, 1.0, 6.0, 9.0, 13.0, 15.0, 18.0, 22.0, 23.0, 17.0, 28.0, 22.0, 40.0, 29.0, 39.0, 33.0, 42.0, 47.0, 45.0, 44.0, 38.0, 32.0, 36.0, 43.0, 38.0, 36.0, 23.0, 30.0, 28.0, 25.0, 20.0, 24.0, 25.0, 16.0, 14.0, 8.0, 10.0, 7.0, 9.0, 9.0, 5.0, 4.0, 5.0, 2.0, 2.0, 0.0, 4.0, 1.0, 1.0, 1.0, 0.0, 2.0], "bins": [-59.84124755859375, -58.01295852661133, -56.18466567993164, -54.35637664794922, -52.5280876159668, -50.699798583984375, -48.87150573730469, -47.043216705322266, -45.214927673339844, -43.38663864135742, -41.558345794677734, -39.73005676269531, -37.90176773071289, -36.07347869873047, -34.24518585205078, -32.41689682006836, -30.588603973388672, -28.760313034057617, -26.932024002075195, -25.10373306274414, -23.27544403076172, -21.447153091430664, -19.61886215209961, -17.790573120117188, -15.962282180786133, -14.133992195129395, -12.305702209472656, -10.477411270141602, -8.649121284484863, -6.820831298828125, -4.99254035949707, -3.164250373840332, -1.3359603881835938, 0.49232983589172363, 2.320620059967041, 4.1489105224609375, 5.977200508117676, 7.805490493774414, 9.633781433105469, 11.462071418762207, 13.290361404418945, 15.118651390075684, 16.946941375732422, 18.775232315063477, 20.60352325439453, 22.431812286376953, 24.260103225708008, 26.088394165039062, 27.916683197021484, 29.74497413635254, 31.57326316833496, 33.401554107666016, 35.22984313964844, 37.058135986328125, 38.88642501831055, 40.71471405029297, 42.543006896972656, 44.37129592895508, 46.199588775634766, 48.02787780761719, 49.85616683959961, 51.68445587158203, 53.51274871826172, 55.34103775024414, 57.16932678222656]}, "gradients/decoder.transformer.h.5.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 4.0, 3.0, 6.0, 5.0, 9.0, 8.0, 9.0, 12.0, 27.0, 11.0, 23.0, 23.0, 35.0, 32.0, 33.0, 21.0, 45.0, 52.0, 34.0, 46.0, 54.0, 46.0, 56.0, 48.0, 40.0, 33.0, 40.0, 34.0, 40.0, 32.0, 28.0, 29.0, 19.0, 15.0, 15.0, 17.0, 5.0, 13.0, 5.0, 3.0, 3.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-13.5234375, -13.104248046875, -12.68505859375, -12.265869140625, -11.8466796875, -11.427490234375, -11.00830078125, -10.589111328125, -10.169921875, -9.750732421875, -9.33154296875, -8.912353515625, -8.4931640625, -8.073974609375, -7.65478515625, -7.235595703125, -6.81640625, -6.397216796875, -5.97802734375, -5.558837890625, -5.1396484375, -4.720458984375, -4.30126953125, -3.882080078125, -3.462890625, -3.043701171875, -2.62451171875, -2.205322265625, -1.7861328125, -1.366943359375, -0.94775390625, -0.528564453125, -0.109375, 0.309814453125, 0.72900390625, 1.148193359375, 1.5673828125, 1.986572265625, 2.40576171875, 2.824951171875, 3.244140625, 3.663330078125, 4.08251953125, 4.501708984375, 4.9208984375, 5.340087890625, 5.75927734375, 6.178466796875, 6.59765625, 7.016845703125, 7.43603515625, 7.855224609375, 8.2744140625, 8.693603515625, 9.11279296875, 9.531982421875, 9.951171875, 10.370361328125, 10.78955078125, 11.208740234375, 11.6279296875, 12.047119140625, 12.46630859375, 12.885498046875, 13.3046875]}, "gradients/decoder.transformer.h.5.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 1.0, 4.0, 3.0, 17.0, 14.0, 25.0, 29.0, 39.0, 60.0, 79.0, 146.0, 205.0, 295.0, 445.0, 716.0, 1100.0, 1679.0, 2610.0, 4039.0, 6393.0, 9896.0, 15728.0, 25045.0, 41029.0, 67894.0, 109913.0, 164154.0, 190090.0, 150555.0, 98232.0, 60058.0, 36321.0, 22886.0, 14243.0, 8827.0, 5691.0, 3589.0, 2230.0, 1435.0, 963.0, 615.0, 419.0, 262.0, 198.0, 132.0, 89.0, 45.0, 41.0, 33.0, 14.0, 14.0, 8.0, 5.0, 5.0, 4.0, 2.0, 4.0, 2.0], "bins": [-1.0224609375, -0.9918289184570312, -0.9611968994140625, -0.9305648803710938, -0.899932861328125, -0.8693008422851562, -0.8386688232421875, -0.8080368041992188, -0.77740478515625, -0.7467727661132812, -0.7161407470703125, -0.6855087280273438, -0.654876708984375, -0.6242446899414062, -0.5936126708984375, -0.5629806518554688, -0.5323486328125, -0.5017166137695312, -0.4710845947265625, -0.44045257568359375, -0.409820556640625, -0.37918853759765625, -0.3485565185546875, -0.31792449951171875, -0.28729248046875, -0.25666046142578125, -0.2260284423828125, -0.19539642333984375, -0.164764404296875, -0.13413238525390625, -0.1035003662109375, -0.07286834716796875, -0.042236328125, -0.01160430908203125, 0.0190277099609375, 0.04965972900390625, 0.080291748046875, 0.11092376708984375, 0.1415557861328125, 0.17218780517578125, 0.20281982421875, 0.23345184326171875, 0.2640838623046875, 0.29471588134765625, 0.325347900390625, 0.35597991943359375, 0.3866119384765625, 0.41724395751953125, 0.4478759765625, 0.47850799560546875, 0.5091400146484375, 0.5397720336914062, 0.570404052734375, 0.6010360717773438, 0.6316680908203125, 0.6623001098632812, 0.69293212890625, 0.7235641479492188, 0.7541961669921875, 0.7848281860351562, 0.815460205078125, 0.8460922241210938, 0.8767242431640625, 0.9073562622070312, 0.93798828125]}, "gradients/decoder.transformer.h.5.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 2.0, 2.0, 2.0, 1.0, 1.0, 4.0, 5.0, 6.0, 6.0, 6.0, 14.0, 20.0, 14.0, 16.0, 22.0, 24.0, 18.0, 29.0, 39.0, 47.0, 35.0, 41.0, 44.0, 44.0, 36.0, 1075.0, 35.0, 44.0, 39.0, 34.0, 42.0, 30.0, 35.0, 34.0, 21.0, 29.0, 19.0, 25.0, 11.0, 20.0, 17.0, 17.0, 8.0, 5.0, 5.0, 4.0, 2.0, 4.0, 2.0, 2.0, 4.0], "bins": [-9.8046875, -9.552490234375, -9.30029296875, -9.048095703125, -8.7958984375, -8.543701171875, -8.29150390625, -8.039306640625, -7.787109375, -7.534912109375, -7.28271484375, -7.030517578125, -6.7783203125, -6.526123046875, -6.27392578125, -6.021728515625, -5.76953125, -5.517333984375, -5.26513671875, -5.012939453125, -4.7607421875, -4.508544921875, -4.25634765625, -4.004150390625, -3.751953125, -3.499755859375, -3.24755859375, -2.995361328125, -2.7431640625, -2.490966796875, -2.23876953125, -1.986572265625, -1.734375, -1.482177734375, -1.22998046875, -0.977783203125, -0.7255859375, -0.473388671875, -0.22119140625, 0.031005859375, 0.283203125, 0.535400390625, 0.78759765625, 1.039794921875, 1.2919921875, 1.544189453125, 1.79638671875, 2.048583984375, 2.30078125, 2.552978515625, 2.80517578125, 3.057373046875, 3.3095703125, 3.561767578125, 3.81396484375, 4.066162109375, 4.318359375, 4.570556640625, 4.82275390625, 5.074951171875, 5.3271484375, 5.579345703125, 5.83154296875, 6.083740234375, 6.3359375]}, "gradients/decoder.transformer.h.5.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 6.0, 3.0, 4.0, 12.0, 16.0, 24.0, 29.0, 49.0, 88.0, 124.0, 161.0, 252.0, 419.0, 653.0, 951.0, 1445.0, 2319.0, 3548.0, 5283.0, 8372.0, 13009.0, 20205.0, 31142.0, 47933.0, 72285.0, 108020.0, 147499.0, 1207613.0, 138059.0, 98401.0, 66332.0, 43174.0, 28339.0, 18243.0, 11888.0, 7398.0, 4888.0, 3226.0, 2005.0, 1300.0, 824.0, 566.0, 354.0, 254.0, 148.0, 93.0, 71.0, 35.0, 27.0, 23.0, 11.0, 7.0, 6.0, 2.0, 6.0, 1.0, 2.0, 1.0], "bins": [-0.759765625, -0.7370071411132812, -0.7142486572265625, -0.6914901733398438, -0.668731689453125, -0.6459732055664062, -0.6232147216796875, -0.6004562377929688, -0.57769775390625, -0.5549392700195312, -0.5321807861328125, -0.5094223022460938, -0.486663818359375, -0.46390533447265625, -0.4411468505859375, -0.41838836669921875, -0.3956298828125, -0.37287139892578125, -0.3501129150390625, -0.32735443115234375, -0.304595947265625, -0.28183746337890625, -0.2590789794921875, -0.23632049560546875, -0.21356201171875, -0.19080352783203125, -0.1680450439453125, -0.14528656005859375, -0.122528076171875, -0.09976959228515625, -0.0770111083984375, -0.05425262451171875, -0.031494140625, -0.00873565673828125, 0.0140228271484375, 0.03678131103515625, 0.059539794921875, 0.08229827880859375, 0.1050567626953125, 0.12781524658203125, 0.15057373046875, 0.17333221435546875, 0.1960906982421875, 0.21884918212890625, 0.241607666015625, 0.26436614990234375, 0.2871246337890625, 0.30988311767578125, 0.3326416015625, 0.35540008544921875, 0.3781585693359375, 0.40091705322265625, 0.423675537109375, 0.44643402099609375, 0.4691925048828125, 0.49195098876953125, 0.51470947265625, 0.5374679565429688, 0.5602264404296875, 0.5829849243164062, 0.605743408203125, 0.6285018920898438, 0.6512603759765625, 0.6740188598632812, 0.69677734375]}, "gradients/decoder.transformer.h.5.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 6.0, 1.0, 6.0, 6.0, 9.0, 7.0, 8.0, 12.0, 26.0, 30.0, 35.0, 36.0, 66.0, 51.0, 87.0, 64.0, 77.0, 74.0, 77.0, 51.0, 56.0, 51.0, 40.0, 23.0, 21.0, 20.0, 12.0, 15.0, 12.0, 5.0, 7.0, 3.0, 0.0, 4.0, 2.0, 3.0, 2.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0106964111328125, -0.010326743125915527, -0.009957075119018555, -0.009587407112121582, -0.00921773910522461, -0.008848071098327637, -0.008478403091430664, -0.008108735084533691, -0.007739067077636719, -0.007369399070739746, -0.0069997310638427734, -0.006630063056945801, -0.006260395050048828, -0.0058907270431518555, -0.005521059036254883, -0.00515139102935791, -0.0047817230224609375, -0.004412055015563965, -0.004042387008666992, -0.0036727190017700195, -0.003303050994873047, -0.0029333829879760742, -0.0025637149810791016, -0.002194046974182129, -0.0018243789672851562, -0.0014547109603881836, -0.001085042953491211, -0.0007153749465942383, -0.0003457069396972656, 2.396106719970703e-05, 0.0003936290740966797, 0.0007632970809936523, 0.001132965087890625, 0.0015026330947875977, 0.0018723011016845703, 0.002241969108581543, 0.0026116371154785156, 0.0029813051223754883, 0.003350973129272461, 0.0037206411361694336, 0.004090309143066406, 0.004459977149963379, 0.0048296451568603516, 0.005199313163757324, 0.005568981170654297, 0.0059386491775512695, 0.006308317184448242, 0.006677985191345215, 0.0070476531982421875, 0.00741732120513916, 0.007786989212036133, 0.008156657218933105, 0.008526325225830078, 0.00889599323272705, 0.009265661239624023, 0.009635329246520996, 0.010004997253417969, 0.010374665260314941, 0.010744333267211914, 0.011114001274108887, 0.01148366928100586, 0.011853337287902832, 0.012223005294799805, 0.012592673301696777, 0.01296234130859375]}, "gradients/decoder.transformer.h.5.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 4.0, 1.0, 5.0, 4.0, 5.0, 8.0, 11.0, 13.0, 23.0, 26.0, 26.0, 44.0, 52.0, 99.0, 118.0, 254.0, 478.0, 2973.0, 1038591.0, 4511.0, 532.0, 257.0, 175.0, 106.0, 65.0, 51.0, 31.0, 19.0, 20.0, 12.0, 11.0, 9.0, 12.0, 6.0, 0.0, 3.0, 2.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 2.0, 0.0, 1.0, 1.0], "bins": [-0.29443359375, -0.2861480712890625, -0.277862548828125, -0.2695770263671875, -0.26129150390625, -0.2530059814453125, -0.244720458984375, -0.2364349365234375, -0.2281494140625, -0.2198638916015625, -0.211578369140625, -0.2032928466796875, -0.19500732421875, -0.1867218017578125, -0.178436279296875, -0.1701507568359375, -0.161865234375, -0.1535797119140625, -0.145294189453125, -0.1370086669921875, -0.12872314453125, -0.1204376220703125, -0.112152099609375, -0.1038665771484375, -0.0955810546875, -0.0872955322265625, -0.079010009765625, -0.0707244873046875, -0.06243896484375, -0.0541534423828125, -0.045867919921875, -0.0375823974609375, -0.029296875, -0.0210113525390625, -0.012725830078125, -0.0044403076171875, 0.00384521484375, 0.0121307373046875, 0.020416259765625, 0.0287017822265625, 0.0369873046875, 0.0452728271484375, 0.053558349609375, 0.0618438720703125, 0.07012939453125, 0.0784149169921875, 0.086700439453125, 0.0949859619140625, 0.103271484375, 0.1115570068359375, 0.119842529296875, 0.1281280517578125, 0.13641357421875, 0.1446990966796875, 0.152984619140625, 0.1612701416015625, 0.1695556640625, 0.1778411865234375, 0.186126708984375, 0.1944122314453125, 0.20269775390625, 0.2109832763671875, 0.219268798828125, 0.2275543212890625, 0.23583984375]}, "gradients/decoder.transformer.h.5.ln_cross_attn.weight": {"_type": "histogram", "values": [3.0, 7.0, 417.0, 573.0, 20.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00501313665881753, -0.003385726362466812, -0.0017583160661160946, -0.00013090576976537704, 0.0014965045265853405, 0.0031239143572747707, 0.004751325119286776, 0.0063787358812987804, 0.008006146177649498, 0.009633556008338928, 0.011260966770350933, 0.012888377532362938, 0.014515787363052368, 0.0161431971937418, 0.01777060702443123, 0.019398018717765808, 0.02102542854845524, 0.02265283837914467, 0.024280250072479248, 0.02590765990316868, 0.02753506973385811, 0.02916247956454754, 0.03078988939523697, 0.03241730108857155, 0.03404470905661583, 0.03567212074995041, 0.03729952871799469, 0.03892694041132927, 0.04055435210466385, 0.04218176007270813, 0.04380917176604271, 0.04543658345937729, 0.04706399142742157, 0.04869140312075615, 0.05031881108880043, 0.05194622278213501, 0.05357363075017929, 0.05520104244351387, 0.05682845413684845, 0.05845586210489273, 0.06008327379822731, 0.06171068549156189, 0.06333809345960617, 0.06496550142765045, 0.06659291684627533, 0.06822032481431961, 0.06984773278236389, 0.07147514820098877, 0.07310255616903305, 0.07472996413707733, 0.07635737955570221, 0.07798478752374649, 0.07961219549179077, 0.08123961091041565, 0.08286701887845993, 0.08449442684650421, 0.08612184226512909, 0.08774925023317337, 0.08937666565179825, 0.09100407361984253, 0.09263148158788681, 0.09425888955593109, 0.09588630497455597, 0.09751371294260025, 0.09914112091064453]}, "gradients/decoder.transformer.h.5.ln_cross_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 3.0, 3.0, 5.0, 7.0, 5.0, 11.0, 11.0, 9.0, 10.0, 6.0, 14.0, 17.0, 18.0, 18.0, 24.0, 22.0, 27.0, 21.0, 25.0, 20.0, 39.0, 30.0, 47.0, 43.0, 30.0, 36.0, 35.0, 33.0, 35.0, 30.0, 31.0, 29.0, 42.0, 36.0, 36.0, 23.0, 26.0, 17.0, 26.0, 17.0, 16.0, 15.0, 14.0, 6.0, 11.0, 9.0, 7.0, 1.0, 2.0, 4.0, 3.0, 3.0, 4.0, 0.0, 2.0, 3.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.0043978095054626465, -0.004244734533131123, -0.004091659560799599, -0.003938584588468075, -0.003785509616136551, -0.003632434643805027, -0.003479359671473503, -0.0033262846991419792, -0.0031732097268104553, -0.0030201347544789314, -0.0028670597821474075, -0.0027139848098158836, -0.0025609098374843597, -0.002407834865152836, -0.002254759892821312, -0.002101684920489788, -0.0019486099481582642, -0.0017955349758267403, -0.0016424600034952164, -0.0014893850311636925, -0.0013363100588321686, -0.0011832350865006447, -0.0010301601141691208, -0.0008770851418375969, -0.000724010169506073, -0.0005709351971745491, -0.0004178602248430252, -0.0002647852525115013, -0.00011171028017997742, 4.136469215154648e-05, 0.00019443966448307037, 0.00034751463681459427, 0.0005005896091461182, 0.0006536645814776421, 0.000806739553809166, 0.0009598145261406898, 0.0011128894984722137, 0.0012659644708037376, 0.0014190394431352615, 0.0015721144154667854, 0.0017251893877983093, 0.0018782643601298332, 0.002031339332461357, 0.002184414304792881, 0.002337489277124405, 0.002490564249455929, 0.0026436392217874527, 0.0027967141941189766, 0.0029497891664505005, 0.0031028641387820244, 0.0032559391111135483, 0.003409014083445072, 0.003562089055776596, 0.00371516402810812, 0.003868239000439644, 0.004021313972771168, 0.004174388945102692, 0.0043274639174342155, 0.0044805388897657394, 0.004633613862097263, 0.004786688834428787, 0.004939763806760311, 0.005092838779091835, 0.005245913751423359, 0.005398988723754883]}, "gradients/decoder.transformer.h.5.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 4.0, 3.0, 6.0, 5.0, 9.0, 8.0, 9.0, 12.0, 27.0, 11.0, 23.0, 23.0, 34.0, 33.0, 33.0, 21.0, 45.0, 52.0, 34.0, 46.0, 54.0, 46.0, 56.0, 48.0, 40.0, 33.0, 40.0, 34.0, 40.0, 32.0, 28.0, 29.0, 19.0, 15.0, 15.0, 17.0, 5.0, 13.0, 5.0, 3.0, 3.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-13.5234375, -13.104248046875, -12.68505859375, -12.265869140625, -11.8466796875, -11.427490234375, -11.00830078125, -10.589111328125, -10.169921875, -9.750732421875, -9.33154296875, -8.912353515625, -8.4931640625, -8.073974609375, -7.65478515625, -7.235595703125, -6.81640625, -6.397216796875, -5.97802734375, -5.558837890625, -5.1396484375, -4.720458984375, -4.30126953125, -3.882080078125, -3.462890625, -3.043701171875, -2.62451171875, -2.205322265625, -1.7861328125, -1.366943359375, -0.94775390625, -0.528564453125, -0.109375, 0.309814453125, 0.72900390625, 1.148193359375, 1.5673828125, 1.986572265625, 2.40576171875, 2.824951171875, 3.244140625, 3.663330078125, 4.08251953125, 4.501708984375, 4.9208984375, 5.340087890625, 5.75927734375, 6.178466796875, 6.59765625, 7.016845703125, 7.43603515625, 7.855224609375, 8.2744140625, 8.693603515625, 9.11279296875, 9.531982421875, 9.951171875, 10.370361328125, 10.78955078125, 11.208740234375, 11.6279296875, 12.047119140625, 12.46630859375, 12.885498046875, 13.3046875]}, "gradients/decoder.transformer.h.5.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 3.0, 5.0, 8.0, 11.0, 17.0, 15.0, 30.0, 39.0, 55.0, 45.0, 89.0, 112.0, 204.0, 295.0, 550.0, 1162.0, 2815.0, 7676.0, 24215.0, 89056.0, 340440.0, 424559.0, 111456.0, 30306.0, 9159.0, 3237.0, 1381.0, 597.0, 367.0, 201.0, 128.0, 92.0, 75.0, 44.0, 36.0, 32.0, 13.0, 15.0, 8.0, 9.0, 0.0, 4.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-17.65625, -17.10595703125, -16.5556640625, -16.00537109375, -15.455078125, -14.90478515625, -14.3544921875, -13.80419921875, -13.25390625, -12.70361328125, -12.1533203125, -11.60302734375, -11.052734375, -10.50244140625, -9.9521484375, -9.40185546875, -8.8515625, -8.30126953125, -7.7509765625, -7.20068359375, -6.650390625, -6.10009765625, -5.5498046875, -4.99951171875, -4.44921875, -3.89892578125, -3.3486328125, -2.79833984375, -2.248046875, -1.69775390625, -1.1474609375, -0.59716796875, -0.046875, 0.50341796875, 1.0537109375, 1.60400390625, 2.154296875, 2.70458984375, 3.2548828125, 3.80517578125, 4.35546875, 4.90576171875, 5.4560546875, 6.00634765625, 6.556640625, 7.10693359375, 7.6572265625, 8.20751953125, 8.7578125, 9.30810546875, 9.8583984375, 10.40869140625, 10.958984375, 11.50927734375, 12.0595703125, 12.60986328125, 13.16015625, 13.71044921875, 14.2607421875, 14.81103515625, 15.361328125, 15.91162109375, 16.4619140625, 17.01220703125, 17.5625]}, "gradients/decoder.transformer.h.5.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 3.0, 2.0, 4.0, 1.0, 5.0, 6.0, 9.0, 7.0, 10.0, 13.0, 7.0, 15.0, 11.0, 22.0, 28.0, 26.0, 27.0, 31.0, 40.0, 34.0, 45.0, 55.0, 69.0, 96.0, 186.0, 1464.0, 229.0, 104.0, 95.0, 52.0, 38.0, 45.0, 37.0, 42.0, 31.0, 15.0, 32.0, 24.0, 18.0, 19.0, 13.0, 12.0, 8.0, 8.0, 3.0, 4.0, 2.0, 5.0, 5.0, 0.0, 2.0, 3.0, 2.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-25.859375, -24.888427734375, -23.91748046875, -22.946533203125, -21.9755859375, -21.004638671875, -20.03369140625, -19.062744140625, -18.091796875, -17.120849609375, -16.14990234375, -15.178955078125, -14.2080078125, -13.237060546875, -12.26611328125, -11.295166015625, -10.32421875, -9.353271484375, -8.38232421875, -7.411376953125, -6.4404296875, -5.469482421875, -4.49853515625, -3.527587890625, -2.556640625, -1.585693359375, -0.61474609375, 0.356201171875, 1.3271484375, 2.298095703125, 3.26904296875, 4.239990234375, 5.2109375, 6.181884765625, 7.15283203125, 8.123779296875, 9.0947265625, 10.065673828125, 11.03662109375, 12.007568359375, 12.978515625, 13.949462890625, 14.92041015625, 15.891357421875, 16.8623046875, 17.833251953125, 18.80419921875, 19.775146484375, 20.74609375, 21.717041015625, 22.68798828125, 23.658935546875, 24.6298828125, 25.600830078125, 26.57177734375, 27.542724609375, 28.513671875, 29.484619140625, 30.45556640625, 31.426513671875, 32.3974609375, 33.368408203125, 34.33935546875, 35.310302734375, 36.28125]}, "gradients/decoder.transformer.h.5.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 3.0, 5.0, 6.0, 4.0, 10.0, 16.0, 21.0, 21.0, 30.0, 39.0, 62.0, 114.0, 165.0, 229.0, 421.0, 1062.0, 13566.0, 2620681.0, 502356.0, 5254.0, 727.0, 306.0, 204.0, 128.0, 87.0, 67.0, 46.0, 28.0, 20.0, 15.0, 11.0, 8.0, 2.0, 2.0, 0.0, 2.0, 2.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-42.875, -40.8203125, -38.765625, -36.7109375, -34.65625, -32.6015625, -30.546875, -28.4921875, -26.4375, -24.3828125, -22.328125, -20.2734375, -18.21875, -16.1640625, -14.109375, -12.0546875, -10.0, -7.9453125, -5.890625, -3.8359375, -1.78125, 0.2734375, 2.328125, 4.3828125, 6.4375, 8.4921875, 10.546875, 12.6015625, 14.65625, 16.7109375, 18.765625, 20.8203125, 22.875, 24.9296875, 26.984375, 29.0390625, 31.09375, 33.1484375, 35.203125, 37.2578125, 39.3125, 41.3671875, 43.421875, 45.4765625, 47.53125, 49.5859375, 51.640625, 53.6953125, 55.75, 57.8046875, 59.859375, 61.9140625, 63.96875, 66.0234375, 68.078125, 70.1328125, 72.1875, 74.2421875, 76.296875, 78.3515625, 80.40625, 82.4609375, 84.515625, 86.5703125, 88.625]}, "gradients/decoder.transformer.h.5.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 31.0, 409.0, 526.0, 46.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0], "bins": [-452.2023010253906, -444.0010986328125, -435.79986572265625, -427.5986633300781, -419.3974304199219, -411.19622802734375, -402.9949951171875, -394.7937927246094, -386.59259033203125, -378.3913879394531, -370.1901550292969, -361.98895263671875, -353.7877197265625, -345.5865173339844, -337.3852844238281, -329.18408203125, -320.98284912109375, -312.7816467285156, -304.5804138183594, -296.37921142578125, -288.177978515625, -279.9767761230469, -271.7755432128906, -263.5743408203125, -255.3731231689453, -247.17190551757812, -238.97068786621094, -230.76947021484375, -222.56826782226562, -214.36703491210938, -206.16583251953125, -197.96461486816406, -189.76341247558594, -181.56219482421875, -173.36097717285156, -165.15975952148438, -156.95855712890625, -148.75732421875, -140.55612182617188, -132.3549041748047, -124.1536865234375, -115.95246887207031, -107.75125122070312, -99.55004119873047, -91.34882354736328, -83.1476058959961, -74.94639587402344, -66.74517822265625, -58.54396057128906, -50.342742919921875, -42.14152908325195, -33.94031524658203, -25.739097595214844, -17.537879943847656, -9.336666107177734, -1.1354522705078125, 7.065765380859375, 15.26698112487793, 23.468196868896484, 31.66941261291504, 39.870628356933594, 48.07184600830078, 56.2730598449707, 64.47427368164062, 72.67549133300781]}, "gradients/decoder.transformer.h.5.ln_1.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 0.0, 3.0, 3.0, 3.0, 2.0, 4.0, 1.0, 8.0, 8.0, 2.0, 11.0, 8.0, 13.0, 17.0, 9.0, 15.0, 16.0, 21.0, 22.0, 23.0, 25.0, 41.0, 33.0, 35.0, 20.0, 39.0, 41.0, 32.0, 39.0, 35.0, 28.0, 30.0, 53.0, 34.0, 43.0, 34.0, 29.0, 23.0, 30.0, 25.0, 24.0, 20.0, 17.0, 16.0, 22.0, 11.0, 11.0, 4.0, 10.0, 5.0, 5.0, 4.0, 1.0, 3.0, 2.0, 4.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-78.55891418457031, -76.28256225585938, -74.00621032714844, -71.7298583984375, -69.4535140991211, -67.17716217041016, -64.90081024169922, -62.62445831298828, -60.348106384277344, -58.071754455566406, -55.795406341552734, -53.5190544128418, -51.24270248413086, -48.96635437011719, -46.69000244140625, -44.41365051269531, -42.13730239868164, -39.8609504699707, -37.58460235595703, -35.308250427246094, -33.031898498535156, -30.75554847717285, -28.479198455810547, -26.20284652709961, -23.926496505737305, -21.650146484375, -19.373794555664062, -17.097444534301758, -14.821093559265137, -12.544742584228516, -10.268392562866211, -7.99204158782959, -5.715690612792969, -3.4393398761749268, -1.1629891395568848, 1.1133613586425781, 3.389712333679199, 5.66606330871582, 7.942413330078125, 10.218764305114746, 12.495115280151367, 14.771466255187988, 17.04781723022461, 19.324167251586914, 21.60051727294922, 23.876869201660156, 26.15321922302246, 28.429569244384766, 30.705921173095703, 32.98227310180664, 35.25862121582031, 37.53497314453125, 39.81132507324219, 42.087677001953125, 44.3640251159668, 46.640377044677734, 48.916725158691406, 51.193077087402344, 53.469425201416016, 55.74577713012695, 58.02212905883789, 60.29847717285156, 62.5748291015625, 64.85118103027344, 67.12753295898438]}, "gradients/decoder.transformer.h.4.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 3.0, 1.0, 0.0, 4.0, 6.0, 6.0, 6.0, 8.0, 7.0, 13.0, 19.0, 22.0, 11.0, 22.0, 28.0, 30.0, 34.0, 25.0, 41.0, 33.0, 47.0, 48.0, 46.0, 58.0, 36.0, 47.0, 39.0, 54.0, 40.0, 45.0, 24.0, 35.0, 28.0, 30.0, 26.0, 16.0, 15.0, 11.0, 14.0, 13.0, 7.0, 4.0, 8.0, 2.0, 0.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-13.0703125, -12.6605224609375, -12.250732421875, -11.8409423828125, -11.43115234375, -11.0213623046875, -10.611572265625, -10.2017822265625, -9.7919921875, -9.3822021484375, -8.972412109375, -8.5626220703125, -8.15283203125, -7.7430419921875, -7.333251953125, -6.9234619140625, -6.513671875, -6.1038818359375, -5.694091796875, -5.2843017578125, -4.87451171875, -4.4647216796875, -4.054931640625, -3.6451416015625, -3.2353515625, -2.8255615234375, -2.415771484375, -2.0059814453125, -1.59619140625, -1.1864013671875, -0.776611328125, -0.3668212890625, 0.04296875, 0.4527587890625, 0.862548828125, 1.2723388671875, 1.68212890625, 2.0919189453125, 2.501708984375, 2.9114990234375, 3.3212890625, 3.7310791015625, 4.140869140625, 4.5506591796875, 4.96044921875, 5.3702392578125, 5.780029296875, 6.1898193359375, 6.599609375, 7.0093994140625, 7.419189453125, 7.8289794921875, 8.23876953125, 8.6485595703125, 9.058349609375, 9.4681396484375, 9.8779296875, 10.2877197265625, 10.697509765625, 11.1072998046875, 11.51708984375, 11.9268798828125, 12.336669921875, 12.7464599609375, 13.15625]}, "gradients/decoder.transformer.h.4.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 4.0, 2.0, 5.0, 4.0, 5.0, 6.0, 9.0, 13.0, 7.0, 11.0, 20.0, 16.0, 22.0, 24.0, 23.0, 35.0, 49.0, 86.0, 167.0, 391.0, 1136.0, 4761.0, 31457.0, 534015.0, 3112368.0, 475189.0, 28271.0, 4304.0, 1052.0, 364.0, 154.0, 79.0, 55.0, 27.0, 26.0, 18.0, 22.0, 20.0, 16.0, 12.0, 9.0, 4.0, 14.0, 5.0, 3.0, 5.0, 3.0, 2.0, 2.0, 2.0, 0.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-29.296875, -28.335693359375, -27.37451171875, -26.413330078125, -25.4521484375, -24.490966796875, -23.52978515625, -22.568603515625, -21.607421875, -20.646240234375, -19.68505859375, -18.723876953125, -17.7626953125, -16.801513671875, -15.84033203125, -14.879150390625, -13.91796875, -12.956787109375, -11.99560546875, -11.034423828125, -10.0732421875, -9.112060546875, -8.15087890625, -7.189697265625, -6.228515625, -5.267333984375, -4.30615234375, -3.344970703125, -2.3837890625, -1.422607421875, -0.46142578125, 0.499755859375, 1.4609375, 2.422119140625, 3.38330078125, 4.344482421875, 5.3056640625, 6.266845703125, 7.22802734375, 8.189208984375, 9.150390625, 10.111572265625, 11.07275390625, 12.033935546875, 12.9951171875, 13.956298828125, 14.91748046875, 15.878662109375, 16.83984375, 17.801025390625, 18.76220703125, 19.723388671875, 20.6845703125, 21.645751953125, 22.60693359375, 23.568115234375, 24.529296875, 25.490478515625, 26.45166015625, 27.412841796875, 28.3740234375, 29.335205078125, 30.29638671875, 31.257568359375, 32.21875]}, "gradients/decoder.transformer.h.4.mlp.c_fc.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 2.0, 1.0, 1.0, 4.0, 7.0, 5.0, 7.0, 11.0, 23.0, 34.0, 31.0, 44.0, 64.0, 89.0, 124.0, 195.0, 308.0, 417.0, 500.0, 532.0, 472.0, 328.0, 258.0, 150.0, 129.0, 77.0, 65.0, 49.0, 44.0, 28.0, 21.0, 10.0, 7.0, 10.0, 8.0, 10.0, 4.0, 2.0, 3.0, 2.0, 0.0, 2.0, 2.0, 1.0, 1.0, 1.0, 2.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-15.9765625, -15.4476318359375, -14.918701171875, -14.3897705078125, -13.86083984375, -13.3319091796875, -12.802978515625, -12.2740478515625, -11.7451171875, -11.2161865234375, -10.687255859375, -10.1583251953125, -9.62939453125, -9.1004638671875, -8.571533203125, -8.0426025390625, -7.513671875, -6.9847412109375, -6.455810546875, -5.9268798828125, -5.39794921875, -4.8690185546875, -4.340087890625, -3.8111572265625, -3.2822265625, -2.7532958984375, -2.224365234375, -1.6954345703125, -1.16650390625, -0.6375732421875, -0.108642578125, 0.4202880859375, 0.94921875, 1.4781494140625, 2.007080078125, 2.5360107421875, 3.06494140625, 3.5938720703125, 4.122802734375, 4.6517333984375, 5.1806640625, 5.7095947265625, 6.238525390625, 6.7674560546875, 7.29638671875, 7.8253173828125, 8.354248046875, 8.8831787109375, 9.412109375, 9.9410400390625, 10.469970703125, 10.9989013671875, 11.52783203125, 12.0567626953125, 12.585693359375, 13.1146240234375, 13.6435546875, 14.1724853515625, 14.701416015625, 15.2303466796875, 15.75927734375, 16.2882080078125, 16.817138671875, 17.3460693359375, 17.875]}, "gradients/decoder.transformer.h.4.mlp.c_fc.weight": {"_type": "histogram", "values": [3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 1.0, 2.0, 2.0, 8.0, 12.0, 13.0, 15.0, 17.0, 17.0, 29.0, 39.0, 55.0, 75.0, 134.0, 225.0, 462.0, 1266.0, 4553.0, 27844.0, 311414.0, 3406971.0, 399107.0, 34147.0, 5408.0, 1374.0, 488.0, 215.0, 126.0, 70.0, 49.0, 46.0, 26.0, 18.0, 8.0, 9.0, 10.0, 7.0, 7.0, 5.0, 3.0, 7.0, 0.0, 2.0, 2.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-32.34375, -31.21044921875, -30.0771484375, -28.94384765625, -27.810546875, -26.67724609375, -25.5439453125, -24.41064453125, -23.27734375, -22.14404296875, -21.0107421875, -19.87744140625, -18.744140625, -17.61083984375, -16.4775390625, -15.34423828125, -14.2109375, -13.07763671875, -11.9443359375, -10.81103515625, -9.677734375, -8.54443359375, -7.4111328125, -6.27783203125, -5.14453125, -4.01123046875, -2.8779296875, -1.74462890625, -0.611328125, 0.52197265625, 1.6552734375, 2.78857421875, 3.921875, 5.05517578125, 6.1884765625, 7.32177734375, 8.455078125, 9.58837890625, 10.7216796875, 11.85498046875, 12.98828125, 14.12158203125, 15.2548828125, 16.38818359375, 17.521484375, 18.65478515625, 19.7880859375, 20.92138671875, 22.0546875, 23.18798828125, 24.3212890625, 25.45458984375, 26.587890625, 27.72119140625, 28.8544921875, 29.98779296875, 31.12109375, 32.25439453125, 33.3876953125, 34.52099609375, 35.654296875, 36.78759765625, 37.9208984375, 39.05419921875, 40.1875]}, "gradients/decoder.transformer.h.4.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 6.0, 290.0, 695.0, 29.0], "bins": [-1067.7012939453125, -1050.521728515625, -1033.342041015625, -1016.1624755859375, -998.9828491210938, -981.8032836914062, -964.6236572265625, -947.4440307617188, -930.2644653320312, -913.0848388671875, -895.9052734375, -878.7256469726562, -861.5460205078125, -844.366455078125, -827.1868286132812, -810.0072021484375, -792.82763671875, -775.6480102539062, -758.4684448242188, -741.288818359375, -724.1091918945312, -706.9296264648438, -689.75, -672.5703735351562, -655.3907470703125, -638.2111206054688, -621.0315551757812, -603.8519287109375, -586.6723022460938, -569.4927368164062, -552.3131103515625, -535.1334838867188, -517.953857421875, -500.7742614746094, -483.5946350097656, -466.4150390625, -449.2354431152344, -432.05584716796875, -414.876220703125, -397.6966247558594, -380.51702880859375, -363.3374328613281, -346.1578063964844, -328.97821044921875, -311.7986145019531, -294.6190185546875, -277.43939208984375, -260.2597961425781, -243.08016967773438, -225.9005584716797, -208.72096252441406, -191.54135131835938, -174.36175537109375, -157.18214416503906, -140.00253295898438, -122.82292938232422, -105.6433334350586, -88.46372985839844, -71.28411865234375, -54.104515075683594, -36.92491149902344, -19.74530792236328, -2.5656967163085938, 14.613906860351562, 31.79351043701172]}, "gradients/decoder.transformer.h.4.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 4.0, 1.0, 6.0, 10.0, 4.0, 9.0, 13.0, 6.0, 12.0, 21.0, 20.0, 21.0, 24.0, 31.0, 23.0, 36.0, 38.0, 19.0, 35.0, 38.0, 41.0, 42.0, 48.0, 42.0, 44.0, 41.0, 33.0, 38.0, 38.0, 32.0, 32.0, 26.0, 25.0, 23.0, 22.0, 20.0, 13.0, 14.0, 12.0, 12.0, 9.0, 7.0, 7.0, 7.0, 1.0, 2.0, 2.0, 4.0, 3.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-59.52099609375, -57.7048225402832, -55.888648986816406, -54.07247543334961, -52.25630187988281, -50.440128326416016, -48.62395477294922, -46.807777404785156, -44.991607666015625, -43.17543411254883, -41.35926055908203, -39.543087005615234, -37.72691345214844, -35.91073989868164, -34.094566345214844, -32.27838897705078, -30.462215423583984, -28.646041870117188, -26.82986831665039, -25.013694763183594, -23.197521209716797, -21.38134765625, -19.56517219543457, -17.748998641967773, -15.932825088500977, -14.11665153503418, -12.300477981567383, -10.48430347442627, -8.668129920959473, -6.851956367492676, -5.0357818603515625, -3.2196083068847656, -1.4034347534179688, 0.4127390384674072, 2.228912830352783, 4.045086860656738, 5.861260414123535, 7.677433967590332, 9.493608474731445, 11.309782028198242, 13.125955581665039, 14.942129135131836, 16.758302688598633, 18.574478149414062, 20.39065170288086, 22.206825256347656, 24.022998809814453, 25.83917236328125, 27.655345916748047, 29.471519470214844, 31.28769302368164, 33.10386657714844, 34.920040130615234, 36.73621368408203, 38.552391052246094, 40.368560791015625, 42.18473815917969, 44.000911712646484, 45.81708526611328, 47.63325881958008, 49.449432373046875, 51.26560592651367, 53.08177947998047, 54.89795684814453, 56.71412658691406]}, "gradients/decoder.transformer.h.4.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 3.0, 3.0, 1.0, 3.0, 4.0, 4.0, 10.0, 13.0, 9.0, 16.0, 15.0, 16.0, 24.0, 24.0, 24.0, 45.0, 30.0, 35.0, 42.0, 38.0, 38.0, 42.0, 39.0, 50.0, 43.0, 45.0, 44.0, 38.0, 40.0, 32.0, 40.0, 27.0, 26.0, 21.0, 23.0, 20.0, 26.0, 13.0, 13.0, 15.0, 5.0, 4.0, 1.0, 3.0, 2.0, 3.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.140625, -11.7418212890625, -11.343017578125, -10.9442138671875, -10.54541015625, -10.1466064453125, -9.747802734375, -9.3489990234375, -8.9501953125, -8.5513916015625, -8.152587890625, -7.7537841796875, -7.35498046875, -6.9561767578125, -6.557373046875, -6.1585693359375, -5.759765625, -5.3609619140625, -4.962158203125, -4.5633544921875, -4.16455078125, -3.7657470703125, -3.366943359375, -2.9681396484375, -2.5693359375, -2.1705322265625, -1.771728515625, -1.3729248046875, -0.97412109375, -0.5753173828125, -0.176513671875, 0.2222900390625, 0.62109375, 1.0198974609375, 1.418701171875, 1.8175048828125, 2.21630859375, 2.6151123046875, 3.013916015625, 3.4127197265625, 3.8115234375, 4.2103271484375, 4.609130859375, 5.0079345703125, 5.40673828125, 5.8055419921875, 6.204345703125, 6.6031494140625, 7.001953125, 7.4007568359375, 7.799560546875, 8.1983642578125, 8.59716796875, 8.9959716796875, 9.394775390625, 9.7935791015625, 10.1923828125, 10.5911865234375, 10.989990234375, 11.3887939453125, 11.78759765625, 12.1864013671875, 12.585205078125, 12.9840087890625, 13.3828125]}, "gradients/decoder.transformer.h.4.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 1.0, 3.0, 2.0, 5.0, 14.0, 15.0, 25.0, 36.0, 70.0, 92.0, 134.0, 179.0, 318.0, 419.0, 651.0, 1065.0, 1551.0, 2408.0, 3892.0, 6055.0, 9371.0, 15368.0, 24339.0, 39034.0, 63615.0, 104891.0, 161428.0, 195916.0, 156025.0, 100037.0, 61008.0, 37372.0, 23185.0, 14615.0, 9423.0, 5738.0, 3576.0, 2358.0, 1516.0, 988.0, 606.0, 411.0, 266.0, 188.0, 118.0, 80.0, 54.0, 37.0, 24.0, 17.0, 5.0, 8.0, 7.0, 5.0, 3.0, 1.0, 2.0, 0.0, 0.0, 1.0], "bins": [-1.0400390625, -1.0069732666015625, -0.973907470703125, -0.9408416748046875, -0.90777587890625, -0.8747100830078125, -0.841644287109375, -0.8085784912109375, -0.7755126953125, -0.7424468994140625, -0.709381103515625, -0.6763153076171875, -0.64324951171875, -0.6101837158203125, -0.577117919921875, -0.5440521240234375, -0.510986328125, -0.4779205322265625, -0.444854736328125, -0.4117889404296875, -0.37872314453125, -0.3456573486328125, -0.312591552734375, -0.2795257568359375, -0.2464599609375, -0.2133941650390625, -0.180328369140625, -0.1472625732421875, -0.11419677734375, -0.0811309814453125, -0.048065185546875, -0.0149993896484375, 0.01806640625, 0.0511322021484375, 0.084197998046875, 0.1172637939453125, 0.15032958984375, 0.1833953857421875, 0.216461181640625, 0.2495269775390625, 0.2825927734375, 0.3156585693359375, 0.348724365234375, 0.3817901611328125, 0.41485595703125, 0.4479217529296875, 0.480987548828125, 0.5140533447265625, 0.547119140625, 0.5801849365234375, 0.613250732421875, 0.6463165283203125, 0.67938232421875, 0.7124481201171875, 0.745513916015625, 0.7785797119140625, 0.8116455078125, 0.8447113037109375, 0.877777099609375, 0.9108428955078125, 0.94390869140625, 0.9769744873046875, 1.010040283203125, 1.0431060791015625, 1.076171875]}, "gradients/decoder.transformer.h.4.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 4.0, 0.0, 0.0, 2.0, 4.0, 2.0, 3.0, 6.0, 6.0, 8.0, 11.0, 18.0, 15.0, 17.0, 11.0, 12.0, 22.0, 25.0, 21.0, 34.0, 27.0, 29.0, 37.0, 44.0, 48.0, 40.0, 44.0, 37.0, 1064.0, 39.0, 37.0, 42.0, 39.0, 27.0, 31.0, 28.0, 19.0, 30.0, 20.0, 23.0, 17.0, 17.0, 19.0, 12.0, 15.0, 6.0, 8.0, 5.0, 5.0, 0.0, 4.0, 4.0, 2.0, 1.0, 2.0, 0.0, 0.0, 2.0], "bins": [-7.86328125, -7.63232421875, -7.4013671875, -7.17041015625, -6.939453125, -6.70849609375, -6.4775390625, -6.24658203125, -6.015625, -5.78466796875, -5.5537109375, -5.32275390625, -5.091796875, -4.86083984375, -4.6298828125, -4.39892578125, -4.16796875, -3.93701171875, -3.7060546875, -3.47509765625, -3.244140625, -3.01318359375, -2.7822265625, -2.55126953125, -2.3203125, -2.08935546875, -1.8583984375, -1.62744140625, -1.396484375, -1.16552734375, -0.9345703125, -0.70361328125, -0.47265625, -0.24169921875, -0.0107421875, 0.22021484375, 0.451171875, 0.68212890625, 0.9130859375, 1.14404296875, 1.375, 1.60595703125, 1.8369140625, 2.06787109375, 2.298828125, 2.52978515625, 2.7607421875, 2.99169921875, 3.22265625, 3.45361328125, 3.6845703125, 3.91552734375, 4.146484375, 4.37744140625, 4.6083984375, 4.83935546875, 5.0703125, 5.30126953125, 5.5322265625, 5.76318359375, 5.994140625, 6.22509765625, 6.4560546875, 6.68701171875, 6.91796875]}, "gradients/decoder.transformer.h.4.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 1.0, 1.0, 2.0, 8.0, 15.0, 14.0, 25.0, 25.0, 41.0, 78.0, 104.0, 165.0, 238.0, 325.0, 483.0, 841.0, 1258.0, 1855.0, 2712.0, 4128.0, 6449.0, 9906.0, 15792.0, 24065.0, 37856.0, 58073.0, 88034.0, 126928.0, 1139284.0, 222834.0, 120002.0, 82876.0, 54205.0, 35125.0, 22138.0, 14503.0, 9368.0, 6023.0, 3895.0, 2559.0, 1630.0, 1101.0, 701.0, 520.0, 302.0, 213.0, 150.0, 96.0, 70.0, 44.0, 29.0, 21.0, 9.0, 8.0, 5.0, 7.0, 5.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.75048828125, -0.7269744873046875, -0.703460693359375, -0.6799468994140625, -0.65643310546875, -0.6329193115234375, -0.609405517578125, -0.5858917236328125, -0.5623779296875, -0.5388641357421875, -0.515350341796875, -0.4918365478515625, -0.46832275390625, -0.4448089599609375, -0.421295166015625, -0.3977813720703125, -0.374267578125, -0.3507537841796875, -0.327239990234375, -0.3037261962890625, -0.28021240234375, -0.2566986083984375, -0.233184814453125, -0.2096710205078125, -0.1861572265625, -0.1626434326171875, -0.139129638671875, -0.1156158447265625, -0.09210205078125, -0.0685882568359375, -0.045074462890625, -0.0215606689453125, 0.001953125, 0.0254669189453125, 0.048980712890625, 0.0724945068359375, 0.09600830078125, 0.1195220947265625, 0.143035888671875, 0.1665496826171875, 0.1900634765625, 0.2135772705078125, 0.237091064453125, 0.2606048583984375, 0.28411865234375, 0.3076324462890625, 0.331146240234375, 0.3546600341796875, 0.378173828125, 0.4016876220703125, 0.425201416015625, 0.4487152099609375, 0.47222900390625, 0.4957427978515625, 0.519256591796875, 0.5427703857421875, 0.5662841796875, 0.5897979736328125, 0.613311767578125, 0.6368255615234375, 0.66033935546875, 0.6838531494140625, 0.707366943359375, 0.7308807373046875, 0.75439453125]}, "gradients/decoder.transformer.h.4.crossattention.q_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 3.0, 0.0, 1.0, 0.0, 1.0, 1.0, 4.0, 4.0, 2.0, 6.0, 4.0, 4.0, 6.0, 6.0, 8.0, 7.0, 4.0, 14.0, 12.0, 15.0, 18.0, 16.0, 25.0, 25.0, 23.0, 49.0, 48.0, 36.0, 54.0, 57.0, 56.0, 43.0, 78.0, 44.0, 42.0, 35.0, 35.0, 30.0, 27.0, 25.0, 28.0, 19.0, 9.0, 10.0, 10.0, 17.0, 10.0, 6.0, 5.0, 4.0, 8.0, 7.0, 3.0, 2.0, 6.0, 2.0, 2.0, 2.0, 2.0, 1.0, 0.0, 1.0], "bins": [-0.00629425048828125, -0.006106317043304443, -0.005918383598327637, -0.00573045015335083, -0.0055425167083740234, -0.005354583263397217, -0.00516664981842041, -0.0049787163734436035, -0.004790782928466797, -0.00460284948348999, -0.004414916038513184, -0.004226982593536377, -0.00403904914855957, -0.0038511157035827637, -0.003663182258605957, -0.0034752488136291504, -0.0032873153686523438, -0.003099381923675537, -0.0029114484786987305, -0.002723515033721924, -0.002535581588745117, -0.0023476481437683105, -0.002159714698791504, -0.0019717812538146973, -0.0017838478088378906, -0.001595914363861084, -0.0014079809188842773, -0.0012200474739074707, -0.001032114028930664, -0.0008441805839538574, -0.0006562471389770508, -0.00046831369400024414, -0.0002803802490234375, -9.244680404663086e-05, 9.548664093017578e-05, 0.0002834200859069824, 0.00047135353088378906, 0.0006592869758605957, 0.0008472204208374023, 0.001035153865814209, 0.0012230873107910156, 0.0014110207557678223, 0.001598954200744629, 0.0017868876457214355, 0.001974821090698242, 0.002162754535675049, 0.0023506879806518555, 0.002538621425628662, 0.0027265548706054688, 0.0029144883155822754, 0.003102421760559082, 0.0032903552055358887, 0.0034782886505126953, 0.003666222095489502, 0.0038541555404663086, 0.004042088985443115, 0.004230022430419922, 0.0044179558753967285, 0.004605889320373535, 0.004793822765350342, 0.0049817562103271484, 0.005169689655303955, 0.005357623100280762, 0.005545556545257568, 0.005733489990234375]}, "gradients/decoder.transformer.h.4.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 5.0, 3.0, 2.0, 3.0, 10.0, 5.0, 5.0, 3.0, 11.0, 6.0, 10.0, 15.0, 16.0, 25.0, 20.0, 17.0, 25.0, 36.0, 45.0, 55.0, 77.0, 95.0, 147.0, 174.0, 291.0, 567.0, 3407.0, 421630.0, 615802.0, 4182.0, 660.0, 315.0, 189.0, 151.0, 122.0, 97.0, 77.0, 47.0, 27.0, 36.0, 15.0, 18.0, 18.0, 15.0, 16.0, 9.0, 8.0, 10.0, 13.0, 9.0, 3.0, 2.0, 5.0, 6.0, 3.0, 3.0, 2.0, 4.0, 2.0, 0.0, 2.0], "bins": [-0.11529541015625, -0.11158084869384766, -0.10786628723144531, -0.10415172576904297, -0.10043716430664062, -0.09672260284423828, -0.09300804138183594, -0.0892934799194336, -0.08557891845703125, -0.0818643569946289, -0.07814979553222656, -0.07443523406982422, -0.07072067260742188, -0.06700611114501953, -0.06329154968261719, -0.059576988220214844, -0.0558624267578125, -0.052147865295410156, -0.04843330383300781, -0.04471874237060547, -0.041004180908203125, -0.03728961944580078, -0.03357505798339844, -0.029860496520996094, -0.02614593505859375, -0.022431373596191406, -0.018716812133789062, -0.015002250671386719, -0.011287689208984375, -0.007573127746582031, -0.0038585662841796875, -0.00014400482177734375, 0.003570556640625, 0.007285118103027344, 0.010999679565429688, 0.014714241027832031, 0.018428802490234375, 0.02214336395263672, 0.025857925415039062, 0.029572486877441406, 0.03328704833984375, 0.037001609802246094, 0.04071617126464844, 0.04443073272705078, 0.048145294189453125, 0.05185985565185547, 0.05557441711425781, 0.059288978576660156, 0.0630035400390625, 0.06671810150146484, 0.07043266296386719, 0.07414722442626953, 0.07786178588867188, 0.08157634735107422, 0.08529090881347656, 0.0890054702758789, 0.09272003173828125, 0.0964345932006836, 0.10014915466308594, 0.10386371612548828, 0.10757827758789062, 0.11129283905029297, 0.11500740051269531, 0.11872196197509766, 0.1224365234375]}, "gradients/decoder.transformer.h.4.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 4.0, 134.0, 769.0, 109.0, 5.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.008730384521186352, -0.007379175629466772, -0.006027966737747192, -0.004676757380366325, -0.0033255484886467457, -0.001974339596927166, -0.000623130239546299, 0.0007280781865119934, 0.0020792875438928604, 0.00343049643561244, 0.00478170532733202, 0.006132914684712887, 0.0074841235764324665, 0.008835332468152046, 0.010186541825532913, 0.011537750251591206, 0.012888959608972073, 0.01424016896635294, 0.015591377392411232, 0.0169425867497921, 0.018293796107172966, 0.019645005464553833, 0.02099621295928955, 0.022347422316670418, 0.023698631674051285, 0.025049841031432152, 0.02640105038881302, 0.027752257883548737, 0.029103467240929604, 0.03045467659831047, 0.03180588781833649, 0.033157095313072205, 0.03450830653309822, 0.03585951402783394, 0.037210725247859955, 0.03856193274259567, 0.03991314396262169, 0.04126435145735741, 0.04261556267738342, 0.04396677017211914, 0.04531797766685486, 0.046669185161590576, 0.04802039638161659, 0.04937160387635231, 0.050722815096378326, 0.052074022591114044, 0.05342523008584976, 0.05477644130587578, 0.056127652525901794, 0.05747886002063751, 0.05883007124066353, 0.060181278735399246, 0.06153248995542526, 0.06288369745016098, 0.0642349049448967, 0.06558611243963242, 0.06693731993436813, 0.06828852742910385, 0.06963973492383957, 0.07099094986915588, 0.0723421573638916, 0.07369336485862732, 0.07504457235336304, 0.07639577984809875, 0.07774699479341507]}, "gradients/decoder.transformer.h.4.ln_cross_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 3.0, 7.0, 2.0, 3.0, 5.0, 3.0, 9.0, 15.0, 9.0, 13.0, 7.0, 17.0, 26.0, 31.0, 22.0, 31.0, 36.0, 30.0, 50.0, 43.0, 50.0, 41.0, 54.0, 47.0, 43.0, 50.0, 34.0, 41.0, 33.0, 31.0, 32.0, 29.0, 28.0, 26.0, 27.0, 11.0, 16.0, 11.0, 10.0, 12.0, 7.0, 6.0, 4.0, 3.0, 1.0, 2.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.004168152809143066, -0.004037458449602127, -0.003906764090061188, -0.0037760697305202484, -0.003645375370979309, -0.0035146810114383698, -0.0033839866518974304, -0.003253292292356491, -0.0031225979328155518, -0.0029919035732746124, -0.002861209213733673, -0.0027305148541927338, -0.0025998204946517944, -0.002469126135110855, -0.0023384317755699158, -0.0022077374160289764, -0.002077043056488037, -0.0019463486969470978, -0.0018156543374061584, -0.0016849599778652191, -0.0015542656183242798, -0.0014235712587833405, -0.0012928768992424011, -0.0011621825397014618, -0.0010314881801605225, -0.0009007938206195831, -0.0007700994610786438, -0.0006394051015377045, -0.0005087107419967651, -0.0003780163824558258, -0.0002473220229148865, -0.00011662766337394714, 1.4066696166992188e-05, 0.00014476105570793152, 0.00027545541524887085, 0.0004061497747898102, 0.0005368441343307495, 0.0006675384938716888, 0.0007982328534126282, 0.0009289272129535675, 0.0010596215724945068, 0.0011903159320354462, 0.0013210102915763855, 0.0014517046511173248, 0.0015823990106582642, 0.0017130933701992035, 0.0018437877297401428, 0.001974482089281082, 0.0021051764488220215, 0.002235870808362961, 0.0023665651679039, 0.0024972595274448395, 0.002627953886985779, 0.002758648246526718, 0.0028893426060676575, 0.003020036965608597, 0.003150731325149536, 0.0032814256846904755, 0.003412120044231415, 0.003542814403772354, 0.0036735087633132935, 0.003804203122854233, 0.003934897482395172, 0.0040655918419361115, 0.004196286201477051]}, "gradients/decoder.transformer.h.4.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 3.0, 3.0, 1.0, 3.0, 4.0, 4.0, 10.0, 13.0, 10.0, 15.0, 15.0, 16.0, 24.0, 25.0, 23.0, 45.0, 31.0, 34.0, 43.0, 38.0, 40.0, 39.0, 39.0, 50.0, 45.0, 43.0, 44.0, 38.0, 40.0, 32.0, 40.0, 27.0, 26.0, 21.0, 23.0, 21.0, 25.0, 13.0, 13.0, 15.0, 5.0, 4.0, 1.0, 3.0, 2.0, 3.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.1328125, -11.734130859375, -11.33544921875, -10.936767578125, -10.5380859375, -10.139404296875, -9.74072265625, -9.342041015625, -8.943359375, -8.544677734375, -8.14599609375, -7.747314453125, -7.3486328125, -6.949951171875, -6.55126953125, -6.152587890625, -5.75390625, -5.355224609375, -4.95654296875, -4.557861328125, -4.1591796875, -3.760498046875, -3.36181640625, -2.963134765625, -2.564453125, -2.165771484375, -1.76708984375, -1.368408203125, -0.9697265625, -0.571044921875, -0.17236328125, 0.226318359375, 0.625, 1.023681640625, 1.42236328125, 1.821044921875, 2.2197265625, 2.618408203125, 3.01708984375, 3.415771484375, 3.814453125, 4.213134765625, 4.61181640625, 5.010498046875, 5.4091796875, 5.807861328125, 6.20654296875, 6.605224609375, 7.00390625, 7.402587890625, 7.80126953125, 8.199951171875, 8.5986328125, 8.997314453125, 9.39599609375, 9.794677734375, 10.193359375, 10.592041015625, 10.99072265625, 11.389404296875, 11.7880859375, 12.186767578125, 12.58544921875, 12.984130859375, 13.3828125]}, "gradients/decoder.transformer.h.4.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 5.0, 1.0, 4.0, 6.0, 8.0, 7.0, 12.0, 21.0, 26.0, 43.0, 55.0, 88.0, 115.0, 165.0, 264.0, 435.0, 694.0, 1328.0, 2876.0, 7261.0, 20653.0, 62881.0, 202199.0, 447746.0, 203385.0, 63816.0, 20914.0, 7230.0, 3021.0, 1391.0, 690.0, 398.0, 255.0, 175.0, 125.0, 99.0, 46.0, 37.0, 25.0, 18.0, 15.0, 6.0, 14.0, 4.0, 6.0, 2.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 2.0], "bins": [-17.140625, -16.6290283203125, -16.117431640625, -15.6058349609375, -15.09423828125, -14.5826416015625, -14.071044921875, -13.5594482421875, -13.0478515625, -12.5362548828125, -12.024658203125, -11.5130615234375, -11.00146484375, -10.4898681640625, -9.978271484375, -9.4666748046875, -8.955078125, -8.4434814453125, -7.931884765625, -7.4202880859375, -6.90869140625, -6.3970947265625, -5.885498046875, -5.3739013671875, -4.8623046875, -4.3507080078125, -3.839111328125, -3.3275146484375, -2.81591796875, -2.3043212890625, -1.792724609375, -1.2811279296875, -0.76953125, -0.2579345703125, 0.253662109375, 0.7652587890625, 1.27685546875, 1.7884521484375, 2.300048828125, 2.8116455078125, 3.3232421875, 3.8348388671875, 4.346435546875, 4.8580322265625, 5.36962890625, 5.8812255859375, 6.392822265625, 6.9044189453125, 7.416015625, 7.9276123046875, 8.439208984375, 8.9508056640625, 9.46240234375, 9.9739990234375, 10.485595703125, 10.9971923828125, 11.5087890625, 12.0203857421875, 12.531982421875, 13.0435791015625, 13.55517578125, 14.0667724609375, 14.578369140625, 15.0899658203125, 15.6015625]}, "gradients/decoder.transformer.h.4.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 4.0, 2.0, 6.0, 3.0, 8.0, 7.0, 8.0, 14.0, 18.0, 13.0, 20.0, 20.0, 29.0, 47.0, 28.0, 39.0, 46.0, 61.0, 63.0, 84.0, 166.0, 1450.0, 294.0, 168.0, 87.0, 60.0, 43.0, 51.0, 30.0, 31.0, 30.0, 19.0, 19.0, 13.0, 25.0, 13.0, 8.0, 4.0, 6.0, 8.0, 2.0, 2.0, 4.0, 2.0, 3.0, 1.0, 1.0, 3.0, 0.0, 0.0, 1.0], "bins": [-36.03125, -34.99853515625, -33.9658203125, -32.93310546875, -31.900390625, -30.86767578125, -29.8349609375, -28.80224609375, -27.76953125, -26.73681640625, -25.7041015625, -24.67138671875, -23.638671875, -22.60595703125, -21.5732421875, -20.54052734375, -19.5078125, -18.47509765625, -17.4423828125, -16.40966796875, -15.376953125, -14.34423828125, -13.3115234375, -12.27880859375, -11.24609375, -10.21337890625, -9.1806640625, -8.14794921875, -7.115234375, -6.08251953125, -5.0498046875, -4.01708984375, -2.984375, -1.95166015625, -0.9189453125, 0.11376953125, 1.146484375, 2.17919921875, 3.2119140625, 4.24462890625, 5.27734375, 6.31005859375, 7.3427734375, 8.37548828125, 9.408203125, 10.44091796875, 11.4736328125, 12.50634765625, 13.5390625, 14.57177734375, 15.6044921875, 16.63720703125, 17.669921875, 18.70263671875, 19.7353515625, 20.76806640625, 21.80078125, 22.83349609375, 23.8662109375, 24.89892578125, 25.931640625, 26.96435546875, 27.9970703125, 29.02978515625, 30.0625]}, "gradients/decoder.transformer.h.4.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 3.0, 0.0, 4.0, 2.0, 7.0, 6.0, 5.0, 8.0, 11.0, 16.0, 18.0, 38.0, 36.0, 38.0, 68.0, 69.0, 90.0, 133.0, 174.0, 264.0, 489.0, 1159.0, 13021.0, 796338.0, 2307239.0, 23168.0, 1649.0, 519.0, 319.0, 207.0, 129.0, 121.0, 72.0, 67.0, 44.0, 43.0, 31.0, 28.0, 17.0, 13.0, 10.0, 11.0, 4.0, 9.0, 9.0, 2.0, 0.0, 4.0, 1.0, 2.0, 1.0, 0.0, 4.0, 0.0, 0.0, 2.0], "bins": [-49.75, -48.19970703125, -46.6494140625, -45.09912109375, -43.548828125, -41.99853515625, -40.4482421875, -38.89794921875, -37.34765625, -35.79736328125, -34.2470703125, -32.69677734375, -31.146484375, -29.59619140625, -28.0458984375, -26.49560546875, -24.9453125, -23.39501953125, -21.8447265625, -20.29443359375, -18.744140625, -17.19384765625, -15.6435546875, -14.09326171875, -12.54296875, -10.99267578125, -9.4423828125, -7.89208984375, -6.341796875, -4.79150390625, -3.2412109375, -1.69091796875, -0.140625, 1.40966796875, 2.9599609375, 4.51025390625, 6.060546875, 7.61083984375, 9.1611328125, 10.71142578125, 12.26171875, 13.81201171875, 15.3623046875, 16.91259765625, 18.462890625, 20.01318359375, 21.5634765625, 23.11376953125, 24.6640625, 26.21435546875, 27.7646484375, 29.31494140625, 30.865234375, 32.41552734375, 33.9658203125, 35.51611328125, 37.06640625, 38.61669921875, 40.1669921875, 41.71728515625, 43.267578125, 44.81787109375, 46.3681640625, 47.91845703125, 49.46875]}, "gradients/decoder.transformer.h.4.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 415.0, 594.0, 6.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-146.52137756347656, -133.27561950683594, -120.02987670898438, -106.78411865234375, -93.53836822509766, -80.29261779785156, -67.04685974121094, -53.801109313964844, -40.55535888671875, -27.309606552124023, -14.063854217529297, -0.8180999755859375, 12.427650451660156, 25.67340087890625, 38.919158935546875, 52.16490936279297, 65.41065979003906, 78.65641021728516, 91.90216064453125, 105.14791870117188, 118.39366912841797, 131.63941955566406, 144.8851776123047, 158.13092041015625, 171.37667846679688, 184.6224365234375, 197.86817932128906, 211.1139373779297, 224.35968017578125, 237.60543823242188, 250.8511962890625, 264.0969543457031, 277.34271240234375, 290.5884704589844, 303.834228515625, 317.0799560546875, 330.3257141113281, 343.57147216796875, 356.8172302246094, 370.06298828125, 383.3087158203125, 396.5544738769531, 409.80023193359375, 423.04595947265625, 436.2917175292969, 449.5374755859375, 462.7832336425781, 476.02899169921875, 489.2747497558594, 502.5205078125, 515.7662353515625, 529.0120239257812, 542.2577514648438, 555.5035400390625, 568.749267578125, 581.9949951171875, 595.2407836914062, 608.4865112304688, 621.7322998046875, 634.97802734375, 648.2238159179688, 661.4695434570312, 674.71533203125, 687.9610595703125, 701.206787109375]}, "gradients/decoder.transformer.h.4.ln_1.bias": {"_type": "histogram", "values": [2.0, 3.0, 1.0, 3.0, 3.0, 1.0, 4.0, 2.0, 10.0, 6.0, 8.0, 3.0, 20.0, 12.0, 17.0, 20.0, 25.0, 16.0, 27.0, 25.0, 25.0, 32.0, 31.0, 45.0, 29.0, 58.0, 29.0, 52.0, 29.0, 31.0, 45.0, 46.0, 47.0, 28.0, 24.0, 28.0, 28.0, 30.0, 28.0, 24.0, 15.0, 15.0, 11.0, 20.0, 12.0, 11.0, 7.0, 7.0, 4.0, 8.0, 0.0, 6.0, 2.0, 2.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-58.77857971191406, -56.7213134765625, -54.66404724121094, -52.606781005859375, -50.54951477050781, -48.49224853515625, -46.43497848510742, -44.37771224975586, -42.3204460144043, -40.263179779052734, -38.20591354370117, -36.14864730834961, -34.09137725830078, -32.03411102294922, -29.976844787597656, -27.919578552246094, -25.86231231689453, -23.80504608154297, -21.747779846191406, -19.69051170349121, -17.63324546813965, -15.575979232788086, -13.518712043762207, -11.461444854736328, -9.404178619384766, -7.346911907196045, -5.289645195007324, -3.2323784828186035, -1.1751117706298828, 0.8821544647216797, 2.9394216537475586, 4.9966888427734375, 7.053962707519531, 9.111228942871094, 11.168496131896973, 13.225763320922852, 15.283029556274414, 17.340295791625977, 19.397563934326172, 21.454830169677734, 23.512096405029297, 25.56936264038086, 27.626628875732422, 29.683897018432617, 31.74116325378418, 33.798431396484375, 35.85569763183594, 37.9129638671875, 39.97023010253906, 42.027496337890625, 44.08476257324219, 46.14202880859375, 48.19929504394531, 50.256561279296875, 52.3138313293457, 54.371097564697266, 56.42836380004883, 58.48563003540039, 60.54289627075195, 62.600162506103516, 64.65743255615234, 66.7146987915039, 68.77196502685547, 70.82923126220703, 72.8864974975586]}, "gradients/decoder.transformer.h.3.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 2.0, 6.0, 3.0, 5.0, 10.0, 8.0, 10.0, 10.0, 12.0, 12.0, 20.0, 24.0, 27.0, 19.0, 36.0, 31.0, 37.0, 42.0, 30.0, 46.0, 53.0, 35.0, 44.0, 37.0, 49.0, 39.0, 31.0, 40.0, 44.0, 26.0, 28.0, 29.0, 21.0, 29.0, 19.0, 22.0, 23.0, 11.0, 8.0, 8.0, 9.0, 3.0, 5.0, 5.0, 1.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-12.71875, -12.3292236328125, -11.939697265625, -11.5501708984375, -11.16064453125, -10.7711181640625, -10.381591796875, -9.9920654296875, -9.6025390625, -9.2130126953125, -8.823486328125, -8.4339599609375, -8.04443359375, -7.6549072265625, -7.265380859375, -6.8758544921875, -6.486328125, -6.0968017578125, -5.707275390625, -5.3177490234375, -4.92822265625, -4.5386962890625, -4.149169921875, -3.7596435546875, -3.3701171875, -2.9805908203125, -2.591064453125, -2.2015380859375, -1.81201171875, -1.4224853515625, -1.032958984375, -0.6434326171875, -0.25390625, 0.1356201171875, 0.525146484375, 0.9146728515625, 1.30419921875, 1.6937255859375, 2.083251953125, 2.4727783203125, 2.8623046875, 3.2518310546875, 3.641357421875, 4.0308837890625, 4.42041015625, 4.8099365234375, 5.199462890625, 5.5889892578125, 5.978515625, 6.3680419921875, 6.757568359375, 7.1470947265625, 7.53662109375, 7.9261474609375, 8.315673828125, 8.7052001953125, 9.0947265625, 9.4842529296875, 9.873779296875, 10.2633056640625, 10.65283203125, 11.0423583984375, 11.431884765625, 11.8214111328125, 12.2109375]}, "gradients/decoder.transformer.h.3.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 4.0, 2.0, 5.0, 6.0, 5.0, 11.0, 12.0, 10.0, 14.0, 19.0, 23.0, 33.0, 32.0, 46.0, 54.0, 67.0, 104.0, 148.0, 206.0, 264.0, 329.0, 435.0, 1376.0, 4185589.0, 3527.0, 534.0, 343.0, 273.0, 218.0, 134.0, 113.0, 82.0, 55.0, 38.0, 41.0, 23.0, 25.0, 23.0, 15.0, 12.0, 7.0, 9.0, 8.0, 5.0, 3.0, 6.0, 1.0, 0.0, 4.0, 2.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-246.375, -238.322265625, -230.26953125, -222.216796875, -214.1640625, -206.111328125, -198.05859375, -190.005859375, -181.953125, -173.900390625, -165.84765625, -157.794921875, -149.7421875, -141.689453125, -133.63671875, -125.583984375, -117.53125, -109.478515625, -101.42578125, -93.373046875, -85.3203125, -77.267578125, -69.21484375, -61.162109375, -53.109375, -45.056640625, -37.00390625, -28.951171875, -20.8984375, -12.845703125, -4.79296875, 3.259765625, 11.3125, 19.365234375, 27.41796875, 35.470703125, 43.5234375, 51.576171875, 59.62890625, 67.681640625, 75.734375, 83.787109375, 91.83984375, 99.892578125, 107.9453125, 115.998046875, 124.05078125, 132.103515625, 140.15625, 148.208984375, 156.26171875, 164.314453125, 172.3671875, 180.419921875, 188.47265625, 196.525390625, 204.578125, 212.630859375, 220.68359375, 228.736328125, 236.7890625, 244.841796875, 252.89453125, 260.947265625, 269.0]}, "gradients/decoder.transformer.h.3.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 3.0, 1.0, 3.0, 6.0, 9.0, 9.0, 10.0, 17.0, 8.0, 22.0, 17.0, 41.0, 50.0, 65.0, 85.0, 116.0, 174.0, 287.0, 412.0, 575.0, 649.0, 466.0, 310.0, 234.0, 143.0, 89.0, 76.0, 54.0, 34.0, 32.0, 15.0, 17.0, 19.0, 7.0, 4.0, 3.0, 7.0, 2.0, 5.0, 2.0, 4.0, 1.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0], "bins": [-20.609375, -20.00634765625, -19.4033203125, -18.80029296875, -18.197265625, -17.59423828125, -16.9912109375, -16.38818359375, -15.78515625, -15.18212890625, -14.5791015625, -13.97607421875, -13.373046875, -12.77001953125, -12.1669921875, -11.56396484375, -10.9609375, -10.35791015625, -9.7548828125, -9.15185546875, -8.548828125, -7.94580078125, -7.3427734375, -6.73974609375, -6.13671875, -5.53369140625, -4.9306640625, -4.32763671875, -3.724609375, -3.12158203125, -2.5185546875, -1.91552734375, -1.3125, -0.70947265625, -0.1064453125, 0.49658203125, 1.099609375, 1.70263671875, 2.3056640625, 2.90869140625, 3.51171875, 4.11474609375, 4.7177734375, 5.32080078125, 5.923828125, 6.52685546875, 7.1298828125, 7.73291015625, 8.3359375, 8.93896484375, 9.5419921875, 10.14501953125, 10.748046875, 11.35107421875, 11.9541015625, 12.55712890625, 13.16015625, 13.76318359375, 14.3662109375, 14.96923828125, 15.572265625, 16.17529296875, 16.7783203125, 17.38134765625, 17.984375]}, "gradients/decoder.transformer.h.3.mlp.c_fc.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 1.0, 3.0, 2.0, 3.0, 8.0, 6.0, 5.0, 6.0, 5.0, 11.0, 8.0, 18.0, 14.0, 30.0, 29.0, 31.0, 47.0, 53.0, 65.0, 106.0, 282.0, 3018.0, 3820361.0, 368362.0, 1228.0, 178.0, 88.0, 53.0, 62.0, 38.0, 25.0, 27.0, 22.0, 25.0, 17.0, 18.0, 10.0, 8.0, 7.0, 1.0, 5.0, 3.0, 1.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-109.875, -105.626953125, -101.37890625, -97.130859375, -92.8828125, -88.634765625, -84.38671875, -80.138671875, -75.890625, -71.642578125, -67.39453125, -63.146484375, -58.8984375, -54.650390625, -50.40234375, -46.154296875, -41.90625, -37.658203125, -33.41015625, -29.162109375, -24.9140625, -20.666015625, -16.41796875, -12.169921875, -7.921875, -3.673828125, 0.57421875, 4.822265625, 9.0703125, 13.318359375, 17.56640625, 21.814453125, 26.0625, 30.310546875, 34.55859375, 38.806640625, 43.0546875, 47.302734375, 51.55078125, 55.798828125, 60.046875, 64.294921875, 68.54296875, 72.791015625, 77.0390625, 81.287109375, 85.53515625, 89.783203125, 94.03125, 98.279296875, 102.52734375, 106.775390625, 111.0234375, 115.271484375, 119.51953125, 123.767578125, 128.015625, 132.263671875, 136.51171875, 140.759765625, 145.0078125, 149.255859375, 153.50390625, 157.751953125, 162.0]}, "gradients/decoder.transformer.h.3.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 0.0, 10.0, 213.0, 599.0, 182.0, 10.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-612.478515625, -601.6349487304688, -590.7913818359375, -579.94775390625, -569.1041870117188, -558.2606201171875, -547.4170532226562, -536.573486328125, -525.7299194335938, -514.8863525390625, -504.0427551269531, -493.1991882324219, -482.3556213378906, -471.51202392578125, -460.66845703125, -449.82489013671875, -438.98126220703125, -428.1376953125, -417.2940979003906, -406.4505310058594, -395.6069641113281, -384.76336669921875, -373.9197998046875, -363.07623291015625, -352.232666015625, -341.38909912109375, -330.5455017089844, -319.7019348144531, -308.8583679199219, -298.0147705078125, -287.17120361328125, -276.32763671875, -265.4840393066406, -254.6404571533203, -243.79689025878906, -232.95330810546875, -222.10972595214844, -211.26614379882812, -200.42257690429688, -189.57899475097656, -178.73541259765625, -167.89183044433594, -157.0482635498047, -146.20468139648438, -135.36109924316406, -124.51752471923828, -113.6739501953125, -102.83036804199219, -91.98680114746094, -81.14322662353516, -70.29964447021484, -59.45606994628906, -48.612491607666016, -37.76891326904297, -26.925338745117188, -16.081756591796875, -5.238182067871094, 5.605395317077637, 16.448972702026367, 27.29254913330078, 38.13612747192383, 48.979705810546875, 59.823280334472656, 70.66686248779297, 81.51043701171875]}, "gradients/decoder.transformer.h.3.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 4.0, 3.0, 3.0, 0.0, 3.0, 8.0, 6.0, 11.0, 13.0, 9.0, 15.0, 14.0, 19.0, 22.0, 24.0, 34.0, 37.0, 37.0, 48.0, 44.0, 56.0, 45.0, 37.0, 51.0, 47.0, 52.0, 49.0, 42.0, 35.0, 44.0, 28.0, 26.0, 20.0, 32.0, 20.0, 13.0, 14.0, 11.0, 10.0, 9.0, 7.0, 2.0, 4.0, 2.0, 6.0, 0.0, 1.0, 1.0, 0.0, 2.0], "bins": [-79.68128967285156, -77.56857299804688, -75.45584869384766, -73.34313201904297, -71.23041534423828, -69.11769104003906, -67.00497436523438, -64.89225769042969, -62.779537200927734, -60.66681671142578, -58.554100036621094, -56.44137954711914, -54.32865905761719, -52.2159423828125, -50.10322189331055, -47.990501403808594, -45.877784729003906, -43.76506423950195, -41.652347564697266, -39.53962707519531, -37.426910400390625, -35.31418991088867, -33.20146942138672, -31.0887508392334, -28.976032257080078, -26.863313674926758, -24.750595092773438, -22.637874603271484, -20.525156021118164, -18.412437438964844, -16.29971694946289, -14.18699836730957, -12.074287414550781, -9.961568832397461, -7.848849296569824, -5.736130237579346, -3.623411178588867, -1.5106925964355469, 0.6020269393920898, 2.7147464752197266, 4.827465057373047, 6.940184116363525, 9.052903175354004, 11.16562271118164, 13.278341293334961, 15.391059875488281, 17.503780364990234, 19.616498947143555, 21.729217529296875, 23.841936111450195, 25.954654693603516, 28.06737518310547, 30.18009376525879, 32.29281234741211, 34.40553283691406, 36.51824951171875, 38.6309700012207, 40.743690490722656, 42.856407165527344, 44.9691276550293, 47.08184814453125, 49.19456481933594, 51.30728530883789, 53.420005798339844, 55.53272247314453]}, "gradients/decoder.transformer.h.3.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 3.0, 6.0, 9.0, 9.0, 10.0, 12.0, 9.0, 14.0, 16.0, 24.0, 25.0, 31.0, 29.0, 34.0, 39.0, 37.0, 53.0, 36.0, 49.0, 54.0, 53.0, 57.0, 42.0, 53.0, 38.0, 42.0, 39.0, 44.0, 27.0, 25.0, 26.0, 13.0, 15.0, 13.0, 8.0, 12.0, 3.0, 2.0, 4.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-13.59375, -13.1527099609375, -12.711669921875, -12.2706298828125, -11.82958984375, -11.3885498046875, -10.947509765625, -10.5064697265625, -10.0654296875, -9.6243896484375, -9.183349609375, -8.7423095703125, -8.30126953125, -7.8602294921875, -7.419189453125, -6.9781494140625, -6.537109375, -6.0960693359375, -5.655029296875, -5.2139892578125, -4.77294921875, -4.3319091796875, -3.890869140625, -3.4498291015625, -3.0087890625, -2.5677490234375, -2.126708984375, -1.6856689453125, -1.24462890625, -0.8035888671875, -0.362548828125, 0.0784912109375, 0.51953125, 0.9605712890625, 1.401611328125, 1.8426513671875, 2.28369140625, 2.7247314453125, 3.165771484375, 3.6068115234375, 4.0478515625, 4.4888916015625, 4.929931640625, 5.3709716796875, 5.81201171875, 6.2530517578125, 6.694091796875, 7.1351318359375, 7.576171875, 8.0172119140625, 8.458251953125, 8.8992919921875, 9.34033203125, 9.7813720703125, 10.222412109375, 10.6634521484375, 11.1044921875, 11.5455322265625, 11.986572265625, 12.4276123046875, 12.86865234375, 13.3096923828125, 13.750732421875, 14.1917724609375, 14.6328125]}, "gradients/decoder.transformer.h.3.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 3.0, 2.0, 2.0, 1.0, 2.0, 0.0, 1.0, 10.0, 12.0, 21.0, 21.0, 44.0, 70.0, 87.0, 141.0, 206.0, 282.0, 494.0, 688.0, 1107.0, 1741.0, 2589.0, 4266.0, 6786.0, 11113.0, 17982.0, 28984.0, 47693.0, 77746.0, 124618.0, 179735.0, 186361.0, 135128.0, 84533.0, 51816.0, 31800.0, 19738.0, 12248.0, 7561.0, 4835.0, 2920.0, 1850.0, 1164.0, 728.0, 508.0, 318.0, 212.0, 146.0, 66.0, 57.0, 36.0, 35.0, 21.0, 19.0, 11.0, 4.0, 2.0, 6.0, 2.0, 1.0, 0.0, 1.0, 1.0], "bins": [-1.021484375, -0.989654541015625, -0.95782470703125, -0.925994873046875, -0.8941650390625, -0.862335205078125, -0.83050537109375, -0.798675537109375, -0.766845703125, -0.735015869140625, -0.70318603515625, -0.671356201171875, -0.6395263671875, -0.607696533203125, -0.57586669921875, -0.544036865234375, -0.51220703125, -0.480377197265625, -0.44854736328125, -0.416717529296875, -0.3848876953125, -0.353057861328125, -0.32122802734375, -0.289398193359375, -0.257568359375, -0.225738525390625, -0.19390869140625, -0.162078857421875, -0.1302490234375, -0.098419189453125, -0.06658935546875, -0.034759521484375, -0.0029296875, 0.028900146484375, 0.06072998046875, 0.092559814453125, 0.1243896484375, 0.156219482421875, 0.18804931640625, 0.219879150390625, 0.251708984375, 0.283538818359375, 0.31536865234375, 0.347198486328125, 0.3790283203125, 0.410858154296875, 0.44268798828125, 0.474517822265625, 0.50634765625, 0.538177490234375, 0.57000732421875, 0.601837158203125, 0.6336669921875, 0.665496826171875, 0.69732666015625, 0.729156494140625, 0.760986328125, 0.792816162109375, 0.82464599609375, 0.856475830078125, 0.8883056640625, 0.920135498046875, 0.95196533203125, 0.983795166015625, 1.015625]}, "gradients/decoder.transformer.h.3.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 0.0, 2.0, 3.0, 8.0, 3.0, 9.0, 7.0, 11.0, 4.0, 12.0, 10.0, 15.0, 28.0, 22.0, 18.0, 36.0, 35.0, 41.0, 31.0, 37.0, 39.0, 44.0, 42.0, 1074.0, 62.0, 45.0, 44.0, 40.0, 38.0, 57.0, 34.0, 22.0, 28.0, 29.0, 21.0, 22.0, 23.0, 8.0, 9.0, 6.0, 8.0, 3.0, 2.0, 0.0, 3.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-9.140625, -8.8814697265625, -8.622314453125, -8.3631591796875, -8.10400390625, -7.8448486328125, -7.585693359375, -7.3265380859375, -7.0673828125, -6.8082275390625, -6.549072265625, -6.2899169921875, -6.03076171875, -5.7716064453125, -5.512451171875, -5.2532958984375, -4.994140625, -4.7349853515625, -4.475830078125, -4.2166748046875, -3.95751953125, -3.6983642578125, -3.439208984375, -3.1800537109375, -2.9208984375, -2.6617431640625, -2.402587890625, -2.1434326171875, -1.88427734375, -1.6251220703125, -1.365966796875, -1.1068115234375, -0.84765625, -0.5885009765625, -0.329345703125, -0.0701904296875, 0.18896484375, 0.4481201171875, 0.707275390625, 0.9664306640625, 1.2255859375, 1.4847412109375, 1.743896484375, 2.0030517578125, 2.26220703125, 2.5213623046875, 2.780517578125, 3.0396728515625, 3.298828125, 3.5579833984375, 3.817138671875, 4.0762939453125, 4.33544921875, 4.5946044921875, 4.853759765625, 5.1129150390625, 5.3720703125, 5.6312255859375, 5.890380859375, 6.1495361328125, 6.40869140625, 6.6678466796875, 6.927001953125, 7.1861572265625, 7.4453125]}, "gradients/decoder.transformer.h.3.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 4.0, 7.0, 13.0, 4.0, 14.0, 23.0, 46.0, 69.0, 80.0, 104.0, 159.0, 262.0, 370.0, 565.0, 747.0, 1242.0, 1851.0, 2713.0, 4020.0, 5951.0, 8844.0, 13570.0, 20573.0, 30955.0, 46949.0, 69709.0, 102507.0, 138915.0, 1201987.0, 137387.0, 101262.0, 69305.0, 46360.0, 30656.0, 20088.0, 13224.0, 8840.0, 5764.0, 3951.0, 2662.0, 1739.0, 1228.0, 783.0, 543.0, 387.0, 240.0, 151.0, 113.0, 68.0, 53.0, 26.0, 19.0, 15.0, 8.0, 15.0, 3.0, 2.0, 1.0, 2.0, 0.0, 2.0], "bins": [-0.68017578125, -0.6585693359375, -0.636962890625, -0.6153564453125, -0.59375, -0.5721435546875, -0.550537109375, -0.5289306640625, -0.50732421875, -0.4857177734375, -0.464111328125, -0.4425048828125, -0.4208984375, -0.3992919921875, -0.377685546875, -0.3560791015625, -0.33447265625, -0.3128662109375, -0.291259765625, -0.2696533203125, -0.248046875, -0.2264404296875, -0.204833984375, -0.1832275390625, -0.16162109375, -0.1400146484375, -0.118408203125, -0.0968017578125, -0.0751953125, -0.0535888671875, -0.031982421875, -0.0103759765625, 0.01123046875, 0.0328369140625, 0.054443359375, 0.0760498046875, 0.09765625, 0.1192626953125, 0.140869140625, 0.1624755859375, 0.18408203125, 0.2056884765625, 0.227294921875, 0.2489013671875, 0.2705078125, 0.2921142578125, 0.313720703125, 0.3353271484375, 0.35693359375, 0.3785400390625, 0.400146484375, 0.4217529296875, 0.443359375, 0.4649658203125, 0.486572265625, 0.5081787109375, 0.52978515625, 0.5513916015625, 0.572998046875, 0.5946044921875, 0.6162109375, 0.6378173828125, 0.659423828125, 0.6810302734375, 0.70263671875]}, "gradients/decoder.transformer.h.3.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 2.0, 2.0, 5.0, 4.0, 5.0, 10.0, 18.0, 20.0, 31.0, 33.0, 52.0, 71.0, 75.0, 96.0, 111.0, 94.0, 92.0, 75.0, 40.0, 36.0, 31.0, 24.0, 27.0, 12.0, 16.0, 7.0, 3.0, 4.0, 5.0, 0.0, 1.0, 3.0, 4.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.01358795166015625, -0.013218998908996582, -0.012850046157836914, -0.012481093406677246, -0.012112140655517578, -0.01174318790435791, -0.011374235153198242, -0.011005282402038574, -0.010636329650878906, -0.010267376899719238, -0.00989842414855957, -0.009529471397399902, -0.009160518646240234, -0.008791565895080566, -0.008422613143920898, -0.00805366039276123, -0.0076847076416015625, -0.0073157548904418945, -0.0069468021392822266, -0.006577849388122559, -0.006208896636962891, -0.005839943885803223, -0.005470991134643555, -0.005102038383483887, -0.004733085632324219, -0.004364132881164551, -0.003995180130004883, -0.003626227378845215, -0.003257274627685547, -0.002888321876525879, -0.002519369125366211, -0.002150416374206543, -0.001781463623046875, -0.001412510871887207, -0.001043558120727539, -0.0006746053695678711, -0.0003056526184082031, 6.330013275146484e-05, 0.0004322528839111328, 0.0008012056350708008, 0.0011701583862304688, 0.0015391111373901367, 0.0019080638885498047, 0.0022770166397094727, 0.0026459693908691406, 0.0030149221420288086, 0.0033838748931884766, 0.0037528276443481445, 0.0041217803955078125, 0.0044907331466674805, 0.0048596858978271484, 0.005228638648986816, 0.005597591400146484, 0.005966544151306152, 0.00633549690246582, 0.006704449653625488, 0.007073402404785156, 0.007442355155944824, 0.007811307907104492, 0.00818026065826416, 0.008549213409423828, 0.008918166160583496, 0.009287118911743164, 0.009656071662902832, 0.0100250244140625]}, "gradients/decoder.transformer.h.3.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 6.0, 7.0, 5.0, 2.0, 8.0, 6.0, 9.0, 12.0, 19.0, 33.0, 40.0, 48.0, 81.0, 92.0, 170.0, 336.0, 692.0, 9027.0, 1034693.0, 1995.0, 559.0, 281.0, 138.0, 95.0, 61.0, 44.0, 34.0, 31.0, 13.0, 9.0, 3.0, 6.0, 2.0, 0.0, 4.0, 2.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.1964111328125, -0.18922996520996094, -0.18204879760742188, -0.1748676300048828, -0.16768646240234375, -0.1605052947998047, -0.15332412719726562, -0.14614295959472656, -0.1389617919921875, -0.13178062438964844, -0.12459945678710938, -0.11741828918457031, -0.11023712158203125, -0.10305595397949219, -0.09587478637695312, -0.08869361877441406, -0.081512451171875, -0.07433128356933594, -0.06715011596679688, -0.05996894836425781, -0.05278778076171875, -0.04560661315917969, -0.038425445556640625, -0.031244277954101562, -0.0240631103515625, -0.016881942749023438, -0.009700775146484375, -0.0025196075439453125, 0.00466156005859375, 0.011842727661132812, 0.019023895263671875, 0.026205062866210938, 0.03338623046875, 0.04056739807128906, 0.047748565673828125, 0.05492973327636719, 0.06211090087890625, 0.06929206848144531, 0.07647323608398438, 0.08365440368652344, 0.0908355712890625, 0.09801673889160156, 0.10519790649414062, 0.11237907409667969, 0.11956024169921875, 0.1267414093017578, 0.13392257690429688, 0.14110374450683594, 0.148284912109375, 0.15546607971191406, 0.16264724731445312, 0.1698284149169922, 0.17700958251953125, 0.1841907501220703, 0.19137191772460938, 0.19855308532714844, 0.2057342529296875, 0.21291542053222656, 0.22009658813476562, 0.2272777557373047, 0.23445892333984375, 0.2416400909423828, 0.24882125854492188, 0.25600242614746094, 0.26318359375]}, "gradients/decoder.transformer.h.3.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 6.0, 384.0, 613.0, 12.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.08323973417282104, -0.08180711418390274, -0.08037449419498444, -0.07894188165664673, -0.07750926166772842, -0.07607664167881012, -0.07464402914047241, -0.07321140915155411, -0.0717787891626358, -0.0703461691737175, -0.0689135491847992, -0.06748093664646149, -0.06604831665754318, -0.06461569666862488, -0.06318308413028717, -0.061750464141368866, -0.06031784415245056, -0.05888522416353226, -0.05745260789990425, -0.056019991636276245, -0.05458737164735794, -0.053154751658439636, -0.05172213539481163, -0.050289519131183624, -0.04885689914226532, -0.047424279153347015, -0.04599166288971901, -0.044559046626091, -0.0431264266371727, -0.041693806648254395, -0.04026119038462639, -0.03882857412099838, -0.03739595040678978, -0.035963334143161774, -0.03453071415424347, -0.033098094165325165, -0.03166547790169716, -0.030232859775424004, -0.02880024164915085, -0.027367623522877693, -0.025935005396604538, -0.024502387270331383, -0.023069769144058228, -0.021637151017785072, -0.020204532891511917, -0.018771914765238762, -0.017339296638965607, -0.01590667851269245, -0.014474061317741871, -0.013041443191468716, -0.01160882506519556, -0.010176206938922405, -0.00874358881264925, -0.007310970686376095, -0.00587835256010294, -0.004445734433829784, -0.003013116307556629, -0.001580498181283474, -0.00014788005501031876, 0.0012847380712628365, 0.0027173561975359917, 0.004149974323809147, 0.005582592450082302, 0.007015210576355457, 0.008447828702628613]}, "gradients/decoder.transformer.h.3.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 4.0, 9.0, 3.0, 7.0, 19.0, 16.0, 12.0, 14.0, 24.0, 21.0, 34.0, 25.0, 32.0, 41.0, 44.0, 38.0, 38.0, 46.0, 46.0, 46.0, 49.0, 49.0, 49.0, 42.0, 43.0, 38.0, 49.0, 24.0, 38.0, 20.0, 19.0, 15.0, 18.0, 11.0, 2.0, 10.0, 4.0, 2.0, 1.0, 3.0, 3.0, 1.0, 3.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0, 2.0], "bins": [-0.005097866058349609, -0.004945721477270126, -0.004793576896190643, -0.00464143231511116, -0.004489287734031677, -0.004337143152952194, -0.004184998571872711, -0.004032853990793228, -0.003880709409713745, -0.003728564828634262, -0.003576420247554779, -0.003424275666475296, -0.003272131085395813, -0.00311998650431633, -0.002967841923236847, -0.002815697342157364, -0.002663552761077881, -0.002511408179998398, -0.002359263598918915, -0.0022071190178394318, -0.0020549744367599487, -0.0019028298556804657, -0.0017506852746009827, -0.0015985406935214996, -0.0014463961124420166, -0.0012942515313625336, -0.0011421069502830505, -0.0009899623692035675, -0.0008378177881240845, -0.0006856732070446014, -0.0005335286259651184, -0.0003813840448856354, -0.00022923946380615234, -7.709488272666931e-05, 7.504969835281372e-05, 0.00022719427943229675, 0.0003793388605117798, 0.0005314834415912628, 0.0006836280226707458, 0.0008357726037502289, 0.000987917184829712, 0.001140061765909195, 0.001292206346988678, 0.001444350928068161, 0.001596495509147644, 0.001748640090227127, 0.00190078467130661, 0.002052929252386093, 0.002205073833465576, 0.002357218414545059, 0.0025093629956245422, 0.0026615075767040253, 0.0028136521577835083, 0.0029657967388629913, 0.0031179413199424744, 0.0032700859010219574, 0.0034222304821014404, 0.0035743750631809235, 0.0037265196442604065, 0.0038786642253398895, 0.0040308088064193726, 0.004182953387498856, 0.004335097968578339, 0.004487242549657822, 0.004639387130737305]}, "gradients/decoder.transformer.h.3.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 3.0, 6.0, 9.0, 9.0, 10.0, 12.0, 9.0, 14.0, 16.0, 24.0, 25.0, 31.0, 29.0, 34.0, 39.0, 37.0, 53.0, 36.0, 49.0, 54.0, 53.0, 57.0, 42.0, 53.0, 38.0, 42.0, 39.0, 44.0, 27.0, 25.0, 26.0, 13.0, 15.0, 13.0, 8.0, 12.0, 3.0, 2.0, 4.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-13.59375, -13.1527099609375, -12.711669921875, -12.2706298828125, -11.82958984375, -11.3885498046875, -10.947509765625, -10.5064697265625, -10.0654296875, -9.6243896484375, -9.183349609375, -8.7423095703125, -8.30126953125, -7.8602294921875, -7.419189453125, -6.9781494140625, -6.537109375, -6.0960693359375, -5.655029296875, -5.2139892578125, -4.77294921875, -4.3319091796875, -3.890869140625, -3.4498291015625, -3.0087890625, -2.5677490234375, -2.126708984375, -1.6856689453125, -1.24462890625, -0.8035888671875, -0.362548828125, 0.0784912109375, 0.51953125, 0.9605712890625, 1.401611328125, 1.8426513671875, 2.28369140625, 2.7247314453125, 3.165771484375, 3.6068115234375, 4.0478515625, 4.4888916015625, 4.929931640625, 5.3709716796875, 5.81201171875, 6.2530517578125, 6.694091796875, 7.1351318359375, 7.576171875, 8.0172119140625, 8.458251953125, 8.8992919921875, 9.34033203125, 9.7813720703125, 10.222412109375, 10.6634521484375, 11.1044921875, 11.5455322265625, 11.986572265625, 12.4276123046875, 12.86865234375, 13.3096923828125, 13.750732421875, 14.1917724609375, 14.6328125]}, "gradients/decoder.transformer.h.3.attn.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 4.0, 3.0, 5.0, 4.0, 2.0, 11.0, 10.0, 24.0, 38.0, 54.0, 98.0, 91.0, 141.0, 220.0, 285.0, 393.0, 614.0, 759.0, 1130.0, 1867.0, 3931.0, 15534.0, 95580.0, 510215.0, 344520.0, 55311.0, 9668.0, 2977.0, 1532.0, 1021.0, 721.0, 501.0, 386.0, 294.0, 180.0, 151.0, 108.0, 66.0, 40.0, 24.0, 22.0, 14.0, 5.0, 5.0, 3.0, 0.0, 5.0, 2.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-21.859375, -21.15087890625, -20.4423828125, -19.73388671875, -19.025390625, -18.31689453125, -17.6083984375, -16.89990234375, -16.19140625, -15.48291015625, -14.7744140625, -14.06591796875, -13.357421875, -12.64892578125, -11.9404296875, -11.23193359375, -10.5234375, -9.81494140625, -9.1064453125, -8.39794921875, -7.689453125, -6.98095703125, -6.2724609375, -5.56396484375, -4.85546875, -4.14697265625, -3.4384765625, -2.72998046875, -2.021484375, -1.31298828125, -0.6044921875, 0.10400390625, 0.8125, 1.52099609375, 2.2294921875, 2.93798828125, 3.646484375, 4.35498046875, 5.0634765625, 5.77197265625, 6.48046875, 7.18896484375, 7.8974609375, 8.60595703125, 9.314453125, 10.02294921875, 10.7314453125, 11.43994140625, 12.1484375, 12.85693359375, 13.5654296875, 14.27392578125, 14.982421875, 15.69091796875, 16.3994140625, 17.10791015625, 17.81640625, 18.52490234375, 19.2333984375, 19.94189453125, 20.650390625, 21.35888671875, 22.0673828125, 22.77587890625, 23.484375]}, "gradients/decoder.transformer.h.3.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 4.0, 5.0, 6.0, 5.0, 13.0, 15.0, 13.0, 17.0, 24.0, 30.0, 26.0, 29.0, 34.0, 52.0, 58.0, 88.0, 109.0, 299.0, 1594.0, 197.0, 90.0, 63.0, 43.0, 49.0, 23.0, 27.0, 33.0, 32.0, 20.0, 18.0, 13.0, 6.0, 6.0, 4.0, 5.0, 3.0, 2.0, 2.0, 3.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-38.84375, -37.71240234375, -36.5810546875, -35.44970703125, -34.318359375, -33.18701171875, -32.0556640625, -30.92431640625, -29.79296875, -28.66162109375, -27.5302734375, -26.39892578125, -25.267578125, -24.13623046875, -23.0048828125, -21.87353515625, -20.7421875, -19.61083984375, -18.4794921875, -17.34814453125, -16.216796875, -15.08544921875, -13.9541015625, -12.82275390625, -11.69140625, -10.56005859375, -9.4287109375, -8.29736328125, -7.166015625, -6.03466796875, -4.9033203125, -3.77197265625, -2.640625, -1.50927734375, -0.3779296875, 0.75341796875, 1.884765625, 3.01611328125, 4.1474609375, 5.27880859375, 6.41015625, 7.54150390625, 8.6728515625, 9.80419921875, 10.935546875, 12.06689453125, 13.1982421875, 14.32958984375, 15.4609375, 16.59228515625, 17.7236328125, 18.85498046875, 19.986328125, 21.11767578125, 22.2490234375, 23.38037109375, 24.51171875, 25.64306640625, 26.7744140625, 27.90576171875, 29.037109375, 30.16845703125, 31.2998046875, 32.43115234375, 33.5625]}, "gradients/decoder.transformer.h.3.attn.c_attn.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 1.0, 0.0, 4.0, 4.0, 0.0, 5.0, 3.0, 2.0, 8.0, 13.0, 8.0, 14.0, 12.0, 15.0, 15.0, 25.0, 18.0, 40.0, 66.0, 92.0, 162.0, 242.0, 461.0, 1078.0, 11104.0, 3124332.0, 5816.0, 978.0, 431.0, 229.0, 156.0, 97.0, 77.0, 47.0, 42.0, 22.0, 28.0, 16.0, 12.0, 9.0, 11.0, 6.0, 11.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-92.9375, -89.669921875, -86.40234375, -83.134765625, -79.8671875, -76.599609375, -73.33203125, -70.064453125, -66.796875, -63.529296875, -60.26171875, -56.994140625, -53.7265625, -50.458984375, -47.19140625, -43.923828125, -40.65625, -37.388671875, -34.12109375, -30.853515625, -27.5859375, -24.318359375, -21.05078125, -17.783203125, -14.515625, -11.248046875, -7.98046875, -4.712890625, -1.4453125, 1.822265625, 5.08984375, 8.357421875, 11.625, 14.892578125, 18.16015625, 21.427734375, 24.6953125, 27.962890625, 31.23046875, 34.498046875, 37.765625, 41.033203125, 44.30078125, 47.568359375, 50.8359375, 54.103515625, 57.37109375, 60.638671875, 63.90625, 67.173828125, 70.44140625, 73.708984375, 76.9765625, 80.244140625, 83.51171875, 86.779296875, 90.046875, 93.314453125, 96.58203125, 99.849609375, 103.1171875, 106.384765625, 109.65234375, 112.919921875, 116.1875]}, "gradients/decoder.transformer.h.3.ln_1.weight": {"_type": "histogram", "values": [3.0, 1.0, 0.0, 1.0, 9.0, 212.0, 598.0, 182.0, 13.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-50.307533264160156, -42.56134796142578, -34.81515884399414, -27.068973541259766, -19.322786331176758, -11.57659912109375, -3.830413818359375, 3.9157752990722656, 11.66196060180664, 19.40814781188965, 27.154335021972656, 34.90052032470703, 42.646705627441406, 50.39289474487305, 58.13908004760742, 65.88526916503906, 73.63145446777344, 81.37763977050781, 89.12382507324219, 96.87001037597656, 104.61620330810547, 112.36238861083984, 120.10857391357422, 127.85476684570312, 135.6009521484375, 143.34713745117188, 151.09332275390625, 158.83950805664062, 166.585693359375, 174.33187866210938, 182.07806396484375, 189.8242645263672, 197.57044982910156, 205.31663513183594, 213.0628204345703, 220.8090057373047, 228.55519104003906, 236.3013916015625, 244.04757690429688, 251.79376220703125, 259.5399475097656, 267.2861328125, 275.0323181152344, 282.77850341796875, 290.5246887207031, 298.2708740234375, 306.0170593261719, 313.76324462890625, 321.5094299316406, 329.255615234375, 337.0018005371094, 344.74798583984375, 352.4941711425781, 360.2403564453125, 367.9865417480469, 375.73272705078125, 383.47894287109375, 391.2251281738281, 398.9713134765625, 406.7174987792969, 414.46368408203125, 422.2098693847656, 429.9560546875, 437.7022399902344, 445.44842529296875]}, "gradients/decoder.transformer.h.3.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 0.0, 2.0, 1.0, 2.0, 0.0, 3.0, 2.0, 2.0, 2.0, 8.0, 9.0, 10.0, 11.0, 11.0, 10.0, 12.0, 15.0, 14.0, 22.0, 14.0, 25.0, 24.0, 36.0, 31.0, 30.0, 38.0, 41.0, 32.0, 52.0, 32.0, 44.0, 37.0, 41.0, 32.0, 38.0, 28.0, 26.0, 43.0, 26.0, 29.0, 27.0, 16.0, 16.0, 24.0, 12.0, 11.0, 16.0, 16.0, 8.0, 9.0, 4.0, 4.0, 7.0, 1.0, 2.0, 5.0, 4.0, 2.0, 0.0, 1.0], "bins": [-77.93260192871094, -75.6983413696289, -73.46408081054688, -71.22982025146484, -68.99555969238281, -66.76129150390625, -64.52703094482422, -62.29277038574219, -60.058509826660156, -57.824249267578125, -55.589988708496094, -53.3557243347168, -51.121463775634766, -48.887203216552734, -46.65293884277344, -44.418678283691406, -42.184417724609375, -39.950157165527344, -37.71589660644531, -35.481632232666016, -33.247371673583984, -31.013111114501953, -28.77884864807129, -26.544586181640625, -24.310325622558594, -22.076065063476562, -19.8418025970459, -17.607540130615234, -15.373279571533203, -13.139018058776855, -10.904756546020508, -8.67049503326416, -6.436225891113281, -4.201964378356934, -1.967702865600586, 0.2665586471557617, 2.5008201599121094, 4.735081672668457, 6.969343185424805, 9.203604698181152, 11.4378662109375, 13.672127723693848, 15.906389236450195, 18.14065170288086, 20.37491226196289, 22.609172821044922, 24.843435287475586, 27.07769775390625, 29.31195831298828, 31.546218872070312, 33.780479431152344, 36.01474380493164, 38.24900436401367, 40.4832649230957, 42.717529296875, 44.95178985595703, 47.18605041503906, 49.420310974121094, 51.654571533203125, 53.88883590698242, 56.12309646606445, 58.357357025146484, 60.59162139892578, 62.82588195800781, 65.06014251708984]}, "gradients/decoder.transformer.h.2.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 7.0, 5.0, 7.0, 4.0, 7.0, 9.0, 10.0, 16.0, 23.0, 20.0, 18.0, 17.0, 36.0, 22.0, 45.0, 31.0, 34.0, 51.0, 53.0, 47.0, 39.0, 50.0, 48.0, 46.0, 39.0, 43.0, 36.0, 35.0, 44.0, 34.0, 26.0, 31.0, 18.0, 10.0, 9.0, 10.0, 9.0, 7.0, 5.0, 5.0, 3.0, 5.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.6171875, -12.194580078125, -11.77197265625, -11.349365234375, -10.9267578125, -10.504150390625, -10.08154296875, -9.658935546875, -9.236328125, -8.813720703125, -8.39111328125, -7.968505859375, -7.5458984375, -7.123291015625, -6.70068359375, -6.278076171875, -5.85546875, -5.432861328125, -5.01025390625, -4.587646484375, -4.1650390625, -3.742431640625, -3.31982421875, -2.897216796875, -2.474609375, -2.052001953125, -1.62939453125, -1.206787109375, -0.7841796875, -0.361572265625, 0.06103515625, 0.483642578125, 0.90625, 1.328857421875, 1.75146484375, 2.174072265625, 2.5966796875, 3.019287109375, 3.44189453125, 3.864501953125, 4.287109375, 4.709716796875, 5.13232421875, 5.554931640625, 5.9775390625, 6.400146484375, 6.82275390625, 7.245361328125, 7.66796875, 8.090576171875, 8.51318359375, 8.935791015625, 9.3583984375, 9.781005859375, 10.20361328125, 10.626220703125, 11.048828125, 11.471435546875, 11.89404296875, 12.316650390625, 12.7392578125, 13.161865234375, 13.58447265625, 14.007080078125, 14.4296875]}, "gradients/decoder.transformer.h.2.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 4.0, 2.0, 2.0, 5.0, 7.0, 7.0, 10.0, 22.0, 13.0, 32.0, 44.0, 45.0, 86.0, 87.0, 163.0, 229.0, 420.0, 876.0, 2376.0, 7993.0, 44617.0, 743685.0, 3003119.0, 354436.0, 26769.0, 5755.0, 1756.0, 751.0, 318.0, 172.0, 129.0, 93.0, 68.0, 43.0, 45.0, 22.0, 27.0, 18.0, 9.0, 9.0, 9.0, 7.0, 4.0, 1.0, 5.0, 1.0, 3.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-26.859375, -25.841064453125, -24.82275390625, -23.804443359375, -22.7861328125, -21.767822265625, -20.74951171875, -19.731201171875, -18.712890625, -17.694580078125, -16.67626953125, -15.657958984375, -14.6396484375, -13.621337890625, -12.60302734375, -11.584716796875, -10.56640625, -9.548095703125, -8.52978515625, -7.511474609375, -6.4931640625, -5.474853515625, -4.45654296875, -3.438232421875, -2.419921875, -1.401611328125, -0.38330078125, 0.635009765625, 1.6533203125, 2.671630859375, 3.68994140625, 4.708251953125, 5.7265625, 6.744873046875, 7.76318359375, 8.781494140625, 9.7998046875, 10.818115234375, 11.83642578125, 12.854736328125, 13.873046875, 14.891357421875, 15.90966796875, 16.927978515625, 17.9462890625, 18.964599609375, 19.98291015625, 21.001220703125, 22.01953125, 23.037841796875, 24.05615234375, 25.074462890625, 26.0927734375, 27.111083984375, 28.12939453125, 29.147705078125, 30.166015625, 31.184326171875, 32.20263671875, 33.220947265625, 34.2392578125, 35.257568359375, 36.27587890625, 37.294189453125, 38.3125]}, "gradients/decoder.transformer.h.2.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 2.0, 3.0, 2.0, 7.0, 7.0, 8.0, 7.0, 14.0, 19.0, 22.0, 29.0, 33.0, 57.0, 61.0, 87.0, 107.0, 137.0, 184.0, 255.0, 305.0, 445.0, 465.0, 455.0, 325.0, 230.0, 182.0, 123.0, 102.0, 93.0, 74.0, 52.0, 34.0, 37.0, 30.0, 25.0, 10.0, 13.0, 9.0, 5.0, 12.0, 4.0, 4.0, 0.0, 3.0, 1.0, 5.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-19.203125, -18.609619140625, -18.01611328125, -17.422607421875, -16.8291015625, -16.235595703125, -15.64208984375, -15.048583984375, -14.455078125, -13.861572265625, -13.26806640625, -12.674560546875, -12.0810546875, -11.487548828125, -10.89404296875, -10.300537109375, -9.70703125, -9.113525390625, -8.52001953125, -7.926513671875, -7.3330078125, -6.739501953125, -6.14599609375, -5.552490234375, -4.958984375, -4.365478515625, -3.77197265625, -3.178466796875, -2.5849609375, -1.991455078125, -1.39794921875, -0.804443359375, -0.2109375, 0.382568359375, 0.97607421875, 1.569580078125, 2.1630859375, 2.756591796875, 3.35009765625, 3.943603515625, 4.537109375, 5.130615234375, 5.72412109375, 6.317626953125, 6.9111328125, 7.504638671875, 8.09814453125, 8.691650390625, 9.28515625, 9.878662109375, 10.47216796875, 11.065673828125, 11.6591796875, 12.252685546875, 12.84619140625, 13.439697265625, 14.033203125, 14.626708984375, 15.22021484375, 15.813720703125, 16.4072265625, 17.000732421875, 17.59423828125, 18.187744140625, 18.78125]}, "gradients/decoder.transformer.h.2.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 4.0, 2.0, 2.0, 9.0, 9.0, 13.0, 14.0, 21.0, 28.0, 47.0, 81.0, 96.0, 187.0, 313.0, 713.0, 2216.0, 17378.0, 1194920.0, 2941398.0, 32314.0, 2862.0, 841.0, 344.0, 196.0, 106.0, 55.0, 38.0, 29.0, 18.0, 9.0, 8.0, 10.0, 5.0, 2.0, 3.0, 3.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-53.875, -51.95703125, -50.0390625, -48.12109375, -46.203125, -44.28515625, -42.3671875, -40.44921875, -38.53125, -36.61328125, -34.6953125, -32.77734375, -30.859375, -28.94140625, -27.0234375, -25.10546875, -23.1875, -21.26953125, -19.3515625, -17.43359375, -15.515625, -13.59765625, -11.6796875, -9.76171875, -7.84375, -5.92578125, -4.0078125, -2.08984375, -0.171875, 1.74609375, 3.6640625, 5.58203125, 7.5, 9.41796875, 11.3359375, 13.25390625, 15.171875, 17.08984375, 19.0078125, 20.92578125, 22.84375, 24.76171875, 26.6796875, 28.59765625, 30.515625, 32.43359375, 34.3515625, 36.26953125, 38.1875, 40.10546875, 42.0234375, 43.94140625, 45.859375, 47.77734375, 49.6953125, 51.61328125, 53.53125, 55.44921875, 57.3671875, 59.28515625, 61.203125, 63.12109375, 65.0390625, 66.95703125, 68.875]}, "gradients/decoder.transformer.h.2.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 960.0, 53.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1211.3843994140625, -1170.5318603515625, -1129.679443359375, -1088.826904296875, -1047.974365234375, -1007.1219482421875, -966.2694091796875, -925.4169311523438, -884.564453125, -843.7119750976562, -802.8594970703125, -762.0069580078125, -721.1544799804688, -680.302001953125, -639.449462890625, -598.5969848632812, -557.7445068359375, -516.8920288085938, -476.0395202636719, -435.18701171875, -394.33453369140625, -353.4820556640625, -312.6295471191406, -271.77703857421875, -230.924560546875, -190.0720672607422, -149.21957397460938, -108.36708068847656, -67.51458740234375, -26.662094116210938, 14.190399169921875, 55.04290771484375, 95.8955078125, 136.7480010986328, 177.60049438476562, 218.45298767089844, 259.30548095703125, 300.157958984375, 341.0104675292969, 381.86297607421875, 422.7154541015625, 463.56793212890625, 504.4204406738281, 545.27294921875, 586.1254272460938, 626.9779052734375, 667.8304443359375, 708.6829223632812, 749.535400390625, 790.3878784179688, 831.2403564453125, 872.0928955078125, 912.9453735351562, 953.7978515625, 994.650390625, 1035.5029296875, 1076.3553466796875, 1117.2078857421875, 1158.060302734375, 1198.912841796875, 1239.765380859375, 1280.6177978515625, 1321.4703369140625, 1362.32275390625, 1403.17529296875]}, "gradients/decoder.transformer.h.2.ln_2.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 2.0, 3.0, 1.0, 2.0, 1.0, 4.0, 2.0, 3.0, 10.0, 8.0, 9.0, 6.0, 10.0, 6.0, 19.0, 17.0, 12.0, 24.0, 30.0, 30.0, 35.0, 27.0, 34.0, 30.0, 31.0, 34.0, 43.0, 38.0, 43.0, 44.0, 46.0, 39.0, 38.0, 32.0, 35.0, 26.0, 32.0, 28.0, 27.0, 20.0, 24.0, 18.0, 16.0, 18.0, 11.0, 12.0, 6.0, 8.0, 7.0, 2.0, 2.0, 2.0, 0.0, 3.0, 3.0, 2.0, 3.0, 0.0, 1.0, 2.0], "bins": [-75.8946533203125, -73.54013061523438, -71.18561553955078, -68.83109283447266, -66.47657775878906, -64.12205505371094, -61.76753616333008, -59.41301727294922, -57.058494567871094, -54.703975677490234, -52.349456787109375, -49.99493408203125, -47.64041519165039, -45.28589630126953, -42.93137741088867, -40.57685852050781, -38.22233963012695, -35.867820739746094, -33.513301849365234, -31.158781051635742, -28.80426025390625, -26.44974136352539, -24.09522247314453, -21.74070167541504, -19.38618278503418, -17.03166389465332, -14.677143096923828, -12.322624206542969, -9.968104362487793, -7.613584518432617, -5.259065628051758, -2.9045448303222656, -0.5500259399414062, 1.8044936656951904, 4.159013271331787, 6.513532638549805, 8.86805248260498, 11.222572326660156, 13.577091217041016, 15.931612014770508, 18.286130905151367, 20.640649795532227, 22.99517059326172, 25.349689483642578, 27.704208374023438, 30.05872917175293, 32.413246154785156, 34.76776885986328, 37.12228775024414, 39.476806640625, 41.83132553100586, 44.18584442138672, 46.540367126464844, 48.8948860168457, 51.24940490722656, 53.60392761230469, 55.95844268798828, 58.31296157836914, 60.66748046875, 63.022003173828125, 65.37651824951172, 67.73104095458984, 70.08555603027344, 72.44007873535156, 74.79460144042969]}, "gradients/decoder.transformer.h.2.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 3.0, 1.0, 2.0, 2.0, 5.0, 5.0, 10.0, 9.0, 11.0, 11.0, 10.0, 13.0, 12.0, 24.0, 26.0, 30.0, 28.0, 29.0, 43.0, 40.0, 46.0, 33.0, 47.0, 45.0, 47.0, 49.0, 47.0, 39.0, 26.0, 39.0, 38.0, 31.0, 43.0, 22.0, 23.0, 20.0, 15.0, 28.0, 9.0, 11.0, 12.0, 4.0, 4.0, 6.0, 6.0, 3.0, 4.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.96875, -8.65478515625, -8.3408203125, -8.02685546875, -7.712890625, -7.39892578125, -7.0849609375, -6.77099609375, -6.45703125, -6.14306640625, -5.8291015625, -5.51513671875, -5.201171875, -4.88720703125, -4.5732421875, -4.25927734375, -3.9453125, -3.63134765625, -3.3173828125, -3.00341796875, -2.689453125, -2.37548828125, -2.0615234375, -1.74755859375, -1.43359375, -1.11962890625, -0.8056640625, -0.49169921875, -0.177734375, 0.13623046875, 0.4501953125, 0.76416015625, 1.078125, 1.39208984375, 1.7060546875, 2.02001953125, 2.333984375, 2.64794921875, 2.9619140625, 3.27587890625, 3.58984375, 3.90380859375, 4.2177734375, 4.53173828125, 4.845703125, 5.15966796875, 5.4736328125, 5.78759765625, 6.1015625, 6.41552734375, 6.7294921875, 7.04345703125, 7.357421875, 7.67138671875, 7.9853515625, 8.29931640625, 8.61328125, 8.92724609375, 9.2412109375, 9.55517578125, 9.869140625, 10.18310546875, 10.4970703125, 10.81103515625, 11.125]}, "gradients/decoder.transformer.h.2.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 0.0, 2.0, 3.0, 12.0, 9.0, 10.0, 20.0, 27.0, 43.0, 53.0, 62.0, 92.0, 157.0, 216.0, 311.0, 440.0, 618.0, 935.0, 1357.0, 1936.0, 2918.0, 4305.0, 6529.0, 9600.0, 14457.0, 22340.0, 34492.0, 53529.0, 82764.0, 126003.0, 169599.0, 165892.0, 121339.0, 80034.0, 51228.0, 33037.0, 21512.0, 14052.0, 9416.0, 6147.0, 4190.0, 2844.0, 1936.0, 1304.0, 915.0, 598.0, 410.0, 278.0, 188.0, 132.0, 73.0, 62.0, 48.0, 23.0, 18.0, 22.0, 11.0, 4.0, 7.0, 8.0, 5.0], "bins": [-0.7763671875, -0.7535018920898438, -0.7306365966796875, -0.7077713012695312, -0.684906005859375, -0.6620407104492188, -0.6391754150390625, -0.6163101196289062, -0.59344482421875, -0.5705795288085938, -0.5477142333984375, -0.5248489379882812, -0.501983642578125, -0.47911834716796875, -0.4562530517578125, -0.43338775634765625, -0.4105224609375, -0.38765716552734375, -0.3647918701171875, -0.34192657470703125, -0.319061279296875, -0.29619598388671875, -0.2733306884765625, -0.25046539306640625, -0.22760009765625, -0.20473480224609375, -0.1818695068359375, -0.15900421142578125, -0.136138916015625, -0.11327362060546875, -0.0904083251953125, -0.06754302978515625, -0.044677734375, -0.02181243896484375, 0.0010528564453125, 0.02391815185546875, 0.046783447265625, 0.06964874267578125, 0.0925140380859375, 0.11537933349609375, 0.13824462890625, 0.16110992431640625, 0.1839752197265625, 0.20684051513671875, 0.229705810546875, 0.25257110595703125, 0.2754364013671875, 0.29830169677734375, 0.3211669921875, 0.34403228759765625, 0.3668975830078125, 0.38976287841796875, 0.412628173828125, 0.43549346923828125, 0.4583587646484375, 0.48122406005859375, 0.50408935546875, 0.5269546508789062, 0.5498199462890625, 0.5726852416992188, 0.595550537109375, 0.6184158325195312, 0.6412811279296875, 0.6641464233398438, 0.68701171875]}, "gradients/decoder.transformer.h.2.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 5.0, 5.0, 7.0, 7.0, 8.0, 7.0, 14.0, 10.0, 17.0, 23.0, 24.0, 27.0, 26.0, 25.0, 27.0, 34.0, 46.0, 50.0, 32.0, 32.0, 33.0, 42.0, 1070.0, 42.0, 43.0, 43.0, 39.0, 37.0, 34.0, 35.0, 21.0, 24.0, 24.0, 26.0, 26.0, 6.0, 12.0, 15.0, 7.0, 7.0, 4.0, 6.0, 4.0, 8.0, 1.0, 2.0, 2.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.1484375, -5.95751953125, -5.7666015625, -5.57568359375, -5.384765625, -5.19384765625, -5.0029296875, -4.81201171875, -4.62109375, -4.43017578125, -4.2392578125, -4.04833984375, -3.857421875, -3.66650390625, -3.4755859375, -3.28466796875, -3.09375, -2.90283203125, -2.7119140625, -2.52099609375, -2.330078125, -2.13916015625, -1.9482421875, -1.75732421875, -1.56640625, -1.37548828125, -1.1845703125, -0.99365234375, -0.802734375, -0.61181640625, -0.4208984375, -0.22998046875, -0.0390625, 0.15185546875, 0.3427734375, 0.53369140625, 0.724609375, 0.91552734375, 1.1064453125, 1.29736328125, 1.48828125, 1.67919921875, 1.8701171875, 2.06103515625, 2.251953125, 2.44287109375, 2.6337890625, 2.82470703125, 3.015625, 3.20654296875, 3.3974609375, 3.58837890625, 3.779296875, 3.97021484375, 4.1611328125, 4.35205078125, 4.54296875, 4.73388671875, 4.9248046875, 5.11572265625, 5.306640625, 5.49755859375, 5.6884765625, 5.87939453125, 6.0703125]}, "gradients/decoder.transformer.h.2.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 1.0, 3.0, 3.0, 5.0, 9.0, 11.0, 13.0, 34.0, 32.0, 48.0, 78.0, 127.0, 169.0, 209.0, 380.0, 501.0, 759.0, 1156.0, 1656.0, 2579.0, 4057.0, 5912.0, 9235.0, 14128.0, 21658.0, 32995.0, 49857.0, 75880.0, 109095.0, 144909.0, 1201762.0, 132302.0, 96312.0, 65622.0, 43421.0, 28611.0, 18546.0, 11996.0, 7915.0, 5052.0, 3355.0, 2232.0, 1490.0, 1006.0, 621.0, 448.0, 281.0, 219.0, 150.0, 109.0, 58.0, 52.0, 29.0, 15.0, 17.0, 11.0, 10.0, 4.0, 2.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.5546875, -0.5369796752929688, -0.5192718505859375, -0.5015640258789062, -0.483856201171875, -0.46614837646484375, -0.4484405517578125, -0.43073272705078125, -0.41302490234375, -0.39531707763671875, -0.3776092529296875, -0.35990142822265625, -0.342193603515625, -0.32448577880859375, -0.3067779541015625, -0.28907012939453125, -0.2713623046875, -0.25365447998046875, -0.2359466552734375, -0.21823883056640625, -0.200531005859375, -0.18282318115234375, -0.1651153564453125, -0.14740753173828125, -0.12969970703125, -0.11199188232421875, -0.0942840576171875, -0.07657623291015625, -0.058868408203125, -0.04116058349609375, -0.0234527587890625, -0.00574493408203125, 0.011962890625, 0.02967071533203125, 0.0473785400390625, 0.06508636474609375, 0.082794189453125, 0.10050201416015625, 0.1182098388671875, 0.13591766357421875, 0.15362548828125, 0.17133331298828125, 0.1890411376953125, 0.20674896240234375, 0.224456787109375, 0.24216461181640625, 0.2598724365234375, 0.27758026123046875, 0.2952880859375, 0.31299591064453125, 0.3307037353515625, 0.34841156005859375, 0.366119384765625, 0.38382720947265625, 0.4015350341796875, 0.41924285888671875, 0.43695068359375, 0.45465850830078125, 0.4723663330078125, 0.49007415771484375, 0.507781982421875, 0.5254898071289062, 0.5431976318359375, 0.5609054565429688, 0.57861328125]}, "gradients/decoder.transformer.h.2.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 3.0, 3.0, 2.0, 1.0, 2.0, 5.0, 8.0, 3.0, 4.0, 5.0, 9.0, 5.0, 6.0, 7.0, 13.0, 17.0, 16.0, 25.0, 38.0, 40.0, 58.0, 77.0, 81.0, 83.0, 56.0, 84.0, 68.0, 60.0, 43.0, 42.0, 34.0, 22.0, 20.0, 16.0, 6.0, 10.0, 6.0, 5.0, 7.0, 2.0, 5.0, 4.0, 3.0, 3.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 2.0], "bins": [-0.00640106201171875, -0.006217479705810547, -0.006033897399902344, -0.005850315093994141, -0.0056667327880859375, -0.005483150482177734, -0.005299568176269531, -0.005115985870361328, -0.004932403564453125, -0.004748821258544922, -0.004565238952636719, -0.004381656646728516, -0.0041980743408203125, -0.004014492034912109, -0.0038309097290039062, -0.003647327423095703, -0.0034637451171875, -0.003280162811279297, -0.0030965805053710938, -0.0029129981994628906, -0.0027294158935546875, -0.0025458335876464844, -0.0023622512817382812, -0.002178668975830078, -0.001995086669921875, -0.0018115043640136719, -0.0016279220581054688, -0.0014443397521972656, -0.0012607574462890625, -0.0010771751403808594, -0.0008935928344726562, -0.0007100105285644531, -0.00052642822265625, -0.0003428459167480469, -0.00015926361083984375, 2.4318695068359375e-05, 0.0002079010009765625, 0.0003914833068847656, 0.0005750656127929688, 0.0007586479187011719, 0.000942230224609375, 0.0011258125305175781, 0.0013093948364257812, 0.0014929771423339844, 0.0016765594482421875, 0.0018601417541503906, 0.0020437240600585938, 0.002227306365966797, 0.002410888671875, 0.002594470977783203, 0.0027780532836914062, 0.0029616355895996094, 0.0031452178955078125, 0.0033288002014160156, 0.0035123825073242188, 0.003695964813232422, 0.003879547119140625, 0.004063129425048828, 0.004246711730957031, 0.004430294036865234, 0.0046138763427734375, 0.004797458648681641, 0.004981040954589844, 0.005164623260498047, 0.00534820556640625]}, "gradients/decoder.transformer.h.2.crossattention.q_attn.weight": {"_type": "histogram", "values": [2.0, 1.0, 4.0, 0.0, 0.0, 2.0, 0.0, 3.0, 5.0, 7.0, 5.0, 4.0, 7.0, 9.0, 8.0, 10.0, 12.0, 21.0, 20.0, 32.0, 45.0, 41.0, 69.0, 102.0, 154.0, 169.0, 316.0, 536.0, 1577.0, 139645.0, 899932.0, 4062.0, 618.0, 365.0, 225.0, 135.0, 102.0, 66.0, 50.0, 41.0, 26.0, 32.0, 17.0, 18.0, 16.0, 7.0, 6.0, 12.0, 10.0, 4.0, 3.0, 6.0, 3.0, 2.0, 0.0, 2.0, 1.0, 0.0, 1.0, 5.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.099365234375, -0.09607219696044922, -0.09277915954589844, -0.08948612213134766, -0.08619308471679688, -0.0829000473022461, -0.07960700988769531, -0.07631397247314453, -0.07302093505859375, -0.06972789764404297, -0.06643486022949219, -0.0631418228149414, -0.059848785400390625, -0.056555747985839844, -0.05326271057128906, -0.04996967315673828, -0.0466766357421875, -0.04338359832763672, -0.04009056091308594, -0.036797523498535156, -0.033504486083984375, -0.030211448669433594, -0.026918411254882812, -0.02362537384033203, -0.02033233642578125, -0.01703929901123047, -0.013746261596679688, -0.010453224182128906, -0.007160186767578125, -0.0038671493530273438, -0.0005741119384765625, 0.0027189254760742188, 0.006011962890625, 0.009305000305175781, 0.012598037719726562, 0.015891075134277344, 0.019184112548828125, 0.022477149963378906, 0.025770187377929688, 0.02906322479248047, 0.03235626220703125, 0.03564929962158203, 0.03894233703613281, 0.042235374450683594, 0.045528411865234375, 0.048821449279785156, 0.05211448669433594, 0.05540752410888672, 0.0587005615234375, 0.06199359893798828, 0.06528663635253906, 0.06857967376708984, 0.07187271118164062, 0.0751657485961914, 0.07845878601074219, 0.08175182342529297, 0.08504486083984375, 0.08833789825439453, 0.09163093566894531, 0.0949239730834961, 0.09821701049804688, 0.10151004791259766, 0.10480308532714844, 0.10809612274169922, 0.11138916015625]}, "gradients/decoder.transformer.h.2.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 6.0, 12.0, 47.0, 163.0, 336.0, 268.0, 127.0, 40.0, 9.0, 4.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.014430073089897633, -0.014085158705711365, -0.013740244321525097, -0.013395329937338829, -0.013050415553152561, -0.012705501168966293, -0.0123605877161026, -0.012015673331916332, -0.011670758947730064, -0.011325844563543797, -0.010980930179357529, -0.01063601579517126, -0.010291101410984993, -0.0099461879581213, -0.009601272642612457, -0.009256359189748764, -0.008911443874239922, -0.008566529490053654, -0.008221615105867386, -0.007876700721681118, -0.0075317868031561375, -0.00718687241896987, -0.006841958034783602, -0.006497044116258621, -0.006152129732072353, -0.0058072153478860855, -0.005462300963699818, -0.00511738657951355, -0.004772472660988569, -0.004427558276802301, -0.0040826438926160336, -0.0037377297412604094, -0.003392815124243498, -0.00304790074005723, -0.002702986588701606, -0.002358072204515338, -0.0020131580531597137, -0.001668243668973446, -0.001323329284787178, -0.0009784151334315538, -0.000633500749245286, -0.0002885864523705095, 5.632784450426698e-05, 0.0004012421704828739, 0.0007461564382538199, 0.001091070706024766, 0.0014359850902110338, 0.001780899241566658, 0.002125813625752926, 0.0024707280099391937, 0.002815642161294818, 0.0031605565454810858, 0.00350547069683671, 0.003850385081022978, 0.004195299465209246, 0.004540213383734226, 0.004885127767920494, 0.005230042152106762, 0.00557495653629303, 0.005919870920479298, 0.006264784839004278, 0.006609699223190546, 0.006954613607376814, 0.007299527525901794, 0.00764444237574935]}, "gradients/decoder.transformer.h.2.ln_cross_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 2.0, 2.0, 4.0, 3.0, 5.0, 3.0, 8.0, 4.0, 8.0, 13.0, 15.0, 12.0, 19.0, 18.0, 27.0, 27.0, 19.0, 34.0, 36.0, 41.0, 41.0, 33.0, 41.0, 32.0, 53.0, 41.0, 52.0, 50.0, 53.0, 46.0, 28.0, 42.0, 34.0, 29.0, 22.0, 21.0, 23.0, 14.0, 14.0, 10.0, 8.0, 9.0, 3.0, 9.0, 5.0, 3.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.002740025520324707, -0.0026406217366456985, -0.00254121795296669, -0.0024418141692876816, -0.002342410385608673, -0.0022430066019296646, -0.002143602818250656, -0.0020441990345716476, -0.0019447952508926392, -0.0018453914672136307, -0.0017459876835346222, -0.0016465838998556137, -0.0015471801161766052, -0.0014477763324975967, -0.0013483725488185883, -0.0012489687651395798, -0.0011495649814605713, -0.0010501611977815628, -0.0009507574141025543, -0.0008513536304235458, -0.0007519498467445374, -0.0006525460630655289, -0.0005531422793865204, -0.0004537384957075119, -0.0003543347120285034, -0.00025493092834949493, -0.00015552714467048645, -5.6123360991477966e-05, 4.328042268753052e-05, 0.000142684206366539, 0.00024208799004554749, 0.00034149177372455597, 0.00044089555740356445, 0.0005402993410825729, 0.0006397031247615814, 0.0007391069084405899, 0.0008385106921195984, 0.0009379144757986069, 0.0010373182594776154, 0.0011367220431566238, 0.0012361258268356323, 0.0013355296105146408, 0.0014349333941936493, 0.0015343371778726578, 0.0016337409615516663, 0.0017331447452306747, 0.0018325485289096832, 0.0019319523125886917, 0.0020313560962677, 0.0021307598799467087, 0.002230163663625717, 0.0023295674473047256, 0.002428971230983734, 0.0025283750146627426, 0.002627778798341751, 0.0027271825820207596, 0.002826586365699768, 0.0029259901493787766, 0.003025393933057785, 0.0031247977167367935, 0.003224201500415802, 0.0033236052840948105, 0.003423009067773819, 0.0035224128514528275, 0.003621816635131836]}, "gradients/decoder.transformer.h.2.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 3.0, 1.0, 2.0, 2.0, 5.0, 5.0, 10.0, 9.0, 11.0, 11.0, 10.0, 13.0, 12.0, 25.0, 25.0, 31.0, 27.0, 30.0, 42.0, 40.0, 46.0, 33.0, 47.0, 45.0, 48.0, 48.0, 47.0, 41.0, 24.0, 39.0, 38.0, 32.0, 42.0, 22.0, 25.0, 18.0, 15.0, 28.0, 9.0, 11.0, 12.0, 4.0, 4.0, 6.0, 6.0, 3.0, 4.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.96875, -8.6546630859375, -8.340576171875, -8.0264892578125, -7.71240234375, -7.3983154296875, -7.084228515625, -6.7701416015625, -6.4560546875, -6.1419677734375, -5.827880859375, -5.5137939453125, -5.19970703125, -4.8856201171875, -4.571533203125, -4.2574462890625, -3.943359375, -3.6292724609375, -3.315185546875, -3.0010986328125, -2.68701171875, -2.3729248046875, -2.058837890625, -1.7447509765625, -1.4306640625, -1.1165771484375, -0.802490234375, -0.4884033203125, -0.17431640625, 0.1397705078125, 0.453857421875, 0.7679443359375, 1.08203125, 1.3961181640625, 1.710205078125, 2.0242919921875, 2.33837890625, 2.6524658203125, 2.966552734375, 3.2806396484375, 3.5947265625, 3.9088134765625, 4.222900390625, 4.5369873046875, 4.85107421875, 5.1651611328125, 5.479248046875, 5.7933349609375, 6.107421875, 6.4215087890625, 6.735595703125, 7.0496826171875, 7.36376953125, 7.6778564453125, 7.991943359375, 8.3060302734375, 8.6201171875, 8.9342041015625, 9.248291015625, 9.5623779296875, 9.87646484375, 10.1905517578125, 10.504638671875, 10.8187255859375, 11.1328125]}, "gradients/decoder.transformer.h.2.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 8.0, 3.0, 6.0, 10.0, 5.0, 18.0, 15.0, 29.0, 35.0, 45.0, 80.0, 91.0, 134.0, 169.0, 254.0, 407.0, 550.0, 867.0, 1302.0, 2141.0, 3991.0, 11200.0, 83071.0, 732706.0, 180886.0, 18353.0, 5144.0, 2448.0, 1584.0, 936.0, 620.0, 422.0, 304.0, 216.0, 154.0, 108.0, 66.0, 49.0, 35.0, 27.0, 22.0, 19.0, 10.0, 6.0, 3.0, 9.0, 3.0, 3.0, 2.0, 0.0, 0.0, 1.0, 3.0], "bins": [-28.921875, -28.087158203125, -27.25244140625, -26.417724609375, -25.5830078125, -24.748291015625, -23.91357421875, -23.078857421875, -22.244140625, -21.409423828125, -20.57470703125, -19.739990234375, -18.9052734375, -18.070556640625, -17.23583984375, -16.401123046875, -15.56640625, -14.731689453125, -13.89697265625, -13.062255859375, -12.2275390625, -11.392822265625, -10.55810546875, -9.723388671875, -8.888671875, -8.053955078125, -7.21923828125, -6.384521484375, -5.5498046875, -4.715087890625, -3.88037109375, -3.045654296875, -2.2109375, -1.376220703125, -0.54150390625, 0.293212890625, 1.1279296875, 1.962646484375, 2.79736328125, 3.632080078125, 4.466796875, 5.301513671875, 6.13623046875, 6.970947265625, 7.8056640625, 8.640380859375, 9.47509765625, 10.309814453125, 11.14453125, 11.979248046875, 12.81396484375, 13.648681640625, 14.4833984375, 15.318115234375, 16.15283203125, 16.987548828125, 17.822265625, 18.656982421875, 19.49169921875, 20.326416015625, 21.1611328125, 21.995849609375, 22.83056640625, 23.665283203125, 24.5]}, "gradients/decoder.transformer.h.2.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 3.0, 1.0, 2.0, 2.0, 0.0, 2.0, 5.0, 3.0, 9.0, 8.0, 8.0, 4.0, 20.0, 7.0, 16.0, 21.0, 17.0, 24.0, 36.0, 42.0, 45.0, 46.0, 58.0, 78.0, 188.0, 1666.0, 245.0, 84.0, 59.0, 58.0, 39.0, 38.0, 35.0, 22.0, 28.0, 29.0, 16.0, 11.0, 6.0, 14.0, 13.0, 10.0, 12.0, 6.0, 6.0, 12.0, 3.0, 1.0, 2.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-26.546875, -25.7333984375, -24.919921875, -24.1064453125, -23.29296875, -22.4794921875, -21.666015625, -20.8525390625, -20.0390625, -19.2255859375, -18.412109375, -17.5986328125, -16.78515625, -15.9716796875, -15.158203125, -14.3447265625, -13.53125, -12.7177734375, -11.904296875, -11.0908203125, -10.27734375, -9.4638671875, -8.650390625, -7.8369140625, -7.0234375, -6.2099609375, -5.396484375, -4.5830078125, -3.76953125, -2.9560546875, -2.142578125, -1.3291015625, -0.515625, 0.2978515625, 1.111328125, 1.9248046875, 2.73828125, 3.5517578125, 4.365234375, 5.1787109375, 5.9921875, 6.8056640625, 7.619140625, 8.4326171875, 9.24609375, 10.0595703125, 10.873046875, 11.6865234375, 12.5, 13.3134765625, 14.126953125, 14.9404296875, 15.75390625, 16.5673828125, 17.380859375, 18.1943359375, 19.0078125, 19.8212890625, 20.634765625, 21.4482421875, 22.26171875, 23.0751953125, 23.888671875, 24.7021484375, 25.515625]}, "gradients/decoder.transformer.h.2.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 4.0, 4.0, 1.0, 8.0, 14.0, 15.0, 12.0, 9.0, 13.0, 20.0, 23.0, 25.0, 32.0, 48.0, 76.0, 73.0, 97.0, 116.0, 171.0, 293.0, 526.0, 1218.0, 16863.0, 3118783.0, 4957.0, 973.0, 454.0, 247.0, 150.0, 109.0, 90.0, 52.0, 57.0, 36.0, 34.0, 26.0, 18.0, 19.0, 12.0, 11.0, 6.0, 5.0, 3.0, 3.0, 2.0, 3.0, 2.0, 4.0, 0.0, 2.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-70.75, -68.5654296875, -66.380859375, -64.1962890625, -62.01171875, -59.8271484375, -57.642578125, -55.4580078125, -53.2734375, -51.0888671875, -48.904296875, -46.7197265625, -44.53515625, -42.3505859375, -40.166015625, -37.9814453125, -35.796875, -33.6123046875, -31.427734375, -29.2431640625, -27.05859375, -24.8740234375, -22.689453125, -20.5048828125, -18.3203125, -16.1357421875, -13.951171875, -11.7666015625, -9.58203125, -7.3974609375, -5.212890625, -3.0283203125, -0.84375, 1.3408203125, 3.525390625, 5.7099609375, 7.89453125, 10.0791015625, 12.263671875, 14.4482421875, 16.6328125, 18.8173828125, 21.001953125, 23.1865234375, 25.37109375, 27.5556640625, 29.740234375, 31.9248046875, 34.109375, 36.2939453125, 38.478515625, 40.6630859375, 42.84765625, 45.0322265625, 47.216796875, 49.4013671875, 51.5859375, 53.7705078125, 55.955078125, 58.1396484375, 60.32421875, 62.5087890625, 64.693359375, 66.8779296875, 69.0625]}, "gradients/decoder.transformer.h.2.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 7.0, 24.0, 171.0, 418.0, 298.0, 79.0, 18.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-148.11790466308594, -143.9623260498047, -139.8067626953125, -135.65118408203125, -131.49562072753906, -127.34004211425781, -123.1844711303711, -119.02890014648438, -114.87332153320312, -110.7177505493164, -106.56217956542969, -102.40660095214844, -98.25102996826172, -94.095458984375, -89.93988800048828, -85.78431701660156, -81.62874603271484, -77.47317504882812, -73.3176040649414, -69.16203308105469, -65.00645446777344, -60.85088348388672, -56.6953125, -52.53974151611328, -48.3841667175293, -44.22859573364258, -40.073020935058594, -35.917449951171875, -31.761877059936523, -27.606304168701172, -23.450733184814453, -19.2951602935791, -15.139595031738281, -10.98402214050293, -6.8284502029418945, -2.6728782653808594, 1.4826946258544922, 5.638267517089844, 9.793838500976562, 13.949411392211914, 18.104984283447266, 22.260557174682617, 26.41613006591797, 30.571701049804688, 34.727272033691406, 38.88284683227539, 43.03841781616211, 47.193992614746094, 51.34956359863281, 55.50513458251953, 59.660709381103516, 63.816280364990234, 67.97185516357422, 72.12742614746094, 76.28299713134766, 80.43856811523438, 84.59414672851562, 88.74971771240234, 92.90528869628906, 97.06086730957031, 101.21643829345703, 105.37200927734375, 109.52758026123047, 113.68315124511719, 117.8387222290039]}, "gradients/decoder.transformer.h.2.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 3.0, 2.0, 1.0, 2.0, 1.0, 2.0, 2.0, 9.0, 1.0, 11.0, 6.0, 12.0, 12.0, 5.0, 15.0, 10.0, 26.0, 23.0, 24.0, 27.0, 27.0, 25.0, 39.0, 35.0, 27.0, 35.0, 41.0, 39.0, 41.0, 35.0, 44.0, 41.0, 37.0, 35.0, 35.0, 36.0, 27.0, 31.0, 27.0, 27.0, 18.0, 21.0, 13.0, 17.0, 17.0, 13.0, 6.0, 7.0, 6.0, 7.0, 3.0, 3.0, 7.0, 4.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-64.98450469970703, -62.85363006591797, -60.72275161743164, -58.59187698364258, -56.46099853515625, -54.33012390136719, -52.199249267578125, -50.06837463378906, -47.937496185302734, -45.80662155151367, -43.675743103027344, -41.54486846923828, -39.41399383544922, -37.28311538696289, -35.15224075317383, -33.0213623046875, -30.890487670898438, -28.759611129760742, -26.628734588623047, -24.497859954833984, -22.36698341369629, -20.236106872558594, -18.10523223876953, -15.974355697631836, -13.84347915649414, -11.712602615356445, -9.581727027893066, -7.450850963592529, -5.319974899291992, -3.189098358154297, -1.058222770690918, 1.072652816772461, 3.203521728515625, 5.334397792816162, 7.465273857116699, 9.596149444580078, 11.727025985717773, 13.857902526855469, 15.988778114318848, 18.119653701782227, 20.250530242919922, 22.381406784057617, 24.512283325195312, 26.643157958984375, 28.77403450012207, 30.904911041259766, 33.03578567504883, 35.166664123535156, 37.29753875732422, 39.42841339111328, 41.55929183959961, 43.69016647338867, 45.821044921875, 47.95191955566406, 50.082794189453125, 52.21366882324219, 54.344547271728516, 56.47542190551758, 58.606300354003906, 60.73717498779297, 62.86804962158203, 64.99893188476562, 67.12980651855469, 69.26068115234375, 71.39155578613281]}, "gradients/decoder.transformer.h.1.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 6.0, 1.0, 7.0, 9.0, 6.0, 5.0, 11.0, 13.0, 12.0, 12.0, 18.0, 24.0, 21.0, 29.0, 30.0, 25.0, 43.0, 49.0, 42.0, 61.0, 44.0, 53.0, 38.0, 45.0, 32.0, 39.0, 40.0, 40.0, 43.0, 28.0, 35.0, 29.0, 22.0, 15.0, 20.0, 16.0, 8.0, 9.0, 7.0, 8.0, 6.0, 5.0, 5.0, 1.0, 2.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-10.15625, -9.8074951171875, -9.458740234375, -9.1099853515625, -8.76123046875, -8.4124755859375, -8.063720703125, -7.7149658203125, -7.3662109375, -7.0174560546875, -6.668701171875, -6.3199462890625, -5.97119140625, -5.6224365234375, -5.273681640625, -4.9249267578125, -4.576171875, -4.2274169921875, -3.878662109375, -3.5299072265625, -3.18115234375, -2.8323974609375, -2.483642578125, -2.1348876953125, -1.7861328125, -1.4373779296875, -1.088623046875, -0.7398681640625, -0.39111328125, -0.0423583984375, 0.306396484375, 0.6551513671875, 1.00390625, 1.3526611328125, 1.701416015625, 2.0501708984375, 2.39892578125, 2.7476806640625, 3.096435546875, 3.4451904296875, 3.7939453125, 4.1427001953125, 4.491455078125, 4.8402099609375, 5.18896484375, 5.5377197265625, 5.886474609375, 6.2352294921875, 6.583984375, 6.9327392578125, 7.281494140625, 7.6302490234375, 7.97900390625, 8.3277587890625, 8.676513671875, 9.0252685546875, 9.3740234375, 9.7227783203125, 10.071533203125, 10.4202880859375, 10.76904296875, 11.1177978515625, 11.466552734375, 11.8153076171875, 12.1640625]}, "gradients/decoder.transformer.h.1.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 8.0, 5.0, 12.0, 13.0, 14.0, 17.0, 27.0, 35.0, 41.0, 71.0, 91.0, 161.0, 219.0, 296.0, 490.0, 794.0, 1337.0, 2161.0, 3805.0, 7042.0, 15672.0, 51207.0, 277893.0, 1245943.0, 1862569.0, 572894.0, 104773.0, 24392.0, 9829.0, 5107.0, 2791.0, 1615.0, 1005.0, 659.0, 446.0, 272.0, 155.0, 127.0, 88.0, 63.0, 53.0, 29.0, 19.0, 17.0, 10.0, 7.0, 10.0, 1.0, 1.0, 2.0, 3.0, 4.0, 0.0, 2.0, 1.0, 1.0], "bins": [-14.328125, -13.8834228515625, -13.438720703125, -12.9940185546875, -12.54931640625, -12.1046142578125, -11.659912109375, -11.2152099609375, -10.7705078125, -10.3258056640625, -9.881103515625, -9.4364013671875, -8.99169921875, -8.5469970703125, -8.102294921875, -7.6575927734375, -7.212890625, -6.7681884765625, -6.323486328125, -5.8787841796875, -5.43408203125, -4.9893798828125, -4.544677734375, -4.0999755859375, -3.6552734375, -3.2105712890625, -2.765869140625, -2.3211669921875, -1.87646484375, -1.4317626953125, -0.987060546875, -0.5423583984375, -0.09765625, 0.3470458984375, 0.791748046875, 1.2364501953125, 1.68115234375, 2.1258544921875, 2.570556640625, 3.0152587890625, 3.4599609375, 3.9046630859375, 4.349365234375, 4.7940673828125, 5.23876953125, 5.6834716796875, 6.128173828125, 6.5728759765625, 7.017578125, 7.4622802734375, 7.906982421875, 8.3516845703125, 8.79638671875, 9.2410888671875, 9.685791015625, 10.1304931640625, 10.5751953125, 11.0198974609375, 11.464599609375, 11.9093017578125, 12.35400390625, 12.7987060546875, 13.243408203125, 13.6881103515625, 14.1328125]}, "gradients/decoder.transformer.h.1.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 4.0, 1.0, 0.0, 7.0, 9.0, 18.0, 62.0, 157.0, 384.0, 923.0, 1394.0, 662.0, 302.0, 98.0, 43.0, 18.0, 6.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-55.625, -54.05029296875, -52.4755859375, -50.90087890625, -49.326171875, -47.75146484375, -46.1767578125, -44.60205078125, -43.02734375, -41.45263671875, -39.8779296875, -38.30322265625, -36.728515625, -35.15380859375, -33.5791015625, -32.00439453125, -30.4296875, -28.85498046875, -27.2802734375, -25.70556640625, -24.130859375, -22.55615234375, -20.9814453125, -19.40673828125, -17.83203125, -16.25732421875, -14.6826171875, -13.10791015625, -11.533203125, -9.95849609375, -8.3837890625, -6.80908203125, -5.234375, -3.65966796875, -2.0849609375, -0.51025390625, 1.064453125, 2.63916015625, 4.2138671875, 5.78857421875, 7.36328125, 8.93798828125, 10.5126953125, 12.08740234375, 13.662109375, 15.23681640625, 16.8115234375, 18.38623046875, 19.9609375, 21.53564453125, 23.1103515625, 24.68505859375, 26.259765625, 27.83447265625, 29.4091796875, 30.98388671875, 32.55859375, 34.13330078125, 35.7080078125, 37.28271484375, 38.857421875, 40.43212890625, 42.0068359375, 43.58154296875, 45.15625]}, "gradients/decoder.transformer.h.1.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 7.0, 5.0, 10.0, 21.0, 54.0, 128.0, 373.0, 1201.0, 7359.0, 4154677.0, 27934.0, 1731.0, 476.0, 167.0, 63.0, 30.0, 25.0, 10.0, 6.0, 5.0, 2.0, 1.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-94.9375, -91.7333984375, -88.529296875, -85.3251953125, -82.12109375, -78.9169921875, -75.712890625, -72.5087890625, -69.3046875, -66.1005859375, -62.896484375, -59.6923828125, -56.48828125, -53.2841796875, -50.080078125, -46.8759765625, -43.671875, -40.4677734375, -37.263671875, -34.0595703125, -30.85546875, -27.6513671875, -24.447265625, -21.2431640625, -18.0390625, -14.8349609375, -11.630859375, -8.4267578125, -5.22265625, -2.0185546875, 1.185546875, 4.3896484375, 7.59375, 10.7978515625, 14.001953125, 17.2060546875, 20.41015625, 23.6142578125, 26.818359375, 30.0224609375, 33.2265625, 36.4306640625, 39.634765625, 42.8388671875, 46.04296875, 49.2470703125, 52.451171875, 55.6552734375, 58.859375, 62.0634765625, 65.267578125, 68.4716796875, 71.67578125, 74.8798828125, 78.083984375, 81.2880859375, 84.4921875, 87.6962890625, 90.900390625, 94.1044921875, 97.30859375, 100.5126953125, 103.716796875, 106.9208984375, 110.125]}, "gradients/decoder.transformer.h.1.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 9.0, 25.0, 38.0, 101.0, 179.0, 249.0, 178.0, 145.0, 53.0, 23.0, 5.0, 5.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-116.05976867675781, -112.28221130371094, -108.50465393066406, -104.72710418701172, -100.94954681396484, -97.17198944091797, -93.39443969726562, -89.61688232421875, -85.83932495117188, -82.061767578125, -78.28421020507812, -74.50666046142578, -70.7291030883789, -66.95154571533203, -63.17399215698242, -59.39643859863281, -55.61888122558594, -51.84132385253906, -48.06377029418945, -44.286216735839844, -40.50865936279297, -36.731101989746094, -32.953548431396484, -29.175992965698242, -25.3984375, -21.620882034301758, -17.843326568603516, -14.065771102905273, -10.288215637207031, -6.510660171508789, -2.733104705810547, 1.0444507598876953, 4.8220062255859375, 8.59956169128418, 12.377117156982422, 16.154672622680664, 19.932228088378906, 23.70978355407715, 27.48733901977539, 31.264894485473633, 35.042449951171875, 38.82000732421875, 42.59756088256836, 46.37511444091797, 50.152671813964844, 53.93022918701172, 57.70778274536133, 61.48533630371094, 65.26289367675781, 69.04045104980469, 72.81800842285156, 76.5955581665039, 80.37311553955078, 84.15067291259766, 87.92822265625, 91.70578002929688, 95.48333740234375, 99.26089477539062, 103.0384521484375, 106.81600189208984, 110.59355926513672, 114.3711166381836, 118.14866638183594, 121.92622375488281, 125.70378112792969]}, "gradients/decoder.transformer.h.1.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 4.0, 3.0, 3.0, 3.0, 10.0, 11.0, 12.0, 10.0, 16.0, 19.0, 18.0, 22.0, 27.0, 22.0, 21.0, 32.0, 44.0, 28.0, 44.0, 45.0, 44.0, 42.0, 28.0, 45.0, 45.0, 37.0, 45.0, 36.0, 31.0, 37.0, 27.0, 18.0, 20.0, 23.0, 28.0, 17.0, 15.0, 16.0, 13.0, 6.0, 8.0, 8.0, 3.0, 7.0, 8.0, 1.0, 2.0, 1.0, 3.0, 0.0, 3.0, 0.0, 2.0, 1.0], "bins": [-72.61593627929688, -70.43852233886719, -68.2611083984375, -66.08369445800781, -63.90628433227539, -61.7288703918457, -59.55146026611328, -57.374046325683594, -55.196632385253906, -53.01921844482422, -50.84180450439453, -48.66439437866211, -46.48698043823242, -44.309566497802734, -42.13215637207031, -39.954742431640625, -37.77732849121094, -35.59991455078125, -33.42250061035156, -31.24509048461914, -29.067676544189453, -26.890262603759766, -24.71285057067871, -22.535438537597656, -20.35802459716797, -18.18061065673828, -16.003198623657227, -13.825785636901855, -11.648372650146484, -9.470959663391113, -7.293546676635742, -5.116133689880371, -2.938720703125, -0.7613077163696289, 1.4161052703857422, 3.5935182571411133, 5.770931243896484, 7.9483442306518555, 10.125757217407227, 12.303170204162598, 14.480583190917969, 16.657997131347656, 18.83540916442871, 21.012821197509766, 23.190235137939453, 25.36764907836914, 27.545061111450195, 29.72247314453125, 31.899887084960938, 34.077301025390625, 36.25471496582031, 38.432125091552734, 40.60953903198242, 42.78695297241211, 44.96436309814453, 47.14177703857422, 49.319190979003906, 51.496604919433594, 53.67401885986328, 55.8514289855957, 58.02884292602539, 60.20625686645508, 62.3836669921875, 64.56108093261719, 66.73849487304688]}, "gradients/decoder.transformer.h.1.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 3.0, 5.0, 5.0, 12.0, 11.0, 11.0, 17.0, 16.0, 15.0, 25.0, 24.0, 26.0, 29.0, 41.0, 32.0, 44.0, 41.0, 48.0, 35.0, 47.0, 49.0, 40.0, 43.0, 45.0, 40.0, 42.0, 34.0, 38.0, 34.0, 23.0, 17.0, 17.0, 13.0, 14.0, 16.0, 9.0, 10.0, 14.0, 10.0, 5.0, 3.0, 3.0, 1.0, 2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0], "bins": [-8.78125, -8.51922607421875, -8.2572021484375, -7.99517822265625, -7.733154296875, -7.47113037109375, -7.2091064453125, -6.94708251953125, -6.68505859375, -6.42303466796875, -6.1610107421875, -5.89898681640625, -5.636962890625, -5.37493896484375, -5.1129150390625, -4.85089111328125, -4.5888671875, -4.32684326171875, -4.0648193359375, -3.80279541015625, -3.540771484375, -3.27874755859375, -3.0167236328125, -2.75469970703125, -2.49267578125, -2.23065185546875, -1.9686279296875, -1.70660400390625, -1.444580078125, -1.18255615234375, -0.9205322265625, -0.65850830078125, -0.396484375, -0.13446044921875, 0.1275634765625, 0.38958740234375, 0.651611328125, 0.91363525390625, 1.1756591796875, 1.43768310546875, 1.69970703125, 1.96173095703125, 2.2237548828125, 2.48577880859375, 2.747802734375, 3.00982666015625, 3.2718505859375, 3.53387451171875, 3.7958984375, 4.05792236328125, 4.3199462890625, 4.58197021484375, 4.843994140625, 5.10601806640625, 5.3680419921875, 5.63006591796875, 5.89208984375, 6.15411376953125, 6.4161376953125, 6.67816162109375, 6.940185546875, 7.20220947265625, 7.4642333984375, 7.72625732421875, 7.98828125]}, "gradients/decoder.transformer.h.1.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 5.0, 8.0, 5.0, 9.0, 16.0, 21.0, 32.0, 67.0, 80.0, 123.0, 159.0, 239.0, 326.0, 399.0, 604.0, 847.0, 1153.0, 1796.0, 2639.0, 3725.0, 5374.0, 8077.0, 11589.0, 17239.0, 25922.0, 37887.0, 57266.0, 85429.0, 123137.0, 158701.0, 152920.0, 114445.0, 78417.0, 52081.0, 34787.0, 23217.0, 15657.0, 10606.0, 7266.0, 5064.0, 3365.0, 2354.0, 1754.0, 1034.0, 824.0, 584.0, 384.0, 271.0, 196.0, 141.0, 89.0, 83.0, 58.0, 35.0, 25.0, 13.0, 10.0, 7.0, 5.0, 4.0, 1.0, 2.0, 2.0], "bins": [-0.5673828125, -0.54901123046875, -0.5306396484375, -0.51226806640625, -0.493896484375, -0.47552490234375, -0.4571533203125, -0.43878173828125, -0.42041015625, -0.40203857421875, -0.3836669921875, -0.36529541015625, -0.346923828125, -0.32855224609375, -0.3101806640625, -0.29180908203125, -0.2734375, -0.25506591796875, -0.2366943359375, -0.21832275390625, -0.199951171875, -0.18157958984375, -0.1632080078125, -0.14483642578125, -0.12646484375, -0.10809326171875, -0.0897216796875, -0.07135009765625, -0.052978515625, -0.03460693359375, -0.0162353515625, 0.00213623046875, 0.0205078125, 0.03887939453125, 0.0572509765625, 0.07562255859375, 0.093994140625, 0.11236572265625, 0.1307373046875, 0.14910888671875, 0.16748046875, 0.18585205078125, 0.2042236328125, 0.22259521484375, 0.240966796875, 0.25933837890625, 0.2777099609375, 0.29608154296875, 0.314453125, 0.33282470703125, 0.3511962890625, 0.36956787109375, 0.387939453125, 0.40631103515625, 0.4246826171875, 0.44305419921875, 0.46142578125, 0.47979736328125, 0.4981689453125, 0.51654052734375, 0.534912109375, 0.55328369140625, 0.5716552734375, 0.59002685546875, 0.6083984375]}, "gradients/decoder.transformer.h.1.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 1.0, 3.0, 1.0, 1.0, 1.0, 3.0, 3.0, 5.0, 6.0, 5.0, 3.0, 20.0, 11.0, 15.0, 15.0, 19.0, 13.0, 22.0, 29.0, 26.0, 42.0, 30.0, 39.0, 38.0, 31.0, 39.0, 39.0, 1062.0, 41.0, 44.0, 32.0, 35.0, 37.0, 53.0, 33.0, 30.0, 37.0, 32.0, 19.0, 23.0, 18.0, 18.0, 15.0, 4.0, 8.0, 16.0, 5.0, 4.0, 3.0, 7.0, 1.0, 3.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-5.08203125, -4.92010498046875, -4.7581787109375, -4.59625244140625, -4.434326171875, -4.27239990234375, -4.1104736328125, -3.94854736328125, -3.78662109375, -3.62469482421875, -3.4627685546875, -3.30084228515625, -3.138916015625, -2.97698974609375, -2.8150634765625, -2.65313720703125, -2.4912109375, -2.32928466796875, -2.1673583984375, -2.00543212890625, -1.843505859375, -1.68157958984375, -1.5196533203125, -1.35772705078125, -1.19580078125, -1.03387451171875, -0.8719482421875, -0.71002197265625, -0.548095703125, -0.38616943359375, -0.2242431640625, -0.06231689453125, 0.099609375, 0.26153564453125, 0.4234619140625, 0.58538818359375, 0.747314453125, 0.90924072265625, 1.0711669921875, 1.23309326171875, 1.39501953125, 1.55694580078125, 1.7188720703125, 1.88079833984375, 2.042724609375, 2.20465087890625, 2.3665771484375, 2.52850341796875, 2.6904296875, 2.85235595703125, 3.0142822265625, 3.17620849609375, 3.338134765625, 3.50006103515625, 3.6619873046875, 3.82391357421875, 3.98583984375, 4.14776611328125, 4.3096923828125, 4.47161865234375, 4.633544921875, 4.79547119140625, 4.9573974609375, 5.11932373046875, 5.28125]}, "gradients/decoder.transformer.h.1.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 1.0, 2.0, 4.0, 2.0, 6.0, 10.0, 12.0, 17.0, 30.0, 40.0, 58.0, 99.0, 149.0, 217.0, 327.0, 492.0, 727.0, 1104.0, 1734.0, 2637.0, 3925.0, 5950.0, 9123.0, 14329.0, 22324.0, 34714.0, 52740.0, 80262.0, 118977.0, 189770.0, 1172307.0, 127635.0, 89885.0, 58566.0, 38151.0, 24603.0, 16120.0, 10553.0, 6733.0, 4364.0, 2902.0, 1909.0, 1269.0, 807.0, 558.0, 338.0, 214.0, 170.0, 91.0, 64.0, 38.0, 31.0, 24.0, 10.0, 10.0, 7.0, 3.0, 2.0, 1.0, 2.0], "bins": [-0.5556640625, -0.5393791198730469, -0.5230941772460938, -0.5068092346191406, -0.4905242919921875, -0.4742393493652344, -0.45795440673828125, -0.4416694641113281, -0.425384521484375, -0.4090995788574219, -0.39281463623046875, -0.3765296936035156, -0.3602447509765625, -0.3439598083496094, -0.32767486572265625, -0.3113899230957031, -0.29510498046875, -0.2788200378417969, -0.26253509521484375, -0.24625015258789062, -0.2299652099609375, -0.21368026733398438, -0.19739532470703125, -0.18111038208007812, -0.164825439453125, -0.14854049682617188, -0.13225555419921875, -0.11597061157226562, -0.0996856689453125, -0.08340072631835938, -0.06711578369140625, -0.050830841064453125, -0.0345458984375, -0.018260955810546875, -0.00197601318359375, 0.014308929443359375, 0.0305938720703125, 0.046878814697265625, 0.06316375732421875, 0.07944869995117188, 0.095733642578125, 0.11201858520507812, 0.12830352783203125, 0.14458847045898438, 0.1608734130859375, 0.17715835571289062, 0.19344329833984375, 0.20972824096679688, 0.22601318359375, 0.24229812622070312, 0.25858306884765625, 0.2748680114746094, 0.2911529541015625, 0.3074378967285156, 0.32372283935546875, 0.3400077819824219, 0.356292724609375, 0.3725776672363281, 0.38886260986328125, 0.4051475524902344, 0.4214324951171875, 0.4377174377441406, 0.45400238037109375, 0.4702873229980469, 0.486572265625]}, "gradients/decoder.transformer.h.1.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 2.0, 4.0, 4.0, 1.0, 1.0, 2.0, 6.0, 8.0, 2.0, 11.0, 6.0, 13.0, 14.0, 24.0, 13.0, 38.0, 44.0, 43.0, 55.0, 71.0, 63.0, 83.0, 69.0, 63.0, 71.0, 60.0, 37.0, 31.0, 27.0, 28.0, 22.0, 13.0, 9.0, 11.0, 10.0, 12.0, 7.0, 8.0, 3.0, 4.0, 2.0, 4.0, 3.0, 1.0, 5.0, 0.0, 3.0, 1.0, 2.0, 0.0, 1.0, 2.0], "bins": [-0.005489349365234375, -0.0053269267082214355, -0.005164504051208496, -0.005002081394195557, -0.004839658737182617, -0.004677236080169678, -0.004514813423156738, -0.004352390766143799, -0.004189968109130859, -0.00402754545211792, -0.0038651227951049805, -0.003702700138092041, -0.0035402774810791016, -0.003377854824066162, -0.0032154321670532227, -0.003053009510040283, -0.0028905868530273438, -0.0027281641960144043, -0.002565741539001465, -0.0024033188819885254, -0.002240896224975586, -0.0020784735679626465, -0.001916050910949707, -0.0017536282539367676, -0.0015912055969238281, -0.0014287829399108887, -0.0012663602828979492, -0.0011039376258850098, -0.0009415149688720703, -0.0007790923118591309, -0.0006166696548461914, -0.00045424699783325195, -0.0002918243408203125, -0.00012940168380737305, 3.3020973205566406e-05, 0.00019544363021850586, 0.0003578662872314453, 0.0005202889442443848, 0.0006827116012573242, 0.0008451342582702637, 0.0010075569152832031, 0.0011699795722961426, 0.001332402229309082, 0.0014948248863220215, 0.001657247543334961, 0.0018196702003479004, 0.00198209285736084, 0.0021445155143737793, 0.0023069381713867188, 0.002469360828399658, 0.0026317834854125977, 0.002794206142425537, 0.0029566287994384766, 0.003119051456451416, 0.0032814741134643555, 0.003443896770477295, 0.0036063194274902344, 0.003768742084503174, 0.003931164741516113, 0.004093587398529053, 0.004256010055541992, 0.004418432712554932, 0.004580855369567871, 0.0047432780265808105, 0.00490570068359375]}, "gradients/decoder.transformer.h.1.crossattention.q_attn.weight": {"_type": "histogram", "values": [2.0, 2.0, 1.0, 2.0, 2.0, 5.0, 3.0, 4.0, 5.0, 5.0, 5.0, 10.0, 7.0, 10.0, 10.0, 18.0, 16.0, 24.0, 31.0, 35.0, 44.0, 55.0, 71.0, 82.0, 131.0, 194.0, 294.0, 526.0, 920.0, 16600.0, 1023462.0, 3932.0, 752.0, 430.0, 252.0, 178.0, 124.0, 83.0, 53.0, 36.0, 42.0, 18.0, 17.0, 10.0, 18.0, 10.0, 7.0, 4.0, 8.0, 6.0, 4.0, 7.0, 2.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.09210205078125, -0.08906936645507812, -0.08603668212890625, -0.08300399780273438, -0.0799713134765625, -0.07693862915039062, -0.07390594482421875, -0.07087326049804688, -0.067840576171875, -0.06480789184570312, -0.06177520751953125, -0.058742523193359375, -0.0557098388671875, -0.052677154541015625, -0.04964447021484375, -0.046611785888671875, -0.0435791015625, -0.040546417236328125, -0.03751373291015625, -0.034481048583984375, -0.0314483642578125, -0.028415679931640625, -0.02538299560546875, -0.022350311279296875, -0.019317626953125, -0.016284942626953125, -0.01325225830078125, -0.010219573974609375, -0.0071868896484375, -0.004154205322265625, -0.00112152099609375, 0.001911163330078125, 0.00494384765625, 0.007976531982421875, 0.01100921630859375, 0.014041900634765625, 0.0170745849609375, 0.020107269287109375, 0.02313995361328125, 0.026172637939453125, 0.029205322265625, 0.032238006591796875, 0.03527069091796875, 0.038303375244140625, 0.0413360595703125, 0.044368743896484375, 0.04740142822265625, 0.050434112548828125, 0.053466796875, 0.056499481201171875, 0.05953216552734375, 0.06256484985351562, 0.0655975341796875, 0.06863021850585938, 0.07166290283203125, 0.07469558715820312, 0.077728271484375, 0.08076095581054688, 0.08379364013671875, 0.08682632446289062, 0.0898590087890625, 0.09289169311523438, 0.09592437744140625, 0.09895706176757812, 0.10198974609375]}, "gradients/decoder.transformer.h.1.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 4.0, 15.0, 118.0, 409.0, 372.0, 76.0, 15.0, 5.0, 0.0, 1.0, 1.0, 1.0], "bins": [-0.018808776512742043, -0.018472231924533844, -0.018135687336325645, -0.017799142748117447, -0.01746259815990925, -0.01712605357170105, -0.01678950898349285, -0.016452964395284653, -0.016116419807076454, -0.015779875218868256, -0.015443330630660057, -0.015106786042451859, -0.01477024145424366, -0.014433696866035461, -0.014097152277827263, -0.013760607689619064, -0.013424063101410866, -0.013087518513202667, -0.012750973924994469, -0.01241442933678627, -0.012077884748578072, -0.011741340160369873, -0.011404795572161674, -0.011068250983953476, -0.010731706395745277, -0.010395161807537079, -0.01005861721932888, -0.009722072631120682, -0.009385528042912483, -0.009048983454704285, -0.008712438866496086, -0.008375894278287888, -0.008039349690079689, -0.0077028051018714905, -0.007366260513663292, -0.007029715925455093, -0.006693171337246895, -0.006356626749038696, -0.006020082160830498, -0.005683537572622299, -0.005346993450075388, -0.005010448861867189, -0.004673904273658991, -0.004337359685450792, -0.004000815097242594, -0.0036642705090343952, -0.0033277259208261967, -0.002991181332617998, -0.0026546367444097996, -0.002318092156201601, -0.0019815475679934025, -0.001645002979785204, -0.0013084583915770054, -0.0009719138033688068, -0.0006353692151606083, -0.00029882462695240974, 3.77199612557888e-05, 0.00037426454946398735, 0.0007108091376721859, 0.0010473537258803844, 0.001383898314088583, 0.0017204429022967815, 0.00205698749050498, 0.0023935320787131786, 0.002730076666921377]}, "gradients/decoder.transformer.h.1.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 2.0, 2.0, 4.0, 2.0, 2.0, 3.0, 7.0, 10.0, 15.0, 8.0, 14.0, 19.0, 22.0, 11.0, 29.0, 17.0, 20.0, 33.0, 35.0, 28.0, 48.0, 37.0, 41.0, 47.0, 44.0, 60.0, 51.0, 43.0, 38.0, 28.0, 46.0, 39.0, 25.0, 25.0, 22.0, 22.0, 23.0, 21.0, 20.0, 7.0, 13.0, 5.0, 13.0, 1.0, 5.0, 4.0, 2.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0031191110610961914, -0.003028430975973606, -0.002937750890851021, -0.0028470708057284355, -0.0027563907206058502, -0.002665710635483265, -0.0025750305503606796, -0.0024843504652380943, -0.002393670380115509, -0.0023029902949929237, -0.0022123102098703384, -0.002121630124747753, -0.002030950039625168, -0.0019402699545025826, -0.0018495898693799973, -0.001758909784257412, -0.0016682296991348267, -0.0015775496140122414, -0.001486869528889656, -0.0013961894437670708, -0.0013055093586444855, -0.0012148292735219002, -0.0011241491883993149, -0.0010334691032767296, -0.0009427890181541443, -0.000852108933031559, -0.0007614288479089737, -0.0006707487627863884, -0.0005800686776638031, -0.0004893885925412178, -0.0003987085074186325, -0.0003080284222960472, -0.00021734833717346191, -0.00012666825205087662, -3.598816692829132e-05, 5.4691918194293976e-05, 0.00014537200331687927, 0.00023605208843946457, 0.00032673217356204987, 0.00041741225868463516, 0.0005080923438072205, 0.0005987724289298058, 0.000689452514052391, 0.0007801325991749763, 0.0008708126842975616, 0.0009614927694201469, 0.0010521728545427322, 0.0011428529396653175, 0.0012335330247879028, 0.0013242131099104881, 0.0014148931950330734, 0.0015055732801556587, 0.001596253365278244, 0.0016869334504008293, 0.0017776135355234146, 0.001868293620646, 0.001958973705768585, 0.0020496537908911705, 0.002140333876013756, 0.002231013961136341, 0.0023216940462589264, 0.0024123741313815117, 0.002503054216504097, 0.0025937343016266823, 0.0026844143867492676]}, "gradients/decoder.transformer.h.1.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 3.0, 5.0, 5.0, 12.0, 11.0, 11.0, 17.0, 16.0, 15.0, 25.0, 24.0, 25.0, 30.0, 40.0, 33.0, 44.0, 41.0, 47.0, 36.0, 46.0, 50.0, 40.0, 43.0, 44.0, 41.0, 42.0, 34.0, 38.0, 34.0, 23.0, 17.0, 17.0, 13.0, 14.0, 15.0, 10.0, 10.0, 14.0, 10.0, 5.0, 3.0, 3.0, 1.0, 2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0], "bins": [-8.78125, -8.519287109375, -8.25732421875, -7.995361328125, -7.7333984375, -7.471435546875, -7.20947265625, -6.947509765625, -6.685546875, -6.423583984375, -6.16162109375, -5.899658203125, -5.6376953125, -5.375732421875, -5.11376953125, -4.851806640625, -4.58984375, -4.327880859375, -4.06591796875, -3.803955078125, -3.5419921875, -3.280029296875, -3.01806640625, -2.756103515625, -2.494140625, -2.232177734375, -1.97021484375, -1.708251953125, -1.4462890625, -1.184326171875, -0.92236328125, -0.660400390625, -0.3984375, -0.136474609375, 0.12548828125, 0.387451171875, 0.6494140625, 0.911376953125, 1.17333984375, 1.435302734375, 1.697265625, 1.959228515625, 2.22119140625, 2.483154296875, 2.7451171875, 3.007080078125, 3.26904296875, 3.531005859375, 3.79296875, 4.054931640625, 4.31689453125, 4.578857421875, 4.8408203125, 5.102783203125, 5.36474609375, 5.626708984375, 5.888671875, 6.150634765625, 6.41259765625, 6.674560546875, 6.9365234375, 7.198486328125, 7.46044921875, 7.722412109375, 7.984375]}, "gradients/decoder.transformer.h.1.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 3.0, 1.0, 1.0, 1.0, 3.0, 1.0, 4.0, 6.0, 4.0, 13.0, 29.0, 26.0, 44.0, 58.0, 85.0, 112.0, 138.0, 168.0, 256.0, 405.0, 590.0, 976.0, 1793.0, 3814.0, 10469.0, 43394.0, 282742.0, 580630.0, 92381.0, 18698.0, 5715.0, 2413.0, 1256.0, 778.0, 520.0, 310.0, 191.0, 130.0, 104.0, 74.0, 60.0, 46.0, 29.0, 26.0, 13.0, 12.0, 12.0, 8.0, 8.0, 6.0, 3.0, 5.0, 2.0, 1.0, 2.0, 2.0, 0.0, 0.0, 2.0], "bins": [-15.4609375, -14.9810791015625, -14.501220703125, -14.0213623046875, -13.54150390625, -13.0616455078125, -12.581787109375, -12.1019287109375, -11.6220703125, -11.1422119140625, -10.662353515625, -10.1824951171875, -9.70263671875, -9.2227783203125, -8.742919921875, -8.2630615234375, -7.783203125, -7.3033447265625, -6.823486328125, -6.3436279296875, -5.86376953125, -5.3839111328125, -4.904052734375, -4.4241943359375, -3.9443359375, -3.4644775390625, -2.984619140625, -2.5047607421875, -2.02490234375, -1.5450439453125, -1.065185546875, -0.5853271484375, -0.10546875, 0.3743896484375, 0.854248046875, 1.3341064453125, 1.81396484375, 2.2938232421875, 2.773681640625, 3.2535400390625, 3.7333984375, 4.2132568359375, 4.693115234375, 5.1729736328125, 5.65283203125, 6.1326904296875, 6.612548828125, 7.0924072265625, 7.572265625, 8.0521240234375, 8.531982421875, 9.0118408203125, 9.49169921875, 9.9715576171875, 10.451416015625, 10.9312744140625, 11.4111328125, 11.8909912109375, 12.370849609375, 12.8507080078125, 13.33056640625, 13.8104248046875, 14.290283203125, 14.7701416015625, 15.25]}, "gradients/decoder.transformer.h.1.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 3.0, 1.0, 0.0, 1.0, 3.0, 4.0, 2.0, 7.0, 11.0, 8.0, 7.0, 21.0, 16.0, 29.0, 31.0, 28.0, 38.0, 44.0, 61.0, 91.0, 177.0, 1569.0, 366.0, 142.0, 94.0, 70.0, 45.0, 39.0, 32.0, 28.0, 23.0, 14.0, 27.0, 7.0, 4.0, 6.0, 4.0, 4.0, 2.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-27.875, -26.9716796875, -26.068359375, -25.1650390625, -24.26171875, -23.3583984375, -22.455078125, -21.5517578125, -20.6484375, -19.7451171875, -18.841796875, -17.9384765625, -17.03515625, -16.1318359375, -15.228515625, -14.3251953125, -13.421875, -12.5185546875, -11.615234375, -10.7119140625, -9.80859375, -8.9052734375, -8.001953125, -7.0986328125, -6.1953125, -5.2919921875, -4.388671875, -3.4853515625, -2.58203125, -1.6787109375, -0.775390625, 0.1279296875, 1.03125, 1.9345703125, 2.837890625, 3.7412109375, 4.64453125, 5.5478515625, 6.451171875, 7.3544921875, 8.2578125, 9.1611328125, 10.064453125, 10.9677734375, 11.87109375, 12.7744140625, 13.677734375, 14.5810546875, 15.484375, 16.3876953125, 17.291015625, 18.1943359375, 19.09765625, 20.0009765625, 20.904296875, 21.8076171875, 22.7109375, 23.6142578125, 24.517578125, 25.4208984375, 26.32421875, 27.2275390625, 28.130859375, 29.0341796875, 29.9375]}, "gradients/decoder.transformer.h.1.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 3.0, 3.0, 3.0, 1.0, 3.0, 3.0, 15.0, 15.0, 19.0, 24.0, 35.0, 37.0, 73.0, 166.0, 285.0, 650.0, 2640.0, 3132158.0, 7973.0, 809.0, 351.0, 177.0, 94.0, 47.0, 24.0, 27.0, 22.0, 16.0, 10.0, 10.0, 5.0, 5.0, 4.0, 5.0, 3.0, 0.0, 0.0, 1.0, 2.0, 0.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-90.5, -87.8095703125, -85.119140625, -82.4287109375, -79.73828125, -77.0478515625, -74.357421875, -71.6669921875, -68.9765625, -66.2861328125, -63.595703125, -60.9052734375, -58.21484375, -55.5244140625, -52.833984375, -50.1435546875, -47.453125, -44.7626953125, -42.072265625, -39.3818359375, -36.69140625, -34.0009765625, -31.310546875, -28.6201171875, -25.9296875, -23.2392578125, -20.548828125, -17.8583984375, -15.16796875, -12.4775390625, -9.787109375, -7.0966796875, -4.40625, -1.7158203125, 0.974609375, 3.6650390625, 6.35546875, 9.0458984375, 11.736328125, 14.4267578125, 17.1171875, 19.8076171875, 22.498046875, 25.1884765625, 27.87890625, 30.5693359375, 33.259765625, 35.9501953125, 38.640625, 41.3310546875, 44.021484375, 46.7119140625, 49.40234375, 52.0927734375, 54.783203125, 57.4736328125, 60.1640625, 62.8544921875, 65.544921875, 68.2353515625, 70.92578125, 73.6162109375, 76.306640625, 78.9970703125, 81.6875]}, "gradients/decoder.transformer.h.1.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 2.0, 172.0, 827.0, 12.0, 2.0], "bins": [-472.2624206542969, -464.5577087402344, -456.8529968261719, -449.14825439453125, -441.44354248046875, -433.73883056640625, -426.03411865234375, -418.32940673828125, -410.6246643066406, -402.9199523925781, -395.2152404785156, -387.510498046875, -379.8057861328125, -372.10107421875, -364.3963623046875, -356.691650390625, -348.9869384765625, -341.2822265625, -333.5775146484375, -325.8727722167969, -318.1680603027344, -310.4633483886719, -302.7586364746094, -295.0539245605469, -287.34918212890625, -279.64447021484375, -271.93975830078125, -264.2350158691406, -256.5303039550781, -248.82559204101562, -241.12088012695312, -233.41615295410156, -225.71142578125, -218.0067138671875, -210.30198669433594, -202.59727478027344, -194.89254760742188, -187.18783569335938, -179.48312377929688, -171.7783966064453, -164.0736846923828, -156.3689727783203, -148.66424560546875, -140.95953369140625, -133.2548065185547, -125.55009460449219, -117.84537506103516, -110.14065551757812, -102.43594360351562, -94.7312240600586, -87.02650451660156, -79.32179260253906, -71.61707305908203, -63.912353515625, -56.20763397216797, -48.5029182434082, -40.79819869995117, -33.09347915649414, -25.388763427734375, -17.684043884277344, -9.979326248168945, -2.274608612060547, 5.430110931396484, 13.13482666015625, 20.83954620361328]}, "gradients/decoder.transformer.h.1.ln_1.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 2.0, 5.0, 6.0, 4.0, 4.0, 12.0, 5.0, 9.0, 9.0, 12.0, 16.0, 13.0, 22.0, 24.0, 21.0, 23.0, 34.0, 35.0, 34.0, 44.0, 37.0, 44.0, 45.0, 35.0, 52.0, 32.0, 39.0, 44.0, 30.0, 28.0, 36.0, 30.0, 22.0, 26.0, 28.0, 25.0, 23.0, 15.0, 13.0, 13.0, 7.0, 11.0, 8.0, 6.0, 7.0, 8.0, 4.0, 3.0, 1.0, 1.0, 4.0, 2.0, 1.0, 0.0, 2.0], "bins": [-51.977237701416016, -50.453521728515625, -48.929805755615234, -47.406089782714844, -45.88237380981445, -44.35865783691406, -42.83494186401367, -41.31122589111328, -39.78750991821289, -38.2637939453125, -36.74007797241211, -35.21636199951172, -33.69264602661133, -32.16893005371094, -30.645214080810547, -29.121498107910156, -27.597782135009766, -26.074066162109375, -24.550350189208984, -23.026634216308594, -21.502918243408203, -19.979202270507812, -18.455486297607422, -16.93177032470703, -15.40805435180664, -13.88433837890625, -12.36062240600586, -10.836906433105469, -9.313190460205078, -7.7894744873046875, -6.265758514404297, -4.742042541503906, -3.2183303833007812, -1.6946144104003906, -0.1708984375, 1.3528175354003906, 2.8765335083007812, 4.400249481201172, 5.9239654541015625, 7.447681427001953, 8.971397399902344, 10.495113372802734, 12.018829345703125, 13.542545318603516, 15.066261291503906, 16.589977264404297, 18.113693237304688, 19.637409210205078, 21.16112518310547, 22.68484115600586, 24.20855712890625, 25.73227310180664, 27.25598907470703, 28.779705047607422, 30.303421020507812, 31.827136993408203, 33.350852966308594, 34.874568939208984, 36.398284912109375, 37.922000885009766, 39.445716857910156, 40.96943283081055, 42.49314880371094, 44.01686477661133, 45.54058074951172]}, "gradients/decoder.transformer.h.0.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 1.0, 3.0, 6.0, 3.0, 3.0, 6.0, 9.0, 8.0, 16.0, 14.0, 19.0, 26.0, 27.0, 23.0, 37.0, 36.0, 30.0, 34.0, 43.0, 41.0, 52.0, 42.0, 60.0, 40.0, 42.0, 53.0, 50.0, 44.0, 28.0, 27.0, 23.0, 27.0, 26.0, 16.0, 18.0, 18.0, 12.0, 12.0, 11.0, 5.0, 6.0, 4.0, 3.0, 2.0, 3.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0], "bins": [-10.46875, -10.15576171875, -9.8427734375, -9.52978515625, -9.216796875, -8.90380859375, -8.5908203125, -8.27783203125, -7.96484375, -7.65185546875, -7.3388671875, -7.02587890625, -6.712890625, -6.39990234375, -6.0869140625, -5.77392578125, -5.4609375, -5.14794921875, -4.8349609375, -4.52197265625, -4.208984375, -3.89599609375, -3.5830078125, -3.27001953125, -2.95703125, -2.64404296875, -2.3310546875, -2.01806640625, -1.705078125, -1.39208984375, -1.0791015625, -0.76611328125, -0.453125, -0.14013671875, 0.1728515625, 0.48583984375, 0.798828125, 1.11181640625, 1.4248046875, 1.73779296875, 2.05078125, 2.36376953125, 2.6767578125, 2.98974609375, 3.302734375, 3.61572265625, 3.9287109375, 4.24169921875, 4.5546875, 4.86767578125, 5.1806640625, 5.49365234375, 5.806640625, 6.11962890625, 6.4326171875, 6.74560546875, 7.05859375, 7.37158203125, 7.6845703125, 7.99755859375, 8.310546875, 8.62353515625, 8.9365234375, 9.24951171875, 9.5625]}, "gradients/decoder.transformer.h.0.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 2.0, 9.0, 9.0, 11.0, 16.0, 23.0, 25.0, 19.0, 50.0, 55.0, 69.0, 69.0, 93.0, 104.0, 180.0, 215.0, 305.0, 441.0, 704.0, 1445.0, 4976.0, 145922.0, 3891797.0, 139015.0, 4895.0, 1451.0, 728.0, 455.0, 260.0, 219.0, 161.0, 109.0, 91.0, 76.0, 73.0, 39.0, 44.0, 28.0, 22.0, 18.0, 21.0, 10.0, 6.0, 10.0, 2.0, 3.0, 5.0, 5.0, 3.0, 3.0, 3.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-53.25, -51.50390625, -49.7578125, -48.01171875, -46.265625, -44.51953125, -42.7734375, -41.02734375, -39.28125, -37.53515625, -35.7890625, -34.04296875, -32.296875, -30.55078125, -28.8046875, -27.05859375, -25.3125, -23.56640625, -21.8203125, -20.07421875, -18.328125, -16.58203125, -14.8359375, -13.08984375, -11.34375, -9.59765625, -7.8515625, -6.10546875, -4.359375, -2.61328125, -0.8671875, 0.87890625, 2.625, 4.37109375, 6.1171875, 7.86328125, 9.609375, 11.35546875, 13.1015625, 14.84765625, 16.59375, 18.33984375, 20.0859375, 21.83203125, 23.578125, 25.32421875, 27.0703125, 28.81640625, 30.5625, 32.30859375, 34.0546875, 35.80078125, 37.546875, 39.29296875, 41.0390625, 42.78515625, 44.53125, 46.27734375, 48.0234375, 49.76953125, 51.515625, 53.26171875, 55.0078125, 56.75390625, 58.5]}, "gradients/decoder.transformer.h.0.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 2.0, 2.0, 5.0, 4.0, 8.0, 8.0, 7.0, 5.0, 23.0, 36.0, 46.0, 75.0, 95.0, 164.0, 249.0, 487.0, 687.0, 715.0, 507.0, 364.0, 227.0, 152.0, 68.0, 52.0, 28.0, 17.0, 16.0, 9.0, 7.0, 7.0, 3.0, 2.0, 2.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-18.734375, -18.0927734375, -17.451171875, -16.8095703125, -16.16796875, -15.5263671875, -14.884765625, -14.2431640625, -13.6015625, -12.9599609375, -12.318359375, -11.6767578125, -11.03515625, -10.3935546875, -9.751953125, -9.1103515625, -8.46875, -7.8271484375, -7.185546875, -6.5439453125, -5.90234375, -5.2607421875, -4.619140625, -3.9775390625, -3.3359375, -2.6943359375, -2.052734375, -1.4111328125, -0.76953125, -0.1279296875, 0.513671875, 1.1552734375, 1.796875, 2.4384765625, 3.080078125, 3.7216796875, 4.36328125, 5.0048828125, 5.646484375, 6.2880859375, 6.9296875, 7.5712890625, 8.212890625, 8.8544921875, 9.49609375, 10.1376953125, 10.779296875, 11.4208984375, 12.0625, 12.7041015625, 13.345703125, 13.9873046875, 14.62890625, 15.2705078125, 15.912109375, 16.5537109375, 17.1953125, 17.8369140625, 18.478515625, 19.1201171875, 19.76171875, 20.4033203125, 21.044921875, 21.6865234375, 22.328125]}, "gradients/decoder.transformer.h.0.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 0.0, 2.0, 1.0, 1.0, 3.0, 4.0, 7.0, 8.0, 14.0, 13.0, 47.0, 70.0, 144.0, 274.0, 580.0, 1805.0, 6456.0, 39857.0, 693182.0, 3256620.0, 173370.0, 16250.0, 3604.0, 1084.0, 452.0, 192.0, 110.0, 49.0, 30.0, 23.0, 12.0, 11.0, 9.0, 4.0, 3.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-25.3125, -24.50341796875, -23.6943359375, -22.88525390625, -22.076171875, -21.26708984375, -20.4580078125, -19.64892578125, -18.83984375, -18.03076171875, -17.2216796875, -16.41259765625, -15.603515625, -14.79443359375, -13.9853515625, -13.17626953125, -12.3671875, -11.55810546875, -10.7490234375, -9.93994140625, -9.130859375, -8.32177734375, -7.5126953125, -6.70361328125, -5.89453125, -5.08544921875, -4.2763671875, -3.46728515625, -2.658203125, -1.84912109375, -1.0400390625, -0.23095703125, 0.578125, 1.38720703125, 2.1962890625, 3.00537109375, 3.814453125, 4.62353515625, 5.4326171875, 6.24169921875, 7.05078125, 7.85986328125, 8.6689453125, 9.47802734375, 10.287109375, 11.09619140625, 11.9052734375, 12.71435546875, 13.5234375, 14.33251953125, 15.1416015625, 15.95068359375, 16.759765625, 17.56884765625, 18.3779296875, 19.18701171875, 19.99609375, 20.80517578125, 21.6142578125, 22.42333984375, 23.232421875, 24.04150390625, 24.8505859375, 25.65966796875, 26.46875]}, "gradients/decoder.transformer.h.0.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 4.0, 5.0, 10.0, 10.0, 10.0, 18.0, 32.0, 41.0, 55.0, 68.0, 97.0, 118.0, 106.0, 123.0, 89.0, 76.0, 51.0, 40.0, 19.0, 15.0, 9.0, 7.0, 1.0, 2.0, 0.0, 4.0, 1.0, 2.0, 2.0], "bins": [-169.18980407714844, -165.64413452148438, -162.09848022460938, -158.5528106689453, -155.00714111328125, -151.4614715576172, -147.91580200195312, -144.37014770507812, -140.82447814941406, -137.27880859375, -133.733154296875, -130.18748474121094, -126.64181518554688, -123.09614562988281, -119.55048370361328, -116.00482177734375, -112.45915222167969, -108.91348266601562, -105.3678207397461, -101.82215881347656, -98.2764892578125, -94.73081970214844, -91.1851577758789, -87.63949584960938, -84.09382629394531, -80.54815673828125, -77.00249481201172, -73.45683288574219, -69.91116333007812, -66.36549377441406, -62.81983184814453, -59.274166107177734, -55.72850799560547, -52.18284225463867, -48.637176513671875, -45.09151077270508, -41.54584503173828, -38.000179290771484, -34.45451354980469, -30.90884780883789, -27.363182067871094, -23.817516326904297, -20.2718505859375, -16.726184844970703, -13.180519104003906, -9.63485336303711, -6.0891876220703125, -2.5435218811035156, 1.0021438598632812, 4.547809600830078, 8.093475341796875, 11.639141082763672, 15.184806823730469, 18.730472564697266, 22.276138305664062, 25.82180404663086, 29.367469787597656, 32.91313552856445, 36.45880126953125, 40.00446701049805, 43.550132751464844, 47.09579849243164, 50.64146423339844, 54.187129974365234, 57.73279571533203]}, "gradients/decoder.transformer.h.0.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 5.0, 4.0, 3.0, 0.0, 6.0, 8.0, 6.0, 13.0, 16.0, 19.0, 16.0, 27.0, 33.0, 31.0, 34.0, 37.0, 44.0, 47.0, 46.0, 43.0, 48.0, 44.0, 30.0, 53.0, 47.0, 38.0, 31.0, 40.0, 27.0, 35.0, 23.0, 26.0, 21.0, 23.0, 15.0, 13.0, 12.0, 4.0, 8.0, 9.0, 4.0, 15.0, 5.0, 0.0, 1.0, 3.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-62.480873107910156, -60.58003234863281, -58.67919158935547, -56.77835464477539, -54.87751388549805, -52.9766731262207, -51.075836181640625, -49.17499542236328, -47.27415466308594, -45.373313903808594, -43.47247314453125, -41.57163619995117, -39.67079544067383, -37.769954681396484, -35.869117736816406, -33.96827697753906, -32.06743621826172, -30.166595458984375, -28.265756607055664, -26.364917755126953, -24.46407699584961, -22.563236236572266, -20.662397384643555, -18.761558532714844, -16.8607177734375, -14.959877967834473, -13.059038162231445, -11.158198356628418, -9.25735855102539, -7.356518745422363, -5.455678939819336, -3.5548391342163086, -1.6539993286132812, 0.2468404769897461, 2.1476802825927734, 4.048520088195801, 5.949359893798828, 7.8501996994018555, 9.751039505004883, 11.65187931060791, 13.552719116210938, 15.453558921813965, 17.354398727416992, 19.255237579345703, 21.156078338623047, 23.05691909790039, 24.9577579498291, 26.858596801757812, 28.759437561035156, 30.6602783203125, 32.561119079589844, 34.46195602416992, 36.362796783447266, 38.26363754272461, 40.16447448730469, 42.06531524658203, 43.966156005859375, 45.86699676513672, 47.76783752441406, 49.66867446899414, 51.569515228271484, 53.47035598754883, 55.371192932128906, 57.27203369140625, 59.172874450683594]}, "gradients/decoder.transformer.h.0.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 2.0, 2.0, 4.0, 4.0, 6.0, 8.0, 10.0, 10.0, 11.0, 24.0, 24.0, 18.0, 25.0, 33.0, 24.0, 35.0, 42.0, 54.0, 41.0, 44.0, 59.0, 53.0, 36.0, 43.0, 41.0, 47.0, 41.0, 39.0, 27.0, 27.0, 27.0, 24.0, 21.0, 18.0, 15.0, 11.0, 9.0, 16.0, 8.0, 1.0, 8.0, 8.0, 6.0, 4.0, 2.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0], "bins": [-108.6875, -105.3935546875, -102.099609375, -98.8056640625, -95.51171875, -92.2177734375, -88.923828125, -85.6298828125, -82.3359375, -79.0419921875, -75.748046875, -72.4541015625, -69.16015625, -65.8662109375, -62.572265625, -59.2783203125, -55.984375, -52.6904296875, -49.396484375, -46.1025390625, -42.80859375, -39.5146484375, -36.220703125, -32.9267578125, -29.6328125, -26.3388671875, -23.044921875, -19.7509765625, -16.45703125, -13.1630859375, -9.869140625, -6.5751953125, -3.28125, 0.0126953125, 3.306640625, 6.6005859375, 9.89453125, 13.1884765625, 16.482421875, 19.7763671875, 23.0703125, 26.3642578125, 29.658203125, 32.9521484375, 36.24609375, 39.5400390625, 42.833984375, 46.1279296875, 49.421875, 52.7158203125, 56.009765625, 59.3037109375, 62.59765625, 65.8916015625, 69.185546875, 72.4794921875, 75.7734375, 79.0673828125, 82.361328125, 85.6552734375, 88.94921875, 92.2431640625, 95.537109375, 98.8310546875, 102.125]}, "gradients/decoder.transformer.h.0.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 2.0, 1.0, 1.0, 4.0, 10.0, 13.0, 29.0, 24.0, 49.0, 75.0, 93.0, 145.0, 221.0, 331.0, 464.0, 710.0, 1087.0, 1617.0, 2432.0, 3586.0, 5402.0, 8233.0, 12596.0, 19356.0, 28994.0, 45470.0, 69884.0, 106764.0, 148354.0, 167261.0, 140708.0, 98707.0, 64539.0, 42018.0, 27372.0, 17581.0, 11690.0, 7690.0, 4993.0, 3358.0, 2223.0, 1507.0, 1004.0, 667.0, 427.0, 289.0, 176.0, 121.0, 95.0, 62.0, 46.0, 33.0, 27.0, 8.0, 9.0, 6.0, 3.0, 2.0, 2.0, 1.0, 1.0], "bins": [-7.30078125, -7.07550048828125, -6.8502197265625, -6.62493896484375, -6.399658203125, -6.17437744140625, -5.9490966796875, -5.72381591796875, -5.49853515625, -5.27325439453125, -5.0479736328125, -4.82269287109375, -4.597412109375, -4.37213134765625, -4.1468505859375, -3.92156982421875, -3.6962890625, -3.47100830078125, -3.2457275390625, -3.02044677734375, -2.795166015625, -2.56988525390625, -2.3446044921875, -2.11932373046875, -1.89404296875, -1.66876220703125, -1.4434814453125, -1.21820068359375, -0.992919921875, -0.76763916015625, -0.5423583984375, -0.31707763671875, -0.091796875, 0.13348388671875, 0.3587646484375, 0.58404541015625, 0.809326171875, 1.03460693359375, 1.2598876953125, 1.48516845703125, 1.71044921875, 1.93572998046875, 2.1610107421875, 2.38629150390625, 2.611572265625, 2.83685302734375, 3.0621337890625, 3.28741455078125, 3.5126953125, 3.73797607421875, 3.9632568359375, 4.18853759765625, 4.413818359375, 4.63909912109375, 4.8643798828125, 5.08966064453125, 5.31494140625, 5.54022216796875, 5.7655029296875, 5.99078369140625, 6.216064453125, 6.44134521484375, 6.6666259765625, 6.89190673828125, 7.1171875]}, "gradients/decoder.transformer.h.0.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 0.0, 2.0, 1.0, 2.0, 4.0, 5.0, 3.0, 9.0, 5.0, 8.0, 8.0, 8.0, 9.0, 13.0, 20.0, 14.0, 14.0, 25.0, 27.0, 34.0, 33.0, 37.0, 42.0, 43.0, 41.0, 39.0, 37.0, 32.0, 1057.0, 43.0, 27.0, 47.0, 42.0, 30.0, 36.0, 30.0, 31.0, 27.0, 27.0, 17.0, 22.0, 10.0, 12.0, 9.0, 9.0, 8.0, 12.0, 8.0, 4.0, 6.0, 6.0, 1.0, 4.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0], "bins": [-58.375, -56.5576171875, -54.740234375, -52.9228515625, -51.10546875, -49.2880859375, -47.470703125, -45.6533203125, -43.8359375, -42.0185546875, -40.201171875, -38.3837890625, -36.56640625, -34.7490234375, -32.931640625, -31.1142578125, -29.296875, -27.4794921875, -25.662109375, -23.8447265625, -22.02734375, -20.2099609375, -18.392578125, -16.5751953125, -14.7578125, -12.9404296875, -11.123046875, -9.3056640625, -7.48828125, -5.6708984375, -3.853515625, -2.0361328125, -0.21875, 1.5986328125, 3.416015625, 5.2333984375, 7.05078125, 8.8681640625, 10.685546875, 12.5029296875, 14.3203125, 16.1376953125, 17.955078125, 19.7724609375, 21.58984375, 23.4072265625, 25.224609375, 27.0419921875, 28.859375, 30.6767578125, 32.494140625, 34.3115234375, 36.12890625, 37.9462890625, 39.763671875, 41.5810546875, 43.3984375, 45.2158203125, 47.033203125, 48.8505859375, 50.66796875, 52.4853515625, 54.302734375, 56.1201171875, 57.9375]}, "gradients/decoder.transformer.h.0.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 5.0, 10.0, 4.0, 12.0, 16.0, 23.0, 32.0, 78.0, 93.0, 164.0, 220.0, 347.0, 590.0, 950.0, 1406.0, 2388.0, 3623.0, 6158.0, 9821.0, 16320.0, 26628.0, 44435.0, 74160.0, 120135.0, 179429.0, 1233862.0, 144637.0, 90795.0, 55542.0, 33109.0, 20108.0, 12451.0, 7475.0, 4479.0, 2795.0, 1726.0, 1057.0, 744.0, 458.0, 291.0, 199.0, 134.0, 81.0, 45.0, 30.0, 30.0, 16.0, 12.0, 4.0, 4.0, 4.0, 3.0, 1.0, 1.0, 0.0, 2.0], "bins": [-7.9140625, -7.67578125, -7.4375, -7.19921875, -6.9609375, -6.72265625, -6.484375, -6.24609375, -6.0078125, -5.76953125, -5.53125, -5.29296875, -5.0546875, -4.81640625, -4.578125, -4.33984375, -4.1015625, -3.86328125, -3.625, -3.38671875, -3.1484375, -2.91015625, -2.671875, -2.43359375, -2.1953125, -1.95703125, -1.71875, -1.48046875, -1.2421875, -1.00390625, -0.765625, -0.52734375, -0.2890625, -0.05078125, 0.1875, 0.42578125, 0.6640625, 0.90234375, 1.140625, 1.37890625, 1.6171875, 1.85546875, 2.09375, 2.33203125, 2.5703125, 2.80859375, 3.046875, 3.28515625, 3.5234375, 3.76171875, 4.0, 4.23828125, 4.4765625, 4.71484375, 4.953125, 5.19140625, 5.4296875, 5.66796875, 5.90625, 6.14453125, 6.3828125, 6.62109375, 6.859375, 7.09765625, 7.3359375]}, "gradients/decoder.transformer.h.0.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 3.0, 2.0, 2.0, 4.0, 4.0, 3.0, 5.0, 4.0, 11.0, 9.0, 13.0, 11.0, 16.0, 14.0, 26.0, 19.0, 19.0, 24.0, 22.0, 39.0, 32.0, 36.0, 31.0, 42.0, 43.0, 36.0, 49.0, 37.0, 33.0, 36.0, 44.0, 40.0, 38.0, 31.0, 35.0, 24.0, 16.0, 23.0, 19.0, 21.0, 24.0, 12.0, 6.0, 8.0, 9.0, 8.0, 7.0, 4.0, 8.0, 2.0, 2.0, 5.0, 3.0, 3.0, 1.0, 1.0, 1.0], "bins": [-0.049224853515625, -0.04775667190551758, -0.046288490295410156, -0.044820308685302734, -0.04335212707519531, -0.04188394546508789, -0.04041576385498047, -0.03894758224487305, -0.037479400634765625, -0.0360112190246582, -0.03454303741455078, -0.03307485580444336, -0.03160667419433594, -0.030138492584228516, -0.028670310974121094, -0.027202129364013672, -0.02573394775390625, -0.024265766143798828, -0.022797584533691406, -0.021329402923583984, -0.019861221313476562, -0.01839303970336914, -0.01692485809326172, -0.015456676483154297, -0.013988494873046875, -0.012520313262939453, -0.011052131652832031, -0.00958395004272461, -0.008115768432617188, -0.006647586822509766, -0.005179405212402344, -0.003711223602294922, -0.0022430419921875, -0.0007748603820800781, 0.0006933212280273438, 0.0021615028381347656, 0.0036296844482421875, 0.005097866058349609, 0.006566047668457031, 0.008034229278564453, 0.009502410888671875, 0.010970592498779297, 0.012438774108886719, 0.01390695571899414, 0.015375137329101562, 0.016843318939208984, 0.018311500549316406, 0.019779682159423828, 0.02124786376953125, 0.022716045379638672, 0.024184226989746094, 0.025652408599853516, 0.027120590209960938, 0.02858877182006836, 0.03005695343017578, 0.0315251350402832, 0.032993316650390625, 0.03446149826049805, 0.03592967987060547, 0.03739786148071289, 0.03886604309082031, 0.040334224700927734, 0.041802406311035156, 0.04327058792114258, 0.04473876953125]}, "gradients/decoder.transformer.h.0.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 4.0, 3.0, 7.0, 7.0, 8.0, 11.0, 17.0, 21.0, 21.0, 25.0, 39.0, 50.0, 62.0, 101.0, 113.0, 183.0, 262.0, 438.0, 705.0, 1413.0, 2699.0, 6075.0, 15147.0, 40183.0, 114895.0, 315833.0, 343787.0, 130440.0, 45674.0, 16710.0, 6884.0, 2961.0, 1575.0, 806.0, 459.0, 293.0, 194.0, 117.0, 98.0, 65.0, 42.0, 33.0, 25.0, 26.0, 12.0, 10.0, 13.0, 10.0, 2.0, 2.0, 3.0, 1.0, 3.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 2.0], "bins": [-0.336669921875, -0.325469970703125, -0.31427001953125, -0.303070068359375, -0.2918701171875, -0.280670166015625, -0.26947021484375, -0.258270263671875, -0.2470703125, -0.235870361328125, -0.22467041015625, -0.213470458984375, -0.2022705078125, -0.191070556640625, -0.17987060546875, -0.168670654296875, -0.157470703125, -0.146270751953125, -0.13507080078125, -0.123870849609375, -0.1126708984375, -0.101470947265625, -0.09027099609375, -0.079071044921875, -0.06787109375, -0.056671142578125, -0.04547119140625, -0.034271240234375, -0.0230712890625, -0.011871337890625, -0.00067138671875, 0.010528564453125, 0.021728515625, 0.032928466796875, 0.04412841796875, 0.055328369140625, 0.0665283203125, 0.077728271484375, 0.08892822265625, 0.100128173828125, 0.111328125, 0.122528076171875, 0.13372802734375, 0.144927978515625, 0.1561279296875, 0.167327880859375, 0.17852783203125, 0.189727783203125, 0.200927734375, 0.212127685546875, 0.22332763671875, 0.234527587890625, 0.2457275390625, 0.256927490234375, 0.26812744140625, 0.279327392578125, 0.29052734375, 0.301727294921875, 0.31292724609375, 0.324127197265625, 0.3353271484375, 0.346527099609375, 0.35772705078125, 0.368927001953125, 0.380126953125]}, "gradients/decoder.transformer.h.0.ln_cross_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 2.0, 5.0, 5.0, 6.0, 9.0, 17.0, 24.0, 31.0, 27.0, 41.0, 67.0, 68.0, 83.0, 72.0, 92.0, 77.0, 86.0, 61.0, 46.0, 48.0, 43.0, 30.0, 21.0, 17.0, 7.0, 9.0, 6.0, 3.0, 2.0, 2.0, 3.0, 1.0, 1.0, 1.0, 1.0], "bins": [-0.10573045164346695, -0.10330674797296524, -0.10088305175304413, -0.09845934808254242, -0.09603564441204071, -0.093611940741539, -0.09118823707103729, -0.08876454085111618, -0.08634083718061447, -0.08391713351011276, -0.08149343729019165, -0.07906973361968994, -0.07664602994918823, -0.07422232627868652, -0.07179862260818481, -0.0693749263882637, -0.066951222717762, -0.06452751904726028, -0.062103819102048874, -0.05968011915683746, -0.057256415486335754, -0.054832711815834045, -0.052409011870622635, -0.049985311925411224, -0.047561608254909515, -0.045137904584407806, -0.042714204639196396, -0.040290504693984985, -0.037866801023483276, -0.03544309735298157, -0.03301939740777016, -0.030595695599913597, -0.028171993792057037, -0.025748291984200478, -0.023324590176343918, -0.020900888368487358, -0.0184771865606308, -0.01605348475277424, -0.013629782944917679, -0.011206081137061119, -0.00878237932920456, -0.006358677521348, -0.00393497571349144, -0.00151127390563488, 0.0009124279022216797, 0.0033361297100782394, 0.005759831517934799, 0.008183533325791359, 0.010607235133647919, 0.013030936941504478, 0.015454638749361038, 0.017878340557217598, 0.020302042365074158, 0.022725744172930717, 0.025149445980787277, 0.027573147788643837, 0.029996849596500397, 0.032420553267002106, 0.034844253212213516, 0.03726795315742493, 0.039691656827926636, 0.042115360498428345, 0.044539060443639755, 0.046962760388851166, 0.049386464059352875]}, "gradients/decoder.transformer.h.0.ln_cross_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 2.0, 2.0, 4.0, 2.0, 3.0, 7.0, 7.0, 12.0, 8.0, 10.0, 8.0, 20.0, 15.0, 27.0, 17.0, 25.0, 30.0, 25.0, 26.0, 43.0, 40.0, 29.0, 40.0, 47.0, 40.0, 32.0, 40.0, 43.0, 42.0, 36.0, 43.0, 36.0, 36.0, 30.0, 26.0, 23.0, 19.0, 18.0, 15.0, 16.0, 13.0, 10.0, 8.0, 7.0, 6.0, 4.0, 4.0, 2.0, 6.0, 5.0, 1.0, 2.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 2.0], "bins": [-0.028862714767456055, -0.02783888950943947, -0.026815062388777733, -0.025791235268115997, -0.02476741001009941, -0.023743584752082825, -0.02271975763142109, -0.021695930510759354, -0.020672105252742767, -0.01964827999472618, -0.018624452874064445, -0.01760062575340271, -0.016576800495386124, -0.015552974306046963, -0.014529148116707802, -0.013505321927368641, -0.01248149573802948, -0.011457669548690319, -0.010433843359351158, -0.009410017170011997, -0.008386190980672836, -0.007362364791333675, -0.0063385386019945145, -0.0053147124126553535, -0.004290886223316193, -0.0032670600339770317, -0.002243233844637871, -0.0012194076552987099, -0.00019558146595954895, 0.000828244723379612, 0.0018520709127187729, 0.002875897102057934, 0.0038997232913970947, 0.004923549480736256, 0.0059473756700754166, 0.0069712018594145775, 0.007995028048753738, 0.0090188542380929, 0.01004268042743206, 0.011066506616771221, 0.012090332806110382, 0.013114158995449543, 0.014137985184788704, 0.015161811374127865, 0.016185637563467026, 0.017209462821483612, 0.018233289942145348, 0.019257117062807083, 0.02028094232082367, 0.021304767578840256, 0.02232859469950199, 0.023352421820163727, 0.024376247078180313, 0.0254000723361969, 0.026423899456858635, 0.02744772657752037, 0.028471551835536957, 0.029495377093553543, 0.03051920421421528, 0.031543031334877014, 0.0325668565928936, 0.03359068185091019, 0.03461451083421707, 0.03563833609223366, 0.036662161350250244]}, "gradients/decoder.transformer.h.0.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 2.0, 2.0, 4.0, 4.0, 6.0, 8.0, 9.0, 11.0, 11.0, 24.0, 23.0, 19.0, 26.0, 33.0, 23.0, 36.0, 42.0, 55.0, 38.0, 44.0, 60.0, 54.0, 35.0, 42.0, 42.0, 47.0, 41.0, 40.0, 27.0, 28.0, 26.0, 23.0, 21.0, 18.0, 15.0, 12.0, 8.0, 15.0, 9.0, 1.0, 8.0, 8.0, 6.0, 4.0, 2.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0], "bins": [-108.75, -105.453125, -102.15625, -98.859375, -95.5625, -92.265625, -88.96875, -85.671875, -82.375, -79.078125, -75.78125, -72.484375, -69.1875, -65.890625, -62.59375, -59.296875, -56.0, -52.703125, -49.40625, -46.109375, -42.8125, -39.515625, -36.21875, -32.921875, -29.625, -26.328125, -23.03125, -19.734375, -16.4375, -13.140625, -9.84375, -6.546875, -3.25, 0.046875, 3.34375, 6.640625, 9.9375, 13.234375, 16.53125, 19.828125, 23.125, 26.421875, 29.71875, 33.015625, 36.3125, 39.609375, 42.90625, 46.203125, 49.5, 52.796875, 56.09375, 59.390625, 62.6875, 65.984375, 69.28125, 72.578125, 75.875, 79.171875, 82.46875, 85.765625, 89.0625, 92.359375, 95.65625, 98.953125, 102.25]}, "gradients/decoder.transformer.h.0.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 0.0, 2.0, 5.0, 3.0, 7.0, 5.0, 1.0, 11.0, 14.0, 24.0, 27.0, 41.0, 39.0, 79.0, 120.0, 169.0, 277.0, 435.0, 735.0, 1280.0, 2269.0, 4528.0, 10340.0, 32787.0, 156858.0, 569239.0, 205508.0, 40925.0, 11969.0, 4928.0, 2432.0, 1281.0, 792.0, 524.0, 289.0, 189.0, 138.0, 70.0, 43.0, 54.0, 35.0, 21.0, 18.0, 20.0, 7.0, 6.0, 10.0, 2.0, 2.0, 4.0, 3.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-20.9375, -20.204833984375, -19.47216796875, -18.739501953125, -18.0068359375, -17.274169921875, -16.54150390625, -15.808837890625, -15.076171875, -14.343505859375, -13.61083984375, -12.878173828125, -12.1455078125, -11.412841796875, -10.68017578125, -9.947509765625, -9.21484375, -8.482177734375, -7.74951171875, -7.016845703125, -6.2841796875, -5.551513671875, -4.81884765625, -4.086181640625, -3.353515625, -2.620849609375, -1.88818359375, -1.155517578125, -0.4228515625, 0.309814453125, 1.04248046875, 1.775146484375, 2.5078125, 3.240478515625, 3.97314453125, 4.705810546875, 5.4384765625, 6.171142578125, 6.90380859375, 7.636474609375, 8.369140625, 9.101806640625, 9.83447265625, 10.567138671875, 11.2998046875, 12.032470703125, 12.76513671875, 13.497802734375, 14.23046875, 14.963134765625, 15.69580078125, 16.428466796875, 17.1611328125, 17.893798828125, 18.62646484375, 19.359130859375, 20.091796875, 20.824462890625, 21.55712890625, 22.289794921875, 23.0224609375, 23.755126953125, 24.48779296875, 25.220458984375, 25.953125]}, "gradients/decoder.transformer.h.0.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 2.0, 1.0, 2.0, 2.0, 8.0, 7.0, 2.0, 4.0, 8.0, 14.0, 17.0, 28.0, 20.0, 18.0, 31.0, 22.0, 28.0, 36.0, 27.0, 45.0, 43.0, 43.0, 49.0, 1799.0, 345.0, 41.0, 42.0, 47.0, 30.0, 43.0, 37.0, 37.0, 20.0, 22.0, 27.0, 20.0, 16.0, 16.0, 15.0, 12.0, 6.0, 12.0, 7.0, 6.0, 0.0, 1.0, 2.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-110.5, -106.400390625, -102.30078125, -98.201171875, -94.1015625, -90.001953125, -85.90234375, -81.802734375, -77.703125, -73.603515625, -69.50390625, -65.404296875, -61.3046875, -57.205078125, -53.10546875, -49.005859375, -44.90625, -40.806640625, -36.70703125, -32.607421875, -28.5078125, -24.408203125, -20.30859375, -16.208984375, -12.109375, -8.009765625, -3.91015625, 0.189453125, 4.2890625, 8.388671875, 12.48828125, 16.587890625, 20.6875, 24.787109375, 28.88671875, 32.986328125, 37.0859375, 41.185546875, 45.28515625, 49.384765625, 53.484375, 57.583984375, 61.68359375, 65.783203125, 69.8828125, 73.982421875, 78.08203125, 82.181640625, 86.28125, 90.380859375, 94.48046875, 98.580078125, 102.6796875, 106.779296875, 110.87890625, 114.978515625, 119.078125, 123.177734375, 127.27734375, 131.376953125, 135.4765625, 139.576171875, 143.67578125, 147.775390625, 151.875]}, "gradients/decoder.transformer.h.0.attn.c_attn.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 0.0, 0.0, 2.0, 1.0, 2.0, 2.0, 3.0, 6.0, 7.0, 9.0, 16.0, 20.0, 32.0, 37.0, 74.0, 104.0, 163.0, 255.0, 383.0, 746.0, 1478.0, 4893.0, 60375.0, 2903500.0, 160674.0, 8784.0, 1965.0, 880.0, 506.0, 269.0, 182.0, 113.0, 82.0, 46.0, 35.0, 19.0, 17.0, 9.0, 8.0, 4.0, 6.0, 3.0, 0.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0], "bins": [-43.59375, -42.21630859375, -40.8388671875, -39.46142578125, -38.083984375, -36.70654296875, -35.3291015625, -33.95166015625, -32.57421875, -31.19677734375, -29.8193359375, -28.44189453125, -27.064453125, -25.68701171875, -24.3095703125, -22.93212890625, -21.5546875, -20.17724609375, -18.7998046875, -17.42236328125, -16.044921875, -14.66748046875, -13.2900390625, -11.91259765625, -10.53515625, -9.15771484375, -7.7802734375, -6.40283203125, -5.025390625, -3.64794921875, -2.2705078125, -0.89306640625, 0.484375, 1.86181640625, 3.2392578125, 4.61669921875, 5.994140625, 7.37158203125, 8.7490234375, 10.12646484375, 11.50390625, 12.88134765625, 14.2587890625, 15.63623046875, 17.013671875, 18.39111328125, 19.7685546875, 21.14599609375, 22.5234375, 23.90087890625, 25.2783203125, 26.65576171875, 28.033203125, 29.41064453125, 30.7880859375, 32.16552734375, 33.54296875, 34.92041015625, 36.2978515625, 37.67529296875, 39.052734375, 40.43017578125, 41.8076171875, 43.18505859375, 44.5625]}, "gradients/decoder.transformer.h.0.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 3.0, 2.0, 2.0, 2.0, 6.0, 5.0, 6.0, 12.0, 21.0, 45.0, 115.0, 236.0, 249.0, 172.0, 73.0, 32.0, 11.0, 10.0, 4.0, 3.0, 3.0, 1.0, 3.0, 1.0, 0.0, 1.0, 1.0], "bins": [-582.6221313476562, -570.7791137695312, -558.9360961914062, -547.0930786132812, -535.25, -523.406982421875, -511.56396484375, -499.720947265625, -487.8779296875, -476.034912109375, -464.1918640136719, -452.3488464355469, -440.5058288574219, -428.6628112792969, -416.81976318359375, -404.97674560546875, -393.13372802734375, -381.29071044921875, -369.4476623535156, -357.6046447753906, -345.7616271972656, -333.9186096191406, -322.0755615234375, -310.2325439453125, -298.3894958496094, -286.5464782714844, -274.70343017578125, -262.86041259765625, -251.01739501953125, -239.1743621826172, -227.33132934570312, -215.48831176757812, -203.64527893066406, -191.80224609375, -179.959228515625, -168.11619567871094, -156.27317810058594, -144.43014526367188, -132.58712768554688, -120.74409484863281, -108.90106964111328, -97.05804443359375, -85.21501922607422, -73.37199401855469, -61.52896499633789, -49.68593978881836, -37.84291076660156, -25.99988555908203, -14.1568603515625, -2.3138341903686523, 9.529191970825195, 21.37221908569336, 33.21524429321289, 45.05826950073242, 56.90129852294922, 68.74432373046875, 80.58734893798828, 92.43037414550781, 104.27339935302734, 116.11642456054688, 127.95945739746094, 139.80247497558594, 151.6455078125, 163.488525390625, 175.33155822753906]}, "gradients/decoder.transformer.h.0.ln_1.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 4.0, 1.0, 8.0, 3.0, 10.0, 5.0, 9.0, 8.0, 11.0, 19.0, 21.0, 22.0, 27.0, 22.0, 30.0, 36.0, 37.0, 39.0, 35.0, 60.0, 46.0, 52.0, 61.0, 39.0, 46.0, 59.0, 33.0, 47.0, 31.0, 34.0, 36.0, 18.0, 21.0, 18.0, 14.0, 9.0, 8.0, 5.0, 6.0, 5.0, 5.0, 4.0, 3.0, 1.0, 5.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-170.09039306640625, -164.2017364501953, -158.31307983398438, -152.4244384765625, -146.53578186035156, -140.64712524414062, -134.7584686279297, -128.86981201171875, -122.98116302490234, -117.0925064086914, -111.203857421875, -105.31520080566406, -99.42654418945312, -93.53789520263672, -87.64923858642578, -81.76058959960938, -75.87193298339844, -69.9832763671875, -64.0946273803711, -58.205970764160156, -52.317317962646484, -46.42866516113281, -40.540008544921875, -34.6513557434082, -28.76270294189453, -22.87405014038086, -16.985395431518555, -11.09674072265625, -5.208087921142578, 0.6805648803710938, 6.569221496582031, 12.457874298095703, 18.346511840820312, 24.235164642333984, 30.12381935119629, 36.012474060058594, 41.901126861572266, 47.78977966308594, 53.678436279296875, 59.56708908081055, 65.45574188232422, 71.34439849853516, 77.23304748535156, 83.1217041015625, 89.01036071777344, 94.89900970458984, 100.78766632080078, 106.67631530761719, 112.56497192382812, 118.45362854003906, 124.34227752685547, 130.23092651367188, 136.1195831298828, 142.00823974609375, 147.8968963623047, 153.78555297851562, 159.6741943359375, 165.56285095214844, 171.45150756835938, 177.34014892578125, 183.2288055419922, 189.11746215820312, 195.00611877441406, 200.894775390625, 206.78343200683594]}, "gradients/decoder.transformer.wpe.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 3.0, 6.0, 3.0, 7.0, 7.0, 11.0, 18.0, 17.0, 34.0, 32.0, 43.0, 50.0, 67.0, 78.0, 95.0, 132.0, 153.0, 204.0, 228.0, 284.0, 358.0, 365.0, 531.0, 649.0, 1040830.0, 1142.0, 578.0, 490.0, 406.0, 323.0, 257.0, 232.0, 184.0, 133.0, 132.0, 106.0, 82.0, 69.0, 54.0, 42.0, 30.0, 27.0, 24.0, 15.0, 11.0, 9.0, 4.0, 4.0, 2.0, 2.0, 3.0, 1.0, 2.0], "bins": [-61.37311553955078, -59.66419982910156, -57.95528793334961, -56.24637222290039, -54.53746032714844, -52.82854461669922, -51.11962890625, -49.41071319580078, -47.70180130004883, -45.99288558959961, -44.283973693847656, -42.57505798339844, -40.86614227294922, -39.157230377197266, -37.44831466674805, -35.739402770996094, -34.030487060546875, -32.321571350097656, -30.612659454345703, -28.903743743896484, -27.1948299407959, -25.485916137695312, -23.777000427246094, -22.068086624145508, -20.359172821044922, -18.650259017944336, -16.94134521484375, -15.232429504394531, -13.523515701293945, -11.81460189819336, -10.105687141418457, -8.396772384643555, -6.687854766845703, -4.978940486907959, -3.270026206970215, -1.5611119270324707, 0.14780235290527344, 1.8567161560058594, 3.5656309127807617, 5.274545669555664, 6.98345947265625, 8.692373275756836, 10.401288032531738, 12.11020278930664, 13.819116592407227, 15.528030395507812, 17.23694610595703, 18.945859909057617, 20.654773712158203, 22.36368751525879, 24.072601318359375, 25.781517028808594, 27.49043083190918, 29.199344635009766, 30.908260345458984, 32.61717224121094, 34.326087951660156, 36.035003662109375, 37.74391555786133, 39.45283126831055, 41.1617431640625, 42.87065887451172, 44.57957458496094, 46.288490295410156, 47.99740219116211]}, "gradients/decoder.transformer.wte.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 2.0, 3.0, 4.0, 3.0, 11.0, 5.0, 10.0, 6.0, 3.0, 16.0, 17.0, 34.0, 132.0, 28456.0, 51434396.0, 110.0, 30.0, 15.0, 12.0, 13.0, 8.0, 6.0, 5.0, 5.0, 0.0, 1.0, 1.0, 4.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5340.0, -5073.3935546875, -4806.78662109375, -4540.1796875, -4273.5732421875, -4006.966552734375, -3740.35986328125, -3473.753173828125, -3207.146484375, -2940.539794921875, -2673.93310546875, -2407.326416015625, -2140.7197265625, -1874.113037109375, -1607.50634765625, -1340.899658203125, -1074.29296875, -807.686279296875, -541.07958984375, -274.472900390625, -7.8662109375, 258.740478515625, 525.34716796875, 791.953857421875, 1058.560546875, 1325.167236328125, 1591.77392578125, 1858.380615234375, 2124.9873046875, 2391.593994140625, 2658.20068359375, 2924.807373046875, 3191.4140625, 3458.020751953125, 3724.62744140625, 3991.234130859375, 4257.8408203125, 4524.447265625, 4791.05419921875, 5057.6611328125, 5324.267578125, 5590.8740234375, 5857.48095703125, 6124.087890625, 6390.6943359375, 6657.30078125, 6923.90771484375, 7190.5146484375, 7457.12109375, 7723.7275390625, 7990.33447265625, 8256.94140625, 8523.5478515625, 8790.154296875, 9056.76171875, 9323.3681640625, 9589.974609375, 9856.5810546875, 10123.1875, 10389.794921875, 10656.4013671875, 10923.0078125, 11189.615234375, 11456.2216796875, 11722.828125]}, "gradients/encoder.adapter.layers.2.conv.weight": {"_type": "histogram", "values": [6.0, 6.0, 7.0, 8.0, 12.0, 15.0, 36.0, 47.0, 60.0, 85.0, 109.0, 194.0, 301.0, 472.0, 683.0, 1014.0, 1516.0, 2496.0, 3931.0, 6452.0, 10008.0, 15946.0, 26015.0, 41538.0, 68227.0, 109726.0, 175957.0, 275775.0, 412405.0, 3107569.0, 1009566.0, 369621.0, 244620.0, 154876.0, 95944.0, 59310.0, 36576.0, 22373.0, 14093.0, 8701.0, 5583.0, 3453.0, 2086.0, 1394.0, 932.0, 582.0, 366.0, 244.0, 178.0, 106.0, 74.0, 49.0, 34.0, 23.0, 12.0, 11.0, 8.0, 5.0, 5.0, 4.0, 2.0, 8.0, 0.0, 1.0], "bins": [-2.46875, -2.386077880859375, -2.30340576171875, -2.220733642578125, -2.1380615234375, -2.055389404296875, -1.97271728515625, -1.890045166015625, -1.807373046875, -1.724700927734375, -1.64202880859375, -1.559356689453125, -1.4766845703125, -1.394012451171875, -1.31134033203125, -1.228668212890625, -1.14599609375, -1.063323974609375, -0.98065185546875, -0.897979736328125, -0.8153076171875, -0.732635498046875, -0.64996337890625, -0.567291259765625, -0.484619140625, -0.401947021484375, -0.31927490234375, -0.236602783203125, -0.1539306640625, -0.071258544921875, 0.01141357421875, 0.094085693359375, 0.1767578125, 0.259429931640625, 0.34210205078125, 0.424774169921875, 0.5074462890625, 0.590118408203125, 0.67279052734375, 0.755462646484375, 0.838134765625, 0.920806884765625, 1.00347900390625, 1.086151123046875, 1.1688232421875, 1.251495361328125, 1.33416748046875, 1.416839599609375, 1.49951171875, 1.582183837890625, 1.66485595703125, 1.747528076171875, 1.8302001953125, 1.912872314453125, 1.99554443359375, 2.078216552734375, 2.160888671875, 2.243560791015625, 2.32623291015625, 2.408905029296875, 2.4915771484375, 2.574249267578125, 2.65692138671875, 2.739593505859375, 2.822265625]}, "gradients/encoder.adapter.layers.2.conv.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 5.0, 0.0, 5.0, 4.0, 4.0, 8.0, 10.0, 15.0, 11.0, 15.0, 21.0, 32.0, 30.0, 31.0, 27.0, 35.0, 38.0, 28.0, 39.0, 54.0, 37.0, 99.0, 939.0, 135.0, 38.0, 55.0, 38.0, 32.0, 29.0, 45.0, 20.0, 27.0, 25.0, 24.0, 12.0, 9.0, 20.0, 12.0, 4.0, 3.0, 4.0, 5.0, 4.0, 3.0, 5.0, 7.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-25.421875, -24.666748046875, -23.91162109375, -23.156494140625, -22.4013671875, -21.646240234375, -20.89111328125, -20.135986328125, -19.380859375, -18.625732421875, -17.87060546875, -17.115478515625, -16.3603515625, -15.605224609375, -14.85009765625, -14.094970703125, -13.33984375, -12.584716796875, -11.82958984375, -11.074462890625, -10.3193359375, -9.564208984375, -8.80908203125, -8.053955078125, -7.298828125, -6.543701171875, -5.78857421875, -5.033447265625, -4.2783203125, -3.523193359375, -2.76806640625, -2.012939453125, -1.2578125, -0.502685546875, 0.25244140625, 1.007568359375, 1.7626953125, 2.517822265625, 3.27294921875, 4.028076171875, 4.783203125, 5.538330078125, 6.29345703125, 7.048583984375, 7.8037109375, 8.558837890625, 9.31396484375, 10.069091796875, 10.82421875, 11.579345703125, 12.33447265625, 13.089599609375, 13.8447265625, 14.599853515625, 15.35498046875, 16.110107421875, 16.865234375, 17.620361328125, 18.37548828125, 19.130615234375, 19.8857421875, 20.640869140625, 21.39599609375, 22.151123046875, 22.90625]}, "gradients/encoder.adapter.layers.1.conv.weight": {"_type": "histogram", "values": [2.0, 3.0, 2.0, 6.0, 10.0, 8.0, 21.0, 29.0, 50.0, 61.0, 120.0, 145.0, 205.0, 281.0, 472.0, 724.0, 1190.0, 1802.0, 2794.0, 4503.0, 7233.0, 11396.0, 18444.0, 29979.0, 49503.0, 81359.0, 136072.0, 225952.0, 364831.0, 807796.0, 3352760.0, 460852.0, 288806.0, 175153.0, 105332.0, 62691.0, 38254.0, 23380.0, 14454.0, 9189.0, 5789.0, 3553.0, 2216.0, 1454.0, 911.0, 566.0, 385.0, 227.0, 172.0, 112.0, 60.0, 43.0, 44.0, 15.0, 7.0, 11.0, 7.0, 4.0, 2.0, 4.0, 6.0, 0.0, 0.0, 4.0], "bins": [-2.6640625, -2.57598876953125, -2.4879150390625, -2.39984130859375, -2.311767578125, -2.22369384765625, -2.1356201171875, -2.04754638671875, -1.95947265625, -1.87139892578125, -1.7833251953125, -1.69525146484375, -1.607177734375, -1.51910400390625, -1.4310302734375, -1.34295654296875, -1.2548828125, -1.16680908203125, -1.0787353515625, -0.99066162109375, -0.902587890625, -0.81451416015625, -0.7264404296875, -0.63836669921875, -0.55029296875, -0.46221923828125, -0.3741455078125, -0.28607177734375, -0.197998046875, -0.10992431640625, -0.0218505859375, 0.06622314453125, 0.154296875, 0.24237060546875, 0.3304443359375, 0.41851806640625, 0.506591796875, 0.59466552734375, 0.6827392578125, 0.77081298828125, 0.85888671875, 0.94696044921875, 1.0350341796875, 1.12310791015625, 1.211181640625, 1.29925537109375, 1.3873291015625, 1.47540283203125, 1.5634765625, 1.65155029296875, 1.7396240234375, 1.82769775390625, 1.915771484375, 2.00384521484375, 2.0919189453125, 2.17999267578125, 2.26806640625, 2.35614013671875, 2.4442138671875, 2.53228759765625, 2.620361328125, 2.70843505859375, 2.7965087890625, 2.88458251953125, 2.97265625]}, "gradients/encoder.adapter.layers.1.conv.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 0.0, 2.0, 2.0, 6.0, 3.0, 7.0, 4.0, 8.0, 14.0, 12.0, 13.0, 16.0, 23.0, 26.0, 32.0, 30.0, 31.0, 24.0, 37.0, 35.0, 38.0, 44.0, 55.0, 301.0, 790.0, 51.0, 49.0, 50.0, 39.0, 39.0, 40.0, 28.0, 26.0, 29.0, 16.0, 17.0, 25.0, 13.0, 9.0, 11.0, 12.0, 3.0, 5.0, 7.0, 4.0, 3.0, 5.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0], "bins": [-17.078125, -16.549072265625, -16.02001953125, -15.490966796875, -14.9619140625, -14.432861328125, -13.90380859375, -13.374755859375, -12.845703125, -12.316650390625, -11.78759765625, -11.258544921875, -10.7294921875, -10.200439453125, -9.67138671875, -9.142333984375, -8.61328125, -8.084228515625, -7.55517578125, -7.026123046875, -6.4970703125, -5.968017578125, -5.43896484375, -4.909912109375, -4.380859375, -3.851806640625, -3.32275390625, -2.793701171875, -2.2646484375, -1.735595703125, -1.20654296875, -0.677490234375, -0.1484375, 0.380615234375, 0.90966796875, 1.438720703125, 1.9677734375, 2.496826171875, 3.02587890625, 3.554931640625, 4.083984375, 4.613037109375, 5.14208984375, 5.671142578125, 6.2001953125, 6.729248046875, 7.25830078125, 7.787353515625, 8.31640625, 8.845458984375, 9.37451171875, 9.903564453125, 10.4326171875, 10.961669921875, 11.49072265625, 12.019775390625, 12.548828125, 13.077880859375, 13.60693359375, 14.135986328125, 14.6650390625, 15.194091796875, 15.72314453125, 16.252197265625, 16.78125]}, "gradients/encoder.adapter.layers.0.conv.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 4.0, 2.0, 2.0, 0.0, 3.0, 7.0, 5.0, 13.0, 11.0, 16.0, 20.0, 22.0, 29.0, 49.0, 70.0, 89.0, 126.0, 186.0, 279.0, 334.0, 502.0, 797.0, 1207.0, 1936.0, 3688.0, 7773.0, 21346.0, 84349.0, 5902716.0, 203896.0, 37777.0, 11969.0, 5085.0, 2581.0, 1474.0, 943.0, 656.0, 430.0, 285.0, 184.0, 166.0, 111.0, 95.0, 57.0, 28.0, 39.0, 19.0, 25.0, 11.0, 12.0, 3.0, 10.0, 0.0, 1.0, 4.0, 2.0, 5.0, 1.0, 1.0, 2.0], "bins": [-20.296875, -19.6767578125, -19.056640625, -18.4365234375, -17.81640625, -17.1962890625, -16.576171875, -15.9560546875, -15.3359375, -14.7158203125, -14.095703125, -13.4755859375, -12.85546875, -12.2353515625, -11.615234375, -10.9951171875, -10.375, -9.7548828125, -9.134765625, -8.5146484375, -7.89453125, -7.2744140625, -6.654296875, -6.0341796875, -5.4140625, -4.7939453125, -4.173828125, -3.5537109375, -2.93359375, -2.3134765625, -1.693359375, -1.0732421875, -0.453125, 0.1669921875, 0.787109375, 1.4072265625, 2.02734375, 2.6474609375, 3.267578125, 3.8876953125, 4.5078125, 5.1279296875, 5.748046875, 6.3681640625, 6.98828125, 7.6083984375, 8.228515625, 8.8486328125, 9.46875, 10.0888671875, 10.708984375, 11.3291015625, 11.94921875, 12.5693359375, 13.189453125, 13.8095703125, 14.4296875, 15.0498046875, 15.669921875, 16.2900390625, 16.91015625, 17.5302734375, 18.150390625, 18.7705078125, 19.390625]}, "gradients/encoder.adapter.layers.0.conv.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 3.0, 1.0, 6.0, 4.0, 10.0, 5.0, 7.0, 13.0, 20.0, 19.0, 27.0, 27.0, 24.0, 34.0, 36.0, 28.0, 45.0, 49.0, 39.0, 42.0, 112.0, 783.0, 267.0, 53.0, 38.0, 44.0, 34.0, 37.0, 30.0, 33.0, 30.0, 27.0, 21.0, 21.0, 16.0, 7.0, 11.0, 11.0, 4.0, 4.0, 5.0, 1.0, 4.0, 2.0, 2.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-14.4609375, -14.032958984375, -13.60498046875, -13.177001953125, -12.7490234375, -12.321044921875, -11.89306640625, -11.465087890625, -11.037109375, -10.609130859375, -10.18115234375, -9.753173828125, -9.3251953125, -8.897216796875, -8.46923828125, -8.041259765625, -7.61328125, -7.185302734375, -6.75732421875, -6.329345703125, -5.9013671875, -5.473388671875, -5.04541015625, -4.617431640625, -4.189453125, -3.761474609375, -3.33349609375, -2.905517578125, -2.4775390625, -2.049560546875, -1.62158203125, -1.193603515625, -0.765625, -0.337646484375, 0.09033203125, 0.518310546875, 0.9462890625, 1.374267578125, 1.80224609375, 2.230224609375, 2.658203125, 3.086181640625, 3.51416015625, 3.942138671875, 4.3701171875, 4.798095703125, 5.22607421875, 5.654052734375, 6.08203125, 6.510009765625, 6.93798828125, 7.365966796875, 7.7939453125, 8.221923828125, 8.64990234375, 9.077880859375, 9.505859375, 9.933837890625, 10.36181640625, 10.789794921875, 11.2177734375, 11.645751953125, 12.07373046875, 12.501708984375, 12.9296875]}, "gradients/encoder.encoder.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 11.0, 54.0, 268.0, 497.0, 142.0, 30.0, 6.0, 4.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-177.8101806640625, -174.4757080078125, -171.14122009277344, -167.80674743652344, -164.47227478027344, -161.13778686523438, -157.80331420898438, -154.46884155273438, -151.1343536376953, -147.7998809814453, -144.46539306640625, -141.13092041015625, -137.79644775390625, -134.4619598388672, -131.1274871826172, -127.79301452636719, -124.45854187011719, -121.12406158447266, -117.78958892822266, -114.45510864257812, -111.1206283569336, -107.7861557006836, -104.45167541503906, -101.11720275878906, -97.78271484375, -94.44823455810547, -91.11376190185547, -87.77928161621094, -84.4448013305664, -81.1103286743164, -77.77584838867188, -74.44137573242188, -71.10689544677734, -67.77241516113281, -64.43794250488281, -61.10346221923828, -57.768985748291016, -54.43450927734375, -51.10002899169922, -47.76555252075195, -44.43107604980469, -41.09659957885742, -37.76211929321289, -34.427642822265625, -31.09316635131836, -27.75868797302246, -24.424209594726562, -21.089733123779297, -17.755252838134766, -14.420775413513184, -11.086297988891602, -7.751819610595703, -4.417342185974121, -1.082864761352539, 2.2516136169433594, 5.586090087890625, 8.920568466186523, 12.255045890808105, 15.589523315429688, 18.924001693725586, 22.258480072021484, 25.59295654296875, 28.92743492126465, 32.26190948486328, 35.59638977050781]}, "gradients/encoder.encoder.layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 3.0, 8.0, 12.0, 11.0, 14.0, 15.0, 26.0, 31.0, 40.0, 52.0, 57.0, 58.0, 76.0, 68.0, 65.0, 55.0, 57.0, 59.0, 59.0, 53.0, 53.0, 43.0, 32.0, 23.0, 18.0, 12.0, 7.0, 4.0, 4.0, 4.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-20.64204216003418, -19.494304656982422, -18.346567153930664, -17.198829650878906, -16.05109405517578, -14.903356552124023, -13.755619049072266, -12.607882499694824, -11.460144996643066, -10.312407493591309, -9.164670944213867, -8.01693344116211, -6.86919641494751, -5.72145938873291, -4.573721885681152, -3.425985336303711, -2.278247833251953, -1.130510687828064, 0.017226457595825195, 1.164963722229004, 2.3127007484436035, 3.460437774658203, 4.608175277709961, 5.755911827087402, 6.90364933013916, 8.051386833190918, 9.19912338256836, 10.346860885620117, 11.494598388671875, 12.642334938049316, 13.790072441101074, 14.937808990478516, 16.08554458618164, 17.2332820892334, 18.381019592285156, 19.52875518798828, 20.67649269104004, 21.824230194091797, 22.971967697143555, 24.119705200195312, 25.267440795898438, 26.415178298950195, 27.562915802001953, 28.710651397705078, 29.858388900756836, 31.006126403808594, 32.15386199951172, 33.30160140991211, 34.4493408203125, 35.597076416015625, 36.744815826416016, 37.89255142211914, 39.04029083251953, 40.188026428222656, 41.33576202392578, 42.48350143432617, 43.6312370300293, 44.77897262573242, 45.92671203613281, 47.07444763183594, 48.22218704223633, 49.36992263793945, 50.517662048339844, 51.66539764404297, 52.813133239746094]}, "gradients/encoder.encoder.layers.23.feed_forward.output_dense.weight": {"_type": "histogram", "values": [2.0, 2.0, 4.0, 4.0, 5.0, 8.0, 8.0, 14.0, 20.0, 32.0, 38.0, 38.0, 68.0, 110.0, 170.0, 316.0, 533.0, 847.0, 1730.0, 3318.0, 7776.0, 22489.0, 174730.0, 3925999.0, 35588.0, 10599.0, 4389.0, 2216.0, 1163.0, 648.0, 430.0, 270.0, 188.0, 149.0, 85.0, 74.0, 53.0, 36.0, 19.0, 23.0, 20.0, 20.0, 13.0, 9.0, 3.0, 7.0, 6.0, 4.0, 4.0, 4.0, 2.0, 1.0, 1.0, 2.0, 2.0, 5.0, 2.0, 3.0, 1.0, 3.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.5126953125, -0.49056243896484375, -0.4684295654296875, -0.44629669189453125, -0.424163818359375, -0.40203094482421875, -0.3798980712890625, -0.35776519775390625, -0.33563232421875, -0.31349945068359375, -0.2913665771484375, -0.26923370361328125, -0.247100830078125, -0.22496795654296875, -0.2028350830078125, -0.18070220947265625, -0.1585693359375, -0.13643646240234375, -0.1143035888671875, -0.09217071533203125, -0.070037841796875, -0.04790496826171875, -0.0257720947265625, -0.00363922119140625, 0.01849365234375, 0.04062652587890625, 0.0627593994140625, 0.08489227294921875, 0.107025146484375, 0.12915802001953125, 0.1512908935546875, 0.17342376708984375, 0.195556640625, 0.21768951416015625, 0.2398223876953125, 0.26195526123046875, 0.284088134765625, 0.30622100830078125, 0.3283538818359375, 0.35048675537109375, 0.37261962890625, 0.39475250244140625, 0.4168853759765625, 0.43901824951171875, 0.461151123046875, 0.48328399658203125, 0.5054168701171875, 0.5275497436523438, 0.5496826171875, 0.5718154907226562, 0.5939483642578125, 0.6160812377929688, 0.638214111328125, 0.6603469848632812, 0.6824798583984375, 0.7046127319335938, 0.72674560546875, 0.7488784790039062, 0.7710113525390625, 0.7931442260742188, 0.815277099609375, 0.8374099731445312, 0.8595428466796875, 0.8816757202148438, 0.90380859375]}, "gradients/encoder.encoder.layers.23.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 0.0, 6.0, 3.0, 9.0, 5.0, 8.0, 11.0, 12.0, 13.0, 22.0, 13.0, 315.0, 488.0, 17.0, 12.0, 13.0, 14.0, 11.0, 7.0, 5.0, 6.0, 4.0, 4.0, 2.0, 3.0, 4.0, 0.0, 1.0, 4.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.084716796875, -0.08087539672851562, -0.07703399658203125, -0.07319259643554688, -0.0693511962890625, -0.06550979614257812, -0.06166839599609375, -0.057826995849609375, -0.053985595703125, -0.050144195556640625, -0.04630279541015625, -0.042461395263671875, -0.0386199951171875, -0.034778594970703125, -0.03093719482421875, -0.027095794677734375, -0.02325439453125, -0.019412994384765625, -0.01557159423828125, -0.011730194091796875, -0.0078887939453125, -0.004047393798828125, -0.00020599365234375, 0.003635406494140625, 0.007476806640625, 0.011318206787109375, 0.01515960693359375, 0.019001007080078125, 0.0228424072265625, 0.026683807373046875, 0.03052520751953125, 0.034366607666015625, 0.0382080078125, 0.042049407958984375, 0.04589080810546875, 0.049732208251953125, 0.0535736083984375, 0.057415008544921875, 0.06125640869140625, 0.06509780883789062, 0.068939208984375, 0.07278060913085938, 0.07662200927734375, 0.08046340942382812, 0.0843048095703125, 0.08814620971679688, 0.09198760986328125, 0.09582901000976562, 0.09967041015625, 0.10351181030273438, 0.10735321044921875, 0.11119461059570312, 0.1150360107421875, 0.11887741088867188, 0.12271881103515625, 0.12656021118164062, 0.130401611328125, 0.13424301147460938, 0.13808441162109375, 0.14192581176757812, 0.1457672119140625, 0.14960861206054688, 0.15345001220703125, 0.15729141235351562, 0.1611328125]}, "gradients/encoder.encoder.layers.23.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 3.0, 3.0, 5.0, 9.0, 13.0, 12.0, 24.0, 28.0, 34.0, 63.0, 74.0, 115.0, 187.0, 308.0, 509.0, 937.0, 1921.0, 4305.0, 12019.0, 46891.0, 417325.0, 3480003.0, 181885.0, 31031.0, 9178.0, 3703.0, 1607.0, 849.0, 468.0, 259.0, 160.0, 106.0, 70.0, 61.0, 41.0, 34.0, 11.0, 8.0, 10.0, 8.0, 5.0, 3.0, 3.0, 2.0, 0.0, 0.0, 0.0, 5.0, 1.0], "bins": [-0.70703125, -0.6875762939453125, -0.668121337890625, -0.6486663818359375, -0.62921142578125, -0.6097564697265625, -0.590301513671875, -0.5708465576171875, -0.5513916015625, -0.5319366455078125, -0.512481689453125, -0.4930267333984375, -0.47357177734375, -0.4541168212890625, -0.434661865234375, -0.4152069091796875, -0.395751953125, -0.3762969970703125, -0.356842041015625, -0.3373870849609375, -0.31793212890625, -0.2984771728515625, -0.279022216796875, -0.2595672607421875, -0.2401123046875, -0.2206573486328125, -0.201202392578125, -0.1817474365234375, -0.16229248046875, -0.1428375244140625, -0.123382568359375, -0.1039276123046875, -0.08447265625, -0.0650177001953125, -0.045562744140625, -0.0261077880859375, -0.00665283203125, 0.0128021240234375, 0.032257080078125, 0.0517120361328125, 0.0711669921875, 0.0906219482421875, 0.110076904296875, 0.1295318603515625, 0.14898681640625, 0.1684417724609375, 0.187896728515625, 0.2073516845703125, 0.226806640625, 0.2462615966796875, 0.265716552734375, 0.2851715087890625, 0.30462646484375, 0.3240814208984375, 0.343536376953125, 0.3629913330078125, 0.3824462890625, 0.4019012451171875, 0.421356201171875, 0.4408111572265625, 0.46026611328125, 0.4797210693359375, 0.499176025390625, 0.5186309814453125, 0.5380859375]}, "gradients/encoder.encoder.layers.23.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 3.0, 1.0, 0.0, 5.0, 8.0, 11.0, 16.0, 17.0, 21.0, 25.0, 50.0, 80.0, 124.0, 167.0, 356.0, 973.0, 1231.0, 375.0, 191.0, 123.0, 83.0, 53.0, 39.0, 23.0, 15.0, 25.0, 15.0, 5.0, 9.0, 5.0, 4.0, 2.0, 8.0, 5.0, 2.0, 2.0, 3.0, 1.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.1385498046875, -0.13323402404785156, -0.12791824340820312, -0.12260246276855469, -0.11728668212890625, -0.11197090148925781, -0.10665512084960938, -0.10133934020996094, -0.0960235595703125, -0.09070777893066406, -0.08539199829101562, -0.08007621765136719, -0.07476043701171875, -0.06944465637207031, -0.06412887573242188, -0.05881309509277344, -0.053497314453125, -0.04818153381347656, -0.042865753173828125, -0.03754997253417969, -0.03223419189453125, -0.026918411254882812, -0.021602630615234375, -0.016286849975585938, -0.0109710693359375, -0.0056552886962890625, -0.000339508056640625, 0.0049762725830078125, 0.01029205322265625, 0.015607833862304688, 0.020923614501953125, 0.026239395141601562, 0.03155517578125, 0.03687095642089844, 0.042186737060546875, 0.04750251770019531, 0.05281829833984375, 0.05813407897949219, 0.06344985961914062, 0.06876564025878906, 0.0740814208984375, 0.07939720153808594, 0.08471298217773438, 0.09002876281738281, 0.09534454345703125, 0.10066032409667969, 0.10597610473632812, 0.11129188537597656, 0.116607666015625, 0.12192344665527344, 0.12723922729492188, 0.1325550079345703, 0.13787078857421875, 0.1431865692138672, 0.14850234985351562, 0.15381813049316406, 0.1591339111328125, 0.16444969177246094, 0.16976547241210938, 0.1750812530517578, 0.18039703369140625, 0.1857128143310547, 0.19102859497070312, 0.19634437561035156, 0.20166015625]}, "gradients/encoder.encoder.layers.23.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 6.0, 12.0, 110.0, 549.0, 269.0, 53.0, 8.0, 3.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.296167850494385, -5.166160583496094, -5.036153316497803, -4.906146049499512, -4.776138782501221, -4.64613151550293, -4.516124248504639, -4.386116981506348, -4.256110191345215, -4.126102924346924, -3.996095657348633, -3.866088390350342, -3.736081123352051, -3.6060738563537598, -3.476066827774048, -3.346059560775757, -3.2160520553588867, -3.0860447883605957, -2.9560375213623047, -2.8260302543640137, -2.6960229873657227, -2.5660157203674316, -2.4360086917877197, -2.3060014247894287, -2.1759941577911377, -2.0459868907928467, -1.9159796237945557, -1.7859724760055542, -1.6559652090072632, -1.5259579420089722, -1.3959507942199707, -1.2659435272216797, -1.1359362602233887, -1.0059289932250977, -0.8759217858314514, -0.7459145784378052, -0.6159073114395142, -0.48590004444122314, -0.3558928370475769, -0.22588562965393066, -0.09587836265563965, 0.03412887454032898, 0.1641361117362976, 0.29414334893226624, 0.42415058612823486, 0.5541578531265259, 0.6841650605201721, 0.8141722679138184, 0.9441795349121094, 1.0741868019104004, 1.2041940689086914, 1.3342012166976929, 1.4642084836959839, 1.594215750694275, 1.7242228984832764, 1.8542301654815674, 1.9842374324798584, 2.1142446994781494, 2.2442519664764404, 2.3742592334747314, 2.5042662620544434, 2.6342735290527344, 2.7642807960510254, 2.8942880630493164, 3.0242953300476074]}, "gradients/encoder.encoder.layers.23.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 1.0, 2.0, 2.0, 2.0, 1.0, 13.0, 21.0, 21.0, 49.0, 61.0, 76.0, 83.0, 93.0, 102.0, 90.0, 101.0, 78.0, 55.0, 57.0, 35.0, 27.0, 18.0, 8.0, 5.0, 2.0, 2.0, 5.0, 1.0, 1.0, 3.0], "bins": [-1.8116366863250732, -1.7747716903686523, -1.737906575202942, -1.7010414600372314, -1.6641764640808105, -1.6273114681243896, -1.5904463529586792, -1.5535812377929688, -1.5167162418365479, -1.479851245880127, -1.4429861307144165, -1.406121015548706, -1.3692560195922852, -1.3323910236358643, -1.2955259084701538, -1.2586607933044434, -1.2217957973480225, -1.1849308013916016, -1.1480656862258911, -1.1112005710601807, -1.0743355751037598, -1.0374705791473389, -1.0006054639816284, -0.9637404084205627, -0.9268753528594971, -0.8900102972984314, -0.8531452417373657, -0.8162801861763, -0.7794151306152344, -0.7425500750541687, -0.705685019493103, -0.6688199639320374, -0.6319547891616821, -0.5950897336006165, -0.5582246780395508, -0.5213596224784851, -0.48449456691741943, -0.44762951135635376, -0.4107644557952881, -0.3738994002342224, -0.33703434467315674, -0.30016928911209106, -0.2633042335510254, -0.22643917798995972, -0.18957412242889404, -0.15270906686782837, -0.1158440113067627, -0.07897895574569702, -0.04211390018463135, -0.005248844623565674, 0.0316162109375, 0.06848126649856567, 0.10534632205963135, 0.14221137762069702, 0.1790764331817627, 0.21594148874282837, 0.25280654430389404, 0.2896715998649597, 0.3265366554260254, 0.36340171098709106, 0.40026676654815674, 0.4371318221092224, 0.4739968776702881, 0.5108619332313538, 0.5477269887924194]}, "gradients/encoder.encoder.layers.23.attention.out_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 3.0, 1.0, 3.0, 0.0, 1.0, 4.0, 4.0, 4.0, 6.0, 7.0, 21.0, 19.0, 34.0, 43.0, 65.0, 75.0, 125.0, 160.0, 283.0, 494.0, 798.0, 1435.0, 2845.0, 5961.0, 14425.0, 48778.0, 886375.0, 57805.0, 15624.0, 6401.0, 2970.0, 1511.0, 874.0, 486.0, 303.0, 202.0, 136.0, 81.0, 57.0, 41.0, 27.0, 21.0, 15.0, 12.0, 10.0, 7.0, 8.0, 6.0, 0.0, 1.0, 4.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.6953125, -0.6709518432617188, -0.6465911865234375, -0.6222305297851562, -0.597869873046875, -0.5735092163085938, -0.5491485595703125, -0.5247879028320312, -0.50042724609375, -0.47606658935546875, -0.4517059326171875, -0.42734527587890625, -0.402984619140625, -0.37862396240234375, -0.3542633056640625, -0.32990264892578125, -0.3055419921875, -0.28118133544921875, -0.2568206787109375, -0.23246002197265625, -0.208099365234375, -0.18373870849609375, -0.1593780517578125, -0.13501739501953125, -0.11065673828125, -0.08629608154296875, -0.0619354248046875, -0.03757476806640625, -0.013214111328125, 0.01114654541015625, 0.0355072021484375, 0.05986785888671875, 0.084228515625, 0.10858917236328125, 0.1329498291015625, 0.15731048583984375, 0.181671142578125, 0.20603179931640625, 0.2303924560546875, 0.25475311279296875, 0.27911376953125, 0.30347442626953125, 0.3278350830078125, 0.35219573974609375, 0.376556396484375, 0.40091705322265625, 0.4252777099609375, 0.44963836669921875, 0.4739990234375, 0.49835968017578125, 0.5227203369140625, 0.5470809936523438, 0.571441650390625, 0.5958023071289062, 0.6201629638671875, 0.6445236206054688, 0.66888427734375, 0.6932449340820312, 0.7176055908203125, 0.7419662475585938, 0.766326904296875, 0.7906875610351562, 0.8150482177734375, 0.8394088745117188, 0.86376953125]}, "gradients/encoder.encoder.layers.23.attention.out_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 3.0, 4.0, 2.0, 4.0, 3.0, 7.0, 8.0, 6.0, 11.0, 11.0, 15.0, 13.0, 13.0, 105.0, 359.0, 290.0, 59.0, 20.0, 11.0, 8.0, 7.0, 9.0, 8.0, 7.0, 6.0, 7.0, 3.0, 3.0, 2.0, 1.0, 3.0, 0.0, 3.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.07769775390625, -0.07469844818115234, -0.07169914245605469, -0.06869983673095703, -0.06570053100585938, -0.06270122528076172, -0.05970191955566406, -0.056702613830566406, -0.05370330810546875, -0.050704002380371094, -0.04770469665527344, -0.04470539093017578, -0.041706085205078125, -0.03870677947998047, -0.03570747375488281, -0.032708168029785156, -0.0297088623046875, -0.026709556579589844, -0.023710250854492188, -0.02071094512939453, -0.017711639404296875, -0.014712333679199219, -0.011713027954101562, -0.008713722229003906, -0.00571441650390625, -0.0027151107788085938, 0.0002841949462890625, 0.0032835006713867188, 0.006282806396484375, 0.009282112121582031, 0.012281417846679688, 0.015280723571777344, 0.018280029296875, 0.021279335021972656, 0.024278640747070312, 0.02727794647216797, 0.030277252197265625, 0.03327655792236328, 0.03627586364746094, 0.039275169372558594, 0.04227447509765625, 0.045273780822753906, 0.04827308654785156, 0.05127239227294922, 0.054271697998046875, 0.05727100372314453, 0.06027030944824219, 0.06326961517333984, 0.0662689208984375, 0.06926822662353516, 0.07226753234863281, 0.07526683807373047, 0.07826614379882812, 0.08126544952392578, 0.08426475524902344, 0.0872640609741211, 0.09026336669921875, 0.0932626724243164, 0.09626197814941406, 0.09926128387451172, 0.10226058959960938, 0.10525989532470703, 0.10825920104980469, 0.11125850677490234, 0.1142578125]}, "gradients/encoder.encoder.layers.23.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 2.0, 0.0, 2.0, 2.0, 2.0, 2.0, 4.0, 6.0, 10.0, 10.0, 18.0, 20.0, 23.0, 35.0, 54.0, 76.0, 101.0, 180.0, 295.0, 510.0, 894.0, 2156.0, 5881.0, 30118.0, 578569.0, 396382.0, 23884.0, 5326.0, 1923.0, 812.0, 457.0, 276.0, 147.0, 101.0, 83.0, 60.0, 50.0, 30.0, 21.0, 13.0, 7.0, 5.0, 3.0, 5.0, 7.0, 4.0, 4.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.841796875, -0.812713623046875, -0.78363037109375, -0.754547119140625, -0.7254638671875, -0.696380615234375, -0.66729736328125, -0.638214111328125, -0.609130859375, -0.580047607421875, -0.55096435546875, -0.521881103515625, -0.4927978515625, -0.463714599609375, -0.43463134765625, -0.405548095703125, -0.37646484375, -0.347381591796875, -0.31829833984375, -0.289215087890625, -0.2601318359375, -0.231048583984375, -0.20196533203125, -0.172882080078125, -0.143798828125, -0.114715576171875, -0.08563232421875, -0.056549072265625, -0.0274658203125, 0.001617431640625, 0.03070068359375, 0.059783935546875, 0.0888671875, 0.117950439453125, 0.14703369140625, 0.176116943359375, 0.2052001953125, 0.234283447265625, 0.26336669921875, 0.292449951171875, 0.321533203125, 0.350616455078125, 0.37969970703125, 0.408782958984375, 0.4378662109375, 0.466949462890625, 0.49603271484375, 0.525115966796875, 0.55419921875, 0.583282470703125, 0.61236572265625, 0.641448974609375, 0.6705322265625, 0.699615478515625, 0.72869873046875, 0.757781982421875, 0.786865234375, 0.815948486328125, 0.84503173828125, 0.874114990234375, 0.9031982421875, 0.932281494140625, 0.96136474609375, 0.990447998046875, 1.01953125]}, "gradients/encoder.encoder.layers.23.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 5.0, 1.0, 5.0, 9.0, 7.0, 8.0, 12.0, 20.0, 19.0, 16.0, 17.0, 24.0, 25.0, 34.0, 47.0, 43.0, 49.0, 53.0, 56.0, 49.0, 40.0, 46.0, 57.0, 48.0, 54.0, 40.0, 44.0, 40.0, 24.0, 24.0, 24.0, 14.0, 11.0, 8.0, 15.0, 4.0, 7.0, 5.0, 7.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.32666015625, -0.3145408630371094, -0.30242156982421875, -0.2903022766113281, -0.2781829833984375, -0.2660636901855469, -0.25394439697265625, -0.24182510375976562, -0.229705810546875, -0.21758651733398438, -0.20546722412109375, -0.19334793090820312, -0.1812286376953125, -0.16910934448242188, -0.15699005126953125, -0.14487075805664062, -0.13275146484375, -0.12063217163085938, -0.10851287841796875, -0.09639358520507812, -0.0842742919921875, -0.07215499877929688, -0.06003570556640625, -0.047916412353515625, -0.035797119140625, -0.023677825927734375, -0.01155853271484375, 0.000560760498046875, 0.0126800537109375, 0.024799346923828125, 0.03691864013671875, 0.049037933349609375, 0.0611572265625, 0.07327651977539062, 0.08539581298828125, 0.09751510620117188, 0.1096343994140625, 0.12175369262695312, 0.13387298583984375, 0.14599227905273438, 0.158111572265625, 0.17023086547851562, 0.18235015869140625, 0.19446945190429688, 0.2065887451171875, 0.21870803833007812, 0.23082733154296875, 0.24294662475585938, 0.25506591796875, 0.2671852111816406, 0.27930450439453125, 0.2914237976074219, 0.3035430908203125, 0.3156623840332031, 0.32778167724609375, 0.3399009704589844, 0.352020263671875, 0.3641395568847656, 0.37625885009765625, 0.3883781433105469, 0.4004974365234375, 0.4126167297363281, 0.42473602294921875, 0.4368553161621094, 0.448974609375]}, "gradients/encoder.encoder.layers.23.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 3.0, 0.0, 2.0, 0.0, 3.0, 3.0, 5.0, 7.0, 7.0, 17.0, 26.0, 19.0, 41.0, 68.0, 128.0, 270.0, 599.0, 1947.0, 12358.0, 978381.0, 49369.0, 3597.0, 924.0, 360.0, 182.0, 97.0, 57.0, 22.0, 24.0, 19.0, 9.0, 5.0, 5.0, 1.0, 8.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.96484375, -1.902862548828125, -1.84088134765625, -1.778900146484375, -1.7169189453125, -1.654937744140625, -1.59295654296875, -1.530975341796875, -1.468994140625, -1.407012939453125, -1.34503173828125, -1.283050537109375, -1.2210693359375, -1.159088134765625, -1.09710693359375, -1.035125732421875, -0.97314453125, -0.911163330078125, -0.84918212890625, -0.787200927734375, -0.7252197265625, -0.663238525390625, -0.60125732421875, -0.539276123046875, -0.477294921875, -0.415313720703125, -0.35333251953125, -0.291351318359375, -0.2293701171875, -0.167388916015625, -0.10540771484375, -0.043426513671875, 0.0185546875, 0.080535888671875, 0.14251708984375, 0.204498291015625, 0.2664794921875, 0.328460693359375, 0.39044189453125, 0.452423095703125, 0.514404296875, 0.576385498046875, 0.63836669921875, 0.700347900390625, 0.7623291015625, 0.824310302734375, 0.88629150390625, 0.948272705078125, 1.01025390625, 1.072235107421875, 1.13421630859375, 1.196197509765625, 1.2581787109375, 1.320159912109375, 1.38214111328125, 1.444122314453125, 1.506103515625, 1.568084716796875, 1.63006591796875, 1.692047119140625, 1.7540283203125, 1.816009521484375, 1.87799072265625, 1.939971923828125, 2.001953125]}, "gradients/encoder.encoder.layers.23.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 2.0, 5.0, 1.0, 7.0, 7.0, 20.0, 21.0, 31.0, 53.0, 54.0, 102.0, 105.0, 135.0, 113.0, 113.0, 74.0, 59.0, 41.0, 31.0, 12.0, 11.0, 5.0, 4.0, 4.0, 1.0, 1.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.5299530029296875e-05, -4.395097494125366e-05, -4.260241985321045e-05, -4.1253864765167236e-05, -3.9905309677124023e-05, -3.855675458908081e-05, -3.72081995010376e-05, -3.5859644412994385e-05, -3.451108932495117e-05, -3.316253423690796e-05, -3.1813979148864746e-05, -3.0465424060821533e-05, -2.911686897277832e-05, -2.7768313884735107e-05, -2.6419758796691895e-05, -2.507120370864868e-05, -2.372264862060547e-05, -2.2374093532562256e-05, -2.1025538444519043e-05, -1.967698335647583e-05, -1.8328428268432617e-05, -1.6979873180389404e-05, -1.563131809234619e-05, -1.4282763004302979e-05, -1.2934207916259766e-05, -1.1585652828216553e-05, -1.023709774017334e-05, -8.888542652130127e-06, -7.539987564086914e-06, -6.191432476043701e-06, -4.842877388000488e-06, -3.4943222999572754e-06, -2.1457672119140625e-06, -7.972121238708496e-07, 5.513429641723633e-07, 1.8998980522155762e-06, 3.248453140258789e-06, 4.597008228302002e-06, 5.945563316345215e-06, 7.294118404388428e-06, 8.64267349243164e-06, 9.991228580474854e-06, 1.1339783668518066e-05, 1.268833875656128e-05, 1.4036893844604492e-05, 1.5385448932647705e-05, 1.6734004020690918e-05, 1.808255910873413e-05, 1.9431114196777344e-05, 2.0779669284820557e-05, 2.212822437286377e-05, 2.3476779460906982e-05, 2.4825334548950195e-05, 2.6173889636993408e-05, 2.752244472503662e-05, 2.8870999813079834e-05, 3.0219554901123047e-05, 3.156810998916626e-05, 3.291666507720947e-05, 3.4265220165252686e-05, 3.56137752532959e-05, 3.696233034133911e-05, 3.8310885429382324e-05, 3.965944051742554e-05, 4.100799560546875e-05]}, "gradients/encoder.encoder.layers.23.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 7.0, 3.0, 3.0, 6.0, 8.0, 12.0, 21.0, 29.0, 43.0, 53.0, 87.0, 167.0, 211.0, 385.0, 646.0, 1245.0, 2521.0, 6165.0, 19179.0, 112460.0, 768168.0, 106796.0, 18954.0, 6000.0, 2528.0, 1224.0, 671.0, 370.0, 219.0, 130.0, 78.0, 56.0, 36.0, 29.0, 21.0, 8.0, 7.0, 7.0, 0.0, 4.0, 7.0, 0.0, 1.0, 2.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.7470703125, -0.724761962890625, -0.70245361328125, -0.680145263671875, -0.6578369140625, -0.635528564453125, -0.61322021484375, -0.590911865234375, -0.568603515625, -0.546295166015625, -0.52398681640625, -0.501678466796875, -0.4793701171875, -0.457061767578125, -0.43475341796875, -0.412445068359375, -0.39013671875, -0.367828369140625, -0.34552001953125, -0.323211669921875, -0.3009033203125, -0.278594970703125, -0.25628662109375, -0.233978271484375, -0.211669921875, -0.189361572265625, -0.16705322265625, -0.144744873046875, -0.1224365234375, -0.100128173828125, -0.07781982421875, -0.055511474609375, -0.033203125, -0.010894775390625, 0.01141357421875, 0.033721923828125, 0.0560302734375, 0.078338623046875, 0.10064697265625, 0.122955322265625, 0.145263671875, 0.167572021484375, 0.18988037109375, 0.212188720703125, 0.2344970703125, 0.256805419921875, 0.27911376953125, 0.301422119140625, 0.32373046875, 0.346038818359375, 0.36834716796875, 0.390655517578125, 0.4129638671875, 0.435272216796875, 0.45758056640625, 0.479888916015625, 0.502197265625, 0.524505615234375, 0.54681396484375, 0.569122314453125, 0.5914306640625, 0.613739013671875, 0.63604736328125, 0.658355712890625, 0.6806640625]}, "gradients/encoder.encoder.layers.23.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 4.0, 6.0, 11.0, 15.0, 19.0, 45.0, 59.0, 99.0, 145.0, 139.0, 143.0, 108.0, 70.0, 41.0, 29.0, 19.0, 13.0, 12.0, 6.0, 6.0, 5.0, 4.0, 5.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0], "bins": [-0.93896484375, -0.9169731140136719, -0.8949813842773438, -0.8729896545410156, -0.8509979248046875, -0.8290061950683594, -0.8070144653320312, -0.7850227355957031, -0.763031005859375, -0.7410392761230469, -0.7190475463867188, -0.6970558166503906, -0.6750640869140625, -0.6530723571777344, -0.6310806274414062, -0.6090888977050781, -0.58709716796875, -0.5651054382324219, -0.5431137084960938, -0.5211219787597656, -0.4991302490234375, -0.4771385192871094, -0.45514678955078125, -0.4331550598144531, -0.411163330078125, -0.3891716003417969, -0.36717987060546875, -0.3451881408691406, -0.3231964111328125, -0.3012046813964844, -0.27921295166015625, -0.2572212219238281, -0.2352294921875, -0.21323776245117188, -0.19124603271484375, -0.16925430297851562, -0.1472625732421875, -0.12527084350585938, -0.10327911376953125, -0.08128738403320312, -0.059295654296875, -0.037303924560546875, -0.01531219482421875, 0.006679534912109375, 0.0286712646484375, 0.050662994384765625, 0.07265472412109375, 0.09464645385742188, 0.11663818359375, 0.13862991333007812, 0.16062164306640625, 0.18261337280273438, 0.2046051025390625, 0.22659683227539062, 0.24858856201171875, 0.2705802917480469, 0.292572021484375, 0.3145637512207031, 0.33655548095703125, 0.3585472106933594, 0.3805389404296875, 0.4025306701660156, 0.42452239990234375, 0.4465141296386719, 0.468505859375]}, "gradients/encoder.encoder.layers.23.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 3.0, 6.0, 10.0, 20.0, 39.0, 202.0, 528.0, 147.0, 28.0, 14.0, 7.0, 4.0, 2.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-13.350286483764648, -12.983917236328125, -12.617548942565918, -12.251179695129395, -11.884811401367188, -11.518442153930664, -11.15207290649414, -10.785703659057617, -10.41933536529541, -10.052966117858887, -9.68659782409668, -9.320228576660156, -8.953859329223633, -8.587491035461426, -8.221121788024902, -7.854753017425537, -7.488384246826172, -7.122015476226807, -6.755646705627441, -6.389277458190918, -6.022908687591553, -5.6565399169921875, -5.290170669555664, -4.923801898956299, -4.557433128356934, -4.191064357757568, -3.824695348739624, -3.4583263397216797, -3.0919575691223145, -2.725588798522949, -2.359219789505005, -1.9928507804870605, -1.6264820098876953, -1.2601131200790405, -0.8937442302703857, -0.527375340461731, -0.16100645065307617, 0.2053624391555786, 0.5717313289642334, 0.9381003379821777, 1.304469108581543, 1.6708379983901978, 2.0372068881988525, 2.403575897216797, 2.769944667816162, 3.1363134384155273, 3.5026824474334717, 3.869051456451416, 4.235420227050781, 4.6017889976501465, 4.968157768249512, 5.334527015686035, 5.7008957862854, 6.067264556884766, 6.433633804321289, 6.800002574920654, 7.1663713455200195, 7.532740116119385, 7.89910888671875, 8.265478134155273, 8.631847381591797, 8.998215675354004, 9.364584922790527, 9.730953216552734, 10.097322463989258]}, "gradients/encoder.encoder.layers.23.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 3.0, 2.0, 1.0, 0.0, 2.0, 1.0, 2.0, 3.0, 4.0, 2.0, 5.0, 5.0, 4.0, 13.0, 22.0, 21.0, 23.0, 52.0, 87.0, 101.0, 119.0, 133.0, 100.0, 103.0, 69.0, 44.0, 24.0, 15.0, 12.0, 5.0, 5.0, 3.0, 2.0, 7.0, 6.0, 2.0, 0.0, 4.0, 4.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-6.68892240524292, -6.488001823425293, -6.287081718444824, -6.086161136627197, -5.88524055480957, -5.684320449829102, -5.483399868011475, -5.282479286193848, -5.081559181213379, -4.880638599395752, -4.679718494415283, -4.478797912597656, -4.2778778076171875, -4.0769572257995605, -3.8760366439819336, -3.6751163005828857, -3.474195957183838, -3.27327561378479, -3.072355270385742, -2.8714346885681152, -2.6705143451690674, -2.4695940017700195, -2.2686734199523926, -2.0677530765533447, -1.8668327331542969, -1.665912389755249, -1.4649919271469116, -1.2640714645385742, -1.0631511211395264, -0.8622307777404785, -0.6613103151321411, -0.4603898525238037, -0.25946950912475586, -0.05854910612106323, 0.1423712968826294, 0.343291699886322, 0.5442121028900146, 0.7451324462890625, 0.9460529088973999, 1.1469733715057373, 1.3478937149047852, 1.548814058303833, 1.7497345209121704, 1.9506549835205078, 2.1515753269195557, 2.3524956703186035, 2.5534162521362305, 2.7543365955352783, 2.955256938934326, 3.156177282333374, 3.357097625732422, 3.558018207550049, 3.7589385509490967, 3.9598588943481445, 4.1607794761657715, 4.361700057983398, 4.562620162963867, 4.763540744781494, 4.964460849761963, 5.16538143157959, 5.366301536560059, 5.5672221183776855, 5.7681427001953125, 5.969062805175781, 6.169983386993408]}, "gradients/encoder.encoder.layers.22.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 4.0, 3.0, 5.0, 5.0, 7.0, 21.0, 34.0, 50.0, 282.0, 20983.0, 4167917.0, 4318.0, 443.0, 135.0, 40.0, 20.0, 14.0, 5.0, 4.0, 1.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.380859375, -3.126739501953125, -2.87261962890625, -2.618499755859375, -2.3643798828125, -2.110260009765625, -1.85614013671875, -1.602020263671875, -1.347900390625, -1.093780517578125, -0.83966064453125, -0.585540771484375, -0.3314208984375, -0.077301025390625, 0.17681884765625, 0.430938720703125, 0.68505859375, 0.939178466796875, 1.19329833984375, 1.447418212890625, 1.7015380859375, 1.955657958984375, 2.20977783203125, 2.463897705078125, 2.718017578125, 2.972137451171875, 3.22625732421875, 3.480377197265625, 3.7344970703125, 3.988616943359375, 4.24273681640625, 4.496856689453125, 4.7509765625, 5.005096435546875, 5.25921630859375, 5.513336181640625, 5.7674560546875, 6.021575927734375, 6.27569580078125, 6.529815673828125, 6.783935546875, 7.038055419921875, 7.29217529296875, 7.546295166015625, 7.8004150390625, 8.054534912109375, 8.30865478515625, 8.562774658203125, 8.81689453125, 9.071014404296875, 9.32513427734375, 9.579254150390625, 9.8333740234375, 10.087493896484375, 10.34161376953125, 10.595733642578125, 10.849853515625, 11.103973388671875, 11.35809326171875, 11.612213134765625, 11.8663330078125, 12.120452880859375, 12.37457275390625, 12.628692626953125, 12.8828125]}, "gradients/encoder.encoder.layers.22.feed_forward.output_dense.bias": {"_type": "histogram", "values": [3.0, 1.0, 2.0, 0.0, 2.0, 4.0, 5.0, 5.0, 7.0, 3.0, 10.0, 5.0, 10.0, 10.0, 18.0, 34.0, 110.0, 241.0, 260.0, 142.0, 51.0, 25.0, 11.0, 13.0, 9.0, 2.0, 7.0, 7.0, 2.0, 4.0, 7.0, 5.0, 1.0, 0.0, 2.0, 2.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.04693603515625, -0.04436492919921875, -0.0417938232421875, -0.03922271728515625, -0.036651611328125, -0.03408050537109375, -0.0315093994140625, -0.02893829345703125, -0.0263671875, -0.02379608154296875, -0.0212249755859375, -0.01865386962890625, -0.016082763671875, -0.01351165771484375, -0.0109405517578125, -0.00836944580078125, -0.00579833984375, -0.00322723388671875, -0.0006561279296875, 0.00191497802734375, 0.004486083984375, 0.00705718994140625, 0.0096282958984375, 0.01219940185546875, 0.0147705078125, 0.01734161376953125, 0.0199127197265625, 0.02248382568359375, 0.025054931640625, 0.02762603759765625, 0.0301971435546875, 0.03276824951171875, 0.03533935546875, 0.03791046142578125, 0.0404815673828125, 0.04305267333984375, 0.045623779296875, 0.04819488525390625, 0.0507659912109375, 0.05333709716796875, 0.055908203125, 0.05847930908203125, 0.0610504150390625, 0.06362152099609375, 0.066192626953125, 0.06876373291015625, 0.0713348388671875, 0.07390594482421875, 0.07647705078125, 0.07904815673828125, 0.0816192626953125, 0.08419036865234375, 0.086761474609375, 0.08933258056640625, 0.0919036865234375, 0.09447479248046875, 0.0970458984375, 0.09961700439453125, 0.1021881103515625, 0.10475921630859375, 0.107330322265625, 0.10990142822265625, 0.1124725341796875, 0.11504364013671875, 0.11761474609375]}, "gradients/encoder.encoder.layers.22.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 9.0, 15.0, 20.0, 22.0, 26.0, 49.0, 66.0, 123.0, 209.0, 328.0, 635.0, 1778.0, 5277.0, 24364.0, 272254.0, 3772188.0, 97290.0, 13881.0, 3552.0, 1210.0, 464.0, 247.0, 98.0, 83.0, 33.0, 24.0, 22.0, 4.0, 5.0, 2.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.33984375, -1.3057785034179688, -1.2717132568359375, -1.2376480102539062, -1.203582763671875, -1.1695175170898438, -1.1354522705078125, -1.1013870239257812, -1.06732177734375, -1.0332565307617188, -0.9991912841796875, -0.9651260375976562, -0.931060791015625, -0.8969955444335938, -0.8629302978515625, -0.8288650512695312, -0.7947998046875, -0.7607345581054688, -0.7266693115234375, -0.6926040649414062, -0.658538818359375, -0.6244735717773438, -0.5904083251953125, -0.5563430786132812, -0.52227783203125, -0.48821258544921875, -0.4541473388671875, -0.42008209228515625, -0.386016845703125, -0.35195159912109375, -0.3178863525390625, -0.28382110595703125, -0.249755859375, -0.21569061279296875, -0.1816253662109375, -0.14756011962890625, -0.113494873046875, -0.07942962646484375, -0.0453643798828125, -0.01129913330078125, 0.02276611328125, 0.05683135986328125, 0.0908966064453125, 0.12496185302734375, 0.159027099609375, 0.19309234619140625, 0.2271575927734375, 0.26122283935546875, 0.2952880859375, 0.32935333251953125, 0.3634185791015625, 0.39748382568359375, 0.431549072265625, 0.46561431884765625, 0.4996795654296875, 0.5337448120117188, 0.56781005859375, 0.6018753051757812, 0.6359405517578125, 0.6700057983398438, 0.704071044921875, 0.7381362915039062, 0.7722015380859375, 0.8062667846679688, 0.84033203125]}, "gradients/encoder.encoder.layers.22.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 1.0, 4.0, 3.0, 1.0, 5.0, 3.0, 12.0, 5.0, 8.0, 8.0, 11.0, 9.0, 19.0, 25.0, 29.0, 36.0, 59.0, 84.0, 129.0, 271.0, 787.0, 1447.0, 523.0, 203.0, 118.0, 63.0, 44.0, 42.0, 30.0, 13.0, 19.0, 11.0, 10.0, 6.0, 7.0, 4.0, 9.0, 5.0, 2.0, 5.0, 2.0, 1.0, 5.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 2.0, 0.0, 2.0, 3.0, 0.0, 0.0, 1.0], "bins": [-0.08026123046875, -0.0775299072265625, -0.074798583984375, -0.0720672607421875, -0.0693359375, -0.0666046142578125, -0.063873291015625, -0.0611419677734375, -0.05841064453125, -0.0556793212890625, -0.052947998046875, -0.0502166748046875, -0.0474853515625, -0.0447540283203125, -0.042022705078125, -0.0392913818359375, -0.03656005859375, -0.0338287353515625, -0.031097412109375, -0.0283660888671875, -0.025634765625, -0.0229034423828125, -0.020172119140625, -0.0174407958984375, -0.01470947265625, -0.0119781494140625, -0.009246826171875, -0.0065155029296875, -0.0037841796875, -0.0010528564453125, 0.001678466796875, 0.0044097900390625, 0.00714111328125, 0.0098724365234375, 0.012603759765625, 0.0153350830078125, 0.01806640625, 0.0207977294921875, 0.023529052734375, 0.0262603759765625, 0.02899169921875, 0.0317230224609375, 0.034454345703125, 0.0371856689453125, 0.0399169921875, 0.0426483154296875, 0.045379638671875, 0.0481109619140625, 0.05084228515625, 0.0535736083984375, 0.056304931640625, 0.0590362548828125, 0.061767578125, 0.0644989013671875, 0.067230224609375, 0.0699615478515625, 0.07269287109375, 0.0754241943359375, 0.078155517578125, 0.0808868408203125, 0.0836181640625, 0.0863494873046875, 0.089080810546875, 0.0918121337890625, 0.09454345703125]}, "gradients/encoder.encoder.layers.22.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 15.0, 85.0, 850.0, 56.0, 6.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.9730353355407715, -7.801554203033447, -7.630073070526123, -7.458591461181641, -7.287110328674316, -7.115629196166992, -6.944148063659668, -6.772666931152344, -6.6011857986450195, -6.429704666137695, -6.258223533630371, -6.086742401123047, -5.9152607917785645, -5.74377965927124, -5.572298526763916, -5.400817394256592, -5.229335784912109, -5.057854652404785, -4.886373519897461, -4.714892387390137, -4.543410778045654, -4.37192964553833, -4.200448513031006, -4.028967380523682, -3.8574862480163574, -3.686005115509033, -3.51452374458313, -3.3430426120758057, -3.1715614795684814, -3.000080108642578, -2.828598976135254, -2.6571178436279297, -2.4856367111206055, -2.3141555786132812, -2.142674207687378, -1.9711930751800537, -1.7997119426727295, -1.6282306909561157, -1.456749439239502, -1.2852683067321777, -1.113787055015564, -0.942305862903595, -0.770824670791626, -0.5993434190750122, -0.4278622269630432, -0.2563810348510742, -0.08489978313446045, 0.08658134937286377, 0.25806260108947754, 0.42954379320144653, 0.6010249853134155, 0.7725062370300293, 0.9439874291419983, 1.1154686212539673, 1.286949872970581, 1.4584310054779053, 1.629912257194519, 1.8013935089111328, 1.972874641418457, 2.1443557739257812, 2.3158371448516846, 2.487318277359009, 2.658799648284912, 2.8302807807922363, 3.0017619132995605]}, "gradients/encoder.encoder.layers.22.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 0.0, 1.0, 3.0, 1.0, 0.0, 2.0, 8.0, 2.0, 12.0, 15.0, 19.0, 31.0, 47.0, 58.0, 87.0, 81.0, 97.0, 92.0, 101.0, 93.0, 79.0, 62.0, 42.0, 26.0, 14.0, 9.0, 8.0, 3.0, 7.0, 2.0, 5.0, 2.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.7158904075622559, -0.6941738128662109, -0.6724572777748108, -0.6507406830787659, -0.629024088382721, -0.6073075532913208, -0.5855909585952759, -0.563874363899231, -0.542157769203186, -0.5204411745071411, -0.4987246096134186, -0.47700804471969604, -0.4552914500236511, -0.4335748851299286, -0.41185832023620605, -0.39014172554016113, -0.368425190448761, -0.34670862555503845, -0.32499203085899353, -0.303275465965271, -0.2815588712692261, -0.25984230637550354, -0.238125741481781, -0.21640916168689728, -0.19469258189201355, -0.17297600209712982, -0.1512594223022461, -0.12954285740852356, -0.10782627761363983, -0.0861096978187561, -0.06439313292503357, -0.04267655313014984, -0.020959973335266113, 0.0007566027343273163, 0.022473178803920746, 0.04418975114822388, 0.0659063309431076, 0.08762291073799133, 0.10933947563171387, 0.1310560554265976, 0.15277263522148132, 0.17448921501636505, 0.19620579481124878, 0.2179223597049713, 0.23963893949985504, 0.26135551929473877, 0.2830720841884613, 0.30478864908218384, 0.32650524377822876, 0.3482218086719513, 0.3699384033679962, 0.39165496826171875, 0.41337156295776367, 0.4350881278514862, 0.45680469274520874, 0.47852128744125366, 0.5002378225326538, 0.5219544172286987, 0.5436709523200989, 0.5653875470161438, 0.5871041417121887, 0.6088206768035889, 0.6305372714996338, 0.6522538661956787, 0.6739704608917236]}, "gradients/encoder.encoder.layers.22.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 4.0, 3.0, 3.0, 6.0, 11.0, 13.0, 19.0, 21.0, 41.0, 80.0, 113.0, 206.0, 417.0, 836.0, 1675.0, 3541.0, 7966.0, 20748.0, 112964.0, 754839.0, 109654.0, 20698.0, 7857.0, 3529.0, 1592.0, 816.0, 396.0, 214.0, 117.0, 74.0, 30.0, 33.0, 19.0, 6.0, 6.0, 5.0, 5.0, 3.0, 2.0, 2.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.82470703125, -0.8021087646484375, -0.779510498046875, -0.7569122314453125, -0.73431396484375, -0.7117156982421875, -0.689117431640625, -0.6665191650390625, -0.6439208984375, -0.6213226318359375, -0.598724365234375, -0.5761260986328125, -0.55352783203125, -0.5309295654296875, -0.508331298828125, -0.4857330322265625, -0.463134765625, -0.4405364990234375, -0.417938232421875, -0.3953399658203125, -0.37274169921875, -0.3501434326171875, -0.327545166015625, -0.3049468994140625, -0.2823486328125, -0.2597503662109375, -0.237152099609375, -0.2145538330078125, -0.19195556640625, -0.1693572998046875, -0.146759033203125, -0.1241607666015625, -0.1015625, -0.0789642333984375, -0.056365966796875, -0.0337677001953125, -0.01116943359375, 0.0114288330078125, 0.034027099609375, 0.0566253662109375, 0.0792236328125, 0.1018218994140625, 0.124420166015625, 0.1470184326171875, 0.16961669921875, 0.1922149658203125, 0.214813232421875, 0.2374114990234375, 0.260009765625, 0.2826080322265625, 0.305206298828125, 0.3278045654296875, 0.35040283203125, 0.3730010986328125, 0.395599365234375, 0.4181976318359375, 0.4407958984375, 0.4633941650390625, 0.485992431640625, 0.5085906982421875, 0.53118896484375, 0.5537872314453125, 0.576385498046875, 0.5989837646484375, 0.62158203125]}, "gradients/encoder.encoder.layers.22.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 2.0, 1.0, 4.0, 1.0, 3.0, 4.0, 2.0, 6.0, 2.0, 8.0, 7.0, 5.0, 6.0, 11.0, 19.0, 36.0, 73.0, 94.0, 144.0, 195.0, 137.0, 93.0, 49.0, 31.0, 19.0, 11.0, 6.0, 7.0, 10.0, 2.0, 3.0, 4.0, 2.0, 2.0, 6.0, 1.0, 7.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.05706787109375, -0.05512809753417969, -0.053188323974609375, -0.05124855041503906, -0.04930877685546875, -0.04736900329589844, -0.045429229736328125, -0.04348945617675781, -0.0415496826171875, -0.03960990905761719, -0.037670135498046875, -0.03573036193847656, -0.03379058837890625, -0.03185081481933594, -0.029911041259765625, -0.027971267700195312, -0.026031494140625, -0.024091720581054688, -0.022151947021484375, -0.020212173461914062, -0.01827239990234375, -0.016332626342773438, -0.014392852783203125, -0.012453079223632812, -0.0105133056640625, -0.008573532104492188, -0.006633758544921875, -0.0046939849853515625, -0.00275421142578125, -0.0008144378662109375, 0.001125335693359375, 0.0030651092529296875, 0.0050048828125, 0.0069446563720703125, 0.008884429931640625, 0.010824203491210938, 0.01276397705078125, 0.014703750610351562, 0.016643524169921875, 0.018583297729492188, 0.0205230712890625, 0.022462844848632812, 0.024402618408203125, 0.026342391967773438, 0.02828216552734375, 0.030221939086914062, 0.032161712646484375, 0.03410148620605469, 0.036041259765625, 0.03798103332519531, 0.039920806884765625, 0.04186058044433594, 0.04380035400390625, 0.04574012756347656, 0.047679901123046875, 0.04961967468261719, 0.0515594482421875, 0.05349922180175781, 0.055438995361328125, 0.05737876892089844, 0.05931854248046875, 0.06125831604003906, 0.06319808959960938, 0.06513786315917969, 0.06707763671875]}, "gradients/encoder.encoder.layers.22.attention.v_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 3.0, 1.0, 0.0, 2.0, 6.0, 2.0, 2.0, 7.0, 5.0, 6.0, 7.0, 12.0, 16.0, 24.0, 41.0, 149.0, 565.0, 5532.0, 290855.0, 737439.0, 12529.0, 990.0, 206.0, 73.0, 24.0, 14.0, 5.0, 7.0, 7.0, 6.0, 3.0, 4.0, 6.0, 1.0, 1.0, 3.0, 1.0, 3.0, 0.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0], "bins": [-1.34765625, -1.3057098388671875, -1.263763427734375, -1.2218170166015625, -1.17987060546875, -1.1379241943359375, -1.095977783203125, -1.0540313720703125, -1.0120849609375, -0.9701385498046875, -0.928192138671875, -0.8862457275390625, -0.84429931640625, -0.8023529052734375, -0.760406494140625, -0.7184600830078125, -0.676513671875, -0.6345672607421875, -0.592620849609375, -0.5506744384765625, -0.50872802734375, -0.4667816162109375, -0.424835205078125, -0.3828887939453125, -0.3409423828125, -0.2989959716796875, -0.257049560546875, -0.2151031494140625, -0.17315673828125, -0.1312103271484375, -0.089263916015625, -0.0473175048828125, -0.00537109375, 0.0365753173828125, 0.078521728515625, 0.1204681396484375, 0.16241455078125, 0.2043609619140625, 0.246307373046875, 0.2882537841796875, 0.3302001953125, 0.3721466064453125, 0.414093017578125, 0.4560394287109375, 0.49798583984375, 0.5399322509765625, 0.581878662109375, 0.6238250732421875, 0.665771484375, 0.7077178955078125, 0.749664306640625, 0.7916107177734375, 0.83355712890625, 0.8755035400390625, 0.917449951171875, 0.9593963623046875, 1.0013427734375, 1.0432891845703125, 1.085235595703125, 1.1271820068359375, 1.16912841796875, 1.2110748291015625, 1.253021240234375, 1.2949676513671875, 1.3369140625]}, "gradients/encoder.encoder.layers.22.attention.v_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 5.0, 1.0, 4.0, 5.0, 7.0, 3.0, 7.0, 2.0, 11.0, 9.0, 13.0, 18.0, 24.0, 23.0, 28.0, 27.0, 31.0, 23.0, 40.0, 43.0, 35.0, 44.0, 46.0, 38.0, 52.0, 40.0, 54.0, 58.0, 38.0, 35.0, 29.0, 29.0, 29.0, 22.0, 28.0, 20.0, 17.0, 25.0, 5.0, 11.0, 6.0, 8.0, 4.0, 1.0, 6.0, 2.0, 0.0, 4.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.21435546875, -0.2073955535888672, -0.20043563842773438, -0.19347572326660156, -0.18651580810546875, -0.17955589294433594, -0.17259597778320312, -0.1656360626220703, -0.1586761474609375, -0.1517162322998047, -0.14475631713867188, -0.13779640197753906, -0.13083648681640625, -0.12387657165527344, -0.11691665649414062, -0.10995674133300781, -0.102996826171875, -0.09603691101074219, -0.08907699584960938, -0.08211708068847656, -0.07515716552734375, -0.06819725036621094, -0.061237335205078125, -0.05427742004394531, -0.0473175048828125, -0.04035758972167969, -0.033397674560546875, -0.026437759399414062, -0.01947784423828125, -0.012517929077148438, -0.005558013916015625, 0.0014019012451171875, 0.00836181640625, 0.015321731567382812, 0.022281646728515625, 0.029241561889648438, 0.03620147705078125, 0.04316139221191406, 0.050121307373046875, 0.05708122253417969, 0.0640411376953125, 0.07100105285644531, 0.07796096801757812, 0.08492088317871094, 0.09188079833984375, 0.09884071350097656, 0.10580062866210938, 0.11276054382324219, 0.119720458984375, 0.1266803741455078, 0.13364028930664062, 0.14060020446777344, 0.14756011962890625, 0.15452003479003906, 0.16147994995117188, 0.1684398651123047, 0.1753997802734375, 0.1823596954345703, 0.18931961059570312, 0.19627952575683594, 0.20323944091796875, 0.21019935607910156, 0.21715927124023438, 0.2241191864013672, 0.2310791015625]}, "gradients/encoder.encoder.layers.22.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 3.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 4.0, 3.0, 4.0, 12.0, 14.0, 40.0, 147.0, 913.0, 59245.0, 985615.0, 2190.0, 243.0, 64.0, 21.0, 10.0, 5.0, 5.0, 4.0, 4.0, 1.0, 6.0, 0.0, 2.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-3.298828125, -3.18572998046875, -3.0726318359375, -2.95953369140625, -2.846435546875, -2.73333740234375, -2.6202392578125, -2.50714111328125, -2.39404296875, -2.28094482421875, -2.1678466796875, -2.05474853515625, -1.941650390625, -1.82855224609375, -1.7154541015625, -1.60235595703125, -1.4892578125, -1.37615966796875, -1.2630615234375, -1.14996337890625, -1.036865234375, -0.92376708984375, -0.8106689453125, -0.69757080078125, -0.58447265625, -0.47137451171875, -0.3582763671875, -0.24517822265625, -0.132080078125, -0.01898193359375, 0.0941162109375, 0.20721435546875, 0.3203125, 0.43341064453125, 0.5465087890625, 0.65960693359375, 0.772705078125, 0.88580322265625, 0.9989013671875, 1.11199951171875, 1.22509765625, 1.33819580078125, 1.4512939453125, 1.56439208984375, 1.677490234375, 1.79058837890625, 1.9036865234375, 2.01678466796875, 2.1298828125, 2.24298095703125, 2.3560791015625, 2.46917724609375, 2.582275390625, 2.69537353515625, 2.8084716796875, 2.92156982421875, 3.03466796875, 3.14776611328125, 3.2608642578125, 3.37396240234375, 3.487060546875, 3.60015869140625, 3.7132568359375, 3.82635498046875, 3.939453125]}, "gradients/encoder.encoder.layers.22.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 3.0, 2.0, 7.0, 6.0, 9.0, 17.0, 27.0, 41.0, 87.0, 179.0, 243.0, 179.0, 100.0, 37.0, 27.0, 17.0, 7.0, 5.0, 4.0, 4.0, 5.0, 3.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0001590251922607422, -0.00015500467270612717, -0.00015098415315151215, -0.00014696363359689713, -0.0001429431140422821, -0.00013892259448766708, -0.00013490207493305206, -0.00013088155537843704, -0.00012686103582382202, -0.000122840516269207, -0.00011881999671459198, -0.00011479947715997696, -0.00011077895760536194, -0.00010675843805074692, -0.0001027379184961319, -9.871739894151688e-05, -9.469687938690186e-05, -9.067635983228683e-05, -8.665584027767181e-05, -8.263532072305679e-05, -7.861480116844177e-05, -7.459428161382675e-05, -7.057376205921173e-05, -6.655324250459671e-05, -6.253272294998169e-05, -5.851220339536667e-05, -5.449168384075165e-05, -5.047116428613663e-05, -4.6450644731521606e-05, -4.2430125176906586e-05, -3.8409605622291565e-05, -3.4389086067676544e-05, -3.0368566513061523e-05, -2.6348046958446503e-05, -2.2327527403831482e-05, -1.830700784921646e-05, -1.428648829460144e-05, -1.026596873998642e-05, -6.245449185371399e-06, -2.224929630756378e-06, 1.7955899238586426e-06, 5.816109478473663e-06, 9.836629033088684e-06, 1.3857148587703705e-05, 1.7877668142318726e-05, 2.1898187696933746e-05, 2.5918707251548767e-05, 2.9939226806163788e-05, 3.395974636077881e-05, 3.798026591539383e-05, 4.200078547000885e-05, 4.602130502462387e-05, 5.004182457923889e-05, 5.406234413385391e-05, 5.808286368846893e-05, 6.210338324308395e-05, 6.612390279769897e-05, 7.0144422352314e-05, 7.416494190692902e-05, 7.818546146154404e-05, 8.220598101615906e-05, 8.622650057077408e-05, 9.02470201253891e-05, 9.426753968000412e-05, 9.828805923461914e-05]}, "gradients/encoder.encoder.layers.22.attention.q_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 4.0, 6.0, 6.0, 12.0, 14.0, 26.0, 45.0, 43.0, 92.0, 150.0, 274.0, 539.0, 1174.0, 3070.0, 12045.0, 111530.0, 829849.0, 74278.0, 10219.0, 2799.0, 1131.0, 526.0, 298.0, 154.0, 94.0, 55.0, 40.0, 34.0, 24.0, 12.0, 7.0, 4.0, 4.0, 3.0, 0.0, 3.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.619140625, -0.5915374755859375, -0.563934326171875, -0.5363311767578125, -0.50872802734375, -0.4811248779296875, -0.453521728515625, -0.4259185791015625, -0.3983154296875, -0.3707122802734375, -0.343109130859375, -0.3155059814453125, -0.28790283203125, -0.2602996826171875, -0.232696533203125, -0.2050933837890625, -0.177490234375, -0.1498870849609375, -0.122283935546875, -0.0946807861328125, -0.06707763671875, -0.0394744873046875, -0.011871337890625, 0.0157318115234375, 0.0433349609375, 0.0709381103515625, 0.098541259765625, 0.1261444091796875, 0.15374755859375, 0.1813507080078125, 0.208953857421875, 0.2365570068359375, 0.26416015625, 0.2917633056640625, 0.319366455078125, 0.3469696044921875, 0.37457275390625, 0.4021759033203125, 0.429779052734375, 0.4573822021484375, 0.4849853515625, 0.5125885009765625, 0.540191650390625, 0.5677947998046875, 0.59539794921875, 0.6230010986328125, 0.650604248046875, 0.6782073974609375, 0.705810546875, 0.7334136962890625, 0.761016845703125, 0.7886199951171875, 0.81622314453125, 0.8438262939453125, 0.871429443359375, 0.8990325927734375, 0.9266357421875, 0.9542388916015625, 0.981842041015625, 1.0094451904296875, 1.03704833984375, 1.0646514892578125, 1.092254638671875, 1.1198577880859375, 1.1474609375]}, "gradients/encoder.encoder.layers.22.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 4.0, 1.0, 4.0, 11.0, 16.0, 15.0, 26.0, 34.0, 76.0, 87.0, 119.0, 138.0, 128.0, 116.0, 89.0, 45.0, 37.0, 19.0, 13.0, 7.0, 10.0, 7.0, 1.0, 3.0, 1.0, 2.0, 5.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.257080078125, -0.24400711059570312, -0.23093414306640625, -0.21786117553710938, -0.2047882080078125, -0.19171524047851562, -0.17864227294921875, -0.16556930541992188, -0.152496337890625, -0.13942337036132812, -0.12635040283203125, -0.11327743530273438, -0.1002044677734375, -0.08713150024414062, -0.07405853271484375, -0.060985565185546875, -0.04791259765625, -0.034839630126953125, -0.02176666259765625, -0.008693695068359375, 0.0043792724609375, 0.017452239990234375, 0.03052520751953125, 0.043598175048828125, 0.056671142578125, 0.06974411010742188, 0.08281707763671875, 0.09589004516601562, 0.1089630126953125, 0.12203598022460938, 0.13510894775390625, 0.14818191528320312, 0.1612548828125, 0.17432785034179688, 0.18740081787109375, 0.20047378540039062, 0.2135467529296875, 0.22661972045898438, 0.23969268798828125, 0.2527656555175781, 0.265838623046875, 0.2789115905761719, 0.29198455810546875, 0.3050575256347656, 0.3181304931640625, 0.3312034606933594, 0.34427642822265625, 0.3573493957519531, 0.37042236328125, 0.3834953308105469, 0.39656829833984375, 0.4096412658691406, 0.4227142333984375, 0.4357872009277344, 0.44886016845703125, 0.4619331359863281, 0.475006103515625, 0.4880790710449219, 0.5011520385742188, 0.5142250061035156, 0.5272979736328125, 0.5403709411621094, 0.5534439086914062, 0.5665168762207031, 0.57958984375]}, "gradients/encoder.encoder.layers.22.layer_norm.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 3.0, 1.0, 1.0, 1.0, 3.0, 2.0, 3.0, 2.0, 5.0, 11.0, 39.0, 127.0, 248.0, 324.0, 152.0, 57.0, 16.0, 6.0, 3.0, 4.0, 1.0, 4.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.7624757289886475, -2.618496894836426, -2.474518299102783, -2.3305397033691406, -2.186560869216919, -2.0425820350646973, -1.8986034393310547, -1.7546247243881226, -1.6106460094451904, -1.4666672945022583, -1.3226885795593262, -1.178709864616394, -1.034731149673462, -0.8907524347305298, -0.7467737197875977, -0.6027950048446655, -0.4588162899017334, -0.31483757495880127, -0.17085886001586914, -0.02688014507293701, 0.11709856986999512, 0.26107728481292725, 0.4050559997558594, 0.5490347146987915, 0.6930134296417236, 0.8369921445846558, 0.9809708595275879, 1.12494957447052, 1.2689282894134521, 1.4129070043563843, 1.5568857192993164, 1.7008644342422485, 1.8448429107666016, 1.9888216257095337, 2.132800340652466, 2.2767791748046875, 2.42075777053833, 2.5647363662719727, 2.7087152004241943, 2.852694034576416, 2.9966726303100586, 3.140651226043701, 3.284630060195923, 3.4286088943481445, 3.572587490081787, 3.7165660858154297, 3.8605449199676514, 4.004523754119873, 4.148502349853516, 4.292480945587158, 4.436459541320801, 4.580438613891602, 4.724417209625244, 4.868395805358887, 5.0123748779296875, 5.15635347366333, 5.300332069396973, 5.444310665130615, 5.588289260864258, 5.732268333435059, 5.876246929168701, 6.020225524902344, 6.1642045974731445, 6.308183193206787, 6.45216178894043]}, "gradients/encoder.encoder.layers.22.layer_norm.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 4.0, 0.0, 2.0, 5.0, 3.0, 6.0, 7.0, 6.0, 12.0, 14.0, 30.0, 29.0, 37.0, 56.0, 64.0, 81.0, 86.0, 92.0, 95.0, 84.0, 83.0, 47.0, 50.0, 40.0, 21.0, 12.0, 13.0, 9.0, 4.0, 1.0, 8.0, 2.0, 2.0, 1.0, 1.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-2.517001152038574, -2.426503896713257, -2.3360066413879395, -2.245509386062622, -2.1550121307373047, -2.0645151138305664, -1.974017858505249, -1.8835206031799316, -1.7930233478546143, -1.7025260925292969, -1.6120288372039795, -1.5215317010879517, -1.4310344457626343, -1.340537190437317, -1.250040054321289, -1.1595427989959717, -1.0690455436706543, -0.9785482883453369, -0.8880510926246643, -0.7975538969039917, -0.7070566415786743, -0.6165593862533569, -0.5260621905326843, -0.4355649948120117, -0.34506773948669434, -0.25457051396369934, -0.16407328844070435, -0.07357606291770935, 0.016921162605285645, 0.10741838812828064, 0.19791561365127563, 0.28841280937194824, 0.3789100646972656, 0.4694072902202606, 0.5599045157432556, 0.6504017114639282, 0.7408989667892456, 0.831396222114563, 0.9218934178352356, 1.0123906135559082, 1.1028878688812256, 1.193385124206543, 1.2838823795318604, 1.3743795156478882, 1.4648767709732056, 1.555374026298523, 1.6458711624145508, 1.7363684177398682, 1.8268656730651855, 1.917362928390503, 2.0078601837158203, 2.0983574390411377, 2.188854694366455, 2.2793517112731934, 2.3698489665985107, 2.460346221923828, 2.5508434772491455, 2.641340732574463, 2.7318379878997803, 2.8223352432250977, 2.912832260131836, 3.0033295154571533, 3.0938267707824707, 3.184324026107788, 3.2748212814331055]}, "gradients/encoder.encoder.layers.21.feed_forward.output_dense.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 4.0, 2.0, 4.0, 4.0, 6.0, 2.0, 2.0, 3.0, 7.0, 11.0, 12.0, 11.0, 45.0, 33.0, 57.0, 68.0, 107.0, 155.0, 215.0, 398.0, 663.0, 1255.0, 2942.0, 8802.0, 40576.0, 3441914.0, 657838.0, 27898.0, 6472.0, 2348.0, 1066.0, 538.0, 293.0, 182.0, 125.0, 75.0, 43.0, 30.0, 20.0, 13.0, 7.0, 6.0, 10.0, 4.0, 9.0, 2.0, 5.0, 0.0, 4.0, 3.0, 2.0, 0.0, 2.0, 0.0, 0.0, 5.0], "bins": [-1.400390625, -1.3590850830078125, -1.317779541015625, -1.2764739990234375, -1.23516845703125, -1.1938629150390625, -1.152557373046875, -1.1112518310546875, -1.0699462890625, -1.0286407470703125, -0.987335205078125, -0.9460296630859375, -0.90472412109375, -0.8634185791015625, -0.822113037109375, -0.7808074951171875, -0.739501953125, -0.6981964111328125, -0.656890869140625, -0.6155853271484375, -0.57427978515625, -0.5329742431640625, -0.491668701171875, -0.4503631591796875, -0.4090576171875, -0.3677520751953125, -0.326446533203125, -0.2851409912109375, -0.24383544921875, -0.2025299072265625, -0.161224365234375, -0.1199188232421875, -0.07861328125, -0.0373077392578125, 0.003997802734375, 0.0453033447265625, 0.08660888671875, 0.1279144287109375, 0.169219970703125, 0.2105255126953125, 0.2518310546875, 0.2931365966796875, 0.334442138671875, 0.3757476806640625, 0.41705322265625, 0.4583587646484375, 0.499664306640625, 0.5409698486328125, 0.582275390625, 0.6235809326171875, 0.664886474609375, 0.7061920166015625, 0.74749755859375, 0.7888031005859375, 0.830108642578125, 0.8714141845703125, 0.9127197265625, 0.9540252685546875, 0.995330810546875, 1.0366363525390625, 1.07794189453125, 1.1192474365234375, 1.160552978515625, 1.2018585205078125, 1.2431640625]}, "gradients/encoder.encoder.layers.21.feed_forward.output_dense.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 2.0, 0.0, 1.0, 2.0, 1.0, 0.0, 5.0, 2.0, 9.0, 4.0, 4.0, 7.0, 3.0, 14.0, 21.0, 33.0, 33.0, 40.0, 57.0, 67.0, 72.0, 112.0, 112.0, 89.0, 76.0, 69.0, 42.0, 34.0, 17.0, 18.0, 9.0, 10.0, 11.0, 8.0, 4.0, 1.0, 1.0, 1.0, 4.0, 2.0, 2.0, 3.0, 1.0, 2.0, 2.0, 1.0, 2.0, 1.0, 1.0, 1.0], "bins": [-0.050140380859375, -0.048745155334472656, -0.04734992980957031, -0.04595470428466797, -0.044559478759765625, -0.04316425323486328, -0.04176902770996094, -0.040373802185058594, -0.03897857666015625, -0.037583351135253906, -0.03618812561035156, -0.03479290008544922, -0.033397674560546875, -0.03200244903564453, -0.030607223510742188, -0.029211997985839844, -0.0278167724609375, -0.026421546936035156, -0.025026321411132812, -0.02363109588623047, -0.022235870361328125, -0.02084064483642578, -0.019445419311523438, -0.018050193786621094, -0.01665496826171875, -0.015259742736816406, -0.013864517211914062, -0.012469291687011719, -0.011074066162109375, -0.009678840637207031, -0.008283615112304688, -0.006888389587402344, -0.0054931640625, -0.004097938537597656, -0.0027027130126953125, -0.0013074874877929688, 8.7738037109375e-05, 0.0014829635620117188, 0.0028781890869140625, 0.004273414611816406, 0.00566864013671875, 0.007063865661621094, 0.008459091186523438, 0.009854316711425781, 0.011249542236328125, 0.012644767761230469, 0.014039993286132812, 0.015435218811035156, 0.0168304443359375, 0.018225669860839844, 0.019620895385742188, 0.02101612091064453, 0.022411346435546875, 0.02380657196044922, 0.025201797485351562, 0.026597023010253906, 0.02799224853515625, 0.029387474060058594, 0.030782699584960938, 0.03217792510986328, 0.033573150634765625, 0.03496837615966797, 0.03636360168457031, 0.037758827209472656, 0.039154052734375]}, "gradients/encoder.encoder.layers.21.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 6.0, 2.0, 23.0, 33.0, 108.0, 603.0, 35956.0, 4151394.0, 5754.0, 304.0, 71.0, 28.0, 9.0, 2.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-2.115234375, -1.993194580078125, -1.87115478515625, -1.749114990234375, -1.6270751953125, -1.505035400390625, -1.38299560546875, -1.260955810546875, -1.138916015625, -1.016876220703125, -0.89483642578125, -0.772796630859375, -0.6507568359375, -0.528717041015625, -0.40667724609375, -0.284637451171875, -0.16259765625, -0.040557861328125, 0.08148193359375, 0.203521728515625, 0.3255615234375, 0.447601318359375, 0.56964111328125, 0.691680908203125, 0.813720703125, 0.935760498046875, 1.05780029296875, 1.179840087890625, 1.3018798828125, 1.423919677734375, 1.54595947265625, 1.667999267578125, 1.7900390625, 1.912078857421875, 2.03411865234375, 2.156158447265625, 2.2781982421875, 2.400238037109375, 2.52227783203125, 2.644317626953125, 2.766357421875, 2.888397216796875, 3.01043701171875, 3.132476806640625, 3.2545166015625, 3.376556396484375, 3.49859619140625, 3.620635986328125, 3.74267578125, 3.864715576171875, 3.98675537109375, 4.108795166015625, 4.2308349609375, 4.352874755859375, 4.47491455078125, 4.596954345703125, 4.718994140625, 4.841033935546875, 4.96307373046875, 5.085113525390625, 5.2071533203125, 5.329193115234375, 5.45123291015625, 5.573272705078125, 5.6953125]}, "gradients/encoder.encoder.layers.21.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 3.0, 5.0, 4.0, 4.0, 7.0, 8.0, 7.0, 4.0, 12.0, 13.0, 26.0, 29.0, 30.0, 62.0, 84.0, 119.0, 226.0, 477.0, 1287.0, 803.0, 326.0, 188.0, 118.0, 76.0, 34.0, 40.0, 25.0, 17.0, 15.0, 4.0, 4.0, 2.0, 5.0, 1.0, 5.0, 4.0, 0.0, 0.0, 3.0, 0.0, 2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.08917236328125, -0.08596038818359375, -0.0827484130859375, -0.07953643798828125, -0.076324462890625, -0.07311248779296875, -0.0699005126953125, -0.06668853759765625, -0.0634765625, -0.06026458740234375, -0.0570526123046875, -0.05384063720703125, -0.050628662109375, -0.04741668701171875, -0.0442047119140625, -0.04099273681640625, -0.03778076171875, -0.03456878662109375, -0.0313568115234375, -0.02814483642578125, -0.024932861328125, -0.02172088623046875, -0.0185089111328125, -0.01529693603515625, -0.0120849609375, -0.00887298583984375, -0.0056610107421875, -0.00244903564453125, 0.000762939453125, 0.00397491455078125, 0.0071868896484375, 0.01039886474609375, 0.01361083984375, 0.01682281494140625, 0.0200347900390625, 0.02324676513671875, 0.026458740234375, 0.02967071533203125, 0.0328826904296875, 0.03609466552734375, 0.039306640625, 0.04251861572265625, 0.0457305908203125, 0.04894256591796875, 0.052154541015625, 0.05536651611328125, 0.0585784912109375, 0.06179046630859375, 0.06500244140625, 0.06821441650390625, 0.0714263916015625, 0.07463836669921875, 0.077850341796875, 0.08106231689453125, 0.0842742919921875, 0.08748626708984375, 0.0906982421875, 0.09391021728515625, 0.0971221923828125, 0.10033416748046875, 0.103546142578125, 0.10675811767578125, 0.1099700927734375, 0.11318206787109375, 0.11639404296875]}, "gradients/encoder.encoder.layers.21.final_layer_norm.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 4.0, 17.0, 797.0, 195.0, 4.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.9089664220809937, -1.66550612449646, -1.4220457077026367, -1.1785852909088135, -0.9351249933242798, -0.6916645765304565, -0.44820427894592285, -0.20474398136138916, 0.03871643543243408, 0.28217679262161255, 0.525637149810791, 0.7690975069999695, 1.012557864189148, 1.2560182809829712, 1.4994785785675049, 1.7429388761520386, 1.9863992929458618, 2.2298595905303955, 2.4733200073242188, 2.716780424118042, 2.9602408409118652, 3.2037010192871094, 3.4471614360809326, 3.690621852874756, 3.93408203125, 4.177542209625244, 4.4210028648376465, 4.664463043212891, 4.907923698425293, 5.151383876800537, 5.394844055175781, 5.638304710388184, 5.881765365600586, 6.12522554397583, 6.368686199188232, 6.612146377563477, 6.855607032775879, 7.099067211151123, 7.342527389526367, 7.5859880447387695, 7.829448223114014, 8.072908401489258, 8.31636905670166, 8.559829711914062, 8.803289413452148, 9.04675006866455, 9.290210723876953, 9.533670425415039, 9.777131080627441, 10.020591735839844, 10.26405143737793, 10.507512092590332, 10.750972747802734, 10.99443244934082, 11.237893104553223, 11.481353759765625, 11.724813461303711, 11.968274116516113, 12.2117338180542, 12.455194473266602, 12.698655128479004, 12.942115783691406, 13.185575485229492, 13.429036140441895, 13.672496795654297]}, "gradients/encoder.encoder.layers.21.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 3.0, 7.0, 6.0, 8.0, 20.0, 36.0, 56.0, 98.0, 117.0, 129.0, 152.0, 116.0, 116.0, 70.0, 36.0, 19.0, 11.0, 7.0, 8.0, 3.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.4736161231994629, -0.4380244314670563, -0.40243270993232727, -0.36684101819992065, -0.33124929666519165, -0.29565760493278503, -0.2600659132003784, -0.2244742065668106, -0.1888824999332428, -0.153290793299675, -0.11769909411668777, -0.08210739493370056, -0.04651568830013275, -0.010923981666564941, 0.024667710065841675, 0.060259416699409485, 0.0958511233329773, 0.1314428299665451, 0.16703453660011292, 0.20262622833251953, 0.23821793496608734, 0.27380964159965515, 0.30940133333206177, 0.34499305486679077, 0.3805847465991974, 0.416176438331604, 0.451768159866333, 0.4873598515987396, 0.5229515433311462, 0.5585432648658752, 0.5941349267959595, 0.6297266483306885, 0.6653183698654175, 0.7009100914001465, 0.7365017533302307, 0.7720934748649597, 0.8076851963996887, 0.843276858329773, 0.878868579864502, 0.914460301399231, 0.95005202293396, 0.985643744468689, 1.021235466003418, 1.0568270683288574, 1.0924187898635864, 1.1280105113983154, 1.1636022329330444, 1.1991939544677734, 1.234785556793213, 1.270377278327942, 1.305968999862671, 1.3415606021881104, 1.3771523237228394, 1.4127440452575684, 1.4483357667922974, 1.4839274883270264, 1.5195192098617554, 1.5551109313964844, 1.5907026529312134, 1.6262943744659424, 1.6618859767913818, 1.6974776983261108, 1.7330694198608398, 1.7686611413955688, 1.8042528629302979]}, "gradients/encoder.encoder.layers.21.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 1.0, 2.0, 2.0, 2.0, 2.0, 6.0, 4.0, 4.0, 3.0, 9.0, 7.0, 10.0, 12.0, 21.0, 22.0, 21.0, 33.0, 30.0, 25.0, 36.0, 36.0, 38.0, 58.0, 249.0, 6277.0, 975793.0, 64667.0, 738.0, 81.0, 53.0, 46.0, 39.0, 27.0, 42.0, 27.0, 30.0, 20.0, 19.0, 11.0, 10.0, 12.0, 7.0, 6.0, 7.0, 3.0, 5.0, 2.0, 5.0, 2.0, 2.0, 3.0, 0.0, 0.0, 1.0, 2.0], "bins": [-2.365234375, -2.297088623046875, -2.22894287109375, -2.160797119140625, -2.0926513671875, -2.024505615234375, -1.95635986328125, -1.888214111328125, -1.820068359375, -1.751922607421875, -1.68377685546875, -1.615631103515625, -1.5474853515625, -1.479339599609375, -1.41119384765625, -1.343048095703125, -1.27490234375, -1.206756591796875, -1.13861083984375, -1.070465087890625, -1.0023193359375, -0.934173583984375, -0.86602783203125, -0.797882080078125, -0.729736328125, -0.661590576171875, -0.59344482421875, -0.525299072265625, -0.4571533203125, -0.389007568359375, -0.32086181640625, -0.252716064453125, -0.1845703125, -0.116424560546875, -0.04827880859375, 0.019866943359375, 0.0880126953125, 0.156158447265625, 0.22430419921875, 0.292449951171875, 0.360595703125, 0.428741455078125, 0.49688720703125, 0.565032958984375, 0.6331787109375, 0.701324462890625, 0.76947021484375, 0.837615966796875, 0.90576171875, 0.973907470703125, 1.04205322265625, 1.110198974609375, 1.1783447265625, 1.246490478515625, 1.31463623046875, 1.382781982421875, 1.450927734375, 1.519073486328125, 1.58721923828125, 1.655364990234375, 1.7235107421875, 1.791656494140625, 1.85980224609375, 1.927947998046875, 1.99609375]}, "gradients/encoder.encoder.layers.21.attention.out_proj.bias": {"_type": "histogram", "values": [4.0, 3.0, 11.0, 65.0, 233.0, 395.0, 241.0, 52.0, 14.0, 5.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.041717529296875, -0.03418397903442383, -0.026650428771972656, -0.019116878509521484, -0.011583328247070312, -0.004049777984619141, 0.0034837722778320312, 0.011017322540283203, 0.018550872802734375, 0.026084423065185547, 0.03361797332763672, 0.04115152359008789, 0.04868507385253906, 0.056218624114990234, 0.0637521743774414, 0.07128572463989258, 0.07881927490234375, 0.08635282516479492, 0.0938863754272461, 0.10141992568969727, 0.10895347595214844, 0.11648702621459961, 0.12402057647705078, 0.13155412673950195, 0.13908767700195312, 0.1466212272644043, 0.15415477752685547, 0.16168832778930664, 0.1692218780517578, 0.17675542831420898, 0.18428897857666016, 0.19182252883911133, 0.1993560791015625, 0.20688962936401367, 0.21442317962646484, 0.22195672988891602, 0.2294902801513672, 0.23702383041381836, 0.24455738067626953, 0.2520909309387207, 0.2596244812011719, 0.26715803146362305, 0.2746915817260742, 0.2822251319885254, 0.28975868225097656, 0.29729223251342773, 0.3048257827758789, 0.3123593330383301, 0.31989288330078125, 0.3274264335632324, 0.3349599838256836, 0.34249353408813477, 0.35002708435058594, 0.3575606346130371, 0.3650941848754883, 0.37262773513793945, 0.3801612854003906, 0.3876948356628418, 0.39522838592529297, 0.40276193618774414, 0.4102954864501953, 0.4178290367126465, 0.42536258697509766, 0.43289613723754883, 0.4404296875]}, "gradients/encoder.encoder.layers.21.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 3.0, 6.0, 2.0, 8.0, 7.0, 13.0, 12.0, 19.0, 21.0, 45.0, 54.0, 70.0, 105.0, 197.0, 333.0, 706.0, 1776.0, 6284.0, 30119.0, 215130.0, 614218.0, 149105.0, 22351.0, 4973.0, 1529.0, 628.0, 289.0, 187.0, 114.0, 82.0, 39.0, 34.0, 32.0, 9.0, 16.0, 9.0, 7.0, 7.0, 6.0, 5.0, 3.0, 7.0, 1.0, 2.0, 1.0, 2.0, 0.0, 1.0], "bins": [-0.77880859375, -0.757415771484375, -0.73602294921875, -0.714630126953125, -0.6932373046875, -0.671844482421875, -0.65045166015625, -0.629058837890625, -0.607666015625, -0.586273193359375, -0.56488037109375, -0.543487548828125, -0.5220947265625, -0.500701904296875, -0.47930908203125, -0.457916259765625, -0.4365234375, -0.415130615234375, -0.39373779296875, -0.372344970703125, -0.3509521484375, -0.329559326171875, -0.30816650390625, -0.286773681640625, -0.265380859375, -0.243988037109375, -0.22259521484375, -0.201202392578125, -0.1798095703125, -0.158416748046875, -0.13702392578125, -0.115631103515625, -0.09423828125, -0.072845458984375, -0.05145263671875, -0.030059814453125, -0.0086669921875, 0.012725830078125, 0.03411865234375, 0.055511474609375, 0.076904296875, 0.098297119140625, 0.11968994140625, 0.141082763671875, 0.1624755859375, 0.183868408203125, 0.20526123046875, 0.226654052734375, 0.248046875, 0.269439697265625, 0.29083251953125, 0.312225341796875, 0.3336181640625, 0.355010986328125, 0.37640380859375, 0.397796630859375, 0.419189453125, 0.440582275390625, 0.46197509765625, 0.483367919921875, 0.5047607421875, 0.526153564453125, 0.54754638671875, 0.568939208984375, 0.59033203125]}, "gradients/encoder.encoder.layers.21.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 6.0, 2.0, 4.0, 3.0, 6.0, 5.0, 10.0, 10.0, 14.0, 6.0, 10.0, 21.0, 28.0, 28.0, 29.0, 40.0, 34.0, 32.0, 53.0, 58.0, 48.0, 55.0, 64.0, 62.0, 49.0, 44.0, 43.0, 40.0, 35.0, 28.0, 19.0, 27.0, 21.0, 10.0, 16.0, 11.0, 12.0, 9.0, 5.0, 3.0, 4.0, 4.0, 4.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.3740234375, -0.3628807067871094, -0.35173797607421875, -0.3405952453613281, -0.3294525146484375, -0.3183097839355469, -0.30716705322265625, -0.2960243225097656, -0.284881591796875, -0.2737388610839844, -0.26259613037109375, -0.2514533996582031, -0.2403106689453125, -0.22916793823242188, -0.21802520751953125, -0.20688247680664062, -0.19573974609375, -0.18459701538085938, -0.17345428466796875, -0.16231155395507812, -0.1511688232421875, -0.14002609252929688, -0.12888336181640625, -0.11774063110351562, -0.106597900390625, -0.09545516967773438, -0.08431243896484375, -0.07316970825195312, -0.0620269775390625, -0.050884246826171875, -0.03974151611328125, -0.028598785400390625, -0.0174560546875, -0.006313323974609375, 0.00482940673828125, 0.015972137451171875, 0.0271148681640625, 0.038257598876953125, 0.04940032958984375, 0.060543060302734375, 0.071685791015625, 0.08282852172851562, 0.09397125244140625, 0.10511398315429688, 0.1162567138671875, 0.12739944458007812, 0.13854217529296875, 0.14968490600585938, 0.16082763671875, 0.17197036743164062, 0.18311309814453125, 0.19425582885742188, 0.2053985595703125, 0.21654129028320312, 0.22768402099609375, 0.23882675170898438, 0.249969482421875, 0.2611122131347656, 0.27225494384765625, 0.2833976745605469, 0.2945404052734375, 0.3056831359863281, 0.31682586669921875, 0.3279685974121094, 0.339111328125]}, "gradients/encoder.encoder.layers.21.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 4.0, 1.0, 7.0, 2.0, 6.0, 6.0, 7.0, 11.0, 32.0, 48.0, 73.0, 111.0, 183.0, 300.0, 654.0, 1550.0, 4727.0, 22359.0, 189351.0, 677973.0, 127990.0, 16622.0, 3910.0, 1369.0, 563.0, 275.0, 156.0, 76.0, 56.0, 49.0, 28.0, 18.0, 5.0, 15.0, 8.0, 7.0, 4.0, 5.0, 3.0, 1.0, 3.0, 0.0, 2.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.496337890625, -0.4794654846191406, -0.46259307861328125, -0.4457206726074219, -0.4288482666015625, -0.4119758605957031, -0.39510345458984375, -0.3782310485839844, -0.361358642578125, -0.3444862365722656, -0.32761383056640625, -0.3107414245605469, -0.2938690185546875, -0.2769966125488281, -0.26012420654296875, -0.24325180053710938, -0.22637939453125, -0.20950698852539062, -0.19263458251953125, -0.17576217651367188, -0.1588897705078125, -0.14201736450195312, -0.12514495849609375, -0.10827255249023438, -0.091400146484375, -0.07452774047851562, -0.05765533447265625, -0.040782928466796875, -0.0239105224609375, -0.007038116455078125, 0.00983428955078125, 0.026706695556640625, 0.0435791015625, 0.060451507568359375, 0.07732391357421875, 0.09419631958007812, 0.1110687255859375, 0.12794113159179688, 0.14481353759765625, 0.16168594360351562, 0.178558349609375, 0.19543075561523438, 0.21230316162109375, 0.22917556762695312, 0.2460479736328125, 0.2629203796386719, 0.27979278564453125, 0.2966651916503906, 0.31353759765625, 0.3304100036621094, 0.34728240966796875, 0.3641548156738281, 0.3810272216796875, 0.3978996276855469, 0.41477203369140625, 0.4316444396972656, 0.448516845703125, 0.4653892517089844, 0.48226165771484375, 0.4991340637207031, 0.5160064697265625, 0.5328788757324219, 0.5497512817382812, 0.5666236877441406, 0.58349609375]}, "gradients/encoder.encoder.layers.21.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 2.0, 0.0, 2.0, 3.0, 2.0, 6.0, 8.0, 13.0, 28.0, 40.0, 46.0, 63.0, 72.0, 109.0, 108.0, 115.0, 91.0, 94.0, 65.0, 33.0, 35.0, 23.0, 16.0, 14.0, 6.0, 5.0, 7.0, 2.0, 3.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-7.009506225585938e-05, -6.834324449300766e-05, -6.659142673015594e-05, -6.483960896730423e-05, -6.308779120445251e-05, -6.13359734416008e-05, -5.9584155678749084e-05, -5.783233791589737e-05, -5.6080520153045654e-05, -5.432870239019394e-05, -5.2576884627342224e-05, -5.082506686449051e-05, -4.9073249101638794e-05, -4.732143133878708e-05, -4.5569613575935364e-05, -4.381779581308365e-05, -4.2065978050231934e-05, -4.031416028738022e-05, -3.8562342524528503e-05, -3.681052476167679e-05, -3.505870699882507e-05, -3.330688923597336e-05, -3.155507147312164e-05, -2.9803253710269928e-05, -2.8051435947418213e-05, -2.6299618184566498e-05, -2.4547800421714783e-05, -2.2795982658863068e-05, -2.1044164896011353e-05, -1.9292347133159637e-05, -1.7540529370307922e-05, -1.5788711607456207e-05, -1.4036893844604492e-05, -1.2285076081752777e-05, -1.0533258318901062e-05, -8.781440556049347e-06, -7.029622793197632e-06, -5.277805030345917e-06, -3.5259872674942017e-06, -1.7741695046424866e-06, -2.2351741790771484e-08, 1.7294660210609436e-06, 3.4812837839126587e-06, 5.233101546764374e-06, 6.984919309616089e-06, 8.736737072467804e-06, 1.0488554835319519e-05, 1.2240372598171234e-05, 1.399219036102295e-05, 1.5744008123874664e-05, 1.749582588672638e-05, 1.9247643649578094e-05, 2.099946141242981e-05, 2.2751279175281525e-05, 2.450309693813324e-05, 2.6254914700984955e-05, 2.800673246383667e-05, 2.9758550226688385e-05, 3.15103679895401e-05, 3.3262185752391815e-05, 3.501400351524353e-05, 3.6765821278095245e-05, 3.851763904094696e-05, 4.0269456803798676e-05, 4.202127456665039e-05]}, "gradients/encoder.encoder.layers.21.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 2.0, 6.0, 8.0, 20.0, 20.0, 30.0, 61.0, 178.0, 410.0, 1217.0, 6162.0, 171451.0, 839457.0, 26074.0, 2359.0, 670.0, 209.0, 108.0, 52.0, 31.0, 10.0, 8.0, 7.0, 3.0, 6.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.1455078125, -1.113922119140625, -1.08233642578125, -1.050750732421875, -1.0191650390625, -0.987579345703125, -0.95599365234375, -0.924407958984375, -0.892822265625, -0.861236572265625, -0.82965087890625, -0.798065185546875, -0.7664794921875, -0.734893798828125, -0.70330810546875, -0.671722412109375, -0.64013671875, -0.608551025390625, -0.57696533203125, -0.545379638671875, -0.5137939453125, -0.482208251953125, -0.45062255859375, -0.419036865234375, -0.387451171875, -0.355865478515625, -0.32427978515625, -0.292694091796875, -0.2611083984375, -0.229522705078125, -0.19793701171875, -0.166351318359375, -0.134765625, -0.103179931640625, -0.07159423828125, -0.040008544921875, -0.0084228515625, 0.023162841796875, 0.05474853515625, 0.086334228515625, 0.117919921875, 0.149505615234375, 0.18109130859375, 0.212677001953125, 0.2442626953125, 0.275848388671875, 0.30743408203125, 0.339019775390625, 0.37060546875, 0.402191162109375, 0.43377685546875, 0.465362548828125, 0.4969482421875, 0.528533935546875, 0.56011962890625, 0.591705322265625, 0.623291015625, 0.654876708984375, 0.68646240234375, 0.718048095703125, 0.7496337890625, 0.781219482421875, 0.81280517578125, 0.844390869140625, 0.8759765625]}, "gradients/encoder.encoder.layers.21.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 3.0, 6.0, 5.0, 5.0, 12.0, 23.0, 26.0, 40.0, 69.0, 97.0, 140.0, 144.0, 152.0, 98.0, 76.0, 41.0, 30.0, 12.0, 11.0, 11.0, 7.0, 1.0, 0.0, 1.0, 0.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.468505859375, -0.4566841125488281, -0.44486236572265625, -0.4330406188964844, -0.4212188720703125, -0.4093971252441406, -0.39757537841796875, -0.3857536315917969, -0.373931884765625, -0.3621101379394531, -0.35028839111328125, -0.3384666442871094, -0.3266448974609375, -0.3148231506347656, -0.30300140380859375, -0.2911796569824219, -0.27935791015625, -0.2675361633300781, -0.25571441650390625, -0.24389266967773438, -0.2320709228515625, -0.22024917602539062, -0.20842742919921875, -0.19660568237304688, -0.184783935546875, -0.17296218872070312, -0.16114044189453125, -0.14931869506835938, -0.1374969482421875, -0.12567520141601562, -0.11385345458984375, -0.10203170776367188, -0.0902099609375, -0.07838821411132812, -0.06656646728515625, -0.054744720458984375, -0.0429229736328125, -0.031101226806640625, -0.01927947998046875, -0.007457733154296875, 0.004364013671875, 0.016185760498046875, 0.02800750732421875, 0.039829254150390625, 0.0516510009765625, 0.06347274780273438, 0.07529449462890625, 0.08711624145507812, 0.09893798828125, 0.11075973510742188, 0.12258148193359375, 0.13440322875976562, 0.1462249755859375, 0.15804672241210938, 0.16986846923828125, 0.18169021606445312, 0.193511962890625, 0.20533370971679688, 0.21715545654296875, 0.22897720336914062, 0.2407989501953125, 0.2526206970214844, 0.26444244384765625, 0.2762641906738281, 0.2880859375]}, "gradients/encoder.encoder.layers.21.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 1.0, 3.0, 2.0, 8.0, 5.0, 13.0, 44.0, 478.0, 453.0, 10.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.3227763175964355, -6.718204498291016, -6.1136322021484375, -5.509060382843018, -4.904488563537598, -4.299916744232178, -3.6953446865081787, -3.0907726287841797, -2.4862008094787598, -1.8816288709640503, -1.2770569324493408, -0.6724849939346313, -0.06791305541992188, 0.536658763885498, 1.141230821609497, 1.745802879333496, 2.350374698638916, 2.954946517944336, 3.559518575668335, 4.164090633392334, 4.768662452697754, 5.373234272003174, 5.977806091308594, 6.582378387451172, 7.186950206756592, 7.791522026062012, 8.39609432220459, 9.000665664672852, 9.60523796081543, 10.209810256958008, 10.81438159942627, 11.418953895568848, 12.02352523803711, 12.628097534179688, 13.23266887664795, 13.837241172790527, 14.441812515258789, 15.046384811401367, 15.650957107543945, 16.255529403686523, 16.86009979248047, 17.464672088623047, 18.069244384765625, 18.67381477355957, 19.27838706970215, 19.882959365844727, 20.487531661987305, 21.092103958129883, 21.69667625427246, 22.30124855041504, 22.905820846557617, 23.510391235351562, 24.11496353149414, 24.71953582763672, 25.324108123779297, 25.928680419921875, 26.533252716064453, 27.13782501220703, 27.74239730834961, 28.346967697143555, 28.951539993286133, 29.55611228942871, 30.16068458557129, 30.765256881713867, 31.369827270507812]}, "gradients/encoder.encoder.layers.21.layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 0.0, 1.0, 2.0, 2.0, 1.0, 3.0, 6.0, 7.0, 7.0, 5.0, 11.0, 22.0, 19.0, 30.0, 23.0, 30.0, 32.0, 46.0, 36.0, 42.0, 58.0, 53.0, 62.0, 54.0, 53.0, 55.0, 55.0, 57.0, 33.0, 31.0, 41.0, 22.0, 22.0, 23.0, 16.0, 9.0, 10.0, 7.0, 4.0, 8.0, 8.0, 2.0, 1.0, 3.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-2.47566556930542, -2.390188217163086, -2.304711103439331, -2.219233751296997, -2.133756637573242, -2.048279285430908, -1.9628020524978638, -1.8773248195648193, -1.791847586631775, -1.7063703536987305, -1.620893120765686, -1.5354158878326416, -1.4499385356903076, -1.3644614219665527, -1.2789840698242188, -1.1935068368911743, -1.1080296039581299, -1.0225523710250854, -0.937075138092041, -0.8515978455543518, -0.7661206126213074, -0.6806433796882629, -0.5951660871505737, -0.5096888542175293, -0.42421162128448486, -0.33873438835144043, -0.2532571256160736, -0.1677798628807068, -0.08230262994766235, 0.00317460298538208, 0.08865189552307129, 0.17412912845611572, 0.25960612297058105, 0.3450833559036255, 0.4305606186389923, 0.5160378813743591, 0.6015151143074036, 0.686992347240448, 0.7724696397781372, 0.8579468727111816, 0.9434241056442261, 1.0289013385772705, 1.114378571510315, 1.1998558044433594, 1.2853331565856934, 1.3708102703094482, 1.4562876224517822, 1.5417648553848267, 1.627242088317871, 1.7127193212509155, 1.79819655418396, 1.883673906326294, 1.9691510200500488, 2.054628372192383, 2.140105724334717, 2.2255828380584717, 2.3110599517822266, 2.3965373039245605, 2.4820144176483154, 2.5674917697906494, 2.6529688835144043, 2.7384462356567383, 2.8239235877990723, 2.909400701522827, 2.994878053665161]}, "gradients/encoder.encoder.layers.20.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 3.0, 3.0, 4.0, 18.0, 44.0, 128.0, 2340.0, 4188016.0, 2887.0, 470.0, 184.0, 91.0, 43.0, 31.0, 17.0, 6.0, 5.0, 6.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.08203125, -2.75799560546875, -2.4339599609375, -2.10992431640625, -1.785888671875, -1.46185302734375, -1.1378173828125, -0.81378173828125, -0.48974609375, -0.16571044921875, 0.1583251953125, 0.48236083984375, 0.806396484375, 1.13043212890625, 1.4544677734375, 1.77850341796875, 2.1025390625, 2.42657470703125, 2.7506103515625, 3.07464599609375, 3.398681640625, 3.72271728515625, 4.0467529296875, 4.37078857421875, 4.69482421875, 5.01885986328125, 5.3428955078125, 5.66693115234375, 5.990966796875, 6.31500244140625, 6.6390380859375, 6.96307373046875, 7.287109375, 7.61114501953125, 7.9351806640625, 8.25921630859375, 8.583251953125, 8.90728759765625, 9.2313232421875, 9.55535888671875, 9.87939453125, 10.20343017578125, 10.5274658203125, 10.85150146484375, 11.175537109375, 11.49957275390625, 11.8236083984375, 12.14764404296875, 12.4716796875, 12.79571533203125, 13.1197509765625, 13.44378662109375, 13.767822265625, 14.09185791015625, 14.4158935546875, 14.73992919921875, 15.06396484375, 15.38800048828125, 15.7120361328125, 16.03607177734375, 16.360107421875, 16.68414306640625, 17.0081787109375, 17.33221435546875, 17.65625]}, "gradients/encoder.encoder.layers.20.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 4.0, 4.0, 6.0, 22.0, 54.0, 131.0, 247.0, 293.0, 151.0, 70.0, 28.0, 8.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.08056640625, -0.07173919677734375, -0.0629119873046875, -0.05408477783203125, -0.045257568359375, -0.03643035888671875, -0.0276031494140625, -0.01877593994140625, -0.00994873046875, -0.00112152099609375, 0.0077056884765625, 0.01653289794921875, 0.025360107421875, 0.03418731689453125, 0.0430145263671875, 0.05184173583984375, 0.0606689453125, 0.06949615478515625, 0.0783233642578125, 0.08715057373046875, 0.095977783203125, 0.10480499267578125, 0.1136322021484375, 0.12245941162109375, 0.13128662109375, 0.14011383056640625, 0.1489410400390625, 0.15776824951171875, 0.166595458984375, 0.17542266845703125, 0.1842498779296875, 0.19307708740234375, 0.201904296875, 0.21073150634765625, 0.2195587158203125, 0.22838592529296875, 0.237213134765625, 0.24604034423828125, 0.2548675537109375, 0.26369476318359375, 0.27252197265625, 0.28134918212890625, 0.2901763916015625, 0.29900360107421875, 0.307830810546875, 0.31665802001953125, 0.3254852294921875, 0.33431243896484375, 0.3431396484375, 0.35196685791015625, 0.3607940673828125, 0.36962127685546875, 0.378448486328125, 0.38727569580078125, 0.3961029052734375, 0.40493011474609375, 0.41375732421875, 0.42258453369140625, 0.4314117431640625, 0.44023895263671875, 0.449066162109375, 0.45789337158203125, 0.4667205810546875, 0.47554779052734375, 0.484375]}, "gradients/encoder.encoder.layers.20.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 17.0, 44.0, 92.0, 142.0, 308.0, 20444.0, 4172240.0, 421.0, 236.0, 166.0, 102.0, 56.0, 20.0, 3.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-14.3828125, -13.7864990234375, -13.190185546875, -12.5938720703125, -11.99755859375, -11.4012451171875, -10.804931640625, -10.2086181640625, -9.6123046875, -9.0159912109375, -8.419677734375, -7.8233642578125, -7.22705078125, -6.6307373046875, -6.034423828125, -5.4381103515625, -4.841796875, -4.2454833984375, -3.649169921875, -3.0528564453125, -2.45654296875, -1.8602294921875, -1.263916015625, -0.6676025390625, -0.0712890625, 0.5250244140625, 1.121337890625, 1.7176513671875, 2.31396484375, 2.9102783203125, 3.506591796875, 4.1029052734375, 4.69921875, 5.2955322265625, 5.891845703125, 6.4881591796875, 7.08447265625, 7.6807861328125, 8.277099609375, 8.8734130859375, 9.4697265625, 10.0660400390625, 10.662353515625, 11.2586669921875, 11.85498046875, 12.4512939453125, 13.047607421875, 13.6439208984375, 14.240234375, 14.8365478515625, 15.432861328125, 16.0291748046875, 16.62548828125, 17.2218017578125, 17.818115234375, 18.4144287109375, 19.0107421875, 19.6070556640625, 20.203369140625, 20.7996826171875, 21.39599609375, 21.9923095703125, 22.588623046875, 23.1849365234375, 23.78125]}, "gradients/encoder.encoder.layers.20.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 9.0, 18.0, 75.0, 3051.0, 913.0, 19.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.6337890625, -0.5805206298828125, -0.527252197265625, -0.4739837646484375, -0.42071533203125, -0.3674468994140625, -0.314178466796875, -0.2609100341796875, -0.2076416015625, -0.1543731689453125, -0.101104736328125, -0.0478363037109375, 0.00543212890625, 0.0587005615234375, 0.111968994140625, 0.1652374267578125, 0.218505859375, 0.2717742919921875, 0.325042724609375, 0.3783111572265625, 0.43157958984375, 0.4848480224609375, 0.538116455078125, 0.5913848876953125, 0.6446533203125, 0.6979217529296875, 0.751190185546875, 0.8044586181640625, 0.85772705078125, 0.9109954833984375, 0.964263916015625, 1.0175323486328125, 1.07080078125, 1.1240692138671875, 1.177337646484375, 1.2306060791015625, 1.28387451171875, 1.3371429443359375, 1.390411376953125, 1.4436798095703125, 1.4969482421875, 1.5502166748046875, 1.603485107421875, 1.6567535400390625, 1.71002197265625, 1.7632904052734375, 1.816558837890625, 1.8698272705078125, 1.923095703125, 1.9763641357421875, 2.029632568359375, 2.0829010009765625, 2.13616943359375, 2.1894378662109375, 2.242706298828125, 2.2959747314453125, 2.3492431640625, 2.4025115966796875, 2.455780029296875, 2.5090484619140625, 2.56231689453125, 2.6155853271484375, 2.668853759765625, 2.7221221923828125, 2.775390625]}, "gradients/encoder.encoder.layers.20.final_layer_norm.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 1.0, 1.0, 9.0, 75.0, 846.0, 74.0, 8.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.254074573516846, -5.424280166625977, -4.594485282897949, -3.76469087600708, -2.934896469116211, -2.105102062225342, -1.2753071784973145, -0.4455127716064453, 0.38428163528442383, 1.2140761613845825, 2.043870687484741, 2.8736653327941895, 3.7034597396850586, 4.533254146575928, 5.363049030303955, 6.192843437194824, 7.022637844085693, 7.8524322509765625, 8.68222713470459, 9.512022018432617, 10.341815948486328, 11.171609878540039, 12.001405715942383, 12.831199645996094, 13.660993576049805, 14.490788459777832, 15.320582389831543, 16.15037727355957, 16.98017120361328, 17.809967041015625, 18.639760971069336, 19.469554901123047, 20.29935073852539, 21.1291446685791, 21.958940505981445, 22.788734436035156, 23.618528366088867, 24.448322296142578, 25.278118133544922, 26.107912063598633, 26.937705993652344, 27.767499923706055, 28.5972957611084, 29.42708969116211, 30.25688362121582, 31.08667755126953, 31.916473388671875, 32.74626922607422, 33.57606506347656, 34.405860900878906, 35.235652923583984, 36.06544876098633, 36.89524459838867, 37.72503662109375, 38.554832458496094, 39.38462829589844, 40.214420318603516, 41.04421615600586, 41.87400817871094, 42.70380401611328, 43.533599853515625, 44.3633918762207, 45.19318771362305, 46.02298355102539, 46.85277557373047]}, "gradients/encoder.encoder.layers.20.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 5.0, 14.0, 17.0, 43.0, 78.0, 102.0, 146.0, 155.0, 157.0, 120.0, 78.0, 40.0, 26.0, 15.0, 5.0, 4.0, 1.0, 1.0, 2.0, 3.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.333023071289062, -9.049577713012695, -8.766131401062012, -8.482686042785645, -8.199240684509277, -7.915794849395752, -7.632349014282227, -7.348903656005859, -7.065457820892334, -6.782011985778809, -6.498566627502441, -6.215120792388916, -5.931674957275391, -5.648229598999023, -5.364783763885498, -5.081337928771973, -4.7978925704956055, -4.51444673538208, -4.231001377105713, -3.9475555419921875, -3.664109945297241, -3.380664348602295, -3.0972185134887695, -2.8137729167938232, -2.530327320098877, -2.2468817234039307, -1.9634360074996948, -1.679990291595459, -1.3965446949005127, -1.1130990982055664, -0.8296533823013306, -0.5462076663970947, -0.26276111602783203, 0.020684540271759033, 0.3041301965713501, 0.5875758528709412, 0.8710215091705322, 1.1544671058654785, 1.4379128217697144, 1.7213585376739502, 2.0048041343688965, 2.2882497310638428, 2.571695327758789, 2.8551411628723145, 3.1385867595672607, 3.422032356262207, 3.7054781913757324, 3.9889237880706787, 4.272369384765625, 4.55581521987915, 4.839260578155518, 5.122706413269043, 5.40615177154541, 5.6895976066589355, 5.973043441772461, 6.256488800048828, 6.5399346351623535, 6.823380470275879, 7.106825828552246, 7.3902716636657715, 7.673717498779297, 7.957162857055664, 8.240608215332031, 8.524054527282715, 8.807499885559082]}, "gradients/encoder.encoder.layers.20.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 5.0, 1.0, 1.0, 6.0, 5.0, 16.0, 14.0, 12.0, 19.0, 20.0, 27.0, 47.0, 61.0, 106.0, 198.0, 360.0, 801.0, 2297.0, 13698.0, 320257.0, 682960.0, 22454.0, 3287.0, 954.0, 413.0, 182.0, 122.0, 80.0, 46.0, 34.0, 24.0, 18.0, 11.0, 6.0, 3.0, 4.0, 5.0, 2.0, 5.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0], "bins": [-3.8828125, -3.7752685546875, -3.667724609375, -3.5601806640625, -3.45263671875, -3.3450927734375, -3.237548828125, -3.1300048828125, -3.0224609375, -2.9149169921875, -2.807373046875, -2.6998291015625, -2.59228515625, -2.4847412109375, -2.377197265625, -2.2696533203125, -2.162109375, -2.0545654296875, -1.947021484375, -1.8394775390625, -1.73193359375, -1.6243896484375, -1.516845703125, -1.4093017578125, -1.3017578125, -1.1942138671875, -1.086669921875, -0.9791259765625, -0.87158203125, -0.7640380859375, -0.656494140625, -0.5489501953125, -0.44140625, -0.3338623046875, -0.226318359375, -0.1187744140625, -0.01123046875, 0.0963134765625, 0.203857421875, 0.3114013671875, 0.4189453125, 0.5264892578125, 0.634033203125, 0.7415771484375, 0.84912109375, 0.9566650390625, 1.064208984375, 1.1717529296875, 1.279296875, 1.3868408203125, 1.494384765625, 1.6019287109375, 1.70947265625, 1.8170166015625, 1.924560546875, 2.0321044921875, 2.1396484375, 2.2471923828125, 2.354736328125, 2.4622802734375, 2.56982421875, 2.6773681640625, 2.784912109375, 2.8924560546875, 3.0]}, "gradients/encoder.encoder.layers.20.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 3.0, 3.0, 5.0, 9.0, 26.0, 55.0, 98.0, 148.0, 186.0, 183.0, 134.0, 86.0, 42.0, 18.0, 10.0, 1.0, 1.0, 3.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.34033203125, -0.32405853271484375, -0.3077850341796875, -0.29151153564453125, -0.275238037109375, -0.25896453857421875, -0.2426910400390625, -0.22641754150390625, -0.21014404296875, -0.19387054443359375, -0.1775970458984375, -0.16132354736328125, -0.145050048828125, -0.12877655029296875, -0.1125030517578125, -0.09622955322265625, -0.0799560546875, -0.06368255615234375, -0.0474090576171875, -0.03113555908203125, -0.014862060546875, 0.00141143798828125, 0.0176849365234375, 0.03395843505859375, 0.05023193359375, 0.06650543212890625, 0.0827789306640625, 0.09905242919921875, 0.115325927734375, 0.13159942626953125, 0.1478729248046875, 0.16414642333984375, 0.180419921875, 0.19669342041015625, 0.2129669189453125, 0.22924041748046875, 0.245513916015625, 0.26178741455078125, 0.2780609130859375, 0.29433441162109375, 0.31060791015625, 0.32688140869140625, 0.3431549072265625, 0.35942840576171875, 0.375701904296875, 0.39197540283203125, 0.4082489013671875, 0.42452239990234375, 0.4407958984375, 0.45706939697265625, 0.4733428955078125, 0.48961639404296875, 0.505889892578125, 0.5221633911132812, 0.5384368896484375, 0.5547103881835938, 0.57098388671875, 0.5872573852539062, 0.6035308837890625, 0.6198043823242188, 0.636077880859375, 0.6523513793945312, 0.6686248779296875, 0.6848983764648438, 0.701171875]}, "gradients/encoder.encoder.layers.20.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 2.0, 2.0, 1.0, 2.0, 6.0, 7.0, 6.0, 15.0, 14.0, 16.0, 21.0, 29.0, 56.0, 71.0, 150.0, 249.0, 559.0, 1647.0, 7676.0, 80867.0, 857330.0, 88620.0, 8054.0, 1903.0, 584.0, 282.0, 142.0, 77.0, 47.0, 28.0, 32.0, 18.0, 16.0, 5.0, 12.0, 6.0, 8.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0], "bins": [-1.900390625, -1.840087890625, -1.77978515625, -1.719482421875, -1.6591796875, -1.598876953125, -1.53857421875, -1.478271484375, -1.41796875, -1.357666015625, -1.29736328125, -1.237060546875, -1.1767578125, -1.116455078125, -1.05615234375, -0.995849609375, -0.935546875, -0.875244140625, -0.81494140625, -0.754638671875, -0.6943359375, -0.634033203125, -0.57373046875, -0.513427734375, -0.453125, -0.392822265625, -0.33251953125, -0.272216796875, -0.2119140625, -0.151611328125, -0.09130859375, -0.031005859375, 0.029296875, 0.089599609375, 0.14990234375, 0.210205078125, 0.2705078125, 0.330810546875, 0.39111328125, 0.451416015625, 0.51171875, 0.572021484375, 0.63232421875, 0.692626953125, 0.7529296875, 0.813232421875, 0.87353515625, 0.933837890625, 0.994140625, 1.054443359375, 1.11474609375, 1.175048828125, 1.2353515625, 1.295654296875, 1.35595703125, 1.416259765625, 1.4765625, 1.536865234375, 1.59716796875, 1.657470703125, 1.7177734375, 1.778076171875, 1.83837890625, 1.898681640625, 1.958984375]}, "gradients/encoder.encoder.layers.20.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 2.0, 2.0, 4.0, 7.0, 9.0, 14.0, 5.0, 20.0, 18.0, 31.0, 22.0, 41.0, 43.0, 42.0, 47.0, 52.0, 61.0, 80.0, 62.0, 68.0, 61.0, 52.0, 52.0, 35.0, 33.0, 32.0, 23.0, 17.0, 16.0, 14.0, 9.0, 9.0, 4.0, 6.0, 3.0, 2.0, 0.0, 1.0, 2.0, 3.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 3.0], "bins": [-0.90869140625, -0.8810882568359375, -0.853485107421875, -0.8258819580078125, -0.79827880859375, -0.7706756591796875, -0.743072509765625, -0.7154693603515625, -0.6878662109375, -0.6602630615234375, -0.632659912109375, -0.6050567626953125, -0.57745361328125, -0.5498504638671875, -0.522247314453125, -0.4946441650390625, -0.467041015625, -0.4394378662109375, -0.411834716796875, -0.3842315673828125, -0.35662841796875, -0.3290252685546875, -0.301422119140625, -0.2738189697265625, -0.2462158203125, -0.2186126708984375, -0.191009521484375, -0.1634063720703125, -0.13580322265625, -0.1082000732421875, -0.080596923828125, -0.0529937744140625, -0.025390625, 0.0022125244140625, 0.029815673828125, 0.0574188232421875, 0.08502197265625, 0.1126251220703125, 0.140228271484375, 0.1678314208984375, 0.1954345703125, 0.2230377197265625, 0.250640869140625, 0.2782440185546875, 0.30584716796875, 0.3334503173828125, 0.361053466796875, 0.3886566162109375, 0.416259765625, 0.4438629150390625, 0.471466064453125, 0.4990692138671875, 0.52667236328125, 0.5542755126953125, 0.581878662109375, 0.6094818115234375, 0.6370849609375, 0.6646881103515625, 0.692291259765625, 0.7198944091796875, 0.74749755859375, 0.7751007080078125, 0.802703857421875, 0.8303070068359375, 0.85791015625]}, "gradients/encoder.encoder.layers.20.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 2.0, 1.0, 3.0, 2.0, 6.0, 13.0, 25.0, 28.0, 89.0, 138.0, 445.0, 3335.0, 992827.0, 49929.0, 1225.0, 236.0, 106.0, 69.0, 39.0, 22.0, 14.0, 3.0, 0.0, 7.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.775390625, -2.642059326171875, -2.50872802734375, -2.375396728515625, -2.2420654296875, -2.108734130859375, -1.97540283203125, -1.842071533203125, -1.708740234375, -1.575408935546875, -1.44207763671875, -1.308746337890625, -1.1754150390625, -1.042083740234375, -0.90875244140625, -0.775421142578125, -0.64208984375, -0.508758544921875, -0.37542724609375, -0.242095947265625, -0.1087646484375, 0.024566650390625, 0.15789794921875, 0.291229248046875, 0.424560546875, 0.557891845703125, 0.69122314453125, 0.824554443359375, 0.9578857421875, 1.091217041015625, 1.22454833984375, 1.357879638671875, 1.4912109375, 1.624542236328125, 1.75787353515625, 1.891204833984375, 2.0245361328125, 2.157867431640625, 2.29119873046875, 2.424530029296875, 2.557861328125, 2.691192626953125, 2.82452392578125, 2.957855224609375, 3.0911865234375, 3.224517822265625, 3.35784912109375, 3.491180419921875, 3.62451171875, 3.757843017578125, 3.89117431640625, 4.024505615234375, 4.1578369140625, 4.291168212890625, 4.42449951171875, 4.557830810546875, 4.691162109375, 4.824493408203125, 4.95782470703125, 5.091156005859375, 5.2244873046875, 5.357818603515625, 5.49114990234375, 5.624481201171875, 5.7578125]}, "gradients/encoder.encoder.layers.20.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 3.0, 1.0, 1.0, 5.0, 2.0, 4.0, 10.0, 7.0, 12.0, 16.0, 22.0, 40.0, 75.0, 85.0, 124.0, 130.0, 132.0, 95.0, 84.0, 45.0, 43.0, 28.0, 14.0, 10.0, 2.0, 5.0, 4.0, 2.0, 3.0, 3.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 1.0], "bins": [-0.00011724233627319336, -0.0001145070418715477, -0.00011177174746990204, -0.00010903645306825638, -0.00010630115866661072, -0.00010356586426496506, -0.0001008305698633194, -9.809527546167374e-05, -9.535998106002808e-05, -9.262468665838242e-05, -8.988939225673676e-05, -8.71540978550911e-05, -8.441880345344543e-05, -8.168350905179977e-05, -7.894821465015411e-05, -7.621292024850845e-05, -7.347762584686279e-05, -7.074233144521713e-05, -6.800703704357147e-05, -6.527174264192581e-05, -6.253644824028015e-05, -5.980115383863449e-05, -5.706585943698883e-05, -5.433056503534317e-05, -5.159527063369751e-05, -4.885997623205185e-05, -4.612468183040619e-05, -4.338938742876053e-05, -4.065409302711487e-05, -3.791879862546921e-05, -3.518350422382355e-05, -3.244820982217789e-05, -2.9712915420532227e-05, -2.6977621018886566e-05, -2.4242326617240906e-05, -2.1507032215595245e-05, -1.8771737813949585e-05, -1.6036443412303925e-05, -1.3301149010658264e-05, -1.0565854609012604e-05, -7.830560207366943e-06, -5.095265805721283e-06, -2.3599714040756226e-06, 3.7532299757003784e-07, 3.1106173992156982e-06, 5.845911800861359e-06, 8.581206202507019e-06, 1.131650060415268e-05, 1.405179500579834e-05, 1.6787089407444e-05, 1.952238380908966e-05, 2.225767821073532e-05, 2.499297261238098e-05, 2.7728267014026642e-05, 3.0463561415672302e-05, 3.319885581731796e-05, 3.593415021896362e-05, 3.8669444620609283e-05, 4.1404739022254944e-05, 4.4140033423900604e-05, 4.6875327825546265e-05, 4.9610622227191925e-05, 5.2345916628837585e-05, 5.5081211030483246e-05, 5.7816505432128906e-05]}, "gradients/encoder.encoder.layers.20.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 3.0, 5.0, 9.0, 9.0, 9.0, 14.0, 27.0, 43.0, 73.0, 114.0, 181.0, 474.0, 1652.0, 10954.0, 880248.0, 147482.0, 5402.0, 1035.0, 375.0, 168.0, 96.0, 60.0, 30.0, 29.0, 22.0, 16.0, 12.0, 6.0, 2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 3.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.931640625, -1.868988037109375, -1.80633544921875, -1.743682861328125, -1.6810302734375, -1.618377685546875, -1.55572509765625, -1.493072509765625, -1.430419921875, -1.367767333984375, -1.30511474609375, -1.242462158203125, -1.1798095703125, -1.117156982421875, -1.05450439453125, -0.991851806640625, -0.92919921875, -0.866546630859375, -0.80389404296875, -0.741241455078125, -0.6785888671875, -0.615936279296875, -0.55328369140625, -0.490631103515625, -0.427978515625, -0.365325927734375, -0.30267333984375, -0.240020751953125, -0.1773681640625, -0.114715576171875, -0.05206298828125, 0.010589599609375, 0.0732421875, 0.135894775390625, 0.19854736328125, 0.261199951171875, 0.3238525390625, 0.386505126953125, 0.44915771484375, 0.511810302734375, 0.574462890625, 0.637115478515625, 0.69976806640625, 0.762420654296875, 0.8250732421875, 0.887725830078125, 0.95037841796875, 1.013031005859375, 1.07568359375, 1.138336181640625, 1.20098876953125, 1.263641357421875, 1.3262939453125, 1.388946533203125, 1.45159912109375, 1.514251708984375, 1.576904296875, 1.639556884765625, 1.70220947265625, 1.764862060546875, 1.8275146484375, 1.890167236328125, 1.95281982421875, 2.015472412109375, 2.078125]}, "gradients/encoder.encoder.layers.20.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 4.0, 2.0, 5.0, 6.0, 4.0, 12.0, 15.0, 32.0, 124.0, 248.0, 307.0, 124.0, 59.0, 21.0, 14.0, 10.0, 1.0, 4.0, 1.0, 7.0, 5.0, 1.0, 0.0, 0.0, 2.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.80126953125, -0.7706222534179688, -0.7399749755859375, -0.7093276977539062, -0.678680419921875, -0.6480331420898438, -0.6173858642578125, -0.5867385864257812, -0.55609130859375, -0.5254440307617188, -0.4947967529296875, -0.46414947509765625, -0.433502197265625, -0.40285491943359375, -0.3722076416015625, -0.34156036376953125, -0.3109130859375, -0.28026580810546875, -0.2496185302734375, -0.21897125244140625, -0.188323974609375, -0.15767669677734375, -0.1270294189453125, -0.09638214111328125, -0.06573486328125, -0.03508758544921875, -0.0044403076171875, 0.02620697021484375, 0.056854248046875, 0.08750152587890625, 0.1181488037109375, 0.14879608154296875, 0.179443359375, 0.21009063720703125, 0.2407379150390625, 0.27138519287109375, 0.302032470703125, 0.33267974853515625, 0.3633270263671875, 0.39397430419921875, 0.42462158203125, 0.45526885986328125, 0.4859161376953125, 0.5165634155273438, 0.547210693359375, 0.5778579711914062, 0.6085052490234375, 0.6391525268554688, 0.6697998046875, 0.7004470825195312, 0.7310943603515625, 0.7617416381835938, 0.792388916015625, 0.8230361938476562, 0.8536834716796875, 0.8843307495117188, 0.91497802734375, 0.9456253051757812, 0.9762725830078125, 1.0069198608398438, 1.037567138671875, 1.0682144165039062, 1.0988616943359375, 1.1295089721679688, 1.16015625]}, "gradients/encoder.encoder.layers.20.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 4.0, 13.0, 215.0, 742.0, 33.0, 5.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-39.22661590576172, -38.082000732421875, -36.93738555908203, -35.79277038574219, -34.648155212402344, -33.5035400390625, -32.358924865722656, -31.214305877685547, -30.069690704345703, -28.92507553100586, -27.780460357666016, -26.635845184326172, -25.491228103637695, -24.34661293029785, -23.201997756958008, -22.05738067626953, -20.91276741027832, -19.768152236938477, -18.623537063598633, -17.478919982910156, -16.334304809570312, -15.189689636230469, -14.045074462890625, -12.900458335876465, -11.755843162536621, -10.611227989196777, -9.466611862182617, -8.321996688842773, -7.1773810386657715, -6.0327653884887695, -4.888150215148926, -3.7435340881347656, -2.598918914794922, -1.4543033838272095, -0.30968785285949707, 0.8349275588989258, 1.9795432090759277, 3.1241588592529297, 4.268774032592773, 5.413390159606934, 6.558005332946777, 7.702620983123779, 8.847236633300781, 9.991851806640625, 11.136466979980469, 12.281083106994629, 13.425698280334473, 14.570314407348633, 15.714929580688477, 16.85954475402832, 18.004159927368164, 19.14877700805664, 20.293392181396484, 21.438007354736328, 22.582622528076172, 23.727237701416016, 24.87185287475586, 26.016468048095703, 27.161083221435547, 28.30569839477539, 29.450315475463867, 30.59493064880371, 31.739545822143555, 32.88416290283203, 34.028778076171875]}, "gradients/encoder.encoder.layers.20.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 3.0, 5.0, 5.0, 6.0, 6.0, 5.0, 8.0, 9.0, 15.0, 18.0, 19.0, 21.0, 22.0, 26.0, 36.0, 34.0, 49.0, 51.0, 50.0, 54.0, 53.0, 50.0, 51.0, 51.0, 50.0, 39.0, 49.0, 38.0, 30.0, 34.0, 11.0, 24.0, 21.0, 13.0, 16.0, 6.0, 9.0, 9.0, 5.0, 6.0, 2.0, 1.0, 2.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 2.0], "bins": [-5.533768177032471, -5.3812642097473145, -5.228760242462158, -5.076256275177002, -4.923752307891846, -4.7712483406066895, -4.618744373321533, -4.466239929199219, -4.3137359619140625, -4.161231994628906, -4.00872802734375, -3.8562240600585938, -3.7037200927734375, -3.5512161254882812, -3.398711919784546, -3.2462079524993896, -3.0937042236328125, -2.9412002563476562, -2.7886962890625, -2.6361923217773438, -2.4836883544921875, -2.3311843872070312, -2.178680181503296, -2.0261762142181396, -1.8736722469329834, -1.7211682796478271, -1.568664312362671, -1.416160225868225, -1.2636562585830688, -1.1111522912979126, -0.9586482644081116, -0.8061442375183105, -0.6536407470703125, -0.5011367797851562, -0.3486327528953552, -0.1961287558078766, -0.04362475872039795, 0.1088792085647583, 0.2613832354545593, 0.41388726234436035, 0.5663912296295166, 0.7188951969146729, 0.8713992238044739, 1.023903250694275, 1.1764072179794312, 1.3289111852645874, 1.4814152717590332, 1.6339192390441895, 1.7864232063293457, 1.938927173614502, 2.091431140899658, 2.2439351081848145, 2.3964390754699707, 2.548943042755127, 2.7014472484588623, 2.8539512157440186, 3.006455183029175, 3.158959150314331, 3.3114631175994873, 3.4639670848846436, 3.616471290588379, 3.768975257873535, 3.9214792251586914, 4.073983192443848, 4.226487159729004]}, "gradients/encoder.encoder.layers.19.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 4.0, 1.0, 3.0, 8.0, 10.0, 18.0, 22.0, 36.0, 59.0, 152.0, 381.0, 778.0, 2138.0, 7883.0, 55087.0, 3773129.0, 326841.0, 20211.0, 4232.0, 1450.0, 690.0, 405.0, 216.0, 126.0, 87.0, 76.0, 51.0, 30.0, 29.0, 25.0, 23.0, 22.0, 18.0, 11.0, 11.0, 7.0, 4.0, 6.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 3.0, 0.0, 3.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-1.615234375, -1.524383544921875, -1.43353271484375, -1.342681884765625, -1.2518310546875, -1.160980224609375, -1.07012939453125, -0.979278564453125, -0.888427734375, -0.797576904296875, -0.70672607421875, -0.615875244140625, -0.5250244140625, -0.434173583984375, -0.34332275390625, -0.252471923828125, -0.16162109375, -0.070770263671875, 0.02008056640625, 0.110931396484375, 0.2017822265625, 0.292633056640625, 0.38348388671875, 0.474334716796875, 0.565185546875, 0.656036376953125, 0.74688720703125, 0.837738037109375, 0.9285888671875, 1.019439697265625, 1.11029052734375, 1.201141357421875, 1.2919921875, 1.382843017578125, 1.47369384765625, 1.564544677734375, 1.6553955078125, 1.746246337890625, 1.83709716796875, 1.927947998046875, 2.018798828125, 2.109649658203125, 2.20050048828125, 2.291351318359375, 2.3822021484375, 2.473052978515625, 2.56390380859375, 2.654754638671875, 2.74560546875, 2.836456298828125, 2.92730712890625, 3.018157958984375, 3.1090087890625, 3.199859619140625, 3.29071044921875, 3.381561279296875, 3.472412109375, 3.563262939453125, 3.65411376953125, 3.744964599609375, 3.8358154296875, 3.926666259765625, 4.01751708984375, 4.108367919921875, 4.19921875]}, "gradients/encoder.encoder.layers.19.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 3.0, 1.0, 3.0, 2.0, 5.0, 12.0, 24.0, 39.0, 80.0, 92.0, 153.0, 175.0, 140.0, 112.0, 72.0, 51.0, 20.0, 21.0, 6.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.25, -0.23580169677734375, -0.2216033935546875, -0.20740509033203125, -0.193206787109375, -0.17900848388671875, -0.1648101806640625, -0.15061187744140625, -0.13641357421875, -0.12221527099609375, -0.1080169677734375, -0.09381866455078125, -0.079620361328125, -0.06542205810546875, -0.0512237548828125, -0.03702545166015625, -0.0228271484375, -0.00862884521484375, 0.0055694580078125, 0.01976776123046875, 0.033966064453125, 0.04816436767578125, 0.0623626708984375, 0.07656097412109375, 0.09075927734375, 0.10495758056640625, 0.1191558837890625, 0.13335418701171875, 0.147552490234375, 0.16175079345703125, 0.1759490966796875, 0.19014739990234375, 0.204345703125, 0.21854400634765625, 0.2327423095703125, 0.24694061279296875, 0.261138916015625, 0.27533721923828125, 0.2895355224609375, 0.30373382568359375, 0.31793212890625, 0.33213043212890625, 0.3463287353515625, 0.36052703857421875, 0.374725341796875, 0.38892364501953125, 0.4031219482421875, 0.41732025146484375, 0.4315185546875, 0.44571685791015625, 0.4599151611328125, 0.47411346435546875, 0.488311767578125, 0.5025100708007812, 0.5167083740234375, 0.5309066772460938, 0.54510498046875, 0.5593032836914062, 0.5735015869140625, 0.5876998901367188, 0.601898193359375, 0.6160964965820312, 0.6302947998046875, 0.6444931030273438, 0.65869140625]}, "gradients/encoder.encoder.layers.19.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 0.0, 3.0, 5.0, 13.0, 41.0, 124.0, 837.0, 4181111.0, 11450.0, 457.0, 160.0, 53.0, 20.0, 11.0, 4.0, 4.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.609375, -9.06884765625, -8.5283203125, -7.98779296875, -7.447265625, -6.90673828125, -6.3662109375, -5.82568359375, -5.28515625, -4.74462890625, -4.2041015625, -3.66357421875, -3.123046875, -2.58251953125, -2.0419921875, -1.50146484375, -0.9609375, -0.42041015625, 0.1201171875, 0.66064453125, 1.201171875, 1.74169921875, 2.2822265625, 2.82275390625, 3.36328125, 3.90380859375, 4.4443359375, 4.98486328125, 5.525390625, 6.06591796875, 6.6064453125, 7.14697265625, 7.6875, 8.22802734375, 8.7685546875, 9.30908203125, 9.849609375, 10.39013671875, 10.9306640625, 11.47119140625, 12.01171875, 12.55224609375, 13.0927734375, 13.63330078125, 14.173828125, 14.71435546875, 15.2548828125, 15.79541015625, 16.3359375, 16.87646484375, 17.4169921875, 17.95751953125, 18.498046875, 19.03857421875, 19.5791015625, 20.11962890625, 20.66015625, 21.20068359375, 21.7412109375, 22.28173828125, 22.822265625, 23.36279296875, 23.9033203125, 24.44384765625, 24.984375]}, "gradients/encoder.encoder.layers.19.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 8.0, 10.0, 29.0, 97.0, 854.0, 2827.0, 175.0, 45.0, 18.0, 10.0, 5.0, 4.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.59423828125, -0.5522384643554688, -0.5102386474609375, -0.46823883056640625, -0.426239013671875, -0.38423919677734375, -0.3422393798828125, -0.30023956298828125, -0.25823974609375, -0.21623992919921875, -0.1742401123046875, -0.13224029541015625, -0.090240478515625, -0.04824066162109375, -0.0062408447265625, 0.03575897216796875, 0.0777587890625, 0.11975860595703125, 0.1617584228515625, 0.20375823974609375, 0.245758056640625, 0.28775787353515625, 0.3297576904296875, 0.37175750732421875, 0.41375732421875, 0.45575714111328125, 0.4977569580078125, 0.5397567749023438, 0.581756591796875, 0.6237564086914062, 0.6657562255859375, 0.7077560424804688, 0.749755859375, 0.7917556762695312, 0.8337554931640625, 0.8757553100585938, 0.917755126953125, 0.9597549438476562, 1.0017547607421875, 1.0437545776367188, 1.08575439453125, 1.1277542114257812, 1.1697540283203125, 1.2117538452148438, 1.253753662109375, 1.2957534790039062, 1.3377532958984375, 1.3797531127929688, 1.4217529296875, 1.4637527465820312, 1.5057525634765625, 1.5477523803710938, 1.589752197265625, 1.6317520141601562, 1.6737518310546875, 1.7157516479492188, 1.75775146484375, 1.7997512817382812, 1.8417510986328125, 1.8837509155273438, 1.925750732421875, 1.9677505493164062, 2.0097503662109375, 2.0517501831054688, 2.09375]}, "gradients/encoder.encoder.layers.19.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 11.0, 223.0, 721.0, 38.0, 10.0, 3.0, 3.0, 1.0, 2.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-21.894742965698242, -21.288745880126953, -20.682748794555664, -20.076753616333008, -19.47075653076172, -18.86475944519043, -18.25876235961914, -17.65276527404785, -17.046768188476562, -16.440771102905273, -15.8347749710083, -15.228777885437012, -14.622781753540039, -14.01678466796875, -13.410787582397461, -12.804790496826172, -12.198795318603516, -11.592798233032227, -10.986802101135254, -10.380805015563965, -9.774808883666992, -9.168811798095703, -8.562814712524414, -7.956818103790283, -7.350821495056152, -6.7448248863220215, -6.138828277587891, -5.532831192016602, -4.926834583282471, -4.32083797454834, -3.71484112739563, -3.10884428024292, -2.502849578857422, -1.8968528509140015, -1.290856122970581, -0.6848593950271606, -0.07886266708374023, 0.5271339416503906, 1.1331307888031006, 1.7391276359558105, 2.3451242446899414, 2.9511208534240723, 3.5571177005767822, 4.163114547729492, 4.769111156463623, 5.375107765197754, 5.981104850769043, 6.587101459503174, 7.193098068237305, 7.7990946769714355, 8.405091285705566, 9.011088371276855, 9.617084503173828, 10.223081588745117, 10.829078674316406, 11.435075759887695, 12.041071891784668, 12.647068977355957, 13.25306510925293, 13.859062194824219, 14.465059280395508, 15.07105541229248, 15.67705249786377, 16.283048629760742, 16.88904571533203]}, "gradients/encoder.encoder.layers.19.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 3.0, 1.0, 3.0, 8.0, 12.0, 17.0, 35.0, 44.0, 75.0, 85.0, 101.0, 95.0, 101.0, 92.0, 88.0, 80.0, 61.0, 36.0, 28.0, 13.0, 17.0, 6.0, 3.0, 7.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-4.018214702606201, -3.871316909790039, -3.724419355392456, -3.577521562576294, -3.430624008178711, -3.283726215362549, -3.1368284225463867, -2.9899306297302246, -2.8430330753326416, -2.6961352825164795, -2.5492377281188965, -2.4023399353027344, -2.2554421424865723, -2.1085445880889893, -1.9616467952728271, -1.8147491216659546, -1.667851448059082, -1.5209537744522095, -1.374056100845337, -1.2271583080291748, -1.0802606344223022, -0.9333629608154297, -0.7864652276039124, -0.639567494392395, -0.49266982078552246, -0.3457721173763275, -0.19887441396713257, -0.05197671055793762, 0.09492099285125732, 0.24181866645812988, 0.3887163996696472, 0.5356141328811646, 0.6825122833251953, 0.8294099569320679, 0.9763076901435852, 1.1232054233551025, 1.270103096961975, 1.4170007705688477, 1.5638985633850098, 1.7107962369918823, 1.8576939105987549, 2.004591703414917, 2.1514892578125, 2.298387050628662, 2.445284843444824, 2.5921823978424072, 2.7390801906585693, 2.8859777450561523, 3.0328755378723145, 3.1797733306884766, 3.3266708850860596, 3.4735686779022217, 3.6204662322998047, 3.767364025115967, 3.914261817932129, 4.061159610748291, 4.208057403564453, 4.354955196380615, 4.501852989196777, 4.648750305175781, 4.795648097991943, 4.9425458908081055, 5.089443683624268, 5.23634147644043, 5.383238792419434]}, "gradients/encoder.encoder.layers.19.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 3.0, 1.0, 3.0, 0.0, 1.0, 6.0, 3.0, 5.0, 11.0, 12.0, 15.0, 11.0, 21.0, 29.0, 37.0, 47.0, 75.0, 91.0, 131.0, 190.0, 277.0, 424.0, 752.0, 1428.0, 3683.0, 11884.0, 52723.0, 387425.0, 500107.0, 66723.0, 14207.0, 4328.0, 1694.0, 778.0, 471.0, 280.0, 181.0, 124.0, 99.0, 79.0, 52.0, 37.0, 28.0, 26.0, 14.0, 11.0, 16.0, 8.0, 7.0, 4.0, 2.0, 5.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-1.6650390625, -1.6096649169921875, -1.554290771484375, -1.4989166259765625, -1.44354248046875, -1.3881683349609375, -1.332794189453125, -1.2774200439453125, -1.2220458984375, -1.1666717529296875, -1.111297607421875, -1.0559234619140625, -1.00054931640625, -0.9451751708984375, -0.889801025390625, -0.8344268798828125, -0.779052734375, -0.7236785888671875, -0.668304443359375, -0.6129302978515625, -0.55755615234375, -0.5021820068359375, -0.446807861328125, -0.3914337158203125, -0.3360595703125, -0.2806854248046875, -0.225311279296875, -0.1699371337890625, -0.11456298828125, -0.0591888427734375, -0.003814697265625, 0.0515594482421875, 0.10693359375, 0.1623077392578125, 0.217681884765625, 0.2730560302734375, 0.32843017578125, 0.3838043212890625, 0.439178466796875, 0.4945526123046875, 0.5499267578125, 0.6053009033203125, 0.660675048828125, 0.7160491943359375, 0.77142333984375, 0.8267974853515625, 0.882171630859375, 0.9375457763671875, 0.992919921875, 1.0482940673828125, 1.103668212890625, 1.1590423583984375, 1.21441650390625, 1.2697906494140625, 1.325164794921875, 1.3805389404296875, 1.4359130859375, 1.4912872314453125, 1.546661376953125, 1.6020355224609375, 1.65740966796875, 1.7127838134765625, 1.768157958984375, 1.8235321044921875, 1.87890625]}, "gradients/encoder.encoder.layers.19.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 4.0, 5.0, 5.0, 8.0, 15.0, 26.0, 51.0, 74.0, 100.0, 110.0, 128.0, 114.0, 104.0, 101.0, 65.0, 41.0, 22.0, 15.0, 12.0, 5.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.399658203125, -0.3842353820800781, -0.36881256103515625, -0.3533897399902344, -0.3379669189453125, -0.3225440979003906, -0.30712127685546875, -0.2916984558105469, -0.276275634765625, -0.2608528137207031, -0.24542999267578125, -0.23000717163085938, -0.2145843505859375, -0.19916152954101562, -0.18373870849609375, -0.16831588745117188, -0.15289306640625, -0.13747024536132812, -0.12204742431640625, -0.10662460327148438, -0.0912017822265625, -0.07577896118164062, -0.06035614013671875, -0.044933319091796875, -0.029510498046875, -0.014087677001953125, 0.00133514404296875, 0.016757965087890625, 0.0321807861328125, 0.047603607177734375, 0.06302642822265625, 0.07844924926757812, 0.0938720703125, 0.10929489135742188, 0.12471771240234375, 0.14014053344726562, 0.1555633544921875, 0.17098617553710938, 0.18640899658203125, 0.20183181762695312, 0.217254638671875, 0.23267745971679688, 0.24810028076171875, 0.2635231018066406, 0.2789459228515625, 0.2943687438964844, 0.30979156494140625, 0.3252143859863281, 0.34063720703125, 0.3560600280761719, 0.37148284912109375, 0.3869056701660156, 0.4023284912109375, 0.4177513122558594, 0.43317413330078125, 0.4485969543457031, 0.464019775390625, 0.4794425964355469, 0.49486541748046875, 0.5102882385253906, 0.5257110595703125, 0.5411338806152344, 0.5565567016601562, 0.5719795227050781, 0.58740234375]}, "gradients/encoder.encoder.layers.19.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 2.0, 3.0, 3.0, 3.0, 2.0, 8.0, 11.0, 13.0, 19.0, 23.0, 35.0, 45.0, 68.0, 111.0, 150.0, 297.0, 610.0, 1375.0, 3861.0, 14869.0, 101960.0, 693439.0, 199477.0, 23338.0, 5456.0, 1743.0, 712.0, 357.0, 195.0, 124.0, 88.0, 42.0, 28.0, 17.0, 17.0, 14.0, 16.0, 8.0, 7.0, 6.0, 3.0, 5.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 3.0], "bins": [-1.685546875, -1.6395416259765625, -1.593536376953125, -1.5475311279296875, -1.50152587890625, -1.4555206298828125, -1.409515380859375, -1.3635101318359375, -1.3175048828125, -1.2714996337890625, -1.225494384765625, -1.1794891357421875, -1.13348388671875, -1.0874786376953125, -1.041473388671875, -0.9954681396484375, -0.949462890625, -0.9034576416015625, -0.857452392578125, -0.8114471435546875, -0.76544189453125, -0.7194366455078125, -0.673431396484375, -0.6274261474609375, -0.5814208984375, -0.5354156494140625, -0.489410400390625, -0.4434051513671875, -0.39739990234375, -0.3513946533203125, -0.305389404296875, -0.2593841552734375, -0.21337890625, -0.1673736572265625, -0.121368408203125, -0.0753631591796875, -0.02935791015625, 0.0166473388671875, 0.062652587890625, 0.1086578369140625, 0.1546630859375, 0.2006683349609375, 0.246673583984375, 0.2926788330078125, 0.33868408203125, 0.3846893310546875, 0.430694580078125, 0.4766998291015625, 0.522705078125, 0.5687103271484375, 0.614715576171875, 0.6607208251953125, 0.70672607421875, 0.7527313232421875, 0.798736572265625, 0.8447418212890625, 0.8907470703125, 0.9367523193359375, 0.982757568359375, 1.0287628173828125, 1.07476806640625, 1.1207733154296875, 1.166778564453125, 1.2127838134765625, 1.2587890625]}, "gradients/encoder.encoder.layers.19.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 2.0, 2.0, 2.0, 0.0, 1.0, 2.0, 5.0, 7.0, 11.0, 5.0, 4.0, 19.0, 18.0, 42.0, 23.0, 24.0, 43.0, 50.0, 56.0, 59.0, 68.0, 41.0, 66.0, 53.0, 63.0, 42.0, 52.0, 36.0, 35.0, 37.0, 29.0, 26.0, 18.0, 16.0, 13.0, 11.0, 8.0, 5.0, 7.0, 0.0, 4.0, 3.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.1455078125, -1.1124343872070312, -1.0793609619140625, -1.0462875366210938, -1.013214111328125, -0.9801406860351562, -0.9470672607421875, -0.9139938354492188, -0.88092041015625, -0.8478469848632812, -0.8147735595703125, -0.7817001342773438, -0.748626708984375, -0.7155532836914062, -0.6824798583984375, -0.6494064331054688, -0.6163330078125, -0.5832595825195312, -0.5501861572265625, -0.5171127319335938, -0.484039306640625, -0.45096588134765625, -0.4178924560546875, -0.38481903076171875, -0.35174560546875, -0.31867218017578125, -0.2855987548828125, -0.25252532958984375, -0.219451904296875, -0.18637847900390625, -0.1533050537109375, -0.12023162841796875, -0.087158203125, -0.05408477783203125, -0.0210113525390625, 0.01206207275390625, 0.045135498046875, 0.07820892333984375, 0.1112823486328125, 0.14435577392578125, 0.17742919921875, 0.21050262451171875, 0.2435760498046875, 0.27664947509765625, 0.309722900390625, 0.34279632568359375, 0.3758697509765625, 0.40894317626953125, 0.4420166015625, 0.47509002685546875, 0.5081634521484375, 0.5412368774414062, 0.574310302734375, 0.6073837280273438, 0.6404571533203125, 0.6735305786132812, 0.70660400390625, 0.7396774291992188, 0.7727508544921875, 0.8058242797851562, 0.838897705078125, 0.8719711303710938, 0.9050445556640625, 0.9381179809570312, 0.97119140625]}, "gradients/encoder.encoder.layers.19.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 3.0, 5.0, 4.0, 0.0, 7.0, 13.0, 8.0, 4.0, 19.0, 26.0, 38.0, 44.0, 72.0, 115.0, 229.0, 538.0, 1347.0, 4041.0, 17891.0, 187296.0, 728758.0, 91668.0, 11663.0, 2913.0, 970.0, 385.0, 190.0, 108.0, 64.0, 44.0, 25.0, 10.0, 15.0, 8.0, 7.0, 6.0, 6.0, 6.0, 4.0, 1.0, 5.0, 5.0, 1.0, 2.0, 1.0, 0.0, 2.0, 0.0, 1.0], "bins": [-0.6611328125, -0.6424407958984375, -0.623748779296875, -0.6050567626953125, -0.58636474609375, -0.5676727294921875, -0.548980712890625, -0.5302886962890625, -0.5115966796875, -0.4929046630859375, -0.474212646484375, -0.4555206298828125, -0.43682861328125, -0.4181365966796875, -0.399444580078125, -0.3807525634765625, -0.362060546875, -0.3433685302734375, -0.324676513671875, -0.3059844970703125, -0.28729248046875, -0.2686004638671875, -0.249908447265625, -0.2312164306640625, -0.2125244140625, -0.1938323974609375, -0.175140380859375, -0.1564483642578125, -0.13775634765625, -0.1190643310546875, -0.100372314453125, -0.0816802978515625, -0.06298828125, -0.0442962646484375, -0.025604248046875, -0.0069122314453125, 0.01177978515625, 0.0304718017578125, 0.049163818359375, 0.0678558349609375, 0.0865478515625, 0.1052398681640625, 0.123931884765625, 0.1426239013671875, 0.16131591796875, 0.1800079345703125, 0.198699951171875, 0.2173919677734375, 0.236083984375, 0.2547760009765625, 0.273468017578125, 0.2921600341796875, 0.31085205078125, 0.3295440673828125, 0.348236083984375, 0.3669281005859375, 0.3856201171875, 0.4043121337890625, 0.423004150390625, 0.4416961669921875, 0.46038818359375, 0.4790802001953125, 0.497772216796875, 0.5164642333984375, 0.53515625]}, "gradients/encoder.encoder.layers.19.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 3.0, 1.0, 2.0, 4.0, 7.0, 15.0, 8.0, 17.0, 21.0, 18.0, 17.0, 41.0, 44.0, 51.0, 59.0, 71.0, 80.0, 95.0, 77.0, 92.0, 54.0, 51.0, 47.0, 26.0, 24.0, 15.0, 13.0, 15.0, 9.0, 8.0, 10.0, 4.0, 0.0, 3.0, 2.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.3822994232177734e-05, -5.221273750066757e-05, -5.060248076915741e-05, -4.899222403764725e-05, -4.7381967306137085e-05, -4.577171057462692e-05, -4.416145384311676e-05, -4.25511971116066e-05, -4.0940940380096436e-05, -3.933068364858627e-05, -3.772042691707611e-05, -3.611017018556595e-05, -3.4499913454055786e-05, -3.2889656722545624e-05, -3.127939999103546e-05, -2.96691432595253e-05, -2.8058886528015137e-05, -2.6448629796504974e-05, -2.4838373064994812e-05, -2.322811633348465e-05, -2.1617859601974487e-05, -2.0007602870464325e-05, -1.8397346138954163e-05, -1.6787089407444e-05, -1.5176832675933838e-05, -1.3566575944423676e-05, -1.1956319212913513e-05, -1.034606248140335e-05, -8.735805749893188e-06, -7.125549018383026e-06, -5.515292286872864e-06, -3.905035555362701e-06, -2.294778823852539e-06, -6.845220923423767e-07, 9.257346391677856e-07, 2.535991370677948e-06, 4.14624810218811e-06, 5.756504833698273e-06, 7.366761565208435e-06, 8.977018296718597e-06, 1.058727502822876e-05, 1.2197531759738922e-05, 1.3807788491249084e-05, 1.5418045222759247e-05, 1.702830195426941e-05, 1.863855868577957e-05, 2.0248815417289734e-05, 2.1859072148799896e-05, 2.346932888031006e-05, 2.507958561182022e-05, 2.6689842343330383e-05, 2.8300099074840546e-05, 2.9910355806350708e-05, 3.152061253786087e-05, 3.313086926937103e-05, 3.4741126000881195e-05, 3.635138273239136e-05, 3.796163946390152e-05, 3.957189619541168e-05, 4.1182152926921844e-05, 4.279240965843201e-05, 4.440266638994217e-05, 4.601292312145233e-05, 4.7623179852962494e-05, 4.9233436584472656e-05]}, "gradients/encoder.encoder.layers.19.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 2.0, 0.0, 3.0, 6.0, 5.0, 10.0, 18.0, 14.0, 17.0, 30.0, 41.0, 61.0, 120.0, 156.0, 342.0, 758.0, 2103.0, 7181.0, 34345.0, 362140.0, 567345.0, 58895.0, 10194.0, 2832.0, 977.0, 415.0, 218.0, 135.0, 68.0, 41.0, 29.0, 17.0, 8.0, 7.0, 6.0, 4.0, 4.0, 3.0, 4.0, 2.0, 4.0, 3.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.469482421875, -0.4533653259277344, -0.43724822998046875, -0.4211311340332031, -0.4050140380859375, -0.3888969421386719, -0.37277984619140625, -0.3566627502441406, -0.340545654296875, -0.3244285583496094, -0.30831146240234375, -0.2921943664550781, -0.2760772705078125, -0.2599601745605469, -0.24384307861328125, -0.22772598266601562, -0.21160888671875, -0.19549179077148438, -0.17937469482421875, -0.16325759887695312, -0.1471405029296875, -0.13102340698242188, -0.11490631103515625, -0.09878921508789062, -0.082672119140625, -0.06655502319335938, -0.05043792724609375, -0.034320831298828125, -0.0182037353515625, -0.002086639404296875, 0.01403045654296875, 0.030147552490234375, 0.0462646484375, 0.062381744384765625, 0.07849884033203125, 0.09461593627929688, 0.1107330322265625, 0.12685012817382812, 0.14296722412109375, 0.15908432006835938, 0.175201416015625, 0.19131851196289062, 0.20743560791015625, 0.22355270385742188, 0.2396697998046875, 0.2557868957519531, 0.27190399169921875, 0.2880210876464844, 0.30413818359375, 0.3202552795410156, 0.33637237548828125, 0.3524894714355469, 0.3686065673828125, 0.3847236633300781, 0.40084075927734375, 0.4169578552246094, 0.433074951171875, 0.4491920471191406, 0.46530914306640625, 0.4814262390136719, 0.4975433349609375, 0.5136604309082031, 0.5297775268554688, 0.5458946228027344, 0.56201171875]}, "gradients/encoder.encoder.layers.19.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 1.0, 4.0, 0.0, 4.0, 1.0, 3.0, 4.0, 8.0, 7.0, 11.0, 6.0, 24.0, 26.0, 29.0, 49.0, 51.0, 67.0, 83.0, 104.0, 94.0, 87.0, 79.0, 70.0, 60.0, 41.0, 20.0, 16.0, 13.0, 9.0, 4.0, 3.0, 5.0, 5.0, 0.0, 2.0, 4.0, 3.0, 3.0, 0.0, 1.0, 5.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0], "bins": [-0.34326171875, -0.332977294921875, -0.32269287109375, -0.312408447265625, -0.3021240234375, -0.291839599609375, -0.28155517578125, -0.271270751953125, -0.260986328125, -0.250701904296875, -0.24041748046875, -0.230133056640625, -0.2198486328125, -0.209564208984375, -0.19927978515625, -0.188995361328125, -0.1787109375, -0.168426513671875, -0.15814208984375, -0.147857666015625, -0.1375732421875, -0.127288818359375, -0.11700439453125, -0.106719970703125, -0.096435546875, -0.086151123046875, -0.07586669921875, -0.065582275390625, -0.0552978515625, -0.045013427734375, -0.03472900390625, -0.024444580078125, -0.01416015625, -0.003875732421875, 0.00640869140625, 0.016693115234375, 0.0269775390625, 0.037261962890625, 0.04754638671875, 0.057830810546875, 0.068115234375, 0.078399658203125, 0.08868408203125, 0.098968505859375, 0.1092529296875, 0.119537353515625, 0.12982177734375, 0.140106201171875, 0.150390625, 0.160675048828125, 0.17095947265625, 0.181243896484375, 0.1915283203125, 0.201812744140625, 0.21209716796875, 0.222381591796875, 0.232666015625, 0.242950439453125, 0.25323486328125, 0.263519287109375, 0.2738037109375, 0.284088134765625, 0.29437255859375, 0.304656982421875, 0.31494140625]}, "gradients/encoder.encoder.layers.19.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 4.0, 7.0, 11.0, 18.0, 68.0, 250.0, 437.0, 164.0, 39.0, 5.0, 6.0, 3.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.110148429870605, -7.644254684448242, -7.178360462188721, -6.712466716766357, -6.246572494506836, -5.780678749084473, -5.314785003662109, -4.848891258239746, -4.382997035980225, -3.9171030521392822, -3.45120906829834, -2.9853153228759766, -2.519421339035034, -2.053527355194092, -1.5876336097717285, -1.1217396259307861, -0.6558456420898438, -0.18995171785354614, 0.27594220638275146, 0.7418360710144043, 1.2077300548553467, 1.673624038696289, 2.1395177841186523, 2.6054117679595947, 3.071305751800537, 3.5371997356414795, 4.003093719482422, 4.468987464904785, 4.934881210327148, 5.40077543258667, 5.866669178009033, 6.332563400268555, 6.798458099365234, 7.264351844787598, 7.730246067047119, 8.19614028930664, 8.662034034729004, 9.127927780151367, 9.59382152557373, 10.059715270996094, 10.525609970092773, 10.991503715515137, 11.4573974609375, 11.92329216003418, 12.389185905456543, 12.855079650878906, 13.32097339630127, 13.786867141723633, 14.252760887145996, 14.71865463256836, 15.184548377990723, 15.650442123413086, 16.116336822509766, 16.582229614257812, 17.048124313354492, 17.514019012451172, 17.97991180419922, 18.4458065032959, 18.911699295043945, 19.377593994140625, 19.843486785888672, 20.30938148498535, 20.77527618408203, 21.241168975830078, 21.707063674926758]}, "gradients/encoder.encoder.layers.19.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 5.0, 5.0, 8.0, 6.0, 11.0, 12.0, 9.0, 16.0, 16.0, 24.0, 30.0, 29.0, 39.0, 45.0, 47.0, 54.0, 64.0, 52.0, 80.0, 54.0, 55.0, 45.0, 47.0, 38.0, 43.0, 32.0, 35.0, 22.0, 24.0, 12.0, 20.0, 13.0, 6.0, 2.0, 8.0, 3.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.703734874725342, -5.513448715209961, -5.323163032531738, -5.132876873016357, -4.942591190338135, -4.752305030822754, -4.562019348144531, -4.37173318862915, -4.1814470291137695, -3.9911611080169678, -3.800875186920166, -3.610589027404785, -3.4203033447265625, -3.2300171852111816, -3.03973126411438, -2.849445343017578, -2.6591596603393555, -2.4688737392425537, -2.278587818145752, -2.088301658630371, -1.8980158567428589, -1.7077299356460571, -1.5174438953399658, -1.327157974243164, -1.1368720531463623, -0.9465861320495605, -0.756300151348114, -0.5660141706466675, -0.3757282495498657, -0.18544232845306396, 0.004843711853027344, 0.1951296329498291, 0.38541603088378906, 0.5757019519805908, 0.7659879326820374, 0.9562739133834839, 1.1465598344802856, 1.3368457555770874, 1.5271317958831787, 1.7174177169799805, 1.9077036380767822, 2.097989559173584, 2.2882754802703857, 2.4785614013671875, 2.6688475608825684, 2.859133243560791, 3.049419403076172, 3.2397053241729736, 3.4299912452697754, 3.620277166366577, 3.810563087463379, 4.00084924697876, 4.191134929656982, 4.381421089172363, 4.571706771850586, 4.761992931365967, 4.952279090881348, 5.1425652503967285, 5.332850933074951, 5.523137092590332, 5.713422775268555, 5.9037089347839355, 6.093995094299316, 6.284280776977539, 6.474566459655762]}, "gradients/encoder.encoder.layers.18.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 3.0, 4.0, 6.0, 12.0, 13.0, 16.0, 25.0, 52.0, 65.0, 96.0, 153.0, 304.0, 591.0, 1269.0, 2942.0, 9081.0, 49760.0, 3485387.0, 604990.0, 27707.0, 6523.0, 2471.0, 1125.0, 648.0, 348.0, 189.0, 140.0, 100.0, 56.0, 38.0, 29.0, 18.0, 26.0, 16.0, 12.0, 15.0, 13.0, 8.0, 5.0, 7.0, 7.0, 4.0, 2.0, 3.0, 4.0, 1.0, 2.0, 1.0, 2.0, 1.0, 3.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.5615234375, -1.4961090087890625, -1.430694580078125, -1.3652801513671875, -1.29986572265625, -1.2344512939453125, -1.169036865234375, -1.1036224365234375, -1.0382080078125, -0.9727935791015625, -0.907379150390625, -0.8419647216796875, -0.77655029296875, -0.7111358642578125, -0.645721435546875, -0.5803070068359375, -0.514892578125, -0.4494781494140625, -0.384063720703125, -0.3186492919921875, -0.25323486328125, -0.1878204345703125, -0.122406005859375, -0.0569915771484375, 0.0084228515625, 0.0738372802734375, 0.139251708984375, 0.2046661376953125, 0.27008056640625, 0.3354949951171875, 0.400909423828125, 0.4663238525390625, 0.53173828125, 0.5971527099609375, 0.662567138671875, 0.7279815673828125, 0.79339599609375, 0.8588104248046875, 0.924224853515625, 0.9896392822265625, 1.0550537109375, 1.1204681396484375, 1.185882568359375, 1.2512969970703125, 1.31671142578125, 1.3821258544921875, 1.447540283203125, 1.5129547119140625, 1.578369140625, 1.6437835693359375, 1.709197998046875, 1.7746124267578125, 1.84002685546875, 1.9054412841796875, 1.970855712890625, 2.0362701416015625, 2.1016845703125, 2.1670989990234375, 2.232513427734375, 2.2979278564453125, 2.36334228515625, 2.4287567138671875, 2.494171142578125, 2.5595855712890625, 2.625]}, "gradients/encoder.encoder.layers.18.feed_forward.output_dense.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 2.0, 3.0, 7.0, 4.0, 6.0, 15.0, 27.0, 33.0, 44.0, 80.0, 89.0, 105.0, 105.0, 108.0, 108.0, 86.0, 61.0, 44.0, 32.0, 20.0, 11.0, 7.0, 4.0, 2.0, 3.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.302734375, -0.28887939453125, -0.2750244140625, -0.26116943359375, -0.247314453125, -0.23345947265625, -0.2196044921875, -0.20574951171875, -0.19189453125, -0.17803955078125, -0.1641845703125, -0.15032958984375, -0.136474609375, -0.12261962890625, -0.1087646484375, -0.09490966796875, -0.0810546875, -0.06719970703125, -0.0533447265625, -0.03948974609375, -0.025634765625, -0.01177978515625, 0.0020751953125, 0.01593017578125, 0.02978515625, 0.04364013671875, 0.0574951171875, 0.07135009765625, 0.085205078125, 0.09906005859375, 0.1129150390625, 0.12677001953125, 0.140625, 0.15447998046875, 0.1683349609375, 0.18218994140625, 0.196044921875, 0.20989990234375, 0.2237548828125, 0.23760986328125, 0.25146484375, 0.26531982421875, 0.2791748046875, 0.29302978515625, 0.306884765625, 0.32073974609375, 0.3345947265625, 0.34844970703125, 0.3623046875, 0.37615966796875, 0.3900146484375, 0.40386962890625, 0.417724609375, 0.43157958984375, 0.4454345703125, 0.45928955078125, 0.47314453125, 0.48699951171875, 0.5008544921875, 0.51470947265625, 0.528564453125, 0.54241943359375, 0.5562744140625, 0.57012939453125, 0.583984375]}, "gradients/encoder.encoder.layers.18.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 3.0, 11.0, 18.0, 32.0, 62.0, 124.0, 315.0, 1203.0, 622483.0, 3567648.0, 1538.0, 402.0, 223.0, 100.0, 58.0, 31.0, 15.0, 7.0, 7.0, 3.0, 2.0, 1.0, 0.0, 4.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.25390625, -5.03533935546875, -4.8167724609375, -4.59820556640625, -4.379638671875, -4.16107177734375, -3.9425048828125, -3.72393798828125, -3.50537109375, -3.28680419921875, -3.0682373046875, -2.84967041015625, -2.631103515625, -2.41253662109375, -2.1939697265625, -1.97540283203125, -1.7568359375, -1.53826904296875, -1.3197021484375, -1.10113525390625, -0.882568359375, -0.66400146484375, -0.4454345703125, -0.22686767578125, -0.00830078125, 0.21026611328125, 0.4288330078125, 0.64739990234375, 0.865966796875, 1.08453369140625, 1.3031005859375, 1.52166748046875, 1.740234375, 1.95880126953125, 2.1773681640625, 2.39593505859375, 2.614501953125, 2.83306884765625, 3.0516357421875, 3.27020263671875, 3.48876953125, 3.70733642578125, 3.9259033203125, 4.14447021484375, 4.363037109375, 4.58160400390625, 4.8001708984375, 5.01873779296875, 5.2373046875, 5.45587158203125, 5.6744384765625, 5.89300537109375, 6.111572265625, 6.33013916015625, 6.5487060546875, 6.76727294921875, 6.98583984375, 7.20440673828125, 7.4229736328125, 7.64154052734375, 7.860107421875, 8.07867431640625, 8.2972412109375, 8.51580810546875, 8.734375]}, "gradients/encoder.encoder.layers.18.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 6.0, 2.0, 8.0, 12.0, 50.0, 316.0, 3023.0, 552.0, 78.0, 21.0, 13.0, 2.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.775390625, -0.7394561767578125, -0.703521728515625, -0.6675872802734375, -0.63165283203125, -0.5957183837890625, -0.559783935546875, -0.5238494873046875, -0.4879150390625, -0.4519805908203125, -0.416046142578125, -0.3801116943359375, -0.34417724609375, -0.3082427978515625, -0.272308349609375, -0.2363739013671875, -0.200439453125, -0.1645050048828125, -0.128570556640625, -0.0926361083984375, -0.05670166015625, -0.0207672119140625, 0.015167236328125, 0.0511016845703125, 0.0870361328125, 0.1229705810546875, 0.158905029296875, 0.1948394775390625, 0.23077392578125, 0.2667083740234375, 0.302642822265625, 0.3385772705078125, 0.37451171875, 0.4104461669921875, 0.446380615234375, 0.4823150634765625, 0.51824951171875, 0.5541839599609375, 0.590118408203125, 0.6260528564453125, 0.6619873046875, 0.6979217529296875, 0.733856201171875, 0.7697906494140625, 0.80572509765625, 0.8416595458984375, 0.877593994140625, 0.9135284423828125, 0.949462890625, 0.9853973388671875, 1.021331787109375, 1.0572662353515625, 1.09320068359375, 1.1291351318359375, 1.165069580078125, 1.2010040283203125, 1.2369384765625, 1.2728729248046875, 1.308807373046875, 1.3447418212890625, 1.38067626953125, 1.4166107177734375, 1.452545166015625, 1.4884796142578125, 1.5244140625]}, "gradients/encoder.encoder.layers.18.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 5.0, 14.0, 149.0, 636.0, 176.0, 14.0, 6.0, 3.0, 2.0, 3.0, 3.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.7808456420898438, -3.5183842182159424, -3.255922794342041, -2.9934611320495605, -2.730999708175659, -2.468538284301758, -2.2060766220092773, -1.943615198135376, -1.6811537742614746, -1.4186923503875732, -1.1562308073043823, -0.8937693238258362, -0.63130784034729, -0.36884641647338867, -0.10638487339019775, 0.15607666969299316, 0.41853809356689453, 0.6809995770454407, 0.9434610605239868, 1.2059226036071777, 1.468384027481079, 1.7308454513549805, 1.9933069944381714, 2.2557685375213623, 2.5182299613952637, 2.780691385269165, 3.0431528091430664, 3.305614471435547, 3.5680758953094482, 3.8305373191833496, 4.09299898147583, 4.355460166931152, 4.617920875549316, 4.880382537841797, 5.142843723297119, 5.4053053855896, 5.667766571044922, 5.930228233337402, 6.192689895629883, 6.455151557922363, 6.7176127433776855, 6.980074405670166, 7.242535591125488, 7.504997253417969, 7.767458915710449, 8.02992057800293, 8.292381286621094, 8.554842948913574, 8.817304611206055, 9.079766273498535, 9.342227935791016, 9.60468864440918, 9.86715030670166, 10.12961196899414, 10.392073631286621, 10.654535293579102, 10.916996002197266, 11.179457664489746, 11.441919326782227, 11.70438003540039, 11.966841697692871, 12.229303359985352, 12.491765022277832, 12.754226684570312, 13.016688346862793]}, "gradients/encoder.encoder.layers.18.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 1.0, 0.0, 3.0, 3.0, 12.0, 11.0, 12.0, 13.0, 20.0, 34.0, 32.0, 42.0, 50.0, 57.0, 42.0, 70.0, 75.0, 71.0, 66.0, 71.0, 62.0, 52.0, 56.0, 36.0, 27.0, 26.0, 21.0, 12.0, 11.0, 6.0, 3.0, 6.0, 2.0, 2.0, 3.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.1153998374938965, -2.047651767730713, -1.9799038171768188, -1.9121558666229248, -1.8444077968597412, -1.7766598463058472, -1.7089118957519531, -1.6411638259887695, -1.573415756225586, -1.505667805671692, -1.4379197359085083, -1.3701717853546143, -1.3024237155914307, -1.2346757650375366, -1.1669278144836426, -1.099179744720459, -1.031431794166565, -0.9636837840080261, -0.8959357738494873, -0.8281878232955933, -0.7604397535324097, -0.6926918029785156, -0.6249437928199768, -0.557195782661438, -0.48944777250289917, -0.42169976234436035, -0.35395175218582153, -0.2862037718296051, -0.21845576167106628, -0.15070775151252747, -0.08295977115631104, -0.015211760997772217, 0.0525362491607666, 0.12028425186872482, 0.18803225457668304, 0.25578024983406067, 0.3235282599925995, 0.3912762701511383, 0.45902425050735474, 0.5267722606658936, 0.5945202708244324, 0.6622682809829712, 0.73001629114151, 0.7977643013000488, 0.8655122518539429, 0.9332603216171265, 1.0010082721710205, 1.068756341934204, 1.1365042924880981, 1.2042522430419922, 1.2720003128051758, 1.3397482633590698, 1.4074963331222534, 1.4752442836761475, 1.542992353439331, 1.610740303993225, 1.6784882545471191, 1.7462362051010132, 1.8139842748641968, 1.8817322254180908, 1.9494802951812744, 2.017228364944458, 2.0849761962890625, 2.152724266052246, 2.2204723358154297]}, "gradients/encoder.encoder.layers.18.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 3.0, 3.0, 7.0, 9.0, 18.0, 27.0, 42.0, 65.0, 98.0, 142.0, 224.0, 413.0, 782.0, 1693.0, 4511.0, 18115.0, 127598.0, 719078.0, 147536.0, 19931.0, 4716.0, 1661.0, 734.0, 409.0, 263.0, 152.0, 100.0, 61.0, 53.0, 30.0, 25.0, 11.0, 12.0, 8.0, 10.0, 8.0, 2.0, 5.0, 3.0, 4.0, 1.0, 1.0, 2.0], "bins": [-2.50390625, -2.4389190673828125, -2.373931884765625, -2.3089447021484375, -2.24395751953125, -2.1789703369140625, -2.113983154296875, -2.0489959716796875, -1.9840087890625, -1.9190216064453125, -1.854034423828125, -1.7890472412109375, -1.72406005859375, -1.6590728759765625, -1.594085693359375, -1.5290985107421875, -1.464111328125, -1.3991241455078125, -1.334136962890625, -1.2691497802734375, -1.20416259765625, -1.1391754150390625, -1.074188232421875, -1.0092010498046875, -0.9442138671875, -0.8792266845703125, -0.814239501953125, -0.7492523193359375, -0.68426513671875, -0.6192779541015625, -0.554290771484375, -0.4893035888671875, -0.42431640625, -0.3593292236328125, -0.294342041015625, -0.2293548583984375, -0.16436767578125, -0.0993804931640625, -0.034393310546875, 0.0305938720703125, 0.0955810546875, 0.1605682373046875, 0.225555419921875, 0.2905426025390625, 0.35552978515625, 0.4205169677734375, 0.485504150390625, 0.5504913330078125, 0.615478515625, 0.6804656982421875, 0.745452880859375, 0.8104400634765625, 0.87542724609375, 0.9404144287109375, 1.005401611328125, 1.0703887939453125, 1.1353759765625, 1.2003631591796875, 1.265350341796875, 1.3303375244140625, 1.39532470703125, 1.4603118896484375, 1.525299072265625, 1.5902862548828125, 1.6552734375]}, "gradients/encoder.encoder.layers.18.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 1.0, 3.0, 1.0, 0.0, 1.0, 3.0, 4.0, 3.0, 3.0, 8.0, 12.0, 19.0, 22.0, 30.0, 50.0, 55.0, 78.0, 103.0, 83.0, 83.0, 82.0, 87.0, 69.0, 68.0, 33.0, 32.0, 25.0, 17.0, 13.0, 14.0, 4.0, 2.0, 1.0, 2.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.3125, -0.29920196533203125, -0.2859039306640625, -0.27260589599609375, -0.259307861328125, -0.24600982666015625, -0.2327117919921875, -0.21941375732421875, -0.20611572265625, -0.19281768798828125, -0.1795196533203125, -0.16622161865234375, -0.152923583984375, -0.13962554931640625, -0.1263275146484375, -0.11302947998046875, -0.0997314453125, -0.08643341064453125, -0.0731353759765625, -0.05983734130859375, -0.046539306640625, -0.03324127197265625, -0.0199432373046875, -0.00664520263671875, 0.00665283203125, 0.01995086669921875, 0.0332489013671875, 0.04654693603515625, 0.059844970703125, 0.07314300537109375, 0.0864410400390625, 0.09973907470703125, 0.113037109375, 0.12633514404296875, 0.1396331787109375, 0.15293121337890625, 0.166229248046875, 0.17952728271484375, 0.1928253173828125, 0.20612335205078125, 0.21942138671875, 0.23271942138671875, 0.2460174560546875, 0.25931549072265625, 0.272613525390625, 0.28591156005859375, 0.2992095947265625, 0.31250762939453125, 0.3258056640625, 0.33910369873046875, 0.3524017333984375, 0.36569976806640625, 0.378997802734375, 0.39229583740234375, 0.4055938720703125, 0.41889190673828125, 0.43218994140625, 0.44548797607421875, 0.4587860107421875, 0.47208404541015625, 0.485382080078125, 0.49868011474609375, 0.5119781494140625, 0.5252761840820312, 0.53857421875]}, "gradients/encoder.encoder.layers.18.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 3.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 5.0, 4.0, 4.0, 7.0, 8.0, 26.0, 22.0, 45.0, 61.0, 86.0, 175.0, 338.0, 719.0, 1893.0, 7265.0, 59786.0, 825012.0, 137053.0, 11570.0, 2631.0, 928.0, 397.0, 205.0, 101.0, 83.0, 43.0, 30.0, 15.0, 16.0, 6.0, 9.0, 6.0, 4.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.064453125, -1.98974609375, -1.9150390625, -1.84033203125, -1.765625, -1.69091796875, -1.6162109375, -1.54150390625, -1.466796875, -1.39208984375, -1.3173828125, -1.24267578125, -1.16796875, -1.09326171875, -1.0185546875, -0.94384765625, -0.869140625, -0.79443359375, -0.7197265625, -0.64501953125, -0.5703125, -0.49560546875, -0.4208984375, -0.34619140625, -0.271484375, -0.19677734375, -0.1220703125, -0.04736328125, 0.02734375, 0.10205078125, 0.1767578125, 0.25146484375, 0.326171875, 0.40087890625, 0.4755859375, 0.55029296875, 0.625, 0.69970703125, 0.7744140625, 0.84912109375, 0.923828125, 0.99853515625, 1.0732421875, 1.14794921875, 1.22265625, 1.29736328125, 1.3720703125, 1.44677734375, 1.521484375, 1.59619140625, 1.6708984375, 1.74560546875, 1.8203125, 1.89501953125, 1.9697265625, 2.04443359375, 2.119140625, 2.19384765625, 2.2685546875, 2.34326171875, 2.41796875, 2.49267578125, 2.5673828125, 2.64208984375, 2.716796875]}, "gradients/encoder.encoder.layers.18.attention.v_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 5.0, 0.0, 1.0, 3.0, 6.0, 4.0, 10.0, 19.0, 16.0, 15.0, 25.0, 40.0, 35.0, 56.0, 72.0, 73.0, 83.0, 84.0, 91.0, 76.0, 73.0, 46.0, 41.0, 28.0, 36.0, 21.0, 6.0, 11.0, 10.0, 10.0, 3.0, 5.0, 4.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.32421875, -1.269989013671875, -1.21575927734375, -1.161529541015625, -1.1072998046875, -1.053070068359375, -0.99884033203125, -0.944610595703125, -0.890380859375, -0.836151123046875, -0.78192138671875, -0.727691650390625, -0.6734619140625, -0.619232177734375, -0.56500244140625, -0.510772705078125, -0.45654296875, -0.402313232421875, -0.34808349609375, -0.293853759765625, -0.2396240234375, -0.185394287109375, -0.13116455078125, -0.076934814453125, -0.022705078125, 0.031524658203125, 0.08575439453125, 0.139984130859375, 0.1942138671875, 0.248443603515625, 0.30267333984375, 0.356903076171875, 0.4111328125, 0.465362548828125, 0.51959228515625, 0.573822021484375, 0.6280517578125, 0.682281494140625, 0.73651123046875, 0.790740966796875, 0.844970703125, 0.899200439453125, 0.95343017578125, 1.007659912109375, 1.0618896484375, 1.116119384765625, 1.17034912109375, 1.224578857421875, 1.27880859375, 1.333038330078125, 1.38726806640625, 1.441497802734375, 1.4957275390625, 1.549957275390625, 1.60418701171875, 1.658416748046875, 1.712646484375, 1.766876220703125, 1.82110595703125, 1.875335693359375, 1.9295654296875, 1.983795166015625, 2.03802490234375, 2.092254638671875, 2.146484375]}, "gradients/encoder.encoder.layers.18.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 0.0, 2.0, 5.0, 4.0, 8.0, 8.0, 4.0, 21.0, 25.0, 25.0, 33.0, 70.0, 115.0, 204.0, 392.0, 764.0, 1810.0, 6238.0, 34816.0, 403030.0, 539303.0, 49861.0, 7814.0, 2239.0, 857.0, 398.0, 198.0, 119.0, 63.0, 42.0, 29.0, 13.0, 11.0, 13.0, 9.0, 6.0, 7.0, 3.0, 2.0, 2.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.5087890625, -0.49066925048828125, -0.4725494384765625, -0.45442962646484375, -0.436309814453125, -0.41819000244140625, -0.4000701904296875, -0.38195037841796875, -0.36383056640625, -0.34571075439453125, -0.3275909423828125, -0.30947113037109375, -0.291351318359375, -0.27323150634765625, -0.2551116943359375, -0.23699188232421875, -0.2188720703125, -0.20075225830078125, -0.1826324462890625, -0.16451263427734375, -0.146392822265625, -0.12827301025390625, -0.1101531982421875, -0.09203338623046875, -0.07391357421875, -0.05579376220703125, -0.0376739501953125, -0.01955413818359375, -0.001434326171875, 0.01668548583984375, 0.0348052978515625, 0.05292510986328125, 0.071044921875, 0.08916473388671875, 0.1072845458984375, 0.12540435791015625, 0.143524169921875, 0.16164398193359375, 0.1797637939453125, 0.19788360595703125, 0.21600341796875, 0.23412322998046875, 0.2522430419921875, 0.27036285400390625, 0.288482666015625, 0.30660247802734375, 0.3247222900390625, 0.34284210205078125, 0.3609619140625, 0.37908172607421875, 0.3972015380859375, 0.41532135009765625, 0.433441162109375, 0.45156097412109375, 0.4696807861328125, 0.48780059814453125, 0.50592041015625, 0.5240402221679688, 0.5421600341796875, 0.5602798461914062, 0.578399658203125, 0.5965194702148438, 0.6146392822265625, 0.6327590942382812, 0.65087890625]}, "gradients/encoder.encoder.layers.18.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 6.0, 7.0, 11.0, 21.0, 38.0, 56.0, 95.0, 173.0, 211.0, 180.0, 102.0, 60.0, 29.0, 12.0, 9.0, 0.0, 6.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00018477439880371094, -0.00018032174557447433, -0.00017586909234523773, -0.00017141643911600113, -0.00016696378588676453, -0.00016251113265752792, -0.00015805847942829132, -0.00015360582619905472, -0.00014915317296981812, -0.0001447005197405815, -0.0001402478665113449, -0.0001357952132821083, -0.0001313425600528717, -0.0001268899068236351, -0.0001224372535943985, -0.0001179846003651619, -0.00011353194713592529, -0.00010907929390668869, -0.00010462664067745209, -0.00010017398744821548, -9.572133421897888e-05, -9.126868098974228e-05, -8.681602776050568e-05, -8.236337453126907e-05, -7.791072130203247e-05, -7.345806807279587e-05, -6.900541484355927e-05, -6.455276161432266e-05, -6.010010838508606e-05, -5.564745515584946e-05, -5.1194801926612854e-05, -4.674214869737625e-05, -4.228949546813965e-05, -3.7836842238903046e-05, -3.338418900966644e-05, -2.893153578042984e-05, -2.4478882551193237e-05, -2.0026229321956635e-05, -1.5573576092720032e-05, -1.1120922863483429e-05, -6.668269634246826e-06, -2.2156164050102234e-06, 2.2370368242263794e-06, 6.689690053462982e-06, 1.1142343282699585e-05, 1.5594996511936188e-05, 2.004764974117279e-05, 2.4500302970409393e-05, 2.8952956199645996e-05, 3.34056094288826e-05, 3.78582626581192e-05, 4.2310915887355804e-05, 4.676356911659241e-05, 5.121622234582901e-05, 5.566887557506561e-05, 6.0121528804302216e-05, 6.457418203353882e-05, 6.902683526277542e-05, 7.347948849201202e-05, 7.793214172124863e-05, 8.238479495048523e-05, 8.683744817972183e-05, 9.129010140895844e-05, 9.574275463819504e-05, 0.00010019540786743164]}, "gradients/encoder.encoder.layers.18.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 4.0, 1.0, 5.0, 10.0, 11.0, 21.0, 22.0, 31.0, 67.0, 91.0, 202.0, 504.0, 1435.0, 5903.0, 50407.0, 762704.0, 209927.0, 13227.0, 2536.0, 792.0, 303.0, 163.0, 82.0, 41.0, 24.0, 16.0, 8.0, 10.0, 2.0, 7.0, 3.0, 2.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.6416015625, -0.6192626953125, -0.596923828125, -0.5745849609375, -0.55224609375, -0.5299072265625, -0.507568359375, -0.4852294921875, -0.462890625, -0.4405517578125, -0.418212890625, -0.3958740234375, -0.37353515625, -0.3511962890625, -0.328857421875, -0.3065185546875, -0.2841796875, -0.2618408203125, -0.239501953125, -0.2171630859375, -0.19482421875, -0.1724853515625, -0.150146484375, -0.1278076171875, -0.10546875, -0.0831298828125, -0.060791015625, -0.0384521484375, -0.01611328125, 0.0062255859375, 0.028564453125, 0.0509033203125, 0.0732421875, 0.0955810546875, 0.117919921875, 0.1402587890625, 0.16259765625, 0.1849365234375, 0.207275390625, 0.2296142578125, 0.251953125, 0.2742919921875, 0.296630859375, 0.3189697265625, 0.34130859375, 0.3636474609375, 0.385986328125, 0.4083251953125, 0.4306640625, 0.4530029296875, 0.475341796875, 0.4976806640625, 0.52001953125, 0.5423583984375, 0.564697265625, 0.5870361328125, 0.609375, 0.6317138671875, 0.654052734375, 0.6763916015625, 0.69873046875, 0.7210693359375, 0.743408203125, 0.7657470703125, 0.7880859375]}, "gradients/encoder.encoder.layers.18.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 3.0, 3.0, 3.0, 6.0, 6.0, 8.0, 9.0, 16.0, 22.0, 44.0, 50.0, 77.0, 115.0, 120.0, 161.0, 115.0, 95.0, 61.0, 32.0, 28.0, 7.0, 6.0, 6.0, 3.0, 4.0, 2.0, 6.0, 3.0, 3.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.46533203125, -0.4508018493652344, -0.43627166748046875, -0.4217414855957031, -0.4072113037109375, -0.3926811218261719, -0.37815093994140625, -0.3636207580566406, -0.349090576171875, -0.3345603942871094, -0.32003021240234375, -0.3055000305175781, -0.2909698486328125, -0.2764396667480469, -0.26190948486328125, -0.24737930297851562, -0.23284912109375, -0.21831893920898438, -0.20378875732421875, -0.18925857543945312, -0.1747283935546875, -0.16019821166992188, -0.14566802978515625, -0.13113784790039062, -0.116607666015625, -0.10207748413085938, -0.08754730224609375, -0.07301712036132812, -0.0584869384765625, -0.043956756591796875, -0.02942657470703125, -0.014896392822265625, -0.0003662109375, 0.014163970947265625, 0.02869415283203125, 0.043224334716796875, 0.0577545166015625, 0.07228469848632812, 0.08681488037109375, 0.10134506225585938, 0.115875244140625, 0.13040542602539062, 0.14493560791015625, 0.15946578979492188, 0.1739959716796875, 0.18852615356445312, 0.20305633544921875, 0.21758651733398438, 0.23211669921875, 0.24664688110351562, 0.26117706298828125, 0.2757072448730469, 0.2902374267578125, 0.3047676086425781, 0.31929779052734375, 0.3338279724121094, 0.348358154296875, 0.3628883361816406, 0.37741851806640625, 0.3919486999511719, 0.4064788818359375, 0.4210090637207031, 0.43553924560546875, 0.4500694274902344, 0.464599609375]}, "gradients/encoder.encoder.layers.18.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 3.0, 0.0, 13.0, 43.0, 130.0, 390.0, 303.0, 81.0, 31.0, 8.0, 6.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.776620864868164, -6.20587158203125, -5.635122299194336, -5.064373016357422, -4.493623733520508, -3.9228744506835938, -3.3521251678466797, -2.7813758850097656, -2.2106266021728516, -1.6398773193359375, -1.0691280364990234, -0.4983787536621094, 0.07237052917480469, 0.6431198120117188, 1.2138690948486328, 1.7846183776855469, 2.355367660522461, 2.926116943359375, 3.496866226196289, 4.067615509033203, 4.638364791870117, 5.209114074707031, 5.779863357543945, 6.350612640380859, 6.921361923217773, 7.4921112060546875, 8.062860488891602, 8.633609771728516, 9.20435905456543, 9.775108337402344, 10.345857620239258, 10.916606903076172, 11.487358093261719, 12.058107376098633, 12.628856658935547, 13.199605941772461, 13.770355224609375, 14.341104507446289, 14.911853790283203, 15.482603073120117, 16.05335235595703, 16.624101638793945, 17.19485092163086, 17.765600204467773, 18.336349487304688, 18.9070987701416, 19.477848052978516, 20.04859733581543, 20.619346618652344, 21.190095901489258, 21.760845184326172, 22.331594467163086, 22.90234375, 23.473093032836914, 24.043842315673828, 24.614591598510742, 25.185340881347656, 25.75609016418457, 26.326839447021484, 26.8975887298584, 27.468338012695312, 28.039087295532227, 28.60983657836914, 29.180585861206055, 29.75133514404297]}, "gradients/encoder.encoder.layers.18.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 4.0, 1.0, 1.0, 3.0, 2.0, 6.0, 4.0, 8.0, 8.0, 13.0, 18.0, 24.0, 20.0, 30.0, 21.0, 34.0, 45.0, 48.0, 49.0, 50.0, 49.0, 58.0, 53.0, 50.0, 53.0, 62.0, 40.0, 41.0, 34.0, 36.0, 30.0, 24.0, 19.0, 22.0, 11.0, 11.0, 11.0, 2.0, 4.0, 3.0, 6.0, 2.0, 1.0, 5.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-7.640623569488525, -7.410745143890381, -7.180866241455078, -6.950987815856934, -6.721109390258789, -6.4912309646606445, -6.261352062225342, -6.031473636627197, -5.8015947341918945, -5.57171630859375, -5.341837406158447, -5.111958980560303, -4.882080554962158, -4.6522016525268555, -4.422323226928711, -4.192444801330566, -3.962566375732422, -3.7326877117156982, -3.5028092861175537, -3.27293062210083, -3.0430521965026855, -2.813173532485962, -2.5832948684692383, -2.3534164428710938, -2.12353777885437, -1.893659234046936, -1.663780689239502, -1.4339020252227783, -1.2040234804153442, -0.9741449356079102, -0.7442662715911865, -0.5143877267837524, -0.28450918197631836, -0.05463060736656189, 0.17524796724319458, 0.40512657165527344, 0.6350051164627075, 0.8648836612701416, 1.0947623252868652, 1.3246408700942993, 1.5545194149017334, 1.7843979597091675, 2.0142765045166016, 2.244155168533325, 2.474033832550049, 2.7039122581481934, 2.933790922164917, 3.1636695861816406, 3.393548011779785, 3.623426675796509, 3.8533051013946533, 4.083183765411377, 4.3130621910095215, 4.542941093444824, 4.772819519042969, 5.002697944641113, 5.232576370239258, 5.462454795837402, 5.692333698272705, 5.92221212387085, 6.152090549468994, 6.381969451904297, 6.611847877502441, 6.841726303100586, 7.071605205535889]}, "gradients/encoder.encoder.layers.17.feed_forward.output_dense.weight": {"_type": "histogram", "values": [2.0, 2.0, 3.0, 6.0, 5.0, 11.0, 13.0, 16.0, 33.0, 26.0, 51.0, 76.0, 156.0, 247.0, 398.0, 806.0, 1415.0, 3273.0, 9134.0, 37121.0, 584785.0, 3463139.0, 70218.0, 13732.0, 4693.0, 2059.0, 1050.0, 622.0, 339.0, 250.0, 156.0, 117.0, 70.0, 57.0, 34.0, 40.0, 26.0, 31.0, 9.0, 10.0, 13.0, 10.0, 9.0, 3.0, 8.0, 5.0, 2.0, 2.0, 5.0, 4.0, 1.0, 0.0, 4.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.615234375, -1.5389404296875, -1.462646484375, -1.3863525390625, -1.31005859375, -1.2337646484375, -1.157470703125, -1.0811767578125, -1.0048828125, -0.9285888671875, -0.852294921875, -0.7760009765625, -0.69970703125, -0.6234130859375, -0.547119140625, -0.4708251953125, -0.39453125, -0.3182373046875, -0.241943359375, -0.1656494140625, -0.08935546875, -0.0130615234375, 0.063232421875, 0.1395263671875, 0.2158203125, 0.2921142578125, 0.368408203125, 0.4447021484375, 0.52099609375, 0.5972900390625, 0.673583984375, 0.7498779296875, 0.826171875, 0.9024658203125, 0.978759765625, 1.0550537109375, 1.13134765625, 1.2076416015625, 1.283935546875, 1.3602294921875, 1.4365234375, 1.5128173828125, 1.589111328125, 1.6654052734375, 1.74169921875, 1.8179931640625, 1.894287109375, 1.9705810546875, 2.046875, 2.1231689453125, 2.199462890625, 2.2757568359375, 2.35205078125, 2.4283447265625, 2.504638671875, 2.5809326171875, 2.6572265625, 2.7335205078125, 2.809814453125, 2.8861083984375, 2.96240234375, 3.0386962890625, 3.114990234375, 3.1912841796875, 3.267578125]}, "gradients/encoder.encoder.layers.17.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 4.0, 1.0, 0.0, 2.0, 3.0, 3.0, 6.0, 11.0, 17.0, 19.0, 28.0, 21.0, 54.0, 49.0, 73.0, 71.0, 83.0, 112.0, 74.0, 85.0, 65.0, 57.0, 45.0, 39.0, 26.0, 22.0, 16.0, 7.0, 5.0, 6.0, 3.0, 2.0, 2.0, 0.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.289306640625, -0.2765617370605469, -0.26381683349609375, -0.2510719299316406, -0.2383270263671875, -0.22558212280273438, -0.21283721923828125, -0.20009231567382812, -0.187347412109375, -0.17460250854492188, -0.16185760498046875, -0.14911270141601562, -0.1363677978515625, -0.12362289428710938, -0.11087799072265625, -0.09813308715820312, -0.08538818359375, -0.07264328002929688, -0.05989837646484375, -0.047153472900390625, -0.0344085693359375, -0.021663665771484375, -0.00891876220703125, 0.003826141357421875, 0.016571044921875, 0.029315948486328125, 0.04206085205078125, 0.054805755615234375, 0.0675506591796875, 0.08029556274414062, 0.09304046630859375, 0.10578536987304688, 0.1185302734375, 0.13127517700195312, 0.14402008056640625, 0.15676498413085938, 0.1695098876953125, 0.18225479125976562, 0.19499969482421875, 0.20774459838867188, 0.220489501953125, 0.23323440551757812, 0.24597930908203125, 0.2587242126464844, 0.2714691162109375, 0.2842140197753906, 0.29695892333984375, 0.3097038269042969, 0.32244873046875, 0.3351936340332031, 0.34793853759765625, 0.3606834411621094, 0.3734283447265625, 0.3861732482910156, 0.39891815185546875, 0.4116630554199219, 0.424407958984375, 0.4371528625488281, 0.44989776611328125, 0.4626426696777344, 0.4753875732421875, 0.4881324768066406, 0.5008773803710938, 0.5136222839355469, 0.5263671875]}, "gradients/encoder.encoder.layers.17.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 2.0, 2.0, 3.0, 5.0, 9.0, 9.0, 31.0, 39.0, 64.0, 173.0, 432.0, 1908.0, 16493.0, 4140130.0, 31004.0, 2851.0, 628.0, 240.0, 128.0, 50.0, 35.0, 27.0, 10.0, 6.0, 8.0, 2.0, 4.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.5, -7.28924560546875, -7.0784912109375, -6.86773681640625, -6.656982421875, -6.44622802734375, -6.2354736328125, -6.02471923828125, -5.81396484375, -5.60321044921875, -5.3924560546875, -5.18170166015625, -4.970947265625, -4.76019287109375, -4.5494384765625, -4.33868408203125, -4.1279296875, -3.91717529296875, -3.7064208984375, -3.49566650390625, -3.284912109375, -3.07415771484375, -2.8634033203125, -2.65264892578125, -2.44189453125, -2.23114013671875, -2.0203857421875, -1.80963134765625, -1.598876953125, -1.38812255859375, -1.1773681640625, -0.96661376953125, -0.755859375, -0.54510498046875, -0.3343505859375, -0.12359619140625, 0.087158203125, 0.29791259765625, 0.5086669921875, 0.71942138671875, 0.93017578125, 1.14093017578125, 1.3516845703125, 1.56243896484375, 1.773193359375, 1.98394775390625, 2.1947021484375, 2.40545654296875, 2.6162109375, 2.82696533203125, 3.0377197265625, 3.24847412109375, 3.459228515625, 3.66998291015625, 3.8807373046875, 4.09149169921875, 4.30224609375, 4.51300048828125, 4.7237548828125, 4.93450927734375, 5.145263671875, 5.35601806640625, 5.5667724609375, 5.77752685546875, 5.98828125]}, "gradients/encoder.encoder.layers.17.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 5.0, 2.0, 1.0, 1.0, 1.0, 2.0, 3.0, 6.0, 14.0, 13.0, 32.0, 54.0, 247.0, 2160.0, 1245.0, 186.0, 50.0, 18.0, 10.0, 8.0, 6.0, 6.0, 2.0, 3.0, 5.0, 2.0, 1.0, 3.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.00390625, -0.950775146484375, -0.89764404296875, -0.844512939453125, -0.7913818359375, -0.738250732421875, -0.68511962890625, -0.631988525390625, -0.578857421875, -0.525726318359375, -0.47259521484375, -0.419464111328125, -0.3663330078125, -0.313201904296875, -0.26007080078125, -0.206939697265625, -0.15380859375, -0.100677490234375, -0.04754638671875, 0.005584716796875, 0.0587158203125, 0.111846923828125, 0.16497802734375, 0.218109130859375, 0.271240234375, 0.324371337890625, 0.37750244140625, 0.430633544921875, 0.4837646484375, 0.536895751953125, 0.59002685546875, 0.643157958984375, 0.6962890625, 0.749420166015625, 0.80255126953125, 0.855682373046875, 0.9088134765625, 0.961944580078125, 1.01507568359375, 1.068206787109375, 1.121337890625, 1.174468994140625, 1.22760009765625, 1.280731201171875, 1.3338623046875, 1.386993408203125, 1.44012451171875, 1.493255615234375, 1.54638671875, 1.599517822265625, 1.65264892578125, 1.705780029296875, 1.7589111328125, 1.812042236328125, 1.86517333984375, 1.918304443359375, 1.971435546875, 2.024566650390625, 2.07769775390625, 2.130828857421875, 2.1839599609375, 2.237091064453125, 2.29022216796875, 2.343353271484375, 2.396484375]}, "gradients/encoder.encoder.layers.17.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 3.0, 20.0, 731.0, 243.0, 9.0, 6.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-10.54916763305664, -9.574657440185547, -8.600146293640137, -7.625636100769043, -6.651125907897949, -5.676615238189697, -4.702104568481445, -3.7275943756103516, -2.7530837059020996, -1.7785732746124268, -0.8040627241134644, 0.17044782638549805, 1.144958257675171, 2.1194686889648438, 3.0939793586730957, 4.0684895515441895, 5.043000221252441, 6.017510890960693, 6.992021083831787, 7.966531753540039, 8.941041946411133, 9.915552139282227, 10.890063285827637, 11.86457347869873, 12.83908462524414, 13.813594818115234, 14.788105964660645, 15.762616157531738, 16.73712730407715, 17.711637496948242, 18.686147689819336, 19.66065788269043, 20.635168075561523, 21.609678268432617, 22.58418846130371, 23.558700561523438, 24.53321075439453, 25.507720947265625, 26.48223114013672, 27.456741333007812, 28.431251525878906, 29.40576171875, 30.380271911621094, 31.354782104492188, 32.32929229736328, 33.303802490234375, 34.27831268310547, 35.25282287597656, 36.22733688354492, 37.201847076416016, 38.17635726928711, 39.1508674621582, 40.1253776550293, 41.09988784790039, 42.07440185546875, 43.048912048339844, 44.02341842651367, 44.997928619384766, 45.97243881225586, 46.94694900512695, 47.92145919799805, 48.89596939086914, 49.8704833984375, 50.844993591308594, 51.81950378417969]}, "gradients/encoder.encoder.layers.17.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 4.0, 5.0, 5.0, 13.0, 14.0, 45.0, 60.0, 83.0, 109.0, 134.0, 126.0, 136.0, 113.0, 61.0, 43.0, 26.0, 17.0, 9.0, 3.0, 2.0, 1.0, 2.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.170922756195068, -6.939195156097412, -6.707467555999756, -6.4757399559021, -6.244012355804443, -6.012284755706787, -5.780557155609131, -5.548829555511475, -5.317101955413818, -5.085374355316162, -4.853646755218506, -4.62191915512085, -4.390191555023193, -4.158463954925537, -3.926736354827881, -3.6950087547302246, -3.4632811546325684, -3.231553554534912, -2.999825954437256, -2.7680983543395996, -2.5363707542419434, -2.304643154144287, -2.072915554046631, -1.8411879539489746, -1.6094603538513184, -1.377732753753662, -1.1460051536560059, -0.9142775535583496, -0.6825499534606934, -0.4508223533630371, -0.21909475326538086, 0.01263284683227539, 0.24435997009277344, 0.4760875701904297, 0.7078151702880859, 0.9395427703857422, 1.1712703704833984, 1.4029979705810547, 1.634725570678711, 1.8664531707763672, 2.0981807708740234, 2.3299083709716797, 2.561635971069336, 2.793363571166992, 3.0250911712646484, 3.2568187713623047, 3.488546371459961, 3.720273971557617, 3.9520015716552734, 4.18372917175293, 4.415456771850586, 4.647184371948242, 4.878911972045898, 5.110639572143555, 5.342367172241211, 5.574094772338867, 5.805822372436523, 6.03754997253418, 6.269277572631836, 6.501005172729492, 6.732732772827148, 6.964460372924805, 7.196187973022461, 7.427915573120117, 7.659643173217773]}, "gradients/encoder.encoder.layers.17.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 4.0, 4.0, 4.0, 6.0, 10.0, 8.0, 9.0, 24.0, 29.0, 42.0, 49.0, 94.0, 164.0, 263.0, 550.0, 1192.0, 3295.0, 16814.0, 281744.0, 698127.0, 37585.0, 5316.0, 1673.0, 722.0, 332.0, 170.0, 121.0, 73.0, 43.0, 26.0, 21.0, 12.0, 12.0, 9.0, 6.0, 4.0, 6.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-2.31640625, -2.2279052734375, -2.139404296875, -2.0509033203125, -1.96240234375, -1.8739013671875, -1.785400390625, -1.6968994140625, -1.6083984375, -1.5198974609375, -1.431396484375, -1.3428955078125, -1.25439453125, -1.1658935546875, -1.077392578125, -0.9888916015625, -0.900390625, -0.8118896484375, -0.723388671875, -0.6348876953125, -0.54638671875, -0.4578857421875, -0.369384765625, -0.2808837890625, -0.1923828125, -0.1038818359375, -0.015380859375, 0.0731201171875, 0.16162109375, 0.2501220703125, 0.338623046875, 0.4271240234375, 0.515625, 0.6041259765625, 0.692626953125, 0.7811279296875, 0.86962890625, 0.9581298828125, 1.046630859375, 1.1351318359375, 1.2236328125, 1.3121337890625, 1.400634765625, 1.4891357421875, 1.57763671875, 1.6661376953125, 1.754638671875, 1.8431396484375, 1.931640625, 2.0201416015625, 2.108642578125, 2.1971435546875, 2.28564453125, 2.3741455078125, 2.462646484375, 2.5511474609375, 2.6396484375, 2.7281494140625, 2.816650390625, 2.9051513671875, 2.99365234375, 3.0821533203125, 3.170654296875, 3.2591552734375, 3.34765625]}, "gradients/encoder.encoder.layers.17.attention.out_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 4.0, 3.0, 2.0, 5.0, 3.0, 19.0, 37.0, 35.0, 71.0, 91.0, 132.0, 128.0, 137.0, 84.0, 102.0, 70.0, 40.0, 17.0, 7.0, 5.0, 6.0, 6.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.39892578125, -0.37984466552734375, -0.3607635498046875, -0.34168243408203125, -0.322601318359375, -0.30352020263671875, -0.2844390869140625, -0.26535797119140625, -0.24627685546875, -0.22719573974609375, -0.2081146240234375, -0.18903350830078125, -0.169952392578125, -0.15087127685546875, -0.1317901611328125, -0.11270904541015625, -0.0936279296875, -0.07454681396484375, -0.0554656982421875, -0.03638458251953125, -0.017303466796875, 0.00177764892578125, 0.0208587646484375, 0.03993988037109375, 0.05902099609375, 0.07810211181640625, 0.0971832275390625, 0.11626434326171875, 0.135345458984375, 0.15442657470703125, 0.1735076904296875, 0.19258880615234375, 0.211669921875, 0.23075103759765625, 0.2498321533203125, 0.26891326904296875, 0.287994384765625, 0.30707550048828125, 0.3261566162109375, 0.34523773193359375, 0.36431884765625, 0.38339996337890625, 0.4024810791015625, 0.42156219482421875, 0.440643310546875, 0.45972442626953125, 0.4788055419921875, 0.49788665771484375, 0.5169677734375, 0.5360488891601562, 0.5551300048828125, 0.5742111206054688, 0.593292236328125, 0.6123733520507812, 0.6314544677734375, 0.6505355834960938, 0.66961669921875, 0.6886978149414062, 0.7077789306640625, 0.7268600463867188, 0.745941162109375, 0.7650222778320312, 0.7841033935546875, 0.8031845092773438, 0.822265625]}, "gradients/encoder.encoder.layers.17.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 4.0, 2.0, 1.0, 5.0, 16.0, 17.0, 27.0, 32.0, 49.0, 84.0, 120.0, 224.0, 434.0, 975.0, 2879.0, 13784.0, 108487.0, 662602.0, 226241.0, 25254.0, 4621.0, 1344.0, 603.0, 304.0, 190.0, 106.0, 50.0, 39.0, 33.0, 13.0, 13.0, 6.0, 5.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-1.8037109375, -1.7558135986328125, -1.707916259765625, -1.6600189208984375, -1.61212158203125, -1.5642242431640625, -1.516326904296875, -1.4684295654296875, -1.4205322265625, -1.3726348876953125, -1.324737548828125, -1.2768402099609375, -1.22894287109375, -1.1810455322265625, -1.133148193359375, -1.0852508544921875, -1.037353515625, -0.9894561767578125, -0.941558837890625, -0.8936614990234375, -0.84576416015625, -0.7978668212890625, -0.749969482421875, -0.7020721435546875, -0.6541748046875, -0.6062774658203125, -0.558380126953125, -0.5104827880859375, -0.46258544921875, -0.4146881103515625, -0.366790771484375, -0.3188934326171875, -0.27099609375, -0.2230987548828125, -0.175201416015625, -0.1273040771484375, -0.07940673828125, -0.0315093994140625, 0.016387939453125, 0.0642852783203125, 0.1121826171875, 0.1600799560546875, 0.207977294921875, 0.2558746337890625, 0.30377197265625, 0.3516693115234375, 0.399566650390625, 0.4474639892578125, 0.495361328125, 0.5432586669921875, 0.591156005859375, 0.6390533447265625, 0.68695068359375, 0.7348480224609375, 0.782745361328125, 0.8306427001953125, 0.8785400390625, 0.9264373779296875, 0.974334716796875, 1.0222320556640625, 1.07012939453125, 1.1180267333984375, 1.165924072265625, 1.2138214111328125, 1.26171875]}, "gradients/encoder.encoder.layers.17.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 0.0, 1.0, 2.0, 3.0, 3.0, 4.0, 10.0, 5.0, 12.0, 16.0, 19.0, 14.0, 19.0, 32.0, 31.0, 34.0, 35.0, 42.0, 27.0, 63.0, 41.0, 55.0, 49.0, 40.0, 59.0, 50.0, 57.0, 47.0, 40.0, 43.0, 30.0, 28.0, 23.0, 14.0, 14.0, 13.0, 12.0, 11.0, 3.0, 4.0, 2.0, 5.0, 3.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.28125, -1.2464828491210938, -1.2117156982421875, -1.1769485473632812, -1.142181396484375, -1.1074142456054688, -1.0726470947265625, -1.0378799438476562, -1.00311279296875, -0.9683456420898438, -0.9335784912109375, -0.8988113403320312, -0.864044189453125, -0.8292770385742188, -0.7945098876953125, -0.7597427368164062, -0.7249755859375, -0.6902084350585938, -0.6554412841796875, -0.6206741333007812, -0.585906982421875, -0.5511398315429688, -0.5163726806640625, -0.48160552978515625, -0.44683837890625, -0.41207122802734375, -0.3773040771484375, -0.34253692626953125, -0.307769775390625, -0.27300262451171875, -0.2382354736328125, -0.20346832275390625, -0.168701171875, -0.13393402099609375, -0.0991668701171875, -0.06439971923828125, -0.029632568359375, 0.00513458251953125, 0.0399017333984375, 0.07466888427734375, 0.10943603515625, 0.14420318603515625, 0.1789703369140625, 0.21373748779296875, 0.248504638671875, 0.28327178955078125, 0.3180389404296875, 0.35280609130859375, 0.3875732421875, 0.42234039306640625, 0.4571075439453125, 0.49187469482421875, 0.526641845703125, 0.5614089965820312, 0.5961761474609375, 0.6309432983398438, 0.66571044921875, 0.7004776000976562, 0.7352447509765625, 0.7700119018554688, 0.804779052734375, 0.8395462036132812, 0.8743133544921875, 0.9090805053710938, 0.94384765625]}, "gradients/encoder.encoder.layers.17.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 4.0, 3.0, 6.0, 15.0, 25.0, 28.0, 60.0, 137.0, 491.0, 1947.0, 19531.0, 922232.0, 98767.0, 4165.0, 730.0, 224.0, 98.0, 35.0, 23.0, 11.0, 11.0, 7.0, 5.0, 5.0, 1.0, 3.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.7685546875, -1.7239990234375, -1.679443359375, -1.6348876953125, -1.59033203125, -1.5457763671875, -1.501220703125, -1.4566650390625, -1.412109375, -1.3675537109375, -1.322998046875, -1.2784423828125, -1.23388671875, -1.1893310546875, -1.144775390625, -1.1002197265625, -1.0556640625, -1.0111083984375, -0.966552734375, -0.9219970703125, -0.87744140625, -0.8328857421875, -0.788330078125, -0.7437744140625, -0.69921875, -0.6546630859375, -0.610107421875, -0.5655517578125, -0.52099609375, -0.4764404296875, -0.431884765625, -0.3873291015625, -0.3427734375, -0.2982177734375, -0.253662109375, -0.2091064453125, -0.16455078125, -0.1199951171875, -0.075439453125, -0.0308837890625, 0.013671875, 0.0582275390625, 0.102783203125, 0.1473388671875, 0.19189453125, 0.2364501953125, 0.281005859375, 0.3255615234375, 0.3701171875, 0.4146728515625, 0.459228515625, 0.5037841796875, 0.54833984375, 0.5928955078125, 0.637451171875, 0.6820068359375, 0.7265625, 0.7711181640625, 0.815673828125, 0.8602294921875, 0.90478515625, 0.9493408203125, 0.993896484375, 1.0384521484375, 1.0830078125]}, "gradients/encoder.encoder.layers.17.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 4.0, 5.0, 7.0, 6.0, 6.0, 17.0, 21.0, 35.0, 43.0, 51.0, 86.0, 90.0, 93.0, 102.0, 103.0, 95.0, 79.0, 50.0, 41.0, 34.0, 14.0, 11.0, 5.0, 7.0, 7.0, 1.0, 3.0, 1.0, 1.0, 1.0, 2.0], "bins": [-0.00011086463928222656, -0.00010847579687833786, -0.00010608695447444916, -0.00010369811207056046, -0.00010130926966667175, -9.892042726278305e-05, -9.653158485889435e-05, -9.414274245500565e-05, -9.175390005111694e-05, -8.936505764722824e-05, -8.697621524333954e-05, -8.458737283945084e-05, -8.219853043556213e-05, -7.980968803167343e-05, -7.742084562778473e-05, -7.503200322389603e-05, -7.264316082000732e-05, -7.025431841611862e-05, -6.786547601222992e-05, -6.547663360834122e-05, -6.308779120445251e-05, -6.069894880056381e-05, -5.831010639667511e-05, -5.592126399278641e-05, -5.3532421588897705e-05, -5.1143579185009e-05, -4.87547367811203e-05, -4.63658943772316e-05, -4.3977051973342896e-05, -4.158820956945419e-05, -3.919936716556549e-05, -3.681052476167679e-05, -3.4421682357788086e-05, -3.2032839953899384e-05, -2.964399755001068e-05, -2.725515514612198e-05, -2.4866312742233276e-05, -2.2477470338344574e-05, -2.008862793445587e-05, -1.769978553056717e-05, -1.5310943126678467e-05, -1.2922100722789764e-05, -1.0533258318901062e-05, -8.14441591501236e-06, -5.755573511123657e-06, -3.366731107234955e-06, -9.778887033462524e-07, 1.41095370054245e-06, 3.7997961044311523e-06, 6.188638508319855e-06, 8.577480912208557e-06, 1.096632331609726e-05, 1.3355165719985962e-05, 1.5744008123874664e-05, 1.8132850527763367e-05, 2.052169293165207e-05, 2.291053533554077e-05, 2.5299377739429474e-05, 2.7688220143318176e-05, 3.007706254720688e-05, 3.246590495109558e-05, 3.4854747354984283e-05, 3.7243589758872986e-05, 3.963243216276169e-05, 4.202127456665039e-05]}, "gradients/encoder.encoder.layers.17.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 3.0, 1.0, 5.0, 14.0, 14.0, 32.0, 52.0, 131.0, 298.0, 850.0, 4076.0, 52811.0, 901768.0, 81935.0, 4999.0, 988.0, 314.0, 149.0, 63.0, 35.0, 11.0, 1.0, 8.0, 4.0, 1.0, 2.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.037109375, -1.0062026977539062, -0.9752960205078125, -0.9443893432617188, -0.913482666015625, -0.8825759887695312, -0.8516693115234375, -0.8207626342773438, -0.78985595703125, -0.7589492797851562, -0.7280426025390625, -0.6971359252929688, -0.666229248046875, -0.6353225708007812, -0.6044158935546875, -0.5735092163085938, -0.5426025390625, -0.5116958618164062, -0.4807891845703125, -0.44988250732421875, -0.418975830078125, -0.38806915283203125, -0.3571624755859375, -0.32625579833984375, -0.29534912109375, -0.26444244384765625, -0.2335357666015625, -0.20262908935546875, -0.171722412109375, -0.14081573486328125, -0.1099090576171875, -0.07900238037109375, -0.048095703125, -0.01718902587890625, 0.0137176513671875, 0.04462432861328125, 0.075531005859375, 0.10643768310546875, 0.1373443603515625, 0.16825103759765625, 0.19915771484375, 0.23006439208984375, 0.2609710693359375, 0.29187774658203125, 0.322784423828125, 0.35369110107421875, 0.3845977783203125, 0.41550445556640625, 0.4464111328125, 0.47731781005859375, 0.5082244873046875, 0.5391311645507812, 0.570037841796875, 0.6009445190429688, 0.6318511962890625, 0.6627578735351562, 0.69366455078125, 0.7245712280273438, 0.7554779052734375, 0.7863845825195312, 0.817291259765625, 0.8481979370117188, 0.8791046142578125, 0.9100112915039062, 0.94091796875]}, "gradients/encoder.encoder.layers.17.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 5.0, 3.0, 6.0, 1.0, 11.0, 11.0, 19.0, 13.0, 39.0, 34.0, 71.0, 109.0, 108.0, 128.0, 112.0, 98.0, 87.0, 44.0, 31.0, 22.0, 19.0, 8.0, 8.0, 10.0, 4.0, 1.0, 4.0, 4.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.69287109375, -0.6766357421875, -0.660400390625, -0.6441650390625, -0.6279296875, -0.6116943359375, -0.595458984375, -0.5792236328125, -0.56298828125, -0.5467529296875, -0.530517578125, -0.5142822265625, -0.498046875, -0.4818115234375, -0.465576171875, -0.4493408203125, -0.43310546875, -0.4168701171875, -0.400634765625, -0.3843994140625, -0.3681640625, -0.3519287109375, -0.335693359375, -0.3194580078125, -0.30322265625, -0.2869873046875, -0.270751953125, -0.2545166015625, -0.23828125, -0.2220458984375, -0.205810546875, -0.1895751953125, -0.17333984375, -0.1571044921875, -0.140869140625, -0.1246337890625, -0.1083984375, -0.0921630859375, -0.075927734375, -0.0596923828125, -0.04345703125, -0.0272216796875, -0.010986328125, 0.0052490234375, 0.021484375, 0.0377197265625, 0.053955078125, 0.0701904296875, 0.08642578125, 0.1026611328125, 0.118896484375, 0.1351318359375, 0.1513671875, 0.1676025390625, 0.183837890625, 0.2000732421875, 0.21630859375, 0.2325439453125, 0.248779296875, 0.2650146484375, 0.28125, 0.2974853515625, 0.313720703125, 0.3299560546875, 0.34619140625]}, "gradients/encoder.encoder.layers.17.layer_norm.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 2.0, 4.0, 0.0, 2.0, 6.0, 18.0, 26.0, 41.0, 72.0, 135.0, 187.0, 226.0, 150.0, 69.0, 33.0, 17.0, 10.0, 5.0, 4.0, 3.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-11.163978576660156, -10.886470794677734, -10.608963012695312, -10.33145523071289, -10.053947448730469, -9.776439666748047, -9.498931884765625, -9.221423149108887, -8.943915367126465, -8.666407585144043, -8.388899803161621, -8.1113920211792, -7.833883762359619, -7.556375980377197, -7.278868198394775, -7.0013604164123535, -6.723852634429932, -6.44634485244751, -6.168837070465088, -5.891328811645508, -5.613821029663086, -5.336313247680664, -5.058805465698242, -4.78129768371582, -4.503789901733398, -4.226282119750977, -3.9487740993499756, -3.6712663173675537, -3.3937582969665527, -3.116250514984131, -2.838742733001709, -2.561234951019287, -2.2837271690368652, -2.0062193870544434, -1.7287113666534424, -1.4512035846710205, -1.173695683479309, -0.8961877822875977, -0.6186800003051758, -0.34117209911346436, -0.06366419792175293, 0.2138436734676361, 0.49135154485702515, 0.7688593864440918, 1.0463672876358032, 1.3238751888275146, 1.6013829708099365, 1.878890872001648, 2.1563987731933594, 2.4339065551757812, 2.7114145755767822, 2.988922357559204, 3.266430377960205, 3.543938159942627, 3.821445941925049, 4.098953723907471, 4.376461982727051, 4.653969764709473, 4.9314775466918945, 5.208985328674316, 5.4864935874938965, 5.764001369476318, 6.04150915145874, 6.319016933441162, 6.596524715423584]}, "gradients/encoder.encoder.layers.17.layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 4.0, 3.0, 0.0, 3.0, 3.0, 7.0, 4.0, 7.0, 10.0, 11.0, 12.0, 18.0, 19.0, 18.0, 29.0, 37.0, 33.0, 28.0, 33.0, 43.0, 38.0, 28.0, 42.0, 36.0, 46.0, 40.0, 39.0, 40.0, 41.0, 39.0, 33.0, 30.0, 37.0, 32.0, 25.0, 24.0, 22.0, 16.0, 15.0, 15.0, 12.0, 10.0, 6.0, 5.0, 3.0, 4.0, 5.0, 3.0, 2.0, 2.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.451428413391113, -5.280288219451904, -5.1091485023498535, -4.9380083084106445, -4.7668681144714355, -4.595727920532227, -4.424588203430176, -4.253448009490967, -4.082307815551758, -3.911167860031128, -3.740027666091919, -3.568887710571289, -3.39774751663208, -3.22660756111145, -3.0554676055908203, -2.8843274116516113, -2.7131876945495605, -2.5420477390289307, -2.3709075450897217, -2.199767589569092, -2.028627395629883, -1.857487440109253, -1.686347484588623, -1.5152074098587036, -1.3440673351287842, -1.1729272603988647, -1.0017871856689453, -0.8306472301483154, -0.659507155418396, -0.48836708068847656, -0.3172271251678467, -0.14608705043792725, 0.025053024291992188, 0.19619306921958923, 0.3673331141471863, 0.5384731292724609, 0.7096132040023804, 0.8807532787322998, 1.0518932342529297, 1.2230333089828491, 1.3941733837127686, 1.565313458442688, 1.7364535331726074, 1.9075934886932373, 2.078733444213867, 2.249873638153076, 2.421013593673706, 2.592153549194336, 2.763293743133545, 2.934433698654175, 3.105573892593384, 3.2767138481140137, 3.4478540420532227, 3.6189939975738525, 3.7901339530944824, 3.9612741470336914, 4.132413864135742, 4.303554058074951, 4.474693775177002, 4.645833969116211, 4.81697416305542, 4.988114356994629, 5.15925407409668, 5.330394268035889, 5.501534461975098]}, "gradients/encoder.encoder.layers.16.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 6.0, 3.0, 1.0, 3.0, 3.0, 9.0, 13.0, 20.0, 41.0, 49.0, 85.0, 138.0, 287.0, 570.0, 1329.0, 3816.0, 15534.0, 146691.0, 3884966.0, 120255.0, 13790.0, 3546.0, 1383.0, 693.0, 362.0, 225.0, 121.0, 89.0, 76.0, 49.0, 37.0, 25.0, 19.0, 12.0, 9.0, 8.0, 7.0, 6.0, 7.0, 4.0, 2.0, 2.0, 2.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-1.6953125, -1.635772705078125, -1.57623291015625, -1.516693115234375, -1.4571533203125, -1.397613525390625, -1.33807373046875, -1.278533935546875, -1.218994140625, -1.159454345703125, -1.09991455078125, -1.040374755859375, -0.9808349609375, -0.921295166015625, -0.86175537109375, -0.802215576171875, -0.74267578125, -0.683135986328125, -0.62359619140625, -0.564056396484375, -0.5045166015625, -0.444976806640625, -0.38543701171875, -0.325897216796875, -0.266357421875, -0.206817626953125, -0.14727783203125, -0.087738037109375, -0.0281982421875, 0.031341552734375, 0.09088134765625, 0.150421142578125, 0.2099609375, 0.269500732421875, 0.32904052734375, 0.388580322265625, 0.4481201171875, 0.507659912109375, 0.56719970703125, 0.626739501953125, 0.686279296875, 0.745819091796875, 0.80535888671875, 0.864898681640625, 0.9244384765625, 0.983978271484375, 1.04351806640625, 1.103057861328125, 1.16259765625, 1.222137451171875, 1.28167724609375, 1.341217041015625, 1.4007568359375, 1.460296630859375, 1.51983642578125, 1.579376220703125, 1.638916015625, 1.698455810546875, 1.75799560546875, 1.817535400390625, 1.8770751953125, 1.936614990234375, 1.99615478515625, 2.055694580078125, 2.115234375]}, "gradients/encoder.encoder.layers.16.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 4.0, 2.0, 3.0, 5.0, 4.0, 9.0, 22.0, 24.0, 32.0, 59.0, 93.0, 92.0, 110.0, 119.0, 107.0, 81.0, 95.0, 63.0, 29.0, 28.0, 15.0, 6.0, 1.0, 2.0, 3.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.427490234375, -0.4084587097167969, -0.38942718505859375, -0.3703956604003906, -0.3513641357421875, -0.3323326110839844, -0.31330108642578125, -0.2942695617675781, -0.275238037109375, -0.2562065124511719, -0.23717498779296875, -0.21814346313476562, -0.1991119384765625, -0.18008041381835938, -0.16104888916015625, -0.14201736450195312, -0.12298583984375, -0.10395431518554688, -0.08492279052734375, -0.06589126586914062, -0.0468597412109375, -0.027828216552734375, -0.00879669189453125, 0.010234832763671875, 0.029266357421875, 0.048297882080078125, 0.06732940673828125, 0.08636093139648438, 0.1053924560546875, 0.12442398071289062, 0.14345550537109375, 0.16248703002929688, 0.1815185546875, 0.20055007934570312, 0.21958160400390625, 0.23861312866210938, 0.2576446533203125, 0.2766761779785156, 0.29570770263671875, 0.3147392272949219, 0.333770751953125, 0.3528022766113281, 0.37183380126953125, 0.3908653259277344, 0.4098968505859375, 0.4289283752441406, 0.44795989990234375, 0.4669914245605469, 0.48602294921875, 0.5050544738769531, 0.5240859985351562, 0.5431175231933594, 0.5621490478515625, 0.5811805725097656, 0.6002120971679688, 0.6192436218261719, 0.638275146484375, 0.6573066711425781, 0.6763381958007812, 0.6953697204589844, 0.7144012451171875, 0.7334327697753906, 0.7524642944335938, 0.7714958190917969, 0.79052734375]}, "gradients/encoder.encoder.layers.16.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 1.0, 2.0, 4.0, 7.0, 10.0, 18.0, 38.0, 50.0, 85.0, 175.0, 245.0, 466.0, 1113.0, 5777.0, 339842.0, 3829353.0, 13923.0, 1766.0, 640.0, 281.0, 194.0, 120.0, 60.0, 54.0, 30.0, 16.0, 10.0, 4.0, 4.0, 1.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.263671875, -2.16998291015625, -2.0762939453125, -1.98260498046875, -1.888916015625, -1.79522705078125, -1.7015380859375, -1.60784912109375, -1.51416015625, -1.42047119140625, -1.3267822265625, -1.23309326171875, -1.139404296875, -1.04571533203125, -0.9520263671875, -0.85833740234375, -0.7646484375, -0.67095947265625, -0.5772705078125, -0.48358154296875, -0.389892578125, -0.29620361328125, -0.2025146484375, -0.10882568359375, -0.01513671875, 0.07855224609375, 0.1722412109375, 0.26593017578125, 0.359619140625, 0.45330810546875, 0.5469970703125, 0.64068603515625, 0.734375, 0.82806396484375, 0.9217529296875, 1.01544189453125, 1.109130859375, 1.20281982421875, 1.2965087890625, 1.39019775390625, 1.48388671875, 1.57757568359375, 1.6712646484375, 1.76495361328125, 1.858642578125, 1.95233154296875, 2.0460205078125, 2.13970947265625, 2.2333984375, 2.32708740234375, 2.4207763671875, 2.51446533203125, 2.608154296875, 2.70184326171875, 2.7955322265625, 2.88922119140625, 2.98291015625, 3.07659912109375, 3.1702880859375, 3.26397705078125, 3.357666015625, 3.45135498046875, 3.5450439453125, 3.63873291015625, 3.732421875]}, "gradients/encoder.encoder.layers.16.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 1.0, 10.0, 10.0, 20.0, 48.0, 126.0, 422.0, 2558.0, 626.0, 148.0, 56.0, 28.0, 14.0, 5.0, 5.0, 1.0, 0.0, 4.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.2119140625, -1.1805267333984375, -1.149139404296875, -1.1177520751953125, -1.08636474609375, -1.0549774169921875, -1.023590087890625, -0.9922027587890625, -0.9608154296875, -0.9294281005859375, -0.898040771484375, -0.8666534423828125, -0.83526611328125, -0.8038787841796875, -0.772491455078125, -0.7411041259765625, -0.709716796875, -0.6783294677734375, -0.646942138671875, -0.6155548095703125, -0.58416748046875, -0.5527801513671875, -0.521392822265625, -0.4900054931640625, -0.4586181640625, -0.4272308349609375, -0.395843505859375, -0.3644561767578125, -0.33306884765625, -0.3016815185546875, -0.270294189453125, -0.2389068603515625, -0.20751953125, -0.1761322021484375, -0.144744873046875, -0.1133575439453125, -0.08197021484375, -0.0505828857421875, -0.019195556640625, 0.0121917724609375, 0.0435791015625, 0.0749664306640625, 0.106353759765625, 0.1377410888671875, 0.16912841796875, 0.2005157470703125, 0.231903076171875, 0.2632904052734375, 0.294677734375, 0.3260650634765625, 0.357452392578125, 0.3888397216796875, 0.42022705078125, 0.4516143798828125, 0.483001708984375, 0.5143890380859375, 0.5457763671875, 0.5771636962890625, 0.608551025390625, 0.6399383544921875, 0.67132568359375, 0.7027130126953125, 0.734100341796875, 0.7654876708984375, 0.796875]}, "gradients/encoder.encoder.layers.16.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 7.0, 43.0, 513.0, 423.0, 21.0, 7.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-13.56855583190918, -13.129997253417969, -12.691439628601074, -12.252881050109863, -11.814323425292969, -11.375764846801758, -10.937207221984863, -10.498648643493652, -10.060091018676758, -9.621532440185547, -9.182974815368652, -8.744416236877441, -8.305858612060547, -7.867300033569336, -7.428742408752441, -6.9901838302612305, -6.551625728607178, -6.113067626953125, -5.674509525299072, -5.2359514236450195, -4.797393321990967, -4.358835220336914, -3.9202768802642822, -3.4817187786102295, -3.0431606769561768, -2.604602575302124, -2.1660444736480713, -1.727486252784729, -1.2889281511306763, -0.850369930267334, -0.41181182861328125, 0.026746273040771484, 0.4653043746948242, 0.903862476348877, 1.3424205780029297, 1.780978798866272, 2.219536781311035, 2.658095121383667, 3.0966532230377197, 3.5352113246917725, 3.973769426345825, 4.412327766418457, 4.85088586807251, 5.2894439697265625, 5.728002071380615, 6.166560173034668, 6.605118274688721, 7.043676376342773, 7.482234477996826, 7.920792579650879, 8.35935115814209, 8.797908782958984, 9.236467361450195, 9.67502498626709, 10.1135835647583, 10.552141189575195, 10.990699768066406, 11.429258346557617, 11.867815971374512, 12.306374549865723, 12.744932174682617, 13.183490753173828, 13.622048377990723, 14.060606956481934, 14.499164581298828]}, "gradients/encoder.encoder.layers.16.final_layer_norm.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 0.0, 4.0, 3.0, 5.0, 5.0, 5.0, 8.0, 6.0, 6.0, 16.0, 15.0, 15.0, 23.0, 27.0, 38.0, 30.0, 40.0, 33.0, 55.0, 45.0, 55.0, 69.0, 55.0, 66.0, 60.0, 48.0, 48.0, 33.0, 43.0, 20.0, 30.0, 17.0, 20.0, 14.0, 14.0, 13.0, 6.0, 7.0, 3.0, 8.0, 3.0, 4.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.477252721786499, -1.4193100929260254, -1.3613675832748413, -1.3034249544143677, -1.2454824447631836, -1.18753981590271, -1.1295971870422363, -1.0716546773910522, -1.0137121677398682, -0.9557695984840393, -0.8978270292282104, -0.8398844003677368, -0.7819418907165527, -0.7239992618560791, -0.6660566926002502, -0.6081141233444214, -0.5501714944839478, -0.4922289252281189, -0.43428635597229004, -0.3763437569141388, -0.31840118765830994, -0.2604586184024811, -0.20251601934432983, -0.14457345008850098, -0.08663088083267212, -0.028688304126262665, 0.02925427258014679, 0.08719685673713684, 0.1451394259929657, 0.20308199524879456, 0.2610245943069458, 0.31896716356277466, 0.3769097328186035, 0.4348523020744324, 0.49279487133026123, 0.5507375001907349, 0.608680009841919, 0.6666226387023926, 0.7245652079582214, 0.7825077772140503, 0.8404503464698792, 0.898392915725708, 0.9563354849815369, 1.0142780542373657, 1.0722206830978394, 1.1301631927490234, 1.188105821609497, 1.2460484504699707, 1.3039909601211548, 1.3619335889816284, 1.4198760986328125, 1.4778187274932861, 1.5357612371444702, 1.5937038660049438, 1.651646375656128, 1.7095890045166016, 1.7675316333770752, 1.8254742622375488, 1.883416771888733, 1.9413594007492065, 1.9993019104003906, 2.0572445392608643, 2.115187168121338, 2.1731295585632324, 2.231072187423706]}, "gradients/encoder.encoder.layers.16.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 2.0, 3.0, 3.0, 5.0, 5.0, 8.0, 11.0, 10.0, 7.0, 16.0, 16.0, 29.0, 30.0, 54.0, 81.0, 122.0, 173.0, 235.0, 490.0, 880.0, 2070.0, 6512.0, 32780.0, 304317.0, 607739.0, 75331.0, 11570.0, 3174.0, 1252.0, 652.0, 348.0, 199.0, 129.0, 86.0, 65.0, 51.0, 28.0, 18.0, 19.0, 14.0, 14.0, 4.0, 3.0, 1.0, 8.0, 2.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-2.1875, -2.121612548828125, -2.05572509765625, -1.989837646484375, -1.9239501953125, -1.858062744140625, -1.79217529296875, -1.726287841796875, -1.660400390625, -1.594512939453125, -1.52862548828125, -1.462738037109375, -1.3968505859375, -1.330963134765625, -1.26507568359375, -1.199188232421875, -1.13330078125, -1.067413330078125, -1.00152587890625, -0.935638427734375, -0.8697509765625, -0.803863525390625, -0.73797607421875, -0.672088623046875, -0.606201171875, -0.540313720703125, -0.47442626953125, -0.408538818359375, -0.3426513671875, -0.276763916015625, -0.21087646484375, -0.144989013671875, -0.0791015625, -0.013214111328125, 0.05267333984375, 0.118560791015625, 0.1844482421875, 0.250335693359375, 0.31622314453125, 0.382110595703125, 0.447998046875, 0.513885498046875, 0.57977294921875, 0.645660400390625, 0.7115478515625, 0.777435302734375, 0.84332275390625, 0.909210205078125, 0.97509765625, 1.040985107421875, 1.10687255859375, 1.172760009765625, 1.2386474609375, 1.304534912109375, 1.37042236328125, 1.436309814453125, 1.502197265625, 1.568084716796875, 1.63397216796875, 1.699859619140625, 1.7657470703125, 1.831634521484375, 1.89752197265625, 1.963409423828125, 2.029296875]}, "gradients/encoder.encoder.layers.16.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 4.0, 2.0, 4.0, 4.0, 1.0, 6.0, 17.0, 28.0, 37.0, 55.0, 88.0, 92.0, 121.0, 124.0, 96.0, 95.0, 100.0, 52.0, 36.0, 23.0, 9.0, 2.0, 6.0, 3.0, 3.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.45458984375, -0.43524169921875, -0.4158935546875, -0.39654541015625, -0.377197265625, -0.35784912109375, -0.3385009765625, -0.31915283203125, -0.2998046875, -0.28045654296875, -0.2611083984375, -0.24176025390625, -0.222412109375, -0.20306396484375, -0.1837158203125, -0.16436767578125, -0.14501953125, -0.12567138671875, -0.1063232421875, -0.08697509765625, -0.067626953125, -0.04827880859375, -0.0289306640625, -0.00958251953125, 0.009765625, 0.02911376953125, 0.0484619140625, 0.06781005859375, 0.087158203125, 0.10650634765625, 0.1258544921875, 0.14520263671875, 0.16455078125, 0.18389892578125, 0.2032470703125, 0.22259521484375, 0.241943359375, 0.26129150390625, 0.2806396484375, 0.29998779296875, 0.3193359375, 0.33868408203125, 0.3580322265625, 0.37738037109375, 0.396728515625, 0.41607666015625, 0.4354248046875, 0.45477294921875, 0.47412109375, 0.49346923828125, 0.5128173828125, 0.53216552734375, 0.551513671875, 0.57086181640625, 0.5902099609375, 0.60955810546875, 0.62890625, 0.64825439453125, 0.6676025390625, 0.68695068359375, 0.706298828125, 0.72564697265625, 0.7449951171875, 0.76434326171875, 0.78369140625]}, "gradients/encoder.encoder.layers.16.attention.v_proj.weight": {"_type": "histogram", "values": [4.0, 3.0, 3.0, 2.0, 10.0, 6.0, 8.0, 8.0, 11.0, 19.0, 21.0, 31.0, 32.0, 44.0, 51.0, 73.0, 74.0, 106.0, 150.0, 207.0, 316.0, 484.0, 768.0, 1476.0, 2878.0, 6297.0, 14515.0, 37665.0, 114704.0, 331111.0, 345074.0, 122916.0, 40957.0, 15224.0, 6346.0, 2955.0, 1489.0, 866.0, 533.0, 298.0, 179.0, 146.0, 124.0, 84.0, 57.0, 49.0, 42.0, 32.0, 26.0, 21.0, 22.0, 14.0, 9.0, 5.0, 7.0, 5.0, 6.0, 6.0, 0.0, 1.0, 2.0, 1.0, 0.0, 3.0], "bins": [-0.76318359375, -0.7377700805664062, -0.7123565673828125, -0.6869430541992188, -0.661529541015625, -0.6361160278320312, -0.6107025146484375, -0.5852890014648438, -0.55987548828125, -0.5344619750976562, -0.5090484619140625, -0.48363494873046875, -0.458221435546875, -0.43280792236328125, -0.4073944091796875, -0.38198089599609375, -0.3565673828125, -0.33115386962890625, -0.3057403564453125, -0.28032684326171875, -0.254913330078125, -0.22949981689453125, -0.2040863037109375, -0.17867279052734375, -0.15325927734375, -0.12784576416015625, -0.1024322509765625, -0.07701873779296875, -0.051605224609375, -0.02619171142578125, -0.0007781982421875, 0.02463531494140625, 0.050048828125, 0.07546234130859375, 0.1008758544921875, 0.12628936767578125, 0.151702880859375, 0.17711639404296875, 0.2025299072265625, 0.22794342041015625, 0.25335693359375, 0.27877044677734375, 0.3041839599609375, 0.32959747314453125, 0.355010986328125, 0.38042449951171875, 0.4058380126953125, 0.43125152587890625, 0.4566650390625, 0.48207855224609375, 0.5074920654296875, 0.5329055786132812, 0.558319091796875, 0.5837326049804688, 0.6091461181640625, 0.6345596313476562, 0.65997314453125, 0.6853866577148438, 0.7108001708984375, 0.7362136840820312, 0.761627197265625, 0.7870407104492188, 0.8124542236328125, 0.8378677368164062, 0.86328125]}, "gradients/encoder.encoder.layers.16.attention.v_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 0.0, 3.0, 1.0, 0.0, 2.0, 3.0, 4.0, 1.0, 2.0, 5.0, 9.0, 10.0, 8.0, 17.0, 24.0, 19.0, 22.0, 19.0, 32.0, 34.0, 29.0, 39.0, 33.0, 39.0, 55.0, 52.0, 51.0, 45.0, 38.0, 47.0, 44.0, 37.0, 46.0, 27.0, 24.0, 22.0, 26.0, 19.0, 37.0, 13.0, 10.0, 13.0, 10.0, 8.0, 9.0, 5.0, 5.0, 6.0, 5.0, 2.0, 2.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-1.0224609375, -0.9888916015625, -0.955322265625, -0.9217529296875, -0.88818359375, -0.8546142578125, -0.821044921875, -0.7874755859375, -0.75390625, -0.7203369140625, -0.686767578125, -0.6531982421875, -0.61962890625, -0.5860595703125, -0.552490234375, -0.5189208984375, -0.4853515625, -0.4517822265625, -0.418212890625, -0.3846435546875, -0.35107421875, -0.3175048828125, -0.283935546875, -0.2503662109375, -0.216796875, -0.1832275390625, -0.149658203125, -0.1160888671875, -0.08251953125, -0.0489501953125, -0.015380859375, 0.0181884765625, 0.0517578125, 0.0853271484375, 0.118896484375, 0.1524658203125, 0.18603515625, 0.2196044921875, 0.253173828125, 0.2867431640625, 0.3203125, 0.3538818359375, 0.387451171875, 0.4210205078125, 0.45458984375, 0.4881591796875, 0.521728515625, 0.5552978515625, 0.5888671875, 0.6224365234375, 0.656005859375, 0.6895751953125, 0.72314453125, 0.7567138671875, 0.790283203125, 0.8238525390625, 0.857421875, 0.8909912109375, 0.924560546875, 0.9581298828125, 0.99169921875, 1.0252685546875, 1.058837890625, 1.0924072265625, 1.1259765625]}, "gradients/encoder.encoder.layers.16.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 3.0, 0.0, 1.0, 1.0, 0.0, 4.0, 7.0, 5.0, 5.0, 10.0, 7.0, 14.0, 16.0, 33.0, 51.0, 97.0, 175.0, 341.0, 697.0, 1758.0, 6297.0, 47229.0, 480078.0, 458144.0, 44332.0, 6058.0, 1764.0, 698.0, 325.0, 150.0, 102.0, 54.0, 40.0, 28.0, 11.0, 4.0, 7.0, 5.0, 2.0, 4.0, 2.0, 3.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0], "bins": [-0.6142578125, -0.5961837768554688, -0.5781097412109375, -0.5600357055664062, -0.541961669921875, -0.5238876342773438, -0.5058135986328125, -0.48773956298828125, -0.46966552734375, -0.45159149169921875, -0.4335174560546875, -0.41544342041015625, -0.397369384765625, -0.37929534912109375, -0.3612213134765625, -0.34314727783203125, -0.3250732421875, -0.30699920654296875, -0.2889251708984375, -0.27085113525390625, -0.252777099609375, -0.23470306396484375, -0.2166290283203125, -0.19855499267578125, -0.18048095703125, -0.16240692138671875, -0.1443328857421875, -0.12625885009765625, -0.108184814453125, -0.09011077880859375, -0.0720367431640625, -0.05396270751953125, -0.035888671875, -0.01781463623046875, 0.0002593994140625, 0.01833343505859375, 0.036407470703125, 0.05448150634765625, 0.0725555419921875, 0.09062957763671875, 0.10870361328125, 0.12677764892578125, 0.1448516845703125, 0.16292572021484375, 0.180999755859375, 0.19907379150390625, 0.2171478271484375, 0.23522186279296875, 0.2532958984375, 0.27136993408203125, 0.2894439697265625, 0.30751800537109375, 0.325592041015625, 0.34366607666015625, 0.3617401123046875, 0.37981414794921875, 0.39788818359375, 0.41596221923828125, 0.4340362548828125, 0.45211029052734375, 0.470184326171875, 0.48825836181640625, 0.5063323974609375, 0.5244064331054688, 0.54248046875]}, "gradients/encoder.encoder.layers.16.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 6.0, 1.0, 2.0, 6.0, 6.0, 16.0, 7.0, 16.0, 29.0, 23.0, 34.0, 58.0, 56.0, 44.0, 77.0, 81.0, 91.0, 88.0, 95.0, 57.0, 56.0, 41.0, 30.0, 18.0, 20.0, 19.0, 8.0, 9.0, 6.0, 3.0, 7.0, 2.0, 2.0, 0.0, 2.0, 1.0, 1.0, 2.0], "bins": [-8.630752563476562e-05, -8.426979184150696e-05, -8.223205804824829e-05, -8.019432425498962e-05, -7.815659046173096e-05, -7.611885666847229e-05, -7.408112287521362e-05, -7.204338908195496e-05, -7.000565528869629e-05, -6.796792149543762e-05, -6.593018770217896e-05, -6.389245390892029e-05, -6.185472011566162e-05, -5.9816986322402954e-05, -5.777925252914429e-05, -5.574151873588562e-05, -5.370378494262695e-05, -5.1666051149368286e-05, -4.962831735610962e-05, -4.759058356285095e-05, -4.5552849769592285e-05, -4.351511597633362e-05, -4.147738218307495e-05, -3.9439648389816284e-05, -3.740191459655762e-05, -3.536418080329895e-05, -3.332644701004028e-05, -3.1288713216781616e-05, -2.925097942352295e-05, -2.7213245630264282e-05, -2.5175511837005615e-05, -2.3137778043746948e-05, -2.110004425048828e-05, -1.9062310457229614e-05, -1.7024576663970947e-05, -1.498684287071228e-05, -1.2949109077453613e-05, -1.0911375284194946e-05, -8.87364149093628e-06, -6.835907697677612e-06, -4.798173904418945e-06, -2.7604401111602783e-06, -7.227063179016113e-07, 1.3150274753570557e-06, 3.3527612686157227e-06, 5.39049506187439e-06, 7.428228855133057e-06, 9.465962648391724e-06, 1.150369644165039e-05, 1.3541430234909058e-05, 1.5579164028167725e-05, 1.761689782142639e-05, 1.965463161468506e-05, 2.1692365407943726e-05, 2.3730099201202393e-05, 2.576783299446106e-05, 2.7805566787719727e-05, 2.9843300580978394e-05, 3.188103437423706e-05, 3.391876816749573e-05, 3.5956501960754395e-05, 3.799423575401306e-05, 4.003196954727173e-05, 4.2069703340530396e-05, 4.410743713378906e-05]}, "gradients/encoder.encoder.layers.16.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 2.0, 2.0, 3.0, 2.0, 3.0, 2.0, 7.0, 6.0, 21.0, 23.0, 35.0, 66.0, 110.0, 218.0, 463.0, 1021.0, 2548.0, 9214.0, 55921.0, 406741.0, 482437.0, 73452.0, 11155.0, 2928.0, 1123.0, 452.0, 245.0, 143.0, 81.0, 46.0, 36.0, 20.0, 7.0, 12.0, 5.0, 4.0, 0.0, 1.0, 4.0, 2.0, 1.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.5068359375, -0.4923820495605469, -0.47792816162109375, -0.4634742736816406, -0.4490203857421875, -0.4345664978027344, -0.42011260986328125, -0.4056587219238281, -0.391204833984375, -0.3767509460449219, -0.36229705810546875, -0.3478431701660156, -0.3333892822265625, -0.3189353942871094, -0.30448150634765625, -0.2900276184082031, -0.27557373046875, -0.2611198425292969, -0.24666595458984375, -0.23221206665039062, -0.2177581787109375, -0.20330429077148438, -0.18885040283203125, -0.17439651489257812, -0.159942626953125, -0.14548873901367188, -0.13103485107421875, -0.11658096313476562, -0.1021270751953125, -0.08767318725585938, -0.07321929931640625, -0.058765411376953125, -0.0443115234375, -0.029857635498046875, -0.01540374755859375, -0.000949859619140625, 0.0135040283203125, 0.027957916259765625, 0.04241180419921875, 0.056865692138671875, 0.071319580078125, 0.08577346801757812, 0.10022735595703125, 0.11468124389648438, 0.1291351318359375, 0.14358901977539062, 0.15804290771484375, 0.17249679565429688, 0.18695068359375, 0.20140457153320312, 0.21585845947265625, 0.23031234741210938, 0.2447662353515625, 0.2592201232910156, 0.27367401123046875, 0.2881278991699219, 0.302581787109375, 0.3170356750488281, 0.33148956298828125, 0.3459434509277344, 0.3603973388671875, 0.3748512268066406, 0.38930511474609375, 0.4037590026855469, 0.418212890625]}, "gradients/encoder.encoder.layers.16.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 3.0, 1.0, 4.0, 7.0, 8.0, 5.0, 14.0, 20.0, 24.0, 36.0, 47.0, 56.0, 62.0, 86.0, 96.0, 96.0, 93.0, 82.0, 52.0, 50.0, 46.0, 31.0, 30.0, 20.0, 5.0, 9.0, 7.0, 6.0, 5.0, 3.0, 0.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.4033203125, -0.3908958435058594, -0.37847137451171875, -0.3660469055175781, -0.3536224365234375, -0.3411979675292969, -0.32877349853515625, -0.3163490295410156, -0.303924560546875, -0.2915000915527344, -0.27907562255859375, -0.2666511535644531, -0.2542266845703125, -0.24180221557617188, -0.22937774658203125, -0.21695327758789062, -0.20452880859375, -0.19210433959960938, -0.17967987060546875, -0.16725540161132812, -0.1548309326171875, -0.14240646362304688, -0.12998199462890625, -0.11755752563476562, -0.105133056640625, -0.09270858764648438, -0.08028411865234375, -0.06785964965820312, -0.0554351806640625, -0.043010711669921875, -0.03058624267578125, -0.018161773681640625, -0.0057373046875, 0.006687164306640625, 0.01911163330078125, 0.031536102294921875, 0.0439605712890625, 0.056385040283203125, 0.06880950927734375, 0.08123397827148438, 0.093658447265625, 0.10608291625976562, 0.11850738525390625, 0.13093185424804688, 0.1433563232421875, 0.15578079223632812, 0.16820526123046875, 0.18062973022460938, 0.19305419921875, 0.20547866821289062, 0.21790313720703125, 0.23032760620117188, 0.2427520751953125, 0.2551765441894531, 0.26760101318359375, 0.2800254821777344, 0.292449951171875, 0.3048744201660156, 0.31729888916015625, 0.3297233581542969, 0.3421478271484375, 0.3545722961425781, 0.36699676513671875, 0.3794212341308594, 0.391845703125]}, "gradients/encoder.encoder.layers.16.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 4.0, 2.0, 5.0, 8.0, 11.0, 36.0, 42.0, 101.0, 138.0, 184.0, 200.0, 135.0, 53.0, 38.0, 25.0, 20.0, 3.0, 1.0, 5.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.6469926834106445, -6.356942176818848, -6.066891670227051, -5.776841163635254, -5.486790657043457, -5.19674015045166, -4.906689167022705, -4.616638660430908, -4.326588153839111, -4.0365376472473145, -3.7464871406555176, -3.4564363956451416, -3.1663858890533447, -2.876335382461548, -2.586284637451172, -2.296234130859375, -2.006183624267578, -1.7161331176757812, -1.4260824918746948, -1.1360318660736084, -0.8459813594818115, -0.5559308528900146, -0.2658802270889282, 0.024170398712158203, 0.3142209053039551, 0.6042714715003967, 0.8943220376968384, 1.1843726634979248, 1.4744231700897217, 1.7644736766815186, 2.0545244216918945, 2.3445749282836914, 2.6346263885498047, 2.9246768951416016, 3.2147274017333984, 3.5047781467437744, 3.7948286533355713, 4.084878921508789, 4.374929904937744, 4.664980411529541, 4.955030918121338, 5.245081424713135, 5.535131931304932, 5.8251824378967285, 6.115233421325684, 6.4052839279174805, 6.695334434509277, 6.985384941101074, 7.275435447692871, 7.565485954284668, 7.855536460876465, 8.145586967468262, 8.435637474060059, 8.725687980651855, 9.015738487243652, 9.305789947509766, 9.595840454101562, 9.88589096069336, 10.175941467285156, 10.465991973876953, 10.75604248046875, 11.046092987060547, 11.336143493652344, 11.62619400024414, 11.916244506835938]}, "gradients/encoder.encoder.layers.16.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 3.0, 3.0, 2.0, 3.0, 5.0, 7.0, 4.0, 7.0, 11.0, 12.0, 17.0, 12.0, 14.0, 14.0, 25.0, 29.0, 31.0, 33.0, 32.0, 30.0, 38.0, 52.0, 55.0, 58.0, 49.0, 42.0, 48.0, 37.0, 46.0, 30.0, 37.0, 32.0, 32.0, 26.0, 21.0, 31.0, 13.0, 20.0, 13.0, 12.0, 7.0, 5.0, 6.0, 1.0, 1.0, 3.0, 2.0, 2.0, 2.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.400047302246094, -6.196839332580566, -5.993630886077881, -5.7904229164123535, -5.587214469909668, -5.384006500244141, -5.180798530578613, -4.977590084075928, -4.774381637573242, -4.571173667907715, -4.367965221405029, -4.164757251739502, -3.9615488052368164, -3.758340835571289, -3.5551326274871826, -3.351924419403076, -3.148716449737549, -2.9455082416534424, -2.742300033569336, -2.5390920639038086, -2.335883617401123, -2.1326756477355957, -1.9294674396514893, -1.7262592315673828, -1.5230510234832764, -1.31984281539917, -1.1166346073150635, -0.9134265184402466, -0.7102183103561401, -0.5070101022720337, -0.3038020133972168, -0.10059380531311035, 0.1026144027709961, 0.30582258105278015, 0.5090307593345642, 0.7122389078140259, 0.9154471158981323, 1.1186553239822388, 1.3218634128570557, 1.525071620941162, 1.7282798290252686, 1.931488037109375, 2.1346962451934814, 2.337904453277588, 2.5411124229431152, 2.744320869445801, 2.947528839111328, 3.1507370471954346, 3.353945255279541, 3.5571534633636475, 3.760361671447754, 3.9635696411132812, 4.166778087615967, 4.369986057281494, 4.57319450378418, 4.776402473449707, 4.979610443115234, 5.182818412780762, 5.386026859283447, 5.589234828948975, 5.79244327545166, 5.9956512451171875, 6.198859214782715, 6.4020676612854, 6.605276107788086]}, "gradients/encoder.encoder.layers.15.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 3.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 7.0, 9.0, 12.0, 19.0, 28.0, 23.0, 56.0, 120.0, 190.0, 379.0, 825.0, 2146.0, 7458.0, 53103.0, 3913822.0, 196809.0, 13435.0, 3289.0, 1233.0, 559.0, 297.0, 157.0, 95.0, 63.0, 36.0, 29.0, 25.0, 15.0, 5.0, 9.0, 9.0, 10.0, 3.0, 3.0, 3.0, 2.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.935546875, -1.853851318359375, -1.77215576171875, -1.690460205078125, -1.6087646484375, -1.527069091796875, -1.44537353515625, -1.363677978515625, -1.281982421875, -1.200286865234375, -1.11859130859375, -1.036895751953125, -0.9552001953125, -0.873504638671875, -0.79180908203125, -0.710113525390625, -0.62841796875, -0.546722412109375, -0.46502685546875, -0.383331298828125, -0.3016357421875, -0.219940185546875, -0.13824462890625, -0.056549072265625, 0.025146484375, 0.106842041015625, 0.18853759765625, 0.270233154296875, 0.3519287109375, 0.433624267578125, 0.51531982421875, 0.597015380859375, 0.6787109375, 0.760406494140625, 0.84210205078125, 0.923797607421875, 1.0054931640625, 1.087188720703125, 1.16888427734375, 1.250579833984375, 1.332275390625, 1.413970947265625, 1.49566650390625, 1.577362060546875, 1.6590576171875, 1.740753173828125, 1.82244873046875, 1.904144287109375, 1.98583984375, 2.067535400390625, 2.14923095703125, 2.230926513671875, 2.3126220703125, 2.394317626953125, 2.47601318359375, 2.557708740234375, 2.639404296875, 2.721099853515625, 2.80279541015625, 2.884490966796875, 2.9661865234375, 3.047882080078125, 3.12957763671875, 3.211273193359375, 3.29296875]}, "gradients/encoder.encoder.layers.15.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 6.0, 3.0, 5.0, 1.0, 4.0, 8.0, 22.0, 25.0, 28.0, 49.0, 52.0, 64.0, 88.0, 104.0, 114.0, 91.0, 85.0, 69.0, 67.0, 47.0, 31.0, 22.0, 11.0, 5.0, 3.0, 3.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.53564453125, -0.51611328125, -0.49658203125, -0.47705078125, -0.45751953125, -0.43798828125, -0.41845703125, -0.39892578125, -0.37939453125, -0.35986328125, -0.34033203125, -0.32080078125, -0.30126953125, -0.28173828125, -0.26220703125, -0.24267578125, -0.22314453125, -0.20361328125, -0.18408203125, -0.16455078125, -0.14501953125, -0.12548828125, -0.10595703125, -0.08642578125, -0.06689453125, -0.04736328125, -0.02783203125, -0.00830078125, 0.01123046875, 0.03076171875, 0.05029296875, 0.06982421875, 0.08935546875, 0.10888671875, 0.12841796875, 0.14794921875, 0.16748046875, 0.18701171875, 0.20654296875, 0.22607421875, 0.24560546875, 0.26513671875, 0.28466796875, 0.30419921875, 0.32373046875, 0.34326171875, 0.36279296875, 0.38232421875, 0.40185546875, 0.42138671875, 0.44091796875, 0.46044921875, 0.47998046875, 0.49951171875, 0.51904296875, 0.53857421875, 0.55810546875, 0.57763671875, 0.59716796875, 0.61669921875, 0.63623046875, 0.65576171875, 0.67529296875, 0.69482421875, 0.71435546875]}, "gradients/encoder.encoder.layers.15.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 2.0, 2.0, 0.0, 2.0, 2.0, 2.0, 8.0, 1.0, 3.0, 5.0, 11.0, 11.0, 12.0, 22.0, 33.0, 59.0, 102.0, 174.0, 325.0, 552.0, 1150.0, 2723.0, 7903.0, 36922.0, 508804.0, 3506450.0, 105303.0, 15813.0, 4418.0, 1589.0, 832.0, 427.0, 246.0, 145.0, 90.0, 47.0, 38.0, 13.0, 15.0, 9.0, 6.0, 7.0, 7.0, 3.0, 3.0, 1.0, 5.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-1.296875, -1.2509613037109375, -1.205047607421875, -1.1591339111328125, -1.11322021484375, -1.0673065185546875, -1.021392822265625, -0.9754791259765625, -0.9295654296875, -0.8836517333984375, -0.837738037109375, -0.7918243408203125, -0.74591064453125, -0.6999969482421875, -0.654083251953125, -0.6081695556640625, -0.562255859375, -0.5163421630859375, -0.470428466796875, -0.4245147705078125, -0.37860107421875, -0.3326873779296875, -0.286773681640625, -0.2408599853515625, -0.1949462890625, -0.1490325927734375, -0.103118896484375, -0.0572052001953125, -0.01129150390625, 0.0346221923828125, 0.080535888671875, 0.1264495849609375, 0.17236328125, 0.2182769775390625, 0.264190673828125, 0.3101043701171875, 0.35601806640625, 0.4019317626953125, 0.447845458984375, 0.4937591552734375, 0.5396728515625, 0.5855865478515625, 0.631500244140625, 0.6774139404296875, 0.72332763671875, 0.7692413330078125, 0.815155029296875, 0.8610687255859375, 0.906982421875, 0.9528961181640625, 0.998809814453125, 1.0447235107421875, 1.09063720703125, 1.1365509033203125, 1.182464599609375, 1.2283782958984375, 1.2742919921875, 1.3202056884765625, 1.366119384765625, 1.4120330810546875, 1.45794677734375, 1.5038604736328125, 1.549774169921875, 1.5956878662109375, 1.6416015625]}, "gradients/encoder.encoder.layers.15.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 3.0, 2.0, 1.0, 4.0, 12.0, 8.0, 14.0, 27.0, 33.0, 57.0, 117.0, 248.0, 977.0, 1801.0, 387.0, 170.0, 75.0, 41.0, 33.0, 24.0, 13.0, 11.0, 6.0, 5.0, 3.0, 3.0, 1.0, 2.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.7490234375, -0.7233734130859375, -0.697723388671875, -0.6720733642578125, -0.64642333984375, -0.6207733154296875, -0.595123291015625, -0.5694732666015625, -0.5438232421875, -0.5181732177734375, -0.492523193359375, -0.4668731689453125, -0.44122314453125, -0.4155731201171875, -0.389923095703125, -0.3642730712890625, -0.338623046875, -0.3129730224609375, -0.287322998046875, -0.2616729736328125, -0.23602294921875, -0.2103729248046875, -0.184722900390625, -0.1590728759765625, -0.1334228515625, -0.1077728271484375, -0.082122802734375, -0.0564727783203125, -0.03082275390625, -0.0051727294921875, 0.020477294921875, 0.0461273193359375, 0.07177734375, 0.0974273681640625, 0.123077392578125, 0.1487274169921875, 0.17437744140625, 0.2000274658203125, 0.225677490234375, 0.2513275146484375, 0.2769775390625, 0.3026275634765625, 0.328277587890625, 0.3539276123046875, 0.37957763671875, 0.4052276611328125, 0.430877685546875, 0.4565277099609375, 0.482177734375, 0.5078277587890625, 0.533477783203125, 0.5591278076171875, 0.58477783203125, 0.6104278564453125, 0.636077880859375, 0.6617279052734375, 0.6873779296875, 0.7130279541015625, 0.738677978515625, 0.7643280029296875, 0.78997802734375, 0.8156280517578125, 0.841278076171875, 0.8669281005859375, 0.892578125]}, "gradients/encoder.encoder.layers.15.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 5.0, 7.0, 9.0, 28.0, 53.0, 168.0, 259.0, 222.0, 152.0, 63.0, 27.0, 9.0, 4.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.8459692001342773, -2.6874990463256836, -2.52902889251709, -2.370558738708496, -2.2120885848999023, -2.0536184310913086, -1.8951480388641357, -1.736677885055542, -1.5782077312469482, -1.4197375774383545, -1.2612674236297607, -1.1027971506118774, -0.9443269968032837, -0.7858568429946899, -0.6273866295814514, -0.4689164161682129, -0.31044626235961914, -0.151976078748703, 0.006494104862213135, 0.16496428847312927, 0.3234344720840454, 0.48190462589263916, 0.6403748393058777, 0.7988450527191162, 0.95731520652771, 1.1157853603363037, 1.2742555141448975, 1.4327257871627808, 1.5911959409713745, 1.7496660947799683, 1.9081363677978516, 2.0666065216064453, 2.225076675415039, 2.383546829223633, 2.5420169830322266, 2.7004871368408203, 2.858957290649414, 3.017427444458008, 3.1758978366851807, 3.3343679904937744, 3.492838144302368, 3.651308298110962, 3.8097784519195557, 3.9682486057281494, 4.126718997955322, 4.285189151763916, 4.44365930557251, 4.6021294593811035, 4.760599613189697, 4.919069766998291, 5.077539920806885, 5.2360100746154785, 5.394480228424072, 5.552950382232666, 5.71142053604126, 5.869891166687012, 6.0283613204956055, 6.186831474304199, 6.345301628112793, 6.503771781921387, 6.6622419357299805, 6.820712089538574, 6.979182243347168, 7.137652397155762, 7.2961225509643555]}, "gradients/encoder.encoder.layers.15.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 3.0, 3.0, 2.0, 10.0, 10.0, 23.0, 19.0, 35.0, 37.0, 56.0, 55.0, 71.0, 62.0, 73.0, 89.0, 74.0, 69.0, 73.0, 65.0, 45.0, 38.0, 25.0, 24.0, 19.0, 16.0, 7.0, 2.0, 3.0, 5.0, 0.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.082407474517822, -3.9742565155029297, -3.866105318069458, -3.7579543590545654, -3.6498031616210938, -3.541652202606201, -3.4335012435913086, -3.325350046157837, -3.2171990871429443, -3.1090481281280518, -3.00089693069458, -2.8927459716796875, -2.784594774246216, -2.6764438152313232, -2.5682926177978516, -2.460141658782959, -2.3519906997680664, -2.243839740753174, -2.135688543319702, -2.0275375843048096, -1.9193865060806274, -1.8112354278564453, -1.7030843496322632, -1.594933271408081, -1.4867820739746094, -1.3786309957504272, -1.2704799175262451, -1.1623289585113525, -1.0541778802871704, -0.9460268020629883, -0.8378757238388062, -0.7297247052192688, -0.6215736865997314, -0.5134226083755493, -0.40527158975601196, -0.29712051153182983, -0.1889694631099701, -0.08081841468811035, 0.027332663536071777, 0.13548368215560913, 0.24363476037979126, 0.351785808801651, 0.45993685722351074, 0.5680879354476929, 0.676239013671875, 0.7843900322914124, 0.8925411105155945, 1.0006921291351318, 1.108843207359314, 1.216994285583496, 1.3251453638076782, 1.4332964420318604, 1.541447401046753, 1.649598479270935, 1.7577495574951172, 1.8659005165100098, 1.9740517139434814, 2.082202672958374, 2.1903538703918457, 2.2985048294067383, 2.40665602684021, 2.5148069858551025, 2.622958183288574, 2.731109142303467, 2.8392601013183594]}, "gradients/encoder.encoder.layers.15.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 3.0, 3.0, 3.0, 3.0, 7.0, 8.0, 6.0, 12.0, 27.0, 31.0, 49.0, 101.0, 142.0, 334.0, 785.0, 2528.0, 12244.0, 227708.0, 767519.0, 30536.0, 4263.0, 1235.0, 533.0, 203.0, 101.0, 65.0, 40.0, 22.0, 14.0, 13.0, 9.0, 2.0, 7.0, 3.0, 2.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0], "bins": [-3.7109375, -3.6024169921875, -3.493896484375, -3.3853759765625, -3.27685546875, -3.1683349609375, -3.059814453125, -2.9512939453125, -2.8427734375, -2.7342529296875, -2.625732421875, -2.5172119140625, -2.40869140625, -2.3001708984375, -2.191650390625, -2.0831298828125, -1.974609375, -1.8660888671875, -1.757568359375, -1.6490478515625, -1.54052734375, -1.4320068359375, -1.323486328125, -1.2149658203125, -1.1064453125, -0.9979248046875, -0.889404296875, -0.7808837890625, -0.67236328125, -0.5638427734375, -0.455322265625, -0.3468017578125, -0.23828125, -0.1297607421875, -0.021240234375, 0.0872802734375, 0.19580078125, 0.3043212890625, 0.412841796875, 0.5213623046875, 0.6298828125, 0.7384033203125, 0.846923828125, 0.9554443359375, 1.06396484375, 1.1724853515625, 1.281005859375, 1.3895263671875, 1.498046875, 1.6065673828125, 1.715087890625, 1.8236083984375, 1.93212890625, 2.0406494140625, 2.149169921875, 2.2576904296875, 2.3662109375, 2.4747314453125, 2.583251953125, 2.6917724609375, 2.80029296875, 2.9088134765625, 3.017333984375, 3.1258544921875, 3.234375]}, "gradients/encoder.encoder.layers.15.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 5.0, 3.0, 4.0, 3.0, 8.0, 13.0, 18.0, 39.0, 47.0, 55.0, 53.0, 108.0, 99.0, 114.0, 108.0, 80.0, 80.0, 54.0, 46.0, 23.0, 22.0, 12.0, 5.0, 2.0, 1.0, 1.0, 3.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.54052734375, -0.52008056640625, -0.4996337890625, -0.47918701171875, -0.458740234375, -0.43829345703125, -0.4178466796875, -0.39739990234375, -0.376953125, -0.35650634765625, -0.3360595703125, -0.31561279296875, -0.295166015625, -0.27471923828125, -0.2542724609375, -0.23382568359375, -0.21337890625, -0.19293212890625, -0.1724853515625, -0.15203857421875, -0.131591796875, -0.11114501953125, -0.0906982421875, -0.07025146484375, -0.0498046875, -0.02935791015625, -0.0089111328125, 0.01153564453125, 0.031982421875, 0.05242919921875, 0.0728759765625, 0.09332275390625, 0.11376953125, 0.13421630859375, 0.1546630859375, 0.17510986328125, 0.195556640625, 0.21600341796875, 0.2364501953125, 0.25689697265625, 0.27734375, 0.29779052734375, 0.3182373046875, 0.33868408203125, 0.359130859375, 0.37957763671875, 0.4000244140625, 0.42047119140625, 0.44091796875, 0.46136474609375, 0.4818115234375, 0.50225830078125, 0.522705078125, 0.54315185546875, 0.5635986328125, 0.58404541015625, 0.6044921875, 0.62493896484375, 0.6453857421875, 0.66583251953125, 0.686279296875, 0.70672607421875, 0.7271728515625, 0.74761962890625, 0.76806640625]}, "gradients/encoder.encoder.layers.15.attention.v_proj.weight": {"_type": "histogram", "values": [3.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 6.0, 6.0, 5.0, 10.0, 16.0, 19.0, 14.0, 16.0, 29.0, 41.0, 51.0, 67.0, 102.0, 152.0, 248.0, 473.0, 1018.0, 3113.0, 12804.0, 79501.0, 638463.0, 270912.0, 31649.0, 6410.0, 1744.0, 732.0, 319.0, 197.0, 129.0, 74.0, 60.0, 54.0, 35.0, 23.0, 15.0, 9.0, 13.0, 10.0, 7.0, 4.0, 2.0, 5.0, 2.0, 3.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.681640625, -1.6287078857421875, -1.575775146484375, -1.5228424072265625, -1.46990966796875, -1.4169769287109375, -1.364044189453125, -1.3111114501953125, -1.2581787109375, -1.2052459716796875, -1.152313232421875, -1.0993804931640625, -1.04644775390625, -0.9935150146484375, -0.940582275390625, -0.8876495361328125, -0.834716796875, -0.7817840576171875, -0.728851318359375, -0.6759185791015625, -0.62298583984375, -0.5700531005859375, -0.517120361328125, -0.4641876220703125, -0.4112548828125, -0.3583221435546875, -0.305389404296875, -0.2524566650390625, -0.19952392578125, -0.1465911865234375, -0.093658447265625, -0.0407257080078125, 0.01220703125, 0.0651397705078125, 0.118072509765625, 0.1710052490234375, 0.22393798828125, 0.2768707275390625, 0.329803466796875, 0.3827362060546875, 0.4356689453125, 0.4886016845703125, 0.541534423828125, 0.5944671630859375, 0.64739990234375, 0.7003326416015625, 0.753265380859375, 0.8061981201171875, 0.859130859375, 0.9120635986328125, 0.964996337890625, 1.0179290771484375, 1.07086181640625, 1.1237945556640625, 1.176727294921875, 1.2296600341796875, 1.2825927734375, 1.3355255126953125, 1.388458251953125, 1.4413909912109375, 1.49432373046875, 1.5472564697265625, 1.600189208984375, 1.6531219482421875, 1.7060546875]}, "gradients/encoder.encoder.layers.15.attention.v_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 0.0, 0.0, 2.0, 3.0, 4.0, 4.0, 4.0, 11.0, 11.0, 12.0, 22.0, 19.0, 29.0, 35.0, 33.0, 30.0, 27.0, 46.0, 41.0, 49.0, 48.0, 47.0, 63.0, 52.0, 51.0, 58.0, 44.0, 46.0, 37.0, 31.0, 28.0, 19.0, 18.0, 18.0, 12.0, 16.0, 8.0, 9.0, 9.0, 3.0, 1.0, 1.0, 4.0, 3.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-1.46875, -1.425048828125, -1.38134765625, -1.337646484375, -1.2939453125, -1.250244140625, -1.20654296875, -1.162841796875, -1.119140625, -1.075439453125, -1.03173828125, -0.988037109375, -0.9443359375, -0.900634765625, -0.85693359375, -0.813232421875, -0.76953125, -0.725830078125, -0.68212890625, -0.638427734375, -0.5947265625, -0.551025390625, -0.50732421875, -0.463623046875, -0.419921875, -0.376220703125, -0.33251953125, -0.288818359375, -0.2451171875, -0.201416015625, -0.15771484375, -0.114013671875, -0.0703125, -0.026611328125, 0.01708984375, 0.060791015625, 0.1044921875, 0.148193359375, 0.19189453125, 0.235595703125, 0.279296875, 0.322998046875, 0.36669921875, 0.410400390625, 0.4541015625, 0.497802734375, 0.54150390625, 0.585205078125, 0.62890625, 0.672607421875, 0.71630859375, 0.760009765625, 0.8037109375, 0.847412109375, 0.89111328125, 0.934814453125, 0.978515625, 1.022216796875, 1.06591796875, 1.109619140625, 1.1533203125, 1.197021484375, 1.24072265625, 1.284423828125, 1.328125]}, "gradients/encoder.encoder.layers.15.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 1.0, 0.0, 4.0, 3.0, 11.0, 10.0, 36.0, 102.0, 408.0, 5791.0, 1036576.0, 5065.0, 398.0, 110.0, 30.0, 9.0, 4.0, 2.0, 4.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.0234375, -3.903289794921875, -3.78314208984375, -3.662994384765625, -3.5428466796875, -3.422698974609375, -3.30255126953125, -3.182403564453125, -3.062255859375, -2.942108154296875, -2.82196044921875, -2.701812744140625, -2.5816650390625, -2.461517333984375, -2.34136962890625, -2.221221923828125, -2.10107421875, -1.980926513671875, -1.86077880859375, -1.740631103515625, -1.6204833984375, -1.500335693359375, -1.38018798828125, -1.260040283203125, -1.139892578125, -1.019744873046875, -0.89959716796875, -0.779449462890625, -0.6593017578125, -0.539154052734375, -0.41900634765625, -0.298858642578125, -0.1787109375, -0.058563232421875, 0.06158447265625, 0.181732177734375, 0.3018798828125, 0.422027587890625, 0.54217529296875, 0.662322998046875, 0.782470703125, 0.902618408203125, 1.02276611328125, 1.142913818359375, 1.2630615234375, 1.383209228515625, 1.50335693359375, 1.623504638671875, 1.74365234375, 1.863800048828125, 1.98394775390625, 2.104095458984375, 2.2242431640625, 2.344390869140625, 2.46453857421875, 2.584686279296875, 2.704833984375, 2.824981689453125, 2.94512939453125, 3.065277099609375, 3.1854248046875, 3.305572509765625, 3.42572021484375, 3.545867919921875, 3.666015625]}, "gradients/encoder.encoder.layers.15.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 3.0, 1.0, 1.0, 2.0, 3.0, 2.0, 0.0, 4.0, 1.0, 2.0, 12.0, 15.0, 16.0, 42.0, 76.0, 117.0, 191.0, 189.0, 131.0, 93.0, 42.0, 23.0, 18.0, 9.0, 3.0, 3.0, 2.0, 4.0, 0.0, 2.0, 2.0, 1.0, 1.0, 2.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.459257125854492e-05, -9.080860763788223e-05, -8.702464401721954e-05, -8.324068039655685e-05, -7.945671677589417e-05, -7.567275315523148e-05, -7.188878953456879e-05, -6.81048259139061e-05, -6.432086229324341e-05, -6.053689867258072e-05, -5.675293505191803e-05, -5.296897143125534e-05, -4.918500781059265e-05, -4.540104418992996e-05, -4.161708056926727e-05, -3.7833116948604584e-05, -3.4049153327941895e-05, -3.0265189707279205e-05, -2.6481226086616516e-05, -2.2697262465953827e-05, -1.8913298845291138e-05, -1.5129335224628448e-05, -1.134537160396576e-05, -7.56140798330307e-06, -3.777444362640381e-06, 6.51925802230835e-09, 3.7904828786849976e-06, 7.574446499347687e-06, 1.1358410120010376e-05, 1.5142373740673065e-05, 1.8926337361335754e-05, 2.2710300981998444e-05, 2.6494264602661133e-05, 3.0278228223323822e-05, 3.406219184398651e-05, 3.78461554646492e-05, 4.163011908531189e-05, 4.541408270597458e-05, 4.919804632663727e-05, 5.298200994729996e-05, 5.6765973567962646e-05, 6.0549937188625336e-05, 6.433390080928802e-05, 6.811786442995071e-05, 7.19018280506134e-05, 7.568579167127609e-05, 7.946975529193878e-05, 8.325371891260147e-05, 8.703768253326416e-05, 9.082164615392685e-05, 9.460560977458954e-05, 9.838957339525223e-05, 0.00010217353701591492, 0.0001059575006365776, 0.0001097414642572403, 0.00011352542787790298, 0.00011730939149856567, 0.00012109335511922836, 0.00012487731873989105, 0.00012866128236055374, 0.00013244524598121643, 0.00013622920960187912, 0.0001400131732225418, 0.0001437971368432045, 0.0001475811004638672]}, "gradients/encoder.encoder.layers.15.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 5.0, 1.0, 10.0, 11.0, 34.0, 114.0, 208.0, 745.0, 439178.0, 607071.0, 796.0, 215.0, 101.0, 36.0, 17.0, 9.0, 7.0, 2.0, 1.0, 0.0, 3.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.4140625, -3.29217529296875, -3.1702880859375, -3.04840087890625, -2.926513671875, -2.80462646484375, -2.6827392578125, -2.56085205078125, -2.43896484375, -2.31707763671875, -2.1951904296875, -2.07330322265625, -1.951416015625, -1.82952880859375, -1.7076416015625, -1.58575439453125, -1.4638671875, -1.34197998046875, -1.2200927734375, -1.09820556640625, -0.976318359375, -0.85443115234375, -0.7325439453125, -0.61065673828125, -0.48876953125, -0.36688232421875, -0.2449951171875, -0.12310791015625, -0.001220703125, 0.12066650390625, 0.2425537109375, 0.36444091796875, 0.486328125, 0.60821533203125, 0.7301025390625, 0.85198974609375, 0.973876953125, 1.09576416015625, 1.2176513671875, 1.33953857421875, 1.46142578125, 1.58331298828125, 1.7052001953125, 1.82708740234375, 1.948974609375, 2.07086181640625, 2.1927490234375, 2.31463623046875, 2.4365234375, 2.55841064453125, 2.6802978515625, 2.80218505859375, 2.924072265625, 3.04595947265625, 3.1678466796875, 3.28973388671875, 3.41162109375, 3.53350830078125, 3.6553955078125, 3.77728271484375, 3.899169921875, 4.02105712890625, 4.1429443359375, 4.26483154296875, 4.38671875]}, "gradients/encoder.encoder.layers.15.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 5.0, 11.0, 64.0, 301.0, 516.0, 91.0, 17.0, 11.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.40625, -2.320892333984375, -2.23553466796875, -2.150177001953125, -2.0648193359375, -1.979461669921875, -1.89410400390625, -1.808746337890625, -1.723388671875, -1.638031005859375, -1.55267333984375, -1.467315673828125, -1.3819580078125, -1.296600341796875, -1.21124267578125, -1.125885009765625, -1.04052734375, -0.955169677734375, -0.86981201171875, -0.784454345703125, -0.6990966796875, -0.613739013671875, -0.52838134765625, -0.443023681640625, -0.357666015625, -0.272308349609375, -0.18695068359375, -0.101593017578125, -0.0162353515625, 0.069122314453125, 0.15447998046875, 0.239837646484375, 0.3251953125, 0.410552978515625, 0.49591064453125, 0.581268310546875, 0.6666259765625, 0.751983642578125, 0.83734130859375, 0.922698974609375, 1.008056640625, 1.093414306640625, 1.17877197265625, 1.264129638671875, 1.3494873046875, 1.434844970703125, 1.52020263671875, 1.605560302734375, 1.69091796875, 1.776275634765625, 1.86163330078125, 1.946990966796875, 2.0323486328125, 2.117706298828125, 2.20306396484375, 2.288421630859375, 2.373779296875, 2.459136962890625, 2.54449462890625, 2.629852294921875, 2.7152099609375, 2.800567626953125, 2.88592529296875, 2.971282958984375, 3.056640625]}, "gradients/encoder.encoder.layers.15.layer_norm.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 4.0, 9.0, 15.0, 59.0, 169.0, 354.0, 283.0, 85.0, 21.0, 9.0, 4.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.268885612487793, -5.632908821105957, -4.996932029724121, -4.360955715179443, -3.7249789237976074, -3.0890021324157715, -2.4530255794525146, -1.8170490264892578, -1.1810722351074219, -0.5450955629348755, 0.0908811092376709, 0.7268577814102173, 1.3628344535827637, 1.9988112449645996, 2.6347877979278564, 3.2707643508911133, 3.906741142272949, 4.542717933654785, 5.178694725036621, 5.814671039581299, 6.450647830963135, 7.086624622344971, 7.722600936889648, 8.358577728271484, 8.99455451965332, 9.630531311035156, 10.266508102416992, 10.902484893798828, 11.538461685180664, 12.1744384765625, 12.81041431427002, 13.446391105651855, 14.082366943359375, 14.718343734741211, 15.354320526123047, 15.990297317504883, 16.62627410888672, 17.262250900268555, 17.89822769165039, 18.534202575683594, 19.170181274414062, 19.8061580657959, 20.442134857177734, 21.07811164855957, 21.714088439941406, 22.350065231323242, 22.986042022705078, 23.62201690673828, 24.257993698120117, 24.893970489501953, 25.52994728088379, 26.165924072265625, 26.80190086364746, 27.437877655029297, 28.073854446411133, 28.70983123779297, 29.345806121826172, 29.981782913208008, 30.617759704589844, 31.25373649597168, 31.889713287353516, 32.52568817138672, 33.16166687011719, 33.79764175415039, 34.43362045288086]}, "gradients/encoder.encoder.layers.15.layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 2.0, 1.0, 3.0, 5.0, 9.0, 5.0, 18.0, 5.0, 11.0, 16.0, 14.0, 20.0, 22.0, 19.0, 28.0, 26.0, 29.0, 31.0, 36.0, 29.0, 45.0, 29.0, 29.0, 48.0, 54.0, 41.0, 38.0, 36.0, 42.0, 34.0, 28.0, 28.0, 22.0, 21.0, 26.0, 22.0, 28.0, 26.0, 15.0, 9.0, 17.0, 5.0, 10.0, 5.0, 6.0, 1.0, 3.0, 5.0, 3.0, 1.0, 2.0, 2.0, 2.0, 3.0], "bins": [-6.289222717285156, -6.104809284210205, -5.920395851135254, -5.735982418060303, -5.551568984985352, -5.3671555519104, -5.182742118835449, -4.998329162597656, -4.813915252685547, -4.629501819610596, -4.4450883865356445, -4.260674953460693, -4.076261520385742, -3.891848087310791, -3.707434892654419, -3.5230214595794678, -3.3386082649230957, -3.1541948318481445, -2.9697813987731934, -2.785367965698242, -2.600954532623291, -2.41654109954834, -2.2321279048919678, -2.0477144718170166, -1.8633010387420654, -1.6788876056671143, -1.494474172592163, -1.3100608587265015, -1.1256474256515503, -0.9412339925765991, -0.7568206787109375, -0.5724072456359863, -0.38799428939819336, -0.20358088612556458, -0.01916748285293579, 0.1652458906173706, 0.3496593236923218, 0.534072756767273, 0.7184860706329346, 0.9028995037078857, 1.087312936782837, 1.271726369857788, 1.4561398029327393, 1.6405531167984009, 1.824966549873352, 2.0093798637390137, 2.193793296813965, 2.378206729888916, 2.562620162963867, 2.7470335960388184, 2.9314470291137695, 3.1158604621887207, 3.300273895263672, 3.484687328338623, 3.669100522994995, 3.8535139560699463, 4.037927627563477, 4.222341060638428, 4.406754493713379, 4.59116792678833, 4.775581359863281, 4.959994792938232, 5.144408226013184, 5.328821182250977, 5.513234615325928]}, "gradients/encoder.encoder.layers.14.feed_forward.output_dense.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 0.0, 1.0, 1.0, 5.0, 3.0, 7.0, 10.0, 11.0, 20.0, 32.0, 42.0, 64.0, 103.0, 180.0, 359.0, 693.0, 1428.0, 3740.0, 12012.0, 69646.0, 3761034.0, 309566.0, 24643.0, 6157.0, 2241.0, 997.0, 476.0, 268.0, 166.0, 111.0, 70.0, 59.0, 34.0, 18.0, 20.0, 21.0, 16.0, 5.0, 4.0, 8.0, 7.0, 3.0, 3.0, 1.0, 3.0, 2.0, 1.0, 2.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.421875, -1.36199951171875, -1.3021240234375, -1.24224853515625, -1.182373046875, -1.12249755859375, -1.0626220703125, -1.00274658203125, -0.94287109375, -0.88299560546875, -0.8231201171875, -0.76324462890625, -0.703369140625, -0.64349365234375, -0.5836181640625, -0.52374267578125, -0.4638671875, -0.40399169921875, -0.3441162109375, -0.28424072265625, -0.224365234375, -0.16448974609375, -0.1046142578125, -0.04473876953125, 0.01513671875, 0.07501220703125, 0.1348876953125, 0.19476318359375, 0.254638671875, 0.31451416015625, 0.3743896484375, 0.43426513671875, 0.494140625, 0.55401611328125, 0.6138916015625, 0.67376708984375, 0.733642578125, 0.79351806640625, 0.8533935546875, 0.91326904296875, 0.97314453125, 1.03302001953125, 1.0928955078125, 1.15277099609375, 1.212646484375, 1.27252197265625, 1.3323974609375, 1.39227294921875, 1.4521484375, 1.51202392578125, 1.5718994140625, 1.63177490234375, 1.691650390625, 1.75152587890625, 1.8114013671875, 1.87127685546875, 1.93115234375, 1.99102783203125, 2.0509033203125, 2.11077880859375, 2.170654296875, 2.23052978515625, 2.2904052734375, 2.35028076171875, 2.41015625]}, "gradients/encoder.encoder.layers.14.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 3.0, 3.0, 5.0, 2.0, 8.0, 4.0, 18.0, 18.0, 22.0, 37.0, 44.0, 52.0, 98.0, 80.0, 91.0, 108.0, 90.0, 83.0, 72.0, 51.0, 29.0, 28.0, 27.0, 15.0, 10.0, 5.0, 2.0, 1.0, 2.0, 3.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.55126953125, -0.5302200317382812, -0.5091705322265625, -0.48812103271484375, -0.467071533203125, -0.44602203369140625, -0.4249725341796875, -0.40392303466796875, -0.38287353515625, -0.36182403564453125, -0.3407745361328125, -0.31972503662109375, -0.298675537109375, -0.27762603759765625, -0.2565765380859375, -0.23552703857421875, -0.2144775390625, -0.19342803955078125, -0.1723785400390625, -0.15132904052734375, -0.130279541015625, -0.10923004150390625, -0.0881805419921875, -0.06713104248046875, -0.04608154296875, -0.02503204345703125, -0.0039825439453125, 0.01706695556640625, 0.038116455078125, 0.05916595458984375, 0.0802154541015625, 0.10126495361328125, 0.122314453125, 0.14336395263671875, 0.1644134521484375, 0.18546295166015625, 0.206512451171875, 0.22756195068359375, 0.2486114501953125, 0.26966094970703125, 0.29071044921875, 0.31175994873046875, 0.3328094482421875, 0.35385894775390625, 0.374908447265625, 0.39595794677734375, 0.4170074462890625, 0.43805694580078125, 0.4591064453125, 0.48015594482421875, 0.5012054443359375, 0.5222549438476562, 0.543304443359375, 0.5643539428710938, 0.5854034423828125, 0.6064529418945312, 0.62750244140625, 0.6485519409179688, 0.6696014404296875, 0.6906509399414062, 0.711700439453125, 0.7327499389648438, 0.7537994384765625, 0.7748489379882812, 0.7958984375]}, "gradients/encoder.encoder.layers.14.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 2.0, 12.0, 11.0, 23.0, 28.0, 58.0, 83.0, 210.0, 376.0, 1190.0, 4659.0, 56963.0, 4046698.0, 76143.0, 5546.0, 1341.0, 517.0, 207.0, 98.0, 56.0, 30.0, 13.0, 11.0, 3.0, 6.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.4453125, -2.35650634765625, -2.2677001953125, -2.17889404296875, -2.090087890625, -2.00128173828125, -1.9124755859375, -1.82366943359375, -1.73486328125, -1.64605712890625, -1.5572509765625, -1.46844482421875, -1.379638671875, -1.29083251953125, -1.2020263671875, -1.11322021484375, -1.0244140625, -0.93560791015625, -0.8468017578125, -0.75799560546875, -0.669189453125, -0.58038330078125, -0.4915771484375, -0.40277099609375, -0.31396484375, -0.22515869140625, -0.1363525390625, -0.04754638671875, 0.041259765625, 0.13006591796875, 0.2188720703125, 0.30767822265625, 0.396484375, 0.48529052734375, 0.5740966796875, 0.66290283203125, 0.751708984375, 0.84051513671875, 0.9293212890625, 1.01812744140625, 1.10693359375, 1.19573974609375, 1.2845458984375, 1.37335205078125, 1.462158203125, 1.55096435546875, 1.6397705078125, 1.72857666015625, 1.8173828125, 1.90618896484375, 1.9949951171875, 2.08380126953125, 2.172607421875, 2.26141357421875, 2.3502197265625, 2.43902587890625, 2.52783203125, 2.61663818359375, 2.7054443359375, 2.79425048828125, 2.883056640625, 2.97186279296875, 3.0606689453125, 3.14947509765625, 3.23828125]}, "gradients/encoder.encoder.layers.14.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 3.0, 2.0, 4.0, 6.0, 5.0, 12.0, 16.0, 31.0, 68.0, 171.0, 471.0, 2319.0, 592.0, 186.0, 72.0, 42.0, 24.0, 21.0, 10.0, 7.0, 5.0, 6.0, 1.0, 3.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.869140625, -0.844696044921875, -0.82025146484375, -0.795806884765625, -0.7713623046875, -0.746917724609375, -0.72247314453125, -0.698028564453125, -0.673583984375, -0.649139404296875, -0.62469482421875, -0.600250244140625, -0.5758056640625, -0.551361083984375, -0.52691650390625, -0.502471923828125, -0.47802734375, -0.453582763671875, -0.42913818359375, -0.404693603515625, -0.3802490234375, -0.355804443359375, -0.33135986328125, -0.306915283203125, -0.282470703125, -0.258026123046875, -0.23358154296875, -0.209136962890625, -0.1846923828125, -0.160247802734375, -0.13580322265625, -0.111358642578125, -0.0869140625, -0.062469482421875, -0.03802490234375, -0.013580322265625, 0.0108642578125, 0.035308837890625, 0.05975341796875, 0.084197998046875, 0.108642578125, 0.133087158203125, 0.15753173828125, 0.181976318359375, 0.2064208984375, 0.230865478515625, 0.25531005859375, 0.279754638671875, 0.30419921875, 0.328643798828125, 0.35308837890625, 0.377532958984375, 0.4019775390625, 0.426422119140625, 0.45086669921875, 0.475311279296875, 0.499755859375, 0.524200439453125, 0.54864501953125, 0.573089599609375, 0.5975341796875, 0.621978759765625, 0.64642333984375, 0.670867919921875, 0.6953125]}, "gradients/encoder.encoder.layers.14.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 3.0, 17.0, 27.0, 138.0, 375.0, 320.0, 86.0, 28.0, 5.0, 5.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 2.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.6058623790740967, -2.3814826011657715, -2.1571028232574463, -1.9327229261398315, -1.7083431482315063, -1.4839633703231812, -1.2595834732055664, -1.0352036952972412, -0.810823917388916, -0.5864441394805908, -0.36206430196762085, -0.13768446445465088, 0.08669531345367432, 0.3110750913619995, 0.5354549884796143, 0.7598347663879395, 0.9842145442962646, 1.2085943222045898, 1.432974100112915, 1.6573539972305298, 1.881733775138855, 2.1061134338378906, 2.330493450164795, 2.55487322807312, 2.7792530059814453, 3.0036327838897705, 3.2280125617980957, 3.452392578125, 3.676772117614746, 3.9011521339416504, 4.125532150268555, 4.349911689758301, 4.574291229248047, 4.798671245574951, 5.023050785064697, 5.247430801391602, 5.471810340881348, 5.696190357208252, 5.920570373535156, 6.144949913024902, 6.369329452514648, 6.593709468841553, 6.818089008331299, 7.042469024658203, 7.266848564147949, 7.4912285804748535, 7.715608596801758, 7.939988136291504, 8.16436767578125, 8.388747215270996, 8.613127708435059, 8.837507247924805, 9.06188678741455, 9.286266326904297, 9.51064682006836, 9.735026359558105, 9.959406852722168, 10.183786392211914, 10.408166885375977, 10.632546424865723, 10.856925964355469, 11.081305503845215, 11.305685997009277, 11.530065536499023, 11.75444507598877]}, "gradients/encoder.encoder.layers.14.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 3.0, 3.0, 2.0, 7.0, 8.0, 7.0, 21.0, 9.0, 16.0, 24.0, 20.0, 38.0, 51.0, 38.0, 56.0, 57.0, 54.0, 60.0, 61.0, 68.0, 56.0, 54.0, 49.0, 44.0, 28.0, 36.0, 26.0, 21.0, 22.0, 17.0, 18.0, 8.0, 7.0, 6.0, 5.0, 8.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0], "bins": [-2.2382194995880127, -2.172811269760132, -2.107403039932251, -2.04199481010437, -1.9765865802764893, -1.9111783504486084, -1.8457701206207275, -1.7803618907928467, -1.7149536609649658, -1.649545431137085, -1.584137201309204, -1.5187289714813232, -1.4533207416534424, -1.3879125118255615, -1.3225042819976807, -1.2570960521697998, -1.1916877031326294, -1.1262794733047485, -1.0608712434768677, -0.9954630136489868, -0.930054783821106, -0.8646465539932251, -0.7992382645606995, -0.7338300347328186, -0.6684218049049377, -0.6030135750770569, -0.537605345249176, -0.4721970856189728, -0.4067888557910919, -0.34138062596321106, -0.2759723663330078, -0.21056413650512695, -0.14515602588653564, -0.07974778860807419, -0.014339551329612732, 0.05106869339942932, 0.11647692322731018, 0.18188515305519104, 0.2472934126853943, 0.31270164251327515, 0.378109872341156, 0.44351810216903687, 0.5089263319969177, 0.5743346214294434, 0.6397428512573242, 0.7051510810852051, 0.7705593109130859, 0.8359675407409668, 0.9013757705688477, 0.9667840003967285, 1.0321922302246094, 1.0976004600524902, 1.163008689880371, 1.228416919708252, 1.2938251495361328, 1.3592333793640137, 1.4246416091918945, 1.4900498390197754, 1.5554580688476562, 1.620866298675537, 1.686274528503418, 1.7516827583312988, 1.8170909881591797, 1.8824992179870605, 1.947907567024231]}, "gradients/encoder.encoder.layers.14.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 0.0, 2.0, 1.0, 2.0, 0.0, 2.0, 9.0, 7.0, 7.0, 8.0, 16.0, 22.0, 37.0, 64.0, 112.0, 222.0, 448.0, 1241.0, 5676.0, 60284.0, 830182.0, 137819.0, 9408.0, 1856.0, 582.0, 238.0, 130.0, 77.0, 46.0, 24.0, 12.0, 12.0, 3.0, 5.0, 6.0, 2.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.4921875, -2.38671875, -2.28125, -2.17578125, -2.0703125, -1.96484375, -1.859375, -1.75390625, -1.6484375, -1.54296875, -1.4375, -1.33203125, -1.2265625, -1.12109375, -1.015625, -0.91015625, -0.8046875, -0.69921875, -0.59375, -0.48828125, -0.3828125, -0.27734375, -0.171875, -0.06640625, 0.0390625, 0.14453125, 0.25, 0.35546875, 0.4609375, 0.56640625, 0.671875, 0.77734375, 0.8828125, 0.98828125, 1.09375, 1.19921875, 1.3046875, 1.41015625, 1.515625, 1.62109375, 1.7265625, 1.83203125, 1.9375, 2.04296875, 2.1484375, 2.25390625, 2.359375, 2.46484375, 2.5703125, 2.67578125, 2.78125, 2.88671875, 2.9921875, 3.09765625, 3.203125, 3.30859375, 3.4140625, 3.51953125, 3.625, 3.73046875, 3.8359375, 3.94140625, 4.046875, 4.15234375, 4.2578125]}, "gradients/encoder.encoder.layers.14.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 2.0, 3.0, 3.0, 3.0, 6.0, 12.0, 14.0, 21.0, 27.0, 39.0, 46.0, 68.0, 90.0, 88.0, 101.0, 129.0, 93.0, 64.0, 67.0, 41.0, 30.0, 27.0, 15.0, 7.0, 6.0, 3.0, 2.0, 1.0, 3.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.5947265625, -0.5724945068359375, -0.550262451171875, -0.5280303955078125, -0.50579833984375, -0.4835662841796875, -0.461334228515625, -0.4391021728515625, -0.4168701171875, -0.3946380615234375, -0.372406005859375, -0.3501739501953125, -0.32794189453125, -0.3057098388671875, -0.283477783203125, -0.2612457275390625, -0.239013671875, -0.2167816162109375, -0.194549560546875, -0.1723175048828125, -0.15008544921875, -0.1278533935546875, -0.105621337890625, -0.0833892822265625, -0.0611572265625, -0.0389251708984375, -0.016693115234375, 0.0055389404296875, 0.02777099609375, 0.0500030517578125, 0.072235107421875, 0.0944671630859375, 0.11669921875, 0.1389312744140625, 0.161163330078125, 0.1833953857421875, 0.20562744140625, 0.2278594970703125, 0.250091552734375, 0.2723236083984375, 0.2945556640625, 0.3167877197265625, 0.339019775390625, 0.3612518310546875, 0.38348388671875, 0.4057159423828125, 0.427947998046875, 0.4501800537109375, 0.472412109375, 0.4946441650390625, 0.516876220703125, 0.5391082763671875, 0.56134033203125, 0.5835723876953125, 0.605804443359375, 0.6280364990234375, 0.6502685546875, 0.6725006103515625, 0.694732666015625, 0.7169647216796875, 0.73919677734375, 0.7614288330078125, 0.783660888671875, 0.8058929443359375, 0.828125]}, "gradients/encoder.encoder.layers.14.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 2.0, 1.0, 4.0, 10.0, 9.0, 11.0, 9.0, 12.0, 25.0, 27.0, 36.0, 32.0, 47.0, 75.0, 91.0, 124.0, 224.0, 395.0, 841.0, 2223.0, 7356.0, 29496.0, 156122.0, 590485.0, 208783.0, 38212.0, 9060.0, 2658.0, 942.0, 443.0, 263.0, 148.0, 94.0, 54.0, 68.0, 37.0, 29.0, 28.0, 21.0, 11.0, 16.0, 4.0, 16.0, 5.0, 9.0, 1.0, 3.0, 2.0, 2.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.3203125, -1.2756805419921875, -1.231048583984375, -1.1864166259765625, -1.14178466796875, -1.0971527099609375, -1.052520751953125, -1.0078887939453125, -0.9632568359375, -0.9186248779296875, -0.873992919921875, -0.8293609619140625, -0.78472900390625, -0.7400970458984375, -0.695465087890625, -0.6508331298828125, -0.606201171875, -0.5615692138671875, -0.516937255859375, -0.4723052978515625, -0.42767333984375, -0.3830413818359375, -0.338409423828125, -0.2937774658203125, -0.2491455078125, -0.2045135498046875, -0.159881591796875, -0.1152496337890625, -0.07061767578125, -0.0259857177734375, 0.018646240234375, 0.0632781982421875, 0.10791015625, 0.1525421142578125, 0.197174072265625, 0.2418060302734375, 0.28643798828125, 0.3310699462890625, 0.375701904296875, 0.4203338623046875, 0.4649658203125, 0.5095977783203125, 0.554229736328125, 0.5988616943359375, 0.64349365234375, 0.6881256103515625, 0.732757568359375, 0.7773895263671875, 0.822021484375, 0.8666534423828125, 0.911285400390625, 0.9559173583984375, 1.00054931640625, 1.0451812744140625, 1.089813232421875, 1.1344451904296875, 1.1790771484375, 1.2237091064453125, 1.268341064453125, 1.3129730224609375, 1.35760498046875, 1.4022369384765625, 1.446868896484375, 1.4915008544921875, 1.5361328125]}, "gradients/encoder.encoder.layers.14.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 6.0, 1.0, 4.0, 5.0, 1.0, 3.0, 10.0, 11.0, 10.0, 13.0, 15.0, 16.0, 17.0, 19.0, 23.0, 23.0, 29.0, 20.0, 43.0, 45.0, 43.0, 44.0, 38.0, 37.0, 55.0, 39.0, 37.0, 48.0, 35.0, 44.0, 55.0, 30.0, 32.0, 31.0, 30.0, 17.0, 21.0, 13.0, 10.0, 5.0, 6.0, 3.0, 2.0, 9.0, 6.0, 4.0, 2.0, 2.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.29296875, -1.2508087158203125, -1.208648681640625, -1.1664886474609375, -1.12432861328125, -1.0821685791015625, -1.040008544921875, -0.9978485107421875, -0.9556884765625, -0.9135284423828125, -0.871368408203125, -0.8292083740234375, -0.78704833984375, -0.7448883056640625, -0.702728271484375, -0.6605682373046875, -0.618408203125, -0.5762481689453125, -0.534088134765625, -0.4919281005859375, -0.44976806640625, -0.4076080322265625, -0.365447998046875, -0.3232879638671875, -0.2811279296875, -0.2389678955078125, -0.196807861328125, -0.1546478271484375, -0.11248779296875, -0.0703277587890625, -0.028167724609375, 0.0139923095703125, 0.05615234375, 0.0983123779296875, 0.140472412109375, 0.1826324462890625, 0.22479248046875, 0.2669525146484375, 0.309112548828125, 0.3512725830078125, 0.3934326171875, 0.4355926513671875, 0.477752685546875, 0.5199127197265625, 0.56207275390625, 0.6042327880859375, 0.646392822265625, 0.6885528564453125, 0.730712890625, 0.7728729248046875, 0.815032958984375, 0.8571929931640625, 0.89935302734375, 0.9415130615234375, 0.983673095703125, 1.0258331298828125, 1.0679931640625, 1.1101531982421875, 1.152313232421875, 1.1944732666015625, 1.23663330078125, 1.2787933349609375, 1.320953369140625, 1.3631134033203125, 1.4052734375]}, "gradients/encoder.encoder.layers.14.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 1.0, 2.0, 5.0, 9.0, 6.0, 15.0, 11.0, 24.0, 41.0, 71.0, 92.0, 146.0, 365.0, 1721.0, 22848.0, 934961.0, 84115.0, 3065.0, 576.0, 204.0, 84.0, 57.0, 54.0, 23.0, 30.0, 14.0, 9.0, 4.0, 4.0, 2.0, 4.0, 0.0, 0.0, 1.0, 0.0, 5.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.27734375, -1.23822021484375, -1.1990966796875, -1.15997314453125, -1.120849609375, -1.08172607421875, -1.0426025390625, -1.00347900390625, -0.96435546875, -0.92523193359375, -0.8861083984375, -0.84698486328125, -0.807861328125, -0.76873779296875, -0.7296142578125, -0.69049072265625, -0.6513671875, -0.61224365234375, -0.5731201171875, -0.53399658203125, -0.494873046875, -0.45574951171875, -0.4166259765625, -0.37750244140625, -0.33837890625, -0.29925537109375, -0.2601318359375, -0.22100830078125, -0.181884765625, -0.14276123046875, -0.1036376953125, -0.06451416015625, -0.025390625, 0.01373291015625, 0.0528564453125, 0.09197998046875, 0.131103515625, 0.17022705078125, 0.2093505859375, 0.24847412109375, 0.28759765625, 0.32672119140625, 0.3658447265625, 0.40496826171875, 0.444091796875, 0.48321533203125, 0.5223388671875, 0.56146240234375, 0.6005859375, 0.63970947265625, 0.6788330078125, 0.71795654296875, 0.757080078125, 0.79620361328125, 0.8353271484375, 0.87445068359375, 0.91357421875, 0.95269775390625, 0.9918212890625, 1.03094482421875, 1.070068359375, 1.10919189453125, 1.1483154296875, 1.18743896484375, 1.2265625]}, "gradients/encoder.encoder.layers.14.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 7.0, 4.0, 14.0, 9.0, 6.0, 23.0, 34.0, 36.0, 60.0, 82.0, 98.0, 114.0, 111.0, 93.0, 80.0, 59.0, 40.0, 43.0, 20.0, 18.0, 21.0, 8.0, 4.0, 2.0, 6.0, 7.0, 4.0, 3.0, 2.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.37905502319336e-05, -7.115211337804794e-05, -6.851367652416229e-05, -6.587523967027664e-05, -6.323680281639099e-05, -6.059836596250534e-05, -5.795992910861969e-05, -5.532149225473404e-05, -5.268305540084839e-05, -5.004461854696274e-05, -4.740618169307709e-05, -4.476774483919144e-05, -4.2129307985305786e-05, -3.9490871131420135e-05, -3.6852434277534485e-05, -3.4213997423648834e-05, -3.1575560569763184e-05, -2.8937123715877533e-05, -2.6298686861991882e-05, -2.366025000810623e-05, -2.102181315422058e-05, -1.838337630033493e-05, -1.574493944644928e-05, -1.3106502592563629e-05, -1.0468065738677979e-05, -7.829628884792328e-06, -5.191192030906677e-06, -2.5527551770210266e-06, 8.568167686462402e-08, 2.7241185307502747e-06, 5.362555384635925e-06, 8.000992238521576e-06, 1.0639429092407227e-05, 1.3277865946292877e-05, 1.5916302800178528e-05, 1.855473965406418e-05, 2.119317650794983e-05, 2.383161336183548e-05, 2.647005021572113e-05, 2.910848706960678e-05, 3.174692392349243e-05, 3.438536077737808e-05, 3.702379763126373e-05, 3.9662234485149384e-05, 4.2300671339035034e-05, 4.4939108192920685e-05, 4.7577545046806335e-05, 5.0215981900691986e-05, 5.285441875457764e-05, 5.549285560846329e-05, 5.813129246234894e-05, 6.076972931623459e-05, 6.340816617012024e-05, 6.604660302400589e-05, 6.868503987789154e-05, 7.132347673177719e-05, 7.396191358566284e-05, 7.660035043954849e-05, 7.923878729343414e-05, 8.18772241473198e-05, 8.451566100120544e-05, 8.71540978550911e-05, 8.979253470897675e-05, 9.24309715628624e-05, 9.506940841674805e-05]}, "gradients/encoder.encoder.layers.14.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 4.0, 1.0, 4.0, 4.0, 11.0, 13.0, 49.0, 105.0, 300.0, 1086.0, 15824.0, 947835.0, 80542.0, 2073.0, 449.0, 154.0, 56.0, 24.0, 13.0, 8.0, 4.0, 6.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.7529296875, -1.7108612060546875, -1.668792724609375, -1.6267242431640625, -1.58465576171875, -1.5425872802734375, -1.500518798828125, -1.4584503173828125, -1.4163818359375, -1.3743133544921875, -1.332244873046875, -1.2901763916015625, -1.24810791015625, -1.2060394287109375, -1.163970947265625, -1.1219024658203125, -1.079833984375, -1.0377655029296875, -0.995697021484375, -0.9536285400390625, -0.91156005859375, -0.8694915771484375, -0.827423095703125, -0.7853546142578125, -0.7432861328125, -0.7012176513671875, -0.659149169921875, -0.6170806884765625, -0.57501220703125, -0.5329437255859375, -0.490875244140625, -0.4488067626953125, -0.40673828125, -0.3646697998046875, -0.322601318359375, -0.2805328369140625, -0.23846435546875, -0.1963958740234375, -0.154327392578125, -0.1122589111328125, -0.0701904296875, -0.0281219482421875, 0.013946533203125, 0.0560150146484375, 0.09808349609375, 0.1401519775390625, 0.182220458984375, 0.2242889404296875, 0.266357421875, 0.3084259033203125, 0.350494384765625, 0.3925628662109375, 0.43463134765625, 0.4766998291015625, 0.518768310546875, 0.5608367919921875, 0.6029052734375, 0.6449737548828125, 0.687042236328125, 0.7291107177734375, 0.77117919921875, 0.8132476806640625, 0.855316162109375, 0.8973846435546875, 0.939453125]}, "gradients/encoder.encoder.layers.14.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 7.0, 5.0, 14.0, 28.0, 54.0, 85.0, 154.0, 182.0, 171.0, 135.0, 71.0, 48.0, 29.0, 15.0, 7.0, 6.0, 2.0, 1.0, 2.0, 0.0, 0.0, 1.0], "bins": [-1.455078125, -1.4259147644042969, -1.3967514038085938, -1.3675880432128906, -1.3384246826171875, -1.3092613220214844, -1.2800979614257812, -1.2509346008300781, -1.221771240234375, -1.1926078796386719, -1.1634445190429688, -1.1342811584472656, -1.1051177978515625, -1.0759544372558594, -1.0467910766601562, -1.0176277160644531, -0.98846435546875, -0.9593009948730469, -0.9301376342773438, -0.9009742736816406, -0.8718109130859375, -0.8426475524902344, -0.8134841918945312, -0.7843208312988281, -0.755157470703125, -0.7259941101074219, -0.6968307495117188, -0.6676673889160156, -0.6385040283203125, -0.6093406677246094, -0.5801773071289062, -0.5510139465332031, -0.5218505859375, -0.4926872253417969, -0.46352386474609375, -0.4343605041503906, -0.4051971435546875, -0.3760337829589844, -0.34687042236328125, -0.3177070617675781, -0.288543701171875, -0.2593803405761719, -0.23021697998046875, -0.20105361938476562, -0.1718902587890625, -0.14272689819335938, -0.11356353759765625, -0.08440017700195312, -0.05523681640625, -0.026073455810546875, 0.00308990478515625, 0.032253265380859375, 0.0614166259765625, 0.09057998657226562, 0.11974334716796875, 0.14890670776367188, 0.178070068359375, 0.20723342895507812, 0.23639678955078125, 0.2655601501464844, 0.2947235107421875, 0.3238868713378906, 0.35305023193359375, 0.3822135925292969, 0.411376953125]}, "gradients/encoder.encoder.layers.14.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 4.0, 7.0, 8.0, 20.0, 58.0, 179.0, 347.0, 248.0, 103.0, 24.0, 13.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-19.727996826171875, -19.127971649169922, -18.52794647216797, -17.927921295166016, -17.327898025512695, -16.727872848510742, -16.12784767150879, -15.527822494506836, -14.927797317504883, -14.32777214050293, -13.727747917175293, -13.12772274017334, -12.527697563171387, -11.92767333984375, -11.327648162841797, -10.727622985839844, -10.127598762512207, -9.527573585510254, -8.927549362182617, -8.327524185180664, -7.727499008178711, -7.127474308013916, -6.527449607849121, -5.927424430847168, -5.327399730682373, -4.727375030517578, -4.127349853515625, -3.52732515335083, -2.927300214767456, -2.327275276184082, -1.727250576019287, -1.127225399017334, -0.5272006988525391, 0.07282418012619019, 0.6728490591049194, 1.272873878479004, 1.872898817062378, 2.472923755645752, 3.072948455810547, 3.6729736328125, 4.272998332977295, 4.87302303314209, 5.473048210144043, 6.073072910308838, 6.673097610473633, 7.273122787475586, 7.873147487640381, 8.473173141479492, 9.073197364807129, 9.673222541809082, 10.273246765136719, 10.873271942138672, 11.473297119140625, 12.073322296142578, 12.673346519470215, 13.273371696472168, 13.873395919799805, 14.473421096801758, 15.073445320129395, 15.673470497131348, 16.273494720458984, 16.873519897460938, 17.47354507446289, 18.073570251464844, 18.673595428466797]}, "gradients/encoder.encoder.layers.14.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0, 2.0, 5.0, 2.0, 2.0, 5.0, 11.0, 6.0, 16.0, 19.0, 34.0, 20.0, 37.0, 34.0, 34.0, 29.0, 50.0, 38.0, 47.0, 51.0, 50.0, 45.0, 39.0, 45.0, 50.0, 51.0, 40.0, 50.0, 36.0, 29.0, 25.0, 22.0, 20.0, 18.0, 8.0, 11.0, 3.0, 12.0, 8.0, 6.0, 1.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.005902290344238, -8.745743751525879, -8.48558521270752, -8.225425720214844, -7.965267181396484, -7.705108642578125, -7.444950103759766, -7.184791088104248, -6.9246320724487305, -6.664473533630371, -6.4043145179748535, -6.144155979156494, -5.883996963500977, -5.623838424682617, -5.363679885864258, -5.10352087020874, -4.843362331390381, -4.5832037925720215, -4.323044776916504, -4.0628862380981445, -3.802727222442627, -3.5425686836242676, -3.282409906387329, -3.0222511291503906, -2.762092351913452, -2.5019335746765137, -2.241774797439575, -1.9816161394119263, -1.7214573621749878, -1.4612985849380493, -1.2011399269104004, -0.9409811496734619, -0.6808223724365234, -0.42066362500190735, -0.16050487756729126, 0.09965384006500244, 0.3598126173019409, 0.6199713945388794, 0.8801300525665283, 1.1402888298034668, 1.4004476070404053, 1.6606063842773438, 1.9207651615142822, 2.1809239387512207, 2.44108247756958, 2.7012414932250977, 2.961400032043457, 3.2215588092803955, 3.481717586517334, 3.7418763637542725, 4.002035140991211, 4.26219367980957, 4.522352695465088, 4.782511234283447, 5.042670249938965, 5.302828788757324, 5.562987327575684, 5.823145866394043, 6.0833048820495605, 6.34346342086792, 6.6036224365234375, 6.863780975341797, 7.123939514160156, 7.384098529815674, 7.644257545471191]}, "gradients/encoder.encoder.layers.13.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 5.0, 3.0, 9.0, 16.0, 24.0, 50.0, 67.0, 93.0, 188.0, 331.0, 671.0, 1393.0, 3601.0, 10940.0, 53001.0, 3856557.0, 234831.0, 22155.0, 5992.0, 2208.0, 911.0, 451.0, 239.0, 174.0, 101.0, 67.0, 52.0, 28.0, 39.0, 24.0, 21.0, 9.0, 9.0, 8.0, 6.0, 5.0, 3.0, 3.0, 2.0, 4.0, 1.0, 0.0, 3.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.326171875, -2.235870361328125, -2.14556884765625, -2.055267333984375, -1.9649658203125, -1.874664306640625, -1.78436279296875, -1.694061279296875, -1.603759765625, -1.513458251953125, -1.42315673828125, -1.332855224609375, -1.2425537109375, -1.152252197265625, -1.06195068359375, -0.971649169921875, -0.88134765625, -0.791046142578125, -0.70074462890625, -0.610443115234375, -0.5201416015625, -0.429840087890625, -0.33953857421875, -0.249237060546875, -0.158935546875, -0.068634033203125, 0.02166748046875, 0.111968994140625, 0.2022705078125, 0.292572021484375, 0.38287353515625, 0.473175048828125, 0.5634765625, 0.653778076171875, 0.74407958984375, 0.834381103515625, 0.9246826171875, 1.014984130859375, 1.10528564453125, 1.195587158203125, 1.285888671875, 1.376190185546875, 1.46649169921875, 1.556793212890625, 1.6470947265625, 1.737396240234375, 1.82769775390625, 1.917999267578125, 2.00830078125, 2.098602294921875, 2.18890380859375, 2.279205322265625, 2.3695068359375, 2.459808349609375, 2.55010986328125, 2.640411376953125, 2.730712890625, 2.821014404296875, 2.91131591796875, 3.001617431640625, 3.0919189453125, 3.182220458984375, 3.27252197265625, 3.362823486328125, 3.453125]}, "gradients/encoder.encoder.layers.13.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 4.0, 0.0, 5.0, 3.0, 10.0, 7.0, 16.0, 21.0, 19.0, 37.0, 41.0, 45.0, 76.0, 89.0, 67.0, 93.0, 80.0, 88.0, 71.0, 59.0, 43.0, 38.0, 29.0, 20.0, 20.0, 10.0, 6.0, 4.0, 2.0, 6.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.611328125, -0.5900497436523438, -0.5687713623046875, -0.5474929809570312, -0.526214599609375, -0.5049362182617188, -0.4836578369140625, -0.46237945556640625, -0.44110107421875, -0.41982269287109375, -0.3985443115234375, -0.37726593017578125, -0.355987548828125, -0.33470916748046875, -0.3134307861328125, -0.29215240478515625, -0.2708740234375, -0.24959564208984375, -0.2283172607421875, -0.20703887939453125, -0.185760498046875, -0.16448211669921875, -0.1432037353515625, -0.12192535400390625, -0.10064697265625, -0.07936859130859375, -0.0580902099609375, -0.03681182861328125, -0.015533447265625, 0.00574493408203125, 0.0270233154296875, 0.04830169677734375, 0.069580078125, 0.09085845947265625, 0.1121368408203125, 0.13341522216796875, 0.154693603515625, 0.17597198486328125, 0.1972503662109375, 0.21852874755859375, 0.23980712890625, 0.26108551025390625, 0.2823638916015625, 0.30364227294921875, 0.324920654296875, 0.34619903564453125, 0.3674774169921875, 0.38875579833984375, 0.4100341796875, 0.43131256103515625, 0.4525909423828125, 0.47386932373046875, 0.495147705078125, 0.5164260864257812, 0.5377044677734375, 0.5589828491210938, 0.58026123046875, 0.6015396118164062, 0.6228179931640625, 0.6440963745117188, 0.665374755859375, 0.6866531372070312, 0.7079315185546875, 0.7292098999023438, 0.75048828125]}, "gradients/encoder.encoder.layers.13.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 4.0, 3.0, 4.0, 8.0, 7.0, 22.0, 27.0, 37.0, 55.0, 104.0, 175.0, 258.0, 451.0, 790.0, 1565.0, 3362.0, 7765.0, 24493.0, 124086.0, 3673059.0, 298725.0, 39336.0, 11232.0, 4364.0, 1940.0, 993.0, 554.0, 313.0, 208.0, 112.0, 82.0, 57.0, 30.0, 25.0, 18.0, 7.0, 9.0, 5.0, 0.0, 5.0, 1.0, 4.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.4599609375, -1.4090423583984375, -1.358123779296875, -1.3072052001953125, -1.25628662109375, -1.2053680419921875, -1.154449462890625, -1.1035308837890625, -1.0526123046875, -1.0016937255859375, -0.950775146484375, -0.8998565673828125, -0.84893798828125, -0.7980194091796875, -0.747100830078125, -0.6961822509765625, -0.645263671875, -0.5943450927734375, -0.543426513671875, -0.4925079345703125, -0.44158935546875, -0.3906707763671875, -0.339752197265625, -0.2888336181640625, -0.2379150390625, -0.1869964599609375, -0.136077880859375, -0.0851593017578125, -0.03424072265625, 0.0166778564453125, 0.067596435546875, 0.1185150146484375, 0.16943359375, 0.2203521728515625, 0.271270751953125, 0.3221893310546875, 0.37310791015625, 0.4240264892578125, 0.474945068359375, 0.5258636474609375, 0.5767822265625, 0.6277008056640625, 0.678619384765625, 0.7295379638671875, 0.78045654296875, 0.8313751220703125, 0.882293701171875, 0.9332122802734375, 0.984130859375, 1.0350494384765625, 1.085968017578125, 1.1368865966796875, 1.18780517578125, 1.2387237548828125, 1.289642333984375, 1.3405609130859375, 1.3914794921875, 1.4423980712890625, 1.493316650390625, 1.5442352294921875, 1.59515380859375, 1.6460723876953125, 1.696990966796875, 1.7479095458984375, 1.798828125]}, "gradients/encoder.encoder.layers.13.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 5.0, 1.0, 1.0, 2.0, 2.0, 0.0, 3.0, 1.0, 4.0, 3.0, 6.0, 2.0, 9.0, 7.0, 14.0, 21.0, 34.0, 46.0, 99.0, 204.0, 888.0, 2174.0, 258.0, 115.0, 49.0, 35.0, 29.0, 23.0, 12.0, 8.0, 4.0, 6.0, 6.0, 6.0, 2.0, 3.0, 1.0, 1.0, 0.0, 1.0, 1.0, 4.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.8291015625, -0.8040847778320312, -0.7790679931640625, -0.7540512084960938, -0.729034423828125, -0.7040176391601562, -0.6790008544921875, -0.6539840698242188, -0.62896728515625, -0.6039505004882812, -0.5789337158203125, -0.5539169311523438, -0.528900146484375, -0.5038833618164062, -0.4788665771484375, -0.45384979248046875, -0.4288330078125, -0.40381622314453125, -0.3787994384765625, -0.35378265380859375, -0.328765869140625, -0.30374908447265625, -0.2787322998046875, -0.25371551513671875, -0.22869873046875, -0.20368194580078125, -0.1786651611328125, -0.15364837646484375, -0.128631591796875, -0.10361480712890625, -0.0785980224609375, -0.05358123779296875, -0.028564453125, -0.00354766845703125, 0.0214691162109375, 0.04648590087890625, 0.071502685546875, 0.09651947021484375, 0.1215362548828125, 0.14655303955078125, 0.17156982421875, 0.19658660888671875, 0.2216033935546875, 0.24662017822265625, 0.271636962890625, 0.29665374755859375, 0.3216705322265625, 0.34668731689453125, 0.3717041015625, 0.39672088623046875, 0.4217376708984375, 0.44675445556640625, 0.471771240234375, 0.49678802490234375, 0.5218048095703125, 0.5468215942382812, 0.57183837890625, 0.5968551635742188, 0.6218719482421875, 0.6468887329101562, 0.671905517578125, 0.6969223022460938, 0.7219390869140625, 0.7469558715820312, 0.77197265625]}, "gradients/encoder.encoder.layers.13.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 5.0, 35.0, 325.0, 489.0, 117.0, 18.0, 5.0, 5.0, 3.0, 2.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-6.956040859222412, -6.613516330718994, -6.270991802215576, -5.928467750549316, -5.585943222045898, -5.2434186935424805, -4.9008941650390625, -4.5583696365356445, -4.215845108032227, -3.8733205795288086, -3.5307962894439697, -3.1882717609405518, -2.845747470855713, -2.503222942352295, -2.160698413848877, -1.818174123764038, -1.4756498336791992, -1.1331254243850708, -0.7906009554862976, -0.4480764865875244, -0.105552077293396, 0.23697233200073242, 0.5794968605041504, 0.9220211505889893, 1.2645456790924072, 1.6070700883865356, 1.949594497680664, 2.292119026184082, 2.6346435546875, 2.977167844772339, 3.319692373275757, 3.6622166633605957, 4.0047407150268555, 4.347265243530273, 4.689789772033691, 5.032314300537109, 5.374838352203369, 5.717362880706787, 6.059887409210205, 6.402411460876465, 6.744935989379883, 7.087460517883301, 7.429985046386719, 7.772509574890137, 8.115034103393555, 8.457557678222656, 8.80008316040039, 9.142606735229492, 9.485132217407227, 9.827656745910645, 10.170181274414062, 10.51270580291748, 10.855230331420898, 11.19775390625, 11.540279388427734, 11.882802963256836, 12.225327491760254, 12.567852020263672, 12.91037654876709, 13.252901077270508, 13.595425605773926, 13.937950134277344, 14.280473709106445, 14.622998237609863, 14.965522766113281]}, "gradients/encoder.encoder.layers.13.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 4.0, 6.0, 10.0, 20.0, 16.0, 26.0, 31.0, 31.0, 45.0, 52.0, 64.0, 55.0, 72.0, 70.0, 76.0, 82.0, 63.0, 70.0, 48.0, 35.0, 34.0, 27.0, 20.0, 15.0, 10.0, 8.0, 6.0, 1.0, 4.0, 1.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.9553394317626953, -2.8630781173706055, -2.7708168029785156, -2.678555488586426, -2.586294174194336, -2.494032859802246, -2.4017715454101562, -2.3095102310180664, -2.2172489166259766, -2.1249876022338867, -2.032726287841797, -1.940464973449707, -1.8482036590576172, -1.7559423446655273, -1.6636810302734375, -1.5714197158813477, -1.4791582822799683, -1.3868969678878784, -1.2946356534957886, -1.2023743391036987, -1.1101130247116089, -1.017851710319519, -0.9255903363227844, -0.8333290219306946, -0.7410677075386047, -0.6488063931465149, -0.556545078754425, -0.4642837345600128, -0.372022420167923, -0.27976110577583313, -0.1874997615814209, -0.09523844718933105, -0.002977132797241211, 0.08928418904542923, 0.18154551088809967, 0.2738068401813507, 0.36606815457344055, 0.4583294689655304, 0.5505908131599426, 0.6428521275520325, 0.7351134419441223, 0.8273747563362122, 0.919636070728302, 1.0118974447250366, 1.1041587591171265, 1.1964200735092163, 1.2886813879013062, 1.380942702293396, 1.4732040166854858, 1.5654653310775757, 1.6577266454696655, 1.7499879598617554, 1.8422492742538452, 1.934510588645935, 2.0267720222473145, 2.1190333366394043, 2.211294651031494, 2.303555965423584, 2.395817279815674, 2.4880785942077637, 2.5803399085998535, 2.6726012229919434, 2.764862537384033, 2.857123851776123, 2.949385166168213]}, "gradients/encoder.encoder.layers.13.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 1.0, 1.0, 4.0, 8.0, 5.0, 7.0, 16.0, 21.0, 20.0, 30.0, 51.0, 74.0, 127.0, 229.0, 415.0, 986.0, 3017.0, 13409.0, 98872.0, 695968.0, 204869.0, 22971.0, 4726.0, 1519.0, 610.0, 247.0, 149.0, 78.0, 48.0, 26.0, 20.0, 9.0, 15.0, 4.0, 6.0, 5.0, 3.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.9296875, -2.837127685546875, -2.74456787109375, -2.652008056640625, -2.5594482421875, -2.466888427734375, -2.37432861328125, -2.281768798828125, -2.189208984375, -2.096649169921875, -2.00408935546875, -1.911529541015625, -1.8189697265625, -1.726409912109375, -1.63385009765625, -1.541290283203125, -1.44873046875, -1.356170654296875, -1.26361083984375, -1.171051025390625, -1.0784912109375, -0.985931396484375, -0.89337158203125, -0.800811767578125, -0.708251953125, -0.615692138671875, -0.52313232421875, -0.430572509765625, -0.3380126953125, -0.245452880859375, -0.15289306640625, -0.060333251953125, 0.0322265625, 0.124786376953125, 0.21734619140625, 0.309906005859375, 0.4024658203125, 0.495025634765625, 0.58758544921875, 0.680145263671875, 0.772705078125, 0.865264892578125, 0.95782470703125, 1.050384521484375, 1.1429443359375, 1.235504150390625, 1.32806396484375, 1.420623779296875, 1.51318359375, 1.605743408203125, 1.69830322265625, 1.790863037109375, 1.8834228515625, 1.975982666015625, 2.06854248046875, 2.161102294921875, 2.253662109375, 2.346221923828125, 2.43878173828125, 2.531341552734375, 2.6239013671875, 2.716461181640625, 2.80902099609375, 2.901580810546875, 2.994140625]}, "gradients/encoder.encoder.layers.13.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 3.0, 4.0, 0.0, 3.0, 4.0, 3.0, 12.0, 22.0, 21.0, 31.0, 47.0, 56.0, 69.0, 78.0, 90.0, 80.0, 88.0, 94.0, 72.0, 70.0, 43.0, 34.0, 31.0, 18.0, 13.0, 8.0, 7.0, 5.0, 2.0, 2.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.68115234375, -0.65753173828125, -0.6339111328125, -0.61029052734375, -0.586669921875, -0.56304931640625, -0.5394287109375, -0.51580810546875, -0.4921875, -0.46856689453125, -0.4449462890625, -0.42132568359375, -0.397705078125, -0.37408447265625, -0.3504638671875, -0.32684326171875, -0.30322265625, -0.27960205078125, -0.2559814453125, -0.23236083984375, -0.208740234375, -0.18511962890625, -0.1614990234375, -0.13787841796875, -0.1142578125, -0.09063720703125, -0.0670166015625, -0.04339599609375, -0.019775390625, 0.00384521484375, 0.0274658203125, 0.05108642578125, 0.07470703125, 0.09832763671875, 0.1219482421875, 0.14556884765625, 0.169189453125, 0.19281005859375, 0.2164306640625, 0.24005126953125, 0.263671875, 0.28729248046875, 0.3109130859375, 0.33453369140625, 0.358154296875, 0.38177490234375, 0.4053955078125, 0.42901611328125, 0.45263671875, 0.47625732421875, 0.4998779296875, 0.52349853515625, 0.547119140625, 0.57073974609375, 0.5943603515625, 0.61798095703125, 0.6416015625, 0.66522216796875, 0.6888427734375, 0.71246337890625, 0.736083984375, 0.75970458984375, 0.7833251953125, 0.80694580078125, 0.83056640625]}, "gradients/encoder.encoder.layers.13.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 4.0, 6.0, 9.0, 5.0, 10.0, 8.0, 19.0, 26.0, 42.0, 56.0, 95.0, 132.0, 209.0, 376.0, 610.0, 1155.0, 2235.0, 4716.0, 11306.0, 31023.0, 98684.0, 320437.0, 385674.0, 127366.0, 39114.0, 13827.0, 5559.0, 2613.0, 1318.0, 745.0, 428.0, 263.0, 184.0, 99.0, 53.0, 51.0, 35.0, 23.0, 17.0, 6.0, 7.0, 4.0, 7.0, 3.0, 2.0, 3.0, 0.0, 1.0, 2.0, 0.0, 0.0, 3.0, 1.0], "bins": [-1.318359375, -1.278533935546875, -1.23870849609375, -1.198883056640625, -1.1590576171875, -1.119232177734375, -1.07940673828125, -1.039581298828125, -0.999755859375, -0.959930419921875, -0.92010498046875, -0.880279541015625, -0.8404541015625, -0.800628662109375, -0.76080322265625, -0.720977783203125, -0.68115234375, -0.641326904296875, -0.60150146484375, -0.561676025390625, -0.5218505859375, -0.482025146484375, -0.44219970703125, -0.402374267578125, -0.362548828125, -0.322723388671875, -0.28289794921875, -0.243072509765625, -0.2032470703125, -0.163421630859375, -0.12359619140625, -0.083770751953125, -0.0439453125, -0.004119873046875, 0.03570556640625, 0.075531005859375, 0.1153564453125, 0.155181884765625, 0.19500732421875, 0.234832763671875, 0.274658203125, 0.314483642578125, 0.35430908203125, 0.394134521484375, 0.4339599609375, 0.473785400390625, 0.51361083984375, 0.553436279296875, 0.59326171875, 0.633087158203125, 0.67291259765625, 0.712738037109375, 0.7525634765625, 0.792388916015625, 0.83221435546875, 0.872039794921875, 0.911865234375, 0.951690673828125, 0.99151611328125, 1.031341552734375, 1.0711669921875, 1.110992431640625, 1.15081787109375, 1.190643310546875, 1.23046875]}, "gradients/encoder.encoder.layers.13.attention.v_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 2.0, 4.0, 7.0, 5.0, 8.0, 6.0, 2.0, 14.0, 13.0, 26.0, 17.0, 18.0, 41.0, 34.0, 32.0, 38.0, 34.0, 46.0, 44.0, 48.0, 52.0, 53.0, 44.0, 53.0, 49.0, 50.0, 42.0, 44.0, 31.0, 25.0, 22.0, 17.0, 21.0, 15.0, 11.0, 14.0, 8.0, 8.0, 6.0, 3.0, 3.0, 5.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.5419921875, -1.4770355224609375, -1.412078857421875, -1.3471221923828125, -1.28216552734375, -1.2172088623046875, -1.152252197265625, -1.0872955322265625, -1.0223388671875, -0.9573822021484375, -0.892425537109375, -0.8274688720703125, -0.76251220703125, -0.6975555419921875, -0.632598876953125, -0.5676422119140625, -0.502685546875, -0.4377288818359375, -0.372772216796875, -0.3078155517578125, -0.24285888671875, -0.1779022216796875, -0.112945556640625, -0.0479888916015625, 0.0169677734375, 0.0819244384765625, 0.146881103515625, 0.2118377685546875, 0.27679443359375, 0.3417510986328125, 0.406707763671875, 0.4716644287109375, 0.53662109375, 0.6015777587890625, 0.666534423828125, 0.7314910888671875, 0.79644775390625, 0.8614044189453125, 0.926361083984375, 0.9913177490234375, 1.0562744140625, 1.1212310791015625, 1.186187744140625, 1.2511444091796875, 1.31610107421875, 1.3810577392578125, 1.446014404296875, 1.5109710693359375, 1.575927734375, 1.6408843994140625, 1.705841064453125, 1.7707977294921875, 1.83575439453125, 1.9007110595703125, 1.965667724609375, 2.0306243896484375, 2.0955810546875, 2.1605377197265625, 2.225494384765625, 2.2904510498046875, 2.35540771484375, 2.4203643798828125, 2.485321044921875, 2.5502777099609375, 2.615234375]}, "gradients/encoder.encoder.layers.13.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 2.0, 1.0, 1.0, 8.0, 7.0, 14.0, 15.0, 25.0, 43.0, 67.0, 137.0, 330.0, 877.0, 3604.0, 32618.0, 766746.0, 229779.0, 11379.0, 1930.0, 550.0, 190.0, 107.0, 66.0, 19.0, 16.0, 12.0, 7.0, 6.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.126953125, -1.0891265869140625, -1.051300048828125, -1.0134735107421875, -0.97564697265625, -0.9378204345703125, -0.899993896484375, -0.8621673583984375, -0.8243408203125, -0.7865142822265625, -0.748687744140625, -0.7108612060546875, -0.67303466796875, -0.6352081298828125, -0.597381591796875, -0.5595550537109375, -0.521728515625, -0.4839019775390625, -0.446075439453125, -0.4082489013671875, -0.37042236328125, -0.3325958251953125, -0.294769287109375, -0.2569427490234375, -0.2191162109375, -0.1812896728515625, -0.143463134765625, -0.1056365966796875, -0.06781005859375, -0.0299835205078125, 0.007843017578125, 0.0456695556640625, 0.08349609375, 0.1213226318359375, 0.159149169921875, 0.1969757080078125, 0.23480224609375, 0.2726287841796875, 0.310455322265625, 0.3482818603515625, 0.3861083984375, 0.4239349365234375, 0.461761474609375, 0.4995880126953125, 0.53741455078125, 0.5752410888671875, 0.613067626953125, 0.6508941650390625, 0.688720703125, 0.7265472412109375, 0.764373779296875, 0.8022003173828125, 0.84002685546875, 0.8778533935546875, 0.915679931640625, 0.9535064697265625, 0.9913330078125, 1.0291595458984375, 1.066986083984375, 1.1048126220703125, 1.14263916015625, 1.1804656982421875, 1.218292236328125, 1.2561187744140625, 1.2939453125]}, "gradients/encoder.encoder.layers.13.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 4.0, 3.0, 2.0, 5.0, 3.0, 6.0, 18.0, 28.0, 30.0, 47.0, 61.0, 108.0, 146.0, 134.0, 128.0, 86.0, 66.0, 47.0, 37.0, 15.0, 12.0, 9.0, 4.0, 3.0, 1.0, 3.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0001380443572998047, -0.00013433396816253662, -0.00013062357902526855, -0.0001269131898880005, -0.00012320280075073242, -0.00011949241161346436, -0.00011578202247619629, -0.00011207163333892822, -0.00010836124420166016, -0.00010465085506439209, -0.00010094046592712402, -9.723007678985596e-05, -9.351968765258789e-05, -8.980929851531982e-05, -8.609890937805176e-05, -8.238852024078369e-05, -7.867813110351562e-05, -7.496774196624756e-05, -7.125735282897949e-05, -6.754696369171143e-05, -6.383657455444336e-05, -6.012618541717529e-05, -5.6415796279907227e-05, -5.270540714263916e-05, -4.8995018005371094e-05, -4.528462886810303e-05, -4.157423973083496e-05, -3.7863850593566895e-05, -3.415346145629883e-05, -3.0443072319030762e-05, -2.6732683181762695e-05, -2.302229404449463e-05, -1.9311904907226562e-05, -1.5601515769958496e-05, -1.189112663269043e-05, -8.180737495422363e-06, -4.470348358154297e-06, -7.599592208862305e-07, 2.950429916381836e-06, 6.660819053649902e-06, 1.0371208190917969e-05, 1.4081597328186035e-05, 1.77919864654541e-05, 2.1502375602722168e-05, 2.5212764739990234e-05, 2.89231538772583e-05, 3.263354301452637e-05, 3.6343932151794434e-05, 4.00543212890625e-05, 4.3764710426330566e-05, 4.747509956359863e-05, 5.11854887008667e-05, 5.4895877838134766e-05, 5.860626697540283e-05, 6.23166561126709e-05, 6.602704524993896e-05, 6.973743438720703e-05, 7.34478235244751e-05, 7.715821266174316e-05, 8.086860179901123e-05, 8.45789909362793e-05, 8.828938007354736e-05, 9.199976921081543e-05, 9.57101583480835e-05, 9.942054748535156e-05]}, "gradients/encoder.encoder.layers.13.attention.q_proj.weight": {"_type": "histogram", "values": [2.0, 2.0, 0.0, 2.0, 1.0, 1.0, 2.0, 3.0, 0.0, 2.0, 3.0, 6.0, 7.0, 11.0, 21.0, 20.0, 26.0, 49.0, 76.0, 132.0, 233.0, 413.0, 846.0, 1938.0, 5194.0, 16408.0, 67650.0, 347219.0, 472540.0, 101604.0, 22795.0, 6660.0, 2510.0, 1052.0, 488.0, 267.0, 147.0, 87.0, 46.0, 34.0, 14.0, 12.0, 9.0, 10.0, 6.0, 3.0, 6.0, 5.0, 4.0, 1.0, 2.0, 0.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.495361328125, -0.4777488708496094, -0.46013641357421875, -0.4425239562988281, -0.4249114990234375, -0.4072990417480469, -0.38968658447265625, -0.3720741271972656, -0.354461669921875, -0.3368492126464844, -0.31923675537109375, -0.3016242980957031, -0.2840118408203125, -0.2663993835449219, -0.24878692626953125, -0.23117446899414062, -0.21356201171875, -0.19594955444335938, -0.17833709716796875, -0.16072463989257812, -0.1431121826171875, -0.12549972534179688, -0.10788726806640625, -0.09027481079101562, -0.072662353515625, -0.055049896240234375, -0.03743743896484375, -0.019824981689453125, -0.0022125244140625, 0.015399932861328125, 0.03301239013671875, 0.050624847412109375, 0.0682373046875, 0.08584976196289062, 0.10346221923828125, 0.12107467651367188, 0.1386871337890625, 0.15629959106445312, 0.17391204833984375, 0.19152450561523438, 0.209136962890625, 0.22674942016601562, 0.24436187744140625, 0.2619743347167969, 0.2795867919921875, 0.2971992492675781, 0.31481170654296875, 0.3324241638183594, 0.35003662109375, 0.3676490783691406, 0.38526153564453125, 0.4028739929199219, 0.4204864501953125, 0.4380989074707031, 0.45571136474609375, 0.4733238220214844, 0.490936279296875, 0.5085487365722656, 0.5261611938476562, 0.5437736511230469, 0.5613861083984375, 0.5789985656738281, 0.5966110229492188, 0.6142234802246094, 0.6318359375]}, "gradients/encoder.encoder.layers.13.attention.q_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 3.0, 2.0, 3.0, 5.0, 5.0, 4.0, 4.0, 14.0, 13.0, 16.0, 17.0, 24.0, 35.0, 40.0, 55.0, 64.0, 62.0, 64.0, 83.0, 76.0, 78.0, 49.0, 65.0, 46.0, 39.0, 29.0, 25.0, 28.0, 5.0, 14.0, 13.0, 9.0, 7.0, 0.0, 1.0, 4.0, 1.0, 1.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0], "bins": [-0.58837890625, -0.5673065185546875, -0.546234130859375, -0.5251617431640625, -0.50408935546875, -0.4830169677734375, -0.461944580078125, -0.4408721923828125, -0.4197998046875, -0.3987274169921875, -0.377655029296875, -0.3565826416015625, -0.33551025390625, -0.3144378662109375, -0.293365478515625, -0.2722930908203125, -0.251220703125, -0.2301483154296875, -0.209075927734375, -0.1880035400390625, -0.16693115234375, -0.1458587646484375, -0.124786376953125, -0.1037139892578125, -0.0826416015625, -0.0615692138671875, -0.040496826171875, -0.0194244384765625, 0.00164794921875, 0.0227203369140625, 0.043792724609375, 0.0648651123046875, 0.0859375, 0.1070098876953125, 0.128082275390625, 0.1491546630859375, 0.17022705078125, 0.1912994384765625, 0.212371826171875, 0.2334442138671875, 0.2545166015625, 0.2755889892578125, 0.296661376953125, 0.3177337646484375, 0.33880615234375, 0.3598785400390625, 0.380950927734375, 0.4020233154296875, 0.423095703125, 0.4441680908203125, 0.465240478515625, 0.4863128662109375, 0.50738525390625, 0.5284576416015625, 0.549530029296875, 0.5706024169921875, 0.5916748046875, 0.6127471923828125, 0.633819580078125, 0.6548919677734375, 0.67596435546875, 0.6970367431640625, 0.718109130859375, 0.7391815185546875, 0.76025390625]}, "gradients/encoder.encoder.layers.13.layer_norm.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 5.0, 5.0, 6.0, 9.0, 18.0, 26.0, 68.0, 97.0, 161.0, 195.0, 176.0, 116.0, 55.0, 36.0, 18.0, 12.0, 2.0, 2.0, 3.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-11.996576309204102, -11.492213249206543, -10.987850189208984, -10.483488082885742, -9.979125022888184, -9.474761962890625, -8.970399856567383, -8.466036796569824, -7.961673736572266, -7.457310676574707, -6.952948093414307, -6.448585510253906, -5.944222450256348, -5.439859390258789, -4.935496807098389, -4.431134223937988, -3.9267711639404297, -3.42240834236145, -2.9180455207824707, -2.413682699203491, -1.9093198776245117, -1.4049570560455322, -0.9005942344665527, -0.39623141288757324, 0.10813140869140625, 0.6124942302703857, 1.1168570518493652, 1.6212198734283447, 2.125582695007324, 2.6299455165863037, 3.134308338165283, 3.6386711597442627, 4.143033981323242, 4.647397041320801, 5.151759624481201, 5.656122207641602, 6.16048526763916, 6.664848327636719, 7.169210910797119, 7.6735734939575195, 8.177936553955078, 8.682299613952637, 9.186662673950195, 9.691024780273438, 10.195387840270996, 10.699750900268555, 11.204113006591797, 11.708476066589355, 12.212839126586914, 12.717202186584473, 13.221565246582031, 13.725927352905273, 14.230290412902832, 14.73465347290039, 15.239015579223633, 15.743378639221191, 16.24774169921875, 16.752103805541992, 17.256467819213867, 17.76082992553711, 18.265193939208984, 18.769556045532227, 19.27391815185547, 19.778282165527344, 20.282644271850586]}, "gradients/encoder.encoder.layers.13.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 4.0, 4.0, 4.0, 3.0, 5.0, 7.0, 13.0, 10.0, 7.0, 21.0, 25.0, 38.0, 22.0, 23.0, 35.0, 43.0, 41.0, 32.0, 49.0, 57.0, 41.0, 51.0, 44.0, 45.0, 50.0, 42.0, 48.0, 37.0, 28.0, 38.0, 31.0, 18.0, 18.0, 17.0, 10.0, 16.0, 8.0, 8.0, 8.0, 2.0, 4.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 4.0], "bins": [-13.998659133911133, -13.62474536895752, -13.250832557678223, -12.87691879272461, -12.503005981445312, -12.1290922164917, -11.755178451538086, -11.381265640258789, -11.007352828979492, -10.633439064025879, -10.259526252746582, -9.885612487792969, -9.511699676513672, -9.137785911560059, -8.763872146606445, -8.389959335327148, -8.016045570373535, -7.64213228225708, -7.268218994140625, -6.894305229187012, -6.520392417907715, -6.146478652954102, -5.7725653648376465, -5.398652076721191, -5.024738788604736, -4.650825500488281, -4.276912212371826, -3.902998685836792, -3.529085397720337, -3.155172109603882, -2.7812585830688477, -2.4073452949523926, -2.033432960510254, -1.6595196723937988, -1.2856062650680542, -0.9116928577423096, -0.5377795696258545, -0.16386628150939941, 0.21004724502563477, 0.5839605331420898, 0.9578738212585449, 1.331787109375, 1.7057005167007446, 2.0796139240264893, 2.4535272121429443, 2.8274405002593994, 3.2013540267944336, 3.5752673149108887, 3.9491806030273438, 4.323093891143799, 4.697007179260254, 5.070920944213867, 5.444833755493164, 5.818747520446777, 6.192660808563232, 6.5665740966796875, 6.940487384796143, 7.314400672912598, 7.688313961029053, 8.062227249145508, 8.436141014099121, 8.810053825378418, 9.183967590332031, 9.557880401611328, 9.931794166564941]}, "gradients/encoder.encoder.layers.12.feed_forward.output_dense.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 5.0, 3.0, 13.0, 15.0, 15.0, 32.0, 43.0, 97.0, 129.0, 236.0, 442.0, 840.0, 1693.0, 3580.0, 9193.0, 31111.0, 286038.0, 3767592.0, 67408.0, 15029.0, 5535.0, 2428.0, 1226.0, 626.0, 360.0, 206.0, 123.0, 78.0, 53.0, 41.0, 24.0, 18.0, 20.0, 10.0, 3.0, 4.0, 8.0, 6.0, 6.0, 2.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.392578125, -2.304718017578125, -2.21685791015625, -2.128997802734375, -2.0411376953125, -1.953277587890625, -1.86541748046875, -1.777557373046875, -1.689697265625, -1.601837158203125, -1.51397705078125, -1.426116943359375, -1.3382568359375, -1.250396728515625, -1.16253662109375, -1.074676513671875, -0.98681640625, -0.898956298828125, -0.81109619140625, -0.723236083984375, -0.6353759765625, -0.547515869140625, -0.45965576171875, -0.371795654296875, -0.283935546875, -0.196075439453125, -0.10821533203125, -0.020355224609375, 0.0675048828125, 0.155364990234375, 0.24322509765625, 0.331085205078125, 0.4189453125, 0.506805419921875, 0.59466552734375, 0.682525634765625, 0.7703857421875, 0.858245849609375, 0.94610595703125, 1.033966064453125, 1.121826171875, 1.209686279296875, 1.29754638671875, 1.385406494140625, 1.4732666015625, 1.561126708984375, 1.64898681640625, 1.736846923828125, 1.82470703125, 1.912567138671875, 2.00042724609375, 2.088287353515625, 2.1761474609375, 2.264007568359375, 2.35186767578125, 2.439727783203125, 2.527587890625, 2.615447998046875, 2.70330810546875, 2.791168212890625, 2.8790283203125, 2.966888427734375, 3.05474853515625, 3.142608642578125, 3.23046875]}, "gradients/encoder.encoder.layers.12.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 3.0, 1.0, 3.0, 4.0, 2.0, 9.0, 9.0, 21.0, 20.0, 20.0, 26.0, 32.0, 35.0, 54.0, 58.0, 74.0, 73.0, 64.0, 82.0, 57.0, 72.0, 50.0, 56.0, 41.0, 38.0, 24.0, 23.0, 14.0, 12.0, 7.0, 6.0, 9.0, 4.0, 5.0, 3.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.70849609375, -0.6848831176757812, -0.6612701416015625, -0.6376571655273438, -0.614044189453125, -0.5904312133789062, -0.5668182373046875, -0.5432052612304688, -0.51959228515625, -0.49597930908203125, -0.4723663330078125, -0.44875335693359375, -0.425140380859375, -0.40152740478515625, -0.3779144287109375, -0.35430145263671875, -0.3306884765625, -0.30707550048828125, -0.2834625244140625, -0.25984954833984375, -0.236236572265625, -0.21262359619140625, -0.1890106201171875, -0.16539764404296875, -0.14178466796875, -0.11817169189453125, -0.0945587158203125, -0.07094573974609375, -0.047332763671875, -0.02371978759765625, -0.0001068115234375, 0.02350616455078125, 0.047119140625, 0.07073211669921875, 0.0943450927734375, 0.11795806884765625, 0.141571044921875, 0.16518402099609375, 0.1887969970703125, 0.21240997314453125, 0.23602294921875, 0.25963592529296875, 0.2832489013671875, 0.30686187744140625, 0.330474853515625, 0.35408782958984375, 0.3777008056640625, 0.40131378173828125, 0.4249267578125, 0.44853973388671875, 0.4721527099609375, 0.49576568603515625, 0.519378662109375, 0.5429916381835938, 0.5666046142578125, 0.5902175903320312, 0.61383056640625, 0.6374435424804688, 0.6610565185546875, 0.6846694946289062, 0.708282470703125, 0.7318954467773438, 0.7555084228515625, 0.7791213989257812, 0.802734375]}, "gradients/encoder.encoder.layers.12.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 4.0, 11.0, 10.0, 17.0, 43.0, 79.0, 94.0, 152.0, 246.0, 501.0, 746.0, 1364.0, 2497.0, 4807.0, 10709.0, 29920.0, 137290.0, 3510351.0, 410790.0, 54040.0, 16346.0, 6709.0, 3308.0, 1777.0, 979.0, 626.0, 369.0, 206.0, 120.0, 69.0, 47.0, 19.0, 19.0, 8.0, 2.0, 6.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-1.912109375, -1.85540771484375, -1.7987060546875, -1.74200439453125, -1.685302734375, -1.62860107421875, -1.5718994140625, -1.51519775390625, -1.45849609375, -1.40179443359375, -1.3450927734375, -1.28839111328125, -1.231689453125, -1.17498779296875, -1.1182861328125, -1.06158447265625, -1.0048828125, -0.94818115234375, -0.8914794921875, -0.83477783203125, -0.778076171875, -0.72137451171875, -0.6646728515625, -0.60797119140625, -0.55126953125, -0.49456787109375, -0.4378662109375, -0.38116455078125, -0.324462890625, -0.26776123046875, -0.2110595703125, -0.15435791015625, -0.09765625, -0.04095458984375, 0.0157470703125, 0.07244873046875, 0.129150390625, 0.18585205078125, 0.2425537109375, 0.29925537109375, 0.35595703125, 0.41265869140625, 0.4693603515625, 0.52606201171875, 0.582763671875, 0.63946533203125, 0.6961669921875, 0.75286865234375, 0.8095703125, 0.86627197265625, 0.9229736328125, 0.97967529296875, 1.036376953125, 1.09307861328125, 1.1497802734375, 1.20648193359375, 1.26318359375, 1.31988525390625, 1.3765869140625, 1.43328857421875, 1.489990234375, 1.54669189453125, 1.6033935546875, 1.66009521484375, 1.716796875]}, "gradients/encoder.encoder.layers.12.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 5.0, 0.0, 1.0, 1.0, 0.0, 7.0, 2.0, 4.0, 4.0, 6.0, 8.0, 4.0, 10.0, 13.0, 18.0, 45.0, 52.0, 127.0, 290.0, 2265.0, 785.0, 182.0, 99.0, 53.0, 40.0, 22.0, 12.0, 8.0, 1.0, 4.0, 3.0, 0.0, 2.0, 2.0, 4.0, 5.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.3115234375, -1.2702789306640625, -1.229034423828125, -1.1877899169921875, -1.14654541015625, -1.1053009033203125, -1.064056396484375, -1.0228118896484375, -0.9815673828125, -0.9403228759765625, -0.899078369140625, -0.8578338623046875, -0.81658935546875, -0.7753448486328125, -0.734100341796875, -0.6928558349609375, -0.651611328125, -0.6103668212890625, -0.569122314453125, -0.5278778076171875, -0.48663330078125, -0.4453887939453125, -0.404144287109375, -0.3628997802734375, -0.3216552734375, -0.2804107666015625, -0.239166259765625, -0.1979217529296875, -0.15667724609375, -0.1154327392578125, -0.074188232421875, -0.0329437255859375, 0.00830078125, 0.0495452880859375, 0.090789794921875, 0.1320343017578125, 0.17327880859375, 0.2145233154296875, 0.255767822265625, 0.2970123291015625, 0.3382568359375, 0.3795013427734375, 0.420745849609375, 0.4619903564453125, 0.50323486328125, 0.5444793701171875, 0.585723876953125, 0.6269683837890625, 0.668212890625, 0.7094573974609375, 0.750701904296875, 0.7919464111328125, 0.83319091796875, 0.8744354248046875, 0.915679931640625, 0.9569244384765625, 0.9981689453125, 1.0394134521484375, 1.080657958984375, 1.1219024658203125, 1.16314697265625, 1.2043914794921875, 1.245635986328125, 1.2868804931640625, 1.328125]}, "gradients/encoder.encoder.layers.12.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0, 4.0, 13.0, 41.0, 183.0, 435.0, 259.0, 58.0, 13.0, 4.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-24.100658416748047, -23.58439826965332, -23.068138122558594, -22.551877975463867, -22.03561782836914, -21.519357681274414, -21.003097534179688, -20.48683738708496, -19.970577239990234, -19.454317092895508, -18.93805694580078, -18.421796798706055, -17.905536651611328, -17.3892765045166, -16.873016357421875, -16.35675621032715, -15.840496063232422, -15.324235916137695, -14.807975769042969, -14.291715621948242, -13.775455474853516, -13.259195327758789, -12.742935180664062, -12.226675033569336, -11.71041488647461, -11.194154739379883, -10.677894592285156, -10.16163444519043, -9.645374298095703, -9.129114151000977, -8.61285400390625, -8.096593856811523, -7.58033561706543, -7.064075469970703, -6.547815322875977, -6.03155517578125, -5.515295028686523, -4.999034881591797, -4.48277473449707, -3.9665145874023438, -3.450254440307617, -2.9339942932128906, -2.417734146118164, -1.9014739990234375, -1.385213851928711, -0.8689537048339844, -0.3526935577392578, 0.16356658935546875, 0.6798267364501953, 1.1960868835449219, 1.7123470306396484, 2.228607177734375, 2.7448673248291016, 3.261127471923828, 3.7773876190185547, 4.293647766113281, 4.809907913208008, 5.326168060302734, 5.842428207397461, 6.3586883544921875, 6.874948501586914, 7.391208648681641, 7.907468795776367, 8.423728942871094, 8.93998908996582]}, "gradients/encoder.encoder.layers.12.final_layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 3.0, 3.0, 6.0, 2.0, 15.0, 9.0, 5.0, 4.0, 11.0, 16.0, 35.0, 23.0, 41.0, 39.0, 43.0, 47.0, 40.0, 55.0, 61.0, 57.0, 81.0, 54.0, 51.0, 48.0, 55.0, 42.0, 33.0, 28.0, 20.0, 21.0, 17.0, 15.0, 8.0, 10.0, 2.0, 3.0, 4.0, 4.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.0510759353637695, -2.936291217803955, -2.8215062618255615, -2.706721544265747, -2.5919368267059326, -2.477151870727539, -2.3623671531677246, -2.24758243560791, -2.1327977180480957, -2.0180130004882812, -1.9032281637191772, -1.7884433269500732, -1.6736586093902588, -1.5588737726211548, -1.4440889358520508, -1.3293042182922363, -1.2145192623138428, -1.0997344255447388, -0.9849497079849243, -0.8701648712158203, -0.7553800940513611, -0.6405953168869019, -0.5258104801177979, -0.4110257029533386, -0.2962409257888794, -0.18145613372325897, -0.06667134165763855, 0.048113465309143066, 0.1628982424736023, 0.2776830196380615, 0.3924678564071655, 0.5072526335716248, 0.622037410736084, 0.7368221879005432, 0.8516069650650024, 0.9663918018341064, 1.081176519393921, 1.195961356163025, 1.310746192932129, 1.4255309104919434, 1.5403157472610474, 1.6551005840301514, 1.7698853015899658, 1.8846701383590698, 1.9994549751281738, 2.1142396926879883, 2.2290244102478027, 2.3438093662261963, 2.4585940837860107, 2.573378801345825, 2.6881637573242188, 2.802948474884033, 2.9177331924438477, 3.032517910003662, 3.1473028659820557, 3.26208758354187, 3.3768725395202637, 3.491657257080078, 3.6064422130584717, 3.721226930618286, 3.8360116481781006, 3.950796604156494, 4.065581321716309, 4.180366039276123, 4.2951507568359375]}, "gradients/encoder.encoder.layers.12.attention.out_proj.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 3.0, 1.0, 0.0, 1.0, 0.0, 5.0, 10.0, 10.0, 13.0, 12.0, 17.0, 36.0, 24.0, 47.0, 79.0, 143.0, 198.0, 406.0, 727.0, 1483.0, 3256.0, 8411.0, 25439.0, 99700.0, 449320.0, 352416.0, 74239.0, 20160.0, 6903.0, 2717.0, 1273.0, 652.0, 316.0, 190.0, 110.0, 68.0, 46.0, 42.0, 24.0, 24.0, 14.0, 4.0, 6.0, 5.0, 3.0, 2.0, 3.0, 2.0, 0.0, 1.0, 3.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-2.814453125, -2.726226806640625, -2.63800048828125, -2.549774169921875, -2.4615478515625, -2.373321533203125, -2.28509521484375, -2.196868896484375, -2.108642578125, -2.020416259765625, -1.93218994140625, -1.843963623046875, -1.7557373046875, -1.667510986328125, -1.57928466796875, -1.491058349609375, -1.40283203125, -1.314605712890625, -1.22637939453125, -1.138153076171875, -1.0499267578125, -0.961700439453125, -0.87347412109375, -0.785247802734375, -0.697021484375, -0.608795166015625, -0.52056884765625, -0.432342529296875, -0.3441162109375, -0.255889892578125, -0.16766357421875, -0.079437255859375, 0.0087890625, 0.097015380859375, 0.18524169921875, 0.273468017578125, 0.3616943359375, 0.449920654296875, 0.53814697265625, 0.626373291015625, 0.714599609375, 0.802825927734375, 0.89105224609375, 0.979278564453125, 1.0675048828125, 1.155731201171875, 1.24395751953125, 1.332183837890625, 1.42041015625, 1.508636474609375, 1.59686279296875, 1.685089111328125, 1.7733154296875, 1.861541748046875, 1.94976806640625, 2.037994384765625, 2.126220703125, 2.214447021484375, 2.30267333984375, 2.390899658203125, 2.4791259765625, 2.567352294921875, 2.65557861328125, 2.743804931640625, 2.83203125]}, "gradients/encoder.encoder.layers.12.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 0.0, 2.0, 0.0, 4.0, 5.0, 6.0, 8.0, 7.0, 20.0, 23.0, 29.0, 22.0, 46.0, 56.0, 63.0, 67.0, 78.0, 74.0, 67.0, 77.0, 67.0, 51.0, 51.0, 49.0, 45.0, 18.0, 19.0, 17.0, 9.0, 11.0, 6.0, 6.0, 2.0, 4.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.7685546875, -0.7430038452148438, -0.7174530029296875, -0.6919021606445312, -0.666351318359375, -0.6408004760742188, -0.6152496337890625, -0.5896987915039062, -0.56414794921875, -0.5385971069335938, -0.5130462646484375, -0.48749542236328125, -0.461944580078125, -0.43639373779296875, -0.4108428955078125, -0.38529205322265625, -0.3597412109375, -0.33419036865234375, -0.3086395263671875, -0.28308868408203125, -0.257537841796875, -0.23198699951171875, -0.2064361572265625, -0.18088531494140625, -0.15533447265625, -0.12978363037109375, -0.1042327880859375, -0.07868194580078125, -0.053131103515625, -0.02758026123046875, -0.0020294189453125, 0.02352142333984375, 0.049072265625, 0.07462310791015625, 0.1001739501953125, 0.12572479248046875, 0.151275634765625, 0.17682647705078125, 0.2023773193359375, 0.22792816162109375, 0.25347900390625, 0.27902984619140625, 0.3045806884765625, 0.33013153076171875, 0.355682373046875, 0.38123321533203125, 0.4067840576171875, 0.43233489990234375, 0.4578857421875, 0.48343658447265625, 0.5089874267578125, 0.5345382690429688, 0.560089111328125, 0.5856399536132812, 0.6111907958984375, 0.6367416381835938, 0.66229248046875, 0.6878433227539062, 0.7133941650390625, 0.7389450073242188, 0.764495849609375, 0.7900466918945312, 0.8155975341796875, 0.8411483764648438, 0.86669921875]}, "gradients/encoder.encoder.layers.12.attention.v_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 2.0, 6.0, 9.0, 9.0, 8.0, 8.0, 21.0, 36.0, 57.0, 91.0, 127.0, 198.0, 273.0, 465.0, 787.0, 1432.0, 2606.0, 4954.0, 10610.0, 24842.0, 66212.0, 213713.0, 437678.0, 184068.0, 58340.0, 22134.0, 9660.0, 4627.0, 2374.0, 1300.0, 685.0, 427.0, 276.0, 155.0, 112.0, 64.0, 54.0, 39.0, 30.0, 14.0, 18.0, 9.0, 6.0, 9.0, 5.0, 5.0, 4.0, 1.0, 2.0, 2.0, 1.0], "bins": [-2.078125, -2.0194854736328125, -1.960845947265625, -1.9022064208984375, -1.84356689453125, -1.7849273681640625, -1.726287841796875, -1.6676483154296875, -1.6090087890625, -1.5503692626953125, -1.491729736328125, -1.4330902099609375, -1.37445068359375, -1.3158111572265625, -1.257171630859375, -1.1985321044921875, -1.139892578125, -1.0812530517578125, -1.022613525390625, -0.9639739990234375, -0.90533447265625, -0.8466949462890625, -0.788055419921875, -0.7294158935546875, -0.6707763671875, -0.6121368408203125, -0.553497314453125, -0.4948577880859375, -0.43621826171875, -0.3775787353515625, -0.318939208984375, -0.2602996826171875, -0.20166015625, -0.1430206298828125, -0.084381103515625, -0.0257415771484375, 0.03289794921875, 0.0915374755859375, 0.150177001953125, 0.2088165283203125, 0.2674560546875, 0.3260955810546875, 0.384735107421875, 0.4433746337890625, 0.50201416015625, 0.5606536865234375, 0.619293212890625, 0.6779327392578125, 0.736572265625, 0.7952117919921875, 0.853851318359375, 0.9124908447265625, 0.97113037109375, 1.0297698974609375, 1.088409423828125, 1.1470489501953125, 1.2056884765625, 1.2643280029296875, 1.322967529296875, 1.3816070556640625, 1.44024658203125, 1.4988861083984375, 1.557525634765625, 1.6161651611328125, 1.6748046875]}, "gradients/encoder.encoder.layers.12.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 4.0, 3.0, 5.0, 7.0, 9.0, 9.0, 9.0, 11.0, 25.0, 25.0, 24.0, 21.0, 32.0, 47.0, 49.0, 56.0, 66.0, 68.0, 53.0, 50.0, 61.0, 61.0, 43.0, 39.0, 34.0, 37.0, 31.0, 19.0, 21.0, 21.0, 19.0, 12.0, 6.0, 11.0, 6.0, 3.0, 3.0, 3.0, 2.0, 3.0, 1.0, 4.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.62109375, -3.511383056640625, -3.40167236328125, -3.291961669921875, -3.1822509765625, -3.072540283203125, -2.96282958984375, -2.853118896484375, -2.743408203125, -2.633697509765625, -2.52398681640625, -2.414276123046875, -2.3045654296875, -2.194854736328125, -2.08514404296875, -1.975433349609375, -1.86572265625, -1.756011962890625, -1.64630126953125, -1.536590576171875, -1.4268798828125, -1.317169189453125, -1.20745849609375, -1.097747802734375, -0.988037109375, -0.878326416015625, -0.76861572265625, -0.658905029296875, -0.5491943359375, -0.439483642578125, -0.32977294921875, -0.220062255859375, -0.1103515625, -0.000640869140625, 0.10906982421875, 0.218780517578125, 0.3284912109375, 0.438201904296875, 0.54791259765625, 0.657623291015625, 0.767333984375, 0.877044677734375, 0.98675537109375, 1.096466064453125, 1.2061767578125, 1.315887451171875, 1.42559814453125, 1.535308837890625, 1.64501953125, 1.754730224609375, 1.86444091796875, 1.974151611328125, 2.0838623046875, 2.193572998046875, 2.30328369140625, 2.412994384765625, 2.522705078125, 2.632415771484375, 2.74212646484375, 2.851837158203125, 2.9615478515625, 3.071258544921875, 3.18096923828125, 3.290679931640625, 3.400390625]}, "gradients/encoder.encoder.layers.12.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 3.0, 6.0, 5.0, 4.0, 6.0, 7.0, 16.0, 35.0, 41.0, 83.0, 113.0, 269.0, 484.0, 1211.0, 3622.0, 15446.0, 106375.0, 673903.0, 212981.0, 25690.0, 5311.0, 1630.0, 611.0, 312.0, 183.0, 82.0, 43.0, 30.0, 15.0, 13.0, 10.0, 8.0, 5.0, 2.0, 1.0, 4.0, 1.0, 3.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.0390625, -1.0027923583984375, -0.966522216796875, -0.9302520751953125, -0.89398193359375, -0.8577117919921875, -0.821441650390625, -0.7851715087890625, -0.7489013671875, -0.7126312255859375, -0.676361083984375, -0.6400909423828125, -0.60382080078125, -0.5675506591796875, -0.531280517578125, -0.4950103759765625, -0.458740234375, -0.4224700927734375, -0.386199951171875, -0.3499298095703125, -0.31365966796875, -0.2773895263671875, -0.241119384765625, -0.2048492431640625, -0.1685791015625, -0.1323089599609375, -0.096038818359375, -0.0597686767578125, -0.02349853515625, 0.0127716064453125, 0.049041748046875, 0.0853118896484375, 0.12158203125, 0.1578521728515625, 0.194122314453125, 0.2303924560546875, 0.26666259765625, 0.3029327392578125, 0.339202880859375, 0.3754730224609375, 0.4117431640625, 0.4480133056640625, 0.484283447265625, 0.5205535888671875, 0.55682373046875, 0.5930938720703125, 0.629364013671875, 0.6656341552734375, 0.701904296875, 0.7381744384765625, 0.774444580078125, 0.8107147216796875, 0.84698486328125, 0.8832550048828125, 0.919525146484375, 0.9557952880859375, 0.9920654296875, 1.0283355712890625, 1.064605712890625, 1.1008758544921875, 1.13714599609375, 1.1734161376953125, 1.209686279296875, 1.2459564208984375, 1.2822265625]}, "gradients/encoder.encoder.layers.12.attention.k_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 3.0, 3.0, 6.0, 8.0, 4.0, 15.0, 15.0, 23.0, 25.0, 47.0, 71.0, 82.0, 95.0, 89.0, 111.0, 86.0, 91.0, 59.0, 50.0, 37.0, 27.0, 17.0, 6.0, 12.0, 4.0, 5.0, 7.0, 3.0, 2.0, 2.0, 3.0, 0.0, 3.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.250640869140625e-05, -8.836574852466583e-05, -8.422508835792542e-05, -8.0084428191185e-05, -7.594376802444458e-05, -7.180310785770416e-05, -6.766244769096375e-05, -6.352178752422333e-05, -5.938112735748291e-05, -5.524046719074249e-05, -5.1099807024002075e-05, -4.695914685726166e-05, -4.281848669052124e-05, -3.867782652378082e-05, -3.4537166357040405e-05, -3.0396506190299988e-05, -2.625584602355957e-05, -2.2115185856819153e-05, -1.7974525690078735e-05, -1.3833865523338318e-05, -9.6932053565979e-06, -5.552545189857483e-06, -1.4118850231170654e-06, 2.728775143623352e-06, 6.8694353103637695e-06, 1.1010095477104187e-05, 1.5150755643844604e-05, 1.9291415810585022e-05, 2.343207597732544e-05, 2.7572736144065857e-05, 3.1713396310806274e-05, 3.585405647754669e-05, 3.999471664428711e-05, 4.413537681102753e-05, 4.8276036977767944e-05, 5.241669714450836e-05, 5.655735731124878e-05, 6.06980174779892e-05, 6.483867764472961e-05, 6.897933781147003e-05, 7.311999797821045e-05, 7.726065814495087e-05, 8.140131831169128e-05, 8.55419784784317e-05, 8.968263864517212e-05, 9.382329881191254e-05, 9.796395897865295e-05, 0.00010210461914539337, 0.00010624527931213379, 0.0001103859394788742, 0.00011452659964561462, 0.00011866725981235504, 0.00012280791997909546, 0.00012694858014583588, 0.0001310892403125763, 0.0001352299004793167, 0.00013937056064605713, 0.00014351122081279755, 0.00014765188097953796, 0.00015179254114627838, 0.0001559332013130188, 0.00016007386147975922, 0.00016421452164649963, 0.00016835518181324005, 0.00017249584197998047]}, "gradients/encoder.encoder.layers.12.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 5.0, 5.0, 8.0, 9.0, 14.0, 50.0, 71.0, 164.0, 324.0, 838.0, 2954.0, 14626.0, 119410.0, 690601.0, 192739.0, 20939.0, 3930.0, 1086.0, 414.0, 162.0, 89.0, 53.0, 37.0, 17.0, 9.0, 3.0, 7.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.0830078125, -1.046417236328125, -1.00982666015625, -0.973236083984375, -0.9366455078125, -0.900054931640625, -0.86346435546875, -0.826873779296875, -0.790283203125, -0.753692626953125, -0.71710205078125, -0.680511474609375, -0.6439208984375, -0.607330322265625, -0.57073974609375, -0.534149169921875, -0.49755859375, -0.460968017578125, -0.42437744140625, -0.387786865234375, -0.3511962890625, -0.314605712890625, -0.27801513671875, -0.241424560546875, -0.204833984375, -0.168243408203125, -0.13165283203125, -0.095062255859375, -0.0584716796875, -0.021881103515625, 0.01470947265625, 0.051300048828125, 0.087890625, 0.124481201171875, 0.16107177734375, 0.197662353515625, 0.2342529296875, 0.270843505859375, 0.30743408203125, 0.344024658203125, 0.380615234375, 0.417205810546875, 0.45379638671875, 0.490386962890625, 0.5269775390625, 0.563568115234375, 0.60015869140625, 0.636749267578125, 0.67333984375, 0.709930419921875, 0.74652099609375, 0.783111572265625, 0.8197021484375, 0.856292724609375, 0.89288330078125, 0.929473876953125, 0.966064453125, 1.002655029296875, 1.03924560546875, 1.075836181640625, 1.1124267578125, 1.149017333984375, 1.18560791015625, 1.222198486328125, 1.2587890625]}, "gradients/encoder.encoder.layers.12.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 5.0, 7.0, 11.0, 19.0, 22.0, 33.0, 55.0, 74.0, 104.0, 122.0, 134.0, 120.0, 94.0, 61.0, 48.0, 38.0, 16.0, 11.0, 10.0, 7.0, 10.0, 2.0, 3.0, 3.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.326171875, -1.2758331298828125, -1.225494384765625, -1.1751556396484375, -1.12481689453125, -1.0744781494140625, -1.024139404296875, -0.9738006591796875, -0.9234619140625, -0.8731231689453125, -0.822784423828125, -0.7724456787109375, -0.72210693359375, -0.6717681884765625, -0.621429443359375, -0.5710906982421875, -0.520751953125, -0.4704132080078125, -0.420074462890625, -0.3697357177734375, -0.31939697265625, -0.2690582275390625, -0.218719482421875, -0.1683807373046875, -0.1180419921875, -0.0677032470703125, -0.017364501953125, 0.0329742431640625, 0.08331298828125, 0.1336517333984375, 0.183990478515625, 0.2343292236328125, 0.28466796875, 0.3350067138671875, 0.385345458984375, 0.4356842041015625, 0.48602294921875, 0.5363616943359375, 0.586700439453125, 0.6370391845703125, 0.6873779296875, 0.7377166748046875, 0.788055419921875, 0.8383941650390625, 0.88873291015625, 0.9390716552734375, 0.989410400390625, 1.0397491455078125, 1.090087890625, 1.1404266357421875, 1.190765380859375, 1.2411041259765625, 1.29144287109375, 1.3417816162109375, 1.392120361328125, 1.4424591064453125, 1.4927978515625, 1.5431365966796875, 1.593475341796875, 1.6438140869140625, 1.69415283203125, 1.7444915771484375, 1.794830322265625, 1.8451690673828125, 1.8955078125]}, "gradients/encoder.encoder.layers.12.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 1.0, 4.0, 1.0, 1.0, 1.0, 1.0, 5.0, 2.0, 12.0, 16.0, 18.0, 33.0, 57.0, 82.0, 134.0, 161.0, 163.0, 111.0, 80.0, 44.0, 26.0, 22.0, 12.0, 14.0, 5.0, 6.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.96710205078125, -12.323904037475586, -11.680706024169922, -11.037508010864258, -10.394309997558594, -9.75111198425293, -9.107913970947266, -8.464715957641602, -7.821517467498779, -7.178319454193115, -6.535121440887451, -5.891922950744629, -5.248724937438965, -4.605526924133301, -3.9623289108276367, -3.3191308975219727, -2.6759328842163086, -2.0327348709106445, -1.389536738395691, -0.7463386058807373, -0.10314059257507324, 0.5400574207305908, 1.183255672454834, 1.826453685760498, 2.469651699066162, 3.112849712371826, 3.7560477256774902, 4.3992462158203125, 5.042444229125977, 5.685642242431641, 6.328840255737305, 6.972038269042969, 7.615236282348633, 8.258434295654297, 8.901632308959961, 9.544830322265625, 10.188028335571289, 10.831226348876953, 11.474424362182617, 12.117622375488281, 12.760820388793945, 13.40401840209961, 14.047216415405273, 14.690414428710938, 15.333612442016602, 15.976810455322266, 16.62000846862793, 17.263206481933594, 17.90640640258789, 18.549604415893555, 19.19280242919922, 19.836000442504883, 20.479198455810547, 21.12239646911621, 21.765594482421875, 22.40879249572754, 23.051990509033203, 23.695188522338867, 24.33838653564453, 24.981584548950195, 25.62478256225586, 26.267980575561523, 26.911178588867188, 27.55437660217285, 28.197574615478516]}, "gradients/encoder.encoder.layers.12.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 2.0, 0.0, 2.0, 6.0, 2.0, 0.0, 3.0, 3.0, 2.0, 9.0, 6.0, 7.0, 4.0, 14.0, 15.0, 11.0, 15.0, 23.0, 17.0, 29.0, 28.0, 32.0, 28.0, 33.0, 33.0, 41.0, 33.0, 47.0, 48.0, 41.0, 43.0, 43.0, 30.0, 42.0, 46.0, 34.0, 32.0, 27.0, 24.0, 25.0, 16.0, 17.0, 18.0, 15.0, 12.0, 9.0, 8.0, 10.0, 8.0, 6.0, 4.0, 4.0, 0.0, 2.0, 2.0, 3.0, 1.0, 1.0, 2.0, 1.0, 2.0], "bins": [-15.823076248168945, -15.334844589233398, -14.846612930297852, -14.358381271362305, -13.870148658752441, -13.381916999816895, -12.893685340881348, -12.4054536819458, -11.917221069335938, -11.42898941040039, -10.940757751464844, -10.452526092529297, -9.964293479919434, -9.476061820983887, -8.98783016204834, -8.499598503112793, -8.011366844177246, -7.523135185241699, -7.034903049468994, -6.546671390533447, -6.058439254760742, -5.570207595825195, -5.081975936889648, -4.593744277954102, -4.1055121421813965, -3.6172802448272705, -3.1290483474731445, -2.6408166885375977, -2.1525847911834717, -1.6643528938293457, -1.1761212348937988, -0.6878893375396729, -0.19965744018554688, 0.2885743975639343, 0.7768062353134155, 1.265038013458252, 1.753269910812378, 2.241501808166504, 2.729733467102051, 3.2179653644561768, 3.7061972618103027, 4.19442892074585, 4.682661056518555, 5.170892715454102, 5.659124374389648, 6.1473565101623535, 6.6355881690979, 7.1238203048706055, 7.612051963806152, 8.1002836227417, 8.588515281677246, 9.07674789428711, 9.564979553222656, 10.053211212158203, 10.54144287109375, 11.029674530029297, 11.517906188964844, 12.00613784790039, 12.494369506835938, 12.982601165771484, 13.470833778381348, 13.959065437316895, 14.447297096252441, 14.935528755187988, 15.423761367797852]}, "gradients/encoder.encoder.layers.11.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 3.0, 3.0, 10.0, 7.0, 10.0, 22.0, 37.0, 59.0, 91.0, 178.0, 299.0, 472.0, 922.0, 1766.0, 3903.0, 11945.0, 270217.0, 3883496.0, 12736.0, 4202.0, 1860.0, 887.0, 455.0, 277.0, 177.0, 86.0, 53.0, 32.0, 27.0, 22.0, 16.0, 6.0, 5.0, 4.0, 2.0, 3.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-4.68359375, -4.54986572265625, -4.4161376953125, -4.28240966796875, -4.148681640625, -4.01495361328125, -3.8812255859375, -3.74749755859375, -3.61376953125, -3.48004150390625, -3.3463134765625, -3.21258544921875, -3.078857421875, -2.94512939453125, -2.8114013671875, -2.67767333984375, -2.5439453125, -2.41021728515625, -2.2764892578125, -2.14276123046875, -2.009033203125, -1.87530517578125, -1.7415771484375, -1.60784912109375, -1.47412109375, -1.34039306640625, -1.2066650390625, -1.07293701171875, -0.939208984375, -0.80548095703125, -0.6717529296875, -0.53802490234375, -0.404296875, -0.27056884765625, -0.1368408203125, -0.00311279296875, 0.130615234375, 0.26434326171875, 0.3980712890625, 0.53179931640625, 0.66552734375, 0.79925537109375, 0.9329833984375, 1.06671142578125, 1.200439453125, 1.33416748046875, 1.4678955078125, 1.60162353515625, 1.7353515625, 1.86907958984375, 2.0028076171875, 2.13653564453125, 2.270263671875, 2.40399169921875, 2.5377197265625, 2.67144775390625, 2.80517578125, 2.93890380859375, 3.0726318359375, 3.20635986328125, 3.340087890625, 3.47381591796875, 3.6075439453125, 3.74127197265625, 3.875]}, "gradients/encoder.encoder.layers.11.feed_forward.output_dense.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 3.0, 4.0, 4.0, 5.0, 8.0, 14.0, 13.0, 16.0, 20.0, 18.0, 22.0, 36.0, 39.0, 47.0, 45.0, 60.0, 51.0, 69.0, 60.0, 67.0, 52.0, 64.0, 41.0, 44.0, 44.0, 26.0, 32.0, 26.0, 23.0, 14.0, 7.0, 6.0, 6.0, 7.0, 3.0, 1.0, 9.0, 1.0, 4.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.87255859375, -0.8419418334960938, -0.8113250732421875, -0.7807083129882812, -0.750091552734375, -0.7194747924804688, -0.6888580322265625, -0.6582412719726562, -0.62762451171875, -0.5970077514648438, -0.5663909912109375, -0.5357742309570312, -0.505157470703125, -0.47454071044921875, -0.4439239501953125, -0.41330718994140625, -0.3826904296875, -0.35207366943359375, -0.3214569091796875, -0.29084014892578125, -0.260223388671875, -0.22960662841796875, -0.1989898681640625, -0.16837310791015625, -0.13775634765625, -0.10713958740234375, -0.0765228271484375, -0.04590606689453125, -0.015289306640625, 0.01532745361328125, 0.0459442138671875, 0.07656097412109375, 0.107177734375, 0.13779449462890625, 0.1684112548828125, 0.19902801513671875, 0.229644775390625, 0.26026153564453125, 0.2908782958984375, 0.32149505615234375, 0.35211181640625, 0.38272857666015625, 0.4133453369140625, 0.44396209716796875, 0.474578857421875, 0.5051956176757812, 0.5358123779296875, 0.5664291381835938, 0.5970458984375, 0.6276626586914062, 0.6582794189453125, 0.6888961791992188, 0.719512939453125, 0.7501296997070312, 0.7807464599609375, 0.8113632202148438, 0.84197998046875, 0.8725967407226562, 0.9032135009765625, 0.9338302612304688, 0.964447021484375, 0.9950637817382812, 1.0256805419921875, 1.0562973022460938, 1.0869140625]}, "gradients/encoder.encoder.layers.11.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 5.0, 4.0, 6.0, 8.0, 14.0, 13.0, 27.0, 26.0, 44.0, 65.0, 82.0, 111.0, 158.0, 251.0, 363.0, 569.0, 894.0, 1675.0, 3423.0, 8670.0, 32594.0, 4038652.0, 82379.0, 13899.0, 4933.0, 2183.0, 1124.0, 702.0, 436.0, 266.0, 208.0, 142.0, 106.0, 82.0, 41.0, 33.0, 35.0, 19.0, 18.0, 11.0, 8.0, 6.0, 3.0, 3.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.771484375, -2.667694091796875, -2.56390380859375, -2.460113525390625, -2.3563232421875, -2.252532958984375, -2.14874267578125, -2.044952392578125, -1.941162109375, -1.837371826171875, -1.73358154296875, -1.629791259765625, -1.5260009765625, -1.422210693359375, -1.31842041015625, -1.214630126953125, -1.11083984375, -1.007049560546875, -0.90325927734375, -0.799468994140625, -0.6956787109375, -0.591888427734375, -0.48809814453125, -0.384307861328125, -0.280517578125, -0.176727294921875, -0.07293701171875, 0.030853271484375, 0.1346435546875, 0.238433837890625, 0.34222412109375, 0.446014404296875, 0.5498046875, 0.653594970703125, 0.75738525390625, 0.861175537109375, 0.9649658203125, 1.068756103515625, 1.17254638671875, 1.276336669921875, 1.380126953125, 1.483917236328125, 1.58770751953125, 1.691497802734375, 1.7952880859375, 1.899078369140625, 2.00286865234375, 2.106658935546875, 2.21044921875, 2.314239501953125, 2.41802978515625, 2.521820068359375, 2.6256103515625, 2.729400634765625, 2.83319091796875, 2.936981201171875, 3.040771484375, 3.144561767578125, 3.24835205078125, 3.352142333984375, 3.4559326171875, 3.559722900390625, 3.66351318359375, 3.767303466796875, 3.87109375]}, "gradients/encoder.encoder.layers.11.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 3.0, 10.0, 9.0, 10.0, 19.0, 29.0, 77.0, 3662.0, 173.0, 30.0, 21.0, 10.0, 6.0, 9.0, 2.0, 1.0, 5.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.77783203125, -0.7487411499023438, -0.7196502685546875, -0.6905593872070312, -0.661468505859375, -0.6323776245117188, -0.6032867431640625, -0.5741958618164062, -0.54510498046875, -0.5160140991210938, -0.4869232177734375, -0.45783233642578125, -0.428741455078125, -0.39965057373046875, -0.3705596923828125, -0.34146881103515625, -0.3123779296875, -0.28328704833984375, -0.2541961669921875, -0.22510528564453125, -0.196014404296875, -0.16692352294921875, -0.1378326416015625, -0.10874176025390625, -0.07965087890625, -0.05055999755859375, -0.0214691162109375, 0.00762176513671875, 0.036712646484375, 0.06580352783203125, 0.0948944091796875, 0.12398529052734375, 0.153076171875, 0.18216705322265625, 0.2112579345703125, 0.24034881591796875, 0.269439697265625, 0.29853057861328125, 0.3276214599609375, 0.35671234130859375, 0.38580322265625, 0.41489410400390625, 0.4439849853515625, 0.47307586669921875, 0.502166748046875, 0.5312576293945312, 0.5603485107421875, 0.5894393920898438, 0.6185302734375, 0.6476211547851562, 0.6767120361328125, 0.7058029174804688, 0.734893798828125, 0.7639846801757812, 0.7930755615234375, 0.8221664428710938, 0.85125732421875, 0.8803482055664062, 0.9094390869140625, 0.9385299682617188, 0.967620849609375, 0.9967117309570312, 1.0258026123046875, 1.0548934936523438, 1.083984375]}, "gradients/encoder.encoder.layers.11.final_layer_norm.weight": {"_type": "histogram", "values": [3.0, 13.0, 26.0, 92.0, 213.0, 349.0, 204.0, 74.0, 26.0, 8.0, 5.0, 2.0, 2.0, 1.0, 0.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.9026222229003906, -0.738679051399231, -0.5747358798980713, -0.41079264879226685, -0.24684947729110718, -0.08290630578994751, 0.08103692531585693, 0.2449800968170166, 0.40892326831817627, 0.5728664398193359, 0.7368096113204956, 0.9007528424263, 1.0646960735321045, 1.2286391258239746, 1.3925824165344238, 1.5565255880355835, 1.7204687595367432, 1.8844119310379028, 2.0483551025390625, 2.2122983932495117, 2.376241445541382, 2.540184736251831, 2.704127788543701, 2.8680710792541504, 3.0320143699645996, 3.195957660675049, 3.359900712966919, 3.523844003677368, 3.6877870559692383, 3.8517303466796875, 4.015673637390137, 4.179616928100586, 4.343559741973877, 4.507503032684326, 4.671446323394775, 4.835389137268066, 4.999332427978516, 5.163275718688965, 5.327219009399414, 5.491162300109863, 5.655105113983154, 5.8190484046936035, 5.982991695404053, 6.146934509277344, 6.310877799987793, 6.474821090698242, 6.638764381408691, 6.802707672119141, 6.96665096282959, 7.130594253540039, 7.294537544250488, 7.458480358123779, 7.6224236488342285, 7.786366939544678, 7.950310230255127, 8.114253044128418, 8.278196334838867, 8.442139625549316, 8.606082916259766, 8.770026206970215, 8.933969497680664, 9.097911834716797, 9.261855125427246, 9.425798416137695, 9.589741706848145]}, "gradients/encoder.encoder.layers.11.final_layer_norm.bias": {"_type": "histogram", "values": [3.0, 0.0, 1.0, 3.0, 8.0, 10.0, 6.0, 7.0, 12.0, 7.0, 12.0, 17.0, 22.0, 22.0, 26.0, 30.0, 40.0, 27.0, 52.0, 37.0, 49.0, 59.0, 53.0, 41.0, 49.0, 53.0, 59.0, 40.0, 44.0, 40.0, 26.0, 27.0, 30.0, 23.0, 19.0, 8.0, 11.0, 11.0, 7.0, 4.0, 4.0, 5.0, 2.0, 2.0, 4.0, 3.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.9365293383598328, -0.8964431881904602, -0.8563570380210876, -0.8162708878517151, -0.7761847376823425, -0.73609858751297, -0.6960123777389526, -0.6559262275695801, -0.6158400774002075, -0.575753927230835, -0.5356677770614624, -0.49558162689208984, -0.4554954767227173, -0.4154093265533447, -0.3753231465816498, -0.3352369964122772, -0.29515087604522705, -0.2550647258758545, -0.21497857570648193, -0.17489241063594818, -0.13480626046657562, -0.09472011029720306, -0.05463394522666931, -0.014547795057296753, 0.025538355112075806, 0.06562450528144836, 0.10571066290140152, 0.14579682052135468, 0.18588297069072723, 0.2259691208600998, 0.26605528593063354, 0.3061414361000061, 0.3462275266647339, 0.38631367683410645, 0.426399827003479, 0.46648597717285156, 0.5065721273422241, 0.5466582775115967, 0.5867444276809692, 0.6268305778503418, 0.6669167280197144, 0.7070028781890869, 0.7470890283584595, 0.787175178527832, 0.8272613286972046, 0.8673474788665771, 0.9074336290359497, 0.9475197792053223, 0.9876059889793396, 1.027692198753357, 1.0677783489227295, 1.107864499092102, 1.1479506492614746, 1.1880367994308472, 1.2281229496002197, 1.2682090997695923, 1.3082952499389648, 1.3483814001083374, 1.38846755027771, 1.4285537004470825, 1.468639850616455, 1.5087260007858276, 1.5488121509552002, 1.5888983011245728, 1.6289844512939453]}, "gradients/encoder.encoder.layers.11.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 3.0, 1.0, 1.0, 2.0, 8.0, 7.0, 6.0, 7.0, 12.0, 12.0, 28.0, 42.0, 64.0, 81.0, 128.0, 190.0, 274.0, 435.0, 610.0, 1060.0, 1781.0, 3047.0, 5486.0, 10042.0, 20046.0, 44667.0, 107843.0, 253185.0, 315632.0, 160368.0, 65272.0, 28045.0, 13285.0, 7141.0, 3885.0, 2238.0, 1275.0, 781.0, 548.0, 362.0, 209.0, 146.0, 104.0, 65.0, 41.0, 28.0, 19.0, 14.0, 12.0, 9.0, 7.0, 2.0, 3.0, 3.0, 1.0, 4.0, 2.0, 2.0], "bins": [-2.109375, -2.0477294921875, -1.986083984375, -1.9244384765625, -1.86279296875, -1.8011474609375, -1.739501953125, -1.6778564453125, -1.6162109375, -1.5545654296875, -1.492919921875, -1.4312744140625, -1.36962890625, -1.3079833984375, -1.246337890625, -1.1846923828125, -1.123046875, -1.0614013671875, -0.999755859375, -0.9381103515625, -0.87646484375, -0.8148193359375, -0.753173828125, -0.6915283203125, -0.6298828125, -0.5682373046875, -0.506591796875, -0.4449462890625, -0.38330078125, -0.3216552734375, -0.260009765625, -0.1983642578125, -0.13671875, -0.0750732421875, -0.013427734375, 0.0482177734375, 0.10986328125, 0.1715087890625, 0.233154296875, 0.2947998046875, 0.3564453125, 0.4180908203125, 0.479736328125, 0.5413818359375, 0.60302734375, 0.6646728515625, 0.726318359375, 0.7879638671875, 0.849609375, 0.9112548828125, 0.972900390625, 1.0345458984375, 1.09619140625, 1.1578369140625, 1.219482421875, 1.2811279296875, 1.3427734375, 1.4044189453125, 1.466064453125, 1.5277099609375, 1.58935546875, 1.6510009765625, 1.712646484375, 1.7742919921875, 1.8359375]}, "gradients/encoder.encoder.layers.11.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 3.0, 4.0, 7.0, 4.0, 11.0, 14.0, 10.0, 12.0, 15.0, 18.0, 21.0, 22.0, 32.0, 44.0, 46.0, 34.0, 46.0, 62.0, 51.0, 56.0, 59.0, 44.0, 46.0, 54.0, 58.0, 46.0, 30.0, 30.0, 24.0, 21.0, 20.0, 19.0, 11.0, 4.0, 6.0, 6.0, 3.0, 0.0, 5.0, 7.0, 2.0, 2.0, 4.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.91845703125, -0.8895950317382812, -0.8607330322265625, -0.8318710327148438, -0.803009033203125, -0.7741470336914062, -0.7452850341796875, -0.7164230346679688, -0.68756103515625, -0.6586990356445312, -0.6298370361328125, -0.6009750366210938, -0.572113037109375, -0.5432510375976562, -0.5143890380859375, -0.48552703857421875, -0.4566650390625, -0.42780303955078125, -0.3989410400390625, -0.37007904052734375, -0.341217041015625, -0.31235504150390625, -0.2834930419921875, -0.25463104248046875, -0.22576904296875, -0.19690704345703125, -0.1680450439453125, -0.13918304443359375, -0.110321044921875, -0.08145904541015625, -0.0525970458984375, -0.02373504638671875, 0.005126953125, 0.03398895263671875, 0.0628509521484375, 0.09171295166015625, 0.120574951171875, 0.14943695068359375, 0.1782989501953125, 0.20716094970703125, 0.23602294921875, 0.26488494873046875, 0.2937469482421875, 0.32260894775390625, 0.351470947265625, 0.38033294677734375, 0.4091949462890625, 0.43805694580078125, 0.4669189453125, 0.49578094482421875, 0.5246429443359375, 0.5535049438476562, 0.582366943359375, 0.6112289428710938, 0.6400909423828125, 0.6689529418945312, 0.69781494140625, 0.7266769409179688, 0.7555389404296875, 0.7844009399414062, 0.813262939453125, 0.8421249389648438, 0.8709869384765625, 0.8998489379882812, 0.9287109375]}, "gradients/encoder.encoder.layers.11.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 3.0, 0.0, 0.0, 4.0, 4.0, 6.0, 10.0, 9.0, 7.0, 14.0, 32.0, 26.0, 35.0, 63.0, 83.0, 137.0, 256.0, 514.0, 919.0, 2190.0, 5142.0, 14393.0, 50659.0, 265627.0, 550698.0, 115714.0, 26923.0, 8608.0, 3364.0, 1490.0, 695.0, 379.0, 190.0, 107.0, 80.0, 61.0, 24.0, 32.0, 14.0, 9.0, 13.0, 10.0, 5.0, 5.0, 1.0, 8.0, 2.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0], "bins": [-3.416015625, -3.313323974609375, -3.21063232421875, -3.107940673828125, -3.0052490234375, -2.902557373046875, -2.79986572265625, -2.697174072265625, -2.594482421875, -2.491790771484375, -2.38909912109375, -2.286407470703125, -2.1837158203125, -2.081024169921875, -1.97833251953125, -1.875640869140625, -1.77294921875, -1.670257568359375, -1.56756591796875, -1.464874267578125, -1.3621826171875, -1.259490966796875, -1.15679931640625, -1.054107666015625, -0.951416015625, -0.848724365234375, -0.74603271484375, -0.643341064453125, -0.5406494140625, -0.437957763671875, -0.33526611328125, -0.232574462890625, -0.1298828125, -0.027191162109375, 0.07550048828125, 0.178192138671875, 0.2808837890625, 0.383575439453125, 0.48626708984375, 0.588958740234375, 0.691650390625, 0.794342041015625, 0.89703369140625, 0.999725341796875, 1.1024169921875, 1.205108642578125, 1.30780029296875, 1.410491943359375, 1.51318359375, 1.615875244140625, 1.71856689453125, 1.821258544921875, 1.9239501953125, 2.026641845703125, 2.12933349609375, 2.232025146484375, 2.334716796875, 2.437408447265625, 2.54010009765625, 2.642791748046875, 2.7454833984375, 2.848175048828125, 2.95086669921875, 3.053558349609375, 3.15625]}, "gradients/encoder.encoder.layers.11.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 4.0, 3.0, 6.0, 5.0, 5.0, 6.0, 4.0, 13.0, 19.0, 16.0, 18.0, 22.0, 50.0, 38.0, 46.0, 52.0, 73.0, 69.0, 65.0, 64.0, 74.0, 64.0, 56.0, 41.0, 27.0, 34.0, 33.0, 31.0, 16.0, 14.0, 9.0, 6.0, 4.0, 5.0, 5.0, 6.0, 6.0, 3.0, 3.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.4140625, -4.211669921875, -4.00927734375, -3.806884765625, -3.6044921875, -3.402099609375, -3.19970703125, -2.997314453125, -2.794921875, -2.592529296875, -2.39013671875, -2.187744140625, -1.9853515625, -1.782958984375, -1.58056640625, -1.378173828125, -1.17578125, -0.973388671875, -0.77099609375, -0.568603515625, -0.3662109375, -0.163818359375, 0.03857421875, 0.240966796875, 0.443359375, 0.645751953125, 0.84814453125, 1.050537109375, 1.2529296875, 1.455322265625, 1.65771484375, 1.860107421875, 2.0625, 2.264892578125, 2.46728515625, 2.669677734375, 2.8720703125, 3.074462890625, 3.27685546875, 3.479248046875, 3.681640625, 3.884033203125, 4.08642578125, 4.288818359375, 4.4912109375, 4.693603515625, 4.89599609375, 5.098388671875, 5.30078125, 5.503173828125, 5.70556640625, 5.907958984375, 6.1103515625, 6.312744140625, 6.51513671875, 6.717529296875, 6.919921875, 7.122314453125, 7.32470703125, 7.527099609375, 7.7294921875, 7.931884765625, 8.13427734375, 8.336669921875, 8.5390625]}, "gradients/encoder.encoder.layers.11.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 4.0, 7.0, 8.0, 11.0, 19.0, 45.0, 85.0, 211.0, 515.0, 2033.0, 25169.0, 1006940.0, 11402.0, 1366.0, 409.0, 162.0, 87.0, 32.0, 30.0, 10.0, 10.0, 8.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.2890625, -6.0201416015625, -5.751220703125, -5.4822998046875, -5.21337890625, -4.9444580078125, -4.675537109375, -4.4066162109375, -4.1376953125, -3.8687744140625, -3.599853515625, -3.3309326171875, -3.06201171875, -2.7930908203125, -2.524169921875, -2.2552490234375, -1.986328125, -1.7174072265625, -1.448486328125, -1.1795654296875, -0.91064453125, -0.6417236328125, -0.372802734375, -0.1038818359375, 0.1650390625, 0.4339599609375, 0.702880859375, 0.9718017578125, 1.24072265625, 1.5096435546875, 1.778564453125, 2.0474853515625, 2.31640625, 2.5853271484375, 2.854248046875, 3.1231689453125, 3.39208984375, 3.6610107421875, 3.929931640625, 4.1988525390625, 4.4677734375, 4.7366943359375, 5.005615234375, 5.2745361328125, 5.54345703125, 5.8123779296875, 6.081298828125, 6.3502197265625, 6.619140625, 6.8880615234375, 7.156982421875, 7.4259033203125, 7.69482421875, 7.9637451171875, 8.232666015625, 8.5015869140625, 8.7705078125, 9.0394287109375, 9.308349609375, 9.5772705078125, 9.84619140625, 10.1151123046875, 10.384033203125, 10.6529541015625, 10.921875]}, "gradients/encoder.encoder.layers.11.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0, 2.0, 2.0, 1.0, 2.0, 3.0, 9.0, 11.0, 6.0, 15.0, 31.0, 32.0, 88.0, 110.0, 173.0, 154.0, 144.0, 83.0, 44.0, 37.0, 12.0, 17.0, 11.0, 9.0, 5.0, 2.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.0002753734588623047, -0.00026654452085494995, -0.0002577155828475952, -0.0002488866448402405, -0.00024005770683288574, -0.000231228768825531, -0.00022239983081817627, -0.00021357089281082153, -0.0002047419548034668, -0.00019591301679611206, -0.00018708407878875732, -0.0001782551407814026, -0.00016942620277404785, -0.00016059726476669312, -0.00015176832675933838, -0.00014293938875198364, -0.0001341104507446289, -0.00012528151273727417, -0.00011645257472991943, -0.0001076236367225647, -9.879469871520996e-05, -8.996576070785522e-05, -8.113682270050049e-05, -7.230788469314575e-05, -6.347894668579102e-05, -5.465000867843628e-05, -4.582107067108154e-05, -3.699213266372681e-05, -2.816319465637207e-05, -1.9334256649017334e-05, -1.0505318641662598e-05, -1.6763806343078613e-06, 7.152557373046875e-06, 1.598149538040161e-05, 2.4810433387756348e-05, 3.3639371395111084e-05, 4.246830940246582e-05, 5.129724740982056e-05, 6.012618541717529e-05, 6.895512342453003e-05, 7.778406143188477e-05, 8.66129994392395e-05, 9.544193744659424e-05, 0.00010427087545394897, 0.00011309981346130371, 0.00012192875146865845, 0.00013075768947601318, 0.00013958662748336792, 0.00014841556549072266, 0.0001572445034980774, 0.00016607344150543213, 0.00017490237951278687, 0.0001837313175201416, 0.00019256025552749634, 0.00020138919353485107, 0.0002102181315422058, 0.00021904706954956055, 0.00022787600755691528, 0.00023670494556427002, 0.00024553388357162476, 0.0002543628215789795, 0.00026319175958633423, 0.00027202069759368896, 0.0002808496356010437, 0.00028967857360839844]}, "gradients/encoder.encoder.layers.11.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 5.0, 3.0, 4.0, 3.0, 8.0, 11.0, 18.0, 26.0, 44.0, 83.0, 141.0, 250.0, 548.0, 1339.0, 4008.0, 16189.0, 274219.0, 715354.0, 27591.0, 5565.0, 1733.0, 718.0, 307.0, 180.0, 69.0, 50.0, 31.0, 20.0, 20.0, 8.0, 5.0, 3.0, 3.0, 3.0, 3.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-3.080078125, -2.98712158203125, -2.8941650390625, -2.80120849609375, -2.708251953125, -2.61529541015625, -2.5223388671875, -2.42938232421875, -2.33642578125, -2.24346923828125, -2.1505126953125, -2.05755615234375, -1.964599609375, -1.87164306640625, -1.7786865234375, -1.68572998046875, -1.5927734375, -1.49981689453125, -1.4068603515625, -1.31390380859375, -1.220947265625, -1.12799072265625, -1.0350341796875, -0.94207763671875, -0.84912109375, -0.75616455078125, -0.6632080078125, -0.57025146484375, -0.477294921875, -0.38433837890625, -0.2913818359375, -0.19842529296875, -0.10546875, -0.01251220703125, 0.0804443359375, 0.17340087890625, 0.266357421875, 0.35931396484375, 0.4522705078125, 0.54522705078125, 0.63818359375, 0.73114013671875, 0.8240966796875, 0.91705322265625, 1.010009765625, 1.10296630859375, 1.1959228515625, 1.28887939453125, 1.3818359375, 1.47479248046875, 1.5677490234375, 1.66070556640625, 1.753662109375, 1.84661865234375, 1.9395751953125, 2.03253173828125, 2.12548828125, 2.21844482421875, 2.3114013671875, 2.40435791015625, 2.497314453125, 2.59027099609375, 2.6832275390625, 2.77618408203125, 2.869140625]}, "gradients/encoder.encoder.layers.11.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 2.0, 5.0, 3.0, 4.0, 5.0, 1.0, 1.0, 6.0, 11.0, 23.0, 27.0, 57.0, 105.0, 203.0, 256.0, 154.0, 70.0, 29.0, 20.0, 7.0, 8.0, 7.0, 1.0, 0.0, 1.0, 2.0, 2.0, 3.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.849609375, -3.675384521484375, -3.50115966796875, -3.326934814453125, -3.1527099609375, -2.978485107421875, -2.80426025390625, -2.630035400390625, -2.455810546875, -2.281585693359375, -2.10736083984375, -1.933135986328125, -1.7589111328125, -1.584686279296875, -1.41046142578125, -1.236236572265625, -1.06201171875, -0.887786865234375, -0.71356201171875, -0.539337158203125, -0.3651123046875, -0.190887451171875, -0.01666259765625, 0.157562255859375, 0.331787109375, 0.506011962890625, 0.68023681640625, 0.854461669921875, 1.0286865234375, 1.202911376953125, 1.37713623046875, 1.551361083984375, 1.7255859375, 1.899810791015625, 2.07403564453125, 2.248260498046875, 2.4224853515625, 2.596710205078125, 2.77093505859375, 2.945159912109375, 3.119384765625, 3.293609619140625, 3.46783447265625, 3.642059326171875, 3.8162841796875, 3.990509033203125, 4.16473388671875, 4.338958740234375, 4.51318359375, 4.687408447265625, 4.86163330078125, 5.035858154296875, 5.2100830078125, 5.384307861328125, 5.55853271484375, 5.732757568359375, 5.906982421875, 6.081207275390625, 6.25543212890625, 6.429656982421875, 6.6038818359375, 6.778106689453125, 6.95233154296875, 7.126556396484375, 7.30078125]}, "gradients/encoder.encoder.layers.11.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 4.0, 9.0, 22.0, 59.0, 160.0, 337.0, 274.0, 97.0, 35.0, 12.0, 6.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-72.20555877685547, -69.6031494140625, -67.000732421875, -64.39832305908203, -61.7959098815918, -59.19349670410156, -56.59108352661133, -53.988670349121094, -51.386260986328125, -48.78384780883789, -46.181434631347656, -43.57902526855469, -40.97661209106445, -38.37419891357422, -35.771785736083984, -33.16937255859375, -30.566959381103516, -27.96454620361328, -25.36213493347168, -22.759721755981445, -20.157310485839844, -17.55489730834961, -14.952484130859375, -12.350072860717773, -9.747659683227539, -7.145247459411621, -4.542834758758545, -1.9404220581054688, 0.6619901657104492, 3.264402389526367, 5.866815567016602, 8.469226837158203, 11.071640014648438, 13.674052238464355, 16.276464462280273, 18.878877639770508, 21.48128890991211, 24.083702087402344, 26.686115264892578, 29.28852653503418, 31.890939712524414, 34.493350982666016, 37.09576416015625, 39.698177337646484, 42.30059051513672, 44.90299987792969, 47.50541687011719, 50.107826232910156, 52.71023941040039, 55.312652587890625, 57.91506576538086, 60.517478942871094, 63.11988830566406, 65.72230529785156, 68.32471466064453, 70.9271240234375, 73.529541015625, 76.13195037841797, 78.73436737060547, 81.33677673339844, 83.93919372558594, 86.5416030883789, 89.14401245117188, 91.74642944335938, 94.34883880615234]}, "gradients/encoder.encoder.layers.11.layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 2.0, 0.0, 3.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 5.0, 5.0, 6.0, 12.0, 19.0, 16.0, 28.0, 21.0, 30.0, 33.0, 27.0, 53.0, 55.0, 62.0, 65.0, 59.0, 61.0, 66.0, 58.0, 57.0, 32.0, 41.0, 48.0, 21.0, 24.0, 20.0, 13.0, 11.0, 9.0, 7.0, 13.0, 6.0, 9.0, 7.0, 2.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-34.153141021728516, -33.114498138427734, -32.07585525512695, -31.037214279174805, -29.998571395874023, -28.959928512573242, -27.921287536621094, -26.882644653320312, -25.84400177001953, -24.80535888671875, -23.76671600341797, -22.72807502746582, -21.68943214416504, -20.650789260864258, -19.61214828491211, -18.573505401611328, -17.534862518310547, -16.496219635009766, -15.4575777053833, -14.418935775756836, -13.380292892456055, -12.341650009155273, -11.303008079528809, -10.264366149902344, -9.225723266601562, -8.187080383300781, -7.148438453674316, -6.109796047210693, -5.07115364074707, -4.032511234283447, -2.993868827819824, -1.9552264213562012, -0.9165802001953125, 0.12206220626831055, 1.1607046127319336, 2.1993470191955566, 3.2379894256591797, 4.276631832122803, 5.315274238586426, 6.353916645050049, 7.392559051513672, 8.431201934814453, 9.469843864440918, 10.508485794067383, 11.547128677368164, 12.585771560668945, 13.62441349029541, 14.663055419921875, 15.701698303222656, 16.740341186523438, 17.77898406982422, 18.817625045776367, 19.85626792907715, 20.89491081237793, 21.933551788330078, 22.97219467163086, 24.01083755493164, 25.049480438232422, 26.088123321533203, 27.12676429748535, 28.165407180786133, 29.204050064086914, 30.242691040039062, 31.281333923339844, 32.319976806640625]}, "gradients/encoder.encoder.layers.10.feed_forward.output_dense.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 4.0, 2.0, 1.0, 2.0, 0.0, 3.0, 7.0, 3.0, 7.0, 14.0, 12.0, 12.0, 21.0, 23.0, 27.0, 38.0, 50.0, 88.0, 129.0, 188.0, 343.0, 596.0, 1151.0, 2644.0, 7322.0, 41235.0, 4080213.0, 46483.0, 8149.0, 2776.0, 1281.0, 611.0, 349.0, 176.0, 109.0, 81.0, 49.0, 34.0, 14.0, 14.0, 13.0, 5.0, 11.0, 1.0, 2.0, 1.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.23828125, -5.06085205078125, -4.8834228515625, -4.70599365234375, -4.528564453125, -4.35113525390625, -4.1737060546875, -3.99627685546875, -3.81884765625, -3.64141845703125, -3.4639892578125, -3.28656005859375, -3.109130859375, -2.93170166015625, -2.7542724609375, -2.57684326171875, -2.3994140625, -2.22198486328125, -2.0445556640625, -1.86712646484375, -1.689697265625, -1.51226806640625, -1.3348388671875, -1.15740966796875, -0.97998046875, -0.80255126953125, -0.6251220703125, -0.44769287109375, -0.270263671875, -0.09283447265625, 0.0845947265625, 0.26202392578125, 0.439453125, 0.61688232421875, 0.7943115234375, 0.97174072265625, 1.149169921875, 1.32659912109375, 1.5040283203125, 1.68145751953125, 1.85888671875, 2.03631591796875, 2.2137451171875, 2.39117431640625, 2.568603515625, 2.74603271484375, 2.9234619140625, 3.10089111328125, 3.2783203125, 3.45574951171875, 3.6331787109375, 3.81060791015625, 3.988037109375, 4.16546630859375, 4.3428955078125, 4.52032470703125, 4.69775390625, 4.87518310546875, 5.0526123046875, 5.23004150390625, 5.407470703125, 5.58489990234375, 5.7623291015625, 5.93975830078125, 6.1171875]}, "gradients/encoder.encoder.layers.10.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 0.0, 2.0, 1.0, 2.0, 4.0, 6.0, 2.0, 12.0, 7.0, 15.0, 22.0, 36.0, 36.0, 74.0, 78.0, 100.0, 110.0, 106.0, 99.0, 83.0, 52.0, 42.0, 27.0, 26.0, 19.0, 20.0, 7.0, 6.0, 6.0, 2.0, 1.0, 5.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.107421875, -2.0435333251953125, -1.979644775390625, -1.9157562255859375, -1.85186767578125, -1.7879791259765625, -1.724090576171875, -1.6602020263671875, -1.5963134765625, -1.5324249267578125, -1.468536376953125, -1.4046478271484375, -1.34075927734375, -1.2768707275390625, -1.212982177734375, -1.1490936279296875, -1.085205078125, -1.0213165283203125, -0.957427978515625, -0.8935394287109375, -0.82965087890625, -0.7657623291015625, -0.701873779296875, -0.6379852294921875, -0.5740966796875, -0.5102081298828125, -0.446319580078125, -0.3824310302734375, -0.31854248046875, -0.2546539306640625, -0.190765380859375, -0.1268768310546875, -0.06298828125, 0.0009002685546875, 0.064788818359375, 0.1286773681640625, 0.19256591796875, 0.2564544677734375, 0.320343017578125, 0.3842315673828125, 0.4481201171875, 0.5120086669921875, 0.575897216796875, 0.6397857666015625, 0.70367431640625, 0.7675628662109375, 0.831451416015625, 0.8953399658203125, 0.959228515625, 1.0231170654296875, 1.087005615234375, 1.1508941650390625, 1.21478271484375, 1.2786712646484375, 1.342559814453125, 1.4064483642578125, 1.4703369140625, 1.5342254638671875, 1.598114013671875, 1.6620025634765625, 1.72589111328125, 1.7897796630859375, 1.853668212890625, 1.9175567626953125, 1.9814453125]}, "gradients/encoder.encoder.layers.10.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 1.0, 4.0, 3.0, 5.0, 10.0, 9.0, 10.0, 18.0, 16.0, 28.0, 33.0, 61.0, 69.0, 85.0, 145.0, 183.0, 248.0, 312.0, 464.0, 703.0, 930.0, 1487.0, 2338.0, 3673.0, 6816.0, 12865.0, 29776.0, 109406.0, 3813287.0, 143944.0, 34189.0, 14337.0, 7309.0, 3966.0, 2483.0, 1608.0, 965.0, 724.0, 520.0, 346.0, 258.0, 157.0, 143.0, 93.0, 66.0, 58.0, 38.0, 28.0, 24.0, 16.0, 12.0, 9.0, 2.0, 4.0, 2.0, 5.0, 2.0, 1.0, 3.0, 3.0], "bins": [-3.24609375, -3.14642333984375, -3.0467529296875, -2.94708251953125, -2.847412109375, -2.74774169921875, -2.6480712890625, -2.54840087890625, -2.44873046875, -2.34906005859375, -2.2493896484375, -2.14971923828125, -2.050048828125, -1.95037841796875, -1.8507080078125, -1.75103759765625, -1.6513671875, -1.55169677734375, -1.4520263671875, -1.35235595703125, -1.252685546875, -1.15301513671875, -1.0533447265625, -0.95367431640625, -0.85400390625, -0.75433349609375, -0.6546630859375, -0.55499267578125, -0.455322265625, -0.35565185546875, -0.2559814453125, -0.15631103515625, -0.056640625, 0.04302978515625, 0.1427001953125, 0.24237060546875, 0.342041015625, 0.44171142578125, 0.5413818359375, 0.64105224609375, 0.74072265625, 0.84039306640625, 0.9400634765625, 1.03973388671875, 1.139404296875, 1.23907470703125, 1.3387451171875, 1.43841552734375, 1.5380859375, 1.63775634765625, 1.7374267578125, 1.83709716796875, 1.936767578125, 2.03643798828125, 2.1361083984375, 2.23577880859375, 2.33544921875, 2.43511962890625, 2.5347900390625, 2.63446044921875, 2.734130859375, 2.83380126953125, 2.9334716796875, 3.03314208984375, 3.1328125]}, "gradients/encoder.encoder.layers.10.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 2.0, 5.0, 2.0, 2.0, 6.0, 4.0, 11.0, 8.0, 13.0, 16.0, 19.0, 30.0, 54.0, 96.0, 454.0, 3011.0, 151.0, 59.0, 36.0, 19.0, 20.0, 11.0, 10.0, 8.0, 12.0, 4.0, 3.0, 3.0, 1.0, 3.0, 5.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 3.0, 0.0, 1.0, 2.0, 0.0, 2.0], "bins": [-1.228515625, -1.191436767578125, -1.15435791015625, -1.117279052734375, -1.0802001953125, -1.043121337890625, -1.00604248046875, -0.968963623046875, -0.931884765625, -0.894805908203125, -0.85772705078125, -0.820648193359375, -0.7835693359375, -0.746490478515625, -0.70941162109375, -0.672332763671875, -0.63525390625, -0.598175048828125, -0.56109619140625, -0.524017333984375, -0.4869384765625, -0.449859619140625, -0.41278076171875, -0.375701904296875, -0.338623046875, -0.301544189453125, -0.26446533203125, -0.227386474609375, -0.1903076171875, -0.153228759765625, -0.11614990234375, -0.079071044921875, -0.0419921875, -0.004913330078125, 0.03216552734375, 0.069244384765625, 0.1063232421875, 0.143402099609375, 0.18048095703125, 0.217559814453125, 0.254638671875, 0.291717529296875, 0.32879638671875, 0.365875244140625, 0.4029541015625, 0.440032958984375, 0.47711181640625, 0.514190673828125, 0.55126953125, 0.588348388671875, 0.62542724609375, 0.662506103515625, 0.6995849609375, 0.736663818359375, 0.77374267578125, 0.810821533203125, 0.847900390625, 0.884979248046875, 0.92205810546875, 0.959136962890625, 0.9962158203125, 1.033294677734375, 1.07037353515625, 1.107452392578125, 1.14453125]}, "gradients/encoder.encoder.layers.10.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 4.0, 4.0, 9.0, 24.0, 55.0, 90.0, 145.0, 199.0, 178.0, 131.0, 68.0, 37.0, 26.0, 15.0, 6.0, 9.0, 3.0, 5.0, 2.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.453207492828369, -4.201204299926758, -3.9492008686065674, -3.697197437286377, -3.4451942443847656, -3.193190813064575, -2.9411873817443848, -2.6891841888427734, -2.437180757522583, -2.1851773262023926, -1.9331741333007812, -1.6811707019805908, -1.42916738986969, -1.177164077758789, -0.9251606464385986, -0.6731573343276978, -0.4211540222167969, -0.1691506803035736, 0.08285266160964966, 0.3348560333251953, 0.5868593454360962, 0.8388626575469971, 1.0908660888671875, 1.3428694009780884, 1.5948727130889893, 1.8468760251998901, 2.098879337310791, 2.3508827686309814, 2.602886199951172, 2.854889392852783, 3.1068928241729736, 3.358896255493164, 3.6108999252319336, 3.862903356552124, 4.1149067878723145, 4.366909980773926, 4.618913173675537, 4.870916366577148, 5.122920036315918, 5.374923229217529, 5.626926422119141, 5.878929615020752, 6.1309332847595215, 6.382936477661133, 6.634939670562744, 6.8869428634643555, 7.138946533203125, 7.390949726104736, 7.642953395843506, 7.894956588745117, 8.146960258483887, 8.398963928222656, 8.65096664428711, 8.902970314025879, 9.154973983764648, 9.406976699829102, 9.658980369567871, 9.91098403930664, 10.162986755371094, 10.414990425109863, 10.666994094848633, 10.918996810913086, 11.171000480651855, 11.423004150390625, 11.675006866455078]}, "gradients/encoder.encoder.layers.10.final_layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 0.0, 0.0, 4.0, 0.0, 4.0, 1.0, 4.0, 9.0, 10.0, 4.0, 12.0, 14.0, 11.0, 19.0, 19.0, 20.0, 23.0, 24.0, 30.0, 28.0, 43.0, 32.0, 39.0, 50.0, 34.0, 56.0, 51.0, 42.0, 31.0, 28.0, 45.0, 36.0, 46.0, 28.0, 24.0, 37.0, 31.0, 29.0, 16.0, 18.0, 17.0, 12.0, 5.0, 7.0, 3.0, 5.0, 5.0, 2.0, 5.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.00634765625, -2.904017925262451, -2.8016879558563232, -2.6993582248687744, -2.5970282554626465, -2.4946985244750977, -2.392368793487549, -2.2900390625, -2.187709093093872, -2.0853793621063232, -1.9830493927001953, -1.8807196617126465, -1.778389811515808, -1.6760599613189697, -1.573730230331421, -1.4714003801345825, -1.3690705299377441, -1.2667406797409058, -1.1644108295440674, -1.0620810985565186, -0.9597512483596802, -0.8574213981628418, -0.7550916075706482, -0.6527618169784546, -0.5504319667816162, -0.4481021463871002, -0.34577232599258423, -0.24344250559806824, -0.14111268520355225, -0.03878283500671387, 0.06354695558547974, 0.16587674617767334, 0.2682068347930908, 0.3705366551876068, 0.4728664755821228, 0.5751962661743164, 0.6775261163711548, 0.7798559665679932, 0.8821857571601868, 0.9845155477523804, 1.0868453979492188, 1.1891752481460571, 1.2915050983428955, 1.3938348293304443, 1.4961646795272827, 1.598494529724121, 1.70082426071167, 1.8031541109085083, 1.9054839611053467, 2.0078136920928955, 2.1101436614990234, 2.2124733924865723, 2.314803123474121, 2.417133092880249, 2.519462823867798, 2.621792793273926, 2.7241225242614746, 2.8264522552490234, 2.9287822246551514, 3.0311119556427, 3.133441925048828, 3.235771656036377, 3.338101387023926, 3.4404311180114746, 3.5427610874176025]}, "gradients/encoder.encoder.layers.10.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 3.0, 5.0, 10.0, 8.0, 15.0, 15.0, 17.0, 43.0, 57.0, 76.0, 98.0, 170.0, 235.0, 400.0, 605.0, 1060.0, 1973.0, 4018.0, 9196.0, 23491.0, 69331.0, 213637.0, 410239.0, 206894.0, 66358.0, 23177.0, 8952.0, 3920.0, 1886.0, 991.0, 581.0, 349.0, 236.0, 153.0, 112.0, 71.0, 46.0, 45.0, 17.0, 17.0, 16.0, 8.0, 9.0, 7.0, 2.0, 5.0, 6.0, 3.0, 0.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-2.365234375, -2.282196044921875, -2.19915771484375, -2.116119384765625, -2.0330810546875, -1.950042724609375, -1.86700439453125, -1.783966064453125, -1.700927734375, -1.617889404296875, -1.53485107421875, -1.451812744140625, -1.3687744140625, -1.285736083984375, -1.20269775390625, -1.119659423828125, -1.03662109375, -0.953582763671875, -0.87054443359375, -0.787506103515625, -0.7044677734375, -0.621429443359375, -0.53839111328125, -0.455352783203125, -0.372314453125, -0.289276123046875, -0.20623779296875, -0.123199462890625, -0.0401611328125, 0.042877197265625, 0.12591552734375, 0.208953857421875, 0.2919921875, 0.375030517578125, 0.45806884765625, 0.541107177734375, 0.6241455078125, 0.707183837890625, 0.79022216796875, 0.873260498046875, 0.956298828125, 1.039337158203125, 1.12237548828125, 1.205413818359375, 1.2884521484375, 1.371490478515625, 1.45452880859375, 1.537567138671875, 1.62060546875, 1.703643798828125, 1.78668212890625, 1.869720458984375, 1.9527587890625, 2.035797119140625, 2.11883544921875, 2.201873779296875, 2.284912109375, 2.367950439453125, 2.45098876953125, 2.534027099609375, 2.6170654296875, 2.700103759765625, 2.78314208984375, 2.866180419921875, 2.94921875]}, "gradients/encoder.encoder.layers.10.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 2.0, 1.0, 3.0, 1.0, 3.0, 5.0, 4.0, 8.0, 18.0, 20.0, 34.0, 45.0, 44.0, 54.0, 69.0, 69.0, 93.0, 98.0, 84.0, 62.0, 50.0, 56.0, 52.0, 36.0, 33.0, 9.0, 23.0, 10.0, 6.0, 8.0, 4.0, 1.0, 3.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-2.19140625, -2.1310882568359375, -2.070770263671875, -2.0104522705078125, -1.95013427734375, -1.8898162841796875, -1.829498291015625, -1.7691802978515625, -1.7088623046875, -1.6485443115234375, -1.588226318359375, -1.5279083251953125, -1.46759033203125, -1.4072723388671875, -1.346954345703125, -1.2866363525390625, -1.226318359375, -1.1660003662109375, -1.105682373046875, -1.0453643798828125, -0.98504638671875, -0.9247283935546875, -0.864410400390625, -0.8040924072265625, -0.7437744140625, -0.6834564208984375, -0.623138427734375, -0.5628204345703125, -0.50250244140625, -0.4421844482421875, -0.381866455078125, -0.3215484619140625, -0.26123046875, -0.2009124755859375, -0.140594482421875, -0.0802764892578125, -0.01995849609375, 0.0403594970703125, 0.100677490234375, 0.1609954833984375, 0.2213134765625, 0.2816314697265625, 0.341949462890625, 0.4022674560546875, 0.46258544921875, 0.5229034423828125, 0.583221435546875, 0.6435394287109375, 0.703857421875, 0.7641754150390625, 0.824493408203125, 0.8848114013671875, 0.94512939453125, 1.0054473876953125, 1.065765380859375, 1.1260833740234375, 1.1864013671875, 1.2467193603515625, 1.307037353515625, 1.3673553466796875, 1.42767333984375, 1.4879913330078125, 1.548309326171875, 1.6086273193359375, 1.6689453125]}, "gradients/encoder.encoder.layers.10.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 4.0, 3.0, 2.0, 4.0, 2.0, 8.0, 5.0, 12.0, 37.0, 44.0, 81.0, 147.0, 294.0, 625.0, 1668.0, 5985.0, 30959.0, 336257.0, 602547.0, 56834.0, 9063.0, 2382.0, 849.0, 364.0, 157.0, 99.0, 53.0, 25.0, 14.0, 14.0, 9.0, 4.0, 7.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-7.09765625, -6.8897705078125, -6.681884765625, -6.4739990234375, -6.26611328125, -6.0582275390625, -5.850341796875, -5.6424560546875, -5.4345703125, -5.2266845703125, -5.018798828125, -4.8109130859375, -4.60302734375, -4.3951416015625, -4.187255859375, -3.9793701171875, -3.771484375, -3.5635986328125, -3.355712890625, -3.1478271484375, -2.93994140625, -2.7320556640625, -2.524169921875, -2.3162841796875, -2.1083984375, -1.9005126953125, -1.692626953125, -1.4847412109375, -1.27685546875, -1.0689697265625, -0.861083984375, -0.6531982421875, -0.4453125, -0.2374267578125, -0.029541015625, 0.1783447265625, 0.38623046875, 0.5941162109375, 0.802001953125, 1.0098876953125, 1.2177734375, 1.4256591796875, 1.633544921875, 1.8414306640625, 2.04931640625, 2.2572021484375, 2.465087890625, 2.6729736328125, 2.880859375, 3.0887451171875, 3.296630859375, 3.5045166015625, 3.71240234375, 3.9202880859375, 4.128173828125, 4.3360595703125, 4.5439453125, 4.7518310546875, 4.959716796875, 5.1676025390625, 5.37548828125, 5.5833740234375, 5.791259765625, 5.9991455078125, 6.20703125]}, "gradients/encoder.encoder.layers.10.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 7.0, 5.0, 7.0, 6.0, 10.0, 14.0, 13.0, 15.0, 17.0, 21.0, 31.0, 26.0, 29.0, 33.0, 44.0, 49.0, 49.0, 49.0, 54.0, 57.0, 58.0, 60.0, 42.0, 46.0, 42.0, 35.0, 39.0, 27.0, 20.0, 23.0, 16.0, 15.0, 9.0, 8.0, 6.0, 1.0, 1.0, 4.0, 2.0, 1.0, 2.0, 3.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-6.72265625, -6.5030517578125, -6.283447265625, -6.0638427734375, -5.84423828125, -5.6246337890625, -5.405029296875, -5.1854248046875, -4.9658203125, -4.7462158203125, -4.526611328125, -4.3070068359375, -4.08740234375, -3.8677978515625, -3.648193359375, -3.4285888671875, -3.208984375, -2.9893798828125, -2.769775390625, -2.5501708984375, -2.33056640625, -2.1109619140625, -1.891357421875, -1.6717529296875, -1.4521484375, -1.2325439453125, -1.012939453125, -0.7933349609375, -0.57373046875, -0.3541259765625, -0.134521484375, 0.0850830078125, 0.3046875, 0.5242919921875, 0.743896484375, 0.9635009765625, 1.18310546875, 1.4027099609375, 1.622314453125, 1.8419189453125, 2.0615234375, 2.2811279296875, 2.500732421875, 2.7203369140625, 2.93994140625, 3.1595458984375, 3.379150390625, 3.5987548828125, 3.818359375, 4.0379638671875, 4.257568359375, 4.4771728515625, 4.69677734375, 4.9163818359375, 5.135986328125, 5.3555908203125, 5.5751953125, 5.7947998046875, 6.014404296875, 6.2340087890625, 6.45361328125, 6.6732177734375, 6.892822265625, 7.1124267578125, 7.33203125]}, "gradients/encoder.encoder.layers.10.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 2.0, 6.0, 11.0, 11.0, 14.0, 37.0, 70.0, 131.0, 292.0, 877.0, 3872.0, 92575.0, 930700.0, 17065.0, 1901.0, 561.0, 203.0, 103.0, 55.0, 26.0, 19.0, 16.0, 7.0, 2.0, 3.0, 3.0, 2.0, 1.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.80859375, -6.62603759765625, -6.4434814453125, -6.26092529296875, -6.078369140625, -5.89581298828125, -5.7132568359375, -5.53070068359375, -5.34814453125, -5.16558837890625, -4.9830322265625, -4.80047607421875, -4.617919921875, -4.43536376953125, -4.2528076171875, -4.07025146484375, -3.8876953125, -3.70513916015625, -3.5225830078125, -3.34002685546875, -3.157470703125, -2.97491455078125, -2.7923583984375, -2.60980224609375, -2.42724609375, -2.24468994140625, -2.0621337890625, -1.87957763671875, -1.697021484375, -1.51446533203125, -1.3319091796875, -1.14935302734375, -0.966796875, -0.78424072265625, -0.6016845703125, -0.41912841796875, -0.236572265625, -0.05401611328125, 0.1285400390625, 0.31109619140625, 0.49365234375, 0.67620849609375, 0.8587646484375, 1.04132080078125, 1.223876953125, 1.40643310546875, 1.5889892578125, 1.77154541015625, 1.9541015625, 2.13665771484375, 2.3192138671875, 2.50177001953125, 2.684326171875, 2.86688232421875, 3.0494384765625, 3.23199462890625, 3.41455078125, 3.59710693359375, 3.7796630859375, 3.96221923828125, 4.144775390625, 4.32733154296875, 4.5098876953125, 4.69244384765625, 4.875]}, "gradients/encoder.encoder.layers.10.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 4.0, 7.0, 9.0, 15.0, 31.0, 64.0, 110.0, 202.0, 224.0, 162.0, 76.0, 59.0, 24.0, 16.0, 9.0, 7.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0006542205810546875, -0.0006339848041534424, -0.0006137490272521973, -0.0005935132503509521, -0.000573277473449707, -0.0005530416965484619, -0.0005328059196472168, -0.0005125701427459717, -0.0004923343658447266, -0.00047209858894348145, -0.00045186281204223633, -0.0004316270351409912, -0.0004113912582397461, -0.000391155481338501, -0.00037091970443725586, -0.00035068392753601074, -0.0003304481506347656, -0.0003102123737335205, -0.0002899765968322754, -0.0002697408199310303, -0.00024950504302978516, -0.00022926926612854004, -0.00020903348922729492, -0.0001887977123260498, -0.0001685619354248047, -0.00014832615852355957, -0.00012809038162231445, -0.00010785460472106934, -8.761882781982422e-05, -6.73830509185791e-05, -4.7147274017333984e-05, -2.6911497116088867e-05, -6.67572021484375e-06, 1.3560056686401367e-05, 3.3795833587646484e-05, 5.40316104888916e-05, 7.426738739013672e-05, 9.450316429138184e-05, 0.00011473894119262695, 0.00013497471809387207, 0.0001552104949951172, 0.0001754462718963623, 0.00019568204879760742, 0.00021591782569885254, 0.00023615360260009766, 0.0002563893795013428, 0.0002766251564025879, 0.000296860933303833, 0.0003170967102050781, 0.00033733248710632324, 0.00035756826400756836, 0.0003778040409088135, 0.0003980398178100586, 0.0004182755947113037, 0.00043851137161254883, 0.00045874714851379395, 0.00047898292541503906, 0.0004992187023162842, 0.0005194544792175293, 0.0005396902561187744, 0.0005599260330200195, 0.0005801618099212646, 0.0006003975868225098, 0.0006206333637237549, 0.000640869140625]}, "gradients/encoder.encoder.layers.10.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 2.0, 4.0, 4.0, 5.0, 16.0, 20.0, 38.0, 87.0, 280.0, 948.0, 6211.0, 473213.0, 559489.0, 6770.0, 1024.0, 266.0, 87.0, 47.0, 22.0, 12.0, 3.0, 7.0, 4.0, 2.0, 2.0, 0.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.625, -6.4552001953125, -6.285400390625, -6.1156005859375, -5.94580078125, -5.7760009765625, -5.606201171875, -5.4364013671875, -5.2666015625, -5.0968017578125, -4.927001953125, -4.7572021484375, -4.58740234375, -4.4176025390625, -4.247802734375, -4.0780029296875, -3.908203125, -3.7384033203125, -3.568603515625, -3.3988037109375, -3.22900390625, -3.0592041015625, -2.889404296875, -2.7196044921875, -2.5498046875, -2.3800048828125, -2.210205078125, -2.0404052734375, -1.87060546875, -1.7008056640625, -1.531005859375, -1.3612060546875, -1.19140625, -1.0216064453125, -0.851806640625, -0.6820068359375, -0.51220703125, -0.3424072265625, -0.172607421875, -0.0028076171875, 0.1669921875, 0.3367919921875, 0.506591796875, 0.6763916015625, 0.84619140625, 1.0159912109375, 1.185791015625, 1.3555908203125, 1.525390625, 1.6951904296875, 1.864990234375, 2.0347900390625, 2.20458984375, 2.3743896484375, 2.544189453125, 2.7139892578125, 2.8837890625, 3.0535888671875, 3.223388671875, 3.3931884765625, 3.56298828125, 3.7327880859375, 3.902587890625, 4.0723876953125, 4.2421875]}, "gradients/encoder.encoder.layers.10.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 3.0, 3.0, 6.0, 2.0, 7.0, 11.0, 16.0, 21.0, 43.0, 57.0, 74.0, 116.0, 122.0, 130.0, 97.0, 90.0, 77.0, 41.0, 26.0, 22.0, 19.0, 9.0, 12.0, 2.0, 4.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0], "bins": [-4.19140625, -4.0869140625, -3.982421875, -3.8779296875, -3.7734375, -3.6689453125, -3.564453125, -3.4599609375, -3.35546875, -3.2509765625, -3.146484375, -3.0419921875, -2.9375, -2.8330078125, -2.728515625, -2.6240234375, -2.51953125, -2.4150390625, -2.310546875, -2.2060546875, -2.1015625, -1.9970703125, -1.892578125, -1.7880859375, -1.68359375, -1.5791015625, -1.474609375, -1.3701171875, -1.265625, -1.1611328125, -1.056640625, -0.9521484375, -0.84765625, -0.7431640625, -0.638671875, -0.5341796875, -0.4296875, -0.3251953125, -0.220703125, -0.1162109375, -0.01171875, 0.0927734375, 0.197265625, 0.3017578125, 0.40625, 0.5107421875, 0.615234375, 0.7197265625, 0.82421875, 0.9287109375, 1.033203125, 1.1376953125, 1.2421875, 1.3466796875, 1.451171875, 1.5556640625, 1.66015625, 1.7646484375, 1.869140625, 1.9736328125, 2.078125, 2.1826171875, 2.287109375, 2.3916015625, 2.49609375]}, "gradients/encoder.encoder.layers.10.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 2.0, 2.0, 2.0, 12.0, 23.0, 58.0, 120.0, 205.0, 219.0, 183.0, 104.0, 37.0, 21.0, 14.0, 4.0, 2.0, 2.0, 1.0, 0.0, 3.0, 2.0], "bins": [-101.10289001464844, -99.13160705566406, -97.16031646728516, -95.18903350830078, -93.2177505493164, -91.2464599609375, -89.27517700195312, -87.30389404296875, -85.33261108398438, -83.361328125, -81.3900375366211, -79.41875457763672, -77.44747161865234, -75.47618103027344, -73.50489807128906, -71.53361511230469, -69.56232452392578, -67.5910415649414, -65.6197509765625, -63.648468017578125, -61.67718505859375, -59.70589828491211, -57.73461151123047, -55.763328552246094, -53.79204177856445, -51.82075500488281, -49.84947204589844, -47.8781852722168, -45.906898498535156, -43.93561553955078, -41.96432876586914, -39.9930419921875, -38.02176284790039, -36.05047607421875, -34.079193115234375, -32.107906341552734, -30.136621475219727, -28.16533660888672, -26.194049835205078, -24.22276496887207, -22.251480102539062, -20.280195236206055, -18.308910369873047, -16.337623596191406, -14.366338729858398, -12.39505386352539, -10.423768043518066, -8.452482223510742, -6.481197357177734, -4.509912014007568, -2.5386266708374023, -0.5673413276672363, 1.4039440155029297, 3.3752288818359375, 5.346514701843262, 7.317800521850586, 9.289085388183594, 11.260370254516602, 13.231656074523926, 15.20294189453125, 17.174226760864258, 19.145511627197266, 21.116798400878906, 23.088083267211914, 25.059368133544922]}, "gradients/encoder.encoder.layers.10.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 5.0, 2.0, 4.0, 5.0, 3.0, 11.0, 11.0, 11.0, 13.0, 12.0, 21.0, 18.0, 18.0, 18.0, 26.0, 35.0, 31.0, 38.0, 41.0, 68.0, 50.0, 50.0, 56.0, 45.0, 51.0, 52.0, 47.0, 36.0, 27.0, 32.0, 32.0, 30.0, 16.0, 13.0, 13.0, 13.0, 13.0, 8.0, 7.0, 3.0, 8.0, 4.0, 4.0, 2.0, 2.0, 3.0, 1.0, 2.0, 1.0, 3.0], "bins": [-32.41179656982422, -31.51688003540039, -30.621965408325195, -29.72705078125, -28.832134246826172, -27.937217712402344, -27.04230308532715, -26.147388458251953, -25.252471923828125, -24.357555389404297, -23.4626407623291, -22.567726135253906, -21.672809600830078, -20.77789306640625, -19.882978439331055, -18.98806381225586, -18.09314727783203, -17.198230743408203, -16.303316116333008, -15.408400535583496, -14.513484954833984, -13.618569374084473, -12.723653793334961, -11.82873821258545, -10.933822631835938, -10.038907051086426, -9.143991470336914, -8.249075889587402, -7.354160308837891, -6.459244728088379, -5.564329147338867, -4.6694135665893555, -3.774496078491211, -2.879580497741699, -1.9846649169921875, -1.0897493362426758, -0.19483375549316406, 0.7000818252563477, 1.5949974060058594, 2.489912986755371, 3.384828567504883, 4.2797441482543945, 5.174659729003906, 6.069575309753418, 6.96449089050293, 7.859406471252441, 8.754322052001953, 9.649237632751465, 10.544153213500977, 11.439068794250488, 12.333984375, 13.228899955749512, 14.123815536499023, 15.018731117248535, 15.913646697998047, 16.808563232421875, 17.70347785949707, 18.598392486572266, 19.493309020996094, 20.388225555419922, 21.283140182495117, 22.178054809570312, 23.07297134399414, 23.96788787841797, 24.862802505493164]}, "gradients/encoder.encoder.layers.9.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 2.0, 4.0, 5.0, 1.0, 3.0, 2.0, 7.0, 6.0, 15.0, 17.0, 21.0, 35.0, 55.0, 91.0, 191.0, 394.0, 1223.0, 8189.0, 4173585.0, 8234.0, 1380.0, 424.0, 204.0, 92.0, 41.0, 25.0, 20.0, 14.0, 8.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-18.375, -17.885009765625, -17.39501953125, -16.905029296875, -16.4150390625, -15.925048828125, -15.43505859375, -14.945068359375, -14.455078125, -13.965087890625, -13.47509765625, -12.985107421875, -12.4951171875, -12.005126953125, -11.51513671875, -11.025146484375, -10.53515625, -10.045166015625, -9.55517578125, -9.065185546875, -8.5751953125, -8.085205078125, -7.59521484375, -7.105224609375, -6.615234375, -6.125244140625, -5.63525390625, -5.145263671875, -4.6552734375, -4.165283203125, -3.67529296875, -3.185302734375, -2.6953125, -2.205322265625, -1.71533203125, -1.225341796875, -0.7353515625, -0.245361328125, 0.24462890625, 0.734619140625, 1.224609375, 1.714599609375, 2.20458984375, 2.694580078125, 3.1845703125, 3.674560546875, 4.16455078125, 4.654541015625, 5.14453125, 5.634521484375, 6.12451171875, 6.614501953125, 7.1044921875, 7.594482421875, 8.08447265625, 8.574462890625, 9.064453125, 9.554443359375, 10.04443359375, 10.534423828125, 11.0244140625, 11.514404296875, 12.00439453125, 12.494384765625, 12.984375]}, "gradients/encoder.encoder.layers.9.feed_forward.output_dense.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 5.0, 4.0, 1.0, 4.0, 4.0, 8.0, 8.0, 7.0, 13.0, 28.0, 37.0, 48.0, 65.0, 86.0, 80.0, 107.0, 102.0, 87.0, 80.0, 54.0, 57.0, 39.0, 19.0, 20.0, 11.0, 14.0, 7.0, 3.0, 3.0, 5.0, 1.0, 2.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.302734375, -2.225677490234375, -2.14862060546875, -2.071563720703125, -1.9945068359375, -1.917449951171875, -1.84039306640625, -1.763336181640625, -1.686279296875, -1.609222412109375, -1.53216552734375, -1.455108642578125, -1.3780517578125, -1.300994873046875, -1.22393798828125, -1.146881103515625, -1.06982421875, -0.992767333984375, -0.91571044921875, -0.838653564453125, -0.7615966796875, -0.684539794921875, -0.60748291015625, -0.530426025390625, -0.453369140625, -0.376312255859375, -0.29925537109375, -0.222198486328125, -0.1451416015625, -0.068084716796875, 0.00897216796875, 0.086029052734375, 0.1630859375, 0.240142822265625, 0.31719970703125, 0.394256591796875, 0.4713134765625, 0.548370361328125, 0.62542724609375, 0.702484130859375, 0.779541015625, 0.856597900390625, 0.93365478515625, 1.010711669921875, 1.0877685546875, 1.164825439453125, 1.24188232421875, 1.318939208984375, 1.39599609375, 1.473052978515625, 1.55010986328125, 1.627166748046875, 1.7042236328125, 1.781280517578125, 1.85833740234375, 1.935394287109375, 2.012451171875, 2.089508056640625, 2.16656494140625, 2.243621826171875, 2.3206787109375, 2.397735595703125, 2.47479248046875, 2.551849365234375, 2.62890625]}, "gradients/encoder.encoder.layers.9.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 3.0, 3.0, 1.0, 2.0, 7.0, 7.0, 12.0, 16.0, 18.0, 29.0, 28.0, 39.0, 51.0, 64.0, 91.0, 116.0, 182.0, 216.0, 358.0, 542.0, 844.0, 1504.0, 2537.0, 5293.0, 13827.0, 135871.0, 4002343.0, 16931.0, 6025.0, 2913.0, 1576.0, 931.0, 583.0, 387.0, 264.0, 184.0, 132.0, 96.0, 63.0, 49.0, 44.0, 21.0, 18.0, 16.0, 13.0, 9.0, 10.0, 4.0, 7.0, 6.0, 5.0, 4.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "bins": [-7.14453125, -6.921875, -6.69921875, -6.4765625, -6.25390625, -6.03125, -5.80859375, -5.5859375, -5.36328125, -5.140625, -4.91796875, -4.6953125, -4.47265625, -4.25, -4.02734375, -3.8046875, -3.58203125, -3.359375, -3.13671875, -2.9140625, -2.69140625, -2.46875, -2.24609375, -2.0234375, -1.80078125, -1.578125, -1.35546875, -1.1328125, -0.91015625, -0.6875, -0.46484375, -0.2421875, -0.01953125, 0.203125, 0.42578125, 0.6484375, 0.87109375, 1.09375, 1.31640625, 1.5390625, 1.76171875, 1.984375, 2.20703125, 2.4296875, 2.65234375, 2.875, 3.09765625, 3.3203125, 3.54296875, 3.765625, 3.98828125, 4.2109375, 4.43359375, 4.65625, 4.87890625, 5.1015625, 5.32421875, 5.546875, 5.76953125, 5.9921875, 6.21484375, 6.4375, 6.66015625, 6.8828125, 7.10546875]}, "gradients/encoder.encoder.layers.9.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 3.0, 2.0, 4.0, 1.0, 2.0, 4.0, 7.0, 8.0, 11.0, 18.0, 66.0, 3811.0, 81.0, 27.0, 12.0, 9.0, 6.0, 5.0, 2.0, 3.0, 0.0, 0.0, 3.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.515625, -2.438262939453125, -2.36090087890625, -2.283538818359375, -2.2061767578125, -2.128814697265625, -2.05145263671875, -1.974090576171875, -1.896728515625, -1.819366455078125, -1.74200439453125, -1.664642333984375, -1.5872802734375, -1.509918212890625, -1.43255615234375, -1.355194091796875, -1.27783203125, -1.200469970703125, -1.12310791015625, -1.045745849609375, -0.9683837890625, -0.891021728515625, -0.81365966796875, -0.736297607421875, -0.658935546875, -0.581573486328125, -0.50421142578125, -0.426849365234375, -0.3494873046875, -0.272125244140625, -0.19476318359375, -0.117401123046875, -0.0400390625, 0.037322998046875, 0.11468505859375, 0.192047119140625, 0.2694091796875, 0.346771240234375, 0.42413330078125, 0.501495361328125, 0.578857421875, 0.656219482421875, 0.73358154296875, 0.810943603515625, 0.8883056640625, 0.965667724609375, 1.04302978515625, 1.120391845703125, 1.19775390625, 1.275115966796875, 1.35247802734375, 1.429840087890625, 1.5072021484375, 1.584564208984375, 1.66192626953125, 1.739288330078125, 1.816650390625, 1.894012451171875, 1.97137451171875, 2.048736572265625, 2.1260986328125, 2.203460693359375, 2.28082275390625, 2.358184814453125, 2.435546875]}, "gradients/encoder.encoder.layers.9.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 2.0, 3.0, 1.0, 7.0, 8.0, 18.0, 22.0, 45.0, 78.0, 108.0, 161.0, 179.0, 136.0, 91.0, 52.0, 47.0, 24.0, 13.0, 2.0, 6.0, 2.0, 2.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.41562557220459, -4.199612617492676, -3.98360013961792, -3.767587423324585, -3.55157470703125, -3.335561752319336, -3.11954927444458, -2.903536319732666, -2.68752384185791, -2.471511125564575, -2.2554984092712402, -2.0394856929779053, -1.8234729766845703, -1.6074601411819458, -1.3914474248886108, -1.1754347085952759, -0.9594218730926514, -0.7434091567993164, -0.5273964405059814, -0.3113836646080017, -0.09537094831466675, 0.12064182758331299, 0.33665454387664795, 0.5526672601699829, 0.7686799764633179, 0.9846926927566528, 1.2007054090499878, 1.4167182445526123, 1.6327309608459473, 1.8487436771392822, 2.064756393432617, 2.280769109725952, 2.496781826019287, 2.712794542312622, 2.928807258605957, 3.144819974899292, 3.360832691192627, 3.576845645904541, 3.792858123779297, 4.008871078491211, 4.224883556365967, 4.440896511077881, 4.656908988952637, 4.872921943664551, 5.088934421539307, 5.304947376251221, 5.520959854125977, 5.736972808837891, 5.952985763549805, 6.168998718261719, 6.385011196136475, 6.601024150848389, 6.8170366287231445, 7.033049583435059, 7.2490620613098145, 7.4650750160217285, 7.681087493896484, 7.897100448608398, 8.113113403320312, 8.32912540435791, 8.545138359069824, 8.761151313781738, 8.977164268493652, 9.19317626953125, 9.409189224243164]}, "gradients/encoder.encoder.layers.9.final_layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0, 3.0, 3.0, 3.0, 7.0, 12.0, 10.0, 12.0, 13.0, 23.0, 19.0, 18.0, 23.0, 29.0, 28.0, 33.0, 22.0, 28.0, 36.0, 39.0, 41.0, 40.0, 41.0, 49.0, 40.0, 28.0, 44.0, 34.0, 37.0, 28.0, 29.0, 38.0, 27.0, 22.0, 24.0, 14.0, 16.0, 19.0, 12.0, 10.0, 9.0, 6.0, 5.0, 9.0, 7.0, 5.0, 2.0, 4.0, 2.0, 1.0, 3.0, 0.0, 1.0, 2.0, 1.0, 1.0], "bins": [-2.6977663040161133, -2.6127142906188965, -2.5276622772216797, -2.442610263824463, -2.357558250427246, -2.27250599861145, -2.1874539852142334, -2.1024019718170166, -2.0173499584198, -1.932297945022583, -1.8472459316253662, -1.7621937990188599, -1.677141785621643, -1.5920897722244263, -1.50703763961792, -1.4219856262207031, -1.3369336128234863, -1.2518815994262695, -1.1668295860290527, -1.0817774534225464, -0.9967254400253296, -0.9116734266281128, -0.8266213536262512, -0.7415692806243896, -0.6565172672271729, -0.571465253829956, -0.4864131808280945, -0.4013611376285553, -0.3163090944290161, -0.23125705122947693, -0.14620500802993774, -0.06115293502807617, 0.023899078369140625, 0.10895112156867981, 0.194003164768219, 0.2790552079677582, 0.36410725116729736, 0.44915929436683655, 0.5342113375663757, 0.6192634105682373, 0.7043154239654541, 0.7893674373626709, 0.8744195103645325, 0.959471583366394, 1.0445235967636108, 1.1295756101608276, 1.214627742767334, 1.2996797561645508, 1.3847317695617676, 1.4697837829589844, 1.5548357963562012, 1.6398879289627075, 1.7249399423599243, 1.8099919557571411, 1.8950440883636475, 1.9800961017608643, 2.065148115158081, 2.150200128555298, 2.2352521419525146, 2.3203041553497314, 2.4053564071655273, 2.490408420562744, 2.575460433959961, 2.6605124473571777, 2.7455644607543945]}, "gradients/encoder.encoder.layers.9.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 2.0, 2.0, 0.0, 2.0, 3.0, 6.0, 4.0, 10.0, 6.0, 14.0, 14.0, 18.0, 29.0, 51.0, 57.0, 109.0, 187.0, 295.0, 545.0, 1267.0, 3575.0, 11530.0, 53305.0, 362240.0, 513143.0, 78689.0, 15763.0, 4509.0, 1642.0, 668.0, 353.0, 206.0, 123.0, 72.0, 30.0, 25.0, 16.0, 22.0, 6.0, 4.0, 8.0, 6.0, 3.0, 3.0, 4.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.90234375, -5.7388916015625, -5.575439453125, -5.4119873046875, -5.24853515625, -5.0850830078125, -4.921630859375, -4.7581787109375, -4.5947265625, -4.4312744140625, -4.267822265625, -4.1043701171875, -3.94091796875, -3.7774658203125, -3.614013671875, -3.4505615234375, -3.287109375, -3.1236572265625, -2.960205078125, -2.7967529296875, -2.63330078125, -2.4698486328125, -2.306396484375, -2.1429443359375, -1.9794921875, -1.8160400390625, -1.652587890625, -1.4891357421875, -1.32568359375, -1.1622314453125, -0.998779296875, -0.8353271484375, -0.671875, -0.5084228515625, -0.344970703125, -0.1815185546875, -0.01806640625, 0.1453857421875, 0.308837890625, 0.4722900390625, 0.6357421875, 0.7991943359375, 0.962646484375, 1.1260986328125, 1.28955078125, 1.4530029296875, 1.616455078125, 1.7799072265625, 1.943359375, 2.1068115234375, 2.270263671875, 2.4337158203125, 2.59716796875, 2.7606201171875, 2.924072265625, 3.0875244140625, 3.2509765625, 3.4144287109375, 3.577880859375, 3.7413330078125, 3.90478515625, 4.0682373046875, 4.231689453125, 4.3951416015625, 4.55859375]}, "gradients/encoder.encoder.layers.9.attention.out_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 5.0, 1.0, 2.0, 3.0, 4.0, 3.0, 5.0, 7.0, 14.0, 13.0, 8.0, 18.0, 40.0, 29.0, 47.0, 39.0, 77.0, 78.0, 65.0, 85.0, 82.0, 70.0, 57.0, 52.0, 41.0, 34.0, 28.0, 24.0, 20.0, 13.0, 15.0, 8.0, 6.0, 2.0, 10.0, 4.0, 3.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.185546875, -2.1203460693359375, -2.055145263671875, -1.9899444580078125, -1.92474365234375, -1.8595428466796875, -1.794342041015625, -1.7291412353515625, -1.6639404296875, -1.5987396240234375, -1.533538818359375, -1.4683380126953125, -1.40313720703125, -1.3379364013671875, -1.272735595703125, -1.2075347900390625, -1.142333984375, -1.0771331787109375, -1.011932373046875, -0.9467315673828125, -0.88153076171875, -0.8163299560546875, -0.751129150390625, -0.6859283447265625, -0.6207275390625, -0.5555267333984375, -0.490325927734375, -0.4251251220703125, -0.35992431640625, -0.2947235107421875, -0.229522705078125, -0.1643218994140625, -0.09912109375, -0.0339202880859375, 0.031280517578125, 0.0964813232421875, 0.16168212890625, 0.2268829345703125, 0.292083740234375, 0.3572845458984375, 0.4224853515625, 0.4876861572265625, 0.552886962890625, 0.6180877685546875, 0.68328857421875, 0.7484893798828125, 0.813690185546875, 0.8788909912109375, 0.944091796875, 1.0092926025390625, 1.074493408203125, 1.1396942138671875, 1.20489501953125, 1.2700958251953125, 1.335296630859375, 1.4004974365234375, 1.4656982421875, 1.5308990478515625, 1.596099853515625, 1.6613006591796875, 1.72650146484375, 1.7917022705078125, 1.856903076171875, 1.9221038818359375, 1.9873046875]}, "gradients/encoder.encoder.layers.9.attention.v_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 4.0, 7.0, 9.0, 9.0, 17.0, 19.0, 27.0, 59.0, 75.0, 135.0, 314.0, 663.0, 1882.0, 7877.0, 50052.0, 543069.0, 398045.0, 37294.0, 6158.0, 1594.0, 633.0, 277.0, 142.0, 56.0, 34.0, 35.0, 22.0, 22.0, 8.0, 3.0, 3.0, 2.0, 3.0, 2.0, 3.0, 1.0, 2.0, 2.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.44140625, -6.233154296875, -6.02490234375, -5.816650390625, -5.6083984375, -5.400146484375, -5.19189453125, -4.983642578125, -4.775390625, -4.567138671875, -4.35888671875, -4.150634765625, -3.9423828125, -3.734130859375, -3.52587890625, -3.317626953125, -3.109375, -2.901123046875, -2.69287109375, -2.484619140625, -2.2763671875, -2.068115234375, -1.85986328125, -1.651611328125, -1.443359375, -1.235107421875, -1.02685546875, -0.818603515625, -0.6103515625, -0.402099609375, -0.19384765625, 0.014404296875, 0.22265625, 0.430908203125, 0.63916015625, 0.847412109375, 1.0556640625, 1.263916015625, 1.47216796875, 1.680419921875, 1.888671875, 2.096923828125, 2.30517578125, 2.513427734375, 2.7216796875, 2.929931640625, 3.13818359375, 3.346435546875, 3.5546875, 3.762939453125, 3.97119140625, 4.179443359375, 4.3876953125, 4.595947265625, 4.80419921875, 5.012451171875, 5.220703125, 5.428955078125, 5.63720703125, 5.845458984375, 6.0537109375, 6.261962890625, 6.47021484375, 6.678466796875, 6.88671875]}, "gradients/encoder.encoder.layers.9.attention.v_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 2.0, 3.0, 1.0, 2.0, 4.0, 7.0, 7.0, 9.0, 16.0, 14.0, 23.0, 22.0, 44.0, 44.0, 35.0, 51.0, 53.0, 64.0, 82.0, 71.0, 51.0, 63.0, 54.0, 50.0, 48.0, 42.0, 37.0, 24.0, 24.0, 28.0, 8.0, 13.0, 9.0, 5.0, 2.0, 2.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.0546875, -6.7564697265625, -6.458251953125, -6.1600341796875, -5.86181640625, -5.5635986328125, -5.265380859375, -4.9671630859375, -4.6689453125, -4.3707275390625, -4.072509765625, -3.7742919921875, -3.47607421875, -3.1778564453125, -2.879638671875, -2.5814208984375, -2.283203125, -1.9849853515625, -1.686767578125, -1.3885498046875, -1.09033203125, -0.7921142578125, -0.493896484375, -0.1956787109375, 0.1025390625, 0.4007568359375, 0.698974609375, 0.9971923828125, 1.29541015625, 1.5936279296875, 1.891845703125, 2.1900634765625, 2.48828125, 2.7864990234375, 3.084716796875, 3.3829345703125, 3.68115234375, 3.9793701171875, 4.277587890625, 4.5758056640625, 4.8740234375, 5.1722412109375, 5.470458984375, 5.7686767578125, 6.06689453125, 6.3651123046875, 6.663330078125, 6.9615478515625, 7.259765625, 7.5579833984375, 7.856201171875, 8.1544189453125, 8.45263671875, 8.7508544921875, 9.049072265625, 9.3472900390625, 9.6455078125, 9.9437255859375, 10.241943359375, 10.5401611328125, 10.83837890625, 11.1365966796875, 11.434814453125, 11.7330322265625, 12.03125]}, "gradients/encoder.encoder.layers.9.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 3.0, 7.0, 6.0, 15.0, 15.0, 18.0, 34.0, 33.0, 91.0, 113.0, 219.0, 549.0, 1618.0, 12106.0, 583326.0, 437547.0, 10163.0, 1638.0, 501.0, 227.0, 119.0, 77.0, 54.0, 26.0, 13.0, 16.0, 9.0, 6.0, 5.0, 1.0, 3.0, 1.0, 0.0, 0.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.88671875, -4.74285888671875, -4.5989990234375, -4.45513916015625, -4.311279296875, -4.16741943359375, -4.0235595703125, -3.87969970703125, -3.73583984375, -3.59197998046875, -3.4481201171875, -3.30426025390625, -3.160400390625, -3.01654052734375, -2.8726806640625, -2.72882080078125, -2.5849609375, -2.44110107421875, -2.2972412109375, -2.15338134765625, -2.009521484375, -1.86566162109375, -1.7218017578125, -1.57794189453125, -1.43408203125, -1.29022216796875, -1.1463623046875, -1.00250244140625, -0.858642578125, -0.71478271484375, -0.5709228515625, -0.42706298828125, -0.283203125, -0.13934326171875, 0.0045166015625, 0.14837646484375, 0.292236328125, 0.43609619140625, 0.5799560546875, 0.72381591796875, 0.86767578125, 1.01153564453125, 1.1553955078125, 1.29925537109375, 1.443115234375, 1.58697509765625, 1.7308349609375, 1.87469482421875, 2.0185546875, 2.16241455078125, 2.3062744140625, 2.45013427734375, 2.593994140625, 2.73785400390625, 2.8817138671875, 3.02557373046875, 3.16943359375, 3.31329345703125, 3.4571533203125, 3.60101318359375, 3.744873046875, 3.88873291015625, 4.0325927734375, 4.17645263671875, 4.3203125]}, "gradients/encoder.encoder.layers.9.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 3.0, 7.0, 3.0, 4.0, 4.0, 10.0, 12.0, 16.0, 16.0, 18.0, 29.0, 43.0, 65.0, 66.0, 89.0, 92.0, 95.0, 74.0, 83.0, 68.0, 57.0, 40.0, 28.0, 26.0, 13.0, 16.0, 10.0, 11.0, 4.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.0003323554992675781, -0.00032315775752067566, -0.0003139600157737732, -0.00030476227402687073, -0.00029556453227996826, -0.0002863667905330658, -0.00027716904878616333, -0.00026797130703926086, -0.0002587735652923584, -0.00024957582354545593, -0.00024037808179855347, -0.000231180340051651, -0.00022198259830474854, -0.00021278485655784607, -0.0002035871148109436, -0.00019438937306404114, -0.00018519163131713867, -0.0001759938895702362, -0.00016679614782333374, -0.00015759840607643127, -0.0001484006643295288, -0.00013920292258262634, -0.00013000518083572388, -0.00012080743908882141, -0.00011160969734191895, -0.00010241195559501648, -9.321421384811401e-05, -8.401647210121155e-05, -7.481873035430908e-05, -6.562098860740662e-05, -5.642324686050415e-05, -4.7225505113601685e-05, -3.802776336669922e-05, -2.8830021619796753e-05, -1.9632279872894287e-05, -1.0434538125991821e-05, -1.2367963790893555e-06, 7.96094536781311e-06, 1.7158687114715576e-05, 2.6356428861618042e-05, 3.555417060852051e-05, 4.4751912355422974e-05, 5.394965410232544e-05, 6.31473958492279e-05, 7.234513759613037e-05, 8.154287934303284e-05, 9.07406210899353e-05, 9.993836283683777e-05, 0.00010913610458374023, 0.0001183338463306427, 0.00012753158807754517, 0.00013672932982444763, 0.0001459270715713501, 0.00015512481331825256, 0.00016432255506515503, 0.0001735202968120575, 0.00018271803855895996, 0.00019191578030586243, 0.0002011135220527649, 0.00021031126379966736, 0.00021950900554656982, 0.0002287067472934723, 0.00023790448904037476, 0.0002471022307872772, 0.0002562999725341797]}, "gradients/encoder.encoder.layers.9.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 3.0, 0.0, 5.0, 7.0, 7.0, 12.0, 18.0, 14.0, 42.0, 50.0, 94.0, 157.0, 302.0, 686.0, 2013.0, 9640.0, 130107.0, 836979.0, 59710.0, 6067.0, 1498.0, 532.0, 217.0, 153.0, 88.0, 57.0, 31.0, 19.0, 12.0, 16.0, 5.0, 3.0, 4.0, 2.0, 1.0, 3.0, 4.0, 3.0, 2.0, 1.0, 1.0, 1.0, 2.0], "bins": [-3.919921875, -3.817779541015625, -3.71563720703125, -3.613494873046875, -3.5113525390625, -3.409210205078125, -3.30706787109375, -3.204925537109375, -3.102783203125, -3.000640869140625, -2.89849853515625, -2.796356201171875, -2.6942138671875, -2.592071533203125, -2.48992919921875, -2.387786865234375, -2.28564453125, -2.183502197265625, -2.08135986328125, -1.979217529296875, -1.8770751953125, -1.774932861328125, -1.67279052734375, -1.570648193359375, -1.468505859375, -1.366363525390625, -1.26422119140625, -1.162078857421875, -1.0599365234375, -0.957794189453125, -0.85565185546875, -0.753509521484375, -0.6513671875, -0.549224853515625, -0.44708251953125, -0.344940185546875, -0.2427978515625, -0.140655517578125, -0.03851318359375, 0.063629150390625, 0.165771484375, 0.267913818359375, 0.37005615234375, 0.472198486328125, 0.5743408203125, 0.676483154296875, 0.77862548828125, 0.880767822265625, 0.98291015625, 1.085052490234375, 1.18719482421875, 1.289337158203125, 1.3914794921875, 1.493621826171875, 1.59576416015625, 1.697906494140625, 1.800048828125, 1.902191162109375, 2.00433349609375, 2.106475830078125, 2.2086181640625, 2.310760498046875, 2.41290283203125, 2.515045166015625, 2.6171875]}, "gradients/encoder.encoder.layers.9.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 2.0, 5.0, 6.0, 11.0, 20.0, 24.0, 33.0, 55.0, 87.0, 129.0, 161.0, 154.0, 112.0, 78.0, 43.0, 34.0, 27.0, 15.0, 5.0, 3.0, 3.0, 2.0, 0.0, 2.0, 3.0, 1.0, 0.0, 0.0, 1.0], "bins": [-5.64453125, -5.521270751953125, -5.39801025390625, -5.274749755859375, -5.1514892578125, -5.028228759765625, -4.90496826171875, -4.781707763671875, -4.658447265625, -4.535186767578125, -4.41192626953125, -4.288665771484375, -4.1654052734375, -4.042144775390625, -3.91888427734375, -3.795623779296875, -3.67236328125, -3.549102783203125, -3.42584228515625, -3.302581787109375, -3.1793212890625, -3.056060791015625, -2.93280029296875, -2.809539794921875, -2.686279296875, -2.563018798828125, -2.43975830078125, -2.316497802734375, -2.1932373046875, -2.069976806640625, -1.94671630859375, -1.823455810546875, -1.7001953125, -1.576934814453125, -1.45367431640625, -1.330413818359375, -1.2071533203125, -1.083892822265625, -0.96063232421875, -0.837371826171875, -0.714111328125, -0.590850830078125, -0.46759033203125, -0.344329833984375, -0.2210693359375, -0.097808837890625, 0.02545166015625, 0.148712158203125, 0.27197265625, 0.395233154296875, 0.51849365234375, 0.641754150390625, 0.7650146484375, 0.888275146484375, 1.01153564453125, 1.134796142578125, 1.258056640625, 1.381317138671875, 1.50457763671875, 1.627838134765625, 1.7510986328125, 1.874359130859375, 1.99761962890625, 2.120880126953125, 2.244140625]}, "gradients/encoder.encoder.layers.9.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 4.0, 4.0, 15.0, 14.0, 35.0, 51.0, 88.0, 136.0, 146.0, 143.0, 144.0, 99.0, 53.0, 29.0, 23.0, 14.0, 4.0, 4.0, 3.0, 1.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-61.3626708984375, -60.002540588378906, -58.64241027832031, -57.28227996826172, -55.922149658203125, -54.5620231628418, -53.2018928527832, -51.84176254272461, -50.481632232666016, -49.12150192260742, -47.76137161254883, -46.401241302490234, -45.041114807128906, -43.68098449707031, -42.32085418701172, -40.960723876953125, -39.60059356689453, -38.24046325683594, -36.880332946777344, -35.52020263671875, -34.160072326660156, -32.79994583129883, -31.439815521240234, -30.07968521118164, -28.719554901123047, -27.359424591064453, -25.99929428100586, -24.6391658782959, -23.279035568237305, -21.91890525817871, -20.55877685546875, -19.198646545410156, -17.838516235351562, -16.47838592529297, -15.118256568908691, -13.758127212524414, -12.39799690246582, -11.037866592407227, -9.67773723602295, -8.317607879638672, -6.957477569580078, -5.597347736358643, -4.237217903137207, -2.8770880699157715, -1.516958236694336, -0.1568284034729004, 1.2033014297485352, 2.5634307861328125, 3.9235610961914062, 5.283690929412842, 6.643820762634277, 8.003950119018555, 9.364080429077148, 10.724210739135742, 12.08434009552002, 13.444469451904297, 14.80459976196289, 16.164730072021484, 17.524860382080078, 18.88498878479004, 20.245119094848633, 21.605249404907227, 22.965377807617188, 24.32550811767578, 25.685638427734375]}, "gradients/encoder.encoder.layers.9.layer_norm.bias": {"_type": "histogram", "values": [1.0, 2.0, 3.0, 1.0, 4.0, 3.0, 4.0, 1.0, 6.0, 3.0, 6.0, 7.0, 4.0, 9.0, 15.0, 9.0, 15.0, 22.0, 17.0, 21.0, 18.0, 27.0, 24.0, 33.0, 39.0, 42.0, 44.0, 50.0, 50.0, 49.0, 55.0, 37.0, 44.0, 35.0, 36.0, 38.0, 30.0, 27.0, 30.0, 24.0, 22.0, 17.0, 15.0, 15.0, 13.0, 8.0, 7.0, 4.0, 6.0, 7.0, 6.0, 2.0, 5.0, 3.0, 4.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-25.166027069091797, -24.3118953704834, -23.457763671875, -22.6036319732666, -21.749500274658203, -20.895366668701172, -20.041234970092773, -19.187103271484375, -18.332971572875977, -17.478839874267578, -16.62470817565918, -15.770575523376465, -14.916443824768066, -14.062312126159668, -13.208179473876953, -12.354047775268555, -11.499916076660156, -10.645784378051758, -9.79165267944336, -8.937520027160645, -8.083388328552246, -7.229256629943848, -6.375124454498291, -5.520992279052734, -4.666860580444336, -3.8127286434173584, -2.958596706390381, -2.1044647693634033, -1.2503328323364258, -0.39620113372802734, 0.4579310417175293, 1.312063217163086, 2.1661930084228516, 3.020324945449829, 3.8744568824768066, 4.728589057922363, 5.582720756530762, 6.43685245513916, 7.290984630584717, 8.145116806030273, 8.999248504638672, 9.85338020324707, 10.707511901855469, 11.561644554138184, 12.415776252746582, 13.26990795135498, 14.124040603637695, 14.978172302246094, 15.832304000854492, 16.68643569946289, 17.54056739807129, 18.394699096679688, 19.24883270263672, 20.102962493896484, 20.957096099853516, 21.811227798461914, 22.665359497070312, 23.51949119567871, 24.37362289428711, 25.227754592895508, 26.081886291503906, 26.936019897460938, 27.790151596069336, 28.644283294677734, 29.498414993286133]}, "gradients/encoder.encoder.layers.8.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 3.0, 2.0, 5.0, 7.0, 7.0, 5.0, 8.0, 9.0, 15.0, 19.0, 26.0, 35.0, 51.0, 76.0, 118.0, 195.0, 328.0, 622.0, 1301.0, 3325.0, 11223.0, 3986869.0, 174877.0, 9544.0, 3031.0, 1296.0, 596.0, 279.0, 174.0, 82.0, 61.0, 39.0, 19.0, 12.0, 14.0, 7.0, 5.0, 6.0, 0.0, 0.0, 1.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.98828125, -6.7823486328125, -6.576416015625, -6.3704833984375, -6.16455078125, -5.9586181640625, -5.752685546875, -5.5467529296875, -5.3408203125, -5.1348876953125, -4.928955078125, -4.7230224609375, -4.51708984375, -4.3111572265625, -4.105224609375, -3.8992919921875, -3.693359375, -3.4874267578125, -3.281494140625, -3.0755615234375, -2.86962890625, -2.6636962890625, -2.457763671875, -2.2518310546875, -2.0458984375, -1.8399658203125, -1.634033203125, -1.4281005859375, -1.22216796875, -1.0162353515625, -0.810302734375, -0.6043701171875, -0.3984375, -0.1925048828125, 0.013427734375, 0.2193603515625, 0.42529296875, 0.6312255859375, 0.837158203125, 1.0430908203125, 1.2490234375, 1.4549560546875, 1.660888671875, 1.8668212890625, 2.07275390625, 2.2786865234375, 2.484619140625, 2.6905517578125, 2.896484375, 3.1024169921875, 3.308349609375, 3.5142822265625, 3.72021484375, 3.9261474609375, 4.132080078125, 4.3380126953125, 4.5439453125, 4.7498779296875, 4.955810546875, 5.1617431640625, 5.36767578125, 5.5736083984375, 5.779541015625, 5.9854736328125, 6.19140625]}, "gradients/encoder.encoder.layers.8.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 2.0, 2.0, 3.0, 6.0, 11.0, 9.0, 13.0, 28.0, 30.0, 40.0, 54.0, 76.0, 91.0, 89.0, 90.0, 101.0, 83.0, 74.0, 54.0, 41.0, 30.0, 24.0, 11.0, 13.0, 8.0, 10.0, 6.0, 3.0, 4.0, 3.0, 3.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.876953125, -2.798309326171875, -2.71966552734375, -2.641021728515625, -2.5623779296875, -2.483734130859375, -2.40509033203125, -2.326446533203125, -2.247802734375, -2.169158935546875, -2.09051513671875, -2.011871337890625, -1.9332275390625, -1.854583740234375, -1.77593994140625, -1.697296142578125, -1.61865234375, -1.540008544921875, -1.46136474609375, -1.382720947265625, -1.3040771484375, -1.225433349609375, -1.14678955078125, -1.068145751953125, -0.989501953125, -0.910858154296875, -0.83221435546875, -0.753570556640625, -0.6749267578125, -0.596282958984375, -0.51763916015625, -0.438995361328125, -0.3603515625, -0.281707763671875, -0.20306396484375, -0.124420166015625, -0.0457763671875, 0.032867431640625, 0.11151123046875, 0.190155029296875, 0.268798828125, 0.347442626953125, 0.42608642578125, 0.504730224609375, 0.5833740234375, 0.662017822265625, 0.74066162109375, 0.819305419921875, 0.89794921875, 0.976593017578125, 1.05523681640625, 1.133880615234375, 1.2125244140625, 1.291168212890625, 1.36981201171875, 1.448455810546875, 1.527099609375, 1.605743408203125, 1.68438720703125, 1.763031005859375, 1.8416748046875, 1.920318603515625, 1.99896240234375, 2.077606201171875, 2.15625]}, "gradients/encoder.encoder.layers.8.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [3.0, 0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 1.0, 4.0, 5.0, 9.0, 10.0, 17.0, 18.0, 46.0, 57.0, 80.0, 97.0, 136.0, 206.0, 285.0, 419.0, 658.0, 1031.0, 1733.0, 3210.0, 6236.0, 14456.0, 67636.0, 4013651.0, 57105.0, 13449.0, 6031.0, 2964.0, 1705.0, 1012.0, 629.0, 443.0, 283.0, 204.0, 132.0, 90.0, 71.0, 36.0, 33.0, 37.0, 18.0, 12.0, 5.0, 5.0, 3.0, 9.0, 3.0, 1.0, 3.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0], "bins": [-4.0390625, -3.9019775390625, -3.764892578125, -3.6278076171875, -3.49072265625, -3.3536376953125, -3.216552734375, -3.0794677734375, -2.9423828125, -2.8052978515625, -2.668212890625, -2.5311279296875, -2.39404296875, -2.2569580078125, -2.119873046875, -1.9827880859375, -1.845703125, -1.7086181640625, -1.571533203125, -1.4344482421875, -1.29736328125, -1.1602783203125, -1.023193359375, -0.8861083984375, -0.7490234375, -0.6119384765625, -0.474853515625, -0.3377685546875, -0.20068359375, -0.0635986328125, 0.073486328125, 0.2105712890625, 0.34765625, 0.4847412109375, 0.621826171875, 0.7589111328125, 0.89599609375, 1.0330810546875, 1.170166015625, 1.3072509765625, 1.4443359375, 1.5814208984375, 1.718505859375, 1.8555908203125, 1.99267578125, 2.1297607421875, 2.266845703125, 2.4039306640625, 2.541015625, 2.6781005859375, 2.815185546875, 2.9522705078125, 3.08935546875, 3.2264404296875, 3.363525390625, 3.5006103515625, 3.6376953125, 3.7747802734375, 3.911865234375, 4.0489501953125, 4.18603515625, 4.3231201171875, 4.460205078125, 4.5972900390625, 4.734375]}, "gradients/encoder.encoder.layers.8.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 8.0, 3.0, 6.0, 3.0, 9.0, 8.0, 20.0, 38.0, 98.0, 3479.0, 258.0, 51.0, 32.0, 16.0, 15.0, 9.0, 10.0, 4.0, 1.0, 4.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.1328125, -2.049957275390625, -1.96710205078125, -1.884246826171875, -1.8013916015625, -1.718536376953125, -1.63568115234375, -1.552825927734375, -1.469970703125, -1.387115478515625, -1.30426025390625, -1.221405029296875, -1.1385498046875, -1.055694580078125, -0.97283935546875, -0.889984130859375, -0.80712890625, -0.724273681640625, -0.64141845703125, -0.558563232421875, -0.4757080078125, -0.392852783203125, -0.30999755859375, -0.227142333984375, -0.144287109375, -0.061431884765625, 0.02142333984375, 0.104278564453125, 0.1871337890625, 0.269989013671875, 0.35284423828125, 0.435699462890625, 0.5185546875, 0.601409912109375, 0.68426513671875, 0.767120361328125, 0.8499755859375, 0.932830810546875, 1.01568603515625, 1.098541259765625, 1.181396484375, 1.264251708984375, 1.34710693359375, 1.429962158203125, 1.5128173828125, 1.595672607421875, 1.67852783203125, 1.761383056640625, 1.84423828125, 1.927093505859375, 2.00994873046875, 2.092803955078125, 2.1756591796875, 2.258514404296875, 2.34136962890625, 2.424224853515625, 2.507080078125, 2.589935302734375, 2.67279052734375, 2.755645751953125, 2.8385009765625, 2.921356201171875, 3.00421142578125, 3.087066650390625, 3.169921875]}, "gradients/encoder.encoder.layers.8.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 1.0, 1.0, 3.0, 1.0, 7.0, 12.0, 26.0, 28.0, 63.0, 106.0, 146.0, 161.0, 149.0, 128.0, 70.0, 32.0, 28.0, 15.0, 8.0, 5.0, 4.0, 5.0, 3.0, 2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-6.927201271057129, -6.6313910484313965, -6.335580348968506, -6.039770126342773, -5.743959426879883, -5.44814920425415, -5.152338981628418, -4.856528282165527, -4.560718059539795, -4.2649078369140625, -3.969097137451172, -3.6732869148254395, -3.377476453781128, -3.0816659927368164, -2.785855770111084, -2.4900453090667725, -2.194234848022461, -1.8984243869781494, -1.6026140451431274, -1.3068037033081055, -1.010993242263794, -0.7151827812194824, -0.41937243938446045, -0.12356209754943848, 0.17224836349487305, 0.4680587649345398, 0.7638691663742065, 1.0596795082092285, 1.35548996925354, 1.6513004302978516, 1.9471107721328735, 2.2429211139678955, 2.5387325286865234, 2.834542989730835, 3.1303534507751465, 3.426163673400879, 3.7219741344451904, 4.017784595489502, 4.313594818115234, 4.609405517578125, 4.905215740203857, 5.20102596282959, 5.4968366622924805, 5.792646884918213, 6.088457107543945, 6.384267807006836, 6.680078029632568, 6.975888252258301, 7.271698951721191, 7.567509174346924, 7.8633198738098145, 8.159130096435547, 8.454940795898438, 8.750751495361328, 9.046561241149902, 9.342371940612793, 9.638181686401367, 9.933992385864258, 10.229802131652832, 10.525612831115723, 10.821423530578613, 11.117233276367188, 11.413043975830078, 11.708854675292969, 12.00466537475586]}, "gradients/encoder.encoder.layers.8.final_layer_norm.bias": {"_type": "histogram", "values": [2.0, 1.0, 3.0, 0.0, 1.0, 0.0, 3.0, 1.0, 9.0, 7.0, 7.0, 8.0, 14.0, 12.0, 16.0, 17.0, 17.0, 27.0, 33.0, 37.0, 45.0, 53.0, 40.0, 45.0, 62.0, 64.0, 60.0, 50.0, 47.0, 57.0, 48.0, 38.0, 29.0, 29.0, 20.0, 30.0, 20.0, 12.0, 16.0, 4.0, 8.0, 4.0, 7.0, 3.0, 5.0, 5.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.17314338684082, -4.01095724105835, -3.8487708568573, -3.686584711074829, -3.5243985652923584, -3.3622121810913086, -3.200026035308838, -3.037839889526367, -2.8756537437438965, -2.713467597961426, -2.551281213760376, -2.3890950679779053, -2.2269089221954346, -2.0647225379943848, -1.902536392211914, -1.7403502464294434, -1.5781638622283936, -1.4159775972366333, -1.2537914514541626, -1.0916051864624023, -0.9294189810752869, -0.7672327756881714, -0.6050465106964111, -0.44286036491394043, -0.2806740999221802, -0.1184878796339035, 0.04369834065437317, 0.20588457584381104, 0.3680707812309265, 0.530256986618042, 0.6924432516098022, 0.854629397392273, 1.0168156623840332, 1.1790019273757935, 1.3411880731582642, 1.5033743381500244, 1.6655604839324951, 1.8277467489242554, 1.9899330139160156, 2.1521191596984863, 2.314305305480957, 2.4764914512634277, 2.6386778354644775, 2.8008639812469482, 2.963050127029419, 3.1252365112304688, 3.2874226570129395, 3.44960880279541, 3.61179518699646, 3.7739813327789307, 3.9361677169799805, 4.098353862762451, 4.260540008544922, 4.422726154327393, 4.584912300109863, 4.747098922729492, 4.909285068511963, 5.071471214294434, 5.233657360076904, 5.395843505859375, 5.558030128479004, 5.720216274261475, 5.882402420043945, 6.044588565826416, 6.206774711608887]}, "gradients/encoder.encoder.layers.8.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 0.0, 2.0, 2.0, 0.0, 1.0, 4.0, 4.0, 10.0, 13.0, 16.0, 19.0, 68.0, 79.0, 155.0, 292.0, 572.0, 1298.0, 3350.0, 11206.0, 50079.0, 302517.0, 544102.0, 105466.0, 20244.0, 5467.0, 1903.0, 811.0, 382.0, 236.0, 100.0, 62.0, 36.0, 25.0, 15.0, 9.0, 4.0, 5.0, 4.0, 0.0, 3.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.19921875, -5.03265380859375, -4.8660888671875, -4.69952392578125, -4.532958984375, -4.36639404296875, -4.1998291015625, -4.03326416015625, -3.86669921875, -3.70013427734375, -3.5335693359375, -3.36700439453125, -3.200439453125, -3.03387451171875, -2.8673095703125, -2.70074462890625, -2.5341796875, -2.36761474609375, -2.2010498046875, -2.03448486328125, -1.867919921875, -1.70135498046875, -1.5347900390625, -1.36822509765625, -1.20166015625, -1.03509521484375, -0.8685302734375, -0.70196533203125, -0.535400390625, -0.36883544921875, -0.2022705078125, -0.03570556640625, 0.130859375, 0.29742431640625, 0.4639892578125, 0.63055419921875, 0.797119140625, 0.96368408203125, 1.1302490234375, 1.29681396484375, 1.46337890625, 1.62994384765625, 1.7965087890625, 1.96307373046875, 2.129638671875, 2.29620361328125, 2.4627685546875, 2.62933349609375, 2.7958984375, 2.96246337890625, 3.1290283203125, 3.29559326171875, 3.462158203125, 3.62872314453125, 3.7952880859375, 3.96185302734375, 4.12841796875, 4.29498291015625, 4.4615478515625, 4.62811279296875, 4.794677734375, 4.96124267578125, 5.1278076171875, 5.29437255859375, 5.4609375]}, "gradients/encoder.encoder.layers.8.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 2.0, 8.0, 15.0, 10.0, 11.0, 24.0, 25.0, 42.0, 40.0, 69.0, 89.0, 85.0, 104.0, 87.0, 85.0, 75.0, 54.0, 50.0, 35.0, 27.0, 14.0, 16.0, 9.0, 11.0, 8.0, 5.0, 5.0, 2.0, 3.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0], "bins": [-3.248046875, -3.166839599609375, -3.08563232421875, -3.004425048828125, -2.9232177734375, -2.842010498046875, -2.76080322265625, -2.679595947265625, -2.598388671875, -2.517181396484375, -2.43597412109375, -2.354766845703125, -2.2735595703125, -2.192352294921875, -2.11114501953125, -2.029937744140625, -1.94873046875, -1.867523193359375, -1.78631591796875, -1.705108642578125, -1.6239013671875, -1.542694091796875, -1.46148681640625, -1.380279541015625, -1.299072265625, -1.217864990234375, -1.13665771484375, -1.055450439453125, -0.9742431640625, -0.893035888671875, -0.81182861328125, -0.730621337890625, -0.6494140625, -0.568206787109375, -0.48699951171875, -0.405792236328125, -0.3245849609375, -0.243377685546875, -0.16217041015625, -0.080963134765625, 0.000244140625, 0.081451416015625, 0.16265869140625, 0.243865966796875, 0.3250732421875, 0.406280517578125, 0.48748779296875, 0.568695068359375, 0.64990234375, 0.731109619140625, 0.81231689453125, 0.893524169921875, 0.9747314453125, 1.055938720703125, 1.13714599609375, 1.218353271484375, 1.299560546875, 1.380767822265625, 1.46197509765625, 1.543182373046875, 1.6243896484375, 1.705596923828125, 1.78680419921875, 1.868011474609375, 1.94921875]}, "gradients/encoder.encoder.layers.8.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 5.0, 8.0, 5.0, 9.0, 20.0, 35.0, 35.0, 64.0, 82.0, 182.0, 320.0, 801.0, 1764.0, 6091.0, 31543.0, 320423.0, 611255.0, 61617.0, 9773.0, 2627.0, 912.0, 446.0, 232.0, 98.0, 76.0, 31.0, 33.0, 25.0, 13.0, 14.0, 10.0, 6.0, 4.0, 3.0, 1.0, 3.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-6.2578125, -6.043212890625, -5.82861328125, -5.614013671875, -5.3994140625, -5.184814453125, -4.97021484375, -4.755615234375, -4.541015625, -4.326416015625, -4.11181640625, -3.897216796875, -3.6826171875, -3.468017578125, -3.25341796875, -3.038818359375, -2.82421875, -2.609619140625, -2.39501953125, -2.180419921875, -1.9658203125, -1.751220703125, -1.53662109375, -1.322021484375, -1.107421875, -0.892822265625, -0.67822265625, -0.463623046875, -0.2490234375, -0.034423828125, 0.18017578125, 0.394775390625, 0.609375, 0.823974609375, 1.03857421875, 1.253173828125, 1.4677734375, 1.682373046875, 1.89697265625, 2.111572265625, 2.326171875, 2.540771484375, 2.75537109375, 2.969970703125, 3.1845703125, 3.399169921875, 3.61376953125, 3.828369140625, 4.04296875, 4.257568359375, 4.47216796875, 4.686767578125, 4.9013671875, 5.115966796875, 5.33056640625, 5.545166015625, 5.759765625, 5.974365234375, 6.18896484375, 6.403564453125, 6.6181640625, 6.832763671875, 7.04736328125, 7.261962890625, 7.4765625]}, "gradients/encoder.encoder.layers.8.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0, 0.0, 0.0, 1.0, 0.0, 5.0, 7.0, 3.0, 7.0, 15.0, 15.0, 20.0, 20.0, 35.0, 34.0, 37.0, 59.0, 63.0, 61.0, 57.0, 80.0, 84.0, 68.0, 68.0, 65.0, 42.0, 42.0, 35.0, 26.0, 14.0, 11.0, 15.0, 5.0, 4.0, 6.0, 4.0, 1.0, 2.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.4609375, -9.1510009765625, -8.841064453125, -8.5311279296875, -8.22119140625, -7.9112548828125, -7.601318359375, -7.2913818359375, -6.9814453125, -6.6715087890625, -6.361572265625, -6.0516357421875, -5.74169921875, -5.4317626953125, -5.121826171875, -4.8118896484375, -4.501953125, -4.1920166015625, -3.882080078125, -3.5721435546875, -3.26220703125, -2.9522705078125, -2.642333984375, -2.3323974609375, -2.0224609375, -1.7125244140625, -1.402587890625, -1.0926513671875, -0.78271484375, -0.4727783203125, -0.162841796875, 0.1470947265625, 0.45703125, 0.7669677734375, 1.076904296875, 1.3868408203125, 1.69677734375, 2.0067138671875, 2.316650390625, 2.6265869140625, 2.9365234375, 3.2464599609375, 3.556396484375, 3.8663330078125, 4.17626953125, 4.4862060546875, 4.796142578125, 5.1060791015625, 5.416015625, 5.7259521484375, 6.035888671875, 6.3458251953125, 6.65576171875, 6.9656982421875, 7.275634765625, 7.5855712890625, 7.8955078125, 8.2054443359375, 8.515380859375, 8.8253173828125, 9.13525390625, 9.4451904296875, 9.755126953125, 10.0650634765625, 10.375]}, "gradients/encoder.encoder.layers.8.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 3.0, 2.0, 3.0, 3.0, 6.0, 10.0, 6.0, 8.0, 11.0, 14.0, 23.0, 22.0, 44.0, 69.0, 100.0, 202.0, 361.0, 859.0, 2760.0, 13867.0, 211604.0, 767637.0, 42835.0, 5330.0, 1500.0, 588.0, 281.0, 151.0, 92.0, 68.0, 34.0, 20.0, 18.0, 10.0, 6.0, 5.0, 4.0, 4.0, 1.0, 1.0, 0.0, 0.0, 3.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.34765625, -2.261474609375, -2.17529296875, -2.089111328125, -2.0029296875, -1.916748046875, -1.83056640625, -1.744384765625, -1.658203125, -1.572021484375, -1.48583984375, -1.399658203125, -1.3134765625, -1.227294921875, -1.14111328125, -1.054931640625, -0.96875, -0.882568359375, -0.79638671875, -0.710205078125, -0.6240234375, -0.537841796875, -0.45166015625, -0.365478515625, -0.279296875, -0.193115234375, -0.10693359375, -0.020751953125, 0.0654296875, 0.151611328125, 0.23779296875, 0.323974609375, 0.41015625, 0.496337890625, 0.58251953125, 0.668701171875, 0.7548828125, 0.841064453125, 0.92724609375, 1.013427734375, 1.099609375, 1.185791015625, 1.27197265625, 1.358154296875, 1.4443359375, 1.530517578125, 1.61669921875, 1.702880859375, 1.7890625, 1.875244140625, 1.96142578125, 2.047607421875, 2.1337890625, 2.219970703125, 2.30615234375, 2.392333984375, 2.478515625, 2.564697265625, 2.65087890625, 2.737060546875, 2.8232421875, 2.909423828125, 2.99560546875, 3.081787109375, 3.16796875]}, "gradients/encoder.encoder.layers.8.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 3.0, 3.0, 4.0, 9.0, 2.0, 6.0, 13.0, 9.0, 23.0, 30.0, 46.0, 69.0, 75.0, 111.0, 100.0, 138.0, 98.0, 72.0, 50.0, 44.0, 26.0, 22.0, 20.0, 12.0, 8.0, 6.0, 4.0, 4.0, 0.0, 5.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.000278472900390625, -0.00026776641607284546, -0.0002570599317550659, -0.0002463534474372864, -0.00023564696311950684, -0.0002249404788017273, -0.00021423399448394775, -0.0002035275101661682, -0.00019282102584838867, -0.00018211454153060913, -0.0001714080572128296, -0.00016070157289505005, -0.0001499950885772705, -0.00013928860425949097, -0.00012858211994171143, -0.00011787563562393188, -0.00010716915130615234, -9.64626669883728e-05, -8.575618267059326e-05, -7.504969835281372e-05, -6.434321403503418e-05, -5.363672971725464e-05, -4.29302453994751e-05, -3.222376108169556e-05, -2.1517276763916016e-05, -1.0810792446136475e-05, -1.043081283569336e-07, 1.0602176189422607e-05, 2.130866050720215e-05, 3.201514482498169e-05, 4.272162914276123e-05, 5.342811346054077e-05, 6.413459777832031e-05, 7.484108209609985e-05, 8.55475664138794e-05, 9.625405073165894e-05, 0.00010696053504943848, 0.00011766701936721802, 0.00012837350368499756, 0.0001390799880027771, 0.00014978647232055664, 0.00016049295663833618, 0.00017119944095611572, 0.00018190592527389526, 0.0001926124095916748, 0.00020331889390945435, 0.0002140253782272339, 0.00022473186254501343, 0.00023543834686279297, 0.0002461448311805725, 0.00025685131549835205, 0.0002675577998161316, 0.00027826428413391113, 0.0002889707684516907, 0.0002996772527694702, 0.00031038373708724976, 0.0003210902214050293, 0.00033179670572280884, 0.0003425031900405884, 0.0003532096743583679, 0.00036391615867614746, 0.000374622642993927, 0.00038532912731170654, 0.0003960356116294861, 0.0004067420959472656]}, "gradients/encoder.encoder.layers.8.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 3.0, 1.0, 5.0, 1.0, 3.0, 3.0, 6.0, 8.0, 15.0, 22.0, 27.0, 59.0, 84.0, 186.0, 337.0, 680.0, 1822.0, 6754.0, 50257.0, 745389.0, 221314.0, 16083.0, 3376.0, 1078.0, 482.0, 255.0, 133.0, 56.0, 50.0, 26.0, 19.0, 4.0, 6.0, 6.0, 3.0, 6.0, 0.0, 2.0, 1.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0], "bins": [-2.14453125, -2.064361572265625, -1.98419189453125, -1.904022216796875, -1.8238525390625, -1.743682861328125, -1.66351318359375, -1.583343505859375, -1.503173828125, -1.423004150390625, -1.34283447265625, -1.262664794921875, -1.1824951171875, -1.102325439453125, -1.02215576171875, -0.941986083984375, -0.86181640625, -0.781646728515625, -0.70147705078125, -0.621307373046875, -0.5411376953125, -0.460968017578125, -0.38079833984375, -0.300628662109375, -0.220458984375, -0.140289306640625, -0.06011962890625, 0.020050048828125, 0.1002197265625, 0.180389404296875, 0.26055908203125, 0.340728759765625, 0.4208984375, 0.501068115234375, 0.58123779296875, 0.661407470703125, 0.7415771484375, 0.821746826171875, 0.90191650390625, 0.982086181640625, 1.062255859375, 1.142425537109375, 1.22259521484375, 1.302764892578125, 1.3829345703125, 1.463104248046875, 1.54327392578125, 1.623443603515625, 1.70361328125, 1.783782958984375, 1.86395263671875, 1.944122314453125, 2.0242919921875, 2.104461669921875, 2.18463134765625, 2.264801025390625, 2.344970703125, 2.425140380859375, 2.50531005859375, 2.585479736328125, 2.6656494140625, 2.745819091796875, 2.82598876953125, 2.906158447265625, 2.986328125]}, "gradients/encoder.encoder.layers.8.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 2.0, 0.0, 10.0, 6.0, 14.0, 27.0, 51.0, 92.0, 109.0, 166.0, 150.0, 163.0, 84.0, 56.0, 34.0, 23.0, 5.0, 9.0, 4.0, 3.0, 0.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.37109375, -3.255859375, -3.140625, -3.025390625, -2.91015625, -2.794921875, -2.6796875, -2.564453125, -2.44921875, -2.333984375, -2.21875, -2.103515625, -1.98828125, -1.873046875, -1.7578125, -1.642578125, -1.52734375, -1.412109375, -1.296875, -1.181640625, -1.06640625, -0.951171875, -0.8359375, -0.720703125, -0.60546875, -0.490234375, -0.375, -0.259765625, -0.14453125, -0.029296875, 0.0859375, 0.201171875, 0.31640625, 0.431640625, 0.546875, 0.662109375, 0.77734375, 0.892578125, 1.0078125, 1.123046875, 1.23828125, 1.353515625, 1.46875, 1.583984375, 1.69921875, 1.814453125, 1.9296875, 2.044921875, 2.16015625, 2.275390625, 2.390625, 2.505859375, 2.62109375, 2.736328125, 2.8515625, 2.966796875, 3.08203125, 3.197265625, 3.3125, 3.427734375, 3.54296875, 3.658203125, 3.7734375, 3.888671875, 4.00390625]}, "gradients/encoder.encoder.layers.8.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 4.0, 3.0, 7.0, 16.0, 35.0, 58.0, 134.0, 189.0, 222.0, 160.0, 97.0, 37.0, 20.0, 18.0, 5.0, 2.0, 1.0, 0.0, 3.0, 0.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-48.40748596191406, -46.488380432128906, -44.56927490234375, -42.65017318725586, -40.7310676574707, -38.81196212768555, -36.892860412597656, -34.9737548828125, -33.054649353027344, -31.135543823242188, -29.216440200805664, -27.29733657836914, -25.378231048583984, -23.459125518798828, -21.540021896362305, -19.62091827392578, -17.701812744140625, -15.782708168029785, -13.863603591918945, -11.944499015808105, -10.025394439697266, -8.106289863586426, -6.187185287475586, -4.268080711364746, -2.3489761352539062, -0.4298715591430664, 1.4892330169677734, 3.4083375930786133, 5.327442169189453, 7.246546745300293, 9.165651321411133, 11.084755897521973, 13.003860473632812, 14.922965049743652, 16.842069625854492, 18.761173248291016, 20.680278778076172, 22.599384307861328, 24.51848793029785, 26.437591552734375, 28.35669708251953, 30.275802612304688, 32.194908142089844, 34.114009857177734, 36.03311538696289, 37.95222091674805, 39.87132263183594, 41.790428161621094, 43.70953369140625, 45.628639221191406, 47.54774475097656, 49.46684646606445, 51.38595199584961, 53.305057525634766, 55.224159240722656, 57.14326477050781, 59.06237030029297, 60.981475830078125, 62.90058135986328, 64.81968688964844, 66.73878479003906, 68.65789031982422, 70.57699584960938, 72.49610137939453, 74.41520690917969]}, "gradients/encoder.encoder.layers.8.layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 3.0, 2.0, 3.0, 4.0, 3.0, 5.0, 11.0, 11.0, 16.0, 20.0, 28.0, 34.0, 45.0, 53.0, 58.0, 75.0, 66.0, 86.0, 91.0, 77.0, 77.0, 49.0, 42.0, 42.0, 31.0, 21.0, 21.0, 14.0, 8.0, 8.0, 3.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-38.65980529785156, -37.1434440612793, -35.6270866394043, -34.11072540283203, -32.594364166259766, -31.078004837036133, -29.5616455078125, -28.045284271240234, -26.5289249420166, -25.01256561279297, -23.496204376220703, -21.97984504699707, -20.463485717773438, -18.947124481201172, -17.43076515197754, -15.91440486907959, -14.39804458618164, -12.881684303283691, -11.365324020385742, -9.84896469116211, -8.33260440826416, -6.816244125366211, -5.299884796142578, -3.783524513244629, -2.2671642303466797, -0.7508041858673096, 0.7655558586120605, 2.2819156646728516, 3.798275947570801, 5.31463623046875, 6.830995559692383, 8.347355842590332, 9.863712310791016, 11.380072593688965, 12.896432876586914, 14.412792205810547, 15.929152488708496, 17.445512771606445, 18.961872100830078, 20.478233337402344, 21.994592666625977, 23.51095199584961, 25.027313232421875, 26.543672561645508, 28.06003189086914, 29.576393127441406, 31.09275245666504, 32.60911178588867, 34.12547302246094, 35.6418342590332, 37.1581916809082, 38.67455291748047, 40.190914154052734, 41.707275390625, 43.2236328125, 44.739994049072266, 46.25635528564453, 47.7727165222168, 49.2890739440918, 50.80543518066406, 52.32179641723633, 53.838157653808594, 55.354515075683594, 56.87087631225586, 58.38723373413086]}, "gradients/encoder.encoder.layers.7.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 2.0, 0.0, 2.0, 0.0, 7.0, 10.0, 9.0, 32.0, 68.0, 115.0, 276.0, 675.0, 1941.0, 6278.0, 28473.0, 534300.0, 3558239.0, 50355.0, 9134.0, 2586.0, 899.0, 379.0, 188.0, 94.0, 60.0, 36.0, 29.0, 19.0, 11.0, 12.0, 9.0, 4.0, 9.0, 5.0, 5.0, 5.0, 3.0, 1.0, 6.0, 4.0, 1.0, 0.0, 1.0, 4.0, 0.0, 4.0, 1.0, 3.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-3.71875, -3.5343017578125, -3.349853515625, -3.1654052734375, -2.98095703125, -2.7965087890625, -2.612060546875, -2.4276123046875, -2.2431640625, -2.0587158203125, -1.874267578125, -1.6898193359375, -1.50537109375, -1.3209228515625, -1.136474609375, -0.9520263671875, -0.767578125, -0.5831298828125, -0.398681640625, -0.2142333984375, -0.02978515625, 0.1546630859375, 0.339111328125, 0.5235595703125, 0.7080078125, 0.8924560546875, 1.076904296875, 1.2613525390625, 1.44580078125, 1.6302490234375, 1.814697265625, 1.9991455078125, 2.18359375, 2.3680419921875, 2.552490234375, 2.7369384765625, 2.92138671875, 3.1058349609375, 3.290283203125, 3.4747314453125, 3.6591796875, 3.8436279296875, 4.028076171875, 4.2125244140625, 4.39697265625, 4.5814208984375, 4.765869140625, 4.9503173828125, 5.134765625, 5.3192138671875, 5.503662109375, 5.6881103515625, 5.87255859375, 6.0570068359375, 6.241455078125, 6.4259033203125, 6.6103515625, 6.7947998046875, 6.979248046875, 7.1636962890625, 7.34814453125, 7.5325927734375, 7.717041015625, 7.9014892578125, 8.0859375]}, "gradients/encoder.encoder.layers.7.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 4.0, 6.0, 16.0, 30.0, 68.0, 116.0, 159.0, 176.0, 161.0, 114.0, 69.0, 51.0, 24.0, 11.0, 10.0, 5.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.552734375, -1.409210205078125, -1.26568603515625, -1.122161865234375, -0.9786376953125, -0.835113525390625, -0.69158935546875, -0.548065185546875, -0.404541015625, -0.261016845703125, -0.11749267578125, 0.026031494140625, 0.1695556640625, 0.313079833984375, 0.45660400390625, 0.600128173828125, 0.74365234375, 0.887176513671875, 1.03070068359375, 1.174224853515625, 1.3177490234375, 1.461273193359375, 1.60479736328125, 1.748321533203125, 1.891845703125, 2.035369873046875, 2.17889404296875, 2.322418212890625, 2.4659423828125, 2.609466552734375, 2.75299072265625, 2.896514892578125, 3.0400390625, 3.183563232421875, 3.32708740234375, 3.470611572265625, 3.6141357421875, 3.757659912109375, 3.90118408203125, 4.044708251953125, 4.188232421875, 4.331756591796875, 4.47528076171875, 4.618804931640625, 4.7623291015625, 4.905853271484375, 5.04937744140625, 5.192901611328125, 5.33642578125, 5.479949951171875, 5.62347412109375, 5.766998291015625, 5.9105224609375, 6.054046630859375, 6.19757080078125, 6.341094970703125, 6.484619140625, 6.628143310546875, 6.77166748046875, 6.915191650390625, 7.0587158203125, 7.202239990234375, 7.34576416015625, 7.489288330078125, 7.6328125]}, "gradients/encoder.encoder.layers.7.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0, 5.0, 4.0, 3.0, 4.0, 9.0, 12.0, 11.0, 15.0, 21.0, 37.0, 69.0, 102.0, 199.0, 296.0, 606.0, 1497.0, 3456.0, 9991.0, 38537.0, 294650.0, 3647932.0, 157911.0, 26650.0, 7270.0, 2636.0, 1134.0, 526.0, 306.0, 142.0, 83.0, 54.0, 39.0, 20.0, 22.0, 11.0, 9.0, 3.0, 4.0, 4.0, 3.0, 3.0, 3.0, 1.0, 0.0, 2.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-4.58984375, -4.443603515625, -4.29736328125, -4.151123046875, -4.0048828125, -3.858642578125, -3.71240234375, -3.566162109375, -3.419921875, -3.273681640625, -3.12744140625, -2.981201171875, -2.8349609375, -2.688720703125, -2.54248046875, -2.396240234375, -2.25, -2.103759765625, -1.95751953125, -1.811279296875, -1.6650390625, -1.518798828125, -1.37255859375, -1.226318359375, -1.080078125, -0.933837890625, -0.78759765625, -0.641357421875, -0.4951171875, -0.348876953125, -0.20263671875, -0.056396484375, 0.08984375, 0.236083984375, 0.38232421875, 0.528564453125, 0.6748046875, 0.821044921875, 0.96728515625, 1.113525390625, 1.259765625, 1.406005859375, 1.55224609375, 1.698486328125, 1.8447265625, 1.990966796875, 2.13720703125, 2.283447265625, 2.4296875, 2.575927734375, 2.72216796875, 2.868408203125, 3.0146484375, 3.160888671875, 3.30712890625, 3.453369140625, 3.599609375, 3.745849609375, 3.89208984375, 4.038330078125, 4.1845703125, 4.330810546875, 4.47705078125, 4.623291015625, 4.76953125]}, "gradients/encoder.encoder.layers.7.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 3.0, 1.0, 0.0, 2.0, 3.0, 2.0, 1.0, 2.0, 5.0, 3.0, 10.0, 7.0, 15.0, 20.0, 17.0, 24.0, 38.0, 58.0, 100.0, 162.0, 395.0, 1170.0, 1192.0, 434.0, 160.0, 93.0, 55.0, 38.0, 20.0, 19.0, 9.0, 7.0, 9.0, 4.0, 3.0, 1.0, 1.0, 3.0, 0.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.3046875, -4.1453857421875, -3.986083984375, -3.8267822265625, -3.66748046875, -3.5081787109375, -3.348876953125, -3.1895751953125, -3.0302734375, -2.8709716796875, -2.711669921875, -2.5523681640625, -2.39306640625, -2.2337646484375, -2.074462890625, -1.9151611328125, -1.755859375, -1.5965576171875, -1.437255859375, -1.2779541015625, -1.11865234375, -0.9593505859375, -0.800048828125, -0.6407470703125, -0.4814453125, -0.3221435546875, -0.162841796875, -0.0035400390625, 0.15576171875, 0.3150634765625, 0.474365234375, 0.6336669921875, 0.79296875, 0.9522705078125, 1.111572265625, 1.2708740234375, 1.43017578125, 1.5894775390625, 1.748779296875, 1.9080810546875, 2.0673828125, 2.2266845703125, 2.385986328125, 2.5452880859375, 2.70458984375, 2.8638916015625, 3.023193359375, 3.1824951171875, 3.341796875, 3.5010986328125, 3.660400390625, 3.8197021484375, 3.97900390625, 4.1383056640625, 4.297607421875, 4.4569091796875, 4.6162109375, 4.7755126953125, 4.934814453125, 5.0941162109375, 5.25341796875, 5.4127197265625, 5.572021484375, 5.7313232421875, 5.890625]}, "gradients/encoder.encoder.layers.7.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 1.0, 3.0, 4.0, 3.0, 2.0, 16.0, 31.0, 80.0, 200.0, 313.0, 218.0, 90.0, 28.0, 10.0, 3.0, 6.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-99.85432434082031, -97.70513916015625, -95.55594635009766, -93.4067611694336, -91.25757598876953, -89.10838317871094, -86.95919799804688, -84.81001281738281, -82.66082000732422, -80.51163482666016, -78.36244201660156, -76.2132568359375, -74.06407165527344, -71.91487884521484, -69.76569366455078, -67.61650848388672, -65.46731567382812, -63.3181266784668, -61.168941497802734, -59.019752502441406, -56.87056350708008, -54.72137451171875, -52.57218933105469, -50.42300033569336, -48.27381896972656, -46.124629974365234, -43.97544479370117, -41.826255798339844, -39.677066802978516, -37.52787780761719, -35.378692626953125, -33.2295036315918, -31.08031463623047, -28.931127548217773, -26.781938552856445, -24.63275146484375, -22.483562469482422, -20.334375381469727, -18.18518829345703, -16.035999298095703, -13.886812210083008, -11.737624168395996, -9.588436126708984, -7.439249038696289, -5.290060997009277, -3.1408729553222656, -0.9916858673095703, 1.1575021743774414, 3.306690216064453, 5.455878257751465, 7.605065822601318, 9.754253387451172, 11.903441429138184, 14.052629470825195, 16.20181655883789, 18.35100555419922, 20.500192642211914, 22.64937973022461, 24.798568725585938, 26.947755813598633, 29.096942901611328, 31.246131896972656, 33.39531707763672, 35.54450607299805, 37.693695068359375]}, "gradients/encoder.encoder.layers.7.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 3.0, 7.0, 6.0, 8.0, 15.0, 26.0, 25.0, 22.0, 37.0, 54.0, 74.0, 73.0, 68.0, 77.0, 81.0, 67.0, 65.0, 71.0, 43.0, 51.0, 41.0, 22.0, 27.0, 17.0, 9.0, 11.0, 7.0, 2.0, 1.0, 1.0, 5.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-38.652130126953125, -37.7086181640625, -36.765106201171875, -35.82159423828125, -34.87808609008789, -33.934574127197266, -32.99106216430664, -32.047550201416016, -31.10403823852539, -30.160526275634766, -29.217016220092773, -28.27350425720215, -27.329992294311523, -26.38648223876953, -25.442970275878906, -24.49945831298828, -23.55594825744629, -22.612436294555664, -21.668926239013672, -20.725414276123047, -19.781902313232422, -18.838390350341797, -17.894880294799805, -16.95136833190918, -16.007858276367188, -15.064347267150879, -14.120835304260254, -13.177324295043945, -12.23381233215332, -11.290301322937012, -10.346790313720703, -9.403278350830078, -8.45976448059082, -7.5162529945373535, -6.572741508483887, -5.629230499267578, -4.685719013214111, -3.7422075271606445, -2.798696517944336, -1.8551850318908691, -0.9116735458374023, 0.0318378210067749, 0.9753491878509521, 1.9188604354858398, 2.8623719215393066, 3.8058834075927734, 4.749394416809082, 5.692905902862549, 6.636417388916016, 7.579928874969482, 8.52344036102295, 9.466951370239258, 10.410463333129883, 11.353974342346191, 12.2974853515625, 13.240997314453125, 14.184508323669434, 15.128019332885742, 16.071531295776367, 17.01504135131836, 17.958553314208984, 18.90206527709961, 19.845577239990234, 20.789087295532227, 21.73259925842285]}, "gradients/encoder.encoder.layers.7.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 3.0, 7.0, 6.0, 1.0, 8.0, 8.0, 20.0, 15.0, 29.0, 45.0, 65.0, 144.0, 304.0, 615.0, 1471.0, 3717.0, 11556.0, 42219.0, 176336.0, 496541.0, 236642.0, 56280.0, 14728.0, 4639.0, 1689.0, 759.0, 336.0, 150.0, 83.0, 44.0, 33.0, 26.0, 19.0, 9.0, 6.0, 5.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-3.783203125, -3.659576416015625, -3.53594970703125, -3.412322998046875, -3.2886962890625, -3.165069580078125, -3.04144287109375, -2.917816162109375, -2.794189453125, -2.670562744140625, -2.54693603515625, -2.423309326171875, -2.2996826171875, -2.176055908203125, -2.05242919921875, -1.928802490234375, -1.80517578125, -1.681549072265625, -1.55792236328125, -1.434295654296875, -1.3106689453125, -1.187042236328125, -1.06341552734375, -0.939788818359375, -0.816162109375, -0.692535400390625, -0.56890869140625, -0.445281982421875, -0.3216552734375, -0.198028564453125, -0.07440185546875, 0.049224853515625, 0.1728515625, 0.296478271484375, 0.42010498046875, 0.543731689453125, 0.6673583984375, 0.790985107421875, 0.91461181640625, 1.038238525390625, 1.161865234375, 1.285491943359375, 1.40911865234375, 1.532745361328125, 1.6563720703125, 1.779998779296875, 1.90362548828125, 2.027252197265625, 2.15087890625, 2.274505615234375, 2.39813232421875, 2.521759033203125, 2.6453857421875, 2.769012451171875, 2.89263916015625, 3.016265869140625, 3.139892578125, 3.263519287109375, 3.38714599609375, 3.510772705078125, 3.6343994140625, 3.758026123046875, 3.88165283203125, 4.005279541015625, 4.12890625]}, "gradients/encoder.encoder.layers.7.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 9.0, 16.0, 17.0, 21.0, 44.0, 69.0, 73.0, 88.0, 133.0, 122.0, 98.0, 89.0, 68.0, 55.0, 34.0, 36.0, 19.0, 12.0, 7.0, 3.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.84765625, -1.75726318359375, -1.6668701171875, -1.57647705078125, -1.486083984375, -1.39569091796875, -1.3052978515625, -1.21490478515625, -1.12451171875, -1.03411865234375, -0.9437255859375, -0.85333251953125, -0.762939453125, -0.67254638671875, -0.5821533203125, -0.49176025390625, -0.4013671875, -0.31097412109375, -0.2205810546875, -0.13018798828125, -0.039794921875, 0.05059814453125, 0.1409912109375, 0.23138427734375, 0.32177734375, 0.41217041015625, 0.5025634765625, 0.59295654296875, 0.683349609375, 0.77374267578125, 0.8641357421875, 0.95452880859375, 1.044921875, 1.13531494140625, 1.2257080078125, 1.31610107421875, 1.406494140625, 1.49688720703125, 1.5872802734375, 1.67767333984375, 1.76806640625, 1.85845947265625, 1.9488525390625, 2.03924560546875, 2.129638671875, 2.22003173828125, 2.3104248046875, 2.40081787109375, 2.4912109375, 2.58160400390625, 2.6719970703125, 2.76239013671875, 2.852783203125, 2.94317626953125, 3.0335693359375, 3.12396240234375, 3.21435546875, 3.30474853515625, 3.3951416015625, 3.48553466796875, 3.575927734375, 3.66632080078125, 3.7567138671875, 3.84710693359375, 3.9375]}, "gradients/encoder.encoder.layers.7.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 4.0, 3.0, 3.0, 3.0, 6.0, 3.0, 7.0, 13.0, 13.0, 15.0, 22.0, 27.0, 78.0, 70.0, 129.0, 234.0, 384.0, 714.0, 1426.0, 2909.0, 8031.0, 28464.0, 152739.0, 625475.0, 179920.0, 32951.0, 8730.0, 3085.0, 1365.0, 694.0, 438.0, 224.0, 145.0, 86.0, 57.0, 39.0, 16.0, 15.0, 8.0, 7.0, 1.0, 2.0, 3.0, 3.0, 4.0, 0.0, 2.0, 2.0, 0.0, 1.0], "bins": [-4.84765625, -4.718536376953125, -4.58941650390625, -4.460296630859375, -4.3311767578125, -4.202056884765625, -4.07293701171875, -3.943817138671875, -3.814697265625, -3.685577392578125, -3.55645751953125, -3.427337646484375, -3.2982177734375, -3.169097900390625, -3.03997802734375, -2.910858154296875, -2.78173828125, -2.652618408203125, -2.52349853515625, -2.394378662109375, -2.2652587890625, -2.136138916015625, -2.00701904296875, -1.877899169921875, -1.748779296875, -1.619659423828125, -1.49053955078125, -1.361419677734375, -1.2322998046875, -1.103179931640625, -0.97406005859375, -0.844940185546875, -0.7158203125, -0.586700439453125, -0.45758056640625, -0.328460693359375, -0.1993408203125, -0.070220947265625, 0.05889892578125, 0.188018798828125, 0.317138671875, 0.446258544921875, 0.57537841796875, 0.704498291015625, 0.8336181640625, 0.962738037109375, 1.09185791015625, 1.220977783203125, 1.35009765625, 1.479217529296875, 1.60833740234375, 1.737457275390625, 1.8665771484375, 1.995697021484375, 2.12481689453125, 2.253936767578125, 2.383056640625, 2.512176513671875, 2.64129638671875, 2.770416259765625, 2.8995361328125, 3.028656005859375, 3.15777587890625, 3.286895751953125, 3.416015625]}, "gradients/encoder.encoder.layers.7.attention.v_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 3.0, 0.0, 1.0, 3.0, 3.0, 4.0, 6.0, 9.0, 14.0, 11.0, 17.0, 21.0, 28.0, 21.0, 23.0, 31.0, 36.0, 39.0, 37.0, 38.0, 59.0, 49.0, 69.0, 66.0, 51.0, 57.0, 47.0, 39.0, 41.0, 46.0, 27.0, 22.0, 23.0, 10.0, 13.0, 12.0, 6.0, 10.0, 7.0, 5.0, 3.0, 1.0, 2.0, 1.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.09375, -5.89471435546875, -5.6956787109375, -5.49664306640625, -5.297607421875, -5.09857177734375, -4.8995361328125, -4.70050048828125, -4.50146484375, -4.30242919921875, -4.1033935546875, -3.90435791015625, -3.705322265625, -3.50628662109375, -3.3072509765625, -3.10821533203125, -2.9091796875, -2.71014404296875, -2.5111083984375, -2.31207275390625, -2.113037109375, -1.91400146484375, -1.7149658203125, -1.51593017578125, -1.31689453125, -1.11785888671875, -0.9188232421875, -0.71978759765625, -0.520751953125, -0.32171630859375, -0.1226806640625, 0.07635498046875, 0.275390625, 0.47442626953125, 0.6734619140625, 0.87249755859375, 1.071533203125, 1.27056884765625, 1.4696044921875, 1.66864013671875, 1.86767578125, 2.06671142578125, 2.2657470703125, 2.46478271484375, 2.663818359375, 2.86285400390625, 3.0618896484375, 3.26092529296875, 3.4599609375, 3.65899658203125, 3.8580322265625, 4.05706787109375, 4.256103515625, 4.45513916015625, 4.6541748046875, 4.85321044921875, 5.05224609375, 5.25128173828125, 5.4503173828125, 5.64935302734375, 5.848388671875, 6.04742431640625, 6.2464599609375, 6.44549560546875, 6.64453125]}, "gradients/encoder.encoder.layers.7.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 3.0, 0.0, 1.0, 5.0, 4.0, 1.0, 9.0, 12.0, 16.0, 20.0, 20.0, 41.0, 71.0, 85.0, 155.0, 279.0, 599.0, 1333.0, 4007.0, 16788.0, 129918.0, 742276.0, 129471.0, 16821.0, 3944.0, 1352.0, 627.0, 263.0, 156.0, 94.0, 59.0, 41.0, 25.0, 17.0, 16.0, 9.0, 10.0, 5.0, 4.0, 5.0, 1.0, 3.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.966796875, -1.906280517578125, -1.84576416015625, -1.785247802734375, -1.7247314453125, -1.664215087890625, -1.60369873046875, -1.543182373046875, -1.482666015625, -1.422149658203125, -1.36163330078125, -1.301116943359375, -1.2406005859375, -1.180084228515625, -1.11956787109375, -1.059051513671875, -0.99853515625, -0.938018798828125, -0.87750244140625, -0.816986083984375, -0.7564697265625, -0.695953369140625, -0.63543701171875, -0.574920654296875, -0.514404296875, -0.453887939453125, -0.39337158203125, -0.332855224609375, -0.2723388671875, -0.211822509765625, -0.15130615234375, -0.090789794921875, -0.0302734375, 0.030242919921875, 0.09075927734375, 0.151275634765625, 0.2117919921875, 0.272308349609375, 0.33282470703125, 0.393341064453125, 0.453857421875, 0.514373779296875, 0.57489013671875, 0.635406494140625, 0.6959228515625, 0.756439208984375, 0.81695556640625, 0.877471923828125, 0.93798828125, 0.998504638671875, 1.05902099609375, 1.119537353515625, 1.1800537109375, 1.240570068359375, 1.30108642578125, 1.361602783203125, 1.422119140625, 1.482635498046875, 1.54315185546875, 1.603668212890625, 1.6641845703125, 1.724700927734375, 1.78521728515625, 1.845733642578125, 1.90625]}, "gradients/encoder.encoder.layers.7.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 2.0, 1.0, 0.0, 6.0, 3.0, 3.0, 2.0, 2.0, 8.0, 6.0, 14.0, 15.0, 21.0, 23.0, 39.0, 41.0, 55.0, 64.0, 73.0, 101.0, 109.0, 108.0, 68.0, 52.0, 54.0, 32.0, 23.0, 25.0, 15.0, 20.0, 7.0, 6.0, 4.0, 5.0, 5.0, 2.0, 2.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00023484230041503906, -0.0002243146300315857, -0.00021378695964813232, -0.00020325928926467896, -0.00019273161888122559, -0.00018220394849777222, -0.00017167627811431885, -0.00016114860773086548, -0.0001506209373474121, -0.00014009326696395874, -0.00012956559658050537, -0.000119037926197052, -0.00010851025581359863, -9.798258543014526e-05, -8.74549150466919e-05, -7.692724466323853e-05, -6.639957427978516e-05, -5.587190389633179e-05, -4.534423351287842e-05, -3.481656312942505e-05, -2.428889274597168e-05, -1.376122236251831e-05, -3.2335519790649414e-06, 7.294118404388428e-06, 1.7821788787841797e-05, 2.8349459171295166e-05, 3.8877129554748535e-05, 4.9404799938201904e-05, 5.9932470321655273e-05, 7.046014070510864e-05, 8.098781108856201e-05, 9.151548147201538e-05, 0.00010204315185546875, 0.00011257082223892212, 0.0001230984926223755, 0.00013362616300582886, 0.00014415383338928223, 0.0001546815037727356, 0.00016520917415618896, 0.00017573684453964233, 0.0001862645149230957, 0.00019679218530654907, 0.00020731985569000244, 0.0002178475260734558, 0.00022837519645690918, 0.00023890286684036255, 0.0002494305372238159, 0.0002599582076072693, 0.00027048587799072266, 0.000281013548374176, 0.0002915412187576294, 0.00030206888914108276, 0.00031259655952453613, 0.0003231242299079895, 0.00033365190029144287, 0.00034417957067489624, 0.0003547072410583496, 0.000365234911441803, 0.00037576258182525635, 0.0003862902522087097, 0.0003968179225921631, 0.00040734559297561646, 0.0004178732633590698, 0.0004284009337425232, 0.00043892860412597656]}, "gradients/encoder.encoder.layers.7.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 2.0, 2.0, 8.0, 8.0, 13.0, 16.0, 28.0, 43.0, 77.0, 138.0, 308.0, 753.0, 2270.0, 10690.0, 119772.0, 818136.0, 84285.0, 8650.0, 2078.0, 679.0, 279.0, 141.0, 75.0, 43.0, 24.0, 16.0, 10.0, 6.0, 4.0, 2.0, 2.0, 3.0, 2.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.06640625, -1.988250732421875, -1.91009521484375, -1.831939697265625, -1.7537841796875, -1.675628662109375, -1.59747314453125, -1.519317626953125, -1.441162109375, -1.363006591796875, -1.28485107421875, -1.206695556640625, -1.1285400390625, -1.050384521484375, -0.97222900390625, -0.894073486328125, -0.81591796875, -0.737762451171875, -0.65960693359375, -0.581451416015625, -0.5032958984375, -0.425140380859375, -0.34698486328125, -0.268829345703125, -0.190673828125, -0.112518310546875, -0.03436279296875, 0.043792724609375, 0.1219482421875, 0.200103759765625, 0.27825927734375, 0.356414794921875, 0.4345703125, 0.512725830078125, 0.59088134765625, 0.669036865234375, 0.7471923828125, 0.825347900390625, 0.90350341796875, 0.981658935546875, 1.059814453125, 1.137969970703125, 1.21612548828125, 1.294281005859375, 1.3724365234375, 1.450592041015625, 1.52874755859375, 1.606903076171875, 1.68505859375, 1.763214111328125, 1.84136962890625, 1.919525146484375, 1.9976806640625, 2.075836181640625, 2.15399169921875, 2.232147216796875, 2.310302734375, 2.388458251953125, 2.46661376953125, 2.544769287109375, 2.6229248046875, 2.701080322265625, 2.77923583984375, 2.857391357421875, 2.935546875]}, "gradients/encoder.encoder.layers.7.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 2.0, 3.0, 1.0, 5.0, 8.0, 12.0, 14.0, 27.0, 45.0, 69.0, 96.0, 149.0, 180.0, 127.0, 102.0, 63.0, 36.0, 18.0, 21.0, 11.0, 11.0, 9.0, 1.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.638671875, -2.53887939453125, -2.4390869140625, -2.33929443359375, -2.239501953125, -2.13970947265625, -2.0399169921875, -1.94012451171875, -1.84033203125, -1.74053955078125, -1.6407470703125, -1.54095458984375, -1.441162109375, -1.34136962890625, -1.2415771484375, -1.14178466796875, -1.0419921875, -0.94219970703125, -0.8424072265625, -0.74261474609375, -0.642822265625, -0.54302978515625, -0.4432373046875, -0.34344482421875, -0.24365234375, -0.14385986328125, -0.0440673828125, 0.05572509765625, 0.155517578125, 0.25531005859375, 0.3551025390625, 0.45489501953125, 0.5546875, 0.65447998046875, 0.7542724609375, 0.85406494140625, 0.953857421875, 1.05364990234375, 1.1534423828125, 1.25323486328125, 1.35302734375, 1.45281982421875, 1.5526123046875, 1.65240478515625, 1.752197265625, 1.85198974609375, 1.9517822265625, 2.05157470703125, 2.1513671875, 2.25115966796875, 2.3509521484375, 2.45074462890625, 2.550537109375, 2.65032958984375, 2.7501220703125, 2.84991455078125, 2.94970703125, 3.04949951171875, 3.1492919921875, 3.24908447265625, 3.348876953125, 3.44866943359375, 3.5484619140625, 3.64825439453125, 3.748046875]}, "gradients/encoder.encoder.layers.7.layer_norm.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 1.0, 0.0, 1.0, 3.0, 3.0, 8.0, 12.0, 16.0, 32.0, 43.0, 64.0, 110.0, 149.0, 161.0, 139.0, 112.0, 63.0, 44.0, 13.0, 14.0, 9.0, 3.0, 3.0, 4.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-44.44267654418945, -43.4490966796875, -42.45552062988281, -41.46194076538086, -40.468360900878906, -39.47478103637695, -38.481204986572266, -37.48762512207031, -36.49404525756836, -35.500465393066406, -34.50688934326172, -33.513309478759766, -32.51972961425781, -31.526151657104492, -30.532573699951172, -29.53899383544922, -28.5454158782959, -27.551837921142578, -26.558258056640625, -25.564680099487305, -24.57110023498535, -23.57752227783203, -22.583942413330078, -21.590364456176758, -20.596786499023438, -19.603208541870117, -18.609628677368164, -17.616050720214844, -16.62247085571289, -15.62889289855957, -14.635313987731934, -13.641735076904297, -12.648155212402344, -11.654576301574707, -10.66099739074707, -9.66741943359375, -8.673839569091797, -7.680261135101318, -6.68668270111084, -5.693103790283203, -4.699524879455566, -3.7059459686279297, -2.712367296218872, -1.7187886238098145, -0.7252097129821777, 0.268369197845459, 1.2619476318359375, 2.255526542663574, 3.249105453491211, 4.242684364318848, 5.236263275146484, 6.229841709136963, 7.2234206199646, 8.216999053955078, 9.210577964782715, 10.204156875610352, 11.197735786437988, 12.191314697265625, 13.184893608093262, 14.178472518920898, 15.172050476074219, 16.165630340576172, 17.159208297729492, 18.152786254882812, 19.146366119384766]}, "gradients/encoder.encoder.layers.7.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 4.0, 2.0, 1.0, 6.0, 8.0, 9.0, 6.0, 11.0, 14.0, 11.0, 19.0, 27.0, 31.0, 33.0, 51.0, 38.0, 42.0, 54.0, 80.0, 79.0, 73.0, 60.0, 58.0, 50.0, 44.0, 47.0, 32.0, 23.0, 19.0, 17.0, 16.0, 12.0, 12.0, 11.0, 3.0, 4.0, 3.0, 3.0, 0.0, 4.0, 1.0, 1.0, 0.0, 2.0], "bins": [-38.483909606933594, -37.52277755737305, -36.5616455078125, -35.60051727294922, -34.63938522338867, -33.678253173828125, -32.71712112426758, -31.755990982055664, -30.79486083984375, -29.833728790283203, -28.87259864807129, -27.911466598510742, -26.950336456298828, -25.98920440673828, -25.028072357177734, -24.06694221496582, -23.105810165405273, -22.144678115844727, -21.183547973632812, -20.222415924072266, -19.26128578186035, -18.300153732299805, -17.33902359008789, -16.377891540527344, -15.416760444641113, -14.455629348754883, -13.494498252868652, -12.533367156982422, -11.572235107421875, -10.611104965209961, -9.649972915649414, -8.688841819763184, -7.727710723876953, -6.766579627990723, -5.805448532104492, -4.8443169593811035, -3.883185863494873, -2.9220547676086426, -1.960923194885254, -0.9997920989990234, -0.03866100311279297, 0.922470211982727, 1.883601427078247, 2.8447327613830566, 3.805863857269287, 4.766994953155518, 5.728126525878906, 6.689257621765137, 7.650388717651367, 8.611519813537598, 9.572650909423828, 10.533782958984375, 11.494913101196289, 12.456045150756836, 13.417176246643066, 14.378307342529297, 15.339438438415527, 16.300569534301758, 17.261701583862305, 18.22283172607422, 19.183963775634766, 20.14509391784668, 21.106225967407227, 22.06735610961914, 23.028488159179688]}, "gradients/encoder.encoder.layers.6.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 2.0, 4.0, 6.0, 6.0, 9.0, 21.0, 23.0, 43.0, 73.0, 125.0, 252.0, 476.0, 1148.0, 3210.0, 12013.0, 77374.0, 3362446.0, 691658.0, 34512.0, 7186.0, 2088.0, 818.0, 390.0, 161.0, 93.0, 56.0, 27.0, 27.0, 11.0, 11.0, 7.0, 4.0, 4.0, 1.0, 2.0, 1.0, 1.0, 3.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.220703125, -3.104888916015625, -2.98907470703125, -2.873260498046875, -2.7574462890625, -2.641632080078125, -2.52581787109375, -2.410003662109375, -2.294189453125, -2.178375244140625, -2.06256103515625, -1.946746826171875, -1.8309326171875, -1.715118408203125, -1.59930419921875, -1.483489990234375, -1.36767578125, -1.251861572265625, -1.13604736328125, -1.020233154296875, -0.9044189453125, -0.788604736328125, -0.67279052734375, -0.556976318359375, -0.441162109375, -0.325347900390625, -0.20953369140625, -0.093719482421875, 0.0220947265625, 0.137908935546875, 0.25372314453125, 0.369537353515625, 0.4853515625, 0.601165771484375, 0.71697998046875, 0.832794189453125, 0.9486083984375, 1.064422607421875, 1.18023681640625, 1.296051025390625, 1.411865234375, 1.527679443359375, 1.64349365234375, 1.759307861328125, 1.8751220703125, 1.990936279296875, 2.10675048828125, 2.222564697265625, 2.33837890625, 2.454193115234375, 2.57000732421875, 2.685821533203125, 2.8016357421875, 2.917449951171875, 3.03326416015625, 3.149078369140625, 3.264892578125, 3.380706787109375, 3.49652099609375, 3.612335205078125, 3.7281494140625, 3.843963623046875, 3.95977783203125, 4.075592041015625, 4.19140625]}, "gradients/encoder.encoder.layers.6.feed_forward.output_dense.bias": {"_type": "histogram", "values": [4.0, 1.0, 2.0, 3.0, 1.0, 5.0, 5.0, 3.0, 6.0, 11.0, 8.0, 13.0, 28.0, 29.0, 35.0, 45.0, 52.0, 62.0, 56.0, 62.0, 76.0, 69.0, 68.0, 49.0, 58.0, 40.0, 45.0, 29.0, 32.0, 20.0, 25.0, 19.0, 22.0, 9.0, 5.0, 5.0, 5.0, 6.0, 4.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.060546875, -1.01092529296875, -0.9613037109375, -0.91168212890625, -0.862060546875, -0.81243896484375, -0.7628173828125, -0.71319580078125, -0.66357421875, -0.61395263671875, -0.5643310546875, -0.51470947265625, -0.465087890625, -0.41546630859375, -0.3658447265625, -0.31622314453125, -0.2666015625, -0.21697998046875, -0.1673583984375, -0.11773681640625, -0.068115234375, -0.01849365234375, 0.0311279296875, 0.08074951171875, 0.13037109375, 0.17999267578125, 0.2296142578125, 0.27923583984375, 0.328857421875, 0.37847900390625, 0.4281005859375, 0.47772216796875, 0.52734375, 0.57696533203125, 0.6265869140625, 0.67620849609375, 0.725830078125, 0.77545166015625, 0.8250732421875, 0.87469482421875, 0.92431640625, 0.97393798828125, 1.0235595703125, 1.07318115234375, 1.122802734375, 1.17242431640625, 1.2220458984375, 1.27166748046875, 1.3212890625, 1.37091064453125, 1.4205322265625, 1.47015380859375, 1.519775390625, 1.56939697265625, 1.6190185546875, 1.66864013671875, 1.71826171875, 1.76788330078125, 1.8175048828125, 1.86712646484375, 1.916748046875, 1.96636962890625, 2.0159912109375, 2.06561279296875, 2.115234375]}, "gradients/encoder.encoder.layers.6.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 4.0, 1.0, 5.0, 12.0, 29.0, 55.0, 144.0, 325.0, 1025.0, 4320.0, 24496.0, 299693.0, 3728574.0, 116839.0, 14912.0, 2779.0, 695.0, 205.0, 86.0, 47.0, 24.0, 14.0, 5.0, 2.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.46484375, -3.30255126953125, -3.1402587890625, -2.97796630859375, -2.815673828125, -2.65338134765625, -2.4910888671875, -2.32879638671875, -2.16650390625, -2.00421142578125, -1.8419189453125, -1.67962646484375, -1.517333984375, -1.35504150390625, -1.1927490234375, -1.03045654296875, -0.8681640625, -0.70587158203125, -0.5435791015625, -0.38128662109375, -0.218994140625, -0.05670166015625, 0.1055908203125, 0.26788330078125, 0.43017578125, 0.59246826171875, 0.7547607421875, 0.91705322265625, 1.079345703125, 1.24163818359375, 1.4039306640625, 1.56622314453125, 1.728515625, 1.89080810546875, 2.0531005859375, 2.21539306640625, 2.377685546875, 2.53997802734375, 2.7022705078125, 2.86456298828125, 3.02685546875, 3.18914794921875, 3.3514404296875, 3.51373291015625, 3.676025390625, 3.83831787109375, 4.0006103515625, 4.16290283203125, 4.3251953125, 4.48748779296875, 4.6497802734375, 4.81207275390625, 4.974365234375, 5.13665771484375, 5.2989501953125, 5.46124267578125, 5.62353515625, 5.78582763671875, 5.9481201171875, 6.11041259765625, 6.272705078125, 6.43499755859375, 6.5972900390625, 6.75958251953125, 6.921875]}, "gradients/encoder.encoder.layers.6.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 3.0, 0.0, 2.0, 2.0, 3.0, 2.0, 2.0, 3.0, 3.0, 4.0, 5.0, 2.0, 15.0, 13.0, 18.0, 23.0, 30.0, 41.0, 73.0, 90.0, 197.0, 451.0, 865.0, 987.0, 547.0, 287.0, 129.0, 63.0, 47.0, 31.0, 35.0, 29.0, 22.0, 17.0, 16.0, 5.0, 11.0, 5.0, 5.0, 2.0, 1.0, 2.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.740234375, -2.635955810546875, -2.53167724609375, -2.427398681640625, -2.3231201171875, -2.218841552734375, -2.11456298828125, -2.010284423828125, -1.906005859375, -1.801727294921875, -1.69744873046875, -1.593170166015625, -1.4888916015625, -1.384613037109375, -1.28033447265625, -1.176055908203125, -1.07177734375, -0.967498779296875, -0.86322021484375, -0.758941650390625, -0.6546630859375, -0.550384521484375, -0.44610595703125, -0.341827392578125, -0.237548828125, -0.133270263671875, -0.02899169921875, 0.075286865234375, 0.1795654296875, 0.283843994140625, 0.38812255859375, 0.492401123046875, 0.5966796875, 0.700958251953125, 0.80523681640625, 0.909515380859375, 1.0137939453125, 1.118072509765625, 1.22235107421875, 1.326629638671875, 1.430908203125, 1.535186767578125, 1.63946533203125, 1.743743896484375, 1.8480224609375, 1.952301025390625, 2.05657958984375, 2.160858154296875, 2.26513671875, 2.369415283203125, 2.47369384765625, 2.577972412109375, 2.6822509765625, 2.786529541015625, 2.89080810546875, 2.995086669921875, 3.099365234375, 3.203643798828125, 3.30792236328125, 3.412200927734375, 3.5164794921875, 3.620758056640625, 3.72503662109375, 3.829315185546875, 3.93359375]}, "gradients/encoder.encoder.layers.6.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 3.0, 2.0, 5.0, 8.0, 11.0, 21.0, 26.0, 72.0, 99.0, 175.0, 242.0, 160.0, 108.0, 39.0, 17.0, 10.0, 2.0, 1.0, 3.0, 2.0, 1.0, 1.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-23.220149993896484, -22.087230682373047, -20.954313278198242, -19.821395874023438, -18.6884765625, -17.555557250976562, -16.422639846801758, -15.289721488952637, -14.156803131103516, -13.023884773254395, -11.890966415405273, -10.758048057556152, -9.625129699707031, -8.49221134185791, -7.359292984008789, -6.226374626159668, -5.093456268310547, -3.960537910461426, -2.8276195526123047, -1.6947011947631836, -0.5617828369140625, 0.5711355209350586, 1.7040538787841797, 2.836972236633301, 3.969890594482422, 5.102808952331543, 6.235727310180664, 7.368645668029785, 8.501564025878906, 9.634482383728027, 10.767400741577148, 11.90031909942627, 13.033241271972656, 14.166159629821777, 15.299077987670898, 16.431995391845703, 17.56491470336914, 18.697834014892578, 19.830751419067383, 20.963668823242188, 22.096588134765625, 23.229507446289062, 24.362424850463867, 25.495342254638672, 26.62826156616211, 27.761180877685547, 28.89409828186035, 30.027015686035156, 31.159934997558594, 32.29285430908203, 33.42577362060547, 34.55868911743164, 35.69160842895508, 36.824527740478516, 37.95744323730469, 39.090362548828125, 40.22328186035156, 41.356201171875, 42.48912048339844, 43.62203598022461, 44.75495529174805, 45.887874603271484, 47.020790100097656, 48.153709411621094, 49.28662872314453]}, "gradients/encoder.encoder.layers.6.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 3.0, 1.0, 4.0, 7.0, 6.0, 11.0, 23.0, 25.0, 31.0, 37.0, 32.0, 59.0, 73.0, 40.0, 53.0, 76.0, 62.0, 71.0, 64.0, 56.0, 59.0, 52.0, 40.0, 30.0, 31.0, 19.0, 18.0, 12.0, 4.0, 4.0, 4.0, 5.0, 1.0, 0.0, 0.0, 0.0, 3.0, 1.0], "bins": [-26.497116088867188, -25.8597469329834, -25.222379684448242, -24.585010528564453, -23.947641372680664, -23.310272216796875, -22.67290496826172, -22.03553581237793, -21.39816665649414, -20.76079750061035, -20.123430252075195, -19.486061096191406, -18.848691940307617, -18.211322784423828, -17.573955535888672, -16.936586380004883, -16.299217224121094, -15.661849021911621, -15.024479866027832, -14.38711166381836, -13.74974250793457, -13.112374305725098, -12.475006103515625, -11.837636947631836, -11.20026969909668, -10.562901496887207, -9.925532341003418, -9.288164138793945, -8.650794982910156, -8.013426780700684, -7.376058101654053, -6.738689422607422, -6.101320266723633, -5.463951587677002, -4.826582908630371, -4.189214706420898, -3.5518457889556885, -2.9144771099090576, -2.277108669281006, -1.639739990234375, -1.0023713111877441, -0.36500269174575806, 0.272365927696228, 0.9097344875335693, 1.5471031665802002, 2.184471845626831, 2.821840286254883, 3.4592089653015137, 4.0965776443481445, 4.733946323394775, 5.371315002441406, 6.008683204650879, 6.646052360534668, 7.283420562744141, 7.9207892417907715, 8.558157920837402, 9.195526123046875, 9.832894325256348, 10.470263481140137, 11.10763168334961, 11.745000839233398, 12.382369041442871, 13.019737243652344, 13.657106399536133, 14.294475555419922]}, "gradients/encoder.encoder.layers.6.attention.out_proj.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 3.0, 5.0, 7.0, 3.0, 9.0, 18.0, 17.0, 33.0, 50.0, 64.0, 88.0, 167.0, 277.0, 427.0, 650.0, 1158.0, 1940.0, 3320.0, 6222.0, 12196.0, 24258.0, 52112.0, 115204.0, 243829.0, 292614.0, 156301.0, 70325.0, 32894.0, 15972.0, 8170.0, 4266.0, 2364.0, 1307.0, 848.0, 511.0, 306.0, 206.0, 138.0, 105.0, 54.0, 46.0, 22.0, 16.0, 14.0, 9.0, 5.0, 5.0, 2.0, 2.0, 0.0, 1.0, 2.0, 2.0, 0.0, 1.0, 1.0, 2.0], "bins": [-1.96875, -1.9056396484375, -1.842529296875, -1.7794189453125, -1.71630859375, -1.6531982421875, -1.590087890625, -1.5269775390625, -1.4638671875, -1.4007568359375, -1.337646484375, -1.2745361328125, -1.21142578125, -1.1483154296875, -1.085205078125, -1.0220947265625, -0.958984375, -0.8958740234375, -0.832763671875, -0.7696533203125, -0.70654296875, -0.6434326171875, -0.580322265625, -0.5172119140625, -0.4541015625, -0.3909912109375, -0.327880859375, -0.2647705078125, -0.20166015625, -0.1385498046875, -0.075439453125, -0.0123291015625, 0.05078125, 0.1138916015625, 0.177001953125, 0.2401123046875, 0.30322265625, 0.3663330078125, 0.429443359375, 0.4925537109375, 0.5556640625, 0.6187744140625, 0.681884765625, 0.7449951171875, 0.80810546875, 0.8712158203125, 0.934326171875, 0.9974365234375, 1.060546875, 1.1236572265625, 1.186767578125, 1.2498779296875, 1.31298828125, 1.3760986328125, 1.439208984375, 1.5023193359375, 1.5654296875, 1.6285400390625, 1.691650390625, 1.7547607421875, 1.81787109375, 1.8809814453125, 1.944091796875, 2.0072021484375, 2.0703125]}, "gradients/encoder.encoder.layers.6.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 2.0, 4.0, 2.0, 3.0, 1.0, 0.0, 3.0, 3.0, 6.0, 9.0, 12.0, 6.0, 14.0, 18.0, 18.0, 32.0, 22.0, 25.0, 35.0, 36.0, 31.0, 41.0, 32.0, 45.0, 42.0, 38.0, 46.0, 48.0, 46.0, 35.0, 34.0, 33.0, 39.0, 37.0, 26.0, 28.0, 18.0, 18.0, 24.0, 14.0, 13.0, 13.0, 13.0, 12.0, 8.0, 4.0, 5.0, 6.0, 2.0, 5.0, 1.0, 5.0, 1.0, 1.0, 2.0, 2.0], "bins": [-1.1982421875, -1.1637115478515625, -1.129180908203125, -1.0946502685546875, -1.06011962890625, -1.0255889892578125, -0.991058349609375, -0.9565277099609375, -0.9219970703125, -0.8874664306640625, -0.852935791015625, -0.8184051513671875, -0.78387451171875, -0.7493438720703125, -0.714813232421875, -0.6802825927734375, -0.645751953125, -0.6112213134765625, -0.576690673828125, -0.5421600341796875, -0.50762939453125, -0.4730987548828125, -0.438568115234375, -0.4040374755859375, -0.3695068359375, -0.3349761962890625, -0.300445556640625, -0.2659149169921875, -0.23138427734375, -0.1968536376953125, -0.162322998046875, -0.1277923583984375, -0.09326171875, -0.0587310791015625, -0.024200439453125, 0.0103302001953125, 0.04486083984375, 0.0793914794921875, 0.113922119140625, 0.1484527587890625, 0.1829833984375, 0.2175140380859375, 0.252044677734375, 0.2865753173828125, 0.32110595703125, 0.3556365966796875, 0.390167236328125, 0.4246978759765625, 0.459228515625, 0.4937591552734375, 0.528289794921875, 0.5628204345703125, 0.59735107421875, 0.6318817138671875, 0.666412353515625, 0.7009429931640625, 0.7354736328125, 0.7700042724609375, 0.804534912109375, 0.8390655517578125, 0.87359619140625, 0.9081268310546875, 0.942657470703125, 0.9771881103515625, 1.01171875]}, "gradients/encoder.encoder.layers.6.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 3.0, 0.0, 1.0, 0.0, 4.0, 1.0, 3.0, 3.0, 3.0, 3.0, 7.0, 10.0, 14.0, 20.0, 26.0, 38.0, 56.0, 113.0, 162.0, 279.0, 449.0, 862.0, 2059.0, 6372.0, 24384.0, 125797.0, 616035.0, 218109.0, 39168.0, 9240.0, 2837.0, 1158.0, 556.0, 271.0, 179.0, 121.0, 66.0, 44.0, 38.0, 16.0, 20.0, 13.0, 7.0, 9.0, 6.0, 3.0, 1.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.62890625, -3.4976806640625, -3.366455078125, -3.2352294921875, -3.10400390625, -2.9727783203125, -2.841552734375, -2.7103271484375, -2.5791015625, -2.4478759765625, -2.316650390625, -2.1854248046875, -2.05419921875, -1.9229736328125, -1.791748046875, -1.6605224609375, -1.529296875, -1.3980712890625, -1.266845703125, -1.1356201171875, -1.00439453125, -0.8731689453125, -0.741943359375, -0.6107177734375, -0.4794921875, -0.3482666015625, -0.217041015625, -0.0858154296875, 0.04541015625, 0.1766357421875, 0.307861328125, 0.4390869140625, 0.5703125, 0.7015380859375, 0.832763671875, 0.9639892578125, 1.09521484375, 1.2264404296875, 1.357666015625, 1.4888916015625, 1.6201171875, 1.7513427734375, 1.882568359375, 2.0137939453125, 2.14501953125, 2.2762451171875, 2.407470703125, 2.5386962890625, 2.669921875, 2.8011474609375, 2.932373046875, 3.0635986328125, 3.19482421875, 3.3260498046875, 3.457275390625, 3.5885009765625, 3.7197265625, 3.8509521484375, 3.982177734375, 4.1134033203125, 4.24462890625, 4.3758544921875, 4.507080078125, 4.6383056640625, 4.76953125]}, "gradients/encoder.encoder.layers.6.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 2.0, 4.0, 5.0, 0.0, 6.0, 4.0, 9.0, 10.0, 12.0, 12.0, 14.0, 18.0, 22.0, 23.0, 34.0, 27.0, 32.0, 37.0, 40.0, 39.0, 46.0, 59.0, 34.0, 51.0, 46.0, 36.0, 45.0, 41.0, 37.0, 35.0, 29.0, 20.0, 21.0, 24.0, 26.0, 18.0, 22.0, 15.0, 22.0, 7.0, 4.0, 4.0, 4.0, 4.0, 3.0, 6.0, 2.0, 1.0, 0.0, 4.0], "bins": [-5.30078125, -5.158599853515625, -5.01641845703125, -4.874237060546875, -4.7320556640625, -4.589874267578125, -4.44769287109375, -4.305511474609375, -4.163330078125, -4.021148681640625, -3.87896728515625, -3.736785888671875, -3.5946044921875, -3.452423095703125, -3.31024169921875, -3.168060302734375, -3.02587890625, -2.883697509765625, -2.74151611328125, -2.599334716796875, -2.4571533203125, -2.314971923828125, -2.17279052734375, -2.030609130859375, -1.888427734375, -1.746246337890625, -1.60406494140625, -1.461883544921875, -1.3197021484375, -1.177520751953125, -1.03533935546875, -0.893157958984375, -0.7509765625, -0.608795166015625, -0.46661376953125, -0.324432373046875, -0.1822509765625, -0.040069580078125, 0.10211181640625, 0.244293212890625, 0.386474609375, 0.528656005859375, 0.67083740234375, 0.813018798828125, 0.9552001953125, 1.097381591796875, 1.23956298828125, 1.381744384765625, 1.52392578125, 1.666107177734375, 1.80828857421875, 1.950469970703125, 2.0926513671875, 2.234832763671875, 2.37701416015625, 2.519195556640625, 2.661376953125, 2.803558349609375, 2.94573974609375, 3.087921142578125, 3.2301025390625, 3.372283935546875, 3.51446533203125, 3.656646728515625, 3.798828125]}, "gradients/encoder.encoder.layers.6.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 1.0, 1.0, 4.0, 4.0, 8.0, 10.0, 6.0, 12.0, 20.0, 15.0, 25.0, 39.0, 70.0, 120.0, 206.0, 366.0, 927.0, 2237.0, 8783.0, 52906.0, 529328.0, 397451.0, 44564.0, 7574.0, 2140.0, 812.0, 404.0, 189.0, 114.0, 75.0, 36.0, 24.0, 29.0, 20.0, 11.0, 12.0, 7.0, 2.0, 2.0, 1.0, 0.0, 4.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.912109375, -1.855804443359375, -1.79949951171875, -1.743194580078125, -1.6868896484375, -1.630584716796875, -1.57427978515625, -1.517974853515625, -1.461669921875, -1.405364990234375, -1.34906005859375, -1.292755126953125, -1.2364501953125, -1.180145263671875, -1.12384033203125, -1.067535400390625, -1.01123046875, -0.954925537109375, -0.89862060546875, -0.842315673828125, -0.7860107421875, -0.729705810546875, -0.67340087890625, -0.617095947265625, -0.560791015625, -0.504486083984375, -0.44818115234375, -0.391876220703125, -0.3355712890625, -0.279266357421875, -0.22296142578125, -0.166656494140625, -0.1103515625, -0.054046630859375, 0.00225830078125, 0.058563232421875, 0.1148681640625, 0.171173095703125, 0.22747802734375, 0.283782958984375, 0.340087890625, 0.396392822265625, 0.45269775390625, 0.509002685546875, 0.5653076171875, 0.621612548828125, 0.67791748046875, 0.734222412109375, 0.79052734375, 0.846832275390625, 0.90313720703125, 0.959442138671875, 1.0157470703125, 1.072052001953125, 1.12835693359375, 1.184661865234375, 1.240966796875, 1.297271728515625, 1.35357666015625, 1.409881591796875, 1.4661865234375, 1.522491455078125, 1.57879638671875, 1.635101318359375, 1.69140625]}, "gradients/encoder.encoder.layers.6.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 5.0, 13.0, 9.0, 23.0, 44.0, 69.0, 102.0, 174.0, 179.0, 155.0, 92.0, 51.0, 33.0, 23.0, 22.0, 7.0, 9.0, 0.0, 3.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0007905960083007812, -0.0007714889943599701, -0.0007523819804191589, -0.0007332749664783478, -0.0007141679525375366, -0.0006950609385967255, -0.0006759539246559143, -0.0006568469107151031, -0.000637739896774292, -0.0006186328828334808, -0.0005995258688926697, -0.0005804188549518585, -0.0005613118410110474, -0.0005422048270702362, -0.000523097813129425, -0.0005039907991886139, -0.00048488378524780273, -0.0004657767713069916, -0.0004466697573661804, -0.00042756274342536926, -0.0004084557294845581, -0.00038934871554374695, -0.0003702417016029358, -0.00035113468766212463, -0.0003320276737213135, -0.0003129206597805023, -0.00029381364583969116, -0.00027470663189888, -0.00025559961795806885, -0.0002364926040172577, -0.00021738559007644653, -0.00019827857613563538, -0.00017917156219482422, -0.00016006454825401306, -0.0001409575343132019, -0.00012185052037239075, -0.00010274350643157959, -8.363649249076843e-05, -6.452947854995728e-05, -4.542246460914612e-05, -2.631545066833496e-05, -7.208436727523804e-06, 1.1898577213287354e-05, 3.100559115409851e-05, 5.011260509490967e-05, 6.921961903572083e-05, 8.832663297653198e-05, 0.00010743364691734314, 0.0001265406608581543, 0.00014564767479896545, 0.0001647546887397766, 0.00018386170268058777, 0.00020296871662139893, 0.00022207573056221008, 0.00024118274450302124, 0.0002602897584438324, 0.00027939677238464355, 0.0002985037863254547, 0.00031761080026626587, 0.000336717814207077, 0.0003558248281478882, 0.00037493184208869934, 0.0003940388560295105, 0.00041314586997032166, 0.0004322528839111328]}, "gradients/encoder.encoder.layers.6.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 2.0, 2.0, 1.0, 5.0, 4.0, 5.0, 7.0, 8.0, 18.0, 25.0, 36.0, 42.0, 85.0, 163.0, 309.0, 715.0, 1714.0, 5921.0, 37976.0, 432244.0, 518644.0, 41041.0, 6393.0, 1777.0, 687.0, 335.0, 169.0, 87.0, 61.0, 37.0, 12.0, 14.0, 7.0, 5.0, 6.0, 2.0, 4.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.21875, -2.1571502685546875, -2.095550537109375, -2.0339508056640625, -1.97235107421875, -1.9107513427734375, -1.849151611328125, -1.7875518798828125, -1.7259521484375, -1.6643524169921875, -1.602752685546875, -1.5411529541015625, -1.47955322265625, -1.4179534912109375, -1.356353759765625, -1.2947540283203125, -1.233154296875, -1.1715545654296875, -1.109954833984375, -1.0483551025390625, -0.98675537109375, -0.9251556396484375, -0.863555908203125, -0.8019561767578125, -0.7403564453125, -0.6787567138671875, -0.617156982421875, -0.5555572509765625, -0.49395751953125, -0.4323577880859375, -0.370758056640625, -0.3091583251953125, -0.24755859375, -0.1859588623046875, -0.124359130859375, -0.0627593994140625, -0.00115966796875, 0.0604400634765625, 0.122039794921875, 0.1836395263671875, 0.2452392578125, 0.3068389892578125, 0.368438720703125, 0.4300384521484375, 0.49163818359375, 0.5532379150390625, 0.614837646484375, 0.6764373779296875, 0.738037109375, 0.7996368408203125, 0.861236572265625, 0.9228363037109375, 0.98443603515625, 1.0460357666015625, 1.107635498046875, 1.1692352294921875, 1.2308349609375, 1.2924346923828125, 1.354034423828125, 1.4156341552734375, 1.47723388671875, 1.5388336181640625, 1.600433349609375, 1.6620330810546875, 1.7236328125]}, "gradients/encoder.encoder.layers.6.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 6.0, 3.0, 5.0, 2.0, 9.0, 7.0, 8.0, 17.0, 14.0, 27.0, 41.0, 56.0, 58.0, 80.0, 91.0, 111.0, 95.0, 97.0, 64.0, 59.0, 46.0, 34.0, 27.0, 13.0, 11.0, 12.0, 11.0, 2.0, 2.0, 2.0, 5.0, 3.0, 1.0, 1.0, 1.0], "bins": [-2.64453125, -2.585540771484375, -2.52655029296875, -2.467559814453125, -2.4085693359375, -2.349578857421875, -2.29058837890625, -2.231597900390625, -2.172607421875, -2.113616943359375, -2.05462646484375, -1.995635986328125, -1.9366455078125, -1.877655029296875, -1.81866455078125, -1.759674072265625, -1.70068359375, -1.641693115234375, -1.58270263671875, -1.523712158203125, -1.4647216796875, -1.405731201171875, -1.34674072265625, -1.287750244140625, -1.228759765625, -1.169769287109375, -1.11077880859375, -1.051788330078125, -0.9927978515625, -0.933807373046875, -0.87481689453125, -0.815826416015625, -0.7568359375, -0.697845458984375, -0.63885498046875, -0.579864501953125, -0.5208740234375, -0.461883544921875, -0.40289306640625, -0.343902587890625, -0.284912109375, -0.225921630859375, -0.16693115234375, -0.107940673828125, -0.0489501953125, 0.010040283203125, 0.06903076171875, 0.128021240234375, 0.18701171875, 0.246002197265625, 0.30499267578125, 0.363983154296875, 0.4229736328125, 0.481964111328125, 0.54095458984375, 0.599945068359375, 0.658935546875, 0.717926025390625, 0.77691650390625, 0.835906982421875, 0.8948974609375, 0.953887939453125, 1.01287841796875, 1.071868896484375, 1.130859375]}, "gradients/encoder.encoder.layers.6.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 3.0, 2.0, 3.0, 2.0, 1.0, 2.0, 6.0, 5.0, 13.0, 15.0, 41.0, 70.0, 125.0, 194.0, 214.0, 131.0, 96.0, 45.0, 20.0, 10.0, 6.0, 4.0, 2.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-41.853790283203125, -40.68950653076172, -39.52522277832031, -38.360939025878906, -37.1966552734375, -36.03237533569336, -34.86809158325195, -33.70380783081055, -32.53952407836914, -31.375240325927734, -30.210956573486328, -29.046674728393555, -27.88239097595215, -26.718107223510742, -25.55382537841797, -24.389541625976562, -23.225257873535156, -22.06097412109375, -20.896690368652344, -19.73240852355957, -18.568124771118164, -17.403841018676758, -16.239559173583984, -15.075275421142578, -13.910991668701172, -12.746707916259766, -11.582425117492676, -10.418142318725586, -9.25385856628418, -8.089574813842773, -6.925292015075684, -5.761009216308594, -4.5967254638671875, -3.4324421882629395, -2.2681589126586914, -1.1038756370544434, 0.06040763854980469, 1.2246909141540527, 2.388974189758301, 3.5532569885253906, 4.717540740966797, 5.881824016571045, 7.046107292175293, 8.210390090942383, 9.374673843383789, 10.538957595825195, 11.703240394592285, 12.867523193359375, 14.031806945800781, 15.196090698242188, 16.360374450683594, 17.524656295776367, 18.688940048217773, 19.85322380065918, 21.017505645751953, 22.18178939819336, 23.346073150634766, 24.510356903076172, 25.674640655517578, 26.83892250061035, 28.003206253051758, 29.167490005493164, 30.331771850585938, 31.496055603027344, 32.66033935546875]}, "gradients/encoder.encoder.layers.6.layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 5.0, 1.0, 1.0, 3.0, 4.0, 6.0, 7.0, 11.0, 16.0, 19.0, 12.0, 21.0, 24.0, 31.0, 35.0, 38.0, 26.0, 30.0, 38.0, 46.0, 47.0, 64.0, 67.0, 59.0, 56.0, 42.0, 45.0, 42.0, 36.0, 29.0, 20.0, 23.0, 25.0, 16.0, 18.0, 7.0, 8.0, 9.0, 6.0, 11.0, 2.0, 4.0, 1.0, 0.0, 4.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-19.22872543334961, -18.54726791381836, -17.86581039428711, -17.18435287475586, -16.50289535522461, -15.821436882019043, -15.139978408813477, -14.458520889282227, -13.777063369750977, -13.095605850219727, -12.414148330688477, -11.73268985748291, -11.05123233795166, -10.36977481842041, -9.688316345214844, -9.006858825683594, -8.325401306152344, -7.643943786621094, -6.9624857902526855, -6.281027793884277, -5.599570274353027, -4.918112754821777, -4.236654758453369, -3.555196762084961, -2.873739242553711, -2.192281484603882, -1.5108237266540527, -0.8293659687042236, -0.14790821075439453, 0.5335495471954346, 1.2150073051452637, 1.8964653015136719, 2.577922821044922, 3.259380578994751, 3.94083833694458, 4.622296333312988, 5.303753852844238, 5.985211372375488, 6.6666693687438965, 7.348127365112305, 8.029584884643555, 8.711042404174805, 9.392499923706055, 10.073958396911621, 10.755415916442871, 11.436873435974121, 12.118331909179688, 12.799789428710938, 13.481246948242188, 14.162704467773438, 14.844161987304688, 15.525620460510254, 16.207077026367188, 16.88853645324707, 17.56999397277832, 18.25145149230957, 18.93290901184082, 19.61436653137207, 20.29582405090332, 20.97728157043457, 21.658740997314453, 22.340198516845703, 23.021656036376953, 23.703113555908203, 24.384571075439453]}, "gradients/encoder.encoder.layers.5.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 5.0, 6.0, 2.0, 10.0, 14.0, 25.0, 53.0, 86.0, 142.0, 266.0, 545.0, 1404.0, 4145.0, 16270.0, 90470.0, 1323966.0, 2554673.0, 169335.0, 24605.0, 5583.0, 1614.0, 588.0, 238.0, 124.0, 43.0, 34.0, 11.0, 8.0, 9.0, 7.0, 1.0, 2.0, 3.0, 2.0, 3.0, 2.0, 1.0, 0.0, 1.0], "bins": [-3.48828125, -3.4034881591796875, -3.318695068359375, -3.2339019775390625, -3.14910888671875, -3.0643157958984375, -2.979522705078125, -2.8947296142578125, -2.8099365234375, -2.7251434326171875, -2.640350341796875, -2.5555572509765625, -2.47076416015625, -2.3859710693359375, -2.301177978515625, -2.2163848876953125, -2.131591796875, -2.0467987060546875, -1.962005615234375, -1.8772125244140625, -1.79241943359375, -1.7076263427734375, -1.622833251953125, -1.5380401611328125, -1.4532470703125, -1.3684539794921875, -1.283660888671875, -1.1988677978515625, -1.11407470703125, -1.0292816162109375, -0.944488525390625, -0.8596954345703125, -0.77490234375, -0.6901092529296875, -0.605316162109375, -0.5205230712890625, -0.43572998046875, -0.3509368896484375, -0.266143798828125, -0.1813507080078125, -0.0965576171875, -0.0117645263671875, 0.073028564453125, 0.1578216552734375, 0.24261474609375, 0.3274078369140625, 0.412200927734375, 0.4969940185546875, 0.581787109375, 0.6665802001953125, 0.751373291015625, 0.8361663818359375, 0.92095947265625, 1.0057525634765625, 1.090545654296875, 1.1753387451171875, 1.2601318359375, 1.3449249267578125, 1.429718017578125, 1.5145111083984375, 1.59930419921875, 1.6840972900390625, 1.768890380859375, 1.8536834716796875, 1.9384765625]}, "gradients/encoder.encoder.layers.5.feed_forward.output_dense.bias": {"_type": "histogram", "values": [4.0, 2.0, 1.0, 4.0, 9.0, 6.0, 10.0, 6.0, 14.0, 19.0, 22.0, 23.0, 39.0, 57.0, 43.0, 68.0, 69.0, 68.0, 73.0, 67.0, 66.0, 59.0, 47.0, 54.0, 36.0, 39.0, 23.0, 19.0, 18.0, 21.0, 10.0, 13.0, 3.0, 4.0, 2.0, 0.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.92724609375, -0.8779678344726562, -0.8286895751953125, -0.7794113159179688, -0.730133056640625, -0.6808547973632812, -0.6315765380859375, -0.5822982788085938, -0.53302001953125, -0.48374176025390625, -0.4344635009765625, -0.38518524169921875, -0.335906982421875, -0.28662872314453125, -0.2373504638671875, -0.18807220458984375, -0.1387939453125, -0.08951568603515625, -0.0402374267578125, 0.00904083251953125, 0.058319091796875, 0.10759735107421875, 0.1568756103515625, 0.20615386962890625, 0.25543212890625, 0.30471038818359375, 0.3539886474609375, 0.40326690673828125, 0.452545166015625, 0.5018234252929688, 0.5511016845703125, 0.6003799438476562, 0.649658203125, 0.6989364624023438, 0.7482147216796875, 0.7974929809570312, 0.846771240234375, 0.8960494995117188, 0.9453277587890625, 0.9946060180664062, 1.04388427734375, 1.0931625366210938, 1.1424407958984375, 1.1917190551757812, 1.240997314453125, 1.2902755737304688, 1.3395538330078125, 1.3888320922851562, 1.4381103515625, 1.4873886108398438, 1.5366668701171875, 1.5859451293945312, 1.635223388671875, 1.6845016479492188, 1.7337799072265625, 1.7830581665039062, 1.83233642578125, 1.8816146850585938, 1.9308929443359375, 1.9801712036132812, 2.029449462890625, 2.0787277221679688, 2.1280059814453125, 2.1772842407226562, 2.2265625]}, "gradients/encoder.encoder.layers.5.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 5.0, 1.0, 2.0, 2.0, 6.0, 6.0, 14.0, 17.0, 26.0, 44.0, 62.0, 83.0, 150.0, 249.0, 499.0, 1297.0, 3920.0, 14644.0, 77187.0, 846200.0, 2991212.0, 218272.0, 30198.0, 6432.0, 2071.0, 808.0, 411.0, 172.0, 104.0, 66.0, 40.0, 25.0, 19.0, 14.0, 11.0, 7.0, 4.0, 6.0, 1.0, 2.0, 2.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.845703125, -2.744964599609375, -2.64422607421875, -2.543487548828125, -2.4427490234375, -2.342010498046875, -2.24127197265625, -2.140533447265625, -2.039794921875, -1.939056396484375, -1.83831787109375, -1.737579345703125, -1.6368408203125, -1.536102294921875, -1.43536376953125, -1.334625244140625, -1.23388671875, -1.133148193359375, -1.03240966796875, -0.931671142578125, -0.8309326171875, -0.730194091796875, -0.62945556640625, -0.528717041015625, -0.427978515625, -0.327239990234375, -0.22650146484375, -0.125762939453125, -0.0250244140625, 0.075714111328125, 0.17645263671875, 0.277191162109375, 0.3779296875, 0.478668212890625, 0.57940673828125, 0.680145263671875, 0.7808837890625, 0.881622314453125, 0.98236083984375, 1.083099365234375, 1.183837890625, 1.284576416015625, 1.38531494140625, 1.486053466796875, 1.5867919921875, 1.687530517578125, 1.78826904296875, 1.889007568359375, 1.98974609375, 2.090484619140625, 2.19122314453125, 2.291961669921875, 2.3927001953125, 2.493438720703125, 2.59417724609375, 2.694915771484375, 2.795654296875, 2.896392822265625, 2.99713134765625, 3.097869873046875, 3.1986083984375, 3.299346923828125, 3.40008544921875, 3.500823974609375, 3.6015625]}, "gradients/encoder.encoder.layers.5.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 8.0, 2.0, 4.0, 7.0, 5.0, 8.0, 17.0, 26.0, 12.0, 29.0, 36.0, 53.0, 74.0, 123.0, 164.0, 211.0, 351.0, 565.0, 647.0, 561.0, 364.0, 252.0, 168.0, 101.0, 83.0, 63.0, 44.0, 29.0, 19.0, 20.0, 8.0, 8.0, 6.0, 5.0, 1.0, 2.0, 3.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.015625, -2.920166015625, -2.82470703125, -2.729248046875, -2.6337890625, -2.538330078125, -2.44287109375, -2.347412109375, -2.251953125, -2.156494140625, -2.06103515625, -1.965576171875, -1.8701171875, -1.774658203125, -1.67919921875, -1.583740234375, -1.48828125, -1.392822265625, -1.29736328125, -1.201904296875, -1.1064453125, -1.010986328125, -0.91552734375, -0.820068359375, -0.724609375, -0.629150390625, -0.53369140625, -0.438232421875, -0.3427734375, -0.247314453125, -0.15185546875, -0.056396484375, 0.0390625, 0.134521484375, 0.22998046875, 0.325439453125, 0.4208984375, 0.516357421875, 0.61181640625, 0.707275390625, 0.802734375, 0.898193359375, 0.99365234375, 1.089111328125, 1.1845703125, 1.280029296875, 1.37548828125, 1.470947265625, 1.56640625, 1.661865234375, 1.75732421875, 1.852783203125, 1.9482421875, 2.043701171875, 2.13916015625, 2.234619140625, 2.330078125, 2.425537109375, 2.52099609375, 2.616455078125, 2.7119140625, 2.807373046875, 2.90283203125, 2.998291015625, 3.09375]}, "gradients/encoder.encoder.layers.5.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 1.0, 1.0, 4.0, 21.0, 57.0, 178.0, 333.0, 264.0, 96.0, 31.0, 8.0, 3.0, 2.0, 2.0, 2.0, 3.0, 0.0, 3.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-76.65087890625, -74.4437484741211, -72.23661804199219, -70.02947998046875, -67.82234954833984, -65.61521911621094, -63.408084869384766, -61.200950622558594, -58.99382019042969, -56.78668975830078, -54.57955551147461, -52.37242126464844, -50.16529083251953, -47.958160400390625, -45.75102615356445, -43.54389190673828, -41.336761474609375, -39.12963104248047, -36.9224967956543, -34.715362548828125, -32.50823211669922, -30.30109977722168, -28.09396743774414, -25.8868350982666, -23.679702758789062, -21.472570419311523, -19.265438079833984, -17.058305740356445, -14.851173400878906, -12.644041061401367, -10.436908721923828, -8.229776382446289, -6.02264404296875, -3.815511703491211, -1.6083793640136719, 0.5987529754638672, 2.8058853149414062, 5.013017654418945, 7.220149993896484, 9.427282333374023, 11.634414672851562, 13.841547012329102, 16.04867935180664, 18.25581169128418, 20.46294403076172, 22.670076370239258, 24.877208709716797, 27.084341049194336, 29.291473388671875, 31.498605728149414, 33.70573806762695, 35.912872314453125, 38.12000274658203, 40.32713317871094, 42.53426742553711, 44.74140167236328, 46.94853210449219, 49.155662536621094, 51.362796783447266, 53.56993103027344, 55.777061462402344, 57.98419189453125, 60.19132614135742, 62.398460388183594, 64.6055908203125]}, "gradients/encoder.encoder.layers.5.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 4.0, 2.0, 3.0, 1.0, 5.0, 7.0, 12.0, 7.0, 14.0, 16.0, 19.0, 30.0, 30.0, 27.0, 53.0, 42.0, 40.0, 50.0, 67.0, 53.0, 49.0, 54.0, 54.0, 56.0, 53.0, 41.0, 44.0, 28.0, 31.0, 25.0, 28.0, 16.0, 13.0, 10.0, 6.0, 6.0, 2.0, 3.0, 7.0, 2.0, 1.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-18.493680953979492, -17.888896942138672, -17.28411102294922, -16.6793270111084, -16.074541091918945, -15.469757080078125, -14.864972114562988, -14.260187149047852, -13.655402183532715, -13.050617218017578, -12.445832252502441, -11.841047286987305, -11.236263275146484, -10.631477355957031, -10.026693344116211, -9.421908378601074, -8.817123413085938, -8.2123384475708, -7.607553482055664, -7.0027689933776855, -6.397984027862549, -5.793199062347412, -5.188414573669434, -4.583629608154297, -3.97884464263916, -3.3740596771240234, -2.769274950027466, -2.164490222930908, -1.5597052574157715, -0.9549202919006348, -0.35013556480407715, 0.25464916229248047, 0.8594341278076172, 1.4642189741134644, 2.0690038204193115, 2.673788547515869, 3.278573513031006, 3.8833584785461426, 4.488142967224121, 5.092927932739258, 5.6977128982543945, 6.302497863769531, 6.907282829284668, 7.5120673179626465, 8.116851806640625, 8.721637725830078, 9.326421737670898, 9.931206703186035, 10.535991668701172, 11.140776634216309, 11.745561599731445, 12.350346565246582, 12.955131530761719, 13.559915542602539, 14.164700508117676, 14.769485473632812, 15.37427043914795, 15.979055404663086, 16.583839416503906, 17.18862533569336, 17.79340934753418, 18.398195266723633, 19.002979278564453, 19.607765197753906, 20.212549209594727]}, "gradients/encoder.encoder.layers.5.attention.out_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 4.0, 4.0, 2.0, 4.0, 7.0, 13.0, 21.0, 21.0, 41.0, 55.0, 87.0, 126.0, 239.0, 340.0, 606.0, 1082.0, 1982.0, 3594.0, 6767.0, 12679.0, 24397.0, 45660.0, 84910.0, 151093.0, 223879.0, 207943.0, 129722.0, 71705.0, 38187.0, 20273.0, 10582.0, 5549.0, 3018.0, 1667.0, 929.0, 535.0, 322.0, 186.0, 125.0, 81.0, 42.0, 28.0, 18.0, 9.0, 7.0, 9.0, 9.0, 3.0, 1.0, 4.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.4130859375, -1.3672943115234375, -1.321502685546875, -1.2757110595703125, -1.22991943359375, -1.1841278076171875, -1.138336181640625, -1.0925445556640625, -1.0467529296875, -1.0009613037109375, -0.955169677734375, -0.9093780517578125, -0.86358642578125, -0.8177947998046875, -0.772003173828125, -0.7262115478515625, -0.680419921875, -0.6346282958984375, -0.588836669921875, -0.5430450439453125, -0.49725341796875, -0.4514617919921875, -0.405670166015625, -0.3598785400390625, -0.3140869140625, -0.2682952880859375, -0.222503662109375, -0.1767120361328125, -0.13092041015625, -0.0851287841796875, -0.039337158203125, 0.0064544677734375, 0.05224609375, 0.0980377197265625, 0.143829345703125, 0.1896209716796875, 0.23541259765625, 0.2812042236328125, 0.326995849609375, 0.3727874755859375, 0.4185791015625, 0.4643707275390625, 0.510162353515625, 0.5559539794921875, 0.60174560546875, 0.6475372314453125, 0.693328857421875, 0.7391204833984375, 0.784912109375, 0.8307037353515625, 0.876495361328125, 0.9222869873046875, 0.96807861328125, 1.0138702392578125, 1.059661865234375, 1.1054534912109375, 1.1512451171875, 1.1970367431640625, 1.242828369140625, 1.2886199951171875, 1.33441162109375, 1.3802032470703125, 1.425994873046875, 1.4717864990234375, 1.517578125]}, "gradients/encoder.encoder.layers.5.attention.out_proj.bias": {"_type": "histogram", "values": [3.0, 1.0, 2.0, 2.0, 2.0, 7.0, 4.0, 3.0, 4.0, 6.0, 6.0, 3.0, 7.0, 11.0, 14.0, 17.0, 19.0, 20.0, 18.0, 12.0, 21.0, 32.0, 22.0, 33.0, 42.0, 42.0, 42.0, 36.0, 27.0, 35.0, 48.0, 47.0, 50.0, 38.0, 40.0, 32.0, 31.0, 34.0, 15.0, 20.0, 20.0, 21.0, 17.0, 19.0, 18.0, 12.0, 10.0, 12.0, 14.0, 4.0, 5.0, 5.0, 4.0, 5.0, 1.0, 4.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.875, -0.8459091186523438, -0.8168182373046875, -0.7877273559570312, -0.758636474609375, -0.7295455932617188, -0.7004547119140625, -0.6713638305664062, -0.64227294921875, -0.6131820678710938, -0.5840911865234375, -0.5550003051757812, -0.525909423828125, -0.49681854248046875, -0.4677276611328125, -0.43863677978515625, -0.4095458984375, -0.38045501708984375, -0.3513641357421875, -0.32227325439453125, -0.293182373046875, -0.26409149169921875, -0.2350006103515625, -0.20590972900390625, -0.17681884765625, -0.14772796630859375, -0.1186370849609375, -0.08954620361328125, -0.060455322265625, -0.03136444091796875, -0.0022735595703125, 0.02681732177734375, 0.055908203125, 0.08499908447265625, 0.1140899658203125, 0.14318084716796875, 0.172271728515625, 0.20136260986328125, 0.2304534912109375, 0.25954437255859375, 0.28863525390625, 0.31772613525390625, 0.3468170166015625, 0.37590789794921875, 0.404998779296875, 0.43408966064453125, 0.4631805419921875, 0.49227142333984375, 0.5213623046875, 0.5504531860351562, 0.5795440673828125, 0.6086349487304688, 0.637725830078125, 0.6668167114257812, 0.6959075927734375, 0.7249984741210938, 0.75408935546875, 0.7831802368164062, 0.8122711181640625, 0.8413619995117188, 0.870452880859375, 0.8995437622070312, 0.9286346435546875, 0.9577255249023438, 0.98681640625]}, "gradients/encoder.encoder.layers.5.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 4.0, 1.0, 5.0, 6.0, 11.0, 25.0, 16.0, 29.0, 38.0, 41.0, 52.0, 69.0, 110.0, 141.0, 168.0, 280.0, 395.0, 586.0, 912.0, 1721.0, 3575.0, 9593.0, 34332.0, 155049.0, 580522.0, 197142.0, 43147.0, 11397.0, 4111.0, 1935.0, 1004.0, 696.0, 423.0, 288.0, 207.0, 130.0, 118.0, 66.0, 55.0, 47.0, 31.0, 21.0, 14.0, 12.0, 18.0, 5.0, 4.0, 6.0, 2.0, 1.0, 5.0, 0.0, 3.0], "bins": [-3.552734375, -3.452880859375, -3.35302734375, -3.253173828125, -3.1533203125, -3.053466796875, -2.95361328125, -2.853759765625, -2.75390625, -2.654052734375, -2.55419921875, -2.454345703125, -2.3544921875, -2.254638671875, -2.15478515625, -2.054931640625, -1.955078125, -1.855224609375, -1.75537109375, -1.655517578125, -1.5556640625, -1.455810546875, -1.35595703125, -1.256103515625, -1.15625, -1.056396484375, -0.95654296875, -0.856689453125, -0.7568359375, -0.656982421875, -0.55712890625, -0.457275390625, -0.357421875, -0.257568359375, -0.15771484375, -0.057861328125, 0.0419921875, 0.141845703125, 0.24169921875, 0.341552734375, 0.44140625, 0.541259765625, 0.64111328125, 0.740966796875, 0.8408203125, 0.940673828125, 1.04052734375, 1.140380859375, 1.240234375, 1.340087890625, 1.43994140625, 1.539794921875, 1.6396484375, 1.739501953125, 1.83935546875, 1.939208984375, 2.0390625, 2.138916015625, 2.23876953125, 2.338623046875, 2.4384765625, 2.538330078125, 2.63818359375, 2.738037109375, 2.837890625]}, "gradients/encoder.encoder.layers.5.attention.v_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 3.0, 6.0, 3.0, 3.0, 6.0, 8.0, 6.0, 5.0, 10.0, 13.0, 9.0, 18.0, 17.0, 22.0, 28.0, 31.0, 33.0, 40.0, 25.0, 37.0, 31.0, 42.0, 38.0, 52.0, 42.0, 48.0, 54.0, 36.0, 39.0, 39.0, 25.0, 34.0, 24.0, 26.0, 31.0, 22.0, 16.0, 15.0, 9.0, 11.0, 11.0, 6.0, 8.0, 8.0, 4.0, 0.0, 9.0, 2.0, 4.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 2.0], "bins": [-4.25, -4.11419677734375, -3.9783935546875, -3.84259033203125, -3.706787109375, -3.57098388671875, -3.4351806640625, -3.29937744140625, -3.16357421875, -3.02777099609375, -2.8919677734375, -2.75616455078125, -2.620361328125, -2.48455810546875, -2.3487548828125, -2.21295166015625, -2.0771484375, -1.94134521484375, -1.8055419921875, -1.66973876953125, -1.533935546875, -1.39813232421875, -1.2623291015625, -1.12652587890625, -0.99072265625, -0.85491943359375, -0.7191162109375, -0.58331298828125, -0.447509765625, -0.31170654296875, -0.1759033203125, -0.04010009765625, 0.095703125, 0.23150634765625, 0.3673095703125, 0.50311279296875, 0.638916015625, 0.77471923828125, 0.9105224609375, 1.04632568359375, 1.18212890625, 1.31793212890625, 1.4537353515625, 1.58953857421875, 1.725341796875, 1.86114501953125, 1.9969482421875, 2.13275146484375, 2.2685546875, 2.40435791015625, 2.5401611328125, 2.67596435546875, 2.811767578125, 2.94757080078125, 3.0833740234375, 3.21917724609375, 3.35498046875, 3.49078369140625, 3.6265869140625, 3.76239013671875, 3.898193359375, 4.03399658203125, 4.1697998046875, 4.30560302734375, 4.44140625]}, "gradients/encoder.encoder.layers.5.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 4.0, 1.0, 3.0, 3.0, 6.0, 4.0, 9.0, 11.0, 17.0, 30.0, 38.0, 73.0, 93.0, 180.0, 369.0, 795.0, 2339.0, 8724.0, 51206.0, 562141.0, 368880.0, 42391.0, 7613.0, 2088.0, 740.0, 352.0, 184.0, 82.0, 81.0, 31.0, 23.0, 19.0, 8.0, 11.0, 3.0, 7.0, 3.0, 4.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.625, -1.582183837890625, -1.53936767578125, -1.496551513671875, -1.4537353515625, -1.410919189453125, -1.36810302734375, -1.325286865234375, -1.282470703125, -1.239654541015625, -1.19683837890625, -1.154022216796875, -1.1112060546875, -1.068389892578125, -1.02557373046875, -0.982757568359375, -0.93994140625, -0.897125244140625, -0.85430908203125, -0.811492919921875, -0.7686767578125, -0.725860595703125, -0.68304443359375, -0.640228271484375, -0.597412109375, -0.554595947265625, -0.51177978515625, -0.468963623046875, -0.4261474609375, -0.383331298828125, -0.34051513671875, -0.297698974609375, -0.2548828125, -0.212066650390625, -0.16925048828125, -0.126434326171875, -0.0836181640625, -0.040802001953125, 0.00201416015625, 0.044830322265625, 0.087646484375, 0.130462646484375, 0.17327880859375, 0.216094970703125, 0.2589111328125, 0.301727294921875, 0.34454345703125, 0.387359619140625, 0.43017578125, 0.472991943359375, 0.51580810546875, 0.558624267578125, 0.6014404296875, 0.644256591796875, 0.68707275390625, 0.729888916015625, 0.772705078125, 0.815521240234375, 0.85833740234375, 0.901153564453125, 0.9439697265625, 0.986785888671875, 1.02960205078125, 1.072418212890625, 1.115234375]}, "gradients/encoder.encoder.layers.5.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 1.0, 0.0, 3.0, 2.0, 2.0, 3.0, 5.0, 6.0, 6.0, 8.0, 14.0, 9.0, 16.0, 13.0, 36.0, 33.0, 50.0, 52.0, 52.0, 68.0, 65.0, 77.0, 59.0, 72.0, 58.0, 54.0, 46.0, 32.0, 39.0, 24.0, 19.0, 20.0, 15.0, 10.0, 10.0, 5.0, 4.0, 5.0, 8.0, 5.0, 3.0, 0.0, 1.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0], "bins": [-0.0002455711364746094, -0.00023787468671798706, -0.00023017823696136475, -0.00022248178720474243, -0.00021478533744812012, -0.0002070888876914978, -0.0001993924379348755, -0.00019169598817825317, -0.00018399953842163086, -0.00017630308866500854, -0.00016860663890838623, -0.00016091018915176392, -0.0001532137393951416, -0.0001455172896385193, -0.00013782083988189697, -0.00013012439012527466, -0.00012242794036865234, -0.00011473149061203003, -0.00010703504085540771, -9.93385910987854e-05, -9.164214134216309e-05, -8.394569158554077e-05, -7.624924182891846e-05, -6.855279207229614e-05, -6.085634231567383e-05, -5.3159892559051514e-05, -4.54634428024292e-05, -3.7766993045806885e-05, -3.007054328918457e-05, -2.2374093532562256e-05, -1.4677643775939941e-05, -6.981194019317627e-06, 7.152557373046875e-07, 8.411705493927002e-06, 1.6108155250549316e-05, 2.380460500717163e-05, 3.1501054763793945e-05, 3.919750452041626e-05, 4.6893954277038574e-05, 5.459040403366089e-05, 6.22868537902832e-05, 6.998330354690552e-05, 7.767975330352783e-05, 8.537620306015015e-05, 9.307265281677246e-05, 0.00010076910257339478, 0.00010846555233001709, 0.0001161620020866394, 0.00012385845184326172, 0.00013155490159988403, 0.00013925135135650635, 0.00014694780111312866, 0.00015464425086975098, 0.0001623407006263733, 0.0001700371503829956, 0.00017773360013961792, 0.00018543004989624023, 0.00019312649965286255, 0.00020082294940948486, 0.00020851939916610718, 0.0002162158489227295, 0.0002239122986793518, 0.00023160874843597412, 0.00023930519819259644, 0.00024700164794921875]}, "gradients/encoder.encoder.layers.5.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 4.0, 5.0, 3.0, 6.0, 12.0, 10.0, 26.0, 23.0, 41.0, 91.0, 156.0, 293.0, 766.0, 2067.0, 9088.0, 89196.0, 800293.0, 130725.0, 11603.0, 2563.0, 821.0, 367.0, 196.0, 99.0, 41.0, 28.0, 19.0, 9.0, 4.0, 5.0, 6.0, 2.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.99267578125, -0.9419479370117188, -0.8912200927734375, -0.8404922485351562, -0.789764404296875, -0.7390365600585938, -0.6883087158203125, -0.6375808715820312, -0.58685302734375, -0.5361251831054688, -0.4853973388671875, -0.43466949462890625, -0.383941650390625, -0.33321380615234375, -0.2824859619140625, -0.23175811767578125, -0.1810302734375, -0.13030242919921875, -0.0795745849609375, -0.02884674072265625, 0.021881103515625, 0.07260894775390625, 0.1233367919921875, 0.17406463623046875, 0.22479248046875, 0.27552032470703125, 0.3262481689453125, 0.37697601318359375, 0.427703857421875, 0.47843170166015625, 0.5291595458984375, 0.5798873901367188, 0.630615234375, 0.6813430786132812, 0.7320709228515625, 0.7827987670898438, 0.833526611328125, 0.8842544555664062, 0.9349822998046875, 0.9857101440429688, 1.03643798828125, 1.0871658325195312, 1.1378936767578125, 1.1886215209960938, 1.239349365234375, 1.2900772094726562, 1.3408050537109375, 1.3915328979492188, 1.4422607421875, 1.4929885864257812, 1.5437164306640625, 1.5944442749023438, 1.645172119140625, 1.6958999633789062, 1.7466278076171875, 1.7973556518554688, 1.84808349609375, 1.8988113403320312, 1.9495391845703125, 2.0002670288085938, 2.050994873046875, 2.1017227172851562, 2.1524505615234375, 2.2031784057617188, 2.25390625]}, "gradients/encoder.encoder.layers.5.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 5.0, 3.0, 6.0, 9.0, 11.0, 12.0, 13.0, 10.0, 18.0, 27.0, 25.0, 35.0, 35.0, 55.0, 42.0, 52.0, 54.0, 57.0, 48.0, 55.0, 49.0, 64.0, 46.0, 45.0, 33.0, 32.0, 24.0, 28.0, 26.0, 23.0, 12.0, 9.0, 7.0, 8.0, 6.0, 4.0, 5.0, 5.0, 0.0, 2.0, 1.0, 3.0, 3.0, 0.0, 0.0, 2.0], "bins": [-1.0498046875, -1.0211944580078125, -0.992584228515625, -0.9639739990234375, -0.93536376953125, -0.9067535400390625, -0.878143310546875, -0.8495330810546875, -0.8209228515625, -0.7923126220703125, -0.763702392578125, -0.7350921630859375, -0.70648193359375, -0.6778717041015625, -0.649261474609375, -0.6206512451171875, -0.592041015625, -0.5634307861328125, -0.534820556640625, -0.5062103271484375, -0.47760009765625, -0.4489898681640625, -0.420379638671875, -0.3917694091796875, -0.3631591796875, -0.3345489501953125, -0.305938720703125, -0.2773284912109375, -0.24871826171875, -0.2201080322265625, -0.191497802734375, -0.1628875732421875, -0.13427734375, -0.1056671142578125, -0.077056884765625, -0.0484466552734375, -0.01983642578125, 0.0087738037109375, 0.037384033203125, 0.0659942626953125, 0.0946044921875, 0.1232147216796875, 0.151824951171875, 0.1804351806640625, 0.20904541015625, 0.2376556396484375, 0.266265869140625, 0.2948760986328125, 0.323486328125, 0.3520965576171875, 0.380706787109375, 0.4093170166015625, 0.43792724609375, 0.4665374755859375, 0.495147705078125, 0.5237579345703125, 0.5523681640625, 0.5809783935546875, 0.609588623046875, 0.6381988525390625, 0.66680908203125, 0.6954193115234375, 0.724029541015625, 0.7526397705078125, 0.78125]}, "gradients/encoder.encoder.layers.5.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 3.0, 3.0, 15.0, 57.0, 165.0, 323.0, 256.0, 125.0, 41.0, 9.0, 4.0, 3.0, 2.0, 1.0, 3.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-62.32345199584961, -60.5822639465332, -58.84107208251953, -57.099884033203125, -55.35869598388672, -53.61750411987305, -51.87631607055664, -50.13512420654297, -48.39393615722656, -46.652748107910156, -44.911556243896484, -43.17036819458008, -41.429176330566406, -39.68798828125, -37.946800231933594, -36.20561218261719, -34.464420318603516, -32.72323226928711, -30.982040405273438, -29.24085235595703, -27.499662399291992, -25.758472442626953, -24.017284393310547, -22.276094436645508, -20.53490447998047, -18.79371452331543, -17.05252456665039, -15.311336517333984, -13.570146560668945, -11.828956604003906, -10.087767601013184, -8.346578598022461, -6.6053924560546875, -4.864202976226807, -3.123013496398926, -1.381824016571045, 0.35936546325683594, 2.100555419921875, 3.8417444229125977, 5.58293342590332, 7.324123382568359, 9.065313339233398, 10.806502342224121, 12.547691345214844, 14.288881301879883, 16.030071258544922, 17.771259307861328, 19.512449264526367, 21.253639221191406, 22.994829177856445, 24.736019134521484, 26.47720718383789, 28.21839714050293, 29.95958709716797, 31.700775146484375, 33.44196319580078, 35.18315505981445, 36.92434310913086, 38.66553497314453, 40.40672302246094, 42.147911071777344, 43.889102935791016, 45.63029098510742, 47.371482849121094, 49.1126708984375]}, "gradients/encoder.encoder.layers.5.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 3.0, 1.0, 3.0, 1.0, 5.0, 3.0, 3.0, 4.0, 6.0, 8.0, 11.0, 9.0, 15.0, 7.0, 13.0, 19.0, 15.0, 21.0, 30.0, 39.0, 15.0, 24.0, 32.0, 33.0, 38.0, 62.0, 57.0, 66.0, 54.0, 39.0, 41.0, 40.0, 32.0, 24.0, 37.0, 32.0, 26.0, 20.0, 19.0, 9.0, 17.0, 10.0, 13.0, 16.0, 12.0, 7.0, 5.0, 3.0, 3.0, 2.0, 3.0, 4.0, 2.0, 2.0, 1.0, 0.0, 4.0], "bins": [-18.59769630432129, -18.057756423950195, -17.5178165435791, -16.977876663208008, -16.437936782836914, -15.89799690246582, -15.35805606842041, -14.818116188049316, -14.278176307678223, -13.738236427307129, -13.198296546936035, -12.658356666564941, -12.118415832519531, -11.578475952148438, -11.038536071777344, -10.49859619140625, -9.958656311035156, -9.418716430664062, -8.878776550292969, -8.338836669921875, -7.798896312713623, -7.258956432342529, -6.719016075134277, -6.179076194763184, -5.63913631439209, -5.099196434020996, -4.559256553649902, -4.01931619644165, -3.4793763160705566, -2.939436435699463, -2.39949631690979, -1.8595561981201172, -1.3196144104003906, -0.7796744108200073, -0.23973441123962402, 0.3002055883407593, 0.8401455879211426, 1.3800854682922363, 1.9200255870819092, 2.459965705871582, 2.999905586242676, 3.5398454666137695, 4.079785346984863, 4.619725704193115, 5.159665584564209, 5.699605464935303, 6.239545822143555, 6.779485702514648, 7.319425582885742, 7.859365463256836, 8.39930534362793, 8.939245223999023, 9.479185104370117, 10.019124984741211, 10.559065818786621, 11.099005699157715, 11.638945579528809, 12.178885459899902, 12.718825340270996, 13.25876522064209, 13.7987060546875, 14.338645935058594, 14.878585815429688, 15.418525695800781, 15.958465576171875]}, "gradients/encoder.encoder.layers.4.feed_forward.output_dense.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 4.0, 1.0, 2.0, 7.0, 3.0, 7.0, 13.0, 10.0, 20.0, 34.0, 45.0, 78.0, 119.0, 160.0, 325.0, 689.0, 1680.0, 5184.0, 19449.0, 101503.0, 998580.0, 2687704.0, 318540.0, 45020.0, 10317.0, 2944.0, 994.0, 391.0, 176.0, 111.0, 77.0, 32.0, 15.0, 18.0, 13.0, 4.0, 12.0, 7.0, 2.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0], "bins": [-2.78515625, -2.7142333984375, -2.643310546875, -2.5723876953125, -2.50146484375, -2.4305419921875, -2.359619140625, -2.2886962890625, -2.2177734375, -2.1468505859375, -2.075927734375, -2.0050048828125, -1.93408203125, -1.8631591796875, -1.792236328125, -1.7213134765625, -1.650390625, -1.5794677734375, -1.508544921875, -1.4376220703125, -1.36669921875, -1.2957763671875, -1.224853515625, -1.1539306640625, -1.0830078125, -1.0120849609375, -0.941162109375, -0.8702392578125, -0.79931640625, -0.7283935546875, -0.657470703125, -0.5865478515625, -0.515625, -0.4447021484375, -0.373779296875, -0.3028564453125, -0.23193359375, -0.1610107421875, -0.090087890625, -0.0191650390625, 0.0517578125, 0.1226806640625, 0.193603515625, 0.2645263671875, 0.33544921875, 0.4063720703125, 0.477294921875, 0.5482177734375, 0.619140625, 0.6900634765625, 0.760986328125, 0.8319091796875, 0.90283203125, 0.9737548828125, 1.044677734375, 1.1156005859375, 1.1865234375, 1.2574462890625, 1.328369140625, 1.3992919921875, 1.47021484375, 1.5411376953125, 1.612060546875, 1.6829833984375, 1.75390625]}, "gradients/encoder.encoder.layers.4.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 6.0, 3.0, 5.0, 13.0, 11.0, 15.0, 23.0, 20.0, 27.0, 33.0, 41.0, 42.0, 52.0, 62.0, 60.0, 73.0, 68.0, 66.0, 65.0, 58.0, 60.0, 46.0, 32.0, 24.0, 22.0, 20.0, 15.0, 19.0, 8.0, 8.0, 7.0, 3.0, 3.0, 2.0, 1.0, 0.0, 2.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-1.7451171875, -1.6994781494140625, -1.653839111328125, -1.6082000732421875, -1.56256103515625, -1.5169219970703125, -1.471282958984375, -1.4256439208984375, -1.3800048828125, -1.3343658447265625, -1.288726806640625, -1.2430877685546875, -1.19744873046875, -1.1518096923828125, -1.106170654296875, -1.0605316162109375, -1.014892578125, -0.9692535400390625, -0.923614501953125, -0.8779754638671875, -0.83233642578125, -0.7866973876953125, -0.741058349609375, -0.6954193115234375, -0.6497802734375, -0.6041412353515625, -0.558502197265625, -0.5128631591796875, -0.46722412109375, -0.4215850830078125, -0.375946044921875, -0.3303070068359375, -0.28466796875, -0.2390289306640625, -0.193389892578125, -0.1477508544921875, -0.10211181640625, -0.0564727783203125, -0.010833740234375, 0.0348052978515625, 0.0804443359375, 0.1260833740234375, 0.171722412109375, 0.2173614501953125, 0.26300048828125, 0.3086395263671875, 0.354278564453125, 0.3999176025390625, 0.445556640625, 0.4911956787109375, 0.536834716796875, 0.5824737548828125, 0.62811279296875, 0.6737518310546875, 0.719390869140625, 0.7650299072265625, 0.8106689453125, 0.8563079833984375, 0.901947021484375, 0.9475860595703125, 0.99322509765625, 1.0388641357421875, 1.084503173828125, 1.1301422119140625, 1.17578125]}, "gradients/encoder.encoder.layers.4.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 3.0, 0.0, 1.0, 1.0, 0.0, 0.0, 5.0, 2.0, 3.0, 9.0, 19.0, 22.0, 38.0, 63.0, 147.0, 232.0, 503.0, 1143.0, 3769.0, 15920.0, 115474.0, 2521481.0, 1448061.0, 72658.0, 10567.0, 2580.0, 852.0, 336.0, 161.0, 106.0, 57.0, 39.0, 13.0, 10.0, 7.0, 5.0, 3.0, 1.0, 3.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.79296875, -2.6851806640625, -2.577392578125, -2.4696044921875, -2.36181640625, -2.2540283203125, -2.146240234375, -2.0384521484375, -1.9306640625, -1.8228759765625, -1.715087890625, -1.6072998046875, -1.49951171875, -1.3917236328125, -1.283935546875, -1.1761474609375, -1.068359375, -0.9605712890625, -0.852783203125, -0.7449951171875, -0.63720703125, -0.5294189453125, -0.421630859375, -0.3138427734375, -0.2060546875, -0.0982666015625, 0.009521484375, 0.1173095703125, 0.22509765625, 0.3328857421875, 0.440673828125, 0.5484619140625, 0.65625, 0.7640380859375, 0.871826171875, 0.9796142578125, 1.08740234375, 1.1951904296875, 1.302978515625, 1.4107666015625, 1.5185546875, 1.6263427734375, 1.734130859375, 1.8419189453125, 1.94970703125, 2.0574951171875, 2.165283203125, 2.2730712890625, 2.380859375, 2.4886474609375, 2.596435546875, 2.7042236328125, 2.81201171875, 2.9197998046875, 3.027587890625, 3.1353759765625, 3.2431640625, 3.3509521484375, 3.458740234375, 3.5665283203125, 3.67431640625, 3.7821044921875, 3.889892578125, 3.9976806640625, 4.10546875]}, "gradients/encoder.encoder.layers.4.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 4.0, 1.0, 1.0, 7.0, 9.0, 3.0, 12.0, 16.0, 18.0, 33.0, 39.0, 66.0, 122.0, 188.0, 350.0, 553.0, 894.0, 684.0, 475.0, 229.0, 135.0, 103.0, 56.0, 29.0, 14.0, 20.0, 7.0, 7.0, 4.0, 2.0, 3.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.375, -4.25604248046875, -4.1370849609375, -4.01812744140625, -3.899169921875, -3.78021240234375, -3.6612548828125, -3.54229736328125, -3.42333984375, -3.30438232421875, -3.1854248046875, -3.06646728515625, -2.947509765625, -2.82855224609375, -2.7095947265625, -2.59063720703125, -2.4716796875, -2.35272216796875, -2.2337646484375, -2.11480712890625, -1.995849609375, -1.87689208984375, -1.7579345703125, -1.63897705078125, -1.52001953125, -1.40106201171875, -1.2821044921875, -1.16314697265625, -1.044189453125, -0.92523193359375, -0.8062744140625, -0.68731689453125, -0.568359375, -0.44940185546875, -0.3304443359375, -0.21148681640625, -0.092529296875, 0.02642822265625, 0.1453857421875, 0.26434326171875, 0.38330078125, 0.50225830078125, 0.6212158203125, 0.74017333984375, 0.859130859375, 0.97808837890625, 1.0970458984375, 1.21600341796875, 1.3349609375, 1.45391845703125, 1.5728759765625, 1.69183349609375, 1.810791015625, 1.92974853515625, 2.0487060546875, 2.16766357421875, 2.28662109375, 2.40557861328125, 2.5245361328125, 2.64349365234375, 2.762451171875, 2.88140869140625, 3.0003662109375, 3.11932373046875, 3.23828125]}, "gradients/encoder.encoder.layers.4.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 0.0, 1.0, 3.0, 5.0, 15.0, 24.0, 86.0, 163.0, 247.0, 223.0, 134.0, 52.0, 26.0, 14.0, 4.0, 3.0, 2.0, 1.0, 3.0, 1.0, 0.0, 1.0, 1.0, 3.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-65.60090637207031, -64.03793334960938, -62.474952697753906, -60.9119758605957, -59.3489990234375, -57.7860221862793, -56.223045349121094, -54.66006851196289, -53.09709167480469, -51.534114837646484, -49.97113800048828, -48.40816116333008, -46.845184326171875, -45.28220748901367, -43.71923065185547, -42.156253814697266, -40.59327697753906, -39.03030014038086, -37.467323303222656, -35.90434646606445, -34.34136962890625, -32.77839279174805, -31.215415954589844, -29.65243911743164, -28.089462280273438, -26.526485443115234, -24.96350860595703, -23.400531768798828, -21.837554931640625, -20.274578094482422, -18.71160125732422, -17.148624420166016, -15.585643768310547, -14.022666931152344, -12.45969009399414, -10.896713256835938, -9.333736419677734, -7.770759582519531, -6.207782745361328, -4.644805908203125, -3.081829071044922, -1.5188522338867188, 0.044124603271484375, 1.6071014404296875, 3.1700782775878906, 4.733055114746094, 6.296031951904297, 7.8590087890625, 9.421985626220703, 10.984962463378906, 12.54793930053711, 14.110916137695312, 15.673892974853516, 17.23686981201172, 18.799846649169922, 20.362823486328125, 21.925800323486328, 23.48877716064453, 25.051753997802734, 26.614730834960938, 28.17770767211914, 29.740684509277344, 31.303661346435547, 32.86663818359375, 34.42961502075195]}, "gradients/encoder.encoder.layers.4.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 4.0, 5.0, 8.0, 4.0, 6.0, 6.0, 8.0, 13.0, 25.0, 26.0, 31.0, 39.0, 50.0, 51.0, 56.0, 47.0, 59.0, 65.0, 57.0, 66.0, 59.0, 50.0, 49.0, 38.0, 48.0, 38.0, 29.0, 22.0, 15.0, 10.0, 7.0, 6.0, 5.0, 5.0, 5.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-17.67677116394043, -17.081113815307617, -16.485456466674805, -15.889799118041992, -15.29414176940918, -14.698484420776367, -14.102828025817871, -13.507170677185059, -12.911513328552246, -12.315855979919434, -11.720198631286621, -11.124541282653809, -10.528884887695312, -9.9332275390625, -9.337570190429688, -8.741912841796875, -8.146255493164062, -7.55059814453125, -6.9549407958984375, -6.359283924102783, -5.763626575469971, -5.167969226837158, -4.572312355041504, -3.9766550064086914, -3.380997657775879, -2.7853403091430664, -2.189683198928833, -1.59402596950531, -0.9983687400817871, -0.4027113914489746, 0.1929457187652588, 0.7886028289794922, 1.3842601776123047, 1.9799174070358276, 2.5755746364593506, 3.171231746673584, 3.7668890953063965, 4.362546443939209, 4.958203315734863, 5.553860664367676, 6.149518013000488, 6.745175361633301, 7.340832710266113, 7.936489582061768, 8.532146453857422, 9.127803802490234, 9.723461151123047, 10.31911849975586, 10.914775848388672, 11.510433197021484, 12.106090545654297, 12.70174789428711, 13.297405242919922, 13.893062591552734, 14.48871898651123, 15.084376335144043, 15.680033683776855, 16.27569007873535, 16.871347427368164, 17.467004776000977, 18.06266212463379, 18.6583194732666, 19.253976821899414, 19.849634170532227, 20.44529151916504]}, "gradients/encoder.encoder.layers.4.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 5.0, 3.0, 1.0, 4.0, 6.0, 12.0, 19.0, 35.0, 52.0, 62.0, 114.0, 171.0, 255.0, 426.0, 722.0, 1173.0, 1794.0, 3151.0, 5189.0, 8803.0, 15116.0, 26367.0, 45991.0, 80023.0, 135297.0, 202056.0, 199861.0, 134885.0, 79411.0, 45103.0, 25753.0, 15000.0, 8631.0, 4970.0, 3095.0, 1841.0, 1159.0, 741.0, 455.0, 294.0, 199.0, 118.0, 66.0, 49.0, 31.0, 27.0, 9.0, 7.0, 4.0, 5.0, 2.0, 2.0, 4.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.4169921875, -1.3740386962890625, -1.331085205078125, -1.2881317138671875, -1.24517822265625, -1.2022247314453125, -1.159271240234375, -1.1163177490234375, -1.0733642578125, -1.0304107666015625, -0.987457275390625, -0.9445037841796875, -0.90155029296875, -0.8585968017578125, -0.815643310546875, -0.7726898193359375, -0.729736328125, -0.6867828369140625, -0.643829345703125, -0.6008758544921875, -0.55792236328125, -0.5149688720703125, -0.472015380859375, -0.4290618896484375, -0.3861083984375, -0.3431549072265625, -0.300201416015625, -0.2572479248046875, -0.21429443359375, -0.1713409423828125, -0.128387451171875, -0.0854339599609375, -0.04248046875, 0.0004730224609375, 0.043426513671875, 0.0863800048828125, 0.12933349609375, 0.1722869873046875, 0.215240478515625, 0.2581939697265625, 0.3011474609375, 0.3441009521484375, 0.387054443359375, 0.4300079345703125, 0.47296142578125, 0.5159149169921875, 0.558868408203125, 0.6018218994140625, 0.644775390625, 0.6877288818359375, 0.730682373046875, 0.7736358642578125, 0.81658935546875, 0.8595428466796875, 0.902496337890625, 0.9454498291015625, 0.9884033203125, 1.0313568115234375, 1.074310302734375, 1.1172637939453125, 1.16021728515625, 1.2031707763671875, 1.246124267578125, 1.2890777587890625, 1.33203125]}, "gradients/encoder.encoder.layers.4.attention.out_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 1.0, 3.0, 3.0, 1.0, 0.0, 1.0, 0.0, 2.0, 7.0, 8.0, 5.0, 9.0, 9.0, 8.0, 11.0, 6.0, 15.0, 26.0, 15.0, 28.0, 32.0, 32.0, 49.0, 44.0, 41.0, 36.0, 41.0, 51.0, 40.0, 50.0, 40.0, 47.0, 45.0, 50.0, 31.0, 37.0, 31.0, 18.0, 19.0, 22.0, 17.0, 18.0, 14.0, 15.0, 13.0, 6.0, 6.0, 1.0, 5.0, 1.0, 3.0, 4.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.09765625, -1.063873291015625, -1.03009033203125, -0.996307373046875, -0.9625244140625, -0.928741455078125, -0.89495849609375, -0.861175537109375, -0.827392578125, -0.793609619140625, -0.75982666015625, -0.726043701171875, -0.6922607421875, -0.658477783203125, -0.62469482421875, -0.590911865234375, -0.55712890625, -0.523345947265625, -0.48956298828125, -0.455780029296875, -0.4219970703125, -0.388214111328125, -0.35443115234375, -0.320648193359375, -0.286865234375, -0.253082275390625, -0.21929931640625, -0.185516357421875, -0.1517333984375, -0.117950439453125, -0.08416748046875, -0.050384521484375, -0.0166015625, 0.017181396484375, 0.05096435546875, 0.084747314453125, 0.1185302734375, 0.152313232421875, 0.18609619140625, 0.219879150390625, 0.253662109375, 0.287445068359375, 0.32122802734375, 0.355010986328125, 0.3887939453125, 0.422576904296875, 0.45635986328125, 0.490142822265625, 0.52392578125, 0.557708740234375, 0.59149169921875, 0.625274658203125, 0.6590576171875, 0.692840576171875, 0.72662353515625, 0.760406494140625, 0.794189453125, 0.827972412109375, 0.86175537109375, 0.895538330078125, 0.9293212890625, 0.963104248046875, 0.99688720703125, 1.030670166015625, 1.064453125]}, "gradients/encoder.encoder.layers.4.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 2.0, 1.0, 5.0, 8.0, 13.0, 8.0, 17.0, 29.0, 34.0, 43.0, 80.0, 89.0, 122.0, 193.0, 300.0, 422.0, 713.0, 1419.0, 3150.0, 9755.0, 43646.0, 246290.0, 613550.0, 99187.0, 19386.0, 5255.0, 2068.0, 1039.0, 592.0, 370.0, 254.0, 156.0, 111.0, 89.0, 56.0, 38.0, 20.0, 13.0, 14.0, 8.0, 8.0, 5.0, 3.0, 0.0, 3.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-4.2578125, -4.129730224609375, -4.00164794921875, -3.873565673828125, -3.7454833984375, -3.617401123046875, -3.48931884765625, -3.361236572265625, -3.233154296875, -3.105072021484375, -2.97698974609375, -2.848907470703125, -2.7208251953125, -2.592742919921875, -2.46466064453125, -2.336578369140625, -2.20849609375, -2.080413818359375, -1.95233154296875, -1.824249267578125, -1.6961669921875, -1.568084716796875, -1.44000244140625, -1.311920166015625, -1.183837890625, -1.055755615234375, -0.92767333984375, -0.799591064453125, -0.6715087890625, -0.543426513671875, -0.41534423828125, -0.287261962890625, -0.1591796875, -0.031097412109375, 0.09698486328125, 0.225067138671875, 0.3531494140625, 0.481231689453125, 0.60931396484375, 0.737396240234375, 0.865478515625, 0.993560791015625, 1.12164306640625, 1.249725341796875, 1.3778076171875, 1.505889892578125, 1.63397216796875, 1.762054443359375, 1.89013671875, 2.018218994140625, 2.14630126953125, 2.274383544921875, 2.4024658203125, 2.530548095703125, 2.65863037109375, 2.786712646484375, 2.914794921875, 3.042877197265625, 3.17095947265625, 3.299041748046875, 3.4271240234375, 3.555206298828125, 3.68328857421875, 3.811370849609375, 3.939453125]}, "gradients/encoder.encoder.layers.4.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 2.0, 2.0, 4.0, 5.0, 5.0, 4.0, 7.0, 8.0, 10.0, 12.0, 18.0, 26.0, 31.0, 25.0, 33.0, 48.0, 48.0, 56.0, 39.0, 75.0, 65.0, 62.0, 67.0, 40.0, 39.0, 57.0, 34.0, 34.0, 31.0, 29.0, 16.0, 26.0, 18.0, 12.0, 7.0, 7.0, 4.0, 3.0, 4.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.65234375, -4.47662353515625, -4.3009033203125, -4.12518310546875, -3.949462890625, -3.77374267578125, -3.5980224609375, -3.42230224609375, -3.24658203125, -3.07086181640625, -2.8951416015625, -2.71942138671875, -2.543701171875, -2.36798095703125, -2.1922607421875, -2.01654052734375, -1.8408203125, -1.66510009765625, -1.4893798828125, -1.31365966796875, -1.137939453125, -0.96221923828125, -0.7864990234375, -0.61077880859375, -0.43505859375, -0.25933837890625, -0.0836181640625, 0.09210205078125, 0.267822265625, 0.44354248046875, 0.6192626953125, 0.79498291015625, 0.970703125, 1.14642333984375, 1.3221435546875, 1.49786376953125, 1.673583984375, 1.84930419921875, 2.0250244140625, 2.20074462890625, 2.37646484375, 2.55218505859375, 2.7279052734375, 2.90362548828125, 3.079345703125, 3.25506591796875, 3.4307861328125, 3.60650634765625, 3.7822265625, 3.95794677734375, 4.1336669921875, 4.30938720703125, 4.485107421875, 4.66082763671875, 4.8365478515625, 5.01226806640625, 5.18798828125, 5.36370849609375, 5.5394287109375, 5.71514892578125, 5.890869140625, 6.06658935546875, 6.2423095703125, 6.41802978515625, 6.59375]}, "gradients/encoder.encoder.layers.4.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 4.0, 4.0, 3.0, 2.0, 5.0, 9.0, 13.0, 13.0, 19.0, 35.0, 44.0, 84.0, 104.0, 165.0, 262.0, 507.0, 950.0, 2103.0, 5694.0, 17773.0, 63377.0, 229201.0, 539332.0, 134299.0, 36833.0, 10838.0, 3725.0, 1476.0, 658.0, 378.0, 229.0, 130.0, 82.0, 58.0, 47.0, 28.0, 20.0, 12.0, 11.0, 8.0, 9.0, 9.0, 3.0, 1.0, 4.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.849609375, -0.8224563598632812, -0.7953033447265625, -0.7681503295898438, -0.740997314453125, -0.7138442993164062, -0.6866912841796875, -0.6595382690429688, -0.63238525390625, -0.6052322387695312, -0.5780792236328125, -0.5509262084960938, -0.523773193359375, -0.49662017822265625, -0.4694671630859375, -0.44231414794921875, -0.4151611328125, -0.38800811767578125, -0.3608551025390625, -0.33370208740234375, -0.306549072265625, -0.27939605712890625, -0.2522430419921875, -0.22509002685546875, -0.19793701171875, -0.17078399658203125, -0.1436309814453125, -0.11647796630859375, -0.089324951171875, -0.06217193603515625, -0.0350189208984375, -0.00786590576171875, 0.019287109375, 0.04644012451171875, 0.0735931396484375, 0.10074615478515625, 0.127899169921875, 0.15505218505859375, 0.1822052001953125, 0.20935821533203125, 0.23651123046875, 0.26366424560546875, 0.2908172607421875, 0.31797027587890625, 0.345123291015625, 0.37227630615234375, 0.3994293212890625, 0.42658233642578125, 0.4537353515625, 0.48088836669921875, 0.5080413818359375, 0.5351943969726562, 0.562347412109375, 0.5895004272460938, 0.6166534423828125, 0.6438064575195312, 0.67095947265625, 0.6981124877929688, 0.7252655029296875, 0.7524185180664062, 0.779571533203125, 0.8067245483398438, 0.8338775634765625, 0.8610305786132812, 0.88818359375]}, "gradients/encoder.encoder.layers.4.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 0.0, 3.0, 1.0, 1.0, 3.0, 2.0, 4.0, 11.0, 6.0, 13.0, 13.0, 20.0, 23.0, 28.0, 48.0, 65.0, 71.0, 86.0, 98.0, 108.0, 83.0, 70.0, 57.0, 46.0, 32.0, 37.0, 19.0, 12.0, 15.0, 16.0, 5.0, 2.0, 3.0, 4.0, 0.0, 4.0, 0.0, 2.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00029659271240234375, -0.0002851225435733795, -0.0002736523747444153, -0.00026218220591545105, -0.0002507120370864868, -0.00023924186825752258, -0.00022777169942855835, -0.00021630153059959412, -0.00020483136177062988, -0.00019336119294166565, -0.00018189102411270142, -0.00017042085528373718, -0.00015895068645477295, -0.00014748051762580872, -0.00013601034879684448, -0.00012454017996788025, -0.00011307001113891602, -0.00010159984230995178, -9.012967348098755e-05, -7.865950465202332e-05, -6.718933582305908e-05, -5.571916699409485e-05, -4.4248998165130615e-05, -3.277882933616638e-05, -2.130866050720215e-05, -9.838491678237915e-06, 1.6316771507263184e-06, 1.3101845979690552e-05, 2.4572014808654785e-05, 3.604218363761902e-05, 4.751235246658325e-05, 5.8982521295547485e-05, 7.045269012451172e-05, 8.192285895347595e-05, 9.339302778244019e-05, 0.00010486319661140442, 0.00011633336544036865, 0.00012780353426933289, 0.00013927370309829712, 0.00015074387192726135, 0.00016221404075622559, 0.00017368420958518982, 0.00018515437841415405, 0.00019662454724311829, 0.00020809471607208252, 0.00021956488490104675, 0.00023103505373001099, 0.00024250522255897522, 0.00025397539138793945, 0.0002654455602169037, 0.0002769157290458679, 0.00028838589787483215, 0.0002998560667037964, 0.0003113262355327606, 0.00032279640436172485, 0.0003342665731906891, 0.0003457367420196533, 0.00035720691084861755, 0.0003686770796775818, 0.000380147248506546, 0.00039161741733551025, 0.0004030875861644745, 0.0004145577549934387, 0.00042602792382240295, 0.0004374980926513672]}, "gradients/encoder.encoder.layers.4.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 2.0, 3.0, 8.0, 3.0, 4.0, 7.0, 7.0, 13.0, 17.0, 25.0, 28.0, 38.0, 51.0, 81.0, 116.0, 166.0, 271.0, 370.0, 616.0, 1143.0, 2220.0, 4888.0, 13255.0, 41215.0, 140173.0, 505251.0, 231988.0, 71015.0, 21343.0, 7360.0, 3107.0, 1492.0, 768.0, 476.0, 322.0, 203.0, 150.0, 94.0, 58.0, 60.0, 40.0, 33.0, 30.0, 16.0, 14.0, 8.0, 5.0, 6.0, 4.0, 4.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0], "bins": [-0.7841796875, -0.76092529296875, -0.7376708984375, -0.71441650390625, -0.691162109375, -0.66790771484375, -0.6446533203125, -0.62139892578125, -0.59814453125, -0.57489013671875, -0.5516357421875, -0.52838134765625, -0.505126953125, -0.48187255859375, -0.4586181640625, -0.43536376953125, -0.412109375, -0.38885498046875, -0.3656005859375, -0.34234619140625, -0.319091796875, -0.29583740234375, -0.2725830078125, -0.24932861328125, -0.22607421875, -0.20281982421875, -0.1795654296875, -0.15631103515625, -0.133056640625, -0.10980224609375, -0.0865478515625, -0.06329345703125, -0.0400390625, -0.01678466796875, 0.0064697265625, 0.02972412109375, 0.052978515625, 0.07623291015625, 0.0994873046875, 0.12274169921875, 0.14599609375, 0.16925048828125, 0.1925048828125, 0.21575927734375, 0.239013671875, 0.26226806640625, 0.2855224609375, 0.30877685546875, 0.33203125, 0.35528564453125, 0.3785400390625, 0.40179443359375, 0.425048828125, 0.44830322265625, 0.4715576171875, 0.49481201171875, 0.51806640625, 0.54132080078125, 0.5645751953125, 0.58782958984375, 0.611083984375, 0.63433837890625, 0.6575927734375, 0.68084716796875, 0.7041015625]}, "gradients/encoder.encoder.layers.4.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 2.0, 3.0, 2.0, 4.0, 4.0, 8.0, 7.0, 9.0, 8.0, 9.0, 14.0, 12.0, 13.0, 30.0, 26.0, 37.0, 42.0, 48.0, 60.0, 47.0, 54.0, 54.0, 47.0, 51.0, 42.0, 44.0, 45.0, 50.0, 27.0, 31.0, 32.0, 19.0, 27.0, 16.0, 12.0, 12.0, 13.0, 13.0, 11.0, 4.0, 5.0, 3.0, 2.0, 4.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.74951171875, -0.7248992919921875, -0.700286865234375, -0.6756744384765625, -0.65106201171875, -0.6264495849609375, -0.601837158203125, -0.5772247314453125, -0.5526123046875, -0.5279998779296875, -0.503387451171875, -0.4787750244140625, -0.45416259765625, -0.4295501708984375, -0.404937744140625, -0.3803253173828125, -0.355712890625, -0.3311004638671875, -0.306488037109375, -0.2818756103515625, -0.25726318359375, -0.2326507568359375, -0.208038330078125, -0.1834259033203125, -0.1588134765625, -0.1342010498046875, -0.109588623046875, -0.0849761962890625, -0.06036376953125, -0.0357513427734375, -0.011138916015625, 0.0134735107421875, 0.0380859375, 0.0626983642578125, 0.087310791015625, 0.1119232177734375, 0.13653564453125, 0.1611480712890625, 0.185760498046875, 0.2103729248046875, 0.2349853515625, 0.2595977783203125, 0.284210205078125, 0.3088226318359375, 0.33343505859375, 0.3580474853515625, 0.382659912109375, 0.4072723388671875, 0.431884765625, 0.4564971923828125, 0.481109619140625, 0.5057220458984375, 0.53033447265625, 0.5549468994140625, 0.579559326171875, 0.6041717529296875, 0.6287841796875, 0.6533966064453125, 0.678009033203125, 0.7026214599609375, 0.72723388671875, 0.7518463134765625, 0.776458740234375, 0.8010711669921875, 0.82568359375]}, "gradients/encoder.encoder.layers.4.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 3.0, 5.0, 5.0, 8.0, 15.0, 30.0, 59.0, 96.0, 144.0, 249.0, 145.0, 116.0, 61.0, 27.0, 23.0, 9.0, 2.0, 7.0, 1.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0], "bins": [-42.741580963134766, -41.780296325683594, -40.81901550292969, -39.857730865478516, -38.89645004272461, -37.93516540527344, -36.97388458251953, -36.01259994506836, -35.05131912231445, -34.09003448486328, -33.128753662109375, -32.1674690246582, -31.206188201904297, -30.244905471801758, -29.28362274169922, -28.32234001159668, -27.36105728149414, -26.3997745513916, -25.438491821289062, -24.477209091186523, -23.515926361083984, -22.554643630981445, -21.593360900878906, -20.632078170776367, -19.670793533325195, -18.709510803222656, -17.748228073120117, -16.786945343017578, -15.825662612915039, -14.8643798828125, -13.903097152709961, -12.941814422607422, -11.980531692504883, -11.019248962402344, -10.057966232299805, -9.096683502197266, -8.135400772094727, -7.174117565155029, -6.21283483505249, -5.251552104949951, -4.290269374847412, -3.328986644744873, -2.367703914642334, -1.4064209461212158, -0.44513821601867676, 0.5161447525024414, 1.4774274826049805, 2.4387102127075195, 3.3999929428100586, 4.361275672912598, 5.322558403015137, 6.283841133117676, 7.245123863220215, 8.20640754699707, 9.16769027709961, 10.128973007202148, 11.090255737304688, 12.051538467407227, 13.012821197509766, 13.974103927612305, 14.935386657714844, 15.896669387817383, 16.857952117919922, 17.81923484802246, 18.780517578125]}, "gradients/encoder.encoder.layers.4.layer_norm.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 1.0, 2.0, 0.0, 3.0, 2.0, 5.0, 1.0, 7.0, 7.0, 5.0, 4.0, 10.0, 11.0, 11.0, 18.0, 12.0, 20.0, 16.0, 20.0, 27.0, 29.0, 26.0, 44.0, 37.0, 45.0, 47.0, 62.0, 68.0, 67.0, 61.0, 50.0, 33.0, 35.0, 33.0, 21.0, 24.0, 18.0, 16.0, 14.0, 20.0, 12.0, 15.0, 18.0, 6.0, 5.0, 2.0, 7.0, 4.0, 1.0, 7.0, 3.0, 4.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-16.809316635131836, -16.25734519958496, -15.705373764038086, -15.153402328491211, -14.601430892944336, -14.049459457397461, -13.497488021850586, -12.945516586303711, -12.393545150756836, -11.841573715209961, -11.289602279663086, -10.737630844116211, -10.185659408569336, -9.633687973022461, -9.081716537475586, -8.529745101928711, -7.977773666381836, -7.425802230834961, -6.873830795288086, -6.321859359741211, -5.769887924194336, -5.217916488647461, -4.665945053100586, -4.113973617553711, -3.562002182006836, -3.010030746459961, -2.458059310913086, -1.906087875366211, -1.354116439819336, -0.8021450042724609, -0.25017356872558594, 0.30179786682128906, 0.8537673950195312, 1.4057388305664062, 1.9577102661132812, 2.5096817016601562, 3.0616531372070312, 3.6136245727539062, 4.165596008300781, 4.717567443847656, 5.269538879394531, 5.821510314941406, 6.373481750488281, 6.925453186035156, 7.477424621582031, 8.029396057128906, 8.581367492675781, 9.133338928222656, 9.685310363769531, 10.237281799316406, 10.789253234863281, 11.341224670410156, 11.893196105957031, 12.445167541503906, 12.997138977050781, 13.549110412597656, 14.101081848144531, 14.653053283691406, 15.205024719238281, 15.756996154785156, 16.30896759033203, 16.860939025878906, 17.41291046142578, 17.964881896972656, 18.51685333251953]}, "gradients/encoder.encoder.layers.3.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0, 4.0, 2.0, 4.0, 3.0, 6.0, 14.0, 7.0, 19.0, 36.0, 41.0, 71.0, 154.0, 314.0, 483.0, 891.0, 1687.0, 3526.0, 8393.0, 24919.0, 114128.0, 942106.0, 2557304.0, 447799.0, 63971.0, 16765.0, 6042.0, 2644.0, 1331.0, 715.0, 394.0, 219.0, 108.0, 81.0, 47.0, 30.0, 14.0, 11.0, 4.0, 6.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-2.05078125, -1.9986419677734375, -1.946502685546875, -1.8943634033203125, -1.84222412109375, -1.7900848388671875, -1.737945556640625, -1.6858062744140625, -1.6336669921875, -1.5815277099609375, -1.529388427734375, -1.4772491455078125, -1.42510986328125, -1.3729705810546875, -1.320831298828125, -1.2686920166015625, -1.216552734375, -1.1644134521484375, -1.112274169921875, -1.0601348876953125, -1.00799560546875, -0.9558563232421875, -0.903717041015625, -0.8515777587890625, -0.7994384765625, -0.7472991943359375, -0.695159912109375, -0.6430206298828125, -0.59088134765625, -0.5387420654296875, -0.486602783203125, -0.4344635009765625, -0.38232421875, -0.3301849365234375, -0.278045654296875, -0.2259063720703125, -0.17376708984375, -0.1216278076171875, -0.069488525390625, -0.0173492431640625, 0.0347900390625, 0.0869293212890625, 0.139068603515625, 0.1912078857421875, 0.24334716796875, 0.2954864501953125, 0.347625732421875, 0.3997650146484375, 0.451904296875, 0.5040435791015625, 0.556182861328125, 0.6083221435546875, 0.66046142578125, 0.7126007080078125, 0.764739990234375, 0.8168792724609375, 0.8690185546875, 0.9211578369140625, 0.973297119140625, 1.0254364013671875, 1.07757568359375, 1.1297149658203125, 1.181854248046875, 1.2339935302734375, 1.2861328125]}, "gradients/encoder.encoder.layers.3.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 2.0, 2.0, 0.0, 6.0, 3.0, 6.0, 6.0, 10.0, 11.0, 12.0, 13.0, 20.0, 19.0, 19.0, 27.0, 18.0, 35.0, 37.0, 41.0, 42.0, 39.0, 38.0, 48.0, 53.0, 47.0, 40.0, 45.0, 35.0, 37.0, 38.0, 39.0, 29.0, 34.0, 20.0, 23.0, 16.0, 12.0, 23.0, 10.0, 17.0, 7.0, 7.0, 10.0, 5.0, 2.0, 0.0, 2.0, 2.0, 3.0, 2.0, 2.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-0.99267578125, -0.9618072509765625, -0.930938720703125, -0.9000701904296875, -0.86920166015625, -0.8383331298828125, -0.807464599609375, -0.7765960693359375, -0.7457275390625, -0.7148590087890625, -0.683990478515625, -0.6531219482421875, -0.62225341796875, -0.5913848876953125, -0.560516357421875, -0.5296478271484375, -0.498779296875, -0.4679107666015625, -0.437042236328125, -0.4061737060546875, -0.37530517578125, -0.3444366455078125, -0.313568115234375, -0.2826995849609375, -0.2518310546875, -0.2209625244140625, -0.190093994140625, -0.1592254638671875, -0.12835693359375, -0.0974884033203125, -0.066619873046875, -0.0357513427734375, -0.0048828125, 0.0259857177734375, 0.056854248046875, 0.0877227783203125, 0.11859130859375, 0.1494598388671875, 0.180328369140625, 0.2111968994140625, 0.2420654296875, 0.2729339599609375, 0.303802490234375, 0.3346710205078125, 0.36553955078125, 0.3964080810546875, 0.427276611328125, 0.4581451416015625, 0.489013671875, 0.5198822021484375, 0.550750732421875, 0.5816192626953125, 0.61248779296875, 0.6433563232421875, 0.674224853515625, 0.7050933837890625, 0.7359619140625, 0.7668304443359375, 0.797698974609375, 0.8285675048828125, 0.85943603515625, 0.8903045654296875, 0.921173095703125, 0.9520416259765625, 0.98291015625]}, "gradients/encoder.encoder.layers.3.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 1.0, 5.0, 3.0, 5.0, 5.0, 13.0, 15.0, 28.0, 27.0, 69.0, 119.0, 195.0, 389.0, 739.0, 1596.0, 4275.0, 16738.0, 125856.0, 3305435.0, 680652.0, 44663.0, 8624.0, 2675.0, 1040.0, 533.0, 270.0, 140.0, 87.0, 31.0, 24.0, 19.0, 5.0, 3.0, 4.0, 2.0, 4.0, 1.0, 0.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-3.650390625, -3.54534912109375, -3.4403076171875, -3.33526611328125, -3.230224609375, -3.12518310546875, -3.0201416015625, -2.91510009765625, -2.81005859375, -2.70501708984375, -2.5999755859375, -2.49493408203125, -2.389892578125, -2.28485107421875, -2.1798095703125, -2.07476806640625, -1.9697265625, -1.86468505859375, -1.7596435546875, -1.65460205078125, -1.549560546875, -1.44451904296875, -1.3394775390625, -1.23443603515625, -1.12939453125, -1.02435302734375, -0.9193115234375, -0.81427001953125, -0.709228515625, -0.60418701171875, -0.4991455078125, -0.39410400390625, -0.2890625, -0.18402099609375, -0.0789794921875, 0.02606201171875, 0.131103515625, 0.23614501953125, 0.3411865234375, 0.44622802734375, 0.55126953125, 0.65631103515625, 0.7613525390625, 0.86639404296875, 0.971435546875, 1.07647705078125, 1.1815185546875, 1.28656005859375, 1.3916015625, 1.49664306640625, 1.6016845703125, 1.70672607421875, 1.811767578125, 1.91680908203125, 2.0218505859375, 2.12689208984375, 2.23193359375, 2.33697509765625, 2.4420166015625, 2.54705810546875, 2.652099609375, 2.75714111328125, 2.8621826171875, 2.96722412109375, 3.072265625]}, "gradients/encoder.encoder.layers.3.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 4.0, 2.0, 3.0, 3.0, 7.0, 5.0, 8.0, 15.0, 12.0, 24.0, 23.0, 51.0, 62.0, 99.0, 164.0, 249.0, 467.0, 725.0, 703.0, 548.0, 307.0, 191.0, 120.0, 66.0, 61.0, 39.0, 44.0, 19.0, 16.0, 13.0, 11.0, 6.0, 6.0, 7.0, 2.0, 0.0, 1.0, 0.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.423828125, -2.3372802734375, -2.250732421875, -2.1641845703125, -2.07763671875, -1.9910888671875, -1.904541015625, -1.8179931640625, -1.7314453125, -1.6448974609375, -1.558349609375, -1.4718017578125, -1.38525390625, -1.2987060546875, -1.212158203125, -1.1256103515625, -1.0390625, -0.9525146484375, -0.865966796875, -0.7794189453125, -0.69287109375, -0.6063232421875, -0.519775390625, -0.4332275390625, -0.3466796875, -0.2601318359375, -0.173583984375, -0.0870361328125, -0.00048828125, 0.0860595703125, 0.172607421875, 0.2591552734375, 0.345703125, 0.4322509765625, 0.518798828125, 0.6053466796875, 0.69189453125, 0.7784423828125, 0.864990234375, 0.9515380859375, 1.0380859375, 1.1246337890625, 1.211181640625, 1.2977294921875, 1.38427734375, 1.4708251953125, 1.557373046875, 1.6439208984375, 1.73046875, 1.8170166015625, 1.903564453125, 1.9901123046875, 2.07666015625, 2.1632080078125, 2.249755859375, 2.3363037109375, 2.4228515625, 2.5093994140625, 2.595947265625, 2.6824951171875, 2.76904296875, 2.8555908203125, 2.942138671875, 3.0286865234375, 3.115234375]}, "gradients/encoder.encoder.layers.3.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 4.0, 1.0, 2.0, 10.0, 12.0, 25.0, 88.0, 254.0, 307.0, 164.0, 75.0, 31.0, 24.0, 5.0, 2.0, 2.0, 3.0, 3.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-59.70853042602539, -57.643943786621094, -55.57935333251953, -53.514766693115234, -51.45018005371094, -49.385589599609375, -47.32100296020508, -45.25641632080078, -43.19182586669922, -41.12723922729492, -39.06264877319336, -36.99806213378906, -34.9334716796875, -32.8688850402832, -30.804298400878906, -28.739709854125977, -26.675121307373047, -24.610532760620117, -22.545944213867188, -20.48135757446289, -18.41676902770996, -16.35218048095703, -14.287592887878418, -12.223005294799805, -10.158416748046875, -8.093828201293945, -6.029240608215332, -3.9646525382995605, -1.900064468383789, 0.16452407836914062, 2.229111671447754, 4.293699264526367, 6.3582916259765625, 8.422880172729492, 10.487467765808105, 12.552055358886719, 14.616643905639648, 16.681232452392578, 18.745819091796875, 20.810407638549805, 22.874996185302734, 24.939584732055664, 27.004173278808594, 29.06875991821289, 31.13334846496582, 33.19793701171875, 35.26252365112305, 37.327110290527344, 39.391700744628906, 41.4562873840332, 43.520877838134766, 45.58546447753906, 47.650054931640625, 49.71464157104492, 51.77922821044922, 53.84381866455078, 55.90840530395508, 57.972991943359375, 60.03758239746094, 62.102169036865234, 64.16675567626953, 66.2313461303711, 68.29593658447266, 70.36051940917969, 72.42510986328125]}, "gradients/encoder.encoder.layers.3.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 3.0, 4.0, 1.0, 3.0, 4.0, 1.0, 6.0, 10.0, 17.0, 10.0, 14.0, 21.0, 24.0, 25.0, 33.0, 30.0, 42.0, 41.0, 43.0, 36.0, 46.0, 45.0, 36.0, 35.0, 42.0, 41.0, 41.0, 32.0, 41.0, 31.0, 38.0, 25.0, 23.0, 21.0, 18.0, 19.0, 19.0, 23.0, 18.0, 15.0, 7.0, 4.0, 4.0, 6.0, 4.0, 3.0, 1.0, 6.0, 2.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-14.588845252990723, -14.091793060302734, -13.59473991394043, -13.097687721252441, -12.600634574890137, -12.103582382202148, -11.606529235839844, -11.109477043151855, -10.612424850463867, -10.115372657775879, -9.618319511413574, -9.121267318725586, -8.624214172363281, -8.127161979675293, -7.6301093101501465, -7.133056640625, -6.636003494262695, -6.138950824737549, -5.641898155212402, -5.144845962524414, -4.647792816162109, -4.150740623474121, -3.6536879539489746, -3.156635284423828, -2.6595826148986816, -2.162529945373535, -1.6654773950576782, -1.1684248447418213, -0.6713721752166748, -0.17431950569152832, 0.32273292541503906, 0.8197855949401855, 1.3168373107910156, 1.813889980316162, 2.3109426498413086, 2.807995080947876, 3.3050477504730225, 3.802100419998169, 4.299152851104736, 4.796205520629883, 5.293258190155029, 5.790310859680176, 6.287363529205322, 6.784416198730469, 7.281468391418457, 7.778521537780762, 8.27557373046875, 8.772626876831055, 9.269679069519043, 9.766731262207031, 10.263784408569336, 10.760836601257324, 11.257889747619629, 11.754941940307617, 12.251995086669922, 12.74904727935791, 13.246099472045898, 13.743151664733887, 14.240204811096191, 14.73725700378418, 15.234310150146484, 15.731362342834473, 16.22841453552246, 16.725467681884766, 17.22252082824707]}, "gradients/encoder.encoder.layers.3.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 4.0, 10.0, 17.0, 13.0, 27.0, 48.0, 37.0, 86.0, 90.0, 151.0, 220.0, 326.0, 458.0, 719.0, 1216.0, 1819.0, 2776.0, 4618.0, 7898.0, 13113.0, 22967.0, 40832.0, 71903.0, 123974.0, 188590.0, 206310.0, 149755.0, 90316.0, 50725.0, 28416.0, 16368.0, 9526.0, 5674.0, 3426.0, 2151.0, 1392.0, 825.0, 573.0, 376.0, 245.0, 186.0, 115.0, 68.0, 55.0, 39.0, 39.0, 19.0, 9.0, 10.0, 11.0, 6.0, 6.0, 3.0, 4.0, 5.0, 0.0, 2.0], "bins": [-1.3671875, -1.3247222900390625, -1.282257080078125, -1.2397918701171875, -1.19732666015625, -1.1548614501953125, -1.112396240234375, -1.0699310302734375, -1.0274658203125, -0.9850006103515625, -0.942535400390625, -0.9000701904296875, -0.85760498046875, -0.8151397705078125, -0.772674560546875, -0.7302093505859375, -0.687744140625, -0.6452789306640625, -0.602813720703125, -0.5603485107421875, -0.51788330078125, -0.4754180908203125, -0.432952880859375, -0.3904876708984375, -0.3480224609375, -0.3055572509765625, -0.263092041015625, -0.2206268310546875, -0.17816162109375, -0.1356964111328125, -0.093231201171875, -0.0507659912109375, -0.00830078125, 0.0341644287109375, 0.076629638671875, 0.1190948486328125, 0.16156005859375, 0.2040252685546875, 0.246490478515625, 0.2889556884765625, 0.3314208984375, 0.3738861083984375, 0.416351318359375, 0.4588165283203125, 0.50128173828125, 0.5437469482421875, 0.586212158203125, 0.6286773681640625, 0.671142578125, 0.7136077880859375, 0.756072998046875, 0.7985382080078125, 0.84100341796875, 0.8834686279296875, 0.925933837890625, 0.9683990478515625, 1.0108642578125, 1.0533294677734375, 1.095794677734375, 1.1382598876953125, 1.18072509765625, 1.2231903076171875, 1.265655517578125, 1.3081207275390625, 1.3505859375]}, "gradients/encoder.encoder.layers.3.attention.out_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 2.0, 2.0, 2.0, 1.0, 3.0, 2.0, 5.0, 3.0, 2.0, 3.0, 10.0, 8.0, 11.0, 11.0, 15.0, 15.0, 21.0, 11.0, 19.0, 26.0, 34.0, 27.0, 28.0, 31.0, 34.0, 33.0, 39.0, 34.0, 39.0, 60.0, 45.0, 38.0, 30.0, 29.0, 38.0, 40.0, 27.0, 33.0, 20.0, 25.0, 21.0, 20.0, 13.0, 19.0, 18.0, 17.0, 5.0, 8.0, 6.0, 12.0, 3.0, 5.0, 1.0, 3.0, 5.0, 1.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0], "bins": [-0.97509765625, -0.9443206787109375, -0.913543701171875, -0.8827667236328125, -0.85198974609375, -0.8212127685546875, -0.790435791015625, -0.7596588134765625, -0.7288818359375, -0.6981048583984375, -0.667327880859375, -0.6365509033203125, -0.60577392578125, -0.5749969482421875, -0.544219970703125, -0.5134429931640625, -0.482666015625, -0.4518890380859375, -0.421112060546875, -0.3903350830078125, -0.35955810546875, -0.3287811279296875, -0.298004150390625, -0.2672271728515625, -0.2364501953125, -0.2056732177734375, -0.174896240234375, -0.1441192626953125, -0.11334228515625, -0.0825653076171875, -0.051788330078125, -0.0210113525390625, 0.009765625, 0.0405426025390625, 0.071319580078125, 0.1020965576171875, 0.13287353515625, 0.1636505126953125, 0.194427490234375, 0.2252044677734375, 0.2559814453125, 0.2867584228515625, 0.317535400390625, 0.3483123779296875, 0.37908935546875, 0.4098663330078125, 0.440643310546875, 0.4714202880859375, 0.502197265625, 0.5329742431640625, 0.563751220703125, 0.5945281982421875, 0.62530517578125, 0.6560821533203125, 0.686859130859375, 0.7176361083984375, 0.7484130859375, 0.7791900634765625, 0.809967041015625, 0.8407440185546875, 0.87152099609375, 0.9022979736328125, 0.933074951171875, 0.9638519287109375, 0.99462890625]}, "gradients/encoder.encoder.layers.3.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 4.0, 1.0, 7.0, 1.0, 5.0, 8.0, 13.0, 10.0, 21.0, 26.0, 45.0, 62.0, 98.0, 139.0, 231.0, 442.0, 843.0, 1899.0, 4865.0, 14304.0, 50475.0, 197062.0, 572147.0, 149125.0, 38108.0, 11318.0, 3939.0, 1606.0, 708.0, 412.0, 222.0, 130.0, 84.0, 53.0, 35.0, 31.0, 19.0, 19.0, 8.0, 9.0, 7.0, 12.0, 3.0, 2.0, 4.0, 3.0, 1.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.0546875, -2.954193115234375, -2.85369873046875, -2.753204345703125, -2.6527099609375, -2.552215576171875, -2.45172119140625, -2.351226806640625, -2.250732421875, -2.150238037109375, -2.04974365234375, -1.949249267578125, -1.8487548828125, -1.748260498046875, -1.64776611328125, -1.547271728515625, -1.44677734375, -1.346282958984375, -1.24578857421875, -1.145294189453125, -1.0447998046875, -0.944305419921875, -0.84381103515625, -0.743316650390625, -0.642822265625, -0.542327880859375, -0.44183349609375, -0.341339111328125, -0.2408447265625, -0.140350341796875, -0.03985595703125, 0.060638427734375, 0.1611328125, 0.261627197265625, 0.36212158203125, 0.462615966796875, 0.5631103515625, 0.663604736328125, 0.76409912109375, 0.864593505859375, 0.965087890625, 1.065582275390625, 1.16607666015625, 1.266571044921875, 1.3670654296875, 1.467559814453125, 1.56805419921875, 1.668548583984375, 1.76904296875, 1.869537353515625, 1.97003173828125, 2.070526123046875, 2.1710205078125, 2.271514892578125, 2.37200927734375, 2.472503662109375, 2.572998046875, 2.673492431640625, 2.77398681640625, 2.874481201171875, 2.9749755859375, 3.075469970703125, 3.17596435546875, 3.276458740234375, 3.376953125]}, "gradients/encoder.encoder.layers.3.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 2.0, 2.0, 3.0, 7.0, 7.0, 6.0, 9.0, 8.0, 7.0, 13.0, 17.0, 26.0, 15.0, 26.0, 24.0, 36.0, 36.0, 44.0, 36.0, 44.0, 44.0, 43.0, 39.0, 44.0, 66.0, 42.0, 54.0, 43.0, 25.0, 33.0, 42.0, 29.0, 23.0, 19.0, 15.0, 16.0, 17.0, 14.0, 5.0, 9.0, 7.0, 1.0, 4.0, 2.0, 2.0, 1.0, 4.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0], "bins": [-3.9921875, -3.858154296875, -3.72412109375, -3.590087890625, -3.4560546875, -3.322021484375, -3.18798828125, -3.053955078125, -2.919921875, -2.785888671875, -2.65185546875, -2.517822265625, -2.3837890625, -2.249755859375, -2.11572265625, -1.981689453125, -1.84765625, -1.713623046875, -1.57958984375, -1.445556640625, -1.3115234375, -1.177490234375, -1.04345703125, -0.909423828125, -0.775390625, -0.641357421875, -0.50732421875, -0.373291015625, -0.2392578125, -0.105224609375, 0.02880859375, 0.162841796875, 0.296875, 0.430908203125, 0.56494140625, 0.698974609375, 0.8330078125, 0.967041015625, 1.10107421875, 1.235107421875, 1.369140625, 1.503173828125, 1.63720703125, 1.771240234375, 1.9052734375, 2.039306640625, 2.17333984375, 2.307373046875, 2.44140625, 2.575439453125, 2.70947265625, 2.843505859375, 2.9775390625, 3.111572265625, 3.24560546875, 3.379638671875, 3.513671875, 3.647705078125, 3.78173828125, 3.915771484375, 4.0498046875, 4.183837890625, 4.31787109375, 4.451904296875, 4.5859375]}, "gradients/encoder.encoder.layers.3.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 5.0, 8.0, 8.0, 19.0, 19.0, 23.0, 33.0, 43.0, 65.0, 86.0, 151.0, 224.0, 365.0, 591.0, 920.0, 1612.0, 2728.0, 5474.0, 12038.0, 29137.0, 81205.0, 256207.0, 486426.0, 106149.0, 36302.0, 14403.0, 6528.0, 3237.0, 1701.0, 992.0, 633.0, 399.0, 274.0, 169.0, 103.0, 83.0, 47.0, 39.0, 34.0, 24.0, 9.0, 17.0, 14.0, 8.0, 3.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.8984375, -0.8686065673828125, -0.838775634765625, -0.8089447021484375, -0.77911376953125, -0.7492828369140625, -0.719451904296875, -0.6896209716796875, -0.6597900390625, -0.6299591064453125, -0.600128173828125, -0.5702972412109375, -0.54046630859375, -0.5106353759765625, -0.480804443359375, -0.4509735107421875, -0.421142578125, -0.3913116455078125, -0.361480712890625, -0.3316497802734375, -0.30181884765625, -0.2719879150390625, -0.242156982421875, -0.2123260498046875, -0.1824951171875, -0.1526641845703125, -0.122833251953125, -0.0930023193359375, -0.06317138671875, -0.0333404541015625, -0.003509521484375, 0.0263214111328125, 0.05615234375, 0.0859832763671875, 0.115814208984375, 0.1456451416015625, 0.17547607421875, 0.2053070068359375, 0.235137939453125, 0.2649688720703125, 0.2947998046875, 0.3246307373046875, 0.354461669921875, 0.3842926025390625, 0.41412353515625, 0.4439544677734375, 0.473785400390625, 0.5036163330078125, 0.533447265625, 0.5632781982421875, 0.593109130859375, 0.6229400634765625, 0.65277099609375, 0.6826019287109375, 0.712432861328125, 0.7422637939453125, 0.7720947265625, 0.8019256591796875, 0.831756591796875, 0.8615875244140625, 0.89141845703125, 0.9212493896484375, 0.951080322265625, 0.9809112548828125, 1.0107421875]}, "gradients/encoder.encoder.layers.3.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 3.0, 6.0, 4.0, 1.0, 5.0, 2.0, 6.0, 7.0, 15.0, 6.0, 21.0, 27.0, 28.0, 38.0, 54.0, 61.0, 81.0, 97.0, 118.0, 85.0, 101.0, 59.0, 46.0, 36.0, 22.0, 20.0, 15.0, 13.0, 8.0, 10.0, 9.0, 1.0, 3.0, 2.0, 2.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0003418922424316406, -0.00032940879464149475, -0.0003169253468513489, -0.000304441899061203, -0.00029195845127105713, -0.00027947500348091125, -0.0002669915556907654, -0.0002545081079006195, -0.00024202466011047363, -0.00022954121232032776, -0.00021705776453018188, -0.000204574316740036, -0.00019209086894989014, -0.00017960742115974426, -0.0001671239733695984, -0.00015464052557945251, -0.00014215707778930664, -0.00012967362999916077, -0.00011719018220901489, -0.00010470673441886902, -9.222328662872314e-05, -7.973983883857727e-05, -6.72563910484314e-05, -5.477294325828552e-05, -4.228949546813965e-05, -2.9806047677993774e-05, -1.73225998878479e-05, -4.839152097702026e-06, 7.644295692443848e-06, 2.012774348258972e-05, 3.2611191272735596e-05, 4.509463906288147e-05, 5.7578086853027344e-05, 7.006153464317322e-05, 8.254498243331909e-05, 9.502843022346497e-05, 0.00010751187801361084, 0.00011999532580375671, 0.0001324787735939026, 0.00014496222138404846, 0.00015744566917419434, 0.0001699291169643402, 0.00018241256475448608, 0.00019489601254463196, 0.00020737946033477783, 0.0002198629081249237, 0.00023234635591506958, 0.00024482980370521545, 0.00025731325149536133, 0.0002697966992855072, 0.0002822801470756531, 0.00029476359486579895, 0.0003072470426559448, 0.0003197304904460907, 0.00033221393823623657, 0.00034469738602638245, 0.0003571808338165283, 0.0003696642816066742, 0.00038214772939682007, 0.00039463117718696594, 0.0004071146249771118, 0.0004195980727672577, 0.00043208152055740356, 0.00044456496834754944, 0.0004570484161376953]}, "gradients/encoder.encoder.layers.3.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 4.0, 0.0, 1.0, 2.0, 3.0, 3.0, 6.0, 21.0, 13.0, 35.0, 37.0, 63.0, 116.0, 236.0, 328.0, 565.0, 1072.0, 1939.0, 4331.0, 10453.0, 29362.0, 99508.0, 506710.0, 278184.0, 76035.0, 23328.0, 8617.0, 3633.0, 1750.0, 897.0, 506.0, 293.0, 188.0, 124.0, 73.0, 51.0, 28.0, 9.0, 16.0, 9.0, 4.0, 9.0, 1.0, 0.0, 3.0, 4.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.142578125, -1.1088714599609375, -1.075164794921875, -1.0414581298828125, -1.00775146484375, -0.9740447998046875, -0.940338134765625, -0.9066314697265625, -0.8729248046875, -0.8392181396484375, -0.805511474609375, -0.7718048095703125, -0.73809814453125, -0.7043914794921875, -0.670684814453125, -0.6369781494140625, -0.603271484375, -0.5695648193359375, -0.535858154296875, -0.5021514892578125, -0.46844482421875, -0.4347381591796875, -0.401031494140625, -0.3673248291015625, -0.3336181640625, -0.2999114990234375, -0.266204833984375, -0.2324981689453125, -0.19879150390625, -0.1650848388671875, -0.131378173828125, -0.0976715087890625, -0.06396484375, -0.0302581787109375, 0.003448486328125, 0.0371551513671875, 0.07086181640625, 0.1045684814453125, 0.138275146484375, 0.1719818115234375, 0.2056884765625, 0.2393951416015625, 0.273101806640625, 0.3068084716796875, 0.34051513671875, 0.3742218017578125, 0.407928466796875, 0.4416351318359375, 0.475341796875, 0.5090484619140625, 0.542755126953125, 0.5764617919921875, 0.61016845703125, 0.6438751220703125, 0.677581787109375, 0.7112884521484375, 0.7449951171875, 0.7787017822265625, 0.812408447265625, 0.8461151123046875, 0.87982177734375, 0.9135284423828125, 0.947235107421875, 0.9809417724609375, 1.0146484375]}, "gradients/encoder.encoder.layers.3.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 4.0, 1.0, 5.0, 6.0, 5.0, 8.0, 14.0, 10.0, 11.0, 18.0, 32.0, 32.0, 45.0, 62.0, 70.0, 66.0, 78.0, 76.0, 73.0, 69.0, 64.0, 61.0, 44.0, 35.0, 33.0, 27.0, 9.0, 14.0, 8.0, 5.0, 6.0, 2.0, 8.0, 4.0, 5.0, 2.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.00390625, -0.96453857421875, -0.9251708984375, -0.88580322265625, -0.846435546875, -0.80706787109375, -0.7677001953125, -0.72833251953125, -0.68896484375, -0.64959716796875, -0.6102294921875, -0.57086181640625, -0.531494140625, -0.49212646484375, -0.4527587890625, -0.41339111328125, -0.3740234375, -0.33465576171875, -0.2952880859375, -0.25592041015625, -0.216552734375, -0.17718505859375, -0.1378173828125, -0.09844970703125, -0.05908203125, -0.01971435546875, 0.0196533203125, 0.05902099609375, 0.098388671875, 0.13775634765625, 0.1771240234375, 0.21649169921875, 0.255859375, 0.29522705078125, 0.3345947265625, 0.37396240234375, 0.413330078125, 0.45269775390625, 0.4920654296875, 0.53143310546875, 0.57080078125, 0.61016845703125, 0.6495361328125, 0.68890380859375, 0.728271484375, 0.76763916015625, 0.8070068359375, 0.84637451171875, 0.8857421875, 0.92510986328125, 0.9644775390625, 1.00384521484375, 1.043212890625, 1.08258056640625, 1.1219482421875, 1.16131591796875, 1.20068359375, 1.24005126953125, 1.2794189453125, 1.31878662109375, 1.358154296875, 1.39752197265625, 1.4368896484375, 1.47625732421875, 1.515625]}, "gradients/encoder.encoder.layers.3.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 6.0, 1.0, 13.0, 18.0, 25.0, 65.0, 120.0, 225.0, 293.0, 134.0, 64.0, 23.0, 14.0, 3.0, 2.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-49.14048767089844, -47.78323745727539, -46.425987243652344, -45.0687370300293, -43.71148681640625, -42.35423278808594, -40.996986389160156, -39.639732360839844, -38.2824821472168, -36.92523193359375, -35.5679817199707, -34.210731506347656, -32.85348129272461, -31.49622917175293, -30.138978958129883, -28.781726837158203, -27.42447853088379, -26.067228317260742, -24.709978103637695, -23.352725982666016, -21.99547576904297, -20.638225555419922, -19.280975341796875, -17.923725128173828, -16.56647491455078, -15.209224700927734, -13.851973533630371, -12.494723320007324, -11.137472152709961, -9.780221939086914, -8.422971725463867, -7.065720558166504, -5.708469390869141, -4.3512187004089355, -2.9939682483673096, -1.6367177963256836, -0.2794671058654785, 1.0777835845947266, 2.4350337982177734, 3.7922849655151367, 5.149535179138184, 6.506785869598389, 7.864036560058594, 9.22128677368164, 10.578536987304688, 11.93578815460205, 13.293038368225098, 14.650289535522461, 16.007539749145508, 17.364789962768555, 18.7220401763916, 20.07929229736328, 21.436542510986328, 22.793792724609375, 24.151042938232422, 25.50829315185547, 26.865543365478516, 28.222793579101562, 29.58004379272461, 30.937294006347656, 32.2945442199707, 33.65179443359375, 35.00904846191406, 36.36629867553711, 37.723548889160156]}, "gradients/encoder.encoder.layers.3.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 2.0, 2.0, 2.0, 2.0, 1.0, 9.0, 6.0, 9.0, 6.0, 9.0, 20.0, 18.0, 24.0, 29.0, 32.0, 35.0, 58.0, 55.0, 58.0, 96.0, 96.0, 63.0, 61.0, 48.0, 53.0, 37.0, 40.0, 31.0, 24.0, 24.0, 15.0, 18.0, 10.0, 6.0, 4.0, 6.0, 5.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-25.599811553955078, -24.875669479370117, -24.151527404785156, -23.427385330200195, -22.703243255615234, -21.979103088378906, -21.254959106445312, -20.530818939208984, -19.806676864624023, -19.082534790039062, -18.3583927154541, -17.63425064086914, -16.91010856628418, -16.18596649169922, -15.461825370788574, -14.73768424987793, -14.013541221618652, -13.289399147033691, -12.56525707244873, -11.841115951538086, -11.116973876953125, -10.392831802368164, -9.668689727783203, -8.944547653198242, -8.220405578613281, -7.49626350402832, -6.772121906280518, -6.047979831695557, -5.323838233947754, -4.599696159362793, -3.875554084777832, -3.1514124870300293, -2.4272708892822266, -1.7031290531158447, -0.9789870977401733, -0.25484514236450195, 0.4692966938018799, 1.1934385299682617, 1.9175806045532227, 2.6417222023010254, 3.3658642768859863, 4.090006351470947, 4.81414794921875, 5.538290023803711, 6.262432098388672, 6.986573696136475, 7.7107157707214355, 8.434857368469238, 9.1589994430542, 9.88314151763916, 10.607283592224121, 11.331424713134766, 12.055566787719727, 12.779708862304688, 13.503850936889648, 14.22799301147461, 14.95213508605957, 15.676277160644531, 16.400419235229492, 17.124561309814453, 17.848703384399414, 18.572845458984375, 19.296985626220703, 20.021127700805664, 20.745269775390625]}, "gradients/encoder.encoder.layers.2.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 2.0, 6.0, 4.0, 7.0, 8.0, 16.0, 23.0, 28.0, 33.0, 55.0, 90.0, 110.0, 197.0, 331.0, 544.0, 1004.0, 1895.0, 3956.0, 9741.0, 30758.0, 248044.0, 2895418.0, 908286.0, 66881.0, 15716.0, 5637.0, 2556.0, 1292.0, 646.0, 404.0, 196.0, 133.0, 85.0, 56.0, 43.0, 31.0, 18.0, 9.0, 5.0, 7.0, 9.0, 4.0, 2.0, 4.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.64453125, -2.568359375, -2.4921875, -2.416015625, -2.33984375, -2.263671875, -2.1875, -2.111328125, -2.03515625, -1.958984375, -1.8828125, -1.806640625, -1.73046875, -1.654296875, -1.578125, -1.501953125, -1.42578125, -1.349609375, -1.2734375, -1.197265625, -1.12109375, -1.044921875, -0.96875, -0.892578125, -0.81640625, -0.740234375, -0.6640625, -0.587890625, -0.51171875, -0.435546875, -0.359375, -0.283203125, -0.20703125, -0.130859375, -0.0546875, 0.021484375, 0.09765625, 0.173828125, 0.25, 0.326171875, 0.40234375, 0.478515625, 0.5546875, 0.630859375, 0.70703125, 0.783203125, 0.859375, 0.935546875, 1.01171875, 1.087890625, 1.1640625, 1.240234375, 1.31640625, 1.392578125, 1.46875, 1.544921875, 1.62109375, 1.697265625, 1.7734375, 1.849609375, 1.92578125, 2.001953125, 2.078125, 2.154296875, 2.23046875]}, "gradients/encoder.encoder.layers.2.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 2.0, 1.0, 1.0, 0.0, 2.0, 5.0, 5.0, 4.0, 4.0, 9.0, 9.0, 17.0, 12.0, 21.0, 16.0, 18.0, 20.0, 22.0, 26.0, 35.0, 43.0, 30.0, 56.0, 45.0, 47.0, 49.0, 50.0, 41.0, 51.0, 43.0, 42.0, 44.0, 27.0, 33.0, 27.0, 29.0, 26.0, 17.0, 18.0, 14.0, 15.0, 7.0, 10.0, 6.0, 7.0, 3.0, 0.0, 3.0, 3.0, 1.0, 2.0, 1.0], "bins": [-1.271484375, -1.2380828857421875, -1.204681396484375, -1.1712799072265625, -1.13787841796875, -1.1044769287109375, -1.071075439453125, -1.0376739501953125, -1.0042724609375, -0.9708709716796875, -0.937469482421875, -0.9040679931640625, -0.87066650390625, -0.8372650146484375, -0.803863525390625, -0.7704620361328125, -0.737060546875, -0.7036590576171875, -0.670257568359375, -0.6368560791015625, -0.60345458984375, -0.5700531005859375, -0.536651611328125, -0.5032501220703125, -0.4698486328125, -0.4364471435546875, -0.403045654296875, -0.3696441650390625, -0.33624267578125, -0.3028411865234375, -0.269439697265625, -0.2360382080078125, -0.20263671875, -0.1692352294921875, -0.135833740234375, -0.1024322509765625, -0.06903076171875, -0.0356292724609375, -0.002227783203125, 0.0311737060546875, 0.0645751953125, 0.0979766845703125, 0.131378173828125, 0.1647796630859375, 0.19818115234375, 0.2315826416015625, 0.264984130859375, 0.2983856201171875, 0.331787109375, 0.3651885986328125, 0.398590087890625, 0.4319915771484375, 0.46539306640625, 0.4987945556640625, 0.532196044921875, 0.5655975341796875, 0.5989990234375, 0.6324005126953125, 0.665802001953125, 0.6992034912109375, 0.73260498046875, 0.7660064697265625, 0.799407958984375, 0.8328094482421875, 0.8662109375]}, "gradients/encoder.encoder.layers.2.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 5.0, 4.0, 11.0, 21.0, 47.0, 95.0, 148.0, 361.0, 853.0, 3416.0, 165036.0, 4009397.0, 12291.0, 1438.0, 605.0, 279.0, 135.0, 67.0, 37.0, 19.0, 12.0, 8.0, 0.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.1953125, -11.8492431640625, -11.503173828125, -11.1571044921875, -10.81103515625, -10.4649658203125, -10.118896484375, -9.7728271484375, -9.4267578125, -9.0806884765625, -8.734619140625, -8.3885498046875, -8.04248046875, -7.6964111328125, -7.350341796875, -7.0042724609375, -6.658203125, -6.3121337890625, -5.966064453125, -5.6199951171875, -5.27392578125, -4.9278564453125, -4.581787109375, -4.2357177734375, -3.8896484375, -3.5435791015625, -3.197509765625, -2.8514404296875, -2.50537109375, -2.1593017578125, -1.813232421875, -1.4671630859375, -1.12109375, -0.7750244140625, -0.428955078125, -0.0828857421875, 0.26318359375, 0.6092529296875, 0.955322265625, 1.3013916015625, 1.6474609375, 1.9935302734375, 2.339599609375, 2.6856689453125, 3.03173828125, 3.3778076171875, 3.723876953125, 4.0699462890625, 4.416015625, 4.7620849609375, 5.108154296875, 5.4542236328125, 5.80029296875, 6.1463623046875, 6.492431640625, 6.8385009765625, 7.1845703125, 7.5306396484375, 7.876708984375, 8.2227783203125, 8.56884765625, 8.9149169921875, 9.260986328125, 9.6070556640625, 9.953125]}, "gradients/encoder.encoder.layers.2.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 3.0, 0.0, 3.0, 2.0, 3.0, 6.0, 4.0, 14.0, 17.0, 29.0, 49.0, 81.0, 148.0, 234.0, 579.0, 1143.0, 978.0, 441.0, 166.0, 81.0, 53.0, 18.0, 14.0, 9.0, 4.0, 4.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-2.62890625, -2.4722900390625, -2.315673828125, -2.1590576171875, -2.00244140625, -1.8458251953125, -1.689208984375, -1.5325927734375, -1.3759765625, -1.2193603515625, -1.062744140625, -0.9061279296875, -0.74951171875, -0.5928955078125, -0.436279296875, -0.2796630859375, -0.123046875, 0.0335693359375, 0.190185546875, 0.3468017578125, 0.50341796875, 0.6600341796875, 0.816650390625, 0.9732666015625, 1.1298828125, 1.2864990234375, 1.443115234375, 1.5997314453125, 1.75634765625, 1.9129638671875, 2.069580078125, 2.2261962890625, 2.3828125, 2.5394287109375, 2.696044921875, 2.8526611328125, 3.00927734375, 3.1658935546875, 3.322509765625, 3.4791259765625, 3.6357421875, 3.7923583984375, 3.948974609375, 4.1055908203125, 4.26220703125, 4.4188232421875, 4.575439453125, 4.7320556640625, 4.888671875, 5.0452880859375, 5.201904296875, 5.3585205078125, 5.51513671875, 5.6717529296875, 5.828369140625, 5.9849853515625, 6.1416015625, 6.2982177734375, 6.454833984375, 6.6114501953125, 6.76806640625, 6.9246826171875, 7.081298828125, 7.2379150390625, 7.39453125]}, "gradients/encoder.encoder.layers.2.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 3.0, 4.0, 0.0, 6.0, 9.0, 25.0, 66.0, 168.0, 320.0, 233.0, 118.0, 38.0, 10.0, 5.0, 4.0, 2.0, 2.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-90.71321868896484, -88.59353637695312, -86.47384643554688, -84.35416412353516, -82.23448181152344, -80.11479187011719, -77.99510955810547, -75.87541961669922, -73.7557373046875, -71.63605499267578, -69.51636505126953, -67.39668273925781, -65.27699279785156, -63.157310485839844, -61.037628173828125, -58.91794204711914, -56.798255920410156, -54.67856979370117, -52.55888366699219, -50.43920135498047, -48.319515228271484, -46.1998291015625, -44.08014678955078, -41.9604606628418, -39.84077453613281, -37.72108840942383, -35.601402282714844, -33.481719970703125, -31.36203384399414, -29.242347717285156, -27.122663497924805, -25.002979278564453, -22.88330078125, -20.763614654541016, -18.643930435180664, -16.524246215820312, -14.404560089111328, -12.28487491607666, -10.165189743041992, -8.045504570007324, -5.925819396972656, -3.8061342239379883, -1.6864490509033203, 0.43323612213134766, 2.5529212951660156, 4.672606468200684, 6.792291641235352, 8.91197681427002, 11.031661987304688, 13.151347160339355, 15.271032333374023, 17.390716552734375, 19.51040267944336, 21.630088806152344, 23.749773025512695, 25.869457244873047, 27.98914337158203, 30.108829498291016, 32.228515625, 34.34819793701172, 36.4678840637207, 38.58757019042969, 40.707252502441406, 42.82693862915039, 44.946624755859375]}, "gradients/encoder.encoder.layers.2.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 2.0, 4.0, 2.0, 3.0, 6.0, 11.0, 16.0, 15.0, 11.0, 28.0, 20.0, 31.0, 37.0, 41.0, 43.0, 37.0, 63.0, 54.0, 54.0, 81.0, 53.0, 58.0, 54.0, 43.0, 44.0, 37.0, 33.0, 26.0, 23.0, 24.0, 16.0, 13.0, 8.0, 8.0, 3.0, 5.0, 3.0, 0.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-21.478548049926758, -20.798233032226562, -20.117916107177734, -19.43760108947754, -18.757286071777344, -18.07697105407715, -17.396656036376953, -16.716339111328125, -16.03602409362793, -15.355709075927734, -14.675393104553223, -13.995077133178711, -13.314762115478516, -12.63444709777832, -11.954131126403809, -11.273815155029297, -10.593500137329102, -9.913185119628906, -9.232869148254395, -8.552553176879883, -7.8722381591796875, -7.191922664642334, -6.5116071701049805, -5.831291675567627, -5.150976181030273, -4.47066068649292, -3.7903451919555664, -3.110029697418213, -2.4297142028808594, -1.7493987083435059, -1.0690832138061523, -0.38876771926879883, 0.2915458679199219, 0.9718613624572754, 1.652176856994629, 2.3324923515319824, 3.012807846069336, 3.6931233406066895, 4.373438835144043, 5.0537543296813965, 5.73406982421875, 6.4143853187561035, 7.094700813293457, 7.7750163078308105, 8.455331802368164, 9.13564682006836, 9.815962791442871, 10.496278762817383, 11.176593780517578, 11.856908798217773, 12.537224769592285, 13.217540740966797, 13.897855758666992, 14.578170776367188, 15.2584867477417, 15.938802719116211, 16.619117736816406, 17.2994327545166, 17.979747772216797, 18.660064697265625, 19.34037971496582, 20.020694732666016, 20.701011657714844, 21.38132667541504, 22.061641693115234]}, "gradients/encoder.encoder.layers.2.attention.out_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 2.0, 2.0, 2.0, 2.0, 5.0, 8.0, 7.0, 11.0, 11.0, 20.0, 31.0, 37.0, 50.0, 87.0, 131.0, 161.0, 242.0, 356.0, 535.0, 814.0, 1404.0, 2254.0, 3595.0, 6074.0, 10528.0, 18748.0, 34318.0, 64987.0, 124318.0, 216415.0, 240062.0, 149693.0, 78780.0, 41600.0, 22315.0, 12649.0, 7034.0, 4253.0, 2546.0, 1637.0, 974.0, 602.0, 403.0, 272.0, 159.0, 134.0, 79.0, 54.0, 44.0, 33.0, 27.0, 19.0, 14.0, 10.0, 9.0, 6.0, 2.0, 6.0, 1.0, 0.0, 1.0], "bins": [-1.6181640625, -1.5693511962890625, -1.520538330078125, -1.4717254638671875, -1.42291259765625, -1.3740997314453125, -1.325286865234375, -1.2764739990234375, -1.2276611328125, -1.1788482666015625, -1.130035400390625, -1.0812225341796875, -1.03240966796875, -0.9835968017578125, -0.934783935546875, -0.8859710693359375, -0.837158203125, -0.7883453369140625, -0.739532470703125, -0.6907196044921875, -0.64190673828125, -0.5930938720703125, -0.544281005859375, -0.4954681396484375, -0.4466552734375, -0.3978424072265625, -0.349029541015625, -0.3002166748046875, -0.25140380859375, -0.2025909423828125, -0.153778076171875, -0.1049652099609375, -0.05615234375, -0.0073394775390625, 0.041473388671875, 0.0902862548828125, 0.13909912109375, 0.1879119873046875, 0.236724853515625, 0.2855377197265625, 0.3343505859375, 0.3831634521484375, 0.431976318359375, 0.4807891845703125, 0.52960205078125, 0.5784149169921875, 0.627227783203125, 0.6760406494140625, 0.724853515625, 0.7736663818359375, 0.822479248046875, 0.8712921142578125, 0.92010498046875, 0.9689178466796875, 1.017730712890625, 1.0665435791015625, 1.1153564453125, 1.1641693115234375, 1.212982177734375, 1.2617950439453125, 1.31060791015625, 1.3594207763671875, 1.408233642578125, 1.4570465087890625, 1.505859375]}, "gradients/encoder.encoder.layers.2.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 3.0, 2.0, 0.0, 4.0, 3.0, 8.0, 9.0, 9.0, 11.0, 15.0, 13.0, 13.0, 21.0, 17.0, 28.0, 32.0, 33.0, 33.0, 40.0, 51.0, 50.0, 63.0, 50.0, 57.0, 56.0, 50.0, 53.0, 34.0, 33.0, 31.0, 32.0, 24.0, 20.0, 18.0, 23.0, 13.0, 17.0, 7.0, 19.0, 6.0, 3.0, 4.0, 6.0, 3.0, 2.0, 0.0, 0.0, 1.0], "bins": [-1.748046875, -1.7032318115234375, -1.658416748046875, -1.6136016845703125, -1.56878662109375, -1.5239715576171875, -1.479156494140625, -1.4343414306640625, -1.3895263671875, -1.3447113037109375, -1.299896240234375, -1.2550811767578125, -1.21026611328125, -1.1654510498046875, -1.120635986328125, -1.0758209228515625, -1.031005859375, -0.9861907958984375, -0.941375732421875, -0.8965606689453125, -0.85174560546875, -0.8069305419921875, -0.762115478515625, -0.7173004150390625, -0.6724853515625, -0.6276702880859375, -0.582855224609375, -0.5380401611328125, -0.49322509765625, -0.4484100341796875, -0.403594970703125, -0.3587799072265625, -0.31396484375, -0.2691497802734375, -0.224334716796875, -0.1795196533203125, -0.13470458984375, -0.0898895263671875, -0.045074462890625, -0.0002593994140625, 0.0445556640625, 0.0893707275390625, 0.134185791015625, 0.1790008544921875, 0.22381591796875, 0.2686309814453125, 0.313446044921875, 0.3582611083984375, 0.403076171875, 0.4478912353515625, 0.492706298828125, 0.5375213623046875, 0.58233642578125, 0.6271514892578125, 0.671966552734375, 0.7167816162109375, 0.7615966796875, 0.8064117431640625, 0.851226806640625, 0.8960418701171875, 0.94085693359375, 0.9856719970703125, 1.030487060546875, 1.0753021240234375, 1.1201171875]}, "gradients/encoder.encoder.layers.2.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 3.0, 7.0, 0.0, 0.0, 4.0, 3.0, 7.0, 9.0, 14.0, 21.0, 27.0, 46.0, 65.0, 118.0, 179.0, 299.0, 616.0, 1410.0, 4448.0, 24106.0, 237083.0, 715325.0, 52941.0, 8036.0, 2066.0, 763.0, 376.0, 225.0, 131.0, 73.0, 54.0, 32.0, 14.0, 16.0, 16.0, 12.0, 5.0, 6.0, 4.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.92578125, -5.74169921875, -5.5576171875, -5.37353515625, -5.189453125, -5.00537109375, -4.8212890625, -4.63720703125, -4.453125, -4.26904296875, -4.0849609375, -3.90087890625, -3.716796875, -3.53271484375, -3.3486328125, -3.16455078125, -2.98046875, -2.79638671875, -2.6123046875, -2.42822265625, -2.244140625, -2.06005859375, -1.8759765625, -1.69189453125, -1.5078125, -1.32373046875, -1.1396484375, -0.95556640625, -0.771484375, -0.58740234375, -0.4033203125, -0.21923828125, -0.03515625, 0.14892578125, 0.3330078125, 0.51708984375, 0.701171875, 0.88525390625, 1.0693359375, 1.25341796875, 1.4375, 1.62158203125, 1.8056640625, 1.98974609375, 2.173828125, 2.35791015625, 2.5419921875, 2.72607421875, 2.91015625, 3.09423828125, 3.2783203125, 3.46240234375, 3.646484375, 3.83056640625, 4.0146484375, 4.19873046875, 4.3828125, 4.56689453125, 4.7509765625, 4.93505859375, 5.119140625, 5.30322265625, 5.4873046875, 5.67138671875, 5.85546875]}, "gradients/encoder.encoder.layers.2.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 4.0, 1.0, 3.0, 4.0, 2.0, 4.0, 9.0, 5.0, 13.0, 14.0, 17.0, 11.0, 8.0, 22.0, 21.0, 21.0, 26.0, 31.0, 43.0, 52.0, 55.0, 40.0, 42.0, 48.0, 60.0, 50.0, 45.0, 45.0, 41.0, 40.0, 35.0, 21.0, 29.0, 22.0, 24.0, 22.0, 18.0, 13.0, 7.0, 5.0, 6.0, 6.0, 7.0, 2.0, 4.0, 7.0, 3.0, 0.0, 1.0, 1.0, 1.0, 1.0, 3.0], "bins": [-5.69140625, -5.52947998046875, -5.3675537109375, -5.20562744140625, -5.043701171875, -4.88177490234375, -4.7198486328125, -4.55792236328125, -4.39599609375, -4.23406982421875, -4.0721435546875, -3.91021728515625, -3.748291015625, -3.58636474609375, -3.4244384765625, -3.26251220703125, -3.1005859375, -2.93865966796875, -2.7767333984375, -2.61480712890625, -2.452880859375, -2.29095458984375, -2.1290283203125, -1.96710205078125, -1.80517578125, -1.64324951171875, -1.4813232421875, -1.31939697265625, -1.157470703125, -0.99554443359375, -0.8336181640625, -0.67169189453125, -0.509765625, -0.34783935546875, -0.1859130859375, -0.02398681640625, 0.137939453125, 0.29986572265625, 0.4617919921875, 0.62371826171875, 0.78564453125, 0.94757080078125, 1.1094970703125, 1.27142333984375, 1.433349609375, 1.59527587890625, 1.7572021484375, 1.91912841796875, 2.0810546875, 2.24298095703125, 2.4049072265625, 2.56683349609375, 2.728759765625, 2.89068603515625, 3.0526123046875, 3.21453857421875, 3.37646484375, 3.53839111328125, 3.7003173828125, 3.86224365234375, 4.024169921875, 4.18609619140625, 4.3480224609375, 4.50994873046875, 4.671875]}, "gradients/encoder.encoder.layers.2.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 3.0, 1.0, 1.0, 1.0, 1.0, 3.0, 1.0, 3.0, 5.0, 7.0, 6.0, 11.0, 11.0, 20.0, 24.0, 33.0, 58.0, 116.0, 207.0, 432.0, 965.0, 2733.0, 11017.0, 87092.0, 844383.0, 85878.0, 10927.0, 2780.0, 922.0, 398.0, 201.0, 115.0, 56.0, 51.0, 28.0, 13.0, 18.0, 10.0, 10.0, 6.0, 3.0, 7.0, 4.0, 3.0, 3.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.8330078125, -1.7729034423828125, -1.712799072265625, -1.6526947021484375, -1.59259033203125, -1.5324859619140625, -1.472381591796875, -1.4122772216796875, -1.3521728515625, -1.2920684814453125, -1.231964111328125, -1.1718597412109375, -1.11175537109375, -1.0516510009765625, -0.991546630859375, -0.9314422607421875, -0.871337890625, -0.8112335205078125, -0.751129150390625, -0.6910247802734375, -0.63092041015625, -0.5708160400390625, -0.510711669921875, -0.4506072998046875, -0.3905029296875, -0.3303985595703125, -0.270294189453125, -0.2101898193359375, -0.15008544921875, -0.0899810791015625, -0.029876708984375, 0.0302276611328125, 0.09033203125, 0.1504364013671875, 0.210540771484375, 0.2706451416015625, 0.33074951171875, 0.3908538818359375, 0.450958251953125, 0.5110626220703125, 0.5711669921875, 0.6312713623046875, 0.691375732421875, 0.7514801025390625, 0.81158447265625, 0.8716888427734375, 0.931793212890625, 0.9918975830078125, 1.052001953125, 1.1121063232421875, 1.172210693359375, 1.2323150634765625, 1.29241943359375, 1.3525238037109375, 1.412628173828125, 1.4727325439453125, 1.5328369140625, 1.5929412841796875, 1.653045654296875, 1.7131500244140625, 1.77325439453125, 1.8333587646484375, 1.893463134765625, 1.9535675048828125, 2.013671875]}, "gradients/encoder.encoder.layers.2.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 4.0, 1.0, 2.0, 2.0, 5.0, 10.0, 4.0, 6.0, 2.0, 4.0, 6.0, 6.0, 19.0, 9.0, 11.0, 18.0, 28.0, 22.0, 41.0, 56.0, 67.0, 85.0, 85.0, 119.0, 70.0, 82.0, 53.0, 38.0, 30.0, 19.0, 24.0, 16.0, 12.0, 10.0, 11.0, 4.0, 6.0, 4.0, 3.0, 4.0, 1.0, 4.0, 2.0, 1.0, 3.0, 0.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.0002727508544921875, -0.00026455894112586975, -0.000256367027759552, -0.00024817511439323425, -0.0002399832010269165, -0.00023179128766059875, -0.000223599374294281, -0.00021540746092796326, -0.0002072155475616455, -0.00019902363419532776, -0.00019083172082901, -0.00018263980746269226, -0.0001744478940963745, -0.00016625598073005676, -0.00015806406736373901, -0.00014987215399742126, -0.00014168024063110352, -0.00013348832726478577, -0.00012529641389846802, -0.00011710450053215027, -0.00010891258716583252, -0.00010072067379951477, -9.252876043319702e-05, -8.433684706687927e-05, -7.614493370056152e-05, -6.795302033424377e-05, -5.9761106967926025e-05, -5.1569193601608276e-05, -4.337728023529053e-05, -3.518536686897278e-05, -2.699345350265503e-05, -1.880154013633728e-05, -1.0609626770019531e-05, -2.4177134037017822e-06, 5.774199962615967e-06, 1.3966113328933716e-05, 2.2158026695251465e-05, 3.0349940061569214e-05, 3.854185342788696e-05, 4.673376679420471e-05, 5.492568016052246e-05, 6.311759352684021e-05, 7.130950689315796e-05, 7.950142025947571e-05, 8.769333362579346e-05, 9.58852469921112e-05, 0.00010407716035842896, 0.0001122690737247467, 0.00012046098709106445, 0.0001286529004573822, 0.00013684481382369995, 0.0001450367271900177, 0.00015322864055633545, 0.0001614205539226532, 0.00016961246728897095, 0.0001778043806552887, 0.00018599629402160645, 0.0001941882073879242, 0.00020238012075424194, 0.0002105720341205597, 0.00021876394748687744, 0.0002269558608531952, 0.00023514777421951294, 0.0002433396875858307, 0.00025153160095214844]}, "gradients/encoder.encoder.layers.2.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 4.0, 2.0, 7.0, 4.0, 4.0, 14.0, 20.0, 29.0, 52.0, 126.0, 271.0, 703.0, 2211.0, 11846.0, 154673.0, 832252.0, 39168.0, 5159.0, 1211.0, 440.0, 164.0, 81.0, 49.0, 21.0, 19.0, 14.0, 7.0, 7.0, 2.0, 2.0, 4.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.7744140625, -1.7068634033203125, -1.639312744140625, -1.5717620849609375, -1.50421142578125, -1.4366607666015625, -1.369110107421875, -1.3015594482421875, -1.2340087890625, -1.1664581298828125, -1.098907470703125, -1.0313568115234375, -0.96380615234375, -0.8962554931640625, -0.828704833984375, -0.7611541748046875, -0.693603515625, -0.6260528564453125, -0.558502197265625, -0.4909515380859375, -0.42340087890625, -0.3558502197265625, -0.288299560546875, -0.2207489013671875, -0.1531982421875, -0.0856475830078125, -0.018096923828125, 0.0494537353515625, 0.11700439453125, 0.1845550537109375, 0.252105712890625, 0.3196563720703125, 0.38720703125, 0.4547576904296875, 0.522308349609375, 0.5898590087890625, 0.65740966796875, 0.7249603271484375, 0.792510986328125, 0.8600616455078125, 0.9276123046875, 0.9951629638671875, 1.062713623046875, 1.1302642822265625, 1.19781494140625, 1.2653656005859375, 1.332916259765625, 1.4004669189453125, 1.468017578125, 1.5355682373046875, 1.603118896484375, 1.6706695556640625, 1.73822021484375, 1.8057708740234375, 1.873321533203125, 1.9408721923828125, 2.0084228515625, 2.0759735107421875, 2.143524169921875, 2.2110748291015625, 2.27862548828125, 2.3461761474609375, 2.413726806640625, 2.4812774658203125, 2.548828125]}, "gradients/encoder.encoder.layers.2.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 4.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 3.0, 0.0, 4.0, 2.0, 1.0, 1.0, 3.0, 9.0, 10.0, 9.0, 12.0, 13.0, 14.0, 19.0, 24.0, 34.0, 32.0, 64.0, 56.0, 57.0, 69.0, 73.0, 61.0, 72.0, 65.0, 52.0, 51.0, 34.0, 39.0, 26.0, 18.0, 18.0, 13.0, 12.0, 7.0, 7.0, 8.0, 3.0, 4.0, 2.0, 1.0, 6.0, 1.0, 0.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.87744140625, -0.8484268188476562, -0.8194122314453125, -0.7903976440429688, -0.761383056640625, -0.7323684692382812, -0.7033538818359375, -0.6743392944335938, -0.64532470703125, -0.6163101196289062, -0.5872955322265625, -0.5582809448242188, -0.529266357421875, -0.5002517700195312, -0.4712371826171875, -0.44222259521484375, -0.4132080078125, -0.38419342041015625, -0.3551788330078125, -0.32616424560546875, -0.297149658203125, -0.26813507080078125, -0.2391204833984375, -0.21010589599609375, -0.18109130859375, -0.15207672119140625, -0.1230621337890625, -0.09404754638671875, -0.065032958984375, -0.03601837158203125, -0.0070037841796875, 0.02201080322265625, 0.051025390625, 0.08003997802734375, 0.1090545654296875, 0.13806915283203125, 0.167083740234375, 0.19609832763671875, 0.2251129150390625, 0.25412750244140625, 0.28314208984375, 0.31215667724609375, 0.3411712646484375, 0.37018585205078125, 0.399200439453125, 0.42821502685546875, 0.4572296142578125, 0.48624420166015625, 0.5152587890625, 0.5442733764648438, 0.5732879638671875, 0.6023025512695312, 0.631317138671875, 0.6603317260742188, 0.6893463134765625, 0.7183609008789062, 0.74737548828125, 0.7763900756835938, 0.8054046630859375, 0.8344192504882812, 0.863433837890625, 0.8924484252929688, 0.9214630126953125, 0.9504776000976562, 0.9794921875]}, "gradients/encoder.encoder.layers.2.layer_norm.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0, 2.0, 3.0, 7.0, 12.0, 15.0, 24.0, 34.0, 48.0, 88.0, 139.0, 268.0, 124.0, 77.0, 75.0, 28.0, 21.0, 19.0, 4.0, 6.0, 4.0, 3.0, 5.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-17.634052276611328, -16.80976104736328, -15.98547077178955, -15.161179542541504, -14.336889266967773, -13.512598037719727, -12.68830680847168, -11.864015579223633, -11.039725303649902, -10.215434074401855, -9.391143798828125, -8.566852569580078, -7.7425618171691895, -6.918271064758301, -6.093979835510254, -5.269689083099365, -4.445398330688477, -3.621107578277588, -2.79681658744812, -1.9725255966186523, -1.1482348442077637, -0.323944091796875, 0.5003471374511719, 1.3246378898620605, 2.148928642272949, 2.973219394683838, 3.7975103855133057, 4.621801376342773, 5.446092128753662, 6.270382881164551, 7.094674110412598, 7.918964862823486, 8.743255615234375, 9.567546844482422, 10.391837120056152, 11.2161283493042, 12.04041862487793, 12.864709854125977, 13.689001083374023, 14.51329231262207, 15.3375825881958, 16.16187286376953, 16.986164093017578, 17.810455322265625, 18.634746551513672, 19.45903778076172, 20.283329010009766, 21.10761833190918, 21.931909561157227, 22.756200790405273, 23.58049201965332, 24.404781341552734, 25.22907257080078, 26.053363800048828, 26.877655029296875, 27.701946258544922, 28.52623748779297, 29.350528717041016, 30.174819946289062, 30.99911117553711, 31.823400497436523, 32.64768981933594, 33.47198486328125, 34.29627227783203, 35.12056350708008]}, "gradients/encoder.encoder.layers.2.layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 5.0, 2.0, 7.0, 3.0, 5.0, 6.0, 17.0, 15.0, 17.0, 14.0, 16.0, 24.0, 31.0, 30.0, 43.0, 35.0, 46.0, 73.0, 113.0, 102.0, 79.0, 42.0, 37.0, 33.0, 29.0, 32.0, 27.0, 20.0, 21.0, 25.0, 11.0, 14.0, 10.0, 8.0, 1.0, 8.0, 2.0, 3.0, 2.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-23.476234436035156, -22.722492218017578, -21.96875, -21.215007781982422, -20.461265563964844, -19.707523345947266, -18.953781127929688, -18.20003890991211, -17.44629669189453, -16.692554473876953, -15.938812255859375, -15.185070037841797, -14.431327819824219, -13.67758560180664, -12.923843383789062, -12.170101165771484, -11.416359901428223, -10.662617683410645, -9.908875465393066, -9.155133247375488, -8.40139102935791, -7.647648811340332, -6.893907070159912, -6.140164852142334, -5.386422634124756, -4.632680416107178, -3.8789381980895996, -3.1251962184906006, -2.3714540004730225, -1.6177117824554443, -0.8639698028564453, -0.11022758483886719, 0.6435146331787109, 1.397256851196289, 2.150999069213867, 2.904741048812866, 3.6584832668304443, 4.412225723266602, 5.1659674644470215, 5.9197096824646, 6.673451900482178, 7.427194118499756, 8.180935859680176, 8.934678077697754, 9.688420295715332, 10.44216251373291, 11.195904731750488, 11.949646949768066, 12.703389167785645, 13.457131385803223, 14.2108736038208, 14.964615821838379, 15.718358039855957, 16.47209930419922, 17.225841522216797, 17.979583740234375, 18.733325958251953, 19.48706817626953, 20.24081039428711, 20.994552612304688, 21.748294830322266, 22.502037048339844, 23.255779266357422, 24.009521484375, 24.763263702392578]}, "gradients/encoder.encoder.layers.1.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 5.0, 1.0, 4.0, 8.0, 8.0, 22.0, 13.0, 26.0, 36.0, 38.0, 60.0, 121.0, 146.0, 244.0, 373.0, 635.0, 1150.0, 2135.0, 4165.0, 10720.0, 49723.0, 500653.0, 2773148.0, 754072.0, 72767.0, 13681.0, 4928.0, 2309.0, 1205.0, 725.0, 419.0, 245.0, 154.0, 126.0, 72.0, 44.0, 37.0, 18.0, 15.0, 15.0, 13.0, 7.0, 4.0, 2.0, 2.0, 3.0, 0.0, 1.0, 1.0], "bins": [-2.615234375, -2.545654296875, -2.47607421875, -2.406494140625, -2.3369140625, -2.267333984375, -2.19775390625, -2.128173828125, -2.05859375, -1.989013671875, -1.91943359375, -1.849853515625, -1.7802734375, -1.710693359375, -1.64111328125, -1.571533203125, -1.501953125, -1.432373046875, -1.36279296875, -1.293212890625, -1.2236328125, -1.154052734375, -1.08447265625, -1.014892578125, -0.9453125, -0.875732421875, -0.80615234375, -0.736572265625, -0.6669921875, -0.597412109375, -0.52783203125, -0.458251953125, -0.388671875, -0.319091796875, -0.24951171875, -0.179931640625, -0.1103515625, -0.040771484375, 0.02880859375, 0.098388671875, 0.16796875, 0.237548828125, 0.30712890625, 0.376708984375, 0.4462890625, 0.515869140625, 0.58544921875, 0.655029296875, 0.724609375, 0.794189453125, 0.86376953125, 0.933349609375, 1.0029296875, 1.072509765625, 1.14208984375, 1.211669921875, 1.28125, 1.350830078125, 1.42041015625, 1.489990234375, 1.5595703125, 1.629150390625, 1.69873046875, 1.768310546875, 1.837890625]}, "gradients/encoder.encoder.layers.1.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 3.0, 2.0, 3.0, 2.0, 3.0, 9.0, 6.0, 10.0, 14.0, 9.0, 15.0, 16.0, 22.0, 17.0, 29.0, 33.0, 35.0, 36.0, 44.0, 40.0, 54.0, 45.0, 45.0, 49.0, 42.0, 52.0, 49.0, 39.0, 33.0, 34.0, 31.0, 27.0, 23.0, 25.0, 20.0, 27.0, 14.0, 11.0, 8.0, 2.0, 11.0, 2.0, 7.0, 6.0, 3.0, 4.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0], "bins": [-1.462890625, -1.4214935302734375, -1.380096435546875, -1.3386993408203125, -1.29730224609375, -1.2559051513671875, -1.214508056640625, -1.1731109619140625, -1.1317138671875, -1.0903167724609375, -1.048919677734375, -1.0075225830078125, -0.96612548828125, -0.9247283935546875, -0.883331298828125, -0.8419342041015625, -0.800537109375, -0.7591400146484375, -0.717742919921875, -0.6763458251953125, -0.63494873046875, -0.5935516357421875, -0.552154541015625, -0.5107574462890625, -0.4693603515625, -0.4279632568359375, -0.386566162109375, -0.3451690673828125, -0.30377197265625, -0.2623748779296875, -0.220977783203125, -0.1795806884765625, -0.13818359375, -0.0967864990234375, -0.055389404296875, -0.0139923095703125, 0.02740478515625, 0.0688018798828125, 0.110198974609375, 0.1515960693359375, 0.1929931640625, 0.2343902587890625, 0.275787353515625, 0.3171844482421875, 0.35858154296875, 0.3999786376953125, 0.441375732421875, 0.4827728271484375, 0.524169921875, 0.5655670166015625, 0.606964111328125, 0.6483612060546875, 0.68975830078125, 0.7311553955078125, 0.772552490234375, 0.8139495849609375, 0.8553466796875, 0.8967437744140625, 0.938140869140625, 0.9795379638671875, 1.02093505859375, 1.0623321533203125, 1.103729248046875, 1.1451263427734375, 1.1865234375]}, "gradients/encoder.encoder.layers.1.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 4.0, 5.0, 10.0, 22.0, 42.0, 60.0, 139.0, 268.0, 482.0, 1333.0, 33394.0, 4138969.0, 17251.0, 1208.0, 501.0, 264.0, 172.0, 95.0, 32.0, 23.0, 7.0, 6.0, 4.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.7734375, -12.3795166015625, -11.985595703125, -11.5916748046875, -11.19775390625, -10.8038330078125, -10.409912109375, -10.0159912109375, -9.6220703125, -9.2281494140625, -8.834228515625, -8.4403076171875, -8.04638671875, -7.6524658203125, -7.258544921875, -6.8646240234375, -6.470703125, -6.0767822265625, -5.682861328125, -5.2889404296875, -4.89501953125, -4.5010986328125, -4.107177734375, -3.7132568359375, -3.3193359375, -2.9254150390625, -2.531494140625, -2.1375732421875, -1.74365234375, -1.3497314453125, -0.955810546875, -0.5618896484375, -0.16796875, 0.2259521484375, 0.619873046875, 1.0137939453125, 1.40771484375, 1.8016357421875, 2.195556640625, 2.5894775390625, 2.9833984375, 3.3773193359375, 3.771240234375, 4.1651611328125, 4.55908203125, 4.9530029296875, 5.346923828125, 5.7408447265625, 6.134765625, 6.5286865234375, 6.922607421875, 7.3165283203125, 7.71044921875, 8.1043701171875, 8.498291015625, 8.8922119140625, 9.2861328125, 9.6800537109375, 10.073974609375, 10.4678955078125, 10.86181640625, 11.2557373046875, 11.649658203125, 12.0435791015625, 12.4375]}, "gradients/encoder.encoder.layers.1.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 3.0, 0.0, 2.0, 4.0, 4.0, 13.0, 32.0, 44.0, 99.0, 215.0, 546.0, 1125.0, 1095.0, 486.0, 207.0, 82.0, 56.0, 31.0, 9.0, 9.0, 6.0, 9.0, 4.0, 5.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.806640625, -2.631072998046875, -2.45550537109375, -2.279937744140625, -2.1043701171875, -1.928802490234375, -1.75323486328125, -1.577667236328125, -1.402099609375, -1.226531982421875, -1.05096435546875, -0.875396728515625, -0.6998291015625, -0.524261474609375, -0.34869384765625, -0.173126220703125, 0.00244140625, 0.178009033203125, 0.35357666015625, 0.529144287109375, 0.7047119140625, 0.880279541015625, 1.05584716796875, 1.231414794921875, 1.406982421875, 1.582550048828125, 1.75811767578125, 1.933685302734375, 2.1092529296875, 2.284820556640625, 2.46038818359375, 2.635955810546875, 2.8115234375, 2.987091064453125, 3.16265869140625, 3.338226318359375, 3.5137939453125, 3.689361572265625, 3.86492919921875, 4.040496826171875, 4.216064453125, 4.391632080078125, 4.56719970703125, 4.742767333984375, 4.9183349609375, 5.093902587890625, 5.26947021484375, 5.445037841796875, 5.62060546875, 5.796173095703125, 5.97174072265625, 6.147308349609375, 6.3228759765625, 6.498443603515625, 6.67401123046875, 6.849578857421875, 7.025146484375, 7.200714111328125, 7.37628173828125, 7.551849365234375, 7.7274169921875, 7.902984619140625, 8.07855224609375, 8.254119873046875, 8.4296875]}, "gradients/encoder.encoder.layers.1.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 3.0, 7.0, 9.0, 13.0, 41.0, 63.0, 138.0, 229.0, 229.0, 124.0, 67.0, 36.0, 25.0, 10.0, 8.0, 4.0, 1.0, 6.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-28.824981689453125, -27.37922477722168, -25.933467864990234, -24.487709045410156, -23.041954040527344, -21.596195220947266, -20.15043830871582, -18.704681396484375, -17.25892448425293, -15.813167572021484, -14.367410659790039, -12.921652793884277, -11.475895881652832, -10.030138969421387, -8.584381103515625, -7.13862419128418, -5.692867279052734, -4.247110366821289, -2.8013529777526855, -1.355595588684082, 0.09016132354736328, 1.5359182357788086, 2.9816761016845703, 4.427433013916016, 5.873189926147461, 7.318946838378906, 8.764703750610352, 10.210461616516113, 11.656218528747559, 13.101975440979004, 14.547733306884766, 15.993490219116211, 17.439247131347656, 18.8850040435791, 20.330760955810547, 21.776519775390625, 23.222274780273438, 24.668033599853516, 26.11379051208496, 27.559547424316406, 29.00530433654785, 30.451061248779297, 31.896818161010742, 33.34257507324219, 34.788333892822266, 36.23408889770508, 37.679847717285156, 39.12560272216797, 40.57136154174805, 42.017120361328125, 43.46287536621094, 44.908634185791016, 46.35438919067383, 47.800148010253906, 49.24590301513672, 50.6916618347168, 52.137420654296875, 53.58317947387695, 55.028934478759766, 56.474693298339844, 57.920448303222656, 59.366207122802734, 60.81196594238281, 62.257720947265625, 63.70347595214844]}, "gradients/encoder.encoder.layers.1.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 3.0, 2.0, 2.0, 6.0, 8.0, 8.0, 8.0, 12.0, 13.0, 21.0, 20.0, 23.0, 25.0, 35.0, 32.0, 54.0, 56.0, 43.0, 54.0, 69.0, 73.0, 72.0, 48.0, 47.0, 46.0, 43.0, 30.0, 32.0, 28.0, 23.0, 17.0, 10.0, 13.0, 10.0, 7.0, 5.0, 5.0, 5.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-24.54575538635254, -23.855375289916992, -23.164995193481445, -22.47461700439453, -21.784236907958984, -21.093856811523438, -20.40347671508789, -19.713096618652344, -19.022716522216797, -18.33233642578125, -17.641956329345703, -16.951576232910156, -16.261198043823242, -15.570817947387695, -14.880437850952148, -14.190057754516602, -13.499679565429688, -12.80929946899414, -12.11892032623291, -11.428540229797363, -10.738161087036133, -10.047780990600586, -9.357400894165039, -8.667020797729492, -7.976641654968262, -7.286262035369873, -6.595882415771484, -5.9055023193359375, -5.215122699737549, -4.52474308013916, -3.8343629837036133, -3.1439833641052246, -2.453601837158203, -1.763222098350525, -1.0728423595428467, -0.3824625015258789, 0.30791711807250977, 0.9982967376708984, 1.6886768341064453, 2.379056453704834, 3.0694360733032227, 3.7598156929016113, 4.4501953125, 5.140575408935547, 5.8309550285339355, 6.521334648132324, 7.211714744567871, 7.90209436416626, 8.592473983764648, 9.282854080200195, 9.973233222961426, 10.663613319396973, 11.353992462158203, 12.04437255859375, 12.734752655029297, 13.425132751464844, 14.115511894226074, 14.805891990661621, 15.496271133422852, 16.1866512298584, 16.877031326293945, 17.56740951538086, 18.257789611816406, 18.948169708251953, 19.6385498046875]}, "gradients/encoder.encoder.layers.1.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 6.0, 8.0, 7.0, 11.0, 9.0, 19.0, 29.0, 39.0, 51.0, 96.0, 127.0, 180.0, 261.0, 402.0, 592.0, 934.0, 1459.0, 2248.0, 3839.0, 6628.0, 11750.0, 22192.0, 45594.0, 101486.0, 231026.0, 313738.0, 161857.0, 70969.0, 33622.0, 16991.0, 8951.0, 5070.0, 2984.0, 1965.0, 1148.0, 721.0, 518.0, 333.0, 233.0, 145.0, 100.0, 71.0, 45.0, 38.0, 20.0, 19.0, 8.0, 11.0, 5.0, 3.0, 5.0, 4.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.005859375, -1.9349365234375, -1.864013671875, -1.7930908203125, -1.72216796875, -1.6512451171875, -1.580322265625, -1.5093994140625, -1.4384765625, -1.3675537109375, -1.296630859375, -1.2257080078125, -1.15478515625, -1.0838623046875, -1.012939453125, -0.9420166015625, -0.87109375, -0.8001708984375, -0.729248046875, -0.6583251953125, -0.58740234375, -0.5164794921875, -0.445556640625, -0.3746337890625, -0.3037109375, -0.2327880859375, -0.161865234375, -0.0909423828125, -0.02001953125, 0.0509033203125, 0.121826171875, 0.1927490234375, 0.263671875, 0.3345947265625, 0.405517578125, 0.4764404296875, 0.54736328125, 0.6182861328125, 0.689208984375, 0.7601318359375, 0.8310546875, 0.9019775390625, 0.972900390625, 1.0438232421875, 1.11474609375, 1.1856689453125, 1.256591796875, 1.3275146484375, 1.3984375, 1.4693603515625, 1.540283203125, 1.6112060546875, 1.68212890625, 1.7530517578125, 1.823974609375, 1.8948974609375, 1.9658203125, 2.0367431640625, 2.107666015625, 2.1785888671875, 2.24951171875, 2.3204345703125, 2.391357421875, 2.4622802734375, 2.533203125]}, "gradients/encoder.encoder.layers.1.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 2.0, 5.0, 5.0, 5.0, 5.0, 4.0, 4.0, 9.0, 7.0, 11.0, 13.0, 20.0, 24.0, 25.0, 29.0, 33.0, 40.0, 40.0, 37.0, 41.0, 52.0, 65.0, 41.0, 61.0, 39.0, 47.0, 47.0, 37.0, 37.0, 44.0, 27.0, 22.0, 29.0, 21.0, 17.0, 17.0, 12.0, 8.0, 6.0, 9.0, 4.0, 7.0, 3.0, 1.0, 3.0, 0.0, 1.0], "bins": [-2.01171875, -1.9609375, -1.91015625, -1.859375, -1.80859375, -1.7578125, -1.70703125, -1.65625, -1.60546875, -1.5546875, -1.50390625, -1.453125, -1.40234375, -1.3515625, -1.30078125, -1.25, -1.19921875, -1.1484375, -1.09765625, -1.046875, -0.99609375, -0.9453125, -0.89453125, -0.84375, -0.79296875, -0.7421875, -0.69140625, -0.640625, -0.58984375, -0.5390625, -0.48828125, -0.4375, -0.38671875, -0.3359375, -0.28515625, -0.234375, -0.18359375, -0.1328125, -0.08203125, -0.03125, 0.01953125, 0.0703125, 0.12109375, 0.171875, 0.22265625, 0.2734375, 0.32421875, 0.375, 0.42578125, 0.4765625, 0.52734375, 0.578125, 0.62890625, 0.6796875, 0.73046875, 0.78125, 0.83203125, 0.8828125, 0.93359375, 0.984375, 1.03515625, 1.0859375, 1.13671875, 1.1875, 1.23828125]}, "gradients/encoder.encoder.layers.1.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 4.0, 3.0, 3.0, 1.0, 0.0, 4.0, 6.0, 5.0, 5.0, 6.0, 12.0, 13.0, 26.0, 26.0, 37.0, 31.0, 67.0, 61.0, 114.0, 170.0, 248.0, 394.0, 689.0, 1398.0, 3842.0, 19182.0, 233499.0, 741852.0, 36946.0, 5842.0, 1817.0, 814.0, 494.0, 288.0, 186.0, 128.0, 81.0, 78.0, 44.0, 32.0, 30.0, 16.0, 13.0, 15.0, 9.0, 8.0, 7.0, 6.0, 2.0, 2.0, 3.0, 3.0, 2.0, 3.0, 3.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-6.3828125, -6.17791748046875, -5.9730224609375, -5.76812744140625, -5.563232421875, -5.35833740234375, -5.1534423828125, -4.94854736328125, -4.74365234375, -4.53875732421875, -4.3338623046875, -4.12896728515625, -3.924072265625, -3.71917724609375, -3.5142822265625, -3.30938720703125, -3.1044921875, -2.89959716796875, -2.6947021484375, -2.48980712890625, -2.284912109375, -2.08001708984375, -1.8751220703125, -1.67022705078125, -1.46533203125, -1.26043701171875, -1.0555419921875, -0.85064697265625, -0.645751953125, -0.44085693359375, -0.2359619140625, -0.03106689453125, 0.173828125, 0.37872314453125, 0.5836181640625, 0.78851318359375, 0.993408203125, 1.19830322265625, 1.4031982421875, 1.60809326171875, 1.81298828125, 2.01788330078125, 2.2227783203125, 2.42767333984375, 2.632568359375, 2.83746337890625, 3.0423583984375, 3.24725341796875, 3.4521484375, 3.65704345703125, 3.8619384765625, 4.06683349609375, 4.271728515625, 4.47662353515625, 4.6815185546875, 4.88641357421875, 5.09130859375, 5.29620361328125, 5.5010986328125, 5.70599365234375, 5.910888671875, 6.11578369140625, 6.3206787109375, 6.52557373046875, 6.73046875]}, "gradients/encoder.encoder.layers.1.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 3.0, 4.0, 5.0, 2.0, 5.0, 7.0, 5.0, 12.0, 13.0, 17.0, 10.0, 34.0, 27.0, 30.0, 40.0, 36.0, 44.0, 64.0, 56.0, 66.0, 77.0, 64.0, 62.0, 66.0, 52.0, 39.0, 47.0, 26.0, 22.0, 20.0, 13.0, 13.0, 5.0, 8.0, 6.0, 6.0, 3.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.75390625, -5.53265380859375, -5.3114013671875, -5.09014892578125, -4.868896484375, -4.64764404296875, -4.4263916015625, -4.20513916015625, -3.98388671875, -3.76263427734375, -3.5413818359375, -3.32012939453125, -3.098876953125, -2.87762451171875, -2.6563720703125, -2.43511962890625, -2.2138671875, -1.99261474609375, -1.7713623046875, -1.55010986328125, -1.328857421875, -1.10760498046875, -0.8863525390625, -0.66510009765625, -0.44384765625, -0.22259521484375, -0.0013427734375, 0.21990966796875, 0.441162109375, 0.66241455078125, 0.8836669921875, 1.10491943359375, 1.326171875, 1.54742431640625, 1.7686767578125, 1.98992919921875, 2.211181640625, 2.43243408203125, 2.6536865234375, 2.87493896484375, 3.09619140625, 3.31744384765625, 3.5386962890625, 3.75994873046875, 3.981201171875, 4.20245361328125, 4.4237060546875, 4.64495849609375, 4.8662109375, 5.08746337890625, 5.3087158203125, 5.52996826171875, 5.751220703125, 5.97247314453125, 6.1937255859375, 6.41497802734375, 6.63623046875, 6.85748291015625, 7.0787353515625, 7.29998779296875, 7.521240234375, 7.74249267578125, 7.9637451171875, 8.18499755859375, 8.40625]}, "gradients/encoder.encoder.layers.1.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 1.0, 1.0, 4.0, 3.0, 5.0, 4.0, 8.0, 5.0, 10.0, 26.0, 37.0, 44.0, 92.0, 116.0, 233.0, 461.0, 1079.0, 3053.0, 10803.0, 70336.0, 854345.0, 89762.0, 12576.0, 3311.0, 1153.0, 466.0, 244.0, 161.0, 66.0, 43.0, 37.0, 22.0, 15.0, 8.0, 8.0, 4.0, 3.0, 5.0, 3.0, 1.0, 4.0, 3.0, 2.0, 4.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-2.201171875, -2.139251708984375, -2.07733154296875, -2.015411376953125, -1.9534912109375, -1.891571044921875, -1.82965087890625, -1.767730712890625, -1.705810546875, -1.643890380859375, -1.58197021484375, -1.520050048828125, -1.4581298828125, -1.396209716796875, -1.33428955078125, -1.272369384765625, -1.21044921875, -1.148529052734375, -1.08660888671875, -1.024688720703125, -0.9627685546875, -0.900848388671875, -0.83892822265625, -0.777008056640625, -0.715087890625, -0.653167724609375, -0.59124755859375, -0.529327392578125, -0.4674072265625, -0.405487060546875, -0.34356689453125, -0.281646728515625, -0.2197265625, -0.157806396484375, -0.09588623046875, -0.033966064453125, 0.0279541015625, 0.089874267578125, 0.15179443359375, 0.213714599609375, 0.275634765625, 0.337554931640625, 0.39947509765625, 0.461395263671875, 0.5233154296875, 0.585235595703125, 0.64715576171875, 0.709075927734375, 0.77099609375, 0.832916259765625, 0.89483642578125, 0.956756591796875, 1.0186767578125, 1.080596923828125, 1.14251708984375, 1.204437255859375, 1.266357421875, 1.328277587890625, 1.39019775390625, 1.452117919921875, 1.5140380859375, 1.575958251953125, 1.63787841796875, 1.699798583984375, 1.76171875]}, "gradients/encoder.encoder.layers.1.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 3.0, 1.0, 1.0, 3.0, 3.0, 0.0, 3.0, 5.0, 9.0, 18.0, 22.0, 21.0, 46.0, 101.0, 140.0, 251.0, 161.0, 87.0, 52.0, 34.0, 12.0, 14.0, 9.0, 6.0, 0.0, 4.0, 3.0, 1.0, 1.0, 2.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0008168220520019531, -0.0007892102003097534, -0.0007615983486175537, -0.000733986496925354, -0.0007063746452331543, -0.0006787627935409546, -0.0006511509418487549, -0.0006235390901565552, -0.0005959272384643555, -0.0005683153867721558, -0.0005407035350799561, -0.0005130916833877563, -0.00048547983169555664, -0.00045786798000335693, -0.0004302561283111572, -0.0004026442766189575, -0.0003750324249267578, -0.0003474205732345581, -0.0003198087215423584, -0.0002921968698501587, -0.000264585018157959, -0.00023697316646575928, -0.00020936131477355957, -0.00018174946308135986, -0.00015413761138916016, -0.00012652575969696045, -9.891390800476074e-05, -7.130205631256104e-05, -4.369020462036133e-05, -1.607835292816162e-05, 1.1533498764038086e-05, 3.914535045623779e-05, 6.67572021484375e-05, 9.436905384063721e-05, 0.00012198090553283691, 0.00014959275722503662, 0.00017720460891723633, 0.00020481646060943604, 0.00023242831230163574, 0.00026004016399383545, 0.00028765201568603516, 0.00031526386737823486, 0.00034287571907043457, 0.0003704875707626343, 0.000398099422454834, 0.0004257112741470337, 0.0004533231258392334, 0.0004809349775314331, 0.0005085468292236328, 0.0005361586809158325, 0.0005637705326080322, 0.0005913823843002319, 0.0006189942359924316, 0.0006466060876846313, 0.0006742179393768311, 0.0007018297910690308, 0.0007294416427612305, 0.0007570534944534302, 0.0007846653461456299, 0.0008122771978378296, 0.0008398890495300293, 0.000867500901222229, 0.0008951127529144287, 0.0009227246046066284, 0.0009503364562988281]}, "gradients/encoder.encoder.layers.1.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 2.0, 5.0, 8.0, 3.0, 4.0, 7.0, 8.0, 11.0, 15.0, 20.0, 32.0, 43.0, 44.0, 60.0, 100.0, 126.0, 237.0, 495.0, 1348.0, 5295.0, 39386.0, 877422.0, 110872.0, 9413.0, 2002.0, 679.0, 328.0, 171.0, 113.0, 89.0, 59.0, 36.0, 35.0, 29.0, 16.0, 12.0, 11.0, 9.0, 8.0, 4.0, 2.0, 4.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 2.0], "bins": [-2.361328125, -2.2890625, -2.216796875, -2.14453125, -2.072265625, -2.0, -1.927734375, -1.85546875, -1.783203125, -1.7109375, -1.638671875, -1.56640625, -1.494140625, -1.421875, -1.349609375, -1.27734375, -1.205078125, -1.1328125, -1.060546875, -0.98828125, -0.916015625, -0.84375, -0.771484375, -0.69921875, -0.626953125, -0.5546875, -0.482421875, -0.41015625, -0.337890625, -0.265625, -0.193359375, -0.12109375, -0.048828125, 0.0234375, 0.095703125, 0.16796875, 0.240234375, 0.3125, 0.384765625, 0.45703125, 0.529296875, 0.6015625, 0.673828125, 0.74609375, 0.818359375, 0.890625, 0.962890625, 1.03515625, 1.107421875, 1.1796875, 1.251953125, 1.32421875, 1.396484375, 1.46875, 1.541015625, 1.61328125, 1.685546875, 1.7578125, 1.830078125, 1.90234375, 1.974609375, 2.046875, 2.119140625, 2.19140625, 2.263671875]}, "gradients/encoder.encoder.layers.1.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 4.0, 8.0, 6.0, 5.0, 5.0, 6.0, 13.0, 7.0, 11.0, 16.0, 18.0, 27.0, 34.0, 46.0, 45.0, 67.0, 86.0, 90.0, 83.0, 83.0, 68.0, 53.0, 30.0, 34.0, 38.0, 26.0, 13.0, 13.0, 10.0, 18.0, 8.0, 14.0, 2.0, 5.0, 6.0, 4.0, 5.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.3525390625, -1.3103790283203125, -1.268218994140625, -1.2260589599609375, -1.18389892578125, -1.1417388916015625, -1.099578857421875, -1.0574188232421875, -1.0152587890625, -0.9730987548828125, -0.930938720703125, -0.8887786865234375, -0.84661865234375, -0.8044586181640625, -0.762298583984375, -0.7201385498046875, -0.677978515625, -0.6358184814453125, -0.593658447265625, -0.5514984130859375, -0.50933837890625, -0.4671783447265625, -0.425018310546875, -0.3828582763671875, -0.3406982421875, -0.2985382080078125, -0.256378173828125, -0.2142181396484375, -0.17205810546875, -0.1298980712890625, -0.087738037109375, -0.0455780029296875, -0.00341796875, 0.0387420654296875, 0.080902099609375, 0.1230621337890625, 0.16522216796875, 0.2073822021484375, 0.249542236328125, 0.2917022705078125, 0.3338623046875, 0.3760223388671875, 0.418182373046875, 0.4603424072265625, 0.50250244140625, 0.5446624755859375, 0.586822509765625, 0.6289825439453125, 0.671142578125, 0.7133026123046875, 0.755462646484375, 0.7976226806640625, 0.83978271484375, 0.8819427490234375, 0.924102783203125, 0.9662628173828125, 1.0084228515625, 1.0505828857421875, 1.092742919921875, 1.1349029541015625, 1.17706298828125, 1.2192230224609375, 1.261383056640625, 1.3035430908203125, 1.345703125]}, "gradients/encoder.encoder.layers.1.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 2.0, 3.0, 13.0, 193.0, 650.0, 112.0, 29.0, 7.0, 3.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-54.13117980957031, -50.056297302246094, -45.981414794921875, -41.90653610229492, -37.8316535949707, -33.756771087646484, -29.6818904876709, -25.607009887695312, -21.532127380371094, -17.457244873046875, -13.382364273071289, -9.307482719421387, -5.232601165771484, -1.1577186584472656, 2.9171619415283203, 6.992042541503906, 11.066925048828125, 15.141806602478027, 19.21668815612793, 23.291568756103516, 27.366451263427734, 31.441333770751953, 35.516212463378906, 39.591094970703125, 43.665977478027344, 47.74085998535156, 51.81574249267578, 55.890621185302734, 59.96550369262695, 64.04039001464844, 68.11526489257812, 72.19014739990234, 76.26502990722656, 80.33991241455078, 84.414794921875, 88.48967742919922, 92.56455993652344, 96.63943481445312, 100.71431732177734, 104.78919982910156, 108.86408233642578, 112.93896484375, 117.01384735107422, 121.08872985839844, 125.16360473632812, 129.23849487304688, 133.31336975097656, 137.38824462890625, 141.463134765625, 145.5380096435547, 149.61289978027344, 153.68777465820312, 157.76266479492188, 161.83753967285156, 165.9124298095703, 169.9873046875, 174.06219482421875, 178.13706970214844, 182.2119598388672, 186.28683471679688, 190.36172485351562, 194.4365997314453, 198.51148986816406, 202.58636474609375, 206.66123962402344]}, "gradients/encoder.encoder.layers.1.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 3.0, 2.0, 3.0, 8.0, 3.0, 5.0, 6.0, 5.0, 8.0, 9.0, 12.0, 8.0, 9.0, 18.0, 16.0, 23.0, 17.0, 20.0, 39.0, 26.0, 23.0, 41.0, 55.0, 92.0, 146.0, 92.0, 53.0, 30.0, 25.0, 32.0, 28.0, 25.0, 21.0, 21.0, 16.0, 11.0, 10.0, 12.0, 6.0, 5.0, 2.0, 6.0, 5.0, 2.0, 6.0, 3.0, 3.0, 3.0, 3.0], "bins": [-27.875879287719727, -27.167110443115234, -26.45833969116211, -25.749570846557617, -25.040802001953125, -24.33203125, -23.623262405395508, -22.914493560791016, -22.20572280883789, -21.4969539642334, -20.788183212280273, -20.07941436767578, -19.37064552307129, -18.661876678466797, -17.953105926513672, -17.24433708190918, -16.535568237304688, -15.826798439025879, -15.118029594421387, -14.409259796142578, -13.700490951538086, -12.991721153259277, -12.282951354980469, -11.574182510375977, -10.865412712097168, -10.15664291381836, -9.447874069213867, -8.739104270935059, -8.03033447265625, -7.321565628051758, -6.612795829772949, -5.904026508331299, -5.195255279541016, -4.486485958099365, -3.7777163982391357, -3.0689468383789062, -2.360177516937256, -1.6514081954956055, -0.9426383972167969, -0.23386907577514648, 0.4749002456665039, 1.1836696863174438, 1.8924391269683838, 2.6012086868286133, 3.3099780082702637, 4.018747329711914, 4.727517127990723, 5.436286449432373, 6.145055770874023, 6.853825092315674, 7.562594413757324, 8.271364212036133, 8.980133056640625, 9.688902854919434, 10.397672653198242, 11.106441497802734, 11.815211296081543, 12.523981094360352, 13.232749938964844, 13.941519737243652, 14.650289535522461, 15.359058380126953, 16.067829132080078, 16.77659797668457, 17.485366821289062]}, "gradients/encoder.encoder.layers.0.feed_forward.output_dense.weight": {"_type": "histogram", "values": [3.0, 0.0, 2.0, 2.0, 4.0, 3.0, 1.0, 6.0, 7.0, 17.0, 25.0, 27.0, 33.0, 41.0, 62.0, 85.0, 127.0, 163.0, 269.0, 396.0, 562.0, 870.0, 1349.0, 2332.0, 4048.0, 7840.0, 17931.0, 56200.0, 260186.0, 1095330.0, 1838547.0, 673597.0, 162423.0, 43012.0, 14219.0, 6309.0, 3275.0, 1852.0, 1084.0, 691.0, 426.0, 292.0, 190.0, 125.0, 117.0, 62.0, 48.0, 32.0, 27.0, 15.0, 10.0, 10.0, 6.0, 2.0, 2.0, 3.0, 4.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.87890625, -1.81695556640625, -1.7550048828125, -1.69305419921875, -1.631103515625, -1.56915283203125, -1.5072021484375, -1.44525146484375, -1.38330078125, -1.32135009765625, -1.2593994140625, -1.19744873046875, -1.135498046875, -1.07354736328125, -1.0115966796875, -0.94964599609375, -0.8876953125, -0.82574462890625, -0.7637939453125, -0.70184326171875, -0.639892578125, -0.57794189453125, -0.5159912109375, -0.45404052734375, -0.39208984375, -0.33013916015625, -0.2681884765625, -0.20623779296875, -0.144287109375, -0.08233642578125, -0.0203857421875, 0.04156494140625, 0.103515625, 0.16546630859375, 0.2274169921875, 0.28936767578125, 0.351318359375, 0.41326904296875, 0.4752197265625, 0.53717041015625, 0.59912109375, 0.66107177734375, 0.7230224609375, 0.78497314453125, 0.846923828125, 0.90887451171875, 0.9708251953125, 1.03277587890625, 1.0947265625, 1.15667724609375, 1.2186279296875, 1.28057861328125, 1.342529296875, 1.40447998046875, 1.4664306640625, 1.52838134765625, 1.59033203125, 1.65228271484375, 1.7142333984375, 1.77618408203125, 1.838134765625, 1.90008544921875, 1.9620361328125, 2.02398681640625, 2.0859375]}, "gradients/encoder.encoder.layers.0.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 5.0, 1.0, 3.0, 1.0, 0.0, 1.0, 5.0, 3.0, 2.0, 5.0, 3.0, 6.0, 6.0, 2.0, 8.0, 5.0, 9.0, 12.0, 11.0, 11.0, 13.0, 10.0, 19.0, 23.0, 25.0, 30.0, 27.0, 29.0, 35.0, 24.0, 30.0, 34.0, 34.0, 48.0, 36.0, 49.0, 43.0, 33.0, 26.0, 36.0, 44.0, 27.0, 23.0, 34.0, 20.0, 22.0, 17.0, 20.0, 19.0, 11.0, 13.0, 13.0, 12.0, 11.0, 7.0, 4.0, 3.0, 4.0, 6.0, 5.0, 2.0, 1.0, 2.0], "bins": [-1.23828125, -1.20330810546875, -1.1683349609375, -1.13336181640625, -1.098388671875, -1.06341552734375, -1.0284423828125, -0.99346923828125, -0.95849609375, -0.92352294921875, -0.8885498046875, -0.85357666015625, -0.818603515625, -0.78363037109375, -0.7486572265625, -0.71368408203125, -0.6787109375, -0.64373779296875, -0.6087646484375, -0.57379150390625, -0.538818359375, -0.50384521484375, -0.4688720703125, -0.43389892578125, -0.39892578125, -0.36395263671875, -0.3289794921875, -0.29400634765625, -0.259033203125, -0.22406005859375, -0.1890869140625, -0.15411376953125, -0.119140625, -0.08416748046875, -0.0491943359375, -0.01422119140625, 0.020751953125, 0.05572509765625, 0.0906982421875, 0.12567138671875, 0.16064453125, 0.19561767578125, 0.2305908203125, 0.26556396484375, 0.300537109375, 0.33551025390625, 0.3704833984375, 0.40545654296875, 0.4404296875, 0.47540283203125, 0.5103759765625, 0.54534912109375, 0.580322265625, 0.61529541015625, 0.6502685546875, 0.68524169921875, 0.72021484375, 0.75518798828125, 0.7901611328125, 0.82513427734375, 0.860107421875, 0.89508056640625, 0.9300537109375, 0.96502685546875, 1.0]}, "gradients/encoder.encoder.layers.0.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 6.0, 2.0, 5.0, 13.0, 56.0, 192.0, 609.0, 4466.0, 4132914.0, 53890.0, 1623.0, 342.0, 105.0, 34.0, 18.0, 8.0, 7.0, 3.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-22.0625, -21.34521484375, -20.6279296875, -19.91064453125, -19.193359375, -18.47607421875, -17.7587890625, -17.04150390625, -16.32421875, -15.60693359375, -14.8896484375, -14.17236328125, -13.455078125, -12.73779296875, -12.0205078125, -11.30322265625, -10.5859375, -9.86865234375, -9.1513671875, -8.43408203125, -7.716796875, -6.99951171875, -6.2822265625, -5.56494140625, -4.84765625, -4.13037109375, -3.4130859375, -2.69580078125, -1.978515625, -1.26123046875, -0.5439453125, 0.17333984375, 0.890625, 1.60791015625, 2.3251953125, 3.04248046875, 3.759765625, 4.47705078125, 5.1943359375, 5.91162109375, 6.62890625, 7.34619140625, 8.0634765625, 8.78076171875, 9.498046875, 10.21533203125, 10.9326171875, 11.64990234375, 12.3671875, 13.08447265625, 13.8017578125, 14.51904296875, 15.236328125, 15.95361328125, 16.6708984375, 17.38818359375, 18.10546875, 18.82275390625, 19.5400390625, 20.25732421875, 20.974609375, 21.69189453125, 22.4091796875, 23.12646484375, 23.84375]}, "gradients/encoder.encoder.layers.0.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [2.0, 2.0, 1.0, 2.0, 4.0, 3.0, 7.0, 15.0, 22.0, 37.0, 73.0, 152.0, 285.0, 646.0, 1077.0, 866.0, 447.0, 197.0, 100.0, 69.0, 40.0, 20.0, 14.0, 3.0, 3.0, 1.0, 4.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.94921875, -3.68170166015625, -3.4141845703125, -3.14666748046875, -2.879150390625, -2.61163330078125, -2.3441162109375, -2.07659912109375, -1.80908203125, -1.54156494140625, -1.2740478515625, -1.00653076171875, -0.739013671875, -0.47149658203125, -0.2039794921875, 0.06353759765625, 0.3310546875, 0.59857177734375, 0.8660888671875, 1.13360595703125, 1.401123046875, 1.66864013671875, 1.9361572265625, 2.20367431640625, 2.47119140625, 2.73870849609375, 3.0062255859375, 3.27374267578125, 3.541259765625, 3.80877685546875, 4.0762939453125, 4.34381103515625, 4.611328125, 4.87884521484375, 5.1463623046875, 5.41387939453125, 5.681396484375, 5.94891357421875, 6.2164306640625, 6.48394775390625, 6.75146484375, 7.01898193359375, 7.2864990234375, 7.55401611328125, 7.821533203125, 8.08905029296875, 8.3565673828125, 8.62408447265625, 8.8916015625, 9.15911865234375, 9.4266357421875, 9.69415283203125, 9.961669921875, 10.22918701171875, 10.4967041015625, 10.76422119140625, 11.03173828125, 11.29925537109375, 11.5667724609375, 11.83428955078125, 12.101806640625, 12.36932373046875, 12.6368408203125, 12.90435791015625, 13.171875]}, "gradients/encoder.encoder.layers.0.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0, 3.0, 6.0, 15.0, 45.0, 146.0, 400.0, 265.0, 81.0, 23.0, 10.0, 8.0, 4.0, 3.0, 3.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-82.39708709716797, -78.61478424072266, -74.83247375488281, -71.0501708984375, -67.26786804199219, -63.485557556152344, -59.70325469970703, -55.92094802856445, -52.138641357421875, -48.3563346862793, -44.57402801513672, -40.791725158691406, -37.00941848754883, -33.22711181640625, -29.444807052612305, -25.66250228881836, -21.88019561767578, -18.097888946533203, -14.315584182739258, -10.533278465270996, -6.750972747802734, -2.9686660766601562, 0.8136386871337891, 4.595943450927734, 8.378250122070312, 12.160555839538574, 15.942861557006836, 19.72516632080078, 23.50747299194336, 27.289779663085938, 31.072084426879883, 34.85438919067383, 38.636688232421875, 42.41899490356445, 46.20130157470703, 49.983604431152344, 53.76591110229492, 57.5482177734375, 61.33052062988281, 65.11282348632812, 68.89513397216797, 72.67743682861328, 76.45974731445312, 80.24205017089844, 84.02435302734375, 87.8066635131836, 91.5889663696289, 95.37127685546875, 99.15357971191406, 102.93588256835938, 106.71819305419922, 110.50049591064453, 114.28280639648438, 118.06510925292969, 121.847412109375, 125.62971496582031, 129.41201782226562, 133.19432067871094, 136.97662353515625, 140.75894165039062, 144.54124450683594, 148.32354736328125, 152.10585021972656, 155.88815307617188, 159.67047119140625]}, "gradients/encoder.encoder.layers.0.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 3.0, 2.0, 2.0, 1.0, 1.0, 1.0, 4.0, 2.0, 3.0, 10.0, 10.0, 8.0, 20.0, 18.0, 19.0, 23.0, 31.0, 37.0, 36.0, 37.0, 49.0, 68.0, 65.0, 53.0, 60.0, 77.0, 46.0, 47.0, 45.0, 48.0, 47.0, 34.0, 21.0, 20.0, 22.0, 13.0, 8.0, 3.0, 3.0, 3.0, 3.0, 5.0, 3.0, 0.0, 4.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-38.04997634887695, -36.936824798583984, -35.823673248291016, -34.71052169799805, -33.59736633300781, -32.484214782714844, -31.371063232421875, -30.257911682128906, -29.144760131835938, -28.03160858154297, -26.91845703125, -25.8053035736084, -24.69215202331543, -23.57900047302246, -22.46584701538086, -21.35269546508789, -20.239543914794922, -19.126392364501953, -18.013240814208984, -16.900087356567383, -15.786935806274414, -14.673784255981445, -13.56063175201416, -12.447479248046875, -11.334327697753906, -10.221176147460938, -9.108023643493652, -7.994871616363525, -6.881719589233398, -5.7685675621032715, -4.6554155349731445, -3.5422635078430176, -2.4291114807128906, -1.3159594535827637, -0.20280742645263672, 0.9103446006774902, 2.023496627807617, 3.136648654937744, 4.249800682067871, 5.362952709197998, 6.476104736328125, 7.589256763458252, 8.702408790588379, 9.815561294555664, 10.928712844848633, 12.041864395141602, 13.155016899108887, 14.268169403076172, 15.38132095336914, 16.49447250366211, 17.607624053955078, 18.72077751159668, 19.83392906188965, 20.947080612182617, 22.06023406982422, 23.173385620117188, 24.286537170410156, 25.399688720703125, 26.512840270996094, 27.625993728637695, 28.739145278930664, 29.852296829223633, 30.965450286865234, 32.0786018371582, 33.19175338745117]}, "gradients/encoder.encoder.layers.0.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 3.0, 3.0, 0.0, 6.0, 7.0, 6.0, 8.0, 13.0, 18.0, 23.0, 50.0, 57.0, 103.0, 150.0, 299.0, 544.0, 1189.0, 2547.0, 6204.0, 18066.0, 73579.0, 544944.0, 325650.0, 52177.0, 13864.0, 4909.0, 2022.0, 925.0, 482.0, 268.0, 186.0, 104.0, 48.0, 39.0, 19.0, 15.0, 8.0, 8.0, 4.0, 5.0, 3.0, 3.0, 3.0, 0.0, 1.0, 0.0, 0.0, 3.0, 2.0, 1.0], "bins": [-4.5703125, -4.442962646484375, -4.31561279296875, -4.188262939453125, -4.0609130859375, -3.933563232421875, -3.80621337890625, -3.678863525390625, -3.551513671875, -3.424163818359375, -3.29681396484375, -3.169464111328125, -3.0421142578125, -2.914764404296875, -2.78741455078125, -2.660064697265625, -2.53271484375, -2.405364990234375, -2.27801513671875, -2.150665283203125, -2.0233154296875, -1.895965576171875, -1.76861572265625, -1.641265869140625, -1.513916015625, -1.386566162109375, -1.25921630859375, -1.131866455078125, -1.0045166015625, -0.877166748046875, -0.74981689453125, -0.622467041015625, -0.4951171875, -0.367767333984375, -0.24041748046875, -0.113067626953125, 0.0142822265625, 0.141632080078125, 0.26898193359375, 0.396331787109375, 0.523681640625, 0.651031494140625, 0.77838134765625, 0.905731201171875, 1.0330810546875, 1.160430908203125, 1.28778076171875, 1.415130615234375, 1.54248046875, 1.669830322265625, 1.79718017578125, 1.924530029296875, 2.0518798828125, 2.179229736328125, 2.30657958984375, 2.433929443359375, 2.561279296875, 2.688629150390625, 2.81597900390625, 2.943328857421875, 3.0706787109375, 3.198028564453125, 3.32537841796875, 3.452728271484375, 3.580078125]}, "gradients/encoder.encoder.layers.0.attention.out_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 2.0, 1.0, 2.0, 5.0, 6.0, 7.0, 8.0, 12.0, 14.0, 24.0, 20.0, 26.0, 40.0, 38.0, 58.0, 54.0, 66.0, 71.0, 68.0, 69.0, 72.0, 63.0, 58.0, 51.0, 45.0, 24.0, 23.0, 20.0, 27.0, 14.0, 4.0, 5.0, 5.0, 7.0, 2.0, 1.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.166015625, -3.083953857421875, -3.00189208984375, -2.919830322265625, -2.8377685546875, -2.755706787109375, -2.67364501953125, -2.591583251953125, -2.509521484375, -2.427459716796875, -2.34539794921875, -2.263336181640625, -2.1812744140625, -2.099212646484375, -2.01715087890625, -1.935089111328125, -1.85302734375, -1.770965576171875, -1.68890380859375, -1.606842041015625, -1.5247802734375, -1.442718505859375, -1.36065673828125, -1.278594970703125, -1.196533203125, -1.114471435546875, -1.03240966796875, -0.950347900390625, -0.8682861328125, -0.786224365234375, -0.70416259765625, -0.622100830078125, -0.5400390625, -0.457977294921875, -0.37591552734375, -0.293853759765625, -0.2117919921875, -0.129730224609375, -0.04766845703125, 0.034393310546875, 0.116455078125, 0.198516845703125, 0.28057861328125, 0.362640380859375, 0.4447021484375, 0.526763916015625, 0.60882568359375, 0.690887451171875, 0.77294921875, 0.855010986328125, 0.93707275390625, 1.019134521484375, 1.1011962890625, 1.183258056640625, 1.26531982421875, 1.347381591796875, 1.429443359375, 1.511505126953125, 1.59356689453125, 1.675628662109375, 1.7576904296875, 1.839752197265625, 1.92181396484375, 2.003875732421875, 2.0859375]}, "gradients/encoder.encoder.layers.0.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 3.0, 0.0, 2.0, 5.0, 4.0, 2.0, 2.0, 7.0, 6.0, 8.0, 12.0, 18.0, 22.0, 39.0, 61.0, 83.0, 189.0, 450.0, 1263.0, 7204.0, 904134.0, 128834.0, 4516.0, 945.0, 376.0, 166.0, 102.0, 39.0, 26.0, 14.0, 14.0, 6.0, 4.0, 6.0, 4.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-13.921875, -13.5638427734375, -13.205810546875, -12.8477783203125, -12.48974609375, -12.1317138671875, -11.773681640625, -11.4156494140625, -11.0576171875, -10.6995849609375, -10.341552734375, -9.9835205078125, -9.62548828125, -9.2674560546875, -8.909423828125, -8.5513916015625, -8.193359375, -7.8353271484375, -7.477294921875, -7.1192626953125, -6.76123046875, -6.4031982421875, -6.045166015625, -5.6871337890625, -5.3291015625, -4.9710693359375, -4.613037109375, -4.2550048828125, -3.89697265625, -3.5389404296875, -3.180908203125, -2.8228759765625, -2.46484375, -2.1068115234375, -1.748779296875, -1.3907470703125, -1.03271484375, -0.6746826171875, -0.316650390625, 0.0413818359375, 0.3994140625, 0.7574462890625, 1.115478515625, 1.4735107421875, 1.83154296875, 2.1895751953125, 2.547607421875, 2.9056396484375, 3.263671875, 3.6217041015625, 3.979736328125, 4.3377685546875, 4.69580078125, 5.0538330078125, 5.411865234375, 5.7698974609375, 6.1279296875, 6.4859619140625, 6.843994140625, 7.2020263671875, 7.56005859375, 7.9180908203125, 8.276123046875, 8.6341552734375, 8.9921875]}, "gradients/encoder.encoder.layers.0.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 3.0, 2.0, 2.0, 3.0, 1.0, 3.0, 5.0, 9.0, 6.0, 11.0, 16.0, 15.0, 24.0, 28.0, 39.0, 48.0, 60.0, 84.0, 112.0, 119.0, 89.0, 81.0, 55.0, 29.0, 46.0, 31.0, 23.0, 16.0, 16.0, 10.0, 9.0, 5.0, 6.0, 7.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.2421875, -11.86376953125, -11.4853515625, -11.10693359375, -10.728515625, -10.35009765625, -9.9716796875, -9.59326171875, -9.21484375, -8.83642578125, -8.4580078125, -8.07958984375, -7.701171875, -7.32275390625, -6.9443359375, -6.56591796875, -6.1875, -5.80908203125, -5.4306640625, -5.05224609375, -4.673828125, -4.29541015625, -3.9169921875, -3.53857421875, -3.16015625, -2.78173828125, -2.4033203125, -2.02490234375, -1.646484375, -1.26806640625, -0.8896484375, -0.51123046875, -0.1328125, 0.24560546875, 0.6240234375, 1.00244140625, 1.380859375, 1.75927734375, 2.1376953125, 2.51611328125, 2.89453125, 3.27294921875, 3.6513671875, 4.02978515625, 4.408203125, 4.78662109375, 5.1650390625, 5.54345703125, 5.921875, 6.30029296875, 6.6787109375, 7.05712890625, 7.435546875, 7.81396484375, 8.1923828125, 8.57080078125, 8.94921875, 9.32763671875, 9.7060546875, 10.08447265625, 10.462890625, 10.84130859375, 11.2197265625, 11.59814453125, 11.9765625]}, "gradients/encoder.encoder.layers.0.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 3.0, 0.0, 0.0, 6.0, 10.0, 8.0, 13.0, 16.0, 20.0, 28.0, 36.0, 44.0, 79.0, 125.0, 161.0, 307.0, 568.0, 1099.0, 2414.0, 6071.0, 20931.0, 162759.0, 808705.0, 31002.0, 8095.0, 2976.0, 1440.0, 690.0, 332.0, 211.0, 102.0, 88.0, 59.0, 48.0, 30.0, 24.0, 15.0, 9.0, 13.0, 10.0, 4.0, 4.0, 2.0, 2.0, 1.0, 1.0, 0.0, 4.0, 1.0, 1.0], "bins": [-1.224609375, -1.1907196044921875, -1.156829833984375, -1.1229400634765625, -1.08905029296875, -1.0551605224609375, -1.021270751953125, -0.9873809814453125, -0.9534912109375, -0.9196014404296875, -0.885711669921875, -0.8518218994140625, -0.81793212890625, -0.7840423583984375, -0.750152587890625, -0.7162628173828125, -0.682373046875, -0.6484832763671875, -0.614593505859375, -0.5807037353515625, -0.54681396484375, -0.5129241943359375, -0.479034423828125, -0.4451446533203125, -0.4112548828125, -0.3773651123046875, -0.343475341796875, -0.3095855712890625, -0.27569580078125, -0.2418060302734375, -0.207916259765625, -0.1740264892578125, -0.14013671875, -0.1062469482421875, -0.072357177734375, -0.0384674072265625, -0.00457763671875, 0.0293121337890625, 0.063201904296875, 0.0970916748046875, 0.1309814453125, 0.1648712158203125, 0.198760986328125, 0.2326507568359375, 0.26654052734375, 0.3004302978515625, 0.334320068359375, 0.3682098388671875, 0.402099609375, 0.4359893798828125, 0.469879150390625, 0.5037689208984375, 0.53765869140625, 0.5715484619140625, 0.605438232421875, 0.6393280029296875, 0.6732177734375, 0.7071075439453125, 0.740997314453125, 0.7748870849609375, 0.80877685546875, 0.8426666259765625, 0.876556396484375, 0.9104461669921875, 0.9443359375]}, "gradients/encoder.encoder.layers.0.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 2.0, 2.0, 7.0, 4.0, 6.0, 8.0, 5.0, 6.0, 12.0, 23.0, 42.0, 71.0, 108.0, 258.0, 220.0, 85.0, 48.0, 36.0, 20.0, 14.0, 8.0, 8.0, 3.0, 4.0, 4.0, 3.0, 1.0, 0.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0005950927734375, -0.0005774572491645813, -0.0005598217248916626, -0.0005421862006187439, -0.0005245506763458252, -0.0005069151520729065, -0.0004892796277999878, -0.0004716441035270691, -0.0004540085792541504, -0.0004363730549812317, -0.000418737530708313, -0.0004011020064353943, -0.0003834664821624756, -0.0003658309578895569, -0.0003481954336166382, -0.0003305599093437195, -0.0003129243850708008, -0.0002952888607978821, -0.0002776533365249634, -0.0002600178122520447, -0.00024238228797912598, -0.00022474676370620728, -0.00020711123943328857, -0.00018947571516036987, -0.00017184019088745117, -0.00015420466661453247, -0.00013656914234161377, -0.00011893361806869507, -0.00010129809379577637, -8.366256952285767e-05, -6.602704524993896e-05, -4.8391520977020264e-05, -3.075599670410156e-05, -1.3120472431182861e-05, 4.51505184173584e-06, 2.215057611465454e-05, 3.978610038757324e-05, 5.742162466049194e-05, 7.505714893341064e-05, 9.269267320632935e-05, 0.00011032819747924805, 0.00012796372175216675, 0.00014559924602508545, 0.00016323477029800415, 0.00018087029457092285, 0.00019850581884384155, 0.00021614134311676025, 0.00023377686738967896, 0.00025141239166259766, 0.00026904791593551636, 0.00028668344020843506, 0.00030431896448135376, 0.00032195448875427246, 0.00033959001302719116, 0.00035722553730010986, 0.00037486106157302856, 0.00039249658584594727, 0.00041013211011886597, 0.00042776763439178467, 0.00044540315866470337, 0.00046303868293762207, 0.00048067420721054077, 0.0004983097314834595, 0.0005159452557563782, 0.0005335807800292969]}, "gradients/encoder.encoder.layers.0.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 3.0, 2.0, 1.0, 2.0, 4.0, 3.0, 12.0, 7.0, 20.0, 21.0, 40.0, 67.0, 113.0, 246.0, 520.0, 1516.0, 6621.0, 68249.0, 936248.0, 28999.0, 4006.0, 1038.0, 403.0, 172.0, 96.0, 40.0, 32.0, 20.0, 16.0, 19.0, 10.0, 4.0, 3.0, 3.0, 5.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-1.443359375, -1.39874267578125, -1.3541259765625, -1.30950927734375, -1.264892578125, -1.22027587890625, -1.1756591796875, -1.13104248046875, -1.08642578125, -1.04180908203125, -0.9971923828125, -0.95257568359375, -0.907958984375, -0.86334228515625, -0.8187255859375, -0.77410888671875, -0.7294921875, -0.68487548828125, -0.6402587890625, -0.59564208984375, -0.551025390625, -0.50640869140625, -0.4617919921875, -0.41717529296875, -0.37255859375, -0.32794189453125, -0.2833251953125, -0.23870849609375, -0.194091796875, -0.14947509765625, -0.1048583984375, -0.06024169921875, -0.015625, 0.02899169921875, 0.0736083984375, 0.11822509765625, 0.162841796875, 0.20745849609375, 0.2520751953125, 0.29669189453125, 0.34130859375, 0.38592529296875, 0.4305419921875, 0.47515869140625, 0.519775390625, 0.56439208984375, 0.6090087890625, 0.65362548828125, 0.6982421875, 0.74285888671875, 0.7874755859375, 0.83209228515625, 0.876708984375, 0.92132568359375, 0.9659423828125, 1.01055908203125, 1.05517578125, 1.09979248046875, 1.1444091796875, 1.18902587890625, 1.233642578125, 1.27825927734375, 1.3228759765625, 1.36749267578125, 1.412109375]}, "gradients/encoder.encoder.layers.0.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 2.0, 2.0, 3.0, 5.0, 1.0, 5.0, 7.0, 5.0, 10.0, 13.0, 14.0, 21.0, 22.0, 23.0, 36.0, 59.0, 81.0, 89.0, 136.0, 127.0, 79.0, 58.0, 52.0, 32.0, 28.0, 26.0, 15.0, 10.0, 10.0, 12.0, 6.0, 7.0, 4.0, 3.0, 1.0, 2.0, 3.0, 0.0, 1.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.0654296875, -1.02728271484375, -0.9891357421875, -0.95098876953125, -0.912841796875, -0.87469482421875, -0.8365478515625, -0.79840087890625, -0.76025390625, -0.72210693359375, -0.6839599609375, -0.64581298828125, -0.607666015625, -0.56951904296875, -0.5313720703125, -0.49322509765625, -0.455078125, -0.41693115234375, -0.3787841796875, -0.34063720703125, -0.302490234375, -0.26434326171875, -0.2261962890625, -0.18804931640625, -0.14990234375, -0.11175537109375, -0.0736083984375, -0.03546142578125, 0.002685546875, 0.04083251953125, 0.0789794921875, 0.11712646484375, 0.1552734375, 0.19342041015625, 0.2315673828125, 0.26971435546875, 0.307861328125, 0.34600830078125, 0.3841552734375, 0.42230224609375, 0.46044921875, 0.49859619140625, 0.5367431640625, 0.57489013671875, 0.613037109375, 0.65118408203125, 0.6893310546875, 0.72747802734375, 0.765625, 0.80377197265625, 0.8419189453125, 0.88006591796875, 0.918212890625, 0.95635986328125, 0.9945068359375, 1.03265380859375, 1.07080078125, 1.10894775390625, 1.1470947265625, 1.18524169921875, 1.223388671875, 1.26153564453125, 1.2996826171875, 1.33782958984375, 1.3759765625]}, "gradients/encoder.encoder.layers.0.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 5.0, 5.0, 17.0, 38.0, 91.0, 617.0, 163.0, 50.0, 10.0, 8.0, 4.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-114.14292907714844, -111.64790344238281, -109.15287780761719, -106.65784454345703, -104.1628189086914, -101.66779327392578, -99.17276763916016, -96.677734375, -94.18270874023438, -91.68768310546875, -89.19265747070312, -86.69762420654297, -84.20259857177734, -81.70757293701172, -79.2125473022461, -76.71751403808594, -74.22248840332031, -71.72746276855469, -69.23243713378906, -66.7374038696289, -64.24237823486328, -61.747352600097656, -59.25232696533203, -56.75729751586914, -54.26227569580078, -51.767250061035156, -49.272220611572266, -46.77719497680664, -44.28216552734375, -41.787139892578125, -39.2921142578125, -36.79708480834961, -34.30205535888672, -31.80702781677246, -29.312000274658203, -26.816974639892578, -24.321945190429688, -21.826919555664062, -19.331892013549805, -16.836864471435547, -14.341836929321289, -11.846809387207031, -9.351781845092773, -6.856755256652832, -4.361727714538574, -1.8667001724243164, 0.628326416015625, 3.123353958129883, 5.618381500244141, 8.113409042358398, 10.608436584472656, 13.103463172912598, 15.598490715026855, 18.093517303466797, 20.588544845581055, 23.083572387695312, 25.57859992980957, 28.073627471923828, 30.568655014038086, 33.063682556152344, 35.55870819091797, 38.05373764038086, 40.548763275146484, 43.043792724609375, 45.538818359375]}, "gradients/encoder.encoder.layers.0.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 1.0, 1.0, 3.0, 3.0, 5.0, 5.0, 4.0, 1.0, 8.0, 1.0, 6.0, 10.0, 10.0, 10.0, 15.0, 14.0, 16.0, 19.0, 19.0, 20.0, 21.0, 19.0, 35.0, 60.0, 198.0, 219.0, 48.0, 36.0, 26.0, 27.0, 28.0, 17.0, 19.0, 12.0, 12.0, 14.0, 5.0, 6.0, 5.0, 5.0, 7.0, 7.0, 4.0, 6.0, 2.0, 1.0, 0.0, 0.0, 4.0, 0.0, 2.0], "bins": [-28.064186096191406, -27.322744369506836, -26.581302642822266, -25.839862823486328, -25.098421096801758, -24.356979370117188, -23.615537643432617, -22.874095916748047, -22.13265609741211, -21.39121437072754, -20.64977264404297, -19.90833282470703, -19.16689109802246, -18.42544937133789, -17.68400764465332, -16.94256591796875, -16.20112419128418, -15.45968246459961, -14.718241691589355, -13.976799964904785, -13.235359191894531, -12.493917465209961, -11.75247573852539, -11.01103401184082, -10.269593238830566, -9.528151512145996, -8.786710739135742, -8.045269012451172, -7.30382776260376, -6.562386512756348, -5.820944786071777, -5.079503536224365, -4.338062286376953, -3.596621036529541, -2.85517954826355, -2.1137380599975586, -1.3722968101501465, -0.6308555603027344, 0.11058616638183594, 0.852027416229248, 1.5934686660766602, 2.3349099159240723, 3.0763514041900635, 3.8177928924560547, 4.559234142303467, 5.300675392150879, 6.042117118835449, 6.783558368682861, 7.524999618530273, 8.266441345214844, 9.007882118225098, 9.749323844909668, 10.490764617919922, 11.232206344604492, 11.973648071289062, 12.715089797973633, 13.456530570983887, 14.197972297668457, 14.939413070678711, 15.680854797363281, 16.42229652404785, 17.163738250732422, 17.90517807006836, 18.64661979675293, 19.3880615234375]}, "gradients/encoder.encoder.pos_conv_embed.conv.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 0.0, 3.0, 4.0, 0.0, 3.0, 3.0, 6.0, 9.0, 14.0, 15.0, 18.0, 19.0, 21.0, 20.0, 23.0, 30.0, 38.0, 47.0, 116.0, 246.0, 97.0, 43.0, 33.0, 40.0, 36.0, 25.0, 21.0, 14.0, 14.0, 9.0, 12.0, 10.0, 6.0, 6.0, 0.0, 3.0, 6.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.095703125, -2.034942626953125, -1.97418212890625, -1.913421630859375, -1.8526611328125, -1.791900634765625, -1.73114013671875, -1.670379638671875, -1.609619140625, -1.548858642578125, -1.48809814453125, -1.427337646484375, -1.3665771484375, -1.305816650390625, -1.24505615234375, -1.184295654296875, -1.12353515625, -1.062774658203125, -1.00201416015625, -0.941253662109375, -0.8804931640625, -0.819732666015625, -0.75897216796875, -0.698211669921875, -0.637451171875, -0.576690673828125, -0.51593017578125, -0.455169677734375, -0.3944091796875, -0.333648681640625, -0.27288818359375, -0.212127685546875, -0.1513671875, -0.090606689453125, -0.02984619140625, 0.030914306640625, 0.0916748046875, 0.152435302734375, 0.21319580078125, 0.273956298828125, 0.334716796875, 0.395477294921875, 0.45623779296875, 0.516998291015625, 0.5777587890625, 0.638519287109375, 0.69927978515625, 0.760040283203125, 0.82080078125, 0.881561279296875, 0.94232177734375, 1.003082275390625, 1.0638427734375, 1.124603271484375, 1.18536376953125, 1.246124267578125, 1.306884765625, 1.367645263671875, 1.42840576171875, 1.489166259765625, 1.5499267578125, 1.610687255859375, 1.67144775390625, 1.732208251953125, 1.79296875]}, "gradients/encoder.encoder.pos_conv_embed.conv.weight_v": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 3.0, 4.0, 0.0, 0.0, 2.0, 3.0, 8.0, 18.0, 14.0, 21.0, 16.0, 34.0, 40.0, 107.0, 185.0, 465.0, 1235.0, 4527.0, 69750.0, 8301943.0, 7289.0, 1734.0, 564.0, 263.0, 139.0, 63.0, 48.0, 29.0, 16.0, 7.0, 14.0, 13.0, 4.0, 13.0, 6.0, 2.0, 2.0, 0.0, 0.0, 2.0, 1.0, 1.0, 2.0, 5.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 2.0], "bins": [-15.027771949768066, -14.511971473693848, -13.996170997619629, -13.48037052154541, -12.964570045471191, -12.448769569396973, -11.93297004699707, -11.417169570922852, -10.901369094848633, -10.385568618774414, -9.869768142700195, -9.353967666625977, -8.838167190551758, -8.322366714477539, -7.8065667152404785, -7.29076623916626, -6.774965286254883, -6.259164810180664, -5.743364334106445, -5.227563858032227, -4.711763381958008, -4.195962905883789, -3.6801629066467285, -3.1643624305725098, -2.648561954498291, -2.1327614784240723, -1.616961121559143, -1.1011607646942139, -0.5853602886199951, -0.06955981254577637, 0.4462404251098633, 0.962040901184082, 1.4778423309326172, 1.993642807006836, 2.5094432830810547, 3.0252435207366943, 3.541043996810913, 4.056844711303711, 4.5726447105407715, 5.08844518661499, 5.604245662689209, 6.120046138763428, 6.6358466148376465, 7.151646614074707, 7.667447090148926, 8.183247566223145, 8.699048042297363, 9.214848518371582, 9.7306489944458, 10.24644947052002, 10.762249946594238, 11.278050422668457, 11.793850898742676, 12.309651374816895, 12.825450897216797, 13.341251373291016, 13.857051849365234, 14.372852325439453, 14.888652801513672, 15.40445327758789, 15.92025375366211, 16.436054229736328, 16.951854705810547, 17.467655181884766, 17.983455657958984]}, "gradients/encoder.encoder.pos_conv_embed.conv.weight_g": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 6.0, 4.0, 4.0, 1.0, 3.0, 2.0, 6.0, 1.0, 3.0, 2.0, 3.0, 5.0, 7.0, 4.0, 7.0, 6.0, 5.0, 5.0, 3.0, 3.0, 5.0, 4.0, 0.0, 1.0, 3.0, 5.0, 3.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-14.410181045532227, -14.031996726989746, -13.653812408447266, -13.275628089904785, -12.897443771362305, -12.519259452819824, -12.141075134277344, -11.76289176940918, -11.384706497192383, -11.006522178649902, -10.628337860107422, -10.250153541564941, -9.871969223022461, -9.49378490447998, -9.1156005859375, -8.737417221069336, -8.359232902526855, -7.981048583984375, -7.6028642654418945, -7.224679946899414, -6.846495628356934, -6.468311309814453, -6.090127468109131, -5.71194314956665, -5.33375883102417, -4.9555745124816895, -4.577390193939209, -4.199206352233887, -3.821021795272827, -3.4428374767303467, -3.0646533966064453, -2.686469078063965, -2.308283805847168, -1.9300994873046875, -1.5519152879714966, -1.1737310886383057, -0.7955467700958252, -0.4173624515533447, -0.03917837142944336, 0.3390059471130371, 0.7171902656555176, 1.095374584197998, 1.473558783531189, 1.8517429828643799, 2.2299273014068604, 2.608111619949341, 2.986295700073242, 3.3644800186157227, 3.742664337158203, 4.120848655700684, 4.499032974243164, 4.8772172927856445, 5.255401611328125, 5.6335859298706055, 6.011769771575928, 6.389954090118408, 6.768138408660889, 7.146322727203369, 7.52450704574585, 7.902690887451172, 8.280875205993652, 8.659059524536133, 9.037243843078613, 9.415428161621094, 9.793612480163574]}, "gradients/encoder.feature_projection.projection.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 4.0, 0.0, 3.0, 3.0, 1.0, 8.0, 5.0, 5.0, 12.0, 21.0, 20.0, 48.0, 84.0, 158.0, 202.0, 464.0, 1050.0, 2784.0, 12009.0, 82892.0, 333055.0, 75621.0, 11199.0, 2689.0, 950.0, 437.0, 236.0, 141.0, 79.0, 36.0, 22.0, 15.0, 7.0, 7.0, 6.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-31.171875, -30.42041015625, -29.6689453125, -28.91748046875, -28.166015625, -27.41455078125, -26.6630859375, -25.91162109375, -25.16015625, -24.40869140625, -23.6572265625, -22.90576171875, -22.154296875, -21.40283203125, -20.6513671875, -19.89990234375, -19.1484375, -18.39697265625, -17.6455078125, -16.89404296875, -16.142578125, -15.39111328125, -14.6396484375, -13.88818359375, -13.13671875, -12.38525390625, -11.6337890625, -10.88232421875, -10.130859375, -9.37939453125, -8.6279296875, -7.87646484375, -7.125, -6.37353515625, -5.6220703125, -4.87060546875, -4.119140625, -3.36767578125, -2.6162109375, -1.86474609375, -1.11328125, -0.36181640625, 0.3896484375, 1.14111328125, 1.892578125, 2.64404296875, 3.3955078125, 4.14697265625, 4.8984375, 5.64990234375, 6.4013671875, 7.15283203125, 7.904296875, 8.65576171875, 9.4072265625, 10.15869140625, 10.91015625, 11.66162109375, 12.4130859375, 13.16455078125, 13.916015625, 14.66748046875, 15.4189453125, 16.17041015625, 16.921875]}, "gradients/encoder.feature_projection.projection.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 3.0, 0.0, 8.0, 9.0, 14.0, 10.0, 17.0, 26.0, 16.0, 33.0, 41.0, 66.0, 73.0, 91.0, 92.0, 80.0, 88.0, 68.0, 53.0, 51.0, 46.0, 31.0, 29.0, 14.0, 11.0, 19.0, 8.0, 6.0, 5.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.439453125, -3.3560791015625, -3.272705078125, -3.1893310546875, -3.10595703125, -3.0225830078125, -2.939208984375, -2.8558349609375, -2.7724609375, -2.6890869140625, -2.605712890625, -2.5223388671875, -2.43896484375, -2.3555908203125, -2.272216796875, -2.1888427734375, -2.10546875, -2.0220947265625, -1.938720703125, -1.8553466796875, -1.77197265625, -1.6885986328125, -1.605224609375, -1.5218505859375, -1.4384765625, -1.3551025390625, -1.271728515625, -1.1883544921875, -1.10498046875, -1.0216064453125, -0.938232421875, -0.8548583984375, -0.771484375, -0.6881103515625, -0.604736328125, -0.5213623046875, -0.43798828125, -0.3546142578125, -0.271240234375, -0.1878662109375, -0.1044921875, -0.0211181640625, 0.062255859375, 0.1456298828125, 0.22900390625, 0.3123779296875, 0.395751953125, 0.4791259765625, 0.5625, 0.6458740234375, 0.729248046875, 0.8126220703125, 0.89599609375, 0.9793701171875, 1.062744140625, 1.1461181640625, 1.2294921875, 1.3128662109375, 1.396240234375, 1.4796142578125, 1.56298828125, 1.6463623046875, 1.729736328125, 1.8131103515625, 1.896484375]}, "gradients/encoder.feature_projection.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 4.0, 3.0, 2.0, 3.0, 7.0, 7.0, 8.0, 8.0, 4.0, 21.0, 18.0, 35.0, 33.0, 67.0, 69.0, 66.0, 63.0, 34.0, 22.0, 5.0, 10.0, 4.0, 2.0, 3.0, 3.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-13.699359893798828, -13.20518970489502, -12.711020469665527, -12.216850280761719, -11.722681045532227, -11.228510856628418, -10.73434066772461, -10.240171432495117, -9.746002197265625, -9.251832008361816, -8.757662773132324, -8.263492584228516, -7.769323348999023, -7.275153160095215, -6.7809834480285645, -6.286813735961914, -5.7926435470581055, -5.298473834991455, -4.804304122924805, -4.310133934020996, -3.815964460372925, -3.3217947483062744, -2.827624797821045, -2.3334550857543945, -1.8392853736877441, -1.3451156616210938, -0.8509458303451538, -0.35677599906921387, 0.13739371299743652, 0.6315634250640869, 1.1257333755493164, 1.6199030876159668, 2.114072799682617, 2.6082425117492676, 3.102412223815918, 3.5965821743011475, 4.090751647949219, 4.584921836853027, 5.079091548919678, 5.573261260986328, 6.0674309730529785, 6.561600685119629, 7.055770397186279, 7.54994010925293, 8.044110298156738, 8.53827953338623, 9.032449722290039, 9.526618957519531, 10.02078914642334, 10.514959335327148, 11.00912857055664, 11.50329875946045, 11.997467994689941, 12.49163818359375, 12.985807418823242, 13.47997760772705, 13.97414779663086, 14.468317985534668, 14.96248722076416, 15.456657409667969, 15.950826644897461, 16.444995880126953, 16.939167022705078, 17.43333625793457, 17.927505493164062]}, "gradients/encoder.feature_projection.layer_norm.bias": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 3.0, 2.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 5.0, 7.0, 4.0, 5.0, 4.0, 10.0, 7.0, 19.0, 29.0, 51.0, 70.0, 87.0, 69.0, 54.0, 19.0, 8.0, 12.0, 5.0, 3.0, 2.0, 2.0, 2.0, 4.0, 4.0, 2.0, 1.0, 2.0, 1.0, 4.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.68094539642334, -9.310079574584961, -8.939213752746582, -8.568347930908203, -8.197481155395508, -7.826615810394287, -7.45574951171875, -7.084883689880371, -6.714017868041992, -6.343152046203613, -5.972286224365234, -5.601419925689697, -5.230554103851318, -4.8596882820129395, -4.488821983337402, -4.117956161499023, -3.7470903396606445, -3.3762245178222656, -3.0053584575653076, -2.6344923973083496, -2.2636265754699707, -1.8927606344223022, -1.5218946933746338, -1.1510286331176758, -0.7801628112792969, -0.4092968702316284, -0.03843092918395996, 0.3324350118637085, 0.703300952911377, 1.0741668939590454, 1.4450328350067139, 1.8158988952636719, 2.186764717102051, 2.5576305389404297, 2.9284965991973877, 3.2993626594543457, 3.6702284812927246, 4.0410943031311035, 4.411960601806641, 4.7828264236450195, 5.153692245483398, 5.524558067321777, 5.895423889160156, 6.266290187835693, 6.637156009674072, 7.008021831512451, 7.378888130187988, 7.749753952026367, 8.120619773864746, 8.491485595703125, 8.862351417541504, 9.233217239379883, 9.604084014892578, 9.974949836730957, 10.345815658569336, 10.716681480407715, 11.087547302246094, 11.458413124084473, 11.829278945922852, 12.20014476776123, 12.57101058959961, 12.941877365112305, 13.312743186950684, 13.683609008789062, 14.054474830627441]}, "_wandb": {"runtime": 3721}} \ No newline at end of file diff --git a/wandb/run-20220302_233655-33dtvgaa/logs/debug-internal.log b/wandb/run-20220302_233655-33dtvgaa/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..caa6d0dcbd44a07763b8592d8496c10782350508 --- /dev/null +++ b/wandb/run-20220302_233655-33dtvgaa/logs/debug-internal.log @@ -0,0 +1,5815 @@ +2022-03-02 23:36:56,255 INFO MainThread:266733 [internal.py:wandb_internal():89] W&B internal server running at pid: 266733, started at: 2022-03-02 23:36:56.254695 +2022-03-02 23:36:56,256 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: check_version +2022-03-02 23:36:56,257 INFO WriterThread:266733 [datastore.py:open_for_write():77] open: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/run-33dtvgaa.wandb +2022-03-02 23:36:56,258 DEBUG SenderThread:266733 [sender.py:send():235] send: header +2022-03-02 23:36:56,258 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: check_version +2022-03-02 23:36:56,329 DEBUG SenderThread:266733 [sender.py:send():235] send: run +2022-03-02 23:36:56,427 INFO SenderThread:266733 [dir_watcher.py:__init__():169] watching files in: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files +2022-03-02 23:36:56,427 INFO SenderThread:266733 [sender.py:_start_run_threads():809] run started: 33dtvgaa with start time 1646264215 +2022-03-02 23:36:56,427 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:36:56,427 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:36:56,428 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: run_start +2022-03-02 23:36:56,434 DEBUG HandlerThread:266733 [meta.py:__init__():36] meta init +2022-03-02 23:36:56,434 DEBUG HandlerThread:266733 [meta.py:__init__():50] meta init done +2022-03-02 23:36:56,434 DEBUG HandlerThread:266733 [meta.py:probe():210] probe +2022-03-02 23:36:56,440 DEBUG HandlerThread:266733 [meta.py:_setup_git():200] setup git +2022-03-02 23:36:56,455 DEBUG HandlerThread:266733 [meta.py:_setup_git():207] setup git done +2022-03-02 23:36:56,455 DEBUG HandlerThread:266733 [meta.py:_save_pip():54] save pip +2022-03-02 23:36:56,455 DEBUG HandlerThread:266733 [meta.py:_save_pip():68] save pip done +2022-03-02 23:36:56,456 DEBUG HandlerThread:266733 [meta.py:probe():248] probe done +2022-03-02 23:36:56,560 DEBUG SenderThread:266733 [sender.py:send():235] send: files +2022-03-02 23:36:56,560 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-metadata.json with policy now +2022-03-02 23:36:56,565 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:36:56,565 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:36:56,598 DEBUG SenderThread:266733 [sender.py:send():235] send: config +2022-03-02 23:36:56,599 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:36:56,599 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:36:56,599 WARNING SenderThread:266733 [sender.py:send_metric():902] Seen metric with glob (shouldnt happen) +2022-03-02 23:36:56,815 INFO Thread-11 :266733 [upload_job.py:push():137] Uploaded file /tmp/tmpyjbn79wcwandb/26pf5gyx-wandb-metadata.json +2022-03-02 23:36:57,429 INFO Thread-8 :266733 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/requirements.txt +2022-03-02 23:36:57,429 INFO Thread-8 :266733 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:36:57,429 INFO Thread-8 :266733 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-metadata.json +2022-03-02 23:36:57,429 INFO Thread-8 :266733 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:36:59,428 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:37:03,429 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:37:05,430 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:37:09,431 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:37:11,432 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:37:11,711 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:37:11,711 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:37:15,433 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:37:17,434 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:37:21,436 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:37:22,586 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:37:22,587 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:37:22,587 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:37:22,587 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:37:22,587 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:37:22,587 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:37:23,437 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:37:23,437 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:37:24,836 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-02 23:37:26,779 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:37:26,779 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:37:27,438 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/config.yaml +2022-03-02 23:37:27,438 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:37:29,439 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:37:34,440 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:37:36,441 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:37:40,443 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:37:41,933 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:37:41,933 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:37:42,443 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:37:46,445 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:37:46,697 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:37:46,698 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:37:46,698 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:37:47,445 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:37:48,445 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:37:50,446 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:37:54,447 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:37:55,277 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-02 23:37:56,448 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:37:57,060 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:37:57,060 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:38:00,449 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:38:02,450 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:38:06,451 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:38:08,452 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:38:10,379 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:38:10,379 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:38:10,379 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:38:10,453 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:38:11,453 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:38:12,128 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:38:12,128 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:38:12,454 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:38:13,454 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:38:15,455 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:38:17,455 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:38:21,457 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:38:23,457 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:38:25,655 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-02 23:38:27,173 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:38:27,174 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:38:27,459 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:38:29,460 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:38:31,460 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:38:33,601 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:38:33,602 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:38:33,602 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:38:34,461 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:38:34,462 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:38:35,462 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:38:37,463 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:38:41,464 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:38:42,207 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:38:42,208 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:38:44,465 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:38:46,466 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:38:50,467 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:38:52,468 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:38:56,049 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-02 23:38:56,470 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:38:56,649 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:38:56,649 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:38:56,650 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:38:57,470 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:38:57,496 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:38:57,496 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:38:58,470 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:38:59,471 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:39:00,471 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:39:04,473 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:39:06,473 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:39:10,475 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:39:12,476 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:39:12,669 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:39:12,669 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:39:14,476 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:39:18,478 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:39:19,523 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:39:19,523 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:39:19,524 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:39:20,478 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:39:21,479 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:39:25,480 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:39:26,443 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-02 23:39:27,481 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:39:27,714 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:39:27,714 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:39:29,481 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:39:33,483 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:39:35,484 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:39:39,485 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:39:41,486 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:39:42,360 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:39:42,360 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:39:42,360 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:39:42,486 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:39:42,894 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:39:42,894 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:39:43,487 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:39:47,488 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:39:49,489 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:39:51,490 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:39:56,491 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:39:56,832 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-02 23:39:58,072 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:39:58,072 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:39:58,492 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:40:02,494 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:40:04,494 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:40:05,090 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:40:05,090 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:40:05,090 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:40:05,495 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:40:06,495 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:40:10,497 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:40:12,497 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:40:13,107 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:40:13,108 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:40:14,498 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:40:18,499 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:40:20,500 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:40:24,501 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:40:26,502 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:40:27,215 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-02 23:40:27,770 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:40:27,770 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:40:27,771 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:40:28,196 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:40:28,196 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:40:28,503 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:40:28,503 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:40:30,504 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:40:33,505 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:40:35,505 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:40:37,506 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:40:41,508 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:40:43,375 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:40:43,376 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:40:43,508 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:40:45,509 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:40:49,511 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:40:50,289 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:40:50,289 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:40:50,289 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:40:50,511 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:40:51,511 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:40:52,512 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:40:54,512 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:40:57,599 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-02 23:40:58,417 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:40:58,417 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:40:58,514 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:41:00,515 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:41:02,515 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:41:06,517 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:41:08,517 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:41:10,518 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:41:12,420 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:41:12,420 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:41:12,422 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:41:12,519 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:41:13,613 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:41:13,613 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:41:14,520 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:41:16,520 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:41:18,521 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:41:22,523 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:41:24,523 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:41:27,524 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:41:27,975 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-02 23:41:28,728 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:41:28,728 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:41:31,526 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:41:33,527 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:41:34,257 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:41:34,258 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:41:34,258 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:41:34,527 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:41:35,527 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:41:37,528 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:41:39,529 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:41:41,530 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:41:43,531 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:41:43,925 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:41:43,925 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:41:47,532 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:41:49,533 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:41:53,534 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:41:55,535 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:41:56,867 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,872 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,872 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,878 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,878 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,878 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,878 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,883 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,883 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,888 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,894 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,899 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,904 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,909 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,917 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,917 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,917 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,918 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,918 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,918 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,918 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,918 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,918 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,918 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,918 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,918 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,918 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,918 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,918 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,918 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,918 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,919 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,919 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,919 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,919 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,919 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,919 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,919 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,919 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,919 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,919 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,919 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,919 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,920 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,920 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,920 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,920 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,920 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,920 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,925 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,925 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,925 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,925 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,925 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,926 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,926 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,926 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,926 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,926 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,926 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,926 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,926 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,926 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,926 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,926 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,937 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,937 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,937 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,937 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,937 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,937 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,937 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,937 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,937 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,937 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,938 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,938 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,938 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,938 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,938 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,938 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,938 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,938 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,938 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,938 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,938 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,938 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,939 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,939 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,939 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,939 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,939 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,939 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,939 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,939 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,939 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,944 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,960 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,960 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,960 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,960 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,960 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,961 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,961 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,961 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,961 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,961 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,961 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,961 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,961 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,961 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,961 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,961 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,961 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,961 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,961 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,967 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,967 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,967 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,967 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,967 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,967 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,967 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,967 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,967 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,968 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,968 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,968 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,968 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,968 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,968 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,968 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,968 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,968 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,968 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,968 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,968 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,968 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,968 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,968 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,969 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,969 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,974 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,974 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,974 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,974 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,974 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,974 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,974 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,975 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,980 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,980 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,980 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,980 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,980 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,980 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,980 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,980 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,986 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,986 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,986 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,986 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,986 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,986 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,986 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,986 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,986 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,986 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,992 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,992 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,992 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,992 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,992 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,992 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,992 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,992 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,992 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,992 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,992 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,993 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,993 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,993 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,993 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,993 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,993 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,993 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,993 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,998 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,999 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,999 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,999 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,999 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,999 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,999 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,999 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,999 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,999 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:56,999 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,005 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,005 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,005 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,005 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,005 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,005 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,005 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,005 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,005 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,005 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,005 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,005 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,006 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,006 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,006 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,006 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,006 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,006 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,006 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,006 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,006 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,006 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,006 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,006 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,006 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,012 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,012 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,012 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,012 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,012 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,012 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,012 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,012 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,012 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,012 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,013 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,013 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,013 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,018 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,018 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,018 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,023 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,023 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,023 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,023 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,023 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,023 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,024 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,024 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,024 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,024 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,024 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,024 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,024 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,024 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,024 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,024 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,024 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,024 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,024 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,025 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,025 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,025 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,025 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,025 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,025 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,025 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,025 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,025 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,025 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,025 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,025 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,026 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,026 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,026 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,026 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,026 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,026 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,026 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,026 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,026 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,026 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,026 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,026 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,027 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,027 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,027 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,027 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,027 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,027 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,027 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,027 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,027 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,027 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,027 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,027 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,027 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,028 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,028 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,028 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,028 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,028 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,028 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,028 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,028 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,028 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,028 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,028 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,028 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,028 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,029 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,029 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,029 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,029 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,029 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,029 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,029 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,029 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,029 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,029 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,029 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,029 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,029 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,030 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,030 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,030 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,030 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,030 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,030 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,030 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,030 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,030 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,030 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,030 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,030 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,030 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,031 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,031 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,031 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,031 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,031 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,031 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,031 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,031 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,031 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,031 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,031 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,032 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,032 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,032 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,032 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,032 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,032 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,032 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,032 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,032 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,032 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,033 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,033 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,033 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,033 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,033 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,033 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,033 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,033 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,033 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,033 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,033 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,033 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,034 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,034 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,034 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,034 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,034 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,034 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,034 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,034 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,034 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,034 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,034 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,034 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,034 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,034 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,035 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,035 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,035 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,035 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,035 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,035 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,035 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,035 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,035 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,035 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,035 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,035 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,035 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,035 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,035 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,035 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,035 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,036 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,036 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,036 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,036 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,036 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,036 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,036 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,036 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,036 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,036 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,036 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,036 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,036 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,036 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,037 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,037 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,037 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,037 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,037 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,037 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,037 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,037 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,037 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,037 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,037 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,037 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,037 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,037 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,037 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,038 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,038 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,038 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,038 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,038 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,038 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,038 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,038 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,038 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,038 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,038 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,038 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,038 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,038 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,038 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,038 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,039 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,039 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,039 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,039 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,039 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,039 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,039 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,039 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,039 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,039 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,039 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,039 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,039 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,039 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,039 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,039 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,040 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,040 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,040 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,040 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,040 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,040 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,040 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,040 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,040 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,040 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,040 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,040 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,040 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,040 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,040 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,040 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,040 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,041 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,041 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,041 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,041 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,041 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,041 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,041 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,041 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,041 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,041 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,042 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,042 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,042 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,042 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,042 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,042 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,042 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,042 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,043 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,043 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,043 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,043 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,043 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,043 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,043 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,043 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,043 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,043 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,043 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,043 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,043 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,043 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,043 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,044 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,044 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,044 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,044 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,044 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,044 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,044 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,044 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,044 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,044 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,044 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,044 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,044 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,044 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,044 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,044 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,045 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,045 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,045 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,045 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,045 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,045 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,045 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,045 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,045 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,045 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,045 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,045 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,045 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,045 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,045 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,045 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,045 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,046 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,046 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,046 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,046 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,046 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,046 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,046 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,046 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,046 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,046 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,046 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,046 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,046 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,046 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,046 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,047 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,047 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,047 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,047 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,047 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,047 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,047 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,047 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,047 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,047 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,047 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,047 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,047 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,047 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,047 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,047 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,047 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,048 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,048 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,048 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,048 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,048 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,048 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,048 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,048 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,048 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,048 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,048 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,048 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,048 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,048 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,048 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,048 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,048 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,048 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,049 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,049 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,049 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,049 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,049 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,049 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,049 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,049 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,049 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,049 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,049 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,049 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,049 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,049 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,049 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,049 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,049 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,050 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,050 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,050 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,050 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,050 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,050 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,050 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,050 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,050 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,050 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,050 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,050 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,050 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,050 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,050 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,050 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,051 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,051 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,051 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,051 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,051 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,051 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,051 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,051 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,051 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,051 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,051 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,051 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,052 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,052 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,052 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,052 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,052 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,052 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,052 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,052 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,052 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,052 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,053 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,053 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,053 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,053 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,053 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,053 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,053 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,053 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,053 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,053 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,053 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,054 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,054 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,054 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,054 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,054 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,054 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,054 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,054 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,054 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,054 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,054 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,054 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,054 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,054 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,055 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,055 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,055 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,055 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,055 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,055 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,055 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,055 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,055 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,055 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,055 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,055 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,055 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,055 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,055 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,055 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,055 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,056 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,056 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,056 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,056 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,056 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,056 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,056 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,056 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,056 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,056 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,056 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,056 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,056 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,056 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,056 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,056 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,056 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,057 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,057 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,057 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,057 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,057 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,057 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,057 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,057 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,057 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,057 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,057 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,057 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,057 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,057 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,057 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,057 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,057 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,057 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,057 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,058 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,058 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,058 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,058 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,058 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,058 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,058 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,058 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,058 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,058 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,058 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,058 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,058 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,058 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,058 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,058 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,058 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,059 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,059 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,059 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,059 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,059 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,059 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,059 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,059 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,059 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,059 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,059 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,059 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,059 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,059 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,059 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,059 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,059 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,059 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,059 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,060 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,060 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,060 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,060 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,060 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,060 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,060 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,060 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,060 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,060 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,060 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,060 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,060 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,060 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,060 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,060 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,060 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,061 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,061 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,061 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,061 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,061 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,061 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,061 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,061 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,061 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,061 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,061 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,061 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,061 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,061 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,061 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,061 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,061 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,061 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,062 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,062 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,062 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,062 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,062 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,062 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,062 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,062 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,062 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,062 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,062 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,062 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,062 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,062 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,062 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,062 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,063 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,063 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,063 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,063 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,063 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,063 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,063 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,063 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,063 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,063 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,063 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,063 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,063 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,063 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,063 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,063 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,064 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,064 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,064 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,064 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,064 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,064 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,064 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,064 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,064 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,064 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,064 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,064 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,064 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,064 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,064 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,064 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,064 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,064 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,065 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,065 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,065 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,065 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,065 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,065 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,065 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,065 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,065 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,065 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,065 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,065 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,065 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,065 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,065 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,065 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,065 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,066 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,066 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,066 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,066 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,066 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,066 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,066 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,066 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,066 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,066 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,066 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,066 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,066 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,066 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,066 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,066 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,066 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,067 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,067 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,067 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,067 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,067 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,067 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,067 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,067 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,067 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,067 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,067 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,067 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,067 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,067 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,067 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,067 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,067 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,067 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,068 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,068 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,068 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,068 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,068 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,068 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,068 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,068 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,068 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,068 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,068 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,068 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,068 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,068 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,068 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,068 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,068 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,068 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,068 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,069 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,069 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,069 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,069 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,069 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,069 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,069 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,069 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,069 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,069 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,069 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,069 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,069 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,069 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,069 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,069 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,069 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,069 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,070 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,070 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,070 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,070 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,070 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,070 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,070 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,070 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,070 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,070 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,070 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,070 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,070 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,070 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,070 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,070 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,070 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,071 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,071 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,071 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,071 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,071 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,071 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,071 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,071 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,071 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,071 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,071 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,071 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,071 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,071 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,071 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,071 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,071 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,071 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,071 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,072 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,072 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,072 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,072 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,072 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,072 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,072 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,072 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,072 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,072 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,072 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,072 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,072 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,072 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,072 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,072 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,073 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,073 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,073 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,073 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,073 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,073 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,073 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,073 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,073 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,073 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,073 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,073 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,073 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,073 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,073 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,073 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,073 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,073 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,074 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,074 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,074 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,074 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,074 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,074 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,074 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,074 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,074 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,074 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,074 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,074 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,074 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,074 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,074 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,074 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,074 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,074 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,075 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,075 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,075 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,075 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,075 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,075 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,075 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,075 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,075 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,075 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,075 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,075 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,075 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,075 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,075 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,075 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,075 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,075 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,076 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,076 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,076 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,076 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,076 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,076 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,076 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,076 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,076 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,076 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,076 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,076 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,076 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,076 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,076 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,076 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,076 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,077 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,077 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,077 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,077 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,077 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,077 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,077 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,077 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,077 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,077 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,077 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,077 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,077 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,077 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,077 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,077 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,077 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,078 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,078 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,078 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,078 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,078 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,078 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,078 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,078 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,078 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,078 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,078 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,078 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,078 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,078 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,078 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,078 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,078 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,079 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,079 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,079 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,079 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,079 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,079 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,079 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,079 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,079 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,079 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,079 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,079 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,079 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,079 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,079 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,079 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,079 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,080 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,080 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,080 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,080 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,080 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,080 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,080 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,080 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,080 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,080 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,080 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,080 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,080 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,080 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,080 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,080 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,081 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,081 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,081 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,081 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,081 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,081 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,081 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,081 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,081 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,081 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,081 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,081 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,081 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,081 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,081 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,081 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,081 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,082 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,082 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,082 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,082 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,082 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,082 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,082 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,082 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,082 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,082 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,082 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,082 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,082 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,082 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,082 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,082 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,083 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,083 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,083 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,083 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,083 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,083 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,083 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,083 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,083 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,083 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,083 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,083 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,083 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,083 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,083 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,083 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,083 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,084 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,084 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,084 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,084 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,084 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,084 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,084 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,084 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,084 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,084 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,084 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,084 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,084 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,084 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,084 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,084 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,084 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,084 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,084 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,085 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,085 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,085 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,085 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,085 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,085 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,085 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,085 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,085 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,085 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,085 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,085 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,085 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,085 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,085 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,085 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,085 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,086 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,086 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,086 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,086 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,086 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,086 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,086 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,086 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,086 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,086 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,086 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,086 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,086 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,086 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,086 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,086 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,086 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,087 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,087 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,087 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,087 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,087 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,087 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,087 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,087 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,087 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,087 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,087 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,087 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,087 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,087 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,087 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,087 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,087 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,087 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,088 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,088 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,088 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,088 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,088 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,088 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,088 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,088 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,088 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,088 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,088 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,088 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,088 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,088 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,088 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,088 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,088 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,089 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,089 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,089 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,089 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,089 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,089 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,089 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,089 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,089 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,089 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,089 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,089 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,089 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,089 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,089 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,089 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,089 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,089 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,090 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,090 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,090 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,090 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,090 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,090 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,090 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,090 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,090 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,090 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,090 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,090 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,090 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,090 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,090 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,091 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,091 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,091 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,091 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,091 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,091 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,091 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,091 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,091 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,091 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,091 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,091 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,091 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,091 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,091 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,091 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,091 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,091 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,092 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,092 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,092 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,092 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,092 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,092 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,092 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,092 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,092 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,092 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,092 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,092 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,092 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,092 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,092 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,092 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,092 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,093 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,093 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,093 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,093 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,093 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,093 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,093 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,093 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,093 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,093 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,093 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,093 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,093 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,093 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,093 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,093 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,093 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,094 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,094 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,094 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,094 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,094 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,094 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,094 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,094 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,094 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,094 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,094 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,094 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,094 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,094 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,094 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,094 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,094 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,095 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,095 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,095 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,095 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,095 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,095 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,095 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,095 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,095 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,095 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,095 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,095 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,095 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,095 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,095 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,095 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,095 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,095 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,095 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,096 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,096 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,096 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,096 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,096 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,096 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,096 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,096 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,096 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,096 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,096 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,096 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,096 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,096 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,096 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,096 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,096 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,097 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,097 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,097 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,097 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,097 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,097 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,097 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,097 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,097 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,097 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,097 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,097 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,097 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,097 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,097 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,097 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,097 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,097 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,097 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,098 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,098 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,098 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,098 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,098 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,098 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,098 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,098 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,098 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,098 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,098 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,098 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,098 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,098 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,098 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,098 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,098 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,099 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,099 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,099 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,099 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,099 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,099 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,099 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,099 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,099 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,099 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,099 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,099 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,099 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,099 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,099 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,099 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,099 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,099 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,100 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,100 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,100 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,100 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,100 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,100 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,100 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,100 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,100 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,100 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,100 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,100 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,100 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,100 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,100 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,100 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,100 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,100 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,101 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,101 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,101 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,101 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,101 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,101 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,101 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,101 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,101 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,101 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,101 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,101 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,101 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,101 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,101 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,101 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,101 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,101 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,102 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,102 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,102 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,102 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,102 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,102 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,102 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,102 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,102 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,102 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,102 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,102 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,102 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,102 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,103 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,103 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,103 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,103 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,103 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,103 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,103 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,103 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,103 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,103 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,103 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,103 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,103 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,103 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,103 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,103 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,104 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,104 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,104 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,104 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,104 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,104 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,104 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,104 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,104 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,104 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,104 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,104 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,104 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,104 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,104 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,104 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,105 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,105 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,105 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,105 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,105 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,105 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,105 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,105 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,105 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,105 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,105 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,105 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,105 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,105 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,105 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,105 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,105 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,105 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,106 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,106 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,106 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,106 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,106 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,106 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,106 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,106 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,106 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,106 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,106 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,106 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,106 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,106 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,106 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,106 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,107 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,107 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,107 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,107 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,107 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,107 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,107 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,107 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,107 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,107 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,107 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,107 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,107 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,107 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,107 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,107 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,108 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,108 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,108 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,108 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,108 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,108 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,108 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,108 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,108 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,108 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,108 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,108 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,108 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,108 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,108 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,108 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,108 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,109 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,109 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,109 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,109 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,109 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,109 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,109 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,109 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,109 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,109 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,109 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,109 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,109 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,109 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,109 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,109 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,109 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,109 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,110 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,110 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,110 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,110 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,110 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,110 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,110 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,110 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,110 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,110 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,110 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,110 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,110 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,110 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,110 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,110 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,110 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,110 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,111 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,111 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,111 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,111 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,111 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,111 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,111 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,111 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,111 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,111 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,111 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,111 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,111 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,111 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,111 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,111 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,111 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,111 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,112 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,112 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,112 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,112 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,112 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,112 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,112 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,112 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,112 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,112 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,112 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,112 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,112 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,112 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,112 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,112 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,113 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,113 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,113 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,113 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,113 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,113 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,113 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,113 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,113 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,113 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,113 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,113 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,113 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,113 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,113 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,114 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,114 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,114 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,114 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,114 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,114 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,114 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,114 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,114 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,114 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,114 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,114 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,114 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,114 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,114 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,114 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,114 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,115 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,115 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,115 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,115 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,115 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,115 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,115 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,115 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,115 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,115 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,115 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,115 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,115 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,115 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,115 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,115 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,115 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,115 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,116 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,116 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,116 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,116 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,116 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,116 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,116 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,116 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,116 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,116 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,116 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,116 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,116 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,116 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,116 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,116 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,116 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,117 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,117 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,117 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,117 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,117 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,117 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,117 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,117 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,117 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,117 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,117 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,117 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,117 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,117 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,117 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,117 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,117 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,118 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,118 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,118 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,118 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,118 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,118 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,118 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,118 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,118 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,118 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,118 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,118 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,118 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,118 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,118 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,118 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,118 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,119 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,119 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,119 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,119 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,119 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,119 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,119 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,119 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,119 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,119 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,119 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,119 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,119 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,119 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,119 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,119 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,119 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,120 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,120 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,120 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,120 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,120 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,120 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,120 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,120 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,120 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,120 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,120 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,120 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,120 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,120 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,120 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,120 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,120 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,120 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,121 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,121 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,121 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,121 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,121 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,121 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,121 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,121 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,121 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,121 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,121 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,121 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,121 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,121 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,121 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,121 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,121 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,121 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,122 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,122 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,122 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,122 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,122 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,122 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,122 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,122 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,122 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,122 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,122 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,122 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,122 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,122 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,122 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,123 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,123 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,123 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,123 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,123 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,123 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,123 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,123 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,123 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,123 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,123 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,123 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,123 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,123 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,123 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,123 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,124 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,124 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,124 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,124 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,124 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,124 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,124 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,124 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,124 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,124 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,124 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,124 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,124 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,124 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,124 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,124 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,125 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,125 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,125 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,125 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,125 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,125 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,125 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,125 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,125 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,125 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,125 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,125 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,125 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,125 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,125 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,125 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,125 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,125 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,126 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,126 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,126 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,126 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,126 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,126 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,126 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,126 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,126 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,126 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,126 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,126 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,126 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,126 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,126 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,126 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,126 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,127 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,127 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,127 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,127 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,127 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,127 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,127 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,127 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,127 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,127 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,127 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,127 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,127 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,127 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,127 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,127 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,127 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,127 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,128 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,128 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,128 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,128 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,128 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,128 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,128 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,128 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,128 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,128 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,128 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,128 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,128 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,128 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,128 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,128 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,128 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,129 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,129 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,129 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,129 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,129 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,129 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,129 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,129 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,129 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,129 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,129 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,129 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,129 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,129 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,129 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,129 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,129 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,130 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,130 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,130 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,130 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,130 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,130 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,130 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,130 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,130 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,130 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,130 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,130 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,130 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,130 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,130 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,130 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,130 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,131 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,131 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,131 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,131 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,131 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,131 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,131 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,131 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,131 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,131 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,131 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,131 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,131 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,131 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,131 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,131 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,131 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,132 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,132 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,132 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,132 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,132 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,132 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,132 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,132 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,132 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,132 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,132 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,132 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,133 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,133 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,133 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,133 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,133 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,133 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,133 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,133 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,133 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,133 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,133 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,133 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,133 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,133 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,133 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,133 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,133 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,134 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,134 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,134 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,134 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,134 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,134 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,134 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,134 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,134 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,134 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,134 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,134 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,134 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,134 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,134 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,134 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,135 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,135 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,135 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,135 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,135 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,135 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,135 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,135 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,135 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,135 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,135 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,135 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,135 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,135 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,135 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,135 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,135 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,136 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,136 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,136 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,136 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,136 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,136 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,136 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,136 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,136 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,136 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,136 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,136 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,136 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,136 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,136 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,136 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,136 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,137 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,137 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,137 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,137 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,137 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,137 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,137 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,137 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,137 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,137 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,137 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,137 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,137 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,137 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,137 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,137 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,137 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,137 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,138 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,138 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,138 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,138 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,138 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,138 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,138 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,138 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,138 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,138 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,138 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,138 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,138 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,138 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,138 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,138 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,138 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,138 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,139 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,139 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,139 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,139 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,139 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,139 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,139 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,139 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,139 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,139 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,139 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,139 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,139 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,139 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,139 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,139 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,139 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,140 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,140 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,140 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,140 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,140 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,140 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,140 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,140 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,140 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,140 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,140 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,140 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,140 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,140 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,140 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,140 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,140 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,140 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,140 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,141 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,141 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,141 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,141 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,141 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,141 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,141 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,141 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,141 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,141 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,141 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,141 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,141 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,141 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,141 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,141 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,141 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,141 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,142 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,142 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,142 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,142 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,142 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,142 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,142 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,142 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,142 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,142 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,142 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,142 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,142 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,142 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,142 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,142 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,143 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,143 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,143 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,143 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,143 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,143 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,143 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,143 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,143 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,143 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,143 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,143 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,143 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,143 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,143 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,143 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,143 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,143 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,144 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,144 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,144 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,144 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,144 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,144 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,144 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,144 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,144 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,144 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,144 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,144 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,144 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,144 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,144 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,144 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,144 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,145 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,145 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,145 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,145 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,145 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,145 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,145 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,145 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,145 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,145 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,145 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,145 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,145 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,145 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,145 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,145 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,145 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,145 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,145 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,146 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,146 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,146 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,146 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,146 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,146 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,146 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,146 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,146 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,146 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,146 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,146 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,146 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,146 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,146 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,146 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,146 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,146 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,147 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,147 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,147 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,147 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,147 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,147 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,147 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,147 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,147 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,147 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,147 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,147 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,147 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,147 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,147 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,147 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,147 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,147 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,147 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,148 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,148 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,148 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,148 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,148 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,148 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,148 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,148 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,148 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,148 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,148 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,148 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,148 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,148 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,148 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,148 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,148 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,148 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,149 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,149 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,149 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,149 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,149 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,149 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,149 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,149 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,149 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,149 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,149 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,149 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,149 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,149 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,149 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,149 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,149 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,150 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,150 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,150 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,150 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,150 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,150 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,150 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,150 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,150 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,150 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,150 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,150 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,150 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,150 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,150 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,150 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,150 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,151 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,151 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,151 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,151 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,151 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,151 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,151 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,151 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,151 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,151 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,151 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,151 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,151 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,151 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,151 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,151 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,151 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,151 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,151 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,152 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,152 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,152 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,152 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,152 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,152 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,152 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,152 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,152 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,152 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,152 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,152 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,152 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,152 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,152 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,152 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,152 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,153 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,153 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,153 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,153 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,153 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,153 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,153 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,153 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,153 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,153 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,153 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,153 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,153 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,153 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,153 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,153 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,153 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,153 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,154 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,154 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,154 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,154 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,154 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,154 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,154 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,154 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,154 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,154 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,154 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,154 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,154 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,154 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,154 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,154 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,154 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,155 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,155 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,155 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,155 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,155 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,155 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,155 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,155 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,155 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,155 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,155 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,155 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,155 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,155 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,155 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,155 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,155 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,155 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,155 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,156 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,156 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,156 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,156 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,156 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,156 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,156 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,156 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,156 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,156 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,156 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,156 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,156 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,156 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,156 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,156 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,156 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,157 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,157 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,157 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,157 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,157 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,157 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,157 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,157 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,157 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,157 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,157 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,157 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,157 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,157 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,157 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,157 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,157 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,157 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,158 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,158 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,158 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,158 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,158 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,158 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,158 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,158 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,158 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,158 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,158 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,158 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,158 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,158 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,158 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,158 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,159 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,159 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,159 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,159 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,159 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,159 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,159 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,159 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,159 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,159 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,159 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,159 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,159 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,159 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,159 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,160 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,160 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,160 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,160 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,160 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,160 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,160 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,160 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,160 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,160 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,160 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,160 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,160 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,160 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,160 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,160 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,160 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,160 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,161 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,161 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,161 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,161 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,161 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,161 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,161 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,161 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,161 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,161 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,161 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,161 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,161 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,161 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,161 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,161 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,161 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,162 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,162 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,162 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,162 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,162 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,162 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,162 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,162 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,162 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,162 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,162 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,162 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,162 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,162 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,162 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,162 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,163 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,163 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,163 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,163 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,163 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,163 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,163 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,163 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,163 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,163 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,163 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,163 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,163 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,163 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,163 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,163 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,163 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,164 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,164 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,164 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,164 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,164 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,164 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,164 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,164 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,164 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,164 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,164 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,164 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,164 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,164 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,164 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,164 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,165 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,165 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,165 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,165 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,165 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,165 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,165 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,165 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,165 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,165 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,165 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,165 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,165 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,165 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,165 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,165 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,165 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,165 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,166 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,166 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,166 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,166 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,166 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,166 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,166 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,166 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,166 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,166 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,166 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,166 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,166 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,166 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,166 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,166 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,166 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,166 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,167 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,167 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,167 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,167 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,167 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,167 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,167 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,167 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,167 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,167 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,167 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,167 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,167 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,167 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,167 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,167 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,167 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,167 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,167 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,168 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,168 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,168 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,168 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,168 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,168 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,168 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,168 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,168 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,168 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,168 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,168 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,168 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,168 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,168 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,168 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,168 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,169 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,169 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,169 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,169 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,169 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,169 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,169 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,169 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,169 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,169 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,169 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,169 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,169 DEBUG SenderThread:266733 [sender.py:send():235] send: metric +2022-03-02 23:41:57,169 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:41:57,258 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:41:57,344 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:41:57,535 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:41:58,396 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-02 23:41:58,536 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:41:58,959 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:41:58,960 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:41:59,603 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:42:00,603 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/config.yaml +2022-03-02 23:42:00,603 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:42:04,605 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:42:06,605 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:42:10,607 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:42:12,608 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:42:14,178 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:42:14,178 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:42:14,609 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:42:16,610 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:42:18,482 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:42:18,534 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:42:18,619 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:42:19,618 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:42:19,619 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:42:20,619 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:42:22,619 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:42:24,620 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:42:28,621 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:42:28,910 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-02 23:42:29,319 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:42:29,319 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:42:30,622 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:42:32,623 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:42:36,624 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:42:38,625 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:42:40,153 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:42:40,206 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:42:40,322 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:42:40,626 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:42:40,626 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:42:42,626 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:42:44,478 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:42:44,479 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:42:44,627 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:42:46,628 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:42:49,629 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:42:53,631 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:42:55,631 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:42:57,632 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:42:59,297 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-02 23:42:59,591 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:42:59,592 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:42:59,633 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:43:01,431 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:43:01,494 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:43:01,578 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:43:01,633 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:43:03,634 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:43:04,635 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:43:05,635 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:43:07,636 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:43:11,637 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:43:13,638 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:43:14,875 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:43:14,876 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:43:15,639 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:43:19,640 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:43:21,641 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:43:22,781 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:43:22,842 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:43:22,931 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:43:23,642 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:43:23,642 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:43:24,642 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:43:26,643 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:43:29,741 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-02 23:43:30,027 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:43:30,027 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:43:30,644 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:43:32,645 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:43:34,646 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:43:36,647 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:43:40,648 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:43:42,649 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:43:43,817 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:43:43,871 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:43:43,971 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:43:44,650 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:43:44,650 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:43:45,093 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:43:45,094 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:43:48,651 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:43:50,652 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:43:52,653 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:43:57,654 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:43:59,655 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:44:00,186 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-02 23:44:00,284 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:44:00,284 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:44:01,656 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:44:03,657 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:44:04,893 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:44:04,947 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:44:05,032 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:44:05,657 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:44:06,658 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:44:07,658 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:44:09,659 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:44:11,660 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:44:13,661 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:44:15,520 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:44:15,521 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:44:17,662 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:44:19,663 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:44:21,663 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:44:23,664 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:44:25,691 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:44:25,743 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:44:25,828 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:44:26,665 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:44:26,665 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:44:27,665 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:44:28,666 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:44:30,561 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-02 23:44:30,666 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:44:30,965 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:44:30,966 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:44:32,667 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:44:34,668 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:44:36,668 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:44:40,670 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:44:42,671 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:44:44,671 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:44:46,264 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:44:46,321 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:44:46,409 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:44:46,417 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:44:46,418 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:44:46,672 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:44:46,673 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:44:47,673 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:44:50,674 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:44:52,675 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:44:54,675 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:44:58,677 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:45:00,678 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:45:01,011 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-02 23:45:01,698 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:45:01,699 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:45:02,678 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:45:04,679 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:45:06,716 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:45:06,801 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:45:06,894 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:45:07,680 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:45:07,680 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:45:09,681 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:45:11,681 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:45:13,682 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:45:15,683 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:45:16,851 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:45:16,852 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:45:19,685 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:45:21,685 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:45:23,686 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:45:25,687 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:45:27,188 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:45:27,242 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:45:27,353 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:45:27,687 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:45:27,687 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:45:29,688 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:45:31,438 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-02 23:45:31,689 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:45:32,033 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:45:32,034 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:45:33,689 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:45:37,691 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:45:39,691 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:45:41,692 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:45:43,693 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:45:45,694 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:45:47,075 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:45:47,076 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:45:47,609 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:45:47,684 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:45:47,770 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:45:48,768 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:45:48,769 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:45:49,769 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:45:51,769 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:45:53,770 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:45:55,771 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:46:00,773 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:46:01,990 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-02 23:46:02,172 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:46:02,172 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:46:02,773 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:46:04,774 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:46:06,775 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:46:08,407 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:46:08,578 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:46:08,667 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:46:08,776 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:46:10,776 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:46:12,777 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:46:14,778 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:46:16,778 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:46:17,221 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:46:17,222 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:46:20,780 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:46:22,780 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:46:24,781 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:46:26,782 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:46:28,299 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:46:28,354 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:46:28,449 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:46:28,782 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:46:28,783 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:46:30,783 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:46:32,270 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:46:32,271 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:46:32,547 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-02 23:46:33,784 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:46:35,785 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:46:37,786 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:46:39,787 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:46:41,787 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:46:45,789 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:46:47,320 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:46:47,320 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:46:47,789 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:46:48,060 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:46:48,113 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:46:48,195 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:46:48,790 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:46:49,790 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:46:50,791 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:46:51,791 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:46:53,792 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:46:57,793 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:46:59,794 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:47:01,794 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:47:02,371 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:47:02,371 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:47:02,933 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-02 23:47:03,795 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:47:06,796 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:47:07,800 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:47:07,852 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:47:07,937 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:47:08,827 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:47:08,828 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:47:10,828 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:47:12,829 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:47:14,829 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:47:16,830 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:47:17,425 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:47:17,425 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:47:18,831 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:47:20,831 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:47:24,833 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:47:26,834 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:47:27,323 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:47:27,377 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:47:27,489 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:47:27,834 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:47:28,835 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:47:29,835 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:47:30,835 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:47:32,597 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:47:32,606 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:47:32,836 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:47:33,375 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-02 23:47:36,837 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:47:38,838 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:47:40,839 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:47:42,840 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:47:45,841 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:47:46,709 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:47:46,784 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:47:46,883 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:47:47,866 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:47:47,867 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:47:47,882 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:47:47,882 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:47:51,884 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:47:53,884 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:47:55,885 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:47:57,886 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:47:59,886 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:48:01,887 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:48:02,926 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:48:02,927 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:48:03,778 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-02 23:48:05,889 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:48:05,954 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:48:06,024 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:48:06,116 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:48:06,889 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:48:07,889 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:48:09,890 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:48:11,891 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:48:13,891 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:48:16,893 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:48:18,066 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:48:18,066 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:48:18,893 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:48:22,895 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:48:24,895 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:48:24,983 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:48:25,069 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:48:25,161 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:48:25,896 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:48:25,896 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:48:26,896 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:48:28,897 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:48:32,898 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:48:33,230 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:48:33,231 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:48:34,208 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-02 23:48:34,899 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:48:36,900 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:48:38,900 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:48:40,901 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:48:42,902 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:48:43,774 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:48:43,830 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:48:43,916 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:48:44,914 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:48:44,915 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:48:46,915 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:48:48,434 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:48:48,435 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:48:48,916 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:48:50,916 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:48:55,918 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:48:57,919 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:48:59,920 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:49:01,920 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:49:02,085 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:49:02,139 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:49:02,230 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:49:02,921 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:49:03,633 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:49:03,634 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:49:03,921 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:49:04,625 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-02 23:49:04,922 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:49:05,922 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:49:07,923 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:49:11,924 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:49:13,925 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:49:15,926 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:49:17,927 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:49:18,824 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:49:18,824 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:49:19,928 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:49:20,172 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:49:20,225 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:49:20,313 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:49:20,928 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:49:20,928 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:49:21,929 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:49:23,929 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:49:25,930 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:49:27,931 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:49:29,931 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:49:33,933 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:49:34,137 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:49:34,137 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:49:35,015 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-02 23:49:35,934 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:49:37,839 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:49:37,922 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:49:38,007 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:49:39,006 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:49:39,006 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:49:43,007 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:49:45,008 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:49:47,009 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:49:49,009 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:49:49,420 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:49:49,421 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:49:51,010 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:49:53,011 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:49:55,011 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:49:55,309 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:49:55,363 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:49:55,453 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:49:56,012 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:49:57,012 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:49:58,012 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:49:59,013 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:50:01,014 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:50:03,014 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:50:04,628 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:50:04,629 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:50:05,432 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-02 23:50:07,016 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:50:11,017 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:50:12,901 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:50:13,040 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:50:13,076 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:50:13,166 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:50:14,041 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:50:14,041 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:50:16,041 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:50:18,042 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:50:19,922 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:50:19,922 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:50:20,043 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:50:22,044 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:50:24,044 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:50:26,045 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:50:28,046 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:50:29,167 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:50:29,245 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:50:29,351 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:50:30,046 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:50:30,047 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:50:32,047 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:50:33,047 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:50:35,004 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:50:35,005 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:50:35,049 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:50:35,863 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-02 23:50:37,050 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:50:39,051 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:50:41,052 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:50:43,053 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:50:44,688 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:50:44,742 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:50:44,833 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:50:45,054 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:50:45,054 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:50:46,054 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:50:47,054 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:50:49,055 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:50:50,050 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:50:50,051 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:50:51,056 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:50:53,057 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:50:55,058 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:50:57,058 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:50:59,059 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:50:59,338 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:50:59,390 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:50:59,479 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:51:00,059 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:51:00,060 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:51:01,060 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:51:03,060 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:51:05,061 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:51:05,118 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:51:05,119 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:51:06,294 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-02 23:51:07,062 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:51:09,063 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:51:11,064 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:51:13,062 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:51:13,116 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:51:13,126 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:51:13,200 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:51:14,117 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:51:14,117 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:51:15,117 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:51:17,118 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:51:19,119 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:51:20,253 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:51:20,254 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:51:21,120 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:51:23,120 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:51:25,121 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:51:25,820 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:51:25,874 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:51:25,979 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:51:26,121 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:51:27,122 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:51:29,122 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:51:31,123 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:51:33,124 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:51:35,124 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:51:35,305 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:51:35,306 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:51:36,741 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-02 23:51:37,125 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:51:37,491 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:51:37,545 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:51:37,630 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:51:38,125 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:51:39,126 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:51:41,127 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:51:43,127 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:51:45,128 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:51:47,129 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:51:47,877 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:51:47,929 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:51:48,017 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:51:48,130 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:51:49,130 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:51:50,382 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:51:50,383 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:51:52,131 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:51:54,132 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:51:56,133 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:51:57,250 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:51:57,306 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:51:57,392 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:51:58,133 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:51:58,133 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:51:59,134 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:52:00,134 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:52:02,135 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:52:04,136 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:52:05,488 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:52:05,490 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:52:05,609 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:52:05,665 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:52:05,754 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:52:06,136 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:52:06,136 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:52:07,137 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:52:07,191 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-02 23:52:08,137 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:52:10,138 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:52:12,138 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:52:12,867 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:52:12,923 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:52:13,018 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:52:13,139 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:52:14,139 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:52:15,139 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:52:16,140 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:52:18,140 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:52:19,195 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:52:19,250 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:52:19,335 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:52:20,141 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:52:20,141 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:52:20,535 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:52:20,536 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:52:21,141 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:52:22,142 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:52:24,143 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:52:24,921 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:52:25,098 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:52:25,235 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:52:26,177 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:52:26,178 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:52:27,178 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:52:30,179 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:52:32,180 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:52:35,641 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:52:35,642 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:52:36,181 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:52:37,659 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-02 23:52:38,182 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:52:42,183 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:52:46,185 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:52:48,185 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:52:49,567 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:52:49,623 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:52:49,706 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:52:50,186 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:52:50,186 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:52:50,848 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:52:50,849 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:52:51,186 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:52:54,188 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:52:56,188 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:53:01,190 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:53:03,191 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:53:05,954 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:53:05,955 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:53:07,192 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:53:08,066 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-02 23:53:09,193 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:53:11,193 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:53:13,623 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:53:13,678 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:53:13,767 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:53:14,195 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:53:15,195 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:53:16,196 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:53:17,196 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:53:21,065 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:53:21,066 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:53:21,197 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:53:23,198 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:53:27,200 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:53:29,201 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:53:34,202 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:53:36,127 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:53:36,127 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:53:36,203 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:53:37,185 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:53:37,240 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:53:37,334 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:53:38,241 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:53:38,241 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:53:38,482 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-02 23:53:40,242 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:53:42,243 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:53:44,244 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:53:48,245 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:53:50,246 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:53:51,172 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:53:51,174 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:53:54,247 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:53:56,248 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:53:58,249 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:54:00,518 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:54:00,571 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:54:00,652 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:54:01,250 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:54:02,250 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:54:04,251 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:54:06,221 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:54:06,222 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:54:08,252 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:54:08,883 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-02 23:54:11,253 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:54:13,254 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:54:17,255 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:54:19,256 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:54:21,466 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:54:21,466 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:54:23,258 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:54:23,683 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:54:23,736 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:54:23,820 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:54:24,258 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:54:24,259 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:54:25,259 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:54:27,259 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:54:31,261 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:54:33,262 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:54:36,667 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:54:36,669 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:54:37,263 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:54:39,262 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-02 23:54:39,264 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:54:44,266 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:54:46,266 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:54:46,689 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:54:46,744 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:54:46,834 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:54:47,267 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:54:48,267 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:54:49,267 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:54:51,722 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:54:51,723 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:54:52,269 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:54:54,269 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:54:56,270 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:55:00,271 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:55:02,272 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:55:06,274 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:55:06,952 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:55:06,952 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:55:08,274 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:55:09,426 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:55:09,488 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:55:09,571 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:55:09,650 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-02 23:55:10,275 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:55:10,275 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:55:11,276 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:55:14,277 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:55:16,277 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:55:20,279 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:55:22,046 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:55:22,048 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:55:22,280 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:55:25,281 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:55:29,282 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:55:31,283 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:55:32,285 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:55:32,338 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:55:32,429 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:55:33,332 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:55:33,332 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:55:35,333 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:55:37,122 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:55:37,123 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:55:37,333 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:55:39,334 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:55:40,050 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-02 23:55:43,335 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:55:45,336 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:55:49,338 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:55:51,338 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:55:52,242 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:55:52,242 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:55:53,339 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:55:55,038 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:55:55,092 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:55:55,178 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:55:55,340 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:55:56,340 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:55:57,340 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:55:59,341 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:56:03,342 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:56:05,343 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:56:07,344 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:56:07,592 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:56:07,593 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:56:10,461 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-02 23:56:12,346 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:56:14,347 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:56:16,347 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:56:17,655 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:56:17,709 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:56:17,793 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:56:18,348 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:56:20,349 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:56:22,349 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:56:22,708 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:56:22,709 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:56:24,350 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:56:28,352 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:56:30,352 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:56:32,354 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:56:36,355 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:56:37,821 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:56:37,822 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:56:38,356 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:56:40,098 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:56:40,152 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:56:40,241 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:56:40,357 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:56:40,883 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-02 23:56:41,357 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:56:42,357 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:56:45,359 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:56:47,359 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:56:49,360 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:56:52,990 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:56:52,991 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:56:53,361 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:56:55,362 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:56:57,363 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:57:01,365 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:57:02,436 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:57:02,490 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:57:02,581 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:57:03,365 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:57:03,366 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:57:05,366 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:57:08,227 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:57:08,229 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:57:10,368 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:57:11,314 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-02 23:57:12,369 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:57:16,370 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:57:18,371 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:57:20,372 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:57:23,454 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:57:23,454 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:57:24,373 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:57:25,218 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:57:25,396 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:57:25,483 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:57:26,393 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:57:26,394 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:57:27,394 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:57:28,394 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:57:32,395 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:57:34,396 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:57:38,539 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:57:38,539 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:57:39,398 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:57:41,398 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:57:41,706 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-02 23:57:43,399 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:57:47,238 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:57:47,292 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:57:47,376 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:57:47,401 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:57:47,401 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:57:49,401 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:57:51,402 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:57:53,403 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:57:53,733 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:57:53,734 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:57:57,405 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:57:59,405 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:58:03,407 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:58:05,407 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:58:07,408 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:58:08,830 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:58:08,831 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:58:09,120 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:58:09,176 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:58:09,262 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:58:09,409 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:58:11,410 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:58:12,133 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-02 23:58:13,410 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:58:15,411 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:58:18,412 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:58:22,414 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:58:23,925 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:58:23,927 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:58:24,415 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:58:28,416 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:58:30,417 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:58:30,735 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:58:30,789 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:58:30,871 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:58:31,417 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:58:32,418 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:58:34,418 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:58:38,420 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:58:39,245 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:58:39,247 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:58:40,421 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:58:42,421 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:58:42,532 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-02 23:58:44,422 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:58:49,424 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:58:51,425 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:58:52,137 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:58:52,215 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:58:52,299 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:58:52,425 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:58:53,426 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:58:54,409 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:58:54,410 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:58:54,426 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:58:55,426 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:58:57,427 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:58:59,428 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:59:01,428 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:59:05,430 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:59:07,431 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:59:09,431 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:59:09,462 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:59:09,462 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:59:13,004 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-02 23:59:13,433 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:59:13,506 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:59:13,558 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:59:13,663 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:59:14,433 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:59:14,434 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:59:15,434 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:59:17,434 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:59:19,435 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:59:23,436 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:59:24,572 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:59:24,574 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:59:25,437 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:59:27,438 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:59:31,439 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:59:33,440 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:59:34,940 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:59:34,994 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:59:35,084 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:59:35,441 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:59:36,442 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:59:39,733 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:59:39,735 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:59:40,443 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:59:42,444 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:59:43,423 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-02 23:59:44,445 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:59:48,446 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:59:50,447 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:59:52,448 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:59:54,448 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:59:54,783 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 23:59:54,783 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-02 23:59:56,006 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-02 23:59:56,060 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-02 23:59:56,142 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 23:59:56,449 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-02 23:59:57,450 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-02 23:59:58,450 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:00:00,451 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:00:02,451 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:00:06,453 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:00:08,454 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:00:09,919 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:00:09,920 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:00:10,454 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:00:12,455 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:00:13,822 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:00:16,457 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:00:16,916 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:00:16,969 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:00:17,055 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:00:17,457 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:00:18,457 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:00:19,458 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:00:21,458 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:00:23,459 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:00:25,157 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:00:25,158 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:00:27,461 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:00:29,462 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:00:31,462 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:00:33,463 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:00:37,465 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:00:37,759 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:00:37,833 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:00:37,925 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:00:38,465 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:00:39,466 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:00:40,320 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:00:40,321 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:00:41,466 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:00:44,230 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:00:45,468 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:00:47,468 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:00:49,469 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:00:51,470 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:00:54,471 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:00:55,470 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:00:55,470 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:00:58,333 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:00:58,386 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:00:58,474 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:00:58,474 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:00:58,478 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:00:59,474 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:01:00,474 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:01:02,475 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:01:06,477 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:01:08,477 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:01:10,478 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:01:10,519 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:01:10,520 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:01:12,479 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:01:14,725 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:01:16,481 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:01:18,481 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:01:19,016 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:01:19,071 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:01:19,164 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:01:19,482 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:01:20,482 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:01:22,483 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:01:25,566 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:01:25,568 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:01:26,484 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:01:28,485 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:01:30,485 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:01:32,486 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:01:36,488 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:01:38,488 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:01:39,979 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:01:40,154 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:01:40,244 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:01:40,489 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:01:40,489 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:01:40,628 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:01:40,629 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:01:42,490 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:01:45,128 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:01:45,491 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:01:47,492 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:01:49,492 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:01:51,493 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:01:53,494 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:01:56,039 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:01:56,039 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:01:57,495 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:01:59,496 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:02:00,209 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:02:00,266 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:02:00,359 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:02:00,496 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:02:01,497 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:02:02,497 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:02:05,498 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:02:07,499 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:02:09,499 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:02:11,311 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:02:11,312 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:02:11,500 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:02:13,501 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:02:15,516 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:02:17,502 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:02:19,503 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:02:20,194 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:02:20,250 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:02:20,338 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:02:20,504 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:02:21,504 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:02:22,504 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:02:23,505 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:02:26,394 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:02:26,396 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:02:26,506 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:02:30,507 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:02:32,508 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:02:34,509 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:02:36,509 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:02:38,510 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:02:39,870 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:02:39,924 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:02:40,015 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:02:40,511 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:02:40,511 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:02:41,444 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:02:41,446 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:02:42,511 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:02:44,512 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:02:45,935 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:02:46,513 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:02:48,513 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:02:50,514 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:02:54,515 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:02:56,495 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:02:56,496 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:02:56,516 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:02:58,517 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:02:59,512 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:02:59,566 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:02:59,652 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:03:00,534 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:03:00,534 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:03:01,534 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:03:02,534 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:03:06,536 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:03:08,537 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:03:10,538 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:03:11,613 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:03:11,614 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:03:12,539 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:03:14,539 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:03:16,389 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:03:18,803 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:03:18,859 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:03:18,955 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:03:19,541 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:03:19,542 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:03:21,542 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:03:23,543 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:03:25,543 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:03:26,874 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:03:26,875 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:03:27,544 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:03:29,545 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:03:33,547 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:03:35,547 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:03:37,548 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:03:38,018 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:03:38,074 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:03:38,161 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:03:38,548 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:03:39,549 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:03:42,024 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:03:42,026 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:03:43,550 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:03:45,551 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:03:46,824 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:03:47,552 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:03:49,552 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:03:51,553 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:03:53,554 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:03:55,555 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:03:56,793 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:03:56,848 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:03:56,936 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:03:57,196 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:03:57,197 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:03:57,556 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:03:57,556 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:04:01,557 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:04:03,558 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:04:05,558 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:04:07,559 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:04:09,560 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:04:12,266 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:04:12,267 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:04:12,561 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:04:14,562 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:04:15,621 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:04:15,677 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:04:15,762 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:04:16,563 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:04:17,218 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:04:17,563 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:04:18,563 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:04:20,564 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:04:22,565 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:04:24,566 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:04:26,567 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:04:27,443 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:04:27,445 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:04:28,568 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:04:30,568 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:04:32,569 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:04:33,841 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:04:33,896 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:04:33,990 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:04:34,570 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:04:34,570 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:04:36,571 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:04:41,573 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:04:42,584 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:04:42,585 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:04:43,574 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:04:45,575 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:04:47,610 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:04:49,576 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:04:51,527 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:04:51,583 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:04:51,593 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:04:51,674 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:04:52,583 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:04:52,584 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:04:53,584 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:04:55,584 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:04:57,585 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:04:57,634 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:04:57,636 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:04:59,586 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:05:01,587 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:05:03,587 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:05:05,588 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:05:07,589 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:05:09,026 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:05:09,080 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:05:09,167 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:05:09,590 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:05:10,590 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:05:12,591 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:05:12,710 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:05:12,711 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:05:14,592 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:05:16,593 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:05:18,016 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:05:18,593 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:05:20,594 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:05:22,595 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:05:24,595 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:05:26,074 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:05:26,129 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:05:26,223 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:05:26,596 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:05:26,597 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:05:27,758 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:05:27,760 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:05:28,597 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:05:31,598 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:05:33,599 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:05:37,600 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:05:39,601 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:05:41,602 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:05:42,803 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:05:42,803 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:05:43,269 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:05:43,492 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:05:43,577 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:05:43,602 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:05:43,603 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:05:45,603 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:05:47,604 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:05:48,605 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:05:49,619 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:05:51,620 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:05:53,621 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:05:55,621 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:05:57,622 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:05:57,916 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:05:57,917 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:05:59,267 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:05:59,321 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:05:59,416 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:05:59,623 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:05:59,623 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:06:01,623 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:06:03,624 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:06:05,625 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:06:07,625 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:06:09,626 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:06:11,627 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:06:13,142 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:06:13,144 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:06:13,627 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:06:14,557 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:06:14,613 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:06:14,703 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:06:15,701 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:06:15,702 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:06:17,702 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:06:18,992 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:06:19,703 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:06:21,703 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:06:23,704 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:06:25,705 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:06:27,706 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:06:28,306 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:06:28,308 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:06:28,930 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:06:28,985 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:06:29,070 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:06:29,706 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:06:29,707 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:06:31,707 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:06:33,708 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:06:35,708 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:06:38,709 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:06:40,710 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:06:42,299 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:06:42,355 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:06:42,445 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:06:42,711 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:06:42,711 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:06:43,349 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:06:43,351 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:06:43,711 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:06:44,712 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:06:46,713 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:06:48,713 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:06:49,416 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:06:50,714 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:06:52,715 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:06:54,716 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:06:54,812 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:06:54,868 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:06:54,960 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:06:55,716 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:06:55,717 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:06:56,717 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:06:58,414 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:06:58,415 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:06:58,717 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:07:00,718 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:07:02,719 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:07:04,719 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:07:06,185 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:07:06,240 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:07:06,334 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:07:06,720 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:07:06,720 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:07:07,721 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:07:08,721 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:07:10,722 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:07:12,722 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:07:13,474 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:07:13,475 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:07:14,723 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:07:16,524 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:07:16,578 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:07:16,665 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:07:16,724 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:07:16,724 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:07:17,724 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:07:18,725 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:07:19,832 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:07:20,725 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:07:22,726 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:07:24,727 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:07:25,820 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:07:25,876 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:07:25,966 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:07:26,727 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:07:26,728 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:07:27,728 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:07:28,529 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:07:28,530 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:07:28,728 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:07:30,729 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:07:32,729 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:07:33,827 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:07:33,881 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:07:33,967 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:07:34,730 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:07:34,730 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:07:35,731 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:07:36,731 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:07:38,732 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:07:40,732 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:07:40,910 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:07:40,966 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:07:41,088 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:07:41,733 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:07:41,733 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:07:42,733 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:07:43,576 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:07:43,577 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:07:45,734 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:07:46,950 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:07:47,007 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:07:47,094 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:07:47,735 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:07:47,735 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:07:49,736 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:07:50,357 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:07:51,736 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:07:52,696 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:07:52,881 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:07:52,969 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:07:53,836 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:07:53,837 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:07:54,837 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:07:57,838 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:07:58,636 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:07:58,636 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:08:01,839 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:08:03,840 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:08:07,842 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:08:09,842 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:08:11,843 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:08:13,685 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:08:13,685 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:08:15,844 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:08:17,122 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:08:17,173 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:08:17,256 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:08:17,845 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:08:17,845 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:08:18,846 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:08:20,837 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:08:21,853 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:08:23,854 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:08:28,740 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:08:28,741 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:08:28,855 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:08:30,856 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:08:34,857 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:08:36,858 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:08:38,859 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:08:40,953 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:08:41,008 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:08:41,099 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:08:41,860 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:08:42,861 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:08:43,792 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:08:43,793 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:08:44,861 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:08:48,863 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:08:50,864 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:08:51,251 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:08:55,865 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:08:57,866 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:08:59,140 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:08:59,140 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:08:59,867 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:09:03,868 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:09:04,135 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:09:04,189 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:09:04,272 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:09:04,869 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:09:04,869 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:09:05,869 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:09:07,870 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:09:11,871 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:09:13,872 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:09:14,320 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:09:14,322 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:09:17,873 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:09:19,874 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:09:21,627 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:09:21,875 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:09:25,877 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:09:27,202 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:09:27,255 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:09:27,367 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:09:27,877 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:09:27,878 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:09:29,379 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:09:29,381 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:09:29,878 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:09:32,879 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:09:34,880 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:09:38,882 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:09:40,882 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:09:42,883 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:09:44,698 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:09:44,698 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:09:46,885 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:09:48,885 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:09:50,142 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:09:50,194 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:09:50,297 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:09:50,886 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:09:50,886 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:09:51,887 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:09:52,038 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:09:54,888 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:09:56,888 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:10:00,065 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:10:00,067 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:10:00,890 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:10:02,891 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:10:04,891 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:10:08,893 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:10:11,894 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:10:12,901 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:10:12,954 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:10:13,039 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:10:13,895 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:10:14,895 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:10:15,137 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:10:15,139 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:10:15,895 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:10:17,896 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:10:19,897 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:10:22,426 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:10:23,898 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:10:25,899 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:10:27,900 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:10:30,229 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:10:30,230 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:10:31,901 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:10:33,902 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:10:35,607 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:10:35,661 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:10:35,750 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:10:35,903 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:10:35,903 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:10:36,903 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:10:38,904 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:10:40,905 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:10:42,905 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:10:45,349 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:10:45,351 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:10:46,907 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:10:48,908 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:10:52,804 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:10:52,909 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:10:54,910 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:10:56,911 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:10:58,298 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:10:58,349 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:10:58,439 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:10:58,911 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:10:58,912 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:11:00,451 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:11:00,453 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:11:00,912 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:11:02,913 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:11:04,913 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:11:08,915 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:11:10,916 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:11:13,917 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:11:15,520 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:11:15,520 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:11:17,918 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:11:19,919 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:11:20,711 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:11:20,764 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:11:20,852 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:11:20,919 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:11:21,920 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:11:23,224 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:11:23,920 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:11:25,921 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:11:27,922 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:11:30,761 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:11:30,762 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:11:31,923 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:11:33,924 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:11:35,925 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:11:39,927 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:11:41,927 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:11:42,904 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:11:42,958 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:11:43,044 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:11:43,965 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:11:43,965 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:11:45,837 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:11:45,838 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:11:46,966 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:11:50,967 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:11:52,968 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:11:53,626 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:11:56,969 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:11:58,970 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:12:00,971 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:12:01,000 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:12:01,000 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:12:02,971 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:12:04,670 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:12:04,723 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:12:04,812 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:12:04,972 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:12:06,973 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:12:08,974 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:12:12,976 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:12:14,976 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:12:16,171 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:12:16,172 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:12:16,977 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:12:18,977 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:12:22,979 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:12:24,035 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:12:24,980 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:12:26,651 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:12:26,704 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:12:26,812 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:12:26,980 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:12:28,981 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:12:29,982 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:12:31,340 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:12:31,341 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:12:31,983 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:12:33,983 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:12:35,984 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:12:39,985 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:12:41,986 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:12:45,988 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:12:46,399 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:12:46,400 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:12:47,988 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:12:49,022 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:12:49,207 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:12:49,293 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:12:50,051 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:12:50,051 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:12:51,051 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:12:52,052 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:12:54,052 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:12:54,483 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:12:56,053 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:12:58,054 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:13:01,557 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:13:01,558 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:13:03,056 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:13:05,056 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:13:07,057 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:13:09,058 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:13:10,697 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:13:10,750 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:13:10,842 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:13:11,059 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:13:13,059 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:13:15,060 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:13:16,649 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:13:16,649 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:13:17,061 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:13:21,063 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:13:23,063 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:13:24,934 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:13:25,064 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:13:29,065 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:13:31,066 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:13:31,886 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:13:31,886 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:13:32,333 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:13:32,388 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:13:32,476 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:13:33,067 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:13:33,067 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:13:37,068 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:13:39,069 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:13:42,070 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:13:46,072 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:13:46,970 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:13:46,971 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:13:48,073 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:13:50,073 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:13:52,074 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:13:53,650 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:13:53,704 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:13:53,788 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:13:54,075 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:13:55,352 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:13:56,075 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:13:58,076 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:14:00,077 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:14:02,028 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:14:02,030 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:14:04,078 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:14:06,079 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:14:08,080 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:14:12,081 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:14:14,082 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:14:14,982 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:14:15,037 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:14:15,146 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:14:16,145 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:14:16,145 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:14:17,264 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:14:17,265 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:14:18,145 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:14:22,147 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:14:24,147 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:14:25,718 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:14:26,148 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:14:29,149 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:14:32,344 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:14:32,344 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:14:33,150 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:14:35,151 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:14:36,000 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:14:36,082 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:14:36,172 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:14:37,171 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:14:37,171 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:14:38,171 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:14:39,171 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:14:41,172 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:14:43,173 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:14:45,174 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:14:47,503 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:14:47,504 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:14:49,175 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:14:51,176 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:14:53,176 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:14:55,177 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:14:56,125 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:14:57,033 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:14:57,086 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:14:57,168 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:14:57,178 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:14:59,178 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:15:01,179 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:15:02,701 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:15:02,703 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:15:03,180 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:15:08,182 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:15:10,182 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:15:12,183 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:15:14,184 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:15:16,185 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:15:17,889 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:15:17,942 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:15:18,026 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:15:18,186 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:15:18,254 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:15:18,255 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:15:19,186 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:15:20,186 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:15:22,187 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:15:24,188 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:15:26,606 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:15:28,189 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:15:30,190 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:15:32,191 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:15:33,581 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:15:33,582 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:15:34,191 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:15:38,193 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:15:38,401 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:15:38,456 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:15:38,547 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:15:39,193 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:15:40,193 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:15:41,194 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:15:42,194 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:15:45,195 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:15:47,196 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:15:48,917 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:15:48,918 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:15:51,198 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:15:53,198 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:15:55,199 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:15:57,008 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:15:57,200 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:15:58,770 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:15:58,851 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:15:58,940 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:15:59,201 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:16:01,202 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:16:03,202 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:16:04,190 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:16:04,192 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:16:05,203 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:16:09,204 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:16:11,205 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:16:13,206 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:16:15,207 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:16:17,207 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:16:19,054 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:16:19,109 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:16:19,193 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:16:19,208 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:16:19,564 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:16:19,566 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:16:20,209 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:16:21,209 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:16:23,210 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:16:25,210 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:16:27,211 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:16:27,432 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:16:30,212 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:16:34,213 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:16:34,776 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:16:34,776 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:16:36,214 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:16:38,215 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:16:39,087 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:16:39,141 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:16:39,227 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:16:40,225 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:16:40,226 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:16:44,227 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:16:46,227 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:16:48,228 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:16:50,054 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:16:50,056 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:16:50,229 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:16:54,230 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:16:56,231 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:16:57,859 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:16:58,232 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:16:59,779 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:16:59,997 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:17:00,088 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:17:00,232 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:17:00,233 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:17:02,233 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:17:03,233 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:17:05,234 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:17:05,349 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:17:05,350 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:17:07,235 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:17:09,236 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:17:11,236 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:17:13,237 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:17:17,238 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:17:19,239 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:17:19,512 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:17:19,568 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:17:19,662 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:17:20,240 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:17:20,240 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:17:20,518 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:17:20,519 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:17:21,240 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:17:23,241 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:17:25,241 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:17:28,515 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:17:29,243 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:17:31,244 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:17:33,244 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:17:35,245 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:17:35,598 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:17:35,598 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:17:37,246 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:17:39,151 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:17:39,226 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:17:39,324 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:17:40,318 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:17:40,318 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:17:41,318 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:17:42,318 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:17:44,319 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:17:46,320 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:17:48,321 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:17:50,321 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:17:50,643 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:17:50,644 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:17:52,322 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:17:56,324 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:17:58,324 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:17:58,855 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:17:58,947 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:17:59,035 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:17:59,073 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:17:59,325 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:18:00,325 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:18:01,326 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:18:04,327 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:18:05,688 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:18:05,689 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:18:06,328 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:18:08,328 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:18:10,329 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:18:12,330 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:18:14,330 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:18:17,331 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:18:18,312 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:18:18,395 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:18:18,510 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:18:19,359 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:18:19,360 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:18:20,761 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:18:20,762 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:18:21,360 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:18:23,361 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:18:25,362 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:18:27,363 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:18:29,363 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:18:29,574 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:18:31,364 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:18:33,365 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:18:36,013 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:18:36,013 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:18:37,366 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:18:37,597 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:18:37,651 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:18:37,736 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:18:38,367 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:18:39,367 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:18:41,368 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:18:43,368 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:18:45,369 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:18:48,370 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:18:50,371 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:18:51,127 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:18:51,128 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:18:54,373 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:18:56,373 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:18:56,490 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:18:56,545 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:18:56,630 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:18:57,374 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:18:58,374 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:19:00,097 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:19:00,375 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:19:02,376 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:19:04,376 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:19:06,345 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:19:06,346 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:19:08,378 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:19:11,379 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:19:13,380 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:19:15,366 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:19:15,423 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:19:15,429 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:19:15,521 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:19:16,424 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:19:17,424 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:19:18,424 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:19:19,425 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:19:21,425 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:19:21,489 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:19:21,491 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:19:25,427 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:19:27,427 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:19:29,428 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:19:30,517 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:19:31,429 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:19:33,429 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:19:34,095 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:19:34,149 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:19:34,235 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:19:34,430 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:19:35,430 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:19:36,431 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:19:36,593 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:19:36,594 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:19:37,431 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:19:41,432 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:19:43,433 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:19:45,434 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:19:47,435 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:19:49,436 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:19:51,436 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:19:51,728 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:19:51,729 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:19:52,362 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:19:52,421 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:19:52,515 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:19:53,509 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:19:53,509 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:19:54,509 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:19:55,510 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:19:57,510 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:20:00,947 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:20:02,512 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:20:04,513 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:20:06,513 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:20:06,880 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:20:06,882 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:20:08,514 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:20:10,515 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:20:10,553 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:20:10,613 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:20:10,704 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:20:11,515 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:20:12,516 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:20:14,516 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:20:16,517 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:20:18,518 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:20:20,518 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:20:22,071 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:20:22,072 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:20:22,519 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:20:24,519 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:20:26,520 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:20:28,115 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:20:28,175 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:20:28,269 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:20:28,521 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:20:28,521 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:20:31,448 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:20:31,522 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:20:32,522 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:20:34,523 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:20:36,524 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:20:37,287 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:20:37,289 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:20:38,525 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:20:40,525 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:20:42,526 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:20:44,527 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:20:45,274 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:20:45,328 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:20:45,417 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:20:45,527 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:20:46,527 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:20:47,528 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:20:48,528 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:20:50,529 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:20:52,493 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:20:52,495 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:20:53,530 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:20:55,530 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:20:57,531 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:20:59,532 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:21:01,532 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:21:01,853 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:21:02,678 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:21:02,854 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:21:02,951 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:21:03,533 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:21:03,533 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:21:05,534 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:21:07,534 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:21:07,593 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:21:07,594 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:21:09,535 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:21:12,536 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:21:14,537 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:21:16,537 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:21:18,538 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:21:18,864 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:21:18,916 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:21:19,002 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:21:19,538 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:21:20,539 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:21:21,539 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:21:22,539 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:21:22,709 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:21:22,710 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:21:24,540 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:21:26,541 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:21:28,541 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:21:30,542 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:21:32,397 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:21:32,543 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:21:34,195 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:21:34,249 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:21:34,337 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:21:34,543 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:21:34,544 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:21:35,544 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:21:36,544 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:21:37,847 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:21:37,848 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:21:38,545 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:21:40,546 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:21:42,547 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:21:44,547 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:21:46,548 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:21:48,549 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:21:48,745 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:21:48,802 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:21:48,913 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:21:49,549 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:21:50,549 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:21:52,550 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:21:52,988 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:21:52,990 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:21:54,551 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:21:56,551 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:21:58,552 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:22:00,553 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:22:02,316 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:22:02,367 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:22:02,468 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:22:02,554 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:22:02,554 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:22:02,903 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:22:04,555 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:22:06,555 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:22:08,077 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:22:08,078 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:22:08,556 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:22:10,557 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:22:12,557 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:22:14,558 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:22:15,019 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:22:15,077 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:22:15,169 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:22:15,558 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:22:16,559 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:22:17,559 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:22:19,560 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:22:21,561 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:22:23,127 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:22:23,128 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:22:23,561 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:22:25,562 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:22:26,637 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:22:26,687 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:22:26,777 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:22:27,563 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:22:27,563 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:22:28,563 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:22:29,563 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:22:31,564 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:22:33,486 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:22:33,565 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:22:35,566 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:22:36,809 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:22:36,862 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:22:36,955 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:22:37,566 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:22:37,566 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:22:38,222 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:22:38,223 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:22:38,567 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:22:39,567 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:22:41,568 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:22:43,569 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:22:45,569 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:22:45,919 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:22:45,973 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:22:46,066 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:22:46,570 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:22:46,570 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:22:47,570 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:22:49,571 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:22:51,572 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:22:53,366 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:22:53,367 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:22:53,573 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:22:53,997 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:22:54,069 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:22:54,159 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:22:54,573 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:22:55,573 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:22:56,574 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:22:57,574 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:22:59,575 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:23:01,095 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:23:01,148 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:23:01,233 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:23:01,575 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:23:01,576 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:23:02,576 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:23:03,576 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:23:03,928 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:23:05,577 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:23:07,131 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:23:07,185 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:23:07,269 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:23:07,578 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:23:07,578 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:23:08,422 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:23:08,423 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:23:08,578 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:23:09,579 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:23:11,579 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:23:12,810 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:23:12,977 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:23:13,057 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:23:13,580 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:23:13,580 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:23:14,580 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:23:17,581 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:23:19,582 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:23:23,505 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:23:23,505 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:23:24,584 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:23:28,585 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:23:30,586 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:23:32,587 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:23:34,331 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:23:36,588 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:23:37,435 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:23:37,487 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:23:37,573 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:23:37,589 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:23:38,589 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:23:38,719 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:23:38,721 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:23:39,589 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:23:42,590 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:23:44,591 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:23:48,593 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:23:50,593 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:23:53,830 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:23:53,830 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:23:54,595 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:23:56,596 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:24:00,597 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:24:01,221 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:24:01,277 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:24:01,363 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:24:01,598 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:24:02,598 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:24:03,598 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:24:04,599 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:24:04,753 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:24:08,884 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:24:08,886 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:24:09,600 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:24:11,601 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:24:15,603 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:24:17,603 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:24:19,604 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:24:23,606 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:24:23,934 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:24:23,934 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:24:24,689 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:24:24,741 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:24:24,827 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:24:25,607 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:24:25,607 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:24:29,608 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:24:31,609 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:24:35,178 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:24:35,610 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:24:37,611 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:24:38,989 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:24:38,990 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:24:40,612 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:24:44,613 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:24:46,614 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:24:47,779 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:24:47,831 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:24:47,912 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:24:48,615 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:24:50,615 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:24:52,616 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:24:54,098 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:24:54,100 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:24:54,617 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:24:58,618 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:25:00,619 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:25:04,621 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:25:05,603 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:25:06,621 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:25:08,622 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:25:09,267 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:25:09,267 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:25:10,630 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:25:10,682 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:25:10,768 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:25:11,623 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:25:12,624 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:25:14,625 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:25:17,626 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:25:21,627 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:25:23,628 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:25:24,311 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:25:24,312 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:25:27,629 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:25:29,630 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:25:31,631 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:25:33,339 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:25:33,392 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:25:33,480 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:25:33,632 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:25:34,632 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:25:35,632 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:25:36,056 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:25:37,633 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:25:39,628 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:25:39,629 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:25:39,634 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:25:43,635 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:25:45,636 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:25:47,637 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:25:52,638 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:25:54,639 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:25:54,798 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:25:54,799 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:25:55,957 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:25:56,010 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:25:56,098 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:25:56,640 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:25:57,640 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:25:58,641 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:26:00,642 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:26:04,643 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:26:06,455 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:26:06,644 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:26:08,645 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:26:09,848 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:26:09,850 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:26:12,646 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:26:14,647 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:26:16,648 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:26:18,665 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:26:18,742 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:26:18,825 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:26:19,649 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:26:19,649 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:26:20,649 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:26:22,650 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:26:24,965 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:26:24,966 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:26:27,652 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:26:29,652 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:26:31,653 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:26:35,654 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:26:36,882 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:26:37,655 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:26:39,656 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:26:40,135 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:26:40,136 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:26:41,114 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:26:41,168 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:26:41,249 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:26:41,657 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:26:43,657 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:26:45,658 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:26:47,659 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:26:51,660 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:26:53,661 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:26:55,349 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:26:55,350 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:26:57,662 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:26:59,663 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:27:01,664 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:27:03,450 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:27:03,506 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:27:03,589 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:27:03,664 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:27:04,665 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:27:06,666 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:27:07,287 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:27:08,666 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:27:10,518 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:27:10,519 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:27:10,667 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:27:12,668 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:27:16,669 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:27:18,670 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:27:22,671 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:27:24,672 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:27:25,471 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:27:25,525 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:27:25,608 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:27:25,672 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:27:25,726 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:27:25,728 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:27:26,673 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:27:30,674 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:27:32,675 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:27:34,676 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:27:37,806 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:27:38,677 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:27:40,678 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:27:40,891 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:27:40,892 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:27:43,679 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:27:47,620 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:27:47,670 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:27:47,749 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:27:47,751 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:27:48,750 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:27:48,750 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:27:49,750 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:27:51,751 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:27:55,752 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:27:56,044 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:27:56,046 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:27:57,753 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:28:01,755 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:28:03,756 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:28:05,756 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:28:08,241 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:28:09,758 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:28:10,418 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:28:10,630 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:28:10,758 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:28:10,767 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:28:11,274 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:28:11,275 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:28:11,759 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:28:12,759 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:28:13,760 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:28:17,761 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:28:19,762 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:28:23,763 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:28:25,764 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:28:26,529 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:28:26,529 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:28:27,765 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:28:29,765 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:28:32,057 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:28:32,110 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:28:32,195 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:28:32,766 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:28:33,767 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:28:34,767 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:28:36,768 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:28:38,726 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:28:38,769 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:28:41,584 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:28:41,585 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:28:42,770 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:28:44,771 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:28:46,772 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:28:48,773 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:28:52,774 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:28:53,709 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:28:53,758 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:28:53,844 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:28:54,843 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:28:54,843 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:28:55,843 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:28:56,689 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:28:56,690 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:28:57,844 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:29:01,846 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:29:03,846 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:29:05,847 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:29:09,157 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:29:09,848 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:29:11,849 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:29:11,852 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:29:11,852 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:29:13,850 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:29:15,263 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:29:15,315 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:29:15,406 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:29:15,851 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:29:17,851 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:29:19,852 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:29:21,853 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:29:25,854 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:29:27,049 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:29:27,050 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:29:27,855 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:29:29,856 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:29:31,857 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:29:35,858 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:29:36,644 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:29:36,699 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:29:36,791 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:29:36,859 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:29:37,859 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:29:39,580 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:29:39,860 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:29:42,383 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:29:42,385 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:29:44,862 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:29:46,863 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:29:48,863 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:29:52,865 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:29:54,865 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:29:56,866 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:29:57,553 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:29:57,554 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:29:57,852 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:29:57,906 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:29:57,999 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:29:58,906 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:29:58,907 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:29:59,907 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:30:02,908 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:30:04,909 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:30:06,909 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:30:08,910 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:30:09,982 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:30:12,601 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:30:12,602 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:30:12,912 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:30:14,912 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:30:16,913 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:30:18,815 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:30:18,873 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:30:18,962 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:30:19,956 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:30:19,956 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:30:20,956 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:30:21,957 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:30:23,958 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:30:25,958 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:30:27,767 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:30:27,768 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:30:27,959 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:30:31,961 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:30:33,961 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:30:35,962 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:30:37,963 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:30:39,685 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:30:39,736 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:30:39,828 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:30:39,963 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:30:40,379 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:30:40,964 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:30:41,964 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:30:42,941 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:30:42,942 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:30:43,965 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:30:45,966 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:30:47,966 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:30:51,968 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:30:53,969 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:30:55,969 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:30:58,149 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:30:58,150 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:30:58,970 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:31:00,453 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:31:00,509 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:31:00,601 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:31:00,971 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:31:02,972 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:31:04,973 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:31:06,974 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:31:10,776 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:31:10,976 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:31:12,977 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:31:13,419 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:31:13,421 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:31:14,978 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:31:16,979 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:31:20,981 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:31:21,073 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:31:21,126 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:31:21,211 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:31:21,981 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:31:22,981 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:31:24,982 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:31:26,983 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:31:28,496 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:31:28,497 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:31:30,985 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:31:32,985 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:31:34,986 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:31:36,987 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:31:41,217 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:31:41,538 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:31:41,592 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:31:41,675 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:31:41,989 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:31:41,989 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:31:43,536 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:31:43,537 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:31:43,990 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:31:45,991 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:31:47,991 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:31:51,993 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:31:53,994 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:31:55,994 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:31:57,995 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:31:58,582 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:31:58,582 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:31:59,996 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:32:01,931 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:32:01,986 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:32:02,078 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:32:03,071 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:32:04,072 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:32:06,073 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:32:08,073 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:32:10,074 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:32:11,634 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:32:13,624 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:32:13,625 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:32:15,076 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:32:17,076 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:32:19,077 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:32:21,078 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:32:22,840 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:32:23,018 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:32:23,105 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:32:24,103 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:32:24,104 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:32:25,104 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:32:27,105 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:32:28,800 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:32:28,801 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:32:29,105 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:32:33,107 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:32:35,108 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:32:37,108 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:32:39,109 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:32:41,110 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:32:42,057 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:32:42,798 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:32:42,852 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:32:42,946 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:32:43,111 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:32:43,111 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:32:43,926 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:32:43,927 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:32:44,111 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:32:47,112 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:32:49,113 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:32:51,114 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:32:56,115 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:32:58,116 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:32:58,994 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:32:58,994 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:33:00,117 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:33:02,118 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:33:02,763 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:33:02,820 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:33:02,904 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:33:03,118 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:33:04,119 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:33:08,120 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:33:10,121 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:33:12,122 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:33:12,470 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:33:14,048 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:33:14,049 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:33:14,123 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:33:16,124 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:33:20,125 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:33:22,126 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:33:22,417 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:33:22,461 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:33:22,553 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:33:23,126 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:33:24,127 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:33:25,127 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:33:26,127 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:33:28,128 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:33:29,095 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:33:29,097 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:33:32,130 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:33:34,131 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:33:36,131 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:33:38,132 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:33:40,133 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:33:41,953 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:33:42,005 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:33:42,090 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:33:42,133 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:33:42,933 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:33:43,134 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:33:44,147 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:33:44,149 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:33:47,135 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:33:49,137 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:33:51,137 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:33:53,138 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:33:55,139 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:33:59,140 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:33:59,309 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:33:59,310 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:34:01,141 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:34:01,245 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:34:01,298 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:34:01,386 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:34:02,141 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:34:03,142 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:34:05,142 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:34:07,143 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:34:09,144 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:34:13,145 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:34:13,365 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:34:14,670 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:34:14,672 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:34:15,146 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:34:17,147 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:34:20,148 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:34:21,180 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:34:21,236 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:34:21,323 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:34:22,149 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:34:22,149 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:34:24,149 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:34:26,150 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:34:28,151 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:34:29,879 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:34:29,880 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:34:30,152 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:34:32,152 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:34:34,153 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:34:36,154 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:34:40,012 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:34:40,068 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:34:40,155 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:34:40,155 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:34:40,156 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:34:42,156 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:34:43,759 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:34:44,157 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:34:45,021 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:34:45,023 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:34:46,157 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:34:48,158 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:34:50,159 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:34:54,160 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:34:56,161 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:34:58,162 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:34:58,688 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:34:58,744 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:34:58,837 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:34:59,162 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:35:00,162 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:35:00,168 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:35:00,169 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:35:02,163 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:35:05,164 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:35:07,165 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:35:09,166 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:35:11,166 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:35:13,167 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:35:14,161 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:35:15,334 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:35:15,334 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:35:17,014 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:35:17,067 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:35:17,155 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:35:17,171 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:35:17,171 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:35:18,171 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:35:19,171 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:35:21,172 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:35:23,173 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:35:25,173 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:35:27,174 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:35:29,175 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:35:30,403 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:35:30,404 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:35:32,176 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:35:34,177 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:35:35,185 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:35:35,240 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:35:35,331 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:35:36,178 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:35:36,178 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:35:40,180 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:35:42,181 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:35:44,182 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:35:44,579 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:35:45,448 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:35:45,449 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:35:46,183 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:35:48,183 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:35:50,184 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:35:52,185 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:35:53,091 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:35:53,149 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:35:53,274 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:35:54,240 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:35:54,241 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:35:55,241 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:35:56,241 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:36:00,243 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:36:00,614 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:36:00,615 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:36:02,244 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:36:05,245 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:36:07,246 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:36:09,246 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:36:11,247 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:36:11,261 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:36:11,318 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:36:11,423 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:36:12,247 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:36:13,248 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:36:15,087 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:36:15,249 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:36:15,765 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:36:15,766 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:36:17,249 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:36:21,251 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:36:23,252 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:36:25,253 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:36:27,253 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:36:29,254 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:36:30,559 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:36:30,739 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:36:30,839 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:36:30,844 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:36:30,846 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:36:31,255 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:36:31,255 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:36:33,255 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:36:36,257 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:36:40,258 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:36:42,259 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:36:44,260 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:36:45,577 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:36:45,979 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:36:45,979 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:36:46,260 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:36:48,261 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:36:48,464 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:36:48,556 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:36:48,646 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:36:49,262 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:36:49,262 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:36:50,262 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:36:52,263 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:36:54,264 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:36:56,264 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:36:58,265 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:37:00,266 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:37:01,183 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:37:01,185 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:37:02,267 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:37:04,267 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:37:05,203 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:37:05,257 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:37:05,391 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:37:06,384 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:37:06,384 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:37:08,384 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:37:10,385 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:37:12,386 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:37:14,387 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:37:16,068 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:37:16,383 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:37:16,385 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:37:16,387 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:37:18,388 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:37:20,389 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:37:21,206 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:37:21,263 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:37:21,352 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:37:21,389 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:37:22,389 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:37:24,390 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:37:26,391 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:37:28,392 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:37:30,392 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:37:31,445 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:37:31,446 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:37:33,393 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:37:35,394 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:37:35,911 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:37:35,966 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:37:36,054 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:37:36,395 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:37:37,395 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:37:38,395 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:37:39,396 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:37:41,396 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:37:43,397 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:37:45,398 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:37:46,524 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:37:46,526 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:37:46,575 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:37:47,399 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:37:49,400 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:37:49,737 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:37:49,796 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:37:49,896 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:37:50,400 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:37:50,400 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:37:51,400 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:37:53,401 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:37:55,402 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:37:57,403 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:37:59,404 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:38:01,405 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:38:01,676 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:38:01,678 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:38:02,048 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:38:02,104 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:38:02,198 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:38:02,405 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:38:03,405 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:38:04,406 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:38:05,406 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:38:07,407 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:38:09,408 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:38:11,409 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:38:13,409 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:38:13,429 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:38:13,485 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:38:13,577 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:38:14,410 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:38:14,410 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:38:15,410 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:38:16,727 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:38:16,728 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:38:16,964 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:38:17,411 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:38:19,412 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:38:21,413 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:38:23,413 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:38:23,704 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:38:23,761 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:38:23,861 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:38:24,414 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:38:25,414 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:38:27,415 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:38:29,416 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:38:31,416 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:38:31,824 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:38:31,825 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:38:32,684 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:38:32,739 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:38:32,823 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:38:33,417 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:38:33,417 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:38:35,418 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:38:37,419 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:38:39,419 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:38:40,576 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:38:40,637 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:38:40,730 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:38:41,420 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:38:41,420 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:38:43,421 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:38:45,422 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:38:46,978 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:38:46,979 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:38:47,252 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:38:47,328 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:38:47,419 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:38:47,422 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:38:47,423 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:38:47,463 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:38:49,423 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:38:52,424 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:38:53,637 DEBUG SenderThread:266733 [sender.py:send():235] send: history +2022-03-03 00:38:53,829 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:38:53,977 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:38:54,425 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:38:54,425 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:38:55,426 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:38:56,426 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:38:58,392 DEBUG SenderThread:266733 [sender.py:send():235] send: telemetry +2022-03-03 00:38:58,392 DEBUG SenderThread:266733 [sender.py:send():235] send: exit +2022-03-03 00:38:58,392 INFO SenderThread:266733 [sender.py:send_exit():371] handling exit code: 1 +2022-03-03 00:38:58,393 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-03 00:38:58,393 INFO SenderThread:266733 [sender.py:send_exit():373] handling runtime: 3721 +2022-03-03 00:38:58,451 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:38:58,454 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:38:58,455 INFO SenderThread:266733 [sender.py:send_exit():379] send defer +2022-03-03 00:38:58,455 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: poll_exit +2022-03-03 00:38:58,456 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: defer +2022-03-03 00:38:58,456 INFO HandlerThread:266733 [handler.py:handle_request_defer():154] handle defer: 0 +2022-03-03 00:38:58,456 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: defer +2022-03-03 00:38:58,456 INFO SenderThread:266733 [sender.py:send_request_defer():388] handle sender defer: 0 +2022-03-03 00:38:58,456 INFO SenderThread:266733 [sender.py:transition_state():392] send defer: 1 +2022-03-03 00:38:58,457 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: defer +2022-03-03 00:38:58,457 INFO HandlerThread:266733 [handler.py:handle_request_defer():154] handle defer: 1 +2022-03-03 00:38:58,584 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-03 00:38:58,585 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: defer +2022-03-03 00:38:58,585 INFO SenderThread:266733 [sender.py:send_request_defer():388] handle sender defer: 1 +2022-03-03 00:38:58,585 INFO SenderThread:266733 [sender.py:transition_state():392] send defer: 2 +2022-03-03 00:38:58,586 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: poll_exit +2022-03-03 00:38:58,586 DEBUG SenderThread:266733 [sender.py:send():235] send: stats +2022-03-03 00:38:58,586 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: defer +2022-03-03 00:38:58,587 INFO HandlerThread:266733 [handler.py:handle_request_defer():154] handle defer: 2 +2022-03-03 00:38:58,587 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: defer +2022-03-03 00:38:58,587 INFO SenderThread:266733 [sender.py:send_request_defer():388] handle sender defer: 2 +2022-03-03 00:38:58,587 INFO SenderThread:266733 [sender.py:transition_state():392] send defer: 3 +2022-03-03 00:38:58,588 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: defer +2022-03-03 00:38:58,588 INFO HandlerThread:266733 [handler.py:handle_request_defer():154] handle defer: 3 +2022-03-03 00:38:58,649 DEBUG SenderThread:266733 [sender.py:send():235] send: summary +2022-03-03 00:38:58,741 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-03 00:38:58,770 INFO SenderThread:266733 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:38:58,771 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: defer +2022-03-03 00:38:58,771 INFO SenderThread:266733 [sender.py:send_request_defer():388] handle sender defer: 3 +2022-03-03 00:38:58,771 INFO SenderThread:266733 [sender.py:transition_state():392] send defer: 4 +2022-03-03 00:38:58,771 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: poll_exit +2022-03-03 00:38:58,772 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: defer +2022-03-03 00:38:58,772 INFO HandlerThread:266733 [handler.py:handle_request_defer():154] handle defer: 4 +2022-03-03 00:38:58,772 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: defer +2022-03-03 00:38:58,772 INFO SenderThread:266733 [sender.py:send_request_defer():388] handle sender defer: 4 +2022-03-03 00:38:58,873 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-03 00:38:59,513 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:38:59,763 INFO SenderThread:266733 [sender.py:transition_state():392] send defer: 5 +2022-03-03 00:38:59,763 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: poll_exit +2022-03-03 00:38:59,764 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: defer +2022-03-03 00:38:59,765 INFO HandlerThread:266733 [handler.py:handle_request_defer():154] handle defer: 5 +2022-03-03 00:38:59,765 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: defer +2022-03-03 00:38:59,765 INFO SenderThread:266733 [sender.py:send_request_defer():388] handle sender defer: 5 +2022-03-03 00:38:59,765 INFO SenderThread:266733 [dir_watcher.py:finish():283] shutting down directory watcher +2022-03-03 00:38:59,866 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-03 00:39:00,503 INFO Thread-8 :266733 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/config.yaml +2022-03-03 00:39:00,504 INFO SenderThread:266733 [dir_watcher.py:finish():313] scan: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files +2022-03-03 00:39:00,504 INFO SenderThread:266733 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-metadata.json wandb-metadata.json +2022-03-03 00:39:00,504 INFO SenderThread:266733 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log output.log +2022-03-03 00:39:00,504 INFO SenderThread:266733 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json wandb-summary.json +2022-03-03 00:39:00,504 INFO SenderThread:266733 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/requirements.txt requirements.txt +2022-03-03 00:39:00,508 INFO SenderThread:266733 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/config.yaml config.yaml +2022-03-03 00:39:00,508 INFO SenderThread:266733 [sender.py:transition_state():392] send defer: 6 +2022-03-03 00:39:00,509 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: poll_exit +2022-03-03 00:39:00,512 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: defer +2022-03-03 00:39:00,512 INFO HandlerThread:266733 [handler.py:handle_request_defer():154] handle defer: 6 +2022-03-03 00:39:00,513 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: defer +2022-03-03 00:39:00,516 INFO SenderThread:266733 [sender.py:send_request_defer():388] handle sender defer: 6 +2022-03-03 00:39:00,519 INFO SenderThread:266733 [file_pusher.py:finish():177] shutting down file pusher +2022-03-03 00:39:00,614 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-03 00:39:00,614 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: poll_exit +2022-03-03 00:39:00,717 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-03 00:39:00,717 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: poll_exit +2022-03-03 00:39:00,808 INFO Thread-15 :266733 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/config.yaml +2022-03-03 00:39:00,819 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-03 00:39:00,819 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: poll_exit +2022-03-03 00:39:00,841 INFO Thread-12 :266733 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/output.log +2022-03-03 00:39:00,860 INFO Thread-13 :266733 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/wandb-summary.json +2022-03-03 00:39:00,921 INFO Thread-14 :266733 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/files/requirements.txt +2022-03-03 00:39:00,922 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-03 00:39:00,923 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: poll_exit +2022-03-03 00:39:01,025 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-03 00:39:01,025 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: poll_exit +2022-03-03 00:39:01,123 INFO Thread-7 :266733 [sender.py:transition_state():392] send defer: 7 +2022-03-03 00:39:01,123 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: defer +2022-03-03 00:39:01,123 INFO HandlerThread:266733 [handler.py:handle_request_defer():154] handle defer: 7 +2022-03-03 00:39:01,124 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: defer +2022-03-03 00:39:01,124 INFO SenderThread:266733 [sender.py:send_request_defer():388] handle sender defer: 7 +2022-03-03 00:39:01,137 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-03 00:39:02,761 INFO SenderThread:266733 [sender.py:transition_state():392] send defer: 8 +2022-03-03 00:39:02,761 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: poll_exit +2022-03-03 00:39:02,762 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: defer +2022-03-03 00:39:02,762 INFO HandlerThread:266733 [handler.py:handle_request_defer():154] handle defer: 8 +2022-03-03 00:39:02,762 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: defer +2022-03-03 00:39:02,762 INFO SenderThread:266733 [sender.py:send_request_defer():388] handle sender defer: 8 +2022-03-03 00:39:02,763 INFO SenderThread:266733 [sender.py:transition_state():392] send defer: 9 +2022-03-03 00:39:02,764 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: defer +2022-03-03 00:39:02,765 INFO HandlerThread:266733 [handler.py:handle_request_defer():154] handle defer: 9 +2022-03-03 00:39:02,765 DEBUG SenderThread:266733 [sender.py:send():235] send: final +2022-03-03 00:39:02,765 DEBUG SenderThread:266733 [sender.py:send():235] send: footer +2022-03-03 00:39:02,765 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: defer +2022-03-03 00:39:02,765 INFO SenderThread:266733 [sender.py:send_request_defer():388] handle sender defer: 9 +2022-03-03 00:39:02,864 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-03 00:39:02,864 DEBUG SenderThread:266733 [sender.py:send_request():249] send_request: poll_exit +2022-03-03 00:39:02,864 INFO SenderThread:266733 [file_pusher.py:join():182] waiting for file pusher +2022-03-03 00:39:02,930 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: get_summary +2022-03-03 00:39:03,043 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: sampled_history +2022-03-03 00:39:03,046 DEBUG HandlerThread:266733 [handler.py:handle_request():131] handle_request: shutdown +2022-03-03 00:39:03,047 INFO HandlerThread:266733 [handler.py:finish():739] shutting down handler +2022-03-03 00:39:03,764 INFO WriterThread:266733 [datastore.py:close():281] close: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/run-33dtvgaa.wandb +2022-03-03 00:39:03,929 INFO SenderThread:266733 [sender.py:finish():1075] shutting down sender +2022-03-03 00:39:03,929 INFO SenderThread:266733 [file_pusher.py:finish():177] shutting down file pusher +2022-03-03 00:39:03,930 INFO SenderThread:266733 [file_pusher.py:join():182] waiting for file pusher +2022-03-03 00:39:03,938 INFO MainThread:266733 [internal.py:handle_exit():79] Internal process exited diff --git a/wandb/run-20220302_233655-33dtvgaa/logs/debug.log b/wandb/run-20220302_233655-33dtvgaa/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..32fe00b70c3d3605e6c6342d4d62eefff3939e4f --- /dev/null +++ b/wandb/run-20220302_233655-33dtvgaa/logs/debug.log @@ -0,0 +1,133 @@ +2022-03-02 23:36:55,285 INFO MainThread:266634 [wandb_setup.py:_flush():75] Loading settings from /home/sanchit_huggingface_co/.config/wandb/settings +2022-03-02 23:36:55,285 INFO MainThread:266634 [wandb_setup.py:_flush():75] Loading settings from /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/settings +2022-03-02 23:36:55,285 INFO MainThread:266634 [wandb_setup.py:_flush():75] Loading settings from environment variables: {} +2022-03-02 23:36:55,285 INFO MainThread:266634 [wandb_setup.py:_flush():75] Inferring run settings from compute environment: {'program_relpath': 'run_speech_recognition_seq2seq.py', 'program': '/home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/run_speech_recognition_seq2seq.py'} +2022-03-02 23:36:55,285 INFO MainThread:266634 [wandb_init.py:_log_setup():386] Logging user logs to /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/logs/debug.log +2022-03-02 23:36:55,285 INFO MainThread:266634 [wandb_init.py:_log_setup():387] Logging internal logs to /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_233655-33dtvgaa/logs/debug-internal.log +2022-03-02 23:36:55,285 INFO MainThread:266634 [wandb_init.py:init():420] calling init triggers +2022-03-02 23:36:55,285 INFO MainThread:266634 [wandb_init.py:init():425] wandb.init called with sweep_config: {} +config: {} +2022-03-02 23:36:55,285 INFO MainThread:266634 [wandb_init.py:init():471] starting backend +2022-03-02 23:36:55,286 INFO MainThread:266634 [backend.py:_multiprocessing_setup():99] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2022-03-02 23:36:55,362 INFO MainThread:266634 [backend.py:ensure_launched():219] starting backend process... +2022-03-02 23:36:55,432 INFO MainThread:266634 [backend.py:ensure_launched():224] started backend process with pid: 266733 +2022-03-02 23:36:55,434 INFO MainThread:266634 [wandb_init.py:init():480] backend started and connected +2022-03-02 23:36:55,444 INFO MainThread:266634 [wandb_init.py:init():550] updated telemetry +2022-03-02 23:36:55,608 INFO MainThread:266634 [wandb_init.py:init():581] communicating current version +2022-03-02 23:36:56,328 INFO MainThread:266634 [wandb_init.py:init():586] got version response upgrade_message: "wandb version 0.12.11 is available! To upgrade, please run:\n $ pip install wandb --upgrade" + +2022-03-02 23:36:56,328 INFO MainThread:266634 [wandb_init.py:init():596] communicating run to backend with 30 second timeout +2022-03-02 23:36:56,427 INFO MainThread:266634 [wandb_init.py:init():624] starting run threads in backend +2022-03-02 23:36:56,564 INFO MainThread:266634 [wandb_run.py:_console_start():1827] atexit reg +2022-03-02 23:36:56,564 INFO MainThread:266634 [wandb_run.py:_redirect():1701] redirect: SettingsConsole.REDIRECT +2022-03-02 23:36:56,565 INFO MainThread:266634 [wandb_run.py:_redirect():1706] Redirecting console. +2022-03-02 23:36:56,566 INFO MainThread:266634 [wandb_run.py:_redirect():1762] Redirects installed. +2022-03-02 23:36:56,567 INFO MainThread:266634 [wandb_init.py:init():651] run started, returning control to user process +2022-03-02 23:36:56,569 INFO MainThread:266634 [wandb_run.py:_config_callback():966] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'torch.float32', 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 50, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'architectures': ['SpeechEncoderDecoderModel'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 50256, 'eos_token_id': 50256, 'sep_token_id': None, 'decoder_start_token_id': 50256, 'task_specific_params': None, 'problem_type': None, '_name_or_path': './', 'transformers_version': None, 'decoder': {'vocab_size': 50257, 'n_positions': 1024, 'n_embd': 1024, 'n_layer': 24, 'n_head': 16, 'n_inner': None, 'activation_function': 'gelu_new', 'resid_pdrop': 0.0, 'embd_pdrop': 0.0, 'attn_pdrop': 0.0, 'layer_norm_epsilon': 1e-05, 'initializer_range': 0.02, 'summary_type': 'cls_index', 'summary_use_proj': True, 'summary_activation': None, 'summary_first_dropout': 0.0, 'summary_proj_to_labels': True, 'scale_attn_weights': True, 'use_cache': False, 'scale_attn_by_inverse_layer_idx': False, 'reorder_and_upcast_attn': False, 'bos_token_id': 50256, 'eos_token_id': 50256, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': True, 'cross_attention_hidden_size': None, 'add_cross_attention': True, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'architectures': ['GPT2LMHeadModel'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'pad_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': {'text-generation': {'do_sample': True, 'max_length': 50}}, 'problem_type': None, '_name_or_path': 'gpt2-medium', 'transformers_version': '4.17.0.dev0', 'n_ctx': 1024, 'n_special': 0, 'predict_special_tokens': True, 'model_type': 'gpt2'}, 'encoder': {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'architectures': ['Wav2Vec2ForPreTraining'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 1, 'pad_token_id': 0, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'facebook/wav2vec2-large-lv60', 'transformers_version': '4.17.0.dev0', 'feat_extract_dropout': 0.0, 'gradient_checkpointing': False, 'hidden_dropout_prob': 0.0, 'num_feat_extract_layers': 7, 'hidden_size': 1024, 'feat_extract_norm': 'layer', 'feat_extract_activation': 'gelu', 'conv_dim': [512, 512, 512, 512, 512, 512, 512], 'conv_stride': [5, 2, 2, 2, 2, 2, 2], 'conv_kernel': [10, 3, 3, 3, 3, 2, 2], 'conv_bias': True, 'num_conv_pos_embeddings': 128, 'num_conv_pos_embedding_groups': 16, 'num_hidden_layers': 24, 'intermediate_size': 4096, 'hidden_act': 'gelu', 'num_attention_heads': 16, 'hidden_dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'feat_proj_dropout': 0.0, 'final_dropout': 0.0, 'layerdrop': 0.0, 'layer_norm_eps': 1e-05, 'initializer_range': 0.02, 'vocab_size': 32, 'do_stable_layer_norm': True, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.0, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'num_codevectors_per_group': 320, 'num_codevector_groups': 2, 'contrastive_logits_temperature': 0.1, 'feat_quantizer_dropout': 0.0, 'num_negatives': 100, 'codevector_dim': 768, 'proj_codevector_dim': 768, 'diversity_loss_weight': 0.1, 'ctc_loss_reduction': 'sum', 'ctc_zero_infinity': False, 'add_adapter': True, 'adapter_kernel_size': 3, 'adapter_stride': 2, 'num_adapter_layers': 3, 'output_hidden_size': 1024, 'classifier_proj_size': 256, 'tdnn_dim': [512, 512, 512, 512, 1500], 'tdnn_kernel': [5, 3, 3, 1, 1], 'tdnn_dilation': [1, 2, 3, 1, 1], 'xvector_output_dim': 512, 'model_type': 'wav2vec2'}, 'model_type': 'speech-encoder-decoder', 'processor_class': 'Wav2Vec2Processor', 'use_cache': False, 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 14, 'per_device_eval_batch_size': 14, 'per_gpu_train_batch_size': 'None', 'per_gpu_eval_batch_size': 'None', 'gradient_accumulation_steps': 8, 'eval_accumulation_steps': 'None', 'learning_rate': 0.0003, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1.0, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Mar02_23-36-13_sanchit--v100', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 1, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 500, 'save_total_limit': 1, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'amp', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': 'None', 'local_rank': -1, 'xpu_backend': 'None', 'tpu_num_cores': 'None', 'tpu_metrics_debug': False, 'debug': '[]', 'dataloader_drop_last': False, 'eval_steps': 500, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': 'None', 'load_best_model_at_end': False, 'metric_for_best_model': 'None', 'greater_is_better': 'None', 'ignore_data_skip': False, 'sharded_ddp': '[]', 'deepspeed': 'None', 'label_smoothing_factor': 0.0, 'optim': 'adamw_hf', 'adafactor': False, 'group_by_length': True, 'length_column_name': 'input_length', 'report_to': "['wandb']", 'ddp_find_unused_parameters': 'None', 'ddp_bucket_cap_mb': 'None', 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': 'None', 'hub_model_id': 'None', 'hub_strategy': 'every_save', 'hub_token': '', 'gradient_checkpointing': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': 'None', 'push_to_hub_organization': 'None', 'push_to_hub_token': '', '_n_gpu': 1, 'mp_parameters': '', 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 40, 'generation_num_beams': 1, 'train_batch_size': 14, 'eval_batch_size': 14} +2022-03-02 23:36:56,572 INFO MainThread:266634 [wandb_watch.py:watch():43] Watching +2022-03-03 00:38:55,561 INFO MainThread:266634 [wandb_run.py:_atexit_cleanup():1797] got exitcode: 1 +2022-03-03 00:38:55,563 INFO MainThread:266634 [wandb_run.py:_restore():1769] restore +2022-03-03 00:38:58,456 INFO MainThread:266634 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 2095 + total_bytes: 2095 +} + +2022-03-03 00:38:58,586 INFO MainThread:266634 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 2095 + total_bytes: 2095 +} + +2022-03-03 00:38:58,772 INFO MainThread:266634 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 2095 + total_bytes: 2095 +} + +2022-03-03 00:38:59,764 INFO MainThread:266634 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 2095 + total_bytes: 2095 +} + +2022-03-03 00:39:00,513 INFO MainThread:266634 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 4 +} +pusher_stats { + uploaded_bytes: 2095 + total_bytes: 1999756 +} + +2022-03-03 00:39:00,615 INFO MainThread:266634 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 299669 + total_bytes: 2297330 +} + +2022-03-03 00:39:00,718 INFO MainThread:266634 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 2297330 + total_bytes: 2297330 +} + +2022-03-03 00:39:00,820 INFO MainThread:266634 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 2297330 + total_bytes: 2297330 +} + +2022-03-03 00:39:00,923 INFO MainThread:266634 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 2297330 + total_bytes: 2297330 +} + +2022-03-03 00:39:01,026 INFO MainThread:266634 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 2297330 + total_bytes: 2297330 +} + +2022-03-03 00:39:02,762 INFO MainThread:266634 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 2297330 + total_bytes: 2297330 +} + +2022-03-03 00:39:02,929 INFO MainThread:266634 [wandb_run.py:_wait_for_finish():1929] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 2297330 + total_bytes: 2297330 +} +local_info { +} + +2022-03-03 00:39:04,125 INFO MainThread:266634 [wandb_run.py:_append_history():2144] rendering history +2022-03-03 00:39:04,126 INFO MainThread:266634 [wandb_run.py:_append_summary():2102] rendering summary +2022-03-03 00:39:04,127 INFO MainThread:266634 [wandb_run.py:_append_files():2194] logging synced files diff --git a/wandb/run-20220302_233655-33dtvgaa/run-33dtvgaa.wandb b/wandb/run-20220302_233655-33dtvgaa/run-33dtvgaa.wandb new file mode 100644 index 0000000000000000000000000000000000000000..d6035b5fdc9e801d4c6bb7b3a2774512c6eb9de6 --- /dev/null +++ b/wandb/run-20220302_233655-33dtvgaa/run-33dtvgaa.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12a5d36de07abad2de3fa12743b03c3105d2a4cedeb9e2f608b915a435c08667 +size 26660446 diff --git a/wandb/run-20220303_004039-1a18zsso/files/config.yaml b/wandb/run-20220303_004039-1a18zsso/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a3875db7a4af4e4400e028f7df7cb2ca854d1e76 --- /dev/null +++ b/wandb/run-20220303_004039-1a18zsso/files/config.yaml @@ -0,0 +1,713 @@ +wandb_version: 1 + +_n_gpu: + desc: null + value: 1 +_name_or_path: + desc: null + value: ./ +_wandb: + desc: null + value: + cli_version: 0.12.10 + framework: huggingface + huggingface_version: 4.17.0.dev0 + is_jupyter_run: false + is_kaggle_kernel: false + m: + - 1: train/global_step + 6: + - 3 + python_version: 3.9.5 + start_time: 1646268039 + t: + 1: + - 1 + - 5 + - 11 + 2: + - 1 + - 5 + - 11 + 3: + - 1 + - 7 + - 13 + 4: 3.9.5 + 5: 0.12.10 + 6: 4.17.0.dev0 + 8: + - 5 +adafactor: + desc: null + value: false +adam_beta1: + desc: null + value: 0.9 +adam_beta2: + desc: null + value: 0.999 +adam_epsilon: + desc: null + value: 1.0e-08 +add_cross_attention: + desc: null + value: false +architectures: + desc: null + value: + - SpeechEncoderDecoderModel +bad_words_ids: + desc: null + value: null +bf16: + desc: null + value: false +bf16_full_eval: + desc: null + value: false +bos_token_id: + desc: null + value: null +chunk_size_feed_forward: + desc: null + value: 0 +cross_attention_hidden_size: + desc: null + value: null +dataloader_drop_last: + desc: null + value: false +dataloader_num_workers: + desc: null + value: 0 +dataloader_pin_memory: + desc: null + value: true +ddp_bucket_cap_mb: + desc: null + value: None +ddp_find_unused_parameters: + desc: null + value: None +debug: + desc: null + value: '[]' +decoder: + desc: null + value: + _name_or_path: gpt2-medium + activation_function: gelu_new + add_cross_attention: true + architectures: + - GPT2LMHeadModel + attn_pdrop: 0.0 + bad_words_ids: null + bos_token_id: 50256 + chunk_size_feed_forward: 0 + cross_attention_hidden_size: null + decoder_start_token_id: null + diversity_penalty: 0.0 + do_sample: false + early_stopping: false + embd_pdrop: 0.0 + encoder_no_repeat_ngram_size: 0 + eos_token_id: 50256 + finetuning_task: null + forced_bos_token_id: null + forced_eos_token_id: null + id2label: + '0': LABEL_0 + '1': LABEL_1 + initializer_range: 0.02 + is_decoder: true + is_encoder_decoder: false + label2id: + LABEL_0: 0 + LABEL_1: 1 + layer_norm_epsilon: 1.0e-05 + length_penalty: 1.0 + max_length: 20 + min_length: 0 + model_type: gpt2 + n_ctx: 1024 + n_embd: 1024 + n_head: 16 + n_inner: null + n_layer: 24 + n_positions: 1024 + n_special: 0 + no_repeat_ngram_size: 0 + num_beam_groups: 1 + num_beams: 1 + num_return_sequences: 1 + output_attentions: false + output_hidden_states: false + output_scores: false + pad_token_id: null + predict_special_tokens: true + prefix: null + problem_type: null + pruned_heads: {} + remove_invalid_values: false + reorder_and_upcast_attn: false + repetition_penalty: 1.0 + resid_pdrop: 0.0 + return_dict: true + return_dict_in_generate: false + scale_attn_by_inverse_layer_idx: false + scale_attn_weights: true + sep_token_id: null + summary_activation: null + summary_first_dropout: 0.0 + summary_proj_to_labels: true + summary_type: cls_index + summary_use_proj: true + task_specific_params: + text-generation: + do_sample: true + max_length: 50 + temperature: 1.0 + tie_encoder_decoder: false + tie_word_embeddings: true + tokenizer_class: null + top_k: 50 + top_p: 1.0 + torch_dtype: null + torchscript: false + transformers_version: 4.17.0.dev0 + use_bfloat16: false + use_cache: false + vocab_size: 50257 +decoder_start_token_id: + desc: null + value: 50256 +deepspeed: + desc: null + value: None +disable_tqdm: + desc: null + value: false +diversity_penalty: + desc: null + value: 0.0 +do_eval: + desc: null + value: true +do_predict: + desc: null + value: false +do_sample: + desc: null + value: false +do_train: + desc: null + value: true +early_stopping: + desc: null + value: false +encoder: + desc: null + value: + _name_or_path: facebook/wav2vec2-large-lv60 + activation_dropout: 0.0 + adapter_kernel_size: 3 + adapter_stride: 2 + add_adapter: true + add_cross_attention: false + apply_spec_augment: false + architectures: + - Wav2Vec2ForPreTraining + attention_dropout: 0.0 + bad_words_ids: null + bos_token_id: 1 + chunk_size_feed_forward: 0 + classifier_proj_size: 256 + codevector_dim: 768 + contrastive_logits_temperature: 0.1 + conv_bias: true + conv_dim: + - 512 + - 512 + - 512 + - 512 + - 512 + - 512 + - 512 + conv_kernel: + - 10 + - 3 + - 3 + - 3 + - 3 + - 2 + - 2 + conv_stride: + - 5 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + cross_attention_hidden_size: null + ctc_loss_reduction: sum + ctc_zero_infinity: false + decoder_start_token_id: null + diversity_loss_weight: 0.1 + diversity_penalty: 0.0 + do_sample: false + do_stable_layer_norm: true + early_stopping: false + encoder_no_repeat_ngram_size: 0 + eos_token_id: 2 + feat_extract_activation: gelu + feat_extract_dropout: 0.0 + feat_extract_norm: layer + feat_proj_dropout: 0.0 + feat_quantizer_dropout: 0.0 + final_dropout: 0.0 + finetuning_task: null + forced_bos_token_id: null + forced_eos_token_id: null + gradient_checkpointing: false + hidden_act: gelu + hidden_dropout: 0.0 + hidden_dropout_prob: 0.0 + hidden_size: 1024 + id2label: + '0': LABEL_0 + '1': LABEL_1 + initializer_range: 0.02 + intermediate_size: 4096 + is_decoder: false + is_encoder_decoder: false + label2id: + LABEL_0: 0 + LABEL_1: 1 + layer_norm_eps: 1.0e-05 + layerdrop: 0.0 + length_penalty: 1.0 + mask_feature_length: 10 + mask_feature_min_masks: 0 + mask_feature_prob: 0.0 + mask_time_length: 10 + mask_time_min_masks: 2 + mask_time_prob: 0.0 + max_length: 20 + min_length: 0 + model_type: wav2vec2 + no_repeat_ngram_size: 0 + num_adapter_layers: 3 + num_attention_heads: 16 + num_beam_groups: 1 + num_beams: 1 + num_codevector_groups: 2 + num_codevectors_per_group: 320 + num_conv_pos_embedding_groups: 16 + num_conv_pos_embeddings: 128 + num_feat_extract_layers: 7 + num_hidden_layers: 24 + num_negatives: 100 + num_return_sequences: 1 + output_attentions: false + output_hidden_size: 1024 + output_hidden_states: false + output_scores: false + pad_token_id: 0 + prefix: null + problem_type: null + proj_codevector_dim: 768 + pruned_heads: {} + remove_invalid_values: false + repetition_penalty: 1.0 + return_dict: true + return_dict_in_generate: false + sep_token_id: null + task_specific_params: null + tdnn_dilation: + - 1 + - 2 + - 3 + - 1 + - 1 + tdnn_dim: + - 512 + - 512 + - 512 + - 512 + - 1500 + tdnn_kernel: + - 5 + - 3 + - 3 + - 1 + - 1 + temperature: 1.0 + tie_encoder_decoder: false + tie_word_embeddings: true + tokenizer_class: null + top_k: 50 + top_p: 1.0 + torch_dtype: null + torchscript: false + transformers_version: 4.17.0.dev0 + use_bfloat16: false + use_weighted_layer_sum: false + vocab_size: 32 + xvector_output_dim: 512 +encoder_no_repeat_ngram_size: + desc: null + value: 0 +eos_token_id: + desc: null + value: 50256 +eval_accumulation_steps: + desc: null + value: None +eval_batch_size: + desc: null + value: 16 +eval_steps: + desc: null + value: 500 +evaluation_strategy: + desc: null + value: steps +finetuning_task: + desc: null + value: null +forced_bos_token_id: + desc: null + value: null +forced_eos_token_id: + desc: null + value: null +fp16: + desc: null + value: true +fp16_backend: + desc: null + value: auto +fp16_full_eval: + desc: null + value: false +fp16_opt_level: + desc: null + value: O1 +generation_max_length: + desc: null + value: 40 +generation_num_beams: + desc: null + value: 1 +gradient_accumulation_steps: + desc: null + value: 2 +gradient_checkpointing: + desc: null + value: true +greater_is_better: + desc: null + value: None +group_by_length: + desc: null + value: true +half_precision_backend: + desc: null + value: amp +hub_model_id: + desc: null + value: None +hub_strategy: + desc: null + value: every_save +hub_token: + desc: null + value: +id2label: + desc: null + value: + '0': LABEL_0 + '1': LABEL_1 +ignore_data_skip: + desc: null + value: false +is_decoder: + desc: null + value: false +is_encoder_decoder: + desc: null + value: true +label2id: + desc: null + value: + LABEL_0: 0 + LABEL_1: 1 +label_names: + desc: null + value: None +label_smoothing_factor: + desc: null + value: 0.0 +learning_rate: + desc: null + value: 0.0003 +length_column_name: + desc: null + value: input_length +length_penalty: + desc: null + value: 1.0 +load_best_model_at_end: + desc: null + value: false +local_rank: + desc: null + value: -1 +log_level: + desc: null + value: -1 +log_level_replica: + desc: null + value: -1 +log_on_each_node: + desc: null + value: true +logging_dir: + desc: null + value: ./runs/Mar03_00-39-53_sanchit--v100 +logging_first_step: + desc: null + value: false +logging_nan_inf_filter: + desc: null + value: true +logging_steps: + desc: null + value: 1 +logging_strategy: + desc: null + value: steps +lr_scheduler_type: + desc: null + value: linear +max_grad_norm: + desc: null + value: 1.0 +max_length: + desc: null + value: 50 +max_steps: + desc: null + value: -1 +metric_for_best_model: + desc: null + value: None +min_length: + desc: null + value: 0 +model_type: + desc: null + value: speech-encoder-decoder +mp_parameters: + desc: null + value: '' +no_cuda: + desc: null + value: false +no_repeat_ngram_size: + desc: null + value: 0 +num_beam_groups: + desc: null + value: 1 +num_beams: + desc: null + value: 1 +num_return_sequences: + desc: null + value: 1 +num_train_epochs: + desc: null + value: 1.0 +optim: + desc: null + value: adamw_hf +output_attentions: + desc: null + value: false +output_dir: + desc: null + value: ./ +output_hidden_states: + desc: null + value: false +output_scores: + desc: null + value: false +overwrite_output_dir: + desc: null + value: true +pad_token_id: + desc: null + value: 50256 +past_index: + desc: null + value: -1 +per_device_eval_batch_size: + desc: null + value: 16 +per_device_train_batch_size: + desc: null + value: 16 +per_gpu_eval_batch_size: + desc: null + value: None +per_gpu_train_batch_size: + desc: null + value: None +predict_with_generate: + desc: null + value: true +prediction_loss_only: + desc: null + value: false +prefix: + desc: null + value: null +problem_type: + desc: null + value: null +processor_class: + desc: null + value: Wav2Vec2Processor +pruned_heads: + desc: null + value: {} +push_to_hub: + desc: null + value: true +push_to_hub_model_id: + desc: null + value: None +push_to_hub_organization: + desc: null + value: None +push_to_hub_token: + desc: null + value: +remove_invalid_values: + desc: null + value: false +remove_unused_columns: + desc: null + value: true +repetition_penalty: + desc: null + value: 1.0 +report_to: + desc: null + value: '[''wandb'']' +resume_from_checkpoint: + desc: null + value: None +return_dict: + desc: null + value: true +return_dict_in_generate: + desc: null + value: false +run_name: + desc: null + value: ./ +save_on_each_node: + desc: null + value: false +save_steps: + desc: null + value: 500 +save_strategy: + desc: null + value: steps +save_total_limit: + desc: null + value: 1 +seed: + desc: null + value: 42 +sep_token_id: + desc: null + value: null +sharded_ddp: + desc: null + value: '[]' +skip_memory_metrics: + desc: null + value: true +sortish_sampler: + desc: null + value: false +task_specific_params: + desc: null + value: null +temperature: + desc: null + value: 1.0 +tf32: + desc: null + value: None +tie_encoder_decoder: + desc: null + value: false +tie_word_embeddings: + desc: null + value: false +tokenizer_class: + desc: null + value: null +top_k: + desc: null + value: 50 +top_p: + desc: null + value: 1.0 +torch_dtype: + desc: null + value: torch.float32 +torchscript: + desc: null + value: false +tpu_metrics_debug: + desc: null + value: false +tpu_num_cores: + desc: null + value: None +train_batch_size: + desc: null + value: 16 +transformers_version: + desc: null + value: null +use_bfloat16: + desc: null + value: false +use_cache: + desc: null + value: false +use_legacy_prediction_loop: + desc: null + value: false +warmup_ratio: + desc: null + value: 0.0 +warmup_steps: + desc: null + value: 500 +weight_decay: + desc: null + value: 0.0 +xpu_backend: + desc: null + value: None diff --git a/wandb/run-20220303_004039-1a18zsso/files/output.log b/wandb/run-20220303_004039-1a18zsso/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..1f6f8b9cbfc41dc418854f76b03a3cd79bd9b516 --- /dev/null +++ b/wandb/run-20220303_004039-1a18zsso/files/output.log @@ -0,0 +1,37 @@ + + + 0%| | 0/892 [00:00> Could not estimate the number of tokens of the input, floating-point operations will not be computed +Traceback (most recent call last): + File "/home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/run_speech_recognition_seq2seq.py", line 539, in + main() + File "/home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/run_speech_recognition_seq2seq.py", line 491, in main + train_result = trainer.train(resume_from_checkpoint=checkpoint) + File "/home/sanchit_huggingface_co/transformers/src/transformers/trainer.py", line 1384, in train + tr_loss_step = self.training_step(model, inputs) + File "/home/sanchit_huggingface_co/transformers/src/transformers/trainer.py", line 1959, in training_step + loss = self.compute_loss(model, inputs) + File "/home/sanchit_huggingface_co/transformers/src/transformers/trainer.py", line 1991, in compute_loss + outputs = model(**inputs) + File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl + return forward_call(*input, **kwargs) + File "/home/sanchit_huggingface_co/transformers/src/transformers/models/speech_encoder_decoder/modeling_speech_encoder_decoder.py", line 503, in forward + encoder_outputs = self.encoder( + File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl + return forward_call(*input, **kwargs) + File "/home/sanchit_huggingface_co/transformers/src/transformers/models/wav2vec2/modeling_wav2vec2.py", line 1346, in forward + extract_features = self.feature_extractor(input_values) + File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl + return forward_call(*input, **kwargs) + File "/home/sanchit_huggingface_co/transformers/src/transformers/models/wav2vec2/modeling_wav2vec2.py", line 514, in forward + hidden_states = conv_layer(hidden_states) + File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl + return forward_call(*input, **kwargs) + File "/home/sanchit_huggingface_co/transformers/src/transformers/models/wav2vec2/modeling_wav2vec2.py", line 389, in forward + hidden_states = self.layer_norm(hidden_states) + File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl + return forward_call(*input, **kwargs) + File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/torch/nn/modules/normalization.py", line 189, in forward + return F.layer_norm( + File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/torch/nn/functional.py", line 2347, in layer_norm + return torch.layer_norm(input, normalized_shape, weight, bias, eps, torch.backends.cudnn.enabled) +RuntimeError: CUDA out of memory. Tried to allocate 1.63 GiB (GPU 0; 15.78 GiB total capacity; 10.07 GiB already allocated; 707.31 MiB free; 13.39 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF \ No newline at end of file diff --git a/wandb/run-20220303_004039-1a18zsso/files/requirements.txt b/wandb/run-20220303_004039-1a18zsso/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..3974f97a24952deb24d97850f53367da9e7c347d --- /dev/null +++ b/wandb/run-20220303_004039-1a18zsso/files/requirements.txt @@ -0,0 +1,184 @@ +absl-py==1.0.0 +aiohttp==3.8.1 +aiosignal==1.2.0 +anyio==3.5.0 +appdirs==1.4.4 +argon2-cffi-bindings==21.2.0 +argon2-cffi==21.3.0 +asttokens==2.0.5 +async-timeout==4.0.2 +attrs==21.4.0 +audioread==2.1.9 +babel==2.9.1 +backcall==0.2.0 +bitsandbytes-cuda113==0.26.0 +black==22.1.0 +bleach==4.1.0 +cachetools==5.0.0 +certifi==2021.10.8 +cffi==1.15.0 +charset-normalizer==2.0.11 +chex==0.1.0 +click==8.0.3 +clldutils==3.10.1 +colorlog==6.6.0 +csvw==1.11.0 +cycler==0.11.0 +datasets==1.18.3 +debugpy==1.5.1 +decorator==5.1.1 +defusedxml==0.7.1 +dill==0.3.4 +dlinfo==1.2.1 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +entrypoints==0.4 +executing==0.8.2 +filelock==3.4.2 +flatbuffers==2.0 +flax==0.4.0 +fonttools==4.29.1 +frozenlist==1.3.0 +fsspec==2022.1.0 +gitdb==4.0.9 +gitpython==3.1.27 +google-auth-oauthlib==0.4.6 +google-auth==2.6.0 +grpcio==1.43.0 +huggingface-hub==0.4.0 +hypothesis==6.36.1 +idna==3.3 +importlib-metadata==4.10.1 +ipykernel==6.8.0 +ipython-genutils==0.2.0 +ipython==8.0.1 +ipywidgets==7.6.5 +isodate==0.6.1 +jax==0.2.28 +jaxlib==0.1.76+cuda11.cudnn82 +jedi==0.18.1 +jinja2==3.0.3 +jiwer==2.3.0 +joblib==1.1.0 +json5==0.9.6 +jsonschema==4.4.0 +jupyter-client==7.1.2 +jupyter-console==6.4.0 +jupyter-core==4.9.1 +jupyter-server==1.13.5 +jupyter==1.0.0 +jupyterlab-pygments==0.1.2 +jupyterlab-server==2.10.3 +jupyterlab-widgets==1.0.2 +jupyterlab==3.2.9 +kiwisolver==1.3.2 +librosa==0.8.1 +llvmlite==0.38.0 +markdown==3.3.6 +markupsafe==2.0.1 +matplotlib-inline==0.1.3 +matplotlib==3.5.1 +mistune==0.8.4 +msgpack==1.0.3 +multidict==6.0.2 +multiprocess==0.70.12.2 +mypy-extensions==0.4.3 +nbclassic==0.3.5 +nbclient==0.5.10 +nbconvert==6.4.1 +nbformat==5.1.3 +nest-asyncio==1.5.4 +notebook==6.4.8 +numba==0.55.1 +numpy==1.21.5 +oauthlib==3.2.0 +opt-einsum==3.3.0 +optax==0.1.0 +packaging==21.3 +pandas==1.4.0 +pandocfilters==1.5.0 +parso==0.8.3 +pathspec==0.9.0 +pathtools==0.1.2 +pexpect==4.8.0 +phonemizer==3.0.1 +pickleshare==0.7.5 +pillow==9.0.0 +pip==22.0.2 +pkg-resources==0.0.0 +platformdirs==2.4.1 +pooch==1.6.0 +prometheus-client==0.13.1 +promise==2.3 +prompt-toolkit==3.0.26 +protobuf==3.19.4 +psutil==5.9.0 +ptyprocess==0.7.0 +pure-eval==0.2.2 +pyarrow==6.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pycparser==2.21 +pyctcdecode==0.3.0 +pygments==2.11.2 +pygtrie==2.4.2 +pyparsing==3.0.7 +pyrsistent==0.18.1 +python-dateutil==2.8.2 +python-levenshtein==0.12.2 +pytz==2021.3 +pyyaml==6.0 +pyzmq==22.3.0 +qtconsole==5.2.2 +qtpy==2.0.1 +regex==2022.1.18 +requests-oauthlib==1.3.1 +requests==2.27.1 +resampy==0.2.2 +rfc3986==2.0.0 +rsa==4.8 +sacremoses==0.0.47 +scikit-learn==1.0.2 +scipy==1.7.3 +segments==2.2.0 +send2trash==1.8.0 +sentry-sdk==1.5.6 +setuptools==44.1.1 +shortuuid==1.0.8 +six==1.16.0 +smmap==5.0.0 +sniffio==1.2.0 +sortedcontainers==2.4.0 +soundfile==0.10.3.post1 +stack-data==0.1.4 +tabulate==0.8.9 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.1 +tensorboard==2.8.0 +termcolor==1.1.0 +terminado==0.13.1 +testpath==0.5.0 +threadpoolctl==3.1.0 +tokenizers==0.11.4 +tomli==2.0.0 +toolz==0.11.2 +torch==1.10.2+cu113 +torchaudio==0.10.2+cu113 +tornado==6.1 +tqdm==4.62.3 +traitlets==5.1.1 +transformers==4.17.0.dev0 +typing-extensions==3.10.0.2 +uritemplate==4.1.1 +urllib3==1.26.8 +wandb==0.12.10 +wcwidth==0.2.5 +webencodings==0.5.1 +websocket-client==1.2.3 +werkzeug==2.0.2 +wheel==0.37.1 +widgetsnbextension==3.5.2 +xxhash==2.0.2 +yarl==1.7.2 +yaspin==2.1.0 +zipp==3.7.0 \ No newline at end of file diff --git a/wandb/run-20220303_004039-1a18zsso/files/wandb-metadata.json b/wandb/run-20220303_004039-1a18zsso/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..fcb1517a824af908da77fcf05e0032f731ae9365 --- /dev/null +++ b/wandb/run-20220303_004039-1a18zsso/files/wandb-metadata.json @@ -0,0 +1,60 @@ +{ + "os": "Linux-5.11.0-1028-gcp-x86_64-with-glibc2.33", + "python": "3.9.5", + "heartbeatAt": "2022-03-03T00:40:40.798823", + "startedAt": "2022-03-03T00:40:39.514688", + "docker": null, + "gpu": "Tesla V100-SXM2-16GB", + "gpu_count": 2, + "cpu_count": 16, + "cuda": null, + "args": [ + "--dataset_name=librispeech_asr", + "--model_name_or_path=./", + "--tokenizer_name=./", + "--dataset_config_name=clean", + "--train_split_name=train.100", + "--eval_split_name=validation", + "--output_dir=./", + "--preprocessing_num_workers=1", + "--length_column_name=input_length", + "--overwrite_output_dir", + "--num_train_epochs=1", + "--per_device_train_batch_size=16", + "--per_device_eval_batch_size=16", + "--gradient_accumulation_steps=2", + "--generation_max_length=40", + "--generation_num_beams=1", + "--learning_rate=3e-4", + "--warmup_steps=500", + "--evaluation_strategy=steps", + "--text_column_name=text", + "--save_steps=500", + "--eval_steps=500", + "--logging_steps=1", + "--save_total_limit=1", + "--freeze_feature_encoder", + "--gradient_checkpointing", + "--fp16", + "--group_by_length", + "--predict_with_generate", + "--do_lower_case", + "--do_train", + "--do_eval", + "--report_to=wandb", + "--push_to_hub", + "--use_auth_token" + ], + "state": "running", + "program": "/home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/run_speech_recognition_seq2seq.py", + "codePath": "run_speech_recognition_seq2seq.py", + "git": { + "remote": "https://huggingface.co/sanchit-gandhi/wav2vec2-gpt2-wandb-grid-search", + "commit": "8c7181143c175387040dc1a6ac2ddbc9179b550c" + }, + "email": "sanchit@huggingface.co", + "root": "/home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search", + "host": "sanchit--v100", + "username": "sanchit_huggingface_co", + "executable": "/home/sanchit_huggingface_co/gcp/bin/python" +} diff --git a/wandb/run-20220303_004039-1a18zsso/files/wandb-summary.json b/wandb/run-20220303_004039-1a18zsso/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..b1ac4f7d3564b2fd407d247e6957709faa41a169 --- /dev/null +++ b/wandb/run-20220303_004039-1a18zsso/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb": {"runtime": 9}} \ No newline at end of file diff --git a/wandb/run-20220303_004039-1a18zsso/logs/debug-internal.log b/wandb/run-20220303_004039-1a18zsso/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..a52f92a07dcc852def2bf034025a94686694a94b --- /dev/null +++ b/wandb/run-20220303_004039-1a18zsso/logs/debug-internal.log @@ -0,0 +1,142 @@ +2022-03-03 00:40:40,587 INFO MainThread:267195 [internal.py:wandb_internal():89] W&B internal server running at pid: 267195, started at: 2022-03-03 00:40:40.586996 +2022-03-03 00:40:40,590 INFO WriterThread:267195 [datastore.py:open_for_write():77] open: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004039-1a18zsso/run-1a18zsso.wandb +2022-03-03 00:40:40,590 DEBUG HandlerThread:267195 [handler.py:handle_request():131] handle_request: check_version +2022-03-03 00:40:40,591 DEBUG SenderThread:267195 [sender.py:send():235] send: header +2022-03-03 00:40:40,591 DEBUG SenderThread:267195 [sender.py:send_request():249] send_request: check_version +2022-03-03 00:40:40,668 DEBUG SenderThread:267195 [sender.py:send():235] send: run +2022-03-03 00:40:40,791 INFO SenderThread:267195 [dir_watcher.py:__init__():169] watching files in: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004039-1a18zsso/files +2022-03-03 00:40:40,791 INFO SenderThread:267195 [sender.py:_start_run_threads():809] run started: 1a18zsso with start time 1646268039 +2022-03-03 00:40:40,791 DEBUG SenderThread:267195 [sender.py:send():235] send: summary +2022-03-03 00:40:40,791 INFO SenderThread:267195 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:40:40,793 DEBUG HandlerThread:267195 [handler.py:handle_request():131] handle_request: run_start +2022-03-03 00:40:40,798 DEBUG HandlerThread:267195 [meta.py:__init__():36] meta init +2022-03-03 00:40:40,798 DEBUG HandlerThread:267195 [meta.py:__init__():50] meta init done +2022-03-03 00:40:40,798 DEBUG HandlerThread:267195 [meta.py:probe():210] probe +2022-03-03 00:40:40,805 DEBUG HandlerThread:267195 [meta.py:_setup_git():200] setup git +2022-03-03 00:40:40,821 DEBUG HandlerThread:267195 [meta.py:_setup_git():207] setup git done +2022-03-03 00:40:40,822 DEBUG HandlerThread:267195 [meta.py:_save_pip():54] save pip +2022-03-03 00:40:40,823 DEBUG HandlerThread:267195 [meta.py:_save_pip():68] save pip done +2022-03-03 00:40:40,823 DEBUG HandlerThread:267195 [meta.py:probe():248] probe done +2022-03-03 00:40:40,946 DEBUG SenderThread:267195 [sender.py:send():235] send: files +2022-03-03 00:40:40,946 INFO SenderThread:267195 [sender.py:_save_file():944] saving file wandb-metadata.json with policy now +2022-03-03 00:40:40,951 DEBUG HandlerThread:267195 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:40:40,952 DEBUG SenderThread:267195 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:40:40,999 DEBUG SenderThread:267195 [sender.py:send():235] send: config +2022-03-03 00:40:41,000 DEBUG SenderThread:267195 [sender.py:send():235] send: metric +2022-03-03 00:40:41,000 DEBUG SenderThread:267195 [sender.py:send():235] send: metric +2022-03-03 00:40:41,000 WARNING SenderThread:267195 [sender.py:send_metric():902] Seen metric with glob (shouldnt happen) +2022-03-03 00:40:41,240 INFO Thread-11 :267195 [upload_job.py:push():137] Uploaded file /tmp/tmp9mhkg11ywandb/2vus7xrr-wandb-metadata.json +2022-03-03 00:40:41,793 INFO Thread-8 :267195 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004039-1a18zsso/files/output.log +2022-03-03 00:40:41,793 INFO Thread-8 :267195 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004039-1a18zsso/files/wandb-metadata.json +2022-03-03 00:40:41,794 INFO Thread-8 :267195 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004039-1a18zsso/files/requirements.txt +2022-03-03 00:40:41,794 INFO Thread-8 :267195 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004039-1a18zsso/files/wandb-summary.json +2022-03-03 00:40:43,792 INFO Thread-8 :267195 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004039-1a18zsso/files/output.log +2022-03-03 00:40:47,793 INFO Thread-8 :267195 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004039-1a18zsso/files/output.log +2022-03-03 00:40:49,794 INFO Thread-8 :267195 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004039-1a18zsso/files/output.log +2022-03-03 00:40:49,872 DEBUG SenderThread:267195 [sender.py:send():235] send: telemetry +2022-03-03 00:40:49,872 DEBUG SenderThread:267195 [sender.py:send():235] send: exit +2022-03-03 00:40:49,872 DEBUG HandlerThread:267195 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-03 00:40:49,873 INFO SenderThread:267195 [sender.py:send_exit():371] handling exit code: 1 +2022-03-03 00:40:49,873 INFO SenderThread:267195 [sender.py:send_exit():373] handling runtime: 9 +2022-03-03 00:40:49,873 INFO SenderThread:267195 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:40:49,873 INFO SenderThread:267195 [sender.py:send_exit():379] send defer +2022-03-03 00:40:49,874 DEBUG SenderThread:267195 [sender.py:send_request():249] send_request: poll_exit +2022-03-03 00:40:49,874 DEBUG HandlerThread:267195 [handler.py:handle_request():131] handle_request: defer +2022-03-03 00:40:49,875 INFO HandlerThread:267195 [handler.py:handle_request_defer():154] handle defer: 0 +2022-03-03 00:40:49,875 DEBUG SenderThread:267195 [sender.py:send_request():249] send_request: defer +2022-03-03 00:40:49,875 INFO SenderThread:267195 [sender.py:send_request_defer():388] handle sender defer: 0 +2022-03-03 00:40:49,875 INFO SenderThread:267195 [sender.py:transition_state():392] send defer: 1 +2022-03-03 00:40:49,875 DEBUG HandlerThread:267195 [handler.py:handle_request():131] handle_request: defer +2022-03-03 00:40:49,876 INFO HandlerThread:267195 [handler.py:handle_request_defer():154] handle defer: 1 +2022-03-03 00:40:49,994 DEBUG HandlerThread:267195 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-03 00:40:49,994 DEBUG SenderThread:267195 [sender.py:send_request():249] send_request: defer +2022-03-03 00:40:49,994 INFO SenderThread:267195 [sender.py:send_request_defer():388] handle sender defer: 1 +2022-03-03 00:40:49,994 INFO SenderThread:267195 [sender.py:transition_state():392] send defer: 2 +2022-03-03 00:40:49,995 DEBUG SenderThread:267195 [sender.py:send_request():249] send_request: poll_exit +2022-03-03 00:40:49,995 DEBUG SenderThread:267195 [sender.py:send():235] send: stats +2022-03-03 00:40:49,996 DEBUG HandlerThread:267195 [handler.py:handle_request():131] handle_request: defer +2022-03-03 00:40:49,996 INFO HandlerThread:267195 [handler.py:handle_request_defer():154] handle defer: 2 +2022-03-03 00:40:49,996 DEBUG SenderThread:267195 [sender.py:send_request():249] send_request: defer +2022-03-03 00:40:49,996 INFO SenderThread:267195 [sender.py:send_request_defer():388] handle sender defer: 2 +2022-03-03 00:40:49,996 INFO SenderThread:267195 [sender.py:transition_state():392] send defer: 3 +2022-03-03 00:40:49,996 DEBUG HandlerThread:267195 [handler.py:handle_request():131] handle_request: defer +2022-03-03 00:40:49,997 INFO HandlerThread:267195 [handler.py:handle_request_defer():154] handle defer: 3 +2022-03-03 00:40:49,997 DEBUG SenderThread:267195 [sender.py:send():235] send: summary +2022-03-03 00:40:49,998 INFO SenderThread:267195 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:40:49,998 DEBUG SenderThread:267195 [sender.py:send_request():249] send_request: defer +2022-03-03 00:40:49,998 INFO SenderThread:267195 [sender.py:send_request_defer():388] handle sender defer: 3 +2022-03-03 00:40:49,998 INFO SenderThread:267195 [sender.py:transition_state():392] send defer: 4 +2022-03-03 00:40:49,998 DEBUG HandlerThread:267195 [handler.py:handle_request():131] handle_request: defer +2022-03-03 00:40:49,998 INFO HandlerThread:267195 [handler.py:handle_request_defer():154] handle defer: 4 +2022-03-03 00:40:49,998 DEBUG SenderThread:267195 [sender.py:send_request():249] send_request: defer +2022-03-03 00:40:49,998 INFO SenderThread:267195 [sender.py:send_request_defer():388] handle sender defer: 4 +2022-03-03 00:40:50,102 DEBUG HandlerThread:267195 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-03 00:40:50,128 INFO SenderThread:267195 [sender.py:transition_state():392] send defer: 5 +2022-03-03 00:40:50,128 DEBUG SenderThread:267195 [sender.py:send_request():249] send_request: poll_exit +2022-03-03 00:40:50,129 DEBUG HandlerThread:267195 [handler.py:handle_request():131] handle_request: defer +2022-03-03 00:40:50,129 INFO HandlerThread:267195 [handler.py:handle_request_defer():154] handle defer: 5 +2022-03-03 00:40:50,129 DEBUG SenderThread:267195 [sender.py:send_request():249] send_request: defer +2022-03-03 00:40:50,130 INFO SenderThread:267195 [sender.py:send_request_defer():388] handle sender defer: 5 +2022-03-03 00:40:50,130 INFO SenderThread:267195 [dir_watcher.py:finish():283] shutting down directory watcher +2022-03-03 00:40:50,231 DEBUG HandlerThread:267195 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-03 00:40:50,795 INFO Thread-8 :267195 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004039-1a18zsso/files/output.log +2022-03-03 00:40:50,795 INFO SenderThread:267195 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004039-1a18zsso/files/wandb-summary.json +2022-03-03 00:40:50,796 INFO SenderThread:267195 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004039-1a18zsso/files/config.yaml +2022-03-03 00:40:50,796 INFO SenderThread:267195 [dir_watcher.py:finish():313] scan: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004039-1a18zsso/files +2022-03-03 00:40:50,796 INFO SenderThread:267195 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004039-1a18zsso/files/wandb-metadata.json wandb-metadata.json +2022-03-03 00:40:50,796 INFO SenderThread:267195 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004039-1a18zsso/files/output.log output.log +2022-03-03 00:40:50,797 INFO SenderThread:267195 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004039-1a18zsso/files/wandb-summary.json wandb-summary.json +2022-03-03 00:40:50,800 INFO SenderThread:267195 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004039-1a18zsso/files/requirements.txt requirements.txt +2022-03-03 00:40:50,803 INFO SenderThread:267195 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004039-1a18zsso/files/config.yaml config.yaml +2022-03-03 00:40:50,807 INFO SenderThread:267195 [sender.py:transition_state():392] send defer: 6 +2022-03-03 00:40:50,807 DEBUG SenderThread:267195 [sender.py:send_request():249] send_request: poll_exit +2022-03-03 00:40:50,808 DEBUG HandlerThread:267195 [handler.py:handle_request():131] handle_request: defer +2022-03-03 00:40:50,809 INFO HandlerThread:267195 [handler.py:handle_request_defer():154] handle defer: 6 +2022-03-03 00:40:50,812 DEBUG SenderThread:267195 [sender.py:send_request():249] send_request: defer +2022-03-03 00:40:50,813 INFO SenderThread:267195 [sender.py:send_request_defer():388] handle sender defer: 6 +2022-03-03 00:40:50,813 INFO SenderThread:267195 [file_pusher.py:finish():177] shutting down file pusher +2022-03-03 00:40:50,910 DEBUG HandlerThread:267195 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-03 00:40:50,911 DEBUG SenderThread:267195 [sender.py:send_request():249] send_request: poll_exit +2022-03-03 00:40:51,013 DEBUG HandlerThread:267195 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-03 00:40:51,013 DEBUG SenderThread:267195 [sender.py:send_request():249] send_request: poll_exit +2022-03-03 00:40:51,068 INFO Thread-15 :267195 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004039-1a18zsso/files/config.yaml +2022-03-03 00:40:51,071 INFO Thread-13 :267195 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004039-1a18zsso/files/wandb-summary.json +2022-03-03 00:40:51,116 INFO Thread-12 :267195 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004039-1a18zsso/files/output.log +2022-03-03 00:40:51,117 DEBUG HandlerThread:267195 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-03 00:40:51,117 DEBUG SenderThread:267195 [sender.py:send_request():249] send_request: poll_exit +2022-03-03 00:40:51,172 INFO Thread-14 :267195 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004039-1a18zsso/files/requirements.txt +2022-03-03 00:40:51,219 DEBUG HandlerThread:267195 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-03 00:40:51,219 DEBUG SenderThread:267195 [sender.py:send_request():249] send_request: poll_exit +2022-03-03 00:40:51,321 DEBUG HandlerThread:267195 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-03 00:40:51,322 DEBUG SenderThread:267195 [sender.py:send_request():249] send_request: poll_exit +2022-03-03 00:40:51,372 INFO Thread-7 :267195 [sender.py:transition_state():392] send defer: 7 +2022-03-03 00:40:51,373 DEBUG HandlerThread:267195 [handler.py:handle_request():131] handle_request: defer +2022-03-03 00:40:51,373 INFO HandlerThread:267195 [handler.py:handle_request_defer():154] handle defer: 7 +2022-03-03 00:40:51,373 DEBUG SenderThread:267195 [sender.py:send_request():249] send_request: defer +2022-03-03 00:40:51,373 INFO SenderThread:267195 [sender.py:send_request_defer():388] handle sender defer: 7 +2022-03-03 00:40:51,424 DEBUG HandlerThread:267195 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-03 00:40:51,429 INFO SenderThread:267195 [sender.py:transition_state():392] send defer: 8 +2022-03-03 00:40:51,429 DEBUG SenderThread:267195 [sender.py:send_request():249] send_request: poll_exit +2022-03-03 00:40:51,430 DEBUG HandlerThread:267195 [handler.py:handle_request():131] handle_request: defer +2022-03-03 00:40:51,430 INFO HandlerThread:267195 [handler.py:handle_request_defer():154] handle defer: 8 +2022-03-03 00:40:51,431 DEBUG SenderThread:267195 [sender.py:send_request():249] send_request: defer +2022-03-03 00:40:51,431 INFO SenderThread:267195 [sender.py:send_request_defer():388] handle sender defer: 8 +2022-03-03 00:40:51,431 INFO SenderThread:267195 [sender.py:transition_state():392] send defer: 9 +2022-03-03 00:40:51,431 DEBUG SenderThread:267195 [sender.py:send():235] send: final +2022-03-03 00:40:51,432 DEBUG SenderThread:267195 [sender.py:send():235] send: footer +2022-03-03 00:40:51,432 DEBUG HandlerThread:267195 [handler.py:handle_request():131] handle_request: defer +2022-03-03 00:40:51,432 INFO HandlerThread:267195 [handler.py:handle_request_defer():154] handle defer: 9 +2022-03-03 00:40:51,432 DEBUG SenderThread:267195 [sender.py:send_request():249] send_request: defer +2022-03-03 00:40:51,433 INFO SenderThread:267195 [sender.py:send_request_defer():388] handle sender defer: 9 +2022-03-03 00:40:51,531 DEBUG HandlerThread:267195 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-03 00:40:51,532 DEBUG SenderThread:267195 [sender.py:send_request():249] send_request: poll_exit +2022-03-03 00:40:51,532 INFO SenderThread:267195 [file_pusher.py:join():182] waiting for file pusher +2022-03-03 00:40:51,631 DEBUG HandlerThread:267195 [handler.py:handle_request():131] handle_request: get_summary +2022-03-03 00:40:51,632 DEBUG HandlerThread:267195 [handler.py:handle_request():131] handle_request: sampled_history +2022-03-03 00:40:51,633 DEBUG HandlerThread:267195 [handler.py:handle_request():131] handle_request: shutdown +2022-03-03 00:40:51,633 INFO HandlerThread:267195 [handler.py:finish():739] shutting down handler +2022-03-03 00:40:52,432 INFO WriterThread:267195 [datastore.py:close():281] close: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004039-1a18zsso/run-1a18zsso.wandb +2022-03-03 00:40:52,630 INFO SenderThread:267195 [sender.py:finish():1075] shutting down sender +2022-03-03 00:40:52,630 INFO SenderThread:267195 [file_pusher.py:finish():177] shutting down file pusher +2022-03-03 00:40:52,630 INFO SenderThread:267195 [file_pusher.py:join():182] waiting for file pusher +2022-03-03 00:40:52,632 INFO MainThread:267195 [internal.py:handle_exit():79] Internal process exited diff --git a/wandb/run-20220303_004039-1a18zsso/logs/debug.log b/wandb/run-20220303_004039-1a18zsso/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..67548b196c85b9da39cb7fc05af1bdd23ab8f132 --- /dev/null +++ b/wandb/run-20220303_004039-1a18zsso/logs/debug.log @@ -0,0 +1,123 @@ +2022-03-03 00:40:39,518 INFO MainThread:267096 [wandb_setup.py:_flush():75] Loading settings from /home/sanchit_huggingface_co/.config/wandb/settings +2022-03-03 00:40:39,518 INFO MainThread:267096 [wandb_setup.py:_flush():75] Loading settings from /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/settings +2022-03-03 00:40:39,518 INFO MainThread:267096 [wandb_setup.py:_flush():75] Loading settings from environment variables: {} +2022-03-03 00:40:39,518 INFO MainThread:267096 [wandb_setup.py:_flush():75] Inferring run settings from compute environment: {'program_relpath': 'run_speech_recognition_seq2seq.py', 'program': '/home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/run_speech_recognition_seq2seq.py'} +2022-03-03 00:40:39,518 INFO MainThread:267096 [wandb_init.py:_log_setup():386] Logging user logs to /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004039-1a18zsso/logs/debug.log +2022-03-03 00:40:39,518 INFO MainThread:267096 [wandb_init.py:_log_setup():387] Logging internal logs to /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004039-1a18zsso/logs/debug-internal.log +2022-03-03 00:40:39,518 INFO MainThread:267096 [wandb_init.py:init():420] calling init triggers +2022-03-03 00:40:39,518 INFO MainThread:267096 [wandb_init.py:init():425] wandb.init called with sweep_config: {} +config: {} +2022-03-03 00:40:39,519 INFO MainThread:267096 [wandb_init.py:init():471] starting backend +2022-03-03 00:40:39,519 INFO MainThread:267096 [backend.py:_multiprocessing_setup():99] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2022-03-03 00:40:39,592 INFO MainThread:267096 [backend.py:ensure_launched():219] starting backend process... +2022-03-03 00:40:39,663 INFO MainThread:267096 [backend.py:ensure_launched():224] started backend process with pid: 267195 +2022-03-03 00:40:39,665 INFO MainThread:267096 [wandb_init.py:init():480] backend started and connected +2022-03-03 00:40:39,677 INFO MainThread:267096 [wandb_init.py:init():550] updated telemetry +2022-03-03 00:40:39,846 INFO MainThread:267096 [wandb_init.py:init():581] communicating current version +2022-03-03 00:40:40,667 INFO MainThread:267096 [wandb_init.py:init():586] got version response upgrade_message: "wandb version 0.12.11 is available! To upgrade, please run:\n $ pip install wandb --upgrade" + +2022-03-03 00:40:40,667 INFO MainThread:267096 [wandb_init.py:init():596] communicating run to backend with 30 second timeout +2022-03-03 00:40:40,792 INFO MainThread:267096 [wandb_init.py:init():624] starting run threads in backend +2022-03-03 00:40:40,950 INFO MainThread:267096 [wandb_run.py:_console_start():1827] atexit reg +2022-03-03 00:40:40,951 INFO MainThread:267096 [wandb_run.py:_redirect():1701] redirect: SettingsConsole.REDIRECT +2022-03-03 00:40:40,952 INFO MainThread:267096 [wandb_run.py:_redirect():1706] Redirecting console. +2022-03-03 00:40:40,954 INFO MainThread:267096 [wandb_run.py:_redirect():1762] Redirects installed. +2022-03-03 00:40:40,954 INFO MainThread:267096 [wandb_init.py:init():651] run started, returning control to user process +2022-03-03 00:40:40,956 INFO MainThread:267096 [wandb_run.py:_config_callback():966] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'torch.float32', 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 50, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'architectures': ['SpeechEncoderDecoderModel'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 50256, 'eos_token_id': 50256, 'sep_token_id': None, 'decoder_start_token_id': 50256, 'task_specific_params': None, 'problem_type': None, '_name_or_path': './', 'transformers_version': None, 'decoder': {'vocab_size': 50257, 'n_positions': 1024, 'n_embd': 1024, 'n_layer': 24, 'n_head': 16, 'n_inner': None, 'activation_function': 'gelu_new', 'resid_pdrop': 0.0, 'embd_pdrop': 0.0, 'attn_pdrop': 0.0, 'layer_norm_epsilon': 1e-05, 'initializer_range': 0.02, 'summary_type': 'cls_index', 'summary_use_proj': True, 'summary_activation': None, 'summary_first_dropout': 0.0, 'summary_proj_to_labels': True, 'scale_attn_weights': True, 'use_cache': False, 'scale_attn_by_inverse_layer_idx': False, 'reorder_and_upcast_attn': False, 'bos_token_id': 50256, 'eos_token_id': 50256, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': True, 'cross_attention_hidden_size': None, 'add_cross_attention': True, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'architectures': ['GPT2LMHeadModel'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'pad_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': {'text-generation': {'do_sample': True, 'max_length': 50}}, 'problem_type': None, '_name_or_path': 'gpt2-medium', 'transformers_version': '4.17.0.dev0', 'n_ctx': 1024, 'n_special': 0, 'predict_special_tokens': True, 'model_type': 'gpt2'}, 'encoder': {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'architectures': ['Wav2Vec2ForPreTraining'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 1, 'pad_token_id': 0, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'facebook/wav2vec2-large-lv60', 'transformers_version': '4.17.0.dev0', 'feat_extract_dropout': 0.0, 'gradient_checkpointing': False, 'hidden_dropout_prob': 0.0, 'num_feat_extract_layers': 7, 'hidden_size': 1024, 'feat_extract_norm': 'layer', 'feat_extract_activation': 'gelu', 'conv_dim': [512, 512, 512, 512, 512, 512, 512], 'conv_stride': [5, 2, 2, 2, 2, 2, 2], 'conv_kernel': [10, 3, 3, 3, 3, 2, 2], 'conv_bias': True, 'num_conv_pos_embeddings': 128, 'num_conv_pos_embedding_groups': 16, 'num_hidden_layers': 24, 'intermediate_size': 4096, 'hidden_act': 'gelu', 'num_attention_heads': 16, 'hidden_dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'feat_proj_dropout': 0.0, 'final_dropout': 0.0, 'layerdrop': 0.0, 'layer_norm_eps': 1e-05, 'initializer_range': 0.02, 'vocab_size': 32, 'do_stable_layer_norm': True, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.0, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'num_codevectors_per_group': 320, 'num_codevector_groups': 2, 'contrastive_logits_temperature': 0.1, 'feat_quantizer_dropout': 0.0, 'num_negatives': 100, 'codevector_dim': 768, 'proj_codevector_dim': 768, 'diversity_loss_weight': 0.1, 'ctc_loss_reduction': 'sum', 'ctc_zero_infinity': False, 'add_adapter': True, 'adapter_kernel_size': 3, 'adapter_stride': 2, 'num_adapter_layers': 3, 'output_hidden_size': 1024, 'classifier_proj_size': 256, 'tdnn_dim': [512, 512, 512, 512, 1500], 'tdnn_kernel': [5, 3, 3, 1, 1], 'tdnn_dilation': [1, 2, 3, 1, 1], 'xvector_output_dim': 512, 'model_type': 'wav2vec2'}, 'model_type': 'speech-encoder-decoder', 'processor_class': 'Wav2Vec2Processor', 'use_cache': False, 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 16, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': 'None', 'per_gpu_eval_batch_size': 'None', 'gradient_accumulation_steps': 2, 'eval_accumulation_steps': 'None', 'learning_rate': 0.0003, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1.0, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Mar03_00-39-53_sanchit--v100', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 1, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 500, 'save_total_limit': 1, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'amp', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': 'None', 'local_rank': -1, 'xpu_backend': 'None', 'tpu_num_cores': 'None', 'tpu_metrics_debug': False, 'debug': '[]', 'dataloader_drop_last': False, 'eval_steps': 500, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': 'None', 'load_best_model_at_end': False, 'metric_for_best_model': 'None', 'greater_is_better': 'None', 'ignore_data_skip': False, 'sharded_ddp': '[]', 'deepspeed': 'None', 'label_smoothing_factor': 0.0, 'optim': 'adamw_hf', 'adafactor': False, 'group_by_length': True, 'length_column_name': 'input_length', 'report_to': "['wandb']", 'ddp_find_unused_parameters': 'None', 'ddp_bucket_cap_mb': 'None', 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': 'None', 'hub_model_id': 'None', 'hub_strategy': 'every_save', 'hub_token': '', 'gradient_checkpointing': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': 'None', 'push_to_hub_organization': 'None', 'push_to_hub_token': '', '_n_gpu': 1, 'mp_parameters': '', 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 40, 'generation_num_beams': 1, 'train_batch_size': 16, 'eval_batch_size': 16} +2022-03-03 00:40:40,960 INFO MainThread:267096 [wandb_watch.py:watch():43] Watching +2022-03-03 00:40:47,275 INFO MainThread:267096 [wandb_run.py:_atexit_cleanup():1797] got exitcode: 1 +2022-03-03 00:40:47,278 INFO MainThread:267096 [wandb_run.py:_restore():1769] restore +2022-03-03 00:40:49,875 INFO MainThread:267096 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 2095 + total_bytes: 2095 +} + +2022-03-03 00:40:49,996 INFO MainThread:267096 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 2095 + total_bytes: 2095 +} + +2022-03-03 00:40:50,129 INFO MainThread:267096 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 2095 + total_bytes: 2095 +} + +2022-03-03 00:40:50,809 INFO MainThread:267096 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 2095 + total_bytes: 21066 +} + +2022-03-03 00:40:50,912 INFO MainThread:267096 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 17829 + total_bytes: 21066 +} + +2022-03-03 00:40:51,015 INFO MainThread:267096 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 21066 + total_bytes: 21066 +} + +2022-03-03 00:40:51,118 INFO MainThread:267096 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 21066 + total_bytes: 21066 +} + +2022-03-03 00:40:51,220 INFO MainThread:267096 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 21066 + total_bytes: 21066 +} + +2022-03-03 00:40:51,323 INFO MainThread:267096 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 21066 + total_bytes: 21066 +} + +2022-03-03 00:40:51,430 INFO MainThread:267096 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 21066 + total_bytes: 21066 +} + +2022-03-03 00:40:51,630 INFO MainThread:267096 [wandb_run.py:_wait_for_finish():1929] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 21066 + total_bytes: 21066 +} +local_info { +} + +2022-03-03 00:40:52,787 INFO MainThread:267096 [wandb_run.py:_append_files():2194] logging synced files diff --git a/wandb/run-20220303_004039-1a18zsso/run-1a18zsso.wandb b/wandb/run-20220303_004039-1a18zsso/run-1a18zsso.wandb new file mode 100644 index 0000000000000000000000000000000000000000..5381548e90500d39c972aecc897917f8f336c3b3 Binary files /dev/null and b/wandb/run-20220303_004039-1a18zsso/run-1a18zsso.wandb differ diff --git a/wandb/run-20220303_004215-2ttcbe3s/files/config.yaml b/wandb/run-20220303_004215-2ttcbe3s/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6785791586d975c88c216ebb9420ad20ff928c10 --- /dev/null +++ b/wandb/run-20220303_004215-2ttcbe3s/files/config.yaml @@ -0,0 +1,713 @@ +wandb_version: 1 + +_n_gpu: + desc: null + value: 1 +_name_or_path: + desc: null + value: ./ +_wandb: + desc: null + value: + cli_version: 0.12.10 + framework: huggingface + huggingface_version: 4.17.0.dev0 + is_jupyter_run: false + is_kaggle_kernel: false + m: + - 1: train/global_step + 6: + - 3 + python_version: 3.9.5 + start_time: 1646268136 + t: + 1: + - 1 + - 5 + - 11 + 2: + - 1 + - 5 + - 11 + 3: + - 1 + - 7 + - 13 + 4: 3.9.5 + 5: 0.12.10 + 6: 4.17.0.dev0 + 8: + - 5 +adafactor: + desc: null + value: false +adam_beta1: + desc: null + value: 0.9 +adam_beta2: + desc: null + value: 0.999 +adam_epsilon: + desc: null + value: 1.0e-08 +add_cross_attention: + desc: null + value: false +architectures: + desc: null + value: + - SpeechEncoderDecoderModel +bad_words_ids: + desc: null + value: null +bf16: + desc: null + value: false +bf16_full_eval: + desc: null + value: false +bos_token_id: + desc: null + value: null +chunk_size_feed_forward: + desc: null + value: 0 +cross_attention_hidden_size: + desc: null + value: null +dataloader_drop_last: + desc: null + value: false +dataloader_num_workers: + desc: null + value: 0 +dataloader_pin_memory: + desc: null + value: true +ddp_bucket_cap_mb: + desc: null + value: None +ddp_find_unused_parameters: + desc: null + value: None +debug: + desc: null + value: '[]' +decoder: + desc: null + value: + _name_or_path: gpt2-medium + activation_function: gelu_new + add_cross_attention: true + architectures: + - GPT2LMHeadModel + attn_pdrop: 0.0 + bad_words_ids: null + bos_token_id: 50256 + chunk_size_feed_forward: 0 + cross_attention_hidden_size: null + decoder_start_token_id: null + diversity_penalty: 0.0 + do_sample: false + early_stopping: false + embd_pdrop: 0.0 + encoder_no_repeat_ngram_size: 0 + eos_token_id: 50256 + finetuning_task: null + forced_bos_token_id: null + forced_eos_token_id: null + id2label: + '0': LABEL_0 + '1': LABEL_1 + initializer_range: 0.02 + is_decoder: true + is_encoder_decoder: false + label2id: + LABEL_0: 0 + LABEL_1: 1 + layer_norm_epsilon: 1.0e-05 + length_penalty: 1.0 + max_length: 20 + min_length: 0 + model_type: gpt2 + n_ctx: 1024 + n_embd: 1024 + n_head: 16 + n_inner: null + n_layer: 24 + n_positions: 1024 + n_special: 0 + no_repeat_ngram_size: 0 + num_beam_groups: 1 + num_beams: 1 + num_return_sequences: 1 + output_attentions: false + output_hidden_states: false + output_scores: false + pad_token_id: null + predict_special_tokens: true + prefix: null + problem_type: null + pruned_heads: {} + remove_invalid_values: false + reorder_and_upcast_attn: false + repetition_penalty: 1.0 + resid_pdrop: 0.0 + return_dict: true + return_dict_in_generate: false + scale_attn_by_inverse_layer_idx: false + scale_attn_weights: true + sep_token_id: null + summary_activation: null + summary_first_dropout: 0.0 + summary_proj_to_labels: true + summary_type: cls_index + summary_use_proj: true + task_specific_params: + text-generation: + do_sample: true + max_length: 50 + temperature: 1.0 + tie_encoder_decoder: false + tie_word_embeddings: true + tokenizer_class: null + top_k: 50 + top_p: 1.0 + torch_dtype: null + torchscript: false + transformers_version: 4.17.0.dev0 + use_bfloat16: false + use_cache: false + vocab_size: 50257 +decoder_start_token_id: + desc: null + value: 50256 +deepspeed: + desc: null + value: None +disable_tqdm: + desc: null + value: false +diversity_penalty: + desc: null + value: 0.0 +do_eval: + desc: null + value: true +do_predict: + desc: null + value: false +do_sample: + desc: null + value: false +do_train: + desc: null + value: true +early_stopping: + desc: null + value: false +encoder: + desc: null + value: + _name_or_path: facebook/wav2vec2-large-lv60 + activation_dropout: 0.0 + adapter_kernel_size: 3 + adapter_stride: 2 + add_adapter: true + add_cross_attention: false + apply_spec_augment: false + architectures: + - Wav2Vec2ForPreTraining + attention_dropout: 0.0 + bad_words_ids: null + bos_token_id: 1 + chunk_size_feed_forward: 0 + classifier_proj_size: 256 + codevector_dim: 768 + contrastive_logits_temperature: 0.1 + conv_bias: true + conv_dim: + - 512 + - 512 + - 512 + - 512 + - 512 + - 512 + - 512 + conv_kernel: + - 10 + - 3 + - 3 + - 3 + - 3 + - 2 + - 2 + conv_stride: + - 5 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + cross_attention_hidden_size: null + ctc_loss_reduction: sum + ctc_zero_infinity: false + decoder_start_token_id: null + diversity_loss_weight: 0.1 + diversity_penalty: 0.0 + do_sample: false + do_stable_layer_norm: true + early_stopping: false + encoder_no_repeat_ngram_size: 0 + eos_token_id: 2 + feat_extract_activation: gelu + feat_extract_dropout: 0.0 + feat_extract_norm: layer + feat_proj_dropout: 0.0 + feat_quantizer_dropout: 0.0 + final_dropout: 0.0 + finetuning_task: null + forced_bos_token_id: null + forced_eos_token_id: null + gradient_checkpointing: false + hidden_act: gelu + hidden_dropout: 0.0 + hidden_dropout_prob: 0.0 + hidden_size: 1024 + id2label: + '0': LABEL_0 + '1': LABEL_1 + initializer_range: 0.02 + intermediate_size: 4096 + is_decoder: false + is_encoder_decoder: false + label2id: + LABEL_0: 0 + LABEL_1: 1 + layer_norm_eps: 1.0e-05 + layerdrop: 0.0 + length_penalty: 1.0 + mask_feature_length: 10 + mask_feature_min_masks: 0 + mask_feature_prob: 0.0 + mask_time_length: 10 + mask_time_min_masks: 2 + mask_time_prob: 0.0 + max_length: 20 + min_length: 0 + model_type: wav2vec2 + no_repeat_ngram_size: 0 + num_adapter_layers: 3 + num_attention_heads: 16 + num_beam_groups: 1 + num_beams: 1 + num_codevector_groups: 2 + num_codevectors_per_group: 320 + num_conv_pos_embedding_groups: 16 + num_conv_pos_embeddings: 128 + num_feat_extract_layers: 7 + num_hidden_layers: 24 + num_negatives: 100 + num_return_sequences: 1 + output_attentions: false + output_hidden_size: 1024 + output_hidden_states: false + output_scores: false + pad_token_id: 0 + prefix: null + problem_type: null + proj_codevector_dim: 768 + pruned_heads: {} + remove_invalid_values: false + repetition_penalty: 1.0 + return_dict: true + return_dict_in_generate: false + sep_token_id: null + task_specific_params: null + tdnn_dilation: + - 1 + - 2 + - 3 + - 1 + - 1 + tdnn_dim: + - 512 + - 512 + - 512 + - 512 + - 1500 + tdnn_kernel: + - 5 + - 3 + - 3 + - 1 + - 1 + temperature: 1.0 + tie_encoder_decoder: false + tie_word_embeddings: true + tokenizer_class: null + top_k: 50 + top_p: 1.0 + torch_dtype: null + torchscript: false + transformers_version: 4.17.0.dev0 + use_bfloat16: false + use_weighted_layer_sum: false + vocab_size: 32 + xvector_output_dim: 512 +encoder_no_repeat_ngram_size: + desc: null + value: 0 +eos_token_id: + desc: null + value: 50256 +eval_accumulation_steps: + desc: null + value: None +eval_batch_size: + desc: null + value: 16 +eval_steps: + desc: null + value: 500 +evaluation_strategy: + desc: null + value: steps +finetuning_task: + desc: null + value: null +forced_bos_token_id: + desc: null + value: null +forced_eos_token_id: + desc: null + value: null +fp16: + desc: null + value: true +fp16_backend: + desc: null + value: auto +fp16_full_eval: + desc: null + value: false +fp16_opt_level: + desc: null + value: O1 +generation_max_length: + desc: null + value: 40 +generation_num_beams: + desc: null + value: 1 +gradient_accumulation_steps: + desc: null + value: 4 +gradient_checkpointing: + desc: null + value: true +greater_is_better: + desc: null + value: None +group_by_length: + desc: null + value: true +half_precision_backend: + desc: null + value: amp +hub_model_id: + desc: null + value: None +hub_strategy: + desc: null + value: every_save +hub_token: + desc: null + value: +id2label: + desc: null + value: + '0': LABEL_0 + '1': LABEL_1 +ignore_data_skip: + desc: null + value: false +is_decoder: + desc: null + value: false +is_encoder_decoder: + desc: null + value: true +label2id: + desc: null + value: + LABEL_0: 0 + LABEL_1: 1 +label_names: + desc: null + value: None +label_smoothing_factor: + desc: null + value: 0.0 +learning_rate: + desc: null + value: 0.0003 +length_column_name: + desc: null + value: input_length +length_penalty: + desc: null + value: 1.0 +load_best_model_at_end: + desc: null + value: false +local_rank: + desc: null + value: -1 +log_level: + desc: null + value: -1 +log_level_replica: + desc: null + value: -1 +log_on_each_node: + desc: null + value: true +logging_dir: + desc: null + value: ./runs/Mar03_00-41-32_sanchit--v100 +logging_first_step: + desc: null + value: false +logging_nan_inf_filter: + desc: null + value: true +logging_steps: + desc: null + value: 1 +logging_strategy: + desc: null + value: steps +lr_scheduler_type: + desc: null + value: linear +max_grad_norm: + desc: null + value: 1.0 +max_length: + desc: null + value: 50 +max_steps: + desc: null + value: -1 +metric_for_best_model: + desc: null + value: None +min_length: + desc: null + value: 0 +model_type: + desc: null + value: speech-encoder-decoder +mp_parameters: + desc: null + value: '' +no_cuda: + desc: null + value: false +no_repeat_ngram_size: + desc: null + value: 0 +num_beam_groups: + desc: null + value: 1 +num_beams: + desc: null + value: 1 +num_return_sequences: + desc: null + value: 1 +num_train_epochs: + desc: null + value: 1.0 +optim: + desc: null + value: adamw_hf +output_attentions: + desc: null + value: false +output_dir: + desc: null + value: ./ +output_hidden_states: + desc: null + value: false +output_scores: + desc: null + value: false +overwrite_output_dir: + desc: null + value: true +pad_token_id: + desc: null + value: 50256 +past_index: + desc: null + value: -1 +per_device_eval_batch_size: + desc: null + value: 16 +per_device_train_batch_size: + desc: null + value: 16 +per_gpu_eval_batch_size: + desc: null + value: None +per_gpu_train_batch_size: + desc: null + value: None +predict_with_generate: + desc: null + value: true +prediction_loss_only: + desc: null + value: false +prefix: + desc: null + value: null +problem_type: + desc: null + value: null +processor_class: + desc: null + value: Wav2Vec2Processor +pruned_heads: + desc: null + value: {} +push_to_hub: + desc: null + value: true +push_to_hub_model_id: + desc: null + value: None +push_to_hub_organization: + desc: null + value: None +push_to_hub_token: + desc: null + value: +remove_invalid_values: + desc: null + value: false +remove_unused_columns: + desc: null + value: true +repetition_penalty: + desc: null + value: 1.0 +report_to: + desc: null + value: '[''wandb'']' +resume_from_checkpoint: + desc: null + value: None +return_dict: + desc: null + value: true +return_dict_in_generate: + desc: null + value: false +run_name: + desc: null + value: ./ +save_on_each_node: + desc: null + value: false +save_steps: + desc: null + value: 500 +save_strategy: + desc: null + value: steps +save_total_limit: + desc: null + value: 1 +seed: + desc: null + value: 42 +sep_token_id: + desc: null + value: null +sharded_ddp: + desc: null + value: '[]' +skip_memory_metrics: + desc: null + value: true +sortish_sampler: + desc: null + value: false +task_specific_params: + desc: null + value: null +temperature: + desc: null + value: 1.0 +tf32: + desc: null + value: None +tie_encoder_decoder: + desc: null + value: false +tie_word_embeddings: + desc: null + value: false +tokenizer_class: + desc: null + value: null +top_k: + desc: null + value: 50 +top_p: + desc: null + value: 1.0 +torch_dtype: + desc: null + value: torch.float32 +torchscript: + desc: null + value: false +tpu_metrics_debug: + desc: null + value: false +tpu_num_cores: + desc: null + value: None +train_batch_size: + desc: null + value: 16 +transformers_version: + desc: null + value: null +use_bfloat16: + desc: null + value: false +use_cache: + desc: null + value: false +use_legacy_prediction_loop: + desc: null + value: false +warmup_ratio: + desc: null + value: 0.0 +warmup_steps: + desc: null + value: 500 +weight_decay: + desc: null + value: 0.0 +xpu_backend: + desc: null + value: None diff --git a/wandb/run-20220303_004215-2ttcbe3s/files/output.log b/wandb/run-20220303_004215-2ttcbe3s/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..03e5b7190a466daa0087ab5f5b707f03a871d42d --- /dev/null +++ b/wandb/run-20220303_004215-2ttcbe3s/files/output.log @@ -0,0 +1,37 @@ + + + 0%| | 0/446 [00:00> Could not estimate the number of tokens of the input, floating-point operations will not be computed +Traceback (most recent call last): + File "/home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/run_speech_recognition_seq2seq.py", line 539, in + main() + File "/home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/run_speech_recognition_seq2seq.py", line 491, in main + train_result = trainer.train(resume_from_checkpoint=checkpoint) + File "/home/sanchit_huggingface_co/transformers/src/transformers/trainer.py", line 1384, in train + tr_loss_step = self.training_step(model, inputs) + File "/home/sanchit_huggingface_co/transformers/src/transformers/trainer.py", line 1959, in training_step + loss = self.compute_loss(model, inputs) + File "/home/sanchit_huggingface_co/transformers/src/transformers/trainer.py", line 1991, in compute_loss + outputs = model(**inputs) + File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl + return forward_call(*input, **kwargs) + File "/home/sanchit_huggingface_co/transformers/src/transformers/models/speech_encoder_decoder/modeling_speech_encoder_decoder.py", line 503, in forward + encoder_outputs = self.encoder( + File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl + return forward_call(*input, **kwargs) + File "/home/sanchit_huggingface_co/transformers/src/transformers/models/wav2vec2/modeling_wav2vec2.py", line 1346, in forward + extract_features = self.feature_extractor(input_values) + File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl + return forward_call(*input, **kwargs) + File "/home/sanchit_huggingface_co/transformers/src/transformers/models/wav2vec2/modeling_wav2vec2.py", line 514, in forward + hidden_states = conv_layer(hidden_states) + File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl + return forward_call(*input, **kwargs) + File "/home/sanchit_huggingface_co/transformers/src/transformers/models/wav2vec2/modeling_wav2vec2.py", line 389, in forward + hidden_states = self.layer_norm(hidden_states) + File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl + return forward_call(*input, **kwargs) + File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/torch/nn/modules/normalization.py", line 189, in forward + return F.layer_norm( + File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/torch/nn/functional.py", line 2347, in layer_norm + return torch.layer_norm(input, normalized_shape, weight, bias, eps, torch.backends.cudnn.enabled) +RuntimeError: CUDA out of memory. Tried to allocate 1.64 GiB (GPU 0; 15.78 GiB total capacity; 10.11 GiB already allocated; 707.31 MiB free; 13.39 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF \ No newline at end of file diff --git a/wandb/run-20220303_004215-2ttcbe3s/files/requirements.txt b/wandb/run-20220303_004215-2ttcbe3s/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..3974f97a24952deb24d97850f53367da9e7c347d --- /dev/null +++ b/wandb/run-20220303_004215-2ttcbe3s/files/requirements.txt @@ -0,0 +1,184 @@ +absl-py==1.0.0 +aiohttp==3.8.1 +aiosignal==1.2.0 +anyio==3.5.0 +appdirs==1.4.4 +argon2-cffi-bindings==21.2.0 +argon2-cffi==21.3.0 +asttokens==2.0.5 +async-timeout==4.0.2 +attrs==21.4.0 +audioread==2.1.9 +babel==2.9.1 +backcall==0.2.0 +bitsandbytes-cuda113==0.26.0 +black==22.1.0 +bleach==4.1.0 +cachetools==5.0.0 +certifi==2021.10.8 +cffi==1.15.0 +charset-normalizer==2.0.11 +chex==0.1.0 +click==8.0.3 +clldutils==3.10.1 +colorlog==6.6.0 +csvw==1.11.0 +cycler==0.11.0 +datasets==1.18.3 +debugpy==1.5.1 +decorator==5.1.1 +defusedxml==0.7.1 +dill==0.3.4 +dlinfo==1.2.1 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +entrypoints==0.4 +executing==0.8.2 +filelock==3.4.2 +flatbuffers==2.0 +flax==0.4.0 +fonttools==4.29.1 +frozenlist==1.3.0 +fsspec==2022.1.0 +gitdb==4.0.9 +gitpython==3.1.27 +google-auth-oauthlib==0.4.6 +google-auth==2.6.0 +grpcio==1.43.0 +huggingface-hub==0.4.0 +hypothesis==6.36.1 +idna==3.3 +importlib-metadata==4.10.1 +ipykernel==6.8.0 +ipython-genutils==0.2.0 +ipython==8.0.1 +ipywidgets==7.6.5 +isodate==0.6.1 +jax==0.2.28 +jaxlib==0.1.76+cuda11.cudnn82 +jedi==0.18.1 +jinja2==3.0.3 +jiwer==2.3.0 +joblib==1.1.0 +json5==0.9.6 +jsonschema==4.4.0 +jupyter-client==7.1.2 +jupyter-console==6.4.0 +jupyter-core==4.9.1 +jupyter-server==1.13.5 +jupyter==1.0.0 +jupyterlab-pygments==0.1.2 +jupyterlab-server==2.10.3 +jupyterlab-widgets==1.0.2 +jupyterlab==3.2.9 +kiwisolver==1.3.2 +librosa==0.8.1 +llvmlite==0.38.0 +markdown==3.3.6 +markupsafe==2.0.1 +matplotlib-inline==0.1.3 +matplotlib==3.5.1 +mistune==0.8.4 +msgpack==1.0.3 +multidict==6.0.2 +multiprocess==0.70.12.2 +mypy-extensions==0.4.3 +nbclassic==0.3.5 +nbclient==0.5.10 +nbconvert==6.4.1 +nbformat==5.1.3 +nest-asyncio==1.5.4 +notebook==6.4.8 +numba==0.55.1 +numpy==1.21.5 +oauthlib==3.2.0 +opt-einsum==3.3.0 +optax==0.1.0 +packaging==21.3 +pandas==1.4.0 +pandocfilters==1.5.0 +parso==0.8.3 +pathspec==0.9.0 +pathtools==0.1.2 +pexpect==4.8.0 +phonemizer==3.0.1 +pickleshare==0.7.5 +pillow==9.0.0 +pip==22.0.2 +pkg-resources==0.0.0 +platformdirs==2.4.1 +pooch==1.6.0 +prometheus-client==0.13.1 +promise==2.3 +prompt-toolkit==3.0.26 +protobuf==3.19.4 +psutil==5.9.0 +ptyprocess==0.7.0 +pure-eval==0.2.2 +pyarrow==6.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pycparser==2.21 +pyctcdecode==0.3.0 +pygments==2.11.2 +pygtrie==2.4.2 +pyparsing==3.0.7 +pyrsistent==0.18.1 +python-dateutil==2.8.2 +python-levenshtein==0.12.2 +pytz==2021.3 +pyyaml==6.0 +pyzmq==22.3.0 +qtconsole==5.2.2 +qtpy==2.0.1 +regex==2022.1.18 +requests-oauthlib==1.3.1 +requests==2.27.1 +resampy==0.2.2 +rfc3986==2.0.0 +rsa==4.8 +sacremoses==0.0.47 +scikit-learn==1.0.2 +scipy==1.7.3 +segments==2.2.0 +send2trash==1.8.0 +sentry-sdk==1.5.6 +setuptools==44.1.1 +shortuuid==1.0.8 +six==1.16.0 +smmap==5.0.0 +sniffio==1.2.0 +sortedcontainers==2.4.0 +soundfile==0.10.3.post1 +stack-data==0.1.4 +tabulate==0.8.9 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.1 +tensorboard==2.8.0 +termcolor==1.1.0 +terminado==0.13.1 +testpath==0.5.0 +threadpoolctl==3.1.0 +tokenizers==0.11.4 +tomli==2.0.0 +toolz==0.11.2 +torch==1.10.2+cu113 +torchaudio==0.10.2+cu113 +tornado==6.1 +tqdm==4.62.3 +traitlets==5.1.1 +transformers==4.17.0.dev0 +typing-extensions==3.10.0.2 +uritemplate==4.1.1 +urllib3==1.26.8 +wandb==0.12.10 +wcwidth==0.2.5 +webencodings==0.5.1 +websocket-client==1.2.3 +werkzeug==2.0.2 +wheel==0.37.1 +widgetsnbextension==3.5.2 +xxhash==2.0.2 +yarl==1.7.2 +yaspin==2.1.0 +zipp==3.7.0 \ No newline at end of file diff --git a/wandb/run-20220303_004215-2ttcbe3s/files/wandb-metadata.json b/wandb/run-20220303_004215-2ttcbe3s/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..40ae65157c7d8a1c8aced03478868ba2ba6ab4d0 --- /dev/null +++ b/wandb/run-20220303_004215-2ttcbe3s/files/wandb-metadata.json @@ -0,0 +1,60 @@ +{ + "os": "Linux-5.11.0-1028-gcp-x86_64-with-glibc2.33", + "python": "3.9.5", + "heartbeatAt": "2022-03-03T00:42:17.150413", + "startedAt": "2022-03-03T00:42:15.987965", + "docker": null, + "gpu": "Tesla V100-SXM2-16GB", + "gpu_count": 2, + "cpu_count": 16, + "cuda": null, + "args": [ + "--dataset_name=librispeech_asr", + "--model_name_or_path=./", + "--tokenizer_name=./", + "--dataset_config_name=clean", + "--train_split_name=train.100", + "--eval_split_name=validation", + "--output_dir=./", + "--preprocessing_num_workers=1", + "--length_column_name=input_length", + "--overwrite_output_dir", + "--num_train_epochs=1", + "--per_device_train_batch_size=16", + "--per_device_eval_batch_size=16", + "--gradient_accumulation_steps=4", + "--generation_max_length=40", + "--generation_num_beams=1", + "--learning_rate=3e-4", + "--warmup_steps=500", + "--evaluation_strategy=steps", + "--text_column_name=text", + "--save_steps=500", + "--eval_steps=500", + "--logging_steps=1", + "--save_total_limit=1", + "--freeze_feature_encoder", + "--gradient_checkpointing", + "--fp16", + "--group_by_length", + "--predict_with_generate", + "--do_lower_case", + "--do_train", + "--do_eval", + "--report_to=wandb", + "--push_to_hub", + "--use_auth_token" + ], + "state": "running", + "program": "/home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/run_speech_recognition_seq2seq.py", + "codePath": "run_speech_recognition_seq2seq.py", + "git": { + "remote": "https://huggingface.co/sanchit-gandhi/wav2vec2-gpt2-wandb-grid-search", + "commit": "8c7181143c175387040dc1a6ac2ddbc9179b550c" + }, + "email": "sanchit@huggingface.co", + "root": "/home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search", + "host": "sanchit--v100", + "username": "sanchit_huggingface_co", + "executable": "/home/sanchit_huggingface_co/gcp/bin/python" +} diff --git a/wandb/run-20220303_004215-2ttcbe3s/files/wandb-summary.json b/wandb/run-20220303_004215-2ttcbe3s/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..a594e25ada167c5ad54a828e8a5b1a6223620b44 --- /dev/null +++ b/wandb/run-20220303_004215-2ttcbe3s/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb": {"runtime": 8}} \ No newline at end of file diff --git a/wandb/run-20220303_004215-2ttcbe3s/logs/debug-internal.log b/wandb/run-20220303_004215-2ttcbe3s/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..2681bd30246f3ce4dc7f706c14b4f60a2c3ecf5c --- /dev/null +++ b/wandb/run-20220303_004215-2ttcbe3s/logs/debug-internal.log @@ -0,0 +1,138 @@ +2022-03-03 00:42:16,953 INFO MainThread:267379 [internal.py:wandb_internal():89] W&B internal server running at pid: 267379, started at: 2022-03-03 00:42:16.952980 +2022-03-03 00:42:16,956 DEBUG HandlerThread:267379 [handler.py:handle_request():131] handle_request: check_version +2022-03-03 00:42:16,956 INFO WriterThread:267379 [datastore.py:open_for_write():77] open: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004215-2ttcbe3s/run-2ttcbe3s.wandb +2022-03-03 00:42:16,957 DEBUG SenderThread:267379 [sender.py:send():235] send: header +2022-03-03 00:42:16,957 DEBUG SenderThread:267379 [sender.py:send_request():249] send_request: check_version +2022-03-03 00:42:17,024 DEBUG SenderThread:267379 [sender.py:send():235] send: run +2022-03-03 00:42:17,144 INFO SenderThread:267379 [dir_watcher.py:__init__():169] watching files in: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004215-2ttcbe3s/files +2022-03-03 00:42:17,144 INFO SenderThread:267379 [sender.py:_start_run_threads():809] run started: 2ttcbe3s with start time 1646268136 +2022-03-03 00:42:17,145 DEBUG SenderThread:267379 [sender.py:send():235] send: summary +2022-03-03 00:42:17,145 INFO SenderThread:267379 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:42:17,145 DEBUG HandlerThread:267379 [handler.py:handle_request():131] handle_request: run_start +2022-03-03 00:42:17,150 DEBUG HandlerThread:267379 [meta.py:__init__():36] meta init +2022-03-03 00:42:17,150 DEBUG HandlerThread:267379 [meta.py:__init__():50] meta init done +2022-03-03 00:42:17,150 DEBUG HandlerThread:267379 [meta.py:probe():210] probe +2022-03-03 00:42:17,156 DEBUG HandlerThread:267379 [meta.py:_setup_git():200] setup git +2022-03-03 00:42:17,172 DEBUG HandlerThread:267379 [meta.py:_setup_git():207] setup git done +2022-03-03 00:42:17,172 DEBUG HandlerThread:267379 [meta.py:_save_pip():54] save pip +2022-03-03 00:42:17,173 DEBUG HandlerThread:267379 [meta.py:_save_pip():68] save pip done +2022-03-03 00:42:17,173 DEBUG HandlerThread:267379 [meta.py:probe():248] probe done +2022-03-03 00:42:17,254 DEBUG SenderThread:267379 [sender.py:send():235] send: files +2022-03-03 00:42:17,254 INFO SenderThread:267379 [sender.py:_save_file():944] saving file wandb-metadata.json with policy now +2022-03-03 00:42:17,259 DEBUG HandlerThread:267379 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:42:17,259 DEBUG SenderThread:267379 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:42:17,295 DEBUG SenderThread:267379 [sender.py:send():235] send: config +2022-03-03 00:42:17,296 DEBUG SenderThread:267379 [sender.py:send():235] send: metric +2022-03-03 00:42:17,296 DEBUG SenderThread:267379 [sender.py:send():235] send: metric +2022-03-03 00:42:17,296 WARNING SenderThread:267379 [sender.py:send_metric():902] Seen metric with glob (shouldnt happen) +2022-03-03 00:42:17,551 INFO Thread-11 :267379 [upload_job.py:push():137] Uploaded file /tmp/tmpamllbtqswandb/2k16no24-wandb-metadata.json +2022-03-03 00:42:18,148 INFO Thread-8 :267379 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004215-2ttcbe3s/files/wandb-metadata.json +2022-03-03 00:42:18,148 INFO Thread-8 :267379 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004215-2ttcbe3s/files/wandb-summary.json +2022-03-03 00:42:18,148 INFO Thread-8 :267379 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004215-2ttcbe3s/files/requirements.txt +2022-03-03 00:42:18,148 INFO Thread-8 :267379 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004215-2ttcbe3s/files/output.log +2022-03-03 00:42:20,146 INFO Thread-8 :267379 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004215-2ttcbe3s/files/output.log +2022-03-03 00:42:24,147 INFO Thread-8 :267379 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004215-2ttcbe3s/files/output.log +2022-03-03 00:42:26,115 DEBUG HandlerThread:267379 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-03 00:42:26,115 DEBUG SenderThread:267379 [sender.py:send():235] send: telemetry +2022-03-03 00:42:26,115 DEBUG SenderThread:267379 [sender.py:send():235] send: exit +2022-03-03 00:42:26,116 INFO SenderThread:267379 [sender.py:send_exit():371] handling exit code: 1 +2022-03-03 00:42:26,116 INFO SenderThread:267379 [sender.py:send_exit():373] handling runtime: 8 +2022-03-03 00:42:26,116 INFO SenderThread:267379 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:42:26,116 INFO SenderThread:267379 [sender.py:send_exit():379] send defer +2022-03-03 00:42:26,116 DEBUG SenderThread:267379 [sender.py:send_request():249] send_request: poll_exit +2022-03-03 00:42:26,117 DEBUG HandlerThread:267379 [handler.py:handle_request():131] handle_request: defer +2022-03-03 00:42:26,117 INFO HandlerThread:267379 [handler.py:handle_request_defer():154] handle defer: 0 +2022-03-03 00:42:26,117 DEBUG SenderThread:267379 [sender.py:send_request():249] send_request: defer +2022-03-03 00:42:26,117 INFO SenderThread:267379 [sender.py:send_request_defer():388] handle sender defer: 0 +2022-03-03 00:42:26,117 INFO SenderThread:267379 [sender.py:transition_state():392] send defer: 1 +2022-03-03 00:42:26,118 DEBUG HandlerThread:267379 [handler.py:handle_request():131] handle_request: defer +2022-03-03 00:42:26,118 INFO HandlerThread:267379 [handler.py:handle_request_defer():154] handle defer: 1 +2022-03-03 00:42:26,148 INFO Thread-8 :267379 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004215-2ttcbe3s/files/wandb-summary.json +2022-03-03 00:42:26,148 INFO Thread-8 :267379 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004215-2ttcbe3s/files/output.log +2022-03-03 00:42:26,201 DEBUG SenderThread:267379 [sender.py:send_request():249] send_request: defer +2022-03-03 00:42:26,201 INFO SenderThread:267379 [sender.py:send_request_defer():388] handle sender defer: 1 +2022-03-03 00:42:26,201 INFO SenderThread:267379 [sender.py:transition_state():392] send defer: 2 +2022-03-03 00:42:26,202 DEBUG SenderThread:267379 [sender.py:send():235] send: stats +2022-03-03 00:42:26,202 DEBUG HandlerThread:267379 [handler.py:handle_request():131] handle_request: defer +2022-03-03 00:42:26,203 INFO HandlerThread:267379 [handler.py:handle_request_defer():154] handle defer: 2 +2022-03-03 00:42:26,203 DEBUG SenderThread:267379 [sender.py:send_request():249] send_request: defer +2022-03-03 00:42:26,203 INFO SenderThread:267379 [sender.py:send_request_defer():388] handle sender defer: 2 +2022-03-03 00:42:26,203 INFO SenderThread:267379 [sender.py:transition_state():392] send defer: 3 +2022-03-03 00:42:26,203 DEBUG HandlerThread:267379 [handler.py:handle_request():131] handle_request: defer +2022-03-03 00:42:26,203 INFO HandlerThread:267379 [handler.py:handle_request_defer():154] handle defer: 3 +2022-03-03 00:42:26,203 DEBUG SenderThread:267379 [sender.py:send():235] send: summary +2022-03-03 00:42:26,204 INFO SenderThread:267379 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:42:26,204 DEBUG SenderThread:267379 [sender.py:send_request():249] send_request: defer +2022-03-03 00:42:26,204 INFO SenderThread:267379 [sender.py:send_request_defer():388] handle sender defer: 3 +2022-03-03 00:42:26,204 INFO SenderThread:267379 [sender.py:transition_state():392] send defer: 4 +2022-03-03 00:42:26,204 DEBUG HandlerThread:267379 [handler.py:handle_request():131] handle_request: defer +2022-03-03 00:42:26,205 INFO HandlerThread:267379 [handler.py:handle_request_defer():154] handle defer: 4 +2022-03-03 00:42:26,205 DEBUG SenderThread:267379 [sender.py:send_request():249] send_request: defer +2022-03-03 00:42:26,205 INFO SenderThread:267379 [sender.py:send_request_defer():388] handle sender defer: 4 +2022-03-03 00:42:26,218 DEBUG HandlerThread:267379 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-03 00:42:26,313 INFO SenderThread:267379 [sender.py:transition_state():392] send defer: 5 +2022-03-03 00:42:26,313 DEBUG SenderThread:267379 [sender.py:send_request():249] send_request: poll_exit +2022-03-03 00:42:26,314 DEBUG HandlerThread:267379 [handler.py:handle_request():131] handle_request: defer +2022-03-03 00:42:26,314 INFO HandlerThread:267379 [handler.py:handle_request_defer():154] handle defer: 5 +2022-03-03 00:42:26,314 DEBUG SenderThread:267379 [sender.py:send_request():249] send_request: defer +2022-03-03 00:42:26,315 INFO SenderThread:267379 [sender.py:send_request_defer():388] handle sender defer: 5 +2022-03-03 00:42:26,315 INFO SenderThread:267379 [dir_watcher.py:finish():283] shutting down directory watcher +2022-03-03 00:42:26,415 DEBUG HandlerThread:267379 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-03 00:42:27,148 INFO Thread-8 :267379 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004215-2ttcbe3s/files/wandb-summary.json +2022-03-03 00:42:27,149 INFO SenderThread:267379 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004215-2ttcbe3s/files/config.yaml +2022-03-03 00:42:27,149 INFO SenderThread:267379 [dir_watcher.py:finish():313] scan: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004215-2ttcbe3s/files +2022-03-03 00:42:27,150 INFO SenderThread:267379 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004215-2ttcbe3s/files/wandb-metadata.json wandb-metadata.json +2022-03-03 00:42:27,150 INFO SenderThread:267379 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004215-2ttcbe3s/files/output.log output.log +2022-03-03 00:42:27,150 INFO SenderThread:267379 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004215-2ttcbe3s/files/wandb-summary.json wandb-summary.json +2022-03-03 00:42:27,152 INFO SenderThread:267379 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004215-2ttcbe3s/files/requirements.txt requirements.txt +2022-03-03 00:42:27,156 INFO SenderThread:267379 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004215-2ttcbe3s/files/config.yaml config.yaml +2022-03-03 00:42:27,156 INFO SenderThread:267379 [sender.py:transition_state():392] send defer: 6 +2022-03-03 00:42:27,156 DEBUG SenderThread:267379 [sender.py:send_request():249] send_request: poll_exit +2022-03-03 00:42:27,157 DEBUG HandlerThread:267379 [handler.py:handle_request():131] handle_request: defer +2022-03-03 00:42:27,157 INFO HandlerThread:267379 [handler.py:handle_request_defer():154] handle defer: 6 +2022-03-03 00:42:27,160 DEBUG SenderThread:267379 [sender.py:send_request():249] send_request: defer +2022-03-03 00:42:27,160 INFO SenderThread:267379 [sender.py:send_request_defer():388] handle sender defer: 6 +2022-03-03 00:42:27,160 INFO SenderThread:267379 [file_pusher.py:finish():177] shutting down file pusher +2022-03-03 00:42:27,258 DEBUG HandlerThread:267379 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-03 00:42:27,258 DEBUG SenderThread:267379 [sender.py:send_request():249] send_request: poll_exit +2022-03-03 00:42:27,360 DEBUG HandlerThread:267379 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-03 00:42:27,360 DEBUG SenderThread:267379 [sender.py:send_request():249] send_request: poll_exit +2022-03-03 00:42:27,434 INFO Thread-12 :267379 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004215-2ttcbe3s/files/output.log +2022-03-03 00:42:27,457 INFO Thread-13 :267379 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004215-2ttcbe3s/files/wandb-summary.json +2022-03-03 00:42:27,460 INFO Thread-15 :267379 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004215-2ttcbe3s/files/config.yaml +2022-03-03 00:42:27,462 INFO Thread-14 :267379 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004215-2ttcbe3s/files/requirements.txt +2022-03-03 00:42:27,462 DEBUG HandlerThread:267379 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-03 00:42:27,463 DEBUG SenderThread:267379 [sender.py:send_request():249] send_request: poll_exit +2022-03-03 00:42:27,565 DEBUG HandlerThread:267379 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-03 00:42:27,565 DEBUG SenderThread:267379 [sender.py:send_request():249] send_request: poll_exit +2022-03-03 00:42:27,663 INFO Thread-7 :267379 [sender.py:transition_state():392] send defer: 7 +2022-03-03 00:42:27,664 DEBUG HandlerThread:267379 [handler.py:handle_request():131] handle_request: defer +2022-03-03 00:42:27,664 INFO HandlerThread:267379 [handler.py:handle_request_defer():154] handle defer: 7 +2022-03-03 00:42:27,664 DEBUG SenderThread:267379 [sender.py:send_request():249] send_request: defer +2022-03-03 00:42:27,664 INFO SenderThread:267379 [sender.py:send_request_defer():388] handle sender defer: 7 +2022-03-03 00:42:27,667 DEBUG HandlerThread:267379 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-03 00:42:27,834 INFO SenderThread:267379 [sender.py:transition_state():392] send defer: 8 +2022-03-03 00:42:27,834 DEBUG SenderThread:267379 [sender.py:send_request():249] send_request: poll_exit +2022-03-03 00:42:27,835 DEBUG HandlerThread:267379 [handler.py:handle_request():131] handle_request: defer +2022-03-03 00:42:27,835 INFO HandlerThread:267379 [handler.py:handle_request_defer():154] handle defer: 8 +2022-03-03 00:42:27,835 DEBUG SenderThread:267379 [sender.py:send_request():249] send_request: defer +2022-03-03 00:42:27,835 INFO SenderThread:267379 [sender.py:send_request_defer():388] handle sender defer: 8 +2022-03-03 00:42:27,835 INFO SenderThread:267379 [sender.py:transition_state():392] send defer: 9 +2022-03-03 00:42:27,836 DEBUG HandlerThread:267379 [handler.py:handle_request():131] handle_request: defer +2022-03-03 00:42:27,836 INFO HandlerThread:267379 [handler.py:handle_request_defer():154] handle defer: 9 +2022-03-03 00:42:27,836 DEBUG SenderThread:267379 [sender.py:send():235] send: final +2022-03-03 00:42:27,836 DEBUG SenderThread:267379 [sender.py:send():235] send: footer +2022-03-03 00:42:27,836 DEBUG SenderThread:267379 [sender.py:send_request():249] send_request: defer +2022-03-03 00:42:27,836 INFO SenderThread:267379 [sender.py:send_request_defer():388] handle sender defer: 9 +2022-03-03 00:42:27,936 DEBUG HandlerThread:267379 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-03 00:42:27,937 DEBUG SenderThread:267379 [sender.py:send_request():249] send_request: poll_exit +2022-03-03 00:42:27,937 INFO SenderThread:267379 [file_pusher.py:join():182] waiting for file pusher +2022-03-03 00:42:28,000 DEBUG HandlerThread:267379 [handler.py:handle_request():131] handle_request: get_summary +2022-03-03 00:42:28,001 DEBUG HandlerThread:267379 [handler.py:handle_request():131] handle_request: sampled_history +2022-03-03 00:42:28,002 DEBUG HandlerThread:267379 [handler.py:handle_request():131] handle_request: shutdown +2022-03-03 00:42:28,002 INFO HandlerThread:267379 [handler.py:finish():739] shutting down handler +2022-03-03 00:42:28,836 INFO WriterThread:267379 [datastore.py:close():281] close: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004215-2ttcbe3s/run-2ttcbe3s.wandb +2022-03-03 00:42:28,999 INFO SenderThread:267379 [sender.py:finish():1075] shutting down sender +2022-03-03 00:42:28,999 INFO SenderThread:267379 [file_pusher.py:finish():177] shutting down file pusher +2022-03-03 00:42:28,999 INFO SenderThread:267379 [file_pusher.py:join():182] waiting for file pusher +2022-03-03 00:42:29,001 INFO MainThread:267379 [internal.py:handle_exit():79] Internal process exited diff --git a/wandb/run-20220303_004215-2ttcbe3s/logs/debug.log b/wandb/run-20220303_004215-2ttcbe3s/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..0b7d81d23c3961ba7ccb530eed16213000d5dc23 --- /dev/null +++ b/wandb/run-20220303_004215-2ttcbe3s/logs/debug.log @@ -0,0 +1,107 @@ +2022-03-03 00:42:15,989 INFO MainThread:267272 [wandb_setup.py:_flush():75] Loading settings from /home/sanchit_huggingface_co/.config/wandb/settings +2022-03-03 00:42:15,990 INFO MainThread:267272 [wandb_setup.py:_flush():75] Loading settings from /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/settings +2022-03-03 00:42:15,990 INFO MainThread:267272 [wandb_setup.py:_flush():75] Loading settings from environment variables: {} +2022-03-03 00:42:15,990 INFO MainThread:267272 [wandb_setup.py:_flush():75] Inferring run settings from compute environment: {'program_relpath': 'run_speech_recognition_seq2seq.py', 'program': '/home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/run_speech_recognition_seq2seq.py'} +2022-03-03 00:42:15,990 INFO MainThread:267272 [wandb_init.py:_log_setup():386] Logging user logs to /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004215-2ttcbe3s/logs/debug.log +2022-03-03 00:42:15,990 INFO MainThread:267272 [wandb_init.py:_log_setup():387] Logging internal logs to /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004215-2ttcbe3s/logs/debug-internal.log +2022-03-03 00:42:15,990 INFO MainThread:267272 [wandb_init.py:init():420] calling init triggers +2022-03-03 00:42:15,990 INFO MainThread:267272 [wandb_init.py:init():425] wandb.init called with sweep_config: {} +config: {} +2022-03-03 00:42:15,990 INFO MainThread:267272 [wandb_init.py:init():471] starting backend +2022-03-03 00:42:15,990 INFO MainThread:267272 [backend.py:_multiprocessing_setup():99] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2022-03-03 00:42:16,048 INFO MainThread:267272 [backend.py:ensure_launched():219] starting backend process... +2022-03-03 00:42:16,104 INFO MainThread:267272 [backend.py:ensure_launched():224] started backend process with pid: 267379 +2022-03-03 00:42:16,106 INFO MainThread:267272 [wandb_init.py:init():480] backend started and connected +2022-03-03 00:42:16,116 INFO MainThread:267272 [wandb_init.py:init():550] updated telemetry +2022-03-03 00:42:16,256 INFO MainThread:267272 [wandb_init.py:init():581] communicating current version +2022-03-03 00:42:17,023 INFO MainThread:267272 [wandb_init.py:init():586] got version response upgrade_message: "wandb version 0.12.11 is available! To upgrade, please run:\n $ pip install wandb --upgrade" + +2022-03-03 00:42:17,023 INFO MainThread:267272 [wandb_init.py:init():596] communicating run to backend with 30 second timeout +2022-03-03 00:42:17,145 INFO MainThread:267272 [wandb_init.py:init():624] starting run threads in backend +2022-03-03 00:42:17,258 INFO MainThread:267272 [wandb_run.py:_console_start():1827] atexit reg +2022-03-03 00:42:17,258 INFO MainThread:267272 [wandb_run.py:_redirect():1701] redirect: SettingsConsole.REDIRECT +2022-03-03 00:42:17,259 INFO MainThread:267272 [wandb_run.py:_redirect():1706] Redirecting console. +2022-03-03 00:42:17,261 INFO MainThread:267272 [wandb_run.py:_redirect():1762] Redirects installed. +2022-03-03 00:42:17,261 INFO MainThread:267272 [wandb_init.py:init():651] run started, returning control to user process +2022-03-03 00:42:17,263 INFO MainThread:267272 [wandb_run.py:_config_callback():966] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'torch.float32', 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 50, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'architectures': ['SpeechEncoderDecoderModel'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 50256, 'eos_token_id': 50256, 'sep_token_id': None, 'decoder_start_token_id': 50256, 'task_specific_params': None, 'problem_type': None, '_name_or_path': './', 'transformers_version': None, 'decoder': {'vocab_size': 50257, 'n_positions': 1024, 'n_embd': 1024, 'n_layer': 24, 'n_head': 16, 'n_inner': None, 'activation_function': 'gelu_new', 'resid_pdrop': 0.0, 'embd_pdrop': 0.0, 'attn_pdrop': 0.0, 'layer_norm_epsilon': 1e-05, 'initializer_range': 0.02, 'summary_type': 'cls_index', 'summary_use_proj': True, 'summary_activation': None, 'summary_first_dropout': 0.0, 'summary_proj_to_labels': True, 'scale_attn_weights': True, 'use_cache': False, 'scale_attn_by_inverse_layer_idx': False, 'reorder_and_upcast_attn': False, 'bos_token_id': 50256, 'eos_token_id': 50256, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': True, 'cross_attention_hidden_size': None, 'add_cross_attention': True, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'architectures': ['GPT2LMHeadModel'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'pad_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': {'text-generation': {'do_sample': True, 'max_length': 50}}, 'problem_type': None, '_name_or_path': 'gpt2-medium', 'transformers_version': '4.17.0.dev0', 'n_ctx': 1024, 'n_special': 0, 'predict_special_tokens': True, 'model_type': 'gpt2'}, 'encoder': {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'architectures': ['Wav2Vec2ForPreTraining'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 1, 'pad_token_id': 0, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'facebook/wav2vec2-large-lv60', 'transformers_version': '4.17.0.dev0', 'feat_extract_dropout': 0.0, 'gradient_checkpointing': False, 'hidden_dropout_prob': 0.0, 'num_feat_extract_layers': 7, 'hidden_size': 1024, 'feat_extract_norm': 'layer', 'feat_extract_activation': 'gelu', 'conv_dim': [512, 512, 512, 512, 512, 512, 512], 'conv_stride': [5, 2, 2, 2, 2, 2, 2], 'conv_kernel': [10, 3, 3, 3, 3, 2, 2], 'conv_bias': True, 'num_conv_pos_embeddings': 128, 'num_conv_pos_embedding_groups': 16, 'num_hidden_layers': 24, 'intermediate_size': 4096, 'hidden_act': 'gelu', 'num_attention_heads': 16, 'hidden_dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'feat_proj_dropout': 0.0, 'final_dropout': 0.0, 'layerdrop': 0.0, 'layer_norm_eps': 1e-05, 'initializer_range': 0.02, 'vocab_size': 32, 'do_stable_layer_norm': True, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.0, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'num_codevectors_per_group': 320, 'num_codevector_groups': 2, 'contrastive_logits_temperature': 0.1, 'feat_quantizer_dropout': 0.0, 'num_negatives': 100, 'codevector_dim': 768, 'proj_codevector_dim': 768, 'diversity_loss_weight': 0.1, 'ctc_loss_reduction': 'sum', 'ctc_zero_infinity': False, 'add_adapter': True, 'adapter_kernel_size': 3, 'adapter_stride': 2, 'num_adapter_layers': 3, 'output_hidden_size': 1024, 'classifier_proj_size': 256, 'tdnn_dim': [512, 512, 512, 512, 1500], 'tdnn_kernel': [5, 3, 3, 1, 1], 'tdnn_dilation': [1, 2, 3, 1, 1], 'xvector_output_dim': 512, 'model_type': 'wav2vec2'}, 'model_type': 'speech-encoder-decoder', 'processor_class': 'Wav2Vec2Processor', 'use_cache': False, 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 16, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': 'None', 'per_gpu_eval_batch_size': 'None', 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': 'None', 'learning_rate': 0.0003, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1.0, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Mar03_00-41-32_sanchit--v100', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 1, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 500, 'save_total_limit': 1, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'amp', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': 'None', 'local_rank': -1, 'xpu_backend': 'None', 'tpu_num_cores': 'None', 'tpu_metrics_debug': False, 'debug': '[]', 'dataloader_drop_last': False, 'eval_steps': 500, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': 'None', 'load_best_model_at_end': False, 'metric_for_best_model': 'None', 'greater_is_better': 'None', 'ignore_data_skip': False, 'sharded_ddp': '[]', 'deepspeed': 'None', 'label_smoothing_factor': 0.0, 'optim': 'adamw_hf', 'adafactor': False, 'group_by_length': True, 'length_column_name': 'input_length', 'report_to': "['wandb']", 'ddp_find_unused_parameters': 'None', 'ddp_bucket_cap_mb': 'None', 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': 'None', 'hub_model_id': 'None', 'hub_strategy': 'every_save', 'hub_token': '', 'gradient_checkpointing': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': 'None', 'push_to_hub_organization': 'None', 'push_to_hub_token': '', '_n_gpu': 1, 'mp_parameters': '', 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 40, 'generation_num_beams': 1, 'train_batch_size': 16, 'eval_batch_size': 16} +2022-03-03 00:42:17,267 INFO MainThread:267272 [wandb_watch.py:watch():43] Watching +2022-03-03 00:42:23,675 INFO MainThread:267272 [wandb_run.py:_atexit_cleanup():1797] got exitcode: 1 +2022-03-03 00:42:23,678 INFO MainThread:267272 [wandb_run.py:_restore():1769] restore +2022-03-03 00:42:26,117 INFO MainThread:267272 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 2095 + total_bytes: 2095 +} + +2022-03-03 00:42:26,314 INFO MainThread:267272 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 2095 + total_bytes: 2095 +} + +2022-03-03 00:42:27,157 INFO MainThread:267272 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 3 +} +pusher_stats { + uploaded_bytes: 2095 + total_bytes: 5481 +} + +2022-03-03 00:42:27,259 INFO MainThread:267272 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 21066 + total_bytes: 21066 +} + +2022-03-03 00:42:27,361 INFO MainThread:267272 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 21066 + total_bytes: 21066 +} + +2022-03-03 00:42:27,464 INFO MainThread:267272 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 21066 + total_bytes: 21066 +} + +2022-03-03 00:42:27,566 INFO MainThread:267272 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 21066 + total_bytes: 21066 +} + +2022-03-03 00:42:27,835 INFO MainThread:267272 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 21066 + total_bytes: 21066 +} + +2022-03-03 00:42:27,999 INFO MainThread:267272 [wandb_run.py:_wait_for_finish():1929] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 21066 + total_bytes: 21066 +} +local_info { +} + +2022-03-03 00:42:29,155 INFO MainThread:267272 [wandb_run.py:_append_files():2194] logging synced files diff --git a/wandb/run-20220303_004215-2ttcbe3s/run-2ttcbe3s.wandb b/wandb/run-20220303_004215-2ttcbe3s/run-2ttcbe3s.wandb new file mode 100644 index 0000000000000000000000000000000000000000..d304f8541798723fc991b7a6b7658c541d8312f4 Binary files /dev/null and b/wandb/run-20220303_004215-2ttcbe3s/run-2ttcbe3s.wandb differ diff --git a/wandb/run-20220303_004347-13qlgnoe/files/config.yaml b/wandb/run-20220303_004347-13qlgnoe/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..65490574304db90f27cc97fe881f7a6d5bd637e5 --- /dev/null +++ b/wandb/run-20220303_004347-13qlgnoe/files/config.yaml @@ -0,0 +1,713 @@ +wandb_version: 1 + +_n_gpu: + desc: null + value: 1 +_name_or_path: + desc: null + value: ./ +_wandb: + desc: null + value: + cli_version: 0.12.10 + framework: huggingface + huggingface_version: 4.17.0.dev0 + is_jupyter_run: false + is_kaggle_kernel: false + m: + - 1: train/global_step + 6: + - 3 + python_version: 3.9.5 + start_time: 1646268227 + t: + 1: + - 1 + - 5 + - 11 + 2: + - 1 + - 5 + - 11 + 3: + - 1 + - 7 + - 13 + 4: 3.9.5 + 5: 0.12.10 + 6: 4.17.0.dev0 + 8: + - 5 +adafactor: + desc: null + value: false +adam_beta1: + desc: null + value: 0.9 +adam_beta2: + desc: null + value: 0.999 +adam_epsilon: + desc: null + value: 1.0e-08 +add_cross_attention: + desc: null + value: false +architectures: + desc: null + value: + - SpeechEncoderDecoderModel +bad_words_ids: + desc: null + value: null +bf16: + desc: null + value: false +bf16_full_eval: + desc: null + value: false +bos_token_id: + desc: null + value: null +chunk_size_feed_forward: + desc: null + value: 0 +cross_attention_hidden_size: + desc: null + value: null +dataloader_drop_last: + desc: null + value: false +dataloader_num_workers: + desc: null + value: 0 +dataloader_pin_memory: + desc: null + value: true +ddp_bucket_cap_mb: + desc: null + value: None +ddp_find_unused_parameters: + desc: null + value: None +debug: + desc: null + value: '[]' +decoder: + desc: null + value: + _name_or_path: gpt2-medium + activation_function: gelu_new + add_cross_attention: true + architectures: + - GPT2LMHeadModel + attn_pdrop: 0.0 + bad_words_ids: null + bos_token_id: 50256 + chunk_size_feed_forward: 0 + cross_attention_hidden_size: null + decoder_start_token_id: null + diversity_penalty: 0.0 + do_sample: false + early_stopping: false + embd_pdrop: 0.0 + encoder_no_repeat_ngram_size: 0 + eos_token_id: 50256 + finetuning_task: null + forced_bos_token_id: null + forced_eos_token_id: null + id2label: + '0': LABEL_0 + '1': LABEL_1 + initializer_range: 0.02 + is_decoder: true + is_encoder_decoder: false + label2id: + LABEL_0: 0 + LABEL_1: 1 + layer_norm_epsilon: 1.0e-05 + length_penalty: 1.0 + max_length: 20 + min_length: 0 + model_type: gpt2 + n_ctx: 1024 + n_embd: 1024 + n_head: 16 + n_inner: null + n_layer: 24 + n_positions: 1024 + n_special: 0 + no_repeat_ngram_size: 0 + num_beam_groups: 1 + num_beams: 1 + num_return_sequences: 1 + output_attentions: false + output_hidden_states: false + output_scores: false + pad_token_id: null + predict_special_tokens: true + prefix: null + problem_type: null + pruned_heads: {} + remove_invalid_values: false + reorder_and_upcast_attn: false + repetition_penalty: 1.0 + resid_pdrop: 0.0 + return_dict: true + return_dict_in_generate: false + scale_attn_by_inverse_layer_idx: false + scale_attn_weights: true + sep_token_id: null + summary_activation: null + summary_first_dropout: 0.0 + summary_proj_to_labels: true + summary_type: cls_index + summary_use_proj: true + task_specific_params: + text-generation: + do_sample: true + max_length: 50 + temperature: 1.0 + tie_encoder_decoder: false + tie_word_embeddings: true + tokenizer_class: null + top_k: 50 + top_p: 1.0 + torch_dtype: null + torchscript: false + transformers_version: 4.17.0.dev0 + use_bfloat16: false + use_cache: false + vocab_size: 50257 +decoder_start_token_id: + desc: null + value: 50256 +deepspeed: + desc: null + value: None +disable_tqdm: + desc: null + value: false +diversity_penalty: + desc: null + value: 0.0 +do_eval: + desc: null + value: true +do_predict: + desc: null + value: false +do_sample: + desc: null + value: false +do_train: + desc: null + value: true +early_stopping: + desc: null + value: false +encoder: + desc: null + value: + _name_or_path: facebook/wav2vec2-large-lv60 + activation_dropout: 0.0 + adapter_kernel_size: 3 + adapter_stride: 2 + add_adapter: true + add_cross_attention: false + apply_spec_augment: false + architectures: + - Wav2Vec2ForPreTraining + attention_dropout: 0.0 + bad_words_ids: null + bos_token_id: 1 + chunk_size_feed_forward: 0 + classifier_proj_size: 256 + codevector_dim: 768 + contrastive_logits_temperature: 0.1 + conv_bias: true + conv_dim: + - 512 + - 512 + - 512 + - 512 + - 512 + - 512 + - 512 + conv_kernel: + - 10 + - 3 + - 3 + - 3 + - 3 + - 2 + - 2 + conv_stride: + - 5 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + cross_attention_hidden_size: null + ctc_loss_reduction: sum + ctc_zero_infinity: false + decoder_start_token_id: null + diversity_loss_weight: 0.1 + diversity_penalty: 0.0 + do_sample: false + do_stable_layer_norm: true + early_stopping: false + encoder_no_repeat_ngram_size: 0 + eos_token_id: 2 + feat_extract_activation: gelu + feat_extract_dropout: 0.0 + feat_extract_norm: layer + feat_proj_dropout: 0.0 + feat_quantizer_dropout: 0.0 + final_dropout: 0.0 + finetuning_task: null + forced_bos_token_id: null + forced_eos_token_id: null + gradient_checkpointing: false + hidden_act: gelu + hidden_dropout: 0.0 + hidden_dropout_prob: 0.0 + hidden_size: 1024 + id2label: + '0': LABEL_0 + '1': LABEL_1 + initializer_range: 0.02 + intermediate_size: 4096 + is_decoder: false + is_encoder_decoder: false + label2id: + LABEL_0: 0 + LABEL_1: 1 + layer_norm_eps: 1.0e-05 + layerdrop: 0.0 + length_penalty: 1.0 + mask_feature_length: 10 + mask_feature_min_masks: 0 + mask_feature_prob: 0.0 + mask_time_length: 10 + mask_time_min_masks: 2 + mask_time_prob: 0.0 + max_length: 20 + min_length: 0 + model_type: wav2vec2 + no_repeat_ngram_size: 0 + num_adapter_layers: 3 + num_attention_heads: 16 + num_beam_groups: 1 + num_beams: 1 + num_codevector_groups: 2 + num_codevectors_per_group: 320 + num_conv_pos_embedding_groups: 16 + num_conv_pos_embeddings: 128 + num_feat_extract_layers: 7 + num_hidden_layers: 24 + num_negatives: 100 + num_return_sequences: 1 + output_attentions: false + output_hidden_size: 1024 + output_hidden_states: false + output_scores: false + pad_token_id: 0 + prefix: null + problem_type: null + proj_codevector_dim: 768 + pruned_heads: {} + remove_invalid_values: false + repetition_penalty: 1.0 + return_dict: true + return_dict_in_generate: false + sep_token_id: null + task_specific_params: null + tdnn_dilation: + - 1 + - 2 + - 3 + - 1 + - 1 + tdnn_dim: + - 512 + - 512 + - 512 + - 512 + - 1500 + tdnn_kernel: + - 5 + - 3 + - 3 + - 1 + - 1 + temperature: 1.0 + tie_encoder_decoder: false + tie_word_embeddings: true + tokenizer_class: null + top_k: 50 + top_p: 1.0 + torch_dtype: null + torchscript: false + transformers_version: 4.17.0.dev0 + use_bfloat16: false + use_weighted_layer_sum: false + vocab_size: 32 + xvector_output_dim: 512 +encoder_no_repeat_ngram_size: + desc: null + value: 0 +eos_token_id: + desc: null + value: 50256 +eval_accumulation_steps: + desc: null + value: None +eval_batch_size: + desc: null + value: 16 +eval_steps: + desc: null + value: 500 +evaluation_strategy: + desc: null + value: steps +finetuning_task: + desc: null + value: null +forced_bos_token_id: + desc: null + value: null +forced_eos_token_id: + desc: null + value: null +fp16: + desc: null + value: true +fp16_backend: + desc: null + value: auto +fp16_full_eval: + desc: null + value: false +fp16_opt_level: + desc: null + value: O1 +generation_max_length: + desc: null + value: 40 +generation_num_beams: + desc: null + value: 1 +gradient_accumulation_steps: + desc: null + value: 8 +gradient_checkpointing: + desc: null + value: true +greater_is_better: + desc: null + value: None +group_by_length: + desc: null + value: true +half_precision_backend: + desc: null + value: amp +hub_model_id: + desc: null + value: None +hub_strategy: + desc: null + value: every_save +hub_token: + desc: null + value: +id2label: + desc: null + value: + '0': LABEL_0 + '1': LABEL_1 +ignore_data_skip: + desc: null + value: false +is_decoder: + desc: null + value: false +is_encoder_decoder: + desc: null + value: true +label2id: + desc: null + value: + LABEL_0: 0 + LABEL_1: 1 +label_names: + desc: null + value: None +label_smoothing_factor: + desc: null + value: 0.0 +learning_rate: + desc: null + value: 0.0003 +length_column_name: + desc: null + value: input_length +length_penalty: + desc: null + value: 1.0 +load_best_model_at_end: + desc: null + value: false +local_rank: + desc: null + value: -1 +log_level: + desc: null + value: -1 +log_level_replica: + desc: null + value: -1 +log_on_each_node: + desc: null + value: true +logging_dir: + desc: null + value: ./runs/Mar03_00-43-05_sanchit--v100 +logging_first_step: + desc: null + value: false +logging_nan_inf_filter: + desc: null + value: true +logging_steps: + desc: null + value: 1 +logging_strategy: + desc: null + value: steps +lr_scheduler_type: + desc: null + value: linear +max_grad_norm: + desc: null + value: 1.0 +max_length: + desc: null + value: 50 +max_steps: + desc: null + value: -1 +metric_for_best_model: + desc: null + value: None +min_length: + desc: null + value: 0 +model_type: + desc: null + value: speech-encoder-decoder +mp_parameters: + desc: null + value: '' +no_cuda: + desc: null + value: false +no_repeat_ngram_size: + desc: null + value: 0 +num_beam_groups: + desc: null + value: 1 +num_beams: + desc: null + value: 1 +num_return_sequences: + desc: null + value: 1 +num_train_epochs: + desc: null + value: 1.0 +optim: + desc: null + value: adamw_hf +output_attentions: + desc: null + value: false +output_dir: + desc: null + value: ./ +output_hidden_states: + desc: null + value: false +output_scores: + desc: null + value: false +overwrite_output_dir: + desc: null + value: true +pad_token_id: + desc: null + value: 50256 +past_index: + desc: null + value: -1 +per_device_eval_batch_size: + desc: null + value: 16 +per_device_train_batch_size: + desc: null + value: 16 +per_gpu_eval_batch_size: + desc: null + value: None +per_gpu_train_batch_size: + desc: null + value: None +predict_with_generate: + desc: null + value: true +prediction_loss_only: + desc: null + value: false +prefix: + desc: null + value: null +problem_type: + desc: null + value: null +processor_class: + desc: null + value: Wav2Vec2Processor +pruned_heads: + desc: null + value: {} +push_to_hub: + desc: null + value: true +push_to_hub_model_id: + desc: null + value: None +push_to_hub_organization: + desc: null + value: None +push_to_hub_token: + desc: null + value: +remove_invalid_values: + desc: null + value: false +remove_unused_columns: + desc: null + value: true +repetition_penalty: + desc: null + value: 1.0 +report_to: + desc: null + value: '[''wandb'']' +resume_from_checkpoint: + desc: null + value: None +return_dict: + desc: null + value: true +return_dict_in_generate: + desc: null + value: false +run_name: + desc: null + value: ./ +save_on_each_node: + desc: null + value: false +save_steps: + desc: null + value: 500 +save_strategy: + desc: null + value: steps +save_total_limit: + desc: null + value: 1 +seed: + desc: null + value: 42 +sep_token_id: + desc: null + value: null +sharded_ddp: + desc: null + value: '[]' +skip_memory_metrics: + desc: null + value: true +sortish_sampler: + desc: null + value: false +task_specific_params: + desc: null + value: null +temperature: + desc: null + value: 1.0 +tf32: + desc: null + value: None +tie_encoder_decoder: + desc: null + value: false +tie_word_embeddings: + desc: null + value: false +tokenizer_class: + desc: null + value: null +top_k: + desc: null + value: 50 +top_p: + desc: null + value: 1.0 +torch_dtype: + desc: null + value: torch.float32 +torchscript: + desc: null + value: false +tpu_metrics_debug: + desc: null + value: false +tpu_num_cores: + desc: null + value: None +train_batch_size: + desc: null + value: 16 +transformers_version: + desc: null + value: null +use_bfloat16: + desc: null + value: false +use_cache: + desc: null + value: false +use_legacy_prediction_loop: + desc: null + value: false +warmup_ratio: + desc: null + value: 0.0 +warmup_steps: + desc: null + value: 500 +weight_decay: + desc: null + value: 0.0 +xpu_backend: + desc: null + value: None diff --git a/wandb/run-20220303_004347-13qlgnoe/files/output.log b/wandb/run-20220303_004347-13qlgnoe/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..1df0a47a87f9d00da226e3c3bbc8348e5358a31c --- /dev/null +++ b/wandb/run-20220303_004347-13qlgnoe/files/output.log @@ -0,0 +1,37 @@ + + + 0%| | 0/223 [00:00> Could not estimate the number of tokens of the input, floating-point operations will not be computed +Traceback (most recent call last): + File "/home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/run_speech_recognition_seq2seq.py", line 539, in + main() + File "/home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/run_speech_recognition_seq2seq.py", line 491, in main + train_result = trainer.train(resume_from_checkpoint=checkpoint) + File "/home/sanchit_huggingface_co/transformers/src/transformers/trainer.py", line 1384, in train + tr_loss_step = self.training_step(model, inputs) + File "/home/sanchit_huggingface_co/transformers/src/transformers/trainer.py", line 1959, in training_step + loss = self.compute_loss(model, inputs) + File "/home/sanchit_huggingface_co/transformers/src/transformers/trainer.py", line 1991, in compute_loss + outputs = model(**inputs) + File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl + return forward_call(*input, **kwargs) + File "/home/sanchit_huggingface_co/transformers/src/transformers/models/speech_encoder_decoder/modeling_speech_encoder_decoder.py", line 503, in forward + encoder_outputs = self.encoder( + File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl + return forward_call(*input, **kwargs) + File "/home/sanchit_huggingface_co/transformers/src/transformers/models/wav2vec2/modeling_wav2vec2.py", line 1346, in forward + extract_features = self.feature_extractor(input_values) + File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl + return forward_call(*input, **kwargs) + File "/home/sanchit_huggingface_co/transformers/src/transformers/models/wav2vec2/modeling_wav2vec2.py", line 514, in forward + hidden_states = conv_layer(hidden_states) + File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl + return forward_call(*input, **kwargs) + File "/home/sanchit_huggingface_co/transformers/src/transformers/models/wav2vec2/modeling_wav2vec2.py", line 389, in forward + hidden_states = self.layer_norm(hidden_states) + File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl + return forward_call(*input, **kwargs) + File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/torch/nn/modules/normalization.py", line 189, in forward + return F.layer_norm( + File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/torch/nn/functional.py", line 2347, in layer_norm + return torch.layer_norm(input, normalized_shape, weight, bias, eps, torch.backends.cudnn.enabled) +RuntimeError: CUDA out of memory. Tried to allocate 1.65 GiB (GPU 0; 15.78 GiB total capacity; 10.12 GiB already allocated; 707.31 MiB free; 13.39 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF \ No newline at end of file diff --git a/wandb/run-20220303_004347-13qlgnoe/files/requirements.txt b/wandb/run-20220303_004347-13qlgnoe/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..3974f97a24952deb24d97850f53367da9e7c347d --- /dev/null +++ b/wandb/run-20220303_004347-13qlgnoe/files/requirements.txt @@ -0,0 +1,184 @@ +absl-py==1.0.0 +aiohttp==3.8.1 +aiosignal==1.2.0 +anyio==3.5.0 +appdirs==1.4.4 +argon2-cffi-bindings==21.2.0 +argon2-cffi==21.3.0 +asttokens==2.0.5 +async-timeout==4.0.2 +attrs==21.4.0 +audioread==2.1.9 +babel==2.9.1 +backcall==0.2.0 +bitsandbytes-cuda113==0.26.0 +black==22.1.0 +bleach==4.1.0 +cachetools==5.0.0 +certifi==2021.10.8 +cffi==1.15.0 +charset-normalizer==2.0.11 +chex==0.1.0 +click==8.0.3 +clldutils==3.10.1 +colorlog==6.6.0 +csvw==1.11.0 +cycler==0.11.0 +datasets==1.18.3 +debugpy==1.5.1 +decorator==5.1.1 +defusedxml==0.7.1 +dill==0.3.4 +dlinfo==1.2.1 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +entrypoints==0.4 +executing==0.8.2 +filelock==3.4.2 +flatbuffers==2.0 +flax==0.4.0 +fonttools==4.29.1 +frozenlist==1.3.0 +fsspec==2022.1.0 +gitdb==4.0.9 +gitpython==3.1.27 +google-auth-oauthlib==0.4.6 +google-auth==2.6.0 +grpcio==1.43.0 +huggingface-hub==0.4.0 +hypothesis==6.36.1 +idna==3.3 +importlib-metadata==4.10.1 +ipykernel==6.8.0 +ipython-genutils==0.2.0 +ipython==8.0.1 +ipywidgets==7.6.5 +isodate==0.6.1 +jax==0.2.28 +jaxlib==0.1.76+cuda11.cudnn82 +jedi==0.18.1 +jinja2==3.0.3 +jiwer==2.3.0 +joblib==1.1.0 +json5==0.9.6 +jsonschema==4.4.0 +jupyter-client==7.1.2 +jupyter-console==6.4.0 +jupyter-core==4.9.1 +jupyter-server==1.13.5 +jupyter==1.0.0 +jupyterlab-pygments==0.1.2 +jupyterlab-server==2.10.3 +jupyterlab-widgets==1.0.2 +jupyterlab==3.2.9 +kiwisolver==1.3.2 +librosa==0.8.1 +llvmlite==0.38.0 +markdown==3.3.6 +markupsafe==2.0.1 +matplotlib-inline==0.1.3 +matplotlib==3.5.1 +mistune==0.8.4 +msgpack==1.0.3 +multidict==6.0.2 +multiprocess==0.70.12.2 +mypy-extensions==0.4.3 +nbclassic==0.3.5 +nbclient==0.5.10 +nbconvert==6.4.1 +nbformat==5.1.3 +nest-asyncio==1.5.4 +notebook==6.4.8 +numba==0.55.1 +numpy==1.21.5 +oauthlib==3.2.0 +opt-einsum==3.3.0 +optax==0.1.0 +packaging==21.3 +pandas==1.4.0 +pandocfilters==1.5.0 +parso==0.8.3 +pathspec==0.9.0 +pathtools==0.1.2 +pexpect==4.8.0 +phonemizer==3.0.1 +pickleshare==0.7.5 +pillow==9.0.0 +pip==22.0.2 +pkg-resources==0.0.0 +platformdirs==2.4.1 +pooch==1.6.0 +prometheus-client==0.13.1 +promise==2.3 +prompt-toolkit==3.0.26 +protobuf==3.19.4 +psutil==5.9.0 +ptyprocess==0.7.0 +pure-eval==0.2.2 +pyarrow==6.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pycparser==2.21 +pyctcdecode==0.3.0 +pygments==2.11.2 +pygtrie==2.4.2 +pyparsing==3.0.7 +pyrsistent==0.18.1 +python-dateutil==2.8.2 +python-levenshtein==0.12.2 +pytz==2021.3 +pyyaml==6.0 +pyzmq==22.3.0 +qtconsole==5.2.2 +qtpy==2.0.1 +regex==2022.1.18 +requests-oauthlib==1.3.1 +requests==2.27.1 +resampy==0.2.2 +rfc3986==2.0.0 +rsa==4.8 +sacremoses==0.0.47 +scikit-learn==1.0.2 +scipy==1.7.3 +segments==2.2.0 +send2trash==1.8.0 +sentry-sdk==1.5.6 +setuptools==44.1.1 +shortuuid==1.0.8 +six==1.16.0 +smmap==5.0.0 +sniffio==1.2.0 +sortedcontainers==2.4.0 +soundfile==0.10.3.post1 +stack-data==0.1.4 +tabulate==0.8.9 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.1 +tensorboard==2.8.0 +termcolor==1.1.0 +terminado==0.13.1 +testpath==0.5.0 +threadpoolctl==3.1.0 +tokenizers==0.11.4 +tomli==2.0.0 +toolz==0.11.2 +torch==1.10.2+cu113 +torchaudio==0.10.2+cu113 +tornado==6.1 +tqdm==4.62.3 +traitlets==5.1.1 +transformers==4.17.0.dev0 +typing-extensions==3.10.0.2 +uritemplate==4.1.1 +urllib3==1.26.8 +wandb==0.12.10 +wcwidth==0.2.5 +webencodings==0.5.1 +websocket-client==1.2.3 +werkzeug==2.0.2 +wheel==0.37.1 +widgetsnbextension==3.5.2 +xxhash==2.0.2 +yarl==1.7.2 +yaspin==2.1.0 +zipp==3.7.0 \ No newline at end of file diff --git a/wandb/run-20220303_004347-13qlgnoe/files/wandb-metadata.json b/wandb/run-20220303_004347-13qlgnoe/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..3932b3032e5d51d3dab3420495315f964867ba94 --- /dev/null +++ b/wandb/run-20220303_004347-13qlgnoe/files/wandb-metadata.json @@ -0,0 +1,60 @@ +{ + "os": "Linux-5.11.0-1028-gcp-x86_64-with-glibc2.33", + "python": "3.9.5", + "heartbeatAt": "2022-03-03T00:43:48.869101", + "startedAt": "2022-03-03T00:43:47.738965", + "docker": null, + "gpu": "Tesla V100-SXM2-16GB", + "gpu_count": 2, + "cpu_count": 16, + "cuda": null, + "args": [ + "--dataset_name=librispeech_asr", + "--model_name_or_path=./", + "--tokenizer_name=./", + "--dataset_config_name=clean", + "--train_split_name=train.100", + "--eval_split_name=validation", + "--output_dir=./", + "--preprocessing_num_workers=1", + "--length_column_name=input_length", + "--overwrite_output_dir", + "--num_train_epochs=1", + "--per_device_train_batch_size=16", + "--per_device_eval_batch_size=16", + "--gradient_accumulation_steps=8", + "--generation_max_length=40", + "--generation_num_beams=1", + "--learning_rate=3e-4", + "--warmup_steps=500", + "--evaluation_strategy=steps", + "--text_column_name=text", + "--save_steps=500", + "--eval_steps=500", + "--logging_steps=1", + "--save_total_limit=1", + "--freeze_feature_encoder", + "--gradient_checkpointing", + "--fp16", + "--group_by_length", + "--predict_with_generate", + "--do_lower_case", + "--do_train", + "--do_eval", + "--report_to=wandb", + "--push_to_hub", + "--use_auth_token" + ], + "state": "running", + "program": "/home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/run_speech_recognition_seq2seq.py", + "codePath": "run_speech_recognition_seq2seq.py", + "git": { + "remote": "https://huggingface.co/sanchit-gandhi/wav2vec2-gpt2-wandb-grid-search", + "commit": "8c7181143c175387040dc1a6ac2ddbc9179b550c" + }, + "email": "sanchit@huggingface.co", + "root": "/home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search", + "host": "sanchit--v100", + "username": "sanchit_huggingface_co", + "executable": "/home/sanchit_huggingface_co/gcp/bin/python" +} diff --git a/wandb/run-20220303_004347-13qlgnoe/files/wandb-summary.json b/wandb/run-20220303_004347-13qlgnoe/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..b1ac4f7d3564b2fd407d247e6957709faa41a169 --- /dev/null +++ b/wandb/run-20220303_004347-13qlgnoe/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb": {"runtime": 9}} \ No newline at end of file diff --git a/wandb/run-20220303_004347-13qlgnoe/logs/debug-internal.log b/wandb/run-20220303_004347-13qlgnoe/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..60ef833c70e90fc0275cd3425932823f1092a8e7 --- /dev/null +++ b/wandb/run-20220303_004347-13qlgnoe/logs/debug-internal.log @@ -0,0 +1,140 @@ +2022-03-03 00:43:48,671 INFO MainThread:267559 [internal.py:wandb_internal():89] W&B internal server running at pid: 267559, started at: 2022-03-03 00:43:48.670697 +2022-03-03 00:43:48,673 DEBUG HandlerThread:267559 [handler.py:handle_request():131] handle_request: check_version +2022-03-03 00:43:48,674 INFO WriterThread:267559 [datastore.py:open_for_write():77] open: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004347-13qlgnoe/run-13qlgnoe.wandb +2022-03-03 00:43:48,675 DEBUG SenderThread:267559 [sender.py:send():235] send: header +2022-03-03 00:43:48,675 DEBUG SenderThread:267559 [sender.py:send_request():249] send_request: check_version +2022-03-03 00:43:48,744 DEBUG SenderThread:267559 [sender.py:send():235] send: run +2022-03-03 00:43:48,863 INFO SenderThread:267559 [dir_watcher.py:__init__():169] watching files in: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004347-13qlgnoe/files +2022-03-03 00:43:48,863 INFO SenderThread:267559 [sender.py:_start_run_threads():809] run started: 13qlgnoe with start time 1646268227 +2022-03-03 00:43:48,863 DEBUG SenderThread:267559 [sender.py:send():235] send: summary +2022-03-03 00:43:48,863 INFO SenderThread:267559 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:43:48,864 DEBUG HandlerThread:267559 [handler.py:handle_request():131] handle_request: run_start +2022-03-03 00:43:48,868 DEBUG HandlerThread:267559 [meta.py:__init__():36] meta init +2022-03-03 00:43:48,869 DEBUG HandlerThread:267559 [meta.py:__init__():50] meta init done +2022-03-03 00:43:48,869 DEBUG HandlerThread:267559 [meta.py:probe():210] probe +2022-03-03 00:43:48,875 DEBUG HandlerThread:267559 [meta.py:_setup_git():200] setup git +2022-03-03 00:43:48,891 DEBUG HandlerThread:267559 [meta.py:_setup_git():207] setup git done +2022-03-03 00:43:48,891 DEBUG HandlerThread:267559 [meta.py:_save_pip():54] save pip +2022-03-03 00:43:48,892 DEBUG HandlerThread:267559 [meta.py:_save_pip():68] save pip done +2022-03-03 00:43:48,892 DEBUG HandlerThread:267559 [meta.py:probe():248] probe done +2022-03-03 00:43:48,974 DEBUG SenderThread:267559 [sender.py:send():235] send: files +2022-03-03 00:43:48,974 INFO SenderThread:267559 [sender.py:_save_file():944] saving file wandb-metadata.json with policy now +2022-03-03 00:43:48,979 DEBUG HandlerThread:267559 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:43:48,980 DEBUG SenderThread:267559 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:43:49,032 DEBUG SenderThread:267559 [sender.py:send():235] send: config +2022-03-03 00:43:49,033 DEBUG SenderThread:267559 [sender.py:send():235] send: metric +2022-03-03 00:43:49,033 DEBUG SenderThread:267559 [sender.py:send():235] send: metric +2022-03-03 00:43:49,033 WARNING SenderThread:267559 [sender.py:send_metric():902] Seen metric with glob (shouldnt happen) +2022-03-03 00:43:49,247 INFO Thread-11 :267559 [upload_job.py:push():137] Uploaded file /tmp/tmps9bhl1w0wandb/kc71lad7-wandb-metadata.json +2022-03-03 00:43:49,865 INFO Thread-8 :267559 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004347-13qlgnoe/files/output.log +2022-03-03 00:43:49,865 INFO Thread-8 :267559 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004347-13qlgnoe/files/wandb-metadata.json +2022-03-03 00:43:49,865 INFO Thread-8 :267559 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004347-13qlgnoe/files/wandb-summary.json +2022-03-03 00:43:49,865 INFO Thread-8 :267559 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004347-13qlgnoe/files/requirements.txt +2022-03-03 00:43:51,864 INFO Thread-8 :267559 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004347-13qlgnoe/files/output.log +2022-03-03 00:43:55,866 INFO Thread-8 :267559 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004347-13qlgnoe/files/output.log +2022-03-03 00:43:57,866 INFO Thread-8 :267559 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004347-13qlgnoe/files/output.log +2022-03-03 00:43:58,509 DEBUG HandlerThread:267559 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-03 00:43:58,509 DEBUG SenderThread:267559 [sender.py:send():235] send: telemetry +2022-03-03 00:43:58,510 DEBUG SenderThread:267559 [sender.py:send():235] send: exit +2022-03-03 00:43:58,510 INFO SenderThread:267559 [sender.py:send_exit():371] handling exit code: 1 +2022-03-03 00:43:58,510 INFO SenderThread:267559 [sender.py:send_exit():373] handling runtime: 9 +2022-03-03 00:43:58,510 INFO SenderThread:267559 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:43:58,510 INFO SenderThread:267559 [sender.py:send_exit():379] send defer +2022-03-03 00:43:58,511 DEBUG SenderThread:267559 [sender.py:send_request():249] send_request: poll_exit +2022-03-03 00:43:58,511 DEBUG HandlerThread:267559 [handler.py:handle_request():131] handle_request: defer +2022-03-03 00:43:58,511 INFO HandlerThread:267559 [handler.py:handle_request_defer():154] handle defer: 0 +2022-03-03 00:43:58,512 DEBUG SenderThread:267559 [sender.py:send_request():249] send_request: defer +2022-03-03 00:43:58,512 INFO SenderThread:267559 [sender.py:send_request_defer():388] handle sender defer: 0 +2022-03-03 00:43:58,512 INFO SenderThread:267559 [sender.py:transition_state():392] send defer: 1 +2022-03-03 00:43:58,512 DEBUG HandlerThread:267559 [handler.py:handle_request():131] handle_request: defer +2022-03-03 00:43:58,512 INFO HandlerThread:267559 [handler.py:handle_request_defer():154] handle defer: 1 +2022-03-03 00:43:58,627 DEBUG SenderThread:267559 [sender.py:send_request():249] send_request: defer +2022-03-03 00:43:58,627 DEBUG HandlerThread:267559 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-03 00:43:58,627 INFO SenderThread:267559 [sender.py:send_request_defer():388] handle sender defer: 1 +2022-03-03 00:43:58,628 INFO SenderThread:267559 [sender.py:transition_state():392] send defer: 2 +2022-03-03 00:43:58,628 DEBUG SenderThread:267559 [sender.py:send_request():249] send_request: poll_exit +2022-03-03 00:43:58,628 DEBUG SenderThread:267559 [sender.py:send():235] send: stats +2022-03-03 00:43:58,629 DEBUG HandlerThread:267559 [handler.py:handle_request():131] handle_request: defer +2022-03-03 00:43:58,629 INFO HandlerThread:267559 [handler.py:handle_request_defer():154] handle defer: 2 +2022-03-03 00:43:58,629 DEBUG SenderThread:267559 [sender.py:send_request():249] send_request: defer +2022-03-03 00:43:58,629 INFO SenderThread:267559 [sender.py:send_request_defer():388] handle sender defer: 2 +2022-03-03 00:43:58,629 INFO SenderThread:267559 [sender.py:transition_state():392] send defer: 3 +2022-03-03 00:43:58,629 DEBUG HandlerThread:267559 [handler.py:handle_request():131] handle_request: defer +2022-03-03 00:43:58,629 INFO HandlerThread:267559 [handler.py:handle_request_defer():154] handle defer: 3 +2022-03-03 00:43:58,630 DEBUG SenderThread:267559 [sender.py:send():235] send: summary +2022-03-03 00:43:58,630 INFO SenderThread:267559 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:43:58,630 DEBUG SenderThread:267559 [sender.py:send_request():249] send_request: defer +2022-03-03 00:43:58,630 INFO SenderThread:267559 [sender.py:send_request_defer():388] handle sender defer: 3 +2022-03-03 00:43:58,630 INFO SenderThread:267559 [sender.py:transition_state():392] send defer: 4 +2022-03-03 00:43:58,631 DEBUG HandlerThread:267559 [handler.py:handle_request():131] handle_request: defer +2022-03-03 00:43:58,631 INFO HandlerThread:267559 [handler.py:handle_request_defer():154] handle defer: 4 +2022-03-03 00:43:58,631 DEBUG SenderThread:267559 [sender.py:send_request():249] send_request: defer +2022-03-03 00:43:58,631 INFO SenderThread:267559 [sender.py:send_request_defer():388] handle sender defer: 4 +2022-03-03 00:43:58,730 DEBUG HandlerThread:267559 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-03 00:43:58,867 INFO Thread-8 :267559 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004347-13qlgnoe/files/wandb-summary.json +2022-03-03 00:43:58,867 INFO Thread-8 :267559 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004347-13qlgnoe/files/output.log +2022-03-03 00:43:58,921 INFO SenderThread:267559 [sender.py:transition_state():392] send defer: 5 +2022-03-03 00:43:58,922 DEBUG SenderThread:267559 [sender.py:send_request():249] send_request: poll_exit +2022-03-03 00:43:58,922 DEBUG HandlerThread:267559 [handler.py:handle_request():131] handle_request: defer +2022-03-03 00:43:58,922 INFO HandlerThread:267559 [handler.py:handle_request_defer():154] handle defer: 5 +2022-03-03 00:43:58,923 DEBUG SenderThread:267559 [sender.py:send_request():249] send_request: defer +2022-03-03 00:43:58,923 INFO SenderThread:267559 [sender.py:send_request_defer():388] handle sender defer: 5 +2022-03-03 00:43:58,923 INFO SenderThread:267559 [dir_watcher.py:finish():283] shutting down directory watcher +2022-03-03 00:43:59,024 DEBUG HandlerThread:267559 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-03 00:43:59,867 INFO Thread-8 :267559 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004347-13qlgnoe/files/config.yaml +2022-03-03 00:43:59,868 INFO SenderThread:267559 [dir_watcher.py:finish():313] scan: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004347-13qlgnoe/files +2022-03-03 00:43:59,868 INFO SenderThread:267559 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004347-13qlgnoe/files/wandb-metadata.json wandb-metadata.json +2022-03-03 00:43:59,869 INFO SenderThread:267559 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004347-13qlgnoe/files/output.log output.log +2022-03-03 00:43:59,869 INFO SenderThread:267559 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004347-13qlgnoe/files/wandb-summary.json wandb-summary.json +2022-03-03 00:43:59,869 INFO SenderThread:267559 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004347-13qlgnoe/files/requirements.txt requirements.txt +2022-03-03 00:43:59,869 INFO SenderThread:267559 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004347-13qlgnoe/files/config.yaml config.yaml +2022-03-03 00:43:59,873 INFO SenderThread:267559 [sender.py:transition_state():392] send defer: 6 +2022-03-03 00:43:59,876 DEBUG SenderThread:267559 [sender.py:send_request():249] send_request: poll_exit +2022-03-03 00:43:59,882 DEBUG HandlerThread:267559 [handler.py:handle_request():131] handle_request: defer +2022-03-03 00:43:59,882 INFO HandlerThread:267559 [handler.py:handle_request_defer():154] handle defer: 6 +2022-03-03 00:43:59,884 DEBUG SenderThread:267559 [sender.py:send_request():249] send_request: defer +2022-03-03 00:43:59,884 INFO SenderThread:267559 [sender.py:send_request_defer():388] handle sender defer: 6 +2022-03-03 00:43:59,884 INFO SenderThread:267559 [file_pusher.py:finish():177] shutting down file pusher +2022-03-03 00:43:59,977 DEBUG HandlerThread:267559 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-03 00:43:59,978 DEBUG SenderThread:267559 [sender.py:send_request():249] send_request: poll_exit +2022-03-03 00:44:00,080 DEBUG HandlerThread:267559 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-03 00:44:00,080 DEBUG SenderThread:267559 [sender.py:send_request():249] send_request: poll_exit +2022-03-03 00:44:00,168 INFO Thread-15 :267559 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004347-13qlgnoe/files/config.yaml +2022-03-03 00:44:00,179 INFO Thread-12 :267559 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004347-13qlgnoe/files/output.log +2022-03-03 00:44:00,181 INFO Thread-13 :267559 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004347-13qlgnoe/files/wandb-summary.json +2022-03-03 00:44:00,182 DEBUG HandlerThread:267559 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-03 00:44:00,182 DEBUG SenderThread:267559 [sender.py:send_request():249] send_request: poll_exit +2022-03-03 00:44:00,186 INFO Thread-14 :267559 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004347-13qlgnoe/files/requirements.txt +2022-03-03 00:44:00,284 DEBUG HandlerThread:267559 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-03 00:44:00,284 DEBUG SenderThread:267559 [sender.py:send_request():249] send_request: poll_exit +2022-03-03 00:44:00,386 DEBUG HandlerThread:267559 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-03 00:44:00,386 DEBUG SenderThread:267559 [sender.py:send_request():249] send_request: poll_exit +2022-03-03 00:44:00,386 INFO Thread-7 :267559 [sender.py:transition_state():392] send defer: 7 +2022-03-03 00:44:00,387 DEBUG HandlerThread:267559 [handler.py:handle_request():131] handle_request: defer +2022-03-03 00:44:00,387 INFO HandlerThread:267559 [handler.py:handle_request_defer():154] handle defer: 7 +2022-03-03 00:44:00,387 DEBUG SenderThread:267559 [sender.py:send_request():249] send_request: defer +2022-03-03 00:44:00,387 INFO SenderThread:267559 [sender.py:send_request_defer():388] handle sender defer: 7 +2022-03-03 00:44:00,423 INFO SenderThread:267559 [sender.py:transition_state():392] send defer: 8 +2022-03-03 00:44:00,423 DEBUG HandlerThread:267559 [handler.py:handle_request():131] handle_request: defer +2022-03-03 00:44:00,423 INFO HandlerThread:267559 [handler.py:handle_request_defer():154] handle defer: 8 +2022-03-03 00:44:00,423 DEBUG SenderThread:267559 [sender.py:send_request():249] send_request: defer +2022-03-03 00:44:00,424 INFO SenderThread:267559 [sender.py:send_request_defer():388] handle sender defer: 8 +2022-03-03 00:44:00,424 INFO SenderThread:267559 [sender.py:transition_state():392] send defer: 9 +2022-03-03 00:44:00,424 DEBUG SenderThread:267559 [sender.py:send():235] send: final +2022-03-03 00:44:00,425 DEBUG SenderThread:267559 [sender.py:send():235] send: footer +2022-03-03 00:44:00,425 DEBUG HandlerThread:267559 [handler.py:handle_request():131] handle_request: defer +2022-03-03 00:44:00,425 INFO HandlerThread:267559 [handler.py:handle_request_defer():154] handle defer: 9 +2022-03-03 00:44:00,425 DEBUG SenderThread:267559 [sender.py:send_request():249] send_request: defer +2022-03-03 00:44:00,425 INFO SenderThread:267559 [sender.py:send_request_defer():388] handle sender defer: 9 +2022-03-03 00:44:00,488 DEBUG HandlerThread:267559 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-03 00:44:00,489 DEBUG SenderThread:267559 [sender.py:send_request():249] send_request: poll_exit +2022-03-03 00:44:00,489 INFO SenderThread:267559 [file_pusher.py:join():182] waiting for file pusher +2022-03-03 00:44:00,551 DEBUG HandlerThread:267559 [handler.py:handle_request():131] handle_request: get_summary +2022-03-03 00:44:00,552 DEBUG HandlerThread:267559 [handler.py:handle_request():131] handle_request: sampled_history +2022-03-03 00:44:00,552 DEBUG HandlerThread:267559 [handler.py:handle_request():131] handle_request: shutdown +2022-03-03 00:44:00,552 INFO HandlerThread:267559 [handler.py:finish():739] shutting down handler +2022-03-03 00:44:01,425 INFO WriterThread:267559 [datastore.py:close():281] close: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004347-13qlgnoe/run-13qlgnoe.wandb +2022-03-03 00:44:01,549 INFO SenderThread:267559 [sender.py:finish():1075] shutting down sender +2022-03-03 00:44:01,550 INFO SenderThread:267559 [file_pusher.py:finish():177] shutting down file pusher +2022-03-03 00:44:01,550 INFO SenderThread:267559 [file_pusher.py:join():182] waiting for file pusher +2022-03-03 00:44:01,552 INFO MainThread:267559 [internal.py:handle_exit():79] Internal process exited diff --git a/wandb/run-20220303_004347-13qlgnoe/logs/debug.log b/wandb/run-20220303_004347-13qlgnoe/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..113668f838f64c6fd6835ab06529937978bd29b0 --- /dev/null +++ b/wandb/run-20220303_004347-13qlgnoe/logs/debug.log @@ -0,0 +1,115 @@ +2022-03-03 00:43:47,740 INFO MainThread:267458 [wandb_setup.py:_flush():75] Loading settings from /home/sanchit_huggingface_co/.config/wandb/settings +2022-03-03 00:43:47,740 INFO MainThread:267458 [wandb_setup.py:_flush():75] Loading settings from /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/settings +2022-03-03 00:43:47,741 INFO MainThread:267458 [wandb_setup.py:_flush():75] Loading settings from environment variables: {} +2022-03-03 00:43:47,741 INFO MainThread:267458 [wandb_setup.py:_flush():75] Inferring run settings from compute environment: {'program_relpath': 'run_speech_recognition_seq2seq.py', 'program': '/home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/run_speech_recognition_seq2seq.py'} +2022-03-03 00:43:47,741 INFO MainThread:267458 [wandb_init.py:_log_setup():386] Logging user logs to /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004347-13qlgnoe/logs/debug.log +2022-03-03 00:43:47,741 INFO MainThread:267458 [wandb_init.py:_log_setup():387] Logging internal logs to /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004347-13qlgnoe/logs/debug-internal.log +2022-03-03 00:43:47,741 INFO MainThread:267458 [wandb_init.py:init():420] calling init triggers +2022-03-03 00:43:47,741 INFO MainThread:267458 [wandb_init.py:init():425] wandb.init called with sweep_config: {} +config: {} +2022-03-03 00:43:47,741 INFO MainThread:267458 [wandb_init.py:init():471] starting backend +2022-03-03 00:43:47,741 INFO MainThread:267458 [backend.py:_multiprocessing_setup():99] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2022-03-03 00:43:47,797 INFO MainThread:267458 [backend.py:ensure_launched():219] starting backend process... +2022-03-03 00:43:47,849 INFO MainThread:267458 [backend.py:ensure_launched():224] started backend process with pid: 267559 +2022-03-03 00:43:47,851 INFO MainThread:267458 [wandb_init.py:init():480] backend started and connected +2022-03-03 00:43:47,862 INFO MainThread:267458 [wandb_init.py:init():550] updated telemetry +2022-03-03 00:43:47,992 INFO MainThread:267458 [wandb_init.py:init():581] communicating current version +2022-03-03 00:43:48,742 INFO MainThread:267458 [wandb_init.py:init():586] got version response upgrade_message: "wandb version 0.12.11 is available! To upgrade, please run:\n $ pip install wandb --upgrade" + +2022-03-03 00:43:48,742 INFO MainThread:267458 [wandb_init.py:init():596] communicating run to backend with 30 second timeout +2022-03-03 00:43:48,863 INFO MainThread:267458 [wandb_init.py:init():624] starting run threads in backend +2022-03-03 00:43:48,979 INFO MainThread:267458 [wandb_run.py:_console_start():1827] atexit reg +2022-03-03 00:43:48,979 INFO MainThread:267458 [wandb_run.py:_redirect():1701] redirect: SettingsConsole.REDIRECT +2022-03-03 00:43:48,980 INFO MainThread:267458 [wandb_run.py:_redirect():1706] Redirecting console. +2022-03-03 00:43:48,981 INFO MainThread:267458 [wandb_run.py:_redirect():1762] Redirects installed. +2022-03-03 00:43:48,981 INFO MainThread:267458 [wandb_init.py:init():651] run started, returning control to user process +2022-03-03 00:43:48,986 INFO MainThread:267458 [wandb_run.py:_config_callback():966] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'torch.float32', 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 50, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'architectures': ['SpeechEncoderDecoderModel'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 50256, 'eos_token_id': 50256, 'sep_token_id': None, 'decoder_start_token_id': 50256, 'task_specific_params': None, 'problem_type': None, '_name_or_path': './', 'transformers_version': None, 'decoder': {'vocab_size': 50257, 'n_positions': 1024, 'n_embd': 1024, 'n_layer': 24, 'n_head': 16, 'n_inner': None, 'activation_function': 'gelu_new', 'resid_pdrop': 0.0, 'embd_pdrop': 0.0, 'attn_pdrop': 0.0, 'layer_norm_epsilon': 1e-05, 'initializer_range': 0.02, 'summary_type': 'cls_index', 'summary_use_proj': True, 'summary_activation': None, 'summary_first_dropout': 0.0, 'summary_proj_to_labels': True, 'scale_attn_weights': True, 'use_cache': False, 'scale_attn_by_inverse_layer_idx': False, 'reorder_and_upcast_attn': False, 'bos_token_id': 50256, 'eos_token_id': 50256, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': True, 'cross_attention_hidden_size': None, 'add_cross_attention': True, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'architectures': ['GPT2LMHeadModel'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'pad_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': {'text-generation': {'do_sample': True, 'max_length': 50}}, 'problem_type': None, '_name_or_path': 'gpt2-medium', 'transformers_version': '4.17.0.dev0', 'n_ctx': 1024, 'n_special': 0, 'predict_special_tokens': True, 'model_type': 'gpt2'}, 'encoder': {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'architectures': ['Wav2Vec2ForPreTraining'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 1, 'pad_token_id': 0, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'facebook/wav2vec2-large-lv60', 'transformers_version': '4.17.0.dev0', 'feat_extract_dropout': 0.0, 'gradient_checkpointing': False, 'hidden_dropout_prob': 0.0, 'num_feat_extract_layers': 7, 'hidden_size': 1024, 'feat_extract_norm': 'layer', 'feat_extract_activation': 'gelu', 'conv_dim': [512, 512, 512, 512, 512, 512, 512], 'conv_stride': [5, 2, 2, 2, 2, 2, 2], 'conv_kernel': [10, 3, 3, 3, 3, 2, 2], 'conv_bias': True, 'num_conv_pos_embeddings': 128, 'num_conv_pos_embedding_groups': 16, 'num_hidden_layers': 24, 'intermediate_size': 4096, 'hidden_act': 'gelu', 'num_attention_heads': 16, 'hidden_dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'feat_proj_dropout': 0.0, 'final_dropout': 0.0, 'layerdrop': 0.0, 'layer_norm_eps': 1e-05, 'initializer_range': 0.02, 'vocab_size': 32, 'do_stable_layer_norm': True, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.0, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'num_codevectors_per_group': 320, 'num_codevector_groups': 2, 'contrastive_logits_temperature': 0.1, 'feat_quantizer_dropout': 0.0, 'num_negatives': 100, 'codevector_dim': 768, 'proj_codevector_dim': 768, 'diversity_loss_weight': 0.1, 'ctc_loss_reduction': 'sum', 'ctc_zero_infinity': False, 'add_adapter': True, 'adapter_kernel_size': 3, 'adapter_stride': 2, 'num_adapter_layers': 3, 'output_hidden_size': 1024, 'classifier_proj_size': 256, 'tdnn_dim': [512, 512, 512, 512, 1500], 'tdnn_kernel': [5, 3, 3, 1, 1], 'tdnn_dilation': [1, 2, 3, 1, 1], 'xvector_output_dim': 512, 'model_type': 'wav2vec2'}, 'model_type': 'speech-encoder-decoder', 'processor_class': 'Wav2Vec2Processor', 'use_cache': False, 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 16, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': 'None', 'per_gpu_eval_batch_size': 'None', 'gradient_accumulation_steps': 8, 'eval_accumulation_steps': 'None', 'learning_rate': 0.0003, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1.0, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Mar03_00-43-05_sanchit--v100', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 1, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 500, 'save_total_limit': 1, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'amp', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': 'None', 'local_rank': -1, 'xpu_backend': 'None', 'tpu_num_cores': 'None', 'tpu_metrics_debug': False, 'debug': '[]', 'dataloader_drop_last': False, 'eval_steps': 500, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': 'None', 'load_best_model_at_end': False, 'metric_for_best_model': 'None', 'greater_is_better': 'None', 'ignore_data_skip': False, 'sharded_ddp': '[]', 'deepspeed': 'None', 'label_smoothing_factor': 0.0, 'optim': 'adamw_hf', 'adafactor': False, 'group_by_length': True, 'length_column_name': 'input_length', 'report_to': "['wandb']", 'ddp_find_unused_parameters': 'None', 'ddp_bucket_cap_mb': 'None', 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': 'None', 'hub_model_id': 'None', 'hub_strategy': 'every_save', 'hub_token': '', 'gradient_checkpointing': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': 'None', 'push_to_hub_organization': 'None', 'push_to_hub_token': '', '_n_gpu': 1, 'mp_parameters': '', 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 40, 'generation_num_beams': 1, 'train_batch_size': 16, 'eval_batch_size': 16} +2022-03-03 00:43:48,990 INFO MainThread:267458 [wandb_watch.py:watch():43] Watching +2022-03-03 00:43:56,069 INFO MainThread:267458 [wandb_run.py:_atexit_cleanup():1797] got exitcode: 1 +2022-03-03 00:43:56,073 INFO MainThread:267458 [wandb_run.py:_restore():1769] restore +2022-03-03 00:43:58,511 INFO MainThread:267458 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 2095 + total_bytes: 2095 +} + +2022-03-03 00:43:58,629 INFO MainThread:267458 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 2095 + total_bytes: 2095 +} + +2022-03-03 00:43:58,922 INFO MainThread:267458 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 2095 + total_bytes: 2095 +} + +2022-03-03 00:43:59,876 INFO MainThread:267458 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 4 +} +pusher_stats { + uploaded_bytes: 2095 + total_bytes: 8718 +} + +2022-03-03 00:43:59,978 INFO MainThread:267458 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 21066 + total_bytes: 21066 +} + +2022-03-03 00:44:00,081 INFO MainThread:267458 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 21066 + total_bytes: 21066 +} + +2022-03-03 00:44:00,182 INFO MainThread:267458 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 21066 + total_bytes: 21066 +} + +2022-03-03 00:44:00,284 INFO MainThread:267458 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 21066 + total_bytes: 21066 +} + +2022-03-03 00:44:00,387 INFO MainThread:267458 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 21066 + total_bytes: 21066 +} + +2022-03-03 00:44:00,550 INFO MainThread:267458 [wandb_run.py:_wait_for_finish():1929] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 21066 + total_bytes: 21066 +} +local_info { +} + +2022-03-03 00:44:01,706 INFO MainThread:267458 [wandb_run.py:_append_files():2194] logging synced files diff --git a/wandb/run-20220303_004347-13qlgnoe/run-13qlgnoe.wandb b/wandb/run-20220303_004347-13qlgnoe/run-13qlgnoe.wandb new file mode 100644 index 0000000000000000000000000000000000000000..001ed6312af5dca5f36cacb7c9cd0a26d7a5c5c5 Binary files /dev/null and b/wandb/run-20220303_004347-13qlgnoe/run-13qlgnoe.wandb differ diff --git a/wandb/run-20220303_004520-25bnjrx1/files/config.yaml b/wandb/run-20220303_004520-25bnjrx1/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..91053a0cbae77f8a150dae1d394b3e2fbf2b098b --- /dev/null +++ b/wandb/run-20220303_004520-25bnjrx1/files/config.yaml @@ -0,0 +1,11335 @@ +wandb_version: 1 + +_n_gpu: + desc: null + value: 1 +_name_or_path: + desc: null + value: ./ +_wandb: + desc: null + value: + cli_version: 0.12.10 + framework: huggingface + huggingface_version: 4.17.0.dev0 + is_jupyter_run: false + is_kaggle_kernel: false + m: + - 1: train/global_step + 6: + - 3 + - 1: train/loss + 5: 1 + 6: + - 1 + - 1: train/learning_rate + 5: 1 + 6: + - 1 + - 1: train/epoch + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.ln_f\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.ln_f\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.ln_f\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.ln_f\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.ln_f\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.ln_f\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.wpe\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.wpe\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.wpe\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.wte\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.wte\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.wte\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.2\.conv\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.2\.conv\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.2\.conv\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.2\.conv\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.2\.conv\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.2\.conv\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.1\.conv\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.1\.conv\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.1\.conv\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.1\.conv\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.1\.conv\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.1\.conv\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.0\.conv\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.0\.conv\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.0\.conv\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.0\.conv\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.0\.conv\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.0\.conv\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.weight_v._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.weight_v.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.weight_v.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.weight_g._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.weight_g.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.weight_g.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.projection\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.projection\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.projection\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.projection\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.projection\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.projection\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: eval/loss + 5: 1 + 6: + - 1 + - 1: eval/wer + 5: 1 + 6: + - 1 + - 1: eval/runtime + 5: 1 + 6: + - 1 + - 1: eval/samples_per_second + 5: 1 + 6: + - 1 + - 1: eval/steps_per_second + 5: 1 + 6: + - 1 + python_version: 3.9.5 + start_time: 1646268320 + t: + 1: + - 1 + - 5 + - 11 + 3: + - 13 + 4: 3.9.5 + 5: 0.12.10 + 6: 4.17.0.dev0 + 8: + - 5 +adafactor: + desc: null + value: false +adam_beta1: + desc: null + value: 0.9 +adam_beta2: + desc: null + value: 0.999 +adam_epsilon: + desc: null + value: 1.0e-08 +add_cross_attention: + desc: null + value: false +architectures: + desc: null + value: + - SpeechEncoderDecoderModel +bad_words_ids: + desc: null + value: null +bf16: + desc: null + value: false +bf16_full_eval: + desc: null + value: false +bos_token_id: + desc: null + value: null +chunk_size_feed_forward: + desc: null + value: 0 +cross_attention_hidden_size: + desc: null + value: null +dataloader_drop_last: + desc: null + value: false +dataloader_num_workers: + desc: null + value: 0 +dataloader_pin_memory: + desc: null + value: true +ddp_bucket_cap_mb: + desc: null + value: None +ddp_find_unused_parameters: + desc: null + value: None +debug: + desc: null + value: '[]' +decoder: + desc: null + value: + _name_or_path: gpt2-medium + activation_function: gelu_new + add_cross_attention: true + architectures: + - GPT2LMHeadModel + attn_pdrop: 0.0 + bad_words_ids: null + bos_token_id: 50256 + chunk_size_feed_forward: 0 + cross_attention_hidden_size: null + decoder_start_token_id: null + diversity_penalty: 0.0 + do_sample: false + early_stopping: false + embd_pdrop: 0.0 + encoder_no_repeat_ngram_size: 0 + eos_token_id: 50256 + finetuning_task: null + forced_bos_token_id: null + forced_eos_token_id: null + id2label: + '0': LABEL_0 + '1': LABEL_1 + initializer_range: 0.02 + is_decoder: true + is_encoder_decoder: false + label2id: + LABEL_0: 0 + LABEL_1: 1 + layer_norm_epsilon: 1.0e-05 + length_penalty: 1.0 + max_length: 20 + min_length: 0 + model_type: gpt2 + n_ctx: 1024 + n_embd: 1024 + n_head: 16 + n_inner: null + n_layer: 24 + n_positions: 1024 + n_special: 0 + no_repeat_ngram_size: 0 + num_beam_groups: 1 + num_beams: 1 + num_return_sequences: 1 + output_attentions: false + output_hidden_states: false + output_scores: false + pad_token_id: null + predict_special_tokens: true + prefix: null + problem_type: null + pruned_heads: {} + remove_invalid_values: false + reorder_and_upcast_attn: false + repetition_penalty: 1.0 + resid_pdrop: 0.0 + return_dict: true + return_dict_in_generate: false + scale_attn_by_inverse_layer_idx: false + scale_attn_weights: true + sep_token_id: null + summary_activation: null + summary_first_dropout: 0.0 + summary_proj_to_labels: true + summary_type: cls_index + summary_use_proj: true + task_specific_params: + text-generation: + do_sample: true + max_length: 50 + temperature: 1.0 + tie_encoder_decoder: false + tie_word_embeddings: true + tokenizer_class: null + top_k: 50 + top_p: 1.0 + torch_dtype: null + torchscript: false + transformers_version: 4.17.0.dev0 + use_bfloat16: false + use_cache: false + vocab_size: 50257 +decoder_start_token_id: + desc: null + value: 50256 +deepspeed: + desc: null + value: None +disable_tqdm: + desc: null + value: false +diversity_penalty: + desc: null + value: 0.0 +do_eval: + desc: null + value: true +do_predict: + desc: null + value: false +do_sample: + desc: null + value: false +do_train: + desc: null + value: true +early_stopping: + desc: null + value: false +encoder: + desc: null + value: + _name_or_path: facebook/wav2vec2-large-lv60 + activation_dropout: 0.0 + adapter_kernel_size: 3 + adapter_stride: 2 + add_adapter: true + add_cross_attention: false + apply_spec_augment: false + architectures: + - Wav2Vec2ForPreTraining + attention_dropout: 0.0 + bad_words_ids: null + bos_token_id: 1 + chunk_size_feed_forward: 0 + classifier_proj_size: 256 + codevector_dim: 768 + contrastive_logits_temperature: 0.1 + conv_bias: true + conv_dim: + - 512 + - 512 + - 512 + - 512 + - 512 + - 512 + - 512 + conv_kernel: + - 10 + - 3 + - 3 + - 3 + - 3 + - 2 + - 2 + conv_stride: + - 5 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + cross_attention_hidden_size: null + ctc_loss_reduction: sum + ctc_zero_infinity: false + decoder_start_token_id: null + diversity_loss_weight: 0.1 + diversity_penalty: 0.0 + do_sample: false + do_stable_layer_norm: true + early_stopping: false + encoder_no_repeat_ngram_size: 0 + eos_token_id: 2 + feat_extract_activation: gelu + feat_extract_dropout: 0.0 + feat_extract_norm: layer + feat_proj_dropout: 0.0 + feat_quantizer_dropout: 0.0 + final_dropout: 0.0 + finetuning_task: null + forced_bos_token_id: null + forced_eos_token_id: null + gradient_checkpointing: false + hidden_act: gelu + hidden_dropout: 0.0 + hidden_dropout_prob: 0.0 + hidden_size: 1024 + id2label: + '0': LABEL_0 + '1': LABEL_1 + initializer_range: 0.02 + intermediate_size: 4096 + is_decoder: false + is_encoder_decoder: false + label2id: + LABEL_0: 0 + LABEL_1: 1 + layer_norm_eps: 1.0e-05 + layerdrop: 0.0 + length_penalty: 1.0 + mask_feature_length: 10 + mask_feature_min_masks: 0 + mask_feature_prob: 0.0 + mask_time_length: 10 + mask_time_min_masks: 2 + mask_time_prob: 0.0 + max_length: 20 + min_length: 0 + model_type: wav2vec2 + no_repeat_ngram_size: 0 + num_adapter_layers: 3 + num_attention_heads: 16 + num_beam_groups: 1 + num_beams: 1 + num_codevector_groups: 2 + num_codevectors_per_group: 320 + num_conv_pos_embedding_groups: 16 + num_conv_pos_embeddings: 128 + num_feat_extract_layers: 7 + num_hidden_layers: 24 + num_negatives: 100 + num_return_sequences: 1 + output_attentions: false + output_hidden_size: 1024 + output_hidden_states: false + output_scores: false + pad_token_id: 0 + prefix: null + problem_type: null + proj_codevector_dim: 768 + pruned_heads: {} + remove_invalid_values: false + repetition_penalty: 1.0 + return_dict: true + return_dict_in_generate: false + sep_token_id: null + task_specific_params: null + tdnn_dilation: + - 1 + - 2 + - 3 + - 1 + - 1 + tdnn_dim: + - 512 + - 512 + - 512 + - 512 + - 1500 + tdnn_kernel: + - 5 + - 3 + - 3 + - 1 + - 1 + temperature: 1.0 + tie_encoder_decoder: false + tie_word_embeddings: true + tokenizer_class: null + top_k: 50 + top_p: 1.0 + torch_dtype: null + torchscript: false + transformers_version: 4.17.0.dev0 + use_bfloat16: false + use_weighted_layer_sum: false + vocab_size: 32 + xvector_output_dim: 512 +encoder_no_repeat_ngram_size: + desc: null + value: 0 +eos_token_id: + desc: null + value: 50256 +eval_accumulation_steps: + desc: null + value: None +eval_batch_size: + desc: null + value: 8 +eval_steps: + desc: null + value: 500 +evaluation_strategy: + desc: null + value: steps +finetuning_task: + desc: null + value: null +forced_bos_token_id: + desc: null + value: null +forced_eos_token_id: + desc: null + value: null +fp16: + desc: null + value: true +fp16_backend: + desc: null + value: auto +fp16_full_eval: + desc: null + value: false +fp16_opt_level: + desc: null + value: O1 +generation_max_length: + desc: null + value: 40 +generation_num_beams: + desc: null + value: 1 +gradient_accumulation_steps: + desc: null + value: 2 +gradient_checkpointing: + desc: null + value: true +greater_is_better: + desc: null + value: None +group_by_length: + desc: null + value: true +half_precision_backend: + desc: null + value: amp +hub_model_id: + desc: null + value: None +hub_strategy: + desc: null + value: every_save +hub_token: + desc: null + value: +id2label: + desc: null + value: + '0': LABEL_0 + '1': LABEL_1 +ignore_data_skip: + desc: null + value: false +is_decoder: + desc: null + value: false +is_encoder_decoder: + desc: null + value: true +label2id: + desc: null + value: + LABEL_0: 0 + LABEL_1: 1 +label_names: + desc: null + value: None +label_smoothing_factor: + desc: null + value: 0.0 +learning_rate: + desc: null + value: 0.001 +length_column_name: + desc: null + value: input_length +length_penalty: + desc: null + value: 1.0 +load_best_model_at_end: + desc: null + value: false +local_rank: + desc: null + value: -1 +log_level: + desc: null + value: -1 +log_level_replica: + desc: null + value: -1 +log_on_each_node: + desc: null + value: true +logging_dir: + desc: null + value: ./runs/Mar03_00-44-38_sanchit--v100 +logging_first_step: + desc: null + value: false +logging_nan_inf_filter: + desc: null + value: true +logging_steps: + desc: null + value: 1 +logging_strategy: + desc: null + value: steps +lr_scheduler_type: + desc: null + value: linear +max_grad_norm: + desc: null + value: 1.0 +max_length: + desc: null + value: 50 +max_steps: + desc: null + value: -1 +metric_for_best_model: + desc: null + value: None +min_length: + desc: null + value: 0 +model_type: + desc: null + value: speech-encoder-decoder +mp_parameters: + desc: null + value: '' +no_cuda: + desc: null + value: false +no_repeat_ngram_size: + desc: null + value: 0 +num_beam_groups: + desc: null + value: 1 +num_beams: + desc: null + value: 1 +num_return_sequences: + desc: null + value: 1 +num_train_epochs: + desc: null + value: 1.0 +optim: + desc: null + value: adamw_hf +output_attentions: + desc: null + value: false +output_dir: + desc: null + value: ./ +output_hidden_states: + desc: null + value: false +output_scores: + desc: null + value: false +overwrite_output_dir: + desc: null + value: true +pad_token_id: + desc: null + value: 50256 +past_index: + desc: null + value: -1 +per_device_eval_batch_size: + desc: null + value: 8 +per_device_train_batch_size: + desc: null + value: 8 +per_gpu_eval_batch_size: + desc: null + value: None +per_gpu_train_batch_size: + desc: null + value: None +predict_with_generate: + desc: null + value: true +prediction_loss_only: + desc: null + value: false +prefix: + desc: null + value: null +problem_type: + desc: null + value: null +processor_class: + desc: null + value: Wav2Vec2Processor +pruned_heads: + desc: null + value: {} +push_to_hub: + desc: null + value: true +push_to_hub_model_id: + desc: null + value: None +push_to_hub_organization: + desc: null + value: None +push_to_hub_token: + desc: null + value: +remove_invalid_values: + desc: null + value: false +remove_unused_columns: + desc: null + value: true +repetition_penalty: + desc: null + value: 1.0 +report_to: + desc: null + value: '[''wandb'']' +resume_from_checkpoint: + desc: null + value: None +return_dict: + desc: null + value: true +return_dict_in_generate: + desc: null + value: false +run_name: + desc: null + value: ./ +save_on_each_node: + desc: null + value: false +save_steps: + desc: null + value: 500 +save_strategy: + desc: null + value: steps +save_total_limit: + desc: null + value: 1 +seed: + desc: null + value: 42 +sep_token_id: + desc: null + value: null +sharded_ddp: + desc: null + value: '[]' +skip_memory_metrics: + desc: null + value: true +sortish_sampler: + desc: null + value: false +task_specific_params: + desc: null + value: null +temperature: + desc: null + value: 1.0 +tf32: + desc: null + value: None +tie_encoder_decoder: + desc: null + value: false +tie_word_embeddings: + desc: null + value: false +tokenizer_class: + desc: null + value: null +top_k: + desc: null + value: 50 +top_p: + desc: null + value: 1.0 +torch_dtype: + desc: null + value: torch.float32 +torchscript: + desc: null + value: false +tpu_metrics_debug: + desc: null + value: false +tpu_num_cores: + desc: null + value: None +train_batch_size: + desc: null + value: 8 +transformers_version: + desc: null + value: null +use_bfloat16: + desc: null + value: false +use_cache: + desc: null + value: false +use_legacy_prediction_loop: + desc: null + value: false +warmup_ratio: + desc: null + value: 0.0 +warmup_steps: + desc: null + value: 500 +weight_decay: + desc: null + value: 0.0 +xpu_backend: + desc: null + value: None diff --git a/wandb/run-20220303_004520-25bnjrx1/files/output.log b/wandb/run-20220303_004520-25bnjrx1/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..21b162e3ef165ee505b0b3106816260cf0567666 --- /dev/null +++ b/wandb/run-20220303_004520-25bnjrx1/files/output.log @@ -0,0 +1,1629 @@ + + + 0%| | 0/1784 [00:00> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9004, 'learning_rate': 0.0, 'epoch': 0.0} +[WARNING|modeling_utils.py:388] 2022-03-03 00:45:25,905 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 0%| | 1/1784 [00:04<2:06:52, 4.27s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:45:27,846 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.028, 'learning_rate': 0.0, 'epoch': 0.0} +[WARNING|modeling_utils.py:388] 2022-03-03 00:45:29,734 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%| | 2/1784 [00:08<1:59:06, 4.01s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:45:31,640 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:45:33,485 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.1215, 'learning_rate': 2e-06, 'epoch': 0.0} + 0%|▏ | 3/1784 [00:12<1:58:52, 4.00s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:45:35,597 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6759, 'learning_rate': 4e-06, 'epoch': 0.0} +[WARNING|modeling_utils.py:388] 2022-03-03 00:45:37,399 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%|▏ | 4/1784 [00:15<1:55:28, 3.89s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:45:39,346 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7525, 'learning_rate': 6e-06, 'epoch': 0.0} +[WARNING|modeling_utils.py:388] 2022-03-03 00:45:41,156 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 0%|▏ | 5/1784 [00:19<1:53:57, 3.84s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:45:43,076 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7486, 'learning_rate': 8e-06, 'epoch': 0.0} +[WARNING|modeling_utils.py:388] 2022-03-03 00:45:44,869 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 0%|▎ | 6/1784 [00:23<1:52:35, 3.80s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:45:46,761 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5801, 'learning_rate': 1e-05, 'epoch': 0.0} +[WARNING|modeling_utils.py:388] 2022-03-03 00:45:48,914 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 0%|▎ | 7/1784 [00:27<1:55:33, 3.90s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:45:51,556 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6594, 'learning_rate': 1.2e-05, 'epoch': 0.0} +[WARNING|modeling_utils.py:388] 2022-03-03 00:45:53,353 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 0%|▎ | 8/1784 [00:31<1:59:55, 4.05s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:45:55,231 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.831, 'learning_rate': 1.4e-05, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-03 00:45:56,985 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 1%|▍ | 9/1784 [00:35<1:55:58, 3.92s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:45:58,828 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:46:00,568 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▍ | 10/1784 [00:38<1:52:50, 3.82s/it] + + 1%|▍ | 10/1784 [00:38<1:52:50, 3.82s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:46:02,425 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.513, 'learning_rate': 1.8e-05, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-03 00:46:04,150 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▍ | 11/1784 [00:42<1:50:38, 3.74s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:46:05,979 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:46:07,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▌ | 12/1784 [00:46<1:48:30, 3.67s/it] + + 1%|▌ | 12/1784 [00:46<1:48:30, 3.67s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:46:09,484 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:46:11,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▌ | 13/1784 [00:49<1:47:08, 3.63s/it] + + 1%|▌ | 13/1784 [00:49<1:47:08, 3.63s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:46:13,030 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2523, 'learning_rate': 2.4e-05, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-03 00:46:14,664 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▌ | 14/1784 [00:53<1:45:40, 3.58s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:46:16,478 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:46:18,133 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▋ | 15/1784 [00:56<1:44:37, 3.55s/it] + + 1%|▋ | 15/1784 [00:56<1:44:37, 3.55s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:46:19,892 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:46:21,568 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▋ | 16/1784 [00:59<1:43:33, 3.51s/it] + + 1%|▋ | 16/1784 [00:59<1:43:33, 3.51s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:46:23,367 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4427, 'learning_rate': 3e-05, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-03 00:46:25,019 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▊ | 17/1784 [01:03<1:42:56, 3.50s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:46:26,753 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:46:28,393 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▊ | 18/1784 [01:06<1:41:48, 3.46s/it] + + 1%|▊ | 18/1784 [01:06<1:41:48, 3.46s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:46:30,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:46:31,775 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▊ | 19/1784 [01:10<1:41:04, 3.44s/it] + 1%|▊ | 19/1784 [01:10<1:41:04, 3.44s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:46:33,480 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:46:35,125 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▉ | 20/1784 [01:13<1:40:16, 3.41s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:46:36,870 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6023, 'learning_rate': 3.6e-05, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-03 00:46:38,474 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▉ | 21/1784 [01:16<1:39:38, 3.39s/it] + + 1%|▉ | 21/1784 [01:16<1:39:38, 3.39s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:46:40,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:46:41,865 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▉ | 22/1784 [01:20<1:39:35, 3.39s/it] + + 1%|▉ | 22/1784 [01:20<1:39:35, 3.39s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:46:43,606 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4214, 'learning_rate': 4.2000000000000004e-05, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-03 00:46:45,216 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|█ | 23/1784 [01:23<1:39:09, 3.38s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:46:46,917 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:46:48,494 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|█ | 24/1784 [01:26<1:38:13, 3.35s/it] + + 1%|█ | 24/1784 [01:26<1:38:13, 3.35s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:46:50,185 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:46:51,737 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|█ | 25/1784 [01:30<1:37:15, 3.32s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:46:53,383 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3643, 'learning_rate': 4.6e-05, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-03 00:46:54,946 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|█▏ | 26/1784 [01:33<1:36:15, 3.28s/it] + + 1%|█▏ | 26/1784 [01:33<1:36:15, 3.28s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:46:56,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:46:58,173 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▏ | 27/1784 [01:36<1:35:41, 3.27s/it] + + 2%|█▏ | 27/1784 [01:36<1:35:41, 3.27s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:46:59,863 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.032, 'learning_rate': 5.2e-05, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-03 00:47:01,455 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▏ | 28/1784 [01:39<1:35:45, 3.27s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:47:03,121 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.341, 'learning_rate': 5.4e-05, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-03 00:47:04,725 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 2%|█▎ | 29/1784 [01:43<1:35:40, 3.27s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:47:06,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:47:07,916 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▎ | 30/1784 [01:46<1:34:55, 3.25s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:47:09,562 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2808, 'learning_rate': 5.6e-05, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-03 00:47:11,085 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▎ | 31/1784 [01:49<1:34:11, 3.22s/it] + + 2%|█▎ | 31/1784 [01:49<1:34:11, 3.22s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:47:12,715 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:47:14,214 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▍ | 32/1784 [01:52<1:33:17, 3.19s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:47:15,799 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5311, 'learning_rate': 6e-05, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-03 00:47:17,243 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▍ | 33/1784 [01:55<1:31:47, 3.15s/it] + + 2%|█▍ | 33/1784 [01:55<1:31:47, 3.15s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:47:18,807 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:47:20,228 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▌ | 34/1784 [01:58<1:30:19, 3.10s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:47:21,763 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.107, 'learning_rate': 6.4e-05, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-03 00:47:23,172 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▌ | 35/1784 [02:01<1:28:56, 3.05s/it] + + 2%|█▌ | 35/1784 [02:01<1:28:56, 3.05s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:47:24,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:47:26,108 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▌ | 36/1784 [02:04<1:27:54, 3.02s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:47:27,586 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.674, 'learning_rate': 6.800000000000001e-05, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-03 00:47:28,933 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▋ | 37/1784 [02:07<1:26:10, 2.96s/it] + 2%|█▋ | 37/1784 [02:07<1:26:10, 2.96s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:47:30,407 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:47:31,748 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▋ | 38/1784 [02:10<1:24:52, 2.92s/it] + + 2%|█▋ | 38/1784 [02:10<1:24:52, 2.92s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:47:33,155 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:47:34,467 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▋ | 39/1784 [02:12<1:23:05, 2.86s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:47:35,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5041, 'learning_rate': 7.4e-05, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-03 00:47:37,110 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▊ | 40/1784 [02:15<1:21:09, 2.79s/it] + 2%|█▊ | 40/1784 [02:15<1:21:09, 2.79s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:47:38,469 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:47:39,695 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▊ | 41/1784 [02:18<1:19:17, 2.73s/it] + 2%|█▊ | 41/1784 [02:18<1:19:17, 2.73s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:47:40,980 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:47:42,126 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▊ | 42/1784 [02:20<1:16:42, 2.64s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:47:43,378 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1578, 'learning_rate': 8e-05, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-03 00:47:44,451 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▉ | 43/1784 [02:22<1:13:51, 2.55s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:47:45,609 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4119, 'learning_rate': 8.2e-05, 'epoch': 0.02} +{'loss': 4.2466, 'learning_rate': 8.400000000000001e-05, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-03 00:47:46,615 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▉ | 44/1784 [02:25<1:10:30, 2.43s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:47:47,695 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:47:48,615 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|█▉ | 45/1784 [02:27<1:06:42, 2.30s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:47:49,565 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:47:50,352 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5696, 'learning_rate': 8.599999999999999e-05, 'epoch': 0.03} + 3%|██ | 46/1784 [02:28<1:01:46, 2.13s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:47:51,209 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:47:51,930 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▏ | 47/1784 [02:30<56:54, 1.97s/it] +{'loss': 4.5456, 'learning_rate': 8.8e-05, 'epoch': 0.03} + 3%|██▏ | 47/1784 [02:30<56:54, 1.97s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:47:52,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:47:53,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▏ | 48/1784 [02:31<52:01, 1.80s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:47:54,016 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8245, 'learning_rate': 9.2e-05, 'epoch': 0.03} +[WARNING|modeling_utils.py:388] 2022-03-03 00:47:54,576 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▏ | 49/1784 [02:33<47:11, 1.63s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:47:55,219 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:47:56,276 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▎ | 50/1784 [02:34<47:43, 1.65s/it] + 3%|██▎ | 50/1784 [02:34<47:43, 1.65s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:47:58,398 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▎ | 50/1784 [02:34<47:43, 1.65s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:47:58,398 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▎ | 51/1784 [02:38<1:07:56, 2.35s/it]g-point operations will not be computed-03 00:47:58,398 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▎ | 51/1784 [02:38<1:07:56, 2.35s/it]g-point operations will not be computed-03 00:47:58,398 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▎ | 51/1784 [02:38<1:07:56, 2.35s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:48:02,222 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▎ | 51/1784 [02:38<1:07:56, 2.35s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:48:02,222 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▎ | 52/1784 [02:42<1:20:27, 2.79s/it]g-point operations will not be computed-03 00:48:02,222 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▎ | 52/1784 [02:42<1:20:27, 2.79s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:48:06,006 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▎ | 52/1784 [02:42<1:20:27, 2.79s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:48:06,006 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▎ | 53/1784 [02:46<1:28:53, 3.08s/it]g-point operations will not be computed-03 00:48:06,006 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▎ | 53/1784 [02:46<1:28:53, 3.08s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:48:09,725 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▎ | 53/1784 [02:46<1:28:53, 3.08s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:48:09,725 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▍ | 54/1784 [02:49<1:34:09, 3.27s/it]g-point operations will not be computed-03 00:48:09,725 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▍ | 54/1784 [02:49<1:34:09, 3.27s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:48:13,410 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▍ | 55/1784 [02:53<1:37:30, 3.38s/it]g-point operations will not be computed-03 00:48:13,410 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▍ | 55/1784 [02:53<1:37:30, 3.38s/it]g-point operations will not be computed-03 00:48:13,410 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:48:18,807 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:48:17,057 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:48:18,807 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:48:17,057 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4468, 'learning_rate': 0.000108, 'epoch': 0.03} + 3%|██▍ | 56/1784 [02:57<1:39:29, 3.45s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:48:20,680 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:48:22,445 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▌ | 57/1784 [03:00<1:41:01, 3.51s/it] + 3%|██▌ | 57/1784 [03:00<1:41:01, 3.51s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:48:24,277 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▌ | 58/1784 [03:04<1:41:32, 3.53s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:48:24,277 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▌ | 58/1784 [03:04<1:41:32, 3.53s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:48:24,277 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▌ | 58/1784 [03:04<1:41:32, 3.53s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:48:27,882 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▌ | 59/1784 [03:08<1:41:46, 3.54s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:48:27,882 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▌ | 59/1784 [03:08<1:41:46, 3.54s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:48:27,882 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▌ | 59/1784 [03:08<1:41:46, 3.54s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:48:31,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▌ | 59/1784 [03:08<1:41:46, 3.54s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:48:31,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▋ | 60/1784 [03:11<1:42:35, 3.57s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:48:35,075 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▋ | 61/1784 [03:15<1:42:06, 3.56s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:48:35,075 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▋ | 61/1784 [03:15<1:42:06, 3.56s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:48:35,075 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▋ | 61/1784 [03:15<1:42:06, 3.56s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:48:38,533 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▋ | 62/1784 [03:18<1:41:25, 3.53s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:48:38,533 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▋ | 62/1784 [03:18<1:41:25, 3.53s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:48:38,533 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▋ | 62/1784 [03:18<1:41:25, 3.53s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:48:42,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|██▊ | 63/1784 [03:22<1:41:18, 3.53s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:48:42,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|██▊ | 63/1784 [03:22<1:41:18, 3.53s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:48:42,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|██▊ | 64/1784 [03:25<1:41:01, 3.52s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:48:45,563 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|██▊ | 64/1784 [03:25<1:41:01, 3.52s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:48:45,563 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|██▊ | 64/1784 [03:25<1:41:01, 3.52s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:48:49,060 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|██▉ | 65/1784 [03:29<1:40:24, 3.50s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:48:49,060 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|██▉ | 65/1784 [03:29<1:40:24, 3.50s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:48:49,060 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|██▉ | 65/1784 [03:29<1:40:24, 3.50s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:48:52,491 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|██▉ | 66/1784 [03:32<1:39:47, 3.49s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:48:52,491 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|██▉ | 66/1784 [03:32<1:39:47, 3.49s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:48:52,491 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|██▉ | 67/1784 [03:36<1:39:26, 3.47s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:48:55,967 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|██▉ | 67/1784 [03:36<1:39:26, 3.47s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:48:55,967 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|██▉ | 67/1784 [03:36<1:39:26, 3.47s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:48:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███ | 68/1784 [03:39<1:38:23, 3.44s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:48:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███ | 68/1784 [03:39<1:38:23, 3.44s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:48:59,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███ | 68/1784 [03:39<1:38:23, 3.44s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:49:02,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███ | 69/1784 [03:42<1:37:56, 3.43s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:49:02,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███ | 69/1784 [03:42<1:37:56, 3.43s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:49:02,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███ | 70/1784 [03:46<1:37:49, 3.42s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:49:06,122 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███ | 70/1784 [03:46<1:37:49, 3.42s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:49:06,122 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███ | 70/1784 [03:46<1:37:49, 3.42s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:49:09,504 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▏ | 71/1784 [03:49<1:37:01, 3.40s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:49:09,504 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▏ | 71/1784 [03:49<1:37:01, 3.40s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:49:09,504 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▏ | 71/1784 [03:49<1:37:01, 3.40s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:49:12,861 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▏ | 72/1784 [03:52<1:36:31, 3.38s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:49:16,203 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▏ | 72/1784 [03:52<1:36:31, 3.38s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:49:16,203 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▏ | 73/1784 [03:56<1:36:09, 3.37s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:49:16,203 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▏ | 73/1784 [03:56<1:36:09, 3.37s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:49:16,203 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▏ | 73/1784 [03:56<1:36:09, 3.37s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:49:19,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▎ | 74/1784 [03:59<1:35:10, 3.34s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:49:19,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▎ | 74/1784 [03:59<1:35:10, 3.34s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:49:19,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▎ | 75/1784 [04:02<1:34:40, 3.32s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:49:22,786 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▎ | 75/1784 [04:02<1:34:40, 3.32s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:49:22,786 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▎ | 75/1784 [04:02<1:34:40, 3.32s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:49:26,062 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▎ | 76/1784 [04:06<1:34:10, 3.31s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:49:26,062 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▎ | 76/1784 [04:06<1:34:10, 3.31s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:49:26,062 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▎ | 76/1784 [04:06<1:34:10, 3.31s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:49:29,349 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▎ | 76/1784 [04:06<1:34:10, 3.31s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:49:29,349 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▍ | 77/1784 [04:09<1:33:17, 3.28s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:49:32,532 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▍ | 78/1784 [04:12<1:32:48, 3.26s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:49:32,532 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▍ | 78/1784 [04:12<1:32:48, 3.26s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:49:32,532 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▍ | 78/1784 [04:12<1:32:48, 3.26s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:49:35,734 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▍ | 79/1784 [04:15<1:31:47, 3.23s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:49:38,849 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▍ | 79/1784 [04:15<1:31:47, 3.23s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:49:38,849 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▌ | 80/1784 [04:18<1:30:56, 3.20s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:49:38,849 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▌ | 80/1784 [04:18<1:30:56, 3.20s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:49:38,849 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▌ | 80/1784 [04:18<1:30:56, 3.20s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:49:41,978 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▌ | 81/1784 [04:21<1:30:06, 3.17s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:49:45,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▌ | 81/1784 [04:21<1:30:06, 3.17s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:49:45,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▋ | 82/1784 [04:24<1:28:47, 3.13s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:49:45,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▋ | 82/1784 [04:24<1:28:47, 3.13s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:49:45,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▋ | 82/1784 [04:24<1:28:47, 3.13s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:49:48,105 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▋ | 82/1784 [04:24<1:28:47, 3.13s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:49:48,105 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▋ | 83/1784 [04:28<1:28:16, 3.11s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:49:51,084 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▋ | 83/1784 [04:28<1:28:16, 3.11s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:49:51,084 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▋ | 84/1784 [04:30<1:26:49, 3.06s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:49:51,084 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▋ | 84/1784 [04:30<1:26:49, 3.06s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:49:54,018 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▋ | 84/1784 [04:30<1:26:49, 3.06s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:49:54,018 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▊ | 85/1784 [04:33<1:24:55, 3.00s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:49:56,859 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▊ | 85/1784 [04:33<1:24:55, 3.00s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:49:56,859 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▊ | 86/1784 [04:36<1:23:30, 2.95s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:49:56,859 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▊ | 86/1784 [04:36<1:23:30, 2.95s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:49:56,859 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▊ | 87/1784 [04:39<1:22:22, 2.91s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:49:59,714 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▊ | 87/1784 [04:39<1:22:22, 2.91s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:50:02,453 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▊ | 87/1784 [04:39<1:22:22, 2.91s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:50:02,453 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▉ | 88/1784 [04:42<1:21:28, 2.88s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:50:05,229 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▉ | 88/1784 [04:42<1:21:28, 2.88s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:50:05,229 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▉ | 89/1784 [04:44<1:19:06, 2.80s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:50:05,229 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▉ | 89/1784 [04:44<1:19:06, 2.80s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:50:05,229 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▉ | 90/1784 [04:47<1:17:05, 2.73s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:50:07,817 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▉ | 90/1784 [04:47<1:17:05, 2.73s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:50:07,817 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 91/1784 [04:49<1:14:12, 2.63s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:50:10,284 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 92/1784 [04:52<1:11:23, 2.53s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:50:12,639 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 92/1784 [04:52<1:11:23, 2.53s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:50:12,639 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 93/1784 [04:54<1:08:10, 2.42s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:50:14,880 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 93/1784 [04:54<1:08:10, 2.42s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:50:14,880 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▏ | 94/1784 [04:56<1:04:02, 2.27s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:50:16,927 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▏ | 94/1784 [04:56<1:04:02, 2.27s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:50:16,927 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8955, 'learning_rate': 0.000184, 'epoch': 0.05} + 5%|████▎ | 95/1784 [04:57<59:26, 2.11s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:50:18,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▎ | 95/1784 [04:57<59:26, 2.11s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:50:18,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▎ | 96/1784 [04:59<55:08, 1.96s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:50:21,941 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▎ | 96/1784 [04:59<55:08, 1.96s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:50:21,941 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▍ | 98/1784 [05:02<46:19, 1.65s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:50:23,307 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▍ | 98/1784 [05:02<46:19, 1.65s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:50:23,307 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6766, 'learning_rate': 0.000192, 'epoch': 0.05} + 6%|████▍ | 99/1784 [05:03<42:28, 1.51s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:50:25,705 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▍ | 99/1784 [05:03<42:28, 1.51s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:50:25,705 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▍ | 100/1784 [05:05<43:49, 1.56s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:50:25,705 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▍ | 100/1784 [05:05<43:49, 1.56s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:50:28,794 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▍ | 100/1784 [05:05<43:49, 1.56s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:50:28,794 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▍ | 101/1784 [05:09<1:03:00, 2.25s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:50:28,794 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▍ | 102/1784 [05:12<1:14:49, 2.67s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:50:32,493 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▍ | 102/1784 [05:12<1:14:49, 2.67s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:50:32,493 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▍ | 102/1784 [05:12<1:14:49, 2.67s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:50:36,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▌ | 103/1784 [05:16<1:22:45, 2.95s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:50:36,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▌ | 103/1784 [05:16<1:22:45, 2.95s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:50:36,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▌ | 103/1784 [05:16<1:22:45, 2.95s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:50:39,758 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▌ | 103/1784 [05:16<1:22:45, 2.95s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:50:39,758 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▌ | 104/1784 [05:19<1:28:38, 3.17s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:50:39,758 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▌ | 104/1784 [05:19<1:28:38, 3.17s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:50:43,380 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▌ | 104/1784 [05:19<1:28:38, 3.17s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:50:43,380 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▌ | 105/1784 [05:23<1:31:48, 3.28s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:50:43,380 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▌ | 105/1784 [05:23<1:31:48, 3.28s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:50:46,904 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▌ | 105/1784 [05:23<1:31:48, 3.28s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:50:46,904 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 106/1784 [05:27<1:34:01, 3.36s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:50:46,904 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 107/1784 [05:30<1:35:33, 3.42s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:50:50,490 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 107/1784 [05:30<1:35:33, 3.42s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:50:50,490 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 107/1784 [05:30<1:35:33, 3.42s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:50:54,034 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 107/1784 [05:30<1:35:33, 3.42s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:50:54,034 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 108/1784 [05:34<1:36:41, 3.46s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:50:54,034 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 108/1784 [05:34<1:36:41, 3.46s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:50:57,573 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 108/1784 [05:34<1:36:41, 3.46s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:50:57,573 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▊ | 109/1784 [05:37<1:37:23, 3.49s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:50:57,573 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▊ | 110/1784 [05:41<1:37:44, 3.50s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:51:01,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▊ | 110/1784 [05:41<1:37:44, 3.50s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:51:01,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▊ | 110/1784 [05:41<1:37:44, 3.50s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:51:04,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▊ | 111/1784 [05:44<1:37:39, 3.50s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:51:04,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▊ | 111/1784 [05:44<1:37:39, 3.50s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:51:04,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▊ | 111/1784 [05:44<1:37:39, 3.50s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:51:08,103 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▊ | 111/1784 [05:44<1:37:39, 3.50s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:51:08,103 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▉ | 112/1784 [05:48<1:37:02, 3.48s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:51:08,103 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▉ | 112/1784 [05:48<1:37:02, 3.48s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:51:11,507 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▉ | 112/1784 [05:48<1:37:02, 3.48s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:51:11,507 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▉ | 113/1784 [05:51<1:35:59, 3.45s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:51:14,943 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▉ | 113/1784 [05:51<1:35:59, 3.45s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:51:14,943 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▉ | 114/1784 [05:55<1:36:08, 3.45s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:51:14,943 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▉ | 114/1784 [05:55<1:36:08, 3.45s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:51:18,360 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▉ | 114/1784 [05:55<1:36:08, 3.45s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:51:18,360 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 115/1784 [05:58<1:35:24, 3.43s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:51:18,360 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + [WARNING|modeling_utils.py:388] 2022-03-03 00:51:21,744 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + [WARNING|modeling_utils.py:388] 2022-03-03 00:51:21,744 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████ | 116/1784 [06:01<1:34:54, 3.41s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:51:25,118 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████ | 116/1784 [06:01<1:34:54, 3.41s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:51:25,118 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████ | 117/1784 [06:05<1:34:53, 3.42s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:51:25,118 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████ | 117/1784 [06:05<1:34:53, 3.42s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:51:28,472 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████ | 117/1784 [06:05<1:34:53, 3.42s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:51:28,472 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▏ | 118/1784 [06:08<1:34:00, 3.39s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:51:28,472 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▏ | 118/1784 [06:08<1:34:00, 3.39s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:51:31,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▏ | 118/1784 [06:08<1:34:00, 3.39s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:51:31,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▏ | 119/1784 [06:11<1:34:26, 3.40s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:51:35,241 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▏ | 119/1784 [06:11<1:34:26, 3.40s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:51:35,241 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▏ | 120/1784 [06:15<1:33:31, 3.37s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:51:35,241 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▏ | 120/1784 [06:15<1:33:31, 3.37s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:51:38,533 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▏ | 120/1784 [06:15<1:33:31, 3.37s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:51:38,533 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▎ | 121/1784 [06:18<1:32:52, 3.35s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:51:41,887 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▎ | 122/1784 [06:21<1:33:00, 3.36s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:51:41,887 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▎ | 122/1784 [06:21<1:33:00, 3.36s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:51:41,887 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▎ | 122/1784 [06:21<1:33:00, 3.36s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:51:45,181 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▎ | 122/1784 [06:21<1:33:00, 3.36s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:51:45,181 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▍ | 123/1784 [06:25<1:31:47, 3.32s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:51:45,181 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▍ | 124/1784 [06:28<1:31:06, 3.29s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:51:48,443 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▍ | 124/1784 [06:28<1:31:06, 3.29s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:51:48,443 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▍ | 124/1784 [06:28<1:31:06, 3.29s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:51:51,640 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▍ | 124/1784 [06:28<1:31:06, 3.29s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:51:51,640 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▍ | 125/1784 [06:31<1:30:35, 3.28s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:51:51,640 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▍ | 125/1784 [06:31<1:30:35, 3.28s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:51:54,839 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▍ | 125/1784 [06:31<1:30:35, 3.28s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:51:54,839 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▌ | 126/1784 [06:34<1:29:43, 3.25s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:51:54,839 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▌ | 126/1784 [06:34<1:29:43, 3.25s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:51:54,839 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▌ | 127/1784 [06:37<1:29:07, 3.23s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:51:58,035 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▌ | 127/1784 [06:37<1:29:07, 3.23s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:52:01,179 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▌ | 127/1784 [06:37<1:29:07, 3.23s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:52:01,179 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▌ | 128/1784 [06:41<1:28:29, 3.21s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:52:01,179 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▌ | 128/1784 [06:41<1:28:29, 3.21s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:52:01,179 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 129/1784 [06:44<1:27:52, 3.19s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:52:04,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 129/1784 [06:44<1:27:52, 3.19s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:52:07,459 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 129/1784 [06:44<1:27:52, 3.19s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:52:07,459 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 130/1784 [06:47<1:26:51, 3.15s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:52:07,459 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 131/1784 [06:50<1:26:08, 3.13s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:52:10,517 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 131/1784 [06:50<1:26:08, 3.13s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:52:10,517 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 131/1784 [06:50<1:26:08, 3.13s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:52:13,615 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 131/1784 [06:50<1:26:08, 3.13s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:52:13,615 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▊ | 132/1784 [06:53<1:25:48, 3.12s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:52:13,615 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▊ | 133/1784 [06:56<1:25:35, 3.11s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:52:16,747 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▊ | 133/1784 [06:56<1:25:35, 3.11s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:52:16,747 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▊ | 133/1784 [06:56<1:25:35, 3.11s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:52:19,689 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▊ | 133/1784 [06:56<1:25:35, 3.11s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:52:19,689 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|█████▊ | 134/1784 [06:59<1:23:40, 3.04s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:52:22,584 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|█████▊ | 134/1784 [06:59<1:23:40, 3.04s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:52:22,584 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|█████▉ | 135/1784 [07:02<1:22:26, 3.00s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:52:22,584 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|█████▉ | 135/1784 [07:02<1:22:26, 3.00s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:52:25,483 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|█████▉ | 135/1784 [07:02<1:22:26, 3.00s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:52:25,483 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|█████▉ | 136/1784 [07:05<1:21:33, 2.97s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:52:28,348 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|█████▉ | 136/1784 [07:05<1:21:33, 2.97s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:52:28,348 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|█████▉ | 137/1784 [07:08<1:20:11, 2.92s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:52:28,348 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████ | 138/1784 [07:10<1:18:49, 2.87s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:52:31,136 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████ | 138/1784 [07:10<1:18:49, 2.87s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:52:31,136 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████ | 138/1784 [07:10<1:18:49, 2.87s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:52:33,855 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████ | 138/1784 [07:10<1:18:49, 2.87s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:52:33,855 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████ | 139/1784 [07:13<1:17:00, 2.81s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:52:36,439 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████ | 139/1784 [07:13<1:17:00, 2.81s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:52:36,439 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████ | 140/1784 [07:16<1:14:57, 2.74s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:52:36,439 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████ | 140/1784 [07:16<1:14:57, 2.74s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:52:36,439 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▏ | 141/1784 [07:18<1:12:41, 2.65s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:52:38,966 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▏ | 142/1784 [07:20<1:10:02, 2.56s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:52:41,386 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▏ | 142/1784 [07:20<1:10:02, 2.56s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:52:41,386 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▎ | 143/1784 [07:23<1:07:18, 2.46s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:52:43,661 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▎ | 143/1784 [07:23<1:07:18, 2.46s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:52:43,661 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▎ | 144/1784 [07:25<1:03:38, 2.33s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:52:45,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▎ | 144/1784 [07:25<1:03:38, 2.33s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:52:45,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▎ | 145/1784 [07:27<1:00:02, 2.20s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:52:47,722 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▎ | 145/1784 [07:27<1:00:02, 2.20s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:52:47,722 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.2152, 'learning_rate': 0.00028599999999999996, 'epoch': 0.08} + 8%|██████▌ | 146/1784 [07:28<56:37, 2.07s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:52:49,610 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▌ | 146/1784 [07:28<56:37, 2.07s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:52:49,610 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▌ | 147/1784 [07:30<51:57, 1.90s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:52:52,719 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▌ | 147/1784 [07:30<51:57, 1.90s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:52:52,719 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▋ | 149/1784 [07:32<43:37, 1.60s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:52:55,189 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▋ | 149/1784 [07:32<43:37, 1.60s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:52:55,189 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.2187, 'learning_rate': 0.000294, 'epoch': 0.08} + 8%|██████▋ | 150/1784 [07:34<43:48, 1.61s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:52:55,189 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▋ | 150/1784 [07:34<43:48, 1.61s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:52:58,245 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▋ | 150/1784 [07:34<43:48, 1.61s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:52:58,245 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▌ | 151/1784 [07:38<1:01:53, 2.27s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:52:58,245 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▌ | 151/1784 [07:38<1:01:53, 2.27s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:53:01,950 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▌ | 151/1784 [07:38<1:01:53, 2.27s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:53:01,950 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▋ | 152/1784 [07:42<1:13:34, 2.70s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:53:01,950 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▋ | 152/1784 [07:42<1:13:34, 2.70s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:53:05,613 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▋ | 152/1784 [07:42<1:13:34, 2.70s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:53:05,613 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▋ | 153/1784 [07:45<1:21:41, 3.01s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:53:05,613 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▋ | 153/1784 [07:45<1:21:41, 3.01s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:53:09,341 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▋ | 153/1784 [07:45<1:21:41, 3.01s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:53:09,341 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▋ | 154/1784 [07:49<1:27:34, 3.22s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:53:13,056 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▋ | 154/1784 [07:49<1:27:34, 3.22s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:53:13,056 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▊ | 155/1784 [07:53<1:31:12, 3.36s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:53:13,056 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▊ | 155/1784 [07:53<1:31:12, 3.36s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:53:16,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▊ | 155/1784 [07:53<1:31:12, 3.36s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:53:16,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▊ | 156/1784 [07:56<1:33:34, 3.45s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:53:16,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▊ | 156/1784 [07:56<1:33:34, 3.45s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:53:20,341 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▊ | 156/1784 [07:56<1:33:34, 3.45s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:53:20,341 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▊ | 157/1784 [08:00<1:34:35, 3.49s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:53:20,341 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▊ | 157/1784 [08:00<1:34:35, 3.49s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:53:23,916 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▉ | 158/1784 [08:04<1:35:09, 3.51s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:53:23,916 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▉ | 158/1784 [08:04<1:35:09, 3.51s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:53:23,916 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9038, 'learning_rate': 0.000312, 'epoch': 0.09} + 9%|██████▉ | 159/1784 [08:07<1:35:20, 3.52s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:53:27,472 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▉ | 159/1784 [08:07<1:35:20, 3.52s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:53:31,033 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▉ | 159/1784 [08:07<1:35:20, 3.52s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:53:31,033 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▉ | 160/1784 [08:11<1:35:31, 3.53s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:53:31,033 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▉ | 160/1784 [08:11<1:35:31, 3.53s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:53:34,579 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▉ | 160/1784 [08:11<1:35:31, 3.53s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:53:34,579 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████ | 161/1784 [08:14<1:35:40, 3.54s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:53:34,579 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████ | 161/1784 [08:14<1:35:40, 3.54s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:53:34,579 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████ | 161/1784 [08:14<1:35:40, 3.54s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:53:34,579 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████ | 162/1784 [08:18<1:35:20, 3.53s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:53:34,579 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████ | 162/1784 [08:18<1:35:20, 3.53s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:53:34,579 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:53:43,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:53:34,579 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:53:43,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:53:34,579 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:53:43,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:53:34,579 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▏ | 164/1784 [08:25<1:34:19, 3.49s/it]g-point operations will not be computed-03 00:53:34,579 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▏ | 164/1784 [08:25<1:34:19, 3.49s/it]g-point operations will not be computed-03 00:53:34,579 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▏ | 164/1784 [08:25<1:34:19, 3.49s/it]g-point operations will not be computed-03 00:53:34,579 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▏ | 165/1784 [08:28<1:33:51, 3.48s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:53:51,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▏ | 165/1784 [08:28<1:33:51, 3.48s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:53:51,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▎ | 166/1784 [08:32<1:33:41, 3.47s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:53:51,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▎ | 166/1784 [08:32<1:33:41, 3.47s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:53:51,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▎ | 166/1784 [08:32<1:33:41, 3.47s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:53:51,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▎ | 167/1784 [08:35<1:32:50, 3.45s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:53:51,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▎ | 167/1784 [08:35<1:32:50, 3.45s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:53:51,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▎ | 167/1784 [08:35<1:32:50, 3.45s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:53:51,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▎ | 168/1784 [08:38<1:32:19, 3.43s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:54:02,122 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▎ | 168/1784 [08:38<1:32:19, 3.43s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:54:02,122 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▍ | 169/1784 [08:42<1:31:34, 3.40s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:54:02,122 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▍ | 169/1784 [08:42<1:31:34, 3.40s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:54:02,122 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▍ | 169/1784 [08:42<1:31:34, 3.40s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:54:02,122 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▍ | 170/1784 [08:45<1:31:05, 3.39s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:54:02,122 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:54:10,458 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:54:02,122 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:54:10,458 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:54:02,122 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9869, 'learning_rate': 0.00033800000000000003, 'epoch': 0.1} +[WARNING|modeling_utils.py:388] 2022-03-03 00:54:10,458 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:54:02,122 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▌ | 172/1784 [08:52<1:30:46, 3.38s/it]g-point operations will not be computed-03 00:54:02,122 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▌ | 172/1784 [08:52<1:30:46, 3.38s/it]g-point operations will not be computed-03 00:54:02,122 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▌ | 172/1784 [08:52<1:30:46, 3.38s/it]g-point operations will not be computed-03 00:54:02,122 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▌ | 173/1784 [08:55<1:30:11, 3.36s/it]g-point operations will not be computed-03 00:54:02,122 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▌ | 173/1784 [08:55<1:30:11, 3.36s/it]g-point operations will not be computed-03 00:54:02,122 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:54:20,420 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:54:02,122 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:54:20,420 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:54:02,122 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:54:20,420 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:54:02,122 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▋ | 175/1784 [09:02<1:28:52, 3.31s/it]g-point operations will not be computed-03 00:54:02,122 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:54:26,857 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:54:02,122 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:54:26,857 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:54:02,122 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6924, 'learning_rate': 0.000348, 'epoch': 0.1} +[WARNING|modeling_utils.py:388] 2022-03-03 00:54:26,857 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:54:02,122 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▋ | 177/1784 [09:08<1:26:59, 3.25s/it]g-point operations will not be computed-03 00:54:02,122 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:54:33,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:54:02,122 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:54:33,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:54:02,122 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8362, 'learning_rate': 0.000352, 'epoch': 0.1} +[WARNING|modeling_utils.py:388] 2022-03-03 00:54:33,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:54:02,122 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:54:36,292 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:54:02,122 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:54:36,292 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:54:02,122 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:54:36,292 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:54:02,122 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 180/1784 [09:17<1:24:09, 3.15s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:54:40,983 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 180/1784 [09:17<1:24:09, 3.15s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:54:40,983 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▉ | 181/1784 [09:20<1:24:03, 3.15s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:54:40,983 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▉ | 181/1784 [09:20<1:24:03, 3.15s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:54:40,983 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▉ | 181/1784 [09:20<1:24:03, 3.15s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:54:40,983 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▉ | 182/1784 [09:24<1:23:36, 3.13s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:54:47,218 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▉ | 182/1784 [09:24<1:23:36, 3.13s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:54:47,218 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████ | 183/1784 [09:27<1:23:08, 3.12s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:54:47,218 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████ | 183/1784 [09:27<1:23:08, 3.12s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:54:47,218 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████ | 183/1784 [09:27<1:23:08, 3.12s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:54:47,218 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████ | 184/1784 [09:30<1:21:41, 3.06s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:54:53,170 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████ | 184/1784 [09:30<1:21:41, 3.06s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:54:53,170 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████ | 185/1784 [09:33<1:20:50, 3.03s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:54:53,170 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:54:57,470 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:54:53,170 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:54:57,470 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:54:53,170 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.6795, 'learning_rate': 0.000368, 'epoch': 0.1} +[WARNING|modeling_utils.py:388] 2022-03-03 00:54:57,470 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:54:53,170 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▏ | 187/1784 [09:38<1:18:57, 2.97s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:55:01,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▏ | 188/1784 [09:41<1:18:07, 2.94s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:55:01,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▏ | 188/1784 [09:41<1:18:07, 2.94s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:55:01,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:55:05,958 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:55:01,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:55:05,958 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:55:01,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.2655, 'learning_rate': 0.000374, 'epoch': 0.11} +[WARNING|modeling_utils.py:388] 2022-03-03 00:55:05,958 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:55:01,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▎ | 190/1784 [09:46<1:14:05, 2.79s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:55:09,862 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▎ | 190/1784 [09:46<1:14:05, 2.79s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:55:09,862 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▎ | 191/1784 [09:49<1:11:30, 2.69s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:55:09,862 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▎ | 191/1784 [09:49<1:11:30, 2.69s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:55:09,862 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:55:13,383 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:55:09,862 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:55:13,383 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:55:09,862 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:55:15,570 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:55:09,862 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:55:15,570 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:55:09,862 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:55:17,642 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:55:09,862 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:55:17,642 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:55:09,862 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:55:19,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:55:09,862 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:55:19,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:55:09,862 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:55:21,418 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:55:09,862 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:55:21,418 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:55:09,862 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:55:24,416 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:55:09,862 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:55:24,416 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:55:09,862 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9989, 'learning_rate': 0.00039200000000000004, 'epoch': 0.11} +[WARNING|modeling_utils.py:388] 2022-03-03 00:55:25,643 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:55:09,862 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:55:25,643 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:55:09,862 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:55:27,311 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:55:09,862 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:55:27,311 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:55:09,862 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:55:27,311 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:55:09,862 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:55:31,154 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:55:09,862 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:55:31,154 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:55:09,862 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:55:31,154 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:55:09,862 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▊ | 202/1784 [10:13<1:11:54, 2.73s/it]g-point operations will not be computed-03 00:55:09,862 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▊ | 202/1784 [10:13<1:11:54, 2.73s/it]g-point operations will not be computed-03 00:55:09,862 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▊ | 202/1784 [10:13<1:11:54, 2.73s/it]g-point operations will not be computed-03 00:55:09,862 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 203/1784 [10:17<1:19:32, 3.02s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:55:40,454 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 204/1784 [10:20<1:24:16, 3.20s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:55:40,454 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 204/1784 [10:20<1:24:16, 3.20s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:55:40,454 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.4006, 'learning_rate': 0.000404, 'epoch': 0.11} + 11%|████████▉ | 204/1784 [10:20<1:24:16, 3.20s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:55:40,454 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 205/1784 [10:24<1:26:54, 3.30s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:55:40,454 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 205/1784 [10:24<1:26:54, 3.30s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:55:40,454 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 205/1784 [10:24<1:26:54, 3.30s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:55:40,454 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████ | 206/1784 [10:27<1:28:43, 3.37s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:55:40,454 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:55:52,826 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:55:40,454 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:55:52,826 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:55:40,454 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.0031, 'learning_rate': 0.00041, 'epoch': 0.12} + 12%|█████████ | 208/1784 [10:34<1:31:04, 3.47s/it]g-point operations will not be computed-03 00:55:40,454 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████ | 208/1784 [10:34<1:31:04, 3.47s/it]g-point operations will not be computed-03 00:55:40,454 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.7179, 'learning_rate': 0.000412, 'epoch': 0.12} + 12%|█████████ | 208/1784 [10:34<1:31:04, 3.47s/it]g-point operations will not be computed-03 00:55:40,454 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▏ | 209/1784 [10:38<1:31:14, 3.48s/it]g-point operations will not be computed-03 00:55:40,454 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▏ | 209/1784 [10:38<1:31:14, 3.48s/it]g-point operations will not be computed-03 00:55:40,454 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▏ | 209/1784 [10:38<1:31:14, 3.48s/it]g-point operations will not be computed-03 00:55:40,454 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▏ | 210/1784 [10:41<1:31:16, 3.48s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:56:05,127 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▏ | 211/1784 [10:45<1:30:43, 3.46s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:56:05,127 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▏ | 211/1784 [10:45<1:30:43, 3.46s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:56:05,127 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9536, 'learning_rate': 0.00041799999999999997, 'epoch': 0.12} + 12%|█████████▎ | 212/1784 [10:48<1:30:33, 3.46s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:56:05,127 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▎ | 212/1784 [10:48<1:30:33, 3.46s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:56:05,127 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.6715, 'learning_rate': 0.00042, 'epoch': 0.12} + 12%|█████████▎ | 212/1784 [10:48<1:30:33, 3.46s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:56:05,127 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▎ | 213/1784 [10:52<1:30:38, 3.46s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:56:15,516 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▎ | 214/1784 [10:55<1:30:19, 3.45s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:56:15,516 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▎ | 214/1784 [10:55<1:30:19, 3.45s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:56:15,516 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7195, 'learning_rate': 0.000424, 'epoch': 0.12} + 12%|█████████▍ | 215/1784 [10:58<1:29:56, 3.44s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:56:15,516 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 215/1784 [10:58<1:29:56, 3.44s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:56:15,516 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.8662, 'learning_rate': 0.000426, 'epoch': 0.12} + 12%|█████████▍ | 215/1784 [10:58<1:29:56, 3.44s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:56:15,516 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 216/1784 [11:02<1:29:49, 3.44s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:56:25,708 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 217/1784 [11:05<1:29:16, 3.42s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:56:25,708 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 217/1784 [11:05<1:29:16, 3.42s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:56:25,708 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.5386, 'learning_rate': 0.00043, 'epoch': 0.12} + 12%|█████████▍ | 217/1784 [11:05<1:29:16, 3.42s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:56:25,708 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▌ | 218/1784 [11:09<1:29:14, 3.42s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:56:25,708 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▌ | 218/1784 [11:09<1:29:14, 3.42s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:56:25,708 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▌ | 218/1784 [11:09<1:29:14, 3.42s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:56:25,708 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▌ | 219/1784 [11:12<1:28:43, 3.40s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:56:35,832 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▌ | 220/1784 [11:15<1:27:53, 3.37s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:56:35,832 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▌ | 220/1784 [11:15<1:27:53, 3.37s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:56:35,832 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.9084, 'learning_rate': 0.000436, 'epoch': 0.12} + 12%|█████████▋ | 221/1784 [11:19<1:27:12, 3.35s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:56:35,832 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▋ | 221/1784 [11:19<1:27:12, 3.35s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:56:35,832 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:56:44,072 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:56:35,832 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:56:44,072 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:56:35,832 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.9058, 'learning_rate': 0.00044, 'epoch': 0.12} + 12%|█████████▊ | 223/1784 [11:25<1:26:57, 3.34s/it]g-point operations will not be computed-03 00:56:35,832 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▊ | 223/1784 [11:25<1:26:57, 3.34s/it]g-point operations will not be computed-03 00:56:35,832 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9857, 'learning_rate': 0.000442, 'epoch': 0.12} + 12%|█████████▊ | 223/1784 [11:25<1:26:57, 3.34s/it]g-point operations will not be computed-03 00:56:35,832 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|█████████▊ | 224/1784 [11:29<1:26:55, 3.34s/it]g-point operations will not be computed-03 00:56:35,832 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:56:53,983 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:56:35,832 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:56:53,983 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:56:35,832 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.1024, 'learning_rate': 0.000446, 'epoch': 0.13} +[WARNING|modeling_utils.py:388] 2022-03-03 00:56:53,983 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:56:35,832 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|█████████▉ | 226/1784 [11:35<1:25:44, 3.30s/it]g-point operations will not be computed-03 00:56:35,832 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|█████████▉ | 226/1784 [11:35<1:25:44, 3.30s/it]g-point operations will not be computed-03 00:56:35,832 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|█████████▉ | 226/1784 [11:35<1:25:44, 3.30s/it]g-point operations will not be computed-03 00:56:35,832 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|█████████▉ | 227/1784 [11:38<1:24:43, 3.26s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:57:02,106 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|█████████▉ | 228/1784 [11:42<1:23:59, 3.24s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:57:02,106 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|█████████▉ | 228/1784 [11:42<1:23:59, 3.24s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:57:02,106 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.799, 'learning_rate': 0.00045200000000000004, 'epoch': 0.13} + 13%|█████████▉ | 228/1784 [11:42<1:23:59, 3.24s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:57:02,106 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 229/1784 [11:45<1:23:02, 3.20s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:57:08,370 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 230/1784 [11:48<1:22:07, 3.17s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:57:08,370 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 230/1784 [11:48<1:22:07, 3.17s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:57:08,370 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9686, 'learning_rate': 0.000456, 'epoch': 0.13} + 13%|██████████ | 230/1784 [11:48<1:22:07, 3.17s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:57:08,370 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 231/1784 [11:51<1:21:40, 3.16s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:57:14,570 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▏ | 232/1784 [11:54<1:21:00, 3.13s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:57:14,570 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▏ | 232/1784 [11:54<1:21:00, 3.13s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:57:14,570 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.2565, 'learning_rate': 0.00046, 'epoch': 0.13} + 13%|██████████▏ | 232/1784 [11:54<1:21:00, 3.13s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:57:14,570 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▏ | 233/1784 [11:57<1:19:25, 3.07s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:57:20,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▏ | 234/1784 [12:00<1:17:57, 3.02s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:57:20,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▏ | 234/1784 [12:00<1:17:57, 3.02s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:57:20,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:57:24,683 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:57:20,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:57:24,683 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:57:20,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:57:27,540 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:57:20,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:57:27,540 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:57:20,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.6232, 'learning_rate': 0.00046800000000000005, 'epoch': 0.13} + 13%|██████████▎ | 237/1784 [12:08<1:13:45, 2.86s/it]g-point operations will not be computed-03 00:57:20,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▎ | 237/1784 [12:08<1:13:45, 2.86s/it]g-point operations will not be computed-03 00:57:20,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:57:32,914 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:57:20,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:57:32,914 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:57:20,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.3229, 'learning_rate': 0.000472, 'epoch': 0.13} + 13%|██████████▍ | 239/1784 [12:13<1:10:28, 2.74s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:57:36,806 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▍ | 239/1784 [12:13<1:10:28, 2.74s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:57:36,806 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▍ | 240/1784 [12:16<1:08:26, 2.66s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:57:39,171 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▍ | 240/1784 [12:16<1:08:26, 2.66s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:57:39,171 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|██████████▌ | 241/1784 [12:18<1:05:12, 2.54s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:57:41,338 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|██████████▌ | 241/1784 [12:18<1:05:12, 2.54s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:57:41,338 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|██████████▌ | 242/1784 [12:20<1:01:45, 2.40s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:57:43,359 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|██████████▌ | 242/1784 [12:20<1:01:45, 2.40s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:57:43,359 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|██████████▉ | 243/1784 [12:22<58:02, 2.26s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:57:45,190 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|██████████▉ | 243/1784 [12:22<58:02, 2.26s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:57:45,190 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|██████████▉ | 244/1784 [12:24<54:19, 2.12s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:57:46,905 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|██████████▉ | 244/1784 [12:24<54:19, 2.12s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:57:46,905 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████ | 246/1784 [12:27<47:00, 1.83s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:57:48,501 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████ | 246/1784 [12:27<47:00, 1.83s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:57:48,501 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.3271, 'learning_rate': 0.000486, 'epoch': 0.14} + 14%|███████████ | 247/1784 [12:28<43:34, 1.70s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:57:51,248 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████ | 247/1784 [12:28<43:34, 1.70s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:57:51,248 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▏ | 249/1784 [12:31<37:12, 1.45s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:57:53,597 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▏ | 249/1784 [12:31<37:12, 1.45s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:57:53,597 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.3764, 'learning_rate': 0.000492, 'epoch': 0.14} + 14%|███████████▏ | 250/1784 [12:33<38:30, 1.51s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:57:53,597 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▏ | 250/1784 [12:33<38:30, 1.51s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:57:53,597 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▏ | 250/1784 [12:33<38:30, 1.51s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:57:56,683 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▎ | 251/1784 [12:36<56:55, 2.23s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:57:56,683 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▎ | 251/1784 [12:36<56:55, 2.23s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:57:56,683 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▎ | 251/1784 [12:36<56:55, 2.23s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:58:00,488 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████ | 252/1784 [12:40<1:08:40, 2.69s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:58:00,488 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████ | 252/1784 [12:40<1:08:40, 2.69s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:58:00,488 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.5569, 'learning_rate': 0.0005, 'epoch': 0.14} + 14%|███████████ | 253/1784 [12:44<1:16:22, 2.99s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:58:00,488 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████ | 253/1784 [12:44<1:16:22, 2.99s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:58:00,488 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.1258, 'learning_rate': 0.0005020000000000001, 'epoch': 0.14} + 14%|███████████ | 253/1784 [12:44<1:16:22, 2.99s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:58:00,488 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████ | 254/1784 [12:48<1:21:58, 3.21s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:58:11,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▏ | 255/1784 [12:51<1:25:30, 3.36s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:58:11,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▏ | 255/1784 [12:51<1:25:30, 3.36s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:58:11,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.2723, 'learning_rate': 0.000506, 'epoch': 0.14} + 14%|███████████▏ | 256/1784 [12:55<1:27:03, 3.42s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:58:11,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▏ | 256/1784 [12:55<1:27:03, 3.42s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:58:11,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.2628, 'learning_rate': 0.000508, 'epoch': 0.14} + 14%|███████████▏ | 257/1784 [12:59<1:28:22, 3.47s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:58:11,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▏ | 257/1784 [12:59<1:28:22, 3.47s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:58:11,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.2784, 'learning_rate': 0.00051, 'epoch': 0.14} + 14%|███████████▏ | 257/1784 [12:59<1:28:22, 3.47s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:58:11,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▎ | 258/1784 [13:02<1:28:57, 3.50s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:58:11,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:58:27,708 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:58:11,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:58:27,708 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:58:11,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.4261, 'learning_rate': 0.000514, 'epoch': 0.15} + 15%|███████████▎ | 260/1784 [13:09<1:29:46, 3.53s/it]g-point operations will not be computed-03 00:58:11,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▎ | 260/1784 [13:09<1:29:46, 3.53s/it]g-point operations will not be computed-03 00:58:11,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9251, 'learning_rate': 0.0005160000000000001, 'epoch': 0.15} + 15%|███████████▍ | 261/1784 [13:13<1:29:29, 3.53s/it]g-point operations will not be computed-03 00:58:11,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▍ | 261/1784 [13:13<1:29:29, 3.53s/it]g-point operations will not be computed-03 00:58:11,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:58:38,217 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:58:11,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:58:38,217 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:58:11,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.8142, 'learning_rate': 0.0005200000000000001, 'epoch': 0.15} + 15%|███████████▍ | 263/1784 [13:20<1:28:29, 3.49s/it]g-point operations will not be computed-03 00:58:11,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▍ | 263/1784 [13:20<1:28:29, 3.49s/it]g-point operations will not be computed-03 00:58:11,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.0686, 'learning_rate': 0.000522, 'epoch': 0.15} + 15%|███████████▌ | 264/1784 [13:23<1:28:22, 3.49s/it]g-point operations will not be computed-03 00:58:11,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▌ | 264/1784 [13:23<1:28:22, 3.49s/it]g-point operations will not be computed-03 00:58:11,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.6381, 'learning_rate': 0.000524, 'epoch': 0.15} + 15%|███████████▌ | 264/1784 [13:23<1:28:22, 3.49s/it]g-point operations will not be computed-03 00:58:11,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▌ | 265/1784 [13:26<1:27:38, 3.46s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:58:50,348 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▋ | 266/1784 [13:30<1:27:01, 3.44s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:58:50,348 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▋ | 266/1784 [13:30<1:27:01, 3.44s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:58:50,348 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8176, 'learning_rate': 0.000528, 'epoch': 0.15} + 15%|███████████▋ | 267/1784 [13:33<1:26:42, 3.43s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:58:50,348 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▋ | 267/1784 [13:33<1:26:42, 3.43s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:58:50,348 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:58:58,738 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:58:50,348 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:58:58,738 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:58:50,348 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.9445, 'learning_rate': 0.000532, 'epoch': 0.15} + 15%|███████████▊ | 269/1784 [13:40<1:25:39, 3.39s/it]g-point operations will not be computed-03 00:58:50,348 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▊ | 269/1784 [13:40<1:25:39, 3.39s/it]g-point operations will not be computed-03 00:58:50,348 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.2645, 'learning_rate': 0.0005340000000000001, 'epoch': 0.15} + 15%|███████████▊ | 270/1784 [13:43<1:25:41, 3.40s/it]g-point operations will not be computed-03 00:58:50,348 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▊ | 270/1784 [13:43<1:25:41, 3.40s/it]g-point operations will not be computed-03 00:58:50,348 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:59:08,861 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:58:50,348 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:59:08,861 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:58:50,348 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.3492, 'learning_rate': 0.0005380000000000001, 'epoch': 0.15} + 15%|███████████▉ | 272/1784 [13:50<1:24:34, 3.36s/it]g-point operations will not be computed-03 00:58:50,348 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▉ | 272/1784 [13:50<1:24:34, 3.36s/it]g-point operations will not be computed-03 00:58:50,348 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.3108, 'learning_rate': 0.00054, 'epoch': 0.15} + 15%|███████████▉ | 272/1784 [13:50<1:24:34, 3.36s/it]g-point operations will not be computed-03 00:58:50,348 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▉ | 273/1784 [13:53<1:24:05, 3.34s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:59:17,110 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▉ | 274/1784 [13:57<1:23:22, 3.31s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:59:17,110 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▉ | 274/1784 [13:57<1:23:22, 3.31s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:59:17,110 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.3335, 'learning_rate': 0.0005440000000000001, 'epoch': 0.15} + 15%|████████████ | 275/1784 [14:00<1:22:27, 3.28s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:59:17,110 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████ | 275/1784 [14:00<1:22:27, 3.28s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:59:17,110 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:59:25,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:59:17,110 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:59:25,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:59:17,110 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.1695, 'learning_rate': 0.0005480000000000001, 'epoch': 0.15} + 16%|████████████ | 277/1784 [14:06<1:21:56, 3.26s/it]g-point operations will not be computed-03 00:59:17,110 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████ | 277/1784 [14:06<1:21:56, 3.26s/it]g-point operations will not be computed-03 00:59:17,110 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.2355, 'learning_rate': 0.00055, 'epoch': 0.16} +[WARNING|modeling_utils.py:388] 2022-03-03 00:59:31,561 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:59:17,110 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:59:31,561 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:59:17,110 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:59:31,561 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:59:17,110 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▏ | 279/1784 [14:13<1:20:28, 3.21s/it]g-point operations will not be computed-03 00:59:17,110 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:59:37,835 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:59:17,110 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:59:37,835 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:59:17,110 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:59:37,835 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:59:17,110 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▎ | 281/1784 [14:19<1:19:29, 3.17s/it]g-point operations will not be computed-03 00:59:17,110 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▎ | 281/1784 [14:19<1:19:29, 3.17s/it]g-point operations will not be computed-03 00:59:17,110 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.8305, 'learning_rate': 0.000558, 'epoch': 0.16} + 16%|████████████▎ | 282/1784 [14:22<1:19:02, 3.16s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:59:45,676 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▎ | 282/1784 [14:22<1:19:02, 3.16s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:59:45,676 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▎ | 283/1784 [14:25<1:18:10, 3.12s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:59:45,676 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▎ | 283/1784 [14:25<1:18:10, 3.12s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:59:45,676 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.5372, 'learning_rate': 0.0005620000000000001, 'epoch': 0.16} + 16%|████████████▍ | 284/1784 [14:28<1:17:08, 3.09s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:59:51,651 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▍ | 284/1784 [14:28<1:17:08, 3.09s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:59:51,651 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▍ | 285/1784 [14:31<1:15:56, 3.04s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:59:51,651 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▍ | 285/1784 [14:31<1:15:56, 3.04s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:59:51,651 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:59:55,967 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:59:51,651 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:59:55,967 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:59:51,651 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.7623, 'learning_rate': 0.0005679999999999999, 'epoch': 0.16} + 16%|████████████▌ | 287/1784 [14:37<1:13:43, 2.95s/it]g-point operations will not be computed-03 00:59:51,651 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▌ | 287/1784 [14:37<1:13:43, 2.95s/it]g-point operations will not be computed-03 00:59:51,651 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:00:01,523 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:59:51,651 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:00:01,523 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:59:51,651 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:00:04,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:59:51,651 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:00:04,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:59:51,651 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.0644, 'learning_rate': 0.000574, 'epoch': 0.16} + 16%|████████████▋ | 290/1784 [14:45<1:07:51, 2.73s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:00:08,005 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▋ | 290/1784 [14:45<1:07:51, 2.73s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:00:08,005 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▋ | 291/1784 [14:47<1:05:28, 2.63s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:00:10,367 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▋ | 291/1784 [14:47<1:05:28, 2.63s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:00:10,367 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▊ | 292/1784 [14:49<1:03:20, 2.55s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:00:10,367 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▊ | 292/1784 [14:49<1:03:20, 2.55s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:00:10,367 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.9815, 'learning_rate': 0.00058, 'epoch': 0.16} +[WARNING|modeling_utils.py:388] 2022-03-03 01:00:13,600 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:00:10,367 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:00:15,616 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:00:10,367 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:00:15,616 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:00:10,367 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:00:17,566 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:00:10,367 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:00:17,566 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:00:10,367 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.4079, 'learning_rate': 0.0005859999999999999, 'epoch': 0.17} +[WARNING|modeling_utils.py:388] 2022-03-03 01:00:19,312 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:00:10,367 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:00:19,312 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:00:10,367 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.8535, 'learning_rate': 0.00059, 'epoch': 0.17} +[WARNING|modeling_utils.py:388] 2022-03-03 01:00:22,365 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:00:10,367 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:00:22,365 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:00:10,367 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:00:23,665 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:00:10,367 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:00:23,665 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:00:10,367 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:00:25,348 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:00:10,367 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:00:25,348 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:00:10,367 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:00:25,348 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:00:10,367 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:00:29,216 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:00:10,367 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:00:32,971 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:00:10,367 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:00:32,971 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:00:10,367 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.825, 'learning_rate': 0.0006, 'epoch': 0.17} +[WARNING|modeling_utils.py:388] 2022-03-03 01:00:32,971 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:00:10,367 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▏ | 303/1784 [15:15<1:14:55, 3.04s/it]g-point operations will not be computed-03 01:00:10,367 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▏ | 303/1784 [15:15<1:14:55, 3.04s/it]g-point operations will not be computed-03 01:00:10,367 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▏ | 303/1784 [15:15<1:14:55, 3.04s/it]g-point operations will not be computed-03 01:00:10,367 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▎ | 304/1784 [15:18<1:20:04, 3.25s/it]g-point operations will not be computed-03 01:00:10,367 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▎ | 304/1784 [15:18<1:20:04, 3.25s/it]g-point operations will not be computed-03 01:00:10,367 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▎ | 304/1784 [15:18<1:20:04, 3.25s/it]g-point operations will not be computed-03 01:00:10,367 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▎ | 305/1784 [15:22<1:23:41, 3.40s/it]g-point operations will not be computed-03 01:00:10,367 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▎ | 305/1784 [15:22<1:23:41, 3.40s/it]g-point operations will not be computed-03 01:00:10,367 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▎ | 305/1784 [15:22<1:23:41, 3.40s/it]g-point operations will not be computed-03 01:00:10,367 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▍ | 306/1784 [15:26<1:25:40, 3.48s/it]g-point operations will not be computed-03 01:00:10,367 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:00:51,454 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:00:10,367 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:00:51,454 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:00:10,367 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.5677, 'learning_rate': 0.00061, 'epoch': 0.17} +[WARNING|modeling_utils.py:388] 2022-03-03 01:00:51,454 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:00:10,367 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▍ | 308/1784 [15:33<1:27:19, 3.55s/it]g-point operations will not be computed-03 01:00:10,367 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▍ | 308/1784 [15:33<1:27:19, 3.55s/it]g-point operations will not be computed-03 01:00:10,367 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▍ | 308/1784 [15:33<1:27:19, 3.55s/it]g-point operations will not be computed-03 01:00:10,367 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▌ | 309/1784 [15:37<1:27:37, 3.56s/it]g-point operations will not be computed-03 01:00:10,367 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▌ | 309/1784 [15:37<1:27:37, 3.56s/it]g-point operations will not be computed-03 01:00:10,367 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▌ | 309/1784 [15:37<1:27:37, 3.56s/it]g-point operations will not be computed-03 01:00:10,367 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▌ | 310/1784 [15:40<1:27:18, 3.55s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:01:03,999 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▌ | 311/1784 [15:44<1:26:49, 3.54s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:01:03,999 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▌ | 311/1784 [15:44<1:26:49, 3.54s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:01:03,999 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.3806, 'learning_rate': 0.0006180000000000001, 'epoch': 0.17} + 17%|█████████████▌ | 311/1784 [15:44<1:26:49, 3.54s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:01:03,999 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▋ | 312/1784 [15:47<1:26:41, 3.53s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:01:03,999 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▋ | 312/1784 [15:47<1:26:41, 3.53s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:01:03,999 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▋ | 312/1784 [15:47<1:26:41, 3.53s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:01:03,999 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▋ | 313/1784 [15:51<1:26:07, 3.51s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:01:03,999 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▋ | 313/1784 [15:51<1:26:07, 3.51s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:01:03,999 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▋ | 313/1784 [15:51<1:26:07, 3.51s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:01:03,999 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▋ | 314/1784 [15:54<1:25:38, 3.50s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:01:17,975 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▋ | 314/1784 [15:54<1:25:38, 3.50s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:01:17,975 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▊ | 315/1784 [15:58<1:25:45, 3.50s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:01:17,975 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▊ | 315/1784 [15:58<1:25:45, 3.50s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:01:17,975 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▊ | 315/1784 [15:58<1:25:45, 3.50s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:01:17,975 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▊ | 316/1784 [16:01<1:25:02, 3.48s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:01:17,975 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▊ | 316/1784 [16:01<1:25:02, 3.48s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:01:17,975 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▊ | 316/1784 [16:01<1:25:02, 3.48s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:01:17,975 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▊ | 317/1784 [16:04<1:25:03, 3.48s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:01:28,340 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▊ | 317/1784 [16:04<1:25:03, 3.48s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:01:28,340 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▉ | 318/1784 [16:08<1:24:47, 3.47s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:01:28,340 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▉ | 318/1784 [16:08<1:24:47, 3.47s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:01:28,340 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▉ | 318/1784 [16:08<1:24:47, 3.47s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:01:28,340 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▉ | 319/1784 [16:11<1:24:05, 3.44s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:01:28,340 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▉ | 319/1784 [16:11<1:24:05, 3.44s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:01:28,340 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▉ | 319/1784 [16:11<1:24:05, 3.44s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:01:28,340 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▉ | 320/1784 [16:15<1:23:51, 3.44s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:01:28,340 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▉ | 320/1784 [16:15<1:23:51, 3.44s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:01:28,340 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:01:40,140 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:01:28,340 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:01:40,140 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:01:28,340 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:01:40,140 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:01:28,340 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████ | 322/1784 [16:21<1:22:46, 3.40s/it]g-point operations will not be computed-03 01:01:28,340 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████ | 322/1784 [16:21<1:22:46, 3.40s/it]g-point operations will not be computed-03 01:01:28,340 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████ | 322/1784 [16:21<1:22:46, 3.40s/it]g-point operations will not be computed-03 01:01:28,340 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████ | 323/1784 [16:25<1:22:06, 3.37s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:01:48,486 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████ | 323/1784 [16:25<1:22:06, 3.37s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:01:48,486 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▏ | 324/1784 [16:28<1:20:57, 3.33s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:01:48,486 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▏ | 324/1784 [16:28<1:20:57, 3.33s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:01:48,486 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▏ | 325/1784 [16:31<1:20:06, 3.29s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:01:48,486 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▏ | 325/1784 [16:31<1:20:06, 3.29s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:01:48,486 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:01:56,531 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:01:48,486 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:01:56,531 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:01:48,486 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.7701, 'learning_rate': 0.000648, 'epoch': 0.18} + 18%|██████████████▎ | 327/1784 [16:38<1:19:36, 3.28s/it]g-point operations will not be computed-03 01:01:48,486 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▎ | 327/1784 [16:38<1:19:36, 3.28s/it]g-point operations will not be computed-03 01:01:48,486 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:02:02,998 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:01:48,486 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:02:02,998 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:01:48,486 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.1997, 'learning_rate': 0.000652, 'epoch': 0.18} + 18%|██████████████▍ | 329/1784 [16:44<1:18:29, 3.24s/it]g-point operations will not be computed-03 01:01:48,486 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▍ | 329/1784 [16:44<1:18:29, 3.24s/it]g-point operations will not be computed-03 01:01:48,486 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.2808, 'learning_rate': 0.0006540000000000001, 'epoch': 0.18} + 18%|██████████████▍ | 329/1784 [16:44<1:18:29, 3.24s/it]g-point operations will not be computed-03 01:01:48,486 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▍ | 330/1784 [16:47<1:18:20, 3.23s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:02:11,017 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▍ | 331/1784 [16:50<1:17:12, 3.19s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:02:11,017 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▍ | 331/1784 [16:50<1:17:12, 3.19s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:02:11,017 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:02:15,574 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:02:11,017 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:02:15,574 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:02:11,017 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:02:15,574 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:02:11,017 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▌ | 333/1784 [16:57<1:15:35, 3.13s/it]g-point operations will not be computed-03 01:02:11,017 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▌ | 333/1784 [16:57<1:15:35, 3.13s/it]g-point operations will not be computed-03 01:02:11,017 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.6766, 'learning_rate': 0.000662, 'epoch': 0.19} + 19%|██████████████▌ | 333/1784 [16:57<1:15:35, 3.13s/it]g-point operations will not be computed-03 01:02:11,017 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▌ | 334/1784 [17:00<1:14:37, 3.09s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:02:23,170 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▋ | 335/1784 [17:02<1:13:21, 3.04s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:02:23,170 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▋ | 335/1784 [17:02<1:13:21, 3.04s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:02:23,170 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:02:27,458 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:02:23,170 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:02:27,458 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:02:23,170 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.0338, 'learning_rate': 0.0006680000000000001, 'epoch': 0.19} + 19%|██████████████▋ | 337/1784 [17:08<1:11:11, 2.95s/it]g-point operations will not be computed-03 01:02:23,170 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▋ | 337/1784 [17:08<1:11:11, 2.95s/it]g-point operations will not be computed-03 01:02:23,170 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:02:33,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:02:23,170 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:02:33,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:02:23,170 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.4539, 'learning_rate': 0.0006720000000000001, 'epoch': 0.19} +[WARNING|modeling_utils.py:388] 2022-03-03 01:02:33,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:02:23,170 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▊ | 339/1784 [17:14<1:08:56, 2.86s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:02:37,294 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▊ | 340/1784 [17:16<1:07:38, 2.81s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▊ | 340/1784 [17:16<1:07:38, 2.81s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▉ | 341/1784 [17:19<1:06:07, 2.75s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▉ | 341/1784 [17:19<1:06:07, 2.75s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:02:43,607 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:02:43,607 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:02:45,894 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:02:45,894 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:02:48,072 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:02:48,072 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:02:50,055 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:02:50,055 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:02:51,903 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:02:51,903 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.198, 'learning_rate': 0.0006879999999999999, 'epoch': 0.19} +[WARNING|modeling_utils.py:388] 2022-03-03 01:02:53,571 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:02:56,341 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:02:56,341 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.5543, 'learning_rate': 0.000692, 'epoch': 0.2} +{'loss': 5.9238, 'learning_rate': 0.000694, 'epoch': 0.2} +[WARNING|modeling_utils.py:388] 2022-03-03 01:02:58,112 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:02:58,112 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:02:58,112 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:03:02,110 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:03:02,110 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:03:02,110 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▍ | 352/1784 [17:44<1:06:50, 2.80s/it]g-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▍ | 352/1784 [17:44<1:06:50, 2.80s/it]g-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▍ | 352/1784 [17:44<1:06:50, 2.80s/it]g-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▍ | 353/1784 [17:48<1:13:24, 3.08s/it]g-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▍ | 353/1784 [17:48<1:13:24, 3.08s/it]g-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▍ | 353/1784 [17:48<1:13:24, 3.08s/it]g-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▍ | 354/1784 [17:51<1:17:55, 3.27s/it]g-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▍ | 354/1784 [17:51<1:17:55, 3.27s/it]g-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▍ | 354/1784 [17:51<1:17:55, 3.27s/it]g-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▌ | 355/1784 [17:55<1:21:01, 3.40s/it]g-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▌ | 355/1784 [17:55<1:21:01, 3.40s/it]g-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:03:20,667 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:03:20,667 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:03:20,667 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▌ | 357/1784 [18:02<1:23:36, 3.52s/it]g-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▌ | 357/1784 [18:02<1:23:36, 3.52s/it]g-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▌ | 357/1784 [18:02<1:23:36, 3.52s/it]g-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▋ | 358/1784 [18:06<1:24:16, 3.55s/it]g-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▋ | 358/1784 [18:06<1:24:16, 3.55s/it]g-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▋ | 358/1784 [18:06<1:24:16, 3.55s/it]g-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▋ | 359/1784 [18:09<1:24:43, 3.57s/it]g-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▋ | 359/1784 [18:09<1:24:43, 3.57s/it]g-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:03:35,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:03:35,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:03:35,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▊ | 361/1784 [18:17<1:25:03, 3.59s/it]g-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▊ | 361/1784 [18:17<1:25:03, 3.59s/it]g-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▊ | 361/1784 [18:17<1:25:03, 3.59s/it]g-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▊ | 362/1784 [18:20<1:24:29, 3.56s/it]g-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▊ | 362/1784 [18:20<1:24:29, 3.56s/it]g-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▊ | 362/1784 [18:20<1:24:29, 3.56s/it]g-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▊ | 363/1784 [18:24<1:23:47, 3.54s/it]g-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:03:49,180 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:03:49,180 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.9611, 'learning_rate': 0.000724, 'epoch': 0.2} +[WARNING|modeling_utils.py:388] 2022-03-03 01:03:49,180 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▉ | 365/1784 [18:31<1:23:02, 3.51s/it]g-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▉ | 365/1784 [18:31<1:23:02, 3.51s/it]g-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▉ | 365/1784 [18:31<1:23:02, 3.51s/it]g-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████ | 366/1784 [18:34<1:22:24, 3.49s/it]g-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████ | 366/1784 [18:34<1:22:24, 3.49s/it]g-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:03:59,521 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:03:59,521 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:03:59,521 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████ | 368/1784 [18:41<1:21:27, 3.45s/it]g-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████ | 368/1784 [18:41<1:21:27, 3.45s/it]g-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████ | 368/1784 [18:41<1:21:27, 3.45s/it]g-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▏ | 369/1784 [18:44<1:21:14, 3.44s/it]g-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:04:09,781 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:04:09,781 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.649, 'learning_rate': 0.000736, 'epoch': 0.21} +[WARNING|modeling_utils.py:388] 2022-03-03 01:04:09,781 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▏ | 371/1784 [18:51<1:20:10, 3.40s/it]g-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:04:16,448 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:04:16,448 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.9999, 'learning_rate': 0.00074, 'epoch': 0.21} +[WARNING|modeling_utils.py:388] 2022-03-03 01:04:16,448 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▎ | 373/1784 [18:58<1:18:52, 3.35s/it]g-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▎ | 373/1784 [18:58<1:18:52, 3.35s/it]g-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▎ | 373/1784 [18:58<1:18:52, 3.35s/it]g-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▎ | 374/1784 [19:01<1:17:59, 3.32s/it]g-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:04:26,217 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:04:26,217 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.1289, 'learning_rate': 0.000746, 'epoch': 0.21} +[WARNING|modeling_utils.py:388] 2022-03-03 01:04:26,217 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▍ | 376/1784 [19:07<1:16:51, 3.28s/it]g-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:04:32,643 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:04:32,643 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.5028, 'learning_rate': 0.00075, 'epoch': 0.21} +[WARNING|modeling_utils.py:388] 2022-03-03 01:04:32,643 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▌ | 378/1784 [19:14<1:15:33, 3.22s/it]g-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▌ | 378/1784 [19:14<1:15:33, 3.22s/it]g-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▌ | 378/1784 [19:14<1:15:33, 3.22s/it]g-point operations will not be computed-03 01:02:39,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▌ | 379/1784 [19:17<1:15:03, 3.21s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:04:40,597 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▌ | 380/1784 [19:20<1:14:26, 3.18s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:04:40,597 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▌ | 380/1784 [19:20<1:14:26, 3.18s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:04:40,597 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.76, 'learning_rate': 0.000756, 'epoch': 0.21} + 21%|████████████████▌ | 380/1784 [19:20<1:14:26, 3.18s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:04:40,597 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 381/1784 [19:23<1:14:08, 3.17s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:04:46,838 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 382/1784 [19:26<1:13:39, 3.15s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:04:46,838 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 382/1784 [19:26<1:13:39, 3.15s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:04:46,838 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.3031, 'learning_rate': 0.00076, 'epoch': 0.21} + 21%|████████████████▋ | 382/1784 [19:26<1:13:39, 3.15s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:04:46,838 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 383/1784 [19:29<1:12:37, 3.11s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:04:52,853 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 383/1784 [19:29<1:12:37, 3.11s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:04:52,853 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|████████████████▊ | 384/1784 [19:32<1:11:13, 3.05s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:04:52,853 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:04:57,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:04:52,853 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:04:57,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:04:52,853 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.8042, 'learning_rate': 0.0007660000000000001, 'epoch': 0.22} +[WARNING|modeling_utils.py:388] 2022-03-03 01:04:57,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:04:52,853 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|████████████████▉ | 386/1784 [19:38<1:09:43, 2.99s/it]g-point operations will not be computed-03 01:04:52,853 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:05:02,915 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:04:52,853 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:05:02,915 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:04:52,853 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:05:05,619 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:04:52,853 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:05:05,619 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:04:52,853 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.0925, 'learning_rate': 0.000772, 'epoch': 0.22} +[WARNING|modeling_utils.py:388] 2022-03-03 01:05:05,619 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:04:52,853 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████ | 389/1784 [19:46<1:05:08, 2.80s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:05:09,627 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████ | 389/1784 [19:46<1:05:08, 2.80s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:05:09,627 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████ | 390/1784 [19:49<1:03:30, 2.73s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:05:09,627 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:05:13,398 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:05:09,627 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:05:13,398 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:05:09,627 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:05:15,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:05:09,627 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:05:15,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:05:09,627 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:05:17,865 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:05:09,627 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:05:17,865 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:05:09,627 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:05:19,856 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:05:09,627 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:05:19,856 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:05:09,627 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:05:21,665 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:05:09,627 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:05:21,665 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:05:09,627 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:05:23,311 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:05:09,627 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:05:23,311 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:05:09,627 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.1241, 'learning_rate': 0.0007880000000000001, 'epoch': 0.22} +[WARNING|modeling_utils.py:388] 2022-03-03 01:05:26,030 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:05:09,627 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:05:26,030 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:05:09,627 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:05:27,249 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:05:09,627 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:05:27,249 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:05:09,627 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 10.8678, 'learning_rate': 0.0007940000000000001, 'epoch': 0.22} +[WARNING|modeling_utils.py:388] 2022-03-03 01:05:28,930 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:05:09,627 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:05:28,930 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:05:09,627 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:05:28,930 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:05:09,627 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:05:32,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:05:09,627 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:05:32,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:05:09,627 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:05:32,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:05:09,627 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▌ | 402/1784 [20:15<1:02:49, 2.73s/it]g-point operations will not be computed-03 01:05:09,627 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▌ | 402/1784 [20:15<1:02:49, 2.73s/it]g-point operations will not be computed-03 01:05:09,627 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▌ | 402/1784 [20:15<1:02:49, 2.73s/it]g-point operations will not be computed-03 01:05:09,627 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▌ | 403/1784 [20:18<1:09:56, 3.04s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:05:42,252 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▋ | 404/1784 [20:22<1:14:11, 3.23s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:05:42,252 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▋ | 404/1784 [20:22<1:14:11, 3.23s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:05:42,252 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.8218, 'learning_rate': 0.000804, 'epoch': 0.23} + 23%|█████████████████▋ | 405/1784 [20:26<1:17:35, 3.38s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:05:42,252 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▋ | 405/1784 [20:26<1:17:35, 3.38s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:05:42,252 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.3772, 'learning_rate': 0.0008060000000000001, 'epoch': 0.23} + 23%|█████████████████▋ | 405/1784 [20:26<1:17:35, 3.38s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:05:42,252 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▊ | 406/1784 [20:29<1:19:35, 3.47s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:05:42,252 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▊ | 406/1784 [20:29<1:19:35, 3.47s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:05:42,252 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▊ | 406/1784 [20:29<1:19:35, 3.47s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:05:42,252 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▊ | 407/1784 [20:33<1:20:36, 3.51s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:05:42,252 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▊ | 407/1784 [20:33<1:20:36, 3.51s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:05:42,252 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▊ | 407/1784 [20:33<1:20:36, 3.51s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:05:42,252 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▊ | 408/1784 [20:37<1:20:50, 3.52s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:06:00,430 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▉ | 409/1784 [20:40<1:20:55, 3.53s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:06:00,430 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▉ | 409/1784 [20:40<1:20:55, 3.53s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:06:00,430 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.9483, 'learning_rate': 0.0008139999999999999, 'epoch': 0.23} + 23%|█████████████████▉ | 409/1784 [20:40<1:20:55, 3.53s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:06:00,430 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▉ | 410/1784 [20:44<1:21:04, 3.54s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:06:00,430 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▉ | 410/1784 [20:44<1:21:04, 3.54s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:06:00,430 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▉ | 410/1784 [20:44<1:21:04, 3.54s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:06:00,430 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▉ | 411/1784 [20:47<1:20:44, 3.53s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:06:00,430 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:06:12,758 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:06:00,430 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:06:12,758 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:06:00,430 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.5031, 'learning_rate': 0.00082, 'epoch': 0.23} + 23%|██████████████████ | 413/1784 [20:54<1:20:44, 3.53s/it]g-point operations will not be computed-03 01:06:00,430 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████ | 413/1784 [20:54<1:20:44, 3.53s/it]g-point operations will not be computed-03 01:06:00,430 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.3287, 'learning_rate': 0.0008219999999999999, 'epoch': 0.23} + 23%|██████████████████ | 413/1784 [20:54<1:20:44, 3.53s/it]g-point operations will not be computed-03 01:06:00,430 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████ | 414/1784 [20:58<1:20:25, 3.52s/it]g-point operations will not be computed-03 01:06:00,430 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████ | 414/1784 [20:58<1:20:25, 3.52s/it]g-point operations will not be computed-03 01:06:00,430 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████ | 414/1784 [20:58<1:20:25, 3.52s/it]g-point operations will not be computed-03 01:06:00,430 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▏ | 415/1784 [21:01<1:20:23, 3.52s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:06:25,097 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▏ | 416/1784 [21:05<1:19:49, 3.50s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:06:25,097 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▏ | 416/1784 [21:05<1:19:49, 3.50s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:06:25,097 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.4594, 'learning_rate': 0.000828, 'epoch': 0.23} + 23%|██████████████████▏ | 416/1784 [21:05<1:19:49, 3.50s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:06:25,097 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▏ | 417/1784 [21:08<1:19:10, 3.47s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:06:25,097 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▏ | 417/1784 [21:08<1:19:10, 3.47s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:06:25,097 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▏ | 417/1784 [21:08<1:19:10, 3.47s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:06:25,097 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▎ | 418/1784 [21:12<1:19:07, 3.48s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:06:35,423 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▎ | 419/1784 [21:15<1:18:29, 3.45s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:06:35,423 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▎ | 419/1784 [21:15<1:18:29, 3.45s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:06:35,423 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.57, 'learning_rate': 0.000834, 'epoch': 0.23} + 23%|██████████████████▎ | 419/1784 [21:15<1:18:29, 3.45s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:06:35,423 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▎ | 420/1784 [21:18<1:18:54, 3.47s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:06:35,423 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▎ | 420/1784 [21:18<1:18:54, 3.47s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:06:35,423 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▎ | 420/1784 [21:18<1:18:54, 3.47s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:06:35,423 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▍ | 421/1784 [21:22<1:18:23, 3.45s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:06:35,423 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:06:47,302 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:06:35,423 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:06:47,302 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:06:35,423 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.8436, 'learning_rate': 0.00084, 'epoch': 0.24} +[WARNING|modeling_utils.py:388] 2022-03-03 01:06:47,302 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:06:35,423 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▍ | 423/1784 [21:29<1:16:58, 3.39s/it]g-point operations will not be computed-03 01:06:35,423 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▍ | 423/1784 [21:29<1:16:58, 3.39s/it]g-point operations will not be computed-03 01:06:35,423 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▍ | 423/1784 [21:29<1:16:58, 3.39s/it]g-point operations will not be computed-03 01:06:35,423 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▌ | 424/1784 [21:32<1:16:29, 3.37s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:06:55,685 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▌ | 425/1784 [21:35<1:16:01, 3.36s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:06:55,685 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▌ | 425/1784 [21:35<1:16:01, 3.36s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:06:55,685 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.5169, 'learning_rate': 0.000846, 'epoch': 0.24} + 24%|██████████████████▌ | 425/1784 [21:35<1:16:01, 3.36s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:06:55,685 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▋ | 426/1784 [21:38<1:14:58, 3.31s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:06:55,685 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:07:03,699 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:06:55,685 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:07:03,699 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:06:55,685 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.98, 'learning_rate': 0.00085, 'epoch': 0.24} +[WARNING|modeling_utils.py:388] 2022-03-03 01:07:03,699 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:06:55,685 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▋ | 428/1784 [21:45<1:13:19, 3.24s/it]g-point operations will not be computed-03 01:06:55,685 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:07:10,019 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:06:55,685 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:07:10,019 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:06:55,685 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.7028, 'learning_rate': 0.000854, 'epoch': 0.24} +[WARNING|modeling_utils.py:388] 2022-03-03 01:07:10,019 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:06:55,685 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▊ | 430/1784 [21:51<1:12:13, 3.20s/it]g-point operations will not be computed-03 01:06:55,685 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:07:16,310 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:06:55,685 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:07:16,310 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:06:55,685 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.0355, 'learning_rate': 0.000858, 'epoch': 0.24} +[WARNING|modeling_utils.py:388] 2022-03-03 01:07:16,310 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:06:55,685 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▉ | 432/1784 [21:57<1:11:09, 3.16s/it]g-point operations will not be computed-03 01:06:55,685 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:07:22,409 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:06:55,685 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:07:22,409 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:06:55,685 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.6191, 'learning_rate': 0.000862, 'epoch': 0.24} +[WARNING|modeling_utils.py:388] 2022-03-03 01:07:22,409 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:06:55,685 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▉ | 434/1784 [22:03<1:09:13, 3.08s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:07:26,943 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████ | 435/1784 [22:06<1:07:59, 3.02s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:07:26,943 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████ | 435/1784 [22:06<1:07:59, 3.02s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:07:26,943 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:07:31,140 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:07:26,943 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:07:31,140 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:07:26,943 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.7428, 'learning_rate': 0.0008680000000000001, 'epoch': 0.24} +[WARNING|modeling_utils.py:388] 2022-03-03 01:07:31,140 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:07:26,943 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████ | 437/1784 [22:12<1:05:15, 2.91s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:07:35,384 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▏ | 438/1784 [22:15<1:04:27, 2.87s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:07:35,384 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▏ | 438/1784 [22:15<1:04:27, 2.87s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:07:35,384 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:07:39,383 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:07:35,384 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:07:39,383 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:07:35,384 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.2282, 'learning_rate': 0.000874, 'epoch': 0.25} +[WARNING|modeling_utils.py:388] 2022-03-03 01:07:39,383 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:07:35,384 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▏ | 440/1784 [22:20<1:01:31, 2.75s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:07:43,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▊ | 441/1784 [22:22<59:35, 2.66s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:07:45,616 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▊ | 441/1784 [22:22<59:35, 2.66s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:07:45,616 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▊ | 442/1784 [22:25<56:43, 2.54s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:07:47,789 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▊ | 442/1784 [22:25<56:43, 2.54s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:07:47,789 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▊ | 443/1784 [22:27<53:22, 2.39s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:07:49,761 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▊ | 443/1784 [22:27<53:22, 2.39s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:07:49,761 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▉ | 444/1784 [22:29<50:16, 2.25s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:07:51,623 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▉ | 444/1784 [22:29<50:16, 2.25s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:07:51,623 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▉ | 445/1784 [22:30<47:23, 2.12s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:07:53,394 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▉ | 445/1784 [22:30<47:23, 2.12s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:07:53,394 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.5112, 'learning_rate': 0.0008860000000000001, 'epoch': 0.25} + 25%|████████████████████ | 447/1784 [22:34<41:04, 1.84s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:07:54,970 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████ | 447/1784 [22:34<41:04, 1.84s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:07:54,970 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████ | 448/1784 [22:35<37:41, 1.69s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:07:57,690 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████ | 448/1784 [22:35<37:41, 1.69s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:07:57,690 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.0517, 'learning_rate': 0.000892, 'epoch': 0.25} + 25%|████████████████████▏ | 449/1784 [22:36<34:45, 1.56s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:07:58,919 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▏ | 449/1784 [22:36<34:45, 1.56s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:07:58,919 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▏ | 450/1784 [22:38<36:02, 1.62s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:07:58,919 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▏ | 451/1784 [22:42<51:28, 2.32s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:08:02,077 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▏ | 451/1784 [22:42<51:28, 2.32s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:08:02,077 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▏ | 451/1784 [22:42<51:28, 2.32s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:08:05,928 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▊ | 452/1784 [22:46<1:01:32, 2.77s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:08:05,928 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▊ | 452/1784 [22:46<1:01:32, 2.77s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:08:05,928 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.8886, 'learning_rate': 0.0009000000000000001, 'epoch': 0.25} + 25%|███████████████████▊ | 453/1784 [22:49<1:07:44, 3.05s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:08:05,928 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▊ | 453/1784 [22:49<1:07:44, 3.05s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:08:05,928 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.934, 'learning_rate': 0.000902, 'epoch': 0.25} + 25%|███████████████████▊ | 454/1784 [22:53<1:12:34, 3.27s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:08:05,928 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▊ | 454/1784 [22:53<1:12:34, 3.27s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:08:05,928 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.5421, 'learning_rate': 0.0009040000000000001, 'epoch': 0.25} + 26%|███████████████████▉ | 455/1784 [22:57<1:15:24, 3.40s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:08:05,928 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|███████████████████▉ | 455/1784 [22:57<1:15:24, 3.40s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:08:05,928 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.5831, 'learning_rate': 0.000906, 'epoch': 0.26} + 26%|███████████████████▉ | 455/1784 [22:57<1:15:24, 3.40s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:08:05,928 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|███████████████████▉ | 456/1784 [23:01<1:17:28, 3.50s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:08:05,928 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:08:26,393 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:08:05,928 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:08:26,393 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:08:05,928 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.0166, 'learning_rate': 0.00091, 'epoch': 0.26} + 26%|████████████████████ | 458/1784 [23:08<1:19:26, 3.59s/it]g-point operations will not be computed-03 01:08:05,928 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████ | 458/1784 [23:08<1:19:26, 3.59s/it]g-point operations will not be computed-03 01:08:05,928 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.5779, 'learning_rate': 0.000912, 'epoch': 0.26} + 26%|████████████████████ | 459/1784 [23:12<1:19:33, 3.60s/it]g-point operations will not be computed-03 01:08:05,928 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████ | 459/1784 [23:12<1:19:33, 3.60s/it]g-point operations will not be computed-03 01:08:05,928 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.4935, 'learning_rate': 0.0009140000000000001, 'epoch': 0.26} + 26%|████████████████████ | 460/1784 [23:15<1:19:27, 3.60s/it]g-point operations will not be computed-03 01:08:05,928 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████ | 460/1784 [23:15<1:19:27, 3.60s/it]g-point operations will not be computed-03 01:08:05,928 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:08:40,881 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:08:05,928 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:08:40,881 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:08:05,928 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.7115, 'learning_rate': 0.0009180000000000001, 'epoch': 0.26} + 26%|████████████████████▏ | 462/1784 [23:22<1:19:14, 3.60s/it]g-point operations will not be computed-03 01:08:05,928 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▏ | 462/1784 [23:22<1:19:14, 3.60s/it]g-point operations will not be computed-03 01:08:05,928 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.3225, 'learning_rate': 0.00092, 'epoch': 0.26} + 26%|████████████████████▏ | 463/1784 [23:26<1:18:35, 3.57s/it]g-point operations will not be computed-03 01:08:05,928 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▏ | 463/1784 [23:26<1:18:35, 3.57s/it]g-point operations will not be computed-03 01:08:05,928 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.9773, 'learning_rate': 0.0009220000000000001, 'epoch': 0.26} + 26%|████████████████████▎ | 464/1784 [23:29<1:17:58, 3.54s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:08:53,271 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▎ | 464/1784 [23:29<1:17:58, 3.54s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:08:53,271 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▎ | 465/1784 [23:33<1:17:34, 3.53s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:08:53,271 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▎ | 465/1784 [23:33<1:17:34, 3.53s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:08:53,271 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.67, 'learning_rate': 0.0009260000000000001, 'epoch': 0.26} + 26%|████████████████████▎ | 466/1784 [23:36<1:17:01, 3.51s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:08:53,271 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▎ | 466/1784 [23:36<1:17:01, 3.51s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:08:53,271 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.6142, 'learning_rate': 0.0009280000000000001, 'epoch': 0.26} + 26%|████████████████████▍ | 467/1784 [23:40<1:16:45, 3.50s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:08:53,271 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 467/1784 [23:40<1:16:45, 3.50s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:08:53,271 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:09:05,324 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:08:53,271 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:09:05,324 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:08:53,271 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.3521, 'learning_rate': 0.0009320000000000001, 'epoch': 0.26} + 26%|████████████████████▌ | 469/1784 [23:47<1:16:11, 3.48s/it]g-point operations will not be computed-03 01:08:53,271 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▌ | 469/1784 [23:47<1:16:11, 3.48s/it]g-point operations will not be computed-03 01:08:53,271 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.3542, 'learning_rate': 0.000934, 'epoch': 0.26} + 26%|████████████████████▌ | 470/1784 [23:50<1:15:33, 3.45s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:09:13,944 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▌ | 470/1784 [23:50<1:15:33, 3.45s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:09:13,944 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▌ | 471/1784 [23:54<1:15:13, 3.44s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:09:13,944 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▌ | 471/1784 [23:54<1:15:13, 3.44s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:09:13,944 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.0444, 'learning_rate': 0.0009379999999999999, 'epoch': 0.26} + 26%|████████████████████▋ | 472/1784 [23:57<1:14:44, 3.42s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:09:13,944 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 472/1784 [23:57<1:14:44, 3.42s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:09:13,944 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.4191, 'learning_rate': 0.00094, 'epoch': 0.26} + 27%|████████████████████▋ | 473/1784 [24:00<1:14:08, 3.39s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:09:24,024 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|████████████████████▋ | 473/1784 [24:00<1:14:08, 3.39s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:09:24,024 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|████████████████████▋ | 474/1784 [24:04<1:13:45, 3.38s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:09:24,024 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|████████████████████▋ | 474/1784 [24:04<1:13:45, 3.38s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:09:24,024 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.6918, 'learning_rate': 0.000944, 'epoch': 0.27} + 27%|████████████████████▊ | 475/1784 [24:07<1:13:29, 3.37s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:09:24,024 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|████████████████████▊ | 475/1784 [24:07<1:13:29, 3.37s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:09:24,024 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.6161, 'learning_rate': 0.000946, 'epoch': 0.27} + 27%|████████████████████▊ | 475/1784 [24:07<1:13:29, 3.37s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:09:24,024 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|████████████████████▊ | 476/1784 [24:10<1:12:40, 3.33s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:09:33,928 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|████████████████████▊ | 477/1784 [24:13<1:12:11, 3.31s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:09:33,928 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|████████████████████▊ | 477/1784 [24:13<1:12:11, 3.31s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:09:33,928 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.776, 'learning_rate': 0.00095, 'epoch': 0.27} + 27%|████████████████████▉ | 478/1784 [24:17<1:11:26, 3.28s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:09:40,374 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|████████████████████▉ | 478/1784 [24:17<1:11:26, 3.28s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:09:40,374 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|████████████████████▉ | 479/1784 [24:20<1:10:54, 3.26s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:09:40,374 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|████████████████████▉ | 479/1784 [24:20<1:10:54, 3.26s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:09:40,374 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.8037, 'learning_rate': 0.000954, 'epoch': 0.27} + 27%|████████████████████▉ | 480/1784 [24:23<1:10:45, 3.26s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:09:40,374 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|████████████████████▉ | 480/1784 [24:23<1:10:45, 3.26s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:09:40,374 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:09:48,332 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:09:40,374 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:09:48,332 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:09:40,374 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.6293, 'learning_rate': 0.000958, 'epoch': 0.27} + 27%|█████████████████████ | 482/1784 [24:29<1:09:42, 3.21s/it]g-point operations will not be computed-03 01:09:40,374 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████ | 482/1784 [24:29<1:09:42, 3.21s/it]g-point operations will not be computed-03 01:09:40,374 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:09:54,597 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:09:40,374 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:09:54,597 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:09:40,374 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.0316, 'learning_rate': 0.000962, 'epoch': 0.27} +[WARNING|modeling_utils.py:388] 2022-03-03 01:09:54,597 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:09:40,374 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▏ | 484/1784 [24:36<1:08:13, 3.15s/it]g-point operations will not be computed-03 01:09:40,374 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▏ | 484/1784 [24:36<1:08:13, 3.15s/it]g-point operations will not be computed-03 01:09:40,374 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:10:00,699 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:09:40,374 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:10:00,699 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:09:40,374 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:10:00,699 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:09:40,374 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▏ | 486/1784 [24:42<1:06:10, 3.06s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:10:05,144 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▎ | 487/1784 [24:44<1:04:58, 3.01s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:10:05,144 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▎ | 487/1784 [24:44<1:04:58, 3.01s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:10:05,144 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.478, 'learning_rate': 0.0009699999999999999, 'epoch': 0.27} + 27%|█████████████████████▎ | 487/1784 [24:44<1:04:58, 3.01s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:10:05,144 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▎ | 488/1784 [24:47<1:03:39, 2.95s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:10:10,755 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▎ | 488/1784 [24:47<1:03:39, 2.95s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:10:10,755 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▍ | 489/1784 [24:50<1:02:18, 2.89s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:10:10,755 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:10:14,734 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:10:10,755 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 01:10:14,734 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 01:10:10,755 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.5472, 'learning_rate': 0.000976, 'epoch': 0.27} +{'loss': 5.2288, 'learning_rate': 0.000978, 'epoch': 0.28} + 28%|██████████████████████ | 491/1784 [24:55<59:14, 2.75s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:10:18,606 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 491/1784 [24:55<59:14, 2.75s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:10:18,606 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 492/1784 [24:58<57:00, 2.65s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:10:20,966 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 492/1784 [24:58<57:00, 2.65s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:10:20,966 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 493/1784 [25:00<54:32, 2.53s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:10:23,137 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 493/1784 [25:00<54:32, 2.53s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:10:23,137 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▏ | 494/1784 [25:02<51:28, 2.39s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:10:25,127 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▏ | 494/1784 [25:02<51:28, 2.39s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:10:25,127 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▏ | 495/1784 [25:04<48:37, 2.26s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:10:26,995 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▏ | 495/1784 [25:04<48:37, 2.26s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:10:26,995 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.5781, 'learning_rate': 0.000988, 'epoch': 0.28} + 28%|██████████████████████▎ | 497/1784 [25:07<42:10, 1.97s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:10:28,693 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▎ | 497/1784 [25:07<42:10, 1.97s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:10:28,693 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▎ | 498/1784 [25:09<38:42, 1.81s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:10:31,566 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▎ | 498/1784 [25:09<38:42, 1.81s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:10:31,566 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▍ | 499/1784 [25:10<35:21, 1.65s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:10:32,807 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▍ | 499/1784 [25:10<35:21, 1.65s/it][WARNING|modeling_utils.py:388] 2022-03-03 01:10:32,807 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-03 01:10:34,079 >> Num examples = 2642 | 500/1784 [25:12<35:59, 1.68s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. +[INFO|trainer.py:2366] 2022-03-03 01:10:34,079 >> Num examples = 2642 | 500/1784 [25:12<35:59, 1.68s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. +[INFO|trainer.py:2366] 2022-03-03 01:10:34,079 >> Num examples = 2642 | 500/1784 [25:12<35:59, 1.68s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. +[INFO|trainer.py:2366] 2022-03-03 01:10:34,079 >> Num examples = 2642 | 500/1784 [25:12<35:59, 1.68s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 1%|█ | 4/331 [00:06<10:17, 1.89s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 2%|█▎ | 5/331 [00:09<11:51, 2.18s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 2%|█▌ | 6/331 [00:12<12:52, 2.38s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 2%|█▊ | 7/331 [00:15<13:05, 2.42s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 2%|██ | 8/331 [00:17<13:30, 2.51s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 3%|██▎ | 9/331 [00:20<14:07, 2.63s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 3%|██▍ | 10/331 [00:23<15:01, 2.81s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 3%|██▋ | 11/331 [00:26<14:30, 2.72s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 4%|██▉ | 12/331 [00:29<14:23, 2.71s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 4%|███▏ | 13/331 [00:31<14:11, 2.68s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 4%|███▍ | 14/331 [00:34<13:58, 2.65s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 5%|███▋ | 15/331 [00:37<15:10, 2.88s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 5%|███▉ | 16/331 [00:41<16:03, 3.06s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 5%|████▏ | 17/331 [00:44<16:13, 3.10s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 5%|████▍ | 18/331 [00:46<14:47, 2.83s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 6%|████▋ | 19/331 [00:49<14:36, 2.81s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 6%|████▉ | 20/331 [00:51<13:36, 2.63s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 6%|█████▏ | 21/331 [00:54<14:07, 2.73s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 7%|█████▍ | 22/331 [00:57<15:14, 2.96s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 7%|█████▋ | 23/331 [01:01<16:45, 3.26s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 7%|█████▉ | 24/331 [01:05<17:42, 3.46s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 8%|██████▏ | 25/331 [01:08<17:03, 3.35s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 8%|██████▍ | 26/331 [01:11<15:49, 3.11s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 8%|██████▋ | 27/331 [01:14<15:52, 3.13s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 8%|██████▉ | 28/331 [01:17<15:20, 3.04s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 9%|███████▏ | 29/331 [01:20<14:57, 2.97s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 9%|███████▍ | 30/331 [01:22<14:22, 2.86s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 9%|███████▋ | 31/331 [01:25<13:48, 2.76s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 10%|███████▉ | 32/331 [01:28<13:34, 2.72s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 10%|████████▏ | 33/331 [01:30<13:37, 2.74s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 10%|████████▍ | 34/331 [01:33<13:33, 2.74s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 11%|████████▋ | 35/331 [01:36<13:43, 2.78s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 11%|████████▉ | 36/331 [01:39<14:18, 2.91s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 11%|█████████▏ | 37/331 [01:43<14:59, 3.06s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 11%|█████████▍ | 38/331 [01:46<15:14, 3.12s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 12%|█████████▋ | 39/331 [01:49<15:17, 3.14s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 12%|█████████▉ | 40/331 [01:51<13:55, 2.87s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 12%|██████████▏ | 41/331 [01:54<13:13, 2.74s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 13%|██████████▍ | 42/331 [01:57<14:11, 2.95s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 13%|██████████▋ | 43/331 [02:01<14:49, 3.09s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 13%|██████████▉ | 44/331 [02:04<15:18, 3.20s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 14%|███████████▏ | 45/331 [02:07<14:29, 3.04s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 14%|███████████▍ | 46/331 [02:09<13:27, 2.83s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 14%|███████████▋ | 47/331 [02:11<12:33, 2.65s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 15%|███████████▉ | 48/331 [02:14<12:57, 2.75s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 15%|████████████▏ | 49/331 [02:17<13:35, 2.89s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 15%|████████████▍ | 50/331 [02:20<13:25, 2.87s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 15%|████████████▋ | 51/331 [02:23<13:45, 2.95s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 16%|████████████▉ | 52/331 [02:26<13:07, 2.82s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 16%|█████████████▏ | 53/331 [02:29<13:02, 2.82s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 16%|█████████████▍ | 54/331 [02:31<12:28, 2.70s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 17%|█████████████▋ | 55/331 [02:35<13:36, 2.96s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 17%|█████████████▊ | 56/331 [02:38<13:24, 2.93s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 17%|██████████████ | 57/331 [02:40<12:59, 2.85s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 18%|██████████████▎ | 58/331 [02:43<13:31, 2.97s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 18%|██████████████▌ | 59/331 [02:46<12:46, 2.82s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 18%|██████████████▊ | 60/331 [02:49<12:23, 2.74s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 18%|███████████████ | 61/331 [02:52<12:51, 2.86s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 19%|███████████████▎ | 62/331 [02:54<12:42, 2.83s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 19%|███████████████▌ | 63/331 [02:58<13:50, 3.10s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 19%|███████████████▊ | 64/331 [03:01<13:17, 2.99s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 20%|████████████████ | 65/331 [03:04<13:06, 2.96s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 20%|████████████████▎ | 66/331 [03:08<14:16, 3.23s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 20%|████████████████▌ | 67/331 [03:11<14:55, 3.39s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 21%|████████████████▊ | 68/331 [03:15<15:07, 3.45s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 21%|█████████████████ | 69/331 [03:18<14:45, 3.38s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 21%|█████████████████▎ | 70/331 [03:21<14:23, 3.31s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 21%|█████████████████▌ | 71/331 [03:25<14:29, 3.34s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 22%|█████████████████▊ | 72/331 [03:28<14:23, 3.34s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 22%|██████████████████ | 73/331 [03:31<13:56, 3.24s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 22%|██████████████████▎ | 74/331 [03:34<13:35, 3.17s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 23%|██████████████████▌ | 75/331 [03:37<13:44, 3.22s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 23%|██████████████████▊ | 76/331 [03:40<13:02, 3.07s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 23%|███████████████████ | 77/331 [03:43<12:41, 3.00s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 24%|███████████████████▎ | 78/331 [03:46<12:05, 2.87s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 24%|███████████████████▌ | 79/331 [03:48<11:44, 2.79s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 24%|███████████████████▊ | 80/331 [03:51<11:33, 2.76s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 24%|████████████████████ | 81/331 [03:54<12:01, 2.89s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 25%|████████████████████▎ | 82/331 [03:57<11:49, 2.85s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 25%|████████████████████▌ | 83/331 [04:00<12:08, 2.94s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 25%|████████████████████▊ | 84/331 [04:04<12:54, 3.13s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 26%|█████████████████████ | 85/331 [04:06<12:00, 2.93s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 26%|█████████████████████▎ | 86/331 [04:09<12:39, 3.10s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 26%|█████████████████████▌ | 87/331 [04:12<12:15, 3.01s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 27%|█████████████████████▊ | 88/331 [04:15<11:54, 2.94s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 27%|██████████████████████ | 89/331 [04:17<11:09, 2.77s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 27%|██████████████████████▎ | 90/331 [04:20<10:36, 2.64s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 27%|██████████████████████▌ | 91/331 [04:23<11:03, 2.77s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 28%|██████████████████████▊ | 92/331 [04:25<10:20, 2.60s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 28%|███████████████████████ | 93/331 [04:28<10:31, 2.65s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 28%|███████████████████████▎ | 94/331 [04:31<10:45, 2.72s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 29%|███████████████████████▌ | 95/331 [04:34<10:54, 2.77s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 29%|███████████████████████▊ | 96/331 [04:37<11:02, 2.82s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 29%|████████████████████████ | 97/331 [04:39<10:33, 2.71s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 30%|████████████████████████▎ | 98/331 [04:42<10:52, 2.80s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 30%|████████████████████████▌ | 99/331 [04:45<10:51, 2.81s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 30%|████████████████████████▍ | 100/331 [04:47<10:19, 2.68s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 31%|████████████████████████▋ | 101/331 [04:50<10:16, 2.68s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 31%|████████████████████████▉ | 102/331 [04:53<11:01, 2.89s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 31%|█████████████████████████▏ | 103/331 [04:56<10:33, 2.78s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 31%|█████████████████████████▍ | 104/331 [04:59<10:35, 2.80s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 32%|█████████████████████████▋ | 105/331 [05:02<10:41, 2.84s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 32%|█████████████████████████▉ | 106/331 [05:04<10:38, 2.84s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 32%|██████████████████████████▏ | 107/331 [05:07<09:55, 2.66s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 33%|██████████████████████████▍ | 108/331 [05:09<09:42, 2.61s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 33%|██████████████████████████▋ | 109/331 [05:12<09:39, 2.61s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 33%|██████████████████████████▉ | 110/331 [05:15<10:09, 2.76s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 34%|███████████████████████████▏ | 111/331 [05:18<10:15, 2.80s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 34%|███████████████████████████▍ | 112/331 [05:21<10:14, 2.80s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 34%|███████████████████████████▋ | 113/331 [05:23<09:39, 2.66s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 34%|███████████████████████████▉ | 114/331 [05:26<09:41, 2.68s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 35%|████████████████████████████▏ | 115/331 [05:28<09:38, 2.68s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 35%|████████████████████████████▍ | 116/331 [05:31<09:58, 2.79s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 35%|████████████████████████████▋ | 117/331 [05:34<09:56, 2.79s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 36%|████████████████████████████▉ | 118/331 [05:37<09:45, 2.75s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 36%|█████████████████████████████ | 119/331 [05:40<09:45, 2.76s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 36%|█████████████████████████████▎ | 120/331 [05:42<09:40, 2.75s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 37%|█████████████████████████████▌ | 121/331 [05:46<10:10, 2.91s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 37%|█████████████████████████████▊ | 122/331 [05:48<09:52, 2.83s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 37%|██████████████████████████████ | 123/331 [05:52<10:27, 3.02s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 37%|██████████████████████████████▎ | 124/331 [05:55<10:16, 2.98s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 38%|██████████████████████████████▌ | 125/331 [05:58<10:49, 3.15s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 38%|██████████████████████████████▊ | 126/331 [06:01<10:55, 3.20s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 38%|███████████████████████████████ | 127/331 [06:05<11:21, 3.34s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 39%|███████████████████████████████▎ | 128/331 [06:08<11:22, 3.36s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 39%|███████████████████████████████▌ | 129/331 [06:12<11:07, 3.30s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 39%|███████████████████████████████▊ | 130/331 [06:15<11:13, 3.35s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 40%|████████████████████████████████ | 131/331 [06:19<11:28, 3.44s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 40%|████████████████████████████████▎ | 132/331 [06:22<10:50, 3.27s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 40%|████████████████████████████████▌ | 133/331 [06:24<10:08, 3.08s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 40%|████████████████████████████████▊ | 134/331 [06:27<09:48, 2.99s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 41%|█████████████████████████████████ | 135/331 [06:30<09:53, 3.03s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 41%|█████████████████████████████████▎ | 136/331 [06:33<10:06, 3.11s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 41%|█████████████████████████████████▌ | 137/331 [06:37<10:27, 3.23s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 42%|█████████████████████████████████▊ | 138/331 [06:41<10:46, 3.35s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 42%|██████████████████████████████████ | 139/331 [06:43<09:38, 3.01s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 42%|██████████████████████████████████▎ | 140/331 [06:47<10:19, 3.25s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 43%|██████████████████████████████████▌ | 141/331 [06:49<09:49, 3.10s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 43%|██████████████████████████████████▋ | 142/331 [06:52<09:28, 3.01s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 43%|██████████████████████████████████▉ | 143/331 [06:56<09:52, 3.15s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 44%|███████████████████████████████████▏ | 144/331 [06:58<09:27, 3.04s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 44%|███████████████████████████████████▍ | 145/331 [07:01<09:16, 2.99s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 44%|███████████████████████████████████▋ | 146/331 [07:05<09:43, 3.16s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 44%|███████████████████████████████████▉ | 147/331 [07:08<09:22, 3.06s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 45%|████████████████████████████████████▏ | 148/331 [07:10<08:45, 2.87s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 45%|████████████████████████████████████▍ | 149/331 [07:13<08:16, 2.73s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 45%|████████████████████████████████████▋ | 150/331 [07:16<08:36, 2.85s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 46%|████████████████████████████████████▉ | 151/331 [07:18<08:26, 2.81s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 46%|█████████████████████████████████████▏ | 152/331 [07:21<08:03, 2.70s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 46%|█████████████████████████████████████▍ | 153/331 [07:23<07:57, 2.68s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 47%|█████████████████████████████████████▋ | 154/331 [07:27<08:22, 2.84s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 47%|█████████████████████████████████████▉ | 155/331 [07:30<08:47, 3.00s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 47%|██████████████████████████████████████▏ | 156/331 [07:33<08:57, 3.07s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 47%|██████████████████████████████████████▍ | 157/331 [07:37<09:14, 3.19s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 48%|██████████████████████████████████████▋ | 158/331 [07:40<09:17, 3.22s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 48%|██████████████████████████████████████▉ | 159/331 [07:43<09:23, 3.28s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 48%|███████████████████████████████████████▏ | 160/331 [07:46<08:51, 3.11s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 49%|███████████████████████████████████████▍ | 161/331 [07:49<08:39, 3.05s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 49%|███████████████████████████████████████▋ | 162/331 [07:53<09:05, 3.23s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 49%|███████████████████████████████████████▉ | 163/331 [07:56<09:06, 3.25s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 50%|████████████████████████████████████████▏ | 164/331 [07:59<08:38, 3.11s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 50%|████████████████████████████████████████▍ | 165/331 [08:02<08:25, 3.05s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 50%|████████████████████████████████████████▌ | 166/331 [08:04<08:09, 2.97s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 50%|████████████████████████████████████████▊ | 167/331 [08:08<08:16, 3.03s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 51%|█████████████████████████████████████████ | 168/331 [08:10<07:49, 2.88s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 51%|█████████████████████████████████████████▎ | 169/331 [08:13<07:56, 2.94s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 51%|█████████████████████████████████████████▌ | 170/331 [08:16<07:30, 2.80s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 52%|█████████████████████████████████████████▊ | 171/331 [08:19<07:27, 2.79s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 52%|██████████████████████████████████████████ | 172/331 [08:21<07:10, 2.71s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 52%|██████████████████████████████████████████▎ | 173/331 [08:24<07:23, 2.81s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 53%|██████████████████████████████████████████▌ | 174/331 [08:27<07:04, 2.71s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 53%|██████████████████████████████████████████▊ | 175/331 [08:29<07:08, 2.74s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 53%|███████████████████████████████████████████ | 176/331 [08:32<06:52, 2.66s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 53%|███████████████████████████████████████████▎ | 177/331 [08:35<07:14, 2.82s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 54%|███████████████████████████████████████████▌ | 178/331 [08:39<07:41, 3.01s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 54%|███████████████████████████████████████████▊ | 179/331 [08:42<08:01, 3.17s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 54%|████████████████████████████████████████████ | 180/331 [08:45<07:53, 3.13s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 55%|████████████████████████████████████████████▎ | 181/331 [08:48<07:46, 3.11s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 55%|████████████████████████████████████████████▌ | 182/331 [08:50<07:09, 2.88s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 55%|████████████████████████████████████████████▊ | 183/331 [08:53<06:37, 2.68s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 56%|█████████████████████████████████████████████ | 184/331 [08:55<06:11, 2.53s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 56%|█████████████████████████████████████████████▎ | 185/331 [08:57<05:46, 2.38s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 56%|█████████████████████████████████████████████▌ | 186/331 [09:00<05:57, 2.46s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 56%|█████████████████████████████████████████████▊ | 187/331 [09:03<06:26, 2.69s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 57%|██████████████████████████████████████████████ | 188/331 [09:06<06:26, 2.71s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 57%|██████████████████████████████████████████████▎ | 189/331 [09:08<06:07, 2.59s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 57%|██████████████████████████████████████████████▍ | 190/331 [09:10<05:52, 2.50s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 58%|██████████████████████████████████████████████▋ | 191/331 [09:13<05:51, 2.51s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 58%|██████████████████████████████████████████████▉ | 192/331 [09:15<05:41, 2.46s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 58%|███████████████████████████████████████████████▏ | 193/331 [09:18<06:10, 2.68s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 59%|███████████████████████████████████████████████▍ | 194/331 [09:20<05:49, 2.55s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 59%|███████████████████████████████████████████████▋ | 195/331 [09:23<05:43, 2.52s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 59%|███████████████████████████████████████████████▉ | 196/331 [09:26<05:48, 2.58s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 60%|████████████████████████████████████████████████▏ | 197/331 [09:29<06:06, 2.73s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 60%|████████████████████████████████████████████████▍ | 198/331 [09:31<05:48, 2.62s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 60%|████████████████████████████████████████████████▋ | 199/331 [09:34<05:54, 2.69s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 60%|████████████████████████████████████████████████▉ | 200/331 [09:36<05:36, 2.57s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 61%|█████████████████████████████████████████████████▏ | 201/331 [09:39<05:32, 2.56s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 61%|█████████████████████████████████████████████████▍ | 202/331 [09:42<05:41, 2.65s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 61%|█████████████████████████████████████████████████▋ | 203/331 [09:44<05:41, 2.67s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 62%|█████████████████████████████████████████████████▉ | 204/331 [09:48<06:01, 2.85s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 62%|██████████████████████████████████████████████████▏ | 205/331 [09:51<06:01, 2.87s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 62%|██████████████████████████████████████████████████▍ | 206/331 [09:53<05:52, 2.82s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 63%|██████████████████████████████████████████████████▋ | 207/331 [09:57<06:07, 2.96s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 63%|██████████████████████████████████████████████████▉ | 208/331 [10:00<06:12, 3.03s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 63%|███████████████████████████████████████████████████▏ | 209/331 [10:02<05:43, 2.82s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 63%|███████████████████████████████████████████████████▍ | 210/331 [10:04<05:20, 2.65s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 64%|███████████████████████████████████████████████████▋ | 211/331 [10:07<05:25, 2.71s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 64%|███████████████████████████████████████████████████▉ | 212/331 [10:09<05:09, 2.60s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 64%|████████████████████████████████████████████████████ | 213/331 [10:12<05:07, 2.61s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 65%|████████████████████████████████████████████████████▎ | 214/331 [10:14<04:49, 2.48s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 65%|████████████████████████████████████████████████████▌ | 215/331 [10:16<04:37, 2.39s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 65%|████████████████████████████████████████████████████▊ | 216/331 [10:20<05:06, 2.67s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 66%|█████████████████████████████████████████████████████ | 217/331 [10:22<05:05, 2.68s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 66%|█████████████████████████████████████████████████████▎ | 218/331 [10:26<05:19, 2.83s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 66%|█████████████████████████████████████████████████████▌ | 219/331 [10:28<05:15, 2.81s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 66%|█████████████████████████████████████████████████████▊ | 220/331 [10:31<05:01, 2.71s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 67%|██████████████████████████████████████████████████████ | 221/331 [10:34<05:03, 2.76s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 67%|██████████████████████████████████████████████████████▎ | 222/331 [10:36<04:50, 2.67s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 67%|██████████████████████████████████████████████████████▌ | 223/331 [10:39<04:52, 2.71s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 68%|██████████████████████████████████████████████████████▊ | 224/331 [10:42<04:52, 2.73s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 68%|███████████████████████████████████████████████████████ | 225/331 [10:45<04:47, 2.72s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 68%|███████████████████████████████████████████████████████▎ | 226/331 [10:48<04:59, 2.85s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 69%|███████████████████████████████████████████████████████▌ | 227/331 [10:50<04:53, 2.82s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 69%|███████████████████████████████████████████████████████▊ | 228/331 [10:53<04:46, 2.78s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 69%|████████████████████████████████████████████████████████ | 229/331 [10:56<04:41, 2.76s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 69%|████████████████████████████████████████████████████████▎ | 230/331 [10:58<04:32, 2.70s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 70%|████████████████████████████████████████████████████████▌ | 231/331 [11:01<04:38, 2.79s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 70%|████████████████████████████████████████████████████████▊ | 232/331 [11:04<04:31, 2.74s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 70%|█████████████████████████████████████████████████████████ | 233/331 [11:07<04:39, 2.85s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 71%|█████████████████████████████████████████████████████████▎ | 234/331 [11:10<04:25, 2.74s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 71%|█████████████████████████████████████████████████████████▌ | 235/331 [11:12<04:15, 2.66s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 71%|█████████████████████████████████████████████████████████▊ | 236/331 [11:16<04:42, 2.97s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 72%|█████████████████████████████████████████████████████████▉ | 237/331 [11:19<04:51, 3.10s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 72%|██████████████████████████████████████████████████████████▏ | 238/331 [11:22<04:49, 3.11s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 72%|██████████████████████████████████████████████████████████▍ | 239/331 [11:26<04:49, 3.15s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 73%|██████████████████████████████████████████████████████████▋ | 240/331 [11:29<04:52, 3.21s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 73%|██████████████████████████████████████████████████████████▉ | 241/331 [11:32<04:57, 3.30s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 73%|███████████████████████████████████████████████████████████▏ | 242/331 [11:36<04:55, 3.32s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 73%|███████████████████████████████████████████████████████████▍ | 243/331 [11:39<04:52, 3.33s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 74%|███████████████████████████████████████████████████████████▋ | 244/331 [11:43<04:58, 3.43s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 74%|███████████████████████████████████████████████████████████▉ | 245/331 [11:46<04:46, 3.34s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 74%|████████████████████████████████████████████████████████████▏ | 246/331 [11:50<04:55, 3.48s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 75%|████████████████████████████████████████████████████████████▍ | 247/331 [11:53<04:42, 3.36s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 75%|████████████████████████████████████████████████████████████▋ | 248/331 [11:55<04:21, 3.15s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 75%|████████████████████████████████████████████████████████████▉ | 249/331 [11:58<04:00, 2.93s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 76%|█████████████████████████████████████████████████████████████▏ | 250/331 [12:00<03:48, 2.82s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 76%|█████████████████████████████████████████████████████████████▍ | 251/331 [12:03<03:51, 2.89s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 76%|█████████████████████████████████████████████████████████████▋ | 252/331 [12:06<03:38, 2.77s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 76%|█████████████████████████████████████████████████████████████▉ | 253/331 [12:09<03:49, 2.94s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 77%|██████████████████████████████████████████████████████████████▏ | 254/331 [12:12<03:40, 2.87s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 77%|██████████████████████████████████████████████████████████████▍ | 255/331 [12:15<03:46, 2.99s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 77%|██████████████████████████████████████████████████████████████▋ | 256/331 [12:18<03:38, 2.91s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 78%|██████████████████████████████████████████████████████████████▉ | 257/331 [12:21<03:41, 2.99s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 78%|███████████████████████████████████████████████████████████████▏ | 258/331 [12:24<03:27, 2.84s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 78%|███████████████████████████████████████████████████████████████▍ | 259/331 [12:26<03:21, 2.80s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 79%|███████████████████████████████████████████████████████████████▋ | 260/331 [12:29<03:23, 2.86s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 79%|███████████████████████████████████████████████████████████████▊ | 261/331 [12:32<03:09, 2.71s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 79%|████████████████████████████████████████████████████████████████ | 262/331 [12:34<03:07, 2.72s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 79%|████████████████████████████████████████████████████████████████▎ | 263/331 [12:38<03:14, 2.87s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 80%|████████████████████████████████████████████████████████████████▌ | 264/331 [12:40<03:07, 2.80s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 80%|████████████████████████████████████████████████████████████████▊ | 265/331 [12:43<03:01, 2.75s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 80%|█████████████████████████████████████████████████████████████████ | 266/331 [12:46<02:54, 2.69s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 81%|█████████████████████████████████████████████████████████████████▎ | 267/331 [12:49<03:03, 2.87s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 81%|█████████████████████████████████████████████████████████████████▌ | 268/331 [12:52<03:00, 2.86s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 81%|█████████████████████████████████████████████████████████████████▊ | 269/331 [12:55<03:07, 3.02s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 82%|██████████████████████████████████████████████████████████████████ | 270/331 [12:58<03:02, 2.99s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 82%|██████████████████████████████████████████████████████████████████▎ | 271/331 [13:01<03:05, 3.10s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 82%|██████████████████████████████████████████████████████████████████▌ | 272/331 [13:04<02:56, 3.00s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 82%|██████████████████████████████████████████████████████████████████▊ | 273/331 [13:07<02:55, 3.03s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 83%|███████████████████████████████████████████████████████████████████ | 274/331 [13:11<03:01, 3.19s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 83%|███████████████████████████████████████████████████████████████████▎ | 275/331 [13:14<03:01, 3.23s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 83%|███████████████████████████████████████████████████████████████████▌ | 276/331 [13:17<02:47, 3.05s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 84%|███████████████████████████████████████████████████████████████████▊ | 277/331 [13:19<02:39, 2.96s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 84%|████████████████████████████████████████████████████████████████████ | 278/331 [13:22<02:35, 2.93s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 84%|████████████████████████████████████████████████████████████████████▎ | 279/331 [13:26<02:45, 3.17s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 85%|████████████████████████████████████████████████████████████████████▌ | 280/331 [13:29<02:37, 3.09s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 85%|████████████████████████████████████████████████████████████████████▊ | 281/331 [13:32<02:39, 3.18s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 85%|█████████████████████████████████████████████████████████████████████ | 282/331 [13:35<02:35, 3.17s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 85%|█████████████████████████████████████████████████████████████████████▎ | 283/331 [13:39<02:35, 3.24s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 86%|█████████████████████████████████████████████████████████████████████▍ | 284/331 [13:42<02:37, 3.35s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 86%|█████████████████████████████████████████████████████████████████████▋ | 285/331 [13:46<02:36, 3.40s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 86%|█████████████████████████████████████████████████████████████████████▉ | 286/331 [13:50<02:34, 3.44s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 87%|██████████████████████████████████████████████████████████████████████▏ | 287/331 [13:53<02:35, 3.54s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 87%|██████████████████████████████████████████████████████████████████████▍ | 288/331 [13:57<02:30, 3.50s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 87%|██████████████████████████████████████████████████████████████████████▋ | 289/331 [14:00<02:18, 3.29s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 88%|██████████████████████████████████████████████████████████████████████▉ | 290/331 [14:02<02:07, 3.10s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 88%|███████████████████████████████████████████████████████████████████████▏ | 291/331 [14:05<01:58, 2.96s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 88%|███████████████████████████████████████████████████████████████████████▍ | 292/331 [14:08<01:52, 2.88s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 89%|███████████████████████████████████████████████████████████████████████▋ | 293/331 [14:10<01:48, 2.87s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 89%|███████████████████████████████████████████████████████████████████████▉ | 294/331 [14:13<01:42, 2.76s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 89%|████████████████████████████████████████████████████████████████████████▏ | 295/331 [14:15<01:36, 2.68s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 89%|████████████████████████████████████████████████████████████████████████▍ | 296/331 [14:18<01:31, 2.61s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 90%|████████████████████████████████████████████████████████████████████████▋ | 297/331 [14:21<01:39, 2.92s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 90%|████████████████████████████████████████████████████████████████████████▉ | 298/331 [14:25<01:44, 3.16s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 90%|█████████████████████████████████████████████████████████████████████████▏ | 299/331 [14:28<01:37, 3.04s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 91%|█████████████████████████████████████████████████████████████████████████▍ | 300/331 [14:31<01:33, 3.00s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 91%|█████████████████████████████████████████████████████████████████████████▋ | 301/331 [14:34<01:28, 2.95s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 91%|█████████████████████████████████████████████████████████████████████████▉ | 302/331 [14:36<01:23, 2.89s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 92%|██████████████████████████████████████████████████████████████████████████▏ | 303/331 [14:39<01:18, 2.80s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 92%|██████████████████████████████████████████████████████████████████████████▍ | 304/331 [14:42<01:18, 2.90s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 92%|██████████████████████████████████████████████████████████████████████████▋ | 305/331 [14:45<01:18, 3.02s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 92%|██████████████████████████████████████████████████████████████████████████▉ | 306/331 [14:49<01:19, 3.19s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 93%|███████████████████████████████████████████████████████████████████████████▏ | 307/331 [14:53<01:19, 3.32s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 93%|███████████████████████████████████████████████████████████████████████████▎ | 308/331 [14:57<01:20, 3.51s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 93%|███████████████████████████████████████████████████████████████████████████▌ | 309/331 [15:00<01:17, 3.54s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 94%|███████████████████████████████████████████████████████████████████████████▊ | 310/331 [15:03<01:09, 3.32s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 94%|████████████████████████████████████████████████████████████████████████████ | 311/331 [15:06<01:06, 3.31s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 94%|████████████████████████████████████████████████████████████████████████████▎ | 312/331 [15:09<00:58, 3.10s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 95%|████████████████████████████████████████████████████████████████████████████▌ | 313/331 [15:12<00:54, 3.04s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 95%|████████████████████████████████████████████████████████████████████████████▊ | 314/331 [15:15<00:52, 3.09s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 95%|█████████████████████████████████████████████████████████████████████████████ | 315/331 [15:18<00:51, 3.19s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 95%|█████████████████████████████████████████████████████████████████████████████▎ | 316/331 [15:22<00:48, 3.22s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 96%|█████████████████████████████████████████████████████████████████████████████▌ | 317/331 [15:25<00:47, 3.36s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 96%|█████████████████████████████████████████████████████████████████████████████▊ | 318/331 [15:28<00:41, 3.18s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 96%|██████████████████████████████████████████████████████████████████████████████ | 319/331 [15:31<00:36, 3.02s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 97%|██████████████████████████████████████████████████████████████████████████████▎ | 320/331 [15:34<00:33, 3.06s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 97%|██████████████████████████████████████████████████████████████████████████████▌ | 321/331 [15:37<00:30, 3.01s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 97%|██████████████████████████████████████████████████████████████████████████████▊ | 322/331 [15:40<00:28, 3.16s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 98%|███████████████████████████████████████████████████████████████████████████████ | 323/331 [15:43<00:24, 3.06s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 98%|███████████████████████████████████████████████████████████████████████████████ | 323/331 [15:43<00:24, 3.06s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 98%|███████████████████████████████████████████████████████████████████████████████ | 323/331 [15:43<00:24, 3.06s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 98%|███████████████████████████████████████████████████████████████████████████████▌ | 325/331 [15:50<00:19, 3.22s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 98%|███████████████████████████████████████████████████████████████████████████████▊ | 326/331 [15:53<00:16, 3.26s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 99%|████████████████████████████████████████████████████████████████████████████████ | 327/331 [15:57<00:12, 3.25s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 99%|████████████████████████████████████████████████████████████████████████████████▎| 328/331 [16:00<00:09, 3.26s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 99%|████████████████████████████████████████████████████████████████████████████████▎| 328/331 [16:00<00:09, 3.26s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 99%|████████████████████████████████████████████████████████████████████████████████▎| 328/331 [16:00<00:09, 3.26s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. +100%|████████████████████████████████████████████████████████████████████████████████▊| 330/331 [16:07<00:03, 3.35s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. +100%|█████████████████████████████████████████████████████████████████████████████████| 331/331 [16:08<00:00, 2.92s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. +100%|█████████████████████████████████████████████████████████████████████████████████| 331/331 [16:08<00:00, 2.92s/it][INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. +03/03/2022 01:26:46 - INFO - datasets.metric - Removing /home/sanchit_huggingface_co/.cache/huggingface/metrics/wer/default/default_experiment-1-0.arrow +[INFO|configuration_utils.py:438] 2022-03-03 01:26:46,475 >> Configuration saved in ./checkpoint-500/config.json [INFO|trainer.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. +[INFO|feature_extraction_utils.py:324] 2022-03-03 01:27:02,906 >> Configuration saved in ./checkpoint-500/preprocessor_config.jsonner.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. +[INFO|feature_extraction_utils.py:324] 2022-03-03 01:27:02,906 >> Configuration saved in ./checkpoint-500/preprocessor_config.jsonner.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. +[INFO|feature_extraction_utils.py:324] 2022-03-03 01:27:02,906 >> Configuration saved in ./checkpoint-500/preprocessor_config.jsonner.py:560] 2022-03-03 01:10:34,077 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. +03/03/2022 01:28:45 - WARNING - huggingface_hub.repository - Adding files tracked by Git LFS: ['wandb/run-20220302_214437-2u4nhnsf/run-2u4nhnsf.wandb', 'wandb/run-20220302_222605-10glutwr/run-10glutwr.wandb', 'wandb/run-20220302_233655-33dtvgaa/run-33dtvgaa.wandb', 'wandb/run-20220303_004520-25bnjrx1/run-25bnjrx1.wandb']. This may take a bit of time if the files are large. diff --git a/wandb/run-20220303_004520-25bnjrx1/files/requirements.txt b/wandb/run-20220303_004520-25bnjrx1/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..3974f97a24952deb24d97850f53367da9e7c347d --- /dev/null +++ b/wandb/run-20220303_004520-25bnjrx1/files/requirements.txt @@ -0,0 +1,184 @@ +absl-py==1.0.0 +aiohttp==3.8.1 +aiosignal==1.2.0 +anyio==3.5.0 +appdirs==1.4.4 +argon2-cffi-bindings==21.2.0 +argon2-cffi==21.3.0 +asttokens==2.0.5 +async-timeout==4.0.2 +attrs==21.4.0 +audioread==2.1.9 +babel==2.9.1 +backcall==0.2.0 +bitsandbytes-cuda113==0.26.0 +black==22.1.0 +bleach==4.1.0 +cachetools==5.0.0 +certifi==2021.10.8 +cffi==1.15.0 +charset-normalizer==2.0.11 +chex==0.1.0 +click==8.0.3 +clldutils==3.10.1 +colorlog==6.6.0 +csvw==1.11.0 +cycler==0.11.0 +datasets==1.18.3 +debugpy==1.5.1 +decorator==5.1.1 +defusedxml==0.7.1 +dill==0.3.4 +dlinfo==1.2.1 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +entrypoints==0.4 +executing==0.8.2 +filelock==3.4.2 +flatbuffers==2.0 +flax==0.4.0 +fonttools==4.29.1 +frozenlist==1.3.0 +fsspec==2022.1.0 +gitdb==4.0.9 +gitpython==3.1.27 +google-auth-oauthlib==0.4.6 +google-auth==2.6.0 +grpcio==1.43.0 +huggingface-hub==0.4.0 +hypothesis==6.36.1 +idna==3.3 +importlib-metadata==4.10.1 +ipykernel==6.8.0 +ipython-genutils==0.2.0 +ipython==8.0.1 +ipywidgets==7.6.5 +isodate==0.6.1 +jax==0.2.28 +jaxlib==0.1.76+cuda11.cudnn82 +jedi==0.18.1 +jinja2==3.0.3 +jiwer==2.3.0 +joblib==1.1.0 +json5==0.9.6 +jsonschema==4.4.0 +jupyter-client==7.1.2 +jupyter-console==6.4.0 +jupyter-core==4.9.1 +jupyter-server==1.13.5 +jupyter==1.0.0 +jupyterlab-pygments==0.1.2 +jupyterlab-server==2.10.3 +jupyterlab-widgets==1.0.2 +jupyterlab==3.2.9 +kiwisolver==1.3.2 +librosa==0.8.1 +llvmlite==0.38.0 +markdown==3.3.6 +markupsafe==2.0.1 +matplotlib-inline==0.1.3 +matplotlib==3.5.1 +mistune==0.8.4 +msgpack==1.0.3 +multidict==6.0.2 +multiprocess==0.70.12.2 +mypy-extensions==0.4.3 +nbclassic==0.3.5 +nbclient==0.5.10 +nbconvert==6.4.1 +nbformat==5.1.3 +nest-asyncio==1.5.4 +notebook==6.4.8 +numba==0.55.1 +numpy==1.21.5 +oauthlib==3.2.0 +opt-einsum==3.3.0 +optax==0.1.0 +packaging==21.3 +pandas==1.4.0 +pandocfilters==1.5.0 +parso==0.8.3 +pathspec==0.9.0 +pathtools==0.1.2 +pexpect==4.8.0 +phonemizer==3.0.1 +pickleshare==0.7.5 +pillow==9.0.0 +pip==22.0.2 +pkg-resources==0.0.0 +platformdirs==2.4.1 +pooch==1.6.0 +prometheus-client==0.13.1 +promise==2.3 +prompt-toolkit==3.0.26 +protobuf==3.19.4 +psutil==5.9.0 +ptyprocess==0.7.0 +pure-eval==0.2.2 +pyarrow==6.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pycparser==2.21 +pyctcdecode==0.3.0 +pygments==2.11.2 +pygtrie==2.4.2 +pyparsing==3.0.7 +pyrsistent==0.18.1 +python-dateutil==2.8.2 +python-levenshtein==0.12.2 +pytz==2021.3 +pyyaml==6.0 +pyzmq==22.3.0 +qtconsole==5.2.2 +qtpy==2.0.1 +regex==2022.1.18 +requests-oauthlib==1.3.1 +requests==2.27.1 +resampy==0.2.2 +rfc3986==2.0.0 +rsa==4.8 +sacremoses==0.0.47 +scikit-learn==1.0.2 +scipy==1.7.3 +segments==2.2.0 +send2trash==1.8.0 +sentry-sdk==1.5.6 +setuptools==44.1.1 +shortuuid==1.0.8 +six==1.16.0 +smmap==5.0.0 +sniffio==1.2.0 +sortedcontainers==2.4.0 +soundfile==0.10.3.post1 +stack-data==0.1.4 +tabulate==0.8.9 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.1 +tensorboard==2.8.0 +termcolor==1.1.0 +terminado==0.13.1 +testpath==0.5.0 +threadpoolctl==3.1.0 +tokenizers==0.11.4 +tomli==2.0.0 +toolz==0.11.2 +torch==1.10.2+cu113 +torchaudio==0.10.2+cu113 +tornado==6.1 +tqdm==4.62.3 +traitlets==5.1.1 +transformers==4.17.0.dev0 +typing-extensions==3.10.0.2 +uritemplate==4.1.1 +urllib3==1.26.8 +wandb==0.12.10 +wcwidth==0.2.5 +webencodings==0.5.1 +websocket-client==1.2.3 +werkzeug==2.0.2 +wheel==0.37.1 +widgetsnbextension==3.5.2 +xxhash==2.0.2 +yarl==1.7.2 +yaspin==2.1.0 +zipp==3.7.0 \ No newline at end of file diff --git a/wandb/run-20220303_004520-25bnjrx1/files/wandb-metadata.json b/wandb/run-20220303_004520-25bnjrx1/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..b5684c4aedcf645e2515c9e87260e26f3ca32c34 --- /dev/null +++ b/wandb/run-20220303_004520-25bnjrx1/files/wandb-metadata.json @@ -0,0 +1,60 @@ +{ + "os": "Linux-5.11.0-1028-gcp-x86_64-with-glibc2.33", + "python": "3.9.5", + "heartbeatAt": "2022-03-03T00:45:21.555404", + "startedAt": "2022-03-03T00:45:20.419276", + "docker": null, + "gpu": "Tesla V100-SXM2-16GB", + "gpu_count": 2, + "cpu_count": 16, + "cuda": null, + "args": [ + "--dataset_name=librispeech_asr", + "--model_name_or_path=./", + "--tokenizer_name=./", + "--dataset_config_name=clean", + "--train_split_name=train.100", + "--eval_split_name=validation", + "--output_dir=./", + "--preprocessing_num_workers=1", + "--length_column_name=input_length", + "--overwrite_output_dir", + "--num_train_epochs=1", + "--per_device_train_batch_size=8", + "--per_device_eval_batch_size=8", + "--gradient_accumulation_steps=2", + "--generation_max_length=40", + "--generation_num_beams=1", + "--learning_rate=1e-3", + "--warmup_steps=500", + "--evaluation_strategy=steps", + "--text_column_name=text", + "--save_steps=500", + "--eval_steps=500", + "--logging_steps=1", + "--save_total_limit=1", + "--freeze_feature_encoder", + "--gradient_checkpointing", + "--fp16", + "--group_by_length", + "--predict_with_generate", + "--do_lower_case", + "--do_train", + "--do_eval", + "--report_to=wandb", + "--push_to_hub", + "--use_auth_token" + ], + "state": "running", + "program": "/home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/run_speech_recognition_seq2seq.py", + "codePath": "run_speech_recognition_seq2seq.py", + "git": { + "remote": "https://huggingface.co/sanchit-gandhi/wav2vec2-gpt2-wandb-grid-search", + "commit": "8c7181143c175387040dc1a6ac2ddbc9179b550c" + }, + "email": "sanchit@huggingface.co", + "root": "/home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search", + "host": "sanchit--v100", + "username": "sanchit_huggingface_co", + "executable": "/home/sanchit_huggingface_co/gcp/bin/python" +} diff --git a/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json b/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..158aa42b0a839f331b613d31e3b7029da83dbdd7 --- /dev/null +++ b/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json @@ -0,0 +1 @@ +{"train/loss": 5.4548, "train/learning_rate": 0.000996, "train/epoch": 0.28, "train/global_step": 500, "_runtime": 2486, "_timestamp": 1646270806, "_step": 500, "gradients/decoder.transformer.ln_f.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 11.0, 26.0, 128.0, 404.0, 339.0, 82.0, 18.0, 6.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-555.4196166992188, -544.9506225585938, -534.4815673828125, -524.0125732421875, -513.5435791015625, -503.07452392578125, -492.60552978515625, -482.1365051269531, -471.66748046875, -461.1984558105469, -450.7294616699219, -440.26043701171875, -429.7914123535156, -419.3223876953125, -408.8533935546875, -398.3843688964844, -387.91534423828125, -377.4463195800781, -366.9773254394531, -356.50830078125, -346.0392761230469, -335.57025146484375, -325.10125732421875, -314.6322326660156, -304.1632385253906, -293.6942138671875, -283.2252197265625, -272.7561950683594, -262.28717041015625, -251.8181610107422, -241.34915161132812, -230.880126953125, -220.41107177734375, -209.9420623779297, -199.47303771972656, -189.0040283203125, -178.53500366210938, -168.0659942626953, -157.59698486328125, -147.12796020507812, -136.65895080566406, -126.18993377685547, -115.72091674804688, -105.25190734863281, -94.78288269042969, -84.31387329101562, -73.84485626220703, -63.37583923339844, -52.906822204589844, -42.43780517578125, -31.96879005432129, -21.499774932861328, -11.030757904052734, -0.5617408752441406, 9.907272338867188, 20.37628936767578, 30.845306396484375, 41.31432342529297, 51.78334045410156, 62.25235366821289, 72.72137451171875, 83.19038391113281, 93.6594009399414, 104.12841796875, 114.5974349975586]}, "gradients/decoder.transformer.ln_f.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 4.0, 4.0, 3.0, 6.0, 3.0, 4.0, 4.0, 5.0, 6.0, 20.0, 9.0, 19.0, 24.0, 18.0, 18.0, 26.0, 40.0, 33.0, 36.0, 53.0, 40.0, 45.0, 50.0, 65.0, 42.0, 46.0, 31.0, 50.0, 43.0, 37.0, 43.0, 29.0, 20.0, 22.0, 21.0, 27.0, 15.0, 10.0, 11.0, 6.0, 5.0, 11.0, 5.0, 2.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-357.82763671875, -347.09246826171875, -336.3573303222656, -325.6221618652344, -314.88702392578125, -304.15185546875, -293.41668701171875, -282.6815490722656, -271.9463806152344, -261.2112121582031, -250.47607421875, -239.74090576171875, -229.00575256347656, -218.27059936523438, -207.5354461669922, -196.80029296875, -186.0651397705078, -175.32998657226562, -164.59483337402344, -153.85968017578125, -143.12451171875, -132.3893585205078, -121.65420532226562, -110.9190444946289, -100.18389129638672, -89.44873809814453, -78.71357727050781, -67.97842407226562, -57.24326705932617, -46.50811004638672, -35.77295684814453, -25.037796020507812, -14.302642822265625, -3.5674867630004883, 7.167669296264648, 17.90282440185547, 28.637981414794922, 39.373138427734375, 50.10829162597656, 60.84345245361328, 71.57860565185547, 82.31375885009766, 93.04891967773438, 103.78407287597656, 114.51922607421875, 125.25438690185547, 135.98953247070312, 146.72470092773438, 157.45985412597656, 168.19500732421875, 178.93016052246094, 189.66531372070312, 200.40048217773438, 211.13563537597656, 221.87078857421875, 232.60595703125, 243.34109497070312, 254.0762481689453, 264.8114013671875, 275.54656982421875, 286.2817077636719, 297.0168762207031, 307.75201416015625, 318.4871826171875, 329.22235107421875]}, "gradients/decoder.transformer.h.23.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 4.0, 2.0, 9.0, 9.0, 6.0, 7.0, 17.0, 12.0, 24.0, 28.0, 31.0, 26.0, 38.0, 52.0, 51.0, 55.0, 59.0, 68.0, 63.0, 60.0, 67.0, 52.0, 46.0, 40.0, 38.0, 33.0, 31.0, 25.0, 14.0, 13.0, 12.0, 10.0, 4.0, 2.0, 2.0, 2.0, 4.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.787109375, -3.661285400390625, -3.53546142578125, -3.409637451171875, -3.2838134765625, -3.157989501953125, -3.03216552734375, -2.906341552734375, -2.780517578125, -2.654693603515625, -2.52886962890625, -2.403045654296875, -2.2772216796875, -2.151397705078125, -2.02557373046875, -1.899749755859375, -1.77392578125, -1.648101806640625, -1.52227783203125, -1.396453857421875, -1.2706298828125, -1.144805908203125, -1.01898193359375, -0.893157958984375, -0.767333984375, -0.641510009765625, -0.51568603515625, -0.389862060546875, -0.2640380859375, -0.138214111328125, -0.01239013671875, 0.113433837890625, 0.2392578125, 0.365081787109375, 0.49090576171875, 0.616729736328125, 0.7425537109375, 0.868377685546875, 0.99420166015625, 1.120025634765625, 1.245849609375, 1.371673583984375, 1.49749755859375, 1.623321533203125, 1.7491455078125, 1.874969482421875, 2.00079345703125, 2.126617431640625, 2.25244140625, 2.378265380859375, 2.50408935546875, 2.629913330078125, 2.7557373046875, 2.881561279296875, 3.00738525390625, 3.133209228515625, 3.259033203125, 3.384857177734375, 3.51068115234375, 3.636505126953125, 3.7623291015625, 3.888153076171875, 4.01397705078125, 4.139801025390625, 4.265625]}, "gradients/decoder.transformer.h.23.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 2.0, 2.0, 0.0, 0.0, 1.0, 3.0, 4.0, 10.0, 21.0, 29.0, 35.0, 46.0, 52.0, 77.0, 158.0, 247.0, 340.0, 521.0, 853.0, 1443.0, 2810.0, 6623.0, 20190.0, 188234.0, 3914498.0, 39514.0, 10004.0, 3823.0, 1905.0, 1047.0, 645.0, 391.0, 243.0, 159.0, 111.0, 80.0, 57.0, 29.0, 21.0, 12.0, 14.0, 8.0, 15.0, 7.0, 2.0, 2.0, 3.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-39.0, -37.61767578125, -36.2353515625, -34.85302734375, -33.470703125, -32.08837890625, -30.7060546875, -29.32373046875, -27.94140625, -26.55908203125, -25.1767578125, -23.79443359375, -22.412109375, -21.02978515625, -19.6474609375, -18.26513671875, -16.8828125, -15.50048828125, -14.1181640625, -12.73583984375, -11.353515625, -9.97119140625, -8.5888671875, -7.20654296875, -5.82421875, -4.44189453125, -3.0595703125, -1.67724609375, -0.294921875, 1.08740234375, 2.4697265625, 3.85205078125, 5.234375, 6.61669921875, 7.9990234375, 9.38134765625, 10.763671875, 12.14599609375, 13.5283203125, 14.91064453125, 16.29296875, 17.67529296875, 19.0576171875, 20.43994140625, 21.822265625, 23.20458984375, 24.5869140625, 25.96923828125, 27.3515625, 28.73388671875, 30.1162109375, 31.49853515625, 32.880859375, 34.26318359375, 35.6455078125, 37.02783203125, 38.41015625, 39.79248046875, 41.1748046875, 42.55712890625, 43.939453125, 45.32177734375, 46.7041015625, 48.08642578125, 49.46875]}, "gradients/decoder.transformer.h.23.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 4.0, 1.0, 1.0, 1.0, 1.0, 4.0, 4.0, 3.0, 4.0, 8.0, 18.0, 16.0, 21.0, 42.0, 36.0, 87.0, 99.0, 238.0, 791.0, 1437.0, 611.0, 214.0, 123.0, 80.0, 58.0, 41.0, 49.0, 30.0, 14.0, 17.0, 11.0, 5.0, 6.0, 6.0, 0.0, 3.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-23.25, -22.611083984375, -21.97216796875, -21.333251953125, -20.6943359375, -20.055419921875, -19.41650390625, -18.777587890625, -18.138671875, -17.499755859375, -16.86083984375, -16.221923828125, -15.5830078125, -14.944091796875, -14.30517578125, -13.666259765625, -13.02734375, -12.388427734375, -11.74951171875, -11.110595703125, -10.4716796875, -9.832763671875, -9.19384765625, -8.554931640625, -7.916015625, -7.277099609375, -6.63818359375, -5.999267578125, -5.3603515625, -4.721435546875, -4.08251953125, -3.443603515625, -2.8046875, -2.165771484375, -1.52685546875, -0.887939453125, -0.2490234375, 0.389892578125, 1.02880859375, 1.667724609375, 2.306640625, 2.945556640625, 3.58447265625, 4.223388671875, 4.8623046875, 5.501220703125, 6.14013671875, 6.779052734375, 7.41796875, 8.056884765625, 8.69580078125, 9.334716796875, 9.9736328125, 10.612548828125, 11.25146484375, 11.890380859375, 12.529296875, 13.168212890625, 13.80712890625, 14.446044921875, 15.0849609375, 15.723876953125, 16.36279296875, 17.001708984375, 17.640625]}, "gradients/decoder.transformer.h.23.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 3.0, 8.0, 5.0, 7.0, 13.0, 14.0, 20.0, 29.0, 51.0, 64.0, 100.0, 165.0, 316.0, 1004.0, 10994.0, 4139263.0, 39737.0, 1475.0, 424.0, 247.0, 126.0, 89.0, 48.0, 27.0, 20.0, 15.0, 6.0, 6.0, 2.0, 0.0, 5.0, 4.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-57.75, -55.4970703125, -53.244140625, -50.9912109375, -48.73828125, -46.4853515625, -44.232421875, -41.9794921875, -39.7265625, -37.4736328125, -35.220703125, -32.9677734375, -30.71484375, -28.4619140625, -26.208984375, -23.9560546875, -21.703125, -19.4501953125, -17.197265625, -14.9443359375, -12.69140625, -10.4384765625, -8.185546875, -5.9326171875, -3.6796875, -1.4267578125, 0.826171875, 3.0791015625, 5.33203125, 7.5849609375, 9.837890625, 12.0908203125, 14.34375, 16.5966796875, 18.849609375, 21.1025390625, 23.35546875, 25.6083984375, 27.861328125, 30.1142578125, 32.3671875, 34.6201171875, 36.873046875, 39.1259765625, 41.37890625, 43.6318359375, 45.884765625, 48.1376953125, 50.390625, 52.6435546875, 54.896484375, 57.1494140625, 59.40234375, 61.6552734375, 63.908203125, 66.1611328125, 68.4140625, 70.6669921875, 72.919921875, 75.1728515625, 77.42578125, 79.6787109375, 81.931640625, 84.1845703125, 86.4375]}, "gradients/decoder.transformer.h.23.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 244.0, 731.0, 36.0, 1.0, 0.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-537.5723266601562, -527.2598266601562, -516.947265625, -506.6347351074219, -496.32220458984375, -486.0096740722656, -475.6971435546875, -465.3846435546875, -455.07208251953125, -444.7595520019531, -434.447021484375, -424.1344909667969, -413.82196044921875, -403.5094299316406, -393.1968994140625, -382.8843994140625, -372.5718688964844, -362.25933837890625, -351.9468078613281, -341.63427734375, -331.3217468261719, -321.00921630859375, -310.6966857910156, -300.3841552734375, -290.0716552734375, -279.7591247558594, -269.44659423828125, -259.1340637207031, -248.821533203125, -238.50900268554688, -228.1964874267578, -217.8839569091797, -207.5714111328125, -197.25888061523438, -186.94635009765625, -176.63381958007812, -166.3212890625, -156.00875854492188, -145.6962432861328, -135.3837127685547, -125.0711898803711, -114.75865936279297, -104.44613647460938, -94.13360595703125, -83.82107543945312, -73.508544921875, -63.19601821899414, -52.88349151611328, -42.570960998535156, -32.25843048095703, -21.945903778076172, -11.63337516784668, -1.3208465576171875, 8.991683959960938, 19.304210662841797, 29.616737365722656, 39.92926788330078, 50.241798400878906, 60.554325103759766, 70.86685180664062, 81.17938232421875, 91.49191284179688, 101.804443359375, 112.1169662475586, 122.42949676513672]}, "gradients/decoder.transformer.h.23.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 3.0, 8.0, 2.0, 5.0, 6.0, 13.0, 20.0, 18.0, 24.0, 28.0, 30.0, 26.0, 43.0, 42.0, 35.0, 45.0, 47.0, 38.0, 61.0, 54.0, 50.0, 58.0, 54.0, 55.0, 33.0, 38.0, 22.0, 38.0, 25.0, 24.0, 15.0, 15.0, 8.0, 7.0, 6.0, 8.0, 7.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-91.775390625, -88.53892517089844, -85.3024673461914, -82.06600189208984, -78.82953643798828, -75.59307861328125, -72.35661315917969, -69.12014770507812, -65.8836898803711, -62.6472282409668, -59.410762786865234, -56.17430114746094, -52.93783950805664, -49.701377868652344, -46.46491241455078, -43.228450775146484, -39.99198532104492, -36.755523681640625, -33.51905822753906, -30.282596588134766, -27.04613494873047, -23.80967140197754, -20.57320785522461, -17.336746215820312, -14.100282669067383, -10.86382007598877, -7.627357006072998, -4.390893936157227, -1.1544313430786133, 2.08203125, 5.31849479675293, 8.554956436157227, 11.791419982910156, 15.02788257598877, 18.264345169067383, 21.500808715820312, 24.73727035522461, 27.97373390197754, 31.21019744873047, 34.446659088134766, 37.68312072753906, 40.91958236694336, 44.15604782104492, 47.39250946044922, 50.628971099853516, 53.86543273925781, 57.101898193359375, 60.33835983276367, 63.574825286865234, 66.81128692626953, 70.0477523803711, 73.28421020507812, 76.52067565917969, 79.75714111328125, 82.99360656738281, 86.23006439208984, 89.4665298461914, 92.70299530029297, 95.939453125, 99.17591857910156, 102.41238403320312, 105.64884185791016, 108.88530731201172, 112.12176513671875, 115.35823059082031]}, "gradients/decoder.transformer.h.23.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 6.0, 9.0, 6.0, 7.0, 9.0, 13.0, 19.0, 17.0, 24.0, 25.0, 36.0, 42.0, 51.0, 43.0, 53.0, 65.0, 66.0, 51.0, 72.0, 55.0, 71.0, 44.0, 34.0, 42.0, 34.0, 23.0, 25.0, 15.0, 14.0, 15.0, 7.0, 8.0, 4.0, 3.0, 1.0, 2.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.7109375, -3.58404541015625, -3.4571533203125, -3.33026123046875, -3.203369140625, -3.07647705078125, -2.9495849609375, -2.82269287109375, -2.69580078125, -2.56890869140625, -2.4420166015625, -2.31512451171875, -2.188232421875, -2.06134033203125, -1.9344482421875, -1.80755615234375, -1.6806640625, -1.55377197265625, -1.4268798828125, -1.29998779296875, -1.173095703125, -1.04620361328125, -0.9193115234375, -0.79241943359375, -0.66552734375, -0.53863525390625, -0.4117431640625, -0.28485107421875, -0.157958984375, -0.03106689453125, 0.0958251953125, 0.22271728515625, 0.349609375, 0.47650146484375, 0.6033935546875, 0.73028564453125, 0.857177734375, 0.98406982421875, 1.1109619140625, 1.23785400390625, 1.36474609375, 1.49163818359375, 1.6185302734375, 1.74542236328125, 1.872314453125, 1.99920654296875, 2.1260986328125, 2.25299072265625, 2.3798828125, 2.50677490234375, 2.6336669921875, 2.76055908203125, 2.887451171875, 3.01434326171875, 3.1412353515625, 3.26812744140625, 3.39501953125, 3.52191162109375, 3.6488037109375, 3.77569580078125, 3.902587890625, 4.02947998046875, 4.1563720703125, 4.28326416015625, 4.41015625]}, "gradients/decoder.transformer.h.23.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 3.0, 0.0, 3.0, 4.0, 3.0, 5.0, 8.0, 8.0, 16.0, 11.0, 29.0, 37.0, 54.0, 81.0, 129.0, 158.0, 253.0, 405.0, 673.0, 1098.0, 1748.0, 2845.0, 4700.0, 8040.0, 13996.0, 25167.0, 47208.0, 95975.0, 263209.0, 354384.0, 109975.0, 53013.0, 27855.0, 15320.0, 8896.0, 5170.0, 3084.0, 1871.0, 1125.0, 675.0, 469.0, 285.0, 193.0, 105.0, 72.0, 51.0, 44.0, 36.0, 18.0, 17.0, 15.0, 8.0, 5.0, 6.0, 4.0, 2.0, 3.0, 0.0, 1.0, 3.0, 1.0, 0.0, 2.0], "bins": [-0.1370849609375, -0.13252830505371094, -0.12797164916992188, -0.12341499328613281, -0.11885833740234375, -0.11430168151855469, -0.10974502563476562, -0.10518836975097656, -0.1006317138671875, -0.09607505798339844, -0.09151840209960938, -0.08696174621582031, -0.08240509033203125, -0.07784843444824219, -0.07329177856445312, -0.06873512268066406, -0.064178466796875, -0.05962181091308594, -0.055065155029296875, -0.05050849914550781, -0.04595184326171875, -0.04139518737792969, -0.036838531494140625, -0.03228187561035156, -0.0277252197265625, -0.023168563842773438, -0.018611907958984375, -0.014055252075195312, -0.00949859619140625, -0.0049419403076171875, -0.000385284423828125, 0.0041713714599609375, 0.00872802734375, 0.013284683227539062, 0.017841339111328125, 0.022397994995117188, 0.02695465087890625, 0.03151130676269531, 0.036067962646484375, 0.04062461853027344, 0.0451812744140625, 0.04973793029785156, 0.054294586181640625, 0.05885124206542969, 0.06340789794921875, 0.06796455383300781, 0.07252120971679688, 0.07707786560058594, 0.081634521484375, 0.08619117736816406, 0.09074783325195312, 0.09530448913574219, 0.09986114501953125, 0.10441780090332031, 0.10897445678710938, 0.11353111267089844, 0.1180877685546875, 0.12264442443847656, 0.12720108032226562, 0.1317577362060547, 0.13631439208984375, 0.1408710479736328, 0.14542770385742188, 0.14998435974121094, 0.154541015625]}, "gradients/decoder.transformer.h.23.crossattention.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 3.0, 1.0, 2.0, 5.0, 2.0, 0.0, 3.0, 5.0, 9.0, 16.0, 8.0, 17.0, 18.0, 13.0, 8.0, 25.0, 21.0, 24.0, 22.0, 32.0, 31.0, 35.0, 33.0, 36.0, 37.0, 37.0, 40.0, 1060.0, 46.0, 31.0, 35.0, 38.0, 32.0, 42.0, 37.0, 24.0, 31.0, 28.0, 22.0, 21.0, 25.0, 19.0, 8.0, 18.0, 8.0, 4.0, 14.0, 4.0, 2.0, 4.0, 0.0, 3.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-2.298828125, -2.2283935546875, -2.157958984375, -2.0875244140625, -2.01708984375, -1.9466552734375, -1.876220703125, -1.8057861328125, -1.7353515625, -1.6649169921875, -1.594482421875, -1.5240478515625, -1.45361328125, -1.3831787109375, -1.312744140625, -1.2423095703125, -1.171875, -1.1014404296875, -1.031005859375, -0.9605712890625, -0.89013671875, -0.8197021484375, -0.749267578125, -0.6788330078125, -0.6083984375, -0.5379638671875, -0.467529296875, -0.3970947265625, -0.32666015625, -0.2562255859375, -0.185791015625, -0.1153564453125, -0.044921875, 0.0255126953125, 0.095947265625, 0.1663818359375, 0.23681640625, 0.3072509765625, 0.377685546875, 0.4481201171875, 0.5185546875, 0.5889892578125, 0.659423828125, 0.7298583984375, 0.80029296875, 0.8707275390625, 0.941162109375, 1.0115966796875, 1.08203125, 1.1524658203125, 1.222900390625, 1.2933349609375, 1.36376953125, 1.4342041015625, 1.504638671875, 1.5750732421875, 1.6455078125, 1.7159423828125, 1.786376953125, 1.8568115234375, 1.92724609375, 1.9976806640625, 2.068115234375, 2.1385498046875, 2.208984375]}, "gradients/decoder.transformer.h.23.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 1.0, 3.0, 7.0, 10.0, 9.0, 10.0, 23.0, 38.0, 47.0, 69.0, 87.0, 116.0, 151.0, 267.0, 376.0, 490.0, 663.0, 981.0, 1361.0, 1852.0, 2589.0, 3783.0, 5094.0, 7265.0, 10482.0, 14911.0, 21722.0, 32424.0, 48200.0, 74589.0, 120101.0, 1323083.0, 157530.0, 88706.0, 57355.0, 38054.0, 25645.0, 17701.0, 12089.0, 8568.0, 5876.0, 4134.0, 3036.0, 2163.0, 1548.0, 1212.0, 757.0, 554.0, 428.0, 279.0, 201.0, 144.0, 102.0, 81.0, 58.0, 47.0, 27.0, 14.0, 15.0, 9.0, 5.0, 5.0, 3.0], "bins": [-0.0552978515625, -0.05360698699951172, -0.05191612243652344, -0.050225257873535156, -0.048534393310546875, -0.046843528747558594, -0.04515266418457031, -0.04346179962158203, -0.04177093505859375, -0.04008007049560547, -0.03838920593261719, -0.036698341369628906, -0.035007476806640625, -0.033316612243652344, -0.03162574768066406, -0.02993488311767578, -0.0282440185546875, -0.02655315399169922, -0.024862289428710938, -0.023171424865722656, -0.021480560302734375, -0.019789695739746094, -0.018098831176757812, -0.01640796661376953, -0.01471710205078125, -0.013026237487792969, -0.011335372924804688, -0.009644508361816406, -0.007953643798828125, -0.006262779235839844, -0.0045719146728515625, -0.0028810501098632812, -0.001190185546875, 0.0005006790161132812, 0.0021915435791015625, 0.0038824081420898438, 0.005573272705078125, 0.007264137268066406, 0.008955001831054688, 0.010645866394042969, 0.01233673095703125, 0.014027595520019531, 0.015718460083007812, 0.017409324645996094, 0.019100189208984375, 0.020791053771972656, 0.022481918334960938, 0.02417278289794922, 0.0258636474609375, 0.02755451202392578, 0.029245376586914062, 0.030936241149902344, 0.032627105712890625, 0.034317970275878906, 0.03600883483886719, 0.03769969940185547, 0.03939056396484375, 0.04108142852783203, 0.04277229309082031, 0.044463157653808594, 0.046154022216796875, 0.047844886779785156, 0.04953575134277344, 0.05122661590576172, 0.05291748046875]}, "gradients/decoder.transformer.h.23.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 4.0, 3.0, 5.0, 4.0, 8.0, 3.0, 7.0, 9.0, 15.0, 14.0, 18.0, 28.0, 45.0, 41.0, 55.0, 52.0, 88.0, 172.0, 108.0, 63.0, 48.0, 48.0, 45.0, 30.0, 20.0, 13.0, 15.0, 16.0, 9.0, 7.0, 6.0, 5.0, 3.0, 0.0, 3.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.516674041748047e-06, -3.398396074771881e-06, -3.2801181077957153e-06, -3.1618401408195496e-06, -3.043562173843384e-06, -2.925284206867218e-06, -2.8070062398910522e-06, -2.6887282729148865e-06, -2.5704503059387207e-06, -2.452172338962555e-06, -2.333894371986389e-06, -2.2156164050102234e-06, -2.0973384380340576e-06, -1.979060471057892e-06, -1.860782504081726e-06, -1.7425045371055603e-06, -1.6242265701293945e-06, -1.5059486031532288e-06, -1.387670636177063e-06, -1.2693926692008972e-06, -1.1511147022247314e-06, -1.0328367352485657e-06, -9.145587682723999e-07, -7.962808012962341e-07, -6.780028343200684e-07, -5.597248673439026e-07, -4.414469003677368e-07, -3.2316893339157104e-07, -2.0489096641540527e-07, -8.66129994392395e-08, 3.166496753692627e-08, 1.4994293451309204e-07, 2.682209014892578e-07, 3.864988684654236e-07, 5.047768354415894e-07, 6.230548024177551e-07, 7.413327693939209e-07, 8.596107363700867e-07, 9.778887033462524e-07, 1.0961666703224182e-06, 1.214444637298584e-06, 1.3327226042747498e-06, 1.4510005712509155e-06, 1.5692785382270813e-06, 1.687556505203247e-06, 1.8058344721794128e-06, 1.9241124391555786e-06, 2.0423904061317444e-06, 2.16066837310791e-06, 2.278946340084076e-06, 2.3972243070602417e-06, 2.5155022740364075e-06, 2.6337802410125732e-06, 2.752058207988739e-06, 2.8703361749649048e-06, 2.9886141419410706e-06, 3.1068921089172363e-06, 3.225170075893402e-06, 3.343448042869568e-06, 3.4617260098457336e-06, 3.5800039768218994e-06, 3.698281943798065e-06, 3.816559910774231e-06, 3.934837877750397e-06, 4.0531158447265625e-06]}, "gradients/decoder.transformer.h.23.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 5.0, 2.0, 0.0, 4.0, 2.0, 5.0, 6.0, 8.0, 15.0, 31.0, 25.0, 30.0, 32.0, 70.0, 92.0, 120.0, 182.0, 292.0, 676.0, 1040854.0, 4883.0, 433.0, 263.0, 147.0, 101.0, 84.0, 60.0, 34.0, 35.0, 18.0, 11.0, 11.0, 7.0, 8.0, 8.0, 3.0, 4.0, 3.0, 1.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.626150131225586e-05, -9.357277303934097e-05, -9.088404476642609e-05, -8.81953164935112e-05, -8.550658822059631e-05, -8.281785994768143e-05, -8.012913167476654e-05, -7.744040340185165e-05, -7.475167512893677e-05, -7.206294685602188e-05, -6.9374218583107e-05, -6.668549031019211e-05, -6.399676203727722e-05, -6.130803376436234e-05, -5.861930549144745e-05, -5.593057721853256e-05, -5.3241848945617676e-05, -5.055312067270279e-05, -4.78643923997879e-05, -4.5175664126873016e-05, -4.248693585395813e-05, -3.9798207581043243e-05, -3.710947930812836e-05, -3.442075103521347e-05, -3.1732022762298584e-05, -2.9043294489383698e-05, -2.635456621646881e-05, -2.3665837943553925e-05, -2.0977109670639038e-05, -1.828838139772415e-05, -1.5599653124809265e-05, -1.2910924851894379e-05, -1.0222196578979492e-05, -7.533468306064606e-06, -4.844740033149719e-06, -2.1560117602348328e-06, 5.327165126800537e-07, 3.22144478559494e-06, 5.910173058509827e-06, 8.598901331424713e-06, 1.12876296043396e-05, 1.3976357877254486e-05, 1.6665086150169373e-05, 1.935381442308426e-05, 2.2042542695999146e-05, 2.4731270968914032e-05, 2.741999924182892e-05, 3.0108727514743805e-05, 3.279745578765869e-05, 3.548618406057358e-05, 3.8174912333488464e-05, 4.086364060640335e-05, 4.355236887931824e-05, 4.6241097152233124e-05, 4.892982542514801e-05, 5.16185536980629e-05, 5.430728197097778e-05, 5.699601024389267e-05, 5.9684738516807556e-05, 6.237346678972244e-05, 6.506219506263733e-05, 6.775092333555222e-05, 7.04396516084671e-05, 7.312837988138199e-05, 7.581710815429688e-05]}, "gradients/decoder.transformer.h.23.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 341.0, 667.0, 7.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.257505027751904e-05, -1.2318921108089853e-05, -1.2062791938660666e-05, -1.1806661859736778e-05, -1.1550532690307591e-05, -1.1294403520878404e-05, -1.1038273441954516e-05, -1.0782144272525329e-05, -1.0526015103096142e-05, -1.0269885933666956e-05, -1.0013756764237769e-05, -9.75762668531388e-06, -9.501497515884694e-06, -9.245368346455507e-06, -8.989238267531618e-06, -8.733109098102432e-06, -8.476979928673245e-06, -8.220850759244058e-06, -7.964721589814872e-06, -7.708591510890983e-06, -7.452462341461796e-06, -7.19633317203261e-06, -6.940203547856072e-06, -6.684073923679534e-06, -6.427944754250348e-06, -6.171815584821161e-06, -5.915685960644623e-06, -5.659556336468086e-06, -5.403427167038899e-06, -5.147297997609712e-06, -4.8911683734331746e-06, -4.635038749256637e-06, -4.378910489322152e-06, -4.122781319892965e-06, -3.866651695716428e-06, -3.6105222989135655e-06, -3.3543929021107033e-06, -3.098263505307841e-06, -2.842134108504979e-06, -2.5860047117021168e-06, -2.3298753148992546e-06, -2.0737459180963924e-06, -1.8176165212935302e-06, -1.561487124490668e-06, -1.3053577276878059e-06, -1.0492283308849437e-06, -7.930989340820815e-07, -5.369695372792194e-07, -2.808401404763572e-07, -2.4710743673495017e-08, 2.3141865312936716e-07, 4.875480499322293e-07, 7.436774467350915e-07, 9.998068435379537e-07, 1.2559362403408159e-06, 1.512065637143678e-06, 1.7681950339465402e-06, 2.0243244307494024e-06, 2.2804538275522646e-06, 2.5365832243551267e-06, 2.792712621157989e-06, 3.048842017960851e-06, 3.3049714147637133e-06, 3.5611008115665754e-06, 3.817230208369438e-06]}, "gradients/decoder.transformer.h.23.ln_cross_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 2.0, 2.0, 4.0, 0.0, 3.0, 3.0, 7.0, 4.0, 4.0, 0.0, 5.0, 9.0, 14.0, 13.0, 17.0, 24.0, 0.0, 33.0, 27.0, 37.0, 30.0, 39.0, 0.0, 44.0, 56.0, 53.0, 46.0, 49.0, 0.0, 48.0, 47.0, 44.0, 64.0, 33.0, 40.0, 0.0, 33.0, 40.0, 27.0, 27.0, 16.0, 0.0, 18.0, 19.0, 7.0, 10.0, 7.0, 4.0, 0.0, 3.0, 4.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.5497207641601562e-06, -1.4994293451309204e-06, -1.4491379261016846e-06, -1.3988465070724487e-06, -1.3485550880432129e-06, -1.298263669013977e-06, -1.2479722499847412e-06, -1.1976808309555054e-06, -1.1473894119262695e-06, -1.0970979928970337e-06, -1.0468065738677979e-06, -9.96515154838562e-07, -9.462237358093262e-07, -8.959323167800903e-07, -8.456408977508545e-07, -7.953494787216187e-07, -7.450580596923828e-07, -6.94766640663147e-07, -6.444752216339111e-07, -5.941838026046753e-07, -5.438923835754395e-07, -4.936009645462036e-07, -4.4330954551696777e-07, -3.9301812648773193e-07, -3.427267074584961e-07, -2.9243528842926025e-07, -2.421438694000244e-07, -1.9185245037078857e-07, -1.4156103134155273e-07, -9.12696123123169e-08, -4.0978193283081055e-08, 9.313225746154785e-09, 5.960464477539063e-08, 1.0989606380462646e-07, 1.601874828338623e-07, 2.1047890186309814e-07, 2.60770320892334e-07, 3.110617399215698e-07, 3.6135315895080566e-07, 4.116445779800415e-07, 4.6193599700927734e-07, 5.122274160385132e-07, 5.62518835067749e-07, 6.128102540969849e-07, 6.631016731262207e-07, 7.133930921554565e-07, 7.636845111846924e-07, 8.139759302139282e-07, 8.642673492431641e-07, 9.145587682723999e-07, 9.648501873016357e-07, 1.0151416063308716e-06, 1.0654330253601074e-06, 1.1157244443893433e-06, 1.166015863418579e-06, 1.216307282447815e-06, 1.2665987014770508e-06, 1.3168901205062866e-06, 1.3671815395355225e-06, 1.4174729585647583e-06, 1.4677643775939941e-06, 1.51805579662323e-06, 1.5683472156524658e-06, 1.6186386346817017e-06, 1.6689300537109375e-06]}, "gradients/decoder.transformer.h.23.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 6.0, 9.0, 6.0, 7.0, 9.0, 13.0, 19.0, 17.0, 24.0, 25.0, 36.0, 42.0, 51.0, 43.0, 53.0, 65.0, 66.0, 51.0, 72.0, 55.0, 71.0, 44.0, 34.0, 42.0, 34.0, 23.0, 25.0, 15.0, 14.0, 15.0, 7.0, 8.0, 4.0, 3.0, 1.0, 2.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.7109375, -3.58404541015625, -3.4571533203125, -3.33026123046875, -3.203369140625, -3.07647705078125, -2.9495849609375, -2.82269287109375, -2.69580078125, -2.56890869140625, -2.4420166015625, -2.31512451171875, -2.188232421875, -2.06134033203125, -1.9344482421875, -1.80755615234375, -1.6806640625, -1.55377197265625, -1.4268798828125, -1.29998779296875, -1.173095703125, -1.04620361328125, -0.9193115234375, -0.79241943359375, -0.66552734375, -0.53863525390625, -0.4117431640625, -0.28485107421875, -0.157958984375, -0.03106689453125, 0.0958251953125, 0.22271728515625, 0.349609375, 0.47650146484375, 0.6033935546875, 0.73028564453125, 0.857177734375, 0.98406982421875, 1.1109619140625, 1.23785400390625, 1.36474609375, 1.49163818359375, 1.6185302734375, 1.74542236328125, 1.872314453125, 1.99920654296875, 2.1260986328125, 2.25299072265625, 2.3798828125, 2.50677490234375, 2.6336669921875, 2.76055908203125, 2.887451171875, 3.01434326171875, 3.1412353515625, 3.26812744140625, 3.39501953125, 3.52191162109375, 3.6488037109375, 3.77569580078125, 3.902587890625, 4.02947998046875, 4.1563720703125, 4.28326416015625, 4.41015625]}, "gradients/decoder.transformer.h.23.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 4.0, 2.0, 0.0, 0.0, 0.0, 5.0, 1.0, 9.0, 8.0, 9.0, 10.0, 18.0, 20.0, 46.0, 52.0, 156.0, 238.0, 356.0, 612.0, 1132.0, 1836.0, 3800.0, 9520.0, 48484.0, 905954.0, 57308.0, 10287.0, 4032.0, 1925.0, 1134.0, 627.0, 380.0, 254.0, 137.0, 61.0, 47.0, 30.0, 20.0, 14.0, 6.0, 11.0, 5.0, 3.0, 7.0, 2.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 2.0, 1.0, 1.0], "bins": [-70.3125, -68.2763671875, -66.240234375, -64.2041015625, -62.16796875, -60.1318359375, -58.095703125, -56.0595703125, -54.0234375, -51.9873046875, -49.951171875, -47.9150390625, -45.87890625, -43.8427734375, -41.806640625, -39.7705078125, -37.734375, -35.6982421875, -33.662109375, -31.6259765625, -29.58984375, -27.5537109375, -25.517578125, -23.4814453125, -21.4453125, -19.4091796875, -17.373046875, -15.3369140625, -13.30078125, -11.2646484375, -9.228515625, -7.1923828125, -5.15625, -3.1201171875, -1.083984375, 0.9521484375, 2.98828125, 5.0244140625, 7.060546875, 9.0966796875, 11.1328125, 13.1689453125, 15.205078125, 17.2412109375, 19.27734375, 21.3134765625, 23.349609375, 25.3857421875, 27.421875, 29.4580078125, 31.494140625, 33.5302734375, 35.56640625, 37.6025390625, 39.638671875, 41.6748046875, 43.7109375, 45.7470703125, 47.783203125, 49.8193359375, 51.85546875, 53.8916015625, 55.927734375, 57.9638671875, 60.0]}, "gradients/decoder.transformer.h.23.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 5.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 5.0, 5.0, 6.0, 6.0, 8.0, 5.0, 7.0, 14.0, 22.0, 20.0, 25.0, 29.0, 35.0, 29.0, 34.0, 56.0, 34.0, 45.0, 58.0, 86.0, 1752.0, 225.0, 100.0, 54.0, 52.0, 43.0, 38.0, 39.0, 36.0, 30.0, 30.0, 28.0, 21.0, 17.0, 12.0, 13.0, 10.0, 10.0, 7.0, 3.0, 2.0, 0.0, 2.0, 2.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-11.4375, -11.09033203125, -10.7431640625, -10.39599609375, -10.048828125, -9.70166015625, -9.3544921875, -9.00732421875, -8.66015625, -8.31298828125, -7.9658203125, -7.61865234375, -7.271484375, -6.92431640625, -6.5771484375, -6.22998046875, -5.8828125, -5.53564453125, -5.1884765625, -4.84130859375, -4.494140625, -4.14697265625, -3.7998046875, -3.45263671875, -3.10546875, -2.75830078125, -2.4111328125, -2.06396484375, -1.716796875, -1.36962890625, -1.0224609375, -0.67529296875, -0.328125, 0.01904296875, 0.3662109375, 0.71337890625, 1.060546875, 1.40771484375, 1.7548828125, 2.10205078125, 2.44921875, 2.79638671875, 3.1435546875, 3.49072265625, 3.837890625, 4.18505859375, 4.5322265625, 4.87939453125, 5.2265625, 5.57373046875, 5.9208984375, 6.26806640625, 6.615234375, 6.96240234375, 7.3095703125, 7.65673828125, 8.00390625, 8.35107421875, 8.6982421875, 9.04541015625, 9.392578125, 9.73974609375, 10.0869140625, 10.43408203125, 10.78125]}, "gradients/decoder.transformer.h.23.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 8.0, 5.0, 3.0, 11.0, 9.0, 12.0, 8.0, 18.0, 23.0, 27.0, 33.0, 33.0, 42.0, 46.0, 51.0, 83.0, 150.0, 283.0, 837.0, 4522.0, 178380.0, 2945190.0, 13572.0, 1310.0, 384.0, 175.0, 93.0, 91.0, 70.0, 48.0, 33.0, 30.0, 21.0, 23.0, 18.0, 18.0, 8.0, 12.0, 10.0, 1.0, 2.0, 8.0, 1.0, 4.0, 3.0, 1.0, 3.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 4.0], "bins": [-37.21875, -35.986328125, -34.75390625, -33.521484375, -32.2890625, -31.056640625, -29.82421875, -28.591796875, -27.359375, -26.126953125, -24.89453125, -23.662109375, -22.4296875, -21.197265625, -19.96484375, -18.732421875, -17.5, -16.267578125, -15.03515625, -13.802734375, -12.5703125, -11.337890625, -10.10546875, -8.873046875, -7.640625, -6.408203125, -5.17578125, -3.943359375, -2.7109375, -1.478515625, -0.24609375, 0.986328125, 2.21875, 3.451171875, 4.68359375, 5.916015625, 7.1484375, 8.380859375, 9.61328125, 10.845703125, 12.078125, 13.310546875, 14.54296875, 15.775390625, 17.0078125, 18.240234375, 19.47265625, 20.705078125, 21.9375, 23.169921875, 24.40234375, 25.634765625, 26.8671875, 28.099609375, 29.33203125, 30.564453125, 31.796875, 33.029296875, 34.26171875, 35.494140625, 36.7265625, 37.958984375, 39.19140625, 40.423828125, 41.65625]}, "gradients/decoder.transformer.h.23.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 948.0, 66.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-366.40008544921875, -353.9674377441406, -341.5348205566406, -329.1021728515625, -316.6695556640625, -304.2369079589844, -291.80426025390625, -279.37164306640625, -266.93902587890625, -254.5063934326172, -242.07376098632812, -229.64111328125, -217.20849609375, -204.77584838867188, -192.3432159423828, -179.91058349609375, -167.47793579101562, -155.04530334472656, -142.6126708984375, -130.18002319335938, -117.74739837646484, -105.31476593017578, -92.88212585449219, -80.44949340820312, -68.01686096191406, -55.584228515625, -43.15159225463867, -30.718955993652344, -18.28632354736328, -5.853691101074219, 6.578948974609375, 19.011581420898438, 31.4442138671875, 43.87684631347656, 56.30948257446289, 68.74211883544922, 81.17475128173828, 93.60738372802734, 106.04002380371094, 118.47265625, 130.90528869628906, 143.33792114257812, 155.7705535888672, 168.20318603515625, 180.63583374023438, 193.06845092773438, 205.5010986328125, 217.93373107910156, 230.36636352539062, 242.7989959716797, 255.23162841796875, 267.6642761230469, 280.0968933105469, 292.529541015625, 304.962158203125, 317.3948059082031, 329.82745361328125, 342.2601013183594, 354.6927185058594, 367.1253662109375, 379.5579833984375, 391.9906311035156, 404.42327880859375, 416.85589599609375, 429.28851318359375]}, "gradients/decoder.transformer.h.23.ln_1.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 2.0, 2.0, 0.0, 3.0, 0.0, 4.0, 4.0, 2.0, 2.0, 9.0, 8.0, 12.0, 29.0, 16.0, 21.0, 24.0, 29.0, 33.0, 42.0, 32.0, 51.0, 38.0, 28.0, 39.0, 45.0, 47.0, 40.0, 39.0, 44.0, 50.0, 43.0, 37.0, 35.0, 29.0, 32.0, 23.0, 24.0, 12.0, 25.0, 7.0, 13.0, 6.0, 10.0, 9.0, 4.0, 4.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 3.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-55.02482986450195, -53.43215560913086, -51.839481353759766, -50.24680709838867, -48.65413284301758, -47.061458587646484, -45.468780517578125, -43.87610626220703, -42.28343200683594, -40.690757751464844, -39.09808349609375, -37.505409240722656, -35.91273498535156, -34.32006072998047, -32.727386474609375, -31.13471031188965, -29.542037963867188, -27.949363708496094, -26.356689453125, -24.764015197753906, -23.171340942382812, -21.57866668701172, -19.985990524291992, -18.3933162689209, -16.800642013549805, -15.207967758178711, -13.615293502807617, -12.022618293762207, -10.429944038391113, -8.83726978302002, -7.244594573974609, -5.651920318603516, -4.059242248535156, -2.4665677547454834, -0.8738932609558105, 0.7187814712524414, 2.311455726623535, 3.904129981994629, 5.496805191040039, 7.089479446411133, 8.682153701782227, 10.27482795715332, 11.867502212524414, 13.460177421569824, 15.052851676940918, 16.645526885986328, 18.238201141357422, 19.830875396728516, 21.42354965209961, 23.016223907470703, 24.608898162841797, 26.20157241821289, 27.794246673583984, 29.386920928955078, 30.979597091674805, 32.57227325439453, 34.164947509765625, 35.75762176513672, 37.35029602050781, 38.942970275878906, 40.53564453125, 42.128318786621094, 43.72099304199219, 45.31366729736328, 46.906341552734375]}, "gradients/decoder.transformer.h.22.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 2.0, 7.0, 6.0, 6.0, 6.0, 7.0, 12.0, 14.0, 16.0, 22.0, 20.0, 31.0, 36.0, 48.0, 44.0, 49.0, 35.0, 62.0, 59.0, 61.0, 49.0, 57.0, 62.0, 51.0, 38.0, 37.0, 37.0, 29.0, 19.0, 23.0, 16.0, 13.0, 13.0, 9.0, 5.0, 3.0, 2.0, 3.0, 3.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.478515625, -3.355926513671875, -3.23333740234375, -3.110748291015625, -2.9881591796875, -2.865570068359375, -2.74298095703125, -2.620391845703125, -2.497802734375, -2.375213623046875, -2.25262451171875, -2.130035400390625, -2.0074462890625, -1.884857177734375, -1.76226806640625, -1.639678955078125, -1.51708984375, -1.394500732421875, -1.27191162109375, -1.149322509765625, -1.0267333984375, -0.904144287109375, -0.78155517578125, -0.658966064453125, -0.536376953125, -0.413787841796875, -0.29119873046875, -0.168609619140625, -0.0460205078125, 0.076568603515625, 0.19915771484375, 0.321746826171875, 0.4443359375, 0.566925048828125, 0.68951416015625, 0.812103271484375, 0.9346923828125, 1.057281494140625, 1.17987060546875, 1.302459716796875, 1.425048828125, 1.547637939453125, 1.67022705078125, 1.792816162109375, 1.9154052734375, 2.037994384765625, 2.16058349609375, 2.283172607421875, 2.40576171875, 2.528350830078125, 2.65093994140625, 2.773529052734375, 2.8961181640625, 3.018707275390625, 3.14129638671875, 3.263885498046875, 3.386474609375, 3.509063720703125, 3.63165283203125, 3.754241943359375, 3.8768310546875, 3.999420166015625, 4.12200927734375, 4.244598388671875, 4.3671875]}, "gradients/decoder.transformer.h.22.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 10.0, 6.0, 9.0, 13.0, 16.0, 32.0, 38.0, 57.0, 102.0, 136.0, 216.0, 372.0, 584.0, 1180.0, 3568.0, 22803.0, 4065976.0, 89175.0, 6243.0, 1740.0, 822.0, 421.0, 264.0, 174.0, 102.0, 68.0, 49.0, 26.0, 27.0, 17.0, 12.0, 11.0, 6.0, 3.0, 4.0, 4.0, 3.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-38.0625, -36.52099609375, -34.9794921875, -33.43798828125, -31.896484375, -30.35498046875, -28.8134765625, -27.27197265625, -25.73046875, -24.18896484375, -22.6474609375, -21.10595703125, -19.564453125, -18.02294921875, -16.4814453125, -14.93994140625, -13.3984375, -11.85693359375, -10.3154296875, -8.77392578125, -7.232421875, -5.69091796875, -4.1494140625, -2.60791015625, -1.06640625, 0.47509765625, 2.0166015625, 3.55810546875, 5.099609375, 6.64111328125, 8.1826171875, 9.72412109375, 11.265625, 12.80712890625, 14.3486328125, 15.89013671875, 17.431640625, 18.97314453125, 20.5146484375, 22.05615234375, 23.59765625, 25.13916015625, 26.6806640625, 28.22216796875, 29.763671875, 31.30517578125, 32.8466796875, 34.38818359375, 35.9296875, 37.47119140625, 39.0126953125, 40.55419921875, 42.095703125, 43.63720703125, 45.1787109375, 46.72021484375, 48.26171875, 49.80322265625, 51.3447265625, 52.88623046875, 54.427734375, 55.96923828125, 57.5107421875, 59.05224609375, 60.59375]}, "gradients/decoder.transformer.h.22.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 4.0, 2.0, 2.0, 4.0, 2.0, 1.0, 1.0, 4.0, 10.0, 14.0, 17.0, 27.0, 48.0, 59.0, 97.0, 167.0, 254.0, 541.0, 1225.0, 812.0, 337.0, 143.0, 91.0, 62.0, 51.0, 19.0, 28.0, 16.0, 10.0, 11.0, 7.0, 5.0, 3.0, 7.0, 3.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-17.921875, -17.4591064453125, -16.996337890625, -16.5335693359375, -16.07080078125, -15.6080322265625, -15.145263671875, -14.6824951171875, -14.2197265625, -13.7569580078125, -13.294189453125, -12.8314208984375, -12.36865234375, -11.9058837890625, -11.443115234375, -10.9803466796875, -10.517578125, -10.0548095703125, -9.592041015625, -9.1292724609375, -8.66650390625, -8.2037353515625, -7.740966796875, -7.2781982421875, -6.8154296875, -6.3526611328125, -5.889892578125, -5.4271240234375, -4.96435546875, -4.5015869140625, -4.038818359375, -3.5760498046875, -3.11328125, -2.6505126953125, -2.187744140625, -1.7249755859375, -1.26220703125, -0.7994384765625, -0.336669921875, 0.1260986328125, 0.5888671875, 1.0516357421875, 1.514404296875, 1.9771728515625, 2.43994140625, 2.9027099609375, 3.365478515625, 3.8282470703125, 4.291015625, 4.7537841796875, 5.216552734375, 5.6793212890625, 6.14208984375, 6.6048583984375, 7.067626953125, 7.5303955078125, 7.9931640625, 8.4559326171875, 8.918701171875, 9.3814697265625, 9.84423828125, 10.3070068359375, 10.769775390625, 11.2325439453125, 11.6953125]}, "gradients/decoder.transformer.h.22.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 2.0, 1.0, 4.0, 6.0, 16.0, 21.0, 37.0, 73.0, 142.0, 271.0, 551.0, 1384.0, 4537.0, 28832.0, 1054589.0, 3052914.0, 42491.0, 5647.0, 1512.0, 556.0, 302.0, 148.0, 100.0, 44.0, 23.0, 28.0, 18.0, 8.0, 10.0, 5.0, 6.0, 7.0, 3.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-24.84375, -23.76708984375, -22.6904296875, -21.61376953125, -20.537109375, -19.46044921875, -18.3837890625, -17.30712890625, -16.23046875, -15.15380859375, -14.0771484375, -13.00048828125, -11.923828125, -10.84716796875, -9.7705078125, -8.69384765625, -7.6171875, -6.54052734375, -5.4638671875, -4.38720703125, -3.310546875, -2.23388671875, -1.1572265625, -0.08056640625, 0.99609375, 2.07275390625, 3.1494140625, 4.22607421875, 5.302734375, 6.37939453125, 7.4560546875, 8.53271484375, 9.609375, 10.68603515625, 11.7626953125, 12.83935546875, 13.916015625, 14.99267578125, 16.0693359375, 17.14599609375, 18.22265625, 19.29931640625, 20.3759765625, 21.45263671875, 22.529296875, 23.60595703125, 24.6826171875, 25.75927734375, 26.8359375, 27.91259765625, 28.9892578125, 30.06591796875, 31.142578125, 32.21923828125, 33.2958984375, 34.37255859375, 35.44921875, 36.52587890625, 37.6025390625, 38.67919921875, 39.755859375, 40.83251953125, 41.9091796875, 42.98583984375, 44.0625]}, "gradients/decoder.transformer.h.22.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 18.0, 420.0, 547.0, 27.0, 5.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-326.48529052734375, -317.1799011230469, -307.87451171875, -298.569091796875, -289.2637023925781, -279.95831298828125, -270.65289306640625, -261.3475036621094, -252.0421142578125, -242.73672485351562, -233.4313201904297, -224.12591552734375, -214.82052612304688, -205.51513671875, -196.20973205566406, -186.90432739257812, -177.59893798828125, -168.29354858398438, -158.98814392089844, -149.6827392578125, -140.37734985351562, -131.07196044921875, -121.76655578613281, -112.4611587524414, -103.15576171875, -93.8503646850586, -84.54496765136719, -75.23957061767578, -65.93417358398438, -56.62877655029297, -47.32337951660156, -38.017982482910156, -28.712615966796875, -19.40721893310547, -10.101821899414062, -0.7964248657226562, 8.50897216796875, 17.814369201660156, 27.119766235351562, 36.42516326904297, 45.730560302734375, 55.03595733642578, 64.34135437011719, 73.6467514038086, 82.9521484375, 92.2575454711914, 101.56294250488281, 110.86833953857422, 120.17373657226562, 129.4791259765625, 138.78453063964844, 148.08993530273438, 157.39532470703125, 166.70071411132812, 176.00611877441406, 185.3115234375, 194.61691284179688, 203.92230224609375, 213.2277069091797, 222.53311157226562, 231.8385009765625, 241.14389038085938, 250.4492950439453, 259.75469970703125, 269.0600891113281]}, "gradients/decoder.transformer.h.22.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 4.0, 3.0, 10.0, 5.0, 7.0, 6.0, 10.0, 12.0, 14.0, 13.0, 24.0, 21.0, 28.0, 36.0, 41.0, 49.0, 37.0, 56.0, 51.0, 56.0, 50.0, 50.0, 54.0, 46.0, 39.0, 45.0, 44.0, 38.0, 40.0, 25.0, 21.0, 17.0, 16.0, 12.0, 4.0, 8.0, 6.0, 7.0, 4.0, 5.0, 0.0, 2.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-40.82464599609375, -39.291019439697266, -37.757389068603516, -36.22376251220703, -34.69013214111328, -33.1565055847168, -31.622879028320312, -30.089250564575195, -28.555622100830078, -27.02199363708496, -25.488365173339844, -23.95473861694336, -22.421110153198242, -20.887481689453125, -19.35385513305664, -17.820226669311523, -16.286598205566406, -14.752969741821289, -13.219342231750488, -11.685714721679688, -10.15208625793457, -8.618457794189453, -7.084830284118652, -5.551202774047852, -4.017574310302734, -2.4839463233947754, -0.9503183364868164, 0.5833096504211426, 2.1169376373291016, 3.6505656242370605, 5.1841936111450195, 6.71782112121582, 8.251449584960938, 9.785078048706055, 11.318705558776855, 12.852333068847656, 14.385961532592773, 15.91958999633789, 17.453216552734375, 18.986845016479492, 20.52047348022461, 22.054101943969727, 23.587730407714844, 25.121356964111328, 26.654985427856445, 28.188613891601562, 29.722240447998047, 31.255868911743164, 32.78949737548828, 34.323123931884766, 35.856754302978516, 37.390380859375, 38.92401123046875, 40.457637786865234, 41.99126434326172, 43.52489471435547, 45.05852127075195, 46.59214782714844, 48.12577819824219, 49.65940475463867, 51.193031311035156, 52.726661682128906, 54.26028823852539, 55.793914794921875, 57.327545166015625]}, "gradients/decoder.transformer.h.22.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 1.0, 0.0, 2.0, 3.0, 4.0, 3.0, 3.0, 4.0, 8.0, 8.0, 13.0, 18.0, 18.0, 12.0, 21.0, 19.0, 32.0, 34.0, 38.0, 35.0, 40.0, 43.0, 33.0, 54.0, 55.0, 45.0, 45.0, 49.0, 48.0, 40.0, 44.0, 39.0, 43.0, 32.0, 21.0, 16.0, 15.0, 18.0, 13.0, 11.0, 6.0, 10.0, 7.0, 3.0, 2.0, 2.0, 2.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-3.08984375, -2.97979736328125, -2.8697509765625, -2.75970458984375, -2.649658203125, -2.53961181640625, -2.4295654296875, -2.31951904296875, -2.20947265625, -2.09942626953125, -1.9893798828125, -1.87933349609375, -1.769287109375, -1.65924072265625, -1.5491943359375, -1.43914794921875, -1.3291015625, -1.21905517578125, -1.1090087890625, -0.99896240234375, -0.888916015625, -0.77886962890625, -0.6688232421875, -0.55877685546875, -0.44873046875, -0.33868408203125, -0.2286376953125, -0.11859130859375, -0.008544921875, 0.10150146484375, 0.2115478515625, 0.32159423828125, 0.431640625, 0.54168701171875, 0.6517333984375, 0.76177978515625, 0.871826171875, 0.98187255859375, 1.0919189453125, 1.20196533203125, 1.31201171875, 1.42205810546875, 1.5321044921875, 1.64215087890625, 1.752197265625, 1.86224365234375, 1.9722900390625, 2.08233642578125, 2.1923828125, 2.30242919921875, 2.4124755859375, 2.52252197265625, 2.632568359375, 2.74261474609375, 2.8526611328125, 2.96270751953125, 3.07275390625, 3.18280029296875, 3.2928466796875, 3.40289306640625, 3.512939453125, 3.62298583984375, 3.7330322265625, 3.84307861328125, 3.953125]}, "gradients/decoder.transformer.h.22.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 4.0, 2.0, 6.0, 11.0, 14.0, 14.0, 31.0, 36.0, 61.0, 79.0, 103.0, 176.0, 241.0, 344.0, 543.0, 839.0, 1297.0, 1963.0, 3025.0, 4617.0, 7575.0, 12104.0, 19853.0, 34267.0, 61350.0, 118942.0, 343566.0, 217121.0, 95263.0, 51158.0, 28749.0, 16820.0, 10393.0, 6403.0, 3970.0, 2608.0, 1685.0, 1078.0, 752.0, 487.0, 311.0, 226.0, 153.0, 97.0, 78.0, 45.0, 39.0, 16.0, 11.0, 15.0, 10.0, 4.0, 8.0, 4.0, 1.0, 1.0, 0.0, 2.0], "bins": [-0.1358642578125, -0.1317272186279297, -0.12759017944335938, -0.12345314025878906, -0.11931610107421875, -0.11517906188964844, -0.11104202270507812, -0.10690498352050781, -0.1027679443359375, -0.09863090515136719, -0.09449386596679688, -0.09035682678222656, -0.08621978759765625, -0.08208274841308594, -0.07794570922851562, -0.07380867004394531, -0.069671630859375, -0.06553459167480469, -0.061397552490234375, -0.05726051330566406, -0.05312347412109375, -0.04898643493652344, -0.044849395751953125, -0.04071235656738281, -0.0365753173828125, -0.03243827819824219, -0.028301239013671875, -0.024164199829101562, -0.02002716064453125, -0.015890121459960938, -0.011753082275390625, -0.0076160430908203125, -0.00347900390625, 0.0006580352783203125, 0.004795074462890625, 0.008932113647460938, 0.01306915283203125, 0.017206192016601562, 0.021343231201171875, 0.025480270385742188, 0.0296173095703125, 0.03375434875488281, 0.037891387939453125, 0.04202842712402344, 0.04616546630859375, 0.05030250549316406, 0.054439544677734375, 0.05857658386230469, 0.062713623046875, 0.06685066223144531, 0.07098770141601562, 0.07512474060058594, 0.07926177978515625, 0.08339881896972656, 0.08753585815429688, 0.09167289733886719, 0.0958099365234375, 0.09994697570800781, 0.10408401489257812, 0.10822105407714844, 0.11235809326171875, 0.11649513244628906, 0.12063217163085938, 0.12476921081542969, 0.12890625]}, "gradients/decoder.transformer.h.22.crossattention.c_attn.bias": {"_type": "histogram", "values": [4.0, 2.0, 4.0, 1.0, 3.0, 4.0, 4.0, 6.0, 4.0, 3.0, 14.0, 5.0, 6.0, 13.0, 8.0, 21.0, 14.0, 19.0, 23.0, 17.0, 28.0, 39.0, 28.0, 28.0, 28.0, 38.0, 26.0, 34.0, 34.0, 38.0, 1048.0, 41.0, 38.0, 33.0, 29.0, 26.0, 31.0, 35.0, 22.0, 27.0, 29.0, 19.0, 17.0, 19.0, 20.0, 18.0, 14.0, 20.0, 10.0, 13.0, 6.0, 4.0, 9.0, 3.0, 4.0, 4.0, 1.0, 6.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0], "bins": [-2.0625, -1.993927001953125, -1.92535400390625, -1.856781005859375, -1.7882080078125, -1.719635009765625, -1.65106201171875, -1.582489013671875, -1.513916015625, -1.445343017578125, -1.37677001953125, -1.308197021484375, -1.2396240234375, -1.171051025390625, -1.10247802734375, -1.033905029296875, -0.96533203125, -0.896759033203125, -0.82818603515625, -0.759613037109375, -0.6910400390625, -0.622467041015625, -0.55389404296875, -0.485321044921875, -0.416748046875, -0.348175048828125, -0.27960205078125, -0.211029052734375, -0.1424560546875, -0.073883056640625, -0.00531005859375, 0.063262939453125, 0.1318359375, 0.200408935546875, 0.26898193359375, 0.337554931640625, 0.4061279296875, 0.474700927734375, 0.54327392578125, 0.611846923828125, 0.680419921875, 0.748992919921875, 0.81756591796875, 0.886138916015625, 0.9547119140625, 1.023284912109375, 1.09185791015625, 1.160430908203125, 1.22900390625, 1.297576904296875, 1.36614990234375, 1.434722900390625, 1.5032958984375, 1.571868896484375, 1.64044189453125, 1.709014892578125, 1.777587890625, 1.846160888671875, 1.91473388671875, 1.983306884765625, 2.0518798828125, 2.120452880859375, 2.18902587890625, 2.257598876953125, 2.326171875]}, "gradients/decoder.transformer.h.22.crossattention.c_attn.weight": {"_type": "histogram", "values": [3.0, 2.0, 7.0, 12.0, 22.0, 42.0, 56.0, 80.0, 93.0, 147.0, 206.0, 282.0, 379.0, 500.0, 712.0, 879.0, 1249.0, 1651.0, 2329.0, 3082.0, 4050.0, 5636.0, 7709.0, 10740.0, 14969.0, 21421.0, 30325.0, 43943.0, 64290.0, 98096.0, 200043.0, 1265755.0, 101392.0, 65191.0, 44232.0, 30407.0, 21446.0, 15308.0, 10912.0, 7884.0, 5778.0, 4211.0, 3055.0, 2296.0, 1605.0, 1235.0, 931.0, 701.0, 489.0, 404.0, 298.0, 192.0, 154.0, 107.0, 76.0, 54.0, 37.0, 24.0, 10.0, 6.0, 4.0, 1.0, 1.0, 1.0], "bins": [-0.052734375, -0.05103492736816406, -0.049335479736328125, -0.04763603210449219, -0.04593658447265625, -0.04423713684082031, -0.042537689208984375, -0.04083824157714844, -0.0391387939453125, -0.03743934631347656, -0.035739898681640625, -0.03404045104980469, -0.03234100341796875, -0.030641555786132812, -0.028942108154296875, -0.027242660522460938, -0.025543212890625, -0.023843765258789062, -0.022144317626953125, -0.020444869995117188, -0.01874542236328125, -0.017045974731445312, -0.015346527099609375, -0.013647079467773438, -0.0119476318359375, -0.010248184204101562, -0.008548736572265625, -0.0068492889404296875, -0.00514984130859375, -0.0034503936767578125, -0.001750946044921875, -5.14984130859375e-05, 0.00164794921875, 0.0033473968505859375, 0.005046844482421875, 0.0067462921142578125, 0.00844573974609375, 0.010145187377929688, 0.011844635009765625, 0.013544082641601562, 0.0152435302734375, 0.016942977905273438, 0.018642425537109375, 0.020341873168945312, 0.02204132080078125, 0.023740768432617188, 0.025440216064453125, 0.027139663696289062, 0.028839111328125, 0.030538558959960938, 0.032238006591796875, 0.03393745422363281, 0.03563690185546875, 0.03733634948730469, 0.039035797119140625, 0.04073524475097656, 0.0424346923828125, 0.04413414001464844, 0.045833587646484375, 0.04753303527832031, 0.04923248291015625, 0.05093193054199219, 0.052631378173828125, 0.05433082580566406, 0.0560302734375]}, "gradients/decoder.transformer.h.22.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 3.0, 0.0, 1.0, 0.0, 1.0, 6.0, 4.0, 7.0, 3.0, 5.0, 9.0, 2.0, 5.0, 4.0, 11.0, 9.0, 13.0, 18.0, 23.0, 30.0, 27.0, 23.0, 56.0, 67.0, 95.0, 160.0, 86.0, 37.0, 77.0, 42.0, 40.0, 29.0, 24.0, 23.0, 9.0, 11.0, 13.0, 8.0, 7.0, 6.0, 1.0, 3.0, 6.0, 5.0, 4.0, 4.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.2186508178710938e-06, -3.1087547540664673e-06, -2.998858690261841e-06, -2.8889626264572144e-06, -2.779066562652588e-06, -2.6691704988479614e-06, -2.559274435043335e-06, -2.4493783712387085e-06, -2.339482307434082e-06, -2.2295862436294556e-06, -2.119690179824829e-06, -2.0097941160202026e-06, -1.8998980522155762e-06, -1.7900019884109497e-06, -1.6801059246063232e-06, -1.5702098608016968e-06, -1.4603137969970703e-06, -1.3504177331924438e-06, -1.2405216693878174e-06, -1.130625605583191e-06, -1.0207295417785645e-06, -9.10833477973938e-07, -8.009374141693115e-07, -6.910413503646851e-07, -5.811452865600586e-07, -4.7124922275543213e-07, -3.6135315895080566e-07, -2.514570951461792e-07, -1.4156103134155273e-07, -3.166496753692627e-08, 7.82310962677002e-08, 1.8812716007232666e-07, 2.980232238769531e-07, 4.079192876815796e-07, 5.178153514862061e-07, 6.277114152908325e-07, 7.37607479095459e-07, 8.475035429000854e-07, 9.57399606704712e-07, 1.0672956705093384e-06, 1.1771917343139648e-06, 1.2870877981185913e-06, 1.3969838619232178e-06, 1.5068799257278442e-06, 1.6167759895324707e-06, 1.7266720533370972e-06, 1.8365681171417236e-06, 1.94646418094635e-06, 2.0563602447509766e-06, 2.166256308555603e-06, 2.2761523723602295e-06, 2.386048436164856e-06, 2.4959444999694824e-06, 2.605840563774109e-06, 2.7157366275787354e-06, 2.825632691383362e-06, 2.9355287551879883e-06, 3.0454248189926147e-06, 3.155320882797241e-06, 3.2652169466018677e-06, 3.375113010406494e-06, 3.4850090742111206e-06, 3.594905138015747e-06, 3.7048012018203735e-06, 3.814697265625e-06]}, "gradients/decoder.transformer.h.22.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 4.0, 2.0, 5.0, 12.0, 15.0, 10.0, 7.0, 20.0, 5.0, 10.0, 19.0, 21.0, 25.0, 26.0, 45.0, 55.0, 73.0, 81.0, 127.0, 175.0, 401.0, 2389.0, 975417.0, 67813.0, 846.0, 294.0, 133.0, 104.0, 90.0, 65.0, 52.0, 53.0, 28.0, 18.0, 24.0, 19.0, 9.0, 16.0, 12.0, 14.0, 15.0, 3.0, 5.0, 3.0, 3.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 3.0, 0.0, 2.0], "bins": [-6.29425048828125e-05, -6.096344441175461e-05, -5.8984383940696716e-05, -5.7005323469638824e-05, -5.502626299858093e-05, -5.304720252752304e-05, -5.106814205646515e-05, -4.908908158540726e-05, -4.7110021114349365e-05, -4.5130960643291473e-05, -4.315190017223358e-05, -4.117283970117569e-05, -3.91937792301178e-05, -3.7214718759059906e-05, -3.5235658288002014e-05, -3.325659781694412e-05, -3.127753734588623e-05, -2.929847687482834e-05, -2.7319416403770447e-05, -2.5340355932712555e-05, -2.3361295461654663e-05, -2.138223499059677e-05, -1.940317451953888e-05, -1.7424114048480988e-05, -1.5445053577423096e-05, -1.3465993106365204e-05, -1.1486932635307312e-05, -9.50787216424942e-06, -7.528811693191528e-06, -5.5497512221336365e-06, -3.5706907510757446e-06, -1.5916302800178528e-06, 3.8743019104003906e-07, 2.366490662097931e-06, 4.345551133155823e-06, 6.324611604213715e-06, 8.303672075271606e-06, 1.0282732546329498e-05, 1.226179301738739e-05, 1.4240853488445282e-05, 1.6219913959503174e-05, 1.8198974430561066e-05, 2.0178034901618958e-05, 2.215709537267685e-05, 2.413615584373474e-05, 2.6115216314792633e-05, 2.8094276785850525e-05, 3.0073337256908417e-05, 3.205239772796631e-05, 3.40314581990242e-05, 3.601051867008209e-05, 3.7989579141139984e-05, 3.9968639612197876e-05, 4.194770008325577e-05, 4.392676055431366e-05, 4.590582102537155e-05, 4.788488149642944e-05, 4.9863941967487335e-05, 5.184300243854523e-05, 5.382206290960312e-05, 5.580112338066101e-05, 5.77801838517189e-05, 5.9759244322776794e-05, 6.173830479383469e-05, 6.371736526489258e-05]}, "gradients/decoder.transformer.h.22.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 4.0, 97.0, 816.0, 98.0, 4.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.7749847756931558e-05, -1.7276232028962113e-05, -1.6802616300992668e-05, -1.632899875403382e-05, -1.5855383026064374e-05, -1.5381767298094928e-05, -1.4908151570125483e-05, -1.4434534932661336e-05, -1.3960918295197189e-05, -1.3487302567227744e-05, -1.3013685929763597e-05, -1.2540070201794151e-05, -1.2066453564330004e-05, -1.1592837836360559e-05, -1.1119222108391114e-05, -1.0645605470926967e-05, -1.0171989742957521e-05, -9.698374014988076e-06, -9.224757377523929e-06, -8.751141649554484e-06, -8.277525012090337e-06, -7.803909284120891e-06, -7.330293101404095e-06, -6.856676918687299e-06, -6.383060735970503e-06, -5.9094445532537065e-06, -5.43582837053691e-06, -4.962212187820114e-06, -4.488596459850669e-06, -4.014979822386522e-06, -3.5413640944170766e-06, -3.0677479117002804e-06, -2.594131728983484e-06, -2.120515546266688e-06, -1.6468994772367296e-06, -1.1732834082067711e-06, -6.99667225489975e-07, -2.2605104277317878e-07, 2.4756491256994195e-07, 7.211810952867381e-07, 1.1947972780035343e-06, 1.6684134607203305e-06, 2.1420296434371267e-06, 2.6156455987802474e-06, 3.0892617814970436e-06, 3.5628779642138397e-06, 4.0364939195569605e-06, 4.510110102273757e-06, 4.983726284990553e-06, 5.457342467707349e-06, 5.930958650424145e-06, 6.4045743783935905e-06, 6.8781910158577375e-06, 7.351806743827183e-06, 7.82542338129133e-06, 8.299039109260775e-06, 8.77265483723022e-06, 9.246270565199666e-06, 9.719887202663813e-06, 1.0193502930633258e-05, 1.0667119568097405e-05, 1.114073529606685e-05, 1.1614351024036296e-05, 1.2087967661500443e-05, 1.256158429896459e-05]}, "gradients/decoder.transformer.h.22.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 4.0, 2.0, 4.0, 7.0, 0.0, 4.0, 8.0, 9.0, 14.0, 19.0, 15.0, 0.0, 26.0, 28.0, 33.0, 42.0, 19.0, 0.0, 41.0, 49.0, 38.0, 45.0, 34.0, 0.0, 42.0, 53.0, 53.0, 46.0, 40.0, 33.0, 0.0, 43.0, 24.0, 37.0, 35.0, 32.0, 0.0, 28.0, 22.0, 20.0, 12.0, 10.0, 13.0, 0.0, 11.0, 8.0, 5.0, 5.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 2.0], "bins": [-1.6689300537109375e-06, -1.6186386346817017e-06, -1.5683472156524658e-06, -1.51805579662323e-06, -1.4677643775939941e-06, -1.4174729585647583e-06, -1.3671815395355225e-06, -1.3168901205062866e-06, -1.2665987014770508e-06, -1.216307282447815e-06, -1.166015863418579e-06, -1.1157244443893433e-06, -1.0654330253601074e-06, -1.0151416063308716e-06, -9.648501873016357e-07, -9.145587682723999e-07, -8.642673492431641e-07, -8.139759302139282e-07, -7.636845111846924e-07, -7.133930921554565e-07, -6.631016731262207e-07, -6.128102540969849e-07, -5.62518835067749e-07, -5.122274160385132e-07, -4.6193599700927734e-07, -4.116445779800415e-07, -3.6135315895080566e-07, -3.110617399215698e-07, -2.60770320892334e-07, -2.1047890186309814e-07, -1.601874828338623e-07, -1.0989606380462646e-07, -5.960464477539063e-08, -9.313225746154785e-09, 4.0978193283081055e-08, 9.12696123123169e-08, 1.4156103134155273e-07, 1.9185245037078857e-07, 2.421438694000244e-07, 2.9243528842926025e-07, 3.427267074584961e-07, 3.9301812648773193e-07, 4.4330954551696777e-07, 4.936009645462036e-07, 5.438923835754395e-07, 5.941838026046753e-07, 6.444752216339111e-07, 6.94766640663147e-07, 7.450580596923828e-07, 7.953494787216187e-07, 8.456408977508545e-07, 8.959323167800903e-07, 9.462237358093262e-07, 9.96515154838562e-07, 1.0468065738677979e-06, 1.0970979928970337e-06, 1.1473894119262695e-06, 1.1976808309555054e-06, 1.2479722499847412e-06, 1.298263669013977e-06, 1.3485550880432129e-06, 1.3988465070724487e-06, 1.4491379261016846e-06, 1.4994293451309204e-06, 1.5497207641601562e-06]}, "gradients/decoder.transformer.h.22.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 1.0, 0.0, 2.0, 3.0, 4.0, 3.0, 3.0, 4.0, 8.0, 8.0, 13.0, 18.0, 18.0, 12.0, 21.0, 19.0, 32.0, 34.0, 38.0, 35.0, 40.0, 43.0, 33.0, 54.0, 55.0, 45.0, 45.0, 49.0, 48.0, 40.0, 44.0, 39.0, 43.0, 32.0, 21.0, 16.0, 15.0, 18.0, 13.0, 11.0, 6.0, 10.0, 7.0, 3.0, 2.0, 2.0, 2.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-3.08984375, -2.97979736328125, -2.8697509765625, -2.75970458984375, -2.649658203125, -2.53961181640625, -2.4295654296875, -2.31951904296875, -2.20947265625, -2.09942626953125, -1.9893798828125, -1.87933349609375, -1.769287109375, -1.65924072265625, -1.5491943359375, -1.43914794921875, -1.3291015625, -1.21905517578125, -1.1090087890625, -0.99896240234375, -0.888916015625, -0.77886962890625, -0.6688232421875, -0.55877685546875, -0.44873046875, -0.33868408203125, -0.2286376953125, -0.11859130859375, -0.008544921875, 0.10150146484375, 0.2115478515625, 0.32159423828125, 0.431640625, 0.54168701171875, 0.6517333984375, 0.76177978515625, 0.871826171875, 0.98187255859375, 1.0919189453125, 1.20196533203125, 1.31201171875, 1.42205810546875, 1.5321044921875, 1.64215087890625, 1.752197265625, 1.86224365234375, 1.9722900390625, 2.08233642578125, 2.1923828125, 2.30242919921875, 2.4124755859375, 2.52252197265625, 2.632568359375, 2.74261474609375, 2.8526611328125, 2.96270751953125, 3.07275390625, 3.18280029296875, 3.2928466796875, 3.40289306640625, 3.512939453125, 3.62298583984375, 3.7330322265625, 3.84307861328125, 3.953125]}, "gradients/decoder.transformer.h.22.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 3.0, 1.0, 2.0, 3.0, 4.0, 4.0, 10.0, 6.0, 13.0, 11.0, 11.0, 28.0, 34.0, 48.0, 61.0, 96.0, 128.0, 232.0, 412.0, 730.0, 1554.0, 3562.0, 9556.0, 31740.0, 139003.0, 656729.0, 152974.0, 34140.0, 10247.0, 3668.0, 1628.0, 783.0, 423.0, 254.0, 139.0, 88.0, 73.0, 51.0, 32.0, 22.0, 23.0, 11.0, 7.0, 6.0, 6.0, 3.0, 3.0, 3.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-6.27734375, -6.07171630859375, -5.8660888671875, -5.66046142578125, -5.454833984375, -5.24920654296875, -5.0435791015625, -4.83795166015625, -4.63232421875, -4.42669677734375, -4.2210693359375, -4.01544189453125, -3.809814453125, -3.60418701171875, -3.3985595703125, -3.19293212890625, -2.9873046875, -2.78167724609375, -2.5760498046875, -2.37042236328125, -2.164794921875, -1.95916748046875, -1.7535400390625, -1.54791259765625, -1.34228515625, -1.13665771484375, -0.9310302734375, -0.72540283203125, -0.519775390625, -0.31414794921875, -0.1085205078125, 0.09710693359375, 0.302734375, 0.50836181640625, 0.7139892578125, 0.91961669921875, 1.125244140625, 1.33087158203125, 1.5364990234375, 1.74212646484375, 1.94775390625, 2.15338134765625, 2.3590087890625, 2.56463623046875, 2.770263671875, 2.97589111328125, 3.1815185546875, 3.38714599609375, 3.5927734375, 3.79840087890625, 4.0040283203125, 4.20965576171875, 4.415283203125, 4.62091064453125, 4.8265380859375, 5.03216552734375, 5.23779296875, 5.44342041015625, 5.6490478515625, 5.85467529296875, 6.060302734375, 6.26593017578125, 6.4715576171875, 6.67718505859375, 6.8828125]}, "gradients/decoder.transformer.h.22.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 2.0, 2.0, 1.0, 3.0, 2.0, 2.0, 2.0, 7.0, 8.0, 10.0, 10.0, 11.0, 10.0, 14.0, 9.0, 18.0, 22.0, 24.0, 35.0, 30.0, 30.0, 43.0, 38.0, 39.0, 48.0, 94.0, 1965.0, 115.0, 51.0, 55.0, 44.0, 32.0, 38.0, 34.0, 40.0, 21.0, 24.0, 18.0, 14.0, 10.0, 21.0, 9.0, 11.0, 11.0, 5.0, 9.0, 7.0, 7.0, 1.0, 0.0, 3.0, 2.0, 3.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-10.9453125, -10.6083984375, -10.271484375, -9.9345703125, -9.59765625, -9.2607421875, -8.923828125, -8.5869140625, -8.25, -7.9130859375, -7.576171875, -7.2392578125, -6.90234375, -6.5654296875, -6.228515625, -5.8916015625, -5.5546875, -5.2177734375, -4.880859375, -4.5439453125, -4.20703125, -3.8701171875, -3.533203125, -3.1962890625, -2.859375, -2.5224609375, -2.185546875, -1.8486328125, -1.51171875, -1.1748046875, -0.837890625, -0.5009765625, -0.1640625, 0.1728515625, 0.509765625, 0.8466796875, 1.18359375, 1.5205078125, 1.857421875, 2.1943359375, 2.53125, 2.8681640625, 3.205078125, 3.5419921875, 3.87890625, 4.2158203125, 4.552734375, 4.8896484375, 5.2265625, 5.5634765625, 5.900390625, 6.2373046875, 6.57421875, 6.9111328125, 7.248046875, 7.5849609375, 7.921875, 8.2587890625, 8.595703125, 8.9326171875, 9.26953125, 9.6064453125, 9.943359375, 10.2802734375, 10.6171875]}, "gradients/decoder.transformer.h.22.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 3.0, 2.0, 3.0, 0.0, 2.0, 5.0, 7.0, 7.0, 9.0, 8.0, 12.0, 10.0, 21.0, 10.0, 14.0, 24.0, 24.0, 26.0, 45.0, 49.0, 70.0, 79.0, 135.0, 293.0, 614.0, 10195.0, 3123506.0, 9198.0, 564.0, 273.0, 120.0, 85.0, 57.0, 46.0, 41.0, 29.0, 22.0, 19.0, 12.0, 12.0, 10.0, 9.0, 10.0, 12.0, 8.0, 5.0, 3.0, 5.0, 1.0, 2.0, 1.0, 2.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0], "bins": [-27.984375, -27.095703125, -26.20703125, -25.318359375, -24.4296875, -23.541015625, -22.65234375, -21.763671875, -20.875, -19.986328125, -19.09765625, -18.208984375, -17.3203125, -16.431640625, -15.54296875, -14.654296875, -13.765625, -12.876953125, -11.98828125, -11.099609375, -10.2109375, -9.322265625, -8.43359375, -7.544921875, -6.65625, -5.767578125, -4.87890625, -3.990234375, -3.1015625, -2.212890625, -1.32421875, -0.435546875, 0.453125, 1.341796875, 2.23046875, 3.119140625, 4.0078125, 4.896484375, 5.78515625, 6.673828125, 7.5625, 8.451171875, 9.33984375, 10.228515625, 11.1171875, 12.005859375, 12.89453125, 13.783203125, 14.671875, 15.560546875, 16.44921875, 17.337890625, 18.2265625, 19.115234375, 20.00390625, 20.892578125, 21.78125, 22.669921875, 23.55859375, 24.447265625, 25.3359375, 26.224609375, 27.11328125, 28.001953125, 28.890625]}, "gradients/decoder.transformer.h.22.ln_1.weight": {"_type": "histogram", "values": [1017.0, 3.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.343914985656738, 5.451971054077148, 15.247857093811035, 25.043743133544922, 34.839630126953125, 44.63551712036133, 54.431400299072266, 64.22728729248047, 74.0231704711914, 83.81905364990234, 93.61494445800781, 103.41082763671875, 113.20671081542969, 123.00260162353516, 132.79849243164062, 142.59437561035156, 152.3902587890625, 162.18614196777344, 171.98202514648438, 181.77792358398438, 191.5738067626953, 201.36968994140625, 211.1655731201172, 220.96145629882812, 230.75735473632812, 240.55323791503906, 250.34912109375, 260.14501953125, 269.9408874511719, 279.7367858886719, 289.53265380859375, 299.32855224609375, 309.1244201660156, 318.9203186035156, 328.7161865234375, 338.5120849609375, 348.3079528808594, 358.1038513183594, 367.89971923828125, 377.69561767578125, 387.49151611328125, 397.28741455078125, 407.0832824707031, 416.8791809082031, 426.675048828125, 436.470947265625, 446.266845703125, 456.0627136230469, 465.85858154296875, 475.65447998046875, 485.4503479003906, 495.2462463378906, 505.0421142578125, 514.8380126953125, 524.6339111328125, 534.4298095703125, 544.2257080078125, 554.0216064453125, 563.8175048828125, 573.6133422851562, 583.4092407226562, 593.2051391601562, 603.0010375976562, 612.7969360351562, 622.5927734375]}, "gradients/decoder.transformer.h.22.ln_1.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 3.0, 2.0, 0.0, 7.0, 3.0, 3.0, 2.0, 4.0, 14.0, 15.0, 6.0, 11.0, 18.0, 14.0, 28.0, 31.0, 33.0, 28.0, 27.0, 27.0, 33.0, 30.0, 34.0, 39.0, 34.0, 40.0, 33.0, 37.0, 39.0, 29.0, 39.0, 33.0, 34.0, 31.0, 30.0, 35.0, 29.0, 23.0, 19.0, 24.0, 11.0, 9.0, 12.0, 7.0, 8.0, 9.0, 6.0, 2.0, 7.0, 4.0, 6.0, 3.0, 4.0, 2.0, 4.0, 3.0, 0.0, 1.0], "bins": [-31.377431869506836, -30.399642944335938, -29.42185401916504, -28.44406509399414, -27.466278076171875, -26.488487243652344, -25.510700225830078, -24.53291130065918, -23.55512237548828, -22.577333450317383, -21.599544525146484, -20.621755599975586, -19.643966674804688, -18.666179656982422, -17.688390731811523, -16.710601806640625, -15.732812881469727, -14.755023956298828, -13.77723503112793, -12.799447059631348, -11.82165813446045, -10.84386920928955, -9.866081237792969, -8.88829231262207, -7.910503387451172, -6.932714462280273, -5.954926013946533, -4.977137565612793, -3.9993486404418945, -3.021559715270996, -2.043771266937256, -1.0659828186035156, -0.08819198608398438, 0.889596700668335, 1.8673853874206543, 2.8451740741729736, 3.822962760925293, 4.800751686096191, 5.778540134429932, 6.756328582763672, 7.73411750793457, 8.711906433105469, 9.689695358276367, 10.66748332977295, 11.645272254943848, 12.623061180114746, 13.600849151611328, 14.578638076782227, 15.556427001953125, 16.534215927124023, 17.512004852294922, 18.48979377746582, 19.46758270263672, 20.445369720458984, 21.423158645629883, 22.40094757080078, 23.37873649597168, 24.356525421142578, 25.334314346313477, 26.312103271484375, 27.28989028930664, 28.267681121826172, 29.245468139648438, 30.223257064819336, 31.201045989990234]}, "gradients/decoder.transformer.h.21.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 3.0, 0.0, 1.0, 2.0, 4.0, 4.0, 1.0, 6.0, 4.0, 5.0, 14.0, 15.0, 16.0, 20.0, 14.0, 16.0, 27.0, 34.0, 34.0, 40.0, 31.0, 51.0, 35.0, 42.0, 51.0, 60.0, 33.0, 46.0, 51.0, 47.0, 50.0, 32.0, 45.0, 35.0, 28.0, 18.0, 15.0, 17.0, 14.0, 17.0, 7.0, 7.0, 8.0, 6.0, 3.0, 2.0, 2.0, 2.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.076171875, -2.964874267578125, -2.85357666015625, -2.742279052734375, -2.6309814453125, -2.519683837890625, -2.40838623046875, -2.297088623046875, -2.185791015625, -2.074493408203125, -1.96319580078125, -1.851898193359375, -1.7406005859375, -1.629302978515625, -1.51800537109375, -1.406707763671875, -1.29541015625, -1.184112548828125, -1.07281494140625, -0.961517333984375, -0.8502197265625, -0.738922119140625, -0.62762451171875, -0.516326904296875, -0.405029296875, -0.293731689453125, -0.18243408203125, -0.071136474609375, 0.0401611328125, 0.151458740234375, 0.26275634765625, 0.374053955078125, 0.4853515625, 0.596649169921875, 0.70794677734375, 0.819244384765625, 0.9305419921875, 1.041839599609375, 1.15313720703125, 1.264434814453125, 1.375732421875, 1.487030029296875, 1.59832763671875, 1.709625244140625, 1.8209228515625, 1.932220458984375, 2.04351806640625, 2.154815673828125, 2.26611328125, 2.377410888671875, 2.48870849609375, 2.600006103515625, 2.7113037109375, 2.822601318359375, 2.93389892578125, 3.045196533203125, 3.156494140625, 3.267791748046875, 3.37908935546875, 3.490386962890625, 3.6016845703125, 3.712982177734375, 3.82427978515625, 3.935577392578125, 4.046875]}, "gradients/decoder.transformer.h.21.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 4.0, 0.0, 0.0, 2.0, 2.0, 6.0, 7.0, 1.0, 8.0, 15.0, 15.0, 24.0, 32.0, 28.0, 55.0, 81.0, 126.0, 209.0, 470.0, 997.0, 3175.0, 20072.0, 2843924.0, 1304403.0, 15915.0, 2779.0, 888.0, 412.0, 243.0, 127.0, 74.0, 45.0, 39.0, 34.0, 23.0, 15.0, 8.0, 14.0, 5.0, 6.0, 2.0, 6.0, 4.0, 2.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-24.5625, -23.578125, -22.59375, -21.609375, -20.625, -19.640625, -18.65625, -17.671875, -16.6875, -15.703125, -14.71875, -13.734375, -12.75, -11.765625, -10.78125, -9.796875, -8.8125, -7.828125, -6.84375, -5.859375, -4.875, -3.890625, -2.90625, -1.921875, -0.9375, 0.046875, 1.03125, 2.015625, 3.0, 3.984375, 4.96875, 5.953125, 6.9375, 7.921875, 8.90625, 9.890625, 10.875, 11.859375, 12.84375, 13.828125, 14.8125, 15.796875, 16.78125, 17.765625, 18.75, 19.734375, 20.71875, 21.703125, 22.6875, 23.671875, 24.65625, 25.640625, 26.625, 27.609375, 28.59375, 29.578125, 30.5625, 31.546875, 32.53125, 33.515625, 34.5, 35.484375, 36.46875, 37.453125, 38.4375]}, "gradients/decoder.transformer.h.21.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 2.0, 5.0, 2.0, 4.0, 10.0, 4.0, 8.0, 19.0, 23.0, 33.0, 59.0, 85.0, 134.0, 234.0, 428.0, 892.0, 938.0, 557.0, 245.0, 126.0, 73.0, 51.0, 44.0, 33.0, 20.0, 9.0, 8.0, 7.0, 11.0, 3.0, 5.0, 5.0, 1.0, 2.0, 0.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-14.265625, -13.8343505859375, -13.403076171875, -12.9718017578125, -12.54052734375, -12.1092529296875, -11.677978515625, -11.2467041015625, -10.8154296875, -10.3841552734375, -9.952880859375, -9.5216064453125, -9.09033203125, -8.6590576171875, -8.227783203125, -7.7965087890625, -7.365234375, -6.9339599609375, -6.502685546875, -6.0714111328125, -5.64013671875, -5.2088623046875, -4.777587890625, -4.3463134765625, -3.9150390625, -3.4837646484375, -3.052490234375, -2.6212158203125, -2.18994140625, -1.7586669921875, -1.327392578125, -0.8961181640625, -0.46484375, -0.0335693359375, 0.397705078125, 0.8289794921875, 1.26025390625, 1.6915283203125, 2.122802734375, 2.5540771484375, 2.9853515625, 3.4166259765625, 3.847900390625, 4.2791748046875, 4.71044921875, 5.1417236328125, 5.572998046875, 6.0042724609375, 6.435546875, 6.8668212890625, 7.298095703125, 7.7293701171875, 8.16064453125, 8.5919189453125, 9.023193359375, 9.4544677734375, 9.8857421875, 10.3170166015625, 10.748291015625, 11.1795654296875, 11.61083984375, 12.0421142578125, 12.473388671875, 12.9046630859375, 13.3359375]}, "gradients/decoder.transformer.h.21.mlp.c_fc.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 2.0, 4.0, 3.0, 7.0, 15.0, 21.0, 48.0, 74.0, 139.0, 355.0, 799.0, 2903.0, 19923.0, 574967.0, 3535862.0, 51726.0, 5327.0, 1273.0, 476.0, 203.0, 86.0, 43.0, 16.0, 8.0, 7.0, 4.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-21.140625, -19.909912109375, -18.67919921875, -17.448486328125, -16.2177734375, -14.987060546875, -13.75634765625, -12.525634765625, -11.294921875, -10.064208984375, -8.83349609375, -7.602783203125, -6.3720703125, -5.141357421875, -3.91064453125, -2.679931640625, -1.44921875, -0.218505859375, 1.01220703125, 2.242919921875, 3.4736328125, 4.704345703125, 5.93505859375, 7.165771484375, 8.396484375, 9.627197265625, 10.85791015625, 12.088623046875, 13.3193359375, 14.550048828125, 15.78076171875, 17.011474609375, 18.2421875, 19.472900390625, 20.70361328125, 21.934326171875, 23.1650390625, 24.395751953125, 25.62646484375, 26.857177734375, 28.087890625, 29.318603515625, 30.54931640625, 31.780029296875, 33.0107421875, 34.241455078125, 35.47216796875, 36.702880859375, 37.93359375, 39.164306640625, 40.39501953125, 41.625732421875, 42.8564453125, 44.087158203125, 45.31787109375, 46.548583984375, 47.779296875, 49.010009765625, 50.24072265625, 51.471435546875, 52.7021484375, 53.932861328125, 55.16357421875, 56.394287109375, 57.625]}, "gradients/decoder.transformer.h.21.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 12.0, 65.0, 231.0, 373.0, 251.0, 64.0, 17.0, 5.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-47.96653366088867, -42.98837661743164, -38.010223388671875, -33.032066345214844, -28.053909301757812, -23.07575225830078, -18.097597122192383, -13.119441986083984, -8.141284942626953, -3.1631288528442383, 1.8150272369384766, 6.793183326721191, 11.771339416503906, 16.749496459960938, 21.727651596069336, 26.705806732177734, 31.683963775634766, 36.6621208190918, 41.64027404785156, 46.618431091308594, 51.596588134765625, 56.574745178222656, 61.55290222167969, 66.53105163574219, 71.50921630859375, 76.48737335205078, 81.46553039550781, 86.44368743896484, 91.42184448242188, 96.39999389648438, 101.3781509399414, 106.35630798339844, 111.33447265625, 116.31262969970703, 121.29078674316406, 126.2689437866211, 131.24710083007812, 136.22525024414062, 141.2034149169922, 146.1815643310547, 151.15972900390625, 156.13787841796875, 161.1160430908203, 166.0941925048828, 171.07235717773438, 176.05050659179688, 181.02867126464844, 186.00682067871094, 190.98497009277344, 195.96311950683594, 200.9412841796875, 205.91943359375, 210.89759826660156, 215.87574768066406, 220.85391235351562, 225.83206176757812, 230.81021118164062, 235.78836059570312, 240.7665252685547, 245.7446746826172, 250.72283935546875, 255.70098876953125, 260.67913818359375, 265.65728759765625, 270.6354675292969]}, "gradients/decoder.transformer.h.21.ln_2.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 1.0, 3.0, 2.0, 5.0, 6.0, 8.0, 10.0, 7.0, 11.0, 7.0, 21.0, 16.0, 19.0, 22.0, 20.0, 25.0, 17.0, 19.0, 34.0, 43.0, 41.0, 29.0, 37.0, 33.0, 31.0, 38.0, 39.0, 33.0, 40.0, 35.0, 28.0, 28.0, 39.0, 40.0, 29.0, 22.0, 24.0, 23.0, 17.0, 15.0, 12.0, 12.0, 17.0, 11.0, 7.0, 10.0, 4.0, 4.0, 6.0, 3.0, 4.0, 1.0, 3.0, 1.0, 2.0, 3.0, 0.0, 1.0], "bins": [-30.247344970703125, -29.30308723449707, -28.358829498291016, -27.414569854736328, -26.470312118530273, -25.52605438232422, -24.58179473876953, -23.637537002563477, -22.693279266357422, -21.749021530151367, -20.804763793945312, -19.860504150390625, -18.91624641418457, -17.971988677978516, -17.027729034423828, -16.083471298217773, -15.139213562011719, -14.194955825805664, -13.250697135925293, -12.306438446044922, -11.362180709838867, -10.417922973632812, -9.473664283752441, -8.52940559387207, -7.585147857666016, -6.640889644622803, -5.69663143157959, -4.752373218536377, -3.808115005493164, -2.863856792449951, -1.9195985794067383, -0.9753403663635254, -0.0310821533203125, 0.9131760597229004, 1.8574342727661133, 2.801692485809326, 3.745950698852539, 4.690208911895752, 5.634467124938965, 6.578725337982178, 7.522983551025391, 8.467241287231445, 9.411499977111816, 10.355758666992188, 11.300016403198242, 12.244274139404297, 13.188532829284668, 14.132791519165039, 15.077049255371094, 16.02130699157715, 16.965564727783203, 17.90982437133789, 18.854082107543945, 19.79833984375, 20.742599487304688, 21.686857223510742, 22.631114959716797, 23.57537269592285, 24.519630432128906, 25.463890075683594, 26.40814781188965, 27.352405548095703, 28.29666519165039, 29.240922927856445, 30.1851806640625]}, "gradients/decoder.transformer.h.21.crossattention.c_proj.bias": {"_type": "histogram", "values": [3.0, 2.0, 2.0, 1.0, 3.0, 4.0, 1.0, 4.0, 4.0, 11.0, 5.0, 9.0, 9.0, 13.0, 16.0, 16.0, 15.0, 24.0, 26.0, 34.0, 27.0, 48.0, 36.0, 29.0, 44.0, 46.0, 47.0, 45.0, 51.0, 38.0, 37.0, 55.0, 43.0, 42.0, 25.0, 26.0, 32.0, 34.0, 20.0, 12.0, 20.0, 6.0, 10.0, 7.0, 8.0, 10.0, 5.0, 4.0, 1.0, 4.0, 0.0, 2.0, 4.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-2.94921875, -2.84246826171875, -2.7357177734375, -2.62896728515625, -2.522216796875, -2.41546630859375, -2.3087158203125, -2.20196533203125, -2.09521484375, -1.98846435546875, -1.8817138671875, -1.77496337890625, -1.668212890625, -1.56146240234375, -1.4547119140625, -1.34796142578125, -1.2412109375, -1.13446044921875, -1.0277099609375, -0.92095947265625, -0.814208984375, -0.70745849609375, -0.6007080078125, -0.49395751953125, -0.38720703125, -0.28045654296875, -0.1737060546875, -0.06695556640625, 0.039794921875, 0.14654541015625, 0.2532958984375, 0.36004638671875, 0.466796875, 0.57354736328125, 0.6802978515625, 0.78704833984375, 0.893798828125, 1.00054931640625, 1.1072998046875, 1.21405029296875, 1.32080078125, 1.42755126953125, 1.5343017578125, 1.64105224609375, 1.747802734375, 1.85455322265625, 1.9613037109375, 2.06805419921875, 2.1748046875, 2.28155517578125, 2.3883056640625, 2.49505615234375, 2.601806640625, 2.70855712890625, 2.8153076171875, 2.92205810546875, 3.02880859375, 3.13555908203125, 3.2423095703125, 3.34906005859375, 3.455810546875, 3.56256103515625, 3.6693115234375, 3.77606201171875, 3.8828125]}, "gradients/decoder.transformer.h.21.crossattention.c_proj.weight": {"_type": "histogram", "values": [4.0, 5.0, 6.0, 9.0, 14.0, 12.0, 17.0, 20.0, 22.0, 73.0, 96.0, 146.0, 191.0, 269.0, 431.0, 668.0, 1014.0, 1401.0, 2199.0, 3461.0, 5284.0, 8277.0, 13228.0, 21897.0, 36946.0, 65842.0, 133569.0, 374344.0, 183076.0, 82458.0, 44679.0, 25840.0, 15594.0, 9807.0, 6105.0, 3985.0, 2507.0, 1690.0, 1108.0, 786.0, 484.0, 318.0, 225.0, 134.0, 105.0, 76.0, 59.0, 38.0, 18.0, 10.0, 6.0, 6.0, 4.0, 3.0, 2.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.1240234375, -0.11954116821289062, -0.11505889892578125, -0.11057662963867188, -0.1060943603515625, -0.10161209106445312, -0.09712982177734375, -0.09264755249023438, -0.088165283203125, -0.08368301391601562, -0.07920074462890625, -0.07471847534179688, -0.0702362060546875, -0.06575393676757812, -0.06127166748046875, -0.056789398193359375, -0.05230712890625, -0.047824859619140625, -0.04334259033203125, -0.038860321044921875, -0.0343780517578125, -0.029895782470703125, -0.02541351318359375, -0.020931243896484375, -0.016448974609375, -0.011966705322265625, -0.00748443603515625, -0.003002166748046875, 0.0014801025390625, 0.005962371826171875, 0.01044464111328125, 0.014926910400390625, 0.0194091796875, 0.023891448974609375, 0.02837371826171875, 0.032855987548828125, 0.0373382568359375, 0.041820526123046875, 0.04630279541015625, 0.050785064697265625, 0.055267333984375, 0.059749603271484375, 0.06423187255859375, 0.06871414184570312, 0.0731964111328125, 0.07767868041992188, 0.08216094970703125, 0.08664321899414062, 0.09112548828125, 0.09560775756835938, 0.10009002685546875, 0.10457229614257812, 0.1090545654296875, 0.11353683471679688, 0.11801910400390625, 0.12250137329101562, 0.126983642578125, 0.13146591186523438, 0.13594818115234375, 0.14043045043945312, 0.1449127197265625, 0.14939498901367188, 0.15387725830078125, 0.15835952758789062, 0.162841796875]}, "gradients/decoder.transformer.h.21.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 5.0, 5.0, 7.0, 2.0, 4.0, 7.0, 7.0, 10.0, 8.0, 11.0, 20.0, 18.0, 14.0, 27.0, 32.0, 17.0, 35.0, 28.0, 27.0, 37.0, 35.0, 40.0, 42.0, 45.0, 1072.0, 46.0, 46.0, 36.0, 35.0, 43.0, 43.0, 38.0, 23.0, 25.0, 20.0, 18.0, 13.0, 19.0, 15.0, 15.0, 10.0, 11.0, 5.0, 2.0, 5.0, 3.0, 4.0, 6.0, 0.0, 2.0, 2.0, 1.0, 0.0, 1.0], "bins": [-2.814453125, -2.73187255859375, -2.6492919921875, -2.56671142578125, -2.484130859375, -2.40155029296875, -2.3189697265625, -2.23638916015625, -2.15380859375, -2.07122802734375, -1.9886474609375, -1.90606689453125, -1.823486328125, -1.74090576171875, -1.6583251953125, -1.57574462890625, -1.4931640625, -1.41058349609375, -1.3280029296875, -1.24542236328125, -1.162841796875, -1.08026123046875, -0.9976806640625, -0.91510009765625, -0.83251953125, -0.74993896484375, -0.6673583984375, -0.58477783203125, -0.502197265625, -0.41961669921875, -0.3370361328125, -0.25445556640625, -0.171875, -0.08929443359375, -0.0067138671875, 0.07586669921875, 0.158447265625, 0.24102783203125, 0.3236083984375, 0.40618896484375, 0.48876953125, 0.57135009765625, 0.6539306640625, 0.73651123046875, 0.819091796875, 0.90167236328125, 0.9842529296875, 1.06683349609375, 1.1494140625, 1.23199462890625, 1.3145751953125, 1.39715576171875, 1.479736328125, 1.56231689453125, 1.6448974609375, 1.72747802734375, 1.81005859375, 1.89263916015625, 1.9752197265625, 2.05780029296875, 2.140380859375, 2.22296142578125, 2.3055419921875, 2.38812255859375, 2.470703125]}, "gradients/decoder.transformer.h.21.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 4.0, 2.0, 4.0, 8.0, 14.0, 33.0, 52.0, 54.0, 88.0, 152.0, 202.0, 258.0, 336.0, 540.0, 705.0, 1000.0, 1369.0, 2003.0, 2781.0, 3821.0, 5633.0, 7905.0, 11612.0, 17237.0, 26063.0, 39401.0, 62485.0, 104182.0, 1295298.0, 232110.0, 100618.0, 61575.0, 39028.0, 25403.0, 16894.0, 11508.0, 7898.0, 5593.0, 3853.0, 2683.0, 1979.0, 1336.0, 928.0, 771.0, 518.0, 348.0, 269.0, 180.0, 157.0, 72.0, 69.0, 44.0, 22.0, 23.0, 8.0, 7.0, 3.0, 3.0, 2.0, 3.0], "bins": [-0.06756591796875, -0.06551742553710938, -0.06346893310546875, -0.061420440673828125, -0.0593719482421875, -0.057323455810546875, -0.05527496337890625, -0.053226470947265625, -0.051177978515625, -0.049129486083984375, -0.04708099365234375, -0.045032501220703125, -0.0429840087890625, -0.040935516357421875, -0.03888702392578125, -0.036838531494140625, -0.0347900390625, -0.032741546630859375, -0.03069305419921875, -0.028644561767578125, -0.0265960693359375, -0.024547576904296875, -0.02249908447265625, -0.020450592041015625, -0.018402099609375, -0.016353607177734375, -0.01430511474609375, -0.012256622314453125, -0.0102081298828125, -0.008159637451171875, -0.00611114501953125, -0.004062652587890625, -0.00201416015625, 3.4332275390625e-05, 0.00208282470703125, 0.004131317138671875, 0.0061798095703125, 0.008228302001953125, 0.01027679443359375, 0.012325286865234375, 0.014373779296875, 0.016422271728515625, 0.01847076416015625, 0.020519256591796875, 0.0225677490234375, 0.024616241455078125, 0.02666473388671875, 0.028713226318359375, 0.03076171875, 0.032810211181640625, 0.03485870361328125, 0.036907196044921875, 0.0389556884765625, 0.041004180908203125, 0.04305267333984375, 0.045101165771484375, 0.047149658203125, 0.049198150634765625, 0.05124664306640625, 0.053295135498046875, 0.0553436279296875, 0.057392120361328125, 0.05944061279296875, 0.061489105224609375, 0.06353759765625]}, "gradients/decoder.transformer.h.21.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 2.0, 3.0, 3.0, 6.0, 8.0, 8.0, 14.0, 9.0, 12.0, 36.0, 19.0, 29.0, 47.0, 53.0, 74.0, 123.0, 170.0, 77.0, 93.0, 48.0, 36.0, 21.0, 26.0, 29.0, 20.0, 14.0, 15.0, 4.0, 3.0, 6.0, 1.0, 0.0, 3.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.231929779052734e-06, -4.092231392860413e-06, -3.952533006668091e-06, -3.812834620475769e-06, -3.6731362342834473e-06, -3.5334378480911255e-06, -3.3937394618988037e-06, -3.254041075706482e-06, -3.11434268951416e-06, -2.9746443033218384e-06, -2.8349459171295166e-06, -2.695247530937195e-06, -2.555549144744873e-06, -2.4158507585525513e-06, -2.2761523723602295e-06, -2.1364539861679077e-06, -1.996755599975586e-06, -1.8570572137832642e-06, -1.7173588275909424e-06, -1.5776604413986206e-06, -1.4379620552062988e-06, -1.298263669013977e-06, -1.1585652828216553e-06, -1.0188668966293335e-06, -8.791685104370117e-07, -7.394701242446899e-07, -5.997717380523682e-07, -4.600733518600464e-07, -3.203749656677246e-07, -1.8067657947540283e-07, -4.0978193283081055e-08, 9.872019290924072e-08, 2.384185791015625e-07, 3.781169652938843e-07, 5.178153514862061e-07, 6.575137376785278e-07, 7.972121238708496e-07, 9.369105100631714e-07, 1.0766088962554932e-06, 1.216307282447815e-06, 1.3560056686401367e-06, 1.4957040548324585e-06, 1.6354024410247803e-06, 1.775100827217102e-06, 1.914799213409424e-06, 2.0544975996017456e-06, 2.1941959857940674e-06, 2.333894371986389e-06, 2.473592758178711e-06, 2.6132911443710327e-06, 2.7529895305633545e-06, 2.8926879167556763e-06, 3.032386302947998e-06, 3.17208468914032e-06, 3.3117830753326416e-06, 3.4514814615249634e-06, 3.591179847717285e-06, 3.730878233909607e-06, 3.870576620101929e-06, 4.0102750062942505e-06, 4.149973392486572e-06, 4.289671778678894e-06, 4.429370164871216e-06, 4.569068551063538e-06, 4.708766937255859e-06]}, "gradients/decoder.transformer.h.21.crossattention.q_attn.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 7.0, 4.0, 8.0, 3.0, 11.0, 22.0, 18.0, 22.0, 38.0, 55.0, 56.0, 67.0, 103.0, 122.0, 202.0, 447.0, 3281.0, 878033.0, 163298.0, 1716.0, 369.0, 160.0, 101.0, 109.0, 69.0, 66.0, 51.0, 39.0, 25.0, 18.0, 12.0, 7.0, 9.0, 4.0, 5.0, 3.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-6.270408630371094e-05, -6.0605816543102264e-05, -5.850754678249359e-05, -5.640927702188492e-05, -5.4311007261276245e-05, -5.221273750066757e-05, -5.01144677400589e-05, -4.8016197979450226e-05, -4.591792821884155e-05, -4.381965845823288e-05, -4.1721388697624207e-05, -3.9623118937015533e-05, -3.752484917640686e-05, -3.542657941579819e-05, -3.3328309655189514e-05, -3.123003989458084e-05, -2.9131770133972168e-05, -2.7033500373363495e-05, -2.4935230612754822e-05, -2.283696085214615e-05, -2.0738691091537476e-05, -1.8640421330928802e-05, -1.654215157032013e-05, -1.4443881809711456e-05, -1.2345612049102783e-05, -1.024734228849411e-05, -8.149072527885437e-06, -6.050802767276764e-06, -3.952533006668091e-06, -1.8542632460594177e-06, 2.4400651454925537e-07, 2.3422762751579285e-06, 4.4405460357666016e-06, 6.538815796375275e-06, 8.637085556983948e-06, 1.0735355317592621e-05, 1.2833625078201294e-05, 1.4931894838809967e-05, 1.703016459941864e-05, 1.9128434360027313e-05, 2.1226704120635986e-05, 2.332497388124466e-05, 2.5423243641853333e-05, 2.7521513402462006e-05, 2.961978316307068e-05, 3.171805292367935e-05, 3.3816322684288025e-05, 3.59145924448967e-05, 3.801286220550537e-05, 4.0111131966114044e-05, 4.220940172672272e-05, 4.430767148733139e-05, 4.6405941247940063e-05, 4.8504211008548737e-05, 5.060248076915741e-05, 5.270075052976608e-05, 5.4799020290374756e-05, 5.689729005098343e-05, 5.89955598115921e-05, 6.109382957220078e-05, 6.319209933280945e-05, 6.529036909341812e-05, 6.73886388540268e-05, 6.948690861463547e-05, 7.158517837524414e-05]}, "gradients/decoder.transformer.h.21.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 10.0, 31.0, 145.0, 371.0, 323.0, 103.0, 25.0, 5.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.357136721315328e-06, -8.110566341201775e-06, -7.863995961088222e-06, -7.617425580974668e-06, -7.370855200861115e-06, -7.124284820747562e-06, -6.8777144406340085e-06, -6.631144060520455e-06, -6.384573680406902e-06, -6.138003300293349e-06, -5.8914329201797955e-06, -5.644862540066242e-06, -5.398292159952689e-06, -5.151721779839136e-06, -4.9051513997255825e-06, -4.658581019612029e-06, -4.412010639498476e-06, -4.165440259384923e-06, -3.9188698792713694e-06, -3.672299499157816e-06, -3.425729119044263e-06, -3.1791587389307097e-06, -2.9325883588171564e-06, -2.686017978703603e-06, -2.43944759859005e-06, -2.1928772184764966e-06, -1.9463068383629434e-06, -1.6997364582493901e-06, -1.4531660781358369e-06, -1.2065956980222836e-06, -9.600253179087304e-07, -7.134549377951771e-07, -4.6688364818692207e-07, -2.2031326807336882e-07, 2.6257112040184438e-08, 2.728274921537377e-07, 5.19397872267291e-07, 7.659682523808442e-07, 1.0125386324943975e-06, 1.2591090126079507e-06, 1.505679392721504e-06, 1.7522497728350572e-06, 1.9988201529486105e-06, 2.2453905330621637e-06, 2.491960913175717e-06, 2.7385312932892703e-06, 2.9851016734028235e-06, 3.2316720535163768e-06, 3.47824243362993e-06, 3.7248128137434833e-06, 3.9713831938570365e-06, 4.21795357397059e-06, 4.464523954084143e-06, 4.711094334197696e-06, 4.9576647143112496e-06, 5.204235094424803e-06, 5.450805474538356e-06, 5.697375854651909e-06, 5.943946234765463e-06, 6.190516614879016e-06, 6.437086994992569e-06, 6.683657375106122e-06, 6.930227755219676e-06, 7.176798135333229e-06, 7.423368515446782e-06]}, "gradients/decoder.transformer.h.21.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 6.0, 1.0, 2.0, 0.0, 5.0, 8.0, 9.0, 14.0, 0.0, 14.0, 21.0, 23.0, 24.0, 0.0, 34.0, 39.0, 33.0, 34.0, 0.0, 33.0, 42.0, 47.0, 54.0, 0.0, 45.0, 57.0, 46.0, 34.0, 0.0, 46.0, 51.0, 42.0, 34.0, 0.0, 28.0, 30.0, 23.0, 19.0, 0.0, 21.0, 18.0, 14.0, 9.0, 0.0, 12.0, 13.0, 10.0, 7.0, 0.0, 5.0, 4.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 2.0], "bins": [-1.4901161193847656e-06, -1.4426186680793762e-06, -1.3951212167739868e-06, -1.3476237654685974e-06, -1.300126314163208e-06, -1.2526288628578186e-06, -1.2051314115524292e-06, -1.1576339602470398e-06, -1.1101365089416504e-06, -1.062639057636261e-06, -1.0151416063308716e-06, -9.676441550254822e-07, -9.201467037200928e-07, -8.726492524147034e-07, -8.25151801109314e-07, -7.776543498039246e-07, -7.301568984985352e-07, -6.826594471931458e-07, -6.351619958877563e-07, -5.876645445823669e-07, -5.401670932769775e-07, -4.926696419715881e-07, -4.4517219066619873e-07, -3.976747393608093e-07, -3.501772880554199e-07, -3.026798367500305e-07, -2.551823854446411e-07, -2.076849341392517e-07, -1.601874828338623e-07, -1.126900315284729e-07, -6.51925802230835e-08, -1.7695128917694092e-08, 2.9802322387695312e-08, 7.729977369308472e-08, 1.2479722499847412e-07, 1.7229467630386353e-07, 2.1979212760925293e-07, 2.6728957891464233e-07, 3.1478703022003174e-07, 3.6228448152542114e-07, 4.0978193283081055e-07, 4.5727938413619995e-07, 5.047768354415894e-07, 5.522742867469788e-07, 5.997717380523682e-07, 6.472691893577576e-07, 6.94766640663147e-07, 7.422640919685364e-07, 7.897615432739258e-07, 8.372589945793152e-07, 8.847564458847046e-07, 9.32253897190094e-07, 9.797513484954834e-07, 1.0272487998008728e-06, 1.0747462511062622e-06, 1.1222437024116516e-06, 1.169741153717041e-06, 1.2172386050224304e-06, 1.2647360563278198e-06, 1.3122335076332092e-06, 1.3597309589385986e-06, 1.407228410243988e-06, 1.4547258615493774e-06, 1.5022233128547668e-06, 1.5497207641601562e-06]}, "gradients/decoder.transformer.h.21.attn.c_proj.bias": {"_type": "histogram", "values": [3.0, 2.0, 2.0, 1.0, 3.0, 4.0, 1.0, 4.0, 4.0, 11.0, 5.0, 9.0, 9.0, 13.0, 16.0, 16.0, 15.0, 24.0, 26.0, 34.0, 27.0, 48.0, 36.0, 29.0, 44.0, 46.0, 47.0, 45.0, 51.0, 38.0, 37.0, 55.0, 43.0, 42.0, 25.0, 26.0, 32.0, 34.0, 20.0, 12.0, 20.0, 6.0, 10.0, 7.0, 8.0, 10.0, 5.0, 4.0, 1.0, 4.0, 0.0, 2.0, 4.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-2.94921875, -2.84246826171875, -2.7357177734375, -2.62896728515625, -2.522216796875, -2.41546630859375, -2.3087158203125, -2.20196533203125, -2.09521484375, -1.98846435546875, -1.8817138671875, -1.77496337890625, -1.668212890625, -1.56146240234375, -1.4547119140625, -1.34796142578125, -1.2412109375, -1.13446044921875, -1.0277099609375, -0.92095947265625, -0.814208984375, -0.70745849609375, -0.6007080078125, -0.49395751953125, -0.38720703125, -0.28045654296875, -0.1737060546875, -0.06695556640625, 0.039794921875, 0.14654541015625, 0.2532958984375, 0.36004638671875, 0.466796875, 0.57354736328125, 0.6802978515625, 0.78704833984375, 0.893798828125, 1.00054931640625, 1.1072998046875, 1.21405029296875, 1.32080078125, 1.42755126953125, 1.5343017578125, 1.64105224609375, 1.747802734375, 1.85455322265625, 1.9613037109375, 2.06805419921875, 2.1748046875, 2.28155517578125, 2.3883056640625, 2.49505615234375, 2.601806640625, 2.70855712890625, 2.8153076171875, 2.92205810546875, 3.02880859375, 3.13555908203125, 3.2423095703125, 3.34906005859375, 3.455810546875, 3.56256103515625, 3.6693115234375, 3.77606201171875, 3.8828125]}, "gradients/decoder.transformer.h.21.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0, 2.0, 4.0, 4.0, 3.0, 11.0, 10.0, 17.0, 20.0, 29.0, 35.0, 72.0, 98.0, 138.0, 251.0, 389.0, 681.0, 1109.0, 1939.0, 3692.0, 7682.0, 18284.0, 57836.0, 286351.0, 538942.0, 86114.0, 24859.0, 9755.0, 4469.0, 2402.0, 1355.0, 770.0, 481.0, 276.0, 155.0, 99.0, 66.0, 55.0, 45.0, 17.0, 14.0, 10.0, 4.0, 8.0, 4.0, 2.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0], "bins": [-3.861328125, -3.7412109375, -3.62109375, -3.5009765625, -3.380859375, -3.2607421875, -3.140625, -3.0205078125, -2.900390625, -2.7802734375, -2.66015625, -2.5400390625, -2.419921875, -2.2998046875, -2.1796875, -2.0595703125, -1.939453125, -1.8193359375, -1.69921875, -1.5791015625, -1.458984375, -1.3388671875, -1.21875, -1.0986328125, -0.978515625, -0.8583984375, -0.73828125, -0.6181640625, -0.498046875, -0.3779296875, -0.2578125, -0.1376953125, -0.017578125, 0.1025390625, 0.22265625, 0.3427734375, 0.462890625, 0.5830078125, 0.703125, 0.8232421875, 0.943359375, 1.0634765625, 1.18359375, 1.3037109375, 1.423828125, 1.5439453125, 1.6640625, 1.7841796875, 1.904296875, 2.0244140625, 2.14453125, 2.2646484375, 2.384765625, 2.5048828125, 2.625, 2.7451171875, 2.865234375, 2.9853515625, 3.10546875, 3.2255859375, 3.345703125, 3.4658203125, 3.5859375, 3.7060546875, 3.826171875]}, "gradients/decoder.transformer.h.21.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 3.0, 2.0, 4.0, 4.0, 5.0, 10.0, 5.0, 7.0, 7.0, 15.0, 17.0, 18.0, 15.0, 18.0, 22.0, 24.0, 21.0, 45.0, 35.0, 45.0, 48.0, 52.0, 68.0, 117.0, 1862.0, 120.0, 67.0, 47.0, 43.0, 29.0, 44.0, 34.0, 26.0, 22.0, 24.0, 20.0, 23.0, 15.0, 8.0, 10.0, 11.0, 10.0, 6.0, 9.0, 4.0, 4.0, 6.0, 1.0, 3.0, 4.0, 1.0, 1.0, 3.0, 1.0, 1.0], "bins": [-12.2109375, -11.845947265625, -11.48095703125, -11.115966796875, -10.7509765625, -10.385986328125, -10.02099609375, -9.656005859375, -9.291015625, -8.926025390625, -8.56103515625, -8.196044921875, -7.8310546875, -7.466064453125, -7.10107421875, -6.736083984375, -6.37109375, -6.006103515625, -5.64111328125, -5.276123046875, -4.9111328125, -4.546142578125, -4.18115234375, -3.816162109375, -3.451171875, -3.086181640625, -2.72119140625, -2.356201171875, -1.9912109375, -1.626220703125, -1.26123046875, -0.896240234375, -0.53125, -0.166259765625, 0.19873046875, 0.563720703125, 0.9287109375, 1.293701171875, 1.65869140625, 2.023681640625, 2.388671875, 2.753662109375, 3.11865234375, 3.483642578125, 3.8486328125, 4.213623046875, 4.57861328125, 4.943603515625, 5.30859375, 5.673583984375, 6.03857421875, 6.403564453125, 6.7685546875, 7.133544921875, 7.49853515625, 7.863525390625, 8.228515625, 8.593505859375, 8.95849609375, 9.323486328125, 9.6884765625, 10.053466796875, 10.41845703125, 10.783447265625, 11.1484375]}, "gradients/decoder.transformer.h.21.attn.c_attn.weight": {"_type": "histogram", "values": [2.0, 1.0, 2.0, 1.0, 2.0, 5.0, 1.0, 4.0, 4.0, 3.0, 5.0, 10.0, 8.0, 7.0, 11.0, 11.0, 10.0, 16.0, 21.0, 17.0, 27.0, 22.0, 34.0, 55.0, 72.0, 118.0, 218.0, 444.0, 1614.0, 47809.0, 3083544.0, 9836.0, 861.0, 351.0, 169.0, 100.0, 66.0, 46.0, 21.0, 28.0, 21.0, 19.0, 15.0, 15.0, 16.0, 13.0, 9.0, 7.0, 6.0, 10.0, 3.0, 5.0, 2.0, 3.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0], "bins": [-23.328125, -22.558837890625, -21.78955078125, -21.020263671875, -20.2509765625, -19.481689453125, -18.71240234375, -17.943115234375, -17.173828125, -16.404541015625, -15.63525390625, -14.865966796875, -14.0966796875, -13.327392578125, -12.55810546875, -11.788818359375, -11.01953125, -10.250244140625, -9.48095703125, -8.711669921875, -7.9423828125, -7.173095703125, -6.40380859375, -5.634521484375, -4.865234375, -4.095947265625, -3.32666015625, -2.557373046875, -1.7880859375, -1.018798828125, -0.24951171875, 0.519775390625, 1.2890625, 2.058349609375, 2.82763671875, 3.596923828125, 4.3662109375, 5.135498046875, 5.90478515625, 6.674072265625, 7.443359375, 8.212646484375, 8.98193359375, 9.751220703125, 10.5205078125, 11.289794921875, 12.05908203125, 12.828369140625, 13.59765625, 14.366943359375, 15.13623046875, 15.905517578125, 16.6748046875, 17.444091796875, 18.21337890625, 18.982666015625, 19.751953125, 20.521240234375, 21.29052734375, 22.059814453125, 22.8291015625, 23.598388671875, 24.36767578125, 25.136962890625, 25.90625]}, "gradients/decoder.transformer.h.21.ln_1.weight": {"_type": "histogram", "values": [1013.0, 8.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.933707237243652, 3.4880237579345703, 12.909754753112793, 22.331485748291016, 31.753215789794922, 41.17494583129883, 50.5966796875, 60.018409729003906, 69.44013977050781, 78.86186981201172, 88.28359985351562, 97.70533752441406, 107.12705993652344, 116.54879760742188, 125.97052764892578, 135.3922576904297, 144.81399536132812, 154.23573303222656, 163.65745544433594, 173.07919311523438, 182.50091552734375, 191.9226531982422, 201.34439086914062, 210.76611328125, 220.18783569335938, 229.6095733642578, 239.0312957763672, 248.45303344726562, 257.874755859375, 267.2964782714844, 276.7182312011719, 286.13995361328125, 295.5616760253906, 304.9833984375, 314.4051513671875, 323.8268737792969, 333.24859619140625, 342.6703186035156, 352.0920715332031, 361.5137939453125, 370.9355163574219, 380.35723876953125, 389.77899169921875, 399.2007141113281, 408.6224365234375, 418.0441589355469, 427.4659118652344, 436.88763427734375, 446.30938720703125, 455.7311096191406, 465.1528625488281, 474.5745849609375, 483.9963073730469, 493.41802978515625, 502.83978271484375, 512.261474609375, 521.6832275390625, 531.10498046875, 540.5266723632812, 549.9484252929688, 559.3701782226562, 568.7918701171875, 578.213623046875, 587.6353759765625, 597.0570678710938]}, "gradients/decoder.transformer.h.21.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 1.0, 4.0, 1.0, 2.0, 5.0, 3.0, 5.0, 6.0, 9.0, 12.0, 9.0, 14.0, 13.0, 25.0, 13.0, 19.0, 33.0, 28.0, 33.0, 32.0, 39.0, 54.0, 50.0, 37.0, 43.0, 48.0, 39.0, 42.0, 31.0, 39.0, 34.0, 26.0, 33.0, 30.0, 28.0, 23.0, 29.0, 23.0, 18.0, 19.0, 11.0, 10.0, 9.0, 10.0, 8.0, 6.0, 5.0, 5.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-34.88990020751953, -33.69934844970703, -32.50879669189453, -31.31824493408203, -30.12769317626953, -28.93714141845703, -27.74658966064453, -26.55603790283203, -25.36548614501953, -24.17493438720703, -22.98438262939453, -21.79383087158203, -20.60327911376953, -19.41272735595703, -18.22217559814453, -17.03162384033203, -15.841072082519531, -14.650520324707031, -13.459968566894531, -12.269416809082031, -11.078865051269531, -9.888313293457031, -8.697761535644531, -7.507209777832031, -6.316658020019531, -5.126106262207031, -3.9355545043945312, -2.7450027465820312, -1.5544509887695312, -0.36389923095703125, 0.8266525268554688, 2.0172042846679688, 3.2077598571777344, 4.398311614990234, 5.588863372802734, 6.779415130615234, 7.969966888427734, 9.160518646240234, 10.351070404052734, 11.541622161865234, 12.732173919677734, 13.922725677490234, 15.113277435302734, 16.303829193115234, 17.494380950927734, 18.684932708740234, 19.875484466552734, 21.066036224365234, 22.256587982177734, 23.447139739990234, 24.637691497802734, 25.828243255615234, 27.018795013427734, 28.209346771240234, 29.399898529052734, 30.590450286865234, 31.781002044677734, 32.971553802490234, 34.162105560302734, 35.352657318115234, 36.543209075927734, 37.733760833740234, 38.924312591552734, 40.114864349365234, 41.305416107177734]}, "gradients/decoder.transformer.h.20.mlp.c_proj.bias": {"_type": "histogram", "values": [4.0, 3.0, 0.0, 2.0, 5.0, 2.0, 3.0, 4.0, 5.0, 9.0, 7.0, 13.0, 7.0, 12.0, 14.0, 17.0, 17.0, 27.0, 25.0, 29.0, 34.0, 39.0, 39.0, 37.0, 46.0, 39.0, 40.0, 54.0, 45.0, 38.0, 43.0, 51.0, 37.0, 43.0, 27.0, 25.0, 32.0, 32.0, 22.0, 13.0, 15.0, 9.0, 9.0, 8.0, 8.0, 8.0, 6.0, 5.0, 2.0, 3.0, 1.0, 0.0, 2.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-2.921875, -2.81561279296875, -2.7093505859375, -2.60308837890625, -2.496826171875, -2.39056396484375, -2.2843017578125, -2.17803955078125, -2.07177734375, -1.96551513671875, -1.8592529296875, -1.75299072265625, -1.646728515625, -1.54046630859375, -1.4342041015625, -1.32794189453125, -1.2216796875, -1.11541748046875, -1.0091552734375, -0.90289306640625, -0.796630859375, -0.69036865234375, -0.5841064453125, -0.47784423828125, -0.37158203125, -0.26531982421875, -0.1590576171875, -0.05279541015625, 0.053466796875, 0.15972900390625, 0.2659912109375, 0.37225341796875, 0.478515625, 0.58477783203125, 0.6910400390625, 0.79730224609375, 0.903564453125, 1.00982666015625, 1.1160888671875, 1.22235107421875, 1.32861328125, 1.43487548828125, 1.5411376953125, 1.64739990234375, 1.753662109375, 1.85992431640625, 1.9661865234375, 2.07244873046875, 2.1787109375, 2.28497314453125, 2.3912353515625, 2.49749755859375, 2.603759765625, 2.71002197265625, 2.8162841796875, 2.92254638671875, 3.02880859375, 3.13507080078125, 3.2413330078125, 3.34759521484375, 3.453857421875, 3.56011962890625, 3.6663818359375, 3.77264404296875, 3.87890625]}, "gradients/decoder.transformer.h.20.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 3.0, 5.0, 2.0, 5.0, 3.0, 6.0, 5.0, 7.0, 8.0, 12.0, 10.0, 15.0, 26.0, 34.0, 32.0, 60.0, 93.0, 154.0, 275.0, 405.0, 788.0, 1532.0, 3836.0, 13903.0, 100725.0, 3493802.0, 535761.0, 31248.0, 6567.0, 2379.0, 1091.0, 554.0, 311.0, 222.0, 118.0, 75.0, 64.0, 37.0, 29.0, 17.0, 15.0, 9.0, 14.0, 6.0, 9.0, 8.0, 4.0, 6.0, 1.0, 0.0, 4.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-16.125, -15.56396484375, -15.0029296875, -14.44189453125, -13.880859375, -13.31982421875, -12.7587890625, -12.19775390625, -11.63671875, -11.07568359375, -10.5146484375, -9.95361328125, -9.392578125, -8.83154296875, -8.2705078125, -7.70947265625, -7.1484375, -6.58740234375, -6.0263671875, -5.46533203125, -4.904296875, -4.34326171875, -3.7822265625, -3.22119140625, -2.66015625, -2.09912109375, -1.5380859375, -0.97705078125, -0.416015625, 0.14501953125, 0.7060546875, 1.26708984375, 1.828125, 2.38916015625, 2.9501953125, 3.51123046875, 4.072265625, 4.63330078125, 5.1943359375, 5.75537109375, 6.31640625, 6.87744140625, 7.4384765625, 7.99951171875, 8.560546875, 9.12158203125, 9.6826171875, 10.24365234375, 10.8046875, 11.36572265625, 11.9267578125, 12.48779296875, 13.048828125, 13.60986328125, 14.1708984375, 14.73193359375, 15.29296875, 15.85400390625, 16.4150390625, 16.97607421875, 17.537109375, 18.09814453125, 18.6591796875, 19.22021484375, 19.78125]}, "gradients/decoder.transformer.h.20.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 2.0, 3.0, 3.0, 6.0, 7.0, 9.0, 14.0, 11.0, 17.0, 25.0, 43.0, 64.0, 136.0, 207.0, 381.0, 762.0, 990.0, 642.0, 312.0, 158.0, 100.0, 64.0, 36.0, 19.0, 22.0, 14.0, 9.0, 5.0, 4.0, 4.0, 3.0, 6.0, 1.0, 2.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 3.0], "bins": [-17.546875, -17.0889892578125, -16.631103515625, -16.1732177734375, -15.71533203125, -15.2574462890625, -14.799560546875, -14.3416748046875, -13.8837890625, -13.4259033203125, -12.968017578125, -12.5101318359375, -12.05224609375, -11.5943603515625, -11.136474609375, -10.6785888671875, -10.220703125, -9.7628173828125, -9.304931640625, -8.8470458984375, -8.38916015625, -7.9312744140625, -7.473388671875, -7.0155029296875, -6.5576171875, -6.0997314453125, -5.641845703125, -5.1839599609375, -4.72607421875, -4.2681884765625, -3.810302734375, -3.3524169921875, -2.89453125, -2.4366455078125, -1.978759765625, -1.5208740234375, -1.06298828125, -0.6051025390625, -0.147216796875, 0.3106689453125, 0.7685546875, 1.2264404296875, 1.684326171875, 2.1422119140625, 2.60009765625, 3.0579833984375, 3.515869140625, 3.9737548828125, 4.431640625, 4.8895263671875, 5.347412109375, 5.8052978515625, 6.26318359375, 6.7210693359375, 7.178955078125, 7.6368408203125, 8.0947265625, 8.5526123046875, 9.010498046875, 9.4683837890625, 9.92626953125, 10.3841552734375, 10.842041015625, 11.2999267578125, 11.7578125]}, "gradients/decoder.transformer.h.20.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 1.0, 2.0, 3.0, 2.0, 3.0, 8.0, 2.0, 7.0, 8.0, 16.0, 19.0, 26.0, 42.0, 81.0, 177.0, 371.0, 806.0, 2442.0, 12203.0, 170621.0, 3822632.0, 168625.0, 12245.0, 2436.0, 832.0, 296.0, 153.0, 94.0, 48.0, 32.0, 23.0, 11.0, 5.0, 5.0, 6.0, 1.0, 3.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-37.25, -36.0673828125, -34.884765625, -33.7021484375, -32.51953125, -31.3369140625, -30.154296875, -28.9716796875, -27.7890625, -26.6064453125, -25.423828125, -24.2412109375, -23.05859375, -21.8759765625, -20.693359375, -19.5107421875, -18.328125, -17.1455078125, -15.962890625, -14.7802734375, -13.59765625, -12.4150390625, -11.232421875, -10.0498046875, -8.8671875, -7.6845703125, -6.501953125, -5.3193359375, -4.13671875, -2.9541015625, -1.771484375, -0.5888671875, 0.59375, 1.7763671875, 2.958984375, 4.1416015625, 5.32421875, 6.5068359375, 7.689453125, 8.8720703125, 10.0546875, 11.2373046875, 12.419921875, 13.6025390625, 14.78515625, 15.9677734375, 17.150390625, 18.3330078125, 19.515625, 20.6982421875, 21.880859375, 23.0634765625, 24.24609375, 25.4287109375, 26.611328125, 27.7939453125, 28.9765625, 30.1591796875, 31.341796875, 32.5244140625, 33.70703125, 34.8896484375, 36.072265625, 37.2548828125, 38.4375]}, "gradients/decoder.transformer.h.20.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 3.0, 25.0, 98.0, 208.0, 324.0, 226.0, 90.0, 33.0, 4.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-170.78118896484375, -165.8493194580078, -160.91744995117188, -155.98556518554688, -151.05369567871094, -146.121826171875, -141.18994140625, -136.25807189941406, -131.32620239257812, -126.39433288574219, -121.46245574951172, -116.53057861328125, -111.59870910644531, -106.66683959960938, -101.7349624633789, -96.80308532714844, -91.8712158203125, -86.93934631347656, -82.0074691772461, -77.07559204101562, -72.14372253417969, -67.21185302734375, -62.27997589111328, -57.34810256958008, -52.416229248046875, -47.48435592651367, -42.55248260498047, -37.620609283447266, -32.68873596191406, -27.75686264038086, -22.824989318847656, -17.893115997314453, -12.961227416992188, -8.029354095458984, -3.0974807739257812, 1.8343925476074219, 6.766265869140625, 11.698139190673828, 16.63001251220703, 21.561885833740234, 26.493759155273438, 31.42563247680664, 36.357505798339844, 41.28937911987305, 46.22125244140625, 51.15312576293945, 56.084999084472656, 61.01687240600586, 65.94874572753906, 70.880615234375, 75.81249237060547, 80.74436950683594, 85.67623901367188, 90.60810852050781, 95.53998565673828, 100.47186279296875, 105.40373229980469, 110.33560180664062, 115.2674789428711, 120.19935607910156, 125.1312255859375, 130.06309509277344, 134.99496459960938, 139.92684936523438, 144.8587188720703]}, "gradients/decoder.transformer.h.20.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 5.0, 2.0, 8.0, 4.0, 3.0, 8.0, 7.0, 15.0, 22.0, 15.0, 14.0, 11.0, 15.0, 23.0, 15.0, 34.0, 26.0, 33.0, 31.0, 43.0, 39.0, 40.0, 37.0, 47.0, 40.0, 43.0, 31.0, 40.0, 34.0, 30.0, 40.0, 36.0, 34.0, 29.0, 26.0, 25.0, 18.0, 15.0, 12.0, 11.0, 10.0, 5.0, 12.0, 5.0, 6.0, 6.0, 1.0, 1.0, 1.0, 2.0, 3.0, 0.0, 1.0, 0.0, 0.0, 3.0, 0.0, 1.0], "bins": [-33.880401611328125, -32.769962310791016, -31.65952491760254, -30.549087524414062, -29.438648223876953, -28.328208923339844, -27.217771530151367, -26.10733413696289, -24.99689483642578, -23.886455535888672, -22.776018142700195, -21.66558074951172, -20.55514144897461, -19.4447021484375, -18.334264755249023, -17.223827362060547, -16.113388061523438, -15.002949714660645, -13.892511367797852, -12.782073020935059, -11.671634674072266, -10.561196327209473, -9.45075798034668, -8.340319633483887, -7.229881286621094, -6.119442939758301, -5.009004592895508, -3.898566246032715, -2.788127899169922, -1.677689552307129, -0.5672512054443359, 0.543187141418457, 1.65362548828125, 2.764063835144043, 3.874502182006836, 4.984940528869629, 6.095378875732422, 7.205817222595215, 8.316255569458008, 9.4266939163208, 10.537132263183594, 11.647570610046387, 12.75800895690918, 13.868447303771973, 14.978885650634766, 16.089324951171875, 17.19976234436035, 18.310199737548828, 19.420639038085938, 20.531078338623047, 21.641515731811523, 22.751953125, 23.86239242553711, 24.97283172607422, 26.083269119262695, 27.193706512451172, 28.30414581298828, 29.41458511352539, 30.525022506713867, 31.635459899902344, 32.74589920043945, 33.85633850097656, 34.966773986816406, 36.077213287353516, 37.187652587890625]}, "gradients/decoder.transformer.h.20.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 3.0, 3.0, 2.0, 1.0, 2.0, 2.0, 6.0, 3.0, 2.0, 2.0, 5.0, 10.0, 12.0, 5.0, 10.0, 18.0, 18.0, 20.0, 23.0, 21.0, 36.0, 27.0, 34.0, 34.0, 41.0, 37.0, 32.0, 38.0, 42.0, 43.0, 43.0, 45.0, 38.0, 46.0, 43.0, 36.0, 27.0, 27.0, 26.0, 24.0, 20.0, 26.0, 14.0, 10.0, 9.0, 8.0, 9.0, 3.0, 9.0, 4.0, 3.0, 5.0, 3.0, 2.0, 3.0, 1.0, 0.0, 3.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-3.236328125, -3.12933349609375, -3.0223388671875, -2.91534423828125, -2.808349609375, -2.70135498046875, -2.5943603515625, -2.48736572265625, -2.38037109375, -2.27337646484375, -2.1663818359375, -2.05938720703125, -1.952392578125, -1.84539794921875, -1.7384033203125, -1.63140869140625, -1.5244140625, -1.41741943359375, -1.3104248046875, -1.20343017578125, -1.096435546875, -0.98944091796875, -0.8824462890625, -0.77545166015625, -0.66845703125, -0.56146240234375, -0.4544677734375, -0.34747314453125, -0.240478515625, -0.13348388671875, -0.0264892578125, 0.08050537109375, 0.1875, 0.29449462890625, 0.4014892578125, 0.50848388671875, 0.615478515625, 0.72247314453125, 0.8294677734375, 0.93646240234375, 1.04345703125, 1.15045166015625, 1.2574462890625, 1.36444091796875, 1.471435546875, 1.57843017578125, 1.6854248046875, 1.79241943359375, 1.8994140625, 2.00640869140625, 2.1134033203125, 2.22039794921875, 2.327392578125, 2.43438720703125, 2.5413818359375, 2.64837646484375, 2.75537109375, 2.86236572265625, 2.9693603515625, 3.07635498046875, 3.183349609375, 3.29034423828125, 3.3973388671875, 3.50433349609375, 3.611328125]}, "gradients/decoder.transformer.h.20.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 0.0, 4.0, 2.0, 1.0, 7.0, 4.0, 8.0, 18.0, 26.0, 28.0, 60.0, 70.0, 143.0, 147.0, 225.0, 339.0, 487.0, 697.0, 1054.0, 1479.0, 2219.0, 3280.0, 5115.0, 7767.0, 12334.0, 20096.0, 33138.0, 58764.0, 114873.0, 342733.0, 222949.0, 93200.0, 49672.0, 28782.0, 17418.0, 10769.0, 6828.0, 4550.0, 2941.0, 2030.0, 1317.0, 928.0, 639.0, 450.0, 308.0, 204.0, 161.0, 111.0, 61.0, 43.0, 20.0, 19.0, 15.0, 11.0, 10.0, 5.0, 5.0, 3.0, 2.0, 3.0], "bins": [-0.15234375, -0.1478443145751953, -0.14334487915039062, -0.13884544372558594, -0.13434600830078125, -0.12984657287597656, -0.12534713745117188, -0.12084770202636719, -0.1163482666015625, -0.11184883117675781, -0.10734939575195312, -0.10284996032714844, -0.09835052490234375, -0.09385108947753906, -0.08935165405273438, -0.08485221862792969, -0.080352783203125, -0.07585334777832031, -0.07135391235351562, -0.06685447692871094, -0.06235504150390625, -0.05785560607910156, -0.053356170654296875, -0.04885673522949219, -0.0443572998046875, -0.03985786437988281, -0.035358428955078125, -0.030858993530273438, -0.02635955810546875, -0.021860122680664062, -0.017360687255859375, -0.012861251831054688, -0.00836181640625, -0.0038623809814453125, 0.000637054443359375, 0.0051364898681640625, 0.00963592529296875, 0.014135360717773438, 0.018634796142578125, 0.023134231567382812, 0.0276336669921875, 0.03213310241699219, 0.036632537841796875, 0.04113197326660156, 0.04563140869140625, 0.05013084411621094, 0.054630279541015625, 0.05912971496582031, 0.063629150390625, 0.06812858581542969, 0.07262802124023438, 0.07712745666503906, 0.08162689208984375, 0.08612632751464844, 0.09062576293945312, 0.09512519836425781, 0.0996246337890625, 0.10412406921386719, 0.10862350463867188, 0.11312294006347656, 0.11762237548828125, 0.12212181091308594, 0.12662124633789062, 0.1311206817626953, 0.1356201171875]}, "gradients/decoder.transformer.h.20.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 2.0, 3.0, 2.0, 6.0, 9.0, 5.0, 9.0, 9.0, 10.0, 10.0, 19.0, 13.0, 24.0, 20.0, 23.0, 20.0, 27.0, 25.0, 25.0, 36.0, 33.0, 38.0, 37.0, 32.0, 54.0, 37.0, 1055.0, 31.0, 28.0, 33.0, 32.0, 29.0, 29.0, 27.0, 18.0, 31.0, 29.0, 26.0, 27.0, 9.0, 12.0, 16.0, 17.0, 10.0, 7.0, 9.0, 6.0, 5.0, 2.0, 6.0, 4.0, 5.0, 3.0, 5.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.26171875, -2.185546875, -2.109375, -2.033203125, -1.95703125, -1.880859375, -1.8046875, -1.728515625, -1.65234375, -1.576171875, -1.5, -1.423828125, -1.34765625, -1.271484375, -1.1953125, -1.119140625, -1.04296875, -0.966796875, -0.890625, -0.814453125, -0.73828125, -0.662109375, -0.5859375, -0.509765625, -0.43359375, -0.357421875, -0.28125, -0.205078125, -0.12890625, -0.052734375, 0.0234375, 0.099609375, 0.17578125, 0.251953125, 0.328125, 0.404296875, 0.48046875, 0.556640625, 0.6328125, 0.708984375, 0.78515625, 0.861328125, 0.9375, 1.013671875, 1.08984375, 1.166015625, 1.2421875, 1.318359375, 1.39453125, 1.470703125, 1.546875, 1.623046875, 1.69921875, 1.775390625, 1.8515625, 1.927734375, 2.00390625, 2.080078125, 2.15625, 2.232421875, 2.30859375, 2.384765625, 2.4609375, 2.537109375, 2.61328125]}, "gradients/decoder.transformer.h.20.crossattention.c_attn.weight": {"_type": "histogram", "values": [3.0, 1.0, 4.0, 3.0, 13.0, 14.0, 27.0, 47.0, 73.0, 106.0, 139.0, 223.0, 308.0, 412.0, 545.0, 732.0, 1008.0, 1418.0, 1945.0, 2665.0, 3711.0, 5237.0, 7313.0, 10318.0, 14292.0, 20530.0, 29471.0, 43413.0, 64880.0, 102390.0, 1267911.0, 208857.0, 101730.0, 64478.0, 43414.0, 29443.0, 20207.0, 14206.0, 10058.0, 7102.0, 5154.0, 3749.0, 2588.0, 1918.0, 1435.0, 1079.0, 731.0, 550.0, 416.0, 276.0, 207.0, 132.0, 97.0, 65.0, 42.0, 30.0, 15.0, 7.0, 4.0, 5.0, 2.0, 1.0, 1.0, 1.0], "bins": [-0.0592041015625, -0.057292938232421875, -0.05538177490234375, -0.053470611572265625, -0.0515594482421875, -0.049648284912109375, -0.04773712158203125, -0.045825958251953125, -0.043914794921875, -0.042003631591796875, -0.04009246826171875, -0.038181304931640625, -0.0362701416015625, -0.034358978271484375, -0.03244781494140625, -0.030536651611328125, -0.02862548828125, -0.026714324951171875, -0.02480316162109375, -0.022891998291015625, -0.0209808349609375, -0.019069671630859375, -0.01715850830078125, -0.015247344970703125, -0.013336181640625, -0.011425018310546875, -0.00951385498046875, -0.007602691650390625, -0.0056915283203125, -0.003780364990234375, -0.00186920166015625, 4.1961669921875e-05, 0.001953125, 0.003864288330078125, 0.00577545166015625, 0.007686614990234375, 0.0095977783203125, 0.011508941650390625, 0.01342010498046875, 0.015331268310546875, 0.017242431640625, 0.019153594970703125, 0.02106475830078125, 0.022975921630859375, 0.0248870849609375, 0.026798248291015625, 0.02870941162109375, 0.030620574951171875, 0.03253173828125, 0.034442901611328125, 0.03635406494140625, 0.038265228271484375, 0.0401763916015625, 0.042087554931640625, 0.04399871826171875, 0.045909881591796875, 0.047821044921875, 0.049732208251953125, 0.05164337158203125, 0.053554534912109375, 0.0554656982421875, 0.057376861572265625, 0.05928802490234375, 0.061199188232421875, 0.0631103515625]}, "gradients/decoder.transformer.h.20.crossattention.q_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 4.0, 3.0, 0.0, 2.0, 1.0, 5.0, 2.0, 7.0, 11.0, 8.0, 10.0, 5.0, 21.0, 15.0, 23.0, 37.0, 18.0, 40.0, 33.0, 92.0, 37.0, 108.0, 94.0, 84.0, 79.0, 31.0, 54.0, 25.0, 32.0, 14.0, 24.0, 18.0, 9.0, 19.0, 4.0, 13.0, 5.0, 10.0, 2.0, 2.0, 2.0, 0.0, 3.0, 0.0, 1.0, 1.0, 3.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-2.9206275939941406e-06, -2.8274953365325928e-06, -2.734363079071045e-06, -2.641230821609497e-06, -2.5480985641479492e-06, -2.4549663066864014e-06, -2.3618340492248535e-06, -2.2687017917633057e-06, -2.175569534301758e-06, -2.08243727684021e-06, -1.989305019378662e-06, -1.8961727619171143e-06, -1.8030405044555664e-06, -1.7099082469940186e-06, -1.6167759895324707e-06, -1.5236437320709229e-06, -1.430511474609375e-06, -1.3373792171478271e-06, -1.2442469596862793e-06, -1.1511147022247314e-06, -1.0579824447631836e-06, -9.648501873016357e-07, -8.717179298400879e-07, -7.7858567237854e-07, -6.854534149169922e-07, -5.923211574554443e-07, -4.991888999938965e-07, -4.0605664253234863e-07, -3.129243850708008e-07, -2.1979212760925293e-07, -1.2665987014770508e-07, -3.3527612686157227e-08, 5.960464477539063e-08, 1.5273690223693848e-07, 2.4586915969848633e-07, 3.390014171600342e-07, 4.3213367462158203e-07, 5.252659320831299e-07, 6.183981895446777e-07, 7.115304470062256e-07, 8.046627044677734e-07, 8.977949619293213e-07, 9.909272193908691e-07, 1.084059476852417e-06, 1.1771917343139648e-06, 1.2703239917755127e-06, 1.3634562492370605e-06, 1.4565885066986084e-06, 1.5497207641601562e-06, 1.642853021621704e-06, 1.735985279083252e-06, 1.8291175365447998e-06, 1.9222497940063477e-06, 2.0153820514678955e-06, 2.1085143089294434e-06, 2.201646566390991e-06, 2.294778823852539e-06, 2.387911081314087e-06, 2.4810433387756348e-06, 2.5741755962371826e-06, 2.6673078536987305e-06, 2.7604401111602783e-06, 2.853572368621826e-06, 2.946704626083374e-06, 3.039836883544922e-06]}, "gradients/decoder.transformer.h.20.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 2.0, 1.0, 1.0, 2.0, 4.0, 6.0, 1.0, 3.0, 7.0, 15.0, 12.0, 15.0, 22.0, 38.0, 46.0, 65.0, 72.0, 83.0, 139.0, 208.0, 333.0, 1714.0, 31121.0, 954588.0, 56486.0, 2407.0, 472.0, 188.0, 128.0, 86.0, 70.0, 57.0, 32.0, 23.0, 22.0, 14.0, 16.0, 22.0, 16.0, 6.0, 5.0, 4.0, 4.0, 1.0, 5.0, 0.0, 3.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-5.233287811279297e-05, -5.072634667158127e-05, -4.911981523036957e-05, -4.751328378915787e-05, -4.590675234794617e-05, -4.4300220906734467e-05, -4.2693689465522766e-05, -4.1087158024311066e-05, -3.9480626583099365e-05, -3.7874095141887665e-05, -3.6267563700675964e-05, -3.4661032259464264e-05, -3.3054500818252563e-05, -3.144796937704086e-05, -2.9841437935829163e-05, -2.8234906494617462e-05, -2.6628375053405762e-05, -2.502184361219406e-05, -2.341531217098236e-05, -2.180878072977066e-05, -2.020224928855896e-05, -1.859571784734726e-05, -1.698918640613556e-05, -1.538265496492386e-05, -1.3776123523712158e-05, -1.2169592082500458e-05, -1.0563060641288757e-05, -8.956529200077057e-06, -7.3499977588653564e-06, -5.743466317653656e-06, -4.1369348764419556e-06, -2.530403435230255e-06, -9.238719940185547e-07, 6.826594471931458e-07, 2.289190888404846e-06, 3.895722329616547e-06, 5.502253770828247e-06, 7.1087852120399475e-06, 8.715316653251648e-06, 1.0321848094463348e-05, 1.1928379535675049e-05, 1.353491097688675e-05, 1.514144241809845e-05, 1.674797385931015e-05, 1.835450530052185e-05, 1.996103674173355e-05, 2.156756818294525e-05, 2.3174099624156952e-05, 2.4780631065368652e-05, 2.6387162506580353e-05, 2.7993693947792053e-05, 2.9600225389003754e-05, 3.1206756830215454e-05, 3.2813288271427155e-05, 3.4419819712638855e-05, 3.6026351153850555e-05, 3.7632882595062256e-05, 3.9239414036273956e-05, 4.084594547748566e-05, 4.245247691869736e-05, 4.405900835990906e-05, 4.566553980112076e-05, 4.727207124233246e-05, 4.887860268354416e-05, 5.048513412475586e-05]}, "gradients/decoder.transformer.h.20.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 10.0, 35.0, 187.0, 440.0, 258.0, 67.0, 18.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.4266997823142447e-05, -1.3954485439171549e-05, -1.3641973055200651e-05, -1.3329459761735052e-05, -1.3016947377764154e-05, -1.2704434993793257e-05, -1.2391922609822359e-05, -1.2079410225851461e-05, -1.1766896932385862e-05, -1.1454384548414964e-05, -1.1141872164444067e-05, -1.0829358870978467e-05, -1.051684648700757e-05, -1.0204334103036672e-05, -9.891821719065774e-06, -9.579309335094877e-06, -9.266796951123979e-06, -8.954284567153081e-06, -8.641772183182184e-06, -8.329258889716584e-06, -8.016746505745687e-06, -7.704234121774789e-06, -7.391721737803891e-06, -7.079209353832994e-06, -6.766696515114745e-06, -6.4541841311438475e-06, -6.141671292425599e-06, -5.829158908454701e-06, -5.516646524483804e-06, -5.204133685765555e-06, -4.8916213017946575e-06, -4.579108463076409e-06, -4.26659562435816e-06, -3.954083240387263e-06, -3.6415704016690142e-06, -3.3290580176981166e-06, -3.0165454063535435e-06, -2.7040327950089704e-06, -2.3915204110380728e-06, -2.0790077996934997e-06, -1.7664951883489266e-06, -1.4539825770043535e-06, -1.1414700793466181e-06, -8.289575248454639e-07, -5.164449703443097e-07, -2.039323589997366e-07, 1.0858013865799876e-07, 4.210926363157341e-07, 7.336052476603072e-07, 1.0461178590048803e-06, 1.3586303566626157e-06, 1.671142854320351e-06, 1.983655465664924e-06, 2.296168077009497e-06, 2.608680460980395e-06, 2.921193072324968e-06, 3.233705683669541e-06, 3.546218295014114e-06, 3.858730906358687e-06, 4.171243290329585e-06, 4.483756129047833e-06, 4.796268513018731e-06, 5.108780896989629e-06, 5.421293280960526e-06, 5.733806119678775e-06]}, "gradients/decoder.transformer.h.20.ln_cross_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 0.0, 2.0, 3.0, 0.0, 7.0, 5.0, 6.0, 0.0, 19.0, 8.0, 0.0, 22.0, 23.0, 0.0, 29.0, 35.0, 25.0, 0.0, 35.0, 53.0, 0.0, 48.0, 48.0, 0.0, 53.0, 63.0, 43.0, 0.0, 60.0, 53.0, 0.0, 51.0, 47.0, 47.0, 0.0, 33.0, 26.0, 0.0, 37.0, 36.0, 0.0, 30.0, 9.0, 11.0, 0.0, 19.0, 8.0, 0.0, 6.0, 3.0, 0.0, 6.0, 2.0, 3.0, 0.0, 3.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.2516975402832031e-06, -1.2097880244255066e-06, -1.16787850856781e-06, -1.1259689927101135e-06, -1.084059476852417e-06, -1.0421499609947205e-06, -1.000240445137024e-06, -9.583309292793274e-07, -9.164214134216309e-07, -8.745118975639343e-07, -8.326023817062378e-07, -7.906928658485413e-07, -7.487833499908447e-07, -7.068738341331482e-07, -6.649643182754517e-07, -6.230548024177551e-07, -5.811452865600586e-07, -5.392357707023621e-07, -4.973262548446655e-07, -4.55416738986969e-07, -4.1350722312927246e-07, -3.7159770727157593e-07, -3.296881914138794e-07, -2.8777867555618286e-07, -2.4586915969848633e-07, -2.039596438407898e-07, -1.6205012798309326e-07, -1.2014061212539673e-07, -7.82310962677002e-08, -3.632158041000366e-08, 5.587935447692871e-09, 4.7497451305389404e-08, 8.940696716308594e-08, 1.3131648302078247e-07, 1.73225998878479e-07, 2.1513551473617554e-07, 2.5704503059387207e-07, 2.989545464515686e-07, 3.4086406230926514e-07, 3.8277357816696167e-07, 4.246830940246582e-07, 4.6659260988235474e-07, 5.085021257400513e-07, 5.504116415977478e-07, 5.923211574554443e-07, 6.342306733131409e-07, 6.761401891708374e-07, 7.180497050285339e-07, 7.599592208862305e-07, 8.01868736743927e-07, 8.437782526016235e-07, 8.856877684593201e-07, 9.275972843170166e-07, 9.695068001747131e-07, 1.0114163160324097e-06, 1.0533258318901062e-06, 1.0952353477478027e-06, 1.1371448636054993e-06, 1.1790543794631958e-06, 1.2209638953208923e-06, 1.2628734111785889e-06, 1.3047829270362854e-06, 1.346692442893982e-06, 1.3886019587516785e-06, 1.430511474609375e-06]}, "gradients/decoder.transformer.h.20.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 3.0, 3.0, 2.0, 1.0, 2.0, 2.0, 6.0, 3.0, 2.0, 2.0, 5.0, 10.0, 12.0, 5.0, 10.0, 18.0, 18.0, 20.0, 23.0, 21.0, 36.0, 27.0, 34.0, 34.0, 41.0, 37.0, 32.0, 38.0, 42.0, 43.0, 43.0, 45.0, 38.0, 46.0, 43.0, 36.0, 27.0, 27.0, 26.0, 24.0, 20.0, 26.0, 14.0, 10.0, 9.0, 8.0, 9.0, 3.0, 9.0, 4.0, 3.0, 5.0, 3.0, 2.0, 3.0, 1.0, 0.0, 3.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-3.236328125, -3.12933349609375, -3.0223388671875, -2.91534423828125, -2.808349609375, -2.70135498046875, -2.5943603515625, -2.48736572265625, -2.38037109375, -2.27337646484375, -2.1663818359375, -2.05938720703125, -1.952392578125, -1.84539794921875, -1.7384033203125, -1.63140869140625, -1.5244140625, -1.41741943359375, -1.3104248046875, -1.20343017578125, -1.096435546875, -0.98944091796875, -0.8824462890625, -0.77545166015625, -0.66845703125, -0.56146240234375, -0.4544677734375, -0.34747314453125, -0.240478515625, -0.13348388671875, -0.0264892578125, 0.08050537109375, 0.1875, 0.29449462890625, 0.4014892578125, 0.50848388671875, 0.615478515625, 0.72247314453125, 0.8294677734375, 0.93646240234375, 1.04345703125, 1.15045166015625, 1.2574462890625, 1.36444091796875, 1.471435546875, 1.57843017578125, 1.6854248046875, 1.79241943359375, 1.8994140625, 2.00640869140625, 2.1134033203125, 2.22039794921875, 2.327392578125, 2.43438720703125, 2.5413818359375, 2.64837646484375, 2.75537109375, 2.86236572265625, 2.9693603515625, 3.07635498046875, 3.183349609375, 3.29034423828125, 3.3973388671875, 3.50433349609375, 3.611328125]}, "gradients/decoder.transformer.h.20.attn.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 3.0, 4.0, 5.0, 9.0, 13.0, 6.0, 32.0, 32.0, 56.0, 86.0, 96.0, 193.0, 240.0, 399.0, 665.0, 1128.0, 2240.0, 4376.0, 9082.0, 21298.0, 90518.0, 803379.0, 76933.0, 20078.0, 8637.0, 4118.0, 2028.0, 1155.0, 614.0, 404.0, 251.0, 154.0, 104.0, 58.0, 53.0, 44.0, 24.0, 19.0, 16.0, 7.0, 1.0, 3.0, 2.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.796875, -7.52294921875, -7.2490234375, -6.97509765625, -6.701171875, -6.42724609375, -6.1533203125, -5.87939453125, -5.60546875, -5.33154296875, -5.0576171875, -4.78369140625, -4.509765625, -4.23583984375, -3.9619140625, -3.68798828125, -3.4140625, -3.14013671875, -2.8662109375, -2.59228515625, -2.318359375, -2.04443359375, -1.7705078125, -1.49658203125, -1.22265625, -0.94873046875, -0.6748046875, -0.40087890625, -0.126953125, 0.14697265625, 0.4208984375, 0.69482421875, 0.96875, 1.24267578125, 1.5166015625, 1.79052734375, 2.064453125, 2.33837890625, 2.6123046875, 2.88623046875, 3.16015625, 3.43408203125, 3.7080078125, 3.98193359375, 4.255859375, 4.52978515625, 4.8037109375, 5.07763671875, 5.3515625, 5.62548828125, 5.8994140625, 6.17333984375, 6.447265625, 6.72119140625, 6.9951171875, 7.26904296875, 7.54296875, 7.81689453125, 8.0908203125, 8.36474609375, 8.638671875, 8.91259765625, 9.1865234375, 9.46044921875, 9.734375]}, "gradients/decoder.transformer.h.20.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 0.0, 1.0, 4.0, 1.0, 7.0, 7.0, 7.0, 14.0, 19.0, 20.0, 25.0, 27.0, 36.0, 48.0, 47.0, 53.0, 72.0, 80.0, 104.0, 1889.0, 127.0, 71.0, 64.0, 58.0, 45.0, 50.0, 38.0, 38.0, 22.0, 17.0, 18.0, 10.0, 9.0, 11.0, 5.0, 9.0, 1.0, 4.0, 5.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-17.09375, -16.56884765625, -16.0439453125, -15.51904296875, -14.994140625, -14.46923828125, -13.9443359375, -13.41943359375, -12.89453125, -12.36962890625, -11.8447265625, -11.31982421875, -10.794921875, -10.27001953125, -9.7451171875, -9.22021484375, -8.6953125, -8.17041015625, -7.6455078125, -7.12060546875, -6.595703125, -6.07080078125, -5.5458984375, -5.02099609375, -4.49609375, -3.97119140625, -3.4462890625, -2.92138671875, -2.396484375, -1.87158203125, -1.3466796875, -0.82177734375, -0.296875, 0.22802734375, 0.7529296875, 1.27783203125, 1.802734375, 2.32763671875, 2.8525390625, 3.37744140625, 3.90234375, 4.42724609375, 4.9521484375, 5.47705078125, 6.001953125, 6.52685546875, 7.0517578125, 7.57666015625, 8.1015625, 8.62646484375, 9.1513671875, 9.67626953125, 10.201171875, 10.72607421875, 11.2509765625, 11.77587890625, 12.30078125, 12.82568359375, 13.3505859375, 13.87548828125, 14.400390625, 14.92529296875, 15.4501953125, 15.97509765625, 16.5]}, "gradients/decoder.transformer.h.20.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 4.0, 0.0, 5.0, 3.0, 8.0, 12.0, 5.0, 17.0, 11.0, 18.0, 35.0, 47.0, 59.0, 111.0, 201.0, 403.0, 991.0, 3200.0, 15307.0, 141097.0, 2926887.0, 46542.0, 7556.0, 1862.0, 676.0, 271.0, 135.0, 76.0, 44.0, 31.0, 24.0, 18.0, 16.0, 13.0, 6.0, 15.0, 3.0, 4.0, 2.0, 2.0, 0.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-22.046875, -21.267578125, -20.48828125, -19.708984375, -18.9296875, -18.150390625, -17.37109375, -16.591796875, -15.8125, -15.033203125, -14.25390625, -13.474609375, -12.6953125, -11.916015625, -11.13671875, -10.357421875, -9.578125, -8.798828125, -8.01953125, -7.240234375, -6.4609375, -5.681640625, -4.90234375, -4.123046875, -3.34375, -2.564453125, -1.78515625, -1.005859375, -0.2265625, 0.552734375, 1.33203125, 2.111328125, 2.890625, 3.669921875, 4.44921875, 5.228515625, 6.0078125, 6.787109375, 7.56640625, 8.345703125, 9.125, 9.904296875, 10.68359375, 11.462890625, 12.2421875, 13.021484375, 13.80078125, 14.580078125, 15.359375, 16.138671875, 16.91796875, 17.697265625, 18.4765625, 19.255859375, 20.03515625, 20.814453125, 21.59375, 22.373046875, 23.15234375, 23.931640625, 24.7109375, 25.490234375, 26.26953125, 27.048828125, 27.828125]}, "gradients/decoder.transformer.h.20.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 188.0, 805.0, 24.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-73.20474243164062, -65.29386901855469, -57.382999420166016, -49.472129821777344, -41.561256408691406, -33.65038299560547, -25.739513397216797, -17.828643798828125, -9.917770385742188, -2.006898880004883, 5.903972625732422, 13.814844131469727, 21.72571563720703, 29.63658905029297, 37.54745864868164, 45.45832824707031, 53.36920166015625, 61.28007507324219, 69.19094848632812, 77.10181427001953, 85.01268768310547, 92.9235610961914, 100.83442687988281, 108.74530029296875, 116.65617370605469, 124.56704711914062, 132.47792053222656, 140.3887939453125, 148.29965209960938, 156.21054077148438, 164.12139892578125, 172.0322723388672, 179.94314575195312, 187.85401916503906, 195.764892578125, 203.67576599121094, 211.58663940429688, 219.49749755859375, 227.4083709716797, 235.31924438476562, 243.23011779785156, 251.1409912109375, 259.0518493652344, 266.9627380371094, 274.87359619140625, 282.78448486328125, 290.6953430175781, 298.606201171875, 306.51708984375, 314.4279479980469, 322.3388366699219, 330.24969482421875, 338.16058349609375, 346.0714416503906, 353.9823303222656, 361.8931884765625, 369.8040771484375, 377.7149353027344, 385.6258239746094, 393.53668212890625, 401.44757080078125, 409.3584289550781, 417.2693176269531, 425.18017578125, 433.0910339355469]}, "gradients/decoder.transformer.h.20.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 3.0, 6.0, 4.0, 8.0, 6.0, 4.0, 11.0, 16.0, 21.0, 23.0, 14.0, 21.0, 15.0, 32.0, 32.0, 30.0, 38.0, 40.0, 41.0, 48.0, 43.0, 48.0, 35.0, 40.0, 49.0, 39.0, 48.0, 43.0, 28.0, 38.0, 26.0, 21.0, 15.0, 16.0, 16.0, 8.0, 9.0, 16.0, 6.0, 8.0, 8.0, 11.0, 5.0, 5.0, 4.0, 8.0, 2.0, 1.0, 1.0, 3.0, 1.0, 0.0, 2.0, 1.0], "bins": [-41.92207336425781, -40.62774658203125, -39.33341979980469, -38.039093017578125, -36.74476623535156, -35.450439453125, -34.15611267089844, -32.861785888671875, -31.56745719909668, -30.273130416870117, -28.978803634643555, -27.684476852416992, -26.390148162841797, -25.095821380615234, -23.801494598388672, -22.50716781616211, -21.212841033935547, -19.918514251708984, -18.624187469482422, -17.32986068725586, -16.035533905029297, -14.741206169128418, -13.446878433227539, -12.152551651000977, -10.858224868774414, -9.563898086547852, -8.269571304321289, -6.97524356842041, -5.680916786193848, -4.386590003967285, -3.0922627449035645, -1.7979354858398438, -0.5036048889160156, 0.790722131729126, 2.0850491523742676, 3.379376173019409, 4.673703193664551, 5.968029975891113, 7.262357234954834, 8.556684494018555, 9.851011276245117, 11.14533805847168, 12.439664840698242, 13.733992576599121, 15.028319358825684, 16.322647094726562, 17.616973876953125, 18.911300659179688, 20.20562744140625, 21.499954223632812, 22.794281005859375, 24.088607788085938, 25.3829345703125, 26.677261352539062, 27.971590042114258, 29.26591682434082, 30.560243606567383, 31.854570388793945, 33.14889907836914, 34.4432258605957, 35.737552642822266, 37.03187942504883, 38.32620620727539, 39.62053298950195, 40.914859771728516]}, "gradients/decoder.transformer.h.19.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 4.0, 2.0, 1.0, 1.0, 4.0, 3.0, 4.0, 3.0, 4.0, 2.0, 15.0, 6.0, 7.0, 13.0, 16.0, 20.0, 25.0, 22.0, 31.0, 29.0, 33.0, 34.0, 37.0, 45.0, 42.0, 42.0, 42.0, 43.0, 47.0, 44.0, 39.0, 46.0, 39.0, 42.0, 36.0, 27.0, 27.0, 22.0, 23.0, 17.0, 13.0, 14.0, 6.0, 7.0, 6.0, 10.0, 2.0, 5.0, 4.0, 3.0, 4.0, 1.0, 0.0, 2.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.39453125, -3.27899169921875, -3.1634521484375, -3.04791259765625, -2.932373046875, -2.81683349609375, -2.7012939453125, -2.58575439453125, -2.47021484375, -2.35467529296875, -2.2391357421875, -2.12359619140625, -2.008056640625, -1.89251708984375, -1.7769775390625, -1.66143798828125, -1.5458984375, -1.43035888671875, -1.3148193359375, -1.19927978515625, -1.083740234375, -0.96820068359375, -0.8526611328125, -0.73712158203125, -0.62158203125, -0.50604248046875, -0.3905029296875, -0.27496337890625, -0.159423828125, -0.04388427734375, 0.0716552734375, 0.18719482421875, 0.302734375, 0.41827392578125, 0.5338134765625, 0.64935302734375, 0.764892578125, 0.88043212890625, 0.9959716796875, 1.11151123046875, 1.22705078125, 1.34259033203125, 1.4581298828125, 1.57366943359375, 1.689208984375, 1.80474853515625, 1.9202880859375, 2.03582763671875, 2.1513671875, 2.26690673828125, 2.3824462890625, 2.49798583984375, 2.613525390625, 2.72906494140625, 2.8446044921875, 2.96014404296875, 3.07568359375, 3.19122314453125, 3.3067626953125, 3.42230224609375, 3.537841796875, 3.65338134765625, 3.7689208984375, 3.88446044921875, 4.0]}, "gradients/decoder.transformer.h.19.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 0.0, 4.0, 5.0, 8.0, 2.0, 16.0, 4.0, 15.0, 26.0, 33.0, 50.0, 83.0, 120.0, 164.0, 271.0, 502.0, 1107.0, 3332.0, 12850.0, 75810.0, 1718597.0, 2271469.0, 89402.0, 14162.0, 3653.0, 1195.0, 573.0, 313.0, 165.0, 107.0, 78.0, 53.0, 29.0, 29.0, 18.0, 9.0, 10.0, 11.0, 1.0, 6.0, 7.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-13.328125, -12.852783203125, -12.37744140625, -11.902099609375, -11.4267578125, -10.951416015625, -10.47607421875, -10.000732421875, -9.525390625, -9.050048828125, -8.57470703125, -8.099365234375, -7.6240234375, -7.148681640625, -6.67333984375, -6.197998046875, -5.72265625, -5.247314453125, -4.77197265625, -4.296630859375, -3.8212890625, -3.345947265625, -2.87060546875, -2.395263671875, -1.919921875, -1.444580078125, -0.96923828125, -0.493896484375, -0.0185546875, 0.456787109375, 0.93212890625, 1.407470703125, 1.8828125, 2.358154296875, 2.83349609375, 3.308837890625, 3.7841796875, 4.259521484375, 4.73486328125, 5.210205078125, 5.685546875, 6.160888671875, 6.63623046875, 7.111572265625, 7.5869140625, 8.062255859375, 8.53759765625, 9.012939453125, 9.48828125, 9.963623046875, 10.43896484375, 10.914306640625, 11.3896484375, 11.864990234375, 12.34033203125, 12.815673828125, 13.291015625, 13.766357421875, 14.24169921875, 14.717041015625, 15.1923828125, 15.667724609375, 16.14306640625, 16.618408203125, 17.09375]}, "gradients/decoder.transformer.h.19.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 4.0, 6.0, 3.0, 6.0, 2.0, 1.0, 4.0, 13.0, 9.0, 8.0, 18.0, 31.0, 36.0, 85.0, 101.0, 165.0, 299.0, 508.0, 897.0, 747.0, 460.0, 234.0, 138.0, 100.0, 74.0, 37.0, 23.0, 20.0, 15.0, 13.0, 5.0, 7.0, 2.0, 3.0, 2.0, 1.0, 1.0, 1.0, 2.0, 1.0, 0.0, 3.0, 1.0, 1.0, 1.0, 1.0], "bins": [-15.84375, -15.4134521484375, -14.983154296875, -14.5528564453125, -14.12255859375, -13.6922607421875, -13.261962890625, -12.8316650390625, -12.4013671875, -11.9710693359375, -11.540771484375, -11.1104736328125, -10.68017578125, -10.2498779296875, -9.819580078125, -9.3892822265625, -8.958984375, -8.5286865234375, -8.098388671875, -7.6680908203125, -7.23779296875, -6.8074951171875, -6.377197265625, -5.9468994140625, -5.5166015625, -5.0863037109375, -4.656005859375, -4.2257080078125, -3.79541015625, -3.3651123046875, -2.934814453125, -2.5045166015625, -2.07421875, -1.6439208984375, -1.213623046875, -0.7833251953125, -0.35302734375, 0.0772705078125, 0.507568359375, 0.9378662109375, 1.3681640625, 1.7984619140625, 2.228759765625, 2.6590576171875, 3.08935546875, 3.5196533203125, 3.949951171875, 4.3802490234375, 4.810546875, 5.2408447265625, 5.671142578125, 6.1014404296875, 6.53173828125, 6.9620361328125, 7.392333984375, 7.8226318359375, 8.2529296875, 8.6832275390625, 9.113525390625, 9.5438232421875, 9.97412109375, 10.4044189453125, 10.834716796875, 11.2650146484375, 11.6953125]}, "gradients/decoder.transformer.h.19.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 2.0, 1.0, 0.0, 3.0, 2.0, 2.0, 4.0, 3.0, 8.0, 7.0, 11.0, 32.0, 46.0, 72.0, 118.0, 234.0, 427.0, 1022.0, 2707.0, 10563.0, 81481.0, 2842269.0, 1193070.0, 50665.0, 7681.0, 2210.0, 830.0, 370.0, 173.0, 99.0, 77.0, 39.0, 23.0, 16.0, 8.0, 9.0, 4.0, 4.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-25.515625, -24.530517578125, -23.54541015625, -22.560302734375, -21.5751953125, -20.590087890625, -19.60498046875, -18.619873046875, -17.634765625, -16.649658203125, -15.66455078125, -14.679443359375, -13.6943359375, -12.709228515625, -11.72412109375, -10.739013671875, -9.75390625, -8.768798828125, -7.78369140625, -6.798583984375, -5.8134765625, -4.828369140625, -3.84326171875, -2.858154296875, -1.873046875, -0.887939453125, 0.09716796875, 1.082275390625, 2.0673828125, 3.052490234375, 4.03759765625, 5.022705078125, 6.0078125, 6.992919921875, 7.97802734375, 8.963134765625, 9.9482421875, 10.933349609375, 11.91845703125, 12.903564453125, 13.888671875, 14.873779296875, 15.85888671875, 16.843994140625, 17.8291015625, 18.814208984375, 19.79931640625, 20.784423828125, 21.76953125, 22.754638671875, 23.73974609375, 24.724853515625, 25.7099609375, 26.695068359375, 27.68017578125, 28.665283203125, 29.650390625, 30.635498046875, 31.62060546875, 32.605712890625, 33.5908203125, 34.575927734375, 35.56103515625, 36.546142578125, 37.53125]}, "gradients/decoder.transformer.h.19.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 5.0, 8.0, 26.0, 70.0, 181.0, 256.0, 215.0, 154.0, 61.0, 21.0, 11.0, 6.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-171.47047424316406, -167.29556274414062, -163.1206512451172, -158.94573974609375, -154.7708282470703, -150.59591674804688, -146.42100524902344, -142.24609375, -138.0711669921875, -133.89625549316406, -129.72134399414062, -125.54643249511719, -121.37152099609375, -117.19660949707031, -113.02169036865234, -108.8467788696289, -104.671875, -100.49696350097656, -96.32205200195312, -92.14714050292969, -87.97222900390625, -83.79731750488281, -79.62239837646484, -75.4474868774414, -71.27257537841797, -67.09766387939453, -62.922752380371094, -58.74783706665039, -54.57292556762695, -50.398014068603516, -46.22309875488281, -42.048187255859375, -37.873268127441406, -33.69835662841797, -29.5234432220459, -25.348529815673828, -21.17361831665039, -16.998706817626953, -12.823793411254883, -8.648880004882812, -4.473968505859375, -0.2990560531616211, 3.875856399536133, 8.050768852233887, 12.22568130493164, 16.400592803955078, 20.57550621032715, 24.75041961669922, 28.925331115722656, 33.100242614746094, 37.27515411376953, 41.450069427490234, 45.62498092651367, 49.79989242553711, 53.97480773925781, 58.14971923828125, 62.32463073730469, 66.49954223632812, 70.67445373535156, 74.849365234375, 79.02427673339844, 83.19918823242188, 87.37410736083984, 91.54901885986328, 95.72393035888672]}, "gradients/decoder.transformer.h.19.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0, 3.0, 3.0, 3.0, 2.0, 5.0, 8.0, 7.0, 5.0, 12.0, 15.0, 15.0, 10.0, 19.0, 22.0, 16.0, 29.0, 24.0, 32.0, 25.0, 24.0, 33.0, 41.0, 42.0, 42.0, 35.0, 40.0, 54.0, 40.0, 37.0, 43.0, 36.0, 34.0, 25.0, 37.0, 23.0, 30.0, 22.0, 19.0, 15.0, 14.0, 12.0, 8.0, 7.0, 14.0, 9.0, 6.0, 8.0, 4.0, 2.0, 3.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-36.68695068359375, -35.592689514160156, -34.49842834472656, -33.40416717529297, -32.309906005859375, -31.21564292907715, -30.121379852294922, -29.027118682861328, -27.932857513427734, -26.83859634399414, -25.744335174560547, -24.65007209777832, -23.555810928344727, -22.461549758911133, -21.367286682128906, -20.273025512695312, -19.17876434326172, -18.084503173828125, -16.99024200439453, -15.895978927612305, -14.801717758178711, -13.707456588745117, -12.613194465637207, -11.518932342529297, -10.424671173095703, -9.33041000366211, -8.2361478805542, -7.141886234283447, -6.047624588012695, -4.953362941741943, -3.8591012954711914, -2.7648396492004395, -1.6705780029296875, -0.5763163566589355, 0.5179452896118164, 1.6122069358825684, 2.7064685821533203, 3.8007302284240723, 4.894991874694824, 5.989253520965576, 7.083515167236328, 8.177776336669922, 9.272038459777832, 10.366300582885742, 11.460561752319336, 12.55482292175293, 13.64908504486084, 14.74334716796875, 15.837608337402344, 16.931869506835938, 18.02613067626953, 19.120393753051758, 20.21465492248535, 21.308916091918945, 22.403179168701172, 23.497440338134766, 24.59170150756836, 25.685962677001953, 26.780223846435547, 27.874486923217773, 28.968748092651367, 30.06300926208496, 31.157272338867188, 32.25153350830078, 33.345794677734375]}, "gradients/decoder.transformer.h.19.crossattention.c_proj.bias": {"_type": "histogram", "values": [3.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 4.0, 5.0, 3.0, 5.0, 10.0, 10.0, 8.0, 10.0, 18.0, 11.0, 33.0, 26.0, 28.0, 31.0, 34.0, 37.0, 45.0, 47.0, 30.0, 49.0, 39.0, 56.0, 34.0, 41.0, 44.0, 45.0, 49.0, 36.0, 29.0, 30.0, 16.0, 32.0, 12.0, 14.0, 15.0, 13.0, 9.0, 7.0, 9.0, 5.0, 5.0, 8.0, 2.0, 1.0, 0.0, 4.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.544921875, -3.423065185546875, -3.30120849609375, -3.179351806640625, -3.0574951171875, -2.935638427734375, -2.81378173828125, -2.691925048828125, -2.570068359375, -2.448211669921875, -2.32635498046875, -2.204498291015625, -2.0826416015625, -1.960784912109375, -1.83892822265625, -1.717071533203125, -1.59521484375, -1.473358154296875, -1.35150146484375, -1.229644775390625, -1.1077880859375, -0.985931396484375, -0.86407470703125, -0.742218017578125, -0.620361328125, -0.498504638671875, -0.37664794921875, -0.254791259765625, -0.1329345703125, -0.011077880859375, 0.11077880859375, 0.232635498046875, 0.3544921875, 0.476348876953125, 0.59820556640625, 0.720062255859375, 0.8419189453125, 0.963775634765625, 1.08563232421875, 1.207489013671875, 1.329345703125, 1.451202392578125, 1.57305908203125, 1.694915771484375, 1.8167724609375, 1.938629150390625, 2.06048583984375, 2.182342529296875, 2.30419921875, 2.426055908203125, 2.54791259765625, 2.669769287109375, 2.7916259765625, 2.913482666015625, 3.03533935546875, 3.157196044921875, 3.279052734375, 3.400909423828125, 3.52276611328125, 3.644622802734375, 3.7664794921875, 3.888336181640625, 4.01019287109375, 4.132049560546875, 4.25390625]}, "gradients/decoder.transformer.h.19.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0, 7.0, 16.0, 21.0, 20.0, 35.0, 53.0, 65.0, 82.0, 128.0, 183.0, 233.0, 395.0, 561.0, 843.0, 1219.0, 1867.0, 2772.0, 4430.0, 6638.0, 10800.0, 17930.0, 30715.0, 55415.0, 108874.0, 300835.0, 268158.0, 105360.0, 53692.0, 29880.0, 17573.0, 10641.0, 6500.0, 4261.0, 2721.0, 1807.0, 1198.0, 842.0, 548.0, 396.0, 230.0, 195.0, 125.0, 77.0, 67.0, 50.0, 26.0, 25.0, 17.0, 14.0, 7.0, 6.0, 6.0, 3.0, 1.0, 0.0, 1.0], "bins": [-0.1649169921875, -0.15991592407226562, -0.15491485595703125, -0.14991378784179688, -0.1449127197265625, -0.13991165161132812, -0.13491058349609375, -0.12990951538085938, -0.124908447265625, -0.11990737915039062, -0.11490631103515625, -0.10990524291992188, -0.1049041748046875, -0.09990310668945312, -0.09490203857421875, -0.08990097045898438, -0.08489990234375, -0.07989883422851562, -0.07489776611328125, -0.06989669799804688, -0.0648956298828125, -0.059894561767578125, -0.05489349365234375, -0.049892425537109375, -0.044891357421875, -0.039890289306640625, -0.03488922119140625, -0.029888153076171875, -0.0248870849609375, -0.019886016845703125, -0.01488494873046875, -0.009883880615234375, -0.0048828125, 0.000118255615234375, 0.00511932373046875, 0.010120391845703125, 0.0151214599609375, 0.020122528076171875, 0.02512359619140625, 0.030124664306640625, 0.035125732421875, 0.040126800537109375, 0.04512786865234375, 0.050128936767578125, 0.0551300048828125, 0.060131072998046875, 0.06513214111328125, 0.07013320922851562, 0.07513427734375, 0.08013534545898438, 0.08513641357421875, 0.09013748168945312, 0.0951385498046875, 0.10013961791992188, 0.10514068603515625, 0.11014175415039062, 0.115142822265625, 0.12014389038085938, 0.12514495849609375, 0.13014602661132812, 0.1351470947265625, 0.14014816284179688, 0.14514923095703125, 0.15015029907226562, 0.1551513671875]}, "gradients/decoder.transformer.h.19.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 1.0, 2.0, 3.0, 1.0, 0.0, 2.0, 4.0, 5.0, 3.0, 5.0, 6.0, 10.0, 14.0, 16.0, 13.0, 20.0, 21.0, 26.0, 29.0, 24.0, 34.0, 40.0, 38.0, 29.0, 47.0, 36.0, 42.0, 30.0, 1065.0, 48.0, 42.0, 38.0, 43.0, 28.0, 17.0, 34.0, 31.0, 26.0, 27.0, 15.0, 23.0, 12.0, 13.0, 8.0, 16.0, 7.0, 3.0, 10.0, 4.0, 8.0, 8.0, 3.0, 6.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 3.0], "bins": [-3.18359375, -3.0853271484375, -2.987060546875, -2.8887939453125, -2.79052734375, -2.6922607421875, -2.593994140625, -2.4957275390625, -2.3974609375, -2.2991943359375, -2.200927734375, -2.1026611328125, -2.00439453125, -1.9061279296875, -1.807861328125, -1.7095947265625, -1.611328125, -1.5130615234375, -1.414794921875, -1.3165283203125, -1.21826171875, -1.1199951171875, -1.021728515625, -0.9234619140625, -0.8251953125, -0.7269287109375, -0.628662109375, -0.5303955078125, -0.43212890625, -0.3338623046875, -0.235595703125, -0.1373291015625, -0.0390625, 0.0592041015625, 0.157470703125, 0.2557373046875, 0.35400390625, 0.4522705078125, 0.550537109375, 0.6488037109375, 0.7470703125, 0.8453369140625, 0.943603515625, 1.0418701171875, 1.14013671875, 1.2384033203125, 1.336669921875, 1.4349365234375, 1.533203125, 1.6314697265625, 1.729736328125, 1.8280029296875, 1.92626953125, 2.0245361328125, 2.122802734375, 2.2210693359375, 2.3193359375, 2.4176025390625, 2.515869140625, 2.6141357421875, 2.71240234375, 2.8106689453125, 2.908935546875, 3.0072021484375, 3.10546875]}, "gradients/decoder.transformer.h.19.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 3.0, 10.0, 14.0, 15.0, 24.0, 38.0, 47.0, 65.0, 92.0, 134.0, 187.0, 255.0, 356.0, 478.0, 615.0, 865.0, 1134.0, 1454.0, 2034.0, 2929.0, 4027.0, 5590.0, 7857.0, 11439.0, 16286.0, 23708.0, 35215.0, 53038.0, 83025.0, 148542.0, 1334641.0, 130967.0, 76743.0, 49176.0, 32245.0, 22093.0, 14832.0, 10467.0, 7360.0, 5288.0, 3729.0, 2772.0, 1943.0, 1456.0, 1117.0, 774.0, 522.0, 390.0, 316.0, 237.0, 174.0, 139.0, 96.0, 68.0, 39.0, 25.0, 21.0, 19.0, 11.0, 5.0, 7.0, 2.0], "bins": [-0.07635498046875, -0.07399845123291016, -0.07164192199707031, -0.06928539276123047, -0.06692886352539062, -0.06457233428955078, -0.06221580505371094, -0.059859275817871094, -0.05750274658203125, -0.055146217346191406, -0.05278968811035156, -0.05043315887451172, -0.048076629638671875, -0.04572010040283203, -0.04336357116699219, -0.041007041931152344, -0.0386505126953125, -0.036293983459472656, -0.03393745422363281, -0.03158092498779297, -0.029224395751953125, -0.02686786651611328, -0.024511337280273438, -0.022154808044433594, -0.01979827880859375, -0.017441749572753906, -0.015085220336914062, -0.012728691101074219, -0.010372161865234375, -0.008015632629394531, -0.0056591033935546875, -0.0033025741577148438, -0.000946044921875, 0.0014104843139648438, 0.0037670135498046875, 0.006123542785644531, 0.008480072021484375, 0.010836601257324219, 0.013193130493164062, 0.015549659729003906, 0.01790618896484375, 0.020262718200683594, 0.022619247436523438, 0.02497577667236328, 0.027332305908203125, 0.02968883514404297, 0.03204536437988281, 0.034401893615722656, 0.0367584228515625, 0.039114952087402344, 0.04147148132324219, 0.04382801055908203, 0.046184539794921875, 0.04854106903076172, 0.05089759826660156, 0.053254127502441406, 0.05561065673828125, 0.057967185974121094, 0.06032371520996094, 0.06268024444580078, 0.06503677368164062, 0.06739330291748047, 0.06974983215332031, 0.07210636138916016, 0.074462890625]}, "gradients/decoder.transformer.h.19.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 4.0, 8.0, 3.0, 6.0, 7.0, 3.0, 9.0, 10.0, 22.0, 17.0, 25.0, 41.0, 42.0, 62.0, 99.0, 200.0, 127.0, 67.0, 66.0, 29.0, 29.0, 19.0, 28.0, 14.0, 9.0, 10.0, 10.0, 11.0, 6.0, 7.0, 3.0, 0.0, 2.0, 6.0, 5.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.245208740234375e-06, -5.03193587064743e-06, -4.818663001060486e-06, -4.605390131473541e-06, -4.392117261886597e-06, -4.178844392299652e-06, -3.9655715227127075e-06, -3.752298653125763e-06, -3.5390257835388184e-06, -3.3257529139518738e-06, -3.112480044364929e-06, -2.8992071747779846e-06, -2.68593430519104e-06, -2.4726614356040955e-06, -2.259388566017151e-06, -2.0461156964302063e-06, -1.8328428268432617e-06, -1.6195699572563171e-06, -1.4062970876693726e-06, -1.193024218082428e-06, -9.797513484954834e-07, -7.664784789085388e-07, -5.532056093215942e-07, -3.3993273973464966e-07, -1.2665987014770508e-07, 8.66129994392395e-08, 2.998858690261841e-07, 5.131587386131287e-07, 7.264316082000732e-07, 9.397044777870178e-07, 1.1529773473739624e-06, 1.366250216960907e-06, 1.5795230865478516e-06, 1.7927959561347961e-06, 2.0060688257217407e-06, 2.2193416953086853e-06, 2.43261456489563e-06, 2.6458874344825745e-06, 2.859160304069519e-06, 3.0724331736564636e-06, 3.285706043243408e-06, 3.4989789128303528e-06, 3.7122517824172974e-06, 3.925524652004242e-06, 4.1387975215911865e-06, 4.352070391178131e-06, 4.565343260765076e-06, 4.77861613035202e-06, 4.991888999938965e-06, 5.2051618695259094e-06, 5.418434739112854e-06, 5.631707608699799e-06, 5.844980478286743e-06, 6.058253347873688e-06, 6.271526217460632e-06, 6.484799087047577e-06, 6.6980719566345215e-06, 6.911344826221466e-06, 7.124617695808411e-06, 7.337890565395355e-06, 7.5511634349823e-06, 7.764436304569244e-06, 7.977709174156189e-06, 8.190982043743134e-06, 8.404254913330078e-06]}, "gradients/decoder.transformer.h.19.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 6.0, 7.0, 4.0, 1.0, 5.0, 5.0, 11.0, 13.0, 21.0, 27.0, 21.0, 32.0, 45.0, 51.0, 94.0, 150.0, 292.0, 823.0, 16734.0, 1006926.0, 21586.0, 908.0, 285.0, 156.0, 97.0, 72.0, 51.0, 32.0, 20.0, 29.0, 18.0, 11.0, 11.0, 6.0, 3.0, 4.0, 5.0, 1.0, 1.0, 3.0, 2.0, 0.0, 1.0, 2.0], "bins": [-0.00013124942779541016, -0.00012792926281690598, -0.0001246090978384018, -0.00012128893285989761, -0.00011796876788139343, -0.00011464860290288925, -0.00011132843792438507, -0.00010800827294588089, -0.00010468810796737671, -0.00010136794298887253, -9.804777801036835e-05, -9.472761303186417e-05, -9.140744805335999e-05, -8.80872830748558e-05, -8.476711809635162e-05, -8.144695311784744e-05, -7.812678813934326e-05, -7.480662316083908e-05, -7.14864581823349e-05, -6.816629320383072e-05, -6.484612822532654e-05, -6.152596324682236e-05, -5.8205798268318176e-05, -5.4885633289813995e-05, -5.1565468311309814e-05, -4.8245303332805634e-05, -4.492513835430145e-05, -4.160497337579727e-05, -3.828480839729309e-05, -3.496464341878891e-05, -3.164447844028473e-05, -2.8324313461780548e-05, -2.5004148483276367e-05, -2.1683983504772186e-05, -1.8363818526268005e-05, -1.5043653547763824e-05, -1.1723488569259644e-05, -8.403323590755463e-06, -5.083158612251282e-06, -1.7629936337471008e-06, 1.55717134475708e-06, 4.877336323261261e-06, 8.197501301765442e-06, 1.1517666280269623e-05, 1.4837831258773804e-05, 1.8157996237277985e-05, 2.1478161215782166e-05, 2.4798326194286346e-05, 2.8118491172790527e-05, 3.143865615129471e-05, 3.475882112979889e-05, 3.807898610830307e-05, 4.139915108680725e-05, 4.471931606531143e-05, 4.803948104381561e-05, 5.1359646022319794e-05, 5.4679811000823975e-05, 5.7999975979328156e-05, 6.132014095783234e-05, 6.464030593633652e-05, 6.79604709148407e-05, 7.128063589334488e-05, 7.460080087184906e-05, 7.792096585035324e-05, 8.124113082885742e-05]}, "gradients/decoder.transformer.h.19.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 7.0, 8.0, 22.0, 28.0, 51.0, 84.0, 129.0, 207.0, 181.0, 131.0, 71.0, 47.0, 21.0, 10.0, 5.0, 6.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.699270903074648e-06, -4.536424967227504e-06, -4.373579031380359e-06, -4.2107330955332145e-06, -4.047887614433421e-06, -3.885041678586276e-06, -3.7221957427391317e-06, -3.559349806891987e-06, -3.3965038710448425e-06, -3.233657935197698e-06, -3.070812226724229e-06, -2.9079662908770842e-06, -2.7451203550299397e-06, -2.5822746465564705e-06, -2.419428710709326e-06, -2.2565827748621814e-06, -2.0937370663887123e-06, -1.9308911305415677e-06, -1.7680453083812608e-06, -1.605199486220954e-06, -1.4423535503738094e-06, -1.2795077282135026e-06, -1.1166619060531957e-06, -9.538159702060511e-07, -7.909701480457443e-07, -6.281242690420186e-07, -4.652784184600023e-07, -3.02432567877986e-07, -1.395866888742603e-07, 2.3259190129465424e-08, 1.8610501228977228e-07, 3.4895094813691685e-07, 5.117967702972237e-07, 6.746426493009494e-07, 8.374885283046751e-07, 1.000334350464982e-06, 1.1631802863121266e-06, 1.3260261084724334e-06, 1.4888719306327403e-06, 1.6517178664798848e-06, 1.8145636886401917e-06, 1.9774095108004985e-06, 2.140255446647643e-06, 2.3031011551211122e-06, 2.465947090968257e-06, 2.6287930268154014e-06, 2.791638962662546e-06, 2.9544848985096905e-06, 3.1173306069831597e-06, 3.2801765428303042e-06, 3.4430222513037734e-06, 3.605868187150918e-06, 3.7687141229980625e-06, 3.931560058845207e-06, 4.094405994692352e-06, 4.257251930539496e-06, 4.42009741163929e-06, 4.5829433474864345e-06, 4.745789283333579e-06, 4.908634764433373e-06, 5.071480700280517e-06, 5.234326636127662e-06, 5.3971725719748065e-06, 5.560018507821951e-06, 5.722864443669096e-06]}, "gradients/decoder.transformer.h.19.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 3.0, 7.0, 1.0, 2.0, 8.0, 6.0, 11.0, 2.0, 4.0, 17.0, 17.0, 27.0, 11.0, 19.0, 29.0, 13.0, 17.0, 50.0, 30.0, 53.0, 30.0, 27.0, 53.0, 30.0, 56.0, 26.0, 34.0, 70.0, 19.0, 56.0, 22.0, 25.0, 45.0, 29.0, 35.0, 15.0, 18.0, 24.0, 12.0, 8.0, 13.0, 8.0, 13.0, 8.0, 4.0, 1.0, 1.0, 4.0, 0.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-2.562999725341797e-06, -2.4801120162010193e-06, -2.3972243070602417e-06, -2.314336597919464e-06, -2.2314488887786865e-06, -2.148561179637909e-06, -2.0656734704971313e-06, -1.9827857613563538e-06, -1.8998980522155762e-06, -1.8170103430747986e-06, -1.734122633934021e-06, -1.6512349247932434e-06, -1.5683472156524658e-06, -1.4854595065116882e-06, -1.4025717973709106e-06, -1.319684088230133e-06, -1.2367963790893555e-06, -1.1539086699485779e-06, -1.0710209608078003e-06, -9.881332516670227e-07, -9.052455425262451e-07, -8.223578333854675e-07, -7.394701242446899e-07, -6.565824151039124e-07, -5.736947059631348e-07, -4.908069968223572e-07, -4.079192876815796e-07, -3.25031578540802e-07, -2.421438694000244e-07, -1.5925616025924683e-07, -7.636845111846924e-08, 6.51925802230835e-09, 8.940696716308594e-08, 1.7229467630386353e-07, 2.551823854446411e-07, 3.380700945854187e-07, 4.209578037261963e-07, 5.038455128669739e-07, 5.867332220077515e-07, 6.69620931148529e-07, 7.525086402893066e-07, 8.353963494300842e-07, 9.182840585708618e-07, 1.0011717677116394e-06, 1.084059476852417e-06, 1.1669471859931946e-06, 1.2498348951339722e-06, 1.3327226042747498e-06, 1.4156103134155273e-06, 1.498498022556305e-06, 1.5813857316970825e-06, 1.6642734408378601e-06, 1.7471611499786377e-06, 1.8300488591194153e-06, 1.912936568260193e-06, 1.9958242774009705e-06, 2.078711986541748e-06, 2.1615996956825256e-06, 2.2444874048233032e-06, 2.327375113964081e-06, 2.4102628231048584e-06, 2.493150532245636e-06, 2.5760382413864136e-06, 2.658925950527191e-06, 2.7418136596679688e-06]}, "gradients/decoder.transformer.h.19.attn.c_proj.bias": {"_type": "histogram", "values": [3.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 4.0, 5.0, 3.0, 5.0, 10.0, 10.0, 8.0, 10.0, 18.0, 11.0, 33.0, 26.0, 28.0, 31.0, 34.0, 37.0, 45.0, 47.0, 30.0, 49.0, 39.0, 56.0, 34.0, 41.0, 44.0, 45.0, 49.0, 36.0, 29.0, 30.0, 16.0, 32.0, 12.0, 14.0, 15.0, 13.0, 9.0, 7.0, 9.0, 5.0, 5.0, 8.0, 2.0, 1.0, 0.0, 4.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.544921875, -3.423065185546875, -3.30120849609375, -3.179351806640625, -3.0574951171875, -2.935638427734375, -2.81378173828125, -2.691925048828125, -2.570068359375, -2.448211669921875, -2.32635498046875, -2.204498291015625, -2.0826416015625, -1.960784912109375, -1.83892822265625, -1.717071533203125, -1.59521484375, -1.473358154296875, -1.35150146484375, -1.229644775390625, -1.1077880859375, -0.985931396484375, -0.86407470703125, -0.742218017578125, -0.620361328125, -0.498504638671875, -0.37664794921875, -0.254791259765625, -0.1329345703125, -0.011077880859375, 0.11077880859375, 0.232635498046875, 0.3544921875, 0.476348876953125, 0.59820556640625, 0.720062255859375, 0.8419189453125, 0.963775634765625, 1.08563232421875, 1.207489013671875, 1.329345703125, 1.451202392578125, 1.57305908203125, 1.694915771484375, 1.8167724609375, 1.938629150390625, 2.06048583984375, 2.182342529296875, 2.30419921875, 2.426055908203125, 2.54791259765625, 2.669769287109375, 2.7916259765625, 2.913482666015625, 3.03533935546875, 3.157196044921875, 3.279052734375, 3.400909423828125, 3.52276611328125, 3.644622802734375, 3.7664794921875, 3.888336181640625, 4.01019287109375, 4.132049560546875, 4.25390625]}, "gradients/decoder.transformer.h.19.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 4.0, 3.0, 7.0, 25.0, 44.0, 109.0, 275.0, 727.0, 2183.0, 7188.0, 32014.0, 585445.0, 384349.0, 26707.0, 6433.0, 1951.0, 668.0, 237.0, 92.0, 36.0, 22.0, 10.0, 7.0, 5.0, 2.0, 2.0, 1.0, 1.0, 1.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 2.0, 0.0, 0.0, 1.0], "bins": [-11.9609375, -11.608642578125, -11.25634765625, -10.904052734375, -10.5517578125, -10.199462890625, -9.84716796875, -9.494873046875, -9.142578125, -8.790283203125, -8.43798828125, -8.085693359375, -7.7333984375, -7.381103515625, -7.02880859375, -6.676513671875, -6.32421875, -5.971923828125, -5.61962890625, -5.267333984375, -4.9150390625, -4.562744140625, -4.21044921875, -3.858154296875, -3.505859375, -3.153564453125, -2.80126953125, -2.448974609375, -2.0966796875, -1.744384765625, -1.39208984375, -1.039794921875, -0.6875, -0.335205078125, 0.01708984375, 0.369384765625, 0.7216796875, 1.073974609375, 1.42626953125, 1.778564453125, 2.130859375, 2.483154296875, 2.83544921875, 3.187744140625, 3.5400390625, 3.892333984375, 4.24462890625, 4.596923828125, 4.94921875, 5.301513671875, 5.65380859375, 6.006103515625, 6.3583984375, 6.710693359375, 7.06298828125, 7.415283203125, 7.767578125, 8.119873046875, 8.47216796875, 8.824462890625, 9.1767578125, 9.529052734375, 9.88134765625, 10.233642578125, 10.5859375]}, "gradients/decoder.transformer.h.19.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 7.0, 8.0, 6.0, 7.0, 7.0, 10.0, 19.0, 12.0, 21.0, 27.0, 38.0, 39.0, 48.0, 50.0, 67.0, 100.0, 178.0, 1744.0, 178.0, 86.0, 78.0, 56.0, 48.0, 39.0, 39.0, 27.0, 28.0, 28.0, 17.0, 7.0, 13.0, 8.0, 6.0, 9.0, 2.0, 3.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-19.359375, -18.7978515625, -18.236328125, -17.6748046875, -17.11328125, -16.5517578125, -15.990234375, -15.4287109375, -14.8671875, -14.3056640625, -13.744140625, -13.1826171875, -12.62109375, -12.0595703125, -11.498046875, -10.9365234375, -10.375, -9.8134765625, -9.251953125, -8.6904296875, -8.12890625, -7.5673828125, -7.005859375, -6.4443359375, -5.8828125, -5.3212890625, -4.759765625, -4.1982421875, -3.63671875, -3.0751953125, -2.513671875, -1.9521484375, -1.390625, -0.8291015625, -0.267578125, 0.2939453125, 0.85546875, 1.4169921875, 1.978515625, 2.5400390625, 3.1015625, 3.6630859375, 4.224609375, 4.7861328125, 5.34765625, 5.9091796875, 6.470703125, 7.0322265625, 7.59375, 8.1552734375, 8.716796875, 9.2783203125, 9.83984375, 10.4013671875, 10.962890625, 11.5244140625, 12.0859375, 12.6474609375, 13.208984375, 13.7705078125, 14.33203125, 14.8935546875, 15.455078125, 16.0166015625, 16.578125]}, "gradients/decoder.transformer.h.19.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 3.0, 5.0, 5.0, 6.0, 9.0, 11.0, 8.0, 20.0, 22.0, 30.0, 23.0, 45.0, 51.0, 82.0, 149.0, 333.0, 903.0, 5205.0, 148183.0, 2964298.0, 23200.0, 2026.0, 528.0, 212.0, 108.0, 68.0, 46.0, 26.0, 24.0, 17.0, 16.0, 14.0, 8.0, 7.0, 6.0, 7.0, 5.0, 5.0, 4.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-26.5625, -25.655029296875, -24.74755859375, -23.840087890625, -22.9326171875, -22.025146484375, -21.11767578125, -20.210205078125, -19.302734375, -18.395263671875, -17.48779296875, -16.580322265625, -15.6728515625, -14.765380859375, -13.85791015625, -12.950439453125, -12.04296875, -11.135498046875, -10.22802734375, -9.320556640625, -8.4130859375, -7.505615234375, -6.59814453125, -5.690673828125, -4.783203125, -3.875732421875, -2.96826171875, -2.060791015625, -1.1533203125, -0.245849609375, 0.66162109375, 1.569091796875, 2.4765625, 3.384033203125, 4.29150390625, 5.198974609375, 6.1064453125, 7.013916015625, 7.92138671875, 8.828857421875, 9.736328125, 10.643798828125, 11.55126953125, 12.458740234375, 13.3662109375, 14.273681640625, 15.18115234375, 16.088623046875, 16.99609375, 17.903564453125, 18.81103515625, 19.718505859375, 20.6259765625, 21.533447265625, 22.44091796875, 23.348388671875, 24.255859375, 25.163330078125, 26.07080078125, 26.978271484375, 27.8857421875, 28.793212890625, 29.70068359375, 30.608154296875, 31.515625]}, "gradients/decoder.transformer.h.19.ln_1.weight": {"_type": "histogram", "values": [1010.0, 6.0, 3.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.32279109954834, 7.618739128112793, 27.56026840209961, 47.501800537109375, 67.44332885742188, 87.38485717773438, 107.3263931274414, 127.2679214477539, 147.20945739746094, 167.15098571777344, 187.09251403808594, 207.0340576171875, 226.9755859375, 246.9171142578125, 266.858642578125, 286.8001708984375, 306.74169921875, 326.6832275390625, 346.624755859375, 366.5662841796875, 386.5078125, 406.4493408203125, 426.390869140625, 446.3323974609375, 466.27392578125, 486.2154541015625, 506.156982421875, 526.0985107421875, 546.0400390625, 565.9815673828125, 585.923095703125, 605.8646240234375, 625.8062133789062, 645.7477416992188, 665.6892700195312, 685.6307983398438, 705.5723266601562, 725.5138549804688, 745.4553833007812, 765.3969116210938, 785.3384399414062, 805.2799682617188, 825.2214965820312, 845.1630249023438, 865.1045532226562, 885.0460815429688, 904.9876098632812, 924.9291381835938, 944.8707275390625, 964.812255859375, 984.7537841796875, 1004.6953125, 1024.6368408203125, 1044.578369140625, 1064.5198974609375, 1084.46142578125, 1104.4029541015625, 1124.344482421875, 1144.2860107421875, 1164.2275390625, 1184.1690673828125, 1204.110595703125, 1224.0521240234375, 1243.99365234375, 1263.9351806640625]}, "gradients/decoder.transformer.h.19.ln_1.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 0.0, 2.0, 5.0, 4.0, 3.0, 8.0, 6.0, 12.0, 11.0, 13.0, 19.0, 10.0, 15.0, 17.0, 34.0, 25.0, 20.0, 30.0, 21.0, 27.0, 31.0, 29.0, 27.0, 29.0, 29.0, 39.0, 46.0, 39.0, 43.0, 41.0, 40.0, 37.0, 32.0, 36.0, 24.0, 20.0, 22.0, 24.0, 23.0, 18.0, 13.0, 9.0, 17.0, 10.0, 7.0, 7.0, 12.0, 6.0, 5.0, 5.0, 1.0, 7.0, 1.0, 3.0], "bins": [-48.95342254638672, -47.63716125488281, -46.32090377807617, -45.004642486572266, -43.688385009765625, -42.37212371826172, -41.05586242675781, -39.73960494995117, -38.423343658447266, -37.10708236694336, -35.79082489013672, -34.47456359863281, -33.15830612182617, -31.842044830322266, -30.525785446166992, -29.20952606201172, -27.893266677856445, -26.577007293701172, -25.2607479095459, -23.944488525390625, -22.62822723388672, -21.311967849731445, -19.995708465576172, -18.679447174072266, -17.363189697265625, -16.04693031311035, -14.730669975280762, -13.414410591125488, -12.098150253295898, -10.781890869140625, -9.465631484985352, -8.149371147155762, -6.833110809326172, -5.51685094833374, -4.200591087341309, -2.884331703186035, -1.5680718421936035, -0.2518119812011719, 1.0644474029541016, 2.3807077407836914, 3.696967124938965, 5.0132269859313965, 6.329486846923828, 7.645746231079102, 8.962005615234375, 10.278265953063965, 11.594525337219238, 12.910785675048828, 14.227045059204102, 15.543304443359375, 16.85956382751465, 18.175823211669922, 19.492084503173828, 20.8083438873291, 22.124603271484375, 23.44086456298828, 24.757122039794922, 26.073381423950195, 27.38964080810547, 28.705902099609375, 30.02216148376465, 31.338420867919922, 32.65467834472656, 33.97093963623047, 35.287200927734375]}, "gradients/decoder.transformer.h.18.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 3.0, 3.0, 1.0, 2.0, 2.0, 4.0, 5.0, 2.0, 2.0, 8.0, 7.0, 5.0, 20.0, 12.0, 16.0, 13.0, 15.0, 28.0, 29.0, 22.0, 39.0, 45.0, 43.0, 42.0, 41.0, 45.0, 37.0, 43.0, 46.0, 45.0, 51.0, 45.0, 42.0, 34.0, 31.0, 24.0, 33.0, 20.0, 14.0, 14.0, 16.0, 14.0, 7.0, 9.0, 9.0, 6.0, 6.0, 6.0, 2.0, 3.0, 3.0, 2.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.814453125, -3.688385009765625, -3.56231689453125, -3.436248779296875, -3.3101806640625, -3.184112548828125, -3.05804443359375, -2.931976318359375, -2.805908203125, -2.679840087890625, -2.55377197265625, -2.427703857421875, -2.3016357421875, -2.175567626953125, -2.04949951171875, -1.923431396484375, -1.79736328125, -1.671295166015625, -1.54522705078125, -1.419158935546875, -1.2930908203125, -1.167022705078125, -1.04095458984375, -0.914886474609375, -0.788818359375, -0.662750244140625, -0.53668212890625, -0.410614013671875, -0.2845458984375, -0.158477783203125, -0.03240966796875, 0.093658447265625, 0.2197265625, 0.345794677734375, 0.47186279296875, 0.597930908203125, 0.7239990234375, 0.850067138671875, 0.97613525390625, 1.102203369140625, 1.228271484375, 1.354339599609375, 1.48040771484375, 1.606475830078125, 1.7325439453125, 1.858612060546875, 1.98468017578125, 2.110748291015625, 2.23681640625, 2.362884521484375, 2.48895263671875, 2.615020751953125, 2.7410888671875, 2.867156982421875, 2.99322509765625, 3.119293212890625, 3.245361328125, 3.371429443359375, 3.49749755859375, 3.623565673828125, 3.7496337890625, 3.875701904296875, 4.00177001953125, 4.127838134765625, 4.25390625]}, "gradients/decoder.transformer.h.18.mlp.c_proj.weight": {"_type": "histogram", "values": [3.0, 3.0, 2.0, 2.0, 2.0, 4.0, 2.0, 7.0, 5.0, 4.0, 9.0, 8.0, 21.0, 10.0, 23.0, 28.0, 38.0, 58.0, 64.0, 120.0, 214.0, 342.0, 721.0, 1790.0, 5539.0, 23699.0, 175782.0, 3159541.0, 755780.0, 54728.0, 10372.0, 2937.0, 1130.0, 508.0, 285.0, 153.0, 94.0, 71.0, 56.0, 39.0, 23.0, 16.0, 13.0, 12.0, 14.0, 3.0, 4.0, 6.0, 4.0, 3.0, 1.0, 2.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.21875, -11.7781982421875, -11.337646484375, -10.8970947265625, -10.45654296875, -10.0159912109375, -9.575439453125, -9.1348876953125, -8.6943359375, -8.2537841796875, -7.813232421875, -7.3726806640625, -6.93212890625, -6.4915771484375, -6.051025390625, -5.6104736328125, -5.169921875, -4.7293701171875, -4.288818359375, -3.8482666015625, -3.40771484375, -2.9671630859375, -2.526611328125, -2.0860595703125, -1.6455078125, -1.2049560546875, -0.764404296875, -0.3238525390625, 0.11669921875, 0.5572509765625, 0.997802734375, 1.4383544921875, 1.87890625, 2.3194580078125, 2.760009765625, 3.2005615234375, 3.64111328125, 4.0816650390625, 4.522216796875, 4.9627685546875, 5.4033203125, 5.8438720703125, 6.284423828125, 6.7249755859375, 7.16552734375, 7.6060791015625, 8.046630859375, 8.4871826171875, 8.927734375, 9.3682861328125, 9.808837890625, 10.2493896484375, 10.68994140625, 11.1304931640625, 11.571044921875, 12.0115966796875, 12.4521484375, 12.8927001953125, 13.333251953125, 13.7738037109375, 14.21435546875, 14.6549072265625, 15.095458984375, 15.5360107421875, 15.9765625]}, "gradients/decoder.transformer.h.18.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 5.0, 2.0, 3.0, 2.0, 5.0, 7.0, 10.0, 17.0, 16.0, 24.0, 47.0, 80.0, 135.0, 265.0, 489.0, 941.0, 961.0, 480.0, 252.0, 124.0, 97.0, 53.0, 24.0, 17.0, 9.0, 6.0, 5.0, 2.0, 1.0, 3.0, 2.0, 2.0, 2.0, 0.0, 1.0], "bins": [-24.953125, -24.398193359375, -23.84326171875, -23.288330078125, -22.7333984375, -22.178466796875, -21.62353515625, -21.068603515625, -20.513671875, -19.958740234375, -19.40380859375, -18.848876953125, -18.2939453125, -17.739013671875, -17.18408203125, -16.629150390625, -16.07421875, -15.519287109375, -14.96435546875, -14.409423828125, -13.8544921875, -13.299560546875, -12.74462890625, -12.189697265625, -11.634765625, -11.079833984375, -10.52490234375, -9.969970703125, -9.4150390625, -8.860107421875, -8.30517578125, -7.750244140625, -7.1953125, -6.640380859375, -6.08544921875, -5.530517578125, -4.9755859375, -4.420654296875, -3.86572265625, -3.310791015625, -2.755859375, -2.200927734375, -1.64599609375, -1.091064453125, -0.5361328125, 0.018798828125, 0.57373046875, 1.128662109375, 1.68359375, 2.238525390625, 2.79345703125, 3.348388671875, 3.9033203125, 4.458251953125, 5.01318359375, 5.568115234375, 6.123046875, 6.677978515625, 7.23291015625, 7.787841796875, 8.3427734375, 8.897705078125, 9.45263671875, 10.007568359375, 10.5625]}, "gradients/decoder.transformer.h.18.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 2.0, 2.0, 3.0, 7.0, 4.0, 4.0, 11.0, 20.0, 23.0, 44.0, 49.0, 56.0, 81.0, 128.0, 188.0, 353.0, 597.0, 1366.0, 3943.0, 17859.0, 163112.0, 3435970.0, 523447.0, 36796.0, 6442.0, 1835.0, 814.0, 416.0, 200.0, 154.0, 112.0, 66.0, 61.0, 42.0, 28.0, 24.0, 11.0, 9.0, 6.0, 6.0, 1.0, 0.0, 4.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-21.09375, -20.1640625, -19.234375, -18.3046875, -17.375, -16.4453125, -15.515625, -14.5859375, -13.65625, -12.7265625, -11.796875, -10.8671875, -9.9375, -9.0078125, -8.078125, -7.1484375, -6.21875, -5.2890625, -4.359375, -3.4296875, -2.5, -1.5703125, -0.640625, 0.2890625, 1.21875, 2.1484375, 3.078125, 4.0078125, 4.9375, 5.8671875, 6.796875, 7.7265625, 8.65625, 9.5859375, 10.515625, 11.4453125, 12.375, 13.3046875, 14.234375, 15.1640625, 16.09375, 17.0234375, 17.953125, 18.8828125, 19.8125, 20.7421875, 21.671875, 22.6015625, 23.53125, 24.4609375, 25.390625, 26.3203125, 27.25, 28.1796875, 29.109375, 30.0390625, 30.96875, 31.8984375, 32.828125, 33.7578125, 34.6875, 35.6171875, 36.546875, 37.4765625, 38.40625]}, "gradients/decoder.transformer.h.18.ln_2.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 6.0, 24.0, 119.0, 285.0, 332.0, 170.0, 63.0, 15.0, 4.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-72.39004516601562, -66.02128601074219, -59.65252685546875, -53.28376770019531, -46.915008544921875, -40.54624938964844, -34.177490234375, -27.808731079101562, -21.439971923828125, -15.071212768554688, -8.70245361328125, -2.3336944580078125, 4.035064697265625, 10.403823852539062, 16.7725830078125, 23.141342163085938, 29.510101318359375, 35.87886047363281, 42.24761962890625, 48.61637878417969, 54.985137939453125, 61.35389709472656, 67.72265625, 74.09141540527344, 80.46017456054688, 86.82893371582031, 93.19769287109375, 99.56645202636719, 105.93521118164062, 112.30397033691406, 118.6727294921875, 125.04148864746094, 131.41024780273438, 137.7790069580078, 144.14776611328125, 150.5165252685547, 156.88528442382812, 163.25404357910156, 169.622802734375, 175.99156188964844, 182.36032104492188, 188.7290802001953, 195.09783935546875, 201.4665985107422, 207.83535766601562, 214.20411682128906, 220.5728759765625, 226.94163513183594, 233.31039428710938, 239.6791534423828, 246.04791259765625, 252.4166717529297, 258.7854309082031, 265.1541748046875, 271.52294921875, 277.8917236328125, 284.2604675292969, 290.62921142578125, 296.99798583984375, 303.36676025390625, 309.7355041503906, 316.104248046875, 322.4730224609375, 328.841796875, 335.2105407714844]}, "gradients/decoder.transformer.h.18.ln_2.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 0.0, 4.0, 3.0, 2.0, 4.0, 5.0, 9.0, 6.0, 9.0, 13.0, 15.0, 12.0, 19.0, 12.0, 12.0, 18.0, 16.0, 27.0, 39.0, 32.0, 41.0, 28.0, 35.0, 34.0, 35.0, 49.0, 46.0, 47.0, 48.0, 34.0, 38.0, 45.0, 27.0, 32.0, 27.0, 33.0, 15.0, 30.0, 22.0, 14.0, 14.0, 12.0, 9.0, 8.0, 11.0, 4.0, 4.0, 5.0, 3.0, 4.0, 3.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-37.569122314453125, -36.27867889404297, -34.98823165893555, -33.69778823852539, -32.407344818115234, -31.116899490356445, -29.826454162597656, -28.5360107421875, -27.24556541442871, -25.955120086669922, -24.664676666259766, -23.374231338500977, -22.083786010742188, -20.79334259033203, -19.502897262573242, -18.212451934814453, -16.922008514404297, -15.631564140319824, -14.341119766235352, -13.050674438476562, -11.76023006439209, -10.469785690307617, -9.179340362548828, -7.8888959884643555, -6.598451614379883, -5.30800724029541, -4.017562389373779, -2.7271177768707275, -1.4366731643676758, -0.14622879028320312, 1.1442160606384277, 2.4346609115600586, 3.7251052856445312, 5.015549659729004, 6.305994510650635, 7.596439361572266, 8.886883735656738, 10.177328109741211, 11.4677734375, 12.758217811584473, 14.048662185668945, 15.339106559753418, 16.62955093383789, 17.91999626159668, 19.21044158935547, 20.500885009765625, 21.791330337524414, 23.081775665283203, 24.37221908569336, 25.66266441345215, 26.953107833862305, 28.243553161621094, 29.53399658203125, 30.82444190979004, 32.11488723754883, 33.405330657958984, 34.695777893066406, 35.98622131347656, 37.276668548583984, 38.56711196899414, 39.8575553894043, 41.14800262451172, 42.438446044921875, 43.72888946533203, 45.01933288574219]}, "gradients/decoder.transformer.h.18.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 3.0, 0.0, 1.0, 1.0, 3.0, 5.0, 5.0, 2.0, 6.0, 5.0, 5.0, 7.0, 9.0, 15.0, 9.0, 17.0, 16.0, 16.0, 16.0, 27.0, 27.0, 33.0, 40.0, 39.0, 41.0, 46.0, 29.0, 36.0, 52.0, 43.0, 37.0, 43.0, 45.0, 37.0, 41.0, 35.0, 25.0, 29.0, 16.0, 26.0, 21.0, 16.0, 15.0, 18.0, 10.0, 9.0, 11.0, 8.0, 6.0, 4.0, 4.0, 3.0, 1.0, 2.0, 0.0, 1.0, 2.0, 0.0, 2.0, 0.0, 1.0], "bins": [-4.10546875, -3.97711181640625, -3.8487548828125, -3.72039794921875, -3.592041015625, -3.46368408203125, -3.3353271484375, -3.20697021484375, -3.07861328125, -2.95025634765625, -2.8218994140625, -2.69354248046875, -2.565185546875, -2.43682861328125, -2.3084716796875, -2.18011474609375, -2.0517578125, -1.92340087890625, -1.7950439453125, -1.66668701171875, -1.538330078125, -1.40997314453125, -1.2816162109375, -1.15325927734375, -1.02490234375, -0.89654541015625, -0.7681884765625, -0.63983154296875, -0.511474609375, -0.38311767578125, -0.2547607421875, -0.12640380859375, 0.001953125, 0.13031005859375, 0.2586669921875, 0.38702392578125, 0.515380859375, 0.64373779296875, 0.7720947265625, 0.90045166015625, 1.02880859375, 1.15716552734375, 1.2855224609375, 1.41387939453125, 1.542236328125, 1.67059326171875, 1.7989501953125, 1.92730712890625, 2.0556640625, 2.18402099609375, 2.3123779296875, 2.44073486328125, 2.569091796875, 2.69744873046875, 2.8258056640625, 2.95416259765625, 3.08251953125, 3.21087646484375, 3.3392333984375, 3.46759033203125, 3.595947265625, 3.72430419921875, 3.8526611328125, 3.98101806640625, 4.109375]}, "gradients/decoder.transformer.h.18.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 2.0, 2.0, 2.0, 6.0, 6.0, 12.0, 22.0, 27.0, 47.0, 70.0, 96.0, 125.0, 190.0, 317.0, 459.0, 651.0, 922.0, 1428.0, 2172.0, 3198.0, 4870.0, 7230.0, 11588.0, 18355.0, 30901.0, 55600.0, 107958.0, 278092.0, 278455.0, 107388.0, 55183.0, 31186.0, 18674.0, 11484.0, 7548.0, 4677.0, 3175.0, 2032.0, 1434.0, 943.0, 631.0, 432.0, 289.0, 222.0, 150.0, 107.0, 67.0, 44.0, 41.0, 18.0, 15.0, 10.0, 10.0, 0.0, 3.0, 2.0, 2.0, 0.0, 1.0, 1.0], "bins": [-0.17724609375, -0.1717071533203125, -0.166168212890625, -0.1606292724609375, -0.15509033203125, -0.1495513916015625, -0.144012451171875, -0.1384735107421875, -0.1329345703125, -0.1273956298828125, -0.121856689453125, -0.1163177490234375, -0.11077880859375, -0.1052398681640625, -0.099700927734375, -0.0941619873046875, -0.088623046875, -0.0830841064453125, -0.077545166015625, -0.0720062255859375, -0.06646728515625, -0.0609283447265625, -0.055389404296875, -0.0498504638671875, -0.0443115234375, -0.0387725830078125, -0.033233642578125, -0.0276947021484375, -0.02215576171875, -0.0166168212890625, -0.011077880859375, -0.0055389404296875, 0.0, 0.0055389404296875, 0.011077880859375, 0.0166168212890625, 0.02215576171875, 0.0276947021484375, 0.033233642578125, 0.0387725830078125, 0.0443115234375, 0.0498504638671875, 0.055389404296875, 0.0609283447265625, 0.06646728515625, 0.0720062255859375, 0.077545166015625, 0.0830841064453125, 0.088623046875, 0.0941619873046875, 0.099700927734375, 0.1052398681640625, 0.11077880859375, 0.1163177490234375, 0.121856689453125, 0.1273956298828125, 0.1329345703125, 0.1384735107421875, 0.144012451171875, 0.1495513916015625, 0.15509033203125, 0.1606292724609375, 0.166168212890625, 0.1717071533203125, 0.17724609375]}, "gradients/decoder.transformer.h.18.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 5.0, 1.0, 5.0, 2.0, 7.0, 4.0, 6.0, 9.0, 7.0, 11.0, 16.0, 11.0, 21.0, 17.0, 21.0, 24.0, 22.0, 25.0, 28.0, 38.0, 41.0, 37.0, 47.0, 32.0, 42.0, 1069.0, 32.0, 35.0, 27.0, 34.0, 36.0, 39.0, 27.0, 25.0, 24.0, 21.0, 25.0, 20.0, 21.0, 23.0, 15.0, 12.0, 15.0, 10.0, 8.0, 8.0, 9.0, 5.0, 11.0, 3.0, 4.0, 3.0, 0.0, 1.0, 2.0, 1.0, 1.0], "bins": [-3.697265625, -3.58441162109375, -3.4715576171875, -3.35870361328125, -3.245849609375, -3.13299560546875, -3.0201416015625, -2.90728759765625, -2.79443359375, -2.68157958984375, -2.5687255859375, -2.45587158203125, -2.343017578125, -2.23016357421875, -2.1173095703125, -2.00445556640625, -1.8916015625, -1.77874755859375, -1.6658935546875, -1.55303955078125, -1.440185546875, -1.32733154296875, -1.2144775390625, -1.10162353515625, -0.98876953125, -0.87591552734375, -0.7630615234375, -0.65020751953125, -0.537353515625, -0.42449951171875, -0.3116455078125, -0.19879150390625, -0.0859375, 0.02691650390625, 0.1397705078125, 0.25262451171875, 0.365478515625, 0.47833251953125, 0.5911865234375, 0.70404052734375, 0.81689453125, 0.92974853515625, 1.0426025390625, 1.15545654296875, 1.268310546875, 1.38116455078125, 1.4940185546875, 1.60687255859375, 1.7197265625, 1.83258056640625, 1.9454345703125, 2.05828857421875, 2.171142578125, 2.28399658203125, 2.3968505859375, 2.50970458984375, 2.62255859375, 2.73541259765625, 2.8482666015625, 2.96112060546875, 3.073974609375, 3.18682861328125, 3.2996826171875, 3.41253662109375, 3.525390625]}, "gradients/decoder.transformer.h.18.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 2.0, 2.0, 5.0, 9.0, 14.0, 11.0, 19.0, 31.0, 63.0, 96.0, 132.0, 182.0, 253.0, 343.0, 483.0, 679.0, 928.0, 1150.0, 1647.0, 2224.0, 3025.0, 4241.0, 5885.0, 8040.0, 11244.0, 15656.0, 22552.0, 32902.0, 48126.0, 73880.0, 119578.0, 1320318.0, 154438.0, 87531.0, 55733.0, 36965.0, 25488.0, 17779.0, 12493.0, 9026.0, 6590.0, 4754.0, 3416.0, 2533.0, 1851.0, 1326.0, 955.0, 726.0, 548.0, 380.0, 257.0, 196.0, 146.0, 115.0, 63.0, 37.0, 25.0, 22.0, 15.0, 7.0, 8.0, 6.0, 2.0], "bins": [-0.0885009765625, -0.08579349517822266, -0.08308601379394531, -0.08037853240966797, -0.07767105102539062, -0.07496356964111328, -0.07225608825683594, -0.0695486068725586, -0.06684112548828125, -0.0641336441040039, -0.06142616271972656, -0.05871868133544922, -0.056011199951171875, -0.05330371856689453, -0.05059623718261719, -0.047888755798339844, -0.0451812744140625, -0.042473793029785156, -0.03976631164550781, -0.03705883026123047, -0.034351348876953125, -0.03164386749267578, -0.028936386108398438, -0.026228904724121094, -0.02352142333984375, -0.020813941955566406, -0.018106460571289062, -0.015398979187011719, -0.012691497802734375, -0.009984016418457031, -0.0072765350341796875, -0.004569053649902344, -0.001861572265625, 0.0008459091186523438, 0.0035533905029296875, 0.006260871887207031, 0.008968353271484375, 0.011675834655761719, 0.014383316040039062, 0.017090797424316406, 0.01979827880859375, 0.022505760192871094, 0.025213241577148438, 0.02792072296142578, 0.030628204345703125, 0.03333568572998047, 0.03604316711425781, 0.038750648498535156, 0.0414581298828125, 0.044165611267089844, 0.04687309265136719, 0.04958057403564453, 0.052288055419921875, 0.05499553680419922, 0.05770301818847656, 0.060410499572753906, 0.06311798095703125, 0.0658254623413086, 0.06853294372558594, 0.07124042510986328, 0.07394790649414062, 0.07665538787841797, 0.07936286926269531, 0.08207035064697266, 0.08477783203125]}, "gradients/decoder.transformer.h.18.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 3.0, 0.0, 5.0, 4.0, 4.0, 3.0, 4.0, 4.0, 11.0, 15.0, 17.0, 13.0, 37.0, 56.0, 64.0, 94.0, 177.0, 168.0, 108.0, 46.0, 54.0, 29.0, 19.0, 16.0, 10.0, 13.0, 9.0, 5.0, 5.0, 0.0, 3.0, 3.0, 3.0, 3.0, 1.0, 4.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-9.834766387939453e-06, -9.560026228427887e-06, -9.28528606891632e-06, -9.010545909404755e-06, -8.735805749893188e-06, -8.461065590381622e-06, -8.186325430870056e-06, -7.91158527135849e-06, -7.636845111846924e-06, -7.362104952335358e-06, -7.0873647928237915e-06, -6.812624633312225e-06, -6.537884473800659e-06, -6.263144314289093e-06, -5.988404154777527e-06, -5.713663995265961e-06, -5.4389238357543945e-06, -5.164183676242828e-06, -4.889443516731262e-06, -4.614703357219696e-06, -4.33996319770813e-06, -4.065223038196564e-06, -3.7904828786849976e-06, -3.5157427191734314e-06, -3.2410025596618652e-06, -2.966262400150299e-06, -2.691522240638733e-06, -2.4167820811271667e-06, -2.1420419216156006e-06, -1.8673017621040344e-06, -1.5925616025924683e-06, -1.317821443080902e-06, -1.043081283569336e-06, -7.683411240577698e-07, -4.936009645462036e-07, -2.1886080503463745e-07, 5.587935447692871e-08, 3.3061951398849487e-07, 6.05359673500061e-07, 8.800998330116272e-07, 1.1548399925231934e-06, 1.4295801520347595e-06, 1.7043203115463257e-06, 1.979060471057892e-06, 2.253800630569458e-06, 2.528540790081024e-06, 2.8032809495925903e-06, 3.0780211091041565e-06, 3.3527612686157227e-06, 3.627501428127289e-06, 3.902241587638855e-06, 4.176981747150421e-06, 4.451721906661987e-06, 4.7264620661735535e-06, 5.00120222568512e-06, 5.275942385196686e-06, 5.550682544708252e-06, 5.825422704219818e-06, 6.100162863731384e-06, 6.3749030232429504e-06, 6.649643182754517e-06, 6.924383342266083e-06, 7.199123501777649e-06, 7.473863661289215e-06, 7.748603820800781e-06]}, "gradients/decoder.transformer.h.18.crossattention.q_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 1.0, 4.0, 6.0, 3.0, 1.0, 3.0, 4.0, 7.0, 4.0, 15.0, 17.0, 19.0, 38.0, 40.0, 91.0, 154.0, 270.0, 1016.0, 39397.0, 1001791.0, 4629.0, 533.0, 187.0, 132.0, 59.0, 41.0, 25.0, 18.0, 16.0, 13.0, 6.0, 5.0, 5.0, 3.0, 3.0, 2.0, 1.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0001239776611328125, -0.00011959485709667206, -0.00011521205306053162, -0.00011082924902439117, -0.00010644644498825073, -0.00010206364095211029, -9.768083691596985e-05, -9.32980328798294e-05, -8.891522884368896e-05, -8.453242480754852e-05, -8.014962077140808e-05, -7.576681673526764e-05, -7.13840126991272e-05, -6.700120866298676e-05, -6.261840462684631e-05, -5.823560059070587e-05, -5.385279655456543e-05, -4.946999251842499e-05, -4.5087188482284546e-05, -4.0704384446144104e-05, -3.632158041000366e-05, -3.193877637386322e-05, -2.755597233772278e-05, -2.3173168301582336e-05, -1.8790364265441895e-05, -1.4407560229301453e-05, -1.002475619316101e-05, -5.641952157020569e-06, -1.259148120880127e-06, 3.123655915260315e-06, 7.506459951400757e-06, 1.1889263987541199e-05, 1.627206802368164e-05, 2.0654872059822083e-05, 2.5037676095962524e-05, 2.9420480132102966e-05, 3.380328416824341e-05, 3.818608820438385e-05, 4.256889224052429e-05, 4.6951696276664734e-05, 5.1334500312805176e-05, 5.571730434894562e-05, 6.010010838508606e-05, 6.44829124212265e-05, 6.886571645736694e-05, 7.324852049350739e-05, 7.763132452964783e-05, 8.201412856578827e-05, 8.639693260192871e-05, 9.077973663806915e-05, 9.51625406742096e-05, 9.954534471035004e-05, 0.00010392814874649048, 0.00010831095278263092, 0.00011269375681877136, 0.0001170765608549118, 0.00012145936489105225, 0.0001258421689271927, 0.00013022497296333313, 0.00013460777699947357, 0.00013899058103561401, 0.00014337338507175446, 0.0001477561891078949, 0.00015213899314403534, 0.00015652179718017578]}, "gradients/decoder.transformer.h.18.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 8.0, 140.0, 698.0, 164.0, 9.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.8033500459278e-05, -3.7179255741648376e-05, -3.632501102401875e-05, -3.547076630638912e-05, -3.46165215887595e-05, -3.376227687112987e-05, -3.2908032153500244e-05, -3.205378743587062e-05, -3.119954271824099e-05, -3.0345298000611365e-05, -2.949105328298174e-05, -2.8636808565352112e-05, -2.7782563847722486e-05, -2.692831913009286e-05, -2.6074074412463233e-05, -2.5219829694833606e-05, -2.436558497720398e-05, -2.3511340259574354e-05, -2.2657095541944727e-05, -2.18028508243151e-05, -2.0948606106685475e-05, -2.0094361389055848e-05, -1.9240116671426222e-05, -1.8385871953796595e-05, -1.753162723616697e-05, -1.6677382518537343e-05, -1.5823137800907716e-05, -1.496889308327809e-05, -1.4114648365648463e-05, -1.3260403648018837e-05, -1.240615893038921e-05, -1.1551914212759584e-05, -1.0697667676140554e-05, -9.843422958510928e-06, -8.989178240881301e-06, -8.134933523251675e-06, -7.280688805622049e-06, -6.426444087992422e-06, -5.572199370362796e-06, -4.7179546527331695e-06, -3.863709935103543e-06, -3.0094652174739167e-06, -2.1552204998442903e-06, -1.300975782214664e-06, -4.4673106458503753e-07, 4.0751365304458886e-07, 1.2617583706742153e-06, 2.1160030883038417e-06, 2.970247805933468e-06, 3.8244925235630944e-06, 4.678737241192721e-06, 5.532981958822347e-06, 6.387226676451974e-06, 7.2414713940816e-06, 8.095716111711226e-06, 8.949960829340853e-06, 9.80420554697048e-06, 1.0658450264600106e-05, 1.1512694982229732e-05, 1.2366939699859358e-05, 1.3221184417488985e-05, 1.4075429135118611e-05, 1.4929673852748238e-05, 1.5783918570377864e-05, 1.663816328800749e-05]}, "gradients/decoder.transformer.h.18.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 2.0, 8.0, 1.0, 5.0, 4.0, 6.0, 7.0, 6.0, 9.0, 14.0, 19.0, 23.0, 12.0, 14.0, 30.0, 14.0, 13.0, 35.0, 23.0, 35.0, 19.0, 26.0, 35.0, 28.0, 27.0, 52.0, 27.0, 50.0, 25.0, 29.0, 50.0, 26.0, 24.0, 43.0, 13.0, 40.0, 23.0, 17.0, 36.0, 9.0, 14.0, 30.0, 8.0, 19.0, 3.0, 7.0, 9.0, 7.0, 7.0, 8.0, 4.0, 7.0, 1.0, 4.0, 4.0, 2.0, 5.0], "bins": [-2.8014183044433594e-06, -2.7194619178771973e-06, -2.637505531311035e-06, -2.555549144744873e-06, -2.473592758178711e-06, -2.391636371612549e-06, -2.3096799850463867e-06, -2.2277235984802246e-06, -2.1457672119140625e-06, -2.0638108253479004e-06, -1.9818544387817383e-06, -1.8998980522155762e-06, -1.817941665649414e-06, -1.735985279083252e-06, -1.6540288925170898e-06, -1.5720725059509277e-06, -1.4901161193847656e-06, -1.4081597328186035e-06, -1.3262033462524414e-06, -1.2442469596862793e-06, -1.1622905731201172e-06, -1.080334186553955e-06, -9.98377799987793e-07, -9.164214134216309e-07, -8.344650268554688e-07, -7.525086402893066e-07, -6.705522537231445e-07, -5.885958671569824e-07, -5.066394805908203e-07, -4.246830940246582e-07, -3.427267074584961e-07, -2.60770320892334e-07, -1.7881393432617188e-07, -9.685754776000977e-08, -1.4901161193847656e-08, 6.705522537231445e-08, 1.4901161193847656e-07, 2.3096799850463867e-07, 3.129243850708008e-07, 3.948807716369629e-07, 4.76837158203125e-07, 5.587935447692871e-07, 6.407499313354492e-07, 7.227063179016113e-07, 8.046627044677734e-07, 8.866190910339355e-07, 9.685754776000977e-07, 1.0505318641662598e-06, 1.1324882507324219e-06, 1.214444637298584e-06, 1.296401023864746e-06, 1.3783574104309082e-06, 1.4603137969970703e-06, 1.5422701835632324e-06, 1.6242265701293945e-06, 1.7061829566955566e-06, 1.7881393432617188e-06, 1.8700957298278809e-06, 1.952052116394043e-06, 2.034008502960205e-06, 2.115964889526367e-06, 2.1979212760925293e-06, 2.2798776626586914e-06, 2.3618340492248535e-06, 2.4437904357910156e-06]}, "gradients/decoder.transformer.h.18.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 3.0, 0.0, 1.0, 1.0, 3.0, 5.0, 5.0, 2.0, 6.0, 5.0, 5.0, 7.0, 9.0, 15.0, 9.0, 17.0, 16.0, 16.0, 16.0, 27.0, 27.0, 33.0, 40.0, 39.0, 41.0, 46.0, 29.0, 36.0, 52.0, 43.0, 37.0, 43.0, 45.0, 37.0, 41.0, 35.0, 25.0, 29.0, 16.0, 26.0, 21.0, 16.0, 15.0, 18.0, 10.0, 9.0, 11.0, 8.0, 6.0, 4.0, 4.0, 3.0, 1.0, 2.0, 0.0, 1.0, 2.0, 0.0, 2.0, 0.0, 1.0], "bins": [-4.10546875, -3.97711181640625, -3.8487548828125, -3.72039794921875, -3.592041015625, -3.46368408203125, -3.3353271484375, -3.20697021484375, -3.07861328125, -2.95025634765625, -2.8218994140625, -2.69354248046875, -2.565185546875, -2.43682861328125, -2.3084716796875, -2.18011474609375, -2.0517578125, -1.92340087890625, -1.7950439453125, -1.66668701171875, -1.538330078125, -1.40997314453125, -1.2816162109375, -1.15325927734375, -1.02490234375, -0.89654541015625, -0.7681884765625, -0.63983154296875, -0.511474609375, -0.38311767578125, -0.2547607421875, -0.12640380859375, 0.001953125, 0.13031005859375, 0.2586669921875, 0.38702392578125, 0.515380859375, 0.64373779296875, 0.7720947265625, 0.90045166015625, 1.02880859375, 1.15716552734375, 1.2855224609375, 1.41387939453125, 1.542236328125, 1.67059326171875, 1.7989501953125, 1.92730712890625, 2.0556640625, 2.18402099609375, 2.3123779296875, 2.44073486328125, 2.569091796875, 2.69744873046875, 2.8258056640625, 2.95416259765625, 3.08251953125, 3.21087646484375, 3.3392333984375, 3.46759033203125, 3.595947265625, 3.72430419921875, 3.8526611328125, 3.98101806640625, 4.109375]}, "gradients/decoder.transformer.h.18.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 5.0, 8.0, 6.0, 5.0, 23.0, 24.0, 32.0, 60.0, 93.0, 127.0, 247.0, 349.0, 558.0, 980.0, 1802.0, 3323.0, 6314.0, 13198.0, 29453.0, 75907.0, 336625.0, 435431.0, 82921.0, 32137.0, 14210.0, 6679.0, 3515.0, 1861.0, 1054.0, 594.0, 366.0, 220.0, 149.0, 101.0, 57.0, 48.0, 33.0, 26.0, 10.0, 6.0, 4.0, 2.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-9.578125, -9.3048095703125, -9.031494140625, -8.7581787109375, -8.48486328125, -8.2115478515625, -7.938232421875, -7.6649169921875, -7.3916015625, -7.1182861328125, -6.844970703125, -6.5716552734375, -6.29833984375, -6.0250244140625, -5.751708984375, -5.4783935546875, -5.205078125, -4.9317626953125, -4.658447265625, -4.3851318359375, -4.11181640625, -3.8385009765625, -3.565185546875, -3.2918701171875, -3.0185546875, -2.7452392578125, -2.471923828125, -2.1986083984375, -1.92529296875, -1.6519775390625, -1.378662109375, -1.1053466796875, -0.83203125, -0.5587158203125, -0.285400390625, -0.0120849609375, 0.26123046875, 0.5345458984375, 0.807861328125, 1.0811767578125, 1.3544921875, 1.6278076171875, 1.901123046875, 2.1744384765625, 2.44775390625, 2.7210693359375, 2.994384765625, 3.2677001953125, 3.541015625, 3.8143310546875, 4.087646484375, 4.3609619140625, 4.63427734375, 4.9075927734375, 5.180908203125, 5.4542236328125, 5.7275390625, 6.0008544921875, 6.274169921875, 6.5474853515625, 6.82080078125, 7.0941162109375, 7.367431640625, 7.6407470703125, 7.9140625]}, "gradients/decoder.transformer.h.18.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 6.0, 2.0, 1.0, 4.0, 10.0, 7.0, 10.0, 8.0, 11.0, 8.0, 18.0, 17.0, 16.0, 17.0, 23.0, 19.0, 23.0, 33.0, 31.0, 39.0, 41.0, 57.0, 68.0, 100.0, 205.0, 1541.0, 174.0, 107.0, 74.0, 47.0, 35.0, 44.0, 35.0, 28.0, 22.0, 24.0, 23.0, 17.0, 14.0, 18.0, 12.0, 13.0, 9.0, 12.0, 7.0, 8.0, 10.0, 3.0, 4.0, 2.0, 3.0, 2.0, 0.0, 0.0, 2.0], "bins": [-14.828125, -14.397216796875, -13.96630859375, -13.535400390625, -13.1044921875, -12.673583984375, -12.24267578125, -11.811767578125, -11.380859375, -10.949951171875, -10.51904296875, -10.088134765625, -9.6572265625, -9.226318359375, -8.79541015625, -8.364501953125, -7.93359375, -7.502685546875, -7.07177734375, -6.640869140625, -6.2099609375, -5.779052734375, -5.34814453125, -4.917236328125, -4.486328125, -4.055419921875, -3.62451171875, -3.193603515625, -2.7626953125, -2.331787109375, -1.90087890625, -1.469970703125, -1.0390625, -0.608154296875, -0.17724609375, 0.253662109375, 0.6845703125, 1.115478515625, 1.54638671875, 1.977294921875, 2.408203125, 2.839111328125, 3.27001953125, 3.700927734375, 4.1318359375, 4.562744140625, 4.99365234375, 5.424560546875, 5.85546875, 6.286376953125, 6.71728515625, 7.148193359375, 7.5791015625, 8.010009765625, 8.44091796875, 8.871826171875, 9.302734375, 9.733642578125, 10.16455078125, 10.595458984375, 11.0263671875, 11.457275390625, 11.88818359375, 12.319091796875, 12.75]}, "gradients/decoder.transformer.h.18.attn.c_attn.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 0.0, 4.0, 3.0, 3.0, 4.0, 6.0, 4.0, 12.0, 6.0, 14.0, 15.0, 13.0, 15.0, 25.0, 30.0, 26.0, 58.0, 66.0, 100.0, 153.0, 216.0, 450.0, 1151.0, 4633.0, 28261.0, 396840.0, 2616831.0, 82402.0, 10586.0, 2218.0, 624.0, 315.0, 170.0, 120.0, 74.0, 55.0, 41.0, 25.0, 31.0, 34.0, 17.0, 12.0, 5.0, 13.0, 10.0, 6.0, 9.0, 5.0, 0.0, 5.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 3.0, 0.0, 1.0], "bins": [-22.40625, -21.665283203125, -20.92431640625, -20.183349609375, -19.4423828125, -18.701416015625, -17.96044921875, -17.219482421875, -16.478515625, -15.737548828125, -14.99658203125, -14.255615234375, -13.5146484375, -12.773681640625, -12.03271484375, -11.291748046875, -10.55078125, -9.809814453125, -9.06884765625, -8.327880859375, -7.5869140625, -6.845947265625, -6.10498046875, -5.364013671875, -4.623046875, -3.882080078125, -3.14111328125, -2.400146484375, -1.6591796875, -0.918212890625, -0.17724609375, 0.563720703125, 1.3046875, 2.045654296875, 2.78662109375, 3.527587890625, 4.2685546875, 5.009521484375, 5.75048828125, 6.491455078125, 7.232421875, 7.973388671875, 8.71435546875, 9.455322265625, 10.1962890625, 10.937255859375, 11.67822265625, 12.419189453125, 13.16015625, 13.901123046875, 14.64208984375, 15.383056640625, 16.1240234375, 16.864990234375, 17.60595703125, 18.346923828125, 19.087890625, 19.828857421875, 20.56982421875, 21.310791015625, 22.0517578125, 22.792724609375, 23.53369140625, 24.274658203125, 25.015625]}, "gradients/decoder.transformer.h.18.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 6.0, 71.0, 658.0, 270.0, 16.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-58.560386657714844, -50.94347381591797, -43.326560974121094, -35.70964813232422, -28.09273338317871, -20.475818634033203, -12.858905792236328, -5.241992950439453, 2.374919891357422, 9.991832733154297, 17.608745574951172, 25.22566032409668, 32.84257507324219, 40.45948791503906, 48.07640075683594, 55.69331359863281, 63.31022644042969, 70.92713928222656, 78.54405212402344, 86.16096496582031, 93.77787780761719, 101.39479064941406, 109.01170349121094, 116.62861633300781, 124.24552917480469, 131.86244201660156, 139.47935485839844, 147.0962677001953, 154.7131805419922, 162.33009338378906, 169.94700622558594, 177.5639190673828, 185.1808319091797, 192.79774475097656, 200.41465759277344, 208.0315704345703, 215.6484832763672, 223.26539611816406, 230.88230895996094, 238.4992218017578, 246.1161346435547, 253.73304748535156, 261.3499755859375, 268.9668884277344, 276.58380126953125, 284.2007141113281, 291.817626953125, 299.4345397949219, 307.05145263671875, 314.6683654785156, 322.2852783203125, 329.9021911621094, 337.51910400390625, 345.1360168457031, 352.7529296875, 360.3698425292969, 367.98675537109375, 375.6036682128906, 383.2205810546875, 390.8374938964844, 398.45440673828125, 406.0713195800781, 413.688232421875, 421.3051452636719, 428.92205810546875]}, "gradients/decoder.transformer.h.18.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 3.0, 3.0, 3.0, 2.0, 4.0, 3.0, 2.0, 6.0, 10.0, 13.0, 6.0, 16.0, 11.0, 26.0, 29.0, 27.0, 23.0, 33.0, 35.0, 29.0, 46.0, 45.0, 29.0, 40.0, 47.0, 42.0, 54.0, 29.0, 48.0, 27.0, 31.0, 41.0, 30.0, 25.0, 28.0, 22.0, 20.0, 28.0, 12.0, 20.0, 9.0, 12.0, 8.0, 8.0, 6.0, 4.0, 5.0, 1.0, 4.0, 3.0, 3.0, 2.0, 0.0, 3.0, 2.0, 1.0, 0.0, 1.0], "bins": [-46.734596252441406, -45.24544143676758, -43.756282806396484, -42.267127990722656, -40.77796936035156, -39.288814544677734, -37.799659729003906, -36.31050109863281, -34.82134246826172, -33.33218765258789, -31.843029022216797, -30.35387420654297, -28.864715576171875, -27.375560760498047, -25.886404037475586, -24.397247314453125, -22.908092498779297, -21.418935775756836, -19.929779052734375, -18.440624237060547, -16.951465606689453, -15.462309837341309, -13.973154067993164, -12.483997344970703, -10.994840621948242, -9.505683898925781, -8.01652717590332, -6.527371406555176, -5.038214683532715, -3.549057960510254, -2.0599021911621094, -0.5707454681396484, 0.9184150695800781, 2.40757155418396, 3.896728038787842, 5.3858842849731445, 6.8750410079956055, 8.364197731018066, 9.853353500366211, 11.342510223388672, 12.831666946411133, 14.320823669433594, 15.809980392456055, 17.299137115478516, 18.788291931152344, 20.277450561523438, 21.766605377197266, 23.255762100219727, 24.744918823242188, 26.23407554626465, 27.72323226928711, 29.212387084960938, 30.70154571533203, 32.19070053100586, 33.67985534667969, 35.16901397705078, 36.658172607421875, 38.1473274230957, 39.6364860534668, 41.125640869140625, 42.61479949951172, 44.10395431518555, 45.593109130859375, 47.08226776123047, 48.5714225769043]}, "gradients/decoder.transformer.h.17.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 5.0, 3.0, 0.0, 4.0, 3.0, 7.0, 3.0, 3.0, 10.0, 13.0, 12.0, 11.0, 17.0, 15.0, 13.0, 16.0, 26.0, 34.0, 33.0, 27.0, 46.0, 36.0, 35.0, 45.0, 39.0, 55.0, 32.0, 42.0, 35.0, 42.0, 50.0, 30.0, 38.0, 34.0, 26.0, 21.0, 25.0, 18.0, 18.0, 8.0, 18.0, 12.0, 16.0, 8.0, 5.0, 8.0, 7.0, 4.0, 3.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0], "bins": [-4.1953125, -4.064697265625, -3.93408203125, -3.803466796875, -3.6728515625, -3.542236328125, -3.41162109375, -3.281005859375, -3.150390625, -3.019775390625, -2.88916015625, -2.758544921875, -2.6279296875, -2.497314453125, -2.36669921875, -2.236083984375, -2.10546875, -1.974853515625, -1.84423828125, -1.713623046875, -1.5830078125, -1.452392578125, -1.32177734375, -1.191162109375, -1.060546875, -0.929931640625, -0.79931640625, -0.668701171875, -0.5380859375, -0.407470703125, -0.27685546875, -0.146240234375, -0.015625, 0.114990234375, 0.24560546875, 0.376220703125, 0.5068359375, 0.637451171875, 0.76806640625, 0.898681640625, 1.029296875, 1.159912109375, 1.29052734375, 1.421142578125, 1.5517578125, 1.682373046875, 1.81298828125, 1.943603515625, 2.07421875, 2.204833984375, 2.33544921875, 2.466064453125, 2.5966796875, 2.727294921875, 2.85791015625, 2.988525390625, 3.119140625, 3.249755859375, 3.38037109375, 3.510986328125, 3.6416015625, 3.772216796875, 3.90283203125, 4.033447265625, 4.1640625]}, "gradients/decoder.transformer.h.17.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 1.0, 2.0, 2.0, 3.0, 1.0, 5.0, 3.0, 12.0, 19.0, 29.0, 43.0, 63.0, 96.0, 127.0, 192.0, 255.0, 450.0, 681.0, 1187.0, 2056.0, 3546.0, 7192.0, 15157.0, 35243.0, 96223.0, 375073.0, 1826309.0, 1416797.0, 278372.0, 78015.0, 29665.0, 13043.0, 6352.0, 3342.0, 1839.0, 1064.0, 625.0, 414.0, 267.0, 165.0, 113.0, 76.0, 56.0, 37.0, 20.0, 21.0, 13.0, 14.0, 8.0, 1.0, 1.0, 1.0, 2.0, 4.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-6.4375, -6.22918701171875, -6.0208740234375, -5.81256103515625, -5.604248046875, -5.39593505859375, -5.1876220703125, -4.97930908203125, -4.77099609375, -4.56268310546875, -4.3543701171875, -4.14605712890625, -3.937744140625, -3.72943115234375, -3.5211181640625, -3.31280517578125, -3.1044921875, -2.89617919921875, -2.6878662109375, -2.47955322265625, -2.271240234375, -2.06292724609375, -1.8546142578125, -1.64630126953125, -1.43798828125, -1.22967529296875, -1.0213623046875, -0.81304931640625, -0.604736328125, -0.39642333984375, -0.1881103515625, 0.02020263671875, 0.228515625, 0.43682861328125, 0.6451416015625, 0.85345458984375, 1.061767578125, 1.27008056640625, 1.4783935546875, 1.68670654296875, 1.89501953125, 2.10333251953125, 2.3116455078125, 2.51995849609375, 2.728271484375, 2.93658447265625, 3.1448974609375, 3.35321044921875, 3.5615234375, 3.76983642578125, 3.9781494140625, 4.18646240234375, 4.394775390625, 4.60308837890625, 4.8114013671875, 5.01971435546875, 5.22802734375, 5.43634033203125, 5.6446533203125, 5.85296630859375, 6.061279296875, 6.26959228515625, 6.4779052734375, 6.68621826171875, 6.89453125]}, "gradients/decoder.transformer.h.17.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 8.0, 12.0, 7.0, 15.0, 26.0, 21.0, 45.0, 64.0, 96.0, 165.0, 268.0, 410.0, 687.0, 783.0, 540.0, 309.0, 212.0, 130.0, 85.0, 48.0, 31.0, 37.0, 19.0, 16.0, 15.0, 4.0, 9.0, 4.0, 2.0, 4.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "bins": [-16.53125, -16.10986328125, -15.6884765625, -15.26708984375, -14.845703125, -14.42431640625, -14.0029296875, -13.58154296875, -13.16015625, -12.73876953125, -12.3173828125, -11.89599609375, -11.474609375, -11.05322265625, -10.6318359375, -10.21044921875, -9.7890625, -9.36767578125, -8.9462890625, -8.52490234375, -8.103515625, -7.68212890625, -7.2607421875, -6.83935546875, -6.41796875, -5.99658203125, -5.5751953125, -5.15380859375, -4.732421875, -4.31103515625, -3.8896484375, -3.46826171875, -3.046875, -2.62548828125, -2.2041015625, -1.78271484375, -1.361328125, -0.93994140625, -0.5185546875, -0.09716796875, 0.32421875, 0.74560546875, 1.1669921875, 1.58837890625, 2.009765625, 2.43115234375, 2.8525390625, 3.27392578125, 3.6953125, 4.11669921875, 4.5380859375, 4.95947265625, 5.380859375, 5.80224609375, 6.2236328125, 6.64501953125, 7.06640625, 7.48779296875, 7.9091796875, 8.33056640625, 8.751953125, 9.17333984375, 9.5947265625, 10.01611328125, 10.4375]}, "gradients/decoder.transformer.h.17.mlp.c_fc.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 3.0, 4.0, 6.0, 1.0, 6.0, 9.0, 17.0, 22.0, 37.0, 51.0, 71.0, 91.0, 174.0, 330.0, 691.0, 1655.0, 5797.0, 28626.0, 282089.0, 3445781.0, 383389.0, 35630.0, 6451.0, 1854.0, 726.0, 322.0, 163.0, 117.0, 69.0, 38.0, 26.0, 13.0, 14.0, 10.0, 3.0, 5.0, 4.0, 1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-18.890625, -18.052978515625, -17.21533203125, -16.377685546875, -15.5400390625, -14.702392578125, -13.86474609375, -13.027099609375, -12.189453125, -11.351806640625, -10.51416015625, -9.676513671875, -8.8388671875, -8.001220703125, -7.16357421875, -6.325927734375, -5.48828125, -4.650634765625, -3.81298828125, -2.975341796875, -2.1376953125, -1.300048828125, -0.46240234375, 0.375244140625, 1.212890625, 2.050537109375, 2.88818359375, 3.725830078125, 4.5634765625, 5.401123046875, 6.23876953125, 7.076416015625, 7.9140625, 8.751708984375, 9.58935546875, 10.427001953125, 11.2646484375, 12.102294921875, 12.93994140625, 13.777587890625, 14.615234375, 15.452880859375, 16.29052734375, 17.128173828125, 17.9658203125, 18.803466796875, 19.64111328125, 20.478759765625, 21.31640625, 22.154052734375, 22.99169921875, 23.829345703125, 24.6669921875, 25.504638671875, 26.34228515625, 27.179931640625, 28.017578125, 28.855224609375, 29.69287109375, 30.530517578125, 31.3681640625, 32.205810546875, 33.04345703125, 33.881103515625, 34.71875]}, "gradients/decoder.transformer.h.17.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 11.0, 26.0, 53.0, 99.0, 185.0, 194.0, 195.0, 140.0, 61.0, 22.0, 16.0, 8.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-134.54388427734375, -130.88369750976562, -127.22352600097656, -123.56334686279297, -119.90316772460938, -116.24298095703125, -112.58280181884766, -108.92262268066406, -105.26244354248047, -101.60226440429688, -97.94208526611328, -94.28190612792969, -90.62171936035156, -86.9615478515625, -83.30136108398438, -79.64118194580078, -75.98100280761719, -72.3208236694336, -68.66064453125, -65.0004653930664, -61.34028244018555, -57.68010330200195, -54.019920349121094, -50.3597412109375, -46.699562072753906, -43.03938293457031, -39.37920379638672, -35.71902084350586, -32.058841705322266, -28.398662567138672, -24.738481521606445, -21.07830047607422, -17.418128967285156, -13.757948875427246, -10.097768783569336, -6.437588691711426, -2.7774085998535156, 0.8827705383300781, 4.542951583862305, 8.203132629394531, 11.863311767578125, 15.523491859436035, 19.183671951293945, 22.843852996826172, 26.504032135009766, 30.16421127319336, 33.82439422607422, 37.48457336425781, 41.144752502441406, 44.804931640625, 48.465110778808594, 52.12529373168945, 55.78547286987305, 59.44565200805664, 63.1058349609375, 66.7660140991211, 70.42619323730469, 74.08637237548828, 77.74655151367188, 81.40673065185547, 85.06690979003906, 88.72709655761719, 92.38727569580078, 96.04745483398438, 99.70763397216797]}, "gradients/decoder.transformer.h.17.ln_2.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 2.0, 3.0, 2.0, 4.0, 7.0, 2.0, 8.0, 14.0, 12.0, 17.0, 17.0, 19.0, 18.0, 24.0, 26.0, 25.0, 30.0, 28.0, 40.0, 21.0, 37.0, 33.0, 31.0, 31.0, 28.0, 38.0, 35.0, 23.0, 49.0, 28.0, 29.0, 43.0, 29.0, 30.0, 34.0, 24.0, 26.0, 17.0, 22.0, 11.0, 21.0, 14.0, 12.0, 5.0, 8.0, 6.0, 10.0, 6.0, 5.0, 4.0, 0.0, 2.0, 3.0, 1.0, 2.0, 0.0, 1.0], "bins": [-32.07818603515625, -31.08555030822754, -30.092914581298828, -29.100278854370117, -28.107643127441406, -27.115009307861328, -26.122373580932617, -25.129737854003906, -24.137102127075195, -23.144466400146484, -22.151830673217773, -21.159194946289062, -20.166561126708984, -19.17392349243164, -18.181289672851562, -17.18865394592285, -16.19601821899414, -15.20338249206543, -14.210746765136719, -13.218111991882324, -12.225476264953613, -11.232840538024902, -10.240205764770508, -9.247570037841797, -8.254934310913086, -7.262298583984375, -6.269663333892822, -5.2770280838012695, -4.284392356872559, -3.2917566299438477, -2.299121379852295, -1.3064861297607422, -0.31385040283203125, 0.6787850856781006, 1.6714205741882324, 2.6640560626983643, 3.656691551208496, 4.649327278137207, 5.64196252822876, 6.6345977783203125, 7.627233505249023, 8.619869232177734, 9.612504959106445, 10.60513973236084, 11.59777545928955, 12.590411186218262, 13.583045959472656, 14.575681686401367, 15.568317413330078, 16.56095314025879, 17.5535888671875, 18.54622459411621, 19.538860321044922, 20.531494140625, 21.52412986755371, 22.516765594482422, 23.509401321411133, 24.502037048339844, 25.494672775268555, 26.487308502197266, 27.479942321777344, 28.472579956054688, 29.465213775634766, 30.457849502563477, 31.450485229492188]}, "gradients/decoder.transformer.h.17.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 5.0, 8.0, 3.0, 3.0, 5.0, 7.0, 6.0, 5.0, 13.0, 12.0, 12.0, 20.0, 18.0, 24.0, 24.0, 30.0, 25.0, 34.0, 29.0, 52.0, 36.0, 37.0, 48.0, 44.0, 44.0, 35.0, 46.0, 35.0, 37.0, 34.0, 42.0, 26.0, 25.0, 38.0, 21.0, 25.0, 10.0, 13.0, 14.0, 14.0, 8.0, 11.0, 9.0, 7.0, 8.0, 4.0, 3.0, 2.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0], "bins": [-4.38671875, -4.2462158203125, -4.105712890625, -3.9652099609375, -3.82470703125, -3.6842041015625, -3.543701171875, -3.4031982421875, -3.2626953125, -3.1221923828125, -2.981689453125, -2.8411865234375, -2.70068359375, -2.5601806640625, -2.419677734375, -2.2791748046875, -2.138671875, -1.9981689453125, -1.857666015625, -1.7171630859375, -1.57666015625, -1.4361572265625, -1.295654296875, -1.1551513671875, -1.0146484375, -0.8741455078125, -0.733642578125, -0.5931396484375, -0.45263671875, -0.3121337890625, -0.171630859375, -0.0311279296875, 0.109375, 0.2498779296875, 0.390380859375, 0.5308837890625, 0.67138671875, 0.8118896484375, 0.952392578125, 1.0928955078125, 1.2333984375, 1.3739013671875, 1.514404296875, 1.6549072265625, 1.79541015625, 1.9359130859375, 2.076416015625, 2.2169189453125, 2.357421875, 2.4979248046875, 2.638427734375, 2.7789306640625, 2.91943359375, 3.0599365234375, 3.200439453125, 3.3409423828125, 3.4814453125, 3.6219482421875, 3.762451171875, 3.9029541015625, 4.04345703125, 4.1839599609375, 4.324462890625, 4.4649658203125, 4.60546875]}, "gradients/decoder.transformer.h.17.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 4.0, 1.0, 7.0, 7.0, 19.0, 24.0, 32.0, 43.0, 57.0, 97.0, 131.0, 220.0, 307.0, 438.0, 657.0, 1031.0, 1411.0, 1912.0, 2828.0, 4104.0, 6101.0, 9345.0, 13950.0, 20899.0, 32579.0, 53086.0, 90783.0, 190925.0, 315374.0, 120175.0, 65889.0, 40089.0, 25476.0, 16483.0, 10936.0, 7271.0, 5106.0, 3341.0, 2365.0, 1573.0, 1081.0, 761.0, 506.0, 348.0, 270.0, 184.0, 128.0, 75.0, 54.0, 27.0, 21.0, 17.0, 9.0, 7.0, 2.0, 3.0, 2.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.170166015625, -0.16471481323242188, -0.15926361083984375, -0.15381240844726562, -0.1483612060546875, -0.14291000366210938, -0.13745880126953125, -0.13200759887695312, -0.126556396484375, -0.12110519409179688, -0.11565399169921875, -0.11020278930664062, -0.1047515869140625, -0.09930038452148438, -0.09384918212890625, -0.08839797973632812, -0.08294677734375, -0.07749557495117188, -0.07204437255859375, -0.06659317016601562, -0.0611419677734375, -0.055690765380859375, -0.05023956298828125, -0.044788360595703125, -0.039337158203125, -0.033885955810546875, -0.02843475341796875, -0.022983551025390625, -0.0175323486328125, -0.012081146240234375, -0.00662994384765625, -0.001178741455078125, 0.0042724609375, 0.009723663330078125, 0.01517486572265625, 0.020626068115234375, 0.0260772705078125, 0.031528472900390625, 0.03697967529296875, 0.042430877685546875, 0.047882080078125, 0.053333282470703125, 0.05878448486328125, 0.06423568725585938, 0.0696868896484375, 0.07513809204101562, 0.08058929443359375, 0.08604049682617188, 0.09149169921875, 0.09694290161132812, 0.10239410400390625, 0.10784530639648438, 0.1132965087890625, 0.11874771118164062, 0.12419891357421875, 0.12965011596679688, 0.135101318359375, 0.14055252075195312, 0.14600372314453125, 0.15145492553710938, 0.1569061279296875, 0.16235733032226562, 0.16780853271484375, 0.17325973510742188, 0.1787109375]}, "gradients/decoder.transformer.h.17.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 6.0, 9.0, 4.0, 10.0, 11.0, 10.0, 9.0, 15.0, 21.0, 29.0, 25.0, 22.0, 29.0, 26.0, 41.0, 32.0, 38.0, 33.0, 43.0, 47.0, 41.0, 1073.0, 31.0, 44.0, 35.0, 32.0, 35.0, 28.0, 19.0, 33.0, 21.0, 24.0, 19.0, 22.0, 19.0, 20.0, 14.0, 9.0, 10.0, 10.0, 8.0, 9.0, 5.0, 2.0, 6.0, 1.0, 3.0, 1.0, 0.0, 0.0, 1.0], "bins": [-4.81640625, -4.675537109375, -4.53466796875, -4.393798828125, -4.2529296875, -4.112060546875, -3.97119140625, -3.830322265625, -3.689453125, -3.548583984375, -3.40771484375, -3.266845703125, -3.1259765625, -2.985107421875, -2.84423828125, -2.703369140625, -2.5625, -2.421630859375, -2.28076171875, -2.139892578125, -1.9990234375, -1.858154296875, -1.71728515625, -1.576416015625, -1.435546875, -1.294677734375, -1.15380859375, -1.012939453125, -0.8720703125, -0.731201171875, -0.59033203125, -0.449462890625, -0.30859375, -0.167724609375, -0.02685546875, 0.114013671875, 0.2548828125, 0.395751953125, 0.53662109375, 0.677490234375, 0.818359375, 0.959228515625, 1.10009765625, 1.240966796875, 1.3818359375, 1.522705078125, 1.66357421875, 1.804443359375, 1.9453125, 2.086181640625, 2.22705078125, 2.367919921875, 2.5087890625, 2.649658203125, 2.79052734375, 2.931396484375, 3.072265625, 3.213134765625, 3.35400390625, 3.494873046875, 3.6357421875, 3.776611328125, 3.91748046875, 4.058349609375, 4.19921875]}, "gradients/decoder.transformer.h.17.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 3.0, 3.0, 4.0, 4.0, 14.0, 22.0, 43.0, 48.0, 70.0, 126.0, 171.0, 277.0, 403.0, 546.0, 736.0, 1061.0, 1477.0, 2127.0, 2998.0, 4260.0, 6011.0, 8786.0, 12686.0, 18450.0, 27602.0, 40865.0, 62750.0, 101694.0, 1284295.0, 228345.0, 101894.0, 62181.0, 40609.0, 26763.0, 18363.0, 12388.0, 8770.0, 6143.0, 4229.0, 2896.0, 2070.0, 1507.0, 1027.0, 774.0, 506.0, 377.0, 258.0, 162.0, 130.0, 62.0, 64.0, 40.0, 16.0, 17.0, 8.0, 6.0, 2.0, 4.0, 1.0, 3.0], "bins": [-0.1158447265625, -0.11233329772949219, -0.10882186889648438, -0.10531044006347656, -0.10179901123046875, -0.09828758239746094, -0.09477615356445312, -0.09126472473144531, -0.0877532958984375, -0.08424186706542969, -0.08073043823242188, -0.07721900939941406, -0.07370758056640625, -0.07019615173339844, -0.06668472290039062, -0.06317329406738281, -0.059661865234375, -0.05615043640136719, -0.052639007568359375, -0.04912757873535156, -0.04561614990234375, -0.04210472106933594, -0.038593292236328125, -0.03508186340332031, -0.0315704345703125, -0.028059005737304688, -0.024547576904296875, -0.021036148071289062, -0.01752471923828125, -0.014013290405273438, -0.010501861572265625, -0.0069904327392578125, -0.00347900390625, 3.24249267578125e-05, 0.003543853759765625, 0.0070552825927734375, 0.01056671142578125, 0.014078140258789062, 0.017589569091796875, 0.021100997924804688, 0.0246124267578125, 0.028123855590820312, 0.031635284423828125, 0.03514671325683594, 0.03865814208984375, 0.04216957092285156, 0.045680999755859375, 0.04919242858886719, 0.052703857421875, 0.05621528625488281, 0.059726715087890625, 0.06323814392089844, 0.06674957275390625, 0.07026100158691406, 0.07377243041992188, 0.07728385925292969, 0.0807952880859375, 0.08430671691894531, 0.08781814575195312, 0.09132957458496094, 0.09484100341796875, 0.09835243225097656, 0.10186386108398438, 0.10537528991699219, 0.10888671875]}, "gradients/decoder.transformer.h.17.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 3.0, 0.0, 0.0, 1.0, 1.0, 3.0, 2.0, 5.0, 2.0, 3.0, 4.0, 12.0, 6.0, 16.0, 11.0, 23.0, 25.0, 38.0, 53.0, 86.0, 107.0, 159.0, 125.0, 76.0, 59.0, 39.0, 40.0, 30.0, 19.0, 18.0, 10.0, 10.0, 5.0, 11.0, 2.0, 2.0, 2.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.794929504394531e-06, -6.557442247867584e-06, -6.319954991340637e-06, -6.08246773481369e-06, -5.844980478286743e-06, -5.607493221759796e-06, -5.370005965232849e-06, -5.132518708705902e-06, -4.895031452178955e-06, -4.657544195652008e-06, -4.420056939125061e-06, -4.182569682598114e-06, -3.945082426071167e-06, -3.70759516954422e-06, -3.470107913017273e-06, -3.232620656490326e-06, -2.995133399963379e-06, -2.757646143436432e-06, -2.520158886909485e-06, -2.282671630382538e-06, -2.045184373855591e-06, -1.8076971173286438e-06, -1.5702098608016968e-06, -1.3327226042747498e-06, -1.0952353477478027e-06, -8.577480912208557e-07, -6.202608346939087e-07, -3.8277357816696167e-07, -1.4528632164001465e-07, 9.220093488693237e-08, 3.296881914138794e-07, 5.671754479408264e-07, 8.046627044677734e-07, 1.0421499609947205e-06, 1.2796372175216675e-06, 1.5171244740486145e-06, 1.7546117305755615e-06, 1.9920989871025085e-06, 2.2295862436294556e-06, 2.4670735001564026e-06, 2.7045607566833496e-06, 2.9420480132102966e-06, 3.1795352697372437e-06, 3.4170225262641907e-06, 3.6545097827911377e-06, 3.891997039318085e-06, 4.129484295845032e-06, 4.366971552371979e-06, 4.604458808898926e-06, 4.841946065425873e-06, 5.07943332195282e-06, 5.316920578479767e-06, 5.554407835006714e-06, 5.791895091533661e-06, 6.029382348060608e-06, 6.266869604587555e-06, 6.504356861114502e-06, 6.741844117641449e-06, 6.979331374168396e-06, 7.216818630695343e-06, 7.45430588722229e-06, 7.691793143749237e-06, 7.929280400276184e-06, 8.166767656803131e-06, 8.404254913330078e-06]}, "gradients/decoder.transformer.h.17.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 1.0, 5.0, 8.0, 6.0, 12.0, 19.0, 15.0, 32.0, 38.0, 53.0, 92.0, 117.0, 330.0, 1798.0, 177107.0, 864725.0, 3321.0, 415.0, 191.0, 95.0, 53.0, 36.0, 23.0, 19.0, 10.0, 10.0, 5.0, 6.0, 1.0, 7.0, 1.0, 1.0, 3.0, 0.0, 1.0, 0.0, 1.0, 3.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00013768672943115234, -0.00013376586139202118, -0.00012984499335289001, -0.00012592412531375885, -0.00012200325727462769, -0.00011808238923549652, -0.00011416152119636536, -0.00011024065315723419, -0.00010631978511810303, -0.00010239891707897186, -9.84780490398407e-05, -9.455718100070953e-05, -9.063631296157837e-05, -8.67154449224472e-05, -8.279457688331604e-05, -7.887370884418488e-05, -7.495284080505371e-05, -7.103197276592255e-05, -6.711110472679138e-05, -6.319023668766022e-05, -5.926936864852905e-05, -5.534850060939789e-05, -5.1427632570266724e-05, -4.750676453113556e-05, -4.3585896492004395e-05, -3.966502845287323e-05, -3.5744160413742065e-05, -3.18232923746109e-05, -2.7902424335479736e-05, -2.3981556296348572e-05, -2.0060688257217407e-05, -1.6139820218086243e-05, -1.2218952178955078e-05, -8.298084139823914e-06, -4.377216100692749e-06, -4.5634806156158447e-07, 3.46451997756958e-06, 7.385388016700745e-06, 1.130625605583191e-05, 1.5227124094963074e-05, 1.9147992134094238e-05, 2.3068860173225403e-05, 2.6989728212356567e-05, 3.091059625148773e-05, 3.4831464290618896e-05, 3.875233232975006e-05, 4.2673200368881226e-05, 4.659406840801239e-05, 5.0514936447143555e-05, 5.443580448627472e-05, 5.8356672525405884e-05, 6.227754056453705e-05, 6.619840860366821e-05, 7.011927664279938e-05, 7.404014468193054e-05, 7.79610127210617e-05, 8.188188076019287e-05, 8.580274879932404e-05, 8.97236168384552e-05, 9.364448487758636e-05, 9.756535291671753e-05, 0.0001014862209558487, 0.00010540708899497986, 0.00010932795703411102, 0.00011324882507324219]}, "gradients/decoder.transformer.h.17.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 30.0, 78.0, 249.0, 362.0, 213.0, 52.0, 16.0, 10.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.757371319399681e-05, -1.7202886738232337e-05, -1.683205846347846e-05, -1.646123200771399e-05, -1.6090405551949516e-05, -1.5719579096185043e-05, -1.534875264042057e-05, -1.4977924365666695e-05, -1.4607097909902222e-05, -1.423627145413775e-05, -1.3865444088878576e-05, -1.3494616723619401e-05, -1.3123790267854929e-05, -1.2752963812090456e-05, -1.2382136446831282e-05, -1.2011309081572108e-05, -1.1640482625807635e-05, -1.1269656170043163e-05, -1.0898828804783989e-05, -1.0528001439524814e-05, -1.0157174983760342e-05, -9.78634852799587e-06, -9.415521162736695e-06, -9.044693797477521e-06, -8.673867341713049e-06, -8.303040885948576e-06, -7.932213520689402e-06, -7.561386610177578e-06, -7.190559699665755e-06, -6.819732789153932e-06, -6.448905878642108e-06, -6.078078968130285e-06, -5.7072525123658124e-06, -5.336425601853989e-06, -4.965598691342166e-06, -4.594771780830342e-06, -4.223944870318519e-06, -3.853117959806696e-06, -3.482291049294872e-06, -3.111464138783049e-06, -2.7406372282712255e-06, -2.369810317759402e-06, -1.9989834072475787e-06, -1.6281564967357554e-06, -1.257329586223932e-06, -8.865026757121086e-07, -5.156757652002852e-07, -1.4484885468846187e-07, 2.259780558233615e-07, 5.968049663351849e-07, 9.676318768470082e-07, 1.3384587873588316e-06, 1.709285697870655e-06, 2.0801126083824784e-06, 2.4509395188943017e-06, 2.821766429406125e-06, 3.1925933399179485e-06, 3.563420250429772e-06, 3.934247160941595e-06, 4.305074071453419e-06, 4.675900981965242e-06, 5.046727892477065e-06, 5.417554802988889e-06, 5.788381713500712e-06, 6.1592086240125354e-06]}, "gradients/decoder.transformer.h.17.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 3.0, 1.0, 2.0, 2.0, 3.0, 2.0, 8.0, 4.0, 9.0, 5.0, 17.0, 5.0, 22.0, 9.0, 29.0, 17.0, 34.0, 19.0, 38.0, 26.0, 49.0, 33.0, 56.0, 63.0, 22.0, 49.0, 43.0, 54.0, 28.0, 53.0, 26.0, 52.0, 27.0, 40.0, 21.0, 35.0, 18.0, 23.0, 9.0, 23.0, 4.0, 14.0, 4.0, 8.0, 1.0, 5.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-3.0994415283203125e-06, -3.009103238582611e-06, -2.9187649488449097e-06, -2.8284266591072083e-06, -2.738088369369507e-06, -2.6477500796318054e-06, -2.557411789894104e-06, -2.4670735001564026e-06, -2.376735210418701e-06, -2.2863969206809998e-06, -2.1960586309432983e-06, -2.105720341205597e-06, -2.0153820514678955e-06, -1.925043761730194e-06, -1.8347054719924927e-06, -1.7443671822547913e-06, -1.6540288925170898e-06, -1.5636906027793884e-06, -1.473352313041687e-06, -1.3830140233039856e-06, -1.2926757335662842e-06, -1.2023374438285828e-06, -1.1119991540908813e-06, -1.02166086435318e-06, -9.313225746154785e-07, -8.409842848777771e-07, -7.506459951400757e-07, -6.603077054023743e-07, -5.699694156646729e-07, -4.796311259269714e-07, -3.8929283618927e-07, -2.989545464515686e-07, -2.086162567138672e-07, -1.1827796697616577e-07, -2.7939677238464355e-08, 6.239861249923706e-08, 1.5273690223693848e-07, 2.430751919746399e-07, 3.334134817123413e-07, 4.237517714500427e-07, 5.140900611877441e-07, 6.044283509254456e-07, 6.94766640663147e-07, 7.851049304008484e-07, 8.754432201385498e-07, 9.657815098762512e-07, 1.0561197996139526e-06, 1.146458089351654e-06, 1.2367963790893555e-06, 1.3271346688270569e-06, 1.4174729585647583e-06, 1.5078112483024597e-06, 1.5981495380401611e-06, 1.6884878277778625e-06, 1.778826117515564e-06, 1.8691644072532654e-06, 1.959502696990967e-06, 2.0498409867286682e-06, 2.1401792764663696e-06, 2.230517566204071e-06, 2.3208558559417725e-06, 2.411194145679474e-06, 2.5015324354171753e-06, 2.5918707251548767e-06, 2.682209014892578e-06]}, "gradients/decoder.transformer.h.17.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 5.0, 8.0, 3.0, 3.0, 5.0, 7.0, 6.0, 5.0, 13.0, 12.0, 12.0, 20.0, 18.0, 24.0, 24.0, 30.0, 25.0, 34.0, 29.0, 52.0, 36.0, 37.0, 48.0, 44.0, 44.0, 35.0, 46.0, 35.0, 37.0, 34.0, 42.0, 26.0, 25.0, 38.0, 21.0, 25.0, 10.0, 13.0, 14.0, 14.0, 8.0, 11.0, 9.0, 7.0, 8.0, 4.0, 3.0, 2.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0], "bins": [-4.38671875, -4.2462158203125, -4.105712890625, -3.9652099609375, -3.82470703125, -3.6842041015625, -3.543701171875, -3.4031982421875, -3.2626953125, -3.1221923828125, -2.981689453125, -2.8411865234375, -2.70068359375, -2.5601806640625, -2.419677734375, -2.2791748046875, -2.138671875, -1.9981689453125, -1.857666015625, -1.7171630859375, -1.57666015625, -1.4361572265625, -1.295654296875, -1.1551513671875, -1.0146484375, -0.8741455078125, -0.733642578125, -0.5931396484375, -0.45263671875, -0.3121337890625, -0.171630859375, -0.0311279296875, 0.109375, 0.2498779296875, 0.390380859375, 0.5308837890625, 0.67138671875, 0.8118896484375, 0.952392578125, 1.0928955078125, 1.2333984375, 1.3739013671875, 1.514404296875, 1.6549072265625, 1.79541015625, 1.9359130859375, 2.076416015625, 2.2169189453125, 2.357421875, 2.4979248046875, 2.638427734375, 2.7789306640625, 2.91943359375, 3.0599365234375, 3.200439453125, 3.3409423828125, 3.4814453125, 3.6219482421875, 3.762451171875, 3.9029541015625, 4.04345703125, 4.1839599609375, 4.324462890625, 4.4649658203125, 4.60546875]}, "gradients/decoder.transformer.h.17.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 5.0, 3.0, 5.0, 13.0, 15.0, 14.0, 33.0, 47.0, 62.0, 115.0, 205.0, 338.0, 654.0, 1222.0, 2546.0, 5684.0, 14383.0, 48632.0, 311014.0, 562701.0, 69111.0, 18602.0, 6838.0, 3135.0, 1475.0, 732.0, 410.0, 211.0, 133.0, 84.0, 45.0, 35.0, 22.0, 16.0, 10.0, 7.0, 3.0, 3.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-11.84375, -11.524658203125, -11.20556640625, -10.886474609375, -10.5673828125, -10.248291015625, -9.92919921875, -9.610107421875, -9.291015625, -8.971923828125, -8.65283203125, -8.333740234375, -8.0146484375, -7.695556640625, -7.37646484375, -7.057373046875, -6.73828125, -6.419189453125, -6.10009765625, -5.781005859375, -5.4619140625, -5.142822265625, -4.82373046875, -4.504638671875, -4.185546875, -3.866455078125, -3.54736328125, -3.228271484375, -2.9091796875, -2.590087890625, -2.27099609375, -1.951904296875, -1.6328125, -1.313720703125, -0.99462890625, -0.675537109375, -0.3564453125, -0.037353515625, 0.28173828125, 0.600830078125, 0.919921875, 1.239013671875, 1.55810546875, 1.877197265625, 2.1962890625, 2.515380859375, 2.83447265625, 3.153564453125, 3.47265625, 3.791748046875, 4.11083984375, 4.429931640625, 4.7490234375, 5.068115234375, 5.38720703125, 5.706298828125, 6.025390625, 6.344482421875, 6.66357421875, 6.982666015625, 7.3017578125, 7.620849609375, 7.93994140625, 8.259033203125, 8.578125]}, "gradients/decoder.transformer.h.17.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 0.0, 2.0, 1.0, 4.0, 5.0, 2.0, 4.0, 5.0, 2.0, 6.0, 7.0, 13.0, 11.0, 8.0, 7.0, 18.0, 13.0, 11.0, 12.0, 25.0, 19.0, 21.0, 29.0, 49.0, 32.0, 40.0, 50.0, 76.0, 109.0, 339.0, 1478.0, 132.0, 79.0, 55.0, 36.0, 54.0, 30.0, 39.0, 26.0, 26.0, 25.0, 22.0, 18.0, 26.0, 14.0, 13.0, 12.0, 6.0, 4.0, 8.0, 3.0, 8.0, 10.0, 6.0, 3.0, 4.0, 2.0, 2.0, 2.0, 4.0, 1.0], "bins": [-14.5078125, -14.068603515625, -13.62939453125, -13.190185546875, -12.7509765625, -12.311767578125, -11.87255859375, -11.433349609375, -10.994140625, -10.554931640625, -10.11572265625, -9.676513671875, -9.2373046875, -8.798095703125, -8.35888671875, -7.919677734375, -7.48046875, -7.041259765625, -6.60205078125, -6.162841796875, -5.7236328125, -5.284423828125, -4.84521484375, -4.406005859375, -3.966796875, -3.527587890625, -3.08837890625, -2.649169921875, -2.2099609375, -1.770751953125, -1.33154296875, -0.892333984375, -0.453125, -0.013916015625, 0.42529296875, 0.864501953125, 1.3037109375, 1.742919921875, 2.18212890625, 2.621337890625, 3.060546875, 3.499755859375, 3.93896484375, 4.378173828125, 4.8173828125, 5.256591796875, 5.69580078125, 6.135009765625, 6.57421875, 7.013427734375, 7.45263671875, 7.891845703125, 8.3310546875, 8.770263671875, 9.20947265625, 9.648681640625, 10.087890625, 10.527099609375, 10.96630859375, 11.405517578125, 11.8447265625, 12.283935546875, 12.72314453125, 13.162353515625, 13.6015625]}, "gradients/decoder.transformer.h.17.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 4.0, 5.0, 4.0, 4.0, 9.0, 5.0, 6.0, 5.0, 8.0, 9.0, 17.0, 19.0, 27.0, 34.0, 41.0, 43.0, 61.0, 87.0, 132.0, 230.0, 368.0, 795.0, 2203.0, 9435.0, 95105.0, 2887579.0, 133813.0, 11348.0, 2437.0, 826.0, 381.0, 206.0, 106.0, 102.0, 50.0, 38.0, 38.0, 18.0, 16.0, 20.0, 7.0, 15.0, 9.0, 14.0, 11.0, 3.0, 5.0, 2.0, 5.0, 2.0, 3.0, 2.0, 1.0, 2.0, 0.0, 2.0, 1.0], "bins": [-29.0625, -28.169677734375, -27.27685546875, -26.384033203125, -25.4912109375, -24.598388671875, -23.70556640625, -22.812744140625, -21.919921875, -21.027099609375, -20.13427734375, -19.241455078125, -18.3486328125, -17.455810546875, -16.56298828125, -15.670166015625, -14.77734375, -13.884521484375, -12.99169921875, -12.098876953125, -11.2060546875, -10.313232421875, -9.42041015625, -8.527587890625, -7.634765625, -6.741943359375, -5.84912109375, -4.956298828125, -4.0634765625, -3.170654296875, -2.27783203125, -1.385009765625, -0.4921875, 0.400634765625, 1.29345703125, 2.186279296875, 3.0791015625, 3.971923828125, 4.86474609375, 5.757568359375, 6.650390625, 7.543212890625, 8.43603515625, 9.328857421875, 10.2216796875, 11.114501953125, 12.00732421875, 12.900146484375, 13.79296875, 14.685791015625, 15.57861328125, 16.471435546875, 17.3642578125, 18.257080078125, 19.14990234375, 20.042724609375, 20.935546875, 21.828369140625, 22.72119140625, 23.614013671875, 24.5068359375, 25.399658203125, 26.29248046875, 27.185302734375, 28.078125]}, "gradients/decoder.transformer.h.17.ln_1.weight": {"_type": "histogram", "values": [3.0, 199.0, 768.0, 49.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-22.195981979370117, -12.497512817382812, -2.799043655395508, 6.899425506591797, 16.5978946685791, 26.29636573791504, 35.994834899902344, 45.69329833984375, 55.39176940917969, 65.09024047851562, 74.78871154785156, 84.48717498779297, 94.1856460571289, 103.88411712646484, 113.58258056640625, 123.28105163574219, 132.97952270507812, 142.67799377441406, 152.37646484375, 162.07493591308594, 171.77340698242188, 181.47186279296875, 191.1703338623047, 200.86880493164062, 210.56727600097656, 220.2657470703125, 229.96421813964844, 239.66268920898438, 249.36114501953125, 259.05963134765625, 268.7580871582031, 278.45654296875, 288.155029296875, 297.8534851074219, 307.5519714355469, 317.25042724609375, 326.94891357421875, 336.6473693847656, 346.3458557128906, 356.0443115234375, 365.7427978515625, 375.4412536621094, 385.1397399902344, 394.83819580078125, 404.53668212890625, 414.2351379394531, 423.9336242675781, 433.632080078125, 443.3305358886719, 453.02899169921875, 462.72747802734375, 472.4259338378906, 482.1244201660156, 491.8228759765625, 501.5213623046875, 511.2198181152344, 520.9182739257812, 530.6167602539062, 540.315185546875, 550.013671875, 559.712158203125, 569.41064453125, 579.1090698242188, 588.8075561523438, 598.5060424804688]}, "gradients/decoder.transformer.h.17.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 5.0, 2.0, 3.0, 2.0, 8.0, 6.0, 7.0, 15.0, 15.0, 11.0, 30.0, 21.0, 29.0, 36.0, 18.0, 28.0, 34.0, 49.0, 39.0, 41.0, 51.0, 39.0, 40.0, 43.0, 34.0, 47.0, 39.0, 35.0, 31.0, 41.0, 27.0, 30.0, 21.0, 22.0, 24.0, 19.0, 11.0, 14.0, 8.0, 14.0, 4.0, 4.0, 5.0, 4.0, 3.0, 3.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-46.613548278808594, -45.17844772338867, -43.743343353271484, -42.30824279785156, -40.87314224243164, -39.43804168701172, -38.00293731689453, -36.56783676147461, -35.13273620605469, -33.697635650634766, -32.26253128051758, -30.827430725097656, -29.392330169677734, -27.95722770690918, -26.522125244140625, -25.087024688720703, -23.651920318603516, -22.21681785583496, -20.78171730041504, -19.346614837646484, -17.911514282226562, -16.476411819458008, -15.041309356689453, -13.606207847595215, -12.171106338500977, -10.736004829406738, -9.3009033203125, -7.865800857543945, -6.430699348449707, -4.995597839355469, -3.560495376586914, -2.125393867492676, -0.6902961730957031, 0.7448055744171143, 2.1799073219299316, 3.615009307861328, 5.050110816955566, 6.485212326049805, 7.920314788818359, 9.355416297912598, 10.790517807006836, 12.225619316101074, 13.660720825195312, 15.095823287963867, 16.530925750732422, 17.966026306152344, 19.4011287689209, 20.836231231689453, 22.271331787109375, 23.70643424987793, 25.14153480529785, 26.576637268066406, 28.011737823486328, 29.446840286254883, 30.881942749023438, 32.31704330444336, 33.75214385986328, 35.1872444152832, 36.62234878540039, 38.05744934082031, 39.492549896240234, 40.927650451660156, 42.362754821777344, 43.797855377197266, 45.23295974731445]}, "gradients/decoder.transformer.h.16.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 6.0, 4.0, 6.0, 2.0, 2.0, 7.0, 6.0, 9.0, 15.0, 8.0, 11.0, 15.0, 21.0, 26.0, 19.0, 22.0, 34.0, 26.0, 35.0, 46.0, 37.0, 42.0, 51.0, 48.0, 39.0, 34.0, 48.0, 38.0, 32.0, 42.0, 31.0, 33.0, 28.0, 24.0, 34.0, 20.0, 11.0, 15.0, 11.0, 19.0, 12.0, 7.0, 7.0, 7.0, 9.0, 4.0, 4.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0], "bins": [-4.49609375, -4.35296630859375, -4.2098388671875, -4.06671142578125, -3.923583984375, -3.78045654296875, -3.6373291015625, -3.49420166015625, -3.35107421875, -3.20794677734375, -3.0648193359375, -2.92169189453125, -2.778564453125, -2.63543701171875, -2.4923095703125, -2.34918212890625, -2.2060546875, -2.06292724609375, -1.9197998046875, -1.77667236328125, -1.633544921875, -1.49041748046875, -1.3472900390625, -1.20416259765625, -1.06103515625, -0.91790771484375, -0.7747802734375, -0.63165283203125, -0.488525390625, -0.34539794921875, -0.2022705078125, -0.05914306640625, 0.083984375, 0.22711181640625, 0.3702392578125, 0.51336669921875, 0.656494140625, 0.79962158203125, 0.9427490234375, 1.08587646484375, 1.22900390625, 1.37213134765625, 1.5152587890625, 1.65838623046875, 1.801513671875, 1.94464111328125, 2.0877685546875, 2.23089599609375, 2.3740234375, 2.51715087890625, 2.6602783203125, 2.80340576171875, 2.946533203125, 3.08966064453125, 3.2327880859375, 3.37591552734375, 3.51904296875, 3.66217041015625, 3.8052978515625, 3.94842529296875, 4.091552734375, 4.23468017578125, 4.3778076171875, 4.52093505859375, 4.6640625]}, "gradients/decoder.transformer.h.16.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 6.0, 4.0, 2.0, 0.0, 2.0, 4.0, 4.0, 9.0, 7.0, 9.0, 12.0, 14.0, 14.0, 8.0, 21.0, 15.0, 26.0, 40.0, 58.0, 81.0, 130.0, 229.0, 501.0, 1431.0, 4876.0, 21205.0, 155300.0, 2716970.0, 1199620.0, 75530.0, 12973.0, 3160.0, 1019.0, 394.0, 206.0, 101.0, 64.0, 45.0, 33.0, 34.0, 27.0, 20.0, 20.0, 10.0, 6.0, 11.0, 14.0, 7.0, 8.0, 3.0, 4.0, 3.0, 3.0, 1.0, 0.0, 2.0, 0.0, 2.0, 0.0, 1.0, 1.0], "bins": [-15.359375, -14.861328125, -14.36328125, -13.865234375, -13.3671875, -12.869140625, -12.37109375, -11.873046875, -11.375, -10.876953125, -10.37890625, -9.880859375, -9.3828125, -8.884765625, -8.38671875, -7.888671875, -7.390625, -6.892578125, -6.39453125, -5.896484375, -5.3984375, -4.900390625, -4.40234375, -3.904296875, -3.40625, -2.908203125, -2.41015625, -1.912109375, -1.4140625, -0.916015625, -0.41796875, 0.080078125, 0.578125, 1.076171875, 1.57421875, 2.072265625, 2.5703125, 3.068359375, 3.56640625, 4.064453125, 4.5625, 5.060546875, 5.55859375, 6.056640625, 6.5546875, 7.052734375, 7.55078125, 8.048828125, 8.546875, 9.044921875, 9.54296875, 10.041015625, 10.5390625, 11.037109375, 11.53515625, 12.033203125, 12.53125, 13.029296875, 13.52734375, 14.025390625, 14.5234375, 15.021484375, 15.51953125, 16.017578125, 16.515625]}, "gradients/decoder.transformer.h.16.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 5.0, 7.0, 4.0, 3.0, 4.0, 10.0, 18.0, 23.0, 20.0, 40.0, 60.0, 94.0, 127.0, 256.0, 381.0, 640.0, 848.0, 593.0, 349.0, 219.0, 132.0, 77.0, 60.0, 27.0, 27.0, 17.0, 13.0, 8.0, 5.0, 6.0, 2.0, 3.0, 2.0, 2.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-17.3125, -16.822265625, -16.33203125, -15.841796875, -15.3515625, -14.861328125, -14.37109375, -13.880859375, -13.390625, -12.900390625, -12.41015625, -11.919921875, -11.4296875, -10.939453125, -10.44921875, -9.958984375, -9.46875, -8.978515625, -8.48828125, -7.998046875, -7.5078125, -7.017578125, -6.52734375, -6.037109375, -5.546875, -5.056640625, -4.56640625, -4.076171875, -3.5859375, -3.095703125, -2.60546875, -2.115234375, -1.625, -1.134765625, -0.64453125, -0.154296875, 0.3359375, 0.826171875, 1.31640625, 1.806640625, 2.296875, 2.787109375, 3.27734375, 3.767578125, 4.2578125, 4.748046875, 5.23828125, 5.728515625, 6.21875, 6.708984375, 7.19921875, 7.689453125, 8.1796875, 8.669921875, 9.16015625, 9.650390625, 10.140625, 10.630859375, 11.12109375, 11.611328125, 12.1015625, 12.591796875, 13.08203125, 13.572265625, 14.0625]}, "gradients/decoder.transformer.h.16.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 4.0, 0.0, 2.0, 3.0, 9.0, 4.0, 11.0, 11.0, 15.0, 19.0, 40.0, 50.0, 92.0, 152.0, 281.0, 550.0, 1292.0, 4492.0, 29690.0, 615936.0, 3410092.0, 117212.0, 10429.0, 2242.0, 795.0, 390.0, 194.0, 114.0, 61.0, 36.0, 21.0, 16.0, 7.0, 7.0, 14.0, 1.0, 2.0, 3.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0], "bins": [-35.53125, -34.39453125, -33.2578125, -32.12109375, -30.984375, -29.84765625, -28.7109375, -27.57421875, -26.4375, -25.30078125, -24.1640625, -23.02734375, -21.890625, -20.75390625, -19.6171875, -18.48046875, -17.34375, -16.20703125, -15.0703125, -13.93359375, -12.796875, -11.66015625, -10.5234375, -9.38671875, -8.25, -7.11328125, -5.9765625, -4.83984375, -3.703125, -2.56640625, -1.4296875, -0.29296875, 0.84375, 1.98046875, 3.1171875, 4.25390625, 5.390625, 6.52734375, 7.6640625, 8.80078125, 9.9375, 11.07421875, 12.2109375, 13.34765625, 14.484375, 15.62109375, 16.7578125, 17.89453125, 19.03125, 20.16796875, 21.3046875, 22.44140625, 23.578125, 24.71484375, 25.8515625, 26.98828125, 28.125, 29.26171875, 30.3984375, 31.53515625, 32.671875, 33.80859375, 34.9453125, 36.08203125, 37.21875]}, "gradients/decoder.transformer.h.16.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 6.0, 12.0, 47.0, 169.0, 312.0, 273.0, 145.0, 37.0, 11.0, 5.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-90.2447509765625, -83.79078674316406, -77.33682250976562, -70.88285827636719, -64.42890167236328, -57.974937438964844, -51.520973205566406, -45.067012786865234, -38.6130485534668, -32.15908432006836, -25.705123901367188, -19.25115966796875, -12.797197341918945, -6.343235015869141, 0.11072921752929688, 6.564689636230469, 13.018653869628906, 19.47261619567871, 25.926578521728516, 32.38054275512695, 38.834503173828125, 45.28846740722656, 51.742431640625, 58.19639205932617, 64.65036010742188, 71.10432434082031, 77.55828857421875, 84.01225280761719, 90.4662094116211, 96.92017364501953, 103.37413787841797, 109.82809448242188, 116.28205871582031, 122.73602294921875, 129.1899871826172, 135.64395141601562, 142.09791564941406, 148.5518798828125, 155.00582885742188, 161.4597930908203, 167.91375732421875, 174.3677215576172, 180.82168579101562, 187.27565002441406, 193.7296142578125, 200.18356323242188, 206.63754272460938, 213.09149169921875, 219.54547119140625, 225.9994354248047, 232.45339965820312, 238.90736389160156, 245.361328125, 251.81527709960938, 258.2692565917969, 264.72320556640625, 271.1771545410156, 277.631103515625, 284.0850830078125, 290.5390319824219, 296.9930114746094, 303.44696044921875, 309.90093994140625, 316.3548889160156, 322.8088684082031]}, "gradients/decoder.transformer.h.16.ln_2.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 6.0, 2.0, 3.0, 3.0, 4.0, 6.0, 5.0, 6.0, 4.0, 12.0, 10.0, 10.0, 10.0, 17.0, 17.0, 23.0, 29.0, 17.0, 28.0, 28.0, 29.0, 40.0, 34.0, 31.0, 39.0, 42.0, 48.0, 37.0, 38.0, 36.0, 41.0, 47.0, 37.0, 32.0, 33.0, 24.0, 23.0, 26.0, 22.0, 20.0, 14.0, 16.0, 12.0, 17.0, 7.0, 4.0, 6.0, 4.0, 5.0, 2.0, 6.0, 1.0, 1.0, 3.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-33.38653564453125, -32.26866149902344, -31.15079116821289, -30.03291893005371, -28.91504669189453, -27.79717445373535, -26.679302215576172, -25.561429977416992, -24.443557739257812, -23.325685501098633, -22.207813262939453, -21.089941024780273, -19.972068786621094, -18.854196548461914, -17.736324310302734, -16.618452072143555, -15.500579833984375, -14.382707595825195, -13.264835357666016, -12.146963119506836, -11.029090881347656, -9.911218643188477, -8.793346405029297, -7.675474166870117, -6.5576019287109375, -5.439729690551758, -4.321857452392578, -3.2039852142333984, -2.0861129760742188, -0.9682407379150391, 0.14963150024414062, 1.2675037384033203, 2.3853759765625, 3.5032482147216797, 4.621120452880859, 5.738992691040039, 6.856864929199219, 7.974737167358398, 9.092609405517578, 10.210481643676758, 11.328353881835938, 12.446226119995117, 13.564098358154297, 14.681970596313477, 15.799842834472656, 16.917715072631836, 18.035587310791016, 19.153459548950195, 20.271331787109375, 21.389204025268555, 22.507076263427734, 23.624948501586914, 24.742820739746094, 25.860692977905273, 26.978565216064453, 28.096437454223633, 29.214309692382812, 30.332181930541992, 31.450054168701172, 32.56792449951172, 33.68579864501953, 34.803672790527344, 35.92154312133789, 37.03941345214844, 38.15728759765625]}, "gradients/decoder.transformer.h.16.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 3.0, 1.0, 6.0, 4.0, 4.0, 7.0, 4.0, 12.0, 14.0, 6.0, 9.0, 15.0, 19.0, 14.0, 26.0, 20.0, 29.0, 29.0, 35.0, 37.0, 44.0, 39.0, 51.0, 49.0, 35.0, 38.0, 36.0, 49.0, 35.0, 32.0, 32.0, 40.0, 25.0, 30.0, 31.0, 29.0, 20.0, 23.0, 12.0, 15.0, 8.0, 9.0, 7.0, 5.0, 4.0, 7.0, 5.0, 1.0, 6.0, 3.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-4.7421875, -4.59442138671875, -4.4466552734375, -4.29888916015625, -4.151123046875, -4.00335693359375, -3.8555908203125, -3.70782470703125, -3.56005859375, -3.41229248046875, -3.2645263671875, -3.11676025390625, -2.968994140625, -2.82122802734375, -2.6734619140625, -2.52569580078125, -2.3779296875, -2.23016357421875, -2.0823974609375, -1.93463134765625, -1.786865234375, -1.63909912109375, -1.4913330078125, -1.34356689453125, -1.19580078125, -1.04803466796875, -0.9002685546875, -0.75250244140625, -0.604736328125, -0.45697021484375, -0.3092041015625, -0.16143798828125, -0.013671875, 0.13409423828125, 0.2818603515625, 0.42962646484375, 0.577392578125, 0.72515869140625, 0.8729248046875, 1.02069091796875, 1.16845703125, 1.31622314453125, 1.4639892578125, 1.61175537109375, 1.759521484375, 1.90728759765625, 2.0550537109375, 2.20281982421875, 2.3505859375, 2.49835205078125, 2.6461181640625, 2.79388427734375, 2.941650390625, 3.08941650390625, 3.2371826171875, 3.38494873046875, 3.53271484375, 3.68048095703125, 3.8282470703125, 3.97601318359375, 4.123779296875, 4.27154541015625, 4.4193115234375, 4.56707763671875, 4.71484375]}, "gradients/decoder.transformer.h.16.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 3.0, 8.0, 4.0, 13.0, 24.0, 25.0, 30.0, 46.0, 83.0, 126.0, 206.0, 312.0, 490.0, 758.0, 1205.0, 1752.0, 2866.0, 4409.0, 7185.0, 11679.0, 19355.0, 33221.0, 60405.0, 119048.0, 327343.0, 235126.0, 97978.0, 51386.0, 29019.0, 16934.0, 10293.0, 6227.0, 3943.0, 2515.0, 1611.0, 1034.0, 657.0, 440.0, 283.0, 186.0, 118.0, 72.0, 52.0, 26.0, 22.0, 19.0, 12.0, 6.0, 2.0, 4.0, 2.0, 1.0, 2.0, 0.0, 2.0, 0.0, 1.0], "bins": [-0.20849609375, -0.20195388793945312, -0.19541168212890625, -0.18886947631835938, -0.1823272705078125, -0.17578506469726562, -0.16924285888671875, -0.16270065307617188, -0.156158447265625, -0.14961624145507812, -0.14307403564453125, -0.13653182983398438, -0.1299896240234375, -0.12344741821289062, -0.11690521240234375, -0.11036300659179688, -0.10382080078125, -0.09727859497070312, -0.09073638916015625, -0.08419418334960938, -0.0776519775390625, -0.07110977172851562, -0.06456756591796875, -0.058025360107421875, -0.051483154296875, -0.044940948486328125, -0.03839874267578125, -0.031856536865234375, -0.0253143310546875, -0.018772125244140625, -0.01222991943359375, -0.005687713623046875, 0.0008544921875, 0.007396697998046875, 0.01393890380859375, 0.020481109619140625, 0.0270233154296875, 0.033565521240234375, 0.04010772705078125, 0.046649932861328125, 0.053192138671875, 0.059734344482421875, 0.06627655029296875, 0.07281875610351562, 0.0793609619140625, 0.08590316772460938, 0.09244537353515625, 0.09898757934570312, 0.10552978515625, 0.11207199096679688, 0.11861419677734375, 0.12515640258789062, 0.1316986083984375, 0.13824081420898438, 0.14478302001953125, 0.15132522583007812, 0.157867431640625, 0.16440963745117188, 0.17095184326171875, 0.17749404907226562, 0.1840362548828125, 0.19057846069335938, 0.19712066650390625, 0.20366287231445312, 0.210205078125]}, "gradients/decoder.transformer.h.16.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 5.0, 3.0, 4.0, 3.0, 6.0, 11.0, 12.0, 14.0, 27.0, 19.0, 14.0, 21.0, 24.0, 23.0, 21.0, 35.0, 41.0, 31.0, 38.0, 36.0, 47.0, 44.0, 1068.0, 42.0, 34.0, 50.0, 41.0, 35.0, 32.0, 28.0, 34.0, 34.0, 27.0, 20.0, 19.0, 17.0, 16.0, 17.0, 12.0, 4.0, 9.0, 4.0, 4.0, 2.0, 3.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.15234375, -4.99798583984375, -4.8436279296875, -4.68927001953125, -4.534912109375, -4.38055419921875, -4.2261962890625, -4.07183837890625, -3.91748046875, -3.76312255859375, -3.6087646484375, -3.45440673828125, -3.300048828125, -3.14569091796875, -2.9913330078125, -2.83697509765625, -2.6826171875, -2.52825927734375, -2.3739013671875, -2.21954345703125, -2.065185546875, -1.91082763671875, -1.7564697265625, -1.60211181640625, -1.44775390625, -1.29339599609375, -1.1390380859375, -0.98468017578125, -0.830322265625, -0.67596435546875, -0.5216064453125, -0.36724853515625, -0.212890625, -0.05853271484375, 0.0958251953125, 0.25018310546875, 0.404541015625, 0.55889892578125, 0.7132568359375, 0.86761474609375, 1.02197265625, 1.17633056640625, 1.3306884765625, 1.48504638671875, 1.639404296875, 1.79376220703125, 1.9481201171875, 2.10247802734375, 2.2568359375, 2.41119384765625, 2.5655517578125, 2.71990966796875, 2.874267578125, 3.02862548828125, 3.1829833984375, 3.33734130859375, 3.49169921875, 3.64605712890625, 3.8004150390625, 3.95477294921875, 4.109130859375, 4.26348876953125, 4.4178466796875, 4.57220458984375, 4.7265625]}, "gradients/decoder.transformer.h.16.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 5.0, 6.0, 3.0, 6.0, 19.0, 28.0, 41.0, 44.0, 71.0, 107.0, 159.0, 212.0, 296.0, 439.0, 635.0, 933.0, 1333.0, 1875.0, 2736.0, 3959.0, 5704.0, 8204.0, 12027.0, 17926.0, 26892.0, 40808.0, 64501.0, 105396.0, 1292918.0, 223200.0, 101714.0, 62516.0, 40056.0, 26559.0, 17711.0, 11654.0, 8114.0, 5529.0, 3927.0, 2723.0, 1903.0, 1306.0, 901.0, 613.0, 429.0, 277.0, 230.0, 149.0, 111.0, 79.0, 53.0, 43.0, 22.0, 16.0, 9.0, 8.0, 3.0, 4.0, 1.0, 4.0], "bins": [-0.1239013671875, -0.12014389038085938, -0.11638641357421875, -0.11262893676757812, -0.1088714599609375, -0.10511398315429688, -0.10135650634765625, -0.09759902954101562, -0.093841552734375, -0.09008407592773438, -0.08632659912109375, -0.08256912231445312, -0.0788116455078125, -0.07505416870117188, -0.07129669189453125, -0.06753921508789062, -0.06378173828125, -0.060024261474609375, -0.05626678466796875, -0.052509307861328125, -0.0487518310546875, -0.044994354248046875, -0.04123687744140625, -0.037479400634765625, -0.033721923828125, -0.029964447021484375, -0.02620697021484375, -0.022449493408203125, -0.0186920166015625, -0.014934539794921875, -0.01117706298828125, -0.007419586181640625, -0.003662109375, 9.5367431640625e-05, 0.00385284423828125, 0.007610321044921875, 0.0113677978515625, 0.015125274658203125, 0.01888275146484375, 0.022640228271484375, 0.026397705078125, 0.030155181884765625, 0.03391265869140625, 0.037670135498046875, 0.0414276123046875, 0.045185089111328125, 0.04894256591796875, 0.052700042724609375, 0.05645751953125, 0.060214996337890625, 0.06397247314453125, 0.06772994995117188, 0.0714874267578125, 0.07524490356445312, 0.07900238037109375, 0.08275985717773438, 0.086517333984375, 0.09027481079101562, 0.09403228759765625, 0.09778976440429688, 0.1015472412109375, 0.10530471801757812, 0.10906219482421875, 0.11281967163085938, 0.1165771484375]}, "gradients/decoder.transformer.h.16.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 2.0, 3.0, 4.0, 4.0, 4.0, 11.0, 4.0, 11.0, 14.0, 8.0, 12.0, 11.0, 24.0, 41.0, 50.0, 40.0, 68.0, 107.0, 110.0, 138.0, 90.0, 29.0, 43.0, 34.0, 22.0, 26.0, 21.0, 11.0, 12.0, 10.0, 8.0, 8.0, 7.0, 7.0, 5.0, 5.0, 2.0, 2.0, 0.0, 0.0, 4.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-5.364418029785156e-06, -5.2051618695259094e-06, -5.045905709266663e-06, -4.886649549007416e-06, -4.727393388748169e-06, -4.568137228488922e-06, -4.408881068229675e-06, -4.2496249079704285e-06, -4.090368747711182e-06, -3.931112587451935e-06, -3.771856427192688e-06, -3.612600266933441e-06, -3.4533441066741943e-06, -3.2940879464149475e-06, -3.1348317861557007e-06, -2.975575625896454e-06, -2.816319465637207e-06, -2.65706330537796e-06, -2.4978071451187134e-06, -2.3385509848594666e-06, -2.1792948246002197e-06, -2.020038664340973e-06, -1.860782504081726e-06, -1.7015263438224792e-06, -1.5422701835632324e-06, -1.3830140233039856e-06, -1.2237578630447388e-06, -1.064501702785492e-06, -9.052455425262451e-07, -7.459893822669983e-07, -5.867332220077515e-07, -4.2747706174850464e-07, -2.682209014892578e-07, -1.0896474123001099e-07, 5.029141902923584e-08, 2.0954757928848267e-07, 3.688037395477295e-07, 5.280598998069763e-07, 6.873160600662231e-07, 8.4657222032547e-07, 1.0058283805847168e-06, 1.1650845408439636e-06, 1.3243407011032104e-06, 1.4835968613624573e-06, 1.642853021621704e-06, 1.802109181880951e-06, 1.9613653421401978e-06, 2.1206215023994446e-06, 2.2798776626586914e-06, 2.4391338229179382e-06, 2.598389983177185e-06, 2.757646143436432e-06, 2.9169023036956787e-06, 3.0761584639549255e-06, 3.2354146242141724e-06, 3.394670784473419e-06, 3.553926944732666e-06, 3.713183104991913e-06, 3.87243926525116e-06, 4.0316954255104065e-06, 4.190951585769653e-06, 4.3502077460289e-06, 4.509463906288147e-06, 4.668720066547394e-06, 4.827976226806641e-06]}, "gradients/decoder.transformer.h.16.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 0.0, 2.0, 1.0, 4.0, 1.0, 1.0, 2.0, 2.0, 5.0, 3.0, 8.0, 9.0, 7.0, 8.0, 15.0, 16.0, 19.0, 33.0, 47.0, 84.0, 134.0, 271.0, 1158.0, 26288.0, 969516.0, 48610.0, 1563.0, 309.0, 162.0, 116.0, 45.0, 35.0, 14.0, 12.0, 21.0, 9.0, 9.0, 5.0, 7.0, 7.0, 3.0, 2.0, 2.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.124113082885742e-05, -7.839780300855637e-05, -7.555447518825531e-05, -7.271114736795425e-05, -6.98678195476532e-05, -6.702449172735214e-05, -6.418116390705109e-05, -6.133783608675003e-05, -5.8494508266448975e-05, -5.565118044614792e-05, -5.280785262584686e-05, -4.996452480554581e-05, -4.712119698524475e-05, -4.4277869164943695e-05, -4.143454134464264e-05, -3.859121352434158e-05, -3.574788570404053e-05, -3.290455788373947e-05, -3.0061230063438416e-05, -2.721790224313736e-05, -2.4374574422836304e-05, -2.1531246602535248e-05, -1.8687918782234192e-05, -1.5844590961933136e-05, -1.300126314163208e-05, -1.0157935321331024e-05, -7.314607501029968e-06, -4.471279680728912e-06, -1.6279518604278564e-06, 1.2153759598731995e-06, 4.058703780174255e-06, 6.902031600475311e-06, 9.745359420776367e-06, 1.2588687241077423e-05, 1.543201506137848e-05, 1.8275342881679535e-05, 2.111867070198059e-05, 2.3961998522281647e-05, 2.6805326342582703e-05, 2.964865416288376e-05, 3.2491981983184814e-05, 3.533530980348587e-05, 3.8178637623786926e-05, 4.102196544408798e-05, 4.386529326438904e-05, 4.6708621084690094e-05, 4.955194890499115e-05, 5.2395276725292206e-05, 5.523860454559326e-05, 5.808193236589432e-05, 6.0925260186195374e-05, 6.376858800649643e-05, 6.661191582679749e-05, 6.945524364709854e-05, 7.22985714673996e-05, 7.514189928770065e-05, 7.798522710800171e-05, 8.082855492830276e-05, 8.367188274860382e-05, 8.651521056890488e-05, 8.935853838920593e-05, 9.220186620950699e-05, 9.504519402980804e-05, 9.78885218501091e-05, 0.00010073184967041016]}, "gradients/decoder.transformer.h.16.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 3.0, 5.0, 24.0, 128.0, 495.0, 316.0, 39.0, 8.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.5646826290758327e-06, -2.9443194762279745e-06, -2.323956096006441e-06, -1.703592829471745e-06, -1.0832295629370492e-06, -4.6286641008919105e-07, 1.5749697013234254e-07, 7.778603503538761e-07, 1.3982235032017343e-06, 2.0185866560495924e-06, 2.638950036271126e-06, 3.2593134164926596e-06, 3.879676569340518e-06, 4.500039722188376e-06, 5.120403329783585e-06, 5.740766482631443e-06, 6.361129635479301e-06, 6.981492788327159e-06, 7.6018559411750175e-06, 8.222219548770227e-06, 8.842582246870734e-06, 9.462945854465943e-06, 1.0083309462061152e-05, 1.0703672160161659e-05, 1.1324035767756868e-05, 1.1944399375352077e-05, 1.2564762073452584e-05, 1.3185125681047793e-05, 1.3805489288643003e-05, 1.442585198674351e-05, 1.5046215594338719e-05, 1.5666579201933928e-05, 1.628694008104503e-05, 1.690730277914554e-05, 1.752766729623545e-05, 1.8148029994335957e-05, 1.8768392692436464e-05, 1.9388757209526375e-05, 2.0009119907626882e-05, 2.062948260572739e-05, 2.1249845303827897e-05, 2.1870208001928404e-05, 2.2490572519018315e-05, 2.3110935217118822e-05, 2.373129791521933e-05, 2.435166243230924e-05, 2.4972025130409747e-05, 2.5592387828510255e-05, 2.6212752345600165e-05, 2.6833115043700673e-05, 2.7453479560790583e-05, 2.807384225889109e-05, 2.8694204956991598e-05, 2.9314567655092105e-05, 2.9934932172182016e-05, 3.055529668927193e-05, 3.1175659387372434e-05, 3.179602208547294e-05, 3.241638478357345e-05, 3.3036747481673956e-05, 3.365711381775327e-05, 3.427747651585378e-05, 3.4897839213954285e-05, 3.551820191205479e-05, 3.61385646101553e-05]}, "gradients/decoder.transformer.h.16.ln_cross_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 4.0, 2.0, 2.0, 6.0, 6.0, 8.0, 6.0, 2.0, 10.0, 21.0, 16.0, 17.0, 20.0, 28.0, 51.0, 30.0, 20.0, 31.0, 35.0, 73.0, 29.0, 33.0, 33.0, 31.0, 40.0, 70.0, 34.0, 27.0, 34.0, 25.0, 49.0, 33.0, 16.0, 21.0, 14.0, 30.0, 15.0, 15.0, 15.0, 14.0, 7.0, 15.0, 2.0, 5.0, 5.0, 8.0, 2.0, 0.0, 2.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-2.205371856689453e-06, -2.1345913410186768e-06, -2.0638108253479004e-06, -1.993030309677124e-06, -1.9222497940063477e-06, -1.8514692783355713e-06, -1.780688762664795e-06, -1.7099082469940186e-06, -1.6391277313232422e-06, -1.5683472156524658e-06, -1.4975666999816895e-06, -1.426786184310913e-06, -1.3560056686401367e-06, -1.2852251529693604e-06, -1.214444637298584e-06, -1.1436641216278076e-06, -1.0728836059570312e-06, -1.0021030902862549e-06, -9.313225746154785e-07, -8.605420589447021e-07, -7.897615432739258e-07, -7.189810276031494e-07, -6.48200511932373e-07, -5.774199962615967e-07, -5.066394805908203e-07, -4.3585896492004395e-07, -3.650784492492676e-07, -2.942979335784912e-07, -2.2351741790771484e-07, -1.5273690223693848e-07, -8.195638656616211e-08, -1.1175870895385742e-08, 5.960464477539063e-08, 1.30385160446167e-07, 2.0116567611694336e-07, 2.7194619178771973e-07, 3.427267074584961e-07, 4.1350722312927246e-07, 4.842877388000488e-07, 5.550682544708252e-07, 6.258487701416016e-07, 6.966292858123779e-07, 7.674098014831543e-07, 8.381903171539307e-07, 9.08970832824707e-07, 9.797513484954834e-07, 1.0505318641662598e-06, 1.1213123798370361e-06, 1.1920928955078125e-06, 1.2628734111785889e-06, 1.3336539268493652e-06, 1.4044344425201416e-06, 1.475214958190918e-06, 1.5459954738616943e-06, 1.6167759895324707e-06, 1.687556505203247e-06, 1.7583370208740234e-06, 1.8291175365447998e-06, 1.8998980522155762e-06, 1.9706785678863525e-06, 2.041459083557129e-06, 2.1122395992279053e-06, 2.1830201148986816e-06, 2.253800630569458e-06, 2.3245811462402344e-06]}, "gradients/decoder.transformer.h.16.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 3.0, 1.0, 6.0, 4.0, 4.0, 7.0, 4.0, 12.0, 14.0, 6.0, 9.0, 15.0, 19.0, 14.0, 26.0, 20.0, 29.0, 29.0, 35.0, 37.0, 44.0, 39.0, 51.0, 49.0, 35.0, 38.0, 36.0, 49.0, 35.0, 32.0, 32.0, 40.0, 25.0, 30.0, 31.0, 29.0, 20.0, 23.0, 12.0, 15.0, 8.0, 9.0, 7.0, 5.0, 4.0, 7.0, 5.0, 1.0, 6.0, 3.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-4.7421875, -4.59442138671875, -4.4466552734375, -4.29888916015625, -4.151123046875, -4.00335693359375, -3.8555908203125, -3.70782470703125, -3.56005859375, -3.41229248046875, -3.2645263671875, -3.11676025390625, -2.968994140625, -2.82122802734375, -2.6734619140625, -2.52569580078125, -2.3779296875, -2.23016357421875, -2.0823974609375, -1.93463134765625, -1.786865234375, -1.63909912109375, -1.4913330078125, -1.34356689453125, -1.19580078125, -1.04803466796875, -0.9002685546875, -0.75250244140625, -0.604736328125, -0.45697021484375, -0.3092041015625, -0.16143798828125, -0.013671875, 0.13409423828125, 0.2818603515625, 0.42962646484375, 0.577392578125, 0.72515869140625, 0.8729248046875, 1.02069091796875, 1.16845703125, 1.31622314453125, 1.4639892578125, 1.61175537109375, 1.759521484375, 1.90728759765625, 2.0550537109375, 2.20281982421875, 2.3505859375, 2.49835205078125, 2.6461181640625, 2.79388427734375, 2.941650390625, 3.08941650390625, 3.2371826171875, 3.38494873046875, 3.53271484375, 3.68048095703125, 3.8282470703125, 3.97601318359375, 4.123779296875, 4.27154541015625, 4.4193115234375, 4.56707763671875, 4.71484375]}, "gradients/decoder.transformer.h.16.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 2.0, 3.0, 5.0, 2.0, 4.0, 10.0, 10.0, 9.0, 16.0, 17.0, 14.0, 45.0, 100.0, 188.0, 332.0, 710.0, 1609.0, 4001.0, 11241.0, 37976.0, 155982.0, 605765.0, 170734.0, 40455.0, 12013.0, 4182.0, 1613.0, 751.0, 332.0, 172.0, 86.0, 54.0, 39.0, 23.0, 16.0, 5.0, 7.0, 12.0, 10.0, 5.0, 4.0, 4.0, 1.0, 2.0, 4.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-7.11328125, -6.89459228515625, -6.6759033203125, -6.45721435546875, -6.238525390625, -6.01983642578125, -5.8011474609375, -5.58245849609375, -5.36376953125, -5.14508056640625, -4.9263916015625, -4.70770263671875, -4.489013671875, -4.27032470703125, -4.0516357421875, -3.83294677734375, -3.6142578125, -3.39556884765625, -3.1768798828125, -2.95819091796875, -2.739501953125, -2.52081298828125, -2.3021240234375, -2.08343505859375, -1.86474609375, -1.64605712890625, -1.4273681640625, -1.20867919921875, -0.989990234375, -0.77130126953125, -0.5526123046875, -0.33392333984375, -0.115234375, 0.10345458984375, 0.3221435546875, 0.54083251953125, 0.759521484375, 0.97821044921875, 1.1968994140625, 1.41558837890625, 1.63427734375, 1.85296630859375, 2.0716552734375, 2.29034423828125, 2.509033203125, 2.72772216796875, 2.9464111328125, 3.16510009765625, 3.3837890625, 3.60247802734375, 3.8211669921875, 4.03985595703125, 4.258544921875, 4.47723388671875, 4.6959228515625, 4.91461181640625, 5.13330078125, 5.35198974609375, 5.5706787109375, 5.78936767578125, 6.008056640625, 6.22674560546875, 6.4454345703125, 6.66412353515625, 6.8828125]}, "gradients/decoder.transformer.h.16.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 4.0, 3.0, 2.0, 3.0, 2.0, 3.0, 3.0, 9.0, 3.0, 6.0, 6.0, 18.0, 12.0, 20.0, 24.0, 29.0, 23.0, 36.0, 33.0, 42.0, 42.0, 43.0, 65.0, 105.0, 293.0, 1614.0, 162.0, 79.0, 47.0, 49.0, 38.0, 34.0, 35.0, 23.0, 27.0, 17.0, 20.0, 13.0, 14.0, 7.0, 8.0, 12.0, 12.0, 6.0, 2.0, 6.0, 3.0, 2.0, 2.0, 3.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-20.3125, -19.719482421875, -19.12646484375, -18.533447265625, -17.9404296875, -17.347412109375, -16.75439453125, -16.161376953125, -15.568359375, -14.975341796875, -14.38232421875, -13.789306640625, -13.1962890625, -12.603271484375, -12.01025390625, -11.417236328125, -10.82421875, -10.231201171875, -9.63818359375, -9.045166015625, -8.4521484375, -7.859130859375, -7.26611328125, -6.673095703125, -6.080078125, -5.487060546875, -4.89404296875, -4.301025390625, -3.7080078125, -3.114990234375, -2.52197265625, -1.928955078125, -1.3359375, -0.742919921875, -0.14990234375, 0.443115234375, 1.0361328125, 1.629150390625, 2.22216796875, 2.815185546875, 3.408203125, 4.001220703125, 4.59423828125, 5.187255859375, 5.7802734375, 6.373291015625, 6.96630859375, 7.559326171875, 8.15234375, 8.745361328125, 9.33837890625, 9.931396484375, 10.5244140625, 11.117431640625, 11.71044921875, 12.303466796875, 12.896484375, 13.489501953125, 14.08251953125, 14.675537109375, 15.2685546875, 15.861572265625, 16.45458984375, 17.047607421875, 17.640625]}, "gradients/decoder.transformer.h.16.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 3.0, 1.0, 1.0, 1.0, 9.0, 2.0, 7.0, 7.0, 13.0, 10.0, 7.0, 19.0, 13.0, 17.0, 27.0, 37.0, 66.0, 78.0, 103.0, 151.0, 207.0, 405.0, 1131.0, 11260.0, 2878367.0, 248415.0, 3636.0, 701.0, 303.0, 226.0, 134.0, 79.0, 59.0, 42.0, 47.0, 30.0, 25.0, 17.0, 17.0, 11.0, 5.0, 4.0, 8.0, 3.0, 4.0, 1.0, 3.0, 2.0, 5.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-32.9375, -31.83251953125, -30.7275390625, -29.62255859375, -28.517578125, -27.41259765625, -26.3076171875, -25.20263671875, -24.09765625, -22.99267578125, -21.8876953125, -20.78271484375, -19.677734375, -18.57275390625, -17.4677734375, -16.36279296875, -15.2578125, -14.15283203125, -13.0478515625, -11.94287109375, -10.837890625, -9.73291015625, -8.6279296875, -7.52294921875, -6.41796875, -5.31298828125, -4.2080078125, -3.10302734375, -1.998046875, -0.89306640625, 0.2119140625, 1.31689453125, 2.421875, 3.52685546875, 4.6318359375, 5.73681640625, 6.841796875, 7.94677734375, 9.0517578125, 10.15673828125, 11.26171875, 12.36669921875, 13.4716796875, 14.57666015625, 15.681640625, 16.78662109375, 17.8916015625, 18.99658203125, 20.1015625, 21.20654296875, 22.3115234375, 23.41650390625, 24.521484375, 25.62646484375, 26.7314453125, 27.83642578125, 28.94140625, 30.04638671875, 31.1513671875, 32.25634765625, 33.361328125, 34.46630859375, 35.5712890625, 36.67626953125, 37.78125]}, "gradients/decoder.transformer.h.16.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 17.0, 992.0, 11.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-83.04647064208984, -67.99940490722656, -52.95234680175781, -37.90528106689453, -22.858219146728516, -7.8111572265625, 7.235908508300781, 22.28296661376953, 37.33003234863281, 52.37709426879883, 67.42415618896484, 82.47122192382812, 97.51828002929688, 112.56534576416016, 127.61241149902344, 142.6594696044922, 157.70654296875, 172.75360107421875, 187.80067443847656, 202.8477325439453, 217.89479064941406, 232.94186401367188, 247.98892211914062, 263.0359802246094, 278.0830383300781, 293.1300964355469, 308.1771545410156, 323.2242431640625, 338.27130126953125, 353.318359375, 368.36541748046875, 383.4124755859375, 398.4595642089844, 413.5066223144531, 428.5536804199219, 443.60076904296875, 458.6478271484375, 473.69488525390625, 488.741943359375, 503.78900146484375, 518.8360595703125, 533.8831176757812, 548.93017578125, 563.9772338867188, 579.0242919921875, 594.0713500976562, 609.118408203125, 624.16552734375, 639.2125854492188, 654.2596435546875, 669.3067016601562, 684.353759765625, 699.4008178710938, 714.4478759765625, 729.4949951171875, 744.5419921875, 759.5890502929688, 774.6361083984375, 789.6831665039062, 804.730224609375, 819.7772827148438, 834.8243408203125, 849.8714599609375, 864.91845703125, 879.965576171875]}, "gradients/decoder.transformer.h.16.ln_1.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 1.0, 2.0, 4.0, 7.0, 4.0, 6.0, 4.0, 12.0, 9.0, 13.0, 11.0, 16.0, 19.0, 25.0, 21.0, 25.0, 28.0, 34.0, 41.0, 37.0, 35.0, 39.0, 29.0, 46.0, 47.0, 33.0, 36.0, 44.0, 37.0, 39.0, 46.0, 22.0, 31.0, 28.0, 24.0, 23.0, 22.0, 17.0, 17.0, 12.0, 14.0, 11.0, 11.0, 6.0, 6.0, 5.0, 1.0, 5.0, 1.0, 2.0, 4.0, 3.0, 0.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-47.774635314941406, -46.082088470458984, -44.38954162597656, -42.696998596191406, -41.004451751708984, -39.31190490722656, -37.61935806274414, -35.92681121826172, -34.23426818847656, -32.54172134399414, -30.84917640686035, -29.15662956237793, -27.46408462524414, -25.77153778076172, -24.078990936279297, -22.386444091796875, -20.693897247314453, -19.00135040283203, -17.308805465698242, -15.61625862121582, -13.923712730407715, -12.23116683959961, -10.538619995117188, -8.846074104309082, -7.153528213500977, -5.460982322692871, -3.7684359550476074, -2.0758895874023438, -0.3833436965942383, 1.3092021942138672, 3.001749038696289, 4.6942949295043945, 6.386844635009766, 8.079390525817871, 9.771936416625977, 11.464483261108398, 13.157029151916504, 14.84957504272461, 16.54212188720703, 18.234668731689453, 19.927213668823242, 21.619760513305664, 23.312305450439453, 25.004852294921875, 26.697399139404297, 28.389944076538086, 30.082490921020508, 31.775035858154297, 33.46758270263672, 35.16012954711914, 36.85267639160156, 38.54521942138672, 40.23776626586914, 41.93031311035156, 43.622859954833984, 45.315406799316406, 47.00794982910156, 48.700496673583984, 50.393043518066406, 52.08558654785156, 53.778133392333984, 55.470680236816406, 57.16322708129883, 58.85577392578125, 60.54832077026367]}, "gradients/decoder.transformer.h.15.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 4.0, 6.0, 7.0, 6.0, 5.0, 9.0, 10.0, 13.0, 9.0, 9.0, 21.0, 16.0, 24.0, 24.0, 24.0, 31.0, 33.0, 32.0, 40.0, 41.0, 58.0, 47.0, 28.0, 40.0, 38.0, 40.0, 39.0, 28.0, 38.0, 34.0, 31.0, 29.0, 31.0, 32.0, 19.0, 22.0, 15.0, 19.0, 6.0, 9.0, 10.0, 6.0, 6.0, 6.0, 1.0, 4.0, 5.0, 6.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-4.64453125, -4.49609375, -4.34765625, -4.19921875, -4.05078125, -3.90234375, -3.75390625, -3.60546875, -3.45703125, -3.30859375, -3.16015625, -3.01171875, -2.86328125, -2.71484375, -2.56640625, -2.41796875, -2.26953125, -2.12109375, -1.97265625, -1.82421875, -1.67578125, -1.52734375, -1.37890625, -1.23046875, -1.08203125, -0.93359375, -0.78515625, -0.63671875, -0.48828125, -0.33984375, -0.19140625, -0.04296875, 0.10546875, 0.25390625, 0.40234375, 0.55078125, 0.69921875, 0.84765625, 0.99609375, 1.14453125, 1.29296875, 1.44140625, 1.58984375, 1.73828125, 1.88671875, 2.03515625, 2.18359375, 2.33203125, 2.48046875, 2.62890625, 2.77734375, 2.92578125, 3.07421875, 3.22265625, 3.37109375, 3.51953125, 3.66796875, 3.81640625, 3.96484375, 4.11328125, 4.26171875, 4.41015625, 4.55859375, 4.70703125, 4.85546875]}, "gradients/decoder.transformer.h.15.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 3.0, 3.0, 16.0, 26.0, 42.0, 59.0, 98.0, 215.0, 370.0, 733.0, 1546.0, 3391.0, 8433.0, 23370.0, 81902.0, 463935.0, 2458648.0, 956662.0, 139352.0, 35094.0, 11833.0, 4649.0, 1961.0, 894.0, 447.0, 247.0, 154.0, 94.0, 51.0, 27.0, 22.0, 8.0, 3.0, 2.0, 4.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-11.0390625, -10.7296142578125, -10.420166015625, -10.1107177734375, -9.80126953125, -9.4918212890625, -9.182373046875, -8.8729248046875, -8.5634765625, -8.2540283203125, -7.944580078125, -7.6351318359375, -7.32568359375, -7.0162353515625, -6.706787109375, -6.3973388671875, -6.087890625, -5.7784423828125, -5.468994140625, -5.1595458984375, -4.85009765625, -4.5406494140625, -4.231201171875, -3.9217529296875, -3.6123046875, -3.3028564453125, -2.993408203125, -2.6839599609375, -2.37451171875, -2.0650634765625, -1.755615234375, -1.4461669921875, -1.13671875, -0.8272705078125, -0.517822265625, -0.2083740234375, 0.10107421875, 0.4105224609375, 0.719970703125, 1.0294189453125, 1.3388671875, 1.6483154296875, 1.957763671875, 2.2672119140625, 2.57666015625, 2.8861083984375, 3.195556640625, 3.5050048828125, 3.814453125, 4.1239013671875, 4.433349609375, 4.7427978515625, 5.05224609375, 5.3616943359375, 5.671142578125, 5.9805908203125, 6.2900390625, 6.5994873046875, 6.908935546875, 7.2183837890625, 7.52783203125, 7.8372802734375, 8.146728515625, 8.4561767578125, 8.765625]}, "gradients/decoder.transformer.h.15.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 3.0, 2.0, 6.0, 5.0, 9.0, 16.0, 19.0, 27.0, 34.0, 66.0, 106.0, 164.0, 306.0, 457.0, 715.0, 767.0, 531.0, 288.0, 193.0, 102.0, 90.0, 54.0, 38.0, 25.0, 13.0, 10.0, 8.0, 10.0, 6.0, 3.0, 2.0, 4.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-16.390625, -15.909423828125, -15.42822265625, -14.947021484375, -14.4658203125, -13.984619140625, -13.50341796875, -13.022216796875, -12.541015625, -12.059814453125, -11.57861328125, -11.097412109375, -10.6162109375, -10.135009765625, -9.65380859375, -9.172607421875, -8.69140625, -8.210205078125, -7.72900390625, -7.247802734375, -6.7666015625, -6.285400390625, -5.80419921875, -5.322998046875, -4.841796875, -4.360595703125, -3.87939453125, -3.398193359375, -2.9169921875, -2.435791015625, -1.95458984375, -1.473388671875, -0.9921875, -0.510986328125, -0.02978515625, 0.451416015625, 0.9326171875, 1.413818359375, 1.89501953125, 2.376220703125, 2.857421875, 3.338623046875, 3.81982421875, 4.301025390625, 4.7822265625, 5.263427734375, 5.74462890625, 6.225830078125, 6.70703125, 7.188232421875, 7.66943359375, 8.150634765625, 8.6318359375, 9.113037109375, 9.59423828125, 10.075439453125, 10.556640625, 11.037841796875, 11.51904296875, 12.000244140625, 12.4814453125, 12.962646484375, 13.44384765625, 13.925048828125, 14.40625]}, "gradients/decoder.transformer.h.15.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 4.0, 0.0, 1.0, 1.0, 1.0, 0.0, 6.0, 6.0, 5.0, 7.0, 15.0, 16.0, 21.0, 30.0, 42.0, 56.0, 83.0, 191.0, 363.0, 774.0, 2059.0, 8328.0, 57319.0, 1163455.0, 2835129.0, 108953.0, 12826.0, 2806.0, 930.0, 386.0, 194.0, 115.0, 63.0, 43.0, 24.0, 15.0, 12.0, 5.0, 4.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-29.859375, -28.900146484375, -27.94091796875, -26.981689453125, -26.0224609375, -25.063232421875, -24.10400390625, -23.144775390625, -22.185546875, -21.226318359375, -20.26708984375, -19.307861328125, -18.3486328125, -17.389404296875, -16.43017578125, -15.470947265625, -14.51171875, -13.552490234375, -12.59326171875, -11.634033203125, -10.6748046875, -9.715576171875, -8.75634765625, -7.797119140625, -6.837890625, -5.878662109375, -4.91943359375, -3.960205078125, -3.0009765625, -2.041748046875, -1.08251953125, -0.123291015625, 0.8359375, 1.795166015625, 2.75439453125, 3.713623046875, 4.6728515625, 5.632080078125, 6.59130859375, 7.550537109375, 8.509765625, 9.468994140625, 10.42822265625, 11.387451171875, 12.3466796875, 13.305908203125, 14.26513671875, 15.224365234375, 16.18359375, 17.142822265625, 18.10205078125, 19.061279296875, 20.0205078125, 20.979736328125, 21.93896484375, 22.898193359375, 23.857421875, 24.816650390625, 25.77587890625, 26.735107421875, 27.6943359375, 28.653564453125, 29.61279296875, 30.572021484375, 31.53125]}, "gradients/decoder.transformer.h.15.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 7.0, 6.0, 10.0, 26.0, 37.0, 75.0, 125.0, 156.0, 193.0, 145.0, 98.0, 76.0, 29.0, 18.0, 10.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-156.6702880859375, -153.29116821289062, -149.9120635986328, -146.532958984375, -143.15383911132812, -139.77471923828125, -136.39561462402344, -133.01651000976562, -129.63739013671875, -126.2582778930664, -122.87916564941406, -119.50005340576172, -116.12094116210938, -112.74182891845703, -109.36271667480469, -105.98360443115234, -102.6044921875, -99.22537994384766, -95.84626770019531, -92.46715545654297, -89.08804321289062, -85.70893096923828, -82.32981872558594, -78.9507064819336, -75.57159423828125, -72.1924819946289, -68.81336975097656, -65.43425750732422, -62.055145263671875, -58.67603302001953, -55.29692077636719, -51.917808532714844, -48.5386962890625, -45.159584045410156, -41.78047180175781, -38.40135955810547, -35.022247314453125, -31.64313507080078, -28.264022827148438, -24.884910583496094, -21.50579833984375, -18.126686096191406, -14.747573852539062, -11.368461608886719, -7.989349365234375, -4.610237121582031, -1.2311248779296875, 2.1479873657226562, 5.527099609375, 8.906211853027344, 12.285324096679688, 15.664436340332031, 19.043548583984375, 22.42266082763672, 25.801773071289062, 29.180885314941406, 32.55999755859375, 35.939109802246094, 39.31822204589844, 42.69733428955078, 46.076446533203125, 49.45555877685547, 52.83467102050781, 56.213783264160156, 59.5928955078125]}, "gradients/decoder.transformer.h.15.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 2.0, 2.0, 3.0, 5.0, 6.0, 3.0, 7.0, 7.0, 12.0, 16.0, 14.0, 21.0, 10.0, 28.0, 27.0, 19.0, 23.0, 37.0, 40.0, 40.0, 39.0, 49.0, 31.0, 45.0, 32.0, 42.0, 41.0, 34.0, 32.0, 37.0, 31.0, 33.0, 37.0, 18.0, 27.0, 20.0, 29.0, 20.0, 9.0, 17.0, 17.0, 11.0, 13.0, 5.0, 5.0, 5.0, 1.0, 3.0, 6.0, 2.0, 3.0, 1.0, 0.0, 0.0, 1.0], "bins": [-41.21928405761719, -40.00331115722656, -38.78733444213867, -37.57136154174805, -36.355384826660156, -35.13941192626953, -33.92343521118164, -32.707462310791016, -31.491487503051758, -30.2755126953125, -29.059537887573242, -27.843563079833984, -26.62759017944336, -25.41161346435547, -24.195640563964844, -22.979665756225586, -21.763690948486328, -20.54771614074707, -19.331741333007812, -18.115766525268555, -16.899791717529297, -15.683817863464355, -14.467844009399414, -13.251869201660156, -12.035894393920898, -10.81991958618164, -9.603944778442383, -8.387970924377441, -7.171996116638184, -5.956021308898926, -4.740046977996826, -3.5240726470947266, -2.3080978393554688, -1.09212327003479, 0.12385129928588867, 1.3398258686065674, 2.555800437927246, 3.771775245666504, 4.9877495765686035, 6.203723907470703, 7.419698715209961, 8.635673522949219, 9.851648330688477, 11.067622184753418, 12.283596992492676, 13.499571800231934, 14.715545654296875, 15.931520462036133, 17.14749526977539, 18.36347007751465, 19.579444885253906, 20.795419692993164, 22.011394500732422, 23.227367401123047, 24.443342208862305, 25.659317016601562, 26.87529182434082, 28.091266632080078, 29.307241439819336, 30.523216247558594, 31.73918914794922, 32.95516586303711, 34.171138763427734, 35.387115478515625, 36.60308837890625]}, "gradients/decoder.transformer.h.15.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 4.0, 6.0, 6.0, 10.0, 5.0, 11.0, 6.0, 8.0, 8.0, 11.0, 19.0, 15.0, 16.0, 29.0, 36.0, 23.0, 24.0, 20.0, 42.0, 37.0, 45.0, 54.0, 37.0, 30.0, 36.0, 34.0, 32.0, 51.0, 27.0, 34.0, 37.0, 40.0, 26.0, 28.0, 21.0, 17.0, 20.0, 20.0, 15.0, 13.0, 13.0, 9.0, 7.0, 6.0, 5.0, 5.0, 4.0, 1.0, 3.0, 3.0, 2.0, 0.0, 1.0, 1.0, 2.0], "bins": [-4.69140625, -4.5496826171875, -4.407958984375, -4.2662353515625, -4.12451171875, -3.9827880859375, -3.841064453125, -3.6993408203125, -3.5576171875, -3.4158935546875, -3.274169921875, -3.1324462890625, -2.99072265625, -2.8489990234375, -2.707275390625, -2.5655517578125, -2.423828125, -2.2821044921875, -2.140380859375, -1.9986572265625, -1.85693359375, -1.7152099609375, -1.573486328125, -1.4317626953125, -1.2900390625, -1.1483154296875, -1.006591796875, -0.8648681640625, -0.72314453125, -0.5814208984375, -0.439697265625, -0.2979736328125, -0.15625, -0.0145263671875, 0.127197265625, 0.2689208984375, 0.41064453125, 0.5523681640625, 0.694091796875, 0.8358154296875, 0.9775390625, 1.1192626953125, 1.260986328125, 1.4027099609375, 1.54443359375, 1.6861572265625, 1.827880859375, 1.9696044921875, 2.111328125, 2.2530517578125, 2.394775390625, 2.5364990234375, 2.67822265625, 2.8199462890625, 2.961669921875, 3.1033935546875, 3.2451171875, 3.3868408203125, 3.528564453125, 3.6702880859375, 3.81201171875, 3.9537353515625, 4.095458984375, 4.2371826171875, 4.37890625]}, "gradients/decoder.transformer.h.15.crossattention.c_proj.weight": {"_type": "histogram", "values": [3.0, 3.0, 1.0, 5.0, 6.0, 9.0, 10.0, 20.0, 22.0, 44.0, 65.0, 92.0, 137.0, 163.0, 242.0, 323.0, 465.0, 641.0, 919.0, 1327.0, 1741.0, 2378.0, 3304.0, 4707.0, 6571.0, 9387.0, 13824.0, 19800.0, 29285.0, 45385.0, 73979.0, 137439.0, 306067.0, 156919.0, 82135.0, 48814.0, 31735.0, 21250.0, 14416.0, 10095.0, 7163.0, 4973.0, 3663.0, 2537.0, 1839.0, 1309.0, 963.0, 677.0, 517.0, 361.0, 255.0, 180.0, 135.0, 78.0, 65.0, 36.0, 33.0, 22.0, 13.0, 12.0, 7.0, 5.0, 2.0, 3.0], "bins": [-0.1553955078125, -0.15062713623046875, -0.1458587646484375, -0.14109039306640625, -0.136322021484375, -0.13155364990234375, -0.1267852783203125, -0.12201690673828125, -0.11724853515625, -0.11248016357421875, -0.1077117919921875, -0.10294342041015625, -0.098175048828125, -0.09340667724609375, -0.0886383056640625, -0.08386993408203125, -0.0791015625, -0.07433319091796875, -0.0695648193359375, -0.06479644775390625, -0.060028076171875, -0.05525970458984375, -0.0504913330078125, -0.04572296142578125, -0.04095458984375, -0.03618621826171875, -0.0314178466796875, -0.02664947509765625, -0.021881103515625, -0.01711273193359375, -0.0123443603515625, -0.00757598876953125, -0.0028076171875, 0.00196075439453125, 0.0067291259765625, 0.01149749755859375, 0.016265869140625, 0.02103424072265625, 0.0258026123046875, 0.03057098388671875, 0.03533935546875, 0.04010772705078125, 0.0448760986328125, 0.04964447021484375, 0.054412841796875, 0.05918121337890625, 0.0639495849609375, 0.06871795654296875, 0.073486328125, 0.07825469970703125, 0.0830230712890625, 0.08779144287109375, 0.092559814453125, 0.09732818603515625, 0.1020965576171875, 0.10686492919921875, 0.11163330078125, 0.11640167236328125, 0.1211700439453125, 0.12593841552734375, 0.130706787109375, 0.13547515869140625, 0.1402435302734375, 0.14501190185546875, 0.1497802734375]}, "gradients/decoder.transformer.h.15.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 3.0, 1.0, 2.0, 2.0, 3.0, 2.0, 8.0, 5.0, 8.0, 10.0, 15.0, 12.0, 20.0, 22.0, 20.0, 20.0, 15.0, 21.0, 27.0, 32.0, 39.0, 40.0, 46.0, 43.0, 27.0, 41.0, 1077.0, 44.0, 39.0, 27.0, 23.0, 41.0, 34.0, 32.0, 36.0, 24.0, 31.0, 22.0, 20.0, 13.0, 15.0, 15.0, 15.0, 7.0, 13.0, 8.0, 3.0, 5.0, 3.0, 1.0, 1.0, 2.0, 2.0, 3.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-4.11328125, -3.97918701171875, -3.8450927734375, -3.71099853515625, -3.576904296875, -3.44281005859375, -3.3087158203125, -3.17462158203125, -3.04052734375, -2.90643310546875, -2.7723388671875, -2.63824462890625, -2.504150390625, -2.37005615234375, -2.2359619140625, -2.10186767578125, -1.9677734375, -1.83367919921875, -1.6995849609375, -1.56549072265625, -1.431396484375, -1.29730224609375, -1.1632080078125, -1.02911376953125, -0.89501953125, -0.76092529296875, -0.6268310546875, -0.49273681640625, -0.358642578125, -0.22454833984375, -0.0904541015625, 0.04364013671875, 0.177734375, 0.31182861328125, 0.4459228515625, 0.58001708984375, 0.714111328125, 0.84820556640625, 0.9822998046875, 1.11639404296875, 1.25048828125, 1.38458251953125, 1.5186767578125, 1.65277099609375, 1.786865234375, 1.92095947265625, 2.0550537109375, 2.18914794921875, 2.3232421875, 2.45733642578125, 2.5914306640625, 2.72552490234375, 2.859619140625, 2.99371337890625, 3.1278076171875, 3.26190185546875, 3.39599609375, 3.53009033203125, 3.6641845703125, 3.79827880859375, 3.932373046875, 4.06646728515625, 4.2005615234375, 4.33465576171875, 4.46875]}, "gradients/decoder.transformer.h.15.crossattention.c_attn.weight": {"_type": "histogram", "values": [4.0, 1.0, 4.0, 6.0, 18.0, 17.0, 32.0, 42.0, 82.0, 80.0, 95.0, 167.0, 202.0, 312.0, 413.0, 555.0, 826.0, 1160.0, 1645.0, 2280.0, 3121.0, 4445.0, 6153.0, 8833.0, 12757.0, 18631.0, 27084.0, 41162.0, 63848.0, 101220.0, 218321.0, 1283461.0, 103107.0, 65108.0, 41658.0, 27995.0, 18823.0, 12801.0, 8754.0, 6199.0, 4468.0, 3295.0, 2225.0, 1666.0, 1203.0, 783.0, 608.0, 415.0, 320.0, 228.0, 157.0, 95.0, 81.0, 63.0, 39.0, 37.0, 16.0, 11.0, 6.0, 8.0, 3.0, 1.0, 1.0, 1.0], "bins": [-0.1011962890625, -0.09793376922607422, -0.09467124938964844, -0.09140872955322266, -0.08814620971679688, -0.0848836898803711, -0.08162117004394531, -0.07835865020751953, -0.07509613037109375, -0.07183361053466797, -0.06857109069824219, -0.0653085708618164, -0.062046051025390625, -0.058783531188964844, -0.05552101135253906, -0.05225849151611328, -0.0489959716796875, -0.04573345184326172, -0.04247093200683594, -0.039208412170410156, -0.035945892333984375, -0.032683372497558594, -0.029420852661132812, -0.02615833282470703, -0.02289581298828125, -0.01963329315185547, -0.016370773315429688, -0.013108253479003906, -0.009845733642578125, -0.006583213806152344, -0.0033206939697265625, -5.817413330078125e-05, 0.003204345703125, 0.006466865539550781, 0.009729385375976562, 0.012991905212402344, 0.016254425048828125, 0.019516944885253906, 0.022779464721679688, 0.02604198455810547, 0.02930450439453125, 0.03256702423095703, 0.03582954406738281, 0.039092063903808594, 0.042354583740234375, 0.045617103576660156, 0.04887962341308594, 0.05214214324951172, 0.0554046630859375, 0.05866718292236328, 0.06192970275878906, 0.06519222259521484, 0.06845474243164062, 0.0717172622680664, 0.07497978210449219, 0.07824230194091797, 0.08150482177734375, 0.08476734161376953, 0.08802986145019531, 0.0912923812866211, 0.09455490112304688, 0.09781742095947266, 0.10107994079589844, 0.10434246063232422, 0.10760498046875]}, "gradients/decoder.transformer.h.15.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 2.0, 7.0, 3.0, 3.0, 3.0, 12.0, 9.0, 6.0, 9.0, 9.0, 14.0, 21.0, 24.0, 57.0, 49.0, 62.0, 68.0, 160.0, 126.0, 56.0, 51.0, 52.0, 35.0, 34.0, 28.0, 16.0, 20.0, 16.0, 7.0, 14.0, 8.0, 6.0, 1.0, 3.0, 6.0, 2.0, 1.0, 1.0, 1.0, 2.0, 1.0, 2.0, 2.0, 1.0, 2.0], "bins": [-5.900859832763672e-06, -5.7406723499298096e-06, -5.580484867095947e-06, -5.420297384262085e-06, -5.260109901428223e-06, -5.09992241859436e-06, -4.939734935760498e-06, -4.779547452926636e-06, -4.6193599700927734e-06, -4.459172487258911e-06, -4.298985004425049e-06, -4.1387975215911865e-06, -3.978610038757324e-06, -3.818422555923462e-06, -3.6582350730895996e-06, -3.4980475902557373e-06, -3.337860107421875e-06, -3.1776726245880127e-06, -3.0174851417541504e-06, -2.857297658920288e-06, -2.6971101760864258e-06, -2.5369226932525635e-06, -2.376735210418701e-06, -2.216547727584839e-06, -2.0563602447509766e-06, -1.8961727619171143e-06, -1.735985279083252e-06, -1.5757977962493896e-06, -1.4156103134155273e-06, -1.255422830581665e-06, -1.0952353477478027e-06, -9.350478649139404e-07, -7.748603820800781e-07, -6.146728992462158e-07, -4.544854164123535e-07, -2.942979335784912e-07, -1.341104507446289e-07, 2.60770320892334e-08, 1.862645149230957e-07, 3.46451997756958e-07, 5.066394805908203e-07, 6.668269634246826e-07, 8.270144462585449e-07, 9.872019290924072e-07, 1.1473894119262695e-06, 1.3075768947601318e-06, 1.4677643775939941e-06, 1.6279518604278564e-06, 1.7881393432617188e-06, 1.948326826095581e-06, 2.1085143089294434e-06, 2.2687017917633057e-06, 2.428889274597168e-06, 2.5890767574310303e-06, 2.7492642402648926e-06, 2.909451723098755e-06, 3.069639205932617e-06, 3.2298266887664795e-06, 3.390014171600342e-06, 3.550201654434204e-06, 3.7103891372680664e-06, 3.870576620101929e-06, 4.030764102935791e-06, 4.190951585769653e-06, 4.351139068603516e-06]}, "gradients/decoder.transformer.h.15.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 3.0, 2.0, 1.0, 1.0, 0.0, 3.0, 4.0, 5.0, 2.0, 4.0, 8.0, 10.0, 6.0, 10.0, 20.0, 24.0, 35.0, 45.0, 81.0, 151.0, 490.0, 3299.0, 82063.0, 937008.0, 23239.0, 1467.0, 271.0, 128.0, 58.0, 33.0, 22.0, 16.0, 6.0, 5.0, 9.0, 8.0, 9.0, 1.0, 4.0, 5.0, 4.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.004903793334961e-05, -7.712002843618393e-05, -7.419101893901825e-05, -7.126200944185257e-05, -6.833299994468689e-05, -6.540399044752121e-05, -6.247498095035553e-05, -5.954597145318985e-05, -5.661696195602417e-05, -5.368795245885849e-05, -5.075894296169281e-05, -4.782993346452713e-05, -4.490092396736145e-05, -4.197191447019577e-05, -3.904290497303009e-05, -3.611389547586441e-05, -3.318488597869873e-05, -3.025587648153305e-05, -2.732686698436737e-05, -2.439785748720169e-05, -2.146884799003601e-05, -1.853983849287033e-05, -1.561082899570465e-05, -1.2681819498538971e-05, -9.752810001373291e-06, -6.823800504207611e-06, -3.894791007041931e-06, -9.657815098762512e-07, 1.9632279872894287e-06, 4.892237484455109e-06, 7.821246981620789e-06, 1.0750256478786469e-05, 1.3679265975952148e-05, 1.660827547311783e-05, 1.9537284970283508e-05, 2.2466294467449188e-05, 2.5395303964614868e-05, 2.8324313461780548e-05, 3.125332295894623e-05, 3.418233245611191e-05, 3.711134195327759e-05, 4.004035145044327e-05, 4.296936094760895e-05, 4.589837044477463e-05, 4.882737994194031e-05, 5.175638943910599e-05, 5.468539893627167e-05, 5.761440843343735e-05, 6.054341793060303e-05, 6.347242742776871e-05, 6.640143692493439e-05, 6.933044642210007e-05, 7.225945591926575e-05, 7.518846541643143e-05, 7.811747491359711e-05, 8.104648441076279e-05, 8.397549390792847e-05, 8.690450340509415e-05, 8.983351290225983e-05, 9.27625223994255e-05, 9.569153189659119e-05, 9.862054139375687e-05, 0.00010154955089092255, 0.00010447856038808823, 0.0001074075698852539]}, "gradients/decoder.transformer.h.15.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0, 1.0, 1.0, 4.0, 0.0, 5.0, 6.0, 9.0, 12.0, 18.0, 44.0, 61.0, 106.0, 134.0, 145.0, 144.0, 100.0, 79.0, 54.0, 29.0, 18.0, 14.0, 10.0, 4.0, 4.0, 6.0, 3.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-7.216835911094677e-06, -7.043640835036058e-06, -6.870445304230088e-06, -6.69725022817147e-06, -6.5240546973655e-06, -6.350859621306881e-06, -6.177664545248263e-06, -6.004469469189644e-06, -5.831273938383674e-06, -5.6580788623250555e-06, -5.484883331519086e-06, -5.311688255460467e-06, -5.1384931794018485e-06, -4.965297648595879e-06, -4.79210257253726e-06, -4.6189070417312905e-06, -4.445711965672672e-06, -4.272516889614053e-06, -4.0993213588080835e-06, -3.926126282749465e-06, -3.7529309793171706e-06, -3.5797356758848764e-06, -3.4065405998262577e-06, -3.2333452963939635e-06, -3.0601499929616693e-06, -2.886954689529375e-06, -2.713759386097081e-06, -2.5405643100384623e-06, -2.367369006606168e-06, -2.194173703173874e-06, -2.0209786271152552e-06, -1.847783323682961e-06, -1.6745880202506669e-06, -1.5013927168183727e-06, -1.3281975270729163e-06, -1.1550023373274598e-06, -9.818070338951657e-07, -8.086117873062904e-07, -6.35416540717415e-07, -4.622213509719586e-07, -2.8902604753966443e-07, -1.1583080095078913e-07, 5.736444563808618e-08, 2.305596922269615e-07, 4.037549388158368e-07, 5.769501854047121e-07, 7.501454319935874e-07, 9.233406217390439e-07, 1.096535925171338e-06, 1.2697312286036322e-06, 1.4429264183490886e-06, 1.616121608094545e-06, 1.7893169115268392e-06, 1.9625122149591334e-06, 2.135707291017752e-06, 2.3089025944500463e-06, 2.4820978978823405e-06, 2.6552932013146346e-06, 2.828488504746929e-06, 3.0016835808055475e-06, 3.1748788842378417e-06, 3.348074187670136e-06, 3.5212692637287546e-06, 3.6944645671610488e-06, 3.867659870593343e-06]}, "gradients/decoder.transformer.h.15.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 2.0, 4.0, 1.0, 5.0, 9.0, 12.0, 8.0, 6.0, 5.0, 8.0, 23.0, 10.0, 17.0, 17.0, 21.0, 20.0, 51.0, 28.0, 32.0, 36.0, 31.0, 35.0, 37.0, 66.0, 32.0, 34.0, 26.0, 22.0, 37.0, 70.0, 29.0, 37.0, 27.0, 28.0, 20.0, 47.0, 18.0, 16.0, 16.0, 8.0, 16.0, 10.0, 13.0, 2.0, 6.0, 4.0, 3.0, 4.0, 5.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-2.3245811462402344e-06, -2.255663275718689e-06, -2.1867454051971436e-06, -2.117827534675598e-06, -2.0489096641540527e-06, -1.9799917936325073e-06, -1.911073923110962e-06, -1.8421560525894165e-06, -1.773238182067871e-06, -1.7043203115463257e-06, -1.6354024410247803e-06, -1.5664845705032349e-06, -1.4975666999816895e-06, -1.428648829460144e-06, -1.3597309589385986e-06, -1.2908130884170532e-06, -1.2218952178955078e-06, -1.1529773473739624e-06, -1.084059476852417e-06, -1.0151416063308716e-06, -9.462237358093262e-07, -8.773058652877808e-07, -8.083879947662354e-07, -7.394701242446899e-07, -6.705522537231445e-07, -6.016343832015991e-07, -5.327165126800537e-07, -4.637986421585083e-07, -3.948807716369629e-07, -3.259629011154175e-07, -2.5704503059387207e-07, -1.8812716007232666e-07, -1.1920928955078125e-07, -5.029141902923584e-08, 1.862645149230957e-08, 8.754432201385498e-08, 1.564621925354004e-07, 2.253800630569458e-07, 2.942979335784912e-07, 3.632158041000366e-07, 4.3213367462158203e-07, 5.010515451431274e-07, 5.699694156646729e-07, 6.388872861862183e-07, 7.078051567077637e-07, 7.767230272293091e-07, 8.456408977508545e-07, 9.145587682723999e-07, 9.834766387939453e-07, 1.0523945093154907e-06, 1.1213123798370361e-06, 1.1902302503585815e-06, 1.259148120880127e-06, 1.3280659914016724e-06, 1.3969838619232178e-06, 1.4659017324447632e-06, 1.5348196029663086e-06, 1.603737473487854e-06, 1.6726553440093994e-06, 1.7415732145309448e-06, 1.8104910850524902e-06, 1.8794089555740356e-06, 1.948326826095581e-06, 2.0172446966171265e-06, 2.086162567138672e-06]}, "gradients/decoder.transformer.h.15.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 4.0, 6.0, 6.0, 10.0, 5.0, 11.0, 6.0, 8.0, 8.0, 11.0, 19.0, 15.0, 16.0, 29.0, 36.0, 23.0, 24.0, 20.0, 42.0, 37.0, 45.0, 54.0, 37.0, 30.0, 36.0, 34.0, 32.0, 51.0, 27.0, 34.0, 37.0, 40.0, 26.0, 28.0, 21.0, 17.0, 20.0, 20.0, 15.0, 13.0, 13.0, 9.0, 7.0, 6.0, 5.0, 5.0, 4.0, 1.0, 3.0, 3.0, 2.0, 0.0, 1.0, 1.0, 2.0], "bins": [-4.69140625, -4.5496826171875, -4.407958984375, -4.2662353515625, -4.12451171875, -3.9827880859375, -3.841064453125, -3.6993408203125, -3.5576171875, -3.4158935546875, -3.274169921875, -3.1324462890625, -2.99072265625, -2.8489990234375, -2.707275390625, -2.5655517578125, -2.423828125, -2.2821044921875, -2.140380859375, -1.9986572265625, -1.85693359375, -1.7152099609375, -1.573486328125, -1.4317626953125, -1.2900390625, -1.1483154296875, -1.006591796875, -0.8648681640625, -0.72314453125, -0.5814208984375, -0.439697265625, -0.2979736328125, -0.15625, -0.0145263671875, 0.127197265625, 0.2689208984375, 0.41064453125, 0.5523681640625, 0.694091796875, 0.8358154296875, 0.9775390625, 1.1192626953125, 1.260986328125, 1.4027099609375, 1.54443359375, 1.6861572265625, 1.827880859375, 1.9696044921875, 2.111328125, 2.2530517578125, 2.394775390625, 2.5364990234375, 2.67822265625, 2.8199462890625, 2.961669921875, 3.1033935546875, 3.2451171875, 3.3868408203125, 3.528564453125, 3.6702880859375, 3.81201171875, 3.9537353515625, 4.095458984375, 4.2371826171875, 4.37890625]}, "gradients/decoder.transformer.h.15.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 3.0, 4.0, 3.0, 5.0, 7.0, 9.0, 14.0, 28.0, 36.0, 46.0, 72.0, 124.0, 193.0, 317.0, 505.0, 893.0, 1428.0, 2284.0, 4001.0, 6732.0, 11873.0, 21233.0, 40407.0, 84952.0, 235100.0, 388669.0, 128209.0, 56241.0, 28675.0, 15294.0, 8634.0, 5017.0, 2952.0, 1818.0, 1052.0, 649.0, 396.0, 227.0, 163.0, 112.0, 61.0, 40.0, 33.0, 19.0, 10.0, 10.0, 4.0, 7.0, 3.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0, 4.0], "bins": [-6.32421875, -6.1339111328125, -5.943603515625, -5.7532958984375, -5.56298828125, -5.3726806640625, -5.182373046875, -4.9920654296875, -4.8017578125, -4.6114501953125, -4.421142578125, -4.2308349609375, -4.04052734375, -3.8502197265625, -3.659912109375, -3.4696044921875, -3.279296875, -3.0889892578125, -2.898681640625, -2.7083740234375, -2.51806640625, -2.3277587890625, -2.137451171875, -1.9471435546875, -1.7568359375, -1.5665283203125, -1.376220703125, -1.1859130859375, -0.99560546875, -0.8052978515625, -0.614990234375, -0.4246826171875, -0.234375, -0.0440673828125, 0.146240234375, 0.3365478515625, 0.52685546875, 0.7171630859375, 0.907470703125, 1.0977783203125, 1.2880859375, 1.4783935546875, 1.668701171875, 1.8590087890625, 2.04931640625, 2.2396240234375, 2.429931640625, 2.6202392578125, 2.810546875, 3.0008544921875, 3.191162109375, 3.3814697265625, 3.57177734375, 3.7620849609375, 3.952392578125, 4.1427001953125, 4.3330078125, 4.5233154296875, 4.713623046875, 4.9039306640625, 5.09423828125, 5.2845458984375, 5.474853515625, 5.6651611328125, 5.85546875]}, "gradients/decoder.transformer.h.15.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 3.0, 1.0, 0.0, 0.0, 3.0, 0.0, 2.0, 2.0, 1.0, 6.0, 12.0, 5.0, 9.0, 15.0, 11.0, 15.0, 30.0, 22.0, 31.0, 38.0, 38.0, 46.0, 79.0, 96.0, 157.0, 1525.0, 294.0, 147.0, 104.0, 65.0, 53.0, 48.0, 40.0, 36.0, 27.0, 22.0, 19.0, 16.0, 10.0, 12.0, 6.0, 7.0, 3.0, 4.0, 0.0, 4.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-19.640625, -18.908203125, -18.17578125, -17.443359375, -16.7109375, -15.978515625, -15.24609375, -14.513671875, -13.78125, -13.048828125, -12.31640625, -11.583984375, -10.8515625, -10.119140625, -9.38671875, -8.654296875, -7.921875, -7.189453125, -6.45703125, -5.724609375, -4.9921875, -4.259765625, -3.52734375, -2.794921875, -2.0625, -1.330078125, -0.59765625, 0.134765625, 0.8671875, 1.599609375, 2.33203125, 3.064453125, 3.796875, 4.529296875, 5.26171875, 5.994140625, 6.7265625, 7.458984375, 8.19140625, 8.923828125, 9.65625, 10.388671875, 11.12109375, 11.853515625, 12.5859375, 13.318359375, 14.05078125, 14.783203125, 15.515625, 16.248046875, 16.98046875, 17.712890625, 18.4453125, 19.177734375, 19.91015625, 20.642578125, 21.375, 22.107421875, 22.83984375, 23.572265625, 24.3046875, 25.037109375, 25.76953125, 26.501953125, 27.234375]}, "gradients/decoder.transformer.h.15.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 3.0, 2.0, 6.0, 2.0, 4.0, 7.0, 6.0, 12.0, 18.0, 22.0, 32.0, 38.0, 64.0, 74.0, 138.0, 202.0, 350.0, 853.0, 3076.0, 22663.0, 1801214.0, 1291439.0, 20889.0, 2973.0, 759.0, 305.0, 175.0, 113.0, 71.0, 61.0, 43.0, 23.0, 20.0, 19.0, 8.0, 7.0, 14.0, 4.0, 1.0, 3.0, 0.0, 1.0, 1.0, 4.0, 1.0, 0.0, 3.0], "bins": [-53.84375, -52.42626953125, -51.0087890625, -49.59130859375, -48.173828125, -46.75634765625, -45.3388671875, -43.92138671875, -42.50390625, -41.08642578125, -39.6689453125, -38.25146484375, -36.833984375, -35.41650390625, -33.9990234375, -32.58154296875, -31.1640625, -29.74658203125, -28.3291015625, -26.91162109375, -25.494140625, -24.07666015625, -22.6591796875, -21.24169921875, -19.82421875, -18.40673828125, -16.9892578125, -15.57177734375, -14.154296875, -12.73681640625, -11.3193359375, -9.90185546875, -8.484375, -7.06689453125, -5.6494140625, -4.23193359375, -2.814453125, -1.39697265625, 0.0205078125, 1.43798828125, 2.85546875, 4.27294921875, 5.6904296875, 7.10791015625, 8.525390625, 9.94287109375, 11.3603515625, 12.77783203125, 14.1953125, 15.61279296875, 17.0302734375, 18.44775390625, 19.865234375, 21.28271484375, 22.7001953125, 24.11767578125, 25.53515625, 26.95263671875, 28.3701171875, 29.78759765625, 31.205078125, 32.62255859375, 34.0400390625, 35.45751953125, 36.875]}, "gradients/decoder.transformer.h.15.ln_1.weight": {"_type": "histogram", "values": [175.0, 816.0, 30.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-22.178464889526367, -5.579530715942383, 11.019403457641602, 27.618337631225586, 44.21726989746094, 60.81620788574219, 77.4151382446289, 94.01406860351562, 110.61300659179688, 127.21194458007812, 143.81088256835938, 160.40980529785156, 177.0087432861328, 193.60768127441406, 210.20660400390625, 226.8055419921875, 243.40447998046875, 260.00341796875, 276.60235595703125, 293.2012939453125, 309.80023193359375, 326.399169921875, 342.9980773925781, 359.5970153808594, 376.1959533691406, 392.7948913574219, 409.3938293457031, 425.9927673339844, 442.5916748046875, 459.19061279296875, 475.78955078125, 492.38848876953125, 508.9874267578125, 525.5863647460938, 542.185302734375, 558.7842407226562, 575.3831787109375, 591.9821166992188, 608.5810546875, 625.179931640625, 641.7789306640625, 658.3778686523438, 674.976806640625, 691.5757446289062, 708.1746826171875, 724.7736206054688, 741.37255859375, 757.971435546875, 774.5703735351562, 791.1693115234375, 807.7682495117188, 824.3671875, 840.9661254882812, 857.5650634765625, 874.1640014648438, 890.762939453125, 907.36181640625, 923.9607543945312, 940.5596923828125, 957.1586303710938, 973.757568359375, 990.3565063476562, 1006.9554443359375, 1023.5543212890625, 1040.1533203125]}, "gradients/decoder.transformer.h.15.ln_1.bias": {"_type": "histogram", "values": [3.0, 1.0, 3.0, 5.0, 0.0, 2.0, 1.0, 1.0, 5.0, 6.0, 8.0, 4.0, 11.0, 19.0, 10.0, 19.0, 20.0, 23.0, 30.0, 28.0, 18.0, 27.0, 32.0, 43.0, 48.0, 41.0, 55.0, 35.0, 38.0, 40.0, 45.0, 46.0, 36.0, 39.0, 45.0, 32.0, 34.0, 25.0, 21.0, 18.0, 14.0, 17.0, 13.0, 12.0, 6.0, 10.0, 7.0, 7.0, 6.0, 4.0, 2.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-49.94910430908203, -48.174312591552734, -46.39952087402344, -44.62472915649414, -42.849937438964844, -41.07514953613281, -39.30035400390625, -37.52556610107422, -35.75077438354492, -33.975982666015625, -32.20119094848633, -30.42639923095703, -28.651609420776367, -26.87681770324707, -25.102025985717773, -23.32723617553711, -21.55244255065918, -19.777650833129883, -18.002859115600586, -16.228069305419922, -14.453277587890625, -12.678485870361328, -10.903694152832031, -9.12890338897705, -7.354111671447754, -5.579320430755615, -3.8045289516448975, -2.0297374725341797, -0.254946231842041, 1.5198450088500977, 3.2946367263793945, 5.069427490234375, 6.844219207763672, 8.619010925292969, 10.39380168914795, 12.168593406677246, 13.943384170532227, 15.718175888061523, 17.49296760559082, 19.267757415771484, 21.04254913330078, 22.817340850830078, 24.592132568359375, 26.366924285888672, 28.141714096069336, 29.916505813598633, 31.69129753112793, 33.466087341308594, 35.240882873535156, 37.01567459106445, 38.79046630859375, 40.56525802612305, 42.340049743652344, 44.114837646484375, 45.88963317871094, 47.66442108154297, 49.439212799072266, 51.21400451660156, 52.98879623413086, 54.763587951660156, 56.53837966918945, 58.31317138671875, 60.08795928955078, 61.86275100708008, 63.637542724609375]}, "gradients/decoder.transformer.h.14.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 2.0, 0.0, 3.0, 2.0, 4.0, 6.0, 2.0, 6.0, 7.0, 10.0, 6.0, 10.0, 8.0, 13.0, 13.0, 20.0, 18.0, 26.0, 31.0, 25.0, 24.0, 30.0, 36.0, 38.0, 43.0, 40.0, 42.0, 35.0, 41.0, 36.0, 27.0, 38.0, 31.0, 36.0, 38.0, 30.0, 38.0, 30.0, 21.0, 28.0, 21.0, 24.0, 10.0, 12.0, 7.0, 6.0, 10.0, 9.0, 2.0, 6.0, 2.0, 4.0, 3.0, 1.0, 5.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-4.5078125, -4.35931396484375, -4.2108154296875, -4.06231689453125, -3.913818359375, -3.76531982421875, -3.6168212890625, -3.46832275390625, -3.31982421875, -3.17132568359375, -3.0228271484375, -2.87432861328125, -2.725830078125, -2.57733154296875, -2.4288330078125, -2.28033447265625, -2.1318359375, -1.98333740234375, -1.8348388671875, -1.68634033203125, -1.537841796875, -1.38934326171875, -1.2408447265625, -1.09234619140625, -0.94384765625, -0.79534912109375, -0.6468505859375, -0.49835205078125, -0.349853515625, -0.20135498046875, -0.0528564453125, 0.09564208984375, 0.244140625, 0.39263916015625, 0.5411376953125, 0.68963623046875, 0.838134765625, 0.98663330078125, 1.1351318359375, 1.28363037109375, 1.43212890625, 1.58062744140625, 1.7291259765625, 1.87762451171875, 2.026123046875, 2.17462158203125, 2.3231201171875, 2.47161865234375, 2.6201171875, 2.76861572265625, 2.9171142578125, 3.06561279296875, 3.214111328125, 3.36260986328125, 3.5111083984375, 3.65960693359375, 3.80810546875, 3.95660400390625, 4.1051025390625, 4.25360107421875, 4.402099609375, 4.55059814453125, 4.6990966796875, 4.84759521484375, 4.99609375]}, "gradients/decoder.transformer.h.14.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 3.0, 1.0, 4.0, 5.0, 7.0, 6.0, 14.0, 8.0, 6.0, 8.0, 10.0, 21.0, 19.0, 38.0, 57.0, 63.0, 99.0, 177.0, 346.0, 848.0, 3187.0, 14797.0, 107323.0, 2094802.0, 1856221.0, 97990.0, 13691.0, 2850.0, 889.0, 308.0, 153.0, 99.0, 47.0, 43.0, 30.0, 21.0, 22.0, 16.0, 15.0, 8.0, 8.0, 7.0, 6.0, 5.0, 5.0, 6.0, 1.0, 3.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-17.296875, -16.738525390625, -16.18017578125, -15.621826171875, -15.0634765625, -14.505126953125, -13.94677734375, -13.388427734375, -12.830078125, -12.271728515625, -11.71337890625, -11.155029296875, -10.5966796875, -10.038330078125, -9.47998046875, -8.921630859375, -8.36328125, -7.804931640625, -7.24658203125, -6.688232421875, -6.1298828125, -5.571533203125, -5.01318359375, -4.454833984375, -3.896484375, -3.338134765625, -2.77978515625, -2.221435546875, -1.6630859375, -1.104736328125, -0.54638671875, 0.011962890625, 0.5703125, 1.128662109375, 1.68701171875, 2.245361328125, 2.8037109375, 3.362060546875, 3.92041015625, 4.478759765625, 5.037109375, 5.595458984375, 6.15380859375, 6.712158203125, 7.2705078125, 7.828857421875, 8.38720703125, 8.945556640625, 9.50390625, 10.062255859375, 10.62060546875, 11.178955078125, 11.7373046875, 12.295654296875, 12.85400390625, 13.412353515625, 13.970703125, 14.529052734375, 15.08740234375, 15.645751953125, 16.2041015625, 16.762451171875, 17.32080078125, 17.879150390625, 18.4375]}, "gradients/decoder.transformer.h.14.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 2.0, 3.0, 4.0, 9.0, 8.0, 10.0, 24.0, 24.0, 42.0, 67.0, 108.0, 128.0, 225.0, 319.0, 521.0, 666.0, 633.0, 427.0, 262.0, 172.0, 136.0, 88.0, 49.0, 41.0, 36.0, 19.0, 16.0, 7.0, 15.0, 6.0, 7.0, 4.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-14.046875, -13.6307373046875, -13.214599609375, -12.7984619140625, -12.38232421875, -11.9661865234375, -11.550048828125, -11.1339111328125, -10.7177734375, -10.3016357421875, -9.885498046875, -9.4693603515625, -9.05322265625, -8.6370849609375, -8.220947265625, -7.8048095703125, -7.388671875, -6.9725341796875, -6.556396484375, -6.1402587890625, -5.72412109375, -5.3079833984375, -4.891845703125, -4.4757080078125, -4.0595703125, -3.6434326171875, -3.227294921875, -2.8111572265625, -2.39501953125, -1.9788818359375, -1.562744140625, -1.1466064453125, -0.73046875, -0.3143310546875, 0.101806640625, 0.5179443359375, 0.93408203125, 1.3502197265625, 1.766357421875, 2.1824951171875, 2.5986328125, 3.0147705078125, 3.430908203125, 3.8470458984375, 4.26318359375, 4.6793212890625, 5.095458984375, 5.5115966796875, 5.927734375, 6.3438720703125, 6.760009765625, 7.1761474609375, 7.59228515625, 8.0084228515625, 8.424560546875, 8.8406982421875, 9.2568359375, 9.6729736328125, 10.089111328125, 10.5052490234375, 10.92138671875, 11.3375244140625, 11.753662109375, 12.1697998046875, 12.5859375]}, "gradients/decoder.transformer.h.14.mlp.c_fc.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 2.0, 2.0, 1.0, 2.0, 3.0, 2.0, 4.0, 7.0, 8.0, 11.0, 23.0, 31.0, 39.0, 61.0, 82.0, 130.0, 182.0, 322.0, 638.0, 1478.0, 4434.0, 24421.0, 259472.0, 3331408.0, 521466.0, 40434.0, 6257.0, 1746.0, 711.0, 362.0, 208.0, 108.0, 85.0, 49.0, 35.0, 22.0, 12.0, 11.0, 7.0, 4.0, 3.0, 6.0, 2.0, 2.0, 0.0, 3.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-22.875, -22.015625, -21.15625, -20.296875, -19.4375, -18.578125, -17.71875, -16.859375, -16.0, -15.140625, -14.28125, -13.421875, -12.5625, -11.703125, -10.84375, -9.984375, -9.125, -8.265625, -7.40625, -6.546875, -5.6875, -4.828125, -3.96875, -3.109375, -2.25, -1.390625, -0.53125, 0.328125, 1.1875, 2.046875, 2.90625, 3.765625, 4.625, 5.484375, 6.34375, 7.203125, 8.0625, 8.921875, 9.78125, 10.640625, 11.5, 12.359375, 13.21875, 14.078125, 14.9375, 15.796875, 16.65625, 17.515625, 18.375, 19.234375, 20.09375, 20.953125, 21.8125, 22.671875, 23.53125, 24.390625, 25.25, 26.109375, 26.96875, 27.828125, 28.6875, 29.546875, 30.40625, 31.265625, 32.125]}, "gradients/decoder.transformer.h.14.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 8.0, 7.0, 11.0, 22.0, 35.0, 36.0, 59.0, 85.0, 107.0, 131.0, 116.0, 124.0, 84.0, 57.0, 43.0, 46.0, 19.0, 9.0, 4.0, 3.0, 5.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-73.60099792480469, -71.44290161132812, -69.28480529785156, -67.126708984375, -64.96861267089844, -62.810516357421875, -60.65242385864258, -58.494327545166016, -56.33623123168945, -54.17813491821289, -52.02003860473633, -49.861942291259766, -47.70384979248047, -45.545753479003906, -43.387657165527344, -41.22956085205078, -39.07146453857422, -36.913368225097656, -34.755271911621094, -32.59717559814453, -30.4390811920166, -28.28098487854004, -26.12289047241211, -23.964794158935547, -21.806697845458984, -19.648601531982422, -17.49050521850586, -15.33241081237793, -13.174314498901367, -11.016218185424805, -8.858122825622559, -6.7000274658203125, -4.541938781738281, -2.383842945098877, -0.22574710845947266, 1.9323487281799316, 4.090444564819336, 6.248540878295898, 8.406636238098145, 10.56473159790039, 12.722827911376953, 14.880924224853516, 17.039020538330078, 19.197114944458008, 21.35521125793457, 23.513307571411133, 25.671401977539062, 27.829498291015625, 29.987594604492188, 32.14569091796875, 34.30378723144531, 36.461883544921875, 38.61997985839844, 40.778076171875, 42.9361686706543, 45.09426498413086, 47.25236129760742, 49.410457611083984, 51.56855392456055, 53.72665023803711, 55.884742736816406, 58.04283905029297, 60.20093536376953, 62.359031677246094, 64.51712799072266]}, "gradients/decoder.transformer.h.14.ln_2.bias": {"_type": "histogram", "values": [2.0, 2.0, 1.0, 1.0, 4.0, 4.0, 3.0, 2.0, 6.0, 10.0, 5.0, 10.0, 8.0, 7.0, 6.0, 16.0, 15.0, 19.0, 21.0, 23.0, 25.0, 19.0, 36.0, 23.0, 37.0, 33.0, 30.0, 47.0, 43.0, 45.0, 28.0, 40.0, 32.0, 28.0, 39.0, 42.0, 32.0, 38.0, 27.0, 22.0, 22.0, 18.0, 20.0, 16.0, 10.0, 16.0, 15.0, 16.0, 9.0, 9.0, 10.0, 5.0, 4.0, 3.0, 6.0, 4.0, 1.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0], "bins": [-30.715057373046875, -29.70749855041504, -28.69993782043457, -27.692378997802734, -26.684818267822266, -25.67725944519043, -24.669700622558594, -23.662139892578125, -22.65458106994629, -21.647022247314453, -20.639461517333984, -19.63190269470215, -18.624343872070312, -17.616783142089844, -16.609224319458008, -15.601664543151855, -14.594104766845703, -13.58654499053955, -12.578985214233398, -11.571426391601562, -10.56386661529541, -9.556306838989258, -8.548748016357422, -7.5411882400512695, -6.533628463745117, -5.526068687438965, -4.518509387969971, -3.5109498500823975, -2.503390312194824, -1.4958305358886719, -0.48827123641967773, 0.5192880630493164, 1.5268478393554688, 2.534407377243042, 3.5419669151306152, 4.549526214599609, 5.557085990905762, 6.564645767211914, 7.572205066680908, 8.579764366149902, 9.587324142456055, 10.594883918762207, 11.60244369506836, 12.610002517700195, 13.617562294006348, 14.6251220703125, 15.632680892944336, 16.640239715576172, 17.64780044555664, 18.655359268188477, 19.662919998168945, 20.67047882080078, 21.67803955078125, 22.685598373413086, 23.693157196044922, 24.70071792602539, 25.708276748657227, 26.715835571289062, 27.72339630126953, 28.730955123901367, 29.738513946533203, 30.746074676513672, 31.753633499145508, 32.761192321777344, 33.76875305175781]}, "gradients/decoder.transformer.h.14.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 1.0, 2.0, 5.0, 3.0, 5.0, 5.0, 4.0, 9.0, 7.0, 13.0, 6.0, 9.0, 14.0, 14.0, 23.0, 20.0, 24.0, 24.0, 23.0, 38.0, 30.0, 42.0, 30.0, 36.0, 35.0, 50.0, 36.0, 36.0, 29.0, 33.0, 39.0, 33.0, 31.0, 29.0, 36.0, 36.0, 40.0, 18.0, 22.0, 22.0, 18.0, 15.0, 12.0, 8.0, 10.0, 8.0, 8.0, 3.0, 3.0, 4.0, 3.0, 5.0, 2.0, 2.0, 1.0, 1.0, 1.0, 2.0, 0.0, 2.0], "bins": [-4.68359375, -4.53363037109375, -4.3836669921875, -4.23370361328125, -4.083740234375, -3.93377685546875, -3.7838134765625, -3.63385009765625, -3.48388671875, -3.33392333984375, -3.1839599609375, -3.03399658203125, -2.884033203125, -2.73406982421875, -2.5841064453125, -2.43414306640625, -2.2841796875, -2.13421630859375, -1.9842529296875, -1.83428955078125, -1.684326171875, -1.53436279296875, -1.3843994140625, -1.23443603515625, -1.08447265625, -0.93450927734375, -0.7845458984375, -0.63458251953125, -0.484619140625, -0.33465576171875, -0.1846923828125, -0.03472900390625, 0.115234375, 0.26519775390625, 0.4151611328125, 0.56512451171875, 0.715087890625, 0.86505126953125, 1.0150146484375, 1.16497802734375, 1.31494140625, 1.46490478515625, 1.6148681640625, 1.76483154296875, 1.914794921875, 2.06475830078125, 2.2147216796875, 2.36468505859375, 2.5146484375, 2.66461181640625, 2.8145751953125, 2.96453857421875, 3.114501953125, 3.26446533203125, 3.4144287109375, 3.56439208984375, 3.71435546875, 3.86431884765625, 4.0142822265625, 4.16424560546875, 4.314208984375, 4.46417236328125, 4.6141357421875, 4.76409912109375, 4.9140625]}, "gradients/decoder.transformer.h.14.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 4.0, 3.0, 9.0, 10.0, 13.0, 18.0, 35.0, 49.0, 76.0, 113.0, 157.0, 236.0, 357.0, 565.0, 715.0, 1050.0, 1512.0, 2094.0, 3048.0, 4406.0, 6403.0, 9680.0, 14465.0, 22569.0, 35050.0, 57178.0, 93921.0, 176782.0, 291644.0, 125081.0, 73152.0, 44478.0, 28547.0, 17985.0, 11949.0, 7853.0, 5287.0, 3726.0, 2440.0, 1771.0, 1285.0, 867.0, 634.0, 455.0, 298.0, 198.0, 138.0, 94.0, 58.0, 37.0, 31.0, 15.0, 13.0, 5.0, 4.0, 4.0, 1.0, 0.0, 3.0, 0.0, 2.0], "bins": [-0.17138671875, -0.16590309143066406, -0.16041946411132812, -0.1549358367919922, -0.14945220947265625, -0.1439685821533203, -0.13848495483398438, -0.13300132751464844, -0.1275177001953125, -0.12203407287597656, -0.11655044555664062, -0.11106681823730469, -0.10558319091796875, -0.10009956359863281, -0.09461593627929688, -0.08913230895996094, -0.083648681640625, -0.07816505432128906, -0.07268142700195312, -0.06719779968261719, -0.06171417236328125, -0.05623054504394531, -0.050746917724609375, -0.04526329040527344, -0.0397796630859375, -0.03429603576660156, -0.028812408447265625, -0.023328781127929688, -0.01784515380859375, -0.012361526489257812, -0.006877899169921875, -0.0013942718505859375, 0.00408935546875, 0.009572982788085938, 0.015056610107421875, 0.020540237426757812, 0.02602386474609375, 0.03150749206542969, 0.036991119384765625, 0.04247474670410156, 0.0479583740234375, 0.05344200134277344, 0.058925628662109375, 0.06440925598144531, 0.06989288330078125, 0.07537651062011719, 0.08086013793945312, 0.08634376525878906, 0.091827392578125, 0.09731101989746094, 0.10279464721679688, 0.10827827453613281, 0.11376190185546875, 0.11924552917480469, 0.12472915649414062, 0.13021278381347656, 0.1356964111328125, 0.14118003845214844, 0.14666366577148438, 0.1521472930908203, 0.15763092041015625, 0.1631145477294922, 0.16859817504882812, 0.17408180236816406, 0.1795654296875]}, "gradients/decoder.transformer.h.14.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 3.0, 2.0, 4.0, 4.0, 6.0, 7.0, 3.0, 8.0, 9.0, 12.0, 13.0, 19.0, 14.0, 30.0, 22.0, 20.0, 29.0, 26.0, 39.0, 43.0, 29.0, 51.0, 38.0, 40.0, 1080.0, 39.0, 42.0, 39.0, 33.0, 40.0, 33.0, 30.0, 24.0, 28.0, 24.0, 28.0, 17.0, 19.0, 15.0, 12.0, 15.0, 9.0, 10.0, 4.0, 6.0, 6.0, 4.0, 1.0, 2.0, 3.0, 2.0, 2.0, 1.0, 1.0, 0.0, 1.0], "bins": [-4.39453125, -4.259765625, -4.125, -3.990234375, -3.85546875, -3.720703125, -3.5859375, -3.451171875, -3.31640625, -3.181640625, -3.046875, -2.912109375, -2.77734375, -2.642578125, -2.5078125, -2.373046875, -2.23828125, -2.103515625, -1.96875, -1.833984375, -1.69921875, -1.564453125, -1.4296875, -1.294921875, -1.16015625, -1.025390625, -0.890625, -0.755859375, -0.62109375, -0.486328125, -0.3515625, -0.216796875, -0.08203125, 0.052734375, 0.1875, 0.322265625, 0.45703125, 0.591796875, 0.7265625, 0.861328125, 0.99609375, 1.130859375, 1.265625, 1.400390625, 1.53515625, 1.669921875, 1.8046875, 1.939453125, 2.07421875, 2.208984375, 2.34375, 2.478515625, 2.61328125, 2.748046875, 2.8828125, 3.017578125, 3.15234375, 3.287109375, 3.421875, 3.556640625, 3.69140625, 3.826171875, 3.9609375, 4.095703125, 4.23046875]}, "gradients/decoder.transformer.h.14.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 2.0, 2.0, 4.0, 8.0, 11.0, 10.0, 20.0, 31.0, 59.0, 71.0, 102.0, 135.0, 192.0, 268.0, 348.0, 467.0, 652.0, 945.0, 1262.0, 1752.0, 2444.0, 3352.0, 4822.0, 6782.0, 9653.0, 13887.0, 20255.0, 30458.0, 46098.0, 72481.0, 122476.0, 1346620.0, 158839.0, 86811.0, 53630.0, 35058.0, 23522.0, 15860.0, 10848.0, 7765.0, 5438.0, 3872.0, 2765.0, 1944.0, 1400.0, 1002.0, 771.0, 532.0, 396.0, 323.0, 203.0, 143.0, 110.0, 75.0, 56.0, 42.0, 28.0, 17.0, 14.0, 7.0, 5.0, 4.0, 2.0], "bins": [-0.1063232421875, -0.10307025909423828, -0.09981727600097656, -0.09656429290771484, -0.09331130981445312, -0.0900583267211914, -0.08680534362792969, -0.08355236053466797, -0.08029937744140625, -0.07704639434814453, -0.07379341125488281, -0.0705404281616211, -0.06728744506835938, -0.06403446197509766, -0.06078147888183594, -0.05752849578857422, -0.0542755126953125, -0.05102252960205078, -0.04776954650878906, -0.044516563415527344, -0.041263580322265625, -0.038010597229003906, -0.03475761413574219, -0.03150463104248047, -0.02825164794921875, -0.02499866485595703, -0.021745681762695312, -0.018492698669433594, -0.015239715576171875, -0.011986732482910156, -0.008733749389648438, -0.005480766296386719, -0.002227783203125, 0.0010251998901367188, 0.0042781829833984375, 0.007531166076660156, 0.010784149169921875, 0.014037132263183594, 0.017290115356445312, 0.02054309844970703, 0.02379608154296875, 0.02704906463623047, 0.030302047729492188, 0.033555030822753906, 0.036808013916015625, 0.040060997009277344, 0.04331398010253906, 0.04656696319580078, 0.0498199462890625, 0.05307292938232422, 0.05632591247558594, 0.059578895568847656, 0.06283187866210938, 0.0660848617553711, 0.06933784484863281, 0.07259082794189453, 0.07584381103515625, 0.07909679412841797, 0.08234977722167969, 0.0856027603149414, 0.08885574340820312, 0.09210872650146484, 0.09536170959472656, 0.09861469268798828, 0.10186767578125]}, "gradients/decoder.transformer.h.14.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 2.0, 1.0, 1.0, 3.0, 1.0, 5.0, 10.0, 4.0, 19.0, 17.0, 29.0, 27.0, 34.0, 58.0, 39.0, 72.0, 78.0, 157.0, 88.0, 79.0, 54.0, 55.0, 37.0, 40.0, 23.0, 17.0, 12.0, 8.0, 11.0, 8.0, 6.0, 3.0, 6.0, 5.0, 2.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-7.62939453125e-06, -7.422640919685364e-06, -7.2158873081207275e-06, -7.009133696556091e-06, -6.802380084991455e-06, -6.595626473426819e-06, -6.388872861862183e-06, -6.182119250297546e-06, -5.97536563873291e-06, -5.768612027168274e-06, -5.561858415603638e-06, -5.3551048040390015e-06, -5.148351192474365e-06, -4.941597580909729e-06, -4.734843969345093e-06, -4.5280903577804565e-06, -4.32133674621582e-06, -4.114583134651184e-06, -3.907829523086548e-06, -3.7010759115219116e-06, -3.4943222999572754e-06, -3.287568688392639e-06, -3.080815076828003e-06, -2.8740614652633667e-06, -2.6673078536987305e-06, -2.4605542421340942e-06, -2.253800630569458e-06, -2.0470470190048218e-06, -1.8402934074401855e-06, -1.6335397958755493e-06, -1.426786184310913e-06, -1.2200325727462769e-06, -1.0132789611816406e-06, -8.065253496170044e-07, -5.997717380523682e-07, -3.9301812648773193e-07, -1.862645149230957e-07, 2.0489096641540527e-08, 2.2724270820617676e-07, 4.33996319770813e-07, 6.407499313354492e-07, 8.475035429000854e-07, 1.0542571544647217e-06, 1.261010766029358e-06, 1.4677643775939941e-06, 1.6745179891586304e-06, 1.8812716007232666e-06, 2.088025212287903e-06, 2.294778823852539e-06, 2.5015324354171753e-06, 2.7082860469818115e-06, 2.9150396585464478e-06, 3.121793270111084e-06, 3.3285468816757202e-06, 3.5353004932403564e-06, 3.7420541048049927e-06, 3.948807716369629e-06, 4.155561327934265e-06, 4.362314939498901e-06, 4.569068551063538e-06, 4.775822162628174e-06, 4.98257577419281e-06, 5.189329385757446e-06, 5.3960829973220825e-06, 5.602836608886719e-06]}, "gradients/decoder.transformer.h.14.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 1.0, 5.0, 7.0, 7.0, 3.0, 4.0, 9.0, 19.0, 4.0, 19.0, 23.0, 33.0, 56.0, 90.0, 173.0, 379.0, 4921.0, 843992.0, 196036.0, 2052.0, 330.0, 152.0, 65.0, 49.0, 29.0, 32.0, 27.0, 13.0, 12.0, 5.0, 8.0, 2.0, 2.0, 0.0, 1.0, 4.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.626150131225586e-05, -9.268056601285934e-05, -8.909963071346283e-05, -8.551869541406631e-05, -8.19377601146698e-05, -7.835682481527328e-05, -7.477588951587677e-05, -7.119495421648026e-05, -6.761401891708374e-05, -6.403308361768723e-05, -6.045214831829071e-05, -5.6871213018894196e-05, -5.329027771949768e-05, -4.9709342420101166e-05, -4.612840712070465e-05, -4.2547471821308136e-05, -3.896653652191162e-05, -3.5385601222515106e-05, -3.180466592311859e-05, -2.8223730623722076e-05, -2.464279532432556e-05, -2.1061860024929047e-05, -1.7480924725532532e-05, -1.3899989426136017e-05, -1.0319054126739502e-05, -6.738118827342987e-06, -3.157183527946472e-06, 4.237517714500427e-07, 4.004687070846558e-06, 7.5856223702430725e-06, 1.1166557669639587e-05, 1.4747492969036102e-05, 1.8328428268432617e-05, 2.1909363567829132e-05, 2.5490298867225647e-05, 2.9071234166622162e-05, 3.265216946601868e-05, 3.623310476541519e-05, 3.9814040064811707e-05, 4.339497536420822e-05, 4.6975910663604736e-05, 5.055684596300125e-05, 5.4137781262397766e-05, 5.771871656179428e-05, 6.12996518611908e-05, 6.488058716058731e-05, 6.846152245998383e-05, 7.204245775938034e-05, 7.562339305877686e-05, 7.920432835817337e-05, 8.278526365756989e-05, 8.63661989569664e-05, 8.994713425636292e-05, 9.352806955575943e-05, 9.710900485515594e-05, 0.00010068994015455246, 0.00010427087545394897, 0.00010785181075334549, 0.000111432746052742, 0.00011501368135213852, 0.00011859461665153503, 0.00012217555195093155, 0.00012575648725032806, 0.00012933742254972458, 0.0001329183578491211]}, "gradients/decoder.transformer.h.14.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 8.0, 11.0, 65.0, 192.0, 301.0, 275.0, 111.0, 36.0, 11.0, 6.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.754028198774904e-05, -1.7180776922032237e-05, -1.6821273675304838e-05, -1.6461768609588034e-05, -1.610226354387123e-05, -1.5742760297143832e-05, -1.538325523142703e-05, -1.5023751075204927e-05, -1.4664246918982826e-05, -1.4304742762760725e-05, -1.3945237697043922e-05, -1.358573354082182e-05, -1.322622938459972e-05, -1.2866725228377618e-05, -1.2507220162660815e-05, -1.2147716006438714e-05, -1.178821094072191e-05, -1.1428706784499809e-05, -1.1069201718783006e-05, -1.0709697562560905e-05, -1.0350193406338803e-05, -9.990688340622e-06, -9.631184184399899e-06, -9.271680028177798e-06, -8.912174962460995e-06, -8.552670806238893e-06, -8.19316574052209e-06, -7.833661584299989e-06, -7.474157428077888e-06, -7.1146528171084356e-06, -6.755148206138983e-06, -6.395644049916882e-06, -6.036140348442132e-06, -5.6766357374726795e-06, -5.317131581250578e-06, -4.957626970281126e-06, -4.598122359311674e-06, -4.2386182030895725e-06, -3.87911359212012e-06, -3.5196092085243436e-06, -3.160104824928567e-06, -2.80060044133279e-06, -2.4410960577370133e-06, -2.081591446767561e-06, -1.7220870631717844e-06, -1.3625826795760076e-06, -1.0030780686065555e-06, -6.435736850107787e-07, -2.8406930141500197e-07, 7.543513902419363e-08, 4.3493957946338924e-07, 7.944440767460037e-07, 1.1539484603417804e-06, 1.5134528439375572e-06, 1.8729574549070094e-06, 2.232461838502786e-06, 2.591966222098563e-06, 2.9514706056943396e-06, 3.3109749892901164e-06, 3.6704796002595685e-06, 4.02998375648167e-06, 4.389488367451122e-06, 4.748992978420574e-06, 5.1084971346426755e-06, 5.468001745612128e-06]}, "gradients/decoder.transformer.h.14.ln_cross_attn.bias": {"_type": "histogram", "values": [4.0, 0.0, 0.0, 2.0, 0.0, 3.0, 3.0, 6.0, 2.0, 3.0, 14.0, 8.0, 7.0, 13.0, 8.0, 14.0, 32.0, 16.0, 10.0, 21.0, 18.0, 49.0, 22.0, 31.0, 28.0, 32.0, 64.0, 26.0, 32.0, 28.0, 39.0, 33.0, 57.0, 49.0, 23.0, 25.0, 32.0, 41.0, 13.0, 19.0, 22.0, 19.0, 37.0, 11.0, 15.0, 10.0, 9.0, 11.0, 22.0, 4.0, 2.0, 5.0, 10.0, 8.0, 4.0, 3.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.086162567138672e-06, -2.0153820514678955e-06, -1.944601535797119e-06, -1.8738210201263428e-06, -1.8030405044555664e-06, -1.73225998878479e-06, -1.6614794731140137e-06, -1.5906989574432373e-06, -1.519918441772461e-06, -1.4491379261016846e-06, -1.3783574104309082e-06, -1.3075768947601318e-06, -1.2367963790893555e-06, -1.166015863418579e-06, -1.0952353477478027e-06, -1.0244548320770264e-06, -9.5367431640625e-07, -8.828938007354736e-07, -8.121132850646973e-07, -7.413327693939209e-07, -6.705522537231445e-07, -5.997717380523682e-07, -5.289912223815918e-07, -4.5821070671081543e-07, -3.8743019104003906e-07, -3.166496753692627e-07, -2.4586915969848633e-07, -1.7508864402770996e-07, -1.043081283569336e-07, -3.3527612686157227e-08, 3.725290298461914e-08, 1.0803341865539551e-07, 1.7881393432617188e-07, 2.4959444999694824e-07, 3.203749656677246e-07, 3.91155481338501e-07, 4.6193599700927734e-07, 5.327165126800537e-07, 6.034970283508301e-07, 6.742775440216064e-07, 7.450580596923828e-07, 8.158385753631592e-07, 8.866190910339355e-07, 9.57399606704712e-07, 1.0281801223754883e-06, 1.0989606380462646e-06, 1.169741153717041e-06, 1.2405216693878174e-06, 1.3113021850585938e-06, 1.3820827007293701e-06, 1.4528632164001465e-06, 1.5236437320709229e-06, 1.5944242477416992e-06, 1.6652047634124756e-06, 1.735985279083252e-06, 1.8067657947540283e-06, 1.8775463104248047e-06, 1.948326826095581e-06, 2.0191073417663574e-06, 2.089887857437134e-06, 2.16066837310791e-06, 2.2314488887786865e-06, 2.302229404449463e-06, 2.3730099201202393e-06, 2.4437904357910156e-06]}, "gradients/decoder.transformer.h.14.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 1.0, 2.0, 5.0, 3.0, 5.0, 5.0, 4.0, 9.0, 7.0, 13.0, 6.0, 9.0, 14.0, 14.0, 23.0, 20.0, 24.0, 24.0, 23.0, 38.0, 30.0, 42.0, 30.0, 36.0, 35.0, 50.0, 36.0, 36.0, 29.0, 33.0, 39.0, 33.0, 31.0, 29.0, 36.0, 36.0, 40.0, 18.0, 22.0, 22.0, 18.0, 15.0, 12.0, 8.0, 10.0, 8.0, 8.0, 3.0, 3.0, 4.0, 3.0, 5.0, 2.0, 2.0, 1.0, 1.0, 1.0, 2.0, 0.0, 2.0], "bins": [-4.68359375, -4.53363037109375, -4.3836669921875, -4.23370361328125, -4.083740234375, -3.93377685546875, -3.7838134765625, -3.63385009765625, -3.48388671875, -3.33392333984375, -3.1839599609375, -3.03399658203125, -2.884033203125, -2.73406982421875, -2.5841064453125, -2.43414306640625, -2.2841796875, -2.13421630859375, -1.9842529296875, -1.83428955078125, -1.684326171875, -1.53436279296875, -1.3843994140625, -1.23443603515625, -1.08447265625, -0.93450927734375, -0.7845458984375, -0.63458251953125, -0.484619140625, -0.33465576171875, -0.1846923828125, -0.03472900390625, 0.115234375, 0.26519775390625, 0.4151611328125, 0.56512451171875, 0.715087890625, 0.86505126953125, 1.0150146484375, 1.16497802734375, 1.31494140625, 1.46490478515625, 1.6148681640625, 1.76483154296875, 1.914794921875, 2.06475830078125, 2.2147216796875, 2.36468505859375, 2.5146484375, 2.66461181640625, 2.8145751953125, 2.96453857421875, 3.114501953125, 3.26446533203125, 3.4144287109375, 3.56439208984375, 3.71435546875, 3.86431884765625, 4.0142822265625, 4.16424560546875, 4.314208984375, 4.46417236328125, 4.6141357421875, 4.76409912109375, 4.9140625]}, "gradients/decoder.transformer.h.14.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 6.0, 1.0, 10.0, 8.0, 15.0, 12.0, 32.0, 35.0, 43.0, 71.0, 99.0, 149.0, 236.0, 349.0, 547.0, 930.0, 1694.0, 2829.0, 5356.0, 10156.0, 21200.0, 48461.0, 125617.0, 431320.0, 250096.0, 82818.0, 33485.0, 15471.0, 7783.0, 4119.0, 2199.0, 1296.0, 767.0, 452.0, 295.0, 189.0, 147.0, 77.0, 51.0, 40.0, 27.0, 29.0, 16.0, 12.0, 10.0, 6.0, 1.0, 5.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.890625, -8.6109619140625, -8.331298828125, -8.0516357421875, -7.77197265625, -7.4923095703125, -7.212646484375, -6.9329833984375, -6.6533203125, -6.3736572265625, -6.093994140625, -5.8143310546875, -5.53466796875, -5.2550048828125, -4.975341796875, -4.6956787109375, -4.416015625, -4.1363525390625, -3.856689453125, -3.5770263671875, -3.29736328125, -3.0177001953125, -2.738037109375, -2.4583740234375, -2.1787109375, -1.8990478515625, -1.619384765625, -1.3397216796875, -1.06005859375, -0.7803955078125, -0.500732421875, -0.2210693359375, 0.05859375, 0.3382568359375, 0.617919921875, 0.8975830078125, 1.17724609375, 1.4569091796875, 1.736572265625, 2.0162353515625, 2.2958984375, 2.5755615234375, 2.855224609375, 3.1348876953125, 3.41455078125, 3.6942138671875, 3.973876953125, 4.2535400390625, 4.533203125, 4.8128662109375, 5.092529296875, 5.3721923828125, 5.65185546875, 5.9315185546875, 6.211181640625, 6.4908447265625, 6.7705078125, 7.0501708984375, 7.329833984375, 7.6094970703125, 7.88916015625, 8.1688232421875, 8.448486328125, 8.7281494140625, 9.0078125]}, "gradients/decoder.transformer.h.14.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 2.0, 2.0, 1.0, 5.0, 1.0, 6.0, 4.0, 4.0, 8.0, 11.0, 18.0, 13.0, 20.0, 28.0, 24.0, 26.0, 23.0, 33.0, 33.0, 42.0, 51.0, 53.0, 89.0, 101.0, 184.0, 1443.0, 214.0, 118.0, 86.0, 63.0, 54.0, 52.0, 37.0, 36.0, 26.0, 20.0, 23.0, 31.0, 15.0, 10.0, 10.0, 7.0, 4.0, 8.0, 6.0, 8.0, 5.0, 1.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-18.796875, -18.2529296875, -17.708984375, -17.1650390625, -16.62109375, -16.0771484375, -15.533203125, -14.9892578125, -14.4453125, -13.9013671875, -13.357421875, -12.8134765625, -12.26953125, -11.7255859375, -11.181640625, -10.6376953125, -10.09375, -9.5498046875, -9.005859375, -8.4619140625, -7.91796875, -7.3740234375, -6.830078125, -6.2861328125, -5.7421875, -5.1982421875, -4.654296875, -4.1103515625, -3.56640625, -3.0224609375, -2.478515625, -1.9345703125, -1.390625, -0.8466796875, -0.302734375, 0.2412109375, 0.78515625, 1.3291015625, 1.873046875, 2.4169921875, 2.9609375, 3.5048828125, 4.048828125, 4.5927734375, 5.13671875, 5.6806640625, 6.224609375, 6.7685546875, 7.3125, 7.8564453125, 8.400390625, 8.9443359375, 9.48828125, 10.0322265625, 10.576171875, 11.1201171875, 11.6640625, 12.2080078125, 12.751953125, 13.2958984375, 13.83984375, 14.3837890625, 14.927734375, 15.4716796875, 16.015625]}, "gradients/decoder.transformer.h.14.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 4.0, 4.0, 4.0, 6.0, 10.0, 9.0, 8.0, 12.0, 15.0, 17.0, 28.0, 40.0, 58.0, 63.0, 91.0, 127.0, 212.0, 374.0, 817.0, 3064.0, 20794.0, 452268.0, 2596508.0, 62084.0, 6305.0, 1409.0, 467.0, 276.0, 172.0, 108.0, 77.0, 59.0, 42.0, 44.0, 31.0, 29.0, 21.0, 15.0, 8.0, 5.0, 6.0, 7.0, 6.0, 4.0, 0.0, 4.0, 4.0, 1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 2.0], "bins": [-35.0625, -33.97412109375, -32.8857421875, -31.79736328125, -30.708984375, -29.62060546875, -28.5322265625, -27.44384765625, -26.35546875, -25.26708984375, -24.1787109375, -23.09033203125, -22.001953125, -20.91357421875, -19.8251953125, -18.73681640625, -17.6484375, -16.56005859375, -15.4716796875, -14.38330078125, -13.294921875, -12.20654296875, -11.1181640625, -10.02978515625, -8.94140625, -7.85302734375, -6.7646484375, -5.67626953125, -4.587890625, -3.49951171875, -2.4111328125, -1.32275390625, -0.234375, 0.85400390625, 1.9423828125, 3.03076171875, 4.119140625, 5.20751953125, 6.2958984375, 7.38427734375, 8.47265625, 9.56103515625, 10.6494140625, 11.73779296875, 12.826171875, 13.91455078125, 15.0029296875, 16.09130859375, 17.1796875, 18.26806640625, 19.3564453125, 20.44482421875, 21.533203125, 22.62158203125, 23.7099609375, 24.79833984375, 25.88671875, 26.97509765625, 28.0634765625, 29.15185546875, 30.240234375, 31.32861328125, 32.4169921875, 33.50537109375, 34.59375]}, "gradients/decoder.transformer.h.14.ln_1.weight": {"_type": "histogram", "values": [867.0, 149.0, 1.0, 6.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-21.857406616210938, 4.371376037597656, 30.60015869140625, 56.828941345214844, 83.05772399902344, 109.28651428222656, 135.51528930664062, 161.7440643310547, 187.9728546142578, 214.20162963867188, 240.430419921875, 266.6592102050781, 292.88800048828125, 319.11676025390625, 345.3455505371094, 371.5743408203125, 397.8031005859375, 424.0318908691406, 450.26068115234375, 476.48944091796875, 502.7182312011719, 528.947021484375, 555.17578125, 581.404541015625, 607.6333618164062, 633.8621215820312, 660.0909423828125, 686.3197021484375, 712.5484619140625, 738.7772827148438, 765.0060424804688, 791.23486328125, 817.463623046875, 843.6923828125, 869.9212036132812, 896.1499633789062, 922.3787841796875, 948.6075439453125, 974.8363037109375, 1001.0650634765625, 1027.2939453125, 1053.522705078125, 1079.75146484375, 1105.9803466796875, 1132.2091064453125, 1158.4378662109375, 1184.6666259765625, 1210.8953857421875, 1237.1241455078125, 1263.3529052734375, 1289.5816650390625, 1315.810546875, 1342.039306640625, 1368.26806640625, 1394.496826171875, 1420.7255859375, 1446.954345703125, 1473.18310546875, 1499.411865234375, 1525.6407470703125, 1551.8695068359375, 1578.0982666015625, 1604.3270263671875, 1630.5557861328125, 1656.78466796875]}, "gradients/decoder.transformer.h.14.ln_1.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 2.0, 4.0, 3.0, 8.0, 5.0, 2.0, 8.0, 9.0, 9.0, 7.0, 17.0, 20.0, 22.0, 26.0, 18.0, 22.0, 21.0, 37.0, 35.0, 45.0, 53.0, 41.0, 54.0, 47.0, 40.0, 43.0, 35.0, 42.0, 49.0, 45.0, 41.0, 34.0, 29.0, 19.0, 29.0, 19.0, 15.0, 10.0, 12.0, 12.0, 7.0, 4.0, 6.0, 2.0, 4.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0], "bins": [-57.5391845703125, -55.54476547241211, -53.550350189208984, -51.555931091308594, -49.5615119934082, -47.56709289550781, -45.57267761230469, -43.5782585144043, -41.583839416503906, -39.589420318603516, -37.59500503540039, -35.6005859375, -33.60616683959961, -31.61174964904785, -29.617332458496094, -27.622913360595703, -25.628498077392578, -23.63408088684082, -21.63966178894043, -19.645244598388672, -17.65082550048828, -15.656408309936523, -13.661991119384766, -11.667572975158691, -9.673154830932617, -7.678736686706543, -5.684319019317627, -3.689901351928711, -1.6954832077026367, 0.2989349365234375, 2.2933521270751953, 4.2877702713012695, 6.282188415527344, 8.276606559753418, 10.271024703979492, 12.26544189453125, 14.259860038757324, 16.2542781829834, 18.248695373535156, 20.243114471435547, 22.237531661987305, 24.231948852539062, 26.226367950439453, 28.22078514099121, 30.21520233154297, 32.20962142944336, 34.20404052734375, 36.198455810546875, 38.192874908447266, 40.187294006347656, 42.18170928955078, 44.17612838745117, 46.17054748535156, 48.16496276855469, 50.15938186645508, 52.15380096435547, 54.148216247558594, 56.142635345458984, 58.13705062866211, 60.1314697265625, 62.12588882446289, 64.12030792236328, 66.1147232055664, 68.10913848876953, 70.10356140136719]}, "gradients/decoder.transformer.h.13.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 3.0, 2.0, 3.0, 8.0, 5.0, 6.0, 6.0, 11.0, 7.0, 10.0, 20.0, 11.0, 20.0, 24.0, 27.0, 30.0, 30.0, 43.0, 46.0, 35.0, 45.0, 30.0, 44.0, 34.0, 39.0, 53.0, 37.0, 37.0, 37.0, 31.0, 24.0, 34.0, 44.0, 24.0, 26.0, 22.0, 19.0, 15.0, 9.0, 12.0, 13.0, 7.0, 6.0, 7.0, 3.0, 4.0, 4.0, 3.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-5.6640625, -5.488037109375, -5.31201171875, -5.135986328125, -4.9599609375, -4.783935546875, -4.60791015625, -4.431884765625, -4.255859375, -4.079833984375, -3.90380859375, -3.727783203125, -3.5517578125, -3.375732421875, -3.19970703125, -3.023681640625, -2.84765625, -2.671630859375, -2.49560546875, -2.319580078125, -2.1435546875, -1.967529296875, -1.79150390625, -1.615478515625, -1.439453125, -1.263427734375, -1.08740234375, -0.911376953125, -0.7353515625, -0.559326171875, -0.38330078125, -0.207275390625, -0.03125, 0.144775390625, 0.32080078125, 0.496826171875, 0.6728515625, 0.848876953125, 1.02490234375, 1.200927734375, 1.376953125, 1.552978515625, 1.72900390625, 1.905029296875, 2.0810546875, 2.257080078125, 2.43310546875, 2.609130859375, 2.78515625, 2.961181640625, 3.13720703125, 3.313232421875, 3.4892578125, 3.665283203125, 3.84130859375, 4.017333984375, 4.193359375, 4.369384765625, 4.54541015625, 4.721435546875, 4.8974609375, 5.073486328125, 5.24951171875, 5.425537109375, 5.6015625]}, "gradients/decoder.transformer.h.13.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 3.0, 1.0, 2.0, 1.0, 1.0, 3.0, 5.0, 6.0, 3.0, 7.0, 9.0, 9.0, 12.0, 9.0, 17.0, 19.0, 24.0, 17.0, 41.0, 47.0, 79.0, 117.0, 264.0, 622.0, 1510.0, 4448.0, 14387.0, 59936.0, 407197.0, 2664957.0, 903477.0, 104633.0, 22296.0, 6337.0, 2158.0, 841.0, 330.0, 143.0, 79.0, 53.0, 37.0, 26.0, 25.0, 17.0, 20.0, 7.0, 10.0, 6.0, 11.0, 7.0, 5.0, 4.0, 6.0, 3.0, 3.0, 5.0, 5.0, 0.0, 1.0, 3.0], "bins": [-14.515625, -14.08447265625, -13.6533203125, -13.22216796875, -12.791015625, -12.35986328125, -11.9287109375, -11.49755859375, -11.06640625, -10.63525390625, -10.2041015625, -9.77294921875, -9.341796875, -8.91064453125, -8.4794921875, -8.04833984375, -7.6171875, -7.18603515625, -6.7548828125, -6.32373046875, -5.892578125, -5.46142578125, -5.0302734375, -4.59912109375, -4.16796875, -3.73681640625, -3.3056640625, -2.87451171875, -2.443359375, -2.01220703125, -1.5810546875, -1.14990234375, -0.71875, -0.28759765625, 0.1435546875, 0.57470703125, 1.005859375, 1.43701171875, 1.8681640625, 2.29931640625, 2.73046875, 3.16162109375, 3.5927734375, 4.02392578125, 4.455078125, 4.88623046875, 5.3173828125, 5.74853515625, 6.1796875, 6.61083984375, 7.0419921875, 7.47314453125, 7.904296875, 8.33544921875, 8.7666015625, 9.19775390625, 9.62890625, 10.06005859375, 10.4912109375, 10.92236328125, 11.353515625, 11.78466796875, 12.2158203125, 12.64697265625, 13.078125]}, "gradients/decoder.transformer.h.13.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 4.0, 8.0, 9.0, 11.0, 14.0, 24.0, 35.0, 64.0, 118.0, 214.0, 344.0, 617.0, 896.0, 689.0, 391.0, 259.0, 152.0, 100.0, 42.0, 26.0, 28.0, 8.0, 6.0, 14.0, 4.0, 7.0, 2.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-16.015625, -15.389404296875, -14.76318359375, -14.136962890625, -13.5107421875, -12.884521484375, -12.25830078125, -11.632080078125, -11.005859375, -10.379638671875, -9.75341796875, -9.127197265625, -8.5009765625, -7.874755859375, -7.24853515625, -6.622314453125, -5.99609375, -5.369873046875, -4.74365234375, -4.117431640625, -3.4912109375, -2.864990234375, -2.23876953125, -1.612548828125, -0.986328125, -0.360107421875, 0.26611328125, 0.892333984375, 1.5185546875, 2.144775390625, 2.77099609375, 3.397216796875, 4.0234375, 4.649658203125, 5.27587890625, 5.902099609375, 6.5283203125, 7.154541015625, 7.78076171875, 8.406982421875, 9.033203125, 9.659423828125, 10.28564453125, 10.911865234375, 11.5380859375, 12.164306640625, 12.79052734375, 13.416748046875, 14.04296875, 14.669189453125, 15.29541015625, 15.921630859375, 16.5478515625, 17.174072265625, 17.80029296875, 18.426513671875, 19.052734375, 19.678955078125, 20.30517578125, 20.931396484375, 21.5576171875, 22.183837890625, 22.81005859375, 23.436279296875, 24.0625]}, "gradients/decoder.transformer.h.13.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 6.0, 6.0, 6.0, 12.0, 19.0, 26.0, 26.0, 56.0, 111.0, 166.0, 362.0, 953.0, 6613.0, 449513.0, 3702046.0, 31426.0, 1980.0, 512.0, 224.0, 107.0, 51.0, 23.0, 16.0, 12.0, 9.0, 5.0, 6.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-70.875, -69.02099609375, -67.1669921875, -65.31298828125, -63.458984375, -61.60498046875, -59.7509765625, -57.89697265625, -56.04296875, -54.18896484375, -52.3349609375, -50.48095703125, -48.626953125, -46.77294921875, -44.9189453125, -43.06494140625, -41.2109375, -39.35693359375, -37.5029296875, -35.64892578125, -33.794921875, -31.94091796875, -30.0869140625, -28.23291015625, -26.37890625, -24.52490234375, -22.6708984375, -20.81689453125, -18.962890625, -17.10888671875, -15.2548828125, -13.40087890625, -11.546875, -9.69287109375, -7.8388671875, -5.98486328125, -4.130859375, -2.27685546875, -0.4228515625, 1.43115234375, 3.28515625, 5.13916015625, 6.9931640625, 8.84716796875, 10.701171875, 12.55517578125, 14.4091796875, 16.26318359375, 18.1171875, 19.97119140625, 21.8251953125, 23.67919921875, 25.533203125, 27.38720703125, 29.2412109375, 31.09521484375, 32.94921875, 34.80322265625, 36.6572265625, 38.51123046875, 40.365234375, 42.21923828125, 44.0732421875, 45.92724609375, 47.78125]}, "gradients/decoder.transformer.h.13.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 4.0, 18.0, 73.0, 264.0, 352.0, 230.0, 61.0, 12.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-158.41964721679688, -151.3292694091797, -144.2388916015625, -137.14849853515625, -130.05812072753906, -122.96774291992188, -115.87736511230469, -108.7869873046875, -101.69660186767578, -94.6062240600586, -87.51583862304688, -80.42546081542969, -73.3350830078125, -66.24469757080078, -59.154319763183594, -52.06393814086914, -44.97355651855469, -37.883174896240234, -30.792795181274414, -23.702415466308594, -16.61203384399414, -9.521652221679688, -2.4312744140625, 4.659107208251953, 11.749488830566406, 18.83987045288086, 25.93025016784668, 33.0206298828125, 40.11101150512695, 47.201393127441406, 54.291770935058594, 61.38215255737305, 68.4725341796875, 75.56291198730469, 82.6532974243164, 89.7436752319336, 96.83406066894531, 103.9244384765625, 111.01481628417969, 118.10519409179688, 125.1955795288086, 132.2859649658203, 139.3763427734375, 146.4667205810547, 153.55709838867188, 160.64749145507812, 167.73785400390625, 174.8282470703125, 181.9186248779297, 189.00900268554688, 196.09938049316406, 203.18975830078125, 210.2801513671875, 217.3705291748047, 224.46090698242188, 231.55128479003906, 238.64166259765625, 245.73204040527344, 252.82241821289062, 259.9128112792969, 267.003173828125, 274.09356689453125, 281.1839599609375, 288.2743225097656, 295.3647155761719]}, "gradients/decoder.transformer.h.13.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 2.0, 1.0, 1.0, 2.0, 1.0, 3.0, 4.0, 5.0, 6.0, 8.0, 7.0, 11.0, 12.0, 19.0, 13.0, 22.0, 20.0, 30.0, 22.0, 33.0, 30.0, 44.0, 39.0, 29.0, 36.0, 34.0, 37.0, 50.0, 51.0, 54.0, 52.0, 43.0, 45.0, 35.0, 36.0, 24.0, 31.0, 15.0, 20.0, 22.0, 16.0, 7.0, 5.0, 9.0, 5.0, 8.0, 6.0, 1.0, 2.0, 3.0, 0.0, 3.0, 2.0, 1.0, 1.0, 0.0, 1.0], "bins": [-48.409912109375, -46.95657730102539, -45.50324249267578, -44.04991149902344, -42.59657669067383, -41.14324188232422, -39.689910888671875, -38.236576080322266, -36.783241271972656, -35.32990646362305, -33.87657165527344, -32.423240661621094, -30.969905853271484, -29.516571044921875, -28.0632381439209, -26.609905242919922, -25.156570434570312, -23.703235626220703, -22.249902725219727, -20.79656982421875, -19.34323501586914, -17.88990020751953, -16.436567306518555, -14.983233451843262, -13.529899597167969, -12.076565742492676, -10.623231887817383, -9.16989803314209, -7.716564178466797, -6.263230323791504, -4.809896469116211, -3.356562614440918, -1.903228759765625, -0.44989490509033203, 1.003438949584961, 2.456772804260254, 3.910106658935547, 5.36344051361084, 6.816774368286133, 8.270108222961426, 9.723442077636719, 11.176775932312012, 12.630109786987305, 14.083443641662598, 15.53677749633789, 16.9901123046875, 18.443445205688477, 19.896778106689453, 21.350112915039062, 22.803447723388672, 24.25678062438965, 25.710113525390625, 27.163448333740234, 28.616783142089844, 30.07011604309082, 31.523448944091797, 32.976783752441406, 34.430118560791016, 35.883453369140625, 37.33678436279297, 38.79011917114258, 40.24345397949219, 41.69678497314453, 43.15011978149414, 44.60345458984375]}, "gradients/decoder.transformer.h.13.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 2.0, 2.0, 4.0, 8.0, 3.0, 13.0, 6.0, 10.0, 12.0, 16.0, 14.0, 18.0, 27.0, 26.0, 38.0, 38.0, 33.0, 51.0, 45.0, 36.0, 37.0, 45.0, 44.0, 47.0, 27.0, 43.0, 39.0, 39.0, 39.0, 32.0, 33.0, 34.0, 31.0, 18.0, 23.0, 13.0, 14.0, 8.0, 6.0, 15.0, 8.0, 3.0, 4.0, 0.0, 2.0, 3.0, 1.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.36328125, -5.1763916015625, -4.989501953125, -4.8026123046875, -4.61572265625, -4.4288330078125, -4.241943359375, -4.0550537109375, -3.8681640625, -3.6812744140625, -3.494384765625, -3.3074951171875, -3.12060546875, -2.9337158203125, -2.746826171875, -2.5599365234375, -2.373046875, -2.1861572265625, -1.999267578125, -1.8123779296875, -1.62548828125, -1.4385986328125, -1.251708984375, -1.0648193359375, -0.8779296875, -0.6910400390625, -0.504150390625, -0.3172607421875, -0.13037109375, 0.0565185546875, 0.243408203125, 0.4302978515625, 0.6171875, 0.8040771484375, 0.990966796875, 1.1778564453125, 1.36474609375, 1.5516357421875, 1.738525390625, 1.9254150390625, 2.1123046875, 2.2991943359375, 2.486083984375, 2.6729736328125, 2.85986328125, 3.0467529296875, 3.233642578125, 3.4205322265625, 3.607421875, 3.7943115234375, 3.981201171875, 4.1680908203125, 4.35498046875, 4.5418701171875, 4.728759765625, 4.9156494140625, 5.1025390625, 5.2894287109375, 5.476318359375, 5.6632080078125, 5.85009765625, 6.0369873046875, 6.223876953125, 6.4107666015625, 6.59765625]}, "gradients/decoder.transformer.h.13.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 3.0, 1.0, 1.0, 10.0, 10.0, 16.0, 12.0, 32.0, 33.0, 47.0, 89.0, 122.0, 174.0, 251.0, 356.0, 560.0, 768.0, 1168.0, 1714.0, 2630.0, 3960.0, 6181.0, 9555.0, 15621.0, 25408.0, 43467.0, 76827.0, 147676.0, 346009.0, 162664.0, 83203.0, 46675.0, 27193.0, 16580.0, 10418.0, 6466.0, 4196.0, 2777.0, 1835.0, 1209.0, 836.0, 537.0, 392.0, 296.0, 190.0, 116.0, 86.0, 52.0, 51.0, 28.0, 27.0, 17.0, 8.0, 5.0, 4.0, 5.0, 0.0, 2.0, 1.0, 3.0], "bins": [-0.2210693359375, -0.2142810821533203, -0.20749282836914062, -0.20070457458496094, -0.19391632080078125, -0.18712806701660156, -0.18033981323242188, -0.1735515594482422, -0.1667633056640625, -0.1599750518798828, -0.15318679809570312, -0.14639854431152344, -0.13961029052734375, -0.13282203674316406, -0.12603378295898438, -0.11924552917480469, -0.112457275390625, -0.10566902160644531, -0.09888076782226562, -0.09209251403808594, -0.08530426025390625, -0.07851600646972656, -0.07172775268554688, -0.06493949890136719, -0.0581512451171875, -0.05136299133300781, -0.044574737548828125, -0.03778648376464844, -0.03099822998046875, -0.024209976196289062, -0.017421722412109375, -0.010633468627929688, -0.00384521484375, 0.0029430389404296875, 0.009731292724609375, 0.016519546508789062, 0.02330780029296875, 0.030096054077148438, 0.036884307861328125, 0.04367256164550781, 0.0504608154296875, 0.05724906921386719, 0.06403732299804688, 0.07082557678222656, 0.07761383056640625, 0.08440208435058594, 0.09119033813476562, 0.09797859191894531, 0.104766845703125, 0.11155509948730469, 0.11834335327148438, 0.12513160705566406, 0.13191986083984375, 0.13870811462402344, 0.14549636840820312, 0.1522846221923828, 0.1590728759765625, 0.1658611297607422, 0.17264938354492188, 0.17943763732910156, 0.18622589111328125, 0.19301414489746094, 0.19980239868164062, 0.2065906524658203, 0.21337890625]}, "gradients/decoder.transformer.h.13.crossattention.c_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 2.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 3.0, 2.0, 6.0, 4.0, 5.0, 8.0, 10.0, 14.0, 12.0, 10.0, 18.0, 15.0, 17.0, 25.0, 14.0, 31.0, 23.0, 31.0, 35.0, 20.0, 34.0, 29.0, 31.0, 36.0, 49.0, 1062.0, 48.0, 37.0, 42.0, 42.0, 41.0, 33.0, 34.0, 28.0, 25.0, 24.0, 22.0, 20.0, 16.0, 16.0, 10.0, 16.0, 4.0, 5.0, 6.0, 7.0, 4.0, 1.0, 2.0, 2.0, 3.0, 3.0, 0.0, 0.0, 3.0], "bins": [-4.43359375, -4.303375244140625, -4.17315673828125, -4.042938232421875, -3.9127197265625, -3.782501220703125, -3.65228271484375, -3.522064208984375, -3.391845703125, -3.261627197265625, -3.13140869140625, -3.001190185546875, -2.8709716796875, -2.740753173828125, -2.61053466796875, -2.480316162109375, -2.35009765625, -2.219879150390625, -2.08966064453125, -1.959442138671875, -1.8292236328125, -1.699005126953125, -1.56878662109375, -1.438568115234375, -1.308349609375, -1.178131103515625, -1.04791259765625, -0.917694091796875, -0.7874755859375, -0.657257080078125, -0.52703857421875, -0.396820068359375, -0.2666015625, -0.136383056640625, -0.00616455078125, 0.124053955078125, 0.2542724609375, 0.384490966796875, 0.51470947265625, 0.644927978515625, 0.775146484375, 0.905364990234375, 1.03558349609375, 1.165802001953125, 1.2960205078125, 1.426239013671875, 1.55645751953125, 1.686676025390625, 1.81689453125, 1.947113037109375, 2.07733154296875, 2.207550048828125, 2.3377685546875, 2.467987060546875, 2.59820556640625, 2.728424072265625, 2.858642578125, 2.988861083984375, 3.11907958984375, 3.249298095703125, 3.3795166015625, 3.509735107421875, 3.63995361328125, 3.770172119140625, 3.900390625]}, "gradients/decoder.transformer.h.13.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 2.0, 4.0, 4.0, 10.0, 16.0, 21.0, 13.0, 26.0, 51.0, 77.0, 99.0, 120.0, 192.0, 237.0, 338.0, 449.0, 634.0, 804.0, 1213.0, 1660.0, 2277.0, 3153.0, 4494.0, 6252.0, 8777.0, 12713.0, 18385.0, 27187.0, 40862.0, 63250.0, 101631.0, 227090.0, 1279108.0, 100879.0, 63300.0, 41394.0, 27611.0, 18657.0, 12935.0, 9001.0, 6162.0, 4425.0, 3279.0, 2277.0, 1643.0, 1222.0, 912.0, 618.0, 469.0, 353.0, 230.0, 153.0, 128.0, 109.0, 73.0, 40.0, 44.0, 33.0, 19.0, 13.0, 12.0, 5.0, 5.0], "bins": [-0.10711669921875, -0.10387134552001953, -0.10062599182128906, -0.0973806381225586, -0.09413528442382812, -0.09088993072509766, -0.08764457702636719, -0.08439922332763672, -0.08115386962890625, -0.07790851593017578, -0.07466316223144531, -0.07141780853271484, -0.06817245483398438, -0.0649271011352539, -0.06168174743652344, -0.05843639373779297, -0.0551910400390625, -0.05194568634033203, -0.04870033264160156, -0.045454978942871094, -0.042209625244140625, -0.038964271545410156, -0.03571891784667969, -0.03247356414794922, -0.02922821044921875, -0.02598285675048828, -0.022737503051757812, -0.019492149353027344, -0.016246795654296875, -0.013001441955566406, -0.009756088256835938, -0.006510734558105469, -0.003265380859375, -2.002716064453125e-05, 0.0032253265380859375, 0.006470680236816406, 0.009716033935546875, 0.012961387634277344, 0.016206741333007812, 0.01945209503173828, 0.02269744873046875, 0.02594280242919922, 0.029188156127929688, 0.032433509826660156, 0.035678863525390625, 0.038924217224121094, 0.04216957092285156, 0.04541492462158203, 0.0486602783203125, 0.05190563201904297, 0.05515098571777344, 0.058396339416503906, 0.061641693115234375, 0.06488704681396484, 0.06813240051269531, 0.07137775421142578, 0.07462310791015625, 0.07786846160888672, 0.08111381530761719, 0.08435916900634766, 0.08760452270507812, 0.0908498764038086, 0.09409523010253906, 0.09734058380126953, 0.1005859375]}, "gradients/decoder.transformer.h.13.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 5.0, 3.0, 2.0, 1.0, 9.0, 11.0, 4.0, 7.0, 11.0, 18.0, 23.0, 30.0, 18.0, 36.0, 36.0, 42.0, 40.0, 90.0, 145.0, 115.0, 77.0, 43.0, 47.0, 33.0, 35.0, 33.0, 31.0, 7.0, 14.0, 13.0, 4.0, 10.0, 2.0, 7.0, 1.0, 3.0, 4.0, 3.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-6.616115570068359e-06, -6.441958248615265e-06, -6.26780092716217e-06, -6.093643605709076e-06, -5.9194862842559814e-06, -5.745328962802887e-06, -5.5711716413497925e-06, -5.397014319896698e-06, -5.2228569984436035e-06, -5.048699676990509e-06, -4.8745423555374146e-06, -4.70038503408432e-06, -4.526227712631226e-06, -4.352070391178131e-06, -4.177913069725037e-06, -4.003755748271942e-06, -3.829598426818848e-06, -3.655441105365753e-06, -3.4812837839126587e-06, -3.307126462459564e-06, -3.1329691410064697e-06, -2.9588118195533752e-06, -2.7846544981002808e-06, -2.6104971766471863e-06, -2.436339855194092e-06, -2.2621825337409973e-06, -2.088025212287903e-06, -1.9138678908348083e-06, -1.7397105693817139e-06, -1.5655532479286194e-06, -1.391395926475525e-06, -1.2172386050224304e-06, -1.043081283569336e-06, -8.689239621162415e-07, -6.94766640663147e-07, -5.206093192100525e-07, -3.46451997756958e-07, -1.7229467630386353e-07, 1.862645149230957e-09, 1.7601996660232544e-07, 3.501772880554199e-07, 5.243346095085144e-07, 6.984919309616089e-07, 8.726492524147034e-07, 1.0468065738677979e-06, 1.2209638953208923e-06, 1.3951212167739868e-06, 1.5692785382270813e-06, 1.7434358596801758e-06, 1.9175931811332703e-06, 2.0917505025863647e-06, 2.2659078240394592e-06, 2.4400651454925537e-06, 2.614222466945648e-06, 2.7883797883987427e-06, 2.962537109851837e-06, 3.1366944313049316e-06, 3.310851752758026e-06, 3.4850090742111206e-06, 3.659166395664215e-06, 3.8333237171173096e-06, 4.007481038570404e-06, 4.1816383600234985e-06, 4.355795681476593e-06, 4.5299530029296875e-06]}, "gradients/decoder.transformer.h.13.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 3.0, 7.0, 4.0, 4.0, 6.0, 7.0, 9.0, 9.0, 11.0, 33.0, 35.0, 43.0, 58.0, 120.0, 345.0, 3067.0, 318219.0, 720828.0, 4936.0, 392.0, 163.0, 72.0, 43.0, 34.0, 30.0, 19.0, 19.0, 11.0, 4.0, 10.0, 5.0, 7.0, 4.0, 5.0, 3.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-8.64267349243164e-05, -8.29789787530899e-05, -7.95312225818634e-05, -7.60834664106369e-05, -7.26357102394104e-05, -6.91879540681839e-05, -6.57401978969574e-05, -6.22924417257309e-05, -5.8844685554504395e-05, -5.539692938327789e-05, -5.194917321205139e-05, -4.850141704082489e-05, -4.505366086959839e-05, -4.160590469837189e-05, -3.8158148527145386e-05, -3.4710392355918884e-05, -3.126263618469238e-05, -2.781488001346588e-05, -2.436712384223938e-05, -2.091936767101288e-05, -1.7471611499786377e-05, -1.4023855328559875e-05, -1.0576099157333374e-05, -7.1283429861068726e-06, -3.680586814880371e-06, -2.3283064365386963e-07, 3.214925527572632e-06, 6.662681698799133e-06, 1.0110437870025635e-05, 1.3558194041252136e-05, 1.7005950212478638e-05, 2.045370638370514e-05, 2.390146255493164e-05, 2.7349218726158142e-05, 3.0796974897384644e-05, 3.4244731068611145e-05, 3.7692487239837646e-05, 4.114024341106415e-05, 4.458799958229065e-05, 4.803575575351715e-05, 5.148351192474365e-05, 5.4931268095970154e-05, 5.8379024267196655e-05, 6.182678043842316e-05, 6.527453660964966e-05, 6.872229278087616e-05, 7.217004895210266e-05, 7.561780512332916e-05, 7.906556129455566e-05, 8.251331746578217e-05, 8.596107363700867e-05, 8.940882980823517e-05, 9.285658597946167e-05, 9.630434215068817e-05, 9.975209832191467e-05, 0.00010319985449314117, 0.00010664761066436768, 0.00011009536683559418, 0.00011354312300682068, 0.00011699087917804718, 0.00012043863534927368, 0.00012388639152050018, 0.00012733414769172668, 0.00013078190386295319, 0.0001342296600341797]}, "gradients/decoder.transformer.h.13.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 5.0, 2.0, 11.0, 35.0, 94.0, 222.0, 296.0, 207.0, 103.0, 23.0, 12.0, 6.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.7903881598613225e-05, -1.7562721041031182e-05, -1.7221558664459735e-05, -1.6880398106877692e-05, -1.6539235730306245e-05, -1.6198075172724202e-05, -1.5856912796152756e-05, -1.5515752238570713e-05, -1.5174589861999266e-05, -1.4833428394922521e-05, -1.4492266927845776e-05, -1.4151105460769031e-05, -1.3809943993692286e-05, -1.3468782526615541e-05, -1.3127621059538797e-05, -1.2786460501956753e-05, -1.2445299034880009e-05, -1.2104137567803264e-05, -1.1762976100726519e-05, -1.1421814633649774e-05, -1.1080653166573029e-05, -1.0739491699496284e-05, -1.0398331141914241e-05, -1.0057168765342794e-05, -9.716008207760751e-06, -9.374846740684006e-06, -9.033685273607261e-06, -8.692523806530517e-06, -8.351362339453772e-06, -8.010200872377027e-06, -7.669039405300282e-06, -7.327878392970888e-06, -6.986717380641494e-06, -6.645555913564749e-06, -6.304394446488004e-06, -5.963232979411259e-06, -5.622071512334514e-06, -5.2809100452577695e-06, -4.9397490329283755e-06, -4.598587565851631e-06, -4.257426098774886e-06, -3.916264631698141e-06, -3.575103164621396e-06, -3.2339419249183265e-06, -2.8927804578415817e-06, -2.5516189907648368e-06, -2.2104577510617673e-06, -1.8692962839850225e-06, -1.5281348169082776e-06, -1.1869733498315327e-06, -8.458119964416255e-07, -5.046506430517184e-07, -1.6348917597497348e-07, 1.776722911017714e-07, 5.188335308048408e-07, 8.599949978815857e-07, 1.2011564649583306e-06, 1.5423179320350755e-06, 1.8834792854249827e-06, 2.22464063881489e-06, 2.5658021058916347e-06, 2.9069635729683796e-06, 3.248124812671449e-06, 3.589286279748194e-06, 3.930447746824939e-06]}, "gradients/decoder.transformer.h.13.ln_cross_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 3.0, 2.0, 2.0, 2.0, 9.0, 8.0, 3.0, 6.0, 9.0, 10.0, 33.0, 22.0, 20.0, 20.0, 20.0, 26.0, 54.0, 23.0, 25.0, 28.0, 37.0, 28.0, 61.0, 32.0, 33.0, 42.0, 29.0, 72.0, 37.0, 19.0, 32.0, 26.0, 23.0, 50.0, 21.0, 24.0, 16.0, 17.0, 15.0, 18.0, 8.0, 10.0, 4.0, 4.0, 3.0, 8.0, 9.0, 0.0, 3.0, 2.0, 0.0, 4.0, 0.0, 2.0, 0.0, 1.0, 1.0], "bins": [-2.205371856689453e-06, -2.1355226635932922e-06, -2.0656734704971313e-06, -1.9958242774009705e-06, -1.9259750843048096e-06, -1.8561258912086487e-06, -1.7862766981124878e-06, -1.716427505016327e-06, -1.646578311920166e-06, -1.5767291188240051e-06, -1.5068799257278442e-06, -1.4370307326316833e-06, -1.3671815395355225e-06, -1.2973323464393616e-06, -1.2274831533432007e-06, -1.1576339602470398e-06, -1.087784767150879e-06, -1.017935574054718e-06, -9.480863809585571e-07, -8.782371878623962e-07, -8.083879947662354e-07, -7.385388016700745e-07, -6.686896085739136e-07, -5.988404154777527e-07, -5.289912223815918e-07, -4.591420292854309e-07, -3.8929283618927e-07, -3.1944364309310913e-07, -2.4959444999694824e-07, -1.7974525690078735e-07, -1.0989606380462646e-07, -4.0046870708465576e-08, 2.9802322387695312e-08, 9.96515154838562e-08, 1.695007085800171e-07, 2.39349901676178e-07, 3.0919909477233887e-07, 3.7904828786849976e-07, 4.4889748096466064e-07, 5.187466740608215e-07, 5.885958671569824e-07, 6.584450602531433e-07, 7.282942533493042e-07, 7.981434464454651e-07, 8.67992639541626e-07, 9.378418326377869e-07, 1.0076910257339478e-06, 1.0775402188301086e-06, 1.1473894119262695e-06, 1.2172386050224304e-06, 1.2870877981185913e-06, 1.3569369912147522e-06, 1.426786184310913e-06, 1.496635377407074e-06, 1.5664845705032349e-06, 1.6363337635993958e-06, 1.7061829566955566e-06, 1.7760321497917175e-06, 1.8458813428878784e-06, 1.9157305359840393e-06, 1.9855797290802e-06, 2.055428922176361e-06, 2.125278115272522e-06, 2.195127308368683e-06, 2.2649765014648438e-06]}, "gradients/decoder.transformer.h.13.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 2.0, 2.0, 4.0, 8.0, 3.0, 13.0, 6.0, 10.0, 12.0, 16.0, 14.0, 18.0, 27.0, 26.0, 38.0, 38.0, 33.0, 51.0, 45.0, 36.0, 37.0, 45.0, 44.0, 47.0, 27.0, 43.0, 39.0, 39.0, 39.0, 32.0, 33.0, 34.0, 31.0, 18.0, 23.0, 13.0, 14.0, 8.0, 6.0, 15.0, 8.0, 3.0, 4.0, 0.0, 2.0, 3.0, 1.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.36328125, -5.1763916015625, -4.989501953125, -4.8026123046875, -4.61572265625, -4.4288330078125, -4.241943359375, -4.0550537109375, -3.8681640625, -3.6812744140625, -3.494384765625, -3.3074951171875, -3.12060546875, -2.9337158203125, -2.746826171875, -2.5599365234375, -2.373046875, -2.1861572265625, -1.999267578125, -1.8123779296875, -1.62548828125, -1.4385986328125, -1.251708984375, -1.0648193359375, -0.8779296875, -0.6910400390625, -0.504150390625, -0.3172607421875, -0.13037109375, 0.0565185546875, 0.243408203125, 0.4302978515625, 0.6171875, 0.8040771484375, 0.990966796875, 1.1778564453125, 1.36474609375, 1.5516357421875, 1.738525390625, 1.9254150390625, 2.1123046875, 2.2991943359375, 2.486083984375, 2.6729736328125, 2.85986328125, 3.0467529296875, 3.233642578125, 3.4205322265625, 3.607421875, 3.7943115234375, 3.981201171875, 4.1680908203125, 4.35498046875, 4.5418701171875, 4.728759765625, 4.9156494140625, 5.1025390625, 5.2894287109375, 5.476318359375, 5.6632080078125, 5.85009765625, 6.0369873046875, 6.223876953125, 6.4107666015625, 6.59765625]}, "gradients/decoder.transformer.h.13.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 5.0, 1.0, 2.0, 0.0, 3.0, 5.0, 7.0, 6.0, 14.0, 18.0, 27.0, 36.0, 73.0, 103.0, 143.0, 193.0, 309.0, 491.0, 699.0, 1041.0, 1675.0, 2672.0, 4237.0, 7240.0, 12336.0, 22247.0, 43538.0, 95910.0, 294428.0, 346835.0, 108301.0, 48092.0, 24251.0, 13289.0, 7716.0, 4610.0, 2811.0, 1807.0, 1131.0, 754.0, 496.0, 314.0, 246.0, 140.0, 103.0, 60.0, 53.0, 30.0, 23.0, 12.0, 13.0, 9.0, 5.0, 4.0, 2.0, 3.0, 2.0, 1.0], "bins": [-7.92578125, -7.69976806640625, -7.4737548828125, -7.24774169921875, -7.021728515625, -6.79571533203125, -6.5697021484375, -6.34368896484375, -6.11767578125, -5.89166259765625, -5.6656494140625, -5.43963623046875, -5.213623046875, -4.98760986328125, -4.7615966796875, -4.53558349609375, -4.3095703125, -4.08355712890625, -3.8575439453125, -3.63153076171875, -3.405517578125, -3.17950439453125, -2.9534912109375, -2.72747802734375, -2.50146484375, -2.27545166015625, -2.0494384765625, -1.82342529296875, -1.597412109375, -1.37139892578125, -1.1453857421875, -0.91937255859375, -0.693359375, -0.46734619140625, -0.2413330078125, -0.01531982421875, 0.210693359375, 0.43670654296875, 0.6627197265625, 0.88873291015625, 1.11474609375, 1.34075927734375, 1.5667724609375, 1.79278564453125, 2.018798828125, 2.24481201171875, 2.4708251953125, 2.69683837890625, 2.9228515625, 3.14886474609375, 3.3748779296875, 3.60089111328125, 3.826904296875, 4.05291748046875, 4.2789306640625, 4.50494384765625, 4.73095703125, 4.95697021484375, 5.1829833984375, 5.40899658203125, 5.635009765625, 5.86102294921875, 6.0870361328125, 6.31304931640625, 6.5390625]}, "gradients/decoder.transformer.h.13.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 1.0, 4.0, 0.0, 5.0, 7.0, 3.0, 6.0, 6.0, 10.0, 11.0, 11.0, 29.0, 26.0, 29.0, 30.0, 36.0, 41.0, 53.0, 72.0, 53.0, 103.0, 119.0, 294.0, 1459.0, 141.0, 100.0, 78.0, 59.0, 38.0, 43.0, 30.0, 32.0, 24.0, 22.0, 12.0, 14.0, 10.0, 9.0, 15.0, 6.0, 5.0, 5.0, 5.0, 3.0, 1.0, 0.0, 3.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-21.1875, -20.567138671875, -19.94677734375, -19.326416015625, -18.7060546875, -18.085693359375, -17.46533203125, -16.844970703125, -16.224609375, -15.604248046875, -14.98388671875, -14.363525390625, -13.7431640625, -13.122802734375, -12.50244140625, -11.882080078125, -11.26171875, -10.641357421875, -10.02099609375, -9.400634765625, -8.7802734375, -8.159912109375, -7.53955078125, -6.919189453125, -6.298828125, -5.678466796875, -5.05810546875, -4.437744140625, -3.8173828125, -3.197021484375, -2.57666015625, -1.956298828125, -1.3359375, -0.715576171875, -0.09521484375, 0.525146484375, 1.1455078125, 1.765869140625, 2.38623046875, 3.006591796875, 3.626953125, 4.247314453125, 4.86767578125, 5.488037109375, 6.1083984375, 6.728759765625, 7.34912109375, 7.969482421875, 8.58984375, 9.210205078125, 9.83056640625, 10.450927734375, 11.0712890625, 11.691650390625, 12.31201171875, 12.932373046875, 13.552734375, 14.173095703125, 14.79345703125, 15.413818359375, 16.0341796875, 16.654541015625, 17.27490234375, 17.895263671875, 18.515625]}, "gradients/decoder.transformer.h.13.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 2.0, 2.0, 2.0, 1.0, 5.0, 4.0, 7.0, 14.0, 19.0, 22.0, 43.0, 50.0, 85.0, 107.0, 175.0, 311.0, 566.0, 2682.0, 44509.0, 2903498.0, 185687.0, 5948.0, 917.0, 382.0, 221.0, 136.0, 114.0, 68.0, 44.0, 27.0, 26.0, 10.0, 6.0, 11.0, 5.0, 4.0, 3.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0], "bins": [-65.0625, -63.462890625, -61.86328125, -60.263671875, -58.6640625, -57.064453125, -55.46484375, -53.865234375, -52.265625, -50.666015625, -49.06640625, -47.466796875, -45.8671875, -44.267578125, -42.66796875, -41.068359375, -39.46875, -37.869140625, -36.26953125, -34.669921875, -33.0703125, -31.470703125, -29.87109375, -28.271484375, -26.671875, -25.072265625, -23.47265625, -21.873046875, -20.2734375, -18.673828125, -17.07421875, -15.474609375, -13.875, -12.275390625, -10.67578125, -9.076171875, -7.4765625, -5.876953125, -4.27734375, -2.677734375, -1.078125, 0.521484375, 2.12109375, 3.720703125, 5.3203125, 6.919921875, 8.51953125, 10.119140625, 11.71875, 13.318359375, 14.91796875, 16.517578125, 18.1171875, 19.716796875, 21.31640625, 22.916015625, 24.515625, 26.115234375, 27.71484375, 29.314453125, 30.9140625, 32.513671875, 34.11328125, 35.712890625, 37.3125]}, "gradients/decoder.transformer.h.13.ln_1.weight": {"_type": "histogram", "values": [491.0, 529.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-19.369287490844727, -0.0404510498046875, 19.28838539123535, 38.617225646972656, 57.94606018066406, 77.27489471435547, 96.6037368774414, 115.93256378173828, 135.2613983154297, 154.59024047851562, 173.9190673828125, 193.24790954589844, 212.57675170898438, 231.90557861328125, 251.2344207763672, 270.563232421875, 289.89208984375, 309.2209167480469, 328.5497741699219, 347.87860107421875, 367.2074279785156, 386.5362548828125, 405.8651123046875, 425.1939392089844, 444.5227966308594, 463.85162353515625, 483.18048095703125, 502.5093078613281, 521.838134765625, 541.1669921875, 560.495849609375, 579.8246459960938, 599.1535034179688, 618.4823608398438, 637.8111572265625, 657.1400146484375, 676.4688720703125, 695.7976684570312, 715.1265258789062, 734.455322265625, 753.7841796875, 773.113037109375, 792.4418334960938, 811.7706909179688, 831.0995483398438, 850.4283447265625, 869.7572021484375, 889.0860595703125, 908.4149169921875, 927.7437744140625, 947.0725708007812, 966.4014282226562, 985.7302856445312, 1005.05908203125, 1024.387939453125, 1043.716796875, 1063.0455322265625, 1082.3743896484375, 1101.7032470703125, 1121.031982421875, 1140.36083984375, 1159.689697265625, 1179.0185546875, 1198.347412109375, 1217.67626953125]}, "gradients/decoder.transformer.h.13.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 2.0, 6.0, 2.0, 3.0, 9.0, 8.0, 8.0, 13.0, 19.0, 15.0, 20.0, 18.0, 18.0, 18.0, 42.0, 34.0, 39.0, 29.0, 25.0, 49.0, 45.0, 46.0, 61.0, 34.0, 43.0, 39.0, 48.0, 37.0, 36.0, 33.0, 25.0, 21.0, 32.0, 26.0, 20.0, 24.0, 12.0, 3.0, 15.0, 10.0, 7.0, 7.0, 5.0, 1.0, 2.0, 1.0, 4.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-56.122032165527344, -54.33345031738281, -52.54487228393555, -50.756290435791016, -48.96771240234375, -47.17913055419922, -45.39055252075195, -43.60197067260742, -41.813392639160156, -40.024810791015625, -38.23623275756836, -36.44765090942383, -34.65907287597656, -32.87049102783203, -31.081912994384766, -29.293331146240234, -27.504751205444336, -25.716171264648438, -23.92759132385254, -22.13901138305664, -20.350431442260742, -18.561851501464844, -16.773269653320312, -14.98469066619873, -13.196110725402832, -11.407530784606934, -9.618950843811035, -7.8303704261779785, -6.04179048538208, -4.253210067749023, -2.464630126953125, -0.6760501861572266, 1.1125297546386719, 2.9011096954345703, 4.689689636230469, 6.478270053863525, 8.266849517822266, 10.05543041229248, 11.844010353088379, 13.632590293884277, 15.421170234680176, 17.20975112915039, 18.99833106994629, 20.786911010742188, 22.575490951538086, 24.364070892333984, 26.152650833129883, 27.94123077392578, 29.72981071472168, 31.518390655517578, 33.30697250366211, 35.095550537109375, 36.884132385253906, 38.67271041870117, 40.4612922668457, 42.24987030029297, 44.0384521484375, 45.82703399658203, 47.6156120300293, 49.40419387817383, 51.192771911621094, 52.981353759765625, 54.76993179321289, 56.55851364135742, 58.34709167480469]}, "gradients/decoder.transformer.h.12.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 3.0, 3.0, 8.0, 6.0, 3.0, 11.0, 11.0, 10.0, 8.0, 17.0, 23.0, 22.0, 25.0, 26.0, 39.0, 34.0, 42.0, 50.0, 39.0, 43.0, 62.0, 34.0, 42.0, 47.0, 33.0, 36.0, 47.0, 43.0, 35.0, 31.0, 35.0, 32.0, 22.0, 18.0, 9.0, 8.0, 19.0, 9.0, 8.0, 6.0, 4.0, 4.0, 3.0, 1.0, 1.0, 1.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.66796875, -5.4652099609375, -5.262451171875, -5.0596923828125, -4.85693359375, -4.6541748046875, -4.451416015625, -4.2486572265625, -4.0458984375, -3.8431396484375, -3.640380859375, -3.4376220703125, -3.23486328125, -3.0321044921875, -2.829345703125, -2.6265869140625, -2.423828125, -2.2210693359375, -2.018310546875, -1.8155517578125, -1.61279296875, -1.4100341796875, -1.207275390625, -1.0045166015625, -0.8017578125, -0.5989990234375, -0.396240234375, -0.1934814453125, 0.00927734375, 0.2120361328125, 0.414794921875, 0.6175537109375, 0.8203125, 1.0230712890625, 1.225830078125, 1.4285888671875, 1.63134765625, 1.8341064453125, 2.036865234375, 2.2396240234375, 2.4423828125, 2.6451416015625, 2.847900390625, 3.0506591796875, 3.25341796875, 3.4561767578125, 3.658935546875, 3.8616943359375, 4.064453125, 4.2672119140625, 4.469970703125, 4.6727294921875, 4.87548828125, 5.0782470703125, 5.281005859375, 5.4837646484375, 5.6865234375, 5.8892822265625, 6.092041015625, 6.2947998046875, 6.49755859375, 6.7003173828125, 6.903076171875, 7.1058349609375, 7.30859375]}, "gradients/decoder.transformer.h.12.mlp.c_proj.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 0.0, 0.0, 2.0, 6.0, 3.0, 3.0, 5.0, 3.0, 11.0, 4.0, 14.0, 27.0, 31.0, 52.0, 101.0, 125.0, 211.0, 313.0, 542.0, 868.0, 1629.0, 2881.0, 5563.0, 12145.0, 28355.0, 75816.0, 274193.0, 1282408.0, 1862651.0, 465296.0, 111418.0, 38243.0, 15794.0, 7322.0, 3732.0, 1937.0, 1024.0, 575.0, 338.0, 234.0, 121.0, 99.0, 71.0, 47.0, 20.0, 16.0, 17.0, 8.0, 7.0, 5.0, 3.0, 0.0, 3.0, 2.0, 2.0, 0.0, 2.0, 0.0, 1.0, 1.0, 1.0], "bins": [-8.796875, -8.5146484375, -8.232421875, -7.9501953125, -7.66796875, -7.3857421875, -7.103515625, -6.8212890625, -6.5390625, -6.2568359375, -5.974609375, -5.6923828125, -5.41015625, -5.1279296875, -4.845703125, -4.5634765625, -4.28125, -3.9990234375, -3.716796875, -3.4345703125, -3.15234375, -2.8701171875, -2.587890625, -2.3056640625, -2.0234375, -1.7412109375, -1.458984375, -1.1767578125, -0.89453125, -0.6123046875, -0.330078125, -0.0478515625, 0.234375, 0.5166015625, 0.798828125, 1.0810546875, 1.36328125, 1.6455078125, 1.927734375, 2.2099609375, 2.4921875, 2.7744140625, 3.056640625, 3.3388671875, 3.62109375, 3.9033203125, 4.185546875, 4.4677734375, 4.75, 5.0322265625, 5.314453125, 5.5966796875, 5.87890625, 6.1611328125, 6.443359375, 6.7255859375, 7.0078125, 7.2900390625, 7.572265625, 7.8544921875, 8.13671875, 8.4189453125, 8.701171875, 8.9833984375, 9.265625]}, "gradients/decoder.transformer.h.12.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 3.0, 3.0, 5.0, 3.0, 6.0, 7.0, 15.0, 16.0, 32.0, 31.0, 44.0, 77.0, 177.0, 252.0, 436.0, 650.0, 768.0, 545.0, 334.0, 222.0, 153.0, 98.0, 61.0, 59.0, 31.0, 20.0, 14.0, 10.0, 5.0, 2.0, 2.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-18.75, -18.2467041015625, -17.743408203125, -17.2401123046875, -16.73681640625, -16.2335205078125, -15.730224609375, -15.2269287109375, -14.7236328125, -14.2203369140625, -13.717041015625, -13.2137451171875, -12.71044921875, -12.2071533203125, -11.703857421875, -11.2005615234375, -10.697265625, -10.1939697265625, -9.690673828125, -9.1873779296875, -8.68408203125, -8.1807861328125, -7.677490234375, -7.1741943359375, -6.6708984375, -6.1676025390625, -5.664306640625, -5.1610107421875, -4.65771484375, -4.1544189453125, -3.651123046875, -3.1478271484375, -2.64453125, -2.1412353515625, -1.637939453125, -1.1346435546875, -0.63134765625, -0.1280517578125, 0.375244140625, 0.8785400390625, 1.3818359375, 1.8851318359375, 2.388427734375, 2.8917236328125, 3.39501953125, 3.8983154296875, 4.401611328125, 4.9049072265625, 5.408203125, 5.9114990234375, 6.414794921875, 6.9180908203125, 7.42138671875, 7.9246826171875, 8.427978515625, 8.9312744140625, 9.4345703125, 9.9378662109375, 10.441162109375, 10.9444580078125, 11.44775390625, 11.9510498046875, 12.454345703125, 12.9576416015625, 13.4609375]}, "gradients/decoder.transformer.h.12.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 4.0, 2.0, 0.0, 2.0, 7.0, 7.0, 20.0, 29.0, 37.0, 62.0, 99.0, 135.0, 228.0, 447.0, 1076.0, 4808.0, 68780.0, 3481536.0, 616310.0, 17344.0, 2058.0, 597.0, 285.0, 166.0, 93.0, 48.0, 35.0, 28.0, 10.0, 11.0, 11.0, 5.0, 5.0, 1.0, 4.0, 0.0, 4.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-40.28125, -38.88134765625, -37.4814453125, -36.08154296875, -34.681640625, -33.28173828125, -31.8818359375, -30.48193359375, -29.08203125, -27.68212890625, -26.2822265625, -24.88232421875, -23.482421875, -22.08251953125, -20.6826171875, -19.28271484375, -17.8828125, -16.48291015625, -15.0830078125, -13.68310546875, -12.283203125, -10.88330078125, -9.4833984375, -8.08349609375, -6.68359375, -5.28369140625, -3.8837890625, -2.48388671875, -1.083984375, 0.31591796875, 1.7158203125, 3.11572265625, 4.515625, 5.91552734375, 7.3154296875, 8.71533203125, 10.115234375, 11.51513671875, 12.9150390625, 14.31494140625, 15.71484375, 17.11474609375, 18.5146484375, 19.91455078125, 21.314453125, 22.71435546875, 24.1142578125, 25.51416015625, 26.9140625, 28.31396484375, 29.7138671875, 31.11376953125, 32.513671875, 33.91357421875, 35.3134765625, 36.71337890625, 38.11328125, 39.51318359375, 40.9130859375, 42.31298828125, 43.712890625, 45.11279296875, 46.5126953125, 47.91259765625, 49.3125]}, "gradients/decoder.transformer.h.12.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 6.0, 21.0, 58.0, 85.0, 153.0, 208.0, 199.0, 148.0, 71.0, 45.0, 12.0, 6.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-165.80215454101562, -161.73146057128906, -157.66078186035156, -153.590087890625, -149.51939392089844, -145.44869995117188, -141.37802124023438, -137.3073272705078, -133.23663330078125, -129.1659393310547, -125.09525299072266, -121.02456665039062, -116.95387268066406, -112.88318634033203, -108.8125, -104.74180603027344, -100.67112731933594, -96.6004409790039, -92.52974700927734, -88.45906066894531, -84.38836669921875, -80.31768035888672, -76.24699401855469, -72.17630004882812, -68.1056137084961, -64.03492736816406, -59.9642333984375, -55.89354705810547, -51.82285690307617, -47.752166748046875, -43.681480407714844, -39.61079025268555, -35.54010772705078, -31.469417572021484, -27.39872932434082, -23.328041076660156, -19.25735092163086, -15.186660766601562, -11.115972518920898, -7.045284271240234, -2.9745941162109375, 1.096095085144043, 5.166784286499023, 9.237473487854004, 13.308162689208984, 17.37885284423828, 21.449541091918945, 25.52022933959961, 29.590919494628906, 33.6616096496582, 37.7322998046875, 41.80298614501953, 45.87367630004883, 49.944366455078125, 54.015052795410156, 58.08574295043945, 62.15643310546875, 66.22711944580078, 70.29781341552734, 74.36849975585938, 78.43919372558594, 82.50988006591797, 86.58056640625, 90.65126037597656, 94.7219467163086]}, "gradients/decoder.transformer.h.12.ln_2.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 3.0, 1.0, 0.0, 5.0, 7.0, 4.0, 7.0, 6.0, 15.0, 12.0, 13.0, 18.0, 12.0, 18.0, 22.0, 21.0, 27.0, 36.0, 35.0, 45.0, 38.0, 36.0, 49.0, 52.0, 39.0, 40.0, 34.0, 44.0, 43.0, 43.0, 40.0, 33.0, 40.0, 20.0, 22.0, 24.0, 14.0, 24.0, 14.0, 19.0, 12.0, 8.0, 6.0, 3.0, 5.0, 6.0, 2.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-37.90277862548828, -36.538978576660156, -35.1751823425293, -33.81138229370117, -32.44758605957031, -31.08378791809082, -29.719989776611328, -28.356189727783203, -26.992393493652344, -25.62859535217285, -24.26479721069336, -22.900999069213867, -21.537200927734375, -20.173402786254883, -18.80960464477539, -17.445804595947266, -16.082006454467773, -14.718208312988281, -13.354410171508789, -11.990612030029297, -10.626813888549805, -9.263015747070312, -7.899216651916504, -6.535418510437012, -5.1716203689575195, -3.8078222274780273, -2.444023847579956, -1.0802254676818848, 0.2835726737976074, 1.6473708152770996, 3.01116943359375, 4.374967575073242, 5.738765716552734, 7.102563858032227, 8.466361999511719, 9.830160140991211, 11.193958282470703, 12.557756423950195, 13.921555519104004, 15.285353660583496, 16.649150848388672, 18.012948989868164, 19.376747131347656, 20.74054527282715, 22.10434341430664, 23.468141555786133, 24.831939697265625, 26.19573974609375, 27.559537887573242, 28.923336029052734, 30.287134170532227, 31.65093231201172, 33.014732360839844, 34.3785285949707, 35.74232864379883, 37.10612487792969, 38.46992492675781, 39.83372497558594, 41.1975212097168, 42.56132125854492, 43.92511749267578, 45.288917541503906, 46.652713775634766, 48.01651382446289, 49.38031005859375]}, "gradients/decoder.transformer.h.12.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 7.0, 4.0, 1.0, 5.0, 5.0, 7.0, 9.0, 2.0, 14.0, 14.0, 15.0, 20.0, 36.0, 31.0, 32.0, 34.0, 35.0, 29.0, 44.0, 44.0, 41.0, 44.0, 37.0, 41.0, 34.0, 51.0, 31.0, 39.0, 41.0, 43.0, 35.0, 33.0, 39.0, 16.0, 14.0, 22.0, 13.0, 7.0, 12.0, 8.0, 6.0, 6.0, 5.0, 3.0, 4.0, 2.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.2421875, -5.04840087890625, -4.8546142578125, -4.66082763671875, -4.467041015625, -4.27325439453125, -4.0794677734375, -3.88568115234375, -3.69189453125, -3.49810791015625, -3.3043212890625, -3.11053466796875, -2.916748046875, -2.72296142578125, -2.5291748046875, -2.33538818359375, -2.1416015625, -1.94781494140625, -1.7540283203125, -1.56024169921875, -1.366455078125, -1.17266845703125, -0.9788818359375, -0.78509521484375, -0.59130859375, -0.39752197265625, -0.2037353515625, -0.00994873046875, 0.183837890625, 0.37762451171875, 0.5714111328125, 0.76519775390625, 0.958984375, 1.15277099609375, 1.3465576171875, 1.54034423828125, 1.734130859375, 1.92791748046875, 2.1217041015625, 2.31549072265625, 2.50927734375, 2.70306396484375, 2.8968505859375, 3.09063720703125, 3.284423828125, 3.47821044921875, 3.6719970703125, 3.86578369140625, 4.0595703125, 4.25335693359375, 4.4471435546875, 4.64093017578125, 4.834716796875, 5.02850341796875, 5.2222900390625, 5.41607666015625, 5.60986328125, 5.80364990234375, 5.9974365234375, 6.19122314453125, 6.385009765625, 6.57879638671875, 6.7725830078125, 6.96636962890625, 7.16015625]}, "gradients/decoder.transformer.h.12.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 4.0, 0.0, 1.0, 6.0, 11.0, 4.0, 22.0, 30.0, 27.0, 53.0, 65.0, 139.0, 152.0, 267.0, 398.0, 654.0, 1021.0, 1755.0, 2758.0, 4867.0, 7924.0, 13925.0, 25439.0, 46264.0, 86715.0, 188010.0, 377230.0, 137064.0, 68391.0, 36711.0, 20360.0, 11504.0, 6646.0, 3985.0, 2311.0, 1443.0, 863.0, 553.0, 352.0, 205.0, 162.0, 91.0, 60.0, 35.0, 35.0, 25.0, 9.0, 9.0, 5.0, 3.0, 3.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.257080078125, -0.248870849609375, -0.24066162109375, -0.232452392578125, -0.2242431640625, -0.216033935546875, -0.20782470703125, -0.199615478515625, -0.19140625, -0.183197021484375, -0.17498779296875, -0.166778564453125, -0.1585693359375, -0.150360107421875, -0.14215087890625, -0.133941650390625, -0.125732421875, -0.117523193359375, -0.10931396484375, -0.101104736328125, -0.0928955078125, -0.084686279296875, -0.07647705078125, -0.068267822265625, -0.06005859375, -0.051849365234375, -0.04364013671875, -0.035430908203125, -0.0272216796875, -0.019012451171875, -0.01080322265625, -0.002593994140625, 0.005615234375, 0.013824462890625, 0.02203369140625, 0.030242919921875, 0.0384521484375, 0.046661376953125, 0.05487060546875, 0.063079833984375, 0.0712890625, 0.079498291015625, 0.08770751953125, 0.095916748046875, 0.1041259765625, 0.112335205078125, 0.12054443359375, 0.128753662109375, 0.136962890625, 0.145172119140625, 0.15338134765625, 0.161590576171875, 0.1697998046875, 0.178009033203125, 0.18621826171875, 0.194427490234375, 0.20263671875, 0.210845947265625, 0.21905517578125, 0.227264404296875, 0.2354736328125, 0.243682861328125, 0.25189208984375, 0.260101318359375, 0.268310546875]}, "gradients/decoder.transformer.h.12.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 3.0, 4.0, 1.0, 3.0, 2.0, 4.0, 5.0, 3.0, 2.0, 6.0, 11.0, 7.0, 10.0, 16.0, 14.0, 17.0, 26.0, 14.0, 28.0, 23.0, 21.0, 22.0, 29.0, 36.0, 36.0, 24.0, 40.0, 41.0, 34.0, 1064.0, 42.0, 34.0, 38.0, 33.0, 26.0, 35.0, 40.0, 34.0, 23.0, 22.0, 19.0, 26.0, 24.0, 14.0, 13.0, 14.0, 16.0, 8.0, 6.0, 14.0, 5.0, 1.0, 1.0, 5.0, 2.0, 2.0, 0.0, 0.0, 1.0, 1.0], "bins": [-4.45703125, -4.32366943359375, -4.1903076171875, -4.05694580078125, -3.923583984375, -3.79022216796875, -3.6568603515625, -3.52349853515625, -3.39013671875, -3.25677490234375, -3.1234130859375, -2.99005126953125, -2.856689453125, -2.72332763671875, -2.5899658203125, -2.45660400390625, -2.3232421875, -2.18988037109375, -2.0565185546875, -1.92315673828125, -1.789794921875, -1.65643310546875, -1.5230712890625, -1.38970947265625, -1.25634765625, -1.12298583984375, -0.9896240234375, -0.85626220703125, -0.722900390625, -0.58953857421875, -0.4561767578125, -0.32281494140625, -0.189453125, -0.05609130859375, 0.0772705078125, 0.21063232421875, 0.343994140625, 0.47735595703125, 0.6107177734375, 0.74407958984375, 0.87744140625, 1.01080322265625, 1.1441650390625, 1.27752685546875, 1.410888671875, 1.54425048828125, 1.6776123046875, 1.81097412109375, 1.9443359375, 2.07769775390625, 2.2110595703125, 2.34442138671875, 2.477783203125, 2.61114501953125, 2.7445068359375, 2.87786865234375, 3.01123046875, 3.14459228515625, 3.2779541015625, 3.41131591796875, 3.544677734375, 3.67803955078125, 3.8114013671875, 3.94476318359375, 4.078125]}, "gradients/decoder.transformer.h.12.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 4.0, 7.0, 13.0, 10.0, 29.0, 45.0, 51.0, 61.0, 100.0, 136.0, 192.0, 289.0, 360.0, 545.0, 741.0, 982.0, 1431.0, 1826.0, 2679.0, 3613.0, 5046.0, 6896.0, 9722.0, 13998.0, 20076.0, 28836.0, 42579.0, 63995.0, 99656.0, 210846.0, 1274412.0, 101049.0, 64449.0, 42882.0, 29575.0, 20254.0, 14202.0, 9936.0, 7235.0, 5123.0, 3685.0, 2643.0, 1931.0, 1370.0, 1011.0, 703.0, 501.0, 412.0, 282.0, 212.0, 152.0, 123.0, 82.0, 55.0, 37.0, 28.0, 24.0, 7.0, 4.0, 2.0, 4.0], "bins": [-0.107666015625, -0.10440540313720703, -0.10114479064941406, -0.0978841781616211, -0.09462356567382812, -0.09136295318603516, -0.08810234069824219, -0.08484172821044922, -0.08158111572265625, -0.07832050323486328, -0.07505989074707031, -0.07179927825927734, -0.06853866577148438, -0.0652780532836914, -0.06201744079589844, -0.05875682830810547, -0.0554962158203125, -0.05223560333251953, -0.04897499084472656, -0.045714378356933594, -0.042453765869140625, -0.039193153381347656, -0.03593254089355469, -0.03267192840576172, -0.02941131591796875, -0.02615070343017578, -0.022890090942382812, -0.019629478454589844, -0.016368865966796875, -0.013108253479003906, -0.009847640991210938, -0.006587028503417969, -0.003326416015625, -6.580352783203125e-05, 0.0031948089599609375, 0.006455421447753906, 0.009716033935546875, 0.012976646423339844, 0.016237258911132812, 0.01949787139892578, 0.02275848388671875, 0.02601909637451172, 0.029279708862304688, 0.032540321350097656, 0.035800933837890625, 0.039061546325683594, 0.04232215881347656, 0.04558277130126953, 0.0488433837890625, 0.05210399627685547, 0.05536460876464844, 0.058625221252441406, 0.061885833740234375, 0.06514644622802734, 0.06840705871582031, 0.07166767120361328, 0.07492828369140625, 0.07818889617919922, 0.08144950866699219, 0.08471012115478516, 0.08797073364257812, 0.0912313461303711, 0.09449195861816406, 0.09775257110595703, 0.10101318359375]}, "gradients/decoder.transformer.h.12.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 2.0, 4.0, 2.0, 4.0, 5.0, 3.0, 8.0, 6.0, 11.0, 10.0, 16.0, 18.0, 21.0, 29.0, 28.0, 44.0, 47.0, 24.0, 50.0, 77.0, 105.0, 82.0, 45.0, 58.0, 38.0, 46.0, 42.0, 28.0, 28.0, 20.0, 17.0, 14.0, 8.0, 10.0, 18.0, 10.0, 13.0, 4.0, 6.0, 5.0, 2.0, 1.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-4.649162292480469e-06, -4.482455551624298e-06, -4.3157488107681274e-06, -4.149042069911957e-06, -3.982335329055786e-06, -3.8156285881996155e-06, -3.648921847343445e-06, -3.482215106487274e-06, -3.3155083656311035e-06, -3.148801624774933e-06, -2.982094883918762e-06, -2.8153881430625916e-06, -2.648681402206421e-06, -2.4819746613502502e-06, -2.3152679204940796e-06, -2.148561179637909e-06, -1.9818544387817383e-06, -1.8151476979255676e-06, -1.648440957069397e-06, -1.4817342162132263e-06, -1.3150274753570557e-06, -1.148320734500885e-06, -9.816139936447144e-07, -8.149072527885437e-07, -6.48200511932373e-07, -4.814937710762024e-07, -3.1478703022003174e-07, -1.4808028936386108e-07, 1.862645149230957e-08, 1.8533319234848022e-07, 3.520399332046509e-07, 5.187466740608215e-07, 6.854534149169922e-07, 8.521601557731628e-07, 1.0188668966293335e-06, 1.1855736374855042e-06, 1.3522803783416748e-06, 1.5189871191978455e-06, 1.6856938600540161e-06, 1.8524006009101868e-06, 2.0191073417663574e-06, 2.185814082622528e-06, 2.3525208234786987e-06, 2.5192275643348694e-06, 2.68593430519104e-06, 2.8526410460472107e-06, 3.0193477869033813e-06, 3.186054527759552e-06, 3.3527612686157227e-06, 3.5194680094718933e-06, 3.686174750328064e-06, 3.852881491184235e-06, 4.019588232040405e-06, 4.186294972896576e-06, 4.353001713752747e-06, 4.519708454608917e-06, 4.686415195465088e-06, 4.8531219363212585e-06, 5.019828677177429e-06, 5.1865354180336e-06, 5.3532421588897705e-06, 5.519948899745941e-06, 5.686655640602112e-06, 5.8533623814582825e-06, 6.020069122314453e-06]}, "gradients/decoder.transformer.h.12.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 0.0, 2.0, 2.0, 4.0, 10.0, 11.0, 10.0, 9.0, 10.0, 15.0, 10.0, 11.0, 14.0, 27.0, 28.0, 42.0, 52.0, 82.0, 96.0, 225.0, 738.0, 6291.0, 253220.0, 772662.0, 13174.0, 1108.0, 274.0, 131.0, 79.0, 49.0, 36.0, 29.0, 25.0, 23.0, 10.0, 6.0, 7.0, 7.0, 10.0, 7.0, 6.0, 2.0, 1.0, 5.0, 3.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0], "bins": [-0.00010633468627929688, -0.00010339077562093735, -0.00010044686496257782, -9.750295430421829e-05, -9.455904364585876e-05, -9.161513298749924e-05, -8.867122232913971e-05, -8.572731167078018e-05, -8.278340101242065e-05, -7.983949035406113e-05, -7.68955796957016e-05, -7.395166903734207e-05, -7.100775837898254e-05, -6.806384772062302e-05, -6.511993706226349e-05, -6.217602640390396e-05, -5.9232115745544434e-05, -5.6288205087184906e-05, -5.334429442882538e-05, -5.040038377046585e-05, -4.745647311210632e-05, -4.4512562453746796e-05, -4.156865179538727e-05, -3.862474113702774e-05, -3.568083047866821e-05, -3.2736919820308685e-05, -2.9793009161949158e-05, -2.684909850358963e-05, -2.3905187845230103e-05, -2.0961277186870575e-05, -1.8017366528511047e-05, -1.507345587015152e-05, -1.2129545211791992e-05, -9.185634553432465e-06, -6.241723895072937e-06, -3.2978132367134094e-06, -3.5390257835388184e-07, 2.5900080800056458e-06, 5.533918738365173e-06, 8.477829396724701e-06, 1.1421740055084229e-05, 1.4365650713443756e-05, 1.7309561371803284e-05, 2.025347203016281e-05, 2.319738268852234e-05, 2.6141293346881866e-05, 2.9085204005241394e-05, 3.202911466360092e-05, 3.497302532196045e-05, 3.791693598031998e-05, 4.0860846638679504e-05, 4.380475729703903e-05, 4.674866795539856e-05, 4.969257861375809e-05, 5.2636489272117615e-05, 5.558039993047714e-05, 5.852431058883667e-05, 6.14682212471962e-05, 6.441213190555573e-05, 6.735604256391525e-05, 7.029995322227478e-05, 7.324386388063431e-05, 7.618777453899384e-05, 7.913168519735336e-05, 8.207559585571289e-05]}, "gradients/decoder.transformer.h.12.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 0.0, 5.0, 17.0, 69.0, 206.0, 405.0, 211.0, 71.0, 24.0, 6.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-2.4231681891251355e-05, -2.3742129997117445e-05, -2.3252578102983534e-05, -2.2763026208849624e-05, -2.2273474314715713e-05, -2.1783922420581803e-05, -2.1294370526447892e-05, -2.080481863231398e-05, -2.031526673818007e-05, -1.982571484404616e-05, -1.933616294991225e-05, -1.884661105577834e-05, -1.835705916164443e-05, -1.786750726751052e-05, -1.7377955373376608e-05, -1.6888403479242697e-05, -1.6398851585108787e-05, -1.5909299690974876e-05, -1.5419747796840966e-05, -1.4930195902707055e-05, -1.4440644008573145e-05, -1.3951092114439234e-05, -1.3461540220305324e-05, -1.2971988326171413e-05, -1.2482436432037503e-05, -1.1992884537903592e-05, -1.1503332643769681e-05, -1.1013780749635771e-05, -1.052422885550186e-05, -1.003467696136795e-05, -9.54512506723404e-06, -9.055573173100129e-06, -8.566019459976815e-06, -8.076467565842904e-06, -7.586915671708994e-06, -7.097363777575083e-06, -6.6078118834411725e-06, -6.118259989307262e-06, -5.6287080951733515e-06, -5.139156201039441e-06, -4.649604761652881e-06, -4.160052867518971e-06, -3.67050097338506e-06, -3.1809490792511497e-06, -2.691397185117239e-06, -2.2018452909833286e-06, -1.712293396849418e-06, -1.2227415027155075e-06, -7.33189608581597e-07, -2.4363771444768645e-07, 2.459141796862241e-07, 7.354660738201346e-07, 1.2250179679540452e-06, 1.7145698620879557e-06, 2.2041217562218662e-06, 2.6936736503557768e-06, 3.1832255444896873e-06, 3.672777438623598e-06, 4.162329332757508e-06, 4.651881226891419e-06, 5.1414331210253295e-06, 5.63098501515924e-06, 6.1205369092931505e-06, 6.610088803427061e-06, 7.099640697560972e-06]}, "gradients/decoder.transformer.h.12.ln_cross_attn.bias": {"_type": "histogram", "values": [2.0, 2.0, 1.0, 1.0, 2.0, 3.0, 5.0, 5.0, 2.0, 7.0, 10.0, 6.0, 5.0, 9.0, 22.0, 8.0, 6.0, 32.0, 19.0, 16.0, 18.0, 42.0, 21.0, 24.0, 42.0, 22.0, 26.0, 25.0, 63.0, 26.0, 24.0, 30.0, 74.0, 25.0, 32.0, 47.0, 29.0, 29.0, 23.0, 41.0, 14.0, 21.0, 47.0, 9.0, 14.0, 18.0, 17.0, 9.0, 8.0, 10.0, 9.0, 2.0, 3.0, 6.0, 3.0, 1.0, 0.0, 3.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.384185791015625e-06, -2.3078173398971558e-06, -2.2314488887786865e-06, -2.1550804376602173e-06, -2.078711986541748e-06, -2.002343535423279e-06, -1.9259750843048096e-06, -1.8496066331863403e-06, -1.773238182067871e-06, -1.6968697309494019e-06, -1.6205012798309326e-06, -1.5441328287124634e-06, -1.4677643775939941e-06, -1.391395926475525e-06, -1.3150274753570557e-06, -1.2386590242385864e-06, -1.1622905731201172e-06, -1.085922122001648e-06, -1.0095536708831787e-06, -9.331852197647095e-07, -8.568167686462402e-07, -7.80448317527771e-07, -7.040798664093018e-07, -6.277114152908325e-07, -5.513429641723633e-07, -4.7497451305389404e-07, -3.986060619354248e-07, -3.2223761081695557e-07, -2.4586915969848633e-07, -1.695007085800171e-07, -9.313225746154785e-08, -1.6763806343078613e-08, 5.960464477539063e-08, 1.3597309589385986e-07, 2.123415470123291e-07, 2.8870999813079834e-07, 3.650784492492676e-07, 4.414469003677368e-07, 5.178153514862061e-07, 5.941838026046753e-07, 6.705522537231445e-07, 7.469207048416138e-07, 8.23289155960083e-07, 8.996576070785522e-07, 9.760260581970215e-07, 1.0523945093154907e-06, 1.12876296043396e-06, 1.2051314115524292e-06, 1.2814998626708984e-06, 1.3578683137893677e-06, 1.434236764907837e-06, 1.5106052160263062e-06, 1.5869736671447754e-06, 1.6633421182632446e-06, 1.7397105693817139e-06, 1.816079020500183e-06, 1.8924474716186523e-06, 1.9688159227371216e-06, 2.045184373855591e-06, 2.12155282497406e-06, 2.1979212760925293e-06, 2.2742897272109985e-06, 2.3506581783294678e-06, 2.427026629447937e-06, 2.5033950805664062e-06]}, "gradients/decoder.transformer.h.12.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 7.0, 4.0, 1.0, 5.0, 5.0, 7.0, 9.0, 2.0, 14.0, 14.0, 15.0, 20.0, 36.0, 31.0, 32.0, 34.0, 35.0, 29.0, 44.0, 44.0, 41.0, 44.0, 37.0, 41.0, 34.0, 51.0, 31.0, 39.0, 41.0, 43.0, 35.0, 33.0, 39.0, 16.0, 14.0, 22.0, 13.0, 7.0, 12.0, 8.0, 6.0, 6.0, 5.0, 3.0, 4.0, 2.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.2421875, -5.04840087890625, -4.8546142578125, -4.66082763671875, -4.467041015625, -4.27325439453125, -4.0794677734375, -3.88568115234375, -3.69189453125, -3.49810791015625, -3.3043212890625, -3.11053466796875, -2.916748046875, -2.72296142578125, -2.5291748046875, -2.33538818359375, -2.1416015625, -1.94781494140625, -1.7540283203125, -1.56024169921875, -1.366455078125, -1.17266845703125, -0.9788818359375, -0.78509521484375, -0.59130859375, -0.39752197265625, -0.2037353515625, -0.00994873046875, 0.183837890625, 0.37762451171875, 0.5714111328125, 0.76519775390625, 0.958984375, 1.15277099609375, 1.3465576171875, 1.54034423828125, 1.734130859375, 1.92791748046875, 2.1217041015625, 2.31549072265625, 2.50927734375, 2.70306396484375, 2.8968505859375, 3.09063720703125, 3.284423828125, 3.47821044921875, 3.6719970703125, 3.86578369140625, 4.0595703125, 4.25335693359375, 4.4471435546875, 4.64093017578125, 4.834716796875, 5.02850341796875, 5.2222900390625, 5.41607666015625, 5.60986328125, 5.80364990234375, 5.9974365234375, 6.19122314453125, 6.385009765625, 6.57879638671875, 6.7725830078125, 6.96636962890625, 7.16015625]}, "gradients/decoder.transformer.h.12.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 1.0, 3.0, 6.0, 10.0, 7.0, 15.0, 27.0, 27.0, 68.0, 89.0, 142.0, 219.0, 385.0, 631.0, 1096.0, 1978.0, 3861.0, 7789.0, 16968.0, 46157.0, 175992.0, 562739.0, 156495.0, 42469.0, 15976.0, 7290.0, 3573.0, 1903.0, 1065.0, 612.0, 385.0, 195.0, 121.0, 91.0, 69.0, 41.0, 17.0, 11.0, 13.0, 8.0, 5.0, 6.0, 6.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-5.80078125, -5.6103515625, -5.419921875, -5.2294921875, -5.0390625, -4.8486328125, -4.658203125, -4.4677734375, -4.27734375, -4.0869140625, -3.896484375, -3.7060546875, -3.515625, -3.3251953125, -3.134765625, -2.9443359375, -2.75390625, -2.5634765625, -2.373046875, -2.1826171875, -1.9921875, -1.8017578125, -1.611328125, -1.4208984375, -1.23046875, -1.0400390625, -0.849609375, -0.6591796875, -0.46875, -0.2783203125, -0.087890625, 0.1025390625, 0.29296875, 0.4833984375, 0.673828125, 0.8642578125, 1.0546875, 1.2451171875, 1.435546875, 1.6259765625, 1.81640625, 2.0068359375, 2.197265625, 2.3876953125, 2.578125, 2.7685546875, 2.958984375, 3.1494140625, 3.33984375, 3.5302734375, 3.720703125, 3.9111328125, 4.1015625, 4.2919921875, 4.482421875, 4.6728515625, 4.86328125, 5.0537109375, 5.244140625, 5.4345703125, 5.625, 5.8154296875, 6.005859375, 6.1962890625, 6.38671875]}, "gradients/decoder.transformer.h.12.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 3.0, 1.0, 0.0, 2.0, 4.0, 6.0, 7.0, 8.0, 11.0, 15.0, 8.0, 13.0, 18.0, 11.0, 28.0, 14.0, 28.0, 40.0, 40.0, 43.0, 38.0, 48.0, 39.0, 89.0, 268.0, 1670.0, 139.0, 72.0, 44.0, 44.0, 37.0, 34.0, 26.0, 25.0, 28.0, 22.0, 24.0, 22.0, 16.0, 13.0, 17.0, 9.0, 6.0, 9.0, 1.0, 4.0, 5.0, 4.0, 3.0, 7.0, 1.0, 2.0, 1.0], "bins": [-21.390625, -20.80029296875, -20.2099609375, -19.61962890625, -19.029296875, -18.43896484375, -17.8486328125, -17.25830078125, -16.66796875, -16.07763671875, -15.4873046875, -14.89697265625, -14.306640625, -13.71630859375, -13.1259765625, -12.53564453125, -11.9453125, -11.35498046875, -10.7646484375, -10.17431640625, -9.583984375, -8.99365234375, -8.4033203125, -7.81298828125, -7.22265625, -6.63232421875, -6.0419921875, -5.45166015625, -4.861328125, -4.27099609375, -3.6806640625, -3.09033203125, -2.5, -1.90966796875, -1.3193359375, -0.72900390625, -0.138671875, 0.45166015625, 1.0419921875, 1.63232421875, 2.22265625, 2.81298828125, 3.4033203125, 3.99365234375, 4.583984375, 5.17431640625, 5.7646484375, 6.35498046875, 6.9453125, 7.53564453125, 8.1259765625, 8.71630859375, 9.306640625, 9.89697265625, 10.4873046875, 11.07763671875, 11.66796875, 12.25830078125, 12.8486328125, 13.43896484375, 14.029296875, 14.61962890625, 15.2099609375, 15.80029296875, 16.390625]}, "gradients/decoder.transformer.h.12.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 6.0, 4.0, 3.0, 6.0, 3.0, 3.0, 9.0, 4.0, 10.0, 10.0, 17.0, 18.0, 24.0, 37.0, 37.0, 45.0, 61.0, 82.0, 126.0, 157.0, 238.0, 329.0, 726.0, 4440.0, 141580.0, 2976524.0, 18191.0, 1569.0, 472.0, 280.0, 178.0, 106.0, 101.0, 83.0, 61.0, 25.0, 34.0, 22.0, 17.0, 19.0, 9.0, 13.0, 12.0, 7.0, 9.0, 6.0, 3.0, 1.0, 1.0, 0.0, 4.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-30.359375, -29.245361328125, -28.13134765625, -27.017333984375, -25.9033203125, -24.789306640625, -23.67529296875, -22.561279296875, -21.447265625, -20.333251953125, -19.21923828125, -18.105224609375, -16.9912109375, -15.877197265625, -14.76318359375, -13.649169921875, -12.53515625, -11.421142578125, -10.30712890625, -9.193115234375, -8.0791015625, -6.965087890625, -5.85107421875, -4.737060546875, -3.623046875, -2.509033203125, -1.39501953125, -0.281005859375, 0.8330078125, 1.947021484375, 3.06103515625, 4.175048828125, 5.2890625, 6.403076171875, 7.51708984375, 8.631103515625, 9.7451171875, 10.859130859375, 11.97314453125, 13.087158203125, 14.201171875, 15.315185546875, 16.42919921875, 17.543212890625, 18.6572265625, 19.771240234375, 20.88525390625, 21.999267578125, 23.11328125, 24.227294921875, 25.34130859375, 26.455322265625, 27.5693359375, 28.683349609375, 29.79736328125, 30.911376953125, 32.025390625, 33.139404296875, 34.25341796875, 35.367431640625, 36.4814453125, 37.595458984375, 38.70947265625, 39.823486328125, 40.9375]}, "gradients/decoder.transformer.h.12.ln_1.weight": {"_type": "histogram", "values": [3.0, 298.0, 708.0, 12.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-17.300928115844727, -9.37182903289795, -1.4427299499511719, 6.486368179321289, 14.415468215942383, 22.344568252563477, 30.273664474487305, 38.20276641845703, 46.131866455078125, 54.06096649169922, 61.99006652832031, 69.91915893554688, 77.8482666015625, 85.77735900878906, 93.70645904541016, 101.63555908203125, 109.56465911865234, 117.49375915527344, 125.42285919189453, 133.35195922851562, 141.2810516357422, 149.2101593017578, 157.13925170898438, 165.068359375, 172.99745178222656, 180.92654418945312, 188.85565185546875, 196.7847442626953, 204.71385192871094, 212.6429443359375, 220.57205200195312, 228.5011444091797, 236.43023681640625, 244.3593292236328, 252.28843688964844, 260.217529296875, 268.1466369628906, 276.07574462890625, 284.00482177734375, 291.9339294433594, 299.863037109375, 307.7921447753906, 315.7212219238281, 323.65032958984375, 331.5794372558594, 339.508544921875, 347.4376220703125, 355.3667297363281, 363.29583740234375, 371.2249450683594, 379.1540222167969, 387.0831298828125, 395.0122375488281, 402.94134521484375, 410.87042236328125, 418.7995300292969, 426.7286071777344, 434.65771484375, 442.5867919921875, 450.5158996582031, 458.44500732421875, 466.3741149902344, 474.3031921386719, 482.2322998046875, 490.1614074707031]}, "gradients/decoder.transformer.h.12.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 2.0, 3.0, 4.0, 3.0, 5.0, 6.0, 5.0, 9.0, 5.0, 11.0, 6.0, 10.0, 13.0, 14.0, 12.0, 17.0, 25.0, 31.0, 26.0, 31.0, 23.0, 33.0, 34.0, 39.0, 39.0, 39.0, 41.0, 33.0, 39.0, 44.0, 41.0, 33.0, 36.0, 29.0, 30.0, 21.0, 23.0, 22.0, 15.0, 19.0, 24.0, 11.0, 15.0, 19.0, 12.0, 12.0, 11.0, 8.0, 5.0, 4.0, 4.0, 7.0, 5.0, 1.0, 3.0, 1.0, 1.0, 1.0, 2.0, 1.0, 2.0], "bins": [-46.242759704589844, -44.77268600463867, -43.302608489990234, -41.83253479003906, -40.362457275390625, -38.89238357543945, -37.42230987548828, -35.952232360839844, -34.48215866088867, -33.0120849609375, -31.542007446289062, -30.07193374633789, -28.601858139038086, -27.13178253173828, -25.66170883178711, -24.191633224487305, -22.7215576171875, -21.251482009887695, -19.78140640258789, -18.31133270263672, -16.841257095336914, -15.37118148803711, -13.901106834411621, -12.431032180786133, -10.960956573486328, -9.490880966186523, -8.020806312561035, -6.550731182098389, -5.080656051635742, -3.6105809211730957, -2.140505790710449, -0.6704311370849609, 0.7996444702148438, 2.2697196006774902, 3.7397947311401367, 5.209869861602783, 6.67994499206543, 8.150020599365234, 9.620095252990723, 11.090169906616211, 12.560245513916016, 14.03032112121582, 15.500395774841309, 16.970470428466797, 18.4405460357666, 19.910621643066406, 21.380695343017578, 22.850770950317383, 24.320846557617188, 25.790922164916992, 27.260997772216797, 28.73107147216797, 30.201147079467773, 31.671222686767578, 33.14129638671875, 34.61137390136719, 36.08144760131836, 37.55152130126953, 39.02159881591797, 40.49167251586914, 41.96174621582031, 43.43182373046875, 44.90189743041992, 46.371971130371094, 47.84204864501953]}, "gradients/decoder.transformer.h.11.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 2.0, 3.0, 3.0, 4.0, 6.0, 6.0, 7.0, 11.0, 7.0, 17.0, 18.0, 15.0, 34.0, 24.0, 41.0, 29.0, 34.0, 28.0, 46.0, 36.0, 44.0, 48.0, 51.0, 31.0, 44.0, 38.0, 40.0, 42.0, 30.0, 50.0, 44.0, 28.0, 24.0, 28.0, 17.0, 15.0, 15.0, 13.0, 6.0, 6.0, 7.0, 10.0, 2.0, 4.0, 4.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.234375, -5.0340576171875, -4.833740234375, -4.6334228515625, -4.43310546875, -4.2327880859375, -4.032470703125, -3.8321533203125, -3.6318359375, -3.4315185546875, -3.231201171875, -3.0308837890625, -2.83056640625, -2.6302490234375, -2.429931640625, -2.2296142578125, -2.029296875, -1.8289794921875, -1.628662109375, -1.4283447265625, -1.22802734375, -1.0277099609375, -0.827392578125, -0.6270751953125, -0.4267578125, -0.2264404296875, -0.026123046875, 0.1741943359375, 0.37451171875, 0.5748291015625, 0.775146484375, 0.9754638671875, 1.17578125, 1.3760986328125, 1.576416015625, 1.7767333984375, 1.97705078125, 2.1773681640625, 2.377685546875, 2.5780029296875, 2.7783203125, 2.9786376953125, 3.178955078125, 3.3792724609375, 3.57958984375, 3.7799072265625, 3.980224609375, 4.1805419921875, 4.380859375, 4.5811767578125, 4.781494140625, 4.9818115234375, 5.18212890625, 5.3824462890625, 5.582763671875, 5.7830810546875, 5.9833984375, 6.1837158203125, 6.384033203125, 6.5843505859375, 6.78466796875, 6.9849853515625, 7.185302734375, 7.3856201171875, 7.5859375]}, "gradients/decoder.transformer.h.11.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 3.0, 3.0, 4.0, 12.0, 10.0, 30.0, 30.0, 53.0, 80.0, 148.0, 223.0, 408.0, 860.0, 1689.0, 3619.0, 8976.0, 23948.0, 82284.0, 431272.0, 2354960.0, 1062767.0, 159189.0, 40066.0, 13431.0, 5412.0, 2317.0, 1090.0, 630.0, 333.0, 162.0, 111.0, 79.0, 29.0, 21.0, 19.0, 9.0, 5.0, 8.0, 3.0, 5.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.1171875, -8.7484130859375, -8.379638671875, -8.0108642578125, -7.64208984375, -7.2733154296875, -6.904541015625, -6.5357666015625, -6.1669921875, -5.7982177734375, -5.429443359375, -5.0606689453125, -4.69189453125, -4.3231201171875, -3.954345703125, -3.5855712890625, -3.216796875, -2.8480224609375, -2.479248046875, -2.1104736328125, -1.74169921875, -1.3729248046875, -1.004150390625, -0.6353759765625, -0.2666015625, 0.1021728515625, 0.470947265625, 0.8397216796875, 1.20849609375, 1.5772705078125, 1.946044921875, 2.3148193359375, 2.68359375, 3.0523681640625, 3.421142578125, 3.7899169921875, 4.15869140625, 4.5274658203125, 4.896240234375, 5.2650146484375, 5.6337890625, 6.0025634765625, 6.371337890625, 6.7401123046875, 7.10888671875, 7.4776611328125, 7.846435546875, 8.2152099609375, 8.583984375, 8.9527587890625, 9.321533203125, 9.6903076171875, 10.05908203125, 10.4278564453125, 10.796630859375, 11.1654052734375, 11.5341796875, 11.9029541015625, 12.271728515625, 12.6405029296875, 13.00927734375, 13.3780517578125, 13.746826171875, 14.1156005859375, 14.484375]}, "gradients/decoder.transformer.h.11.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 7.0, 4.0, 3.0, 8.0, 13.0, 4.0, 14.0, 28.0, 39.0, 58.0, 84.0, 97.0, 130.0, 212.0, 318.0, 506.0, 684.0, 593.0, 406.0, 294.0, 166.0, 124.0, 87.0, 64.0, 42.0, 25.0, 18.0, 15.0, 9.0, 10.0, 9.0, 4.0, 4.0, 2.0, 3.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-11.7890625, -11.3790283203125, -10.968994140625, -10.5589599609375, -10.14892578125, -9.7388916015625, -9.328857421875, -8.9188232421875, -8.5087890625, -8.0987548828125, -7.688720703125, -7.2786865234375, -6.86865234375, -6.4586181640625, -6.048583984375, -5.6385498046875, -5.228515625, -4.8184814453125, -4.408447265625, -3.9984130859375, -3.58837890625, -3.1783447265625, -2.768310546875, -2.3582763671875, -1.9482421875, -1.5382080078125, -1.128173828125, -0.7181396484375, -0.30810546875, 0.1019287109375, 0.511962890625, 0.9219970703125, 1.33203125, 1.7420654296875, 2.152099609375, 2.5621337890625, 2.97216796875, 3.3822021484375, 3.792236328125, 4.2022705078125, 4.6123046875, 5.0223388671875, 5.432373046875, 5.8424072265625, 6.25244140625, 6.6624755859375, 7.072509765625, 7.4825439453125, 7.892578125, 8.3026123046875, 8.712646484375, 9.1226806640625, 9.53271484375, 9.9427490234375, 10.352783203125, 10.7628173828125, 11.1728515625, 11.5828857421875, 11.992919921875, 12.4029541015625, 12.81298828125, 13.2230224609375, 13.633056640625, 14.0430908203125, 14.453125]}, "gradients/decoder.transformer.h.11.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 2.0, 5.0, 4.0, 7.0, 15.0, 5.0, 7.0, 21.0, 25.0, 34.0, 52.0, 70.0, 125.0, 181.0, 333.0, 783.0, 4449.0, 136861.0, 3922308.0, 123298.0, 4107.0, 725.0, 320.0, 174.0, 113.0, 80.0, 58.0, 38.0, 23.0, 18.0, 18.0, 5.0, 5.0, 7.0, 6.0, 4.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-55.90625, -54.23681640625, -52.5673828125, -50.89794921875, -49.228515625, -47.55908203125, -45.8896484375, -44.22021484375, -42.55078125, -40.88134765625, -39.2119140625, -37.54248046875, -35.873046875, -34.20361328125, -32.5341796875, -30.86474609375, -29.1953125, -27.52587890625, -25.8564453125, -24.18701171875, -22.517578125, -20.84814453125, -19.1787109375, -17.50927734375, -15.83984375, -14.17041015625, -12.5009765625, -10.83154296875, -9.162109375, -7.49267578125, -5.8232421875, -4.15380859375, -2.484375, -0.81494140625, 0.8544921875, 2.52392578125, 4.193359375, 5.86279296875, 7.5322265625, 9.20166015625, 10.87109375, 12.54052734375, 14.2099609375, 15.87939453125, 17.548828125, 19.21826171875, 20.8876953125, 22.55712890625, 24.2265625, 25.89599609375, 27.5654296875, 29.23486328125, 30.904296875, 32.57373046875, 34.2431640625, 35.91259765625, 37.58203125, 39.25146484375, 40.9208984375, 42.59033203125, 44.259765625, 45.92919921875, 47.5986328125, 49.26806640625, 50.9375]}, "gradients/decoder.transformer.h.11.ln_2.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 18.0, 41.0, 63.0, 113.0, 176.0, 203.0, 187.0, 97.0, 59.0, 31.0, 13.0, 6.0, 5.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-66.6964340209961, -63.29662322998047, -59.896812438964844, -56.49700164794922, -53.097190856933594, -49.69738006591797, -46.29756546020508, -42.89775466918945, -39.49794387817383, -36.0981330871582, -32.69832229614258, -29.29850959777832, -25.898698806762695, -22.49888801574707, -19.099075317382812, -15.699264526367188, -12.299453735351562, -8.899642944335938, -5.499831199645996, -2.1000194549560547, 1.2997913360595703, 4.699602127075195, 8.099414825439453, 11.499225616455078, 14.899036407470703, 18.298847198486328, 21.698657989501953, 25.09847068786621, 28.498281478881836, 31.89809226989746, 35.29790496826172, 38.697715759277344, 42.0975341796875, 45.497344970703125, 48.89715576171875, 52.296966552734375, 55.69677734375, 59.096588134765625, 62.496402740478516, 65.89620971679688, 69.2960205078125, 72.69583129882812, 76.09564208984375, 79.49545288085938, 82.895263671875, 86.29507446289062, 89.69488525390625, 93.09469604492188, 96.49451446533203, 99.89432525634766, 103.29413604736328, 106.6939468383789, 110.09375762939453, 113.49356842041016, 116.89338684082031, 120.29319763183594, 123.69300842285156, 127.09281921386719, 130.4926300048828, 133.89244079589844, 137.29225158691406, 140.6920623779297, 144.0918731689453, 147.49168395996094, 150.89149475097656]}, "gradients/decoder.transformer.h.11.ln_2.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 2.0, 5.0, 6.0, 3.0, 4.0, 3.0, 7.0, 7.0, 15.0, 20.0, 11.0, 15.0, 22.0, 28.0, 17.0, 25.0, 22.0, 25.0, 39.0, 29.0, 44.0, 57.0, 28.0, 37.0, 48.0, 40.0, 43.0, 30.0, 35.0, 36.0, 36.0, 23.0, 28.0, 27.0, 35.0, 25.0, 15.0, 25.0, 9.0, 18.0, 17.0, 14.0, 4.0, 6.0, 14.0, 1.0, 1.0, 2.0, 2.0, 2.0, 3.0, 1.0, 3.0, 1.0, 0.0, 2.0], "bins": [-35.21600341796875, -34.128196716308594, -33.0403938293457, -31.952587127685547, -30.864782333374023, -29.7769775390625, -28.689170837402344, -27.60136604309082, -26.513561248779297, -25.425756454467773, -24.33795166015625, -23.250144958496094, -22.16234016418457, -21.074535369873047, -19.98672866821289, -18.898923873901367, -17.811119079589844, -16.72331428527832, -15.63550853729248, -14.54770278930664, -13.459897994995117, -12.372093200683594, -11.284287452697754, -10.196481704711914, -9.10867691040039, -8.020872116088867, -6.933066368103027, -5.845261096954346, -4.757455825805664, -3.6696505546569824, -2.581845283508301, -1.4940400123596191, -0.4062347412109375, 0.6815705299377441, 1.7693758010864258, 2.8571810722351074, 3.944986343383789, 5.032791614532471, 6.120596885681152, 7.208402156829834, 8.296207427978516, 9.384012222290039, 10.471817970275879, 11.559623718261719, 12.647428512573242, 13.735233306884766, 14.823039054870605, 15.910844802856445, 16.99864959716797, 18.086454391479492, 19.174259185791016, 20.262065887451172, 21.349870681762695, 22.43767547607422, 23.525482177734375, 24.6132869720459, 25.701091766357422, 26.788896560668945, 27.87670135498047, 28.964508056640625, 30.05231285095215, 31.140117645263672, 32.22792434692383, 33.31572723388672, 34.403533935546875]}, "gradients/decoder.transformer.h.11.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 4.0, 1.0, 5.0, 5.0, 3.0, 5.0, 7.0, 8.0, 9.0, 10.0, 20.0, 15.0, 25.0, 30.0, 31.0, 27.0, 42.0, 40.0, 37.0, 31.0, 47.0, 34.0, 41.0, 41.0, 42.0, 50.0, 50.0, 37.0, 35.0, 46.0, 42.0, 23.0, 24.0, 32.0, 18.0, 20.0, 17.0, 12.0, 9.0, 7.0, 6.0, 8.0, 6.0, 6.0, 3.0, 1.0, 2.0, 3.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.78125, -5.5792236328125, -5.377197265625, -5.1751708984375, -4.97314453125, -4.7711181640625, -4.569091796875, -4.3670654296875, -4.1650390625, -3.9630126953125, -3.760986328125, -3.5589599609375, -3.35693359375, -3.1549072265625, -2.952880859375, -2.7508544921875, -2.548828125, -2.3468017578125, -2.144775390625, -1.9427490234375, -1.74072265625, -1.5386962890625, -1.336669921875, -1.1346435546875, -0.9326171875, -0.7305908203125, -0.528564453125, -0.3265380859375, -0.12451171875, 0.0775146484375, 0.279541015625, 0.4815673828125, 0.68359375, 0.8856201171875, 1.087646484375, 1.2896728515625, 1.49169921875, 1.6937255859375, 1.895751953125, 2.0977783203125, 2.2998046875, 2.5018310546875, 2.703857421875, 2.9058837890625, 3.10791015625, 3.3099365234375, 3.511962890625, 3.7139892578125, 3.916015625, 4.1180419921875, 4.320068359375, 4.5220947265625, 4.72412109375, 4.9261474609375, 5.128173828125, 5.3302001953125, 5.5322265625, 5.7342529296875, 5.936279296875, 6.1383056640625, 6.34033203125, 6.5423583984375, 6.744384765625, 6.9464111328125, 7.1484375]}, "gradients/decoder.transformer.h.11.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 3.0, 0.0, 2.0, 4.0, 3.0, 8.0, 9.0, 15.0, 12.0, 26.0, 36.0, 54.0, 69.0, 128.0, 196.0, 235.0, 405.0, 585.0, 815.0, 1155.0, 1630.0, 2436.0, 3760.0, 5484.0, 8202.0, 12361.0, 19378.0, 30525.0, 49921.0, 87704.0, 172554.0, 323095.0, 135755.0, 72797.0, 42500.0, 26609.0, 16796.0, 10893.0, 7294.0, 4912.0, 3174.0, 2205.0, 1514.0, 1043.0, 689.0, 542.0, 341.0, 225.0, 158.0, 99.0, 67.0, 54.0, 32.0, 18.0, 21.0, 6.0, 10.0, 2.0, 5.0, 1.0, 3.0], "bins": [-0.230712890625, -0.22399520874023438, -0.21727752685546875, -0.21055984497070312, -0.2038421630859375, -0.19712448120117188, -0.19040679931640625, -0.18368911743164062, -0.176971435546875, -0.17025375366210938, -0.16353607177734375, -0.15681838989257812, -0.1501007080078125, -0.14338302612304688, -0.13666534423828125, -0.12994766235351562, -0.12322998046875, -0.11651229858398438, -0.10979461669921875, -0.10307693481445312, -0.0963592529296875, -0.08964157104492188, -0.08292388916015625, -0.07620620727539062, -0.069488525390625, -0.06277084350585938, -0.05605316162109375, -0.049335479736328125, -0.0426177978515625, -0.035900115966796875, -0.02918243408203125, -0.022464752197265625, -0.0157470703125, -0.009029388427734375, -0.00231170654296875, 0.004405975341796875, 0.0111236572265625, 0.017841339111328125, 0.02455902099609375, 0.031276702880859375, 0.037994384765625, 0.044712066650390625, 0.05142974853515625, 0.058147430419921875, 0.0648651123046875, 0.07158279418945312, 0.07830047607421875, 0.08501815795898438, 0.09173583984375, 0.09845352172851562, 0.10517120361328125, 0.11188888549804688, 0.1186065673828125, 0.12532424926757812, 0.13204193115234375, 0.13875961303710938, 0.145477294921875, 0.15219497680664062, 0.15891265869140625, 0.16563034057617188, 0.1723480224609375, 0.17906570434570312, 0.18578338623046875, 0.19250106811523438, 0.19921875]}, "gradients/decoder.transformer.h.11.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 8.0, 3.0, 5.0, 4.0, 8.0, 8.0, 8.0, 12.0, 14.0, 21.0, 14.0, 29.0, 20.0, 26.0, 36.0, 42.0, 38.0, 47.0, 47.0, 38.0, 52.0, 1069.0, 41.0, 45.0, 32.0, 31.0, 32.0, 44.0, 34.0, 36.0, 26.0, 36.0, 15.0, 25.0, 9.0, 12.0, 15.0, 12.0, 6.0, 5.0, 7.0, 6.0, 5.0, 6.0, 1.0, 4.0, 0.0, 0.0, 2.0], "bins": [-5.39453125, -5.24658203125, -5.0986328125, -4.95068359375, -4.802734375, -4.65478515625, -4.5068359375, -4.35888671875, -4.2109375, -4.06298828125, -3.9150390625, -3.76708984375, -3.619140625, -3.47119140625, -3.3232421875, -3.17529296875, -3.02734375, -2.87939453125, -2.7314453125, -2.58349609375, -2.435546875, -2.28759765625, -2.1396484375, -1.99169921875, -1.84375, -1.69580078125, -1.5478515625, -1.39990234375, -1.251953125, -1.10400390625, -0.9560546875, -0.80810546875, -0.66015625, -0.51220703125, -0.3642578125, -0.21630859375, -0.068359375, 0.07958984375, 0.2275390625, 0.37548828125, 0.5234375, 0.67138671875, 0.8193359375, 0.96728515625, 1.115234375, 1.26318359375, 1.4111328125, 1.55908203125, 1.70703125, 1.85498046875, 2.0029296875, 2.15087890625, 2.298828125, 2.44677734375, 2.5947265625, 2.74267578125, 2.890625, 3.03857421875, 3.1865234375, 3.33447265625, 3.482421875, 3.63037109375, 3.7783203125, 3.92626953125, 4.07421875]}, "gradients/decoder.transformer.h.11.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 3.0, 2.0, 1.0, 0.0, 8.0, 9.0, 21.0, 20.0, 37.0, 54.0, 100.0, 147.0, 246.0, 295.0, 454.0, 720.0, 938.0, 1357.0, 1908.0, 2848.0, 4079.0, 6385.0, 9480.0, 14415.0, 22830.0, 36511.0, 60529.0, 103817.0, 232260.0, 1319677.0, 108643.0, 62866.0, 37931.0, 23492.0, 14996.0, 9774.0, 6525.0, 4335.0, 2895.0, 2011.0, 1361.0, 1034.0, 659.0, 486.0, 317.0, 217.0, 174.0, 102.0, 58.0, 33.0, 33.0, 20.0, 8.0, 9.0, 4.0, 5.0, 1.0, 3.0, 1.0, 3.0], "bins": [-0.1302490234375, -0.12630748748779297, -0.12236595153808594, -0.1184244155883789, -0.11448287963867188, -0.11054134368896484, -0.10659980773925781, -0.10265827178955078, -0.09871673583984375, -0.09477519989013672, -0.09083366394042969, -0.08689212799072266, -0.08295059204101562, -0.0790090560913086, -0.07506752014160156, -0.07112598419189453, -0.0671844482421875, -0.06324291229248047, -0.05930137634277344, -0.055359840393066406, -0.051418304443359375, -0.047476768493652344, -0.04353523254394531, -0.03959369659423828, -0.03565216064453125, -0.03171062469482422, -0.027769088745117188, -0.023827552795410156, -0.019886016845703125, -0.015944480895996094, -0.012002944946289062, -0.008061408996582031, -0.004119873046875, -0.00017833709716796875, 0.0037631988525390625, 0.007704734802246094, 0.011646270751953125, 0.015587806701660156, 0.019529342651367188, 0.02347087860107422, 0.02741241455078125, 0.03135395050048828, 0.03529548645019531, 0.039237022399902344, 0.043178558349609375, 0.047120094299316406, 0.05106163024902344, 0.05500316619873047, 0.0589447021484375, 0.06288623809814453, 0.06682777404785156, 0.0707693099975586, 0.07471084594726562, 0.07865238189697266, 0.08259391784667969, 0.08653545379638672, 0.09047698974609375, 0.09441852569580078, 0.09836006164550781, 0.10230159759521484, 0.10624313354492188, 0.1101846694946289, 0.11412620544433594, 0.11806774139404297, 0.12200927734375]}, "gradients/decoder.transformer.h.11.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 3.0, 3.0, 3.0, 3.0, 5.0, 7.0, 6.0, 11.0, 10.0, 14.0, 15.0, 19.0, 16.0, 32.0, 42.0, 43.0, 51.0, 85.0, 137.0, 159.0, 84.0, 66.0, 38.0, 35.0, 26.0, 25.0, 12.0, 15.0, 6.0, 8.0, 6.0, 5.0, 4.0, 3.0, 3.0, 3.0, 5.0, 1.0, 2.0, 1.0, 3.0, 1.0, 0.0, 1.0, 2.0], "bins": [-6.67572021484375e-06, -6.50063157081604e-06, -6.32554292678833e-06, -6.15045428276062e-06, -5.97536563873291e-06, -5.8002769947052e-06, -5.62518835067749e-06, -5.45009970664978e-06, -5.27501106262207e-06, -5.09992241859436e-06, -4.92483377456665e-06, -4.7497451305389404e-06, -4.5746564865112305e-06, -4.3995678424835205e-06, -4.2244791984558105e-06, -4.049390554428101e-06, -3.874301910400391e-06, -3.6992132663726807e-06, -3.5241246223449707e-06, -3.3490359783172607e-06, -3.1739473342895508e-06, -2.998858690261841e-06, -2.823770046234131e-06, -2.648681402206421e-06, -2.473592758178711e-06, -2.298504114151001e-06, -2.123415470123291e-06, -1.948326826095581e-06, -1.773238182067871e-06, -1.5981495380401611e-06, -1.4230608940124512e-06, -1.2479722499847412e-06, -1.0728836059570312e-06, -8.977949619293213e-07, -7.227063179016113e-07, -5.476176738739014e-07, -3.725290298461914e-07, -1.9744038581848145e-07, -2.2351741790771484e-08, 1.5273690223693848e-07, 3.2782554626464844e-07, 5.029141902923584e-07, 6.780028343200684e-07, 8.530914783477783e-07, 1.0281801223754883e-06, 1.2032687664031982e-06, 1.3783574104309082e-06, 1.5534460544586182e-06, 1.7285346984863281e-06, 1.903623342514038e-06, 2.078711986541748e-06, 2.253800630569458e-06, 2.428889274597168e-06, 2.603977918624878e-06, 2.779066562652588e-06, 2.954155206680298e-06, 3.129243850708008e-06, 3.3043324947357178e-06, 3.4794211387634277e-06, 3.6545097827911377e-06, 3.829598426818848e-06, 4.004687070846558e-06, 4.179775714874268e-06, 4.3548643589019775e-06, 4.5299530029296875e-06]}, "gradients/decoder.transformer.h.11.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 2.0, 4.0, 5.0, 2.0, 3.0, 2.0, 0.0, 2.0, 7.0, 3.0, 8.0, 16.0, 19.0, 22.0, 38.0, 53.0, 98.0, 231.0, 1009.0, 28730.0, 980378.0, 36135.0, 1241.0, 226.0, 117.0, 59.0, 54.0, 29.0, 11.0, 15.0, 10.0, 4.0, 13.0, 3.0, 3.0, 7.0, 4.0, 3.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.998943328857422e-05, -7.672794163227081e-05, -7.346644997596741e-05, -7.0204958319664e-05, -6.69434666633606e-05, -6.368197500705719e-05, -6.0420483350753784e-05, -5.715899169445038e-05, -5.389750003814697e-05, -5.063600838184357e-05, -4.737451672554016e-05, -4.4113025069236755e-05, -4.085153341293335e-05, -3.7590041756629944e-05, -3.432855010032654e-05, -3.106705844402313e-05, -2.7805566787719727e-05, -2.454407513141632e-05, -2.1282583475112915e-05, -1.802109181880951e-05, -1.4759600162506104e-05, -1.1498108506202698e-05, -8.236616849899292e-06, -4.975125193595886e-06, -1.7136335372924805e-06, 1.5478581190109253e-06, 4.809349775314331e-06, 8.070841431617737e-06, 1.1332333087921143e-05, 1.4593824744224548e-05, 1.7855316400527954e-05, 2.111680805683136e-05, 2.4378299713134766e-05, 2.763979136943817e-05, 3.090128302574158e-05, 3.416277468204498e-05, 3.742426633834839e-05, 4.0685757994651794e-05, 4.39472496509552e-05, 4.7208741307258606e-05, 5.047023296356201e-05, 5.373172461986542e-05, 5.699321627616882e-05, 6.025470793247223e-05, 6.351619958877563e-05, 6.677769124507904e-05, 7.003918290138245e-05, 7.330067455768585e-05, 7.656216621398926e-05, 7.982365787029266e-05, 8.308514952659607e-05, 8.634664118289948e-05, 8.960813283920288e-05, 9.286962449550629e-05, 9.613111615180969e-05, 9.93926078081131e-05, 0.0001026540994644165, 0.00010591559112071991, 0.00010917708277702332, 0.00011243857443332672, 0.00011570006608963013, 0.00011896155774593353, 0.00012222304940223694, 0.00012548454105854034, 0.00012874603271484375]}, "gradients/decoder.transformer.h.11.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 3.0, 8.0, 30.0, 76.0, 226.0, 302.0, 234.0, 91.0, 36.0, 7.0, 5.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.593897286511492e-05, -1.5580793842673302e-05, -1.5222616639221087e-05, -1.486443852627417e-05, -1.4506260413327254e-05, -1.4148081390885636e-05, -1.378990327793872e-05, -1.3431725164991803e-05, -1.3073547052044887e-05, -1.271536893909797e-05, -1.2357190826151054e-05, -1.1999012713204138e-05, -1.164083369076252e-05, -1.1282655577815603e-05, -1.0924477464868687e-05, -1.056629935192177e-05, -1.0208121238974854e-05, -9.849943126027938e-06, -9.491765013081022e-06, -9.133586900134105e-06, -8.775408787187189e-06, -8.41722976474557e-06, -8.059051651798654e-06, -7.700873538851738e-06, -7.342695425904822e-06, -6.984517312957905e-06, -6.626339200010989e-06, -6.268160632316722e-06, -5.909982519369805e-06, -5.551804406422889e-06, -5.193625838728622e-06, -4.835447725781705e-06, -4.47727006758214e-06, -4.119091954635223e-06, -3.7609136143146316e-06, -3.4027352739940397e-06, -3.0445571610471234e-06, -2.686379048100207e-06, -2.328200707779615e-06, -1.9700223674590234e-06, -1.611844254512107e-06, -1.253666027878353e-06, -8.954878012445988e-07, -5.373095746108447e-07, -1.7913134797709063e-07, 1.7904687865666347e-07, 5.372251052904176e-07, 8.954034456110094e-07, 1.2535815585579257e-06, 1.6117597851916798e-06, 1.969938011825434e-06, 2.3281163521460257e-06, 2.686294465092942e-06, 3.0444725780398585e-06, 3.4026509183604503e-06, 3.760829258681042e-06, 4.1190073716279585e-06, 4.477185484574875e-06, 4.835364052269142e-06, 5.1935421652160585e-06, 5.551720278162975e-06, 5.909898391109891e-06, 6.268076504056808e-06, 6.626255071751075e-06, 6.984433184697991e-06]}, "gradients/decoder.transformer.h.11.ln_cross_attn.bias": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 3.0, 2.0, 2.0, 2.0, 3.0, 6.0, 3.0, 2.0, 7.0, 8.0, 2.0, 12.0, 8.0, 10.0, 9.0, 8.0, 10.0, 17.0, 38.0, 24.0, 22.0, 29.0, 24.0, 27.0, 27.0, 36.0, 34.0, 29.0, 41.0, 70.0, 33.0, 38.0, 34.0, 39.0, 36.0, 38.0, 26.0, 24.0, 28.0, 52.0, 24.0, 14.0, 16.0, 19.0, 9.0, 14.0, 10.0, 10.0, 7.0, 6.0, 8.0, 3.0, 2.0, 2.0, 5.0, 2.0, 0.0, 1.0, 1.0, 2.0, 3.0], "bins": [-2.1457672119140625e-06, -2.080574631690979e-06, -2.0153820514678955e-06, -1.950189471244812e-06, -1.8849968910217285e-06, -1.819804310798645e-06, -1.7546117305755615e-06, -1.689419150352478e-06, -1.6242265701293945e-06, -1.559033989906311e-06, -1.4938414096832275e-06, -1.428648829460144e-06, -1.3634562492370605e-06, -1.298263669013977e-06, -1.2330710887908936e-06, -1.16787850856781e-06, -1.1026859283447266e-06, -1.037493348121643e-06, -9.723007678985596e-07, -9.071081876754761e-07, -8.419156074523926e-07, -7.767230272293091e-07, -7.115304470062256e-07, -6.463378667831421e-07, -5.811452865600586e-07, -5.159527063369751e-07, -4.507601261138916e-07, -3.855675458908081e-07, -3.203749656677246e-07, -2.551823854446411e-07, -1.8998980522155762e-07, -1.2479722499847412e-07, -5.960464477539063e-08, 5.587935447692871e-09, 7.078051567077637e-08, 1.3597309589385986e-07, 2.0116567611694336e-07, 2.6635825634002686e-07, 3.3155083656311035e-07, 3.9674341678619385e-07, 4.6193599700927734e-07, 5.271285772323608e-07, 5.923211574554443e-07, 6.575137376785278e-07, 7.227063179016113e-07, 7.878988981246948e-07, 8.530914783477783e-07, 9.182840585708618e-07, 9.834766387939453e-07, 1.0486692190170288e-06, 1.1138617992401123e-06, 1.1790543794631958e-06, 1.2442469596862793e-06, 1.3094395399093628e-06, 1.3746321201324463e-06, 1.4398247003555298e-06, 1.5050172805786133e-06, 1.5702098608016968e-06, 1.6354024410247803e-06, 1.7005950212478638e-06, 1.7657876014709473e-06, 1.8309801816940308e-06, 1.8961727619171143e-06, 1.9613653421401978e-06, 2.0265579223632812e-06]}, "gradients/decoder.transformer.h.11.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 4.0, 1.0, 5.0, 5.0, 3.0, 5.0, 7.0, 8.0, 9.0, 10.0, 20.0, 15.0, 25.0, 30.0, 31.0, 27.0, 42.0, 40.0, 37.0, 31.0, 47.0, 34.0, 41.0, 41.0, 42.0, 50.0, 50.0, 37.0, 35.0, 46.0, 42.0, 23.0, 24.0, 32.0, 18.0, 20.0, 17.0, 12.0, 9.0, 7.0, 6.0, 8.0, 6.0, 6.0, 3.0, 1.0, 2.0, 3.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.78125, -5.5792236328125, -5.377197265625, -5.1751708984375, -4.97314453125, -4.7711181640625, -4.569091796875, -4.3670654296875, -4.1650390625, -3.9630126953125, -3.760986328125, -3.5589599609375, -3.35693359375, -3.1549072265625, -2.952880859375, -2.7508544921875, -2.548828125, -2.3468017578125, -2.144775390625, -1.9427490234375, -1.74072265625, -1.5386962890625, -1.336669921875, -1.1346435546875, -0.9326171875, -0.7305908203125, -0.528564453125, -0.3265380859375, -0.12451171875, 0.0775146484375, 0.279541015625, 0.4815673828125, 0.68359375, 0.8856201171875, 1.087646484375, 1.2896728515625, 1.49169921875, 1.6937255859375, 1.895751953125, 2.0977783203125, 2.2998046875, 2.5018310546875, 2.703857421875, 2.9058837890625, 3.10791015625, 3.3099365234375, 3.511962890625, 3.7139892578125, 3.916015625, 4.1180419921875, 4.320068359375, 4.5220947265625, 4.72412109375, 4.9261474609375, 5.128173828125, 5.3302001953125, 5.5322265625, 5.7342529296875, 5.936279296875, 6.1383056640625, 6.34033203125, 6.5423583984375, 6.744384765625, 6.9464111328125, 7.1484375]}, "gradients/decoder.transformer.h.11.attn.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 7.0, 1.0, 4.0, 10.0, 10.0, 14.0, 29.0, 45.0, 59.0, 100.0, 188.0, 333.0, 509.0, 874.0, 1593.0, 2786.0, 5205.0, 9484.0, 20024.0, 44882.0, 119888.0, 379871.0, 300169.0, 91509.0, 36110.0, 16269.0, 8372.0, 4440.0, 2380.0, 1438.0, 815.0, 501.0, 257.0, 154.0, 105.0, 58.0, 26.0, 23.0, 11.0, 3.0, 5.0, 1.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-8.1015625, -7.847412109375, -7.59326171875, -7.339111328125, -7.0849609375, -6.830810546875, -6.57666015625, -6.322509765625, -6.068359375, -5.814208984375, -5.56005859375, -5.305908203125, -5.0517578125, -4.797607421875, -4.54345703125, -4.289306640625, -4.03515625, -3.781005859375, -3.52685546875, -3.272705078125, -3.0185546875, -2.764404296875, -2.51025390625, -2.256103515625, -2.001953125, -1.747802734375, -1.49365234375, -1.239501953125, -0.9853515625, -0.731201171875, -0.47705078125, -0.222900390625, 0.03125, 0.285400390625, 0.53955078125, 0.793701171875, 1.0478515625, 1.302001953125, 1.55615234375, 1.810302734375, 2.064453125, 2.318603515625, 2.57275390625, 2.826904296875, 3.0810546875, 3.335205078125, 3.58935546875, 3.843505859375, 4.09765625, 4.351806640625, 4.60595703125, 4.860107421875, 5.1142578125, 5.368408203125, 5.62255859375, 5.876708984375, 6.130859375, 6.385009765625, 6.63916015625, 6.893310546875, 7.1474609375, 7.401611328125, 7.65576171875, 7.909912109375, 8.1640625]}, "gradients/decoder.transformer.h.11.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 3.0, 3.0, 6.0, 1.0, 9.0, 6.0, 16.0, 15.0, 12.0, 10.0, 14.0, 26.0, 18.0, 26.0, 46.0, 31.0, 35.0, 44.0, 42.0, 62.0, 83.0, 221.0, 1539.0, 253.0, 111.0, 63.0, 47.0, 45.0, 33.0, 29.0, 30.0, 24.0, 30.0, 19.0, 24.0, 18.0, 14.0, 12.0, 10.0, 6.0, 8.0, 6.0, 4.0, 4.0, 3.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-19.953125, -19.27783203125, -18.6025390625, -17.92724609375, -17.251953125, -16.57666015625, -15.9013671875, -15.22607421875, -14.55078125, -13.87548828125, -13.2001953125, -12.52490234375, -11.849609375, -11.17431640625, -10.4990234375, -9.82373046875, -9.1484375, -8.47314453125, -7.7978515625, -7.12255859375, -6.447265625, -5.77197265625, -5.0966796875, -4.42138671875, -3.74609375, -3.07080078125, -2.3955078125, -1.72021484375, -1.044921875, -0.36962890625, 0.3056640625, 0.98095703125, 1.65625, 2.33154296875, 3.0068359375, 3.68212890625, 4.357421875, 5.03271484375, 5.7080078125, 6.38330078125, 7.05859375, 7.73388671875, 8.4091796875, 9.08447265625, 9.759765625, 10.43505859375, 11.1103515625, 11.78564453125, 12.4609375, 13.13623046875, 13.8115234375, 14.48681640625, 15.162109375, 15.83740234375, 16.5126953125, 17.18798828125, 17.86328125, 18.53857421875, 19.2138671875, 19.88916015625, 20.564453125, 21.23974609375, 21.9150390625, 22.59033203125, 23.265625]}, "gradients/decoder.transformer.h.11.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 4.0, 4.0, 8.0, 4.0, 9.0, 8.0, 13.0, 11.0, 16.0, 31.0, 30.0, 31.0, 60.0, 57.0, 102.0, 107.0, 190.0, 248.0, 464.0, 1976.0, 23540.0, 2728908.0, 379241.0, 8355.0, 1123.0, 385.0, 183.0, 152.0, 103.0, 88.0, 65.0, 49.0, 37.0, 24.0, 18.0, 12.0, 22.0, 13.0, 5.0, 10.0, 4.0, 1.0, 5.0, 2.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-49.5625, -48.13916015625, -46.7158203125, -45.29248046875, -43.869140625, -42.44580078125, -41.0224609375, -39.59912109375, -38.17578125, -36.75244140625, -35.3291015625, -33.90576171875, -32.482421875, -31.05908203125, -29.6357421875, -28.21240234375, -26.7890625, -25.36572265625, -23.9423828125, -22.51904296875, -21.095703125, -19.67236328125, -18.2490234375, -16.82568359375, -15.40234375, -13.97900390625, -12.5556640625, -11.13232421875, -9.708984375, -8.28564453125, -6.8623046875, -5.43896484375, -4.015625, -2.59228515625, -1.1689453125, 0.25439453125, 1.677734375, 3.10107421875, 4.5244140625, 5.94775390625, 7.37109375, 8.79443359375, 10.2177734375, 11.64111328125, 13.064453125, 14.48779296875, 15.9111328125, 17.33447265625, 18.7578125, 20.18115234375, 21.6044921875, 23.02783203125, 24.451171875, 25.87451171875, 27.2978515625, 28.72119140625, 30.14453125, 31.56787109375, 32.9912109375, 34.41455078125, 35.837890625, 37.26123046875, 38.6845703125, 40.10791015625, 41.53125]}, "gradients/decoder.transformer.h.11.ln_1.weight": {"_type": "histogram", "values": [173.0, 842.0, 3.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-27.6108341217041, -4.602449417114258, 18.405935287475586, 41.41432189941406, 64.4227066040039, 87.43109130859375, 110.4394760131836, 133.44784545898438, 156.45623779296875, 179.46463012695312, 202.47300720214844, 225.48138427734375, 248.48977661132812, 271.4981689453125, 294.50653076171875, 317.5149230957031, 340.5233154296875, 363.5317077636719, 386.54010009765625, 409.5484619140625, 432.5568542480469, 455.56524658203125, 478.5736083984375, 501.5820007324219, 524.5903930664062, 547.5987548828125, 570.607177734375, 593.6155395507812, 616.6239013671875, 639.63232421875, 662.6406860351562, 685.6490478515625, 708.657470703125, 731.6658325195312, 754.6742553710938, 777.6826171875, 800.6910400390625, 823.6994018554688, 846.707763671875, 869.7161865234375, 892.7245483398438, 915.73291015625, 938.7413330078125, 961.7496948242188, 984.758056640625, 1007.7664794921875, 1030.77490234375, 1053.783203125, 1076.7916259765625, 1099.800048828125, 1122.808349609375, 1145.8167724609375, 1168.8251953125, 1191.83349609375, 1214.8419189453125, 1237.850341796875, 1260.858642578125, 1283.8670654296875, 1306.8753662109375, 1329.8837890625, 1352.8922119140625, 1375.9005126953125, 1398.908935546875, 1421.9173583984375, 1444.92578125]}, "gradients/decoder.transformer.h.11.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 5.0, 3.0, 5.0, 2.0, 4.0, 10.0, 5.0, 7.0, 16.0, 15.0, 11.0, 10.0, 24.0, 22.0, 24.0, 32.0, 25.0, 36.0, 23.0, 37.0, 39.0, 51.0, 36.0, 53.0, 43.0, 46.0, 37.0, 35.0, 40.0, 45.0, 32.0, 23.0, 34.0, 23.0, 31.0, 14.0, 20.0, 22.0, 15.0, 10.0, 8.0, 3.0, 9.0, 6.0, 3.0, 6.0, 6.0, 3.0, 1.0, 0.0, 3.0, 1.0, 0.0, 1.0, 2.0, 1.0], "bins": [-56.919952392578125, -55.22909927368164, -53.538246154785156, -51.84739685058594, -50.15654373168945, -48.46569061279297, -46.77484130859375, -45.083988189697266, -43.39313507080078, -41.7022819519043, -40.01142883300781, -38.320579528808594, -36.62972640991211, -34.938873291015625, -33.248023986816406, -31.557170867919922, -29.866317749023438, -28.175464630126953, -26.4846134185791, -24.79376220703125, -23.102909088134766, -21.41205596923828, -19.72120475769043, -18.030353546142578, -16.339500427246094, -14.648648262023926, -12.957796096801758, -11.26694393157959, -9.576091766357422, -7.885239601135254, -6.194387435913086, -4.503535270690918, -2.81268310546875, -1.121830940246582, 0.5690212249755859, 2.259873390197754, 3.950725555419922, 5.64157772064209, 7.332429885864258, 9.023282051086426, 10.714134216308594, 12.404986381530762, 14.09583854675293, 15.786690711975098, 17.477542877197266, 19.16839599609375, 20.8592472076416, 22.550098419189453, 24.240951538085938, 25.931804656982422, 27.622655868530273, 29.313507080078125, 31.00436019897461, 32.695213317871094, 34.38606262207031, 36.0769157409668, 37.76776885986328, 39.458621978759766, 41.14947509765625, 42.84032440185547, 44.53117752075195, 46.22203063964844, 47.912879943847656, 49.60373306274414, 51.294586181640625]}, "gradients/decoder.transformer.h.10.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 3.0, 4.0, 2.0, 4.0, 4.0, 7.0, 4.0, 8.0, 11.0, 15.0, 12.0, 19.0, 24.0, 20.0, 27.0, 23.0, 25.0, 29.0, 41.0, 40.0, 37.0, 36.0, 39.0, 30.0, 39.0, 41.0, 45.0, 37.0, 35.0, 39.0, 40.0, 32.0, 36.0, 30.0, 33.0, 23.0, 17.0, 21.0, 12.0, 17.0, 12.0, 3.0, 10.0, 9.0, 4.0, 2.0, 5.0, 3.0, 3.0, 2.0, 2.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.359375, -5.16778564453125, -4.9761962890625, -4.78460693359375, -4.593017578125, -4.40142822265625, -4.2098388671875, -4.01824951171875, -3.82666015625, -3.63507080078125, -3.4434814453125, -3.25189208984375, -3.060302734375, -2.86871337890625, -2.6771240234375, -2.48553466796875, -2.2939453125, -2.10235595703125, -1.9107666015625, -1.71917724609375, -1.527587890625, -1.33599853515625, -1.1444091796875, -0.95281982421875, -0.76123046875, -0.56964111328125, -0.3780517578125, -0.18646240234375, 0.005126953125, 0.19671630859375, 0.3883056640625, 0.57989501953125, 0.771484375, 0.96307373046875, 1.1546630859375, 1.34625244140625, 1.537841796875, 1.72943115234375, 1.9210205078125, 2.11260986328125, 2.30419921875, 2.49578857421875, 2.6873779296875, 2.87896728515625, 3.070556640625, 3.26214599609375, 3.4537353515625, 3.64532470703125, 3.8369140625, 4.02850341796875, 4.2200927734375, 4.41168212890625, 4.603271484375, 4.79486083984375, 4.9864501953125, 5.17803955078125, 5.36962890625, 5.56121826171875, 5.7528076171875, 5.94439697265625, 6.135986328125, 6.32757568359375, 6.5191650390625, 6.71075439453125, 6.90234375]}, "gradients/decoder.transformer.h.10.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 3.0, 3.0, 2.0, 5.0, 6.0, 5.0, 7.0, 6.0, 12.0, 22.0, 16.0, 37.0, 40.0, 55.0, 83.0, 143.0, 231.0, 436.0, 999.0, 2275.0, 6050.0, 17625.0, 66048.0, 413707.0, 2724397.0, 820581.0, 103062.0, 24971.0, 8017.0, 3043.0, 1153.0, 461.0, 307.0, 152.0, 95.0, 57.0, 40.0, 37.0, 24.0, 13.0, 16.0, 13.0, 8.0, 8.0, 6.0, 5.0, 7.0, 2.0, 2.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-13.1015625, -12.6441650390625, -12.186767578125, -11.7293701171875, -11.27197265625, -10.8145751953125, -10.357177734375, -9.8997802734375, -9.4423828125, -8.9849853515625, -8.527587890625, -8.0701904296875, -7.61279296875, -7.1553955078125, -6.697998046875, -6.2406005859375, -5.783203125, -5.3258056640625, -4.868408203125, -4.4110107421875, -3.95361328125, -3.4962158203125, -3.038818359375, -2.5814208984375, -2.1240234375, -1.6666259765625, -1.209228515625, -0.7518310546875, -0.29443359375, 0.1629638671875, 0.620361328125, 1.0777587890625, 1.53515625, 1.9925537109375, 2.449951171875, 2.9073486328125, 3.36474609375, 3.8221435546875, 4.279541015625, 4.7369384765625, 5.1943359375, 5.6517333984375, 6.109130859375, 6.5665283203125, 7.02392578125, 7.4813232421875, 7.938720703125, 8.3961181640625, 8.853515625, 9.3109130859375, 9.768310546875, 10.2257080078125, 10.68310546875, 11.1405029296875, 11.597900390625, 12.0552978515625, 12.5126953125, 12.9700927734375, 13.427490234375, 13.8848876953125, 14.34228515625, 14.7996826171875, 15.257080078125, 15.7144775390625, 16.171875]}, "gradients/decoder.transformer.h.10.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 1.0, 5.0, 5.0, 9.0, 6.0, 12.0, 17.0, 23.0, 34.0, 62.0, 88.0, 131.0, 184.0, 318.0, 487.0, 708.0, 651.0, 456.0, 251.0, 199.0, 133.0, 77.0, 77.0, 41.0, 34.0, 25.0, 15.0, 8.0, 6.0, 5.0, 4.0, 6.0, 2.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-11.59375, -11.1279296875, -10.662109375, -10.1962890625, -9.73046875, -9.2646484375, -8.798828125, -8.3330078125, -7.8671875, -7.4013671875, -6.935546875, -6.4697265625, -6.00390625, -5.5380859375, -5.072265625, -4.6064453125, -4.140625, -3.6748046875, -3.208984375, -2.7431640625, -2.27734375, -1.8115234375, -1.345703125, -0.8798828125, -0.4140625, 0.0517578125, 0.517578125, 0.9833984375, 1.44921875, 1.9150390625, 2.380859375, 2.8466796875, 3.3125, 3.7783203125, 4.244140625, 4.7099609375, 5.17578125, 5.6416015625, 6.107421875, 6.5732421875, 7.0390625, 7.5048828125, 7.970703125, 8.4365234375, 8.90234375, 9.3681640625, 9.833984375, 10.2998046875, 10.765625, 11.2314453125, 11.697265625, 12.1630859375, 12.62890625, 13.0947265625, 13.560546875, 14.0263671875, 14.4921875, 14.9580078125, 15.423828125, 15.8896484375, 16.35546875, 16.8212890625, 17.287109375, 17.7529296875, 18.21875]}, "gradients/decoder.transformer.h.10.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 4.0, 5.0, 4.0, 4.0, 0.0, 8.0, 12.0, 19.0, 21.0, 35.0, 47.0, 71.0, 100.0, 158.0, 278.0, 653.0, 2342.0, 38131.0, 3777250.0, 366161.0, 7066.0, 1047.0, 374.0, 190.0, 110.0, 79.0, 42.0, 20.0, 19.0, 11.0, 10.0, 7.0, 7.0, 2.0, 3.0, 4.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-76.375, -74.453125, -72.53125, -70.609375, -68.6875, -66.765625, -64.84375, -62.921875, -61.0, -59.078125, -57.15625, -55.234375, -53.3125, -51.390625, -49.46875, -47.546875, -45.625, -43.703125, -41.78125, -39.859375, -37.9375, -36.015625, -34.09375, -32.171875, -30.25, -28.328125, -26.40625, -24.484375, -22.5625, -20.640625, -18.71875, -16.796875, -14.875, -12.953125, -11.03125, -9.109375, -7.1875, -5.265625, -3.34375, -1.421875, 0.5, 2.421875, 4.34375, 6.265625, 8.1875, 10.109375, 12.03125, 13.953125, 15.875, 17.796875, 19.71875, 21.640625, 23.5625, 25.484375, 27.40625, 29.328125, 31.25, 33.171875, 35.09375, 37.015625, 38.9375, 40.859375, 42.78125, 44.703125, 46.625]}, "gradients/decoder.transformer.h.10.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0, 2.0, 5.0, 11.0, 21.0, 20.0, 40.0, 60.0, 93.0, 120.0, 135.0, 141.0, 109.0, 77.0, 69.0, 45.0, 23.0, 14.0, 12.0, 3.0, 5.0, 7.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-68.11187744140625, -65.82868194580078, -63.54548263549805, -61.26228332519531, -58.979087829589844, -56.69588851928711, -54.412689208984375, -52.129493713378906, -49.84629821777344, -47.5630989074707, -45.279903411865234, -42.9967041015625, -40.71350860595703, -38.4303092956543, -36.14710998535156, -33.863914489746094, -31.58071517944336, -29.297517776489258, -27.014320373535156, -24.731121063232422, -22.447925567626953, -20.16472625732422, -17.881528854370117, -15.598331451416016, -13.315134048461914, -11.031936645507812, -8.748739242553711, -6.465540885925293, -4.182343482971191, -1.8991460800170898, 0.3840522766113281, 2.6672496795654297, 4.950447082519531, 7.233644485473633, 9.516841888427734, 11.800040245056152, 14.083237648010254, 16.366436004638672, 18.649633407592773, 20.932830810546875, 23.216028213500977, 25.499225616455078, 27.78242301940918, 30.06562042236328, 32.348819732666016, 34.632015228271484, 36.91521453857422, 39.19841003417969, 41.48160934448242, 43.764808654785156, 46.048004150390625, 48.33120346069336, 50.61439895629883, 52.89759826660156, 55.18079376220703, 57.463993072509766, 59.7471923828125, 62.030391693115234, 64.31359100341797, 66.59678649902344, 68.8799819946289, 71.16317749023438, 73.44638061523438, 75.72957611083984, 78.01277160644531]}, "gradients/decoder.transformer.h.10.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 4.0, 2.0, 5.0, 5.0, 6.0, 7.0, 3.0, 8.0, 10.0, 18.0, 10.0, 15.0, 17.0, 28.0, 21.0, 32.0, 34.0, 33.0, 43.0, 38.0, 43.0, 36.0, 45.0, 60.0, 51.0, 47.0, 50.0, 41.0, 36.0, 34.0, 23.0, 32.0, 23.0, 26.0, 25.0, 14.0, 22.0, 10.0, 15.0, 8.0, 6.0, 6.0, 6.0, 3.0, 6.0, 4.0, 2.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-44.50270080566406, -43.19538879394531, -41.88807678222656, -40.58076477050781, -39.27345275878906, -37.96614074707031, -36.65882873535156, -35.35151672363281, -34.04420471191406, -32.73689270019531, -31.429580688476562, -30.122268676757812, -28.814956665039062, -27.507644653320312, -26.200332641601562, -24.893020629882812, -23.585708618164062, -22.278396606445312, -20.971084594726562, -19.663772583007812, -18.356460571289062, -17.049148559570312, -15.741836547851562, -14.434524536132812, -13.127212524414062, -11.819900512695312, -10.512588500976562, -9.205276489257812, -7.8979644775390625, -6.5906524658203125, -5.2833404541015625, -3.9760284423828125, -2.668712615966797, -1.3614006042480469, -0.054088592529296875, 1.2532234191894531, 2.560535430908203, 3.867847442626953, 5.175159454345703, 6.482471466064453, 7.789783477783203, 9.097095489501953, 10.404407501220703, 11.711719512939453, 13.019031524658203, 14.326343536376953, 15.633655548095703, 16.940967559814453, 18.248279571533203, 19.555591583251953, 20.862903594970703, 22.170215606689453, 23.477527618408203, 24.784839630126953, 26.092151641845703, 27.399463653564453, 28.706775665283203, 30.014087677001953, 31.321399688720703, 32.62871170043945, 33.9360237121582, 35.24333572387695, 36.5506477355957, 37.85795974731445, 39.1652717590332]}, "gradients/decoder.transformer.h.10.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 2.0, 5.0, 0.0, 6.0, 5.0, 4.0, 2.0, 7.0, 12.0, 11.0, 12.0, 14.0, 20.0, 21.0, 26.0, 26.0, 28.0, 36.0, 27.0, 31.0, 56.0, 42.0, 38.0, 37.0, 46.0, 27.0, 49.0, 36.0, 30.0, 48.0, 43.0, 40.0, 25.0, 32.0, 31.0, 22.0, 22.0, 20.0, 11.0, 14.0, 13.0, 7.0, 8.0, 3.0, 3.0, 4.0, 2.0, 6.0, 2.0, 3.0, 0.0, 3.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.328125, -5.124267578125, -4.92041015625, -4.716552734375, -4.5126953125, -4.308837890625, -4.10498046875, -3.901123046875, -3.697265625, -3.493408203125, -3.28955078125, -3.085693359375, -2.8818359375, -2.677978515625, -2.47412109375, -2.270263671875, -2.06640625, -1.862548828125, -1.65869140625, -1.454833984375, -1.2509765625, -1.047119140625, -0.84326171875, -0.639404296875, -0.435546875, -0.231689453125, -0.02783203125, 0.176025390625, 0.3798828125, 0.583740234375, 0.78759765625, 0.991455078125, 1.1953125, 1.399169921875, 1.60302734375, 1.806884765625, 2.0107421875, 2.214599609375, 2.41845703125, 2.622314453125, 2.826171875, 3.030029296875, 3.23388671875, 3.437744140625, 3.6416015625, 3.845458984375, 4.04931640625, 4.253173828125, 4.45703125, 4.660888671875, 4.86474609375, 5.068603515625, 5.2724609375, 5.476318359375, 5.68017578125, 5.884033203125, 6.087890625, 6.291748046875, 6.49560546875, 6.699462890625, 6.9033203125, 7.107177734375, 7.31103515625, 7.514892578125, 7.71875]}, "gradients/decoder.transformer.h.10.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 4.0, 1.0, 0.0, 3.0, 5.0, 8.0, 7.0, 18.0, 13.0, 35.0, 30.0, 69.0, 89.0, 145.0, 206.0, 279.0, 436.0, 659.0, 926.0, 1430.0, 2154.0, 3284.0, 5202.0, 8201.0, 13001.0, 20954.0, 34510.0, 58907.0, 108090.0, 260160.0, 267448.0, 110188.0, 59552.0, 34735.0, 21144.0, 13044.0, 8277.0, 5399.0, 3355.0, 2239.0, 1462.0, 975.0, 638.0, 446.0, 282.0, 183.0, 132.0, 74.0, 60.0, 42.0, 25.0, 16.0, 11.0, 8.0, 4.0, 2.0, 2.0, 3.0, 1.0], "bins": [-0.275390625, -0.26752471923828125, -0.2596588134765625, -0.25179290771484375, -0.243927001953125, -0.23606109619140625, -0.2281951904296875, -0.22032928466796875, -0.21246337890625, -0.20459747314453125, -0.1967315673828125, -0.18886566162109375, -0.180999755859375, -0.17313385009765625, -0.1652679443359375, -0.15740203857421875, -0.1495361328125, -0.14167022705078125, -0.1338043212890625, -0.12593841552734375, -0.118072509765625, -0.11020660400390625, -0.1023406982421875, -0.09447479248046875, -0.08660888671875, -0.07874298095703125, -0.0708770751953125, -0.06301116943359375, -0.055145263671875, -0.04727935791015625, -0.0394134521484375, -0.03154754638671875, -0.023681640625, -0.01581573486328125, -0.0079498291015625, -8.392333984375e-05, 0.007781982421875, 0.01564788818359375, 0.0235137939453125, 0.03137969970703125, 0.03924560546875, 0.04711151123046875, 0.0549774169921875, 0.06284332275390625, 0.070709228515625, 0.07857513427734375, 0.0864410400390625, 0.09430694580078125, 0.1021728515625, 0.11003875732421875, 0.1179046630859375, 0.12577056884765625, 0.133636474609375, 0.14150238037109375, 0.1493682861328125, 0.15723419189453125, 0.16510009765625, 0.17296600341796875, 0.1808319091796875, 0.18869781494140625, 0.196563720703125, 0.20442962646484375, 0.2122955322265625, 0.22016143798828125, 0.22802734375]}, "gradients/decoder.transformer.h.10.crossattention.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 1.0, 1.0, 2.0, 1.0, 3.0, 2.0, 6.0, 3.0, 7.0, 14.0, 13.0, 12.0, 12.0, 14.0, 16.0, 15.0, 26.0, 23.0, 30.0, 27.0, 25.0, 27.0, 38.0, 31.0, 36.0, 31.0, 39.0, 1054.0, 52.0, 47.0, 39.0, 36.0, 33.0, 35.0, 23.0, 32.0, 35.0, 21.0, 19.0, 21.0, 20.0, 15.0, 20.0, 16.0, 8.0, 7.0, 12.0, 13.0, 10.0, 4.0, 2.0, 5.0, 3.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0], "bins": [-4.25390625, -4.11517333984375, -3.9764404296875, -3.83770751953125, -3.698974609375, -3.56024169921875, -3.4215087890625, -3.28277587890625, -3.14404296875, -3.00531005859375, -2.8665771484375, -2.72784423828125, -2.589111328125, -2.45037841796875, -2.3116455078125, -2.17291259765625, -2.0341796875, -1.89544677734375, -1.7567138671875, -1.61798095703125, -1.479248046875, -1.34051513671875, -1.2017822265625, -1.06304931640625, -0.92431640625, -0.78558349609375, -0.6468505859375, -0.50811767578125, -0.369384765625, -0.23065185546875, -0.0919189453125, 0.04681396484375, 0.185546875, 0.32427978515625, 0.4630126953125, 0.60174560546875, 0.740478515625, 0.87921142578125, 1.0179443359375, 1.15667724609375, 1.29541015625, 1.43414306640625, 1.5728759765625, 1.71160888671875, 1.850341796875, 1.98907470703125, 2.1278076171875, 2.26654052734375, 2.4052734375, 2.54400634765625, 2.6827392578125, 2.82147216796875, 2.960205078125, 3.09893798828125, 3.2376708984375, 3.37640380859375, 3.51513671875, 3.65386962890625, 3.7926025390625, 3.93133544921875, 4.070068359375, 4.20880126953125, 4.3475341796875, 4.48626708984375, 4.625]}, "gradients/decoder.transformer.h.10.crossattention.c_attn.weight": {"_type": "histogram", "values": [6.0, 6.0, 9.0, 12.0, 25.0, 21.0, 32.0, 44.0, 77.0, 95.0, 122.0, 188.0, 269.0, 413.0, 545.0, 771.0, 1022.0, 1434.0, 1945.0, 2598.0, 3683.0, 5114.0, 6919.0, 10039.0, 14275.0, 19925.0, 29131.0, 42643.0, 63949.0, 101620.0, 1281575.0, 211466.0, 98290.0, 61814.0, 41508.0, 27787.0, 19464.0, 13703.0, 9813.0, 6927.0, 4888.0, 3540.0, 2579.0, 1965.0, 1439.0, 1003.0, 681.0, 506.0, 371.0, 288.0, 162.0, 134.0, 98.0, 81.0, 43.0, 34.0, 15.0, 9.0, 15.0, 10.0, 6.0, 2.0, 2.0, 2.0], "bins": [-0.1046142578125, -0.10123634338378906, -0.09785842895507812, -0.09448051452636719, -0.09110260009765625, -0.08772468566894531, -0.08434677124023438, -0.08096885681152344, -0.0775909423828125, -0.07421302795410156, -0.07083511352539062, -0.06745719909667969, -0.06407928466796875, -0.06070137023925781, -0.057323455810546875, -0.05394554138183594, -0.050567626953125, -0.04718971252441406, -0.043811798095703125, -0.04043388366699219, -0.03705596923828125, -0.03367805480957031, -0.030300140380859375, -0.026922225952148438, -0.0235443115234375, -0.020166397094726562, -0.016788482666015625, -0.013410568237304688, -0.01003265380859375, -0.0066547393798828125, -0.003276824951171875, 0.0001010894775390625, 0.00347900390625, 0.0068569183349609375, 0.010234832763671875, 0.013612747192382812, 0.01699066162109375, 0.020368576049804688, 0.023746490478515625, 0.027124404907226562, 0.0305023193359375, 0.03388023376464844, 0.037258148193359375, 0.04063606262207031, 0.04401397705078125, 0.04739189147949219, 0.050769805908203125, 0.05414772033691406, 0.057525634765625, 0.06090354919433594, 0.06428146362304688, 0.06765937805175781, 0.07103729248046875, 0.07441520690917969, 0.07779312133789062, 0.08117103576660156, 0.0845489501953125, 0.08792686462402344, 0.09130477905273438, 0.09468269348144531, 0.09806060791015625, 0.10143852233886719, 0.10481643676757812, 0.10819435119628906, 0.111572265625]}, "gradients/decoder.transformer.h.10.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 3.0, 0.0, 4.0, 2.0, 2.0, 4.0, 3.0, 8.0, 1.0, 2.0, 0.0, 6.0, 2.0, 5.0, 12.0, 17.0, 20.0, 4.0, 23.0, 7.0, 31.0, 18.0, 38.0, 31.0, 53.0, 32.0, 84.0, 143.0, 52.0, 96.0, 41.0, 59.0, 14.0, 35.0, 18.0, 26.0, 10.0, 20.0, 14.0, 11.0, 10.0, 5.0, 17.0, 8.0, 9.0, 3.0, 3.0, 1.0, 4.0, 2.0, 0.0, 1.0, 3.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0], "bins": [-3.039836883544922e-06, -2.9476359486579895e-06, -2.855435013771057e-06, -2.7632340788841248e-06, -2.6710331439971924e-06, -2.57883220911026e-06, -2.4866312742233276e-06, -2.3944303393363953e-06, -2.302229404449463e-06, -2.2100284695625305e-06, -2.117827534675598e-06, -2.0256265997886658e-06, -1.9334256649017334e-06, -1.841224730014801e-06, -1.7490237951278687e-06, -1.6568228602409363e-06, -1.564621925354004e-06, -1.4724209904670715e-06, -1.3802200555801392e-06, -1.2880191206932068e-06, -1.1958181858062744e-06, -1.103617250919342e-06, -1.0114163160324097e-06, -9.192153811454773e-07, -8.270144462585449e-07, -7.348135113716125e-07, -6.426125764846802e-07, -5.504116415977478e-07, -4.5821070671081543e-07, -3.6600977182388306e-07, -2.738088369369507e-07, -1.816079020500183e-07, -8.940696716308594e-08, 2.7939677238464355e-09, 9.499490261077881e-08, 1.8719583749771118e-07, 2.7939677238464355e-07, 3.7159770727157593e-07, 4.637986421585083e-07, 5.559995770454407e-07, 6.48200511932373e-07, 7.404014468193054e-07, 8.326023817062378e-07, 9.248033165931702e-07, 1.0170042514801025e-06, 1.109205186367035e-06, 1.2014061212539673e-06, 1.2936070561408997e-06, 1.385807991027832e-06, 1.4780089259147644e-06, 1.5702098608016968e-06, 1.6624107956886292e-06, 1.7546117305755615e-06, 1.846812665462494e-06, 1.9390136003494263e-06, 2.0312145352363586e-06, 2.123415470123291e-06, 2.2156164050102234e-06, 2.3078173398971558e-06, 2.400018274784088e-06, 2.4922192096710205e-06, 2.584420144557953e-06, 2.6766210794448853e-06, 2.7688220143318176e-06, 2.86102294921875e-06]}, "gradients/decoder.transformer.h.10.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 3.0, 4.0, 5.0, 3.0, 11.0, 9.0, 11.0, 10.0, 14.0, 10.0, 15.0, 28.0, 23.0, 35.0, 70.0, 135.0, 727.0, 12180.0, 794663.0, 234211.0, 5599.0, 432.0, 129.0, 56.0, 37.0, 25.0, 27.0, 12.0, 16.0, 12.0, 11.0, 8.0, 9.0, 7.0, 5.0, 5.0, 2.0, 4.0, 2.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.516145706176758e-05, -7.264595478773117e-05, -7.013045251369476e-05, -6.761495023965836e-05, -6.509944796562195e-05, -6.258394569158554e-05, -6.006844341754913e-05, -5.7552941143512726e-05, -5.503743886947632e-05, -5.252193659543991e-05, -5.0006434321403503e-05, -4.7490932047367096e-05, -4.497542977333069e-05, -4.245992749929428e-05, -3.9944425225257874e-05, -3.7428922951221466e-05, -3.491342067718506e-05, -3.239791840314865e-05, -2.9882416129112244e-05, -2.7366913855075836e-05, -2.485141158103943e-05, -2.233590930700302e-05, -1.9820407032966614e-05, -1.7304904758930206e-05, -1.4789402484893799e-05, -1.2273900210857391e-05, -9.758397936820984e-06, -7.242895662784576e-06, -4.727393388748169e-06, -2.2118911147117615e-06, 3.03611159324646e-07, 2.8191134333610535e-06, 5.334615707397461e-06, 7.850117981433868e-06, 1.0365620255470276e-05, 1.2881122529506683e-05, 1.539662480354309e-05, 1.7912127077579498e-05, 2.0427629351615906e-05, 2.2943131625652313e-05, 2.545863389968872e-05, 2.7974136173725128e-05, 3.0489638447761536e-05, 3.300514072179794e-05, 3.552064299583435e-05, 3.803614526987076e-05, 4.0551647543907166e-05, 4.306714981794357e-05, 4.558265209197998e-05, 4.809815436601639e-05, 5.0613656640052795e-05, 5.31291589140892e-05, 5.564466118812561e-05, 5.816016346216202e-05, 6.0675665736198425e-05, 6.319116801023483e-05, 6.570667028427124e-05, 6.822217255830765e-05, 7.073767483234406e-05, 7.325317710638046e-05, 7.576867938041687e-05, 7.828418165445328e-05, 8.079968392848969e-05, 8.331518620252609e-05, 8.58306884765625e-05]}, "gradients/decoder.transformer.h.10.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 3.0, 1.0, 3.0, 10.0, 15.0, 23.0, 38.0, 75.0, 118.0, 161.0, 164.0, 143.0, 99.0, 69.0, 39.0, 20.0, 15.0, 12.0, 2.0, 4.0, 0.0, 3.0, 0.0, 0.0, 1.0], "bins": [-8.647355571156368e-06, -8.472243280266412e-06, -8.297131898871157e-06, -8.122019607981201e-06, -7.946908226585947e-06, -7.771795935695991e-06, -7.596684554300737e-06, -7.421572263410781e-06, -7.246460427268175e-06, -7.07134859112557e-06, -6.896236754982965e-06, -6.72112491884036e-06, -6.546013082697755e-06, -6.3709012465551496e-06, -6.1957889556651935e-06, -6.020677119522588e-06, -5.845565283379983e-06, -5.670453447237378e-06, -5.495341611094773e-06, -5.320229774952168e-06, -5.1451179388095625e-06, -4.970005647919606e-06, -4.794894266524352e-06, -4.619781975634396e-06, -4.444670594239142e-06, -4.269558758096537e-06, -4.0944469219539315e-06, -3.919335085811326e-06, -3.7442230222950457e-06, -3.5691111861524405e-06, -3.3939993500098353e-06, -3.2188872864935547e-06, -3.0437754503509495e-06, -2.8686636142083444e-06, -2.693551778065739e-06, -2.518439941923134e-06, -2.3433278784068534e-06, -2.1682160422642482e-06, -1.993104206121643e-06, -1.8179922562922002e-06, -1.642880420149595e-06, -1.4677685840069898e-06, -1.292656634177547e-06, -1.1175447980349418e-06, -9.424329050489177e-07, -7.673210120628937e-07, -5.922091759202885e-07, -4.1709722609084565e-07, -2.419853899482405e-07, -6.687351117307117e-08, 1.0823836760209815e-07, 2.8335023216641275e-07, 4.584621251524368e-07, 6.335740181384608e-07, 8.08685854281066e-07, 9.837978041105089e-07, 1.158909640253114e-06, 1.3340214763957192e-06, 1.5091334262251621e-06, 1.6842452623677673e-06, 1.8593570985103725e-06, 2.034469162026653e-06, 2.2095809981692582e-06, 2.3846928343118634e-06, 2.5598046704544686e-06]}, "gradients/decoder.transformer.h.10.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 2.0, 0.0, 9.0, 3.0, 6.0, 7.0, 10.0, 13.0, 0.0, 12.0, 19.0, 26.0, 19.0, 38.0, 0.0, 33.0, 32.0, 40.0, 44.0, 56.0, 0.0, 49.0, 42.0, 49.0, 46.0, 39.0, 46.0, 0.0, 54.0, 38.0, 44.0, 28.0, 30.0, 0.0, 28.0, 30.0, 28.0, 22.0, 17.0, 17.0, 0.0, 7.0, 9.0, 9.0, 5.0, 6.0, 0.0, 0.0, 3.0, 1.0, 1.0, 0.0, 1.0], "bins": [-1.7881393432617188e-06, -1.737847924232483e-06, -1.687556505203247e-06, -1.6372650861740112e-06, -1.5869736671447754e-06, -1.5366822481155396e-06, -1.4863908290863037e-06, -1.4360994100570679e-06, -1.385807991027832e-06, -1.3355165719985962e-06, -1.2852251529693604e-06, -1.2349337339401245e-06, -1.1846423149108887e-06, -1.1343508958816528e-06, -1.084059476852417e-06, -1.0337680578231812e-06, -9.834766387939453e-07, -9.331852197647095e-07, -8.828938007354736e-07, -8.326023817062378e-07, -7.82310962677002e-07, -7.320195436477661e-07, -6.817281246185303e-07, -6.314367055892944e-07, -5.811452865600586e-07, -5.308538675308228e-07, -4.805624485015869e-07, -4.302710294723511e-07, -3.7997961044311523e-07, -3.296881914138794e-07, -2.7939677238464355e-07, -2.2910535335540771e-07, -1.7881393432617188e-07, -1.2852251529693604e-07, -7.82310962677002e-08, -2.7939677238464355e-08, 2.2351741790771484e-08, 7.264316082000732e-08, 1.2293457984924316e-07, 1.73225998878479e-07, 2.2351741790771484e-07, 2.738088369369507e-07, 3.241002559661865e-07, 3.7439167499542236e-07, 4.246830940246582e-07, 4.7497451305389404e-07, 5.252659320831299e-07, 5.755573511123657e-07, 6.258487701416016e-07, 6.761401891708374e-07, 7.264316082000732e-07, 7.767230272293091e-07, 8.270144462585449e-07, 8.773058652877808e-07, 9.275972843170166e-07, 9.778887033462524e-07, 1.0281801223754883e-06, 1.0784715414047241e-06, 1.12876296043396e-06, 1.1790543794631958e-06, 1.2293457984924316e-06, 1.2796372175216675e-06, 1.3299286365509033e-06, 1.3802200555801392e-06, 1.430511474609375e-06]}, "gradients/decoder.transformer.h.10.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 2.0, 5.0, 0.0, 6.0, 5.0, 4.0, 2.0, 7.0, 12.0, 11.0, 12.0, 14.0, 20.0, 21.0, 26.0, 26.0, 28.0, 36.0, 27.0, 31.0, 56.0, 42.0, 38.0, 37.0, 46.0, 27.0, 49.0, 36.0, 30.0, 48.0, 43.0, 40.0, 25.0, 32.0, 31.0, 22.0, 22.0, 20.0, 11.0, 14.0, 13.0, 7.0, 8.0, 3.0, 3.0, 4.0, 2.0, 6.0, 2.0, 3.0, 0.0, 3.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.328125, -5.124267578125, -4.92041015625, -4.716552734375, -4.5126953125, -4.308837890625, -4.10498046875, -3.901123046875, -3.697265625, -3.493408203125, -3.28955078125, -3.085693359375, -2.8818359375, -2.677978515625, -2.47412109375, -2.270263671875, -2.06640625, -1.862548828125, -1.65869140625, -1.454833984375, -1.2509765625, -1.047119140625, -0.84326171875, -0.639404296875, -0.435546875, -0.231689453125, -0.02783203125, 0.176025390625, 0.3798828125, 0.583740234375, 0.78759765625, 0.991455078125, 1.1953125, 1.399169921875, 1.60302734375, 1.806884765625, 2.0107421875, 2.214599609375, 2.41845703125, 2.622314453125, 2.826171875, 3.030029296875, 3.23388671875, 3.437744140625, 3.6416015625, 3.845458984375, 4.04931640625, 4.253173828125, 4.45703125, 4.660888671875, 4.86474609375, 5.068603515625, 5.2724609375, 5.476318359375, 5.68017578125, 5.884033203125, 6.087890625, 6.291748046875, 6.49560546875, 6.699462890625, 6.9033203125, 7.107177734375, 7.31103515625, 7.514892578125, 7.71875]}, "gradients/decoder.transformer.h.10.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 2.0, 1.0, 4.0, 7.0, 4.0, 12.0, 25.0, 26.0, 31.0, 50.0, 93.0, 161.0, 233.0, 436.0, 696.0, 1291.0, 2494.0, 5139.0, 11906.0, 31502.0, 110427.0, 507741.0, 274980.0, 64071.0, 20868.0, 8504.0, 3745.0, 1798.0, 961.0, 528.0, 307.0, 173.0, 128.0, 70.0, 46.0, 33.0, 22.0, 19.0, 13.0, 8.0, 4.0, 2.0, 1.0, 1.0, 1.0, 2.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-8.359375, -8.1044921875, -7.849609375, -7.5947265625, -7.33984375, -7.0849609375, -6.830078125, -6.5751953125, -6.3203125, -6.0654296875, -5.810546875, -5.5556640625, -5.30078125, -5.0458984375, -4.791015625, -4.5361328125, -4.28125, -4.0263671875, -3.771484375, -3.5166015625, -3.26171875, -3.0068359375, -2.751953125, -2.4970703125, -2.2421875, -1.9873046875, -1.732421875, -1.4775390625, -1.22265625, -0.9677734375, -0.712890625, -0.4580078125, -0.203125, 0.0517578125, 0.306640625, 0.5615234375, 0.81640625, 1.0712890625, 1.326171875, 1.5810546875, 1.8359375, 2.0908203125, 2.345703125, 2.6005859375, 2.85546875, 3.1103515625, 3.365234375, 3.6201171875, 3.875, 4.1298828125, 4.384765625, 4.6396484375, 4.89453125, 5.1494140625, 5.404296875, 5.6591796875, 5.9140625, 6.1689453125, 6.423828125, 6.6787109375, 6.93359375, 7.1884765625, 7.443359375, 7.6982421875, 7.953125]}, "gradients/decoder.transformer.h.10.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 0.0, 0.0, 3.0, 3.0, 3.0, 5.0, 10.0, 6.0, 10.0, 9.0, 15.0, 21.0, 21.0, 15.0, 17.0, 41.0, 32.0, 31.0, 39.0, 33.0, 55.0, 50.0, 86.0, 99.0, 354.0, 1495.0, 128.0, 89.0, 53.0, 43.0, 30.0, 30.0, 27.0, 27.0, 23.0, 25.0, 24.0, 21.0, 12.0, 14.0, 19.0, 8.0, 9.0, 4.0, 7.0, 6.0, 4.0, 4.0, 0.0, 0.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-16.234375, -15.65576171875, -15.0771484375, -14.49853515625, -13.919921875, -13.34130859375, -12.7626953125, -12.18408203125, -11.60546875, -11.02685546875, -10.4482421875, -9.86962890625, -9.291015625, -8.71240234375, -8.1337890625, -7.55517578125, -6.9765625, -6.39794921875, -5.8193359375, -5.24072265625, -4.662109375, -4.08349609375, -3.5048828125, -2.92626953125, -2.34765625, -1.76904296875, -1.1904296875, -0.61181640625, -0.033203125, 0.54541015625, 1.1240234375, 1.70263671875, 2.28125, 2.85986328125, 3.4384765625, 4.01708984375, 4.595703125, 5.17431640625, 5.7529296875, 6.33154296875, 6.91015625, 7.48876953125, 8.0673828125, 8.64599609375, 9.224609375, 9.80322265625, 10.3818359375, 10.96044921875, 11.5390625, 12.11767578125, 12.6962890625, 13.27490234375, 13.853515625, 14.43212890625, 15.0107421875, 15.58935546875, 16.16796875, 16.74658203125, 17.3251953125, 17.90380859375, 18.482421875, 19.06103515625, 19.6396484375, 20.21826171875, 20.796875]}, "gradients/decoder.transformer.h.10.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 3.0, 3.0, 1.0, 3.0, 6.0, 6.0, 11.0, 8.0, 11.0, 21.0, 25.0, 27.0, 43.0, 54.0, 68.0, 96.0, 139.0, 230.0, 431.0, 917.0, 2766.0, 26414.0, 2882912.0, 220884.0, 7437.0, 1524.0, 689.0, 356.0, 192.0, 125.0, 83.0, 61.0, 33.0, 45.0, 26.0, 16.0, 14.0, 10.0, 7.0, 4.0, 5.0, 4.0, 1.0, 2.0, 2.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0], "bins": [-42.25, -41.03466796875, -39.8193359375, -38.60400390625, -37.388671875, -36.17333984375, -34.9580078125, -33.74267578125, -32.52734375, -31.31201171875, -30.0966796875, -28.88134765625, -27.666015625, -26.45068359375, -25.2353515625, -24.02001953125, -22.8046875, -21.58935546875, -20.3740234375, -19.15869140625, -17.943359375, -16.72802734375, -15.5126953125, -14.29736328125, -13.08203125, -11.86669921875, -10.6513671875, -9.43603515625, -8.220703125, -7.00537109375, -5.7900390625, -4.57470703125, -3.359375, -2.14404296875, -0.9287109375, 0.28662109375, 1.501953125, 2.71728515625, 3.9326171875, 5.14794921875, 6.36328125, 7.57861328125, 8.7939453125, 10.00927734375, 11.224609375, 12.43994140625, 13.6552734375, 14.87060546875, 16.0859375, 17.30126953125, 18.5166015625, 19.73193359375, 20.947265625, 22.16259765625, 23.3779296875, 24.59326171875, 25.80859375, 27.02392578125, 28.2392578125, 29.45458984375, 30.669921875, 31.88525390625, 33.1005859375, 34.31591796875, 35.53125]}, "gradients/decoder.transformer.h.10.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 5.0, 10.0, 69.0, 300.0, 447.0, 158.0, 25.0, 4.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-89.14562225341797, -85.30768585205078, -81.46975708007812, -77.63182067871094, -73.79389190673828, -69.9559555053711, -66.11802673339844, -62.28009033203125, -58.44215774536133, -54.604225158691406, -50.766292572021484, -46.92835998535156, -43.090423583984375, -39.25249481201172, -35.41455841064453, -31.57662582397461, -27.738693237304688, -23.900760650634766, -20.062828063964844, -16.22489356994629, -12.386960983276367, -8.549028396606445, -4.711093902587891, -0.8731613159179688, 2.964771270751953, 6.802704334259033, 10.640637397766113, 14.478570938110352, 18.316503524780273, 22.154436111450195, 25.99237060546875, 29.830303192138672, 33.668243408203125, 37.50617599487305, 41.34410858154297, 45.182044982910156, 49.01997375488281, 52.85791015625, 56.69584274291992, 60.533775329589844, 64.3717041015625, 68.20964050292969, 72.04756927490234, 75.88550567626953, 79.72343444824219, 83.56137084960938, 87.39930725097656, 91.23723602294922, 95.0751724243164, 98.9131088256836, 102.75103759765625, 106.58897399902344, 110.4269027709961, 114.26483917236328, 118.10276794433594, 121.94070434570312, 125.77864074707031, 129.6165771484375, 133.4545135498047, 137.2924346923828, 141.13037109375, 144.9683074951172, 148.80624389648438, 152.6441650390625, 156.4821014404297]}, "gradients/decoder.transformer.h.10.ln_1.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 6.0, 2.0, 6.0, 1.0, 4.0, 10.0, 6.0, 11.0, 11.0, 13.0, 13.0, 18.0, 14.0, 20.0, 26.0, 31.0, 29.0, 26.0, 34.0, 34.0, 36.0, 28.0, 45.0, 37.0, 41.0, 30.0, 36.0, 41.0, 30.0, 26.0, 33.0, 36.0, 34.0, 24.0, 20.0, 17.0, 36.0, 20.0, 21.0, 17.0, 10.0, 16.0, 15.0, 8.0, 7.0, 4.0, 8.0, 6.0, 4.0, 6.0, 0.0, 2.0, 6.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 2.0], "bins": [-48.067718505859375, -46.505489349365234, -44.94326400756836, -43.38103485107422, -41.81880569458008, -40.25657653808594, -38.69435119628906, -37.13212203979492, -35.56989288330078, -34.00766372680664, -32.445438385009766, -30.883209228515625, -29.320980072021484, -27.758752822875977, -26.19652557373047, -24.634296417236328, -23.072071075439453, -21.509843826293945, -19.947614669799805, -18.385387420654297, -16.823158264160156, -15.260931015014648, -13.69870376586914, -12.136475563049316, -10.574247360229492, -9.012019157409668, -7.449791431427002, -5.887563705444336, -4.325335502624512, -2.7631072998046875, -1.2008800506591797, 0.36134815216064453, 1.9235763549804688, 3.485804319381714, 5.048032283782959, 6.610260009765625, 8.17248821258545, 9.734716415405273, 11.296943664550781, 12.859171867370605, 14.42140007019043, 15.983628273010254, 17.545856475830078, 19.108083724975586, 20.670310974121094, 22.232540130615234, 23.794767379760742, 25.35699462890625, 26.91922378540039, 28.4814510345459, 30.04368019104004, 31.605907440185547, 33.16813659667969, 34.73036193847656, 36.2925910949707, 37.854820251464844, 39.41704559326172, 40.97927474975586, 42.541500091552734, 44.103729248046875, 45.665958404541016, 47.228187561035156, 48.79041290283203, 50.35264205932617, 51.91487121582031]}, "gradients/decoder.transformer.h.9.mlp.c_proj.bias": {"_type": "histogram", "values": [3.0, 1.0, 6.0, 2.0, 4.0, 6.0, 4.0, 2.0, 7.0, 12.0, 12.0, 23.0, 9.0, 23.0, 19.0, 24.0, 21.0, 32.0, 39.0, 21.0, 41.0, 53.0, 40.0, 41.0, 32.0, 44.0, 39.0, 38.0, 39.0, 41.0, 44.0, 44.0, 25.0, 34.0, 29.0, 28.0, 23.0, 16.0, 18.0, 20.0, 15.0, 8.0, 6.0, 5.0, 3.0, 6.0, 8.0, 1.0, 3.0, 2.0, 1.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.30078125, -5.09564208984375, -4.8905029296875, -4.68536376953125, -4.480224609375, -4.27508544921875, -4.0699462890625, -3.86480712890625, -3.65966796875, -3.45452880859375, -3.2493896484375, -3.04425048828125, -2.839111328125, -2.63397216796875, -2.4288330078125, -2.22369384765625, -2.0185546875, -1.81341552734375, -1.6082763671875, -1.40313720703125, -1.197998046875, -0.99285888671875, -0.7877197265625, -0.58258056640625, -0.37744140625, -0.17230224609375, 0.0328369140625, 0.23797607421875, 0.443115234375, 0.64825439453125, 0.8533935546875, 1.05853271484375, 1.263671875, 1.46881103515625, 1.6739501953125, 1.87908935546875, 2.084228515625, 2.28936767578125, 2.4945068359375, 2.69964599609375, 2.90478515625, 3.10992431640625, 3.3150634765625, 3.52020263671875, 3.725341796875, 3.93048095703125, 4.1356201171875, 4.34075927734375, 4.5458984375, 4.75103759765625, 4.9561767578125, 5.16131591796875, 5.366455078125, 5.57159423828125, 5.7767333984375, 5.98187255859375, 6.18701171875, 6.39215087890625, 6.5972900390625, 6.80242919921875, 7.007568359375, 7.21270751953125, 7.4178466796875, 7.62298583984375, 7.828125]}, "gradients/decoder.transformer.h.9.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 0.0, 1.0, 6.0, 4.0, 3.0, 6.0, 4.0, 9.0, 11.0, 14.0, 16.0, 33.0, 30.0, 57.0, 105.0, 186.0, 351.0, 863.0, 2072.0, 5397.0, 16855.0, 65290.0, 409078.0, 2876428.0, 692537.0, 91583.0, 22176.0, 6899.0, 2425.0, 971.0, 391.0, 199.0, 103.0, 50.0, 38.0, 24.0, 12.0, 11.0, 13.0, 14.0, 7.0, 6.0, 3.0, 2.0, 7.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-15.75, -15.251708984375, -14.75341796875, -14.255126953125, -13.7568359375, -13.258544921875, -12.76025390625, -12.261962890625, -11.763671875, -11.265380859375, -10.76708984375, -10.268798828125, -9.7705078125, -9.272216796875, -8.77392578125, -8.275634765625, -7.77734375, -7.279052734375, -6.78076171875, -6.282470703125, -5.7841796875, -5.285888671875, -4.78759765625, -4.289306640625, -3.791015625, -3.292724609375, -2.79443359375, -2.296142578125, -1.7978515625, -1.299560546875, -0.80126953125, -0.302978515625, 0.1953125, 0.693603515625, 1.19189453125, 1.690185546875, 2.1884765625, 2.686767578125, 3.18505859375, 3.683349609375, 4.181640625, 4.679931640625, 5.17822265625, 5.676513671875, 6.1748046875, 6.673095703125, 7.17138671875, 7.669677734375, 8.16796875, 8.666259765625, 9.16455078125, 9.662841796875, 10.1611328125, 10.659423828125, 11.15771484375, 11.656005859375, 12.154296875, 12.652587890625, 13.15087890625, 13.649169921875, 14.1474609375, 14.645751953125, 15.14404296875, 15.642333984375, 16.140625]}, "gradients/decoder.transformer.h.9.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 5.0, 0.0, 2.0, 2.0, 4.0, 7.0, 9.0, 5.0, 14.0, 21.0, 24.0, 25.0, 29.0, 52.0, 80.0, 117.0, 168.0, 231.0, 360.0, 619.0, 709.0, 563.0, 299.0, 202.0, 145.0, 108.0, 73.0, 57.0, 33.0, 29.0, 20.0, 13.0, 14.0, 13.0, 6.0, 6.0, 3.0, 7.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 3.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-13.96875, -13.56201171875, -13.1552734375, -12.74853515625, -12.341796875, -11.93505859375, -11.5283203125, -11.12158203125, -10.71484375, -10.30810546875, -9.9013671875, -9.49462890625, -9.087890625, -8.68115234375, -8.2744140625, -7.86767578125, -7.4609375, -7.05419921875, -6.6474609375, -6.24072265625, -5.833984375, -5.42724609375, -5.0205078125, -4.61376953125, -4.20703125, -3.80029296875, -3.3935546875, -2.98681640625, -2.580078125, -2.17333984375, -1.7666015625, -1.35986328125, -0.953125, -0.54638671875, -0.1396484375, 0.26708984375, 0.673828125, 1.08056640625, 1.4873046875, 1.89404296875, 2.30078125, 2.70751953125, 3.1142578125, 3.52099609375, 3.927734375, 4.33447265625, 4.7412109375, 5.14794921875, 5.5546875, 5.96142578125, 6.3681640625, 6.77490234375, 7.181640625, 7.58837890625, 7.9951171875, 8.40185546875, 8.80859375, 9.21533203125, 9.6220703125, 10.02880859375, 10.435546875, 10.84228515625, 11.2490234375, 11.65576171875, 12.0625]}, "gradients/decoder.transformer.h.9.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 5.0, 1.0, 2.0, 1.0, 4.0, 4.0, 3.0, 6.0, 1.0, 9.0, 7.0, 9.0, 18.0, 17.0, 31.0, 28.0, 43.0, 72.0, 97.0, 182.0, 282.0, 568.0, 2474.0, 62013.0, 3978777.0, 144246.0, 3879.0, 714.0, 296.0, 181.0, 92.0, 72.0, 45.0, 23.0, 22.0, 24.0, 13.0, 11.0, 6.0, 5.0, 2.0, 4.0, 3.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-67.375, -65.2421875, -63.109375, -60.9765625, -58.84375, -56.7109375, -54.578125, -52.4453125, -50.3125, -48.1796875, -46.046875, -43.9140625, -41.78125, -39.6484375, -37.515625, -35.3828125, -33.25, -31.1171875, -28.984375, -26.8515625, -24.71875, -22.5859375, -20.453125, -18.3203125, -16.1875, -14.0546875, -11.921875, -9.7890625, -7.65625, -5.5234375, -3.390625, -1.2578125, 0.875, 3.0078125, 5.140625, 7.2734375, 9.40625, 11.5390625, 13.671875, 15.8046875, 17.9375, 20.0703125, 22.203125, 24.3359375, 26.46875, 28.6015625, 30.734375, 32.8671875, 35.0, 37.1328125, 39.265625, 41.3984375, 43.53125, 45.6640625, 47.796875, 49.9296875, 52.0625, 54.1953125, 56.328125, 58.4609375, 60.59375, 62.7265625, 64.859375, 66.9921875, 69.125]}, "gradients/decoder.transformer.h.9.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 11.0, 32.0, 54.0, 119.0, 164.0, 214.0, 202.0, 112.0, 60.0, 29.0, 7.0, 8.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-133.87864685058594, -130.11471557617188, -126.35079193115234, -122.58686065673828, -118.82293701171875, -115.05900573730469, -111.29507446289062, -107.5311508178711, -103.76722717285156, -100.0032958984375, -96.23937225341797, -92.4754409790039, -88.71151733398438, -84.94758605957031, -81.18365478515625, -77.41973114013672, -73.65579986572266, -69.8918685913086, -66.12794494628906, -62.364013671875, -58.60009002685547, -54.836158752441406, -51.07223129272461, -47.30830383300781, -43.544376373291016, -39.78044891357422, -36.01652145385742, -32.252593994140625, -28.488664627075195, -24.7247371673584, -20.96080780029297, -17.196880340576172, -13.432952880859375, -9.669025421142578, -5.905097007751465, -2.1411685943603516, 1.6227588653564453, 5.386686325073242, 9.150615692138672, 12.914543151855469, 16.678470611572266, 20.442398071289062, 24.20632553100586, 27.97025489807129, 31.734182357788086, 35.49810791015625, 39.26203918457031, 43.02596664428711, 46.789894104003906, 50.5538215637207, 54.3177490234375, 58.08168029785156, 61.845603942871094, 65.60953521728516, 69.37345886230469, 73.13739013671875, 76.90132141113281, 80.66525268554688, 84.4291763305664, 88.19310760498047, 91.95703125, 95.72096252441406, 99.48489379882812, 103.24881744384766, 107.01274108886719]}, "gradients/decoder.transformer.h.9.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 2.0, 4.0, 2.0, 6.0, 5.0, 8.0, 10.0, 5.0, 10.0, 11.0, 15.0, 11.0, 20.0, 11.0, 17.0, 18.0, 27.0, 31.0, 36.0, 46.0, 37.0, 30.0, 40.0, 35.0, 32.0, 40.0, 36.0, 36.0, 46.0, 48.0, 29.0, 31.0, 31.0, 36.0, 27.0, 27.0, 22.0, 17.0, 13.0, 25.0, 10.0, 9.0, 11.0, 4.0, 9.0, 11.0, 6.0, 2.0, 3.0, 7.0, 3.0, 4.0, 0.0, 3.0], "bins": [-41.97718811035156, -40.795040130615234, -39.612892150878906, -38.43074417114258, -37.24859619140625, -36.06644821166992, -34.884300231933594, -33.70215606689453, -32.52000427246094, -31.33785629272461, -30.15570831298828, -28.973560333251953, -27.791412353515625, -26.609264373779297, -25.4271183013916, -24.244970321655273, -23.062824249267578, -21.88067626953125, -20.698528289794922, -19.516380310058594, -18.334232330322266, -17.152084350585938, -15.969938278198242, -14.787790298461914, -13.605642318725586, -12.423494338989258, -11.24134635925293, -10.059199333190918, -8.87705135345459, -7.694903373718262, -6.512755870819092, -5.330608367919922, -4.148460388183594, -2.9663126468658447, -1.7841649055480957, -0.6020171642303467, 0.5801305770874023, 1.7622785568237305, 2.9444260597229004, 4.12657356262207, 5.308721542358398, 6.490869522094727, 7.6730170249938965, 8.855164527893066, 10.037312507629395, 11.219460487365723, 12.401607513427734, 13.583755493164062, 14.76590347290039, 15.948051452636719, 17.130199432373047, 18.312347412109375, 19.494495391845703, 20.67664337158203, 21.858789443969727, 23.040937423706055, 24.223085403442383, 25.40523338317871, 26.58738136291504, 27.769529342651367, 28.951675415039062, 30.13382339477539, 31.31597137451172, 32.49811935424805, 33.680267333984375]}, "gradients/decoder.transformer.h.9.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 0.0, 2.0, 3.0, 5.0, 7.0, 2.0, 6.0, 7.0, 10.0, 14.0, 14.0, 20.0, 17.0, 20.0, 29.0, 31.0, 29.0, 31.0, 30.0, 36.0, 33.0, 54.0, 42.0, 42.0, 34.0, 41.0, 54.0, 37.0, 41.0, 26.0, 40.0, 36.0, 32.0, 25.0, 25.0, 29.0, 17.0, 20.0, 14.0, 12.0, 14.0, 9.0, 1.0, 7.0, 7.0, 5.0, 2.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.19140625, -5.97552490234375, -5.7596435546875, -5.54376220703125, -5.327880859375, -5.11199951171875, -4.8961181640625, -4.68023681640625, -4.46435546875, -4.24847412109375, -4.0325927734375, -3.81671142578125, -3.600830078125, -3.38494873046875, -3.1690673828125, -2.95318603515625, -2.7373046875, -2.52142333984375, -2.3055419921875, -2.08966064453125, -1.873779296875, -1.65789794921875, -1.4420166015625, -1.22613525390625, -1.01025390625, -0.79437255859375, -0.5784912109375, -0.36260986328125, -0.146728515625, 0.06915283203125, 0.2850341796875, 0.50091552734375, 0.716796875, 0.93267822265625, 1.1485595703125, 1.36444091796875, 1.580322265625, 1.79620361328125, 2.0120849609375, 2.22796630859375, 2.44384765625, 2.65972900390625, 2.8756103515625, 3.09149169921875, 3.307373046875, 3.52325439453125, 3.7391357421875, 3.95501708984375, 4.1708984375, 4.38677978515625, 4.6026611328125, 4.81854248046875, 5.034423828125, 5.25030517578125, 5.4661865234375, 5.68206787109375, 5.89794921875, 6.11383056640625, 6.3297119140625, 6.54559326171875, 6.761474609375, 6.97735595703125, 7.1932373046875, 7.40911865234375, 7.625]}, "gradients/decoder.transformer.h.9.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 4.0, 0.0, 2.0, 3.0, 6.0, 5.0, 11.0, 23.0, 19.0, 34.0, 68.0, 58.0, 99.0, 164.0, 252.0, 354.0, 531.0, 849.0, 1218.0, 1920.0, 2769.0, 4197.0, 6488.0, 9717.0, 15007.0, 23603.0, 37690.0, 62600.0, 115545.0, 317194.0, 210108.0, 94115.0, 52863.0, 32302.0, 20347.0, 13083.0, 8561.0, 5605.0, 3750.0, 2491.0, 1642.0, 1139.0, 709.0, 459.0, 332.0, 224.0, 155.0, 77.0, 55.0, 46.0, 28.0, 18.0, 11.0, 8.0, 5.0, 4.0, 2.0, 3.0, 2.0], "bins": [-0.27392578125, -0.2660636901855469, -0.25820159912109375, -0.2503395080566406, -0.2424774169921875, -0.23461532592773438, -0.22675323486328125, -0.21889114379882812, -0.211029052734375, -0.20316696166992188, -0.19530487060546875, -0.18744277954101562, -0.1795806884765625, -0.17171859741210938, -0.16385650634765625, -0.15599441528320312, -0.14813232421875, -0.14027023315429688, -0.13240814208984375, -0.12454605102539062, -0.1166839599609375, -0.10882186889648438, -0.10095977783203125, -0.09309768676757812, -0.085235595703125, -0.07737350463867188, -0.06951141357421875, -0.061649322509765625, -0.0537872314453125, -0.045925140380859375, -0.03806304931640625, -0.030200958251953125, -0.0223388671875, -0.014476776123046875, -0.00661468505859375, 0.001247406005859375, 0.0091094970703125, 0.016971588134765625, 0.02483367919921875, 0.032695770263671875, 0.040557861328125, 0.048419952392578125, 0.05628204345703125, 0.06414413452148438, 0.0720062255859375, 0.07986831665039062, 0.08773040771484375, 0.09559249877929688, 0.10345458984375, 0.11131668090820312, 0.11917877197265625, 0.12704086303710938, 0.1349029541015625, 0.14276504516601562, 0.15062713623046875, 0.15848922729492188, 0.166351318359375, 0.17421340942382812, 0.18207550048828125, 0.18993759155273438, 0.1977996826171875, 0.20566177368164062, 0.21352386474609375, 0.22138595581054688, 0.229248046875]}, "gradients/decoder.transformer.h.9.crossattention.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 6.0, 4.0, 10.0, 4.0, 8.0, 8.0, 13.0, 9.0, 19.0, 16.0, 18.0, 19.0, 18.0, 18.0, 27.0, 39.0, 27.0, 33.0, 30.0, 38.0, 46.0, 42.0, 33.0, 1063.0, 41.0, 47.0, 32.0, 40.0, 38.0, 28.0, 27.0, 25.0, 30.0, 25.0, 22.0, 19.0, 24.0, 22.0, 16.0, 8.0, 10.0, 8.0, 6.0, 8.0, 1.0, 2.0, 3.0, 2.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0], "bins": [-4.80078125, -4.656494140625, -4.51220703125, -4.367919921875, -4.2236328125, -4.079345703125, -3.93505859375, -3.790771484375, -3.646484375, -3.502197265625, -3.35791015625, -3.213623046875, -3.0693359375, -2.925048828125, -2.78076171875, -2.636474609375, -2.4921875, -2.347900390625, -2.20361328125, -2.059326171875, -1.9150390625, -1.770751953125, -1.62646484375, -1.482177734375, -1.337890625, -1.193603515625, -1.04931640625, -0.905029296875, -0.7607421875, -0.616455078125, -0.47216796875, -0.327880859375, -0.18359375, -0.039306640625, 0.10498046875, 0.249267578125, 0.3935546875, 0.537841796875, 0.68212890625, 0.826416015625, 0.970703125, 1.114990234375, 1.25927734375, 1.403564453125, 1.5478515625, 1.692138671875, 1.83642578125, 1.980712890625, 2.125, 2.269287109375, 2.41357421875, 2.557861328125, 2.7021484375, 2.846435546875, 2.99072265625, 3.135009765625, 3.279296875, 3.423583984375, 3.56787109375, 3.712158203125, 3.8564453125, 4.000732421875, 4.14501953125, 4.289306640625, 4.43359375]}, "gradients/decoder.transformer.h.9.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 1.0, 2.0, 5.0, 10.0, 14.0, 14.0, 16.0, 35.0, 42.0, 75.0, 77.0, 127.0, 195.0, 221.0, 300.0, 435.0, 655.0, 830.0, 1272.0, 1743.0, 2446.0, 3410.0, 4723.0, 6655.0, 9398.0, 13520.0, 19597.0, 28453.0, 42712.0, 64922.0, 106006.0, 1295075.0, 203874.0, 98703.0, 61122.0, 40150.0, 27303.0, 18737.0, 13043.0, 9026.0, 6235.0, 4562.0, 3213.0, 2294.0, 1686.0, 1199.0, 840.0, 646.0, 429.0, 352.0, 200.0, 161.0, 117.0, 76.0, 56.0, 34.0, 29.0, 24.0, 25.0, 10.0, 8.0, 3.0, 7.0], "bins": [-0.11578369140625, -0.11226844787597656, -0.10875320434570312, -0.10523796081542969, -0.10172271728515625, -0.09820747375488281, -0.09469223022460938, -0.09117698669433594, -0.0876617431640625, -0.08414649963378906, -0.08063125610351562, -0.07711601257324219, -0.07360076904296875, -0.07008552551269531, -0.06657028198242188, -0.06305503845214844, -0.059539794921875, -0.05602455139160156, -0.052509307861328125, -0.04899406433105469, -0.04547882080078125, -0.04196357727050781, -0.038448333740234375, -0.03493309020996094, -0.0314178466796875, -0.027902603149414062, -0.024387359619140625, -0.020872116088867188, -0.01735687255859375, -0.013841629028320312, -0.010326385498046875, -0.0068111419677734375, -0.0032958984375, 0.0002193450927734375, 0.003734588623046875, 0.0072498321533203125, 0.01076507568359375, 0.014280319213867188, 0.017795562744140625, 0.021310806274414062, 0.0248260498046875, 0.028341293334960938, 0.031856536865234375, 0.03537178039550781, 0.03888702392578125, 0.04240226745605469, 0.045917510986328125, 0.04943275451660156, 0.052947998046875, 0.05646324157714844, 0.059978485107421875, 0.06349372863769531, 0.06700897216796875, 0.07052421569824219, 0.07403945922851562, 0.07755470275878906, 0.0810699462890625, 0.08458518981933594, 0.08810043334960938, 0.09161567687988281, 0.09513092041015625, 0.09864616394042969, 0.10216140747070312, 0.10567665100097656, 0.10919189453125]}, "gradients/decoder.transformer.h.9.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 4.0, 4.0, 5.0, 4.0, 4.0, 4.0, 7.0, 10.0, 8.0, 22.0, 31.0, 32.0, 44.0, 80.0, 81.0, 181.0, 177.0, 77.0, 52.0, 43.0, 46.0, 20.0, 12.0, 22.0, 19.0, 4.0, 3.0, 8.0, 4.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-9.655952453613281e-06, -9.379349648952484e-06, -9.102746844291687e-06, -8.82614403963089e-06, -8.549541234970093e-06, -8.272938430309296e-06, -7.996335625648499e-06, -7.719732820987701e-06, -7.443130016326904e-06, -7.166527211666107e-06, -6.88992440700531e-06, -6.613321602344513e-06, -6.336718797683716e-06, -6.060115993022919e-06, -5.783513188362122e-06, -5.5069103837013245e-06, -5.230307579040527e-06, -4.95370477437973e-06, -4.677101969718933e-06, -4.400499165058136e-06, -4.123896360397339e-06, -3.847293555736542e-06, -3.5706907510757446e-06, -3.2940879464149475e-06, -3.0174851417541504e-06, -2.7408823370933533e-06, -2.464279532432556e-06, -2.187676727771759e-06, -1.911073923110962e-06, -1.6344711184501648e-06, -1.3578683137893677e-06, -1.0812655091285706e-06, -8.046627044677734e-07, -5.280598998069763e-07, -2.514570951461792e-07, 2.514570951461792e-08, 3.0174851417541504e-07, 5.783513188362122e-07, 8.549541234970093e-07, 1.1315569281578064e-06, 1.4081597328186035e-06, 1.6847625374794006e-06, 1.9613653421401978e-06, 2.237968146800995e-06, 2.514570951461792e-06, 2.791173756122589e-06, 3.0677765607833862e-06, 3.3443793654441833e-06, 3.6209821701049805e-06, 3.897584974765778e-06, 4.174187779426575e-06, 4.450790584087372e-06, 4.727393388748169e-06, 5.003996193408966e-06, 5.280598998069763e-06, 5.55720180273056e-06, 5.833804607391357e-06, 6.1104074120521545e-06, 6.387010216712952e-06, 6.663613021373749e-06, 6.940215826034546e-06, 7.216818630695343e-06, 7.49342143535614e-06, 7.770024240016937e-06, 8.046627044677734e-06]}, "gradients/decoder.transformer.h.9.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 3.0, 6.0, 3.0, 8.0, 16.0, 14.0, 27.0, 29.0, 49.0, 65.0, 168.0, 731.0, 112704.0, 932569.0, 1695.0, 219.0, 88.0, 45.0, 44.0, 14.0, 21.0, 7.0, 6.0, 7.0, 4.0, 4.0, 2.0, 4.0, 6.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.00016617774963378906, -0.00016047433018684387, -0.00015477091073989868, -0.0001490674912929535, -0.0001433640718460083, -0.0001376606523990631, -0.00013195723295211792, -0.00012625381350517273, -0.00012055039405822754, -0.00011484697461128235, -0.00010914355516433716, -0.00010344013571739197, -9.773671627044678e-05, -9.203329682350159e-05, -8.63298773765564e-05, -8.06264579296112e-05, -7.492303848266602e-05, -6.921961903572083e-05, -6.351619958877563e-05, -5.7812780141830444e-05, -5.2109360694885254e-05, -4.6405941247940063e-05, -4.070252180099487e-05, -3.499910235404968e-05, -2.9295682907104492e-05, -2.3592263460159302e-05, -1.788884401321411e-05, -1.2185424566268921e-05, -6.4820051193237305e-06, -7.7858567237854e-07, 4.92483377456665e-06, 1.062825322151184e-05, 1.633167266845703e-05, 2.203509211540222e-05, 2.7738511562347412e-05, 3.34419310092926e-05, 3.914535045623779e-05, 4.4848769903182983e-05, 5.0552189350128174e-05, 5.6255608797073364e-05, 6.195902824401855e-05, 6.766244769096375e-05, 7.336586713790894e-05, 7.906928658485413e-05, 8.477270603179932e-05, 9.047612547874451e-05, 9.61795449256897e-05, 0.00010188296437263489, 0.00010758638381958008, 0.00011328980326652527, 0.00011899322271347046, 0.00012469664216041565, 0.00013040006160736084, 0.00013610348105430603, 0.00014180690050125122, 0.0001475103199481964, 0.0001532137393951416, 0.0001589171588420868, 0.00016462057828903198, 0.00017032399773597717, 0.00017602741718292236, 0.00018173083662986755, 0.00018743425607681274, 0.00019313767552375793, 0.00019884109497070312]}, "gradients/decoder.transformer.h.9.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 3.0, 2.0, 12.0, 18.0, 38.0, 76.0, 128.0, 202.0, 217.0, 162.0, 91.0, 41.0, 14.0, 7.0, 4.0, 1.0, 1.0], "bins": [-1.4555203961208463e-05, -1.4291117622633465e-05, -1.4027031284058467e-05, -1.3762944945483468e-05, -1.349885860690847e-05, -1.3234772268333472e-05, -1.2970685929758474e-05, -1.2706599591183476e-05, -1.2442513252608478e-05, -1.217842691403348e-05, -1.1914340575458482e-05, -1.1650254236883484e-05, -1.1386167898308486e-05, -1.1122081559733488e-05, -1.085799522115849e-05, -1.0593908882583492e-05, -1.0329822544008493e-05, -1.0065736205433495e-05, -9.801649866858497e-06, -9.5375635282835e-06, -9.273477189708501e-06, -9.009390851133503e-06, -8.745304512558505e-06, -8.481218173983507e-06, -8.217131835408509e-06, -7.95304549683351e-06, -7.688959158258513e-06, -7.4248728196835145e-06, -7.1607864811085165e-06, -6.896700142533518e-06, -6.63261380395852e-06, -6.368527465383522e-06, -6.104440672061173e-06, -5.840354333486175e-06, -5.576267994911177e-06, -5.312181656336179e-06, -5.048095317761181e-06, -4.784008979186183e-06, -4.519922640611185e-06, -4.255836302036187e-06, -3.991749963461189e-06, -3.7276636248861905e-06, -3.4635772863111924e-06, -3.1994909477361944e-06, -2.9354046091611963e-06, -2.671318270586198e-06, -2.4072319320112e-06, -2.143145593436202e-06, -1.879059254861204e-06, -1.6149729162862059e-06, -1.3508865777112078e-06, -1.0868002391362097e-06, -8.227139005612116e-07, -5.586275619862135e-07, -2.9454122341121547e-07, -3.045488483621739e-08, 2.336314537387807e-07, 4.977177923137788e-07, 7.618041308887769e-07, 1.025890469463775e-06, 1.289976808038773e-06, 1.554063146613771e-06, 1.8181494851887692e-06, 2.0822358237637673e-06, 2.3463221623387653e-06]}, "gradients/decoder.transformer.h.9.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 2.0, 2.0, 2.0, 6.0, 5.0, 5.0, 18.0, 2.0, 15.0, 11.0, 22.0, 19.0, 29.0, 15.0, 20.0, 12.0, 40.0, 19.0, 40.0, 18.0, 66.0, 30.0, 64.0, 37.0, 27.0, 39.0, 26.0, 54.0, 24.0, 42.0, 22.0, 53.0, 24.0, 33.0, 16.0, 32.0, 10.0, 37.0, 19.0, 8.0, 8.0, 9.0, 13.0, 6.0, 3.0, 2.0, 2.0, 1.0, 3.0, 0.0, 1.0, 2.0, 0.0, 2.0], "bins": [-3.159046173095703e-06, -3.0677765607833862e-06, -2.9765069484710693e-06, -2.8852373361587524e-06, -2.7939677238464355e-06, -2.7026981115341187e-06, -2.6114284992218018e-06, -2.520158886909485e-06, -2.428889274597168e-06, -2.337619662284851e-06, -2.246350049972534e-06, -2.1550804376602173e-06, -2.0638108253479004e-06, -1.9725412130355835e-06, -1.8812716007232666e-06, -1.7900019884109497e-06, -1.6987323760986328e-06, -1.607462763786316e-06, -1.516193151473999e-06, -1.4249235391616821e-06, -1.3336539268493652e-06, -1.2423843145370483e-06, -1.1511147022247314e-06, -1.0598450899124146e-06, -9.685754776000977e-07, -8.773058652877808e-07, -7.860362529754639e-07, -6.94766640663147e-07, -6.034970283508301e-07, -5.122274160385132e-07, -4.209578037261963e-07, -3.296881914138794e-07, -2.384185791015625e-07, -1.471489667892456e-07, -5.587935447692871e-08, 3.5390257835388184e-08, 1.2665987014770508e-07, 2.1792948246002197e-07, 3.0919909477233887e-07, 4.0046870708465576e-07, 4.917383193969727e-07, 5.830079317092896e-07, 6.742775440216064e-07, 7.655471563339233e-07, 8.568167686462402e-07, 9.480863809585571e-07, 1.039355993270874e-06, 1.130625605583191e-06, 1.2218952178955078e-06, 1.3131648302078247e-06, 1.4044344425201416e-06, 1.4957040548324585e-06, 1.5869736671447754e-06, 1.6782432794570923e-06, 1.7695128917694092e-06, 1.860782504081726e-06, 1.952052116394043e-06, 2.04332172870636e-06, 2.1345913410186768e-06, 2.2258609533309937e-06, 2.3171305656433105e-06, 2.4084001779556274e-06, 2.4996697902679443e-06, 2.5909394025802612e-06, 2.682209014892578e-06]}, "gradients/decoder.transformer.h.9.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 0.0, 2.0, 3.0, 5.0, 7.0, 2.0, 6.0, 7.0, 10.0, 14.0, 14.0, 20.0, 17.0, 20.0, 29.0, 31.0, 29.0, 31.0, 30.0, 36.0, 33.0, 54.0, 42.0, 42.0, 34.0, 41.0, 54.0, 37.0, 41.0, 26.0, 40.0, 36.0, 32.0, 25.0, 25.0, 29.0, 17.0, 20.0, 14.0, 12.0, 14.0, 9.0, 1.0, 7.0, 7.0, 5.0, 2.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.19140625, -5.97552490234375, -5.7596435546875, -5.54376220703125, -5.327880859375, -5.11199951171875, -4.8961181640625, -4.68023681640625, -4.46435546875, -4.24847412109375, -4.0325927734375, -3.81671142578125, -3.600830078125, -3.38494873046875, -3.1690673828125, -2.95318603515625, -2.7373046875, -2.52142333984375, -2.3055419921875, -2.08966064453125, -1.873779296875, -1.65789794921875, -1.4420166015625, -1.22613525390625, -1.01025390625, -0.79437255859375, -0.5784912109375, -0.36260986328125, -0.146728515625, 0.06915283203125, 0.2850341796875, 0.50091552734375, 0.716796875, 0.93267822265625, 1.1485595703125, 1.36444091796875, 1.580322265625, 1.79620361328125, 2.0120849609375, 2.22796630859375, 2.44384765625, 2.65972900390625, 2.8756103515625, 3.09149169921875, 3.307373046875, 3.52325439453125, 3.7391357421875, 3.95501708984375, 4.1708984375, 4.38677978515625, 4.6026611328125, 4.81854248046875, 5.034423828125, 5.25030517578125, 5.4661865234375, 5.68206787109375, 5.89794921875, 6.11383056640625, 6.3297119140625, 6.54559326171875, 6.761474609375, 6.97735595703125, 7.1932373046875, 7.40911865234375, 7.625]}, "gradients/decoder.transformer.h.9.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 4.0, 6.0, 4.0, 7.0, 17.0, 17.0, 40.0, 44.0, 65.0, 115.0, 141.0, 260.0, 360.0, 612.0, 1012.0, 1571.0, 2605.0, 4339.0, 7304.0, 13162.0, 24470.0, 51551.0, 132720.0, 434901.0, 224818.0, 74801.0, 33222.0, 16962.0, 9554.0, 5326.0, 3340.0, 1959.0, 1212.0, 738.0, 441.0, 292.0, 199.0, 128.0, 77.0, 54.0, 40.0, 23.0, 20.0, 18.0, 7.0, 5.0, 3.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-7.36328125, -7.10687255859375, -6.8504638671875, -6.59405517578125, -6.337646484375, -6.08123779296875, -5.8248291015625, -5.56842041015625, -5.31201171875, -5.05560302734375, -4.7991943359375, -4.54278564453125, -4.286376953125, -4.02996826171875, -3.7735595703125, -3.51715087890625, -3.2607421875, -3.00433349609375, -2.7479248046875, -2.49151611328125, -2.235107421875, -1.97869873046875, -1.7222900390625, -1.46588134765625, -1.20947265625, -0.95306396484375, -0.6966552734375, -0.44024658203125, -0.183837890625, 0.07257080078125, 0.3289794921875, 0.58538818359375, 0.841796875, 1.09820556640625, 1.3546142578125, 1.61102294921875, 1.867431640625, 2.12384033203125, 2.3802490234375, 2.63665771484375, 2.89306640625, 3.14947509765625, 3.4058837890625, 3.66229248046875, 3.918701171875, 4.17510986328125, 4.4315185546875, 4.68792724609375, 4.9443359375, 5.20074462890625, 5.4571533203125, 5.71356201171875, 5.969970703125, 6.22637939453125, 6.4827880859375, 6.73919677734375, 6.99560546875, 7.25201416015625, 7.5084228515625, 7.76483154296875, 8.021240234375, 8.27764892578125, 8.5340576171875, 8.79046630859375, 9.046875]}, "gradients/decoder.transformer.h.9.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 1.0, 2.0, 2.0, 0.0, 6.0, 3.0, 10.0, 10.0, 11.0, 14.0, 17.0, 16.0, 21.0, 15.0, 26.0, 35.0, 28.0, 42.0, 57.0, 53.0, 79.0, 117.0, 151.0, 1385.0, 287.0, 155.0, 84.0, 66.0, 59.0, 49.0, 35.0, 30.0, 36.0, 26.0, 29.0, 14.0, 23.0, 17.0, 15.0, 8.0, 6.0, 3.0, 6.0, 7.0, 1.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-20.703125, -20.010986328125, -19.31884765625, -18.626708984375, -17.9345703125, -17.242431640625, -16.55029296875, -15.858154296875, -15.166015625, -14.473876953125, -13.78173828125, -13.089599609375, -12.3974609375, -11.705322265625, -11.01318359375, -10.321044921875, -9.62890625, -8.936767578125, -8.24462890625, -7.552490234375, -6.8603515625, -6.168212890625, -5.47607421875, -4.783935546875, -4.091796875, -3.399658203125, -2.70751953125, -2.015380859375, -1.3232421875, -0.631103515625, 0.06103515625, 0.753173828125, 1.4453125, 2.137451171875, 2.82958984375, 3.521728515625, 4.2138671875, 4.906005859375, 5.59814453125, 6.290283203125, 6.982421875, 7.674560546875, 8.36669921875, 9.058837890625, 9.7509765625, 10.443115234375, 11.13525390625, 11.827392578125, 12.51953125, 13.211669921875, 13.90380859375, 14.595947265625, 15.2880859375, 15.980224609375, 16.67236328125, 17.364501953125, 18.056640625, 18.748779296875, 19.44091796875, 20.133056640625, 20.8251953125, 21.517333984375, 22.20947265625, 22.901611328125, 23.59375]}, "gradients/decoder.transformer.h.9.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 1.0, 2.0, 4.0, 4.0, 5.0, 6.0, 11.0, 12.0, 21.0, 22.0, 22.0, 35.0, 47.0, 63.0, 71.0, 119.0, 193.0, 384.0, 1073.0, 4811.0, 48862.0, 2518307.0, 545289.0, 21889.0, 2848.0, 754.0, 309.0, 169.0, 97.0, 66.0, 51.0, 32.0, 23.0, 18.0, 21.0, 18.0, 10.0, 13.0, 6.0, 9.0, 7.0, 3.0, 1.0, 5.0, 4.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-53.53125, -51.9951171875, -50.458984375, -48.9228515625, -47.38671875, -45.8505859375, -44.314453125, -42.7783203125, -41.2421875, -39.7060546875, -38.169921875, -36.6337890625, -35.09765625, -33.5615234375, -32.025390625, -30.4892578125, -28.953125, -27.4169921875, -25.880859375, -24.3447265625, -22.80859375, -21.2724609375, -19.736328125, -18.2001953125, -16.6640625, -15.1279296875, -13.591796875, -12.0556640625, -10.51953125, -8.9833984375, -7.447265625, -5.9111328125, -4.375, -2.8388671875, -1.302734375, 0.2333984375, 1.76953125, 3.3056640625, 4.841796875, 6.3779296875, 7.9140625, 9.4501953125, 10.986328125, 12.5224609375, 14.05859375, 15.5947265625, 17.130859375, 18.6669921875, 20.203125, 21.7392578125, 23.275390625, 24.8115234375, 26.34765625, 27.8837890625, 29.419921875, 30.9560546875, 32.4921875, 34.0283203125, 35.564453125, 37.1005859375, 38.63671875, 40.1728515625, 41.708984375, 43.2451171875, 44.78125]}, "gradients/decoder.transformer.h.9.ln_1.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 9.0, 58.0, 215.0, 398.0, 253.0, 73.0, 9.0, 5.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-33.219886779785156, -28.16683006286621, -23.113773345947266, -18.060718536376953, -13.007661819458008, -7.9546051025390625, -2.90155029296875, 2.151508331298828, 7.204563140869141, 12.257619857788086, 17.31067657470703, 22.363731384277344, 27.41678810119629, 32.469844818115234, 37.52289962768555, 42.575958251953125, 47.62901306152344, 52.68206787109375, 57.73512649536133, 62.78818130493164, 67.84123992919922, 72.89429473876953, 77.94734954833984, 83.00041198730469, 88.053466796875, 93.10652160644531, 98.15957641601562, 103.21263122558594, 108.26569366455078, 113.3187484741211, 118.3718032836914, 123.42486572265625, 128.47792053222656, 133.53097534179688, 138.5840301513672, 143.6370849609375, 148.6901397705078, 153.74319458007812, 158.7962646484375, 163.8493194580078, 168.90237426757812, 173.95542907714844, 179.00848388671875, 184.06153869628906, 189.11459350585938, 194.16766357421875, 199.220703125, 204.27377319335938, 209.32681274414062, 214.37986755371094, 219.43292236328125, 224.48597717285156, 229.53903198242188, 234.59210205078125, 239.6451416015625, 244.69821166992188, 249.7512664794922, 254.8043212890625, 259.8573913574219, 264.9104309082031, 269.9635009765625, 275.01654052734375, 280.0696105957031, 285.1226501464844, 290.17572021484375]}, "gradients/decoder.transformer.h.9.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 3.0, 0.0, 4.0, 4.0, 3.0, 2.0, 7.0, 5.0, 7.0, 11.0, 10.0, 8.0, 15.0, 20.0, 31.0, 26.0, 27.0, 38.0, 47.0, 33.0, 44.0, 39.0, 41.0, 38.0, 36.0, 36.0, 48.0, 44.0, 45.0, 33.0, 34.0, 41.0, 32.0, 21.0, 18.0, 23.0, 18.0, 17.0, 14.0, 22.0, 12.0, 10.0, 19.0, 7.0, 2.0, 9.0, 3.0, 2.0, 3.0, 3.0, 3.0, 0.0, 1.0, 1.0], "bins": [-76.99597930908203, -74.8626480102539, -72.72931671142578, -70.59599304199219, -68.46266174316406, -66.32933044433594, -64.19599914550781, -62.06266784667969, -59.92934036254883, -57.7960090637207, -55.662681579589844, -53.52935028076172, -51.396018981933594, -49.262691497802734, -47.12936019897461, -44.99603271484375, -42.862701416015625, -40.7293701171875, -38.59604263305664, -36.462711334228516, -34.329383850097656, -32.19605255126953, -30.062721252441406, -27.929391860961914, -25.796062469482422, -23.66273307800293, -21.529403686523438, -19.396072387695312, -17.26274299621582, -15.129413604736328, -12.99608325958252, -10.862752914428711, -8.729423522949219, -6.596093654632568, -4.462763786315918, -2.3294339179992676, -0.1961040496826172, 1.937225341796875, 4.070555686950684, 6.203886032104492, 8.337215423583984, 10.470544815063477, 12.603875160217285, 14.737205505371094, 16.870534896850586, 19.003864288330078, 21.137195587158203, 23.270524978637695, 25.403854370117188, 27.53718376159668, 29.670513153076172, 31.803844451904297, 33.937171936035156, 36.07050323486328, 38.203834533691406, 40.33716583251953, 42.47049331665039, 44.603824615478516, 46.737152099609375, 48.8704833984375, 51.003814697265625, 53.137142181396484, 55.27047348022461, 57.40380096435547, 59.537132263183594]}, "gradients/decoder.transformer.h.8.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 3.0, 1.0, 3.0, 5.0, 4.0, 3.0, 10.0, 13.0, 7.0, 12.0, 19.0, 19.0, 22.0, 25.0, 33.0, 32.0, 32.0, 32.0, 29.0, 40.0, 46.0, 44.0, 38.0, 46.0, 54.0, 38.0, 40.0, 37.0, 37.0, 30.0, 32.0, 38.0, 32.0, 22.0, 22.0, 21.0, 17.0, 15.0, 16.0, 10.0, 7.0, 8.0, 5.0, 7.0, 1.0, 4.0, 3.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-6.79296875, -6.5650634765625, -6.337158203125, -6.1092529296875, -5.88134765625, -5.6534423828125, -5.425537109375, -5.1976318359375, -4.9697265625, -4.7418212890625, -4.513916015625, -4.2860107421875, -4.05810546875, -3.8302001953125, -3.602294921875, -3.3743896484375, -3.146484375, -2.9185791015625, -2.690673828125, -2.4627685546875, -2.23486328125, -2.0069580078125, -1.779052734375, -1.5511474609375, -1.3232421875, -1.0953369140625, -0.867431640625, -0.6395263671875, -0.41162109375, -0.1837158203125, 0.044189453125, 0.2720947265625, 0.5, 0.7279052734375, 0.955810546875, 1.1837158203125, 1.41162109375, 1.6395263671875, 1.867431640625, 2.0953369140625, 2.3232421875, 2.5511474609375, 2.779052734375, 3.0069580078125, 3.23486328125, 3.4627685546875, 3.690673828125, 3.9185791015625, 4.146484375, 4.3743896484375, 4.602294921875, 4.8302001953125, 5.05810546875, 5.2860107421875, 5.513916015625, 5.7418212890625, 5.9697265625, 6.1976318359375, 6.425537109375, 6.6534423828125, 6.88134765625, 7.1092529296875, 7.337158203125, 7.5650634765625, 7.79296875]}, "gradients/decoder.transformer.h.8.mlp.c_proj.weight": {"_type": "histogram", "values": [3.0, 1.0, 2.0, 0.0, 4.0, 2.0, 8.0, 6.0, 13.0, 15.0, 18.0, 35.0, 46.0, 61.0, 102.0, 113.0, 185.0, 284.0, 361.0, 544.0, 857.0, 1294.0, 1996.0, 3105.0, 5190.0, 8191.0, 13615.0, 23942.0, 44426.0, 88806.0, 203835.0, 542018.0, 1268329.0, 1162283.0, 468180.0, 179551.0, 80239.0, 40483.0, 22148.0, 12848.0, 7836.0, 4760.0, 2961.0, 1851.0, 1182.0, 830.0, 562.0, 326.0, 273.0, 157.0, 119.0, 82.0, 70.0, 44.0, 32.0, 20.0, 24.0, 8.0, 8.0, 8.0, 4.0, 4.0, 3.0, 1.0], "bins": [-7.4765625, -7.24951171875, -7.0224609375, -6.79541015625, -6.568359375, -6.34130859375, -6.1142578125, -5.88720703125, -5.66015625, -5.43310546875, -5.2060546875, -4.97900390625, -4.751953125, -4.52490234375, -4.2978515625, -4.07080078125, -3.84375, -3.61669921875, -3.3896484375, -3.16259765625, -2.935546875, -2.70849609375, -2.4814453125, -2.25439453125, -2.02734375, -1.80029296875, -1.5732421875, -1.34619140625, -1.119140625, -0.89208984375, -0.6650390625, -0.43798828125, -0.2109375, 0.01611328125, 0.2431640625, 0.47021484375, 0.697265625, 0.92431640625, 1.1513671875, 1.37841796875, 1.60546875, 1.83251953125, 2.0595703125, 2.28662109375, 2.513671875, 2.74072265625, 2.9677734375, 3.19482421875, 3.421875, 3.64892578125, 3.8759765625, 4.10302734375, 4.330078125, 4.55712890625, 4.7841796875, 5.01123046875, 5.23828125, 5.46533203125, 5.6923828125, 5.91943359375, 6.146484375, 6.37353515625, 6.6005859375, 6.82763671875, 7.0546875]}, "gradients/decoder.transformer.h.8.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 2.0, 2.0, 1.0, 4.0, 7.0, 10.0, 15.0, 10.0, 19.0, 20.0, 29.0, 32.0, 42.0, 50.0, 82.0, 92.0, 119.0, 178.0, 239.0, 358.0, 422.0, 475.0, 430.0, 367.0, 269.0, 180.0, 138.0, 105.0, 93.0, 55.0, 55.0, 36.0, 37.0, 26.0, 21.0, 17.0, 7.0, 8.0, 10.0, 9.0, 7.0, 2.0, 2.0, 3.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-11.4609375, -11.0972900390625, -10.733642578125, -10.3699951171875, -10.00634765625, -9.6427001953125, -9.279052734375, -8.9154052734375, -8.5517578125, -8.1881103515625, -7.824462890625, -7.4608154296875, -7.09716796875, -6.7335205078125, -6.369873046875, -6.0062255859375, -5.642578125, -5.2789306640625, -4.915283203125, -4.5516357421875, -4.18798828125, -3.8243408203125, -3.460693359375, -3.0970458984375, -2.7333984375, -2.3697509765625, -2.006103515625, -1.6424560546875, -1.27880859375, -0.9151611328125, -0.551513671875, -0.1878662109375, 0.17578125, 0.5394287109375, 0.903076171875, 1.2667236328125, 1.63037109375, 1.9940185546875, 2.357666015625, 2.7213134765625, 3.0849609375, 3.4486083984375, 3.812255859375, 4.1759033203125, 4.53955078125, 4.9031982421875, 5.266845703125, 5.6304931640625, 5.994140625, 6.3577880859375, 6.721435546875, 7.0850830078125, 7.44873046875, 7.8123779296875, 8.176025390625, 8.5396728515625, 8.9033203125, 9.2669677734375, 9.630615234375, 9.9942626953125, 10.35791015625, 10.7215576171875, 11.085205078125, 11.4488525390625, 11.8125]}, "gradients/decoder.transformer.h.8.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 3.0, 5.0, 7.0, 5.0, 8.0, 9.0, 13.0, 19.0, 25.0, 30.0, 62.0, 66.0, 72.0, 120.0, 191.0, 289.0, 487.0, 1415.0, 8469.0, 136751.0, 3586571.0, 437221.0, 18574.0, 2322.0, 617.0, 323.0, 174.0, 141.0, 75.0, 57.0, 45.0, 24.0, 21.0, 27.0, 20.0, 12.0, 7.0, 3.0, 6.0, 4.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-47.8125, -46.39208984375, -44.9716796875, -43.55126953125, -42.130859375, -40.71044921875, -39.2900390625, -37.86962890625, -36.44921875, -35.02880859375, -33.6083984375, -32.18798828125, -30.767578125, -29.34716796875, -27.9267578125, -26.50634765625, -25.0859375, -23.66552734375, -22.2451171875, -20.82470703125, -19.404296875, -17.98388671875, -16.5634765625, -15.14306640625, -13.72265625, -12.30224609375, -10.8818359375, -9.46142578125, -8.041015625, -6.62060546875, -5.2001953125, -3.77978515625, -2.359375, -0.93896484375, 0.4814453125, 1.90185546875, 3.322265625, 4.74267578125, 6.1630859375, 7.58349609375, 9.00390625, 10.42431640625, 11.8447265625, 13.26513671875, 14.685546875, 16.10595703125, 17.5263671875, 18.94677734375, 20.3671875, 21.78759765625, 23.2080078125, 24.62841796875, 26.048828125, 27.46923828125, 28.8896484375, 30.31005859375, 31.73046875, 33.15087890625, 34.5712890625, 35.99169921875, 37.412109375, 38.83251953125, 40.2529296875, 41.67333984375, 43.09375]}, "gradients/decoder.transformer.h.8.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 3.0, 10.0, 57.0, 182.0, 317.0, 277.0, 114.0, 41.0, 10.0, 4.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-321.2581481933594, -314.9296875, -308.6012268066406, -302.27276611328125, -295.9443054199219, -289.6158447265625, -283.2873840332031, -276.95892333984375, -270.63043212890625, -264.3019714355469, -257.9735107421875, -251.64505004882812, -245.31658935546875, -238.9881134033203, -232.65965270996094, -226.33119201660156, -220.00274658203125, -213.67428588867188, -207.3458251953125, -201.01736450195312, -194.68890380859375, -188.3604278564453, -182.03196716308594, -175.70350646972656, -169.3750457763672, -163.0465850830078, -156.71812438964844, -150.38966369628906, -144.06118774414062, -137.73272705078125, -131.40426635742188, -125.0758056640625, -118.7473373413086, -112.41887664794922, -106.09040832519531, -99.76194763183594, -93.43348693847656, -87.10502624511719, -80.77656555175781, -74.4480972290039, -68.11963653564453, -61.791175842285156, -55.462711334228516, -49.134246826171875, -42.8057861328125, -36.477325439453125, -30.148860931396484, -23.820396423339844, -17.49193572998047, -11.163473129272461, -4.835010528564453, 1.4934520721435547, 7.8219146728515625, 14.15037727355957, 20.478839874267578, 26.80730438232422, 33.135765075683594, 39.46422576904297, 45.79269027709961, 52.12115478515625, 58.449615478515625, 64.778076171875, 71.10653686523438, 77.43500518798828, 83.76346588134766]}, "gradients/decoder.transformer.h.8.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0, 6.0, 1.0, 3.0, 5.0, 8.0, 11.0, 7.0, 7.0, 13.0, 17.0, 27.0, 14.0, 16.0, 30.0, 24.0, 29.0, 34.0, 37.0, 42.0, 28.0, 32.0, 42.0, 39.0, 39.0, 52.0, 31.0, 37.0, 31.0, 33.0, 29.0, 38.0, 27.0, 26.0, 29.0, 21.0, 22.0, 17.0, 19.0, 18.0, 14.0, 15.0, 5.0, 7.0, 6.0, 7.0, 4.0, 3.0, 1.0, 2.0, 5.0, 3.0, 2.0, 0.0, 1.0, 1.0, 2.0], "bins": [-41.45867919921875, -40.17283630371094, -38.886993408203125, -37.60115051269531, -36.3153076171875, -35.02946472167969, -33.743621826171875, -32.45777893066406, -31.17193603515625, -29.886093139648438, -28.600250244140625, -27.314407348632812, -26.028564453125, -24.742721557617188, -23.456878662109375, -22.171035766601562, -20.88519287109375, -19.599349975585938, -18.313507080078125, -17.027664184570312, -15.7418212890625, -14.455978393554688, -13.170135498046875, -11.884292602539062, -10.59844970703125, -9.312606811523438, -8.026763916015625, -6.7409210205078125, -5.455078125, -4.1692352294921875, -2.883392333984375, -1.5975494384765625, -0.31170654296875, 0.9741363525390625, 2.259979248046875, 3.5458221435546875, 4.8316650390625, 6.1175079345703125, 7.403350830078125, 8.689193725585938, 9.97503662109375, 11.260879516601562, 12.546722412109375, 13.832565307617188, 15.118408203125, 16.404251098632812, 17.690093994140625, 18.975936889648438, 20.26177978515625, 21.547622680664062, 22.833465576171875, 24.119308471679688, 25.4051513671875, 26.690994262695312, 27.976837158203125, 29.262680053710938, 30.54852294921875, 31.834365844726562, 33.120208740234375, 34.40605163574219, 35.69189453125, 36.97773742675781, 38.263580322265625, 39.54942321777344, 40.83526611328125]}, "gradients/decoder.transformer.h.8.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 6.0, 2.0, 7.0, 3.0, 3.0, 10.0, 7.0, 11.0, 10.0, 14.0, 20.0, 15.0, 20.0, 36.0, 29.0, 31.0, 34.0, 31.0, 39.0, 40.0, 47.0, 42.0, 44.0, 44.0, 35.0, 49.0, 31.0, 33.0, 45.0, 50.0, 32.0, 32.0, 20.0, 22.0, 25.0, 23.0, 15.0, 13.0, 6.0, 10.0, 10.0, 5.0, 2.0, 4.0, 3.0, 3.0, 2.0, 1.0, 1.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.4375, -6.2015380859375, -5.965576171875, -5.7296142578125, -5.49365234375, -5.2576904296875, -5.021728515625, -4.7857666015625, -4.5498046875, -4.3138427734375, -4.077880859375, -3.8419189453125, -3.60595703125, -3.3699951171875, -3.134033203125, -2.8980712890625, -2.662109375, -2.4261474609375, -2.190185546875, -1.9542236328125, -1.71826171875, -1.4822998046875, -1.246337890625, -1.0103759765625, -0.7744140625, -0.5384521484375, -0.302490234375, -0.0665283203125, 0.16943359375, 0.4053955078125, 0.641357421875, 0.8773193359375, 1.11328125, 1.3492431640625, 1.585205078125, 1.8211669921875, 2.05712890625, 2.2930908203125, 2.529052734375, 2.7650146484375, 3.0009765625, 3.2369384765625, 3.472900390625, 3.7088623046875, 3.94482421875, 4.1807861328125, 4.416748046875, 4.6527099609375, 4.888671875, 5.1246337890625, 5.360595703125, 5.5965576171875, 5.83251953125, 6.0684814453125, 6.304443359375, 6.5404052734375, 6.7763671875, 7.0123291015625, 7.248291015625, 7.4842529296875, 7.72021484375, 7.9561767578125, 8.192138671875, 8.4281005859375, 8.6640625]}, "gradients/decoder.transformer.h.8.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 3.0, 1.0, 3.0, 3.0, 9.0, 12.0, 17.0, 23.0, 24.0, 64.0, 70.0, 121.0, 202.0, 323.0, 491.0, 793.0, 1263.0, 1984.0, 3257.0, 5233.0, 8633.0, 14205.0, 24060.0, 41942.0, 76307.0, 161561.0, 385186.0, 151362.0, 72590.0, 40029.0, 23296.0, 13751.0, 8308.0, 5169.0, 3067.0, 1942.0, 1200.0, 759.0, 481.0, 294.0, 191.0, 126.0, 78.0, 52.0, 31.0, 16.0, 11.0, 11.0, 4.0, 8.0, 2.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.285888671875, -0.2758445739746094, -0.26580047607421875, -0.2557563781738281, -0.2457122802734375, -0.23566818237304688, -0.22562408447265625, -0.21557998657226562, -0.205535888671875, -0.19549179077148438, -0.18544769287109375, -0.17540359497070312, -0.1653594970703125, -0.15531539916992188, -0.14527130126953125, -0.13522720336914062, -0.12518310546875, -0.11513900756835938, -0.10509490966796875, -0.09505081176757812, -0.0850067138671875, -0.07496261596679688, -0.06491851806640625, -0.054874420166015625, -0.044830322265625, -0.034786224365234375, -0.02474212646484375, -0.014698028564453125, -0.0046539306640625, 0.005390167236328125, 0.01543426513671875, 0.025478363037109375, 0.0355224609375, 0.045566558837890625, 0.05561065673828125, 0.06565475463867188, 0.0756988525390625, 0.08574295043945312, 0.09578704833984375, 0.10583114624023438, 0.115875244140625, 0.12591934204101562, 0.13596343994140625, 0.14600753784179688, 0.1560516357421875, 0.16609573364257812, 0.17613983154296875, 0.18618392944335938, 0.19622802734375, 0.20627212524414062, 0.21631622314453125, 0.22636032104492188, 0.2364044189453125, 0.24644851684570312, 0.25649261474609375, 0.2665367126464844, 0.276580810546875, 0.2866249084472656, 0.29666900634765625, 0.3067131042480469, 0.3167572021484375, 0.3268013000488281, 0.33684539794921875, 0.3468894958496094, 0.35693359375]}, "gradients/decoder.transformer.h.8.crossattention.c_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 2.0, 0.0, 4.0, 0.0, 2.0, 4.0, 4.0, 9.0, 5.0, 6.0, 13.0, 9.0, 12.0, 27.0, 17.0, 13.0, 18.0, 15.0, 24.0, 36.0, 33.0, 27.0, 27.0, 40.0, 43.0, 38.0, 41.0, 1066.0, 40.0, 55.0, 45.0, 36.0, 37.0, 41.0, 32.0, 38.0, 19.0, 23.0, 19.0, 25.0, 13.0, 18.0, 20.0, 5.0, 6.0, 7.0, 8.0, 5.0, 2.0, 1.0, 5.0, 3.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-4.8828125, -4.71881103515625, -4.5548095703125, -4.39080810546875, -4.226806640625, -4.06280517578125, -3.8988037109375, -3.73480224609375, -3.57080078125, -3.40679931640625, -3.2427978515625, -3.07879638671875, -2.914794921875, -2.75079345703125, -2.5867919921875, -2.42279052734375, -2.2587890625, -2.09478759765625, -1.9307861328125, -1.76678466796875, -1.602783203125, -1.43878173828125, -1.2747802734375, -1.11077880859375, -0.94677734375, -0.78277587890625, -0.6187744140625, -0.45477294921875, -0.290771484375, -0.12677001953125, 0.0372314453125, 0.20123291015625, 0.365234375, 0.52923583984375, 0.6932373046875, 0.85723876953125, 1.021240234375, 1.18524169921875, 1.3492431640625, 1.51324462890625, 1.67724609375, 1.84124755859375, 2.0052490234375, 2.16925048828125, 2.333251953125, 2.49725341796875, 2.6612548828125, 2.82525634765625, 2.9892578125, 3.15325927734375, 3.3172607421875, 3.48126220703125, 3.645263671875, 3.80926513671875, 3.9732666015625, 4.13726806640625, 4.30126953125, 4.46527099609375, 4.6292724609375, 4.79327392578125, 4.957275390625, 5.12127685546875, 5.2852783203125, 5.44927978515625, 5.61328125]}, "gradients/decoder.transformer.h.8.crossattention.c_attn.weight": {"_type": "histogram", "values": [4.0, 3.0, 5.0, 10.0, 13.0, 13.0, 29.0, 32.0, 47.0, 62.0, 84.0, 122.0, 170.0, 269.0, 355.0, 477.0, 685.0, 937.0, 1435.0, 1995.0, 2684.0, 4071.0, 5510.0, 7983.0, 11514.0, 17232.0, 25580.0, 38732.0, 61696.0, 103508.0, 236649.0, 1288022.0, 103459.0, 62371.0, 39306.0, 25874.0, 17448.0, 11764.0, 7909.0, 5468.0, 4007.0, 2774.0, 1962.0, 1465.0, 1011.0, 686.0, 504.0, 350.0, 232.0, 206.0, 128.0, 90.0, 67.0, 57.0, 25.0, 22.0, 10.0, 6.0, 8.0, 6.0, 4.0, 2.0, 2.0, 1.0], "bins": [-0.12744140625, -0.12333106994628906, -0.11922073364257812, -0.11511039733886719, -0.11100006103515625, -0.10688972473144531, -0.10277938842773438, -0.09866905212402344, -0.0945587158203125, -0.09044837951660156, -0.08633804321289062, -0.08222770690917969, -0.07811737060546875, -0.07400703430175781, -0.06989669799804688, -0.06578636169433594, -0.061676025390625, -0.05756568908691406, -0.053455352783203125, -0.04934501647949219, -0.04523468017578125, -0.04112434387207031, -0.037014007568359375, -0.03290367126464844, -0.0287933349609375, -0.024682998657226562, -0.020572662353515625, -0.016462326049804688, -0.01235198974609375, -0.008241653442382812, -0.004131317138671875, -2.09808349609375e-05, 0.00408935546875, 0.008199691772460938, 0.012310028076171875, 0.016420364379882812, 0.02053070068359375, 0.024641036987304688, 0.028751373291015625, 0.03286170959472656, 0.0369720458984375, 0.04108238220214844, 0.045192718505859375, 0.04930305480957031, 0.05341339111328125, 0.05752372741699219, 0.061634063720703125, 0.06574440002441406, 0.069854736328125, 0.07396507263183594, 0.07807540893554688, 0.08218574523925781, 0.08629608154296875, 0.09040641784667969, 0.09451675415039062, 0.09862709045410156, 0.1027374267578125, 0.10684776306152344, 0.11095809936523438, 0.11506843566894531, 0.11917877197265625, 0.12328910827636719, 0.12739944458007812, 0.13150978088378906, 0.1356201171875]}, "gradients/decoder.transformer.h.8.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 1.0, 2.0, 4.0, 3.0, 5.0, 4.0, 6.0, 8.0, 13.0, 7.0, 14.0, 17.0, 18.0, 12.0, 28.0, 36.0, 54.0, 44.0, 61.0, 153.0, 173.0, 86.0, 36.0, 26.0, 40.0, 31.0, 21.0, 20.0, 15.0, 13.0, 9.0, 9.0, 8.0, 11.0, 5.0, 1.0, 6.0, 7.0, 2.0, 0.0, 0.0, 2.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-8.463859558105469e-06, -8.230097591876984e-06, -7.996335625648499e-06, -7.762573659420013e-06, -7.528811693191528e-06, -7.295049726963043e-06, -7.061287760734558e-06, -6.827525794506073e-06, -6.593763828277588e-06, -6.360001862049103e-06, -6.126239895820618e-06, -5.8924779295921326e-06, -5.6587159633636475e-06, -5.424953997135162e-06, -5.191192030906677e-06, -4.957430064678192e-06, -4.723668098449707e-06, -4.489906132221222e-06, -4.256144165992737e-06, -4.022382199764252e-06, -3.7886202335357666e-06, -3.5548582673072815e-06, -3.3210963010787964e-06, -3.0873343348503113e-06, -2.853572368621826e-06, -2.619810402393341e-06, -2.386048436164856e-06, -2.152286469936371e-06, -1.9185245037078857e-06, -1.6847625374794006e-06, -1.4510005712509155e-06, -1.2172386050224304e-06, -9.834766387939453e-07, -7.497146725654602e-07, -5.159527063369751e-07, -2.8219074010849e-07, -4.842877388000488e-08, 1.8533319234848022e-07, 4.1909515857696533e-07, 6.528571248054504e-07, 8.866190910339355e-07, 1.1203810572624207e-06, 1.3541430234909058e-06, 1.5879049897193909e-06, 1.821666955947876e-06, 2.055428922176361e-06, 2.289190888404846e-06, 2.5229528546333313e-06, 2.7567148208618164e-06, 2.9904767870903015e-06, 3.2242387533187866e-06, 3.4580007195472717e-06, 3.691762685775757e-06, 3.925524652004242e-06, 4.159286618232727e-06, 4.393048584461212e-06, 4.626810550689697e-06, 4.860572516918182e-06, 5.0943344831466675e-06, 5.328096449375153e-06, 5.561858415603638e-06, 5.795620381832123e-06, 6.029382348060608e-06, 6.263144314289093e-06, 6.496906280517578e-06]}, "gradients/decoder.transformer.h.8.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 2.0, 3.0, 4.0, 4.0, 1.0, 7.0, 7.0, 10.0, 5.0, 14.0, 13.0, 21.0, 25.0, 34.0, 52.0, 89.0, 124.0, 362.0, 5388.0, 1001613.0, 39638.0, 618.0, 186.0, 112.0, 62.0, 42.0, 23.0, 18.0, 19.0, 14.0, 13.0, 7.0, 12.0, 4.0, 1.0, 3.0, 2.0, 6.0, 2.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00013971328735351562, -0.00013466551899909973, -0.00012961775064468384, -0.00012456998229026794, -0.00011952221393585205, -0.00011447444558143616, -0.00010942667722702026, -0.00010437890887260437, -9.933114051818848e-05, -9.428337216377258e-05, -8.923560380935669e-05, -8.41878354549408e-05, -7.91400671005249e-05, -7.409229874610901e-05, -6.904453039169312e-05, -6.399676203727722e-05, -5.894899368286133e-05, -5.3901225328445435e-05, -4.885345697402954e-05, -4.380568861961365e-05, -3.8757920265197754e-05, -3.371015191078186e-05, -2.8662383556365967e-05, -2.3614615201950073e-05, -1.856684684753418e-05, -1.3519078493118286e-05, -8.471310138702393e-06, -3.423541784286499e-06, 1.6242265701293945e-06, 6.671994924545288e-06, 1.1719763278961182e-05, 1.6767531633377075e-05, 2.181529998779297e-05, 2.6863068342208862e-05, 3.1910836696624756e-05, 3.695860505104065e-05, 4.200637340545654e-05, 4.7054141759872437e-05, 5.210191011428833e-05, 5.7149678468704224e-05, 6.219744682312012e-05, 6.724521517753601e-05, 7.22929835319519e-05, 7.73407518863678e-05, 8.238852024078369e-05, 8.743628859519958e-05, 9.248405694961548e-05, 9.753182530403137e-05, 0.00010257959365844727, 0.00010762736201286316, 0.00011267513036727905, 0.00011772289872169495, 0.00012277066707611084, 0.00012781843543052673, 0.00013286620378494263, 0.00013791397213935852, 0.00014296174049377441, 0.0001480095088481903, 0.0001530572772026062, 0.0001581050455570221, 0.000163152813911438, 0.00016820058226585388, 0.00017324835062026978, 0.00017829611897468567, 0.00018334388732910156]}, "gradients/decoder.transformer.h.8.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 7.0, 17.0, 79.0, 149.0, 292.0, 272.0, 132.0, 45.0, 19.0, 8.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.911409516556887e-06, -5.536326625588117e-06, -5.161244189366698e-06, -4.7861612983979285e-06, -4.411078407429159e-06, -4.035995516460389e-06, -3.66091308023897e-06, -3.2858301892702e-06, -2.910747525675106e-06, -2.5356648620800115e-06, -2.1605819711112417e-06, -1.7854993075161474e-06, -1.4104165302342153e-06, -1.0353337529522832e-06, -6.602510893571889e-07, -2.851681983884191e-07, 8.991446520667523e-08, 4.6499721406689787e-07, 8.400799629271205e-07, 1.2151626833656337e-06, 1.5902454606475658e-06, 1.965328237929498e-06, 2.340410901524592e-06, 2.715493792493362e-06, 3.0905764560884563e-06, 3.4656591196835507e-06, 3.8407420106523205e-06, 4.215824446873739e-06, 4.590907337842509e-06, 4.965990228811279e-06, 5.341073119780049e-06, 5.7161560107488185e-06, 6.0912379922228865e-06, 6.466320883191656e-06, 6.841403319413075e-06, 7.216486210381845e-06, 7.591569101350615e-06, 7.966651537572034e-06, 8.341734428540803e-06, 8.716817319509573e-06, 9.091900210478343e-06, 9.466983101447113e-06, 9.842065992415883e-06, 1.0217148883384652e-05, 1.059223086485872e-05, 1.096731375582749e-05, 1.134239664679626e-05, 1.171747953776503e-05, 1.2092561519239098e-05, 1.2467644410207868e-05, 1.2842727301176637e-05, 1.3217810192145407e-05, 1.3592892173619475e-05, 1.3967975064588245e-05, 1.4343057955557015e-05, 1.4718140846525785e-05, 1.5093223737494554e-05, 1.5468305718968622e-05, 1.5843388609937392e-05, 1.6218471500906162e-05, 1.6593554391874932e-05, 1.69686372828437e-05, 1.734372017381247e-05, 1.771880306478124e-05, 1.809388595575001e-05]}, "gradients/decoder.transformer.h.8.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 3.0, 0.0, 1.0, 0.0, 4.0, 4.0, 10.0, 6.0, 4.0, 9.0, 1.0, 10.0, 5.0, 16.0, 4.0, 23.0, 26.0, 18.0, 33.0, 24.0, 31.0, 15.0, 50.0, 30.0, 23.0, 52.0, 20.0, 55.0, 18.0, 63.0, 31.0, 56.0, 42.0, 27.0, 33.0, 19.0, 33.0, 16.0, 35.0, 31.0, 18.0, 19.0, 10.0, 14.0, 8.0, 17.0, 6.0, 9.0, 12.0, 1.0, 9.0, 2.0, 3.0, 2.0, 3.0, 5.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-2.8014183044433594e-06, -2.7082860469818115e-06, -2.6151537895202637e-06, -2.522021532058716e-06, -2.428889274597168e-06, -2.33575701713562e-06, -2.2426247596740723e-06, -2.1494925022125244e-06, -2.0563602447509766e-06, -1.9632279872894287e-06, -1.8700957298278809e-06, -1.776963472366333e-06, -1.6838312149047852e-06, -1.5906989574432373e-06, -1.4975666999816895e-06, -1.4044344425201416e-06, -1.3113021850585938e-06, -1.218169927597046e-06, -1.125037670135498e-06, -1.0319054126739502e-06, -9.387731552124023e-07, -8.456408977508545e-07, -7.525086402893066e-07, -6.593763828277588e-07, -5.662441253662109e-07, -4.731118679046631e-07, -3.7997961044311523e-07, -2.868473529815674e-07, -1.9371509552001953e-07, -1.0058283805847168e-07, -7.450580596923828e-09, 8.568167686462402e-08, 1.7881393432617188e-07, 2.7194619178771973e-07, 3.650784492492676e-07, 4.5821070671081543e-07, 5.513429641723633e-07, 6.444752216339111e-07, 7.37607479095459e-07, 8.307397365570068e-07, 9.238719940185547e-07, 1.0170042514801025e-06, 1.1101365089416504e-06, 1.2032687664031982e-06, 1.296401023864746e-06, 1.389533281326294e-06, 1.4826655387878418e-06, 1.5757977962493896e-06, 1.6689300537109375e-06, 1.7620623111724854e-06, 1.8551945686340332e-06, 1.948326826095581e-06, 2.041459083557129e-06, 2.1345913410186768e-06, 2.2277235984802246e-06, 2.3208558559417725e-06, 2.4139881134033203e-06, 2.507120370864868e-06, 2.600252628326416e-06, 2.693384885787964e-06, 2.7865171432495117e-06, 2.8796494007110596e-06, 2.9727816581726074e-06, 3.0659139156341553e-06, 3.159046173095703e-06]}, "gradients/decoder.transformer.h.8.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 6.0, 2.0, 7.0, 3.0, 3.0, 10.0, 7.0, 11.0, 10.0, 14.0, 20.0, 15.0, 20.0, 36.0, 29.0, 31.0, 34.0, 31.0, 39.0, 40.0, 47.0, 42.0, 44.0, 44.0, 35.0, 49.0, 31.0, 33.0, 45.0, 50.0, 32.0, 32.0, 20.0, 22.0, 25.0, 23.0, 15.0, 13.0, 6.0, 10.0, 10.0, 5.0, 2.0, 4.0, 3.0, 3.0, 2.0, 1.0, 1.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.4375, -6.2015380859375, -5.965576171875, -5.7296142578125, -5.49365234375, -5.2576904296875, -5.021728515625, -4.7857666015625, -4.5498046875, -4.3138427734375, -4.077880859375, -3.8419189453125, -3.60595703125, -3.3699951171875, -3.134033203125, -2.8980712890625, -2.662109375, -2.4261474609375, -2.190185546875, -1.9542236328125, -1.71826171875, -1.4822998046875, -1.246337890625, -1.0103759765625, -0.7744140625, -0.5384521484375, -0.302490234375, -0.0665283203125, 0.16943359375, 0.4053955078125, 0.641357421875, 0.8773193359375, 1.11328125, 1.3492431640625, 1.585205078125, 1.8211669921875, 2.05712890625, 2.2930908203125, 2.529052734375, 2.7650146484375, 3.0009765625, 3.2369384765625, 3.472900390625, 3.7088623046875, 3.94482421875, 4.1807861328125, 4.416748046875, 4.6527099609375, 4.888671875, 5.1246337890625, 5.360595703125, 5.5965576171875, 5.83251953125, 6.0684814453125, 6.304443359375, 6.5404052734375, 6.7763671875, 7.0123291015625, 7.248291015625, 7.4842529296875, 7.72021484375, 7.9561767578125, 8.192138671875, 8.4281005859375, 8.6640625]}, "gradients/decoder.transformer.h.8.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 4.0, 2.0, 3.0, 1.0, 6.0, 12.0, 11.0, 12.0, 21.0, 55.0, 55.0, 89.0, 151.0, 304.0, 568.0, 1141.0, 2106.0, 4305.0, 8451.0, 16474.0, 32531.0, 67116.0, 172926.0, 436693.0, 172296.0, 66922.0, 32828.0, 16535.0, 8295.0, 4202.0, 2143.0, 1060.0, 537.0, 284.0, 176.0, 94.0, 59.0, 26.0, 23.0, 12.0, 13.0, 11.0, 8.0, 3.0, 3.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.3359375, -7.05908203125, -6.7822265625, -6.50537109375, -6.228515625, -5.95166015625, -5.6748046875, -5.39794921875, -5.12109375, -4.84423828125, -4.5673828125, -4.29052734375, -4.013671875, -3.73681640625, -3.4599609375, -3.18310546875, -2.90625, -2.62939453125, -2.3525390625, -2.07568359375, -1.798828125, -1.52197265625, -1.2451171875, -0.96826171875, -0.69140625, -0.41455078125, -0.1376953125, 0.13916015625, 0.416015625, 0.69287109375, 0.9697265625, 1.24658203125, 1.5234375, 1.80029296875, 2.0771484375, 2.35400390625, 2.630859375, 2.90771484375, 3.1845703125, 3.46142578125, 3.73828125, 4.01513671875, 4.2919921875, 4.56884765625, 4.845703125, 5.12255859375, 5.3994140625, 5.67626953125, 5.953125, 6.22998046875, 6.5068359375, 6.78369140625, 7.060546875, 7.33740234375, 7.6142578125, 7.89111328125, 8.16796875, 8.44482421875, 8.7216796875, 8.99853515625, 9.275390625, 9.55224609375, 9.8291015625, 10.10595703125, 10.3828125]}, "gradients/decoder.transformer.h.8.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 3.0, 1.0, 4.0, 4.0, 2.0, 7.0, 11.0, 4.0, 11.0, 11.0, 12.0, 20.0, 17.0, 18.0, 26.0, 23.0, 33.0, 30.0, 38.0, 34.0, 41.0, 67.0, 56.0, 151.0, 376.0, 1390.0, 173.0, 77.0, 46.0, 32.0, 45.0, 40.0, 25.0, 33.0, 30.0, 21.0, 20.0, 28.0, 13.0, 14.0, 12.0, 13.0, 6.0, 11.0, 7.0, 7.0, 5.0, 5.0, 3.0, 2.0, 2.0, 2.0, 0.0, 1.0, 2.0], "bins": [-22.234375, -21.58154296875, -20.9287109375, -20.27587890625, -19.623046875, -18.97021484375, -18.3173828125, -17.66455078125, -17.01171875, -16.35888671875, -15.7060546875, -15.05322265625, -14.400390625, -13.74755859375, -13.0947265625, -12.44189453125, -11.7890625, -11.13623046875, -10.4833984375, -9.83056640625, -9.177734375, -8.52490234375, -7.8720703125, -7.21923828125, -6.56640625, -5.91357421875, -5.2607421875, -4.60791015625, -3.955078125, -3.30224609375, -2.6494140625, -1.99658203125, -1.34375, -0.69091796875, -0.0380859375, 0.61474609375, 1.267578125, 1.92041015625, 2.5732421875, 3.22607421875, 3.87890625, 4.53173828125, 5.1845703125, 5.83740234375, 6.490234375, 7.14306640625, 7.7958984375, 8.44873046875, 9.1015625, 9.75439453125, 10.4072265625, 11.06005859375, 11.712890625, 12.36572265625, 13.0185546875, 13.67138671875, 14.32421875, 14.97705078125, 15.6298828125, 16.28271484375, 16.935546875, 17.58837890625, 18.2412109375, 18.89404296875, 19.546875]}, "gradients/decoder.transformer.h.8.attn.c_attn.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 1.0, 3.0, 1.0, 3.0, 4.0, 5.0, 5.0, 9.0, 10.0, 7.0, 16.0, 19.0, 29.0, 20.0, 36.0, 36.0, 48.0, 73.0, 87.0, 102.0, 155.0, 234.0, 354.0, 1192.0, 6354.0, 57904.0, 2036302.0, 993826.0, 41843.0, 4888.0, 969.0, 355.0, 205.0, 150.0, 102.0, 73.0, 76.0, 49.0, 39.0, 25.0, 28.0, 19.0, 14.0, 14.0, 8.0, 8.0, 5.0, 3.0, 2.0, 5.0, 0.0, 2.0, 4.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-26.359375, -25.478759765625, -24.59814453125, -23.717529296875, -22.8369140625, -21.956298828125, -21.07568359375, -20.195068359375, -19.314453125, -18.433837890625, -17.55322265625, -16.672607421875, -15.7919921875, -14.911376953125, -14.03076171875, -13.150146484375, -12.26953125, -11.388916015625, -10.50830078125, -9.627685546875, -8.7470703125, -7.866455078125, -6.98583984375, -6.105224609375, -5.224609375, -4.343994140625, -3.46337890625, -2.582763671875, -1.7021484375, -0.821533203125, 0.05908203125, 0.939697265625, 1.8203125, 2.700927734375, 3.58154296875, 4.462158203125, 5.3427734375, 6.223388671875, 7.10400390625, 7.984619140625, 8.865234375, 9.745849609375, 10.62646484375, 11.507080078125, 12.3876953125, 13.268310546875, 14.14892578125, 15.029541015625, 15.91015625, 16.790771484375, 17.67138671875, 18.552001953125, 19.4326171875, 20.313232421875, 21.19384765625, 22.074462890625, 22.955078125, 23.835693359375, 24.71630859375, 25.596923828125, 26.4775390625, 27.358154296875, 28.23876953125, 29.119384765625, 30.0]}, "gradients/decoder.transformer.h.8.ln_1.weight": {"_type": "histogram", "values": [51.0, 930.0, 37.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-19.630159378051758, -6.063111305236816, 7.503936767578125, 21.070985794067383, 34.638031005859375, 48.20508575439453, 61.772132873535156, 75.33917999267578, 88.9062271118164, 102.47327423095703, 116.04032135009766, 129.6073760986328, 143.17442321777344, 156.74147033691406, 170.3085174560547, 183.8755645751953, 197.44261169433594, 211.00965881347656, 224.5767059326172, 238.1437530517578, 251.71080017089844, 265.2778625488281, 278.84490966796875, 292.4119567871094, 305.97900390625, 319.5460510253906, 333.11309814453125, 346.6801452636719, 360.2471923828125, 373.8142395019531, 387.38128662109375, 400.9483337402344, 414.5153503417969, 428.0823974609375, 441.6494445800781, 455.21649169921875, 468.7835388183594, 482.3505859375, 495.9176330566406, 509.48468017578125, 523.0517578125, 536.6188354492188, 550.1858520507812, 563.7529296875, 577.3199462890625, 590.8870239257812, 604.4540405273438, 618.0211181640625, 631.588134765625, 645.1552124023438, 658.7222290039062, 672.289306640625, 685.8563232421875, 699.4234008789062, 712.9904174804688, 726.5574951171875, 740.12451171875, 753.6915893554688, 767.2586059570312, 780.82568359375, 794.3927001953125, 807.9597778320312, 821.5267944335938, 835.0938720703125, 848.660888671875]}, "gradients/decoder.transformer.h.8.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 5.0, 1.0, 0.0, 6.0, 1.0, 6.0, 4.0, 5.0, 7.0, 8.0, 17.0, 8.0, 16.0, 23.0, 16.0, 28.0, 21.0, 24.0, 30.0, 24.0, 40.0, 40.0, 35.0, 38.0, 42.0, 48.0, 31.0, 46.0, 42.0, 35.0, 49.0, 35.0, 42.0, 23.0, 31.0, 34.0, 23.0, 24.0, 18.0, 18.0, 11.0, 11.0, 8.0, 5.0, 11.0, 7.0, 5.0, 4.0, 3.0, 5.0, 2.0, 0.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-60.26836395263672, -58.399009704589844, -56.5296516418457, -54.66029739379883, -52.79093933105469, -50.92158508300781, -49.05223083496094, -47.1828727722168, -45.31351852416992, -43.44416427612305, -41.574806213378906, -39.70545196533203, -37.83609390258789, -35.966739654541016, -34.097381591796875, -32.22802734375, -30.358671188354492, -28.489315032958984, -26.619958877563477, -24.75060272216797, -22.881248474121094, -21.011892318725586, -19.142536163330078, -17.273181915283203, -15.403824806213379, -13.534468650817871, -11.66511344909668, -9.795757293701172, -7.926401615142822, -6.057045936584473, -4.187689781188965, -2.3183345794677734, -0.4489784240722656, 1.4203773736953735, 3.2897331714630127, 5.159089088439941, 7.028444766998291, 8.89780044555664, 10.767156600952148, 12.63651180267334, 14.505867958068848, 16.37522315979004, 18.244579315185547, 20.113935470581055, 21.983291625976562, 23.852645874023438, 25.722003936767578, 27.591358184814453, 29.46071434020996, 31.33007049560547, 33.199424743652344, 35.068782806396484, 36.93813705444336, 38.8074951171875, 40.676849365234375, 42.54620361328125, 44.41556167602539, 46.284915924072266, 48.154273986816406, 50.02362823486328, 51.89298629760742, 53.7623405456543, 55.63169860839844, 57.50105285644531, 59.37040710449219]}, "gradients/decoder.transformer.h.7.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 5.0, 3.0, 6.0, 4.0, 3.0, 6.0, 7.0, 13.0, 15.0, 18.0, 22.0, 20.0, 17.0, 24.0, 38.0, 36.0, 46.0, 35.0, 42.0, 43.0, 51.0, 38.0, 46.0, 45.0, 30.0, 35.0, 29.0, 58.0, 44.0, 39.0, 30.0, 30.0, 24.0, 17.0, 17.0, 22.0, 10.0, 14.0, 9.0, 4.0, 5.0, 2.0, 5.0, 4.0, 3.0, 0.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.0546875, -6.8076171875, -6.560546875, -6.3134765625, -6.06640625, -5.8193359375, -5.572265625, -5.3251953125, -5.078125, -4.8310546875, -4.583984375, -4.3369140625, -4.08984375, -3.8427734375, -3.595703125, -3.3486328125, -3.1015625, -2.8544921875, -2.607421875, -2.3603515625, -2.11328125, -1.8662109375, -1.619140625, -1.3720703125, -1.125, -0.8779296875, -0.630859375, -0.3837890625, -0.13671875, 0.1103515625, 0.357421875, 0.6044921875, 0.8515625, 1.0986328125, 1.345703125, 1.5927734375, 1.83984375, 2.0869140625, 2.333984375, 2.5810546875, 2.828125, 3.0751953125, 3.322265625, 3.5693359375, 3.81640625, 4.0634765625, 4.310546875, 4.5576171875, 4.8046875, 5.0517578125, 5.298828125, 5.5458984375, 5.79296875, 6.0400390625, 6.287109375, 6.5341796875, 6.78125, 7.0283203125, 7.275390625, 7.5224609375, 7.76953125, 8.0166015625, 8.263671875, 8.5107421875, 8.7578125]}, "gradients/decoder.transformer.h.7.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 2.0, 5.0, 6.0, 5.0, 6.0, 9.0, 7.0, 13.0, 18.0, 14.0, 22.0, 21.0, 43.0, 83.0, 110.0, 173.0, 288.0, 558.0, 1174.0, 2674.0, 6604.0, 17758.0, 54128.0, 234358.0, 2021072.0, 1590814.0, 190762.0, 47383.0, 15685.0, 5818.0, 2360.0, 1045.0, 490.0, 279.0, 192.0, 87.0, 61.0, 48.0, 22.0, 25.0, 15.0, 11.0, 10.0, 13.0, 7.0, 1.0, 0.0, 4.0, 3.0, 2.0, 1.0, 1.0, 2.0, 0.0, 3.0, 1.0, 1.0], "bins": [-16.421875, -15.90771484375, -15.3935546875, -14.87939453125, -14.365234375, -13.85107421875, -13.3369140625, -12.82275390625, -12.30859375, -11.79443359375, -11.2802734375, -10.76611328125, -10.251953125, -9.73779296875, -9.2236328125, -8.70947265625, -8.1953125, -7.68115234375, -7.1669921875, -6.65283203125, -6.138671875, -5.62451171875, -5.1103515625, -4.59619140625, -4.08203125, -3.56787109375, -3.0537109375, -2.53955078125, -2.025390625, -1.51123046875, -0.9970703125, -0.48291015625, 0.03125, 0.54541015625, 1.0595703125, 1.57373046875, 2.087890625, 2.60205078125, 3.1162109375, 3.63037109375, 4.14453125, 4.65869140625, 5.1728515625, 5.68701171875, 6.201171875, 6.71533203125, 7.2294921875, 7.74365234375, 8.2578125, 8.77197265625, 9.2861328125, 9.80029296875, 10.314453125, 10.82861328125, 11.3427734375, 11.85693359375, 12.37109375, 12.88525390625, 13.3994140625, 13.91357421875, 14.427734375, 14.94189453125, 15.4560546875, 15.97021484375, 16.484375]}, "gradients/decoder.transformer.h.7.mlp.c_fc.bias": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 4.0, 2.0, 4.0, 10.0, 13.0, 7.0, 19.0, 19.0, 31.0, 37.0, 44.0, 63.0, 75.0, 128.0, 137.0, 219.0, 327.0, 485.0, 601.0, 531.0, 389.0, 234.0, 164.0, 120.0, 103.0, 74.0, 63.0, 45.0, 26.0, 23.0, 22.0, 15.0, 13.0, 14.0, 5.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-11.96875, -11.5498046875, -11.130859375, -10.7119140625, -10.29296875, -9.8740234375, -9.455078125, -9.0361328125, -8.6171875, -8.1982421875, -7.779296875, -7.3603515625, -6.94140625, -6.5224609375, -6.103515625, -5.6845703125, -5.265625, -4.8466796875, -4.427734375, -4.0087890625, -3.58984375, -3.1708984375, -2.751953125, -2.3330078125, -1.9140625, -1.4951171875, -1.076171875, -0.6572265625, -0.23828125, 0.1806640625, 0.599609375, 1.0185546875, 1.4375, 1.8564453125, 2.275390625, 2.6943359375, 3.11328125, 3.5322265625, 3.951171875, 4.3701171875, 4.7890625, 5.2080078125, 5.626953125, 6.0458984375, 6.46484375, 6.8837890625, 7.302734375, 7.7216796875, 8.140625, 8.5595703125, 8.978515625, 9.3974609375, 9.81640625, 10.2353515625, 10.654296875, 11.0732421875, 11.4921875, 11.9111328125, 12.330078125, 12.7490234375, 13.16796875, 13.5869140625, 14.005859375, 14.4248046875, 14.84375]}, "gradients/decoder.transformer.h.7.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 2.0, 3.0, 10.0, 8.0, 12.0, 21.0, 19.0, 33.0, 35.0, 53.0, 72.0, 118.0, 138.0, 236.0, 474.0, 1320.0, 6419.0, 54161.0, 1645645.0, 2408066.0, 67523.0, 7282.0, 1439.0, 488.0, 242.0, 120.0, 96.0, 80.0, 52.0, 28.0, 28.0, 15.0, 15.0, 16.0, 6.0, 4.0, 3.0, 3.0, 4.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 3.0, 1.0], "bins": [-52.125, -50.6787109375, -49.232421875, -47.7861328125, -46.33984375, -44.8935546875, -43.447265625, -42.0009765625, -40.5546875, -39.1083984375, -37.662109375, -36.2158203125, -34.76953125, -33.3232421875, -31.876953125, -30.4306640625, -28.984375, -27.5380859375, -26.091796875, -24.6455078125, -23.19921875, -21.7529296875, -20.306640625, -18.8603515625, -17.4140625, -15.9677734375, -14.521484375, -13.0751953125, -11.62890625, -10.1826171875, -8.736328125, -7.2900390625, -5.84375, -4.3974609375, -2.951171875, -1.5048828125, -0.05859375, 1.3876953125, 2.833984375, 4.2802734375, 5.7265625, 7.1728515625, 8.619140625, 10.0654296875, 11.51171875, 12.9580078125, 14.404296875, 15.8505859375, 17.296875, 18.7431640625, 20.189453125, 21.6357421875, 23.08203125, 24.5283203125, 25.974609375, 27.4208984375, 28.8671875, 30.3134765625, 31.759765625, 33.2060546875, 34.65234375, 36.0986328125, 37.544921875, 38.9912109375, 40.4375]}, "gradients/decoder.transformer.h.7.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 13.0, 21.0, 67.0, 154.0, 299.0, 259.0, 137.0, 44.0, 19.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-285.71343994140625, -280.09283447265625, -274.47222900390625, -268.8515930175781, -263.2309875488281, -257.6103820800781, -251.98977661132812, -246.36915588378906, -240.74853515625, -235.1279296875, -229.50730895996094, -223.88670349121094, -218.26608276367188, -212.64547729492188, -207.02487182617188, -201.4042510986328, -195.7836456298828, -190.1630401611328, -184.54241943359375, -178.92181396484375, -173.3011932373047, -167.6805877685547, -162.05996704101562, -156.43936157226562, -150.81875610351562, -145.19815063476562, -139.57752990722656, -133.95692443847656, -128.3363037109375, -122.7156982421875, -117.09508514404297, -111.47447204589844, -105.85385131835938, -100.23323822021484, -94.61262512207031, -88.99201965332031, -83.37139892578125, -77.75079345703125, -72.13018035888672, -66.50956726074219, -60.888954162597656, -55.268341064453125, -49.647727966308594, -44.02711868286133, -38.4065055847168, -32.785892486572266, -27.165283203125, -21.54467010498047, -15.924057006835938, -10.303444862365723, -4.682832717895508, 0.9377784729003906, 6.558391571044922, 12.179004669189453, 17.79961395263672, 23.42022705078125, 29.04084014892578, 34.66145324707031, 40.282066345214844, 45.90267562866211, 51.52328872680664, 57.14390182495117, 62.76451110839844, 68.38512420654297, 74.0057373046875]}, "gradients/decoder.transformer.h.7.ln_2.bias": {"_type": "histogram", "values": [1.0, 2.0, 2.0, 3.0, 5.0, 3.0, 10.0, 9.0, 18.0, 12.0, 17.0, 18.0, 18.0, 22.0, 27.0, 24.0, 33.0, 34.0, 28.0, 33.0, 39.0, 42.0, 40.0, 43.0, 44.0, 39.0, 41.0, 36.0, 31.0, 30.0, 32.0, 36.0, 26.0, 29.0, 28.0, 28.0, 18.0, 26.0, 17.0, 16.0, 10.0, 5.0, 3.0, 12.0, 2.0, 7.0, 1.0, 4.0, 3.0, 5.0, 2.0, 1.0, 3.0, 1.0, 1.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-35.584716796875, -34.18678665161133, -32.788856506347656, -31.390926361083984, -29.992996215820312, -28.59506607055664, -27.197134017944336, -25.799203872680664, -24.401273727416992, -23.00334358215332, -21.60541343688965, -20.207481384277344, -18.809551239013672, -17.41162109375, -16.013690948486328, -14.615760803222656, -13.217830657958984, -11.819900512695312, -10.42197036743164, -9.024039268493652, -7.6261091232299805, -6.228178977966309, -4.83024787902832, -3.4323177337646484, -2.0343875885009766, -0.6364572048187256, 0.7614731788635254, 2.1594038009643555, 3.5573339462280273, 4.955264091491699, 6.3531951904296875, 7.751125335693359, 9.149055480957031, 10.546985626220703, 11.944915771484375, 13.342846870422363, 14.740777015686035, 16.13870620727539, 17.536638259887695, 18.934568405151367, 20.33249855041504, 21.73042869567871, 23.128358840942383, 24.526290893554688, 25.92422103881836, 27.32215118408203, 28.720081329345703, 30.118011474609375, 31.515941619873047, 32.91387176513672, 34.31180191040039, 35.70973205566406, 37.107662200927734, 38.505592346191406, 39.903526306152344, 41.30145263671875, 42.69938659667969, 44.09731674194336, 45.49524688720703, 46.8931770324707, 48.291107177734375, 49.68903732299805, 51.08696746826172, 52.484901428222656, 53.88282775878906]}, "gradients/decoder.transformer.h.7.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 2.0, 2.0, 3.0, 3.0, 6.0, 4.0, 5.0, 7.0, 8.0, 9.0, 18.0, 18.0, 23.0, 29.0, 36.0, 25.0, 36.0, 36.0, 47.0, 43.0, 48.0, 45.0, 38.0, 36.0, 48.0, 43.0, 38.0, 37.0, 46.0, 45.0, 40.0, 26.0, 35.0, 21.0, 31.0, 18.0, 13.0, 10.0, 4.0, 10.0, 3.0, 7.0, 0.0, 6.0, 2.0, 3.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.1171875, -7.8504638671875, -7.583740234375, -7.3170166015625, -7.05029296875, -6.7835693359375, -6.516845703125, -6.2501220703125, -5.9833984375, -5.7166748046875, -5.449951171875, -5.1832275390625, -4.91650390625, -4.6497802734375, -4.383056640625, -4.1163330078125, -3.849609375, -3.5828857421875, -3.316162109375, -3.0494384765625, -2.78271484375, -2.5159912109375, -2.249267578125, -1.9825439453125, -1.7158203125, -1.4490966796875, -1.182373046875, -0.9156494140625, -0.64892578125, -0.3822021484375, -0.115478515625, 0.1512451171875, 0.41796875, 0.6846923828125, 0.951416015625, 1.2181396484375, 1.48486328125, 1.7515869140625, 2.018310546875, 2.2850341796875, 2.5517578125, 2.8184814453125, 3.085205078125, 3.3519287109375, 3.61865234375, 3.8853759765625, 4.152099609375, 4.4188232421875, 4.685546875, 4.9522705078125, 5.218994140625, 5.4857177734375, 5.75244140625, 6.0191650390625, 6.285888671875, 6.5526123046875, 6.8193359375, 7.0860595703125, 7.352783203125, 7.6195068359375, 7.88623046875, 8.1529541015625, 8.419677734375, 8.6864013671875, 8.953125]}, "gradients/decoder.transformer.h.7.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 3.0, 2.0, 3.0, 1.0, 8.0, 11.0, 12.0, 25.0, 31.0, 38.0, 71.0, 112.0, 123.0, 213.0, 282.0, 423.0, 593.0, 903.0, 1354.0, 1979.0, 3073.0, 4604.0, 7173.0, 11596.0, 18546.0, 29983.0, 51488.0, 92663.0, 198882.0, 335599.0, 123609.0, 65464.0, 37578.0, 22653.0, 14048.0, 8848.0, 5583.0, 3632.0, 2452.0, 1535.0, 1090.0, 734.0, 447.0, 348.0, 220.0, 183.0, 119.0, 60.0, 52.0, 50.0, 18.0, 22.0, 11.0, 7.0, 2.0, 8.0, 3.0, 0.0, 1.0, 2.0, 1.0], "bins": [-0.312255859375, -0.3025627136230469, -0.29286956787109375, -0.2831764221191406, -0.2734832763671875, -0.2637901306152344, -0.25409698486328125, -0.24440383911132812, -0.234710693359375, -0.22501754760742188, -0.21532440185546875, -0.20563125610351562, -0.1959381103515625, -0.18624496459960938, -0.17655181884765625, -0.16685867309570312, -0.15716552734375, -0.14747238159179688, -0.13777923583984375, -0.12808609008789062, -0.1183929443359375, -0.10869979858398438, -0.09900665283203125, -0.08931350708007812, -0.079620361328125, -0.06992721557617188, -0.06023406982421875, -0.050540924072265625, -0.0408477783203125, -0.031154632568359375, -0.02146148681640625, -0.011768341064453125, -0.0020751953125, 0.007617950439453125, 0.01731109619140625, 0.027004241943359375, 0.0366973876953125, 0.046390533447265625, 0.05608367919921875, 0.06577682495117188, 0.075469970703125, 0.08516311645507812, 0.09485626220703125, 0.10454940795898438, 0.1142425537109375, 0.12393569946289062, 0.13362884521484375, 0.14332199096679688, 0.15301513671875, 0.16270828247070312, 0.17240142822265625, 0.18209457397460938, 0.1917877197265625, 0.20148086547851562, 0.21117401123046875, 0.22086715698242188, 0.230560302734375, 0.24025344848632812, 0.24994659423828125, 0.2596397399902344, 0.2693328857421875, 0.2790260314941406, 0.28871917724609375, 0.2984123229980469, 0.30810546875]}, "gradients/decoder.transformer.h.7.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 2.0, 3.0, 1.0, 5.0, 11.0, 10.0, 10.0, 5.0, 13.0, 17.0, 17.0, 21.0, 23.0, 21.0, 25.0, 27.0, 41.0, 36.0, 39.0, 37.0, 40.0, 42.0, 53.0, 1072.0, 40.0, 41.0, 51.0, 38.0, 37.0, 27.0, 26.0, 27.0, 25.0, 23.0, 19.0, 14.0, 19.0, 14.0, 9.0, 12.0, 18.0, 4.0, 7.0, 7.0, 4.0, 3.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.1796875, -5.00323486328125, -4.8267822265625, -4.65032958984375, -4.473876953125, -4.29742431640625, -4.1209716796875, -3.94451904296875, -3.76806640625, -3.59161376953125, -3.4151611328125, -3.23870849609375, -3.062255859375, -2.88580322265625, -2.7093505859375, -2.53289794921875, -2.3564453125, -2.17999267578125, -2.0035400390625, -1.82708740234375, -1.650634765625, -1.47418212890625, -1.2977294921875, -1.12127685546875, -0.94482421875, -0.76837158203125, -0.5919189453125, -0.41546630859375, -0.239013671875, -0.06256103515625, 0.1138916015625, 0.29034423828125, 0.466796875, 0.64324951171875, 0.8197021484375, 0.99615478515625, 1.172607421875, 1.34906005859375, 1.5255126953125, 1.70196533203125, 1.87841796875, 2.05487060546875, 2.2313232421875, 2.40777587890625, 2.584228515625, 2.76068115234375, 2.9371337890625, 3.11358642578125, 3.2900390625, 3.46649169921875, 3.6429443359375, 3.81939697265625, 3.995849609375, 4.17230224609375, 4.3487548828125, 4.52520751953125, 4.70166015625, 4.87811279296875, 5.0545654296875, 5.23101806640625, 5.407470703125, 5.58392333984375, 5.7603759765625, 5.93682861328125, 6.11328125]}, "gradients/decoder.transformer.h.7.crossattention.c_attn.weight": {"_type": "histogram", "values": [3.0, 1.0, 4.0, 3.0, 6.0, 4.0, 15.0, 12.0, 27.0, 31.0, 43.0, 73.0, 113.0, 189.0, 277.0, 424.0, 582.0, 818.0, 1291.0, 1809.0, 2515.0, 3578.0, 5211.0, 7755.0, 11377.0, 16954.0, 25387.0, 39641.0, 62881.0, 106594.0, 1318285.0, 220273.0, 99936.0, 59414.0, 36968.0, 24139.0, 16099.0, 10776.0, 7362.0, 4997.0, 3499.0, 2377.0, 1719.0, 1179.0, 835.0, 545.0, 377.0, 264.0, 160.0, 108.0, 67.0, 55.0, 33.0, 29.0, 12.0, 12.0, 3.0, 1.0, 2.0, 3.0, 2.0, 1.0, 1.0, 1.0], "bins": [-0.1383056640625, -0.1338348388671875, -0.129364013671875, -0.1248931884765625, -0.12042236328125, -0.1159515380859375, -0.111480712890625, -0.1070098876953125, -0.1025390625, -0.0980682373046875, -0.093597412109375, -0.0891265869140625, -0.08465576171875, -0.0801849365234375, -0.075714111328125, -0.0712432861328125, -0.0667724609375, -0.0623016357421875, -0.057830810546875, -0.0533599853515625, -0.04888916015625, -0.0444183349609375, -0.039947509765625, -0.0354766845703125, -0.031005859375, -0.0265350341796875, -0.022064208984375, -0.0175933837890625, -0.01312255859375, -0.0086517333984375, -0.004180908203125, 0.0002899169921875, 0.0047607421875, 0.0092315673828125, 0.013702392578125, 0.0181732177734375, 0.02264404296875, 0.0271148681640625, 0.031585693359375, 0.0360565185546875, 0.04052734375, 0.0449981689453125, 0.049468994140625, 0.0539398193359375, 0.05841064453125, 0.0628814697265625, 0.067352294921875, 0.0718231201171875, 0.0762939453125, 0.0807647705078125, 0.085235595703125, 0.0897064208984375, 0.09417724609375, 0.0986480712890625, 0.103118896484375, 0.1075897216796875, 0.112060546875, 0.1165313720703125, 0.121002197265625, 0.1254730224609375, 0.12994384765625, 0.1344146728515625, 0.138885498046875, 0.1433563232421875, 0.1478271484375]}, "gradients/decoder.transformer.h.7.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 0.0, 2.0, 1.0, 0.0, 3.0, 0.0, 0.0, 0.0, 2.0, 4.0, 3.0, 5.0, 2.0, 7.0, 7.0, 6.0, 15.0, 24.0, 23.0, 59.0, 128.0, 144.0, 213.0, 113.0, 81.0, 54.0, 23.0, 20.0, 17.0, 14.0, 10.0, 5.0, 8.0, 4.0, 4.0, 6.0, 2.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-9.775161743164062e-06, -9.463168680667877e-06, -9.151175618171692e-06, -8.839182555675507e-06, -8.527189493179321e-06, -8.215196430683136e-06, -7.90320336818695e-06, -7.591210305690765e-06, -7.27921724319458e-06, -6.967224180698395e-06, -6.6552311182022095e-06, -6.343238055706024e-06, -6.031244993209839e-06, -5.7192519307136536e-06, -5.407258868217468e-06, -5.095265805721283e-06, -4.783272743225098e-06, -4.471279680728912e-06, -4.159286618232727e-06, -3.847293555736542e-06, -3.5353004932403564e-06, -3.223307430744171e-06, -2.911314368247986e-06, -2.5993213057518005e-06, -2.2873282432556152e-06, -1.97533518075943e-06, -1.6633421182632446e-06, -1.3513490557670593e-06, -1.039355993270874e-06, -7.273629307746887e-07, -4.153698682785034e-07, -1.0337680578231812e-07, 2.086162567138672e-07, 5.206093192100525e-07, 8.326023817062378e-07, 1.144595444202423e-06, 1.4565885066986084e-06, 1.7685815691947937e-06, 2.080574631690979e-06, 2.3925676941871643e-06, 2.7045607566833496e-06, 3.016553819179535e-06, 3.3285468816757202e-06, 3.6405399441719055e-06, 3.952533006668091e-06, 4.264526069164276e-06, 4.5765191316604614e-06, 4.888512194156647e-06, 5.200505256652832e-06, 5.512498319149017e-06, 5.824491381645203e-06, 6.136484444141388e-06, 6.448477506637573e-06, 6.7604705691337585e-06, 7.072463631629944e-06, 7.384456694126129e-06, 7.696449756622314e-06, 8.0084428191185e-06, 8.320435881614685e-06, 8.63242894411087e-06, 8.944422006607056e-06, 9.256415069103241e-06, 9.568408131599426e-06, 9.880401194095612e-06, 1.0192394256591797e-05]}, "gradients/decoder.transformer.h.7.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 3.0, 4.0, 5.0, 2.0, 10.0, 6.0, 4.0, 7.0, 19.0, 27.0, 32.0, 54.0, 98.0, 247.0, 2829.0, 1028251.0, 16296.0, 373.0, 130.0, 54.0, 39.0, 17.0, 10.0, 9.0, 13.0, 2.0, 4.0, 5.0, 3.0, 3.0, 1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 1.0, 0.0, 4.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0002149343490600586, -0.00020834989845752716, -0.00020176544785499573, -0.0001951809972524643, -0.00018859654664993286, -0.00018201209604740143, -0.00017542764544487, -0.00016884319484233856, -0.00016225874423980713, -0.0001556742936372757, -0.00014908984303474426, -0.00014250539243221283, -0.0001359209418296814, -0.00012933649122714996, -0.00012275204062461853, -0.0001161675900220871, -0.00010958313941955566, -0.00010299868881702423, -9.64142382144928e-05, -8.982978761196136e-05, -8.324533700942993e-05, -7.66608864068985e-05, -7.007643580436707e-05, -6.349198520183563e-05, -5.69075345993042e-05, -5.0323083996772766e-05, -4.373863339424133e-05, -3.71541827917099e-05, -3.056973218917847e-05, -2.3985281586647034e-05, -1.74008309841156e-05, -1.0816380381584167e-05, -4.231929779052734e-06, 2.3525208234786987e-06, 8.936971426010132e-06, 1.5521422028541565e-05, 2.2105872631072998e-05, 2.869032323360443e-05, 3.5274773836135864e-05, 4.18592244386673e-05, 4.844367504119873e-05, 5.5028125643730164e-05, 6.16125762462616e-05, 6.819702684879303e-05, 7.478147745132446e-05, 8.13659280538559e-05, 8.795037865638733e-05, 9.453482925891876e-05, 0.0001011192798614502, 0.00010770373046398163, 0.00011428818106651306, 0.0001208726316690445, 0.00012745708227157593, 0.00013404153287410736, 0.0001406259834766388, 0.00014721043407917023, 0.00015379488468170166, 0.0001603793352842331, 0.00016696378588676453, 0.00017354823648929596, 0.0001801326870918274, 0.00018671713769435883, 0.00019330158829689026, 0.0001998860388994217, 0.00020647048950195312]}, "gradients/decoder.transformer.h.7.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 5.0, 4.0, 18.0, 91.0, 266.0, 393.0, 183.0, 45.0, 11.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.1184783842472825e-06, -4.6231484702730086e-06, -4.1278190110460855e-06, -3.6324890970718116e-06, -3.1371591830975376e-06, -2.6418292691232637e-06, -2.146499582522665e-06, -1.6511698959220666e-06, -1.1558399819477927e-06, -6.605101816603565e-07, -1.6518038137292024e-07, 3.30149418914516e-07, 8.254792192019522e-07, 1.3208091331762262e-06, 1.8161388197768247e-06, 2.311468506377423e-06, 2.806798420351697e-06, 3.302128334325971e-06, 3.7974580209265696e-06, 4.292787707527168e-06, 4.788117621501442e-06, 5.283447535475716e-06, 5.77877744944999e-06, 6.274106908676913e-06, 6.769436822651187e-06, 7.264766736625461e-06, 7.760096195852384e-06, 8.255426109826658e-06, 8.750756023800932e-06, 9.246085937775206e-06, 9.74141585174948e-06, 1.0236744856229052e-05, 1.0732073860708624e-05, 1.1227403774682898e-05, 1.1722733688657172e-05, 1.2218063602631446e-05, 1.271339351660572e-05, 1.3208722521085292e-05, 1.3704052435059566e-05, 1.419938234903384e-05, 1.4694712263008114e-05, 1.5190042176982388e-05, 1.568537118146196e-05, 1.6180702004930936e-05, 1.6676031009410508e-05, 1.7171361832879484e-05, 1.7666690837359056e-05, 1.8162019841838628e-05, 1.8657350665307604e-05, 1.9152679669787176e-05, 1.964801049325615e-05, 2.0143339497735724e-05, 2.06386703212047e-05, 2.1133999325684272e-05, 2.1629330149153247e-05, 2.212465915363282e-05, 2.2619988158112392e-05, 2.3115317162591964e-05, 2.361064798606094e-05, 2.4105976990540512e-05, 2.4601307814009488e-05, 2.509663681848906e-05, 2.5591967641958036e-05, 2.6087296646437608e-05, 2.6582627469906583e-05]}, "gradients/decoder.transformer.h.7.ln_cross_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 2.0, 2.0, 4.0, 2.0, 11.0, 3.0, 6.0, 11.0, 12.0, 11.0, 5.0, 14.0, 13.0, 14.0, 30.0, 16.0, 34.0, 37.0, 27.0, 45.0, 21.0, 44.0, 59.0, 27.0, 43.0, 27.0, 45.0, 20.0, 52.0, 57.0, 20.0, 54.0, 16.0, 32.0, 38.0, 14.0, 29.0, 14.0, 20.0, 16.0, 7.0, 14.0, 5.0, 9.0, 6.0, 2.0, 9.0, 3.0, 3.0, 5.0, 1.0, 2.0, 0.0, 6.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-2.8014183044433594e-06, -2.7064234018325806e-06, -2.6114284992218018e-06, -2.516433596611023e-06, -2.421438694000244e-06, -2.3264437913894653e-06, -2.2314488887786865e-06, -2.1364539861679077e-06, -2.041459083557129e-06, -1.94646418094635e-06, -1.8514692783355713e-06, -1.7564743757247925e-06, -1.6614794731140137e-06, -1.5664845705032349e-06, -1.471489667892456e-06, -1.3764947652816772e-06, -1.2814998626708984e-06, -1.1865049600601196e-06, -1.0915100574493408e-06, -9.96515154838562e-07, -9.015202522277832e-07, -8.065253496170044e-07, -7.115304470062256e-07, -6.165355443954468e-07, -5.21540641784668e-07, -4.2654573917388916e-07, -3.3155083656311035e-07, -2.3655593395233154e-07, -1.4156103134155273e-07, -4.6566128730773926e-08, 4.842877388000488e-08, 1.434236764907837e-07, 2.384185791015625e-07, 3.334134817123413e-07, 4.284083843231201e-07, 5.234032869338989e-07, 6.183981895446777e-07, 7.133930921554565e-07, 8.083879947662354e-07, 9.033828973770142e-07, 9.98377799987793e-07, 1.0933727025985718e-06, 1.1883676052093506e-06, 1.2833625078201294e-06, 1.3783574104309082e-06, 1.473352313041687e-06, 1.5683472156524658e-06, 1.6633421182632446e-06, 1.7583370208740234e-06, 1.8533319234848022e-06, 1.948326826095581e-06, 2.04332172870636e-06, 2.1383166313171387e-06, 2.2333115339279175e-06, 2.3283064365386963e-06, 2.423301339149475e-06, 2.518296241760254e-06, 2.6132911443710327e-06, 2.7082860469818115e-06, 2.8032809495925903e-06, 2.898275852203369e-06, 2.993270754814148e-06, 3.0882656574249268e-06, 3.1832605600357056e-06, 3.2782554626464844e-06]}, "gradients/decoder.transformer.h.7.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 2.0, 2.0, 3.0, 3.0, 6.0, 4.0, 5.0, 7.0, 8.0, 9.0, 18.0, 18.0, 23.0, 29.0, 36.0, 25.0, 36.0, 36.0, 47.0, 43.0, 48.0, 45.0, 38.0, 36.0, 48.0, 43.0, 38.0, 37.0, 46.0, 45.0, 40.0, 26.0, 35.0, 21.0, 31.0, 18.0, 13.0, 10.0, 4.0, 10.0, 3.0, 7.0, 0.0, 6.0, 2.0, 3.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.1171875, -7.8504638671875, -7.583740234375, -7.3170166015625, -7.05029296875, -6.7835693359375, -6.516845703125, -6.2501220703125, -5.9833984375, -5.7166748046875, -5.449951171875, -5.1832275390625, -4.91650390625, -4.6497802734375, -4.383056640625, -4.1163330078125, -3.849609375, -3.5828857421875, -3.316162109375, -3.0494384765625, -2.78271484375, -2.5159912109375, -2.249267578125, -1.9825439453125, -1.7158203125, -1.4490966796875, -1.182373046875, -0.9156494140625, -0.64892578125, -0.3822021484375, -0.115478515625, 0.1512451171875, 0.41796875, 0.6846923828125, 0.951416015625, 1.2181396484375, 1.48486328125, 1.7515869140625, 2.018310546875, 2.2850341796875, 2.5517578125, 2.8184814453125, 3.085205078125, 3.3519287109375, 3.61865234375, 3.8853759765625, 4.152099609375, 4.4188232421875, 4.685546875, 4.9522705078125, 5.218994140625, 5.4857177734375, 5.75244140625, 6.0191650390625, 6.285888671875, 6.5526123046875, 6.8193359375, 7.0860595703125, 7.352783203125, 7.6195068359375, 7.88623046875, 8.1529541015625, 8.419677734375, 8.6864013671875, 8.953125]}, "gradients/decoder.transformer.h.7.attn.c_proj.weight": {"_type": "histogram", "values": [3.0, 2.0, 5.0, 2.0, 5.0, 7.0, 5.0, 4.0, 10.0, 9.0, 19.0, 17.0, 39.0, 54.0, 70.0, 114.0, 180.0, 287.0, 500.0, 850.0, 1426.0, 2532.0, 4301.0, 8047.0, 15802.0, 34838.0, 86979.0, 247427.0, 386001.0, 152950.0, 56295.0, 24120.0, 11511.0, 5946.0, 3269.0, 1989.0, 1189.0, 650.0, 444.0, 250.0, 132.0, 92.0, 65.0, 34.0, 30.0, 21.0, 12.0, 4.0, 7.0, 8.0, 2.0, 6.0, 2.0, 3.0, 3.0, 1.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-9.21875, -8.8927001953125, -8.566650390625, -8.2406005859375, -7.91455078125, -7.5885009765625, -7.262451171875, -6.9364013671875, -6.6103515625, -6.2843017578125, -5.958251953125, -5.6322021484375, -5.30615234375, -4.9801025390625, -4.654052734375, -4.3280029296875, -4.001953125, -3.6759033203125, -3.349853515625, -3.0238037109375, -2.69775390625, -2.3717041015625, -2.045654296875, -1.7196044921875, -1.3935546875, -1.0675048828125, -0.741455078125, -0.4154052734375, -0.08935546875, 0.2366943359375, 0.562744140625, 0.8887939453125, 1.21484375, 1.5408935546875, 1.866943359375, 2.1929931640625, 2.51904296875, 2.8450927734375, 3.171142578125, 3.4971923828125, 3.8232421875, 4.1492919921875, 4.475341796875, 4.8013916015625, 5.12744140625, 5.4534912109375, 5.779541015625, 6.1055908203125, 6.431640625, 6.7576904296875, 7.083740234375, 7.4097900390625, 7.73583984375, 8.0618896484375, 8.387939453125, 8.7139892578125, 9.0400390625, 9.3660888671875, 9.692138671875, 10.0181884765625, 10.34423828125, 10.6702880859375, 10.996337890625, 11.3223876953125, 11.6484375]}, "gradients/decoder.transformer.h.7.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 2.0, 0.0, 2.0, 3.0, 2.0, 5.0, 5.0, 9.0, 9.0, 5.0, 12.0, 12.0, 11.0, 18.0, 19.0, 21.0, 29.0, 27.0, 35.0, 39.0, 51.0, 42.0, 65.0, 64.0, 131.0, 238.0, 1437.0, 176.0, 111.0, 76.0, 48.0, 40.0, 44.0, 36.0, 38.0, 39.0, 30.0, 16.0, 20.0, 15.0, 18.0, 14.0, 8.0, 9.0, 10.0, 8.0, 1.0, 6.0, 2.0, 3.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0], "bins": [-20.828125, -20.06689453125, -19.3056640625, -18.54443359375, -17.783203125, -17.02197265625, -16.2607421875, -15.49951171875, -14.73828125, -13.97705078125, -13.2158203125, -12.45458984375, -11.693359375, -10.93212890625, -10.1708984375, -9.40966796875, -8.6484375, -7.88720703125, -7.1259765625, -6.36474609375, -5.603515625, -4.84228515625, -4.0810546875, -3.31982421875, -2.55859375, -1.79736328125, -1.0361328125, -0.27490234375, 0.486328125, 1.24755859375, 2.0087890625, 2.77001953125, 3.53125, 4.29248046875, 5.0537109375, 5.81494140625, 6.576171875, 7.33740234375, 8.0986328125, 8.85986328125, 9.62109375, 10.38232421875, 11.1435546875, 11.90478515625, 12.666015625, 13.42724609375, 14.1884765625, 14.94970703125, 15.7109375, 16.47216796875, 17.2333984375, 17.99462890625, 18.755859375, 19.51708984375, 20.2783203125, 21.03955078125, 21.80078125, 22.56201171875, 23.3232421875, 24.08447265625, 24.845703125, 25.60693359375, 26.3681640625, 27.12939453125, 27.890625]}, "gradients/decoder.transformer.h.7.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 0.0, 3.0, 2.0, 8.0, 7.0, 5.0, 14.0, 16.0, 22.0, 44.0, 52.0, 72.0, 95.0, 190.0, 375.0, 822.0, 2565.0, 11191.0, 97026.0, 2848422.0, 164598.0, 14935.0, 3251.0, 1031.0, 402.0, 228.0, 91.0, 72.0, 51.0, 47.0, 23.0, 16.0, 11.0, 11.0, 14.0, 1.0, 3.0, 4.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-63.96875, -62.2666015625, -60.564453125, -58.8623046875, -57.16015625, -55.4580078125, -53.755859375, -52.0537109375, -50.3515625, -48.6494140625, -46.947265625, -45.2451171875, -43.54296875, -41.8408203125, -40.138671875, -38.4365234375, -36.734375, -35.0322265625, -33.330078125, -31.6279296875, -29.92578125, -28.2236328125, -26.521484375, -24.8193359375, -23.1171875, -21.4150390625, -19.712890625, -18.0107421875, -16.30859375, -14.6064453125, -12.904296875, -11.2021484375, -9.5, -7.7978515625, -6.095703125, -4.3935546875, -2.69140625, -0.9892578125, 0.712890625, 2.4150390625, 4.1171875, 5.8193359375, 7.521484375, 9.2236328125, 10.92578125, 12.6279296875, 14.330078125, 16.0322265625, 17.734375, 19.4365234375, 21.138671875, 22.8408203125, 24.54296875, 26.2451171875, 27.947265625, 29.6494140625, 31.3515625, 33.0537109375, 34.755859375, 36.4580078125, 38.16015625, 39.8623046875, 41.564453125, 43.2666015625, 44.96875]}, "gradients/decoder.transformer.h.7.ln_1.weight": {"_type": "histogram", "values": [1.0, 6.0, 25.0, 82.0, 319.0, 390.0, 150.0, 36.0, 12.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-41.20253372192383, -33.27638626098633, -25.350242614746094, -17.424095153808594, -9.497949600219727, -1.5718040466308594, 6.354343414306641, 14.280487060546875, 22.206634521484375, 30.132780075073242, 38.05892562866211, 45.98507308959961, 53.911216735839844, 61.837364196777344, 69.76351165771484, 77.68965148925781, 85.61579895019531, 93.54194641113281, 101.46809387207031, 109.39424133300781, 117.32038116455078, 125.24652862548828, 133.17266845703125, 141.09881591796875, 149.02496337890625, 156.95111083984375, 164.87725830078125, 172.80340576171875, 180.72955322265625, 188.65570068359375, 196.58184814453125, 204.5079803466797, 212.4341278076172, 220.3602752685547, 228.2864227294922, 236.2125701904297, 244.1387176513672, 252.06484985351562, 259.9909973144531, 267.9171447753906, 275.8432922363281, 283.7694396972656, 291.6955871582031, 299.6217346191406, 307.5478820800781, 315.4740295410156, 323.4001770019531, 331.3263244628906, 339.2524719238281, 347.1786193847656, 355.1047668457031, 363.0309143066406, 370.9570617675781, 378.8832092285156, 386.8093566894531, 394.7355041503906, 402.66162109375, 410.5877685546875, 418.513916015625, 426.4400634765625, 434.3662109375, 442.2923583984375, 450.218505859375, 458.1446533203125, 466.07080078125]}, "gradients/decoder.transformer.h.7.ln_1.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 1.0, 3.0, 14.0, 7.0, 4.0, 8.0, 7.0, 12.0, 12.0, 10.0, 13.0, 16.0, 21.0, 27.0, 36.0, 26.0, 41.0, 31.0, 43.0, 40.0, 37.0, 36.0, 32.0, 44.0, 50.0, 35.0, 29.0, 36.0, 50.0, 33.0, 27.0, 37.0, 29.0, 23.0, 27.0, 19.0, 15.0, 11.0, 13.0, 6.0, 12.0, 7.0, 9.0, 4.0, 6.0, 4.0, 5.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-69.79849243164062, -67.69772338867188, -65.59696197509766, -63.496192932128906, -61.39542770385742, -59.29466247558594, -57.19389343261719, -55.0931282043457, -52.99236297607422, -50.891597747802734, -48.79083251953125, -46.6900634765625, -44.589298248291016, -42.48853302001953, -40.38776397705078, -38.2869987487793, -36.18623352050781, -34.08546829223633, -31.98470115661621, -29.883934020996094, -27.78316879272461, -25.682403564453125, -23.581636428833008, -21.48086929321289, -19.380104064941406, -17.279338836669922, -15.178571701049805, -13.077805519104004, -10.977039337158203, -8.876273155212402, -6.775506973266602, -4.674740791320801, -2.573974609375, -0.4732084274291992, 1.6275577545166016, 3.7283239364624023, 5.829090118408203, 7.929856300354004, 10.030622482299805, 12.131388664245605, 14.232154846191406, 16.33292007446289, 18.433687210083008, 20.534454345703125, 22.63521957397461, 24.735984802246094, 26.83675193786621, 28.937519073486328, 31.038284301757812, 33.1390495300293, 35.23981475830078, 37.34058380126953, 39.441349029541016, 41.5421142578125, 43.64288330078125, 45.743648529052734, 47.84441375732422, 49.9451789855957, 52.04594421386719, 54.14671325683594, 56.24747848510742, 58.348243713378906, 60.449012756347656, 62.54977798461914, 64.65054321289062]}, "gradients/decoder.transformer.h.6.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 3.0, 5.0, 4.0, 7.0, 6.0, 7.0, 9.0, 7.0, 10.0, 17.0, 22.0, 27.0, 46.0, 17.0, 35.0, 28.0, 40.0, 35.0, 56.0, 43.0, 40.0, 36.0, 45.0, 55.0, 40.0, 40.0, 43.0, 39.0, 31.0, 27.0, 33.0, 34.0, 22.0, 25.0, 21.0, 17.0, 12.0, 5.0, 4.0, 5.0, 3.0, 4.0, 2.0, 4.0, 3.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 2.0], "bins": [-9.3125, -9.0308837890625, -8.749267578125, -8.4676513671875, -8.18603515625, -7.9044189453125, -7.622802734375, -7.3411865234375, -7.0595703125, -6.7779541015625, -6.496337890625, -6.2147216796875, -5.93310546875, -5.6514892578125, -5.369873046875, -5.0882568359375, -4.806640625, -4.5250244140625, -4.243408203125, -3.9617919921875, -3.68017578125, -3.3985595703125, -3.116943359375, -2.8353271484375, -2.5537109375, -2.2720947265625, -1.990478515625, -1.7088623046875, -1.42724609375, -1.1456298828125, -0.864013671875, -0.5823974609375, -0.30078125, -0.0191650390625, 0.262451171875, 0.5440673828125, 0.82568359375, 1.1072998046875, 1.388916015625, 1.6705322265625, 1.9521484375, 2.2337646484375, 2.515380859375, 2.7969970703125, 3.07861328125, 3.3602294921875, 3.641845703125, 3.9234619140625, 4.205078125, 4.4866943359375, 4.768310546875, 5.0499267578125, 5.33154296875, 5.6131591796875, 5.894775390625, 6.1763916015625, 6.4580078125, 6.7396240234375, 7.021240234375, 7.3028564453125, 7.58447265625, 7.8660888671875, 8.147705078125, 8.4293212890625, 8.7109375]}, "gradients/decoder.transformer.h.6.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 3.0, 0.0, 2.0, 6.0, 1.0, 3.0, 6.0, 1.0, 5.0, 8.0, 12.0, 16.0, 17.0, 29.0, 37.0, 42.0, 84.0, 135.0, 249.0, 421.0, 777.0, 1445.0, 2614.0, 5048.0, 10062.0, 20877.0, 48396.0, 132475.0, 546050.0, 2221522.0, 903527.0, 185984.0, 62326.0, 26581.0, 12526.0, 6170.0, 3099.0, 1621.0, 910.0, 493.0, 263.0, 153.0, 85.0, 60.0, 39.0, 27.0, 23.0, 25.0, 9.0, 7.0, 5.0, 5.0, 1.0, 6.0, 4.0, 2.0, 1.0, 2.0, 3.0, 0.0, 0.0, 2.0], "bins": [-13.140625, -12.725341796875, -12.31005859375, -11.894775390625, -11.4794921875, -11.064208984375, -10.64892578125, -10.233642578125, -9.818359375, -9.403076171875, -8.98779296875, -8.572509765625, -8.1572265625, -7.741943359375, -7.32666015625, -6.911376953125, -6.49609375, -6.080810546875, -5.66552734375, -5.250244140625, -4.8349609375, -4.419677734375, -4.00439453125, -3.589111328125, -3.173828125, -2.758544921875, -2.34326171875, -1.927978515625, -1.5126953125, -1.097412109375, -0.68212890625, -0.266845703125, 0.1484375, 0.563720703125, 0.97900390625, 1.394287109375, 1.8095703125, 2.224853515625, 2.64013671875, 3.055419921875, 3.470703125, 3.885986328125, 4.30126953125, 4.716552734375, 5.1318359375, 5.547119140625, 5.96240234375, 6.377685546875, 6.79296875, 7.208251953125, 7.62353515625, 8.038818359375, 8.4541015625, 8.869384765625, 9.28466796875, 9.699951171875, 10.115234375, 10.530517578125, 10.94580078125, 11.361083984375, 11.7763671875, 12.191650390625, 12.60693359375, 13.022216796875, 13.4375]}, "gradients/decoder.transformer.h.6.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 0.0, 0.0, 9.0, 5.0, 5.0, 6.0, 8.0, 12.0, 20.0, 24.0, 34.0, 39.0, 57.0, 65.0, 98.0, 133.0, 173.0, 322.0, 441.0, 637.0, 605.0, 416.0, 277.0, 186.0, 123.0, 89.0, 79.0, 42.0, 43.0, 32.0, 20.0, 20.0, 18.0, 10.0, 8.0, 8.0, 4.0, 6.0, 2.0, 4.0, 4.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-13.734375, -13.2587890625, -12.783203125, -12.3076171875, -11.83203125, -11.3564453125, -10.880859375, -10.4052734375, -9.9296875, -9.4541015625, -8.978515625, -8.5029296875, -8.02734375, -7.5517578125, -7.076171875, -6.6005859375, -6.125, -5.6494140625, -5.173828125, -4.6982421875, -4.22265625, -3.7470703125, -3.271484375, -2.7958984375, -2.3203125, -1.8447265625, -1.369140625, -0.8935546875, -0.41796875, 0.0576171875, 0.533203125, 1.0087890625, 1.484375, 1.9599609375, 2.435546875, 2.9111328125, 3.38671875, 3.8623046875, 4.337890625, 4.8134765625, 5.2890625, 5.7646484375, 6.240234375, 6.7158203125, 7.19140625, 7.6669921875, 8.142578125, 8.6181640625, 9.09375, 9.5693359375, 10.044921875, 10.5205078125, 10.99609375, 11.4716796875, 11.947265625, 12.4228515625, 12.8984375, 13.3740234375, 13.849609375, 14.3251953125, 14.80078125, 15.2763671875, 15.751953125, 16.2275390625, 16.703125]}, "gradients/decoder.transformer.h.6.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 1.0, 1.0, 3.0, 4.0, 6.0, 4.0, 13.0, 9.0, 11.0, 20.0, 30.0, 42.0, 40.0, 65.0, 94.0, 153.0, 270.0, 574.0, 1687.0, 5708.0, 24799.0, 169254.0, 2949185.0, 944162.0, 78776.0, 13781.0, 3434.0, 1138.0, 427.0, 212.0, 127.0, 69.0, 55.0, 46.0, 31.0, 22.0, 10.0, 6.0, 10.0, 6.0, 2.0, 3.0, 1.0, 3.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-41.53125, -40.33837890625, -39.1455078125, -37.95263671875, -36.759765625, -35.56689453125, -34.3740234375, -33.18115234375, -31.98828125, -30.79541015625, -29.6025390625, -28.40966796875, -27.216796875, -26.02392578125, -24.8310546875, -23.63818359375, -22.4453125, -21.25244140625, -20.0595703125, -18.86669921875, -17.673828125, -16.48095703125, -15.2880859375, -14.09521484375, -12.90234375, -11.70947265625, -10.5166015625, -9.32373046875, -8.130859375, -6.93798828125, -5.7451171875, -4.55224609375, -3.359375, -2.16650390625, -0.9736328125, 0.21923828125, 1.412109375, 2.60498046875, 3.7978515625, 4.99072265625, 6.18359375, 7.37646484375, 8.5693359375, 9.76220703125, 10.955078125, 12.14794921875, 13.3408203125, 14.53369140625, 15.7265625, 16.91943359375, 18.1123046875, 19.30517578125, 20.498046875, 21.69091796875, 22.8837890625, 24.07666015625, 25.26953125, 26.46240234375, 27.6552734375, 28.84814453125, 30.041015625, 31.23388671875, 32.4267578125, 33.61962890625, 34.8125]}, "gradients/decoder.transformer.h.6.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 7.0, 24.0, 175.0, 425.0, 303.0, 75.0, 8.0, 3.0], "bins": [-595.182861328125, -585.2100219726562, -575.2371826171875, -565.264404296875, -555.2915649414062, -545.3187255859375, -535.345947265625, -525.3731079101562, -515.4002685546875, -505.42742919921875, -495.4546203613281, -485.4818115234375, -475.50897216796875, -465.5361328125, -455.5633239746094, -445.59051513671875, -435.61767578125, -425.64483642578125, -415.6720275878906, -405.69921875, -395.72637939453125, -385.7535400390625, -375.7807312011719, -365.80792236328125, -355.8350830078125, -345.86224365234375, -335.8894348144531, -325.9166259765625, -315.94378662109375, -305.970947265625, -295.9981384277344, -286.02532958984375, -276.0525207519531, -266.0797119140625, -256.10687255859375, -246.13404846191406, -236.16122436523438, -226.1884002685547, -216.215576171875, -206.2427520751953, -196.26992797851562, -186.29710388183594, -176.32427978515625, -166.35145568847656, -156.37863159179688, -146.4058074951172, -136.4329833984375, -126.46015930175781, -116.4873275756836, -106.5145034790039, -96.54167938232422, -86.56885528564453, -76.59603118896484, -66.62320709228516, -56.65038299560547, -46.67755889892578, -36.704734802246094, -26.731910705566406, -16.75908660888672, -6.786262512207031, 3.1865615844726562, 13.159385681152344, 23.13220977783203, 33.10503387451172, 43.077857971191406]}, "gradients/decoder.transformer.h.6.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 4.0, 3.0, 3.0, 3.0, 5.0, 6.0, 8.0, 4.0, 12.0, 16.0, 18.0, 18.0, 21.0, 24.0, 26.0, 23.0, 31.0, 27.0, 49.0, 34.0, 33.0, 59.0, 39.0, 46.0, 39.0, 53.0, 49.0, 42.0, 31.0, 31.0, 22.0, 35.0, 23.0, 26.0, 18.0, 16.0, 18.0, 19.0, 14.0, 17.0, 12.0, 4.0, 9.0, 8.0, 4.0, 1.0, 5.0, 2.0, 2.0, 1.0, 2.0, 2.0, 0.0, 2.0], "bins": [-55.51158905029297, -53.874698638916016, -52.2378044128418, -50.600914001464844, -48.964019775390625, -47.32712936401367, -45.69023895263672, -44.0533447265625, -42.41645431518555, -40.779563903808594, -39.142669677734375, -37.50577926635742, -35.8688850402832, -34.23199462890625, -32.59510040283203, -30.958209991455078, -29.321317672729492, -27.684425354003906, -26.04753303527832, -24.410640716552734, -22.77375030517578, -21.136857986450195, -19.49996566772461, -17.863075256347656, -16.226181030273438, -14.589288711547852, -12.952397346496582, -11.315505027770996, -9.678613662719727, -8.04172134399414, -6.404829025268555, -4.767937660217285, -3.1310462951660156, -1.4941543340682983, 0.14273762702941895, 1.7796297073364258, 3.4165215492248535, 5.053413391113281, 6.690305709838867, 8.327197074890137, 9.964089393615723, 11.600981712341309, 13.237873077392578, 14.874765396118164, 16.51165771484375, 18.148548126220703, 19.785442352294922, 21.422332763671875, 23.05922508239746, 24.696117401123047, 26.333009719848633, 27.96990203857422, 29.606792449951172, 31.243684768676758, 32.880577087402344, 34.5174674987793, 36.154361724853516, 37.79125213623047, 39.42814636230469, 41.06503677368164, 42.70193099975586, 44.33882141113281, 45.97571563720703, 47.612606048583984, 49.24949645996094]}, "gradients/decoder.transformer.h.6.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 4.0, 5.0, 10.0, 7.0, 11.0, 12.0, 10.0, 19.0, 26.0, 26.0, 28.0, 25.0, 39.0, 37.0, 41.0, 39.0, 35.0, 44.0, 33.0, 54.0, 55.0, 60.0, 39.0, 45.0, 41.0, 44.0, 32.0, 30.0, 24.0, 24.0, 26.0, 24.0, 18.0, 12.0, 4.0, 9.0, 5.0, 4.0, 6.0, 3.0, 0.0, 2.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 2.0], "bins": [-10.5390625, -10.2353515625, -9.931640625, -9.6279296875, -9.32421875, -9.0205078125, -8.716796875, -8.4130859375, -8.109375, -7.8056640625, -7.501953125, -7.1982421875, -6.89453125, -6.5908203125, -6.287109375, -5.9833984375, -5.6796875, -5.3759765625, -5.072265625, -4.7685546875, -4.46484375, -4.1611328125, -3.857421875, -3.5537109375, -3.25, -2.9462890625, -2.642578125, -2.3388671875, -2.03515625, -1.7314453125, -1.427734375, -1.1240234375, -0.8203125, -0.5166015625, -0.212890625, 0.0908203125, 0.39453125, 0.6982421875, 1.001953125, 1.3056640625, 1.609375, 1.9130859375, 2.216796875, 2.5205078125, 2.82421875, 3.1279296875, 3.431640625, 3.7353515625, 4.0390625, 4.3427734375, 4.646484375, 4.9501953125, 5.25390625, 5.5576171875, 5.861328125, 6.1650390625, 6.46875, 6.7724609375, 7.076171875, 7.3798828125, 7.68359375, 7.9873046875, 8.291015625, 8.5947265625, 8.8984375]}, "gradients/decoder.transformer.h.6.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0, 1.0, 7.0, 5.0, 8.0, 10.0, 13.0, 33.0, 37.0, 45.0, 87.0, 111.0, 191.0, 272.0, 433.0, 668.0, 1120.0, 1790.0, 2924.0, 4893.0, 8180.0, 13387.0, 22405.0, 38046.0, 67093.0, 130792.0, 379929.0, 180449.0, 82667.0, 45705.0, 26857.0, 15862.0, 9684.0, 5663.0, 3482.0, 2155.0, 1296.0, 809.0, 499.0, 329.0, 215.0, 126.0, 96.0, 63.0, 38.0, 30.0, 30.0, 5.0, 8.0, 7.0, 4.0, 5.0, 1.0, 2.0, 0.0, 3.0], "bins": [-0.389892578125, -0.3786468505859375, -0.367401123046875, -0.3561553955078125, -0.34490966796875, -0.3336639404296875, -0.322418212890625, -0.3111724853515625, -0.2999267578125, -0.2886810302734375, -0.277435302734375, -0.2661895751953125, -0.25494384765625, -0.2436981201171875, -0.232452392578125, -0.2212066650390625, -0.2099609375, -0.1987152099609375, -0.187469482421875, -0.1762237548828125, -0.16497802734375, -0.1537322998046875, -0.142486572265625, -0.1312408447265625, -0.1199951171875, -0.1087493896484375, -0.097503662109375, -0.0862579345703125, -0.07501220703125, -0.0637664794921875, -0.052520751953125, -0.0412750244140625, -0.030029296875, -0.0187835693359375, -0.007537841796875, 0.0037078857421875, 0.01495361328125, 0.0261993408203125, 0.037445068359375, 0.0486907958984375, 0.0599365234375, 0.0711822509765625, 0.082427978515625, 0.0936737060546875, 0.10491943359375, 0.1161651611328125, 0.127410888671875, 0.1386566162109375, 0.14990234375, 0.1611480712890625, 0.172393798828125, 0.1836395263671875, 0.19488525390625, 0.2061309814453125, 0.217376708984375, 0.2286224365234375, 0.2398681640625, 0.2511138916015625, 0.262359619140625, 0.2736053466796875, 0.28485107421875, 0.2960968017578125, 0.307342529296875, 0.3185882568359375, 0.329833984375]}, "gradients/decoder.transformer.h.6.crossattention.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 2.0, 4.0, 6.0, 4.0, 8.0, 8.0, 11.0, 13.0, 17.0, 22.0, 19.0, 28.0, 26.0, 26.0, 41.0, 30.0, 36.0, 43.0, 33.0, 43.0, 38.0, 1069.0, 38.0, 47.0, 44.0, 39.0, 50.0, 32.0, 38.0, 29.0, 18.0, 34.0, 24.0, 23.0, 22.0, 9.0, 23.0, 12.0, 5.0, 6.0, 5.0, 3.0, 4.0, 4.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.98828125, -5.780029296875, -5.57177734375, -5.363525390625, -5.1552734375, -4.947021484375, -4.73876953125, -4.530517578125, -4.322265625, -4.114013671875, -3.90576171875, -3.697509765625, -3.4892578125, -3.281005859375, -3.07275390625, -2.864501953125, -2.65625, -2.447998046875, -2.23974609375, -2.031494140625, -1.8232421875, -1.614990234375, -1.40673828125, -1.198486328125, -0.990234375, -0.781982421875, -0.57373046875, -0.365478515625, -0.1572265625, 0.051025390625, 0.25927734375, 0.467529296875, 0.67578125, 0.884033203125, 1.09228515625, 1.300537109375, 1.5087890625, 1.717041015625, 1.92529296875, 2.133544921875, 2.341796875, 2.550048828125, 2.75830078125, 2.966552734375, 3.1748046875, 3.383056640625, 3.59130859375, 3.799560546875, 4.0078125, 4.216064453125, 4.42431640625, 4.632568359375, 4.8408203125, 5.049072265625, 5.25732421875, 5.465576171875, 5.673828125, 5.882080078125, 6.09033203125, 6.298583984375, 6.5068359375, 6.715087890625, 6.92333984375, 7.131591796875, 7.33984375]}, "gradients/decoder.transformer.h.6.crossattention.c_attn.weight": {"_type": "histogram", "values": [3.0, 1.0, 3.0, 2.0, 6.0, 4.0, 9.0, 15.0, 22.0, 20.0, 27.0, 55.0, 69.0, 124.0, 179.0, 285.0, 364.0, 598.0, 890.0, 1332.0, 1999.0, 2979.0, 4338.0, 6753.0, 10080.0, 15418.0, 24457.0, 38527.0, 62458.0, 104679.0, 231262.0, 1305517.0, 108753.0, 64480.0, 39579.0, 24912.0, 16054.0, 10380.0, 6803.0, 4625.0, 2981.0, 2018.0, 1333.0, 952.0, 581.0, 402.0, 256.0, 179.0, 131.0, 86.0, 55.0, 40.0, 20.0, 21.0, 10.0, 14.0, 1.0, 1.0, 1.0, 4.0, 2.0, 1.0, 1.0, 1.0], "bins": [-0.1666259765625, -0.1612567901611328, -0.15588760375976562, -0.15051841735839844, -0.14514923095703125, -0.13978004455566406, -0.13441085815429688, -0.1290416717529297, -0.1236724853515625, -0.11830329895019531, -0.11293411254882812, -0.10756492614746094, -0.10219573974609375, -0.09682655334472656, -0.09145736694335938, -0.08608818054199219, -0.080718994140625, -0.07534980773925781, -0.06998062133789062, -0.06461143493652344, -0.05924224853515625, -0.05387306213378906, -0.048503875732421875, -0.04313468933105469, -0.0377655029296875, -0.03239631652832031, -0.027027130126953125, -0.021657943725585938, -0.01628875732421875, -0.010919570922851562, -0.005550384521484375, -0.0001811981201171875, 0.00518798828125, 0.010557174682617188, 0.015926361083984375, 0.021295547485351562, 0.02666473388671875, 0.03203392028808594, 0.037403106689453125, 0.04277229309082031, 0.0481414794921875, 0.05351066589355469, 0.058879852294921875, 0.06424903869628906, 0.06961822509765625, 0.07498741149902344, 0.08035659790039062, 0.08572578430175781, 0.091094970703125, 0.09646415710449219, 0.10183334350585938, 0.10720252990722656, 0.11257171630859375, 0.11794090270996094, 0.12331008911132812, 0.1286792755126953, 0.1340484619140625, 0.1394176483154297, 0.14478683471679688, 0.15015602111816406, 0.15552520751953125, 0.16089439392089844, 0.16626358032226562, 0.1716327667236328, 0.177001953125]}, "gradients/decoder.transformer.h.6.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 3.0, 1.0, 9.0, 10.0, 6.0, 7.0, 9.0, 11.0, 10.0, 22.0, 25.0, 26.0, 29.0, 54.0, 93.0, 193.0, 156.0, 114.0, 57.0, 38.0, 15.0, 36.0, 19.0, 14.0, 10.0, 10.0, 9.0, 6.0, 5.0, 3.0, 4.0, 3.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.821487426757812e-06, -8.538365364074707e-06, -8.255243301391602e-06, -7.972121238708496e-06, -7.68899917602539e-06, -7.405877113342285e-06, -7.12275505065918e-06, -6.839632987976074e-06, -6.556510925292969e-06, -6.273388862609863e-06, -5.990266799926758e-06, -5.707144737243652e-06, -5.424022674560547e-06, -5.140900611877441e-06, -4.857778549194336e-06, -4.5746564865112305e-06, -4.291534423828125e-06, -4.0084123611450195e-06, -3.725290298461914e-06, -3.4421682357788086e-06, -3.159046173095703e-06, -2.8759241104125977e-06, -2.592802047729492e-06, -2.3096799850463867e-06, -2.0265579223632812e-06, -1.7434358596801758e-06, -1.4603137969970703e-06, -1.1771917343139648e-06, -8.940696716308594e-07, -6.109476089477539e-07, -3.2782554626464844e-07, -4.470348358154297e-08, 2.384185791015625e-07, 5.21540641784668e-07, 8.046627044677734e-07, 1.087784767150879e-06, 1.3709068298339844e-06, 1.6540288925170898e-06, 1.9371509552001953e-06, 2.2202730178833008e-06, 2.5033950805664062e-06, 2.7865171432495117e-06, 3.069639205932617e-06, 3.3527612686157227e-06, 3.635883331298828e-06, 3.919005393981934e-06, 4.202127456665039e-06, 4.4852495193481445e-06, 4.76837158203125e-06, 5.0514936447143555e-06, 5.334615707397461e-06, 5.617737770080566e-06, 5.900859832763672e-06, 6.183981895446777e-06, 6.467103958129883e-06, 6.750226020812988e-06, 7.033348083496094e-06, 7.316470146179199e-06, 7.599592208862305e-06, 7.88271427154541e-06, 8.165836334228516e-06, 8.448958396911621e-06, 8.732080459594727e-06, 9.015202522277832e-06, 9.298324584960938e-06]}, "gradients/decoder.transformer.h.6.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0, 4.0, 2.0, 2.0, 5.0, 8.0, 7.0, 12.0, 12.0, 15.0, 23.0, 35.0, 48.0, 97.0, 191.0, 1314.0, 939112.0, 106691.0, 571.0, 166.0, 66.0, 47.0, 39.0, 24.0, 20.0, 11.0, 9.0, 11.0, 5.0, 8.0, 5.0, 1.0, 0.0, 3.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00021958351135253906, -0.00021290220320224762, -0.00020622089505195618, -0.00019953958690166473, -0.0001928582787513733, -0.00018617697060108185, -0.0001794956624507904, -0.00017281435430049896, -0.00016613304615020752, -0.00015945173799991608, -0.00015277042984962463, -0.0001460891216993332, -0.00013940781354904175, -0.0001327265053987503, -0.00012604519724845886, -0.00011936388909816742, -0.00011268258094787598, -0.00010600127279758453, -9.931996464729309e-05, -9.263865649700165e-05, -8.59573483467102e-05, -7.927604019641876e-05, -7.259473204612732e-05, -6.591342389583588e-05, -5.9232115745544434e-05, -5.255080759525299e-05, -4.586949944496155e-05, -3.9188191294670105e-05, -3.250688314437866e-05, -2.582557499408722e-05, -1.9144266843795776e-05, -1.2462958693504333e-05, -5.781650543212891e-06, 8.996576070785522e-07, 7.580965757369995e-06, 1.4262273907661438e-05, 2.094358205795288e-05, 2.7624890208244324e-05, 3.4306198358535767e-05, 4.098750650882721e-05, 4.766881465911865e-05, 5.4350122809410095e-05, 6.103143095970154e-05, 6.771273910999298e-05, 7.439404726028442e-05, 8.107535541057587e-05, 8.775666356086731e-05, 9.443797171115875e-05, 0.0001011192798614502, 0.00010780058801174164, 0.00011448189616203308, 0.00012116320431232452, 0.00012784451246261597, 0.0001345258206129074, 0.00014120712876319885, 0.0001478884369134903, 0.00015456974506378174, 0.00016125105321407318, 0.00016793236136436462, 0.00017461366951465607, 0.0001812949776649475, 0.00018797628581523895, 0.0001946575939655304, 0.00020133890211582184, 0.00020802021026611328]}, "gradients/decoder.transformer.h.6.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 30.0, 95.0, 306.0, 360.0, 171.0, 38.0, 12.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.507875802228227e-06, -8.953255019150674e-06, -8.39863332657842e-06, -7.844012543500867e-06, -7.289391760423314e-06, -6.73477052259841e-06, -6.180149284773506e-06, -5.625528501695953e-06, -5.0709072638710495e-06, -4.516286026046146e-06, -3.961665242968593e-06, -3.407044005143689e-06, -2.8524229946924606e-06, -2.2978019842412323e-06, -1.7431807464163285e-06, -1.1885599633387756e-06, -6.339387255138718e-07, -7.931765821922454e-08, 4.753034090754227e-07, 1.0299245332134888e-06, 1.5845455436647171e-06, 2.1391665541159455e-06, 2.6937877919408493e-06, 3.248408575018402e-06, 3.803029812843306e-06, 4.35765105066821e-06, 4.912271833745763e-06, 5.4668930715706665e-06, 6.02151430939557e-06, 6.576135092473123e-06, 7.130756330298027e-06, 7.68537756812293e-06, 8.239998351200484e-06, 8.794619134278037e-06, 9.349240826850291e-06, 9.903861609927844e-06, 1.0458482393005397e-05, 1.1013104085577652e-05, 1.1567724868655205e-05, 1.2122345651732758e-05, 1.267696643481031e-05, 1.3231587217887864e-05, 1.3786208910460118e-05, 1.4340829693537671e-05, 1.4895450476615224e-05, 1.545007216918748e-05, 1.600469295226503e-05, 1.6559313735342585e-05, 1.711393633740954e-05, 1.7668557120487094e-05, 1.8223177903564647e-05, 1.87777986866422e-05, 1.9332421288709156e-05, 1.988704207178671e-05, 2.0441662854864262e-05, 2.0996283637941815e-05, 2.1550904421019368e-05, 2.210552520409692e-05, 2.2660145987174474e-05, 2.321476858924143e-05, 2.3769389372318983e-05, 2.4324010155396536e-05, 2.487863093847409e-05, 2.5433251721551642e-05, 2.5987872504629195e-05]}, "gradients/decoder.transformer.h.6.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 6.0, 4.0, 7.0, 5.0, 9.0, 5.0, 13.0, 8.0, 18.0, 9.0, 14.0, 9.0, 17.0, 9.0, 36.0, 21.0, 34.0, 22.0, 41.0, 22.0, 49.0, 25.0, 26.0, 48.0, 26.0, 53.0, 22.0, 38.0, 29.0, 42.0, 20.0, 50.0, 27.0, 44.0, 20.0, 21.0, 13.0, 32.0, 15.0, 22.0, 11.0, 19.0, 3.0, 9.0, 10.0, 12.0, 4.0, 4.0, 1.0, 5.0, 2.0, 4.0, 1.0, 1.0], "bins": [-3.0994415283203125e-06, -3.010965883731842e-06, -2.9224902391433716e-06, -2.834014594554901e-06, -2.7455389499664307e-06, -2.65706330537796e-06, -2.5685876607894897e-06, -2.4801120162010193e-06, -2.391636371612549e-06, -2.3031607270240784e-06, -2.214685082435608e-06, -2.1262094378471375e-06, -2.037733793258667e-06, -1.9492581486701965e-06, -1.860782504081726e-06, -1.7723068594932556e-06, -1.6838312149047852e-06, -1.5953555703163147e-06, -1.5068799257278442e-06, -1.4184042811393738e-06, -1.3299286365509033e-06, -1.2414529919624329e-06, -1.1529773473739624e-06, -1.064501702785492e-06, -9.760260581970215e-07, -8.87550413608551e-07, -7.990747690200806e-07, -7.105991244316101e-07, -6.221234798431396e-07, -5.336478352546692e-07, -4.4517219066619873e-07, -3.5669654607772827e-07, -2.682209014892578e-07, -1.7974525690078735e-07, -9.12696123123169e-08, -2.7939677238464355e-09, 8.568167686462402e-08, 1.7415732145309448e-07, 2.6263296604156494e-07, 3.511086106300354e-07, 4.3958425521850586e-07, 5.280598998069763e-07, 6.165355443954468e-07, 7.050111889839172e-07, 7.934868335723877e-07, 8.819624781608582e-07, 9.704381227493286e-07, 1.058913767337799e-06, 1.1473894119262695e-06, 1.23586505651474e-06, 1.3243407011032104e-06, 1.412816345691681e-06, 1.5012919902801514e-06, 1.5897676348686218e-06, 1.6782432794570923e-06, 1.7667189240455627e-06, 1.8551945686340332e-06, 1.9436702132225037e-06, 2.032145857810974e-06, 2.1206215023994446e-06, 2.209097146987915e-06, 2.2975727915763855e-06, 2.386048436164856e-06, 2.4745240807533264e-06, 2.562999725341797e-06]}, "gradients/decoder.transformer.h.6.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 4.0, 5.0, 10.0, 7.0, 11.0, 12.0, 10.0, 19.0, 26.0, 26.0, 28.0, 25.0, 39.0, 37.0, 41.0, 39.0, 35.0, 44.0, 33.0, 54.0, 55.0, 60.0, 39.0, 45.0, 41.0, 44.0, 32.0, 30.0, 24.0, 24.0, 26.0, 24.0, 18.0, 12.0, 4.0, 9.0, 5.0, 4.0, 6.0, 3.0, 0.0, 2.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 2.0], "bins": [-10.5390625, -10.2353515625, -9.931640625, -9.6279296875, -9.32421875, -9.0205078125, -8.716796875, -8.4130859375, -8.109375, -7.8056640625, -7.501953125, -7.1982421875, -6.89453125, -6.5908203125, -6.287109375, -5.9833984375, -5.6796875, -5.3759765625, -5.072265625, -4.7685546875, -4.46484375, -4.1611328125, -3.857421875, -3.5537109375, -3.25, -2.9462890625, -2.642578125, -2.3388671875, -2.03515625, -1.7314453125, -1.427734375, -1.1240234375, -0.8203125, -0.5166015625, -0.212890625, 0.0908203125, 0.39453125, 0.6982421875, 1.001953125, 1.3056640625, 1.609375, 1.9130859375, 2.216796875, 2.5205078125, 2.82421875, 3.1279296875, 3.431640625, 3.7353515625, 4.0390625, 4.3427734375, 4.646484375, 4.9501953125, 5.25390625, 5.5576171875, 5.861328125, 6.1650390625, 6.46875, 6.7724609375, 7.076171875, 7.3798828125, 7.68359375, 7.9873046875, 8.291015625, 8.5947265625, 8.8984375]}, "gradients/decoder.transformer.h.6.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 3.0, 2.0, 3.0, 3.0, 6.0, 8.0, 9.0, 13.0, 27.0, 31.0, 62.0, 110.0, 170.0, 305.0, 483.0, 860.0, 1587.0, 2990.0, 5555.0, 10370.0, 18885.0, 36174.0, 76784.0, 278955.0, 432595.0, 93578.0, 41610.0, 21764.0, 11657.0, 6392.0, 3414.0, 1847.0, 991.0, 521.0, 319.0, 188.0, 107.0, 61.0, 44.0, 24.0, 11.0, 13.0, 8.0, 5.0, 8.0, 2.0, 2.0, 4.0, 3.0, 2.0, 2.0, 1.0, 1.0], "bins": [-17.578125, -17.077392578125, -16.57666015625, -16.075927734375, -15.5751953125, -15.074462890625, -14.57373046875, -14.072998046875, -13.572265625, -13.071533203125, -12.57080078125, -12.070068359375, -11.5693359375, -11.068603515625, -10.56787109375, -10.067138671875, -9.56640625, -9.065673828125, -8.56494140625, -8.064208984375, -7.5634765625, -7.062744140625, -6.56201171875, -6.061279296875, -5.560546875, -5.059814453125, -4.55908203125, -4.058349609375, -3.5576171875, -3.056884765625, -2.55615234375, -2.055419921875, -1.5546875, -1.053955078125, -0.55322265625, -0.052490234375, 0.4482421875, 0.948974609375, 1.44970703125, 1.950439453125, 2.451171875, 2.951904296875, 3.45263671875, 3.953369140625, 4.4541015625, 4.954833984375, 5.45556640625, 5.956298828125, 6.45703125, 6.957763671875, 7.45849609375, 7.959228515625, 8.4599609375, 8.960693359375, 9.46142578125, 9.962158203125, 10.462890625, 10.963623046875, 11.46435546875, 11.965087890625, 12.4658203125, 12.966552734375, 13.46728515625, 13.968017578125, 14.46875]}, "gradients/decoder.transformer.h.6.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 0.0, 3.0, 3.0, 1.0, 4.0, 3.0, 3.0, 16.0, 8.0, 11.0, 16.0, 18.0, 24.0, 21.0, 28.0, 21.0, 28.0, 32.0, 34.0, 37.0, 55.0, 70.0, 77.0, 157.0, 1423.0, 325.0, 150.0, 92.0, 58.0, 61.0, 46.0, 39.0, 28.0, 36.0, 17.0, 22.0, 18.0, 14.0, 20.0, 10.0, 5.0, 11.0, 1.0, 4.0, 4.0, 3.0, 5.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-24.25, -23.41259765625, -22.5751953125, -21.73779296875, -20.900390625, -20.06298828125, -19.2255859375, -18.38818359375, -17.55078125, -16.71337890625, -15.8759765625, -15.03857421875, -14.201171875, -13.36376953125, -12.5263671875, -11.68896484375, -10.8515625, -10.01416015625, -9.1767578125, -8.33935546875, -7.501953125, -6.66455078125, -5.8271484375, -4.98974609375, -4.15234375, -3.31494140625, -2.4775390625, -1.64013671875, -0.802734375, 0.03466796875, 0.8720703125, 1.70947265625, 2.546875, 3.38427734375, 4.2216796875, 5.05908203125, 5.896484375, 6.73388671875, 7.5712890625, 8.40869140625, 9.24609375, 10.08349609375, 10.9208984375, 11.75830078125, 12.595703125, 13.43310546875, 14.2705078125, 15.10791015625, 15.9453125, 16.78271484375, 17.6201171875, 18.45751953125, 19.294921875, 20.13232421875, 20.9697265625, 21.80712890625, 22.64453125, 23.48193359375, 24.3193359375, 25.15673828125, 25.994140625, 26.83154296875, 27.6689453125, 28.50634765625, 29.34375]}, "gradients/decoder.transformer.h.6.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 3.0, 5.0, 2.0, 9.0, 2.0, 5.0, 13.0, 7.0, 17.0, 22.0, 29.0, 37.0, 59.0, 82.0, 99.0, 199.0, 452.0, 1304.0, 5755.0, 33375.0, 404803.0, 2583233.0, 98746.0, 13266.0, 2678.0, 715.0, 269.0, 156.0, 87.0, 78.0, 49.0, 37.0, 24.0, 25.0, 18.0, 11.0, 18.0, 11.0, 8.0, 3.0, 1.0, 1.0, 2.0, 1.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-45.09375, -43.65087890625, -42.2080078125, -40.76513671875, -39.322265625, -37.87939453125, -36.4365234375, -34.99365234375, -33.55078125, -32.10791015625, -30.6650390625, -29.22216796875, -27.779296875, -26.33642578125, -24.8935546875, -23.45068359375, -22.0078125, -20.56494140625, -19.1220703125, -17.67919921875, -16.236328125, -14.79345703125, -13.3505859375, -11.90771484375, -10.46484375, -9.02197265625, -7.5791015625, -6.13623046875, -4.693359375, -3.25048828125, -1.8076171875, -0.36474609375, 1.078125, 2.52099609375, 3.9638671875, 5.40673828125, 6.849609375, 8.29248046875, 9.7353515625, 11.17822265625, 12.62109375, 14.06396484375, 15.5068359375, 16.94970703125, 18.392578125, 19.83544921875, 21.2783203125, 22.72119140625, 24.1640625, 25.60693359375, 27.0498046875, 28.49267578125, 29.935546875, 31.37841796875, 32.8212890625, 34.26416015625, 35.70703125, 37.14990234375, 38.5927734375, 40.03564453125, 41.478515625, 42.92138671875, 44.3642578125, 45.80712890625, 47.25]}, "gradients/decoder.transformer.h.6.ln_1.weight": {"_type": "histogram", "values": [1.0, 1.0, 11.0, 54.0, 229.0, 372.0, 259.0, 72.0, 17.0, 3.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-35.9978141784668, -29.601634979248047, -23.205455780029297, -16.80927848815918, -10.41309928894043, -4.01692008972168, 2.3792572021484375, 8.775436401367188, 15.171615600585938, 21.567794799804688, 27.963973999023438, 34.36015319824219, 40.75633239746094, 47.15251159667969, 53.54868698120117, 59.94486618041992, 66.34104919433594, 72.73722839355469, 79.13340759277344, 85.52958679199219, 91.92576599121094, 98.32194519042969, 104.71812438964844, 111.11430358886719, 117.5104751586914, 123.90665435791016, 130.30282592773438, 136.69900512695312, 143.09518432617188, 149.49136352539062, 155.88754272460938, 162.28372192382812, 168.67991638183594, 175.0760955810547, 181.47227478027344, 187.8684539794922, 194.26463317871094, 200.6608123779297, 207.05699157714844, 213.4531707763672, 219.84934997558594, 226.2455291748047, 232.64170837402344, 239.0378875732422, 245.43406677246094, 251.8302459716797, 258.2264099121094, 264.6225891113281, 271.0187683105469, 277.4149475097656, 283.8111267089844, 290.2073059082031, 296.6034851074219, 302.9996643066406, 309.3958435058594, 315.7920227050781, 322.1882019042969, 328.5843811035156, 334.9805603027344, 341.3767395019531, 347.7729187011719, 354.1690979003906, 360.5652770996094, 366.9614562988281, 373.3576354980469]}, "gradients/decoder.transformer.h.6.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 2.0, 3.0, 2.0, 7.0, 2.0, 3.0, 7.0, 6.0, 8.0, 11.0, 10.0, 15.0, 19.0, 23.0, 25.0, 28.0, 26.0, 32.0, 32.0, 30.0, 26.0, 41.0, 33.0, 30.0, 50.0, 53.0, 46.0, 46.0, 42.0, 45.0, 33.0, 35.0, 39.0, 32.0, 22.0, 15.0, 25.0, 16.0, 15.0, 18.0, 9.0, 10.0, 10.0, 13.0, 4.0, 11.0, 0.0, 2.0, 3.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-73.28807067871094, -71.00851440429688, -68.72896575927734, -66.44941711425781, -64.16986083984375, -61.89030838012695, -59.610755920410156, -57.33120346069336, -55.05165100097656, -52.772098541259766, -50.49254608154297, -48.21299362182617, -45.933441162109375, -43.65388870239258, -41.37433624267578, -39.094783782958984, -36.81523132324219, -34.53567886352539, -32.256126403808594, -29.976573944091797, -27.697021484375, -25.417469024658203, -23.137916564941406, -20.85836410522461, -18.578811645507812, -16.299259185791016, -14.019706726074219, -11.740154266357422, -9.460601806640625, -7.181049346923828, -4.901496887207031, -2.6219444274902344, -0.3423919677734375, 1.9371604919433594, 4.216712951660156, 6.496265411376953, 8.77581787109375, 11.055370330810547, 13.334922790527344, 15.61447525024414, 17.894027709960938, 20.173580169677734, 22.45313262939453, 24.732685089111328, 27.012237548828125, 29.291790008544922, 31.57134246826172, 33.850894927978516, 36.13044738769531, 38.40999984741211, 40.689552307128906, 42.9691047668457, 45.2486572265625, 47.5282096862793, 49.807762145996094, 52.08731460571289, 54.36686706542969, 56.646419525146484, 58.92597198486328, 61.20552444458008, 63.485076904296875, 65.76463317871094, 68.04418182373047, 70.32373046875, 72.60328674316406]}, "gradients/decoder.transformer.h.5.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 3.0, 1.0, 9.0, 7.0, 11.0, 5.0, 12.0, 14.0, 25.0, 15.0, 22.0, 31.0, 34.0, 27.0, 35.0, 26.0, 35.0, 51.0, 48.0, 48.0, 54.0, 50.0, 51.0, 47.0, 31.0, 42.0, 44.0, 35.0, 38.0, 29.0, 27.0, 27.0, 22.0, 8.0, 14.0, 12.0, 5.0, 7.0, 1.0, 5.0, 1.0, 3.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0], "bins": [-10.765625, -10.4471435546875, -10.128662109375, -9.8101806640625, -9.49169921875, -9.1732177734375, -8.854736328125, -8.5362548828125, -8.2177734375, -7.8992919921875, -7.580810546875, -7.2623291015625, -6.94384765625, -6.6253662109375, -6.306884765625, -5.9884033203125, -5.669921875, -5.3514404296875, -5.032958984375, -4.7144775390625, -4.39599609375, -4.0775146484375, -3.759033203125, -3.4405517578125, -3.1220703125, -2.8035888671875, -2.485107421875, -2.1666259765625, -1.84814453125, -1.5296630859375, -1.211181640625, -0.8927001953125, -0.57421875, -0.2557373046875, 0.062744140625, 0.3812255859375, 0.69970703125, 1.0181884765625, 1.336669921875, 1.6551513671875, 1.9736328125, 2.2921142578125, 2.610595703125, 2.9290771484375, 3.24755859375, 3.5660400390625, 3.884521484375, 4.2030029296875, 4.521484375, 4.8399658203125, 5.158447265625, 5.4769287109375, 5.79541015625, 6.1138916015625, 6.432373046875, 6.7508544921875, 7.0693359375, 7.3878173828125, 7.706298828125, 8.0247802734375, 8.34326171875, 8.6617431640625, 8.980224609375, 9.2987060546875, 9.6171875]}, "gradients/decoder.transformer.h.5.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 2.0, 1.0, 7.0, 10.0, 13.0, 27.0, 45.0, 74.0, 159.0, 246.0, 484.0, 861.0, 1569.0, 3148.0, 5937.0, 12139.0, 25629.0, 59643.0, 165712.0, 696282.0, 2238612.0, 707191.0, 165945.0, 59959.0, 25587.0, 12156.0, 6025.0, 3041.0, 1728.0, 921.0, 508.0, 278.0, 163.0, 86.0, 38.0, 34.0, 16.0, 12.0, 7.0, 1.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-10.5234375, -10.0340576171875, -9.544677734375, -9.0552978515625, -8.56591796875, -8.0765380859375, -7.587158203125, -7.0977783203125, -6.6083984375, -6.1190185546875, -5.629638671875, -5.1402587890625, -4.65087890625, -4.1614990234375, -3.672119140625, -3.1827392578125, -2.693359375, -2.2039794921875, -1.714599609375, -1.2252197265625, -0.73583984375, -0.2464599609375, 0.242919921875, 0.7322998046875, 1.2216796875, 1.7110595703125, 2.200439453125, 2.6898193359375, 3.17919921875, 3.6685791015625, 4.157958984375, 4.6473388671875, 5.13671875, 5.6260986328125, 6.115478515625, 6.6048583984375, 7.09423828125, 7.5836181640625, 8.072998046875, 8.5623779296875, 9.0517578125, 9.5411376953125, 10.030517578125, 10.5198974609375, 11.00927734375, 11.4986572265625, 11.988037109375, 12.4774169921875, 12.966796875, 13.4561767578125, 13.945556640625, 14.4349365234375, 14.92431640625, 15.4136962890625, 15.903076171875, 16.3924560546875, 16.8818359375, 17.3712158203125, 17.860595703125, 18.3499755859375, 18.83935546875, 19.3287353515625, 19.818115234375, 20.3074951171875, 20.796875]}, "gradients/decoder.transformer.h.5.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 4.0, 0.0, 2.0, 4.0, 1.0, 2.0, 4.0, 9.0, 5.0, 15.0, 25.0, 31.0, 38.0, 51.0, 67.0, 100.0, 144.0, 194.0, 310.0, 461.0, 674.0, 580.0, 406.0, 255.0, 192.0, 143.0, 103.0, 84.0, 52.0, 30.0, 28.0, 20.0, 15.0, 5.0, 11.0, 5.0, 2.0, 3.0, 5.0, 2.0, 2.0, 4.0, 0.0, 0.0, 0.0, 2.0], "bins": [-22.59375, -22.010986328125, -21.42822265625, -20.845458984375, -20.2626953125, -19.679931640625, -19.09716796875, -18.514404296875, -17.931640625, -17.348876953125, -16.76611328125, -16.183349609375, -15.6005859375, -15.017822265625, -14.43505859375, -13.852294921875, -13.26953125, -12.686767578125, -12.10400390625, -11.521240234375, -10.9384765625, -10.355712890625, -9.77294921875, -9.190185546875, -8.607421875, -8.024658203125, -7.44189453125, -6.859130859375, -6.2763671875, -5.693603515625, -5.11083984375, -4.528076171875, -3.9453125, -3.362548828125, -2.77978515625, -2.197021484375, -1.6142578125, -1.031494140625, -0.44873046875, 0.134033203125, 0.716796875, 1.299560546875, 1.88232421875, 2.465087890625, 3.0478515625, 3.630615234375, 4.21337890625, 4.796142578125, 5.37890625, 5.961669921875, 6.54443359375, 7.127197265625, 7.7099609375, 8.292724609375, 8.87548828125, 9.458251953125, 10.041015625, 10.623779296875, 11.20654296875, 11.789306640625, 12.3720703125, 12.954833984375, 13.53759765625, 14.120361328125, 14.703125]}, "gradients/decoder.transformer.h.5.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 3.0, 2.0, 3.0, 5.0, 4.0, 7.0, 16.0, 18.0, 22.0, 22.0, 32.0, 72.0, 112.0, 172.0, 256.0, 465.0, 1000.0, 2355.0, 7596.0, 33832.0, 220700.0, 2786852.0, 1011191.0, 103012.0, 18537.0, 4873.0, 1601.0, 688.0, 318.0, 177.0, 97.0, 82.0, 42.0, 38.0, 28.0, 16.0, 12.0, 8.0, 11.0, 5.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0], "bins": [-35.40625, -34.17724609375, -32.9482421875, -31.71923828125, -30.490234375, -29.26123046875, -28.0322265625, -26.80322265625, -25.57421875, -24.34521484375, -23.1162109375, -21.88720703125, -20.658203125, -19.42919921875, -18.2001953125, -16.97119140625, -15.7421875, -14.51318359375, -13.2841796875, -12.05517578125, -10.826171875, -9.59716796875, -8.3681640625, -7.13916015625, -5.91015625, -4.68115234375, -3.4521484375, -2.22314453125, -0.994140625, 0.23486328125, 1.4638671875, 2.69287109375, 3.921875, 5.15087890625, 6.3798828125, 7.60888671875, 8.837890625, 10.06689453125, 11.2958984375, 12.52490234375, 13.75390625, 14.98291015625, 16.2119140625, 17.44091796875, 18.669921875, 19.89892578125, 21.1279296875, 22.35693359375, 23.5859375, 24.81494140625, 26.0439453125, 27.27294921875, 28.501953125, 29.73095703125, 30.9599609375, 32.18896484375, 33.41796875, 34.64697265625, 35.8759765625, 37.10498046875, 38.333984375, 39.56298828125, 40.7919921875, 42.02099609375, 43.25]}, "gradients/decoder.transformer.h.5.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 3.0, 43.0, 177.0, 464.0, 269.0, 52.0, 7.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-640.8143310546875, -628.8580322265625, -616.9017944335938, -604.9454956054688, -592.9891967773438, -581.032958984375, -569.07666015625, -557.120361328125, -545.1641235351562, -533.2078247070312, -521.2515869140625, -509.2952880859375, -497.3390197753906, -485.38275146484375, -473.42645263671875, -461.4701843261719, -449.5138854980469, -437.5576171875, -425.601318359375, -413.6450500488281, -401.68878173828125, -389.73248291015625, -377.7762145996094, -365.8199462890625, -353.8636474609375, -341.9073791503906, -329.9510803222656, -317.99481201171875, -306.0385437011719, -294.082275390625, -282.1259765625, -270.1697082519531, -258.2134704589844, -246.25718688964844, -234.30091857910156, -222.34463500976562, -210.38836669921875, -198.4320831298828, -186.47579956054688, -174.51953125, -162.563232421875, -150.60694885253906, -138.6506805419922, -126.69439697265625, -114.73812103271484, -102.78184509277344, -90.8255615234375, -78.8692855834961, -66.91301727294922, -54.95674133300781, -43.00046157836914, -31.04418182373047, -19.087905883789062, -7.131629943847656, 4.824653625488281, 16.780929565429688, 28.737205505371094, 40.6934814453125, 52.64976119995117, 64.60604095458984, 76.56231689453125, 88.51859283447266, 100.4748764038086, 112.43115234375, 124.3874282836914]}, "gradients/decoder.transformer.h.5.ln_2.bias": {"_type": "histogram", "values": [2.0, 4.0, 2.0, 2.0, 3.0, 4.0, 3.0, 2.0, 3.0, 6.0, 9.0, 7.0, 12.0, 6.0, 5.0, 19.0, 15.0, 15.0, 21.0, 24.0, 25.0, 43.0, 42.0, 45.0, 32.0, 40.0, 44.0, 46.0, 36.0, 42.0, 40.0, 36.0, 38.0, 43.0, 34.0, 32.0, 25.0, 21.0, 16.0, 18.0, 20.0, 21.0, 19.0, 13.0, 17.0, 9.0, 14.0, 8.0, 2.0, 4.0, 7.0, 8.0, 3.0, 2.0, 4.0, 3.0, 4.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-51.049835205078125, -49.32061767578125, -47.591400146484375, -45.862178802490234, -44.13296127319336, -42.403743743896484, -40.674522399902344, -38.94530487060547, -37.216087341308594, -35.48686981201172, -33.757652282714844, -32.0284309387207, -30.299213409423828, -28.569995880126953, -26.840776443481445, -25.111557006835938, -23.382339477539062, -21.653121948242188, -19.92390251159668, -18.194683074951172, -16.465465545654297, -14.736247062683105, -13.007028579711914, -11.277810096740723, -9.548591613769531, -7.81937313079834, -6.090154647827148, -4.360936164855957, -2.6317176818847656, -0.9024991989135742, 0.8267192840576172, 2.5559377670288086, 4.285152435302734, 6.014370918273926, 7.743589401245117, 9.472807884216309, 11.2020263671875, 12.931244850158691, 14.660463333129883, 16.38968276977539, 18.118900299072266, 19.84811782836914, 21.57733726501465, 23.306556701660156, 25.03577423095703, 26.764991760253906, 28.494211196899414, 30.223430633544922, 31.952648162841797, 33.68186569213867, 35.41108703613281, 37.14030456542969, 38.86952209472656, 40.59873962402344, 42.32795715332031, 44.05717849731445, 45.78639602661133, 47.5156135559082, 49.244834899902344, 50.97405242919922, 52.703269958496094, 54.43248748779297, 56.161705017089844, 57.890926361083984, 59.62014389038086]}, "gradients/decoder.transformer.h.5.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 5.0, 1.0, 1.0, 8.0, 4.0, 8.0, 8.0, 16.0, 16.0, 16.0, 20.0, 21.0, 27.0, 34.0, 30.0, 43.0, 36.0, 47.0, 47.0, 50.0, 40.0, 57.0, 57.0, 38.0, 43.0, 43.0, 41.0, 34.0, 47.0, 28.0, 34.0, 19.0, 18.0, 27.0, 20.0, 9.0, 5.0, 5.0, 1.0, 3.0, 1.0, 5.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0], "bins": [-11.984375, -11.63671875, -11.2890625, -10.94140625, -10.59375, -10.24609375, -9.8984375, -9.55078125, -9.203125, -8.85546875, -8.5078125, -8.16015625, -7.8125, -7.46484375, -7.1171875, -6.76953125, -6.421875, -6.07421875, -5.7265625, -5.37890625, -5.03125, -4.68359375, -4.3359375, -3.98828125, -3.640625, -3.29296875, -2.9453125, -2.59765625, -2.25, -1.90234375, -1.5546875, -1.20703125, -0.859375, -0.51171875, -0.1640625, 0.18359375, 0.53125, 0.87890625, 1.2265625, 1.57421875, 1.921875, 2.26953125, 2.6171875, 2.96484375, 3.3125, 3.66015625, 4.0078125, 4.35546875, 4.703125, 5.05078125, 5.3984375, 5.74609375, 6.09375, 6.44140625, 6.7890625, 7.13671875, 7.484375, 7.83203125, 8.1796875, 8.52734375, 8.875, 9.22265625, 9.5703125, 9.91796875, 10.265625]}, "gradients/decoder.transformer.h.5.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 4.0, 4.0, 3.0, 10.0, 11.0, 28.0, 27.0, 32.0, 66.0, 99.0, 114.0, 173.0, 274.0, 416.0, 629.0, 1060.0, 1676.0, 2895.0, 4750.0, 7502.0, 12321.0, 21510.0, 35595.0, 62050.0, 113007.0, 303540.0, 237561.0, 102949.0, 57246.0, 32936.0, 20069.0, 11819.0, 7025.0, 4186.0, 2542.0, 1612.0, 980.0, 617.0, 457.0, 269.0, 147.0, 116.0, 60.0, 61.0, 39.0, 25.0, 15.0, 14.0, 9.0, 7.0, 4.0, 3.0, 2.0, 3.0, 1.0, 1.0], "bins": [-0.41015625, -0.3980674743652344, -0.38597869873046875, -0.3738899230957031, -0.3618011474609375, -0.3497123718261719, -0.33762359619140625, -0.3255348205566406, -0.313446044921875, -0.3013572692871094, -0.28926849365234375, -0.2771797180175781, -0.2650909423828125, -0.2530021667480469, -0.24091339111328125, -0.22882461547851562, -0.21673583984375, -0.20464706420898438, -0.19255828857421875, -0.18046951293945312, -0.1683807373046875, -0.15629196166992188, -0.14420318603515625, -0.13211441040039062, -0.120025634765625, -0.10793685913085938, -0.09584808349609375, -0.08375930786132812, -0.0716705322265625, -0.059581756591796875, -0.04749298095703125, -0.035404205322265625, -0.0233154296875, -0.011226654052734375, 0.00086212158203125, 0.012950897216796875, 0.0250396728515625, 0.037128448486328125, 0.04921722412109375, 0.061305999755859375, 0.073394775390625, 0.08548355102539062, 0.09757232666015625, 0.10966110229492188, 0.1217498779296875, 0.13383865356445312, 0.14592742919921875, 0.15801620483398438, 0.17010498046875, 0.18219375610351562, 0.19428253173828125, 0.20637130737304688, 0.2184600830078125, 0.23054885864257812, 0.24263763427734375, 0.2547264099121094, 0.266815185546875, 0.2789039611816406, 0.29099273681640625, 0.3030815124511719, 0.3151702880859375, 0.3272590637207031, 0.33934783935546875, 0.3514366149902344, 0.363525390625]}, "gradients/decoder.transformer.h.5.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 1.0, 3.0, 3.0, 2.0, 4.0, 6.0, 3.0, 13.0, 7.0, 8.0, 16.0, 16.0, 21.0, 26.0, 22.0, 22.0, 29.0, 27.0, 35.0, 26.0, 25.0, 38.0, 40.0, 43.0, 28.0, 1061.0, 45.0, 40.0, 32.0, 46.0, 27.0, 29.0, 39.0, 27.0, 34.0, 22.0, 25.0, 17.0, 20.0, 16.0, 21.0, 14.0, 8.0, 16.0, 6.0, 7.0, 6.0, 4.0, 3.0, 1.0, 2.0, 3.0, 2.0, 1.0, 4.0], "bins": [-6.90234375, -6.70184326171875, -6.5013427734375, -6.30084228515625, -6.100341796875, -5.89984130859375, -5.6993408203125, -5.49884033203125, -5.29833984375, -5.09783935546875, -4.8973388671875, -4.69683837890625, -4.496337890625, -4.29583740234375, -4.0953369140625, -3.89483642578125, -3.6943359375, -3.49383544921875, -3.2933349609375, -3.09283447265625, -2.892333984375, -2.69183349609375, -2.4913330078125, -2.29083251953125, -2.09033203125, -1.88983154296875, -1.6893310546875, -1.48883056640625, -1.288330078125, -1.08782958984375, -0.8873291015625, -0.68682861328125, -0.486328125, -0.28582763671875, -0.0853271484375, 0.11517333984375, 0.315673828125, 0.51617431640625, 0.7166748046875, 0.91717529296875, 1.11767578125, 1.31817626953125, 1.5186767578125, 1.71917724609375, 1.919677734375, 2.12017822265625, 2.3206787109375, 2.52117919921875, 2.7216796875, 2.92218017578125, 3.1226806640625, 3.32318115234375, 3.523681640625, 3.72418212890625, 3.9246826171875, 4.12518310546875, 4.32568359375, 4.52618408203125, 4.7266845703125, 4.92718505859375, 5.127685546875, 5.32818603515625, 5.5286865234375, 5.72918701171875, 5.9296875]}, "gradients/decoder.transformer.h.5.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 13.0, 25.0, 39.0, 58.0, 77.0, 113.0, 152.0, 226.0, 280.0, 393.0, 569.0, 805.0, 1086.0, 1599.0, 2258.0, 3060.0, 4410.0, 6257.0, 8925.0, 12848.0, 19027.0, 28255.0, 41466.0, 63402.0, 100681.0, 211208.0, 1285473.0, 104322.0, 65361.0, 42530.0, 28472.0, 19411.0, 13149.0, 9192.0, 6472.0, 4565.0, 3222.0, 2243.0, 1599.0, 1096.0, 801.0, 586.0, 408.0, 310.0, 223.0, 142.0, 94.0, 66.0, 53.0, 37.0, 36.0, 15.0, 13.0, 4.0, 4.0, 1.0, 3.0], "bins": [-0.1666259765625, -0.16158294677734375, -0.1565399169921875, -0.15149688720703125, -0.146453857421875, -0.14141082763671875, -0.1363677978515625, -0.13132476806640625, -0.12628173828125, -0.12123870849609375, -0.1161956787109375, -0.11115264892578125, -0.106109619140625, -0.10106658935546875, -0.0960235595703125, -0.09098052978515625, -0.0859375, -0.08089447021484375, -0.0758514404296875, -0.07080841064453125, -0.065765380859375, -0.06072235107421875, -0.0556793212890625, -0.05063629150390625, -0.04559326171875, -0.04055023193359375, -0.0355072021484375, -0.03046417236328125, -0.025421142578125, -0.02037811279296875, -0.0153350830078125, -0.01029205322265625, -0.0052490234375, -0.00020599365234375, 0.0048370361328125, 0.00988006591796875, 0.014923095703125, 0.01996612548828125, 0.0250091552734375, 0.03005218505859375, 0.03509521484375, 0.04013824462890625, 0.0451812744140625, 0.05022430419921875, 0.055267333984375, 0.06031036376953125, 0.0653533935546875, 0.07039642333984375, 0.075439453125, 0.08048248291015625, 0.0855255126953125, 0.09056854248046875, 0.095611572265625, 0.10065460205078125, 0.1056976318359375, 0.11074066162109375, 0.11578369140625, 0.12082672119140625, 0.1258697509765625, 0.13091278076171875, 0.135955810546875, 0.14099884033203125, 0.1460418701171875, 0.15108489990234375, 0.1561279296875]}, "gradients/decoder.transformer.h.5.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 5.0, 0.0, 3.0, 3.0, 5.0, 3.0, 6.0, 7.0, 7.0, 8.0, 15.0, 15.0, 13.0, 21.0, 27.0, 48.0, 44.0, 47.0, 92.0, 171.0, 134.0, 85.0, 63.0, 47.0, 30.0, 20.0, 27.0, 14.0, 13.0, 7.0, 5.0, 4.0, 7.0, 3.0, 1.0, 2.0, 2.0, 1.0, 4.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-7.808208465576172e-06, -7.577240467071533e-06, -7.3462724685668945e-06, -7.115304470062256e-06, -6.884336471557617e-06, -6.6533684730529785e-06, -6.42240047454834e-06, -6.191432476043701e-06, -5.9604644775390625e-06, -5.729496479034424e-06, -5.498528480529785e-06, -5.2675604820251465e-06, -5.036592483520508e-06, -4.805624485015869e-06, -4.5746564865112305e-06, -4.343688488006592e-06, -4.112720489501953e-06, -3.8817524909973145e-06, -3.6507844924926758e-06, -3.419816493988037e-06, -3.1888484954833984e-06, -2.9578804969787598e-06, -2.726912498474121e-06, -2.4959444999694824e-06, -2.2649765014648438e-06, -2.034008502960205e-06, -1.8030405044555664e-06, -1.5720725059509277e-06, -1.341104507446289e-06, -1.1101365089416504e-06, -8.791685104370117e-07, -6.48200511932373e-07, -4.172325134277344e-07, -1.862645149230957e-07, 4.470348358154297e-08, 2.7567148208618164e-07, 5.066394805908203e-07, 7.37607479095459e-07, 9.685754776000977e-07, 1.1995434761047363e-06, 1.430511474609375e-06, 1.6614794731140137e-06, 1.8924474716186523e-06, 2.123415470123291e-06, 2.3543834686279297e-06, 2.5853514671325684e-06, 2.816319465637207e-06, 3.0472874641418457e-06, 3.2782554626464844e-06, 3.509223461151123e-06, 3.7401914596557617e-06, 3.9711594581604e-06, 4.202127456665039e-06, 4.433095455169678e-06, 4.664063453674316e-06, 4.895031452178955e-06, 5.125999450683594e-06, 5.356967449188232e-06, 5.587935447692871e-06, 5.81890344619751e-06, 6.0498714447021484e-06, 6.280839443206787e-06, 6.511807441711426e-06, 6.7427754402160645e-06, 6.973743438720703e-06]}, "gradients/decoder.transformer.h.5.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 3.0, 2.0, 1.0, 2.0, 3.0, 2.0, 2.0, 7.0, 9.0, 6.0, 7.0, 13.0, 27.0, 38.0, 51.0, 72.0, 139.0, 314.0, 4391.0, 931927.0, 109850.0, 1131.0, 209.0, 103.0, 73.0, 42.0, 33.0, 24.0, 18.0, 12.0, 10.0, 8.0, 9.0, 7.0, 1.0, 6.0, 1.0, 2.0, 5.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.0001538991928100586, -0.00014906004071235657, -0.00014422088861465454, -0.00013938173651695251, -0.0001345425844192505, -0.00012970343232154846, -0.00012486428022384644, -0.00012002512812614441, -0.00011518597602844238, -0.00011034682393074036, -0.00010550767183303833, -0.0001006685197353363, -9.582936763763428e-05, -9.099021553993225e-05, -8.615106344223022e-05, -8.13119113445282e-05, -7.647275924682617e-05, -7.163360714912415e-05, -6.679445505142212e-05, -6.195530295372009e-05, -5.7116150856018066e-05, -5.227699875831604e-05, -4.7437846660614014e-05, -4.259869456291199e-05, -3.775954246520996e-05, -3.2920390367507935e-05, -2.8081238269805908e-05, -2.3242086172103882e-05, -1.8402934074401855e-05, -1.3563781976699829e-05, -8.724629878997803e-06, -3.885477781295776e-06, 9.5367431640625e-07, 5.792826414108276e-06, 1.0631978511810303e-05, 1.547113060951233e-05, 2.0310282707214355e-05, 2.5149434804916382e-05, 2.9988586902618408e-05, 3.4827739000320435e-05, 3.966689109802246e-05, 4.450604319572449e-05, 4.9345195293426514e-05, 5.418434739112854e-05, 5.9023499488830566e-05, 6.386265158653259e-05, 6.870180368423462e-05, 7.354095578193665e-05, 7.838010787963867e-05, 8.32192599773407e-05, 8.805841207504272e-05, 9.289756417274475e-05, 9.773671627044678e-05, 0.0001025758683681488, 0.00010741502046585083, 0.00011225417256355286, 0.00011709332466125488, 0.00012193247675895691, 0.00012677162885665894, 0.00013161078095436096, 0.000136449933052063, 0.00014128908514976501, 0.00014612823724746704, 0.00015096738934516907, 0.0001558065414428711]}, "gradients/decoder.transformer.h.5.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 4.0, 19.0, 26.0, 58.0, 161.0, 265.0, 220.0, 139.0, 63.0, 34.0, 13.0, 8.0, 2.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.871512025303673e-06, -7.5442276283865795e-06, -7.216943231469486e-06, -6.889658834552392e-06, -6.562374437635299e-06, -6.235090040718205e-06, -5.907806098548463e-06, -5.580521701631369e-06, -5.2532373047142755e-06, -4.925952907797182e-06, -4.5986685108800884e-06, -4.271384568710346e-06, -3.944100171793252e-06, -3.6168155475024832e-06, -3.289531377959065e-06, -2.9622469810419716e-06, -2.634962584124878e-06, -2.3076781872077845e-06, -1.980393790290691e-06, -1.6531096207472729e-06, -1.3258252238301793e-06, -9.985408269130858e-07, -6.712566573696677e-07, -3.439722604525741e-07, -1.6687863535480574e-08, 3.105964765381941e-07, 6.378808166118688e-07, 9.651650998421246e-07, 1.2924494967592182e-06, 1.6197338936763117e-06, 1.94701806321973e-06, 2.2743024601368234e-06, 2.601585947559215e-06, 2.9288703444763087e-06, 3.2561547413934022e-06, 3.5834389109368203e-06, 3.910723535227589e-06, 4.238007932144683e-06, 4.5652918743144255e-06, 4.892576271231519e-06, 5.219860668148613e-06, 5.547145065065706e-06, 5.8744294619828e-06, 6.201713404152542e-06, 6.528997801069636e-06, 6.8562821979867294e-06, 7.183566594903823e-06, 7.5108509918209165e-06, 7.83813538873801e-06, 8.165419785655104e-06, 8.492704182572197e-06, 8.81998857948929e-06, 9.147272976406384e-06, 9.474557373323478e-06, 9.80184086074587e-06, 1.0129126167157665e-05, 1.0456409654580057e-05, 1.078369405149715e-05, 1.1110978448414244e-05, 1.1438262845331337e-05, 1.1765547242248431e-05, 1.2092831639165524e-05, 1.2420116036082618e-05, 1.274739952350501e-05, 1.3074684829916805e-05]}, "gradients/decoder.transformer.h.5.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 3.0, 1.0, 3.0, 5.0, 8.0, 8.0, 7.0, 8.0, 16.0, 6.0, 8.0, 14.0, 10.0, 13.0, 29.0, 22.0, 18.0, 40.0, 23.0, 24.0, 44.0, 21.0, 28.0, 66.0, 29.0, 32.0, 57.0, 18.0, 52.0, 29.0, 25.0, 47.0, 25.0, 18.0, 40.0, 24.0, 17.0, 31.0, 17.0, 15.0, 14.0, 12.0, 6.0, 15.0, 10.0, 8.0, 10.0, 5.0, 7.0, 10.0, 3.0, 6.0, 5.0, 0.0, 1.0, 1.0, 3.0, 2.0], "bins": [-2.6226043701171875e-06, -2.5425106287002563e-06, -2.462416887283325e-06, -2.382323145866394e-06, -2.302229404449463e-06, -2.2221356630325317e-06, -2.1420419216156006e-06, -2.0619481801986694e-06, -1.9818544387817383e-06, -1.9017606973648071e-06, -1.821666955947876e-06, -1.7415732145309448e-06, -1.6614794731140137e-06, -1.5813857316970825e-06, -1.5012919902801514e-06, -1.4211982488632202e-06, -1.341104507446289e-06, -1.261010766029358e-06, -1.1809170246124268e-06, -1.1008232831954956e-06, -1.0207295417785645e-06, -9.406358003616333e-07, -8.605420589447021e-07, -7.80448317527771e-07, -7.003545761108398e-07, -6.202608346939087e-07, -5.401670932769775e-07, -4.600733518600464e-07, -3.7997961044311523e-07, -2.998858690261841e-07, -2.1979212760925293e-07, -1.3969838619232178e-07, -5.960464477539063e-08, 2.0489096641540527e-08, 1.0058283805847168e-07, 1.8067657947540283e-07, 2.60770320892334e-07, 3.4086406230926514e-07, 4.209578037261963e-07, 5.010515451431274e-07, 5.811452865600586e-07, 6.612390279769897e-07, 7.413327693939209e-07, 8.21426510810852e-07, 9.015202522277832e-07, 9.816139936447144e-07, 1.0617077350616455e-06, 1.1418014764785767e-06, 1.2218952178955078e-06, 1.301988959312439e-06, 1.3820827007293701e-06, 1.4621764421463013e-06, 1.5422701835632324e-06, 1.6223639249801636e-06, 1.7024576663970947e-06, 1.7825514078140259e-06, 1.862645149230957e-06, 1.942738890647888e-06, 2.0228326320648193e-06, 2.1029263734817505e-06, 2.1830201148986816e-06, 2.263113856315613e-06, 2.343207597732544e-06, 2.423301339149475e-06, 2.5033950805664062e-06]}, "gradients/decoder.transformer.h.5.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 5.0, 1.0, 1.0, 8.0, 4.0, 8.0, 8.0, 16.0, 16.0, 16.0, 20.0, 21.0, 27.0, 34.0, 30.0, 43.0, 36.0, 47.0, 47.0, 50.0, 40.0, 57.0, 57.0, 38.0, 43.0, 43.0, 41.0, 34.0, 47.0, 28.0, 34.0, 19.0, 18.0, 27.0, 20.0, 9.0, 5.0, 5.0, 1.0, 3.0, 1.0, 5.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0], "bins": [-11.984375, -11.63671875, -11.2890625, -10.94140625, -10.59375, -10.24609375, -9.8984375, -9.55078125, -9.203125, -8.85546875, -8.5078125, -8.16015625, -7.8125, -7.46484375, -7.1171875, -6.76953125, -6.421875, -6.07421875, -5.7265625, -5.37890625, -5.03125, -4.68359375, -4.3359375, -3.98828125, -3.640625, -3.29296875, -2.9453125, -2.59765625, -2.25, -1.90234375, -1.5546875, -1.20703125, -0.859375, -0.51171875, -0.1640625, 0.18359375, 0.53125, 0.87890625, 1.2265625, 1.57421875, 1.921875, 2.26953125, 2.6171875, 2.96484375, 3.3125, 3.66015625, 4.0078125, 4.35546875, 4.703125, 5.05078125, 5.3984375, 5.74609375, 6.09375, 6.44140625, 6.7890625, 7.13671875, 7.484375, 7.83203125, 8.1796875, 8.52734375, 8.875, 9.22265625, 9.5703125, 9.91796875, 10.265625]}, "gradients/decoder.transformer.h.5.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 4.0, 1.0, 3.0, 3.0, 6.0, 12.0, 8.0, 30.0, 42.0, 70.0, 123.0, 192.0, 330.0, 578.0, 1088.0, 1875.0, 3405.0, 6119.0, 11369.0, 19835.0, 35808.0, 66371.0, 134746.0, 293210.0, 241373.0, 108102.0, 55580.0, 30057.0, 16983.0, 9498.0, 5146.0, 2854.0, 1582.0, 911.0, 478.0, 301.0, 184.0, 102.0, 80.0, 37.0, 24.0, 17.0, 14.0, 7.0, 1.0, 6.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-13.8984375, -13.462158203125, -13.02587890625, -12.589599609375, -12.1533203125, -11.717041015625, -11.28076171875, -10.844482421875, -10.408203125, -9.971923828125, -9.53564453125, -9.099365234375, -8.6630859375, -8.226806640625, -7.79052734375, -7.354248046875, -6.91796875, -6.481689453125, -6.04541015625, -5.609130859375, -5.1728515625, -4.736572265625, -4.30029296875, -3.864013671875, -3.427734375, -2.991455078125, -2.55517578125, -2.118896484375, -1.6826171875, -1.246337890625, -0.81005859375, -0.373779296875, 0.0625, 0.498779296875, 0.93505859375, 1.371337890625, 1.8076171875, 2.243896484375, 2.68017578125, 3.116455078125, 3.552734375, 3.989013671875, 4.42529296875, 4.861572265625, 5.2978515625, 5.734130859375, 6.17041015625, 6.606689453125, 7.04296875, 7.479248046875, 7.91552734375, 8.351806640625, 8.7880859375, 9.224365234375, 9.66064453125, 10.096923828125, 10.533203125, 10.969482421875, 11.40576171875, 11.842041015625, 12.2783203125, 12.714599609375, 13.15087890625, 13.587158203125, 14.0234375]}, "gradients/decoder.transformer.h.5.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 2.0, 1.0, 5.0, 3.0, 8.0, 7.0, 11.0, 8.0, 23.0, 15.0, 27.0, 22.0, 25.0, 33.0, 29.0, 53.0, 52.0, 60.0, 73.0, 86.0, 130.0, 245.0, 1296.0, 182.0, 134.0, 98.0, 55.0, 60.0, 43.0, 32.0, 40.0, 34.0, 29.0, 20.0, 24.0, 14.0, 11.0, 16.0, 8.0, 9.0, 6.0, 8.0, 6.0, 2.0, 4.0, 2.0, 5.0, 1.0, 1.0, 2.0, 1.0, 0.0, 2.0], "bins": [-29.015625, -28.143310546875, -27.27099609375, -26.398681640625, -25.5263671875, -24.654052734375, -23.78173828125, -22.909423828125, -22.037109375, -21.164794921875, -20.29248046875, -19.420166015625, -18.5478515625, -17.675537109375, -16.80322265625, -15.930908203125, -15.05859375, -14.186279296875, -13.31396484375, -12.441650390625, -11.5693359375, -10.697021484375, -9.82470703125, -8.952392578125, -8.080078125, -7.207763671875, -6.33544921875, -5.463134765625, -4.5908203125, -3.718505859375, -2.84619140625, -1.973876953125, -1.1015625, -0.229248046875, 0.64306640625, 1.515380859375, 2.3876953125, 3.260009765625, 4.13232421875, 5.004638671875, 5.876953125, 6.749267578125, 7.62158203125, 8.493896484375, 9.3662109375, 10.238525390625, 11.11083984375, 11.983154296875, 12.85546875, 13.727783203125, 14.60009765625, 15.472412109375, 16.3447265625, 17.217041015625, 18.08935546875, 18.961669921875, 19.833984375, 20.706298828125, 21.57861328125, 22.450927734375, 23.3232421875, 24.195556640625, 25.06787109375, 25.940185546875, 26.8125]}, "gradients/decoder.transformer.h.5.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 2.0, 2.0, 4.0, 3.0, 7.0, 12.0, 8.0, 14.0, 21.0, 27.0, 22.0, 38.0, 61.0, 96.0, 160.0, 263.0, 688.0, 2083.0, 7861.0, 33248.0, 165976.0, 1743357.0, 1035439.0, 121882.0, 25320.0, 6099.0, 1811.0, 596.0, 243.0, 121.0, 86.0, 48.0, 34.0, 28.0, 13.0, 13.0, 11.0, 5.0, 6.0, 5.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-32.3125, -31.15478515625, -29.9970703125, -28.83935546875, -27.681640625, -26.52392578125, -25.3662109375, -24.20849609375, -23.05078125, -21.89306640625, -20.7353515625, -19.57763671875, -18.419921875, -17.26220703125, -16.1044921875, -14.94677734375, -13.7890625, -12.63134765625, -11.4736328125, -10.31591796875, -9.158203125, -8.00048828125, -6.8427734375, -5.68505859375, -4.52734375, -3.36962890625, -2.2119140625, -1.05419921875, 0.103515625, 1.26123046875, 2.4189453125, 3.57666015625, 4.734375, 5.89208984375, 7.0498046875, 8.20751953125, 9.365234375, 10.52294921875, 11.6806640625, 12.83837890625, 13.99609375, 15.15380859375, 16.3115234375, 17.46923828125, 18.626953125, 19.78466796875, 20.9423828125, 22.10009765625, 23.2578125, 24.41552734375, 25.5732421875, 26.73095703125, 27.888671875, 29.04638671875, 30.2041015625, 31.36181640625, 32.51953125, 33.67724609375, 34.8349609375, 35.99267578125, 37.150390625, 38.30810546875, 39.4658203125, 40.62353515625, 41.78125]}, "gradients/decoder.transformer.h.5.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 15.0, 61.0, 209.0, 369.0, 242.0, 84.0, 19.0, 13.0, 1.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-77.38040924072266, -68.53254699707031, -59.6846923828125, -50.836830139160156, -41.98897171020508, -33.14111328125, -24.293251037597656, -15.445392608642578, -6.5975341796875, 2.2503252029418945, 11.098184585571289, 19.946044921875, 28.793903350830078, 37.641761779785156, 46.4896240234375, 55.33748245239258, 64.18534088134766, 73.033203125, 81.88105773925781, 90.72891998291016, 99.5767822265625, 108.42463684082031, 117.27249908447266, 126.120361328125, 134.9682159423828, 143.81607055664062, 152.6639404296875, 161.5117950439453, 170.35964965820312, 179.20751953125, 188.0553741455078, 196.90322875976562, 205.7510986328125, 214.5989532470703, 223.4468231201172, 232.294677734375, 241.1425323486328, 249.99038696289062, 258.8382568359375, 267.6861267089844, 276.5339660644531, 285.3818359375, 294.22967529296875, 303.0775451660156, 311.9254150390625, 320.77325439453125, 329.6211242675781, 338.468994140625, 347.31683349609375, 356.1647033691406, 365.0125427246094, 373.86041259765625, 382.7082824707031, 391.5561218261719, 400.40399169921875, 409.2518615722656, 418.0997314453125, 426.9476013183594, 435.7954406738281, 444.643310546875, 453.4911804199219, 462.3390197753906, 471.1868896484375, 480.0347595214844, 488.8825988769531]}, "gradients/decoder.transformer.h.5.ln_1.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 2.0, 3.0, 3.0, 5.0, 5.0, 10.0, 5.0, 5.0, 8.0, 13.0, 13.0, 12.0, 14.0, 10.0, 23.0, 24.0, 22.0, 21.0, 26.0, 29.0, 28.0, 41.0, 31.0, 36.0, 37.0, 34.0, 33.0, 32.0, 49.0, 18.0, 43.0, 35.0, 43.0, 40.0, 25.0, 32.0, 25.0, 23.0, 21.0, 22.0, 16.0, 12.0, 11.0, 12.0, 12.0, 8.0, 9.0, 10.0, 5.0, 8.0, 4.0, 2.0, 1.0, 3.0, 2.0, 1.0, 2.0, 1.0, 0.0, 1.0], "bins": [-67.21139526367188, -65.18445587158203, -63.15751647949219, -61.130577087402344, -59.1036376953125, -57.076698303222656, -55.04975509643555, -53.0228157043457, -50.99587631225586, -48.968936920166016, -46.94199752807617, -44.91505813598633, -42.88811492919922, -40.861175537109375, -38.83423614501953, -36.80729675292969, -34.780357360839844, -32.75341796875, -30.726478576660156, -28.69953727722168, -26.672597885131836, -24.645658493041992, -22.618717193603516, -20.591777801513672, -18.564838409423828, -16.537899017333984, -14.510958671569824, -12.484018325805664, -10.45707893371582, -8.430139541625977, -6.403199195861816, -4.376258850097656, -2.3493194580078125, -0.32237958908081055, 1.7045602798461914, 3.7315001487731934, 5.758440017700195, 7.785379409790039, 9.8123197555542, 11.83926010131836, 13.866199493408203, 15.893138885498047, 17.92007827758789, 19.947019577026367, 21.97395896911621, 24.000898361206055, 26.02783966064453, 28.054779052734375, 30.08171844482422, 32.10865783691406, 34.135597229003906, 36.16253662109375, 38.189476013183594, 40.21641540527344, 42.24335861206055, 44.27029800415039, 46.297237396240234, 48.32417678833008, 50.35111618041992, 52.378055572509766, 54.404998779296875, 56.43193817138672, 58.45887756347656, 60.485816955566406, 62.51275634765625]}, "gradients/decoder.transformer.h.4.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0, 2.0, 2.0, 6.0, 6.0, 4.0, 10.0, 11.0, 17.0, 9.0, 23.0, 19.0, 26.0, 36.0, 19.0, 26.0, 40.0, 58.0, 39.0, 53.0, 45.0, 46.0, 49.0, 46.0, 47.0, 56.0, 36.0, 30.0, 36.0, 37.0, 37.0, 31.0, 13.0, 21.0, 22.0, 17.0, 10.0, 7.0, 4.0, 8.0, 1.0, 2.0, 2.0, 2.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-12.4140625, -12.0509033203125, -11.687744140625, -11.3245849609375, -10.96142578125, -10.5982666015625, -10.235107421875, -9.8719482421875, -9.5087890625, -9.1456298828125, -8.782470703125, -8.4193115234375, -8.05615234375, -7.6929931640625, -7.329833984375, -6.9666748046875, -6.603515625, -6.2403564453125, -5.877197265625, -5.5140380859375, -5.15087890625, -4.7877197265625, -4.424560546875, -4.0614013671875, -3.6982421875, -3.3350830078125, -2.971923828125, -2.6087646484375, -2.24560546875, -1.8824462890625, -1.519287109375, -1.1561279296875, -0.79296875, -0.4298095703125, -0.066650390625, 0.2965087890625, 0.65966796875, 1.0228271484375, 1.385986328125, 1.7491455078125, 2.1123046875, 2.4754638671875, 2.838623046875, 3.2017822265625, 3.56494140625, 3.9281005859375, 4.291259765625, 4.6544189453125, 5.017578125, 5.3807373046875, 5.743896484375, 6.1070556640625, 6.47021484375, 6.8333740234375, 7.196533203125, 7.5596923828125, 7.9228515625, 8.2860107421875, 8.649169921875, 9.0123291015625, 9.37548828125, 9.7386474609375, 10.101806640625, 10.4649658203125, 10.828125]}, "gradients/decoder.transformer.h.4.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 3.0, 0.0, 1.0, 1.0, 7.0, 4.0, 14.0, 24.0, 54.0, 112.0, 243.0, 576.0, 1787.0, 5853.0, 22260.0, 105715.0, 1146139.0, 2675802.0, 188662.0, 34648.0, 8651.0, 2441.0, 760.0, 276.0, 133.0, 66.0, 24.0, 17.0, 13.0, 2.0, 5.0, 3.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-21.515625, -20.392333984375, -19.26904296875, -18.145751953125, -17.0224609375, -15.899169921875, -14.77587890625, -13.652587890625, -12.529296875, -11.406005859375, -10.28271484375, -9.159423828125, -8.0361328125, -6.912841796875, -5.78955078125, -4.666259765625, -3.54296875, -2.419677734375, -1.29638671875, -0.173095703125, 0.9501953125, 2.073486328125, 3.19677734375, 4.320068359375, 5.443359375, 6.566650390625, 7.68994140625, 8.813232421875, 9.9365234375, 11.059814453125, 12.18310546875, 13.306396484375, 14.4296875, 15.552978515625, 16.67626953125, 17.799560546875, 18.9228515625, 20.046142578125, 21.16943359375, 22.292724609375, 23.416015625, 24.539306640625, 25.66259765625, 26.785888671875, 27.9091796875, 29.032470703125, 30.15576171875, 31.279052734375, 32.40234375, 33.525634765625, 34.64892578125, 35.772216796875, 36.8955078125, 38.018798828125, 39.14208984375, 40.265380859375, 41.388671875, 42.511962890625, 43.63525390625, 44.758544921875, 45.8818359375, 47.005126953125, 48.12841796875, 49.251708984375, 50.375]}, "gradients/decoder.transformer.h.4.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 5.0, 6.0, 9.0, 8.0, 13.0, 22.0, 21.0, 24.0, 35.0, 41.0, 60.0, 70.0, 96.0, 159.0, 229.0, 382.0, 566.0, 702.0, 480.0, 317.0, 236.0, 148.0, 112.0, 77.0, 79.0, 43.0, 36.0, 29.0, 15.0, 16.0, 14.0, 10.0, 7.0, 5.0, 6.0, 2.0, 2.0, 2.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-17.6875, -17.0849609375, -16.482421875, -15.8798828125, -15.27734375, -14.6748046875, -14.072265625, -13.4697265625, -12.8671875, -12.2646484375, -11.662109375, -11.0595703125, -10.45703125, -9.8544921875, -9.251953125, -8.6494140625, -8.046875, -7.4443359375, -6.841796875, -6.2392578125, -5.63671875, -5.0341796875, -4.431640625, -3.8291015625, -3.2265625, -2.6240234375, -2.021484375, -1.4189453125, -0.81640625, -0.2138671875, 0.388671875, 0.9912109375, 1.59375, 2.1962890625, 2.798828125, 3.4013671875, 4.00390625, 4.6064453125, 5.208984375, 5.8115234375, 6.4140625, 7.0166015625, 7.619140625, 8.2216796875, 8.82421875, 9.4267578125, 10.029296875, 10.6318359375, 11.234375, 11.8369140625, 12.439453125, 13.0419921875, 13.64453125, 14.2470703125, 14.849609375, 15.4521484375, 16.0546875, 16.6572265625, 17.259765625, 17.8623046875, 18.46484375, 19.0673828125, 19.669921875, 20.2724609375, 20.875]}, "gradients/decoder.transformer.h.4.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 10.0, 4.0, 14.0, 12.0, 15.0, 23.0, 30.0, 54.0, 84.0, 112.0, 166.0, 280.0, 496.0, 913.0, 1891.0, 4029.0, 9626.0, 24253.0, 68070.0, 221867.0, 1128471.0, 2210204.0, 363081.0, 102153.0, 34646.0, 13181.0, 5488.0, 2358.0, 1159.0, 668.0, 349.0, 200.0, 128.0, 85.0, 51.0, 32.0, 22.0, 17.0, 20.0, 14.0, 10.0, 4.0, 1.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-28.453125, -27.62060546875, -26.7880859375, -25.95556640625, -25.123046875, -24.29052734375, -23.4580078125, -22.62548828125, -21.79296875, -20.96044921875, -20.1279296875, -19.29541015625, -18.462890625, -17.63037109375, -16.7978515625, -15.96533203125, -15.1328125, -14.30029296875, -13.4677734375, -12.63525390625, -11.802734375, -10.97021484375, -10.1376953125, -9.30517578125, -8.47265625, -7.64013671875, -6.8076171875, -5.97509765625, -5.142578125, -4.31005859375, -3.4775390625, -2.64501953125, -1.8125, -0.97998046875, -0.1474609375, 0.68505859375, 1.517578125, 2.35009765625, 3.1826171875, 4.01513671875, 4.84765625, 5.68017578125, 6.5126953125, 7.34521484375, 8.177734375, 9.01025390625, 9.8427734375, 10.67529296875, 11.5078125, 12.34033203125, 13.1728515625, 14.00537109375, 14.837890625, 15.67041015625, 16.5029296875, 17.33544921875, 18.16796875, 19.00048828125, 19.8330078125, 20.66552734375, 21.498046875, 22.33056640625, 23.1630859375, 23.99560546875, 24.828125]}, "gradients/decoder.transformer.h.4.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 18.0, 180.0, 554.0, 244.0, 23.0, 1.0], "bins": [-1024.7259521484375, -1007.798583984375, -990.8712768554688, -973.9439086914062, -957.0165405273438, -940.0892333984375, -923.161865234375, -906.2344970703125, -889.3071899414062, -872.3798217773438, -855.4525146484375, -838.525146484375, -821.5977783203125, -804.6704711914062, -787.7431030273438, -770.8157348632812, -753.888427734375, -736.9610595703125, -720.0337524414062, -703.1063842773438, -686.1790161132812, -669.251708984375, -652.3243408203125, -635.39697265625, -618.4696044921875, -601.542236328125, -584.6149291992188, -567.6875610351562, -550.7601928710938, -533.8328857421875, -516.905517578125, -499.9781799316406, -483.0508117675781, -466.12347412109375, -449.19610595703125, -432.2687683105469, -415.3414306640625, -398.4140625, -381.4867248535156, -364.55938720703125, -347.63201904296875, -330.7046813964844, -313.7773132324219, -296.8499755859375, -279.9226379394531, -262.99530029296875, -246.06793212890625, -229.14059448242188, -212.21324157714844, -195.285888671875, -178.35855102539062, -161.4311981201172, -144.50384521484375, -127.57650756835938, -110.64915466308594, -93.72180938720703, -76.79446411132812, -59.86711883544922, -42.93976974487305, -26.012420654296875, -9.085075378417969, 7.8422698974609375, 24.769622802734375, 41.69696807861328, 58.62430953979492]}, "gradients/decoder.transformer.h.4.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 0.0, 1.0, 3.0, 0.0, 0.0, 3.0, 4.0, 4.0, 9.0, 5.0, 12.0, 15.0, 10.0, 11.0, 15.0, 24.0, 27.0, 30.0, 29.0, 18.0, 29.0, 43.0, 39.0, 43.0, 47.0, 35.0, 38.0, 46.0, 54.0, 37.0, 33.0, 39.0, 40.0, 32.0, 32.0, 26.0, 27.0, 22.0, 17.0, 20.0, 17.0, 21.0, 9.0, 15.0, 6.0, 4.0, 9.0, 6.0, 6.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-70.3316650390625, -68.28080749511719, -66.2299575805664, -64.1791000366211, -62.12825012207031, -60.077392578125, -58.02653884887695, -55.975685119628906, -53.92483139038086, -51.87397766113281, -49.823123931884766, -47.77227020263672, -45.721412658691406, -43.670562744140625, -41.61970520019531, -39.568851470947266, -37.51799774169922, -35.46714401245117, -33.416290283203125, -31.365434646606445, -29.3145809173584, -27.26372718811035, -25.212871551513672, -23.162017822265625, -21.111164093017578, -19.06031036376953, -17.009456634521484, -14.958600997924805, -12.907747268676758, -10.856893539428711, -8.806038856506348, -6.755184173583984, -4.7043304443359375, -2.6534762382507324, -0.6026220321655273, 1.4482321739196777, 3.499086380004883, 5.54994010925293, 7.600794792175293, 9.651649475097656, 11.702503204345703, 13.75335693359375, 15.804211616516113, 17.855066299438477, 19.905920028686523, 21.95677375793457, 24.00762939453125, 26.058483123779297, 28.109336853027344, 30.16019058227539, 32.21104431152344, 34.261898040771484, 36.31275177001953, 38.363609313964844, 40.41446304321289, 42.46531677246094, 44.516170501708984, 46.56702423095703, 48.61787796020508, 50.668731689453125, 52.71958923339844, 54.77043914794922, 56.82129669189453, 58.87215042114258, 60.923004150390625]}, "gradients/decoder.transformer.h.4.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 5.0, 1.0, 4.0, 5.0, 4.0, 10.0, 13.0, 11.0, 13.0, 18.0, 15.0, 27.0, 29.0, 23.0, 28.0, 37.0, 28.0, 54.0, 38.0, 35.0, 35.0, 42.0, 35.0, 56.0, 39.0, 46.0, 37.0, 33.0, 43.0, 35.0, 29.0, 25.0, 25.0, 27.0, 20.0, 14.0, 19.0, 11.0, 12.0, 9.0, 4.0, 2.0, 7.0, 4.0, 2.0, 2.0, 4.0, 3.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.1328125, -11.7828369140625, -11.432861328125, -11.0828857421875, -10.73291015625, -10.3829345703125, -10.032958984375, -9.6829833984375, -9.3330078125, -8.9830322265625, -8.633056640625, -8.2830810546875, -7.93310546875, -7.5831298828125, -7.233154296875, -6.8831787109375, -6.533203125, -6.1832275390625, -5.833251953125, -5.4832763671875, -5.13330078125, -4.7833251953125, -4.433349609375, -4.0833740234375, -3.7333984375, -3.3834228515625, -3.033447265625, -2.6834716796875, -2.33349609375, -1.9835205078125, -1.633544921875, -1.2835693359375, -0.93359375, -0.5836181640625, -0.233642578125, 0.1163330078125, 0.46630859375, 0.8162841796875, 1.166259765625, 1.5162353515625, 1.8662109375, 2.2161865234375, 2.566162109375, 2.9161376953125, 3.26611328125, 3.6160888671875, 3.966064453125, 4.3160400390625, 4.666015625, 5.0159912109375, 5.365966796875, 5.7159423828125, 6.06591796875, 6.4158935546875, 6.765869140625, 7.1158447265625, 7.4658203125, 7.8157958984375, 8.165771484375, 8.5157470703125, 8.86572265625, 9.2156982421875, 9.565673828125, 9.9156494140625, 10.265625]}, "gradients/decoder.transformer.h.4.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 3.0, 2.0, 3.0, 7.0, 4.0, 6.0, 16.0, 18.0, 38.0, 36.0, 66.0, 84.0, 139.0, 200.0, 294.0, 432.0, 662.0, 952.0, 1493.0, 2187.0, 3351.0, 5236.0, 7950.0, 12470.0, 19186.0, 30357.0, 47961.0, 79083.0, 144143.0, 324571.0, 150036.0, 81289.0, 49222.0, 30951.0, 19748.0, 12694.0, 8163.0, 5294.0, 3469.0, 2233.0, 1501.0, 979.0, 649.0, 460.0, 282.0, 212.0, 128.0, 107.0, 60.0, 53.0, 29.0, 24.0, 11.0, 10.0, 6.0, 4.0, 2.0, 4.0, 1.0, 0.0, 3.0], "bins": [-0.370361328125, -0.3589744567871094, -0.34758758544921875, -0.3362007141113281, -0.3248138427734375, -0.3134269714355469, -0.30204010009765625, -0.2906532287597656, -0.279266357421875, -0.2678794860839844, -0.25649261474609375, -0.24510574340820312, -0.2337188720703125, -0.22233200073242188, -0.21094512939453125, -0.19955825805664062, -0.18817138671875, -0.17678451538085938, -0.16539764404296875, -0.15401077270507812, -0.1426239013671875, -0.13123703002929688, -0.11985015869140625, -0.10846328735351562, -0.097076416015625, -0.08568954467773438, -0.07430267333984375, -0.06291580200195312, -0.0515289306640625, -0.040142059326171875, -0.02875518798828125, -0.017368316650390625, -0.0059814453125, 0.005405426025390625, 0.01679229736328125, 0.028179168701171875, 0.0395660400390625, 0.050952911376953125, 0.06233978271484375, 0.07372665405273438, 0.085113525390625, 0.09650039672851562, 0.10788726806640625, 0.11927413940429688, 0.1306610107421875, 0.14204788208007812, 0.15343475341796875, 0.16482162475585938, 0.17620849609375, 0.18759536743164062, 0.19898223876953125, 0.21036911010742188, 0.2217559814453125, 0.23314285278320312, 0.24452972412109375, 0.2559165954589844, 0.267303466796875, 0.2786903381347656, 0.29007720947265625, 0.3014640808105469, 0.3128509521484375, 0.3242378234863281, 0.33562469482421875, 0.3470115661621094, 0.3583984375]}, "gradients/decoder.transformer.h.4.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 4.0, 1.0, 3.0, 2.0, 4.0, 7.0, 5.0, 9.0, 13.0, 18.0, 14.0, 20.0, 25.0, 24.0, 33.0, 26.0, 37.0, 35.0, 37.0, 45.0, 35.0, 35.0, 44.0, 42.0, 1073.0, 47.0, 42.0, 28.0, 44.0, 30.0, 24.0, 24.0, 27.0, 23.0, 23.0, 32.0, 15.0, 16.0, 13.0, 6.0, 13.0, 8.0, 9.0, 9.0, 6.0, 6.0, 2.0, 0.0, 0.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.34375, -7.0927734375, -6.841796875, -6.5908203125, -6.33984375, -6.0888671875, -5.837890625, -5.5869140625, -5.3359375, -5.0849609375, -4.833984375, -4.5830078125, -4.33203125, -4.0810546875, -3.830078125, -3.5791015625, -3.328125, -3.0771484375, -2.826171875, -2.5751953125, -2.32421875, -2.0732421875, -1.822265625, -1.5712890625, -1.3203125, -1.0693359375, -0.818359375, -0.5673828125, -0.31640625, -0.0654296875, 0.185546875, 0.4365234375, 0.6875, 0.9384765625, 1.189453125, 1.4404296875, 1.69140625, 1.9423828125, 2.193359375, 2.4443359375, 2.6953125, 2.9462890625, 3.197265625, 3.4482421875, 3.69921875, 3.9501953125, 4.201171875, 4.4521484375, 4.703125, 4.9541015625, 5.205078125, 5.4560546875, 5.70703125, 5.9580078125, 6.208984375, 6.4599609375, 6.7109375, 6.9619140625, 7.212890625, 7.4638671875, 7.71484375, 7.9658203125, 8.216796875, 8.4677734375, 8.71875]}, "gradients/decoder.transformer.h.4.crossattention.c_attn.weight": {"_type": "histogram", "values": [3.0, 1.0, 3.0, 3.0, 5.0, 5.0, 14.0, 16.0, 28.0, 38.0, 48.0, 75.0, 105.0, 174.0, 264.0, 390.0, 537.0, 829.0, 1195.0, 1593.0, 2402.0, 3486.0, 5124.0, 7637.0, 11303.0, 17010.0, 25527.0, 39899.0, 61490.0, 100018.0, 225150.0, 1300661.0, 106377.0, 64678.0, 40916.0, 26488.0, 17320.0, 11626.0, 7942.0, 5213.0, 3663.0, 2482.0, 1700.0, 1182.0, 801.0, 532.0, 368.0, 277.0, 192.0, 128.0, 72.0, 53.0, 34.0, 30.0, 17.0, 12.0, 4.0, 2.0, 1.0, 4.0, 2.0, 1.0, 1.0, 1.0], "bins": [-0.1981201171875, -0.19173622131347656, -0.18535232543945312, -0.1789684295654297, -0.17258453369140625, -0.1662006378173828, -0.15981674194335938, -0.15343284606933594, -0.1470489501953125, -0.14066505432128906, -0.13428115844726562, -0.1278972625732422, -0.12151336669921875, -0.11512947082519531, -0.10874557495117188, -0.10236167907714844, -0.095977783203125, -0.08959388732910156, -0.08320999145507812, -0.07682609558105469, -0.07044219970703125, -0.06405830383300781, -0.057674407958984375, -0.05129051208496094, -0.0449066162109375, -0.03852272033691406, -0.032138824462890625, -0.025754928588867188, -0.01937103271484375, -0.012987136840820312, -0.006603240966796875, -0.0002193450927734375, 0.00616455078125, 0.012548446655273438, 0.018932342529296875, 0.025316238403320312, 0.03170013427734375, 0.03808403015136719, 0.044467926025390625, 0.05085182189941406, 0.0572357177734375, 0.06361961364746094, 0.07000350952148438, 0.07638740539550781, 0.08277130126953125, 0.08915519714355469, 0.09553909301757812, 0.10192298889160156, 0.108306884765625, 0.11469078063964844, 0.12107467651367188, 0.1274585723876953, 0.13384246826171875, 0.1402263641357422, 0.14661026000976562, 0.15299415588378906, 0.1593780517578125, 0.16576194763183594, 0.17214584350585938, 0.1785297393798828, 0.18491363525390625, 0.1912975311279297, 0.19768142700195312, 0.20406532287597656, 0.21044921875]}, "gradients/decoder.transformer.h.4.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 3.0, 2.0, 3.0, 4.0, 3.0, 4.0, 5.0, 3.0, 8.0, 11.0, 13.0, 12.0, 19.0, 31.0, 24.0, 20.0, 48.0, 81.0, 150.0, 230.0, 116.0, 55.0, 38.0, 29.0, 13.0, 10.0, 16.0, 13.0, 7.0, 7.0, 9.0, 9.0, 6.0, 2.0, 2.0, 2.0, 6.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.4841556549072266e-05, -1.4421530067920685e-05, -1.4001503586769104e-05, -1.3581477105617523e-05, -1.3161450624465942e-05, -1.2741424143314362e-05, -1.232139766216278e-05, -1.19013711810112e-05, -1.1481344699859619e-05, -1.1061318218708038e-05, -1.0641291737556458e-05, -1.0221265256404877e-05, -9.801238775253296e-06, -9.381212294101715e-06, -8.961185812950134e-06, -8.541159331798553e-06, -8.121132850646973e-06, -7.701106369495392e-06, -7.281079888343811e-06, -6.86105340719223e-06, -6.441026926040649e-06, -6.021000444889069e-06, -5.600973963737488e-06, -5.180947482585907e-06, -4.760921001434326e-06, -4.340894520282745e-06, -3.9208680391311646e-06, -3.5008415579795837e-06, -3.080815076828003e-06, -2.660788595676422e-06, -2.2407621145248413e-06, -1.8207356333732605e-06, -1.4007091522216797e-06, -9.806826710700989e-07, -5.606561899185181e-07, -1.4062970876693726e-07, 2.7939677238464355e-07, 6.994232535362244e-07, 1.1194497346878052e-06, 1.539476215839386e-06, 1.959502696990967e-06, 2.3795291781425476e-06, 2.7995556592941284e-06, 3.2195821404457092e-06, 3.63960862159729e-06, 4.059635102748871e-06, 4.479661583900452e-06, 4.8996880650520325e-06, 5.319714546203613e-06, 5.739741027355194e-06, 6.159767508506775e-06, 6.579793989658356e-06, 6.9998204708099365e-06, 7.419846951961517e-06, 7.839873433113098e-06, 8.259899914264679e-06, 8.67992639541626e-06, 9.09995287656784e-06, 9.519979357719421e-06, 9.940005838871002e-06, 1.0360032320022583e-05, 1.0780058801174164e-05, 1.1200085282325745e-05, 1.1620111763477325e-05, 1.2040138244628906e-05]}, "gradients/decoder.transformer.h.4.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 5.0, 6.0, 1.0, 3.0, 8.0, 8.0, 9.0, 13.0, 10.0, 18.0, 30.0, 33.0, 38.0, 70.0, 140.0, 349.0, 3513.0, 968245.0, 74671.0, 788.0, 223.0, 121.0, 77.0, 56.0, 25.0, 25.0, 20.0, 12.0, 19.0, 2.0, 6.0, 7.0, 2.0, 3.0, 5.0, 2.0, 3.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00023293495178222656, -0.00022483989596366882, -0.00021674484014511108, -0.00020864978432655334, -0.0002005547285079956, -0.00019245967268943787, -0.00018436461687088013, -0.0001762695610523224, -0.00016817450523376465, -0.0001600794494152069, -0.00015198439359664917, -0.00014388933777809143, -0.0001357942819595337, -0.00012769922614097595, -0.00011960417032241821, -0.00011150911450386047, -0.00010341405868530273, -9.5319002866745e-05, -8.722394704818726e-05, -7.912889122962952e-05, -7.103383541107178e-05, -6.293877959251404e-05, -5.48437237739563e-05, -4.674866795539856e-05, -3.865361213684082e-05, -3.055855631828308e-05, -2.2463500499725342e-05, -1.4368444681167603e-05, -6.273388862609863e-06, 1.821666955947876e-06, 9.916722774505615e-06, 1.8011778593063354e-05, 2.6106834411621094e-05, 3.420189023017883e-05, 4.229694604873657e-05, 5.039200186729431e-05, 5.848705768585205e-05, 6.658211350440979e-05, 7.467716932296753e-05, 8.277222514152527e-05, 9.086728096008301e-05, 9.896233677864075e-05, 0.00010705739259719849, 0.00011515244841575623, 0.00012324750423431396, 0.0001313425600528717, 0.00013943761587142944, 0.00014753267168998718, 0.00015562772750854492, 0.00016372278332710266, 0.0001718178391456604, 0.00017991289496421814, 0.00018800795078277588, 0.00019610300660133362, 0.00020419806241989136, 0.0002122931182384491, 0.00022038817405700684, 0.00022848322987556458, 0.00023657828569412231, 0.00024467334151268005, 0.0002527683973312378, 0.00026086345314979553, 0.00026895850896835327, 0.000277053564786911, 0.00028514862060546875]}, "gradients/decoder.transformer.h.4.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0, 3.0, 5.0, 9.0, 12.0, 29.0, 49.0, 71.0, 121.0, 163.0, 162.0, 139.0, 99.0, 64.0, 36.0, 20.0, 11.0, 6.0, 5.0, 4.0, 4.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0], "bins": [-1.5320687452913262e-05, -1.499712834629463e-05, -1.4673569239675999e-05, -1.4350011042552069e-05, -1.4026451935933437e-05, -1.3702892829314806e-05, -1.3379333722696174e-05, -1.3055774616077542e-05, -1.2732216418953612e-05, -1.240865731233498e-05, -1.208509820571635e-05, -1.176154000859242e-05, -1.1437980901973788e-05, -1.1114421795355156e-05, -1.0790862688736524e-05, -1.0467303582117893e-05, -1.0143744475499261e-05, -9.82018536888063e-06, -9.496626262261998e-06, -9.173067155643366e-06, -8.849508958519436e-06, -8.525949851900805e-06, -8.202390745282173e-06, -7.878831638663542e-06, -7.555272986792261e-06, -7.231713880173629e-06, -6.908155228302348e-06, -6.584596121683717e-06, -6.261037015065085e-06, -5.937478363193804e-06, -5.613919256575173e-06, -5.290360604703892e-06, -4.966801952832611e-06, -4.64324284621398e-06, -4.319684194342699e-06, -3.996125087724067e-06, -3.672566208479111e-06, -3.349007329234155e-06, -3.0254482226155233e-06, -2.701889343370567e-06, -2.378330464125611e-06, -2.0547715848806547e-06, -1.7312125919488608e-06, -1.4076535990170669e-06, -1.0840947197721107e-06, -7.605358405271545e-07, -4.369767339085229e-07, -1.1341785466356669e-07, 2.101410245813895e-07, 5.336999606697646e-07, 8.572588967581396e-07, 1.1808178896899335e-06, 1.5043767689348897e-06, 1.8279356481798459e-06, 2.1514947547984775e-06, 2.4750536340434337e-06, 2.79861251328839e-06, 3.122171392533346e-06, 3.4457302717783023e-06, 3.769289378396934e-06, 4.092848030268215e-06, 4.416407136886846e-06, 4.739966243505478e-06, 5.0635253501241095e-06, 5.38708400199539e-06]}, "gradients/decoder.transformer.h.4.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 7.0, 3.0, 5.0, 12.0, 13.0, 6.0, 13.0, 14.0, 21.0, 14.0, 31.0, 41.0, 21.0, 42.0, 35.0, 23.0, 42.0, 35.0, 32.0, 52.0, 66.0, 51.0, 28.0, 50.0, 41.0, 20.0, 35.0, 42.0, 25.0, 31.0, 22.0, 22.0, 26.0, 23.0, 11.0, 12.0, 14.0, 10.0, 5.0, 8.0, 4.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0], "bins": [-5.364418029785156e-06, -5.204230546951294e-06, -5.044043064117432e-06, -4.883855581283569e-06, -4.723668098449707e-06, -4.563480615615845e-06, -4.403293132781982e-06, -4.24310564994812e-06, -4.082918167114258e-06, -3.9227306842803955e-06, -3.762543201446533e-06, -3.602355718612671e-06, -3.4421682357788086e-06, -3.2819807529449463e-06, -3.121793270111084e-06, -2.9616057872772217e-06, -2.8014183044433594e-06, -2.641230821609497e-06, -2.4810433387756348e-06, -2.3208558559417725e-06, -2.16066837310791e-06, -2.000480890274048e-06, -1.8402934074401855e-06, -1.6801059246063232e-06, -1.519918441772461e-06, -1.3597309589385986e-06, -1.1995434761047363e-06, -1.039355993270874e-06, -8.791685104370117e-07, -7.189810276031494e-07, -5.587935447692871e-07, -3.986060619354248e-07, -2.384185791015625e-07, -7.82310962677002e-08, 8.195638656616211e-08, 2.421438694000244e-07, 4.023313522338867e-07, 5.62518835067749e-07, 7.227063179016113e-07, 8.828938007354736e-07, 1.043081283569336e-06, 1.2032687664031982e-06, 1.3634562492370605e-06, 1.5236437320709229e-06, 1.6838312149047852e-06, 1.8440186977386475e-06, 2.0042061805725098e-06, 2.164393663406372e-06, 2.3245811462402344e-06, 2.4847686290740967e-06, 2.644956111907959e-06, 2.8051435947418213e-06, 2.9653310775756836e-06, 3.125518560409546e-06, 3.285706043243408e-06, 3.4458935260772705e-06, 3.606081008911133e-06, 3.766268491744995e-06, 3.926455974578857e-06, 4.08664345741272e-06, 4.246830940246582e-06, 4.407018423080444e-06, 4.567205905914307e-06, 4.727393388748169e-06, 4.887580871582031e-06]}, "gradients/decoder.transformer.h.4.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 5.0, 1.0, 4.0, 5.0, 4.0, 10.0, 13.0, 11.0, 13.0, 18.0, 15.0, 27.0, 29.0, 23.0, 28.0, 37.0, 28.0, 54.0, 38.0, 35.0, 35.0, 42.0, 35.0, 56.0, 39.0, 46.0, 37.0, 33.0, 43.0, 35.0, 29.0, 25.0, 25.0, 27.0, 20.0, 14.0, 19.0, 11.0, 12.0, 9.0, 4.0, 2.0, 7.0, 4.0, 2.0, 2.0, 4.0, 3.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.1328125, -11.7828369140625, -11.432861328125, -11.0828857421875, -10.73291015625, -10.3829345703125, -10.032958984375, -9.6829833984375, -9.3330078125, -8.9830322265625, -8.633056640625, -8.2830810546875, -7.93310546875, -7.5831298828125, -7.233154296875, -6.8831787109375, -6.533203125, -6.1832275390625, -5.833251953125, -5.4832763671875, -5.13330078125, -4.7833251953125, -4.433349609375, -4.0833740234375, -3.7333984375, -3.3834228515625, -3.033447265625, -2.6834716796875, -2.33349609375, -1.9835205078125, -1.633544921875, -1.2835693359375, -0.93359375, -0.5836181640625, -0.233642578125, 0.1163330078125, 0.46630859375, 0.8162841796875, 1.166259765625, 1.5162353515625, 1.8662109375, 2.2161865234375, 2.566162109375, 2.9161376953125, 3.26611328125, 3.6160888671875, 3.966064453125, 4.3160400390625, 4.666015625, 5.0159912109375, 5.365966796875, 5.7159423828125, 6.06591796875, 6.4158935546875, 6.765869140625, 7.1158447265625, 7.4658203125, 7.8157958984375, 8.165771484375, 8.5157470703125, 8.86572265625, 9.2156982421875, 9.565673828125, 9.9156494140625, 10.265625]}, "gradients/decoder.transformer.h.4.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 3.0, 9.0, 8.0, 14.0, 17.0, 20.0, 29.0, 65.0, 98.0, 159.0, 199.0, 335.0, 565.0, 866.0, 1314.0, 2160.0, 3441.0, 5599.0, 8864.0, 14328.0, 23354.0, 37470.0, 62287.0, 114132.0, 261972.0, 245438.0, 109301.0, 60608.0, 36366.0, 22392.0, 14057.0, 8677.0, 5277.0, 3478.0, 2088.0, 1325.0, 800.0, 521.0, 337.0, 185.0, 161.0, 84.0, 55.0, 38.0, 30.0, 17.0, 6.0, 11.0, 5.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0], "bins": [-14.296875, -13.8759765625, -13.455078125, -13.0341796875, -12.61328125, -12.1923828125, -11.771484375, -11.3505859375, -10.9296875, -10.5087890625, -10.087890625, -9.6669921875, -9.24609375, -8.8251953125, -8.404296875, -7.9833984375, -7.5625, -7.1416015625, -6.720703125, -6.2998046875, -5.87890625, -5.4580078125, -5.037109375, -4.6162109375, -4.1953125, -3.7744140625, -3.353515625, -2.9326171875, -2.51171875, -2.0908203125, -1.669921875, -1.2490234375, -0.828125, -0.4072265625, 0.013671875, 0.4345703125, 0.85546875, 1.2763671875, 1.697265625, 2.1181640625, 2.5390625, 2.9599609375, 3.380859375, 3.8017578125, 4.22265625, 4.6435546875, 5.064453125, 5.4853515625, 5.90625, 6.3271484375, 6.748046875, 7.1689453125, 7.58984375, 8.0107421875, 8.431640625, 8.8525390625, 9.2734375, 9.6943359375, 10.115234375, 10.5361328125, 10.95703125, 11.3779296875, 11.798828125, 12.2197265625, 12.640625]}, "gradients/decoder.transformer.h.4.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 2.0, 2.0, 2.0, 7.0, 11.0, 7.0, 10.0, 13.0, 17.0, 20.0, 27.0, 26.0, 27.0, 36.0, 50.0, 64.0, 60.0, 62.0, 115.0, 135.0, 293.0, 1322.0, 161.0, 117.0, 77.0, 68.0, 58.0, 35.0, 48.0, 45.0, 32.0, 19.0, 20.0, 15.0, 13.0, 5.0, 7.0, 10.0, 5.0, 2.0, 4.0, 3.0, 2.0, 3.0, 3.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-29.328125, -28.281005859375, -27.23388671875, -26.186767578125, -25.1396484375, -24.092529296875, -23.04541015625, -21.998291015625, -20.951171875, -19.904052734375, -18.85693359375, -17.809814453125, -16.7626953125, -15.715576171875, -14.66845703125, -13.621337890625, -12.57421875, -11.527099609375, -10.47998046875, -9.432861328125, -8.3857421875, -7.338623046875, -6.29150390625, -5.244384765625, -4.197265625, -3.150146484375, -2.10302734375, -1.055908203125, -0.0087890625, 1.038330078125, 2.08544921875, 3.132568359375, 4.1796875, 5.226806640625, 6.27392578125, 7.321044921875, 8.3681640625, 9.415283203125, 10.46240234375, 11.509521484375, 12.556640625, 13.603759765625, 14.65087890625, 15.697998046875, 16.7451171875, 17.792236328125, 18.83935546875, 19.886474609375, 20.93359375, 21.980712890625, 23.02783203125, 24.074951171875, 25.1220703125, 26.169189453125, 27.21630859375, 28.263427734375, 29.310546875, 30.357666015625, 31.40478515625, 32.451904296875, 33.4990234375, 34.546142578125, 35.59326171875, 36.640380859375, 37.6875]}, "gradients/decoder.transformer.h.4.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 4.0, 4.0, 1.0, 11.0, 8.0, 5.0, 10.0, 26.0, 13.0, 16.0, 33.0, 36.0, 49.0, 83.0, 117.0, 195.0, 376.0, 808.0, 2136.0, 6652.0, 25901.0, 126890.0, 1691188.0, 1150589.0, 108248.0, 22612.0, 6085.0, 1964.0, 745.0, 321.0, 177.0, 105.0, 78.0, 51.0, 50.0, 33.0, 16.0, 17.0, 16.0, 8.0, 10.0, 9.0, 3.0, 4.0, 4.0, 2.0, 1.0, 0.0, 4.0, 1.0, 1.0, 2.0], "bins": [-47.3125, -45.958984375, -44.60546875, -43.251953125, -41.8984375, -40.544921875, -39.19140625, -37.837890625, -36.484375, -35.130859375, -33.77734375, -32.423828125, -31.0703125, -29.716796875, -28.36328125, -27.009765625, -25.65625, -24.302734375, -22.94921875, -21.595703125, -20.2421875, -18.888671875, -17.53515625, -16.181640625, -14.828125, -13.474609375, -12.12109375, -10.767578125, -9.4140625, -8.060546875, -6.70703125, -5.353515625, -4.0, -2.646484375, -1.29296875, 0.060546875, 1.4140625, 2.767578125, 4.12109375, 5.474609375, 6.828125, 8.181640625, 9.53515625, 10.888671875, 12.2421875, 13.595703125, 14.94921875, 16.302734375, 17.65625, 19.009765625, 20.36328125, 21.716796875, 23.0703125, 24.423828125, 25.77734375, 27.130859375, 28.484375, 29.837890625, 31.19140625, 32.544921875, 33.8984375, 35.251953125, 36.60546875, 37.958984375, 39.3125]}, "gradients/decoder.transformer.h.4.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 7.0, 30.0, 77.0, 197.0, 310.0, 235.0, 107.0, 35.0, 11.0, 4.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-395.9424743652344, -386.8664855957031, -377.7904968261719, -368.7145080566406, -359.6385192871094, -350.5625305175781, -341.4865417480469, -332.4105224609375, -323.33453369140625, -314.258544921875, -305.18255615234375, -296.1065673828125, -287.03057861328125, -277.95458984375, -268.87860107421875, -259.8026123046875, -250.72662353515625, -241.650634765625, -232.57464599609375, -223.4986572265625, -214.42266845703125, -205.3466796875, -196.2706756591797, -187.19468688964844, -178.1186981201172, -169.04270935058594, -159.9667205810547, -150.89073181152344, -141.81472778320312, -132.73873901367188, -123.66275024414062, -114.58676147460938, -105.51075744628906, -96.43476867675781, -87.35877990722656, -78.28278350830078, -69.20679473876953, -60.13080596923828, -51.054813385009766, -41.97882080078125, -32.90283203125, -23.826841354370117, -14.750850677490234, -5.674860000610352, 3.4011306762695312, 12.477119445800781, 21.553112030029297, 30.629104614257812, 39.70509338378906, 48.78108215332031, 57.85707473754883, 66.93306732177734, 76.0090560913086, 85.08504486083984, 94.16104125976562, 103.23703002929688, 112.31301879882812, 121.38900756835938, 130.46499633789062, 139.54098510742188, 148.61697387695312, 157.69296264648438, 166.7689666748047, 175.84495544433594, 184.9209442138672]}, "gradients/decoder.transformer.h.4.ln_1.bias": {"_type": "histogram", "values": [1.0, 3.0, 5.0, 0.0, 3.0, 2.0, 6.0, 12.0, 6.0, 10.0, 8.0, 13.0, 17.0, 13.0, 15.0, 25.0, 21.0, 21.0, 19.0, 22.0, 18.0, 25.0, 32.0, 30.0, 36.0, 38.0, 39.0, 32.0, 36.0, 31.0, 28.0, 37.0, 29.0, 41.0, 26.0, 34.0, 39.0, 21.0, 29.0, 27.0, 27.0, 26.0, 14.0, 16.0, 14.0, 15.0, 6.0, 13.0, 7.0, 8.0, 8.0, 6.0, 3.0, 2.0, 4.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-59.412322998046875, -57.16661834716797, -54.9209098815918, -52.67520523071289, -50.42949676513672, -48.18379211425781, -45.938087463378906, -43.692378997802734, -41.44667053222656, -39.200965881347656, -36.955257415771484, -34.70955276489258, -32.463844299316406, -30.2181396484375, -27.97243309020996, -25.726726531982422, -23.481021881103516, -21.235315322875977, -18.989608764648438, -16.74390411376953, -14.498196601867676, -12.252490043640137, -10.006784439086914, -7.761077880859375, -5.515371322631836, -3.269665002822876, -1.023958683013916, 1.2217473983764648, 3.467453956604004, 5.713160514831543, 7.958866119384766, 10.204572677612305, 12.450279235839844, 14.695985794067383, 16.941692352294922, 19.187397003173828, 21.43310546875, 23.678810119628906, 25.924516677856445, 28.170223236083984, 30.415929794311523, 32.66163635253906, 34.90734100341797, 37.15304946899414, 39.39875411987305, 41.64446258544922, 43.890167236328125, 46.13587188720703, 48.3815803527832, 50.62728500366211, 52.87299346923828, 55.11869812011719, 57.36440658569336, 59.610111236572266, 61.85581970214844, 64.10152435302734, 66.34722900390625, 68.59293365478516, 70.83863830566406, 73.0843505859375, 75.3300552368164, 77.57575988769531, 79.82146453857422, 82.06716918945312, 84.31288146972656]}, "gradients/decoder.transformer.h.3.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 2.0, 2.0, 3.0, 5.0, 3.0, 9.0, 8.0, 9.0, 9.0, 12.0, 9.0, 14.0, 15.0, 20.0, 20.0, 30.0, 26.0, 29.0, 35.0, 37.0, 43.0, 34.0, 34.0, 36.0, 38.0, 34.0, 46.0, 43.0, 35.0, 27.0, 32.0, 33.0, 37.0, 30.0, 30.0, 27.0, 30.0, 26.0, 15.0, 13.0, 4.0, 12.0, 9.0, 10.0, 8.0, 7.0, 3.0, 6.0, 7.0, 1.0, 3.0, 3.0, 2.0, 2.0], "bins": [-12.3359375, -11.989501953125, -11.64306640625, -11.296630859375, -10.9501953125, -10.603759765625, -10.25732421875, -9.910888671875, -9.564453125, -9.218017578125, -8.87158203125, -8.525146484375, -8.1787109375, -7.832275390625, -7.48583984375, -7.139404296875, -6.79296875, -6.446533203125, -6.10009765625, -5.753662109375, -5.4072265625, -5.060791015625, -4.71435546875, -4.367919921875, -4.021484375, -3.675048828125, -3.32861328125, -2.982177734375, -2.6357421875, -2.289306640625, -1.94287109375, -1.596435546875, -1.25, -0.903564453125, -0.55712890625, -0.210693359375, 0.1357421875, 0.482177734375, 0.82861328125, 1.175048828125, 1.521484375, 1.867919921875, 2.21435546875, 2.560791015625, 2.9072265625, 3.253662109375, 3.60009765625, 3.946533203125, 4.29296875, 4.639404296875, 4.98583984375, 5.332275390625, 5.6787109375, 6.025146484375, 6.37158203125, 6.718017578125, 7.064453125, 7.410888671875, 7.75732421875, 8.103759765625, 8.4501953125, 8.796630859375, 9.14306640625, 9.489501953125, 9.8359375]}, "gradients/decoder.transformer.h.3.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 3.0, 5.0, 18.0, 20.0, 33.0, 45.0, 88.0, 121.0, 234.0, 591.0, 14480.0, 4175292.0, 2453.0, 424.0, 207.0, 107.0, 62.0, 41.0, 31.0, 19.0, 7.0, 3.0, 6.0, 5.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-183.375, -173.900390625, -164.42578125, -154.951171875, -145.4765625, -136.001953125, -126.52734375, -117.052734375, -107.578125, -98.103515625, -88.62890625, -79.154296875, -69.6796875, -60.205078125, -50.73046875, -41.255859375, -31.78125, -22.306640625, -12.83203125, -3.357421875, 6.1171875, 15.591796875, 25.06640625, 34.541015625, 44.015625, 53.490234375, 62.96484375, 72.439453125, 81.9140625, 91.388671875, 100.86328125, 110.337890625, 119.8125, 129.287109375, 138.76171875, 148.236328125, 157.7109375, 167.185546875, 176.66015625, 186.134765625, 195.609375, 205.083984375, 214.55859375, 224.033203125, 233.5078125, 242.982421875, 252.45703125, 261.931640625, 271.40625, 280.880859375, 290.35546875, 299.830078125, 309.3046875, 318.779296875, 328.25390625, 337.728515625, 347.203125, 356.677734375, 366.15234375, 375.626953125, 385.1015625, 394.576171875, 404.05078125, 413.525390625, 423.0]}, "gradients/decoder.transformer.h.3.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 3.0, 9.0, 10.0, 16.0, 25.0, 29.0, 35.0, 64.0, 95.0, 182.0, 287.0, 561.0, 1067.0, 779.0, 344.0, 207.0, 118.0, 101.0, 54.0, 32.0, 24.0, 16.0, 5.0, 8.0, 3.0, 3.0, 3.0, 2.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-25.515625, -24.595947265625, -23.67626953125, -22.756591796875, -21.8369140625, -20.917236328125, -19.99755859375, -19.077880859375, -18.158203125, -17.238525390625, -16.31884765625, -15.399169921875, -14.4794921875, -13.559814453125, -12.64013671875, -11.720458984375, -10.80078125, -9.881103515625, -8.96142578125, -8.041748046875, -7.1220703125, -6.202392578125, -5.28271484375, -4.363037109375, -3.443359375, -2.523681640625, -1.60400390625, -0.684326171875, 0.2353515625, 1.155029296875, 2.07470703125, 2.994384765625, 3.9140625, 4.833740234375, 5.75341796875, 6.673095703125, 7.5927734375, 8.512451171875, 9.43212890625, 10.351806640625, 11.271484375, 12.191162109375, 13.11083984375, 14.030517578125, 14.9501953125, 15.869873046875, 16.78955078125, 17.709228515625, 18.62890625, 19.548583984375, 20.46826171875, 21.387939453125, 22.3076171875, 23.227294921875, 24.14697265625, 25.066650390625, 25.986328125, 26.906005859375, 27.82568359375, 28.745361328125, 29.6650390625, 30.584716796875, 31.50439453125, 32.424072265625, 33.34375]}, "gradients/decoder.transformer.h.3.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 1.0, 2.0, 1.0, 4.0, 6.0, 7.0, 13.0, 18.0, 15.0, 21.0, 37.0, 61.0, 86.0, 117.0, 197.0, 368.0, 857.0, 2218.0, 7975.0, 46052.0, 540583.0, 3393278.0, 173567.0, 21371.0, 4577.0, 1447.0, 590.0, 302.0, 166.0, 102.0, 59.0, 54.0, 38.0, 34.0, 14.0, 19.0, 5.0, 3.0, 11.0, 2.0, 3.0, 3.0, 1.0, 4.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-57.375, -55.5986328125, -53.822265625, -52.0458984375, -50.26953125, -48.4931640625, -46.716796875, -44.9404296875, -43.1640625, -41.3876953125, -39.611328125, -37.8349609375, -36.05859375, -34.2822265625, -32.505859375, -30.7294921875, -28.953125, -27.1767578125, -25.400390625, -23.6240234375, -21.84765625, -20.0712890625, -18.294921875, -16.5185546875, -14.7421875, -12.9658203125, -11.189453125, -9.4130859375, -7.63671875, -5.8603515625, -4.083984375, -2.3076171875, -0.53125, 1.2451171875, 3.021484375, 4.7978515625, 6.57421875, 8.3505859375, 10.126953125, 11.9033203125, 13.6796875, 15.4560546875, 17.232421875, 19.0087890625, 20.78515625, 22.5615234375, 24.337890625, 26.1142578125, 27.890625, 29.6669921875, 31.443359375, 33.2197265625, 34.99609375, 36.7724609375, 38.548828125, 40.3251953125, 42.1015625, 43.8779296875, 45.654296875, 47.4306640625, 49.20703125, 50.9833984375, 52.759765625, 54.5361328125, 56.3125]}, "gradients/decoder.transformer.h.3.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 6.0, 14.0, 16.0, 33.0, 62.0, 98.0, 137.0, 163.0, 137.0, 113.0, 85.0, 70.0, 27.0, 29.0, 6.0, 6.0, 4.0, 4.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-127.17681121826172, -123.20262908935547, -119.22843933105469, -115.25425720214844, -111.28007507324219, -107.3058853149414, -103.33170318603516, -99.35751342773438, -95.38333129882812, -91.40914916992188, -87.4349594116211, -83.46077728271484, -79.48658752441406, -75.51240539550781, -71.53822326660156, -67.56404113769531, -63.58985137939453, -59.615665435791016, -55.6414794921875, -51.66729736328125, -47.693111419677734, -43.71892547607422, -39.74474334716797, -35.77055740356445, -31.796371459960938, -27.822185516357422, -23.84800148010254, -19.873817443847656, -15.89963150024414, -11.925445556640625, -7.951261520385742, -3.9770774841308594, -0.0028839111328125, 3.9713010787963867, 7.945486068725586, 11.919671058654785, 15.893856048583984, 19.8680419921875, 23.842226028442383, 27.816410064697266, 31.79059600830078, 35.7647819519043, 39.73896789550781, 43.71315002441406, 47.68733596801758, 51.661521911621094, 55.635704040527344, 59.60988998413086, 63.584075927734375, 67.55825805664062, 71.5324478149414, 75.50662994384766, 79.48081970214844, 83.45500183105469, 87.42918395996094, 91.40336608886719, 95.37755584716797, 99.35173797607422, 103.325927734375, 107.30010986328125, 111.2742919921875, 115.24848175048828, 119.22266387939453, 123.19685363769531, 127.17103576660156]}, "gradients/decoder.transformer.h.3.ln_2.bias": {"_type": "histogram", "values": [2.0, 3.0, 3.0, 4.0, 3.0, 2.0, 7.0, 5.0, 4.0, 3.0, 10.0, 9.0, 9.0, 11.0, 14.0, 12.0, 19.0, 11.0, 30.0, 20.0, 17.0, 21.0, 33.0, 29.0, 35.0, 39.0, 35.0, 33.0, 32.0, 31.0, 52.0, 50.0, 41.0, 35.0, 24.0, 36.0, 40.0, 33.0, 28.0, 33.0, 29.0, 16.0, 17.0, 15.0, 15.0, 16.0, 13.0, 11.0, 7.0, 6.0, 6.0, 4.0, 1.0, 1.0, 2.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-56.22740173339844, -54.349937438964844, -52.47247314453125, -50.59500503540039, -48.7175407409668, -46.8400764465332, -44.962608337402344, -43.08514404296875, -41.207679748535156, -39.33021545410156, -37.45275115966797, -35.57528305053711, -33.697818756103516, -31.820354461669922, -29.942888259887695, -28.06542205810547, -26.187957763671875, -24.31049346923828, -22.433027267456055, -20.555561065673828, -18.678096771240234, -16.80063247680664, -14.923166275024414, -13.045701026916504, -11.168235778808594, -9.290770530700684, -7.413305282592773, -5.535840034484863, -3.658374786376953, -1.780909538269043, 0.09655570983886719, 1.9740209579467773, 3.8514862060546875, 5.728951454162598, 7.606416702270508, 9.483881950378418, 11.361347198486328, 13.238812446594238, 15.116277694702148, 16.993743896484375, 18.87120819091797, 20.748672485351562, 22.62613868713379, 24.503604888916016, 26.38106918334961, 28.258533477783203, 30.13599967956543, 32.013465881347656, 33.89093017578125, 35.768394470214844, 37.64585876464844, 39.5233268737793, 41.40079116821289, 43.278255462646484, 45.155723571777344, 47.03318786621094, 48.91065216064453, 50.788116455078125, 52.66558074951172, 54.54304885864258, 56.42051315307617, 58.297977447509766, 60.175445556640625, 62.05290985107422, 63.93037414550781]}, "gradients/decoder.transformer.h.3.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 2.0, 2.0, 1.0, 1.0, 5.0, 3.0, 4.0, 6.0, 10.0, 12.0, 8.0, 12.0, 22.0, 23.0, 21.0, 28.0, 33.0, 35.0, 32.0, 40.0, 39.0, 59.0, 51.0, 60.0, 46.0, 50.0, 50.0, 40.0, 32.0, 47.0, 34.0, 31.0, 29.0, 20.0, 23.0, 16.0, 18.0, 17.0, 13.0, 14.0, 4.0, 8.0, 2.0, 2.0, 4.0, 1.0, 0.0, 4.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-15.1328125, -14.6898193359375, -14.246826171875, -13.8038330078125, -13.36083984375, -12.9178466796875, -12.474853515625, -12.0318603515625, -11.5888671875, -11.1458740234375, -10.702880859375, -10.2598876953125, -9.81689453125, -9.3739013671875, -8.930908203125, -8.4879150390625, -8.044921875, -7.6019287109375, -7.158935546875, -6.7159423828125, -6.27294921875, -5.8299560546875, -5.386962890625, -4.9439697265625, -4.5009765625, -4.0579833984375, -3.614990234375, -3.1719970703125, -2.72900390625, -2.2860107421875, -1.843017578125, -1.4000244140625, -0.95703125, -0.5140380859375, -0.071044921875, 0.3719482421875, 0.81494140625, 1.2579345703125, 1.700927734375, 2.1439208984375, 2.5869140625, 3.0299072265625, 3.472900390625, 3.9158935546875, 4.35888671875, 4.8018798828125, 5.244873046875, 5.6878662109375, 6.130859375, 6.5738525390625, 7.016845703125, 7.4598388671875, 7.90283203125, 8.3458251953125, 8.788818359375, 9.2318115234375, 9.6748046875, 10.1177978515625, 10.560791015625, 11.0037841796875, 11.44677734375, 11.8897705078125, 12.332763671875, 12.7757568359375, 13.21875]}, "gradients/decoder.transformer.h.3.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 3.0, 0.0, 1.0, 2.0, 1.0, 6.0, 9.0, 15.0, 34.0, 26.0, 34.0, 53.0, 79.0, 143.0, 176.0, 258.0, 442.0, 587.0, 825.0, 1365.0, 2017.0, 3011.0, 4699.0, 7321.0, 11275.0, 18460.0, 30597.0, 52471.0, 97736.0, 249256.0, 316829.0, 107441.0, 56663.0, 32968.0, 19428.0, 11933.0, 7758.0, 4987.0, 3371.0, 1993.0, 1352.0, 970.0, 617.0, 465.0, 294.0, 190.0, 130.0, 85.0, 61.0, 38.0, 36.0, 20.0, 15.0, 9.0, 5.0, 4.0, 4.0, 2.0, 1.0, 1.0, 1.0], "bins": [-0.5263671875, -0.5104484558105469, -0.49452972412109375, -0.4786109924316406, -0.4626922607421875, -0.4467735290527344, -0.43085479736328125, -0.4149360656738281, -0.399017333984375, -0.3830986022949219, -0.36717987060546875, -0.3512611389160156, -0.3353424072265625, -0.3194236755371094, -0.30350494384765625, -0.2875862121582031, -0.27166748046875, -0.2557487487792969, -0.23983001708984375, -0.22391128540039062, -0.2079925537109375, -0.19207382202148438, -0.17615509033203125, -0.16023635864257812, -0.144317626953125, -0.12839889526367188, -0.11248016357421875, -0.09656143188476562, -0.0806427001953125, -0.06472396850585938, -0.04880523681640625, -0.032886505126953125, -0.0169677734375, -0.001049041748046875, 0.01486968994140625, 0.030788421630859375, 0.0467071533203125, 0.06262588500976562, 0.07854461669921875, 0.09446334838867188, 0.110382080078125, 0.12630081176757812, 0.14221954345703125, 0.15813827514648438, 0.1740570068359375, 0.18997573852539062, 0.20589447021484375, 0.22181320190429688, 0.23773193359375, 0.2536506652832031, 0.26956939697265625, 0.2854881286621094, 0.3014068603515625, 0.3173255920410156, 0.33324432373046875, 0.3491630554199219, 0.365081787109375, 0.3810005187988281, 0.39691925048828125, 0.4128379821777344, 0.4287567138671875, 0.4446754455566406, 0.46059417724609375, 0.4765129089355469, 0.492431640625]}, "gradients/decoder.transformer.h.3.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 2.0, 4.0, 1.0, 8.0, 3.0, 7.0, 8.0, 9.0, 13.0, 13.0, 12.0, 14.0, 21.0, 20.0, 14.0, 33.0, 30.0, 28.0, 35.0, 33.0, 37.0, 39.0, 42.0, 36.0, 29.0, 1051.0, 39.0, 35.0, 41.0, 36.0, 29.0, 41.0, 38.0, 26.0, 30.0, 19.0, 21.0, 24.0, 10.0, 18.0, 14.0, 12.0, 16.0, 10.0, 6.0, 10.0, 5.0, 8.0, 2.0, 1.0, 1.0, 3.0, 2.0, 0.0, 2.0], "bins": [-9.7734375, -9.4879150390625, -9.202392578125, -8.9168701171875, -8.63134765625, -8.3458251953125, -8.060302734375, -7.7747802734375, -7.4892578125, -7.2037353515625, -6.918212890625, -6.6326904296875, -6.34716796875, -6.0616455078125, -5.776123046875, -5.4906005859375, -5.205078125, -4.9195556640625, -4.634033203125, -4.3485107421875, -4.06298828125, -3.7774658203125, -3.491943359375, -3.2064208984375, -2.9208984375, -2.6353759765625, -2.349853515625, -2.0643310546875, -1.77880859375, -1.4932861328125, -1.207763671875, -0.9222412109375, -0.63671875, -0.3511962890625, -0.065673828125, 0.2198486328125, 0.50537109375, 0.7908935546875, 1.076416015625, 1.3619384765625, 1.6474609375, 1.9329833984375, 2.218505859375, 2.5040283203125, 2.78955078125, 3.0750732421875, 3.360595703125, 3.6461181640625, 3.931640625, 4.2171630859375, 4.502685546875, 4.7882080078125, 5.07373046875, 5.3592529296875, 5.644775390625, 5.9302978515625, 6.2158203125, 6.5013427734375, 6.786865234375, 7.0723876953125, 7.35791015625, 7.6434326171875, 7.928955078125, 8.2144775390625, 8.5]}, "gradients/decoder.transformer.h.3.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 1.0, 6.0, 3.0, 10.0, 8.0, 20.0, 33.0, 57.0, 76.0, 98.0, 160.0, 218.0, 345.0, 444.0, 632.0, 945.0, 1267.0, 1695.0, 2346.0, 3266.0, 4514.0, 6570.0, 9138.0, 13371.0, 18995.0, 28139.0, 42487.0, 64837.0, 103940.0, 211268.0, 1272711.0, 106634.0, 66272.0, 42859.0, 28417.0, 19446.0, 13519.0, 9290.0, 6583.0, 4653.0, 3380.0, 2427.0, 1757.0, 1239.0, 907.0, 632.0, 469.0, 338.0, 236.0, 167.0, 95.0, 76.0, 51.0, 35.0, 24.0, 15.0, 11.0, 4.0, 8.0, 1.0, 3.0], "bins": [-0.23583984375, -0.22869873046875, -0.2215576171875, -0.21441650390625, -0.207275390625, -0.20013427734375, -0.1929931640625, -0.18585205078125, -0.1787109375, -0.17156982421875, -0.1644287109375, -0.15728759765625, -0.150146484375, -0.14300537109375, -0.1358642578125, -0.12872314453125, -0.12158203125, -0.11444091796875, -0.1072998046875, -0.10015869140625, -0.093017578125, -0.08587646484375, -0.0787353515625, -0.07159423828125, -0.064453125, -0.05731201171875, -0.0501708984375, -0.04302978515625, -0.035888671875, -0.02874755859375, -0.0216064453125, -0.01446533203125, -0.00732421875, -0.00018310546875, 0.0069580078125, 0.01409912109375, 0.021240234375, 0.02838134765625, 0.0355224609375, 0.04266357421875, 0.0498046875, 0.05694580078125, 0.0640869140625, 0.07122802734375, 0.078369140625, 0.08551025390625, 0.0926513671875, 0.09979248046875, 0.10693359375, 0.11407470703125, 0.1212158203125, 0.12835693359375, 0.135498046875, 0.14263916015625, 0.1497802734375, 0.15692138671875, 0.1640625, 0.17120361328125, 0.1783447265625, 0.18548583984375, 0.192626953125, 0.19976806640625, 0.2069091796875, 0.21405029296875, 0.22119140625]}, "gradients/decoder.transformer.h.3.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 3.0, 3.0, 3.0, 2.0, 5.0, 5.0, 6.0, 5.0, 9.0, 12.0, 10.0, 16.0, 22.0, 36.0, 38.0, 41.0, 47.0, 74.0, 116.0, 158.0, 99.0, 52.0, 55.0, 32.0, 30.0, 20.0, 22.0, 17.0, 15.0, 4.0, 12.0, 6.0, 9.0, 2.0, 6.0, 1.0, 5.0, 1.0, 1.0, 4.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 3.0], "bins": [-1.5676021575927734e-05, -1.5216879546642303e-05, -1.4757737517356873e-05, -1.4298595488071442e-05, -1.383945345878601e-05, -1.338031142950058e-05, -1.2921169400215149e-05, -1.2462027370929718e-05, -1.2002885341644287e-05, -1.1543743312358856e-05, -1.1084601283073425e-05, -1.0625459253787994e-05, -1.0166317224502563e-05, -9.707175195217133e-06, -9.248033165931702e-06, -8.78889113664627e-06, -8.32974910736084e-06, -7.870607078075409e-06, -7.411465048789978e-06, -6.952323019504547e-06, -6.493180990219116e-06, -6.034038960933685e-06, -5.574896931648254e-06, -5.1157549023628235e-06, -4.656612873077393e-06, -4.197470843791962e-06, -3.7383288145065308e-06, -3.2791867852211e-06, -2.820044755935669e-06, -2.360902726650238e-06, -1.9017606973648071e-06, -1.4426186680793762e-06, -9.834766387939453e-07, -5.243346095085144e-07, -6.51925802230835e-08, 3.939494490623474e-07, 8.530914783477783e-07, 1.3122335076332092e-06, 1.7713755369186401e-06, 2.230517566204071e-06, 2.689659595489502e-06, 3.148801624774933e-06, 3.6079436540603638e-06, 4.067085683345795e-06, 4.526227712631226e-06, 4.9853697419166565e-06, 5.444511771202087e-06, 5.903653800487518e-06, 6.362795829772949e-06, 6.82193785905838e-06, 7.281079888343811e-06, 7.740221917629242e-06, 8.199363946914673e-06, 8.658505976200104e-06, 9.117648005485535e-06, 9.576790034770966e-06, 1.0035932064056396e-05, 1.0495074093341827e-05, 1.0954216122627258e-05, 1.141335815191269e-05, 1.187250018119812e-05, 1.2331642210483551e-05, 1.2790784239768982e-05, 1.3249926269054413e-05, 1.3709068298339844e-05]}, "gradients/decoder.transformer.h.3.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 3.0, 2.0, 3.0, 2.0, 5.0, 1.0, 9.0, 3.0, 11.0, 6.0, 13.0, 16.0, 24.0, 33.0, 58.0, 55.0, 100.0, 175.0, 392.0, 4321.0, 915483.0, 125599.0, 1551.0, 274.0, 133.0, 87.0, 58.0, 37.0, 25.0, 17.0, 17.0, 13.0, 5.0, 7.0, 7.0, 2.0, 2.0, 5.0, 4.0, 4.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.0002765655517578125, -0.00026729702949523926, -0.000258028507232666, -0.0002487599849700928, -0.00023949146270751953, -0.0002302229404449463, -0.00022095441818237305, -0.0002116858959197998, -0.00020241737365722656, -0.00019314885139465332, -0.00018388032913208008, -0.00017461180686950684, -0.0001653432846069336, -0.00015607476234436035, -0.0001468062400817871, -0.00013753771781921387, -0.00012826919555664062, -0.00011900067329406738, -0.00010973215103149414, -0.0001004636287689209, -9.119510650634766e-05, -8.192658424377441e-05, -7.265806198120117e-05, -6.338953971862793e-05, -5.412101745605469e-05, -4.4852495193481445e-05, -3.55839729309082e-05, -2.631545066833496e-05, -1.704692840576172e-05, -7.778406143188477e-06, 1.4901161193847656e-06, 1.0758638381958008e-05, 2.002716064453125e-05, 2.9295682907104492e-05, 3.8564205169677734e-05, 4.7832727432250977e-05, 5.710124969482422e-05, 6.636977195739746e-05, 7.56382942199707e-05, 8.490681648254395e-05, 9.417533874511719e-05, 0.00010344386100769043, 0.00011271238327026367, 0.00012198090553283691, 0.00013124942779541016, 0.0001405179500579834, 0.00014978647232055664, 0.00015905499458312988, 0.00016832351684570312, 0.00017759203910827637, 0.0001868605613708496, 0.00019612908363342285, 0.0002053976058959961, 0.00021466612815856934, 0.00022393465042114258, 0.00023320317268371582, 0.00024247169494628906, 0.0002517402172088623, 0.00026100873947143555, 0.0002702772617340088, 0.00027954578399658203, 0.0002888143062591553, 0.0002980828285217285, 0.00030735135078430176, 0.000316619873046875]}, "gradients/decoder.transformer.h.3.ln_cross_attn.weight": {"_type": "histogram", "values": [4.0, 2.0, 17.0, 50.0, 164.0, 401.0, 272.0, 80.0, 21.0, 6.0, 4.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.730938482884085e-06, -4.73145701107569e-06, -3.731975084519945e-06, -2.7324933853378752e-06, -1.7330116861558054e-06, -7.335297596000601e-07, 2.6595171220833436e-07, 1.2654331840167288e-06, 2.264915110572474e-06, 3.264396809754544e-06, 4.263878508936614e-06, 5.263360435492359e-06, 6.2628419073007535e-06, 7.262323833856499e-06, 8.261805305664893e-06, 9.261286322725937e-06, 1.0260768249281682e-05, 1.1260250175837427e-05, 1.2259732102393173e-05, 1.3259213119454216e-05, 1.4258695046009962e-05, 1.5258176972565707e-05, 1.625765798962675e-05, 1.7257139916182496e-05, 1.825662184273824e-05, 1.9256103769293986e-05, 2.025558569584973e-05, 2.1255067622405477e-05, 2.2254549548961222e-05, 2.3254029656527564e-05, 2.425351158308331e-05, 2.5252993509639055e-05, 2.6252477255184203e-05, 2.725195918173995e-05, 2.8251441108295694e-05, 2.925092303485144e-05, 3.0250404961407185e-05, 3.1249885068973526e-05, 3.2249368814518675e-05, 3.324884892208502e-05, 3.4248332667630166e-05, 3.524781277519651e-05, 3.6247296520741656e-05, 3.7246776628308e-05, 3.824626037385315e-05, 3.924574048141949e-05, 4.024522422696464e-05, 4.124470433453098e-05, 4.224418444209732e-05, 4.324366454966366e-05, 4.424314829520881e-05, 4.5242628402775154e-05, 4.62421121483203e-05, 4.7241592255886644e-05, 4.824107600143179e-05, 4.9240556108998135e-05, 5.0240036216564476e-05, 5.123951632413082e-05, 5.223900006967597e-05, 5.323848017724231e-05, 5.423796392278746e-05, 5.52374440303538e-05, 5.623692777589895e-05, 5.723640788346529e-05, 5.823589162901044e-05]}, "gradients/decoder.transformer.h.3.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 3.0, 1.0, 5.0, 3.0, 6.0, 6.0, 5.0, 16.0, 9.0, 8.0, 8.0, 26.0, 17.0, 15.0, 23.0, 21.0, 36.0, 41.0, 44.0, 40.0, 47.0, 37.0, 49.0, 30.0, 43.0, 39.0, 50.0, 28.0, 38.0, 34.0, 37.0, 41.0, 32.0, 30.0, 22.0, 24.0, 17.0, 11.0, 11.0, 20.0, 6.0, 6.0, 9.0, 4.0, 5.0, 7.0, 1.0, 5.0, 1.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.304813385009766e-06, -5.098059773445129e-06, -4.891306161880493e-06, -4.684552550315857e-06, -4.477798938751221e-06, -4.2710453271865845e-06, -4.064291715621948e-06, -3.857538104057312e-06, -3.6507844924926758e-06, -3.4440308809280396e-06, -3.2372772693634033e-06, -3.030523657798767e-06, -2.823770046234131e-06, -2.6170164346694946e-06, -2.4102628231048584e-06, -2.203509211540222e-06, -1.996755599975586e-06, -1.7900019884109497e-06, -1.5832483768463135e-06, -1.3764947652816772e-06, -1.169741153717041e-06, -9.629875421524048e-07, -7.562339305877686e-07, -5.494803190231323e-07, -3.427267074584961e-07, -1.3597309589385986e-07, 7.078051567077637e-08, 2.775341272354126e-07, 4.842877388000488e-07, 6.910413503646851e-07, 8.977949619293213e-07, 1.1045485734939575e-06, 1.3113021850585938e-06, 1.51805579662323e-06, 1.7248094081878662e-06, 1.9315630197525024e-06, 2.1383166313171387e-06, 2.345070242881775e-06, 2.551823854446411e-06, 2.7585774660110474e-06, 2.9653310775756836e-06, 3.17208468914032e-06, 3.378838300704956e-06, 3.5855919122695923e-06, 3.7923455238342285e-06, 3.999099135398865e-06, 4.205852746963501e-06, 4.412606358528137e-06, 4.6193599700927734e-06, 4.82611358165741e-06, 5.032867193222046e-06, 5.239620804786682e-06, 5.446374416351318e-06, 5.653128027915955e-06, 5.859881639480591e-06, 6.066635251045227e-06, 6.273388862609863e-06, 6.4801424741744995e-06, 6.686896085739136e-06, 6.893649697303772e-06, 7.100403308868408e-06, 7.3071569204330444e-06, 7.513910531997681e-06, 7.720664143562317e-06, 7.927417755126953e-06]}, "gradients/decoder.transformer.h.3.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 2.0, 2.0, 1.0, 1.0, 5.0, 3.0, 4.0, 6.0, 10.0, 12.0, 8.0, 12.0, 22.0, 23.0, 21.0, 28.0, 33.0, 35.0, 32.0, 40.0, 39.0, 59.0, 51.0, 60.0, 46.0, 50.0, 50.0, 40.0, 32.0, 47.0, 34.0, 31.0, 29.0, 20.0, 23.0, 16.0, 18.0, 17.0, 13.0, 14.0, 4.0, 8.0, 2.0, 2.0, 4.0, 1.0, 0.0, 4.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-15.1328125, -14.6898193359375, -14.246826171875, -13.8038330078125, -13.36083984375, -12.9178466796875, -12.474853515625, -12.0318603515625, -11.5888671875, -11.1458740234375, -10.702880859375, -10.2598876953125, -9.81689453125, -9.3739013671875, -8.930908203125, -8.4879150390625, -8.044921875, -7.6019287109375, -7.158935546875, -6.7159423828125, -6.27294921875, -5.8299560546875, -5.386962890625, -4.9439697265625, -4.5009765625, -4.0579833984375, -3.614990234375, -3.1719970703125, -2.72900390625, -2.2860107421875, -1.843017578125, -1.4000244140625, -0.95703125, -0.5140380859375, -0.071044921875, 0.3719482421875, 0.81494140625, 1.2579345703125, 1.700927734375, 2.1439208984375, 2.5869140625, 3.0299072265625, 3.472900390625, 3.9158935546875, 4.35888671875, 4.8018798828125, 5.244873046875, 5.6878662109375, 6.130859375, 6.5738525390625, 7.016845703125, 7.4598388671875, 7.90283203125, 8.3458251953125, 8.788818359375, 9.2318115234375, 9.6748046875, 10.1177978515625, 10.560791015625, 11.0037841796875, 11.44677734375, 11.8897705078125, 12.332763671875, 12.7757568359375, 13.21875]}, "gradients/decoder.transformer.h.3.attn.c_proj.weight": {"_type": "histogram", "values": [2.0, 4.0, 2.0, 5.0, 1.0, 10.0, 9.0, 16.0, 25.0, 27.0, 34.0, 63.0, 93.0, 121.0, 202.0, 254.0, 479.0, 673.0, 1028.0, 1566.0, 2413.0, 3821.0, 6098.0, 9644.0, 15275.0, 25211.0, 40057.0, 65793.0, 109439.0, 183076.0, 213715.0, 145761.0, 86293.0, 52044.0, 31977.0, 19635.0, 12343.0, 7659.0, 4831.0, 3128.0, 2034.0, 1289.0, 830.0, 550.0, 358.0, 212.0, 155.0, 93.0, 80.0, 51.0, 23.0, 15.0, 17.0, 17.0, 10.0, 4.0, 1.0, 3.0, 1.0, 1.0, 2.0, 0.0, 2.0, 1.0], "bins": [-13.265625, -12.8272705078125, -12.388916015625, -11.9505615234375, -11.51220703125, -11.0738525390625, -10.635498046875, -10.1971435546875, -9.7587890625, -9.3204345703125, -8.882080078125, -8.4437255859375, -8.00537109375, -7.5670166015625, -7.128662109375, -6.6903076171875, -6.251953125, -5.8135986328125, -5.375244140625, -4.9368896484375, -4.49853515625, -4.0601806640625, -3.621826171875, -3.1834716796875, -2.7451171875, -2.3067626953125, -1.868408203125, -1.4300537109375, -0.99169921875, -0.5533447265625, -0.114990234375, 0.3233642578125, 0.76171875, 1.2000732421875, 1.638427734375, 2.0767822265625, 2.51513671875, 2.9534912109375, 3.391845703125, 3.8302001953125, 4.2685546875, 4.7069091796875, 5.145263671875, 5.5836181640625, 6.02197265625, 6.4603271484375, 6.898681640625, 7.3370361328125, 7.775390625, 8.2137451171875, 8.652099609375, 9.0904541015625, 9.52880859375, 9.9671630859375, 10.405517578125, 10.8438720703125, 11.2822265625, 11.7205810546875, 12.158935546875, 12.5972900390625, 13.03564453125, 13.4739990234375, 13.912353515625, 14.3507080078125, 14.7890625]}, "gradients/decoder.transformer.h.3.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 4.0, 2.0, 3.0, 2.0, 3.0, 11.0, 5.0, 8.0, 6.0, 13.0, 19.0, 20.0, 34.0, 22.0, 40.0, 42.0, 67.0, 76.0, 135.0, 210.0, 271.0, 1342.0, 194.0, 119.0, 84.0, 69.0, 40.0, 32.0, 44.0, 21.0, 24.0, 21.0, 12.0, 14.0, 12.0, 9.0, 8.0, 6.0, 6.0, 5.0, 1.0, 2.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-36.90625, -35.63427734375, -34.3623046875, -33.09033203125, -31.818359375, -30.54638671875, -29.2744140625, -28.00244140625, -26.73046875, -25.45849609375, -24.1865234375, -22.91455078125, -21.642578125, -20.37060546875, -19.0986328125, -17.82666015625, -16.5546875, -15.28271484375, -14.0107421875, -12.73876953125, -11.466796875, -10.19482421875, -8.9228515625, -7.65087890625, -6.37890625, -5.10693359375, -3.8349609375, -2.56298828125, -1.291015625, -0.01904296875, 1.2529296875, 2.52490234375, 3.796875, 5.06884765625, 6.3408203125, 7.61279296875, 8.884765625, 10.15673828125, 11.4287109375, 12.70068359375, 13.97265625, 15.24462890625, 16.5166015625, 17.78857421875, 19.060546875, 20.33251953125, 21.6044921875, 22.87646484375, 24.1484375, 25.42041015625, 26.6923828125, 27.96435546875, 29.236328125, 30.50830078125, 31.7802734375, 33.05224609375, 34.32421875, 35.59619140625, 36.8681640625, 38.14013671875, 39.412109375, 40.68408203125, 41.9560546875, 43.22802734375, 44.5]}, "gradients/decoder.transformer.h.3.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 4.0, 2.0, 6.0, 3.0, 5.0, 7.0, 6.0, 9.0, 10.0, 11.0, 32.0, 30.0, 55.0, 93.0, 170.0, 351.0, 847.0, 5858.0, 1991433.0, 1140285.0, 4868.0, 845.0, 369.0, 171.0, 74.0, 61.0, 30.0, 30.0, 22.0, 8.0, 6.0, 2.0, 5.0, 2.0, 3.0, 5.0, 1.0, 1.0, 0.0, 4.0], "bins": [-194.375, -189.8515625, -185.328125, -180.8046875, -176.28125, -171.7578125, -167.234375, -162.7109375, -158.1875, -153.6640625, -149.140625, -144.6171875, -140.09375, -135.5703125, -131.046875, -126.5234375, -122.0, -117.4765625, -112.953125, -108.4296875, -103.90625, -99.3828125, -94.859375, -90.3359375, -85.8125, -81.2890625, -76.765625, -72.2421875, -67.71875, -63.1953125, -58.671875, -54.1484375, -49.625, -45.1015625, -40.578125, -36.0546875, -31.53125, -27.0078125, -22.484375, -17.9609375, -13.4375, -8.9140625, -4.390625, 0.1328125, 4.65625, 9.1796875, 13.703125, 18.2265625, 22.75, 27.2734375, 31.796875, 36.3203125, 40.84375, 45.3671875, 49.890625, 54.4140625, 58.9375, 63.4609375, 67.984375, 72.5078125, 77.03125, 81.5546875, 86.078125, 90.6015625, 95.125]}, "gradients/decoder.transformer.h.3.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 4.0, 9.0, 35.0, 130.0, 238.0, 288.0, 202.0, 76.0, 23.0, 7.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-388.91888427734375, -375.692626953125, -362.4663391113281, -349.24005126953125, -336.0137939453125, -322.78753662109375, -309.5612487792969, -296.3349609375, -283.10870361328125, -269.8824462890625, -256.6561584472656, -243.4298858642578, -230.20361328125, -216.9773406982422, -203.75106811523438, -190.52479553222656, -177.29852294921875, -164.07225036621094, -150.84597778320312, -137.6197052001953, -124.3934326171875, -111.16716003417969, -97.94088745117188, -84.71461486816406, -71.48834228515625, -58.26206970214844, -45.035797119140625, -31.809524536132812, -18.583251953125, -5.3569793701171875, 7.869293212890625, 21.095565795898438, 34.32183837890625, 47.54811096191406, 60.774383544921875, 74.00065612792969, 87.2269287109375, 100.45320129394531, 113.67947387695312, 126.90574645996094, 140.13201904296875, 153.35829162597656, 166.58456420898438, 179.8108367919922, 193.037109375, 206.2633819580078, 219.48965454101562, 232.71592712402344, 245.94219970703125, 259.16845703125, 272.3947448730469, 285.62103271484375, 298.8472900390625, 312.07354736328125, 325.2998352050781, 338.526123046875, 351.75238037109375, 364.9786376953125, 378.2049255371094, 391.43121337890625, 404.657470703125, 417.88372802734375, 431.1100158691406, 444.3363037109375, 457.56256103515625]}, "gradients/decoder.transformer.h.3.ln_1.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 5.0, 2.0, 5.0, 2.0, 5.0, 5.0, 10.0, 10.0, 13.0, 20.0, 15.0, 17.0, 26.0, 27.0, 30.0, 26.0, 37.0, 39.0, 35.0, 31.0, 36.0, 41.0, 49.0, 48.0, 45.0, 30.0, 43.0, 37.0, 38.0, 47.0, 44.0, 28.0, 19.0, 15.0, 31.0, 19.0, 17.0, 8.0, 11.0, 6.0, 8.0, 9.0, 6.0, 4.0, 4.0, 4.0, 2.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 2.0], "bins": [-118.51641845703125, -114.94306182861328, -111.36970520019531, -107.79634857177734, -104.22299194335938, -100.64962768554688, -97.0762710571289, -93.50291442871094, -89.92955780029297, -86.356201171875, -82.78284454345703, -79.20948791503906, -75.63612365722656, -72.06277465820312, -68.48941040039062, -64.91605377197266, -61.34269714355469, -57.76934051513672, -54.19598388671875, -50.622623443603516, -47.04926681518555, -43.47591018676758, -39.902549743652344, -36.329193115234375, -32.755836486816406, -29.182479858398438, -25.609121322631836, -22.035762786865234, -18.462406158447266, -14.889049530029297, -11.315690994262695, -7.742332458496094, -4.168975830078125, -0.5956182479858398, 2.9777393341064453, 6.5510969161987305, 10.124454498291016, 13.697811126708984, 17.271169662475586, 20.844528198242188, 24.417884826660156, 27.991241455078125, 31.564599990844727, 35.13795852661133, 38.7113151550293, 42.284671783447266, 45.8580322265625, 49.43138885498047, 53.00474548339844, 56.578102111816406, 60.151458740234375, 63.72481918334961, 67.29817199707031, 70.87153625488281, 74.44489288330078, 78.01824951171875, 81.59160614013672, 85.16496276855469, 88.73831939697266, 92.31167602539062, 95.88504028320312, 99.45838928222656, 103.03175354003906, 106.60511016845703, 110.178466796875]}, "gradients/decoder.transformer.h.2.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 6.0, 2.0, 4.0, 6.0, 5.0, 6.0, 7.0, 6.0, 19.0, 10.0, 32.0, 15.0, 19.0, 28.0, 26.0, 39.0, 39.0, 35.0, 43.0, 53.0, 40.0, 54.0, 51.0, 48.0, 41.0, 40.0, 41.0, 36.0, 45.0, 33.0, 23.0, 28.0, 25.0, 20.0, 14.0, 12.0, 16.0, 7.0, 12.0, 7.0, 7.0, 2.0, 6.0, 2.0, 2.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0], "bins": [-15.96875, -15.516357421875, -15.06396484375, -14.611572265625, -14.1591796875, -13.706787109375, -13.25439453125, -12.802001953125, -12.349609375, -11.897216796875, -11.44482421875, -10.992431640625, -10.5400390625, -10.087646484375, -9.63525390625, -9.182861328125, -8.73046875, -8.278076171875, -7.82568359375, -7.373291015625, -6.9208984375, -6.468505859375, -6.01611328125, -5.563720703125, -5.111328125, -4.658935546875, -4.20654296875, -3.754150390625, -3.3017578125, -2.849365234375, -2.39697265625, -1.944580078125, -1.4921875, -1.039794921875, -0.58740234375, -0.135009765625, 0.3173828125, 0.769775390625, 1.22216796875, 1.674560546875, 2.126953125, 2.579345703125, 3.03173828125, 3.484130859375, 3.9365234375, 4.388916015625, 4.84130859375, 5.293701171875, 5.74609375, 6.198486328125, 6.65087890625, 7.103271484375, 7.5556640625, 8.008056640625, 8.46044921875, 8.912841796875, 9.365234375, 9.817626953125, 10.27001953125, 10.722412109375, 11.1748046875, 11.627197265625, 12.07958984375, 12.531982421875, 12.984375]}, "gradients/decoder.transformer.h.2.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 3.0, 3.0, 7.0, 15.0, 14.0, 25.0, 46.0, 66.0, 113.0, 278.0, 496.0, 1119.0, 2834.0, 7918.0, 27006.0, 153036.0, 2605282.0, 1279670.0, 87738.0, 18910.0, 5883.0, 2085.0, 890.0, 408.0, 194.0, 110.0, 50.0, 29.0, 19.0, 18.0, 5.0, 7.0, 6.0, 2.0, 3.0, 4.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-46.46875, -44.85888671875, -43.2490234375, -41.63916015625, -40.029296875, -38.41943359375, -36.8095703125, -35.19970703125, -33.58984375, -31.97998046875, -30.3701171875, -28.76025390625, -27.150390625, -25.54052734375, -23.9306640625, -22.32080078125, -20.7109375, -19.10107421875, -17.4912109375, -15.88134765625, -14.271484375, -12.66162109375, -11.0517578125, -9.44189453125, -7.83203125, -6.22216796875, -4.6123046875, -3.00244140625, -1.392578125, 0.21728515625, 1.8271484375, 3.43701171875, 5.046875, 6.65673828125, 8.2666015625, 9.87646484375, 11.486328125, 13.09619140625, 14.7060546875, 16.31591796875, 17.92578125, 19.53564453125, 21.1455078125, 22.75537109375, 24.365234375, 25.97509765625, 27.5849609375, 29.19482421875, 30.8046875, 32.41455078125, 34.0244140625, 35.63427734375, 37.244140625, 38.85400390625, 40.4638671875, 42.07373046875, 43.68359375, 45.29345703125, 46.9033203125, 48.51318359375, 50.123046875, 51.73291015625, 53.3427734375, 54.95263671875, 56.5625]}, "gradients/decoder.transformer.h.2.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 4.0, 0.0, 0.0, 1.0, 2.0, 5.0, 7.0, 8.0, 9.0, 9.0, 29.0, 39.0, 56.0, 107.0, 168.0, 352.0, 660.0, 1106.0, 664.0, 360.0, 196.0, 114.0, 65.0, 38.0, 21.0, 18.0, 16.0, 8.0, 8.0, 4.0, 6.0, 3.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-38.84375, -37.52783203125, -36.2119140625, -34.89599609375, -33.580078125, -32.26416015625, -30.9482421875, -29.63232421875, -28.31640625, -27.00048828125, -25.6845703125, -24.36865234375, -23.052734375, -21.73681640625, -20.4208984375, -19.10498046875, -17.7890625, -16.47314453125, -15.1572265625, -13.84130859375, -12.525390625, -11.20947265625, -9.8935546875, -8.57763671875, -7.26171875, -5.94580078125, -4.6298828125, -3.31396484375, -1.998046875, -0.68212890625, 0.6337890625, 1.94970703125, 3.265625, 4.58154296875, 5.8974609375, 7.21337890625, 8.529296875, 9.84521484375, 11.1611328125, 12.47705078125, 13.79296875, 15.10888671875, 16.4248046875, 17.74072265625, 19.056640625, 20.37255859375, 21.6884765625, 23.00439453125, 24.3203125, 25.63623046875, 26.9521484375, 28.26806640625, 29.583984375, 30.89990234375, 32.2158203125, 33.53173828125, 34.84765625, 36.16357421875, 37.4794921875, 38.79541015625, 40.111328125, 41.42724609375, 42.7431640625, 44.05908203125, 45.375]}, "gradients/decoder.transformer.h.2.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 5.0, 0.0, 4.0, 3.0, 8.0, 11.0, 12.0, 23.0, 31.0, 44.0, 89.0, 135.0, 200.0, 305.0, 468.0, 857.0, 1656.0, 3777.0, 11641.0, 54592.0, 548942.0, 3274337.0, 250144.0, 33023.0, 8065.0, 2759.0, 1329.0, 691.0, 416.0, 240.0, 171.0, 103.0, 83.0, 45.0, 31.0, 17.0, 8.0, 16.0, 8.0, 2.0, 1.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-57.125, -55.359375, -53.59375, -51.828125, -50.0625, -48.296875, -46.53125, -44.765625, -43.0, -41.234375, -39.46875, -37.703125, -35.9375, -34.171875, -32.40625, -30.640625, -28.875, -27.109375, -25.34375, -23.578125, -21.8125, -20.046875, -18.28125, -16.515625, -14.75, -12.984375, -11.21875, -9.453125, -7.6875, -5.921875, -4.15625, -2.390625, -0.625, 1.140625, 2.90625, 4.671875, 6.4375, 8.203125, 9.96875, 11.734375, 13.5, 15.265625, 17.03125, 18.796875, 20.5625, 22.328125, 24.09375, 25.859375, 27.625, 29.390625, 31.15625, 32.921875, 34.6875, 36.453125, 38.21875, 39.984375, 41.75, 43.515625, 45.28125, 47.046875, 48.8125, 50.578125, 52.34375, 54.109375, 55.875]}, "gradients/decoder.transformer.h.2.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 19.0, 170.0, 499.0, 265.0, 55.0, 6.0, 0.0, 0.0, 1.0], "bins": [-1178.1290283203125, -1157.699951171875, -1137.2708740234375, -1116.8419189453125, -1096.412841796875, -1075.9837646484375, -1055.5546875, -1035.125732421875, -1014.6966552734375, -994.267578125, -973.8385620117188, -953.4094848632812, -932.98046875, -912.5513916015625, -892.1223754882812, -871.6932983398438, -851.2642211914062, -830.8351440429688, -810.4061279296875, -789.97705078125, -769.5480346679688, -749.1189575195312, -728.68994140625, -708.2608642578125, -687.831787109375, -667.4027099609375, -646.9736938476562, -626.5446166992188, -606.1156005859375, -585.6865234375, -565.2575073242188, -544.8284301757812, -524.3994750976562, -503.9704284667969, -483.5413818359375, -463.1123352050781, -442.68328857421875, -422.25421142578125, -401.8251647949219, -381.3961181640625, -360.9670715332031, -340.53802490234375, -320.1089782714844, -299.679931640625, -279.2508544921875, -258.82183837890625, -238.39276123046875, -217.96371459960938, -197.53466796875, -177.10562133789062, -156.67657470703125, -136.2475128173828, -115.81846618652344, -95.38941955566406, -74.96036529541016, -54.53131103515625, -34.102264404296875, -13.673213958740234, 6.755836486816406, 27.184886932373047, 47.61393737792969, 68.04298400878906, 88.47203826904297, 108.90109252929688, 129.33013916015625]}, "gradients/decoder.transformer.h.2.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 4.0, 3.0, 3.0, 6.0, 6.0, 5.0, 6.0, 14.0, 13.0, 8.0, 12.0, 26.0, 17.0, 27.0, 19.0, 30.0, 44.0, 30.0, 32.0, 42.0, 42.0, 47.0, 45.0, 43.0, 49.0, 33.0, 43.0, 55.0, 32.0, 33.0, 38.0, 26.0, 30.0, 24.0, 20.0, 17.0, 12.0, 16.0, 15.0, 8.0, 11.0, 10.0, 6.0, 2.0, 5.0, 3.0, 2.0, 0.0, 2.0, 1.0, 0.0, 2.0, 1.0], "bins": [-107.2833251953125, -104.27992248535156, -101.2765121459961, -98.27310180664062, -95.26969909667969, -92.26629638671875, -89.26288604736328, -86.25947570800781, -83.25607299804688, -80.25267028808594, -77.24925994873047, -74.245849609375, -71.24244689941406, -68.23904418945312, -65.23563385009766, -62.23222732543945, -59.22882080078125, -56.22541427612305, -53.222007751464844, -50.21860122680664, -47.21519470214844, -44.211788177490234, -41.20838165283203, -38.20497512817383, -35.201568603515625, -32.19816207885742, -29.19475555419922, -26.191349029541016, -23.187942504882812, -20.18453598022461, -17.181129455566406, -14.177722930908203, -11.17431640625, -8.170909881591797, -5.167503356933594, -2.1640968322753906, 0.8393096923828125, 3.8427162170410156, 6.846122741699219, 9.849529266357422, 12.852935791015625, 15.856342315673828, 18.85974884033203, 21.863155364990234, 24.866561889648438, 27.86996841430664, 30.873374938964844, 33.87678146362305, 36.88018798828125, 39.88359451293945, 42.887001037597656, 45.89040756225586, 48.89381408691406, 51.897220611572266, 54.90062713623047, 57.90403366088867, 60.907440185546875, 63.91084671020508, 66.91425323486328, 69.91766357421875, 72.92106628417969, 75.92446899414062, 78.9278793334961, 81.93128967285156, 84.9346923828125]}, "gradients/decoder.transformer.h.2.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 4.0, 0.0, 6.0, 3.0, 5.0, 5.0, 5.0, 6.0, 7.0, 10.0, 17.0, 28.0, 22.0, 28.0, 25.0, 31.0, 27.0, 48.0, 37.0, 48.0, 40.0, 39.0, 43.0, 57.0, 60.0, 64.0, 36.0, 39.0, 34.0, 32.0, 40.0, 27.0, 28.0, 23.0, 21.0, 12.0, 10.0, 9.0, 9.0, 7.0, 7.0, 7.0, 5.0, 4.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-17.96875, -17.441162109375, -16.91357421875, -16.385986328125, -15.8583984375, -15.330810546875, -14.80322265625, -14.275634765625, -13.748046875, -13.220458984375, -12.69287109375, -12.165283203125, -11.6376953125, -11.110107421875, -10.58251953125, -10.054931640625, -9.52734375, -8.999755859375, -8.47216796875, -7.944580078125, -7.4169921875, -6.889404296875, -6.36181640625, -5.834228515625, -5.306640625, -4.779052734375, -4.25146484375, -3.723876953125, -3.1962890625, -2.668701171875, -2.14111328125, -1.613525390625, -1.0859375, -0.558349609375, -0.03076171875, 0.496826171875, 1.0244140625, 1.552001953125, 2.07958984375, 2.607177734375, 3.134765625, 3.662353515625, 4.18994140625, 4.717529296875, 5.2451171875, 5.772705078125, 6.30029296875, 6.827880859375, 7.35546875, 7.883056640625, 8.41064453125, 8.938232421875, 9.4658203125, 9.993408203125, 10.52099609375, 11.048583984375, 11.576171875, 12.103759765625, 12.63134765625, 13.158935546875, 13.6865234375, 14.214111328125, 14.74169921875, 15.269287109375, 15.796875]}, "gradients/decoder.transformer.h.2.crossattention.c_proj.weight": {"_type": "histogram", "values": [4.0, 4.0, 2.0, 2.0, 3.0, 9.0, 17.0, 7.0, 12.0, 18.0, 49.0, 50.0, 70.0, 107.0, 177.0, 226.0, 332.0, 473.0, 690.0, 1110.0, 1650.0, 2599.0, 3842.0, 5886.0, 9412.0, 15028.0, 23334.0, 38148.0, 65732.0, 129366.0, 356313.0, 184817.0, 83933.0, 46903.0, 28570.0, 17778.0, 11288.0, 7033.0, 4573.0, 3059.0, 2006.0, 1294.0, 800.0, 581.0, 364.0, 287.0, 194.0, 128.0, 92.0, 65.0, 32.0, 19.0, 35.0, 11.0, 13.0, 6.0, 6.0, 4.0, 9.0, 2.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.61669921875, -0.5966110229492188, -0.5765228271484375, -0.5564346313476562, -0.536346435546875, -0.5162582397460938, -0.4961700439453125, -0.47608184814453125, -0.45599365234375, -0.43590545654296875, -0.4158172607421875, -0.39572906494140625, -0.375640869140625, -0.35555267333984375, -0.3354644775390625, -0.31537628173828125, -0.2952880859375, -0.27519989013671875, -0.2551116943359375, -0.23502349853515625, -0.214935302734375, -0.19484710693359375, -0.1747589111328125, -0.15467071533203125, -0.13458251953125, -0.11449432373046875, -0.0944061279296875, -0.07431793212890625, -0.054229736328125, -0.03414154052734375, -0.0140533447265625, 0.00603485107421875, 0.026123046875, 0.04621124267578125, 0.0662994384765625, 0.08638763427734375, 0.106475830078125, 0.12656402587890625, 0.1466522216796875, 0.16674041748046875, 0.18682861328125, 0.20691680908203125, 0.2270050048828125, 0.24709320068359375, 0.267181396484375, 0.28726959228515625, 0.3073577880859375, 0.32744598388671875, 0.3475341796875, 0.36762237548828125, 0.3877105712890625, 0.40779876708984375, 0.427886962890625, 0.44797515869140625, 0.4680633544921875, 0.48815155029296875, 0.50823974609375, 0.5283279418945312, 0.5484161376953125, 0.5685043334960938, 0.588592529296875, 0.6086807250976562, 0.6287689208984375, 0.6488571166992188, 0.6689453125]}, "gradients/decoder.transformer.h.2.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 4.0, 5.0, 6.0, 4.0, 9.0, 6.0, 10.0, 5.0, 14.0, 17.0, 16.0, 25.0, 23.0, 26.0, 31.0, 31.0, 28.0, 31.0, 28.0, 33.0, 48.0, 53.0, 58.0, 1075.0, 45.0, 43.0, 45.0, 37.0, 30.0, 36.0, 22.0, 29.0, 28.0, 28.0, 17.0, 18.0, 12.0, 12.0, 8.0, 9.0, 6.0, 5.0, 9.0, 1.0, 4.0, 2.0, 2.0, 1.0, 2.0, 0.0, 2.0, 3.0, 1.0], "bins": [-12.3203125, -11.9600830078125, -11.599853515625, -11.2396240234375, -10.87939453125, -10.5191650390625, -10.158935546875, -9.7987060546875, -9.4384765625, -9.0782470703125, -8.718017578125, -8.3577880859375, -7.99755859375, -7.6373291015625, -7.277099609375, -6.9168701171875, -6.556640625, -6.1964111328125, -5.836181640625, -5.4759521484375, -5.11572265625, -4.7554931640625, -4.395263671875, -4.0350341796875, -3.6748046875, -3.3145751953125, -2.954345703125, -2.5941162109375, -2.23388671875, -1.8736572265625, -1.513427734375, -1.1531982421875, -0.79296875, -0.4327392578125, -0.072509765625, 0.2877197265625, 0.64794921875, 1.0081787109375, 1.368408203125, 1.7286376953125, 2.0888671875, 2.4490966796875, 2.809326171875, 3.1695556640625, 3.52978515625, 3.8900146484375, 4.250244140625, 4.6104736328125, 4.970703125, 5.3309326171875, 5.691162109375, 6.0513916015625, 6.41162109375, 6.7718505859375, 7.132080078125, 7.4923095703125, 7.8525390625, 8.2127685546875, 8.572998046875, 8.9332275390625, 9.29345703125, 9.6536865234375, 10.013916015625, 10.3741455078125, 10.734375]}, "gradients/decoder.transformer.h.2.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 5.0, 4.0, 7.0, 13.0, 30.0, 28.0, 47.0, 53.0, 86.0, 120.0, 198.0, 234.0, 301.0, 453.0, 630.0, 878.0, 1266.0, 1774.0, 2494.0, 3577.0, 5350.0, 7749.0, 11309.0, 16893.0, 25467.0, 39165.0, 61650.0, 105991.0, 1336356.0, 213287.0, 94951.0, 56847.0, 36521.0, 23610.0, 15631.0, 10635.0, 7120.0, 4886.0, 3396.0, 2336.0, 1702.0, 1144.0, 852.0, 596.0, 464.0, 305.0, 216.0, 166.0, 108.0, 72.0, 39.0, 38.0, 35.0, 24.0, 18.0, 8.0, 5.0, 4.0, 1.0, 3.0], "bins": [-0.29736328125, -0.2883262634277344, -0.27928924560546875, -0.2702522277832031, -0.2612152099609375, -0.2521781921386719, -0.24314117431640625, -0.23410415649414062, -0.225067138671875, -0.21603012084960938, -0.20699310302734375, -0.19795608520507812, -0.1889190673828125, -0.17988204956054688, -0.17084503173828125, -0.16180801391601562, -0.15277099609375, -0.14373397827148438, -0.13469696044921875, -0.12565994262695312, -0.1166229248046875, -0.10758590698242188, -0.09854888916015625, -0.08951187133789062, -0.080474853515625, -0.07143783569335938, -0.06240081787109375, -0.053363800048828125, -0.0443267822265625, -0.035289764404296875, -0.02625274658203125, -0.017215728759765625, -0.0081787109375, 0.000858306884765625, 0.00989532470703125, 0.018932342529296875, 0.0279693603515625, 0.037006378173828125, 0.04604339599609375, 0.055080413818359375, 0.064117431640625, 0.07315444946289062, 0.08219146728515625, 0.09122848510742188, 0.1002655029296875, 0.10930252075195312, 0.11833953857421875, 0.12737655639648438, 0.13641357421875, 0.14545059204101562, 0.15448760986328125, 0.16352462768554688, 0.1725616455078125, 0.18159866333007812, 0.19063568115234375, 0.19967269897460938, 0.208709716796875, 0.21774673461914062, 0.22678375244140625, 0.23582077026367188, 0.2448577880859375, 0.2538948059082031, 0.26293182373046875, 0.2719688415527344, 0.281005859375]}, "gradients/decoder.transformer.h.2.crossattention.q_attn.bias": {"_type": "histogram", "values": [2.0, 2.0, 0.0, 0.0, 2.0, 2.0, 0.0, 3.0, 5.0, 3.0, 3.0, 3.0, 4.0, 6.0, 6.0, 8.0, 14.0, 11.0, 20.0, 16.0, 18.0, 22.0, 37.0, 34.0, 46.0, 41.0, 65.0, 96.0, 102.0, 87.0, 60.0, 57.0, 43.0, 39.0, 30.0, 24.0, 24.0, 17.0, 12.0, 6.0, 7.0, 11.0, 4.0, 5.0, 2.0, 5.0, 4.0, 3.0, 3.0, 1.0, 2.0, 3.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.3172626495361328e-05, -1.2709759175777435e-05, -1.2246891856193542e-05, -1.178402453660965e-05, -1.1321157217025757e-05, -1.0858289897441864e-05, -1.0395422577857971e-05, -9.932555258274078e-06, -9.469687938690186e-06, -9.006820619106293e-06, -8.5439532995224e-06, -8.081085979938507e-06, -7.618218660354614e-06, -7.1553513407707214e-06, -6.692484021186829e-06, -6.229616701602936e-06, -5.766749382019043e-06, -5.30388206243515e-06, -4.841014742851257e-06, -4.3781474232673645e-06, -3.915280103683472e-06, -3.452412784099579e-06, -2.989545464515686e-06, -2.5266781449317932e-06, -2.0638108253479004e-06, -1.6009435057640076e-06, -1.1380761861801147e-06, -6.752088665962219e-07, -2.123415470123291e-07, 2.505257725715637e-07, 7.133930921554565e-07, 1.1762604117393494e-06, 1.6391277313232422e-06, 2.101995050907135e-06, 2.564862370491028e-06, 3.0277296900749207e-06, 3.4905970096588135e-06, 3.953464329242706e-06, 4.416331648826599e-06, 4.879198968410492e-06, 5.342066287994385e-06, 5.804933607578278e-06, 6.26780092716217e-06, 6.730668246746063e-06, 7.193535566329956e-06, 7.656402885913849e-06, 8.119270205497742e-06, 8.582137525081635e-06, 9.045004844665527e-06, 9.50787216424942e-06, 9.970739483833313e-06, 1.0433606803417206e-05, 1.0896474123001099e-05, 1.1359341442584991e-05, 1.1822208762168884e-05, 1.2285076081752777e-05, 1.274794340133667e-05, 1.3210810720920563e-05, 1.3673678040504456e-05, 1.4136545360088348e-05, 1.4599412679672241e-05, 1.5062279999256134e-05, 1.5525147318840027e-05, 1.598801463842392e-05, 1.6450881958007812e-05]}, "gradients/decoder.transformer.h.2.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 5.0, 3.0, 3.0, 6.0, 6.0, 11.0, 7.0, 9.0, 6.0, 11.0, 17.0, 21.0, 22.0, 32.0, 51.0, 72.0, 81.0, 110.0, 208.0, 415.0, 1881.0, 28399.0, 918505.0, 93462.0, 3799.0, 601.0, 259.0, 154.0, 87.0, 61.0, 46.0, 48.0, 29.0, 26.0, 20.0, 19.0, 9.0, 15.0, 10.0, 7.0, 10.0, 5.0, 4.0, 3.0, 1.0, 4.0, 2.0, 2.0, 2.0, 1.0, 2.0], "bins": [-0.00025653839111328125, -0.0002493448555469513, -0.00024215131998062134, -0.00023495778441429138, -0.00022776424884796143, -0.00022057071328163147, -0.00021337717771530151, -0.00020618364214897156, -0.0001989901065826416, -0.00019179657101631165, -0.0001846030354499817, -0.00017740949988365173, -0.00017021596431732178, -0.00016302242875099182, -0.00015582889318466187, -0.0001486353576183319, -0.00014144182205200195, -0.000134248286485672, -0.00012705475091934204, -0.00011986121535301208, -0.00011266767978668213, -0.00010547414422035217, -9.828060865402222e-05, -9.108707308769226e-05, -8.38935375213623e-05, -7.670000195503235e-05, -6.950646638870239e-05, -6.231293082237244e-05, -5.511939525604248e-05, -4.7925859689712524e-05, -4.073232412338257e-05, -3.353878855705261e-05, -2.6345252990722656e-05, -1.91517174243927e-05, -1.1958181858062744e-05, -4.764646291732788e-06, 2.428889274597168e-06, 9.622424840927124e-06, 1.681596040725708e-05, 2.4009495973587036e-05, 3.120303153991699e-05, 3.839656710624695e-05, 4.5590102672576904e-05, 5.278363823890686e-05, 5.9977173805236816e-05, 6.717070937156677e-05, 7.436424493789673e-05, 8.155778050422668e-05, 8.875131607055664e-05, 9.59448516368866e-05, 0.00010313838720321655, 0.00011033192276954651, 0.00011752545833587646, 0.00012471899390220642, 0.00013191252946853638, 0.00013910606503486633, 0.0001462996006011963, 0.00015349313616752625, 0.0001606866717338562, 0.00016788020730018616, 0.0001750737428665161, 0.00018226727843284607, 0.00018946081399917603, 0.00019665434956550598, 0.00020384788513183594]}, "gradients/decoder.transformer.h.2.ln_cross_attn.weight": {"_type": "histogram", "values": [6.0, 12.0, 54.0, 176.0, 366.0, 261.0, 98.0, 32.0, 9.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.2814425544056576e-06, -4.1670032260299195e-06, -3.0525638976541813e-06, -1.9381245692784432e-06, -8.236852409027051e-07, 2.90754087473033e-07, 1.4051934158487711e-06, 2.5196327442245092e-06, 3.6340720726002473e-06, 4.7485114009759855e-06, 5.8629507293517236e-06, 6.977390057727462e-06, 8.091828931355849e-06, 9.206269169226289e-06, 1.0320707588107325e-05, 1.1435147825977765e-05, 1.2549586244858801e-05, 1.366402557323454e-05, 1.4778464901610278e-05, 1.5892903320491314e-05, 1.7007343558361754e-05, 1.8121783796232194e-05, 1.923622221511323e-05, 2.0350660633994266e-05, 2.1465100871864706e-05, 2.2579541109735146e-05, 2.3693979528616183e-05, 2.480841794749722e-05, 2.592285818536766e-05, 2.70372984232381e-05, 2.8151736842119135e-05, 2.926617526100017e-05, 3.038061549887061e-05, 3.149505573674105e-05, 3.260949597461149e-05, 3.3723932574503124e-05, 3.4838372812373564e-05, 3.5952813050244004e-05, 3.7067249650135636e-05, 3.8181689888006076e-05, 3.9296130125876516e-05, 4.0410570363746956e-05, 4.1525010601617396e-05, 4.263944720150903e-05, 4.375388743937947e-05, 4.486832767724991e-05, 4.598276427714154e-05, 4.709720451501198e-05, 4.821164475288242e-05, 4.932608499075286e-05, 5.04405252286233e-05, 5.1554961828514934e-05, 5.2669402066385373e-05, 5.378384230425581e-05, 5.4898278904147446e-05, 5.6012719142017886e-05, 5.7127159379888326e-05, 5.8241599617758766e-05, 5.9356039855629206e-05, 6.047047645552084e-05, 6.158491305541247e-05, 6.269935693126172e-05, 6.381379353115335e-05, 6.49282374070026e-05, 6.604267400689423e-05]}, "gradients/decoder.transformer.h.2.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 2.0, 1.0, 6.0, 2.0, 5.0, 7.0, 7.0, 5.0, 10.0, 10.0, 21.0, 20.0, 21.0, 33.0, 19.0, 38.0, 36.0, 55.0, 39.0, 34.0, 56.0, 44.0, 50.0, 34.0, 42.0, 40.0, 41.0, 36.0, 39.0, 27.0, 26.0, 33.0, 22.0, 36.0, 19.0, 11.0, 8.0, 19.0, 8.0, 15.0, 6.0, 8.0, 4.0, 6.0, 4.0, 3.0, 2.0, 1.0, 1.0, 2.0, 0.0, 1.0], "bins": [-7.867813110351562e-06, -7.640570402145386e-06, -7.413327693939209e-06, -7.186084985733032e-06, -6.9588422775268555e-06, -6.731599569320679e-06, -6.504356861114502e-06, -6.277114152908325e-06, -6.0498714447021484e-06, -5.822628736495972e-06, -5.595386028289795e-06, -5.368143320083618e-06, -5.140900611877441e-06, -4.913657903671265e-06, -4.686415195465088e-06, -4.459172487258911e-06, -4.231929779052734e-06, -4.004687070846558e-06, -3.777444362640381e-06, -3.550201654434204e-06, -3.3229589462280273e-06, -3.0957162380218506e-06, -2.868473529815674e-06, -2.641230821609497e-06, -2.4139881134033203e-06, -2.1867454051971436e-06, -1.959502696990967e-06, -1.73225998878479e-06, -1.5050172805786133e-06, -1.2777745723724365e-06, -1.0505318641662598e-06, -8.23289155960083e-07, -5.960464477539062e-07, -3.688037395477295e-07, -1.4156103134155273e-07, 8.568167686462402e-08, 3.129243850708008e-07, 5.401670932769775e-07, 7.674098014831543e-07, 9.94652509689331e-07, 1.2218952178955078e-06, 1.4491379261016846e-06, 1.6763806343078613e-06, 1.903623342514038e-06, 2.130866050720215e-06, 2.3581087589263916e-06, 2.5853514671325684e-06, 2.812594175338745e-06, 3.039836883544922e-06, 3.2670795917510986e-06, 3.4943222999572754e-06, 3.721565008163452e-06, 3.948807716369629e-06, 4.176050424575806e-06, 4.403293132781982e-06, 4.630535840988159e-06, 4.857778549194336e-06, 5.085021257400513e-06, 5.3122639656066895e-06, 5.539506673812866e-06, 5.766749382019043e-06, 5.99399209022522e-06, 6.2212347984313965e-06, 6.448477506637573e-06, 6.67572021484375e-06]}, "gradients/decoder.transformer.h.2.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 4.0, 0.0, 6.0, 3.0, 5.0, 5.0, 5.0, 6.0, 7.0, 10.0, 17.0, 28.0, 22.0, 28.0, 25.0, 31.0, 27.0, 48.0, 37.0, 48.0, 40.0, 39.0, 43.0, 57.0, 60.0, 64.0, 36.0, 39.0, 34.0, 32.0, 40.0, 27.0, 28.0, 23.0, 21.0, 12.0, 10.0, 9.0, 9.0, 7.0, 7.0, 7.0, 5.0, 4.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-17.96875, -17.441162109375, -16.91357421875, -16.385986328125, -15.8583984375, -15.330810546875, -14.80322265625, -14.275634765625, -13.748046875, -13.220458984375, -12.69287109375, -12.165283203125, -11.6376953125, -11.110107421875, -10.58251953125, -10.054931640625, -9.52734375, -8.999755859375, -8.47216796875, -7.944580078125, -7.4169921875, -6.889404296875, -6.36181640625, -5.834228515625, -5.306640625, -4.779052734375, -4.25146484375, -3.723876953125, -3.1962890625, -2.668701171875, -2.14111328125, -1.613525390625, -1.0859375, -0.558349609375, -0.03076171875, 0.496826171875, 1.0244140625, 1.552001953125, 2.07958984375, 2.607177734375, 3.134765625, 3.662353515625, 4.18994140625, 4.717529296875, 5.2451171875, 5.772705078125, 6.30029296875, 6.827880859375, 7.35546875, 7.883056640625, 8.41064453125, 8.938232421875, 9.4658203125, 9.993408203125, 10.52099609375, 11.048583984375, 11.576171875, 12.103759765625, 12.63134765625, 13.158935546875, 13.6865234375, 14.214111328125, 14.74169921875, 15.269287109375, 15.796875]}, "gradients/decoder.transformer.h.2.attn.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 2.0, 2.0, 3.0, 1.0, 3.0, 3.0, 13.0, 9.0, 15.0, 25.0, 27.0, 31.0, 52.0, 81.0, 147.0, 197.0, 337.0, 526.0, 979.0, 1764.0, 3500.0, 7415.0, 16635.0, 40682.0, 101004.0, 266532.0, 370835.0, 140232.0, 55265.0, 22823.0, 9691.0, 4574.0, 2259.0, 1167.0, 674.0, 388.0, 230.0, 174.0, 72.0, 59.0, 46.0, 24.0, 19.0, 16.0, 14.0, 8.0, 5.0, 5.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-24.921875, -24.067626953125, -23.21337890625, -22.359130859375, -21.5048828125, -20.650634765625, -19.79638671875, -18.942138671875, -18.087890625, -17.233642578125, -16.37939453125, -15.525146484375, -14.6708984375, -13.816650390625, -12.96240234375, -12.108154296875, -11.25390625, -10.399658203125, -9.54541015625, -8.691162109375, -7.8369140625, -6.982666015625, -6.12841796875, -5.274169921875, -4.419921875, -3.565673828125, -2.71142578125, -1.857177734375, -1.0029296875, -0.148681640625, 0.70556640625, 1.559814453125, 2.4140625, 3.268310546875, 4.12255859375, 4.976806640625, 5.8310546875, 6.685302734375, 7.53955078125, 8.393798828125, 9.248046875, 10.102294921875, 10.95654296875, 11.810791015625, 12.6650390625, 13.519287109375, 14.37353515625, 15.227783203125, 16.08203125, 16.936279296875, 17.79052734375, 18.644775390625, 19.4990234375, 20.353271484375, 21.20751953125, 22.061767578125, 22.916015625, 23.770263671875, 24.62451171875, 25.478759765625, 26.3330078125, 27.187255859375, 28.04150390625, 28.895751953125, 29.75]}, "gradients/decoder.transformer.h.2.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 6.0, 4.0, 10.0, 9.0, 15.0, 6.0, 19.0, 12.0, 31.0, 28.0, 42.0, 58.0, 95.0, 108.0, 161.0, 235.0, 1461.0, 226.0, 150.0, 91.0, 66.0, 45.0, 42.0, 34.0, 18.0, 19.0, 14.0, 15.0, 5.0, 8.0, 8.0, 3.0, 2.0, 5.0, 1.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0], "bins": [-56.96875, -54.95751953125, -52.9462890625, -50.93505859375, -48.923828125, -46.91259765625, -44.9013671875, -42.89013671875, -40.87890625, -38.86767578125, -36.8564453125, -34.84521484375, -32.833984375, -30.82275390625, -28.8115234375, -26.80029296875, -24.7890625, -22.77783203125, -20.7666015625, -18.75537109375, -16.744140625, -14.73291015625, -12.7216796875, -10.71044921875, -8.69921875, -6.68798828125, -4.6767578125, -2.66552734375, -0.654296875, 1.35693359375, 3.3681640625, 5.37939453125, 7.390625, 9.40185546875, 11.4130859375, 13.42431640625, 15.435546875, 17.44677734375, 19.4580078125, 21.46923828125, 23.48046875, 25.49169921875, 27.5029296875, 29.51416015625, 31.525390625, 33.53662109375, 35.5478515625, 37.55908203125, 39.5703125, 41.58154296875, 43.5927734375, 45.60400390625, 47.615234375, 49.62646484375, 51.6376953125, 53.64892578125, 55.66015625, 57.67138671875, 59.6826171875, 61.69384765625, 63.705078125, 65.71630859375, 67.7275390625, 69.73876953125, 71.75]}, "gradients/decoder.transformer.h.2.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 2.0, 2.0, 5.0, 0.0, 4.0, 3.0, 2.0, 2.0, 9.0, 14.0, 15.0, 32.0, 34.0, 39.0, 60.0, 96.0, 162.0, 297.0, 620.0, 1637.0, 15152.0, 3034155.0, 88934.0, 2695.0, 775.0, 389.0, 198.0, 132.0, 71.0, 57.0, 28.0, 27.0, 16.0, 16.0, 9.0, 7.0, 8.0, 5.0, 4.0, 0.0, 0.0, 2.0, 2.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 2.0], "bins": [-169.625, -164.732421875, -159.83984375, -154.947265625, -150.0546875, -145.162109375, -140.26953125, -135.376953125, -130.484375, -125.591796875, -120.69921875, -115.806640625, -110.9140625, -106.021484375, -101.12890625, -96.236328125, -91.34375, -86.451171875, -81.55859375, -76.666015625, -71.7734375, -66.880859375, -61.98828125, -57.095703125, -52.203125, -47.310546875, -42.41796875, -37.525390625, -32.6328125, -27.740234375, -22.84765625, -17.955078125, -13.0625, -8.169921875, -3.27734375, 1.615234375, 6.5078125, 11.400390625, 16.29296875, 21.185546875, 26.078125, 30.970703125, 35.86328125, 40.755859375, 45.6484375, 50.541015625, 55.43359375, 60.326171875, 65.21875, 70.111328125, 75.00390625, 79.896484375, 84.7890625, 89.681640625, 94.57421875, 99.466796875, 104.359375, 109.251953125, 114.14453125, 119.037109375, 123.9296875, 128.822265625, 133.71484375, 138.607421875, 143.5]}, "gradients/decoder.transformer.h.2.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 42.0, 414.0, 491.0, 60.0, 5.0, 1.0, 3.0], "bins": [-2536.422119140625, -2493.509765625, -2450.59716796875, -2407.684814453125, -2364.7724609375, -2321.85986328125, -2278.947509765625, -2236.03515625, -2193.12255859375, -2150.210205078125, -2107.297607421875, -2064.38525390625, -2021.472900390625, -1978.5604248046875, -1935.64794921875, -1892.735595703125, -1849.8232421875, -1806.9107666015625, -1763.9984130859375, -1721.0859375, -1678.173583984375, -1635.2611083984375, -1592.3486328125, -1549.436279296875, -1506.5238037109375, -1463.611328125, -1420.698974609375, -1377.7864990234375, -1334.8740234375, -1291.961669921875, -1249.0491943359375, -1206.13671875, -1163.224365234375, -1120.3118896484375, -1077.3995361328125, -1034.487060546875, -991.5746459960938, -948.6622314453125, -905.749755859375, -862.8373413085938, -819.9248657226562, -777.012451171875, -734.0999755859375, -691.1875610351562, -648.275146484375, -605.3627319335938, -562.4503173828125, -519.537841796875, -476.62542724609375, -433.7130126953125, -390.8005676269531, -347.88812255859375, -304.9757080078125, -262.06329345703125, -219.15084838867188, -176.2384033203125, -133.3260040283203, -90.41357421875, -47.50114440917969, -4.588714599609375, 38.32371520996094, 81.23614501953125, 124.14857482910156, 167.06101989746094, 209.9734344482422]}, "gradients/decoder.transformer.h.2.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 3.0, 2.0, 1.0, 3.0, 5.0, 5.0, 2.0, 8.0, 5.0, 8.0, 15.0, 11.0, 12.0, 22.0, 20.0, 25.0, 29.0, 24.0, 29.0, 34.0, 41.0, 56.0, 43.0, 32.0, 45.0, 54.0, 45.0, 38.0, 47.0, 41.0, 47.0, 32.0, 28.0, 26.0, 37.0, 25.0, 26.0, 15.0, 7.0, 17.0, 13.0, 12.0, 4.0, 5.0, 6.0, 7.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-271.48138427734375, -263.4753112792969, -255.46923828125, -247.46315002441406, -239.4570770263672, -231.4510040283203, -223.44491577148438, -215.4388427734375, -207.43276977539062, -199.42669677734375, -191.42062377929688, -183.41453552246094, -175.40846252441406, -167.4023895263672, -159.39630126953125, -151.39022827148438, -143.3841552734375, -135.37808227539062, -127.37200164794922, -119.36592102050781, -111.35984802246094, -103.35377502441406, -95.34769439697266, -87.34161376953125, -79.33554077148438, -71.3294677734375, -63.323387145996094, -55.31731033325195, -47.31123352050781, -39.30515670776367, -31.29907989501953, -23.29300308227539, -15.286956787109375, -7.280879974365234, 0.7251968383789062, 8.731273651123047, 16.737350463867188, 24.743427276611328, 32.74950408935547, 40.75558090209961, 48.76165771484375, 56.76773452758789, 64.77381134033203, 72.77989196777344, 80.78596496582031, 88.79203796386719, 96.7981185913086, 104.80419921875, 112.81027221679688, 120.81634521484375, 128.82241821289062, 136.82850646972656, 144.83457946777344, 152.8406524658203, 160.84674072265625, 168.85281372070312, 176.85888671875, 184.86495971679688, 192.87103271484375, 200.8771209716797, 208.88319396972656, 216.88926696777344, 224.89535522460938, 232.90142822265625, 240.90750122070312]}, "gradients/decoder.transformer.h.1.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 2.0, 3.0, 6.0, 5.0, 5.0, 6.0, 2.0, 12.0, 12.0, 13.0, 10.0, 17.0, 20.0, 15.0, 27.0, 23.0, 30.0, 26.0, 33.0, 45.0, 36.0, 52.0, 45.0, 42.0, 48.0, 52.0, 35.0, 46.0, 45.0, 40.0, 29.0, 22.0, 30.0, 27.0, 20.0, 22.0, 23.0, 14.0, 12.0, 11.0, 7.0, 8.0, 8.0, 7.0, 3.0, 3.0, 5.0, 8.0, 0.0, 0.0, 0.0, 1.0, 2.0], "bins": [-18.46875, -17.944580078125, -17.42041015625, -16.896240234375, -16.3720703125, -15.847900390625, -15.32373046875, -14.799560546875, -14.275390625, -13.751220703125, -13.22705078125, -12.702880859375, -12.1787109375, -11.654541015625, -11.13037109375, -10.606201171875, -10.08203125, -9.557861328125, -9.03369140625, -8.509521484375, -7.9853515625, -7.461181640625, -6.93701171875, -6.412841796875, -5.888671875, -5.364501953125, -4.84033203125, -4.316162109375, -3.7919921875, -3.267822265625, -2.74365234375, -2.219482421875, -1.6953125, -1.171142578125, -0.64697265625, -0.122802734375, 0.4013671875, 0.925537109375, 1.44970703125, 1.973876953125, 2.498046875, 3.022216796875, 3.54638671875, 4.070556640625, 4.5947265625, 5.118896484375, 5.64306640625, 6.167236328125, 6.69140625, 7.215576171875, 7.73974609375, 8.263916015625, 8.7880859375, 9.312255859375, 9.83642578125, 10.360595703125, 10.884765625, 11.408935546875, 11.93310546875, 12.457275390625, 12.9814453125, 13.505615234375, 14.02978515625, 14.553955078125, 15.078125]}, "gradients/decoder.transformer.h.1.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 4.0, 2.0, 3.0, 8.0, 13.0, 23.0, 28.0, 39.0, 74.0, 136.0, 176.0, 300.0, 519.0, 869.0, 1459.0, 2568.0, 4705.0, 9859.0, 22982.0, 69291.0, 550395.0, 2824701.0, 587587.0, 74404.0, 23437.0, 9754.0, 4835.0, 2593.0, 1434.0, 842.0, 485.0, 300.0, 191.0, 117.0, 64.0, 29.0, 26.0, 15.0, 12.0, 1.0, 6.0, 2.0, 1.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-40.78125, -39.3994140625, -38.017578125, -36.6357421875, -35.25390625, -33.8720703125, -32.490234375, -31.1083984375, -29.7265625, -28.3447265625, -26.962890625, -25.5810546875, -24.19921875, -22.8173828125, -21.435546875, -20.0537109375, -18.671875, -17.2900390625, -15.908203125, -14.5263671875, -13.14453125, -11.7626953125, -10.380859375, -8.9990234375, -7.6171875, -6.2353515625, -4.853515625, -3.4716796875, -2.08984375, -0.7080078125, 0.673828125, 2.0556640625, 3.4375, 4.8193359375, 6.201171875, 7.5830078125, 8.96484375, 10.3466796875, 11.728515625, 13.1103515625, 14.4921875, 15.8740234375, 17.255859375, 18.6376953125, 20.01953125, 21.4013671875, 22.783203125, 24.1650390625, 25.546875, 26.9287109375, 28.310546875, 29.6923828125, 31.07421875, 32.4560546875, 33.837890625, 35.2197265625, 36.6015625, 37.9833984375, 39.365234375, 40.7470703125, 42.12890625, 43.5107421875, 44.892578125, 46.2744140625, 47.65625]}, "gradients/decoder.transformer.h.1.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 4.0, 2.0, 5.0, 8.0, 16.0, 28.0, 35.0, 82.0, 183.0, 372.0, 922.0, 1217.0, 608.0, 267.0, 134.0, 88.0, 39.0, 25.0, 18.0, 10.0, 9.0, 2.0, 3.0, 1.0, 1.0, 4.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-63.9375, -62.1240234375, -60.310546875, -58.4970703125, -56.68359375, -54.8701171875, -53.056640625, -51.2431640625, -49.4296875, -47.6162109375, -45.802734375, -43.9892578125, -42.17578125, -40.3623046875, -38.548828125, -36.7353515625, -34.921875, -33.1083984375, -31.294921875, -29.4814453125, -27.66796875, -25.8544921875, -24.041015625, -22.2275390625, -20.4140625, -18.6005859375, -16.787109375, -14.9736328125, -13.16015625, -11.3466796875, -9.533203125, -7.7197265625, -5.90625, -4.0927734375, -2.279296875, -0.4658203125, 1.34765625, 3.1611328125, 4.974609375, 6.7880859375, 8.6015625, 10.4150390625, 12.228515625, 14.0419921875, 15.85546875, 17.6689453125, 19.482421875, 21.2958984375, 23.109375, 24.9228515625, 26.736328125, 28.5498046875, 30.36328125, 32.1767578125, 33.990234375, 35.8037109375, 37.6171875, 39.4306640625, 41.244140625, 43.0576171875, 44.87109375, 46.6845703125, 48.498046875, 50.3115234375, 52.125]}, "gradients/decoder.transformer.h.1.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 2.0, 3.0, 1.0, 2.0, 8.0, 6.0, 10.0, 22.0, 25.0, 47.0, 75.0, 141.0, 325.0, 736.0, 2235.0, 9685.0, 157809.0, 3906890.0, 105318.0, 7918.0, 1855.0, 641.0, 263.0, 121.0, 59.0, 30.0, 22.0, 14.0, 9.0, 3.0, 5.0, 3.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-108.5, -105.0498046875, -101.599609375, -98.1494140625, -94.69921875, -91.2490234375, -87.798828125, -84.3486328125, -80.8984375, -77.4482421875, -73.998046875, -70.5478515625, -67.09765625, -63.6474609375, -60.197265625, -56.7470703125, -53.296875, -49.8466796875, -46.396484375, -42.9462890625, -39.49609375, -36.0458984375, -32.595703125, -29.1455078125, -25.6953125, -22.2451171875, -18.794921875, -15.3447265625, -11.89453125, -8.4443359375, -4.994140625, -1.5439453125, 1.90625, 5.3564453125, 8.806640625, 12.2568359375, 15.70703125, 19.1572265625, 22.607421875, 26.0576171875, 29.5078125, 32.9580078125, 36.408203125, 39.8583984375, 43.30859375, 46.7587890625, 50.208984375, 53.6591796875, 57.109375, 60.5595703125, 64.009765625, 67.4599609375, 70.91015625, 74.3603515625, 77.810546875, 81.2607421875, 84.7109375, 88.1611328125, 91.611328125, 95.0615234375, 98.51171875, 101.9619140625, 105.412109375, 108.8623046875, 112.3125]}, "gradients/decoder.transformer.h.1.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 3.0, 4.0, 23.0, 49.0, 106.0, 201.0, 227.0, 211.0, 97.0, 54.0, 21.0, 9.0, 3.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-343.3803405761719, -333.4012756347656, -323.42218017578125, -313.443115234375, -303.46405029296875, -293.4849853515625, -283.5058898925781, -273.5268249511719, -263.5477294921875, -253.5686492919922, -243.58958435058594, -233.61050415039062, -223.63143920898438, -213.65235900878906, -203.67327880859375, -193.6942138671875, -183.71514892578125, -173.73606872558594, -163.7570037841797, -153.77792358398438, -143.79885864257812, -133.8197784423828, -123.8406982421875, -113.86162567138672, -103.88255310058594, -93.90348052978516, -83.92440795898438, -73.94532775878906, -63.96625518798828, -53.9871826171875, -44.00810623168945, -34.029029846191406, -24.049957275390625, -14.070882797241211, -4.091808319091797, 5.887266159057617, 15.866340637207031, 25.845413208007812, 35.82448959350586, 45.803565979003906, 55.78263854980469, 65.76171112060547, 75.74078369140625, 85.71986389160156, 95.69893646240234, 105.67800903320312, 115.65708923339844, 125.63616180419922, 135.615234375, 145.5943145751953, 155.57337951660156, 165.55245971679688, 175.53152465820312, 185.51060485839844, 195.48968505859375, 205.46875, 215.4478302001953, 225.42691040039062, 235.40597534179688, 245.3850555419922, 255.3641357421875, 265.34320068359375, 275.322265625, 285.3013610839844, 295.2804260253906]}, "gradients/decoder.transformer.h.1.ln_2.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 2.0, 3.0, 2.0, 4.0, 3.0, 8.0, 11.0, 10.0, 8.0, 18.0, 24.0, 28.0, 23.0, 29.0, 34.0, 30.0, 31.0, 37.0, 53.0, 63.0, 54.0, 46.0, 63.0, 50.0, 47.0, 53.0, 33.0, 37.0, 31.0, 30.0, 29.0, 22.0, 20.0, 9.0, 16.0, 9.0, 13.0, 5.0, 8.0, 7.0, 5.0, 1.0, 2.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-143.12384033203125, -138.917236328125, -134.7106170654297, -130.50401306152344, -126.29739379882812, -122.09078979492188, -117.8841781616211, -113.67756652832031, -109.47095489501953, -105.26434326171875, -101.05773162841797, -96.85111999511719, -92.64451599121094, -88.43789672851562, -84.23129272460938, -80.0246810913086, -75.81806945800781, -71.61145782470703, -67.40484619140625, -63.198238372802734, -58.99162673950195, -54.78501510620117, -50.578407287597656, -46.371795654296875, -42.165184020996094, -37.95857238769531, -33.75196075439453, -29.545352935791016, -25.338741302490234, -21.132129669189453, -16.925519943237305, -12.718910217285156, -8.512298583984375, -4.30568790435791, -0.09907722473144531, 4.1075334548950195, 8.314144134521484, 12.520755767822266, 16.727365493774414, 20.933975219726562, 25.140586853027344, 29.347198486328125, 33.553810119628906, 37.76041793823242, 41.9670295715332, 46.173641204833984, 50.3802490234375, 54.58686065673828, 58.79347229003906, 63.000083923339844, 67.20669555664062, 71.4133071899414, 75.61991882324219, 79.82652282714844, 84.03313446044922, 88.23974609375, 92.44635772705078, 96.65296936035156, 100.85958099365234, 105.06619262695312, 109.27279663085938, 113.47941589355469, 117.68601989746094, 121.89263153076172, 126.0992431640625]}, "gradients/decoder.transformer.h.1.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 4.0, 4.0, 2.0, 6.0, 3.0, 3.0, 9.0, 6.0, 10.0, 10.0, 6.0, 17.0, 21.0, 22.0, 17.0, 25.0, 29.0, 20.0, 37.0, 30.0, 39.0, 40.0, 33.0, 34.0, 50.0, 45.0, 51.0, 34.0, 40.0, 36.0, 33.0, 28.0, 32.0, 27.0, 22.0, 20.0, 26.0, 18.0, 24.0, 20.0, 15.0, 15.0, 10.0, 8.0, 14.0, 5.0, 9.0, 1.0, 4.0, 2.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0], "bins": [-18.265625, -17.70947265625, -17.1533203125, -16.59716796875, -16.041015625, -15.48486328125, -14.9287109375, -14.37255859375, -13.81640625, -13.26025390625, -12.7041015625, -12.14794921875, -11.591796875, -11.03564453125, -10.4794921875, -9.92333984375, -9.3671875, -8.81103515625, -8.2548828125, -7.69873046875, -7.142578125, -6.58642578125, -6.0302734375, -5.47412109375, -4.91796875, -4.36181640625, -3.8056640625, -3.24951171875, -2.693359375, -2.13720703125, -1.5810546875, -1.02490234375, -0.46875, 0.08740234375, 0.6435546875, 1.19970703125, 1.755859375, 2.31201171875, 2.8681640625, 3.42431640625, 3.98046875, 4.53662109375, 5.0927734375, 5.64892578125, 6.205078125, 6.76123046875, 7.3173828125, 7.87353515625, 8.4296875, 8.98583984375, 9.5419921875, 10.09814453125, 10.654296875, 11.21044921875, 11.7666015625, 12.32275390625, 12.87890625, 13.43505859375, 13.9912109375, 14.54736328125, 15.103515625, 15.65966796875, 16.2158203125, 16.77197265625, 17.328125]}, "gradients/decoder.transformer.h.1.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 3.0, 4.0, 0.0, 6.0, 6.0, 9.0, 20.0, 15.0, 41.0, 44.0, 75.0, 90.0, 156.0, 214.0, 303.0, 486.0, 707.0, 1038.0, 1532.0, 2404.0, 3693.0, 5525.0, 8655.0, 13163.0, 20722.0, 32812.0, 53640.0, 92782.0, 190145.0, 321260.0, 121401.0, 66162.0, 40364.0, 25327.0, 15969.0, 10201.0, 6770.0, 4201.0, 2893.0, 1892.0, 1297.0, 878.0, 531.0, 367.0, 234.0, 175.0, 105.0, 94.0, 40.0, 39.0, 25.0, 16.0, 13.0, 11.0, 9.0, 3.0, 1.0, 2.0, 2.0, 1.0, 0.0, 1.0], "bins": [-0.75244140625, -0.72833251953125, -0.7042236328125, -0.68011474609375, -0.656005859375, -0.63189697265625, -0.6077880859375, -0.58367919921875, -0.5595703125, -0.53546142578125, -0.5113525390625, -0.48724365234375, -0.463134765625, -0.43902587890625, -0.4149169921875, -0.39080810546875, -0.36669921875, -0.34259033203125, -0.3184814453125, -0.29437255859375, -0.270263671875, -0.24615478515625, -0.2220458984375, -0.19793701171875, -0.173828125, -0.14971923828125, -0.1256103515625, -0.10150146484375, -0.077392578125, -0.05328369140625, -0.0291748046875, -0.00506591796875, 0.01904296875, 0.04315185546875, 0.0672607421875, 0.09136962890625, 0.115478515625, 0.13958740234375, 0.1636962890625, 0.18780517578125, 0.2119140625, 0.23602294921875, 0.2601318359375, 0.28424072265625, 0.308349609375, 0.33245849609375, 0.3565673828125, 0.38067626953125, 0.40478515625, 0.42889404296875, 0.4530029296875, 0.47711181640625, 0.501220703125, 0.52532958984375, 0.5494384765625, 0.57354736328125, 0.59765625, 0.62176513671875, 0.6458740234375, 0.66998291015625, 0.694091796875, 0.71820068359375, 0.7423095703125, 0.76641845703125, 0.79052734375]}, "gradients/decoder.transformer.h.1.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 3.0, 3.0, 5.0, 3.0, 5.0, 3.0, 4.0, 6.0, 6.0, 14.0, 16.0, 20.0, 19.0, 14.0, 28.0, 38.0, 21.0, 31.0, 39.0, 40.0, 51.0, 36.0, 40.0, 50.0, 1072.0, 36.0, 45.0, 45.0, 43.0, 29.0, 37.0, 27.0, 32.0, 26.0, 20.0, 29.0, 19.0, 21.0, 13.0, 16.0, 6.0, 6.0, 7.0, 7.0, 0.0, 1.0, 6.0, 2.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-15.796875, -15.19091796875, -14.5849609375, -13.97900390625, -13.373046875, -12.76708984375, -12.1611328125, -11.55517578125, -10.94921875, -10.34326171875, -9.7373046875, -9.13134765625, -8.525390625, -7.91943359375, -7.3134765625, -6.70751953125, -6.1015625, -5.49560546875, -4.8896484375, -4.28369140625, -3.677734375, -3.07177734375, -2.4658203125, -1.85986328125, -1.25390625, -0.64794921875, -0.0419921875, 0.56396484375, 1.169921875, 1.77587890625, 2.3818359375, 2.98779296875, 3.59375, 4.19970703125, 4.8056640625, 5.41162109375, 6.017578125, 6.62353515625, 7.2294921875, 7.83544921875, 8.44140625, 9.04736328125, 9.6533203125, 10.25927734375, 10.865234375, 11.47119140625, 12.0771484375, 12.68310546875, 13.2890625, 13.89501953125, 14.5009765625, 15.10693359375, 15.712890625, 16.31884765625, 16.9248046875, 17.53076171875, 18.13671875, 18.74267578125, 19.3486328125, 19.95458984375, 20.560546875, 21.16650390625, 21.7724609375, 22.37841796875, 22.984375]}, "gradients/decoder.transformer.h.1.crossattention.c_attn.weight": {"_type": "histogram", "values": [3.0, 1.0, 3.0, 2.0, 4.0, 3.0, 5.0, 5.0, 15.0, 17.0, 18.0, 40.0, 69.0, 114.0, 143.0, 218.0, 325.0, 475.0, 696.0, 1096.0, 1688.0, 2462.0, 3785.0, 5874.0, 8901.0, 13849.0, 22247.0, 36096.0, 60651.0, 106765.0, 250903.0, 1312976.0, 108501.0, 60450.0, 36298.0, 22224.0, 14151.0, 9013.0, 5863.0, 3826.0, 2519.0, 1560.0, 1085.0, 700.0, 505.0, 371.0, 203.0, 149.0, 108.0, 69.0, 35.0, 22.0, 18.0, 12.0, 5.0, 6.0, 0.0, 0.0, 2.0, 3.0, 2.0, 1.0, 1.0, 1.0], "bins": [-0.52197265625, -0.5051422119140625, -0.488311767578125, -0.4714813232421875, -0.45465087890625, -0.4378204345703125, -0.420989990234375, -0.4041595458984375, -0.3873291015625, -0.3704986572265625, -0.353668212890625, -0.3368377685546875, -0.32000732421875, -0.3031768798828125, -0.286346435546875, -0.2695159912109375, -0.252685546875, -0.2358551025390625, -0.219024658203125, -0.2021942138671875, -0.18536376953125, -0.1685333251953125, -0.151702880859375, -0.1348724365234375, -0.1180419921875, -0.1012115478515625, -0.084381103515625, -0.0675506591796875, -0.05072021484375, -0.0338897705078125, -0.017059326171875, -0.0002288818359375, 0.0166015625, 0.0334320068359375, 0.050262451171875, 0.0670928955078125, 0.08392333984375, 0.1007537841796875, 0.117584228515625, 0.1344146728515625, 0.1512451171875, 0.1680755615234375, 0.184906005859375, 0.2017364501953125, 0.21856689453125, 0.2353973388671875, 0.252227783203125, 0.2690582275390625, 0.285888671875, 0.3027191162109375, 0.319549560546875, 0.3363800048828125, 0.35321044921875, 0.3700408935546875, 0.386871337890625, 0.4037017822265625, 0.4205322265625, 0.4373626708984375, 0.454193115234375, 0.4710235595703125, 0.48785400390625, 0.5046844482421875, 0.521514892578125, 0.5383453369140625, 0.55517578125]}, "gradients/decoder.transformer.h.1.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 5.0, 1.0, 1.0, 2.0, 4.0, 3.0, 4.0, 2.0, 11.0, 9.0, 9.0, 19.0, 21.0, 28.0, 31.0, 43.0, 45.0, 80.0, 73.0, 101.0, 90.0, 86.0, 77.0, 63.0, 48.0, 35.0, 29.0, 20.0, 15.0, 16.0, 13.0, 10.0, 3.0, 3.0, 4.0, 3.0, 0.0, 3.0, 4.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 2.0], "bins": [-4.798173904418945e-05, -4.665181040763855e-05, -4.5321881771087646e-05, -4.399195313453674e-05, -4.266202449798584e-05, -4.1332095861434937e-05, -4.000216722488403e-05, -3.867223858833313e-05, -3.7342309951782227e-05, -3.601238131523132e-05, -3.468245267868042e-05, -3.3352524042129517e-05, -3.202259540557861e-05, -3.069266676902771e-05, -2.9362738132476807e-05, -2.8032809495925903e-05, -2.6702880859375e-05, -2.5372952222824097e-05, -2.4043023586273193e-05, -2.271309494972229e-05, -2.1383166313171387e-05, -2.0053237676620483e-05, -1.872330904006958e-05, -1.7393380403518677e-05, -1.6063451766967773e-05, -1.473352313041687e-05, -1.3403594493865967e-05, -1.2073665857315063e-05, -1.074373722076416e-05, -9.413808584213257e-06, -8.083879947662354e-06, -6.75395131111145e-06, -5.424022674560547e-06, -4.0940940380096436e-06, -2.7641654014587402e-06, -1.434236764907837e-06, -1.043081283569336e-07, 1.2256205081939697e-06, 2.555549144744873e-06, 3.885477781295776e-06, 5.21540641784668e-06, 6.545335054397583e-06, 7.875263690948486e-06, 9.20519232749939e-06, 1.0535120964050293e-05, 1.1865049600601196e-05, 1.31949782371521e-05, 1.4524906873703003e-05, 1.5854835510253906e-05, 1.718476414680481e-05, 1.8514692783355713e-05, 1.9844621419906616e-05, 2.117455005645752e-05, 2.2504478693008423e-05, 2.3834407329559326e-05, 2.516433596611023e-05, 2.6494264602661133e-05, 2.7824193239212036e-05, 2.915412187576294e-05, 3.0484050512313843e-05, 3.1813979148864746e-05, 3.314390778541565e-05, 3.447383642196655e-05, 3.5803765058517456e-05, 3.713369369506836e-05]}, "gradients/decoder.transformer.h.1.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 1.0, 3.0, 1.0, 0.0, 4.0, 1.0, 1.0, 3.0, 5.0, 5.0, 7.0, 11.0, 9.0, 12.0, 21.0, 21.0, 36.0, 45.0, 53.0, 92.0, 143.0, 215.0, 372.0, 883.0, 11302.0, 983606.0, 48991.0, 1541.0, 433.0, 243.0, 149.0, 100.0, 71.0, 41.0, 38.0, 27.0, 19.0, 19.0, 6.0, 9.0, 1.0, 7.0, 5.0, 6.0, 7.0, 0.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.0005130767822265625, -0.0004951730370521545, -0.0004772692918777466, -0.0004593655467033386, -0.00044146180152893066, -0.0004235580563545227, -0.00040565431118011475, -0.0003877505660057068, -0.00036984682083129883, -0.00035194307565689087, -0.0003340393304824829, -0.00031613558530807495, -0.000298231840133667, -0.00028032809495925903, -0.0002624243497848511, -0.0002445206046104431, -0.00022661685943603516, -0.0002087131142616272, -0.00019080936908721924, -0.00017290562391281128, -0.00015500187873840332, -0.00013709813356399536, -0.0001191943883895874, -0.00010129064321517944, -8.338689804077148e-05, -6.548315286636353e-05, -4.7579407691955566e-05, -2.9675662517547607e-05, -1.1771917343139648e-05, 6.1318278312683105e-06, 2.403557300567627e-05, 4.193931818008423e-05, 5.984306335449219e-05, 7.774680852890015e-05, 9.56505537033081e-05, 0.00011355429887771606, 0.00013145804405212402, 0.00014936178922653198, 0.00016726553440093994, 0.0001851692795753479, 0.00020307302474975586, 0.00022097676992416382, 0.00023888051509857178, 0.00025678426027297974, 0.0002746880054473877, 0.00029259175062179565, 0.0003104954957962036, 0.00032839924097061157, 0.00034630298614501953, 0.0003642067313194275, 0.00038211047649383545, 0.0004000142216682434, 0.00041791796684265137, 0.0004358217120170593, 0.0004537254571914673, 0.00047162920236587524, 0.0004895329475402832, 0.0005074366927146912, 0.0005253404378890991, 0.0005432441830635071, 0.000561147928237915, 0.000579051673412323, 0.000596955418586731, 0.0006148591637611389, 0.0006327629089355469]}, "gradients/decoder.transformer.h.1.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 3.0, 2.0, 4.0, 13.0, 71.0, 261.0, 407.0, 171.0, 57.0, 19.0, 5.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.9961807993240654e-05, -1.7817497791838832e-05, -1.567318759043701e-05, -1.3528877389035188e-05, -1.1384567187633365e-05, -9.240256986231543e-06, -7.095946784829721e-06, -4.951636583427899e-06, -2.8073263820260763e-06, -6.63016180624254e-07, 1.4812940207775682e-06, 3.6256042221793905e-06, 5.769914423581213e-06, 7.914224624983035e-06, 1.0058534826384857e-05, 1.220284502778668e-05, 1.4347155229188502e-05, 1.6491465430590324e-05, 1.8635775631992146e-05, 2.078008583339397e-05, 2.292439603479579e-05, 2.5068706236197613e-05, 2.7213016437599435e-05, 2.9357326639001258e-05, 3.150163684040308e-05, 3.3645948860794306e-05, 3.5790257243206725e-05, 3.793456562561914e-05, 4.007887764601037e-05, 4.2223189666401595e-05, 4.4367498048814014e-05, 4.651180643122643e-05, 4.865611845161766e-05, 5.0800430472008884e-05, 5.29447388544213e-05, 5.508904723683372e-05, 5.723335925722495e-05, 5.937767127761617e-05, 6.152197602204978e-05, 6.366628804244101e-05, 6.581060006283224e-05, 6.795491208322346e-05, 7.009922410361469e-05, 7.22435288480483e-05, 7.438784086843953e-05, 7.653215288883075e-05, 7.867645763326436e-05, 8.082076965365559e-05, 8.296508167404681e-05, 8.510939369443804e-05, 8.725370571482927e-05, 8.939801045926288e-05, 9.15423224796541e-05, 9.368663450004533e-05, 9.583093924447894e-05, 9.797525126487017e-05, 0.00010011956328526139, 0.00010226387530565262, 0.00010440818732604384, 0.00010655249207047746, 0.00010869680409086868, 0.00011084111611125991, 0.00011298542085569352, 0.00011512973287608474, 0.00011727404489647597]}, "gradients/decoder.transformer.h.1.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 3.0, 0.0, 3.0, 2.0, 1.0, 8.0, 7.0, 6.0, 9.0, 9.0, 7.0, 16.0, 15.0, 13.0, 25.0, 20.0, 24.0, 31.0, 35.0, 29.0, 31.0, 31.0, 31.0, 35.0, 40.0, 42.0, 38.0, 42.0, 48.0, 28.0, 42.0, 36.0, 38.0, 38.0, 30.0, 25.0, 25.0, 19.0, 18.0, 26.0, 15.0, 12.0, 13.0, 5.0, 12.0, 10.0, 8.0, 4.0, 4.0, 3.0, 2.0, 1.0, 3.0, 1.0, 1.0], "bins": [-1.895427703857422e-05, -1.8421560525894165e-05, -1.788884401321411e-05, -1.7356127500534058e-05, -1.6823410987854004e-05, -1.629069447517395e-05, -1.5757977962493896e-05, -1.5225261449813843e-05, -1.4692544937133789e-05, -1.4159828424453735e-05, -1.3627111911773682e-05, -1.3094395399093628e-05, -1.2561678886413574e-05, -1.202896237373352e-05, -1.1496245861053467e-05, -1.0963529348373413e-05, -1.043081283569336e-05, -9.898096323013306e-06, -9.365379810333252e-06, -8.832663297653198e-06, -8.299946784973145e-06, -7.76723027229309e-06, -7.234513759613037e-06, -6.701797246932983e-06, -6.16908073425293e-06, -5.636364221572876e-06, -5.103647708892822e-06, -4.5709311962127686e-06, -4.038214683532715e-06, -3.505498170852661e-06, -2.9727816581726074e-06, -2.4400651454925537e-06, -1.9073486328125e-06, -1.3746321201324463e-06, -8.419156074523926e-07, -3.0919909477233887e-07, 2.2351741790771484e-07, 7.562339305877686e-07, 1.2889504432678223e-06, 1.821666955947876e-06, 2.3543834686279297e-06, 2.8870999813079834e-06, 3.419816493988037e-06, 3.952533006668091e-06, 4.4852495193481445e-06, 5.017966032028198e-06, 5.550682544708252e-06, 6.083399057388306e-06, 6.616115570068359e-06, 7.148832082748413e-06, 7.681548595428467e-06, 8.21426510810852e-06, 8.746981620788574e-06, 9.279698133468628e-06, 9.812414646148682e-06, 1.0345131158828735e-05, 1.0877847671508789e-05, 1.1410564184188843e-05, 1.1943280696868896e-05, 1.247599720954895e-05, 1.3008713722229004e-05, 1.3541430234909058e-05, 1.4074146747589111e-05, 1.4606863260269165e-05, 1.5139579772949219e-05]}, "gradients/decoder.transformer.h.1.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 4.0, 4.0, 2.0, 6.0, 3.0, 3.0, 9.0, 6.0, 10.0, 10.0, 6.0, 17.0, 21.0, 22.0, 17.0, 25.0, 29.0, 20.0, 37.0, 30.0, 39.0, 40.0, 33.0, 34.0, 50.0, 45.0, 51.0, 34.0, 40.0, 36.0, 33.0, 28.0, 32.0, 27.0, 22.0, 20.0, 26.0, 18.0, 24.0, 20.0, 15.0, 15.0, 10.0, 8.0, 14.0, 5.0, 9.0, 1.0, 4.0, 2.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0], "bins": [-18.265625, -17.70947265625, -17.1533203125, -16.59716796875, -16.041015625, -15.48486328125, -14.9287109375, -14.37255859375, -13.81640625, -13.26025390625, -12.7041015625, -12.14794921875, -11.591796875, -11.03564453125, -10.4794921875, -9.92333984375, -9.3671875, -8.81103515625, -8.2548828125, -7.69873046875, -7.142578125, -6.58642578125, -6.0302734375, -5.47412109375, -4.91796875, -4.36181640625, -3.8056640625, -3.24951171875, -2.693359375, -2.13720703125, -1.5810546875, -1.02490234375, -0.46875, 0.08740234375, 0.6435546875, 1.19970703125, 1.755859375, 2.31201171875, 2.8681640625, 3.42431640625, 3.98046875, 4.53662109375, 5.0927734375, 5.64892578125, 6.205078125, 6.76123046875, 7.3173828125, 7.87353515625, 8.4296875, 8.98583984375, 9.5419921875, 10.09814453125, 10.654296875, 11.21044921875, 11.7666015625, 12.32275390625, 12.87890625, 13.43505859375, 13.9912109375, 14.54736328125, 15.103515625, 15.65966796875, 16.2158203125, 16.77197265625, 17.328125]}, "gradients/decoder.transformer.h.1.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 4.0, 3.0, 2.0, 7.0, 11.0, 8.0, 14.0, 22.0, 33.0, 29.0, 61.0, 67.0, 99.0, 145.0, 211.0, 336.0, 499.0, 795.0, 1362.0, 2460.0, 4928.0, 11165.0, 26720.0, 73685.0, 205742.0, 392181.0, 205250.0, 73537.0, 27020.0, 11038.0, 4877.0, 2550.0, 1381.0, 769.0, 488.0, 344.0, 206.0, 139.0, 97.0, 76.0, 56.0, 45.0, 24.0, 22.0, 22.0, 8.0, 12.0, 5.0, 3.0, 3.0, 1.0, 2.0, 2.0, 1.0, 3.0], "bins": [-29.546875, -28.6904296875, -27.833984375, -26.9775390625, -26.12109375, -25.2646484375, -24.408203125, -23.5517578125, -22.6953125, -21.8388671875, -20.982421875, -20.1259765625, -19.26953125, -18.4130859375, -17.556640625, -16.7001953125, -15.84375, -14.9873046875, -14.130859375, -13.2744140625, -12.41796875, -11.5615234375, -10.705078125, -9.8486328125, -8.9921875, -8.1357421875, -7.279296875, -6.4228515625, -5.56640625, -4.7099609375, -3.853515625, -2.9970703125, -2.140625, -1.2841796875, -0.427734375, 0.4287109375, 1.28515625, 2.1416015625, 2.998046875, 3.8544921875, 4.7109375, 5.5673828125, 6.423828125, 7.2802734375, 8.13671875, 8.9931640625, 9.849609375, 10.7060546875, 11.5625, 12.4189453125, 13.275390625, 14.1318359375, 14.98828125, 15.8447265625, 16.701171875, 17.5576171875, 18.4140625, 19.2705078125, 20.126953125, 20.9833984375, 21.83984375, 22.6962890625, 23.552734375, 24.4091796875, 25.265625]}, "gradients/decoder.transformer.h.1.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 3.0, 1.0, 3.0, 0.0, 6.0, 6.0, 5.0, 4.0, 10.0, 6.0, 7.0, 18.0, 21.0, 22.0, 25.0, 29.0, 28.0, 41.0, 42.0, 55.0, 67.0, 106.0, 189.0, 1559.0, 283.0, 130.0, 82.0, 59.0, 37.0, 41.0, 24.0, 31.0, 31.0, 25.0, 17.0, 14.0, 9.0, 7.0, 4.0, 5.0, 2.0, 3.0, 3.0, 1.0, 1.0, 2.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-56.71875, -54.66552734375, -52.6123046875, -50.55908203125, -48.505859375, -46.45263671875, -44.3994140625, -42.34619140625, -40.29296875, -38.23974609375, -36.1865234375, -34.13330078125, -32.080078125, -30.02685546875, -27.9736328125, -25.92041015625, -23.8671875, -21.81396484375, -19.7607421875, -17.70751953125, -15.654296875, -13.60107421875, -11.5478515625, -9.49462890625, -7.44140625, -5.38818359375, -3.3349609375, -1.28173828125, 0.771484375, 2.82470703125, 4.8779296875, 6.93115234375, 8.984375, 11.03759765625, 13.0908203125, 15.14404296875, 17.197265625, 19.25048828125, 21.3037109375, 23.35693359375, 25.41015625, 27.46337890625, 29.5166015625, 31.56982421875, 33.623046875, 35.67626953125, 37.7294921875, 39.78271484375, 41.8359375, 43.88916015625, 45.9423828125, 47.99560546875, 50.048828125, 52.10205078125, 54.1552734375, 56.20849609375, 58.26171875, 60.31494140625, 62.3681640625, 64.42138671875, 66.474609375, 68.52783203125, 70.5810546875, 72.63427734375, 74.6875]}, "gradients/decoder.transformer.h.1.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 3.0, 2.0, 4.0, 5.0, 5.0, 9.0, 11.0, 18.0, 29.0, 38.0, 38.0, 61.0, 82.0, 152.0, 256.0, 533.0, 2149.0, 147331.0, 2986877.0, 6335.0, 850.0, 317.0, 160.0, 128.0, 87.0, 63.0, 41.0, 31.0, 24.0, 20.0, 17.0, 12.0, 10.0, 6.0, 3.0, 3.0, 4.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0], "bins": [-191.625, -186.7373046875, -181.849609375, -176.9619140625, -172.07421875, -167.1865234375, -162.298828125, -157.4111328125, -152.5234375, -147.6357421875, -142.748046875, -137.8603515625, -132.97265625, -128.0849609375, -123.197265625, -118.3095703125, -113.421875, -108.5341796875, -103.646484375, -98.7587890625, -93.87109375, -88.9833984375, -84.095703125, -79.2080078125, -74.3203125, -69.4326171875, -64.544921875, -59.6572265625, -54.76953125, -49.8818359375, -44.994140625, -40.1064453125, -35.21875, -30.3310546875, -25.443359375, -20.5556640625, -15.66796875, -10.7802734375, -5.892578125, -1.0048828125, 3.8828125, 8.7705078125, 13.658203125, 18.5458984375, 23.43359375, 28.3212890625, 33.208984375, 38.0966796875, 42.984375, 47.8720703125, 52.759765625, 57.6474609375, 62.53515625, 67.4228515625, 72.310546875, 77.1982421875, 82.0859375, 86.9736328125, 91.861328125, 96.7490234375, 101.63671875, 106.5244140625, 111.412109375, 116.2998046875, 121.1875]}, "gradients/decoder.transformer.h.1.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 9.0, 19.0, 57.0, 206.0, 376.0, 225.0, 82.0, 23.0, 13.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-438.571044921875, -428.4451599121094, -418.31927490234375, -408.1933898925781, -398.0675048828125, -387.94158935546875, -377.81573486328125, -367.6898193359375, -357.5639343261719, -347.43804931640625, -337.3121643066406, -327.186279296875, -317.0603942871094, -306.93450927734375, -296.80859375, -286.6827087402344, -276.55682373046875, -266.4309387207031, -256.3050537109375, -246.17916870117188, -236.0532684326172, -225.92738342285156, -215.80149841308594, -205.67559814453125, -195.54974365234375, -185.42385864257812, -175.2979736328125, -165.17208862304688, -155.0461883544922, -144.92030334472656, -134.79441833496094, -124.66852569580078, -114.54261779785156, -104.41673278808594, -94.29084014892578, -84.16495513916016, -74.0390625, -63.913177490234375, -53.78729248046875, -43.661399841308594, -33.53551483154297, -23.409626007080078, -13.28373908996582, -3.1578521728515625, 6.968036651611328, 17.09392547607422, 27.219810485839844, 37.345703125, 47.471588134765625, 57.597476959228516, 67.7233657836914, 77.84925079345703, 87.97514343261719, 98.10102844238281, 108.22691345214844, 118.3528060913086, 128.47869873046875, 138.60458374023438, 148.73046875, 158.85635375976562, 168.9822540283203, 179.10813903808594, 189.23402404785156, 199.35992431640625, 209.4857940673828]}, "gradients/decoder.transformer.h.1.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 2.0, 3.0, 3.0, 10.0, 8.0, 11.0, 10.0, 12.0, 30.0, 29.0, 27.0, 23.0, 34.0, 52.0, 44.0, 53.0, 51.0, 59.0, 43.0, 46.0, 59.0, 60.0, 44.0, 57.0, 38.0, 41.0, 31.0, 28.0, 30.0, 26.0, 12.0, 9.0, 4.0, 9.0, 5.0, 6.0, 3.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-258.0610656738281, -250.67166137695312, -243.2822723388672, -235.8928680419922, -228.5034637451172, -221.11407470703125, -213.72467041015625, -206.33526611328125, -198.94586181640625, -191.55645751953125, -184.1670684814453, -176.7776641845703, -169.3882598876953, -161.99887084960938, -154.60946655273438, -147.22006225585938, -139.83065795898438, -132.44125366210938, -125.0518569946289, -117.66246032714844, -110.27305603027344, -102.88365936279297, -95.4942626953125, -88.1048583984375, -80.71546936035156, -73.3260726928711, -65.9366683959961, -58.547271728515625, -51.157867431640625, -43.768470764160156, -36.37907028198242, -28.989669799804688, -21.600265502929688, -14.210865020751953, -6.821465492248535, 0.5679340362548828, 7.957334518432617, 15.346733093261719, 22.736133575439453, 30.125534057617188, 37.51493453979492, 44.904335021972656, 52.29373550415039, 59.683135986328125, 67.0725326538086, 74.46192932128906, 81.85133361816406, 89.24073791503906, 96.63013458251953, 104.01953125, 111.408935546875, 118.79833221435547, 126.18773651123047, 133.57713317871094, 140.96653747558594, 148.35592651367188, 155.74533081054688, 163.13473510742188, 170.5241241455078, 177.9135284423828, 185.3029327392578, 192.69232177734375, 200.08172607421875, 207.47113037109375, 214.86053466796875]}, "gradients/decoder.transformer.h.0.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 2.0, 4.0, 5.0, 4.0, 7.0, 4.0, 3.0, 7.0, 8.0, 9.0, 10.0, 18.0, 15.0, 25.0, 18.0, 28.0, 21.0, 28.0, 26.0, 45.0, 38.0, 35.0, 36.0, 38.0, 30.0, 56.0, 49.0, 36.0, 33.0, 46.0, 39.0, 29.0, 50.0, 26.0, 20.0, 23.0, 21.0, 21.0, 15.0, 13.0, 20.0, 9.0, 11.0, 13.0, 6.0, 3.0, 3.0, 3.0, 2.0, 1.0, 3.0, 3.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-18.953125, -18.315673828125, -17.67822265625, -17.040771484375, -16.4033203125, -15.765869140625, -15.12841796875, -14.490966796875, -13.853515625, -13.216064453125, -12.57861328125, -11.941162109375, -11.3037109375, -10.666259765625, -10.02880859375, -9.391357421875, -8.75390625, -8.116455078125, -7.47900390625, -6.841552734375, -6.2041015625, -5.566650390625, -4.92919921875, -4.291748046875, -3.654296875, -3.016845703125, -2.37939453125, -1.741943359375, -1.1044921875, -0.467041015625, 0.17041015625, 0.807861328125, 1.4453125, 2.082763671875, 2.72021484375, 3.357666015625, 3.9951171875, 4.632568359375, 5.27001953125, 5.907470703125, 6.544921875, 7.182373046875, 7.81982421875, 8.457275390625, 9.0947265625, 9.732177734375, 10.36962890625, 11.007080078125, 11.64453125, 12.281982421875, 12.91943359375, 13.556884765625, 14.1943359375, 14.831787109375, 15.46923828125, 16.106689453125, 16.744140625, 17.381591796875, 18.01904296875, 18.656494140625, 19.2939453125, 19.931396484375, 20.56884765625, 21.206298828125, 21.84375]}, "gradients/decoder.transformer.h.0.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 0.0, 7.0, 4.0, 6.0, 7.0, 14.0, 18.0, 18.0, 30.0, 52.0, 61.0, 78.0, 92.0, 127.0, 161.0, 251.0, 399.0, 592.0, 970.0, 2310.0, 11629.0, 217380.0, 3817182.0, 129367.0, 8796.0, 2056.0, 872.0, 516.0, 394.0, 237.0, 178.0, 126.0, 103.0, 76.0, 57.0, 37.0, 31.0, 19.0, 14.0, 6.0, 5.0, 3.0, 5.0, 4.0, 2.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-134.875, -129.7734375, -124.671875, -119.5703125, -114.46875, -109.3671875, -104.265625, -99.1640625, -94.0625, -88.9609375, -83.859375, -78.7578125, -73.65625, -68.5546875, -63.453125, -58.3515625, -53.25, -48.1484375, -43.046875, -37.9453125, -32.84375, -27.7421875, -22.640625, -17.5390625, -12.4375, -7.3359375, -2.234375, 2.8671875, 7.96875, 13.0703125, 18.171875, 23.2734375, 28.375, 33.4765625, 38.578125, 43.6796875, 48.78125, 53.8828125, 58.984375, 64.0859375, 69.1875, 74.2890625, 79.390625, 84.4921875, 89.59375, 94.6953125, 99.796875, 104.8984375, 110.0, 115.1015625, 120.203125, 125.3046875, 130.40625, 135.5078125, 140.609375, 145.7109375, 150.8125, 155.9140625, 161.015625, 166.1171875, 171.21875, 176.3203125, 181.421875, 186.5234375, 191.625]}, "gradients/decoder.transformer.h.0.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 1.0, 2.0, 4.0, 9.0, 19.0, 10.0, 29.0, 32.0, 34.0, 48.0, 80.0, 126.0, 179.0, 218.0, 322.0, 442.0, 558.0, 583.0, 417.0, 269.0, 209.0, 123.0, 101.0, 76.0, 61.0, 22.0, 36.0, 18.0, 13.0, 7.0, 8.0, 5.0, 8.0, 4.0, 7.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0], "bins": [-52.09375, -50.517578125, -48.94140625, -47.365234375, -45.7890625, -44.212890625, -42.63671875, -41.060546875, -39.484375, -37.908203125, -36.33203125, -34.755859375, -33.1796875, -31.603515625, -30.02734375, -28.451171875, -26.875, -25.298828125, -23.72265625, -22.146484375, -20.5703125, -18.994140625, -17.41796875, -15.841796875, -14.265625, -12.689453125, -11.11328125, -9.537109375, -7.9609375, -6.384765625, -4.80859375, -3.232421875, -1.65625, -0.080078125, 1.49609375, 3.072265625, 4.6484375, 6.224609375, 7.80078125, 9.376953125, 10.953125, 12.529296875, 14.10546875, 15.681640625, 17.2578125, 18.833984375, 20.41015625, 21.986328125, 23.5625, 25.138671875, 26.71484375, 28.291015625, 29.8671875, 31.443359375, 33.01953125, 34.595703125, 36.171875, 37.748046875, 39.32421875, 40.900390625, 42.4765625, 44.052734375, 45.62890625, 47.205078125, 48.78125]}, "gradients/decoder.transformer.h.0.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 3.0, 2.0, 3.0, 2.0, 5.0, 4.0, 5.0, 6.0, 19.0, 25.0, 49.0, 40.0, 59.0, 102.0, 156.0, 320.0, 539.0, 1156.0, 3073.0, 9851.0, 44622.0, 320463.0, 2887650.0, 811484.0, 89791.0, 16958.0, 4560.0, 1668.0, 715.0, 361.0, 226.0, 131.0, 77.0, 53.0, 44.0, 21.0, 19.0, 14.0, 6.0, 4.0, 4.0, 3.0, 1.0, 2.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-54.40625, -52.28857421875, -50.1708984375, -48.05322265625, -45.935546875, -43.81787109375, -41.7001953125, -39.58251953125, -37.46484375, -35.34716796875, -33.2294921875, -31.11181640625, -28.994140625, -26.87646484375, -24.7587890625, -22.64111328125, -20.5234375, -18.40576171875, -16.2880859375, -14.17041015625, -12.052734375, -9.93505859375, -7.8173828125, -5.69970703125, -3.58203125, -1.46435546875, 0.6533203125, 2.77099609375, 4.888671875, 7.00634765625, 9.1240234375, 11.24169921875, 13.359375, 15.47705078125, 17.5947265625, 19.71240234375, 21.830078125, 23.94775390625, 26.0654296875, 28.18310546875, 30.30078125, 32.41845703125, 34.5361328125, 36.65380859375, 38.771484375, 40.88916015625, 43.0068359375, 45.12451171875, 47.2421875, 49.35986328125, 51.4775390625, 53.59521484375, 55.712890625, 57.83056640625, 59.9482421875, 62.06591796875, 64.18359375, 66.30126953125, 68.4189453125, 70.53662109375, 72.654296875, 74.77197265625, 76.8896484375, 79.00732421875, 81.125]}, "gradients/decoder.transformer.h.0.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 1.0, 0.0, 3.0, 2.0, 2.0, 8.0, 5.0, 14.0, 12.0, 20.0, 26.0, 24.0, 38.0, 56.0, 46.0, 76.0, 78.0, 79.0, 80.0, 75.0, 72.0, 55.0, 62.0, 40.0, 40.0, 31.0, 14.0, 21.0, 6.0, 4.0, 3.0, 5.0, 3.0, 3.0, 3.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 2.0], "bins": [-342.2301940917969, -331.94873046875, -321.6672668457031, -311.38580322265625, -301.1043395996094, -290.8228759765625, -280.5414123535156, -270.25994873046875, -259.9784851074219, -249.697021484375, -239.41555786132812, -229.13409423828125, -218.85263061523438, -208.5711669921875, -198.28970336914062, -188.00823974609375, -177.7267608642578, -167.44529724121094, -157.16383361816406, -146.8823699951172, -136.6009063720703, -126.31944274902344, -116.03797149658203, -105.75650787353516, -95.47504425048828, -85.1935806274414, -74.91211700439453, -64.63064575195312, -54.349185943603516, -44.06772232055664, -33.7862548828125, -23.504791259765625, -13.22332763671875, -2.9418630599975586, 7.339601516723633, 17.62106704711914, 27.902530670166016, 38.18399429321289, 48.46546173095703, 58.746925354003906, 69.02838897705078, 79.30985260009766, 89.59131622314453, 99.87278747558594, 110.15425109863281, 120.43571472167969, 130.71717834472656, 140.99864196777344, 151.2801055908203, 161.5615692138672, 171.84303283691406, 182.12449645996094, 192.4059600830078, 202.6874237060547, 212.96890258789062, 223.2503662109375, 233.53182983398438, 243.81329345703125, 254.09475708007812, 264.376220703125, 274.6576843261719, 284.93914794921875, 295.2206115722656, 305.5020751953125, 315.7835388183594]}, "gradients/decoder.transformer.h.0.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 7.0, 9.0, 3.0, 13.0, 8.0, 13.0, 23.0, 22.0, 23.0, 31.0, 26.0, 32.0, 25.0, 35.0, 38.0, 43.0, 43.0, 41.0, 34.0, 39.0, 38.0, 32.0, 42.0, 27.0, 36.0, 49.0, 33.0, 32.0, 22.0, 29.0, 23.0, 23.0, 17.0, 15.0, 16.0, 20.0, 10.0, 3.0, 4.0, 6.0, 5.0, 8.0, 4.0, 2.0, 2.0, 6.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-185.32296752929688, -178.74801635742188, -172.1730499267578, -165.5980987548828, -159.0231475830078, -152.44818115234375, -145.87322998046875, -139.29827880859375, -132.72332763671875, -126.14836883544922, -119.57341766357422, -112.99845886230469, -106.42350769042969, -99.84854888916016, -93.27359008789062, -86.69863891601562, -80.1236801147461, -73.54872131347656, -66.97377014160156, -60.39881134033203, -53.82386016845703, -47.2489013671875, -40.673946380615234, -34.09899139404297, -27.524036407470703, -20.949081420898438, -14.374125480651855, -7.799169540405273, -1.2242145538330078, 5.350742340087891, 11.925697326660156, 18.500652313232422, 25.075607299804688, 31.650562286376953, 38.22551727294922, 44.80047607421875, 51.37542724609375, 57.95038604736328, 64.52534484863281, 71.10029602050781, 77.67524719238281, 84.25020599365234, 90.82515716552734, 97.40011596679688, 103.97506713867188, 110.5500259399414, 117.12498474121094, 123.69993591308594, 130.27490234375, 136.849853515625, 143.42481994628906, 149.99977111816406, 156.57472229003906, 163.14968872070312, 169.72463989257812, 176.29959106445312, 182.87454223632812, 189.44949340820312, 196.0244598388672, 202.5994110107422, 209.1743621826172, 215.74932861328125, 222.32427978515625, 228.89923095703125, 235.47418212890625]}, "gradients/decoder.transformer.h.0.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 4.0, 4.0, 4.0, 6.0, 14.0, 10.0, 13.0, 19.0, 21.0, 19.0, 19.0, 24.0, 27.0, 27.0, 34.0, 35.0, 47.0, 33.0, 40.0, 47.0, 40.0, 37.0, 33.0, 41.0, 55.0, 41.0, 32.0, 41.0, 31.0, 32.0, 15.0, 30.0, 21.0, 20.0, 23.0, 10.0, 15.0, 8.0, 12.0, 5.0, 6.0, 2.0, 4.0, 4.0, 4.0, 3.0, 4.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-346.75, -333.82421875, -320.8984375, -307.97265625, -295.046875, -282.12109375, -269.1953125, -256.26953125, -243.34375, -230.41796875, -217.4921875, -204.56640625, -191.640625, -178.71484375, -165.7890625, -152.86328125, -139.9375, -127.01171875, -114.0859375, -101.16015625, -88.234375, -75.30859375, -62.3828125, -49.45703125, -36.53125, -23.60546875, -10.6796875, 2.24609375, 15.171875, 28.09765625, 41.0234375, 53.94921875, 66.875, 79.80078125, 92.7265625, 105.65234375, 118.578125, 131.50390625, 144.4296875, 157.35546875, 170.28125, 183.20703125, 196.1328125, 209.05859375, 221.984375, 234.91015625, 247.8359375, 260.76171875, 273.6875, 286.61328125, 299.5390625, 312.46484375, 325.390625, 338.31640625, 351.2421875, 364.16796875, 377.09375, 390.01953125, 402.9453125, 415.87109375, 428.796875, 441.72265625, 454.6484375, 467.57421875, 480.5]}, "gradients/decoder.transformer.h.0.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 5.0, 3.0, 6.0, 6.0, 6.0, 9.0, 14.0, 27.0, 42.0, 65.0, 100.0, 169.0, 226.0, 315.0, 505.0, 727.0, 1181.0, 1832.0, 2860.0, 4306.0, 6664.0, 10467.0, 16431.0, 25416.0, 39736.0, 63520.0, 110611.0, 260160.0, 229855.0, 105107.0, 60998.0, 38531.0, 24339.0, 15762.0, 10126.0, 6559.0, 4231.0, 2772.0, 1729.0, 1125.0, 739.0, 475.0, 304.0, 196.0, 111.0, 71.0, 49.0, 34.0, 18.0, 13.0, 4.0, 7.0, 2.0, 2.0, 0.0, 1.0, 4.0], "bins": [-17.6875, -17.1815185546875, -16.675537109375, -16.1695556640625, -15.66357421875, -15.1575927734375, -14.651611328125, -14.1456298828125, -13.6396484375, -13.1336669921875, -12.627685546875, -12.1217041015625, -11.61572265625, -11.1097412109375, -10.603759765625, -10.0977783203125, -9.591796875, -9.0858154296875, -8.579833984375, -8.0738525390625, -7.56787109375, -7.0618896484375, -6.555908203125, -6.0499267578125, -5.5439453125, -5.0379638671875, -4.531982421875, -4.0260009765625, -3.52001953125, -3.0140380859375, -2.508056640625, -2.0020751953125, -1.49609375, -0.9901123046875, -0.484130859375, 0.0218505859375, 0.52783203125, 1.0338134765625, 1.539794921875, 2.0457763671875, 2.5517578125, 3.0577392578125, 3.563720703125, 4.0697021484375, 4.57568359375, 5.0816650390625, 5.587646484375, 6.0936279296875, 6.599609375, 7.1055908203125, 7.611572265625, 8.1175537109375, 8.62353515625, 9.1295166015625, 9.635498046875, 10.1414794921875, 10.6474609375, 11.1534423828125, 11.659423828125, 12.1654052734375, 12.67138671875, 13.1773681640625, 13.683349609375, 14.1893310546875, 14.6953125]}, "gradients/decoder.transformer.h.0.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 3.0, 0.0, 1.0, 3.0, 1.0, 4.0, 3.0, 4.0, 3.0, 4.0, 3.0, 9.0, 16.0, 12.0, 21.0, 11.0, 22.0, 15.0, 22.0, 15.0, 24.0, 26.0, 42.0, 30.0, 36.0, 34.0, 34.0, 42.0, 44.0, 43.0, 1051.0, 40.0, 38.0, 39.0, 36.0, 33.0, 29.0, 20.0, 29.0, 23.0, 29.0, 16.0, 18.0, 24.0, 12.0, 13.0, 10.0, 9.0, 11.0, 6.0, 10.0, 2.0, 2.0, 3.0, 4.0, 3.0, 2.0, 1.0, 1.0, 1.0, 4.0], "bins": [-355.75, -345.1171875, -334.484375, -323.8515625, -313.21875, -302.5859375, -291.953125, -281.3203125, -270.6875, -260.0546875, -249.421875, -238.7890625, -228.15625, -217.5234375, -206.890625, -196.2578125, -185.625, -174.9921875, -164.359375, -153.7265625, -143.09375, -132.4609375, -121.828125, -111.1953125, -100.5625, -89.9296875, -79.296875, -68.6640625, -58.03125, -47.3984375, -36.765625, -26.1328125, -15.5, -4.8671875, 5.765625, 16.3984375, 27.03125, 37.6640625, 48.296875, 58.9296875, 69.5625, 80.1953125, 90.828125, 101.4609375, 112.09375, 122.7265625, 133.359375, 143.9921875, 154.625, 165.2578125, 175.890625, 186.5234375, 197.15625, 207.7890625, 218.421875, 229.0546875, 239.6875, 250.3203125, 260.953125, 271.5859375, 282.21875, 292.8515625, 303.484375, 314.1171875, 324.75]}, "gradients/decoder.transformer.h.0.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 5.0, 9.0, 15.0, 27.0, 32.0, 46.0, 58.0, 101.0, 119.0, 181.0, 231.0, 286.0, 381.0, 594.0, 797.0, 979.0, 1451.0, 1915.0, 2696.0, 3670.0, 5069.0, 7023.0, 9681.0, 13745.0, 19751.0, 28468.0, 41962.0, 63584.0, 102188.0, 1277430.0, 212664.0, 101187.0, 62934.0, 41595.0, 27946.0, 19759.0, 13647.0, 9604.0, 6855.0, 4887.0, 3625.0, 2752.0, 1984.0, 1393.0, 1006.0, 756.0, 559.0, 402.0, 289.0, 209.0, 181.0, 126.0, 97.0, 51.0, 49.0, 38.0, 26.0, 17.0, 8.0, 4.0, 4.0], "bins": [-8.5859375, -8.3255615234375, -8.065185546875, -7.8048095703125, -7.54443359375, -7.2840576171875, -7.023681640625, -6.7633056640625, -6.5029296875, -6.2425537109375, -5.982177734375, -5.7218017578125, -5.46142578125, -5.2010498046875, -4.940673828125, -4.6802978515625, -4.419921875, -4.1595458984375, -3.899169921875, -3.6387939453125, -3.37841796875, -3.1180419921875, -2.857666015625, -2.5972900390625, -2.3369140625, -2.0765380859375, -1.816162109375, -1.5557861328125, -1.29541015625, -1.0350341796875, -0.774658203125, -0.5142822265625, -0.25390625, 0.0064697265625, 0.266845703125, 0.5272216796875, 0.78759765625, 1.0479736328125, 1.308349609375, 1.5687255859375, 1.8291015625, 2.0894775390625, 2.349853515625, 2.6102294921875, 2.87060546875, 3.1309814453125, 3.391357421875, 3.6517333984375, 3.912109375, 4.1724853515625, 4.432861328125, 4.6932373046875, 4.95361328125, 5.2139892578125, 5.474365234375, 5.7347412109375, 5.9951171875, 6.2554931640625, 6.515869140625, 6.7762451171875, 7.03662109375, 7.2969970703125, 7.557373046875, 7.8177490234375, 8.078125]}, "gradients/decoder.transformer.h.0.crossattention.q_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 5.0, 3.0, 5.0, 9.0, 4.0, 2.0, 6.0, 6.0, 13.0, 11.0, 9.0, 16.0, 16.0, 19.0, 30.0, 26.0, 29.0, 28.0, 28.0, 40.0, 51.0, 55.0, 57.0, 45.0, 40.0, 38.0, 47.0, 59.0, 33.0, 42.0, 36.0, 30.0, 26.0, 19.0, 30.0, 17.0, 17.0, 13.0, 12.0, 11.0, 6.0, 5.0, 8.0, 1.0, 3.0, 1.0, 5.0, 0.0, 1.0, 2.0, 0.0, 0.0, 2.0, 0.0, 2.0], "bins": [-0.0009617805480957031, -0.0009326785802841187, -0.0009035766124725342, -0.0008744746446609497, -0.0008453726768493652, -0.0008162707090377808, -0.0007871687412261963, -0.0007580667734146118, -0.0007289648056030273, -0.0006998628377914429, -0.0006707608699798584, -0.0006416589021682739, -0.0006125569343566895, -0.000583454966545105, -0.0005543529987335205, -0.000525251030921936, -0.0004961490631103516, -0.0004670470952987671, -0.0004379451274871826, -0.00040884315967559814, -0.00037974119186401367, -0.0003506392240524292, -0.0003215372562408447, -0.00029243528842926025, -0.0002633333206176758, -0.0002342313528060913, -0.00020512938499450684, -0.00017602741718292236, -0.0001469254493713379, -0.00011782348155975342, -8.872151374816895e-05, -5.961954593658447e-05, -3.0517578125e-05, -1.4156103134155273e-06, 2.7686357498168945e-05, 5.678832530975342e-05, 8.589029312133789e-05, 0.00011499226093292236, 0.00014409422874450684, 0.0001731961965560913, 0.00020229816436767578, 0.00023140013217926025, 0.0002605020999908447, 0.0002896040678024292, 0.00031870603561401367, 0.00034780800342559814, 0.0003769099712371826, 0.0004060119390487671, 0.00043511390686035156, 0.00046421587467193604, 0.0004933178424835205, 0.000522419810295105, 0.0005515217781066895, 0.0005806237459182739, 0.0006097257137298584, 0.0006388276815414429, 0.0006679296493530273, 0.0006970316171646118, 0.0007261335849761963, 0.0007552355527877808, 0.0007843375205993652, 0.0008134394884109497, 0.0008425414562225342, 0.0008716434240341187, 0.0009007453918457031]}, "gradients/decoder.transformer.h.0.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 4.0, 3.0, 4.0, 2.0, 10.0, 6.0, 7.0, 12.0, 15.0, 23.0, 32.0, 44.0, 64.0, 78.0, 108.0, 172.0, 257.0, 305.0, 484.0, 655.0, 1145.0, 2000.0, 3987.0, 8696.0, 22446.0, 69308.0, 374279.0, 446452.0, 75038.0, 24059.0, 9258.0, 4110.0, 1994.0, 1193.0, 729.0, 452.0, 327.0, 170.0, 164.0, 136.0, 92.0, 53.0, 38.0, 35.0, 38.0, 31.0, 20.0, 9.0, 7.0, 4.0, 6.0, 0.0, 2.0, 2.0, 1.0, 0.0, 3.0], "bins": [-0.005115509033203125, -0.00496518611907959, -0.004814863204956055, -0.0046645402908325195, -0.004514217376708984, -0.004363894462585449, -0.004213571548461914, -0.004063248634338379, -0.003912925720214844, -0.0037626028060913086, -0.0036122798919677734, -0.0034619569778442383, -0.003311634063720703, -0.003161311149597168, -0.003010988235473633, -0.0028606653213500977, -0.0027103424072265625, -0.0025600194931030273, -0.002409696578979492, -0.002259373664855957, -0.002109050750732422, -0.0019587278366088867, -0.0018084049224853516, -0.0016580820083618164, -0.0015077590942382812, -0.001357436180114746, -0.001207113265991211, -0.0010567903518676758, -0.0009064674377441406, -0.0007561445236206055, -0.0006058216094970703, -0.00045549869537353516, -0.00030517578125, -0.00015485286712646484, -4.5299530029296875e-06, 0.00014579296112060547, 0.0002961158752441406, 0.0004464387893676758, 0.0005967617034912109, 0.0007470846176147461, 0.0008974075317382812, 0.0010477304458618164, 0.0011980533599853516, 0.0013483762741088867, 0.0014986991882324219, 0.001649022102355957, 0.0017993450164794922, 0.0019496679306030273, 0.0020999908447265625, 0.0022503137588500977, 0.002400636672973633, 0.002550959587097168, 0.002701282501220703, 0.0028516054153442383, 0.0030019283294677734, 0.0031522512435913086, 0.0033025741577148438, 0.003452897071838379, 0.003603219985961914, 0.0037535429000854492, 0.0039038658142089844, 0.0040541887283325195, 0.004204511642456055, 0.00435483455657959, 0.004505157470703125]}, "gradients/decoder.transformer.h.0.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 1.0, 1.0, 2.0, 3.0, 2.0, 1.0, 5.0, 5.0, 13.0, 16.0, 21.0, 33.0, 50.0, 112.0, 195.0, 241.0, 136.0, 56.0, 37.0, 28.0, 21.0, 6.0, 8.0, 8.0, 1.0, 2.0, 4.0, 3.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0013164895353838801, -0.0012820803094655275, -0.001247670967131853, -0.0012132617412135005, -0.001178852515295148, -0.0011444432893767953, -0.0011100339470431209, -0.0010756247211247683, -0.0010412154952064157, -0.001006806269288063, -0.0009723969851620495, -0.000937987701036036, -0.0009035784751176834, -0.0008691691909916699, -0.0008347599068656564, -0.0008003506809473038, -0.0007659413968212903, -0.0007315321126952767, -0.0006971228867769241, -0.0006627136026509106, -0.000628304376732558, -0.0005938950926065445, -0.0005594858666881919, -0.0005250765825621784, -0.0004906672984361649, -0.0004562580434139818, -0.00042184878839179873, -0.0003874395042657852, -0.0003530302783474326, -0.0003186209942214191, -0.00028421173919923604, -0.000249802484177053, -0.00021539320005103946, -0.0001809839450288564, -0.00014657469000667334, -0.00011216542043257505, -7.775616541039199e-05, -4.3346910388208926e-05, -8.937640814110637e-06, 2.5471614208072424e-05, 5.9880869230255485e-05, 9.429012425243855e-05, 0.0001286993792746216, 0.0001631086488487199, 0.00019751790387090296, 0.00023192715889308602, 0.0002663364284671843, 0.00030074568348936737, 0.0003351549385115504, 0.0003695641935337335, 0.00040397344855591655, 0.00043838273268193007, 0.00047279195860028267, 0.0005072012427262962, 0.0005416105268523097, 0.0005760197527706623, 0.0006104289786890149, 0.0006448382628150284, 0.000679247488733381, 0.0007136567728593946, 0.0007480659987777472, 0.0007824752829037607, 0.0008168845670297742, 0.0008512937929481268, 0.0008857030770741403]}, "gradients/decoder.transformer.h.0.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 2.0, 1.0, 2.0, 0.0, 4.0, 8.0, 7.0, 9.0, 17.0, 10.0, 21.0, 13.0, 12.0, 31.0, 31.0, 27.0, 22.0, 37.0, 40.0, 38.0, 31.0, 44.0, 48.0, 37.0, 36.0, 38.0, 49.0, 49.0, 35.0, 34.0, 33.0, 35.0, 23.0, 27.0, 18.0, 26.0, 14.0, 18.0, 22.0, 14.0, 11.0, 7.0, 12.0, 5.0, 5.0, 2.0, 6.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.0006003975868225098, -0.0005816221237182617, -0.0005628466606140137, -0.0005440711975097656, -0.0005252957344055176, -0.0005065202713012695, -0.0004877448081970215, -0.00046896934509277344, -0.0004501938819885254, -0.00043141841888427734, -0.0004126429557800293, -0.00039386749267578125, -0.0003750920295715332, -0.00035631656646728516, -0.0003375411033630371, -0.00031876564025878906, -0.000299990177154541, -0.00028121471405029297, -0.0002624392509460449, -0.00024366378784179688, -0.00022488832473754883, -0.00020611286163330078, -0.00018733739852905273, -0.0001685619354248047, -0.00014978647232055664, -0.0001310110092163086, -0.00011223554611206055, -9.34600830078125e-05, -7.468461990356445e-05, -5.5909156799316406e-05, -3.713369369506836e-05, -1.8358230590820312e-05, 4.172325134277344e-07, 1.919269561767578e-05, 3.796815872192383e-05, 5.6743621826171875e-05, 7.551908493041992e-05, 9.429454803466797e-05, 0.00011307001113891602, 0.00013184547424316406, 0.0001506209373474121, 0.00016939640045166016, 0.0001881718635559082, 0.00020694732666015625, 0.0002257227897644043, 0.00024449825286865234, 0.0002632737159729004, 0.00028204917907714844, 0.0003008246421813965, 0.00031960010528564453, 0.0003383755683898926, 0.0003571510314941406, 0.00037592649459838867, 0.0003947019577026367, 0.00041347742080688477, 0.0004322528839111328, 0.00045102834701538086, 0.0004698038101196289, 0.000488579273223877, 0.000507354736328125, 0.000526130199432373, 0.0005449056625366211, 0.0005636811256408691, 0.0005824565887451172, 0.0006012320518493652]}, "gradients/decoder.transformer.h.0.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 4.0, 4.0, 4.0, 6.0, 14.0, 10.0, 13.0, 19.0, 21.0, 19.0, 19.0, 24.0, 27.0, 27.0, 34.0, 35.0, 47.0, 33.0, 40.0, 47.0, 40.0, 37.0, 33.0, 41.0, 55.0, 41.0, 32.0, 41.0, 31.0, 32.0, 15.0, 30.0, 21.0, 20.0, 23.0, 10.0, 15.0, 8.0, 12.0, 5.0, 6.0, 2.0, 4.0, 4.0, 4.0, 3.0, 4.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-346.75, -333.82421875, -320.8984375, -307.97265625, -295.046875, -282.12109375, -269.1953125, -256.26953125, -243.34375, -230.41796875, -217.4921875, -204.56640625, -191.640625, -178.71484375, -165.7890625, -152.86328125, -139.9375, -127.01171875, -114.0859375, -101.16015625, -88.234375, -75.30859375, -62.3828125, -49.45703125, -36.53125, -23.60546875, -10.6796875, 2.24609375, 15.171875, 28.09765625, 41.0234375, 53.94921875, 66.875, 79.80078125, 92.7265625, 105.65234375, 118.578125, 131.50390625, 144.4296875, 157.35546875, 170.28125, 183.20703125, 196.1328125, 209.05859375, 221.984375, 234.91015625, 247.8359375, 260.76171875, 273.6875, 286.61328125, 299.5390625, 312.46484375, 325.390625, 338.31640625, 351.2421875, 364.16796875, 377.09375, 390.01953125, 402.9453125, 415.87109375, 428.796875, 441.72265625, 454.6484375, 467.57421875, 480.5]}, "gradients/decoder.transformer.h.0.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 3.0, 6.0, 11.0, 12.0, 10.0, 16.0, 20.0, 28.0, 36.0, 31.0, 33.0, 67.0, 72.0, 104.0, 228.0, 455.0, 1349.0, 5840.0, 30688.0, 175504.0, 530958.0, 246988.0, 44813.0, 8196.0, 1781.0, 558.0, 246.0, 131.0, 96.0, 58.0, 57.0, 33.0, 37.0, 18.0, 17.0, 16.0, 10.0, 5.0, 9.0, 4.0, 6.0, 2.0, 4.0, 3.0, 2.0, 4.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-68.75, -66.166015625, -63.58203125, -60.998046875, -58.4140625, -55.830078125, -53.24609375, -50.662109375, -48.078125, -45.494140625, -42.91015625, -40.326171875, -37.7421875, -35.158203125, -32.57421875, -29.990234375, -27.40625, -24.822265625, -22.23828125, -19.654296875, -17.0703125, -14.486328125, -11.90234375, -9.318359375, -6.734375, -4.150390625, -1.56640625, 1.017578125, 3.6015625, 6.185546875, 8.76953125, 11.353515625, 13.9375, 16.521484375, 19.10546875, 21.689453125, 24.2734375, 26.857421875, 29.44140625, 32.025390625, 34.609375, 37.193359375, 39.77734375, 42.361328125, 44.9453125, 47.529296875, 50.11328125, 52.697265625, 55.28125, 57.865234375, 60.44921875, 63.033203125, 65.6171875, 68.201171875, 70.78515625, 73.369140625, 75.953125, 78.537109375, 81.12109375, 83.705078125, 86.2890625, 88.873046875, 91.45703125, 94.041015625, 96.625]}, "gradients/decoder.transformer.h.0.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 4.0, 3.0, 2.0, 2.0, 8.0, 5.0, 16.0, 23.0, 20.0, 30.0, 37.0, 37.0, 50.0, 53.0, 66.0, 65.0, 85.0, 2101.0, 66.0, 67.0, 74.0, 62.0, 40.0, 36.0, 32.0, 17.0, 15.0, 13.0, 8.0, 6.0, 10.0, 3.0, 3.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-844.5, -814.5390625, -784.578125, -754.6171875, -724.65625, -694.6953125, -664.734375, -634.7734375, -604.8125, -574.8515625, -544.890625, -514.9296875, -484.96875, -455.0078125, -425.046875, -395.0859375, -365.125, -335.1640625, -305.203125, -275.2421875, -245.28125, -215.3203125, -185.359375, -155.3984375, -125.4375, -95.4765625, -65.515625, -35.5546875, -5.59375, 24.3671875, 54.328125, 84.2890625, 114.25, 144.2109375, 174.171875, 204.1328125, 234.09375, 264.0546875, 294.015625, 323.9765625, 353.9375, 383.8984375, 413.859375, 443.8203125, 473.78125, 503.7421875, 533.703125, 563.6640625, 593.625, 623.5859375, 653.546875, 683.5078125, 713.46875, 743.4296875, 773.390625, 803.3515625, 833.3125, 863.2734375, 893.234375, 923.1953125, 953.15625, 983.1171875, 1013.078125, 1043.0390625, 1073.0]}, "gradients/decoder.transformer.h.0.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 1.0, 0.0, 4.0, 10.0, 7.0, 8.0, 9.0, 19.0, 15.0, 26.0, 48.0, 76.0, 108.0, 204.0, 312.0, 591.0, 1149.0, 2559.0, 9043.0, 61520.0, 2635272.0, 384871.0, 38830.0, 6638.0, 2125.0, 942.0, 534.0, 267.0, 213.0, 117.0, 59.0, 47.0, 25.0, 16.0, 17.0, 10.0, 4.0, 4.0, 0.0, 4.0, 5.0, 2.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0], "bins": [-87.8125, -85.2958984375, -82.779296875, -80.2626953125, -77.74609375, -75.2294921875, -72.712890625, -70.1962890625, -67.6796875, -65.1630859375, -62.646484375, -60.1298828125, -57.61328125, -55.0966796875, -52.580078125, -50.0634765625, -47.546875, -45.0302734375, -42.513671875, -39.9970703125, -37.48046875, -34.9638671875, -32.447265625, -29.9306640625, -27.4140625, -24.8974609375, -22.380859375, -19.8642578125, -17.34765625, -14.8310546875, -12.314453125, -9.7978515625, -7.28125, -4.7646484375, -2.248046875, 0.2685546875, 2.78515625, 5.3017578125, 7.818359375, 10.3349609375, 12.8515625, 15.3681640625, 17.884765625, 20.4013671875, 22.91796875, 25.4345703125, 27.951171875, 30.4677734375, 32.984375, 35.5009765625, 38.017578125, 40.5341796875, 43.05078125, 45.5673828125, 48.083984375, 50.6005859375, 53.1171875, 55.6337890625, 58.150390625, 60.6669921875, 63.18359375, 65.7001953125, 68.216796875, 70.7333984375, 73.25]}, "gradients/decoder.transformer.h.0.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 2.0, 3.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 4.0, 2.0, 3.0, 5.0, 10.0, 10.0, 18.0, 34.0, 57.0, 140.0, 188.0, 228.0, 151.0, 71.0, 32.0, 19.0, 17.0, 5.0, 5.0, 2.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0], "bins": [-2878.433349609375, -2819.80078125, -2761.16845703125, -2702.535888671875, -2643.903564453125, -2585.27099609375, -2526.638671875, -2468.006103515625, -2409.373779296875, -2350.7412109375, -2292.10888671875, -2233.476318359375, -2174.843994140625, -2116.21142578125, -2057.5791015625, -1998.9466552734375, -1940.314208984375, -1881.6817626953125, -1823.04931640625, -1764.4168701171875, -1705.784423828125, -1647.1519775390625, -1588.51953125, -1529.8870849609375, -1471.2545166015625, -1412.6220703125, -1353.9896240234375, -1295.357177734375, -1236.7247314453125, -1178.09228515625, -1119.4598388671875, -1060.827392578125, -1002.1949462890625, -943.5625, -884.9300537109375, -826.297607421875, -767.6651611328125, -709.03271484375, -650.4002685546875, -591.767822265625, -533.1353759765625, -474.5029296875, -415.8704833984375, -357.238037109375, -298.6055908203125, -239.97311401367188, -181.34066772460938, -122.70822143554688, -64.07574462890625, -5.443294525146484, 53.18915557861328, 111.82160949707031, 170.4540557861328, 229.08651733398438, 287.7189636230469, 346.3514099121094, 404.9838562011719, 463.6163024902344, 522.248779296875, 580.8812255859375, 639.513671875, 698.1461181640625, 756.778564453125, 815.4110107421875, 874.04345703125]}, "gradients/decoder.transformer.h.0.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 2.0, 7.0, 2.0, 10.0, 4.0, 9.0, 8.0, 9.0, 5.0, 12.0, 25.0, 27.0, 22.0, 27.0, 25.0, 34.0, 31.0, 36.0, 39.0, 37.0, 41.0, 39.0, 35.0, 35.0, 46.0, 49.0, 40.0, 49.0, 29.0, 32.0, 39.0, 30.0, 23.0, 32.0, 22.0, 22.0, 14.0, 7.0, 12.0, 7.0, 7.0, 7.0, 4.0, 8.0, 6.0, 3.0, 0.0, 0.0, 1.0, 3.0, 0.0, 2.0], "bins": [-1313.2681884765625, -1276.1337890625, -1238.99951171875, -1201.8651123046875, -1164.730712890625, -1127.596435546875, -1090.4620361328125, -1053.32763671875, -1016.193359375, -979.0590209960938, -941.9246215820312, -904.790283203125, -867.6559448242188, -830.5216064453125, -793.38720703125, -756.2528686523438, -719.1184692382812, -681.984130859375, -644.8497314453125, -607.7153930664062, -570.5810546875, -533.4466552734375, -496.31231689453125, -459.177978515625, -422.0436096191406, -384.90924072265625, -347.77490234375, -310.6405334472656, -273.50616455078125, -236.371826171875, -199.23745727539062, -162.10311889648438, -124.96875, -87.83439636230469, -50.700035095214844, -13.565673828125, 23.568679809570312, 60.703033447265625, 97.83740234375, 134.97174072265625, 172.10610961914062, 209.24046325683594, 246.37481689453125, 283.5091857910156, 320.6435546875, 357.77789306640625, 394.9122619628906, 432.0466003417969, 469.18096923828125, 506.3153381347656, 543.44970703125, 580.5840454101562, 617.7183837890625, 654.852783203125, 691.9871215820312, 729.1214599609375, 766.255859375, 803.3901977539062, 840.5245971679688, 877.658935546875, 914.7932739257812, 951.9276123046875, 989.06201171875, 1026.1962890625, 1063.3306884765625]}, "gradients/decoder.transformer.wpe.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 4.0, 3.0, 3.0, 3.0, 7.0, 11.0, 8.0, 11.0, 10.0, 18.0, 28.0, 33.0, 40.0, 38.0, 64.0, 80.0, 94.0, 120.0, 160.0, 254.0, 312.0, 433.0, 553.0, 702.0, 1011.0, 1040708.0, 1013.0, 656.0, 532.0, 396.0, 331.0, 182.0, 162.0, 142.0, 108.0, 76.0, 71.0, 52.0, 26.0, 22.0, 18.0, 23.0, 9.0, 8.0, 7.0, 9.0, 3.0, 3.0, 5.0, 2.0, 1.0, 1.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-231.0337677001953, -223.18121337890625, -215.32864379882812, -207.47608947753906, -199.62353515625, -191.77096557617188, -183.9184112548828, -176.06585693359375, -168.21328735351562, -160.36073303222656, -152.50816345214844, -144.65560913085938, -136.80303955078125, -128.9504852294922, -121.09793090820312, -113.24536895751953, -105.39280700683594, -97.54024505615234, -89.68768310546875, -81.83512878417969, -73.9825668334961, -66.1300048828125, -58.27744674682617, -50.424888610839844, -42.57232666015625, -34.719764709472656, -26.867206573486328, -19.014646530151367, -11.162086486816406, -3.3095245361328125, 4.543033599853516, 12.395591735839844, 20.2481689453125, 28.10072898864746, 35.95328903198242, 43.80584716796875, 51.658409118652344, 59.51097106933594, 67.363525390625, 75.2160873413086, 83.06864929199219, 90.92121124267578, 98.77377319335938, 106.62632751464844, 114.47888946533203, 122.33145141601562, 130.1840057373047, 138.03656005859375, 145.88912963867188, 153.74168395996094, 161.59425354003906, 169.44680786132812, 177.29937744140625, 185.1519317626953, 193.00448608398438, 200.8570556640625, 208.70960998535156, 216.56216430664062, 224.41473388671875, 232.2672882080078, 240.11984252929688, 247.972412109375, 255.82496643066406, 263.6775207519531, 271.53009033203125]}, "gradients/decoder.transformer.wte.weight": {"_type": "histogram", "values": [1.0, 2.0, 3.0, 2.0, 0.0, 1.0, 1.0, 2.0, 5.0, 4.0, 11.0, 12.0, 11.0, 16.0, 16.0, 35.0, 55.0, 119.0, 597.0, 16775.0, 51444104.0, 1001.0, 165.0, 57.0, 33.0, 17.0, 20.0, 23.0, 16.0, 8.0, 8.0, 7.0, 6.0, 16.0, 2.0, 5.0, 8.0, 2.0, 0.0, 1.0, 4.0, 0.0, 5.0, 1.0, 3.0, 2.0, 2.0, 2.0, 3.0, 3.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-1648.140380859375, -1566.515380859375, -1484.8905029296875, -1403.2655029296875, -1321.640625, -1240.015625, -1158.390625, -1076.7657470703125, -995.1408081054688, -913.515869140625, -831.8909301757812, -750.2659912109375, -668.6409912109375, -587.01611328125, -505.39111328125, -423.76617431640625, -342.1412353515625, -260.51629638671875, -178.89134216308594, -97.26638793945312, -15.641448974609375, 65.98348999023438, 147.60845947265625, 229.2333984375, 310.85833740234375, 392.4832763671875, 474.10821533203125, 555.733154296875, 637.358154296875, 718.9830322265625, 800.6080322265625, 882.2329711914062, 963.85791015625, 1045.48291015625, 1127.1077880859375, 1208.7327880859375, 1290.357666015625, 1371.982666015625, 1453.607666015625, 1535.2325439453125, 1616.857421875, 1698.482421875, 1780.1072998046875, 1861.7322998046875, 1943.357177734375, 2024.982177734375, 2106.607177734375, 2188.23193359375, 2269.85693359375, 2351.48193359375, 2433.10693359375, 2514.731689453125, 2596.356689453125, 2677.981689453125, 2759.606689453125, 2841.2314453125, 2922.856689453125, 3004.481689453125, 3086.106689453125, 3167.7314453125, 3249.3564453125, 3330.9814453125, 3412.6064453125, 3494.2314453125, 3575.856201171875]}, "gradients/encoder.adapter.layers.2.conv.weight": {"_type": "histogram", "values": [2.0, 7.0, 5.0, 15.0, 12.0, 27.0, 40.0, 43.0, 67.0, 97.0, 156.0, 206.0, 283.0, 500.0, 680.0, 882.0, 1387.0, 2068.0, 3002.0, 4231.0, 6155.0, 8993.0, 13003.0, 18767.0, 29140.0, 43629.0, 67243.0, 106293.0, 176591.0, 318656.0, 1713297.0, 2971185.0, 319959.0, 177262.0, 107114.0, 67336.0, 43143.0, 28917.0, 19275.0, 12882.0, 9101.0, 6042.0, 4187.0, 2847.0, 2004.0, 1532.0, 1013.0, 686.0, 484.0, 329.0, 240.0, 153.0, 105.0, 55.0, 40.0, 26.0, 26.0, 11.0, 12.0, 3.0, 3.0, 4.0, 1.0, 2.0], "bins": [-3.765625, -3.6441650390625, -3.522705078125, -3.4012451171875, -3.27978515625, -3.1583251953125, -3.036865234375, -2.9154052734375, -2.7939453125, -2.6724853515625, -2.551025390625, -2.4295654296875, -2.30810546875, -2.1866455078125, -2.065185546875, -1.9437255859375, -1.822265625, -1.7008056640625, -1.579345703125, -1.4578857421875, -1.33642578125, -1.2149658203125, -1.093505859375, -0.9720458984375, -0.8505859375, -0.7291259765625, -0.607666015625, -0.4862060546875, -0.36474609375, -0.2432861328125, -0.121826171875, -0.0003662109375, 0.12109375, 0.2425537109375, 0.364013671875, 0.4854736328125, 0.60693359375, 0.7283935546875, 0.849853515625, 0.9713134765625, 1.0927734375, 1.2142333984375, 1.335693359375, 1.4571533203125, 1.57861328125, 1.7000732421875, 1.821533203125, 1.9429931640625, 2.064453125, 2.1859130859375, 2.307373046875, 2.4288330078125, 2.55029296875, 2.6717529296875, 2.793212890625, 2.9146728515625, 3.0361328125, 3.1575927734375, 3.279052734375, 3.4005126953125, 3.52197265625, 3.6434326171875, 3.764892578125, 3.8863525390625, 4.0078125]}, "gradients/encoder.adapter.layers.2.conv.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 2.0, 2.0, 2.0, 3.0, 6.0, 2.0, 0.0, 6.0, 5.0, 10.0, 10.0, 6.0, 7.0, 12.0, 15.0, 22.0, 21.0, 22.0, 22.0, 24.0, 26.0, 33.0, 37.0, 32.0, 42.0, 47.0, 45.0, 42.0, 1063.0, 43.0, 30.0, 45.0, 31.0, 40.0, 32.0, 31.0, 40.0, 33.0, 22.0, 15.0, 18.0, 19.0, 9.0, 13.0, 9.0, 7.0, 10.0, 9.0, 4.0, 5.0, 4.0, 1.0, 2.0, 1.0, 0.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-199.375, -192.974609375, -186.57421875, -180.173828125, -173.7734375, -167.373046875, -160.97265625, -154.572265625, -148.171875, -141.771484375, -135.37109375, -128.970703125, -122.5703125, -116.169921875, -109.76953125, -103.369140625, -96.96875, -90.568359375, -84.16796875, -77.767578125, -71.3671875, -64.966796875, -58.56640625, -52.166015625, -45.765625, -39.365234375, -32.96484375, -26.564453125, -20.1640625, -13.763671875, -7.36328125, -0.962890625, 5.4375, 11.837890625, 18.23828125, 24.638671875, 31.0390625, 37.439453125, 43.83984375, 50.240234375, 56.640625, 63.041015625, 69.44140625, 75.841796875, 82.2421875, 88.642578125, 95.04296875, 101.443359375, 107.84375, 114.244140625, 120.64453125, 127.044921875, 133.4453125, 139.845703125, 146.24609375, 152.646484375, 159.046875, 165.447265625, 171.84765625, 178.248046875, 184.6484375, 191.048828125, 197.44921875, 203.849609375, 210.25]}, "gradients/encoder.adapter.layers.1.conv.weight": {"_type": "histogram", "values": [3.0, 0.0, 2.0, 1.0, 4.0, 4.0, 6.0, 11.0, 12.0, 12.0, 30.0, 28.0, 48.0, 73.0, 107.0, 160.0, 220.0, 344.0, 511.0, 726.0, 1174.0, 1654.0, 2769.0, 4236.0, 6792.0, 10884.0, 17713.0, 29792.0, 50796.0, 85809.0, 148249.0, 266786.0, 544002.0, 4184614.0, 417005.0, 215454.0, 121845.0, 71093.0, 42070.0, 25427.0, 15379.0, 9312.0, 5981.0, 3584.0, 2345.0, 1453.0, 979.0, 640.0, 414.0, 284.0, 220.0, 118.0, 80.0, 57.0, 52.0, 22.0, 16.0, 19.0, 12.0, 7.0, 4.0, 3.0, 3.0, 6.0], "bins": [-5.73046875, -5.55853271484375, -5.3865966796875, -5.21466064453125, -5.042724609375, -4.87078857421875, -4.6988525390625, -4.52691650390625, -4.35498046875, -4.18304443359375, -4.0111083984375, -3.83917236328125, -3.667236328125, -3.49530029296875, -3.3233642578125, -3.15142822265625, -2.9794921875, -2.80755615234375, -2.6356201171875, -2.46368408203125, -2.291748046875, -2.11981201171875, -1.9478759765625, -1.77593994140625, -1.60400390625, -1.43206787109375, -1.2601318359375, -1.08819580078125, -0.916259765625, -0.74432373046875, -0.5723876953125, -0.40045166015625, -0.228515625, -0.05657958984375, 0.1153564453125, 0.28729248046875, 0.459228515625, 0.63116455078125, 0.8031005859375, 0.97503662109375, 1.14697265625, 1.31890869140625, 1.4908447265625, 1.66278076171875, 1.834716796875, 2.00665283203125, 2.1785888671875, 2.35052490234375, 2.5224609375, 2.69439697265625, 2.8663330078125, 3.03826904296875, 3.210205078125, 3.38214111328125, 3.5540771484375, 3.72601318359375, 3.89794921875, 4.06988525390625, 4.2418212890625, 4.41375732421875, 4.585693359375, 4.75762939453125, 4.9295654296875, 5.10150146484375, 5.2734375]}, "gradients/encoder.adapter.layers.1.conv.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 7.0, 3.0, 2.0, 2.0, 7.0, 10.0, 4.0, 5.0, 20.0, 18.0, 27.0, 22.0, 30.0, 34.0, 37.0, 41.0, 40.0, 48.0, 55.0, 44.0, 43.0, 1072.0, 39.0, 38.0, 43.0, 43.0, 35.0, 37.0, 28.0, 32.0, 32.0, 26.0, 23.0, 18.0, 16.0, 16.0, 14.0, 7.0, 8.0, 3.0, 4.0, 0.0, 3.0, 1.0, 3.0, 0.0, 1.0, 0.0, 1.0], "bins": [-377.5, -367.13671875, -356.7734375, -346.41015625, -336.046875, -325.68359375, -315.3203125, -304.95703125, -294.59375, -284.23046875, -273.8671875, -263.50390625, -253.140625, -242.77734375, -232.4140625, -222.05078125, -211.6875, -201.32421875, -190.9609375, -180.59765625, -170.234375, -159.87109375, -149.5078125, -139.14453125, -128.78125, -118.41796875, -108.0546875, -97.69140625, -87.328125, -76.96484375, -66.6015625, -56.23828125, -45.875, -35.51171875, -25.1484375, -14.78515625, -4.421875, 5.94140625, 16.3046875, 26.66796875, 37.03125, 47.39453125, 57.7578125, 68.12109375, 78.484375, 88.84765625, 99.2109375, 109.57421875, 119.9375, 130.30078125, 140.6640625, 151.02734375, 161.390625, 171.75390625, 182.1171875, 192.48046875, 202.84375, 213.20703125, 223.5703125, 233.93359375, 244.296875, 254.66015625, 265.0234375, 275.38671875, 285.75]}, "gradients/encoder.adapter.layers.0.conv.weight": {"_type": "histogram", "values": [6.0, 11.0, 2.0, 17.0, 16.0, 11.0, 23.0, 39.0, 28.0, 78.0, 59.0, 132.0, 158.0, 217.0, 282.0, 326.0, 461.0, 709.0, 938.0, 1202.0, 1631.0, 2263.0, 3472.0, 4928.0, 7633.0, 11913.0, 20263.0, 37653.0, 99518.0, 950460.0, 4924199.0, 120708.0, 42276.0, 21498.0, 12615.0, 7889.0, 5230.0, 3636.0, 2346.0, 1802.0, 1226.0, 922.0, 698.0, 455.0, 430.0, 304.0, 228.0, 123.0, 114.0, 82.0, 51.0, 59.0, 38.0, 19.0, 16.0, 24.0, 4.0, 0.0, 3.0, 0.0, 3.0, 0.0, 6.0, 3.0], "bins": [-9.6796875, -9.3577880859375, -9.035888671875, -8.7139892578125, -8.39208984375, -8.0701904296875, -7.748291015625, -7.4263916015625, -7.1044921875, -6.7825927734375, -6.460693359375, -6.1387939453125, -5.81689453125, -5.4949951171875, -5.173095703125, -4.8511962890625, -4.529296875, -4.2073974609375, -3.885498046875, -3.5635986328125, -3.24169921875, -2.9197998046875, -2.597900390625, -2.2760009765625, -1.9541015625, -1.6322021484375, -1.310302734375, -0.9884033203125, -0.66650390625, -0.3446044921875, -0.022705078125, 0.2991943359375, 0.62109375, 0.9429931640625, 1.264892578125, 1.5867919921875, 1.90869140625, 2.2305908203125, 2.552490234375, 2.8743896484375, 3.1962890625, 3.5181884765625, 3.840087890625, 4.1619873046875, 4.48388671875, 4.8057861328125, 5.127685546875, 5.4495849609375, 5.771484375, 6.0933837890625, 6.415283203125, 6.7371826171875, 7.05908203125, 7.3809814453125, 7.702880859375, 8.0247802734375, 8.3466796875, 8.6685791015625, 8.990478515625, 9.3123779296875, 9.63427734375, 9.9561767578125, 10.278076171875, 10.5999755859375, 10.921875]}, "gradients/encoder.adapter.layers.0.conv.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 3.0, 1.0, 2.0, 5.0, 2.0, 8.0, 7.0, 11.0, 7.0, 19.0, 14.0, 6.0, 13.0, 16.0, 15.0, 31.0, 19.0, 33.0, 39.0, 26.0, 30.0, 35.0, 31.0, 52.0, 29.0, 47.0, 1042.0, 40.0, 41.0, 40.0, 29.0, 40.0, 29.0, 41.0, 26.0, 30.0, 36.0, 21.0, 15.0, 19.0, 13.0, 16.0, 14.0, 6.0, 8.0, 7.0, 4.0, 8.0, 1.0, 3.0, 3.0, 5.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0], "bins": [-306.5, -296.30859375, -286.1171875, -275.92578125, -265.734375, -255.54296875, -245.3515625, -235.16015625, -224.96875, -214.77734375, -204.5859375, -194.39453125, -184.203125, -174.01171875, -163.8203125, -153.62890625, -143.4375, -133.24609375, -123.0546875, -112.86328125, -102.671875, -92.48046875, -82.2890625, -72.09765625, -61.90625, -51.71484375, -41.5234375, -31.33203125, -21.140625, -10.94921875, -0.7578125, 9.43359375, 19.625, 29.81640625, 40.0078125, 50.19921875, 60.390625, 70.58203125, 80.7734375, 90.96484375, 101.15625, 111.34765625, 121.5390625, 131.73046875, 141.921875, 152.11328125, 162.3046875, 172.49609375, 182.6875, 192.87890625, 203.0703125, 213.26171875, 223.453125, 233.64453125, 243.8359375, 254.02734375, 264.21875, 274.41015625, 284.6015625, 294.79296875, 304.984375, 315.17578125, 325.3671875, 335.55859375, 345.75]}, "gradients/encoder.encoder.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 3.0, 7.0, 4.0, 4.0, 4.0, 10.0, 27.0, 53.0, 655.0, 141.0, 49.0, 26.0, 12.0, 9.0, 2.0, 2.0, 1.0, 1.0, 2.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6326.95068359375, -6150.32568359375, -5973.7001953125, -5797.0751953125, -5620.44970703125, -5443.82470703125, -5267.19921875, -5090.57421875, -4913.94921875, -4737.32421875, -4560.69873046875, -4384.07373046875, -4207.4482421875, -4030.8232421875, -3854.197998046875, -3677.57275390625, -3500.947265625, -3324.322021484375, -3147.69677734375, -2971.07177734375, -2794.4462890625, -2617.8212890625, -2441.196044921875, -2264.57080078125, -2087.945556640625, -1911.3203125, -1734.695068359375, -1558.0699462890625, -1381.4447021484375, -1204.8194580078125, -1028.1943359375, -851.569091796875, -674.943359375, -498.3181457519531, -321.69293212890625, -145.0677490234375, 31.5574951171875, 208.1827392578125, 384.807861328125, 561.43310546875, 738.058349609375, 914.68359375, 1091.308837890625, 1267.9339599609375, 1444.5592041015625, 1621.1844482421875, 1797.8095703125, 1974.434814453125, 2151.06005859375, 2327.685302734375, 2504.310546875, 2680.935546875, 2857.56103515625, 3034.18603515625, 3210.811279296875, 3387.4365234375, 3564.061767578125, 3740.68701171875, 3917.312255859375, 4093.9375, 4270.5625, 4447.18798828125, 4623.81298828125, 4800.4384765625, 4977.0634765625]}, "gradients/encoder.encoder.layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 1.0, 1.0, 4.0, 0.0, 6.0, 3.0, 7.0, 8.0, 11.0, 8.0, 9.0, 13.0, 13.0, 18.0, 18.0, 24.0, 27.0, 31.0, 40.0, 46.0, 55.0, 62.0, 61.0, 63.0, 49.0, 60.0, 52.0, 56.0, 38.0, 36.0, 27.0, 24.0, 27.0, 19.0, 15.0, 12.0, 21.0, 11.0, 3.0, 7.0, 8.0, 5.0, 2.0, 5.0, 0.0, 4.0, 1.0, 3.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1449.81494140625, -1395.9993896484375, -1342.1837158203125, -1288.3681640625, -1234.552490234375, -1180.7369384765625, -1126.92138671875, -1073.105712890625, -1019.2901000976562, -965.4744873046875, -911.6588745117188, -857.84326171875, -804.0277099609375, -750.2120361328125, -696.396484375, -642.5808715820312, -588.7652587890625, -534.9496459960938, -481.134033203125, -427.3184509277344, -373.5028381347656, -319.6872253417969, -265.87164306640625, -212.0560302734375, -158.24041748046875, -104.42481231689453, -50.60920715332031, 3.206390380859375, 57.022003173828125, 110.83761596679688, 164.6531982421875, 218.46881103515625, 272.284423828125, 326.10003662109375, 379.9156494140625, 433.7312316894531, 487.5468444824219, 541.3624267578125, 595.1780395507812, 648.99365234375, 702.8092651367188, 756.6248779296875, 810.4404907226562, 864.256103515625, 918.0716552734375, 971.8873291015625, 1025.702880859375, 1079.5185546875, 1133.3341064453125, 1187.149658203125, 1240.96533203125, 1294.7808837890625, 1348.5965576171875, 1402.412109375, 1456.227783203125, 1510.0433349609375, 1563.85888671875, 1617.6744384765625, 1671.4901123046875, 1725.3056640625, 1779.121337890625, 1832.9368896484375, 1886.75244140625, 1940.568115234375, 1994.3837890625]}, "gradients/encoder.encoder.layers.23.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 7.0, 4.0, 8.0, 8.0, 5.0, 28.0, 48.0, 54.0, 71.0, 67.0, 125.0, 167.0, 207.0, 236.0, 303.0, 380.0, 431.0, 527.0, 654.0, 795.0, 955.0, 1169.0, 1482.0, 1919.0, 2420.0, 3395.0, 5995.0, 20111.0, 4130412.0, 6881.0, 4040.0, 3037.0, 2221.0, 1717.0, 1327.0, 905.0, 678.0, 426.0, 298.0, 230.0, 150.0, 106.0, 69.0, 43.0, 37.0, 27.0, 20.0, 19.0, 12.0, 15.0, 10.0, 18.0, 4.0, 4.0, 4.0, 6.0, 5.0, 7.0, 1.0, 0.0, 1.0], "bins": [-7.6328125, -7.3878173828125, -7.142822265625, -6.8978271484375, -6.65283203125, -6.4078369140625, -6.162841796875, -5.9178466796875, -5.6728515625, -5.4278564453125, -5.182861328125, -4.9378662109375, -4.69287109375, -4.4478759765625, -4.202880859375, -3.9578857421875, -3.712890625, -3.4678955078125, -3.222900390625, -2.9779052734375, -2.73291015625, -2.4879150390625, -2.242919921875, -1.9979248046875, -1.7529296875, -1.5079345703125, -1.262939453125, -1.0179443359375, -0.77294921875, -0.5279541015625, -0.282958984375, -0.0379638671875, 0.20703125, 0.4520263671875, 0.697021484375, 0.9420166015625, 1.18701171875, 1.4320068359375, 1.677001953125, 1.9219970703125, 2.1669921875, 2.4119873046875, 2.656982421875, 2.9019775390625, 3.14697265625, 3.3919677734375, 3.636962890625, 3.8819580078125, 4.126953125, 4.3719482421875, 4.616943359375, 4.8619384765625, 5.10693359375, 5.3519287109375, 5.596923828125, 5.8419189453125, 6.0869140625, 6.3319091796875, 6.576904296875, 6.8218994140625, 7.06689453125, 7.3118896484375, 7.556884765625, 7.8018798828125, 8.046875]}, "gradients/encoder.encoder.layers.23.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 0.0, 1.0, 3.0, 2.0, 3.0, 3.0, 4.0, 3.0, 2.0, 6.0, 5.0, 5.0, 4.0, 11.0, 12.0, 21.0, 42.0, 789.0, 13.0, 16.0, 4.0, 11.0, 8.0, 4.0, 2.0, 7.0, 7.0, 7.0, 0.0, 3.0, 2.0, 2.0, 2.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.57763671875, -0.5590896606445312, -0.5405426025390625, -0.5219955444335938, -0.503448486328125, -0.48490142822265625, -0.4663543701171875, -0.44780731201171875, -0.42926025390625, -0.41071319580078125, -0.3921661376953125, -0.37361907958984375, -0.355072021484375, -0.33652496337890625, -0.3179779052734375, -0.29943084716796875, -0.2808837890625, -0.26233673095703125, -0.2437896728515625, -0.22524261474609375, -0.206695556640625, -0.18814849853515625, -0.1696014404296875, -0.15105438232421875, -0.13250732421875, -0.11396026611328125, -0.0954132080078125, -0.07686614990234375, -0.058319091796875, -0.03977203369140625, -0.0212249755859375, -0.00267791748046875, 0.015869140625, 0.03441619873046875, 0.0529632568359375, 0.07151031494140625, 0.090057373046875, 0.10860443115234375, 0.1271514892578125, 0.14569854736328125, 0.16424560546875, 0.18279266357421875, 0.2013397216796875, 0.21988677978515625, 0.238433837890625, 0.25698089599609375, 0.2755279541015625, 0.29407501220703125, 0.3126220703125, 0.33116912841796875, 0.3497161865234375, 0.36826324462890625, 0.386810302734375, 0.40535736083984375, 0.4239044189453125, 0.44245147705078125, 0.46099853515625, 0.47954559326171875, 0.4980926513671875, 0.5166397094726562, 0.535186767578125, 0.5537338256835938, 0.5722808837890625, 0.5908279418945312, 0.609375]}, "gradients/encoder.encoder.layers.23.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 4.0, 6.0, 6.0, 6.0, 14.0, 14.0, 23.0, 25.0, 42.0, 48.0, 84.0, 110.0, 165.0, 230.0, 330.0, 523.0, 762.0, 1230.0, 1768.0, 2923.0, 5035.0, 9398.0, 23877.0, 3993425.0, 115839.0, 18680.0, 7848.0, 4385.0, 2518.0, 1640.0, 1130.0, 744.0, 489.0, 311.0, 200.0, 153.0, 98.0, 54.0, 50.0, 35.0, 15.0, 13.0, 11.0, 9.0, 8.0, 5.0, 3.0, 4.0, 4.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0], "bins": [-2.630859375, -2.545654296875, -2.46044921875, -2.375244140625, -2.2900390625, -2.204833984375, -2.11962890625, -2.034423828125, -1.94921875, -1.864013671875, -1.77880859375, -1.693603515625, -1.6083984375, -1.523193359375, -1.43798828125, -1.352783203125, -1.267578125, -1.182373046875, -1.09716796875, -1.011962890625, -0.9267578125, -0.841552734375, -0.75634765625, -0.671142578125, -0.5859375, -0.500732421875, -0.41552734375, -0.330322265625, -0.2451171875, -0.159912109375, -0.07470703125, 0.010498046875, 0.095703125, 0.180908203125, 0.26611328125, 0.351318359375, 0.4365234375, 0.521728515625, 0.60693359375, 0.692138671875, 0.77734375, 0.862548828125, 0.94775390625, 1.032958984375, 1.1181640625, 1.203369140625, 1.28857421875, 1.373779296875, 1.458984375, 1.544189453125, 1.62939453125, 1.714599609375, 1.7998046875, 1.885009765625, 1.97021484375, 2.055419921875, 2.140625, 2.225830078125, 2.31103515625, 2.396240234375, 2.4814453125, 2.566650390625, 2.65185546875, 2.737060546875, 2.822265625]}, "gradients/encoder.encoder.layers.23.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 3.0, 2.0, 2.0, 1.0, 2.0, 4.0, 3.0, 2.0, 4.0, 7.0, 5.0, 5.0, 5.0, 4.0, 9.0, 9.0, 9.0, 10.0, 7.0, 6.0, 14.0, 6.0, 11.0, 40.0, 233.0, 3466.0, 57.0, 18.0, 13.0, 14.0, 10.0, 18.0, 7.0, 8.0, 8.0, 10.0, 4.0, 6.0, 6.0, 1.0, 5.0, 3.0, 5.0, 9.0, 8.0, 0.0, 3.0, 2.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.86328125, -0.83721923828125, -0.8111572265625, -0.78509521484375, -0.759033203125, -0.73297119140625, -0.7069091796875, -0.68084716796875, -0.65478515625, -0.62872314453125, -0.6026611328125, -0.57659912109375, -0.550537109375, -0.52447509765625, -0.4984130859375, -0.47235107421875, -0.4462890625, -0.42022705078125, -0.3941650390625, -0.36810302734375, -0.342041015625, -0.31597900390625, -0.2899169921875, -0.26385498046875, -0.23779296875, -0.21173095703125, -0.1856689453125, -0.15960693359375, -0.133544921875, -0.10748291015625, -0.0814208984375, -0.05535888671875, -0.029296875, -0.00323486328125, 0.0228271484375, 0.04888916015625, 0.074951171875, 0.10101318359375, 0.1270751953125, 0.15313720703125, 0.17919921875, 0.20526123046875, 0.2313232421875, 0.25738525390625, 0.283447265625, 0.30950927734375, 0.3355712890625, 0.36163330078125, 0.3876953125, 0.41375732421875, 0.4398193359375, 0.46588134765625, 0.491943359375, 0.51800537109375, 0.5440673828125, 0.57012939453125, 0.59619140625, 0.62225341796875, 0.6483154296875, 0.67437744140625, 0.700439453125, 0.72650146484375, 0.7525634765625, 0.77862548828125, 0.8046875]}, "gradients/encoder.encoder.layers.23.final_layer_norm.weight": {"_type": "histogram", "values": [3.0, 2.0, 4.0, 4.0, 9.0, 17.0, 55.0, 637.0, 185.0, 48.0, 22.0, 11.0, 16.0, 2.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.2579293251037598, -1.0959107875823975, -0.9338922500610352, -0.7718737125396729, -0.6098551750183105, -0.44783663749694824, -0.28581809997558594, -0.12379956245422363, 0.03821897506713867, 0.20023751258850098, 0.3622560501098633, 0.5242745876312256, 0.6862931251525879, 0.8483116626739502, 1.0103302001953125, 1.1723487377166748, 1.334367275238037, 1.4963858127593994, 1.6584043502807617, 1.820422887802124, 1.9824414253234863, 2.1444599628448486, 2.306478500366211, 2.4684970378875732, 2.6305155754089355, 2.792534112930298, 2.95455265045166, 3.1165711879730225, 3.2785897254943848, 3.440608263015747, 3.6026268005371094, 3.7646453380584717, 3.926664352416992, 4.088683128356934, 4.250701427459717, 4.4127197265625, 4.574738502502441, 4.736757278442383, 4.898775577545166, 5.060793876647949, 5.222812652587891, 5.384831428527832, 5.546849727630615, 5.708868026733398, 5.87088680267334, 6.032905578613281, 6.1949238777160645, 6.356942176818848, 6.518960952758789, 6.6809797286987305, 6.842998027801514, 7.005016326904297, 7.167035102844238, 7.32905387878418, 7.491072177886963, 7.653090476989746, 7.8151092529296875, 7.977128028869629, 8.13914680480957, 8.301164627075195, 8.463183403015137, 8.625202178955078, 8.787220001220703, 8.949238777160645, 9.111257553100586]}, "gradients/encoder.encoder.layers.23.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 4.0, 2.0, 0.0, 2.0, 0.0, 4.0, 3.0, 2.0, 2.0, 2.0, 5.0, 6.0, 14.0, 12.0, 27.0, 38.0, 50.0, 51.0, 78.0, 101.0, 111.0, 108.0, 108.0, 74.0, 64.0, 24.0, 30.0, 21.0, 16.0, 14.0, 9.0, 6.0, 5.0, 4.0, 2.0, 0.0, 4.0, 2.0, 4.0, 0.0, 2.0, 1.0, 0.0, 2.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.962585210800171, -3.838390350341797, -3.714195489883423, -3.590000629425049, -3.4658055305480957, -3.3416106700897217, -3.2174158096313477, -3.0932209491729736, -2.9690260887145996, -2.8448312282562256, -2.7206363677978516, -2.5964412689208984, -2.4722464084625244, -2.3480515480041504, -2.2238566875457764, -2.0996618270874023, -1.9754667282104492, -1.8512718677520752, -1.7270768880844116, -1.6028820276260376, -1.478687047958374, -1.3544921875, -1.230297327041626, -1.106102466583252, -0.9819074869155884, -0.8577125668525696, -0.7335176467895508, -0.6093227863311768, -0.48512786626815796, -0.36093294620513916, -0.23673808574676514, -0.11254316568374634, 0.011651754379272461, 0.13584665954113007, 0.26004156470298767, 0.3842364549636841, 0.5084313750267029, 0.6326262950897217, 0.7568211555480957, 0.8810160756111145, 1.0052109956741333, 1.1294058561325073, 1.253600835800171, 1.377795696258545, 1.501990556716919, 1.6261855363845825, 1.7503803968429565, 1.8745753765106201, 1.9987702369689941, 2.122965097427368, 2.247159957885742, 2.3713550567626953, 2.4955499172210693, 2.6197447776794434, 2.7439396381378174, 2.8681344985961914, 2.9923295974731445, 3.1165244579315186, 3.2407193183898926, 3.3649144172668457, 3.4891092777252197, 3.6133041381835938, 3.7374989986419678, 3.861693859100342, 3.985888719558716]}, "gradients/encoder.encoder.layers.23.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 3.0, 3.0, 1.0, 5.0, 3.0, 7.0, 10.0, 12.0, 10.0, 25.0, 17.0, 35.0, 53.0, 75.0, 96.0, 161.0, 214.0, 296.0, 505.0, 787.0, 1194.0, 1963.0, 3321.0, 6012.0, 11997.0, 35574.0, 938404.0, 24860.0, 10001.0, 5311.0, 2864.0, 1745.0, 1015.0, 633.0, 448.0, 283.0, 184.0, 134.0, 72.0, 65.0, 52.0, 35.0, 27.0, 16.0, 18.0, 7.0, 3.0, 3.0, 6.0, 5.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-2.75, -2.662353515625, -2.57470703125, -2.487060546875, -2.3994140625, -2.311767578125, -2.22412109375, -2.136474609375, -2.048828125, -1.961181640625, -1.87353515625, -1.785888671875, -1.6982421875, -1.610595703125, -1.52294921875, -1.435302734375, -1.34765625, -1.260009765625, -1.17236328125, -1.084716796875, -0.9970703125, -0.909423828125, -0.82177734375, -0.734130859375, -0.646484375, -0.558837890625, -0.47119140625, -0.383544921875, -0.2958984375, -0.208251953125, -0.12060546875, -0.032958984375, 0.0546875, 0.142333984375, 0.22998046875, 0.317626953125, 0.4052734375, 0.492919921875, 0.58056640625, 0.668212890625, 0.755859375, 0.843505859375, 0.93115234375, 1.018798828125, 1.1064453125, 1.194091796875, 1.28173828125, 1.369384765625, 1.45703125, 1.544677734375, 1.63232421875, 1.719970703125, 1.8076171875, 1.895263671875, 1.98291015625, 2.070556640625, 2.158203125, 2.245849609375, 2.33349609375, 2.421142578125, 2.5087890625, 2.596435546875, 2.68408203125, 2.771728515625, 2.859375]}, "gradients/encoder.encoder.layers.23.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 2.0, 1.0, 1.0, 2.0, 2.0, 2.0, 0.0, 1.0, 3.0, 3.0, 2.0, 3.0, 4.0, 3.0, 2.0, 6.0, 4.0, 6.0, 5.0, 8.0, 12.0, 19.0, 34.0, 796.0, 15.0, 17.0, 5.0, 10.0, 8.0, 3.0, 4.0, 7.0, 5.0, 9.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.56787109375, -0.5497665405273438, -0.5316619873046875, -0.5135574340820312, -0.495452880859375, -0.47734832763671875, -0.4592437744140625, -0.44113922119140625, -0.42303466796875, -0.40493011474609375, -0.3868255615234375, -0.36872100830078125, -0.350616455078125, -0.33251190185546875, -0.3144073486328125, -0.29630279541015625, -0.2781982421875, -0.26009368896484375, -0.2419891357421875, -0.22388458251953125, -0.205780029296875, -0.18767547607421875, -0.1695709228515625, -0.15146636962890625, -0.13336181640625, -0.11525726318359375, -0.0971527099609375, -0.07904815673828125, -0.060943603515625, -0.04283905029296875, -0.0247344970703125, -0.00662994384765625, 0.011474609375, 0.02957916259765625, 0.0476837158203125, 0.06578826904296875, 0.083892822265625, 0.10199737548828125, 0.1201019287109375, 0.13820648193359375, 0.15631103515625, 0.17441558837890625, 0.1925201416015625, 0.21062469482421875, 0.228729248046875, 0.24683380126953125, 0.2649383544921875, 0.28304290771484375, 0.3011474609375, 0.31925201416015625, 0.3373565673828125, 0.35546112060546875, 0.373565673828125, 0.39167022705078125, 0.4097747802734375, 0.42787933349609375, 0.44598388671875, 0.46408843994140625, 0.4821929931640625, 0.5002975463867188, 0.518402099609375, 0.5365066528320312, 0.5546112060546875, 0.5727157592773438, 0.5908203125]}, "gradients/encoder.encoder.layers.23.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 5.0, 2.0, 2.0, 5.0, 16.0, 11.0, 13.0, 24.0, 36.0, 42.0, 57.0, 64.0, 95.0, 125.0, 205.0, 284.0, 413.0, 651.0, 951.0, 1524.0, 2523.0, 4077.0, 6957.0, 12966.0, 26876.0, 76159.0, 606247.0, 214813.0, 48494.0, 19907.0, 10000.0, 5632.0, 3400.0, 2096.0, 1359.0, 847.0, 527.0, 330.0, 226.0, 171.0, 107.0, 81.0, 58.0, 44.0, 43.0, 25.0, 20.0, 11.0, 13.0, 12.0, 5.0, 10.0, 3.0, 2.0, 3.0, 2.0], "bins": [-1.876953125, -1.823028564453125, -1.76910400390625, -1.715179443359375, -1.6612548828125, -1.607330322265625, -1.55340576171875, -1.499481201171875, -1.445556640625, -1.391632080078125, -1.33770751953125, -1.283782958984375, -1.2298583984375, -1.175933837890625, -1.12200927734375, -1.068084716796875, -1.01416015625, -0.960235595703125, -0.90631103515625, -0.852386474609375, -0.7984619140625, -0.744537353515625, -0.69061279296875, -0.636688232421875, -0.582763671875, -0.528839111328125, -0.47491455078125, -0.420989990234375, -0.3670654296875, -0.313140869140625, -0.25921630859375, -0.205291748046875, -0.1513671875, -0.097442626953125, -0.04351806640625, 0.010406494140625, 0.0643310546875, 0.118255615234375, 0.17218017578125, 0.226104736328125, 0.280029296875, 0.333953857421875, 0.38787841796875, 0.441802978515625, 0.4957275390625, 0.549652099609375, 0.60357666015625, 0.657501220703125, 0.71142578125, 0.765350341796875, 0.81927490234375, 0.873199462890625, 0.9271240234375, 0.981048583984375, 1.03497314453125, 1.088897705078125, 1.142822265625, 1.196746826171875, 1.25067138671875, 1.304595947265625, 1.3585205078125, 1.412445068359375, 1.46636962890625, 1.520294189453125, 1.57421875]}, "gradients/encoder.encoder.layers.23.attention.v_proj.bias": {"_type": "histogram", "values": [2.0, 3.0, 2.0, 1.0, 6.0, 1.0, 6.0, 4.0, 2.0, 13.0, 7.0, 12.0, 8.0, 9.0, 12.0, 23.0, 13.0, 19.0, 23.0, 30.0, 33.0, 26.0, 23.0, 26.0, 34.0, 44.0, 47.0, 40.0, 29.0, 41.0, 35.0, 39.0, 37.0, 46.0, 33.0, 36.0, 21.0, 26.0, 17.0, 27.0, 29.0, 20.0, 18.0, 17.0, 13.0, 10.0, 9.0, 11.0, 9.0, 8.0, 6.0, 7.0, 0.0, 1.0, 2.0, 5.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.5341796875, -1.4816436767578125, -1.429107666015625, -1.3765716552734375, -1.32403564453125, -1.2714996337890625, -1.218963623046875, -1.1664276123046875, -1.1138916015625, -1.0613555908203125, -1.008819580078125, -0.9562835693359375, -0.90374755859375, -0.8512115478515625, -0.798675537109375, -0.7461395263671875, -0.693603515625, -0.6410675048828125, -0.588531494140625, -0.5359954833984375, -0.48345947265625, -0.4309234619140625, -0.378387451171875, -0.3258514404296875, -0.2733154296875, -0.2207794189453125, -0.168243408203125, -0.1157073974609375, -0.06317138671875, -0.0106353759765625, 0.041900634765625, 0.0944366455078125, 0.14697265625, 0.1995086669921875, 0.252044677734375, 0.3045806884765625, 0.35711669921875, 0.4096527099609375, 0.462188720703125, 0.5147247314453125, 0.5672607421875, 0.6197967529296875, 0.672332763671875, 0.7248687744140625, 0.77740478515625, 0.8299407958984375, 0.882476806640625, 0.9350128173828125, 0.987548828125, 1.0400848388671875, 1.092620849609375, 1.1451568603515625, 1.19769287109375, 1.2502288818359375, 1.302764892578125, 1.3553009033203125, 1.4078369140625, 1.4603729248046875, 1.512908935546875, 1.5654449462890625, 1.61798095703125, 1.6705169677734375, 1.723052978515625, 1.7755889892578125, 1.828125]}, "gradients/encoder.encoder.layers.23.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 1.0, 0.0, 2.0, 1.0, 4.0, 0.0, 5.0, 4.0, 8.0, 6.0, 12.0, 18.0, 20.0, 31.0, 50.0, 59.0, 139.0, 338.0, 1080.0, 11562.0, 1013926.0, 19164.0, 1366.0, 373.0, 152.0, 81.0, 43.0, 31.0, 19.0, 21.0, 13.0, 12.0, 7.0, 6.0, 3.0, 1.0, 2.0, 4.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0009379386901855469, -0.0008981302380561829, -0.0008583217859268188, -0.0008185133337974548, -0.0007787048816680908, -0.0007388964295387268, -0.0006990879774093628, -0.0006592795252799988, -0.0006194710731506348, -0.0005796626210212708, -0.0005398541688919067, -0.0005000457167625427, -0.0004602372646331787, -0.0004204288125038147, -0.0003806203603744507, -0.00034081190824508667, -0.00030100345611572266, -0.00026119500398635864, -0.00022138655185699463, -0.00018157809972763062, -0.0001417696475982666, -0.00010196119546890259, -6.215274333953857e-05, -2.234429121017456e-05, 1.7464160919189453e-05, 5.727261304855347e-05, 9.708106517791748e-05, 0.0001368895173072815, 0.0001766979694366455, 0.00021650642156600952, 0.00025631487369537354, 0.00029612332582473755, 0.00033593177795410156, 0.0003757402300834656, 0.0004155486822128296, 0.0004553571343421936, 0.0004951655864715576, 0.0005349740386009216, 0.0005747824907302856, 0.0006145909428596497, 0.0006543993949890137, 0.0006942078471183777, 0.0007340162992477417, 0.0007738247513771057, 0.0008136332035064697, 0.0008534416556358337, 0.0008932501077651978, 0.0009330585598945618, 0.0009728670120239258, 0.0010126754641532898, 0.0010524839162826538, 0.0010922923684120178, 0.0011321008205413818, 0.0011719092726707458, 0.0012117177248001099, 0.0012515261769294739, 0.0012913346290588379, 0.001331143081188202, 0.001370951533317566, 0.00141075998544693, 0.001450568437576294, 0.001490376889705658, 0.001530185341835022, 0.001569993793964386, 0.00160980224609375]}, "gradients/encoder.encoder.layers.23.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 3.0, 2.0, 2.0, 5.0, 0.0, 8.0, 2.0, 3.0, 6.0, 12.0, 12.0, 21.0, 14.0, 33.0, 12.0, 27.0, 18.0, 55.0, 54.0, 33.0, 68.0, 42.0, 77.0, 69.0, 38.0, 76.0, 43.0, 41.0, 39.0, 27.0, 48.0, 14.0, 21.0, 23.0, 8.0, 18.0, 7.0, 10.0, 10.0, 5.0, 3.0, 2.0, 4.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-3.5762786865234375e-06, -3.4812837839126587e-06, -3.38628888130188e-06, -3.291293978691101e-06, -3.1962990760803223e-06, -3.1013041734695435e-06, -3.0063092708587646e-06, -2.911314368247986e-06, -2.816319465637207e-06, -2.7213245630264282e-06, -2.6263296604156494e-06, -2.5313347578048706e-06, -2.436339855194092e-06, -2.341344952583313e-06, -2.246350049972534e-06, -2.1513551473617554e-06, -2.0563602447509766e-06, -1.9613653421401978e-06, -1.866370439529419e-06, -1.7713755369186401e-06, -1.6763806343078613e-06, -1.5813857316970825e-06, -1.4863908290863037e-06, -1.391395926475525e-06, -1.296401023864746e-06, -1.2014061212539673e-06, -1.1064112186431885e-06, -1.0114163160324097e-06, -9.164214134216309e-07, -8.21426510810852e-07, -7.264316082000732e-07, -6.314367055892944e-07, -5.364418029785156e-07, -4.414469003677368e-07, -3.46451997756958e-07, -2.514570951461792e-07, -1.564621925354004e-07, -6.146728992462158e-08, 3.3527612686157227e-08, 1.2852251529693604e-07, 2.2351741790771484e-07, 3.1851232051849365e-07, 4.1350722312927246e-07, 5.085021257400513e-07, 6.034970283508301e-07, 6.984919309616089e-07, 7.934868335723877e-07, 8.884817361831665e-07, 9.834766387939453e-07, 1.0784715414047241e-06, 1.173466444015503e-06, 1.2684613466262817e-06, 1.3634562492370605e-06, 1.4584511518478394e-06, 1.5534460544586182e-06, 1.648440957069397e-06, 1.7434358596801758e-06, 1.8384307622909546e-06, 1.9334256649017334e-06, 2.028420567512512e-06, 2.123415470123291e-06, 2.21841037273407e-06, 2.3134052753448486e-06, 2.4084001779556274e-06, 2.5033950805664062e-06]}, "gradients/encoder.encoder.layers.23.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 2.0, 2.0, 2.0, 7.0, 9.0, 8.0, 13.0, 16.0, 19.0, 25.0, 39.0, 68.0, 103.0, 148.0, 219.0, 330.0, 531.0, 813.0, 1385.0, 2534.0, 5101.0, 11102.0, 29404.0, 146605.0, 749601.0, 64919.0, 18931.0, 8077.0, 3738.0, 1916.0, 1052.0, 644.0, 422.0, 249.0, 168.0, 101.0, 68.0, 54.0, 29.0, 28.0, 20.0, 15.0, 12.0, 10.0, 5.0, 6.0, 4.0, 4.0, 2.0, 4.0, 1.0, 1.0, 3.0, 1.0, 0.0, 1.0], "bins": [-0.00022864341735839844, -0.00022156164050102234, -0.00021447986364364624, -0.00020739808678627014, -0.00020031630992889404, -0.00019323453307151794, -0.00018615275621414185, -0.00017907097935676575, -0.00017198920249938965, -0.00016490742564201355, -0.00015782564878463745, -0.00015074387192726135, -0.00014366209506988525, -0.00013658031821250916, -0.00012949854135513306, -0.00012241676449775696, -0.00011533498764038086, -0.00010825321078300476, -0.00010117143392562866, -9.408965706825256e-05, -8.700788021087646e-05, -7.992610335350037e-05, -7.284432649612427e-05, -6.576254963874817e-05, -5.868077278137207e-05, -5.159899592399597e-05, -4.451721906661987e-05, -3.7435442209243774e-05, -3.0353665351867676e-05, -2.3271888494491577e-05, -1.619011163711548e-05, -9.10833477973938e-06, -2.0265579223632812e-06, 5.055218935012817e-06, 1.2136995792388916e-05, 1.9218772649765015e-05, 2.6300549507141113e-05, 3.338232636451721e-05, 4.046410322189331e-05, 4.754588007926941e-05, 5.462765693664551e-05, 6.17094337940216e-05, 6.87912106513977e-05, 7.58729875087738e-05, 8.29547643661499e-05, 9.0036541223526e-05, 9.71183180809021e-05, 0.0001042000949382782, 0.0001112818717956543, 0.0001183636486530304, 0.0001254454255104065, 0.0001325272023677826, 0.0001396089792251587, 0.0001466907560825348, 0.0001537725329399109, 0.000160854309797287, 0.00016793608665466309, 0.00017501786351203918, 0.00018209964036941528, 0.00018918141722679138, 0.00019626319408416748, 0.00020334497094154358, 0.00021042674779891968, 0.00021750852465629578, 0.00022459030151367188]}, "gradients/encoder.encoder.layers.23.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 3.0, 1.0, 5.0, 3.0, 4.0, 4.0, 4.0, 4.0, 9.0, 11.0, 16.0, 16.0, 32.0, 22.0, 30.0, 30.0, 36.0, 38.0, 41.0, 62.0, 58.0, 72.0, 77.0, 66.0, 59.0, 56.0, 43.0, 49.0, 24.0, 26.0, 24.0, 21.0, 18.0, 15.0, 8.0, 4.0, 5.0, 3.0, 3.0, 2.0, 0.0, 3.0, 3.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00021922588348388672, -0.0002123173326253891, -0.00020540878176689148, -0.00019850023090839386, -0.00019159168004989624, -0.00018468312919139862, -0.000177774578332901, -0.00017086602747440338, -0.00016395747661590576, -0.00015704892575740814, -0.00015014037489891052, -0.0001432318240404129, -0.00013632327318191528, -0.00012941472232341766, -0.00012250617146492004, -0.00011559762060642242, -0.0001086890697479248, -0.00010178051888942719, -9.487196803092957e-05, -8.796341717243195e-05, -8.105486631393433e-05, -7.41463154554367e-05, -6.723776459693909e-05, -6.032921373844147e-05, -5.342066287994385e-05, -4.651211202144623e-05, -3.960356116294861e-05, -3.269501030445099e-05, -2.578645944595337e-05, -1.887790858745575e-05, -1.196935772895813e-05, -5.06080687046051e-06, 1.8477439880371094e-06, 8.756294846534729e-06, 1.566484570503235e-05, 2.2573396563529968e-05, 2.9481947422027588e-05, 3.639049828052521e-05, 4.329904913902283e-05, 5.020759999752045e-05, 5.7116150856018066e-05, 6.402470171451569e-05, 7.09332525730133e-05, 7.784180343151093e-05, 8.475035429000854e-05, 9.165890514850616e-05, 9.856745600700378e-05, 0.0001054760068655014, 0.00011238455772399902, 0.00011929310858249664, 0.00012620165944099426, 0.00013311021029949188, 0.0001400187611579895, 0.00014692731201648712, 0.00015383586287498474, 0.00016074441373348236, 0.00016765296459197998, 0.0001745615154504776, 0.00018147006630897522, 0.00018837861716747284, 0.00019528716802597046, 0.00020219571888446808, 0.0002091042697429657, 0.00021601282060146332, 0.00022292137145996094]}, "gradients/encoder.encoder.layers.23.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 3.0, 3.0, 1.0, 7.0, 10.0, 17.0, 52.0, 105.0, 577.0, 138.0, 43.0, 20.0, 16.0, 7.0, 5.0, 8.0, 0.0, 1.0, 1.0, 1.0], "bins": [-41.539466857910156, -40.75001525878906, -39.9605598449707, -39.17110824584961, -38.38165283203125, -37.592201232910156, -36.8027458190918, -36.0132942199707, -35.223838806152344, -34.43438720703125, -33.64493179321289, -32.8554801940918, -32.06602478027344, -31.276573181152344, -30.487119674682617, -29.69766616821289, -28.908214569091797, -28.11876106262207, -27.329307556152344, -26.539854049682617, -25.75040054321289, -24.960948944091797, -24.17149543762207, -23.382041931152344, -22.592588424682617, -21.80313491821289, -21.013681411743164, -20.224227905273438, -19.434776306152344, -18.645322799682617, -17.85586929321289, -17.066415786743164, -16.27696418762207, -15.487510681152344, -14.698057174682617, -13.908604621887207, -13.11915111541748, -12.329697608947754, -11.540245056152344, -10.750791549682617, -9.96133804321289, -9.171884536743164, -8.382431030273438, -7.592978477478027, -6.803524971008301, -6.014071464538574, -5.224618434906006, -4.4351654052734375, -3.645711898803711, -2.8562586307525635, -2.066805362701416, -1.2773520946502686, -0.4878988265991211, 0.30155444145202637, 1.0910077095031738, 1.8804607391357422, 2.6699142456054688, 3.459367513656616, 4.248820781707764, 5.038273811340332, 5.827727317810059, 6.617180824279785, 7.4066338539123535, 8.196086883544922, 8.985540390014648]}, "gradients/encoder.encoder.layers.23.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 1.0, 1.0, 1.0, 1.0, 2.0, 3.0, 6.0, 9.0, 11.0, 14.0, 23.0, 18.0, 50.0, 71.0, 143.0, 160.0, 151.0, 118.0, 79.0, 45.0, 25.0, 21.0, 11.0, 5.0, 10.0, 6.0, 7.0, 2.0, 2.0, 2.0, 2.0, 4.0, 1.0, 2.0, 2.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-41.894203186035156, -40.661190032958984, -39.42817306518555, -38.195159912109375, -36.96214294433594, -35.729129791259766, -34.496116638183594, -33.263099670410156, -32.030086517333984, -30.79707145690918, -29.564056396484375, -28.331043243408203, -27.0980281829834, -25.865013122558594, -24.631999969482422, -23.398984909057617, -22.165969848632812, -20.932954788208008, -19.699939727783203, -18.46692657470703, -17.233911514282227, -16.000896453857422, -14.767882347106934, -13.534868240356445, -12.30185317993164, -11.068838119506836, -9.835824012756348, -8.60280990600586, -7.369794845581055, -6.136780261993408, -4.903765678405762, -3.6707515716552734, -2.4377403259277344, -1.204725742340088, 0.028288841247558594, 1.261303424835205, 2.4943180084228516, 3.727332592010498, 4.9603471755981445, 6.193361282348633, 7.4263763427734375, 8.659391403198242, 9.89240550994873, 11.125419616699219, 12.358434677124023, 13.591449737548828, 14.824463844299316, 16.057477951049805, 17.29049301147461, 18.523508071899414, 19.75652313232422, 20.98953628540039, 22.222551345825195, 23.45556640625, 24.688579559326172, 25.921594619750977, 27.15460968017578, 28.387624740600586, 29.62063980102539, 30.853652954101562, 32.086669921875, 33.31968307495117, 34.552696228027344, 35.78571319580078, 37.01872634887695]}, "gradients/encoder.encoder.layers.22.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0, 2.0, 2.0, 2.0, 3.0, 1.0, 1.0, 3.0, 3.0, 5.0, 6.0, 9.0, 6.0, 11.0, 15.0, 15.0, 32.0, 58.0, 89.0, 187.0, 457.0, 1122.0, 2704.0, 7203.0, 31331.0, 4137613.0, 9662.0, 2504.0, 732.0, 243.0, 116.0, 56.0, 25.0, 20.0, 13.0, 12.0, 9.0, 5.0, 5.0, 3.0, 3.0, 2.0, 2.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-41.84375, -40.54296875, -39.2421875, -37.94140625, -36.640625, -35.33984375, -34.0390625, -32.73828125, -31.4375, -30.13671875, -28.8359375, -27.53515625, -26.234375, -24.93359375, -23.6328125, -22.33203125, -21.03125, -19.73046875, -18.4296875, -17.12890625, -15.828125, -14.52734375, -13.2265625, -11.92578125, -10.625, -9.32421875, -8.0234375, -6.72265625, -5.421875, -4.12109375, -2.8203125, -1.51953125, -0.21875, 1.08203125, 2.3828125, 3.68359375, 4.984375, 6.28515625, 7.5859375, 8.88671875, 10.1875, 11.48828125, 12.7890625, 14.08984375, 15.390625, 16.69140625, 17.9921875, 19.29296875, 20.59375, 21.89453125, 23.1953125, 24.49609375, 25.796875, 27.09765625, 28.3984375, 29.69921875, 31.0, 32.30078125, 33.6015625, 34.90234375, 36.203125, 37.50390625, 38.8046875, 40.10546875, 41.40625]}, "gradients/encoder.encoder.layers.22.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0, 2.0, 2.0, 2.0, 2.0, 1.0, 1.0, 2.0, 1.0, 2.0, 2.0, 4.0, 3.0, 3.0, 5.0, 3.0, 3.0, 3.0, 5.0, 5.0, 7.0, 11.0, 10.0, 27.0, 46.0, 757.0, 23.0, 16.0, 7.0, 9.0, 8.0, 3.0, 3.0, 4.0, 6.0, 5.0, 3.0, 4.0, 4.0, 3.0, 2.0, 2.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.46533203125, -0.4508628845214844, -0.43639373779296875, -0.4219245910644531, -0.4074554443359375, -0.3929862976074219, -0.37851715087890625, -0.3640480041503906, -0.349578857421875, -0.3351097106933594, -0.32064056396484375, -0.3061714172363281, -0.2917022705078125, -0.2772331237792969, -0.26276397705078125, -0.24829483032226562, -0.23382568359375, -0.21935653686523438, -0.20488739013671875, -0.19041824340820312, -0.1759490966796875, -0.16147994995117188, -0.14701080322265625, -0.13254165649414062, -0.118072509765625, -0.10360336303710938, -0.08913421630859375, -0.07466506958007812, -0.0601959228515625, -0.045726776123046875, -0.03125762939453125, -0.016788482666015625, -0.0023193359375, 0.012149810791015625, 0.02661895751953125, 0.041088104248046875, 0.0555572509765625, 0.07002639770507812, 0.08449554443359375, 0.09896469116210938, 0.113433837890625, 0.12790298461914062, 0.14237213134765625, 0.15684127807617188, 0.1713104248046875, 0.18577957153320312, 0.20024871826171875, 0.21471786499023438, 0.22918701171875, 0.24365615844726562, 0.25812530517578125, 0.2725944519042969, 0.2870635986328125, 0.3015327453613281, 0.31600189208984375, 0.3304710388183594, 0.344940185546875, 0.3594093322753906, 0.37387847900390625, 0.3883476257324219, 0.4028167724609375, 0.4172859191894531, 0.43175506591796875, 0.4462242126464844, 0.460693359375]}, "gradients/encoder.encoder.layers.22.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 3.0, 2.0, 3.0, 7.0, 6.0, 7.0, 8.0, 13.0, 27.0, 29.0, 63.0, 80.0, 125.0, 180.0, 305.0, 505.0, 824.0, 1516.0, 2732.0, 5368.0, 11865.0, 34277.0, 248125.0, 3827406.0, 35891.0, 12659.0, 5571.0, 2899.0, 1559.0, 888.0, 514.0, 329.0, 163.0, 115.0, 78.0, 53.0, 27.0, 19.0, 10.0, 9.0, 6.0, 6.0, 7.0, 2.0, 5.0, 0.0, 2.0, 1.0, 1.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-5.33203125, -5.16009521484375, -4.9881591796875, -4.81622314453125, -4.644287109375, -4.47235107421875, -4.3004150390625, -4.12847900390625, -3.95654296875, -3.78460693359375, -3.6126708984375, -3.44073486328125, -3.268798828125, -3.09686279296875, -2.9249267578125, -2.75299072265625, -2.5810546875, -2.40911865234375, -2.2371826171875, -2.06524658203125, -1.893310546875, -1.72137451171875, -1.5494384765625, -1.37750244140625, -1.20556640625, -1.03363037109375, -0.8616943359375, -0.68975830078125, -0.517822265625, -0.34588623046875, -0.1739501953125, -0.00201416015625, 0.169921875, 0.34185791015625, 0.5137939453125, 0.68572998046875, 0.857666015625, 1.02960205078125, 1.2015380859375, 1.37347412109375, 1.54541015625, 1.71734619140625, 1.8892822265625, 2.06121826171875, 2.233154296875, 2.40509033203125, 2.5770263671875, 2.74896240234375, 2.9208984375, 3.09283447265625, 3.2647705078125, 3.43670654296875, 3.608642578125, 3.78057861328125, 3.9525146484375, 4.12445068359375, 4.29638671875, 4.46832275390625, 4.6402587890625, 4.81219482421875, 4.984130859375, 5.15606689453125, 5.3280029296875, 5.49993896484375, 5.671875]}, "gradients/encoder.encoder.layers.22.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 1.0, 2.0, 0.0, 4.0, 2.0, 5.0, 4.0, 3.0, 5.0, 5.0, 5.0, 8.0, 5.0, 9.0, 15.0, 7.0, 16.0, 19.0, 21.0, 13.0, 24.0, 24.0, 20.0, 28.0, 3403.0, 153.0, 32.0, 28.0, 17.0, 28.0, 16.0, 24.0, 12.0, 14.0, 10.0, 17.0, 13.0, 16.0, 10.0, 12.0, 7.0, 4.0, 4.0, 2.0, 6.0, 3.0, 2.0, 3.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0], "bins": [-0.79345703125, -0.7694091796875, -0.745361328125, -0.7213134765625, -0.697265625, -0.6732177734375, -0.649169921875, -0.6251220703125, -0.60107421875, -0.5770263671875, -0.552978515625, -0.5289306640625, -0.5048828125, -0.4808349609375, -0.456787109375, -0.4327392578125, -0.40869140625, -0.3846435546875, -0.360595703125, -0.3365478515625, -0.3125, -0.2884521484375, -0.264404296875, -0.2403564453125, -0.21630859375, -0.1922607421875, -0.168212890625, -0.1441650390625, -0.1201171875, -0.0960693359375, -0.072021484375, -0.0479736328125, -0.02392578125, 0.0001220703125, 0.024169921875, 0.0482177734375, 0.072265625, 0.0963134765625, 0.120361328125, 0.1444091796875, 0.16845703125, 0.1925048828125, 0.216552734375, 0.2406005859375, 0.2646484375, 0.2886962890625, 0.312744140625, 0.3367919921875, 0.36083984375, 0.3848876953125, 0.408935546875, 0.4329833984375, 0.45703125, 0.4810791015625, 0.505126953125, 0.5291748046875, 0.55322265625, 0.5772705078125, 0.601318359375, 0.6253662109375, 0.6494140625, 0.6734619140625, 0.697509765625, 0.7215576171875, 0.74560546875]}, "gradients/encoder.encoder.layers.22.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 3.0, 31.0, 821.0, 116.0, 20.0, 11.0, 8.0, 2.0, 2.0, 0.0, 2.0], "bins": [-25.021419525146484, -24.571361541748047, -24.121305465698242, -23.671247482299805, -23.22119140625, -22.771133422851562, -22.321075439453125, -21.87101936340332, -21.420961380004883, -20.970903396606445, -20.52084732055664, -20.070789337158203, -19.6207332611084, -19.17067527770996, -18.720619201660156, -18.27056121826172, -17.82050323486328, -17.370445251464844, -16.92038917541504, -16.4703311920166, -16.020275115966797, -15.57021713256836, -15.120160102844238, -14.670103073120117, -14.220046997070312, -13.769989967346191, -13.31993293762207, -12.869874954223633, -12.419817924499512, -11.96976089477539, -11.51970386505127, -11.069646835327148, -10.619588851928711, -10.16953182220459, -9.719474792480469, -9.269416809082031, -8.81935977935791, -8.369302749633789, -7.919245719909668, -7.469188690185547, -7.019131660461426, -6.569074630737305, -6.119017124176025, -5.668960094451904, -5.218902587890625, -4.768845558166504, -4.318788528442383, -3.8687312602996826, -3.4186737537384033, -2.968616485595703, -2.518559455871582, -2.068502187728882, -1.6184449195861816, -1.1683876514434814, -0.7183306217193604, -0.26827335357666016, 0.18178391456604004, 0.6318411231040955, 1.0818983316421509, 1.5319554805755615, 1.9820127487182617, 2.432070016860962, 2.882127046585083, 3.332184314727783, 3.7822415828704834]}, "gradients/encoder.encoder.layers.22.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 3.0, 1.0, 2.0, 5.0, 2.0, 6.0, 3.0, 5.0, 5.0, 6.0, 17.0, 21.0, 19.0, 49.0, 37.0, 81.0, 88.0, 113.0, 117.0, 104.0, 85.0, 63.0, 65.0, 36.0, 23.0, 16.0, 8.0, 4.0, 5.0, 4.0, 3.0, 0.0, 3.0, 5.0, 2.0, 2.0, 1.0, 2.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.757713317871094, -4.5974273681640625, -4.437141418457031, -4.27685546875, -4.116569519042969, -3.9562835693359375, -3.795997381210327, -3.635711431503296, -3.4754254817962646, -3.3151395320892334, -3.154853582382202, -2.994567632675171, -2.8342814445495605, -2.6739954948425293, -2.513709545135498, -2.353423595428467, -2.1931376457214355, -2.0328516960144043, -1.872565746307373, -1.7122796773910522, -1.551993727684021, -1.3917077779769897, -1.231421709060669, -1.0711357593536377, -0.9108498096466064, -0.7505638599395752, -0.5902778506278992, -0.42999187111854553, -0.2697058916091919, -0.10941994190216064, 0.05086606740951538, 0.2111520767211914, 0.37143802642822266, 0.5317239761352539, 0.6920099854469299, 0.852295994758606, 1.0125819444656372, 1.1728678941726685, 1.3331539630889893, 1.4934399127960205, 1.6537258625030518, 1.814011812210083, 1.9742977619171143, 2.1345837116241455, 2.294869899749756, 2.455155849456787, 2.6154417991638184, 2.7757277488708496, 2.936013698577881, 3.096299648284912, 3.2565855979919434, 3.4168715476989746, 3.577157497406006, 3.737443447113037, 3.8977296352386475, 4.058015823364258, 4.218301773071289, 4.37858772277832, 4.538873672485352, 4.699159622192383, 4.859445571899414, 5.019731521606445, 5.180017471313477, 5.340303421020508, 5.500589370727539]}, "gradients/encoder.encoder.layers.22.attention.out_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 5.0, 1.0, 3.0, 3.0, 8.0, 4.0, 10.0, 12.0, 19.0, 16.0, 22.0, 38.0, 43.0, 75.0, 95.0, 142.0, 226.0, 303.0, 455.0, 618.0, 935.0, 1387.0, 2183.0, 3341.0, 5368.0, 9409.0, 20682.0, 794879.0, 169165.0, 17323.0, 8104.0, 4800.0, 2930.0, 2033.0, 1298.0, 892.0, 554.0, 375.0, 238.0, 188.0, 115.0, 91.0, 50.0, 49.0, 28.0, 23.0, 13.0, 8.0, 4.0, 3.0, 2.0, 0.0, 4.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-2.283203125, -2.209320068359375, -2.13543701171875, -2.061553955078125, -1.9876708984375, -1.913787841796875, -1.83990478515625, -1.766021728515625, -1.692138671875, -1.618255615234375, -1.54437255859375, -1.470489501953125, -1.3966064453125, -1.322723388671875, -1.24884033203125, -1.174957275390625, -1.10107421875, -1.027191162109375, -0.95330810546875, -0.879425048828125, -0.8055419921875, -0.731658935546875, -0.65777587890625, -0.583892822265625, -0.510009765625, -0.436126708984375, -0.36224365234375, -0.288360595703125, -0.2144775390625, -0.140594482421875, -0.06671142578125, 0.007171630859375, 0.0810546875, 0.154937744140625, 0.22882080078125, 0.302703857421875, 0.3765869140625, 0.450469970703125, 0.52435302734375, 0.598236083984375, 0.672119140625, 0.746002197265625, 0.81988525390625, 0.893768310546875, 0.9676513671875, 1.041534423828125, 1.11541748046875, 1.189300537109375, 1.26318359375, 1.337066650390625, 1.41094970703125, 1.484832763671875, 1.5587158203125, 1.632598876953125, 1.70648193359375, 1.780364990234375, 1.854248046875, 1.928131103515625, 2.00201416015625, 2.075897216796875, 2.1497802734375, 2.223663330078125, 2.29754638671875, 2.371429443359375, 2.4453125]}, "gradients/encoder.encoder.layers.22.attention.out_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 3.0, 1.0, 2.0, 1.0, 1.0, 1.0, 4.0, 1.0, 2.0, 2.0, 1.0, 5.0, 1.0, 2.0, 1.0, 4.0, 4.0, 3.0, 2.0, 6.0, 5.0, 4.0, 5.0, 8.0, 10.0, 23.0, 26.0, 223.0, 535.0, 44.0, 20.0, 9.0, 5.0, 10.0, 1.0, 4.0, 8.0, 4.0, 1.0, 5.0, 5.0, 2.0, 3.0, 4.0, 3.0, 1.0, 4.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.369873046875, -0.35790252685546875, -0.3459320068359375, -0.33396148681640625, -0.321990966796875, -0.31002044677734375, -0.2980499267578125, -0.28607940673828125, -0.27410888671875, -0.26213836669921875, -0.2501678466796875, -0.23819732666015625, -0.226226806640625, -0.21425628662109375, -0.2022857666015625, -0.19031524658203125, -0.1783447265625, -0.16637420654296875, -0.1544036865234375, -0.14243316650390625, -0.130462646484375, -0.11849212646484375, -0.1065216064453125, -0.09455108642578125, -0.08258056640625, -0.07061004638671875, -0.0586395263671875, -0.04666900634765625, -0.034698486328125, -0.02272796630859375, -0.0107574462890625, 0.00121307373046875, 0.01318359375, 0.02515411376953125, 0.0371246337890625, 0.04909515380859375, 0.061065673828125, 0.07303619384765625, 0.0850067138671875, 0.09697723388671875, 0.10894775390625, 0.12091827392578125, 0.1328887939453125, 0.14485931396484375, 0.156829833984375, 0.16880035400390625, 0.1807708740234375, 0.19274139404296875, 0.2047119140625, 0.21668243408203125, 0.2286529541015625, 0.24062347412109375, 0.252593994140625, 0.26456451416015625, 0.2765350341796875, 0.28850555419921875, 0.30047607421875, 0.31244659423828125, 0.3244171142578125, 0.33638763427734375, 0.348358154296875, 0.36032867431640625, 0.3722991943359375, 0.38426971435546875, 0.396240234375]}, "gradients/encoder.encoder.layers.22.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 3.0, 2.0, 6.0, 2.0, 4.0, 8.0, 6.0, 17.0, 9.0, 21.0, 19.0, 19.0, 33.0, 38.0, 76.0, 95.0, 157.0, 254.0, 552.0, 1054.0, 2672.0, 9296.0, 56039.0, 872181.0, 87864.0, 12033.0, 3298.0, 1256.0, 626.0, 348.0, 195.0, 114.0, 81.0, 49.0, 37.0, 26.0, 19.0, 16.0, 6.0, 10.0, 3.0, 7.0, 4.0, 4.0, 2.0, 5.0, 2.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.857421875, -3.731292724609375, -3.60516357421875, -3.479034423828125, -3.3529052734375, -3.226776123046875, -3.10064697265625, -2.974517822265625, -2.848388671875, -2.722259521484375, -2.59613037109375, -2.470001220703125, -2.3438720703125, -2.217742919921875, -2.09161376953125, -1.965484619140625, -1.83935546875, -1.713226318359375, -1.58709716796875, -1.460968017578125, -1.3348388671875, -1.208709716796875, -1.08258056640625, -0.956451416015625, -0.830322265625, -0.704193115234375, -0.57806396484375, -0.451934814453125, -0.3258056640625, -0.199676513671875, -0.07354736328125, 0.052581787109375, 0.1787109375, 0.304840087890625, 0.43096923828125, 0.557098388671875, 0.6832275390625, 0.809356689453125, 0.93548583984375, 1.061614990234375, 1.187744140625, 1.313873291015625, 1.44000244140625, 1.566131591796875, 1.6922607421875, 1.818389892578125, 1.94451904296875, 2.070648193359375, 2.19677734375, 2.322906494140625, 2.44903564453125, 2.575164794921875, 2.7012939453125, 2.827423095703125, 2.95355224609375, 3.079681396484375, 3.205810546875, 3.331939697265625, 3.45806884765625, 3.584197998046875, 3.7103271484375, 3.836456298828125, 3.96258544921875, 4.088714599609375, 4.21484375]}, "gradients/encoder.encoder.layers.22.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 2.0, 5.0, 2.0, 4.0, 4.0, 7.0, 3.0, 10.0, 5.0, 13.0, 17.0, 17.0, 25.0, 27.0, 26.0, 24.0, 26.0, 48.0, 57.0, 40.0, 36.0, 58.0, 42.0, 42.0, 40.0, 43.0, 49.0, 34.0, 45.0, 36.0, 32.0, 25.0, 27.0, 24.0, 18.0, 15.0, 15.0, 17.0, 9.0, 16.0, 6.0, 8.0, 4.0, 2.0, 6.0, 2.0, 3.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-1.3564453125, -1.3158416748046875, -1.275238037109375, -1.2346343994140625, -1.19403076171875, -1.1534271240234375, -1.112823486328125, -1.0722198486328125, -1.0316162109375, -0.9910125732421875, -0.950408935546875, -0.9098052978515625, -0.86920166015625, -0.8285980224609375, -0.787994384765625, -0.7473907470703125, -0.706787109375, -0.6661834716796875, -0.625579833984375, -0.5849761962890625, -0.54437255859375, -0.5037689208984375, -0.463165283203125, -0.4225616455078125, -0.3819580078125, -0.3413543701171875, -0.300750732421875, -0.2601470947265625, -0.21954345703125, -0.1789398193359375, -0.138336181640625, -0.0977325439453125, -0.05712890625, -0.0165252685546875, 0.024078369140625, 0.0646820068359375, 0.10528564453125, 0.1458892822265625, 0.186492919921875, 0.2270965576171875, 0.2677001953125, 0.3083038330078125, 0.348907470703125, 0.3895111083984375, 0.43011474609375, 0.4707183837890625, 0.511322021484375, 0.5519256591796875, 0.592529296875, 0.6331329345703125, 0.673736572265625, 0.7143402099609375, 0.75494384765625, 0.7955474853515625, 0.836151123046875, 0.8767547607421875, 0.9173583984375, 0.9579620361328125, 0.998565673828125, 1.0391693115234375, 1.07977294921875, 1.1203765869140625, 1.160980224609375, 1.2015838623046875, 1.2421875]}, "gradients/encoder.encoder.layers.22.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 3.0, 5.0, 2.0, 4.0, 3.0, 7.0, 8.0, 10.0, 9.0, 11.0, 22.0, 15.0, 32.0, 41.0, 87.0, 144.0, 302.0, 983.0, 5555.0, 358961.0, 674570.0, 6057.0, 989.0, 299.0, 159.0, 90.0, 45.0, 35.0, 24.0, 16.0, 14.0, 12.0, 5.0, 11.0, 2.0, 4.0, 5.0, 6.0, 4.0, 3.0, 2.0, 2.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.005870819091796875, -0.0056874752044677734, -0.005504131317138672, -0.00532078742980957, -0.005137443542480469, -0.004954099655151367, -0.004770755767822266, -0.004587411880493164, -0.0044040679931640625, -0.004220724105834961, -0.004037380218505859, -0.003854036331176758, -0.0036706924438476562, -0.0034873485565185547, -0.003304004669189453, -0.0031206607818603516, -0.00293731689453125, -0.0027539730072021484, -0.002570629119873047, -0.0023872852325439453, -0.0022039413452148438, -0.002020597457885742, -0.0018372535705566406, -0.001653909683227539, -0.0014705657958984375, -0.001287221908569336, -0.0011038780212402344, -0.0009205341339111328, -0.0007371902465820312, -0.0005538463592529297, -0.0003705024719238281, -0.00018715858459472656, -3.814697265625e-06, 0.00017952919006347656, 0.0003628730773925781, 0.0005462169647216797, 0.0007295608520507812, 0.0009129047393798828, 0.0010962486267089844, 0.001279592514038086, 0.0014629364013671875, 0.001646280288696289, 0.0018296241760253906, 0.002012968063354492, 0.0021963119506835938, 0.0023796558380126953, 0.002562999725341797, 0.0027463436126708984, 0.0029296875, 0.0031130313873291016, 0.003296375274658203, 0.0034797191619873047, 0.0036630630493164062, 0.003846406936645508, 0.004029750823974609, 0.004213094711303711, 0.0043964385986328125, 0.004579782485961914, 0.004763126373291016, 0.004946470260620117, 0.005129814147949219, 0.00531315803527832, 0.005496501922607422, 0.0056798458099365234, 0.005863189697265625]}, "gradients/encoder.encoder.layers.22.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 4.0, 4.0, 0.0, 3.0, 5.0, 5.0, 11.0, 10.0, 5.0, 19.0, 26.0, 31.0, 33.0, 14.0, 46.0, 49.0, 66.0, 57.0, 39.0, 65.0, 64.0, 72.0, 61.0, 26.0, 45.0, 47.0, 42.0, 36.0, 24.0, 26.0, 18.0, 21.0, 10.0, 2.0, 8.0, 7.0, 3.0, 1.0, 0.0, 4.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-3.3974647521972656e-06, -3.2903626561164856e-06, -3.1832605600357056e-06, -3.0761584639549255e-06, -2.9690563678741455e-06, -2.8619542717933655e-06, -2.7548521757125854e-06, -2.6477500796318054e-06, -2.5406479835510254e-06, -2.4335458874702454e-06, -2.3264437913894653e-06, -2.2193416953086853e-06, -2.1122395992279053e-06, -2.0051375031471252e-06, -1.8980354070663452e-06, -1.7909333109855652e-06, -1.6838312149047852e-06, -1.5767291188240051e-06, -1.469627022743225e-06, -1.362524926662445e-06, -1.255422830581665e-06, -1.148320734500885e-06, -1.041218638420105e-06, -9.34116542339325e-07, -8.270144462585449e-07, -7.199123501777649e-07, -6.128102540969849e-07, -5.057081580162048e-07, -3.986060619354248e-07, -2.915039658546448e-07, -1.8440186977386475e-07, -7.729977369308472e-08, 2.9802322387695312e-08, 1.3690441846847534e-07, 2.4400651454925537e-07, 3.511086106300354e-07, 4.5821070671081543e-07, 5.653128027915955e-07, 6.724148988723755e-07, 7.795169949531555e-07, 8.866190910339355e-07, 9.937211871147156e-07, 1.1008232831954956e-06, 1.2079253792762756e-06, 1.3150274753570557e-06, 1.4221295714378357e-06, 1.5292316675186157e-06, 1.6363337635993958e-06, 1.7434358596801758e-06, 1.8505379557609558e-06, 1.957640051841736e-06, 2.064742147922516e-06, 2.171844244003296e-06, 2.278946340084076e-06, 2.386048436164856e-06, 2.493150532245636e-06, 2.600252628326416e-06, 2.707354724407196e-06, 2.814456820487976e-06, 2.921558916568756e-06, 3.028661012649536e-06, 3.135763108730316e-06, 3.242865204811096e-06, 3.3499673008918762e-06, 3.4570693969726562e-06]}, "gradients/encoder.encoder.layers.22.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 3.0, 2.0, 2.0, 3.0, 4.0, 6.0, 7.0, 17.0, 19.0, 26.0, 29.0, 50.0, 75.0, 116.0, 194.0, 403.0, 895.0, 2284.0, 9417.0, 137305.0, 872683.0, 18898.0, 3674.0, 1249.0, 532.0, 243.0, 145.0, 93.0, 47.0, 33.0, 32.0, 24.0, 15.0, 11.0, 4.0, 6.0, 4.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 2.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.0037860870361328125, -0.003664642572402954, -0.0035431981086730957, -0.0034217536449432373, -0.003300309181213379, -0.0031788647174835205, -0.003057420253753662, -0.0029359757900238037, -0.0028145313262939453, -0.002693086862564087, -0.0025716423988342285, -0.00245019793510437, -0.0023287534713745117, -0.0022073090076446533, -0.002085864543914795, -0.0019644200801849365, -0.0018429756164550781, -0.0017215311527252197, -0.0016000866889953613, -0.001478642225265503, -0.0013571977615356445, -0.0012357532978057861, -0.0011143088340759277, -0.0009928643703460693, -0.0008714199066162109, -0.0007499754428863525, -0.0006285309791564941, -0.0005070865154266357, -0.00038564205169677734, -0.00026419758796691895, -0.00014275312423706055, -2.130866050720215e-05, 0.00010013580322265625, 0.00022158026695251465, 0.00034302473068237305, 0.00046446919441223145, 0.0005859136581420898, 0.0007073581218719482, 0.0008288025856018066, 0.000950247049331665, 0.0010716915130615234, 0.0011931359767913818, 0.0013145804405212402, 0.0014360249042510986, 0.001557469367980957, 0.0016789138317108154, 0.0018003582954406738, 0.0019218027591705322, 0.0020432472229003906, 0.002164691686630249, 0.0022861361503601074, 0.002407580614089966, 0.0025290250778198242, 0.0026504695415496826, 0.002771914005279541, 0.0028933584690093994, 0.003014802932739258, 0.003136247396469116, 0.0032576918601989746, 0.003379136323928833, 0.0035005807876586914, 0.00362202525138855, 0.003743469715118408, 0.0038649141788482666, 0.003986358642578125]}, "gradients/encoder.encoder.layers.22.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 2.0, 1.0, 3.0, 1.0, 0.0, 1.0, 0.0, 4.0, 5.0, 1.0, 6.0, 5.0, 7.0, 15.0, 11.0, 16.0, 23.0, 26.0, 38.0, 50.0, 58.0, 75.0, 80.0, 87.0, 80.0, 72.0, 61.0, 67.0, 49.0, 37.0, 24.0, 27.0, 20.0, 11.0, 9.0, 6.0, 7.0, 3.0, 5.0, 4.0, 2.0, 2.0, 2.0, 3.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 1.0, 1.0, 1.0], "bins": [-0.00128173828125, -0.0012426823377609253, -0.0012036263942718506, -0.0011645704507827759, -0.0011255145072937012, -0.0010864585638046265, -0.0010474026203155518, -0.001008346676826477, -0.0009692907333374023, -0.0009302347898483276, -0.0008911788463592529, -0.0008521229028701782, -0.0008130669593811035, -0.0007740110158920288, -0.0007349550724029541, -0.0006958991289138794, -0.0006568431854248047, -0.00061778724193573, -0.0005787312984466553, -0.0005396753549575806, -0.0005006194114685059, -0.00046156346797943115, -0.00042250752449035645, -0.00038345158100128174, -0.00034439563751220703, -0.0003053396940231323, -0.0002662837505340576, -0.0002272278070449829, -0.0001881718635559082, -0.0001491159200668335, -0.00011005997657775879, -7.100403308868408e-05, -3.1948089599609375e-05, 7.107853889465332e-06, 4.616379737854004e-05, 8.521974086761475e-05, 0.00012427568435668945, 0.00016333162784576416, 0.00020238757133483887, 0.00024144351482391357, 0.0002804994583129883, 0.000319555401802063, 0.0003586113452911377, 0.0003976672887802124, 0.0004367232322692871, 0.0004757791757583618, 0.0005148351192474365, 0.0005538910627365112, 0.0005929470062255859, 0.0006320029497146606, 0.0006710588932037354, 0.0007101148366928101, 0.0007491707801818848, 0.0007882267236709595, 0.0008272826671600342, 0.0008663386106491089, 0.0009053945541381836, 0.0009444504976272583, 0.000983506441116333, 0.0010225623846054077, 0.0010616183280944824, 0.0011006742715835571, 0.0011397302150726318, 0.0011787861585617065, 0.0012178421020507812]}, "gradients/encoder.encoder.layers.22.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 3.0, 10.0, 9.0, 43.0, 296.0, 574.0, 49.0, 18.0, 7.0, 2.0, 0.0, 2.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-16.732952117919922, -15.857190132141113, -14.981428146362305, -14.10566520690918, -13.229903221130371, -12.354141235351562, -11.478378295898438, -10.602616310119629, -9.72685432434082, -8.851092338562012, -7.975329875946045, -7.099567413330078, -6.2238054275512695, -5.348043441772461, -4.472280979156494, -3.5965185165405273, -2.7207565307617188, -1.844994306564331, -0.9692320823669434, -0.09346985816955566, 0.782292366027832, 1.6580543518066406, 2.5338168144226074, 3.409579277038574, 4.285341262817383, 5.161103248596191, 6.036865711212158, 6.912628173828125, 7.788390159606934, 8.664152145385742, 9.539915084838867, 10.415677070617676, 11.291439056396484, 12.167201042175293, 13.042963027954102, 13.918725967407227, 14.794487953186035, 15.670249938964844, 16.54601287841797, 17.421775817871094, 18.297536849975586, 19.17329978942871, 20.049060821533203, 20.924823760986328, 21.800586700439453, 22.676347732543945, 23.55211067199707, 24.427871704101562, 25.303634643554688, 26.179397583007812, 27.055158615112305, 27.93092155456543, 28.806682586669922, 29.682445526123047, 30.558208465576172, 31.433971405029297, 32.309730529785156, 33.18549346923828, 34.061256408691406, 34.937015533447266, 35.81277847290039, 36.688541412353516, 37.56430435180664, 38.440067291259766, 39.31583023071289]}, "gradients/encoder.encoder.layers.22.layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 5.0, 2.0, 3.0, 2.0, 4.0, 5.0, 3.0, 6.0, 3.0, 6.0, 13.0, 10.0, 11.0, 29.0, 32.0, 41.0, 52.0, 51.0, 64.0, 81.0, 75.0, 76.0, 82.0, 60.0, 56.0, 50.0, 35.0, 43.0, 21.0, 18.0, 14.0, 14.0, 5.0, 7.0, 5.0, 1.0, 4.0, 3.0, 4.0, 3.0, 1.0, 3.0, 2.0, 2.0, 3.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0], "bins": [-11.385229110717773, -11.008130073547363, -10.631031036376953, -10.253931045532227, -9.876832008361816, -9.499732971191406, -9.12263298034668, -8.74553394317627, -8.36843490600586, -7.991335868835449, -7.614236354827881, -7.2371368408203125, -6.860037803649902, -6.482938766479492, -6.105839252471924, -5.7287397384643555, -5.351640701293945, -4.974541664123535, -4.597442150115967, -4.220342636108398, -3.8432435989379883, -3.466144323348999, -3.0890450477600098, -2.7119457721710205, -2.3348464965820312, -1.957747220993042, -1.5806479454040527, -1.2035486698150635, -0.8264493942260742, -0.44935011863708496, -0.0722508430480957, 0.30484843254089355, 0.6819477081298828, 1.059046983718872, 1.4361462593078613, 1.8132455348968506, 2.19034481048584, 2.567444086074829, 2.9445433616638184, 3.3216426372528076, 3.698741912841797, 4.075840950012207, 4.452940464019775, 4.830039978027344, 5.207139015197754, 5.584238052368164, 5.961337566375732, 6.338437080383301, 6.715536117553711, 7.092635154724121, 7.4697346687316895, 7.846834182739258, 8.223933219909668, 8.601032257080078, 8.978132247924805, 9.355231285095215, 9.732330322265625, 10.109429359436035, 10.486528396606445, 10.863628387451172, 11.240727424621582, 11.617826461791992, 11.994926452636719, 12.372025489807129, 12.749124526977539]}, "gradients/encoder.encoder.layers.21.feed_forward.output_dense.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 1.0, 2.0, 2.0, 0.0, 3.0, 1.0, 2.0, 1.0, 1.0, 1.0, 3.0, 1.0, 3.0, 7.0, 3.0, 2.0, 1.0, 3.0, 6.0, 17.0, 61.0, 167.0, 349.0, 813.0, 2114.0, 5119.0, 13807.0, 44385.0, 4076768.0, 36999.0, 9549.0, 2750.0, 899.0, 267.0, 85.0, 36.0, 26.0, 11.0, 2.0, 5.0, 1.0, 4.0, 6.0, 2.0, 4.0, 3.0, 4.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-13.3203125, -12.896728515625, -12.47314453125, -12.049560546875, -11.6259765625, -11.202392578125, -10.77880859375, -10.355224609375, -9.931640625, -9.508056640625, -9.08447265625, -8.660888671875, -8.2373046875, -7.813720703125, -7.39013671875, -6.966552734375, -6.54296875, -6.119384765625, -5.69580078125, -5.272216796875, -4.8486328125, -4.425048828125, -4.00146484375, -3.577880859375, -3.154296875, -2.730712890625, -2.30712890625, -1.883544921875, -1.4599609375, -1.036376953125, -0.61279296875, -0.189208984375, 0.234375, 0.657958984375, 1.08154296875, 1.505126953125, 1.9287109375, 2.352294921875, 2.77587890625, 3.199462890625, 3.623046875, 4.046630859375, 4.47021484375, 4.893798828125, 5.3173828125, 5.740966796875, 6.16455078125, 6.588134765625, 7.01171875, 7.435302734375, 7.85888671875, 8.282470703125, 8.7060546875, 9.129638671875, 9.55322265625, 9.976806640625, 10.400390625, 10.823974609375, 11.24755859375, 11.671142578125, 12.0947265625, 12.518310546875, 12.94189453125, 13.365478515625, 13.7890625]}, "gradients/encoder.encoder.layers.21.feed_forward.output_dense.bias": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 1.0, 2.0, 2.0, 0.0, 3.0, 1.0, 2.0, 1.0, 1.0, 1.0, 3.0, 1.0, 3.0, 7.0, 3.0, 2.0, 1.0, 3.0, 6.0, 4.0, 3.0, 8.0, 3.0, 8.0, 5.0, 19.0, 25.0, 85.0, 453.0, 252.0, 27.0, 10.0, 10.0, 8.0, 8.0, 2.0, 7.0, 6.0, 1.0, 5.0, 1.0, 4.0, 6.0, 2.0, 4.0, 3.0, 5.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.33056640625, -0.3200531005859375, -0.309539794921875, -0.2990264892578125, -0.28851318359375, -0.2779998779296875, -0.267486572265625, -0.2569732666015625, -0.2464599609375, -0.2359466552734375, -0.225433349609375, -0.2149200439453125, -0.20440673828125, -0.1938934326171875, -0.183380126953125, -0.1728668212890625, -0.162353515625, -0.1518402099609375, -0.141326904296875, -0.1308135986328125, -0.12030029296875, -0.1097869873046875, -0.099273681640625, -0.0887603759765625, -0.0782470703125, -0.0677337646484375, -0.057220458984375, -0.0467071533203125, -0.03619384765625, -0.0256805419921875, -0.015167236328125, -0.0046539306640625, 0.005859375, 0.0163726806640625, 0.026885986328125, 0.0373992919921875, 0.04791259765625, 0.0584259033203125, 0.068939208984375, 0.0794525146484375, 0.0899658203125, 0.1004791259765625, 0.110992431640625, 0.1215057373046875, 0.13201904296875, 0.1425323486328125, 0.153045654296875, 0.1635589599609375, 0.174072265625, 0.1845855712890625, 0.195098876953125, 0.2056121826171875, 0.21612548828125, 0.2266387939453125, 0.237152099609375, 0.2476654052734375, 0.2581787109375, 0.2686920166015625, 0.279205322265625, 0.2897186279296875, 0.30023193359375, 0.3107452392578125, 0.321258544921875, 0.3317718505859375, 0.34228515625]}, "gradients/encoder.encoder.layers.21.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 3.0, 3.0, 1.0, 4.0, 5.0, 5.0, 9.0, 14.0, 17.0, 17.0, 33.0, 29.0, 38.0, 43.0, 54.0, 102.0, 146.0, 224.0, 458.0, 974.0, 2700.0, 11147.0, 100539.0, 4013316.0, 52716.0, 7710.0, 2152.0, 806.0, 370.0, 188.0, 128.0, 85.0, 62.0, 46.0, 27.0, 35.0, 22.0, 7.0, 13.0, 6.0, 11.0, 5.0, 7.0, 3.0, 4.0, 2.0, 1.0, 2.0, 2.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.2578125, -7.02655029296875, -6.7952880859375, -6.56402587890625, -6.332763671875, -6.10150146484375, -5.8702392578125, -5.63897705078125, -5.40771484375, -5.17645263671875, -4.9451904296875, -4.71392822265625, -4.482666015625, -4.25140380859375, -4.0201416015625, -3.78887939453125, -3.5576171875, -3.32635498046875, -3.0950927734375, -2.86383056640625, -2.632568359375, -2.40130615234375, -2.1700439453125, -1.93878173828125, -1.70751953125, -1.47625732421875, -1.2449951171875, -1.01373291015625, -0.782470703125, -0.55120849609375, -0.3199462890625, -0.08868408203125, 0.142578125, 0.37384033203125, 0.6051025390625, 0.83636474609375, 1.067626953125, 1.29888916015625, 1.5301513671875, 1.76141357421875, 1.99267578125, 2.22393798828125, 2.4552001953125, 2.68646240234375, 2.917724609375, 3.14898681640625, 3.3802490234375, 3.61151123046875, 3.8427734375, 4.07403564453125, 4.3052978515625, 4.53656005859375, 4.767822265625, 4.99908447265625, 5.2303466796875, 5.46160888671875, 5.69287109375, 5.92413330078125, 6.1553955078125, 6.38665771484375, 6.617919921875, 6.84918212890625, 7.0804443359375, 7.31170654296875, 7.54296875]}, "gradients/encoder.encoder.layers.21.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 3.0, 2.0, 8.0, 2.0, 4.0, 1.0, 10.0, 5.0, 10.0, 23.0, 14.0, 18.0, 29.0, 30.0, 44.0, 43.0, 63.0, 68.0, 76.0, 88.0, 184.0, 2550.0, 216.0, 93.0, 83.0, 89.0, 54.0, 56.0, 31.0, 53.0, 18.0, 20.0, 15.0, 16.0, 19.0, 7.0, 9.0, 10.0, 7.0, 4.0, 3.0, 1.0, 1.0, 3.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.95556640625, -0.9262847900390625, -0.897003173828125, -0.8677215576171875, -0.83843994140625, -0.8091583251953125, -0.779876708984375, -0.7505950927734375, -0.7213134765625, -0.6920318603515625, -0.662750244140625, -0.6334686279296875, -0.60418701171875, -0.5749053955078125, -0.545623779296875, -0.5163421630859375, -0.487060546875, -0.4577789306640625, -0.428497314453125, -0.3992156982421875, -0.36993408203125, -0.3406524658203125, -0.311370849609375, -0.2820892333984375, -0.2528076171875, -0.2235260009765625, -0.194244384765625, -0.1649627685546875, -0.13568115234375, -0.1063995361328125, -0.077117919921875, -0.0478363037109375, -0.0185546875, 0.0107269287109375, 0.040008544921875, 0.0692901611328125, 0.09857177734375, 0.1278533935546875, 0.157135009765625, 0.1864166259765625, 0.2156982421875, 0.2449798583984375, 0.274261474609375, 0.3035430908203125, 0.33282470703125, 0.3621063232421875, 0.391387939453125, 0.4206695556640625, 0.449951171875, 0.4792327880859375, 0.508514404296875, 0.5377960205078125, 0.56707763671875, 0.5963592529296875, 0.625640869140625, 0.6549224853515625, 0.6842041015625, 0.7134857177734375, 0.742767333984375, 0.7720489501953125, 0.80133056640625, 0.8306121826171875, 0.859893798828125, 0.8891754150390625, 0.91845703125]}, "gradients/encoder.encoder.layers.21.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 1.0, 5.0, 8.0, 8.0, 15.0, 20.0, 55.0, 117.0, 417.0, 206.0, 63.0, 33.0, 29.0, 10.0, 5.0, 8.0, 0.0, 3.0, 2.0, 1.0, 0.0, 0.0, 1.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.40841007232666, -2.3026576042175293, -2.1969053745269775, -2.091153144836426, -1.985400676727295, -1.8796483278274536, -1.7738959789276123, -1.668143630027771, -1.5623912811279297, -1.4566389322280884, -1.350886583328247, -1.2451342344284058, -1.1393818855285645, -1.0336295366287231, -0.9278771877288818, -0.8221248388290405, -0.7163724899291992, -0.6106201410293579, -0.5048677921295166, -0.3991154432296753, -0.293363094329834, -0.18761074542999268, -0.08185839653015137, 0.02389395236968994, 0.12964630126953125, 0.23539865016937256, 0.34115099906921387, 0.4469033479690552, 0.5526556968688965, 0.6584080457687378, 0.7641603946685791, 0.8699127435684204, 0.9756650924682617, 1.081417441368103, 1.1871697902679443, 1.2929221391677856, 1.398674488067627, 1.5044268369674683, 1.6101791858673096, 1.7159315347671509, 1.8216838836669922, 1.9274362325668335, 2.033188581466675, 2.1389408111572266, 2.2446932792663574, 2.3504457473754883, 2.45619797706604, 2.561950206756592, 2.6677026748657227, 2.7734551429748535, 2.8792073726654053, 2.984959602355957, 3.090712070465088, 3.1964645385742188, 3.3022167682647705, 3.4079689979553223, 3.513721466064453, 3.619473934173584, 3.7252261638641357, 3.8309783935546875, 3.9367308616638184, 4.042483329772949, 4.148235321044922, 4.253987789154053, 4.359740257263184]}, "gradients/encoder.encoder.layers.21.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 7.0, 3.0, 4.0, 2.0, 4.0, 7.0, 3.0, 13.0, 13.0, 20.0, 23.0, 51.0, 57.0, 65.0, 82.0, 93.0, 92.0, 85.0, 79.0, 55.0, 56.0, 54.0, 44.0, 21.0, 14.0, 6.0, 9.0, 12.0, 5.0, 4.0, 2.0, 1.0, 0.0, 1.0, 3.0, 3.0, 3.0, 2.0, 2.0, 2.0, 1.0, 3.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-5.898839950561523, -5.712709426879883, -5.526578903198242, -5.340448379516602, -5.154317855834961, -4.96818733215332, -4.78205680847168, -4.595925807952881, -4.40979528427124, -4.2236647605896, -4.037534236907959, -3.8514037132263184, -3.6652729511260986, -3.479142427444458, -3.2930119037628174, -3.1068811416625977, -2.920750856399536, -2.7346203327178955, -2.548489809036255, -2.362359046936035, -2.1762285232543945, -1.990097999572754, -1.8039674758911133, -1.617836833000183, -1.4317063093185425, -1.2455757856369019, -1.0594451427459717, -0.873314619064331, -0.6871840357780457, -0.5010534524917603, -0.31492292881011963, -0.12879228591918945, 0.05733823776245117, 0.24346880614757538, 0.4295993745326996, 0.6157299280166626, 0.801860511302948, 0.9879910945892334, 1.174121618270874, 1.3602522611618042, 1.5463827848434448, 1.7325133085250854, 1.9186439514160156, 2.1047744750976562, 2.290904998779297, 2.4770355224609375, 2.663166046142578, 2.849296808242798, 3.0354273319244385, 3.221557855606079, 3.4076883792877197, 3.5938191413879395, 3.77994966506958, 3.9660801887512207, 4.152210712432861, 4.338341236114502, 4.524471759796143, 4.710602283477783, 4.896732807159424, 5.0828633308410645, 5.268993854522705, 5.455124855041504, 5.6412553787231445, 5.827385902404785, 6.013516426086426]}, "gradients/encoder.encoder.layers.21.attention.out_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 4.0, 4.0, 7.0, 4.0, 14.0, 17.0, 30.0, 26.0, 36.0, 70.0, 68.0, 111.0, 155.0, 219.0, 318.0, 467.0, 693.0, 1040.0, 1505.0, 2461.0, 4062.0, 7412.0, 16520.0, 77344.0, 845103.0, 58661.0, 14926.0, 6866.0, 3798.0, 2265.0, 1424.0, 946.0, 602.0, 423.0, 279.0, 211.0, 129.0, 102.0, 56.0, 63.0, 35.0, 32.0, 19.0, 10.0, 5.0, 10.0, 4.0, 5.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-2.005859375, -1.942047119140625, -1.87823486328125, -1.814422607421875, -1.7506103515625, -1.686798095703125, -1.62298583984375, -1.559173583984375, -1.495361328125, -1.431549072265625, -1.36773681640625, -1.303924560546875, -1.2401123046875, -1.176300048828125, -1.11248779296875, -1.048675537109375, -0.98486328125, -0.921051025390625, -0.85723876953125, -0.793426513671875, -0.7296142578125, -0.665802001953125, -0.60198974609375, -0.538177490234375, -0.474365234375, -0.410552978515625, -0.34674072265625, -0.282928466796875, -0.2191162109375, -0.155303955078125, -0.09149169921875, -0.027679443359375, 0.0361328125, 0.099945068359375, 0.16375732421875, 0.227569580078125, 0.2913818359375, 0.355194091796875, 0.41900634765625, 0.482818603515625, 0.546630859375, 0.610443115234375, 0.67425537109375, 0.738067626953125, 0.8018798828125, 0.865692138671875, 0.92950439453125, 0.993316650390625, 1.05712890625, 1.120941162109375, 1.18475341796875, 1.248565673828125, 1.3123779296875, 1.376190185546875, 1.44000244140625, 1.503814697265625, 1.567626953125, 1.631439208984375, 1.69525146484375, 1.759063720703125, 1.8228759765625, 1.886688232421875, 1.95050048828125, 2.014312744140625, 2.078125]}, "gradients/encoder.encoder.layers.21.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 1.0, 0.0, 2.0, 1.0, 2.0, 2.0, 1.0, 3.0, 2.0, 4.0, 5.0, 3.0, 6.0, 5.0, 3.0, 5.0, 4.0, 9.0, 6.0, 11.0, 22.0, 47.0, 87.0, 217.0, 228.0, 158.0, 75.0, 29.0, 11.0, 6.0, 11.0, 9.0, 5.0, 4.0, 8.0, 4.0, 3.0, 2.0, 1.0, 3.0, 1.0, 2.0, 3.0, 0.0, 1.0, 3.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.294189453125, -0.2864513397216797, -0.2787132263183594, -0.27097511291503906, -0.26323699951171875, -0.25549888610839844, -0.24776077270507812, -0.2400226593017578, -0.2322845458984375, -0.2245464324951172, -0.21680831909179688, -0.20907020568847656, -0.20133209228515625, -0.19359397888183594, -0.18585586547851562, -0.1781177520751953, -0.170379638671875, -0.1626415252685547, -0.15490341186523438, -0.14716529846191406, -0.13942718505859375, -0.13168907165527344, -0.12395095825195312, -0.11621284484863281, -0.1084747314453125, -0.10073661804199219, -0.09299850463867188, -0.08526039123535156, -0.07752227783203125, -0.06978416442871094, -0.062046051025390625, -0.05430793762207031, -0.04656982421875, -0.03883171081542969, -0.031093597412109375, -0.023355484008789062, -0.01561737060546875, -0.007879257202148438, -0.000141143798828125, 0.0075969696044921875, 0.0153350830078125, 0.023073196411132812, 0.030811309814453125, 0.03854942321777344, 0.04628753662109375, 0.05402565002441406, 0.061763763427734375, 0.06950187683105469, 0.077239990234375, 0.08497810363769531, 0.09271621704101562, 0.10045433044433594, 0.10819244384765625, 0.11593055725097656, 0.12366867065429688, 0.1314067840576172, 0.1391448974609375, 0.1468830108642578, 0.15462112426757812, 0.16235923767089844, 0.17009735107421875, 0.17783546447753906, 0.18557357788085938, 0.1933116912841797, 0.2010498046875]}, "gradients/encoder.encoder.layers.21.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 5.0, 3.0, 3.0, 5.0, 4.0, 4.0, 9.0, 8.0, 13.0, 11.0, 9.0, 22.0, 24.0, 16.0, 40.0, 58.0, 69.0, 124.0, 163.0, 293.0, 463.0, 770.0, 1387.0, 4027.0, 45412.0, 948774.0, 39726.0, 3768.0, 1326.0, 722.0, 475.0, 261.0, 165.0, 109.0, 70.0, 46.0, 35.0, 35.0, 18.0, 12.0, 13.0, 12.0, 11.0, 4.0, 11.0, 10.0, 6.0, 6.0, 5.0, 4.0, 0.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.9140625, -2.821502685546875, -2.72894287109375, -2.636383056640625, -2.5438232421875, -2.451263427734375, -2.35870361328125, -2.266143798828125, -2.173583984375, -2.081024169921875, -1.98846435546875, -1.895904541015625, -1.8033447265625, -1.710784912109375, -1.61822509765625, -1.525665283203125, -1.43310546875, -1.340545654296875, -1.24798583984375, -1.155426025390625, -1.0628662109375, -0.970306396484375, -0.87774658203125, -0.785186767578125, -0.692626953125, -0.600067138671875, -0.50750732421875, -0.414947509765625, -0.3223876953125, -0.229827880859375, -0.13726806640625, -0.044708251953125, 0.0478515625, 0.140411376953125, 0.23297119140625, 0.325531005859375, 0.4180908203125, 0.510650634765625, 0.60321044921875, 0.695770263671875, 0.788330078125, 0.880889892578125, 0.97344970703125, 1.066009521484375, 1.1585693359375, 1.251129150390625, 1.34368896484375, 1.436248779296875, 1.52880859375, 1.621368408203125, 1.71392822265625, 1.806488037109375, 1.8990478515625, 1.991607666015625, 2.08416748046875, 2.176727294921875, 2.269287109375, 2.361846923828125, 2.45440673828125, 2.546966552734375, 2.6395263671875, 2.732086181640625, 2.82464599609375, 2.917205810546875, 3.009765625]}, "gradients/encoder.encoder.layers.21.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 0.0, 4.0, 5.0, 6.0, 6.0, 10.0, 11.0, 4.0, 11.0, 11.0, 13.0, 9.0, 17.0, 24.0, 20.0, 24.0, 26.0, 29.0, 31.0, 22.0, 43.0, 33.0, 39.0, 45.0, 47.0, 44.0, 46.0, 30.0, 38.0, 45.0, 41.0, 36.0, 14.0, 35.0, 22.0, 27.0, 24.0, 9.0, 20.0, 20.0, 9.0, 9.0, 14.0, 8.0, 9.0, 4.0, 4.0, 5.0, 3.0, 3.0, 5.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0], "bins": [-0.69921875, -0.6777191162109375, -0.656219482421875, -0.6347198486328125, -0.61322021484375, -0.5917205810546875, -0.570220947265625, -0.5487213134765625, -0.5272216796875, -0.5057220458984375, -0.484222412109375, -0.4627227783203125, -0.44122314453125, -0.4197235107421875, -0.398223876953125, -0.3767242431640625, -0.355224609375, -0.3337249755859375, -0.312225341796875, -0.2907257080078125, -0.26922607421875, -0.2477264404296875, -0.226226806640625, -0.2047271728515625, -0.1832275390625, -0.1617279052734375, -0.140228271484375, -0.1187286376953125, -0.09722900390625, -0.0757293701171875, -0.054229736328125, -0.0327301025390625, -0.01123046875, 0.0102691650390625, 0.031768798828125, 0.0532684326171875, 0.07476806640625, 0.0962677001953125, 0.117767333984375, 0.1392669677734375, 0.1607666015625, 0.1822662353515625, 0.203765869140625, 0.2252655029296875, 0.24676513671875, 0.2682647705078125, 0.289764404296875, 0.3112640380859375, 0.332763671875, 0.3542633056640625, 0.375762939453125, 0.3972625732421875, 0.41876220703125, 0.4402618408203125, 0.461761474609375, 0.4832611083984375, 0.5047607421875, 0.5262603759765625, 0.547760009765625, 0.5692596435546875, 0.59075927734375, 0.6122589111328125, 0.633758544921875, 0.6552581787109375, 0.6767578125]}, "gradients/encoder.encoder.layers.21.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 4.0, 2.0, 1.0, 3.0, 2.0, 2.0, 5.0, 6.0, 9.0, 17.0, 16.0, 20.0, 21.0, 22.0, 34.0, 34.0, 41.0, 75.0, 112.0, 169.0, 247.0, 436.0, 906.0, 4009.0, 845202.0, 191858.0, 3272.0, 846.0, 403.0, 211.0, 175.0, 95.0, 78.0, 47.0, 51.0, 29.0, 36.0, 17.0, 15.0, 8.0, 8.0, 7.0, 5.0, 6.0, 2.0, 0.0, 4.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.053375244140625, -0.05159282684326172, -0.04981040954589844, -0.048027992248535156, -0.046245574951171875, -0.044463157653808594, -0.04268074035644531, -0.04089832305908203, -0.03911590576171875, -0.03733348846435547, -0.03555107116699219, -0.033768653869628906, -0.031986236572265625, -0.030203819274902344, -0.028421401977539062, -0.02663898468017578, -0.0248565673828125, -0.02307415008544922, -0.021291732788085938, -0.019509315490722656, -0.017726898193359375, -0.015944480895996094, -0.014162063598632812, -0.012379646301269531, -0.01059722900390625, -0.008814811706542969, -0.0070323944091796875, -0.005249977111816406, -0.003467559814453125, -0.0016851425170898438, 9.72747802734375e-05, 0.0018796920776367188, 0.003662109375, 0.005444526672363281, 0.0072269439697265625, 0.009009361267089844, 0.010791778564453125, 0.012574195861816406, 0.014356613159179688, 0.01613903045654297, 0.01792144775390625, 0.01970386505126953, 0.021486282348632812, 0.023268699645996094, 0.025051116943359375, 0.026833534240722656, 0.028615951538085938, 0.03039836883544922, 0.0321807861328125, 0.03396320343017578, 0.03574562072753906, 0.037528038024902344, 0.039310455322265625, 0.041092872619628906, 0.04287528991699219, 0.04465770721435547, 0.04644012451171875, 0.04822254180908203, 0.05000495910644531, 0.051787376403808594, 0.053569793701171875, 0.055352210998535156, 0.05713462829589844, 0.05891704559326172, 0.060699462890625]}, "gradients/encoder.encoder.layers.21.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 2.0, 2.0, 6.0, 2.0, 9.0, 15.0, 24.0, 29.0, 46.0, 45.0, 71.0, 106.0, 113.0, 97.0, 120.0, 92.0, 83.0, 40.0, 42.0, 25.0, 15.0, 7.0, 5.0, 2.0, 5.0, 3.0, 3.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-9.655952453613281e-06, -9.432435035705566e-06, -9.208917617797852e-06, -8.985400199890137e-06, -8.761882781982422e-06, -8.538365364074707e-06, -8.314847946166992e-06, -8.091330528259277e-06, -7.867813110351562e-06, -7.644295692443848e-06, -7.420778274536133e-06, -7.197260856628418e-06, -6.973743438720703e-06, -6.750226020812988e-06, -6.5267086029052734e-06, -6.303191184997559e-06, -6.079673767089844e-06, -5.856156349182129e-06, -5.632638931274414e-06, -5.409121513366699e-06, -5.185604095458984e-06, -4.9620866775512695e-06, -4.738569259643555e-06, -4.51505184173584e-06, -4.291534423828125e-06, -4.06801700592041e-06, -3.844499588012695e-06, -3.6209821701049805e-06, -3.3974647521972656e-06, -3.1739473342895508e-06, -2.950429916381836e-06, -2.726912498474121e-06, -2.5033950805664062e-06, -2.2798776626586914e-06, -2.0563602447509766e-06, -1.8328428268432617e-06, -1.6093254089355469e-06, -1.385807991027832e-06, -1.1622905731201172e-06, -9.387731552124023e-07, -7.152557373046875e-07, -4.917383193969727e-07, -2.682209014892578e-07, -4.470348358154297e-08, 1.7881393432617188e-07, 4.023313522338867e-07, 6.258487701416016e-07, 8.493661880493164e-07, 1.0728836059570312e-06, 1.296401023864746e-06, 1.519918441772461e-06, 1.7434358596801758e-06, 1.9669532775878906e-06, 2.1904706954956055e-06, 2.4139881134033203e-06, 2.637505531311035e-06, 2.86102294921875e-06, 3.084540367126465e-06, 3.3080577850341797e-06, 3.5315752029418945e-06, 3.7550926208496094e-06, 3.978610038757324e-06, 4.202127456665039e-06, 4.425644874572754e-06, 4.649162292480469e-06]}, "gradients/encoder.encoder.layers.21.attention.q_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 2.0, 3.0, 4.0, 4.0, 4.0, 8.0, 11.0, 19.0, 27.0, 43.0, 100.0, 290.0, 1072.0, 15972.0, 1027174.0, 2962.0, 508.0, 180.0, 77.0, 45.0, 25.0, 17.0, 6.0, 5.0, 3.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.047027587890625, -0.044817447662353516, -0.04260730743408203, -0.04039716720581055, -0.03818702697753906, -0.03597688674926758, -0.033766746520996094, -0.03155660629272461, -0.029346466064453125, -0.02713632583618164, -0.024926185607910156, -0.022716045379638672, -0.020505905151367188, -0.018295764923095703, -0.01608562469482422, -0.013875484466552734, -0.01166534423828125, -0.009455204010009766, -0.007245063781738281, -0.005034923553466797, -0.0028247833251953125, -0.0006146430969238281, 0.0015954971313476562, 0.0038056373596191406, 0.006015777587890625, 0.00822591781616211, 0.010436058044433594, 0.012646198272705078, 0.014856338500976562, 0.017066478729248047, 0.01927661895751953, 0.021486759185791016, 0.0236968994140625, 0.025907039642333984, 0.02811717987060547, 0.030327320098876953, 0.03253746032714844, 0.03474760055541992, 0.036957740783691406, 0.03916788101196289, 0.041378021240234375, 0.04358816146850586, 0.045798301696777344, 0.04800844192504883, 0.05021858215332031, 0.0524287223815918, 0.05463886260986328, 0.056849002838134766, 0.05905914306640625, 0.061269283294677734, 0.06347942352294922, 0.0656895637512207, 0.06789970397949219, 0.07010984420776367, 0.07231998443603516, 0.07453012466430664, 0.07674026489257812, 0.07895040512084961, 0.0811605453491211, 0.08337068557739258, 0.08558082580566406, 0.08779096603393555, 0.09000110626220703, 0.09221124649047852, 0.09442138671875]}, "gradients/encoder.encoder.layers.21.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0, 14.0, 12.0, 19.0, 26.0, 53.0, 66.0, 145.0, 236.0, 179.0, 98.0, 58.0, 34.0, 18.0, 15.0, 11.0, 7.0, 5.0, 1.0, 3.0, 3.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.021942138671875, -0.021428585052490234, -0.02091503143310547, -0.020401477813720703, -0.019887924194335938, -0.019374370574951172, -0.018860816955566406, -0.01834726333618164, -0.017833709716796875, -0.01732015609741211, -0.016806602478027344, -0.016293048858642578, -0.015779495239257812, -0.015265941619873047, -0.014752388000488281, -0.014238834381103516, -0.01372528076171875, -0.013211727142333984, -0.012698173522949219, -0.012184619903564453, -0.011671066284179688, -0.011157512664794922, -0.010643959045410156, -0.01013040542602539, -0.009616851806640625, -0.00910329818725586, -0.008589744567871094, -0.008076190948486328, -0.0075626373291015625, -0.007049083709716797, -0.006535530090332031, -0.006021976470947266, -0.0055084228515625, -0.004994869232177734, -0.004481315612792969, -0.003967761993408203, -0.0034542083740234375, -0.002940654754638672, -0.0024271011352539062, -0.0019135475158691406, -0.001399993896484375, -0.0008864402770996094, -0.00037288665771484375, 0.00014066696166992188, 0.0006542205810546875, 0.0011677742004394531, 0.0016813278198242188, 0.0021948814392089844, 0.00270843505859375, 0.0032219886779785156, 0.0037355422973632812, 0.004249095916748047, 0.0047626495361328125, 0.005276203155517578, 0.005789756774902344, 0.006303310394287109, 0.006816864013671875, 0.007330417633056641, 0.007843971252441406, 0.008357524871826172, 0.008871078491210938, 0.009384632110595703, 0.009898185729980469, 0.010411739349365234, 0.01092529296875]}, "gradients/encoder.encoder.layers.21.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 3.0, 1.0, 0.0, 10.0, 66.0, 874.0, 56.0, 11.0], "bins": [-45.84514236450195, -45.09990692138672, -44.35467529296875, -43.609439849853516, -42.86420822143555, -42.11897277832031, -41.373741149902344, -40.62850570678711, -39.883270263671875, -39.13803482055664, -38.39280319213867, -37.64756774902344, -36.90233612060547, -36.157100677490234, -35.411865234375, -34.66663360595703, -33.92140197753906, -33.17616653442383, -32.43093490600586, -31.685699462890625, -30.940465927124023, -30.195232391357422, -29.44999885559082, -28.70476531982422, -27.959529876708984, -27.214296340942383, -26.46906280517578, -25.723827362060547, -24.978593826293945, -24.233360290527344, -23.488126754760742, -22.74289321899414, -21.997661590576172, -21.25242805480957, -20.50719451904297, -19.761959075927734, -19.016725540161133, -18.27149200439453, -17.52625846862793, -16.781024932861328, -16.035789489746094, -15.290555953979492, -14.545321464538574, -13.800087928771973, -13.054854393005371, -12.309619903564453, -11.564386367797852, -10.81915283203125, -10.073919296264648, -9.328685760498047, -8.583451271057129, -7.838217735290527, -7.092984199523926, -6.347750186920166, -5.602516174316406, -4.857282638549805, -4.112049102783203, -3.3668153285980225, -2.621581554412842, -1.876347541809082, -1.1311137676239014, -0.3858799934387207, 0.35935401916503906, 1.1045875549316406, 1.8498215675354004]}, "gradients/encoder.encoder.layers.21.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 2.0, 3.0, 3.0, 2.0, 13.0, 8.0, 5.0, 13.0, 12.0, 18.0, 24.0, 23.0, 16.0, 30.0, 31.0, 35.0, 45.0, 66.0, 73.0, 50.0, 54.0, 53.0, 54.0, 63.0, 49.0, 32.0, 42.0, 40.0, 27.0, 24.0, 22.0, 18.0, 16.0, 10.0, 8.0, 5.0, 5.0, 5.0, 5.0, 2.0, 3.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 0.0, 0.0, 1.0], "bins": [-6.492646217346191, -6.297248840332031, -6.101850986480713, -5.9064531326293945, -5.711055755615234, -5.515658378601074, -5.320260524749756, -5.1248626708984375, -4.929465293884277, -4.734067916870117, -4.538670063018799, -4.3432722091674805, -4.14787483215332, -3.952477216720581, -3.757079601287842, -3.5616819858551025, -3.3662843704223633, -3.170886754989624, -2.9754891395568848, -2.7800915241241455, -2.5846939086914062, -2.389296293258667, -2.1938986778259277, -1.9985010623931885, -1.8031034469604492, -1.60770583152771, -1.4123082160949707, -1.2169106006622314, -1.0215129852294922, -0.8261153697967529, -0.6307177543640137, -0.4353201389312744, -0.23992252349853516, -0.0445249080657959, 0.15087270736694336, 0.3462703227996826, 0.5416679382324219, 0.7370655536651611, 0.9324631690979004, 1.1278607845306396, 1.323258399963379, 1.5186560153961182, 1.7140536308288574, 1.9094512462615967, 2.104848861694336, 2.300246477127075, 2.4956440925598145, 2.6910417079925537, 2.886439323425293, 3.0818369388580322, 3.2772345542907715, 3.4726321697235107, 3.66802978515625, 3.8634274005889893, 4.0588250160217285, 4.254222869873047, 4.449620246887207, 4.645017623901367, 4.8404154777526855, 5.035813331604004, 5.231210708618164, 5.426608085632324, 5.622005939483643, 5.817403793334961, 6.012801170349121]}, "gradients/encoder.encoder.layers.20.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 0.0, 2.0, 2.0, 1.0, 3.0, 3.0, 4.0, 3.0, 5.0, 4.0, 4.0, 10.0, 12.0, 28.0, 37.0, 49.0, 110.0, 241.0, 608.0, 1719.0, 6218.0, 3972531.0, 206078.0, 4908.0, 1140.0, 320.0, 114.0, 48.0, 29.0, 23.0, 8.0, 5.0, 5.0, 6.0, 3.0, 2.0, 2.0, 4.0, 3.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-6.5234375, -6.3602294921875, -6.197021484375, -6.0338134765625, -5.87060546875, -5.7073974609375, -5.544189453125, -5.3809814453125, -5.2177734375, -5.0545654296875, -4.891357421875, -4.7281494140625, -4.56494140625, -4.4017333984375, -4.238525390625, -4.0753173828125, -3.912109375, -3.7489013671875, -3.585693359375, -3.4224853515625, -3.25927734375, -3.0960693359375, -2.932861328125, -2.7696533203125, -2.6064453125, -2.4432373046875, -2.280029296875, -2.1168212890625, -1.95361328125, -1.7904052734375, -1.627197265625, -1.4639892578125, -1.30078125, -1.1375732421875, -0.974365234375, -0.8111572265625, -0.64794921875, -0.4847412109375, -0.321533203125, -0.1583251953125, 0.0048828125, 0.1680908203125, 0.331298828125, 0.4945068359375, 0.65771484375, 0.8209228515625, 0.984130859375, 1.1473388671875, 1.310546875, 1.4737548828125, 1.636962890625, 1.8001708984375, 1.96337890625, 2.1265869140625, 2.289794921875, 2.4530029296875, 2.6162109375, 2.7794189453125, 2.942626953125, 3.1058349609375, 3.26904296875, 3.4322509765625, 3.595458984375, 3.7586669921875, 3.921875]}, "gradients/encoder.encoder.layers.20.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 0.0, 2.0, 2.0, 1.0, 2.0, 3.0, 4.0, 2.0, 4.0, 3.0, 1.0, 5.0, 7.0, 8.0, 5.0, 6.0, 6.0, 14.0, 35.0, 44.0, 130.0, 182.0, 225.0, 131.0, 75.0, 38.0, 12.0, 8.0, 11.0, 14.0, 4.0, 4.0, 4.0, 6.0, 3.0, 2.0, 2.0, 4.0, 3.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.313720703125, -0.30586814880371094, -0.2980155944824219, -0.2901630401611328, -0.28231048583984375, -0.2744579315185547, -0.2666053771972656, -0.25875282287597656, -0.2509002685546875, -0.24304771423339844, -0.23519515991210938, -0.2273426055908203, -0.21949005126953125, -0.2116374969482422, -0.20378494262695312, -0.19593238830566406, -0.188079833984375, -0.18022727966308594, -0.17237472534179688, -0.1645221710205078, -0.15666961669921875, -0.1488170623779297, -0.14096450805664062, -0.13311195373535156, -0.1252593994140625, -0.11740684509277344, -0.10955429077148438, -0.10170173645019531, -0.09384918212890625, -0.08599662780761719, -0.07814407348632812, -0.07029151916503906, -0.06243896484375, -0.05458641052246094, -0.046733856201171875, -0.03888130187988281, -0.03102874755859375, -0.023176193237304688, -0.015323638916015625, -0.0074710845947265625, 0.0003814697265625, 0.008234024047851562, 0.016086578369140625, 0.023939132690429688, 0.03179168701171875, 0.03964424133300781, 0.047496795654296875, 0.05534934997558594, 0.063201904296875, 0.07105445861816406, 0.07890701293945312, 0.08675956726074219, 0.09461212158203125, 0.10246467590332031, 0.11031723022460938, 0.11816978454589844, 0.1260223388671875, 0.13387489318847656, 0.14172744750976562, 0.1495800018310547, 0.15743255615234375, 0.1652851104736328, 0.17313766479492188, 0.18099021911621094, 0.1888427734375]}, "gradients/encoder.encoder.layers.20.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 2.0, 4.0, 2.0, 3.0, 7.0, 5.0, 4.0, 8.0, 11.0, 15.0, 15.0, 28.0, 52.0, 97.0, 144.0, 318.0, 988.0, 4325.0, 56224.0, 4113385.0, 15794.0, 1899.0, 490.0, 190.0, 87.0, 52.0, 38.0, 24.0, 17.0, 17.0, 13.0, 11.0, 2.0, 6.0, 3.0, 0.0, 5.0, 3.0, 1.0, 3.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-3.603515625, -3.471588134765625, -3.33966064453125, -3.207733154296875, -3.0758056640625, -2.943878173828125, -2.81195068359375, -2.680023193359375, -2.548095703125, -2.416168212890625, -2.28424072265625, -2.152313232421875, -2.0203857421875, -1.888458251953125, -1.75653076171875, -1.624603271484375, -1.49267578125, -1.360748291015625, -1.22882080078125, -1.096893310546875, -0.9649658203125, -0.833038330078125, -0.70111083984375, -0.569183349609375, -0.437255859375, -0.305328369140625, -0.17340087890625, -0.041473388671875, 0.0904541015625, 0.222381591796875, 0.35430908203125, 0.486236572265625, 0.6181640625, 0.750091552734375, 0.88201904296875, 1.013946533203125, 1.1458740234375, 1.277801513671875, 1.40972900390625, 1.541656494140625, 1.673583984375, 1.805511474609375, 1.93743896484375, 2.069366455078125, 2.2012939453125, 2.333221435546875, 2.46514892578125, 2.597076416015625, 2.72900390625, 2.860931396484375, 2.99285888671875, 3.124786376953125, 3.2567138671875, 3.388641357421875, 3.52056884765625, 3.652496337890625, 3.784423828125, 3.916351318359375, 4.04827880859375, 4.180206298828125, 4.3121337890625, 4.444061279296875, 4.57598876953125, 4.707916259765625, 4.83984375]}, "gradients/encoder.encoder.layers.20.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 3.0, 1.0, 3.0, 5.0, 0.0, 3.0, 3.0, 3.0, 9.0, 9.0, 13.0, 10.0, 14.0, 19.0, 20.0, 31.0, 33.0, 37.0, 48.0, 86.0, 3283.0, 157.0, 66.0, 42.0, 35.0, 31.0, 27.0, 23.0, 15.0, 10.0, 10.0, 6.0, 7.0, 3.0, 5.0, 6.0, 2.0, 2.0, 4.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.6650390625, -0.6469078063964844, -0.6287765502929688, -0.6106452941894531, -0.5925140380859375, -0.5743827819824219, -0.5562515258789062, -0.5381202697753906, -0.519989013671875, -0.5018577575683594, -0.48372650146484375, -0.4655952453613281, -0.4474639892578125, -0.4293327331542969, -0.41120147705078125, -0.3930702209472656, -0.37493896484375, -0.3568077087402344, -0.33867645263671875, -0.3205451965332031, -0.3024139404296875, -0.2842826843261719, -0.26615142822265625, -0.24802017211914062, -0.229888916015625, -0.21175765991210938, -0.19362640380859375, -0.17549514770507812, -0.1573638916015625, -0.13923263549804688, -0.12110137939453125, -0.10297012329101562, -0.0848388671875, -0.06670761108398438, -0.04857635498046875, -0.030445098876953125, -0.0123138427734375, 0.005817413330078125, 0.02394866943359375, 0.042079925537109375, 0.060211181640625, 0.07834243774414062, 0.09647369384765625, 0.11460494995117188, 0.1327362060546875, 0.15086746215820312, 0.16899871826171875, 0.18712997436523438, 0.20526123046875, 0.22339248657226562, 0.24152374267578125, 0.2596549987792969, 0.2777862548828125, 0.2959175109863281, 0.31404876708984375, 0.3321800231933594, 0.350311279296875, 0.3684425354003906, 0.38657379150390625, 0.4047050476074219, 0.4228363037109375, 0.4409675598144531, 0.45909881591796875, 0.4772300720214844, 0.495361328125]}, "gradients/encoder.encoder.layers.20.final_layer_norm.weight": {"_type": "histogram", "values": [2.0, 1.0, 3.0, 9.0, 874.0, 123.0, 4.0, 3.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.0699729919433594, -1.6428709030151367, -1.215768814086914, -0.788666844367981, -0.3615647554397583, 0.0655372142791748, 0.49263930320739746, 0.9197413921356201, 1.3468434810638428, 1.7739455699920654, 2.201047658920288, 2.6281495094299316, 3.0552515983581543, 3.482353687286377, 3.9094557762145996, 4.336557865142822, 4.763659954071045, 5.190762042999268, 5.61786413192749, 6.044966220855713, 6.4720683097839355, 6.899169921875, 7.326272010803223, 7.753374099731445, 8.180476188659668, 8.60757827758789, 9.034680366516113, 9.461782455444336, 9.888884544372559, 10.315986633300781, 10.743088722229004, 11.170190811157227, 11.59729290008545, 12.024394989013672, 12.451497077941895, 12.878599166870117, 13.30570125579834, 13.732803344726562, 14.159905433654785, 14.587007522583008, 15.01410961151123, 15.441211700439453, 15.868313789367676, 16.2954158782959, 16.722517013549805, 17.149620056152344, 17.57672119140625, 18.00382423400879, 18.430925369262695, 18.8580265045166, 19.28512954711914, 19.712230682373047, 20.139333724975586, 20.566434860229492, 20.99353790283203, 21.420639038085938, 21.847742080688477, 22.274843215942383, 22.701946258544922, 23.129047393798828, 23.556150436401367, 23.983251571655273, 24.410354614257812, 24.83745574951172, 25.264558792114258]}, "gradients/encoder.encoder.layers.20.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 3.0, 3.0, 1.0, 4.0, 4.0, 3.0, 7.0, 8.0, 10.0, 15.0, 17.0, 23.0, 28.0, 53.0, 46.0, 69.0, 61.0, 65.0, 64.0, 82.0, 78.0, 73.0, 46.0, 51.0, 42.0, 33.0, 30.0, 25.0, 13.0, 12.0, 14.0, 8.0, 5.0, 4.0, 3.0, 1.0, 2.0, 2.0, 5.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.9890350103378296, -1.9188755750656128, -1.8487160205841064, -1.7785565853118896, -1.7083971500396729, -1.6382375955581665, -1.5680781602859497, -1.4979186058044434, -1.4277591705322266, -1.3575997352600098, -1.2874401807785034, -1.2172807455062866, -1.1471211910247803, -1.0769617557525635, -1.0068023204803467, -0.9366428256034851, -0.8664833307266235, -0.796323835849762, -0.7261643409729004, -0.6560049057006836, -0.585845410823822, -0.5156859159469604, -0.44552645087242126, -0.3753669857978821, -0.3052074909210205, -0.23504801094532013, -0.16488853096961975, -0.09472905099391937, -0.024569571018218994, 0.04558992385864258, 0.11574938893318176, 0.18590885400772095, 0.25606846809387207, 0.32622796297073364, 0.3963874280452728, 0.466546893119812, 0.5367063879966736, 0.6068658828735352, 0.677025318145752, 0.7471848130226135, 0.8173443078994751, 0.8875038027763367, 0.9576632976531982, 1.027822732925415, 1.0979821681976318, 1.1681417226791382, 1.238301157951355, 1.3084607124328613, 1.3786201477050781, 1.448779582977295, 1.5189391374588013, 1.589098572731018, 1.6592581272125244, 1.7294175624847412, 1.799576997756958, 1.8697364330291748, 1.9398959875106812, 2.0100555419921875, 2.0802149772644043, 2.150374412536621, 2.220533847808838, 2.2906932830810547, 2.3608529567718506, 2.4310123920440674, 2.501171827316284]}, "gradients/encoder.encoder.layers.20.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 3.0, 2.0, 2.0, 4.0, 3.0, 6.0, 14.0, 13.0, 23.0, 41.0, 53.0, 73.0, 121.0, 176.0, 325.0, 497.0, 847.0, 1513.0, 2842.0, 6004.0, 15041.0, 76199.0, 853797.0, 64905.0, 14137.0, 5627.0, 2783.0, 1458.0, 810.0, 460.0, 288.0, 169.0, 110.0, 71.0, 44.0, 40.0, 23.0, 11.0, 9.0, 5.0, 7.0, 2.0, 1.0, 1.0, 4.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-1.876953125, -1.8224945068359375, -1.768035888671875, -1.7135772705078125, -1.65911865234375, -1.6046600341796875, -1.550201416015625, -1.4957427978515625, -1.4412841796875, -1.3868255615234375, -1.332366943359375, -1.2779083251953125, -1.22344970703125, -1.1689910888671875, -1.114532470703125, -1.0600738525390625, -1.005615234375, -0.9511566162109375, -0.896697998046875, -0.8422393798828125, -0.78778076171875, -0.7333221435546875, -0.678863525390625, -0.6244049072265625, -0.5699462890625, -0.5154876708984375, -0.461029052734375, -0.4065704345703125, -0.35211181640625, -0.2976531982421875, -0.243194580078125, -0.1887359619140625, -0.13427734375, -0.0798187255859375, -0.025360107421875, 0.0290985107421875, 0.08355712890625, 0.1380157470703125, 0.192474365234375, 0.2469329833984375, 0.3013916015625, 0.3558502197265625, 0.410308837890625, 0.4647674560546875, 0.51922607421875, 0.5736846923828125, 0.628143310546875, 0.6826019287109375, 0.737060546875, 0.7915191650390625, 0.845977783203125, 0.9004364013671875, 0.95489501953125, 1.0093536376953125, 1.063812255859375, 1.1182708740234375, 1.1727294921875, 1.2271881103515625, 1.281646728515625, 1.3361053466796875, 1.39056396484375, 1.4450225830078125, 1.499481201171875, 1.5539398193359375, 1.6083984375]}, "gradients/encoder.encoder.layers.20.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 3.0, 2.0, 1.0, 2.0, 3.0, 3.0, 5.0, 0.0, 4.0, 3.0, 4.0, 8.0, 9.0, 7.0, 4.0, 12.0, 14.0, 49.0, 74.0, 140.0, 210.0, 170.0, 116.0, 63.0, 28.0, 20.0, 12.0, 6.0, 9.0, 8.0, 3.0, 5.0, 5.0, 3.0, 1.0, 4.0, 4.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0], "bins": [-0.29638671875, -0.2892646789550781, -0.28214263916015625, -0.2750205993652344, -0.2678985595703125, -0.2607765197753906, -0.25365447998046875, -0.24653244018554688, -0.239410400390625, -0.23228836059570312, -0.22516632080078125, -0.21804428100585938, -0.2109222412109375, -0.20380020141601562, -0.19667816162109375, -0.18955612182617188, -0.18243408203125, -0.17531204223632812, -0.16819000244140625, -0.16106796264648438, -0.1539459228515625, -0.14682388305664062, -0.13970184326171875, -0.13257980346679688, -0.125457763671875, -0.11833572387695312, -0.11121368408203125, -0.10409164428710938, -0.0969696044921875, -0.08984756469726562, -0.08272552490234375, -0.07560348510742188, -0.0684814453125, -0.061359405517578125, -0.05423736572265625, -0.047115325927734375, -0.0399932861328125, -0.032871246337890625, -0.02574920654296875, -0.018627166748046875, -0.011505126953125, -0.004383087158203125, 0.00273895263671875, 0.009860992431640625, 0.0169830322265625, 0.024105072021484375, 0.03122711181640625, 0.038349151611328125, 0.04547119140625, 0.052593231201171875, 0.05971527099609375, 0.06683731079101562, 0.0739593505859375, 0.08108139038085938, 0.08820343017578125, 0.09532546997070312, 0.102447509765625, 0.10956954956054688, 0.11669158935546875, 0.12381362915039062, 0.1309356689453125, 0.13805770874023438, 0.14517974853515625, 0.15230178833007812, 0.159423828125]}, "gradients/encoder.encoder.layers.20.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 1.0, 3.0, 2.0, 1.0, 7.0, 3.0, 7.0, 4.0, 3.0, 8.0, 7.0, 9.0, 17.0, 15.0, 15.0, 12.0, 28.0, 47.0, 40.0, 49.0, 82.0, 125.0, 197.0, 336.0, 625.0, 1364.0, 3831.0, 68579.0, 937581.0, 30012.0, 3045.0, 1057.0, 509.0, 294.0, 181.0, 85.0, 86.0, 62.0, 44.0, 35.0, 32.0, 25.0, 20.0, 23.0, 12.0, 9.0, 10.0, 7.0, 4.0, 6.0, 6.0, 4.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-1.9912109375, -1.9277801513671875, -1.864349365234375, -1.8009185791015625, -1.73748779296875, -1.6740570068359375, -1.610626220703125, -1.5471954345703125, -1.4837646484375, -1.4203338623046875, -1.356903076171875, -1.2934722900390625, -1.23004150390625, -1.1666107177734375, -1.103179931640625, -1.0397491455078125, -0.976318359375, -0.9128875732421875, -0.849456787109375, -0.7860260009765625, -0.72259521484375, -0.6591644287109375, -0.595733642578125, -0.5323028564453125, -0.4688720703125, -0.4054412841796875, -0.342010498046875, -0.2785797119140625, -0.21514892578125, -0.1517181396484375, -0.088287353515625, -0.0248565673828125, 0.03857421875, 0.1020050048828125, 0.165435791015625, 0.2288665771484375, 0.29229736328125, 0.3557281494140625, 0.419158935546875, 0.4825897216796875, 0.5460205078125, 0.6094512939453125, 0.672882080078125, 0.7363128662109375, 0.79974365234375, 0.8631744384765625, 0.926605224609375, 0.9900360107421875, 1.053466796875, 1.1168975830078125, 1.180328369140625, 1.2437591552734375, 1.30718994140625, 1.3706207275390625, 1.434051513671875, 1.4974822998046875, 1.5609130859375, 1.6243438720703125, 1.687774658203125, 1.7512054443359375, 1.81463623046875, 1.8780670166015625, 1.941497802734375, 2.0049285888671875, 2.068359375]}, "gradients/encoder.encoder.layers.20.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 4.0, 6.0, 6.0, 4.0, 7.0, 10.0, 8.0, 11.0, 20.0, 15.0, 20.0, 22.0, 25.0, 25.0, 30.0, 31.0, 21.0, 31.0, 42.0, 32.0, 41.0, 33.0, 36.0, 36.0, 43.0, 38.0, 40.0, 32.0, 33.0, 43.0, 36.0, 30.0, 22.0, 22.0, 35.0, 18.0, 9.0, 14.0, 14.0, 15.0, 8.0, 7.0, 8.0, 3.0, 4.0, 7.0, 3.0, 7.0, 1.0, 2.0, 3.0, 1.0, 2.0, 1.0, 1.0], "bins": [-0.54150390625, -0.5248947143554688, -0.5082855224609375, -0.49167633056640625, -0.475067138671875, -0.45845794677734375, -0.4418487548828125, -0.42523956298828125, -0.40863037109375, -0.39202117919921875, -0.3754119873046875, -0.35880279541015625, -0.342193603515625, -0.32558441162109375, -0.3089752197265625, -0.29236602783203125, -0.2757568359375, -0.25914764404296875, -0.2425384521484375, -0.22592926025390625, -0.209320068359375, -0.19271087646484375, -0.1761016845703125, -0.15949249267578125, -0.14288330078125, -0.12627410888671875, -0.1096649169921875, -0.09305572509765625, -0.076446533203125, -0.05983734130859375, -0.0432281494140625, -0.02661895751953125, -0.010009765625, 0.00659942626953125, 0.0232086181640625, 0.03981781005859375, 0.056427001953125, 0.07303619384765625, 0.0896453857421875, 0.10625457763671875, 0.12286376953125, 0.13947296142578125, 0.1560821533203125, 0.17269134521484375, 0.189300537109375, 0.20590972900390625, 0.2225189208984375, 0.23912811279296875, 0.2557373046875, 0.27234649658203125, 0.2889556884765625, 0.30556488037109375, 0.322174072265625, 0.33878326416015625, 0.3553924560546875, 0.37200164794921875, 0.38861083984375, 0.40522003173828125, 0.4218292236328125, 0.43843841552734375, 0.455047607421875, 0.47165679931640625, 0.4882659912109375, 0.5048751831054688, 0.521484375]}, "gradients/encoder.encoder.layers.20.attention.k_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 4.0, 3.0, 1.0, 0.0, 0.0, 2.0, 2.0, 2.0, 2.0, 5.0, 2.0, 9.0, 4.0, 7.0, 15.0, 12.0, 13.0, 14.0, 20.0, 33.0, 45.0, 40.0, 71.0, 95.0, 146.0, 204.0, 332.0, 724.0, 2318.0, 24003.0, 1002340.0, 14590.0, 1910.0, 627.0, 322.0, 155.0, 114.0, 76.0, 58.0, 62.0, 39.0, 24.0, 22.0, 31.0, 24.0, 9.0, 5.0, 12.0, 6.0, 2.0, 1.0, 4.0, 2.0, 2.0, 1.0, 1.0, 3.0, 2.0], "bins": [-0.0408935546875, -0.03977084159851074, -0.038648128509521484, -0.03752541542053223, -0.03640270233154297, -0.03527998924255371, -0.03415727615356445, -0.033034563064575195, -0.03191184997558594, -0.03078913688659668, -0.029666423797607422, -0.028543710708618164, -0.027420997619628906, -0.02629828453063965, -0.02517557144165039, -0.024052858352661133, -0.022930145263671875, -0.021807432174682617, -0.02068471908569336, -0.0195620059967041, -0.018439292907714844, -0.017316579818725586, -0.016193866729736328, -0.01507115364074707, -0.013948440551757812, -0.012825727462768555, -0.011703014373779297, -0.010580301284790039, -0.009457588195800781, -0.008334875106811523, -0.007212162017822266, -0.006089448928833008, -0.00496673583984375, -0.003844022750854492, -0.0027213096618652344, -0.0015985965728759766, -0.00047588348388671875, 0.0006468296051025391, 0.0017695426940917969, 0.0028922557830810547, 0.0040149688720703125, 0.00513768196105957, 0.006260395050048828, 0.007383108139038086, 0.008505821228027344, 0.009628534317016602, 0.01075124740600586, 0.011873960494995117, 0.012996673583984375, 0.014119386672973633, 0.01524209976196289, 0.01636481285095215, 0.017487525939941406, 0.018610239028930664, 0.019732952117919922, 0.02085566520690918, 0.021978378295898438, 0.023101091384887695, 0.024223804473876953, 0.02534651756286621, 0.02646923065185547, 0.027591943740844727, 0.028714656829833984, 0.029837369918823242, 0.0309600830078125]}, "gradients/encoder.encoder.layers.20.attention.k_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 4.0, 2.0, 6.0, 3.0, 9.0, 7.0, 14.0, 14.0, 14.0, 43.0, 30.0, 54.0, 68.0, 83.0, 64.0, 112.0, 72.0, 100.0, 66.0, 51.0, 57.0, 25.0, 35.0, 15.0, 11.0, 14.0, 8.0, 8.0, 2.0, 5.0, 0.0, 5.0, 2.0, 1.0, 2.0, 0.0, 2.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.231929779052734e-06, -4.085712134838104e-06, -3.939494490623474e-06, -3.793276846408844e-06, -3.647059202194214e-06, -3.5008415579795837e-06, -3.3546239137649536e-06, -3.2084062695503235e-06, -3.0621886253356934e-06, -2.9159709811210632e-06, -2.769753336906433e-06, -2.623535692691803e-06, -2.477318048477173e-06, -2.3311004042625427e-06, -2.1848827600479126e-06, -2.0386651158332825e-06, -1.8924474716186523e-06, -1.7462298274040222e-06, -1.600012183189392e-06, -1.453794538974762e-06, -1.3075768947601318e-06, -1.1613592505455017e-06, -1.0151416063308716e-06, -8.689239621162415e-07, -7.227063179016113e-07, -5.764886736869812e-07, -4.302710294723511e-07, -2.8405338525772095e-07, -1.3783574104309082e-07, 8.381903171539307e-09, 1.5459954738616943e-07, 3.0081719160079956e-07, 4.470348358154297e-07, 5.932524800300598e-07, 7.394701242446899e-07, 8.856877684593201e-07, 1.0319054126739502e-06, 1.1781230568885803e-06, 1.3243407011032104e-06, 1.4705583453178406e-06, 1.6167759895324707e-06, 1.7629936337471008e-06, 1.909211277961731e-06, 2.055428922176361e-06, 2.201646566390991e-06, 2.3478642106056213e-06, 2.4940818548202515e-06, 2.6402994990348816e-06, 2.7865171432495117e-06, 2.932734787464142e-06, 3.078952431678772e-06, 3.225170075893402e-06, 3.3713877201080322e-06, 3.5176053643226624e-06, 3.6638230085372925e-06, 3.8100406527519226e-06, 3.956258296966553e-06, 4.102475941181183e-06, 4.248693585395813e-06, 4.394911229610443e-06, 4.541128873825073e-06, 4.687346518039703e-06, 4.8335641622543335e-06, 4.979781806468964e-06, 5.125999450683594e-06]}, "gradients/encoder.encoder.layers.20.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 2.0, 2.0, 0.0, 0.0, 3.0, 2.0, 4.0, 6.0, 4.0, 16.0, 11.0, 17.0, 28.0, 61.0, 109.0, 309.0, 1307.0, 60789.0, 983038.0, 2160.0, 367.0, 140.0, 77.0, 37.0, 27.0, 15.0, 10.0, 12.0, 4.0, 1.0, 3.0, 0.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.050750732421875, -0.048807621002197266, -0.04686450958251953, -0.0449213981628418, -0.04297828674316406, -0.04103517532348633, -0.039092063903808594, -0.03714895248413086, -0.035205841064453125, -0.03326272964477539, -0.031319618225097656, -0.029376506805419922, -0.027433395385742188, -0.025490283966064453, -0.02354717254638672, -0.021604061126708984, -0.01966094970703125, -0.017717838287353516, -0.01577472686767578, -0.013831615447998047, -0.011888504028320312, -0.009945392608642578, -0.008002281188964844, -0.006059169769287109, -0.004116058349609375, -0.0021729469299316406, -0.00022983551025390625, 0.0017132759094238281, 0.0036563873291015625, 0.005599498748779297, 0.007542610168457031, 0.009485721588134766, 0.0114288330078125, 0.013371944427490234, 0.015315055847167969, 0.017258167266845703, 0.019201278686523438, 0.021144390106201172, 0.023087501525878906, 0.02503061294555664, 0.026973724365234375, 0.02891683578491211, 0.030859947204589844, 0.03280305862426758, 0.03474617004394531, 0.03668928146362305, 0.03863239288330078, 0.040575504302978516, 0.04251861572265625, 0.044461727142333984, 0.04640483856201172, 0.04834794998168945, 0.05029106140136719, 0.05223417282104492, 0.054177284240722656, 0.05612039566040039, 0.058063507080078125, 0.06000661849975586, 0.061949729919433594, 0.06389284133911133, 0.06583595275878906, 0.0677790641784668, 0.06972217559814453, 0.07166528701782227, 0.0736083984375]}, "gradients/encoder.encoder.layers.20.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 3.0, 1.0, 2.0, 7.0, 6.0, 10.0, 12.0, 14.0, 24.0, 34.0, 64.0, 122.0, 233.0, 203.0, 111.0, 62.0, 29.0, 20.0, 15.0, 9.0, 4.0, 11.0, 1.0, 5.0, 3.0, 0.0, 2.0, 0.0, 0.0, 2.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0192718505859375, -0.018763065338134766, -0.01825428009033203, -0.017745494842529297, -0.017236709594726562, -0.016727924346923828, -0.016219139099121094, -0.01571035385131836, -0.015201568603515625, -0.01469278335571289, -0.014183998107910156, -0.013675212860107422, -0.013166427612304688, -0.012657642364501953, -0.012148857116699219, -0.011640071868896484, -0.01113128662109375, -0.010622501373291016, -0.010113716125488281, -0.009604930877685547, -0.009096145629882812, -0.008587360382080078, -0.008078575134277344, -0.007569789886474609, -0.007061004638671875, -0.006552219390869141, -0.006043434143066406, -0.005534648895263672, -0.0050258636474609375, -0.004517078399658203, -0.004008293151855469, -0.0034995079040527344, -0.00299072265625, -0.0024819374084472656, -0.0019731521606445312, -0.0014643669128417969, -0.0009555816650390625, -0.0004467964172363281, 6.198883056640625e-05, 0.0005707740783691406, 0.001079559326171875, 0.0015883445739746094, 0.0020971298217773438, 0.002605915069580078, 0.0031147003173828125, 0.003623485565185547, 0.004132270812988281, 0.004641056060791016, 0.00514984130859375, 0.005658626556396484, 0.006167411804199219, 0.006676197052001953, 0.0071849822998046875, 0.007693767547607422, 0.008202552795410156, 0.00871133804321289, 0.009220123291015625, 0.00972890853881836, 0.010237693786621094, 0.010746479034423828, 0.011255264282226562, 0.011764049530029297, 0.012272834777832031, 0.012781620025634766, 0.0132904052734375]}, "gradients/encoder.encoder.layers.20.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 6.0, 3.0, 9.0, 26.0, 35.0, 103.0, 422.0, 311.0, 62.0, 24.0, 10.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.078099250793457, -7.909549236297607, -7.740999221801758, -7.57244873046875, -7.4038987159729, -7.235348701477051, -7.066798210144043, -6.898248195648193, -6.729698181152344, -6.561148166656494, -6.3925981521606445, -6.224047660827637, -6.055497646331787, -5.8869476318359375, -5.71839714050293, -5.54984712600708, -5.3812971115112305, -5.212747097015381, -5.044197082519531, -4.875646591186523, -4.707096576690674, -4.538546562194824, -4.369996070861816, -4.201446056365967, -4.032896041870117, -3.8643460273742676, -3.695795774459839, -3.52724552154541, -3.3586955070495605, -3.190145492553711, -3.0215952396392822, -2.8530449867248535, -2.684494972229004, -2.5159449577331543, -2.3473947048187256, -2.178844451904297, -2.0102944374084473, -1.841744303703308, -1.673194169998169, -1.5046440362930298, -1.3360939025878906, -1.1675437688827515, -0.9989936351776123, -0.8304435014724731, -0.661893367767334, -0.4933432340621948, -0.32479310035705566, -0.1562429666519165, 0.012307167053222656, 0.18085730075836182, 0.349407434463501, 0.5179575681686401, 0.6865077018737793, 0.8550578355789185, 1.0236079692840576, 1.1921581029891968, 1.360708236694336, 1.529258370399475, 1.6978085041046143, 1.8663586378097534, 2.0349087715148926, 2.203458786010742, 2.372009038925171, 2.5405592918395996, 2.709109306335449]}, "gradients/encoder.encoder.layers.20.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 4.0, 1.0, 8.0, 10.0, 11.0, 20.0, 29.0, 16.0, 23.0, 34.0, 42.0, 53.0, 49.0, 54.0, 62.0, 66.0, 70.0, 61.0, 48.0, 62.0, 53.0, 51.0, 35.0, 38.0, 37.0, 21.0, 14.0, 14.0, 7.0, 9.0, 5.0, 3.0, 3.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-6.842535018920898, -6.6754150390625, -6.508294582366943, -6.341174602508545, -6.174054145812988, -6.00693416595459, -5.839814186096191, -5.672694206237793, -5.505573749542236, -5.338453769683838, -5.171333312988281, -5.004213333129883, -4.837093353271484, -4.669972896575928, -4.502852916717529, -4.335732460021973, -4.168612480163574, -4.001492500305176, -3.834372043609619, -3.6672520637512207, -3.500131845474243, -3.3330116271972656, -3.165891647338867, -2.9987714290618896, -2.831651210784912, -2.6645309925079346, -2.497410774230957, -2.3302907943725586, -2.163170576095581, -1.9960503578186035, -1.8289302587509155, -1.6618101596832275, -1.49468994140625, -1.3275697231292725, -1.1604496240615845, -0.9933294653892517, -0.826209306716919, -0.6590891480445862, -0.4919689893722534, -0.32484889030456543, -0.1577286720275879, 0.009391486644744873, 0.17651164531707764, 0.3436318039894104, 0.5107519626617432, 0.6778721213340759, 0.8449922800064087, 1.0121123790740967, 1.1792325973510742, 1.3463528156280518, 1.5134729146957397, 1.6805930137634277, 1.8477132320404053, 2.014833450317383, 2.1819534301757812, 2.349073648452759, 2.5161938667297363, 2.683314085006714, 2.8504343032836914, 3.01755428314209, 3.1846745014190674, 3.351794719696045, 3.5189146995544434, 3.686034917831421, 3.8531551361083984]}, "gradients/encoder.encoder.layers.19.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 0.0, 4.0, 0.0, 0.0, 3.0, 3.0, 2.0, 10.0, 3.0, 9.0, 11.0, 12.0, 18.0, 43.0, 54.0, 74.0, 157.0, 275.0, 686.0, 2148.0, 24149.0, 4160956.0, 4144.0, 880.0, 319.0, 146.0, 63.0, 51.0, 24.0, 18.0, 4.0, 7.0, 5.0, 9.0, 2.0, 1.0, 0.0, 3.0, 0.0, 1.0, 0.0, 2.0, 1.0], "bins": [-4.9765625, -4.858612060546875, -4.74066162109375, -4.622711181640625, -4.5047607421875, -4.386810302734375, -4.26885986328125, -4.150909423828125, -4.032958984375, -3.915008544921875, -3.79705810546875, -3.679107666015625, -3.5611572265625, -3.443206787109375, -3.32525634765625, -3.207305908203125, -3.08935546875, -2.971405029296875, -2.85345458984375, -2.735504150390625, -2.6175537109375, -2.499603271484375, -2.38165283203125, -2.263702392578125, -2.145751953125, -2.027801513671875, -1.90985107421875, -1.791900634765625, -1.6739501953125, -1.555999755859375, -1.43804931640625, -1.320098876953125, -1.2021484375, -1.084197998046875, -0.96624755859375, -0.848297119140625, -0.7303466796875, -0.612396240234375, -0.49444580078125, -0.376495361328125, -0.258544921875, -0.140594482421875, -0.02264404296875, 0.095306396484375, 0.2132568359375, 0.331207275390625, 0.44915771484375, 0.567108154296875, 0.68505859375, 0.803009033203125, 0.92095947265625, 1.038909912109375, 1.1568603515625, 1.274810791015625, 1.39276123046875, 1.510711669921875, 1.628662109375, 1.746612548828125, 1.86456298828125, 1.982513427734375, 2.1004638671875, 2.218414306640625, 2.33636474609375, 2.454315185546875, 2.572265625]}, "gradients/encoder.encoder.layers.19.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 0.0, 3.0, 0.0, 0.0, 2.0, 2.0, 2.0, 8.0, 2.0, 4.0, 3.0, 3.0, 8.0, 6.0, 10.0, 8.0, 17.0, 29.0, 65.0, 113.0, 174.0, 156.0, 156.0, 91.0, 52.0, 26.0, 15.0, 13.0, 12.0, 10.0, 1.0, 4.0, 4.0, 9.0, 2.0, 1.0, 0.0, 3.0, 0.0, 1.0, 0.0, 2.0, 1.0], "bins": [-0.299072265625, -0.29198455810546875, -0.2848968505859375, -0.27780914306640625, -0.270721435546875, -0.26363372802734375, -0.2565460205078125, -0.24945831298828125, -0.24237060546875, -0.23528289794921875, -0.2281951904296875, -0.22110748291015625, -0.214019775390625, -0.20693206787109375, -0.1998443603515625, -0.19275665283203125, -0.1856689453125, -0.17858123779296875, -0.1714935302734375, -0.16440582275390625, -0.157318115234375, -0.15023040771484375, -0.1431427001953125, -0.13605499267578125, -0.12896728515625, -0.12187957763671875, -0.1147918701171875, -0.10770416259765625, -0.100616455078125, -0.09352874755859375, -0.0864410400390625, -0.07935333251953125, -0.072265625, -0.06517791748046875, -0.0580902099609375, -0.05100250244140625, -0.043914794921875, -0.03682708740234375, -0.0297393798828125, -0.02265167236328125, -0.01556396484375, -0.00847625732421875, -0.0013885498046875, 0.00569915771484375, 0.012786865234375, 0.01987457275390625, 0.0269622802734375, 0.03404998779296875, 0.0411376953125, 0.04822540283203125, 0.0553131103515625, 0.06240081787109375, 0.069488525390625, 0.07657623291015625, 0.0836639404296875, 0.09075164794921875, 0.09783935546875, 0.10492706298828125, 0.1120147705078125, 0.11910247802734375, 0.126190185546875, 0.13327789306640625, 0.1403656005859375, 0.14745330810546875, 0.154541015625]}, "gradients/encoder.encoder.layers.19.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 4.0, 3.0, 5.0, 2.0, 3.0, 3.0, 10.0, 10.0, 9.0, 21.0, 24.0, 38.0, 110.0, 218.0, 711.0, 3553.0, 41210.0, 4129954.0, 15684.0, 1975.0, 421.0, 137.0, 56.0, 33.0, 22.0, 17.0, 17.0, 8.0, 8.0, 5.0, 3.0, 5.0, 3.0, 1.0, 3.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.9873046875, -1.9171600341796875, -1.847015380859375, -1.7768707275390625, -1.70672607421875, -1.6365814208984375, -1.566436767578125, -1.4962921142578125, -1.4261474609375, -1.3560028076171875, -1.285858154296875, -1.2157135009765625, -1.14556884765625, -1.0754241943359375, -1.005279541015625, -0.9351348876953125, -0.864990234375, -0.7948455810546875, -0.724700927734375, -0.6545562744140625, -0.58441162109375, -0.5142669677734375, -0.444122314453125, -0.3739776611328125, -0.3038330078125, -0.2336883544921875, -0.163543701171875, -0.0933990478515625, -0.02325439453125, 0.0468902587890625, 0.117034912109375, 0.1871795654296875, 0.25732421875, 0.3274688720703125, 0.397613525390625, 0.4677581787109375, 0.53790283203125, 0.6080474853515625, 0.678192138671875, 0.7483367919921875, 0.8184814453125, 0.8886260986328125, 0.958770751953125, 1.0289154052734375, 1.09906005859375, 1.1692047119140625, 1.239349365234375, 1.3094940185546875, 1.379638671875, 1.4497833251953125, 1.519927978515625, 1.5900726318359375, 1.66021728515625, 1.7303619384765625, 1.800506591796875, 1.8706512451171875, 1.9407958984375, 2.0109405517578125, 2.081085205078125, 2.1512298583984375, 2.22137451171875, 2.2915191650390625, 2.361663818359375, 2.4318084716796875, 2.501953125]}, "gradients/encoder.encoder.layers.19.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 0.0, 3.0, 1.0, 3.0, 4.0, 2.0, 4.0, 7.0, 5.0, 14.0, 8.0, 9.0, 8.0, 10.0, 18.0, 29.0, 39.0, 100.0, 3462.0, 178.0, 41.0, 21.0, 25.0, 24.0, 4.0, 13.0, 9.0, 6.0, 5.0, 8.0, 3.0, 3.0, 1.0, 5.0, 3.0, 3.0, 3.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.43212890625, -0.420013427734375, -0.40789794921875, -0.395782470703125, -0.3836669921875, -0.371551513671875, -0.35943603515625, -0.347320556640625, -0.335205078125, -0.323089599609375, -0.31097412109375, -0.298858642578125, -0.2867431640625, -0.274627685546875, -0.26251220703125, -0.250396728515625, -0.23828125, -0.226165771484375, -0.21405029296875, -0.201934814453125, -0.1898193359375, -0.177703857421875, -0.16558837890625, -0.153472900390625, -0.141357421875, -0.129241943359375, -0.11712646484375, -0.105010986328125, -0.0928955078125, -0.080780029296875, -0.06866455078125, -0.056549072265625, -0.04443359375, -0.032318115234375, -0.02020263671875, -0.008087158203125, 0.0040283203125, 0.016143798828125, 0.02825927734375, 0.040374755859375, 0.052490234375, 0.064605712890625, 0.07672119140625, 0.088836669921875, 0.1009521484375, 0.113067626953125, 0.12518310546875, 0.137298583984375, 0.1494140625, 0.161529541015625, 0.17364501953125, 0.185760498046875, 0.1978759765625, 0.209991455078125, 0.22210693359375, 0.234222412109375, 0.246337890625, 0.258453369140625, 0.27056884765625, 0.282684326171875, 0.2947998046875, 0.306915283203125, 0.31903076171875, 0.331146240234375, 0.34326171875]}, "gradients/encoder.encoder.layers.19.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 10.0, 158.0, 831.0, 22.0], "bins": [-10.96190071105957, -10.785860061645508, -10.609820365905762, -10.4337797164917, -10.257740020751953, -10.08169937133789, -9.905659675598145, -9.729619026184082, -9.553579330444336, -9.377538681030273, -9.201498985290527, -9.025458335876465, -8.849418640136719, -8.673377990722656, -8.49733829498291, -8.321297645568848, -8.145257949829102, -7.969217777252197, -7.793177604675293, -7.617137432098389, -7.441097259521484, -7.26505708694458, -7.089016914367676, -6.9129767417907715, -6.736936092376709, -6.560895919799805, -6.3848557472229, -6.208815574645996, -6.032775402069092, -5.8567352294921875, -5.680695056915283, -5.504654884338379, -5.328614711761475, -5.15257453918457, -4.976534366607666, -4.800494194030762, -4.624454021453857, -4.448413848876953, -4.272373676300049, -4.0963335037231445, -3.920293092727661, -3.744252920150757, -3.5682127475738525, -3.3921725749969482, -3.216132402420044, -3.0400922298431396, -2.8640518188476562, -2.688011646270752, -2.5119714736938477, -2.3359313011169434, -2.159891128540039, -1.9838509559631348, -1.8078107833862305, -1.6317706108093262, -1.4557303190231323, -1.279690146446228, -1.1036500930786133, -0.927609920501709, -0.7515697479248047, -0.5755295157432556, -0.3994893431663513, -0.22344917058944702, -0.04740893840789795, 0.12863123416900635, 0.30467140674591064]}, "gradients/encoder.encoder.layers.19.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 3.0, 2.0, 10.0, 4.0, 6.0, 4.0, 7.0, 11.0, 13.0, 7.0, 29.0, 25.0, 15.0, 42.0, 43.0, 34.0, 49.0, 46.0, 32.0, 38.0, 57.0, 55.0, 34.0, 45.0, 48.0, 49.0, 38.0, 32.0, 28.0, 37.0, 28.0, 22.0, 15.0, 17.0, 20.0, 15.0, 16.0, 14.0, 6.0, 4.0, 1.0, 6.0, 3.0, 1.0, 0.0, 0.0, 1.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.8881197571754456, -0.8572815656661987, -0.8264433741569519, -0.7956051826477051, -0.7647669911384583, -0.7339287996292114, -0.7030906081199646, -0.6722524166107178, -0.641414225101471, -0.6105760335922241, -0.5797378420829773, -0.5488996505737305, -0.5180614590644836, -0.4872232675552368, -0.45638507604599, -0.42554688453674316, -0.39470869302749634, -0.3638705015182495, -0.3330323100090027, -0.30219411849975586, -0.27135592699050903, -0.2405177354812622, -0.20967954397201538, -0.17884135246276855, -0.14800316095352173, -0.1171649694442749, -0.08632677793502808, -0.05548858642578125, -0.024650394916534424, 0.006187796592712402, 0.03702598810195923, 0.06786417961120605, 0.09870243072509766, 0.12954062223434448, 0.1603788137435913, 0.19121700525283813, 0.22205519676208496, 0.2528933882713318, 0.2837315797805786, 0.31456977128982544, 0.34540796279907227, 0.3762461543083191, 0.4070843458175659, 0.43792253732681274, 0.46876072883605957, 0.4995989203453064, 0.5304371118545532, 0.5612753033638, 0.5921134948730469, 0.6229516863822937, 0.6537898778915405, 0.6846280694007874, 0.7154662609100342, 0.746304452419281, 0.7771426439285278, 0.8079808354377747, 0.8388190269470215, 0.8696572184562683, 0.9004954099655151, 0.931333601474762, 0.9621717929840088, 0.9930099844932556, 1.0238481760025024, 1.0546863079071045, 1.085524559020996]}, "gradients/encoder.encoder.layers.19.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 0.0, 5.0, 6.0, 10.0, 16.0, 20.0, 24.0, 30.0, 50.0, 67.0, 129.0, 237.0, 339.0, 574.0, 1164.0, 2144.0, 4613.0, 11315.0, 47341.0, 790397.0, 155722.0, 20530.0, 7075.0, 3112.0, 1639.0, 797.0, 487.0, 261.0, 152.0, 99.0, 62.0, 46.0, 25.0, 24.0, 22.0, 10.0, 6.0, 3.0, 4.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0], "bins": [-0.95947265625, -0.93017578125, -0.90087890625, -0.87158203125, -0.84228515625, -0.81298828125, -0.78369140625, -0.75439453125, -0.72509765625, -0.69580078125, -0.66650390625, -0.63720703125, -0.60791015625, -0.57861328125, -0.54931640625, -0.52001953125, -0.49072265625, -0.46142578125, -0.43212890625, -0.40283203125, -0.37353515625, -0.34423828125, -0.31494140625, -0.28564453125, -0.25634765625, -0.22705078125, -0.19775390625, -0.16845703125, -0.13916015625, -0.10986328125, -0.08056640625, -0.05126953125, -0.02197265625, 0.00732421875, 0.03662109375, 0.06591796875, 0.09521484375, 0.12451171875, 0.15380859375, 0.18310546875, 0.21240234375, 0.24169921875, 0.27099609375, 0.30029296875, 0.32958984375, 0.35888671875, 0.38818359375, 0.41748046875, 0.44677734375, 0.47607421875, 0.50537109375, 0.53466796875, 0.56396484375, 0.59326171875, 0.62255859375, 0.65185546875, 0.68115234375, 0.71044921875, 0.73974609375, 0.76904296875, 0.79833984375, 0.82763671875, 0.85693359375, 0.88623046875, 0.91552734375]}, "gradients/encoder.encoder.layers.19.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 2.0, 1.0, 0.0, 1.0, 2.0, 6.0, 2.0, 6.0, 4.0, 1.0, 6.0, 9.0, 5.0, 6.0, 10.0, 21.0, 45.0, 81.0, 109.0, 173.0, 168.0, 129.0, 84.0, 46.0, 29.0, 15.0, 12.0, 11.0, 7.0, 1.0, 2.0, 9.0, 6.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.284423828125, -0.2774829864501953, -0.2705421447753906, -0.26360130310058594, -0.25666046142578125, -0.24971961975097656, -0.24277877807617188, -0.2358379364013672, -0.2288970947265625, -0.2219562530517578, -0.21501541137695312, -0.20807456970214844, -0.20113372802734375, -0.19419288635253906, -0.18725204467773438, -0.1803112030029297, -0.173370361328125, -0.1664295196533203, -0.15948867797851562, -0.15254783630371094, -0.14560699462890625, -0.13866615295410156, -0.13172531127929688, -0.12478446960449219, -0.1178436279296875, -0.11090278625488281, -0.10396194458007812, -0.09702110290527344, -0.09008026123046875, -0.08313941955566406, -0.07619857788085938, -0.06925773620605469, -0.06231689453125, -0.05537605285644531, -0.048435211181640625, -0.04149436950683594, -0.03455352783203125, -0.027612686157226562, -0.020671844482421875, -0.013731002807617188, -0.0067901611328125, 0.0001506805419921875, 0.007091522216796875, 0.014032363891601562, 0.02097320556640625, 0.027914047241210938, 0.034854888916015625, 0.04179573059082031, 0.048736572265625, 0.05567741394042969, 0.06261825561523438, 0.06955909729003906, 0.07649993896484375, 0.08344078063964844, 0.09038162231445312, 0.09732246398925781, 0.1042633056640625, 0.11120414733886719, 0.11814498901367188, 0.12508583068847656, 0.13202667236328125, 0.13896751403808594, 0.14590835571289062, 0.1528491973876953, 0.1597900390625]}, "gradients/encoder.encoder.layers.19.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 2.0, 4.0, 0.0, 3.0, 8.0, 6.0, 12.0, 14.0, 13.0, 12.0, 11.0, 27.0, 34.0, 37.0, 48.0, 51.0, 96.0, 148.0, 242.0, 579.0, 1499.0, 6825.0, 296775.0, 730068.0, 8912.0, 1655.0, 635.0, 275.0, 175.0, 83.0, 67.0, 48.0, 45.0, 27.0, 24.0, 23.0, 12.0, 11.0, 15.0, 8.0, 7.0, 8.0, 2.0, 6.0, 4.0, 5.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-1.4404296875, -1.3940582275390625, -1.347686767578125, -1.3013153076171875, -1.25494384765625, -1.2085723876953125, -1.162200927734375, -1.1158294677734375, -1.0694580078125, -1.0230865478515625, -0.976715087890625, -0.9303436279296875, -0.88397216796875, -0.8376007080078125, -0.791229248046875, -0.7448577880859375, -0.698486328125, -0.6521148681640625, -0.605743408203125, -0.5593719482421875, -0.51300048828125, -0.4666290283203125, -0.420257568359375, -0.3738861083984375, -0.3275146484375, -0.2811431884765625, -0.234771728515625, -0.1884002685546875, -0.14202880859375, -0.0956573486328125, -0.049285888671875, -0.0029144287109375, 0.04345703125, 0.0898284912109375, 0.136199951171875, 0.1825714111328125, 0.22894287109375, 0.2753143310546875, 0.321685791015625, 0.3680572509765625, 0.4144287109375, 0.4608001708984375, 0.507171630859375, 0.5535430908203125, 0.59991455078125, 0.6462860107421875, 0.692657470703125, 0.7390289306640625, 0.785400390625, 0.8317718505859375, 0.878143310546875, 0.9245147705078125, 0.97088623046875, 1.0172576904296875, 1.063629150390625, 1.1100006103515625, 1.1563720703125, 1.2027435302734375, 1.249114990234375, 1.2954864501953125, 1.34185791015625, 1.3882293701171875, 1.434600830078125, 1.4809722900390625, 1.52734375]}, "gradients/encoder.encoder.layers.19.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 5.0, 5.0, 5.0, 2.0, 8.0, 7.0, 8.0, 12.0, 10.0, 10.0, 21.0, 19.0, 24.0, 28.0, 22.0, 35.0, 33.0, 50.0, 49.0, 49.0, 37.0, 44.0, 50.0, 57.0, 39.0, 50.0, 42.0, 25.0, 38.0, 26.0, 22.0, 30.0, 24.0, 23.0, 20.0, 9.0, 12.0, 11.0, 14.0, 11.0, 6.0, 8.0, 3.0, 0.0, 4.0, 2.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0], "bins": [-0.544921875, -0.5283737182617188, -0.5118255615234375, -0.49527740478515625, -0.478729248046875, -0.46218109130859375, -0.4456329345703125, -0.42908477783203125, -0.41253662109375, -0.39598846435546875, -0.3794403076171875, -0.36289215087890625, -0.346343994140625, -0.32979583740234375, -0.3132476806640625, -0.29669952392578125, -0.2801513671875, -0.26360321044921875, -0.2470550537109375, -0.23050689697265625, -0.213958740234375, -0.19741058349609375, -0.1808624267578125, -0.16431427001953125, -0.14776611328125, -0.13121795654296875, -0.1146697998046875, -0.09812164306640625, -0.081573486328125, -0.06502532958984375, -0.0484771728515625, -0.03192901611328125, -0.015380859375, 0.00116729736328125, 0.0177154541015625, 0.03426361083984375, 0.050811767578125, 0.06735992431640625, 0.0839080810546875, 0.10045623779296875, 0.11700439453125, 0.13355255126953125, 0.1501007080078125, 0.16664886474609375, 0.183197021484375, 0.19974517822265625, 0.2162933349609375, 0.23284149169921875, 0.2493896484375, 0.26593780517578125, 0.2824859619140625, 0.29903411865234375, 0.315582275390625, 0.33213043212890625, 0.3486785888671875, 0.36522674560546875, 0.38177490234375, 0.39832305908203125, 0.4148712158203125, 0.43141937255859375, 0.447967529296875, 0.46451568603515625, 0.4810638427734375, 0.49761199951171875, 0.51416015625]}, "gradients/encoder.encoder.layers.19.attention.k_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 3.0, 0.0, 0.0, 3.0, 0.0, 0.0, 3.0, 1.0, 2.0, 5.0, 4.0, 8.0, 12.0, 18.0, 18.0, 36.0, 52.0, 87.0, 180.0, 382.0, 1070.0, 10760.0, 1027933.0, 6371.0, 943.0, 326.0, 146.0, 72.0, 48.0, 19.0, 23.0, 8.0, 7.0, 6.0, 7.0, 2.0, 6.0, 4.0, 2.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.019134521484375, -0.01838207244873047, -0.017629623413085938, -0.016877174377441406, -0.016124725341796875, -0.015372276306152344, -0.014619827270507812, -0.013867378234863281, -0.01311492919921875, -0.012362480163574219, -0.011610031127929688, -0.010857582092285156, -0.010105133056640625, -0.009352684020996094, -0.008600234985351562, -0.007847785949707031, -0.0070953369140625, -0.006342887878417969, -0.0055904388427734375, -0.004837989807128906, -0.004085540771484375, -0.0033330917358398438, -0.0025806427001953125, -0.0018281936645507812, -0.00107574462890625, -0.00032329559326171875, 0.0004291534423828125, 0.0011816024780273438, 0.001934051513671875, 0.0026865005493164062, 0.0034389495849609375, 0.004191398620605469, 0.00494384765625, 0.005696296691894531, 0.0064487457275390625, 0.007201194763183594, 0.007953643798828125, 0.008706092834472656, 0.009458541870117188, 0.010210990905761719, 0.01096343994140625, 0.011715888977050781, 0.012468338012695312, 0.013220787048339844, 0.013973236083984375, 0.014725685119628906, 0.015478134155273438, 0.01623058319091797, 0.0169830322265625, 0.01773548126220703, 0.018487930297851562, 0.019240379333496094, 0.019992828369140625, 0.020745277404785156, 0.021497726440429688, 0.02225017547607422, 0.02300262451171875, 0.02375507354736328, 0.024507522583007812, 0.025259971618652344, 0.026012420654296875, 0.026764869689941406, 0.027517318725585938, 0.02826976776123047, 0.029022216796875]}, "gradients/encoder.encoder.layers.19.attention.k_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 4.0, 2.0, 0.0, 6.0, 2.0, 4.0, 4.0, 5.0, 7.0, 8.0, 7.0, 10.0, 27.0, 17.0, 14.0, 24.0, 22.0, 28.0, 54.0, 26.0, 30.0, 74.0, 39.0, 39.0, 70.0, 48.0, 45.0, 77.0, 38.0, 28.0, 50.0, 19.0, 27.0, 45.0, 11.0, 17.0, 28.0, 6.0, 10.0, 14.0, 4.0, 3.0, 7.0, 1.0, 2.0, 9.0, 2.0, 0.0, 3.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-2.682209014892578e-06, -2.6030465960502625e-06, -2.5238841772079468e-06, -2.444721758365631e-06, -2.3655593395233154e-06, -2.2863969206809998e-06, -2.207234501838684e-06, -2.1280720829963684e-06, -2.0489096641540527e-06, -1.969747245311737e-06, -1.8905848264694214e-06, -1.8114224076271057e-06, -1.73225998878479e-06, -1.6530975699424744e-06, -1.5739351511001587e-06, -1.494772732257843e-06, -1.4156103134155273e-06, -1.3364478945732117e-06, -1.257285475730896e-06, -1.1781230568885803e-06, -1.0989606380462646e-06, -1.019798219203949e-06, -9.406358003616333e-07, -8.614733815193176e-07, -7.82310962677002e-07, -7.031485438346863e-07, -6.239861249923706e-07, -5.448237061500549e-07, -4.6566128730773926e-07, -3.864988684654236e-07, -3.073364496231079e-07, -2.2817403078079224e-07, -1.4901161193847656e-07, -6.984919309616089e-08, 9.313225746154785e-09, 8.847564458847046e-08, 1.6763806343078613e-07, 2.468004822731018e-07, 3.259629011154175e-07, 4.0512531995773315e-07, 4.842877388000488e-07, 5.634501576423645e-07, 6.426125764846802e-07, 7.217749953269958e-07, 8.009374141693115e-07, 8.800998330116272e-07, 9.592622518539429e-07, 1.0384246706962585e-06, 1.1175870895385742e-06, 1.1967495083808899e-06, 1.2759119272232056e-06, 1.3550743460655212e-06, 1.434236764907837e-06, 1.5133991837501526e-06, 1.5925616025924683e-06, 1.671724021434784e-06, 1.7508864402770996e-06, 1.8300488591194153e-06, 1.909211277961731e-06, 1.9883736968040466e-06, 2.0675361156463623e-06, 2.146698534488678e-06, 2.2258609533309937e-06, 2.3050233721733093e-06, 2.384185791015625e-06]}, "gradients/encoder.encoder.layers.19.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 0.0, 0.0, 1.0, 2.0, 2.0, 0.0, 3.0, 1.0, 1.0, 5.0, 2.0, 10.0, 11.0, 8.0, 6.0, 19.0, 20.0, 32.0, 50.0, 97.0, 188.0, 507.0, 1961.0, 49944.0, 988144.0, 5965.0, 912.0, 271.0, 151.0, 84.0, 52.0, 30.0, 22.0, 15.0, 14.0, 13.0, 7.0, 5.0, 4.0, 4.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.019927978515625, -0.019362449645996094, -0.018796920776367188, -0.01823139190673828, -0.017665863037109375, -0.01710033416748047, -0.016534805297851562, -0.015969276428222656, -0.01540374755859375, -0.014838218688964844, -0.014272689819335938, -0.013707160949707031, -0.013141632080078125, -0.012576103210449219, -0.012010574340820312, -0.011445045471191406, -0.0108795166015625, -0.010313987731933594, -0.009748458862304688, -0.009182929992675781, -0.008617401123046875, -0.008051872253417969, -0.0074863433837890625, -0.006920814514160156, -0.00635528564453125, -0.005789756774902344, -0.0052242279052734375, -0.004658699035644531, -0.004093170166015625, -0.0035276412963867188, -0.0029621124267578125, -0.0023965835571289062, -0.0018310546875, -0.0012655258178710938, -0.0006999969482421875, -0.00013446807861328125, 0.000431060791015625, 0.0009965896606445312, 0.0015621185302734375, 0.0021276473999023438, 0.00269317626953125, 0.0032587051391601562, 0.0038242340087890625, 0.004389762878417969, 0.004955291748046875, 0.005520820617675781, 0.0060863494873046875, 0.006651878356933594, 0.0072174072265625, 0.007782936096191406, 0.008348464965820312, 0.008913993835449219, 0.009479522705078125, 0.010045051574707031, 0.010610580444335938, 0.011176109313964844, 0.01174163818359375, 0.012307167053222656, 0.012872695922851562, 0.013438224792480469, 0.014003753662109375, 0.014569282531738281, 0.015134811401367188, 0.015700340270996094, 0.016265869140625]}, "gradients/encoder.encoder.layers.19.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 4.0, 3.0, 2.0, 6.0, 11.0, 9.0, 12.0, 13.0, 17.0, 18.0, 42.0, 43.0, 70.0, 100.0, 79.0, 126.0, 112.0, 85.0, 64.0, 47.0, 33.0, 20.0, 21.0, 12.0, 15.0, 5.0, 8.0, 10.0, 9.0, 2.0, 4.0, 1.0, 1.0, 3.0, 0.0, 2.0, 2.0, 1.0, 0.0, 0.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.005809783935546875, -0.0056078433990478516, -0.005405902862548828, -0.005203962326049805, -0.005002021789550781, -0.004800081253051758, -0.004598140716552734, -0.004396200180053711, -0.0041942596435546875, -0.003992319107055664, -0.0037903785705566406, -0.003588438034057617, -0.0033864974975585938, -0.0031845569610595703, -0.002982616424560547, -0.0027806758880615234, -0.0025787353515625, -0.0023767948150634766, -0.002174854278564453, -0.0019729137420654297, -0.0017709732055664062, -0.0015690326690673828, -0.0013670921325683594, -0.001165151596069336, -0.0009632110595703125, -0.0007612705230712891, -0.0005593299865722656, -0.0003573894500732422, -0.00015544891357421875, 4.649162292480469e-05, 0.0002484321594238281, 0.00045037269592285156, 0.000652313232421875, 0.0008542537689208984, 0.0010561943054199219, 0.0012581348419189453, 0.0014600753784179688, 0.0016620159149169922, 0.0018639564514160156, 0.002065896987915039, 0.0022678375244140625, 0.002469778060913086, 0.0026717185974121094, 0.002873659133911133, 0.0030755996704101562, 0.0032775402069091797, 0.003479480743408203, 0.0036814212799072266, 0.00388336181640625, 0.0040853023529052734, 0.004287242889404297, 0.00448918342590332, 0.004691123962402344, 0.004893064498901367, 0.005095005035400391, 0.005296945571899414, 0.0054988861083984375, 0.005700826644897461, 0.005902767181396484, 0.006104707717895508, 0.006306648254394531, 0.006508588790893555, 0.006710529327392578, 0.0069124698638916016, 0.007114410400390625]}, "gradients/encoder.encoder.layers.19.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 11.0, 62.0, 831.0, 105.0, 9.0, 1.0, 0.0, 1.0, 1.0], "bins": [-21.55149269104004, -21.177263259887695, -20.80303382873535, -20.42880630493164, -20.054576873779297, -19.680347442626953, -19.30611801147461, -18.931888580322266, -18.557661056518555, -18.18343162536621, -17.809202194213867, -17.434974670410156, -17.060745239257812, -16.68651580810547, -16.312286376953125, -15.938057899475098, -15.563827514648438, -15.189598083496094, -14.815369606018066, -14.441140174865723, -14.066911697387695, -13.692682266235352, -13.318452835083008, -12.94422435760498, -12.569995880126953, -12.19576644897461, -11.821537971496582, -11.447308540344238, -11.073080062866211, -10.698850631713867, -10.324621200561523, -9.950392723083496, -9.576164245605469, -9.201934814453125, -8.827706336975098, -8.453476905822754, -8.079248428344727, -7.705018997192383, -7.330790042877197, -6.956561088562012, -6.582331657409668, -6.208102703094482, -5.833873748779297, -5.459644317626953, -5.085415363311768, -4.711186408996582, -4.3369574546813965, -3.962728261947632, -3.588499069213867, -3.2142701148986816, -2.840040922164917, -2.4658119678497314, -2.091582775115967, -1.7173538208007812, -1.3431248664855957, -0.968895673751831, -0.5946667194366455, -0.2204376757144928, 0.1537913680076599, 0.5280203819274902, 0.9022494554519653, 1.2764785289764404, 1.650707483291626, 2.0249366760253906, 2.399165630340576]}, "gradients/encoder.encoder.layers.19.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 4.0, 0.0, 7.0, 8.0, 2.0, 2.0, 8.0, 6.0, 11.0, 15.0, 17.0, 13.0, 23.0, 29.0, 21.0, 46.0, 39.0, 41.0, 48.0, 61.0, 66.0, 57.0, 57.0, 41.0, 51.0, 54.0, 43.0, 37.0, 38.0, 29.0, 24.0, 23.0, 14.0, 15.0, 9.0, 14.0, 10.0, 14.0, 6.0, 2.0, 5.0, 1.0, 4.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.0636181831359863, -2.9542248249053955, -2.8448314666748047, -2.735438108444214, -2.626044750213623, -2.5166513919830322, -2.4072580337524414, -2.2978649139404297, -2.1884713172912598, -2.079077959060669, -1.9696846008300781, -1.8602912425994873, -1.7508978843688965, -1.6415045261383057, -1.5321112871170044, -1.4227179288864136, -1.3133246898651123, -1.2039313316345215, -1.0945379734039307, -0.9851446747779846, -0.8757513165473938, -0.766357958316803, -0.6569646596908569, -0.5475713014602661, -0.4381779432296753, -0.3287845849990845, -0.21939125657081604, -0.10999792814254761, -0.0006045699119567871, 0.10878878831863403, 0.21818208694458008, 0.3275754451751709, 0.4369685649871826, 0.5463619232177734, 0.6557552814483643, 0.7651485800743103, 0.8745419383049011, 0.9839352965354919, 1.093328595161438, 1.2027219533920288, 1.3121153116226196, 1.4215086698532104, 1.5309020280838013, 1.6402952671051025, 1.7496886253356934, 1.8590819835662842, 1.968475341796875, 2.077868700027466, 2.1872620582580566, 2.2966554164886475, 2.4060487747192383, 2.515442132949829, 2.62483549118042, 2.7342288494110107, 2.8436222076416016, 2.9530153274536133, 3.062408924102783, 3.171802282333374, 3.281195640563965, 3.3905889987945557, 3.4999823570251465, 3.6093757152557373, 3.718769073486328, 3.82816219329834, 3.9375555515289307]}, "gradients/encoder.encoder.layers.18.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 1.0, 1.0, 2.0, 0.0, 0.0, 3.0, 5.0, 1.0, 4.0, 8.0, 4.0, 4.0, 9.0, 14.0, 21.0, 31.0, 68.0, 133.0, 286.0, 1339.0, 4189868.0, 1856.0, 329.0, 146.0, 63.0, 33.0, 16.0, 11.0, 11.0, 5.0, 5.0, 4.0, 7.0, 5.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-4.93359375, -4.81488037109375, -4.6961669921875, -4.57745361328125, -4.458740234375, -4.34002685546875, -4.2213134765625, -4.10260009765625, -3.98388671875, -3.86517333984375, -3.7464599609375, -3.62774658203125, -3.509033203125, -3.39031982421875, -3.2716064453125, -3.15289306640625, -3.0341796875, -2.91546630859375, -2.7967529296875, -2.67803955078125, -2.559326171875, -2.44061279296875, -2.3218994140625, -2.20318603515625, -2.08447265625, -1.96575927734375, -1.8470458984375, -1.72833251953125, -1.609619140625, -1.49090576171875, -1.3721923828125, -1.25347900390625, -1.134765625, -1.01605224609375, -0.8973388671875, -0.77862548828125, -0.659912109375, -0.54119873046875, -0.4224853515625, -0.30377197265625, -0.18505859375, -0.06634521484375, 0.0523681640625, 0.17108154296875, 0.289794921875, 0.40850830078125, 0.5272216796875, 0.64593505859375, 0.7646484375, 0.88336181640625, 1.0020751953125, 1.12078857421875, 1.239501953125, 1.35821533203125, 1.4769287109375, 1.59564208984375, 1.71435546875, 1.83306884765625, 1.9517822265625, 2.07049560546875, 2.189208984375, 2.30792236328125, 2.4266357421875, 2.54534912109375, 2.6640625]}, "gradients/encoder.encoder.layers.18.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 1.0, 1.0, 2.0, 0.0, 0.0, 2.0, 5.0, 1.0, 5.0, 5.0, 4.0, 4.0, 5.0, 10.0, 14.0, 8.0, 35.0, 62.0, 110.0, 119.0, 160.0, 165.0, 117.0, 66.0, 39.0, 16.0, 12.0, 8.0, 11.0, 5.0, 5.0, 4.0, 7.0, 5.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.281005859375, -0.27424049377441406, -0.2674751281738281, -0.2607097625732422, -0.25394439697265625, -0.2471790313720703, -0.24041366577148438, -0.23364830017089844, -0.2268829345703125, -0.22011756896972656, -0.21335220336914062, -0.2065868377685547, -0.19982147216796875, -0.1930561065673828, -0.18629074096679688, -0.17952537536621094, -0.172760009765625, -0.16599464416503906, -0.15922927856445312, -0.1524639129638672, -0.14569854736328125, -0.1389331817626953, -0.13216781616210938, -0.12540245056152344, -0.1186370849609375, -0.11187171936035156, -0.10510635375976562, -0.09834098815917969, -0.09157562255859375, -0.08481025695800781, -0.07804489135742188, -0.07127952575683594, -0.06451416015625, -0.05774879455566406, -0.050983428955078125, -0.04421806335449219, -0.03745269775390625, -0.030687332153320312, -0.023921966552734375, -0.017156600952148438, -0.0103912353515625, -0.0036258697509765625, 0.003139495849609375, 0.009904861450195312, 0.01667022705078125, 0.023435592651367188, 0.030200958251953125, 0.03696632385253906, 0.043731689453125, 0.05049705505371094, 0.057262420654296875, 0.06402778625488281, 0.07079315185546875, 0.07755851745605469, 0.08432388305664062, 0.09108924865722656, 0.0978546142578125, 0.10461997985839844, 0.11138534545898438, 0.11815071105957031, 0.12491607666015625, 0.1316814422607422, 0.13844680786132812, 0.14521217346191406, 0.1519775390625]}, "gradients/encoder.encoder.layers.18.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 2.0, 2.0, 2.0, 0.0, 2.0, 3.0, 2.0, 3.0, 4.0, 6.0, 10.0, 17.0, 34.0, 70.0, 154.0, 358.0, 986.0, 2917.0, 15653.0, 4163593.0, 8346.0, 1473.0, 388.0, 129.0, 58.0, 32.0, 18.0, 9.0, 7.0, 4.0, 1.0, 2.0, 2.0, 0.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.70458984375, -0.6744461059570312, -0.6443023681640625, -0.6141586303710938, -0.584014892578125, -0.5538711547851562, -0.5237274169921875, -0.49358367919921875, -0.46343994140625, -0.43329620361328125, -0.4031524658203125, -0.37300872802734375, -0.342864990234375, -0.31272125244140625, -0.2825775146484375, -0.25243377685546875, -0.2222900390625, -0.19214630126953125, -0.1620025634765625, -0.13185882568359375, -0.101715087890625, -0.07157135009765625, -0.0414276123046875, -0.01128387451171875, 0.01885986328125, 0.04900360107421875, 0.0791473388671875, 0.10929107666015625, 0.139434814453125, 0.16957855224609375, 0.1997222900390625, 0.22986602783203125, 0.260009765625, 0.29015350341796875, 0.3202972412109375, 0.35044097900390625, 0.380584716796875, 0.41072845458984375, 0.4408721923828125, 0.47101593017578125, 0.50115966796875, 0.5313034057617188, 0.5614471435546875, 0.5915908813476562, 0.621734619140625, 0.6518783569335938, 0.6820220947265625, 0.7121658325195312, 0.7423095703125, 0.7724533081054688, 0.8025970458984375, 0.8327407836914062, 0.862884521484375, 0.8930282592773438, 0.9231719970703125, 0.9533157348632812, 0.98345947265625, 1.0136032104492188, 1.0437469482421875, 1.0738906860351562, 1.104034423828125, 1.1341781616210938, 1.1643218994140625, 1.1944656372070312, 1.224609375]}, "gradients/encoder.encoder.layers.18.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 3.0, 0.0, 2.0, 6.0, 7.0, 11.0, 17.0, 21.0, 98.0, 3705.0, 116.0, 42.0, 17.0, 3.0, 6.0, 3.0, 3.0, 4.0, 0.0, 2.0, 1.0, 1.0, 2.0, 0.0, 2.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.262451171875, -0.25598907470703125, -0.2495269775390625, -0.24306488037109375, -0.236602783203125, -0.23014068603515625, -0.2236785888671875, -0.21721649169921875, -0.21075439453125, -0.20429229736328125, -0.1978302001953125, -0.19136810302734375, -0.184906005859375, -0.17844390869140625, -0.1719818115234375, -0.16551971435546875, -0.1590576171875, -0.15259552001953125, -0.1461334228515625, -0.13967132568359375, -0.133209228515625, -0.12674713134765625, -0.1202850341796875, -0.11382293701171875, -0.10736083984375, -0.10089874267578125, -0.0944366455078125, -0.08797454833984375, -0.081512451171875, -0.07505035400390625, -0.0685882568359375, -0.06212615966796875, -0.0556640625, -0.04920196533203125, -0.0427398681640625, -0.03627777099609375, -0.029815673828125, -0.02335357666015625, -0.0168914794921875, -0.01042938232421875, -0.00396728515625, 0.00249481201171875, 0.0089569091796875, 0.01541900634765625, 0.021881103515625, 0.02834320068359375, 0.0348052978515625, 0.04126739501953125, 0.0477294921875, 0.05419158935546875, 0.0606536865234375, 0.06711578369140625, 0.073577880859375, 0.08003997802734375, 0.0865020751953125, 0.09296417236328125, 0.09942626953125, 0.10588836669921875, 0.1123504638671875, 0.11881256103515625, 0.125274658203125, 0.13173675537109375, 0.1381988525390625, 0.14466094970703125, 0.151123046875]}, "gradients/encoder.encoder.layers.18.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 4.0, 26.0, 717.0, 262.0, 7.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.2672343254089355, -3.2087950706481934, -3.150355815887451, -3.091916561126709, -3.033477306365967, -2.9750380516052246, -2.9165987968444824, -2.8581597805023193, -2.799720525741577, -2.741281270980835, -2.6828420162200928, -2.6244027614593506, -2.5659635066986084, -2.5075244903564453, -2.449085235595703, -2.390645980834961, -2.3322067260742188, -2.2737674713134766, -2.2153282165527344, -2.156888961791992, -2.09844970703125, -2.040010452270508, -1.9815713167190552, -1.923132061958313, -1.8646926879882812, -1.806253433227539, -1.7478141784667969, -1.6893749237060547, -1.630935788154602, -1.5724965333938599, -1.5140572786331177, -1.4556180238723755, -1.3971787691116333, -1.3387395143508911, -1.280300259590149, -1.2218611240386963, -1.163421869277954, -1.104982614517212, -1.0465433597564697, -0.9881041049957275, -0.9296649098396301, -0.8712256550788879, -0.8127864599227905, -0.7543472051620483, -0.6959079504013062, -0.6374687552452087, -0.5790295004844666, -0.5205903053283691, -0.46215105056762695, -0.40371182560920715, -0.34527260065078735, -0.28683334589004517, -0.22839412093162537, -0.16995489597320557, -0.11151564121246338, -0.05307641625404358, 0.005362808704376221, 0.06380204111337662, 0.12224127352237701, 0.180680513381958, 0.2391197383403778, 0.2975589632987976, 0.3559982180595398, 0.4144374430179596, 0.4728766679763794]}, "gradients/encoder.encoder.layers.18.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 3.0, 2.0, 2.0, 5.0, 2.0, 2.0, 7.0, 11.0, 13.0, 11.0, 7.0, 20.0, 21.0, 20.0, 26.0, 29.0, 41.0, 55.0, 39.0, 48.0, 43.0, 48.0, 40.0, 44.0, 60.0, 37.0, 47.0, 47.0, 41.0, 43.0, 32.0, 30.0, 31.0, 20.0, 20.0, 10.0, 15.0, 8.0, 5.0, 5.0, 6.0, 7.0, 3.0, 1.0, 4.0, 2.0, 0.0, 2.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.34555405378341675, -0.3329595923423767, -0.32036513090133667, -0.30777066946029663, -0.2951762080192566, -0.28258174657821655, -0.2699873149394989, -0.25739285349845886, -0.24479839205741882, -0.23220393061637878, -0.21960946917533875, -0.2070150226354599, -0.19442056119441986, -0.18182609975337982, -0.16923165321350098, -0.15663719177246094, -0.1440427303314209, -0.13144826889038086, -0.11885381489992142, -0.10625936090946198, -0.09366489946842194, -0.0810704380273819, -0.06847598403692245, -0.05588153004646301, -0.043287068605422974, -0.030692610889673233, -0.018098153173923492, -0.005503695458173752, 0.007090762257575989, 0.019685223698616028, 0.03227967768907547, 0.04487413167953491, 0.05746859312057495, 0.07006305456161499, 0.08265750855207443, 0.09525196254253387, 0.10784642398357391, 0.12044088542461395, 0.1330353319644928, 0.14562979340553284, 0.15822425484657288, 0.17081871628761292, 0.18341317772865295, 0.1960076242685318, 0.20860208570957184, 0.22119654715061188, 0.23379099369049072, 0.24638545513153076, 0.2589799165725708, 0.27157437801361084, 0.2841688394546509, 0.2967633008956909, 0.30935776233673096, 0.321952223777771, 0.33454665541648865, 0.3471411168575287, 0.3597355782985687, 0.37233003973960876, 0.3849245011806488, 0.39751896262168884, 0.4101133942604065, 0.42270785570144653, 0.4353023171424866, 0.4478967785835266, 0.46049124002456665]}, "gradients/encoder.encoder.layers.18.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 3.0, 2.0, 5.0, 3.0, 7.0, 2.0, 8.0, 8.0, 13.0, 8.0, 29.0, 30.0, 57.0, 97.0, 113.0, 189.0, 349.0, 559.0, 1009.0, 1838.0, 3883.0, 9081.0, 29165.0, 244986.0, 687687.0, 47010.0, 12306.0, 4976.0, 2334.0, 1202.0, 645.0, 355.0, 226.0, 111.0, 86.0, 62.0, 42.0, 28.0, 18.0, 8.0, 6.0, 8.0, 2.0, 4.0, 1.0, 2.0, 1.0, 0.0, 1.0, 5.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.397216796875, -0.3831062316894531, -0.36899566650390625, -0.3548851013183594, -0.3407745361328125, -0.3266639709472656, -0.31255340576171875, -0.2984428405761719, -0.284332275390625, -0.2702217102050781, -0.25611114501953125, -0.24200057983398438, -0.2278900146484375, -0.21377944946289062, -0.19966888427734375, -0.18555831909179688, -0.17144775390625, -0.15733718872070312, -0.14322662353515625, -0.12911605834960938, -0.1150054931640625, -0.10089492797851562, -0.08678436279296875, -0.07267379760742188, -0.058563232421875, -0.044452667236328125, -0.03034210205078125, -0.016231536865234375, -0.0021209716796875, 0.011989593505859375, 0.02610015869140625, 0.040210723876953125, 0.0543212890625, 0.06843185424804688, 0.08254241943359375, 0.09665298461914062, 0.1107635498046875, 0.12487411499023438, 0.13898468017578125, 0.15309524536132812, 0.167205810546875, 0.18131637573242188, 0.19542694091796875, 0.20953750610351562, 0.2236480712890625, 0.23775863647460938, 0.25186920166015625, 0.2659797668457031, 0.28009033203125, 0.2942008972167969, 0.30831146240234375, 0.3224220275878906, 0.3365325927734375, 0.3506431579589844, 0.36475372314453125, 0.3788642883300781, 0.392974853515625, 0.4070854187011719, 0.42119598388671875, 0.4353065490722656, 0.4494171142578125, 0.4635276794433594, 0.47763824462890625, 0.4917488098144531, 0.505859375]}, "gradients/encoder.encoder.layers.18.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 2.0, 0.0, 0.0, 1.0, 1.0, 6.0, 1.0, 2.0, 7.0, 5.0, 3.0, 5.0, 12.0, 12.0, 7.0, 31.0, 69.0, 97.0, 119.0, 161.0, 171.0, 114.0, 71.0, 38.0, 23.0, 8.0, 7.0, 12.0, 6.0, 7.0, 4.0, 6.0, 4.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.28173828125, -0.2749671936035156, -0.26819610595703125, -0.2614250183105469, -0.2546539306640625, -0.24788284301757812, -0.24111175537109375, -0.23434066772460938, -0.227569580078125, -0.22079849243164062, -0.21402740478515625, -0.20725631713867188, -0.2004852294921875, -0.19371414184570312, -0.18694305419921875, -0.18017196655273438, -0.17340087890625, -0.16662979125976562, -0.15985870361328125, -0.15308761596679688, -0.1463165283203125, -0.13954544067382812, -0.13277435302734375, -0.12600326538085938, -0.119232177734375, -0.11246109008789062, -0.10569000244140625, -0.09891891479492188, -0.0921478271484375, -0.08537673950195312, -0.07860565185546875, -0.07183456420898438, -0.0650634765625, -0.058292388916015625, -0.05152130126953125, -0.044750213623046875, -0.0379791259765625, -0.031208038330078125, -0.02443695068359375, -0.017665863037109375, -0.010894775390625, -0.004123687744140625, 0.00264739990234375, 0.009418487548828125, 0.0161895751953125, 0.022960662841796875, 0.02973175048828125, 0.036502838134765625, 0.04327392578125, 0.050045013427734375, 0.05681610107421875, 0.06358718872070312, 0.0703582763671875, 0.07712936401367188, 0.08390045166015625, 0.09067153930664062, 0.097442626953125, 0.10421371459960938, 0.11098480224609375, 0.11775588989257812, 0.1245269775390625, 0.13129806518554688, 0.13806915283203125, 0.14484024047851562, 0.151611328125]}, "gradients/encoder.encoder.layers.18.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 3.0, 3.0, 1.0, 3.0, 1.0, 4.0, 0.0, 4.0, 9.0, 9.0, 15.0, 23.0, 31.0, 29.0, 30.0, 49.0, 79.0, 112.0, 154.0, 240.0, 349.0, 677.0, 1231.0, 3224.0, 17920.0, 345758.0, 645728.0, 25459.0, 3984.0, 1450.0, 708.0, 426.0, 276.0, 169.0, 100.0, 74.0, 61.0, 40.0, 32.0, 27.0, 18.0, 14.0, 12.0, 7.0, 8.0, 7.0, 3.0, 7.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.66748046875, -0.6452789306640625, -0.623077392578125, -0.6008758544921875, -0.57867431640625, -0.5564727783203125, -0.534271240234375, -0.5120697021484375, -0.4898681640625, -0.4676666259765625, -0.445465087890625, -0.4232635498046875, -0.40106201171875, -0.3788604736328125, -0.356658935546875, -0.3344573974609375, -0.312255859375, -0.2900543212890625, -0.267852783203125, -0.2456512451171875, -0.22344970703125, -0.2012481689453125, -0.179046630859375, -0.1568450927734375, -0.1346435546875, -0.1124420166015625, -0.090240478515625, -0.0680389404296875, -0.04583740234375, -0.0236358642578125, -0.001434326171875, 0.0207672119140625, 0.04296875, 0.0651702880859375, 0.087371826171875, 0.1095733642578125, 0.13177490234375, 0.1539764404296875, 0.176177978515625, 0.1983795166015625, 0.2205810546875, 0.2427825927734375, 0.264984130859375, 0.2871856689453125, 0.30938720703125, 0.3315887451171875, 0.353790283203125, 0.3759918212890625, 0.398193359375, 0.4203948974609375, 0.442596435546875, 0.4647979736328125, 0.48699951171875, 0.5092010498046875, 0.531402587890625, 0.5536041259765625, 0.5758056640625, 0.5980072021484375, 0.620208740234375, 0.6424102783203125, 0.66461181640625, 0.6868133544921875, 0.709014892578125, 0.7312164306640625, 0.75341796875]}, "gradients/encoder.encoder.layers.18.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 6.0, 2.0, 5.0, 5.0, 5.0, 8.0, 8.0, 15.0, 15.0, 15.0, 16.0, 14.0, 20.0, 33.0, 28.0, 40.0, 30.0, 35.0, 49.0, 53.0, 48.0, 45.0, 39.0, 42.0, 45.0, 54.0, 45.0, 33.0, 33.0, 36.0, 36.0, 29.0, 19.0, 10.0, 17.0, 23.0, 20.0, 9.0, 5.0, 8.0, 4.0, 0.0, 4.0, 1.0, 3.0, 2.0, 1.0, 3.0, 1.0, 1.0, 1.0, 1.0], "bins": [-0.52001953125, -0.5046920776367188, -0.4893646240234375, -0.47403717041015625, -0.458709716796875, -0.44338226318359375, -0.4280548095703125, -0.41272735595703125, -0.39739990234375, -0.38207244873046875, -0.3667449951171875, -0.35141754150390625, -0.336090087890625, -0.32076263427734375, -0.3054351806640625, -0.29010772705078125, -0.2747802734375, -0.25945281982421875, -0.2441253662109375, -0.22879791259765625, -0.213470458984375, -0.19814300537109375, -0.1828155517578125, -0.16748809814453125, -0.15216064453125, -0.13683319091796875, -0.1215057373046875, -0.10617828369140625, -0.090850830078125, -0.07552337646484375, -0.0601959228515625, -0.04486846923828125, -0.029541015625, -0.01421356201171875, 0.0011138916015625, 0.01644134521484375, 0.031768798828125, 0.04709625244140625, 0.0624237060546875, 0.07775115966796875, 0.09307861328125, 0.10840606689453125, 0.1237335205078125, 0.13906097412109375, 0.154388427734375, 0.16971588134765625, 0.1850433349609375, 0.20037078857421875, 0.2156982421875, 0.23102569580078125, 0.2463531494140625, 0.26168060302734375, 0.277008056640625, 0.29233551025390625, 0.3076629638671875, 0.32299041748046875, 0.33831787109375, 0.35364532470703125, 0.3689727783203125, 0.38430023193359375, 0.399627685546875, 0.41495513916015625, 0.4302825927734375, 0.44561004638671875, 0.4609375]}, "gradients/encoder.encoder.layers.18.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 0.0, 2.0, 0.0, 2.0, 6.0, 2.0, 5.0, 9.0, 4.0, 8.0, 10.0, 15.0, 22.0, 34.0, 41.0, 97.0, 180.0, 437.0, 1490.0, 37489.0, 1003902.0, 3595.0, 665.0, 215.0, 121.0, 56.0, 38.0, 30.0, 24.0, 13.0, 9.0, 6.0, 12.0, 6.0, 4.0, 4.0, 7.0, 2.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.02264404296875, -0.022001028060913086, -0.021358013153076172, -0.020714998245239258, -0.020071983337402344, -0.01942896842956543, -0.018785953521728516, -0.0181429386138916, -0.017499923706054688, -0.016856908798217773, -0.01621389389038086, -0.015570878982543945, -0.014927864074707031, -0.014284849166870117, -0.013641834259033203, -0.012998819351196289, -0.012355804443359375, -0.011712789535522461, -0.011069774627685547, -0.010426759719848633, -0.009783744812011719, -0.009140729904174805, -0.00849771499633789, -0.007854700088500977, -0.0072116851806640625, -0.0065686702728271484, -0.005925655364990234, -0.00528264045715332, -0.004639625549316406, -0.003996610641479492, -0.003353595733642578, -0.002710580825805664, -0.00206756591796875, -0.001424551010131836, -0.0007815361022949219, -0.0001385211944580078, 0.0005044937133789062, 0.0011475086212158203, 0.0017905235290527344, 0.0024335384368896484, 0.0030765533447265625, 0.0037195682525634766, 0.004362583160400391, 0.005005598068237305, 0.005648612976074219, 0.006291627883911133, 0.006934642791748047, 0.007577657699584961, 0.008220672607421875, 0.008863687515258789, 0.009506702423095703, 0.010149717330932617, 0.010792732238769531, 0.011435747146606445, 0.01207876205444336, 0.012721776962280273, 0.013364791870117188, 0.014007806777954102, 0.014650821685791016, 0.01529383659362793, 0.015936851501464844, 0.016579866409301758, 0.017222881317138672, 0.017865896224975586, 0.0185089111328125]}, "gradients/encoder.encoder.layers.18.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 2.0, 2.0, 1.0, 5.0, 4.0, 14.0, 4.0, 13.0, 7.0, 11.0, 19.0, 19.0, 30.0, 30.0, 37.0, 58.0, 43.0, 28.0, 32.0, 48.0, 74.0, 43.0, 40.0, 41.0, 35.0, 73.0, 32.0, 35.0, 24.0, 31.0, 38.0, 18.0, 19.0, 21.0, 13.0, 20.0, 10.0, 9.0, 8.0, 4.0, 4.0, 1.0, 3.0, 4.0, 1.0, 4.0, 1.0, 1.0, 0.0, 2.0], "bins": [-2.562999725341797e-06, -2.491287887096405e-06, -2.419576048851013e-06, -2.3478642106056213e-06, -2.2761523723602295e-06, -2.2044405341148376e-06, -2.132728695869446e-06, -2.061016857624054e-06, -1.989305019378662e-06, -1.9175931811332703e-06, -1.8458813428878784e-06, -1.7741695046424866e-06, -1.7024576663970947e-06, -1.6307458281517029e-06, -1.559033989906311e-06, -1.4873221516609192e-06, -1.4156103134155273e-06, -1.3438984751701355e-06, -1.2721866369247437e-06, -1.2004747986793518e-06, -1.12876296043396e-06, -1.0570511221885681e-06, -9.853392839431763e-07, -9.136274456977844e-07, -8.419156074523926e-07, -7.702037692070007e-07, -6.984919309616089e-07, -6.26780092716217e-07, -5.550682544708252e-07, -4.833564162254333e-07, -4.116445779800415e-07, -3.3993273973464966e-07, -2.682209014892578e-07, -1.9650906324386597e-07, -1.2479722499847412e-07, -5.3085386753082275e-08, 1.862645149230957e-08, 9.033828973770142e-08, 1.6205012798309326e-07, 2.337619662284851e-07, 3.0547380447387695e-07, 3.771856427192688e-07, 4.4889748096466064e-07, 5.206093192100525e-07, 5.923211574554443e-07, 6.640329957008362e-07, 7.35744833946228e-07, 8.074566721916199e-07, 8.791685104370117e-07, 9.508803486824036e-07, 1.0225921869277954e-06, 1.0943040251731873e-06, 1.166015863418579e-06, 1.237727701663971e-06, 1.3094395399093628e-06, 1.3811513781547546e-06, 1.4528632164001465e-06, 1.5245750546455383e-06, 1.5962868928909302e-06, 1.667998731136322e-06, 1.7397105693817139e-06, 1.8114224076271057e-06, 1.8831342458724976e-06, 1.9548460841178894e-06, 2.0265579223632812e-06]}, "gradients/encoder.encoder.layers.18.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 5.0, 1.0, 1.0, 2.0, 4.0, 3.0, 8.0, 14.0, 13.0, 14.0, 28.0, 80.0, 134.0, 255.0, 697.0, 2887.0, 87430.0, 948552.0, 6600.0, 1094.0, 348.0, 154.0, 85.0, 60.0, 33.0, 25.0, 9.0, 9.0, 9.0, 7.0, 5.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0174407958984375, -0.016971588134765625, -0.01650238037109375, -0.016033172607421875, -0.01556396484375, -0.015094757080078125, -0.01462554931640625, -0.014156341552734375, -0.0136871337890625, -0.013217926025390625, -0.01274871826171875, -0.012279510498046875, -0.011810302734375, -0.011341094970703125, -0.01087188720703125, -0.010402679443359375, -0.0099334716796875, -0.009464263916015625, -0.00899505615234375, -0.008525848388671875, -0.008056640625, -0.007587432861328125, -0.00711822509765625, -0.006649017333984375, -0.0061798095703125, -0.005710601806640625, -0.00524139404296875, -0.004772186279296875, -0.004302978515625, -0.003833770751953125, -0.00336456298828125, -0.002895355224609375, -0.0024261474609375, -0.001956939697265625, -0.00148773193359375, -0.001018524169921875, -0.00054931640625, -8.0108642578125e-05, 0.00038909912109375, 0.000858306884765625, 0.0013275146484375, 0.001796722412109375, 0.00226593017578125, 0.002735137939453125, 0.003204345703125, 0.003673553466796875, 0.00414276123046875, 0.004611968994140625, 0.0050811767578125, 0.005550384521484375, 0.00601959228515625, 0.006488800048828125, 0.0069580078125, 0.007427215576171875, 0.00789642333984375, 0.008365631103515625, 0.0088348388671875, 0.009304046630859375, 0.00977325439453125, 0.010242462158203125, 0.010711669921875, 0.011180877685546875, 0.01165008544921875, 0.012119293212890625, 0.0125885009765625]}, "gradients/encoder.encoder.layers.18.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 5.0, 5.0, 3.0, 2.0, 13.0, 12.0, 19.0, 20.0, 31.0, 38.0, 75.0, 98.0, 152.0, 175.0, 141.0, 65.0, 58.0, 27.0, 16.0, 17.0, 10.0, 4.0, 4.0, 6.0, 7.0, 0.0, 4.0, 0.0, 2.0, 1.0, 2.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.007282257080078125, -0.0069803595542907715, -0.006678462028503418, -0.0063765645027160645, -0.006074666976928711, -0.005772769451141357, -0.005470871925354004, -0.00516897439956665, -0.004867076873779297, -0.004565179347991943, -0.00426328182220459, -0.003961384296417236, -0.003659486770629883, -0.0033575892448425293, -0.0030556917190551758, -0.0027537941932678223, -0.0024518966674804688, -0.0021499991416931152, -0.0018481016159057617, -0.0015462040901184082, -0.0012443065643310547, -0.0009424090385437012, -0.0006405115127563477, -0.00033861398696899414, -3.6716461181640625e-05, 0.0002651810646057129, 0.0005670785903930664, 0.0008689761161804199, 0.0011708736419677734, 0.001472771167755127, 0.0017746686935424805, 0.002076566219329834, 0.0023784637451171875, 0.002680361270904541, 0.0029822587966918945, 0.003284156322479248, 0.0035860538482666016, 0.003887951374053955, 0.004189848899841309, 0.004491746425628662, 0.004793643951416016, 0.005095541477203369, 0.005397439002990723, 0.005699336528778076, 0.00600123405456543, 0.006303131580352783, 0.006605029106140137, 0.00690692663192749, 0.007208824157714844, 0.007510721683502197, 0.00781261920928955, 0.008114516735076904, 0.008416414260864258, 0.008718311786651611, 0.009020209312438965, 0.009322106838226318, 0.009624004364013672, 0.009925901889801025, 0.010227799415588379, 0.010529696941375732, 0.010831594467163086, 0.01113349199295044, 0.011435389518737793, 0.011737287044525146, 0.0120391845703125]}, "gradients/encoder.encoder.layers.18.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 5.0, 14.0, 74.0, 709.0, 191.0, 17.0, 3.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.425246238708496, -2.1748247146606445, -1.9244030714035034, -1.6739814281463623, -1.4235599040985107, -1.1731383800506592, -0.9227167367935181, -0.672295093536377, -0.4218735694885254, -0.17145198583602905, 0.07896959781646729, 0.3293911814689636, 0.57981276512146, 0.8302342891693115, 1.0806559324264526, 1.3310775756835938, 1.5814990997314453, 1.8319206237792969, 2.0823421478271484, 2.332763910293579, 2.5831854343414307, 2.8336069583892822, 3.084028720855713, 3.3344502449035645, 3.584871768951416, 3.8352932929992676, 4.085714817047119, 4.336136341094971, 4.5865583419799805, 4.836979866027832, 5.087401390075684, 5.337822914123535, 5.58824348449707, 5.838665008544922, 6.089086532592773, 6.339508056640625, 6.589929580688477, 6.840351104736328, 7.090773105621338, 7.3411946296691895, 7.591616153717041, 7.842037677764893, 8.092459678649902, 8.342881202697754, 8.593302726745605, 8.843724250793457, 9.094145774841309, 9.34456729888916, 9.594988822937012, 9.845410346984863, 10.095831871032715, 10.346253395080566, 10.596674919128418, 10.84709644317627, 11.097517967224121, 11.347940444946289, 11.59836196899414, 11.848783493041992, 12.099205017089844, 12.349626541137695, 12.600048065185547, 12.850469589233398, 13.10089111328125, 13.351312637329102, 13.601734161376953]}, "gradients/encoder.encoder.layers.18.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 4.0, 5.0, 4.0, 6.0, 6.0, 4.0, 9.0, 15.0, 11.0, 21.0, 17.0, 20.0, 38.0, 28.0, 40.0, 46.0, 47.0, 48.0, 51.0, 37.0, 66.0, 59.0, 47.0, 63.0, 52.0, 47.0, 43.0, 33.0, 24.0, 15.0, 26.0, 17.0, 14.0, 13.0, 8.0, 9.0, 5.0, 6.0, 3.0, 4.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.9852020740509033, -2.8834846019744873, -2.781766891479492, -2.680049419403076, -2.578331708908081, -2.476614236831665, -2.37489652633667, -2.273179054260254, -2.171461582183838, -2.069744110107422, -1.9680263996124268, -1.8663089275360107, -1.7645912170410156, -1.6628737449645996, -1.561156153678894, -1.4594385623931885, -1.3577208518981934, -1.2560032606124878, -1.1542856693267822, -1.0525681972503662, -0.9508505463600159, -0.8491329550743103, -0.7474154233932495, -0.645697832107544, -0.5439802408218384, -0.4422626495361328, -0.34054508805274963, -0.23882752656936646, -0.1371099352836609, -0.03539234399795532, 0.06632518768310547, 0.16804277896881104, 0.2697603702545166, 0.37147796154022217, 0.47319552302360535, 0.5749130845069885, 0.6766306757926941, 0.7783482670783997, 0.8800657987594604, 0.981783390045166, 1.0835009813308716, 1.1852185726165771, 1.2869361639022827, 1.3886537551879883, 1.4903712272644043, 1.5920889377593994, 1.6938064098358154, 1.795524001121521, 1.8972415924072266, 1.9989591836929321, 2.1006767749786377, 2.2023942470550537, 2.304111957550049, 2.405829429626465, 2.507546901702881, 2.609264612197876, 2.710982322692871, 2.812699794769287, 2.9144175052642822, 3.0161349773406982, 3.1178526878356934, 3.2195701599121094, 3.3212876319885254, 3.4230053424835205, 3.5247228145599365]}, "gradients/encoder.encoder.layers.17.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 4.0, 1.0, 1.0, 3.0, 5.0, 7.0, 5.0, 12.0, 9.0, 15.0, 29.0, 29.0, 39.0, 68.0, 98.0, 173.0, 309.0, 670.0, 1926.0, 16839.0, 4162143.0, 9254.0, 1496.0, 523.0, 253.0, 147.0, 82.0, 57.0, 26.0, 26.0, 15.0, 8.0, 11.0, 4.0, 2.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.0625, -1.0368270874023438, -1.0111541748046875, -0.9854812622070312, -0.959808349609375, -0.9341354370117188, -0.9084625244140625, -0.8827896118164062, -0.85711669921875, -0.8314437866210938, -0.8057708740234375, -0.7800979614257812, -0.754425048828125, -0.7287521362304688, -0.7030792236328125, -0.6774063110351562, -0.6517333984375, -0.6260604858398438, -0.6003875732421875, -0.5747146606445312, -0.549041748046875, -0.5233688354492188, -0.4976959228515625, -0.47202301025390625, -0.44635009765625, -0.42067718505859375, -0.3950042724609375, -0.36933135986328125, -0.343658447265625, -0.31798553466796875, -0.2923126220703125, -0.26663970947265625, -0.240966796875, -0.21529388427734375, -0.1896209716796875, -0.16394805908203125, -0.138275146484375, -0.11260223388671875, -0.0869293212890625, -0.06125640869140625, -0.03558349609375, -0.00991058349609375, 0.0157623291015625, 0.04143524169921875, 0.067108154296875, 0.09278106689453125, 0.1184539794921875, 0.14412689208984375, 0.1697998046875, 0.19547271728515625, 0.2211456298828125, 0.24681854248046875, 0.272491455078125, 0.29816436767578125, 0.3238372802734375, 0.34951019287109375, 0.37518310546875, 0.40085601806640625, 0.4265289306640625, 0.45220184326171875, 0.477874755859375, 0.5035476684570312, 0.5292205810546875, 0.5548934936523438, 0.58056640625]}, "gradients/encoder.encoder.layers.17.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 5.0, 3.0, 3.0, 9.0, 5.0, 2.0, 12.0, 13.0, 16.0, 49.0, 68.0, 91.0, 147.0, 167.0, 127.0, 128.0, 52.0, 32.0, 23.0, 19.0, 8.0, 4.0, 8.0, 7.0, 4.0, 5.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.277587890625, -0.2708778381347656, -0.26416778564453125, -0.2574577331542969, -0.2507476806640625, -0.24403762817382812, -0.23732757568359375, -0.23061752319335938, -0.223907470703125, -0.21719741821289062, -0.21048736572265625, -0.20377731323242188, -0.1970672607421875, -0.19035720825195312, -0.18364715576171875, -0.17693710327148438, -0.17022705078125, -0.16351699829101562, -0.15680694580078125, -0.15009689331054688, -0.1433868408203125, -0.13667678833007812, -0.12996673583984375, -0.12325668334960938, -0.116546630859375, -0.10983657836914062, -0.10312652587890625, -0.09641647338867188, -0.0897064208984375, -0.08299636840820312, -0.07628631591796875, -0.06957626342773438, -0.0628662109375, -0.056156158447265625, -0.04944610595703125, -0.042736053466796875, -0.0360260009765625, -0.029315948486328125, -0.02260589599609375, -0.015895843505859375, -0.009185791015625, -0.002475738525390625, 0.00423431396484375, 0.010944366455078125, 0.0176544189453125, 0.024364471435546875, 0.03107452392578125, 0.037784576416015625, 0.04449462890625, 0.051204681396484375, 0.05791473388671875, 0.06462478637695312, 0.0713348388671875, 0.07804489135742188, 0.08475494384765625, 0.09146499633789062, 0.098175048828125, 0.10488510131835938, 0.11159515380859375, 0.11830520629882812, 0.1250152587890625, 0.13172531127929688, 0.13843536376953125, 0.14514541625976562, 0.15185546875]}, "gradients/encoder.encoder.layers.17.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 1.0, 2.0, 6.0, 5.0, 5.0, 7.0, 10.0, 7.0, 19.0, 22.0, 55.0, 121.0, 391.0, 1556.0, 9985.0, 4145801.0, 33297.0, 2266.0, 405.0, 146.0, 68.0, 41.0, 22.0, 14.0, 7.0, 7.0, 8.0, 3.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.8525390625, -0.8217620849609375, -0.790985107421875, -0.7602081298828125, -0.72943115234375, -0.6986541748046875, -0.667877197265625, -0.6371002197265625, -0.6063232421875, -0.5755462646484375, -0.544769287109375, -0.5139923095703125, -0.48321533203125, -0.4524383544921875, -0.421661376953125, -0.3908843994140625, -0.360107421875, -0.3293304443359375, -0.298553466796875, -0.2677764892578125, -0.23699951171875, -0.2062225341796875, -0.175445556640625, -0.1446685791015625, -0.1138916015625, -0.0831146240234375, -0.052337646484375, -0.0215606689453125, 0.00921630859375, 0.0399932861328125, 0.070770263671875, 0.1015472412109375, 0.13232421875, 0.1631011962890625, 0.193878173828125, 0.2246551513671875, 0.25543212890625, 0.2862091064453125, 0.316986083984375, 0.3477630615234375, 0.3785400390625, 0.4093170166015625, 0.440093994140625, 0.4708709716796875, 0.50164794921875, 0.5324249267578125, 0.563201904296875, 0.5939788818359375, 0.624755859375, 0.6555328369140625, 0.686309814453125, 0.7170867919921875, 0.74786376953125, 0.7786407470703125, 0.809417724609375, 0.8401947021484375, 0.8709716796875, 0.9017486572265625, 0.932525634765625, 0.9633026123046875, 0.99407958984375, 1.0248565673828125, 1.055633544921875, 1.0864105224609375, 1.1171875]}, "gradients/encoder.encoder.layers.17.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 2.0, 1.0, 1.0, 2.0, 1.0, 0.0, 1.0, 2.0, 7.0, 7.0, 4.0, 10.0, 9.0, 15.0, 22.0, 28.0, 33.0, 87.0, 369.0, 3081.0, 226.0, 69.0, 29.0, 13.0, 13.0, 8.0, 11.0, 3.0, 4.0, 4.0, 4.0, 4.0, 6.0, 1.0, 0.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.283447265625, -0.27564048767089844, -0.2678337097167969, -0.2600269317626953, -0.25222015380859375, -0.2444133758544922, -0.23660659790039062, -0.22879981994628906, -0.2209930419921875, -0.21318626403808594, -0.20537948608398438, -0.1975727081298828, -0.18976593017578125, -0.1819591522216797, -0.17415237426757812, -0.16634559631347656, -0.158538818359375, -0.15073204040527344, -0.14292526245117188, -0.1351184844970703, -0.12731170654296875, -0.11950492858886719, -0.11169815063476562, -0.10389137268066406, -0.0960845947265625, -0.08827781677246094, -0.08047103881835938, -0.07266426086425781, -0.06485748291015625, -0.05705070495605469, -0.049243927001953125, -0.04143714904785156, -0.03363037109375, -0.025823593139648438, -0.018016815185546875, -0.010210037231445312, -0.00240325927734375, 0.0054035186767578125, 0.013210296630859375, 0.021017074584960938, 0.0288238525390625, 0.03663063049316406, 0.044437408447265625, 0.05224418640136719, 0.06005096435546875, 0.06785774230957031, 0.07566452026367188, 0.08347129821777344, 0.091278076171875, 0.09908485412597656, 0.10689163208007812, 0.11469841003417969, 0.12250518798828125, 0.1303119659423828, 0.13811874389648438, 0.14592552185058594, 0.1537322998046875, 0.16153907775878906, 0.16934585571289062, 0.1771526336669922, 0.18495941162109375, 0.1927661895751953, 0.20057296752929688, 0.20837974548339844, 0.2161865234375]}, "gradients/encoder.encoder.layers.17.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 19.0, 966.0, 37.0], "bins": [-10.369791030883789, -10.204100608825684, -10.038410186767578, -9.872719764709473, -9.707029342651367, -9.541338920593262, -9.375648498535156, -9.209957122802734, -9.044267654418945, -8.87857723236084, -8.712886810302734, -8.547196388244629, -8.381505966186523, -8.215815544128418, -8.050125122070312, -7.884434223175049, -7.718743324279785, -7.55305290222168, -7.387362480163574, -7.221672058105469, -7.055981636047363, -6.8902907371521, -6.724600315093994, -6.558909893035889, -6.393219470977783, -6.227529048919678, -6.061838626861572, -5.896148204803467, -5.730457305908203, -5.564766883850098, -5.399076461791992, -5.233386039733887, -5.067695140838623, -4.902004718780518, -4.736314296722412, -4.570623874664307, -4.404932975769043, -4.2392425537109375, -4.073552131652832, -3.9078617095947266, -3.742171287536621, -3.5764808654785156, -3.41079044342041, -3.2450997829437256, -3.07940936088562, -2.9137189388275146, -2.74802827835083, -2.5823378562927246, -2.416647434234619, -2.2509570121765137, -2.085266590118408, -1.9195759296417236, -1.7538855075836182, -1.5881950855255127, -1.4225045442581177, -1.2568140029907227, -1.0911235809326172, -0.9254330992698669, -0.7597426176071167, -0.5940521359443665, -0.4283616542816162, -0.26267117261886597, -0.09698069095611572, 0.0687098503112793, 0.23440021276474]}, "gradients/encoder.encoder.layers.17.final_layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 1.0, 4.0, 3.0, 5.0, 13.0, 11.0, 8.0, 15.0, 8.0, 16.0, 21.0, 32.0, 30.0, 33.0, 22.0, 36.0, 32.0, 35.0, 41.0, 55.0, 48.0, 40.0, 39.0, 55.0, 48.0, 53.0, 35.0, 40.0, 33.0, 22.0, 27.0, 27.0, 21.0, 11.0, 19.0, 9.0, 16.0, 11.0, 8.0, 4.0, 7.0, 6.0, 3.0, 1.0, 3.0, 3.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.5375726819038391, -0.5200539231300354, -0.5025352239608765, -0.48501646518707275, -0.46749773621559143, -0.4499790072441101, -0.4324602484703064, -0.4149415194988251, -0.39742279052734375, -0.3799040615558624, -0.3623853325843811, -0.3448665738105774, -0.32734784483909607, -0.30982911586761475, -0.29231035709381104, -0.2747916281223297, -0.2572728991508484, -0.23975417017936707, -0.22223542630672455, -0.20471668243408203, -0.1871979534626007, -0.16967922449111938, -0.15216048061847687, -0.13464173674583435, -0.11712300777435303, -0.09960427135229111, -0.08208553493022919, -0.06456679850816727, -0.04704806208610535, -0.029529325664043427, -0.012010589241981506, 0.005508147180080414, 0.02302694320678711, 0.04054567962884903, 0.05806441605091095, 0.07558315247297287, 0.09310188889503479, 0.11062062531709671, 0.12813936173915863, 0.14565810561180115, 0.16317683458328247, 0.1806955635547638, 0.1982143074274063, 0.21573305130004883, 0.23325178027153015, 0.2507705092430115, 0.2682892680168152, 0.2858079969882965, 0.30332672595977783, 0.32084545493125916, 0.3383641839027405, 0.3558829426765442, 0.3734016716480255, 0.39092040061950684, 0.40843915939331055, 0.42595788836479187, 0.4434766173362732, 0.4609953463077545, 0.47851407527923584, 0.49603283405303955, 0.5135515928268433, 0.5310702919960022, 0.5485890507698059, 0.5661077499389648, 0.5836265087127686]}, "gradients/encoder.encoder.layers.17.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 3.0, 3.0, 2.0, 5.0, 6.0, 14.0, 15.0, 15.0, 33.0, 34.0, 51.0, 79.0, 135.0, 240.0, 437.0, 725.0, 1529.0, 3749.0, 10732.0, 53662.0, 839638.0, 112274.0, 16144.0, 5000.0, 1987.0, 884.0, 454.0, 292.0, 167.0, 87.0, 39.0, 38.0, 26.0, 22.0, 14.0, 14.0, 5.0, 4.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 3.0, 0.0, 0.0, 1.0], "bins": [-0.7177734375, -0.696441650390625, -0.67510986328125, -0.653778076171875, -0.6324462890625, -0.611114501953125, -0.58978271484375, -0.568450927734375, -0.547119140625, -0.525787353515625, -0.50445556640625, -0.483123779296875, -0.4617919921875, -0.440460205078125, -0.41912841796875, -0.397796630859375, -0.37646484375, -0.355133056640625, -0.33380126953125, -0.312469482421875, -0.2911376953125, -0.269805908203125, -0.24847412109375, -0.227142333984375, -0.205810546875, -0.184478759765625, -0.16314697265625, -0.141815185546875, -0.1204833984375, -0.099151611328125, -0.07781982421875, -0.056488037109375, -0.03515625, -0.013824462890625, 0.00750732421875, 0.028839111328125, 0.0501708984375, 0.071502685546875, 0.09283447265625, 0.114166259765625, 0.135498046875, 0.156829833984375, 0.17816162109375, 0.199493408203125, 0.2208251953125, 0.242156982421875, 0.26348876953125, 0.284820556640625, 0.30615234375, 0.327484130859375, 0.34881591796875, 0.370147705078125, 0.3914794921875, 0.412811279296875, 0.43414306640625, 0.455474853515625, 0.476806640625, 0.498138427734375, 0.51947021484375, 0.540802001953125, 0.5621337890625, 0.583465576171875, 0.60479736328125, 0.626129150390625, 0.6474609375]}, "gradients/encoder.encoder.layers.17.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 4.0, 6.0, 3.0, 7.0, 4.0, 12.0, 8.0, 10.0, 36.0, 51.0, 91.0, 104.0, 178.0, 150.0, 138.0, 80.0, 37.0, 27.0, 19.0, 10.0, 6.0, 8.0, 6.0, 4.0, 8.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.27490234375, -0.2681751251220703, -0.2614479064941406, -0.25472068786621094, -0.24799346923828125, -0.24126625061035156, -0.23453903198242188, -0.2278118133544922, -0.2210845947265625, -0.2143573760986328, -0.20763015747070312, -0.20090293884277344, -0.19417572021484375, -0.18744850158691406, -0.18072128295898438, -0.1739940643310547, -0.167266845703125, -0.1605396270751953, -0.15381240844726562, -0.14708518981933594, -0.14035797119140625, -0.13363075256347656, -0.12690353393554688, -0.12017631530761719, -0.1134490966796875, -0.10672187805175781, -0.09999465942382812, -0.09326744079589844, -0.08654022216796875, -0.07981300354003906, -0.07308578491210938, -0.06635856628417969, -0.05963134765625, -0.05290412902832031, -0.046176910400390625, -0.03944969177246094, -0.03272247314453125, -0.025995254516601562, -0.019268035888671875, -0.012540817260742188, -0.0058135986328125, 0.0009136199951171875, 0.007640838623046875, 0.014368057250976562, 0.02109527587890625, 0.027822494506835938, 0.034549713134765625, 0.04127693176269531, 0.048004150390625, 0.05473136901855469, 0.061458587646484375, 0.06818580627441406, 0.07491302490234375, 0.08164024353027344, 0.08836746215820312, 0.09509468078613281, 0.1018218994140625, 0.10854911804199219, 0.11527633666992188, 0.12200355529785156, 0.12873077392578125, 0.13545799255371094, 0.14218521118164062, 0.1489124298095703, 0.1556396484375]}, "gradients/encoder.encoder.layers.17.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 3.0, 3.0, 1.0, 7.0, 4.0, 8.0, 10.0, 9.0, 11.0, 16.0, 17.0, 24.0, 28.0, 22.0, 43.0, 49.0, 71.0, 104.0, 142.0, 394.0, 1035.0, 4704.0, 86731.0, 912815.0, 37724.0, 2987.0, 759.0, 298.0, 143.0, 84.0, 64.0, 55.0, 38.0, 30.0, 26.0, 24.0, 26.0, 12.0, 6.0, 10.0, 12.0, 5.0, 5.0, 0.0, 3.0, 2.0, 1.0, 3.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.8955078125, -0.8678512573242188, -0.8401947021484375, -0.8125381469726562, -0.784881591796875, -0.7572250366210938, -0.7295684814453125, -0.7019119262695312, -0.67425537109375, -0.6465988159179688, -0.6189422607421875, -0.5912857055664062, -0.563629150390625, -0.5359725952148438, -0.5083160400390625, -0.48065948486328125, -0.4530029296875, -0.42534637451171875, -0.3976898193359375, -0.37003326416015625, -0.342376708984375, -0.31472015380859375, -0.2870635986328125, -0.25940704345703125, -0.23175048828125, -0.20409393310546875, -0.1764373779296875, -0.14878082275390625, -0.121124267578125, -0.09346771240234375, -0.0658111572265625, -0.03815460205078125, -0.010498046875, 0.01715850830078125, 0.0448150634765625, 0.07247161865234375, 0.100128173828125, 0.12778472900390625, 0.1554412841796875, 0.18309783935546875, 0.21075439453125, 0.23841094970703125, 0.2660675048828125, 0.29372406005859375, 0.321380615234375, 0.34903717041015625, 0.3766937255859375, 0.40435028076171875, 0.4320068359375, 0.45966339111328125, 0.4873199462890625, 0.5149765014648438, 0.542633056640625, 0.5702896118164062, 0.5979461669921875, 0.6256027221679688, 0.65325927734375, 0.6809158325195312, 0.7085723876953125, 0.7362289428710938, 0.763885498046875, 0.7915420532226562, 0.8191986083984375, 0.8468551635742188, 0.87451171875]}, "gradients/encoder.encoder.layers.17.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 3.0, 1.0, 2.0, 3.0, 0.0, 5.0, 5.0, 12.0, 9.0, 6.0, 12.0, 24.0, 22.0, 21.0, 22.0, 26.0, 36.0, 35.0, 36.0, 46.0, 37.0, 41.0, 43.0, 46.0, 44.0, 49.0, 41.0, 36.0, 46.0, 29.0, 40.0, 32.0, 35.0, 26.0, 17.0, 23.0, 20.0, 15.0, 15.0, 11.0, 8.0, 10.0, 8.0, 4.0, 7.0, 1.0, 3.0, 2.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.45458984375, -0.4402122497558594, -0.42583465576171875, -0.4114570617675781, -0.3970794677734375, -0.3827018737792969, -0.36832427978515625, -0.3539466857910156, -0.339569091796875, -0.3251914978027344, -0.31081390380859375, -0.2964363098144531, -0.2820587158203125, -0.2676811218261719, -0.25330352783203125, -0.23892593383789062, -0.22454833984375, -0.21017074584960938, -0.19579315185546875, -0.18141555786132812, -0.1670379638671875, -0.15266036987304688, -0.13828277587890625, -0.12390518188476562, -0.109527587890625, -0.09514999389648438, -0.08077239990234375, -0.06639480590820312, -0.0520172119140625, -0.037639617919921875, -0.02326202392578125, -0.008884429931640625, 0.0054931640625, 0.019870758056640625, 0.03424835205078125, 0.048625946044921875, 0.0630035400390625, 0.07738113403320312, 0.09175872802734375, 0.10613632202148438, 0.120513916015625, 0.13489151000976562, 0.14926910400390625, 0.16364669799804688, 0.1780242919921875, 0.19240188598632812, 0.20677947998046875, 0.22115707397460938, 0.23553466796875, 0.24991226196289062, 0.26428985595703125, 0.2786674499511719, 0.2930450439453125, 0.3074226379394531, 0.32180023193359375, 0.3361778259277344, 0.350555419921875, 0.3649330139160156, 0.37931060791015625, 0.3936882019042969, 0.4080657958984375, 0.4224433898925781, 0.43682098388671875, 0.4511985778808594, 0.465576171875]}, "gradients/encoder.encoder.layers.17.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 7.0, 1.0, 2.0, 7.0, 8.0, 11.0, 15.0, 16.0, 27.0, 55.0, 78.0, 122.0, 189.0, 403.0, 963.0, 3859.0, 47545.0, 961474.0, 29083.0, 3005.0, 835.0, 323.0, 188.0, 115.0, 61.0, 47.0, 41.0, 28.0, 18.0, 5.0, 7.0, 2.0, 7.0, 8.0, 1.0, 0.0, 3.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.005523681640625, -0.005335986614227295, -0.00514829158782959, -0.004960596561431885, -0.00477290153503418, -0.004585206508636475, -0.0043975114822387695, -0.0042098164558410645, -0.004022121429443359, -0.0038344264030456543, -0.0036467313766479492, -0.003459036350250244, -0.003271341323852539, -0.003083646297454834, -0.002895951271057129, -0.002708256244659424, -0.0025205612182617188, -0.0023328661918640137, -0.0021451711654663086, -0.0019574761390686035, -0.0017697811126708984, -0.0015820860862731934, -0.0013943910598754883, -0.0012066960334777832, -0.0010190010070800781, -0.000831305980682373, -0.000643610954284668, -0.0004559159278869629, -0.0002682209014892578, -8.052587509155273e-05, 0.00010716915130615234, 0.0002948641777038574, 0.0004825592041015625, 0.0006702542304992676, 0.0008579492568969727, 0.0010456442832946777, 0.0012333393096923828, 0.0014210343360900879, 0.001608729362487793, 0.001796424388885498, 0.001984119415283203, 0.002171814441680908, 0.0023595094680786133, 0.0025472044944763184, 0.0027348995208740234, 0.0029225945472717285, 0.0031102895736694336, 0.0032979846000671387, 0.0034856796264648438, 0.003673374652862549, 0.003861069679260254, 0.004048764705657959, 0.004236459732055664, 0.004424154758453369, 0.004611849784851074, 0.004799544811248779, 0.004987239837646484, 0.0051749348640441895, 0.0053626298904418945, 0.0055503249168396, 0.005738019943237305, 0.00592571496963501, 0.006113409996032715, 0.00630110502243042, 0.006488800048828125]}, "gradients/encoder.encoder.layers.17.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 0.0, 8.0, 10.0, 9.0, 13.0, 24.0, 31.0, 44.0, 26.0, 58.0, 42.0, 63.0, 78.0, 83.0, 77.0, 69.0, 28.0, 74.0, 69.0, 56.0, 38.0, 29.0, 20.0, 17.0, 4.0, 15.0, 6.0, 6.0, 7.0, 4.0, 2.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-4.172325134277344e-06, -4.060566425323486e-06, -3.948807716369629e-06, -3.8370490074157715e-06, -3.725290298461914e-06, -3.6135315895080566e-06, -3.5017728805541992e-06, -3.390014171600342e-06, -3.2782554626464844e-06, -3.166496753692627e-06, -3.0547380447387695e-06, -2.942979335784912e-06, -2.8312206268310547e-06, -2.7194619178771973e-06, -2.60770320892334e-06, -2.4959444999694824e-06, -2.384185791015625e-06, -2.2724270820617676e-06, -2.16066837310791e-06, -2.0489096641540527e-06, -1.9371509552001953e-06, -1.8253922462463379e-06, -1.7136335372924805e-06, -1.601874828338623e-06, -1.4901161193847656e-06, -1.3783574104309082e-06, -1.2665987014770508e-06, -1.1548399925231934e-06, -1.043081283569336e-06, -9.313225746154785e-07, -8.195638656616211e-07, -7.078051567077637e-07, -5.960464477539062e-07, -4.842877388000488e-07, -3.725290298461914e-07, -2.60770320892334e-07, -1.4901161193847656e-07, -3.725290298461914e-08, 7.450580596923828e-08, 1.862645149230957e-07, 2.980232238769531e-07, 4.0978193283081055e-07, 5.21540641784668e-07, 6.332993507385254e-07, 7.450580596923828e-07, 8.568167686462402e-07, 9.685754776000977e-07, 1.080334186553955e-06, 1.1920928955078125e-06, 1.30385160446167e-06, 1.4156103134155273e-06, 1.5273690223693848e-06, 1.6391277313232422e-06, 1.7508864402770996e-06, 1.862645149230957e-06, 1.9744038581848145e-06, 2.086162567138672e-06, 2.1979212760925293e-06, 2.3096799850463867e-06, 2.421438694000244e-06, 2.5331974029541016e-06, 2.644956111907959e-06, 2.7567148208618164e-06, 2.868473529815674e-06, 2.9802322387695312e-06]}, "gradients/encoder.encoder.layers.17.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 3.0, 3.0, 6.0, 19.0, 9.0, 22.0, 28.0, 46.0, 79.0, 166.0, 605.0, 4092.0, 986699.0, 54728.0, 1387.0, 371.0, 134.0, 82.0, 23.0, 15.0, 16.0, 14.0, 8.0, 3.0, 0.0, 4.0, 0.0, 2.0, 1.0, 0.0, 2.0, 2.0, 0.0, 1.0], "bins": [-0.013916015625, -0.013590753078460693, -0.013265490531921387, -0.01294022798538208, -0.012614965438842773, -0.012289702892303467, -0.01196444034576416, -0.011639177799224854, -0.011313915252685547, -0.01098865270614624, -0.010663390159606934, -0.010338127613067627, -0.01001286506652832, -0.009687602519989014, -0.009362339973449707, -0.0090370774269104, -0.008711814880371094, -0.008386552333831787, -0.00806128978729248, -0.007736027240753174, -0.007410764694213867, -0.0070855021476745605, -0.006760239601135254, -0.006434977054595947, -0.006109714508056641, -0.005784451961517334, -0.005459189414978027, -0.005133926868438721, -0.004808664321899414, -0.004483401775360107, -0.004158139228820801, -0.003832876682281494, -0.0035076141357421875, -0.003182351589202881, -0.0028570890426635742, -0.0025318264961242676, -0.002206563949584961, -0.0018813014030456543, -0.0015560388565063477, -0.001230776309967041, -0.0009055137634277344, -0.0005802512168884277, -0.0002549886703491211, 7.027387619018555e-05, 0.0003955364227294922, 0.0007207989692687988, 0.0010460615158081055, 0.0013713240623474121, 0.0016965866088867188, 0.0020218491554260254, 0.002347111701965332, 0.0026723742485046387, 0.0029976367950439453, 0.003322899341583252, 0.0036481618881225586, 0.003973424434661865, 0.004298686981201172, 0.0046239495277404785, 0.004949212074279785, 0.005274474620819092, 0.0055997371673583984, 0.005924999713897705, 0.006250262260437012, 0.006575524806976318, 0.006900787353515625]}, "gradients/encoder.encoder.layers.17.attention.q_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 1.0, 2.0, 0.0, 3.0, 0.0, 3.0, 6.0, 10.0, 12.0, 11.0, 13.0, 34.0, 48.0, 67.0, 88.0, 112.0, 132.0, 113.0, 115.0, 70.0, 56.0, 35.0, 24.0, 18.0, 11.0, 9.0, 12.0, 4.0, 3.0, 3.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00321197509765625, -0.003048717975616455, -0.00288546085357666, -0.0027222037315368652, -0.0025589466094970703, -0.0023956894874572754, -0.0022324323654174805, -0.0020691752433776855, -0.0019059181213378906, -0.0017426609992980957, -0.0015794038772583008, -0.0014161467552185059, -0.001252889633178711, -0.001089632511138916, -0.0009263753890991211, -0.0007631182670593262, -0.0005998611450195312, -0.00043660402297973633, -0.0002733469009399414, -0.00011008977890014648, 5.316734313964844e-05, 0.00021642446517944336, 0.0003796815872192383, 0.0005429387092590332, 0.0007061958312988281, 0.000869452953338623, 0.001032710075378418, 0.0011959671974182129, 0.0013592243194580078, 0.0015224814414978027, 0.0016857385635375977, 0.0018489956855773926, 0.0020122528076171875, 0.0021755099296569824, 0.0023387670516967773, 0.0025020241737365723, 0.002665281295776367, 0.002828538417816162, 0.002991795539855957, 0.003155052661895752, 0.003318309783935547, 0.003481566905975342, 0.0036448240280151367, 0.0038080811500549316, 0.0039713382720947266, 0.0041345953941345215, 0.004297852516174316, 0.004461109638214111, 0.004624366760253906, 0.004787623882293701, 0.004950881004333496, 0.005114138126373291, 0.005277395248413086, 0.005440652370452881, 0.005603909492492676, 0.005767166614532471, 0.005930423736572266, 0.0060936808586120605, 0.0062569379806518555, 0.00642019510269165, 0.006583452224731445, 0.00674670934677124, 0.006909966468811035, 0.00707322359085083, 0.007236480712890625]}, "gradients/encoder.encoder.layers.17.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 5.0, 2.0, 3.0, 13.0, 20.0, 37.0, 112.0, 337.0, 321.0, 100.0, 29.0, 19.0, 7.0, 2.0, 4.0, 4.0, 1.0, 0.0, 2.0], "bins": [-5.031428337097168, -4.936516761779785, -4.841604709625244, -4.746693134307861, -4.6517815589904785, -4.5568695068359375, -4.461957931518555, -4.367046356201172, -4.272134304046631, -4.177222728729248, -4.082310676574707, -3.987399101257324, -3.8924872875213623, -3.7975754737854004, -3.7026638984680176, -3.6077520847320557, -3.5128402709960938, -3.417928457260132, -3.323016881942749, -3.228105068206787, -3.133193254470825, -3.0382814407348633, -2.9433698654174805, -2.8484580516815186, -2.7535464763641357, -2.658634662628174, -2.563723087310791, -2.468811273574829, -2.373899459838867, -2.2789876461029053, -2.1840760707855225, -2.0891642570495605, -1.9942524433135986, -1.8993407487869263, -1.8044289350509644, -1.709517240524292, -1.61460542678833, -1.5196937322616577, -1.4247820377349854, -1.3298702239990234, -1.2349584102630615, -1.1400467157363892, -1.0451349020004272, -0.9502232074737549, -0.855311393737793, -0.7603996992111206, -0.6654879450798035, -0.5705761909484863, -0.47566449642181396, -0.3807527422904968, -0.2858409881591797, -0.19092926383018494, -0.0960175096988678, -0.0011057555675506592, 0.09380596876144409, 0.18871772289276123, 0.28362947702407837, 0.3785412311553955, 0.47345298528671265, 0.5683647394180298, 0.6632764339447021, 0.7581882476806641, 0.8530999422073364, 0.9480116963386536, 1.0429234504699707]}, "gradients/encoder.encoder.layers.17.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 3.0, 3.0, 8.0, 7.0, 12.0, 6.0, 9.0, 17.0, 18.0, 21.0, 31.0, 35.0, 41.0, 54.0, 46.0, 50.0, 44.0, 48.0, 59.0, 49.0, 48.0, 59.0, 35.0, 46.0, 42.0, 41.0, 32.0, 22.0, 16.0, 15.0, 15.0, 19.0, 16.0, 17.0, 6.0, 8.0, 5.0, 5.0, 3.0, 1.0, 0.0, 0.0, 3.0, 1.0], "bins": [-3.6326589584350586, -3.539174795150757, -3.445690631866455, -3.352206230163574, -3.2587220668792725, -3.1652379035949707, -3.071753740310669, -2.978269577026367, -2.8847851753234863, -2.7913010120391846, -2.697816848754883, -2.604332447052002, -2.5108482837677, -2.4173641204833984, -2.3238799571990967, -2.230395793914795, -2.136911630630493, -2.0434274673461914, -1.9499431848526, -1.8564590215682983, -1.762974739074707, -1.6694905757904053, -1.5760064125061035, -1.4825222492218018, -1.3890379667282104, -1.2955538034439087, -1.2020695209503174, -1.1085853576660156, -1.0151011943817139, -0.9216169118881226, -0.8281327486038208, -0.7346485257148743, -0.6411645412445068, -0.5476803183555603, -0.45419612526893616, -0.360711932182312, -0.2672277092933655, -0.17374348640441895, -0.08025932312011719, 0.013224899768829346, 0.10670912265777588, 0.20019333064556122, 0.29367753863334656, 0.3871617317199707, 0.48064595460891724, 0.5741301774978638, 0.6676143407821655, 0.7610985636711121, 0.8545827865600586, 0.9480670094490051, 1.0415512323379517, 1.1350353956222534, 1.2285196781158447, 1.3220038414001465, 1.4154880046844482, 1.50897216796875, 1.6024564504623413, 1.695940613746643, 1.7894248962402344, 1.8829090595245361, 1.976393222808838, 2.0698776245117188, 2.1633615493774414, 2.2568459510803223, 2.350330114364624]}, "gradients/encoder.encoder.layers.16.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 1.0, 1.0, 1.0, 2.0, 0.0, 3.0, 4.0, 3.0, 8.0, 6.0, 13.0, 23.0, 19.0, 48.0, 87.0, 185.0, 504.0, 4188101.0, 4526.0, 409.0, 152.0, 84.0, 37.0, 23.0, 17.0, 10.0, 7.0, 4.0, 9.0, 4.0, 2.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.00390625, -3.905975341796875, -3.80804443359375, -3.710113525390625, -3.6121826171875, -3.514251708984375, -3.41632080078125, -3.318389892578125, -3.220458984375, -3.122528076171875, -3.02459716796875, -2.926666259765625, -2.8287353515625, -2.730804443359375, -2.63287353515625, -2.534942626953125, -2.43701171875, -2.339080810546875, -2.24114990234375, -2.143218994140625, -2.0452880859375, -1.947357177734375, -1.84942626953125, -1.751495361328125, -1.653564453125, -1.555633544921875, -1.45770263671875, -1.359771728515625, -1.2618408203125, -1.163909912109375, -1.06597900390625, -0.968048095703125, -0.8701171875, -0.772186279296875, -0.67425537109375, -0.576324462890625, -0.4783935546875, -0.380462646484375, -0.28253173828125, -0.184600830078125, -0.086669921875, 0.011260986328125, 0.10919189453125, 0.207122802734375, 0.3050537109375, 0.402984619140625, 0.50091552734375, 0.598846435546875, 0.69677734375, 0.794708251953125, 0.89263916015625, 0.990570068359375, 1.0885009765625, 1.186431884765625, 1.28436279296875, 1.382293701171875, 1.480224609375, 1.578155517578125, 1.67608642578125, 1.774017333984375, 1.8719482421875, 1.969879150390625, 2.06781005859375, 2.165740966796875, 2.263671875]}, "gradients/encoder.encoder.layers.16.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 3.0, 4.0, 3.0, 8.0, 5.0, 9.0, 14.0, 15.0, 34.0, 51.0, 92.0, 123.0, 165.0, 133.0, 136.0, 78.0, 48.0, 23.0, 19.0, 14.0, 8.0, 7.0, 3.0, 9.0, 4.0, 2.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.271484375, -0.26483917236328125, -0.2581939697265625, -0.25154876708984375, -0.244903564453125, -0.23825836181640625, -0.2316131591796875, -0.22496795654296875, -0.21832275390625, -0.21167755126953125, -0.2050323486328125, -0.19838714599609375, -0.191741943359375, -0.18509674072265625, -0.1784515380859375, -0.17180633544921875, -0.1651611328125, -0.15851593017578125, -0.1518707275390625, -0.14522552490234375, -0.138580322265625, -0.13193511962890625, -0.1252899169921875, -0.11864471435546875, -0.11199951171875, -0.10535430908203125, -0.0987091064453125, -0.09206390380859375, -0.085418701171875, -0.07877349853515625, -0.0721282958984375, -0.06548309326171875, -0.058837890625, -0.05219268798828125, -0.0455474853515625, -0.03890228271484375, -0.032257080078125, -0.02561187744140625, -0.0189666748046875, -0.01232147216796875, -0.00567626953125, 0.00096893310546875, 0.0076141357421875, 0.01425933837890625, 0.020904541015625, 0.02754974365234375, 0.0341949462890625, 0.04084014892578125, 0.0474853515625, 0.05413055419921875, 0.0607757568359375, 0.06742095947265625, 0.074066162109375, 0.08071136474609375, 0.0873565673828125, 0.09400177001953125, 0.10064697265625, 0.10729217529296875, 0.1139373779296875, 0.12058258056640625, 0.127227783203125, 0.13387298583984375, 0.1405181884765625, 0.14716339111328125, 0.15380859375]}, "gradients/encoder.encoder.layers.16.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 6.0, 5.0, 10.0, 10.0, 20.0, 55.0, 321.0, 1198.0, 4857.0, 4157473.0, 27752.0, 1960.0, 437.0, 102.0, 32.0, 19.0, 7.0, 5.0, 3.0, 2.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.476806640625, -0.4602546691894531, -0.44370269775390625, -0.4271507263183594, -0.4105987548828125, -0.3940467834472656, -0.37749481201171875, -0.3609428405761719, -0.344390869140625, -0.3278388977050781, -0.31128692626953125, -0.2947349548339844, -0.2781829833984375, -0.2616310119628906, -0.24507904052734375, -0.22852706909179688, -0.21197509765625, -0.19542312622070312, -0.17887115478515625, -0.16231918334960938, -0.1457672119140625, -0.12921524047851562, -0.11266326904296875, -0.09611129760742188, -0.079559326171875, -0.06300735473632812, -0.04645538330078125, -0.029903411865234375, -0.0133514404296875, 0.003200531005859375, 0.01975250244140625, 0.036304473876953125, 0.0528564453125, 0.06940841674804688, 0.08596038818359375, 0.10251235961914062, 0.1190643310546875, 0.13561630249023438, 0.15216827392578125, 0.16872024536132812, 0.185272216796875, 0.20182418823242188, 0.21837615966796875, 0.23492813110351562, 0.2514801025390625, 0.2680320739746094, 0.28458404541015625, 0.3011360168457031, 0.31768798828125, 0.3342399597167969, 0.35079193115234375, 0.3673439025878906, 0.3838958740234375, 0.4004478454589844, 0.41699981689453125, 0.4335517883300781, 0.450103759765625, 0.4666557312011719, 0.48320770263671875, 0.4997596740722656, 0.5163116455078125, 0.5328636169433594, 0.5494155883789062, 0.5659675598144531, 0.58251953125]}, "gradients/encoder.encoder.layers.16.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 2.0, 2.0, 2.0, 2.0, 6.0, 8.0, 12.0, 28.0, 24.0, 174.0, 3670.0, 68.0, 25.0, 19.0, 6.0, 6.0, 3.0, 4.0, 2.0, 4.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.11749267578125, -0.11415481567382812, -0.11081695556640625, -0.10747909545898438, -0.1041412353515625, -0.10080337524414062, -0.09746551513671875, -0.09412765502929688, -0.090789794921875, -0.08745193481445312, -0.08411407470703125, -0.08077621459960938, -0.0774383544921875, -0.07410049438476562, -0.07076263427734375, -0.06742477416992188, -0.0640869140625, -0.060749053955078125, -0.05741119384765625, -0.054073333740234375, -0.0507354736328125, -0.047397613525390625, -0.04405975341796875, -0.040721893310546875, -0.037384033203125, -0.034046173095703125, -0.03070831298828125, -0.027370452880859375, -0.0240325927734375, -0.020694732666015625, -0.01735687255859375, -0.014019012451171875, -0.01068115234375, -0.007343292236328125, -0.00400543212890625, -0.000667572021484375, 0.0026702880859375, 0.006008148193359375, 0.00934600830078125, 0.012683868408203125, 0.016021728515625, 0.019359588623046875, 0.02269744873046875, 0.026035308837890625, 0.0293731689453125, 0.032711029052734375, 0.03604888916015625, 0.039386749267578125, 0.042724609375, 0.046062469482421875, 0.04940032958984375, 0.052738189697265625, 0.0560760498046875, 0.059413909912109375, 0.06275177001953125, 0.06608963012695312, 0.069427490234375, 0.07276535034179688, 0.07610321044921875, 0.07944107055664062, 0.0827789306640625, 0.08611679077148438, 0.08945465087890625, 0.09279251098632812, 0.09613037109375]}, "gradients/encoder.encoder.layers.16.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 9.0, 261.0, 740.0, 9.0, 1.0, 1.0, 1.0], "bins": [-2.511648654937744, -2.4691572189331055, -2.426665782928467, -2.384174346923828, -2.3416826725006104, -2.2991912364959717, -2.256699800491333, -2.2142083644866943, -2.1717169284820557, -2.129225492477417, -2.0867340564727783, -2.0442423820495605, -2.001750946044922, -1.9592595100402832, -1.9167680740356445, -1.8742766380310059, -1.831784963607788, -1.7892935276031494, -1.7468019723892212, -1.7043105363845825, -1.6618191003799438, -1.6193275451660156, -1.576836109161377, -1.5343446731567383, -1.4918532371520996, -1.449361801147461, -1.4068702459335327, -1.364378809928894, -1.3218873739242554, -1.2793958187103271, -1.2369043827056885, -1.1944129467010498, -1.1519213914871216, -1.109429955482483, -1.0669384002685547, -1.024446964263916, -0.9819554686546326, -0.9394639730453491, -0.8969725370407104, -0.854481041431427, -0.8119896054267883, -0.7694981098175049, -0.7270066738128662, -0.6845151782035828, -0.6420236825942993, -0.5995322465896606, -0.5570407509803772, -0.5145492553710938, -0.4720577597618103, -0.42956629395484924, -0.3870747983455658, -0.34458333253860474, -0.3020918369293213, -0.25960037112236023, -0.21710890531539917, -0.17461740970611572, -0.13212595880031586, -0.0896344780921936, -0.04714300483465195, -0.0046515315771102905, 0.03783994913101196, 0.08033142983913422, 0.12282289564609528, 0.16531439125537872, 0.20780585706233978]}, "gradients/encoder.encoder.layers.16.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 5.0, 0.0, 3.0, 3.0, 9.0, 10.0, 10.0, 12.0, 17.0, 14.0, 32.0, 31.0, 33.0, 32.0, 43.0, 35.0, 41.0, 59.0, 42.0, 50.0, 59.0, 55.0, 46.0, 68.0, 42.0, 53.0, 28.0, 29.0, 31.0, 25.0, 15.0, 22.0, 17.0, 12.0, 6.0, 7.0, 6.0, 6.0, 2.0, 3.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.22318094968795776, -0.2162250578403473, -0.20926916599273682, -0.20231327414512634, -0.19535739719867706, -0.1884015053510666, -0.18144561350345612, -0.17448972165584564, -0.16753384470939636, -0.1605779528617859, -0.15362206101417542, -0.14666616916656494, -0.13971029222011566, -0.1327544003725052, -0.12579850852489471, -0.11884261667728424, -0.11188672482967377, -0.1049308329820633, -0.09797494858503342, -0.09101905673742294, -0.08406317234039307, -0.07710728049278259, -0.07015138864517212, -0.06319549679756165, -0.05623961240053177, -0.049283724278211594, -0.04232783615589142, -0.035371944308280945, -0.02841605618596077, -0.021460168063640594, -0.01450427621603012, -0.007548388093709946, -0.0005924999713897705, 0.006363389082252979, 0.013319278135895729, 0.020275168120861053, 0.02723105624318123, 0.034186944365501404, 0.04114283621311188, 0.04809872433543205, 0.05505461245775223, 0.0620105005800724, 0.06896638870239258, 0.07592228055000305, 0.08287817239761353, 0.0898340567946434, 0.09678994864225388, 0.10374583303928375, 0.11070172488689423, 0.1176576167345047, 0.12461350113153458, 0.13156938552856445, 0.13852527737617493, 0.1454811692237854, 0.15243706107139587, 0.15939295291900635, 0.16634884476661682, 0.1733047366142273, 0.18026062846183777, 0.18721652030944824, 0.19417239725589752, 0.201128289103508, 0.20808418095111847, 0.21504007279872894, 0.22199594974517822]}, "gradients/encoder.encoder.layers.16.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 3.0, 3.0, 8.0, 3.0, 9.0, 12.0, 16.0, 28.0, 43.0, 55.0, 85.0, 151.0, 202.0, 417.0, 834.0, 1584.0, 3568.0, 9968.0, 41087.0, 678868.0, 270047.0, 27980.0, 7780.0, 2939.0, 1322.0, 654.0, 331.0, 199.0, 116.0, 96.0, 45.0, 26.0, 20.0, 17.0, 14.0, 4.0, 9.0, 3.0, 3.0, 2.0, 4.0, 4.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.5205078125, -0.5036544799804688, -0.4868011474609375, -0.46994781494140625, -0.453094482421875, -0.43624114990234375, -0.4193878173828125, -0.40253448486328125, -0.38568115234375, -0.36882781982421875, -0.3519744873046875, -0.33512115478515625, -0.318267822265625, -0.30141448974609375, -0.2845611572265625, -0.26770782470703125, -0.2508544921875, -0.23400115966796875, -0.2171478271484375, -0.20029449462890625, -0.183441162109375, -0.16658782958984375, -0.1497344970703125, -0.13288116455078125, -0.11602783203125, -0.09917449951171875, -0.0823211669921875, -0.06546783447265625, -0.048614501953125, -0.03176116943359375, -0.0149078369140625, 0.00194549560546875, 0.018798828125, 0.03565216064453125, 0.0525054931640625, 0.06935882568359375, 0.086212158203125, 0.10306549072265625, 0.1199188232421875, 0.13677215576171875, 0.15362548828125, 0.17047882080078125, 0.1873321533203125, 0.20418548583984375, 0.221038818359375, 0.23789215087890625, 0.2547454833984375, 0.27159881591796875, 0.2884521484375, 0.30530548095703125, 0.3221588134765625, 0.33901214599609375, 0.355865478515625, 0.37271881103515625, 0.3895721435546875, 0.40642547607421875, 0.42327880859375, 0.44013214111328125, 0.4569854736328125, 0.47383880615234375, 0.490692138671875, 0.5075454711914062, 0.5243988037109375, 0.5412521362304688, 0.55810546875]}, "gradients/encoder.encoder.layers.16.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 3.0, 3.0, 6.0, 6.0, 5.0, 8.0, 15.0, 15.0, 32.0, 50.0, 85.0, 118.0, 160.0, 143.0, 137.0, 78.0, 55.0, 21.0, 23.0, 10.0, 12.0, 7.0, 2.0, 9.0, 4.0, 3.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.270263671875, -0.26366233825683594, -0.2570610046386719, -0.2504596710205078, -0.24385833740234375, -0.2372570037841797, -0.23065567016601562, -0.22405433654785156, -0.2174530029296875, -0.21085166931152344, -0.20425033569335938, -0.1976490020751953, -0.19104766845703125, -0.1844463348388672, -0.17784500122070312, -0.17124366760253906, -0.164642333984375, -0.15804100036621094, -0.15143966674804688, -0.1448383331298828, -0.13823699951171875, -0.1316356658935547, -0.12503433227539062, -0.11843299865722656, -0.1118316650390625, -0.10523033142089844, -0.09862899780273438, -0.09202766418457031, -0.08542633056640625, -0.07882499694824219, -0.07222366333007812, -0.06562232971191406, -0.05902099609375, -0.05241966247558594, -0.045818328857421875, -0.03921699523925781, -0.03261566162109375, -0.026014328002929688, -0.019412994384765625, -0.012811660766601562, -0.0062103271484375, 0.0003910064697265625, 0.006992340087890625, 0.013593673706054688, 0.02019500732421875, 0.026796340942382812, 0.033397674560546875, 0.03999900817871094, 0.046600341796875, 0.05320167541503906, 0.059803009033203125, 0.06640434265136719, 0.07300567626953125, 0.07960700988769531, 0.08620834350585938, 0.09280967712402344, 0.0994110107421875, 0.10601234436035156, 0.11261367797851562, 0.11921501159667969, 0.12581634521484375, 0.1324176788330078, 0.13901901245117188, 0.14562034606933594, 0.1522216796875]}, "gradients/encoder.encoder.layers.16.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 0.0, 2.0, 3.0, 6.0, 3.0, 7.0, 10.0, 10.0, 19.0, 12.0, 11.0, 20.0, 25.0, 33.0, 37.0, 57.0, 97.0, 186.0, 288.0, 621.0, 1470.0, 6410.0, 131800.0, 876437.0, 26174.0, 2844.0, 924.0, 405.0, 200.0, 115.0, 76.0, 53.0, 50.0, 22.0, 29.0, 24.0, 23.0, 12.0, 15.0, 13.0, 4.0, 7.0, 4.0, 3.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.77099609375, -0.747100830078125, -0.72320556640625, -0.699310302734375, -0.6754150390625, -0.651519775390625, -0.62762451171875, -0.603729248046875, -0.579833984375, -0.555938720703125, -0.53204345703125, -0.508148193359375, -0.4842529296875, -0.460357666015625, -0.43646240234375, -0.412567138671875, -0.388671875, -0.364776611328125, -0.34088134765625, -0.316986083984375, -0.2930908203125, -0.269195556640625, -0.24530029296875, -0.221405029296875, -0.197509765625, -0.173614501953125, -0.14971923828125, -0.125823974609375, -0.1019287109375, -0.078033447265625, -0.05413818359375, -0.030242919921875, -0.00634765625, 0.017547607421875, 0.04144287109375, 0.065338134765625, 0.0892333984375, 0.113128662109375, 0.13702392578125, 0.160919189453125, 0.184814453125, 0.208709716796875, 0.23260498046875, 0.256500244140625, 0.2803955078125, 0.304290771484375, 0.32818603515625, 0.352081298828125, 0.3759765625, 0.399871826171875, 0.42376708984375, 0.447662353515625, 0.4715576171875, 0.495452880859375, 0.51934814453125, 0.543243408203125, 0.567138671875, 0.591033935546875, 0.61492919921875, 0.638824462890625, 0.6627197265625, 0.686614990234375, 0.71051025390625, 0.734405517578125, 0.75830078125]}, "gradients/encoder.encoder.layers.16.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 3.0, 4.0, 7.0, 4.0, 13.0, 14.0, 11.0, 23.0, 23.0, 24.0, 19.0, 33.0, 34.0, 25.0, 30.0, 38.0, 40.0, 39.0, 41.0, 54.0, 37.0, 32.0, 48.0, 53.0, 40.0, 39.0, 46.0, 37.0, 27.0, 24.0, 28.0, 21.0, 18.0, 9.0, 12.0, 18.0, 9.0, 11.0, 7.0, 3.0, 6.0, 3.0, 2.0, 0.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.4091796875, -0.3962860107421875, -0.383392333984375, -0.3704986572265625, -0.35760498046875, -0.3447113037109375, -0.331817626953125, -0.3189239501953125, -0.3060302734375, -0.2931365966796875, -0.280242919921875, -0.2673492431640625, -0.25445556640625, -0.2415618896484375, -0.228668212890625, -0.2157745361328125, -0.202880859375, -0.1899871826171875, -0.177093505859375, -0.1641998291015625, -0.15130615234375, -0.1384124755859375, -0.125518798828125, -0.1126251220703125, -0.0997314453125, -0.0868377685546875, -0.073944091796875, -0.0610504150390625, -0.04815673828125, -0.0352630615234375, -0.022369384765625, -0.0094757080078125, 0.00341796875, 0.0163116455078125, 0.029205322265625, 0.0420989990234375, 0.05499267578125, 0.0678863525390625, 0.080780029296875, 0.0936737060546875, 0.1065673828125, 0.1194610595703125, 0.132354736328125, 0.1452484130859375, 0.15814208984375, 0.1710357666015625, 0.183929443359375, 0.1968231201171875, 0.209716796875, 0.2226104736328125, 0.235504150390625, 0.2483978271484375, 0.26129150390625, 0.2741851806640625, 0.287078857421875, 0.2999725341796875, 0.3128662109375, 0.3257598876953125, 0.338653564453125, 0.3515472412109375, 0.36444091796875, 0.3773345947265625, 0.390228271484375, 0.4031219482421875, 0.416015625]}, "gradients/encoder.encoder.layers.16.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 4.0, 3.0, 5.0, 5.0, 6.0, 6.0, 6.0, 19.0, 21.0, 27.0, 44.0, 73.0, 120.0, 185.0, 286.0, 531.0, 1233.0, 3697.0, 18889.0, 369183.0, 625432.0, 21860.0, 4125.0, 1360.0, 610.0, 309.0, 178.0, 110.0, 73.0, 48.0, 29.0, 37.0, 14.0, 13.0, 11.0, 3.0, 6.0, 3.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.004619598388671875, -0.0044838786125183105, -0.004348158836364746, -0.004212439060211182, -0.004076719284057617, -0.003940999507904053, -0.0038052797317504883, -0.003669559955596924, -0.0035338401794433594, -0.003398120403289795, -0.0032624006271362305, -0.003126680850982666, -0.0029909610748291016, -0.002855241298675537, -0.0027195215225219727, -0.002583801746368408, -0.0024480819702148438, -0.0023123621940612793, -0.002176642417907715, -0.0020409226417541504, -0.001905202865600586, -0.0017694830894470215, -0.001633763313293457, -0.0014980435371398926, -0.0013623237609863281, -0.0012266039848327637, -0.0010908842086791992, -0.0009551644325256348, -0.0008194446563720703, -0.0006837248802185059, -0.0005480051040649414, -0.00041228532791137695, -0.0002765655517578125, -0.00014084577560424805, -5.125999450683594e-06, 0.00013059377670288086, 0.0002663135528564453, 0.00040203332901000977, 0.0005377531051635742, 0.0006734728813171387, 0.0008091926574707031, 0.0009449124336242676, 0.001080632209777832, 0.0012163519859313965, 0.001352071762084961, 0.0014877915382385254, 0.0016235113143920898, 0.0017592310905456543, 0.0018949508666992188, 0.002030670642852783, 0.0021663904190063477, 0.002302110195159912, 0.0024378299713134766, 0.002573549747467041, 0.0027092695236206055, 0.00284498929977417, 0.0029807090759277344, 0.003116428852081299, 0.0032521486282348633, 0.0033878684043884277, 0.003523588180541992, 0.0036593079566955566, 0.003795027732849121, 0.0039307475090026855, 0.00406646728515625]}, "gradients/encoder.encoder.layers.16.attention.k_proj.bias": {"_type": "histogram", "values": [2.0, 3.0, 3.0, 0.0, 1.0, 3.0, 2.0, 4.0, 6.0, 6.0, 13.0, 7.0, 13.0, 11.0, 16.0, 11.0, 30.0, 25.0, 28.0, 26.0, 40.0, 83.0, 32.0, 42.0, 42.0, 49.0, 89.0, 37.0, 39.0, 38.0, 37.0, 31.0, 57.0, 24.0, 22.0, 26.0, 12.0, 28.0, 15.0, 19.0, 8.0, 6.0, 12.0, 2.0, 4.0, 4.0, 4.0, 0.0, 3.0, 2.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-1.8477439880371094e-06, -1.776963472366333e-06, -1.7061829566955566e-06, -1.6354024410247803e-06, -1.564621925354004e-06, -1.4938414096832275e-06, -1.4230608940124512e-06, -1.3522803783416748e-06, -1.2814998626708984e-06, -1.210719347000122e-06, -1.1399388313293457e-06, -1.0691583156585693e-06, -9.98377799987793e-07, -9.275972843170166e-07, -8.568167686462402e-07, -7.860362529754639e-07, -7.152557373046875e-07, -6.444752216339111e-07, -5.736947059631348e-07, -5.029141902923584e-07, -4.3213367462158203e-07, -3.6135315895080566e-07, -2.905726432800293e-07, -2.1979212760925293e-07, -1.4901161193847656e-07, -7.82310962677002e-08, -7.450580596923828e-09, 6.332993507385254e-08, 1.341104507446289e-07, 2.0489096641540527e-07, 2.7567148208618164e-07, 3.46451997756958e-07, 4.172325134277344e-07, 4.880130290985107e-07, 5.587935447692871e-07, 6.295740604400635e-07, 7.003545761108398e-07, 7.711350917816162e-07, 8.419156074523926e-07, 9.126961231231689e-07, 9.834766387939453e-07, 1.0542571544647217e-06, 1.125037670135498e-06, 1.1958181858062744e-06, 1.2665987014770508e-06, 1.3373792171478271e-06, 1.4081597328186035e-06, 1.4789402484893799e-06, 1.5497207641601562e-06, 1.6205012798309326e-06, 1.691281795501709e-06, 1.7620623111724854e-06, 1.8328428268432617e-06, 1.903623342514038e-06, 1.9744038581848145e-06, 2.045184373855591e-06, 2.115964889526367e-06, 2.1867454051971436e-06, 2.25752592086792e-06, 2.3283064365386963e-06, 2.3990869522094727e-06, 2.469867467880249e-06, 2.5406479835510254e-06, 2.6114284992218018e-06, 2.682209014892578e-06]}, "gradients/encoder.encoder.layers.16.attention.q_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 3.0, 0.0, 6.0, 10.0, 7.0, 10.0, 10.0, 12.0, 16.0, 28.0, 36.0, 56.0, 90.0, 140.0, 355.0, 996.0, 5119.0, 252433.0, 780035.0, 7166.0, 1143.0, 405.0, 182.0, 100.0, 65.0, 41.0, 30.0, 11.0, 18.0, 6.0, 4.0, 9.0, 2.0, 4.0, 6.0, 6.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.00765228271484375, -0.007440090179443359, -0.007227897644042969, -0.007015705108642578, -0.0068035125732421875, -0.006591320037841797, -0.006379127502441406, -0.006166934967041016, -0.005954742431640625, -0.005742549896240234, -0.005530357360839844, -0.005318164825439453, -0.0051059722900390625, -0.004893779754638672, -0.004681587219238281, -0.004469394683837891, -0.0042572021484375, -0.004045009613037109, -0.0038328170776367188, -0.003620624542236328, -0.0034084320068359375, -0.003196239471435547, -0.0029840469360351562, -0.0027718544006347656, -0.002559661865234375, -0.0023474693298339844, -0.0021352767944335938, -0.0019230842590332031, -0.0017108917236328125, -0.0014986991882324219, -0.0012865066528320312, -0.0010743141174316406, -0.00086212158203125, -0.0006499290466308594, -0.00043773651123046875, -0.00022554397583007812, -1.33514404296875e-05, 0.00019884109497070312, 0.00041103363037109375, 0.0006232261657714844, 0.000835418701171875, 0.0010476112365722656, 0.0012598037719726562, 0.0014719963073730469, 0.0016841888427734375, 0.0018963813781738281, 0.0021085739135742188, 0.0023207664489746094, 0.002532958984375, 0.0027451515197753906, 0.0029573440551757812, 0.003169536590576172, 0.0033817291259765625, 0.003593921661376953, 0.0038061141967773438, 0.004018306732177734, 0.004230499267578125, 0.004442691802978516, 0.004654884338378906, 0.004867076873779297, 0.0050792694091796875, 0.005291461944580078, 0.005503654479980469, 0.005715847015380859, 0.00592803955078125]}, "gradients/encoder.encoder.layers.16.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 5.0, 5.0, 3.0, 2.0, 6.0, 3.0, 6.0, 11.0, 11.0, 16.0, 27.0, 24.0, 33.0, 42.0, 59.0, 76.0, 81.0, 88.0, 89.0, 90.0, 75.0, 60.0, 42.0, 34.0, 24.0, 19.0, 15.0, 10.0, 8.0, 9.0, 8.0, 8.0, 10.0, 6.0, 0.0, 3.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.0032024383544921875, -0.00308796763420105, -0.002973496913909912, -0.0028590261936187744, -0.0027445554733276367, -0.002630084753036499, -0.0025156140327453613, -0.0024011433124542236, -0.002286672592163086, -0.0021722018718719482, -0.0020577311515808105, -0.0019432604312896729, -0.0018287897109985352, -0.0017143189907073975, -0.0015998482704162598, -0.001485377550125122, -0.0013709068298339844, -0.0012564361095428467, -0.001141965389251709, -0.0010274946689605713, -0.0009130239486694336, -0.0007985532283782959, -0.0006840825080871582, -0.0005696117877960205, -0.0004551410675048828, -0.0003406703472137451, -0.00022619962692260742, -0.00011172890663146973, 2.7418136596679688e-06, 0.00011721253395080566, 0.00023168325424194336, 0.00034615397453308105, 0.00046062469482421875, 0.0005750954151153564, 0.0006895661354064941, 0.0008040368556976318, 0.0009185075759887695, 0.0010329782962799072, 0.001147449016571045, 0.0012619197368621826, 0.0013763904571533203, 0.001490861177444458, 0.0016053318977355957, 0.0017198026180267334, 0.001834273338317871, 0.0019487440586090088, 0.0020632147789001465, 0.002177685499191284, 0.002292156219482422, 0.0024066269397735596, 0.0025210976600646973, 0.002635568380355835, 0.0027500391006469727, 0.0028645098209381104, 0.002978980541229248, 0.0030934512615203857, 0.0032079219818115234, 0.003322392702102661, 0.003436863422393799, 0.0035513341426849365, 0.0036658048629760742, 0.003780275583267212, 0.0038947463035583496, 0.004009217023849487, 0.004123687744140625]}, "gradients/encoder.encoder.layers.16.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 174.0, 827.0, 14.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-28.484878540039062, -27.977872848510742, -27.470867156982422, -26.9638614654541, -26.45685577392578, -25.94985008239746, -25.44284439086914, -24.935836791992188, -24.4288330078125, -23.92182731628418, -23.41482162475586, -22.90781593322754, -22.40081024169922, -21.8938045501709, -21.386798858642578, -20.879791259765625, -20.372785568237305, -19.865779876708984, -19.358774185180664, -18.851768493652344, -18.344762802124023, -17.837757110595703, -17.330751419067383, -16.823745727539062, -16.31673812866211, -15.809732437133789, -15.302726745605469, -14.795721054077148, -14.288715362548828, -13.781709671020508, -13.274703025817871, -12.76769733428955, -12.260692596435547, -11.753686904907227, -11.246681213378906, -10.739675521850586, -10.232669830322266, -9.725664138793945, -9.218657493591309, -8.711651802062988, -8.204646110534668, -7.697640419006348, -7.190634727478027, -6.683628559112549, -6.1766228675842285, -5.669617176055908, -5.16261100769043, -4.655605316162109, -4.148599624633789, -3.6415939331054688, -3.1345880031585693, -2.62758207321167, -2.1205763816833496, -1.6135706901550293, -1.1065647602081299, -0.5995588302612305, -0.09255290031433105, 0.4144529104232788, 0.9214587211608887, 1.4284645318984985, 1.9354703426361084, 2.4424760341644287, 2.949481964111328, 3.4564878940582275, 3.963493585586548]}, "gradients/encoder.encoder.layers.16.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 5.0, 4.0, 4.0, 5.0, 8.0, 10.0, 6.0, 15.0, 15.0, 10.0, 18.0, 31.0, 26.0, 24.0, 30.0, 32.0, 42.0, 45.0, 48.0, 50.0, 44.0, 56.0, 59.0, 41.0, 51.0, 45.0, 41.0, 32.0, 40.0, 26.0, 22.0, 15.0, 17.0, 19.0, 14.0, 16.0, 10.0, 8.0, 4.0, 9.0, 6.0, 1.0, 2.0, 0.0, 3.0, 2.0, 3.0, 0.0, 1.0, 0.0, 3.0], "bins": [-2.636446237564087, -2.560197353363037, -2.4839484691619873, -2.4076995849609375, -2.331450939178467, -2.255202054977417, -2.178953170776367, -2.1027042865753174, -2.0264554023742676, -1.9502065181732178, -1.8739577531814575, -1.7977088689804077, -1.721459984779358, -1.6452112197875977, -1.5689623355865479, -1.492713451385498, -1.4164646863937378, -1.340215802192688, -1.2639670372009277, -1.187718152999878, -1.1114692687988281, -1.0352203845977783, -0.9589716196060181, -0.8827227354049683, -0.8064739108085632, -0.7302250862121582, -0.6539762020111084, -0.5777273774147034, -0.5014785528182983, -0.42522966861724854, -0.3489808440208435, -0.2727319598197937, -0.19648313522338867, -0.12023428827524185, -0.04398544132709503, 0.03226339817047119, 0.10851225256919861, 0.18476110696792603, 0.26100993156433105, 0.33725881576538086, 0.4135076403617859, 0.4897564947605133, 0.5660053491592407, 0.6422541737556458, 0.7185029983520508, 0.7947518825531006, 0.8710007071495056, 0.9472495913505554, 1.0234984159469604, 1.0997473001480103, 1.1759960651397705, 1.2522449493408203, 1.3284938335418701, 1.40474271774292, 1.4809914827346802, 1.55724036693573, 1.6334891319274902, 1.70973801612854, 1.7859867811203003, 1.86223566532135, 1.9384845495224, 2.01473331451416, 2.09098219871521, 2.1672310829162598, 2.2434799671173096]}, "gradients/encoder.encoder.layers.15.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 4.0, 7.0, 8.0, 6.0, 10.0, 13.0, 25.0, 34.0, 64.0, 117.0, 243.0, 657.0, 13627.0, 4178040.0, 840.0, 273.0, 129.0, 63.0, 47.0, 35.0, 14.0, 8.0, 8.0, 7.0, 9.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.423828125, -3.3404998779296875, -3.257171630859375, -3.1738433837890625, -3.09051513671875, -3.0071868896484375, -2.923858642578125, -2.8405303955078125, -2.7572021484375, -2.6738739013671875, -2.590545654296875, -2.5072174072265625, -2.42388916015625, -2.3405609130859375, -2.257232666015625, -2.1739044189453125, -2.090576171875, -2.0072479248046875, -1.923919677734375, -1.8405914306640625, -1.75726318359375, -1.6739349365234375, -1.590606689453125, -1.5072784423828125, -1.4239501953125, -1.3406219482421875, -1.257293701171875, -1.1739654541015625, -1.09063720703125, -1.0073089599609375, -0.923980712890625, -0.8406524658203125, -0.75732421875, -0.6739959716796875, -0.590667724609375, -0.5073394775390625, -0.42401123046875, -0.3406829833984375, -0.257354736328125, -0.1740264892578125, -0.0906982421875, -0.0073699951171875, 0.075958251953125, 0.1592864990234375, 0.24261474609375, 0.3259429931640625, 0.409271240234375, 0.4925994873046875, 0.575927734375, 0.6592559814453125, 0.742584228515625, 0.8259124755859375, 0.90924072265625, 0.9925689697265625, 1.075897216796875, 1.1592254638671875, 1.2425537109375, 1.3258819580078125, 1.409210205078125, 1.4925384521484375, 1.57586669921875, 1.6591949462890625, 1.742523193359375, 1.8258514404296875, 1.9091796875]}, "gradients/encoder.encoder.layers.15.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 4.0, 3.0, 5.0, 6.0, 8.0, 10.0, 18.0, 33.0, 49.0, 73.0, 126.0, 146.0, 151.0, 134.0, 98.0, 47.0, 28.0, 25.0, 14.0, 7.0, 8.0, 5.0, 6.0, 7.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.272705078125, -0.2660675048828125, -0.259429931640625, -0.2527923583984375, -0.24615478515625, -0.2395172119140625, -0.232879638671875, -0.2262420654296875, -0.2196044921875, -0.2129669189453125, -0.206329345703125, -0.1996917724609375, -0.19305419921875, -0.1864166259765625, -0.179779052734375, -0.1731414794921875, -0.16650390625, -0.1598663330078125, -0.153228759765625, -0.1465911865234375, -0.13995361328125, -0.1333160400390625, -0.126678466796875, -0.1200408935546875, -0.1134033203125, -0.1067657470703125, -0.100128173828125, -0.0934906005859375, -0.08685302734375, -0.0802154541015625, -0.073577880859375, -0.0669403076171875, -0.060302734375, -0.0536651611328125, -0.047027587890625, -0.0403900146484375, -0.03375244140625, -0.0271148681640625, -0.020477294921875, -0.0138397216796875, -0.0072021484375, -0.0005645751953125, 0.006072998046875, 0.0127105712890625, 0.01934814453125, 0.0259857177734375, 0.032623291015625, 0.0392608642578125, 0.0458984375, 0.0525360107421875, 0.059173583984375, 0.0658111572265625, 0.07244873046875, 0.0790863037109375, 0.085723876953125, 0.0923614501953125, 0.0989990234375, 0.1056365966796875, 0.112274169921875, 0.1189117431640625, 0.12554931640625, 0.1321868896484375, 0.138824462890625, 0.1454620361328125, 0.152099609375]}, "gradients/encoder.encoder.layers.15.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 2.0, 7.0, 6.0, 21.0, 40.0, 64.0, 92.0, 204.0, 835.0, 6756.0, 4174868.0, 9912.0, 1052.0, 261.0, 97.0, 31.0, 15.0, 12.0, 6.0, 4.0, 3.0, 4.0, 3.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.6328125, -0.60595703125, -0.5791015625, -0.55224609375, -0.525390625, -0.49853515625, -0.4716796875, -0.44482421875, -0.41796875, -0.39111328125, -0.3642578125, -0.33740234375, -0.310546875, -0.28369140625, -0.2568359375, -0.22998046875, -0.203125, -0.17626953125, -0.1494140625, -0.12255859375, -0.095703125, -0.06884765625, -0.0419921875, -0.01513671875, 0.01171875, 0.03857421875, 0.0654296875, 0.09228515625, 0.119140625, 0.14599609375, 0.1728515625, 0.19970703125, 0.2265625, 0.25341796875, 0.2802734375, 0.30712890625, 0.333984375, 0.36083984375, 0.3876953125, 0.41455078125, 0.44140625, 0.46826171875, 0.4951171875, 0.52197265625, 0.548828125, 0.57568359375, 0.6025390625, 0.62939453125, 0.65625, 0.68310546875, 0.7099609375, 0.73681640625, 0.763671875, 0.79052734375, 0.8173828125, 0.84423828125, 0.87109375, 0.89794921875, 0.9248046875, 0.95166015625, 0.978515625, 1.00537109375, 1.0322265625, 1.05908203125, 1.0859375]}, "gradients/encoder.encoder.layers.15.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 2.0, 0.0, 3.0, 1.0, 2.0, 8.0, 9.0, 8.0, 25.0, 177.0, 3735.0, 66.0, 18.0, 6.0, 7.0, 4.0, 4.0, 6.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.40185546875, -0.3923492431640625, -0.382843017578125, -0.3733367919921875, -0.36383056640625, -0.3543243408203125, -0.344818115234375, -0.3353118896484375, -0.3258056640625, -0.3162994384765625, -0.306793212890625, -0.2972869873046875, -0.28778076171875, -0.2782745361328125, -0.268768310546875, -0.2592620849609375, -0.249755859375, -0.2402496337890625, -0.230743408203125, -0.2212371826171875, -0.21173095703125, -0.2022247314453125, -0.192718505859375, -0.1832122802734375, -0.1737060546875, -0.1641998291015625, -0.154693603515625, -0.1451873779296875, -0.13568115234375, -0.1261749267578125, -0.116668701171875, -0.1071624755859375, -0.09765625, -0.0881500244140625, -0.078643798828125, -0.0691375732421875, -0.05963134765625, -0.0501251220703125, -0.040618896484375, -0.0311126708984375, -0.0216064453125, -0.0121002197265625, -0.002593994140625, 0.0069122314453125, 0.01641845703125, 0.0259246826171875, 0.035430908203125, 0.0449371337890625, 0.054443359375, 0.0639495849609375, 0.073455810546875, 0.0829620361328125, 0.09246826171875, 0.1019744873046875, 0.111480712890625, 0.1209869384765625, 0.1304931640625, 0.1399993896484375, 0.149505615234375, 0.1590118408203125, 0.16851806640625, 0.1780242919921875, 0.187530517578125, 0.1970367431640625, 0.20654296875]}, "gradients/encoder.encoder.layers.15.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 766.0, 246.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.7236753702163696, -1.6082929372787476, -1.4929105043411255, -1.3775280714035034, -1.2621456384658813, -1.1467633247375488, -1.0313808917999268, -0.9159983992576599, -0.8006159663200378, -0.6852335333824158, -0.5698511004447937, -0.454468697309494, -0.33908626437187195, -0.22370386123657227, -0.1083214282989502, 0.007061004638671875, 0.12244343757629395, 0.23782587051391602, 0.3532083034515381, 0.46859070658683777, 0.5839731693267822, 0.6993555426597595, 0.8147379755973816, 0.9301204085350037, 1.0455029010772705, 1.1608853340148926, 1.2762677669525146, 1.3916501998901367, 1.5070326328277588, 1.6224150657653809, 1.737797498703003, 1.853179931640625, 1.968562364578247, 2.083944797515869, 2.199327230453491, 2.3147096633911133, 2.4300920963287354, 2.5454745292663574, 2.6608569622039795, 2.7762393951416016, 2.8916218280792236, 3.0070042610168457, 3.1223866939544678, 3.23776912689209, 3.353151559829712, 3.468533992767334, 3.583916425704956, 3.699298858642578, 3.814681053161621, 3.930063486099243, 4.045445919036865, 4.160828113555908, 4.276210784912109, 4.391592979431152, 4.5069756507873535, 4.6223578453063965, 4.737740516662598, 4.853122711181641, 4.968505382537842, 5.083887577056885, 5.199270248413086, 5.314652442932129, 5.43003511428833, 5.545417308807373, 5.660799980163574]}, "gradients/encoder.encoder.layers.15.final_layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 2.0, 4.0, 6.0, 9.0, 13.0, 15.0, 18.0, 36.0, 46.0, 59.0, 47.0, 94.0, 59.0, 97.0, 85.0, 60.0, 70.0, 74.0, 49.0, 46.0, 35.0, 27.0, 17.0, 13.0, 10.0, 7.0, 4.0, 4.0, 2.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.5583806037902832, -0.5398452877998352, -0.5213099718093872, -0.5027746558189392, -0.4842393100261688, -0.4657039940357208, -0.44716864824295044, -0.42863333225250244, -0.41009801626205444, -0.39156270027160645, -0.37302738428115845, -0.35449203848838806, -0.33595672249794006, -0.31742140650749207, -0.2988860607147217, -0.2803507447242737, -0.2618154287338257, -0.24328011274337769, -0.2247447818517685, -0.2062094509601593, -0.1876741349697113, -0.1691388189792633, -0.1506034880876541, -0.13206815719604492, -0.11353284120559692, -0.09499751776456833, -0.07646219432353973, -0.05792687088251114, -0.039391547441482544, -0.02085622400045395, -0.002320900559425354, 0.016214430332183838, 0.034749746322631836, 0.05328506976366043, 0.07182039320468903, 0.09035571664571762, 0.10889104008674622, 0.1274263560771942, 0.1459616869688034, 0.1644970178604126, 0.1830323338508606, 0.2015676498413086, 0.22010298073291779, 0.23863831162452698, 0.257173627614975, 0.275708943605423, 0.29424428939819336, 0.31277960538864136, 0.33131492137908936, 0.34985023736953735, 0.36838555335998535, 0.38692089915275574, 0.40545621514320374, 0.42399153113365173, 0.4425268769264221, 0.4610621929168701, 0.4795975089073181, 0.4981328248977661, 0.5166681408882141, 0.5352034568786621, 0.5537388324737549, 0.5722741484642029, 0.5908094644546509, 0.6093447804450989, 0.6278800964355469]}, "gradients/encoder.encoder.layers.15.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 3.0, 1.0, 5.0, 3.0, 3.0, 6.0, 4.0, 11.0, 15.0, 28.0, 27.0, 43.0, 73.0, 98.0, 201.0, 414.0, 851.0, 1990.0, 6580.0, 29936.0, 610076.0, 363739.0, 24960.0, 5846.0, 1917.0, 834.0, 385.0, 198.0, 100.0, 75.0, 40.0, 45.0, 27.0, 7.0, 7.0, 6.0, 2.0, 4.0, 4.0, 0.0, 1.0, 1.0, 2.0, 3.0], "bins": [-0.6650390625, -0.6487998962402344, -0.6325607299804688, -0.6163215637207031, -0.6000823974609375, -0.5838432312011719, -0.5676040649414062, -0.5513648986816406, -0.535125732421875, -0.5188865661621094, -0.5026473999023438, -0.4864082336425781, -0.4701690673828125, -0.4539299011230469, -0.43769073486328125, -0.4214515686035156, -0.40521240234375, -0.3889732360839844, -0.37273406982421875, -0.3564949035644531, -0.3402557373046875, -0.3240165710449219, -0.30777740478515625, -0.2915382385253906, -0.275299072265625, -0.2590599060058594, -0.24282073974609375, -0.22658157348632812, -0.2103424072265625, -0.19410324096679688, -0.17786407470703125, -0.16162490844726562, -0.1453857421875, -0.12914657592773438, -0.11290740966796875, -0.09666824340820312, -0.0804290771484375, -0.06418991088867188, -0.04795074462890625, -0.031711578369140625, -0.015472412109375, 0.000766754150390625, 0.01700592041015625, 0.033245086669921875, 0.0494842529296875, 0.06572341918945312, 0.08196258544921875, 0.09820175170898438, 0.11444091796875, 0.13068008422851562, 0.14691925048828125, 0.16315841674804688, 0.1793975830078125, 0.19563674926757812, 0.21187591552734375, 0.22811508178710938, 0.244354248046875, 0.2605934143066406, 0.27683258056640625, 0.2930717468261719, 0.3093109130859375, 0.3255500793457031, 0.34178924560546875, 0.3580284118652344, 0.374267578125]}, "gradients/encoder.encoder.layers.15.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 4.0, 4.0, 5.0, 6.0, 9.0, 12.0, 13.0, 37.0, 51.0, 89.0, 122.0, 152.0, 163.0, 114.0, 89.0, 49.0, 30.0, 18.0, 15.0, 6.0, 8.0, 2.0, 9.0, 4.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.27294921875, -0.2662792205810547, -0.2596092224121094, -0.25293922424316406, -0.24626922607421875, -0.23959922790527344, -0.23292922973632812, -0.2262592315673828, -0.2195892333984375, -0.2129192352294922, -0.20624923706054688, -0.19957923889160156, -0.19290924072265625, -0.18623924255371094, -0.17956924438476562, -0.1728992462158203, -0.166229248046875, -0.1595592498779297, -0.15288925170898438, -0.14621925354003906, -0.13954925537109375, -0.13287925720214844, -0.12620925903320312, -0.11953926086425781, -0.1128692626953125, -0.10619926452636719, -0.09952926635742188, -0.09285926818847656, -0.08618927001953125, -0.07951927185058594, -0.07284927368164062, -0.06617927551269531, -0.05950927734375, -0.05283927917480469, -0.046169281005859375, -0.03949928283691406, -0.03282928466796875, -0.026159286499023438, -0.019489288330078125, -0.012819290161132812, -0.0061492919921875, 0.0005207061767578125, 0.007190704345703125, 0.013860702514648438, 0.02053070068359375, 0.027200698852539062, 0.033870697021484375, 0.04054069519042969, 0.047210693359375, 0.05388069152832031, 0.060550689697265625, 0.06722068786621094, 0.07389068603515625, 0.08056068420410156, 0.08723068237304688, 0.09390068054199219, 0.1005706787109375, 0.10724067687988281, 0.11391067504882812, 0.12058067321777344, 0.12725067138671875, 0.13392066955566406, 0.14059066772460938, 0.1472606658935547, 0.1539306640625]}, "gradients/encoder.encoder.layers.15.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 1.0, 1.0, 2.0, 4.0, 7.0, 8.0, 10.0, 8.0, 12.0, 8.0, 13.0, 15.0, 17.0, 29.0, 21.0, 37.0, 42.0, 51.0, 80.0, 110.0, 189.0, 381.0, 874.0, 3820.0, 56039.0, 921273.0, 60011.0, 3647.0, 843.0, 374.0, 190.0, 119.0, 74.0, 52.0, 26.0, 24.0, 22.0, 20.0, 18.0, 17.0, 14.0, 8.0, 8.0, 17.0, 2.0, 3.0, 4.0, 4.0, 7.0, 3.0, 2.0, 3.0, 3.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.595703125, -0.5755157470703125, -0.555328369140625, -0.5351409912109375, -0.51495361328125, -0.4947662353515625, -0.474578857421875, -0.4543914794921875, -0.4342041015625, -0.4140167236328125, -0.393829345703125, -0.3736419677734375, -0.35345458984375, -0.3332672119140625, -0.313079833984375, -0.2928924560546875, -0.272705078125, -0.2525177001953125, -0.232330322265625, -0.2121429443359375, -0.19195556640625, -0.1717681884765625, -0.151580810546875, -0.1313934326171875, -0.1112060546875, -0.0910186767578125, -0.070831298828125, -0.0506439208984375, -0.03045654296875, -0.0102691650390625, 0.009918212890625, 0.0301055908203125, 0.05029296875, 0.0704803466796875, 0.090667724609375, 0.1108551025390625, 0.13104248046875, 0.1512298583984375, 0.171417236328125, 0.1916046142578125, 0.2117919921875, 0.2319793701171875, 0.252166748046875, 0.2723541259765625, 0.29254150390625, 0.3127288818359375, 0.332916259765625, 0.3531036376953125, 0.373291015625, 0.3934783935546875, 0.413665771484375, 0.4338531494140625, 0.45404052734375, 0.4742279052734375, 0.494415283203125, 0.5146026611328125, 0.5347900390625, 0.5549774169921875, 0.575164794921875, 0.5953521728515625, 0.61553955078125, 0.6357269287109375, 0.655914306640625, 0.6761016845703125, 0.6962890625]}, "gradients/encoder.encoder.layers.15.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 4.0, 2.0, 3.0, 7.0, 4.0, 4.0, 3.0, 2.0, 17.0, 8.0, 8.0, 15.0, 15.0, 19.0, 20.0, 22.0, 23.0, 22.0, 42.0, 36.0, 33.0, 29.0, 29.0, 37.0, 36.0, 40.0, 32.0, 40.0, 44.0, 47.0, 31.0, 31.0, 27.0, 35.0, 29.0, 31.0, 32.0, 19.0, 29.0, 17.0, 16.0, 12.0, 8.0, 12.0, 8.0, 10.0, 8.0, 7.0, 4.0, 2.0, 1.0, 1.0, 2.0, 0.0, 1.0, 1.0], "bins": [-0.3701171875, -0.3593864440917969, -0.34865570068359375, -0.3379249572753906, -0.3271942138671875, -0.3164634704589844, -0.30573272705078125, -0.2950019836425781, -0.284271240234375, -0.2735404968261719, -0.26280975341796875, -0.2520790100097656, -0.2413482666015625, -0.23061752319335938, -0.21988677978515625, -0.20915603637695312, -0.19842529296875, -0.18769454956054688, -0.17696380615234375, -0.16623306274414062, -0.1555023193359375, -0.14477157592773438, -0.13404083251953125, -0.12331008911132812, -0.112579345703125, -0.10184860229492188, -0.09111785888671875, -0.08038711547851562, -0.0696563720703125, -0.058925628662109375, -0.04819488525390625, -0.037464141845703125, -0.0267333984375, -0.016002655029296875, -0.00527191162109375, 0.005458831787109375, 0.0161895751953125, 0.026920318603515625, 0.03765106201171875, 0.048381805419921875, 0.059112548828125, 0.06984329223632812, 0.08057403564453125, 0.09130477905273438, 0.1020355224609375, 0.11276626586914062, 0.12349700927734375, 0.13422775268554688, 0.14495849609375, 0.15568923950195312, 0.16641998291015625, 0.17715072631835938, 0.1878814697265625, 0.19861221313476562, 0.20934295654296875, 0.22007369995117188, 0.230804443359375, 0.24153518676757812, 0.25226593017578125, 0.2629966735839844, 0.2737274169921875, 0.2844581604003906, 0.29518890380859375, 0.3059196472167969, 0.316650390625]}, "gradients/encoder.encoder.layers.15.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 4.0, 9.0, 12.0, 13.0, 29.0, 41.0, 98.0, 211.0, 631.0, 3314.0, 109497.0, 926379.0, 6783.0, 971.0, 304.0, 115.0, 56.0, 41.0, 12.0, 10.0, 7.0, 4.0, 4.0, 4.0, 3.0, 2.0, 1.0, 1.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0], "bins": [-0.010955810546875, -0.010652780532836914, -0.010349750518798828, -0.010046720504760742, -0.009743690490722656, -0.00944066047668457, -0.009137630462646484, -0.008834600448608398, -0.008531570434570312, -0.008228540420532227, -0.00792551040649414, -0.007622480392456055, -0.007319450378417969, -0.007016420364379883, -0.006713390350341797, -0.006410360336303711, -0.006107330322265625, -0.005804300308227539, -0.005501270294189453, -0.005198240280151367, -0.004895210266113281, -0.004592180252075195, -0.004289150238037109, -0.0039861202239990234, -0.0036830902099609375, -0.0033800601959228516, -0.0030770301818847656, -0.0027740001678466797, -0.0024709701538085938, -0.002167940139770508, -0.0018649101257324219, -0.001561880111694336, -0.00125885009765625, -0.0009558200836181641, -0.0006527900695800781, -0.0003497600555419922, -4.673004150390625e-05, 0.0002562999725341797, 0.0005593299865722656, 0.0008623600006103516, 0.0011653900146484375, 0.0014684200286865234, 0.0017714500427246094, 0.0020744800567626953, 0.0023775100708007812, 0.002680540084838867, 0.002983570098876953, 0.003286600112915039, 0.003589630126953125, 0.003892660140991211, 0.004195690155029297, 0.004498720169067383, 0.004801750183105469, 0.005104780197143555, 0.005407810211181641, 0.0057108402252197266, 0.0060138702392578125, 0.0063169002532958984, 0.006619930267333984, 0.00692296028137207, 0.007225990295410156, 0.007529020309448242, 0.007832050323486328, 0.008135080337524414, 0.0084381103515625]}, "gradients/encoder.encoder.layers.15.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 6.0, 2.0, 5.0, 4.0, 2.0, 21.0, 15.0, 10.0, 23.0, 46.0, 14.0, 46.0, 73.0, 37.0, 70.0, 70.0, 44.0, 92.0, 79.0, 25.0, 63.0, 64.0, 30.0, 34.0, 19.0, 25.0, 27.0, 10.0, 18.0, 14.0, 5.0, 6.0, 7.0, 3.0, 2.0, 4.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.384185791015625e-06, -2.2854655981063843e-06, -2.1867454051971436e-06, -2.088025212287903e-06, -1.989305019378662e-06, -1.8905848264694214e-06, -1.7918646335601807e-06, -1.69314444065094e-06, -1.5944242477416992e-06, -1.4957040548324585e-06, -1.3969838619232178e-06, -1.298263669013977e-06, -1.1995434761047363e-06, -1.1008232831954956e-06, -1.0021030902862549e-06, -9.033828973770142e-07, -8.046627044677734e-07, -7.059425115585327e-07, -6.07222318649292e-07, -5.085021257400513e-07, -4.0978193283081055e-07, -3.110617399215698e-07, -2.123415470123291e-07, -1.1362135410308838e-07, -1.4901161193847656e-08, 8.381903171539307e-08, 1.825392246246338e-07, 2.812594175338745e-07, 3.7997961044311523e-07, 4.78699803352356e-07, 5.774199962615967e-07, 6.761401891708374e-07, 7.748603820800781e-07, 8.735805749893188e-07, 9.723007678985596e-07, 1.0710209608078003e-06, 1.169741153717041e-06, 1.2684613466262817e-06, 1.3671815395355225e-06, 1.4659017324447632e-06, 1.564621925354004e-06, 1.6633421182632446e-06, 1.7620623111724854e-06, 1.860782504081726e-06, 1.959502696990967e-06, 2.0582228899002075e-06, 2.1569430828094482e-06, 2.255663275718689e-06, 2.3543834686279297e-06, 2.4531036615371704e-06, 2.551823854446411e-06, 2.650544047355652e-06, 2.7492642402648926e-06, 2.8479844331741333e-06, 2.946704626083374e-06, 3.0454248189926147e-06, 3.1441450119018555e-06, 3.242865204811096e-06, 3.341585397720337e-06, 3.4403055906295776e-06, 3.5390257835388184e-06, 3.637745976448059e-06, 3.7364661693573e-06, 3.8351863622665405e-06, 3.933906555175781e-06]}, "gradients/encoder.encoder.layers.15.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0, 0.0, 3.0, 7.0, 3.0, 3.0, 3.0, 8.0, 11.0, 8.0, 18.0, 29.0, 43.0, 76.0, 203.0, 1096.0, 16619.0, 1020880.0, 8182.0, 887.0, 237.0, 99.0, 51.0, 28.0, 16.0, 20.0, 9.0, 7.0, 3.0, 2.0, 2.0, 1.0, 1.0, 1.0, 0.0, 3.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.013427734375, -0.013058662414550781, -0.012689590454101562, -0.012320518493652344, -0.011951446533203125, -0.011582374572753906, -0.011213302612304688, -0.010844230651855469, -0.01047515869140625, -0.010106086730957031, -0.009737014770507812, -0.009367942810058594, -0.008998870849609375, -0.008629798889160156, -0.008260726928710938, -0.007891654968261719, -0.0075225830078125, -0.007153511047363281, -0.0067844390869140625, -0.006415367126464844, -0.006046295166015625, -0.005677223205566406, -0.0053081512451171875, -0.004939079284667969, -0.00457000732421875, -0.004200935363769531, -0.0038318634033203125, -0.0034627914428710938, -0.003093719482421875, -0.0027246475219726562, -0.0023555755615234375, -0.0019865036010742188, -0.001617431640625, -0.0012483596801757812, -0.0008792877197265625, -0.0005102157592773438, -0.000141143798828125, 0.00022792816162109375, 0.0005970001220703125, 0.0009660720825195312, 0.00133514404296875, 0.0017042160034179688, 0.0020732879638671875, 0.0024423599243164062, 0.002811431884765625, 0.0031805038452148438, 0.0035495758056640625, 0.003918647766113281, 0.0042877197265625, 0.004656791687011719, 0.0050258636474609375, 0.005394935607910156, 0.005764007568359375, 0.006133079528808594, 0.0065021514892578125, 0.006871223449707031, 0.00724029541015625, 0.007609367370605469, 0.007978439331054688, 0.008347511291503906, 0.008716583251953125, 0.009085655212402344, 0.009454727172851562, 0.009823799133300781, 0.01019287109375]}, "gradients/encoder.encoder.layers.15.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 3.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 3.0, 3.0, 12.0, 9.0, 12.0, 23.0, 31.0, 41.0, 75.0, 166.0, 170.0, 151.0, 122.0, 64.0, 30.0, 19.0, 13.0, 13.0, 4.0, 9.0, 7.0, 4.0, 3.0, 3.0, 7.0, 3.0, 0.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.0054168701171875, -0.005220770835876465, -0.00502467155456543, -0.0048285722732543945, -0.004632472991943359, -0.004436373710632324, -0.004240274429321289, -0.004044175148010254, -0.0038480758666992188, -0.0036519765853881836, -0.0034558773040771484, -0.0032597780227661133, -0.003063678741455078, -0.002867579460144043, -0.002671480178833008, -0.0024753808975219727, -0.0022792816162109375, -0.0020831823348999023, -0.0018870830535888672, -0.001690983772277832, -0.0014948844909667969, -0.0012987852096557617, -0.0011026859283447266, -0.0009065866470336914, -0.0007104873657226562, -0.0005143880844116211, -0.00031828880310058594, -0.00012218952178955078, 7.390975952148438e-05, 0.00027000904083251953, 0.0004661083221435547, 0.0006622076034545898, 0.000858306884765625, 0.0010544061660766602, 0.0012505054473876953, 0.0014466047286987305, 0.0016427040100097656, 0.0018388032913208008, 0.002034902572631836, 0.002231001853942871, 0.0024271011352539062, 0.0026232004165649414, 0.0028192996978759766, 0.0030153989791870117, 0.003211498260498047, 0.003407597541809082, 0.003603696823120117, 0.0037997961044311523, 0.0039958953857421875, 0.004191994667053223, 0.004388093948364258, 0.004584193229675293, 0.004780292510986328, 0.004976391792297363, 0.0051724910736083984, 0.005368590354919434, 0.005564689636230469, 0.005760788917541504, 0.005956888198852539, 0.006152987480163574, 0.006349086761474609, 0.0065451860427856445, 0.00674128532409668, 0.006937384605407715, 0.00713348388671875]}, "gradients/encoder.encoder.layers.15.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 5.0, 279.0, 726.0, 9.0, 1.0, 0.0, 2.0], "bins": [-26.50030517578125, -26.05184555053711, -25.603384017944336, -25.154924392700195, -24.706462860107422, -24.25800323486328, -23.809541702270508, -23.361082077026367, -22.912620544433594, -22.464160919189453, -22.01569938659668, -21.56723976135254, -21.118778228759766, -20.670318603515625, -20.22185707092285, -19.77339744567871, -19.32493782043457, -18.87647819519043, -18.428016662597656, -17.979557037353516, -17.531095504760742, -17.0826358795166, -16.634174346923828, -16.185714721679688, -15.73725414276123, -15.288793563842773, -14.840332984924316, -14.39187240600586, -13.943412780761719, -13.494951248168945, -13.046491622924805, -12.598031044006348, -12.14957046508789, -11.701109886169434, -11.252649307250977, -10.80418872833252, -10.355728149414062, -9.907268524169922, -9.458807945251465, -9.010347366333008, -8.561885833740234, -8.113425254821777, -7.66496467590332, -7.2165045738220215, -6.7680439949035645, -6.319583415985107, -5.871123313903809, -5.422662734985352, -4.974202632904053, -4.525742053985596, -4.077281951904297, -3.62882137298584, -3.180360794067383, -2.731900215148926, -2.283439874649048, -1.83497953414917, -1.386518955230713, -0.9380584955215454, -0.48959803581237793, -0.04113757610321045, 0.40732288360595703, 0.8557834625244141, 1.304243803024292, 1.75270414352417, 2.201164722442627]}, "gradients/encoder.encoder.layers.15.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 1.0, 1.0, 1.0, 3.0, 4.0, 3.0, 2.0, 8.0, 10.0, 6.0, 18.0, 8.0, 8.0, 11.0, 19.0, 22.0, 26.0, 27.0, 27.0, 47.0, 37.0, 45.0, 38.0, 46.0, 35.0, 59.0, 51.0, 42.0, 62.0, 49.0, 40.0, 33.0, 34.0, 36.0, 18.0, 28.0, 19.0, 19.0, 16.0, 12.0, 12.0, 5.0, 8.0, 6.0, 3.0, 4.0, 2.0, 2.0, 0.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.1186232566833496, -2.054044723510742, -1.9894663095474243, -1.9248878955841064, -1.8603094816207886, -1.7957310676574707, -1.7311525344848633, -1.6665741205215454, -1.6019957065582275, -1.5374172925949097, -1.4728387594223022, -1.4082603454589844, -1.3436819314956665, -1.2791035175323486, -1.2145249843597412, -1.1499465703964233, -1.085368037223816, -1.020789623260498, -0.9562111496925354, -0.8916326761245728, -0.8270542621612549, -0.7624757885932922, -0.6978973150253296, -0.6333189010620117, -0.5687404274940491, -0.5041619539260864, -0.43958353996276855, -0.3750050663948059, -0.31042662262916565, -0.2458481788635254, -0.18126970529556274, -0.11669126152992249, -0.05211281776428223, 0.01246563345193863, 0.07704408466815948, 0.14162254333496094, 0.2062009871006012, 0.27077943086624146, 0.3353579044342041, 0.39993634819984436, 0.4645147919654846, 0.5290932655334473, 0.5936716794967651, 0.6582501530647278, 0.7228286266326904, 0.7874070405960083, 0.851985514163971, 0.9165639877319336, 0.9811424016952515, 1.0457208156585693, 1.1102993488311768, 1.1748777627944946, 1.2394561767578125, 1.30403470993042, 1.3686131238937378, 1.4331915378570557, 1.497770071029663, 1.562348484992981, 1.6269270181655884, 1.6915054321289062, 1.7560838460922241, 1.820662260055542, 1.8852407932281494, 1.9498192071914673, 2.014397621154785]}, "gradients/encoder.encoder.layers.14.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 3.0, 3.0, 1.0, 1.0, 3.0, 2.0, 7.0, 10.0, 14.0, 17.0, 28.0, 34.0, 62.0, 119.0, 187.0, 451.0, 1257.0, 4181183.0, 9350.0, 827.0, 323.0, 165.0, 93.0, 55.0, 31.0, 19.0, 20.0, 13.0, 5.0, 5.0, 2.0, 0.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-1.0244140625, -0.9993057250976562, -0.9741973876953125, -0.9490890502929688, -0.923980712890625, -0.8988723754882812, -0.8737640380859375, -0.8486557006835938, -0.82354736328125, -0.7984390258789062, -0.7733306884765625, -0.7482223510742188, -0.723114013671875, -0.6980056762695312, -0.6728973388671875, -0.6477890014648438, -0.6226806640625, -0.5975723266601562, -0.5724639892578125, -0.5473556518554688, -0.522247314453125, -0.49713897705078125, -0.4720306396484375, -0.44692230224609375, -0.42181396484375, -0.39670562744140625, -0.3715972900390625, -0.34648895263671875, -0.321380615234375, -0.29627227783203125, -0.2711639404296875, -0.24605560302734375, -0.220947265625, -0.19583892822265625, -0.1707305908203125, -0.14562225341796875, -0.120513916015625, -0.09540557861328125, -0.0702972412109375, -0.04518890380859375, -0.02008056640625, 0.00502777099609375, 0.0301361083984375, 0.05524444580078125, 0.080352783203125, 0.10546112060546875, 0.1305694580078125, 0.15567779541015625, 0.1807861328125, 0.20589447021484375, 0.2310028076171875, 0.25611114501953125, 0.281219482421875, 0.30632781982421875, 0.3314361572265625, 0.35654449462890625, 0.38165283203125, 0.40676116943359375, 0.4318695068359375, 0.45697784423828125, 0.482086181640625, 0.5071945190429688, 0.5323028564453125, 0.5574111938476562, 0.58251953125]}, "gradients/encoder.encoder.layers.14.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 0.0, 0.0, 1.0, 1.0, 0.0, 4.0, 3.0, 6.0, 5.0, 11.0, 12.0, 19.0, 42.0, 47.0, 104.0, 111.0, 167.0, 144.0, 120.0, 77.0, 52.0, 29.0, 19.0, 11.0, 6.0, 12.0, 6.0, 3.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.27197265625, -0.2653064727783203, -0.2586402893066406, -0.25197410583496094, -0.24530792236328125, -0.23864173889160156, -0.23197555541992188, -0.2253093719482422, -0.2186431884765625, -0.2119770050048828, -0.20531082153320312, -0.19864463806152344, -0.19197845458984375, -0.18531227111816406, -0.17864608764648438, -0.1719799041748047, -0.165313720703125, -0.1586475372314453, -0.15198135375976562, -0.14531517028808594, -0.13864898681640625, -0.13198280334472656, -0.12531661987304688, -0.11865043640136719, -0.1119842529296875, -0.10531806945800781, -0.09865188598632812, -0.09198570251464844, -0.08531951904296875, -0.07865333557128906, -0.07198715209960938, -0.06532096862792969, -0.05865478515625, -0.05198860168457031, -0.045322418212890625, -0.03865623474121094, -0.03199005126953125, -0.025323867797851562, -0.018657684326171875, -0.011991500854492188, -0.0053253173828125, 0.0013408660888671875, 0.008007049560546875, 0.014673233032226562, 0.02133941650390625, 0.028005599975585938, 0.034671783447265625, 0.04133796691894531, 0.048004150390625, 0.05467033386230469, 0.061336517333984375, 0.06800270080566406, 0.07466888427734375, 0.08133506774902344, 0.08800125122070312, 0.09466743469238281, 0.1013336181640625, 0.10799980163574219, 0.11466598510742188, 0.12133216857910156, 0.12799835205078125, 0.13466453552246094, 0.14133071899414062, 0.1479969024658203, 0.1546630859375]}, "gradients/encoder.encoder.layers.14.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [2.0, 2.0, 0.0, 4.0, 1.0, 1.0, 0.0, 4.0, 12.0, 26.0, 59.0, 135.0, 224.0, 643.0, 4607.0, 4184184.0, 3696.0, 444.0, 139.0, 58.0, 33.0, 16.0, 3.0, 4.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.52294921875, -0.48909759521484375, -0.4552459716796875, -0.42139434814453125, -0.387542724609375, -0.35369110107421875, -0.3198394775390625, -0.28598785400390625, -0.25213623046875, -0.21828460693359375, -0.1844329833984375, -0.15058135986328125, -0.116729736328125, -0.08287811279296875, -0.0490264892578125, -0.01517486572265625, 0.0186767578125, 0.05252838134765625, 0.0863800048828125, 0.12023162841796875, 0.154083251953125, 0.18793487548828125, 0.2217864990234375, 0.25563812255859375, 0.28948974609375, 0.32334136962890625, 0.3571929931640625, 0.39104461669921875, 0.424896240234375, 0.45874786376953125, 0.4925994873046875, 0.5264511108398438, 0.560302734375, 0.5941543579101562, 0.6280059814453125, 0.6618576049804688, 0.695709228515625, 0.7295608520507812, 0.7634124755859375, 0.7972640991210938, 0.83111572265625, 0.8649673461914062, 0.8988189697265625, 0.9326705932617188, 0.966522216796875, 1.0003738403320312, 1.0342254638671875, 1.0680770874023438, 1.1019287109375, 1.1357803344726562, 1.1696319580078125, 1.2034835815429688, 1.237335205078125, 1.2711868286132812, 1.3050384521484375, 1.3388900756835938, 1.37274169921875, 1.4065933227539062, 1.4404449462890625, 1.4742965698242188, 1.508148193359375, 1.5419998168945312, 1.5758514404296875, 1.6097030639648438, 1.6435546875]}, "gradients/encoder.encoder.layers.14.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 5.0, 4.0, 7.0, 9.0, 41.0, 3935.0, 49.0, 12.0, 6.0, 4.0, 1.0, 1.0, 4.0, 2.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0, 2.0], "bins": [-0.384521484375, -0.3766002655029297, -0.3686790466308594, -0.36075782775878906, -0.35283660888671875, -0.34491539001464844, -0.3369941711425781, -0.3290729522705078, -0.3211517333984375, -0.3132305145263672, -0.3053092956542969, -0.29738807678222656, -0.28946685791015625, -0.28154563903808594, -0.2736244201660156, -0.2657032012939453, -0.257781982421875, -0.2498607635498047, -0.24193954467773438, -0.23401832580566406, -0.22609710693359375, -0.21817588806152344, -0.21025466918945312, -0.2023334503173828, -0.1944122314453125, -0.1864910125732422, -0.17856979370117188, -0.17064857482910156, -0.16272735595703125, -0.15480613708496094, -0.14688491821289062, -0.1389636993408203, -0.13104248046875, -0.12312126159667969, -0.11520004272460938, -0.10727882385253906, -0.09935760498046875, -0.09143638610839844, -0.08351516723632812, -0.07559394836425781, -0.0676727294921875, -0.05975151062011719, -0.051830291748046875, -0.04390907287597656, -0.03598785400390625, -0.028066635131835938, -0.020145416259765625, -0.012224197387695312, -0.004302978515625, 0.0036182403564453125, 0.011539459228515625, 0.019460678100585938, 0.02738189697265625, 0.03530311584472656, 0.043224334716796875, 0.05114555358886719, 0.0590667724609375, 0.06698799133300781, 0.07490921020507812, 0.08283042907714844, 0.09075164794921875, 0.09867286682128906, 0.10659408569335938, 0.11451530456542969, 0.1224365234375]}, "gradients/encoder.encoder.layers.14.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 2.0, 4.0, 5.0, 15.0, 25.0, 63.0, 142.0, 519.0, 178.0, 44.0, 11.0, 4.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.42627251148223877, -0.40659308433532715, -0.38691362738609314, -0.3672342002391815, -0.3475547432899475, -0.3278753161430359, -0.30819588899612427, -0.28851643204689026, -0.26883697509765625, -0.24915753304958344, -0.22947809100151062, -0.209798663854599, -0.190119206905365, -0.17043977975845337, -0.15076033771038055, -0.13108089566230774, -0.11140146851539612, -0.0917220264673233, -0.07204258441925049, -0.05236314982175827, -0.032683707773685455, -0.01300426572561264, 0.006675168871879578, 0.026354610919952393, 0.04603405296802521, 0.06571349501609802, 0.08539293706417084, 0.10507237166166306, 0.12475181370973587, 0.14443126320838928, 0.1641106903553009, 0.18379013240337372, 0.20346957445144653, 0.22314901649951935, 0.24282845854759216, 0.2625078856945038, 0.2821873426437378, 0.3018667697906494, 0.32154619693756104, 0.34122565388679504, 0.36090511083602905, 0.3805845379829407, 0.4002639949321747, 0.4199434220790863, 0.4396228790283203, 0.45930230617523193, 0.47898173332214355, 0.49866119027137756, 0.5183405876159668, 0.5380200147628784, 0.55769944190979, 0.5773789286613464, 0.5970583558082581, 0.6167377829551697, 0.6364172101020813, 0.6560966968536377, 0.6757761240005493, 0.6954555511474609, 0.7151349782943726, 0.734814465045929, 0.7544938921928406, 0.7741733193397522, 0.7938527464866638, 0.8135322332382202, 0.8332116603851318]}, "gradients/encoder.encoder.layers.14.final_layer_norm.bias": {"_type": "histogram", "values": [2.0, 2.0, 1.0, 2.0, 2.0, 1.0, 1.0, 4.0, 7.0, 8.0, 11.0, 9.0, 7.0, 15.0, 12.0, 14.0, 26.0, 24.0, 26.0, 26.0, 29.0, 32.0, 44.0, 46.0, 39.0, 49.0, 39.0, 45.0, 45.0, 59.0, 41.0, 40.0, 31.0, 39.0, 37.0, 28.0, 26.0, 25.0, 21.0, 16.0, 21.0, 20.0, 8.0, 13.0, 5.0, 8.0, 3.0, 3.0, 3.0, 0.0, 0.0, 3.0, 5.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.3302924633026123, -0.3182251751422882, -0.3061578869819641, -0.29409059882164, -0.2820233106613159, -0.2699560225009918, -0.2578887343406677, -0.24582144618034363, -0.23375415802001953, -0.22168686985969543, -0.20961958169937134, -0.19755229353904724, -0.18548500537872314, -0.17341771721839905, -0.16135042905807495, -0.14928314089775085, -0.13721586763858795, -0.12514857947826385, -0.11308129131793976, -0.10101400315761566, -0.08894671499729156, -0.07687942683696747, -0.06481214612722397, -0.05274485796689987, -0.040677569806575775, -0.02861028164625168, -0.01654299534857273, -0.004475709050893784, 0.007591579109430313, 0.01965886726975441, 0.03172615170478821, 0.043793439865112305, 0.0558607280254364, 0.0679280161857605, 0.0799953043460846, 0.09206259250640869, 0.10412988066673279, 0.11619716882705688, 0.12826445698738098, 0.14033174514770508, 0.15239903330802917, 0.16446632146835327, 0.17653360962867737, 0.18860089778900146, 0.20066818594932556, 0.21273547410964966, 0.22480276226997375, 0.23687005043029785, 0.24893732368946075, 0.26100459694862366, 0.27307188510894775, 0.28513917326927185, 0.29720646142959595, 0.30927374958992004, 0.32134103775024414, 0.33340832591056824, 0.34547561407089233, 0.35754290223121643, 0.3696101903915405, 0.3816774785518646, 0.3937447667121887, 0.4058120548725128, 0.4178793430328369, 0.429946631193161, 0.4420139193534851]}, "gradients/encoder.encoder.layers.14.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 2.0, 1.0, 4.0, 5.0, 9.0, 3.0, 9.0, 18.0, 19.0, 28.0, 36.0, 58.0, 89.0, 157.0, 272.0, 493.0, 1012.0, 2395.0, 6439.0, 23627.0, 177425.0, 765149.0, 53335.0, 11140.0, 3727.0, 1455.0, 703.0, 343.0, 235.0, 121.0, 68.0, 51.0, 33.0, 18.0, 25.0, 15.0, 8.0, 10.0, 8.0, 4.0, 4.0, 2.0, 4.0, 0.0, 3.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.378662109375, -0.3669166564941406, -0.35517120361328125, -0.3434257507324219, -0.3316802978515625, -0.3199348449707031, -0.30818939208984375, -0.2964439392089844, -0.284698486328125, -0.2729530334472656, -0.26120758056640625, -0.24946212768554688, -0.2377166748046875, -0.22597122192382812, -0.21422576904296875, -0.20248031616210938, -0.19073486328125, -0.17898941040039062, -0.16724395751953125, -0.15549850463867188, -0.1437530517578125, -0.13200759887695312, -0.12026214599609375, -0.10851669311523438, -0.096771240234375, -0.08502578735351562, -0.07328033447265625, -0.061534881591796875, -0.0497894287109375, -0.038043975830078125, -0.02629852294921875, -0.014553070068359375, -0.0028076171875, 0.008937835693359375, 0.02068328857421875, 0.032428741455078125, 0.0441741943359375, 0.055919647216796875, 0.06766510009765625, 0.07941055297851562, 0.091156005859375, 0.10290145874023438, 0.11464691162109375, 0.12639236450195312, 0.1381378173828125, 0.14988327026367188, 0.16162872314453125, 0.17337417602539062, 0.18511962890625, 0.19686508178710938, 0.20861053466796875, 0.22035598754882812, 0.2321014404296875, 0.24384689331054688, 0.25559234619140625, 0.2673377990722656, 0.279083251953125, 0.2908287048339844, 0.30257415771484375, 0.3143196105957031, 0.3260650634765625, 0.3378105163574219, 0.34955596923828125, 0.3613014221191406, 0.373046875]}, "gradients/encoder.encoder.layers.14.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 0.0, 0.0, 2.0, 0.0, 0.0, 4.0, 4.0, 5.0, 8.0, 8.0, 13.0, 23.0, 39.0, 62.0, 97.0, 119.0, 167.0, 144.0, 114.0, 70.0, 50.0, 30.0, 16.0, 13.0, 4.0, 12.0, 5.0, 4.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.270263671875, -0.26362037658691406, -0.2569770812988281, -0.2503337860107422, -0.24369049072265625, -0.2370471954345703, -0.23040390014648438, -0.22376060485839844, -0.2171173095703125, -0.21047401428222656, -0.20383071899414062, -0.1971874237060547, -0.19054412841796875, -0.1839008331298828, -0.17725753784179688, -0.17061424255371094, -0.163970947265625, -0.15732765197753906, -0.15068435668945312, -0.1440410614013672, -0.13739776611328125, -0.1307544708251953, -0.12411117553710938, -0.11746788024902344, -0.1108245849609375, -0.10418128967285156, -0.09753799438476562, -0.09089469909667969, -0.08425140380859375, -0.07760810852050781, -0.07096481323242188, -0.06432151794433594, -0.05767822265625, -0.05103492736816406, -0.044391632080078125, -0.03774833679199219, -0.03110504150390625, -0.024461746215820312, -0.017818450927734375, -0.011175155639648438, -0.0045318603515625, 0.0021114349365234375, 0.008754730224609375, 0.015398025512695312, 0.02204132080078125, 0.028684616088867188, 0.035327911376953125, 0.04197120666503906, 0.048614501953125, 0.05525779724121094, 0.061901092529296875, 0.06854438781738281, 0.07518768310546875, 0.08183097839355469, 0.08847427368164062, 0.09511756896972656, 0.1017608642578125, 0.10840415954589844, 0.11504745483398438, 0.12169075012207031, 0.12833404541015625, 0.1349773406982422, 0.14162063598632812, 0.14826393127441406, 0.1549072265625]}, "gradients/encoder.encoder.layers.14.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 7.0, 2.0, 4.0, 6.0, 6.0, 7.0, 10.0, 12.0, 12.0, 17.0, 17.0, 27.0, 24.0, 19.0, 34.0, 27.0, 34.0, 40.0, 92.0, 242.0, 1611.0, 31462.0, 935443.0, 75829.0, 2777.0, 323.0, 110.0, 57.0, 57.0, 39.0, 25.0, 32.0, 30.0, 13.0, 22.0, 24.0, 15.0, 9.0, 10.0, 8.0, 6.0, 10.0, 6.0, 1.0, 1.0, 3.0, 3.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 2.0], "bins": [-0.62890625, -0.6090164184570312, -0.5891265869140625, -0.5692367553710938, -0.549346923828125, -0.5294570922851562, -0.5095672607421875, -0.48967742919921875, -0.46978759765625, -0.44989776611328125, -0.4300079345703125, -0.41011810302734375, -0.390228271484375, -0.37033843994140625, -0.3504486083984375, -0.33055877685546875, -0.3106689453125, -0.29077911376953125, -0.2708892822265625, -0.25099945068359375, -0.231109619140625, -0.21121978759765625, -0.1913299560546875, -0.17144012451171875, -0.15155029296875, -0.13166046142578125, -0.1117706298828125, -0.09188079833984375, -0.071990966796875, -0.05210113525390625, -0.0322113037109375, -0.01232147216796875, 0.007568359375, 0.02745819091796875, 0.0473480224609375, 0.06723785400390625, 0.087127685546875, 0.10701751708984375, 0.1269073486328125, 0.14679718017578125, 0.16668701171875, 0.18657684326171875, 0.2064666748046875, 0.22635650634765625, 0.246246337890625, 0.26613616943359375, 0.2860260009765625, 0.30591583251953125, 0.3258056640625, 0.34569549560546875, 0.3655853271484375, 0.38547515869140625, 0.405364990234375, 0.42525482177734375, 0.4451446533203125, 0.46503448486328125, 0.48492431640625, 0.5048141479492188, 0.5247039794921875, 0.5445938110351562, 0.564483642578125, 0.5843734741210938, 0.6042633056640625, 0.6241531372070312, 0.64404296875]}, "gradients/encoder.encoder.layers.14.attention.v_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 3.0, 3.0, 1.0, 1.0, 6.0, 10.0, 6.0, 8.0, 10.0, 10.0, 14.0, 24.0, 22.0, 13.0, 30.0, 32.0, 25.0, 38.0, 54.0, 38.0, 52.0, 46.0, 31.0, 28.0, 49.0, 38.0, 50.0, 41.0, 41.0, 31.0, 29.0, 25.0, 34.0, 19.0, 24.0, 27.0, 17.0, 17.0, 12.0, 12.0, 10.0, 7.0, 6.0, 6.0, 4.0, 2.0, 7.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.358154296875, -0.3470916748046875, -0.336029052734375, -0.3249664306640625, -0.31390380859375, -0.3028411865234375, -0.291778564453125, -0.2807159423828125, -0.2696533203125, -0.2585906982421875, -0.247528076171875, -0.2364654541015625, -0.22540283203125, -0.2143402099609375, -0.203277587890625, -0.1922149658203125, -0.18115234375, -0.1700897216796875, -0.159027099609375, -0.1479644775390625, -0.13690185546875, -0.1258392333984375, -0.114776611328125, -0.1037139892578125, -0.0926513671875, -0.0815887451171875, -0.070526123046875, -0.0594635009765625, -0.04840087890625, -0.0373382568359375, -0.026275634765625, -0.0152130126953125, -0.004150390625, 0.0069122314453125, 0.017974853515625, 0.0290374755859375, 0.04010009765625, 0.0511627197265625, 0.062225341796875, 0.0732879638671875, 0.0843505859375, 0.0954132080078125, 0.106475830078125, 0.1175384521484375, 0.12860107421875, 0.1396636962890625, 0.150726318359375, 0.1617889404296875, 0.1728515625, 0.1839141845703125, 0.194976806640625, 0.2060394287109375, 0.21710205078125, 0.2281646728515625, 0.239227294921875, 0.2502899169921875, 0.2613525390625, 0.2724151611328125, 0.283477783203125, 0.2945404052734375, 0.30560302734375, 0.3166656494140625, 0.327728271484375, 0.3387908935546875, 0.349853515625]}, "gradients/encoder.encoder.layers.14.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 3.0, 3.0, 1.0, 4.0, 8.0, 0.0, 10.0, 21.0, 33.0, 53.0, 105.0, 202.0, 435.0, 1146.0, 4572.0, 56065.0, 943963.0, 36555.0, 3475.0, 1056.0, 435.0, 190.0, 100.0, 49.0, 37.0, 11.0, 9.0, 5.0, 4.0, 4.0, 3.0, 2.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.00362396240234375, -0.0035122334957122803, -0.0034005045890808105, -0.003288775682449341, -0.003177046775817871, -0.0030653178691864014, -0.0029535889625549316, -0.002841860055923462, -0.002730131149291992, -0.0026184022426605225, -0.0025066733360290527, -0.002394944429397583, -0.0022832155227661133, -0.0021714866161346436, -0.002059757709503174, -0.001948028802871704, -0.0018362998962402344, -0.0017245709896087646, -0.001612842082977295, -0.0015011131763458252, -0.0013893842697143555, -0.0012776553630828857, -0.001165926456451416, -0.0010541975498199463, -0.0009424686431884766, -0.0008307397365570068, -0.0007190108299255371, -0.0006072819232940674, -0.0004955530166625977, -0.00038382411003112793, -0.0002720952033996582, -0.00016036629676818848, -4.863739013671875e-05, 6.309151649475098e-05, 0.0001748204231262207, 0.00028654932975769043, 0.00039827823638916016, 0.0005100071430206299, 0.0006217360496520996, 0.0007334649562835693, 0.0008451938629150391, 0.0009569227695465088, 0.0010686516761779785, 0.0011803805828094482, 0.001292109489440918, 0.0014038383960723877, 0.0015155673027038574, 0.0016272962093353271, 0.0017390251159667969, 0.0018507540225982666, 0.0019624829292297363, 0.002074211835861206, 0.0021859407424926758, 0.0022976696491241455, 0.0024093985557556152, 0.002521127462387085, 0.0026328563690185547, 0.0027445852756500244, 0.002856314182281494, 0.002968043088912964, 0.0030797719955444336, 0.0031915009021759033, 0.003303229808807373, 0.0034149587154388428, 0.0035266876220703125]}, "gradients/encoder.encoder.layers.14.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 1.0, 0.0, 2.0, 1.0, 6.0, 5.0, 1.0, 8.0, 15.0, 8.0, 26.0, 22.0, 23.0, 48.0, 24.0, 72.0, 21.0, 48.0, 86.0, 42.0, 91.0, 34.0, 42.0, 66.0, 29.0, 66.0, 32.0, 29.0, 37.0, 14.0, 19.0, 30.0, 11.0, 19.0, 3.0, 6.0, 10.0, 4.0, 10.0, 1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.9802322387695312e-06, -2.8973445296287537e-06, -2.814456820487976e-06, -2.7315691113471985e-06, -2.648681402206421e-06, -2.5657936930656433e-06, -2.4829059839248657e-06, -2.400018274784088e-06, -2.3171305656433105e-06, -2.234242856502533e-06, -2.1513551473617554e-06, -2.0684674382209778e-06, -1.9855797290802e-06, -1.9026920199394226e-06, -1.819804310798645e-06, -1.7369166016578674e-06, -1.6540288925170898e-06, -1.5711411833763123e-06, -1.4882534742355347e-06, -1.405365765094757e-06, -1.3224780559539795e-06, -1.239590346813202e-06, -1.1567026376724243e-06, -1.0738149285316467e-06, -9.909272193908691e-07, -9.080395102500916e-07, -8.25151801109314e-07, -7.422640919685364e-07, -6.593763828277588e-07, -5.764886736869812e-07, -4.936009645462036e-07, -4.10713255405426e-07, -3.2782554626464844e-07, -2.4493783712387085e-07, -1.6205012798309326e-07, -7.916241884231567e-08, 3.725290298461914e-09, 8.66129994392395e-08, 1.695007085800171e-07, 2.523884177207947e-07, 3.3527612686157227e-07, 4.1816383600234985e-07, 5.010515451431274e-07, 5.83939254283905e-07, 6.668269634246826e-07, 7.497146725654602e-07, 8.326023817062378e-07, 9.154900908470154e-07, 9.98377799987793e-07, 1.0812655091285706e-06, 1.1641532182693481e-06, 1.2470409274101257e-06, 1.3299286365509033e-06, 1.412816345691681e-06, 1.4957040548324585e-06, 1.578591763973236e-06, 1.6614794731140137e-06, 1.7443671822547913e-06, 1.8272548913955688e-06, 1.9101426005363464e-06, 1.993030309677124e-06, 2.0759180188179016e-06, 2.158805727958679e-06, 2.2416934370994568e-06, 2.3245811462402344e-06]}, "gradients/encoder.encoder.layers.14.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 2.0, 2.0, 9.0, 5.0, 6.0, 12.0, 14.0, 11.0, 22.0, 22.0, 26.0, 54.0, 60.0, 120.0, 400.0, 4732.0, 858019.0, 181543.0, 2749.0, 388.0, 125.0, 61.0, 28.0, 38.0, 30.0, 20.0, 15.0, 8.0, 9.0, 6.0, 7.0, 4.0, 5.0, 4.0, 1.0, 4.0, 0.0, 0.0, 1.0], "bins": [-0.004734039306640625, -0.004621028900146484, -0.004508018493652344, -0.004395008087158203, -0.0042819976806640625, -0.004168987274169922, -0.004055976867675781, -0.003942966461181641, -0.0038299560546875, -0.0037169456481933594, -0.0036039352416992188, -0.003490924835205078, -0.0033779144287109375, -0.003264904022216797, -0.0031518936157226562, -0.0030388832092285156, -0.002925872802734375, -0.0028128623962402344, -0.0026998519897460938, -0.002586841583251953, -0.0024738311767578125, -0.002360820770263672, -0.0022478103637695312, -0.0021347999572753906, -0.00202178955078125, -0.0019087791442871094, -0.0017957687377929688, -0.0016827583312988281, -0.0015697479248046875, -0.0014567375183105469, -0.0013437271118164062, -0.0012307167053222656, -0.001117706298828125, -0.0010046958923339844, -0.0008916854858398438, -0.0007786750793457031, -0.0006656646728515625, -0.0005526542663574219, -0.00043964385986328125, -0.0003266334533691406, -0.000213623046875, -0.00010061264038085938, 1.239776611328125e-05, 0.00012540817260742188, 0.0002384185791015625, 0.0003514289855957031, 0.00046443939208984375, 0.0005774497985839844, 0.000690460205078125, 0.0008034706115722656, 0.0009164810180664062, 0.0010294914245605469, 0.0011425018310546875, 0.0012555122375488281, 0.0013685226440429688, 0.0014815330505371094, 0.00159454345703125, 0.0017075538635253906, 0.0018205642700195312, 0.0019335746765136719, 0.0020465850830078125, 0.002159595489501953, 0.0022726058959960938, 0.0023856163024902344, 0.002498626708984375]}, "gradients/encoder.encoder.layers.14.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 4.0, 2.0, 3.0, 5.0, 4.0, 7.0, 6.0, 9.0, 9.0, 13.0, 20.0, 27.0, 37.0, 23.0, 50.0, 57.0, 65.0, 80.0, 76.0, 75.0, 78.0, 69.0, 66.0, 45.0, 49.0, 25.0, 22.0, 22.0, 11.0, 14.0, 12.0, 6.0, 5.0, 9.0, 2.0, 2.0, 2.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0013895034790039062, -0.001326635479927063, -0.0012637674808502197, -0.0012008994817733765, -0.0011380314826965332, -0.00107516348361969, -0.0010122954845428467, -0.0009494274854660034, -0.0008865594863891602, -0.0008236914873123169, -0.0007608234882354736, -0.0006979554891586304, -0.0006350874900817871, -0.0005722194910049438, -0.0005093514919281006, -0.0004464834928512573, -0.00038361549377441406, -0.0003207474946975708, -0.00025787949562072754, -0.00019501149654388428, -0.00013214349746704102, -6.927549839019775e-05, -6.407499313354492e-06, 5.646049976348877e-05, 0.00011932849884033203, 0.0001821964979171753, 0.00024506449699401855, 0.0003079324960708618, 0.0003708004951477051, 0.00043366849422454834, 0.0004965364933013916, 0.0005594044923782349, 0.0006222724914550781, 0.0006851404905319214, 0.0007480084896087646, 0.0008108764886856079, 0.0008737444877624512, 0.0009366124868392944, 0.0009994804859161377, 0.001062348484992981, 0.0011252164840698242, 0.0011880844831466675, 0.0012509524822235107, 0.001313820481300354, 0.0013766884803771973, 0.0014395564794540405, 0.0015024244785308838, 0.001565292477607727, 0.0016281604766845703, 0.0016910284757614136, 0.0017538964748382568, 0.0018167644739151, 0.0018796324729919434, 0.0019425004720687866, 0.00200536847114563, 0.002068236470222473, 0.0021311044692993164, 0.0021939724683761597, 0.002256840467453003, 0.002319708466529846, 0.0023825764656066895, 0.0024454444646835327, 0.002508312463760376, 0.0025711804628372192, 0.0026340484619140625]}, "gradients/encoder.encoder.layers.14.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 7.0, 10.0, 10.0, 34.0, 57.0, 158.0, 413.0, 175.0, 71.0, 36.0, 15.0, 12.0, 6.0, 5.0, 4.0, 2.0, 1.0, 2.0], "bins": [-3.3075788021087646, -3.2447214126586914, -3.181863784790039, -3.119006395339966, -3.0561490058898926, -2.9932916164398193, -2.930433988571167, -2.8675765991210938, -2.8047192096710205, -2.7418618202209473, -2.679004192352295, -2.6161468029022217, -2.5532894134521484, -2.490432024002075, -2.427574396133423, -2.3647170066833496, -2.3018593788146973, -2.239001989364624, -2.1761443614959717, -2.1132869720458984, -2.050429582595825, -1.9875720739364624, -1.9247145652770996, -1.8618571758270264, -1.7989997863769531, -1.7361422777175903, -1.673284888267517, -1.6104273796081543, -1.547569990158081, -1.4847124814987183, -1.4218549728393555, -1.3589975833892822, -1.296140432357788, -1.2332829236984253, -1.170425534248352, -1.1075680255889893, -1.044710636138916, -0.9818531274795532, -0.9189956784248352, -0.8561382293701172, -0.7932807207107544, -0.7304232716560364, -0.6675658226013184, -0.6047083139419556, -0.5418509244918823, -0.4789934456348419, -0.4161359667778015, -0.3532785177230835, -0.2904210686683655, -0.22756361961364746, -0.16470615565776825, -0.10184869170188904, -0.03899124264717102, 0.023866206407546997, 0.0867236852645874, 0.14958113431930542, 0.21243858337402344, 0.27529603242874146, 0.3381534814834595, 0.4010109603404999, 0.4638684093952179, 0.5267258882522583, 0.5895833373069763, 0.6524407863616943, 0.7152982354164124]}, "gradients/encoder.encoder.layers.14.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 2.0, 2.0, 1.0, 2.0, 2.0, 3.0, 4.0, 4.0, 7.0, 8.0, 6.0, 7.0, 15.0, 15.0, 17.0, 19.0, 23.0, 21.0, 30.0, 41.0, 23.0, 34.0, 36.0, 40.0, 43.0, 33.0, 46.0, 41.0, 45.0, 54.0, 39.0, 34.0, 33.0, 40.0, 31.0, 23.0, 24.0, 18.0, 30.0, 17.0, 14.0, 12.0, 18.0, 13.0, 13.0, 10.0, 5.0, 4.0, 5.0, 3.0, 3.0, 2.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.54013991355896, -1.489830732345581, -1.4395214319229126, -1.3892122507095337, -1.3389030694961548, -1.2885937690734863, -1.2382845878601074, -1.1879754066467285, -1.1376662254333496, -1.0873570442199707, -1.0370477437973022, -0.9867385625839233, -0.9364293813705444, -0.8861201405525208, -0.8358108997344971, -0.7855017185211182, -0.7351924180984497, -0.684883177280426, -0.6345739960670471, -0.5842647552490234, -0.5339555740356445, -0.48364633321762085, -0.43333709239959717, -0.3830278813838959, -0.3327186703681946, -0.2824094593524933, -0.2321002334356308, -0.1817910075187683, -0.13148179650306702, -0.08117258548736572, -0.03086334466934204, 0.019445866346359253, 0.06975507736206055, 0.12006429582834244, 0.17037351429462433, 0.22068274021148682, 0.2709919512271881, 0.3213011622428894, 0.3716104030609131, 0.4219196140766144, 0.4722288250923157, 0.5225380659103394, 0.5728472471237183, 0.6231564879417419, 0.6734657287597656, 0.7237749099731445, 0.7740841507911682, 0.8243933916091919, 0.8747025728225708, 0.9250118136405945, 0.9753209948539734, 1.025630235671997, 1.075939416885376, 1.1262485980987549, 1.1765578985214233, 1.2268670797348022, 1.2771763801574707, 1.3274855613708496, 1.377794861793518, 1.428104043006897, 1.4784132242202759, 1.5287225246429443, 1.5790317058563232, 1.6293408870697021, 1.679650068283081]}, "gradients/encoder.encoder.layers.13.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 3.0, 1.0, 2.0, 2.0, 2.0, 6.0, 7.0, 9.0, 12.0, 23.0, 33.0, 61.0, 120.0, 247.0, 617.0, 2355.0, 4181028.0, 7742.0, 1262.0, 403.0, 169.0, 86.0, 43.0, 18.0, 14.0, 14.0, 5.0, 6.0, 2.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.74609375, -2.678741455078125, -2.61138916015625, -2.544036865234375, -2.4766845703125, -2.409332275390625, -2.34197998046875, -2.274627685546875, -2.207275390625, -2.139923095703125, -2.07257080078125, -2.005218505859375, -1.9378662109375, -1.870513916015625, -1.80316162109375, -1.735809326171875, -1.66845703125, -1.601104736328125, -1.53375244140625, -1.466400146484375, -1.3990478515625, -1.331695556640625, -1.26434326171875, -1.196990966796875, -1.129638671875, -1.062286376953125, -0.99493408203125, -0.927581787109375, -0.8602294921875, -0.792877197265625, -0.72552490234375, -0.658172607421875, -0.5908203125, -0.523468017578125, -0.45611572265625, -0.388763427734375, -0.3214111328125, -0.254058837890625, -0.18670654296875, -0.119354248046875, -0.052001953125, 0.015350341796875, 0.08270263671875, 0.150054931640625, 0.2174072265625, 0.284759521484375, 0.35211181640625, 0.419464111328125, 0.48681640625, 0.554168701171875, 0.62152099609375, 0.688873291015625, 0.7562255859375, 0.823577880859375, 0.89093017578125, 0.958282470703125, 1.025634765625, 1.092987060546875, 1.16033935546875, 1.227691650390625, 1.2950439453125, 1.362396240234375, 1.42974853515625, 1.497100830078125, 1.564453125]}, "gradients/encoder.encoder.layers.13.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 0.0, 1.0, 0.0, 2.0, 1.0, 5.0, 4.0, 5.0, 6.0, 17.0, 25.0, 36.0, 64.0, 89.0, 119.0, 163.0, 147.0, 120.0, 72.0, 47.0, 37.0, 21.0, 8.0, 7.0, 11.0, 3.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.269287109375, -0.2626781463623047, -0.2560691833496094, -0.24946022033691406, -0.24285125732421875, -0.23624229431152344, -0.22963333129882812, -0.2230243682861328, -0.2164154052734375, -0.2098064422607422, -0.20319747924804688, -0.19658851623535156, -0.18997955322265625, -0.18337059020996094, -0.17676162719726562, -0.1701526641845703, -0.163543701171875, -0.1569347381591797, -0.15032577514648438, -0.14371681213378906, -0.13710784912109375, -0.13049888610839844, -0.12388992309570312, -0.11728096008300781, -0.1106719970703125, -0.10406303405761719, -0.09745407104492188, -0.09084510803222656, -0.08423614501953125, -0.07762718200683594, -0.07101821899414062, -0.06440925598144531, -0.05780029296875, -0.05119132995605469, -0.044582366943359375, -0.03797340393066406, -0.03136444091796875, -0.024755477905273438, -0.018146514892578125, -0.011537551879882812, -0.0049285888671875, 0.0016803741455078125, 0.008289337158203125, 0.014898300170898438, 0.02150726318359375, 0.028116226196289062, 0.034725189208984375, 0.04133415222167969, 0.047943115234375, 0.05455207824707031, 0.061161041259765625, 0.06777000427246094, 0.07437896728515625, 0.08098793029785156, 0.08759689331054688, 0.09420585632324219, 0.1008148193359375, 0.10742378234863281, 0.11403274536132812, 0.12064170837402344, 0.12725067138671875, 0.13385963439941406, 0.14046859741210938, 0.1470775604248047, 0.1536865234375]}, "gradients/encoder.encoder.layers.13.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 5.0, 4.0, 4.0, 12.0, 56.0, 149.0, 590.0, 4622.0, 4174184.0, 13214.0, 1100.0, 220.0, 86.0, 19.0, 3.0, 5.0, 2.0, 9.0, 2.0, 2.0, 1.0, 0.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.908203125, -0.8776397705078125, -0.847076416015625, -0.8165130615234375, -0.78594970703125, -0.7553863525390625, -0.724822998046875, -0.6942596435546875, -0.6636962890625, -0.6331329345703125, -0.602569580078125, -0.5720062255859375, -0.54144287109375, -0.5108795166015625, -0.480316162109375, -0.4497528076171875, -0.419189453125, -0.3886260986328125, -0.358062744140625, -0.3274993896484375, -0.29693603515625, -0.2663726806640625, -0.235809326171875, -0.2052459716796875, -0.1746826171875, -0.1441192626953125, -0.113555908203125, -0.0829925537109375, -0.05242919921875, -0.0218658447265625, 0.008697509765625, 0.0392608642578125, 0.06982421875, 0.1003875732421875, 0.130950927734375, 0.1615142822265625, 0.19207763671875, 0.2226409912109375, 0.253204345703125, 0.2837677001953125, 0.3143310546875, 0.3448944091796875, 0.375457763671875, 0.4060211181640625, 0.43658447265625, 0.4671478271484375, 0.497711181640625, 0.5282745361328125, 0.558837890625, 0.5894012451171875, 0.619964599609375, 0.6505279541015625, 0.68109130859375, 0.7116546630859375, 0.742218017578125, 0.7727813720703125, 0.8033447265625, 0.8339080810546875, 0.864471435546875, 0.8950347900390625, 0.92559814453125, 0.9561614990234375, 0.986724853515625, 1.0172882080078125, 1.0478515625]}, "gradients/encoder.encoder.layers.13.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 0.0, 1.0, 2.0, 2.0, 8.0, 1.0, 2.0, 1.0, 4.0, 6.0, 6.0, 10.0, 67.0, 3923.0, 26.0, 5.0, 7.0, 3.0, 1.0, 1.0, 3.0, 2.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.2301025390625, -0.223388671875, -0.2166748046875, -0.2099609375, -0.2032470703125, -0.196533203125, -0.1898193359375, -0.18310546875, -0.1763916015625, -0.169677734375, -0.1629638671875, -0.15625, -0.1495361328125, -0.142822265625, -0.1361083984375, -0.12939453125, -0.1226806640625, -0.115966796875, -0.1092529296875, -0.1025390625, -0.0958251953125, -0.089111328125, -0.0823974609375, -0.07568359375, -0.0689697265625, -0.062255859375, -0.0555419921875, -0.048828125, -0.0421142578125, -0.035400390625, -0.0286865234375, -0.02197265625, -0.0152587890625, -0.008544921875, -0.0018310546875, 0.0048828125, 0.0115966796875, 0.018310546875, 0.0250244140625, 0.03173828125, 0.0384521484375, 0.045166015625, 0.0518798828125, 0.05859375, 0.0653076171875, 0.072021484375, 0.0787353515625, 0.08544921875, 0.0921630859375, 0.098876953125, 0.1055908203125, 0.1123046875, 0.1190185546875, 0.125732421875, 0.1324462890625, 0.13916015625, 0.1458740234375, 0.152587890625, 0.1593017578125, 0.166015625, 0.1727294921875, 0.179443359375, 0.1861572265625, 0.19287109375, 0.1995849609375]}, "gradients/encoder.encoder.layers.13.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 4.0, 15.0, 40.0, 118.0, 625.0, 186.0, 17.0, 5.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.9083490967750549, -0.8900933265686035, -0.8718374967575073, -0.8535817265510559, -0.8353259563446045, -0.8170701265335083, -0.7988143563270569, -0.7805585861206055, -0.7623027563095093, -0.7440469861030579, -0.7257911562919617, -0.7075353860855103, -0.6892796158790588, -0.6710238456726074, -0.6527680158615112, -0.6345122456550598, -0.6162564754486084, -0.598000705242157, -0.5797448754310608, -0.5614891052246094, -0.543233335018158, -0.5249775648117065, -0.5067217350006104, -0.48846596479415894, -0.47021016478538513, -0.45195436477661133, -0.4336985945701599, -0.4154427945613861, -0.3971869945526123, -0.3789312243461609, -0.3606754243373871, -0.3424196243286133, -0.32416391372680664, -0.30590811371803284, -0.2876523435115814, -0.2693965435028076, -0.2511407732963562, -0.2328849732875824, -0.2146291732788086, -0.19637338817119598, -0.17811760306358337, -0.15986181795597076, -0.14160603284835815, -0.12335023283958435, -0.10509444773197174, -0.08683866262435913, -0.06858287006616592, -0.05032707750797272, -0.03207129240036011, -0.013815503567457199, 0.004440285265445709, 0.022696074098348618, 0.040951862931251526, 0.059207648038864136, 0.07746344059705734, 0.09571923315525055, 0.11397501826286316, 0.13223080337047577, 0.15048658847808838, 0.16874238848686218, 0.1869981735944748, 0.2052539587020874, 0.2235097587108612, 0.24176554381847382, 0.2600213289260864]}, "gradients/encoder.encoder.layers.13.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 3.0, 1.0, 0.0, 5.0, 8.0, 10.0, 9.0, 16.0, 13.0, 16.0, 11.0, 30.0, 21.0, 37.0, 42.0, 39.0, 47.0, 53.0, 55.0, 55.0, 50.0, 39.0, 55.0, 43.0, 60.0, 46.0, 33.0, 33.0, 24.0, 26.0, 26.0, 31.0, 21.0, 14.0, 11.0, 9.0, 10.0, 7.0, 3.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.26340508460998535, -0.25566452741622925, -0.24792398512363434, -0.24018344283103943, -0.23244288563728333, -0.22470234334468842, -0.2169618010520935, -0.2092212438583374, -0.2014807015657425, -0.19374015927314758, -0.18599960207939148, -0.17825905978679657, -0.17051851749420166, -0.16277796030044556, -0.15503741800785065, -0.14729687571525574, -0.13955631852149963, -0.13181577622890472, -0.12407521903514862, -0.11633467674255371, -0.1085941269993782, -0.1008535772562027, -0.09311303496360779, -0.08537248522043228, -0.07763193547725677, -0.06989138573408127, -0.06215083971619606, -0.05441029369831085, -0.046669743955135345, -0.03892919421195984, -0.03118864819407463, -0.023448102176189423, -0.015707552433013916, -0.007967004552483559, -0.0002264566719532013, 0.007514091208577156, 0.015254639089107513, 0.02299518883228302, 0.030735734850168228, 0.038476280868053436, 0.04621683061122894, 0.05395738035440445, 0.06169792637228966, 0.06943847239017487, 0.07717902213335037, 0.08491957187652588, 0.09266011416912079, 0.1004006639122963, 0.1081412136554718, 0.11588176339864731, 0.12362231314182281, 0.13136285543441772, 0.13910341262817383, 0.14684395492076874, 0.15458449721336365, 0.16232505440711975, 0.17006559669971466, 0.17780613899230957, 0.18554669618606567, 0.19328723847866058, 0.2010277807712555, 0.2087683379650116, 0.2165088802576065, 0.22424942255020142, 0.23198997974395752]}, "gradients/encoder.encoder.layers.13.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 4.0, 2.0, 1.0, 2.0, 3.0, 0.0, 9.0, 12.0, 9.0, 19.0, 33.0, 28.0, 43.0, 71.0, 108.0, 182.0, 411.0, 773.0, 1895.0, 5646.0, 25417.0, 300360.0, 668597.0, 33697.0, 7122.0, 2268.0, 815.0, 443.0, 243.0, 115.0, 79.0, 47.0, 35.0, 16.0, 19.0, 12.0, 13.0, 2.0, 5.0, 1.0, 2.0, 1.0, 2.0, 2.0, 1.0, 1.0, 0.0, 3.0, 0.0, 1.0], "bins": [-0.5625, -0.5469093322753906, -0.5313186645507812, -0.5157279968261719, -0.5001373291015625, -0.4845466613769531, -0.46895599365234375, -0.4533653259277344, -0.437774658203125, -0.4221839904785156, -0.40659332275390625, -0.3910026550292969, -0.3754119873046875, -0.3598213195800781, -0.34423065185546875, -0.3286399841308594, -0.31304931640625, -0.2974586486816406, -0.28186798095703125, -0.2662773132324219, -0.2506866455078125, -0.23509597778320312, -0.21950531005859375, -0.20391464233398438, -0.188323974609375, -0.17273330688476562, -0.15714263916015625, -0.14155197143554688, -0.1259613037109375, -0.11037063598632812, -0.09477996826171875, -0.07918930053710938, -0.0635986328125, -0.048007965087890625, -0.03241729736328125, -0.016826629638671875, -0.0012359619140625, 0.014354705810546875, 0.02994537353515625, 0.045536041259765625, 0.061126708984375, 0.07671737670898438, 0.09230804443359375, 0.10789871215820312, 0.1234893798828125, 0.13908004760742188, 0.15467071533203125, 0.17026138305664062, 0.18585205078125, 0.20144271850585938, 0.21703338623046875, 0.23262405395507812, 0.2482147216796875, 0.2638053894042969, 0.27939605712890625, 0.2949867248535156, 0.310577392578125, 0.3261680603027344, 0.34175872802734375, 0.3573493957519531, 0.3729400634765625, 0.3885307312011719, 0.40412139892578125, 0.4197120666503906, 0.435302734375]}, "gradients/encoder.encoder.layers.13.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 0.0, 2.0, 1.0, 7.0, 2.0, 5.0, 9.0, 16.0, 21.0, 45.0, 59.0, 98.0, 120.0, 166.0, 147.0, 105.0, 78.0, 45.0, 34.0, 22.0, 10.0, 4.0, 9.0, 5.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.26953125, -0.26290321350097656, -0.2562751770019531, -0.2496471405029297, -0.24301910400390625, -0.2363910675048828, -0.22976303100585938, -0.22313499450683594, -0.2165069580078125, -0.20987892150878906, -0.20325088500976562, -0.1966228485107422, -0.18999481201171875, -0.1833667755126953, -0.17673873901367188, -0.17011070251464844, -0.163482666015625, -0.15685462951660156, -0.15022659301757812, -0.1435985565185547, -0.13697052001953125, -0.1303424835205078, -0.12371444702148438, -0.11708641052246094, -0.1104583740234375, -0.10383033752441406, -0.09720230102539062, -0.09057426452636719, -0.08394622802734375, -0.07731819152832031, -0.07069015502929688, -0.06406211853027344, -0.05743408203125, -0.05080604553222656, -0.044178009033203125, -0.03754997253417969, -0.03092193603515625, -0.024293899536132812, -0.017665863037109375, -0.011037826538085938, -0.0044097900390625, 0.0022182464599609375, 0.008846282958984375, 0.015474319458007812, 0.02210235595703125, 0.028730392456054688, 0.035358428955078125, 0.04198646545410156, 0.048614501953125, 0.05524253845214844, 0.061870574951171875, 0.06849861145019531, 0.07512664794921875, 0.08175468444824219, 0.08838272094726562, 0.09501075744628906, 0.1016387939453125, 0.10826683044433594, 0.11489486694335938, 0.12152290344238281, 0.12815093994140625, 0.1347789764404297, 0.14140701293945312, 0.14803504943847656, 0.1546630859375]}, "gradients/encoder.encoder.layers.13.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 3.0, 1.0, 0.0, 3.0, 3.0, 5.0, 16.0, 17.0, 17.0, 18.0, 26.0, 26.0, 28.0, 37.0, 52.0, 77.0, 124.0, 211.0, 437.0, 1436.0, 9889.0, 512642.0, 511317.0, 9776.0, 1353.0, 411.0, 183.0, 115.0, 82.0, 58.0, 34.0, 33.0, 22.0, 25.0, 20.0, 19.0, 10.0, 11.0, 6.0, 6.0, 7.0, 3.0, 3.0, 0.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.56640625, -0.546875, -0.52734375, -0.5078125, -0.48828125, -0.46875, -0.44921875, -0.4296875, -0.41015625, -0.390625, -0.37109375, -0.3515625, -0.33203125, -0.3125, -0.29296875, -0.2734375, -0.25390625, -0.234375, -0.21484375, -0.1953125, -0.17578125, -0.15625, -0.13671875, -0.1171875, -0.09765625, -0.078125, -0.05859375, -0.0390625, -0.01953125, 0.0, 0.01953125, 0.0390625, 0.05859375, 0.078125, 0.09765625, 0.1171875, 0.13671875, 0.15625, 0.17578125, 0.1953125, 0.21484375, 0.234375, 0.25390625, 0.2734375, 0.29296875, 0.3125, 0.33203125, 0.3515625, 0.37109375, 0.390625, 0.41015625, 0.4296875, 0.44921875, 0.46875, 0.48828125, 0.5078125, 0.52734375, 0.546875, 0.56640625, 0.5859375, 0.60546875, 0.625, 0.64453125, 0.6640625, 0.68359375]}, "gradients/encoder.encoder.layers.13.attention.v_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 0.0, 2.0, 4.0, 6.0, 7.0, 5.0, 12.0, 10.0, 18.0, 18.0, 23.0, 20.0, 29.0, 26.0, 34.0, 36.0, 38.0, 34.0, 39.0, 41.0, 60.0, 45.0, 50.0, 50.0, 41.0, 44.0, 53.0, 36.0, 33.0, 27.0, 21.0, 23.0, 22.0, 23.0, 18.0, 16.0, 17.0, 16.0, 3.0, 3.0, 3.0, 0.0, 1.0, 3.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0], "bins": [-0.391357421875, -0.3801765441894531, -0.36899566650390625, -0.3578147888183594, -0.3466339111328125, -0.3354530334472656, -0.32427215576171875, -0.3130912780761719, -0.301910400390625, -0.2907295227050781, -0.27954864501953125, -0.2683677673339844, -0.2571868896484375, -0.24600601196289062, -0.23482513427734375, -0.22364425659179688, -0.21246337890625, -0.20128250122070312, -0.19010162353515625, -0.17892074584960938, -0.1677398681640625, -0.15655899047851562, -0.14537811279296875, -0.13419723510742188, -0.123016357421875, -0.11183547973632812, -0.10065460205078125, -0.08947372436523438, -0.0782928466796875, -0.06711196899414062, -0.05593109130859375, -0.044750213623046875, -0.0335693359375, -0.022388458251953125, -0.01120758056640625, -2.6702880859375e-05, 0.0111541748046875, 0.022335052490234375, 0.03351593017578125, 0.044696807861328125, 0.055877685546875, 0.06705856323242188, 0.07823944091796875, 0.08942031860351562, 0.1006011962890625, 0.11178207397460938, 0.12296295166015625, 0.13414382934570312, 0.14532470703125, 0.15650558471679688, 0.16768646240234375, 0.17886734008789062, 0.1900482177734375, 0.20122909545898438, 0.21240997314453125, 0.22359085083007812, 0.234771728515625, 0.24595260620117188, 0.25713348388671875, 0.2683143615722656, 0.2794952392578125, 0.2906761169433594, 0.30185699462890625, 0.3130378723144531, 0.32421875]}, "gradients/encoder.encoder.layers.13.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 3.0, 4.0, 1.0, 3.0, 6.0, 7.0, 4.0, 6.0, 16.0, 14.0, 34.0, 24.0, 39.0, 54.0, 77.0, 113.0, 210.0, 397.0, 865.0, 2034.0, 6322.0, 29526.0, 515579.0, 454929.0, 28265.0, 6243.0, 2025.0, 779.0, 360.0, 206.0, 120.0, 97.0, 51.0, 35.0, 23.0, 18.0, 22.0, 13.0, 12.0, 6.0, 5.0, 2.0, 3.0, 6.0, 3.0, 3.0, 1.0, 2.0, 3.0], "bins": [-0.003955841064453125, -0.0038517117500305176, -0.00374758243560791, -0.0036434531211853027, -0.0035393238067626953, -0.003435194492340088, -0.0033310651779174805, -0.003226935863494873, -0.0031228065490722656, -0.003018677234649658, -0.0029145479202270508, -0.0028104186058044434, -0.002706289291381836, -0.0026021599769592285, -0.002498030662536621, -0.0023939013481140137, -0.0022897720336914062, -0.002185642719268799, -0.0020815134048461914, -0.001977384090423584, -0.0018732547760009766, -0.0017691254615783691, -0.0016649961471557617, -0.0015608668327331543, -0.0014567375183105469, -0.0013526082038879395, -0.001248478889465332, -0.0011443495750427246, -0.0010402202606201172, -0.0009360909461975098, -0.0008319616317749023, -0.0007278323173522949, -0.0006237030029296875, -0.0005195736885070801, -0.00041544437408447266, -0.00031131505966186523, -0.0002071857452392578, -0.00010305643081665039, 1.0728836059570312e-06, 0.00010520219802856445, 0.00020933151245117188, 0.0003134608268737793, 0.0004175901412963867, 0.0005217194557189941, 0.0006258487701416016, 0.000729978084564209, 0.0008341073989868164, 0.0009382367134094238, 0.0010423660278320312, 0.0011464953422546387, 0.001250624656677246, 0.0013547539710998535, 0.001458883285522461, 0.0015630125999450684, 0.0016671419143676758, 0.0017712712287902832, 0.0018754005432128906, 0.001979529857635498, 0.0020836591720581055, 0.002187788486480713, 0.0022919178009033203, 0.0023960471153259277, 0.002500176429748535, 0.0026043057441711426, 0.00270843505859375]}, "gradients/encoder.encoder.layers.13.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 3.0, 3.0, 3.0, 2.0, 11.0, 6.0, 5.0, 4.0, 5.0, 13.0, 27.0, 15.0, 20.0, 29.0, 53.0, 31.0, 31.0, 26.0, 41.0, 98.0, 35.0, 34.0, 48.0, 38.0, 69.0, 47.0, 43.0, 27.0, 65.0, 28.0, 17.0, 20.0, 19.0, 28.0, 13.0, 12.0, 7.0, 20.0, 7.0, 8.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.205371856689453e-06, -2.132728695869446e-06, -2.0600855350494385e-06, -1.987442374229431e-06, -1.914799213409424e-06, -1.8421560525894165e-06, -1.7695128917694092e-06, -1.6968697309494019e-06, -1.6242265701293945e-06, -1.5515834093093872e-06, -1.4789402484893799e-06, -1.4062970876693726e-06, -1.3336539268493652e-06, -1.261010766029358e-06, -1.1883676052093506e-06, -1.1157244443893433e-06, -1.043081283569336e-06, -9.704381227493286e-07, -8.977949619293213e-07, -8.25151801109314e-07, -7.525086402893066e-07, -6.798654794692993e-07, -6.07222318649292e-07, -5.345791578292847e-07, -4.6193599700927734e-07, -3.8929283618927e-07, -3.166496753692627e-07, -2.4400651454925537e-07, -1.7136335372924805e-07, -9.872019290924072e-08, -2.60770320892334e-08, 4.6566128730773926e-08, 1.1920928955078125e-07, 1.9185245037078857e-07, 2.644956111907959e-07, 3.371387720108032e-07, 4.0978193283081055e-07, 4.824250936508179e-07, 5.550682544708252e-07, 6.277114152908325e-07, 7.003545761108398e-07, 7.729977369308472e-07, 8.456408977508545e-07, 9.182840585708618e-07, 9.909272193908691e-07, 1.0635703802108765e-06, 1.1362135410308838e-06, 1.2088567018508911e-06, 1.2814998626708984e-06, 1.3541430234909058e-06, 1.426786184310913e-06, 1.4994293451309204e-06, 1.5720725059509277e-06, 1.644715666770935e-06, 1.7173588275909424e-06, 1.7900019884109497e-06, 1.862645149230957e-06, 1.9352883100509644e-06, 2.0079314708709717e-06, 2.080574631690979e-06, 2.1532177925109863e-06, 2.2258609533309937e-06, 2.298504114151001e-06, 2.3711472749710083e-06, 2.4437904357910156e-06]}, "gradients/encoder.encoder.layers.13.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 2.0, 5.0, 2.0, 6.0, 6.0, 6.0, 4.0, 7.0, 9.0, 14.0, 16.0, 21.0, 41.0, 48.0, 101.0, 227.0, 730.0, 4962.0, 767166.0, 270199.0, 3875.0, 604.0, 222.0, 101.0, 60.0, 45.0, 26.0, 11.0, 16.0, 7.0, 6.0, 2.0, 3.0, 2.0, 1.0, 5.0, 1.0, 3.0, 0.0, 5.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0076141357421875, -0.007341742515563965, -0.00706934928894043, -0.0067969560623168945, -0.006524562835693359, -0.006252169609069824, -0.005979776382446289, -0.005707383155822754, -0.005434989929199219, -0.005162596702575684, -0.0048902034759521484, -0.004617810249328613, -0.004345417022705078, -0.004073023796081543, -0.003800630569458008, -0.0035282373428344727, -0.0032558441162109375, -0.0029834508895874023, -0.002711057662963867, -0.002438664436340332, -0.002166271209716797, -0.0018938779830932617, -0.0016214847564697266, -0.0013490915298461914, -0.0010766983032226562, -0.0008043050765991211, -0.0005319118499755859, -0.0002595186233520508, 1.2874603271484375e-05, 0.00028526782989501953, 0.0005576610565185547, 0.0008300542831420898, 0.001102447509765625, 0.0013748407363891602, 0.0016472339630126953, 0.0019196271896362305, 0.0021920204162597656, 0.0024644136428833008, 0.002736806869506836, 0.003009200096130371, 0.0032815933227539062, 0.0035539865493774414, 0.0038263797760009766, 0.004098773002624512, 0.004371166229248047, 0.004643559455871582, 0.004915952682495117, 0.005188345909118652, 0.0054607391357421875, 0.005733132362365723, 0.006005525588989258, 0.006277918815612793, 0.006550312042236328, 0.006822705268859863, 0.0070950984954833984, 0.007367491722106934, 0.007639884948730469, 0.007912278175354004, 0.008184671401977539, 0.008457064628601074, 0.00872945785522461, 0.009001851081848145, 0.00927424430847168, 0.009546637535095215, 0.00981903076171875]}, "gradients/encoder.encoder.layers.13.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 5.0, 0.0, 3.0, 2.0, 4.0, 1.0, 2.0, 2.0, 2.0, 6.0, 6.0, 12.0, 6.0, 17.0, 27.0, 33.0, 40.0, 64.0, 95.0, 92.0, 101.0, 110.0, 92.0, 88.0, 44.0, 39.0, 24.0, 26.0, 11.0, 10.0, 9.0, 7.0, 5.0, 4.0, 6.0, 6.0, 6.0, 1.0, 5.0, 2.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00562286376953125, -0.005466818809509277, -0.005310773849487305, -0.005154728889465332, -0.004998683929443359, -0.004842638969421387, -0.004686594009399414, -0.004530549049377441, -0.004374504089355469, -0.004218459129333496, -0.0040624141693115234, -0.003906369209289551, -0.003750324249267578, -0.0035942792892456055, -0.003438234329223633, -0.00328218936920166, -0.0031261444091796875, -0.002970099449157715, -0.002814054489135742, -0.0026580095291137695, -0.002501964569091797, -0.0023459196090698242, -0.0021898746490478516, -0.002033829689025879, -0.0018777847290039062, -0.0017217397689819336, -0.001565694808959961, -0.0014096498489379883, -0.0012536048889160156, -0.001097559928894043, -0.0009415149688720703, -0.0007854700088500977, -0.000629425048828125, -0.00047338008880615234, -0.0003173351287841797, -0.00016129016876220703, -5.245208740234375e-06, 0.00015079975128173828, 0.00030684471130371094, 0.0004628896713256836, 0.0006189346313476562, 0.0007749795913696289, 0.0009310245513916016, 0.0010870695114135742, 0.0012431144714355469, 0.0013991594314575195, 0.0015552043914794922, 0.0017112493515014648, 0.0018672943115234375, 0.00202333927154541, 0.002179384231567383, 0.0023354291915893555, 0.002491474151611328, 0.0026475191116333008, 0.0028035640716552734, 0.002959609031677246, 0.0031156539916992188, 0.0032716989517211914, 0.003427743911743164, 0.0035837888717651367, 0.0037398338317871094, 0.003895878791809082, 0.004051923751831055, 0.004207968711853027, 0.004364013671875]}, "gradients/encoder.encoder.layers.13.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 13.0, 962.0, 46.0, 2.0], "bins": [-32.29728317260742, -31.77290916442871, -31.248537063598633, -30.724163055419922, -30.199790954589844, -29.675416946411133, -29.151042938232422, -28.626670837402344, -28.102296829223633, -27.577922821044922, -27.053550720214844, -26.529176712036133, -26.004804611206055, -25.480430603027344, -24.956058502197266, -24.431684494018555, -23.907310485839844, -23.382936477661133, -22.858564376831055, -22.334190368652344, -21.809818267822266, -21.285444259643555, -20.761070251464844, -20.236698150634766, -19.712326049804688, -19.187952041625977, -18.6635799407959, -18.139205932617188, -17.61483383178711, -17.0904598236084, -16.566085815429688, -16.04171371459961, -15.517338752746582, -14.992965698242188, -14.468592643737793, -13.944219589233398, -13.419845581054688, -12.895472526550293, -12.371099472045898, -11.846726417541504, -11.32235336303711, -10.797980308532715, -10.27360725402832, -9.74923324584961, -9.224860191345215, -8.70048713684082, -8.176114082336426, -7.651741027832031, -7.12736701965332, -6.602993965148926, -6.078620433807373, -5.5542473793029785, -5.029873847961426, -4.505500793457031, -3.9811277389526367, -3.456754446029663, -2.9323813915252686, -2.408008098602295, -1.8836349248886108, -1.3592617511749268, -0.8348884582519531, -0.3105151653289795, 0.21385788917541504, 0.7382311820983887, 1.2626044750213623]}, "gradients/encoder.encoder.layers.13.layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 2.0, 3.0, 0.0, 2.0, 3.0, 3.0, 7.0, 7.0, 5.0, 7.0, 4.0, 11.0, 9.0, 11.0, 20.0, 11.0, 15.0, 17.0, 23.0, 22.0, 23.0, 25.0, 37.0, 30.0, 40.0, 32.0, 35.0, 35.0, 43.0, 34.0, 46.0, 56.0, 37.0, 49.0, 35.0, 43.0, 34.0, 32.0, 21.0, 30.0, 17.0, 19.0, 13.0, 8.0, 10.0, 9.0, 7.0, 6.0, 10.0, 6.0, 3.0, 2.0, 4.0, 0.0, 3.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0], "bins": [-1.5173245668411255, -1.4699854850769043, -1.422646403312683, -1.375307321548462, -1.3279681205749512, -1.28062903881073, -1.2332899570465088, -1.1859508752822876, -1.1386117935180664, -1.0912727117538452, -1.043933629989624, -0.9965944886207581, -0.9492554068565369, -0.9019163250923157, -0.8545771837234497, -0.8072381019592285, -0.7598990201950073, -0.7125599384307861, -0.6652208566665649, -0.617881715297699, -0.5705426335334778, -0.5232035517692566, -0.475864440202713, -0.42852532863616943, -0.38118624687194824, -0.33384716510772705, -0.28650805354118347, -0.2391689568758011, -0.1918298602104187, -0.14449076354503632, -0.09715166687965393, -0.04981255531311035, -0.00247347354888916, 0.044865623116493225, 0.09220471978187561, 0.139543816447258, 0.18688291311264038, 0.23422200977802277, 0.28156110644340515, 0.32890021800994873, 0.3762392997741699, 0.4235783815383911, 0.4709174931049347, 0.5182566046714783, 0.5655956864356995, 0.6129347681999207, 0.6602739095687866, 0.7076129913330078, 0.754952073097229, 0.8022911548614502, 0.8496302366256714, 0.8969693779945374, 0.9443084597587585, 0.9916475415229797, 1.0389866828918457, 1.086325764656067, 1.133664846420288, 1.1810039281845093, 1.2283430099487305, 1.2756820917129517, 1.3230211734771729, 1.3703603744506836, 1.4176994562149048, 1.465038537979126, 1.5123776197433472]}, "gradients/encoder.encoder.layers.12.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 2.0, 3.0, 1.0, 5.0, 4.0, 6.0, 6.0, 21.0, 40.0, 56.0, 120.0, 268.0, 1219.0, 4189267.0, 2537.0, 430.0, 150.0, 61.0, 43.0, 22.0, 11.0, 10.0, 5.0, 5.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.80078125, -3.707275390625, -3.61376953125, -3.520263671875, -3.4267578125, -3.333251953125, -3.23974609375, -3.146240234375, -3.052734375, -2.959228515625, -2.86572265625, -2.772216796875, -2.6787109375, -2.585205078125, -2.49169921875, -2.398193359375, -2.3046875, -2.211181640625, -2.11767578125, -2.024169921875, -1.9306640625, -1.837158203125, -1.74365234375, -1.650146484375, -1.556640625, -1.463134765625, -1.36962890625, -1.276123046875, -1.1826171875, -1.089111328125, -0.99560546875, -0.902099609375, -0.80859375, -0.715087890625, -0.62158203125, -0.528076171875, -0.4345703125, -0.341064453125, -0.24755859375, -0.154052734375, -0.060546875, 0.032958984375, 0.12646484375, 0.219970703125, 0.3134765625, 0.406982421875, 0.50048828125, 0.593994140625, 0.6875, 0.781005859375, 0.87451171875, 0.968017578125, 1.0615234375, 1.155029296875, 1.24853515625, 1.342041015625, 1.435546875, 1.529052734375, 1.62255859375, 1.716064453125, 1.8095703125, 1.903076171875, 1.99658203125, 2.090087890625, 2.18359375]}, "gradients/encoder.encoder.layers.12.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 2.0, 0.0, 5.0, 3.0, 5.0, 5.0, 15.0, 30.0, 34.0, 69.0, 89.0, 123.0, 177.0, 143.0, 114.0, 73.0, 45.0, 33.0, 21.0, 9.0, 8.0, 5.0, 4.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.27099609375, -0.2643299102783203, -0.2576637268066406, -0.25099754333496094, -0.24433135986328125, -0.23766517639160156, -0.23099899291992188, -0.2243328094482422, -0.2176666259765625, -0.2110004425048828, -0.20433425903320312, -0.19766807556152344, -0.19100189208984375, -0.18433570861816406, -0.17766952514648438, -0.1710033416748047, -0.164337158203125, -0.1576709747314453, -0.15100479125976562, -0.14433860778808594, -0.13767242431640625, -0.13100624084472656, -0.12434005737304688, -0.11767387390136719, -0.1110076904296875, -0.10434150695800781, -0.09767532348632812, -0.09100914001464844, -0.08434295654296875, -0.07767677307128906, -0.07101058959960938, -0.06434440612792969, -0.05767822265625, -0.05101203918457031, -0.044345855712890625, -0.03767967224121094, -0.03101348876953125, -0.024347305297851562, -0.017681121826171875, -0.011014938354492188, -0.0043487548828125, 0.0023174285888671875, 0.008983612060546875, 0.015649795532226562, 0.02231597900390625, 0.028982162475585938, 0.035648345947265625, 0.04231452941894531, 0.048980712890625, 0.05564689636230469, 0.062313079833984375, 0.06897926330566406, 0.07564544677734375, 0.08231163024902344, 0.08897781372070312, 0.09564399719238281, 0.1023101806640625, 0.10897636413574219, 0.11564254760742188, 0.12230873107910156, 0.12897491455078125, 0.13564109802246094, 0.14230728149414062, 0.1489734649658203, 0.1556396484375]}, "gradients/encoder.encoder.layers.12.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 6.0, 5.0, 5.0, 5.0, 14.0, 34.0, 119.0, 698.0, 3909.0, 4154080.0, 32520.0, 2382.0, 375.0, 61.0, 22.0, 17.0, 11.0, 4.0, 3.0, 5.0, 0.0, 0.0, 2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.58642578125, -0.5667953491210938, -0.5471649169921875, -0.5275344848632812, -0.507904052734375, -0.48827362060546875, -0.4686431884765625, -0.44901275634765625, -0.42938232421875, -0.40975189208984375, -0.3901214599609375, -0.37049102783203125, -0.350860595703125, -0.33123016357421875, -0.3115997314453125, -0.29196929931640625, -0.2723388671875, -0.25270843505859375, -0.2330780029296875, -0.21344757080078125, -0.193817138671875, -0.17418670654296875, -0.1545562744140625, -0.13492584228515625, -0.11529541015625, -0.09566497802734375, -0.0760345458984375, -0.05640411376953125, -0.036773681640625, -0.01714324951171875, 0.0024871826171875, 0.02211761474609375, 0.041748046875, 0.06137847900390625, 0.0810089111328125, 0.10063934326171875, 0.120269775390625, 0.13990020751953125, 0.1595306396484375, 0.17916107177734375, 0.19879150390625, 0.21842193603515625, 0.2380523681640625, 0.25768280029296875, 0.277313232421875, 0.29694366455078125, 0.3165740966796875, 0.33620452880859375, 0.3558349609375, 0.37546539306640625, 0.3950958251953125, 0.41472625732421875, 0.434356689453125, 0.45398712158203125, 0.4736175537109375, 0.49324798583984375, 0.51287841796875, 0.5325088500976562, 0.5521392822265625, 0.5717697143554688, 0.591400146484375, 0.6110305786132812, 0.6306610107421875, 0.6502914428710938, 0.669921875]}, "gradients/encoder.encoder.layers.12.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 3.0, 2.0, 2.0, 5.0, 3.0, 4.0, 12.0, 14.0, 31.0, 150.0, 3756.0, 48.0, 11.0, 10.0, 2.0, 3.0, 1.0, 1.0, 3.0, 6.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.132080078125, -0.12821006774902344, -0.12434005737304688, -0.12047004699707031, -0.11660003662109375, -0.11273002624511719, -0.10886001586914062, -0.10499000549316406, -0.1011199951171875, -0.09724998474121094, -0.09337997436523438, -0.08950996398925781, -0.08563995361328125, -0.08176994323730469, -0.07789993286132812, -0.07402992248535156, -0.070159912109375, -0.06628990173339844, -0.062419891357421875, -0.05854988098144531, -0.05467987060546875, -0.05080986022949219, -0.046939849853515625, -0.04306983947753906, -0.0391998291015625, -0.03532981872558594, -0.031459808349609375, -0.027589797973632812, -0.02371978759765625, -0.019849777221679688, -0.015979766845703125, -0.012109756469726562, -0.00823974609375, -0.0043697357177734375, -0.000499725341796875, 0.0033702850341796875, 0.00724029541015625, 0.011110305786132812, 0.014980316162109375, 0.018850326538085938, 0.0227203369140625, 0.026590347290039062, 0.030460357666015625, 0.03433036804199219, 0.03820037841796875, 0.04207038879394531, 0.045940399169921875, 0.04981040954589844, 0.053680419921875, 0.05755043029785156, 0.061420440673828125, 0.06529045104980469, 0.06916046142578125, 0.07303047180175781, 0.07690048217773438, 0.08077049255371094, 0.0846405029296875, 0.08851051330566406, 0.09238052368164062, 0.09625053405761719, 0.10012054443359375, 0.10399055480957031, 0.10786056518554688, 0.11173057556152344, 0.1156005859375]}, "gradients/encoder.encoder.layers.12.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 5.0, 9.0, 47.0, 266.0, 623.0, 52.0, 8.0, 3.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.6784470081329346, -0.6543505787849426, -0.6302541494369507, -0.6061577200889587, -0.5820612907409668, -0.5579648017883301, -0.5338683724403381, -0.5097719430923462, -0.48567551374435425, -0.4615790843963623, -0.43748265504837036, -0.41338619589805603, -0.3892897665500641, -0.36519333720207214, -0.3410968780517578, -0.31700044870376587, -0.2929040193557739, -0.268807590007782, -0.24471114575862885, -0.2206147015094757, -0.19651827216148376, -0.17242184281349182, -0.14832539856433868, -0.12422895431518555, -0.1001325249671936, -0.07603608816862106, -0.05193965137004852, -0.027843214571475983, -0.0037467777729034424, 0.020349659025669098, 0.04444609582424164, 0.06854254007339478, 0.09263896942138672, 0.11673540621995926, 0.1408318430185318, 0.16492828726768494, 0.18902471661567688, 0.21312114596366882, 0.23721759021282196, 0.2613140344619751, 0.28541046380996704, 0.309506893157959, 0.3336033225059509, 0.35769978165626526, 0.3817962110042572, 0.40589264035224915, 0.4299890995025635, 0.4540855288505554, 0.47818195819854736, 0.5022783875465393, 0.5263748168945312, 0.5504712462425232, 0.5745676755905151, 0.5986641645431519, 0.6227605938911438, 0.6468570232391357, 0.6709534525871277, 0.6950498819351196, 0.7191463112831116, 0.7432427406311035, 0.7673392295837402, 0.7914356589317322, 0.8155320882797241, 0.8396285176277161, 0.863724946975708]}, "gradients/encoder.encoder.layers.12.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 4.0, 2.0, 3.0, 7.0, 8.0, 7.0, 16.0, 18.0, 19.0, 19.0, 27.0, 25.0, 26.0, 38.0, 46.0, 44.0, 44.0, 73.0, 43.0, 61.0, 49.0, 49.0, 50.0, 45.0, 48.0, 35.0, 40.0, 38.0, 26.0, 20.0, 15.0, 16.0, 12.0, 10.0, 13.0, 5.0, 3.0, 6.0, 2.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.2947235107421875, -0.28673356771469116, -0.2787436246871948, -0.2707536816596985, -0.26276376843452454, -0.2547738254070282, -0.24678388237953186, -0.23879393935203552, -0.23080399632453918, -0.22281405329704285, -0.2148241251707077, -0.20683418214321136, -0.19884423911571503, -0.19085431098937988, -0.18286436796188354, -0.1748744249343872, -0.16688449680805206, -0.15889455378055573, -0.15090462565422058, -0.14291468262672424, -0.1349247395992279, -0.12693479657173157, -0.11894486844539642, -0.11095492541790009, -0.10296498984098434, -0.0949750542640686, -0.08698511123657227, -0.07899517565965652, -0.07100524008274078, -0.06301529705524445, -0.055025361478328705, -0.047035422176122665, -0.039045482873916626, -0.031055543571710587, -0.023065606132149696, -0.015075668692588806, -0.007085729390382767, 0.0009042099118232727, 0.008894145488739014, 0.016884084790945053, 0.024874024093151093, 0.03286396339535713, 0.04085390269756317, 0.04884383827447891, 0.05683377757668495, 0.06482371687889099, 0.07281365245580673, 0.08080358803272247, 0.08879353106021881, 0.09678346663713455, 0.10477340966463089, 0.11276334524154663, 0.12075328826904297, 0.1287432312965393, 0.13673315942287445, 0.1447231024503708, 0.15271303057670593, 0.16070297360420227, 0.16869290173053741, 0.17668284475803375, 0.1846727877855301, 0.19266271591186523, 0.20065265893936157, 0.2086426019668579, 0.21663254499435425]}, "gradients/encoder.encoder.layers.12.attention.out_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 1.0, 2.0, 0.0, 2.0, 2.0, 3.0, 2.0, 2.0, 3.0, 3.0, 5.0, 2.0, 4.0, 7.0, 10.0, 24.0, 32.0, 34.0, 51.0, 81.0, 113.0, 188.0, 318.0, 667.0, 1497.0, 4241.0, 15279.0, 100454.0, 816379.0, 88131.0, 14220.0, 3914.0, 1407.0, 672.0, 332.0, 176.0, 94.0, 59.0, 41.0, 28.0, 25.0, 17.0, 14.0, 6.0, 5.0, 6.0, 8.0, 3.0, 2.0, 2.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.385498046875, -0.3732490539550781, -0.36100006103515625, -0.3487510681152344, -0.3365020751953125, -0.3242530822753906, -0.31200408935546875, -0.2997550964355469, -0.287506103515625, -0.2752571105957031, -0.26300811767578125, -0.2507591247558594, -0.2385101318359375, -0.22626113891601562, -0.21401214599609375, -0.20176315307617188, -0.18951416015625, -0.17726516723632812, -0.16501617431640625, -0.15276718139648438, -0.1405181884765625, -0.12826919555664062, -0.11602020263671875, -0.10377120971679688, -0.091522216796875, -0.07927322387695312, -0.06702423095703125, -0.054775238037109375, -0.0425262451171875, -0.030277252197265625, -0.01802825927734375, -0.005779266357421875, 0.0064697265625, 0.018718719482421875, 0.03096771240234375, 0.043216705322265625, 0.0554656982421875, 0.06771469116210938, 0.07996368408203125, 0.09221267700195312, 0.104461669921875, 0.11671066284179688, 0.12895965576171875, 0.14120864868164062, 0.1534576416015625, 0.16570663452148438, 0.17795562744140625, 0.19020462036132812, 0.20245361328125, 0.21470260620117188, 0.22695159912109375, 0.23920059204101562, 0.2514495849609375, 0.2636985778808594, 0.27594757080078125, 0.2881965637207031, 0.300445556640625, 0.3126945495605469, 0.32494354248046875, 0.3371925354003906, 0.3494415283203125, 0.3616905212402344, 0.37393951416015625, 0.3861885070800781, 0.3984375]}, "gradients/encoder.encoder.layers.12.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 3.0, 0.0, 3.0, 5.0, 3.0, 5.0, 15.0, 22.0, 37.0, 65.0, 85.0, 120.0, 173.0, 143.0, 129.0, 74.0, 43.0, 36.0, 23.0, 9.0, 10.0, 6.0, 3.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.271728515625, -0.26506996154785156, -0.2584114074707031, -0.2517528533935547, -0.24509429931640625, -0.2384357452392578, -0.23177719116210938, -0.22511863708496094, -0.2184600830078125, -0.21180152893066406, -0.20514297485351562, -0.1984844207763672, -0.19182586669921875, -0.1851673126220703, -0.17850875854492188, -0.17185020446777344, -0.165191650390625, -0.15853309631347656, -0.15187454223632812, -0.1452159881591797, -0.13855743408203125, -0.1318988800048828, -0.12524032592773438, -0.11858177185058594, -0.1119232177734375, -0.10526466369628906, -0.09860610961914062, -0.09194755554199219, -0.08528900146484375, -0.07863044738769531, -0.07197189331054688, -0.06531333923339844, -0.05865478515625, -0.05199623107910156, -0.045337677001953125, -0.03867912292480469, -0.03202056884765625, -0.025362014770507812, -0.018703460693359375, -0.012044906616210938, -0.0053863525390625, 0.0012722015380859375, 0.007930755615234375, 0.014589309692382812, 0.02124786376953125, 0.027906417846679688, 0.034564971923828125, 0.04122352600097656, 0.047882080078125, 0.05454063415527344, 0.061199188232421875, 0.06785774230957031, 0.07451629638671875, 0.08117485046386719, 0.08783340454101562, 0.09449195861816406, 0.1011505126953125, 0.10780906677246094, 0.11446762084960938, 0.12112617492675781, 0.12778472900390625, 0.1344432830810547, 0.14110183715820312, 0.14776039123535156, 0.1544189453125]}, "gradients/encoder.encoder.layers.12.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 4.0, 2.0, 7.0, 2.0, 4.0, 3.0, 10.0, 5.0, 9.0, 12.0, 10.0, 24.0, 14.0, 22.0, 29.0, 21.0, 31.0, 36.0, 41.0, 61.0, 100.0, 290.0, 1306.0, 19912.0, 969131.0, 54143.0, 2349.0, 447.0, 164.0, 93.0, 52.0, 59.0, 25.0, 32.0, 23.0, 19.0, 13.0, 16.0, 7.0, 9.0, 10.0, 2.0, 5.0, 8.0, 0.0, 1.0, 1.0, 3.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.7041015625, -0.6818313598632812, -0.6595611572265625, -0.6372909545898438, -0.615020751953125, -0.5927505493164062, -0.5704803466796875, -0.5482101440429688, -0.52593994140625, -0.5036697387695312, -0.4813995361328125, -0.45912933349609375, -0.436859130859375, -0.41458892822265625, -0.3923187255859375, -0.37004852294921875, -0.3477783203125, -0.32550811767578125, -0.3032379150390625, -0.28096771240234375, -0.258697509765625, -0.23642730712890625, -0.2141571044921875, -0.19188690185546875, -0.16961669921875, -0.14734649658203125, -0.1250762939453125, -0.10280609130859375, -0.080535888671875, -0.05826568603515625, -0.0359954833984375, -0.01372528076171875, 0.008544921875, 0.03081512451171875, 0.0530853271484375, 0.07535552978515625, 0.097625732421875, 0.11989593505859375, 0.1421661376953125, 0.16443634033203125, 0.18670654296875, 0.20897674560546875, 0.2312469482421875, 0.25351715087890625, 0.275787353515625, 0.29805755615234375, 0.3203277587890625, 0.34259796142578125, 0.3648681640625, 0.38713836669921875, 0.4094085693359375, 0.43167877197265625, 0.453948974609375, 0.47621917724609375, 0.4984893798828125, 0.5207595825195312, 0.54302978515625, 0.5652999877929688, 0.5875701904296875, 0.6098403930664062, 0.632110595703125, 0.6543807983398438, 0.6766510009765625, 0.6989212036132812, 0.72119140625]}, "gradients/encoder.encoder.layers.12.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 3.0, 1.0, 1.0, 0.0, 8.0, 5.0, 3.0, 9.0, 9.0, 8.0, 15.0, 13.0, 19.0, 23.0, 32.0, 25.0, 56.0, 38.0, 37.0, 50.0, 37.0, 45.0, 45.0, 50.0, 56.0, 38.0, 40.0, 34.0, 39.0, 37.0, 30.0, 31.0, 21.0, 29.0, 22.0, 14.0, 23.0, 11.0, 12.0, 9.0, 5.0, 10.0, 3.0, 4.0, 2.0, 7.0, 2.0, 4.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.3623046875, -0.3511161804199219, -0.33992767333984375, -0.3287391662597656, -0.3175506591796875, -0.3063621520996094, -0.29517364501953125, -0.2839851379394531, -0.272796630859375, -0.2616081237792969, -0.25041961669921875, -0.23923110961914062, -0.2280426025390625, -0.21685409545898438, -0.20566558837890625, -0.19447708129882812, -0.18328857421875, -0.17210006713867188, -0.16091156005859375, -0.14972305297851562, -0.1385345458984375, -0.12734603881835938, -0.11615753173828125, -0.10496902465820312, -0.093780517578125, -0.08259201049804688, -0.07140350341796875, -0.060214996337890625, -0.0490264892578125, -0.037837982177734375, -0.02664947509765625, -0.015460968017578125, -0.0042724609375, 0.006916046142578125, 0.01810455322265625, 0.029293060302734375, 0.0404815673828125, 0.051670074462890625, 0.06285858154296875, 0.07404708862304688, 0.085235595703125, 0.09642410278320312, 0.10761260986328125, 0.11880111694335938, 0.1299896240234375, 0.14117813110351562, 0.15236663818359375, 0.16355514526367188, 0.17474365234375, 0.18593215942382812, 0.19712066650390625, 0.20830917358398438, 0.2194976806640625, 0.23068618774414062, 0.24187469482421875, 0.2530632019042969, 0.264251708984375, 0.2754402160644531, 0.28662872314453125, 0.2978172302246094, 0.3090057373046875, 0.3201942443847656, 0.33138275146484375, 0.3425712585449219, 0.353759765625]}, "gradients/encoder.encoder.layers.12.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 3.0, 0.0, 2.0, 1.0, 4.0, 0.0, 2.0, 6.0, 5.0, 6.0, 6.0, 10.0, 7.0, 12.0, 7.0, 17.0, 17.0, 26.0, 30.0, 57.0, 93.0, 156.0, 288.0, 567.0, 1292.0, 3439.0, 12101.0, 166315.0, 834307.0, 21512.0, 4895.0, 1795.0, 766.0, 335.0, 171.0, 76.0, 62.0, 47.0, 29.0, 18.0, 15.0, 8.0, 12.0, 14.0, 3.0, 8.0, 6.0, 2.0, 10.0, 1.0, 2.0, 1.0, 3.0, 2.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.006679534912109375, -0.006450474262237549, -0.006221413612365723, -0.0059923529624938965, -0.00576329231262207, -0.005534231662750244, -0.005305171012878418, -0.005076110363006592, -0.004847049713134766, -0.0046179890632629395, -0.004388928413391113, -0.004159867763519287, -0.003930807113647461, -0.0037017464637756348, -0.0034726858139038086, -0.0032436251640319824, -0.0030145645141601562, -0.00278550386428833, -0.002556443214416504, -0.0023273825645446777, -0.0020983219146728516, -0.0018692612648010254, -0.0016402006149291992, -0.001411139965057373, -0.0011820793151855469, -0.0009530186653137207, -0.0007239580154418945, -0.0004948973655700684, -0.0002658367156982422, -3.6776065826416016e-05, 0.00019228458404541016, 0.00042134523391723633, 0.0006504058837890625, 0.0008794665336608887, 0.0011085271835327148, 0.001337587833404541, 0.0015666484832763672, 0.0017957091331481934, 0.0020247697830200195, 0.0022538304328918457, 0.002482891082763672, 0.002711951732635498, 0.0029410123825073242, 0.0031700730323791504, 0.0033991336822509766, 0.0036281943321228027, 0.003857254981994629, 0.004086315631866455, 0.004315376281738281, 0.004544436931610107, 0.004773497581481934, 0.00500255823135376, 0.005231618881225586, 0.005460679531097412, 0.005689740180969238, 0.0059188008308410645, 0.006147861480712891, 0.006376922130584717, 0.006605982780456543, 0.006835043430328369, 0.007064104080200195, 0.0072931647300720215, 0.007522225379943848, 0.007751286029815674, 0.0079803466796875]}, "gradients/encoder.encoder.layers.12.attention.k_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 2.0, 1.0, 1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 3.0, 2.0, 2.0, 4.0, 9.0, 9.0, 17.0, 6.0, 8.0, 14.0, 16.0, 21.0, 18.0, 17.0, 21.0, 41.0, 29.0, 37.0, 43.0, 43.0, 44.0, 35.0, 78.0, 43.0, 46.0, 39.0, 35.0, 35.0, 27.0, 35.0, 35.0, 26.0, 18.0, 25.0, 23.0, 19.0, 12.0, 17.0, 19.0, 12.0, 3.0, 7.0, 3.0, 4.0, 1.0, 3.0, 6.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-2.086162567138672e-06, -2.0228326320648193e-06, -1.959502696990967e-06, -1.8961727619171143e-06, -1.8328428268432617e-06, -1.7695128917694092e-06, -1.7061829566955566e-06, -1.642853021621704e-06, -1.5795230865478516e-06, -1.516193151473999e-06, -1.4528632164001465e-06, -1.389533281326294e-06, -1.3262033462524414e-06, -1.2628734111785889e-06, -1.1995434761047363e-06, -1.1362135410308838e-06, -1.0728836059570312e-06, -1.0095536708831787e-06, -9.462237358093262e-07, -8.828938007354736e-07, -8.195638656616211e-07, -7.562339305877686e-07, -6.92903995513916e-07, -6.295740604400635e-07, -5.662441253662109e-07, -5.029141902923584e-07, -4.3958425521850586e-07, -3.762543201446533e-07, -3.129243850708008e-07, -2.4959444999694824e-07, -1.862645149230957e-07, -1.2293457984924316e-07, -5.960464477539063e-08, 3.725290298461914e-09, 6.705522537231445e-08, 1.30385160446167e-07, 1.9371509552001953e-07, 2.5704503059387207e-07, 3.203749656677246e-07, 3.8370490074157715e-07, 4.470348358154297e-07, 5.103647708892822e-07, 5.736947059631348e-07, 6.370246410369873e-07, 7.003545761108398e-07, 7.636845111846924e-07, 8.270144462585449e-07, 8.903443813323975e-07, 9.5367431640625e-07, 1.0170042514801025e-06, 1.080334186553955e-06, 1.1436641216278076e-06, 1.2069940567016602e-06, 1.2703239917755127e-06, 1.3336539268493652e-06, 1.3969838619232178e-06, 1.4603137969970703e-06, 1.5236437320709229e-06, 1.5869736671447754e-06, 1.650303602218628e-06, 1.7136335372924805e-06, 1.776963472366333e-06, 1.8402934074401855e-06, 1.903623342514038e-06, 1.9669532775878906e-06]}, "gradients/encoder.encoder.layers.12.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 3.0, 2.0, 1.0, 4.0, 2.0, 2.0, 9.0, 8.0, 10.0, 9.0, 35.0, 31.0, 71.0, 183.0, 881.0, 12536.0, 1024931.0, 8801.0, 683.0, 160.0, 57.0, 50.0, 21.0, 15.0, 14.0, 10.0, 4.0, 10.0, 2.0, 1.0, 2.0, 3.0, 1.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 3.0, 0.0, 2.0, 1.0, 1.0, 1.0], "bins": [-0.0163726806640625, -0.015897512435913086, -0.015422344207763672, -0.014947175979614258, -0.014472007751464844, -0.01399683952331543, -0.013521671295166016, -0.013046503067016602, -0.012571334838867188, -0.012096166610717773, -0.01162099838256836, -0.011145830154418945, -0.010670661926269531, -0.010195493698120117, -0.009720325469970703, -0.009245157241821289, -0.008769989013671875, -0.008294820785522461, -0.007819652557373047, -0.007344484329223633, -0.006869316101074219, -0.006394147872924805, -0.005918979644775391, -0.0054438114166259766, -0.0049686431884765625, -0.0044934749603271484, -0.004018306732177734, -0.0035431385040283203, -0.0030679702758789062, -0.002592802047729492, -0.002117633819580078, -0.001642465591430664, -0.00116729736328125, -0.0006921291351318359, -0.00021696090698242188, 0.0002582073211669922, 0.0007333755493164062, 0.0012085437774658203, 0.0016837120056152344, 0.0021588802337646484, 0.0026340484619140625, 0.0031092166900634766, 0.0035843849182128906, 0.004059553146362305, 0.004534721374511719, 0.005009889602661133, 0.005485057830810547, 0.005960226058959961, 0.006435394287109375, 0.006910562515258789, 0.007385730743408203, 0.007860898971557617, 0.008336067199707031, 0.008811235427856445, 0.00928640365600586, 0.009761571884155273, 0.010236740112304688, 0.010711908340454102, 0.011187076568603516, 0.01166224479675293, 0.012137413024902344, 0.012612581253051758, 0.013087749481201172, 0.013562917709350586, 0.0140380859375]}, "gradients/encoder.encoder.layers.12.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 0.0, 3.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 1.0, 3.0, 2.0, 2.0, 1.0, 10.0, 5.0, 9.0, 12.0, 13.0, 17.0, 27.0, 35.0, 58.0, 82.0, 123.0, 169.0, 142.0, 102.0, 67.0, 32.0, 25.0, 16.0, 6.0, 10.0, 8.0, 9.0, 2.0, 2.0, 4.0, 1.0, 2.0, 3.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00704193115234375, -0.006803393363952637, -0.0065648555755615234, -0.00632631778717041, -0.006087779998779297, -0.005849242210388184, -0.00561070442199707, -0.005372166633605957, -0.005133628845214844, -0.0048950910568237305, -0.004656553268432617, -0.004418015480041504, -0.004179477691650391, -0.003940939903259277, -0.003702402114868164, -0.0034638643264770508, -0.0032253265380859375, -0.0029867887496948242, -0.002748250961303711, -0.0025097131729125977, -0.0022711753845214844, -0.002032637596130371, -0.0017940998077392578, -0.0015555620193481445, -0.0013170242309570312, -0.001078486442565918, -0.0008399486541748047, -0.0006014108657836914, -0.0003628730773925781, -0.00012433528900146484, 0.00011420249938964844, 0.0003527402877807617, 0.000591278076171875, 0.0008298158645629883, 0.0010683536529541016, 0.0013068914413452148, 0.0015454292297363281, 0.0017839670181274414, 0.0020225048065185547, 0.002261042594909668, 0.0024995803833007812, 0.0027381181716918945, 0.002976655960083008, 0.003215193748474121, 0.0034537315368652344, 0.0036922693252563477, 0.003930807113647461, 0.004169344902038574, 0.0044078826904296875, 0.004646420478820801, 0.004884958267211914, 0.005123496055603027, 0.005362033843994141, 0.005600571632385254, 0.005839109420776367, 0.0060776472091674805, 0.006316184997558594, 0.006554722785949707, 0.00679326057434082, 0.007031798362731934, 0.007270336151123047, 0.00750887393951416, 0.0077474117279052734, 0.007985949516296387, 0.0082244873046875]}, "gradients/encoder.encoder.layers.12.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 36.0, 790.0, 174.0, 16.0, 2.0, 1.0], "bins": [-20.837051391601562, -20.488262176513672, -20.13947296142578, -19.79068374633789, -19.44189453125, -19.09310531616211, -18.74431610107422, -18.39552879333496, -18.04673957824707, -17.69795036315918, -17.34916114807129, -17.0003719329834, -16.651582717895508, -16.30279541015625, -15.954005241394043, -15.605216026306152, -15.256426811218262, -14.907637596130371, -14.55884838104248, -14.210060119628906, -13.861270904541016, -13.512481689453125, -13.163692474365234, -12.814903259277344, -12.466114044189453, -12.117324829101562, -11.768535614013672, -11.419746398925781, -11.070958137512207, -10.722168922424316, -10.373379707336426, -10.024590492248535, -9.675802230834961, -9.32701301574707, -8.97822380065918, -8.629434585571289, -8.280646324157715, -7.931857109069824, -7.583067893981934, -7.234278678894043, -6.8854899406433105, -6.53670072555542, -6.1879119873046875, -5.839122772216797, -5.490333557128906, -5.141544818878174, -4.792755603790283, -4.443966865539551, -4.09517765045166, -3.7463886737823486, -3.397599697113037, -3.0488104820251465, -2.700021505355835, -2.3512325286865234, -2.002443313598633, -1.6536543369293213, -1.3048654794692993, -0.956076443195343, -0.6072874069213867, -0.25849831104278564, 0.09029066562652588, 0.4390796422958374, 0.787868857383728, 1.1366578340530396, 1.485446810722351]}, "gradients/encoder.encoder.layers.12.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 3.0, 0.0, 2.0, 1.0, 3.0, 7.0, 9.0, 2.0, 6.0, 6.0, 18.0, 19.0, 10.0, 18.0, 24.0, 35.0, 33.0, 34.0, 41.0, 47.0, 47.0, 51.0, 36.0, 49.0, 42.0, 55.0, 42.0, 49.0, 40.0, 42.0, 34.0, 24.0, 24.0, 22.0, 17.0, 18.0, 24.0, 14.0, 18.0, 11.0, 6.0, 12.0, 5.0, 3.0, 6.0, 2.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-1.953012228012085, -1.8928269147872925, -1.8326416015625, -1.7724562883377075, -1.712270975112915, -1.652085542678833, -1.59190034866333, -1.531714916229248, -1.4715296030044556, -1.411344289779663, -1.3511589765548706, -1.2909736633300781, -1.2307883501052856, -1.1706030368804932, -1.1104176044464111, -1.0502322912216187, -0.990047037601471, -0.9298617243766785, -0.869676411151886, -0.8094910383224487, -0.7493057250976562, -0.6891204118728638, -0.6289350986480713, -0.5687497854232788, -0.5085644721984863, -0.44837915897369385, -0.388193815946579, -0.3280085027217865, -0.26782315969467163, -0.20763784646987915, -0.14745253324508667, -0.0872671902179718, -0.027081847190856934, 0.03310347720980644, 0.09328880161046982, 0.1534741222858429, 0.21365945041179657, 0.27384477853775024, 0.3340300917625427, 0.3942154347896576, 0.4544007480144501, 0.5145860910415649, 0.5747714042663574, 0.6349567174911499, 0.6951420307159424, 0.7553273439407349, 0.8155126571655273, 0.8756980299949646, 0.9358833432197571, 0.9960686564445496, 1.0562540292739868, 1.1164393424987793, 1.1766246557235718, 1.2368099689483643, 1.2969952821731567, 1.3571805953979492, 1.4173659086227417, 1.4775512218475342, 1.5377365350723267, 1.5979218482971191, 1.6581071615219116, 1.718292474746704, 1.7784779071807861, 1.8386632204055786, 1.898848533630371]}, "gradients/encoder.encoder.layers.11.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 4.0, 3.0, 4.0, 4.0, 4.0, 10.0, 21.0, 46.0, 59.0, 87.0, 148.0, 4193429.0, 161.0, 119.0, 64.0, 51.0, 27.0, 29.0, 4.0, 13.0, 2.0, 5.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.265625, -2.209991455078125, -2.15435791015625, -2.098724365234375, -2.0430908203125, -1.987457275390625, -1.93182373046875, -1.876190185546875, -1.820556640625, -1.764923095703125, -1.70928955078125, -1.653656005859375, -1.5980224609375, -1.542388916015625, -1.48675537109375, -1.431121826171875, -1.37548828125, -1.319854736328125, -1.26422119140625, -1.208587646484375, -1.1529541015625, -1.097320556640625, -1.04168701171875, -0.986053466796875, -0.930419921875, -0.874786376953125, -0.81915283203125, -0.763519287109375, -0.7078857421875, -0.652252197265625, -0.59661865234375, -0.540985107421875, -0.4853515625, -0.429718017578125, -0.37408447265625, -0.318450927734375, -0.2628173828125, -0.207183837890625, -0.15155029296875, -0.095916748046875, -0.040283203125, 0.015350341796875, 0.07098388671875, 0.126617431640625, 0.1822509765625, 0.237884521484375, 0.29351806640625, 0.349151611328125, 0.40478515625, 0.460418701171875, 0.51605224609375, 0.571685791015625, 0.6273193359375, 0.682952880859375, 0.73858642578125, 0.794219970703125, 0.849853515625, 0.905487060546875, 0.96112060546875, 1.016754150390625, 1.0723876953125, 1.128021240234375, 1.18365478515625, 1.239288330078125, 1.294921875]}, "gradients/encoder.encoder.layers.11.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 3.0, 4.0, 4.0, 4.0, 3.0, 9.0, 20.0, 43.0, 59.0, 85.0, 132.0, 153.0, 167.0, 126.0, 68.0, 52.0, 29.0, 29.0, 3.0, 13.0, 3.0, 5.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.275390625, -0.268646240234375, -0.26190185546875, -0.255157470703125, -0.2484130859375, -0.241668701171875, -0.23492431640625, -0.228179931640625, -0.221435546875, -0.214691162109375, -0.20794677734375, -0.201202392578125, -0.1944580078125, -0.187713623046875, -0.18096923828125, -0.174224853515625, -0.16748046875, -0.160736083984375, -0.15399169921875, -0.147247314453125, -0.1405029296875, -0.133758544921875, -0.12701416015625, -0.120269775390625, -0.113525390625, -0.106781005859375, -0.10003662109375, -0.093292236328125, -0.0865478515625, -0.079803466796875, -0.07305908203125, -0.066314697265625, -0.0595703125, -0.052825927734375, -0.04608154296875, -0.039337158203125, -0.0325927734375, -0.025848388671875, -0.01910400390625, -0.012359619140625, -0.005615234375, 0.001129150390625, 0.00787353515625, 0.014617919921875, 0.0213623046875, 0.028106689453125, 0.03485107421875, 0.041595458984375, 0.04833984375, 0.055084228515625, 0.06182861328125, 0.068572998046875, 0.0753173828125, 0.082061767578125, 0.08880615234375, 0.095550537109375, 0.102294921875, 0.109039306640625, 0.11578369140625, 0.122528076171875, 0.1292724609375, 0.136016845703125, 0.14276123046875, 0.149505615234375, 0.15625]}, "gradients/encoder.encoder.layers.11.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 4.0, 3.0, 6.0, 11.0, 15.0, 23.0, 22.0, 25.0, 31.0, 35.0, 60.0, 46.0, 72.0, 67.0, 4193340.0, 70.0, 66.0, 56.0, 53.0, 61.0, 53.0, 37.0, 36.0, 35.0, 20.0, 14.0, 13.0, 4.0, 5.0, 4.0, 2.0, 3.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.265625, -0.25889015197753906, -0.2521553039550781, -0.2454204559326172, -0.23868560791015625, -0.2319507598876953, -0.22521591186523438, -0.21848106384277344, -0.2117462158203125, -0.20501136779785156, -0.19827651977539062, -0.1915416717529297, -0.18480682373046875, -0.1780719757080078, -0.17133712768554688, -0.16460227966308594, -0.157867431640625, -0.15113258361816406, -0.14439773559570312, -0.1376628875732422, -0.13092803955078125, -0.12419319152832031, -0.11745834350585938, -0.11072349548339844, -0.1039886474609375, -0.09725379943847656, -0.09051895141601562, -0.08378410339355469, -0.07704925537109375, -0.07031440734863281, -0.06357955932617188, -0.05684471130371094, -0.05010986328125, -0.04337501525878906, -0.036640167236328125, -0.029905319213867188, -0.02317047119140625, -0.016435623168945312, -0.009700775146484375, -0.0029659271240234375, 0.0037689208984375, 0.010503768920898438, 0.017238616943359375, 0.023973464965820312, 0.03070831298828125, 0.03744316101074219, 0.044178009033203125, 0.05091285705566406, 0.057647705078125, 0.06438255310058594, 0.07111740112304688, 0.07785224914550781, 0.08458709716796875, 0.09132194519042969, 0.09805679321289062, 0.10479164123535156, 0.1115264892578125, 0.11826133728027344, 0.12499618530273438, 0.1317310333251953, 0.13846588134765625, 0.1452007293701172, 0.15193557739257812, 0.15867042541503906, 0.1654052734375]}, "gradients/encoder.encoder.layers.11.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [4094.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0004744529724121094, 0.0007946714758872986, 0.0020637959241867065, 0.0033329203724861145, 0.0046020448207855225, 0.00587116926908493, 0.007140293717384338, 0.008409418165683746, 0.009678542613983154, 0.010947667062282562, 0.01221679151058197, 0.013485915958881378, 0.014755040407180786, 0.016024164855480194, 0.017293289303779602, 0.01856241375207901, 0.019831538200378418, 0.021100662648677826, 0.022369787096977234, 0.023638911545276642, 0.02490803599357605, 0.026177160441875458, 0.027446284890174866, 0.028715409338474274, 0.02998453378677368, 0.03125365823507309, 0.0325227826833725, 0.033791907131671906, 0.035061031579971313, 0.03633015602827072, 0.03759928047657013, 0.03886840492486954, 0.040137529373168945, 0.04140665382146835, 0.04267577826976776, 0.04394490271806717, 0.04521402716636658, 0.046483151614665985, 0.04775227606296539, 0.0490214005112648, 0.05029052495956421, 0.05155964940786362, 0.052828773856163025, 0.05409789830446243, 0.05536702275276184, 0.05663614720106125, 0.05790527164936066, 0.059174396097660065, 0.06044352054595947, 0.06171264499425888, 0.06298176944255829, 0.0642508938908577, 0.0655200183391571, 0.06678914278745651, 0.06805826723575592, 0.06932739168405533, 0.07059651613235474, 0.07186564058065414, 0.07313476502895355, 0.07440388947725296, 0.07567301392555237, 0.07694213837385178, 0.07821126282215118, 0.07948038727045059, 0.08074951171875]}, "gradients/encoder.encoder.layers.11.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 4.0, 43.0, 953.0, 11.0, 3.0, 2.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.7752957940101624, -0.761525571346283, -0.7477554082870483, -0.733985185623169, -0.7202149629592896, -0.7064447402954102, -0.6926745772361755, -0.6789043545722961, -0.6651341319084167, -0.6513639092445374, -0.6375937461853027, -0.6238235235214233, -0.610053300857544, -0.5962830781936646, -0.5825129151344299, -0.5687426924705505, -0.5549724698066711, -0.5412022471427917, -0.5274320840835571, -0.5136618614196777, -0.49989163875579834, -0.48612144589424133, -0.47235122323036194, -0.45858103036880493, -0.4448108375072479, -0.4310406446456909, -0.4172704219818115, -0.4035002291202545, -0.3897300064563751, -0.3759598135948181, -0.3621895909309387, -0.3484193980693817, -0.3346491754055023, -0.3208789825439453, -0.3071087598800659, -0.2933385670185089, -0.2795683443546295, -0.2657981514930725, -0.2520279288291931, -0.2382577359676361, -0.2244875133037567, -0.2107173055410385, -0.1969470977783203, -0.1831768900156021, -0.1694066822528839, -0.1556364893913269, -0.1418662667274475, -0.1280960738658905, -0.1143258586525917, -0.1005556508898735, -0.0867854431271553, -0.0730152428150177, -0.0592450313270092, -0.0454748272895813, -0.0317046195268631, -0.017934411764144897, -0.004164204001426697, 0.00960600282996893, 0.023376209661364555, 0.03714641556143761, 0.05091662332415581, 0.06468682736158371, 0.07845703512430191, 0.09222724288702011, 0.10599745064973831]}, "gradients/encoder.encoder.layers.11.final_layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 3.0, 1.0, 6.0, 6.0, 11.0, 26.0, 54.0, 78.0, 149.0, 170.0, 165.0, 122.0, 94.0, 59.0, 32.0, 20.0, 4.0, 3.0, 3.0, 1.0, 2.0, 3.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0688202977180481, -0.0652294009923935, -0.06163850054144859, -0.05804760009050369, -0.05445670336484909, -0.05086580663919449, -0.04727490618824959, -0.04368400573730469, -0.040093109011650085, -0.03650221228599548, -0.03291131183505058, -0.02932041324675083, -0.02572951465845108, -0.02213861607015133, -0.018547717481851578, -0.014956818893551826, -0.011365920305252075, -0.007775021716952324, -0.004184123128652573, -0.0005932245403528214, 0.00299767404794693, 0.006588572636246681, 0.010179471224546432, 0.013770369812846184, 0.017361268401145935, 0.020952166989445686, 0.024543065577745438, 0.02813396416604519, 0.03172486275434494, 0.03531575947999954, 0.03890665993094444, 0.04249756038188934, 0.046088457107543945, 0.04967935383319855, 0.05327025428414345, 0.05686115473508835, 0.06045205146074295, 0.06404294818639755, 0.06763385236263275, 0.07122474908828735, 0.07481564581394196, 0.07840654253959656, 0.08199743926525116, 0.08558834344148636, 0.08917924016714096, 0.09277013689279556, 0.09636104106903076, 0.09995193779468536, 0.10354283452033997, 0.10713373124599457, 0.11072462797164917, 0.11431553214788437, 0.11790642887353897, 0.12149732559919357, 0.12508822977542877, 0.12867912650108337, 0.13227002322673798, 0.13586091995239258, 0.13945181667804718, 0.14304271340370178, 0.14663362503051758, 0.15022452175617218, 0.15381541848182678, 0.15740631520748138, 0.160997211933136]}, "gradients/encoder.encoder.layers.11.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 5.0, 5.0, 4.0, 4.0, 0.0, 9.0, 8.0, 10.0, 10.0, 17.0, 22.0, 44.0, 70.0, 106.0, 191.0, 325.0, 723.0, 1907.0, 6877.0, 37202.0, 729004.0, 243146.0, 21608.0, 4540.0, 1472.0, 606.0, 252.0, 131.0, 85.0, 51.0, 29.0, 28.0, 30.0, 12.0, 5.0, 5.0, 10.0, 7.0, 6.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.336669921875, -0.3253936767578125, -0.314117431640625, -0.3028411865234375, -0.29156494140625, -0.2802886962890625, -0.269012451171875, -0.2577362060546875, -0.2464599609375, -0.2351837158203125, -0.223907470703125, -0.2126312255859375, -0.20135498046875, -0.1900787353515625, -0.178802490234375, -0.1675262451171875, -0.15625, -0.1449737548828125, -0.133697509765625, -0.1224212646484375, -0.11114501953125, -0.0998687744140625, -0.088592529296875, -0.0773162841796875, -0.0660400390625, -0.0547637939453125, -0.043487548828125, -0.0322113037109375, -0.02093505859375, -0.0096588134765625, 0.001617431640625, 0.0128936767578125, 0.024169921875, 0.0354461669921875, 0.046722412109375, 0.0579986572265625, 0.06927490234375, 0.0805511474609375, 0.091827392578125, 0.1031036376953125, 0.1143798828125, 0.1256561279296875, 0.136932373046875, 0.1482086181640625, 0.15948486328125, 0.1707611083984375, 0.182037353515625, 0.1933135986328125, 0.20458984375, 0.2158660888671875, 0.227142333984375, 0.2384185791015625, 0.24969482421875, 0.2609710693359375, 0.272247314453125, 0.2835235595703125, 0.2947998046875, 0.3060760498046875, 0.317352294921875, 0.3286285400390625, 0.33990478515625, 0.3511810302734375, 0.362457275390625, 0.3737335205078125, 0.385009765625]}, "gradients/encoder.encoder.layers.11.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 3.0, 4.0, 4.0, 4.0, 2.0, 10.0, 20.0, 45.0, 56.0, 91.0, 127.0, 162.0, 167.0, 119.0, 64.0, 54.0, 29.0, 29.0, 5.0, 11.0, 3.0, 5.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.275634765625, -0.26888084411621094, -0.2621269226074219, -0.2553730010986328, -0.24861907958984375, -0.2418651580810547, -0.23511123657226562, -0.22835731506347656, -0.2216033935546875, -0.21484947204589844, -0.20809555053710938, -0.2013416290283203, -0.19458770751953125, -0.1878337860107422, -0.18107986450195312, -0.17432594299316406, -0.167572021484375, -0.16081809997558594, -0.15406417846679688, -0.1473102569580078, -0.14055633544921875, -0.1338024139404297, -0.12704849243164062, -0.12029457092285156, -0.1135406494140625, -0.10678672790527344, -0.10003280639648438, -0.09327888488769531, -0.08652496337890625, -0.07977104187011719, -0.07301712036132812, -0.06626319885253906, -0.05950927734375, -0.05275535583496094, -0.046001434326171875, -0.03924751281738281, -0.03249359130859375, -0.025739669799804688, -0.018985748291015625, -0.012231826782226562, -0.0054779052734375, 0.0012760162353515625, 0.008029937744140625, 0.014783859252929688, 0.02153778076171875, 0.028291702270507812, 0.035045623779296875, 0.04179954528808594, 0.048553466796875, 0.05530738830566406, 0.062061309814453125, 0.06881523132324219, 0.07556915283203125, 0.08232307434082031, 0.08907699584960938, 0.09583091735839844, 0.1025848388671875, 0.10933876037597656, 0.11609268188476562, 0.12284660339355469, 0.12960052490234375, 0.1363544464111328, 0.14310836791992188, 0.14986228942871094, 0.1566162109375]}, "gradients/encoder.encoder.layers.11.attention.v_proj.weight": {"_type": "histogram", "values": [2.0, 2.0, 0.0, 0.0, 1.0, 2.0, 7.0, 3.0, 0.0, 4.0, 4.0, 11.0, 12.0, 12.0, 16.0, 11.0, 14.0, 19.0, 20.0, 27.0, 28.0, 42.0, 43.0, 50.0, 119.0, 212.0, 705.0, 4402.0, 93244.0, 924391.0, 22327.0, 1886.0, 416.0, 159.0, 93.0, 64.0, 34.0, 31.0, 28.0, 17.0, 18.0, 16.0, 14.0, 11.0, 5.0, 9.0, 9.0, 8.0, 0.0, 5.0, 5.0, 4.0, 4.0, 2.0, 1.0, 0.0, 1.0, 3.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.448974609375, -0.4336509704589844, -0.41832733154296875, -0.4030036926269531, -0.3876800537109375, -0.3723564147949219, -0.35703277587890625, -0.3417091369628906, -0.326385498046875, -0.3110618591308594, -0.29573822021484375, -0.2804145812988281, -0.2650909423828125, -0.24976730346679688, -0.23444366455078125, -0.21912002563476562, -0.20379638671875, -0.18847274780273438, -0.17314910888671875, -0.15782546997070312, -0.1425018310546875, -0.12717819213867188, -0.11185455322265625, -0.09653091430664062, -0.081207275390625, -0.06588363647460938, -0.05055999755859375, -0.035236358642578125, -0.0199127197265625, -0.004589080810546875, 0.01073455810546875, 0.026058197021484375, 0.0413818359375, 0.056705474853515625, 0.07202911376953125, 0.08735275268554688, 0.1026763916015625, 0.11800003051757812, 0.13332366943359375, 0.14864730834960938, 0.163970947265625, 0.17929458618164062, 0.19461822509765625, 0.20994186401367188, 0.2252655029296875, 0.24058914184570312, 0.25591278076171875, 0.2712364196777344, 0.28656005859375, 0.3018836975097656, 0.31720733642578125, 0.3325309753417969, 0.3478546142578125, 0.3631782531738281, 0.37850189208984375, 0.3938255310058594, 0.409149169921875, 0.4244728088378906, 0.43979644775390625, 0.4551200866699219, 0.4704437255859375, 0.4857673645019531, 0.5010910034179688, 0.5164146423339844, 0.53173828125]}, "gradients/encoder.encoder.layers.11.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 1.0, 0.0, 1.0, 2.0, 3.0, 5.0, 5.0, 5.0, 0.0, 8.0, 9.0, 9.0, 5.0, 11.0, 14.0, 16.0, 18.0, 17.0, 29.0, 26.0, 30.0, 46.0, 44.0, 36.0, 47.0, 42.0, 52.0, 36.0, 38.0, 38.0, 47.0, 48.0, 49.0, 25.0, 29.0, 35.0, 26.0, 26.0, 20.0, 19.0, 14.0, 11.0, 16.0, 12.0, 12.0, 11.0, 4.0, 4.0, 0.0, 3.0, 7.0, 2.0, 1.0, 0.0, 0.0, 2.0, 2.0], "bins": [-0.31201171875, -0.3030204772949219, -0.29402923583984375, -0.2850379943847656, -0.2760467529296875, -0.2670555114746094, -0.25806427001953125, -0.24907302856445312, -0.240081787109375, -0.23109054565429688, -0.22209930419921875, -0.21310806274414062, -0.2041168212890625, -0.19512557983398438, -0.18613433837890625, -0.17714309692382812, -0.16815185546875, -0.15916061401367188, -0.15016937255859375, -0.14117813110351562, -0.1321868896484375, -0.12319564819335938, -0.11420440673828125, -0.10521316528320312, -0.096221923828125, -0.08723068237304688, -0.07823944091796875, -0.06924819946289062, -0.0602569580078125, -0.051265716552734375, -0.04227447509765625, -0.033283233642578125, -0.0242919921875, -0.015300750732421875, -0.00630950927734375, 0.002681732177734375, 0.0116729736328125, 0.020664215087890625, 0.02965545654296875, 0.038646697998046875, 0.047637939453125, 0.056629180908203125, 0.06562042236328125, 0.07461166381835938, 0.0836029052734375, 0.09259414672851562, 0.10158538818359375, 0.11057662963867188, 0.11956787109375, 0.12855911254882812, 0.13755035400390625, 0.14654159545898438, 0.1555328369140625, 0.16452407836914062, 0.17351531982421875, 0.18250656127929688, 0.191497802734375, 0.20048904418945312, 0.20948028564453125, 0.21847152709960938, 0.2274627685546875, 0.23645401000976562, 0.24544525146484375, 0.2544364929199219, 0.263427734375]}, "gradients/encoder.encoder.layers.11.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 5.0, 1.0, 8.0, 3.0, 3.0, 12.0, 8.0, 27.0, 37.0, 58.0, 108.0, 319.0, 1041.0, 4640.0, 46509.0, 945871.0, 43968.0, 4354.0, 962.0, 327.0, 142.0, 63.0, 42.0, 23.0, 14.0, 7.0, 2.0, 1.0, 4.0, 0.0, 4.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.005405426025390625, -0.0052337646484375, -0.005062103271484375, -0.00489044189453125, -0.004718780517578125, -0.004547119140625, -0.004375457763671875, -0.00420379638671875, -0.004032135009765625, -0.0038604736328125, -0.003688812255859375, -0.00351715087890625, -0.003345489501953125, -0.003173828125, -0.003002166748046875, -0.00283050537109375, -0.002658843994140625, -0.0024871826171875, -0.002315521240234375, -0.00214385986328125, -0.001972198486328125, -0.001800537109375, -0.001628875732421875, -0.00145721435546875, -0.001285552978515625, -0.0011138916015625, -0.000942230224609375, -0.00077056884765625, -0.000598907470703125, -0.00042724609375, -0.000255584716796875, -8.392333984375e-05, 8.7738037109375e-05, 0.0002593994140625, 0.000431060791015625, 0.00060272216796875, 0.000774383544921875, 0.000946044921875, 0.001117706298828125, 0.00128936767578125, 0.001461029052734375, 0.0016326904296875, 0.001804351806640625, 0.00197601318359375, 0.002147674560546875, 0.0023193359375, 0.002490997314453125, 0.00266265869140625, 0.002834320068359375, 0.0030059814453125, 0.003177642822265625, 0.00334930419921875, 0.003520965576171875, 0.003692626953125, 0.003864288330078125, 0.00403594970703125, 0.004207611083984375, 0.0043792724609375, 0.004550933837890625, 0.00472259521484375, 0.004894256591796875, 0.00506591796875, 0.005237579345703125, 0.00540924072265625, 0.005580902099609375]}, "gradients/encoder.encoder.layers.11.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 3.0, 3.0, 2.0, 1.0, 6.0, 6.0, 9.0, 10.0, 7.0, 19.0, 11.0, 19.0, 19.0, 23.0, 20.0, 26.0, 29.0, 66.0, 32.0, 27.0, 47.0, 43.0, 38.0, 30.0, 34.0, 89.0, 32.0, 49.0, 29.0, 40.0, 33.0, 22.0, 31.0, 54.0, 20.0, 17.0, 14.0, 9.0, 12.0, 10.0, 6.0, 8.0, 1.0, 1.0, 3.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-2.086162567138672e-06, -2.0191073417663574e-06, -1.952052116394043e-06, -1.8849968910217285e-06, -1.817941665649414e-06, -1.7508864402770996e-06, -1.6838312149047852e-06, -1.6167759895324707e-06, -1.5497207641601562e-06, -1.4826655387878418e-06, -1.4156103134155273e-06, -1.3485550880432129e-06, -1.2814998626708984e-06, -1.214444637298584e-06, -1.1473894119262695e-06, -1.080334186553955e-06, -1.0132789611816406e-06, -9.462237358093262e-07, -8.791685104370117e-07, -8.121132850646973e-07, -7.450580596923828e-07, -6.780028343200684e-07, -6.109476089477539e-07, -5.438923835754395e-07, -4.76837158203125e-07, -4.0978193283081055e-07, -3.427267074584961e-07, -2.7567148208618164e-07, -2.086162567138672e-07, -1.4156103134155273e-07, -7.450580596923828e-08, -7.450580596923828e-09, 5.960464477539063e-08, 1.2665987014770508e-07, 1.9371509552001953e-07, 2.60770320892334e-07, 3.2782554626464844e-07, 3.948807716369629e-07, 4.6193599700927734e-07, 5.289912223815918e-07, 5.960464477539062e-07, 6.631016731262207e-07, 7.301568984985352e-07, 7.972121238708496e-07, 8.642673492431641e-07, 9.313225746154785e-07, 9.98377799987793e-07, 1.0654330253601074e-06, 1.1324882507324219e-06, 1.1995434761047363e-06, 1.2665987014770508e-06, 1.3336539268493652e-06, 1.4007091522216797e-06, 1.4677643775939941e-06, 1.5348196029663086e-06, 1.601874828338623e-06, 1.6689300537109375e-06, 1.735985279083252e-06, 1.8030405044555664e-06, 1.8700957298278809e-06, 1.9371509552001953e-06, 2.0042061805725098e-06, 2.0712614059448242e-06, 2.1383166313171387e-06, 2.205371856689453e-06]}, "gradients/encoder.encoder.layers.11.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 3.0, 0.0, 2.0, 3.0, 2.0, 8.0, 9.0, 9.0, 16.0, 14.0, 27.0, 54.0, 63.0, 138.0, 424.0, 4007.0, 964442.0, 77337.0, 1489.0, 257.0, 93.0, 58.0, 31.0, 25.0, 13.0, 7.0, 8.0, 6.0, 6.0, 3.0, 1.0, 2.0, 4.0, 2.0, 2.0, 2.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0088348388671875, -0.008573651313781738, -0.008312463760375977, -0.008051276206970215, -0.007790088653564453, -0.007528901100158691, -0.00726771354675293, -0.007006525993347168, -0.006745338439941406, -0.0064841508865356445, -0.006222963333129883, -0.005961775779724121, -0.005700588226318359, -0.005439400672912598, -0.005178213119506836, -0.004917025566101074, -0.0046558380126953125, -0.004394650459289551, -0.004133462905883789, -0.0038722753524780273, -0.0036110877990722656, -0.003349900245666504, -0.003088712692260742, -0.0028275251388549805, -0.0025663375854492188, -0.002305150032043457, -0.0020439624786376953, -0.0017827749252319336, -0.0015215873718261719, -0.0012603998184204102, -0.0009992122650146484, -0.0007380247116088867, -0.000476837158203125, -0.00021564960479736328, 4.553794860839844e-05, 0.00030672550201416016, 0.0005679130554199219, 0.0008291006088256836, 0.0010902881622314453, 0.001351475715637207, 0.0016126632690429688, 0.0018738508224487305, 0.002135038375854492, 0.002396225929260254, 0.0026574134826660156, 0.0029186010360717773, 0.003179788589477539, 0.0034409761428833008, 0.0037021636962890625, 0.003963351249694824, 0.004224538803100586, 0.004485726356506348, 0.004746913909912109, 0.005008101463317871, 0.005269289016723633, 0.0055304765701293945, 0.005791664123535156, 0.006052851676940918, 0.00631403923034668, 0.006575226783752441, 0.006836414337158203, 0.007097601890563965, 0.0073587894439697266, 0.007619976997375488, 0.00788116455078125]}, "gradients/encoder.encoder.layers.11.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 3.0, 1.0, 2.0, 4.0, 2.0, 1.0, 3.0, 6.0, 6.0, 7.0, 7.0, 12.0, 24.0, 30.0, 47.0, 51.0, 72.0, 94.0, 112.0, 108.0, 109.0, 82.0, 60.0, 41.0, 45.0, 21.0, 14.0, 15.0, 9.0, 9.0, 8.0, 2.0, 3.0, 1.0, 1.0, 3.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0046234130859375, -0.004470169544219971, -0.004316926002502441, -0.004163682460784912, -0.004010438919067383, -0.0038571953773498535, -0.0037039518356323242, -0.003550708293914795, -0.0033974647521972656, -0.0032442212104797363, -0.003090977668762207, -0.0029377341270446777, -0.0027844905853271484, -0.002631247043609619, -0.00247800350189209, -0.0023247599601745605, -0.0021715164184570312, -0.002018272876739502, -0.0018650293350219727, -0.0017117857933044434, -0.001558542251586914, -0.0014052987098693848, -0.0012520551681518555, -0.0010988116264343262, -0.0009455680847167969, -0.0007923245429992676, -0.0006390810012817383, -0.000485837459564209, -0.0003325939178466797, -0.0001793503761291504, -2.6106834411621094e-05, 0.0001271367073059082, 0.0002803802490234375, 0.0004336237907409668, 0.0005868673324584961, 0.0007401108741760254, 0.0008933544158935547, 0.001046597957611084, 0.0011998414993286133, 0.0013530850410461426, 0.0015063285827636719, 0.0016595721244812012, 0.0018128156661987305, 0.0019660592079162598, 0.002119302749633789, 0.0022725462913513184, 0.0024257898330688477, 0.002579033374786377, 0.0027322769165039062, 0.0028855204582214355, 0.003038763999938965, 0.003192007541656494, 0.0033452510833740234, 0.0034984946250915527, 0.003651738166809082, 0.0038049817085266113, 0.003958225250244141, 0.00411146879196167, 0.004264712333679199, 0.0044179558753967285, 0.004571199417114258, 0.004724442958831787, 0.004877686500549316, 0.005030930042266846, 0.005184173583984375]}, "gradients/encoder.encoder.layers.11.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 11.0, 142.0, 818.0, 41.0, 8.0], "bins": [-15.299330711364746, -15.049617767333984, -14.799903869628906, -14.550190925598145, -14.300477981567383, -14.050764083862305, -13.801051139831543, -13.551338195800781, -13.301624298095703, -13.051911354064941, -12.802197456359863, -12.552484512329102, -12.30277156829834, -12.053057670593262, -11.8033447265625, -11.553631782531738, -11.303918838500977, -11.054205894470215, -10.804491996765137, -10.554779052734375, -10.305066108703613, -10.055352210998535, -9.805639266967773, -9.555926322937012, -9.306212425231934, -9.056499481201172, -8.806785583496094, -8.557072639465332, -8.30735969543457, -8.057645797729492, -7.8079328536987305, -7.5582194328308105, -7.308506011962891, -7.058792591094971, -6.809079647064209, -6.559366226196289, -6.309652805328369, -6.059939384460449, -5.8102264404296875, -5.560513019561768, -5.310799598693848, -5.061086177825928, -4.811373233795166, -4.561659812927246, -4.311946392059326, -4.062232971191406, -3.8125200271606445, -3.5628066062927246, -3.313093662261963, -3.063380479812622, -2.813667058944702, -2.5639538764953613, -2.3142404556274414, -2.0645272731781006, -1.8148140907287598, -1.5651007890701294, -1.3153873682022095, -1.065674066543579, -0.8159608244895935, -0.5662475824356079, -0.31653428077697754, -0.06682097911834717, 0.18289220333099365, 0.432605504989624, 0.6823188066482544]}, "gradients/encoder.encoder.layers.11.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 2.0, 2.0, 1.0, 2.0, 4.0, 10.0, 15.0, 8.0, 13.0, 24.0, 18.0, 18.0, 35.0, 31.0, 30.0, 26.0, 41.0, 40.0, 45.0, 57.0, 65.0, 51.0, 61.0, 46.0, 53.0, 45.0, 37.0, 29.0, 30.0, 27.0, 28.0, 17.0, 15.0, 16.0, 11.0, 10.0, 8.0, 11.0, 13.0, 4.0, 3.0, 2.0, 3.0, 2.0, 1.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.5492031574249268, -1.4998905658721924, -1.4505780935287476, -1.4012655019760132, -1.3519530296325684, -1.302640438079834, -1.2533278465270996, -1.2040153741836548, -1.15470290184021, -1.1053903102874756, -1.0560778379440308, -1.0067652463912964, -0.9574527740478516, -0.9081401824951172, -0.8588276505470276, -0.809515118598938, -0.7602025270462036, -0.710889995098114, -0.6615774631500244, -0.61226487159729, -0.5629523992538452, -0.5136398077011108, -0.46432727575302124, -0.41501474380493164, -0.36570221185684204, -0.31638967990875244, -0.26707714796066284, -0.21776458621025085, -0.16845205426216125, -0.11913952231407166, -0.06982696056365967, -0.02051442861557007, 0.028798222541809082, 0.07811076194047928, 0.12742330133914948, 0.17673584818840027, 0.22604838013648987, 0.27536091208457947, 0.32467347383499146, 0.37398600578308105, 0.42329853773117065, 0.47261106967926025, 0.5219236016273499, 0.5712361335754395, 0.6205487251281738, 0.6698611974716187, 0.719173789024353, 0.7684863209724426, 0.8177988529205322, 0.8671113848686218, 0.9164239168167114, 0.9657365083694458, 1.0150489807128906, 1.064361572265625, 1.1136741638183594, 1.1629866361618042, 1.212299108505249, 1.2616117000579834, 1.3109241724014282, 1.3602367639541626, 1.4095492362976074, 1.4588618278503418, 1.5081744194030762, 1.557486891746521, 1.6067994832992554]}, "gradients/encoder.encoder.layers.10.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 1.0, 1.0, 3.0, 2.0, 10.0, 6.0, 11.0, 8.0, 11.0, 34.0, 93.0, 116.0, 196.0, 336.0, 4192625.0, 315.0, 204.0, 124.0, 86.0, 50.0, 23.0, 16.0, 9.0, 7.0, 5.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.60546875, -0.5905227661132812, -0.5755767822265625, -0.5606307983398438, -0.545684814453125, -0.5307388305664062, -0.5157928466796875, -0.5008468627929688, -0.48590087890625, -0.47095489501953125, -0.4560089111328125, -0.44106292724609375, -0.426116943359375, -0.41117095947265625, -0.3962249755859375, -0.38127899169921875, -0.3663330078125, -0.35138702392578125, -0.3364410400390625, -0.32149505615234375, -0.306549072265625, -0.29160308837890625, -0.2766571044921875, -0.26171112060546875, -0.24676513671875, -0.23181915283203125, -0.2168731689453125, -0.20192718505859375, -0.186981201171875, -0.17203521728515625, -0.1570892333984375, -0.14214324951171875, -0.127197265625, -0.11225128173828125, -0.0973052978515625, -0.08235931396484375, -0.067413330078125, -0.05246734619140625, -0.0375213623046875, -0.02257537841796875, -0.00762939453125, 0.00731658935546875, 0.0222625732421875, 0.03720855712890625, 0.052154541015625, 0.06710052490234375, 0.0820465087890625, 0.09699249267578125, 0.1119384765625, 0.12688446044921875, 0.1418304443359375, 0.15677642822265625, 0.171722412109375, 0.18666839599609375, 0.2016143798828125, 0.21656036376953125, 0.23150634765625, 0.24645233154296875, 0.2613983154296875, 0.27634429931640625, 0.291290283203125, 0.30623626708984375, 0.3211822509765625, 0.33612823486328125, 0.35107421875]}, "gradients/encoder.encoder.layers.10.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 6.0, 4.0, 4.0, 2.0, 6.0, 22.0, 40.0, 58.0, 97.0, 142.0, 177.0, 157.0, 114.0, 61.0, 49.0, 34.0, 18.0, 5.0, 9.0, 3.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.2734375, -0.26671600341796875, -0.2599945068359375, -0.25327301025390625, -0.246551513671875, -0.23983001708984375, -0.2331085205078125, -0.22638702392578125, -0.21966552734375, -0.21294403076171875, -0.2062225341796875, -0.19950103759765625, -0.192779541015625, -0.18605804443359375, -0.1793365478515625, -0.17261505126953125, -0.1658935546875, -0.15917205810546875, -0.1524505615234375, -0.14572906494140625, -0.139007568359375, -0.13228607177734375, -0.1255645751953125, -0.11884307861328125, -0.11212158203125, -0.10540008544921875, -0.0986785888671875, -0.09195709228515625, -0.085235595703125, -0.07851409912109375, -0.0717926025390625, -0.06507110595703125, -0.058349609375, -0.05162811279296875, -0.0449066162109375, -0.03818511962890625, -0.031463623046875, -0.02474212646484375, -0.0180206298828125, -0.01129913330078125, -0.00457763671875, 0.00214385986328125, 0.0088653564453125, 0.01558685302734375, 0.022308349609375, 0.02902984619140625, 0.0357513427734375, 0.04247283935546875, 0.0491943359375, 0.05591583251953125, 0.0626373291015625, 0.06935882568359375, 0.076080322265625, 0.08280181884765625, 0.0895233154296875, 0.09624481201171875, 0.10296630859375, 0.10968780517578125, 0.1164093017578125, 0.12313079833984375, 0.129852294921875, 0.13657379150390625, 0.1432952880859375, 0.15001678466796875, 0.15673828125]}, "gradients/encoder.encoder.layers.10.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 3.0, 1.0, 3.0, 1.0, 3.0, 7.0, 20.0, 21.0, 24.0, 41.0, 59.0, 74.0, 99.0, 206.0, 562.0, 4192348.0, 349.0, 146.0, 81.0, 59.0, 55.0, 56.0, 25.0, 20.0, 14.0, 13.0, 2.0, 4.0, 0.0, 2.0, 0.0, 3.0], "bins": [-0.14599609375, -0.14291048049926758, -0.13982486724853516, -0.13673925399780273, -0.1336536407470703, -0.1305680274963379, -0.12748241424560547, -0.12439680099487305, -0.12131118774414062, -0.1182255744934082, -0.11513996124267578, -0.11205434799194336, -0.10896873474121094, -0.10588312149047852, -0.1027975082397461, -0.09971189498901367, -0.09662628173828125, -0.09354066848754883, -0.0904550552368164, -0.08736944198608398, -0.08428382873535156, -0.08119821548461914, -0.07811260223388672, -0.0750269889831543, -0.07194137573242188, -0.06885576248168945, -0.06577014923095703, -0.06268453598022461, -0.05959892272949219, -0.056513309478759766, -0.053427696228027344, -0.05034208297729492, -0.0472564697265625, -0.04417085647583008, -0.041085243225097656, -0.037999629974365234, -0.03491401672363281, -0.03182840347290039, -0.02874279022216797, -0.025657176971435547, -0.022571563720703125, -0.019485950469970703, -0.01640033721923828, -0.01331472396850586, -0.010229110717773438, -0.007143497467041016, -0.004057884216308594, -0.0009722709655761719, 0.00211334228515625, 0.005198955535888672, 0.008284568786621094, 0.011370182037353516, 0.014455795288085938, 0.01754140853881836, 0.02062702178955078, 0.023712635040283203, 0.026798248291015625, 0.029883861541748047, 0.03296947479248047, 0.03605508804321289, 0.03914070129394531, 0.042226314544677734, 0.045311927795410156, 0.04839754104614258, 0.051483154296875]}, "gradients/encoder.encoder.layers.10.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 4090.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00800323486328125, -0.007353663444519043, -0.006704092025756836, -0.006054520606994629, -0.005404949188232422, -0.004755377769470215, -0.004105806350708008, -0.0034562349319458008, -0.0028066635131835938, -0.0021570920944213867, -0.0015075206756591797, -0.0008579492568969727, -0.00020837783813476562, 0.0004411935806274414, 0.0010907649993896484, 0.0017403364181518555, 0.0023899078369140625, 0.0030394792556762695, 0.0036890506744384766, 0.004338622093200684, 0.004988193511962891, 0.005637764930725098, 0.006287336349487305, 0.006936907768249512, 0.007586479187011719, 0.008236050605773926, 0.008885622024536133, 0.00953519344329834, 0.010184764862060547, 0.010834336280822754, 0.011483907699584961, 0.012133479118347168, 0.012783050537109375, 0.013432621955871582, 0.014082193374633789, 0.014731764793395996, 0.015381336212158203, 0.01603090763092041, 0.016680479049682617, 0.017330050468444824, 0.01797962188720703, 0.01862919330596924, 0.019278764724731445, 0.019928336143493652, 0.02057790756225586, 0.021227478981018066, 0.021877050399780273, 0.02252662181854248, 0.023176193237304688, 0.023825764656066895, 0.0244753360748291, 0.02512490749359131, 0.025774478912353516, 0.026424050331115723, 0.02707362174987793, 0.027723193168640137, 0.028372764587402344, 0.02902233600616455, 0.029671907424926758, 0.030321478843688965, 0.030971050262451172, 0.03162062168121338, 0.032270193099975586, 0.03291976451873779, 0.0335693359375]}, "gradients/encoder.encoder.layers.10.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 4.0, 27.0, 947.0, 30.0, 10.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0270156841725111, -0.022794904187321663, -0.018574122339487076, -0.014353342354297638, -0.0101325623691082, -0.005911782383918762, -0.0016910005360841751, 0.0025297794491052628, 0.006750559434294701, 0.010971339419484138, 0.015192120335996151, 0.019412901252508163, 0.0236336812376976, 0.02785446122288704, 0.03207524120807648, 0.03629602491855621, 0.04051680117845535, 0.04473758116364479, 0.04895836114883423, 0.053179144859313965, 0.057399921119213104, 0.06162070482969284, 0.06584148108959198, 0.07006226480007172, 0.07428304851055145, 0.07850383222103119, 0.08272460848093033, 0.08694539219141006, 0.0911661684513092, 0.09538695216178894, 0.09960773587226868, 0.10382851213216782, 0.10804930329322815, 0.11227008700370789, 0.11649086326360703, 0.12071164697408676, 0.1249324232339859, 0.12915320694446564, 0.13337399065494537, 0.13759475946426392, 0.14181554317474365, 0.1460363268852234, 0.15025711059570312, 0.15447787940502167, 0.1586986631155014, 0.16291944682598114, 0.16714023053646088, 0.17136099934577942, 0.17558178305625916, 0.1798025667667389, 0.18402335047721863, 0.18824411928653717, 0.1924649029970169, 0.19668568670749664, 0.20090647041797638, 0.20512723922729492, 0.20934803783893585, 0.2135688215494156, 0.21778960525989532, 0.22201037406921387, 0.2262311577796936, 0.23045194149017334, 0.23467272520065308, 0.2388935089111328, 0.24311427772045135]}, "gradients/encoder.encoder.layers.10.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 3.0, 8.0, 14.0, 22.0, 41.0, 84.0, 119.0, 133.0, 149.0, 142.0, 98.0, 82.0, 51.0, 28.0, 18.0, 6.0, 3.0, 7.0, 3.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.019415318965911865, -0.018163617700338364, -0.016911914572119713, -0.015660211443901062, -0.01440851017832756, -0.013156807981431484, -0.011905105784535408, -0.010653403587639332, -0.009401701390743256, -0.00814999919384718, -0.006898296996951103, -0.005646594800055027, -0.004394892603158951, -0.0031431904062628746, -0.0018914882093667984, -0.0006397860124707222, 0.000611916184425354, 0.0018636183813214302, 0.0031153205782175064, 0.004367022775113583, 0.005618724972009659, 0.006870427168905735, 0.008122129365801811, 0.009373831562697887, 0.010625533759593964, 0.01187723595649004, 0.013128938153386116, 0.014380640350282192, 0.01563234254717827, 0.01688404381275177, 0.01813574694097042, 0.01938745006918907, 0.020639151334762573, 0.021890852600336075, 0.023142555728554726, 0.024394258856773376, 0.025645960122346878, 0.02689766138792038, 0.02814936451613903, 0.02940106764435768, 0.030652768909931183, 0.031904470175504684, 0.033156171441078186, 0.034407876431941986, 0.03565957769751549, 0.03691127896308899, 0.03816298395395279, 0.03941468521952629, 0.04066638648509979, 0.041918087750673294, 0.043169789016246796, 0.044421494007110596, 0.0456731952726841, 0.0469248965382576, 0.0481766015291214, 0.0494283027946949, 0.0506800040602684, 0.051931705325841904, 0.053183406591415405, 0.054435111582279205, 0.05568681284785271, 0.05693851411342621, 0.05819021910429001, 0.05944192036986351, 0.06069362163543701]}, "gradients/encoder.encoder.layers.10.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 3.0, 2.0, 9.0, 4.0, 11.0, 6.0, 17.0, 27.0, 24.0, 38.0, 70.0, 98.0, 195.0, 328.0, 724.0, 2022.0, 6987.0, 43155.0, 831386.0, 143467.0, 14628.0, 3206.0, 1138.0, 453.0, 217.0, 131.0, 73.0, 38.0, 25.0, 24.0, 9.0, 13.0, 14.0, 7.0, 7.0, 1.0, 2.0, 0.0, 3.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.401611328125, -0.38970184326171875, -0.3777923583984375, -0.36588287353515625, -0.353973388671875, -0.34206390380859375, -0.3301544189453125, -0.31824493408203125, -0.30633544921875, -0.29442596435546875, -0.2825164794921875, -0.27060699462890625, -0.258697509765625, -0.24678802490234375, -0.2348785400390625, -0.22296905517578125, -0.2110595703125, -0.19915008544921875, -0.1872406005859375, -0.17533111572265625, -0.163421630859375, -0.15151214599609375, -0.1396026611328125, -0.12769317626953125, -0.11578369140625, -0.10387420654296875, -0.0919647216796875, -0.08005523681640625, -0.068145751953125, -0.05623626708984375, -0.0443267822265625, -0.03241729736328125, -0.0205078125, -0.00859832763671875, 0.0033111572265625, 0.01522064208984375, 0.027130126953125, 0.03903961181640625, 0.0509490966796875, 0.06285858154296875, 0.07476806640625, 0.08667755126953125, 0.0985870361328125, 0.11049652099609375, 0.122406005859375, 0.13431549072265625, 0.1462249755859375, 0.15813446044921875, 0.1700439453125, 0.18195343017578125, 0.1938629150390625, 0.20577239990234375, 0.217681884765625, 0.22959136962890625, 0.2415008544921875, 0.25341033935546875, 0.26531982421875, 0.27722930908203125, 0.2891387939453125, 0.30104827880859375, 0.312957763671875, 0.32486724853515625, 0.3367767333984375, 0.34868621826171875, 0.360595703125]}, "gradients/encoder.encoder.layers.10.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 6.0, 4.0, 4.0, 2.0, 6.0, 23.0, 40.0, 57.0, 95.0, 142.0, 180.0, 156.0, 113.0, 63.0, 48.0, 35.0, 17.0, 4.0, 10.0, 3.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.2734375, -0.26671600341796875, -0.2599945068359375, -0.25327301025390625, -0.246551513671875, -0.23983001708984375, -0.2331085205078125, -0.22638702392578125, -0.21966552734375, -0.21294403076171875, -0.2062225341796875, -0.19950103759765625, -0.192779541015625, -0.18605804443359375, -0.1793365478515625, -0.17261505126953125, -0.1658935546875, -0.15917205810546875, -0.1524505615234375, -0.14572906494140625, -0.139007568359375, -0.13228607177734375, -0.1255645751953125, -0.11884307861328125, -0.11212158203125, -0.10540008544921875, -0.0986785888671875, -0.09195709228515625, -0.085235595703125, -0.07851409912109375, -0.0717926025390625, -0.06507110595703125, -0.058349609375, -0.05162811279296875, -0.0449066162109375, -0.03818511962890625, -0.031463623046875, -0.02474212646484375, -0.0180206298828125, -0.01129913330078125, -0.00457763671875, 0.00214385986328125, 0.0088653564453125, 0.01558685302734375, 0.022308349609375, 0.02902984619140625, 0.0357513427734375, 0.04247283935546875, 0.0491943359375, 0.05591583251953125, 0.0626373291015625, 0.06935882568359375, 0.076080322265625, 0.08280181884765625, 0.0895233154296875, 0.09624481201171875, 0.10296630859375, 0.10968780517578125, 0.1164093017578125, 0.12313079833984375, 0.129852294921875, 0.13657379150390625, 0.1432952880859375, 0.15001678466796875, 0.15673828125]}, "gradients/encoder.encoder.layers.10.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 3.0, 2.0, 3.0, 2.0, 6.0, 3.0, 5.0, 13.0, 15.0, 12.0, 15.0, 14.0, 21.0, 29.0, 28.0, 29.0, 47.0, 51.0, 93.0, 177.0, 489.0, 2068.0, 21115.0, 918856.0, 99308.0, 4582.0, 877.0, 260.0, 115.0, 53.0, 43.0, 40.0, 39.0, 24.0, 19.0, 18.0, 16.0, 19.0, 8.0, 11.0, 11.0, 12.0, 5.0, 0.0, 1.0, 2.0, 1.0, 4.0, 1.0, 2.0, 1.0, 0.0, 0.0, 3.0], "bins": [-0.490966796875, -0.47640228271484375, -0.4618377685546875, -0.44727325439453125, -0.432708740234375, -0.41814422607421875, -0.4035797119140625, -0.38901519775390625, -0.37445068359375, -0.35988616943359375, -0.3453216552734375, -0.33075714111328125, -0.316192626953125, -0.30162811279296875, -0.2870635986328125, -0.27249908447265625, -0.2579345703125, -0.24337005615234375, -0.2288055419921875, -0.21424102783203125, -0.199676513671875, -0.18511199951171875, -0.1705474853515625, -0.15598297119140625, -0.14141845703125, -0.12685394287109375, -0.1122894287109375, -0.09772491455078125, -0.083160400390625, -0.06859588623046875, -0.0540313720703125, -0.03946685791015625, -0.02490234375, -0.01033782958984375, 0.0042266845703125, 0.01879119873046875, 0.033355712890625, 0.04792022705078125, 0.0624847412109375, 0.07704925537109375, 0.09161376953125, 0.10617828369140625, 0.1207427978515625, 0.13530731201171875, 0.149871826171875, 0.16443634033203125, 0.1790008544921875, 0.19356536865234375, 0.2081298828125, 0.22269439697265625, 0.2372589111328125, 0.25182342529296875, 0.266387939453125, 0.28095245361328125, 0.2955169677734375, 0.31008148193359375, 0.32464599609375, 0.33921051025390625, 0.3537750244140625, 0.36833953857421875, 0.382904052734375, 0.39746856689453125, 0.4120330810546875, 0.42659759521484375, 0.441162109375]}, "gradients/encoder.encoder.layers.10.attention.v_proj.bias": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 1.0, 2.0, 1.0, 4.0, 1.0, 2.0, 1.0, 0.0, 6.0, 11.0, 11.0, 11.0, 9.0, 19.0, 15.0, 18.0, 18.0, 24.0, 38.0, 35.0, 33.0, 27.0, 33.0, 39.0, 47.0, 46.0, 50.0, 53.0, 39.0, 49.0, 33.0, 36.0, 38.0, 34.0, 38.0, 26.0, 25.0, 29.0, 21.0, 14.0, 15.0, 12.0, 15.0, 13.0, 5.0, 3.0, 6.0, 2.0, 3.0, 2.0, 3.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.267578125, -0.2587394714355469, -0.24990081787109375, -0.24106216430664062, -0.2322235107421875, -0.22338485717773438, -0.21454620361328125, -0.20570755004882812, -0.196868896484375, -0.18803024291992188, -0.17919158935546875, -0.17035293579101562, -0.1615142822265625, -0.15267562866210938, -0.14383697509765625, -0.13499832153320312, -0.12615966796875, -0.11732101440429688, -0.10848236083984375, -0.09964370727539062, -0.0908050537109375, -0.08196640014648438, -0.07312774658203125, -0.06428909301757812, -0.055450439453125, -0.046611785888671875, -0.03777313232421875, -0.028934478759765625, -0.0200958251953125, -0.011257171630859375, -0.00241851806640625, 0.006420135498046875, 0.0152587890625, 0.024097442626953125, 0.03293609619140625, 0.041774749755859375, 0.0506134033203125, 0.059452056884765625, 0.06829071044921875, 0.07712936401367188, 0.085968017578125, 0.09480667114257812, 0.10364532470703125, 0.11248397827148438, 0.1213226318359375, 0.13016128540039062, 0.13899993896484375, 0.14783859252929688, 0.15667724609375, 0.16551589965820312, 0.17435455322265625, 0.18319320678710938, 0.1920318603515625, 0.20087051391601562, 0.20970916748046875, 0.21854782104492188, 0.227386474609375, 0.23622512817382812, 0.24506378173828125, 0.2539024353027344, 0.2627410888671875, 0.2715797424316406, 0.28041839599609375, 0.2892570495605469, 0.298095703125]}, "gradients/encoder.encoder.layers.10.attention.k_proj.weight": {"_type": "histogram", "values": [3.0, 2.0, 1.0, 1.0, 2.0, 2.0, 5.0, 5.0, 7.0, 8.0, 18.0, 22.0, 30.0, 49.0, 73.0, 135.0, 319.0, 898.0, 3087.0, 16857.0, 391094.0, 612051.0, 19003.0, 3230.0, 881.0, 345.0, 178.0, 91.0, 44.0, 35.0, 24.0, 17.0, 10.0, 8.0, 17.0, 2.0, 4.0, 2.0, 2.0, 2.0, 2.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00316619873046875, -0.0030156373977661133, -0.0028650760650634766, -0.00271451473236084, -0.002563953399658203, -0.0024133920669555664, -0.0022628307342529297, -0.002112269401550293, -0.0019617080688476562, -0.0018111467361450195, -0.0016605854034423828, -0.001510024070739746, -0.0013594627380371094, -0.0012089014053344727, -0.001058340072631836, -0.0009077787399291992, -0.0007572174072265625, -0.0006066560745239258, -0.00045609474182128906, -0.00030553340911865234, -0.00015497207641601562, -4.410743713378906e-06, 0.0001461505889892578, 0.00029671192169189453, 0.00044727325439453125, 0.000597834587097168, 0.0007483959197998047, 0.0008989572525024414, 0.0010495185852050781, 0.0012000799179077148, 0.0013506412506103516, 0.0015012025833129883, 0.001651763916015625, 0.0018023252487182617, 0.0019528865814208984, 0.002103447914123535, 0.002254009246826172, 0.0024045705795288086, 0.0025551319122314453, 0.002705693244934082, 0.0028562545776367188, 0.0030068159103393555, 0.003157377243041992, 0.003307938575744629, 0.0034584999084472656, 0.0036090612411499023, 0.003759622573852539, 0.003910183906555176, 0.0040607452392578125, 0.004211306571960449, 0.004361867904663086, 0.004512429237365723, 0.004662990570068359, 0.004813551902770996, 0.004964113235473633, 0.0051146745681762695, 0.005265235900878906, 0.005415797233581543, 0.00556635856628418, 0.005716919898986816, 0.005867481231689453, 0.00601804256439209, 0.0061686038970947266, 0.006319165229797363, 0.0064697265625]}, "gradients/encoder.encoder.layers.10.attention.k_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 2.0, 4.0, 2.0, 4.0, 5.0, 5.0, 5.0, 8.0, 17.0, 9.0, 12.0, 14.0, 23.0, 26.0, 25.0, 18.0, 28.0, 26.0, 27.0, 42.0, 29.0, 46.0, 25.0, 36.0, 96.0, 54.0, 44.0, 39.0, 37.0, 40.0, 30.0, 29.0, 24.0, 26.0, 26.0, 27.0, 17.0, 19.0, 8.0, 15.0, 8.0, 5.0, 6.0, 7.0, 1.0, 5.0, 2.0, 4.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 2.0], "bins": [-1.9669532775878906e-06, -1.905485987663269e-06, -1.8440186977386475e-06, -1.7825514078140259e-06, -1.7210841178894043e-06, -1.6596168279647827e-06, -1.5981495380401611e-06, -1.5366822481155396e-06, -1.475214958190918e-06, -1.4137476682662964e-06, -1.3522803783416748e-06, -1.2908130884170532e-06, -1.2293457984924316e-06, -1.16787850856781e-06, -1.1064112186431885e-06, -1.044943928718567e-06, -9.834766387939453e-07, -9.220093488693237e-07, -8.605420589447021e-07, -7.990747690200806e-07, -7.37607479095459e-07, -6.761401891708374e-07, -6.146728992462158e-07, -5.532056093215942e-07, -4.917383193969727e-07, -4.302710294723511e-07, -3.688037395477295e-07, -3.073364496231079e-07, -2.4586915969848633e-07, -1.8440186977386475e-07, -1.2293457984924316e-07, -6.146728992462158e-08, 0.0, 6.146728992462158e-08, 1.2293457984924316e-07, 1.8440186977386475e-07, 2.4586915969848633e-07, 3.073364496231079e-07, 3.688037395477295e-07, 4.302710294723511e-07, 4.917383193969727e-07, 5.532056093215942e-07, 6.146728992462158e-07, 6.761401891708374e-07, 7.37607479095459e-07, 7.990747690200806e-07, 8.605420589447021e-07, 9.220093488693237e-07, 9.834766387939453e-07, 1.044943928718567e-06, 1.1064112186431885e-06, 1.16787850856781e-06, 1.2293457984924316e-06, 1.2908130884170532e-06, 1.3522803783416748e-06, 1.4137476682662964e-06, 1.475214958190918e-06, 1.5366822481155396e-06, 1.5981495380401611e-06, 1.6596168279647827e-06, 1.7210841178894043e-06, 1.7825514078140259e-06, 1.8440186977386475e-06, 1.905485987663269e-06, 1.9669532775878906e-06]}, "gradients/encoder.encoder.layers.10.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 1.0, 6.0, 1.0, 1.0, 1.0, 2.0, 3.0, 8.0, 11.0, 5.0, 12.0, 14.0, 29.0, 30.0, 56.0, 86.0, 182.0, 622.0, 4586.0, 780127.0, 258473.0, 3365.0, 491.0, 177.0, 93.0, 51.0, 25.0, 22.0, 22.0, 17.0, 10.0, 10.0, 8.0, 3.0, 3.0, 3.0, 2.0, 2.0, 2.0, 3.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.00881195068359375, -0.008536100387573242, -0.008260250091552734, -0.007984399795532227, -0.007708549499511719, -0.007432699203491211, -0.007156848907470703, -0.006880998611450195, -0.0066051483154296875, -0.00632929801940918, -0.006053447723388672, -0.005777597427368164, -0.005501747131347656, -0.0052258968353271484, -0.004950046539306641, -0.004674196243286133, -0.004398345947265625, -0.004122495651245117, -0.0038466453552246094, -0.0035707950592041016, -0.0032949447631835938, -0.003019094467163086, -0.002743244171142578, -0.0024673938751220703, -0.0021915435791015625, -0.0019156932830810547, -0.0016398429870605469, -0.001363992691040039, -0.0010881423950195312, -0.0008122920989990234, -0.0005364418029785156, -0.0002605915069580078, 1.52587890625e-05, 0.0002911090850830078, 0.0005669593811035156, 0.0008428096771240234, 0.0011186599731445312, 0.001394510269165039, 0.0016703605651855469, 0.0019462108612060547, 0.0022220611572265625, 0.0024979114532470703, 0.002773761749267578, 0.003049612045288086, 0.0033254623413085938, 0.0036013126373291016, 0.0038771629333496094, 0.004153013229370117, 0.004428863525390625, 0.004704713821411133, 0.004980564117431641, 0.0052564144134521484, 0.005532264709472656, 0.005808115005493164, 0.006083965301513672, 0.00635981559753418, 0.0066356658935546875, 0.006911516189575195, 0.007187366485595703, 0.007463216781616211, 0.007739067077636719, 0.008014917373657227, 0.008290767669677734, 0.008566617965698242, 0.00884246826171875]}, "gradients/encoder.encoder.layers.10.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 3.0, 2.0, 2.0, 2.0, 3.0, 4.0, 2.0, 8.0, 10.0, 11.0, 15.0, 22.0, 23.0, 18.0, 36.0, 58.0, 71.0, 55.0, 89.0, 83.0, 95.0, 79.0, 74.0, 52.0, 47.0, 39.0, 22.0, 26.0, 13.0, 10.0, 5.0, 11.0, 6.0, 5.0, 2.0, 2.0, 0.0, 2.0, 5.0, 2.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.005367279052734375, -0.0051996707916259766, -0.005032062530517578, -0.00486445426940918, -0.004696846008300781, -0.004529237747192383, -0.004361629486083984, -0.004194021224975586, -0.0040264129638671875, -0.003858804702758789, -0.0036911964416503906, -0.003523588180541992, -0.0033559799194335938, -0.0031883716583251953, -0.003020763397216797, -0.0028531551361083984, -0.002685546875, -0.0025179386138916016, -0.002350330352783203, -0.0021827220916748047, -0.0020151138305664062, -0.0018475055694580078, -0.0016798973083496094, -0.001512289047241211, -0.0013446807861328125, -0.001177072525024414, -0.0010094642639160156, -0.0008418560028076172, -0.0006742477416992188, -0.0005066394805908203, -0.0003390312194824219, -0.00017142295837402344, -3.814697265625e-06, 0.00016379356384277344, 0.0003314018249511719, 0.0004990100860595703, 0.0006666183471679688, 0.0008342266082763672, 0.0010018348693847656, 0.001169443130493164, 0.0013370513916015625, 0.001504659652709961, 0.0016722679138183594, 0.0018398761749267578, 0.0020074844360351562, 0.0021750926971435547, 0.002342700958251953, 0.0025103092193603516, 0.00267791748046875, 0.0028455257415771484, 0.003013134002685547, 0.0031807422637939453, 0.0033483505249023438, 0.003515958786010742, 0.0036835670471191406, 0.003851175308227539, 0.0040187835693359375, 0.004186391830444336, 0.004354000091552734, 0.004521608352661133, 0.004689216613769531, 0.00485682487487793, 0.005024433135986328, 0.0051920413970947266, 0.005359649658203125]}, "gradients/encoder.encoder.layers.10.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 174.0, 826.0, 17.0, 0.0, 3.0], "bins": [-25.381790161132812, -24.95965576171875, -24.537519454956055, -24.11538314819336, -23.693248748779297, -23.271114349365234, -22.84897804260254, -22.426841735839844, -22.00470733642578, -21.58257293701172, -21.160436630249023, -20.738300323486328, -20.316165924072266, -19.894031524658203, -19.471895217895508, -19.049758911132812, -18.62762451171875, -18.205490112304688, -17.783353805541992, -17.361217498779297, -16.939083099365234, -16.516948699951172, -16.094812393188477, -15.672677040100098, -15.250541687011719, -14.82840633392334, -14.406270980834961, -13.984135627746582, -13.562000274658203, -13.139864921569824, -12.717729568481445, -12.295594215393066, -11.873459815979004, -11.451324462890625, -11.029189109802246, -10.607053756713867, -10.184918403625488, -9.76278305053711, -9.34064769744873, -8.918512344360352, -8.496376991271973, -8.074241638183594, -7.652106285095215, -7.229970932006836, -6.807835578918457, -6.385700225830078, -5.963564872741699, -5.54142951965332, -5.119294166564941, -4.6971588134765625, -4.275023460388184, -3.8528881072998047, -3.430752754211426, -3.008617401123047, -2.586482048034668, -2.164346694946289, -1.7422113418579102, -1.3200759887695312, -0.8979406356811523, -0.47580528259277344, -0.05366992950439453, 0.3684654235839844, 0.7906007766723633, 1.2127361297607422, 1.634871482849121]}, "gradients/encoder.encoder.layers.10.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 4.0, 5.0, 8.0, 2.0, 8.0, 9.0, 8.0, 12.0, 10.0, 14.0, 20.0, 24.0, 17.0, 28.0, 26.0, 31.0, 36.0, 44.0, 44.0, 44.0, 51.0, 39.0, 49.0, 53.0, 47.0, 42.0, 38.0, 29.0, 45.0, 32.0, 37.0, 22.0, 21.0, 17.0, 21.0, 16.0, 13.0, 4.0, 3.0, 11.0, 10.0, 7.0, 6.0, 3.0, 2.0, 2.0, 0.0, 1.0, 4.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.370889663696289, -1.3299832344055176, -1.2890769243240356, -1.2481706142425537, -1.2072641849517822, -1.1663577556610107, -1.1254514455795288, -1.0845451354980469, -1.0436387062072754, -1.002732276916504, -0.961825966835022, -0.9209195971488953, -0.8800132274627686, -0.8391068577766418, -0.7982004880905151, -0.7572941184043884, -0.7163877487182617, -0.675481379032135, -0.6345750093460083, -0.5936686396598816, -0.5527622699737549, -0.5118559002876282, -0.47094953060150146, -0.43004316091537476, -0.38913679122924805, -0.34823042154312134, -0.30732405185699463, -0.2664176821708679, -0.2255113124847412, -0.1846049427986145, -0.1436985731124878, -0.10279220342636108, -0.061885952949523926, -0.020979583263397217, 0.019926786422729492, 0.0608331561088562, 0.10173952579498291, 0.14264589548110962, 0.18355226516723633, 0.22445863485336304, 0.26536500453948975, 0.30627137422561646, 0.34717774391174316, 0.3880841135978699, 0.4289904832839966, 0.4698968529701233, 0.51080322265625, 0.5517095923423767, 0.5926159620285034, 0.6335223317146301, 0.6744287014007568, 0.7153350710868835, 0.7562414407730103, 0.797147810459137, 0.8380541801452637, 0.8789605498313904, 0.9198669195175171, 0.9607732892036438, 1.0016796588897705, 1.042586088180542, 1.083492398262024, 1.1243987083435059, 1.1653051376342773, 1.2062115669250488, 1.2471178770065308]}, "gradients/encoder.encoder.layers.9.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 3.0, 14.0, 10.0, 13.0, 47.0, 68.0, 151.0, 363.0, 1605.0, 4189915.0, 1432.0, 347.0, 148.0, 79.0, 46.0, 21.0, 7.0, 7.0, 5.0, 5.0, 4.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0026493072509765625, -0.0025365054607391357, -0.002423703670501709, -0.0023109018802642822, -0.0021981000900268555, -0.0020852982997894287, -0.001972496509552002, -0.0018596947193145752, -0.0017468929290771484, -0.0016340911388397217, -0.001521289348602295, -0.0014084875583648682, -0.0012956857681274414, -0.0011828839778900146, -0.0010700821876525879, -0.0009572803974151611, -0.0008444786071777344, -0.0007316768169403076, -0.0006188750267028809, -0.0005060732364654541, -0.00039327144622802734, -0.0002804696559906006, -0.00016766786575317383, -5.486607551574707e-05, 5.793571472167969e-05, 0.00017073750495910645, 0.0002835392951965332, 0.00039634108543395996, 0.0005091428756713867, 0.0006219446659088135, 0.0007347464561462402, 0.000847548246383667, 0.0009603500366210938, 0.0010731518268585205, 0.0011859536170959473, 0.001298755407333374, 0.0014115571975708008, 0.0015243589878082275, 0.0016371607780456543, 0.001749962568283081, 0.0018627643585205078, 0.0019755661487579346, 0.0020883679389953613, 0.002201169729232788, 0.002313971519470215, 0.0024267733097076416, 0.0025395750999450684, 0.002652376890182495, 0.002765178680419922, 0.0028779804706573486, 0.0029907822608947754, 0.003103584051132202, 0.003216385841369629, 0.0033291876316070557, 0.0034419894218444824, 0.003554791212081909, 0.003667593002319336, 0.0037803947925567627, 0.0038931965827941895, 0.004005998373031616, 0.004118800163269043, 0.00423160195350647, 0.0043444037437438965, 0.004457205533981323, 0.00457000732421875]}, "gradients/encoder.encoder.layers.9.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 3.0, 4.0, 7.0, 3.0, 2.0, 8.0, 20.0, 50.0, 57.0, 109.0, 151.0, 167.0, 150.0, 108.0, 65.0, 49.0, 28.0, 15.0, 5.0, 7.0, 2.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.2705078125, -0.26383399963378906, -0.2571601867675781, -0.2504863739013672, -0.24381256103515625, -0.2371387481689453, -0.23046493530273438, -0.22379112243652344, -0.2171173095703125, -0.21044349670410156, -0.20376968383789062, -0.1970958709716797, -0.19042205810546875, -0.1837482452392578, -0.17707443237304688, -0.17040061950683594, -0.163726806640625, -0.15705299377441406, -0.15037918090820312, -0.1437053680419922, -0.13703155517578125, -0.1303577423095703, -0.12368392944335938, -0.11701011657714844, -0.1103363037109375, -0.10366249084472656, -0.09698867797851562, -0.09031486511230469, -0.08364105224609375, -0.07696723937988281, -0.07029342651367188, -0.06361961364746094, -0.05694580078125, -0.05027198791503906, -0.043598175048828125, -0.03692436218261719, -0.03025054931640625, -0.023576736450195312, -0.016902923583984375, -0.010229110717773438, -0.0035552978515625, 0.0031185150146484375, 0.009792327880859375, 0.016466140747070312, 0.02313995361328125, 0.029813766479492188, 0.036487579345703125, 0.04316139221191406, 0.049835205078125, 0.05650901794433594, 0.06318283081054688, 0.06985664367675781, 0.07653045654296875, 0.08320426940917969, 0.08987808227539062, 0.09655189514160156, 0.1032257080078125, 0.10989952087402344, 0.11657333374023438, 0.12324714660644531, 0.12992095947265625, 0.1365947723388672, 0.14326858520507812, 0.14994239807128906, 0.1566162109375]}, "gradients/encoder.encoder.layers.9.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 2.0, 2.0, 6.0, 5.0, 3.0, 3.0, 11.0, 13.0, 22.0, 16.0, 30.0, 48.0, 59.0, 84.0, 106.0, 137.0, 209.0, 309.0, 524.0, 972.0, 2198.0, 4184674.0, 2287.0, 967.0, 469.0, 313.0, 190.0, 156.0, 116.0, 92.0, 60.0, 59.0, 35.0, 27.0, 25.0, 19.0, 14.0, 9.0, 6.0, 8.0, 3.0, 1.0, 1.0, 1.0, 0.0, 2.0, 2.0, 0.0, 1.0, 2.0], "bins": [-0.00284576416015625, -0.0027656853199005127, -0.0026856064796447754, -0.002605527639389038, -0.0025254487991333008, -0.0024453699588775635, -0.002365291118621826, -0.002285212278366089, -0.0022051334381103516, -0.0021250545978546143, -0.002044975757598877, -0.0019648969173431396, -0.0018848180770874023, -0.001804739236831665, -0.0017246603965759277, -0.0016445815563201904, -0.0015645027160644531, -0.0014844238758087158, -0.0014043450355529785, -0.0013242661952972412, -0.001244187355041504, -0.0011641085147857666, -0.0010840296745300293, -0.001003950834274292, -0.0009238719940185547, -0.0008437931537628174, -0.0007637143135070801, -0.0006836354732513428, -0.0006035566329956055, -0.0005234777927398682, -0.00044339895248413086, -0.00036332011222839355, -0.00028324127197265625, -0.00020316243171691895, -0.00012308359146118164, -4.3004751205444336e-05, 3.707408905029297e-05, 0.00011715292930603027, 0.00019723176956176758, 0.0002773106098175049, 0.0003573894500732422, 0.0004374682903289795, 0.0005175471305847168, 0.0005976259708404541, 0.0006777048110961914, 0.0007577836513519287, 0.000837862491607666, 0.0009179413318634033, 0.0009980201721191406, 0.001078099012374878, 0.0011581778526306152, 0.0012382566928863525, 0.0013183355331420898, 0.0013984143733978271, 0.0014784932136535645, 0.0015585720539093018, 0.001638650894165039, 0.0017187297344207764, 0.0017988085746765137, 0.001878887414932251, 0.0019589662551879883, 0.0020390450954437256, 0.002119123935699463, 0.0021992027759552, 0.0022792816162109375]}, "gradients/encoder.encoder.layers.9.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 1.0, 0.0, 0.0, 1.0, 1.0, 8.0, 4070.0, 4.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0007262229919433594, -0.0006937012076377869, -0.0006611794233322144, -0.0006286576390266418, -0.0005961358547210693, -0.0005636140704154968, -0.0005310922861099243, -0.0004985705018043518, -0.0004660487174987793, -0.0004335269331932068, -0.0004010051488876343, -0.00036848336458206177, -0.00033596158027648926, -0.00030343979597091675, -0.00027091801166534424, -0.00023839622735977173, -0.00020587444305419922, -0.0001733526587486267, -0.0001408308744430542, -0.00010830909013748169, -7.578730583190918e-05, -4.326552152633667e-05, -1.074373722076416e-05, 2.177804708480835e-05, 5.429983139038086e-05, 8.682161569595337e-05, 0.00011934340000152588, 0.0001518651843070984, 0.0001843869686126709, 0.0002169087529182434, 0.0002494305372238159, 0.00028195232152938843, 0.00031447410583496094, 0.00034699589014053345, 0.00037951767444610596, 0.00041203945875167847, 0.000444561243057251, 0.0004770830273628235, 0.000509604811668396, 0.0005421265959739685, 0.000574648380279541, 0.0006071701645851135, 0.000639691948890686, 0.0006722137331962585, 0.0007047355175018311, 0.0007372573018074036, 0.0007697790861129761, 0.0008023008704185486, 0.0008348226547241211, 0.0008673444390296936, 0.0008998662233352661, 0.0009323880076408386, 0.0009649097919464111, 0.0009974315762519836, 0.0010299533605575562, 0.0010624751448631287, 0.0010949969291687012, 0.0011275187134742737, 0.0011600404977798462, 0.0011925622820854187, 0.0012250840663909912, 0.0012576058506965637, 0.0012901276350021362, 0.0013226494193077087, 0.0013551712036132812]}, "gradients/encoder.encoder.layers.9.final_layer_norm.weight": {"_type": "histogram", "values": [7.0, 90.0, 784.0, 127.0, 12.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00048796393093653023, -0.000296044338028878, -0.00010412477422505617, 8.779481868259609e-05, 0.0002797143824864179, 0.0004716339462902397, 0.0006635535974055529, 0.0008554731030017138, 0.0010473927250131965, 0.0012393123470246792, 0.00143123185262084, 0.0016231514746323228, 0.0018150710966438055, 0.0020069906022399664, 0.002198910340666771, 0.002390829846262932, 0.0025827493518590927, 0.0027746688574552536, 0.002966588595882058, 0.003158508101478219, 0.00335042760707438, 0.0035423473455011845, 0.0037342668510973454, 0.003926186356693506, 0.004118105862289667, 0.004310025367885828, 0.004501944873481989, 0.004693864844739437, 0.004885784350335598, 0.005077703855931759, 0.00526962336152792, 0.005461542867124081, 0.005653462838381529, 0.00584538234397769, 0.006037301849573851, 0.006229221820831299, 0.00642114132642746, 0.006613060832023621, 0.0068049803376197815, 0.006996899843215942, 0.007188819348812103, 0.007380738854408264, 0.007572658360004425, 0.007764577865600586, 0.007956497371196747, 0.008148416876792908, 0.008340336382389069, 0.00853225588798523, 0.008724176324903965, 0.008916095830500126, 0.009108015336096287, 0.009299934841692448, 0.009491854347288609, 0.00968377385288477, 0.009875694289803505, 0.010067613795399666, 0.010259532369673252, 0.010451451875269413, 0.010643371380865574, 0.010835290886461735, 0.011027210392057896, 0.011219129897654057, 0.011411050334572792, 0.011602969840168953, 0.011794889345765114]}, "gradients/encoder.encoder.layers.9.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 3.0, 4.0, 3.0, 3.0, 9.0, 10.0, 11.0, 21.0, 19.0, 30.0, 30.0, 35.0, 25.0, 31.0, 47.0, 53.0, 50.0, 60.0, 50.0, 41.0, 47.0, 53.0, 50.0, 48.0, 48.0, 36.0, 39.0, 36.0, 34.0, 22.0, 18.0, 12.0, 10.0, 8.0, 3.0, 5.0, 4.0, 4.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0011696815490722656, -0.0011360328644514084, -0.0011023841798305511, -0.001068735495209694, -0.0010350868105888367, -0.0010014381259679794, -0.0009677894413471222, -0.000934140756726265, -0.0009004920721054077, -0.0008668433874845505, -0.0008331947028636932, -0.000799546018242836, -0.0007658973336219788, -0.0007322486490011215, -0.0006985999643802643, -0.000664951279759407, -0.0006313025951385498, -0.0005976539105176926, -0.0005640052258968353, -0.0005303565412759781, -0.0004967078566551208, -0.0004630591720342636, -0.00042941048741340637, -0.00039576180279254913, -0.0003621131181716919, -0.00032846443355083466, -0.0002948157489299774, -0.0002611670643091202, -0.00022751837968826294, -0.0001938696950674057, -0.00016022101044654846, -0.00012657232582569122, -9.292364120483398e-05, -5.9274956583976746e-05, -2.5626271963119507e-05, 8.022412657737732e-06, 4.167109727859497e-05, 7.531978189945221e-05, 0.00010896846652030945, 0.0001426171511411667, 0.00017626583576202393, 0.00020991452038288116, 0.0002435632050037384, 0.00027721188962459564, 0.0003108605742454529, 0.0003445092588663101, 0.00037815794348716736, 0.0004118066281080246, 0.00044545531272888184, 0.0004791039973497391, 0.0005127526819705963, 0.0005464013665914536, 0.0005800500512123108, 0.000613698735833168, 0.0006473474204540253, 0.0006809961050748825, 0.0007146447896957397, 0.000748293474316597, 0.0007819421589374542, 0.0008155908435583115, 0.0008492395281791687, 0.0008828882128000259, 0.0009165368974208832, 0.0009501855820417404, 0.0009838342666625977]}, "gradients/encoder.encoder.layers.9.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 5.0, 6.0, 9.0, 9.0, 10.0, 14.0, 18.0, 15.0, 17.0, 38.0, 39.0, 42.0, 74.0, 157.0, 183.0, 336.0, 744.0, 1635.0, 4180.0, 14885.0, 78290.0, 737828.0, 173003.0, 26321.0, 6373.0, 2214.0, 1002.0, 397.0, 238.0, 140.0, 98.0, 49.0, 52.0, 30.0, 23.0, 27.0, 19.0, 7.0, 9.0, 4.0, 5.0, 8.0, 5.0, 6.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.26611328125, -0.2582130432128906, -0.25031280517578125, -0.24241256713867188, -0.2345123291015625, -0.22661209106445312, -0.21871185302734375, -0.21081161499023438, -0.202911376953125, -0.19501113891601562, -0.18711090087890625, -0.17921066284179688, -0.1713104248046875, -0.16341018676757812, -0.15550994873046875, -0.14760971069335938, -0.13970947265625, -0.13180923461914062, -0.12390899658203125, -0.11600875854492188, -0.1081085205078125, -0.10020828247070312, -0.09230804443359375, -0.08440780639648438, -0.076507568359375, -0.06860733032226562, -0.06070709228515625, -0.052806854248046875, -0.0449066162109375, -0.037006378173828125, -0.02910614013671875, -0.021205902099609375, -0.0133056640625, -0.005405426025390625, 0.00249481201171875, 0.010395050048828125, 0.0182952880859375, 0.026195526123046875, 0.03409576416015625, 0.041996002197265625, 0.049896240234375, 0.057796478271484375, 0.06569671630859375, 0.07359695434570312, 0.0814971923828125, 0.08939743041992188, 0.09729766845703125, 0.10519790649414062, 0.11309814453125, 0.12099838256835938, 0.12889862060546875, 0.13679885864257812, 0.1446990966796875, 0.15259933471679688, 0.16049957275390625, 0.16839981079101562, 0.176300048828125, 0.18420028686523438, 0.19210052490234375, 0.20000076293945312, 0.2079010009765625, 0.21580123901367188, 0.22370147705078125, 0.23160171508789062, 0.239501953125]}, "gradients/encoder.encoder.layers.9.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 3.0, 4.0, 7.0, 2.0, 3.0, 8.0, 20.0, 50.0, 57.0, 109.0, 150.0, 168.0, 151.0, 107.0, 65.0, 49.0, 28.0, 15.0, 5.0, 7.0, 2.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.2705078125, -0.26383399963378906, -0.2571601867675781, -0.2504863739013672, -0.24381256103515625, -0.2371387481689453, -0.23046493530273438, -0.22379112243652344, -0.2171173095703125, -0.21044349670410156, -0.20376968383789062, -0.1970958709716797, -0.19042205810546875, -0.1837482452392578, -0.17707443237304688, -0.17040061950683594, -0.163726806640625, -0.15705299377441406, -0.15037918090820312, -0.1437053680419922, -0.13703155517578125, -0.1303577423095703, -0.12368392944335938, -0.11701011657714844, -0.1103363037109375, -0.10366249084472656, -0.09698867797851562, -0.09031486511230469, -0.08364105224609375, -0.07696723937988281, -0.07029342651367188, -0.06361961364746094, -0.05694580078125, -0.05027198791503906, -0.043598175048828125, -0.03692436218261719, -0.03025054931640625, -0.023576736450195312, -0.016902923583984375, -0.010229110717773438, -0.0035552978515625, 0.0031185150146484375, 0.009792327880859375, 0.016466140747070312, 0.02313995361328125, 0.029813766479492188, 0.036487579345703125, 0.04316139221191406, 0.049835205078125, 0.05650901794433594, 0.06318283081054688, 0.06985664367675781, 0.07653045654296875, 0.08320426940917969, 0.08987808227539062, 0.09655189514160156, 0.1032257080078125, 0.10989952087402344, 0.11657333374023438, 0.12324714660644531, 0.12992095947265625, 0.1365947723388672, 0.14326858520507812, 0.14994239807128906, 0.1566162109375]}, "gradients/encoder.encoder.layers.9.attention.v_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 4.0, 1.0, 2.0, 3.0, 5.0, 5.0, 4.0, 10.0, 8.0, 12.0, 7.0, 11.0, 18.0, 14.0, 16.0, 29.0, 24.0, 38.0, 62.0, 63.0, 133.0, 328.0, 1121.0, 5152.0, 42131.0, 842062.0, 142941.0, 11240.0, 1995.0, 509.0, 199.0, 116.0, 68.0, 45.0, 31.0, 35.0, 22.0, 14.0, 13.0, 13.0, 14.0, 7.0, 12.0, 6.0, 4.0, 8.0, 4.0, 5.0, 0.0, 3.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.2822265625, -0.2723960876464844, -0.26256561279296875, -0.2527351379394531, -0.2429046630859375, -0.23307418823242188, -0.22324371337890625, -0.21341323852539062, -0.203582763671875, -0.19375228881835938, -0.18392181396484375, -0.17409133911132812, -0.1642608642578125, -0.15443038940429688, -0.14459991455078125, -0.13476943969726562, -0.12493896484375, -0.11510848999023438, -0.10527801513671875, -0.09544754028320312, -0.0856170654296875, -0.07578659057617188, -0.06595611572265625, -0.056125640869140625, -0.046295166015625, -0.036464691162109375, -0.02663421630859375, -0.016803741455078125, -0.0069732666015625, 0.002857208251953125, 0.01268768310546875, 0.022518157958984375, 0.0323486328125, 0.042179107666015625, 0.05200958251953125, 0.061840057373046875, 0.0716705322265625, 0.08150100708007812, 0.09133148193359375, 0.10116195678710938, 0.110992431640625, 0.12082290649414062, 0.13065338134765625, 0.14048385620117188, 0.1503143310546875, 0.16014480590820312, 0.16997528076171875, 0.17980575561523438, 0.18963623046875, 0.19946670532226562, 0.20929718017578125, 0.21912765502929688, 0.2289581298828125, 0.23878860473632812, 0.24861907958984375, 0.2584495544433594, 0.268280029296875, 0.2781105041503906, 0.28794097900390625, 0.2977714538574219, 0.3076019287109375, 0.3174324035644531, 0.32726287841796875, 0.3370933532714844, 0.346923828125]}, "gradients/encoder.encoder.layers.9.attention.v_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 5.0, 4.0, 8.0, 4.0, 6.0, 12.0, 7.0, 14.0, 13.0, 13.0, 15.0, 19.0, 36.0, 26.0, 39.0, 41.0, 49.0, 40.0, 43.0, 34.0, 41.0, 42.0, 43.0, 34.0, 41.0, 49.0, 30.0, 33.0, 28.0, 43.0, 33.0, 21.0, 27.0, 15.0, 14.0, 16.0, 13.0, 7.0, 11.0, 9.0, 10.0, 4.0, 5.0, 6.0, 3.0, 1.0, 1.0, 3.0, 2.0, 0.0, 2.0], "bins": [-0.285400390625, -0.2773151397705078, -0.2692298889160156, -0.26114463806152344, -0.25305938720703125, -0.24497413635253906, -0.23688888549804688, -0.2288036346435547, -0.2207183837890625, -0.2126331329345703, -0.20454788208007812, -0.19646263122558594, -0.18837738037109375, -0.18029212951660156, -0.17220687866210938, -0.1641216278076172, -0.156036376953125, -0.1479511260986328, -0.13986587524414062, -0.13178062438964844, -0.12369537353515625, -0.11561012268066406, -0.10752487182617188, -0.09943962097167969, -0.0913543701171875, -0.08326911926269531, -0.07518386840820312, -0.06709861755371094, -0.05901336669921875, -0.05092811584472656, -0.042842864990234375, -0.03475761413574219, -0.02667236328125, -0.018587112426757812, -0.010501861572265625, -0.0024166107177734375, 0.00566864013671875, 0.013753890991210938, 0.021839141845703125, 0.029924392700195312, 0.0380096435546875, 0.04609489440917969, 0.054180145263671875, 0.06226539611816406, 0.07035064697265625, 0.07843589782714844, 0.08652114868164062, 0.09460639953613281, 0.102691650390625, 0.11077690124511719, 0.11886215209960938, 0.12694740295410156, 0.13503265380859375, 0.14311790466308594, 0.15120315551757812, 0.1592884063720703, 0.1673736572265625, 0.1754589080810547, 0.18354415893554688, 0.19162940979003906, 0.19971466064453125, 0.20779991149902344, 0.21588516235351562, 0.2239704132080078, 0.2320556640625]}, "gradients/encoder.encoder.layers.9.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 4.0, 0.0, 2.0, 5.0, 14.0, 7.0, 8.0, 20.0, 31.0, 41.0, 84.0, 120.0, 261.0, 523.0, 1515.0, 6089.0, 82149.0, 934062.0, 19033.0, 2871.0, 909.0, 344.0, 202.0, 70.0, 67.0, 34.0, 34.0, 10.0, 17.0, 8.0, 5.0, 7.0, 3.0, 7.0, 1.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.006572723388671875, -0.006290256977081299, -0.006007790565490723, -0.0057253241539001465, -0.00544285774230957, -0.005160391330718994, -0.004877924919128418, -0.004595458507537842, -0.004312992095947266, -0.0040305256843566895, -0.0037480592727661133, -0.003465592861175537, -0.003183126449584961, -0.0029006600379943848, -0.0026181936264038086, -0.0023357272148132324, -0.0020532608032226562, -0.00177079439163208, -0.001488327980041504, -0.0012058615684509277, -0.0009233951568603516, -0.0006409287452697754, -0.0003584623336791992, -7.599592208862305e-05, 0.00020647048950195312, 0.0004889369010925293, 0.0007714033126831055, 0.0010538697242736816, 0.0013363361358642578, 0.001618802547454834, 0.0019012689590454102, 0.0021837353706359863, 0.0024662017822265625, 0.0027486681938171387, 0.003031134605407715, 0.003313601016998291, 0.003596067428588867, 0.0038785338401794434, 0.0041610002517700195, 0.004443466663360596, 0.004725933074951172, 0.005008399486541748, 0.005290865898132324, 0.0055733323097229, 0.0058557987213134766, 0.006138265132904053, 0.006420731544494629, 0.006703197956085205, 0.006985664367675781, 0.007268130779266357, 0.007550597190856934, 0.00783306360244751, 0.008115530014038086, 0.008397996425628662, 0.008680462837219238, 0.008962929248809814, 0.00924539566040039, 0.009527862071990967, 0.009810328483581543, 0.01009279489517212, 0.010375261306762695, 0.010657727718353271, 0.010940194129943848, 0.011222660541534424, 0.011505126953125]}, "gradients/encoder.encoder.layers.9.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 3.0, 1.0, 1.0, 2.0, 3.0, 2.0, 2.0, 1.0, 12.0, 4.0, 8.0, 20.0, 13.0, 13.0, 49.0, 33.0, 27.0, 62.0, 27.0, 30.0, 80.0, 35.0, 39.0, 96.0, 35.0, 44.0, 90.0, 33.0, 35.0, 74.0, 15.0, 45.0, 16.0, 10.0, 16.0, 8.0, 8.0, 16.0, 2.0, 2.0, 7.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.086162567138672e-06, -2.0060688257217407e-06, -1.9259750843048096e-06, -1.8458813428878784e-06, -1.7657876014709473e-06, -1.6856938600540161e-06, -1.605600118637085e-06, -1.5255063772201538e-06, -1.4454126358032227e-06, -1.3653188943862915e-06, -1.2852251529693604e-06, -1.2051314115524292e-06, -1.125037670135498e-06, -1.044943928718567e-06, -9.648501873016357e-07, -8.847564458847046e-07, -8.046627044677734e-07, -7.245689630508423e-07, -6.444752216339111e-07, -5.6438148021698e-07, -4.842877388000488e-07, -4.041939973831177e-07, -3.241002559661865e-07, -2.4400651454925537e-07, -1.6391277313232422e-07, -8.381903171539307e-08, -3.725290298461914e-09, 7.636845111846924e-08, 1.564621925354004e-07, 2.3655593395233154e-07, 3.166496753692627e-07, 3.9674341678619385e-07, 4.76837158203125e-07, 5.569308996200562e-07, 6.370246410369873e-07, 7.171183824539185e-07, 7.972121238708496e-07, 8.773058652877808e-07, 9.57399606704712e-07, 1.037493348121643e-06, 1.1175870895385742e-06, 1.1976808309555054e-06, 1.2777745723724365e-06, 1.3578683137893677e-06, 1.4379620552062988e-06, 1.51805579662323e-06, 1.5981495380401611e-06, 1.6782432794570923e-06, 1.7583370208740234e-06, 1.8384307622909546e-06, 1.9185245037078857e-06, 1.998618245124817e-06, 2.078711986541748e-06, 2.158805727958679e-06, 2.2388994693756104e-06, 2.3189932107925415e-06, 2.3990869522094727e-06, 2.479180693626404e-06, 2.559274435043335e-06, 2.639368176460266e-06, 2.7194619178771973e-06, 2.7995556592941284e-06, 2.8796494007110596e-06, 2.9597431421279907e-06, 3.039836883544922e-06]}, "gradients/encoder.encoder.layers.9.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 4.0, 1.0, 4.0, 0.0, 3.0, 5.0, 6.0, 3.0, 6.0, 9.0, 19.0, 44.0, 44.0, 131.0, 341.0, 1290.0, 7860.0, 647087.0, 382907.0, 7038.0, 1152.0, 322.0, 114.0, 56.0, 26.0, 24.0, 18.0, 9.0, 10.0, 6.0, 6.0, 6.0, 3.0, 1.0, 1.0, 1.0, 2.0, 0.0, 4.0, 0.0, 3.0, 1.0, 0.0, 1.0], "bins": [-0.01230621337890625, -0.011982202529907227, -0.011658191680908203, -0.01133418083190918, -0.011010169982910156, -0.010686159133911133, -0.01036214828491211, -0.010038137435913086, -0.009714126586914062, -0.009390115737915039, -0.009066104888916016, -0.008742094039916992, -0.008418083190917969, -0.008094072341918945, -0.007770061492919922, -0.0074460506439208984, -0.007122039794921875, -0.0067980289459228516, -0.006474018096923828, -0.006150007247924805, -0.005825996398925781, -0.005501985549926758, -0.005177974700927734, -0.004853963851928711, -0.0045299530029296875, -0.004205942153930664, -0.0038819313049316406, -0.003557920455932617, -0.0032339096069335938, -0.0029098987579345703, -0.002585887908935547, -0.0022618770599365234, -0.0019378662109375, -0.0016138553619384766, -0.0012898445129394531, -0.0009658336639404297, -0.0006418228149414062, -0.0003178119659423828, 6.198883056640625e-06, 0.00033020973205566406, 0.0006542205810546875, 0.000978231430053711, 0.0013022422790527344, 0.0016262531280517578, 0.0019502639770507812, 0.0022742748260498047, 0.002598285675048828, 0.0029222965240478516, 0.003246307373046875, 0.0035703182220458984, 0.003894329071044922, 0.004218339920043945, 0.004542350769042969, 0.004866361618041992, 0.005190372467041016, 0.005514383316040039, 0.0058383941650390625, 0.006162405014038086, 0.006486415863037109, 0.006810426712036133, 0.007134437561035156, 0.00745844841003418, 0.007782459259033203, 0.008106470108032227, 0.00843048095703125]}, "gradients/encoder.encoder.layers.9.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 3.0, 0.0, 4.0, 0.0, 2.0, 1.0, 1.0, 1.0, 3.0, 6.0, 5.0, 7.0, 7.0, 10.0, 11.0, 19.0, 13.0, 23.0, 27.0, 54.0, 82.0, 96.0, 149.0, 138.0, 110.0, 68.0, 51.0, 41.0, 12.0, 19.0, 11.0, 6.0, 7.0, 4.0, 6.0, 4.0, 3.0, 1.0, 4.0, 1.0, 4.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.006988525390625, -0.006720066070556641, -0.006451606750488281, -0.006183147430419922, -0.0059146881103515625, -0.005646228790283203, -0.005377769470214844, -0.005109310150146484, -0.004840850830078125, -0.004572391510009766, -0.004303932189941406, -0.004035472869873047, -0.0037670135498046875, -0.003498554229736328, -0.0032300949096679688, -0.0029616355895996094, -0.00269317626953125, -0.0024247169494628906, -0.0021562576293945312, -0.0018877983093261719, -0.0016193389892578125, -0.0013508796691894531, -0.0010824203491210938, -0.0008139610290527344, -0.000545501708984375, -0.0002770423889160156, -8.58306884765625e-06, 0.0002598762512207031, 0.0005283355712890625, 0.0007967948913574219, 0.0010652542114257812, 0.0013337135314941406, 0.0016021728515625, 0.0018706321716308594, 0.0021390914916992188, 0.002407550811767578, 0.0026760101318359375, 0.002944469451904297, 0.0032129287719726562, 0.0034813880920410156, 0.003749847412109375, 0.004018306732177734, 0.004286766052246094, 0.004555225372314453, 0.0048236846923828125, 0.005092144012451172, 0.005360603332519531, 0.005629062652587891, 0.00589752197265625, 0.006165981292724609, 0.006434440612792969, 0.006702899932861328, 0.0069713592529296875, 0.007239818572998047, 0.007508277893066406, 0.007776737213134766, 0.008045196533203125, 0.008313655853271484, 0.008582115173339844, 0.008850574493408203, 0.009119033813476562, 0.009387493133544922, 0.009655952453613281, 0.00992441177368164, 0.01019287109375]}, "gradients/encoder.encoder.layers.9.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 10.0, 173.0, 799.0, 32.0, 5.0, 0.0, 2.0], "bins": [-15.032050132751465, -14.778197288513184, -14.524343490600586, -14.270490646362305, -14.016637802124023, -13.762784004211426, -13.508931159973145, -13.255078315734863, -13.001224517822266, -12.747371673583984, -12.493517875671387, -12.239665031433105, -11.985812187194824, -11.731958389282227, -11.478105545043945, -11.224252700805664, -10.970399856567383, -10.716547012329102, -10.462693214416504, -10.208840370178223, -9.954987525939941, -9.701133728027344, -9.447280883789062, -9.193428039550781, -8.939574241638184, -8.685721397399902, -8.431867599487305, -8.178014755249023, -7.924161434173584, -7.6703081130981445, -7.416455268859863, -7.162601947784424, -6.908748626708984, -6.654895305633545, -6.401042461395264, -6.147189140319824, -5.893335819244385, -5.639482498168945, -5.385629653930664, -5.131776332855225, -4.877923011779785, -4.624069690704346, -4.3702168464660645, -4.116363525390625, -3.8625102043151855, -3.608657121658325, -3.354804039001465, -3.1009507179260254, -2.847097873687744, -2.593244791030884, -2.3393914699554443, -2.085538387298584, -1.831685185432434, -1.5778319835662842, -1.3239789009094238, -1.070125699043274, -0.816272497177124, -0.5624192953109741, -0.308566153049469, -0.05471301078796387, 0.19914019107818604, 0.45299339294433594, 0.7068464756011963, 0.9606996774673462, 1.214552879333496]}, "gradients/encoder.encoder.layers.9.layer_norm.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 2.0, 2.0, 3.0, 2.0, 3.0, 3.0, 4.0, 5.0, 6.0, 6.0, 7.0, 7.0, 20.0, 14.0, 22.0, 21.0, 22.0, 22.0, 28.0, 29.0, 38.0, 33.0, 30.0, 37.0, 34.0, 35.0, 47.0, 41.0, 41.0, 53.0, 60.0, 32.0, 31.0, 27.0, 29.0, 26.0, 26.0, 19.0, 20.0, 18.0, 14.0, 22.0, 11.0, 13.0, 11.0, 9.0, 10.0, 8.0, 2.0, 1.0, 4.0, 1.0, 3.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-1.0176076889038086, -0.9845306873321533, -0.9514537453651428, -0.9183767437934875, -0.8852997422218323, -0.8522228002548218, -0.8191457986831665, -0.7860687971115112, -0.752991795539856, -0.7199147939682007, -0.6868378520011902, -0.6537608504295349, -0.6206838488578796, -0.5876069068908691, -0.5545299053192139, -0.5214529037475586, -0.4883759617805481, -0.4552989900112152, -0.42222198843955994, -0.38914501667022705, -0.3560680150985718, -0.3229910433292389, -0.289914071559906, -0.25683706998825073, -0.22376009821891785, -0.19068311154842377, -0.1576061248779297, -0.1245291531085968, -0.09145216643810272, -0.05837517976760864, -0.025298207998275757, 0.007778778672218323, 0.0408557653427124, 0.07393275201320648, 0.10700973123311996, 0.14008671045303345, 0.17316369712352753, 0.2062406837940216, 0.2393176555633545, 0.27239465713500977, 0.30547162890434265, 0.33854860067367554, 0.3716256022453308, 0.4047025740146637, 0.4377795457839966, 0.47085654735565186, 0.5039335489273071, 0.5370104908943176, 0.5700874924659729, 0.6031644940376282, 0.6362414360046387, 0.669318437576294, 0.7023954391479492, 0.7354724407196045, 0.768549382686615, 0.8016263842582703, 0.8347033262252808, 0.867780327796936, 0.9008572697639465, 0.9339342713356018, 0.9670112729072571, 1.0000882148742676, 1.0331652164459229, 1.0662422180175781, 1.0993192195892334]}, "gradients/encoder.encoder.layers.8.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 3.0, 6.0, 5.0, 11.0, 16.0, 26.0, 43.0, 101.0, 192.0, 453.0, 1397.0, 12487.0, 4176193.0, 2197.0, 634.0, 258.0, 114.0, 55.0, 29.0, 22.0, 16.0, 13.0, 9.0, 3.0, 4.0, 1.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0179443359375, -0.017169475555419922, -0.016394615173339844, -0.015619754791259766, -0.014844894409179688, -0.01407003402709961, -0.013295173645019531, -0.012520313262939453, -0.011745452880859375, -0.010970592498779297, -0.010195732116699219, -0.00942087173461914, -0.008646011352539062, -0.007871150970458984, -0.007096290588378906, -0.006321430206298828, -0.00554656982421875, -0.004771709442138672, -0.003996849060058594, -0.0032219886779785156, -0.0024471282958984375, -0.0016722679138183594, -0.0008974075317382812, -0.00012254714965820312, 0.000652313232421875, 0.0014271736145019531, 0.0022020339965820312, 0.0029768943786621094, 0.0037517547607421875, 0.004526615142822266, 0.005301475524902344, 0.006076335906982422, 0.0068511962890625, 0.007626056671142578, 0.008400917053222656, 0.009175777435302734, 0.009950637817382812, 0.01072549819946289, 0.011500358581542969, 0.012275218963623047, 0.013050079345703125, 0.013824939727783203, 0.014599800109863281, 0.01537466049194336, 0.016149520874023438, 0.016924381256103516, 0.017699241638183594, 0.018474102020263672, 0.01924896240234375, 0.020023822784423828, 0.020798683166503906, 0.021573543548583984, 0.022348403930664062, 0.02312326431274414, 0.02389812469482422, 0.024672985076904297, 0.025447845458984375, 0.026222705841064453, 0.02699756622314453, 0.02777242660522461, 0.028547286987304688, 0.029322147369384766, 0.030097007751464844, 0.030871868133544922, 0.031646728515625]}, "gradients/encoder.encoder.layers.8.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 2.0, 8.0, 3.0, 2.0, 4.0, 8.0, 17.0, 49.0, 72.0, 106.0, 156.0, 172.0, 140.0, 109.0, 65.0, 51.0, 22.0, 10.0, 8.0, 5.0, 1.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.26904296875, -0.26239013671875, -0.2557373046875, -0.24908447265625, -0.242431640625, -0.23577880859375, -0.2291259765625, -0.22247314453125, -0.2158203125, -0.20916748046875, -0.2025146484375, -0.19586181640625, -0.189208984375, -0.18255615234375, -0.1759033203125, -0.16925048828125, -0.16259765625, -0.15594482421875, -0.1492919921875, -0.14263916015625, -0.135986328125, -0.12933349609375, -0.1226806640625, -0.11602783203125, -0.109375, -0.10272216796875, -0.0960693359375, -0.08941650390625, -0.082763671875, -0.07611083984375, -0.0694580078125, -0.06280517578125, -0.05615234375, -0.04949951171875, -0.0428466796875, -0.03619384765625, -0.029541015625, -0.02288818359375, -0.0162353515625, -0.00958251953125, -0.0029296875, 0.00372314453125, 0.0103759765625, 0.01702880859375, 0.023681640625, 0.03033447265625, 0.0369873046875, 0.04364013671875, 0.05029296875, 0.05694580078125, 0.0635986328125, 0.07025146484375, 0.076904296875, 0.08355712890625, 0.0902099609375, 0.09686279296875, 0.103515625, 0.11016845703125, 0.1168212890625, 0.12347412109375, 0.130126953125, 0.13677978515625, 0.1434326171875, 0.15008544921875, 0.15673828125]}, "gradients/encoder.encoder.layers.8.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 1.0, 2.0, 5.0, 5.0, 13.0, 17.0, 17.0, 13.0, 44.0, 48.0, 65.0, 84.0, 106.0, 205.0, 251.0, 362.0, 497.0, 691.0, 1162.0, 2119.0, 4644.0, 20168.0, 4142772.0, 12237.0, 3753.0, 1743.0, 1008.0, 682.0, 477.0, 343.0, 220.0, 151.0, 112.0, 94.0, 51.0, 28.0, 31.0, 19.0, 15.0, 11.0, 6.0, 5.0, 5.0, 6.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 1.0], "bins": [-0.007045745849609375, -0.00681382417678833, -0.006581902503967285, -0.00634998083114624, -0.006118059158325195, -0.00588613748550415, -0.0056542158126831055, -0.0054222941398620605, -0.005190372467041016, -0.004958450794219971, -0.004726529121398926, -0.004494607448577881, -0.004262685775756836, -0.004030764102935791, -0.003798842430114746, -0.003566920757293701, -0.0033349990844726562, -0.0031030774116516113, -0.0028711557388305664, -0.0026392340660095215, -0.0024073123931884766, -0.0021753907203674316, -0.0019434690475463867, -0.0017115473747253418, -0.0014796257019042969, -0.001247704029083252, -0.001015782356262207, -0.0007838606834411621, -0.0005519390106201172, -0.00032001733779907227, -8.809566497802734e-05, 0.00014382600784301758, 0.0003757476806640625, 0.0006076693534851074, 0.0008395910263061523, 0.0010715126991271973, 0.0013034343719482422, 0.0015353560447692871, 0.001767277717590332, 0.001999199390411377, 0.002231121063232422, 0.002463042736053467, 0.0026949644088745117, 0.0029268860816955566, 0.0031588077545166016, 0.0033907294273376465, 0.0036226511001586914, 0.0038545727729797363, 0.004086494445800781, 0.004318416118621826, 0.004550337791442871, 0.004782259464263916, 0.005014181137084961, 0.005246102809906006, 0.005478024482727051, 0.005709946155548096, 0.005941867828369141, 0.0061737895011901855, 0.0064057111740112305, 0.006637632846832275, 0.00686955451965332, 0.007101476192474365, 0.00733339786529541, 0.007565319538116455, 0.0077972412109375]}, "gradients/encoder.encoder.layers.8.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 3.0, 0.0, 4.0, 2.0, 5.0, 9.0, 8.0, 45.0, 3945.0, 33.0, 10.0, 7.0, 3.0, 4.0, 1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.006458282470703125, -0.00627589225769043, -0.006093502044677734, -0.005911111831665039, -0.005728721618652344, -0.0055463314056396484, -0.005363941192626953, -0.005181550979614258, -0.0049991607666015625, -0.004816770553588867, -0.004634380340576172, -0.0044519901275634766, -0.004269599914550781, -0.004087209701538086, -0.0039048194885253906, -0.0037224292755126953, -0.0035400390625, -0.0033576488494873047, -0.0031752586364746094, -0.002992868423461914, -0.0028104782104492188, -0.0026280879974365234, -0.002445697784423828, -0.002263307571411133, -0.0020809173583984375, -0.0018985271453857422, -0.0017161369323730469, -0.0015337467193603516, -0.0013513565063476562, -0.001168966293334961, -0.0009865760803222656, -0.0008041858673095703, -0.000621795654296875, -0.0004394054412841797, -0.0002570152282714844, -7.462501525878906e-05, 0.00010776519775390625, 0.00029015541076660156, 0.0004725456237792969, 0.0006549358367919922, 0.0008373260498046875, 0.0010197162628173828, 0.0012021064758300781, 0.0013844966888427734, 0.0015668869018554688, 0.001749277114868164, 0.0019316673278808594, 0.0021140575408935547, 0.00229644775390625, 0.0024788379669189453, 0.0026612281799316406, 0.002843618392944336, 0.0030260086059570312, 0.0032083988189697266, 0.003390789031982422, 0.003573179244995117, 0.0037555694580078125, 0.003937959671020508, 0.004120349884033203, 0.0043027400970458984, 0.004485130310058594, 0.004667520523071289, 0.004849910736083984, 0.00503230094909668, 0.005214691162109375]}, "gradients/encoder.encoder.layers.8.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 4.0, 0.0, 4.0, 6.0, 10.0, 24.0, 39.0, 97.0, 172.0, 329.0, 208.0, 67.0, 30.0, 19.0, 3.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.012767667882144451, -0.012305238284170628, -0.01184280775487423, -0.011380378156900406, -0.010917948558926582, -0.010455518960952759, -0.009993089362978935, -0.009530658833682537, -0.009068229235708714, -0.00860579963773489, -0.008143369108438492, -0.007680939510464668, -0.007218509912490845, -0.006756080314517021, -0.00629365025088191, -0.0058312201872467995, -0.005368790589272976, -0.004906360991299152, -0.0044439309276640415, -0.003981500864028931, -0.003519071266055107, -0.00305664143525064, -0.0025942116044461727, -0.0021317817736417055, -0.0016693519428372383, -0.0012069221120327711, -0.0007444922812283039, -0.0002820624504238367, 0.0001803673803806305, 0.0006427972111850977, 0.001105227041989565, 0.001567656872794032, 0.0020300857722759247, 0.002492515603080392, 0.002954945433884859, 0.0034173752646893263, 0.0038798050954937935, 0.004342234693467617, 0.004804664757102728, 0.005267094820737839, 0.005729524418711662, 0.006191954016685486, 0.006654384080320597, 0.0071168141439557076, 0.007579243741929531, 0.008041673339903355, 0.008504103869199753, 0.008966533467173576, 0.0094289630651474, 0.009891392663121223, 0.010353822261095047, 0.010816252790391445, 0.011278682388365269, 0.011741111986339092, 0.01220354251563549, 0.012665972113609314, 0.013128401711583138, 0.013590831309556961, 0.014053260907530785, 0.014515691436827183, 0.014978121034801006, 0.01544055063277483, 0.015902981162071228, 0.01636541076004505, 0.016827840358018875]}, "gradients/encoder.encoder.layers.8.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 4.0, 4.0, 2.0, 4.0, 5.0, 13.0, 14.0, 19.0, 11.0, 30.0, 46.0, 25.0, 38.0, 43.0, 47.0, 56.0, 61.0, 55.0, 52.0, 62.0, 47.0, 51.0, 55.0, 43.0, 43.0, 33.0, 31.0, 22.0, 21.0, 22.0, 14.0, 15.0, 6.0, 7.0, 2.0, 1.0, 3.0, 2.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00803065299987793, -0.0077604930847883224, -0.007490333169698715, -0.007220173254609108, -0.006950013339519501, -0.0066798534244298935, -0.006409693509340286, -0.006139533594250679, -0.005869373679161072, -0.0055992137640714645, -0.005329053848981857, -0.00505889393389225, -0.004788734018802643, -0.004518574103713036, -0.004248414188623428, -0.003978254273533821, -0.003708094358444214, -0.0034379344433546066, -0.0031677745282649994, -0.002897614613175392, -0.002627454698085785, -0.0023572947829961777, -0.0020871348679065704, -0.0018169749528169632, -0.001546815037727356, -0.0012766551226377487, -0.0010064952075481415, -0.0007363352924585342, -0.000466175377368927, -0.00019601546227931976, 7.414445281028748e-05, 0.0003443043678998947, 0.000614464282989502, 0.0008846241980791092, 0.0011547841131687164, 0.0014249440282583237, 0.001695103943347931, 0.001965263858437538, 0.0022354237735271454, 0.0025055836886167526, 0.00277574360370636, 0.003045903518795967, 0.0033160634338855743, 0.0035862233489751816, 0.003856383264064789, 0.004126543179154396, 0.004396703094244003, 0.0046668630093336105, 0.004937022924423218, 0.005207182839512825, 0.005477342754602432, 0.0057475026696920395, 0.006017662584781647, 0.006287822499871254, 0.006557982414960861, 0.0068281423300504684, 0.007098302245140076, 0.007368462160229683, 0.00763862207531929, 0.007908781990408897, 0.008178941905498505, 0.008449101820588112, 0.008719261735677719, 0.008989421650767326, 0.009259581565856934]}, "gradients/encoder.encoder.layers.8.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 3.0, 3.0, 2.0, 4.0, 2.0, 8.0, 6.0, 9.0, 13.0, 18.0, 27.0, 42.0, 54.0, 93.0, 168.0, 246.0, 507.0, 1127.0, 3243.0, 13531.0, 110360.0, 831301.0, 72896.0, 10067.0, 2810.0, 1000.0, 439.0, 212.0, 110.0, 72.0, 50.0, 37.0, 21.0, 28.0, 15.0, 10.0, 11.0, 6.0, 3.0, 4.0, 3.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.3447265625, -0.3333930969238281, -0.32205963134765625, -0.3107261657714844, -0.2993927001953125, -0.2880592346191406, -0.27672576904296875, -0.2653923034667969, -0.254058837890625, -0.24272537231445312, -0.23139190673828125, -0.22005844116210938, -0.2087249755859375, -0.19739151000976562, -0.18605804443359375, -0.17472457885742188, -0.16339111328125, -0.15205764770507812, -0.14072418212890625, -0.12939071655273438, -0.1180572509765625, -0.10672378540039062, -0.09539031982421875, -0.08405685424804688, -0.072723388671875, -0.061389923095703125, -0.05005645751953125, -0.038722991943359375, -0.0273895263671875, -0.016056060791015625, -0.00472259521484375, 0.006610870361328125, 0.0179443359375, 0.029277801513671875, 0.04061126708984375, 0.051944732666015625, 0.0632781982421875, 0.07461166381835938, 0.08594512939453125, 0.09727859497070312, 0.108612060546875, 0.11994552612304688, 0.13127899169921875, 0.14261245727539062, 0.1539459228515625, 0.16527938842773438, 0.17661285400390625, 0.18794631958007812, 0.19927978515625, 0.21061325073242188, 0.22194671630859375, 0.23328018188476562, 0.2446136474609375, 0.2559471130371094, 0.26728057861328125, 0.2786140441894531, 0.289947509765625, 0.3012809753417969, 0.31261444091796875, 0.3239479064941406, 0.3352813720703125, 0.3466148376464844, 0.35794830322265625, 0.3692817687988281, 0.380615234375]}, "gradients/encoder.encoder.layers.8.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 2.0, 8.0, 3.0, 2.0, 4.0, 8.0, 18.0, 48.0, 72.0, 106.0, 155.0, 172.0, 140.0, 109.0, 64.0, 52.0, 23.0, 10.0, 7.0, 6.0, 1.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.26904296875, -0.2623920440673828, -0.2557411193847656, -0.24909019470214844, -0.24243927001953125, -0.23578834533691406, -0.22913742065429688, -0.2224864959716797, -0.2158355712890625, -0.2091846466064453, -0.20253372192382812, -0.19588279724121094, -0.18923187255859375, -0.18258094787597656, -0.17593002319335938, -0.1692790985107422, -0.162628173828125, -0.1559772491455078, -0.14932632446289062, -0.14267539978027344, -0.13602447509765625, -0.12937355041503906, -0.12272262573242188, -0.11607170104980469, -0.1094207763671875, -0.10276985168457031, -0.09611892700195312, -0.08946800231933594, -0.08281707763671875, -0.07616615295410156, -0.06951522827148438, -0.06286430358886719, -0.05621337890625, -0.04956245422363281, -0.042911529541015625, -0.03626060485839844, -0.02960968017578125, -0.022958755493164062, -0.016307830810546875, -0.009656906127929688, -0.0030059814453125, 0.0036449432373046875, 0.010295867919921875, 0.016946792602539062, 0.02359771728515625, 0.030248641967773438, 0.036899566650390625, 0.04355049133300781, 0.050201416015625, 0.05685234069824219, 0.06350326538085938, 0.07015419006347656, 0.07680511474609375, 0.08345603942871094, 0.09010696411132812, 0.09675788879394531, 0.1034088134765625, 0.11005973815917969, 0.11671066284179688, 0.12336158752441406, 0.13001251220703125, 0.13666343688964844, 0.14331436157226562, 0.1499652862548828, 0.1566162109375]}, "gradients/encoder.encoder.layers.8.attention.v_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 1.0, 4.0, 6.0, 4.0, 3.0, 6.0, 7.0, 9.0, 5.0, 10.0, 18.0, 9.0, 18.0, 15.0, 38.0, 20.0, 15.0, 34.0, 47.0, 71.0, 132.0, 299.0, 762.0, 2645.0, 16940.0, 320277.0, 677739.0, 24275.0, 3414.0, 915.0, 335.0, 141.0, 86.0, 49.0, 35.0, 20.0, 29.0, 20.0, 20.0, 17.0, 15.0, 10.0, 8.0, 8.0, 8.0, 5.0, 5.0, 5.0, 2.0, 3.0, 0.0, 3.0, 1.0, 0.0, 0.0, 3.0, 3.0], "bins": [-0.361572265625, -0.35063934326171875, -0.3397064208984375, -0.32877349853515625, -0.317840576171875, -0.30690765380859375, -0.2959747314453125, -0.28504180908203125, -0.27410888671875, -0.26317596435546875, -0.2522430419921875, -0.24131011962890625, -0.230377197265625, -0.21944427490234375, -0.2085113525390625, -0.19757843017578125, -0.1866455078125, -0.17571258544921875, -0.1647796630859375, -0.15384674072265625, -0.142913818359375, -0.13198089599609375, -0.1210479736328125, -0.11011505126953125, -0.09918212890625, -0.08824920654296875, -0.0773162841796875, -0.06638336181640625, -0.055450439453125, -0.04451751708984375, -0.0335845947265625, -0.02265167236328125, -0.01171875, -0.00078582763671875, 0.0101470947265625, 0.02108001708984375, 0.032012939453125, 0.04294586181640625, 0.0538787841796875, 0.06481170654296875, 0.07574462890625, 0.08667755126953125, 0.0976104736328125, 0.10854339599609375, 0.119476318359375, 0.13040924072265625, 0.1413421630859375, 0.15227508544921875, 0.1632080078125, 0.17414093017578125, 0.1850738525390625, 0.19600677490234375, 0.206939697265625, 0.21787261962890625, 0.2288055419921875, 0.23973846435546875, 0.25067138671875, 0.26160430908203125, 0.2725372314453125, 0.28347015380859375, 0.294403076171875, 0.30533599853515625, 0.3162689208984375, 0.32720184326171875, 0.338134765625]}, "gradients/encoder.encoder.layers.8.attention.v_proj.bias": {"_type": "histogram", "values": [3.0, 3.0, 0.0, 0.0, 1.0, 3.0, 0.0, 4.0, 1.0, 7.0, 4.0, 4.0, 8.0, 8.0, 8.0, 12.0, 16.0, 16.0, 19.0, 22.0, 28.0, 20.0, 28.0, 35.0, 32.0, 18.0, 43.0, 39.0, 44.0, 46.0, 45.0, 43.0, 36.0, 47.0, 34.0, 40.0, 28.0, 28.0, 32.0, 25.0, 16.0, 24.0, 33.0, 17.0, 17.0, 8.0, 20.0, 8.0, 6.0, 10.0, 4.0, 7.0, 3.0, 5.0, 4.0, 4.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0], "bins": [-0.23291015625, -0.22535324096679688, -0.21779632568359375, -0.21023941040039062, -0.2026824951171875, -0.19512557983398438, -0.18756866455078125, -0.18001174926757812, -0.172454833984375, -0.16489791870117188, -0.15734100341796875, -0.14978408813476562, -0.1422271728515625, -0.13467025756835938, -0.12711334228515625, -0.11955642700195312, -0.11199951171875, -0.10444259643554688, -0.09688568115234375, -0.08932876586914062, -0.0817718505859375, -0.07421493530273438, -0.06665802001953125, -0.059101104736328125, -0.051544189453125, -0.043987274169921875, -0.03643035888671875, -0.028873443603515625, -0.0213165283203125, -0.013759613037109375, -0.00620269775390625, 0.001354217529296875, 0.0089111328125, 0.016468048095703125, 0.02402496337890625, 0.031581878662109375, 0.0391387939453125, 0.046695709228515625, 0.05425262451171875, 0.061809539794921875, 0.069366455078125, 0.07692337036132812, 0.08448028564453125, 0.09203720092773438, 0.0995941162109375, 0.10715103149414062, 0.11470794677734375, 0.12226486206054688, 0.12982177734375, 0.13737869262695312, 0.14493560791015625, 0.15249252319335938, 0.1600494384765625, 0.16760635375976562, 0.17516326904296875, 0.18272018432617188, 0.190277099609375, 0.19783401489257812, 0.20539093017578125, 0.21294784545898438, 0.2205047607421875, 0.22806167602539062, 0.23561859130859375, 0.24317550659179688, 0.250732421875]}, "gradients/encoder.encoder.layers.8.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 6.0, 6.0, 13.0, 18.0, 33.0, 39.0, 73.0, 117.0, 248.0, 509.0, 1434.0, 7664.0, 925064.0, 106993.0, 4377.0, 1041.0, 407.0, 198.0, 100.0, 73.0, 53.0, 30.0, 15.0, 9.0, 9.0, 12.0, 9.0, 4.0, 1.0, 1.0, 0.0, 0.0, 3.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.0281982421875, -0.0273895263671875, -0.026580810546875, -0.0257720947265625, -0.02496337890625, -0.0241546630859375, -0.023345947265625, -0.0225372314453125, -0.021728515625, -0.0209197998046875, -0.020111083984375, -0.0193023681640625, -0.01849365234375, -0.0176849365234375, -0.016876220703125, -0.0160675048828125, -0.0152587890625, -0.0144500732421875, -0.013641357421875, -0.0128326416015625, -0.01202392578125, -0.0112152099609375, -0.010406494140625, -0.0095977783203125, -0.0087890625, -0.0079803466796875, -0.007171630859375, -0.0063629150390625, -0.00555419921875, -0.0047454833984375, -0.003936767578125, -0.0031280517578125, -0.0023193359375, -0.0015106201171875, -0.000701904296875, 0.0001068115234375, 0.00091552734375, 0.0017242431640625, 0.002532958984375, 0.0033416748046875, 0.004150390625, 0.0049591064453125, 0.005767822265625, 0.0065765380859375, 0.00738525390625, 0.0081939697265625, 0.009002685546875, 0.0098114013671875, 0.0106201171875, 0.0114288330078125, 0.012237548828125, 0.0130462646484375, 0.01385498046875, 0.0146636962890625, 0.015472412109375, 0.0162811279296875, 0.01708984375, 0.0178985595703125, 0.018707275390625, 0.0195159912109375, 0.02032470703125, 0.0211334228515625, 0.021942138671875, 0.0227508544921875, 0.0235595703125]}, "gradients/encoder.encoder.layers.8.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 4.0, 1.0, 2.0, 1.0, 5.0, 1.0, 8.0, 1.0, 9.0, 9.0, 6.0, 6.0, 16.0, 35.0, 20.0, 16.0, 31.0, 28.0, 34.0, 46.0, 29.0, 26.0, 26.0, 36.0, 32.0, 30.0, 89.0, 36.0, 39.0, 47.0, 39.0, 36.0, 61.0, 31.0, 26.0, 11.0, 25.0, 16.0, 39.0, 11.0, 9.0, 8.0, 3.0, 5.0, 5.0, 7.0, 5.0, 4.0, 1.0, 3.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-2.205371856689453e-06, -2.1364539861679077e-06, -2.0675361156463623e-06, -1.998618245124817e-06, -1.9297003746032715e-06, -1.860782504081726e-06, -1.7918646335601807e-06, -1.7229467630386353e-06, -1.6540288925170898e-06, -1.5851110219955444e-06, -1.516193151473999e-06, -1.4472752809524536e-06, -1.3783574104309082e-06, -1.3094395399093628e-06, -1.2405216693878174e-06, -1.171603798866272e-06, -1.1026859283447266e-06, -1.0337680578231812e-06, -9.648501873016357e-07, -8.959323167800903e-07, -8.270144462585449e-07, -7.580965757369995e-07, -6.891787052154541e-07, -6.202608346939087e-07, -5.513429641723633e-07, -4.824250936508179e-07, -4.1350722312927246e-07, -3.4458935260772705e-07, -2.7567148208618164e-07, -2.0675361156463623e-07, -1.3783574104309082e-07, -6.891787052154541e-08, 0.0, 6.891787052154541e-08, 1.3783574104309082e-07, 2.0675361156463623e-07, 2.7567148208618164e-07, 3.4458935260772705e-07, 4.1350722312927246e-07, 4.824250936508179e-07, 5.513429641723633e-07, 6.202608346939087e-07, 6.891787052154541e-07, 7.580965757369995e-07, 8.270144462585449e-07, 8.959323167800903e-07, 9.648501873016357e-07, 1.0337680578231812e-06, 1.1026859283447266e-06, 1.171603798866272e-06, 1.2405216693878174e-06, 1.3094395399093628e-06, 1.3783574104309082e-06, 1.4472752809524536e-06, 1.516193151473999e-06, 1.5851110219955444e-06, 1.6540288925170898e-06, 1.7229467630386353e-06, 1.7918646335601807e-06, 1.860782504081726e-06, 1.9297003746032715e-06, 1.998618245124817e-06, 2.0675361156463623e-06, 2.1364539861679077e-06, 2.205371856689453e-06]}, "gradients/encoder.encoder.layers.8.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 3.0, 0.0, 2.0, 2.0, 4.0, 8.0, 5.0, 12.0, 12.0, 24.0, 69.0, 160.0, 528.0, 4494.0, 983029.0, 58013.0, 1694.0, 295.0, 100.0, 35.0, 22.0, 15.0, 7.0, 9.0, 6.0, 2.0, 4.0, 6.0, 4.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0310211181640625, -0.029531240463256836, -0.028041362762451172, -0.026551485061645508, -0.025061607360839844, -0.02357172966003418, -0.022081851959228516, -0.02059197425842285, -0.019102096557617188, -0.017612218856811523, -0.01612234115600586, -0.014632463455200195, -0.013142585754394531, -0.011652708053588867, -0.010162830352783203, -0.008672952651977539, -0.007183074951171875, -0.005693197250366211, -0.004203319549560547, -0.002713441848754883, -0.0012235641479492188, 0.0002663135528564453, 0.0017561912536621094, 0.0032460689544677734, 0.0047359466552734375, 0.0062258243560791016, 0.007715702056884766, 0.00920557975769043, 0.010695457458496094, 0.012185335159301758, 0.013675212860107422, 0.015165090560913086, 0.01665496826171875, 0.018144845962524414, 0.019634723663330078, 0.021124601364135742, 0.022614479064941406, 0.02410435676574707, 0.025594234466552734, 0.0270841121673584, 0.028573989868164062, 0.030063867568969727, 0.03155374526977539, 0.033043622970581055, 0.03453350067138672, 0.03602337837219238, 0.03751325607299805, 0.03900313377380371, 0.040493011474609375, 0.04198288917541504, 0.0434727668762207, 0.04496264457702637, 0.04645252227783203, 0.047942399978637695, 0.04943227767944336, 0.05092215538024902, 0.05241203308105469, 0.05390191078186035, 0.055391788482666016, 0.05688166618347168, 0.058371543884277344, 0.05986142158508301, 0.06135129928588867, 0.06284117698669434, 0.0643310546875]}, "gradients/encoder.encoder.layers.8.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 4.0, 6.0, 4.0, 1.0, 7.0, 6.0, 7.0, 15.0, 13.0, 23.0, 23.0, 53.0, 78.0, 235.0, 257.0, 115.0, 53.0, 32.0, 23.0, 17.0, 11.0, 10.0, 4.0, 8.0, 1.0, 1.0, 2.0, 0.0, 3.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.043975830078125, -0.042955636978149414, -0.04193544387817383, -0.04091525077819824, -0.039895057678222656, -0.03887486457824707, -0.037854671478271484, -0.0368344783782959, -0.03581428527832031, -0.03479409217834473, -0.03377389907836914, -0.032753705978393555, -0.03173351287841797, -0.030713319778442383, -0.029693126678466797, -0.02867293357849121, -0.027652740478515625, -0.02663254737854004, -0.025612354278564453, -0.024592161178588867, -0.02357196807861328, -0.022551774978637695, -0.02153158187866211, -0.020511388778686523, -0.019491195678710938, -0.01847100257873535, -0.017450809478759766, -0.01643061637878418, -0.015410423278808594, -0.014390230178833008, -0.013370037078857422, -0.012349843978881836, -0.01132965087890625, -0.010309457778930664, -0.009289264678955078, -0.008269071578979492, -0.007248878479003906, -0.00622868537902832, -0.005208492279052734, -0.0041882991790771484, -0.0031681060791015625, -0.0021479129791259766, -0.0011277198791503906, -0.00010752677917480469, 0.0009126663208007812, 0.0019328594207763672, 0.002953052520751953, 0.003973245620727539, 0.004993438720703125, 0.006013631820678711, 0.007033824920654297, 0.008054018020629883, 0.009074211120605469, 0.010094404220581055, 0.01111459732055664, 0.012134790420532227, 0.013154983520507812, 0.014175176620483398, 0.015195369720458984, 0.01621556282043457, 0.017235755920410156, 0.018255949020385742, 0.019276142120361328, 0.020296335220336914, 0.0213165283203125]}, "gradients/encoder.encoder.layers.8.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 6.0, 7.0, 17.0, 19.0, 50.0, 125.0, 392.0, 210.0, 84.0, 41.0, 26.0, 14.0, 7.0, 5.0, 4.0, 3.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.9033567905426025, -0.8460993766784668, -0.788841962814331, -0.7315845489501953, -0.6743270754814148, -0.617069661617279, -0.5598122477531433, -0.5025547742843628, -0.44529739022254944, -0.3880399763584137, -0.33078253269195557, -0.2735251188278198, -0.2162676900625229, -0.15901026129722595, -0.10175284743309021, -0.04449540376663208, 0.012762010097503662, 0.0700194388628006, 0.12727686762809753, 0.18453428149223328, 0.2417917102575302, 0.29904913902282715, 0.3563065528869629, 0.413563996553421, 0.47082141041755676, 0.5280788540840149, 0.5853362679481506, 0.6425936818122864, 0.6998510956764221, 0.7571085691452026, 0.8143659830093384, 0.8716233968734741, 0.9288808107376099, 0.9861382246017456, 1.0433956384658813, 1.100653052330017, 1.1579104661941528, 1.2151679992675781, 1.2724254131317139, 1.3296828269958496, 1.3869402408599854, 1.444197654724121, 1.5014550685882568, 1.5587124824523926, 1.6159698963165283, 1.673227310180664, 1.7304847240447998, 1.787742257118225, 1.8449995517730713, 1.902256965637207, 1.9595143795013428, 2.0167717933654785, 2.0740292072296143, 2.13128662109375, 2.1885440349578857, 2.2458014488220215, 2.3030591011047363, 2.360316514968872, 2.417573928833008, 2.4748313426971436, 2.5320887565612793, 2.589346170425415, 2.646603584289551, 2.7038609981536865, 2.7611184120178223]}, "gradients/encoder.encoder.layers.8.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 4.0, 1.0, 3.0, 0.0, 2.0, 3.0, 3.0, 5.0, 5.0, 11.0, 10.0, 11.0, 17.0, 18.0, 16.0, 20.0, 19.0, 25.0, 27.0, 38.0, 42.0, 32.0, 39.0, 50.0, 55.0, 44.0, 41.0, 46.0, 43.0, 40.0, 34.0, 39.0, 29.0, 34.0, 29.0, 18.0, 24.0, 23.0, 19.0, 13.0, 21.0, 14.0, 15.0, 6.0, 11.0, 8.0, 5.0, 0.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-1.100088119506836, -1.0651017427444458, -1.0301153659820557, -0.9951289892196655, -0.9601425528526306, -0.9251561760902405, -0.8901697993278503, -0.8551834225654602, -0.8201969861984253, -0.7852106094360352, -0.750224232673645, -0.7152378559112549, -0.68025141954422, -0.6452650427818298, -0.6102786660194397, -0.5752922892570496, -0.5403059124946594, -0.5053195357322693, -0.47033312916755676, -0.4353467524051666, -0.4003603458404541, -0.36537396907806396, -0.33038759231567383, -0.2954012155532837, -0.26041480898857117, -0.22542841732501984, -0.1904420256614685, -0.15545564889907837, -0.12046925723552704, -0.08548286557197571, -0.05049648880958557, -0.01551009714603424, 0.01947629451751709, 0.05446268245577812, 0.08944907039403915, 0.12443545460700989, 0.15942184627056122, 0.19440823793411255, 0.22939461469650269, 0.2643809914588928, 0.29936739802360535, 0.3343537747859955, 0.369340181350708, 0.40432655811309814, 0.4393129348754883, 0.4742993414402008, 0.5092856884002686, 0.5442721247673035, 0.5792585015296936, 0.6142448782920837, 0.6492312550544739, 0.6842176914215088, 0.7192040681838989, 0.7541904449462891, 0.7891768217086792, 0.8241631984710693, 0.8591495752334595, 0.8941359519958496, 0.9291223287582397, 0.9641087055206299, 0.9990951418876648, 1.0340814590454102, 1.0690679550170898, 1.10405433177948, 1.1390407085418701]}, "gradients/encoder.encoder.layers.7.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 3.0, 6.0, 9.0, 9.0, 12.0, 16.0, 15.0, 43.0, 108.0, 245.0, 2088.0, 85682.0, 4094598.0, 9815.0, 915.0, 535.0, 110.0, 35.0, 25.0, 5.0, 11.0, 2.0, 4.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.446533203125, -0.4354438781738281, -0.42435455322265625, -0.4132652282714844, -0.4021759033203125, -0.3910865783691406, -0.37999725341796875, -0.3689079284667969, -0.357818603515625, -0.3467292785644531, -0.33563995361328125, -0.3245506286621094, -0.3134613037109375, -0.3023719787597656, -0.29128265380859375, -0.2801933288574219, -0.26910400390625, -0.2580146789550781, -0.24692535400390625, -0.23583602905273438, -0.2247467041015625, -0.21365737915039062, -0.20256805419921875, -0.19147872924804688, -0.180389404296875, -0.16930007934570312, -0.15821075439453125, -0.14712142944335938, -0.1360321044921875, -0.12494277954101562, -0.11385345458984375, -0.10276412963867188, -0.0916748046875, -0.08058547973632812, -0.06949615478515625, -0.058406829833984375, -0.0473175048828125, -0.036228179931640625, -0.02513885498046875, -0.014049530029296875, -0.002960205078125, 0.008129119873046875, 0.01921844482421875, 0.030307769775390625, 0.0413970947265625, 0.052486419677734375, 0.06357574462890625, 0.07466506958007812, 0.08575439453125, 0.09684371948242188, 0.10793304443359375, 0.11902236938476562, 0.1301116943359375, 0.14120101928710938, 0.15229034423828125, 0.16337966918945312, 0.174468994140625, 0.18555831909179688, 0.19664764404296875, 0.20773696899414062, 0.2188262939453125, 0.22991561889648438, 0.24100494384765625, 0.2520942687988281, 0.26318359375]}, "gradients/encoder.encoder.layers.7.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 3.0, 6.0, 2.0, 2.0, 7.0, 4.0, 15.0, 39.0, 69.0, 125.0, 144.0, 196.0, 153.0, 105.0, 56.0, 44.0, 18.0, 7.0, 8.0, 6.0, 1.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.26708984375, -0.2604846954345703, -0.2538795471191406, -0.24727439880371094, -0.24066925048828125, -0.23406410217285156, -0.22745895385742188, -0.2208538055419922, -0.2142486572265625, -0.2076435089111328, -0.20103836059570312, -0.19443321228027344, -0.18782806396484375, -0.18122291564941406, -0.17461776733398438, -0.1680126190185547, -0.161407470703125, -0.1548023223876953, -0.14819717407226562, -0.14159202575683594, -0.13498687744140625, -0.12838172912597656, -0.12177658081054688, -0.11517143249511719, -0.1085662841796875, -0.10196113586425781, -0.09535598754882812, -0.08875083923339844, -0.08214569091796875, -0.07554054260253906, -0.06893539428710938, -0.06233024597167969, -0.05572509765625, -0.04911994934082031, -0.042514801025390625, -0.03590965270996094, -0.02930450439453125, -0.022699356079101562, -0.016094207763671875, -0.009489059448242188, -0.0028839111328125, 0.0037212371826171875, 0.010326385498046875, 0.016931533813476562, 0.02353668212890625, 0.030141830444335938, 0.036746978759765625, 0.04335212707519531, 0.049957275390625, 0.05656242370605469, 0.06316757202148438, 0.06977272033691406, 0.07637786865234375, 0.08298301696777344, 0.08958816528320312, 0.09619331359863281, 0.1027984619140625, 0.10940361022949219, 0.11600875854492188, 0.12261390686035156, 0.12921905517578125, 0.13582420349121094, 0.14242935180664062, 0.1490345001220703, 0.1556396484375]}, "gradients/encoder.encoder.layers.7.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 7.0, 8.0, 10.0, 23.0, 46.0, 73.0, 185.0, 555.0, 4610.0, 4174980.0, 12551.0, 821.0, 244.0, 88.0, 49.0, 24.0, 7.0, 7.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.4443359375, -0.4282684326171875, -0.412200927734375, -0.3961334228515625, -0.38006591796875, -0.3639984130859375, -0.347930908203125, -0.3318634033203125, -0.3157958984375, -0.2997283935546875, -0.283660888671875, -0.2675933837890625, -0.25152587890625, -0.2354583740234375, -0.219390869140625, -0.2033233642578125, -0.187255859375, -0.1711883544921875, -0.155120849609375, -0.1390533447265625, -0.12298583984375, -0.1069183349609375, -0.090850830078125, -0.0747833251953125, -0.0587158203125, -0.0426483154296875, -0.026580810546875, -0.0105133056640625, 0.00555419921875, 0.0216217041015625, 0.037689208984375, 0.0537567138671875, 0.06982421875, 0.0858917236328125, 0.101959228515625, 0.1180267333984375, 0.13409423828125, 0.1501617431640625, 0.166229248046875, 0.1822967529296875, 0.1983642578125, 0.2144317626953125, 0.230499267578125, 0.2465667724609375, 0.26263427734375, 0.2787017822265625, 0.294769287109375, 0.3108367919921875, 0.326904296875, 0.3429718017578125, 0.359039306640625, 0.3751068115234375, 0.39117431640625, 0.4072418212890625, 0.423309326171875, 0.4393768310546875, 0.4554443359375, 0.4715118408203125, 0.487579345703125, 0.5036468505859375, 0.51971435546875, 0.5357818603515625, 0.551849365234375, 0.5679168701171875, 0.583984375]}, "gradients/encoder.encoder.layers.7.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 5.0, 2.0, 3.0, 5.0, 3.0, 11.0, 33.0, 114.0, 1169.0, 2542.0, 154.0, 25.0, 9.0, 4.0, 3.0, 1.0, 3.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.4580078125, -0.44686126708984375, -0.4357147216796875, -0.42456817626953125, -0.413421630859375, -0.40227508544921875, -0.3911285400390625, -0.37998199462890625, -0.36883544921875, -0.35768890380859375, -0.3465423583984375, -0.33539581298828125, -0.324249267578125, -0.31310272216796875, -0.3019561767578125, -0.29080963134765625, -0.2796630859375, -0.26851654052734375, -0.2573699951171875, -0.24622344970703125, -0.235076904296875, -0.22393035888671875, -0.2127838134765625, -0.20163726806640625, -0.19049072265625, -0.17934417724609375, -0.1681976318359375, -0.15705108642578125, -0.145904541015625, -0.13475799560546875, -0.1236114501953125, -0.11246490478515625, -0.101318359375, -0.09017181396484375, -0.0790252685546875, -0.06787872314453125, -0.056732177734375, -0.04558563232421875, -0.0344390869140625, -0.02329254150390625, -0.01214599609375, -0.00099945068359375, 0.0101470947265625, 0.02129364013671875, 0.032440185546875, 0.04358673095703125, 0.0547332763671875, 0.06587982177734375, 0.0770263671875, 0.08817291259765625, 0.0993194580078125, 0.11046600341796875, 0.121612548828125, 0.13275909423828125, 0.1439056396484375, 0.15505218505859375, 0.16619873046875, 0.17734527587890625, 0.1884918212890625, 0.19963836669921875, 0.210784912109375, 0.22193145751953125, 0.2330780029296875, 0.24422454833984375, 0.25537109375]}, "gradients/encoder.encoder.layers.7.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 10.0, 54.0, 584.0, 328.0, 30.0, 8.0, 5.0], "bins": [-5.447493553161621, -5.356599807739258, -5.265705585479736, -5.174811840057373, -5.083917617797852, -4.993023872375488, -4.902130126953125, -4.8112359046936035, -4.72034215927124, -4.629448413848877, -4.5385541915893555, -4.447660446166992, -4.356766223907471, -4.265872478485107, -4.174978256225586, -4.084084510803223, -3.993190288543701, -3.902296304702759, -3.8114023208618164, -3.720508575439453, -3.6296145915985107, -3.5387206077575684, -3.447826623916626, -3.3569326400756836, -3.2660388946533203, -3.175144910812378, -3.0842509269714355, -2.9933571815490723, -2.90246319770813, -2.8115692138671875, -2.720675230026245, -2.6297812461853027, -2.5388875007629395, -2.447993516921997, -2.3570995330810547, -2.2662057876586914, -2.175311803817749, -2.0844178199768066, -1.9935238361358643, -1.9026298522949219, -1.8117358684539795, -1.720841884613037, -1.6299480199813843, -1.539054036140442, -1.4481600522994995, -1.3572661876678467, -1.2663722038269043, -1.175478219985962, -1.084584355354309, -0.9936904311180115, -0.9027964472770691, -0.8119025230407715, -0.7210085391998291, -0.6301146149635315, -0.5392206907272339, -0.4483267068862915, -0.3574327528476715, -0.2665387988090515, -0.1756448596715927, -0.08475092053413391, 0.006143033504486084, 0.09703698754310608, 0.1879309117794037, 0.27882489562034607, 0.3697188198566437]}, "gradients/encoder.encoder.layers.7.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 1.0, 0.0, 1.0, 3.0, 3.0, 1.0, 7.0, 5.0, 10.0, 10.0, 9.0, 19.0, 29.0, 16.0, 24.0, 20.0, 36.0, 30.0, 33.0, 34.0, 31.0, 38.0, 48.0, 50.0, 36.0, 36.0, 48.0, 45.0, 46.0, 38.0, 33.0, 38.0, 31.0, 33.0, 31.0, 24.0, 14.0, 24.0, 13.0, 16.0, 7.0, 10.0, 8.0, 4.0, 6.0, 4.0, 2.0, 3.0, 3.0, 2.0, 3.0, 2.0, 1.0], "bins": [-0.5448681116104126, -0.5299410820007324, -0.5150140523910522, -0.5000869631767273, -0.4851599335670471, -0.47023290395736694, -0.4553058445453644, -0.4403788149356842, -0.42545175552368164, -0.41052472591400146, -0.3955976665019989, -0.3806706368923187, -0.36574357748031616, -0.350816547870636, -0.3358894884586334, -0.32096245884895325, -0.3060353994369507, -0.2911083698272705, -0.27618131041526794, -0.26125428080558777, -0.2463272213935852, -0.23140019178390503, -0.21647313237190247, -0.2015461027622223, -0.18661907315254211, -0.17169202864170074, -0.15676498413085938, -0.141837939620018, -0.12691089510917664, -0.11198385804891586, -0.0970568135380745, -0.08212976902723312, -0.06720271706581116, -0.05227567255496979, -0.03734862804412842, -0.022421587258577347, -0.007494542747735977, 0.007432498037815094, 0.022359542548656464, 0.03728658705949783, 0.0522136315703392, 0.06714067608118057, 0.08206772059202194, 0.09699475765228271, 0.11192180216312408, 0.12684884667396545, 0.14177589118480682, 0.1567029356956482, 0.17162998020648956, 0.18655702471733093, 0.2014840692281723, 0.21641111373901367, 0.23133815824985504, 0.2462652027606964, 0.2611922323703766, 0.27611929178237915, 0.2910463213920593, 0.3059733510017395, 0.32090041041374207, 0.33582744002342224, 0.3507544994354248, 0.365681529045105, 0.38060858845710754, 0.3955356180667877, 0.4104626774787903]}, "gradients/encoder.encoder.layers.7.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 8.0, 3.0, 3.0, 5.0, 3.0, 8.0, 11.0, 9.0, 11.0, 22.0, 12.0, 32.0, 25.0, 51.0, 74.0, 121.0, 196.0, 408.0, 779.0, 1833.0, 4926.0, 21353.0, 185432.0, 767941.0, 50809.0, 9275.0, 2778.0, 1118.0, 558.0, 260.0, 149.0, 103.0, 72.0, 45.0, 26.0, 15.0, 28.0, 14.0, 14.0, 12.0, 2.0, 5.0, 6.0, 5.0, 1.0, 0.0, 4.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.33349609375, -0.3228187561035156, -0.31214141845703125, -0.3014640808105469, -0.2907867431640625, -0.2801094055175781, -0.26943206787109375, -0.2587547302246094, -0.248077392578125, -0.23740005493164062, -0.22672271728515625, -0.21604537963867188, -0.2053680419921875, -0.19469070434570312, -0.18401336669921875, -0.17333602905273438, -0.16265869140625, -0.15198135375976562, -0.14130401611328125, -0.13062667846679688, -0.1199493408203125, -0.10927200317382812, -0.09859466552734375, -0.08791732788085938, -0.077239990234375, -0.06656265258789062, -0.05588531494140625, -0.045207977294921875, -0.0345306396484375, -0.023853302001953125, -0.01317596435546875, -0.002498626708984375, 0.0081787109375, 0.018856048583984375, 0.02953338623046875, 0.040210723876953125, 0.0508880615234375, 0.061565399169921875, 0.07224273681640625, 0.08292007446289062, 0.093597412109375, 0.10427474975585938, 0.11495208740234375, 0.12562942504882812, 0.1363067626953125, 0.14698410034179688, 0.15766143798828125, 0.16833877563476562, 0.17901611328125, 0.18969345092773438, 0.20037078857421875, 0.21104812622070312, 0.2217254638671875, 0.23240280151367188, 0.24308013916015625, 0.2537574768066406, 0.264434814453125, 0.2751121520996094, 0.28578948974609375, 0.2964668273925781, 0.3071441650390625, 0.3178215026855469, 0.32849884033203125, 0.3391761779785156, 0.349853515625]}, "gradients/encoder.encoder.layers.7.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 2.0, 7.0, 2.0, 4.0, 4.0, 7.0, 17.0, 44.0, 75.0, 119.0, 158.0, 190.0, 144.0, 110.0, 50.0, 43.0, 13.0, 11.0, 5.0, 4.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.26611328125, -0.2595100402832031, -0.25290679931640625, -0.24630355834960938, -0.2397003173828125, -0.23309707641601562, -0.22649383544921875, -0.21989059448242188, -0.213287353515625, -0.20668411254882812, -0.20008087158203125, -0.19347763061523438, -0.1868743896484375, -0.18027114868164062, -0.17366790771484375, -0.16706466674804688, -0.16046142578125, -0.15385818481445312, -0.14725494384765625, -0.14065170288085938, -0.1340484619140625, -0.12744522094726562, -0.12084197998046875, -0.11423873901367188, -0.107635498046875, -0.10103225708007812, -0.09442901611328125, -0.08782577514648438, -0.0812225341796875, -0.07461929321289062, -0.06801605224609375, -0.061412811279296875, -0.0548095703125, -0.048206329345703125, -0.04160308837890625, -0.034999847412109375, -0.0283966064453125, -0.021793365478515625, -0.01519012451171875, -0.008586883544921875, -0.001983642578125, 0.004619598388671875, 0.01122283935546875, 0.017826080322265625, 0.0244293212890625, 0.031032562255859375, 0.03763580322265625, 0.044239044189453125, 0.05084228515625, 0.057445526123046875, 0.06404876708984375, 0.07065200805664062, 0.0772552490234375, 0.08385848999023438, 0.09046173095703125, 0.09706497192382812, 0.103668212890625, 0.11027145385742188, 0.11687469482421875, 0.12347793579101562, 0.1300811767578125, 0.13668441772460938, 0.14328765869140625, 0.14989089965820312, 0.156494140625]}, "gradients/encoder.encoder.layers.7.attention.v_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 1.0, 1.0, 4.0, 6.0, 3.0, 8.0, 6.0, 9.0, 6.0, 14.0, 16.0, 12.0, 18.0, 27.0, 39.0, 41.0, 65.0, 82.0, 195.0, 461.0, 1807.0, 15078.0, 778281.0, 241031.0, 9073.0, 1344.0, 386.0, 173.0, 109.0, 42.0, 50.0, 28.0, 28.0, 33.0, 11.0, 23.0, 18.0, 8.0, 5.0, 5.0, 6.0, 5.0, 2.0, 3.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.4482421875, -0.43325042724609375, -0.4182586669921875, -0.40326690673828125, -0.388275146484375, -0.37328338623046875, -0.3582916259765625, -0.34329986572265625, -0.32830810546875, -0.31331634521484375, -0.2983245849609375, -0.28333282470703125, -0.268341064453125, -0.25334930419921875, -0.2383575439453125, -0.22336578369140625, -0.2083740234375, -0.19338226318359375, -0.1783905029296875, -0.16339874267578125, -0.148406982421875, -0.13341522216796875, -0.1184234619140625, -0.10343170166015625, -0.08843994140625, -0.07344818115234375, -0.0584564208984375, -0.04346466064453125, -0.028472900390625, -0.01348114013671875, 0.0015106201171875, 0.01650238037109375, 0.031494140625, 0.04648590087890625, 0.0614776611328125, 0.07646942138671875, 0.091461181640625, 0.10645294189453125, 0.1214447021484375, 0.13643646240234375, 0.15142822265625, 0.16641998291015625, 0.1814117431640625, 0.19640350341796875, 0.211395263671875, 0.22638702392578125, 0.2413787841796875, 0.25637054443359375, 0.2713623046875, 0.28635406494140625, 0.3013458251953125, 0.31633758544921875, 0.331329345703125, 0.34632110595703125, 0.3613128662109375, 0.37630462646484375, 0.39129638671875, 0.40628814697265625, 0.4212799072265625, 0.43627166748046875, 0.451263427734375, 0.46625518798828125, 0.4812469482421875, 0.49623870849609375, 0.51123046875]}, "gradients/encoder.encoder.layers.7.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 4.0, 1.0, 4.0, 5.0, 6.0, 6.0, 8.0, 18.0, 23.0, 11.0, 30.0, 24.0, 24.0, 40.0, 20.0, 47.0, 55.0, 35.0, 46.0, 56.0, 67.0, 58.0, 49.0, 51.0, 32.0, 42.0, 23.0, 43.0, 26.0, 35.0, 20.0, 18.0, 9.0, 16.0, 15.0, 6.0, 9.0, 7.0, 7.0, 4.0, 5.0, 3.0, 2.0, 1.0, 2.0, 3.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.293212890625, -0.28461456298828125, -0.2760162353515625, -0.26741790771484375, -0.258819580078125, -0.25022125244140625, -0.2416229248046875, -0.23302459716796875, -0.22442626953125, -0.21582794189453125, -0.2072296142578125, -0.19863128662109375, -0.190032958984375, -0.18143463134765625, -0.1728363037109375, -0.16423797607421875, -0.1556396484375, -0.14704132080078125, -0.1384429931640625, -0.12984466552734375, -0.121246337890625, -0.11264801025390625, -0.1040496826171875, -0.09545135498046875, -0.08685302734375, -0.07825469970703125, -0.0696563720703125, -0.06105804443359375, -0.052459716796875, -0.04386138916015625, -0.0352630615234375, -0.02666473388671875, -0.01806640625, -0.00946807861328125, -0.0008697509765625, 0.00772857666015625, 0.016326904296875, 0.02492523193359375, 0.0335235595703125, 0.04212188720703125, 0.05072021484375, 0.05931854248046875, 0.0679168701171875, 0.07651519775390625, 0.085113525390625, 0.09371185302734375, 0.1023101806640625, 0.11090850830078125, 0.1195068359375, 0.12810516357421875, 0.1367034912109375, 0.14530181884765625, 0.153900146484375, 0.16249847412109375, 0.1710968017578125, 0.17969512939453125, 0.18829345703125, 0.19689178466796875, 0.2054901123046875, 0.21408843994140625, 0.222686767578125, 0.23128509521484375, 0.2398834228515625, 0.24848175048828125, 0.257080078125]}, "gradients/encoder.encoder.layers.7.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 5.0, 0.0, 2.0, 3.0, 4.0, 3.0, 4.0, 6.0, 9.0, 15.0, 22.0, 18.0, 40.0, 45.0, 94.0, 124.0, 211.0, 419.0, 940.0, 2722.0, 13864.0, 910306.0, 109431.0, 6951.0, 1755.0, 718.0, 307.0, 173.0, 125.0, 93.0, 39.0, 31.0, 20.0, 15.0, 11.0, 11.0, 2.0, 8.0, 4.0, 1.0, 6.0, 3.0, 2.0, 3.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0229949951171875, -0.022104501724243164, -0.021214008331298828, -0.020323514938354492, -0.019433021545410156, -0.01854252815246582, -0.017652034759521484, -0.01676154136657715, -0.015871047973632812, -0.014980554580688477, -0.01409006118774414, -0.013199567794799805, -0.012309074401855469, -0.011418581008911133, -0.010528087615966797, -0.009637594223022461, -0.008747100830078125, -0.007856607437133789, -0.006966114044189453, -0.006075620651245117, -0.005185127258300781, -0.004294633865356445, -0.0034041404724121094, -0.0025136470794677734, -0.0016231536865234375, -0.0007326602935791016, 0.00015783309936523438, 0.0010483264923095703, 0.0019388198852539062, 0.002829313278198242, 0.003719806671142578, 0.004610300064086914, 0.00550079345703125, 0.006391286849975586, 0.007281780242919922, 0.008172273635864258, 0.009062767028808594, 0.00995326042175293, 0.010843753814697266, 0.011734247207641602, 0.012624740600585938, 0.013515233993530273, 0.01440572738647461, 0.015296220779418945, 0.01618671417236328, 0.017077207565307617, 0.017967700958251953, 0.01885819435119629, 0.019748687744140625, 0.02063918113708496, 0.021529674530029297, 0.022420167922973633, 0.02331066131591797, 0.024201154708862305, 0.02509164810180664, 0.025982141494750977, 0.026872634887695312, 0.02776312828063965, 0.028653621673583984, 0.02954411506652832, 0.030434608459472656, 0.03132510185241699, 0.03221559524536133, 0.033106088638305664, 0.03399658203125]}, "gradients/encoder.encoder.layers.7.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 2.0, 1.0, 7.0, 8.0, 9.0, 13.0, 15.0, 25.0, 23.0, 14.0, 53.0, 60.0, 40.0, 62.0, 77.0, 76.0, 71.0, 38.0, 74.0, 56.0, 57.0, 52.0, 47.0, 27.0, 24.0, 12.0, 21.0, 13.0, 9.0, 8.0, 4.0, 8.0, 2.0, 0.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.3974647521972656e-06, -3.285706043243408e-06, -3.1739473342895508e-06, -3.0621886253356934e-06, -2.950429916381836e-06, -2.8386712074279785e-06, -2.726912498474121e-06, -2.6151537895202637e-06, -2.5033950805664062e-06, -2.391636371612549e-06, -2.2798776626586914e-06, -2.168118953704834e-06, -2.0563602447509766e-06, -1.944601535797119e-06, -1.8328428268432617e-06, -1.7210841178894043e-06, -1.6093254089355469e-06, -1.4975666999816895e-06, -1.385807991027832e-06, -1.2740492820739746e-06, -1.1622905731201172e-06, -1.0505318641662598e-06, -9.387731552124023e-07, -8.270144462585449e-07, -7.152557373046875e-07, -6.034970283508301e-07, -4.917383193969727e-07, -3.7997961044311523e-07, -2.682209014892578e-07, -1.564621925354004e-07, -4.470348358154297e-08, 6.705522537231445e-08, 1.7881393432617188e-07, 2.905726432800293e-07, 4.023313522338867e-07, 5.140900611877441e-07, 6.258487701416016e-07, 7.37607479095459e-07, 8.493661880493164e-07, 9.611248970031738e-07, 1.0728836059570312e-06, 1.1846423149108887e-06, 1.296401023864746e-06, 1.4081597328186035e-06, 1.519918441772461e-06, 1.6316771507263184e-06, 1.7434358596801758e-06, 1.8551945686340332e-06, 1.9669532775878906e-06, 2.078711986541748e-06, 2.1904706954956055e-06, 2.302229404449463e-06, 2.4139881134033203e-06, 2.5257468223571777e-06, 2.637505531311035e-06, 2.7492642402648926e-06, 2.86102294921875e-06, 2.9727816581726074e-06, 3.084540367126465e-06, 3.1962990760803223e-06, 3.3080577850341797e-06, 3.419816493988037e-06, 3.5315752029418945e-06, 3.643333911895752e-06, 3.7550926208496094e-06]}, "gradients/encoder.encoder.layers.7.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 2.0, 4.0, 5.0, 3.0, 7.0, 4.0, 18.0, 9.0, 34.0, 57.0, 129.0, 339.0, 1588.0, 37946.0, 1000380.0, 6799.0, 786.0, 220.0, 98.0, 47.0, 22.0, 19.0, 14.0, 18.0, 8.0, 4.0, 2.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.0726318359375, -0.0709538459777832, -0.0692758560180664, -0.06759786605834961, -0.06591987609863281, -0.06424188613891602, -0.06256389617919922, -0.06088590621948242, -0.059207916259765625, -0.05752992630004883, -0.05585193634033203, -0.054173946380615234, -0.05249595642089844, -0.05081796646118164, -0.049139976501464844, -0.04746198654174805, -0.04578399658203125, -0.04410600662231445, -0.042428016662597656, -0.04075002670288086, -0.03907203674316406, -0.037394046783447266, -0.03571605682373047, -0.03403806686401367, -0.032360076904296875, -0.030682086944580078, -0.02900409698486328, -0.027326107025146484, -0.025648117065429688, -0.02397012710571289, -0.022292137145996094, -0.020614147186279297, -0.0189361572265625, -0.017258167266845703, -0.015580177307128906, -0.01390218734741211, -0.012224197387695312, -0.010546207427978516, -0.008868217468261719, -0.007190227508544922, -0.005512237548828125, -0.003834247589111328, -0.0021562576293945312, -0.0004782676696777344, 0.0011997222900390625, 0.0028777122497558594, 0.004555702209472656, 0.006233692169189453, 0.00791168212890625, 0.009589672088623047, 0.011267662048339844, 0.01294565200805664, 0.014623641967773438, 0.016301631927490234, 0.01797962188720703, 0.019657611846923828, 0.021335601806640625, 0.023013591766357422, 0.02469158172607422, 0.026369571685791016, 0.028047561645507812, 0.02972555160522461, 0.031403541564941406, 0.0330815315246582, 0.034759521484375]}, "gradients/encoder.encoder.layers.7.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 4.0, 5.0, 15.0, 7.0, 11.0, 13.0, 22.0, 35.0, 39.0, 65.0, 142.0, 232.0, 183.0, 92.0, 43.0, 24.0, 24.0, 17.0, 9.0, 10.0, 4.0, 7.0, 3.0, 2.0, 4.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.020660400390625, -0.01966238021850586, -0.01866436004638672, -0.017666339874267578, -0.016668319702148438, -0.015670299530029297, -0.014672279357910156, -0.013674259185791016, -0.012676239013671875, -0.011678218841552734, -0.010680198669433594, -0.009682178497314453, -0.008684158325195312, -0.007686138153076172, -0.006688117980957031, -0.005690097808837891, -0.00469207763671875, -0.0036940574645996094, -0.0026960372924804688, -0.0016980171203613281, -0.0006999969482421875, 0.0002980232238769531, 0.0012960433959960938, 0.0022940635681152344, 0.003292083740234375, 0.004290103912353516, 0.005288124084472656, 0.006286144256591797, 0.0072841644287109375, 0.008282184600830078, 0.009280204772949219, 0.01027822494506836, 0.0112762451171875, 0.01227426528930664, 0.013272285461425781, 0.014270305633544922, 0.015268325805664062, 0.016266345977783203, 0.017264366149902344, 0.018262386322021484, 0.019260406494140625, 0.020258426666259766, 0.021256446838378906, 0.022254467010498047, 0.023252487182617188, 0.024250507354736328, 0.02524852752685547, 0.02624654769897461, 0.02724456787109375, 0.02824258804321289, 0.02924060821533203, 0.030238628387451172, 0.031236648559570312, 0.03223466873168945, 0.033232688903808594, 0.034230709075927734, 0.035228729248046875, 0.036226749420166016, 0.037224769592285156, 0.0382227897644043, 0.03922080993652344, 0.04021883010864258, 0.04121685028076172, 0.04221487045288086, 0.043212890625]}, "gradients/encoder.encoder.layers.7.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 2.0, 4.0, 9.0, 8.0, 11.0, 34.0, 55.0, 205.0, 429.0, 136.0, 62.0, 27.0, 13.0, 7.0, 4.0, 3.0, 3.0, 4.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.9054816961288452, -0.846300482749939, -0.7871192693710327, -0.7279381155967712, -0.668756902217865, -0.6095756888389587, -0.5503945350646973, -0.491213321685791, -0.43203210830688477, -0.3728508949279785, -0.31366971135139465, -0.2544885277748108, -0.19530731439590454, -0.1361261010169983, -0.07694491744041443, -0.017763733863830566, 0.041417479515075684, 0.10059867799282074, 0.1597798764705658, 0.21896107494831085, 0.2781422734260559, 0.33732348680496216, 0.396504670381546, 0.4556858539581299, 0.5148670673370361, 0.5740482807159424, 0.6332294940948486, 0.6924106478691101, 0.7515918612480164, 0.8107730746269226, 0.8699542284011841, 0.9291354417800903, 0.9883167743682861, 1.0474979877471924, 1.1066792011260986, 1.1658604145050049, 1.2250416278839111, 1.2842228412628174, 1.343403935432434, 1.4025851488113403, 1.4617663621902466, 1.5209475755691528, 1.580128788948059, 1.6393100023269653, 1.698491096496582, 1.7576723098754883, 1.8168535232543945, 1.8760347366333008, 1.935215950012207, 1.9943971633911133, 2.0535783767700195, 2.112759590148926, 2.171940803527832, 2.2311220169067383, 2.2903032302856445, 2.349484443664551, 2.408665657043457, 2.4678468704223633, 2.5270280838012695, 2.586209297180176, 2.645390510559082, 2.7045717239379883, 2.7637529373168945, 2.822934150695801, 2.882115125656128]}, "gradients/encoder.encoder.layers.7.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 2.0, 3.0, 1.0, 1.0, 3.0, 2.0, 4.0, 4.0, 7.0, 5.0, 4.0, 11.0, 7.0, 15.0, 18.0, 22.0, 19.0, 23.0, 32.0, 43.0, 41.0, 51.0, 65.0, 48.0, 52.0, 68.0, 65.0, 49.0, 41.0, 41.0, 42.0, 39.0, 28.0, 28.0, 28.0, 22.0, 11.0, 16.0, 13.0, 9.0, 6.0, 7.0, 6.0, 4.0, 2.0, 1.0, 4.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.9754367470741272, -0.9422942399978638, -0.9091517925262451, -0.8760092854499817, -0.8428667783737183, -0.8097243309020996, -0.7765818238258362, -0.7434393167495728, -0.7102968692779541, -0.6771543622016907, -0.644011914730072, -0.6108694076538086, -0.5777269005775452, -0.5445843935012817, -0.5114419460296631, -0.47829943895339966, -0.44515693187713623, -0.4120144546031952, -0.37887194752693176, -0.3457294702529907, -0.3125869631767273, -0.27944448590278625, -0.24630200862884521, -0.21315951645374298, -0.18001702427864075, -0.1468745321035385, -0.11373204737901688, -0.08058956265449524, -0.047447070479393005, -0.014304578304290771, 0.01883789896965027, 0.0519803911447525, 0.08512294292449951, 0.11826543509960175, 0.15140792727470398, 0.18455040454864502, 0.21769289672374725, 0.2508353888988495, 0.2839778661727905, 0.31712037324905396, 0.350262850522995, 0.38340532779693604, 0.41654783487319946, 0.4496903121471405, 0.48283278942108154, 0.515975296497345, 0.5491178035736084, 0.582260251045227, 0.6154027581214905, 0.6485452651977539, 0.6816877126693726, 0.714830219745636, 0.7479727268218994, 0.7811151742935181, 0.8142576813697815, 0.8474001884460449, 0.8805426359176636, 0.913685142993927, 0.9468275904655457, 0.9799700975418091, 1.0131125450134277, 1.046255111694336, 1.0793975591659546, 1.1125400066375732, 1.1456825733184814]}, "gradients/encoder.encoder.layers.6.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 3.0, 0.0, 2.0, 2.0, 4.0, 11.0, 10.0, 17.0, 17.0, 22.0, 58.0, 110.0, 265.0, 1068.0, 10164.0, 4151665.0, 28222.0, 1645.0, 661.0, 199.0, 63.0, 29.0, 24.0, 11.0, 10.0, 4.0, 2.0, 2.0, 2.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.441162109375, -0.430023193359375, -0.41888427734375, -0.407745361328125, -0.3966064453125, -0.385467529296875, -0.37432861328125, -0.363189697265625, -0.35205078125, -0.340911865234375, -0.32977294921875, -0.318634033203125, -0.3074951171875, -0.296356201171875, -0.28521728515625, -0.274078369140625, -0.262939453125, -0.251800537109375, -0.24066162109375, -0.229522705078125, -0.2183837890625, -0.207244873046875, -0.19610595703125, -0.184967041015625, -0.173828125, -0.162689208984375, -0.15155029296875, -0.140411376953125, -0.1292724609375, -0.118133544921875, -0.10699462890625, -0.095855712890625, -0.084716796875, -0.073577880859375, -0.06243896484375, -0.051300048828125, -0.0401611328125, -0.029022216796875, -0.01788330078125, -0.006744384765625, 0.00439453125, 0.015533447265625, 0.02667236328125, 0.037811279296875, 0.0489501953125, 0.060089111328125, 0.07122802734375, 0.082366943359375, 0.093505859375, 0.104644775390625, 0.11578369140625, 0.126922607421875, 0.1380615234375, 0.149200439453125, 0.16033935546875, 0.171478271484375, 0.1826171875, 0.193756103515625, 0.20489501953125, 0.216033935546875, 0.2271728515625, 0.238311767578125, 0.24945068359375, 0.260589599609375, 0.271728515625]}, "gradients/encoder.encoder.layers.6.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 4.0, 5.0, 3.0, 3.0, 7.0, 10.0, 24.0, 43.0, 97.0, 141.0, 182.0, 177.0, 131.0, 87.0, 35.0, 30.0, 15.0, 10.0, 4.0, 3.0, 2.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.260986328125, -0.2544517517089844, -0.24791717529296875, -0.24138259887695312, -0.2348480224609375, -0.22831344604492188, -0.22177886962890625, -0.21524429321289062, -0.208709716796875, -0.20217514038085938, -0.19564056396484375, -0.18910598754882812, -0.1825714111328125, -0.17603683471679688, -0.16950225830078125, -0.16296768188476562, -0.15643310546875, -0.14989852905273438, -0.14336395263671875, -0.13682937622070312, -0.1302947998046875, -0.12376022338867188, -0.11722564697265625, -0.11069107055664062, -0.104156494140625, -0.09762191772460938, -0.09108734130859375, -0.08455276489257812, -0.0780181884765625, -0.07148361206054688, -0.06494903564453125, -0.058414459228515625, -0.0518798828125, -0.045345306396484375, -0.03881072998046875, -0.032276153564453125, -0.0257415771484375, -0.019207000732421875, -0.01267242431640625, -0.006137847900390625, 0.000396728515625, 0.006931304931640625, 0.01346588134765625, 0.020000457763671875, 0.0265350341796875, 0.033069610595703125, 0.03960418701171875, 0.046138763427734375, 0.05267333984375, 0.059207916259765625, 0.06574249267578125, 0.07227706909179688, 0.0788116455078125, 0.08534622192382812, 0.09188079833984375, 0.09841537475585938, 0.104949951171875, 0.11148452758789062, 0.11801910400390625, 0.12455368041992188, 0.1310882568359375, 0.13762283325195312, 0.14415740966796875, 0.15069198608398438, 0.1572265625]}, "gradients/encoder.encoder.layers.6.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 3.0, 3.0, 3.0, 6.0, 3.0, 11.0, 11.0, 21.0, 27.0, 42.0, 60.0, 53.0, 90.0, 147.0, 392.0, 2980.0, 3872672.0, 314935.0, 2220.0, 316.0, 114.0, 58.0, 53.0, 30.0, 12.0, 14.0, 6.0, 4.0, 3.0, 2.0, 1.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.487060546875, -0.4727134704589844, -0.45836639404296875, -0.4440193176269531, -0.4296722412109375, -0.4153251647949219, -0.40097808837890625, -0.3866310119628906, -0.372283935546875, -0.3579368591308594, -0.34358978271484375, -0.3292427062988281, -0.3148956298828125, -0.3005485534667969, -0.28620147705078125, -0.2718544006347656, -0.25750732421875, -0.24316024780273438, -0.22881317138671875, -0.21446609497070312, -0.2001190185546875, -0.18577194213867188, -0.17142486572265625, -0.15707778930664062, -0.142730712890625, -0.12838363647460938, -0.11403656005859375, -0.09968948364257812, -0.0853424072265625, -0.07099533081054688, -0.05664825439453125, -0.042301177978515625, -0.0279541015625, -0.013607025146484375, 0.00074005126953125, 0.015087127685546875, 0.0294342041015625, 0.043781280517578125, 0.05812835693359375, 0.07247543334960938, 0.086822509765625, 0.10116958618164062, 0.11551666259765625, 0.12986373901367188, 0.1442108154296875, 0.15855789184570312, 0.17290496826171875, 0.18725204467773438, 0.20159912109375, 0.21594619750976562, 0.23029327392578125, 0.24464035034179688, 0.2589874267578125, 0.2733345031738281, 0.28768157958984375, 0.3020286560058594, 0.316375732421875, 0.3307228088378906, 0.34506988525390625, 0.3594169616699219, 0.3737640380859375, 0.3881111145019531, 0.40245819091796875, 0.4168052673339844, 0.43115234375]}, "gradients/encoder.encoder.layers.6.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 3.0, 2.0, 1.0, 4.0, 2.0, 7.0, 11.0, 29.0, 365.0, 3203.0, 400.0, 37.0, 12.0, 3.0, 3.0, 3.0, 1.0, 3.0, 2.0, 0.0, 1.0, 1.0], "bins": [-0.5654296875, -0.5544624328613281, -0.5434951782226562, -0.5325279235839844, -0.5215606689453125, -0.5105934143066406, -0.49962615966796875, -0.4886589050292969, -0.477691650390625, -0.4667243957519531, -0.45575714111328125, -0.4447898864746094, -0.4338226318359375, -0.4228553771972656, -0.41188812255859375, -0.4009208679199219, -0.38995361328125, -0.3789863586425781, -0.36801910400390625, -0.3570518493652344, -0.3460845947265625, -0.3351173400878906, -0.32415008544921875, -0.3131828308105469, -0.302215576171875, -0.2912483215332031, -0.28028106689453125, -0.2693138122558594, -0.2583465576171875, -0.24737930297851562, -0.23641204833984375, -0.22544479370117188, -0.2144775390625, -0.20351028442382812, -0.19254302978515625, -0.18157577514648438, -0.1706085205078125, -0.15964126586914062, -0.14867401123046875, -0.13770675659179688, -0.126739501953125, -0.11577224731445312, -0.10480499267578125, -0.09383773803710938, -0.0828704833984375, -0.07190322875976562, -0.06093597412109375, -0.049968719482421875, -0.03900146484375, -0.028034210205078125, -0.01706695556640625, -0.006099700927734375, 0.0048675537109375, 0.015834808349609375, 0.02680206298828125, 0.037769317626953125, 0.048736572265625, 0.059703826904296875, 0.07067108154296875, 0.08163833618164062, 0.0926055908203125, 0.10357284545898438, 0.11454010009765625, 0.12550735473632812, 0.136474609375]}, "gradients/encoder.encoder.layers.6.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 3.0, 1.0, 3.0, 2.0, 1.0, 5.0, 4.0, 5.0, 8.0, 8.0, 8.0, 8.0, 20.0, 35.0, 46.0, 76.0, 116.0, 257.0, 196.0, 69.0, 54.0, 30.0, 13.0, 14.0, 10.0, 9.0, 3.0, 4.0, 1.0, 3.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.4953800439834595, -0.47979772090911865, -0.46421539783477783, -0.448633074760437, -0.4330507516860962, -0.41746842861175537, -0.40188610553741455, -0.38630378246307373, -0.3707214593887329, -0.3551391363143921, -0.33955681324005127, -0.32397449016571045, -0.30839216709136963, -0.2928098440170288, -0.277227520942688, -0.26164519786834717, -0.24606288969516754, -0.23048056662082672, -0.2148982435464859, -0.19931592047214508, -0.18373359739780426, -0.16815128922462463, -0.1525689661502838, -0.136986643075943, -0.12140431255102158, -0.10582198947668076, -0.09023966640233994, -0.07465735077857971, -0.05907502397894859, -0.04349270462989807, -0.02791038155555725, -0.01232805848121643, 0.0032542645931243896, 0.01883658766746521, 0.03441891074180603, 0.05000123009085655, 0.06558355689048767, 0.0811658725142479, 0.09674819558858871, 0.11233051866292953, 0.12791284918785095, 0.14349517226219177, 0.1590774953365326, 0.1746598184108734, 0.19024214148521423, 0.20582446455955505, 0.22140678763389587, 0.2369891107082367, 0.2525714039802551, 0.26815372705459595, 0.28373605012893677, 0.2993183732032776, 0.3149006962776184, 0.33048301935195923, 0.34606534242630005, 0.36164766550064087, 0.3772299885749817, 0.3928123116493225, 0.40839463472366333, 0.42397695779800415, 0.43955928087234497, 0.4551416039466858, 0.4707239270210266, 0.48630625009536743, 0.5018885731697083]}, "gradients/encoder.encoder.layers.6.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 1.0, 2.0, 2.0, 4.0, 6.0, 5.0, 6.0, 8.0, 6.0, 18.0, 9.0, 9.0, 25.0, 17.0, 25.0, 25.0, 28.0, 34.0, 18.0, 39.0, 37.0, 30.0, 40.0, 52.0, 41.0, 53.0, 54.0, 40.0, 31.0, 32.0, 39.0, 35.0, 33.0, 35.0, 31.0, 21.0, 22.0, 10.0, 11.0, 11.0, 16.0, 15.0, 12.0, 6.0, 6.0, 7.0, 3.0, 3.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.4470300078392029, -0.43298107385635376, -0.41893213987350464, -0.4048832058906555, -0.3908342719078064, -0.3767853379249573, -0.36273640394210815, -0.34868746995925903, -0.3346385359764099, -0.3205896019935608, -0.30654066801071167, -0.29249173402786255, -0.2784428000450134, -0.2643938660621643, -0.2503449320793152, -0.23629601299762726, -0.22224709391593933, -0.2081981599330902, -0.1941492259502411, -0.18010029196739197, -0.16605135798454285, -0.15200242400169373, -0.1379535049200058, -0.12390457093715668, -0.10985563695430756, -0.09580670297145844, -0.08175776898860931, -0.06770884245634079, -0.05365990847349167, -0.03961097449064255, -0.025562047958374023, -0.011513113975524902, 0.0025358200073242188, 0.01658475212752819, 0.030633684247732162, 0.044682614505290985, 0.058731548488140106, 0.07278048247098923, 0.08682940900325775, 0.10087834298610687, 0.114927276968956, 0.12897621095180511, 0.14302514493465424, 0.15707406401634216, 0.17112299799919128, 0.1851719319820404, 0.19922086596488953, 0.21326979994773865, 0.22731873393058777, 0.2413676679134369, 0.255416601896286, 0.26946553587913513, 0.28351446986198425, 0.2975634038448334, 0.3116123080253601, 0.32566124200820923, 0.33971017599105835, 0.35375910997390747, 0.3678080439567566, 0.3818569779396057, 0.39590591192245483, 0.40995484590530396, 0.4240037798881531, 0.4380527138710022, 0.4521016478538513]}, "gradients/encoder.encoder.layers.6.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 4.0, 3.0, 2.0, 7.0, 8.0, 10.0, 14.0, 15.0, 23.0, 32.0, 42.0, 43.0, 77.0, 132.0, 198.0, 459.0, 1066.0, 2853.0, 10126.0, 74439.0, 845792.0, 95644.0, 12225.0, 3109.0, 1077.0, 483.0, 236.0, 151.0, 83.0, 72.0, 41.0, 16.0, 24.0, 14.0, 7.0, 8.0, 5.0, 7.0, 5.0, 4.0, 2.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-0.413330078125, -0.401702880859375, -0.39007568359375, -0.378448486328125, -0.3668212890625, -0.355194091796875, -0.34356689453125, -0.331939697265625, -0.3203125, -0.308685302734375, -0.29705810546875, -0.285430908203125, -0.2738037109375, -0.262176513671875, -0.25054931640625, -0.238922119140625, -0.227294921875, -0.215667724609375, -0.20404052734375, -0.192413330078125, -0.1807861328125, -0.169158935546875, -0.15753173828125, -0.145904541015625, -0.13427734375, -0.122650146484375, -0.11102294921875, -0.099395751953125, -0.0877685546875, -0.076141357421875, -0.06451416015625, -0.052886962890625, -0.041259765625, -0.029632568359375, -0.01800537109375, -0.006378173828125, 0.0052490234375, 0.016876220703125, 0.02850341796875, 0.040130615234375, 0.0517578125, 0.063385009765625, 0.07501220703125, 0.086639404296875, 0.0982666015625, 0.109893798828125, 0.12152099609375, 0.133148193359375, 0.144775390625, 0.156402587890625, 0.16802978515625, 0.179656982421875, 0.1912841796875, 0.202911376953125, 0.21453857421875, 0.226165771484375, 0.23779296875, 0.249420166015625, 0.26104736328125, 0.272674560546875, 0.2843017578125, 0.295928955078125, 0.30755615234375, 0.319183349609375, 0.330810546875]}, "gradients/encoder.encoder.layers.6.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 5.0, 5.0, 1.0, 5.0, 11.0, 7.0, 24.0, 52.0, 111.0, 135.0, 178.0, 160.0, 145.0, 79.0, 40.0, 24.0, 15.0, 8.0, 2.0, 4.0, 2.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.2587890625, -0.2522926330566406, -0.24579620361328125, -0.23929977416992188, -0.2328033447265625, -0.22630691528320312, -0.21981048583984375, -0.21331405639648438, -0.206817626953125, -0.20032119750976562, -0.19382476806640625, -0.18732833862304688, -0.1808319091796875, -0.17433547973632812, -0.16783905029296875, -0.16134262084960938, -0.15484619140625, -0.14834976196289062, -0.14185333251953125, -0.13535690307617188, -0.1288604736328125, -0.12236404418945312, -0.11586761474609375, -0.10937118530273438, -0.102874755859375, -0.09637832641601562, -0.08988189697265625, -0.08338546752929688, -0.0768890380859375, -0.07039260864257812, -0.06389617919921875, -0.057399749755859375, -0.0509033203125, -0.044406890869140625, -0.03791046142578125, -0.031414031982421875, -0.0249176025390625, -0.018421173095703125, -0.01192474365234375, -0.005428314208984375, 0.001068115234375, 0.007564544677734375, 0.01406097412109375, 0.020557403564453125, 0.0270538330078125, 0.033550262451171875, 0.04004669189453125, 0.046543121337890625, 0.05303955078125, 0.059535980224609375, 0.06603240966796875, 0.07252883911132812, 0.0790252685546875, 0.08552169799804688, 0.09201812744140625, 0.09851455688476562, 0.105010986328125, 0.11150741577148438, 0.11800384521484375, 0.12450027465820312, 0.1309967041015625, 0.13749313354492188, 0.14398956298828125, 0.15048599243164062, 0.156982421875]}, "gradients/encoder.encoder.layers.6.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 4.0, 0.0, 2.0, 4.0, 1.0, 5.0, 4.0, 3.0, 8.0, 12.0, 19.0, 15.0, 23.0, 33.0, 37.0, 26.0, 62.0, 79.0, 119.0, 170.0, 285.0, 510.0, 1033.0, 2365.0, 5966.0, 16782.0, 54730.0, 262648.0, 584971.0, 80980.0, 23422.0, 8039.0, 3174.0, 1345.0, 644.0, 346.0, 196.0, 142.0, 84.0, 68.0, 44.0, 44.0, 27.0, 25.0, 15.0, 12.0, 13.0, 10.0, 9.0, 7.0, 3.0, 3.0, 1.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.1781005859375, -0.17238235473632812, -0.16666412353515625, -0.16094589233398438, -0.1552276611328125, -0.14950942993164062, -0.14379119873046875, -0.13807296752929688, -0.132354736328125, -0.12663650512695312, -0.12091827392578125, -0.11520004272460938, -0.1094818115234375, -0.10376358032226562, -0.09804534912109375, -0.09232711791992188, -0.08660888671875, -0.08089065551757812, -0.07517242431640625, -0.06945419311523438, -0.0637359619140625, -0.058017730712890625, -0.05229949951171875, -0.046581268310546875, -0.040863037109375, -0.035144805908203125, -0.02942657470703125, -0.023708343505859375, -0.0179901123046875, -0.012271881103515625, -0.00655364990234375, -0.000835418701171875, 0.0048828125, 0.010601043701171875, 0.01631927490234375, 0.022037506103515625, 0.0277557373046875, 0.033473968505859375, 0.03919219970703125, 0.044910430908203125, 0.050628662109375, 0.056346893310546875, 0.06206512451171875, 0.06778335571289062, 0.0735015869140625, 0.07921981811523438, 0.08493804931640625, 0.09065628051757812, 0.09637451171875, 0.10209274291992188, 0.10781097412109375, 0.11352920532226562, 0.1192474365234375, 0.12496566772460938, 0.13068389892578125, 0.13640213012695312, 0.142120361328125, 0.14783859252929688, 0.15355682373046875, 0.15927505493164062, 0.1649932861328125, 0.17071151733398438, 0.17642974853515625, 0.18214797973632812, 0.1878662109375]}, "gradients/encoder.encoder.layers.6.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 4.0, 8.0, 5.0, 8.0, 6.0, 7.0, 10.0, 10.0, 14.0, 16.0, 23.0, 20.0, 25.0, 26.0, 31.0, 38.0, 48.0, 34.0, 39.0, 51.0, 46.0, 41.0, 42.0, 59.0, 35.0, 35.0, 45.0, 28.0, 42.0, 28.0, 29.0, 25.0, 20.0, 18.0, 8.0, 14.0, 17.0, 14.0, 9.0, 8.0, 8.0, 4.0, 3.0, 3.0, 4.0, 1.0, 3.0, 1.0, 0.0, 3.0, 0.0, 0.0, 1.0], "bins": [-0.236328125, -0.22914695739746094, -0.22196578979492188, -0.2147846221923828, -0.20760345458984375, -0.2004222869873047, -0.19324111938476562, -0.18605995178222656, -0.1788787841796875, -0.17169761657714844, -0.16451644897460938, -0.1573352813720703, -0.15015411376953125, -0.1429729461669922, -0.13579177856445312, -0.12861061096191406, -0.121429443359375, -0.11424827575683594, -0.10706710815429688, -0.09988594055175781, -0.09270477294921875, -0.08552360534667969, -0.07834243774414062, -0.07116127014160156, -0.0639801025390625, -0.05679893493652344, -0.049617767333984375, -0.04243659973144531, -0.03525543212890625, -0.028074264526367188, -0.020893096923828125, -0.013711929321289062, -0.00653076171875, 0.0006504058837890625, 0.007831573486328125, 0.015012741088867188, 0.02219390869140625, 0.029375076293945312, 0.036556243896484375, 0.04373741149902344, 0.0509185791015625, 0.05809974670410156, 0.06528091430664062, 0.07246208190917969, 0.07964324951171875, 0.08682441711425781, 0.09400558471679688, 0.10118675231933594, 0.108367919921875, 0.11554908752441406, 0.12273025512695312, 0.1299114227294922, 0.13709259033203125, 0.1442737579345703, 0.15145492553710938, 0.15863609313964844, 0.1658172607421875, 0.17299842834472656, 0.18017959594726562, 0.1873607635498047, 0.19454193115234375, 0.2017230987548828, 0.20890426635742188, 0.21608543395996094, 0.2232666015625]}, "gradients/encoder.encoder.layers.6.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 3.0, 3.0, 1.0, 1.0, 2.0, 1.0, 5.0, 4.0, 3.0, 8.0, 9.0, 15.0, 24.0, 63.0, 110.0, 269.0, 864.0, 3625.0, 40033.0, 972735.0, 26645.0, 2956.0, 697.0, 251.0, 101.0, 57.0, 31.0, 19.0, 6.0, 11.0, 2.0, 5.0, 1.0, 1.0, 0.0, 3.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0150909423828125, -0.01457834243774414, -0.014065742492675781, -0.013553142547607422, -0.013040542602539062, -0.012527942657470703, -0.012015342712402344, -0.011502742767333984, -0.010990142822265625, -0.010477542877197266, -0.009964942932128906, -0.009452342987060547, -0.008939743041992188, -0.008427143096923828, -0.007914543151855469, -0.007401943206787109, -0.00688934326171875, -0.006376743316650391, -0.005864143371582031, -0.005351543426513672, -0.0048389434814453125, -0.004326343536376953, -0.0038137435913085938, -0.0033011436462402344, -0.002788543701171875, -0.0022759437561035156, -0.0017633438110351562, -0.0012507438659667969, -0.0007381439208984375, -0.00022554397583007812, 0.00028705596923828125, 0.0007996559143066406, 0.001312255859375, 0.0018248558044433594, 0.0023374557495117188, 0.002850055694580078, 0.0033626556396484375, 0.003875255584716797, 0.004387855529785156, 0.004900455474853516, 0.005413055419921875, 0.005925655364990234, 0.006438255310058594, 0.006950855255126953, 0.0074634552001953125, 0.007976055145263672, 0.008488655090332031, 0.00900125503540039, 0.00951385498046875, 0.01002645492553711, 0.010539054870605469, 0.011051654815673828, 0.011564254760742188, 0.012076854705810547, 0.012589454650878906, 0.013102054595947266, 0.013614654541015625, 0.014127254486083984, 0.014639854431152344, 0.015152454376220703, 0.015665054321289062, 0.016177654266357422, 0.01669025421142578, 0.01720285415649414, 0.0177154541015625]}, "gradients/encoder.encoder.layers.6.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 4.0, 1.0, 4.0, 4.0, 1.0, 4.0, 3.0, 10.0, 6.0, 8.0, 12.0, 8.0, 16.0, 29.0, 25.0, 20.0, 23.0, 28.0, 37.0, 31.0, 74.0, 47.0, 42.0, 47.0, 43.0, 25.0, 35.0, 49.0, 26.0, 42.0, 29.0, 32.0, 26.0, 25.0, 48.0, 14.0, 21.0, 16.0, 13.0, 15.0, 15.0, 19.0, 6.0, 8.0, 8.0, 3.0, 5.0, 1.0, 2.0, 1.0, 3.0, 1.0, 0.0, 1.0, 1.0, 3.0], "bins": [-2.2649765014648438e-06, -2.196989953517914e-06, -2.129003405570984e-06, -2.061016857624054e-06, -1.993030309677124e-06, -1.925043761730194e-06, -1.8570572137832642e-06, -1.7890706658363342e-06, -1.7210841178894043e-06, -1.6530975699424744e-06, -1.5851110219955444e-06, -1.5171244740486145e-06, -1.4491379261016846e-06, -1.3811513781547546e-06, -1.3131648302078247e-06, -1.2451782822608948e-06, -1.1771917343139648e-06, -1.109205186367035e-06, -1.041218638420105e-06, -9.73232090473175e-07, -9.052455425262451e-07, -8.372589945793152e-07, -7.692724466323853e-07, -7.012858986854553e-07, -6.332993507385254e-07, -5.653128027915955e-07, -4.973262548446655e-07, -4.293397068977356e-07, -3.6135315895080566e-07, -2.9336661100387573e-07, -2.253800630569458e-07, -1.5739351511001587e-07, -8.940696716308594e-08, -2.1420419216156006e-08, 4.6566128730773926e-08, 1.1455267667770386e-07, 1.825392246246338e-07, 2.505257725715637e-07, 3.1851232051849365e-07, 3.864988684654236e-07, 4.544854164123535e-07, 5.224719643592834e-07, 5.904585123062134e-07, 6.584450602531433e-07, 7.264316082000732e-07, 7.944181561470032e-07, 8.624047040939331e-07, 9.30391252040863e-07, 9.98377799987793e-07, 1.066364347934723e-06, 1.1343508958816528e-06, 1.2023374438285828e-06, 1.2703239917755127e-06, 1.3383105397224426e-06, 1.4062970876693726e-06, 1.4742836356163025e-06, 1.5422701835632324e-06, 1.6102567315101624e-06, 1.6782432794570923e-06, 1.7462298274040222e-06, 1.8142163753509521e-06, 1.882202923297882e-06, 1.950189471244812e-06, 2.018176019191742e-06, 2.086162567138672e-06]}, "gradients/encoder.encoder.layers.6.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 5.0, 5.0, 10.0, 6.0, 12.0, 16.0, 25.0, 30.0, 68.0, 104.0, 198.0, 383.0, 831.0, 2210.0, 8367.0, 48312.0, 834403.0, 132980.0, 14870.0, 3532.0, 1133.0, 467.0, 236.0, 146.0, 72.0, 47.0, 28.0, 14.0, 20.0, 12.0, 8.0, 3.0, 3.0, 4.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.01012420654296875, -0.009794354438781738, -0.009464502334594727, -0.009134650230407715, -0.008804798126220703, -0.008474946022033691, -0.00814509391784668, -0.007815241813659668, -0.007485389709472656, -0.0071555376052856445, -0.006825685501098633, -0.006495833396911621, -0.006165981292724609, -0.005836129188537598, -0.005506277084350586, -0.005176424980163574, -0.0048465728759765625, -0.004516720771789551, -0.004186868667602539, -0.0038570165634155273, -0.0035271644592285156, -0.003197312355041504, -0.002867460250854492, -0.0025376081466674805, -0.0022077560424804688, -0.001877903938293457, -0.0015480518341064453, -0.0012181997299194336, -0.0008883476257324219, -0.0005584955215454102, -0.00022864341735839844, 0.00010120868682861328, 0.000431060791015625, 0.0007609128952026367, 0.0010907649993896484, 0.0014206171035766602, 0.0017504692077636719, 0.0020803213119506836, 0.0024101734161376953, 0.002740025520324707, 0.0030698776245117188, 0.0033997297286987305, 0.003729581832885742, 0.004059433937072754, 0.004389286041259766, 0.004719138145446777, 0.005048990249633789, 0.005378842353820801, 0.0057086944580078125, 0.006038546562194824, 0.006368398666381836, 0.006698250770568848, 0.007028102874755859, 0.007357954978942871, 0.007687807083129883, 0.008017659187316895, 0.008347511291503906, 0.008677363395690918, 0.00900721549987793, 0.009337067604064941, 0.009666919708251953, 0.009996771812438965, 0.010326623916625977, 0.010656476020812988, 0.010986328125]}, "gradients/encoder.encoder.layers.6.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 4.0, 1.0, 6.0, 8.0, 8.0, 7.0, 4.0, 10.0, 26.0, 30.0, 30.0, 47.0, 81.0, 82.0, 98.0, 140.0, 109.0, 97.0, 63.0, 42.0, 31.0, 20.0, 16.0, 19.0, 12.0, 7.0, 2.0, 3.0, 3.0, 3.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.0139007568359375, -0.013484358787536621, -0.013067960739135742, -0.012651562690734863, -0.012235164642333984, -0.011818766593933105, -0.011402368545532227, -0.010985970497131348, -0.010569572448730469, -0.01015317440032959, -0.009736776351928711, -0.009320378303527832, -0.008903980255126953, -0.008487582206726074, -0.008071184158325195, -0.007654786109924316, -0.0072383880615234375, -0.006821990013122559, -0.00640559196472168, -0.005989193916320801, -0.005572795867919922, -0.005156397819519043, -0.004739999771118164, -0.004323601722717285, -0.003907203674316406, -0.0034908056259155273, -0.0030744075775146484, -0.0026580095291137695, -0.0022416114807128906, -0.0018252134323120117, -0.0014088153839111328, -0.000992417335510254, -0.000576019287109375, -0.0001596212387084961, 0.0002567768096923828, 0.0006731748580932617, 0.0010895729064941406, 0.0015059709548950195, 0.0019223690032958984, 0.0023387670516967773, 0.0027551651000976562, 0.003171563148498535, 0.003587961196899414, 0.004004359245300293, 0.004420757293701172, 0.004837155342102051, 0.00525355339050293, 0.005669951438903809, 0.0060863494873046875, 0.006502747535705566, 0.006919145584106445, 0.007335543632507324, 0.007751941680908203, 0.008168339729309082, 0.008584737777709961, 0.00900113582611084, 0.009417533874511719, 0.009833931922912598, 0.010250329971313477, 0.010666728019714355, 0.011083126068115234, 0.011499524116516113, 0.011915922164916992, 0.012332320213317871, 0.01274871826171875]}, "gradients/encoder.encoder.layers.6.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 5.0, 10.0, 17.0, 19.0, 42.0, 86.0, 180.0, 347.0, 118.0, 75.0, 36.0, 24.0, 7.0, 15.0, 6.0, 6.0, 1.0, 1.0, 1.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-1.7856791019439697, -1.7413582801818848, -1.6970373392105103, -1.6527165174484253, -1.6083956956863403, -1.5640747547149658, -1.5197539329528809, -1.475433111190796, -1.431112289428711, -1.386791467666626, -1.3424705266952515, -1.2981497049331665, -1.2538288831710815, -1.209507942199707, -1.165187120437622, -1.120866298675537, -1.0765453577041626, -1.0322245359420776, -0.9879036545753479, -0.9435827732086182, -0.8992619514465332, -0.8549410700798035, -0.8106201887130737, -0.7662993669509888, -0.721978485584259, -0.6776576042175293, -0.6333367824554443, -0.5890159010887146, -0.5446950197219849, -0.5003741979598999, -0.45605331659317017, -0.4117324650287628, -0.367411732673645, -0.32309088110923767, -0.2787700295448303, -0.23444914817810059, -0.19012829661369324, -0.1458074450492859, -0.10148656368255615, -0.057165712118148804, -0.012844860553741455, 0.03147599846124649, 0.07579685747623444, 0.12011772394180298, 0.16443857550621033, 0.20875942707061768, 0.2530803084373474, 0.29740116000175476, 0.3417220115661621, 0.38604286313056946, 0.4303637146949768, 0.47468459606170654, 0.5190054178237915, 0.5633262991905212, 0.607647180557251, 0.6519680023193359, 0.6962888836860657, 0.7406097650527954, 0.7849305868148804, 0.8292514681816101, 0.8735723495483398, 0.9178931713104248, 0.9622140526771545, 1.0065349340438843, 1.0508557558059692]}, "gradients/encoder.encoder.layers.6.layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 4.0, 1.0, 2.0, 2.0, 1.0, 3.0, 4.0, 7.0, 9.0, 9.0, 5.0, 12.0, 12.0, 19.0, 9.0, 13.0, 26.0, 15.0, 24.0, 27.0, 31.0, 25.0, 35.0, 29.0, 40.0, 36.0, 63.0, 39.0, 48.0, 47.0, 54.0, 31.0, 30.0, 38.0, 32.0, 19.0, 21.0, 26.0, 29.0, 18.0, 17.0, 20.0, 15.0, 11.0, 9.0, 4.0, 5.0, 7.0, 7.0, 6.0, 5.0, 5.0, 5.0, 0.0, 1.0, 0.0, 2.0, 2.0, 5.0], "bins": [-0.883518636226654, -0.8570013046264648, -0.8304839730262756, -0.8039666414260864, -0.777449369430542, -0.750931978225708, -0.7244147062301636, -0.6978973746299744, -0.6713800430297852, -0.644862711429596, -0.6183453798294067, -0.5918280482292175, -0.5653107166290283, -0.5387934446334839, -0.5122761130332947, -0.48575878143310547, -0.45924144983291626, -0.43272411823272705, -0.40620678663253784, -0.379689484834671, -0.3531721532344818, -0.3266548216342926, -0.3001375198364258, -0.2736201882362366, -0.24710285663604736, -0.22058552503585815, -0.19406820833683014, -0.16755089163780212, -0.14103356003761292, -0.1145162284374237, -0.08799891173839569, -0.061481595039367676, -0.03496420383453369, -0.00844687968492508, 0.018070444464683533, 0.044587768614292145, 0.07110509276390076, 0.09762242436408997, 0.12413974106311798, 0.150657057762146, 0.1771743893623352, 0.20369172096252441, 0.23020903766155243, 0.25672635436058044, 0.28324368596076965, 0.30976101756095886, 0.3362783193588257, 0.3627956509590149, 0.3893129825592041, 0.4158303141593933, 0.4423476457595825, 0.46886494755744934, 0.49538227915763855, 0.5218995809555054, 0.5484169125556946, 0.5749342441558838, 0.601451575756073, 0.6279689073562622, 0.6544862389564514, 0.6810035705566406, 0.7075208425521851, 0.734038233757019, 0.7605555057525635, 0.7870728373527527, 0.8135901689529419]}, "gradients/encoder.encoder.layers.5.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 2.0, 2.0, 2.0, 2.0, 1.0, 6.0, 5.0, 4.0, 3.0, 13.0, 26.0, 33.0, 40.0, 58.0, 101.0, 172.0, 389.0, 1520.0, 5260.0, 54207.0, 4033392.0, 90252.0, 5576.0, 1494.0, 818.0, 566.0, 185.0, 48.0, 36.0, 21.0, 17.0, 8.0, 6.0, 5.0, 5.0, 2.0, 3.0, 5.0, 2.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0], "bins": [-0.256591796875, -0.2501049041748047, -0.24361801147460938, -0.23713111877441406, -0.23064422607421875, -0.22415733337402344, -0.21767044067382812, -0.2111835479736328, -0.2046966552734375, -0.1982097625732422, -0.19172286987304688, -0.18523597717285156, -0.17874908447265625, -0.17226219177246094, -0.16577529907226562, -0.1592884063720703, -0.152801513671875, -0.1463146209716797, -0.13982772827148438, -0.13334083557128906, -0.12685394287109375, -0.12036705017089844, -0.11388015747070312, -0.10739326477050781, -0.1009063720703125, -0.09441947937011719, -0.08793258666992188, -0.08144569396972656, -0.07495880126953125, -0.06847190856933594, -0.061985015869140625, -0.05549812316894531, -0.04901123046875, -0.04252433776855469, -0.036037445068359375, -0.029550552368164062, -0.02306365966796875, -0.016576766967773438, -0.010089874267578125, -0.0036029815673828125, 0.0028839111328125, 0.009370803833007812, 0.015857696533203125, 0.022344589233398438, 0.02883148193359375, 0.03531837463378906, 0.041805267333984375, 0.04829216003417969, 0.054779052734375, 0.06126594543457031, 0.06775283813476562, 0.07423973083496094, 0.08072662353515625, 0.08721351623535156, 0.09370040893554688, 0.10018730163574219, 0.1066741943359375, 0.11316108703613281, 0.11964797973632812, 0.12613487243652344, 0.13262176513671875, 0.13910865783691406, 0.14559555053710938, 0.1520824432373047, 0.1585693359375]}, "gradients/encoder.encoder.layers.5.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 4.0, 4.0, 4.0, 5.0, 7.0, 10.0, 31.0, 56.0, 112.0, 164.0, 178.0, 166.0, 126.0, 61.0, 46.0, 20.0, 7.0, 5.0, 2.0, 4.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.25439453125, -0.2479724884033203, -0.24155044555664062, -0.23512840270996094, -0.22870635986328125, -0.22228431701660156, -0.21586227416992188, -0.2094402313232422, -0.2030181884765625, -0.1965961456298828, -0.19017410278320312, -0.18375205993652344, -0.17733001708984375, -0.17090797424316406, -0.16448593139648438, -0.1580638885498047, -0.151641845703125, -0.1452198028564453, -0.13879776000976562, -0.13237571716308594, -0.12595367431640625, -0.11953163146972656, -0.11310958862304688, -0.10668754577636719, -0.1002655029296875, -0.09384346008300781, -0.08742141723632812, -0.08099937438964844, -0.07457733154296875, -0.06815528869628906, -0.061733245849609375, -0.05531120300292969, -0.04888916015625, -0.04246711730957031, -0.036045074462890625, -0.029623031616210938, -0.02320098876953125, -0.016778945922851562, -0.010356903076171875, -0.0039348602294921875, 0.0024871826171875, 0.008909225463867188, 0.015331268310546875, 0.021753311157226562, 0.02817535400390625, 0.03459739685058594, 0.041019439697265625, 0.04744148254394531, 0.053863525390625, 0.06028556823730469, 0.06670761108398438, 0.07312965393066406, 0.07955169677734375, 0.08597373962402344, 0.09239578247070312, 0.09881782531738281, 0.1052398681640625, 0.11166191101074219, 0.11808395385742188, 0.12450599670410156, 0.13092803955078125, 0.13735008239746094, 0.14377212524414062, 0.1501941680908203, 0.1566162109375]}, "gradients/encoder.encoder.layers.5.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 4.0, 2.0, 2.0, 7.0, 7.0, 11.0, 11.0, 18.0, 21.0, 51.0, 73.0, 116.0, 204.0, 438.0, 975.0, 2323.0, 7203.0, 45107.0, 4053892.0, 69833.0, 9176.0, 2622.0, 1022.0, 505.0, 270.0, 139.0, 94.0, 51.0, 32.0, 25.0, 18.0, 11.0, 7.0, 9.0, 6.0, 3.0, 3.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.1793212890625, -0.17281723022460938, -0.16631317138671875, -0.15980911254882812, -0.1533050537109375, -0.14680099487304688, -0.14029693603515625, -0.13379287719726562, -0.127288818359375, -0.12078475952148438, -0.11428070068359375, -0.10777664184570312, -0.1012725830078125, -0.09476852416992188, -0.08826446533203125, -0.08176040649414062, -0.07525634765625, -0.06875228881835938, -0.06224822998046875, -0.055744171142578125, -0.0492401123046875, -0.042736053466796875, -0.03623199462890625, -0.029727935791015625, -0.023223876953125, -0.016719818115234375, -0.01021575927734375, -0.003711700439453125, 0.0027923583984375, 0.009296417236328125, 0.01580047607421875, 0.022304534912109375, 0.02880859375, 0.035312652587890625, 0.04181671142578125, 0.048320770263671875, 0.0548248291015625, 0.061328887939453125, 0.06783294677734375, 0.07433700561523438, 0.080841064453125, 0.08734512329101562, 0.09384918212890625, 0.10035324096679688, 0.1068572998046875, 0.11336135864257812, 0.11986541748046875, 0.12636947631835938, 0.13287353515625, 0.13937759399414062, 0.14588165283203125, 0.15238571166992188, 0.1588897705078125, 0.16539382934570312, 0.17189788818359375, 0.17840194702148438, 0.184906005859375, 0.19141006469726562, 0.19791412353515625, 0.20441818237304688, 0.2109222412109375, 0.21742630004882812, 0.22393035888671875, 0.23043441772460938, 0.2369384765625]}, "gradients/encoder.encoder.layers.5.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 3.0, 3.0, 2.0, 0.0, 3.0, 6.0, 6.0, 9.0, 9.0, 12.0, 25.0, 31.0, 74.0, 277.0, 1315.0, 1786.0, 306.0, 80.0, 48.0, 23.0, 11.0, 10.0, 12.0, 10.0, 6.0, 7.0, 2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.1744384765625, -0.16844558715820312, -0.16245269775390625, -0.15645980834960938, -0.1504669189453125, -0.14447402954101562, -0.13848114013671875, -0.13248825073242188, -0.126495361328125, -0.12050247192382812, -0.11450958251953125, -0.10851669311523438, -0.1025238037109375, -0.09653091430664062, -0.09053802490234375, -0.08454513549804688, -0.07855224609375, -0.07255935668945312, -0.06656646728515625, -0.060573577880859375, -0.0545806884765625, -0.048587799072265625, -0.04259490966796875, -0.036602020263671875, -0.030609130859375, -0.024616241455078125, -0.01862335205078125, -0.012630462646484375, -0.0066375732421875, -0.000644683837890625, 0.00534820556640625, 0.011341094970703125, 0.017333984375, 0.023326873779296875, 0.02931976318359375, 0.035312652587890625, 0.0413055419921875, 0.047298431396484375, 0.05329132080078125, 0.059284210205078125, 0.065277099609375, 0.07126998901367188, 0.07726287841796875, 0.08325576782226562, 0.0892486572265625, 0.09524154663085938, 0.10123443603515625, 0.10722732543945312, 0.11322021484375, 0.11921310424804688, 0.12520599365234375, 0.13119888305664062, 0.1371917724609375, 0.14318466186523438, 0.14917755126953125, 0.15517044067382812, 0.161163330078125, 0.16715621948242188, 0.17314910888671875, 0.17914199829101562, 0.1851348876953125, 0.19112777709960938, 0.19712066650390625, 0.20311355590820312, 0.2091064453125]}, "gradients/encoder.encoder.layers.5.final_layer_norm.weight": {"_type": "histogram", "values": [3.0, 1.0, 1.0, 1.0, 0.0, 3.0, 4.0, 9.0, 23.0, 75.0, 222.0, 458.0, 113.0, 41.0, 24.0, 10.0, 9.0, 6.0, 4.0, 4.0, 0.0, 3.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.4648338556289673, -0.4235740602016449, -0.3823142647743225, -0.3410544991493225, -0.2997947037220001, -0.25853490829467773, -0.21727512776851654, -0.17601534724235535, -0.13475555181503296, -0.09349576383829117, -0.05223597586154938, -0.010976187884807587, 0.030283600091934204, 0.07154339551925659, 0.11280317604541779, 0.15406295657157898, 0.19532275199890137, 0.23658254742622375, 0.27784234285354614, 0.31910210847854614, 0.36036190390586853, 0.4016216993331909, 0.4428814649581909, 0.4841412603855133, 0.5254010558128357, 0.5666608214378357, 0.6079206466674805, 0.6491804122924805, 0.6904401779174805, 0.7317000031471252, 0.7729597687721252, 0.81421959400177, 0.8554794788360596, 0.8967392444610596, 0.9379990696907043, 0.9792588353157043, 1.0205186605453491, 1.0617784261703491, 1.1030381917953491, 1.1442979574203491, 1.1855578422546387, 1.2268176078796387, 1.2680773735046387, 1.3093372583389282, 1.3505970239639282, 1.3918567895889282, 1.4331165552139282, 1.4743763208389282, 1.5156360864639282, 1.5568958520889282, 1.5981556177139282, 1.6394155025482178, 1.6806752681732178, 1.7219350337982178, 1.7631947994232178, 1.8044545650482178, 1.8457143306732178, 1.8869740962982178, 1.9282338619232178, 1.9694937467575073, 2.0107533931732178, 2.052013397216797, 2.093273162841797, 2.134532928466797, 2.175792694091797]}, "gradients/encoder.encoder.layers.5.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 2.0, 2.0, 2.0, 7.0, 5.0, 5.0, 17.0, 14.0, 19.0, 23.0, 20.0, 31.0, 33.0, 43.0, 39.0, 46.0, 53.0, 70.0, 71.0, 64.0, 64.0, 56.0, 54.0, 36.0, 33.0, 49.0, 44.0, 16.0, 18.0, 20.0, 12.0, 9.0, 12.0, 8.0, 6.0, 2.0, 4.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 2.0], "bins": [-0.6981019377708435, -0.6787804365158081, -0.6594588756561279, -0.6401373744010925, -0.6208158731460571, -0.601494312286377, -0.5821728110313416, -0.5628513097763062, -0.543529748916626, -0.5242082476615906, -0.5048866868019104, -0.485565185546875, -0.4662436544895172, -0.4469221234321594, -0.427600622177124, -0.40827909111976624, -0.38895756006240845, -0.36963602900505066, -0.35031449794769287, -0.33099299669265747, -0.3116714656352997, -0.2923499345779419, -0.2730284333229065, -0.2537069022655487, -0.23438537120819092, -0.21506384015083313, -0.19574232399463654, -0.17642080783843994, -0.15709927678108215, -0.13777774572372437, -0.11845622956752777, -0.09913471341133118, -0.07981312274932861, -0.06049159914255142, -0.04117007553577423, -0.02184855192899704, -0.0025270283222198486, 0.016794495284557343, 0.036116018891334534, 0.05543753504753113, 0.07475906610488892, 0.09408058971166611, 0.1134021133184433, 0.1327236294746399, 0.15204516053199768, 0.17136669158935547, 0.19068820774555206, 0.21000972390174866, 0.22933125495910645, 0.24865278601646423, 0.267974317073822, 0.2872958183288574, 0.3066173493862152, 0.325938880443573, 0.3452603816986084, 0.3645819127559662, 0.383903443813324, 0.40322497487068176, 0.42254650592803955, 0.44186800718307495, 0.46118953824043274, 0.4805110692977905, 0.4998325705528259, 0.5191540718078613, 0.5384756326675415]}, "gradients/encoder.encoder.layers.5.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 5.0, 5.0, 5.0, 11.0, 12.0, 15.0, 15.0, 25.0, 24.0, 45.0, 64.0, 66.0, 139.0, 229.0, 431.0, 947.0, 2504.0, 8634.0, 56918.0, 807048.0, 149069.0, 15911.0, 3761.0, 1341.0, 545.0, 292.0, 164.0, 97.0, 54.0, 51.0, 33.0, 25.0, 14.0, 14.0, 11.0, 12.0, 8.0, 2.0, 3.0, 4.0, 3.0, 3.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.36181640625, -0.35167694091796875, -0.3415374755859375, -0.33139801025390625, -0.321258544921875, -0.31111907958984375, -0.3009796142578125, -0.29084014892578125, -0.28070068359375, -0.27056121826171875, -0.2604217529296875, -0.25028228759765625, -0.240142822265625, -0.23000335693359375, -0.2198638916015625, -0.20972442626953125, -0.1995849609375, -0.18944549560546875, -0.1793060302734375, -0.16916656494140625, -0.159027099609375, -0.14888763427734375, -0.1387481689453125, -0.12860870361328125, -0.11846923828125, -0.10832977294921875, -0.0981903076171875, -0.08805084228515625, -0.077911376953125, -0.06777191162109375, -0.0576324462890625, -0.04749298095703125, -0.037353515625, -0.02721405029296875, -0.0170745849609375, -0.00693511962890625, 0.003204345703125, 0.01334381103515625, 0.0234832763671875, 0.03362274169921875, 0.04376220703125, 0.05390167236328125, 0.0640411376953125, 0.07418060302734375, 0.084320068359375, 0.09445953369140625, 0.1045989990234375, 0.11473846435546875, 0.1248779296875, 0.13501739501953125, 0.1451568603515625, 0.15529632568359375, 0.165435791015625, 0.17557525634765625, 0.1857147216796875, 0.19585418701171875, 0.20599365234375, 0.21613311767578125, 0.2262725830078125, 0.23641204833984375, 0.246551513671875, 0.25669097900390625, 0.2668304443359375, 0.27696990966796875, 0.287109375]}, "gradients/encoder.encoder.layers.5.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 5.0, 4.0, 4.0, 6.0, 6.0, 12.0, 25.0, 68.0, 133.0, 174.0, 168.0, 168.0, 105.0, 67.0, 36.0, 15.0, 6.0, 5.0, 3.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.25048828125, -0.24413490295410156, -0.23778152465820312, -0.2314281463623047, -0.22507476806640625, -0.2187213897705078, -0.21236801147460938, -0.20601463317871094, -0.1996612548828125, -0.19330787658691406, -0.18695449829101562, -0.1806011199951172, -0.17424774169921875, -0.1678943634033203, -0.16154098510742188, -0.15518760681152344, -0.148834228515625, -0.14248085021972656, -0.13612747192382812, -0.1297740936279297, -0.12342071533203125, -0.11706733703613281, -0.11071395874023438, -0.10436058044433594, -0.0980072021484375, -0.09165382385253906, -0.08530044555664062, -0.07894706726074219, -0.07259368896484375, -0.06624031066894531, -0.059886932373046875, -0.05353355407714844, -0.04718017578125, -0.04082679748535156, -0.034473419189453125, -0.028120040893554688, -0.02176666259765625, -0.015413284301757812, -0.009059906005859375, -0.0027065277099609375, 0.0036468505859375, 0.010000228881835938, 0.016353607177734375, 0.022706985473632812, 0.02906036376953125, 0.03541374206542969, 0.041767120361328125, 0.04812049865722656, 0.054473876953125, 0.06082725524902344, 0.06718063354492188, 0.07353401184082031, 0.07988739013671875, 0.08624076843261719, 0.09259414672851562, 0.09894752502441406, 0.1053009033203125, 0.11165428161621094, 0.11800765991210938, 0.12436103820800781, 0.13071441650390625, 0.1370677947998047, 0.14342117309570312, 0.14977455139160156, 0.1561279296875]}, "gradients/encoder.encoder.layers.5.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 0.0, 4.0, 0.0, 2.0, 4.0, 8.0, 13.0, 7.0, 14.0, 18.0, 16.0, 27.0, 34.0, 42.0, 70.0, 104.0, 193.0, 360.0, 931.0, 3727.0, 23378.0, 659603.0, 335940.0, 19170.0, 3180.0, 858.0, 361.0, 173.0, 94.0, 60.0, 46.0, 24.0, 28.0, 20.0, 14.0, 9.0, 5.0, 8.0, 6.0, 0.0, 3.0, 5.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-0.46875, -0.45407867431640625, -0.4394073486328125, -0.42473602294921875, -0.410064697265625, -0.39539337158203125, -0.3807220458984375, -0.36605072021484375, -0.35137939453125, -0.33670806884765625, -0.3220367431640625, -0.30736541748046875, -0.292694091796875, -0.27802276611328125, -0.2633514404296875, -0.24868011474609375, -0.2340087890625, -0.21933746337890625, -0.2046661376953125, -0.18999481201171875, -0.175323486328125, -0.16065216064453125, -0.1459808349609375, -0.13130950927734375, -0.11663818359375, -0.10196685791015625, -0.0872955322265625, -0.07262420654296875, -0.057952880859375, -0.04328155517578125, -0.0286102294921875, -0.01393890380859375, 0.000732421875, 0.01540374755859375, 0.0300750732421875, 0.04474639892578125, 0.059417724609375, 0.07408905029296875, 0.0887603759765625, 0.10343170166015625, 0.11810302734375, 0.13277435302734375, 0.1474456787109375, 0.16211700439453125, 0.176788330078125, 0.19145965576171875, 0.2061309814453125, 0.22080230712890625, 0.2354736328125, 0.25014495849609375, 0.2648162841796875, 0.27948760986328125, 0.294158935546875, 0.30883026123046875, 0.3235015869140625, 0.33817291259765625, 0.35284423828125, 0.36751556396484375, 0.3821868896484375, 0.39685821533203125, 0.411529541015625, 0.42620086669921875, 0.4408721923828125, 0.45554351806640625, 0.47021484375]}, "gradients/encoder.encoder.layers.5.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 4.0, 2.0, 4.0, 5.0, 8.0, 7.0, 8.0, 17.0, 18.0, 20.0, 22.0, 20.0, 30.0, 45.0, 62.0, 50.0, 43.0, 57.0, 52.0, 58.0, 46.0, 61.0, 53.0, 41.0, 55.0, 38.0, 35.0, 27.0, 22.0, 21.0, 11.0, 14.0, 15.0, 7.0, 9.0, 7.0, 6.0, 1.0, 0.0, 4.0, 0.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.345458984375, -0.3347320556640625, -0.324005126953125, -0.3132781982421875, -0.30255126953125, -0.2918243408203125, -0.281097412109375, -0.2703704833984375, -0.2596435546875, -0.2489166259765625, -0.238189697265625, -0.2274627685546875, -0.21673583984375, -0.2060089111328125, -0.195281982421875, -0.1845550537109375, -0.173828125, -0.1631011962890625, -0.152374267578125, -0.1416473388671875, -0.13092041015625, -0.1201934814453125, -0.109466552734375, -0.0987396240234375, -0.0880126953125, -0.0772857666015625, -0.066558837890625, -0.0558319091796875, -0.04510498046875, -0.0343780517578125, -0.023651123046875, -0.0129241943359375, -0.002197265625, 0.0085296630859375, 0.019256591796875, 0.0299835205078125, 0.04071044921875, 0.0514373779296875, 0.062164306640625, 0.0728912353515625, 0.0836181640625, 0.0943450927734375, 0.105072021484375, 0.1157989501953125, 0.12652587890625, 0.1372528076171875, 0.147979736328125, 0.1587066650390625, 0.16943359375, 0.1801605224609375, 0.190887451171875, 0.2016143798828125, 0.21234130859375, 0.2230682373046875, 0.233795166015625, 0.2445220947265625, 0.2552490234375, 0.2659759521484375, 0.276702880859375, 0.2874298095703125, 0.29815673828125, 0.3088836669921875, 0.319610595703125, 0.3303375244140625, 0.341064453125]}, "gradients/encoder.encoder.layers.5.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 3.0, 1.0, 1.0, 5.0, 3.0, 2.0, 8.0, 6.0, 13.0, 17.0, 24.0, 37.0, 27.0, 65.0, 69.0, 99.0, 122.0, 176.0, 287.0, 404.0, 644.0, 1281.0, 2836.0, 7063.0, 24639.0, 192135.0, 747102.0, 51707.0, 11585.0, 4001.0, 1771.0, 860.0, 515.0, 310.0, 182.0, 185.0, 88.0, 72.0, 52.0, 47.0, 34.0, 19.0, 22.0, 9.0, 9.0, 9.0, 7.0, 2.0, 5.0, 2.0, 4.0, 1.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00823211669921875, -0.007968425750732422, -0.007704734802246094, -0.007441043853759766, -0.0071773529052734375, -0.006913661956787109, -0.006649971008300781, -0.006386280059814453, -0.006122589111328125, -0.005858898162841797, -0.005595207214355469, -0.005331516265869141, -0.0050678253173828125, -0.004804134368896484, -0.004540443420410156, -0.004276752471923828, -0.0040130615234375, -0.003749370574951172, -0.0034856796264648438, -0.0032219886779785156, -0.0029582977294921875, -0.0026946067810058594, -0.0024309158325195312, -0.002167224884033203, -0.001903533935546875, -0.0016398429870605469, -0.0013761520385742188, -0.0011124610900878906, -0.0008487701416015625, -0.0005850791931152344, -0.00032138824462890625, -5.7697296142578125e-05, 0.00020599365234375, 0.0004696846008300781, 0.0007333755493164062, 0.0009970664978027344, 0.0012607574462890625, 0.0015244483947753906, 0.0017881393432617188, 0.002051830291748047, 0.002315521240234375, 0.002579212188720703, 0.0028429031372070312, 0.0031065940856933594, 0.0033702850341796875, 0.0036339759826660156, 0.0038976669311523438, 0.004161357879638672, 0.004425048828125, 0.004688739776611328, 0.004952430725097656, 0.005216121673583984, 0.0054798126220703125, 0.005743503570556641, 0.006007194519042969, 0.006270885467529297, 0.006534576416015625, 0.006798267364501953, 0.007061958312988281, 0.007325649261474609, 0.0075893402099609375, 0.007853031158447266, 0.008116722106933594, 0.008380413055419922, 0.00864410400390625]}, "gradients/encoder.encoder.layers.5.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 3.0, 0.0, 4.0, 1.0, 2.0, 5.0, 10.0, 2.0, 5.0, 10.0, 5.0, 23.0, 17.0, 14.0, 35.0, 19.0, 46.0, 22.0, 22.0, 64.0, 31.0, 64.0, 36.0, 41.0, 70.0, 35.0, 59.0, 37.0, 59.0, 29.0, 23.0, 46.0, 25.0, 40.0, 16.0, 10.0, 23.0, 10.0, 12.0, 10.0, 3.0, 5.0, 3.0, 5.0, 0.0, 0.0, 6.0, 1.0, 4.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-2.7418136596679688e-06, -2.6579946279525757e-06, -2.5741755962371826e-06, -2.4903565645217896e-06, -2.4065375328063965e-06, -2.3227185010910034e-06, -2.2388994693756104e-06, -2.1550804376602173e-06, -2.0712614059448242e-06, -1.987442374229431e-06, -1.903623342514038e-06, -1.819804310798645e-06, -1.735985279083252e-06, -1.6521662473678589e-06, -1.5683472156524658e-06, -1.4845281839370728e-06, -1.4007091522216797e-06, -1.3168901205062866e-06, -1.2330710887908936e-06, -1.1492520570755005e-06, -1.0654330253601074e-06, -9.816139936447144e-07, -8.977949619293213e-07, -8.139759302139282e-07, -7.301568984985352e-07, -6.463378667831421e-07, -5.62518835067749e-07, -4.78699803352356e-07, -3.948807716369629e-07, -3.110617399215698e-07, -2.2724270820617676e-07, -1.434236764907837e-07, -5.960464477539063e-08, 2.421438694000244e-08, 1.0803341865539551e-07, 1.9185245037078857e-07, 2.7567148208618164e-07, 3.594905138015747e-07, 4.4330954551696777e-07, 5.271285772323608e-07, 6.109476089477539e-07, 6.94766640663147e-07, 7.7858567237854e-07, 8.624047040939331e-07, 9.462237358093262e-07, 1.0300427675247192e-06, 1.1138617992401123e-06, 1.1976808309555054e-06, 1.2814998626708984e-06, 1.3653188943862915e-06, 1.4491379261016846e-06, 1.5329569578170776e-06, 1.6167759895324707e-06, 1.7005950212478638e-06, 1.7844140529632568e-06, 1.86823308467865e-06, 1.952052116394043e-06, 2.035871148109436e-06, 2.119690179824829e-06, 2.203509211540222e-06, 2.2873282432556152e-06, 2.3711472749710083e-06, 2.4549663066864014e-06, 2.5387853384017944e-06, 2.6226043701171875e-06]}, "gradients/encoder.encoder.layers.5.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 4.0, 4.0, 2.0, 9.0, 7.0, 8.0, 6.0, 9.0, 11.0, 23.0, 35.0, 35.0, 42.0, 90.0, 122.0, 227.0, 552.0, 1653.0, 7132.0, 49827.0, 850804.0, 120963.0, 12901.0, 2584.0, 741.0, 299.0, 142.0, 94.0, 54.0, 35.0, 31.0, 23.0, 22.0, 11.0, 12.0, 13.0, 3.0, 8.0, 4.0, 5.0, 9.0, 3.0, 2.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0], "bins": [-0.0199737548828125, -0.019362449645996094, -0.018751144409179688, -0.01813983917236328, -0.017528533935546875, -0.01691722869873047, -0.016305923461914062, -0.015694618225097656, -0.01508331298828125, -0.014472007751464844, -0.013860702514648438, -0.013249397277832031, -0.012638092041015625, -0.012026786804199219, -0.011415481567382812, -0.010804176330566406, -0.01019287109375, -0.009581565856933594, -0.008970260620117188, -0.008358955383300781, -0.007747650146484375, -0.007136344909667969, -0.0065250396728515625, -0.005913734436035156, -0.00530242919921875, -0.004691123962402344, -0.0040798187255859375, -0.0034685134887695312, -0.002857208251953125, -0.0022459030151367188, -0.0016345977783203125, -0.0010232925415039062, -0.0004119873046875, 0.00019931793212890625, 0.0008106231689453125, 0.0014219284057617188, 0.002033233642578125, 0.0026445388793945312, 0.0032558441162109375, 0.0038671493530273438, 0.00447845458984375, 0.005089759826660156, 0.0057010650634765625, 0.006312370300292969, 0.006923675537109375, 0.007534980773925781, 0.008146286010742188, 0.008757591247558594, 0.009368896484375, 0.009980201721191406, 0.010591506958007812, 0.011202812194824219, 0.011814117431640625, 0.012425422668457031, 0.013036727905273438, 0.013648033142089844, 0.01425933837890625, 0.014870643615722656, 0.015481948852539062, 0.01609325408935547, 0.016704559326171875, 0.01731586456298828, 0.017927169799804688, 0.018538475036621094, 0.0191497802734375]}, "gradients/encoder.encoder.layers.5.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 3.0, 2.0, 10.0, 5.0, 7.0, 6.0, 5.0, 9.0, 15.0, 11.0, 16.0, 24.0, 15.0, 16.0, 25.0, 38.0, 37.0, 44.0, 52.0, 58.0, 60.0, 70.0, 81.0, 49.0, 54.0, 50.0, 27.0, 40.0, 25.0, 27.0, 17.0, 24.0, 23.0, 15.0, 8.0, 6.0, 5.0, 5.0, 7.0, 8.0, 3.0, 5.0, 3.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.01385498046875, -0.013408422470092773, -0.012961864471435547, -0.01251530647277832, -0.012068748474121094, -0.011622190475463867, -0.01117563247680664, -0.010729074478149414, -0.010282516479492188, -0.009835958480834961, -0.009389400482177734, -0.008942842483520508, -0.008496284484863281, -0.008049726486206055, -0.007603168487548828, -0.0071566104888916016, -0.006710052490234375, -0.0062634944915771484, -0.005816936492919922, -0.005370378494262695, -0.004923820495605469, -0.004477262496948242, -0.004030704498291016, -0.003584146499633789, -0.0031375885009765625, -0.002691030502319336, -0.0022444725036621094, -0.0017979145050048828, -0.0013513565063476562, -0.0009047985076904297, -0.0004582405090332031, -1.1682510375976562e-05, 0.00043487548828125, 0.0008814334869384766, 0.0013279914855957031, 0.0017745494842529297, 0.0022211074829101562, 0.002667665481567383, 0.0031142234802246094, 0.003560781478881836, 0.0040073394775390625, 0.004453897476196289, 0.004900455474853516, 0.005347013473510742, 0.005793571472167969, 0.006240129470825195, 0.006686687469482422, 0.0071332454681396484, 0.007579803466796875, 0.008026361465454102, 0.008472919464111328, 0.008919477462768555, 0.009366035461425781, 0.009812593460083008, 0.010259151458740234, 0.010705709457397461, 0.011152267456054688, 0.011598825454711914, 0.01204538345336914, 0.012491941452026367, 0.012938499450683594, 0.01338505744934082, 0.013831615447998047, 0.014278173446655273, 0.0147247314453125]}, "gradients/encoder.encoder.layers.5.layer_norm.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 3.0, 2.0, 3.0, 5.0, 10.0, 12.0, 14.0, 12.0, 29.0, 44.0, 75.0, 187.0, 319.0, 110.0, 57.0, 51.0, 30.0, 21.0, 4.0, 5.0, 3.0, 6.0, 2.0, 2.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-1.6272858381271362, -1.5847965478897095, -1.5423072576522827, -1.499817967414856, -1.4573287963867188, -1.414839506149292, -1.3723502159118652, -1.3298609256744385, -1.2873716354370117, -1.244882345199585, -1.2023930549621582, -1.1599037647247314, -1.1174144744873047, -1.0749253034591675, -1.0324360132217407, -0.989946722984314, -0.9474574327468872, -0.9049681425094604, -0.8624788522720337, -0.8199896216392517, -0.777500331401825, -0.7350110411643982, -0.6925218105316162, -0.6500325202941895, -0.6075432300567627, -0.5650539398193359, -0.5225646495819092, -0.4800754189491272, -0.43758612871170044, -0.3950968384742737, -0.3526075780391693, -0.31011831760406494, -0.26762890815734863, -0.22513963282108307, -0.1826503574848175, -0.14016108214855194, -0.09767180681228638, -0.05518253147602081, -0.012693256139755249, 0.02979600429534912, 0.07228529453277588, 0.11477456986904144, 0.157263845205307, 0.19975312054157257, 0.24224239587783813, 0.2847316861152649, 0.32722094655036926, 0.36971020698547363, 0.4121994972229004, 0.45468878746032715, 0.4971780478954315, 0.5396673083305359, 0.5821565985679626, 0.6246458888053894, 0.6671351194381714, 0.7096244096755981, 0.7521136999130249, 0.7946029901504517, 0.8370922803878784, 0.8795815110206604, 0.9220708012580872, 0.9645600914955139, 1.007049322128296, 1.0495386123657227, 1.0920279026031494]}, "gradients/encoder.encoder.layers.5.layer_norm.bias": {"_type": "histogram", "values": [2.0, 1.0, 2.0, 0.0, 3.0, 1.0, 5.0, 1.0, 6.0, 5.0, 5.0, 13.0, 3.0, 5.0, 11.0, 15.0, 9.0, 13.0, 16.0, 18.0, 20.0, 29.0, 21.0, 34.0, 36.0, 41.0, 26.0, 35.0, 51.0, 63.0, 70.0, 61.0, 41.0, 37.0, 26.0, 28.0, 30.0, 31.0, 18.0, 22.0, 27.0, 20.0, 17.0, 15.0, 16.0, 11.0, 8.0, 8.0, 9.0, 6.0, 7.0, 4.0, 8.0, 4.0, 0.0, 2.0, 0.0, 5.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.9223974943161011, -0.8918372392654419, -0.8612769842147827, -0.8307167291641235, -0.8001564145088196, -0.7695961594581604, -0.7390359044075012, -0.708475649356842, -0.6779153943061829, -0.6473551392555237, -0.6167948842048645, -0.5862345695495605, -0.5556743144989014, -0.5251140594482422, -0.494553804397583, -0.46399354934692383, -0.43343326449394226, -0.4028730094432831, -0.3723127245903015, -0.34175246953964233, -0.31119221448898315, -0.280631959438324, -0.2500716745853424, -0.21951141953468323, -0.18895114958286285, -0.15839087963104248, -0.1278306245803833, -0.09727035462856293, -0.06671009212732315, -0.036149829626083374, -0.0055895596742630005, 0.02497069537639618, 0.05553096532821655, 0.08609122782945633, 0.1166514903306961, 0.14721176028251648, 0.17777201533317566, 0.20833228528499603, 0.2388925552368164, 0.2694528102874756, 0.30001306533813477, 0.33057332038879395, 0.3611336052417755, 0.3916938602924347, 0.42225411534309387, 0.45281440019607544, 0.4833746552467346, 0.5139349102973938, 0.5444952249526978, 0.5750554800033569, 0.6056157350540161, 0.6361759901046753, 0.6667363047599792, 0.6972965598106384, 0.7278568148612976, 0.7584170699119568, 0.788977324962616, 0.8195375800132751, 0.8500978350639343, 0.8806581497192383, 0.9112184047698975, 0.9417786598205566, 0.9723389148712158, 1.002899169921875, 1.0334594249725342]}, "gradients/encoder.encoder.layers.4.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 2.0, 0.0, 2.0, 2.0, 6.0, 6.0, 3.0, 3.0, 16.0, 38.0, 58.0, 131.0, 340.0, 2123.0, 402170.0, 3785368.0, 2972.0, 798.0, 126.0, 55.0, 36.0, 11.0, 10.0, 9.0, 3.0, 3.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.54296875, -0.5290718078613281, -0.5151748657226562, -0.5012779235839844, -0.4873809814453125, -0.4734840393066406, -0.45958709716796875, -0.4456901550292969, -0.431793212890625, -0.4178962707519531, -0.40399932861328125, -0.3901023864746094, -0.3762054443359375, -0.3623085021972656, -0.34841156005859375, -0.3345146179199219, -0.32061767578125, -0.3067207336425781, -0.29282379150390625, -0.2789268493652344, -0.2650299072265625, -0.2511329650878906, -0.23723602294921875, -0.22333908081054688, -0.209442138671875, -0.19554519653320312, -0.18164825439453125, -0.16775131225585938, -0.1538543701171875, -0.13995742797851562, -0.12606048583984375, -0.11216354370117188, -0.0982666015625, -0.08436965942382812, -0.07047271728515625, -0.056575775146484375, -0.0426788330078125, -0.028781890869140625, -0.01488494873046875, -0.000988006591796875, 0.012908935546875, 0.026805877685546875, 0.04070281982421875, 0.054599761962890625, 0.0684967041015625, 0.08239364624023438, 0.09629058837890625, 0.11018753051757812, 0.12408447265625, 0.13798141479492188, 0.15187835693359375, 0.16577529907226562, 0.1796722412109375, 0.19356918334960938, 0.20746612548828125, 0.22136306762695312, 0.235260009765625, 0.24915695190429688, 0.26305389404296875, 0.2769508361816406, 0.2908477783203125, 0.3047447204589844, 0.31864166259765625, 0.3325386047363281, 0.346435546875]}, "gradients/encoder.encoder.layers.4.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 4.0, 3.0, 2.0, 1.0, 8.0, 13.0, 20.0, 42.0, 76.0, 137.0, 178.0, 180.0, 129.0, 107.0, 48.0, 32.0, 18.0, 6.0, 4.0, 4.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.2457275390625, -0.23945045471191406, -0.23317337036132812, -0.2268962860107422, -0.22061920166015625, -0.2143421173095703, -0.20806503295898438, -0.20178794860839844, -0.1955108642578125, -0.18923377990722656, -0.18295669555664062, -0.1766796112060547, -0.17040252685546875, -0.1641254425048828, -0.15784835815429688, -0.15157127380371094, -0.145294189453125, -0.13901710510253906, -0.13274002075195312, -0.1264629364013672, -0.12018585205078125, -0.11390876770019531, -0.10763168334960938, -0.10135459899902344, -0.0950775146484375, -0.08880043029785156, -0.08252334594726562, -0.07624626159667969, -0.06996917724609375, -0.06369209289550781, -0.057415008544921875, -0.05113792419433594, -0.04486083984375, -0.03858375549316406, -0.032306671142578125, -0.026029586791992188, -0.01975250244140625, -0.013475418090820312, -0.007198333740234375, -0.0009212493896484375, 0.0053558349609375, 0.011632919311523438, 0.017910003662109375, 0.024187088012695312, 0.03046417236328125, 0.03674125671386719, 0.043018341064453125, 0.04929542541503906, 0.055572509765625, 0.06184959411621094, 0.06812667846679688, 0.07440376281738281, 0.08068084716796875, 0.08695793151855469, 0.09323501586914062, 0.09951210021972656, 0.1057891845703125, 0.11206626892089844, 0.11834335327148438, 0.12462043762207031, 0.13089752197265625, 0.1371746063232422, 0.14345169067382812, 0.14972877502441406, 0.156005859375]}, "gradients/encoder.encoder.layers.4.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 2.0, 2.0, 3.0, 4.0, 8.0, 11.0, 13.0, 21.0, 24.0, 29.0, 39.0, 73.0, 121.0, 218.0, 351.0, 682.0, 1231.0, 2894.0, 9852.0, 236129.0, 3918978.0, 17584.0, 3609.0, 1189.0, 525.0, 277.0, 153.0, 100.0, 59.0, 25.0, 25.0, 18.0, 6.0, 12.0, 10.0, 3.0, 7.0, 3.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.2166748046875, -0.2094898223876953, -0.20230484008789062, -0.19511985778808594, -0.18793487548828125, -0.18074989318847656, -0.17356491088867188, -0.1663799285888672, -0.1591949462890625, -0.1520099639892578, -0.14482498168945312, -0.13763999938964844, -0.13045501708984375, -0.12327003479003906, -0.11608505249023438, -0.10890007019042969, -0.101715087890625, -0.09453010559082031, -0.08734512329101562, -0.08016014099121094, -0.07297515869140625, -0.06579017639160156, -0.058605194091796875, -0.05142021179199219, -0.0442352294921875, -0.03705024719238281, -0.029865264892578125, -0.022680282592773438, -0.01549530029296875, -0.008310317993164062, -0.001125335693359375, 0.0060596466064453125, 0.01324462890625, 0.020429611206054688, 0.027614593505859375, 0.03479957580566406, 0.04198455810546875, 0.04916954040527344, 0.056354522705078125, 0.06353950500488281, 0.0707244873046875, 0.07790946960449219, 0.08509445190429688, 0.09227943420410156, 0.09946441650390625, 0.10664939880371094, 0.11383438110351562, 0.12101936340332031, 0.128204345703125, 0.1353893280029297, 0.14257431030273438, 0.14975929260253906, 0.15694427490234375, 0.16412925720214844, 0.17131423950195312, 0.1784992218017578, 0.1856842041015625, 0.1928691864013672, 0.20005416870117188, 0.20723915100097656, 0.21442413330078125, 0.22160911560058594, 0.22879409790039062, 0.2359790802001953, 0.2431640625]}, "gradients/encoder.encoder.layers.4.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 0.0, 3.0, 1.0, 5.0, 1.0, 6.0, 3.0, 13.0, 18.0, 24.0, 88.0, 450.0, 2522.0, 745.0, 106.0, 39.0, 19.0, 11.0, 4.0, 10.0, 6.0, 3.0, 1.0, 1.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.22607421875, -0.21845245361328125, -0.2108306884765625, -0.20320892333984375, -0.195587158203125, -0.18796539306640625, -0.1803436279296875, -0.17272186279296875, -0.16510009765625, -0.15747833251953125, -0.1498565673828125, -0.14223480224609375, -0.134613037109375, -0.12699127197265625, -0.1193695068359375, -0.11174774169921875, -0.1041259765625, -0.09650421142578125, -0.0888824462890625, -0.08126068115234375, -0.073638916015625, -0.06601715087890625, -0.0583953857421875, -0.05077362060546875, -0.04315185546875, -0.03553009033203125, -0.0279083251953125, -0.02028656005859375, -0.012664794921875, -0.00504302978515625, 0.0025787353515625, 0.01020050048828125, 0.017822265625, 0.02544403076171875, 0.0330657958984375, 0.04068756103515625, 0.048309326171875, 0.05593109130859375, 0.0635528564453125, 0.07117462158203125, 0.07879638671875, 0.08641815185546875, 0.0940399169921875, 0.10166168212890625, 0.109283447265625, 0.11690521240234375, 0.1245269775390625, 0.13214874267578125, 0.1397705078125, 0.14739227294921875, 0.1550140380859375, 0.16263580322265625, 0.170257568359375, 0.17787933349609375, 0.1855010986328125, 0.19312286376953125, 0.20074462890625, 0.20836639404296875, 0.2159881591796875, 0.22360992431640625, 0.231231689453125, 0.23885345458984375, 0.2464752197265625, 0.25409698486328125, 0.26171875]}, "gradients/encoder.encoder.layers.4.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 4.0, 0.0, 2.0, 4.0, 0.0, 3.0, 4.0, 8.0, 5.0, 5.0, 11.0, 14.0, 21.0, 27.0, 44.0, 76.0, 105.0, 235.0, 209.0, 86.0, 64.0, 29.0, 16.0, 15.0, 11.0, 6.0, 3.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.7462344169616699, -0.7259775996208191, -0.705720841884613, -0.6854640245437622, -0.6652072668075562, -0.6449504494667053, -0.6246936917304993, -0.6044368743896484, -0.5841801166534424, -0.5639232993125916, -0.5436665415763855, -0.5234097242355347, -0.5031529664993286, -0.4828961491584778, -0.46263936161994934, -0.4423825740814209, -0.42212575674057007, -0.4018689692020416, -0.3816121816635132, -0.36135539412498474, -0.3410986065864563, -0.32084178924560547, -0.300585001707077, -0.2803282141685486, -0.26007142663002014, -0.2398146390914917, -0.21955785155296326, -0.19930104911327362, -0.17904426157474518, -0.15878747403621674, -0.1385306715965271, -0.11827388405799866, -0.09801709651947021, -0.07776030898094177, -0.05750351399183273, -0.03724672272801399, -0.01698993146419525, 0.003266856074333191, 0.02352365106344223, 0.04378044605255127, 0.06403723359107971, 0.08429402112960815, 0.1045508161187172, 0.12480761110782623, 0.14506439864635468, 0.16532118618488312, 0.18557798862457275, 0.2058347761631012, 0.22609156370162964, 0.24634835124015808, 0.2666051387786865, 0.28686192631721497, 0.3071187138557434, 0.32737553119659424, 0.3476323187351227, 0.3678891062736511, 0.38814589381217957, 0.408402681350708, 0.42865946888923645, 0.4489162564277649, 0.4691730737686157, 0.4894298315048218, 0.5096866488456726, 0.5299434661865234, 0.5502002239227295]}, "gradients/encoder.encoder.layers.4.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0, 2.0, 3.0, 4.0, 12.0, 10.0, 9.0, 11.0, 5.0, 14.0, 20.0, 20.0, 14.0, 25.0, 29.0, 43.0, 34.0, 35.0, 31.0, 33.0, 28.0, 58.0, 37.0, 39.0, 55.0, 44.0, 53.0, 40.0, 30.0, 32.0, 34.0, 29.0, 25.0, 24.0, 19.0, 18.0, 15.0, 15.0, 17.0, 7.0, 13.0, 6.0, 5.0, 3.0, 1.0, 2.0, 1.0, 5.0, 3.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.46971410512924194, -0.4550257623195648, -0.4403373897075653, -0.4256490468978882, -0.41096070408821106, -0.39627236127853394, -0.3815839886665344, -0.3668956458568573, -0.3522073030471802, -0.33751896023750305, -0.32283058762550354, -0.3081422448158264, -0.2934539020061493, -0.27876555919647217, -0.26407718658447266, -0.24938884377479553, -0.23470047116279602, -0.2200121134519577, -0.20532377064228058, -0.19063541293144226, -0.17594707012176514, -0.16125871241092682, -0.1465703547000885, -0.13188201189041138, -0.11719365417957306, -0.10250530391931534, -0.08781695365905762, -0.0731285959482193, -0.05844024568796158, -0.04375189542770386, -0.02906353771686554, -0.014375187456607819, 0.00031316280364990234, 0.015001514926552773, 0.029689867049455643, 0.04437822103500366, 0.05906657129526138, 0.0737549215555191, 0.08844327926635742, 0.10313162952661514, 0.11781997978687286, 0.13250833749771118, 0.1471966803073883, 0.16188503801822662, 0.17657339572906494, 0.19126173853874207, 0.20595009624958038, 0.2206384539604187, 0.23532679677009583, 0.25001513957977295, 0.26470351219177246, 0.2793918550014496, 0.2940801978111267, 0.3087685704231262, 0.32345691323280334, 0.33814525604248047, 0.35283362865448, 0.3675219714641571, 0.3822103440761566, 0.39689868688583374, 0.41158702969551086, 0.426275372505188, 0.4409637451171875, 0.4556520879268646, 0.47034043073654175]}, "gradients/encoder.encoder.layers.4.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 3.0, 2.0, 0.0, 3.0, 5.0, 3.0, 7.0, 12.0, 5.0, 7.0, 15.0, 14.0, 26.0, 38.0, 27.0, 49.0, 85.0, 172.0, 253.0, 443.0, 1092.0, 2673.0, 9828.0, 60961.0, 816595.0, 133205.0, 16296.0, 3968.0, 1367.0, 602.0, 245.0, 178.0, 105.0, 66.0, 54.0, 50.0, 29.0, 16.0, 13.0, 13.0, 10.0, 8.0, 6.0, 5.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 3.0, 2.0, 1.0, 2.0, 0.0, 1.0, 1.0], "bins": [-0.396240234375, -0.3837242126464844, -0.37120819091796875, -0.3586921691894531, -0.3461761474609375, -0.3336601257324219, -0.32114410400390625, -0.3086280822753906, -0.296112060546875, -0.2835960388183594, -0.27108001708984375, -0.2585639953613281, -0.2460479736328125, -0.23353195190429688, -0.22101593017578125, -0.20849990844726562, -0.19598388671875, -0.18346786499023438, -0.17095184326171875, -0.15843582153320312, -0.1459197998046875, -0.13340377807617188, -0.12088775634765625, -0.10837173461914062, -0.095855712890625, -0.08333969116210938, -0.07082366943359375, -0.058307647705078125, -0.0457916259765625, -0.033275604248046875, -0.02075958251953125, -0.008243560791015625, 0.0042724609375, 0.016788482666015625, 0.02930450439453125, 0.041820526123046875, 0.0543365478515625, 0.06685256958007812, 0.07936859130859375, 0.09188461303710938, 0.104400634765625, 0.11691665649414062, 0.12943267822265625, 0.14194869995117188, 0.1544647216796875, 0.16698074340820312, 0.17949676513671875, 0.19201278686523438, 0.20452880859375, 0.21704483032226562, 0.22956085205078125, 0.24207687377929688, 0.2545928955078125, 0.2671089172363281, 0.27962493896484375, 0.2921409606933594, 0.304656982421875, 0.3171730041503906, 0.32968902587890625, 0.3422050476074219, 0.3547210693359375, 0.3672370910644531, 0.37975311279296875, 0.3922691345214844, 0.40478515625]}, "gradients/encoder.encoder.layers.4.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 5.0, 2.0, 0.0, 5.0, 8.0, 8.0, 24.0, 43.0, 91.0, 128.0, 184.0, 177.0, 129.0, 98.0, 53.0, 25.0, 19.0, 7.0, 5.0, 2.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.243896484375, -0.23765945434570312, -0.23142242431640625, -0.22518539428710938, -0.2189483642578125, -0.21271133422851562, -0.20647430419921875, -0.20023727416992188, -0.194000244140625, -0.18776321411132812, -0.18152618408203125, -0.17528915405273438, -0.1690521240234375, -0.16281509399414062, -0.15657806396484375, -0.15034103393554688, -0.14410400390625, -0.13786697387695312, -0.13162994384765625, -0.12539291381835938, -0.1191558837890625, -0.11291885375976562, -0.10668182373046875, -0.10044479370117188, -0.094207763671875, -0.08797073364257812, -0.08173370361328125, -0.07549667358398438, -0.0692596435546875, -0.06302261352539062, -0.05678558349609375, -0.050548553466796875, -0.0443115234375, -0.038074493408203125, -0.03183746337890625, -0.025600433349609375, -0.0193634033203125, -0.013126373291015625, -0.00688934326171875, -0.000652313232421875, 0.005584716796875, 0.011821746826171875, 0.01805877685546875, 0.024295806884765625, 0.0305328369140625, 0.036769866943359375, 0.04300689697265625, 0.049243927001953125, 0.05548095703125, 0.061717987060546875, 0.06795501708984375, 0.07419204711914062, 0.0804290771484375, 0.08666610717773438, 0.09290313720703125, 0.09914016723632812, 0.105377197265625, 0.11161422729492188, 0.11785125732421875, 0.12408828735351562, 0.1303253173828125, 0.13656234741210938, 0.14279937744140625, 0.14903640747070312, 0.1552734375]}, "gradients/encoder.encoder.layers.4.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 3.0, 1.0, 3.0, 5.0, 11.0, 7.0, 10.0, 20.0, 34.0, 41.0, 45.0, 79.0, 103.0, 197.0, 305.0, 680.0, 1543.0, 4222.0, 13369.0, 50029.0, 389358.0, 516066.0, 51063.0, 13718.0, 4381.0, 1618.0, 724.0, 374.0, 194.0, 118.0, 67.0, 61.0, 36.0, 28.0, 15.0, 13.0, 7.0, 8.0, 1.0, 5.0, 3.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.281005859375, -0.2722282409667969, -0.26345062255859375, -0.2546730041503906, -0.2458953857421875, -0.23711776733398438, -0.22834014892578125, -0.21956253051757812, -0.210784912109375, -0.20200729370117188, -0.19322967529296875, -0.18445205688476562, -0.1756744384765625, -0.16689682006835938, -0.15811920166015625, -0.14934158325195312, -0.14056396484375, -0.13178634643554688, -0.12300872802734375, -0.11423110961914062, -0.1054534912109375, -0.09667587280273438, -0.08789825439453125, -0.07912063598632812, -0.070343017578125, -0.061565399169921875, -0.05278778076171875, -0.044010162353515625, -0.0352325439453125, -0.026454925537109375, -0.01767730712890625, -0.008899688720703125, -0.0001220703125, 0.008655548095703125, 0.01743316650390625, 0.026210784912109375, 0.0349884033203125, 0.043766021728515625, 0.05254364013671875, 0.061321258544921875, 0.070098876953125, 0.07887649536132812, 0.08765411376953125, 0.09643173217773438, 0.1052093505859375, 0.11398696899414062, 0.12276458740234375, 0.13154220581054688, 0.14031982421875, 0.14909744262695312, 0.15787506103515625, 0.16665267944335938, 0.1754302978515625, 0.18420791625976562, 0.19298553466796875, 0.20176315307617188, 0.210540771484375, 0.21931838989257812, 0.22809600830078125, 0.23687362670898438, 0.2456512451171875, 0.2544288635253906, 0.26320648193359375, 0.2719841003417969, 0.28076171875]}, "gradients/encoder.encoder.layers.4.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0, 5.0, 0.0, 3.0, 6.0, 8.0, 6.0, 12.0, 12.0, 22.0, 27.0, 23.0, 28.0, 45.0, 47.0, 45.0, 48.0, 61.0, 46.0, 52.0, 60.0, 72.0, 65.0, 55.0, 40.0, 49.0, 22.0, 26.0, 19.0, 28.0, 14.0, 17.0, 14.0, 9.0, 5.0, 8.0, 8.0, 3.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.324462890625, -0.31433868408203125, -0.3042144775390625, -0.29409027099609375, -0.283966064453125, -0.27384185791015625, -0.2637176513671875, -0.25359344482421875, -0.24346923828125, -0.23334503173828125, -0.2232208251953125, -0.21309661865234375, -0.202972412109375, -0.19284820556640625, -0.1827239990234375, -0.17259979248046875, -0.1624755859375, -0.15235137939453125, -0.1422271728515625, -0.13210296630859375, -0.121978759765625, -0.11185455322265625, -0.1017303466796875, -0.09160614013671875, -0.08148193359375, -0.07135772705078125, -0.0612335205078125, -0.05110931396484375, -0.040985107421875, -0.03086090087890625, -0.0207366943359375, -0.01061248779296875, -0.00048828125, 0.00963592529296875, 0.0197601318359375, 0.02988433837890625, 0.040008544921875, 0.05013275146484375, 0.0602569580078125, 0.07038116455078125, 0.08050537109375, 0.09062957763671875, 0.1007537841796875, 0.11087799072265625, 0.121002197265625, 0.13112640380859375, 0.1412506103515625, 0.15137481689453125, 0.1614990234375, 0.17162322998046875, 0.1817474365234375, 0.19187164306640625, 0.201995849609375, 0.21212005615234375, 0.2222442626953125, 0.23236846923828125, 0.24249267578125, 0.25261688232421875, 0.2627410888671875, 0.27286529541015625, 0.282989501953125, 0.29311370849609375, 0.3032379150390625, 0.31336212158203125, 0.323486328125]}, "gradients/encoder.encoder.layers.4.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 4.0, 3.0, 1.0, 2.0, 8.0, 9.0, 10.0, 20.0, 19.0, 34.0, 52.0, 86.0, 170.0, 431.0, 1237.0, 4487.0, 33158.0, 931674.0, 67985.0, 6575.0, 1575.0, 554.0, 215.0, 106.0, 62.0, 33.0, 13.0, 10.0, 10.0, 6.0, 6.0, 3.0, 3.0, 3.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.02337646484375, -0.022559642791748047, -0.021742820739746094, -0.02092599868774414, -0.020109176635742188, -0.019292354583740234, -0.01847553253173828, -0.017658710479736328, -0.016841888427734375, -0.016025066375732422, -0.015208244323730469, -0.014391422271728516, -0.013574600219726562, -0.01275777816772461, -0.011940956115722656, -0.011124134063720703, -0.01030731201171875, -0.009490489959716797, -0.008673667907714844, -0.00785684585571289, -0.0070400238037109375, -0.006223201751708984, -0.005406379699707031, -0.004589557647705078, -0.003772735595703125, -0.002955913543701172, -0.0021390914916992188, -0.0013222694396972656, -0.0005054473876953125, 0.0003113746643066406, 0.0011281967163085938, 0.0019450187683105469, 0.0027618408203125, 0.003578662872314453, 0.004395484924316406, 0.005212306976318359, 0.0060291290283203125, 0.006845951080322266, 0.007662773132324219, 0.008479595184326172, 0.009296417236328125, 0.010113239288330078, 0.010930061340332031, 0.011746883392333984, 0.012563705444335938, 0.01338052749633789, 0.014197349548339844, 0.015014171600341797, 0.01583099365234375, 0.016647815704345703, 0.017464637756347656, 0.01828145980834961, 0.019098281860351562, 0.019915103912353516, 0.02073192596435547, 0.021548748016357422, 0.022365570068359375, 0.023182392120361328, 0.02399921417236328, 0.024816036224365234, 0.025632858276367188, 0.02644968032836914, 0.027266502380371094, 0.028083324432373047, 0.028900146484375]}, "gradients/encoder.encoder.layers.4.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 4.0, 0.0, 2.0, 1.0, 6.0, 3.0, 3.0, 5.0, 3.0, 7.0, 4.0, 11.0, 11.0, 6.0, 13.0, 14.0, 29.0, 14.0, 38.0, 22.0, 33.0, 20.0, 21.0, 48.0, 30.0, 65.0, 45.0, 76.0, 24.0, 29.0, 57.0, 22.0, 55.0, 31.0, 42.0, 18.0, 40.0, 16.0, 15.0, 35.0, 12.0, 18.0, 13.0, 15.0, 6.0, 4.0, 9.0, 6.0, 2.0, 3.0, 6.0, 1.0, 3.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0], "bins": [-2.562999725341797e-06, -2.477318048477173e-06, -2.391636371612549e-06, -2.305954694747925e-06, -2.2202730178833008e-06, -2.1345913410186768e-06, -2.0489096641540527e-06, -1.9632279872894287e-06, -1.8775463104248047e-06, -1.7918646335601807e-06, -1.7061829566955566e-06, -1.6205012798309326e-06, -1.5348196029663086e-06, -1.4491379261016846e-06, -1.3634562492370605e-06, -1.2777745723724365e-06, -1.1920928955078125e-06, -1.1064112186431885e-06, -1.0207295417785645e-06, -9.350478649139404e-07, -8.493661880493164e-07, -7.636845111846924e-07, -6.780028343200684e-07, -5.923211574554443e-07, -5.066394805908203e-07, -4.209578037261963e-07, -3.3527612686157227e-07, -2.4959444999694824e-07, -1.6391277313232422e-07, -7.82310962677002e-08, 7.450580596923828e-09, 9.313225746154785e-08, 1.7881393432617188e-07, 2.644956111907959e-07, 3.501772880554199e-07, 4.3585896492004395e-07, 5.21540641784668e-07, 6.07222318649292e-07, 6.92903995513916e-07, 7.7858567237854e-07, 8.642673492431641e-07, 9.499490261077881e-07, 1.0356307029724121e-06, 1.1213123798370361e-06, 1.2069940567016602e-06, 1.2926757335662842e-06, 1.3783574104309082e-06, 1.4640390872955322e-06, 1.5497207641601562e-06, 1.6354024410247803e-06, 1.7210841178894043e-06, 1.8067657947540283e-06, 1.8924474716186523e-06, 1.9781291484832764e-06, 2.0638108253479004e-06, 2.1494925022125244e-06, 2.2351741790771484e-06, 2.3208558559417725e-06, 2.4065375328063965e-06, 2.4922192096710205e-06, 2.5779008865356445e-06, 2.6635825634002686e-06, 2.7492642402648926e-06, 2.8349459171295166e-06, 2.9206275939941406e-06]}, "gradients/encoder.encoder.layers.4.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 3.0, 2.0, 4.0, 7.0, 6.0, 10.0, 10.0, 14.0, 32.0, 39.0, 50.0, 94.0, 150.0, 262.0, 541.0, 1254.0, 3426.0, 10990.0, 52174.0, 795766.0, 154115.0, 20367.0, 5499.0, 1967.0, 829.0, 420.0, 196.0, 129.0, 69.0, 39.0, 35.0, 21.0, 18.0, 9.0, 5.0, 5.0, 2.0, 2.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0164642333984375, -0.015824317932128906, -0.015184402465820312, -0.014544486999511719, -0.013904571533203125, -0.013264656066894531, -0.012624740600585938, -0.011984825134277344, -0.01134490966796875, -0.010704994201660156, -0.010065078735351562, -0.009425163269042969, -0.008785247802734375, -0.008145332336425781, -0.0075054168701171875, -0.006865501403808594, -0.0062255859375, -0.005585670471191406, -0.0049457550048828125, -0.004305839538574219, -0.003665924072265625, -0.0030260086059570312, -0.0023860931396484375, -0.0017461776733398438, -0.00110626220703125, -0.00046634674072265625, 0.0001735687255859375, 0.0008134841918945312, 0.001453399658203125, 0.0020933151245117188, 0.0027332305908203125, 0.0033731460571289062, 0.0040130615234375, 0.004652976989746094, 0.0052928924560546875, 0.005932807922363281, 0.006572723388671875, 0.007212638854980469, 0.007852554321289062, 0.008492469787597656, 0.00913238525390625, 0.009772300720214844, 0.010412216186523438, 0.011052131652832031, 0.011692047119140625, 0.012331962585449219, 0.012971878051757812, 0.013611793518066406, 0.014251708984375, 0.014891624450683594, 0.015531539916992188, 0.01617145538330078, 0.016811370849609375, 0.01745128631591797, 0.018091201782226562, 0.018731117248535156, 0.01937103271484375, 0.020010948181152344, 0.020650863647460938, 0.02129077911376953, 0.021930694580078125, 0.02257061004638672, 0.023210525512695312, 0.023850440979003906, 0.0244903564453125]}, "gradients/encoder.encoder.layers.4.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 3.0, 3.0, 0.0, 6.0, 8.0, 10.0, 8.0, 11.0, 23.0, 17.0, 31.0, 33.0, 51.0, 56.0, 61.0, 106.0, 122.0, 107.0, 103.0, 67.0, 39.0, 40.0, 21.0, 14.0, 15.0, 9.0, 13.0, 10.0, 6.0, 6.0, 3.0, 3.0, 1.0, 2.0, 0.0, 3.0, 1.0, 0.0, 2.0, 1.0, 1.0, 1.0], "bins": [-0.02862548828125, -0.02788400650024414, -0.02714252471923828, -0.026401042938232422, -0.025659561157226562, -0.024918079376220703, -0.024176597595214844, -0.023435115814208984, -0.022693634033203125, -0.021952152252197266, -0.021210670471191406, -0.020469188690185547, -0.019727706909179688, -0.018986225128173828, -0.01824474334716797, -0.01750326156616211, -0.01676177978515625, -0.01602029800415039, -0.015278816223144531, -0.014537334442138672, -0.013795852661132812, -0.013054370880126953, -0.012312889099121094, -0.011571407318115234, -0.010829925537109375, -0.010088443756103516, -0.009346961975097656, -0.008605480194091797, -0.007863998413085938, -0.007122516632080078, -0.006381034851074219, -0.005639553070068359, -0.0048980712890625, -0.004156589508056641, -0.0034151077270507812, -0.002673625946044922, -0.0019321441650390625, -0.0011906623840332031, -0.00044918060302734375, 0.0002923011779785156, 0.001033782958984375, 0.0017752647399902344, 0.0025167465209960938, 0.003258228302001953, 0.0039997100830078125, 0.004741191864013672, 0.005482673645019531, 0.006224155426025391, 0.00696563720703125, 0.007707118988037109, 0.008448600769042969, 0.009190082550048828, 0.009931564331054688, 0.010673046112060547, 0.011414527893066406, 0.012156009674072266, 0.012897491455078125, 0.013638973236083984, 0.014380455017089844, 0.015121936798095703, 0.015863418579101562, 0.016604900360107422, 0.01734638214111328, 0.01808786392211914, 0.018829345703125]}, "gradients/encoder.encoder.layers.4.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 0.0, 4.0, 0.0, 5.0, 2.0, 5.0, 14.0, 7.0, 11.0, 17.0, 23.0, 26.0, 59.0, 97.0, 190.0, 242.0, 120.0, 51.0, 38.0, 30.0, 19.0, 15.0, 11.0, 4.0, 6.0, 5.0, 5.0, 2.0, 0.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-1.535199761390686, -1.499698519706726, -1.4641971588134766, -1.4286959171295166, -1.3931946754455566, -1.3576933145523071, -1.3221920728683472, -1.2866907119750977, -1.2511894702911377, -1.2156882286071777, -1.1801868677139282, -1.1446856260299683, -1.1091842651367188, -1.0736830234527588, -1.0381817817687988, -1.0026804208755493, -0.9671791791915894, -0.9316778779029846, -0.8961765766143799, -0.8606753349304199, -0.8251740336418152, -0.7896727323532104, -0.7541714906692505, -0.7186701893806458, -0.683168888092041, -0.6476675868034363, -0.6121662855148315, -0.5766650438308716, -0.5411637425422668, -0.5056624412536621, -0.47016116976737976, -0.4346598982810974, -0.3991585969924927, -0.36365729570388794, -0.3281560242176056, -0.29265475273132324, -0.2571534514427185, -0.22165216505527496, -0.18615087866783142, -0.15064959228038788, -0.11514830589294434, -0.0796470195055008, -0.04414573311805725, -0.008644446730613708, 0.026856839656829834, 0.062358126044273376, 0.09785941243171692, 0.13336069881916046, 0.168861985206604, 0.20436327159404755, 0.2398645579814911, 0.27536582946777344, 0.3108671307563782, 0.3463684320449829, 0.38186970353126526, 0.4173709750175476, 0.45287227630615234, 0.4883735775947571, 0.5238748788833618, 0.5593761205673218, 0.5948774218559265, 0.6303787231445312, 0.6658799648284912, 0.701381266117096, 0.7368825674057007]}, "gradients/encoder.encoder.layers.4.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 2.0, 1.0, 1.0, 6.0, 4.0, 11.0, 5.0, 9.0, 4.0, 12.0, 11.0, 17.0, 18.0, 21.0, 28.0, 28.0, 29.0, 34.0, 28.0, 41.0, 62.0, 69.0, 78.0, 68.0, 57.0, 45.0, 37.0, 41.0, 42.0, 27.0, 33.0, 29.0, 15.0, 17.0, 18.0, 9.0, 18.0, 11.0, 3.0, 5.0, 4.0, 2.0, 6.0, 0.0, 1.0, 5.0, 0.0, 4.0, 0.0, 0.0, 0.0, 2.0], "bins": [-1.156698226928711, -1.1235761642456055, -1.0904541015625, -1.057331919670105, -1.0242098569869995, -0.991087794303894, -0.9579657316207886, -0.9248436689376831, -0.8917215466499329, -0.8585994839668274, -0.8254773616790771, -0.7923552989959717, -0.7592332363128662, -0.726111114025116, -0.6929890513420105, -0.6598669290542603, -0.6267448663711548, -0.5936228036880493, -0.5605006814002991, -0.5273786187171936, -0.49425652623176575, -0.4611344337463379, -0.4280123710632324, -0.39489027857780457, -0.3617681860923767, -0.32864609360694885, -0.295524001121521, -0.2624019384384155, -0.22927984595298767, -0.19615775346755981, -0.16303567588329315, -0.1299135982990265, -0.09679162502288818, -0.06366953998804092, -0.030547454953193665, 0.002574630081653595, 0.035696715116500854, 0.06881880760192871, 0.10194088518619537, 0.13506296277046204, 0.1681850552558899, 0.20130714774131775, 0.2344292253255844, 0.2675513029098511, 0.30067339539527893, 0.3337954878807068, 0.36691755056381226, 0.4000396430492401, 0.43316173553466797, 0.4662838280200958, 0.4994059205055237, 0.5325279831886292, 0.5656501054763794, 0.5987721681594849, 0.6318942308425903, 0.6650162935256958, 0.698138415813446, 0.7312604784965515, 0.7643826007843018, 0.7975046634674072, 0.8306267261505127, 0.8637488484382629, 0.8968709111213684, 0.9299930334091187, 0.9631150960922241]}, "gradients/encoder.encoder.layers.3.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 3.0, 6.0, 2.0, 3.0, 9.0, 16.0, 18.0, 43.0, 119.0, 292.0, 1251.0, 4158183.0, 33208.0, 757.0, 220.0, 79.0, 37.0, 22.0, 11.0, 4.0, 4.0, 3.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.3349609375, -1.3006134033203125, -1.266265869140625, -1.2319183349609375, -1.19757080078125, -1.1632232666015625, -1.128875732421875, -1.0945281982421875, -1.0601806640625, -1.0258331298828125, -0.991485595703125, -0.9571380615234375, -0.92279052734375, -0.8884429931640625, -0.854095458984375, -0.8197479248046875, -0.785400390625, -0.7510528564453125, -0.716705322265625, -0.6823577880859375, -0.64801025390625, -0.6136627197265625, -0.579315185546875, -0.5449676513671875, -0.5106201171875, -0.4762725830078125, -0.441925048828125, -0.4075775146484375, -0.37322998046875, -0.3388824462890625, -0.304534912109375, -0.2701873779296875, -0.23583984375, -0.2014923095703125, -0.167144775390625, -0.1327972412109375, -0.09844970703125, -0.0641021728515625, -0.029754638671875, 0.0045928955078125, 0.0389404296875, 0.0732879638671875, 0.107635498046875, 0.1419830322265625, 0.17633056640625, 0.2106781005859375, 0.245025634765625, 0.2793731689453125, 0.313720703125, 0.3480682373046875, 0.382415771484375, 0.4167633056640625, 0.45111083984375, 0.4854583740234375, 0.519805908203125, 0.5541534423828125, 0.5885009765625, 0.6228485107421875, 0.657196044921875, 0.6915435791015625, 0.72589111328125, 0.7602386474609375, 0.794586181640625, 0.8289337158203125, 0.86328125]}, "gradients/encoder.encoder.layers.3.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 6.0, 1.0, 0.0, 3.0, 8.0, 9.0, 17.0, 51.0, 81.0, 132.0, 172.0, 183.0, 157.0, 95.0, 46.0, 26.0, 14.0, 6.0, 3.0, 2.0, 3.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.243896484375, -0.23766136169433594, -0.23142623901367188, -0.2251911163330078, -0.21895599365234375, -0.2127208709716797, -0.20648574829101562, -0.20025062561035156, -0.1940155029296875, -0.18778038024902344, -0.18154525756835938, -0.1753101348876953, -0.16907501220703125, -0.1628398895263672, -0.15660476684570312, -0.15036964416503906, -0.144134521484375, -0.13789939880371094, -0.13166427612304688, -0.1254291534423828, -0.11919403076171875, -0.11295890808105469, -0.10672378540039062, -0.10048866271972656, -0.0942535400390625, -0.08801841735839844, -0.08178329467773438, -0.07554817199707031, -0.06931304931640625, -0.06307792663574219, -0.056842803955078125, -0.05060768127441406, -0.04437255859375, -0.03813743591308594, -0.031902313232421875, -0.025667190551757812, -0.01943206787109375, -0.013196945190429688, -0.006961822509765625, -0.0007266998291015625, 0.0055084228515625, 0.011743545532226562, 0.017978668212890625, 0.024213790893554688, 0.03044891357421875, 0.03668403625488281, 0.042919158935546875, 0.04915428161621094, 0.055389404296875, 0.06162452697753906, 0.06785964965820312, 0.07409477233886719, 0.08032989501953125, 0.08656501770019531, 0.09280014038085938, 0.09903526306152344, 0.1052703857421875, 0.11150550842285156, 0.11774063110351562, 0.12397575378417969, 0.13021087646484375, 0.1364459991455078, 0.14268112182617188, 0.14891624450683594, 0.1551513671875]}, "gradients/encoder.encoder.layers.3.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 2.0, 1.0, 2.0, 1.0, 8.0, 3.0, 8.0, 11.0, 27.0, 22.0, 27.0, 59.0, 84.0, 147.0, 257.0, 409.0, 850.0, 1742.0, 4753.0, 24742.0, 4060376.0, 87865.0, 8396.0, 2448.0, 1000.0, 477.0, 244.0, 124.0, 71.0, 44.0, 30.0, 25.0, 13.0, 5.0, 6.0, 6.0, 3.0, 2.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.1961669921875, -0.18881797790527344, -0.18146896362304688, -0.1741199493408203, -0.16677093505859375, -0.1594219207763672, -0.15207290649414062, -0.14472389221191406, -0.1373748779296875, -0.13002586364746094, -0.12267684936523438, -0.11532783508300781, -0.10797882080078125, -0.10062980651855469, -0.09328079223632812, -0.08593177795410156, -0.078582763671875, -0.07123374938964844, -0.06388473510742188, -0.05653572082519531, -0.04918670654296875, -0.04183769226074219, -0.034488677978515625, -0.027139663696289062, -0.0197906494140625, -0.012441635131835938, -0.005092620849609375, 0.0022563934326171875, 0.00960540771484375, 0.016954421997070312, 0.024303436279296875, 0.03165245056152344, 0.03900146484375, 0.04635047912597656, 0.053699493408203125, 0.06104850769042969, 0.06839752197265625, 0.07574653625488281, 0.08309555053710938, 0.09044456481933594, 0.0977935791015625, 0.10514259338378906, 0.11249160766601562, 0.11984062194824219, 0.12718963623046875, 0.1345386505126953, 0.14188766479492188, 0.14923667907714844, 0.156585693359375, 0.16393470764160156, 0.17128372192382812, 0.1786327362060547, 0.18598175048828125, 0.1933307647705078, 0.20067977905273438, 0.20802879333496094, 0.2153778076171875, 0.22272682189941406, 0.23007583618164062, 0.2374248504638672, 0.24477386474609375, 0.2521228790283203, 0.2594718933105469, 0.26682090759277344, 0.274169921875]}, "gradients/encoder.encoder.layers.3.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 3.0, 2.0, 3.0, 1.0, 4.0, 2.0, 11.0, 13.0, 27.0, 44.0, 187.0, 1159.0, 2176.0, 291.0, 73.0, 26.0, 16.0, 9.0, 6.0, 5.0, 8.0, 4.0, 3.0, 4.0, 4.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.2626953125, -0.25582122802734375, -0.2489471435546875, -0.24207305908203125, -0.235198974609375, -0.22832489013671875, -0.2214508056640625, -0.21457672119140625, -0.20770263671875, -0.20082855224609375, -0.1939544677734375, -0.18708038330078125, -0.180206298828125, -0.17333221435546875, -0.1664581298828125, -0.15958404541015625, -0.1527099609375, -0.14583587646484375, -0.1389617919921875, -0.13208770751953125, -0.125213623046875, -0.11833953857421875, -0.1114654541015625, -0.10459136962890625, -0.09771728515625, -0.09084320068359375, -0.0839691162109375, -0.07709503173828125, -0.070220947265625, -0.06334686279296875, -0.0564727783203125, -0.04959869384765625, -0.042724609375, -0.03585052490234375, -0.0289764404296875, -0.02210235595703125, -0.015228271484375, -0.00835418701171875, -0.0014801025390625, 0.00539398193359375, 0.01226806640625, 0.01914215087890625, 0.0260162353515625, 0.03289031982421875, 0.039764404296875, 0.04663848876953125, 0.0535125732421875, 0.06038665771484375, 0.0672607421875, 0.07413482666015625, 0.0810089111328125, 0.08788299560546875, 0.094757080078125, 0.10163116455078125, 0.1085052490234375, 0.11537933349609375, 0.12225341796875, 0.12912750244140625, 0.1360015869140625, 0.14287567138671875, 0.149749755859375, 0.15662384033203125, 0.1634979248046875, 0.17037200927734375, 0.17724609375]}, "gradients/encoder.encoder.layers.3.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 3.0, 2.0, 1.0, 4.0, 4.0, 8.0, 10.0, 24.0, 41.0, 61.0, 115.0, 232.0, 270.0, 120.0, 47.0, 24.0, 15.0, 11.0, 7.0, 1.0, 2.0, 1.0, 3.0, 1.0, 3.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.8528099656105042, -0.82133948802948, -0.7898690104484558, -0.7583985328674316, -0.7269279956817627, -0.6954575181007385, -0.6639870405197144, -0.6325165629386902, -0.601046085357666, -0.5695756077766418, -0.5381051301956177, -0.5066345930099487, -0.47516411542892456, -0.4436936378479004, -0.4122231602668762, -0.38075268268585205, -0.3492821455001831, -0.31781166791915894, -0.2863411605358124, -0.2548706829547882, -0.22340019047260284, -0.19192969799041748, -0.1604592204093933, -0.12898872792720795, -0.09751823544502258, -0.06604774296283722, -0.03457725793123245, -0.0031067728996276855, 0.028363719582557678, 0.05983421206474304, 0.09130468964576721, 0.12277518212795258, 0.15424573421478271, 0.18571622669696808, 0.21718671917915344, 0.2486571967601776, 0.28012770414352417, 0.31159818172454834, 0.3430686593055725, 0.3745391368865967, 0.40600964426994324, 0.4374801218509674, 0.46895062923431396, 0.5004211068153381, 0.5318915843963623, 0.5633621215820312, 0.5948325395584106, 0.6263030767440796, 0.6577735543251038, 0.6892440319061279, 0.7207145094871521, 0.7521849870681763, 0.7836555242538452, 0.8151260018348694, 0.8465964794158936, 0.8780669569969177, 0.9095374345779419, 0.9410079121589661, 0.9724783897399902, 1.0039489269256592, 1.0354193449020386, 1.0668898820877075, 1.098360300064087, 1.1298308372497559, 1.1613013744354248]}, "gradients/encoder.encoder.layers.3.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 6.0, 6.0, 7.0, 5.0, 10.0, 7.0, 3.0, 9.0, 18.0, 18.0, 16.0, 20.0, 26.0, 25.0, 33.0, 39.0, 29.0, 43.0, 46.0, 69.0, 52.0, 46.0, 39.0, 34.0, 40.0, 57.0, 46.0, 35.0, 41.0, 30.0, 24.0, 24.0, 25.0, 21.0, 16.0, 14.0, 6.0, 10.0, 2.0, 3.0, 4.0, 4.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0], "bins": [-0.5870083570480347, -0.5686197280883789, -0.5502310991287231, -0.5318424105644226, -0.5134537816047668, -0.4950651526451111, -0.47667649388313293, -0.4582878351211548, -0.439899206161499, -0.42151057720184326, -0.4031219184398651, -0.38473325967788696, -0.3663446307182312, -0.34795600175857544, -0.3295673429965973, -0.31117868423461914, -0.2927900552749634, -0.2744014263153076, -0.25601276755332947, -0.2376241236925125, -0.21923547983169556, -0.2008468359708786, -0.18245819211006165, -0.1640695482492447, -0.14568090438842773, -0.12729226052761078, -0.10890361666679382, -0.09051497280597687, -0.07212632894515991, -0.05373768508434296, -0.035349041223526, -0.016960397362709045, 0.0014282464981079102, 0.019816890358924866, 0.03820553421974182, 0.05659417808055878, 0.07498282194137573, 0.09337146580219269, 0.11176010966300964, 0.1301487535238266, 0.14853739738464355, 0.1669260412454605, 0.18531468510627747, 0.20370332896709442, 0.22209197282791138, 0.24048061668872833, 0.2588692605495453, 0.27725791931152344, 0.2956465482711792, 0.31403517723083496, 0.3324238359928131, 0.35081249475479126, 0.369201123714447, 0.3875897526741028, 0.40597841143608093, 0.4243670701980591, 0.44275569915771484, 0.4611443281173706, 0.47953298687934875, 0.4979216456413269, 0.5163102746009827, 0.5346989035606384, 0.553087592124939, 0.5714762210845947, 0.5898648500442505]}, "gradients/encoder.encoder.layers.3.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 7.0, 2.0, 5.0, 2.0, 5.0, 6.0, 12.0, 15.0, 9.0, 33.0, 29.0, 50.0, 103.0, 169.0, 364.0, 819.0, 2609.0, 15744.0, 385372.0, 620514.0, 18009.0, 3022.0, 830.0, 374.0, 173.0, 72.0, 67.0, 48.0, 28.0, 24.0, 16.0, 10.0, 4.0, 7.0, 6.0, 3.0, 1.0, 2.0, 3.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.73291015625, -0.7119903564453125, -0.691070556640625, -0.6701507568359375, -0.64923095703125, -0.6283111572265625, -0.607391357421875, -0.5864715576171875, -0.5655517578125, -0.5446319580078125, -0.523712158203125, -0.5027923583984375, -0.48187255859375, -0.4609527587890625, -0.440032958984375, -0.4191131591796875, -0.398193359375, -0.3772735595703125, -0.356353759765625, -0.3354339599609375, -0.31451416015625, -0.2935943603515625, -0.272674560546875, -0.2517547607421875, -0.2308349609375, -0.2099151611328125, -0.188995361328125, -0.1680755615234375, -0.14715576171875, -0.1262359619140625, -0.105316162109375, -0.0843963623046875, -0.0634765625, -0.0425567626953125, -0.021636962890625, -0.0007171630859375, 0.02020263671875, 0.0411224365234375, 0.062042236328125, 0.0829620361328125, 0.1038818359375, 0.1248016357421875, 0.145721435546875, 0.1666412353515625, 0.18756103515625, 0.2084808349609375, 0.229400634765625, 0.2503204345703125, 0.271240234375, 0.2921600341796875, 0.313079833984375, 0.3339996337890625, 0.35491943359375, 0.3758392333984375, 0.396759033203125, 0.4176788330078125, 0.4385986328125, 0.4595184326171875, 0.480438232421875, 0.5013580322265625, 0.52227783203125, 0.5431976318359375, 0.564117431640625, 0.5850372314453125, 0.60595703125]}, "gradients/encoder.encoder.layers.3.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 5.0, 2.0, 0.0, 2.0, 7.0, 7.0, 29.0, 45.0, 97.0, 133.0, 175.0, 187.0, 135.0, 85.0, 46.0, 36.0, 13.0, 4.0, 3.0, 3.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.241943359375, -0.23574447631835938, -0.22954559326171875, -0.22334671020507812, -0.2171478271484375, -0.21094894409179688, -0.20475006103515625, -0.19855117797851562, -0.192352294921875, -0.18615341186523438, -0.17995452880859375, -0.17375564575195312, -0.1675567626953125, -0.16135787963867188, -0.15515899658203125, -0.14896011352539062, -0.14276123046875, -0.13656234741210938, -0.13036346435546875, -0.12416458129882812, -0.1179656982421875, -0.11176681518554688, -0.10556793212890625, -0.09936904907226562, -0.093170166015625, -0.08697128295898438, -0.08077239990234375, -0.07457351684570312, -0.0683746337890625, -0.062175750732421875, -0.05597686767578125, -0.049777984619140625, -0.0435791015625, -0.037380218505859375, -0.03118133544921875, -0.024982452392578125, -0.0187835693359375, -0.012584686279296875, -0.00638580322265625, -0.000186920166015625, 0.006011962890625, 0.012210845947265625, 0.01840972900390625, 0.024608612060546875, 0.0308074951171875, 0.037006378173828125, 0.04320526123046875, 0.049404144287109375, 0.05560302734375, 0.061801910400390625, 0.06800079345703125, 0.07419967651367188, 0.0803985595703125, 0.08659744262695312, 0.09279632568359375, 0.09899520874023438, 0.105194091796875, 0.11139297485351562, 0.11759185791015625, 0.12379074096679688, 0.1299896240234375, 0.13618850708007812, 0.14238739013671875, 0.14858627319335938, 0.15478515625]}, "gradients/encoder.encoder.layers.3.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 1.0, 5.0, 2.0, 6.0, 6.0, 6.0, 7.0, 16.0, 21.0, 21.0, 43.0, 52.0, 84.0, 119.0, 192.0, 311.0, 513.0, 874.0, 1659.0, 3073.0, 6239.0, 14297.0, 36716.0, 118907.0, 641299.0, 149324.0, 43393.0, 16555.0, 7047.0, 3477.0, 1786.0, 1002.0, 533.0, 333.0, 230.0, 124.0, 93.0, 55.0, 48.0, 26.0, 21.0, 13.0, 9.0, 8.0, 6.0, 5.0, 3.0, 2.0, 0.0, 1.0, 3.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0], "bins": [-0.1834716796875, -0.17766189575195312, -0.17185211181640625, -0.16604232788085938, -0.1602325439453125, -0.15442276000976562, -0.14861297607421875, -0.14280319213867188, -0.136993408203125, -0.13118362426757812, -0.12537384033203125, -0.11956405639648438, -0.1137542724609375, -0.10794448852539062, -0.10213470458984375, -0.09632492065429688, -0.09051513671875, -0.08470535278320312, -0.07889556884765625, -0.07308578491210938, -0.0672760009765625, -0.061466217041015625, -0.05565643310546875, -0.049846649169921875, -0.044036865234375, -0.038227081298828125, -0.03241729736328125, -0.026607513427734375, -0.0207977294921875, -0.014987945556640625, -0.00917816162109375, -0.003368377685546875, 0.00244140625, 0.008251190185546875, 0.01406097412109375, 0.019870758056640625, 0.0256805419921875, 0.031490325927734375, 0.03730010986328125, 0.043109893798828125, 0.048919677734375, 0.054729461669921875, 0.06053924560546875, 0.06634902954101562, 0.0721588134765625, 0.07796859741210938, 0.08377838134765625, 0.08958816528320312, 0.09539794921875, 0.10120773315429688, 0.10701751708984375, 0.11282730102539062, 0.1186370849609375, 0.12444686889648438, 0.13025665283203125, 0.13606643676757812, 0.141876220703125, 0.14768600463867188, 0.15349578857421875, 0.15930557250976562, 0.1651153564453125, 0.17092514038085938, 0.17673492431640625, 0.18254470825195312, 0.1883544921875]}, "gradients/encoder.encoder.layers.3.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 3.0, 1.0, 2.0, 2.0, 3.0, 2.0, 5.0, 8.0, 8.0, 6.0, 14.0, 5.0, 21.0, 14.0, 18.0, 31.0, 29.0, 45.0, 51.0, 51.0, 31.0, 45.0, 53.0, 57.0, 65.0, 51.0, 52.0, 47.0, 55.0, 39.0, 32.0, 34.0, 29.0, 20.0, 20.0, 12.0, 5.0, 11.0, 6.0, 9.0, 6.0, 11.0, 2.0, 1.0, 1.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.228271484375, -0.22006988525390625, -0.2118682861328125, -0.20366668701171875, -0.195465087890625, -0.18726348876953125, -0.1790618896484375, -0.17086029052734375, -0.16265869140625, -0.15445709228515625, -0.1462554931640625, -0.13805389404296875, -0.129852294921875, -0.12165069580078125, -0.1134490966796875, -0.10524749755859375, -0.0970458984375, -0.08884429931640625, -0.0806427001953125, -0.07244110107421875, -0.064239501953125, -0.05603790283203125, -0.0478363037109375, -0.03963470458984375, -0.03143310546875, -0.02323150634765625, -0.0150299072265625, -0.00682830810546875, 0.001373291015625, 0.00957489013671875, 0.0177764892578125, 0.02597808837890625, 0.0341796875, 0.04238128662109375, 0.0505828857421875, 0.05878448486328125, 0.066986083984375, 0.07518768310546875, 0.0833892822265625, 0.09159088134765625, 0.09979248046875, 0.10799407958984375, 0.1161956787109375, 0.12439727783203125, 0.132598876953125, 0.14080047607421875, 0.1490020751953125, 0.15720367431640625, 0.1654052734375, 0.17360687255859375, 0.1818084716796875, 0.19001007080078125, 0.198211669921875, 0.20641326904296875, 0.2146148681640625, 0.22281646728515625, 0.23101806640625, 0.23921966552734375, 0.2474212646484375, 0.25562286376953125, 0.263824462890625, 0.27202606201171875, 0.2802276611328125, 0.28842926025390625, 0.296630859375]}, "gradients/encoder.encoder.layers.3.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 3.0, 2.0, 4.0, 2.0, 7.0, 6.0, 6.0, 9.0, 15.0, 22.0, 25.0, 35.0, 45.0, 88.0, 129.0, 237.0, 488.0, 1013.0, 2445.0, 6979.0, 24838.0, 140466.0, 764695.0, 80838.0, 17047.0, 5331.0, 2012.0, 856.0, 395.0, 176.0, 105.0, 65.0, 52.0, 23.0, 28.0, 21.0, 10.0, 19.0, 9.0, 4.0, 3.0, 2.0, 4.0, 1.0, 1.0, 2.0, 2.0, 0.0, 0.0, 1.0, 2.0], "bins": [-0.0251617431640625, -0.024450302124023438, -0.023738861083984375, -0.023027420043945312, -0.02231597900390625, -0.021604537963867188, -0.020893096923828125, -0.020181655883789062, -0.01947021484375, -0.018758773803710938, -0.018047332763671875, -0.017335891723632812, -0.01662445068359375, -0.015913009643554688, -0.015201568603515625, -0.014490127563476562, -0.0137786865234375, -0.013067245483398438, -0.012355804443359375, -0.011644363403320312, -0.01093292236328125, -0.010221481323242188, -0.009510040283203125, -0.008798599243164062, -0.008087158203125, -0.0073757171630859375, -0.006664276123046875, -0.0059528350830078125, -0.00524139404296875, -0.0045299530029296875, -0.003818511962890625, -0.0031070709228515625, -0.0023956298828125, -0.0016841888427734375, -0.000972747802734375, -0.0002613067626953125, 0.00045013427734375, 0.0011615753173828125, 0.001873016357421875, 0.0025844573974609375, 0.0032958984375, 0.0040073394775390625, 0.004718780517578125, 0.0054302215576171875, 0.00614166259765625, 0.0068531036376953125, 0.007564544677734375, 0.008275985717773438, 0.0089874267578125, 0.009698867797851562, 0.010410308837890625, 0.011121749877929688, 0.01183319091796875, 0.012544631958007812, 0.013256072998046875, 0.013967514038085938, 0.014678955078125, 0.015390396118164062, 0.016101837158203125, 0.016813278198242188, 0.01752471923828125, 0.018236160278320312, 0.018947601318359375, 0.019659042358398438, 0.0203704833984375]}, "gradients/encoder.encoder.layers.3.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 5.0, 3.0, 2.0, 0.0, 8.0, 4.0, 11.0, 6.0, 17.0, 39.0, 42.0, 73.0, 105.0, 114.0, 135.0, 116.0, 101.0, 64.0, 42.0, 50.0, 22.0, 15.0, 5.0, 6.0, 6.0, 8.0, 0.0, 0.0, 2.0, 1.0, 3.0, 3.0, 1.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.715557098388672e-06, -9.39890742301941e-06, -9.082257747650146e-06, -8.765608072280884e-06, -8.448958396911621e-06, -8.132308721542358e-06, -7.815659046173096e-06, -7.499009370803833e-06, -7.18235969543457e-06, -6.865710020065308e-06, -6.549060344696045e-06, -6.232410669326782e-06, -5.9157609939575195e-06, -5.599111318588257e-06, -5.282461643218994e-06, -4.9658119678497314e-06, -4.649162292480469e-06, -4.332512617111206e-06, -4.015862941741943e-06, -3.6992132663726807e-06, -3.382563591003418e-06, -3.0659139156341553e-06, -2.7492642402648926e-06, -2.43261456489563e-06, -2.115964889526367e-06, -1.7993152141571045e-06, -1.4826655387878418e-06, -1.166015863418579e-06, -8.493661880493164e-07, -5.327165126800537e-07, -2.1606683731079102e-07, 1.0058283805847168e-07, 4.172325134277344e-07, 7.338821887969971e-07, 1.0505318641662598e-06, 1.3671815395355225e-06, 1.6838312149047852e-06, 2.000480890274048e-06, 2.3171305656433105e-06, 2.6337802410125732e-06, 2.950429916381836e-06, 3.2670795917510986e-06, 3.5837292671203613e-06, 3.900378942489624e-06, 4.217028617858887e-06, 4.533678293228149e-06, 4.850327968597412e-06, 5.166977643966675e-06, 5.4836273193359375e-06, 5.8002769947052e-06, 6.116926670074463e-06, 6.433576345443726e-06, 6.750226020812988e-06, 7.066875696182251e-06, 7.383525371551514e-06, 7.700175046920776e-06, 8.016824722290039e-06, 8.333474397659302e-06, 8.650124073028564e-06, 8.966773748397827e-06, 9.28342342376709e-06, 9.600073099136353e-06, 9.916722774505615e-06, 1.0233372449874878e-05, 1.055002212524414e-05]}, "gradients/encoder.encoder.layers.3.attention.q_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 3.0, 3.0, 3.0, 6.0, 6.0, 8.0, 11.0, 28.0, 33.0, 40.0, 77.0, 82.0, 145.0, 239.0, 402.0, 770.0, 1498.0, 3439.0, 9119.0, 30227.0, 177016.0, 722473.0, 74573.0, 17543.0, 5913.0, 2316.0, 1136.0, 561.0, 317.0, 213.0, 112.0, 68.0, 50.0, 52.0, 25.0, 14.0, 15.0, 7.0, 7.0, 7.0, 4.0, 5.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0226898193359375, -0.02194070816040039, -0.02119159698486328, -0.020442485809326172, -0.019693374633789062, -0.018944263458251953, -0.018195152282714844, -0.017446041107177734, -0.016696929931640625, -0.015947818756103516, -0.015198707580566406, -0.014449596405029297, -0.013700485229492188, -0.012951374053955078, -0.012202262878417969, -0.01145315170288086, -0.01070404052734375, -0.00995492935180664, -0.009205818176269531, -0.008456707000732422, -0.0077075958251953125, -0.006958484649658203, -0.006209373474121094, -0.005460262298583984, -0.004711151123046875, -0.003962039947509766, -0.0032129287719726562, -0.002463817596435547, -0.0017147064208984375, -0.0009655952453613281, -0.00021648406982421875, 0.0005326271057128906, 0.00128173828125, 0.0020308494567871094, 0.0027799606323242188, 0.003529071807861328, 0.0042781829833984375, 0.005027294158935547, 0.005776405334472656, 0.006525516510009766, 0.007274627685546875, 0.008023738861083984, 0.008772850036621094, 0.009521961212158203, 0.010271072387695312, 0.011020183563232422, 0.011769294738769531, 0.01251840591430664, 0.01326751708984375, 0.01401662826538086, 0.014765739440917969, 0.015514850616455078, 0.016263961791992188, 0.017013072967529297, 0.017762184143066406, 0.018511295318603516, 0.019260406494140625, 0.020009517669677734, 0.020758628845214844, 0.021507740020751953, 0.022256851196289062, 0.023005962371826172, 0.02375507354736328, 0.02450418472290039, 0.0252532958984375]}, "gradients/encoder.encoder.layers.3.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0, 0.0, 1.0, 7.0, 7.0, 11.0, 11.0, 14.0, 7.0, 23.0, 29.0, 48.0, 65.0, 82.0, 122.0, 128.0, 119.0, 97.0, 65.0, 45.0, 32.0, 25.0, 18.0, 21.0, 9.0, 7.0, 6.0, 4.0, 3.0, 1.0, 3.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.037078857421875, -0.03586912155151367, -0.034659385681152344, -0.033449649810791016, -0.03223991394042969, -0.03103017807006836, -0.02982044219970703, -0.028610706329345703, -0.027400970458984375, -0.026191234588623047, -0.02498149871826172, -0.02377176284790039, -0.022562026977539062, -0.021352291107177734, -0.020142555236816406, -0.018932819366455078, -0.01772308349609375, -0.016513347625732422, -0.015303611755371094, -0.014093875885009766, -0.012884140014648438, -0.01167440414428711, -0.010464668273925781, -0.009254932403564453, -0.008045196533203125, -0.006835460662841797, -0.005625724792480469, -0.004415988922119141, -0.0032062530517578125, -0.0019965171813964844, -0.0007867813110351562, 0.0004229545593261719, 0.0016326904296875, 0.002842426300048828, 0.004052162170410156, 0.005261898040771484, 0.0064716339111328125, 0.007681369781494141, 0.008891105651855469, 0.010100841522216797, 0.011310577392578125, 0.012520313262939453, 0.013730049133300781, 0.01493978500366211, 0.016149520874023438, 0.017359256744384766, 0.018568992614746094, 0.019778728485107422, 0.02098846435546875, 0.022198200225830078, 0.023407936096191406, 0.024617671966552734, 0.025827407836914062, 0.02703714370727539, 0.02824687957763672, 0.029456615447998047, 0.030666351318359375, 0.0318760871887207, 0.03308582305908203, 0.03429555892944336, 0.03550529479980469, 0.036715030670166016, 0.037924766540527344, 0.03913450241088867, 0.04034423828125]}, "gradients/encoder.encoder.layers.3.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0, 3.0, 7.0, 3.0, 11.0, 5.0, 16.0, 18.0, 38.0, 52.0, 102.0, 215.0, 306.0, 129.0, 49.0, 22.0, 14.0, 4.0, 7.0, 2.0, 1.0, 3.0, 3.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.6585755348205566, -1.6139850616455078, -1.569394588470459, -1.5248041152954102, -1.4802136421203613, -1.435623288154602, -1.3910328149795532, -1.3464423418045044, -1.3018518686294556, -1.2572613954544067, -1.212670922279358, -1.168080449104309, -1.1234900951385498, -1.078899621963501, -1.0343091487884521, -0.9897186756134033, -0.9451282024383545, -0.9005377292633057, -0.8559472560882568, -0.8113568425178528, -0.766766369342804, -0.7221758961677551, -0.6775854825973511, -0.6329950094223022, -0.5884045362472534, -0.5438140630722046, -0.49922361969947815, -0.4546331763267517, -0.4100427031517029, -0.36545222997665405, -0.3208617866039276, -0.27627134323120117, -0.23168087005615234, -0.1870904117822647, -0.14249995350837708, -0.09790949523448944, -0.05331903696060181, -0.008728578686714172, 0.03586187958717346, 0.0804523229598999, 0.12504279613494873, 0.16963325440883636, 0.214223712682724, 0.25881415605545044, 0.30340462923049927, 0.3479951024055481, 0.39258554577827454, 0.437175989151001, 0.4817664623260498, 0.5263569355010986, 0.5709474086761475, 0.6155378222465515, 0.6601282954216003, 0.7047187685966492, 0.7493091821670532, 0.793899655342102, 0.8384901285171509, 0.8830806016921997, 0.9276710748672485, 0.9722614884376526, 1.0168519020080566, 1.0614423751831055, 1.1060328483581543, 1.1506233215332031, 1.195213794708252]}, "gradients/encoder.encoder.layers.3.layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 1.0, 2.0, 4.0, 1.0, 5.0, 3.0, 7.0, 4.0, 6.0, 7.0, 7.0, 6.0, 15.0, 19.0, 12.0, 14.0, 17.0, 24.0, 26.0, 26.0, 31.0, 25.0, 43.0, 43.0, 52.0, 60.0, 70.0, 73.0, 59.0, 48.0, 32.0, 30.0, 27.0, 24.0, 27.0, 24.0, 19.0, 21.0, 27.0, 8.0, 10.0, 9.0, 8.0, 12.0, 4.0, 6.0, 2.0, 5.0, 2.0, 5.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.7394278049468994, -0.7145110368728638, -0.6895943284034729, -0.6646775603294373, -0.6397608518600464, -0.6148440837860107, -0.5899273157119751, -0.5650105476379395, -0.5400938391685486, -0.5151770710945129, -0.49026036262512207, -0.4653435945510864, -0.44042685627937317, -0.4155101180076599, -0.39059334993362427, -0.365676611661911, -0.34075987339019775, -0.3158431351184845, -0.29092639684677124, -0.2660096287727356, -0.24109289050102234, -0.21617615222930908, -0.19125939905643463, -0.16634264588356018, -0.14142590761184692, -0.11650916188955307, -0.09159241616725922, -0.06667567044496536, -0.04175892472267151, -0.016842179000377655, 0.008074566721916199, 0.03299131989479065, 0.057908058166503906, 0.08282480388879776, 0.10774154961109161, 0.13265830278396606, 0.15757504105567932, 0.18249177932739258, 0.20740853250026703, 0.23232528567314148, 0.25724202394485474, 0.282158762216568, 0.30707550048828125, 0.3319922685623169, 0.35690900683403015, 0.3818257451057434, 0.40674251317977905, 0.4316592514514923, 0.45657598972320557, 0.4814927279949188, 0.5064094662666321, 0.5313262343406677, 0.5562429428100586, 0.5811597108840942, 0.6060764789581299, 0.6309932470321655, 0.6559099555015564, 0.680826723575592, 0.7057434320449829, 0.7306602001190186, 0.7555769681930542, 0.7804936766624451, 0.8054104447364807, 0.8303271532058716, 0.8552439212799072]}, "gradients/encoder.encoder.layers.2.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 3.0, 3.0, 0.0, 2.0, 2.0, 2.0, 4.0, 3.0, 5.0, 9.0, 12.0, 7.0, 37.0, 51.0, 122.0, 224.0, 623.0, 1919.0, 10501.0, 4165591.0, 11957.0, 2110.0, 607.0, 248.0, 103.0, 48.0, 30.0, 25.0, 17.0, 7.0, 3.0, 7.0, 3.0, 4.0, 0.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.904296875, -0.8808135986328125, -0.857330322265625, -0.8338470458984375, -0.81036376953125, -0.7868804931640625, -0.763397216796875, -0.7399139404296875, -0.7164306640625, -0.6929473876953125, -0.669464111328125, -0.6459808349609375, -0.62249755859375, -0.5990142822265625, -0.575531005859375, -0.5520477294921875, -0.528564453125, -0.5050811767578125, -0.481597900390625, -0.4581146240234375, -0.43463134765625, -0.4111480712890625, -0.387664794921875, -0.3641815185546875, -0.3406982421875, -0.3172149658203125, -0.293731689453125, -0.2702484130859375, -0.24676513671875, -0.2232818603515625, -0.199798583984375, -0.1763153076171875, -0.15283203125, -0.1293487548828125, -0.105865478515625, -0.0823822021484375, -0.05889892578125, -0.0354156494140625, -0.011932373046875, 0.0115509033203125, 0.0350341796875, 0.0585174560546875, 0.082000732421875, 0.1054840087890625, 0.12896728515625, 0.1524505615234375, 0.175933837890625, 0.1994171142578125, 0.222900390625, 0.2463836669921875, 0.269866943359375, 0.2933502197265625, 0.31683349609375, 0.3403167724609375, 0.363800048828125, 0.3872833251953125, 0.4107666015625, 0.4342498779296875, 0.457733154296875, 0.4812164306640625, 0.50469970703125, 0.5281829833984375, 0.551666259765625, 0.5751495361328125, 0.5986328125]}, "gradients/encoder.encoder.layers.2.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0, 2.0, 2.0, 4.0, 7.0, 15.0, 27.0, 55.0, 84.0, 152.0, 173.0, 155.0, 145.0, 71.0, 58.0, 36.0, 13.0, 3.0, 4.0, 4.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.242431640625, -0.23620986938476562, -0.22998809814453125, -0.22376632690429688, -0.2175445556640625, -0.21132278442382812, -0.20510101318359375, -0.19887924194335938, -0.192657470703125, -0.18643569946289062, -0.18021392822265625, -0.17399215698242188, -0.1677703857421875, -0.16154861450195312, -0.15532684326171875, -0.14910507202148438, -0.14288330078125, -0.13666152954101562, -0.13043975830078125, -0.12421798706054688, -0.1179962158203125, -0.11177444458007812, -0.10555267333984375, -0.09933090209960938, -0.093109130859375, -0.08688735961914062, -0.08066558837890625, -0.07444381713867188, -0.0682220458984375, -0.062000274658203125, -0.05577850341796875, -0.049556732177734375, -0.0433349609375, -0.037113189697265625, -0.03089141845703125, -0.024669647216796875, -0.0184478759765625, -0.012226104736328125, -0.00600433349609375, 0.000217437744140625, 0.006439208984375, 0.012660980224609375, 0.01888275146484375, 0.025104522705078125, 0.0313262939453125, 0.037548065185546875, 0.04376983642578125, 0.049991607666015625, 0.05621337890625, 0.062435150146484375, 0.06865692138671875, 0.07487869262695312, 0.0811004638671875, 0.08732223510742188, 0.09354400634765625, 0.09976577758789062, 0.105987548828125, 0.11220932006835938, 0.11843109130859375, 0.12465286254882812, 0.1308746337890625, 0.13709640502929688, 0.14331817626953125, 0.14953994750976562, 0.15576171875]}, "gradients/encoder.encoder.layers.2.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 3.0, 6.0, 2.0, 6.0, 5.0, 6.0, 5.0, 14.0, 22.0, 35.0, 70.0, 180.0, 455.0, 1359.0, 3972.0, 26091.0, 4134396.0, 22299.0, 3616.0, 1137.0, 360.0, 131.0, 45.0, 26.0, 17.0, 9.0, 6.0, 1.0, 3.0, 2.0, 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 1.0, 2.0, 0.0, 2.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.318359375, -0.3058624267578125, -0.293365478515625, -0.2808685302734375, -0.26837158203125, -0.2558746337890625, -0.243377685546875, -0.2308807373046875, -0.2183837890625, -0.2058868408203125, -0.193389892578125, -0.1808929443359375, -0.16839599609375, -0.1558990478515625, -0.143402099609375, -0.1309051513671875, -0.118408203125, -0.1059112548828125, -0.093414306640625, -0.0809173583984375, -0.06842041015625, -0.0559234619140625, -0.043426513671875, -0.0309295654296875, -0.0184326171875, -0.0059356689453125, 0.006561279296875, 0.0190582275390625, 0.03155517578125, 0.0440521240234375, 0.056549072265625, 0.0690460205078125, 0.08154296875, 0.0940399169921875, 0.106536865234375, 0.1190338134765625, 0.13153076171875, 0.1440277099609375, 0.156524658203125, 0.1690216064453125, 0.1815185546875, 0.1940155029296875, 0.206512451171875, 0.2190093994140625, 0.23150634765625, 0.2440032958984375, 0.256500244140625, 0.2689971923828125, 0.281494140625, 0.2939910888671875, 0.306488037109375, 0.3189849853515625, 0.33148193359375, 0.3439788818359375, 0.356475830078125, 0.3689727783203125, 0.3814697265625, 0.3939666748046875, 0.406463623046875, 0.4189605712890625, 0.43145751953125, 0.4439544677734375, 0.456451416015625, 0.4689483642578125, 0.4814453125]}, "gradients/encoder.encoder.layers.2.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 5.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 2.0, 1.0, 1.0, 3.0, 3.0, 12.0, 9.0, 15.0, 21.0, 36.0, 151.0, 1213.0, 2151.0, 322.0, 44.0, 24.0, 21.0, 13.0, 6.0, 4.0, 4.0, 2.0, 5.0, 4.0, 0.0, 1.0, 1.0, 3.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.28955078125, -0.2819652557373047, -0.2743797302246094, -0.26679420471191406, -0.25920867919921875, -0.25162315368652344, -0.24403762817382812, -0.2364521026611328, -0.2288665771484375, -0.2212810516357422, -0.21369552612304688, -0.20611000061035156, -0.19852447509765625, -0.19093894958496094, -0.18335342407226562, -0.1757678985595703, -0.168182373046875, -0.1605968475341797, -0.15301132202148438, -0.14542579650878906, -0.13784027099609375, -0.13025474548339844, -0.12266921997070312, -0.11508369445800781, -0.1074981689453125, -0.09991264343261719, -0.09232711791992188, -0.08474159240722656, -0.07715606689453125, -0.06957054138183594, -0.061985015869140625, -0.05439949035644531, -0.04681396484375, -0.03922843933105469, -0.031642913818359375, -0.024057388305664062, -0.01647186279296875, -0.008886337280273438, -0.001300811767578125, 0.0062847137451171875, 0.0138702392578125, 0.021455764770507812, 0.029041290283203125, 0.03662681579589844, 0.04421234130859375, 0.05179786682128906, 0.059383392333984375, 0.06696891784667969, 0.074554443359375, 0.08213996887207031, 0.08972549438476562, 0.09731101989746094, 0.10489654541015625, 0.11248207092285156, 0.12006759643554688, 0.1276531219482422, 0.1352386474609375, 0.1428241729736328, 0.15040969848632812, 0.15799522399902344, 0.16558074951171875, 0.17316627502441406, 0.18075180053710938, 0.1883373260498047, 0.1959228515625]}, "gradients/encoder.encoder.layers.2.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 8.0, 3.0, 7.0, 9.0, 14.0, 20.0, 27.0, 48.0, 85.0, 147.0, 286.0, 182.0, 87.0, 41.0, 18.0, 12.0, 10.0, 2.0, 5.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.5031228065490723, -1.467857003211975, -1.432591199874878, -1.3973253965377808, -1.3620595932006836, -1.3267937898635864, -1.2915279865264893, -1.256262183189392, -1.220996379852295, -1.1857305765151978, -1.1504647731781006, -1.1151989698410034, -1.0799331665039062, -1.044667363166809, -1.009401559829712, -0.9741357564926147, -0.9388698935508728, -0.9036040902137756, -0.8683382868766785, -0.8330724835395813, -0.7978066802024841, -0.762540876865387, -0.727275013923645, -0.6920092105865479, -0.6567434072494507, -0.6214776039123535, -0.5862118005752563, -0.5509459972381592, -0.515680193901062, -0.48041439056396484, -0.4451485574245453, -0.4098827540874481, -0.37461698055267334, -0.33935117721557617, -0.304085373878479, -0.26881957054138184, -0.23355375230312347, -0.1982879489660263, -0.16302213072776794, -0.12775632739067078, -0.09249052405357361, -0.05722471699118614, -0.021958909928798676, 0.01330690085887909, 0.04857270419597626, 0.08383850753307343, 0.11910432577133179, 0.15437012910842896, 0.18963593244552612, 0.2249017357826233, 0.26016753911972046, 0.2954333424568176, 0.3306991457939148, 0.36596494913101196, 0.4012307822704315, 0.4364965856075287, 0.47176238894462585, 0.5070282220840454, 0.5422940254211426, 0.5775598287582397, 0.6128256320953369, 0.6480914354324341, 0.6833572387695312, 0.7186230421066284, 0.7538888454437256]}, "gradients/encoder.encoder.layers.2.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 4.0, 1.0, 1.0, 1.0, 1.0, 4.0, 6.0, 4.0, 5.0, 7.0, 8.0, 17.0, 12.0, 18.0, 16.0, 25.0, 26.0, 37.0, 36.0, 41.0, 45.0, 44.0, 42.0, 48.0, 47.0, 54.0, 53.0, 54.0, 53.0, 47.0, 38.0, 39.0, 34.0, 29.0, 25.0, 20.0, 14.0, 11.0, 14.0, 3.0, 14.0, 4.0, 2.0, 1.0, 4.0, 2.0, 4.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.6782944798469543, -0.6544259190559387, -0.6305573582649231, -0.6066887974739075, -0.5828202366828918, -0.5589516758918762, -0.5350830554962158, -0.5112144947052002, -0.48734596371650696, -0.46347740292549133, -0.4396088421344757, -0.4157402515411377, -0.39187169075012207, -0.36800312995910645, -0.3441345691680908, -0.3202660083770752, -0.29639744758605957, -0.27252888679504395, -0.24866032600402832, -0.2247917503118515, -0.20092318952083588, -0.17705462872982025, -0.15318605303764343, -0.1293174922466278, -0.10544893145561218, -0.08158037066459656, -0.057711802423000336, -0.033843234181404114, -0.009974673390388489, 0.013893887400627136, 0.037762463092803955, 0.06163102388381958, 0.08549964427947998, 0.1093682050704956, 0.13323676586151123, 0.15710534155368805, 0.18097390234470367, 0.2048424631357193, 0.22871103882789612, 0.25257959961891174, 0.27644816040992737, 0.300316721200943, 0.3241852819919586, 0.34805387258529663, 0.37192243337631226, 0.3957909941673279, 0.4196595549583435, 0.44352811574935913, 0.46739667654037476, 0.4912652373313904, 0.515133798122406, 0.5390023589134216, 0.5628709197044373, 0.5867394804954529, 0.6106081008911133, 0.6344766616821289, 0.6583452224731445, 0.6822137832641602, 0.7060823440551758, 0.7299509048461914, 0.753819465637207, 0.7776880264282227, 0.8015565872192383, 0.8254251480102539, 0.8492937088012695]}, "gradients/encoder.encoder.layers.2.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 3.0, 3.0, 3.0, 5.0, 14.0, 23.0, 24.0, 50.0, 96.0, 167.0, 416.0, 1303.0, 7012.0, 138790.0, 877557.0, 19494.0, 2470.0, 612.0, 247.0, 102.0, 67.0, 33.0, 29.0, 16.0, 7.0, 8.0, 3.0, 4.0, 3.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.60888671875, -0.5905532836914062, -0.5722198486328125, -0.5538864135742188, -0.535552978515625, -0.5172195434570312, -0.4988861083984375, -0.48055267333984375, -0.46221923828125, -0.44388580322265625, -0.4255523681640625, -0.40721893310546875, -0.388885498046875, -0.37055206298828125, -0.3522186279296875, -0.33388519287109375, -0.3155517578125, -0.29721832275390625, -0.2788848876953125, -0.26055145263671875, -0.242218017578125, -0.22388458251953125, -0.2055511474609375, -0.18721771240234375, -0.16888427734375, -0.15055084228515625, -0.1322174072265625, -0.11388397216796875, -0.095550537109375, -0.07721710205078125, -0.0588836669921875, -0.04055023193359375, -0.022216796875, -0.00388336181640625, 0.0144500732421875, 0.03278350830078125, 0.051116943359375, 0.06945037841796875, 0.0877838134765625, 0.10611724853515625, 0.12445068359375, 0.14278411865234375, 0.1611175537109375, 0.17945098876953125, 0.197784423828125, 0.21611785888671875, 0.2344512939453125, 0.25278472900390625, 0.2711181640625, 0.28945159912109375, 0.3077850341796875, 0.32611846923828125, 0.344451904296875, 0.36278533935546875, 0.3811187744140625, 0.39945220947265625, 0.41778564453125, 0.43611907958984375, 0.4544525146484375, 0.47278594970703125, 0.491119384765625, 0.5094528198242188, 0.5277862548828125, 0.5461196899414062, 0.564453125]}, "gradients/encoder.encoder.layers.2.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 3.0, 2.0, 3.0, 8.0, 14.0, 22.0, 28.0, 68.0, 86.0, 131.0, 157.0, 141.0, 132.0, 96.0, 54.0, 35.0, 18.0, 6.0, 6.0, 2.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.2403564453125, -0.2341766357421875, -0.227996826171875, -0.2218170166015625, -0.21563720703125, -0.2094573974609375, -0.203277587890625, -0.1970977783203125, -0.19091796875, -0.1847381591796875, -0.178558349609375, -0.1723785400390625, -0.16619873046875, -0.1600189208984375, -0.153839111328125, -0.1476593017578125, -0.1414794921875, -0.1352996826171875, -0.129119873046875, -0.1229400634765625, -0.11676025390625, -0.1105804443359375, -0.104400634765625, -0.0982208251953125, -0.092041015625, -0.0858612060546875, -0.079681396484375, -0.0735015869140625, -0.06732177734375, -0.0611419677734375, -0.054962158203125, -0.0487823486328125, -0.0426025390625, -0.0364227294921875, -0.030242919921875, -0.0240631103515625, -0.01788330078125, -0.0117034912109375, -0.005523681640625, 0.0006561279296875, 0.0068359375, 0.0130157470703125, 0.019195556640625, 0.0253753662109375, 0.03155517578125, 0.0377349853515625, 0.043914794921875, 0.0500946044921875, 0.0562744140625, 0.0624542236328125, 0.068634033203125, 0.0748138427734375, 0.08099365234375, 0.0871734619140625, 0.093353271484375, 0.0995330810546875, 0.105712890625, 0.1118927001953125, 0.118072509765625, 0.1242523193359375, 0.13043212890625, 0.1366119384765625, 0.142791748046875, 0.1489715576171875, 0.1551513671875]}, "gradients/encoder.encoder.layers.2.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 3.0, 2.0, 5.0, 3.0, 6.0, 10.0, 10.0, 20.0, 29.0, 57.0, 79.0, 139.0, 293.0, 698.0, 1909.0, 7010.0, 37395.0, 748969.0, 222932.0, 21889.0, 4594.0, 1487.0, 504.0, 224.0, 106.0, 68.0, 45.0, 27.0, 18.0, 7.0, 5.0, 9.0, 3.0, 3.0, 3.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.2122802734375, -0.2033824920654297, -0.19448471069335938, -0.18558692932128906, -0.17668914794921875, -0.16779136657714844, -0.15889358520507812, -0.1499958038330078, -0.1410980224609375, -0.1322002410888672, -0.12330245971679688, -0.11440467834472656, -0.10550689697265625, -0.09660911560058594, -0.08771133422851562, -0.07881355285644531, -0.069915771484375, -0.06101799011230469, -0.052120208740234375, -0.04322242736816406, -0.03432464599609375, -0.025426864624023438, -0.016529083251953125, -0.0076313018798828125, 0.0012664794921875, 0.010164260864257812, 0.019062042236328125, 0.027959823608398438, 0.03685760498046875, 0.04575538635253906, 0.054653167724609375, 0.06355094909667969, 0.07244873046875, 0.08134651184082031, 0.09024429321289062, 0.09914207458496094, 0.10803985595703125, 0.11693763732910156, 0.12583541870117188, 0.1347332000732422, 0.1436309814453125, 0.1525287628173828, 0.16142654418945312, 0.17032432556152344, 0.17922210693359375, 0.18811988830566406, 0.19701766967773438, 0.2059154510498047, 0.214813232421875, 0.2237110137939453, 0.23260879516601562, 0.24150657653808594, 0.25040435791015625, 0.25930213928222656, 0.2681999206542969, 0.2770977020263672, 0.2859954833984375, 0.2948932647705078, 0.3037910461425781, 0.31268882751464844, 0.32158660888671875, 0.33048439025878906, 0.3393821716308594, 0.3482799530029297, 0.357177734375]}, "gradients/encoder.encoder.layers.2.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 3.0, 0.0, 2.0, 3.0, 3.0, 5.0, 13.0, 9.0, 14.0, 22.0, 29.0, 30.0, 60.0, 57.0, 59.0, 71.0, 82.0, 86.0, 98.0, 79.0, 63.0, 44.0, 48.0, 41.0, 31.0, 27.0, 16.0, 7.0, 7.0, 3.0, 0.0, 2.0, 1.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.272216796875, -0.2583045959472656, -0.24439239501953125, -0.23048019409179688, -0.2165679931640625, -0.20265579223632812, -0.18874359130859375, -0.17483139038085938, -0.160919189453125, -0.14700698852539062, -0.13309478759765625, -0.11918258666992188, -0.1052703857421875, -0.09135818481445312, -0.07744598388671875, -0.06353378295898438, -0.04962158203125, -0.035709381103515625, -0.02179718017578125, -0.007884979248046875, 0.0060272216796875, 0.019939422607421875, 0.03385162353515625, 0.047763824462890625, 0.061676025390625, 0.07558822631835938, 0.08950042724609375, 0.10341262817382812, 0.1173248291015625, 0.13123703002929688, 0.14514923095703125, 0.15906143188476562, 0.1729736328125, 0.18688583374023438, 0.20079803466796875, 0.21471023559570312, 0.2286224365234375, 0.24253463745117188, 0.25644683837890625, 0.2703590393066406, 0.284271240234375, 0.2981834411621094, 0.31209564208984375, 0.3260078430175781, 0.3399200439453125, 0.3538322448730469, 0.36774444580078125, 0.3816566467285156, 0.39556884765625, 0.4094810485839844, 0.42339324951171875, 0.4373054504394531, 0.4512176513671875, 0.4651298522949219, 0.47904205322265625, 0.4929542541503906, 0.506866455078125, 0.5207786560058594, 0.5346908569335938, 0.5486030578613281, 0.5625152587890625, 0.5764274597167969, 0.5903396606445312, 0.6042518615722656, 0.6181640625]}, "gradients/encoder.encoder.layers.2.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 0.0, 0.0, 3.0, 0.0, 2.0, 0.0, 2.0, 0.0, 4.0, 3.0, 8.0, 3.0, 12.0, 15.0, 27.0, 32.0, 33.0, 66.0, 109.0, 222.0, 464.0, 1291.0, 4257.0, 25169.0, 865003.0, 137021.0, 10864.0, 2448.0, 732.0, 317.0, 179.0, 91.0, 57.0, 31.0, 23.0, 17.0, 13.0, 13.0, 8.0, 5.0, 3.0, 6.0, 1.0, 2.0, 4.0, 1.0, 3.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0], "bins": [-0.0545654296875, -0.053040504455566406, -0.05151557922363281, -0.04999065399169922, -0.048465728759765625, -0.04694080352783203, -0.04541587829589844, -0.043890953063964844, -0.04236602783203125, -0.040841102600097656, -0.03931617736816406, -0.03779125213623047, -0.036266326904296875, -0.03474140167236328, -0.03321647644042969, -0.031691551208496094, -0.0301666259765625, -0.028641700744628906, -0.027116775512695312, -0.02559185028076172, -0.024066925048828125, -0.02254199981689453, -0.021017074584960938, -0.019492149353027344, -0.01796722412109375, -0.016442298889160156, -0.014917373657226562, -0.013392448425292969, -0.011867523193359375, -0.010342597961425781, -0.008817672729492188, -0.007292747497558594, -0.005767822265625, -0.004242897033691406, -0.0027179718017578125, -0.0011930465698242188, 0.000331878662109375, 0.0018568038940429688, 0.0033817291259765625, 0.004906654357910156, 0.00643157958984375, 0.007956504821777344, 0.009481430053710938, 0.011006355285644531, 0.012531280517578125, 0.014056205749511719, 0.015581130981445312, 0.017106056213378906, 0.0186309814453125, 0.020155906677246094, 0.021680831909179688, 0.02320575714111328, 0.024730682373046875, 0.02625560760498047, 0.027780532836914062, 0.029305458068847656, 0.03083038330078125, 0.032355308532714844, 0.03388023376464844, 0.03540515899658203, 0.036930084228515625, 0.03845500946044922, 0.03997993469238281, 0.041504859924316406, 0.04302978515625]}, "gradients/encoder.encoder.layers.2.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 4.0, 2.0, 5.0, 4.0, 11.0, 19.0, 40.0, 64.0, 107.0, 177.0, 191.0, 157.0, 76.0, 63.0, 32.0, 9.0, 9.0, 10.0, 7.0, 5.0, 5.0, 0.0, 2.0, 0.0, 0.0, 0.0, 4.0, 0.0, 3.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.5676021575927734e-05, -1.5204772353172302e-05, -1.473352313041687e-05, -1.4262273907661438e-05, -1.3791024684906006e-05, -1.3319775462150574e-05, -1.2848526239395142e-05, -1.237727701663971e-05, -1.1906027793884277e-05, -1.1434778571128845e-05, -1.0963529348373413e-05, -1.0492280125617981e-05, -1.0021030902862549e-05, -9.549781680107117e-06, -9.078532457351685e-06, -8.607283234596252e-06, -8.13603401184082e-06, -7.664784789085388e-06, -7.193535566329956e-06, -6.722286343574524e-06, -6.251037120819092e-06, -5.77978789806366e-06, -5.3085386753082275e-06, -4.837289452552795e-06, -4.366040229797363e-06, -3.894791007041931e-06, -3.423541784286499e-06, -2.952292561531067e-06, -2.4810433387756348e-06, -2.0097941160202026e-06, -1.5385448932647705e-06, -1.0672956705093384e-06, -5.960464477539062e-07, -1.2479722499847412e-07, 3.46451997756958e-07, 8.177012205123901e-07, 1.2889504432678223e-06, 1.7601996660232544e-06, 2.2314488887786865e-06, 2.7026981115341187e-06, 3.1739473342895508e-06, 3.645196557044983e-06, 4.116445779800415e-06, 4.587695002555847e-06, 5.058944225311279e-06, 5.5301934480667114e-06, 6.0014426708221436e-06, 6.472691893577576e-06, 6.943941116333008e-06, 7.41519033908844e-06, 7.886439561843872e-06, 8.357688784599304e-06, 8.828938007354736e-06, 9.300187230110168e-06, 9.7714364528656e-06, 1.0242685675621033e-05, 1.0713934898376465e-05, 1.1185184121131897e-05, 1.1656433343887329e-05, 1.2127682566642761e-05, 1.2598931789398193e-05, 1.3070181012153625e-05, 1.3541430234909058e-05, 1.401267945766449e-05, 1.4483928680419922e-05]}, "gradients/encoder.encoder.layers.2.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 4.0, 0.0, 2.0, 8.0, 12.0, 13.0, 24.0, 51.0, 99.0, 161.0, 413.0, 993.0, 3713.0, 29439.0, 962166.0, 44902.0, 4476.0, 1176.0, 464.0, 204.0, 111.0, 53.0, 28.0, 20.0, 12.0, 6.0, 4.0, 5.0, 4.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0665283203125, -0.06442070007324219, -0.062313079833984375, -0.06020545959472656, -0.05809783935546875, -0.05599021911621094, -0.053882598876953125, -0.05177497863769531, -0.0496673583984375, -0.04755973815917969, -0.045452117919921875, -0.04334449768066406, -0.04123687744140625, -0.03912925720214844, -0.037021636962890625, -0.03491401672363281, -0.032806396484375, -0.030698776245117188, -0.028591156005859375, -0.026483535766601562, -0.02437591552734375, -0.022268295288085938, -0.020160675048828125, -0.018053054809570312, -0.0159454345703125, -0.013837814331054688, -0.011730194091796875, -0.009622573852539062, -0.00751495361328125, -0.0054073333740234375, -0.003299713134765625, -0.0011920928955078125, 0.00091552734375, 0.0030231475830078125, 0.005130767822265625, 0.0072383880615234375, 0.00934600830078125, 0.011453628540039062, 0.013561248779296875, 0.015668869018554688, 0.0177764892578125, 0.019884109497070312, 0.021991729736328125, 0.024099349975585938, 0.02620697021484375, 0.028314590454101562, 0.030422210693359375, 0.03252983093261719, 0.034637451171875, 0.03674507141113281, 0.038852691650390625, 0.04096031188964844, 0.04306793212890625, 0.04517555236816406, 0.047283172607421875, 0.04939079284667969, 0.0514984130859375, 0.05360603332519531, 0.055713653564453125, 0.05782127380371094, 0.05992889404296875, 0.06203651428222656, 0.06414413452148438, 0.06625175476074219, 0.068359375]}, "gradients/encoder.encoder.layers.2.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 7.0, 5.0, 5.0, 10.0, 16.0, 25.0, 36.0, 66.0, 128.0, 170.0, 223.0, 121.0, 83.0, 35.0, 15.0, 21.0, 11.0, 6.0, 4.0, 7.0, 6.0, 6.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.07733154296875, -0.07467460632324219, -0.07201766967773438, -0.06936073303222656, -0.06670379638671875, -0.06404685974121094, -0.061389923095703125, -0.05873298645019531, -0.0560760498046875, -0.05341911315917969, -0.050762176513671875, -0.04810523986816406, -0.04544830322265625, -0.04279136657714844, -0.040134429931640625, -0.03747749328613281, -0.034820556640625, -0.03216361999511719, -0.029506683349609375, -0.026849746704101562, -0.02419281005859375, -0.021535873413085938, -0.018878936767578125, -0.016222000122070312, -0.0135650634765625, -0.010908126831054688, -0.008251190185546875, -0.0055942535400390625, -0.00293731689453125, -0.0002803802490234375, 0.002376556396484375, 0.0050334930419921875, 0.0076904296875, 0.010347366333007812, 0.013004302978515625, 0.015661239624023438, 0.01831817626953125, 0.020975112915039062, 0.023632049560546875, 0.026288986206054688, 0.0289459228515625, 0.03160285949707031, 0.034259796142578125, 0.03691673278808594, 0.03957366943359375, 0.04223060607910156, 0.044887542724609375, 0.04754447937011719, 0.050201416015625, 0.05285835266113281, 0.055515289306640625, 0.05817222595214844, 0.06082916259765625, 0.06348609924316406, 0.06614303588867188, 0.06879997253417969, 0.0714569091796875, 0.07411384582519531, 0.07677078247070312, 0.07942771911621094, 0.08208465576171875, 0.08474159240722656, 0.08739852905273438, 0.09005546569824219, 0.09271240234375]}, "gradients/encoder.encoder.layers.2.layer_norm.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 2.0, 3.0, 2.0, 3.0, 7.0, 10.0, 15.0, 23.0, 31.0, 74.0, 133.0, 402.0, 197.0, 60.0, 32.0, 7.0, 9.0, 1.0, 4.0], "bins": [-2.896899700164795, -2.8458609580993652, -2.7948222160339355, -2.743783473968506, -2.692744731903076, -2.6417059898376465, -2.5906670093536377, -2.539628267288208, -2.4885895252227783, -2.4375507831573486, -2.386512041091919, -2.3354732990264893, -2.2844345569610596, -2.233395576477051, -2.182356834411621, -2.1313180923461914, -2.0802793502807617, -2.029240608215332, -1.9782018661499023, -1.927163004875183, -1.8761242628097534, -1.8250855207443237, -1.774046778678894, -1.7230079174041748, -1.6719692945480347, -1.620930552482605, -1.5698918104171753, -1.518852949142456, -1.4678142070770264, -1.4167754650115967, -1.365736722946167, -1.3146979808807373, -1.2636590003967285, -1.2126202583312988, -1.1615815162658691, -1.11054265499115, -1.0595039129257202, -1.0084651708602905, -0.9574264287948608, -0.9063876271247864, -0.8553489446640015, -0.8043102025985718, -0.7532714009284973, -0.7022326588630676, -0.6511938571929932, -0.6001551151275635, -0.5491163730621338, -0.4980775713920593, -0.44703876972198486, -0.3959999978542328, -0.3449612259864807, -0.293922483921051, -0.24288369715213776, -0.19184494018554688, -0.1408061683177948, -0.08976739645004272, -0.03872862458229065, 0.012310143560171127, 0.0633489117026329, 0.11438767611980438, 0.16542644798755646, 0.21646520495414734, 0.2675039768218994, 0.3185427486896515, 0.36958152055740356]}, "gradients/encoder.encoder.layers.2.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 3.0, 7.0, 3.0, 9.0, 7.0, 3.0, 5.0, 8.0, 13.0, 8.0, 16.0, 14.0, 24.0, 24.0, 24.0, 14.0, 30.0, 40.0, 41.0, 30.0, 49.0, 63.0, 95.0, 86.0, 66.0, 49.0, 31.0, 24.0, 26.0, 35.0, 20.0, 16.0, 20.0, 21.0, 16.0, 13.0, 15.0, 17.0, 2.0, 7.0, 2.0, 2.0, 5.0, 4.0, 1.0, 3.0, 0.0, 2.0, 0.0, 2.0, 0.0, 3.0], "bins": [-1.1420395374298096, -1.1092491149902344, -1.0764586925506592, -1.0436683893203735, -1.0108779668807983, -0.9780875444412231, -0.945297122001648, -0.9125066995620728, -0.8797163367271423, -0.8469259142875671, -0.8141355514526367, -0.7813451290130615, -0.7485547065734863, -0.7157643437385559, -0.6829739212989807, -0.6501835584640503, -0.6173931360244751, -0.5846027135848999, -0.5518123507499695, -0.5190219283103943, -0.4862315356731415, -0.45344114303588867, -0.4206507205963135, -0.38786032795906067, -0.35506993532180786, -0.32227954268455505, -0.28948915004730225, -0.25669872760772705, -0.22390833497047424, -0.19111794233322144, -0.15832753479480743, -0.12553712725639343, -0.09274661540985107, -0.05995621532201767, -0.027165815234184265, 0.005624584853649139, 0.038414984941482544, 0.07120537757873535, 0.10399578511714935, 0.13678619265556335, 0.16957658529281616, 0.20236697793006897, 0.23515738546848297, 0.267947793006897, 0.3007381856441498, 0.3335285782814026, 0.3663190007209778, 0.3991093933582306, 0.4318997859954834, 0.4646901786327362, 0.497480571269989, 0.5302709937095642, 0.5630613565444946, 0.5958517789840698, 0.628642201423645, 0.6614326238632202, 0.6942229866981506, 0.7270134091377258, 0.7598037719726562, 0.7925941944122314, 0.8253846168518066, 0.8581749796867371, 0.8909654021263123, 0.9237557649612427, 0.9565461874008179]}, "gradients/encoder.encoder.layers.1.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 5.0, 7.0, 8.0, 18.0, 23.0, 39.0, 47.0, 108.0, 426.0, 4828.0, 1274704.0, 2907100.0, 5058.0, 1475.0, 283.0, 68.0, 38.0, 30.0, 12.0, 8.0, 4.0, 2.0, 1.0, 3.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.3017578125, -0.29403114318847656, -0.2863044738769531, -0.2785778045654297, -0.27085113525390625, -0.2631244659423828, -0.2553977966308594, -0.24767112731933594, -0.2399444580078125, -0.23221778869628906, -0.22449111938476562, -0.2167644500732422, -0.20903778076171875, -0.2013111114501953, -0.19358444213867188, -0.18585777282714844, -0.178131103515625, -0.17040443420410156, -0.16267776489257812, -0.1549510955810547, -0.14722442626953125, -0.1394977569580078, -0.13177108764648438, -0.12404441833496094, -0.1163177490234375, -0.10859107971191406, -0.10086441040039062, -0.09313774108886719, -0.08541107177734375, -0.07768440246582031, -0.06995773315429688, -0.06223106384277344, -0.05450439453125, -0.04677772521972656, -0.039051055908203125, -0.03132438659667969, -0.02359771728515625, -0.015871047973632812, -0.008144378662109375, -0.0004177093505859375, 0.0073089599609375, 0.015035629272460938, 0.022762298583984375, 0.030488967895507812, 0.03821563720703125, 0.04594230651855469, 0.053668975830078125, 0.06139564514160156, 0.069122314453125, 0.07684898376464844, 0.08457565307617188, 0.09230232238769531, 0.10002899169921875, 0.10775566101074219, 0.11548233032226562, 0.12320899963378906, 0.1309356689453125, 0.13866233825683594, 0.14638900756835938, 0.1541156768798828, 0.16184234619140625, 0.1695690155029297, 0.17729568481445312, 0.18502235412597656, 0.1927490234375]}, "gradients/encoder.encoder.layers.1.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 5.0, 4.0, 8.0, 16.0, 29.0, 36.0, 61.0, 90.0, 122.0, 154.0, 139.0, 112.0, 86.0, 59.0, 38.0, 30.0, 8.0, 9.0, 5.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.24267578125, -0.23643875122070312, -0.23020172119140625, -0.22396469116210938, -0.2177276611328125, -0.21149063110351562, -0.20525360107421875, -0.19901657104492188, -0.192779541015625, -0.18654251098632812, -0.18030548095703125, -0.17406845092773438, -0.1678314208984375, -0.16159439086914062, -0.15535736083984375, -0.14912033081054688, -0.14288330078125, -0.13664627075195312, -0.13040924072265625, -0.12417221069335938, -0.1179351806640625, -0.11169815063476562, -0.10546112060546875, -0.09922409057617188, -0.092987060546875, -0.08675003051757812, -0.08051300048828125, -0.07427597045898438, -0.0680389404296875, -0.061801910400390625, -0.05556488037109375, -0.049327850341796875, -0.0430908203125, -0.036853790283203125, -0.03061676025390625, -0.024379730224609375, -0.0181427001953125, -0.011905670166015625, -0.00566864013671875, 0.000568389892578125, 0.006805419921875, 0.013042449951171875, 0.01927947998046875, 0.025516510009765625, 0.0317535400390625, 0.037990570068359375, 0.04422760009765625, 0.050464630126953125, 0.05670166015625, 0.06293869018554688, 0.06917572021484375, 0.07541275024414062, 0.0816497802734375, 0.08788681030273438, 0.09412384033203125, 0.10036087036132812, 0.106597900390625, 0.11283493041992188, 0.11907196044921875, 0.12530899047851562, 0.1315460205078125, 0.13778305053710938, 0.14402008056640625, 0.15025711059570312, 0.156494140625]}, "gradients/encoder.encoder.layers.1.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 3.0, 1.0, 0.0, 2.0, 1.0, 3.0, 5.0, 3.0, 4.0, 6.0, 8.0, 14.0, 20.0, 25.0, 44.0, 104.0, 225.0, 616.0, 2017.0, 8136.0, 144641.0, 4000616.0, 31316.0, 4514.0, 1199.0, 447.0, 133.0, 66.0, 48.0, 19.0, 11.0, 8.0, 10.0, 6.0, 5.0, 4.0, 3.0, 1.0, 1.0, 0.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.129638671875, -0.12536239624023438, -0.12108612060546875, -0.11680984497070312, -0.1125335693359375, -0.10825729370117188, -0.10398101806640625, -0.09970474243164062, -0.095428466796875, -0.09115219116210938, -0.08687591552734375, -0.08259963989257812, -0.0783233642578125, -0.07404708862304688, -0.06977081298828125, -0.06549453735351562, -0.06121826171875, -0.056941986083984375, -0.05266571044921875, -0.048389434814453125, -0.0441131591796875, -0.039836883544921875, -0.03556060791015625, -0.031284332275390625, -0.027008056640625, -0.022731781005859375, -0.01845550537109375, -0.014179229736328125, -0.0099029541015625, -0.005626678466796875, -0.00135040283203125, 0.002925872802734375, 0.0072021484375, 0.011478424072265625, 0.01575469970703125, 0.020030975341796875, 0.0243072509765625, 0.028583526611328125, 0.03285980224609375, 0.037136077880859375, 0.041412353515625, 0.045688629150390625, 0.04996490478515625, 0.054241180419921875, 0.0585174560546875, 0.06279373168945312, 0.06707000732421875, 0.07134628295898438, 0.07562255859375, 0.07989883422851562, 0.08417510986328125, 0.08845138549804688, 0.0927276611328125, 0.09700393676757812, 0.10128021240234375, 0.10555648803710938, 0.109832763671875, 0.11410903930664062, 0.11838531494140625, 0.12266159057617188, 0.1269378662109375, 0.13121414184570312, 0.13549041748046875, 0.13976669311523438, 0.14404296875]}, "gradients/encoder.encoder.layers.1.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 4.0, 0.0, 4.0, 1.0, 4.0, 2.0, 5.0, 2.0, 11.0, 13.0, 21.0, 22.0, 66.0, 122.0, 426.0, 958.0, 1252.0, 711.0, 250.0, 104.0, 42.0, 23.0, 8.0, 9.0, 9.0, 1.0, 4.0, 3.0, 2.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.138671875, -0.13486576080322266, -0.1310596466064453, -0.12725353240966797, -0.12344741821289062, -0.11964130401611328, -0.11583518981933594, -0.1120290756225586, -0.10822296142578125, -0.1044168472290039, -0.10061073303222656, -0.09680461883544922, -0.09299850463867188, -0.08919239044189453, -0.08538627624511719, -0.08158016204833984, -0.0777740478515625, -0.07396793365478516, -0.07016181945800781, -0.06635570526123047, -0.06254959106445312, -0.05874347686767578, -0.05493736267089844, -0.051131248474121094, -0.04732513427734375, -0.043519020080566406, -0.03971290588378906, -0.03590679168701172, -0.032100677490234375, -0.02829456329345703, -0.024488449096679688, -0.020682334899902344, -0.016876220703125, -0.013070106506347656, -0.009263992309570312, -0.005457878112792969, -0.001651763916015625, 0.0021543502807617188, 0.0059604644775390625, 0.009766578674316406, 0.01357269287109375, 0.017378807067871094, 0.021184921264648438, 0.02499103546142578, 0.028797149658203125, 0.03260326385498047, 0.03640937805175781, 0.040215492248535156, 0.0440216064453125, 0.047827720642089844, 0.05163383483886719, 0.05543994903564453, 0.059246063232421875, 0.06305217742919922, 0.06685829162597656, 0.0706644058227539, 0.07447052001953125, 0.0782766342163086, 0.08208274841308594, 0.08588886260986328, 0.08969497680664062, 0.09350109100341797, 0.09730720520019531, 0.10111331939697266, 0.10491943359375]}, "gradients/encoder.encoder.layers.1.final_layer_norm.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 3.0, 4.0, 7.0, 5.0, 4.0, 13.0, 26.0, 35.0, 112.0, 177.0, 256.0, 156.0, 95.0, 34.0, 32.0, 17.0, 7.0, 8.0, 4.0, 7.0, 3.0, 2.0, 3.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.5212567448616028, -0.5017775893211365, -0.4822984039783478, -0.46281924843788147, -0.4433400630950928, -0.42386090755462646, -0.40438175201416016, -0.38490259647369385, -0.36542341113090515, -0.34594425559043884, -0.32646507024765015, -0.30698591470718384, -0.28750675916671753, -0.26802757382392883, -0.24854841828346252, -0.22906924784183502, -0.20959007740020752, -0.19011090695858002, -0.17063173651695251, -0.1511525809764862, -0.1316734105348587, -0.1121942400932312, -0.0927150771021843, -0.07323591411113739, -0.05375674366950989, -0.034277576953172684, -0.01479841023683548, 0.004680756479501724, 0.024159923195838928, 0.04363909363746643, 0.06311825662851334, 0.08259741961956024, 0.10207653045654297, 0.12155570089817047, 0.14103487133979797, 0.16051402688026428, 0.17999319732189178, 0.1994723677635193, 0.2189515233039856, 0.2384306937456131, 0.2579098641872406, 0.2773890197277069, 0.2968682050704956, 0.3163473606109619, 0.3358265161514282, 0.3553057014942169, 0.3747848570346832, 0.3942640423774719, 0.41374319791793823, 0.43322235345840454, 0.45270153880119324, 0.47218069434165955, 0.49165987968444824, 0.5111390352249146, 0.5306181907653809, 0.5500973463058472, 0.5695765018463135, 0.5890556573867798, 0.6085348129272461, 0.6280140280723572, 0.6474931836128235, 0.6669723391532898, 0.6864514946937561, 0.7059306502342224, 0.7254098653793335]}, "gradients/encoder.encoder.layers.1.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 4.0, 3.0, 8.0, 10.0, 15.0, 16.0, 32.0, 44.0, 38.0, 46.0, 64.0, 56.0, 68.0, 60.0, 57.0, 54.0, 53.0, 64.0, 46.0, 54.0, 45.0, 47.0, 35.0, 33.0, 19.0, 14.0, 9.0, 5.0, 1.0, 2.0, 5.0, 1.0, 5.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.5380935668945312, -0.5198764801025391, -0.5016593933105469, -0.4834423363208771, -0.4652252495288849, -0.4470081627368927, -0.4287911057472229, -0.4105740189552307, -0.3923569321632385, -0.37413984537124634, -0.35592275857925415, -0.33770570158958435, -0.31948861479759216, -0.3012715280056, -0.2830544710159302, -0.264837384223938, -0.2466202974319458, -0.2284032106399536, -0.21018613874912262, -0.19196906685829163, -0.17375198006629944, -0.15553489327430725, -0.13731782138347626, -0.11910074949264526, -0.10088366270065308, -0.08266658335924149, -0.0644495040178299, -0.046232424676418304, -0.028015345335006714, -0.009798265993595123, 0.008418813347816467, 0.02663588523864746, 0.044853031635284424, 0.06307011097669601, 0.0812871903181076, 0.0995042696595192, 0.11772134900093079, 0.13593843579292297, 0.15415550768375397, 0.17237257957458496, 0.19058966636657715, 0.20880675315856934, 0.22702382504940033, 0.24524089694023132, 0.2634579837322235, 0.2816750705242157, 0.2998921275138855, 0.3181092143058777, 0.3363263010978699, 0.35454338788986206, 0.37276047468185425, 0.39097753167152405, 0.40919461846351624, 0.4274117052555084, 0.4456287622451782, 0.4638458490371704, 0.4820629358291626, 0.5002800226211548, 0.518497109413147, 0.5367141962051392, 0.5549312829971313, 0.5731483101844788, 0.591365396976471, 0.6095824837684631, 0.6277995705604553]}, "gradients/encoder.encoder.layers.1.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 5.0, 3.0, 10.0, 8.0, 10.0, 19.0, 27.0, 36.0, 50.0, 99.0, 179.0, 371.0, 1017.0, 3593.0, 19676.0, 260772.0, 713267.0, 40769.0, 6071.0, 1523.0, 496.0, 226.0, 119.0, 74.0, 39.0, 25.0, 19.0, 21.0, 9.0, 5.0, 10.0, 4.0, 0.0, 3.0, 0.0, 1.0, 0.0, 2.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.279052734375, -0.2711181640625, -0.26318359375, -0.2552490234375, -0.247314453125, -0.2393798828125, -0.2314453125, -0.2235107421875, -0.215576171875, -0.2076416015625, -0.19970703125, -0.1917724609375, -0.183837890625, -0.1759033203125, -0.16796875, -0.1600341796875, -0.152099609375, -0.1441650390625, -0.13623046875, -0.1282958984375, -0.120361328125, -0.1124267578125, -0.1044921875, -0.0965576171875, -0.088623046875, -0.0806884765625, -0.07275390625, -0.0648193359375, -0.056884765625, -0.0489501953125, -0.041015625, -0.0330810546875, -0.025146484375, -0.0172119140625, -0.00927734375, -0.0013427734375, 0.006591796875, 0.0145263671875, 0.0224609375, 0.0303955078125, 0.038330078125, 0.0462646484375, 0.05419921875, 0.0621337890625, 0.070068359375, 0.0780029296875, 0.0859375, 0.0938720703125, 0.101806640625, 0.1097412109375, 0.11767578125, 0.1256103515625, 0.133544921875, 0.1414794921875, 0.1494140625, 0.1573486328125, 0.165283203125, 0.1732177734375, 0.18115234375, 0.1890869140625, 0.197021484375, 0.2049560546875, 0.212890625, 0.2208251953125, 0.228759765625]}, "gradients/encoder.encoder.layers.1.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 3.0, 4.0, 9.0, 10.0, 23.0, 46.0, 57.0, 86.0, 142.0, 141.0, 126.0, 129.0, 82.0, 65.0, 46.0, 24.0, 6.0, 8.0, 4.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.241455078125, -0.23525619506835938, -0.22905731201171875, -0.22285842895507812, -0.2166595458984375, -0.21046066284179688, -0.20426177978515625, -0.19806289672851562, -0.191864013671875, -0.18566513061523438, -0.17946624755859375, -0.17326736450195312, -0.1670684814453125, -0.16086959838867188, -0.15467071533203125, -0.14847183227539062, -0.14227294921875, -0.13607406616210938, -0.12987518310546875, -0.12367630004882812, -0.1174774169921875, -0.11127853393554688, -0.10507965087890625, -0.09888076782226562, -0.092681884765625, -0.08648300170898438, -0.08028411865234375, -0.07408523559570312, -0.0678863525390625, -0.061687469482421875, -0.05548858642578125, -0.049289703369140625, -0.0430908203125, -0.036891937255859375, -0.03069305419921875, -0.024494171142578125, -0.0182952880859375, -0.012096405029296875, -0.00589752197265625, 0.000301361083984375, 0.006500244140625, 0.012699127197265625, 0.01889801025390625, 0.025096893310546875, 0.0312957763671875, 0.037494659423828125, 0.04369354248046875, 0.049892425537109375, 0.05609130859375, 0.062290191650390625, 0.06848907470703125, 0.07468795776367188, 0.0808868408203125, 0.08708572387695312, 0.09328460693359375, 0.09948348999023438, 0.105682373046875, 0.11188125610351562, 0.11808013916015625, 0.12427902221679688, 0.1304779052734375, 0.13667678833007812, 0.14287567138671875, 0.14907455444335938, 0.1552734375]}, "gradients/encoder.encoder.layers.1.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 1.0, 1.0, 5.0, 5.0, 9.0, 6.0, 15.0, 15.0, 17.0, 27.0, 43.0, 58.0, 102.0, 115.0, 168.0, 242.0, 438.0, 794.0, 1605.0, 4529.0, 20031.0, 185609.0, 776735.0, 44977.0, 7969.0, 2462.0, 1023.0, 558.0, 310.0, 230.0, 157.0, 81.0, 62.0, 51.0, 28.0, 25.0, 17.0, 13.0, 9.0, 5.0, 5.0, 3.0, 6.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 3.0, 1.0], "bins": [-0.2049560546875, -0.19896507263183594, -0.19297409057617188, -0.1869831085205078, -0.18099212646484375, -0.1750011444091797, -0.16901016235351562, -0.16301918029785156, -0.1570281982421875, -0.15103721618652344, -0.14504623413085938, -0.1390552520751953, -0.13306427001953125, -0.1270732879638672, -0.12108230590820312, -0.11509132385253906, -0.109100341796875, -0.10310935974121094, -0.09711837768554688, -0.09112739562988281, -0.08513641357421875, -0.07914543151855469, -0.07315444946289062, -0.06716346740722656, -0.0611724853515625, -0.05518150329589844, -0.049190521240234375, -0.04319953918457031, -0.03720855712890625, -0.031217575073242188, -0.025226593017578125, -0.019235610961914062, -0.01324462890625, -0.0072536468505859375, -0.001262664794921875, 0.0047283172607421875, 0.01071929931640625, 0.016710281372070312, 0.022701263427734375, 0.028692245483398438, 0.0346832275390625, 0.04067420959472656, 0.046665191650390625, 0.05265617370605469, 0.05864715576171875, 0.06463813781738281, 0.07062911987304688, 0.07662010192871094, 0.082611083984375, 0.08860206604003906, 0.09459304809570312, 0.10058403015136719, 0.10657501220703125, 0.11256599426269531, 0.11855697631835938, 0.12454795837402344, 0.1305389404296875, 0.13652992248535156, 0.14252090454101562, 0.1485118865966797, 0.15450286865234375, 0.1604938507080078, 0.16648483276367188, 0.17247581481933594, 0.178466796875]}, "gradients/encoder.encoder.layers.1.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 6.0, 2.0, 5.0, 7.0, 7.0, 8.0, 8.0, 16.0, 18.0, 20.0, 34.0, 26.0, 59.0, 51.0, 64.0, 52.0, 77.0, 54.0, 80.0, 70.0, 66.0, 48.0, 63.0, 41.0, 31.0, 24.0, 18.0, 19.0, 11.0, 7.0, 11.0, 5.0, 1.0, 1.0, 0.0, 3.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.400390625, -0.388427734375, -0.37646484375, -0.364501953125, -0.3525390625, -0.340576171875, -0.32861328125, -0.316650390625, -0.3046875, -0.292724609375, -0.28076171875, -0.268798828125, -0.2568359375, -0.244873046875, -0.23291015625, -0.220947265625, -0.208984375, -0.197021484375, -0.18505859375, -0.173095703125, -0.1611328125, -0.149169921875, -0.13720703125, -0.125244140625, -0.11328125, -0.101318359375, -0.08935546875, -0.077392578125, -0.0654296875, -0.053466796875, -0.04150390625, -0.029541015625, -0.017578125, -0.005615234375, 0.00634765625, 0.018310546875, 0.0302734375, 0.042236328125, 0.05419921875, 0.066162109375, 0.078125, 0.090087890625, 0.10205078125, 0.114013671875, 0.1259765625, 0.137939453125, 0.14990234375, 0.161865234375, 0.173828125, 0.185791015625, 0.19775390625, 0.209716796875, 0.2216796875, 0.233642578125, 0.24560546875, 0.257568359375, 0.26953125, 0.281494140625, 0.29345703125, 0.305419921875, 0.3173828125, 0.329345703125, 0.34130859375, 0.353271484375, 0.365234375]}, "gradients/encoder.encoder.layers.1.attention.k_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 3.0, 1.0, 8.0, 4.0, 5.0, 4.0, 2.0, 15.0, 15.0, 24.0, 27.0, 36.0, 64.0, 124.0, 208.0, 432.0, 969.0, 2771.0, 10835.0, 70292.0, 798357.0, 140025.0, 17828.0, 4011.0, 1350.0, 540.0, 225.0, 141.0, 89.0, 64.0, 25.0, 18.0, 11.0, 5.0, 4.0, 6.0, 9.0, 0.0, 3.0, 4.0, 2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0], "bins": [-0.020111083984375, -0.019432783126831055, -0.01875448226928711, -0.018076181411743164, -0.01739788055419922, -0.016719579696655273, -0.016041278839111328, -0.015362977981567383, -0.014684677124023438, -0.014006376266479492, -0.013328075408935547, -0.012649774551391602, -0.011971473693847656, -0.011293172836303711, -0.010614871978759766, -0.00993657112121582, -0.009258270263671875, -0.00857996940612793, -0.007901668548583984, -0.007223367691040039, -0.006545066833496094, -0.0058667659759521484, -0.005188465118408203, -0.004510164260864258, -0.0038318634033203125, -0.003153562545776367, -0.002475261688232422, -0.0017969608306884766, -0.0011186599731445312, -0.00044035911560058594, 0.00023794174194335938, 0.0009162425994873047, 0.00159454345703125, 0.0022728443145751953, 0.0029511451721191406, 0.003629446029663086, 0.004307746887207031, 0.0049860477447509766, 0.005664348602294922, 0.006342649459838867, 0.0070209503173828125, 0.007699251174926758, 0.008377552032470703, 0.009055852890014648, 0.009734153747558594, 0.010412454605102539, 0.011090755462646484, 0.01176905632019043, 0.012447357177734375, 0.01312565803527832, 0.013803958892822266, 0.014482259750366211, 0.015160560607910156, 0.0158388614654541, 0.016517162322998047, 0.017195463180541992, 0.017873764038085938, 0.018552064895629883, 0.019230365753173828, 0.019908666610717773, 0.02058696746826172, 0.021265268325805664, 0.02194356918334961, 0.022621870040893555, 0.0233001708984375]}, "gradients/encoder.encoder.layers.1.attention.k_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 2.0, 5.0, 1.0, 4.0, 6.0, 5.0, 3.0, 6.0, 11.0, 19.0, 22.0, 28.0, 36.0, 37.0, 60.0, 64.0, 62.0, 45.0, 95.0, 57.0, 73.0, 76.0, 65.0, 43.0, 44.0, 28.0, 30.0, 12.0, 17.0, 15.0, 11.0, 13.0, 4.0, 1.0, 1.0, 1.0, 3.0, 1.0, 2.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-5.4836273193359375e-06, -5.306676030158997e-06, -5.129724740982056e-06, -4.952773451805115e-06, -4.775822162628174e-06, -4.598870873451233e-06, -4.421919584274292e-06, -4.244968295097351e-06, -4.06801700592041e-06, -3.891065716743469e-06, -3.7141144275665283e-06, -3.5371631383895874e-06, -3.3602118492126465e-06, -3.1832605600357056e-06, -3.0063092708587646e-06, -2.8293579816818237e-06, -2.652406692504883e-06, -2.475455403327942e-06, -2.298504114151001e-06, -2.12155282497406e-06, -1.944601535797119e-06, -1.7676502466201782e-06, -1.5906989574432373e-06, -1.4137476682662964e-06, -1.2367963790893555e-06, -1.0598450899124146e-06, -8.828938007354736e-07, -7.059425115585327e-07, -5.289912223815918e-07, -3.520399332046509e-07, -1.7508864402770996e-07, 1.862645149230957e-09, 1.7881393432617188e-07, 3.557652235031128e-07, 5.327165126800537e-07, 7.096678018569946e-07, 8.866190910339355e-07, 1.0635703802108765e-06, 1.2405216693878174e-06, 1.4174729585647583e-06, 1.5944242477416992e-06, 1.7713755369186401e-06, 1.948326826095581e-06, 2.125278115272522e-06, 2.302229404449463e-06, 2.479180693626404e-06, 2.6561319828033447e-06, 2.8330832719802856e-06, 3.0100345611572266e-06, 3.1869858503341675e-06, 3.3639371395111084e-06, 3.5408884286880493e-06, 3.7178397178649902e-06, 3.894791007041931e-06, 4.071742296218872e-06, 4.248693585395813e-06, 4.425644874572754e-06, 4.602596163749695e-06, 4.779547452926636e-06, 4.956498742103577e-06, 5.133450031280518e-06, 5.3104013204574585e-06, 5.487352609634399e-06, 5.66430389881134e-06, 5.841255187988281e-06]}, "gradients/encoder.encoder.layers.1.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 5.0, 1.0, 1.0, 1.0, 4.0, 6.0, 7.0, 9.0, 10.0, 19.0, 22.0, 30.0, 65.0, 64.0, 127.0, 259.0, 543.0, 1316.0, 4703.0, 35011.0, 838939.0, 152229.0, 11216.0, 2357.0, 827.0, 334.0, 177.0, 88.0, 68.0, 30.0, 25.0, 19.0, 15.0, 3.0, 9.0, 10.0, 1.0, 3.0, 1.0, 2.0, 1.0, 3.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0], "bins": [-0.027099609375, -0.026272058486938477, -0.025444507598876953, -0.02461695671081543, -0.023789405822753906, -0.022961854934692383, -0.02213430404663086, -0.021306753158569336, -0.020479202270507812, -0.01965165138244629, -0.018824100494384766, -0.017996549606323242, -0.01716899871826172, -0.016341447830200195, -0.015513896942138672, -0.014686346054077148, -0.013858795166015625, -0.013031244277954102, -0.012203693389892578, -0.011376142501831055, -0.010548591613769531, -0.009721040725708008, -0.008893489837646484, -0.008065938949584961, -0.0072383880615234375, -0.006410837173461914, -0.005583286285400391, -0.004755735397338867, -0.003928184509277344, -0.0031006336212158203, -0.002273082733154297, -0.0014455318450927734, -0.00061798095703125, 0.00020956993103027344, 0.0010371208190917969, 0.0018646717071533203, 0.0026922225952148438, 0.003519773483276367, 0.004347324371337891, 0.005174875259399414, 0.0060024261474609375, 0.006829977035522461, 0.007657527923583984, 0.008485078811645508, 0.009312629699707031, 0.010140180587768555, 0.010967731475830078, 0.011795282363891602, 0.012622833251953125, 0.013450384140014648, 0.014277935028076172, 0.015105485916137695, 0.01593303680419922, 0.016760587692260742, 0.017588138580322266, 0.01841568946838379, 0.019243240356445312, 0.020070791244506836, 0.02089834213256836, 0.021725893020629883, 0.022553443908691406, 0.02338099479675293, 0.024208545684814453, 0.025036096572875977, 0.0258636474609375]}, "gradients/encoder.encoder.layers.1.attention.q_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 5.0, 3.0, 1.0, 1.0, 3.0, 8.0, 9.0, 9.0, 21.0, 18.0, 38.0, 43.0, 59.0, 71.0, 95.0, 112.0, 104.0, 77.0, 85.0, 57.0, 48.0, 30.0, 29.0, 26.0, 14.0, 18.0, 13.0, 5.0, 2.0, 2.0, 4.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.031646728515625, -0.03063821792602539, -0.02962970733642578, -0.028621196746826172, -0.027612686157226562, -0.026604175567626953, -0.025595664978027344, -0.024587154388427734, -0.023578643798828125, -0.022570133209228516, -0.021561622619628906, -0.020553112030029297, -0.019544601440429688, -0.018536090850830078, -0.01752758026123047, -0.01651906967163086, -0.01551055908203125, -0.01450204849243164, -0.013493537902832031, -0.012485027313232422, -0.011476516723632812, -0.010468006134033203, -0.009459495544433594, -0.008450984954833984, -0.007442474365234375, -0.006433963775634766, -0.005425453186035156, -0.004416942596435547, -0.0034084320068359375, -0.002399921417236328, -0.0013914108276367188, -0.0003829002380371094, 0.0006256103515625, 0.0016341209411621094, 0.0026426315307617188, 0.003651142120361328, 0.0046596527099609375, 0.005668163299560547, 0.006676673889160156, 0.007685184478759766, 0.008693695068359375, 0.009702205657958984, 0.010710716247558594, 0.011719226837158203, 0.012727737426757812, 0.013736248016357422, 0.014744758605957031, 0.01575326919555664, 0.01676177978515625, 0.01777029037475586, 0.01877880096435547, 0.019787311553955078, 0.020795822143554688, 0.021804332733154297, 0.022812843322753906, 0.023821353912353516, 0.024829864501953125, 0.025838375091552734, 0.026846885681152344, 0.027855396270751953, 0.028863906860351562, 0.029872417449951172, 0.03088092803955078, 0.03188943862915039, 0.03289794921875]}, "gradients/encoder.encoder.layers.1.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 2.0, 4.0, 5.0, 14.0, 30.0, 60.0, 156.0, 471.0, 181.0, 54.0, 21.0, 8.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-2.363751173019409, -2.3169918060302734, -2.2702324390411377, -2.223472833633423, -2.176713466644287, -2.1299540996551514, -2.0831947326660156, -2.03643536567688, -1.9896758794784546, -1.9429165124893188, -1.8961570262908936, -1.8493976593017578, -1.802638292312622, -1.7558788061141968, -1.709119439125061, -1.6623599529266357, -1.6156005859375, -1.5688412189483643, -1.522081732749939, -1.4753223657608032, -1.428562879562378, -1.3818035125732422, -1.3350441455841064, -1.2882847785949707, -1.2415252923965454, -1.1947659254074097, -1.1480064392089844, -1.1012470722198486, -1.054487705230713, -1.0077282190322876, -0.9609688520431519, -0.9142094254493713, -0.8674501180648804, -0.8206906914710999, -0.7739312648773193, -0.7271718978881836, -0.6804124712944031, -0.6336530447006226, -0.5868936777114868, -0.5401342511177063, -0.4933748245239258, -0.44661539793014526, -0.39985600113868713, -0.353096604347229, -0.3063371777534485, -0.25957775115966797, -0.21281835436820984, -0.1660589575767517, -0.11929953098297119, -0.07254011929035187, -0.025780707597732544, 0.02097870409488678, 0.0677381157875061, 0.11449752748012543, 0.16125693917274475, 0.20801633596420288, 0.2547757625579834, 0.3015351891517639, 0.34829458594322205, 0.3950539827346802, 0.4418134093284607, 0.4885728359222412, 0.535332202911377, 0.5820916295051575, 0.628851056098938]}, "gradients/encoder.encoder.layers.1.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 4.0, 8.0, 2.0, 4.0, 1.0, 6.0, 13.0, 7.0, 7.0, 9.0, 17.0, 19.0, 19.0, 21.0, 26.0, 29.0, 36.0, 43.0, 49.0, 103.0, 149.0, 96.0, 62.0, 44.0, 34.0, 29.0, 31.0, 25.0, 23.0, 12.0, 18.0, 15.0, 17.0, 9.0, 8.0, 4.0, 6.0, 3.0, 3.0, 4.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.385549545288086, -1.3467457294464111, -1.3079417943954468, -1.269137978553772, -1.2303340435028076, -1.1915302276611328, -1.152726411819458, -1.1139224767684937, -1.0751186609268188, -1.036314845085144, -0.9975109100341797, -0.9587070941925049, -0.9199032187461853, -0.8810993432998657, -0.8422954678535461, -0.8034915924072266, -0.764687716960907, -0.7258838415145874, -0.6870799660682678, -0.6482760906219482, -0.6094722747802734, -0.5706683993339539, -0.5318645238876343, -0.4930606782436371, -0.4542568027973175, -0.4154529273509979, -0.37664908170700073, -0.33784520626068115, -0.2990413308143616, -0.2602374851703644, -0.2214336097240448, -0.1826297640800476, -0.14382588863372803, -0.10502202808856964, -0.06621816009283066, -0.027414292097091675, 0.011389568448066711, 0.0501934289932251, 0.08899730443954468, 0.12780115008354187, 0.16660502552986145, 0.20540888607501984, 0.24421274662017822, 0.2830166220664978, 0.3218204975128174, 0.3606243431568146, 0.39942821860313416, 0.43823206424713135, 0.4770359396934509, 0.5158398151397705, 0.5546436905860901, 0.5934475660324097, 0.6322513818740845, 0.671055257320404, 0.7098591327667236, 0.7486629486083984, 0.7874668836593628, 0.8262707591056824, 0.865074634552002, 0.9038784503936768, 0.9426823258399963, 0.9814862012863159, 1.0202901363372803, 1.059093952178955, 1.0978977680206299]}, "gradients/encoder.encoder.layers.0.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 3.0, 1.0, 2.0, 2.0, 3.0, 0.0, 3.0, 6.0, 7.0, 5.0, 6.0, 12.0, 10.0, 21.0, 36.0, 65.0, 86.0, 180.0, 373.0, 1026.0, 3803.0, 17380.0, 403539.0, 3608435.0, 144967.0, 8931.0, 2723.0, 1508.0, 684.0, 264.0, 98.0, 32.0, 24.0, 21.0, 9.0, 11.0, 7.0, 2.0, 4.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.196533203125, -0.1915426254272461, -0.1865520477294922, -0.18156147003173828, -0.17657089233398438, -0.17158031463623047, -0.16658973693847656, -0.16159915924072266, -0.15660858154296875, -0.15161800384521484, -0.14662742614746094, -0.14163684844970703, -0.13664627075195312, -0.13165569305419922, -0.1266651153564453, -0.1216745376586914, -0.1166839599609375, -0.1116933822631836, -0.10670280456542969, -0.10171222686767578, -0.09672164916992188, -0.09173107147216797, -0.08674049377441406, -0.08174991607666016, -0.07675933837890625, -0.07176876068115234, -0.06677818298339844, -0.06178760528564453, -0.056797027587890625, -0.05180644989013672, -0.04681587219238281, -0.041825294494628906, -0.036834716796875, -0.031844139099121094, -0.026853561401367188, -0.02186298370361328, -0.016872406005859375, -0.011881828308105469, -0.0068912506103515625, -0.0019006729125976562, 0.00308990478515625, 0.008080482482910156, 0.013071060180664062, 0.01806163787841797, 0.023052215576171875, 0.02804279327392578, 0.03303337097167969, 0.038023948669433594, 0.0430145263671875, 0.048005104064941406, 0.05299568176269531, 0.05798625946044922, 0.06297683715820312, 0.06796741485595703, 0.07295799255371094, 0.07794857025146484, 0.08293914794921875, 0.08792972564697266, 0.09292030334472656, 0.09791088104248047, 0.10290145874023438, 0.10789203643798828, 0.11288261413574219, 0.1178731918334961, 0.12286376953125]}, "gradients/encoder.encoder.layers.0.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 4.0, 1.0, 3.0, 3.0, 9.0, 9.0, 29.0, 48.0, 45.0, 87.0, 145.0, 147.0, 116.0, 119.0, 93.0, 61.0, 50.0, 25.0, 8.0, 8.0, 3.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.2418212890625, -0.23561668395996094, -0.22941207885742188, -0.2232074737548828, -0.21700286865234375, -0.2107982635498047, -0.20459365844726562, -0.19838905334472656, -0.1921844482421875, -0.18597984313964844, -0.17977523803710938, -0.1735706329345703, -0.16736602783203125, -0.1611614227294922, -0.15495681762695312, -0.14875221252441406, -0.142547607421875, -0.13634300231933594, -0.13013839721679688, -0.12393379211425781, -0.11772918701171875, -0.11152458190917969, -0.10531997680664062, -0.09911537170410156, -0.0929107666015625, -0.08670616149902344, -0.08050155639648438, -0.07429695129394531, -0.06809234619140625, -0.06188774108886719, -0.055683135986328125, -0.04947853088378906, -0.04327392578125, -0.03706932067871094, -0.030864715576171875, -0.024660110473632812, -0.01845550537109375, -0.012250900268554688, -0.006046295166015625, 0.0001583099365234375, 0.0063629150390625, 0.012567520141601562, 0.018772125244140625, 0.024976730346679688, 0.03118133544921875, 0.03738594055175781, 0.043590545654296875, 0.04979515075683594, 0.055999755859375, 0.06220436096191406, 0.06840896606445312, 0.07461357116699219, 0.08081817626953125, 0.08702278137207031, 0.09322738647460938, 0.09943199157714844, 0.1056365966796875, 0.11184120178222656, 0.11804580688476562, 0.12425041198730469, 0.13045501708984375, 0.1366596221923828, 0.14286422729492188, 0.14906883239746094, 0.1552734375]}, "gradients/encoder.encoder.layers.0.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 3.0, 5.0, 8.0, 10.0, 6.0, 10.0, 13.0, 19.0, 15.0, 21.0, 39.0, 38.0, 63.0, 96.0, 110.0, 182.0, 261.0, 552.0, 1186.0, 3269.0, 17471.0, 3352999.0, 796864.0, 15201.0, 3112.0, 1236.0, 586.0, 299.0, 166.0, 130.0, 74.0, 68.0, 55.0, 18.0, 21.0, 18.0, 22.0, 11.0, 6.0, 10.0, 5.0, 4.0, 4.0, 4.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 3.0], "bins": [-0.3037109375, -0.29476165771484375, -0.2858123779296875, -0.27686309814453125, -0.267913818359375, -0.25896453857421875, -0.2500152587890625, -0.24106597900390625, -0.23211669921875, -0.22316741943359375, -0.2142181396484375, -0.20526885986328125, -0.196319580078125, -0.18737030029296875, -0.1784210205078125, -0.16947174072265625, -0.1605224609375, -0.15157318115234375, -0.1426239013671875, -0.13367462158203125, -0.124725341796875, -0.11577606201171875, -0.1068267822265625, -0.09787750244140625, -0.08892822265625, -0.07997894287109375, -0.0710296630859375, -0.06208038330078125, -0.053131103515625, -0.04418182373046875, -0.0352325439453125, -0.02628326416015625, -0.017333984375, -0.00838470458984375, 0.0005645751953125, 0.00951385498046875, 0.018463134765625, 0.02741241455078125, 0.0363616943359375, 0.04531097412109375, 0.05426025390625, 0.06320953369140625, 0.0721588134765625, 0.08110809326171875, 0.090057373046875, 0.09900665283203125, 0.1079559326171875, 0.11690521240234375, 0.1258544921875, 0.13480377197265625, 0.1437530517578125, 0.15270233154296875, 0.161651611328125, 0.17060089111328125, 0.1795501708984375, 0.18849945068359375, 0.19744873046875, 0.20639801025390625, 0.2153472900390625, 0.22429656982421875, 0.233245849609375, 0.24219512939453125, 0.2511444091796875, 0.26009368896484375, 0.26904296875]}, "gradients/encoder.encoder.layers.0.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 4.0, 2.0, 2.0, 2.0, 5.0, 9.0, 15.0, 20.0, 23.0, 33.0, 58.0, 73.0, 162.0, 352.0, 1086.0, 1187.0, 465.0, 187.0, 117.0, 79.0, 48.0, 35.0, 23.0, 23.0, 13.0, 14.0, 10.0, 6.0, 2.0, 7.0, 2.0, 4.0, 3.0, 5.0, 1.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.313232421875, -0.3034553527832031, -0.29367828369140625, -0.2839012145996094, -0.2741241455078125, -0.2643470764160156, -0.25457000732421875, -0.24479293823242188, -0.235015869140625, -0.22523880004882812, -0.21546173095703125, -0.20568466186523438, -0.1959075927734375, -0.18613052368164062, -0.17635345458984375, -0.16657638549804688, -0.15679931640625, -0.14702224731445312, -0.13724517822265625, -0.12746810913085938, -0.1176910400390625, -0.10791397094726562, -0.09813690185546875, -0.08835983276367188, -0.078582763671875, -0.06880569458007812, -0.05902862548828125, -0.049251556396484375, -0.0394744873046875, -0.029697418212890625, -0.01992034912109375, -0.010143280029296875, -0.0003662109375, 0.009410858154296875, 0.01918792724609375, 0.028964996337890625, 0.0387420654296875, 0.048519134521484375, 0.05829620361328125, 0.06807327270507812, 0.077850341796875, 0.08762741088867188, 0.09740447998046875, 0.10718154907226562, 0.1169586181640625, 0.12673568725585938, 0.13651275634765625, 0.14628982543945312, 0.15606689453125, 0.16584396362304688, 0.17562103271484375, 0.18539810180664062, 0.1951751708984375, 0.20495223999023438, 0.21472930908203125, 0.22450637817382812, 0.234283447265625, 0.24406051635742188, 0.25383758544921875, 0.2636146545410156, 0.2733917236328125, 0.2831687927246094, 0.29294586181640625, 0.3027229309082031, 0.3125]}, "gradients/encoder.encoder.layers.0.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 0.0, 2.0, 12.0, 35.0, 160.0, 514.0, 197.0, 42.0, 17.0, 15.0, 5.0, 1.0, 4.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.6825318336486816, -2.5435385704040527, -2.404545307159424, -2.265552043914795, -2.126558780670166, -1.9875653982162476, -1.848572015762329, -1.7095787525177002, -1.5705854892730713, -1.4315922260284424, -1.2925989627838135, -1.153605580329895, -1.0146123170852661, -0.8756190538406372, -0.7366257309913635, -0.5976324081420898, -0.45863914489746094, -0.31964585185050964, -0.18065255880355835, -0.041659265756607056, 0.09733402729034424, 0.23632729053497314, 0.3753206133842468, 0.5143139362335205, 0.6533071994781494, 0.7923004627227783, 0.931293785572052, 1.0702871084213257, 1.2092803716659546, 1.3482736349105835, 1.487267017364502, 1.6262602806091309, 1.765254020690918, 1.9042472839355469, 2.043240547180176, 2.1822338104248047, 2.3212270736694336, 2.4602203369140625, 2.5992138385772705, 2.7382071018218994, 2.8772003650665283, 3.0161936283111572, 3.155186891555786, 3.294180154800415, 3.433173656463623, 3.572166919708252, 3.711160182952881, 3.8501534461975098, 3.9891467094421387, 4.128139972686768, 4.2671332359313965, 4.406126499176025, 4.545119762420654, 4.684113025665283, 4.823106288909912, 4.962100028991699, 5.101093292236328, 5.240086555480957, 5.379079818725586, 5.518073081970215, 5.657066345214844, 5.796059608459473, 5.935052871704102, 6.0740461349487305, 6.213039398193359]}, "gradients/encoder.encoder.layers.0.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 3.0, 1.0, 2.0, 5.0, 6.0, 4.0, 10.0, 24.0, 27.0, 44.0, 76.0, 104.0, 127.0, 127.0, 133.0, 111.0, 69.0, 51.0, 39.0, 24.0, 10.0, 9.0, 4.0, 3.0, 1.0, 2.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.6778621673583984, -2.560920238494873, -2.4439783096313477, -2.3270363807678223, -2.2100942134857178, -2.0931522846221924, -1.976210355758667, -1.8592684268951416, -1.7423263788223267, -1.6253844499588013, -1.5084424018859863, -1.391500473022461, -1.2745585441589355, -1.1576164960861206, -1.0406745672225952, -0.923732578754425, -0.8067905902862549, -0.6898486018180847, -0.5729066133499146, -0.45596468448638916, -0.339022696018219, -0.22208070755004883, -0.10513877868652344, 0.011803209781646729, 0.1287451982498169, 0.24568717181682587, 0.36262914538383484, 0.4795711040496826, 0.5965130925178528, 0.713455080986023, 0.8303970098495483, 0.9473389983177185, 1.0642809867858887, 1.181222915649414, 1.298164963722229, 1.4151068925857544, 1.5320489406585693, 1.6489908695220947, 1.7659327983856201, 1.8828747272491455, 1.9998167753219604, 2.1167588233947754, 2.233700752258301, 2.350642681121826, 2.4675846099853516, 2.584526538848877, 2.7014684677124023, 2.818410634994507, 2.9353525638580322, 3.0522944927215576, 3.169236421585083, 3.2861785888671875, 3.403120517730713, 3.5200624465942383, 3.6370043754577637, 3.753946304321289, 3.8708882331848145, 3.98783016204834, 4.104772090911865, 4.221714019775391, 4.338655948638916, 4.455597877502441, 4.572540283203125, 4.68948221206665, 4.806424140930176]}, "gradients/encoder.encoder.layers.0.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 2.0, 0.0, 1.0, 0.0, 2.0, 5.0, 4.0, 9.0, 7.0, 3.0, 6.0, 9.0, 26.0, 41.0, 52.0, 83.0, 129.0, 241.0, 588.0, 1561.0, 6899.0, 81485.0, 908863.0, 41621.0, 4691.0, 1262.0, 436.0, 207.0, 125.0, 61.0, 42.0, 33.0, 22.0, 14.0, 13.0, 4.0, 4.0, 7.0, 2.0, 1.0, 3.0, 2.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.28955078125, -0.28032684326171875, -0.2711029052734375, -0.26187896728515625, -0.252655029296875, -0.24343109130859375, -0.2342071533203125, -0.22498321533203125, -0.21575927734375, -0.20653533935546875, -0.1973114013671875, -0.18808746337890625, -0.178863525390625, -0.16963958740234375, -0.1604156494140625, -0.15119171142578125, -0.1419677734375, -0.13274383544921875, -0.1235198974609375, -0.11429595947265625, -0.105072021484375, -0.09584808349609375, -0.0866241455078125, -0.07740020751953125, -0.06817626953125, -0.05895233154296875, -0.0497283935546875, -0.04050445556640625, -0.031280517578125, -0.02205657958984375, -0.0128326416015625, -0.00360870361328125, 0.005615234375, 0.01483917236328125, 0.0240631103515625, 0.03328704833984375, 0.042510986328125, 0.05173492431640625, 0.0609588623046875, 0.07018280029296875, 0.07940673828125, 0.08863067626953125, 0.0978546142578125, 0.10707855224609375, 0.116302490234375, 0.12552642822265625, 0.1347503662109375, 0.14397430419921875, 0.1531982421875, 0.16242218017578125, 0.1716461181640625, 0.18087005615234375, 0.190093994140625, 0.19931793212890625, 0.2085418701171875, 0.21776580810546875, 0.22698974609375, 0.23621368408203125, 0.2454376220703125, 0.25466156005859375, 0.263885498046875, 0.27310943603515625, 0.2823333740234375, 0.29155731201171875, 0.30078125]}, "gradients/encoder.encoder.layers.0.attention.out_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 10.0, 9.0, 18.0, 31.0, 50.0, 113.0, 142.0, 185.0, 152.0, 132.0, 88.0, 36.0, 24.0, 11.0, 3.0, 5.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.2115478515625, -0.2008495330810547, -0.19015121459960938, -0.17945289611816406, -0.16875457763671875, -0.15805625915527344, -0.14735794067382812, -0.1366596221923828, -0.1259613037109375, -0.11526298522949219, -0.10456466674804688, -0.09386634826660156, -0.08316802978515625, -0.07246971130371094, -0.061771392822265625, -0.05107307434082031, -0.040374755859375, -0.029676437377929688, -0.018978118896484375, -0.008279800415039062, 0.00241851806640625, 0.013116836547851562, 0.023815155029296875, 0.03451347351074219, 0.0452117919921875, 0.05591011047363281, 0.06660842895507812, 0.07730674743652344, 0.08800506591796875, 0.09870338439941406, 0.10940170288085938, 0.12010002136230469, 0.13079833984375, 0.1414966583251953, 0.15219497680664062, 0.16289329528808594, 0.17359161376953125, 0.18428993225097656, 0.19498825073242188, 0.2056865692138672, 0.2163848876953125, 0.2270832061767578, 0.23778152465820312, 0.24847984313964844, 0.25917816162109375, 0.26987648010253906, 0.2805747985839844, 0.2912731170654297, 0.301971435546875, 0.3126697540283203, 0.3233680725097656, 0.33406639099121094, 0.34476470947265625, 0.35546302795410156, 0.3661613464355469, 0.3768596649169922, 0.3875579833984375, 0.3982563018798828, 0.4089546203613281, 0.41965293884277344, 0.43035125732421875, 0.44104957580566406, 0.4517478942871094, 0.4624462127685547, 0.47314453125]}, "gradients/encoder.encoder.layers.0.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 1.0, 2.0, 1.0, 0.0, 2.0, 3.0, 9.0, 5.0, 10.0, 14.0, 9.0, 33.0, 41.0, 45.0, 71.0, 115.0, 187.0, 404.0, 832.0, 2292.0, 11690.0, 723405.0, 296309.0, 9313.0, 2072.0, 758.0, 384.0, 205.0, 127.0, 66.0, 43.0, 32.0, 31.0, 14.0, 16.0, 7.0, 7.0, 2.0, 3.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.2279052734375, -0.22053909301757812, -0.21317291259765625, -0.20580673217773438, -0.1984405517578125, -0.19107437133789062, -0.18370819091796875, -0.17634201049804688, -0.168975830078125, -0.16160964965820312, -0.15424346923828125, -0.14687728881835938, -0.1395111083984375, -0.13214492797851562, -0.12477874755859375, -0.11741256713867188, -0.11004638671875, -0.10268020629882812, -0.09531402587890625, -0.08794784545898438, -0.0805816650390625, -0.07321548461914062, -0.06584930419921875, -0.058483123779296875, -0.051116943359375, -0.043750762939453125, -0.03638458251953125, -0.029018402099609375, -0.0216522216796875, -0.014286041259765625, -0.00691986083984375, 0.000446319580078125, 0.0078125, 0.015178680419921875, 0.02254486083984375, 0.029911041259765625, 0.0372772216796875, 0.044643402099609375, 0.05200958251953125, 0.059375762939453125, 0.066741943359375, 0.07410812377929688, 0.08147430419921875, 0.08884048461914062, 0.0962066650390625, 0.10357284545898438, 0.11093902587890625, 0.11830520629882812, 0.12567138671875, 0.13303756713867188, 0.14040374755859375, 0.14776992797851562, 0.1551361083984375, 0.16250228881835938, 0.16986846923828125, 0.17723464965820312, 0.184600830078125, 0.19196701049804688, 0.19933319091796875, 0.20669937133789062, 0.2140655517578125, 0.22143173217773438, 0.22879791259765625, 0.23616409301757812, 0.2435302734375]}, "gradients/encoder.encoder.layers.0.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 3.0, 5.0, 3.0, 2.0, 5.0, 4.0, 9.0, 8.0, 13.0, 22.0, 33.0, 22.0, 45.0, 60.0, 64.0, 90.0, 100.0, 108.0, 85.0, 69.0, 60.0, 45.0, 32.0, 27.0, 25.0, 20.0, 20.0, 6.0, 9.0, 1.0, 8.0, 3.0, 0.0, 2.0, 3.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.67529296875, -0.6574974060058594, -0.6397018432617188, -0.6219062805175781, -0.6041107177734375, -0.5863151550292969, -0.5685195922851562, -0.5507240295410156, -0.532928466796875, -0.5151329040527344, -0.49733734130859375, -0.4795417785644531, -0.4617462158203125, -0.4439506530761719, -0.42615509033203125, -0.4083595275878906, -0.39056396484375, -0.3727684020996094, -0.35497283935546875, -0.3371772766113281, -0.3193817138671875, -0.3015861511230469, -0.28379058837890625, -0.2659950256347656, -0.248199462890625, -0.23040390014648438, -0.21260833740234375, -0.19481277465820312, -0.1770172119140625, -0.15922164916992188, -0.14142608642578125, -0.12363052368164062, -0.1058349609375, -0.08803939819335938, -0.07024383544921875, -0.052448272705078125, -0.0346527099609375, -0.016857147216796875, 0.00093841552734375, 0.018733978271484375, 0.036529541015625, 0.054325103759765625, 0.07212066650390625, 0.08991622924804688, 0.1077117919921875, 0.12550735473632812, 0.14330291748046875, 0.16109848022460938, 0.17889404296875, 0.19668960571289062, 0.21448516845703125, 0.23228073120117188, 0.2500762939453125, 0.2678718566894531, 0.28566741943359375, 0.3034629821777344, 0.321258544921875, 0.3390541076660156, 0.35684967041015625, 0.3746452331542969, 0.3924407958984375, 0.4102363586425781, 0.42803192138671875, 0.4458274841308594, 0.463623046875]}, "gradients/encoder.encoder.layers.0.attention.k_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 1.0, 2.0, 1.0, 0.0, 2.0, 0.0, 2.0, 3.0, 1.0, 5.0, 1.0, 2.0, 3.0, 3.0, 9.0, 9.0, 13.0, 17.0, 23.0, 24.0, 50.0, 60.0, 89.0, 143.0, 341.0, 871.0, 3245.0, 33506.0, 988416.0, 17905.0, 2406.0, 711.0, 301.0, 118.0, 88.0, 51.0, 33.0, 23.0, 16.0, 8.0, 15.0, 12.0, 6.0, 5.0, 6.0, 6.0, 7.0, 2.0, 2.0, 4.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.052154541015625, -0.050493717193603516, -0.04883289337158203, -0.04717206954956055, -0.04551124572753906, -0.04385042190551758, -0.042189598083496094, -0.04052877426147461, -0.038867950439453125, -0.03720712661743164, -0.035546302795410156, -0.03388547897338867, -0.03222465515136719, -0.030563831329345703, -0.02890300750732422, -0.027242183685302734, -0.02558135986328125, -0.023920536041259766, -0.02225971221923828, -0.020598888397216797, -0.018938064575195312, -0.017277240753173828, -0.015616416931152344, -0.01395559310913086, -0.012294769287109375, -0.01063394546508789, -0.008973121643066406, -0.007312297821044922, -0.0056514739990234375, -0.003990650177001953, -0.0023298263549804688, -0.0006690025329589844, 0.0009918212890625, 0.0026526451110839844, 0.004313468933105469, 0.005974292755126953, 0.0076351165771484375, 0.009295940399169922, 0.010956764221191406, 0.01261758804321289, 0.014278411865234375, 0.01593923568725586, 0.017600059509277344, 0.019260883331298828, 0.020921707153320312, 0.022582530975341797, 0.02424335479736328, 0.025904178619384766, 0.02756500244140625, 0.029225826263427734, 0.03088665008544922, 0.0325474739074707, 0.03420829772949219, 0.03586912155151367, 0.037529945373535156, 0.03919076919555664, 0.040851593017578125, 0.04251241683959961, 0.044173240661621094, 0.04583406448364258, 0.04749488830566406, 0.04915571212768555, 0.05081653594970703, 0.052477359771728516, 0.05413818359375]}, "gradients/encoder.encoder.layers.0.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 2.0, 1.0, 2.0, 5.0, 3.0, 5.0, 3.0, 11.0, 6.0, 21.0, 25.0, 16.0, 29.0, 25.0, 35.0, 67.0, 72.0, 50.0, 80.0, 74.0, 86.0, 45.0, 90.0, 62.0, 24.0, 29.0, 37.0, 14.0, 24.0, 11.0, 6.0, 9.0, 11.0, 5.0, 6.0, 2.0, 3.0, 2.0, 4.0, 0.0, 3.0, 1.0, 2.0, 0.0, 3.0, 0.0, 1.0, 0.0, 3.0, 1.0], "bins": [-5.364418029785156e-06, -5.204230546951294e-06, -5.044043064117432e-06, -4.883855581283569e-06, -4.723668098449707e-06, -4.563480615615845e-06, -4.403293132781982e-06, -4.24310564994812e-06, -4.082918167114258e-06, -3.9227306842803955e-06, -3.762543201446533e-06, -3.602355718612671e-06, -3.4421682357788086e-06, -3.2819807529449463e-06, -3.121793270111084e-06, -2.9616057872772217e-06, -2.8014183044433594e-06, -2.641230821609497e-06, -2.4810433387756348e-06, -2.3208558559417725e-06, -2.16066837310791e-06, -2.000480890274048e-06, -1.8402934074401855e-06, -1.6801059246063232e-06, -1.519918441772461e-06, -1.3597309589385986e-06, -1.1995434761047363e-06, -1.039355993270874e-06, -8.791685104370117e-07, -7.189810276031494e-07, -5.587935447692871e-07, -3.986060619354248e-07, -2.384185791015625e-07, -7.82310962677002e-08, 8.195638656616211e-08, 2.421438694000244e-07, 4.023313522338867e-07, 5.62518835067749e-07, 7.227063179016113e-07, 8.828938007354736e-07, 1.043081283569336e-06, 1.2032687664031982e-06, 1.3634562492370605e-06, 1.5236437320709229e-06, 1.6838312149047852e-06, 1.8440186977386475e-06, 2.0042061805725098e-06, 2.164393663406372e-06, 2.3245811462402344e-06, 2.4847686290740967e-06, 2.644956111907959e-06, 2.8051435947418213e-06, 2.9653310775756836e-06, 3.125518560409546e-06, 3.285706043243408e-06, 3.4458935260772705e-06, 3.606081008911133e-06, 3.766268491744995e-06, 3.926455974578857e-06, 4.08664345741272e-06, 4.246830940246582e-06, 4.407018423080444e-06, 4.567205905914307e-06, 4.727393388748169e-06, 4.887580871582031e-06]}, "gradients/encoder.encoder.layers.0.attention.q_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 3.0, 4.0, 5.0, 5.0, 4.0, 3.0, 12.0, 17.0, 17.0, 23.0, 36.0, 61.0, 78.0, 163.0, 299.0, 666.0, 1639.0, 6253.0, 49813.0, 930448.0, 49852.0, 6093.0, 1639.0, 702.0, 299.0, 165.0, 80.0, 51.0, 41.0, 20.0, 23.0, 14.0, 12.0, 8.0, 3.0, 2.0, 2.0, 4.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.0149688720703125, -0.014443635940551758, -0.013918399810791016, -0.013393163681030273, -0.012867927551269531, -0.012342691421508789, -0.011817455291748047, -0.011292219161987305, -0.010766983032226562, -0.01024174690246582, -0.009716510772705078, -0.009191274642944336, -0.008666038513183594, -0.008140802383422852, -0.007615566253662109, -0.007090330123901367, -0.006565093994140625, -0.006039857864379883, -0.005514621734619141, -0.0049893856048583984, -0.004464149475097656, -0.003938913345336914, -0.003413677215576172, -0.0028884410858154297, -0.0023632049560546875, -0.0018379688262939453, -0.0013127326965332031, -0.0007874965667724609, -0.00026226043701171875, 0.00026297569274902344, 0.0007882118225097656, 0.0013134479522705078, 0.00183868408203125, 0.002363920211791992, 0.0028891563415527344, 0.0034143924713134766, 0.003939628601074219, 0.004464864730834961, 0.004990100860595703, 0.005515336990356445, 0.0060405731201171875, 0.00656580924987793, 0.007091045379638672, 0.007616281509399414, 0.008141517639160156, 0.008666753768920898, 0.00919198989868164, 0.009717226028442383, 0.010242462158203125, 0.010767698287963867, 0.01129293441772461, 0.011818170547485352, 0.012343406677246094, 0.012868642807006836, 0.013393878936767578, 0.01391911506652832, 0.014444351196289062, 0.014969587326049805, 0.015494823455810547, 0.01602005958557129, 0.01654529571533203, 0.017070531845092773, 0.017595767974853516, 0.018121004104614258, 0.018646240234375]}, "gradients/encoder.encoder.layers.0.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 2.0, 1.0, 2.0, 2.0, 2.0, 4.0, 13.0, 6.0, 11.0, 20.0, 30.0, 59.0, 93.0, 139.0, 198.0, 155.0, 96.0, 65.0, 40.0, 23.0, 15.0, 4.0, 13.0, 6.0, 4.0, 3.0, 1.0, 0.0, 6.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0570068359375, -0.0551457405090332, -0.053284645080566406, -0.05142354965209961, -0.04956245422363281, -0.047701358795166016, -0.04584026336669922, -0.04397916793823242, -0.042118072509765625, -0.04025697708129883, -0.03839588165283203, -0.036534786224365234, -0.03467369079589844, -0.03281259536743164, -0.030951499938964844, -0.029090404510498047, -0.02722930908203125, -0.025368213653564453, -0.023507118225097656, -0.02164602279663086, -0.019784927368164062, -0.017923831939697266, -0.01606273651123047, -0.014201641082763672, -0.012340545654296875, -0.010479450225830078, -0.008618354797363281, -0.006757259368896484, -0.0048961639404296875, -0.0030350685119628906, -0.0011739730834960938, 0.0006871223449707031, 0.0025482177734375, 0.004409313201904297, 0.006270408630371094, 0.00813150405883789, 0.009992599487304688, 0.011853694915771484, 0.013714790344238281, 0.015575885772705078, 0.017436981201171875, 0.019298076629638672, 0.02115917205810547, 0.023020267486572266, 0.024881362915039062, 0.02674245834350586, 0.028603553771972656, 0.030464649200439453, 0.03232574462890625, 0.03418684005737305, 0.036047935485839844, 0.03790903091430664, 0.03977012634277344, 0.041631221771240234, 0.04349231719970703, 0.04535341262817383, 0.047214508056640625, 0.04907560348510742, 0.05093669891357422, 0.052797794342041016, 0.05465888977050781, 0.05651998519897461, 0.058381080627441406, 0.0602421760559082, 0.062103271484375]}, "gradients/encoder.encoder.layers.0.layer_norm.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 5.0, 21.0, 41.0, 106.0, 441.0, 275.0, 71.0, 22.0, 14.0, 5.0, 5.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.1393978595733643, -2.045360803604126, -1.9513237476348877, -1.8572866916656494, -1.7632496356964111, -1.6692125797271729, -1.5751755237579346, -1.4811384677886963, -1.387101411819458, -1.2930643558502197, -1.1990272998809814, -1.1049902439117432, -1.0109531879425049, -0.9169161319732666, -0.8228790760040283, -0.72884202003479, -0.6348049640655518, -0.5407679080963135, -0.4467308521270752, -0.3526937961578369, -0.25865674018859863, -0.16461968421936035, -0.07058262825012207, 0.02345442771911621, 0.11749148368835449, 0.21152853965759277, 0.30556559562683105, 0.39960265159606934, 0.4936397075653076, 0.5876767635345459, 0.6817138195037842, 0.7757508754730225, 0.8697876930236816, 0.9638247489929199, 1.0578618049621582, 1.1518988609313965, 1.2459359169006348, 1.339972972869873, 1.4340100288391113, 1.5280470848083496, 1.622084140777588, 1.7161211967468262, 1.8101582527160645, 1.9041953086853027, 1.998232364654541, 2.0922694206237793, 2.1863064765930176, 2.280343532562256, 2.374380588531494, 2.4684176445007324, 2.5624547004699707, 2.656491756439209, 2.7505288124084473, 2.8445658683776855, 2.938602924346924, 3.032639980316162, 3.1266770362854004, 3.2207140922546387, 3.314751148223877, 3.4087882041931152, 3.5028252601623535, 3.596862316131592, 3.69089937210083, 3.7849364280700684, 3.8789734840393066]}, "gradients/encoder.encoder.layers.0.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 4.0, 6.0, 4.0, 6.0, 6.0, 9.0, 8.0, 7.0, 9.0, 15.0, 24.0, 25.0, 33.0, 22.0, 44.0, 40.0, 50.0, 71.0, 60.0, 77.0, 49.0, 62.0, 67.0, 58.0, 53.0, 42.0, 35.0, 19.0, 18.0, 18.0, 13.0, 10.0, 11.0, 5.0, 4.0, 10.0, 6.0, 5.0, 1.0, 5.0, 1.0, 1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.3310737609863281, -1.2886173725128174, -1.2461609840393066, -1.203704595565796, -1.1612482070922852, -1.1187918186187744, -1.0763354301452637, -1.033879041671753, -0.991422712802887, -0.9489663243293762, -0.9065099358558655, -0.8640536069869995, -0.8215972185134888, -0.779140830039978, -0.7366844415664673, -0.6942280530929565, -0.6517716646194458, -0.6093152761459351, -0.5668588876724243, -0.5244024991989136, -0.4819461405277252, -0.4394897520542145, -0.3970333933830261, -0.3545770049095154, -0.31212061643600464, -0.2696642279624939, -0.22720785439014435, -0.1847514808177948, -0.14229509234428406, -0.09983870387077332, -0.05738234519958496, -0.014925956726074219, 0.027530312538146973, 0.06998669356107712, 0.11244307458400726, 0.1548994481563568, 0.19735583662986755, 0.2398122251033783, 0.28226858377456665, 0.3247249722480774, 0.36718136072158813, 0.4096377491950989, 0.4520941376686096, 0.494550496339798, 0.5370068550109863, 0.5794632434844971, 0.6219196319580078, 0.6643760204315186, 0.7068324089050293, 0.74928879737854, 0.7917451858520508, 0.8342015743255615, 0.8766579627990723, 0.919114351272583, 0.961570680141449, 1.0040271282196045, 1.0464835166931152, 1.088939905166626, 1.1313962936401367, 1.1738526821136475, 1.2163090705871582, 1.258765459060669, 1.3012218475341797, 1.3436782360076904, 1.3861345052719116]}, "gradients/encoder.encoder.pos_conv_embed.conv.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 6.0, 3.0, 3.0, 8.0, 8.0, 6.0, 1.0, 18.0, 13.0, 7.0, 18.0, 22.0, 27.0, 20.0, 35.0, 37.0, 52.0, 132.0, 279.0, 78.0, 40.0, 31.0, 30.0, 24.0, 24.0, 18.0, 17.0, 16.0, 10.0, 3.0, 5.0, 5.0, 3.0, 2.0, 3.0, 2.0, 3.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.08447265625, -0.08138370513916016, -0.07829475402832031, -0.07520580291748047, -0.07211685180664062, -0.06902790069580078, -0.06593894958496094, -0.0628499984741211, -0.05976104736328125, -0.056672096252441406, -0.05358314514160156, -0.05049419403076172, -0.047405242919921875, -0.04431629180908203, -0.04122734069824219, -0.038138389587402344, -0.0350494384765625, -0.031960487365722656, -0.028871536254882812, -0.02578258514404297, -0.022693634033203125, -0.01960468292236328, -0.016515731811523438, -0.013426780700683594, -0.01033782958984375, -0.007248878479003906, -0.0041599273681640625, -0.0010709762573242188, 0.002017974853515625, 0.005106925964355469, 0.008195877075195312, 0.011284828186035156, 0.014373779296875, 0.017462730407714844, 0.020551681518554688, 0.02364063262939453, 0.026729583740234375, 0.02981853485107422, 0.03290748596191406, 0.035996437072753906, 0.03908538818359375, 0.042174339294433594, 0.04526329040527344, 0.04835224151611328, 0.051441192626953125, 0.05453014373779297, 0.05761909484863281, 0.060708045959472656, 0.0637969970703125, 0.06688594818115234, 0.06997489929199219, 0.07306385040283203, 0.07615280151367188, 0.07924175262451172, 0.08233070373535156, 0.0854196548461914, 0.08850860595703125, 0.0915975570678711, 0.09468650817871094, 0.09777545928955078, 0.10086441040039062, 0.10395336151123047, 0.10704231262207031, 0.11013126373291016, 0.11322021484375]}, "gradients/encoder.encoder.pos_conv_embed.conv.weight_v": {"_type": "histogram", "values": [1.0, 2.0, 3.0, 4.0, 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 0.0, 3.0, 2.0, 5.0, 2.0, 4.0, 8.0, 7.0, 8.0, 8.0, 33.0, 31.0, 34.0, 61.0, 97.0, 151.0, 450.0, 1801.0, 8268652.0, 114374.0, 1822.0, 418.0, 211.0, 118.0, 33.0, 50.0, 15.0, 30.0, 23.0, 37.0, 16.0, 8.0, 21.0, 4.0, 1.0, 1.0, 1.0, 8.0, 9.0, 3.0, 0.0, 0.0, 0.0, 0.0, 6.0, 4.0, 2.0, 1.0, 5.0, 0.0, 0.0, 0.0, 2.0, 10.0], "bins": [-0.7305524349212646, -0.7053310871124268, -0.6801096796989441, -0.6548883318901062, -0.6296669244766235, -0.6044455766677856, -0.5792242288589478, -0.5540028810501099, -0.5287814736366272, -0.5035601258277893, -0.47833871841430664, -0.45311737060546875, -0.42789599299430847, -0.4026746153831482, -0.3774532675743103, -0.35223188996315, -0.32701051235198975, -0.30178913474082947, -0.2765677571296692, -0.2513464093208313, -0.22612503170967102, -0.20090365409851074, -0.17568229138851166, -0.15046092867851257, -0.1252395510673523, -0.10001818090677261, -0.07479681074619293, -0.04957544058561325, -0.02435407042503357, 0.000867307186126709, 0.026088669896125793, 0.05131003260612488, 0.07653141021728516, 0.10175278037786484, 0.12697415053844452, 0.1521955132484436, 0.17741689085960388, 0.20263826847076416, 0.22785963118076324, 0.25308099389076233, 0.2783023715019226, 0.3035237491130829, 0.32874512672424316, 0.35396647453308105, 0.37918785214424133, 0.4044092297554016, 0.4296305775642395, 0.4548519551753998, 0.48007333278656006, 0.505294680595398, 0.5305160880088806, 0.5557374358177185, 0.5809588432312012, 0.6061801910400391, 0.631401538848877, 0.6566228866577148, 0.6818442940711975, 0.7070656418800354, 0.7322870492935181, 0.757508397102356, 0.7827297449111938, 0.8079511523246765, 0.8331725001335144, 0.8583939075469971, 0.883615255355835]}, "gradients/encoder.encoder.pos_conv_embed.conv.weight_g": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 3.0, 2.0, 2.0, 2.0, 3.0, 4.0, 3.0, 3.0, 4.0, 3.0, 3.0, 2.0, 2.0, 5.0, 2.0, 1.0, 5.0, 3.0, 1.0, 4.0, 3.0, 0.0, 4.0, 2.0, 6.0, 4.0, 2.0, 4.0, 3.0, 1.0, 1.0, 3.0, 3.0, 3.0, 3.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 3.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0], "bins": [-0.48956477642059326, -0.48044586181640625, -0.47132694721221924, -0.46220800280570984, -0.4530890882015228, -0.4439701735973358, -0.4348512589931488, -0.4257323443889618, -0.4166133999824524, -0.4074944853782654, -0.39837557077407837, -0.38925662636756897, -0.38013771176338196, -0.37101879715919495, -0.36189988255500793, -0.3527809679508209, -0.3436620533466339, -0.3345431387424469, -0.3254242241382599, -0.3163052797317505, -0.3071863651275635, -0.29806745052337646, -0.28894853591918945, -0.27982962131500244, -0.27071070671081543, -0.2615917921066284, -0.2524728775024414, -0.2433539479970932, -0.234235018491745, -0.22511610388755798, -0.21599718928337097, -0.20687827467918396, -0.19775933027267456, -0.18864041566848755, -0.17952148616313934, -0.17040257155895233, -0.16128364205360413, -0.15216472744941711, -0.1430458128452301, -0.1339268982410431, -0.12480796128511429, -0.11568903923034668, -0.10657012462615967, -0.09745120257139206, -0.08833228051662445, -0.07921335846185684, -0.07009443640708923, -0.06097552180290222, -0.05185659974813461, -0.042737677693367004, -0.033618759363889694, -0.024499839171767235, -0.015380918979644775, -0.006261996924877167, 0.0028569214046001434, 0.011975839734077454, 0.021094761788845062, 0.03021368198096752, 0.03933260217308998, 0.04845152050256729, 0.0575704425573349, 0.06668936461210251, 0.07580828666687012, 0.08492720127105713, 0.09404612332582474]}, "gradients/encoder.feature_projection.projection.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 4.0, 5.0, 6.0, 10.0, 21.0, 37.0, 49.0, 65.0, 103.0, 172.0, 276.0, 490.0, 1061.0, 2568.0, 7811.0, 35270.0, 249615.0, 189770.0, 26761.0, 6336.0, 2057.0, 803.0, 344.0, 173.0, 101.0, 76.0, 56.0, 35.0, 34.0, 33.0, 21.0, 21.0, 14.0, 9.0, 9.0, 10.0, 6.0, 5.0, 5.0, 6.0, 8.0, 2.0, 4.0, 5.0, 1.0, 3.0, 2.0, 1.0, 0.0, 3.0, 1.0, 3.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.299560546875, -0.2875404357910156, -0.27552032470703125, -0.2635002136230469, -0.2514801025390625, -0.23945999145507812, -0.22743988037109375, -0.21541976928710938, -0.203399658203125, -0.19137954711914062, -0.17935943603515625, -0.16733932495117188, -0.1553192138671875, -0.14329910278320312, -0.13127899169921875, -0.11925888061523438, -0.10723876953125, -0.09521865844726562, -0.08319854736328125, -0.07117843627929688, -0.0591583251953125, -0.047138214111328125, -0.03511810302734375, -0.023097991943359375, -0.011077880859375, 0.000942230224609375, 0.01296234130859375, 0.024982452392578125, 0.0370025634765625, 0.049022674560546875, 0.06104278564453125, 0.07306289672851562, 0.0850830078125, 0.09710311889648438, 0.10912322998046875, 0.12114334106445312, 0.1331634521484375, 0.14518356323242188, 0.15720367431640625, 0.16922378540039062, 0.181243896484375, 0.19326400756835938, 0.20528411865234375, 0.21730422973632812, 0.2293243408203125, 0.24134445190429688, 0.25336456298828125, 0.2653846740722656, 0.27740478515625, 0.2894248962402344, 0.30144500732421875, 0.3134651184082031, 0.3254852294921875, 0.3375053405761719, 0.34952545166015625, 0.3615455627441406, 0.373565673828125, 0.3855857849121094, 0.39760589599609375, 0.4096260070800781, 0.4216461181640625, 0.4336662292480469, 0.44568634033203125, 0.4577064514160156, 0.4697265625]}, "gradients/encoder.feature_projection.projection.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 4.0, 3.0, 6.0, 11.0, 24.0, 43.0, 45.0, 124.0, 187.0, 216.0, 161.0, 87.0, 43.0, 30.0, 18.0, 7.0, 3.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.2161865234375, -0.2049732208251953, -0.19375991821289062, -0.18254661560058594, -0.17133331298828125, -0.16012001037597656, -0.14890670776367188, -0.1376934051513672, -0.1264801025390625, -0.11526679992675781, -0.10405349731445312, -0.09284019470214844, -0.08162689208984375, -0.07041358947753906, -0.059200286865234375, -0.04798698425292969, -0.036773681640625, -0.025560379028320312, -0.014347076416015625, -0.0031337738037109375, 0.00807952880859375, 0.019292831420898438, 0.030506134033203125, 0.04171943664550781, 0.0529327392578125, 0.06414604187011719, 0.07535934448242188, 0.08657264709472656, 0.09778594970703125, 0.10899925231933594, 0.12021255493164062, 0.1314258575439453, 0.14263916015625, 0.1538524627685547, 0.16506576538085938, 0.17627906799316406, 0.18749237060546875, 0.19870567321777344, 0.20991897583007812, 0.2211322784423828, 0.2323455810546875, 0.2435588836669922, 0.2547721862792969, 0.26598548889160156, 0.27719879150390625, 0.28841209411621094, 0.2996253967285156, 0.3108386993408203, 0.322052001953125, 0.3332653045654297, 0.3444786071777344, 0.35569190979003906, 0.36690521240234375, 0.37811851501464844, 0.3893318176269531, 0.4005451202392578, 0.4117584228515625, 0.4229717254638672, 0.4341850280761719, 0.44539833068847656, 0.45661163330078125, 0.46782493591308594, 0.4790382385253906, 0.4902515411376953, 0.50146484375]}, "gradients/encoder.feature_projection.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 2.0, 2.0, 5.0, 8.0, 6.0, 8.0, 19.0, 38.0, 162.0, 127.0, 65.0, 14.0, 13.0, 3.0, 4.0, 5.0, 3.0, 2.0, 2.0, 1.0, 1.0, 1.0, 3.0, 3.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0], "bins": [-1.1812152862548828, -1.1527118682861328, -1.1242084503173828, -1.0957050323486328, -1.0672017335891724, -1.0386983156204224, -1.0101948976516724, -0.9816914796829224, -0.9531880617141724, -0.9246846437454224, -0.8961812853813171, -0.8676778674125671, -0.8391744494438171, -0.8106710910797119, -0.7821676731109619, -0.7536642551422119, -0.7251608371734619, -0.6966574192047119, -0.6681540608406067, -0.6396506428718567, -0.6111472249031067, -0.5826438665390015, -0.5541404485702515, -0.5256370306015015, -0.49713367223739624, -0.46863028407096863, -0.44012686610221863, -0.411623477935791, -0.383120059967041, -0.3546166718006134, -0.3261132836341858, -0.2976098656654358, -0.2691064476966858, -0.24060304462909698, -0.21209964156150818, -0.18359625339508057, -0.15509283542633057, -0.12658944725990295, -0.09808604419231415, -0.06958264112472534, -0.041079238057136536, -0.012575836852192879, 0.015927564352750778, 0.044430963695049286, 0.07293436676263809, 0.1014377623796463, 0.1299411654472351, 0.1584445685148239, 0.18694797158241272, 0.21545137465000153, 0.24395477771759033, 0.27245816588401794, 0.30096158385276794, 0.32946497201919556, 0.35796838998794556, 0.38647177815437317, 0.4149751663208008, 0.4434785544872284, 0.4719819724559784, 0.500485360622406, 0.528988778591156, 0.5574921369552612, 0.5859955549240112, 0.6144989728927612, 0.6430023908615112]}, "gradients/encoder.feature_projection.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 2.0, 1.0, 6.0, 2.0, 3.0, 2.0, 0.0, 4.0, 4.0, 5.0, 14.0, 7.0, 14.0, 43.0, 101.0, 134.0, 87.0, 21.0, 9.0, 5.0, 11.0, 4.0, 1.0, 3.0, 3.0, 2.0, 2.0, 1.0, 2.0, 1.0, 1.0, 4.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.227846622467041, -1.1911040544509888, -1.1543614864349365, -1.1176187992095947, -1.0808762311935425, -1.0441336631774902, -1.007391095161438, -0.9706485271453857, -0.9339059591293335, -0.8971633911132812, -0.8604207634925842, -0.823678195476532, -0.7869356274604797, -0.7501929998397827, -0.7134504318237305, -0.6767078638076782, -0.6399652361869812, -0.603222668170929, -0.5664800405502319, -0.5297374725341797, -0.49299490451812744, -0.4562523066997528, -0.4195097088813782, -0.3827671408653259, -0.3460245430469513, -0.30928194522857666, -0.2725393772125244, -0.23579677939414978, -0.19905419647693634, -0.1623116135597229, -0.12556901574134827, -0.08882644772529602, -0.05208384990692139, -0.015341263264417648, 0.02140132337808609, 0.05814391374588013, 0.09488649666309357, 0.131629079580307, 0.16837167739868164, 0.2051142454147339, 0.24185684323310852, 0.27859944105148315, 0.3153420090675354, 0.35208460688591003, 0.38882720470428467, 0.4255697727203369, 0.46231237053871155, 0.4990549385547638, 0.5357975363731384, 0.5725401043891907, 0.6092827320098877, 0.6460253000259399, 0.6827678680419922, 0.7195104360580444, 0.7562530636787415, 0.7929956316947937, 0.8297382593154907, 0.866480827331543, 0.90322345495224, 0.9399660229682922, 0.9767085909843445, 1.0134512186050415, 1.0501937866210938, 1.086936354637146, 1.1236789226531982]}, "eval/loss": 5.980778694152832, "eval/wer": 1.7240975803252678, "eval/runtime": 972.2623, "eval/samples_per_second": 2.717, "eval/steps_per_second": 0.34} \ No newline at end of file diff --git a/wandb/run-20220303_004520-25bnjrx1/logs/debug-internal.log b/wandb/run-20220303_004520-25bnjrx1/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..43c74f663c2e232cbf4ce9acb54b238a4b8153a8 --- /dev/null +++ b/wandb/run-20220303_004520-25bnjrx1/logs/debug-internal.log @@ -0,0 +1,6372 @@ +2022-03-03 00:45:21,349 INFO MainThread:267739 [internal.py:wandb_internal():89] W&B internal server running at pid: 267739, started at: 2022-03-03 00:45:21.349397 +2022-03-03 00:45:21,351 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: check_version +2022-03-03 00:45:21,352 INFO WriterThread:267739 [datastore.py:open_for_write():77] open: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/run-25bnjrx1.wandb +2022-03-03 00:45:21,354 DEBUG SenderThread:267739 [sender.py:send():235] send: header +2022-03-03 00:45:21,354 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: check_version +2022-03-03 00:45:21,420 DEBUG SenderThread:267739 [sender.py:send():235] send: run +2022-03-03 00:45:21,549 INFO SenderThread:267739 [dir_watcher.py:__init__():169] watching files in: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files +2022-03-03 00:45:21,549 INFO SenderThread:267739 [sender.py:_start_run_threads():809] run started: 25bnjrx1 with start time 1646268320 +2022-03-03 00:45:21,549 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:45:21,549 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:45:21,550 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: run_start +2022-03-03 00:45:21,555 DEBUG HandlerThread:267739 [meta.py:__init__():36] meta init +2022-03-03 00:45:21,555 DEBUG HandlerThread:267739 [meta.py:__init__():50] meta init done +2022-03-03 00:45:21,555 DEBUG HandlerThread:267739 [meta.py:probe():210] probe +2022-03-03 00:45:21,561 DEBUG HandlerThread:267739 [meta.py:_setup_git():200] setup git +2022-03-03 00:45:21,577 DEBUG HandlerThread:267739 [meta.py:_setup_git():207] setup git done +2022-03-03 00:45:21,577 DEBUG HandlerThread:267739 [meta.py:_save_pip():54] save pip +2022-03-03 00:45:21,578 DEBUG HandlerThread:267739 [meta.py:_save_pip():68] save pip done +2022-03-03 00:45:21,578 DEBUG HandlerThread:267739 [meta.py:probe():248] probe done +2022-03-03 00:45:21,659 DEBUG SenderThread:267739 [sender.py:send():235] send: files +2022-03-03 00:45:21,659 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-metadata.json with policy now +2022-03-03 00:45:21,664 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:45:21,665 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:45:21,738 DEBUG SenderThread:267739 [sender.py:send():235] send: config +2022-03-03 00:45:21,740 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:45:21,740 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:45:21,740 WARNING SenderThread:267739 [sender.py:send_metric():902] Seen metric with glob (shouldnt happen) +2022-03-03 00:45:21,965 INFO Thread-11 :267739 [upload_job.py:push():137] Uploaded file /tmp/tmp4j4apmt4wandb/3uqmjuao-wandb-metadata.json +2022-03-03 00:45:22,551 INFO Thread-8 :267739 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-metadata.json +2022-03-03 00:45:22,551 INFO Thread-8 :267739 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:45:22,552 INFO Thread-8 :267739 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/requirements.txt +2022-03-03 00:45:22,552 INFO Thread-8 :267739 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:45:24,550 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:45:25,959 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:45:25,959 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:45:25,959 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:45:25,959 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:45:25,959 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:45:25,961 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:45:26,551 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:45:26,551 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:45:28,552 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:45:29,787 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:45:29,788 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:45:29,788 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:45:30,553 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:45:30,553 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:45:32,553 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:45:33,786 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:45:33,786 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:45:33,787 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:45:34,554 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:45:34,554 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:45:36,555 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:45:36,912 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:45:36,912 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:45:37,505 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:45:37,506 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:45:37,506 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:45:37,555 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:45:38,555 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:45:40,556 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:45:41,262 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:45:41,263 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:45:41,263 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:45:41,557 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:45:42,557 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:45:44,558 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:45:44,976 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:45:44,977 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:45:44,977 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:45:45,558 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:45:46,559 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:45:48,559 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:45:49,089 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:45:49,090 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:45:49,090 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:45:49,560 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:45:50,052 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 00:45:50,560 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:45:52,004 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:45:52,004 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:45:52,581 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/config.yaml +2022-03-03 00:45:52,581 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:45:53,461 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:45:53,461 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:45:53,462 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:45:53,581 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:45:54,582 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:45:56,583 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:45:57,093 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:45:57,093 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:45:57,094 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:45:57,583 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:45:58,583 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:46:00,584 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:46:00,676 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:46:00,677 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:46:00,677 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:46:01,584 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:46:02,585 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:46:03,585 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:46:04,257 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:46:04,258 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:46:04,258 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:46:04,586 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:46:05,586 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:46:07,183 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:46:07,183 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:46:07,587 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:46:07,770 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:46:07,770 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:46:07,771 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:46:08,588 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:46:09,588 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:46:11,298 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:46:11,299 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:46:11,299 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:46:11,589 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:46:11,589 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:46:13,590 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:46:14,770 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:46:14,771 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:46:14,771 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:46:15,590 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:46:15,591 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:46:17,591 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:46:18,242 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:46:18,243 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:46:18,243 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:46:18,592 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:46:19,592 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:46:20,461 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 00:46:21,593 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:46:21,676 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:46:21,676 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:46:21,677 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:46:22,375 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:46:22,375 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:46:22,593 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:46:23,593 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:46:25,128 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:46:25,128 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:46:25,129 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:46:25,594 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:46:25,595 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:46:27,595 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:46:28,501 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:46:28,501 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:46:28,502 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:46:28,596 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:46:29,596 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:46:31,597 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:46:31,885 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:46:31,885 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:46:31,886 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:46:32,597 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:46:33,597 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:46:35,235 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:46:35,235 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:46:35,236 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:46:35,598 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:46:35,598 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:46:37,523 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:46:37,523 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:46:37,599 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:46:38,582 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:46:38,582 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:46:38,583 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:46:38,599 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:46:39,600 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:46:41,600 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:46:41,973 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:46:41,974 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:46:41,974 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:46:42,601 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:46:43,601 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:46:45,322 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:46:45,323 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:46:45,323 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:46:45,602 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:46:45,602 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:46:47,602 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:46:48,600 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:46:48,600 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:46:48,600 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:46:48,603 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:46:49,603 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:46:50,604 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:46:50,881 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 00:46:51,604 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:46:51,845 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:46:51,845 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:46:51,846 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:46:52,577 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:46:52,578 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:46:52,604 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:46:54,605 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:46:55,054 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:46:55,055 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:46:55,055 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:46:55,605 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:46:56,606 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:46:58,281 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:46:58,282 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:46:58,282 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:46:58,607 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:46:58,607 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:47:00,607 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:47:01,564 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:47:01,564 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:47:01,565 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:47:01,608 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:47:02,608 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:47:04,609 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:47:04,833 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:47:04,834 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:47:04,835 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:47:05,609 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:47:06,610 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:47:07,618 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:47:07,619 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:47:08,024 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:47:08,024 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:47:08,025 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:47:08,611 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:47:08,611 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:47:10,611 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:47:11,193 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:47:11,194 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:47:11,194 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:47:11,612 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:47:12,612 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:47:14,320 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:47:14,320 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:47:14,321 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:47:14,613 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:47:14,614 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:47:16,614 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:47:17,350 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:47:17,351 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:47:17,351 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:47:17,615 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:47:18,615 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:47:20,335 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:47:20,335 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:47:20,335 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:47:20,616 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:47:20,616 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:47:21,300 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 00:47:22,617 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:47:22,678 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:47:22,678 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:47:23,279 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:47:23,279 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:47:23,280 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:47:23,617 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:47:24,618 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:47:26,217 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:47:26,218 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:47:26,218 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:47:26,619 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:47:26,619 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:47:28,619 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:47:29,042 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:47:29,042 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:47:29,043 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:47:29,620 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:47:30,620 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:47:31,858 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:47:31,858 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:47:31,859 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:47:32,621 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:47:32,621 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:47:34,577 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:47:34,577 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:47:34,578 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:47:34,622 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:47:34,622 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:47:36,623 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:47:37,218 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:47:37,218 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:47:37,219 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:47:37,623 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:47:37,739 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:47:37,739 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:47:38,624 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:47:39,801 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:47:39,801 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:47:39,802 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:47:40,624 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:47:40,625 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:47:42,239 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:47:42,240 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:47:42,240 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:47:42,625 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:47:42,625 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:47:44,558 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:47:44,559 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:47:44,559 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:47:44,626 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:47:44,626 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:47:45,626 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:47:46,627 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:47:46,723 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:47:46,724 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:47:46,724 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:47:47,627 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:47:47,627 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:47:48,627 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:47:48,723 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:47:48,723 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:47:48,724 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:47:49,628 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:47:50,461 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:47:50,461 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:47:50,463 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:47:50,628 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:47:50,628 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:47:51,629 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:47:51,687 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 00:47:52,037 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:47:52,037 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:47:52,038 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:47:52,629 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:47:52,629 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:47:52,835 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:47:52,836 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:47:53,444 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:47:53,444 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:47:53,445 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:47:53,630 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:47:53,630 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:47:54,630 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:47:54,688 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:47:54,688 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:47:54,688 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:47:55,631 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:47:55,631 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:47:56,492 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,498 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,503 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,508 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,509 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,509 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,509 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,509 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,509 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,509 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,509 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,515 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,515 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,515 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,515 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,515 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,515 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,520 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,526 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,531 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,537 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,537 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,537 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,537 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,537 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,537 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,537 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,537 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,537 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,537 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,537 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,537 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,538 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,538 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,538 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,538 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,538 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,538 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,538 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,538 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,538 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,538 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,544 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,557 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,563 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,563 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,563 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,563 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,563 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,563 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,563 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,563 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,564 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,564 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,564 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,569 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,569 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,569 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,575 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,575 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,575 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,575 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,575 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,575 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,580 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,586 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,591 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,596 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,597 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,597 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,597 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,597 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,597 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,597 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,608 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,613 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,618 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,624 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,629 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,629 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,629 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,630 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,630 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,630 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,630 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,630 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,630 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,630 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,630 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,641 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,641 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,641 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,641 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,641 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,641 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,641 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,641 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,641 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,642 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,642 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,642 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,642 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,642 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,642 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,642 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,642 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,642 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,642 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,642 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,643 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,643 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,643 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,643 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,643 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,643 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,643 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,643 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,643 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,643 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,643 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,643 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,644 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,644 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,644 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,644 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,644 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,644 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,644 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,644 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,644 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,644 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,644 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,644 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,645 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,645 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,645 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,645 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,645 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,645 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,645 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,645 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,645 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,645 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,645 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,645 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,645 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,646 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,646 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,646 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,646 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,646 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,646 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,646 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,646 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,646 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,646 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,646 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,655 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,655 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,655 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,655 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,655 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,656 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,656 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,656 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,656 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,656 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,656 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,656 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,656 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,656 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,656 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,656 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,657 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,657 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,657 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,657 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,657 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,657 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,657 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,657 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,657 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,657 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,658 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,658 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,658 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,658 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,658 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,658 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,658 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,658 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,658 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,658 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,659 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,659 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,659 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,659 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,659 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,659 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,659 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,659 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,659 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,660 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,660 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:47:56,660 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,660 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,660 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,660 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,660 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,660 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,661 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,661 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,661 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,661 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,661 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,661 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,661 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,661 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,661 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,661 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,662 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,662 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,662 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,662 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,662 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,662 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,662 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,663 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,663 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,663 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,663 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,663 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,663 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,663 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,663 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,663 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,663 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,663 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,664 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,664 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,664 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,664 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,664 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,664 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,664 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,664 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,664 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,664 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,664 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,665 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,665 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,665 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,665 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,665 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,665 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,665 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,665 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,665 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,665 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,665 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,666 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,666 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,666 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,666 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,666 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,666 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,666 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,666 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,666 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,666 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,666 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,667 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,667 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,667 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,667 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,667 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,667 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,667 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,667 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,667 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,667 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,667 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,668 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,668 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,668 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,668 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,668 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,668 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,668 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,668 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,668 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,668 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,668 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,669 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,669 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,669 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,669 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,669 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,669 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,669 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,669 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,669 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,669 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,670 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,670 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,670 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,670 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,670 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,670 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,670 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,670 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,670 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,670 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,670 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,671 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,671 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,671 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,671 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,671 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,671 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,671 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,671 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,671 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,671 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,671 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,672 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,672 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,672 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,672 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,672 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,672 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,672 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,672 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,672 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,672 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,672 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,672 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,672 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,672 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,672 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,673 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,673 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,673 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,673 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,673 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,673 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,673 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,673 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,673 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,673 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,673 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,673 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,673 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,673 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,673 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,674 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,674 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,674 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,674 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,674 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,674 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,674 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,674 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,674 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,674 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,674 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,674 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,674 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,674 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,675 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,675 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,675 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,675 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,675 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,675 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,675 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,675 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,675 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,675 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,675 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,675 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,675 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,675 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,675 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,675 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,676 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,676 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,676 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,676 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,676 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,676 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,676 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,676 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,676 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,676 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,676 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,676 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,676 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,676 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,677 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,677 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,677 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,677 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,677 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,677 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,677 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,677 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,677 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,677 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,677 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,677 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,677 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,677 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,677 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,677 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,678 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,678 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,678 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,678 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,679 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,679 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,679 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,679 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,679 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,679 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,679 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,679 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,680 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,680 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,680 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,680 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,680 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,680 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,680 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,680 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,680 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,680 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,680 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,680 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,681 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,681 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,681 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,681 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,681 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,681 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,681 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,681 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,681 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,681 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,681 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,681 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,681 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,681 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,682 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,682 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,682 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,682 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,682 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,682 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,682 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,682 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,682 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,682 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,682 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,682 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,682 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,682 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,683 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,683 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,683 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,683 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,683 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,683 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,683 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,683 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,683 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,683 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,683 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,683 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,683 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,683 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,683 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,684 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,684 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,684 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,684 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,684 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,684 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,684 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,684 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,684 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,684 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,684 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,684 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,684 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,684 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,684 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,685 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,685 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,685 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,685 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,685 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,685 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,685 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,685 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,685 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,685 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,685 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,685 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,685 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,685 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,685 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,686 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,686 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,686 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,686 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,686 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,686 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,686 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,686 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,686 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,686 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,686 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,686 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,686 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,686 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,686 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,687 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,687 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,687 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,687 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,687 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,687 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,687 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,687 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,687 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,687 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,687 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,687 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,687 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,687 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,687 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,687 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,688 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,688 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,688 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,688 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,688 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,688 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,688 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,688 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,688 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,688 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,688 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,688 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,688 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,688 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,688 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,689 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,689 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,689 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,689 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,689 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,689 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,689 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,689 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,689 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,689 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,689 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,689 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,689 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,689 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,690 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,690 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,690 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,690 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,690 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,690 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,690 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,690 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,690 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,690 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,690 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,690 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,690 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,690 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,691 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,691 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,691 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,691 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,691 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,691 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,691 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,691 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,691 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,691 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,691 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,691 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,691 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,691 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,691 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,692 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,692 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,692 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,692 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,692 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,692 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,692 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,692 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,692 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,692 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,692 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,692 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,692 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,692 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,692 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,693 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,693 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,693 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,693 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,693 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,693 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,693 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,693 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,693 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,693 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,693 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,693 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,693 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,693 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,693 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,694 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,694 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,694 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,694 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,694 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,694 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,694 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,694 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,694 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,694 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,694 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,694 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,694 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,694 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,694 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,695 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,695 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,695 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,695 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,695 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,695 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,695 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,695 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,695 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,695 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,695 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,695 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,695 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,695 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,695 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,695 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,696 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,696 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,696 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,696 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,696 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,696 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,696 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,696 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,696 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,696 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,696 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,696 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,696 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,696 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,696 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,696 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,697 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,697 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,697 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,697 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,697 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,697 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,697 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,697 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,697 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,697 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,697 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,697 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,697 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,697 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,697 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,698 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,698 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,698 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,698 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,698 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,698 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,698 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,698 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,698 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,698 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,698 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,698 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,698 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,698 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,698 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,699 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,699 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,699 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,699 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,699 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,699 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,699 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,699 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,699 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,699 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,699 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,699 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,699 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,699 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,699 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,700 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,700 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,700 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,700 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,700 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,700 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,700 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,700 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,700 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,700 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,700 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,700 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,700 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,700 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,700 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,700 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,701 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,701 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,701 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,701 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,701 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,701 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,701 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,701 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,701 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,701 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,701 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,701 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,701 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,701 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,701 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,702 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,702 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,702 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,702 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,702 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,702 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,702 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,702 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,702 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,702 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,702 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,702 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,702 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,702 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,703 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,703 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,703 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,703 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,703 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,703 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,703 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,703 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,703 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,703 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,703 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,703 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,703 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,703 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,704 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,704 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,704 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,704 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,704 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,704 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,704 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,704 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,704 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,704 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,704 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,704 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,704 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,704 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,704 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,704 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,705 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,705 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,705 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,705 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,705 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,705 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,705 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,705 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,705 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,705 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,705 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,705 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,705 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,705 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,705 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,705 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,705 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,706 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,706 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,706 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,706 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,706 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,706 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,706 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,706 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,706 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,706 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,706 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,706 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,706 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,706 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,706 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,706 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,707 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,707 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,707 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,707 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,707 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,707 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,707 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,707 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,707 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,707 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,707 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,707 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,707 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,707 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,707 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,707 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,708 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,708 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,708 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,708 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,708 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,708 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,708 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,708 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,708 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,708 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,708 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,708 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,708 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,708 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,708 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,709 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,709 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,709 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,709 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,709 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,709 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,709 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,709 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,709 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,709 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,709 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,709 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,709 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,709 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,709 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,709 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,710 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,710 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,710 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,710 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,710 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,710 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,710 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,710 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,710 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,710 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,710 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,710 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,710 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,710 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,711 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,711 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,711 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,711 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,711 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,711 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,711 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,711 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,711 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,711 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,711 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,711 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,711 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,711 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,711 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,711 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,711 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,711 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,712 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,712 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,712 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,712 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,712 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,712 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,712 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,712 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,712 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,712 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,712 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,712 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,712 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,712 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,712 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,713 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,713 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,713 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,713 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,713 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,713 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,713 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,713 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,713 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,713 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,713 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,713 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,713 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,713 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,713 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,713 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,713 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,714 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,714 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,714 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,714 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,714 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,714 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,714 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,714 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,714 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,714 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,714 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,714 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,714 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,714 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,714 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,715 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,715 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,715 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,715 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,715 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,715 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,715 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,715 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,715 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,715 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,715 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,715 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,715 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,715 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,715 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,715 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,716 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,716 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,716 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,716 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,716 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,716 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,716 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,716 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,716 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,716 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,716 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,716 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,716 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,716 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,716 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,716 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,717 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,717 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,717 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,717 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,717 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,717 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,717 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,717 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,717 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,717 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,717 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,717 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,717 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,717 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,717 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,717 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,718 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,718 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,718 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,718 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,718 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,718 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,718 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,718 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,718 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,718 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,718 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,718 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,718 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,718 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,718 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,718 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,718 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,719 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,719 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,719 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,719 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,719 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,719 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,719 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,719 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,719 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,719 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,719 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,719 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,719 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,719 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,719 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,719 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,720 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,720 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,720 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,720 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,720 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,720 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,720 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,720 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,720 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,720 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,720 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,720 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,720 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,720 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,720 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,721 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,721 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,721 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,721 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,721 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,721 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,721 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,721 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,721 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,721 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,721 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,721 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,721 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,721 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,721 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,721 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,722 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,722 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,722 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,722 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,722 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,722 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,722 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,722 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,722 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,722 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,722 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,722 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,722 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,722 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,722 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,722 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,723 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,723 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,723 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,723 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,723 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,723 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,723 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,723 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,723 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,723 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,723 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,723 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,723 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,723 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,723 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,724 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,724 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,724 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,724 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,724 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,724 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,724 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,724 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,724 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,724 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,724 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,724 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,724 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,724 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,724 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,724 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,725 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,725 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,725 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,725 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,725 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,725 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,725 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,725 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,725 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,725 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,725 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,725 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,725 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,725 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,726 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,726 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,726 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,726 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,726 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,726 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,726 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,726 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,726 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,726 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,726 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,726 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,726 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,726 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,726 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,727 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,727 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,727 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,727 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,727 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,727 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,727 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,727 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,727 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,727 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,727 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,727 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,727 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,727 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,727 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,727 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,728 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,728 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,728 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,728 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,728 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,728 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,728 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,728 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,728 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,728 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,728 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,728 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,728 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,728 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,728 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,728 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,728 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,729 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,729 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,729 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,729 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,729 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,729 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,729 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,729 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,729 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,729 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,729 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,729 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,729 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,729 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,729 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,730 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,730 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,730 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,730 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,730 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,730 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,730 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,730 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,730 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,730 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,730 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,730 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,730 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,730 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,730 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,731 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,731 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,731 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,731 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,731 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,731 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,731 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,731 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,731 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,731 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,731 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,731 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,731 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,731 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,731 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,731 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,731 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,732 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,732 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,732 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,732 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,732 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,732 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,732 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,732 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,732 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,732 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,732 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,732 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,732 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,732 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,732 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,732 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,733 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,733 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,733 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,733 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,733 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,733 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,733 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,733 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,733 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,733 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,733 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,733 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,733 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,733 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,733 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,733 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,733 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,734 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,734 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,734 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,734 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,734 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,734 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,734 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,734 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,734 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,734 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,734 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,734 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,734 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,734 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,734 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,735 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,735 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,735 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,735 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,735 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,735 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,735 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,735 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,735 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,735 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,735 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,735 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,735 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,735 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,735 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,735 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,736 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,736 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,736 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,736 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,736 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,736 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,736 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,736 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,736 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,736 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,736 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,736 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,736 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,736 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,736 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,736 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,737 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,737 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,737 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,737 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,737 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,737 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,737 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,737 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,737 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,737 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,737 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,737 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,737 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,737 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,737 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,737 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,738 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,738 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,738 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,738 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,738 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,738 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,738 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,738 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,738 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,738 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,738 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,738 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,738 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,738 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,738 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,739 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,739 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,739 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,739 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,739 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,739 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,739 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,739 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,739 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,739 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,739 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,740 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,740 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,740 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,740 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,740 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,740 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,740 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,740 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,740 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,740 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,741 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,741 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,741 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,741 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,741 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,741 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,741 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,741 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,741 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,741 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,741 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,741 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,741 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,741 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,741 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,742 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,742 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,742 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,742 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,742 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,742 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,742 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,742 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,742 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,742 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,742 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,742 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,742 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,742 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,742 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,743 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,743 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,743 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,743 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,743 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,743 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,743 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,743 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,743 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,743 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,743 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,743 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,743 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,743 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,743 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,743 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,743 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,744 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,744 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,744 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,744 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,744 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,744 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,744 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,744 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,744 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,744 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,744 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,744 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,744 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,744 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,744 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,744 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,744 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,745 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,745 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,745 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,745 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,745 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,745 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,745 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,745 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,745 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,745 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,745 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,745 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,745 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,745 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,745 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,745 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,746 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,746 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,746 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,746 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,746 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,746 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,746 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,746 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,746 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,746 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,746 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,746 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,746 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,746 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,746 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,746 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,746 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,747 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,747 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,747 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,747 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,747 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,747 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,747 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,747 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,747 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,747 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,747 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,747 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,747 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,747 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,747 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,747 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,748 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,748 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,748 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,748 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,748 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,748 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,748 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,748 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,748 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,748 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,748 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,748 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,748 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,748 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,748 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,748 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,749 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,749 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,749 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,749 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,749 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,749 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,749 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,749 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,749 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,749 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,749 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,749 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,749 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,749 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,749 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,750 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,750 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,750 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,750 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,750 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,750 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,750 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,750 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,750 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,750 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,750 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,750 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,750 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,750 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,750 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,751 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,751 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,751 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,751 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,751 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,751 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,751 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,751 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,751 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,751 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,751 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,751 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,751 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,751 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,751 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,751 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,752 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,752 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,752 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,752 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,752 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,752 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,752 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,752 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,752 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,752 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,752 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,752 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,752 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,752 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,752 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,752 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,752 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,753 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,753 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,753 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,753 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,753 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,753 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,753 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,753 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,753 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,753 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,753 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,753 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,753 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,753 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,753 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,753 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,754 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,754 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,754 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,754 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,754 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,754 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,754 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,754 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,754 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,754 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,754 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,754 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,754 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,754 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,754 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,754 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,755 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,755 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,755 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,755 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,755 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,755 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,755 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,755 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,755 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,755 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,755 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,755 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,755 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,755 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,755 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,755 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,755 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,755 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,756 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,756 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,756 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,756 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,756 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,756 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,756 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,756 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,756 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,756 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,756 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,756 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,756 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,756 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,756 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,756 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,756 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,757 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,757 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,757 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,757 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,757 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,757 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,757 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,757 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,757 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,757 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,757 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,757 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,757 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,757 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,757 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,757 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,758 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,758 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,758 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,758 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,758 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,758 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,758 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,758 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,758 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,758 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,758 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,758 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,758 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,758 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,758 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,758 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,759 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,759 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,759 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,759 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,759 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,759 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,759 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,759 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,759 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,759 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,759 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,759 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,759 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,759 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,759 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,759 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,760 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,760 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,760 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,760 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,760 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,760 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,760 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,760 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,760 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,760 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,760 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,760 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,760 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,760 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,760 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,760 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,761 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,761 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,761 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,761 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,761 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,761 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,761 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,761 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,761 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,761 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,761 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,761 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,761 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,761 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,761 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,761 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,762 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,762 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,762 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,762 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,762 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,762 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,762 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,762 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,762 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,762 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,762 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,762 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,762 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,762 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,762 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,762 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,763 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,763 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,763 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,763 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,763 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,763 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,763 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,763 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,763 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,763 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,763 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,763 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,763 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,763 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,763 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,763 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,764 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,764 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,764 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,764 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,764 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,764 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,764 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,764 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,764 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,764 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,764 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,764 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,764 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,764 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,764 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,764 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,764 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,765 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,765 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,765 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,765 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,765 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,765 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,765 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,765 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,765 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,765 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,765 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,765 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,765 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,765 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,765 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,765 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,765 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,766 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,766 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,766 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,766 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,766 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,766 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,766 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,766 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,766 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,766 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,766 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,766 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,766 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,766 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,766 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,766 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,767 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,767 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,767 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,767 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,767 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,767 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,767 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,767 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,767 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,767 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,767 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,767 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,767 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,767 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,767 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,767 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,767 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,768 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,768 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,768 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,768 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,768 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,768 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,768 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,768 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,768 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,768 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,768 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,768 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,768 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,768 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,768 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,769 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,769 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,769 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,769 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,769 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,769 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,769 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,769 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,769 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,769 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,769 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,769 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,769 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,769 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,769 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,770 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,770 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,770 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,770 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,770 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,770 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,770 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,770 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,770 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,770 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,770 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,770 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,770 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,770 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,770 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,771 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,771 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,771 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,771 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,771 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,771 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,771 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,771 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,771 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,771 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,771 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,771 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,771 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,771 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,771 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,771 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,771 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,772 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,772 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,772 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,772 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,772 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,772 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,772 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,772 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,772 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,772 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,772 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,772 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,772 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,772 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,772 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,772 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,772 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,772 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,773 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,773 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,773 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,773 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,773 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,773 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,773 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,773 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,773 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,773 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,773 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,773 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,773 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,773 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,773 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,773 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,773 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,774 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,774 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,774 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,774 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,774 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,774 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,774 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,774 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,774 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,774 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,774 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,774 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,774 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,774 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,774 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,774 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,775 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,775 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,775 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,775 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,775 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,775 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,775 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,775 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,775 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,775 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,775 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,775 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,775 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,775 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,775 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,775 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,776 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,776 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,776 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,776 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,776 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,776 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,776 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,776 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,776 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,776 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,776 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,776 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,776 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,776 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,776 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,776 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,776 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,777 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,777 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,777 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,777 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,777 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,777 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,777 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,777 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,777 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,777 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,777 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,777 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,777 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,777 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,777 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,777 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,777 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,777 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,778 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,778 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,778 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,778 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,778 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,778 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,778 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,778 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,778 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,778 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,778 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,778 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,778 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,778 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,778 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,778 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,779 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,779 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,779 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,779 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,779 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,779 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,779 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,779 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,779 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,779 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,779 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,779 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,779 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,779 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,779 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,780 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,780 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,780 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,780 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,780 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,780 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,780 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,780 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,780 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,780 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,780 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,780 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,780 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,780 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,780 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,780 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,781 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,781 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,781 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,781 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,781 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,781 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,781 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,781 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,781 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,781 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,781 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,781 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,781 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,781 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,781 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,781 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,781 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,782 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,782 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,782 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,782 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,782 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,782 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,782 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,782 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,782 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,782 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,782 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,782 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,782 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,782 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,782 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,783 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,783 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,783 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,783 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,783 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,783 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,783 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,783 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,783 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,783 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,783 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,783 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,783 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,783 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,783 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,783 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,783 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,784 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,784 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,784 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,784 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,784 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,784 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,784 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,784 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,784 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,784 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,784 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,784 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,784 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,784 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,784 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,784 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,785 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,785 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,785 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,785 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,785 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,785 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,785 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,785 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,785 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,785 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,785 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,785 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,785 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,785 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,785 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,785 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,786 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,786 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,786 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,786 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,786 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,786 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,786 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,786 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,786 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,786 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,786 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,786 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,786 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,786 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,786 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,787 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,787 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,787 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,787 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,787 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,787 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,787 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,787 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,787 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,787 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,787 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,787 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,787 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,787 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,787 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,787 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,787 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,787 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,788 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,788 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,788 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,788 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,788 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,788 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,788 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,788 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,788 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,788 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,788 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,788 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,788 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,788 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,788 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,788 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,788 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,789 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,789 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,789 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,789 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,789 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,789 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,789 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,789 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,789 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,789 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,789 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,789 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,789 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,789 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,790 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,790 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,790 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,790 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,790 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,790 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,790 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,790 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,790 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,790 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,790 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,790 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,790 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,790 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,790 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,791 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,791 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,791 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,791 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,791 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,791 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,791 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,791 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,791 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,791 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,791 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,791 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,791 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,791 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,791 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,791 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,792 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,792 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,792 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,792 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,792 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,792 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,792 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,792 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,792 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,792 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,792 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,792 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,792 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,792 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,792 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,792 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,792 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,793 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,793 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,793 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,793 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,793 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,793 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,793 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,793 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,793 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,793 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,793 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,793 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,793 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,793 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,793 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,793 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,794 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,794 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,794 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,794 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,794 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,794 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,794 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,794 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,794 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,794 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,794 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,794 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,794 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,794 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,794 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,794 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,795 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,795 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,795 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,795 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,795 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,795 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,795 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,795 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,795 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,795 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,795 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,795 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,795 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,795 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,795 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,795 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,796 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,796 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,796 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,796 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,796 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,796 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,796 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,796 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,796 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,796 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,796 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,796 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,796 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,796 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,796 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,796 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,796 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,797 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,797 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,797 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,797 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,797 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,797 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,797 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,797 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,797 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,797 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,797 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,797 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,797 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,797 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,797 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,797 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,797 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,798 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,798 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,798 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,798 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,798 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,798 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,798 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,798 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,798 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,798 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,798 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,798 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,798 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,798 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,798 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,798 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,799 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,799 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,799 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,799 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,799 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,799 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,799 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,799 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,799 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,799 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,799 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,799 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,799 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,799 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,799 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,799 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,800 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,800 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,800 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,800 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,800 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,800 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,800 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,800 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,800 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,800 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,800 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,800 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,800 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,800 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,800 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,800 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,801 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,801 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,801 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,801 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,801 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,801 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,801 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,801 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,801 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,801 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,801 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,801 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,801 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,801 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,801 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,801 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,802 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,802 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,802 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,802 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,802 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,802 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,802 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,802 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,802 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,802 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,802 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,802 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,802 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,802 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,802 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,803 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,803 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,803 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,803 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,803 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,803 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,803 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,803 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,803 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,803 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,803 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,803 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,803 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,803 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,803 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,803 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,804 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,804 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,804 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,804 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,804 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,804 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,804 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,804 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,804 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,804 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,804 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,804 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,804 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,804 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,804 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,804 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,804 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,805 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,805 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,805 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,805 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,805 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,805 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,805 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,805 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,805 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,805 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,805 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,805 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,805 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,805 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,805 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,805 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,805 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,806 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,806 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,806 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,806 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,806 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,806 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,806 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,806 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,806 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,806 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,806 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,806 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,806 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,806 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,806 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,807 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,807 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,807 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,807 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,807 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,807 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,807 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,807 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,807 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,807 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,807 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,807 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,807 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,807 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,807 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,807 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,807 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,808 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,808 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,808 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,808 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,808 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,808 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,808 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,808 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,808 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,808 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,808 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,808 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,808 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,808 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,808 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,808 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,809 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,809 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,809 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,809 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,809 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,809 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,809 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,809 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,809 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,809 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,809 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,809 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,809 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,809 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,809 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,809 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,809 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,810 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,810 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,810 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,810 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,810 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,810 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,810 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,810 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,810 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,810 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,810 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,810 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,810 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,810 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,810 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,811 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,811 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,811 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,811 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,811 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,811 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,811 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,811 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,811 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,811 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,811 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,811 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,811 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,811 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,811 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,812 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,812 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,812 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,812 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,812 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,812 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,812 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,812 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,812 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,812 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,812 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,812 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,812 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,812 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,812 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,812 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,812 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,813 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,813 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,813 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,813 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,813 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,813 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,813 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,813 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,813 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,813 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,813 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,813 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,813 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,813 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,813 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,813 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,813 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,814 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,814 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,814 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,814 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,814 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,814 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,814 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,814 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,814 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,814 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,814 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,814 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,814 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,814 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,814 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,815 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,815 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,815 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,815 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,815 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,815 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,815 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,815 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,815 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,815 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,815 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,815 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,815 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,815 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,815 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,815 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,815 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,816 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,816 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,816 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,816 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,816 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,816 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,816 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,816 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,816 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,816 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,816 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,816 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,816 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,816 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,816 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,816 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,817 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,817 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,817 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,817 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,817 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,817 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,817 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,817 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,817 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,817 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,817 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,817 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,817 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,817 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,817 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,817 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,817 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,818 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,818 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,818 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,818 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,818 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,818 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,818 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,818 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,818 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,818 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,818 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,818 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,818 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,818 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,818 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,819 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,819 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,819 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,819 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,819 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,819 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,819 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,819 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,819 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,819 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,819 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,819 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,819 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,819 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,819 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,819 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,820 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,820 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,820 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,820 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,820 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,820 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,820 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,820 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,820 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,820 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,820 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,820 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,820 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,820 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,820 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 00:47:56,820 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:47:56,914 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:47:57,002 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:47:57,659 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:47:57,660 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:47:59,660 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:48:00,378 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:48:00,453 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:48:00,559 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:48:00,660 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:48:01,661 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:48:03,662 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:48:04,179 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:48:04,240 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:48:04,329 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:48:04,662 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:48:05,662 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:48:07,663 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:48:07,906 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:48:07,907 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:48:08,000 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:48:08,001 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:48:08,091 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:48:08,664 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:48:09,664 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:48:11,643 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:48:11,697 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:48:11,697 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:48:11,785 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:48:12,697 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:48:13,698 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:48:15,301 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:48:15,356 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:48:15,446 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:48:15,698 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:48:15,699 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:48:17,699 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:48:18,921 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:48:18,972 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:48:19,063 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:48:19,700 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:48:19,700 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:48:21,701 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:48:22,360 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 00:48:22,564 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:48:22,617 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:48:22,708 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:48:23,266 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:48:23,564 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:48:23,703 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/config.yaml +2022-03-03 00:48:23,703 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:48:23,703 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:48:25,703 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:48:26,136 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:48:26,194 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:48:26,276 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:48:26,704 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:48:27,704 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:48:29,700 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:48:29,754 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:48:29,754 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:48:29,861 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:48:30,747 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:48:31,747 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:48:33,343 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:48:33,395 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:48:33,483 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:48:33,748 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:48:33,748 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:48:34,748 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:48:35,749 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:48:36,863 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:48:36,916 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:48:37,009 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:48:37,750 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:48:37,750 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:48:38,707 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:48:38,708 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:48:38,750 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:48:39,750 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:48:40,346 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:48:40,399 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:48:40,487 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:48:40,751 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:48:41,751 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:48:42,752 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:48:43,752 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:48:43,874 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:48:43,925 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:48:44,012 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:48:44,752 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:48:45,753 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:48:46,753 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:48:47,380 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:48:47,433 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:48:47,522 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:48:47,754 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:48:48,754 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:48:50,755 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:48:50,840 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:48:50,893 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:48:50,979 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:48:51,755 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:48:52,755 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:48:53,044 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 00:48:54,140 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:48:54,140 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:48:54,278 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:48:54,331 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:48:54,418 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:48:54,756 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:48:54,756 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:48:56,757 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:48:57,729 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:48:57,783 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:48:57,875 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:48:58,799 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:48:58,799 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:49:00,800 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:49:01,089 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:49:01,143 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:49:01,229 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:49:01,800 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:49:02,801 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:49:04,483 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:49:04,536 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:49:04,629 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:49:04,802 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:49:04,802 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:49:06,802 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:49:07,902 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:49:07,955 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:49:08,039 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:49:08,803 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:49:08,803 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:49:09,196 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:49:09,197 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:49:09,804 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:49:10,804 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:49:11,240 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:49:11,292 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:49:11,373 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:49:11,804 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:49:12,805 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:49:13,805 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:49:14,585 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:49:14,638 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:49:14,726 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:49:14,806 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:49:14,806 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:49:16,806 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:49:17,807 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:49:17,934 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:49:17,988 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:49:18,075 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:49:18,807 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:49:19,808 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:49:21,196 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:49:21,250 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:49:21,334 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:49:21,808 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:49:21,809 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:49:23,602 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 00:49:23,809 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:49:24,245 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:49:24,246 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:49:24,484 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:49:24,539 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:49:24,628 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:49:24,809 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:49:25,810 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:49:27,756 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:49:27,833 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:49:27,839 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:49:27,921 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:49:28,834 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:49:29,835 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:49:30,967 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:49:31,020 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:49:31,160 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:49:31,835 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:49:31,836 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:49:33,836 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:49:34,196 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:49:34,249 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:49:34,338 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:49:34,837 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:49:35,837 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:49:37,346 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:49:37,429 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:49:37,516 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:49:37,838 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:49:37,838 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:49:39,451 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:49:39,453 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:49:39,839 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:49:40,484 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:49:40,537 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:49:40,625 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:49:40,839 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:49:41,839 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:49:43,595 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:49:43,648 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:49:43,740 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:49:43,840 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:49:43,840 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:49:45,841 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:49:46,620 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:49:46,679 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:49:46,775 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:49:46,841 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:49:47,842 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:49:49,707 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:49:49,750 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:49:49,834 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:49:49,842 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:49:49,842 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:49:51,843 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:49:52,646 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:49:52,700 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:49:52,788 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:49:52,843 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:49:53,844 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:49:54,238 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 00:49:54,579 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:49:54,580 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:49:55,492 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:49:55,544 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:49:55,636 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:49:55,844 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:49:55,845 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:49:57,845 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:49:58,330 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:49:58,383 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:49:58,494 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:49:58,846 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:49:59,846 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:50:00,846 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:50:01,154 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:50:01,208 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:50:01,297 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:50:01,847 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:50:01,847 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:50:02,847 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:50:03,965 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:50:04,027 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:50:04,116 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:50:04,848 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:50:04,848 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:50:05,849 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:50:06,575 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:50:06,629 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:50:06,722 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:50:06,849 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:50:06,849 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:50:07,849 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:50:08,850 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:50:09,141 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:50:09,197 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:50:09,285 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:50:09,674 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:50:09,676 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:50:09,850 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:50:09,851 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:50:10,850 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:50:11,537 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:50:11,587 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:50:11,673 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:50:11,851 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:50:11,851 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:50:12,851 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:50:13,841 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:50:13,922 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:50:14,019 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:50:14,899 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:50:14,899 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:50:15,899 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:50:15,996 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:50:16,050 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:50:16,185 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:50:16,900 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:50:16,900 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:50:17,900 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:50:17,930 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:50:17,982 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:50:18,065 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:50:18,900 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:50:18,901 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:50:19,664 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:50:19,719 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:50:19,801 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:50:19,901 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:50:20,901 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:50:21,271 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:50:21,325 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:50:21,403 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:50:21,902 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:50:22,717 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:50:22,771 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:50:22,882 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:50:22,902 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:50:22,902 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:50:23,998 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:50:24,050 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:50:24,131 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:50:24,731 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:50:24,732 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:50:24,903 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:50:24,903 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:50:24,968 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 00:50:25,193 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:50:25,246 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:50:25,329 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:50:25,903 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:50:26,904 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:50:26,977 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:50:27,154 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:50:27,242 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:50:27,904 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:50:28,904 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:50:30,713 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:50:30,768 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:50:30,854 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:50:30,905 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:50:30,906 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:50:32,906 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:50:34,369 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:50:34,449 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:50:34,540 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:50:34,907 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:50:34,907 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:50:36,907 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:50:37,988 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:50:38,043 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:50:38,128 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:50:38,908 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:50:38,908 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:50:39,935 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:50:39,937 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:50:40,909 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:50:41,647 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:50:41,701 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:50:41,789 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:50:41,909 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:50:42,910 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:50:44,910 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:50:45,196 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:50:45,250 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:50:45,339 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:50:45,911 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:50:46,911 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:50:48,758 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:50:48,802 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:50:48,888 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:50:48,912 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:50:49,912 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:50:50,912 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:50:51,913 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:50:52,301 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:50:52,356 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:50:52,443 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:50:52,913 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:50:53,914 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:50:54,914 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:50:55,269 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:50:55,270 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:50:55,706 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 00:50:55,860 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:50:55,940 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:50:55,951 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:50:56,031 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:50:56,941 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:50:56,941 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:50:57,941 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:50:59,413 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:50:59,466 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:50:59,552 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:50:59,942 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:50:59,942 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:51:00,942 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:51:01,943 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:51:02,951 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:51:03,004 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:51:03,092 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:51:03,943 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:51:03,944 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:51:04,944 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:51:05,944 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:51:06,451 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:51:06,505 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:51:06,597 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:51:06,945 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:51:07,945 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:51:08,945 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:51:09,885 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:51:09,940 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:51:10,019 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:51:10,026 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:51:10,355 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:51:10,357 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:51:11,019 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:51:11,020 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:51:12,020 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:51:13,250 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:51:13,324 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:51:13,414 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:51:14,021 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:51:14,021 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:51:15,021 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:51:16,021 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:51:16,722 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:51:16,797 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:51:16,886 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:51:17,022 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:51:17,022 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:51:18,022 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:51:20,023 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:51:20,095 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:51:20,146 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:51:20,252 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:51:21,023 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:51:22,024 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:51:23,472 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:51:23,530 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:51:23,618 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:51:24,024 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:51:24,025 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:51:25,627 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:51:25,628 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:51:26,025 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:51:26,379 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 00:51:26,890 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:51:26,944 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:51:27,036 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:51:28,029 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:51:28,030 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:51:30,030 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:51:30,207 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:51:30,260 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:51:30,355 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:51:31,031 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:51:32,031 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:51:33,652 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:51:33,704 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:51:33,797 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:51:34,032 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:51:34,032 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:51:36,033 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:51:36,950 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:51:37,003 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:51:37,093 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:51:38,087 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:51:38,087 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:51:40,088 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:51:40,253 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:51:40,322 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:51:40,417 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:51:40,906 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:51:40,907 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:51:41,088 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:51:42,088 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:51:43,626 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:51:43,681 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:51:43,772 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:51:44,089 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:51:45,090 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:51:46,090 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:51:46,844 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:51:46,896 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:51:46,981 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:51:47,090 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:51:47,091 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:51:48,091 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:51:49,091 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:51:50,085 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:51:50,138 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:51:50,223 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:51:51,139 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:51:51,139 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:51:52,139 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:51:53,140 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:51:53,322 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:51:53,374 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:51:53,459 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:51:54,140 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:51:54,140 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:51:55,140 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:51:56,015 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:51:56,016 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:51:56,499 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:51:56,554 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:51:56,645 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:51:56,879 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 00:51:57,141 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:51:57,141 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:51:59,142 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:51:59,680 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:51:59,735 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:51:59,822 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:52:00,142 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:52:01,143 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:52:02,837 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:52:02,891 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:52:02,986 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:52:03,144 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:52:03,144 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:52:05,144 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:52:05,975 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:52:06,030 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:52:06,118 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:52:06,145 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:52:07,145 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:52:09,044 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:52:09,096 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:52:09,180 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:52:09,187 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:52:10,180 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:52:11,119 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:52:11,120 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:52:11,181 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:52:12,115 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:52:12,166 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:52:12,281 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:52:13,273 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:52:13,273 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:52:15,217 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:52:15,260 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:52:15,341 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:52:15,343 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:52:16,341 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:52:17,342 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:52:18,374 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:52:18,428 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:52:18,532 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:52:19,343 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:52:19,343 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:52:21,190 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:52:21,243 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:52:21,326 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:52:21,343 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:52:21,343 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:52:23,344 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:52:24,088 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:52:24,142 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:52:24,233 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:52:24,344 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:52:25,345 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:52:26,193 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:52:26,195 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:52:26,985 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:52:27,039 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:52:27,177 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:52:27,345 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:52:27,346 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:52:27,636 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 00:52:29,346 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:52:29,797 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:52:29,866 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:52:29,958 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:52:30,347 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:52:31,347 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:52:32,558 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:52:32,614 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:52:32,703 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:52:33,348 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:52:33,348 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:52:35,215 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:52:35,267 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:52:35,352 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:52:35,352 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:52:35,358 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:52:37,353 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:52:37,780 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:52:37,834 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:52:37,928 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:52:38,353 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:52:39,353 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:52:40,246 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:52:40,326 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:52:40,420 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:52:41,277 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:52:41,278 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:52:41,412 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:52:41,412 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:52:42,582 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:52:42,636 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:52:42,723 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:52:43,412 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:52:43,413 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:52:44,814 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:52:44,868 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:52:44,961 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:52:45,413 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:52:45,413 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:52:46,834 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:52:46,897 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:52:46,981 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:52:47,414 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:52:47,414 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:52:48,728 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:52:48,803 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:52:48,889 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:52:49,415 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:52:49,415 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:52:50,514 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:52:50,567 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:52:50,651 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:52:51,415 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:52:51,416 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:52:52,022 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:52:52,074 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:52:52,190 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:52:52,416 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:52:53,416 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:52:53,446 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:52:53,501 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:52:53,585 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:52:54,416 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:52:54,674 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:52:54,753 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:52:54,843 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:52:55,417 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:52:55,417 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:52:56,344 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:52:56,346 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:52:56,494 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:52:56,579 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:52:56,657 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:52:57,471 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:52:57,472 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:52:58,322 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 00:52:59,472 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:53:00,128 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:53:00,210 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:53:00,298 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:53:00,472 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:53:01,473 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:53:03,473 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:53:03,838 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:53:03,888 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:53:03,973 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:53:04,474 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:53:05,474 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:53:07,475 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:53:07,543 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:53:07,591 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:53:07,676 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:53:08,475 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:53:09,476 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:53:11,278 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:53:11,329 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:53:11,414 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:53:11,476 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:53:11,477 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:53:11,719 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:53:11,720 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:53:13,477 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:53:14,477 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:53:14,953 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:53:15,001 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:53:15,078 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:53:15,478 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:53:15,478 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:53:16,478 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:53:18,479 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:53:18,609 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:53:18,682 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:53:18,764 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:53:19,479 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:53:20,480 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:53:22,191 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:53:22,240 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:53:22,322 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:53:22,480 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:53:22,481 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:53:24,481 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:53:25,757 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:53:25,807 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:53:25,888 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:53:26,482 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:53:26,483 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:53:26,898 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:53:26,900 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:53:28,483 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:53:28,965 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 00:53:29,297 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:53:29,345 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:53:29,429 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:53:29,483 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:53:30,484 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:53:32,485 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:53:32,849 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:53:32,896 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:53:32,978 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:53:33,485 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:53:34,485 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:53:36,403 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:53:36,455 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:53:36,531 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:53:36,536 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:53:37,531 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:53:38,531 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:53:39,907 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:53:39,978 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:53:40,060 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:53:40,532 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:53:40,532 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:53:41,948 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:53:41,949 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:53:42,533 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:53:43,387 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:53:43,438 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:53:43,522 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:53:43,533 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:53:44,534 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:53:46,535 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:53:46,837 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:53:46,888 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:53:46,978 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:53:47,535 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:53:48,535 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:53:50,279 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:53:50,331 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:53:50,416 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:53:50,536 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:53:50,536 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:53:52,537 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:53:53,743 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:53:53,791 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:53:53,876 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:53:54,537 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:53:54,538 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:53:55,538 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:53:57,120 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:53:57,170 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:53:57,251 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:53:57,438 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:53:57,440 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:53:57,539 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:53:57,539 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:53:58,539 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:53:59,509 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 00:53:59,540 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:54:00,507 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:54:00,559 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:54:00,642 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:54:01,559 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:54:01,560 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:54:02,560 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:54:03,560 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:54:03,849 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:54:03,899 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:54:03,982 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:54:04,561 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:54:05,561 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:54:07,199 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:54:07,253 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:54:07,336 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:54:07,562 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:54:07,562 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:54:09,563 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:54:10,570 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:54:10,618 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:54:10,696 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:54:11,563 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:54:11,564 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:54:13,000 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:54:13,002 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:54:13,564 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:54:13,941 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:54:13,991 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:54:14,070 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:54:14,565 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:54:15,565 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:54:17,255 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:54:17,305 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:54:17,402 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:54:17,566 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:54:17,566 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:54:19,566 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:54:20,532 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:54:20,582 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:54:20,663 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:54:21,603 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:54:21,604 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:54:23,604 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:54:23,800 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:54:23,852 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:54:23,931 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:54:24,605 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:54:25,605 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:54:26,971 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:54:27,023 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:54:27,105 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:54:27,606 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:54:27,606 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:54:28,063 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:54:28,065 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:54:29,606 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:54:30,063 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 00:54:30,162 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:54:30,213 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:54:30,343 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:54:30,607 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:54:31,607 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:54:33,320 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:54:33,357 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:54:33,440 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:54:33,608 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:54:33,608 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:54:35,609 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:54:36,405 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:54:36,453 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:54:36,534 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:54:36,609 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:54:37,609 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:54:39,476 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:54:39,528 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:54:39,609 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:54:39,610 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:54:39,610 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:54:41,611 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:54:42,619 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:54:42,671 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:54:42,760 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:54:43,139 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:54:43,141 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:54:43,612 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:54:43,612 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:54:45,612 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:54:45,715 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:54:45,765 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:54:45,854 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:54:46,613 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:54:47,613 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:54:48,613 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:54:48,796 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:54:48,844 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:54:48,938 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:54:49,614 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:54:49,614 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:54:50,614 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:54:51,736 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:54:51,788 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:54:51,874 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:54:52,615 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:54:52,615 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:54:54,616 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:54:54,701 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:54:54,753 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:54:54,844 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:54:55,616 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:54:56,617 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:54:57,587 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:54:57,639 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:54:57,723 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:54:58,257 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:54:58,259 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:54:58,640 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:54:58,640 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:55:00,500 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:55:00,575 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:55:00,657 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:55:00,663 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:55:00,700 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 00:55:01,657 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:55:02,657 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:55:03,368 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:55:03,420 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:55:03,507 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:55:03,658 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:55:04,658 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:55:06,074 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:55:06,123 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:55:06,226 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:55:06,659 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:55:06,659 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:55:08,660 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:55:08,678 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:55:08,729 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:55:08,816 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:55:09,660 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:55:10,661 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:55:11,149 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:55:11,225 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:55:11,310 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:55:11,661 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:55:12,661 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:55:13,410 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:55:13,411 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:55:13,498 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:55:13,551 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:55:13,634 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:55:13,662 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:55:14,662 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:55:15,683 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:55:15,736 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:55:15,825 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:55:16,663 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:55:16,663 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:55:17,755 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:55:17,805 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:55:17,886 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:55:18,664 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:55:18,664 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:55:19,722 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:55:19,787 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:55:19,869 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:55:20,665 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:55:20,665 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:55:21,533 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:55:21,585 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:55:21,670 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:55:21,677 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:55:22,671 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:55:23,118 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:55:23,170 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:55:23,253 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:55:23,671 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:55:24,531 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:55:24,582 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:55:24,665 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:55:24,671 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:55:24,672 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:55:25,757 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:55:25,807 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:55:25,892 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:55:26,672 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:55:26,673 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:55:27,552 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:55:27,698 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:55:27,786 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:55:28,469 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:55:28,470 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:55:28,704 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:55:28,704 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:55:30,705 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:55:31,266 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:55:31,318 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:55:31,404 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:55:31,453 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 00:55:31,705 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:55:32,706 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:55:34,706 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:55:34,998 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:55:35,051 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:55:35,132 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:55:35,707 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:55:36,707 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:55:38,697 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:55:38,749 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:55:38,754 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:55:38,833 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:55:39,749 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:55:40,750 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:55:42,320 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:55:42,369 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:55:42,455 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:55:42,751 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:55:43,515 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:55:43,516 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:55:43,751 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:55:44,752 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:55:45,752 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:55:45,860 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:55:45,914 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:55:46,003 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:55:46,752 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:55:46,753 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:55:47,753 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:55:49,401 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:55:49,453 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:55:49,536 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:55:49,754 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:55:49,754 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:55:50,754 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:55:51,755 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:55:52,941 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:55:53,014 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:55:53,097 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:55:53,756 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:55:53,756 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:55:54,756 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:55:55,756 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:55:56,511 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:55:56,563 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:55:56,651 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:55:56,757 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:55:57,757 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:55:58,758 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:55:58,796 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:55:58,798 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:55:59,758 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:56:00,006 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:56:00,057 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:56:00,139 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:56:00,758 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:56:00,759 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:56:01,759 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:56:01,982 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 00:56:03,495 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:56:03,545 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:56:03,632 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:56:03,759 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:56:03,760 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:56:04,760 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:56:05,760 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:56:06,912 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:56:06,964 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:56:07,047 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:56:07,761 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:56:07,761 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:56:09,762 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:56:10,358 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:56:10,409 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:56:10,493 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:56:10,762 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:56:11,763 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:56:13,763 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:56:13,831 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:56:13,881 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:56:13,971 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:56:14,008 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:56:14,010 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:56:14,764 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:56:15,764 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:56:17,261 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:56:17,312 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:56:17,393 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:56:17,765 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:56:17,765 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:56:19,766 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:56:20,671 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:56:20,733 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:56:20,815 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:56:20,821 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:56:21,815 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:56:22,816 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:56:23,816 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:56:24,103 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:56:24,154 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:56:24,242 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:56:24,816 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:56:24,817 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:56:25,817 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:56:26,817 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:56:27,478 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:56:27,531 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:56:27,619 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:56:27,818 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:56:28,818 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:56:29,377 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:56:29,379 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:56:29,818 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:56:30,819 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:56:30,897 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:56:30,950 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:56:31,031 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:56:31,819 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:56:31,820 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:56:32,524 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 00:56:32,820 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:56:34,260 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:56:34,312 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:56:34,451 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:56:34,820 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:56:34,821 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:56:35,821 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:56:36,821 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:56:37,561 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:56:37,613 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:56:37,699 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:56:37,822 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:56:38,822 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:56:39,823 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:56:40,823 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:56:40,853 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:56:40,904 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:56:41,012 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:56:41,823 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:56:42,824 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:56:43,824 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:56:44,186 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:56:44,237 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:56:44,317 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:56:44,737 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:56:44,738 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:56:44,824 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:56:44,825 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:56:45,825 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:56:46,825 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:56:47,525 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:56:47,577 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:56:47,666 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:56:47,826 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:56:48,826 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:56:49,826 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:56:50,827 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:56:50,871 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:56:50,931 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:56:51,018 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:56:51,827 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:56:51,827 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:56:52,828 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:56:54,097 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:56:54,150 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:56:54,234 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:56:54,828 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:56:54,829 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:56:55,829 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:56:56,829 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:56:57,384 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:56:57,437 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:56:57,521 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:56:57,830 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:56:57,830 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:56:58,830 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:56:59,833 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:56:59,834 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:57:00,561 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:57:00,616 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:57:00,706 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:57:00,831 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:57:01,832 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:57:03,064 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 00:57:03,740 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:57:03,793 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:57:03,874 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:57:03,879 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:57:04,874 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:57:05,874 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:57:06,865 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:57:06,917 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:57:06,996 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:57:07,902 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:57:07,902 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:57:09,902 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:57:09,957 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:57:10,009 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:57:10,088 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:57:10,903 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:57:11,903 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:57:13,077 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:57:13,129 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:57:13,218 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:57:13,904 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:57:13,904 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:57:15,077 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:57:15,078 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:57:15,905 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:57:16,154 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:57:16,204 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:57:16,292 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:57:16,905 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:57:17,905 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:57:19,153 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:57:19,205 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:57:19,294 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:57:19,906 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:57:19,906 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:57:20,906 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:57:21,907 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:57:21,978 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:57:22,028 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:57:22,113 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:57:22,907 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:57:23,908 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:57:24,794 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:57:24,848 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:57:24,931 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:57:24,937 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:57:25,931 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:57:25,931 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:57:26,931 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:57:27,654 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:57:27,706 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:57:27,809 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:57:27,932 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:57:27,932 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:57:28,932 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:57:29,932 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:57:30,162 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:57:30,164 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:57:30,356 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:57:30,409 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:57:30,497 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:57:30,933 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:57:31,933 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:57:32,934 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:57:33,026 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:57:33,075 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:57:33,155 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:57:33,615 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 00:57:33,934 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:57:33,935 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:57:34,935 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:57:35,608 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:57:35,660 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:57:35,751 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:57:35,935 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:57:35,935 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:57:37,936 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:57:38,087 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:57:38,138 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:57:38,221 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:57:38,936 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:57:38,936 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:57:39,936 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:57:40,334 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:57:40,385 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:57:40,464 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:57:40,937 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:57:40,937 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:57:41,937 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:57:42,427 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:57:42,478 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:57:42,559 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:57:42,937 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:57:42,938 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:57:43,938 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:57:44,353 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:57:44,405 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:57:44,488 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:57:44,938 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:57:44,938 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:57:45,233 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:57:45,234 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:57:45,939 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:57:46,134 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:57:46,186 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:57:46,264 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:57:46,939 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:57:46,939 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:57:47,783 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:57:47,841 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:57:47,928 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:57:47,939 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:57:47,940 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:57:48,940 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:57:49,285 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:57:49,333 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:57:49,414 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:57:49,940 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:57:49,941 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:57:50,675 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:57:50,726 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:57:50,809 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:57:50,941 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:57:50,941 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:57:51,920 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:57:51,969 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:57:51,985 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:57:52,052 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:57:52,975 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:57:52,975 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:57:53,118 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:57:53,170 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:57:53,248 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:57:53,976 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:57:53,976 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:57:54,868 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:57:55,024 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:57:55,042 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:57:55,127 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:57:56,014 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:57:56,014 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:57:57,014 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:57:58,015 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:57:58,658 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:57:58,710 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:57:58,789 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:57:59,015 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:58:00,016 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:58:00,345 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:58:00,346 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:58:01,016 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:58:02,016 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:58:02,423 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:58:02,475 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:58:02,562 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:58:03,017 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:58:04,017 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:58:04,297 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 00:58:05,018 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:58:06,018 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:58:06,126 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:58:06,178 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:58:06,268 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:58:07,018 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:58:08,019 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:58:09,019 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:58:09,858 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:58:09,912 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:58:10,002 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:58:10,019 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:58:10,020 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:58:11,020 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:58:13,020 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:58:13,542 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:58:13,593 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:58:13,680 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:58:14,021 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:58:15,021 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:58:15,542 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:58:15,544 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:58:17,022 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:58:17,107 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:58:17,160 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:58:17,247 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:58:18,023 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:58:19,023 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:58:20,707 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:58:20,762 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:58:20,871 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:58:21,024 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:58:21,024 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:58:23,024 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:58:24,263 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:58:24,314 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:58:24,395 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:58:25,025 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:58:25,025 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:58:27,026 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:58:27,821 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:58:27,873 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:58:27,955 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:58:28,026 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:58:29,027 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:58:30,720 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:58:30,721 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:58:31,028 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:58:31,399 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:58:31,453 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:58:31,537 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:58:32,028 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:58:33,029 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:58:34,810 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 00:58:34,904 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:58:34,954 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:58:35,037 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:58:35,043 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:58:36,037 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:58:37,038 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:58:38,330 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:58:38,380 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:58:38,513 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:58:39,038 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:58:39,039 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:58:40,039 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:58:41,039 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:58:41,810 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:58:41,864 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:58:41,947 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:58:42,040 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:58:43,040 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:58:44,040 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:58:45,041 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:58:45,293 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:58:45,344 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:58:45,437 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:58:45,971 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:58:45,972 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:58:46,041 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:58:47,042 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:58:48,042 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:58:48,692 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:58:48,745 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:58:48,834 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:58:49,042 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:58:49,043 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:58:50,043 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:58:51,043 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:58:52,081 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:58:52,134 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:58:52,218 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:58:53,044 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:58:53,044 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:58:54,044 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:58:55,044 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:58:55,486 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:58:55,539 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:58:55,626 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:58:56,045 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:58:57,045 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:58:58,046 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:58:59,014 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:58:59,068 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:58:59,157 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:59:00,090 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:59:00,090 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:59:01,132 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:59:01,133 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:59:02,091 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:59:02,200 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:59:02,254 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:59:02,345 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:59:03,091 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:59:04,091 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:59:05,404 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 00:59:05,606 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:59:05,658 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:59:05,747 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:59:06,092 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:59:06,092 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:59:08,093 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:59:08,975 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:59:09,028 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:59:09,112 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:59:09,113 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:59:10,112 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:59:12,113 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:59:12,258 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:59:12,311 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:59:12,394 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:59:13,113 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:59:14,114 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:59:15,556 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:59:15,609 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:59:15,697 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:59:16,115 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:59:16,115 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:59:16,246 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:59:16,248 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:59:18,115 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:59:18,806 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:59:18,859 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:59:18,940 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:59:19,116 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:59:20,116 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:59:22,006 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:59:22,058 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:59:22,139 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:59:22,146 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:59:23,139 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:59:24,139 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:59:25,236 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:59:25,289 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:59:25,377 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:59:26,140 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:59:26,140 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:59:28,141 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:59:28,496 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:59:28,548 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:59:28,640 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:59:29,141 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:59:30,141 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:59:31,319 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:59:31,320 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:59:31,673 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:59:31,725 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:59:31,809 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:59:32,142 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:59:32,142 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:59:34,143 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:59:34,815 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:59:34,892 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:59:34,981 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:59:35,143 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:59:36,114 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 00:59:36,144 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:59:37,950 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:59:38,003 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:59:38,093 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:59:38,144 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:59:38,145 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:59:40,145 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:59:41,094 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:59:41,145 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:59:41,231 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:59:42,228 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:59:42,229 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:59:44,212 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:59:44,267 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:59:44,272 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:59:44,355 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:59:45,267 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:59:46,268 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:59:46,383 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 00:59:46,385 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 00:59:47,262 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:59:47,315 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:59:47,411 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:59:48,316 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:59:48,316 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:59:50,257 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:59:50,308 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:59:50,390 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:59:50,397 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:59:51,390 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:59:52,391 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:59:53,189 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:59:53,245 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:59:53,336 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:59:53,391 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:59:54,391 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:59:56,096 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:59:56,138 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:59:56,222 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:59:56,392 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:59:56,393 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 00:59:57,393 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:59:58,943 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 00:59:58,995 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 00:59:59,080 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 00:59:59,394 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 00:59:59,394 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:00:01,395 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:00:01,455 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:00:01,456 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:00:01,637 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:00:01,692 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:00:01,774 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:00:02,395 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:00:03,395 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:00:04,305 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:00:04,360 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:00:04,464 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:00:05,457 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:00:05,457 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:00:06,823 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:00:06,906 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:00:06,998 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:00:06,999 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 01:00:07,457 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:00:07,458 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:00:09,236 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:00:09,292 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:00:09,375 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:00:09,458 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:00:09,458 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:00:11,459 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:00:11,588 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:00:11,643 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:00:11,735 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:00:12,459 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:00:13,460 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:00:13,716 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:00:13,769 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:00:13,885 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:00:14,460 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:00:15,460 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:00:15,733 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:00:15,786 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:00:15,875 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:00:16,461 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:00:16,503 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:00:16,504 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:00:17,461 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:00:17,678 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:00:17,732 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:00:17,823 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:00:18,462 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:00:19,426 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:00:19,478 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:00:19,479 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:00:19,562 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:00:20,479 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:00:21,009 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:00:21,062 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:00:21,148 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:00:21,479 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:00:21,480 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:00:22,478 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:00:22,531 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:00:22,611 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:00:23,526 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:00:23,526 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:00:23,779 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:00:23,834 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:00:23,916 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:00:24,526 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:00:25,571 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:00:25,572 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:00:25,759 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:00:25,853 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:00:26,563 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:00:27,564 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:00:29,329 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:00:29,385 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:00:29,479 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:00:29,564 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:00:29,565 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:00:31,565 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:00:31,776 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:00:31,777 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:00:33,085 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:00:33,140 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:00:33,231 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:00:33,566 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:00:33,566 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:00:35,566 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:00:36,567 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:00:36,792 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:00:36,849 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:00:36,995 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:00:37,567 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:00:37,568 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:00:37,637 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 01:00:38,568 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:00:40,529 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:00:40,589 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:00:40,599 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:00:40,687 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:00:41,590 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:00:41,590 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:00:42,590 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:00:44,272 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:00:44,338 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:00:44,438 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:00:44,591 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:00:44,591 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:00:45,591 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:00:46,592 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:00:46,889 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:00:46,890 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:00:47,941 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:00:47,998 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:00:48,096 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:00:48,593 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:00:48,593 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:00:49,593 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:00:50,593 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:00:51,569 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:00:51,629 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:00:51,730 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:00:52,630 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:00:52,630 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:00:53,630 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:00:54,631 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:00:55,183 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:00:55,269 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:00:55,368 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:00:55,631 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:00:55,633 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:00:56,632 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:00:58,632 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:00:58,781 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:00:58,841 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:00:58,939 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:00:59,633 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:00:59,633 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:01:00,633 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:01:01,948 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:01:01,949 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:01:02,310 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:01:02,367 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:01:02,469 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:01:02,634 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:01:02,634 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:01:03,635 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:01:04,635 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:01:05,807 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:01:05,865 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:01:05,962 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:01:06,636 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:01:06,636 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:01:08,236 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 01:01:08,637 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:01:09,334 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:01:09,391 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:01:09,485 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:01:09,637 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:01:10,637 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:01:12,638 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:01:12,798 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:01:12,858 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:01:12,957 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:01:13,639 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:01:14,639 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:01:16,254 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:01:16,336 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:01:16,462 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:01:16,640 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:01:16,640 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:01:17,041 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:01:17,043 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:01:18,641 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:01:19,771 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:01:19,830 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:01:19,926 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:01:20,642 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:01:20,642 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:01:22,643 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:01:23,186 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:01:23,272 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:01:23,370 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:01:23,643 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:01:24,644 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:01:26,644 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:01:26,671 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:01:26,732 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:01:26,822 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:01:27,645 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:01:28,645 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:01:29,645 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:01:30,122 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:01:30,182 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:01:30,274 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:01:30,646 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:01:31,646 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:01:32,183 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:01:32,185 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:01:33,504 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:01:33,586 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:01:33,678 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:01:33,680 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:01:34,679 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:01:35,679 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:01:36,924 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:01:36,995 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:01:37,105 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:01:37,680 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:01:37,680 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:01:38,920 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 01:01:39,681 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:01:40,255 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:01:40,314 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:01:40,410 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:01:40,681 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:01:41,682 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:01:43,633 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:01:43,691 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:01:43,782 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:01:43,792 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:01:44,723 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:01:45,723 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:01:46,946 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:01:47,004 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:01:47,101 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:01:47,562 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:01:47,563 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:01:47,724 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:01:47,725 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:01:49,725 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:01:50,169 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:01:50,227 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:01:50,326 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:01:50,726 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:01:51,726 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:01:53,386 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:01:53,444 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:01:53,544 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:01:53,727 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:01:53,727 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:01:55,728 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:01:56,645 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:01:56,714 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:01:56,821 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:01:57,819 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:01:57,820 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:01:59,820 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:01:59,910 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:01:59,968 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:02:00,073 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:02:00,820 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:02:01,821 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:02:02,669 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:02:02,671 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:02:03,113 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:02:03,172 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:02:03,268 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:02:03,822 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:02:03,822 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:02:05,822 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:02:06,308 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:02:06,364 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:02:06,492 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:02:06,823 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:02:07,823 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:02:09,529 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:02:09,590 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:02:09,681 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:02:09,750 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 01:02:09,824 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:02:09,824 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:02:11,825 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:02:12,616 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:02:12,675 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:02:12,775 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:02:12,825 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:02:13,826 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:02:14,826 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:02:15,691 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:02:15,751 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:02:15,841 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:02:15,841 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:02:15,847 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:02:17,784 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:02:17,785 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:02:18,748 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:02:18,807 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:02:18,911 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:02:18,919 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:02:19,911 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:02:19,911 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:02:20,911 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:02:21,749 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:02:21,807 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:02:21,905 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:02:21,912 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:02:21,912 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:02:22,912 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:02:24,670 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:02:24,752 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:02:24,847 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:02:24,913 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:02:24,913 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:02:26,913 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:02:27,577 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:02:27,644 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:02:27,736 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:02:27,914 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:02:28,914 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:02:30,419 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:02:30,499 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:02:30,597 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:02:30,915 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:02:30,916 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:02:32,869 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:02:32,870 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:02:32,916 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:02:33,230 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:02:33,313 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:02:33,410 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:02:33,916 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:02:34,917 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:02:35,984 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:02:36,070 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:02:36,165 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:02:36,918 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:02:36,918 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:02:38,673 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:02:38,731 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:02:38,828 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:02:38,918 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:02:38,919 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:02:40,365 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 01:02:40,919 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:02:41,280 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:02:41,339 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:02:41,430 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:02:41,919 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:02:42,920 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:02:43,723 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:02:43,782 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:02:43,879 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:02:43,920 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:02:44,921 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:02:46,010 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:02:46,068 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:02:46,170 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:02:46,921 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:02:46,922 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:02:47,922 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:02:47,923 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:02:48,188 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:02:48,281 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:02:48,373 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:02:48,922 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:02:48,923 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:02:50,172 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:02:50,230 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:02:50,320 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:02:50,923 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:02:50,923 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:02:52,018 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:02:52,099 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:02:52,194 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:02:52,924 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:02:52,924 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:02:53,688 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:02:53,748 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:02:53,900 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:02:53,924 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:02:54,925 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:02:55,178 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:02:55,266 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:02:55,412 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:02:55,925 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:02:56,459 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:02:56,517 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:02:56,616 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:02:56,926 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:02:56,926 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:02:58,342 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:02:58,545 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:02:58,636 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:02:58,926 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:02:58,927 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:03:00,927 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:03:02,223 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:03:02,278 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:03:02,378 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:03:02,928 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:03:02,928 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:03:03,011 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:03:03,012 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:03:03,928 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:03:05,929 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:03:05,968 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:03:06,026 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:03:06,115 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:03:06,930 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:03:07,930 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:03:09,694 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:03:09,752 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:03:09,854 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:03:09,931 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:03:09,931 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:03:11,088 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 01:03:11,932 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:03:13,412 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:03:13,486 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:03:13,582 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:03:13,933 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:03:13,933 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:03:15,933 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:03:17,122 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:03:17,181 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:03:17,276 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:03:17,934 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:03:17,935 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:03:18,081 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:03:18,082 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:03:19,935 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:03:20,783 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:03:20,841 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:03:20,936 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:03:20,943 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:03:21,937 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:03:23,938 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:03:24,382 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:03:24,440 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:03:24,538 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:03:24,938 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:03:25,938 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:03:27,939 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:03:27,998 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:03:28,054 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:03:28,151 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:03:28,940 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:03:29,940 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:03:31,618 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:03:31,674 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:03:31,770 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:03:31,941 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:03:31,941 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:03:33,137 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:03:33,138 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:03:33,942 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:03:35,217 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:03:35,306 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:03:35,402 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:03:35,942 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:03:35,943 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:03:37,943 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:03:38,825 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:03:38,881 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:03:38,979 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:03:39,973 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:03:39,974 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:03:41,750 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 01:03:41,974 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:03:42,339 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:03:42,395 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:03:42,493 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:03:42,975 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:03:43,975 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:03:45,814 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:03:45,872 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:03:45,966 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:03:45,976 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:03:45,976 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:03:47,977 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:03:48,221 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:03:48,222 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:03:49,295 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:03:49,351 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:03:49,438 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:03:49,977 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:03:49,978 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:03:51,978 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:03:52,784 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:03:52,841 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:03:52,939 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:03:52,978 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:03:52,979 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:03:53,979 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:03:54,979 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:03:56,214 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:03:56,272 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:03:56,361 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:03:56,980 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:03:56,980 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:03:57,980 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:03:58,981 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:03:59,638 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:03:59,705 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:03:59,801 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:03:59,981 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:03:59,981 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:04:00,981 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:04:02,982 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:04:03,051 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:04:03,109 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:04:03,206 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:04:03,372 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:04:03,374 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:04:03,983 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:04:03,983 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:04:04,983 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:04:06,479 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:04:06,537 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:04:06,637 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:04:06,984 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:04:06,984 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:04:07,984 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:04:08,985 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:04:09,898 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:04:09,955 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:04:10,044 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:04:11,043 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:04:11,043 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:04:12,514 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 01:04:13,044 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:04:13,227 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:04:13,284 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:04:13,372 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:04:14,044 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:04:15,045 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:04:16,562 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:04:16,624 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:04:16,724 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:04:17,045 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:04:17,046 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:04:18,618 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:04:18,620 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:04:19,046 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:04:19,847 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:04:19,905 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:04:20,003 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:04:20,047 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:04:21,047 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:04:23,048 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:04:23,085 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:04:23,142 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:04:23,240 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:04:24,049 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:04:25,049 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:04:26,334 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:04:26,391 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:04:26,484 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:04:27,050 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:04:27,050 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:04:29,050 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:04:29,557 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:04:29,616 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:04:29,714 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:04:30,051 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:04:31,051 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:04:32,757 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:04:32,817 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:04:32,911 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:04:33,052 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:04:33,052 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:04:33,774 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:04:33,776 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:04:35,053 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:04:35,914 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:04:35,995 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:04:36,084 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:04:36,085 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:04:36,086 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:04:37,085 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:04:38,085 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:04:39,076 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:04:39,133 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:04:39,229 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:04:40,128 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:04:40,128 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:04:41,128 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:04:42,128 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:04:42,202 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:04:42,288 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:04:42,387 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:04:43,129 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:04:43,322 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 01:04:44,129 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:04:45,130 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:04:45,346 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:04:45,408 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:04:45,507 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:04:46,130 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:04:46,131 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:04:47,131 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:04:48,131 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:04:48,457 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:04:48,514 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:04:48,612 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:04:49,026 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:04:49,028 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:04:49,132 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:04:50,132 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:04:51,132 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:04:51,468 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:04:51,523 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:04:51,619 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:04:52,133 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:04:52,133 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:04:53,133 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:04:54,134 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:04:54,386 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:04:54,449 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:04:54,551 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:04:55,134 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:04:55,134 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:04:56,134 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:04:57,298 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:04:57,356 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:04:57,454 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:04:58,135 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:04:58,135 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:04:59,136 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:05:00,136 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:05:00,249 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:05:00,313 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:05:00,404 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:05:01,137 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:05:01,138 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:05:02,138 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:05:03,030 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:05:03,086 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:05:03,184 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:05:04,104 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:05:04,106 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:05:04,177 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:05:04,177 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:05:05,177 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:05:05,734 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:05:05,811 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:05:05,901 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:05:06,178 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:05:06,178 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:05:08,178 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:05:08,397 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:05:08,451 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:05:08,570 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:05:09,179 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:05:10,179 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:05:10,972 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:05:11,055 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:05:11,219 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:05:12,212 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:05:12,213 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:05:13,513 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:05:13,570 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:05:13,662 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:05:14,105 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 01:05:14,213 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:05:14,213 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:05:15,805 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:05:15,862 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:05:15,960 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:05:16,214 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:05:16,214 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:05:17,985 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:05:18,038 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:05:18,137 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:05:18,214 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:05:18,215 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:05:19,266 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:05:19,267 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:05:19,974 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:05:20,032 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:05:20,124 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:05:20,215 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:05:20,215 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:05:21,216 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:05:21,782 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:05:21,842 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:05:21,939 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:05:22,216 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:05:22,216 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:05:23,216 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:05:23,426 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:05:23,483 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:05:23,576 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:05:24,217 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:05:24,217 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:05:24,854 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:05:24,915 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:05:25,019 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:05:25,217 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:05:25,218 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:05:26,145 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:05:26,203 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:05:26,295 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:05:26,298 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:05:27,296 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:05:27,363 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:05:27,418 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:05:27,508 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:05:28,296 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:05:28,296 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:05:29,156 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:05:29,331 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:05:29,331 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:05:29,427 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:05:30,331 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:05:30,331 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:05:32,332 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:05:32,946 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:05:33,003 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:05:33,088 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:05:33,332 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:05:34,318 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:05:34,319 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:05:34,332 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:05:36,333 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:05:36,728 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:05:36,789 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:05:36,881 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:05:37,333 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:05:38,334 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:05:40,334 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:05:40,494 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:05:40,552 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:05:40,640 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:05:41,335 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:05:42,335 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:05:43,336 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:05:44,157 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:05:44,215 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:05:44,330 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:05:44,336 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:05:44,868 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 01:05:45,336 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:05:46,337 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:05:47,337 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:05:47,883 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:05:47,940 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:05:48,037 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:05:48,338 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:05:49,338 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:05:49,363 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:05:49,364 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:05:50,338 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:05:51,339 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:05:51,557 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:05:51,615 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:05:51,711 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:05:52,339 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:05:52,340 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:05:53,340 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:05:55,186 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:05:55,238 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:05:55,333 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:05:55,340 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:05:55,341 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:05:56,341 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:05:57,341 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:05:58,733 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:05:58,790 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:05:58,881 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:05:59,342 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:05:59,342 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:06:00,343 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:06:01,343 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:06:02,280 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:06:02,337 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:06:02,429 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:06:03,428 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:06:03,428 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:06:04,428 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:06:04,522 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:06:04,523 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:06:05,428 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:06:05,839 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:06:05,896 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:06:05,986 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:06:06,429 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:06:06,429 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:06:07,429 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:06:09,343 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:06:09,402 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:06:09,489 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:06:09,495 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:06:10,489 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:06:10,489 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:06:11,489 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:06:12,872 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:06:12,931 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:06:13,031 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:06:13,490 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:06:13,490 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:06:14,490 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:06:15,396 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 01:06:15,491 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:06:16,415 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:06:16,472 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:06:16,563 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:06:17,562 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:06:17,562 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:06:19,562 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:06:19,690 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:06:19,691 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:06:19,911 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:06:19,969 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:06:20,063 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:06:20,563 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:06:21,563 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:06:23,438 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:06:23,495 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:06:23,608 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:06:23,608 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:06:23,609 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:06:25,609 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:06:26,887 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:06:26,942 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:06:27,060 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:06:27,609 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:06:28,610 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:06:29,610 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:06:30,301 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:06:30,355 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:06:30,444 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:06:30,611 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:06:30,611 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:06:31,611 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:06:32,611 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:06:33,779 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:06:33,841 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:06:33,938 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:06:34,612 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:06:34,612 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:06:34,737 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:06:34,738 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:06:35,613 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:06:36,613 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:06:37,170 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:06:37,227 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:06:37,324 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:06:37,613 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:06:38,614 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:06:39,614 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:06:40,615 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:06:40,689 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:06:40,746 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:06:40,840 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:06:41,615 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:06:41,615 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:06:42,615 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:06:44,092 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:06:44,151 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:06:44,245 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:06:44,616 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:06:44,617 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:06:45,617 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:06:46,304 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 01:06:46,617 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:06:47,418 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:06:47,473 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:06:47,572 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:06:47,617 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:06:48,618 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:06:49,618 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:06:49,881 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:06:49,882 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:06:50,619 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:06:50,764 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:06:50,822 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:06:50,919 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:06:51,619 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:06:51,619 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:06:52,620 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:06:54,095 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:06:54,151 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:06:54,249 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:06:54,620 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:06:54,621 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:06:55,621 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:06:56,621 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:06:57,410 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:06:57,486 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:06:57,574 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:06:57,622 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:06:58,622 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:06:59,622 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:07:00,619 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:07:00,649 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:07:00,679 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:07:00,775 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:07:01,649 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:07:01,650 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:07:02,650 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:07:03,813 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:07:03,890 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:07:03,983 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:07:04,651 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:07:04,651 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:07:04,953 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:07:04,955 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:07:05,651 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:07:06,652 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:07:06,981 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:07:07,039 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:07:07,133 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:07:07,652 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:07:08,653 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:07:10,134 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:07:10,191 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:07:10,291 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:07:10,653 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:07:11,654 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:07:12,654 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:07:13,297 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:07:13,354 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:07:13,445 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:07:13,654 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:07:13,655 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:07:14,655 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:07:15,655 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:07:16,424 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:07:16,480 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:07:16,570 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:07:16,656 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:07:16,833 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 01:07:17,656 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:07:18,656 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:07:19,533 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:07:19,590 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:07:19,736 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:07:19,743 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:07:20,112 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:07:20,114 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:07:20,736 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:07:20,737 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:07:21,737 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:07:22,524 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:07:22,583 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:07:22,681 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:07:22,737 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:07:23,738 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:07:24,738 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:07:25,528 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:07:25,586 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:07:25,681 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:07:25,739 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:07:25,739 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:07:26,739 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:07:27,740 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:07:28,429 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:07:28,483 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:07:28,574 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:07:28,740 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:07:29,740 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:07:30,741 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:07:31,255 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:07:31,311 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:07:31,410 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:07:31,741 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:07:31,742 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:07:32,742 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:07:33,742 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:07:34,028 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:07:34,087 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:07:34,208 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:07:34,743 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:07:34,743 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:07:35,202 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:07:35,204 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:07:35,743 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:07:36,820 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:07:36,875 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:07:36,970 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:07:37,744 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:07:38,744 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:07:39,500 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:07:39,577 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:07:39,675 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:07:39,744 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:07:40,745 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:07:42,087 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:07:42,143 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:07:42,234 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:07:42,746 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:07:42,746 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:07:44,552 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:07:44,609 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:07:44,705 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:07:44,746 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:07:44,747 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:07:46,747 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:07:46,795 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:07:46,852 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:07:46,941 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:07:47,483 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 01:07:47,748 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:07:48,748 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:07:48,839 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:07:48,894 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:07:48,990 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:07:49,748 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:07:50,367 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:07:50,368 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:07:50,749 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:07:50,769 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:07:50,826 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:07:50,945 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:07:51,749 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:07:52,597 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:07:52,654 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:07:52,750 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:07:52,750 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:07:52,755 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:07:54,259 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:07:54,315 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:07:54,409 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:07:54,750 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:07:54,751 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:07:55,771 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:07:55,827 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:07:55,947 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:07:56,751 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:07:56,751 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:07:57,111 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:07:57,169 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:07:57,264 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:07:57,752 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:07:58,370 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:07:58,427 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:07:58,537 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:07:58,752 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:07:58,752 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:08:00,244 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:08:00,426 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:08:00,526 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:08:00,753 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:08:00,753 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:08:02,754 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:08:04,075 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:08:04,147 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:08:04,263 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:08:04,754 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:08:04,755 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:08:05,420 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:08:05,421 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:08:05,755 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:08:06,755 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:08:07,902 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:08:07,957 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:08:08,050 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:08:08,756 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:08:08,756 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:08:09,756 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:08:10,757 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:08:11,613 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:08:11,668 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:08:11,756 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:08:11,757 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:08:12,758 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:08:13,758 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:08:14,758 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:08:15,401 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:08:15,459 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:08:15,555 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:08:15,759 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:08:16,759 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:08:17,760 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:08:18,318 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 01:08:18,760 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:08:19,111 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:08:19,169 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:08:19,267 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:08:19,760 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:08:20,486 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:08:20,487 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:08:20,761 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:08:21,761 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:08:22,761 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:08:22,834 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:08:22,892 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:08:22,989 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:08:23,762 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:08:23,762 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:08:25,763 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:08:26,509 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:08:26,569 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:08:26,666 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:08:26,763 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:08:27,764 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:08:29,765 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:08:30,203 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:08:30,260 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:08:30,359 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:08:30,765 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:08:31,766 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:08:33,766 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:08:33,995 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:08:34,050 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:08:34,153 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:08:34,767 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:08:35,541 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:08:35,542 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:08:35,767 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:08:37,420 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:08:37,475 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:08:37,569 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:08:37,768 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:08:37,768 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:08:39,769 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:08:40,995 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:08:41,053 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:08:41,147 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:08:41,769 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:08:41,770 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:08:43,770 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:08:44,598 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:08:44,653 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:08:44,750 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:08:44,770 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:08:45,771 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:08:47,771 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:08:48,106 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:08:48,164 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:08:48,253 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:08:48,772 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:08:49,200 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 01:08:49,772 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:08:50,727 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:08:50,728 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:08:51,591 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:08:51,649 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:08:51,745 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:08:51,773 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:08:51,773 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:08:53,774 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:08:54,774 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:08:55,084 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:08:55,141 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:08:55,235 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:08:55,775 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:08:55,775 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:08:56,775 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:08:57,775 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:08:58,538 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:08:58,596 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:08:58,690 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:08:58,776 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:08:59,776 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:09:00,777 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:09:01,777 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:09:02,014 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:09:02,070 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:09:02,162 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:09:02,778 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:09:04,778 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:09:05,438 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:09:05,510 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:09:05,617 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:09:05,779 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:09:05,954 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:09:05,956 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:09:06,779 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:09:08,780 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:09:08,918 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:09:08,976 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:09:09,069 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:09:09,781 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:09:10,781 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:09:12,306 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:09:12,364 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:09:12,465 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:09:12,782 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:09:12,782 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:09:14,783 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:09:15,713 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:09:15,771 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:09:15,861 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:09:16,860 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:09:16,860 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:09:18,861 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:09:19,087 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:09:19,146 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:09:19,237 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:09:19,680 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 01:09:19,861 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:09:20,862 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:09:21,260 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:09:21,261 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:09:22,420 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:09:22,478 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:09:22,571 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:09:22,862 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:09:22,863 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:09:24,863 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:09:25,766 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:09:25,823 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:09:25,989 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:09:26,982 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:09:26,983 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:09:28,983 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:09:29,113 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:09:29,187 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:09:29,276 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:09:29,984 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:09:30,984 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:09:32,365 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:09:32,422 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:09:32,518 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:09:32,985 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:09:32,985 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:09:34,986 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:09:35,632 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:09:35,698 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:09:35,794 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:09:35,986 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:09:36,371 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:09:36,373 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:09:36,987 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:09:38,841 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:09:38,899 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:09:38,992 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:09:38,994 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:09:39,992 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:09:40,993 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:09:42,051 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:09:42,109 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:09:42,205 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:09:42,993 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:09:42,994 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:09:43,994 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:09:44,994 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:09:45,294 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:09:45,350 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:09:45,444 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:09:45,995 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:09:46,995 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:09:47,996 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:09:48,447 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:09:48,504 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:09:48,598 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:09:48,996 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:09:49,996 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:09:50,350 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 01:09:51,418 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:09:51,419 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:09:51,629 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:09:51,688 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:09:51,788 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:09:51,997 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:09:51,997 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:09:53,998 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:09:54,713 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:09:54,770 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:09:54,867 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:09:54,998 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:09:55,999 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:09:57,804 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:09:57,862 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:09:57,953 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:09:57,999 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:09:58,000 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:10:00,000 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:10:00,814 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:10:00,869 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:10:00,965 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:10:01,001 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:10:02,001 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:10:03,761 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:10:03,817 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:10:03,914 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:10:04,002 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:10:04,002 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:10:06,003 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:10:06,470 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:10:06,471 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:10:06,640 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:10:06,707 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:10:06,797 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:10:07,003 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:10:08,004 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:10:09,453 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:10:09,511 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:10:09,605 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:10:10,005 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:10:10,005 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:10:12,005 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:10:12,200 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:10:12,256 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:10:12,353 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:10:13,006 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:10:14,006 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:10:14,852 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:10:14,911 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:10:15,007 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:10:15,009 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:10:16,007 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:10:17,443 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:10:17,500 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:10:17,597 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:10:18,008 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:10:18,008 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:10:19,854 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:10:19,912 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:10:20,004 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:10:20,009 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:10:20,009 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:10:21,165 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 01:10:21,543 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:10:21,544 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:10:22,009 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:10:22,124 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:10:22,182 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:10:22,281 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:10:23,010 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:10:24,010 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:10:24,193 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:10:24,251 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:10:24,344 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:10:25,011 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:10:26,011 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:10:26,150 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:10:26,207 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:10:26,328 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:10:27,012 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:10:27,926 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:10:28,010 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:10:28,097 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:10:28,106 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:10:29,098 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:10:29,540 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:10:29,598 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:10:29,695 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:10:30,098 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:10:30,098 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:10:30,972 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:10:31,032 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:10:31,126 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:10:32,124 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:10:32,124 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:10:32,263 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:10:32,322 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:10:32,423 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:10:33,124 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:10:34,126 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:10:34,195 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:10:34,322 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:10:34,422 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:10:35,174 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:10:36,175 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:10:36,683 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:10:36,684 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:10:38,176 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:10:40,177 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:10:42,178 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:10:44,179 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:10:48,180 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:10:50,181 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:10:51,788 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:10:51,788 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:10:51,886 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 01:10:52,182 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:10:56,184 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:10:58,184 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:11:02,186 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:11:04,186 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:11:06,187 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:11:06,863 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:11:06,863 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:11:10,189 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:11:12,190 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:11:16,191 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:11:18,192 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:11:21,927 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:11:21,927 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:11:22,194 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:11:22,285 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 01:11:24,194 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:11:28,196 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:11:30,197 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:11:32,197 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:11:36,199 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:11:36,998 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:11:36,999 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:11:40,201 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:11:44,202 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:11:46,203 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:11:50,204 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:11:52,056 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:11:52,056 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:11:52,205 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:11:52,709 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 01:11:56,207 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:11:58,207 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:12:00,208 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:12:04,209 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:12:06,210 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:12:07,114 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:12:07,114 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:12:08,211 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:12:12,213 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:12:14,214 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:12:19,215 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:12:21,216 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:12:22,191 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:12:22,191 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:12:23,113 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 01:12:25,218 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:12:29,219 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:12:31,220 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:12:33,221 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:12:37,222 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:12:37,265 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:12:37,266 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:12:39,223 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:12:43,224 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:12:45,225 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:12:47,226 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:12:51,228 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:12:52,344 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:12:52,345 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:12:53,229 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:12:53,514 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 01:12:57,230 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:12:59,231 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:13:03,232 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:13:05,233 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:13:07,234 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:13:07,393 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:13:07,393 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:13:11,235 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:13:13,236 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:13:17,238 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:13:19,238 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:13:22,450 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:13:22,451 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:13:23,240 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:13:23,910 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 01:13:25,241 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:13:27,241 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:13:31,243 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:13:33,244 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:13:37,245 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:13:37,528 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:13:37,528 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:13:39,246 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:13:43,248 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:13:47,249 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:13:51,250 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:13:52,604 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:13:52,604 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:13:54,310 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 01:13:55,252 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:13:57,253 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:14:01,254 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:14:03,255 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:14:07,257 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:14:07,694 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:14:07,694 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:14:11,258 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:14:13,259 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:14:17,260 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:14:19,261 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:14:21,262 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:14:22,819 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:14:22,819 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:14:24,755 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 01:14:25,264 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:14:27,264 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:14:29,265 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:14:33,266 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:14:35,267 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:14:37,946 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:14:37,947 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:14:39,269 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:14:43,270 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:14:45,271 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:14:49,272 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:14:51,273 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:14:53,006 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:14:53,006 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:14:53,274 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:14:55,163 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 01:14:57,275 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:14:59,276 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:15:01,277 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:15:03,278 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:15:07,279 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:15:08,121 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:15:08,121 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:15:09,280 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:15:13,282 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:15:15,283 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:15:17,284 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:15:21,285 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:15:23,202 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:15:23,202 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:15:23,286 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:15:25,287 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:15:25,563 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 01:15:29,288 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:15:31,289 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:15:35,291 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:15:37,291 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:15:38,257 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:15:38,258 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:15:41,293 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:15:43,294 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:15:45,295 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:15:47,295 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:15:51,297 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:15:53,298 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:15:53,378 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:15:53,379 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:15:55,973 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 01:15:57,299 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:15:59,300 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:16:01,301 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:16:03,302 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:16:07,303 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:16:08,560 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:16:08,560 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:16:09,304 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:16:13,305 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:16:15,306 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:16:17,307 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:16:21,308 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:16:23,309 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:16:23,666 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:16:23,666 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:16:26,387 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 01:16:27,311 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:16:29,312 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:16:33,313 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:16:37,315 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:16:38,779 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:16:38,780 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:16:39,315 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:16:43,317 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:16:47,318 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:16:51,320 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:16:53,321 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:16:53,950 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:16:53,951 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:16:56,785 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 01:16:57,322 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:17:01,324 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:17:03,324 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:17:05,325 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:17:09,000 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:17:09,001 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:17:09,326 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:17:11,327 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:17:15,328 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:17:19,330 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:17:21,331 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:17:24,088 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:17:24,089 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:17:25,332 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:17:27,206 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 01:17:27,333 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:17:31,334 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:17:33,335 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:17:37,337 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:17:39,133 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:17:39,133 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:17:39,338 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:17:43,339 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:17:45,340 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:17:49,341 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:17:51,342 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:17:53,343 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:17:54,197 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:17:54,197 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:17:57,345 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:17:57,596 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 01:17:59,345 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:18:01,346 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:18:05,348 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:18:09,250 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:18:09,250 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:18:09,350 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:18:12,351 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:18:16,352 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:18:20,354 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:18:22,355 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:18:24,302 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:18:24,302 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:18:26,356 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:18:28,005 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 01:18:28,357 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:18:32,359 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:18:34,359 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:18:38,361 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:18:39,352 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:18:39,353 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:18:40,362 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:18:44,363 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:18:46,364 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:18:48,365 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:18:52,366 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:18:54,367 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:18:54,399 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:18:54,399 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:18:58,368 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:18:58,409 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 01:19:00,369 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:19:02,370 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:19:06,371 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:19:08,372 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:19:09,459 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:19:09,460 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:19:10,373 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:19:14,374 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:19:18,376 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:19:20,377 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:19:24,378 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:19:24,507 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:19:24,507 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:19:26,379 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:19:28,816 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 01:19:30,381 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:19:32,381 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:19:34,382 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:19:36,383 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:19:38,384 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:19:39,551 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:19:39,552 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:19:42,385 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:19:44,386 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:19:46,387 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:19:48,387 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:19:52,389 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:19:54,390 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:19:54,606 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:19:54,607 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:19:56,391 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:19:58,391 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:19:59,201 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 01:20:02,393 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:20:04,394 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:20:08,396 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:20:09,655 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:20:09,656 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:20:10,396 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:20:12,397 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:20:14,398 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:20:18,399 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:20:20,400 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:20:22,401 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:20:24,711 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:20:24,712 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:20:26,402 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:20:28,403 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:20:29,593 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 01:20:32,404 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:20:34,405 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:20:38,407 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:20:39,845 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:20:39,846 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:20:40,408 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:20:42,408 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:20:46,410 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:20:48,410 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:20:50,411 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:20:52,412 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:20:54,413 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:20:54,911 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:20:54,911 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:20:58,414 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:21:00,010 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 01:21:00,415 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:21:04,417 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:21:06,418 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:21:09,987 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:21:09,988 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:21:10,419 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:21:12,420 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:21:14,421 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:21:18,423 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:21:20,424 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:21:22,425 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:21:25,042 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:21:25,043 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:21:26,426 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:21:28,427 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:21:30,384 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 01:21:32,428 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:21:34,429 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:21:36,430 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:21:40,105 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:21:40,105 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:21:40,431 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:21:42,432 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:21:46,433 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:21:48,434 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:21:50,435 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:21:54,437 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:21:55,190 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:21:55,190 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:21:58,438 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:22:00,439 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:22:00,773 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 01:22:04,440 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:22:08,442 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:22:10,238 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:22:10,238 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:22:10,443 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:22:14,444 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:22:18,446 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:22:20,447 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:22:24,448 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:22:25,337 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:22:25,337 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:22:28,449 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:22:30,450 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:22:31,159 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 01:22:34,452 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:22:36,453 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:22:38,453 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:22:40,477 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:22:40,477 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:22:42,455 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:22:44,455 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:22:48,457 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:22:50,458 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:22:54,459 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:22:55,553 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:22:55,553 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:22:56,460 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:22:58,461 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:23:01,542 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 01:23:02,462 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:23:04,463 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:23:09,465 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:23:10,621 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:23:10,621 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:23:11,465 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:23:13,466 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:23:17,468 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:23:19,468 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:23:21,469 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:23:25,471 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:23:25,668 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:23:25,669 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:23:27,471 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:23:31,473 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:23:31,931 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 01:23:33,474 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:23:37,475 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:23:39,476 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:23:40,723 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:23:40,724 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:23:43,478 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:23:45,478 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:23:49,480 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:23:53,481 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:23:55,482 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:23:55,779 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:23:55,779 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:23:57,483 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:24:01,485 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:24:02,321 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 01:24:05,486 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:24:07,487 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:24:10,838 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:24:10,838 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:24:11,488 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:24:13,489 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:24:17,491 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:24:21,492 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:24:25,494 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:24:25,895 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:24:25,896 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:24:27,495 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:24:31,496 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:24:32,705 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 01:24:35,497 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:24:37,499 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:24:40,972 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:24:40,972 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:24:41,500 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:24:43,501 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:24:45,502 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:24:49,503 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:24:51,504 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:24:53,505 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:24:55,506 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:24:56,039 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:24:56,039 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:24:59,507 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:25:03,090 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 01:25:03,509 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:25:05,510 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:25:09,511 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:25:11,129 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:25:11,129 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:25:11,512 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:25:15,513 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:25:17,514 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:25:19,515 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:25:23,516 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:25:26,293 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:25:26,293 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:25:28,518 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:25:32,519 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:25:33,480 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 01:25:36,521 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:25:40,523 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:25:41,474 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:25:41,474 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:25:42,523 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:25:46,525 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:25:48,526 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:25:50,527 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:25:54,528 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:25:56,538 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:25:56,538 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:25:58,530 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:26:00,531 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:26:03,885 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 01:26:04,532 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:26:06,533 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:26:10,535 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:26:11,583 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:26:11,584 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:26:12,535 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:26:16,537 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:26:20,538 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:26:22,539 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:26:24,540 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:26:26,541 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:26:26,647 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:26:26,647 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:26:28,542 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:26:32,543 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:26:34,292 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 01:26:36,545 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:26:38,546 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:26:40,547 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:26:41,705 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:26:41,705 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:26:42,547 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:26:44,548 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:26:46,348 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 01:26:46,349 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 01:26:46,349 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 01:26:46,349 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 01:26:46,349 DEBUG SenderThread:267739 [sender.py:send():235] send: metric +2022-03-03 01:26:46,349 DEBUG SenderThread:267739 [sender.py:send():235] send: history +2022-03-03 01:26:46,426 DEBUG SenderThread:267739 [sender.py:send():235] send: summary +2022-03-03 01:26:46,525 INFO SenderThread:267739 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-03 01:26:46,549 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:26:46,549 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/wandb-summary.json +2022-03-03 01:26:48,550 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:26:56,773 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:26:56,773 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:27:04,555 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:27:04,803 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 01:27:11,852 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:27:12,864 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:27:13,599 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/config.yaml +2022-03-03 01:27:26,603 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:27:28,006 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:27:28,006 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:27:35,256 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 01:27:43,053 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:27:43,053 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:27:58,124 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:27:58,125 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:28:05,691 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 01:28:13,203 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:28:13,203 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:28:28,273 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:28:28,273 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:28:36,116 DEBUG SenderThread:267739 [sender.py:send():235] send: stats +2022-03-03 01:28:43,340 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:28:43,340 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:28:46,632 INFO Thread-8 :267739 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/files/output.log +2022-03-03 01:28:58,388 DEBUG HandlerThread:267739 [handler.py:handle_request():131] handle_request: stop_status +2022-03-03 01:28:58,388 DEBUG SenderThread:267739 [sender.py:send_request():249] send_request: stop_status +2022-03-03 01:29:06,540 DEBUG SenderThread:267739 [sender.py:send():235] send: stats diff --git a/wandb/run-20220303_004520-25bnjrx1/logs/debug.log b/wandb/run-20220303_004520-25bnjrx1/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..afc866ee587ba4294921281457f0e27069666872 --- /dev/null +++ b/wandb/run-20220303_004520-25bnjrx1/logs/debug.log @@ -0,0 +1,27 @@ +2022-03-03 00:45:20,421 INFO MainThread:267637 [wandb_setup.py:_flush():75] Loading settings from /home/sanchit_huggingface_co/.config/wandb/settings +2022-03-03 00:45:20,421 INFO MainThread:267637 [wandb_setup.py:_flush():75] Loading settings from /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/settings +2022-03-03 00:45:20,421 INFO MainThread:267637 [wandb_setup.py:_flush():75] Loading settings from environment variables: {} +2022-03-03 00:45:20,421 INFO MainThread:267637 [wandb_setup.py:_flush():75] Inferring run settings from compute environment: {'program_relpath': 'run_speech_recognition_seq2seq.py', 'program': '/home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/run_speech_recognition_seq2seq.py'} +2022-03-03 00:45:20,421 INFO MainThread:267637 [wandb_init.py:_log_setup():386] Logging user logs to /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/logs/debug.log +2022-03-03 00:45:20,421 INFO MainThread:267637 [wandb_init.py:_log_setup():387] Logging internal logs to /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220303_004520-25bnjrx1/logs/debug-internal.log +2022-03-03 00:45:20,421 INFO MainThread:267637 [wandb_init.py:init():420] calling init triggers +2022-03-03 00:45:20,421 INFO MainThread:267637 [wandb_init.py:init():425] wandb.init called with sweep_config: {} +config: {} +2022-03-03 00:45:20,421 INFO MainThread:267637 [wandb_init.py:init():471] starting backend +2022-03-03 00:45:20,421 INFO MainThread:267637 [backend.py:_multiprocessing_setup():99] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2022-03-03 00:45:20,476 INFO MainThread:267637 [backend.py:ensure_launched():219] starting backend process... +2022-03-03 00:45:20,529 INFO MainThread:267637 [backend.py:ensure_launched():224] started backend process with pid: 267739 +2022-03-03 00:45:20,531 INFO MainThread:267637 [wandb_init.py:init():480] backend started and connected +2022-03-03 00:45:20,541 INFO MainThread:267637 [wandb_init.py:init():550] updated telemetry +2022-03-03 00:45:20,670 INFO MainThread:267637 [wandb_init.py:init():581] communicating current version +2022-03-03 00:45:21,418 INFO MainThread:267637 [wandb_init.py:init():586] got version response upgrade_message: "wandb version 0.12.11 is available! To upgrade, please run:\n $ pip install wandb --upgrade" + +2022-03-03 00:45:21,419 INFO MainThread:267637 [wandb_init.py:init():596] communicating run to backend with 30 second timeout +2022-03-03 00:45:21,550 INFO MainThread:267637 [wandb_init.py:init():624] starting run threads in backend +2022-03-03 00:45:21,663 INFO MainThread:267637 [wandb_run.py:_console_start():1827] atexit reg +2022-03-03 00:45:21,663 INFO MainThread:267637 [wandb_run.py:_redirect():1701] redirect: SettingsConsole.REDIRECT +2022-03-03 00:45:21,664 INFO MainThread:267637 [wandb_run.py:_redirect():1706] Redirecting console. +2022-03-03 00:45:21,666 INFO MainThread:267637 [wandb_run.py:_redirect():1762] Redirects installed. +2022-03-03 00:45:21,666 INFO MainThread:267637 [wandb_init.py:init():651] run started, returning control to user process +2022-03-03 00:45:21,668 INFO MainThread:267637 [wandb_run.py:_config_callback():966] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'torch.float32', 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 50, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'architectures': ['SpeechEncoderDecoderModel'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 50256, 'eos_token_id': 50256, 'sep_token_id': None, 'decoder_start_token_id': 50256, 'task_specific_params': None, 'problem_type': None, '_name_or_path': './', 'transformers_version': None, 'decoder': {'vocab_size': 50257, 'n_positions': 1024, 'n_embd': 1024, 'n_layer': 24, 'n_head': 16, 'n_inner': None, 'activation_function': 'gelu_new', 'resid_pdrop': 0.0, 'embd_pdrop': 0.0, 'attn_pdrop': 0.0, 'layer_norm_epsilon': 1e-05, 'initializer_range': 0.02, 'summary_type': 'cls_index', 'summary_use_proj': True, 'summary_activation': None, 'summary_first_dropout': 0.0, 'summary_proj_to_labels': True, 'scale_attn_weights': True, 'use_cache': False, 'scale_attn_by_inverse_layer_idx': False, 'reorder_and_upcast_attn': False, 'bos_token_id': 50256, 'eos_token_id': 50256, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': True, 'cross_attention_hidden_size': None, 'add_cross_attention': True, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'architectures': ['GPT2LMHeadModel'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'pad_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': {'text-generation': {'do_sample': True, 'max_length': 50}}, 'problem_type': None, '_name_or_path': 'gpt2-medium', 'transformers_version': '4.17.0.dev0', 'n_ctx': 1024, 'n_special': 0, 'predict_special_tokens': True, 'model_type': 'gpt2'}, 'encoder': {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'architectures': ['Wav2Vec2ForPreTraining'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 1, 'pad_token_id': 0, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'facebook/wav2vec2-large-lv60', 'transformers_version': '4.17.0.dev0', 'feat_extract_dropout': 0.0, 'gradient_checkpointing': False, 'hidden_dropout_prob': 0.0, 'num_feat_extract_layers': 7, 'hidden_size': 1024, 'feat_extract_norm': 'layer', 'feat_extract_activation': 'gelu', 'conv_dim': [512, 512, 512, 512, 512, 512, 512], 'conv_stride': [5, 2, 2, 2, 2, 2, 2], 'conv_kernel': [10, 3, 3, 3, 3, 2, 2], 'conv_bias': True, 'num_conv_pos_embeddings': 128, 'num_conv_pos_embedding_groups': 16, 'num_hidden_layers': 24, 'intermediate_size': 4096, 'hidden_act': 'gelu', 'num_attention_heads': 16, 'hidden_dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'feat_proj_dropout': 0.0, 'final_dropout': 0.0, 'layerdrop': 0.0, 'layer_norm_eps': 1e-05, 'initializer_range': 0.02, 'vocab_size': 32, 'do_stable_layer_norm': True, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.0, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'num_codevectors_per_group': 320, 'num_codevector_groups': 2, 'contrastive_logits_temperature': 0.1, 'feat_quantizer_dropout': 0.0, 'num_negatives': 100, 'codevector_dim': 768, 'proj_codevector_dim': 768, 'diversity_loss_weight': 0.1, 'ctc_loss_reduction': 'sum', 'ctc_zero_infinity': False, 'add_adapter': True, 'adapter_kernel_size': 3, 'adapter_stride': 2, 'num_adapter_layers': 3, 'output_hidden_size': 1024, 'classifier_proj_size': 256, 'tdnn_dim': [512, 512, 512, 512, 1500], 'tdnn_kernel': [5, 3, 3, 1, 1], 'tdnn_dilation': [1, 2, 3, 1, 1], 'xvector_output_dim': 512, 'model_type': 'wav2vec2'}, 'model_type': 'speech-encoder-decoder', 'processor_class': 'Wav2Vec2Processor', 'use_cache': False, 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': 'None', 'per_gpu_eval_batch_size': 'None', 'gradient_accumulation_steps': 2, 'eval_accumulation_steps': 'None', 'learning_rate': 0.001, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1.0, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Mar03_00-44-38_sanchit--v100', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 1, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 500, 'save_total_limit': 1, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'amp', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': 'None', 'local_rank': -1, 'xpu_backend': 'None', 'tpu_num_cores': 'None', 'tpu_metrics_debug': False, 'debug': '[]', 'dataloader_drop_last': False, 'eval_steps': 500, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': 'None', 'load_best_model_at_end': False, 'metric_for_best_model': 'None', 'greater_is_better': 'None', 'ignore_data_skip': False, 'sharded_ddp': '[]', 'deepspeed': 'None', 'label_smoothing_factor': 0.0, 'optim': 'adamw_hf', 'adafactor': False, 'group_by_length': True, 'length_column_name': 'input_length', 'report_to': "['wandb']", 'ddp_find_unused_parameters': 'None', 'ddp_bucket_cap_mb': 'None', 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': 'None', 'hub_model_id': 'None', 'hub_strategy': 'every_save', 'hub_token': '', 'gradient_checkpointing': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': 'None', 'push_to_hub_organization': 'None', 'push_to_hub_token': '', '_n_gpu': 1, 'mp_parameters': '', 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 40, 'generation_num_beams': 1, 'train_batch_size': 8, 'eval_batch_size': 8} +2022-03-03 00:45:21,672 INFO MainThread:267637 [wandb_watch.py:watch():43] Watching diff --git a/wandb/run-20220303_004520-25bnjrx1/run-25bnjrx1.wandb b/wandb/run-20220303_004520-25bnjrx1/run-25bnjrx1.wandb new file mode 100644 index 0000000000000000000000000000000000000000..39b48e64f8c1e8b5a149d1bab709295df563a4f4 --- /dev/null +++ b/wandb/run-20220303_004520-25bnjrx1/run-25bnjrx1.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36f1fc4980d22e3522d2819858b67f847a8103caa857f2187157a9f0b0054c86 +size 15921659